summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChunseok Lee <chunseok.lee@samsung.com>2020-04-23 14:45:49 +0900
committerChunseok Lee <chunseok.lee@samsung.com>2020-04-23 14:45:49 +0900
commite2ef8438a24f7c56a0744eb579a6e293ee2fbf8e (patch)
tree44a1a7951d168dd4370e13593ed03f4bc6d920c5
parent302e6564a7a76109e1178207e44e45a58631c477 (diff)
downloadnnfw-e2ef8438a24f7c56a0744eb579a6e293ee2fbf8e.tar.gz
nnfw-e2ef8438a24f7c56a0744eb579a6e293ee2fbf8e.tar.bz2
nnfw-e2ef8438a24f7c56a0744eb579a6e293ee2fbf8e.zip
Imported Upstream version 1.4.0upstream/1.4.0submit/tizen/20200423.054851
-rw-r--r--.ctags2
-rw-r--r--.gitignore2
-rw-r--r--LICENSE426
-rw-r--r--Makefile.template4
-rw-r--r--README.md29
-rw-r--r--compiler/CMakeLists.txt78
-rw-r--r--compiler/adtidas/CMakeLists.txt2
-rw-r--r--compiler/adtidas/include/adtidas/SmallVector.h156
-rw-r--r--compiler/angkor/CMakeLists.txt22
-rw-r--r--compiler/angkor/README.md51
-rw-r--r--compiler/angkor/include/angkor/TensorIndex.h29
-rw-r--r--compiler/angkor/include/angkor/TensorShape.h29
-rw-r--r--compiler/angkor/include/nncc/core/ADT/feature/Accessor.h43
-rw-r--r--compiler/angkor/include/nncc/core/ADT/feature/Buffer.h59
-rw-r--r--compiler/angkor/include/nncc/core/ADT/feature/CHWLayout.h41
-rw-r--r--compiler/angkor/include/nncc/core/ADT/feature/HWCLayout.h41
-rw-r--r--compiler/angkor/include/nncc/core/ADT/feature/Layout.h54
-rw-r--r--compiler/angkor/include/nncc/core/ADT/feature/Overlay.h60
-rw-r--r--compiler/angkor/include/nncc/core/ADT/feature/Reader.h43
-rw-r--r--compiler/angkor/include/nncc/core/ADT/feature/Shape.h74
-rw-r--r--compiler/angkor/include/nncc/core/ADT/feature/View.h71
-rw-r--r--compiler/angkor/include/nncc/core/ADT/kernel/Accessor.h43
-rw-r--r--compiler/angkor/include/nncc/core/ADT/kernel/Buffer.h72
-rw-r--r--compiler/angkor/include/nncc/core/ADT/kernel/IndexEnumerator.h70
-rw-r--r--compiler/angkor/include/nncc/core/ADT/kernel/Layout.h54
-rw-r--r--compiler/angkor/include/nncc/core/ADT/kernel/NCHWLayout.h41
-rw-r--r--compiler/angkor/include/nncc/core/ADT/kernel/NHWCLayout.h41
-rw-r--r--compiler/angkor/include/nncc/core/ADT/kernel/Overlay.h81
-rw-r--r--compiler/angkor/include/nncc/core/ADT/kernel/Reader.h43
-rw-r--r--compiler/angkor/include/nncc/core/ADT/kernel/Shape.h73
-rw-r--r--compiler/angkor/include/nncc/core/ADT/kernel/View.h43
-rw-r--r--compiler/angkor/include/nncc/core/ADT/kernel/ViewImpl.h67
-rw-r--r--compiler/angkor/include/nncc/core/ADT/tensor/Accessor.h43
-rw-r--r--compiler/angkor/include/nncc/core/ADT/tensor/Buffer.h57
-rw-r--r--compiler/angkor/include/nncc/core/ADT/tensor/Index.h65
-rw-r--r--compiler/angkor/include/nncc/core/ADT/tensor/IndexEnumerator.h63
-rw-r--r--compiler/angkor/include/nncc/core/ADT/tensor/Layout.h52
-rw-r--r--compiler/angkor/include/nncc/core/ADT/tensor/LexicalLayout.h41
-rw-r--r--compiler/angkor/include/nncc/core/ADT/tensor/Overlay.h58
-rw-r--r--compiler/angkor/include/nncc/core/ADT/tensor/Reader.h43
-rw-r--r--compiler/angkor/include/nncc/core/ADT/tensor/Shape.h70
-rw-r--r--compiler/angkor/include/nncc/core/ADT/tensor/View.h70
-rw-r--r--compiler/angkor/src/ADT/feature/Accessor.cpp21
-rw-r--r--compiler/angkor/src/ADT/feature/Buffer.test.cpp48
-rw-r--r--compiler/angkor/src/ADT/feature/CHWLayout.cpp43
-rw-r--r--compiler/angkor/src/ADT/feature/CHWLayout.test.cpp45
-rw-r--r--compiler/angkor/src/ADT/feature/HWCLayout.cpp43
-rw-r--r--compiler/angkor/src/ADT/feature/HWCLayout.test.cpp57
-rw-r--r--compiler/angkor/src/ADT/feature/Layout.cpp35
-rw-r--r--compiler/angkor/src/ADT/feature/Layout.test.cpp56
-rw-r--r--compiler/angkor/src/ADT/feature/Overlay.test.cpp72
-rw-r--r--compiler/angkor/src/ADT/feature/Reader.cpp21
-rw-r--r--compiler/angkor/src/ADT/feature/Shape.test.cpp56
-rw-r--r--compiler/angkor/src/ADT/kernel/Buffer.test.cpp49
-rw-r--r--compiler/angkor/src/ADT/kernel/IndexEnumerator.cpp84
-rw-r--r--compiler/angkor/src/ADT/kernel/IndexEnumerator.test.cpp46
-rw-r--r--compiler/angkor/src/ADT/kernel/Layout.cpp38
-rw-r--r--compiler/angkor/src/ADT/kernel/Layout.test.cpp56
-rw-r--r--compiler/angkor/src/ADT/kernel/NCHWLayout.cpp43
-rw-r--r--compiler/angkor/src/ADT/kernel/NCHWLayout.test.cpp53
-rw-r--r--compiler/angkor/src/ADT/kernel/NHWCLayout.cpp43
-rw-r--r--compiler/angkor/src/ADT/kernel/NHWCLayout.test.cpp74
-rw-r--r--compiler/angkor/src/ADT/kernel/Overlay.test.cpp73
-rw-r--r--compiler/angkor/src/ADT/kernel/Reader.cpp20
-rw-r--r--compiler/angkor/src/ADT/kernel/Shape.cpp37
-rw-r--r--compiler/angkor/src/ADT/kernel/Shape.test.cpp58
-rw-r--r--compiler/angkor/src/ADT/tensor/Buffer.test.cpp49
-rw-r--r--compiler/angkor/src/ADT/tensor/Index.cpp81
-rw-r--r--compiler/angkor/src/ADT/tensor/Index.test.cpp119
-rw-r--r--compiler/angkor/src/ADT/tensor/IndexEnumerator.cpp100
-rw-r--r--compiler/angkor/src/ADT/tensor/IndexEnumerator.test.cpp48
-rw-r--r--compiler/angkor/src/ADT/tensor/Layout.cpp35
-rw-r--r--compiler/angkor/src/ADT/tensor/Layout.test.cpp56
-rw-r--r--compiler/angkor/src/ADT/tensor/LexicalLayout.cpp60
-rw-r--r--compiler/angkor/src/ADT/tensor/LexicalLayout.test.cpp54
-rw-r--r--compiler/angkor/src/ADT/tensor/Overlay.test.cpp75
-rw-r--r--compiler/angkor/src/ADT/tensor/Reader.cpp21
-rw-r--r--compiler/angkor/src/ADT/tensor/Shape.cpp91
-rw-r--r--compiler/angkor/src/ADT/tensor/Shape.test.cpp185
-rw-r--r--compiler/angkor/src/TensorIndex.test.cpp87
-rw-r--r--compiler/angkor/src/TensorShape.test.cpp99
-rw-r--r--compiler/ann-api/CMakeLists.txt2
-rw-r--r--compiler/ann-api/include/.FORMATDENY0
-rw-r--r--compiler/ann-api/include/NeuralNetworks.h2075
-rw-r--r--compiler/ann-ref/.FORMATDENY0
-rw-r--r--compiler/ann-ref/CMakeLists.txt32
-rw-r--r--compiler/ann-ref/README.md7
-rw-r--r--compiler/ann-ref/requires.cmake1
-rw-r--r--compiler/ann-ref/src/Assert.h34
-rw-r--r--compiler/ann-ref/src/CompilationBuilder.cpp52
-rw-r--r--compiler/ann-ref/src/CompilationBuilder.h44
-rw-r--r--compiler/ann-ref/src/ExecutionBuilder.cpp196
-rw-r--r--compiler/ann-ref/src/ExecutionBuilder.h73
-rw-r--r--compiler/ann-ref/src/Executor.cpp814
-rw-r--r--compiler/ann-ref/src/Executor.h114
-rw-r--r--compiler/ann-ref/src/Logging.cpp30
-rw-r--r--compiler/ann-ref/src/Logging.h42
-rw-r--r--compiler/ann-ref/src/Macro.h22
-rw-r--r--compiler/ann-ref/src/Memory.cpp105
-rw-r--r--compiler/ann-ref/src/Memory.h106
-rw-r--r--compiler/ann-ref/src/MemoryTracker.cpp50
-rw-r--r--compiler/ann-ref/src/MemoryTracker.h49
-rw-r--r--compiler/ann-ref/src/Model.h39
-rw-r--r--compiler/ann-ref/src/ModelArgumentInfo.cpp121
-rw-r--r--compiler/ann-ref/src/ModelArgumentInfo.h58
-rw-r--r--compiler/ann-ref/src/ModelBuilder.cpp483
-rw-r--r--compiler/ann-ref/src/ModelBuilder.h142
-rw-r--r--compiler/ann-ref/src/NeuralNetworks.cpp338
-rw-r--r--compiler/ann-ref/src/Operand.h61
-rw-r--r--compiler/ann-ref/src/OperandType.cpp55
-rw-r--r--compiler/ann-ref/src/OperandType.h43
-rw-r--r--compiler/ann-ref/src/OperandType.probe.cpp32
-rw-r--r--compiler/ann-ref/src/Operation.h32
-rw-r--r--compiler/ann-ref/src/OperationType.cpp67
-rw-r--r--compiler/ann-ref/src/OperationType.h66
-rw-r--r--compiler/ann-ref/src/OperationType.probe.cpp85
-rw-r--r--compiler/ann-ref/src/Probe.cpp89
-rw-r--r--compiler/ann-ref/src/Request.h35
-rw-r--r--compiler/ann-ref/src/Shape.cpp68
-rw-r--r--compiler/ann-ref/src/Shape.h47
-rw-r--r--compiler/ann-ref/src/Validation.cpp263
-rw-r--r--compiler/ann-ref/src/Validation.h34
-rw-r--r--compiler/ann-ref/src/ops/Add.cpp57
-rw-r--r--compiler/ann-ref/src/ops/Add.float.cpp122
-rw-r--r--compiler/ann-ref/src/ops/Add.float.h28
-rw-r--r--compiler/ann-ref/src/ops/Add.h25
-rw-r--r--compiler/ann-ref/src/ops/AvgPool2D.cpp30
-rw-r--r--compiler/ann-ref/src/ops/AvgPool2D.float.cpp123
-rw-r--r--compiler/ann-ref/src/ops/AvgPool2D.float.h31
-rw-r--r--compiler/ann-ref/src/ops/AvgPool2D.h30
-rw-r--r--compiler/ann-ref/src/ops/Concatenation.cpp66
-rw-r--r--compiler/ann-ref/src/ops/Concatenation.float.cpp84
-rw-r--r--compiler/ann-ref/src/ops/Concatenation.float.h30
-rw-r--r--compiler/ann-ref/src/ops/Concatenation.h28
-rw-r--r--compiler/ann-ref/src/ops/Conv2D.cpp57
-rw-r--r--compiler/ann-ref/src/ops/Conv2D.float.cpp256
-rw-r--r--compiler/ann-ref/src/ops/Conv2D.float.h31
-rw-r--r--compiler/ann-ref/src/ops/Conv2D.h29
-rw-r--r--compiler/ann-ref/src/ops/DepthwiseConv2D.cpp57
-rw-r--r--compiler/ann-ref/src/ops/DepthwiseConv2D.float.cpp311
-rw-r--r--compiler/ann-ref/src/ops/DepthwiseConv2D.float.h32
-rw-r--r--compiler/ann-ref/src/ops/DepthwiseConv2D.h30
-rw-r--r--compiler/ann-ref/src/ops/Div.cpp57
-rw-r--r--compiler/ann-ref/src/ops/Div.float.cpp122
-rw-r--r--compiler/ann-ref/src/ops/Div.float.h28
-rw-r--r--compiler/ann-ref/src/ops/Div.h25
-rw-r--r--compiler/ann-ref/src/ops/FullyConnected.cpp70
-rw-r--r--compiler/ann-ref/src/ops/FullyConnected.float.cpp65
-rw-r--r--compiler/ann-ref/src/ops/FullyConnected.float.h29
-rw-r--r--compiler/ann-ref/src/ops/FullyConnected.h28
-rw-r--r--compiler/ann-ref/src/ops/MaxPool2D.cpp30
-rw-r--r--compiler/ann-ref/src/ops/MaxPool2D.float.cpp118
-rw-r--r--compiler/ann-ref/src/ops/MaxPool2D.float.h31
-rw-r--r--compiler/ann-ref/src/ops/MaxPool2D.h30
-rw-r--r--compiler/ann-ref/src/ops/Mul.cpp57
-rw-r--r--compiler/ann-ref/src/ops/Mul.float.cpp122
-rw-r--r--compiler/ann-ref/src/ops/Mul.float.h28
-rw-r--r--compiler/ann-ref/src/ops/Mul.h25
-rw-r--r--compiler/ann-ref/src/ops/Pad.cpp189
-rw-r--r--compiler/ann-ref/src/ops/Pad.h31
-rw-r--r--compiler/ann-ref/src/ops/ReLU.cpp25
-rw-r--r--compiler/ann-ref/src/ops/ReLU.float.cpp31
-rw-r--r--compiler/ann-ref/src/ops/ReLU.float.h26
-rw-r--r--compiler/ann-ref/src/ops/ReLU.h25
-rw-r--r--compiler/ann-ref/src/ops/ReLU6.cpp25
-rw-r--r--compiler/ann-ref/src/ops/ReLU6.float.cpp31
-rw-r--r--compiler/ann-ref/src/ops/ReLU6.float.h26
-rw-r--r--compiler/ann-ref/src/ops/ReLU6.h25
-rw-r--r--compiler/ann-ref/src/ops/Reshape.cpp73
-rw-r--r--compiler/ann-ref/src/ops/Reshape.h31
-rw-r--r--compiler/ann-ref/src/ops/Softmax.cpp25
-rw-r--r--compiler/ann-ref/src/ops/Softmax.float.cpp71
-rw-r--r--compiler/ann-ref/src/ops/Softmax.float.h28
-rw-r--r--compiler/ann-ref/src/ops/Softmax.h27
-rw-r--r--compiler/ann-ref/src/ops/Sub.cpp57
-rw-r--r--compiler/ann-ref/src/ops/Sub.float.cpp122
-rw-r--r--compiler/ann-ref/src/ops/Sub.float.h28
-rw-r--r--compiler/ann-ref/src/ops/Sub.h25
-rw-r--r--compiler/ann-ref/src/ops/internal/ActivationUtils.h59
-rw-r--r--compiler/ann-ref/src/ops/internal/Array.h46
-rw-r--r--compiler/ann-ref/src/ops/internal/Dims.h167
-rw-r--r--compiler/ann-ref/src/ops/internal/Elementwise.cpp25
-rw-r--r--compiler/ann-ref/src/ops/internal/Elementwise.h25
-rw-r--r--compiler/ann-ref/src/ops/internal/FeatureMap.h26
-rw-r--r--compiler/ann-ref/src/ops/internal/Fused.cpp28
-rw-r--r--compiler/ann-ref/src/ops/internal/Fused.h84
-rw-r--r--compiler/ann-ref/src/ops/internal/GEMM.h38
-rw-r--r--compiler/ann-ref/src/ops/internal/Macro.h58
-rw-r--r--compiler/ann-ref/src/ops/internal/Matrix.h127
-rw-r--r--compiler/ann-ref/src/ops/internal/NDArray.h133
-rw-r--r--compiler/ann-ref/src/ops/internal/Pooling.cpp43
-rw-r--r--compiler/ann-ref/src/ops/internal/Pooling.h31
-rw-r--r--compiler/ann-ref/src/ops/internal/Spatial.h29
-rw-r--r--compiler/bino/CMakeLists.txt14
-rw-r--r--compiler/bino/README.md5
-rw-r--r--compiler/bino/include/bino.h57
-rw-r--r--compiler/bino/tests/Functional.tests.cpp35
-rw-r--r--compiler/caffe2circle/CMakeLists.txt16
-rw-r--r--compiler/caffe2circle/README.md3
-rw-r--r--compiler/caffe2circle/requires.cmake3
-rw-r--r--compiler/caffe2circle/src/caffe2circle.cpp39
-rw-r--r--compiler/caffegen/CMakeLists.txt14
-rw-r--r--compiler/caffegen/README.md45
-rw-r--r--compiler/caffegen/src/DecodeCommand.cpp46
-rw-r--r--compiler/caffegen/src/DecodeCommand.h27
-rw-r--r--compiler/caffegen/src/Driver.cpp42
-rw-r--r--compiler/caffegen/src/EncodeCommand.cpp51
-rw-r--r--compiler/caffegen/src/EncodeCommand.h27
-rw-r--r--compiler/caffegen/src/InitCommand.cpp65
-rw-r--r--compiler/caffegen/src/InitCommand.h27
-rw-r--r--compiler/caffegen/src/MergeCommand.cpp58
-rw-r--r--compiler/caffegen/src/MergeCommand.h33
-rw-r--r--compiler/circle-inspect/CMakeLists.txt13
-rw-r--r--compiler/circle-inspect/README.md22
-rw-r--r--compiler/circle-inspect/driver/Driver.cpp96
-rw-r--r--compiler/circle-inspect/requires.cmake3
-rw-r--r--compiler/circle-inspect/src/Dump.cpp135
-rw-r--r--compiler/circle-inspect/src/Dump.h56
-rw-r--r--compiler/circle-inspect/src/Model.cpp143
-rw-r--r--compiler/circle-inspect/src/Model.h43
-rw-r--r--compiler/circle-inspect/src/Reader.cpp166
-rw-r--r--compiler/circle-inspect/src/Reader.h91
-rw-r--r--compiler/circle-verify/CMakeLists.txt12
-rw-r--r--compiler/circle-verify/README.md23
-rw-r--r--compiler/circle-verify/requires.cmake4
-rw-r--r--compiler/circle-verify/src/Driver.cpp51
-rw-r--r--compiler/circle-verify/src/Model.cpp90
-rw-r--r--compiler/circle-verify/src/Model.h38
-rw-r--r--compiler/circle-verify/src/VerifyFlatBuffers.cpp36
-rw-r--r--compiler/circle-verify/src/VerifyFlatBuffers.h32
-rw-r--r--compiler/circle2circle/CMakeLists.txt42
-rw-r--r--compiler/circle2circle/README.md3
-rw-r--r--compiler/circle2circle/include/CircleExpContract.h50
-rw-r--r--compiler/circle2circle/include/Model.h43
-rw-r--r--compiler/circle2circle/requires.cmake10
-rw-r--r--compiler/circle2circle/src/Circle2Circle.cpp120
-rw-r--r--compiler/circle2circle/src/Circle2Circle.test.cpp29
-rw-r--r--compiler/circle2circle/src/CircleExpContract.cpp33
-rw-r--r--compiler/circle2circle/src/Model.cpp78
-rw-r--r--compiler/circle2circle/src/TestHelper.h55
-rw-r--r--compiler/circledump/CMakeLists.txt14
-rw-r--r--compiler/circledump/README.md71
-rw-r--r--compiler/circledump/driver/Driver.cpp52
-rw-r--r--compiler/circledump/include/circledump/Dump.h32
-rw-r--r--compiler/circledump/include/circleread/Model.h43
-rw-r--r--compiler/circledump/requires.cmake3
-rw-r--r--compiler/circledump/src/Dump.cpp310
-rw-r--r--compiler/circledump/src/Load.cpp133
-rw-r--r--compiler/circledump/src/OpPrinter.cpp307
-rw-r--r--compiler/circledump/src/OpPrinter.h61
-rw-r--r--compiler/circledump/src/Read.cpp169
-rw-r--r--compiler/circledump/src/Read.h101
-rw-r--r--compiler/cli/CMakeLists.txt15
-rw-r--r--compiler/cli/README.md13
-rw-r--r--compiler/cli/include/cli/App.h50
-rw-r--r--compiler/cli/include/cli/Command.h32
-rw-r--r--compiler/cli/include/cli/FunctionCommand.h46
-rw-r--r--compiler/cli/src/App.cpp74
-rw-r--r--compiler/cli/src/App.test.cpp63
-rw-r--r--compiler/coco/CMakeLists.txt2
-rw-r--r--compiler/coco/README.md3
-rw-r--r--compiler/coco/core/CMakeLists.txt25
-rw-r--r--compiler/coco/core/include/coco/ADT/DLinkedList.h288
-rw-r--r--compiler/coco/core/include/coco/ADT/PtrList.h54
-rw-r--r--compiler/coco/core/include/coco/ADT/PtrManager.h67
-rw-r--r--compiler/coco/core/include/coco/IR.h34
-rw-r--r--compiler/coco/core/include/coco/IR/Arg.h80
-rw-r--r--compiler/coco/core/include/coco/IR/Bag.h164
-rw-r--r--compiler/coco/core/include/coco/IR/BagManager.h47
-rw-r--r--compiler/coco/core/include/coco/IR/Block.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/Block.h78
-rw-r--r--compiler/coco/core/include/coco/IR/BlockIndex.h63
-rw-r--r--compiler/coco/core/include/coco/IR/BlockManager.h47
-rw-r--r--compiler/coco/core/include/coco/IR/Def.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/Def.h52
-rw-r--r--compiler/coco/core/include/coco/IR/Dep.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/Dep.h59
-rw-r--r--compiler/coco/core/include/coco/IR/DepSet.h31
-rw-r--r--compiler/coco/core/include/coco/IR/ElemID.h51
-rw-r--r--compiler/coco/core/include/coco/IR/Entity.h51
-rw-r--r--compiler/coco/core/include/coco/IR/EntityBuilder.h48
-rw-r--r--compiler/coco/core/include/coco/IR/EntityManager.h67
-rw-r--r--compiler/coco/core/include/coco/IR/FeatureLayout.h54
-rw-r--r--compiler/coco/core/include/coco/IR/FeatureLayouts.h159
-rw-r--r--compiler/coco/core/include/coco/IR/FeatureObject.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/FeatureObject.h63
-rw-r--r--compiler/coco/core/include/coco/IR/FeatureShape.h70
-rw-r--r--compiler/coco/core/include/coco/IR/Input.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/Input.h44
-rw-r--r--compiler/coco/core/include/coco/IR/InputList.h31
-rw-r--r--compiler/coco/core/include/coco/IR/InputManager.h39
-rw-r--r--compiler/coco/core/include/coco/IR/Instr.forward.h28
-rw-r--r--compiler/coco/core/include/coco/IR/Instr.h161
-rw-r--r--compiler/coco/core/include/coco/IR/Instr.lst9
-rw-r--r--compiler/coco/core/include/coco/IR/InstrIndex.h63
-rw-r--r--compiler/coco/core/include/coco/IR/InstrManager.h66
-rw-r--r--compiler/coco/core/include/coco/IR/Instrs.h175
-rw-r--r--compiler/coco/core/include/coco/IR/KernelLayout.h58
-rw-r--r--compiler/coco/core/include/coco/IR/KernelLayouts.h117
-rw-r--r--compiler/coco/core/include/coco/IR/KernelObject.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/KernelObject.h65
-rw-r--r--compiler/coco/core/include/coco/IR/Locatable.h37
-rw-r--r--compiler/coco/core/include/coco/IR/Module.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/Module.h67
-rw-r--r--compiler/coco/core/include/coco/IR/Object.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/Object.h144
-rw-r--r--compiler/coco/core/include/coco/IR/ObjectManager.h53
-rw-r--r--compiler/coco/core/include/coco/IR/ObjectSet.h31
-rw-r--r--compiler/coco/core/include/coco/IR/Op.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/Op.h255
-rw-r--r--compiler/coco/core/include/coco/IR/Op.lst19
-rw-r--r--compiler/coco/core/include/coco/IR/OpManager.h63
-rw-r--r--compiler/coco/core/include/coco/IR/Ops.h412
-rw-r--r--compiler/coco/core/include/coco/IR/Output.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/Output.h44
-rw-r--r--compiler/coco/core/include/coco/IR/OutputList.h31
-rw-r--r--compiler/coco/core/include/coco/IR/OutputManager.h39
-rw-r--r--compiler/coco/core/include/coco/IR/Padding2D.h65
-rw-r--r--compiler/coco/core/include/coco/IR/Part.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/Part.h53
-rw-r--r--compiler/coco/core/include/coco/IR/Read.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/Read.h55
-rw-r--r--compiler/coco/core/include/coco/IR/ReadSet.h31
-rw-r--r--compiler/coco/core/include/coco/IR/Step.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/Step.h54
-rw-r--r--compiler/coco/core/include/coco/IR/Stride2D.h54
-rw-r--r--compiler/coco/core/include/coco/IR/Update.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/Update.h51
-rw-r--r--compiler/coco/core/include/coco/IR/UpdateSet.h31
-rw-r--r--compiler/coco/core/include/coco/IR/Use.forward.h27
-rw-r--r--compiler/coco/core/include/coco/IR/Use.h52
-rw-r--r--compiler/coco/core/include/coco/IR/UseSet.h31
-rw-r--r--compiler/coco/core/include/coco/IR/Window2D.h55
-rw-r--r--compiler/coco/core/src/ADT/DLinkedList.test.cpp281
-rw-r--r--compiler/coco/core/src/ADT/PtrList.cpp19
-rw-r--r--compiler/coco/core/src/ADT/PtrList.test.cpp47
-rw-r--r--compiler/coco/core/src/ADT/PtrManager.test.cpp99
-rw-r--r--compiler/coco/core/src/IR.test.cpp303
-rw-r--r--compiler/coco/core/src/IR/Arg.cpp78
-rw-r--r--compiler/coco/core/src/IR/Arg.test.cpp100
-rw-r--r--compiler/coco/core/src/IR/AvgPool2D.test.cpp113
-rw-r--r--compiler/coco/core/src/IR/Bag.cpp147
-rw-r--r--compiler/coco/core/src/IR/Bag.test.cpp30
-rw-r--r--compiler/coco/core/src/IR/BagManager.cpp33
-rw-r--r--compiler/coco/core/src/IR/BagManager.test.cpp38
-rw-r--r--compiler/coco/core/src/IR/Block.cpp56
-rw-r--r--compiler/coco/core/src/IR/Block.test.cpp28
-rw-r--r--compiler/coco/core/src/IR/BlockIndex.cpp30
-rw-r--r--compiler/coco/core/src/IR/BlockIndex.test.cpp50
-rw-r--r--compiler/coco/core/src/IR/BlockManager.cpp41
-rw-r--r--compiler/coco/core/src/IR/BlockManager.test.cpp60
-rw-r--r--compiler/coco/core/src/IR/Consumer.mock.h33
-rw-r--r--compiler/coco/core/src/IR/Conv2D.cpp75
-rw-r--r--compiler/coco/core/src/IR/Conv2D.test.cpp154
-rw-r--r--compiler/coco/core/src/IR/Def.cpp43
-rw-r--r--compiler/coco/core/src/IR/Def.test.cpp82
-rw-r--r--compiler/coco/core/src/IR/Dep.cpp53
-rw-r--r--compiler/coco/core/src/IR/Dep.test.cpp73
-rw-r--r--compiler/coco/core/src/IR/ElemID.cpp25
-rw-r--r--compiler/coco/core/src/IR/ElemID.test.cpp62
-rw-r--r--compiler/coco/core/src/IR/EntityManager.cpp20
-rw-r--r--compiler/coco/core/src/IR/Eval.cpp28
-rw-r--r--compiler/coco/core/src/IR/Eval.test.cpp60
-rw-r--r--compiler/coco/core/src/IR/FeatureLayouts.cpp211
-rw-r--r--compiler/coco/core/src/IR/FeatureLayouts.test.cpp66
-rw-r--r--compiler/coco/core/src/IR/FeatureObject.cpp31
-rw-r--r--compiler/coco/core/src/IR/FeatureObject.test.cpp122
-rw-r--r--compiler/coco/core/src/IR/FeatureShape.test.cpp29
-rw-r--r--compiler/coco/core/src/IR/Input.cpp41
-rw-r--r--compiler/coco/core/src/IR/Input.test.cpp79
-rw-r--r--compiler/coco/core/src/IR/InputManager.cpp31
-rw-r--r--compiler/coco/core/src/IR/InputManager.test.cpp29
-rw-r--r--compiler/coco/core/src/IR/Instr.cpp56
-rw-r--r--compiler/coco/core/src/IR/InstrIndex.cpp30
-rw-r--r--compiler/coco/core/src/IR/InstrIndex.test.cpp50
-rw-r--r--compiler/coco/core/src/IR/InstrManager.cpp33
-rw-r--r--compiler/coco/core/src/IR/InstrManager.test.cpp52
-rw-r--r--compiler/coco/core/src/IR/KernelLayouts.cpp155
-rw-r--r--compiler/coco/core/src/IR/KernelLayouts.test.cpp126
-rw-r--r--compiler/coco/core/src/IR/KernelObject.cpp42
-rw-r--r--compiler/coco/core/src/IR/KernelObject.test.cpp78
-rw-r--r--compiler/coco/core/src/IR/Load.cpp53
-rw-r--r--compiler/coco/core/src/IR/MaxPool2D.test.cpp101
-rw-r--r--compiler/coco/core/src/IR/Module.cpp150
-rw-r--r--compiler/coco/core/src/IR/Module.test.cpp196
-rw-r--r--compiler/coco/core/src/IR/Object.cpp116
-rw-r--r--compiler/coco/core/src/IR/Object.test.cpp110
-rw-r--r--compiler/coco/core/src/IR/ObjectManager.cpp52
-rw-r--r--compiler/coco/core/src/IR/ObjectManager.test.cpp57
-rw-r--r--compiler/coco/core/src/IR/Op.cpp153
-rw-r--r--compiler/coco/core/src/IR/OpManager.cpp99
-rw-r--r--compiler/coco/core/src/IR/OpManager.test.cpp120
-rw-r--r--compiler/coco/core/src/IR/Ops.cpp22
-rw-r--r--compiler/coco/core/src/IR/Ops.test.cpp129
-rw-r--r--compiler/coco/core/src/IR/Output.cpp41
-rw-r--r--compiler/coco/core/src/IR/Output.test.cpp83
-rw-r--r--compiler/coco/core/src/IR/OutputManager.cpp31
-rw-r--r--compiler/coco/core/src/IR/OutputManager.test.cpp29
-rw-r--r--compiler/coco/core/src/IR/PadF.test.cpp89
-rw-r--r--compiler/coco/core/src/IR/Padding2D.cpp46
-rw-r--r--compiler/coco/core/src/IR/Padding2D.test.cpp51
-rw-r--r--compiler/coco/core/src/IR/Part.cpp45
-rw-r--r--compiler/coco/core/src/IR/Part.test.cpp70
-rw-r--r--compiler/coco/core/src/IR/Producer.mock.h33
-rw-r--r--compiler/coco/core/src/IR/ReLU.test.cpp85
-rw-r--r--compiler/coco/core/src/IR/ReLU6.test.cpp85
-rw-r--r--compiler/coco/core/src/IR/Read.cpp49
-rw-r--r--compiler/coco/core/src/IR/Read.test.cpp81
-rw-r--r--compiler/coco/core/src/IR/Reader.mock.h33
-rw-r--r--compiler/coco/core/src/IR/Shuffle.cpp41
-rw-r--r--compiler/coco/core/src/IR/Shuffle.test.cpp95
-rw-r--r--compiler/coco/core/src/IR/Sqrt.test.cpp85
-rw-r--r--compiler/coco/core/src/IR/Step.cpp52
-rw-r--r--compiler/coco/core/src/IR/Stride2D.cpp34
-rw-r--r--compiler/coco/core/src/IR/Stride2D.test.cpp45
-rw-r--r--compiler/coco/core/src/IR/Sub.test.cpp87
-rw-r--r--compiler/coco/core/src/IR/Update.cpp49
-rw-r--r--compiler/coco/core/src/IR/Update.test.cpp81
-rw-r--r--compiler/coco/core/src/IR/Updater.mock.h33
-rw-r--r--compiler/coco/core/src/IR/Use.cpp43
-rw-r--r--compiler/coco/core/src/IR/Use.test.cpp86
-rw-r--r--compiler/coco/core/src/IR/Window2D.test.cpp46
-rw-r--r--compiler/coco/generic/CMakeLists.txt22
-rw-r--r--compiler/coco/generic/include/coco/ADT/Span.h67
-rw-r--r--compiler/coco/generic/include/coco/IR/Data.h54
-rw-r--r--compiler/coco/generic/include/coco/IR/PlainWeightContext.h66
-rw-r--r--compiler/coco/generic/src/ADT/Span.test.cpp60
-rw-r--r--compiler/coco/generic/src/IR/Data.cpp217
-rw-r--r--compiler/coco/generic/src/IR/Data.test.cpp64
-rw-r--r--compiler/coco/requires.cmake1
-rw-r--r--compiler/cwrap/CMakeLists.txt22
-rw-r--r--compiler/cwrap/README.md23
-rw-r--r--compiler/cwrap/include/cwrap/Fildes.h57
-rw-r--r--compiler/cwrap/src/Fildes.cpp100
-rw-r--r--compiler/cwrap/src/Fildes.test.cpp97
-rw-r--r--compiler/dredd-rule-lib/CMakeLists.txt21
-rw-r--r--compiler/dredd-rule-lib/README.md112
-rwxr-xr-xcompiler/dredd-rule-lib/rule-lib.sh203
-rw-r--r--compiler/enco-intf/CMakeLists.txt2
-rw-r--r--compiler/enco-intf/cmdline/CMakeLists.txt2
-rw-r--r--compiler/enco-intf/cmdline/include/cmdline/View.h35
-rw-r--r--compiler/enco-intf/frontend/CMakeLists.txt4
-rw-r--r--compiler/enco-intf/frontend/include/enco/Bundle.h48
-rw-r--r--compiler/enco-intf/frontend/include/enco/Frontend.h34
-rw-r--r--compiler/enco/CMakeLists.txt4
-rw-r--r--compiler/enco/README.md25
-rw-r--r--compiler/enco/cli/CMakeLists.txt11
-rw-r--r--compiler/enco/cli/src/Driver.cpp221
-rw-r--r--compiler/enco/core/CMakeLists.txt35
-rw-r--r--compiler/enco/core/include/enco/Backend.h41
-rw-r--r--compiler/enco/core/src/ANN/Binder.h219
-rw-r--r--compiler/enco/core/src/ANN/Context.cpp31
-rw-r--r--compiler/enco/core/src/ANN/Context.h57
-rw-r--r--compiler/enco/core/src/ANN/Context.test.cpp73
-rw-r--r--compiler/enco/core/src/ANN/IR/DType.cpp25
-rw-r--r--compiler/enco/core/src/ANN/IR/DType.h36
-rw-r--r--compiler/enco/core/src/ANN/IR/DType.test.cpp25
-rw-r--r--compiler/enco/core/src/ANN/IR/InputList.h31
-rw-r--r--compiler/enco/core/src/ANN/IR/Module.h60
-rw-r--r--compiler/enco/core/src/ANN/IR/Module.test.cpp36
-rw-r--r--compiler/enco/core/src/ANN/IR/Operand.h82
-rw-r--r--compiler/enco/core/src/ANN/IR/Operand.test.cpp37
-rw-r--r--compiler/enco/core/src/ANN/IR/OperandID.h48
-rw-r--r--compiler/enco/core/src/ANN/IR/OperandID.test.cpp33
-rw-r--r--compiler/enco/core/src/ANN/IR/OperandInventory.cpp57
-rw-r--r--compiler/enco/core/src/ANN/IR/OperandInventory.h56
-rw-r--r--compiler/enco/core/src/ANN/IR/OperandInventory.test.cpp30
-rw-r--r--compiler/enco/core/src/ANN/IR/Operation.def17
-rw-r--r--compiler/enco/core/src/ANN/IR/Operation.h59
-rw-r--r--compiler/enco/core/src/ANN/IR/Operation.test.cpp28
-rw-r--r--compiler/enco/core/src/ANN/IR/OperationInventory.cpp32
-rw-r--r--compiler/enco/core/src/ANN/IR/OperationInventory.h48
-rw-r--r--compiler/enco/core/src/ANN/IR/OperationInventory.test.cpp40
-rw-r--r--compiler/enco/core/src/ANN/IR/OutputList.h31
-rw-r--r--compiler/enco/core/src/ANN/IR/Weight.h70
-rw-r--r--compiler/enco/core/src/ANN/IR/Weight.test.cpp53
-rw-r--r--compiler/enco/core/src/ANN/IR/WeightInventory.cpp34
-rw-r--r--compiler/enco/core/src/ANN/IR/WeightInventory.h38
-rw-r--r--compiler/enco/core/src/ANN/IR/WeightInventory.test.cpp29
-rw-r--r--compiler/enco/core/src/AsmCode.cpp33
-rw-r--r--compiler/enco/core/src/AsmCode.h51
-rw-r--r--compiler/enco/core/src/Backend.cpp178
-rw-r--r--compiler/enco/core/src/Code.h47
-rw-r--r--compiler/enco/core/src/Code.test.cpp30
-rw-r--r--compiler/enco/core/src/CodeIndex.h76
-rw-r--r--compiler/enco/core/src/CppCode.cpp553
-rw-r--r--compiler/enco/core/src/CppCode.h51
-rw-r--r--compiler/enco/core/src/CppGen/Host.cpp306
-rw-r--r--compiler/enco/core/src/CppGen/Host.h48
-rw-r--r--compiler/enco/core/src/CppGen/MemoryContext.cpp40
-rw-r--r--compiler/enco/core/src/CppGen/MemoryContext.h55
-rw-r--r--compiler/enco/core/src/CppGen/Subnet.cpp422
-rw-r--r--compiler/enco/core/src/CppGen/Subnet.h91
-rw-r--r--compiler/enco/core/src/Dims.h34
-rw-r--r--compiler/enco/core/src/IRUtils.cpp65
-rw-r--r--compiler/enco/core/src/IRUtils.h41
-rw-r--r--compiler/enco/core/src/IRValidator.cpp85
-rw-r--r--compiler/enco/core/src/IRValidator.h29
-rw-r--r--compiler/enco/core/src/IRValidator.test.cpp130
-rw-r--r--compiler/enco/core/src/Pass.h78
-rw-r--r--compiler/enco/core/src/Pass.test.cpp41
-rw-r--r--compiler/enco/core/src/Pipeline.h46
-rw-r--r--compiler/enco/core/src/Pipeline.test.cpp26
-rw-r--r--compiler/enco/core/src/Session.cpp58
-rw-r--r--compiler/enco/core/src/Session.h45
-rw-r--r--compiler/enco/core/src/String.h57
-rw-r--r--compiler/enco/core/src/Support/Debugging.cpp533
-rw-r--r--compiler/enco/core/src/Support/Debugging.h110
-rw-r--r--compiler/enco/core/src/Support/Debugging.test.cpp26
-rw-r--r--compiler/enco/core/src/Transforms/AvgPoolLowering.cpp229
-rw-r--r--compiler/enco/core/src/Transforms/AvgPoolLowering.h43
-rw-r--r--compiler/enco/core/src/Transforms/ConcatLowering.cpp196
-rw-r--r--compiler/enco/core/src/Transforms/ConcatLowering.h43
-rw-r--r--compiler/enco/core/src/Transforms/ConstantFolding.cpp442
-rw-r--r--compiler/enco/core/src/Transforms/ConstantFolding.h43
-rw-r--r--compiler/enco/core/src/Transforms/ConstantFolding.test.cpp327
-rw-r--r--compiler/enco/core/src/Transforms/CopyLowering.cpp105
-rw-r--r--compiler/enco/core/src/Transforms/CopyLowering.h43
-rw-r--r--compiler/enco/core/src/Transforms/DataLayoutConversion.cpp383
-rw-r--r--compiler/enco/core/src/Transforms/DataLayoutConversion.h43
-rw-r--r--compiler/enco/core/src/Transforms/DataLayoutConversion.test.cpp33
-rw-r--r--compiler/enco/core/src/Transforms/DeadBagElimination.cpp72
-rw-r--r--compiler/enco/core/src/Transforms/DeadBagElimination.h48
-rw-r--r--compiler/enco/core/src/Transforms/DeadObjectElimination.cpp77
-rw-r--r--compiler/enco/core/src/Transforms/DeadObjectElimination.h47
-rw-r--r--compiler/enco/core/src/Transforms/Duplicate.cpp135
-rw-r--r--compiler/enco/core/src/Transforms/Duplicate.h43
-rw-r--r--compiler/enco/core/src/Transforms/DuplicatedObjectReduction.cpp119
-rw-r--r--compiler/enco/core/src/Transforms/DuplicatedObjectReduction.h73
-rw-r--r--compiler/enco/core/src/Transforms/FeatureUnification.cpp216
-rw-r--r--compiler/enco/core/src/Transforms/FeatureUnification.h68
-rw-r--r--compiler/enco/core/src/Transforms/FreeInstrElimination.cpp65
-rw-r--r--compiler/enco/core/src/Transforms/FreeInstrElimination.h54
-rw-r--r--compiler/enco/core/src/Transforms/FreeInstrElimination.test.cpp34
-rw-r--r--compiler/enco/core/src/Transforms/FreeOpElimination.cpp59
-rw-r--r--compiler/enco/core/src/Transforms/FreeOpElimination.h54
-rw-r--r--compiler/enco/core/src/Transforms/FreeOpElimination.test.cpp34
-rw-r--r--compiler/enco/core/src/Transforms/GlobalDataGeneration.cpp181
-rw-r--r--compiler/enco/core/src/Transforms/GlobalDataGeneration.h54
-rw-r--r--compiler/enco/core/src/Transforms/IdenticalObjectReduction.cpp139
-rw-r--r--compiler/enco/core/src/Transforms/IdenticalObjectReduction.h69
-rw-r--r--compiler/enco/core/src/Transforms/IdenticalObjectReduction.test.cpp32
-rw-r--r--compiler/enco/core/src/Transforms/IndirectCopyElimination.cpp84
-rw-r--r--compiler/enco/core/src/Transforms/IndirectCopyElimination.h60
-rw-r--r--compiler/enco/core/src/Transforms/IntrinsicSelection.cpp100
-rw-r--r--compiler/enco/core/src/Transforms/IntrinsicSelection.h47
-rw-r--r--compiler/enco/core/src/Transforms/Optimizations.cpp257
-rw-r--r--compiler/enco/core/src/Transforms/Optimizations.h123
-rw-r--r--compiler/enco/core/src/Transforms/Split.cpp1233
-rw-r--r--compiler/enco/core/src/Transforms/Split.h48
-rw-r--r--compiler/enco/core/src/Usage.cpp58
-rw-r--r--compiler/enco/core/src/Usage.h34
-rw-r--r--compiler/enco/core/src/coex/IR.h109
-rw-r--r--compiler/enco/core/src/coex/IR.test.cpp38
-rw-r--r--compiler/enco/frontend/CMakeLists.txt (renamed from runtime/neurun/frontend/CMakeLists.txt)0
-rw-r--r--compiler/enco/frontend/caffe/CMakeLists.txt39
-rw-r--r--compiler/enco/frontend/caffe/src/ConcatSpec.cpp40
-rw-r--r--compiler/enco/frontend/caffe/src/ConcatSpec.h47
-rw-r--r--compiler/enco/frontend/caffe/src/ConcatSpec.test.cpp42
-rw-r--r--compiler/enco/frontend/caffe/src/Context.cpp21
-rw-r--r--compiler/enco/frontend/caffe/src/Context.h112
-rw-r--r--compiler/enco/frontend/caffe/src/Convert.cpp40
-rw-r--r--compiler/enco/frontend/caffe/src/Convert.h36
-rw-r--r--compiler/enco/frontend/caffe/src/ConvolutionSpec.cpp147
-rw-r--r--compiler/enco/frontend/caffe/src/ConvolutionSpec.h59
-rw-r--r--compiler/enco/frontend/caffe/src/ConvolutionSpec.test.cpp405
-rw-r--r--compiler/enco/frontend/caffe/src/Entry.cpp62
-rw-r--r--compiler/enco/frontend/caffe/src/Frontend.cpp135
-rw-r--r--compiler/enco/frontend/caffe/src/Frontend.h43
-rw-r--r--compiler/enco/frontend/caffe/src/GraphBuilder.cpp21
-rw-r--r--compiler/enco/frontend/caffe/src/GraphBuilder.h36
-rw-r--r--compiler/enco/frontend/caffe/src/GraphBuilderRegistry.cpp47
-rw-r--r--compiler/enco/frontend/caffe/src/GraphBuilderRegistry.h54
-rw-r--r--compiler/enco/frontend/caffe/src/IRBuilder.h180
-rw-r--r--compiler/enco/frontend/caffe/src/Importer.cpp52
-rw-r--r--compiler/enco/frontend/caffe/src/Importer.h29
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/BatchNorm.cpp254
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/BatchNorm.h35
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/Concatenation.cpp138
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/Concatenation.h35
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/Convolution.cpp197
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/Convolution.h35
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/Eltwise.cpp134
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/Eltwise.h35
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/Input.cpp60
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/Input.h35
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/Pooling.cpp138
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/Pooling.h35
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/ReLU.cpp83
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/ReLU.h35
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/Scale.cpp160
-rw-r--r--compiler/enco/frontend/caffe/src/Layer/Scale.h35
-rw-r--r--compiler/enco/frontend/caffe/src/Padding.h69
-rw-r--r--compiler/enco/frontend/caffe/src/Padding.test.cpp48
-rw-r--r--compiler/enco/frontend/caffe/src/PaddingUtils.cpp131
-rw-r--r--compiler/enco/frontend/caffe/src/PaddingUtils.h81
-rw-r--r--compiler/enco/frontend/caffe/src/PoolingSpec.cpp148
-rw-r--r--compiler/enco/frontend/caffe/src/PoolingSpec.h62
-rw-r--r--compiler/enco/frontend/caffe/src/PoolingSpec.test.cpp294
-rw-r--r--compiler/enco/frontend/caffe/src/ShapeQuery.cpp40
-rw-r--r--compiler/enco/frontend/caffe/src/ShapeQuery.h75
-rw-r--r--compiler/enco/frontend/tflite/CMakeLists.txt36
-rw-r--r--compiler/enco/frontend/tflite/schema/schema.fbs734
-rw-r--r--compiler/enco/frontend/tflite/schema/schema.meta2
-rw-r--r--compiler/enco/frontend/tflite/src/Context.cpp116
-rw-r--r--compiler/enco/frontend/tflite/src/Context.h169
-rw-r--r--compiler/enco/frontend/tflite/src/Convert.cpp57
-rw-r--r--compiler/enco/frontend/tflite/src/Convert.h43
-rw-r--r--compiler/enco/frontend/tflite/src/Entry.cpp36
-rw-r--r--compiler/enco/frontend/tflite/src/Frontend.cpp198
-rw-r--r--compiler/enco/frontend/tflite/src/Frontend.h40
-rw-r--r--compiler/enco/frontend/tflite/src/Frontend.test.cpp41
-rw-r--r--compiler/enco/frontend/tflite/src/GraphBuilder.h46
-rw-r--r--compiler/enco/frontend/tflite/src/GraphBuilderRegistry.h88
-rw-r--r--compiler/enco/frontend/tflite/src/IRBuilder.h178
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Activation.cpp96
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Activation.h37
-rw-r--r--compiler/enco/frontend/tflite/src/Op/AveragePool2D.cpp126
-rw-r--r--compiler/enco/frontend/tflite/src/Op/AveragePool2D.h39
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Concatenation.cpp252
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Concatenation.h38
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Conv2D.cpp181
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Conv2D.h39
-rw-r--r--compiler/enco/frontend/tflite/src/Op/DepthwiseConv2D.cpp230
-rw-r--r--compiler/enco/frontend/tflite/src/Op/DepthwiseConv2D.h39
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Div.cpp116
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Div.h38
-rw-r--r--compiler/enco/frontend/tflite/src/Op/MaxPool2D.cpp123
-rw-r--r--compiler/enco/frontend/tflite/src/Op/MaxPool2D.h39
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Padding.cpp105
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Padding.h42
-rw-r--r--compiler/enco/frontend/tflite/src/Op/ReLU.cpp89
-rw-r--r--compiler/enco/frontend/tflite/src/Op/ReLU.h38
-rw-r--r--compiler/enco/frontend/tflite/src/Op/ReLU6.cpp89
-rw-r--r--compiler/enco/frontend/tflite/src/Op/ReLU6.h38
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Reshape.cpp89
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Reshape.h38
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Sub.cpp112
-rw-r--r--compiler/enco/frontend/tflite/src/Op/Sub.h38
-rw-r--r--compiler/enco/frontend/tflite/src/RawModel.h29
-rw-r--r--compiler/enco/frontend/tflite/src/RawModelLoader.cpp89
-rw-r--r--compiler/enco/frontend/tflite/src/RawModelLoader.h29
-rw-r--r--compiler/enco/frontend/tflite/src/TensorBags.h65
-rw-r--r--compiler/enco/requires.cmake8
-rw-r--r--compiler/enco/test/CMakeLists.txt1
-rw-r--r--compiler/enco/test/basic/000/CMakeLists.txt26
-rw-r--r--compiler/enco/test/basic/000/enco.test.cpp81
-rw-r--r--compiler/enco/test/basic/CMakeLists.txt1
-rw-r--r--compiler/enco/test/binder.cpp188
-rw-r--r--compiler/enco/test/caffe/CMakeLists.txt141
-rwxr-xr-xcompiler/enco/test/caffe/runall.sh85
-rw-r--r--compiler/enco/test/tflite/AveragePool2D_000/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/AveragePool2D_000/test.recipe24
-rw-r--r--compiler/enco/test/tflite/AveragePool2D_001/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/AveragePool2D_001/test.recipe24
-rw-r--r--compiler/enco/test/tflite/CMakeLists.txt108
-rw-r--r--compiler/enco/test/tflite/Concat_000/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Concat_000/test.recipe28
-rw-r--r--compiler/enco/test/tflite/Concat_001/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Concat_001/test.recipe29
-rw-r--r--compiler/enco/test/tflite/Concat_002/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Concat_002/test.recipe29
-rw-r--r--compiler/enco/test/tflite/Concat_003/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Concat_003/test.recipe29
-rw-r--r--compiler/enco/test/tflite/Conv2D_000/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Conv2D_000/test.recipe45
-rw-r--r--compiler/enco/test/tflite/Conv2D_001/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Conv2D_001/test.recipe45
-rw-r--r--compiler/enco/test/tflite/Conv2D_002/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Conv2D_002/test.recipe46
-rw-r--r--compiler/enco/test/tflite/Conv2D_003/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Conv2D_003/test.recipe45
-rw-r--r--compiler/enco/test/tflite/Conv2D_004/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Conv2D_004/test.recipe45
-rw-r--r--compiler/enco/test/tflite/DepthwiseConv2D_000/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/DepthwiseConv2D_000/test.recipe48
-rw-r--r--compiler/enco/test/tflite/DepthwiseConv2D_001/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/DepthwiseConv2D_001/test.recipe46
-rw-r--r--compiler/enco/test/tflite/Div_000/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Div_000/test.recipe27
-rw-r--r--compiler/enco/test/tflite/MaxPool2D_000/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/MaxPool2D_000/test.recipe24
-rw-r--r--compiler/enco/test/tflite/ReLU6_000/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/ReLU6_000/test.recipe17
-rw-r--r--compiler/enco/test/tflite/ReLU_000/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/ReLU_000/test.recipe17
-rw-r--r--compiler/enco/test/tflite/Regression_0000/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Regression_0000/test.recipe84
-rw-r--r--compiler/enco/test/tflite/Regression_0001/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Regression_0001/test.recipe50
-rw-r--r--compiler/enco/test/tflite/Regression_0002/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Regression_0002/test.recipe45
-rw-r--r--compiler/enco/test/tflite/Regression_0003/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Regression_0003/test.recipe33
-rw-r--r--compiler/enco/test/tflite/Regression_0004/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Regression_0004/test.recipe27
-rw-r--r--compiler/enco/test/tflite/Reshape_000/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Reshape_000/test.recipe21
-rw-r--r--compiler/enco/test/tflite/Sub_000/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/Sub_000/test.recipe27
-rw-r--r--compiler/enco/test/tflite/empty/INFERENCE0
-rw-r--r--compiler/enco/test/tflite/empty/test.recipe0
-rwxr-xr-xcompiler/enco/test/tflite/runall.sh83
-rw-r--r--compiler/encodump/CMakeLists.txt17
-rw-r--r--compiler/encodump/README.md69
-rw-r--r--compiler/encodump/requires.cmake1
-rw-r--r--compiler/encodump/src/Driver.cpp207
-rw-r--r--compiler/encodump/src/Dump.cpp371
-rw-r--r--compiler/encodump/src/Dump.h24
-rw-r--r--compiler/exo/CMakeLists.txt73
-rw-r--r--compiler/exo/README.md12
-rw-r--r--compiler/exo/include/exo/CircleExporter.h62
-rw-r--r--compiler/exo/include/exo/LoggingContext.h35
-rw-r--r--compiler/exo/include/exo/TFLExporter.h62
-rw-r--r--compiler/exo/requires.cmake6
-rw-r--r--compiler/exo/src/Check.h37
-rw-r--r--compiler/exo/src/Circle/CircleExporter.cpp49
-rw-r--r--compiler/exo/src/Circle/CircleExporterImpl.cpp181
-rw-r--r--compiler/exo/src/Circle/CircleExporterImpl.h78
-rw-r--r--compiler/exo/src/Circle/CircleExporterUtils.cpp163
-rw-r--r--compiler/exo/src/Circle/CircleExporterUtils.h120
-rw-r--r--compiler/exo/src/Circle/CircleOperationExporter.cpp1228
-rw-r--r--compiler/exo/src/Circle/CircleOperationExporter.h39
-rw-r--r--compiler/exo/src/Circle/CircleTensorExporter.cpp261
-rw-r--r--compiler/exo/src/Circle/CircleTensorExporter.h42
-rw-r--r--compiler/exo/src/Circle/CircleTypeInference.cpp85
-rw-r--r--compiler/exo/src/Circle/CircleTypeInference.h45
-rw-r--r--compiler/exo/src/Conversion/AvgPool2DConverter.cpp79
-rw-r--r--compiler/exo/src/Conversion/AvgPool2DConverter.h41
-rw-r--r--compiler/exo/src/Conversion/CanonicalNodeConverter.cpp19
-rw-r--r--compiler/exo/src/Conversion/CanonicalNodeConverter.h71
-rw-r--r--compiler/exo/src/Conversion/ConstGenConverter.cpp60
-rw-r--r--compiler/exo/src/Conversion/ConstGenConverter.h38
-rw-r--r--compiler/exo/src/Conversion/ConstGenConverter.test.cpp65
-rw-r--r--compiler/exo/src/Conversion/Conv2DConverter.cpp97
-rw-r--r--compiler/exo/src/Conversion/Conv2DConverter.h41
-rw-r--r--compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp114
-rw-r--r--compiler/exo/src/Conversion/DepthwiseConv2DConverter.h61
-rw-r--r--compiler/exo/src/Conversion/EltwiseAddConverter.cpp29
-rw-r--r--compiler/exo/src/Conversion/EltwiseAddConverter.h41
-rw-r--r--compiler/exo/src/Conversion/EltwiseBinaryConverter.h110
-rw-r--r--compiler/exo/src/Conversion/EltwiseDivConverter.cpp29
-rw-r--r--compiler/exo/src/Conversion/EltwiseDivConverter.h41
-rw-r--r--compiler/exo/src/Conversion/EltwiseMaxConverter.cpp75
-rw-r--r--compiler/exo/src/Conversion/EltwiseMaxConverter.h41
-rw-r--r--compiler/exo/src/Conversion/EltwiseMulConverter.cpp29
-rw-r--r--compiler/exo/src/Conversion/EltwiseMulConverter.h41
-rw-r--r--compiler/exo/src/Conversion/EltwiseSqrtConverter.cpp68
-rw-r--r--compiler/exo/src/Conversion/EltwiseSqrtConverter.h41
-rw-r--r--compiler/exo/src/Conversion/EltwiseSubConverter.cpp29
-rw-r--r--compiler/exo/src/Conversion/EltwiseSubConverter.h41
-rw-r--r--compiler/exo/src/Conversion/FeatureBiasAddConverter.cpp91
-rw-r--r--compiler/exo/src/Conversion/FeatureBiasAddConverter.h38
-rw-r--r--compiler/exo/src/Conversion/FeatureBiasAddConverter.test.cpp102
-rw-r--r--compiler/exo/src/Conversion/MatMulConverter.cpp103
-rw-r--r--compiler/exo/src/Conversion/MatMulConverter.h41
-rw-r--r--compiler/exo/src/Conversion/MaxPool2DConverter.cpp67
-rw-r--r--compiler/exo/src/Conversion/MaxPool2DConverter.h41
-rw-r--r--compiler/exo/src/Conversion/Relu6Converter.cpp68
-rw-r--r--compiler/exo/src/Conversion/Relu6Converter.h41
-rw-r--r--compiler/exo/src/Conversion/ReluConverter.cpp68
-rw-r--r--compiler/exo/src/Conversion/ReluConverter.h41
-rw-r--r--compiler/exo/src/Conversion/ReluConverter.test.cpp97
-rw-r--r--compiler/exo/src/Conversion/TensorBroadcastConverter.cpp189
-rw-r--r--compiler/exo/src/Conversion/TensorBroadcastConverter.h40
-rw-r--r--compiler/exo/src/Conversion/TensorConcatConverter.cpp66
-rw-r--r--compiler/exo/src/Conversion/TensorConcatConverter.h41
-rw-r--r--compiler/exo/src/Conversion/TensorReduceConverter.cpp95
-rw-r--r--compiler/exo/src/Conversion/TensorReduceConverter.h46
-rw-r--r--compiler/exo/src/Conversion/TensorTransposeConverter.cpp102
-rw-r--r--compiler/exo/src/Conversion/TensorTransposeConverter.h41
-rw-r--r--compiler/exo/src/Conversion/TransposedConv2DConverter.cpp92
-rw-r--r--compiler/exo/src/Conversion/TransposedConv2DConverter.h62
-rw-r--r--compiler/exo/src/Conversions.h46
-rw-r--r--compiler/exo/src/Convert.cpp97
-rw-r--r--compiler/exo/src/Convert.h29
-rw-r--r--compiler/exo/src/Dialect/IR/CircleDialect.cpp28
-rw-r--r--compiler/exo/src/Dialect/IR/CircleDialect.h40
-rw-r--r--compiler/exo/src/Dialect/IR/CircleDialect.test.cpp31
-rw-r--r--compiler/exo/src/Dialect/IR/CircleNode.cpp26
-rw-r--r--compiler/exo/src/Dialect/IR/CircleNode.h23
-rw-r--r--compiler/exo/src/Dialect/IR/CircleNodeDecl.h50
-rw-r--r--compiler/exo/src/Dialect/IR/CircleNodeImpl.h70
-rw-r--r--compiler/exo/src/Dialect/IR/CircleNodeVisitor.forward.h30
-rw-r--r--compiler/exo/src/Dialect/IR/CircleNodeVisitor.h86
-rw-r--r--compiler/exo/src/Dialect/IR/CircleNodes.cpp18
-rw-r--r--compiler/exo/src/Dialect/IR/CircleNodes.h79
-rw-r--r--compiler/exo/src/Dialect/IR/CircleNodes.lst8
-rw-r--r--compiler/exo/src/Dialect/IR/CircleNodes.test.cpp36
-rw-r--r--compiler/exo/src/Dialect/IR/CircleOpcode.h32
-rw-r--r--compiler/exo/src/Dialect/IR/FusedActFunc.h35
-rw-r--r--compiler/exo/src/Dialect/IR/NodeMixins.cpp18
-rw-r--r--compiler/exo/src/Dialect/IR/NodeMixins.h66
-rw-r--r--compiler/exo/src/Dialect/IR/TFLDialect.cpp28
-rw-r--r--compiler/exo/src/Dialect/IR/TFLDialect.h40
-rw-r--r--compiler/exo/src/Dialect/IR/TFLDialect.test.cpp31
-rw-r--r--compiler/exo/src/Dialect/IR/TFLNode.cpp26
-rw-r--r--compiler/exo/src/Dialect/IR/TFLNode.h23
-rw-r--r--compiler/exo/src/Dialect/IR/TFLNodeDecl.h50
-rw-r--r--compiler/exo/src/Dialect/IR/TFLNodeImpl.h70
-rw-r--r--compiler/exo/src/Dialect/IR/TFLNodeVisitor.forward.h30
-rw-r--r--compiler/exo/src/Dialect/IR/TFLNodeVisitor.h86
-rw-r--r--compiler/exo/src/Dialect/IR/TFLNodes.cpp91
-rw-r--r--compiler/exo/src/Dialect/IR/TFLNodes.h551
-rw-r--r--compiler/exo/src/Dialect/IR/TFLNodes.lst30
-rw-r--r--compiler/exo/src/Dialect/IR/TFLNodes.test.cpp159
-rw-r--r--compiler/exo/src/Dialect/IR/TFLOpcode.h32
-rw-r--r--compiler/exo/src/Dialect/Service/CircleShapeInferenceRule.cpp67
-rw-r--r--compiler/exo/src/Dialect/Service/CircleShapeInferenceRule.h33
-rw-r--r--compiler/exo/src/Dialect/Service/CircleTypeInferenceRule.cpp58
-rw-r--r--compiler/exo/src/Dialect/Service/CircleTypeInferenceRule.h36
-rw-r--r--compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp627
-rw-r--r--compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.h33
-rw-r--r--compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.test.cpp277
-rw-r--r--compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.cpp141
-rw-r--r--compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.h37
-rw-r--r--compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.test.cpp57
-rw-r--r--compiler/exo/src/ExoFormattedGraph.cpp525
-rw-r--r--compiler/exo/src/ExoFormattedGraph.h56
-rw-r--r--compiler/exo/src/ExoOptimize.cpp74
-rw-r--r--compiler/exo/src/ExoOptimize.h34
-rw-r--r--compiler/exo/src/ExporterUtils.cpp139
-rw-r--r--compiler/exo/src/ExporterUtils.h57
-rw-r--r--compiler/exo/src/GraphBlock.cpp243
-rw-r--r--compiler/exo/src/GraphBlock.h199
-rw-r--r--compiler/exo/src/Knob.cpp122
-rw-r--r--compiler/exo/src/Knob.h51
-rw-r--r--compiler/exo/src/Knob.lst11
-rw-r--r--compiler/exo/src/Log.cpp84
-rw-r--r--compiler/exo/src/Log.h75
-rw-r--r--compiler/exo/src/LogHelper.cpp79
-rw-r--r--compiler/exo/src/LogHelper.h70
-rw-r--r--compiler/exo/src/LoggingContext.cpp40
-rw-r--r--compiler/exo/src/Pass/FoldReshapeOfConstPass.cpp116
-rw-r--r--compiler/exo/src/Pass/FoldReshapeOfConstPass.h46
-rw-r--r--compiler/exo/src/Pass/FoldTransposeOfConstPass.cpp154
-rw-r--r--compiler/exo/src/Pass/FoldTransposeOfConstPass.h46
-rw-r--r--compiler/exo/src/Pass/FuseBiasAddPass.cpp362
-rw-r--r--compiler/exo/src/Pass/FuseBiasAddPass.h61
-rw-r--r--compiler/exo/src/Pass/FuseBiasAddPass.test.cpp361
-rw-r--r--compiler/exo/src/Pass/FuseInstanceNormPass.cpp402
-rw-r--r--compiler/exo/src/Pass/FuseInstanceNormPass.h40
-rw-r--r--compiler/exo/src/Pass/FuseReluPass.cpp115
-rw-r--r--compiler/exo/src/Pass/FuseReluPass.h40
-rw-r--r--compiler/exo/src/Pass/FuseReluPass.test.cpp115
-rw-r--r--compiler/exo/src/Pass/FuseRsqrtPass.cpp95
-rw-r--r--compiler/exo/src/Pass/FuseRsqrtPass.h47
-rw-r--r--compiler/exo/src/Pass/FuseSquaredDifferencePass.cpp86
-rw-r--r--compiler/exo/src/Pass/FuseSquaredDifferencePass.h49
-rw-r--r--compiler/exo/src/Pass/MergeConcatNodesPass.cpp191
-rw-r--r--compiler/exo/src/Pass/MergeConcatNodesPass.h41
-rw-r--r--compiler/exo/src/Pass/ShapeInferencePass.cpp59
-rw-r--r--compiler/exo/src/Pass/ShapeInferencePass.h40
-rw-r--r--compiler/exo/src/Pass/TypeInferencePass.cpp57
-rw-r--r--compiler/exo/src/Pass/TypeInferencePass.h42
-rw-r--r--compiler/exo/src/Passes.cpp19
-rw-r--r--compiler/exo/src/Passes.h38
-rw-r--r--compiler/exo/src/ProgressReporter.cpp84
-rw-r--r--compiler/exo/src/ProgressReporter.h53
-rw-r--r--compiler/exo/src/ShapeInference.cpp44
-rw-r--r--compiler/exo/src/ShapeInference.h41
-rw-r--r--compiler/exo/src/TFLite/TFLExporter.cpp49
-rw-r--r--compiler/exo/src/TFLite/TFLExporterImpl.cpp179
-rw-r--r--compiler/exo/src/TFLite/TFLExporterImpl.h78
-rw-r--r--compiler/exo/src/TFLite/TFLExporterImpl.test.cpp413
-rw-r--r--compiler/exo/src/TFLite/TFLExporterUtils.cpp160
-rw-r--r--compiler/exo/src/TFLite/TFLExporterUtils.h118
-rw-r--r--compiler/exo/src/TFLite/TFLExporterUtils.test.cpp108
-rw-r--r--compiler/exo/src/TFLite/TFLOperationExporter.cpp1199
-rw-r--r--compiler/exo/src/TFLite/TFLOperationExporter.h39
-rw-r--r--compiler/exo/src/TFLite/TFLTensorExporter.cpp249
-rw-r--r--compiler/exo/src/TFLite/TFLTensorExporter.h42
-rw-r--r--compiler/exo/src/TFLite/TFLTypeInference.cpp82
-rw-r--r--compiler/exo/src/TFLite/TFLTypeInference.h42
-rw-r--r--compiler/exo/src/TFLite/TFLTypeInference.test.cpp118
-rw-r--r--compiler/exo/src/TestGraph.h315
-rw-r--r--compiler/exo/src/TestHelper.h110
-rw-r--r--compiler/fipe/CMakeLists.txt11
-rw-r--r--compiler/fipe/fipe.test.cpp73
-rw-r--r--compiler/fipe/include/fipe.h40
-rw-r--r--compiler/gen-core/CMakeLists.txt17
-rw-r--r--compiler/gen-core/README.md3
-rw-r--r--compiler/gen-core/include/gencore/HDF5Common.h65
-rw-r--r--compiler/gen-core/include/gencore/HDF5Exporter.h52
-rw-r--r--compiler/gen-core/include/gencore/HDF5Importer.h55
-rw-r--r--compiler/gen-core/requires.cmake2
-rw-r--r--compiler/gen-core/src/HDF5Common.cpp43
-rw-r--r--compiler/gen-core/src/HDF5Exporter.cpp95
-rw-r--r--compiler/gen-core/src/HDF5Importer.cpp85
-rw-r--r--compiler/gen-tf-input/CMakeLists.txt4
-rw-r--r--compiler/gen-tf-input/README.md11
-rw-r--r--compiler/gen-tf-input/src/Driver.cpp56
-rw-r--r--compiler/gen-tf-output/CMakeLists.txt3
-rw-r--r--compiler/gen-tf-output/README.md13
-rw-r--r--compiler/gen-tf-output/src/Driver.cpp54
-rw-r--r--compiler/gen-tflite-output/CMakeLists.txt3
-rw-r--r--compiler/gen-tflite-output/README.md14
-rw-r--r--compiler/gen-tflite-output/src/Driver.cpp54
-rw-r--r--compiler/hermes-std/CMakeLists.txt27
-rw-r--r--compiler/hermes-std/README.md3
-rw-r--r--compiler/hermes-std/include/hermes/ConsoleReporter.h35
-rw-r--r--compiler/hermes-std/include/hermes/EnvConfig.h55
-rw-r--r--compiler/hermes-std/requires.cmake1
-rw-r--r--compiler/hermes-std/src/ConsoleReporter.cpp32
-rw-r--r--compiler/hermes-std/src/ConsoleReporter.test.cpp46
-rw-r--r--compiler/hermes-std/src/EnvConfig.cpp44
-rw-r--r--compiler/hermes/CMakeLists.txt28
-rw-r--r--compiler/hermes/README.md3
-rw-r--r--compiler/hermes/include/hermes.h25
-rw-r--r--compiler/hermes/include/hermes/core/Config.h43
-rw-r--r--compiler/hermes/include/hermes/core/Context.h78
-rw-r--r--compiler/hermes/include/hermes/core/Message.h69
-rw-r--r--compiler/hermes/include/hermes/core/MessageBuffer.h51
-rw-r--r--compiler/hermes/include/hermes/core/MessageBus.h40
-rw-r--r--compiler/hermes/include/hermes/core/Severity.h83
-rw-r--r--compiler/hermes/include/hermes/core/Sink.h49
-rw-r--r--compiler/hermes/include/hermes/core/Source.h118
-rw-r--r--compiler/hermes/include/hermes/core/SourceSetting.h108
-rw-r--r--compiler/hermes/requires.cmake1
-rw-r--r--compiler/hermes/src/core/Context.cpp79
-rw-r--r--compiler/hermes/src/core/Context.test.cpp28
-rw-r--r--compiler/hermes/src/core/Message.cpp43
-rw-r--r--compiler/hermes/src/core/Message.test.cpp41
-rw-r--r--compiler/hermes/src/core/MessageBuffer.cpp40
-rw-r--r--compiler/hermes/src/core/MessageBuffer.test.cpp66
-rw-r--r--compiler/hermes/src/core/MessageBus.cpp19
-rw-r--r--compiler/hermes/src/core/Severity.test.cpp59
-rw-r--r--compiler/hermes/src/core/Sink.cpp19
-rw-r--r--compiler/hermes/src/core/Source.cpp70
-rw-r--r--compiler/hermes/src/core/Source.test.cpp101
-rw-r--r--compiler/hermes/src/hermes.cpp19
-rw-r--r--compiler/hermes/src/hermes.test.cpp24
-rw-r--r--compiler/i5diff/CMakeLists.txt15
-rw-r--r--compiler/i5diff/README.md20
-rw-r--r--compiler/i5diff/requires.cmake2
-rw-r--r--compiler/i5diff/src/entry.cpp313
-rw-r--r--compiler/kuma/CMakeLists.txt19
-rw-r--r--compiler/kuma/README.md7
-rw-r--r--compiler/kuma/include/kuma.h98
-rw-r--r--compiler/kuma/src/IntervalSet.cpp92
-rw-r--r--compiler/kuma/src/IntervalSet.h78
-rw-r--r--compiler/kuma/src/IntervalSet.test.cpp31
-rw-r--r--compiler/kuma/src/kuma.cpp93
-rw-r--r--compiler/kuma/src/kuma.test.cpp89
-rw-r--r--compiler/loco/CMakeLists.txt28
-rw-r--r--compiler/loco/README.md3
-rw-r--r--compiler/loco/doc/LEP_000_Dialect_Service.md116
-rw-r--r--compiler/loco/include/loco.h26
-rw-r--r--compiler/loco/include/loco/ADT/AnnotatedItem.h82
-rw-r--r--compiler/loco/include/loco/ADT/ObjectPool.h77
-rw-r--r--compiler/loco/include/loco/IR/Algorithm.h48
-rw-r--r--compiler/loco/include/loco/IR/BiasShape.h43
-rw-r--r--compiler/loco/include/loco/IR/CanonicalDialect.h45
-rw-r--r--compiler/loco/include/loco/IR/CanonicalNode.h23
-rw-r--r--compiler/loco/include/loco/IR/CanonicalNodeDecl.h50
-rw-r--r--compiler/loco/include/loco/IR/CanonicalNodeImpl.h64
-rw-r--r--compiler/loco/include/loco/IR/CanonicalNodeVisitor.forward.h29
-rw-r--r--compiler/loco/include/loco/IR/CanonicalNodeVisitor.h79
-rw-r--r--compiler/loco/include/loco/IR/CanonicalNodes.lst49
-rw-r--r--compiler/loco/include/loco/IR/CanonicalOpcode.h37
-rw-r--r--compiler/loco/include/loco/IR/DataType.h51
-rw-r--r--compiler/loco/include/loco/IR/DataTypeTraits.h86
-rw-r--r--compiler/loco/include/loco/IR/DepthwiseFilterAxis.h33
-rw-r--r--compiler/loco/include/loco/IR/DepthwiseFilterCodec.h69
-rw-r--r--compiler/loco/include/loco/IR/DepthwiseFilterIndex.h65
-rw-r--r--compiler/loco/include/loco/IR/DepthwiseFilterShape.h63
-rw-r--r--compiler/loco/include/loco/IR/Dialect.h66
-rw-r--r--compiler/loco/include/loco/IR/DialectService.h35
-rw-r--r--compiler/loco/include/loco/IR/Dimension.h85
-rw-r--r--compiler/loco/include/loco/IR/Domain.h53
-rw-r--r--compiler/loco/include/loco/IR/FeatureAxis.h33
-rw-r--r--compiler/loco/include/loco/IR/FeatureCodec.h77
-rw-r--r--compiler/loco/include/loco/IR/FeatureIndex.h65
-rw-r--r--compiler/loco/include/loco/IR/FeatureShape.h66
-rw-r--r--compiler/loco/include/loco/IR/FilterAxis.h33
-rw-r--r--compiler/loco/include/loco/IR/FilterCodec.h61
-rw-r--r--compiler/loco/include/loco/IR/FilterIndex.h65
-rw-r--r--compiler/loco/include/loco/IR/FilterShape.h69
-rw-r--r--compiler/loco/include/loco/IR/Graph.forward.h28
-rw-r--r--compiler/loco/include/loco/IR/Graph.h284
-rw-r--r--compiler/loco/include/loco/IR/GraphInputIndex.h29
-rw-r--r--compiler/loco/include/loco/IR/GraphOutputIndex.h29
-rw-r--r--compiler/loco/include/loco/IR/MatrixAxis.h31
-rw-r--r--compiler/loco/include/loco/IR/MatrixCodec.h73
-rw-r--r--compiler/loco/include/loco/IR/MatrixIndex.h55
-rw-r--r--compiler/loco/include/loco/IR/MatrixShape.h56
-rw-r--r--compiler/loco/include/loco/IR/Node.forward.h28
-rw-r--r--compiler/loco/include/loco/IR/Node.h147
-rw-r--r--compiler/loco/include/loco/IR/NodeMixins.h133
-rw-r--r--compiler/loco/include/loco/IR/NodePool.forward.h28
-rw-r--r--compiler/loco/include/loco/IR/NodePool.h62
-rw-r--r--compiler/loco/include/loco/IR/NodeShape.h70
-rw-r--r--compiler/loco/include/loco/IR/Nodes.h1123
-rw-r--r--compiler/loco/include/loco/IR/Padding2D.h65
-rw-r--r--compiler/loco/include/loco/IR/PaddingND.h56
-rw-r--r--compiler/loco/include/loco/IR/PermutingCodec.h421
-rw-r--r--compiler/loco/include/loco/IR/Stride.h50
-rw-r--r--compiler/loco/include/loco/IR/TensorAxis.h29
-rw-r--r--compiler/loco/include/loco/IR/TensorAxisSet.h42
-rw-r--r--compiler/loco/include/loco/IR/TensorIndex.h30
-rw-r--r--compiler/loco/include/loco/IR/TensorShape.h62
-rw-r--r--compiler/loco/include/loco/IR/Use.h71
-rw-r--r--compiler/loco/include/loco/IR/Verifier.h100
-rw-r--r--compiler/loco/include/loco/IR/Window.h52
-rw-r--r--compiler/loco/include/loco/Service/CanonicalShapeInferenceRule.h38
-rw-r--r--compiler/loco/include/loco/Service/MultiDialectShapeInferenceRule.h45
-rw-r--r--compiler/loco/include/loco/Service/ShapeInference.h66
-rw-r--r--compiler/loco/include/loco/Service/ShapeInferenceRule.h97
-rw-r--r--compiler/loco/include/loco/Service/TypeInference.h114
-rw-r--r--compiler/loco/src/ADT/AnnotatedItem.test.cpp75
-rw-r--r--compiler/loco/src/ADT/ObjectPool.cpp19
-rw-r--r--compiler/loco/src/IR/Algorithm.cpp121
-rw-r--r--compiler/loco/src/IR/Algorithm.test.cpp122
-rw-r--r--compiler/loco/src/IR/BiasShape.test.cpp26
-rw-r--r--compiler/loco/src/IR/CanonicalDialect.cpp67
-rw-r--r--compiler/loco/src/IR/CanonicalDialect.test.cpp29
-rw-r--r--compiler/loco/src/IR/CanonicalNode.cpp25
-rw-r--r--compiler/loco/src/IR/CanonicalNode.test.cpp72
-rw-r--r--compiler/loco/src/IR/CanonicalOpcode.cpp19
-rw-r--r--compiler/loco/src/IR/DataType.cpp19
-rw-r--r--compiler/loco/src/IR/DataTypeTraits.test.cpp29
-rw-r--r--compiler/loco/src/IR/DepthwiseFilterAxis.cpp19
-rw-r--r--compiler/loco/src/IR/DepthwiseFilterCodec.cpp19
-rw-r--r--compiler/loco/src/IR/DepthwiseFilterIndex.test.cpp67
-rw-r--r--compiler/loco/src/IR/DepthwiseFilterShape.test.cpp80
-rw-r--r--compiler/loco/src/IR/Dialect.cpp19
-rw-r--r--compiler/loco/src/IR/Dialect.test.cpp41
-rw-r--r--compiler/loco/src/IR/DialectService.cpp19
-rw-r--r--compiler/loco/src/IR/Dimension.cpp32
-rw-r--r--compiler/loco/src/IR/Dimension.test.cpp100
-rw-r--r--compiler/loco/src/IR/Domain.cpp19
-rw-r--r--compiler/loco/src/IR/FeatureAxis.cpp19
-rw-r--r--compiler/loco/src/IR/FeatureCodec.cpp19
-rw-r--r--compiler/loco/src/IR/FeatureIndex.test.cpp67
-rw-r--r--compiler/loco/src/IR/FeatureShape.test.cpp80
-rw-r--r--compiler/loco/src/IR/FilterAxis.cpp19
-rw-r--r--compiler/loco/src/IR/FilterCodec.cpp19
-rw-r--r--compiler/loco/src/IR/FilterIndex.test.cpp67
-rw-r--r--compiler/loco/src/IR/FilterShape.test.cpp80
-rw-r--r--compiler/loco/src/IR/Graph.cpp137
-rw-r--r--compiler/loco/src/IR/Graph.test.cpp218
-rw-r--r--compiler/loco/src/IR/GraphInputIndex.cpp19
-rw-r--r--compiler/loco/src/IR/GraphOutputIndex.cpp19
-rw-r--r--compiler/loco/src/IR/MatrixAxis.cpp19
-rw-r--r--compiler/loco/src/IR/MatrixCodec.cpp19
-rw-r--r--compiler/loco/src/IR/MockupNode.h58
-rw-r--r--compiler/loco/src/IR/Node.cpp88
-rw-r--r--compiler/loco/src/IR/Node.test.cpp102
-rw-r--r--compiler/loco/src/IR/NodeMixins.cpp19
-rw-r--r--compiler/loco/src/IR/NodePool.cpp31
-rw-r--r--compiler/loco/src/IR/NodeShape.cpp284
-rw-r--r--compiler/loco/src/IR/NodeShape.test.cpp125
-rw-r--r--compiler/loco/src/IR/Nodes.cpp243
-rw-r--r--compiler/loco/src/IR/Nodes.test.cpp588
-rw-r--r--compiler/loco/src/IR/Padding2D.test.cpp29
-rw-r--r--compiler/loco/src/IR/PaddingND.test.cpp32
-rw-r--r--compiler/loco/src/IR/PermutingCodec.cpp630
-rw-r--r--compiler/loco/src/IR/PermutingCodec.test.cpp553
-rw-r--r--compiler/loco/src/IR/Stride.test.cpp42
-rw-r--r--compiler/loco/src/IR/TensorAxis.cpp19
-rw-r--r--compiler/loco/src/IR/TensorAxisSet.cpp19
-rw-r--r--compiler/loco/src/IR/TensorIndex.cpp19
-rw-r--r--compiler/loco/src/IR/TensorShape.cpp39
-rw-r--r--compiler/loco/src/IR/TensorShape.test.cpp109
-rw-r--r--compiler/loco/src/IR/Use.cpp45
-rw-r--r--compiler/loco/src/IR/Use.test.cpp42
-rw-r--r--compiler/loco/src/IR/Verifier.cpp119
-rw-r--r--compiler/loco/src/IR/Verifier.test.cpp64
-rw-r--r--compiler/loco/src/IR/Window.test.cpp42
-rw-r--r--compiler/loco/src/Service/CanonicalShapeInferenceRule.cpp774
-rw-r--r--compiler/loco/src/Service/CanonicalShapeInferenceRule.test.cpp400
-rw-r--r--compiler/loco/src/Service/GraphBuilder.h547
-rw-r--r--compiler/loco/src/Service/GraphBuilder.test.cpp47
-rw-r--r--compiler/loco/src/Service/GraphTestcase.h541
-rw-r--r--compiler/loco/src/Service/MultiDialectShapeInferenceRule.cpp67
-rw-r--r--compiler/loco/src/Service/MultiDialectShapeInferenceRule.test.cpp134
-rw-r--r--compiler/loco/src/Service/ShapeInference.cpp105
-rw-r--r--compiler/loco/src/Service/ShapeInference.test.cpp87
-rw-r--r--compiler/loco/src/Service/ShapeInferenceRule.cpp31
-rw-r--r--compiler/loco/src/Service/TypeInference.cpp228
-rw-r--r--compiler/loco/src/Service/TypeInference.test.cpp282
-rw-r--r--compiler/loco/src/loco.test.cpp108
-rw-r--r--compiler/loco/src/tensorflow.test.cpp386
-rw-r--r--compiler/locoex-customop/CMakeLists.txt18
-rw-r--r--compiler/locoex-customop/README.md9
-rw-r--r--compiler/locoex-customop/include/locoex/COpAttrTypes.h101
-rw-r--r--compiler/locoex-customop/include/locoex/COpCall.h71
-rw-r--r--compiler/locoex-customop/include/locoex/COpDialect.h43
-rw-r--r--compiler/locoex-customop/include/locoex/COpNode.h37
-rw-r--r--compiler/locoex-customop/include/locoex/Service/COpFormattedGraph.h47
-rw-r--r--compiler/locoex-customop/include/locoex/Service/COpShapeInferenceRule.h41
-rw-r--r--compiler/locoex-customop/include/locoex/Service/COpTypeInference.h36
-rw-r--r--compiler/locoex-customop/include/locoex/VariadicArityNode.h77
-rw-r--r--compiler/locoex-customop/requires.cmake4
-rw-r--r--compiler/locoex-customop/src/COpCall.cpp67
-rw-r--r--compiler/locoex-customop/src/COpCall.test.cpp90
-rw-r--r--compiler/locoex-customop/src/COpDialect.cpp28
-rw-r--r--compiler/locoex-customop/src/COpDialect.test.cpp29
-rw-r--r--compiler/locoex-customop/src/COpNode.cpp25
-rw-r--r--compiler/locoex-customop/src/Service/COpFormattedGraph.cpp66
-rw-r--r--compiler/locoex-customop/src/Service/COpShapeInferenceRule.cpp59
-rw-r--r--compiler/locoex-customop/src/Service/COpShapeInferenceRule.test.cpp54
-rw-r--r--compiler/locoex-customop/src/Service/COpTypeInference.cpp47
-rw-r--r--compiler/locoex-customop/src/Service/COpTypeInference.test.cpp63
-rw-r--r--compiler/locoex-customop/src/VariadicArityNode.test.cpp72
-rw-r--r--compiler/locomotiv/CMakeLists.txt29
-rw-r--r--compiler/locomotiv/README.md90
-rw-r--r--compiler/locomotiv/include/locomotiv/NodeData.h61
-rw-r--r--compiler/locomotiv/include/locomotiv/Session.h108
-rw-r--r--compiler/locomotiv/requires.cmake2
-rw-r--r--compiler/locomotiv/src/Node.lst40
-rw-r--r--compiler/locomotiv/src/Node/AvgPool2D.cpp179
-rw-r--r--compiler/locomotiv/src/Node/AvgPool2D.test.cpp176
-rw-r--r--compiler/locomotiv/src/Node/BiasAdd.cpp121
-rw-r--r--compiler/locomotiv/src/Node/BiasAdd.test.cpp204
-rw-r--r--compiler/locomotiv/src/Node/BiasEncode.cpp63
-rw-r--r--compiler/locomotiv/src/Node/BiasEncode.test.cpp95
-rw-r--r--compiler/locomotiv/src/Node/ConstGen.cpp116
-rw-r--r--compiler/locomotiv/src/Node/ConstGen.test.cpp100
-rw-r--r--compiler/locomotiv/src/Node/Conv2D.cpp179
-rw-r--r--compiler/locomotiv/src/Node/Conv2D.test.cpp231
-rw-r--r--compiler/locomotiv/src/Node/DepthwiseConv2D.cpp185
-rw-r--r--compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp164
-rw-r--r--compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp113
-rw-r--r--compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp90
-rw-r--r--compiler/locomotiv/src/Node/EltwiseAdd.cpp34
-rw-r--r--compiler/locomotiv/src/Node/EltwiseAdd.test.cpp121
-rw-r--r--compiler/locomotiv/src/Node/EltwiseDiv.cpp34
-rw-r--r--compiler/locomotiv/src/Node/EltwiseDiv.test.cpp121
-rw-r--r--compiler/locomotiv/src/Node/EltwiseMax.cpp36
-rw-r--r--compiler/locomotiv/src/Node/EltwiseMax.test.cpp121
-rw-r--r--compiler/locomotiv/src/Node/EltwiseMul.cpp34
-rw-r--r--compiler/locomotiv/src/Node/EltwiseMul.test.cpp124
-rw-r--r--compiler/locomotiv/src/Node/EltwiseSqrt.cpp43
-rw-r--r--compiler/locomotiv/src/Node/EltwiseSqrt.test.cpp69
-rw-r--r--compiler/locomotiv/src/Node/EltwiseSub.cpp34
-rw-r--r--compiler/locomotiv/src/Node/EltwiseSub.test.cpp121
-rw-r--r--compiler/locomotiv/src/Node/FeatureCodec.test.cpp223
-rw-r--r--compiler/locomotiv/src/Node/FeatureDecode.cpp112
-rw-r--r--compiler/locomotiv/src/Node/FeatureEncode.cpp114
-rw-r--r--compiler/locomotiv/src/Node/FilterEncode.cpp114
-rw-r--r--compiler/locomotiv/src/Node/FilterEncode.test.cpp144
-rw-r--r--compiler/locomotiv/src/Node/Forward.cpp62
-rw-r--r--compiler/locomotiv/src/Node/Forward.test.cpp88
-rw-r--r--compiler/locomotiv/src/Node/MatMul.cpp133
-rw-r--r--compiler/locomotiv/src/Node/MatMul.test.cpp188
-rw-r--r--compiler/locomotiv/src/Node/MatrixCodec.test.cpp207
-rw-r--r--compiler/locomotiv/src/Node/MatrixDecode.cpp109
-rw-r--r--compiler/locomotiv/src/Node/MatrixEncode.cpp112
-rw-r--r--compiler/locomotiv/src/Node/MaxPool2D.cpp167
-rw-r--r--compiler/locomotiv/src/Node/MaxPool2D.test.cpp159
-rw-r--r--compiler/locomotiv/src/Node/Pull.cpp72
-rw-r--r--compiler/locomotiv/src/Node/Pull.test.cpp61
-rw-r--r--compiler/locomotiv/src/Node/Push.cpp61
-rw-r--r--compiler/locomotiv/src/Node/Push.test.cpp88
-rw-r--r--compiler/locomotiv/src/Node/ReLU.cpp41
-rw-r--r--compiler/locomotiv/src/Node/ReLU.test.cpp62
-rw-r--r--compiler/locomotiv/src/Node/ReLU6.cpp96
-rw-r--r--compiler/locomotiv/src/Node/ReLU6.test.cpp66
-rw-r--r--compiler/locomotiv/src/Node/Reshape.cpp90
-rw-r--r--compiler/locomotiv/src/Node/Reshape.test.cpp67
-rw-r--r--compiler/locomotiv/src/Node/Softmax.cpp122
-rw-r--r--compiler/locomotiv/src/Node/Softmax.test.cpp68
-rw-r--r--compiler/locomotiv/src/Node/Tanh.cpp41
-rw-r--r--compiler/locomotiv/src/Node/Tanh.test.cpp64
-rw-r--r--compiler/locomotiv/src/Node/TensorBroadcast.cpp106
-rw-r--r--compiler/locomotiv/src/Node/TensorBroadcast.test.cpp63
-rw-r--r--compiler/locomotiv/src/Node/TensorConcat.cpp113
-rw-r--r--compiler/locomotiv/src/Node/TensorConcat.test.cpp128
-rw-r--r--compiler/locomotiv/src/Node/TensorConstantPad.cpp113
-rw-r--r--compiler/locomotiv/src/Node/TensorConstantPad.test.cpp218
-rw-r--r--compiler/locomotiv/src/Node/TensorReduce.cpp153
-rw-r--r--compiler/locomotiv/src/Node/TensorReduce.test.cpp104
-rw-r--r--compiler/locomotiv/src/Node/TransposedConv2D.cpp189
-rw-r--r--compiler/locomotiv/src/Node/TransposedConv2D.test.cpp144
-rw-r--r--compiler/locomotiv/src/NodeData.cpp33
-rw-r--r--compiler/locomotiv/src/NodeData.test.cpp54
-rw-r--r--compiler/locomotiv/src/NodeDataImpl.cpp77
-rw-r--r--compiler/locomotiv/src/NodeDataImpl.h66
-rw-r--r--compiler/locomotiv/src/NodeDataImpl.test.cpp58
-rw-r--r--compiler/locomotiv/src/NodeDomain.cpp53
-rw-r--r--compiler/locomotiv/src/NodeDomain.h37
-rw-r--r--compiler/locomotiv/src/NodeDomain.test.cpp37
-rw-r--r--compiler/locomotiv/src/NodeExecution.cpp158
-rw-r--r--compiler/locomotiv/src/NodeExecution.h83
-rw-r--r--compiler/locomotiv/src/Session.cpp93
-rw-r--r--compiler/locomotiv/src/Session.test.cpp379
-rw-r--r--compiler/locomotiv/src/UserData.cpp63
-rw-r--r--compiler/locomotiv/src/UserData.h31
-rw-r--r--compiler/locomotiv/src/Validation.h34
-rw-r--r--compiler/locop/CMakeLists.txt27
-rw-r--r--compiler/locop/README.md3
-rw-r--r--compiler/locop/include/locop/CanonicalNodeSummaryBuilder.h45
-rw-r--r--compiler/locop/include/locop/FormattedGraph.h89
-rw-r--r--compiler/locop/include/locop/FormattedTensorShape.h76
-rw-r--r--compiler/locop/include/locop/GenericNodeSummaryBuilder.h45
-rw-r--r--compiler/locop/include/locop/Interfaces.h43
-rw-r--r--compiler/locop/include/locop/NodeSummary.h114
-rw-r--r--compiler/locop/include/locop/NodeSummaryBuilder.h47
-rw-r--r--compiler/locop/include/locop/SymbolTable.h41
-rw-r--r--compiler/locop/src/CanonicalNodeSummaryBuilder.cpp297
-rw-r--r--compiler/locop/src/ExampleGraph.h69
-rw-r--r--compiler/locop/src/FormattedGraph.cpp390
-rw-r--r--compiler/locop/src/FormattedGraph.test.cpp143
-rw-r--r--compiler/locop/src/FormattedTensorShape.cpp68
-rw-r--r--compiler/locop/src/FormattedTensorShape.test.cpp33
-rw-r--r--compiler/locop/src/GenericNodeSummaryBuilder.cpp38
-rw-r--r--compiler/locop/src/GenericNodeSummaryBuilder.test.cpp58
-rw-r--r--compiler/locop/src/Interfaces.cpp28
-rw-r--r--compiler/locop/src/NodeSummary.cpp41
-rw-r--r--compiler/locop/src/NodeSummaryBuilder.cpp21
-rw-r--r--compiler/logo-core/CMakeLists.txt19
-rw-r--r--compiler/logo-core/README.md3
-rw-r--r--compiler/logo-core/include/logo/Pass.h48
-rw-r--r--compiler/logo-core/include/logo/Phase.h192
-rw-r--r--compiler/logo-core/requires.cmake1
-rw-r--r--compiler/logo-core/src/Pass.cpp32
-rw-r--r--compiler/logo-core/src/Pass.test.cpp46
-rw-r--r--compiler/logo-core/src/Phase.cpp71
-rw-r--r--compiler/logo/CMakeLists.txt23
-rw-r--r--compiler/logo/README.md3
-rw-r--r--compiler/logo/include/logo/ConstantFoldingPass.h41
-rw-r--r--compiler/logo/include/logo/Passes.h30
-rw-r--r--compiler/logo/include/logo/RemoveDeadNodePass.h34
-rw-r--r--compiler/logo/include/logo/RemoveForwardNodePass.h46
-rw-r--r--compiler/logo/include/logo/ReorderDecodePass.h51
-rw-r--r--compiler/logo/include/logo/ResolveDuplicateReshapePass.h41
-rw-r--r--compiler/logo/include/logo/ResolveRedundantReshapePass.h44
-rw-r--r--compiler/logo/include/logo/SimplifyDomainConversionPass.h47
-rw-r--r--compiler/logo/requires.cmake4
-rw-r--r--compiler/logo/src/Passes/ConstantFoldingPass.cpp174
-rw-r--r--compiler/logo/src/Passes/ConstantFoldingPass.test.cpp179
-rw-r--r--compiler/logo/src/Passes/RemoveDeadNodePass.cpp77
-rw-r--r--compiler/logo/src/Passes/RemoveForwardNodePass.cpp64
-rw-r--r--compiler/logo/src/Passes/ReorderDecodePass.cpp311
-rw-r--r--compiler/logo/src/Passes/ResolveDuplicateReshapePass.cpp108
-rw-r--r--compiler/logo/src/Passes/ResolveRedundantReshapePass.cpp103
-rw-r--r--compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp445
-rw-r--r--compiler/logo/src/Passes/SimplifyDomainConversionPass.test.cpp234
-rw-r--r--compiler/logo/src/TestHelper.h44
-rw-r--r--compiler/luci/CMakeLists.txt10
-rw-r--r--compiler/luci/README.md3
-rw-r--r--compiler/luci/export/CMakeLists.txt29
-rw-r--r--compiler/luci/export/README.md3
-rw-r--r--compiler/luci/export/include/luci/CircleExporter.h64
-rw-r--r--compiler/luci/export/src/Check.h35
-rw-r--r--compiler/luci/export/src/CircleExporter.cpp64
-rw-r--r--compiler/luci/export/src/CircleExporterImpl.cpp266
-rw-r--r--compiler/luci/export/src/CircleExporterImpl.h82
-rw-r--r--compiler/luci/export/src/CircleExporterUtils.cpp169
-rw-r--r--compiler/luci/export/src/CircleExporterUtils.h51
-rw-r--r--compiler/luci/export/src/CircleOperationExporter.cpp643
-rw-r--r--compiler/luci/export/src/CircleOperationExporter.h37
-rw-r--r--compiler/luci/export/src/CircleTensorExporter.cpp264
-rw-r--r--compiler/luci/export/src/CircleTensorExporter.h44
-rw-r--r--compiler/luci/export/src/Optimize.cpp48
-rw-r--r--compiler/luci/export/src/Optimize.h33
-rw-r--r--compiler/luci/export/src/ProgressReporter.cpp84
-rw-r--r--compiler/luci/export/src/ProgressReporter.h53
-rw-r--r--compiler/luci/export/src/SerializedData.h95
-rw-r--r--compiler/luci/import/CMakeLists.txt26
-rw-r--r--compiler/luci/import/README.md3
-rw-r--r--compiler/luci/import/include/luci/Import/CircleReader.h87
-rw-r--r--compiler/luci/import/include/luci/Import/GraphBuilder.h56
-rw-r--r--compiler/luci/import/include/luci/Import/GraphBuilderContext.h79
-rw-r--r--compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h85
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes.h48
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleAbs.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleAdd.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleArgMax.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleAveragePool2D.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleBatchToSpaceND.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleConcatenation.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleConst.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleConv2D.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleCos.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleDepthwiseConv2D.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleDiv.h36
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleEqual.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleExp.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleFullyConnected.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleLogicalNot.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleLogicalOr.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleMaxPool2D.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleMean.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleMul.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CirclePack.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CirclePad.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleRelu.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleReshape.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleRsqrt.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleSoftmax.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleSub.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleTranspose.h37
-rw-r--r--compiler/luci/import/include/luci/Importer.h54
-rw-r--r--compiler/luci/import/src/CircleReader.cpp211
-rw-r--r--compiler/luci/import/src/GraphBuilder.cpp61
-rw-r--r--compiler/luci/import/src/GraphBuilderContext.cpp47
-rw-r--r--compiler/luci/import/src/GraphBuilderRegistry.cpp163
-rw-r--r--compiler/luci/import/src/Importer.cpp253
-rw-r--r--compiler/luci/import/src/Importer.test.cpp23
-rw-r--r--compiler/luci/import/src/Nodes/CircleAbs.cpp44
-rw-r--r--compiler/luci/import/src/Nodes/CircleAdd.cpp48
-rw-r--r--compiler/luci/import/src/Nodes/CircleArgMax.cpp48
-rw-r--r--compiler/luci/import/src/Nodes/CircleAveragePool2D.cpp50
-rw-r--r--compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp80
-rw-r--r--compiler/luci/import/src/Nodes/CircleConcatenation.cpp52
-rw-r--r--compiler/luci/import/src/Nodes/CircleConst.cpp110
-rw-r--r--compiler/luci/import/src/Nodes/CircleConv2D.cpp58
-rw-r--r--compiler/luci/import/src/Nodes/CircleCos.cpp46
-rw-r--r--compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp60
-rw-r--r--compiler/luci/import/src/Nodes/CircleDiv.cpp49
-rw-r--r--compiler/luci/import/src/Nodes/CircleEqual.cpp51
-rw-r--r--compiler/luci/import/src/Nodes/CircleExp.cpp59
-rw-r--r--compiler/luci/import/src/Nodes/CircleFullyConnected.cpp56
-rw-r--r--compiler/luci/import/src/Nodes/CircleLogicalNot.cpp51
-rw-r--r--compiler/luci/import/src/Nodes/CircleLogicalOr.cpp55
-rw-r--r--compiler/luci/import/src/Nodes/CircleMaxPool2D.cpp52
-rw-r--r--compiler/luci/import/src/Nodes/CircleMean.cpp46
-rw-r--r--compiler/luci/import/src/Nodes/CircleMul.cpp49
-rw-r--r--compiler/luci/import/src/Nodes/CirclePack.cpp61
-rw-r--r--compiler/luci/import/src/Nodes/CirclePad.cpp50
-rw-r--r--compiler/luci/import/src/Nodes/CircleRelu.cpp47
-rw-r--r--compiler/luci/import/src/Nodes/CircleReshape.cpp82
-rw-r--r--compiler/luci/import/src/Nodes/CircleRsqrt.cpp60
-rw-r--r--compiler/luci/import/src/Nodes/CircleSoftmax.cpp49
-rw-r--r--compiler/luci/import/src/Nodes/CircleSub.cpp51
-rw-r--r--compiler/luci/import/src/Nodes/CircleTranspose.cpp51
-rw-r--r--compiler/luci/lang/CMakeLists.txt22
-rw-r--r--compiler/luci/lang/README.md3
-rw-r--r--compiler/luci/lang/include/luci/IR/AttrFilter.h43
-rw-r--r--compiler/luci/lang/include/luci/IR/AttrFusedActFunc.h36
-rw-r--r--compiler/luci/lang/include/luci/IR/AttrPadding.h33
-rw-r--r--compiler/luci/lang/include/luci/IR/AttrStride.h43
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleDialect.h43
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNode.h23
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodeDecl.h68
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodeImpl.h70
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodeVisitor.forward.h30
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodeVisitor.h87
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodes.h73
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodes.lst52
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleOpcode.h32
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleQuantParam.h36
-rw-r--r--compiler/luci/lang/include/luci/IR/LuciNodeMixins.h104
-rw-r--r--compiler/luci/lang/include/luci/IR/Module.h70
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleAbs.h40
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleAdd.h45
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleArgMax.h50
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleAveragePool2D.h63
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleBatchToSpaceND.h47
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleConcatenation.h72
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h57
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleConv2D.h62
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleCos.h40
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleDepthwiseConv2D.h68
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleDiv.h51
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleEqual.h43
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleExp.h40
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h50
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleGather.h51
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleInput.h55
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleInstanceNorm.h56
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalNot.h40
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalOr.h43
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleMaxPool2D.h62
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleMaximum.h44
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleMean.h51
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleMul.h45
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleOutput.h55
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CirclePack.h67
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CirclePad.h46
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleRelu.h44
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleRelu6.h44
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleReshape.h69
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleRsqrt.h44
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSoftmax.h47
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSqrt.h44
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSquaredDifference.h48
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSub.h48
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleTranspose.h50
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h64
-rw-r--r--compiler/luci/lang/include/luci/IR/VariadicArityNode.h77
-rw-r--r--compiler/luci/lang/src/Check.h35
-rw-r--r--compiler/luci/lang/src/CircleDialect.cpp88
-rw-r--r--compiler/luci/lang/src/CircleDialect.test.cpp34
-rw-r--r--compiler/luci/lang/src/CircleNode.cpp25
-rw-r--r--compiler/luci/lang/src/CircleNodes.cpp50
-rw-r--r--compiler/luci/lang/src/LuciNodeMixins.cpp18
-rw-r--r--compiler/luci/lang/src/Module.cpp46
-rw-r--r--compiler/luci/lang/src/Module.test.cpp73
-rw-r--r--compiler/luci/lang/src/Nodes/CircleAbs.test.cpp31
-rw-r--r--compiler/luci/lang/src/Nodes/CircleAdd.test.cpp32
-rw-r--r--compiler/luci/lang/src/Nodes/CircleArgMax.test.cpp32
-rw-r--r--compiler/luci/lang/src/Nodes/CircleBatchToSpaceND.test.cpp33
-rw-r--r--compiler/luci/lang/src/Nodes/CircleConcatenation.test.cpp35
-rw-r--r--compiler/luci/lang/src/Nodes/CircleConst.cpp79
-rw-r--r--compiler/luci/lang/src/Nodes/CircleConv2D.test.cpp33
-rw-r--r--compiler/luci/lang/src/Nodes/CircleCos.test.cpp31
-rw-r--r--compiler/luci/lang/src/Nodes/CircleDepthwiseConv2D.test.cpp38
-rw-r--r--compiler/luci/lang/src/Nodes/CircleDiv.test.cpp32
-rw-r--r--compiler/luci/lang/src/Nodes/CircleEqual.test.cpp32
-rw-r--r--compiler/luci/lang/src/Nodes/CircleExp.test.cpp31
-rw-r--r--compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp34
-rw-r--r--compiler/luci/lang/src/Nodes/CircleGather.test.cpp33
-rw-r--r--compiler/luci/lang/src/Nodes/CircleInput.cpp38
-rw-r--r--compiler/luci/lang/src/Nodes/CircleInstanceNorm.test.cpp35
-rw-r--r--compiler/luci/lang/src/Nodes/CircleLogicalNot.test.cpp31
-rw-r--r--compiler/luci/lang/src/Nodes/CircleLogicalOr.test.cpp32
-rw-r--r--compiler/luci/lang/src/Nodes/CircleMaxPool2D.test.cpp33
-rw-r--r--compiler/luci/lang/src/Nodes/CircleMaximum.test.cpp32
-rw-r--r--compiler/luci/lang/src/Nodes/CircleMul.test.cpp32
-rw-r--r--compiler/luci/lang/src/Nodes/CircleOutput.cpp38
-rw-r--r--compiler/luci/lang/src/Nodes/CirclePack.test.cpp35
-rw-r--r--compiler/luci/lang/src/Nodes/CirclePad.test.cpp32
-rw-r--r--compiler/luci/lang/src/Nodes/CircleRelu.test.cpp31
-rw-r--r--compiler/luci/lang/src/Nodes/CircleRelu6.test.cpp31
-rw-r--r--compiler/luci/lang/src/Nodes/CircleReshape.test.cpp48
-rw-r--r--compiler/luci/lang/src/Nodes/CircleRsqrt.test.cpp31
-rw-r--r--compiler/luci/lang/src/Nodes/CircleSoftmax.test.cpp31
-rw-r--r--compiler/luci/lang/src/Nodes/CircleSqrt.test.cpp31
-rw-r--r--compiler/luci/lang/src/Nodes/CircleSquaredDifference.test.cpp32
-rw-r--r--compiler/luci/lang/src/Nodes/CircleSub.test.cpp32
-rw-r--r--compiler/luci/lang/src/Nodes/CircleTranspose.test.cpp32
-rw-r--r--compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp33
-rw-r--r--compiler/luci/log/CMakeLists.txt9
-rw-r--r--compiler/luci/log/README.md3
-rw-r--r--compiler/luci/log/include/luci/Log.h75
-rw-r--r--compiler/luci/log/include/luci/LoggingContext.h35
-rw-r--r--compiler/luci/log/src/Log.cpp87
-rw-r--r--compiler/luci/log/src/LoggingContext.cpp41
-rw-r--r--compiler/luci/logex/CMakeLists.txt13
-rw-r--r--compiler/luci/logex/README.md3
-rw-r--r--compiler/luci/logex/include/luci/FormattedGraph.h56
-rw-r--r--compiler/luci/logex/include/luci/LogHelper.h36
-rw-r--r--compiler/luci/logex/src/FormattedGraph.cpp606
-rw-r--r--compiler/luci/logex/src/LogHelper.cpp29
-rw-r--r--compiler/luci/pass/CMakeLists.txt29
-rw-r--r--compiler/luci/pass/README.md3
-rw-r--r--compiler/luci/pass/include/luci/CircleOptimizer.h55
-rw-r--r--compiler/luci/pass/include/luci/Pass/FuseInstanceNormPass.h40
-rw-r--r--compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h41
-rw-r--r--compiler/luci/pass/include/luci/Pass/TypeInferencePass.h42
-rw-r--r--compiler/luci/pass/src/CircleOptimizer.cpp96
-rw-r--r--compiler/luci/pass/src/FuseInstanceNormPass.cpp401
-rw-r--r--compiler/luci/pass/src/ProgressReporter.cpp84
-rw-r--r--compiler/luci/pass/src/ProgressReporter.h53
-rw-r--r--compiler/luci/pass/src/ShapeInferencePass.cpp44
-rw-r--r--compiler/luci/pass/src/TypeInferencePass.cpp42
-rw-r--r--compiler/luci/requires.cmake9
-rw-r--r--compiler/luci/service/CMakeLists.txt25
-rw-r--r--compiler/luci/service/README.md3
-rw-r--r--compiler/luci/service/include/luci/Service/CircleShapeInference.h41
-rw-r--r--compiler/luci/service/include/luci/Service/CircleShapeInferenceRule.h33
-rw-r--r--compiler/luci/service/include/luci/Service/CircleTypeInference.h42
-rw-r--r--compiler/luci/service/include/luci/Service/CircleTypeInferenceRule.h36
-rw-r--r--compiler/luci/service/include/luci/Service/ShapeDescription.h59
-rw-r--r--compiler/luci/service/include/luci/Service/Validate.h29
-rw-r--r--compiler/luci/service/src/Check.h35
-rw-r--r--compiler/luci/service/src/CircleShapeInference.cpp37
-rw-r--r--compiler/luci/service/src/CircleShapeInferenceRule.cpp907
-rw-r--r--compiler/luci/service/src/CircleShapeInferenceRule.test.cpp282
-rw-r--r--compiler/luci/service/src/CircleTypeInference.cpp78
-rw-r--r--compiler/luci/service/src/CircleTypeInferenceRule.cpp202
-rw-r--r--compiler/luci/service/src/CircleTypeInferenceRule.test.cpp57
-rw-r--r--compiler/luci/service/src/GraphBlock.h201
-rw-r--r--compiler/luci/service/src/GraphBlock.test.cpp246
-rw-r--r--compiler/luci/service/src/ShapeDescription.cpp139
-rw-r--r--compiler/luci/service/src/TestGraph.h315
-rw-r--r--compiler/luci/service/src/Validate.cpp109
-rw-r--r--compiler/luci/tester/CMakeLists.txt22
-rw-r--r--compiler/luci/tester/src/Model.cpp62
-rw-r--r--compiler/luci/tester/src/Model.h27
-rw-r--r--compiler/luci/tester/src/ReadTester.cpp92
-rw-r--r--compiler/luci/tester/src/WriteTester.cpp142
-rw-r--r--compiler/luci/tests/.gitignore1
-rw-r--r--compiler/luci/tests/CMakeLists.txt97
-rwxr-xr-xcompiler/luci/tests/readverify.sh53
-rw-r--r--compiler/luci/tests/test.lst91
-rwxr-xr-xcompiler/luci/tests/writeverify.sh53
-rw-r--r--compiler/mio-circle/CMakeLists.txt28
-rw-r--r--compiler/mio-circle/README.md3
-rw-r--r--compiler/mio-circle/example.cpp41
-rw-r--r--compiler/mio-tf/CMakeLists.txt48
-rw-r--r--compiler/mio-tf/README.md3
-rw-r--r--compiler/mio-tf/src/mio_tf.test.cpp27
-rw-r--r--compiler/mio-tflite/CMakeLists.txt37
-rw-r--r--compiler/mio-tflite/README.md3
-rw-r--r--compiler/mio-tflite/example.cpp41
-rw-r--r--compiler/mir-caffe-importer/CMakeLists.txt17
-rw-r--r--compiler/mir-caffe-importer/caffe_importer.cpp439
-rw-r--r--compiler/mir-caffe-importer/caffe_importer.h35
-rw-r--r--compiler/mir-caffe-importer/caffe_op_creator.cpp834
-rw-r--r--compiler/mir-caffe-importer/caffe_op_creator.h146
-rw-r--r--compiler/mir-caffe-importer/caffe_op_types.h89
-rw-r--r--compiler/mir-caffe-importer/requires.cmake1
-rw-r--r--compiler/mir-caffe2-importer/CMakeLists.txt29
-rw-r--r--compiler/mir-caffe2-importer/caffe2_importer.cpp343
-rw-r--r--compiler/mir-caffe2-importer/caffe2_importer.h34
-rw-r--r--compiler/mir-caffe2-importer/caffe2_op_creator.cpp547
-rw-r--r--compiler/mir-caffe2-importer/caffe2_op_creator.h117
-rw-r--r--compiler/mir-caffe2-importer/caffe2_op_types.h48
-rw-r--r--compiler/mir-caffe2-importer/caffe2_proto_helper.cpp62
-rw-r--r--compiler/mir-caffe2-importer/caffe2_proto_helper.h40
-rw-r--r--compiler/mir-caffe2-importer/requires.cmake1
-rw-r--r--compiler/mir-interpreter/CMakeLists.txt4
-rw-r--r--compiler/mir-interpreter/include/MirInterpreter.h101
-rw-r--r--compiler/mir-interpreter/requires.cmake1
-rw-r--r--compiler/mir-interpreter/src/MirInterpreter.cpp420
-rw-r--r--compiler/mir-interpreter/src/ops/Abs.cpp55
-rw-r--r--compiler/mir-interpreter/src/ops/Abs.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Add.cpp130
-rw-r--r--compiler/mir-interpreter/src/ops/Add.h29
-rw-r--r--compiler/mir-interpreter/src/ops/AvgPool2D.cpp173
-rw-r--r--compiler/mir-interpreter/src/ops/AvgPool2D.h31
-rw-r--r--compiler/mir-interpreter/src/ops/CappedReLU.cpp82
-rw-r--r--compiler/mir-interpreter/src/ops/CappedReLU.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Common.cpp37
-rw-r--r--compiler/mir-interpreter/src/ops/Common.h65
-rw-r--r--compiler/mir-interpreter/src/ops/Concat.cpp172
-rw-r--r--compiler/mir-interpreter/src/ops/Concat.h30
-rw-r--r--compiler/mir-interpreter/src/ops/Conv2D.cpp261
-rw-r--r--compiler/mir-interpreter/src/ops/Conv2D.h32
-rw-r--r--compiler/mir-interpreter/src/ops/DeConv2D.cpp122
-rw-r--r--compiler/mir-interpreter/src/ops/DeConv2D.h41
-rw-r--r--compiler/mir-interpreter/src/ops/DepthwiseConv2D.cpp225
-rw-r--r--compiler/mir-interpreter/src/ops/DepthwiseConv2D.h32
-rw-r--r--compiler/mir-interpreter/src/ops/Div.cpp62
-rw-r--r--compiler/mir-interpreter/src/ops/Div.h29
-rw-r--r--compiler/mir-interpreter/src/ops/ELU.cpp51
-rw-r--r--compiler/mir-interpreter/src/ops/ELU.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Equal.cpp58
-rw-r--r--compiler/mir-interpreter/src/ops/Equal.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Fill.h48
-rw-r--r--compiler/mir-interpreter/src/ops/FullyConnected.cpp214
-rw-r--r--compiler/mir-interpreter/src/ops/FullyConnected.h32
-rw-r--r--compiler/mir-interpreter/src/ops/Gather.cpp92
-rw-r--r--compiler/mir-interpreter/src/ops/Gather.h31
-rw-r--r--compiler/mir-interpreter/src/ops/Greater.cpp57
-rw-r--r--compiler/mir-interpreter/src/ops/Greater.h29
-rw-r--r--compiler/mir-interpreter/src/ops/HardSwish.cpp54
-rw-r--r--compiler/mir-interpreter/src/ops/HardSwish.h29
-rw-r--r--compiler/mir-interpreter/src/ops/LeakyReLU.cpp49
-rw-r--r--compiler/mir-interpreter/src/ops/LeakyReLU.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Less.cpp57
-rw-r--r--compiler/mir-interpreter/src/ops/Less.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Max.cpp66
-rw-r--r--compiler/mir-interpreter/src/ops/Max.h29
-rw-r--r--compiler/mir-interpreter/src/ops/MaxPool2D.cpp157
-rw-r--r--compiler/mir-interpreter/src/ops/MaxPool2D.h31
-rw-r--r--compiler/mir-interpreter/src/ops/Mul.cpp61
-rw-r--r--compiler/mir-interpreter/src/ops/Mul.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Pad.cpp84
-rw-r--r--compiler/mir-interpreter/src/ops/Pad.h36
-rw-r--r--compiler/mir-interpreter/src/ops/Quantization.cpp71
-rw-r--r--compiler/mir-interpreter/src/ops/Quantization.h32
-rw-r--r--compiler/mir-interpreter/src/ops/QuantizationHelpers.h126
-rw-r--r--compiler/mir-interpreter/src/ops/ReLU.cpp58
-rw-r--r--compiler/mir-interpreter/src/ops/ReLU.h29
-rw-r--r--compiler/mir-interpreter/src/ops/ReduceMean.cpp98
-rw-r--r--compiler/mir-interpreter/src/ops/ReduceMean.h30
-rw-r--r--compiler/mir-interpreter/src/ops/Reshape.cpp38
-rw-r--r--compiler/mir-interpreter/src/ops/Reshape.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Sigmoid.cpp58
-rw-r--r--compiler/mir-interpreter/src/ops/Sigmoid.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Slice.cpp52
-rw-r--r--compiler/mir-interpreter/src/ops/Slice.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Softmax.cpp155
-rw-r--r--compiler/mir-interpreter/src/ops/Softmax.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Sqrt.cpp58
-rw-r--r--compiler/mir-interpreter/src/ops/Sqrt.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Sub.cpp61
-rw-r--r--compiler/mir-interpreter/src/ops/Sub.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Tanh.cpp58
-rw-r--r--compiler/mir-interpreter/src/ops/Tanh.h29
-rw-r--r--compiler/mir-interpreter/src/ops/Transpose.cpp64
-rw-r--r--compiler/mir-interpreter/src/ops/Transpose.h31
-rw-r--r--compiler/mir-onnx-importer/AttributeHelpers.h105
-rw-r--r--compiler/mir-onnx-importer/CMakeLists.txt119
-rw-r--r--compiler/mir-onnx-importer/ConvPoolHelpers.cpp113
-rw-r--r--compiler/mir-onnx-importer/ConvPoolHelpers.h44
-rw-r--r--compiler/mir-onnx-importer/ONNXHelpers.cpp188
-rw-r--r--compiler/mir-onnx-importer/ONNXHelpers.h50
-rw-r--r--compiler/mir-onnx-importer/ONNXImporterImpl.cpp241
-rw-r--r--compiler/mir-onnx-importer/ONNXImporterImpl.h35
-rw-r--r--compiler/mir-onnx-importer/ONNXNodeConverterRegistry.cpp142
-rw-r--r--compiler/mir-onnx-importer/ONNXNodeConverterRegistry.h80
-rw-r--r--compiler/mir-onnx-importer/ONNXNodeConverterRegistry.test.cpp64
-rw-r--r--compiler/mir-onnx-importer/ONNXOpRegistration.h256
-rw-r--r--compiler/mir-onnx-importer/Op/Abs.cpp47
-rw-r--r--compiler/mir-onnx-importer/Op/Abs.h30
-rw-r--r--compiler/mir-onnx-importer/Op/Add.cpp53
-rw-r--r--compiler/mir-onnx-importer/Op/Add.h31
-rw-r--r--compiler/mir-onnx-importer/Op/AveragePool.cpp99
-rw-r--r--compiler/mir-onnx-importer/Op/AveragePool.h31
-rw-r--r--compiler/mir-onnx-importer/Op/BatchNormalization.cpp119
-rw-r--r--compiler/mir-onnx-importer/Op/BatchNormalization.h32
-rw-r--r--compiler/mir-onnx-importer/Op/Concat.cpp54
-rw-r--r--compiler/mir-onnx-importer/Op/Concat.h30
-rw-r--r--compiler/mir-onnx-importer/Op/Constant.cpp61
-rw-r--r--compiler/mir-onnx-importer/Op/Constant.h31
-rw-r--r--compiler/mir-onnx-importer/Op/Conv.cpp156
-rw-r--r--compiler/mir-onnx-importer/Op/Conv.h29
-rw-r--r--compiler/mir-onnx-importer/Op/ConvTranspose.cpp138
-rw-r--r--compiler/mir-onnx-importer/Op/ConvTranspose.h29
-rw-r--r--compiler/mir-onnx-importer/Op/Div.cpp38
-rw-r--r--compiler/mir-onnx-importer/Op/Div.h29
-rw-r--r--compiler/mir-onnx-importer/Op/Dropout.cpp54
-rw-r--r--compiler/mir-onnx-importer/Op/Dropout.h32
-rw-r--r--compiler/mir-onnx-importer/Op/Equal.cpp43
-rw-r--r--compiler/mir-onnx-importer/Op/Equal.h30
-rw-r--r--compiler/mir-onnx-importer/Op/Expand.cpp43
-rw-r--r--compiler/mir-onnx-importer/Op/Expand.h29
-rw-r--r--compiler/mir-onnx-importer/Op/Flatten.cpp58
-rw-r--r--compiler/mir-onnx-importer/Op/Flatten.h30
-rw-r--r--compiler/mir-onnx-importer/Op/Gather.cpp40
-rw-r--r--compiler/mir-onnx-importer/Op/Gather.h29
-rw-r--r--compiler/mir-onnx-importer/Op/Gemm.cpp120
-rw-r--r--compiler/mir-onnx-importer/Op/Gemm.h33
-rw-r--r--compiler/mir-onnx-importer/Op/GlobalAveragePool.cpp50
-rw-r--r--compiler/mir-onnx-importer/Op/GlobalAveragePool.h29
-rw-r--r--compiler/mir-onnx-importer/Op/Greater.cpp47
-rw-r--r--compiler/mir-onnx-importer/Op/Greater.h30
-rw-r--r--compiler/mir-onnx-importer/Op/Identity.cpp30
-rw-r--r--compiler/mir-onnx-importer/Op/Identity.h29
-rw-r--r--compiler/mir-onnx-importer/Op/Less.cpp47
-rw-r--r--compiler/mir-onnx-importer/Op/Less.h30
-rw-r--r--compiler/mir-onnx-importer/Op/MatMul.cpp50
-rw-r--r--compiler/mir-onnx-importer/Op/MatMul.h30
-rw-r--r--compiler/mir-onnx-importer/Op/Max.cpp54
-rw-r--r--compiler/mir-onnx-importer/Op/Max.h31
-rw-r--r--compiler/mir-onnx-importer/Op/MaxPool.cpp107
-rw-r--r--compiler/mir-onnx-importer/Op/MaxPool.h31
-rw-r--r--compiler/mir-onnx-importer/Op/Mul.cpp35
-rw-r--r--compiler/mir-onnx-importer/Op/Mul.h29
-rw-r--r--compiler/mir-onnx-importer/Op/Pad.cpp70
-rw-r--r--compiler/mir-onnx-importer/Op/Pad.h30
-rw-r--r--compiler/mir-onnx-importer/Op/Reciprocal.cpp53
-rw-r--r--compiler/mir-onnx-importer/Op/Reciprocal.h30
-rw-r--r--compiler/mir-onnx-importer/Op/ReduceMean.cpp60
-rw-r--r--compiler/mir-onnx-importer/Op/ReduceMean.h29
-rw-r--r--compiler/mir-onnx-importer/Op/Relu.cpp46
-rw-r--r--compiler/mir-onnx-importer/Op/Relu.h30
-rw-r--r--compiler/mir-onnx-importer/Op/Reshape.cpp97
-rw-r--r--compiler/mir-onnx-importer/Op/Reshape.h30
-rw-r--r--compiler/mir-onnx-importer/Op/Shape.cpp46
-rw-r--r--compiler/mir-onnx-importer/Op/Shape.h29
-rw-r--r--compiler/mir-onnx-importer/Op/Sigmoid.cpp46
-rw-r--r--compiler/mir-onnx-importer/Op/Sigmoid.h30
-rw-r--r--compiler/mir-onnx-importer/Op/Softmax.cpp40
-rw-r--r--compiler/mir-onnx-importer/Op/Softmax.h29
-rw-r--r--compiler/mir-onnx-importer/Op/Sqrt.cpp46
-rw-r--r--compiler/mir-onnx-importer/Op/Sqrt.h30
-rw-r--r--compiler/mir-onnx-importer/Op/Sub.cpp53
-rw-r--r--compiler/mir-onnx-importer/Op/Sub.h31
-rw-r--r--compiler/mir-onnx-importer/Op/Sum.cpp41
-rw-r--r--compiler/mir-onnx-importer/Op/Sum.h29
-rw-r--r--compiler/mir-onnx-importer/Op/Tanh.cpp46
-rw-r--r--compiler/mir-onnx-importer/Op/Tanh.h30
-rw-r--r--compiler/mir-onnx-importer/Op/Transpose.cpp57
-rw-r--r--compiler/mir-onnx-importer/Op/Transpose.h29
-rw-r--r--compiler/mir-onnx-importer/Op/Unsqueeze.cpp56
-rw-r--r--compiler/mir-onnx-importer/Op/Unsqueeze.h29
-rw-r--r--compiler/mir-onnx-importer/Op/Upsample.cpp124
-rw-r--r--compiler/mir-onnx-importer/Op/Upsample.h31
-rw-r--r--compiler/mir-onnx-importer/requires.cmake2
-rw-r--r--compiler/mir-tflite-importer/CMakeLists.txt22
-rw-r--r--compiler/mir-tflite-importer/requires.cmake1
-rw-r--r--compiler/mir-tflite-importer/schema/schema.fbs937
-rw-r--r--compiler/mir-tflite-importer/schema/schema.meta2
-rw-r--r--compiler/mir-tflite-importer/schema/schema_v0.fbs247
-rw-r--r--compiler/mir-tflite-importer/schema/schema_v0.meta2
-rw-r--r--compiler/mir-tflite-importer/schema/schema_v1.fbs295
-rw-r--r--compiler/mir-tflite-importer/schema/schema_v1.meta2
-rw-r--r--compiler/mir-tflite-importer/schema/schema_v2.fbs303
-rw-r--r--compiler/mir-tflite-importer/schema/schema_v2.meta2
-rw-r--r--compiler/mir-tflite-importer/schema/schema_v3.fbs326
-rw-r--r--compiler/mir-tflite-importer/schema/schema_v3.meta2
-rw-r--r--compiler/mir-tflite-importer/tflite_importer.cpp428
-rw-r--r--compiler/mir-tflite-importer/tflite_importer.h32
-rw-r--r--compiler/mir-tflite-importer/tflite_op_creator.cpp649
-rw-r--r--compiler/mir-tflite-importer/tflite_op_creator.h163
-rw-r--r--compiler/mir/CMakeLists.txt38
-rw-r--r--compiler/mir/Readme.md36
-rw-r--r--compiler/mir/include/mir/Attributes.h83
-rw-r--r--compiler/mir/include/mir/Common.h36
-rw-r--r--compiler/mir/include/mir/DataFormat.h90
-rw-r--r--compiler/mir/include/mir/DataType.h58
-rw-r--r--compiler/mir/include/mir/ExternalRegion.h44
-rw-r--r--compiler/mir/include/mir/Graph.h111
-rw-r--r--compiler/mir/include/mir/GraphPatternMatcher.h55
-rw-r--r--compiler/mir/include/mir/Index.h81
-rw-r--r--compiler/mir/include/mir/IrDotDumper.h31
-rw-r--r--compiler/mir/include/mir/OpDefs.h60
-rw-r--r--compiler/mir/include/mir/Operation.h188
-rw-r--r--compiler/mir/include/mir/Operations.inc59
-rw-r--r--compiler/mir/include/mir/Quantization.h47
-rw-r--r--compiler/mir/include/mir/Region.h37
-rw-r--r--compiler/mir/include/mir/Shape.h75
-rw-r--r--compiler/mir/include/mir/ShapeRange.h109
-rw-r--r--compiler/mir/include/mir/Tensor.h62
-rw-r--r--compiler/mir/include/mir/TensorType.h56
-rw-r--r--compiler/mir/include/mir/TensorUtil.h59
-rw-r--r--compiler/mir/include/mir/TensorVariant.h86
-rw-r--r--compiler/mir/include/mir/Visitor.h63
-rw-r--r--compiler/mir/include/mir/ops/AbsOp.h45
-rw-r--r--compiler/mir/include/mir/ops/AddOp.h41
-rw-r--r--compiler/mir/include/mir/ops/AvgPool2DOp.h68
-rw-r--r--compiler/mir/include/mir/ops/BinaryElementwiseOp.h42
-rw-r--r--compiler/mir/include/mir/ops/BroadcastOp.h47
-rw-r--r--compiler/mir/include/mir/ops/CappedReluOp.h50
-rw-r--r--compiler/mir/include/mir/ops/ConcatOp.h67
-rw-r--r--compiler/mir/include/mir/ops/ConstantOp.h52
-rw-r--r--compiler/mir/include/mir/ops/Conv2DOp.h73
-rw-r--r--compiler/mir/include/mir/ops/Deconv2DOp.h84
-rw-r--r--compiler/mir/include/mir/ops/DepthwiseConv2DOp.h72
-rw-r--r--compiler/mir/include/mir/ops/DequantizeOp.h46
-rw-r--r--compiler/mir/include/mir/ops/DivOp.h41
-rw-r--r--compiler/mir/include/mir/ops/EluOp.h49
-rw-r--r--compiler/mir/include/mir/ops/EqualOp.h44
-rw-r--r--compiler/mir/include/mir/ops/FullyConnectedOp.h58
-rw-r--r--compiler/mir/include/mir/ops/GatherOp.h57
-rw-r--r--compiler/mir/include/mir/ops/GreaterOp.h44
-rw-r--r--compiler/mir/include/mir/ops/HardSwishOp.h44
-rw-r--r--compiler/mir/include/mir/ops/InputOp.h49
-rw-r--r--compiler/mir/include/mir/ops/LeakyReluOp.h50
-rw-r--r--compiler/mir/include/mir/ops/LessOp.h44
-rw-r--r--compiler/mir/include/mir/ops/MaxOp.h41
-rw-r--r--compiler/mir/include/mir/ops/MaxPool2DOp.h66
-rw-r--r--compiler/mir/include/mir/ops/MulOp.h41
-rw-r--r--compiler/mir/include/mir/ops/OutputOp.h41
-rw-r--r--compiler/mir/include/mir/ops/PadOp.h58
-rw-r--r--compiler/mir/include/mir/ops/PaddingType.h37
-rw-r--r--compiler/mir/include/mir/ops/QuantizeOp.h44
-rw-r--r--compiler/mir/include/mir/ops/ReduceMeanOp.h45
-rw-r--r--compiler/mir/include/mir/ops/ReduceOp.h52
-rw-r--r--compiler/mir/include/mir/ops/ReluOp.h45
-rw-r--r--compiler/mir/include/mir/ops/ReshapeOp.h64
-rw-r--r--compiler/mir/include/mir/ops/ResizeOp.h87
-rw-r--r--compiler/mir/include/mir/ops/SigmoidOp.h45
-rw-r--r--compiler/mir/include/mir/ops/SliceOp.h55
-rw-r--r--compiler/mir/include/mir/ops/SoftmaxOp.h64
-rw-r--r--compiler/mir/include/mir/ops/SqrtOp.h44
-rw-r--r--compiler/mir/include/mir/ops/SqueezeOp.h56
-rw-r--r--compiler/mir/include/mir/ops/SubOp.h41
-rw-r--r--compiler/mir/include/mir/ops/TanhOp.h45
-rw-r--r--compiler/mir/include/mir/ops/TransposeOp.h54
-rw-r--r--compiler/mir/src/DotGraph.cpp41
-rw-r--r--compiler/mir/src/DotGraph.h55
-rw-r--r--compiler/mir/src/DotNodeBuilder.cpp205
-rw-r--r--compiler/mir/src/DotNodeBuilder.h70
-rw-r--r--compiler/mir/src/Graph.cpp136
-rw-r--r--compiler/mir/src/GraphPatternMatcher.cpp76
-rw-r--r--compiler/mir/src/Index.cpp50
-rw-r--r--compiler/mir/src/IrDotDumper.cpp41
-rw-r--r--compiler/mir/src/Operation.cpp87
-rw-r--r--compiler/mir/src/Shape.cpp87
-rw-r--r--compiler/mir/src/Tensor.cpp26
-rw-r--r--compiler/mir/src/TensorVariant.cpp74
-rw-r--r--compiler/mir/src/Visitor.cpp29
-rw-r--r--compiler/mir/src/ops/AvgPool2DOp.cpp60
-rw-r--r--compiler/mir/src/ops/BinaryElementwiseOp.cpp39
-rw-r--r--compiler/mir/src/ops/BroadcastOp.cpp35
-rw-r--r--compiler/mir/src/ops/ConcatOp.cpp40
-rw-r--r--compiler/mir/src/ops/Conv2DOp.cpp67
-rw-r--r--compiler/mir/src/ops/DeConv2DOp.cpp96
-rw-r--r--compiler/mir/src/ops/DepthwiseConv2DOp.cpp60
-rw-r--r--compiler/mir/src/ops/FullyConnectedOp.cpp46
-rw-r--r--compiler/mir/src/ops/GatherOp.cpp52
-rw-r--r--compiler/mir/src/ops/MaxPool2DOp.cpp60
-rw-r--r--compiler/mir/src/ops/PadOp.cpp40
-rw-r--r--compiler/mir/src/ops/ReduceOp.cpp61
-rw-r--r--compiler/mir/src/ops/SliceOp.cpp45
-rw-r--r--compiler/mir/src/ops/SqueezeOp.cpp81
-rw-r--r--compiler/mir/src/ops/TransposeOp.cpp43
-rw-r--r--compiler/mir/unittests/CMakeLists.txt16
-rw-r--r--compiler/mir/unittests/Index.cpp63
-rw-r--r--compiler/mir/unittests/NodeReplacer.cpp65
-rw-r--r--compiler/mir/unittests/Operation.cpp93
-rw-r--r--compiler/mir/unittests/ShapeInference.cpp203
-rw-r--r--compiler/mir/unittests/ShapeRange.cpp78
-rw-r--r--compiler/mir/unittests/TensorVariant.cpp39
-rw-r--r--compiler/mir2loco/CMakeLists.txt19
-rw-r--r--compiler/mir2loco/include/mir2loco.h58
-rw-r--r--compiler/mir2loco/requires.cmake2
-rw-r--r--compiler/mir2loco/src/mir2loco.cpp725
-rw-r--r--compiler/mir2loco/src/mir2loco.test.cpp736
-rw-r--r--compiler/moco-log/CMakeLists.txt9
-rw-r--r--compiler/moco-log/README.md3
-rw-r--r--compiler/moco-log/include/moco/Log.h75
-rw-r--r--compiler/moco-log/include/moco/LoggingContext.h35
-rw-r--r--compiler/moco-log/requires.cmake2
-rw-r--r--compiler/moco-log/src/Log.cpp87
-rw-r--r--compiler/moco-log/src/LoggingContext.cpp40
-rw-r--r--compiler/moco-tf/CMakeLists.txt51
-rw-r--r--compiler/moco-tf/README.md57
-rw-r--r--compiler/moco-tf/doc/Conversion.md140
-rw-r--r--compiler/moco-tf/include/moco/tf/Frontend.h54
-rw-r--r--compiler/moco-tf/requires.cmake13
-rw-r--r--compiler/moco-tf/src/BroadcastHelper.cpp226
-rw-r--r--compiler/moco-tf/src/BroadcastHelper.h76
-rw-r--r--compiler/moco-tf/src/BroadcastHelper.test.cpp88
-rw-r--r--compiler/moco-tf/src/CanonicalEltwiseInputConnector.cpp49
-rw-r--r--compiler/moco-tf/src/CanonicalEltwiseInputConnector.h60
-rw-r--r--compiler/moco-tf/src/Canonicalization/AddCanonicalizer.cpp35
-rw-r--r--compiler/moco-tf/src/Canonicalization/AddCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/AvgPoolCanonicalizer.cpp114
-rw-r--r--compiler/moco-tf/src/Canonicalization/AvgPoolCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/BiasAddCanonicalizer.cpp109
-rw-r--r--compiler/moco-tf/src/Canonicalization/BiasAddCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/ConcatV2Canonicalizer.cpp160
-rw-r--r--compiler/moco-tf/src/Canonicalization/ConcatV2Canonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/ConstCanonicalizer.cpp127
-rw-r--r--compiler/moco-tf/src/Canonicalization/ConstCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/Conv2DBackpropInputCanonicalizer.cpp371
-rw-r--r--compiler/moco-tf/src/Canonicalization/Conv2DBackpropInputCanonicalizer.h45
-rw-r--r--compiler/moco-tf/src/Canonicalization/Conv2DCanonicalizer.cpp132
-rw-r--r--compiler/moco-tf/src/Canonicalization/Conv2DCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/DepthwiseConv2dNativeCanonicalizer.cpp137
-rw-r--r--compiler/moco-tf/src/Canonicalization/DepthwiseConv2dNativeCanonicalizer.h45
-rw-r--r--compiler/moco-tf/src/Canonicalization/IdentityCanonicalizer.cpp78
-rw-r--r--compiler/moco-tf/src/Canonicalization/IdentityCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/MaxPoolCanonicalizer.cpp111
-rw-r--r--compiler/moco-tf/src/Canonicalization/MaxPoolCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/MaximumCanonicalizer.cpp34
-rw-r--r--compiler/moco-tf/src/Canonicalization/MaximumCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/MeanCanonicalizer.cpp31
-rw-r--r--compiler/moco-tf/src/Canonicalization/MeanCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/MulCanonicalizer.cpp34
-rw-r--r--compiler/moco-tf/src/Canonicalization/MulCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/PadCanonicalizer.cpp100
-rw-r--r--compiler/moco-tf/src/Canonicalization/PadCanonicalizer.h45
-rw-r--r--compiler/moco-tf/src/Canonicalization/PlaceholderCanonicalizer.cpp102
-rw-r--r--compiler/moco-tf/src/Canonicalization/PlaceholderCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/RealDivCanonicalizer.cpp34
-rw-r--r--compiler/moco-tf/src/Canonicalization/RealDivCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/Relu6Canonicalizer.cpp70
-rw-r--r--compiler/moco-tf/src/Canonicalization/Relu6Canonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/ReluCanonicalizer.cpp70
-rw-r--r--compiler/moco-tf/src/Canonicalization/ReluCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/ReshapeCanonicalizer.cpp169
-rw-r--r--compiler/moco-tf/src/Canonicalization/ReshapeCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/RsqrtCanonicalizer.cpp150
-rw-r--r--compiler/moco-tf/src/Canonicalization/RsqrtCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.cpp78
-rw-r--r--compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/SqrtCanonicalizer.cpp68
-rw-r--r--compiler/moco-tf/src/Canonicalization/SqrtCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/SqueezeCanonicalizer.cpp86
-rw-r--r--compiler/moco-tf/src/Canonicalization/SqueezeCanonicalizer.h49
-rw-r--r--compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.cpp71
-rw-r--r--compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/SubCanonicalizer.cpp34
-rw-r--r--compiler/moco-tf/src/Canonicalization/SubCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/TFPushCanonicalizer.cpp74
-rw-r--r--compiler/moco-tf/src/Canonicalization/TFPushCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalization/TanhCanonicalizer.cpp70
-rw-r--r--compiler/moco-tf/src/Canonicalization/TanhCanonicalizer.h47
-rw-r--r--compiler/moco-tf/src/Canonicalizer.cpp142
-rw-r--r--compiler/moco-tf/src/Canonicalizer.h36
-rw-r--r--compiler/moco-tf/src/Canonicalizer.test.cpp33
-rw-r--r--compiler/moco-tf/src/CodecHelper.h74
-rw-r--r--compiler/moco-tf/src/Convert.cpp34
-rw-r--r--compiler/moco-tf/src/Convert.h31
-rw-r--r--compiler/moco-tf/src/Convert.test.cpp29
-rw-r--r--compiler/moco-tf/src/Frontend.cpp277
-rw-r--r--compiler/moco-tf/src/Frontend.test.cpp84
-rw-r--r--compiler/moco-tf/src/Knob.cpp123
-rw-r--r--compiler/moco-tf/src/Knob.h47
-rw-r--r--compiler/moco-tf/src/Knob.lst39
-rw-r--r--compiler/moco-tf/src/LogHelper.cpp82
-rw-r--r--compiler/moco-tf/src/LogHelper.h73
-rw-r--r--compiler/moco-tf/src/Op/COpCall.cpp126
-rw-r--r--compiler/moco-tf/src/Op/COpCall.h46
-rw-r--r--compiler/moco-tf/src/Op/COpCall.test.cpp121
-rw-r--r--compiler/moco-tf/src/Optimizer.cpp90
-rw-r--r--compiler/moco-tf/src/Optimizer.h36
-rw-r--r--compiler/moco-tf/src/Optimizer.test.cpp87
-rw-r--r--compiler/moco-tf/src/ProgressReporter.cpp88
-rw-r--r--compiler/moco-tf/src/ProgressReporter.h56
-rw-r--r--compiler/moco-tf/src/SimpleNodeTransform.h64
-rw-r--r--compiler/moco-tf/src/SimpleNodeTransform.test.cpp56
-rw-r--r--compiler/moco-tf/src/TFEltwiseBinaryCanonicalzeHelper.h117
-rw-r--r--compiler/moco-tf/src/TFFormattedGraph.cpp400
-rw-r--r--compiler/moco-tf/src/TFFormattedGraph.h59
-rw-r--r--compiler/moco-tf/src/TFOptimizer.cpp81
-rw-r--r--compiler/moco-tf/src/TFOptimizer.h36
-rw-r--r--compiler/moco-tf/src/TFOptimizer.test.cpp33
-rw-r--r--compiler/moco-tf/src/TFReduceCanonicalzeHelper.h118
-rw-r--r--compiler/moco-tf/src/TestHelper.h113
-rw-r--r--compiler/moco-tf/src/TestHelper.test.cpp121
-rw-r--r--compiler/moco-tf/src/Transform.cpp35
-rw-r--r--compiler/moco-tf/src/Transform.h44
-rw-r--r--compiler/moco-tf/src/Transform.test.cpp46
-rw-r--r--compiler/moco-tf/src/Transforms.h26
-rw-r--r--compiler/moco-tf/src/Transforms/ShapeInferencePass.cpp56
-rw-r--r--compiler/moco-tf/src/Transforms/ShapeInferencePass.h44
-rw-r--r--compiler/moco-tf/src/Transforms/TypeInferencePass.cpp54
-rw-r--r--compiler/moco-tf/src/Transforms/TypeInferencePass.h44
-rw-r--r--compiler/moco-value-pbtxt-test/.gitignore1
-rw-r--r--compiler/moco-value-pbtxt-test/CMakeLists.txt136
-rw-r--r--compiler/moco-value-pbtxt-test/README.md1
-rw-r--r--compiler/moco-value-pbtxt-test/requires.cmake2
-rwxr-xr-xcompiler/moco-value-pbtxt-test/runall.sh96
-rw-r--r--compiler/moco-value-pbtxt-test/test.lst103
-rw-r--r--compiler/moco/CMakeLists.txt5
-rw-r--r--compiler/moco/README.md3
-rw-r--r--compiler/moco/import/CMakeLists.txt26
-rw-r--r--compiler/moco/import/README.md3
-rw-r--r--compiler/moco/import/include/moco/GraphHelper.h59
-rw-r--r--compiler/moco/import/include/moco/Import/GraphBuilder.h40
-rw-r--r--compiler/moco/import/include/moco/Import/GraphBuilderContext.h144
-rw-r--r--compiler/moco/import/include/moco/Import/GraphBuilderRegistry.h87
-rw-r--r--compiler/moco/import/include/moco/Import/ModelSignature.h80
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes.h53
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Add.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/AvgPool.h34
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/BiasAdd.h34
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Concat.h34
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Const.h34
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Conv2D.h34
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Conv2DBackpropInput.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/DepthwiseConv2dNative.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/FakeQuantWithMinMaxVars.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/FusedBatchNorm.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Identity.h34
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/MaxPool.h34
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Maximum.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Mean.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Mul.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Pack.h34
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Pad.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Placeholder.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/RealDiv.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Relu.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Relu6.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Reshape.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Rsqrt.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Shape.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Softmax.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Sqrt.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/SquaredDifference.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Squeeze.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/StopGradient.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/StridedSlice.h34
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Sub.h37
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Tanh.h37
-rw-r--r--compiler/moco/import/include/moco/Importer.h54
-rw-r--r--compiler/moco/import/src/Convert.cpp34
-rw-r--r--compiler/moco/import/src/Convert.h31
-rw-r--r--compiler/moco/import/src/GraphBuilderContext.cpp80
-rw-r--r--compiler/moco/import/src/GraphBuilderContext.test.cpp77
-rw-r--r--compiler/moco/import/src/GraphBuilderRegistry.cpp63
-rw-r--r--compiler/moco/import/src/Importer.cpp197
-rw-r--r--compiler/moco/import/src/Importer.test.cpp223
-rw-r--r--compiler/moco/import/src/ModelSignature.cpp66
-rw-r--r--compiler/moco/import/src/Nodes/Add.cpp85
-rw-r--r--compiler/moco/import/src/Nodes/Add.test.cpp58
-rw-r--r--compiler/moco/import/src/Nodes/AvgPool.cpp140
-rw-r--r--compiler/moco/import/src/Nodes/AvgPool.test.cpp99
-rw-r--r--compiler/moco/import/src/Nodes/BiasAdd.cpp122
-rw-r--r--compiler/moco/import/src/Nodes/BiasAdd.test.cpp112
-rw-r--r--compiler/moco/import/src/Nodes/Concat.cpp109
-rw-r--r--compiler/moco/import/src/Nodes/Concat.test.cpp134
-rw-r--r--compiler/moco/import/src/Nodes/Const.cpp242
-rw-r--r--compiler/moco/import/src/Nodes/Const.test.cpp465
-rw-r--r--compiler/moco/import/src/Nodes/Conv2D.cpp139
-rw-r--r--compiler/moco/import/src/Nodes/Conv2D.test.cpp119
-rw-r--r--compiler/moco/import/src/Nodes/Conv2DBackpropInput.cpp140
-rw-r--r--compiler/moco/import/src/Nodes/Conv2DBackpropInput.test.cpp98
-rw-r--r--compiler/moco/import/src/Nodes/DepthwiseConv2dNative.cpp148
-rw-r--r--compiler/moco/import/src/Nodes/DepthwiseConv2dNative.test.cpp97
-rw-r--r--compiler/moco/import/src/Nodes/FakeQuantWithMinMaxVars.cpp123
-rw-r--r--compiler/moco/import/src/Nodes/FakeQuantWithMinMaxVars.test.cpp65
-rw-r--r--compiler/moco/import/src/Nodes/FusedBatchNorm.cpp102
-rw-r--r--compiler/moco/import/src/Nodes/FusedBatchNorm.test.cpp88
-rw-r--r--compiler/moco/import/src/Nodes/Identity.cpp95
-rw-r--r--compiler/moco/import/src/Nodes/MaxPool.cpp145
-rw-r--r--compiler/moco/import/src/Nodes/MaxPool.test.cpp98
-rw-r--r--compiler/moco/import/src/Nodes/Maximum.cpp87
-rw-r--r--compiler/moco/import/src/Nodes/Maximum.test.cpp58
-rw-r--r--compiler/moco/import/src/Nodes/Mean.cpp99
-rw-r--r--compiler/moco/import/src/Nodes/Mean.test.cpp120
-rw-r--r--compiler/moco/import/src/Nodes/Mul.cpp85
-rw-r--r--compiler/moco/import/src/Nodes/Mul.test.cpp58
-rw-r--r--compiler/moco/import/src/Nodes/Pack.cpp102
-rw-r--r--compiler/moco/import/src/Nodes/Pack.test.cpp84
-rw-r--r--compiler/moco/import/src/Nodes/Pad.cpp91
-rw-r--r--compiler/moco/import/src/Nodes/Pad.test.cpp65
-rw-r--r--compiler/moco/import/src/Nodes/Placeholder.cpp90
-rw-r--r--compiler/moco/import/src/Nodes/Placeholder.test.cpp71
-rw-r--r--compiler/moco/import/src/Nodes/RealDiv.cpp86
-rw-r--r--compiler/moco/import/src/Nodes/RealDiv.test.cpp58
-rw-r--r--compiler/moco/import/src/Nodes/Relu.cpp86
-rw-r--r--compiler/moco/import/src/Nodes/Relu.test.cpp58
-rw-r--r--compiler/moco/import/src/Nodes/Relu6.cpp80
-rw-r--r--compiler/moco/import/src/Nodes/Relu6.test.cpp58
-rw-r--r--compiler/moco/import/src/Nodes/Reshape.cpp102
-rw-r--r--compiler/moco/import/src/Nodes/Reshape.test.cpp61
-rw-r--r--compiler/moco/import/src/Nodes/Rsqrt.cpp82
-rw-r--r--compiler/moco/import/src/Nodes/Rsqrt.test.cpp57
-rw-r--r--compiler/moco/import/src/Nodes/Shape.cpp100
-rw-r--r--compiler/moco/import/src/Nodes/Shape.test.cpp65
-rw-r--r--compiler/moco/import/src/Nodes/Softmax.cpp86
-rw-r--r--compiler/moco/import/src/Nodes/Softmax.test.cpp58
-rw-r--r--compiler/moco/import/src/Nodes/Sqrt.cpp81
-rw-r--r--compiler/moco/import/src/Nodes/Sqrt.test.cpp57
-rw-r--r--compiler/moco/import/src/Nodes/SquaredDifference.cpp92
-rw-r--r--compiler/moco/import/src/Nodes/SquaredDifference.test.cpp59
-rw-r--r--compiler/moco/import/src/Nodes/Squeeze.cpp112
-rw-r--r--compiler/moco/import/src/Nodes/Squeeze.test.cpp109
-rw-r--r--compiler/moco/import/src/Nodes/StopGradient.cpp87
-rw-r--r--compiler/moco/import/src/Nodes/StopGradient.test.cpp57
-rw-r--r--compiler/moco/import/src/Nodes/StridedSlice.cpp187
-rw-r--r--compiler/moco/import/src/Nodes/StridedSlice.test.cpp107
-rw-r--r--compiler/moco/import/src/Nodes/Sub.cpp85
-rw-r--r--compiler/moco/import/src/Nodes/Sub.test.cpp58
-rw-r--r--compiler/moco/import/src/Nodes/Tanh.cpp81
-rw-r--r--compiler/moco/import/src/Nodes/Tanh.test.cpp57
-rw-r--r--compiler/moco/import/src/TestHelper.h83
-rw-r--r--compiler/moco/import/src/TestHelper.test.cpp101
-rw-r--r--compiler/moco/lang/CMakeLists.txt21
-rw-r--r--compiler/moco/lang/README.md3
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFAdd.h56
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFAvgPool.h101
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFBiasAdd.h68
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFConcatV2.h90
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFConst.h94
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFConv2D.h55
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFConv2DBackpropInput.h102
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFDepthwiseConv2dNative.h56
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFFakeQuantWithMinMaxVars.h54
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFFusedBatchNorm.h55
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFIdentity.h52
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFMaxPool.h101
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFMaximum.h56
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFMean.h71
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFMul.h56
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFPack.h86
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFPad.h61
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFPlaceholder.h90
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFPush.h84
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFRealDiv.h56
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFRelu.h52
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFRelu6.h50
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFReshape.h54
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFRsqrt.h52
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFShape.h60
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFSoftmax.h37
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFSqrt.h52
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFSquaredDifference.h56
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFSqueeze.h71
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFStopGradient.h52
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFStridedSlice.h123
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFSub.h56
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFTanh.h37
-rw-r--r--compiler/moco/lang/include/moco/IR/TFDataLayout.h29
-rw-r--r--compiler/moco/lang/include/moco/IR/TFDialect.h43
-rw-r--r--compiler/moco/lang/include/moco/IR/TFNode.h23
-rw-r--r--compiler/moco/lang/include/moco/IR/TFNodeDecl.h104
-rw-r--r--compiler/moco/lang/include/moco/IR/TFNodeImpl.h68
-rw-r--r--compiler/moco/lang/include/moco/IR/TFNodeVisitor.forward.h30
-rw-r--r--compiler/moco/lang/include/moco/IR/TFNodeVisitor.h83
-rw-r--r--compiler/moco/lang/include/moco/IR/TFNodes.h55
-rw-r--r--compiler/moco/lang/include/moco/IR/TFNodes.lst48
-rw-r--r--compiler/moco/lang/include/moco/IR/TFOpcode.h35
-rw-r--r--compiler/moco/lang/include/moco/IR/TFPadding.h29
-rw-r--r--compiler/moco/lang/include/moco/IR/VariadicArityNode.h77
-rw-r--r--compiler/moco/lang/include/moco/Names.h96
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFAdd.test.cpp31
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFAvgPool.test.cpp34
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFBiasAdd.test.cpp32
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFConcatV2.test.cpp34
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFConst.cpp113
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFConst.test.cpp95
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFConv2D.test.cpp34
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFConv2DBackpropInput.test.cpp35
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFDepthwiseConv2dNative.test.cpp34
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFFakeQuantWithMinMaxVars.test.cpp34
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFFusedBatchNorm.test.cpp35
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFIdentity.test.cpp30
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFMaxPool.test.cpp34
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFMaximum.test.cpp31
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFMean.test.cpp32
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFMul.test.cpp31
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFPack.test.cpp34
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFPad.test.cpp31
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFPlaceholder.test.cpp46
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFRealDiv.test.cpp31
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFRelu.test.cpp30
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFRelu6.test.cpp30
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFReshape.test.cpp31
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFRsqrt.test.cpp30
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFShape.test.cpp31
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFSoftmax.test.cpp30
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFSqrt.test.cpp30
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFSquaredDifference.test.cpp31
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFSqueeze.test.cpp31
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFStopGradient.test.cpp30
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFStridedSlice.test.cpp38
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFSub.test.cpp31
-rw-r--r--compiler/moco/lang/src/IR/Nodes/TFTanh.test.cpp30
-rw-r--r--compiler/moco/lang/src/IR/TFDialect.cpp91
-rw-r--r--compiler/moco/lang/src/IR/TFDialect.test.cpp29
-rw-r--r--compiler/moco/lang/src/IR/TFNode.cpp137
-rw-r--r--compiler/moco/lang/src/IR/TFNode.test.cpp44
-rw-r--r--compiler/moco/lang/src/IR/VariadicArityNode.test.cpp55
-rw-r--r--compiler/moco/pass/CMakeLists.txt26
-rw-r--r--compiler/moco/pass/README.md3
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes.h32
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldAdd.h41
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldMul.h41
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldPack.h43
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldStridedSlice.h41
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/FuseBinaryIntoPreceding.h41
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/RemoveTFIdentityNode.h49
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/ResolveConstantShape.h41
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/ResolveFusedBatchNorm.h41
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/ResolveReshapeWildcardDim.h42
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/ResolveSquaredDifference.h41
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/SqueezeReduceNode.h42
-rw-r--r--compiler/moco/pass/src/ConstantFoldAdd.test.cpp109
-rw-r--r--compiler/moco/pass/src/ConstantFoldHelper.cpp238
-rw-r--r--compiler/moco/pass/src/ConstantFoldHelper.h64
-rw-r--r--compiler/moco/pass/src/ConstantFoldMul.test.cpp109
-rw-r--r--compiler/moco/pass/src/ConstantFoldPack.test.cpp90
-rw-r--r--compiler/moco/pass/src/ConstantFoldStridedSlice.test.cpp268
-rw-r--r--compiler/moco/pass/src/Passes/ConstantFoldAdd.cpp116
-rw-r--r--compiler/moco/pass/src/Passes/ConstantFoldMul.cpp116
-rw-r--r--compiler/moco/pass/src/Passes/ConstantFoldPack.cpp191
-rw-r--r--compiler/moco/pass/src/Passes/ConstantFoldStridedSlice.cpp292
-rw-r--r--compiler/moco/pass/src/Passes/FuseBinaryIntoPreceding.cpp539
-rw-r--r--compiler/moco/pass/src/Passes/RemoveTFIdentityNode.cpp66
-rw-r--r--compiler/moco/pass/src/Passes/ResolveConstantShape.cpp129
-rw-r--r--compiler/moco/pass/src/Passes/ResolveFusedBatchNorm.cpp254
-rw-r--r--compiler/moco/pass/src/Passes/ResolveReshapeWildcardDim.cpp153
-rw-r--r--compiler/moco/pass/src/Passes/ResolveSquaredDifference.cpp97
-rw-r--r--compiler/moco/pass/src/Passes/SqueezeReduceNode.cpp105
-rw-r--r--compiler/moco/pass/src/TensorPackEnumerator.cpp134
-rw-r--r--compiler/moco/pass/src/TensorPackEnumerator.h66
-rw-r--r--compiler/moco/pass/src/TensorSliceEnumerator.cpp84
-rw-r--r--compiler/moco/pass/src/TensorSliceEnumerator.h62
-rw-r--r--compiler/moco/pass/src/TensorSliceEnumerator.test.cpp83
-rw-r--r--compiler/moco/pass/src/TestHelper.h67
-rw-r--r--compiler/moco/pass/src/TestHelper.test.cpp38
-rw-r--r--compiler/moco/requires.cmake8
-rw-r--r--compiler/moco/service/CMakeLists.txt24
-rw-r--r--compiler/moco/service/README.md3
-rw-r--r--compiler/moco/service/include/moco/Service/TFShapeInferenceRule.h38
-rw-r--r--compiler/moco/service/include/moco/Service/TFTypeInferenceRule.h36
-rw-r--r--compiler/moco/service/src/Service/TFShapeInferenceRule.cpp891
-rw-r--r--compiler/moco/service/src/Service/TFShapeInferenceRule.test.cpp500
-rw-r--r--compiler/moco/service/src/Service/TFTypeInferenceRule.cpp113
-rw-r--r--compiler/moco/service/src/TestHelper.h69
-rw-r--r--compiler/moco/service/src/TestHelper.test.cpp38
-rw-r--r--compiler/moco/support/CMakeLists.txt9
-rw-r--r--compiler/moco/support/README.md3
-rw-r--r--compiler/moco/support/include/moco/Support/NodeAs.h29
-rw-r--r--compiler/moco/support/include/moco/Support/TFShapeInferenceHelper.h221
-rw-r--r--compiler/moco/support/src/TFShapeInferenceHelper.cpp354
-rw-r--r--compiler/morph/CMakeLists.txt20
-rw-r--r--compiler/morph/README.md3
-rw-r--r--compiler/morph/include/morph/caffe.h38
-rw-r--r--compiler/morph/include/morph/dims.h33
-rw-r--r--compiler/morph/include/morph/nnapi.h38
-rw-r--r--compiler/morph/include/morph/tflite.h38
-rw-r--r--compiler/morph/requires.cmake1
-rw-r--r--compiler/morph/src/caffe.cpp68
-rw-r--r--compiler/morph/src/caffe.test.cpp65
-rw-r--r--compiler/morph/src/dims.cpp36
-rw-r--r--compiler/morph/src/dims.test.cpp32
-rw-r--r--compiler/morph/src/nnapi.cpp68
-rw-r--r--compiler/morph/src/nnapi.test.cpp65
-rw-r--r--compiler/morph/src/tflite.cpp68
-rw-r--r--compiler/morph/src/tflite.test.cpp65
-rw-r--r--compiler/nest/CMakeLists.txt1
-rw-r--r--compiler/nest/README.md8
-rw-r--r--compiler/nest/core/CMakeLists.txt27
-rw-r--r--compiler/nest/core/examples/conv2d.cpp152
-rw-r--r--compiler/nest/core/include/nest/Block.h46
-rw-r--r--compiler/nest/core/include/nest/Bound.h47
-rw-r--r--compiler/nest/core/include/nest/Closure.h49
-rw-r--r--compiler/nest/core/include/nest/Domain.h54
-rw-r--r--compiler/nest/core/include/nest/DomainContext.h45
-rw-r--r--compiler/nest/core/include/nest/DomainID.h54
-rw-r--r--compiler/nest/core/include/nest/DomainInfo.h48
-rw-r--r--compiler/nest/core/include/nest/Expr.h44
-rw-r--r--compiler/nest/core/include/nest/FV.h39
-rw-r--r--compiler/nest/core/include/nest/Level.h45
-rw-r--r--compiler/nest/core/include/nest/Module.h70
-rw-r--r--compiler/nest/core/include/nest/Ret.h49
-rw-r--r--compiler/nest/core/include/nest/Schedule.h52
-rw-r--r--compiler/nest/core/include/nest/Stmt.h34
-rw-r--r--compiler/nest/core/include/nest/Var.h49
-rw-r--r--compiler/nest/core/include/nest/VarContext.h46
-rw-r--r--compiler/nest/core/include/nest/VarID.h54
-rw-r--r--compiler/nest/core/include/nest/expr/AddNode.h53
-rw-r--r--compiler/nest/core/include/nest/expr/DerefNode.h54
-rw-r--r--compiler/nest/core/include/nest/expr/Forward.h34
-rw-r--r--compiler/nest/core/include/nest/expr/Macro.h28
-rw-r--r--compiler/nest/core/include/nest/expr/MulNode.h53
-rw-r--r--compiler/nest/core/include/nest/expr/Node.def9
-rw-r--r--compiler/nest/core/include/nest/expr/Node.h62
-rw-r--r--compiler/nest/core/include/nest/expr/Subscript.h53
-rw-r--r--compiler/nest/core/include/nest/expr/VarNode.h52
-rw-r--r--compiler/nest/core/include/nest/expr/Visitor.h40
-rw-r--r--compiler/nest/core/include/nest/stmt/Forward.h34
-rw-r--r--compiler/nest/core/include/nest/stmt/Macro.h28
-rw-r--r--compiler/nest/core/include/nest/stmt/Node.def6
-rw-r--r--compiler/nest/core/include/nest/stmt/Node.h62
-rw-r--r--compiler/nest/core/include/nest/stmt/PushNode.h49
-rw-r--r--compiler/nest/core/include/nest/stmt/Visitor.h40
-rw-r--r--compiler/nest/core/src/Block.test.cpp41
-rw-r--r--compiler/nest/core/src/Bound.test.cpp27
-rw-r--r--compiler/nest/core/src/Closure.cpp24
-rw-r--r--compiler/nest/core/src/Closure.test.cpp45
-rw-r--r--compiler/nest/core/src/Domain.test.cpp41
-rw-r--r--compiler/nest/core/src/DomainContext.cpp38
-rw-r--r--compiler/nest/core/src/DomainContext.test.cpp56
-rw-r--r--compiler/nest/core/src/DomainID.cpp30
-rw-r--r--compiler/nest/core/src/DomainID.test.cpp38
-rw-r--r--compiler/nest/core/src/DomainInfo.test.cpp30
-rw-r--r--compiler/nest/core/src/Expr.cpp27
-rw-r--r--compiler/nest/core/src/Expr.test.cpp58
-rw-r--r--compiler/nest/core/src/FV.cpp75
-rw-r--r--compiler/nest/core/src/FV.test.cpp75
-rw-r--r--compiler/nest/core/src/Level.cpp48
-rw-r--r--compiler/nest/core/src/Level.test.cpp39
-rw-r--r--compiler/nest/core/src/Module.cpp44
-rw-r--r--compiler/nest/core/src/Module.test.cpp99
-rw-r--r--compiler/nest/core/src/Ret.test.cpp58
-rw-r--r--compiler/nest/core/src/Schedule.cpp51
-rw-r--r--compiler/nest/core/src/Schedule.test.cpp44
-rw-r--r--compiler/nest/core/src/Var.cpp24
-rw-r--r--compiler/nest/core/src/Var.test.cpp38
-rw-r--r--compiler/nest/core/src/VarContext.cpp36
-rw-r--r--compiler/nest/core/src/VarContext.test.cpp82
-rw-r--r--compiler/nest/core/src/VarID.cpp30
-rw-r--r--compiler/nest/core/src/VarID.test.cpp38
-rw-r--r--compiler/nest/core/src/expr/AddNode.test.cpp43
-rw-r--r--compiler/nest/core/src/expr/DerefNode.test.cpp39
-rw-r--r--compiler/nest/core/src/expr/Macro.cpp21
-rw-r--r--compiler/nest/core/src/expr/MulNode.test.cpp43
-rw-r--r--compiler/nest/core/src/expr/Node.cpp21
-rw-r--r--compiler/nest/core/src/expr/Subscript.test.cpp37
-rw-r--r--compiler/nest/core/src/expr/VarNode.test.cpp47
-rw-r--r--compiler/nest/core/src/expr/Visitor.cpp21
-rw-r--r--compiler/nest/core/src/stmt/Macro.cpp21
-rw-r--r--compiler/nest/core/src/stmt/Node.cpp21
-rw-r--r--compiler/nest/core/src/stmt/PushNode.test.cpp37
-rw-r--r--compiler/nest/core/src/stmt/Visitor.cpp21
-rw-r--r--compiler/nike/CMakeLists.txt15
-rw-r--r--compiler/nike/README.md4
-rw-r--r--compiler/nike/include/nike/AbsoluteEpsilonEqual.h50
-rw-r--r--compiler/nike/include/nike/RelativeEpsilonEqual.h50
-rw-r--r--compiler/nike/src/AbsoluteEpsilonEqual.cpp42
-rw-r--r--compiler/nike/src/AbsoluteEpsilonEqual.test.cpp31
-rw-r--r--compiler/nike/src/RelativeEpsilonEqual.cpp46
-rw-r--r--compiler/nike/src/RelativeEpsilonEqual.test.cpp39
-rw-r--r--compiler/nnc/CMakeLists.txt42
-rw-r--r--compiler/nnc/README.md58
-rw-r--r--compiler/nnc/backends/CMakeLists.txt3
-rw-r--r--compiler/nnc/backends/acl_soft_backend/AclArtifactUtilities.in29
-rw-r--r--compiler/nnc/backends/acl_soft_backend/AclCppGenerator.cpp76
-rw-r--r--compiler/nnc/backends/acl_soft_backend/AclCppOpGenerator.cpp1000
-rw-r--r--compiler/nnc/backends/acl_soft_backend/AclCppOpGenerator.h387
-rw-r--r--compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppCode.cpp302
-rw-r--r--compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppCode.h64
-rw-r--r--compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppDecl.cpp173
-rw-r--r--compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppDecl.h64
-rw-r--r--compiler/nnc/backends/acl_soft_backend/ArtifactIndent.h63
-rw-r--r--compiler/nnc/backends/acl_soft_backend/ArtifactModel.cpp119
-rw-r--r--compiler/nnc/backends/acl_soft_backend/ArtifactModel.h859
-rw-r--r--compiler/nnc/backends/acl_soft_backend/CMakeLists.txt14
-rw-r--r--compiler/nnc/backends/acl_soft_backend/IArtifactGenerator.h80
-rw-r--r--compiler/nnc/backends/interpreter/CMakeLists.txt11
-rw-r--r--compiler/nnc/backends/interpreter/InterpreterBackend.cpp171
-rw-r--r--compiler/nnc/backends/soft_backend/CMakeLists.txt14
-rw-r--r--compiler/nnc/backends/soft_backend/CPPGenerator.cpp489
-rw-r--r--compiler/nnc/backends/soft_backend/CommonData.def41
-rw-r--r--compiler/nnc/backends/soft_backend/ModelAnalyzer.cpp452
-rw-r--r--compiler/nnc/backends/soft_backend/ModelAnalyzer.h207
-rw-r--r--compiler/nnc/backends/soft_backend/SBSerializer.cpp414
-rw-r--r--compiler/nnc/backends/soft_backend/SBSerializer.h134
-rw-r--r--compiler/nnc/backends/soft_backend/SequencedIR.cpp17
-rw-r--r--compiler/nnc/backends/soft_backend/SequencedIR.h144
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_broadcast.def73
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_capped_relu.def24
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_common_funcs.def750
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_concat.def45
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_conv.def237
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_conv_transpose.def111
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_depthwise_conv.def1029
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_elementwise.def273
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_elu.def29
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_fully_connected.def27
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_gather.def48
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_header_types.def234
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_leaky_relu.def25
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_operations.def656
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_pad.def96
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_pool.def116
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_reduce.def185
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_relu.def23
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_resize.def61
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_sigmoid.def24
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_slice.def56
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_softmax.def33
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_sqrt.def23
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_tanh.def21
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_transpose.def65
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/eigen.def29033
-rw-r--r--compiler/nnc/cmake/config.cmake55
-rw-r--r--compiler/nnc/cmake/utils.cmake65
-rw-r--r--compiler/nnc/doxygen.config2427
-rw-r--r--compiler/nnc/driver/Driver.cpp216
-rw-r--r--compiler/nnc/driver/Driver.h67
-rw-r--r--compiler/nnc/driver/Options.cpp137
-rw-r--r--compiler/nnc/driver/Options.h68
-rw-r--r--compiler/nnc/driver/main.cpp75
-rw-r--r--compiler/nnc/include/Definitions.h.in44
-rw-r--r--compiler/nnc/include/backends/acl_soft_backend/AclCppException.h39
-rw-r--r--compiler/nnc/include/backends/acl_soft_backend/AclCppGenerator.h50
-rw-r--r--compiler/nnc/include/backends/interpreter/InterpreterBackend.h41
-rw-r--r--compiler/nnc/include/backends/soft_backend/CPPGenerator.h162
-rw-r--r--compiler/nnc/include/pass/Pass.h53
-rw-r--r--compiler/nnc/include/pass/PassData.h100
-rw-r--r--compiler/nnc/include/pass/PassException.h44
-rw-r--r--compiler/nnc/include/pass/PassManager.h57
-rw-r--r--compiler/nnc/include/passes/dot_dumper/DumperPass.h42
-rw-r--r--compiler/nnc/include/passes/optimizations/CombineTransposes.h41
-rw-r--r--compiler/nnc/include/passes/optimizations/ConstantFoldTranspose.h39
-rw-r--r--compiler/nnc/include/passes/optimizations/DeadCodeElimination.h40
-rw-r--r--compiler/nnc/include/passes/optimizations/FuseArithmeticOps.h44
-rw-r--r--compiler/nnc/include/passes/optimizations/OptimizationUtils.h40
-rw-r--r--compiler/nnc/include/passes/optimizations/SinkRelu.h39
-rw-r--r--compiler/nnc/include/passes/optimizations/SinkTranspose.h40
-rw-r--r--compiler/nnc/include/passes/transformations/DataFormatSwitcher.h62
-rw-r--r--compiler/nnc/include/passes/transformations/LowerConv2D.h41
-rw-r--r--compiler/nnc/include/support/CommandLine.h556
-rw-r--r--compiler/nnc/pass/CMakeLists.txt5
-rw-r--r--compiler/nnc/pass/PassManager.cpp36
-rw-r--r--compiler/nnc/passes/CMakeLists.txt12
-rw-r--r--compiler/nnc/passes/dot_dumper/CMakeLists.txt6
-rw-r--r--compiler/nnc/passes/dot_dumper/DumperPass.cpp38
-rw-r--r--compiler/nnc/passes/optimizations/CMakeLists.txt13
-rw-r--r--compiler/nnc/passes/optimizations/CombineTransposes.cpp102
-rw-r--r--compiler/nnc/passes/optimizations/ConstantFoldTranspose.cpp87
-rw-r--r--compiler/nnc/passes/optimizations/DeadCodeElimination.cpp48
-rw-r--r--compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp250
-rw-r--r--compiler/nnc/passes/optimizations/OptimizationUtils.cpp55
-rw-r--r--compiler/nnc/passes/optimizations/SinkRelu.cpp77
-rw-r--r--compiler/nnc/passes/optimizations/SinkTranspose.cpp82
-rw-r--r--compiler/nnc/passes/transformations/CMakeLists.txt7
-rw-r--r--compiler/nnc/passes/transformations/DataFormatSwitcher.cpp248
-rw-r--r--compiler/nnc/passes/transformations/LowerConv2D.cpp75
-rw-r--r--compiler/nnc/requires.cmake7
-rw-r--r--compiler/nnc/support/CLOptionChecker.cpp74
-rw-r--r--compiler/nnc/support/CMakeLists.txt7
-rw-r--r--compiler/nnc/support/CommandLine.cpp637
-rw-r--r--compiler/nnc/tests/CMakeLists.txt3
-rw-r--r--compiler/nnc/tests/acl_soft_backend/AclCppOperations.cpp193
-rw-r--r--compiler/nnc/tests/acl_soft_backend/BuildInfo.h.in22
-rw-r--r--compiler/nnc/tests/acl_soft_backend/CMakeLists.txt125
-rw-r--r--compiler/nnc/tests/acl_soft_backend/artifact_cmake/CMakeLists.txt21
-rw-r--r--compiler/nnc/tests/acl_soft_backend/artifact_cmake/main.cpp139
-rw-r--r--compiler/nnc/tests/acl_soft_backend/artifact_cmake/odroid.cmake17
-rw-r--r--compiler/nnc/tests/acl_soft_backend/models/concatenate.prototxt23
-rw-r--r--compiler/nnc/tests/acl_soft_backend/models/convolution.prototxt34
-rw-r--r--compiler/nnc/tests/acl_soft_backend/models/convolution_with_bias.prototxt38
-rw-r--r--compiler/nnc/tests/acl_soft_backend/models/depthwise_convolution.prototxt35
-rw-r--r--compiler/nnc/tests/acl_soft_backend/models/fully_connected.prototxt29
-rw-r--r--compiler/nnc/tests/acl_soft_backend/models/pooling_avg.prototxt26
-rw-r--r--compiler/nnc/tests/acl_soft_backend/models/pooling_max.prototxt26
-rw-r--r--compiler/nnc/tests/acl_soft_backend/models/relu.prototxt20
-rw-r--r--compiler/nnc/tests/acl_soft_backend/models/reshape.prototxt26
-rw-r--r--compiler/nnc/tests/acl_soft_backend/models/scale.prototxt30
-rw-r--r--compiler/nnc/tests/import/CMakeLists.txt20
-rw-r--r--compiler/nnc/tests/import/caffe.cpp41
-rw-r--r--compiler/nnc/tests/import/tflite.cpp41
-rw-r--r--compiler/nnc/tests/soft_backend/CMakeLists.txt9
-rw-r--r--compiler/nnc/tests/soft_backend/CompileCPP.cpp120
-rw-r--r--compiler/nnc/tests/soft_backend/test_main.def10
-rw-r--r--compiler/nnc/unittests/CMakeLists.txt10
-rw-r--r--compiler/nnc/unittests/acl_backend/CMakeLists.txt9
-rw-r--r--compiler/nnc/unittests/acl_backend/DOMToText.cpp497
-rw-r--r--compiler/nnc/unittests/acl_backend/MIRToDOM.cpp539
-rw-r--r--compiler/nnc/unittests/caffe_frontend/test_data/unsupported.caffemodelbin0 -> 803260 bytes
-rw-r--r--compiler/nnc/unittests/optimizations/CMakeLists.txt7
-rw-r--r--compiler/nnc/unittests/optimizations/CombineTransposes.cpp150
-rw-r--r--compiler/nnc/unittests/optimizations/DeadCodeElimination.cpp89
-rw-r--r--compiler/nnc/unittests/optimizations/FuseArithmeticOps.cpp70
-rw-r--r--compiler/nnc/unittests/optimizations/SinkTest.cpp195
-rw-r--r--compiler/nnc/unittests/optimizations/Util.h80
-rw-r--r--compiler/nnc/unittests/pass/CMakeLists.txt4
-rw-r--r--compiler/nnc/unittests/pass/PassExceptionTest.cpp58
-rw-r--r--compiler/nnc/unittests/pass/PassManagerTest.cpp70
-rw-r--r--compiler/nnc/unittests/soft_backend/CMakeLists.txt7
-rw-r--r--compiler/nnc/unittests/soft_backend/CPPHeaderTypes.cpp125
-rw-r--r--compiler/nnc/unittests/soft_backend/CPPOperations.cpp1007
-rw-r--r--compiler/nnc/unittests/soft_backend/Generator.cpp112
-rw-r--r--compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp83
-rw-r--r--compiler/nnc/unittests/support/CMakeLists.txt4
-rw-r--r--compiler/nnc/unittests/support/CommandLineTest.cpp271
-rw-r--r--compiler/nnc/unittests/transformations/CMakeLists.txt4
-rw-r--r--compiler/nnc/unittests/transformations/Switcher.cpp272
-rw-r--r--compiler/nnc/utils/CMakeLists.txt7
-rw-r--r--compiler/nnc/utils/caffe2_dot_dumper/CMakeLists.txt6
-rw-r--r--compiler/nnc/utils/caffe2_dot_dumper/model_dump.cpp51
-rw-r--r--compiler/nnc/utils/caffe_dot_dumper/CMakeLists.txt6
-rw-r--r--compiler/nnc/utils/caffe_dot_dumper/model_dump.cpp45
-rwxr-xr-xcompiler/nnc/utils/caffe_model_maker/AllFill.sh48
-rwxr-xr-xcompiler/nnc/utils/caffe_model_maker/Filler.sh28
-rwxr-xr-xcompiler/nnc/utils/caffe_model_maker/GenerateCaffeModels.py722
-rwxr-xr-xcompiler/nnc/utils/caffe_model_maker/Pyloss.py83
-rw-r--r--compiler/nnc/utils/caffe_model_maker/README.md22
-rw-r--r--compiler/nnc/utils/def2src.cpp105
-rw-r--r--compiler/nnc/utils/infer_tests/README.md9
-rwxr-xr-xcompiler/nnc/utils/infer_tests/infer_testcases.py154
-rwxr-xr-xcompiler/nnc/utils/infer_tests/res2bin.py33
-rw-r--r--compiler/nnc/utils/input_gen/CMakeLists.txt9
-rw-r--r--compiler/nnc/utils/input_gen/tensor_gen.cpp215
-rwxr-xr-xcompiler/nnc/utils/model_runner/common_place.py70
-rwxr-xr-xcompiler/nnc/utils/model_runner/model_runner_caffe.py22
-rwxr-xr-xcompiler/nnc/utils/model_runner/model_runner_caffe2.py23
-rwxr-xr-xcompiler/nnc/utils/model_runner/model_runner_onnx.py27
-rwxr-xr-xcompiler/nnc/utils/model_runner/model_runner_tflite.py25
-rw-r--r--compiler/nnc/utils/model_runner/readme.md39
-rw-r--r--compiler/nnc/utils/prepare_inputs/README.md8
-rwxr-xr-xcompiler/nnc/utils/prepare_inputs/jpeg2hdf5.py170
-rw-r--r--compiler/nnc/utils/tflite_dot_dumper/CMakeLists.txt7
-rw-r--r--compiler/nnc/utils/tflite_dot_dumper/model_dump.cpp45
-rw-r--r--compiler/nnkit-caffe/CMakeLists.txt8
-rw-r--r--compiler/nnkit-caffe/backend/CMakeLists.txt3
-rw-r--r--compiler/nnkit-caffe/backend/Module.cpp34
-rw-r--r--compiler/nnkit-caffe/requires.cmake1
-rw-r--r--compiler/nnkit-caffe/support/CMakeLists.txt4
-rw-r--r--compiler/nnkit-caffe/support/include/nnkit/support/caffe/Backend.h73
-rw-r--r--compiler/nnkit-caffe/support/include/nnkit/support/caffe/BlobContext.h45
-rw-r--r--compiler/nnkit-caffe/support/include/nnkit/support/caffe/InputBlobContext.h57
-rw-r--r--compiler/nnkit-caffe/support/include/nnkit/support/caffe/OutputBlobContext.h57
-rw-r--r--compiler/nnkit-caffe/support/include/nnkit/support/caffe/TensorContext.h114
-rw-r--r--compiler/nnkit-intf/CMakeLists.txt4
-rw-r--r--compiler/nnkit-intf/README.md3
-rw-r--r--compiler/nnkit-intf/action/CMakeLists.txt4
-rw-r--r--compiler/nnkit-intf/action/include/nnkit/Action.h34
-rw-r--r--compiler/nnkit-intf/backend/CMakeLists.txt4
-rw-r--r--compiler/nnkit-intf/backend/include/nnkit/Backend.h38
-rw-r--r--compiler/nnkit-intf/cmdline/CMakeLists.txt2
-rw-r--r--compiler/nnkit-intf/cmdline/include/nnkit/CmdlineArguments.h36
-rw-r--r--compiler/nnkit-intf/tensor/CMakeLists.txt3
-rw-r--r--compiler/nnkit-intf/tensor/include/nnkit/TensorContext.h91
-rw-r--r--compiler/nnkit-misc/CMakeLists.txt1
-rw-r--r--compiler/nnkit-misc/README.md3
-rw-r--r--compiler/nnkit-misc/backend/CMakeLists.txt14
-rw-r--r--compiler/nnkit-misc/backend/include/nnkit/BackendPlugin.h60
-rw-r--r--compiler/nnkit-misc/backend/src/BackendPlugin.cpp88
-rw-r--r--compiler/nnkit-misc/cmdline/CMakeLists.txt5
-rw-r--r--compiler/nnkit-misc/cmdline/include/nnkit/VectorArguments.h43
-rw-r--r--compiler/nnkit-misc/cmdline/src/VectorArguments.cpp28
-rw-r--r--compiler/nnkit-mocotf/CMakeLists.txt6
-rw-r--r--compiler/nnkit-mocotf/backend/Backend.cpp31
-rw-r--r--compiler/nnkit-mocotf/backend/CMakeLists.txt3
-rw-r--r--compiler/nnkit-mocotf/requires.cmake7
-rw-r--r--compiler/nnkit-mocotf/support/CMakeLists.txt13
-rw-r--r--compiler/nnkit-mocotf/support/include/nnkit/support/moco/tf/Backend.h68
-rw-r--r--compiler/nnkit-mocotf/support/src/Backend.cpp162
-rw-r--r--compiler/nnkit-mocotf/support/src/InputTensorContext.cpp49
-rw-r--r--compiler/nnkit-mocotf/support/src/InputTensorContext.h71
-rw-r--r--compiler/nnkit-mocotf/support/src/OutputTensorContext.cpp47
-rw-r--r--compiler/nnkit-mocotf/support/src/OutputTensorContext.h72
-rw-r--r--compiler/nnkit-mocotf/support/src/TensorContext.h86
-rw-r--r--compiler/nnkit-onnxrt/CMakeLists.txt8
-rw-r--r--compiler/nnkit-onnxrt/backend/Backend.cpp29
-rw-r--r--compiler/nnkit-onnxrt/backend/CMakeLists.txt3
-rw-r--r--compiler/nnkit-onnxrt/requires.cmake2
-rw-r--r--compiler/nnkit-onnxrt/support/CMakeLists.txt10
-rw-r--r--compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Allocator.h56
-rw-r--r--compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Backend.h53
-rw-r--r--compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Runner.h67
-rw-r--r--compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Status.h85
-rw-r--r--compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/TensorContext.h118
-rw-r--r--compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/TensorSet.h97
-rw-r--r--compiler/nnkit-onnxrt/support/src/Allocator.cpp88
-rw-r--r--compiler/nnkit-onnxrt/support/src/Backend.cpp47
-rw-r--r--compiler/nnkit-onnxrt/support/src/Runner.cpp183
-rw-r--r--compiler/nnkit-tf/CMakeLists.txt8
-rw-r--r--compiler/nnkit-tf/backend/Backend.cpp31
-rw-r--r--compiler/nnkit-tf/backend/CMakeLists.txt3
-rw-r--r--compiler/nnkit-tf/requires.cmake3
-rw-r--r--compiler/nnkit-tf/support/CMakeLists.txt9
-rw-r--r--compiler/nnkit-tf/support/include/nnkit/support/tf/Backend.h67
-rw-r--r--compiler/nnkit-tf/support/include/nnkit/support/tf/Runner.h105
-rw-r--r--compiler/nnkit-tf/support/include/nnkit/support/tf/TensorContext.h80
-rw-r--r--compiler/nnkit-tf/support/include/nnkit/support/tf/TensorDataMap.h81
-rw-r--r--compiler/nnkit-tf/support/src/Backend.cpp113
-rw-r--r--compiler/nnkit-tf/support/src/Runner.cpp323
-rw-r--r--compiler/nnkit-tf/support/src/TensorContext.cpp60
-rw-r--r--compiler/nnkit-tflite/CMakeLists.txt8
-rw-r--r--compiler/nnkit-tflite/backend/Backend.cpp62
-rw-r--r--compiler/nnkit-tflite/backend/CMakeLists.txt7
-rw-r--r--compiler/nnkit-tflite/requires.cmake2
-rw-r--r--compiler/nnkit-tflite/support/CMakeLists.txt10
-rw-r--r--compiler/nnkit-tflite/support/include/nnkit/support/tflite/AbstractBackend.h48
-rw-r--r--compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorContext.h63
-rw-r--r--compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorSet.h44
-rw-r--r--compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorSets.h71
-rw-r--r--compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorUtils.h36
-rw-r--r--compiler/nnkit-tflite/support/src/Backend.cpp52
-rw-r--r--compiler/nnkit-tflite/support/src/TensorContext.cpp65
-rw-r--r--compiler/nnkit-tflite/support/src/TensorUtils.cpp43
-rw-r--r--compiler/nnkit/CMakeLists.txt2
-rw-r--r--compiler/nnkit/README.md179
-rw-r--r--compiler/nnkit/actions/CMakeLists.txt1
-rw-r--r--compiler/nnkit/actions/HDF5/CMakeLists.txt21
-rw-r--r--compiler/nnkit/actions/HDF5/Common.cpp38
-rw-r--r--compiler/nnkit/actions/HDF5/Common.h60
-rw-r--r--compiler/nnkit/actions/HDF5/Export.cpp109
-rw-r--r--compiler/nnkit/actions/HDF5/Import.cpp100
-rw-r--r--compiler/nnkit/actions/builtin/CMakeLists.txt7
-rw-r--r--compiler/nnkit/actions/builtin/Randomize.cpp60
-rw-r--r--compiler/nnkit/actions/builtin/Show.cpp71
-rw-r--r--compiler/nnkit/requires.cmake8
-rw-r--r--compiler/nnkit/tools/CMakeLists.txt1
-rw-r--r--compiler/nnkit/tools/benchmark/CMakeLists.txt14
-rw-r--r--compiler/nnkit/tools/benchmark/src/Benchmark.cpp207
-rw-r--r--compiler/nnkit/tools/run/CMakeLists.txt22
-rw-r--r--compiler/nnkit/tools/run/nnkit-run.cpp266
-rw-r--r--compiler/nnop/CMakeLists.txt16
-rw-r--r--compiler/nnop/include/nnop/Conv2D.h87
-rw-r--r--compiler/nnop/include/nnop/PadInfo.h49
-rw-r--r--compiler/nnop/include/nnop/StrideInfo.h44
-rw-r--r--compiler/nnop/requires.cmake1
-rw-r--r--compiler/nnop/src/Conv2D.test.cpp50
-rw-r--r--compiler/nnop/src/PadInfo.test.cpp34
-rw-r--r--compiler/nnop/src/StrideInfo.test.cpp30
-rw-r--r--compiler/nnsuite/CMakeLists.txt1
-rw-r--r--compiler/nnsuite/conv/CMakeLists.txt3
-rw-r--r--compiler/nnsuite/conv/model/CMakeLists.txt6
-rw-r--r--compiler/nnsuite/conv/model/include/nnsuite/conv/Model.h53
-rw-r--r--compiler/nnsuite/conv/model/include/nnsuite/conv/RandomModel.h70
-rw-r--r--compiler/nnsuite/conv/model/src/RandomModel.cpp58
-rw-r--r--compiler/nnsuite/conv/nnkit-caffe/CMakeLists.txt24
-rw-r--r--compiler/nnsuite/conv/nnkit-caffe/ConvBackend.cpp97
-rw-r--r--compiler/nnsuite/conv/nnkit-caffe/ConvBackend.h30
-rw-r--r--compiler/nnsuite/conv/nnkit-caffe/ConvBackend.test.cpp126
-rw-r--r--compiler/nnsuite/conv/nnkit-caffe/Entry.cpp40
-rw-r--r--compiler/nnsuite/conv/nnkit-tflite/CMakeLists.txt23
-rw-r--r--compiler/nnsuite/conv/nnkit-tflite/ConvBackend.cpp145
-rw-r--r--compiler/nnsuite/conv/nnkit-tflite/ConvBackend.h51
-rw-r--r--compiler/nnsuite/conv/nnkit-tflite/ConvBackend.test.cpp129
-rw-r--r--compiler/nnsuite/conv/nnkit-tflite/Entry.cpp44
-rw-r--r--compiler/nnsuite/requires.cmake1
-rw-r--r--compiler/oneco-value-pbtxt-test/CMakeLists.txt53
-rw-r--r--compiler/oneco-value-pbtxt-test/Const_000/test.pbtxt52
-rw-r--r--compiler/oneco-value-pbtxt-test/Identity_000/test.pbtxt66
-rw-r--r--compiler/oneco-value-pbtxt-test/requires.cmake1
-rw-r--r--compiler/oneco/CMakeLists.txt36
-rw-r--r--compiler/oneco/include/moco/onnx/Frontend.h48
-rw-r--r--compiler/oneco/proto/CMakeLists.txt13
-rw-r--r--compiler/oneco/requires.cmake3
-rw-r--r--compiler/oneco/src/Convert.cpp113
-rw-r--r--compiler/oneco/src/Convert.h38
-rw-r--r--compiler/oneco/src/Frontend.cpp275
-rw-r--r--compiler/oneco/src/Frontend.test.cpp21
-rw-r--r--compiler/oneco/src/GraphBuilder.h46
-rw-r--r--compiler/oneco/src/GraphBuilderContext.cpp92
-rw-r--r--compiler/oneco/src/GraphBuilderContext.h100
-rw-r--r--compiler/oneco/src/GraphBuilderRegistry.h78
-rw-r--r--compiler/oneco/src/Onnxutil.cpp109
-rw-r--r--compiler/oneco/src/Onnxutil.h46
-rw-r--r--compiler/oneco/src/Op/Constant.cpp52
-rw-r--r--compiler/oneco/src/Op/Constant.h59
-rw-r--r--compiler/oneco/src/Op/Constant_V1.cpp71
-rw-r--r--compiler/oneco/src/Op/Constant_V9.cpp67
-rw-r--r--compiler/oneco/src/Op/Identity.cpp48
-rw-r--r--compiler/oneco/src/Op/Identity.h47
-rw-r--r--compiler/oneco/src/Op/Identity_V1.cpp51
-rw-r--r--compiler/onnx2circle/CMakeLists.txt27
-rw-r--r--compiler/onnx2circle/README.md3
-rw-r--r--compiler/onnx2circle/requires.cmake9
-rw-r--r--compiler/onnx2circle/src/onnx2circle.cpp111
-rw-r--r--compiler/onnx2tflite-integration-test/CMakeLists.txt120
-rw-r--r--compiler/onnx2tflite-integration-test/requires.cmake6
-rw-r--r--compiler/onnx2tflite-integration-test/test.lst5
-rwxr-xr-xcompiler/onnx2tflite-integration-test/testall.sh112
-rw-r--r--compiler/onnx2tflite/CMakeLists.txt8
-rw-r--r--compiler/onnx2tflite/requires.cmake3
-rw-r--r--compiler/onnx2tflite/src/Driver.cpp83
-rw-r--r--compiler/onnxkit/CMakeLists.txt30
-rw-r--r--compiler/onnxkit/README.md61
-rw-r--r--compiler/onnxkit/src/DecodeCommand.cpp47
-rw-r--r--compiler/onnxkit/src/DecodeCommand.hpp27
-rw-r--r--compiler/onnxkit/src/EncodeCommand.cpp52
-rw-r--r--compiler/onnxkit/src/EncodeCommand.hpp27
-rw-r--r--compiler/onnxkit/src/Main.cpp31
-rw-r--r--compiler/onnxkit/src/Support.cpp73
-rw-r--r--compiler/onnxkit/src/Support.hpp59
-rw-r--r--compiler/oops/CMakeLists.txt12
-rw-r--r--compiler/oops/include/oops/InternalExn.h80
-rw-r--r--compiler/oops/include/oops/UserExn.h89
-rw-r--r--compiler/oops/test.cpp94
-rw-r--r--compiler/pepper-assert/CMakeLists.txt2
-rw-r--r--compiler/pepper-assert/include/pepper/assert.h38
-rw-r--r--compiler/pepper-env/CMakeLists.txt19
-rw-r--r--compiler/pepper-env/README.md3
-rw-r--r--compiler/pepper-env/include/pepper/env.h101
-rw-r--r--compiler/pepper-env/src/env.cpp47
-rw-r--r--compiler/pepper-env/src/env.test.cpp40
-rw-r--r--compiler/pepper-str/CMakeLists.txt12
-rw-r--r--compiler/pepper-str/README.md15
-rw-r--r--compiler/pepper-str/include/pepper/str.h65
-rw-r--r--compiler/pepper-str/test.cpp51
-rw-r--r--compiler/pepper-strcast/CMakeLists.txt19
-rw-r--r--compiler/pepper-strcast/README.md3
-rw-r--r--compiler/pepper-strcast/include/pepper/strcast.h34
-rw-r--r--compiler/pepper-strcast/src/strcast.cpp29
-rw-r--r--compiler/pepper-strcast/src/strcast.test.cpp26
-rw-r--r--compiler/plier-tf/CMakeLists.txt28
-rw-r--r--compiler/plier-tf/README.md3
-rw-r--r--compiler/plier-tf/include/plier/tf/Convert.h78
-rw-r--r--compiler/plier-tf/include/plier/tf/TestHelper.h38
-rw-r--r--compiler/plier-tf/requires.cmake3
-rw-r--r--compiler/plier-tf/src/Convert.cpp198
-rw-r--r--compiler/plier-tf/src/Convert.test.cpp115
-rw-r--r--compiler/plier-tf/src/TestHelper.cpp70
-rw-r--r--compiler/pp/CMakeLists.txt20
-rw-r--r--compiler/pp/README.md35
-rw-r--r--compiler/pp/include/pp/EnclosedDocument.h53
-rw-r--r--compiler/pp/include/pp/Format.h43
-rw-r--r--compiler/pp/include/pp/IndentedStringBuilder.h52
-rw-r--r--compiler/pp/include/pp/LinearDocument.h92
-rw-r--r--compiler/pp/include/pp/MultiLineText.h37
-rw-r--r--compiler/pp/include/pp/MultiLineTextUtils.h26
-rw-r--r--compiler/pp/src/EnclosedDocument.cpp34
-rw-r--r--compiler/pp/src/EnclosedDocument.test.cpp47
-rw-r--r--compiler/pp/src/Format.test.cpp30
-rw-r--r--compiler/pp/src/IndentedStringBuilder.cpp48
-rw-r--r--compiler/pp/src/IndentedStringBuilder.test.cpp30
-rw-r--r--compiler/pp/src/LinearDocument.cpp71
-rw-r--r--compiler/pp/src/LinearDocument.test.cpp160
-rw-r--r--compiler/pp/src/MultiLineTextUtils.cpp32
-rw-r--r--compiler/pp/src/MultiLineTextUtils.test.cpp49
-rw-r--r--compiler/safemain/CMakeLists.txt2
-rw-r--r--compiler/safemain/SafeMain.cpp43
-rw-r--r--compiler/stdex/CMakeLists.txt16
-rw-r--r--compiler/stdex/README.md22
-rw-r--r--compiler/stdex/include/stdex/Memory.h29
-rw-r--r--compiler/stdex/include/stdex/Queue.h38
-rw-r--r--compiler/stdex/include/stdex/Set.h55
-rw-r--r--compiler/stdex/src/Memory.test.cpp60
-rw-r--r--compiler/stdex/src/Queue.test.cpp32
-rw-r--r--compiler/stdex/src/Set.test.cpp37
-rw-r--r--compiler/tf2circle-conversion-test/.gitignore1
-rw-r--r--compiler/tf2circle-conversion-test/CMakeLists.txt138
-rw-r--r--compiler/tf2circle-conversion-test/README.md3
-rw-r--r--compiler/tf2circle-conversion-test/requires.cmake2
-rw-r--r--compiler/tf2circle-conversion-test/test.lst103
-rwxr-xr-xcompiler/tf2circle-conversion-test/testall.sh90
-rw-r--r--compiler/tf2circle-dredd-pb-test/.gitignore1
-rw-r--r--compiler/tf2circle-dredd-pb-test/CMakeLists.txt141
-rw-r--r--compiler/tf2circle-dredd-pb-test/README.md3
-rw-r--r--compiler/tf2circle-dredd-pb-test/contrib/.gitignore3
-rw-r--r--compiler/tf2circle-dredd-pb-test/requires.cmake4
-rwxr-xr-xcompiler/tf2circle-dredd-pb-test/runner.sh121
-rw-r--r--compiler/tf2circle-dredd-pbtxt-test/.gitignore1
-rw-r--r--compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt184
-rw-r--r--compiler/tf2circle-dredd-pbtxt-test/README.md3
-rw-r--r--compiler/tf2circle-dredd-pbtxt-test/requires.cmake5
-rwxr-xr-xcompiler/tf2circle-dredd-pbtxt-test/runner.sh121
-rw-r--r--compiler/tf2circle-dredd-pbtxt-test/test.lst4
-rw-r--r--compiler/tf2circle-model-test/.gitignore1
-rw-r--r--compiler/tf2circle-model-test/CMakeLists.txt110
-rw-r--r--compiler/tf2circle-model-test/README.md1
-rw-r--r--compiler/tf2circle-model-test/contrib/.gitignore3
-rw-r--r--compiler/tf2circle-model-test/requires.cmake2
-rwxr-xr-xcompiler/tf2circle-model-test/runner.sh83
-rw-r--r--compiler/tf2circle-ui-check/.gitignore1
-rw-r--r--compiler/tf2circle-ui-check/CMakeLists.txt44
-rw-r--r--compiler/tf2circle-ui-check/README.md21
-rwxr-xr-xcompiler/tf2circle-ui-check/checkall.sh57
-rw-r--r--compiler/tf2circle-ui-check/requires.cmake2
-rw-r--r--compiler/tf2circle-value-pbtxt-remote-test/.gitignore1
-rw-r--r--compiler/tf2circle-value-pbtxt-remote-test/CMakeLists.txt170
-rw-r--r--compiler/tf2circle-value-pbtxt-remote-test/README.md138
-rw-r--r--compiler/tf2circle-value-pbtxt-remote-test/requires.cmake3
-rwxr-xr-xcompiler/tf2circle-value-pbtxt-remote-test/testall.sh161
-rw-r--r--compiler/tf2circle/CMakeLists.txt47
-rw-r--r--compiler/tf2circle/README.md3
-rw-r--r--compiler/tf2circle/proto/CustomOpInfo.proto57
-rw-r--r--compiler/tf2circle/requires.cmake8
-rw-r--r--compiler/tf2circle/src/CustomopConfLoader.cpp138
-rw-r--r--compiler/tf2circle/src/CustomopConfLoader.h32
-rw-r--r--compiler/tf2circle/src/tf2circle.cpp225
-rw-r--r--compiler/tf2nnpkg/CMakeLists.txt35
-rw-r--r--compiler/tf2nnpkg/requires.cmake8
-rw-r--r--compiler/tf2nnpkg/src/filesystem.h43
-rw-r--r--compiler/tf2nnpkg/src/filesystem_common.cpp44
-rw-r--r--compiler/tf2nnpkg/src/filesystem_linux.cpp46
-rw-r--r--compiler/tf2nnpkg/src/filesystem_windows.cpp55
-rw-r--r--compiler/tf2nnpkg/src/tf2nnpkg.cpp300
-rw-r--r--compiler/tf2tflite-dredd-pb-test/.gitignore1
-rw-r--r--compiler/tf2tflite-dredd-pb-test/CMakeLists.txt141
-rw-r--r--compiler/tf2tflite-dredd-pb-test/README.md6
-rw-r--r--compiler/tf2tflite-dredd-pb-test/contrib/.gitignore3
-rw-r--r--compiler/tf2tflite-dredd-pb-test/requires.cmake4
-rwxr-xr-xcompiler/tf2tflite-dredd-pb-test/runner.sh121
-rw-r--r--compiler/tf2tflite-dredd-pbtxt-test/.gitignore1
-rw-r--r--compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt184
-rw-r--r--compiler/tf2tflite-dredd-pbtxt-test/requires.cmake5
-rwxr-xr-xcompiler/tf2tflite-dredd-pbtxt-test/runner.sh121
-rw-r--r--compiler/tf2tflite-dredd-pbtxt-test/test.lst1
-rw-r--r--compiler/tf2tflite-value-pb-test/.gitignore1
-rw-r--r--compiler/tf2tflite-value-pb-test/CMakeLists.txt131
-rw-r--r--compiler/tf2tflite-value-pb-test/README.md1
-rw-r--r--compiler/tf2tflite-value-pb-test/contrib/.gitignore3
-rw-r--r--compiler/tf2tflite-value-pb-test/requires.cmake6
-rwxr-xr-xcompiler/tf2tflite-value-pb-test/runner.sh112
-rw-r--r--compiler/tf2tflite-value-pbtxt-test/.gitignore1
-rw-r--r--compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt159
-rw-r--r--compiler/tf2tflite-value-pbtxt-test/README.md3
-rw-r--r--compiler/tf2tflite-value-pbtxt-test/requires.cmake4
-rw-r--r--compiler/tf2tflite-value-pbtxt-test/test.lst101
-rwxr-xr-xcompiler/tf2tflite-value-pbtxt-test/testall.sh106
-rw-r--r--compiler/tf2tflite/.gitignore1
-rw-r--r--compiler/tf2tflite/CMakeLists.txt44
-rw-r--r--compiler/tf2tflite/README.md3
-rw-r--r--compiler/tf2tflite/proto/CustomOpInfo.proto57
-rw-r--r--compiler/tf2tflite/requires.cmake8
-rw-r--r--compiler/tf2tflite/src/CustomopConfLoader.cpp137
-rw-r--r--compiler/tf2tflite/src/CustomopConfLoader.h32
-rw-r--r--compiler/tf2tflite/src/Driver.cpp167
-rw-r--r--compiler/tf2tfliteV2-value-pbtxt-test/CMakeLists.txt183
-rw-r--r--compiler/tf2tfliteV2-value-pbtxt-test/requirements.txt2
-rw-r--r--compiler/tf2tfliteV2-value-pbtxt-test/requires.cmake4
-rw-r--r--compiler/tf2tfliteV2-value-pbtxt-test/test.lst101
-rwxr-xr-xcompiler/tf2tfliteV2-value-pbtxt-test/testall.sh110
-rw-r--r--compiler/tf2tfliteV2/CMakeLists.txt11
-rw-r--r--compiler/tf2tfliteV2/README.md47
-rwxr-xr-xcompiler/tf2tfliteV2/tf2tfliteV2.py183
-rw-r--r--compiler/tfgraph-xform/CMakeLists.txt328
-rw-r--r--compiler/tfgraph-xform/README.md5
-rw-r--r--compiler/tfinfo-v2/CMakeLists.txt36
-rw-r--r--compiler/tfinfo-v2/include/tfinfo-v2/TensorInfoLoader.h43
-rw-r--r--compiler/tfinfo-v2/include/tfinfo-v2/TensorSignature.h124
-rw-r--r--compiler/tfinfo-v2/proto/tfinfo-v2.proto46
-rw-r--r--compiler/tfinfo-v2/requires.cmake2
-rw-r--r--compiler/tfinfo-v2/src/TFInfo_v2.test.cpp246
-rw-r--r--compiler/tfinfo-v2/src/TensorInfoLoader.cpp179
-rw-r--r--compiler/tfinfo-v2/src/TensorSignature.cpp17
-rw-r--r--compiler/tfinfo/CMakeLists.txt20
-rw-r--r--compiler/tfinfo/README.md12
-rw-r--r--compiler/tfinfo/include/nnkit/support/tftestinfo/ParsedTensor.h121
-rw-r--r--compiler/tfinfo/include/nnkit/support/tftestinfo/TensorInfoParser.h46
-rw-r--r--compiler/tfinfo/requires.cmake3
-rw-r--r--compiler/tfinfo/src/Compat.h27
-rw-r--r--compiler/tfinfo/src/TensorInfoParser.cpp230
-rw-r--r--compiler/tfinfo/src/TensorInfoParser.test.cpp121
-rw-r--r--compiler/tfkit/CMakeLists.txt13
-rw-r--r--compiler/tfkit/README.md73
-rw-r--r--compiler/tfkit/src/ConvertCommand.cpp146
-rw-r--r--compiler/tfkit/src/ConvertCommand.hpp32
-rw-r--r--compiler/tfkit/src/DecodeCommand.cpp52
-rw-r--r--compiler/tfkit/src/DecodeCommand.hpp32
-rw-r--r--compiler/tfkit/src/EncodeCommand.cpp57
-rw-r--r--compiler/tfkit/src/EncodeCommand.hpp32
-rw-r--r--compiler/tfkit/src/Main.cpp37
-rw-r--r--compiler/tfkit/src/PackCommand.cpp173
-rw-r--r--compiler/tfkit/src/PackCommand.hpp32
-rw-r--r--compiler/tfkit/src/Support.cpp125
-rw-r--r--compiler/tfkit/src/Support.hpp75
-rw-r--r--compiler/tfkit/src/UnpackCommand.cpp202
-rw-r--r--compiler/tfkit/src/UnpackCommand.hpp32
-rw-r--r--compiler/tfl-inspect/CMakeLists.txt13
-rw-r--r--compiler/tfl-inspect/README.md51
-rw-r--r--compiler/tfl-inspect/driver/Driver.cpp96
-rw-r--r--compiler/tfl-inspect/requires.cmake3
-rw-r--r--compiler/tfl-inspect/src/Dump.cpp137
-rw-r--r--compiler/tfl-inspect/src/Dump.h56
-rw-r--r--compiler/tfl-inspect/src/Model.cpp143
-rw-r--r--compiler/tfl-inspect/src/Model.h43
-rw-r--r--compiler/tfl-inspect/src/Reader.cpp166
-rw-r--r--compiler/tfl-inspect/src/Reader.h91
-rw-r--r--compiler/tfl-verify/CMakeLists.txt12
-rw-r--r--compiler/tfl-verify/README.md23
-rw-r--r--compiler/tfl-verify/requires.cmake4
-rw-r--r--compiler/tfl-verify/src/Driver.cpp51
-rw-r--r--compiler/tfl-verify/src/Model.cpp90
-rw-r--r--compiler/tfl-verify/src/Model.h38
-rw-r--r--compiler/tfl-verify/src/VerifyFlatBuffers.cpp36
-rw-r--r--compiler/tfl-verify/src/VerifyFlatBuffers.h32
-rw-r--r--compiler/tflchef/CMakeLists.txt19
-rw-r--r--compiler/tflchef/README.md76
-rw-r--r--compiler/tflchef/core/CMakeLists.txt7
-rw-r--r--compiler/tflchef/core/include/tflchef/ModelChef.h56
-rw-r--r--compiler/tflchef/core/src/Arguments.h34
-rw-r--r--compiler/tflchef/core/src/Convert.cpp72
-rw-r--r--compiler/tflchef/core/src/Convert.h31
-rw-r--r--compiler/tflchef/core/src/Data/Constant.h62
-rw-r--r--compiler/tflchef/core/src/Data/Explicit.h75
-rw-r--r--compiler/tflchef/core/src/Data/Gaussian.cpp135
-rw-r--r--compiler/tflchef/core/src/Data/Gaussian.h88
-rw-r--r--compiler/tflchef/core/src/DataChef.def15
-rw-r--r--compiler/tflchef/core/src/DataChef.h56
-rw-r--r--compiler/tflchef/core/src/DataChefs.h24
-rw-r--r--compiler/tflchef/core/src/Dataset.h57
-rw-r--r--compiler/tflchef/core/src/LexicalCast.cpp36
-rw-r--r--compiler/tflchef/core/src/LexicalCast.h32
-rw-r--r--compiler/tflchef/core/src/ModelChef.cpp765
-rw-r--r--compiler/tflchef/core/src/Op/Abs.cpp30
-rw-r--r--compiler/tflchef/core/src/Op/Abs.h46
-rw-r--r--compiler/tflchef/core/src/Op/Add.cpp39
-rw-r--r--compiler/tflchef/core/src/Op/Add.h46
-rw-r--r--compiler/tflchef/core/src/Op/ArgMax.cpp39
-rw-r--r--compiler/tflchef/core/src/Op/ArgMax.h46
-rw-r--r--compiler/tflchef/core/src/Op/AveragePool2D.cpp47
-rw-r--r--compiler/tflchef/core/src/Op/AveragePool2D.h49
-rw-r--r--compiler/tflchef/core/src/Op/BatchToSpaceND.cpp31
-rw-r--r--compiler/tflchef/core/src/Op/BatchToSpaceND.h52
-rw-r--r--compiler/tflchef/core/src/Op/Concatenation.cpp43
-rw-r--r--compiler/tflchef/core/src/Op/Concatenation.h52
-rw-r--r--compiler/tflchef/core/src/Op/Conv2D.cpp43
-rw-r--r--compiler/tflchef/core/src/Op/Conv2D.h46
-rw-r--r--compiler/tflchef/core/src/Op/Cos.cpp29
-rw-r--r--compiler/tflchef/core/src/Op/Cos.h46
-rw-r--r--compiler/tflchef/core/src/Op/DepthwiseConv2D.cpp47
-rw-r--r--compiler/tflchef/core/src/Op/DepthwiseConv2D.h52
-rw-r--r--compiler/tflchef/core/src/Op/Div.cpp39
-rw-r--r--compiler/tflchef/core/src/Op/Div.h46
-rw-r--r--compiler/tflchef/core/src/Op/Equal.cpp29
-rw-r--r--compiler/tflchef/core/src/Op/Equal.h46
-rw-r--r--compiler/tflchef/core/src/Op/Exp.cpp30
-rw-r--r--compiler/tflchef/core/src/Op/Exp.h46
-rw-r--r--compiler/tflchef/core/src/Op/FloorDiv.cpp30
-rw-r--r--compiler/tflchef/core/src/Op/FloorDiv.h49
-rw-r--r--compiler/tflchef/core/src/Op/FullyConnected.cpp39
-rw-r--r--compiler/tflchef/core/src/Op/FullyConnected.h52
-rw-r--r--compiler/tflchef/core/src/Op/LogicalNot.cpp29
-rw-r--r--compiler/tflchef/core/src/Op/LogicalNot.h49
-rw-r--r--compiler/tflchef/core/src/Op/LogicalOr.cpp29
-rw-r--r--compiler/tflchef/core/src/Op/LogicalOr.h49
-rw-r--r--compiler/tflchef/core/src/Op/MaxPool2D.cpp47
-rw-r--r--compiler/tflchef/core/src/Op/MaxPool2D.h46
-rw-r--r--compiler/tflchef/core/src/Op/Mean.cpp39
-rw-r--r--compiler/tflchef/core/src/Op/Mean.h46
-rw-r--r--compiler/tflchef/core/src/Op/Mul.cpp39
-rw-r--r--compiler/tflchef/core/src/Op/Mul.h46
-rw-r--r--compiler/tflchef/core/src/Op/Pack.cpp38
-rw-r--r--compiler/tflchef/core/src/Op/Pack.h46
-rw-r--r--compiler/tflchef/core/src/Op/Pad.cpp28
-rw-r--r--compiler/tflchef/core/src/Op/Pad.h46
-rw-r--r--compiler/tflchef/core/src/Op/ReLU.cpp27
-rw-r--r--compiler/tflchef/core/src/Op/ReLU.h46
-rw-r--r--compiler/tflchef/core/src/Op/ReLU6.cpp27
-rw-r--r--compiler/tflchef/core/src/Op/ReLU6.h46
-rw-r--r--compiler/tflchef/core/src/Op/Reshape.cpp62
-rw-r--r--compiler/tflchef/core/src/Op/Reshape.h46
-rw-r--r--compiler/tflchef/core/src/Op/Rsqrt.cpp28
-rw-r--r--compiler/tflchef/core/src/Op/Rsqrt.h46
-rw-r--r--compiler/tflchef/core/src/Op/Shape.cpp39
-rw-r--r--compiler/tflchef/core/src/Op/Shape.h46
-rw-r--r--compiler/tflchef/core/src/Op/Softmax.cpp39
-rw-r--r--compiler/tflchef/core/src/Op/Softmax.h46
-rw-r--r--compiler/tflchef/core/src/Op/Sqrt.cpp27
-rw-r--r--compiler/tflchef/core/src/Op/Sqrt.h46
-rw-r--r--compiler/tflchef/core/src/Op/Sub.cpp39
-rw-r--r--compiler/tflchef/core/src/Op/Sub.h46
-rw-r--r--compiler/tflchef/core/src/Op/Tanh.cpp28
-rw-r--r--compiler/tflchef/core/src/Op/Tanh.h46
-rw-r--r--compiler/tflchef/core/src/Op/Transpose.cpp32
-rw-r--r--compiler/tflchef/core/src/Op/Transpose.h49
-rw-r--r--compiler/tflchef/core/src/OpChef.def37
-rw-r--r--compiler/tflchef/core/src/OpChef.h41
-rw-r--r--compiler/tflchef/core/src/OpChefs.h52
-rw-r--r--compiler/tflchef/proto/CMakeLists.txt5
-rw-r--r--compiler/tflchef/proto/tflchef.proto232
-rw-r--r--compiler/tflchef/requires.cmake4
-rw-r--r--compiler/tflchef/tests/CMakeLists.txt129
-rw-r--r--compiler/tflchef/tests/explicit_datachef/test.recipe28
-rw-r--r--compiler/tflchef/tests/explicit_datachef/test.reverse0
-rw-r--r--compiler/tflchef/tests/multisubgraph/test.recipe72
-rw-r--r--compiler/tflchef/tests/readme/test.recipe44
-rw-r--r--compiler/tflchef/tests/readme/test.reverse0
-rwxr-xr-xcompiler/tflchef/tests/runall.sh60
-rwxr-xr-xcompiler/tflchef/tests/runvalidate.sh56
-rw-r--r--compiler/tflchef/tflite/CMakeLists.txt9
-rw-r--r--compiler/tflchef/tflite/include/tflchef/RawModel.h41
-rw-r--r--compiler/tflchef/tflite/include/tflchef/RecipeChef.h41
-rw-r--r--compiler/tflchef/tflite/src/Convert.cpp78
-rw-r--r--compiler/tflchef/tflite/src/Convert.h55
-rw-r--r--compiler/tflchef/tflite/src/Op/Abs.cpp40
-rw-r--r--compiler/tflchef/tflite/src/Op/Abs.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Add.cpp47
-rw-r--r--compiler/tflchef/tflite/src/Op/Add.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/ArgMax.cpp54
-rw-r--r--compiler/tflchef/tflite/src/Op/ArgMax.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/AveragePool2D.cpp52
-rw-r--r--compiler/tflchef/tflite/src/Op/AveragePool2D.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/BatchToSpaceND.cpp53
-rw-r--r--compiler/tflchef/tflite/src/Op/BatchToSpaceND.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Concatenation.cpp48
-rw-r--r--compiler/tflchef/tflite/src/Op/Concatenation.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Conv2D.cpp58
-rw-r--r--compiler/tflchef/tflite/src/Op/Conv2D.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Cos.cpp40
-rw-r--r--compiler/tflchef/tflite/src/Op/Cos.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/DepthwiseConv2D.cpp61
-rw-r--r--compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Div.cpp47
-rw-r--r--compiler/tflchef/tflite/src/Op/Div.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Equal.cpp40
-rw-r--r--compiler/tflchef/tflite/src/Op/Equal.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Exp.cpp40
-rw-r--r--compiler/tflchef/tflite/src/Op/Exp.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/FloorDiv.cpp40
-rw-r--r--compiler/tflchef/tflite/src/Op/FloorDiv.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/FullyConnected.cpp47
-rw-r--r--compiler/tflchef/tflite/src/Op/FullyConnected.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/LogicalNot.cpp40
-rw-r--r--compiler/tflchef/tflite/src/Op/LogicalNot.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/LogicalOr.cpp40
-rw-r--r--compiler/tflchef/tflite/src/Op/LogicalOr.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/MaxPool2D.cpp52
-rw-r--r--compiler/tflchef/tflite/src/Op/MaxPool2D.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Mean.cpp54
-rw-r--r--compiler/tflchef/tflite/src/Op/Mean.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Pack.cpp48
-rw-r--r--compiler/tflchef/tflite/src/Op/Pack.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Pad.cpp47
-rw-r--r--compiler/tflchef/tflite/src/Op/Pad.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/ReLU.cpp40
-rw-r--r--compiler/tflchef/tflite/src/Op/ReLU.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/ReLU6.cpp40
-rw-r--r--compiler/tflchef/tflite/src/Op/ReLU6.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Reshape.cpp62
-rw-r--r--compiler/tflchef/tflite/src/Op/Reshape.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Rsqrt.cpp39
-rw-r--r--compiler/tflchef/tflite/src/Op/Rsqrt.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Softmax.cpp47
-rw-r--r--compiler/tflchef/tflite/src/Op/Softmax.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Sqrt.cpp53
-rw-r--r--compiler/tflchef/tflite/src/Op/Sqrt.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Sub.cpp48
-rw-r--r--compiler/tflchef/tflite/src/Op/Sub.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Tanh.cpp39
-rw-r--r--compiler/tflchef/tflite/src/Op/Tanh.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/Transpose.cpp53
-rw-r--r--compiler/tflchef/tflite/src/Op/Transpose.h39
-rw-r--r--compiler/tflchef/tflite/src/RawModelLoader.cpp94
-rw-r--r--compiler/tflchef/tflite/src/RecipeChef.cpp241
-rw-r--r--compiler/tflchef/tflite/src/TFliteImport.cpp145
-rw-r--r--compiler/tflchef/tflite/src/TFliteImport.h140
-rw-r--r--compiler/tflchef/tflite/src/TFliteOpChef.h44
-rw-r--r--compiler/tflchef/tflite/src/TFliteOpChefs.h51
-rw-r--r--compiler/tflchef/tflite/src/TFliteOpRegistry.h97
-rw-r--r--compiler/tflchef/tools/CMakeLists.txt6
-rw-r--r--compiler/tflchef/tools/console/CMakeLists.txt3
-rw-r--r--compiler/tflchef/tools/console/Driver.cpp58
-rw-r--r--compiler/tflchef/tools/file/CMakeLists.txt3
-rw-r--r--compiler/tflchef/tools/file/Driver.cpp72
-rw-r--r--compiler/tflchef/tools/reverse/CMakeLists.txt3
-rw-r--r--compiler/tflchef/tools/reverse/Driver.cpp64
-rw-r--r--compiler/tfldump/CMakeLists.txt14
-rw-r--r--compiler/tfldump/README.md67
-rw-r--r--compiler/tfldump/driver/Driver.cpp52
-rw-r--r--compiler/tfldump/include/tfldump/Dump.h32
-rw-r--r--compiler/tfldump/include/tflread/Model.h43
-rw-r--r--compiler/tfldump/requires.cmake1
-rw-r--r--compiler/tfldump/src/Dump.cpp297
-rw-r--r--compiler/tfldump/src/Load.cpp133
-rw-r--r--compiler/tfldump/src/OpPrinter.cpp306
-rw-r--r--compiler/tfldump/src/OpPrinter.h61
-rw-r--r--compiler/tfldump/src/Read.cpp168
-rw-r--r--compiler/tfldump/src/Read.h99
-rw-r--r--compiler/tflite2circle-conversion-test/CMakeLists.txt94
-rw-r--r--compiler/tflite2circle-conversion-test/README.md3
-rw-r--r--compiler/tflite2circle-conversion-test/requires.cmake2
-rw-r--r--compiler/tflite2circle-conversion-test/test.lst20
-rwxr-xr-xcompiler/tflite2circle-conversion-test/testall.sh76
-rw-r--r--compiler/tflite2circle/CMakeLists.txt16
-rw-r--r--compiler/tflite2circle/README.md11
-rw-r--r--compiler/tflite2circle/driver/Driver.cpp59
-rw-r--r--compiler/tflite2circle/include/CircleModel.h102
-rw-r--r--compiler/tflite2circle/include/TFLModel.h55
-rw-r--r--compiler/tflite2circle/requires.cmake4
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions.h54
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/AbsOptions.cpp32
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/AbsOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/AddOptions.cpp36
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/AddOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ArgMaxOptions.cpp36
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ArgMaxOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/BatchToSpaceNDOptions.cpp32
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/BatchToSpaceNDOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/CastOptions.cpp38
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/CastOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ConcatenationOptions.cpp37
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ConcatenationOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/Conv2DOptions.cpp41
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/Conv2DOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/CosOptions.cpp32
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/CosOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DepthwiseConv2DOptions.cpp42
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DepthwiseConv2DOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DivOptions.cpp36
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DivOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/EqualOptions.cpp29
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/EqualOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ExpOptions.cpp32
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ExpOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ExpandDimsOptions.cpp29
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ExpandDimsOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/FillOptions.cpp29
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/FillOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp43
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/GreaterEqualOptions.cpp29
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/GreaterEqualOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/LogicalNotOptions.cpp32
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/LogicalNotOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/LogicalOrOptions.cpp32
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/LogicalOrOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/MulOptions.cpp36
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/MulOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/NotEqualOptions.cpp29
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/NotEqualOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/PackOptions.cpp35
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/PackOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/PadOptions.cpp31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/PadOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/Pool2DOptions.cpp41
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/Pool2DOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ReducerOptions.cpp34
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ReducerOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ReshapeOptions.cpp37
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ReshapeOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ShapeOptions.cpp35
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ShapeOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/SoftmaxOptions.cpp35
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/SoftmaxOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/SplitOptions.cpp34
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/SplitOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/SqueezeOptions.cpp37
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/SqueezeOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/SubOptions.cpp36
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/SubOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/TransposeOptions.cpp31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/TransposeOptions.h31
-rw-r--r--compiler/tflite2circle/src/CircleModel.cpp238
-rw-r--r--compiler/tflite2circle/src/DataLookup.cpp113
-rw-r--r--compiler/tflite2circle/src/DataLookup.h37
-rw-r--r--compiler/tflite2circle/src/TFLActivationFunctionType.lst12
-rw-r--r--compiler/tflite2circle/src/TFLBuiltinOptions.lst103
-rw-r--r--compiler/tflite2circle/src/TFLModel.cpp43
-rw-r--r--compiler/tflite2circle/src/TFLOperator.lst128
-rw-r--r--compiler/tflite2circle/src/TFLTensorType.lst16
-rw-r--r--compiler/tfts/CMakeLists.txt30
-rw-r--r--compiler/tfts/README.md3
-rwxr-xr-xcompiler/tfts/check_all.sh72
-rw-r--r--compiler/tfts/requires.cmake2
-rw-r--r--compiler/v4tf/README.md16
-rw-r--r--compute/ARMComputeEx/CMakeLists.txt6
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h117
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h106
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h101
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h102
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h110
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h15
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h135
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h15
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/TypesEx.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/core/UtilsEx.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h186
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h235
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h22
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h142
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h18
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h29
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h26
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h19
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h27
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPOneHotEx.h68
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h3
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h103
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h81
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h19
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h15
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h25
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h17
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/misc/functions/Utils.h4
-rwxr-xr-x[-rw-r--r--]compute/ARMComputeEx/resolve_includes.py14
-rw-r--r--compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp44
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl354
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h17
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h19
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl16
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl122
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl136
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl108
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl25
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl25
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl25
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp37
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp372
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp173
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp50
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp172
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp154
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp30
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp103
-rw-r--r--compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp18
-rw-r--r--compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp78
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp730
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp30
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp18
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp22
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp19
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/UtilsEx.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp337
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp583
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp50
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp180
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp29
-rw-r--r--compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp54
-rw-r--r--compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp66
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp109
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp15
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp15
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp44
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp20
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp41
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp36
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp15
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp18
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp15
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp15
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp16
-rw-r--r--compute/cker/CMakeLists.txt14
-rw-r--r--compute/cker/README.md4
-rw-r--r--compute/cker/include/cker/NeonTensorUtils.h319
-rw-r--r--compute/cker/include/cker/PortableTensorUtils.h167
-rw-r--r--compute/cker/include/cker/Shape.h45
-rw-r--r--compute/cker/include/cker/TensorUtils.h78
-rw-r--r--compute/cker/include/cker/Types.h231
-rw-r--r--compute/cker/include/cker/Utils.h106
-rw-r--r--compute/cker/include/cker/eigen/EigenSupport.h122
-rw-r--r--compute/cker/include/cker/eigen/Utils.h23
-rw-r--r--compute/cker/include/cker/eigen/eigen_convolution_helpers.h88
-rw-r--r--compute/cker/include/cker/eigen/eigen_spatial_convolutions-inl.h1754
-rw-r--r--compute/cker/include/cker/eigen/eigen_spatial_convolutions.h27
-rw-r--r--compute/cker/include/cker/eigen/eigen_tensor_reduced_instantiations_oss.h170
-rw-r--r--compute/cker/include/cker/gemmlowp/FixedPoint.h289
-rw-r--r--compute/cker/include/cker/gemmlowp/GEMMSupport.h67
-rw-r--r--compute/cker/include/cker/neon/neon_check.h48
-rw-r--r--compute/cker/include/cker/operation/AveragePool.h361
-rw-r--r--compute/cker/include/cker/operation/BinaryArithmeticOps.h128
-rw-r--r--compute/cker/include/cker/operation/Common.h103
-rw-r--r--compute/cker/include/cker/operation/Comparison.h146
-rw-r--r--compute/cker/include/cker/operation/Concatenation.h88
-rw-r--r--compute/cker/include/cker/operation/Conv.h259
-rw-r--r--compute/cker/include/cker/operation/DepthwiseConv.h126
-rw-r--r--compute/cker/include/cker/operation/Elementwise.h62
-rw-r--r--compute/cker/include/cker/operation/Exp.h43
-rw-r--r--compute/cker/include/cker/operation/FullyConnected.h138
-rw-r--r--compute/cker/include/cker/operation/Gather.h5
-rw-r--r--compute/cker/include/cker/operation/InstanceNorm.h7
-rw-r--r--compute/cker/include/cker/operation/MaxMin.h104
-rw-r--r--compute/cker/include/cker/operation/MaxPool.h152
-rw-r--r--compute/cker/include/cker/operation/OneHot.h64
-rw-r--r--compute/cker/include/cker/operation/Pack.h62
-rw-r--r--compute/cker/include/cker/operation/Reduce.h170
-rw-r--r--compute/cker/include/cker/operation/Slice.h82
-rw-r--r--compute/cker/include/cker/operation/SoftMax.h30
-rw-r--r--compute/cker/include/cker/operation/Split.h65
-rw-r--r--compute/cker/include/cker/operation/StridedSlice.h308
-rw-r--r--compute/cker/include/cker/operation/Tanh.h42
-rw-r--r--compute/cker/include/cker/operation/Transpose.h580
-rw-r--r--compute/cker/include/cker/operation/TransposeConv.h24
-rw-r--r--compute/cker/include/cker/operation/Unpack.h63
-rw-r--r--compute/cker/include/cker/operation/optimized/AveragePool.h105
-rw-r--r--compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h202
-rw-r--r--compute/cker/include/cker/operation/optimized/Conv.h290
-rw-r--r--compute/cker/include/cker/operation/optimized/DepthwiseConvUint8.h2123
-rw-r--r--compute/cker/include/cker/operation/optimized/MaxPool.h97
-rw-r--r--compute/cker/include/cker/operation/optimized/OptimizedUtils.h176
-rw-r--r--compute/cker/include/cker/operation/optimized/SoftMax.h59
-rw-r--r--compute/cker/include/cker/operation/reference/AveragePool.h90
-rw-r--r--compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h68
-rw-r--r--compute/cker/include/cker/operation/reference/Conv.h197
-rw-r--r--compute/cker/include/cker/operation/reference/MaxPool.h84
-rw-r--r--compute/cker/include/cker/operation/reference/SoftMax.h70
-rw-r--r--compute/ncnn/CMakeLists.txt34
-rw-r--r--compute/ncnn/README.md9
-rw-r--r--compute/ncnn/include/ncnn/layer/binaryop.h69
-rw-r--r--compute/ncnn/include/ncnn/layer/instance_norm.h59
-rw-r--r--compute/ncnn/include/ncnn/mat.h738
-rw-r--r--compute/ncnn/include/ncnn/srcn/conv_type.h74
-rw-r--r--compute/ncnn/include/ncnn/srcn/srcn_conv.h65
-rw-r--r--compute/ncnn/src/layer/arm/neon_mathfun.h315
-rw-r--r--compute/ncnn/src/layer/binaryop.cc1640
-rw-r--r--compute/ncnn/src/layer/instance_norm.cc371
-rw-r--r--compute/ncnn/src/mat.cc940
-rw-r--r--compute/ncnn/src/srcn/common.h162
-rw-r--r--compute/ncnn/src/srcn/conv_sgemm_multithreads.cc483
-rw-r--r--compute/ncnn/src/srcn/conv_sgemm_multithreads.h86
-rw-r--r--compute/ncnn/src/srcn/conv_sgemm_singlethread.cc366
-rw-r--r--compute/ncnn/src/srcn/conv_sgemm_singlethread.h73
-rw-r--r--compute/ncnn/src/srcn/conv_sparse.cc271
-rw-r--r--compute/ncnn/src/srcn/conv_sparse.h79
-rw-r--r--compute/ncnn/src/srcn/conv_winograd.cc341
-rw-r--r--compute/ncnn/src/srcn/conv_winograd.h72
-rw-r--r--compute/ncnn/src/srcn/conv_winograd_batch.cc304
-rw-r--r--compute/ncnn/src/srcn/conv_winograd_batch.h67
-rw-r--r--compute/ncnn/src/srcn/deconv_sgemm_multithreads.cc387
-rw-r--r--compute/ncnn/src/srcn/deconv_sgemm_multithreads.h85
-rw-r--r--compute/ncnn/src/srcn/depthwise_conv.cc2684
-rw-r--r--compute/ncnn/src/srcn/direct_conv_colmajor.cc5872
-rw-r--r--compute/ncnn/src/srcn/direct_conv_colmajor.h33
-rw-r--r--compute/ncnn/src/srcn/sgemm_kernel.cc2508
-rw-r--r--compute/ncnn/src/srcn/sgemm_kernel.h52
-rw-r--r--compute/ncnn/src/srcn/sgemm_pack.cc2316
-rw-r--r--compute/ncnn/src/srcn/sgemm_pack.h73
-rw-r--r--compute/ncnn/src/srcn/sgemm_singlethread.cc689
-rw-r--r--compute/ncnn/src/srcn/sgemm_singlethread.h88
-rw-r--r--compute/ncnn/src/srcn/sgemm_test.cc1883
-rw-r--r--compute/ncnn/src/srcn/srcn_conv.cc614
-rw-r--r--compute/ncnn/src/srcn/winograd.h148
-rw-r--r--docs/HowToContribute.md72
-rw-r--r--docs/UseDoxygen.md36
-rw-r--r--docs/fig/compiler_flow.pngbin56456 -> 0 bytes
-rw-r--r--docs/fig/nnfw_compiler_structure.pngbin75343 -> 0 bytes
-rw-r--r--docs/fig/nnfw_compiler_structure.pptxbin40532 -> 0 bytes
-rw-r--r--docs/fig/nnfw_components.pngbin82620 -> 0 bytes
-rw-r--r--docs/fig/nnfw_components.pptxbin46596 -> 0 bytes
-rw-r--r--docs/fig/nnfw_nativeapi_flow.pngbin105745 -> 0 bytes
-rw-r--r--docs/fig/nnfw_nativeapi_flow.pptxbin51156 -> 0 bytes
-rw-r--r--docs/fig/nnfw_nnapi_flow.pngbin52314 -> 0 bytes
-rw-r--r--docs/fig/nnfw_nnapi_flow.pptxbin45988 -> 0 bytes
-rw-r--r--docs/fig/nnfw_runtime_behavior.pngbin51473 -> 0 bytes
-rw-r--r--docs/fig/nnfw_runtime_behavior.pptxbin45204 -> 0 bytes
-rw-r--r--docs/fig/nnfw_runtime_structure.pngbin64652 -> 0 bytes
-rw-r--r--docs/fig/nnfw_runtime_structure.pptxbin41044 -> 0 bytes
-rw-r--r--docs/fig/runtime_nativeapi_flow.pngbin63638 -> 0 bytes
-rw-r--r--docs/nncc/README.md56
-rw-r--r--docs/nncc/design.md10
-rw-r--r--docs/nncc/getting_started.md73
-rw-r--r--docs/nncc/images/nncc_components.pngbin45359 -> 0 bytes
-rw-r--r--docs/nncc/images/nncc_idef0_a0.pngbin50434 -> 0 bytes
-rw-r--r--docs/nncc/images/nncc_idef0_a1.pngbin86576 -> 0 bytes
-rw-r--r--docs/nncc/images/nncc_idef0_a12.pngbin42778 -> 0 bytes
-rw-r--r--docs/nncc/project/detailed_level_design.md329
-rw-r--r--docs/nncc/project/development_document.md257
-rw-r--r--docs/nncc/project/high_level_design.md457
-rw-r--r--docs/nncc/project/requirements_specification.md272
-rw-r--r--docs/nncc/project/test_plan.md442
-rw-r--r--docs/nncc/project_guide.md27
-rw-r--r--docs/nncc/roadmap.md6
-rw-r--r--docs/nncc/v1.0.0/getting_started.md59
-rw-r--r--docs/nncc/v1.0.0/operation-list.md34
-rw-r--r--docs/nncc/v1.0.0/tutorial.md49
-rw-r--r--docs/nncc/v1.1.0/nncc_in_tizen_studio.md52
-rw-r--r--docs/nncc/v1.1.0/nncc_in_visual_studio.md61
-rw-r--r--docs/nnfw/2018/fig/nnfw_architecture.pngbin28876 -> 0 bytes
-rw-r--r--docs/nnfw/2018/fig/nnfw_architecture.pptxbin72036 -> 0 bytes
-rw-r--r--docs/nnfw/2018/roadmap.md123
-rw-r--r--docs/nnfw/HowToImplementOperatorKernel.md1
-rw-r--r--docs/nnfw/fig/nnfw_architecture.pngbin280284 -> 0 bytes
-rw-r--r--docs/nnfw/fig/nnfw_architecture.pptxbin45709 -> 0 bytes
-rw-r--r--docs/nnfw/fig/nnfw_behavior.pngbin14254 -> 0 bytes
-rw-r--r--docs/nnfw/fig/nnfw_behavior.pptxbin59844 -> 0 bytes
-rw-r--r--docs/nnfw/howto.md38
-rw-r--r--docs/nnfw/howto/BuildTFfromSource.md66
-rw-r--r--docs/nnfw/howto/CrossBuildForAarch64.md77
-rw-r--r--docs/nnfw/howto/CrossBuildForAndroid.md52
-rw-r--r--docs/nnfw/howto/CrossBuildForArm.md118
-rw-r--r--docs/nnfw/howto/HowToAddUnittest.md31
-rw-r--r--docs/nnfw/howto/HowToRunNnpackge.md75
-rw-r--r--docs/nnfw/howto/HowToTestManualy.md62
-rw-r--r--docs/nnfw/howto/HowToUseDockerImage.md154
-rw-r--r--docs/nnfw/howto/HowToUseNNFWAPI.md63
-rw-r--r--docs/nnfw/howto/HowtoMakeSampleAppOnNnfw.md132
-rw-r--r--docs/nnfw/howto/RemoteDebuggingForVSCode.md147
-rw-r--r--docs/nnfw/howto/device/xu3-dip.pngbin262925 -> 0 bytes
-rw-r--r--docs/nnfw/howto/device/xu3_tizen.md140
-rw-r--r--docs/nnfw/howto/device/xu3_ubuntu.md114
-rw-r--r--docs/nnfw/howto/device/xu4_tizen.md228
-rw-r--r--docs/nnfw/howto/device/xu4_ubuntu.md99
-rw-r--r--docs/nnfw/op_list.md71
-rw-r--r--docs/nnfw/roadmap.md76
-rw-r--r--docs/nnfw/tests/Convolution_manual_3x3.xlsxbin19844 -> 0 bytes
-rw-r--r--docs/nnfw/tests/Softmax_manual.xlsxbin15940 -> 0 bytes
-rw-r--r--docs/release/release_note_1.0.0.md65
-rw-r--r--docs/release/release_note_1.1.0.md40
-rw-r--r--docs/release/release_note_1.4.0.md23
-rw-r--r--infra/cmake/modules/ExternalBuildTools.cmake7
-rw-r--r--infra/cmake/packages/ARMComputeConfig.cmake3
-rw-r--r--infra/cmake/packages/ARMComputeSourceConfig.cmake2
-rw-r--r--infra/cmake/packages/BoostConfig.cmake29
-rw-r--r--infra/cmake/packages/EigenConfig.cmake2
-rw-r--r--infra/cmake/packages/NNPACKSourceConfig.cmake2
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake18
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfigVersion.cmake10
-rw-r--r--infra/cmake/packages/TensorFlowVersionChecker.c16
-rw-r--r--infra/command/format83
-rw-r--r--infra/command/gen-coverage-report4
-rw-r--r--infra/command/install-githooks68
-rw-r--r--infra/docker/Dockerfile2
-rw-r--r--infra/docker/Dockerfile.18044
-rw-r--r--infra/doxygen/Doxyfile2
-rwxr-xr-xinfra/git-hooks/pre-commit.sh32
-rwxr-xr-xinfra/git-hooks/pre-push.sh2
-rw-r--r--infra/nncc/CMakeLists.txt2
-rw-r--r--infra/nncc/command/utcount41
-rw-r--r--infra/nnfw/CMakeLists.txt8
-rw-r--r--infra/nnfw/cmake/ApplyCompileFlags.cmake2
-rw-r--r--infra/nnfw/cmake/CfgOptionFlags.cmake9
-rw-r--r--infra/nnfw/cmake/buildtool/config/config_armv7l-linux.cmake2
-rw-r--r--infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-android.cmake3
-rw-r--r--infra/nnfw/cmake/options/options_aarch64-android.cmake2
-rw-r--r--infra/nnfw/cmake/options/options_aarch64-tizen.cmake2
-rw-r--r--infra/nnfw/cmake/options/options_armv7l-tizen.cmake3
-rw-r--r--infra/nnfw/cmake/packages/EigenConfig.cmake2
-rw-r--r--infra/nnfw/cmake/packages/GEMMLowpConfig.cmake20
-rw-r--r--infra/nnfw/command/copyright-check5
-rw-r--r--infra/nnfw/command/gen-coverage-report59
-rw-r--r--infra/nnfw/config/docker.configuration2
-rw-r--r--infra/packaging/preset/20191231_windows3
-rw-r--r--infra/packaging/preset/20200115_windows3
-rw-r--r--infra/packaging/preset/2020022044
-rw-r--r--infra/packaging/res/tf2nnpkg.2020022089
-rwxr-xr-xinfra/scripts/build_android_runtime_release.sh21
-rwxr-xr-x[-rw-r--r--]infra/scripts/common.sh0
-rwxr-xr-xinfra/scripts/docker_build_cross_aarch64_runtime.sh48
-rwxr-xr-xinfra/scripts/docker_build_cross_arm_benchmark_model.sh2
-rwxr-xr-xinfra/scripts/docker_build_cross_arm_runtime.sh (renamed from infra/scripts/docker_build_cross_arm_neurun.sh)0
-rwxr-xr-xinfra/scripts/docker_build_cross_arm_runtime_release.sh (renamed from infra/scripts/docker_build_cross_arm_neurun_release.sh)0
-rwxr-xr-xinfra/scripts/docker_build_test_x64.sh5
-rwxr-xr-xinfra/scripts/test_arm_neurun_acl_cl.sh31
-rwxr-xr-xinfra/scripts/test_arm_neurun_acl_neon.sh27
-rwxr-xr-xinfra/scripts/test_arm_neurun_cpu.sh23
-rwxr-xr-xinfra/scripts/test_arm_neurun_mixed.sh30
-rwxr-xr-xinfra/scripts/test_arm_nnpkg.sh2
-rwxr-xr-xinfra/scripts/test_coverage.sh12
-rwxr-xr-xinfra/scripts/test_neurun_interp.sh11
-rwxr-xr-xinfra/scripts/test_ubuntu_runtime.sh84
-rwxr-xr-xinfra/scripts/test_ubuntu_runtime_interp.sh12
-rwxr-xr-xinfra/scripts/test_ubuntu_runtime_mixed.sh37
-rwxr-xr-xinfra/scripts/test_x64_neurun_cpu.sh12
-rwxr-xr-xinfra/scripts/tizen_xu4_test.sh28
-rw-r--r--nnpackage/schema/circle_schema.fbs325
-rw-r--r--nnpackage/schema/circle_schema_v0.fbs811
-rw-r--r--nnpackage/spec/30_custom_op.md2
-rw-r--r--packaging/eigen.tar.gzbin0 -> 2502909 bytes
-rw-r--r--packaging/gemmlowp.tar.gzbin0 -> 830368 bytes
-rw-r--r--packaging/nnfw.spec61
-rw-r--r--res/ONNXTests/UNIT_Gemm_000/test.pbtxt79
-rw-r--r--res/ONNXTests/UNIT_Gemm_001/test.pbtxt70
-rw-r--r--res/TensorFlowLiteRecipes/Abs_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Abs_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Add_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Add_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Add_U8_000/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Add_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_000/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_001/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_002/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_002/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_003/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_003/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_U8_000/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_U8_001/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_U8_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_U8_002/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_U8_002/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_U8_003/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_U8_003/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/AveragePool2D_000/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/AveragePool2D_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/BatchToSpaceND_000/test.recipe38
-rw-r--r--res/TensorFlowLiteRecipes/BatchToSpaceND_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Concatenation_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Concatenation_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Concatenation_U8_000/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/Conv2D_000/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Conv2D_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Conv2D_001/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Conv2D_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Conv2D_002/test.recipe45
-rw-r--r--res/TensorFlowLiteRecipes/Conv2D_U8_000/test.recipe48
-rw-r--r--res/TensorFlowLiteRecipes/Cos_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Cos_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/DepthwiseConv2D_000/test.recipe41
-rw-r--r--res/TensorFlowLiteRecipes/DepthwiseConv2D_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/DepthwiseConv2D_U8_000/test.recipe46
-rw-r--r--res/TensorFlowLiteRecipes/DepthwiseConv2D_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Div_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Div_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Equal_000/test.recipe26
-rw-r--r--res/TensorFlowLiteRecipes/Equal_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Exp_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Exp_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_000/test.recipe34
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_001/test.recipe34
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_U8_000/test.recipe35
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/LogicalNot_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/LogicalNot_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/LogicalOr_000/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/LogicalOr_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/MaxPool2D_000/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/MaxPool2D_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/MaxPool2D_U8_000/test.recipe26
-rw-r--r--res/TensorFlowLiteRecipes/Mean_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Mean_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Mul_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Mul_U8_000/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Pack_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Pack_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Pack_U8_000/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/Pack_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Pad_000/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Pad_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Pad_U8_000/test.recipe32
-rw-r--r--res/TensorFlowLiteRecipes/Pad_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quantization_000/test.recipe46
-rw-r--r--res/TensorFlowLiteRecipes/Quantization_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReLU6_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/ReLU6_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReLU_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/ReLU_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Reshape_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Reshape_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Reshape_001/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Reshape_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Reshape_U8_000/test.recipe22
-rw-r--r--res/TensorFlowLiteRecipes/Reshape_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Rsqrt_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Rsqrt_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Softmax_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Softmax_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Softmax_U8_000/test.recipe22
-rw-r--r--res/TensorFlowLiteRecipes/Softmax_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Sqrt_000/test.recipe18
-rw-r--r--res/TensorFlowLiteRecipes/Sqrt_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Sub_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Sub_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Sub_001/test.recipe42
-rw-r--r--res/TensorFlowLiteRecipes/Sub_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Sub_U8_000/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Sub_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Transpose_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Transpose_000/test.reverse0
-rw-r--r--res/TensorFlowLiteSchema/1.13.1/schema.fbs794
-rw-r--r--res/TensorFlowLiteSchema/1.14.0/schema.fbs873
-rw-r--r--res/TensorFlowLiteSchema/1.15.2/schema.fbs922
-rw-r--r--res/TensorFlowLiteSchema/2.1.0/schema.fbs940
-rw-r--r--res/TensorFlowLiteSchema/README.md7
-rw-r--r--res/TensorFlowLiteSchema/SCHEMA.lst5
-rwxr-xr-xres/TensorFlowLiteSchema/download.sh9
-rw-r--r--res/TensorFlowPythonExamples/.gitignore1
-rw-r--r--res/TensorFlowPythonExamples/README.md31
-rwxr-xr-xres/TensorFlowPythonExamples/examples/abs/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/add/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/argmax/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/biasadd/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/cos/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/div/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/elu/__init__.py4
-rw-r--r--res/TensorFlowPythonExamples/examples/exp/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/floor/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/floordiv/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/greater/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/greater_equal/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/leaky_relu/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/less/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/less_equal/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/logical_not/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/logical_or/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/matmul/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/multiply/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/not_equal/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/pack/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/pad/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/pow/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/prelu/__init__.py7
-rwxr-xr-xres/TensorFlowPythonExamples/examples/relu/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/relu6/__init__.py4
-rw-r--r--res/TensorFlowPythonExamples/examples/reshape/__init.py__4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/rsqrt/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/sigmoid/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/softmax/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/sqrt/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/subtract/__init__.py5
-rwxr-xr-xres/TensorFlowPythonExamples/examples/tanh/__init__.py4
-rwxr-xr-xres/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py4
-rw-r--r--res/TensorFlowPythonExamples/requirements.txt18
-rwxr-xr-xres/TensorFlowPythonExamples/tfpem.py25
-rwxr-xr-x[-rw-r--r--]res/TensorFlowTests/NET_0003/test.py0
-rwxr-xr-x[-rw-r--r--]res/TensorFlowTests/NET_0004/test.py0
-rw-r--r--res/TensorFlowTests/UNIT_Maximum_000/test.info3
-rw-r--r--res/TensorFlowTests/UNIT_Maximum_000/test.pbtxt70
-rw-r--r--res/TensorFlowTests/UNIT_Maximum_001/test.info3
-rw-r--r--res/TensorFlowTests/UNIT_Maximum_001/test.pbtxt70
-rw-r--r--res/TensorFlowTests/UNIT_Maximum_002/test.info3
-rw-r--r--res/TensorFlowTests/UNIT_Maximum_002/test.pbtxt61
-rw-r--r--runtime/contrib/README.md10
-rw-r--r--runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h2
-rw-r--r--runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h2
-rw-r--r--runtime/contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp2
-rw-r--r--runtime/contrib/android_benchmark_app/CMakeLists.txt10
-rw-r--r--runtime/contrib/android_benchmark_app/README.md4
-rw-r--r--runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp16
-rw-r--r--runtime/contrib/android_benchmark_app/cpp/ndk_main.h16
-rw-r--r--runtime/contrib/android_tflite/builtin_ops_jni.cc31
-rw-r--r--runtime/contrib/custom_op/customOp-workflow.pngbin22082 -> 0 bytes
-rw-r--r--runtime/contrib/heap_trace/CMakeLists.txt5
-rw-r--r--runtime/contrib/heap_trace/src/aligned_alloc_stub.cc45
-rw-r--r--runtime/contrib/heap_trace/src/calloc_stub.cc46
-rw-r--r--runtime/contrib/heap_trace/src/cl_retain_mem_object_stub.cc43
-rw-r--r--runtime/contrib/heap_trace/src/free_stub.cc7
-rw-r--r--runtime/contrib/heap_trace/src/malloc_stub.cc6
-rw-r--r--runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.cc21
-rw-r--r--runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h78
-rw-r--r--runtime/contrib/heap_trace/src/posix_memalign_stub.cc46
-rw-r--r--runtime/contrib/heap_trace/src/realloc_stub.cc6
-rw-r--r--runtime/contrib/heap_trace/src/symbol_searcher.cc22
-rw-r--r--runtime/contrib/heap_trace/src/symbol_searcher.h4
-rw-r--r--runtime/contrib/heap_trace/src/trace.cc32
-rw-r--r--runtime/contrib/heap_trace/src/trace.h15
-rw-r--r--runtime/contrib/heap_trace/src/valloc_stub.cc6
-rw-r--r--runtime/contrib/heap_trace/tests/CMakeLists.txt5
-rw-r--r--runtime/contrib/heap_trace/tests/src/aligned_alloc_interception_test.cc90
-rw-r--r--runtime/contrib/heap_trace/tests/src/calloc_interception_test.cc91
-rw-r--r--runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc23
-rw-r--r--runtime/contrib/heap_trace/tests/src/cl_retain_mem_object_interception_test.cc85
-rw-r--r--runtime/contrib/heap_trace/tests/src/free_interception_test.cc17
-rw-r--r--runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc18
-rw-r--r--runtime/contrib/heap_trace/tests/src/memory_pool_for_symbol_searcher_internals_test.cc69
-rw-r--r--runtime/contrib/heap_trace/tests/src/posix_memalign_interception_test.cc101
-rw-r--r--runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc20
-rw-r--r--runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc28
-rw-r--r--runtime/contrib/heap_trace/tests/src/valloc_interception_test.cc18
-rw-r--r--runtime/contrib/hi_perf_cpu/CMakeLists.txt47
-rw-r--r--runtime/contrib/hi_perf_cpu/HighPerformanceBackend.test.cc (renamed from runtime/neurun/backend/hi_perf_cpu/HighPerformanceBackend.test.cc)0
-rw-r--r--runtime/contrib/hi_perf_cpu/KernelGenerator.cc (renamed from runtime/neurun/backend/hi_perf_cpu/KernelGenerator.cc)0
-rw-r--r--runtime/contrib/hi_perf_cpu/KernelGenerator.h47
-rw-r--r--runtime/contrib/hi_perf_cpu/TensorBuilder.cc (renamed from runtime/neurun/backend/hi_perf_cpu/TensorBuilder.cc)0
-rw-r--r--runtime/contrib/hi_perf_cpu/TensorBuilder.h44
-rw-r--r--runtime/contrib/labs/jniacl/src/jniacl_main.cc23
-rw-r--r--runtime/contrib/labs/tflite_examples/src/conv.cpp6
-rw-r--r--runtime/contrib/logging/src/nnapi_logging.cc18
-rw-r--r--runtime/contrib/mlapse/tfl/CMakeLists.txt1
-rw-r--r--runtime/contrib/mlapse/tfl/driver.cc11
-rw-r--r--runtime/contrib/mlapse/tfl/mlapse/benchmark_observer.h2
-rw-r--r--runtime/contrib/mlapse/tfl/mlapse/benchmark_runner.h2
-rw-r--r--runtime/contrib/pure_arm_compute/CMakeLists.txt2
-rw-r--r--runtime/contrib/pure_arm_compute/src/compilation.cc159
-rw-r--r--runtime/contrib/pure_arm_compute/src/internal/MatrixSink.h4
-rw-r--r--runtime/contrib/pure_arm_compute/src/internal/MatrixSource.h4
-rw-r--r--runtime/contrib/pure_arm_compute/src/internal/Sinks.h2
-rw-r--r--runtime/contrib/pure_arm_compute/src/internal/Tensor3DSink.h4
-rw-r--r--runtime/contrib/pure_arm_compute/src/internal/Tensor3DSource.h4
-rw-r--r--runtime/contrib/pure_arm_compute/src/internal/arm_compute.cc6
-rw-r--r--runtime/contrib/pure_arm_compute/src/internal/arm_compute.h13
-rw-r--r--runtime/contrib/pure_arm_compute/src/internal/arm_compute/Cast.h10
-rw-r--r--runtime/contrib/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc3
-rw-r--r--runtime/contrib/pure_arm_compute/src/internal/op/Mean.h12
-rw-r--r--runtime/contrib/pure_arm_compute/src/memory.cc4
-rw-r--r--runtime/contrib/pure_arm_compute/src/model.cc128
-rw-r--r--runtime/contrib/style_transfer_app/CMakeLists.txt40
-rw-r--r--runtime/contrib/style_transfer_app/README.md23
-rw-r--r--runtime/contrib/style_transfer_app/src/args.cc96
-rw-r--r--runtime/contrib/style_transfer_app/src/args.h53
-rw-r--r--runtime/contrib/style_transfer_app/src/bitmap_helper.cc236
-rw-r--r--runtime/contrib/style_transfer_app/src/bitmap_helper.h61
-rw-r--r--runtime/contrib/style_transfer_app/src/jpeg_helper.cc132
-rw-r--r--runtime/contrib/style_transfer_app/src/jpeg_helper.h43
-rw-r--r--runtime/contrib/style_transfer_app/src/style_transfer_app.cc301
-rw-r--r--runtime/contrib/tflite_classify/src/tflite_classify.cc14
-rw-r--r--runtime/contrib/tflite_test/tflite_test.cpp7
-rw-r--r--runtime/libs/benchmark/include/benchmark/CsvWriter.h1
-rw-r--r--runtime/libs/benchmark/include/benchmark/Phase.h14
-rw-r--r--runtime/libs/benchmark/include/benchmark/Result.h2
-rw-r--r--runtime/libs/benchmark/include/benchmark/Util.h9
-rw-r--r--runtime/libs/benchmark/src/CsvWriter.cpp7
-rw-r--r--runtime/libs/benchmark/src/MemoryPoller.cpp2
-rw-r--r--runtime/libs/cpp14/CMakeLists.txt2
-rw-r--r--runtime/libs/cpp14/include/cpp14/memory.h66
-rw-r--r--runtime/libs/jsoncpp/.FORMATDENY0
-rw-r--r--runtime/libs/misc/CMakeLists.txt6
-rw-r--r--runtime/libs/misc/examples/tensor_index_iterator.cpp2
-rw-r--r--runtime/libs/misc/include/misc/EventRecorder.h1
-rw-r--r--runtime/libs/misc/include/misc/benchmark.h4
-rw-r--r--runtime/libs/misc/include/misc/string_helpers.h2
-rw-r--r--runtime/libs/misc/src/tensor/Comparator.cpp16
-rw-r--r--runtime/libs/profiling/include/profiling/profiling.h2
-rw-r--r--runtime/libs/profiling/include/profiling/time.h30
-rw-r--r--runtime/libs/profiling/src/profiling/time.cpp30
-rw-r--r--runtime/libs/rua/core/include/rua/Service.h1
-rw-r--r--runtime/libs/rua/dyn/include/rua/DynamicBinder.h2
-rw-r--r--runtime/libs/rua/dyn/src/DynamicBinder.cpp1
-rw-r--r--runtime/libs/rua/shim/include/rua/Shim.h1
-rw-r--r--runtime/libs/tflite/CMakeLists.txt10
-rw-r--r--runtime/libs/tflite/include/tflite/Diff.h1
-rw-r--r--runtime/libs/tflite/port/1.13.1/CMakeLists.txt2
-rw-r--r--runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/Abs.h41
-rw-r--r--runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/CustomOps.h6
-rw-r--r--runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/TensorFlowMax.h75
-rw-r--r--runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/TensorFlowSum.h41
-rw-r--r--runtime/libs/tflite/port/1.13.1/src/kernels/Abs.cpp103
-rw-r--r--runtime/libs/tflite/port/1.13.1/src/kernels/TensorFlowMax.cpp405
-rw-r--r--runtime/libs/tflite/port/1.13.1/src/kernels/TensorFlowSum.cpp400
-rw-r--r--runtime/libs/tflite/port/1.13.1/src/kernels/register.cpp3
-rw-r--r--runtime/libs/tflite/port/1.13.1/src/nnapi_delegate.cpp196
-rw-r--r--runtime/libs/tflite/port/1.13.1/src/nnapi_delegate_ex_AddOpsAndParams_lambda.inc32
-rw-r--r--runtime/libs/tflite/src/Diff.cpp17
-rw-r--r--runtime/libs/tflite/src/TensorShapeUtils.cpp16
-rw-r--r--runtime/neurun/CMakeLists.txt16
-rw-r--r--runtime/neurun/api/CMakeLists.txt21
-rw-r--r--runtime/neurun/api/include/nnfw.h378
-rw-r--r--runtime/neurun/api/include/nnfw_debug.h24
-rw-r--r--runtime/neurun/api/src/CustomKernel.cc98
-rw-r--r--runtime/neurun/api/src/CustomKernel.h59
-rw-r--r--runtime/neurun/api/src/CustomKernelRegistry.cc64
-rw-r--r--runtime/neurun/api/src/CustomKernelRegistry.h64
-rw-r--r--runtime/neurun/api/src/OpMap.lst89
-rw-r--r--runtime/neurun/api/src/nnfw_api.cc267
-rw-r--r--runtime/neurun/api/src/nnfw_api_internal.cc435
-rw-r--r--runtime/neurun/api/src/nnfw_api_internal.h84
-rw-r--r--runtime/neurun/api/src/nnfw_debug.cc24
-rw-r--r--runtime/neurun/api/src/nnfw_debug_internal.cc25
-rw-r--r--runtime/neurun/backend/CMakeLists.txt10
-rw-r--r--runtime/neurun/backend/acl_cl/Backend.h65
-rw-r--r--runtime/neurun/backend/acl_cl/CLTimer.h108
-rw-r--r--runtime/neurun/backend/acl_cl/CMakeLists.txt21
-rw-r--r--runtime/neurun/backend/acl_cl/Config.cc50
-rw-r--r--runtime/neurun/backend/acl_cl/Config.h45
-rw-r--r--runtime/neurun/backend/acl_cl/ConstantInitializer.cc266
-rw-r--r--runtime/neurun/backend/acl_cl/ConstantInitializer.h63
-rw-r--r--runtime/neurun/backend/acl_cl/KernelGenerator.cc2151
-rw-r--r--runtime/neurun/backend/acl_cl/KernelGenerator.h112
-rw-r--r--runtime/neurun/backend/acl_cl/PluginClassesAllocator.cc33
-rw-r--r--runtime/neurun/backend/acl_cl/ShapeFixer.cc434
-rw-r--r--runtime/neurun/backend/acl_cl/ShapeFixer.h110
-rw-r--r--runtime/neurun/backend/acl_cl/TensorBuilder.h39
-rw-r--r--runtime/neurun/backend/acl_cl/TensorManager.h80
-rw-r--r--runtime/neurun/backend/acl_cl/TensorRegister.h51
-rw-r--r--runtime/neurun/backend/acl_cl/operand/CLSubTensor.cc44
-rw-r--r--runtime/neurun/backend/acl_cl/operand/CLSubTensor.h63
-rw-r--r--runtime/neurun/backend/acl_cl/operand/CLTensor.cc62
-rw-r--r--runtime/neurun/backend/acl_cl/operand/CLTensor.h75
-rw-r--r--runtime/neurun/backend/acl_cl/operand/ICLTensor.cc45
-rw-r--r--runtime/neurun/backend/acl_cl/operand/ICLTensor.h50
-rw-r--r--runtime/neurun/backend/acl_common/AclFunction.h60
-rw-r--r--runtime/neurun/backend/acl_common/AclInternalBufferManager.h97
-rw-r--r--runtime/neurun/backend/acl_common/AclLinearMemoryManager.h110
-rw-r--r--runtime/neurun/backend/acl_common/AclMemoryManager.h98
-rw-r--r--runtime/neurun/backend/acl_common/AclTensorManager.h300
-rw-r--r--runtime/neurun/backend/acl_common/AclTensorRegister.cc49
-rw-r--r--runtime/neurun/backend/acl_common/AclTensorRegister.h56
-rw-r--r--runtime/neurun/backend/acl_common/CMakeLists.txt19
-rw-r--r--runtime/neurun/backend/acl_common/Convert.cc193
-rw-r--r--runtime/neurun/backend/acl_common/Convert.h68
-rw-r--r--runtime/neurun/backend/acl_common/IACLTensor.cc63
-rw-r--r--runtime/neurun/backend/acl_common/IACLTensor.h62
-rw-r--r--runtime/neurun/backend/acl_common/Swizzle.h160
-rw-r--r--runtime/neurun/backend/acl_common/TemplTensorBuilder.h612
-rw-r--r--runtime/neurun/backend/acl_neon/Backend.h65
-rw-r--r--runtime/neurun/backend/acl_neon/CMakeLists.txt21
-rw-r--r--runtime/neurun/backend/acl_neon/Config.cc30
-rw-r--r--runtime/neurun/backend/acl_neon/Config.h49
-rw-r--r--runtime/neurun/backend/acl_neon/ConstantInitializer.cc246
-rw-r--r--runtime/neurun/backend/acl_neon/ConstantInitializer.h60
-rw-r--r--runtime/neurun/backend/acl_neon/KernelGenerator.cc2152
-rw-r--r--runtime/neurun/backend/acl_neon/KernelGenerator.h111
-rw-r--r--runtime/neurun/backend/acl_neon/PluginClassesAllocator.cc33
-rw-r--r--runtime/neurun/backend/acl_neon/ShapeFixer.cc439
-rw-r--r--runtime/neurun/backend/acl_neon/ShapeFixer.h109
-rw-r--r--runtime/neurun/backend/acl_neon/TensorBuilder.h39
-rw-r--r--runtime/neurun/backend/acl_neon/TensorManager.h78
-rw-r--r--runtime/neurun/backend/acl_neon/TensorRegister.cc30
-rw-r--r--runtime/neurun/backend/acl_neon/TensorRegister.h51
-rw-r--r--runtime/neurun/backend/acl_neon/operand/INETensor.cc33
-rw-r--r--runtime/neurun/backend/acl_neon/operand/INETensor.h46
-rw-r--r--runtime/neurun/backend/acl_neon/operand/NESubTensor.cc44
-rw-r--r--runtime/neurun/backend/acl_neon/operand/NESubTensor.h63
-rw-r--r--runtime/neurun/backend/acl_neon/operand/NETensor.cc45
-rw-r--r--runtime/neurun/backend/acl_neon/operand/NETensor.h64
-rw-r--r--runtime/neurun/backend/cpu/Backend.h64
-rw-r--r--runtime/neurun/backend/cpu/CMakeLists.txt16
-rw-r--r--runtime/neurun/backend/cpu/Config.cc30
-rw-r--r--runtime/neurun/backend/cpu/Config.h53
-rw-r--r--runtime/neurun/backend/cpu/ConstantInitializer.cc68
-rw-r--r--runtime/neurun/backend/cpu/ConstantInitializer.h55
-rw-r--r--runtime/neurun/backend/cpu/KernelGenerator.cc624
-rw-r--r--runtime/neurun/backend/cpu/KernelGenerator.h71
-rw-r--r--runtime/neurun/backend/cpu/MemoryManager.cc91
-rw-r--r--runtime/neurun/backend/cpu/MemoryManager.h63
-rw-r--r--runtime/neurun/backend/cpu/PluginClassesAllocator.cc33
-rw-r--r--runtime/neurun/backend/cpu/ShapeFixer.cc135
-rw-r--r--runtime/neurun/backend/cpu/ShapeFixer.h65
-rw-r--r--runtime/neurun/backend/cpu/TensorBuilder.cc104
-rw-r--r--runtime/neurun/backend/cpu/TensorBuilder.h88
-rw-r--r--runtime/neurun/backend/cpu/TensorManager.cc95
-rw-r--r--runtime/neurun/backend/cpu/TensorManager.h64
-rw-r--r--runtime/neurun/backend/cpu/TensorRegister.cc35
-rw-r--r--runtime/neurun/backend/cpu/TensorRegister.h50
-rw-r--r--runtime/neurun/backend/cpu/kernel/AddLayer.cc101
-rw-r--r--runtime/neurun/backend/cpu/kernel/AddLayer.h77
-rw-r--r--runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc116
-rw-r--r--runtime/neurun/backend/cpu/kernel/AvgPoolLayer.h85
-rw-r--r--runtime/neurun/backend/cpu/kernel/ConcatLayer.cc137
-rw-r--r--runtime/neurun/backend/cpu/kernel/ConcatLayer.h73
-rw-r--r--runtime/neurun/backend/cpu/kernel/ConvolutionLayer.cc140
-rw-r--r--runtime/neurun/backend/cpu/kernel/ConvolutionLayer.h88
-rw-r--r--runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.cc143
-rw-r--r--runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.h90
-rw-r--r--runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.cc119
-rw-r--r--runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.h77
-rw-r--r--runtime/neurun/backend/cpu/kernel/GatherLayer.cc79
-rw-r--r--runtime/neurun/backend/cpu/kernel/GatherLayer.h74
-rw-r--r--runtime/neurun/backend/cpu/kernel/LogisticLayer.cc75
-rw-r--r--runtime/neurun/backend/cpu/kernel/LogisticLayer.h69
-rw-r--r--runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc116
-rw-r--r--runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h85
-rw-r--r--runtime/neurun/backend/cpu/kernel/MulLayer.cc101
-rw-r--r--runtime/neurun/backend/cpu/kernel/MulLayer.h77
-rw-r--r--runtime/neurun/backend/cpu/kernel/OperationUtils.cc273
-rw-r--r--runtime/neurun/backend/cpu/kernel/OperationUtils.h152
-rw-r--r--runtime/neurun/backend/cpu/kernel/PadLayer.cc76
-rw-r--r--runtime/neurun/backend/cpu/kernel/PadLayer.h75
-rw-r--r--runtime/neurun/backend/cpu/kernel/PermuteLayer.cc71
-rw-r--r--runtime/neurun/backend/cpu/kernel/PermuteLayer.h209
-rw-r--r--runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc54
-rw-r--r--runtime/neurun/backend/cpu/kernel/ReshapeLayer.h65
-rw-r--r--runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc172
-rw-r--r--runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h71
-rw-r--r--runtime/neurun/backend/cpu/kernel/SubLayer.cc100
-rw-r--r--runtime/neurun/backend/cpu/kernel/SubLayer.h77
-rw-r--r--runtime/neurun/backend/cpu/operand/Tensor.cc45
-rw-r--r--runtime/neurun/backend/cpu/operand/Tensor.h77
-rw-r--r--runtime/neurun/backend/cpu_common/CMakeLists.txt28
-rw-r--r--runtime/neurun/backend/cpu_common/MemoryPlanner.cc220
-rw-r--r--runtime/neurun/backend/cpu_common/MemoryPlanner.h217
-rw-r--r--runtime/neurun/backend/cpu_common/MemoryPlanner.test.cc193
-rw-r--r--runtime/neurun/backend/cpu_common/MemoryPlannerFactory.cc51
-rw-r--r--runtime/neurun/backend/cpu_common/MemoryPlannerFactory.h45
-rw-r--r--runtime/neurun/backend/hi_perf_cpu/CMakeLists.txt44
-rw-r--r--runtime/neurun/backend/hi_perf_cpu/KernelGenerator.h47
-rw-r--r--runtime/neurun/backend/hi_perf_cpu/TensorBuilder.h44
-rw-r--r--runtime/neurun/backend/srcn/Backend.h64
-rw-r--r--runtime/neurun/backend/srcn/CMakeLists.txt21
-rw-r--r--runtime/neurun/backend/srcn/Config.cc30
-rw-r--r--runtime/neurun/backend/srcn/Config.h46
-rw-r--r--runtime/neurun/backend/srcn/ConstantInitializer.cc191
-rw-r--r--runtime/neurun/backend/srcn/ConstantInitializer.h60
-rw-r--r--runtime/neurun/backend/srcn/Convert.cc75
-rw-r--r--runtime/neurun/backend/srcn/Convert.h46
-rw-r--r--runtime/neurun/backend/srcn/KernelGenerator.cc275
-rw-r--r--runtime/neurun/backend/srcn/KernelGenerator.h59
-rw-r--r--runtime/neurun/backend/srcn/MemoryManager.cc92
-rw-r--r--runtime/neurun/backend/srcn/MemoryManager.h63
-rw-r--r--runtime/neurun/backend/srcn/PluginClassesAllocator.cc33
-rw-r--r--runtime/neurun/backend/srcn/ShapeFixer.cc47
-rw-r--r--runtime/neurun/backend/srcn/ShapeFixer.h53
-rw-r--r--runtime/neurun/backend/srcn/Swizzle.h84
-rw-r--r--runtime/neurun/backend/srcn/TensorBuilder.cc107
-rw-r--r--runtime/neurun/backend/srcn/TensorBuilder.h89
-rw-r--r--runtime/neurun/backend/srcn/TensorManager.cc95
-rw-r--r--runtime/neurun/backend/srcn/TensorManager.h65
-rw-r--r--runtime/neurun/backend/srcn/TensorRegister.cc118
-rw-r--r--runtime/neurun/backend/srcn/TensorRegister.h55
-rw-r--r--runtime/neurun/backend/srcn/kernel/AddLayer.cc123
-rw-r--r--runtime/neurun/backend/srcn/kernel/AddLayer.h80
-rw-r--r--runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc233
-rw-r--r--runtime/neurun/backend/srcn/kernel/ConvolutionLayer.h89
-rw-r--r--runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.cc212
-rw-r--r--runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.h85
-rw-r--r--runtime/neurun/backend/srcn/kernel/InstanceNormLayer.cc155
-rw-r--r--runtime/neurun/backend/srcn/kernel/InstanceNormLayer.h77
-rw-r--r--runtime/neurun/backend/srcn/kernel/OperationUtils.cc139
-rw-r--r--runtime/neurun/backend/srcn/kernel/OperationUtils.h84
-rw-r--r--runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc136
-rw-r--r--runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h83
-rw-r--r--runtime/neurun/backend/srcn/operand/Tensor.cc45
-rw-r--r--runtime/neurun/backend/srcn/operand/Tensor.h79
-rw-r--r--runtime/neurun/core/CMakeLists.txt18
-rw-r--r--runtime/neurun/core/include/backend/Backend.h67
-rw-r--r--runtime/neurun/core/include/backend/CustomKernelBuilder.h69
-rw-r--r--runtime/neurun/core/include/backend/ExecTime.h111
-rw-r--r--runtime/neurun/core/include/backend/IConfig.h47
-rw-r--r--runtime/neurun/core/include/backend/IConstantInitializer.h288
-rw-r--r--runtime/neurun/core/include/backend/IKernelGenerator.h63
-rw-r--r--runtime/neurun/core/include/backend/IMemoryManager.h49
-rw-r--r--runtime/neurun/core/include/backend/IShapeFixer.h55
-rw-r--r--runtime/neurun/core/include/backend/ITensorBuilder.h89
-rw-r--r--runtime/neurun/core/include/backend/ITensorManager.h56
-rw-r--r--runtime/neurun/core/include/backend/ITensorRegister.h164
-rw-r--r--runtime/neurun/core/include/backend/JSONExecTime.h96
-rw-r--r--runtime/neurun/core/include/backend/operand/ITensor.h54
-rw-r--r--runtime/neurun/core/include/compiler/Compiler.h91
-rw-r--r--runtime/neurun/core/include/compiler/IExecutionBuilder.h39
-rw-r--r--runtime/neurun/core/include/compiler/SubTensorInfo.h83
-rw-r--r--runtime/neurun/core/include/exec/Execution.h144
-rw-r--r--runtime/neurun/core/include/exec/ExecutionObservers.h83
-rw-r--r--runtime/neurun/core/include/exec/IExecutor.h72
-rw-r--r--runtime/neurun/core/include/exec/IFunction.h37
-rw-r--r--runtime/neurun/core/include/exec/IODescription.h66
-rw-r--r--runtime/neurun/core/include/exec/NopFunction.h54
-rw-r--r--runtime/neurun/core/include/ir/BackendSet.h40
-rw-r--r--runtime/neurun/core/include/ir/Data.h75
-rw-r--r--runtime/neurun/core/include/ir/DataType.h62
-rw-r--r--runtime/neurun/core/include/ir/Graph.h153
-rw-r--r--runtime/neurun/core/include/ir/Index.h42
-rw-r--r--runtime/neurun/core/include/ir/InternalType.h68
-rw-r--r--runtime/neurun/core/include/ir/Layout.h67
-rw-r--r--runtime/neurun/core/include/ir/LowerInfoMap.h42
-rw-r--r--runtime/neurun/core/include/ir/OpCode.h56
-rw-r--r--runtime/neurun/core/include/ir/OpSequence.h106
-rw-r--r--runtime/neurun/core/include/ir/Operand.h130
-rw-r--r--runtime/neurun/core/include/ir/OperandConstraint.h58
-rw-r--r--runtime/neurun/core/include/ir/OperandIndexMap.h34
-rw-r--r--runtime/neurun/core/include/ir/OperandIndexSequence.h60
-rw-r--r--runtime/neurun/core/include/ir/OperandInfo.h90
-rw-r--r--runtime/neurun/core/include/ir/Operands.h39
-rw-r--r--runtime/neurun/core/include/ir/Operation.h71
-rw-r--r--runtime/neurun/core/include/ir/OperationIndexList.h59
-rw-r--r--runtime/neurun/core/include/ir/OperationIndexMap.h34
-rw-r--r--runtime/neurun/core/include/ir/OperationVisitor.h52
-rw-r--r--runtime/neurun/core/include/ir/Operations.Include.h83
-rw-r--r--runtime/neurun/core/include/ir/Operations.h36
-rw-r--r--runtime/neurun/core/include/ir/Operations.lst86
-rw-r--r--runtime/neurun/core/include/ir/Shape.h84
-rw-r--r--runtime/neurun/core/include/ir/Subgraphs.h87
-rw-r--r--runtime/neurun/core/include/ir/TypeInfo.h59
-rw-r--r--runtime/neurun/core/include/ir/operand/LowerInfo.h93
-rw-r--r--runtime/neurun/core/include/ir/operand/ParentInfo.h77
-rw-r--r--runtime/neurun/core/include/ir/operand/PermuteFactor.h130
-rw-r--r--runtime/neurun/core/include/ir/operation/Abs.h49
-rw-r--r--runtime/neurun/core/include/ir/operation/Add.h62
-rw-r--r--runtime/neurun/core/include/ir/operation/ArgMax.h62
-rw-r--r--runtime/neurun/core/include/ir/operation/AvgPool2D.h69
-rw-r--r--runtime/neurun/core/include/ir/operation/BatchToSpaceND.h50
-rw-r--r--runtime/neurun/core/include/ir/operation/Cast.h49
-rw-r--r--runtime/neurun/core/include/ir/operation/Comparison.h72
-rw-r--r--runtime/neurun/core/include/ir/operation/Concat.h59
-rw-r--r--runtime/neurun/core/include/ir/operation/Conv2D.h68
-rw-r--r--runtime/neurun/core/include/ir/operation/Custom.h66
-rw-r--r--runtime/neurun/core/include/ir/operation/DepthToSpace.h63
-rw-r--r--runtime/neurun/core/include/ir/operation/DepthwiseConv2D.h69
-rw-r--r--runtime/neurun/core/include/ir/operation/Dequantize.h49
-rw-r--r--runtime/neurun/core/include/ir/operation/Div.h62
-rw-r--r--runtime/neurun/core/include/ir/operation/EmbeddingLookup.h50
-rw-r--r--runtime/neurun/core/include/ir/operation/Exp.h49
-rw-r--r--runtime/neurun/core/include/ir/operation/Floor.h51
-rw-r--r--runtime/neurun/core/include/ir/operation/FullyConnected.h66
-rw-r--r--runtime/neurun/core/include/ir/operation/Gather.h65
-rw-r--r--runtime/neurun/core/include/ir/operation/HashtableLookup.h57
-rw-r--r--runtime/neurun/core/include/ir/operation/InstanceNorm.h65
-rw-r--r--runtime/neurun/core/include/ir/operation/L2Normalization.h62
-rw-r--r--runtime/neurun/core/include/ir/operation/L2Pool2D.h68
-rw-r--r--runtime/neurun/core/include/ir/operation/LSTM.h89
-rw-r--r--runtime/neurun/core/include/ir/operation/LocalResponseNormalization.h66
-rw-r--r--runtime/neurun/core/include/ir/operation/LogicalAnd.h50
-rw-r--r--runtime/neurun/core/include/ir/operation/LogicalNot.h49
-rw-r--r--runtime/neurun/core/include/ir/operation/LogicalOr.h50
-rw-r--r--runtime/neurun/core/include/ir/operation/Logistic.h49
-rw-r--r--runtime/neurun/core/include/ir/operation/LowerInfo.h54
-rw-r--r--runtime/neurun/core/include/ir/operation/Max.h50
-rw-r--r--runtime/neurun/core/include/ir/operation/MaxPool2D.h68
-rw-r--r--runtime/neurun/core/include/ir/operation/Mean.h62
-rw-r--r--runtime/neurun/core/include/ir/operation/Min.h50
-rw-r--r--runtime/neurun/core/include/ir/operation/Mul.h62
-rw-r--r--runtime/neurun/core/include/ir/operation/Neg.h49
-rw-r--r--runtime/neurun/core/include/ir/operation/OneHot.h60
-rw-r--r--runtime/neurun/core/include/ir/operation/PReLU.h50
-rw-r--r--runtime/neurun/core/include/ir/operation/Pack.h53
-rw-r--r--runtime/neurun/core/include/ir/operation/Pad.h63
-rw-r--r--runtime/neurun/core/include/ir/operation/Permute.h78
-rw-r--r--runtime/neurun/core/include/ir/operation/RNN.h70
-rw-r--r--runtime/neurun/core/include/ir/operation/RSQRT.h49
-rw-r--r--runtime/neurun/core/include/ir/operation/ReLU.h49
-rw-r--r--runtime/neurun/core/include/ir/operation/ReLU1.h49
-rw-r--r--runtime/neurun/core/include/ir/operation/ReLU6.h49
-rw-r--r--runtime/neurun/core/include/ir/operation/ReduceMax.h65
-rw-r--r--runtime/neurun/core/include/ir/operation/ReduceMin.h65
-rw-r--r--runtime/neurun/core/include/ir/operation/ReduceSum.h63
-rw-r--r--runtime/neurun/core/include/ir/operation/Reshape.h51
-rw-r--r--runtime/neurun/core/include/ir/operation/ResizeBilinear.h64
-rw-r--r--runtime/neurun/core/include/ir/operation/SQRT.h49
-rw-r--r--runtime/neurun/core/include/ir/operation/Slice.h64
-rw-r--r--runtime/neurun/core/include/ir/operation/Softmax.h63
-rw-r--r--runtime/neurun/core/include/ir/operation/SpaceToBatchND.h53
-rw-r--r--runtime/neurun/core/include/ir/operation/SpaceToDepth.h63
-rw-r--r--runtime/neurun/core/include/ir/operation/Split.h59
-rw-r--r--runtime/neurun/core/include/ir/operation/SquaredDifference.h50
-rw-r--r--runtime/neurun/core/include/ir/operation/Squeeze.h62
-rw-r--r--runtime/neurun/core/include/ir/operation/StridedSlice.h69
-rw-r--r--runtime/neurun/core/include/ir/operation/Sub.h62
-rw-r--r--runtime/neurun/core/include/ir/operation/Tanh.h49
-rw-r--r--runtime/neurun/core/include/ir/operation/TopKV2.h69
-rw-r--r--runtime/neurun/core/include/ir/operation/Transpose.h64
-rw-r--r--runtime/neurun/core/include/ir/operation/TransposeConv.h67
-rw-r--r--runtime/neurun/core/include/ir/operation/Unpack.h59
-rw-r--r--runtime/neurun/core/include/util/Config.lst43
-rw-r--r--runtime/neurun/core/include/util/ConfigSource.h55
-rw-r--r--runtime/neurun/core/include/util/Coordinates.h103
-rw-r--r--runtime/neurun/core/include/util/EnvConfigSource.h41
-rw-r--r--runtime/neurun/core/include/util/EventCollectorGlobal.h155
-rw-r--r--runtime/neurun/core/include/util/GeneralConfigSource.h44
-rw-r--r--runtime/neurun/core/include/util/IConfigSource.h46
-rw-r--r--runtime/neurun/core/include/util/ITimer.h59
-rw-r--r--runtime/neurun/core/include/util/Index.h154
-rw-r--r--runtime/neurun/core/include/util/ObjectManager.h144
-rw-r--r--runtime/neurun/core/include/util/Padding.h41
-rw-r--r--runtime/neurun/core/include/util/Set.h166
-rw-r--r--runtime/neurun/core/include/util/ShapeInference.h59
-rw-r--r--runtime/neurun/core/include/util/Utils.h51
-rw-r--r--runtime/neurun/core/include/util/feature/Coordinate4D.h111
-rw-r--r--runtime/neurun/core/include/util/feature/nchw/Reader.h120
-rw-r--r--runtime/neurun/core/include/util/feature/nchw/View.h138
-rw-r--r--runtime/neurun/core/include/util/feature/nhwc/Reader.h121
-rw-r--r--runtime/neurun/core/include/util/feature/nhwc/View.h140
-rw-r--r--runtime/neurun/core/include/util/logging.h63
-rw-r--r--runtime/neurun/core/src/backend/Backend.cc30
-rw-r--r--runtime/neurun/core/src/backend/BackendManager.cc150
-rw-r--r--runtime/neurun/core/src/backend/BackendManager.h82
-rw-r--r--runtime/neurun/core/src/backend/ExecTime.cc133
-rw-r--r--runtime/neurun/core/src/backend/JSONExecTime.cc231
-rw-r--r--runtime/neurun/core/src/compiler/BackendResolver.cc47
-rw-r--r--runtime/neurun/core/src/compiler/BackendResolver.h102
-rw-r--r--runtime/neurun/core/src/compiler/CodeWithInfo.h44
-rw-r--r--runtime/neurun/core/src/compiler/Compiler.cc143
-rw-r--r--runtime/neurun/core/src/compiler/ExecutorFactory.cc379
-rw-r--r--runtime/neurun/core/src/compiler/ExecutorFactory.h52
-rw-r--r--runtime/neurun/core/src/compiler/HEScheduler.cc628
-rw-r--r--runtime/neurun/core/src/compiler/HEScheduler.h175
-rw-r--r--runtime/neurun/core/src/compiler/IScheduler.h38
-rw-r--r--runtime/neurun/core/src/compiler/Linear.cc317
-rw-r--r--runtime/neurun/core/src/compiler/Linear.h81
-rw-r--r--runtime/neurun/core/src/compiler/ManualScheduler.cc142
-rw-r--r--runtime/neurun/core/src/compiler/ManualScheduler.h36
-rw-r--r--runtime/neurun/core/src/compiler/OperandContext.cc45
-rw-r--r--runtime/neurun/core/src/compiler/OperandContext.h60
-rw-r--r--runtime/neurun/core/src/compiler/OperationValidator.cc985
-rw-r--r--runtime/neurun/core/src/compiler/OperationValidator.h86
-rw-r--r--runtime/neurun/core/src/compiler/ParamChecker.cc33
-rw-r--r--runtime/neurun/core/src/compiler/ParamChecker.h73
-rw-r--r--runtime/neurun/core/src/compiler/SubTensorAnalyzer.cc96
-rw-r--r--runtime/neurun/core/src/compiler/SubTensorAnalyzer.h59
-rw-r--r--runtime/neurun/core/src/dumper/dot/DotBuilder.cc83
-rw-r--r--runtime/neurun/core/src/dumper/dot/DotBuilder.h62
-rw-r--r--runtime/neurun/core/src/dumper/dot/DotDumper.cc199
-rw-r--r--runtime/neurun/core/src/dumper/dot/DotDumper.h60
-rw-r--r--runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.cc56
-rw-r--r--runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.h59
-rw-r--r--runtime/neurun/core/src/dumper/dot/Node.cc56
-rw-r--r--runtime/neurun/core/src/dumper/dot/Node.h127
-rw-r--r--runtime/neurun/core/src/dumper/dot/OperandNode.cc60
-rw-r--r--runtime/neurun/core/src/dumper/dot/OperandNode.h79
-rw-r--r--runtime/neurun/core/src/dumper/dot/OperationNode.cc46
-rw-r--r--runtime/neurun/core/src/dumper/dot/OperationNode.h62
-rw-r--r--runtime/neurun/core/src/exec/DataflowExecutor.cc176
-rw-r--r--runtime/neurun/core/src/exec/DataflowExecutor.h97
-rw-r--r--runtime/neurun/core/src/exec/Execution.cc135
-rw-r--r--runtime/neurun/core/src/exec/ExecutionObservee.cc64
-rw-r--r--runtime/neurun/core/src/exec/ExecutionObservee.h56
-rw-r--r--runtime/neurun/core/src/exec/ExecutionObservers.cc130
-rw-r--r--runtime/neurun/core/src/exec/ExecutorBase.cc145
-rw-r--r--runtime/neurun/core/src/exec/ExecutorBase.h127
-rw-r--r--runtime/neurun/core/src/exec/FunctionSequence.cc62
-rw-r--r--runtime/neurun/core/src/exec/FunctionSequence.h56
-rw-r--r--runtime/neurun/core/src/exec/Job.cc33
-rw-r--r--runtime/neurun/core/src/exec/Job.h69
-rw-r--r--runtime/neurun/core/src/exec/LinearExecutor.cc39
-rw-r--r--runtime/neurun/core/src/exec/LinearExecutor.h64
-rw-r--r--runtime/neurun/core/src/exec/ParallelExecutor.cc147
-rw-r--r--runtime/neurun/core/src/exec/ParallelExecutor.h69
-rw-r--r--runtime/neurun/core/src/exec/ParallelScheduler.cc55
-rw-r--r--runtime/neurun/core/src/exec/ParallelScheduler.h60
-rw-r--r--runtime/neurun/core/src/exec/Sink.h205
-rw-r--r--runtime/neurun/core/src/exec/Source.h211
-rw-r--r--runtime/neurun/core/src/exec/ThreadPool.cc65
-rw-r--r--runtime/neurun/core/src/exec/ThreadPool.h73
-rw-r--r--runtime/neurun/core/src/exec/WorkQueue.cc104
-rw-r--r--runtime/neurun/core/src/exec/WorkQueue.h87
-rw-r--r--runtime/neurun/core/src/exec/interp/Buffer.h94
-rw-r--r--runtime/neurun/core/src/exec/interp/ExecEnv.h165
-rw-r--r--runtime/neurun/core/src/exec/interp/ExecManager.cc125
-rw-r--r--runtime/neurun/core/src/exec/interp/ExecManager.h71
-rw-r--r--runtime/neurun/core/src/exec/interp/Interpreter.cc210
-rw-r--r--runtime/neurun/core/src/exec/interp/Interpreter.h67
-rw-r--r--runtime/neurun/core/src/exec/interp/Registration.h63
-rw-r--r--runtime/neurun/core/src/exec/interp/Tensor.cc59
-rw-r--r--runtime/neurun/core/src/exec/interp/Tensor.h180
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc129
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc202
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/Concat.cc150
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/Conv2D.cc152
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc159
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc137
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/Gather.cc141
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc124
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/Logistic.cc102
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc128
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/OperationUtil.h177
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/Pad.cc109
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/Reshape.cc66
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/SoftMax.cc163
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc145
-rw-r--r--runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc156
-rw-r--r--runtime/neurun/core/src/ir/Graph.cc551
-rw-r--r--runtime/neurun/core/src/ir/GraphIterator.cc84
-rw-r--r--runtime/neurun/core/src/ir/GraphIterator.h74
-rw-r--r--runtime/neurun/core/src/ir/LayoutSet.cc66
-rw-r--r--runtime/neurun/core/src/ir/LayoutSet.h58
-rw-r--r--runtime/neurun/core/src/ir/OpCode.cc37
-rw-r--r--runtime/neurun/core/src/ir/OpSequence.cc83
-rw-r--r--runtime/neurun/core/src/ir/Operand.cc70
-rw-r--r--runtime/neurun/core/src/ir/OperandIndexSequence.cc58
-rw-r--r--runtime/neurun/core/src/ir/Operation.cc55
-rw-r--r--runtime/neurun/core/src/ir/OperationIndexList.cc37
-rw-r--r--runtime/neurun/core/src/ir/Shape.cc85
-rw-r--r--runtime/neurun/core/src/ir/Subgraphs.cc87
-rw-r--r--runtime/neurun/core/src/ir/TypeInfo.cc47
-rw-r--r--runtime/neurun/core/src/ir/dumper/Dumper.cc633
-rw-r--r--runtime/neurun/core/src/ir/dumper/Dumper.h102
-rw-r--r--runtime/neurun/core/src/ir/operand/Shape4DConvert.h57
-rw-r--r--runtime/neurun/core/src/ir/operation/Abs.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/Add.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/ArgMax.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/AvgPool2D.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/BatchToSpaceND.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Cast.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/Comparison.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Concat.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Conv2D.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Custom.cc46
-rw-r--r--runtime/neurun/core/src/ir/operation/DepthToSpace.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/DepthwiseConv2D.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Dequantize.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/Div.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/EmbeddingLookup.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Exp.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/Floor.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/FullyConnected.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Gather.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/HashtableLookup.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/InstanceNorm.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/L2Normalization.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/L2Pool2D.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/LSTM.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/LocalResponseNormalization.cc41
-rw-r--r--runtime/neurun/core/src/ir/operation/LogicalAnd.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/LogicalNot.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/LogicalOr.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/Logistic.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/LowerInfo.cc34
-rw-r--r--runtime/neurun/core/src/ir/operation/Max.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/MaxPool2D.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Mean.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Min.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/Mul.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Neg.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/OneHot.cc37
-rw-r--r--runtime/neurun/core/src/ir/operation/PReLU.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/Pack.cc33
-rw-r--r--runtime/neurun/core/src/ir/operation/Pad.cc38
-rw-r--r--runtime/neurun/core/src/ir/operation/Permute.cc44
-rw-r--r--runtime/neurun/core/src/ir/operation/RNN.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/RSQRT.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/ReLU.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/ReLU1.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/ReLU6.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/ReduceMax.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/ReduceMin.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/ReduceSum.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Reshape.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/ResizeBilinear.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/SQRT.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/Slice.cc37
-rw-r--r--runtime/neurun/core/src/ir/operation/Softmax.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/SpaceToBatchND.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/SpaceToDepth.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Split.cc33
-rw-r--r--runtime/neurun/core/src/ir/operation/SquaredDifference.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Squeeze.cc37
-rw-r--r--runtime/neurun/core/src/ir/operation/StridedSlice.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Sub.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Tanh.cc39
-rw-r--r--runtime/neurun/core/src/ir/operation/TopKV2.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Transpose.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/TransposeConv.cc40
-rw-r--r--runtime/neurun/core/src/ir/operation/Unpack.cc33
-rw-r--r--runtime/neurun/core/src/ir/pass/ConstantInsertionPass.cc104
-rw-r--r--runtime/neurun/core/src/ir/pass/ConstantInsertionPass.h75
-rw-r--r--runtime/neurun/core/src/ir/pass/OperandPass.cc36
-rw-r--r--runtime/neurun/core/src/ir/pass/OperandPass.h53
-rw-r--r--runtime/neurun/core/src/ir/pass/OperationPass.cc38
-rw-r--r--runtime/neurun/core/src/ir/pass/OperationPass.h76
-rw-r--r--runtime/neurun/core/src/ir/pass/Pass.h55
-rw-r--r--runtime/neurun/core/src/ir/pass/PermutationEliminationPass.cc195
-rw-r--r--runtime/neurun/core/src/ir/pass/PermutationEliminationPass.h86
-rw-r--r--runtime/neurun/core/src/ir/pass/PermutationInsertionPass.cc209
-rw-r--r--runtime/neurun/core/src/ir/pass/PermutationInsertionPass.h59
-rw-r--r--runtime/neurun/core/src/ir/pass/PermutationOperationPass.cc230
-rw-r--r--runtime/neurun/core/src/ir/pass/PermutationOperationPass.h54
-rw-r--r--runtime/neurun/core/src/ir/verifier/Verifier.cc96
-rw-r--r--runtime/neurun/core/src/ir/verifier/Verifier.h68
-rw-r--r--runtime/neurun/core/src/library_info.cc17
-rw-r--r--runtime/neurun/core/src/util/ConfigSource.cc116
-rw-r--r--runtime/neurun/core/src/util/EnvConfigSource.cc40
-rw-r--r--runtime/neurun/core/src/util/EventCollectorGlobal.cc86
-rw-r--r--runtime/neurun/core/src/util/GeneralConfigSource.cc45
-rw-r--r--runtime/neurun/core/src/util/Padding.cc119
-rw-r--r--runtime/neurun/core/src/util/ShapeInference.cc200
-rw-r--r--runtime/neurun/core/src/util/Utils.cc68
-rw-r--r--runtime/neurun/core/src/util/logging.cc7
-rw-r--r--runtime/neurun/frontend/base_loader/CMakeLists.txt7
-rw-r--r--runtime/neurun/frontend/base_loader/include/base_loader.h1278
-rw-r--r--runtime/neurun/frontend/circle/CMakeLists.txt17
-rw-r--r--runtime/neurun/frontend/circle/include/circle_loader.h32
-rw-r--r--runtime/neurun/frontend/circle/src/circle_loader.cc116
-rw-r--r--runtime/neurun/frontend/circle/src/circle_schema_generated.h7546
-rw-r--r--runtime/neurun/frontend/nnapi/CMakeLists.txt23
-rw-r--r--runtime/neurun/frontend/nnapi/compilation.cc110
-rw-r--r--runtime/neurun/frontend/nnapi/execution.cc480
-rw-r--r--runtime/neurun/frontend/nnapi/memory.cc42
-rw-r--r--runtime/neurun/frontend/nnapi/model.cc411
-rw-r--r--runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc42
-rw-r--r--runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h42
-rw-r--r--runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc43
-rw-r--r--runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.h44
-rw-r--r--runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc289
-rw-r--r--runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.h74
-rw-r--r--runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.cc268
-rw-r--r--runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.h71
-rw-r--r--runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.cc100
-rw-r--r--runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.h78
-rw-r--r--runtime/neurun/frontend/nnapi/wrapper/OperationFactory.cc1680
-rw-r--r--runtime/neurun/frontend/nnapi/wrapper/OperationFactory.h60
-rw-r--r--runtime/neurun/frontend/tflite/CMakeLists.txt17
-rw-r--r--runtime/neurun/frontend/tflite/include/tflite_loader.h34
-rw-r--r--runtime/neurun/frontend/tflite/src/tflite_loader.cc105
-rw-r--r--runtime/neurun/frontend/tflite/src/tflite_schema_generated.h7275
-rw-r--r--runtime/neurun/frontend/tflite/tflite_schema.fbs795
-rw-r--r--runtime/neurun/sample/minimal/src/minimal.cc67
-rw-r--r--runtime/neurun/test/CMakeLists.txt15
-rw-r--r--runtime/neurun/test/core/backend/ExecTime.test.cc98
-rw-r--r--runtime/neurun/test/core/compiler/Scheduler.cc550
-rw-r--r--runtime/neurun/test/core/exec/ExecInstance.cc307
-rw-r--r--runtime/neurun/test/core/exec/interp/ExecManager.cc334
-rw-r--r--runtime/neurun/test/graph/Graph.cc52
-rw-r--r--runtime/neurun/test/graph/Index.cc34
-rw-r--r--runtime/neurun/test/graph/MockNode.h47
-rw-r--r--runtime/neurun/test/graph/operand/IndexSet.cc52
-rw-r--r--runtime/neurun/test/graph/operand/LayoutSet.cc43
-rw-r--r--runtime/neurun/test/graph/operand/Set.cc45
-rw-r--r--runtime/neurun/test/graph/operand/UseDef.cc85
-rw-r--r--runtime/neurun/test/graph/operation/Set.cc33
-rw-r--r--runtime/neurun/test/graph/operation/SetIO.cc99
-rw-r--r--runtime/neurun/test/graph/verifier/Verifier.cc49
-rw-r--r--runtime/neurun/test/util/ShapeInference.cc233
-rw-r--r--runtime/nnapi-header/include/NeuralNetworksEx.h309
-rw-r--r--runtime/onert/CMakeLists.txt15
-rw-r--r--runtime/onert/api/CMakeLists.txt21
-rw-r--r--runtime/onert/api/include/nnfw.h409
-rw-r--r--runtime/onert/api/include/nnfw_debug.h26
-rw-r--r--runtime/onert/api/include/nnfw_dev.h (renamed from runtime/neurun/api/include/nnfw_dev.h)0
-rw-r--r--runtime/onert/api/include/nnfw_version.h26
-rw-r--r--runtime/onert/api/src/CustomKernel.cc98
-rw-r--r--runtime/onert/api/src/CustomKernel.h60
-rw-r--r--runtime/onert/api/src/CustomKernelRegistry.cc64
-rw-r--r--runtime/onert/api/src/CustomKernelRegistry.h64
-rw-r--r--runtime/onert/api/src/OpMap.lst89
-rw-r--r--runtime/onert/api/src/nnfw_api.cc299
-rw-r--r--runtime/onert/api/src/nnfw_api_internal.cc518
-rw-r--r--runtime/onert/api/src/nnfw_api_internal.h92
-rw-r--r--runtime/onert/api/src/nnfw_debug.cc29
-rw-r--r--runtime/onert/api/src/nnfw_debug_internal.cc25
-rw-r--r--runtime/onert/api/src/nnfw_debug_internal.h (renamed from runtime/neurun/api/src/nnfw_debug_internal.h)0
-rw-r--r--runtime/onert/backend/CMakeLists.txt8
-rw-r--r--runtime/onert/backend/acl_cl/Backend.h68
-rw-r--r--runtime/onert/backend/acl_cl/CLTimer.h108
-rw-r--r--runtime/onert/backend/acl_cl/CMakeLists.txt19
-rw-r--r--runtime/onert/backend/acl_cl/Config.cc50
-rw-r--r--runtime/onert/backend/acl_cl/Config.h44
-rw-r--r--runtime/onert/backend/acl_cl/ConstantInitializer.cc196
-rw-r--r--runtime/onert/backend/acl_cl/ConstantInitializer.h63
-rw-r--r--runtime/onert/backend/acl_cl/KernelGenerator.cc2023
-rw-r--r--runtime/onert/backend/acl_cl/KernelGenerator.h112
-rw-r--r--runtime/onert/backend/acl_cl/Optimizer.cc58
-rw-r--r--runtime/onert/backend/acl_cl/Optimizer.h47
-rw-r--r--runtime/onert/backend/acl_cl/ShapeFixer.cc431
-rw-r--r--runtime/onert/backend/acl_cl/ShapeFixer.h110
-rw-r--r--runtime/onert/backend/acl_cl/TensorBuilder.h39
-rw-r--r--runtime/onert/backend/acl_cl/TensorManager.h78
-rw-r--r--runtime/onert/backend/acl_cl/acl_cl.cc33
-rw-r--r--runtime/onert/backend/acl_cl/operand/CLSubTensor.cc44
-rw-r--r--runtime/onert/backend/acl_cl/operand/CLSubTensor.h62
-rw-r--r--runtime/onert/backend/acl_cl/operand/CLTensor.cc62
-rw-r--r--runtime/onert/backend/acl_cl/operand/CLTensor.h75
-rw-r--r--runtime/onert/backend/acl_cl/operand/ICLTensor.cc45
-rw-r--r--runtime/onert/backend/acl_cl/operand/ICLTensor.h50
-rw-r--r--runtime/onert/backend/acl_common/AclActivationBuilder.h125
-rw-r--r--runtime/onert/backend/acl_common/AclFunction.h69
-rw-r--r--runtime/onert/backend/acl_common/AclInternalBufferManager.h97
-rw-r--r--runtime/onert/backend/acl_common/AclLinearMemoryManager.h110
-rw-r--r--runtime/onert/backend/acl_common/AclMemoryManager.h98
-rw-r--r--runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h105
-rw-r--r--runtime/onert/backend/acl_common/AclTensorBuilder.h483
-rw-r--r--runtime/onert/backend/acl_common/AclTensorManager.h301
-rw-r--r--runtime/onert/backend/acl_common/CMakeLists.txt19
-rw-r--r--runtime/onert/backend/acl_common/Convert.cc198
-rw-r--r--runtime/onert/backend/acl_common/Convert.h74
-rw-r--r--runtime/onert/backend/acl_common/IACLTensor.cc63
-rw-r--r--runtime/onert/backend/acl_common/IACLTensor.h68
-rw-r--r--runtime/onert/backend/acl_common/ParentInfo.h44
-rw-r--r--runtime/onert/backend/acl_common/Swizzle.h160
-rw-r--r--runtime/onert/backend/acl_neon/Backend.h69
-rw-r--r--runtime/onert/backend/acl_neon/CMakeLists.txt19
-rw-r--r--runtime/onert/backend/acl_neon/Config.cc30
-rw-r--r--runtime/onert/backend/acl_neon/Config.h45
-rw-r--r--runtime/onert/backend/acl_neon/ConstantInitializer.cc183
-rw-r--r--runtime/onert/backend/acl_neon/ConstantInitializer.h60
-rw-r--r--runtime/onert/backend/acl_neon/KernelGenerator.cc2030
-rw-r--r--runtime/onert/backend/acl_neon/KernelGenerator.h112
-rw-r--r--runtime/onert/backend/acl_neon/Optimizer.cc58
-rw-r--r--runtime/onert/backend/acl_neon/Optimizer.h47
-rw-r--r--runtime/onert/backend/acl_neon/ShapeFixer.cc437
-rw-r--r--runtime/onert/backend/acl_neon/ShapeFixer.h110
-rw-r--r--runtime/onert/backend/acl_neon/TensorBuilder.h39
-rw-r--r--runtime/onert/backend/acl_neon/TensorManager.h77
-rw-r--r--runtime/onert/backend/acl_neon/acl_neon.cc33
-rw-r--r--runtime/onert/backend/acl_neon/operand/INETensor.cc33
-rw-r--r--runtime/onert/backend/acl_neon/operand/INETensor.h46
-rw-r--r--runtime/onert/backend/acl_neon/operand/NESubTensor.cc44
-rw-r--r--runtime/onert/backend/acl_neon/operand/NESubTensor.h62
-rw-r--r--runtime/onert/backend/acl_neon/operand/NETensor.cc45
-rw-r--r--runtime/onert/backend/acl_neon/operand/NETensor.h64
-rw-r--r--runtime/onert/backend/cpu/Backend.h67
-rw-r--r--runtime/onert/backend/cpu/CMakeLists.txt14
-rw-r--r--runtime/onert/backend/cpu/Config.cc30
-rw-r--r--runtime/onert/backend/cpu/Config.h45
-rw-r--r--runtime/onert/backend/cpu/ConstantInitializer.cc68
-rw-r--r--runtime/onert/backend/cpu/ConstantInitializer.h54
-rw-r--r--runtime/onert/backend/cpu/KernelGenerator.cc932
-rw-r--r--runtime/onert/backend/cpu/KernelGenerator.h93
-rw-r--r--runtime/onert/backend/cpu/ShapeFixer.cc181
-rw-r--r--runtime/onert/backend/cpu/ShapeFixer.h85
-rw-r--r--runtime/onert/backend/cpu/TensorBuilder.cc93
-rw-r--r--runtime/onert/backend/cpu/TensorBuilder.h76
-rw-r--r--runtime/onert/backend/cpu/TensorManager.cc114
-rw-r--r--runtime/onert/backend/cpu/TensorManager.h65
-rw-r--r--runtime/onert/backend/cpu/cpu.cc33
-rw-r--r--runtime/onert/backend/cpu/kernel/AbsLayer.cc67
-rw-r--r--runtime/onert/backend/cpu/kernel/AbsLayer.h63
-rw-r--r--runtime/onert/backend/cpu/kernel/AddLayer.cc96
-rw-r--r--runtime/onert/backend/cpu/kernel/AddLayer.h71
-rw-r--r--runtime/onert/backend/cpu/kernel/AvgPoolLayer.cc116
-rw-r--r--runtime/onert/backend/cpu/kernel/AvgPoolLayer.h81
-rw-r--r--runtime/onert/backend/cpu/kernel/CastLayer.cc108
-rw-r--r--runtime/onert/backend/cpu/kernel/CastLayer.h63
-rw-r--r--runtime/onert/backend/cpu/kernel/CompareLayer.cc180
-rw-r--r--runtime/onert/backend/cpu/kernel/CompareLayer.h65
-rw-r--r--runtime/onert/backend/cpu/kernel/ConcatLayer.cc134
-rw-r--r--runtime/onert/backend/cpu/kernel/ConcatLayer.h65
-rw-r--r--runtime/onert/backend/cpu/kernel/ConvolutionLayer.cc159
-rw-r--r--runtime/onert/backend/cpu/kernel/ConvolutionLayer.h96
-rw-r--r--runtime/onert/backend/cpu/kernel/DepthwiseConvolutionLayer.cc136
-rw-r--r--runtime/onert/backend/cpu/kernel/DepthwiseConvolutionLayer.h83
-rw-r--r--runtime/onert/backend/cpu/kernel/DivLayer.cc92
-rw-r--r--runtime/onert/backend/cpu/kernel/DivLayer.h71
-rw-r--r--runtime/onert/backend/cpu/kernel/ExpLayer.cc71
-rw-r--r--runtime/onert/backend/cpu/kernel/ExpLayer.h63
-rw-r--r--runtime/onert/backend/cpu/kernel/FullyConnectedLayer.cc141
-rw-r--r--runtime/onert/backend/cpu/kernel/FullyConnectedLayer.h81
-rw-r--r--runtime/onert/backend/cpu/kernel/GatherLayer.cc77
-rw-r--r--runtime/onert/backend/cpu/kernel/GatherLayer.h66
-rw-r--r--runtime/onert/backend/cpu/kernel/LogisticLayer.cc71
-rw-r--r--runtime/onert/backend/cpu/kernel/LogisticLayer.h63
-rw-r--r--runtime/onert/backend/cpu/kernel/MaxLayer.cc78
-rw-r--r--runtime/onert/backend/cpu/kernel/MaxLayer.h67
-rw-r--r--runtime/onert/backend/cpu/kernel/MaxPoolLayer.cc113
-rw-r--r--runtime/onert/backend/cpu/kernel/MaxPoolLayer.h81
-rw-r--r--runtime/onert/backend/cpu/kernel/MinLayer.cc78
-rw-r--r--runtime/onert/backend/cpu/kernel/MinLayer.h67
-rw-r--r--runtime/onert/backend/cpu/kernel/MulLayer.cc92
-rw-r--r--runtime/onert/backend/cpu/kernel/MulLayer.h71
-rw-r--r--runtime/onert/backend/cpu/kernel/OneHotLayer.cc70
-rw-r--r--runtime/onert/backend/cpu/kernel/OneHotLayer.h73
-rw-r--r--runtime/onert/backend/cpu/kernel/OperationUtils.cc269
-rw-r--r--runtime/onert/backend/cpu/kernel/OperationUtils.h162
-rw-r--r--runtime/onert/backend/cpu/kernel/PackLayer.cc98
-rw-r--r--runtime/onert/backend/cpu/kernel/PackLayer.h65
-rw-r--r--runtime/onert/backend/cpu/kernel/PadLayer.cc70
-rw-r--r--runtime/onert/backend/cpu/kernel/PadLayer.h71
-rw-r--r--runtime/onert/backend/cpu/kernel/PermuteLayer.cc71
-rw-r--r--runtime/onert/backend/cpu/kernel/PermuteLayer.h209
-rw-r--r--runtime/onert/backend/cpu/kernel/ReduceLayer.cc137
-rw-r--r--runtime/onert/backend/cpu/kernel/ReduceLayer.h84
-rw-r--r--runtime/onert/backend/cpu/kernel/ReshapeLayer.cc55
-rw-r--r--runtime/onert/backend/cpu/kernel/ReshapeLayer.h63
-rw-r--r--runtime/onert/backend/cpu/kernel/RsqrtLayer.cc70
-rw-r--r--runtime/onert/backend/cpu/kernel/RsqrtLayer.h59
-rw-r--r--runtime/onert/backend/cpu/kernel/ShapeLayer.cc81
-rw-r--r--runtime/onert/backend/cpu/kernel/ShapeLayer.h61
-rw-r--r--runtime/onert/backend/cpu/kernel/SinLayer.cc69
-rw-r--r--runtime/onert/backend/cpu/kernel/SinLayer.h60
-rw-r--r--runtime/onert/backend/cpu/kernel/SliceLayer.cc111
-rw-r--r--runtime/onert/backend/cpu/kernel/SliceLayer.h71
-rw-r--r--runtime/onert/backend/cpu/kernel/SoftMaxLayer.cc173
-rw-r--r--runtime/onert/backend/cpu/kernel/SoftMaxLayer.h65
-rw-r--r--runtime/onert/backend/cpu/kernel/SplitLayer.cc98
-rw-r--r--runtime/onert/backend/cpu/kernel/SplitLayer.h66
-rw-r--r--runtime/onert/backend/cpu/kernel/StridedSliceLayer.cc96
-rw-r--r--runtime/onert/backend/cpu/kernel/StridedSliceLayer.h78
-rw-r--r--runtime/onert/backend/cpu/kernel/SubLayer.cc92
-rw-r--r--runtime/onert/backend/cpu/kernel/SubLayer.h71
-rw-r--r--runtime/onert/backend/cpu/kernel/TanhLayer.cc71
-rw-r--r--runtime/onert/backend/cpu/kernel/TanhLayer.h63
-rw-r--r--runtime/onert/backend/cpu/kernel/TransposeLayer.cc81
-rw-r--r--runtime/onert/backend/cpu/kernel/TransposeLayer.h61
-rw-r--r--runtime/onert/backend/cpu/kernel/UnpackLayer.cc104
-rw-r--r--runtime/onert/backend/cpu/kernel/UnpackLayer.h66
-rw-r--r--runtime/onert/backend/cpu/operand/Tensor.cc45
-rw-r--r--runtime/onert/backend/cpu/operand/Tensor.h124
-rw-r--r--runtime/onert/backend/cpu_common/Allocator.cc38
-rw-r--r--runtime/onert/backend/cpu_common/Allocator.h56
-rw-r--r--runtime/onert/backend/cpu_common/CMakeLists.txt35
-rw-r--r--runtime/onert/backend/cpu_common/MemoryManager.cc91
-rw-r--r--runtime/onert/backend/cpu_common/MemoryManager.h72
-rw-r--r--runtime/onert/backend/cpu_common/MemoryPlanner.cc212
-rw-r--r--runtime/onert/backend/cpu_common/MemoryPlanner.h200
-rw-r--r--runtime/onert/backend/cpu_common/MemoryPlanner.test.cc193
-rw-r--r--runtime/onert/backend/cpu_common/MemoryPlannerFactory.cc51
-rw-r--r--runtime/onert/backend/cpu_common/MemoryPlannerFactory.h47
-rw-r--r--runtime/onert/core/CMakeLists.txt21
-rw-r--r--runtime/onert/core/include/backend/Backend.h50
-rw-r--r--runtime/onert/core/include/backend/BackendContext.h91
-rw-r--r--runtime/onert/core/include/backend/CustomKernelBuilder.h77
-rw-r--r--runtime/onert/core/include/backend/IConfig.h45
-rw-r--r--runtime/onert/core/include/backend/IConstantInitializer.h280
-rw-r--r--runtime/onert/core/include/backend/IKernelGenerator.h76
-rw-r--r--runtime/onert/core/include/backend/IMemoryManager.h49
-rw-r--r--runtime/onert/core/include/backend/IOptimizer.h51
-rw-r--r--runtime/onert/core/include/backend/IShapeFixer.h55
-rw-r--r--runtime/onert/core/include/backend/ITensor.h58
-rw-r--r--runtime/onert/core/include/backend/ITensorBuilder.h77
-rw-r--r--runtime/onert/core/include/backend/ITensorManager.h51
-rw-r--r--runtime/onert/core/include/backend/ITensorRegister.h97
-rw-r--r--runtime/onert/core/include/compiler/BackendManager.h81
-rw-r--r--runtime/onert/core/include/compiler/BackendResolver.h60
-rw-r--r--runtime/onert/core/include/compiler/CodeMap.h45
-rw-r--r--runtime/onert/core/include/compiler/Compiler.h118
-rw-r--r--runtime/onert/core/include/compiler/ExecutionBuilder.h49
-rw-r--r--runtime/onert/core/include/exec/ExecTime.h112
-rw-r--r--runtime/onert/core/include/exec/Execution.h144
-rw-r--r--runtime/onert/core/include/exec/ExecutionObservers.h83
-rw-r--r--runtime/onert/core/include/exec/FunctionSequence.h72
-rw-r--r--runtime/onert/core/include/exec/IExecutor.h72
-rw-r--r--runtime/onert/core/include/exec/IFunction.h37
-rw-r--r--runtime/onert/core/include/exec/IODescription.h66
-rw-r--r--runtime/onert/core/include/exec/JSONExecTime.h97
-rw-r--r--runtime/onert/core/include/exec/NopFunction.h54
-rw-r--r--runtime/onert/core/include/interp/InterpExecutor.h70
-rw-r--r--runtime/onert/core/include/ir/BackendSet.h40
-rw-r--r--runtime/onert/core/include/ir/Coordinates.h113
-rw-r--r--runtime/onert/core/include/ir/Data.h75
-rw-r--r--runtime/onert/core/include/ir/DataType.h62
-rw-r--r--runtime/onert/core/include/ir/Graph.h114
-rw-r--r--runtime/onert/core/include/ir/Index.h45
-rw-r--r--runtime/onert/core/include/ir/InternalType.h46
-rw-r--r--runtime/onert/core/include/ir/Layout.h67
-rw-r--r--runtime/onert/core/include/ir/LowerInfoMap.h42
-rw-r--r--runtime/onert/core/include/ir/LoweredGraph.h78
-rw-r--r--runtime/onert/core/include/ir/OpCode.h56
-rw-r--r--runtime/onert/core/include/ir/OpSequence.h106
-rw-r--r--runtime/onert/core/include/ir/OpSequences.h87
-rw-r--r--runtime/onert/core/include/ir/Operand.h114
-rw-r--r--runtime/onert/core/include/ir/OperandConstraint.h58
-rw-r--r--runtime/onert/core/include/ir/OperandIndexMap.h34
-rw-r--r--runtime/onert/core/include/ir/OperandIndexSequence.h63
-rw-r--r--runtime/onert/core/include/ir/OperandInfo.h129
-rw-r--r--runtime/onert/core/include/ir/Operands.h46
-rw-r--r--runtime/onert/core/include/ir/Operation.h71
-rw-r--r--runtime/onert/core/include/ir/OperationIndexList.h59
-rw-r--r--runtime/onert/core/include/ir/OperationIndexMap.h34
-rw-r--r--runtime/onert/core/include/ir/OperationVisitor.h52
-rw-r--r--runtime/onert/core/include/ir/Operations.Include.h87
-rw-r--r--runtime/onert/core/include/ir/Operations.h43
-rw-r--r--runtime/onert/core/include/ir/Operations.lst90
-rw-r--r--runtime/onert/core/include/ir/Padding.h73
-rw-r--r--runtime/onert/core/include/ir/Shape.h86
-rw-r--r--runtime/onert/core/include/ir/TypeInfo.h59
-rw-r--r--runtime/onert/core/include/ir/operand/LowerInfo.h69
-rw-r--r--runtime/onert/core/include/ir/operand/PermuteFactor.h130
-rw-r--r--runtime/onert/core/include/ir/operation/Abs.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Add.h62
-rw-r--r--runtime/onert/core/include/ir/operation/ArgMax.h62
-rw-r--r--runtime/onert/core/include/ir/operation/AvgPool2D.h70
-rw-r--r--runtime/onert/core/include/ir/operation/BatchToSpaceND.h50
-rw-r--r--runtime/onert/core/include/ir/operation/Cast.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Comparison.h72
-rw-r--r--runtime/onert/core/include/ir/operation/Concat.h59
-rw-r--r--runtime/onert/core/include/ir/operation/Conv2D.h69
-rw-r--r--runtime/onert/core/include/ir/operation/ConvertFp16ToFp32.h49
-rw-r--r--runtime/onert/core/include/ir/operation/ConvertFp32ToFp16.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Custom.h75
-rw-r--r--runtime/onert/core/include/ir/operation/DepthToSpace.h63
-rw-r--r--runtime/onert/core/include/ir/operation/DepthwiseConv2D.h70
-rw-r--r--runtime/onert/core/include/ir/operation/Dequantize.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Div.h62
-rw-r--r--runtime/onert/core/include/ir/operation/EmbeddingLookup.h50
-rw-r--r--runtime/onert/core/include/ir/operation/Exp.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Floor.h51
-rw-r--r--runtime/onert/core/include/ir/operation/FullyConnected.h66
-rw-r--r--runtime/onert/core/include/ir/operation/Gather.h65
-rw-r--r--runtime/onert/core/include/ir/operation/HashtableLookup.h57
-rw-r--r--runtime/onert/core/include/ir/operation/InstanceNorm.h65
-rw-r--r--runtime/onert/core/include/ir/operation/L2Normalization.h62
-rw-r--r--runtime/onert/core/include/ir/operation/L2Pool2D.h69
-rw-r--r--runtime/onert/core/include/ir/operation/LSTM.h89
-rw-r--r--runtime/onert/core/include/ir/operation/LocalResponseNormalization.h66
-rw-r--r--runtime/onert/core/include/ir/operation/LogicalAnd.h50
-rw-r--r--runtime/onert/core/include/ir/operation/LogicalNot.h49
-rw-r--r--runtime/onert/core/include/ir/operation/LogicalOr.h50
-rw-r--r--runtime/onert/core/include/ir/operation/Logistic.h49
-rw-r--r--runtime/onert/core/include/ir/operation/LowerInfo.h54
-rw-r--r--runtime/onert/core/include/ir/operation/Max.h50
-rw-r--r--runtime/onert/core/include/ir/operation/MaxPool2D.h69
-rw-r--r--runtime/onert/core/include/ir/operation/Mean.h62
-rw-r--r--runtime/onert/core/include/ir/operation/Min.h50
-rw-r--r--runtime/onert/core/include/ir/operation/Mul.h62
-rw-r--r--runtime/onert/core/include/ir/operation/Neg.h49
-rw-r--r--runtime/onert/core/include/ir/operation/OneHot.h63
-rw-r--r--runtime/onert/core/include/ir/operation/PReLU.h50
-rw-r--r--runtime/onert/core/include/ir/operation/Pack.h53
-rw-r--r--runtime/onert/core/include/ir/operation/Pad.h63
-rw-r--r--runtime/onert/core/include/ir/operation/Permute.h78
-rw-r--r--runtime/onert/core/include/ir/operation/RNN.h70
-rw-r--r--runtime/onert/core/include/ir/operation/RSQRT.h49
-rw-r--r--runtime/onert/core/include/ir/operation/ReLU.h49
-rw-r--r--runtime/onert/core/include/ir/operation/ReLU1.h49
-rw-r--r--runtime/onert/core/include/ir/operation/ReLU6.h49
-rw-r--r--runtime/onert/core/include/ir/operation/ReduceMax.h65
-rw-r--r--runtime/onert/core/include/ir/operation/ReduceMin.h65
-rw-r--r--runtime/onert/core/include/ir/operation/ReduceSum.h63
-rw-r--r--runtime/onert/core/include/ir/operation/Reshape.h52
-rw-r--r--runtime/onert/core/include/ir/operation/ResizeBilinear.h64
-rw-r--r--runtime/onert/core/include/ir/operation/SQRT.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Shape.h51
-rw-r--r--runtime/onert/core/include/ir/operation/Sin.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Slice.h64
-rw-r--r--runtime/onert/core/include/ir/operation/Softmax.h63
-rw-r--r--runtime/onert/core/include/ir/operation/SpaceToBatchND.h53
-rw-r--r--runtime/onert/core/include/ir/operation/SpaceToDepth.h63
-rw-r--r--runtime/onert/core/include/ir/operation/Split.h59
-rw-r--r--runtime/onert/core/include/ir/operation/SquaredDifference.h50
-rw-r--r--runtime/onert/core/include/ir/operation/Squeeze.h62
-rw-r--r--runtime/onert/core/include/ir/operation/StridedSlice.h69
-rw-r--r--runtime/onert/core/include/ir/operation/Sub.h62
-rw-r--r--runtime/onert/core/include/ir/operation/Tanh.h49
-rw-r--r--runtime/onert/core/include/ir/operation/TopKV2.h69
-rw-r--r--runtime/onert/core/include/ir/operation/Transpose.h64
-rw-r--r--runtime/onert/core/include/ir/operation/TransposeConv.h68
-rw-r--r--runtime/onert/core/include/ir/operation/Unpack.h59
-rw-r--r--runtime/onert/core/include/util/Config.lst44
-rw-r--r--runtime/onert/core/include/util/ConfigSource.h58
-rw-r--r--runtime/onert/core/include/util/EnvConfigSource.h41
-rw-r--r--runtime/onert/core/include/util/EventCollectorGlobal.h155
-rw-r--r--runtime/onert/core/include/util/GeneralConfigSource.h44
-rw-r--r--runtime/onert/core/include/util/IConfigSource.h46
-rw-r--r--runtime/onert/core/include/util/ITimer.h59
-rw-r--r--runtime/onert/core/include/util/Index.h154
-rw-r--r--runtime/onert/core/include/util/ObjectManager.h148
-rw-r--r--runtime/onert/core/include/util/Set.h166
-rw-r--r--runtime/onert/core/include/util/ShapeInference.h90
-rw-r--r--runtime/onert/core/include/util/Utils.h27
-rw-r--r--runtime/onert/core/include/util/feature/nchw/Reader.h118
-rw-r--r--runtime/onert/core/include/util/feature/nchw/View.h137
-rw-r--r--runtime/onert/core/include/util/feature/nhwc/Reader.h120
-rw-r--r--runtime/onert/core/include/util/feature/nhwc/View.h139
-rw-r--r--runtime/onert/core/include/util/logging.h63
-rw-r--r--runtime/onert/core/src/backend/BackendContext.cc64
-rw-r--r--runtime/onert/core/src/compiler/BackendManager.cc140
-rw-r--r--runtime/onert/core/src/compiler/BackendResolver.cc25
-rw-r--r--runtime/onert/core/src/compiler/CachedDataDeleter.h103
-rw-r--r--runtime/onert/core/src/compiler/Compiler.cc209
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.cc379
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.h62
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.cc615
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.h186
-rw-r--r--runtime/onert/core/src/compiler/IScheduler.h38
-rw-r--r--runtime/onert/core/src/compiler/Linear.cc280
-rw-r--r--runtime/onert/core/src/compiler/Linear.h54
-rw-r--r--runtime/onert/core/src/compiler/ManualScheduler.cc103
-rw-r--r--runtime/onert/core/src/compiler/ManualScheduler.h41
-rw-r--r--runtime/onert/core/src/compiler/OperandContext.cc45
-rw-r--r--runtime/onert/core/src/compiler/OperandContext.h55
-rw-r--r--runtime/onert/core/src/compiler/OperationValidator.cc1079
-rw-r--r--runtime/onert/core/src/compiler/OperationValidator.h93
-rw-r--r--runtime/onert/core/src/compiler/ParamChecker.cc33
-rw-r--r--runtime/onert/core/src/compiler/ParamChecker.h73
-rw-r--r--runtime/onert/core/src/dumper/dot/DotBuilder.cc83
-rw-r--r--runtime/onert/core/src/dumper/dot/DotBuilder.h62
-rw-r--r--runtime/onert/core/src/dumper/dot/DotDumper.cc199
-rw-r--r--runtime/onert/core/src/dumper/dot/DotDumper.h69
-rw-r--r--runtime/onert/core/src/dumper/dot/DotOpSequenceInfo.cc56
-rw-r--r--runtime/onert/core/src/dumper/dot/DotOpSequenceInfo.h59
-rw-r--r--runtime/onert/core/src/dumper/dot/Node.cc56
-rw-r--r--runtime/onert/core/src/dumper/dot/Node.h127
-rw-r--r--runtime/onert/core/src/dumper/dot/OperandNode.cc60
-rw-r--r--runtime/onert/core/src/dumper/dot/OperandNode.h79
-rw-r--r--runtime/onert/core/src/dumper/dot/OperationNode.cc46
-rw-r--r--runtime/onert/core/src/dumper/dot/OperationNode.h62
-rw-r--r--runtime/onert/core/src/exec/DataflowExecutor.cc175
-rw-r--r--runtime/onert/core/src/exec/DataflowExecutor.h96
-rw-r--r--runtime/onert/core/src/exec/ExecTime.cc137
-rw-r--r--runtime/onert/core/src/exec/Execution.cc131
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservee.cc64
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservee.h56
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservers.cc126
-rw-r--r--runtime/onert/core/src/exec/ExecutorBase.cc165
-rw-r--r--runtime/onert/core/src/exec/ExecutorBase.h133
-rw-r--r--runtime/onert/core/src/exec/FunctionSequence.cc62
-rw-r--r--runtime/onert/core/src/exec/JSONExecTime.cc231
-rw-r--r--runtime/onert/core/src/exec/Job.cc33
-rw-r--r--runtime/onert/core/src/exec/Job.h69
-rw-r--r--runtime/onert/core/src/exec/LinearExecutor.cc39
-rw-r--r--runtime/onert/core/src/exec/LinearExecutor.h70
-rw-r--r--runtime/onert/core/src/exec/ParallelExecutor.cc146
-rw-r--r--runtime/onert/core/src/exec/ParallelExecutor.h67
-rw-r--r--runtime/onert/core/src/exec/ParallelScheduler.cc55
-rw-r--r--runtime/onert/core/src/exec/ParallelScheduler.h60
-rw-r--r--runtime/onert/core/src/exec/Sink.h199
-rw-r--r--runtime/onert/core/src/exec/Source.h208
-rw-r--r--runtime/onert/core/src/exec/ThreadPool.cc65
-rw-r--r--runtime/onert/core/src/exec/ThreadPool.h73
-rw-r--r--runtime/onert/core/src/exec/WorkQueue.cc104
-rw-r--r--runtime/onert/core/src/exec/WorkQueue.h87
-rw-r--r--runtime/onert/core/src/interp/Buffer.h91
-rw-r--r--runtime/onert/core/src/interp/ExecEnv.h162
-rw-r--r--runtime/onert/core/src/interp/InterpExecutor.cc114
-rw-r--r--runtime/onert/core/src/interp/InterpOps.lst89
-rw-r--r--runtime/onert/core/src/interp/Interpreter.cc184
-rw-r--r--runtime/onert/core/src/interp/Interpreter.h64
-rw-r--r--runtime/onert/core/src/interp/Registration.h43
-rw-r--r--runtime/onert/core/src/interp/Tensor.cc53
-rw-r--r--runtime/onert/core/src/interp/Tensor.h177
-rw-r--r--runtime/onert/core/src/interp/operations/AvgPool2D.cc125
-rw-r--r--runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc193
-rw-r--r--runtime/onert/core/src/interp/operations/Concat.cc147
-rw-r--r--runtime/onert/core/src/interp/operations/Conv2D.cc150
-rw-r--r--runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc155
-rw-r--r--runtime/onert/core/src/interp/operations/FullyConnected.cc135
-rw-r--r--runtime/onert/core/src/interp/operations/Gather.cc138
-rw-r--r--runtime/onert/core/src/interp/operations/InstanceNorm.cc121
-rw-r--r--runtime/onert/core/src/interp/operations/Logistic.cc99
-rw-r--r--runtime/onert/core/src/interp/operations/MaxPool2D.cc124
-rw-r--r--runtime/onert/core/src/interp/operations/OperationUtil.h210
-rw-r--r--runtime/onert/core/src/interp/operations/Pad.cc106
-rw-r--r--runtime/onert/core/src/interp/operations/Reshape.cc63
-rw-r--r--runtime/onert/core/src/interp/operations/Softmax.cc160
-rw-r--r--runtime/onert/core/src/interp/operations/TransposeConv.cc141
-rw-r--r--runtime/onert/core/src/interp/operations/UnaryActivations.cc153
-rw-r--r--runtime/onert/core/src/ir/Coordinates.cc50
-rw-r--r--runtime/onert/core/src/ir/Graph.cc103
-rw-r--r--runtime/onert/core/src/ir/GraphIterator.cc84
-rw-r--r--runtime/onert/core/src/ir/GraphIterator.h74
-rw-r--r--runtime/onert/core/src/ir/LayoutSet.cc66
-rw-r--r--runtime/onert/core/src/ir/LayoutSet.h58
-rw-r--r--runtime/onert/core/src/ir/LoweredGraph.cc496
-rw-r--r--runtime/onert/core/src/ir/OpCode.cc37
-rw-r--r--runtime/onert/core/src/ir/OpSequence.cc93
-rw-r--r--runtime/onert/core/src/ir/OpSequences.cc88
-rw-r--r--runtime/onert/core/src/ir/Operand.cc61
-rw-r--r--runtime/onert/core/src/ir/OperandIndexSequence.cc65
-rw-r--r--runtime/onert/core/src/ir/Operands.cc36
-rw-r--r--runtime/onert/core/src/ir/Operation.cc55
-rw-r--r--runtime/onert/core/src/ir/OperationCloner.cc42
-rw-r--r--runtime/onert/core/src/ir/OperationCloner.h46
-rw-r--r--runtime/onert/core/src/ir/OperationDumper.cc634
-rw-r--r--runtime/onert/core/src/ir/OperationDumper.h99
-rw-r--r--runtime/onert/core/src/ir/OperationIndexList.cc37
-rw-r--r--runtime/onert/core/src/ir/Operations.cc37
-rw-r--r--runtime/onert/core/src/ir/Padding.cc154
-rw-r--r--runtime/onert/core/src/ir/Shape.cc104
-rw-r--r--runtime/onert/core/src/ir/TypeInfo.cc47
-rw-r--r--runtime/onert/core/src/ir/operation/Abs.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Add.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/ArgMax.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/AvgPool2D.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/BatchToSpaceND.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Cast.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Comparison.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Concat.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Conv2D.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Custom.cc44
-rw-r--r--runtime/onert/core/src/ir/operation/DepthToSpace.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Dequantize.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Div.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/EmbeddingLookup.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Exp.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Floor.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/FullyConnected.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Gather.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/HashtableLookup.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/InstanceNorm.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/L2Normalization.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/L2Pool2D.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/LSTM.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc41
-rw-r--r--runtime/onert/core/src/ir/operation/LogicalAnd.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/LogicalNot.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/LogicalOr.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Logistic.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/LowerInfo.cc34
-rw-r--r--runtime/onert/core/src/ir/operation/Max.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/MaxPool2D.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Mean.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Min.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Mul.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Neg.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/OneHot.cc37
-rw-r--r--runtime/onert/core/src/ir/operation/PReLU.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Pack.cc33
-rw-r--r--runtime/onert/core/src/ir/operation/Pad.cc38
-rw-r--r--runtime/onert/core/src/ir/operation/Permute.cc44
-rw-r--r--runtime/onert/core/src/ir/operation/RNN.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/RSQRT.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/ReLU.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/ReLU1.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/ReLU6.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/ReduceMax.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/ReduceMin.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/ReduceSum.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Reshape.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/ResizeBilinear.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/SQRT.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Shape.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Sin.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Slice.cc37
-rw-r--r--runtime/onert/core/src/ir/operation/Softmax.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/SpaceToBatchND.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/SpaceToDepth.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Split.cc33
-rw-r--r--runtime/onert/core/src/ir/operation/SquaredDifference.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Squeeze.cc37
-rw-r--r--runtime/onert/core/src/ir/operation/StridedSlice.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Sub.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Tanh.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/TopKV2.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Transpose.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/TransposeConv.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Unpack.cc33
-rw-r--r--runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc102
-rw-r--r--runtime/onert/core/src/ir/pass/ConstantInsertionPass.h75
-rw-r--r--runtime/onert/core/src/ir/pass/LoweredOperandPass.h52
-rw-r--r--runtime/onert/core/src/ir/pass/LoweredOperationPass.h52
-rw-r--r--runtime/onert/core/src/ir/pass/OperandPass.cc36
-rw-r--r--runtime/onert/core/src/ir/pass/OperandPass.h54
-rw-r--r--runtime/onert/core/src/ir/pass/OperationPass.cc38
-rw-r--r--runtime/onert/core/src/ir/pass/OperationPass.h77
-rw-r--r--runtime/onert/core/src/ir/pass/Pass.h55
-rw-r--r--runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc195
-rw-r--r--runtime/onert/core/src/ir/pass/PermutationEliminationPass.h86
-rw-r--r--runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc207
-rw-r--r--runtime/onert/core/src/ir/pass/PermutationInsertionPass.h59
-rw-r--r--runtime/onert/core/src/ir/pass/PermutationOperationPass.cc231
-rw-r--r--runtime/onert/core/src/ir/pass/PermutationOperationPass.h54
-rw-r--r--runtime/onert/core/src/ir/verifier/Verifier.cc96
-rw-r--r--runtime/onert/core/src/ir/verifier/Verifier.h68
-rw-r--r--runtime/onert/core/src/library_info.cc17
-rw-r--r--runtime/onert/core/src/util/ConfigSource.cc122
-rw-r--r--runtime/onert/core/src/util/EnvConfigSource.cc40
-rw-r--r--runtime/onert/core/src/util/EventCollectorGlobal.cc85
-rw-r--r--runtime/onert/core/src/util/GeneralConfigSource.cc45
-rw-r--r--runtime/onert/core/src/util/ShapeInference.cc241
-rw-r--r--runtime/onert/core/src/util/logging.cc23
-rw-r--r--runtime/onert/frontend/CMakeLists.txt1
-rw-r--r--runtime/onert/frontend/base_loader/CMakeLists.txt7
-rw-r--r--runtime/onert/frontend/base_loader/include/base_loader.h1362
-rw-r--r--runtime/onert/frontend/circle/CMakeLists.txt17
-rw-r--r--runtime/onert/frontend/circle/include/circle_loader.h32
-rw-r--r--runtime/onert/frontend/circle/src/circle_loader.cc134
-rw-r--r--runtime/onert/frontend/circle/src/circle_schema_generated.h9952
-rw-r--r--runtime/onert/frontend/nnapi/ANeuralNetworksModel.test.cc (renamed from runtime/neurun/frontend/nnapi/ANeuralNetworksModel.test.cc)0
-rw-r--r--runtime/onert/frontend/nnapi/CMakeLists.txt27
-rw-r--r--runtime/onert/frontend/nnapi/compilation.cc110
-rw-r--r--runtime/onert/frontend/nnapi/event.cc (renamed from runtime/neurun/frontend/nnapi/event.cc)0
-rw-r--r--runtime/onert/frontend/nnapi/execution.cc480
-rw-r--r--runtime/onert/frontend/nnapi/memory.cc42
-rw-r--r--runtime/onert/frontend/nnapi/model.cc411
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc42
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h42
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc42
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.h44
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc288
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h74
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksMemory.cc (renamed from runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksMemory.cc)0
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksMemory.h (renamed from runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksMemory.h)0
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc267
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h71
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc100
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.h79
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc1899
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/OperationFactory.h60
-rw-r--r--runtime/onert/frontend/tflite/CMakeLists.txt17
-rw-r--r--runtime/onert/frontend/tflite/include/tflite_loader.h34
-rw-r--r--runtime/onert/frontend/tflite/src/tflite_loader.cc110
-rw-r--r--runtime/onert/frontend/tflite/src/tflite_schema_generated.h9553
-rw-r--r--runtime/onert/frontend/tflite/tflite_schema-1.13.1.fbs795
-rw-r--r--runtime/onert/frontend/tflite/tflite_schema.fbs1095
-rw-r--r--runtime/onert/sample/CMakeLists.txt (renamed from runtime/neurun/sample/CMakeLists.txt)0
-rw-r--r--runtime/onert/sample/minimal/CMakeLists.txt (renamed from runtime/neurun/sample/minimal/CMakeLists.txt)0
-rw-r--r--runtime/onert/sample/minimal/README.md (renamed from runtime/neurun/sample/minimal/README.md)0
-rw-r--r--runtime/onert/sample/minimal/src/minimal.cc69
-rw-r--r--runtime/onert/test/CMakeLists.txt15
-rw-r--r--runtime/onert/test/core/compiler/Scheduler.cc569
-rw-r--r--runtime/onert/test/core/exec/ExecInstance.cc306
-rw-r--r--runtime/onert/test/core/exec/ExecTime.test.cc99
-rw-r--r--runtime/onert/test/core/interp/ExecManager.cc333
-rw-r--r--runtime/onert/test/graph/Graph.cc52
-rw-r--r--runtime/onert/test/graph/Index.cc34
-rw-r--r--runtime/onert/test/graph/MockNode.h47
-rw-r--r--runtime/onert/test/graph/operand/IndexSet.cc52
-rw-r--r--runtime/onert/test/graph/operand/LayoutSet.cc43
-rw-r--r--runtime/onert/test/graph/operand/Set.cc45
-rw-r--r--runtime/onert/test/graph/operand/UseDef.cc85
-rw-r--r--runtime/onert/test/graph/operation/Set.cc33
-rw-r--r--runtime/onert/test/graph/operation/SetIO.cc99
-rw-r--r--runtime/onert/test/graph/verifier/Verifier.cc49
-rw-r--r--runtime/onert/test/util/ObjectManager.cc97
-rw-r--r--runtime/onert/test/util/ShapeInference.cc230
-rw-r--r--tests/CMakeLists.txt8
-rw-r--r--tests/custom_op/CMakeLists.txt64
-rw-r--r--tests/custom_op/FillFrom/CMakeLists.txt7
-rw-r--r--tests/custom_op/FillFrom/FillFrom_runner.cc2
-rwxr-xr-xtests/framework/run_test.sh277
-rw-r--r--tests/framework/tests/custom/abs/config.sh1
-rw-r--r--tests/framework/tests/custom/tensorflowmax/config.sh1
-rw-r--r--tests/framework/tests/custom/tensorflowsum/config.sh1
-rw-r--r--tests/nnapi/CMakeLists.txt7
-rw-r--r--tests/nnapi/include/NeuralNetworksWrapper.h10
-rw-r--r--tests/nnapi/include/TestHarness.h72
-rw-r--r--tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl36
-rw-r--r--tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon51
-rw-r--r--tests/nnapi/nnapi_gtest.skip.aarch64-linux.cpu84
-rw-r--r--tests/nnapi/nnapi_gtest.skip.aarch64-linux.srcn79
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-linux31
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl36
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon58
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu84
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-linux.ncnn77
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-linux.srcn79
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-tizen35
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-tizen.acl_cl41
-rw-r--r--tests/nnapi/nnapi_gtest.skip.noarch.interp72
-rw-r--r--tests/nnapi/nnapi_gtest.skip.x86_64-linux68
-rw-r--r--tests/nnapi/nnapi_gtest.skip.x86_64-linux.cpu74
-rw-r--r--tests/nnapi/nnapi_test_generator/android-10/README.md6
-rwxr-xr-xtests/nnapi/nnapi_test_generator/android-10/cts_generator.py36
-rwxr-xr-xtests/nnapi/nnapi_test_generator/android-10/test_generator.py15
-rw-r--r--tests/nnapi/specs/Ex/argmax_ex_float_1.mod.py18
-rw-r--r--tests/nnapi/specs/Ex/argmax_ex_float_2.mod.py18
-rw-r--r--tests/nnapi/specs/Ex/argmax_ex_int32.mod.py18
-rw-r--r--tests/nnapi/specs/Ex/argmax_ex_neg_axis_float.mod.py17
-rw-r--r--tests/nnapi/specs/Ex/argmax_ex_neg_axis_int32.mod.py17
-rw-r--r--tests/nnapi/specs/Ex/argmax_ex_quant8.mod.py18
-rw-r--r--tests/nnapi/specs/Ex/argmax_ex_quant8_neg_axis.mod.py17
-rw-r--r--tests/nnapi/specs/Ex/equal_ex_1D_float.mod.py18
-rw-r--r--tests/nnapi/specs/Ex/equal_ex_4D_float.mod.py18
-rw-r--r--tests/nnapi/specs/Ex/equal_ex_broadcast_4D_2D_float.mod.py30
-rw-r--r--tests/nnapi/specs/Ex/equal_ex_quant8.mod.py18
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/fully_connected_float_2_weights_as_inputs.mod.py0
-rw-r--r--tests/nnapi/specs/Ex/greater_equal_ex.mod.py35
-rw-r--r--tests/nnapi/specs/Ex/less_ex.mod.py35
-rw-r--r--tests/nnapi/specs/Ex/logical_and_ex_1D.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/logical_and_ex_2D.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/logical_and_ex_3D.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/logical_and_ex_4D.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/logical_and_ex_broadcast.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/logical_and_ex_broadcast_4D_2D.mod.py25
-rw-r--r--tests/nnapi/specs/Ex/logical_not_ex_1D.mod.py16
-rw-r--r--tests/nnapi/specs/Ex/logical_not_ex_4D.mod.py16
-rw-r--r--tests/nnapi/specs/Ex/logical_or_ex_1D.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/logical_or_ex_2D.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/logical_or_ex_3D.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/logical_or_ex_4D.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/logical_or_ex_broadcast.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/logical_or_ex_broadcast_4D_2D.mod.py25
-rw-r--r--tests/nnapi/specs/Ex/notequal_ex_broadcast_4D_2D_float.mod.py30
-rw-r--r--tests/nnapi/specs/Ex/notequal_ex_broadcast_float.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/notequal_ex_float.mod.py18
-rw-r--r--tests/nnapi/specs/Ex/notequal_ex_quant8.mod.py18
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/pack_ex_2D_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/pack_ex_2D_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/pack_ex_2D_int_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/pack_ex_2D_int_2.mod.py0
-rw-r--r--tests/nnapi/specs/Ex/prelu_ex_broadcast_float_1.mod.py23
-rw-r--r--tests/nnapi/specs/Ex/prelu_ex_broadcast_quant8_1.mod.py24
-rw-r--r--tests/nnapi/specs/Ex/prelu_ex_float_1.mod.py22
-rw-r--r--tests/nnapi/specs/Ex/prelu_ex_quant8_1.mod.py23
-rw-r--r--tests/nnapi/specs/Ex/reduce_min_ex_float.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/reduce_min_ex_float_1.mod.py18
-rw-r--r--tests/nnapi/specs/Ex/reduce_min_ex_float_2.mod.py18
-rw-r--r--tests/nnapi/specs/Ex/reduce_sum_ex_2D_float.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/reduce_sum_ex_4D_float.mod.py19
-rw-r--r--tests/nnapi/specs/Ex/reduce_sum_ex_4D_float_reducing_C.mod.py33
-rw-r--r--tests/nnapi/specs/Ex/reduce_sum_ex_4D_float_reducing_HW.mod.py33
-rw-r--r--tests/nnapi/specs/Ex/split_ex_1D_float.mod.py40
-rw-r--r--tests/nnapi/specs/Ex/split_ex_1D_int32.mod.py40
-rw-r--r--tests/nnapi/specs/Ex/split_ex_4D_float_1.mod.py21
-rw-r--r--tests/nnapi/specs/Ex/split_ex_4D_float_2.mod.py21
-rw-r--r--tests/nnapi/specs/Ex/split_ex_4D_float_3.mod.py21
-rw-r--r--tests/nnapi/specs/Ex/split_ex_4D_int32_1.mod.py21
-rw-r--r--tests/nnapi/specs/Ex/split_ex_4D_int32_2.mod.py21
-rw-r--r--tests/nnapi/specs/Ex/split_ex_4D_int32_3.mod.py21
-rw-r--r--tests/nnapi/specs/Ex/split_ex_4D_int32_4.mod.py21
-rw-r--r--tests/nnapi/specs/Ex/split_ex_4D_int32_5.mod.py21
-rw-r--r--tests/nnapi/specs/Ex/split_ex_4D_quant8.mod.py21
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/squared_difference_ex_1D_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/squared_difference_ex_2D_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/squared_difference_ex_3D_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/squared_difference_ex_4D_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/squared_difference_ex_broadcast_4D_2D_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/squared_difference_ex_broadcast_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/transpose_conv_ex_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/transpose_conv_ex_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/transpose_conv_ex_float_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/transpose_conv_ex_float_4.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/unpack_ex_3D_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/unpack_ex_3D_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/unpack_ex_3D_int_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/Ex/unpack_ex_3D_int_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/add.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/add_broadcast_4D_2D_after_nops_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/add_broadcast_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/add_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/avg_pool_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/avg_pool_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/avg_pool_float_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/avg_pool_float_4.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/avg_pool_float_5.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/avg_pool_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/avg_pool_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/avg_pool_quant8_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/avg_pool_quant8_4.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/avg_pool_quant8_5.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/concat_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/concat_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/concat_float_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/concat_float_4D_axis3_1_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/concat_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/concat_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/concat_quant8_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_1_h3_w2_SAME.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_1_h3_w2_VALID.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_3_h3_w2_SAME.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_3_h3_w2_VALID.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_float_channels.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_float_channels_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_float_large.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_float_large_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_float_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_quant8_channels.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_quant8_channels_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_quant8_large.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_quant8_large_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_quant8_overflow.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_quant8_overflow_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/conv_quant8_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depth_to_space_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depth_to_space_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depth_to_space_float_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depth_to_space_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depth_to_space_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depthwise_conv.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depthwise_conv2d_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depthwise_conv2d_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depthwise_conv2d_float_large.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_2_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depthwise_conv2d_float_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depthwise_conv2d_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_large.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_large_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/dequantize.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/embedding_lookup.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/embedding_lookup_2d_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/embedding_lookup_4d_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/floor_.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_float_1_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_float_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_float_large.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_float_large_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_float_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_hybrid_1_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_hybrid_2_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_quant8_large.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_quant8_large_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/fully_connected_quant8_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/hashtable_lookup_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/hashtable_lookup_float_4D_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/hashtable_lookup_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/l2_normalization.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/l2_normalization_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/l2_normalization_large.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/l2_pool_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/l2_pool_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/l2_pool_float_large.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/local_response_norm_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/local_response_norm_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/local_response_norm_float_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/local_response_norm_float_4.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/logistic_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/logistic_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/logistic_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/logistic_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/lsh_projection.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/lsh_projection_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/lsh_projection_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/lstm.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/lstm2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/lstm2_state.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/lstm2_state2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/lstm3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/lstm3_state.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/lstm3_state2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/lstm3_state3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/lstm_state.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/lstm_state2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/max_pool_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/max_pool_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/max_pool_float_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/max_pool_float_4.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/max_pool_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/max_pool_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/max_pool_quant8_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/max_pool_quant8_4.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/mul.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/mul_4D_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/mul_broadcast_3D_1D_1_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/mul_broadcast_3D_1D_2_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/mul_broadcast_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/mul_float_square_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/mul_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/mul_relu.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/relu1_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/relu1_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/relu1_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/relu1_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/relu6_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/relu6_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/relu6_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/relu6_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/relu_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/relu_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/relu_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/relu_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/reshape.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/reshape_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/reshape_quant8_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/reshape_weights_as_inputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/resize_bilinear.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/resize_bilinear_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/rnn.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/rnn_state.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/softmax_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/softmax_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/softmax_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/softmax_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/space_to_depth_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/space_to_depth_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/space_to_depth_float_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/space_to_depth_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/space_to_depth_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/svdf.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/svdf2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/svdf_bias_present.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/svdf_state.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_0/tanh_.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/batch_to_space.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/batch_to_space_float_1.mod.py2
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/batch_to_space_quant8_1.mod.py2
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/div_.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/div_broadcast_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/div_broadcast_float_4D_2D_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/fully_connected_float_4d_simple.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/mean.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/mean_4D_float_reducing_C_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/mean_4D_float_reducing_HW_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/mean_axis01_1_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/mean_axis01_2_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/mean_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/mean_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/mean_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/mean_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/pad.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/pad_2D_HW_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/pad_3D_HWC_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/pad_BHWC_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/pad_BHW_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/pad_HWD_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/pad_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/pad_quant8_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/space_to_batch.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/space_to_batch_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/space_to_batch_float_1_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/space_to_batch_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/space_to_batch_float_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/space_to_batch_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/space_to_batch_quant8_1_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/space_to_batch_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/space_to_batch_quant8_2_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/space_to_batch_quant8_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/squeeze.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/squeeze_2D_float_1_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/squeeze_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/squeeze_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_float_10.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_float_11.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_float_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_float_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_float_4.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_float_5.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_float_6.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_float_7.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_float_8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_float_9.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_qaunt8_10.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_qaunt8_11.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_quant8_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_quant8_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_quant8_4.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_quant8_5.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_quant8_6.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_quant8_7.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_quant8_8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/strided_slice_quant8_9.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/sub.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/sub_broadcast_4D_2D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/sub_broadcast_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/transpose.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/transpose_2D_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/transpose_3D_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/transpose_float_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_1/transpose_quant8_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/abs_.mod.py0
-rwxr-xr-xtests/nnapi/specs/V1_2/abs_1D_float_nnfw.mod.py20
-rwxr-xr-xtests/nnapi/specs/V1_2/abs_2D_float_nnfw.mod.py20
-rwxr-xr-xtests/nnapi/specs/V1_2/abs_3D_float_nnfw.mod.py20
-rwxr-xr-xtests/nnapi/specs/V1_2/abs_4D_float_nnfw.mod.py18
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/argmax_1.mod.py (renamed from tests/nnapi/specs/skip/V1_2/argmax_1.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/argmax_2.mod.py (renamed from tests/nnapi/specs/skip/V1_2/argmax_2.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/argmax_3.mod.py (renamed from tests/nnapi/specs/skip/V1_2/argmax_3.mod.py)0
-rwxr-xr-xtests/nnapi/specs/V1_2/argmax_float_1_nnfw.mod.py18
-rwxr-xr-xtests/nnapi/specs/V1_2/argmax_float_2_nnfw.mod.py18
-rwxr-xr-xtests/nnapi/specs/V1_2/argmax_int32_nnfw.mod.py18
-rwxr-xr-xtests/nnapi/specs/V1_2/argmax_neg_axis_float_nnfw.mod.py17
-rwxr-xr-xtests/nnapi/specs/V1_2/argmax_neg_axis_int32_nnfw.mod.py17
-rwxr-xr-xtests/nnapi/specs/V1_2/argmax_quant8_neg_axis_nnfw.mod.py17
-rwxr-xr-xtests/nnapi/specs/V1_2/argmax_quant8_nnfw.mod.py18
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/cast.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/cast_float32_to_int32_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/cast_int32_to_float32_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/equal.mod.py (renamed from tests/nnapi/specs/skip/V1_2/equal.mod.py)0
-rwxr-xr-xtests/nnapi/specs/V1_2/equal_1D_float_nnfw.mod.py18
-rwxr-xr-xtests/nnapi/specs/V1_2/equal_4D_float_nnfw.mod.py18
-rwxr-xr-xtests/nnapi/specs/V1_2/equal_broadcast_4D_2D_float_nnfw.mod.py30
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py (renamed from tests/nnapi/specs/Ex/equal_ex_broadcast_float.mod.py)0
-rwxr-xr-xtests/nnapi/specs/V1_2/equal_quant8_nnfw.mod.py18
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/exp_.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/exp_1D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/exp_2D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/exp_3D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/exp_4D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_1D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_1D_int32_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_1D_quant8_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_2D_2D_float_1_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_2D_2D_float_2_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_2D_3D_float_1_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_2D_3D_float_2_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_2D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_2D_int32_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_2D_quant8_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_3D_2D_float_1_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_3D_2D_float_2_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_3D_2D_float_3_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_4D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/gather_higher_rank.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/greater_equal.mod.py (renamed from tests/nnapi/specs/skip/V1_2/greater_equal.mod.py)0
-rwxr-xr-xtests/nnapi/specs/V1_2/greater_equal_nnfw.mod.py35
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/less.mod.py (renamed from tests/nnapi/specs/skip/V1_2/less.mod.py)0
-rwxr-xr-xtests/nnapi/specs/V1_2/less_nnfw.mod.py35
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/logical_and.mod.py (renamed from tests/nnapi/specs/skip/V1_2/logical_and.mod.py)0
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_and_1D_nnfw.mod.py19
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_and_2D_nnfw.mod.py19
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_and_3D_nnfw.mod.py19
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_and_4D_nnfw.mod.py19
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_and_broadcast_4D_2D_nnfw.mod.py25
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_and_broadcast_nnfw.mod.py19
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/logical_not.mod.py (renamed from tests/nnapi/specs/skip/V1_2/logical_not.mod.py)0
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_not_1D_nnfw.mod.py16
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_not_4D_nnfw.mod.py16
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/logical_or.mod.py (renamed from tests/nnapi/specs/skip/V1_2/logical_or.mod.py)0
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_or_1D_nnfw.mod.py19
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_or_2D_nnfw.mod.py19
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_or_3D_nnfw.mod.py19
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_or_4D_nnfw.mod.py19
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_or_broadcast_4D_2D_nnfw.mod.py25
-rwxr-xr-xtests/nnapi/specs/V1_2/logical_or_broadcast_nnfw.mod.py19
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/maximum.mod.py (renamed from tests/nnapi/specs/skip/V1_2/maximum.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/minimum.mod.py (renamed from tests/nnapi/specs/skip/V1_2/minimum.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/neg.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/neg_1D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/neg_2D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/neg_3D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/neg_3D_int_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/neg_4D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/neg_4D_int_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/not_equal.mod.py (renamed from tests/nnapi/specs/skip/V1_2/not_equal.mod.py)0
-rwxr-xr-xtests/nnapi/specs/V1_2/not_equal_broadcast_4D_2D_float_nnfw.mod.py30
-rwxr-xr-xtests/nnapi/specs/V1_2/not_equal_broadcast_float_nnfw.mod.py19
-rwxr-xr-xtests/nnapi/specs/V1_2/not_equal_float_nnfw.mod.py18
-rwxr-xr-xtests/nnapi/specs/V1_2/not_equal_quant8_nnfw.mod.py18
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/prelu.mod.py (renamed from tests/nnapi/specs/skip/V1_2/prelu.mod.py)0
-rwxr-xr-xtests/nnapi/specs/V1_2/prelu_broadcast_float_1_nnfw.mod.py23
-rwxr-xr-xtests/nnapi/specs/V1_2/prelu_broadcast_quant8_1_nnfw.mod.py24
-rwxr-xr-xtests/nnapi/specs/V1_2/prelu_float_1_nnfw.mod.py22
-rwxr-xr-xtests/nnapi/specs/V1_2/prelu_quant8_1_nnfw.mod.py23
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/reduce_max.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/reduce_max_2D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/reduce_max_2D_int32_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/reduce_max_4D_float_reducing_C_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/reduce_max_4D_float_reducing_HW_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/reduce_max_float_1_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/reduce_max_float_2_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/reduce_max_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/reduce_max_quant8_1_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/reduce_max_quant8_2_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/reduce_min.mod.py (renamed from tests/nnapi/specs/skip/V1_2/reduce_min.mod.py)0
-rwxr-xr-xtests/nnapi/specs/V1_2/reduce_min_float_1_nnfw.mod.py18
-rwxr-xr-xtests/nnapi/specs/V1_2/reduce_min_float_2_nnfw.mod.py18
-rwxr-xr-xtests/nnapi/specs/V1_2/reduce_min_float_nnfw.mod.py19
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/reduce_sum.mod.py (renamed from tests/nnapi/specs/skip/V1_2/reduce_sum.mod.py)0
-rwxr-xr-xtests/nnapi/specs/V1_2/reduce_sum_2D_float_nnfw.mod.py19
-rwxr-xr-xtests/nnapi/specs/V1_2/reduce_sum_4D_float_nnfw.mod.py19
-rwxr-xr-xtests/nnapi/specs/V1_2/reduce_sum_4D_float_reducing_C_nnfw.mod.py33
-rwxr-xr-xtests/nnapi/specs/V1_2/reduce_sum_4D_float_reducing_HW_nnfw.mod.py33
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/rsqrt.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/rsqrt_1D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/rsqrt_2D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/rsqrt_3D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/rsqrt_4D_float_nnfw.mod.py0
-rwxr-xr-xtests/nnapi/specs/V1_2/sin_1D_float_nnfw.mod.py13
-rwxr-xr-xtests/nnapi/specs/V1_2/sin_4D_float_nnfw.mod.py18
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/slice.mod.py (renamed from tests/nnapi/specs/skip/V1_2/slice.mod.py)0
-rwxr-xr-xtests/nnapi/specs/V1_2/split_1D_float_nnfw.mod.py40
-rwxr-xr-xtests/nnapi/specs/V1_2/split_1D_int32_nnfw.mod.py40
-rwxr-xr-xtests/nnapi/specs/V1_2/split_4D_float_1_nnfw.mod.py21
-rwxr-xr-xtests/nnapi/specs/V1_2/split_4D_float_2_nnfw.mod.py21
-rwxr-xr-xtests/nnapi/specs/V1_2/split_4D_float_3_nnfw.mod.py21
-rwxr-xr-xtests/nnapi/specs/V1_2/split_4D_int32_1_nnfw.mod.py21
-rwxr-xr-xtests/nnapi/specs/V1_2/split_4D_int32_2_nnfw.mod.py21
-rwxr-xr-xtests/nnapi/specs/V1_2/split_4D_int32_3_nnfw.mod.py21
-rwxr-xr-xtests/nnapi/specs/V1_2/split_4D_int32_4_nnfw.mod.py21
-rwxr-xr-xtests/nnapi/specs/V1_2/split_4D_int32_5_nnfw.mod.py21
-rwxr-xr-xtests/nnapi/specs/V1_2/split_4D_quant8_nnfw.mod.py21
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/split_float_1.mod.py (renamed from tests/nnapi/specs/skip/V1_2/split_float_1.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/split_float_2.mod.py (renamed from tests/nnapi/specs/skip/V1_2/split_float_2.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/split_float_3.mod.py (renamed from tests/nnapi/specs/skip/V1_2/split_float_3.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/split_float_4.mod.py (renamed from tests/nnapi/specs/skip/V1_2/split_float_4.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/split_float_5.mod.py (renamed from tests/nnapi/specs/skip/V1_2/split_float_5.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/split_int32_1.mod.py (renamed from tests/nnapi/specs/skip/V1_2/split_int32_1.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/split_int32_2.mod.py (renamed from tests/nnapi/specs/skip/V1_2/split_int32_2.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/split_int32_3.mod.py (renamed from tests/nnapi/specs/skip/V1_2/split_int32_3.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/split_int32_4.mod.py (renamed from tests/nnapi/specs/skip/V1_2/split_int32_4.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/split_quant8_1.mod.py (renamed from tests/nnapi/specs/skip/V1_2/split_quant8_1.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/split_quant8_2.mod.py (renamed from tests/nnapi/specs/skip/V1_2/split_quant8_2.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/split_quant8_3.mod.py (renamed from tests/nnapi/specs/skip/V1_2/split_quant8_3.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/split_quant8_4.mod.py (renamed from tests/nnapi/specs/skip/V1_2/split_quant8_4.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/sqrt_.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/sqrt_1D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/sqrt_2D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/sqrt_3D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/sqrt_4D_float_nnfw.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/sub_v1_2.mod.py (renamed from tests/nnapi/specs/skip/V1_2/sub_v1_2.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/sub_v1_2_broadcast.mod.py (renamed from tests/nnapi/specs/skip/V1_2/sub_v1_2_broadcast.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/tanh_v1_2.mod.py (renamed from tests/nnapi/specs/skip/V1_2/tanh_v1_2.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/topk_v2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/topk_v2_1D_float_nnfw.mod.py2
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/topk_v2_1D_int32_nnfw.mod.py2
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/topk_v2_1D_quant8_nnfw.mod.py2
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/topk_v2_2D_float_nnfw.mod.py4
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/topk_v2_2D_int32_nnfw.mod.py2
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/topk_v2_2D_quant8_nnfw.mod.py2
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/V1_2/transpose_v1_2.mod.py (renamed from tests/nnapi/specs/skip/V1_2/transpose_v1_2.mod.py)0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_0/mobilenet_224_gender_basic_fixed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_0/mobilenet_quantized.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/add_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/avg_pool_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/avg_pool_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/avg_pool_float_3_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/avg_pool_float_4_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/avg_pool_float_5_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/batch_to_space_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/batch_to_space_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/concat_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/concat_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/concat_float_3_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/conv_1_h3_w2_SAME_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/conv_1_h3_w2_VALID_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/conv_3_h3_w2_SAME_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/conv_3_h3_w2_VALID_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/conv_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/conv_float_channels_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/conv_float_channels_weights_as_inputs_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/conv_float_large_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/conv_float_large_weights_as_inputs_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/conv_float_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/conv_float_weights_as_inputs_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/depth_to_space_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/depth_to_space_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/depth_to_space_float_3_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_2_weights_as_inputs_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_weights_as_inputs_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_weights_as_inputs_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/depthwise_conv_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/dequantize_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/div_broadcast_float_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/div_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/embedding_lookup_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/floor_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/fully_connected_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/fully_connected_float_4d_simple_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/fully_connected_float_large_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/fully_connected_float_large_weights_as_inputs_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/fully_connected_float_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/fully_connected_float_weights_as_inputs_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/hashtable_lookup_float_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/l2_normalization_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/l2_normalization_large_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/l2_normalization_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/l2_pool_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/l2_pool_float_large_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/l2_pool_float_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/local_response_norm_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/local_response_norm_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/local_response_norm_float_3_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/local_response_norm_float_4_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/logistic_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/logistic_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/lsh_projection_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/lsh_projection_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/lsh_projection_weights_as_inputs_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/lstm2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/lstm2_state2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/lstm2_state_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/lstm3_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/lstm3_state2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/lstm3_state3_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/lstm3_state_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/lstm_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/lstm_state2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/lstm_state_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/max_pool_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/max_pool_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/max_pool_float_3_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/max_pool_float_4_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/mean_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/mean_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/mean_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/mobilenet_224_gender_basic_fixed_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/mul_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/mul_relu_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/pad_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/pad_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/relu1_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/relu1_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/relu6_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/relu6_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/relu_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/relu_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/reshape_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/reshape_weights_as_inputs_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/resize_bilinear_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/resize_bilinear_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/rnn_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/rnn_state_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/softmax_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/softmax_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/space_to_batch_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/space_to_batch_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/space_to_batch_float_3_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/space_to_batch_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/space_to_depth_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/space_to_depth_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/space_to_depth_float_3_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/squeeze_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/squeeze_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/strided_slice_float_10_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/strided_slice_float_11_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/strided_slice_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/strided_slice_float_2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/strided_slice_float_3_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/strided_slice_float_4_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/strided_slice_float_5_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/strided_slice_float_6_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/strided_slice_float_7_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/strided_slice_float_8_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/strided_slice_float_9_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/strided_slice_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/sub_broadcast_float_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/sub_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/svdf2_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/svdf_bias_present_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/svdf_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/svdf_state_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/tanh_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/transpose_float_1_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_1/transpose_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/add_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/argmin_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/argmin_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/argmin_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/avg_pool_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/axis_aligned_bbox_transform.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/batch_to_space_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/bbox_graph.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_aux_input.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_cifg_peephole.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major_aux_input.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major_merge_outputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_merge_outputs.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_norm_fw_output.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/bidirectional_sequence_rnn.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/box_with_nms_limit_gaussian.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/box_with_nms_limit_hard.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/box_with_nms_limit_linear.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/channel_shuffle.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/concat_float16_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/concat_float16_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/concat_float16_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/concat_mixed_quant.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/concat_zero_sized.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/conv2d_dilation.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/conv2d_per_channel.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/conv2d_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/depth_to_space_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/depthwise_conv2d_dilation.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/depthwise_conv2d_per_channel.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/depthwise_conv2d_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/dequantize_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/detection_postprocess.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/div_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/expand_dims.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/floor_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/fully_connected_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/generate_proposals.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/greater.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/grouped_conv2d.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/heatmap_max_keypoint.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/instance_normalization.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/l2_normalization_axis.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/l2_normalization_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/l2_pool_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/layer_norm_lstm.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/less_equal.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/local_response_normalization_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/log.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/log_softmax.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/logistic_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lsh_projection_3_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lsh_projection_4_relaxed.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lsh_projection_deprecated.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lsh_projection_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lstm2_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lstm2_state2_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lstm2_state_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lstm3_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lstm3_state2_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lstm3_state3_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lstm3_state_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lstm_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lstm_state2_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/lstm_state_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/max_pool_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/mean_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/mul_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/pad_all_dims.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/pad_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/pad_low_rank.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/pad_low_rank_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/pad_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/pad_quant8_nonzero.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/pad_v2_1_float.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/pad_v2_1_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/pad_v2_all_dims.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/pad_v2_all_dims_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/pad_v2_low_rank.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/pad_v2_low_rank_quant8.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/pow.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/quantize.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/quantized_lstm.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/random_multinomial.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/random_multinomial_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/reduce_all.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/reduce_any.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/reduce_prod.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/relu1_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/relu6_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/relu_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/reshape_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/resize_bilinear_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/resize_nearest_neighbor.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/rnn_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/roi_align.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/roi_pooling.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/select_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/sin.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/softmax_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/space_to_batch_quant8_nonzero.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/space_to_batch_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/space_to_depth_v1_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/squeeze_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/strided_slice_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/sub_quantized_different_scales.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/svdf_bias_present_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/svdf_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/svdf_state_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/tile_1.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/tile_2.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/tile_3.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/transpose_conv2d.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/transpose_conv2d_large.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/transpose_float16.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_1step.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_norm_peephole_projection.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_peephole_projection_bias.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_cifg_peephole.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_f16_batch_major.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_f16_norm_peephole_projection.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_layer_norm_cifg_peephole.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_norm_peephole_projection.mod.py0
-rwxr-xr-x[-rw-r--r--]tests/nnapi/specs/skip/V1_2/unidirectional_sequence_rnn.mod.py0
-rw-r--r--tests/nnapi/src/TestGenerated.cpp28
-rw-r--r--tests/nnapi/src/TestMain.cpp8
-rw-r--r--tests/nnapi/src/TestNeuralNetworksWrapper.h8
-rw-r--r--tests/nnapi/src/TestValidation.cpp6
-rw-r--r--tests/nnfw_api/CMakeLists.txt25
-rw-r--r--tests/nnfw_api/src/FourOneOpModelSetInput.cc36
-rw-r--r--tests/nnfw_api/src/create_session.cc30
-rw-r--r--tests/nnfw_api/src/fixtures.h132
-rw-r--r--tests/nnfw_api/src/load_model.cc47
-rw-r--r--tests/nnfw_api/src/main.cc55
-rw-r--r--tests/nnfw_api/src/model_path.cc58
-rw-r--r--tests/nnfw_api/src/model_path.h62
-rw-r--r--tests/nnfw_api/src/prepare.cc27
-rw-r--r--tests/scripts/CMakeLists.txt19
-rw-r--r--tests/scripts/README.md6
-rwxr-xr-xtests/scripts/benchmark_nnapi.sh28
-rwxr-xr-xtests/scripts/benchmark_nnpkg.sh6
-rwxr-xr-xtests/scripts/framework/run_test.sh277
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/MODELS/inception_module/config.sh (renamed from tests/framework/tests/MODELS/inception_module/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/MODELS/inception_nonslim/config.sh (renamed from tests/framework/tests/MODELS/inception_nonslim/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/MODELS/inception_slim/config.sh (renamed from tests/framework/tests/MODELS/inception_slim/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/MODELS/mobilenet/config.sh (renamed from tests/framework/tests/MODELS/mobilenet/config.sh)0
-rwxr-xr-xtests/scripts/framework/tests/abs/config.sh1
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/add/1D/config.sh (renamed from tests/framework/tests/add/1D/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/add/4D/config.sh (renamed from tests/framework/tests/add/4D/config.sh)0
-rwxr-xr-xtests/scripts/framework/tests/average_pool_2d/aligned/config.sh1
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/average_pool_2d/avgpool1/config.sh (renamed from tests/framework/tests/average_pool_2d/avgpool1/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/average_pool_2d/avgpool2/config.sh (renamed from tests/framework/tests/average_pool_2d/avgpool2/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/batch_to_space_nd2/config.sh (renamed from tests/framework/tests/batch_to_space_nd2/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/cast/config.sh (renamed from tests/framework/tests/cast/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/concat/2D/config.sh (renamed from tests/framework/tests/concat/2D/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/concat/concat1/config.sh (renamed from tests/framework/tests/concat/concat1/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/concat/concat2/config.sh (renamed from tests/framework/tests/concat/concat2/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/conv_2d/convolution1/config.sh (renamed from tests/framework/tests/conv_2d/convolution1/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/conv_2d/convolution2/config.sh (renamed from tests/framework/tests/conv_2d/convolution2/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/custom/squared_difference/config.sh (renamed from tests/framework/tests/custom/squared_difference/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/depthwise_conv_2d/depthconv1/config.sh (renamed from tests/framework/tests/depthwise_conv_2d/depthconv1/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/depthwise_conv_2d/depthconv2/config.sh (renamed from tests/framework/tests/depthwise_conv_2d/depthconv2/config.sh)0
-rwxr-xr-xtests/scripts/framework/tests/depthwise_conv_2d_no_fuse/config.sh1
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/div/broadcast/config.sh (renamed from tests/framework/tests/div/broadcast/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/embedding_lookup/config.sh (renamed from tests/framework/tests/embedding_lookup/config.sh)0
-rwxr-xr-xtests/scripts/framework/tests/equal/config.sh1
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/exp/config.sh (renamed from tests/framework/tests/exp/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/floor/floor1/config.sh (renamed from tests/framework/tests/floor/floor1/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/floor/floor2/config.sh (renamed from tests/framework/tests/floor/floor2/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/fullyconnected/fc1/config.sh (renamed from tests/framework/tests/fullyconnected/fc1/config.sh)0
-rwxr-xr-xtests/scripts/framework/tests/fullyconnected/hybrid/config.sh1
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/fullyconnected/matmul2x2/config.sh (renamed from tests/framework/tests/fullyconnected/matmul2x2/config.sh)0
-rwxr-xr-xtests/scripts/framework/tests/fullyconnected/weights_as_input/config.sh1
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/gather/config.sh (renamed from tests/framework/tests/gather/config.sh)0
-rwxr-xr-xtests/scripts/framework/tests/greater/config.sh1
-rwxr-xr-xtests/scripts/framework/tests/greater_equal/config.sh1
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/hashtable_lookup/config.sh (renamed from tests/framework/tests/hashtable_lookup/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/l2_normalization/config.sh (renamed from tests/framework/tests/l2_normalization/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/l2_pool_2d/config.sh (renamed from tests/framework/tests/l2_pool_2d/config.sh)0
-rwxr-xr-xtests/scripts/framework/tests/less/config.sh1
-rwxr-xr-xtests/scripts/framework/tests/less_equal/config.sh1
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/logistic/config.sh (renamed from tests/framework/tests/logistic/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/max/config.sh (renamed from tests/framework/tests/max/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/max_pool_2d/maxpool1/config.sh (renamed from tests/framework/tests/max_pool_2d/maxpool1/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/max_pool_2d/maxpool2/config.sh (renamed from tests/framework/tests/max_pool_2d/maxpool2/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/mean/config.sh (renamed from tests/framework/tests/mean/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/min/config.sh (renamed from tests/framework/tests/min/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/mul/broadcast/config.sh (renamed from tests/framework/tests/mul/broadcast/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/neg/config.sh (renamed from tests/framework/tests/neg/config.sh)0
-rwxr-xr-xtests/scripts/framework/tests/not_equal/config.sh1
-rwxr-xr-xtests/scripts/framework/tests/one_hot/config.sh1
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/pack/config.sh (renamed from tests/framework/tests/pack/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/pad/4D_2D/config.sh (renamed from tests/framework/tests/pad/4D_2D/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/pad/pad1/config.sh (renamed from tests/framework/tests/pad/pad1/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/pad/pad2/config.sh (renamed from tests/framework/tests/pad/pad2/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/reduce_max/config.sh (renamed from tests/framework/tests/reduce_max/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/reduce_mean/test1/config.sh (renamed from tests/framework/tests/reduce_mean/test1/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/reduce_mean/test2/config.sh (renamed from tests/framework/tests/reduce_mean/test2/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/reduce_sum/config.sh (renamed from tests/framework/tests/reduce_sum/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/relu/config.sh (renamed from tests/framework/tests/relu/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/relu6/config.sh (renamed from tests/framework/tests/relu6/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/reshape/3D/config.sh (renamed from tests/framework/tests/reshape/3D/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/reshape/reshape1/config.sh (renamed from tests/framework/tests/reshape/reshape1/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/reshape/reshape2/config.sh (renamed from tests/framework/tests/reshape/reshape2/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/resize_bilinear/config.sh (renamed from tests/framework/tests/resize_bilinear/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/rnn/config.sh (renamed from tests/framework/tests/rnn/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/rsqrt/config.sh (renamed from tests/framework/tests/rsqrt/config.sh)0
-rw-r--r--tests/scripts/framework/tests/shape/config.sh1
-rwxr-xr-xtests/scripts/framework/tests/sin/config.sh1
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/slice/config.sh (renamed from tests/framework/tests/slice/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/softmax/config.sh (renamed from tests/framework/tests/softmax/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/space_to_batch_nd2/config.sh (renamed from tests/framework/tests/space_to_batch_nd2/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/space_to_depth/config.sh (renamed from tests/framework/tests/space_to_depth/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/sqrt/config.sh (renamed from tests/framework/tests/sqrt/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/squeeze/config.sh (renamed from tests/framework/tests/squeeze/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/strided_slice/config.sh (renamed from tests/framework/tests/strided_slice/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/sub/broadcast/config.sh (renamed from tests/framework/tests/sub/broadcast/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/tanh/config.sh (renamed from tests/framework/tests/tanh/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/topk_v2/config.sh (renamed from tests/framework/tests/topk_v2/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/transpose/config.sh (renamed from tests/framework/tests/transpose/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/transpose_conv/same/config.sh (renamed from tests/framework/tests/transpose_conv/same/config.sh)0
-rwxr-xr-x[-rw-r--r--]tests/scripts/framework/tests/transpose_conv/valid/config.sh (renamed from tests/framework/tests/transpose_conv/valid/config.sh)0
-rw-r--r--tests/scripts/list/benchmark_nnpkg_model_list.txt8
-rw-r--r--tests/scripts/list/frameworktest_list.aarch64.acl_cl.txt46
-rw-r--r--tests/scripts/list/frameworktest_list.aarch64.acl_neon.txt41
-rw-r--r--tests/scripts/list/frameworktest_list.aarch64.cpu.txt33
-rw-r--r--tests/scripts/list/frameworktest_list.armv7l.acl_cl.txt46
-rw-r--r--tests/scripts/list/frameworktest_list.armv7l.acl_neon.txt42
-rw-r--r--tests/scripts/list/frameworktest_list.armv7l.cpu.txt38
-rw-r--r--tests/scripts/list/frameworktest_list.noarch.interp.txt16
-rw-r--r--tests/scripts/list/frameworktest_list.x86_64.cpu.txt15
-rw-r--r--tests/scripts/list/neurun_frameworktest_list.armv7l.acl_cl.txt46
-rw-r--r--tests/scripts/list/neurun_frameworktest_list.armv7l.acl_neon.txt41
-rw-r--r--tests/scripts/list/neurun_frameworktest_list.armv7l.cpu.txt13
-rw-r--r--tests/scripts/list/neurun_frameworktest_list.armv7l.ncnn.txt2
-rw-r--r--tests/scripts/list/neurun_frameworktest_list.noarch.interp.txt16
-rw-r--r--tests/scripts/list/neurun_frameworktest_list.x86-64.cpu.txt12
-rw-r--r--tests/scripts/list/tflite_loader_list.aarch64.txt35
-rw-r--r--tests/scripts/list/tflite_loader_list.armv7l.txt44
-rwxr-xr-xtests/scripts/merge_result_of_benchmark_nnpkg.py32
-rwxr-xr-xtests/scripts/oneapi_test/install_oneapi_test_nnpackages.sh142
-rw-r--r--tests/scripts/oneapi_test/models/add/config.sh1
-rwxr-xr-xtests/scripts/test-driver.sh20
-rwxr-xr-xtests/scripts/test_framework.sh4
-rwxr-xr-xtests/scripts/test_scheduler_with_profiling.sh6
-rwxr-xr-xtests/scripts/unittest.sh22
-rw-r--r--tests/tools/nnapi_test/src/nnapi_test.cc6
-rw-r--r--tests/tools/nnpackage_run/CMakeLists.txt10
-rw-r--r--tests/tools/nnpackage_run/src/args.cc20
-rw-r--r--tests/tools/nnpackage_run/src/args.h2
-rw-r--r--tests/tools/nnpackage_run/src/nnpackage_run.cc43
-rw-r--r--tests/tools/tflite_benchmark/src/tflite_benchmark.cc44
-rw-r--r--tests/tools/tflite_loader/CMakeLists.txt8
-rw-r--r--tests/tools/tflite_loader/src/tflite_loader.cc22
-rw-r--r--tests/tools/tflite_run/CMakeLists.txt2
-rw-r--r--tests/tools/tflite_run/src/args.cc7
-rw-r--r--tests/tools/tflite_run/src/tensor_loader.cc16
-rw-r--r--tests/tools/tflite_run/src/tensor_loader.h16
-rw-r--r--tests/tools/tflite_run/src/tflite_run.cc32
-rwxr-xr-xtools/cross/build_android_ndk.sh2
-rwxr-xr-xtools/cross/install_android_sdk.sh167
-rwxr-xr-xtools/kernel_report/kernel_report.py171
-rw-r--r--tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.acl_cl2
-rw-r--r--tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.acl_neon4
-rw-r--r--tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.cpu16
-rw-r--r--tools/nnpackage_tool/nnpkg_test/list.noarch.interp6
-rw-r--r--tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.md2
-rw-r--r--tools/nnpackage_tool/tf2tfliteV2/README.md47
-rwxr-xr-xtools/nnpackage_tool/tf2tfliteV2/tf2tfliteV2.py173
-rwxr-xr-xtools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py1
-rwxr-xr-x[-rw-r--r--]tools/pbfile_tool/convert_ckpt_to_pb.py0
-rwxr-xr-x[-rw-r--r--]tools/pbfile_tool/convert_pb_to_pbtxt.py0
-rwxr-xr-x[-rw-r--r--]tools/pbfile_tool/extract_subgraph.py0
-rwxr-xr-x[-rw-r--r--]tools/tensorflow_model_freezer/__init__.py0
-rwxr-xr-x[-rw-r--r--]tools/tensorflow_model_freezer/base_freezer.py0
-rwxr-xr-x[-rw-r--r--]tools/tensorflow_model_freezer/model_freezer_util.py0
-rwxr-xr-x[-rw-r--r--]tools/tensorflow_model_freezer/sample/Operation_gen.py0
-rwxr-xr-x[-rw-r--r--]tools/tensorflow_model_freezer/sample/UNSTACK_gen.py0
-rwxr-xr-x[-rw-r--r--]tools/tensorflow_model_freezer/sample/__init__.py0
-rw-r--r--tools/tflite_accuracy/README.md2
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/config_saver.py5
-rwxr-xr-xtools/tflitefile_tool/graph_stats.py84
-rwxr-xr-xtools/tflitefile_tool/model_parser.py31
-rw-r--r--tools/tflitefile_tool/model_printer.py146
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/model_saver.py0
-rwxr-xr-xtools/tflitefile_tool/operator_parser.py6
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/operator_printer.py15
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/option_printer.py0
-rwxr-xr-xtools/tflitefile_tool/perf_predictor.py28
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/select_operator.py27
-rwxr-xr-xtools/tflitefile_tool/subgraph_printer.py92
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tensor_printer.py0
-rwxr-xr-xtools/tflitefile_tool/tensor_wrapping.py8
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/AbsOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/ActivationFunctionType.py0
-rwxr-xr-xtools/tflitefile_tool/tflite/AddNOptions.py28
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/AddOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/ArgMaxOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/ArgMinOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/BatchToSpaceNDOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/BidirectionalSequenceLSTMOptions.py14
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/BidirectionalSequenceRNNOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/Buffer.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/BuiltinOperator.py21
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/BuiltinOptions.py18
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/CallOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/CastOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/CombinerType.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/ConcatEmbeddingsOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/ConcatenationOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/Conv2DOptions.py0
-rwxr-xr-xtools/tflitefile_tool/tflite/CosOptions.py28
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/CustomOptionsFormat.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/CustomQuantization.py0
-rwxr-xr-xtools/tflitefile_tool/tflite/DepthToSpaceOptions.py39
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/DepthwiseConv2DOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/DequantizeOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/DivOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/EmbeddingLookupSparseOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/EqualOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/ExpOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/ExpandDimsOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/FakeQuantOptions.py8
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/FillOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/FloorDivOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/FloorModOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/FullyConnectedOptions.py14
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/FullyConnectedOptionsWeightsFormat.py0
-rwxr-xr-xtools/tflitefile_tool/tflite/GatherNdOptions.py28
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/GatherOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/GreaterEqualOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/GreaterOptions.py0
-rwxr-xr-xtools/tflitefile_tool/tflite/HardSwishOptions.py28
-rwxr-xr-xtools/tflitefile_tool/tflite/IfOptions.py50
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/L2NormOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/LSHProjectionOptions.py4
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/LSHProjectionType.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/LSTMKernelType.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/LSTMOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/LeakyReluOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/LessEqualOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/LessOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/LocalResponseNormalizationOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/LogSoftmaxOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/LogicalAndOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/LogicalNotOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/LogicalOrOptions.py0
-rwxr-xr-xtools/tflitefile_tool/tflite/MatrixDiagOptions.py28
-rwxr-xr-xtools/tflitefile_tool/tflite/MatrixSetDiagOptions.py28
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/MaximumMinimumOptions.py0
-rw-r--r--tools/tflitefile_tool/tflite/MeanOptions.py39
-rwxr-xr-xtools/tflitefile_tool/tflite/Metadata.py51
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/MirrorPadMode.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/MirrorPadOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/Model.py31
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/MulOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/NegOptions.py0
-rwxr-xr-xtools/tflitefile_tool/tflite/NonMaxSuppressionV4Options.py28
-rwxr-xr-xtools/tflitefile_tool/tflite/NonMaxSuppressionV5Options.py28
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/NotEqualOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/OneHotOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/Operator.py35
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/OperatorCode.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/PackOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/PadOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/PadV2Options.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/Padding.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/Pool2DOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/PowOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/QuantizationDetails.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/QuantizationParameters.py21
-rwxr-xr-xtools/tflitefile_tool/tflite/QuantizeOptions.py28
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/RNNOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/RangeOptions.py0
-rwxr-xr-xtools/tflitefile_tool/tflite/RankOptions.py28
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/ReducerOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/ReshapeOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/ResizeBilinearOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/ResizeNearestNeighborOptions.py0
-rwxr-xr-xtools/tflitefile_tool/tflite/ReverseSequenceOptions.py50
-rwxr-xr-xtools/tflitefile_tool/tflite/ReverseV2Options.py28
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SVDFOptions.py0
-rwxr-xr-xtools/tflitefile_tool/tflite/ScatterNdOptions.py28
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SelectOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SequenceRNNOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/ShapeOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SkipGramOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SliceOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SoftmaxOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SpaceToBatchNDOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SpaceToDepthOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SparseToDenseOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SplitOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SplitVOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SquareOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SquaredDifferenceOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SqueezeOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/StridedSliceOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SubGraph.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/SubOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/Tensor.py4
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/TensorType.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/TileOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/TopKV2Options.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/TransposeConvOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/TransposeOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/UnidirectionalSequenceLSTMOptions.py0
-rwxr-xr-xtools/tflitefile_tool/tflite/UniqueOptions.py39
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/UnpackOptions.py0
-rwxr-xr-xtools/tflitefile_tool/tflite/WhereOptions.py28
-rwxr-xr-xtools/tflitefile_tool/tflite/WhileOptions.py50
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/ZerosLikeOptions.py0
-rwxr-xr-x[-rw-r--r--]tools/tflitefile_tool/tflite/__init__.py0
-rw-r--r--tools/tflkit/README.md2
-rwxr-xr-x[-rw-r--r--]tools/tflkit/summarize_pb.py0
5895 files changed, 369514 insertions, 106246 deletions
diff --git a/.ctags b/.ctags
index 6f33a26d3..13c27abbe 100644
--- a/.ctags
+++ b/.ctags
@@ -2,6 +2,6 @@
--exclude=Product
--exclude=build
--exclude=tags
---exclude=tests/framework/cache
+--exclude=tests/scripts/framework/cache
--exclude=tools/cross/rootfs
--exclude=doxygen
diff --git a/.gitignore b/.gitignore
index 32c33603f..d0931912a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,7 +8,7 @@
*.pyc
# Test cache for model download
-/tests/framework/cache
+/tests/scripts/framework/cache
# Test report
/report
diff --git a/LICENSE b/LICENSE
index 2411d90dc..c1507bf42 100644
--- a/LICENSE
+++ b/LICENSE
@@ -3,6 +3,8 @@ This file provides full text of licenses used in this project
- Apache Licence 2.0
- MIT
- BSD-2-Clause
+- BSD 3-Clause
+- Mozilla Public License 2.0
...............................................................................
@@ -211,9 +213,9 @@ limitations under the License.
...............................................................................
-Copyright (c) 2016-2018 ARM Limited.
+The MIT License
-SPDX-License-Identifier: MIT
+Copyright (c) <year> <copyright holder>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -234,33 +236,19 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
.............................................................................
-COPYRIGHT
-All contributions by the University of California:
-Copyright (c) 2014-2017 The Regents of the University of California (Regents)
-All rights reserved.
+The BSD 2-Clause License
-All other contributions:
-Copyright (c) 2014-2017, the respective contributors
-All rights reserved.
-
-Caffe uses a shared copyright model: each contributor holds copyright over
-their contributions to Caffe. The project versioning records all such
-contribution and copyright details. If a contributor wants to further mark
-their specific copyright on a particular contribution, they should indicate
-their copyright solely in the commit message of the change when it is
-committed.
-
-LICENSE
+Copyright <year> <copyright holder>
Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
+modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
+ list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
+ and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
@@ -273,8 +261,396 @@ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-CONTRIBUTION AGREEMENT
+.............................................................................
+
+The BSD 3-Clause License
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+.............................................................................
+
+Mozilla Public License Version 2.0
+
+1. Definitions
+
+1.1. “Contributor”
+
+ means each individual or legal entity that creates, contributes to the
+ creation of, or owns Covered Software.
+
+1.2. “Contributor Version”
+
+ means the combination of the Contributions of others (if any) used
+ by a Contributor and that particular Contributor’s Contribution.
+
+1.3. “Contribution”
+
+ means Covered Software of a particular Contributor.
+
+1.4. “Covered Software”
+
+ means Source Code Form to which the initial Contributor has
+ attached the notice in Exhibit A, the Executable Form of such Source
+ Code Form, and Modifications of such Source Code Form, in each
+ case including portions thereof.
+
+1.5. “Incompatible With Secondary Licenses”
+
+ means
+
+ a. that the initial Contributor has attached the notice described in
+ Exhibit B to the Covered Software; or
+
+ b. that the Covered Software was made available under the terms
+ of version 1.1 or earlier of the License, but not also under the
+ terms of a Secondary License.
+
+1.6. “Executable Form”
+
+ means any form of the work other than Source Code Form.
+
+1.7. “Larger Work”
+
+ means a work that combines Covered Software with other material,
+ in a separate file or files, that is not Covered Software.
+
+1.8. “License”
+
+ means this document.
+
+1.9. “Licensable”
+
+ means having the right to grant, to the maximum extent possible,
+ whether at the time of the initial grant or subsequently, any and all of
+ the rights conveyed by this License.
+
+1.10. “Modifications”
+
+ means any of the following:
+
+ a. any file in Source Code Form that results from an addition to,
+ deletion from, or modification of the contents of Covered
+ Software; or
+
+ b. any new file in Source Code Form that contains any Covered
+ Software.
+
+1.11. “Patent Claims” of a Contributor
+
+ means any patent claim(s), including without limitation, method,
+ process, and apparatus claims, in any patent Licensable by such
+ Contributor that would be infringed, but for the grant of the License,
+ by the making, using, selling, offering for sale, having made, import,
+ or transfer of either its Contributions or its Contributor Version.
+
+1.12. “Secondary License”
+
+ means either the GNU General Public License, Version 2.0, the GNU
+ Lesser General Public License, Version 2.1, the GNU Affero General
+ Public License, Version 3.0, or any later versions of those licenses.
+
+1.13. “Source Code Form”
+
+ means the form of the work preferred for making modifications.
+
+1.14. “You” (or “Your”)
+
+ means an individual or a legal entity exercising rights under this
+ License. For legal entities, “You” includes any entity that controls, is
+ controlled by, or is under common control with You. For purposes of
+ this definition, “control” means (a) the power, direct or indirect, to
+ cause the direction or management of such entity, whether by
+ contract or otherwise, or (b) ownership of more than fifty percent
+ (50%) of the outstanding shares or beneficial ownership of such
+ entity.
+
+2. License Grants and Conditions
+
+2.1. Grants
+
+Each Contributor hereby grants You a world-wide, royalty-free, non-
+exclusive license:
+
+ a. under intellectual property rights (other than patent or trademark)
+ Licensable by such Contributor to use, reproduce, make available,
+ modify, display, perform, distribute, and otherwise exploit its
+ Contributions, either on an unmodified basis, with Modifications, or
+ as part of a Larger Work; and
+
+ b. under Patent Claims of such Contributor to make, use, sell, offer for
+ sale, have made, import, and otherwise transfer either its
+ Contributions or its Contributor Version.
+
+2.2. Effective Date
+
+The licenses granted in Section 2.1 with respect to any Contribution
+become effective for each Contribution on the date the Contributor first
+distributes such Contribution.
+
+2.3. Limitations on Grant Scope
+
+The licenses granted in this Section 2 are the only rights granted under this
+License. No additional rights or licenses will be implied from the
+distribution or licensing of Covered Software under this License.
+Notwithstanding Section 2.1(b) above, no patent license is granted by a
+Contributor:
+
+ a. for any code that a Contributor has removed from Covered Software;
+ or
+
+ b. for infringements caused by: (i) Your and any other third party’s
+ modifications of Covered Software, or (ii) the combination of its
+ Contributions with other software (except as part of its Contributor
+ Version); or
+
+ c. under Patent Claims infringed by Covered Software in the absence
+ of its Contributions.
+
+This License does not grant any rights in the trademarks, service marks, or
+logos of any Contributor (except as may be necessary to comply with the
+notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+No Contributor makes additional grants as a result of Your choice to
+distribute the Covered Software under a subsequent version of this License
+(see Section 10.2) or under the terms of a Secondary License (if permitted
+under the terms of Section 3.3).
+
+2.5. Representation
+
+Each Contributor represents that the Contributor believes its Contributions
+are its original creation(s) or it has sufficient rights to grant the rights to its
+Contributions conveyed by this License.
+
+2.6. Fair Use
+
+This License is not intended to limit any rights You have under applicable
+copyright doctrines of fair use, fair dealing, or other equivalents.
+
+2.7. Conditions
+
+Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
+Section 2.1.
+
+3. Responsibilities
-By contributing to the BVLC/caffe repository through pull-request, comment,
-or otherwise, the contributor releases their content to the
-license and copyright terms herein. \ No newline at end of file
+3.1. Distribution of Source Form
+
+All distribution of Covered Software in Source Code Form, including any
+Modifications that You create or to which You contribute, must be under
+the terms of this License. You must inform recipients that the Source Code
+Form of the Covered Software is governed by the terms of this License,
+and how they can obtain a copy of this License. You may not attempt to
+alter or restrict the recipients’ rights in the Source Code Form.
+
+3.2. Distribution of Executable Form
+
+If You distribute Covered Software in Executable Form then:
+
+ a. such Covered Software must also be made available in Source Code
+ Form, as described in Section 3.1, and You must inform recipients of
+ the Executable Form how they can obtain a copy of such Source
+ Code Form by reasonable means in a timely manner, at a charge no
+ more than the cost of distribution to the recipient; and
+
+ b. You may distribute such Executable Form under the terms of this
+ License, or sublicense it under different terms, provided that the
+ license for the Executable Form does not attempt to limit or alter the
+ recipients’ rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+You may create and distribute a Larger Work under terms of Your choice,
+provided that You also comply with the requirements of this License for
+the Covered Software. If the Larger Work is a combination of Covered
+Software with a work governed by one or more Secondary Licenses, and
+the Covered Software is not Incompatible With Secondary Licenses, this
+License permits You to additionally distribute such Covered Software
+under the terms of such Secondary License(s), so that the recipient of the
+Larger Work may, at their option, further distribute the Covered Software
+under the terms of either this License or such Secondary License(s).
+
+3.4. Notices
+
+You may not remove or alter the substance of any license notices
+(including copyright notices, patent notices, disclaimers of warranty, or
+limitations of liability) contained within the Source Code Form of the
+Covered Software, except that You may alter any license notices to the
+extent required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+You may choose to offer, and to charge a fee for, warranty, support,
+indemnity or liability obligations to one or more recipients of Covered
+Software. However, You may do so only on Your own behalf, and not on
+behalf of any Contributor. You must make it absolutely clear that any such
+warranty, support, indemnity, or liability obligation is offered by You
+alone, and You hereby agree to indemnify every Contributor for any
+liability incurred by such Contributor as a result of warranty, support,
+indemnity or liability terms You offer. You may include additional
+disclaimers of warranty and limitations of liability specific to any
+jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+
+If it is impossible for You to comply with any of the terms of this License
+with respect to some or all of the Covered Software due to statute, judicial
+order, or regulation then You must: (a) comply with the terms of this
+License to the maximum extent possible; and (b) describe the limitations
+and the code they affect. Such description must be placed in a text file
+included with all distributions of the Covered Software under this License.
+Except to the extent prohibited by statute or regulation, such description
+must be sufficiently detailed for a recipient of ordinary skill to be able to
+understand it.
+
+5. Termination
+
+5.1. The rights granted under this License will terminate automatically if
+You fail to comply with any of its terms. However, if You become
+compliant, then the rights granted under this License from a particular
+Contributor are reinstated (a) provisionally, unless and until such
+Contributor explicitly and finally terminates Your grants, and (b) on an
+ongoing basis, if such Contributor fails to notify You of the non-
+compliance by some reasonable means prior to 60 days after You have
+come back into compliance. Moreover, Your grants from a particular
+Contributor are reinstated on an ongoing basis if such Contributor notifies
+You of the non-compliance by some reasonable means, this is the first
+time You have received notice of non-compliance with this License from
+such Contributor, and You become compliant prior to 30 days after Your
+receipt of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+infringement claim (excluding declaratory judgment actions, counter-
+claims, and cross-claims) alleging that a Contributor Version directly or
+indirectly infringes any patent, then the rights granted to You by any and
+all Contributors for the Covered Software under Section 2.1 of this
+License shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all end
+user license agreements (excluding distributors and resellers) which have
+been validly granted by You or Your distributors under this License prior
+to termination shall survive termination.
+
+6. Disclaimer of Warranty
+
+ Covered Software is provided under this License on an “as is”
+ basis, without warranty of any kind, either expressed, implied,
+ or statutory, including, without limitation, warranties that the
+ Covered Software is free of defects, merchantable, fit for a
+ particular purpose or non-infringing. The entire risk as to the
+ quality and performance of the Covered Software is with You.
+ Should any Covered Software prove defective in any respect,
+ You (not any Contributor) assume the cost of any necessary
+ servicing, repair, or correction. This disclaimer of warranty
+ constitutes an essential part of this License. No use of any
+ Covered Software is authorized under this License except
+ under this disclaimer.
+
+7. Limitation of Liability
+
+ Under no circumstances and under no legal theory, whether tort
+ (including negligence), contract, or otherwise, shall any
+ Contributor, or anyone who distributes Covered Software as
+ permitted above, be liable to You for any direct, indirect,
+ special, incidental, or consequential damages of any character
+ including, without limitation, damages for lost profits, loss of
+ goodwill, work stoppage, computer failure or malfunction, or
+ any and all other commercial damages or losses, even if such
+ party shall have been informed of the possibility of such
+ damages. This limitation of liability shall not apply to liability
+ for death or personal injury resulting from such party’s
+ negligence to the extent applicable law prohibits such
+ limitation. Some jurisdictions do not allow the exclusion or
+ limitation of incidental or consequential damages, so this
+ exclusion and limitation may not apply to You.
+
+8. Litigation
+
+Any litigation relating to this License may be brought only in the courts of
+a jurisdiction where the defendant maintains its principal place of business
+and such litigation shall be governed by laws of that jurisdiction, without
+reference to its conflict-of-law provisions. Nothing in this Section shall
+prevent a party’s ability to bring cross-claims or counter-claims.
+
+9. Miscellaneous
+
+This License represents the complete agreement concerning the subject
+matter hereof. If any provision of this License is held to be unenforceable,
+such provision shall be reformed only to the extent necessary to make it
+enforceable. Any law or regulation which provides that the language of a
+contract shall be construed against the drafter shall not be used to construe
+this License against a Contributor.
+
+10. Versions of the License
+
+10.1. New Versions
+
+Mozilla Foundation is the license steward. Except as provided in
+Section 10.3, no one other than the license steward has the right to modify
+or publish new versions of this License. Each version will be given a
+distinguishing version number.
+
+10.2. Effect of New Versions
+
+You may distribute the Covered Software under the terms of the version of
+the License under which You originally received the Covered Software, or
+under the terms of any subsequent version published by the license
+steward.
+
+10.3. Modified Versions
+
+If you create software not governed by this License, and you want to
+create a new license for such software, you may create and use a modified
+version of this License if you rename the license and remove any
+references to the name of the license steward (except to note that such
+modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses
+
+If You choose to distribute Source Code Form that is Incompatible With
+Secondary Licenses under the terms of this version of the License, the
+notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+
+ This Source Code Form is subject to the terms of the Mozilla
+ Public License, v. 2.0. If a copy of the MPL was not distributed
+ with this file, You can obtain one at
+ https://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular file, then
+You may include the notice in a location (such as a LICENSE file in a
+relevant directory) where a recipient would be likely to look for such a
+notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - “Incompatible With Secondary Licenses” Notice
+
+ This Source Code Form is “Incompatible With Secondary
+ Licenses”, as defined by the Mozilla Public License, v. 2.0.
diff --git a/Makefile.template b/Makefile.template
index ef3a44c15..588a30052 100644
--- a/Makefile.template
+++ b/Makefile.template
@@ -155,7 +155,7 @@ build_test_suite: install_internal install_internal_acl
@echo "packaging test suite"
@rm -rf $(INSTALL_PATH)/test-suite.tar.gz
# TODO Divide runtime package, external library package, and test suite
- @tar -zcf test-suite.tar.gz tests/scripts tests/framework infra Product/out --dereference
+ @tar -zcf test-suite.tar.gz tests/scripts infra Product/out --dereference
@mv test-suite.tar.gz $(INSTALL_PATH)/.
build_coverage_suite: install_internal install_internal_acl
@@ -163,7 +163,7 @@ build_coverage_suite: install_internal install_internal_acl
@rm -rf $(INSTALL_PATH)/coverage-suite.tar.gz
@find Product -name "*.gcno" > include_lists.txt
@pwd | grep -o '/' | wc -l > tests/scripts/build_path_depth.txt
- @tar -zcf coverage-suite.tar.gz tests/scripts tests/framework infra Product/out --dereference -T include_lists.txt
+ @tar -zcf coverage-suite.tar.gz tests/scripts infra Product/out --dereference -T include_lists.txt
@rm -rf include_lists.txt tests/scripts/build_path_depth.txt
@mv coverage-suite.tar.gz $(INSTALL_PATH)/.
diff --git a/README.md b/README.md
index 8772bb120..dfa91afa8 100644
--- a/README.md
+++ b/README.md
@@ -34,18 +34,18 @@ the target platform, such as the Linux kernel based OS including Tizen.
You can suggest development of nnfw's features that are not yet available.
-The functions requested so far can be checked in the [popular feature request](https://github.sec.samsung.net/STAR/nnfw/issues?utf8=%E2%9C%93&q=is%3Aopen+is%3Aissue+label%3AFEATURE_REQUEST+sort%3Areactions-%2B1-desc) list.
+The functions requested so far can be checked in the [popular feature request](https://github.com/Samsung/ONE/issues?utf8=%E2%9C%93&q=is%3Aopen+is%3Aissue+label%3AFEATURE_REQUEST+sort%3Areactions-%2B1-desc) list.
- If the feature you want is on the list, :+1: to the body of the issue. The feature with the most
:+1: is placed at the top of the list. When adding new features, we will prioritize them with this reference.
Of course, it is good to add an additional comment which describes your request in detail.
-- For features not listed, [create a new issue](https://github.sec.samsung.net/STAR/nnfw/issues/new).
+- For features not listed, [create a new issue](https://github.com/Samsung/ONE/issues/new).
Sooner or later, the maintainer will tag the `FEATURE_REQUEST` label and appear on the list.
We expect one of the most frequent feature requests would be the operator kernel implementation.
It is good to make a request, but it is better if you contribute by yourself. See the following guide,
-[How to Implement Operator Kernel](docs/nnfw/HowToImplementOperatorKernel.md), for help.
+[How to Implement Operator Kernel](docs/nnfw/howto/HowToAddNewOperation.md), for help.
We are looking forward to your participation.
Thank you in advance!
@@ -71,25 +71,4 @@ NPU.
## How to Contact
-- Please post questions, issues, or suggestions into [Issues](https://github.sec.samsung.net/STAR/nnfw/issues).
-
-----
-
-## Notice
-
-### 22/07/2019
-
-Congratulations! On July 22nd, 2019, _nnfw_ repo and
-[_nncc_](https://github.sec.samsung.net/STAR/nncc) repo are finally integrated into single one. Now
-all activities related to the development of _nnas(Neural Network Acceleration Solution)_ will
-proceed in this integrated _nnfw_ repo. The old _nncc_ repo will only be maintained for follow up on
-remaining issues and for preserving development history. The following notice will remain in place
-until the update of documents in integrated repo is complete.
-
-### 02/05/2019
-
-~~We are currently working on [_nncc_](https://github.sec.samsung.net/STAR/nncc) as a sibling project.
-In our plan, the two projects will soon be integrated into one, and focusing on their roles as
-front-end(_nncc_) and back-end(_nnfw_), respectively. It will accompany the physical combination of
-the github repo.~~ You can find the latest roadmap of the integrated project
-[here](https://github.sec.samsung.net/orgs/STAR/projects/1).
+- Please post questions, issues, or suggestions into [Issues](https://github.com/Samsung/ONE/issues).
diff --git a/compiler/CMakeLists.txt b/compiler/CMakeLists.txt
new file mode 100644
index 000000000..7cf12f164
--- /dev/null
+++ b/compiler/CMakeLists.txt
@@ -0,0 +1,78 @@
+# TODO Validate the argument of "requires"
+function(get_project_build_order VAR)
+ # This file will describe the dependencies among projects
+ set(DEPS_FILE "${CMAKE_CURRENT_BINARY_DIR}/compiler.deps")
+
+ # Remove .deps file
+ file(REMOVE "${DEPS_FILE}")
+
+ # Let's create .deps file
+ list_subdirectories(PROJECT_DIRS)
+
+ foreach(PROJECT_DIR IN ITEMS ${PROJECT_DIRS})
+ set(SUCC "${PROJECT_DIR}")
+ set(REQUIRES_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${PROJECT_DIR}/requires.cmake")
+
+ macro(require PRED)
+ file(APPEND "${DEPS_FILE}" "${PRED} ${SUCC} ")
+ endmacro(require)
+
+ file(APPEND "${DEPS_FILE}" "${SUCC} ${SUCC} ")
+ if(EXISTS "${REQUIRES_FILE}")
+ include(${REQUIRES_FILE})
+ endif(EXISTS "${REQUIRES_FILE}")
+ endforeach(PROJECT_DIR)
+
+ # NOTE "tsort" is a part of the POSIX.1 standard.
+ #
+ # Reference: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/tsort.html
+ execute_process(COMMAND tsort "${DEPS_FILE}"
+ OUTPUT_VARIABLE ORDER
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+ # Remove newline characters
+ # TODO Check which one (UNIX_COMMAND or WINDOWS_COMMAND) is correct
+ separate_arguments(ORDER UNIX_COMMAND ${ORDER})
+
+ set(${VAR} "${ORDER}" PARENT_SCOPE)
+endfunction(get_project_build_order)
+
+function(add_compiler_directory DIR)
+ string(TOUPPER ${DIR} PREFIX)
+
+ option(BUILD_COMPILER_${PREFIX} "Build compiler/${dir}" ON)
+ set(BUILD_WHITELIST "" CACHE STRING "Set modules to be built")
+
+ if(NOT BUILD_WHITELIST STREQUAL "")
+ set(ENABLE OFF)
+ set(CURRENT_DIR ${DIR})
+ foreach(ACCEPTED_DIR IN ITEMS ${BUILD_WHITELIST})
+ if(ACCEPTED_DIR STREQUAL CURRENT_DIR)
+ set(ENABLE ON)
+ endif()
+ endforeach(ACCEPTED_DIR)
+ else()
+ set(ENABLE ${BUILD_COMPILER_${PREFIX}})
+ endif()
+
+ # This line prevents some errors in this CMakeLists.txt
+ if(NOT DEFINED ENABLE)
+ message(FATAL_ERROR "Undefined ENABLE! Please check CMakeLists.txt")
+ endif()
+
+ if(ENABLE)
+ message(STATUS "Configure ${PREFIX}")
+ add_subdirectory(${DIR})
+ message(STATUS "Configure ${PREFIX} - Done")
+ endif(ENABLE)
+endfunction(add_compiler_directory)
+
+function(add_compiler_directories)
+ get_project_build_order(PROJECT_DIRS)
+
+ foreach(PROJECT_DIR IN ITEMS ${PROJECT_DIRS})
+ add_compiler_directory(${PROJECT_DIR})
+ endforeach(PROJECT_DIR)
+endfunction(add_compiler_directories)
+
+add_compiler_directories()
diff --git a/compiler/adtidas/CMakeLists.txt b/compiler/adtidas/CMakeLists.txt
new file mode 100644
index 000000000..0d84740b7
--- /dev/null
+++ b/compiler/adtidas/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_library(adtidas INTERFACE)
+target_include_directories(adtidas INTERFACE include)
diff --git a/compiler/adtidas/include/adtidas/SmallVector.h b/compiler/adtidas/include/adtidas/SmallVector.h
new file mode 100644
index 000000000..1ad630c63
--- /dev/null
+++ b/compiler/adtidas/include/adtidas/SmallVector.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _ADTIDAS_SMALL_VECTOR_H_
+#define _ADTIDAS_SMALL_VECTOR_H_
+
+#include <cassert>
+#include <iterator>
+#include <initializer_list>
+
+namespace adt
+{
+
+/**
+ * @brief vector with cheap memory allocation
+ * @tparam T type of elements
+ * @tparam Capacity maximum number of elements
+ * @note much like std::array, but tracks number of used elements. Stored in stack
+ */
+template <typename T, size_t Capacity> class small_vector
+{
+public:
+ using value_type = T;
+ using reference = T &;
+ using iterator = T *;
+ using const_iterator = const T *;
+ using reverse_iterator = std::reverse_iterator<iterator>;
+ using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+ using size_type = size_t;
+
+ template <typename It> small_vector(It begin, It end) : _size(std::distance(begin, end))
+ {
+ assert(_size <= Capacity);
+ std::copy(begin, end, this->begin());
+ }
+
+ explicit small_vector(size_t size, value_type initializer = value_type()) : _size(size)
+ {
+ assert(_size <= Capacity);
+ std::fill(begin(), end(), initializer);
+ }
+
+ explicit small_vector() : _size(0) {}
+
+ small_vector(std::initializer_list<value_type> l) : _size(l.size())
+ {
+ assert(_size <= Capacity);
+ std::copy(std::begin(l), std::end(l), begin());
+ }
+
+ /**
+ * @return current size
+ */
+ inline size_t size() const noexcept { return _size; }
+
+ /**
+ * @return maximum number of elements this vector can hold
+ */
+ constexpr size_t capacity() const { return Capacity; }
+
+ /**
+ * @brief resize to given new size
+ * @note if new size is greater than current size, new elements are default-initialized
+ */
+ void resize(size_t new_size) noexcept
+ {
+ assert(new_size <= Capacity);
+ if (new_size > _size)
+ {
+ std::fill(_storage + _size, _storage + new_size, T());
+ }
+ _size = new_size;
+ }
+
+ /**
+ * @return reference to the element at position idx
+ */
+ inline reference operator[](size_t idx) noexcept
+ {
+ assert(idx < _size);
+ return _storage[idx];
+ }
+
+ /**
+ * @return value of element at position idx
+ */
+ inline constexpr value_type operator[](size_t idx) const noexcept
+ {
+ // assert on the same line since c++11 does not allow multi-line constexpr functions
+ return assert(idx < _size), _storage[idx];
+ }
+
+ inline iterator begin() noexcept { return std::begin(_storage); }
+ inline iterator end() noexcept { return _storage + _size; }
+
+ inline reverse_iterator rbegin() noexcept { return reverse_iterator{end()}; }
+ inline reverse_iterator rend() noexcept { return reverse_iterator{begin()}; }
+
+ // const overloads
+ inline const_iterator begin() const noexcept { return std::begin(_storage); }
+ inline const_iterator end() const noexcept { return _storage + _size; }
+
+ inline const_reverse_iterator rbegin() const noexcept { return reverse_iterator{end()}; }
+ inline const_reverse_iterator rend() const noexcept { return reverse_iterator{begin()}; }
+
+ inline void push_back(const value_type &e) noexcept
+ {
+ assert(_size < Capacity);
+ _storage[_size++] = e;
+ }
+
+ inline void push_back(value_type &&e) noexcept
+ {
+ assert(_size < Capacity);
+ _storage[_size++] = std::move(e);
+ }
+
+private:
+ size_t _size;
+ value_type _storage[Capacity]{};
+};
+
+template <typename T, size_t LCapacity, size_t RCapacity>
+bool operator==(const small_vector<T, LCapacity> &lhs, const small_vector<T, RCapacity> &rhs)
+{
+ if (lhs.size() != rhs.size())
+ {
+ return false;
+ }
+
+ bool equal = true;
+ size_t end = lhs.size();
+ for (size_t i = 0; i < end; ++i)
+ {
+ equal &= (lhs[i] == rhs[i]);
+ }
+
+ return equal;
+}
+
+} // namespace adt
+
+#endif //_ADTIDAS_SMALL_VECTOR_H_
diff --git a/compiler/angkor/CMakeLists.txt b/compiler/angkor/CMakeLists.txt
new file mode 100644
index 000000000..44b5e9058
--- /dev/null
+++ b/compiler/angkor/CMakeLists.txt
@@ -0,0 +1,22 @@
+file(GLOB_RECURSE HEADERS "include/*.h")
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+# NOTE STATIC is deliberately used here to allow clients to use 'angkor' without installation
+add_library(angkor STATIC ${HEADERS} ${SOURCES})
+set_target_properties(angkor PROPERTIES POSITION_INDEPENDENT_CODE ON)
+set_target_properties(angkor PROPERTIES LINKER_LANGUAGE CXX)
+target_include_directories(angkor PUBLIC include)
+target_link_libraries(angkor PRIVATE nncc_common)
+target_link_libraries(angkor PUBLIC nncc_coverage)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for test
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(angkor_test ${TESTS})
+target_link_libraries(angkor_test angkor)
diff --git a/compiler/angkor/README.md b/compiler/angkor/README.md
new file mode 100644
index 000000000..f761b8740
--- /dev/null
+++ b/compiler/angkor/README.md
@@ -0,0 +1,51 @@
+# angkor
+
+## Purpose
+
+_angkor_ is a `nncc` core library
+
+## How to use
+
+_angkor_ implements abstract data type(ADT) for feature, kernel, tensor.
+There are layout, shape information and enumerator and so on.
+
+To use some of these things, just insert `include`!
+```cpp
+#include <nncc/core/ADT/feature/WHAT_YOU_WANT>
+#include <nncc/core/ADT/kernel/WHAT_YOU_WANT>
+#include <nncc/core/ADT/tensor/WHAT_YOU_WANT>
+```
+
+## Example
+
+- `compiler/coco/core/CMakeLists.txt`
+
+```cmake
+target_link_libraries(coco_core PUBLIC angkor)
+```
+
+- `compiler/coco/core/src/IR/Arg.cpp`
+
+```cpp
+#include "coco/IR/Arg.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+namespace
+{
+const nncc::core::ADT::tensor::LexicalLayout l;
+}
+
+namespace coco
+{
+
+Arg::Arg(const nncc::core::ADT::tensor::Shape &shape) : _shape{shape}, _bag{nullptr}
+{
+ _map.resize(nncc::core::ADT::tensor::num_elements(shape));
+}
+
+// ....
+
+}
+```
diff --git a/compiler/angkor/include/angkor/TensorIndex.h b/compiler/angkor/include/angkor/TensorIndex.h
new file mode 100644
index 000000000..2fc10509e
--- /dev/null
+++ b/compiler/angkor/include/angkor/TensorIndex.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANGKOR_TENSOR_INDEX_H__
+#define __ANGKOR_TENSOR_INDEX_H__
+
+#include "nncc/core/ADT/tensor/Index.h"
+
+namespace angkor
+{
+
+using TensorIndex = ::nncc::core::ADT::tensor::Index;
+
+} // namespace angkor
+
+#endif // __ANGKOR_TENSOR_INDEX_H__
diff --git a/compiler/angkor/include/angkor/TensorShape.h b/compiler/angkor/include/angkor/TensorShape.h
new file mode 100644
index 000000000..ab62bd8d9
--- /dev/null
+++ b/compiler/angkor/include/angkor/TensorShape.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANGKOR_TENSOR_SHAPE_H__
+#define __ANGKOR_TENSOR_SHAPE_H__
+
+#include "nncc/core/ADT/tensor/Shape.h"
+
+namespace angkor
+{
+
+using TensorShape = ::nncc::core::ADT::tensor::Shape;
+
+} // namespace angkor
+
+#endif // __ANGKOR_TENSOR_SHAPE_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/feature/Accessor.h b/compiler/angkor/include/nncc/core/ADT/feature/Accessor.h
new file mode 100644
index 000000000..aa4621851
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/feature/Accessor.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_FEATURE_ACCESSOR_H__
+#define __NNCC_CORE_ADT_FEATURE_ACCESSOR_H__
+
+#include <cstdint>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace feature
+{
+
+template <typename T> struct Accessor
+{
+ virtual ~Accessor() = default;
+
+ virtual T &at(uint32_t ch, uint32_t row, uint32_t col) = 0;
+};
+
+} // namespace feature
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_FEATURE_ACCESSOR_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/feature/Buffer.h b/compiler/angkor/include/nncc/core/ADT/feature/Buffer.h
new file mode 100644
index 000000000..86fd60295
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/feature/Buffer.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_FEATURE_BUFFER_H__
+#define __NNCC_CORE_ADT_FEATURE_BUFFER_H__
+
+#include "nncc/core/ADT/feature/View.h"
+
+#include <vector>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace feature
+{
+
+template <typename T> class Buffer final : public View<T>
+{
+public:
+ explicit Buffer(const Shape &shape, const Layout &layout) : View<T>{shape, layout}
+ {
+ _buffer.resize(num_elements(shape));
+ }
+
+public:
+ virtual T *base(void) { return _buffer.data(); }
+ virtual const T *base(void) const { return _buffer.data(); }
+
+private:
+ std::vector<T> _buffer;
+};
+
+template <typename T, typename LayoutImpl> Buffer<T> make_buffer(const Shape &shape)
+{
+ return Buffer<T>{shape, LayoutImpl{}};
+}
+
+} // namespace feature
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_FEATURE_BUFFER_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/feature/CHWLayout.h b/compiler/angkor/include/nncc/core/ADT/feature/CHWLayout.h
new file mode 100644
index 000000000..d84841d10
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/feature/CHWLayout.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_FEATURE_CHW_LAYOUT_H__
+#define __NNCC_CORE_ADT_FEATURE_CHW_LAYOUT_H__
+
+#include "nncc/core/ADT/feature/Layout.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace feature
+{
+
+struct CHWLayout final : public Layout
+{
+ CHWLayout();
+};
+
+} // namespace feature
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_FEATURE_CHW_LAYOUT_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/feature/HWCLayout.h b/compiler/angkor/include/nncc/core/ADT/feature/HWCLayout.h
new file mode 100644
index 000000000..df885ad82
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/feature/HWCLayout.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_FEATURE_HWC_LAYOUT_H__
+#define __NNCC_CORE_ADT_FEATURE_HWC_LAYOUT_H__
+
+#include "nncc/core/ADT/feature/Layout.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace feature
+{
+
+struct HWCLayout final : public Layout
+{
+ HWCLayout();
+};
+
+} // namespace feature
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_FEATURE_HWC_LAYOUT_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/feature/Layout.h b/compiler/angkor/include/nncc/core/ADT/feature/Layout.h
new file mode 100644
index 000000000..762545a84
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/feature/Layout.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_FEATURE_LAYOUT_H__
+#define __NNCC_CORE_ADT_FEATURE_LAYOUT_H__
+
+#include "nncc/core/ADT/feature/Shape.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace feature
+{
+
+class Layout
+{
+public:
+ using Func = uint32_t (*)(const Shape &, uint32_t ch, uint32_t row, uint32_t col);
+
+public:
+ explicit Layout(const Func &func);
+
+public:
+ uint32_t offset(const Shape &shape, uint32_t ch, uint32_t row, uint32_t col) const
+ {
+ return _func(shape, ch, row, col);
+ }
+
+private:
+ Func _func;
+};
+
+} // namespace feature
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_FEATURE_LAYOUT_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/feature/Overlay.h b/compiler/angkor/include/nncc/core/ADT/feature/Overlay.h
new file mode 100644
index 000000000..93d86f56b
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/feature/Overlay.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_FEATURE_OVERLAY_H__
+#define __NNCC_CORE_ADT_FEATURE_OVERLAY_H__
+
+#include "nncc/core/ADT/feature/View.h"
+
+#include <vector>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace feature
+{
+
+template <typename T> class Overlay final : public View<T>
+{
+public:
+ explicit Overlay(const Shape &shape, const Layout &layout, T *base)
+ : View<T>{shape, layout}, _base{base}
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual T *base(void) { return _base; }
+ virtual const T *base(void) const { return _base; }
+
+private:
+ T *const _base;
+};
+
+template <typename T, typename LayoutImpl> Overlay<T> make_overlay(const Shape &shape, T *base)
+{
+ return Overlay<T>{shape, LayoutImpl{}, base};
+}
+
+} // namespace feature
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_FEATURE_OVERLAY_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/feature/Reader.h b/compiler/angkor/include/nncc/core/ADT/feature/Reader.h
new file mode 100644
index 000000000..9a6fb724b
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/feature/Reader.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_FEATURE_READER_H__
+#define __NNCC_CORE_ADT_FEATURE_READER_H__
+
+#include <cstdint>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace feature
+{
+
/**
 * @brief Read-only interface over a 3-D (channel/row/column) feature map
 */
template <typename T> struct Reader
{
  // Virtual destructor so derived readers can be destroyed through Reader<T> *
  virtual ~Reader() = default;

  /// @brief Return the value at position (ch, row, col)
  virtual T at(uint32_t ch, uint32_t row, uint32_t col) const = 0;
};
+
+} // namespace feature
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_FEATURE_READER_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/feature/Shape.h b/compiler/angkor/include/nncc/core/ADT/feature/Shape.h
new file mode 100644
index 000000000..319326308
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/feature/Shape.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_FEATURE_SHAPE_H__
+#define __NNCC_CORE_ADT_FEATURE_SHAPE_H__
+
+#include <cstdint>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace feature
+{
+
/**
 * @brief Shape (depth x height x width) of a feature map for convolution
 */
class Shape
{
public:
  Shape(uint32_t depth, uint32_t height, uint32_t width)
  {
    _depth = depth;
    _height = height;
    _width = width;
  }

public:
  uint32_t depth(void) const { return _depth; }   // number of channels
  uint32_t height(void) const { return _height; } // number of rows
  uint32_t width(void) const { return _width; }   // number of columns

private:
  uint32_t _depth;
  uint32_t _height;
  uint32_t _width;
};
+
/**
 * @brief The number of elements of a feature map of a given shape
 *
 * WARN The result is valid only when the expected value is less than 2^32 - 1
 */
inline uint32_t num_elements(const Shape &shape)
{
  return shape.depth() * shape.height() * shape.width();
}

/// @brief Dimension-wise equality of two feature-map shapes
inline bool operator==(const Shape &l, const Shape &r)
{
  return (l.depth() == r.depth()) && (l.height() == r.height()) && (l.width() == r.width());
}
+
+} // namespace feature
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_FEATURE_SHAPE_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/feature/View.h b/compiler/angkor/include/nncc/core/ADT/feature/View.h
new file mode 100644
index 000000000..856e22b4b
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/feature/View.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_FEATURE_VIEW_H__
+#define __NNCC_CORE_ADT_FEATURE_VIEW_H__
+
+#include "nncc/core/ADT/feature/Shape.h"
+#include "nncc/core/ADT/feature/Reader.h"
+#include "nncc/core/ADT/feature/Accessor.h"
+#include "nncc/core/ADT/feature/Layout.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace feature
+{
+
/**
 * @brief Abstract feature-map view bound to a concrete shape and layout
 *
 * View implements element access on top of a raw pointer that derived
 * classes supply through "base" (e.g. a buffer that owns storage, or an
 * overlay that borrows it).
 */
template <typename T> class View : public Reader<T>, public Accessor<T>
{
public:
  explicit View(const Shape &shape, const Layout &layout) : _shape{shape}, _layout{layout}
  {
    // DO NOTHING
  }

public:
  /// @brief Start of the underlying storage (provided by derived classes)
  virtual T *base(void) = 0;
  virtual const T *base(void) const = 0;

public:
  /// @brief Read the element at (ch, row, col); position is layout-translated
  T at(uint32_t ch, uint32_t row, uint32_t col) const override final
  {
    return *(base() + _layout.offset(_shape, ch, row, col));
  }

public:
  /// @brief Mutable access to the element at (ch, row, col)
  T &at(uint32_t ch, uint32_t row, uint32_t col) override final
  {
    return *(base() + _layout.offset(_shape, ch, row, col));
  }

public:
  const Shape &shape(void) const { return _shape; }

private:
  const Shape _shape;
  const Layout _layout;
};
+
+} // namespace feature
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_FEATURE_VIEW_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/kernel/Accessor.h b/compiler/angkor/include/nncc/core/ADT/kernel/Accessor.h
new file mode 100644
index 000000000..5bc46de36
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/kernel/Accessor.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_KERNEL_ACCESSOR_H__
+#define __NNCC_CORE_ADT_KERNEL_ACCESSOR_H__
+
+#include <cstdint>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
/**
 * @brief Mutable-access interface over a 4-D (kernel) value
 */
template <typename T> struct Accessor
{
  // Virtual destructor so derived accessors can be destroyed polymorphically
  virtual ~Accessor() = default;

  /// @brief Mutable reference to the element at (nth, ch, row, col)
  virtual T &at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) = 0;
};
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_KERNEL_ACCESSOR_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/kernel/Buffer.h b/compiler/angkor/include/nncc/core/ADT/kernel/Buffer.h
new file mode 100644
index 000000000..3497d4829
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/kernel/Buffer.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_KERNEL_BUFFER_H__
+#define __NNCC_CORE_ADT_KERNEL_BUFFER_H__
+
+#include "nncc/core/ADT/kernel/View.h"
+#include "nncc/core/ADT/kernel/ViewImpl.h"
+
+#include <vector>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
+template <typename T> class Buffer final : public View<T>
+{
+public:
+ explicit Buffer(const Shape &shape, const Layout &layout) : _impl{shape, layout}
+ {
+ _buffer.resize(num_elements(shape));
+ }
+
+public:
+ T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ return _impl.at(_buffer.begin(), nth, ch, row, col);
+ }
+
+public:
+ T &at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) override
+ {
+ return _impl.at(_buffer.begin(), nth, ch, row, col);
+ }
+
+public:
+ const Shape &shape(void) const override { return _impl.shape(); }
+
+private:
+ std::vector<T> _buffer;
+ ViewImpl<T> _impl;
+};
+
/**
 * @brief Create a Buffer of the given shape with a statically-chosen layout
 *
 * Example: auto buf = make_buffer<float, NCHWLayout>(shape);
 */
template <typename T, typename LayoutImpl> Buffer<T> make_buffer(const Shape &shape)
{
  return Buffer<T>{shape, LayoutImpl{}};
}
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_KERNEL_BUFFER_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/kernel/IndexEnumerator.h b/compiler/angkor/include/nncc/core/ADT/kernel/IndexEnumerator.h
new file mode 100644
index 000000000..4167ef972
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/kernel/IndexEnumerator.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_KERNEL_INDEX_ENUMERATOR_H__
+#define __NNCC_CORE_ADT_KERNEL_INDEX_ENUMERATOR_H__
+
+#include "nncc/core/ADT/kernel/Shape.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
/**
 * @brief Enumerate every (count, depth, height, width) position of a kernel
 *
 * Implementation lives in IndexEnumerator.cpp; only the declaration is here.
 */
class IndexEnumerator
{
public:
  explicit IndexEnumerator(const Shape &shape);

public:
  // Non-copyable and non-movable: enumerators are meant to be used in place
  IndexEnumerator(IndexEnumerator &&) = delete;
  IndexEnumerator(const IndexEnumerator &) = delete;

public:
  /// @brief Whether the enumeration still points at a valid position
  bool valid(void) const;

public:
  // Current coordinate along each axis
  uint32_t count(void) const;
  uint32_t depth(void) const;
  uint32_t height(void) const;
  uint32_t width(void) const;

public:
  /// @brief Move to the next position (defined in IndexEnumerator.cpp)
  void advance(void);

private:
  // Store max and current offset for count/depth/height/width
  //
  // NOTE Here explicit array is used instead of kernel::Shape to make
  //      a room for improvement such as enumeration order (NHWC, NCHW)
  //      support
  uint32_t _max[4];
  uint32_t _cur[4];

private:
  // NOTE(review): presumably the axis currently being advanced — the logic
  //               is in the out-of-view .cpp, so confirm before relying on it
  uint32_t _cursor;
};
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_KERNEL_INDEX_ENUMERATOR_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/kernel/Layout.h b/compiler/angkor/include/nncc/core/ADT/kernel/Layout.h
new file mode 100644
index 000000000..1e85e1ed4
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/kernel/Layout.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_KERNEL_LAYOUT_H__
+#define __NNCC_CORE_ADT_KERNEL_LAYOUT_H__
+
+#include "nncc/core/ADT/kernel/Shape.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
+class Layout
+{
+public:
+ using Func = uint32_t (*)(const Shape &, uint32_t n, uint32_t ch, uint32_t row, uint32_t col);
+
+public:
+ Layout(const Func &func);
+
+public:
+ uint32_t offset(const Shape &shape, uint32_t n, uint32_t ch, uint32_t row, uint32_t col) const
+ {
+ return _func(shape, n, ch, row, col);
+ }
+
+private:
+ Func _func;
+};
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_KERNEL_LAYOUT_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/kernel/NCHWLayout.h b/compiler/angkor/include/nncc/core/ADT/kernel/NCHWLayout.h
new file mode 100644
index 000000000..72bd89fb9
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/kernel/NCHWLayout.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_KERNEL_NCHW_LAYOUT_H__
+#define __NNCC_CORE_ADT_KERNEL_NCHW_LAYOUT_H__
+
+#include "nncc/core/ADT/kernel/Layout.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
/**
 * @brief Kernel layout with N (count) outermost and W (width) innermost
 *
 * The offset function is installed by the constructor in NCHWLayout.cpp.
 */
struct NCHWLayout final : public Layout
{
  NCHWLayout();
};
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_KERNEL_NCHW_LAYOUT_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/kernel/NHWCLayout.h b/compiler/angkor/include/nncc/core/ADT/kernel/NHWCLayout.h
new file mode 100644
index 000000000..bb239b91f
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/kernel/NHWCLayout.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_KERNEL_NHWC_LAYOUT_H__
+#define __NNCC_CORE_ADT_KERNEL_NHWC_LAYOUT_H__
+
+#include "nncc/core/ADT/kernel/Layout.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
/**
 * @brief Kernel layout with N (count) outermost and C (channel) innermost
 *
 * The offset function is installed by the constructor in NHWCLayout.cpp.
 */
struct NHWCLayout final : public Layout
{
  NHWCLayout();
};
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_KERNEL_NHWC_LAYOUT_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/kernel/Overlay.h b/compiler/angkor/include/nncc/core/ADT/kernel/Overlay.h
new file mode 100644
index 000000000..e348a8769
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/kernel/Overlay.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_KERNEL_OVERLAY_H__
+#define __NNCC_CORE_ADT_KERNEL_OVERLAY_H__
+
+#include "nncc/core/ADT/kernel/View.h"
+#include "nncc/core/ADT/kernel/ViewImpl.h"
+
+#include <vector>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
+template <typename T, typename InputIt> class Overlay final : public View<T>
+{
+public:
+ explicit Overlay(const Shape &shape, const Layout &layout, InputIt it)
+ : _impl{shape, layout}, _it{it}
+ {
+ // DO NOTHING
+ }
+
+public:
+ T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ return _impl.at(_it, nth, ch, row, col);
+ }
+
+public:
+ T &at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) override
+ {
+ return _impl.at(_it, nth, ch, row, col);
+ }
+
+public:
+ const Shape &shape(void) const override { return _impl.shape(); }
+
+private:
+ InputIt const _it;
+ ViewImpl<T> _impl;
+};
+
/**
 * @brief Helper that fixes element type and layout for Overlay construction
 *
 * Lets the iterator type be deduced while T and LayoutImpl stay explicit.
 */
template <typename T, typename LayoutImpl> struct OverlayFactory
{
  template <typename InputIt> static Overlay<T, InputIt> make(const Shape &shape, InputIt it)
  {
    return Overlay<T, InputIt>{shape, LayoutImpl{}, it};
  }
};

/// @brief Create an Overlay over a raw pointer with a statically-chosen layout
template <typename T, typename LayoutImpl> Overlay<T, T *> make_overlay(const Shape &shape, T *base)
{
  return OverlayFactory<T, LayoutImpl>::make(shape, base);
}
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_KERNEL_OVERLAY_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/kernel/Reader.h b/compiler/angkor/include/nncc/core/ADT/kernel/Reader.h
new file mode 100644
index 000000000..af0267745
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/kernel/Reader.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_KERNEL_READER_H__
+#define __NNCC_CORE_ADT_KERNEL_READER_H__
+
+#include <cstdint>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
/**
 * @brief Read-only interface over a 4-D (kernel) value
 */
template <typename T> struct Reader
{
  // Virtual destructor so derived readers can be destroyed through Reader<T> *
  virtual ~Reader() = default;

  /// @brief Return the value at position (nth, ch, row, col)
  virtual T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const = 0;
};
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_KERNEL_READER_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/kernel/Shape.h b/compiler/angkor/include/nncc/core/ADT/kernel/Shape.h
new file mode 100644
index 000000000..d485d526b
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/kernel/Shape.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_KERNEL_SHAPE_H__
+#define __NNCC_CORE_ADT_KERNEL_SHAPE_H__
+
+#include <cstdint>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
/**
 * @brief Shape (count x depth x height x width) of a convolution kernel
 */
class Shape
{
public:
  Shape(uint32_t count, uint32_t depth, uint32_t height, uint32_t width)
  {
    _count = count;
    _depth = depth;
    _height = height;
    _width = width;
  }

public:
  uint32_t count(void) const { return _count; }   // number of kernels
  uint32_t depth(void) const { return _depth; }   // number of channels
  uint32_t height(void) const { return _height; } // number of rows
  uint32_t width(void) const { return _width; }   // number of columns

private:
  uint32_t _count;
  uint32_t _depth;
  uint32_t _height;
  uint32_t _width;
};
+
/**
 * @brief Return the number of elements in a kernel of a given shape
 *
 * WARN The result is valid only when the expected value is less than 2^32 - 1
 */
inline uint32_t num_elements(const Shape &shape)
{
  return shape.count() * shape.depth() * shape.height() * shape.width();
}

/// @brief Dimension-wise equality of two kernel shapes (defined in Shape.cpp)
bool operator==(const Shape &lhs, const Shape &rhs);
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_KERNEL_SHAPE_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/kernel/View.h b/compiler/angkor/include/nncc/core/ADT/kernel/View.h
new file mode 100644
index 000000000..2ed682a51
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/kernel/View.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_KERNEL_VIEW_H__
+#define __NNCC_CORE_ADT_KERNEL_VIEW_H__
+
+#include "nncc/core/ADT/kernel/Shape.h"
+#include "nncc/core/ADT/kernel/Reader.h"
+#include "nncc/core/ADT/kernel/Accessor.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
/**
 * @brief Read/write interface over a 4-D kernel with a queryable shape
 */
template <typename T> struct View : public Reader<T>, public Accessor<T>
{
  /// @brief Shape of the kernel this view exposes
  virtual const Shape &shape(void) const = 0;
};
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_KERNEL_VIEW_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/kernel/ViewImpl.h b/compiler/angkor/include/nncc/core/ADT/kernel/ViewImpl.h
new file mode 100644
index 000000000..f4e8ed5e2
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/kernel/ViewImpl.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_KERNEL_VIEW_IMPL_H__
+#define __NNCC_CORE_ADT_KERNEL_VIEW_IMPL_H__
+
+#include "nncc/core/ADT/kernel/Shape.h"
+#include "nncc/core/ADT/kernel/Layout.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
+template <typename T> class ViewImpl
+{
+public:
+ explicit ViewImpl(const Shape &shape, const Layout &layout) : _shape{shape}, _layout{layout}
+ {
+ // DO NOTHING
+ }
+
+public:
+ template <typename InputIt>
+ T at(InputIt it, uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const
+ {
+ return *(it + _layout.offset(_shape, nth, ch, row, col));
+ }
+
+public:
+ template <typename InputIt>
+ T &at(InputIt it, uint32_t nth, uint32_t ch, uint32_t row, uint32_t col)
+ {
+ return *(it + _layout.offset(_shape, nth, ch, row, col));
+ }
+
+public:
+ const Shape &shape(void) const { return _shape; }
+
+private:
+ const Shape _shape;
+ const Layout _layout;
+};
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_KERNEL_VIEW_IMPL_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/tensor/Accessor.h b/compiler/angkor/include/nncc/core/ADT/tensor/Accessor.h
new file mode 100644
index 000000000..6a60b4b34
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/tensor/Accessor.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_TENSOR_ACCESSOR_H__
+#define __NNCC_CORE_ADT_TENSOR_ACCESSOR_H__
+
+#include "nncc/core/ADT/tensor/Index.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
/**
 * @brief Mutable-access interface over a rank-N tensor
 */
template <typename T> struct Accessor
{
  // Virtual destructor so derived accessors can be destroyed polymorphically
  virtual ~Accessor() = default;

  /// @brief Mutable reference to the element at the given index
  virtual T &at(const Index &) = 0;
};
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_TENSOR_ACCESSOR_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/tensor/Buffer.h b/compiler/angkor/include/nncc/core/ADT/tensor/Buffer.h
new file mode 100644
index 000000000..f62f3040f
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/tensor/Buffer.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_TENSOR_BUFFER_H__
+#define __NNCC_CORE_ADT_TENSOR_BUFFER_H__
+
#include "nncc/core/ADT/tensor/View.h"

#include <vector>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
+template <typename T> class Buffer final : public View<T>
+{
+public:
+ explicit Buffer(const Shape &shape, const Layout &layout) : View<T>{shape, layout}
+ {
+ _buffer.resize(num_elements(shape));
+ }
+
+public:
+ T *base(void) override { return _buffer.data(); }
+ const T *base(void) const override { return _buffer.data(); }
+
+private:
+ std::vector<T> _buffer;
+};
+
/**
 * @brief Create a tensor Buffer of the given shape with a statically-chosen
 *        layout, e.g. make_buffer<float, LexicalLayout>(shape)
 */
template <typename T, typename LayoutImpl> Buffer<T> make_buffer(const Shape &shape)
{
  return Buffer<T>{shape, LayoutImpl{}};
}
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_TENSOR_BUFFER_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/tensor/Index.h b/compiler/angkor/include/nncc/core/ADT/tensor/Index.h
new file mode 100644
index 000000000..19beafafc
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/tensor/Index.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_TENSOR_INDEX_H__
+#define __NNCC_CORE_ADT_TENSOR_INDEX_H__
+
+#include <initializer_list>
+#include <vector>
+#include <cstdint>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
/**
 * @brief A rank-N position inside a tensor (one coordinate per axis)
 *
 * Method bodies live in Index.cpp; only declarations appear here.
 */
class Index
{
public:
  Index() = default;
  // Construct from a brace-enclosed coordinate list, e.g. Index{0, 1, 2}
  Index(std::initializer_list<uint32_t> &&l);

public:
  /// @brief Number of axes
  uint32_t rank(void) const;

public:
  // NOTE(review): presumably resizes to "size" axes — defined in Index.cpp,
  //               confirm whether existing coordinates are preserved
  Index &resize(uint32_t size);

public:
  // NOTE(review): presumably sets every coordinate to "index" — see Index.cpp
  Index &fill(uint32_t index);

public:
  // Coordinate along the given axis (mutable / read-only)
  uint32_t &at(uint32_t axis);
  uint32_t at(uint32_t axis) const;

private:
  std::vector<uint32_t> _indices;
};

// It throws an exception when rank of inputs does not match.
Index operator+(const Index &lhs, const Index &rhs);
bool operator==(const Index &lhs, const Index &rhs);
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_TENSOR_INDEX_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/tensor/IndexEnumerator.h b/compiler/angkor/include/nncc/core/ADT/tensor/IndexEnumerator.h
new file mode 100644
index 000000000..ef85b2c10
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/tensor/IndexEnumerator.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_TENSOR_INDEX_ENUMERATOR_H__
+#define __NNCC_CORE_ADT_TENSOR_INDEX_ENUMERATOR_H__
+
+#include "nncc/core/ADT/tensor/Index.h"
+#include "nncc/core/ADT/tensor/Shape.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
/**
 * @brief Enumerate every valid Index inside a tensor shape
 *
 * Typical use: for (IndexEnumerator e{shape}; e.valid(); e.advance())
 *                use(e.current());
 */
class IndexEnumerator
{
public:
  explicit IndexEnumerator(const Shape &shape);

public:
  // Non-copyable and non-movable: enumerators are meant to be used in place
  IndexEnumerator(IndexEnumerator &&) = delete;
  IndexEnumerator(const IndexEnumerator &) = delete;

public:
  // Enumeration is exhausted once _cursor moves past the last axis.
  // NOTE(review): _cursor appears to track the axis being advanced — the
  //               update logic is in IndexEnumerator.cpp, confirm there
  bool valid(void) const { return _cursor < _shape.rank(); }

public:
  /// @brief The index at the current position of the enumeration
  const Index &current(void) const { return _index; }

public:
  /// @brief Move to the next index (defined in IndexEnumerator.cpp)
  void advance(void);

private:
  const Shape _shape;
  Index _index;

private:
  uint32_t _cursor;
};
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_TENSOR_INDEX_ENUMERATOR_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/tensor/Layout.h b/compiler/angkor/include/nncc/core/ADT/tensor/Layout.h
new file mode 100644
index 000000000..0e410ff01
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/tensor/Layout.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_TENSOR_LAYOUT_H__
+#define __NNCC_CORE_ADT_TENSOR_LAYOUT_H__
+
+#include "nncc/core/ADT/tensor/Shape.h"
+#include "nncc/core/ADT/tensor/Index.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
+class Layout
+{
+public:
+ using Func = uint32_t (*)(const Shape &, const Index &);
+
+public:
+ explicit Layout(const Func &func);
+
+public:
+ uint32_t offset(const Shape &shape, const Index &index) const { return _func(shape, index); }
+
+private:
+ Func _func;
+};
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_TENSOR_LAYOUT_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/tensor/LexicalLayout.h b/compiler/angkor/include/nncc/core/ADT/tensor/LexicalLayout.h
new file mode 100644
index 000000000..b497ad844
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/tensor/LexicalLayout.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_TENSOR_LEXICAL_LAYOUT_H__
+#define __NNCC_CORE_ADT_TENSOR_LEXICAL_LAYOUT_H__
+
+#include "nncc/core/ADT/tensor/Layout.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
+struct LexicalLayout final : public Layout
+{
+ LexicalLayout();
+};
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_TENSOR_LEXICAL_LAYOUT_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/tensor/Overlay.h b/compiler/angkor/include/nncc/core/ADT/tensor/Overlay.h
new file mode 100644
index 000000000..11ee5350c
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/tensor/Overlay.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_TENSOR_OVERLAY_H__
+#define __NNCC_CORE_ADT_TENSOR_OVERLAY_H__
+
+#include "nncc/core/ADT/tensor/View.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
+template <typename T> class Overlay final : public View<T>
+{
+public:
+ explicit Overlay(const Shape &shape, const Layout &layout, T *base)
+ : View<T>{shape, layout}, _base{base}
+ {
+ // DO NOTHING
+ }
+
+public:
+ T *base(void) override { return _base; }
+ const T *base(void) const override { return _base; }
+
+private:
+ T *const _base;
+};
+
+template <typename T, typename LayoutImpl> Overlay<T> make_overlay(const Shape &shape, T *base)
+{
+ return Overlay<T>{shape, LayoutImpl{}, base};
+}
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_TENSOR_OVERLAY_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/tensor/Reader.h b/compiler/angkor/include/nncc/core/ADT/tensor/Reader.h
new file mode 100644
index 000000000..49f1287d2
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/tensor/Reader.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_TENSOR_READER_H__
+#define __NNCC_CORE_ADT_TENSOR_READER_H__
+
+#include "nncc/core/ADT/tensor/Index.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
+template <typename T> struct Reader
+{
+ virtual ~Reader() = default;
+
+ virtual T at(const Index &) const = 0;
+};
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_TENSOR_READER_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/tensor/Shape.h b/compiler/angkor/include/nncc/core/ADT/tensor/Shape.h
new file mode 100644
index 000000000..3eaab0e54
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/tensor/Shape.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_TENSOR_SHAPE_H__
+#define __NNCC_CORE_ADT_TENSOR_SHAPE_H__
+
+#include <initializer_list>
+#include <vector>
+#include <cstdint>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
+class Shape
+{
+public:
+ Shape() = default;
+ Shape(std::initializer_list<uint32_t> &&l);
+
+public:
+ uint32_t rank(void) const;
+
+public:
+ Shape &resize(uint32_t size);
+
+public:
+ uint32_t &dim(uint32_t axis);
+ uint32_t dim(uint32_t axis) const;
+
+public:
+ Shape &squeeze(void);
+
+private:
+ std::vector<uint32_t> _dims;
+};
+
+/**
+ * NOTE num_elements returns 1 for rank-0 tensors
+ */
+uint64_t num_elements(const Shape &);
+
+Shape squeeze(const Shape &);
+
+bool operator==(const Shape &, const Shape &);
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_TENSOR_SHAPE_H__
diff --git a/compiler/angkor/include/nncc/core/ADT/tensor/View.h b/compiler/angkor/include/nncc/core/ADT/tensor/View.h
new file mode 100644
index 000000000..4c9a91539
--- /dev/null
+++ b/compiler/angkor/include/nncc/core/ADT/tensor/View.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNCC_CORE_ADT_TENSOR_VIEW_H__
+#define __NNCC_CORE_ADT_TENSOR_VIEW_H__
+
+#include "nncc/core/ADT/tensor/Shape.h"
+#include "nncc/core/ADT/tensor/Index.h"
+#include "nncc/core/ADT/tensor/Reader.h"
+#include "nncc/core/ADT/tensor/Accessor.h"
+#include "nncc/core/ADT/tensor/Layout.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
+template <typename T> class View : public Reader<T>, public Accessor<T>
+{
+public:
+ explicit View(const Shape &shape, const Layout &layout)
+ : _shape{shape}, _layout{std::move(layout)}
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~View() = default;
+
+public:
+ virtual T *base(void) = 0;
+ virtual const T *base(void) const = 0;
+
+public:
+ T at(const Index &index) const override { return *(base() + _layout.offset(_shape, index)); }
+
+public:
+ T &at(const Index &index) override { return *(base() + _layout.offset(_shape, index)); }
+
+public:
+ const Shape &shape(void) const { return _shape; }
+
+private:
+ const Shape _shape;
+ const Layout _layout;
+};
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
+
+#endif // __NNCC_CORE_ADT_TENSOR_VIEW_H__
diff --git a/compiler/angkor/src/ADT/feature/Accessor.cpp b/compiler/angkor/src/ADT/feature/Accessor.cpp
new file mode 100644
index 000000000..03ff9a31e
--- /dev/null
+++ b/compiler/angkor/src/ADT/feature/Accessor.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/feature/Accessor.h"
+
+// DO NOT REMOVE THIS FILE
+//
+// This file is introduced to check the self-completeness of 'Accessor.h'
diff --git a/compiler/angkor/src/ADT/feature/Buffer.test.cpp b/compiler/angkor/src/ADT/feature/Buffer.test.cpp
new file mode 100644
index 000000000..1e4430251
--- /dev/null
+++ b/compiler/angkor/src/ADT/feature/Buffer.test.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/feature/Buffer.h"
+#include "nncc/core/ADT/feature/CHWLayout.h"
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::feature::Shape;
+using nncc::core::ADT::feature::CHWLayout;
+using nncc::core::ADT::feature::Buffer;
+
+using nncc::core::ADT::feature::make_buffer;
+
+TEST(ADT_FEATURE_BUFFER, ctor)
+{
+ const Shape shape{4, 6, 3};
+ auto buffer = make_buffer<int, CHWLayout>(shape);
+
+ ASSERT_EQ(buffer.shape().depth(), shape.depth());
+ ASSERT_EQ(buffer.shape().height(), shape.height());
+ ASSERT_EQ(buffer.shape().width(), shape.width());
+}
+
+TEST(ADT_FEATURE_BUFFER, access)
+{
+ const Shape shape{4, 6, 3};
+ auto buffer = make_buffer<int, CHWLayout>(shape);
+
+ ASSERT_EQ(buffer.at(3, 5, 2), 0);
+ buffer.at(3, 5, 2) = 4;
+
+ // Casting is introduced to use 'const T &at(...) const' method
+ ASSERT_EQ(static_cast<const Buffer<int> &>(buffer).at(3, 5, 2), 4);
+}
diff --git a/compiler/angkor/src/ADT/feature/CHWLayout.cpp b/compiler/angkor/src/ADT/feature/CHWLayout.cpp
new file mode 100644
index 000000000..31415a1bd
--- /dev/null
+++ b/compiler/angkor/src/ADT/feature/CHWLayout.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/feature/CHWLayout.h"
+
+using nncc::core::ADT::feature::Shape;
+
+static uint32_t CHW_offset(const Shape &shape, uint32_t ch, uint32_t row, uint32_t col)
+{
+ return (ch * shape.height() + row) * shape.width() + col;
+}
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace feature
+{
+
+CHWLayout::CHWLayout() : Layout{CHW_offset}
+{
+ // DO NOTHING
+}
+
+} // namespace feature
+} // namespace ADT
+} // namespace core
+} // namespace nncc
diff --git a/compiler/angkor/src/ADT/feature/CHWLayout.test.cpp b/compiler/angkor/src/ADT/feature/CHWLayout.test.cpp
new file mode 100644
index 000000000..5610df8f3
--- /dev/null
+++ b/compiler/angkor/src/ADT/feature/CHWLayout.test.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/feature/CHWLayout.h"
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT::feature;
+
+TEST(ADT_FEATURE_CHW_LAYOUT, col_increase)
+{
+ const Shape shape{4, 3, 6};
+ const CHWLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 2, 1) + 1, l.offset(shape, 1, 2, 2));
+}
+
+TEST(ADT_FEATURE_CHW_LAYOUT, row_increase)
+{
+ const Shape shape{4, 3, 6};
+ const CHWLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 1, 1) + 6, l.offset(shape, 1, 2, 1));
+}
+
+TEST(ADT_FEATURE_CHW_LAYOUT, ch_increase)
+{
+ const Shape shape{4, 3, 6};
+ const CHWLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 1, 1) + 6 * 3, l.offset(shape, 2, 1, 1));
+}
diff --git a/compiler/angkor/src/ADT/feature/HWCLayout.cpp b/compiler/angkor/src/ADT/feature/HWCLayout.cpp
new file mode 100644
index 000000000..016535625
--- /dev/null
+++ b/compiler/angkor/src/ADT/feature/HWCLayout.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/feature/HWCLayout.h"
+
+using nncc::core::ADT::feature::Shape;
+
+static uint32_t HWC_offset(const Shape &shape, uint32_t ch, uint32_t row, uint32_t col)
+{
+ return (row * shape.width() + col) * shape.depth() + ch;
+}
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace feature
+{
+
+HWCLayout::HWCLayout() : Layout{HWC_offset}
+{
+ // DO NOTHING
+}
+
+} // namespace feature
+} // namespace ADT
+} // namespace core
+} // namespace nncc
diff --git a/compiler/angkor/src/ADT/feature/HWCLayout.test.cpp b/compiler/angkor/src/ADT/feature/HWCLayout.test.cpp
new file mode 100644
index 000000000..d1f359753
--- /dev/null
+++ b/compiler/angkor/src/ADT/feature/HWCLayout.test.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/feature/HWCLayout.h"
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT::feature;
+
+TEST(ADT_FEATURE_HWC_LAYOUT, C_increase)
+{
+ const uint32_t C = 4;
+ const uint32_t H = 3;
+ const uint32_t W = 6;
+
+ const Shape shape{C, H, W};
+ const HWCLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 1, 1) + 1, l.offset(shape, 2, 1, 1));
+}
+
+TEST(ADT_FEATURE_HWC_LAYOUT, W_increase)
+{
+ const uint32_t C = 4;
+ const uint32_t H = 3;
+ const uint32_t W = 6;
+
+ const Shape shape{C, H, W};
+ const HWCLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 2, 1) + C, l.offset(shape, 1, 2, 2));
+}
+
+TEST(ADT_FEATURE_HWC_LAYOUT, H_increase)
+{
+ const uint32_t C = 4;
+ const uint32_t H = 3;
+ const uint32_t W = 6;
+
+ const Shape shape{C, H, W};
+ const HWCLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 1, 1) + W * C, l.offset(shape, 1, 2, 1));
+}
diff --git a/compiler/angkor/src/ADT/feature/Layout.cpp b/compiler/angkor/src/ADT/feature/Layout.cpp
new file mode 100644
index 000000000..49ab7cbf9
--- /dev/null
+++ b/compiler/angkor/src/ADT/feature/Layout.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/feature/Layout.h"
+
+#include <cassert>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace feature
+{
+
+Layout::Layout(const Func &func) : _func{func} { assert(_func != nullptr); }
+
+} // namespace feature
+} // namespace ADT
+} // namespace core
+} // namespace nncc
diff --git a/compiler/angkor/src/ADT/feature/Layout.test.cpp b/compiler/angkor/src/ADT/feature/Layout.test.cpp
new file mode 100644
index 000000000..023594e16
--- /dev/null
+++ b/compiler/angkor/src/ADT/feature/Layout.test.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/feature/Layout.h"
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::feature::Shape;
+using nncc::core::ADT::feature::Layout;
+
+static uint32_t offset_0(const Shape &, uint32_t, uint32_t, uint32_t) { return 0; }
+static uint32_t offset_1(const Shape &, uint32_t, uint32_t, uint32_t) { return 1; }
+
+TEST(ADT_FEATURE_LAYOUT, ctor)
+{
+ Layout l{offset_0};
+
+ ASSERT_EQ(l.offset(Shape{4, 3, 6}, 1, 1, 1), 0);
+}
+
+TEST(ADT_FEATURE_LAYOUT, copy)
+{
+ Layout orig{offset_0};
+ Layout copy{offset_1};
+
+ ASSERT_EQ(copy.offset(Shape{4, 3, 6}, 1, 1, 1), 1);
+
+ copy = orig;
+
+ ASSERT_EQ(copy.offset(Shape{4, 3, 6}, 1, 1, 1), 0);
+}
+
+TEST(ADT_FEATURE_LAYOUT, move)
+{
+ Layout orig{offset_0};
+ Layout move{offset_1};
+
+ ASSERT_EQ(move.offset(Shape{4, 3, 6}, 1, 1, 1), 1);
+
+ move = std::move(orig);
+
+ ASSERT_EQ(move.offset(Shape{4, 3, 6}, 1, 1, 1), 0);
+}
diff --git a/compiler/angkor/src/ADT/feature/Overlay.test.cpp b/compiler/angkor/src/ADT/feature/Overlay.test.cpp
new file mode 100644
index 000000000..c8e2943f8
--- /dev/null
+++ b/compiler/angkor/src/ADT/feature/Overlay.test.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/feature/Overlay.h"
+#include "nncc/core/ADT/feature/CHWLayout.h"
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::feature::Shape;
+using nncc::core::ADT::feature::CHWLayout;
+using nncc::core::ADT::feature::Overlay;
+
+using nncc::core::ADT::feature::make_overlay;
+
+TEST(ADT_FEATURE_OVERLAY, ctor)
+{
+ const Shape shape{4, 6, 3};
+
+ int data[4 * 6 * 3] = {
+ 0,
+ };
+ auto overlay = make_overlay<int, CHWLayout>(shape, data);
+
+ ASSERT_EQ(overlay.shape().depth(), shape.depth());
+ ASSERT_EQ(overlay.shape().height(), shape.height());
+ ASSERT_EQ(overlay.shape().width(), shape.width());
+}
+
+TEST(ADT_FEATURE_OVERLAY, read)
+{
+ const Shape shape{4, 6, 3};
+
+ int data[4 * 6 * 3] = {
+ 0,
+ };
+ const auto overlay = make_overlay<int, CHWLayout>(shape, data);
+
+ CHWLayout layout{};
+
+ ASSERT_EQ(data[layout.offset(shape, 3, 5, 2)], 0);
+ data[layout.offset(shape, 3, 5, 2)] = 2;
+ ASSERT_EQ(overlay.at(3, 5, 2), 2);
+}
+
+TEST(ADT_FEATURE_OVERLAY, access)
+{
+ const Shape shape{4, 6, 3};
+
+ int data[4 * 6 * 3] = {
+ 0,
+ };
+ auto overlay = make_overlay<int, CHWLayout>(shape, data);
+
+ CHWLayout layout{};
+
+ ASSERT_EQ(data[layout.offset(shape, 3, 5, 2)], 0);
+ overlay.at(3, 5, 2) = 4;
+ ASSERT_EQ(data[layout.offset(shape, 3, 5, 2)], 4);
+}
diff --git a/compiler/angkor/src/ADT/feature/Reader.cpp b/compiler/angkor/src/ADT/feature/Reader.cpp
new file mode 100644
index 000000000..5f1c0d22b
--- /dev/null
+++ b/compiler/angkor/src/ADT/feature/Reader.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/feature/Reader.h"
+
+// DO NOT REMOVE THIS FILE
+//
+// This file is introduced to check the self-completeness of 'Reader.h'
diff --git a/compiler/angkor/src/ADT/feature/Shape.test.cpp b/compiler/angkor/src/ADT/feature/Shape.test.cpp
new file mode 100644
index 000000000..9216182f0
--- /dev/null
+++ b/compiler/angkor/src/ADT/feature/Shape.test.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <nncc/core/ADT/feature/Shape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ADT_FEATURE_SHAPE, ctor)
+{
+ const uint32_t C = 3;
+ const uint32_t H = 4;
+ const uint32_t W = 5;
+
+ nncc::core::ADT::feature::Shape shape{C, H, W};
+
+ ASSERT_EQ(shape.depth(), C);
+ ASSERT_EQ(shape.height(), H);
+ ASSERT_EQ(shape.width(), W);
+}
+
+TEST(ADT_FEATURE_SHAPE, num_elements)
+{
+ const uint32_t C = 3;
+ const uint32_t H = 4;
+ const uint32_t W = 5;
+
+ using nncc::core::ADT::feature::Shape;
+ using nncc::core::ADT::feature::num_elements;
+
+ ASSERT_EQ(num_elements(Shape{C, H, W}), C * H * W);
+}
+
+TEST(ADT_FEATURE_SHAPE, operator_eq)
+{
+ using nncc::core::ADT::feature::Shape;
+
+ // NOTE We use ASSERT_TRUE/ASSERT_FALSE instead of ASSERT_EQ/ASSERT_NE as it is impossible to
+ // introduce negative tests with ASSERT_NE (it uses operator!= instead of operator==).
+ ASSERT_TRUE(Shape(1, 1, 1) == Shape(1, 1, 1));
+ ASSERT_FALSE(Shape(1, 1, 1) == Shape(2, 1, 1));
+ ASSERT_FALSE(Shape(1, 1, 1) == Shape(1, 2, 1));
+ ASSERT_FALSE(Shape(1, 1, 1) == Shape(1, 1, 2));
+}
diff --git a/compiler/angkor/src/ADT/kernel/Buffer.test.cpp b/compiler/angkor/src/ADT/kernel/Buffer.test.cpp
new file mode 100644
index 000000000..da344593e
--- /dev/null
+++ b/compiler/angkor/src/ADT/kernel/Buffer.test.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/kernel/Buffer.h"
+#include "nncc/core/ADT/kernel/NCHWLayout.h"
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::kernel::Shape;
+using nncc::core::ADT::kernel::NCHWLayout;
+using nncc::core::ADT::kernel::Buffer;
+
+using nncc::core::ADT::kernel::make_buffer;
+
+TEST(ADT_KERNEL_BUFFER, ctor)
+{
+ const Shape shape{2, 4, 6, 3};
+ auto buffer = make_buffer<int, NCHWLayout>(shape);
+
+ ASSERT_EQ(buffer.shape().count(), shape.count());
+ ASSERT_EQ(buffer.shape().depth(), shape.depth());
+ ASSERT_EQ(buffer.shape().height(), shape.height());
+ ASSERT_EQ(buffer.shape().width(), shape.width());
+}
+
+TEST(ADT_KERNEL_BUFFER, access)
+{
+ const Shape shape{2, 4, 6, 3};
+ auto buffer = make_buffer<int, NCHWLayout>(shape);
+
+ ASSERT_EQ(buffer.at(1, 3, 5, 2), 0);
+ buffer.at(1, 3, 5, 2) = 4;
+
+ // Casting is introduced to use 'const T &at(...) const' method
+ ASSERT_EQ(static_cast<const Buffer<int> &>(buffer).at(1, 3, 5, 2), 4);
+}
diff --git a/compiler/angkor/src/ADT/kernel/IndexEnumerator.cpp b/compiler/angkor/src/ADT/kernel/IndexEnumerator.cpp
new file mode 100644
index 000000000..0b1db090d
--- /dev/null
+++ b/compiler/angkor/src/ADT/kernel/IndexEnumerator.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/kernel/IndexEnumerator.h"
+
+#include <cassert>
+#include <algorithm>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
+IndexEnumerator::IndexEnumerator(const Shape &shape) : _cursor(0)
+{
+ _max[0] = shape.width();
+ _max[1] = shape.height();
+ _max[2] = shape.depth();
+ _max[3] = shape.count();
+
+ std::fill(_cur, _cur + 4, 0);
+
+ // NOTE Null dimension should NOT exist
+ assert(std::find(_max, _max + 4, 0) == (_max + 4));
+}
+
+bool IndexEnumerator::valid(void) const { return _cursor < 4; }
+
+uint32_t IndexEnumerator::count(void) const { return _cur[3]; }
+uint32_t IndexEnumerator::depth(void) const { return _cur[2]; }
+uint32_t IndexEnumerator::height(void) const { return _cur[1]; }
+uint32_t IndexEnumerator::width(void) const { return _cur[0]; }
+
+void IndexEnumerator::advance(void)
+{
+ while (_cursor < 4)
+ {
+ if (_cur[_cursor] + 1 < _max[_cursor])
+ {
+ break;
+ }
+
+ ++_cursor;
+ }
+
+ if (_cursor == 4)
+ {
+ return;
+ }
+
+ // Increment index
+ _cur[_cursor] += 1;
+
+ // Reset indices for lower dimensions
+ for (uint32_t head = 0; head < _cursor; ++head)
+ {
+ _cur[head] = 0;
+ }
+
+ // Reset cursor
+ _cursor = 0;
+}
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
diff --git a/compiler/angkor/src/ADT/kernel/IndexEnumerator.test.cpp b/compiler/angkor/src/ADT/kernel/IndexEnumerator.test.cpp
new file mode 100644
index 000000000..21ba19209
--- /dev/null
+++ b/compiler/angkor/src/ADT/kernel/IndexEnumerator.test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/kernel/IndexEnumerator.h"
+
+#include <vector>
+#include <algorithm>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::kernel::Shape;
+using nncc::core::ADT::kernel::IndexEnumerator;
+
+TEST(ADT_KERNEL_INDEX_ENUMERATOR, iterate_full_range)
+{
+ const uint32_t N = 2;
+ const uint32_t C = 3;
+ const uint32_t H = 4;
+ const uint32_t W = 5;
+
+ const Shape shape{N, C, H, W};
+
+ std::vector<uint32_t> count;
+ count.resize(N * C * H * W, 0);
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ const uint32_t offset = ((e.count() * C + e.depth()) * H + e.height()) * W + e.width();
+ count.at(offset) += 1;
+ }
+
+ ASSERT_TRUE(std::all_of(count.begin(), count.end(), [](uint32_t n) { return n == 1; }));
+}
diff --git a/compiler/angkor/src/ADT/kernel/Layout.cpp b/compiler/angkor/src/ADT/kernel/Layout.cpp
new file mode 100644
index 000000000..acadd2448
--- /dev/null
+++ b/compiler/angkor/src/ADT/kernel/Layout.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/kernel/Layout.h"
+
+#include <cassert>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
+Layout::Layout(const Func &func) : _func{func}
+{
+ // DO NOTHING
+}
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
diff --git a/compiler/angkor/src/ADT/kernel/Layout.test.cpp b/compiler/angkor/src/ADT/kernel/Layout.test.cpp
new file mode 100644
index 000000000..94885cd4e
--- /dev/null
+++ b/compiler/angkor/src/ADT/kernel/Layout.test.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/kernel/Layout.h"
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::kernel::Shape;
+using nncc::core::ADT::kernel::Layout;
+
+static uint32_t offset_0(const Shape &, uint32_t, uint32_t, uint32_t, uint32_t) { return 0; }
+static uint32_t offset_1(const Shape &, uint32_t, uint32_t, uint32_t, uint32_t) { return 1; }
+
+TEST(ADT_KERNEL_LAYOUT, ctor)
+{
+ Layout l{offset_0};
+
+ ASSERT_EQ(l.offset(Shape{4, 3, 6, 5}, 1, 1, 1, 1), 0);
+}
+
+TEST(ADT_KERNEL_LAYOUT, copy)
+{
+ Layout orig{offset_0};
+ Layout copy{offset_1};
+
+ ASSERT_EQ(copy.offset(Shape{4, 3, 6, 5}, 1, 1, 1, 1), 1);
+
+ copy = orig;
+
+ ASSERT_EQ(copy.offset(Shape{4, 3, 6, 5}, 1, 1, 1, 1), 0);
+}
+
+TEST(ADT_KERNEL_LAYOUT, move)
+{
+ Layout orig{offset_0};
+ Layout move{offset_1};
+
+ ASSERT_EQ(move.offset(Shape{4, 3, 6, 5}, 1, 1, 1, 1), 1);
+
+ move = std::move(orig);
+
+ ASSERT_EQ(move.offset(Shape{4, 3, 6, 5}, 1, 1, 1, 1), 0);
+}
diff --git a/compiler/angkor/src/ADT/kernel/NCHWLayout.cpp b/compiler/angkor/src/ADT/kernel/NCHWLayout.cpp
new file mode 100644
index 000000000..be7551182
--- /dev/null
+++ b/compiler/angkor/src/ADT/kernel/NCHWLayout.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/kernel/NCHWLayout.h"
+
+using nncc::core::ADT::kernel::Shape;
+
+static uint32_t NCHW_offset(const Shape &shape, uint32_t n, uint32_t ch, uint32_t row, uint32_t col)
+{
+ return (((n * shape.depth() + ch) * shape.height() + row) * shape.width() + col);
+}
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
+NCHWLayout::NCHWLayout() : Layout{NCHW_offset}
+{
+ // DO NOTHING
+}
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
diff --git a/compiler/angkor/src/ADT/kernel/NCHWLayout.test.cpp b/compiler/angkor/src/ADT/kernel/NCHWLayout.test.cpp
new file mode 100644
index 000000000..ba03b7b04
--- /dev/null
+++ b/compiler/angkor/src/ADT/kernel/NCHWLayout.test.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/kernel/NCHWLayout.h"
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT::kernel;
+
+TEST(ADT_KERNEL_KERNEL_NCHW_LAYOUT, col_increment)
+{
+ const Shape shape{4, 3, 6, 5};
+ const NCHWLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 1, 1, 1) + 1, l.offset(shape, 1, 1, 1, 2));
+}
+
+TEST(ADT_KERNEL_KERNEL_NCHW_LAYOUT, row_increment)
+{
+ const Shape shape{4, 3, 6, 5};
+ const NCHWLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 1, 1, 1) + 5, l.offset(shape, 1, 1, 2, 1));
+}
+
+TEST(ADT_KERNEL_KERNEL_NCHW_LAYOUT, ch_increment)
+{
+ const Shape shape{4, 3, 6, 5};
+ const NCHWLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 1, 1, 1) + 6 * 5, l.offset(shape, 1, 2, 1, 1));
+}
+
+TEST(ADT_KERNEL_KERNEL_NCHW_LAYOUT, n_increment)
+{
+ const Shape shape{4, 3, 6, 5};
+ const NCHWLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 1, 1, 1) + 3 * 6 * 5, l.offset(shape, 2, 1, 1, 1));
+}
diff --git a/compiler/angkor/src/ADT/kernel/NHWCLayout.cpp b/compiler/angkor/src/ADT/kernel/NHWCLayout.cpp
new file mode 100644
index 000000000..8e0524425
--- /dev/null
+++ b/compiler/angkor/src/ADT/kernel/NHWCLayout.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/kernel/NHWCLayout.h"
+
+using nncc::core::ADT::kernel::Shape;
+
+static uint32_t NHWC_offset(const Shape &shape, uint32_t n, uint32_t ch, uint32_t row, uint32_t col)
+{
+ return ((n * shape.height() + row) * shape.width() + col) * shape.depth() + ch;
+}
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
+NHWCLayout::NHWCLayout() : Layout{NHWC_offset}
+{
+ // DO NOTHING
+}
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
diff --git a/compiler/angkor/src/ADT/kernel/NHWCLayout.test.cpp b/compiler/angkor/src/ADT/kernel/NHWCLayout.test.cpp
new file mode 100644
index 000000000..2c5df7d89
--- /dev/null
+++ b/compiler/angkor/src/ADT/kernel/NHWCLayout.test.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/kernel/NHWCLayout.h"
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::kernel::Shape;
+using nncc::core::ADT::kernel::NHWCLayout;
+
+TEST(ADT_KERNEL_KERNEL_NHWC_LAYOUT, ch_increment)
+{
+ const uint32_t N = 4;
+ const uint32_t C = 3;
+ const uint32_t H = 6;
+ const uint32_t W = 5;
+
+ const Shape shape{N, C, H, W};
+ const NHWCLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 1, 1, 1) + 1, l.offset(shape, 1, 2, 1, 1));
+}
+
+TEST(ADT_KERNEL_KERNEL_NHWC_LAYOUT, col_increment)
+{
+ const uint32_t N = 4;
+ const uint32_t C = 3;
+ const uint32_t H = 6;
+ const uint32_t W = 5;
+
+ const Shape shape{N, C, H, W};
+ const NHWCLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 1, 1, 1) + C, l.offset(shape, 1, 1, 1, 2));
+}
+
+TEST(ADT_KERNEL_KERNEL_NHWC_LAYOUT, row_increment)
+{
+ const uint32_t N = 4;
+ const uint32_t C = 3;
+ const uint32_t H = 6;
+ const uint32_t W = 5;
+
+ const Shape shape{N, C, H, W};
+ const NHWCLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 1, 1, 1) + C * W, l.offset(shape, 1, 1, 2, 1));
+}
+
+TEST(ADT_KERNEL_KERNEL_NHWC_LAYOUT, n_increment)
+{
+ const uint32_t N = 4;
+ const uint32_t C = 3;
+ const uint32_t H = 6;
+ const uint32_t W = 5;
+
+ const Shape shape{N, C, H, W};
+ const NHWCLayout l;
+
+ ASSERT_EQ(l.offset(shape, 1, 1, 1, 1) + H * W * C, l.offset(shape, 2, 1, 1, 1));
+}
diff --git a/compiler/angkor/src/ADT/kernel/Overlay.test.cpp b/compiler/angkor/src/ADT/kernel/Overlay.test.cpp
new file mode 100644
index 000000000..e80ebbc30
--- /dev/null
+++ b/compiler/angkor/src/ADT/kernel/Overlay.test.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/kernel/Overlay.h"
+#include "nncc/core/ADT/kernel/NCHWLayout.h"
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::kernel::Shape;
+using nncc::core::ADT::kernel::NCHWLayout;
+using nncc::core::ADT::kernel::Overlay;
+
+using nncc::core::ADT::kernel::make_overlay;
+
+TEST(ADT_KERNEL_OVERLAY, ctor)
+{
+ const Shape shape{2, 4, 6, 3};
+
+ int data[2 * 4 * 6 * 3] = {
+ 0,
+ };
+ auto overlay = make_overlay<int, NCHWLayout>(shape, data);
+
+ ASSERT_EQ(overlay.shape().count(), shape.count());
+ ASSERT_EQ(overlay.shape().depth(), shape.depth());
+ ASSERT_EQ(overlay.shape().height(), shape.height());
+ ASSERT_EQ(overlay.shape().width(), shape.width());
+}
+
+TEST(ADT_KERNEL_OVERLAY, read)
+{
+ const Shape shape{2, 4, 6, 3};
+
+ int data[2 * 4 * 6 * 3] = {
+ 0,
+ };
+ const auto overlay = make_overlay<int, NCHWLayout>(shape, data);
+
+ NCHWLayout layout{};
+
+ ASSERT_EQ(data[layout.offset(shape, 1, 3, 5, 2)], 0);
+ data[layout.offset(shape, 1, 3, 5, 2)] = 2;
+ ASSERT_EQ(overlay.at(1, 3, 5, 2), 2);
+}
+
+TEST(ADT_KERNEL_OVERLAY, access)
+{
+ const Shape shape{2, 4, 6, 3};
+
+ int data[2 * 4 * 6 * 3] = {
+ 0,
+ };
+ auto overlay = make_overlay<int, NCHWLayout>(shape, data);
+
+ NCHWLayout layout{};
+
+ ASSERT_EQ(data[layout.offset(shape, 1, 3, 5, 2)], 0);
+ overlay.at(1, 3, 5, 2) = 4;
+ ASSERT_EQ(data[layout.offset(shape, 1, 3, 5, 2)], 4);
+}
diff --git a/compiler/angkor/src/ADT/kernel/Reader.cpp b/compiler/angkor/src/ADT/kernel/Reader.cpp
new file mode 100644
index 000000000..9e34167c8
--- /dev/null
+++ b/compiler/angkor/src/ADT/kernel/Reader.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/kernel/Reader.h"
+
+// DO NOT REMOVE THIS FILE
+// This file is introduced to test the self-completeness of 'Reader.h'
diff --git a/compiler/angkor/src/ADT/kernel/Shape.cpp b/compiler/angkor/src/ADT/kernel/Shape.cpp
new file mode 100644
index 000000000..8ad1edb67
--- /dev/null
+++ b/compiler/angkor/src/ADT/kernel/Shape.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/kernel/Shape.h"
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace kernel
+{
+
+bool operator==(const Shape &l, const Shape &r)
+{
+ return (l.count() == r.count()) && (l.depth() == r.depth()) && (l.height() == r.height()) &&
+ (l.width() == r.width());
+}
+
+} // namespace kernel
+} // namespace ADT
+} // namespace core
+} // namespace nncc
diff --git a/compiler/angkor/src/ADT/kernel/Shape.test.cpp b/compiler/angkor/src/ADT/kernel/Shape.test.cpp
new file mode 100644
index 000000000..da608fb7f
--- /dev/null
+++ b/compiler/angkor/src/ADT/kernel/Shape.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <nncc/core/ADT/kernel/Shape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ADT_KERNEL_SHAPE, ctor)
+{
+ const uint32_t N = 1;
+ const uint32_t C = 3;
+ const uint32_t H = 4;
+ const uint32_t W = 5;
+
+ nncc::core::ADT::kernel::Shape shape{N, C, H, W};
+
+ ASSERT_EQ(shape.count(), N);
+ ASSERT_EQ(shape.depth(), C);
+ ASSERT_EQ(shape.height(), H);
+ ASSERT_EQ(shape.width(), W);
+}
+
+TEST(ADT_KERNEL_SHAPE, num_elements)
+{
+ const uint32_t N = 1;
+ const uint32_t C = 3;
+ const uint32_t H = 4;
+ const uint32_t W = 5;
+
+ using nncc::core::ADT::kernel::Shape;
+ using nncc::core::ADT::kernel::num_elements;
+
+ ASSERT_EQ(num_elements(Shape{N, C, H, W}), N * C * H * W);
+}
+
+TEST(ADT_KERNEL_SHAPE, operator_eq)
+{
+ using nncc::core::ADT::kernel::Shape;
+
+ EXPECT_TRUE(Shape(1, 1, 1, 1) == Shape(1, 1, 1, 1));
+ EXPECT_FALSE(Shape(1, 1, 1, 1) == Shape(1, 1, 1, 2));
+ EXPECT_FALSE(Shape(1, 1, 1, 1) == Shape(1, 1, 2, 1));
+ EXPECT_FALSE(Shape(1, 1, 1, 1) == Shape(1, 2, 1, 1));
+ EXPECT_FALSE(Shape(1, 1, 1, 1) == Shape(2, 1, 1, 1));
+}
diff --git a/compiler/angkor/src/ADT/tensor/Buffer.test.cpp b/compiler/angkor/src/ADT/tensor/Buffer.test.cpp
new file mode 100644
index 000000000..c2b6a9983
--- /dev/null
+++ b/compiler/angkor/src/ADT/tensor/Buffer.test.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/tensor/Buffer.h"
+#include "nncc/core/ADT/tensor/LexicalLayout.h"
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Buffer;
+
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(ADT_TENSOR_BUFFER, ctor)
+{
+ const Shape shape{2, 3};
+ auto buffer = make_buffer<int, LexicalLayout>(shape);
+
+ ASSERT_EQ(buffer.shape(), shape);
+}
+
+TEST(ADT_TENSOR_BUFFER, access)
+{
+ const Shape shape{2, 3};
+ auto buffer = make_buffer<int, LexicalLayout>(shape);
+
+ const Index index{1, 2};
+
+ ASSERT_EQ(buffer.at(index), 0);
+ buffer.at(index) = 4;
+
+ // Casting is introduced to use 'const T &at(...) const' method
+ ASSERT_EQ(static_cast<const Buffer<int> &>(buffer).at(index), 4);
+}
diff --git a/compiler/angkor/src/ADT/tensor/Index.cpp b/compiler/angkor/src/ADT/tensor/Index.cpp
new file mode 100644
index 000000000..61f0a7106
--- /dev/null
+++ b/compiler/angkor/src/ADT/tensor/Index.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/tensor/Index.h"
+
+#include <stdexcept>
+#include <algorithm>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
+Index::Index(std::initializer_list<uint32_t> &&l) : _indices{l}
+{
+ // DO NOTHING
+}
+
+uint32_t Index::rank(void) const { return _indices.size(); }
+Index &Index::resize(uint32_t size)
+{
+ _indices.resize(size);
+ return *this;
+}
+
+Index &Index::fill(uint32_t index)
+{
+ std::fill(_indices.begin(), _indices.end(), index);
+ return (*this);
+}
+
+uint32_t &Index::at(uint32_t axis) { return _indices.at(axis); }
+uint32_t Index::at(uint32_t axis) const { return _indices.at(axis); }
+
+Index operator+(const Index &lhs, const Index &rhs)
+{
+ if (lhs.rank() != rhs.rank())
+ throw std::runtime_error("Two tensors should have same rank");
+
+ Index ret;
+ ret.resize(lhs.rank());
+ for (uint32_t axis = 0; axis < lhs.rank(); axis++)
+ {
+ ret.at(axis) = lhs.at(axis) + rhs.at(axis);
+ }
+ return ret;
+}
+
+bool operator==(const Index &lhs, const Index &rhs)
+{
+ if (lhs.rank() != rhs.rank())
+ return false;
+ for (uint32_t axis = 0; axis < lhs.rank(); axis++)
+ {
+ if (lhs.at(axis) != rhs.at(axis))
+ return false;
+ }
+ return true;
+}
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
diff --git a/compiler/angkor/src/ADT/tensor/Index.test.cpp b/compiler/angkor/src/ADT/tensor/Index.test.cpp
new file mode 100644
index 000000000..230602816
--- /dev/null
+++ b/compiler/angkor/src/ADT/tensor/Index.test.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/tensor/Index.h"
+
+#include <gtest/gtest.h>
+
+TEST(ADT_TENSOR_INDEX, ctor)
+{
+ nncc::core::ADT::tensor::Index index;
+
+ ASSERT_EQ(index.rank(), 0);
+}
+
+TEST(ADT_TENSOR_INDEX, ctor_initializer_list)
+{
+ const nncc::core::ADT::tensor::Index index{1, 3, 5, 7};
+
+ ASSERT_EQ(index.rank(), 4);
+
+ ASSERT_EQ(index.at(0), 1);
+ ASSERT_EQ(index.at(1), 3);
+ ASSERT_EQ(index.at(2), 5);
+ ASSERT_EQ(index.at(3), 7);
+}
+
+TEST(ADT_TENSOR_INDEX, operator_add)
+{
+ nncc::core::ADT::tensor::Index index1{1, 2, 3, 4};
+ nncc::core::ADT::tensor::Index index2{5, 6, 7, 8};
+ nncc::core::ADT::tensor::Index result{index1 + index2};
+
+ ASSERT_EQ(result.at(0), 6);
+ ASSERT_EQ(result.at(1), 8);
+ ASSERT_EQ(result.at(2), 10);
+ ASSERT_EQ(result.at(3), 12);
+}
+
+TEST(ADT_TENSOR_INDEX, operator_eqaul)
+{
+ nncc::core::ADT::tensor::Index index1{1, 2, 3, 4};
+ nncc::core::ADT::tensor::Index index2{1, 2, 3, 4};
+ nncc::core::ADT::tensor::Index index3{5, 6, 7, 8};
+ nncc::core::ADT::tensor::Index index4{1, 2};
+
+ ASSERT_TRUE(index1 == index2);
+ ASSERT_FALSE(index1 == index3);
+ ASSERT_FALSE(index1 == index4);
+}
+
+TEST(ADT_TENSOR_INDEX, operator_add_different_size)
+{
+ nncc::core::ADT::tensor::Index index1{1, 2, 3, 4};
+ nncc::core::ADT::tensor::Index index2{5, 6};
+
+ EXPECT_THROW(index1 + index2, std::runtime_error);
+}
+
+TEST(ADT_TENSOR_INDEX, resize)
+{
+ nncc::core::ADT::tensor::Index index;
+
+ index.resize(4);
+
+ ASSERT_EQ(index.rank(), 4);
+}
+
+TEST(ADT_TENSOR_INDEX, at)
+{
+ nncc::core::ADT::tensor::Index index;
+
+ index.resize(4);
+
+ uint32_t indices[4] = {3, 5, 2, 7};
+
+ for (uint32_t axis = 0; axis < 4; ++axis)
+ {
+ index.at(axis) = indices[axis];
+ ASSERT_EQ(index.at(axis), indices[axis]);
+ }
+}
+
+TEST(ADT_TENSOR_INDEX, copy)
+{
+ const nncc::core::ADT::tensor::Index original{3, 5, 2, 7};
+ const nncc::core::ADT::tensor::Index copied{original};
+
+ ASSERT_EQ(original.rank(), copied.rank());
+
+ for (uint32_t axis = 0; axis < 4; ++axis)
+ {
+ ASSERT_EQ(original.at(axis), copied.at(axis));
+ }
+}
+
+TEST(ADT_TENSOR_INDEX, fill)
+{
+ nncc::core::ADT::tensor::Index index{1, 6};
+
+ index.fill(3);
+
+ ASSERT_EQ(index.rank(), 2);
+
+ ASSERT_EQ(index.at(0), 3);
+ ASSERT_EQ(index.at(1), 3);
+}
diff --git a/compiler/angkor/src/ADT/tensor/IndexEnumerator.cpp b/compiler/angkor/src/ADT/tensor/IndexEnumerator.cpp
new file mode 100644
index 000000000..623313a2e
--- /dev/null
+++ b/compiler/angkor/src/ADT/tensor/IndexEnumerator.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <cassert>
+
+using nncc::core::ADT::tensor::Shape;
+
+inline uint32_t axis_of(const Shape &shape, uint32_t cursor)
+{
+ const uint32_t rank = shape.rank();
+ assert(cursor < rank);
+ return rank - cursor - 1;
+}
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
+IndexEnumerator::IndexEnumerator(const Shape &shape) : _shape{shape}, _cursor(0)
+{
+ const uint32_t rank = _shape.rank();
+
+ // Initialize _index
+ _index.resize(rank);
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ _index.at(axis) = 0;
+ }
+
+ // Initialize _cursor
+ for (_cursor = 0; _cursor < rank; ++_cursor)
+ {
+ const auto axis = axis_of(_shape, _cursor);
+
+ if (_index.at(axis) < _shape.dim(axis))
+ {
+ break;
+ }
+ }
+}
+
+void IndexEnumerator::advance(void)
+{
+ const uint32_t rank = _shape.rank();
+
+ // Find axis to be updated
+ while (_cursor < rank)
+ {
+ const auto axis = axis_of(_shape, _cursor);
+
+ if ((_index.at(axis)) + 1 < _shape.dim(axis))
+ {
+ break;
+ }
+
+ ++_cursor;
+ }
+
+ if (_cursor == rank)
+ {
+ return;
+ }
+
+ // Update index
+ _index.at(axis_of(_shape, _cursor)) += 1;
+
+ for (uint32_t pos = 0; pos < _cursor; ++pos)
+ {
+ const auto axis = axis_of(_shape, pos);
+ _index.at(axis) = 0;
+ }
+
+ // Reset cursor
+ _cursor = 0;
+}
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
diff --git a/compiler/angkor/src/ADT/tensor/IndexEnumerator.test.cpp b/compiler/angkor/src/ADT/tensor/IndexEnumerator.test.cpp
new file mode 100644
index 000000000..204a8aa21
--- /dev/null
+++ b/compiler/angkor/src/ADT/tensor/IndexEnumerator.test.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <vector>
+#include <algorithm>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+TEST(ADT_TENSOR_INDEX_ENUMERATOR, iterate_full_range)
+{
+ const uint32_t H = 3;
+ const uint32_t W = 4;
+
+ const Shape shape{H, W};
+
+ std::vector<uint32_t> count;
+
+ count.resize(H * W, 0);
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ const auto &ind = e.current();
+
+ ASSERT_EQ(ind.rank(), 2);
+ count.at(ind.at(0) * W + ind.at(1)) += 1;
+ }
+
+ ASSERT_TRUE(std::all_of(count.begin(), count.end(), [](uint32_t n) { return n == 1; }));
+}
diff --git a/compiler/angkor/src/ADT/tensor/Layout.cpp b/compiler/angkor/src/ADT/tensor/Layout.cpp
new file mode 100644
index 000000000..7faf7507d
--- /dev/null
+++ b/compiler/angkor/src/ADT/tensor/Layout.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/tensor/Layout.h"
+
+#include <cassert>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
+Layout::Layout(const Func &func) : _func{func} { assert(_func != nullptr); }
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
diff --git a/compiler/angkor/src/ADT/tensor/Layout.test.cpp b/compiler/angkor/src/ADT/tensor/Layout.test.cpp
new file mode 100644
index 000000000..145adfecc
--- /dev/null
+++ b/compiler/angkor/src/ADT/tensor/Layout.test.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/tensor/Layout.h"
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+
+static uint32_t offset_0(const Shape &, const Index &) { return 0; }
+static uint32_t offset_1(const Shape &, const Index &) { return 1; }
+
+TEST(ADT_TENSOR_LAYOUT, ctor)
+{
+ nncc::core::ADT::tensor::Layout l{offset_0};
+
+ ASSERT_EQ(l.offset(Shape{4, 3, 6}, Index{1, 1, 1}), 0);
+}
+
+TEST(ADT_TENSOR_LAYOUT, copy)
+{
+ nncc::core::ADT::tensor::Layout orig{offset_0};
+ nncc::core::ADT::tensor::Layout copy{offset_1};
+
+ ASSERT_EQ(copy.offset(Shape{4, 3, 6}, Index{1, 1, 1}), 1);
+
+ copy = orig;
+
+ ASSERT_EQ(copy.offset(Shape{4, 3, 6}, Index{1, 1, 1}), 0);
+}
+
+TEST(ADT_TENSOR_LAYOUT, move)
+{
+ nncc::core::ADT::tensor::Layout orig{offset_0};
+ nncc::core::ADT::tensor::Layout move{offset_1};
+
+ ASSERT_EQ(move.offset(Shape{4, 3, 6}, Index{1, 1, 1}), 1);
+
+ move = std::move(orig);
+
+ ASSERT_EQ(move.offset(Shape{4, 3, 6}, Index{1, 1, 1}), 0);
+}
diff --git a/compiler/angkor/src/ADT/tensor/LexicalLayout.cpp b/compiler/angkor/src/ADT/tensor/LexicalLayout.cpp
new file mode 100644
index 000000000..671c60cec
--- /dev/null
+++ b/compiler/angkor/src/ADT/tensor/LexicalLayout.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/tensor/LexicalLayout.h"
+
+#include <cassert>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+
+// NOTE This forward declaration is introduced to minimize code diff
+static uint32_t lexical_offset(const Shape &shape, const Index &index)
+{
+ assert(shape.rank() > 0);
+ assert(shape.rank() == index.rank());
+
+ const uint32_t rank = shape.rank();
+
+ uint32_t res = index.at(0);
+
+ for (uint32_t axis = 1; axis < rank; ++axis)
+ {
+ res *= shape.dim(axis);
+ res += index.at(axis);
+ }
+
+ return res;
+}
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
+LexicalLayout::LexicalLayout() : Layout(lexical_offset)
+{
+ // DO NOTHING
+}
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
diff --git a/compiler/angkor/src/ADT/tensor/LexicalLayout.test.cpp b/compiler/angkor/src/ADT/tensor/LexicalLayout.test.cpp
new file mode 100644
index 000000000..8f9b7296f
--- /dev/null
+++ b/compiler/angkor/src/ADT/tensor/LexicalLayout.test.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/tensor/LexicalLayout.h"
+
+#include <type_traits>
+
+#include <gtest/gtest.h>
+
+TEST(ADT_TENSOR_LEXICAL_LAYOUT, last)
+{
+ const nncc::core::ADT::tensor::Shape shape{4, 3, 6};
+ const nncc::core::ADT::tensor::Index curr{1, 1, 1};
+ const nncc::core::ADT::tensor::Index next{1, 1, 2};
+
+ const nncc::core::ADT::tensor::LexicalLayout l;
+
+ ASSERT_EQ(l.offset(shape, curr) + 1, l.offset(shape, next));
+}
+
+TEST(ADT_TENSOR_LEXICAL_LAYOUT, lexical_middle)
+{
+ const nncc::core::ADT::tensor::Shape shape{4, 3, 6};
+ const nncc::core::ADT::tensor::Index curr{1, 1, 1};
+ const nncc::core::ADT::tensor::Index next{1, 2, 1};
+
+ const nncc::core::ADT::tensor::LexicalLayout l;
+
+ ASSERT_EQ(l.offset(shape, curr) + 6, l.offset(shape, next));
+}
+
+TEST(ADT_TENSOR_LEXICAL_LAYOUT, lexical_first)
+{
+ const nncc::core::ADT::tensor::Shape shape{4, 3, 6};
+ const nncc::core::ADT::tensor::Index curr{1, 1, 1};
+ const nncc::core::ADT::tensor::Index next{2, 1, 1};
+
+ const nncc::core::ADT::tensor::LexicalLayout l;
+
+ ASSERT_EQ(l.offset(shape, curr) + 6 * 3, l.offset(shape, next));
+}
diff --git a/compiler/angkor/src/ADT/tensor/Overlay.test.cpp b/compiler/angkor/src/ADT/tensor/Overlay.test.cpp
new file mode 100644
index 000000000..aacb5a9a1
--- /dev/null
+++ b/compiler/angkor/src/ADT/tensor/Overlay.test.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/tensor/Overlay.h"
+#include "nncc/core/ADT/tensor/LexicalLayout.h"
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Overlay;
+
+using nncc::core::ADT::tensor::make_overlay;
+
+TEST(ADT_TENSOR_OVERLAY, ctor)
+{
+ const Shape shape{2, 3};
+
+ int data[2 * 3] = {
+ 0,
+ };
+ auto view = make_overlay<int, LexicalLayout>(shape, data);
+
+ ASSERT_EQ(view.shape(), shape);
+}
+
+TEST(ADT_TENSOR_OVERLAY, read)
+{
+ const Shape shape{2, 3};
+
+ int data[2 * 3] = {
+ 0,
+ };
+ const auto view = make_overlay<int, LexicalLayout>(shape, data);
+
+ LexicalLayout layout{};
+
+ const Index index{1, 2};
+
+ ASSERT_EQ(data[layout.offset(shape, index)], 0);
+ data[layout.offset(shape, index)] = 2;
+ ASSERT_EQ(view.at(index), 2);
+}
+
+TEST(ADT_TENSOR_OVERLAY, access)
+{
+ const Shape shape{2, 3};
+
+ int data[2 * 3] = {
+ 0,
+ };
+ auto view = make_overlay<int, LexicalLayout>(shape, data);
+
+ LexicalLayout layout{};
+
+ const Index index{1, 2};
+
+ ASSERT_EQ(data[layout.offset(shape, index)], 0);
+ view.at(index) = 4;
+ ASSERT_EQ(data[layout.offset(shape, index)], 4);
+}
diff --git a/compiler/angkor/src/ADT/tensor/Reader.cpp b/compiler/angkor/src/ADT/tensor/Reader.cpp
new file mode 100644
index 000000000..d79e66dac
--- /dev/null
+++ b/compiler/angkor/src/ADT/tensor/Reader.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/tensor/Reader.h"
+
+// DO NOT REMOVE THIS FILE
+//
+// This file is introduced to check the self-completeness of 'Reader.h'
diff --git a/compiler/angkor/src/ADT/tensor/Shape.cpp b/compiler/angkor/src/ADT/tensor/Shape.cpp
new file mode 100644
index 000000000..fb39ba192
--- /dev/null
+++ b/compiler/angkor/src/ADT/tensor/Shape.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/tensor/Shape.h"
+
+#include <algorithm>
+
+namespace nncc
+{
+namespace core
+{
+namespace ADT
+{
+namespace tensor
+{
+
+Shape::Shape(std::initializer_list<uint32_t> &&l) : _dims{l}
+{
+ // DO NOTHING
+}
+
+uint32_t Shape::rank(void) const { return _dims.size(); }
+Shape &Shape::resize(uint32_t size)
+{
+ _dims.resize(size);
+ return *this;
+}
+
+uint32_t &Shape::dim(uint32_t axis) { return _dims.at(axis); }
+uint32_t Shape::dim(uint32_t axis) const { return _dims.at(axis); }
+
+Shape &Shape::squeeze(void)
+{
+ _dims.erase(std::remove(_dims.begin(), _dims.end(), 1), _dims.end());
+ return *this;
+}
+
+uint64_t num_elements(const Shape &shape)
+{
+ uint64_t res = 1;
+
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ res *= shape.dim(axis);
+ }
+
+ return res;
+}
+
+Shape squeeze(const Shape &shape)
+{
+ Shape res{shape};
+ res.squeeze();
+ return res;
+}
+
+bool operator==(const Shape &lhs, const Shape &rhs)
+{
+ if (lhs.rank() != rhs.rank())
+ {
+ return false;
+ }
+
+ for (uint32_t axis = 0; axis < lhs.rank(); ++axis)
+ {
+ if (lhs.dim(axis) != rhs.dim(axis))
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+} // namespace tensor
+} // namespace ADT
+} // namespace core
+} // namespace nncc
diff --git a/compiler/angkor/src/ADT/tensor/Shape.test.cpp b/compiler/angkor/src/ADT/tensor/Shape.test.cpp
new file mode 100644
index 000000000..711ae3d40
--- /dev/null
+++ b/compiler/angkor/src/ADT/tensor/Shape.test.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nncc/core/ADT/tensor/Shape.h"
+
+#include <gtest/gtest.h>
+
+TEST(ADT_TENSOR_SHAPE, ctor)
+{
+ nncc::core::ADT::tensor::Shape shape;
+
+ ASSERT_EQ(shape.rank(), 0);
+}
+
+TEST(ADT_TENSOR_SHAPE, ctor_initializer_list)
+{
+ nncc::core::ADT::tensor::Shape shape{1, 3, 5, 7};
+
+ ASSERT_EQ(shape.rank(), 4);
+
+ ASSERT_EQ(shape.dim(0), 1);
+ ASSERT_EQ(shape.dim(1), 3);
+ ASSERT_EQ(shape.dim(2), 5);
+ ASSERT_EQ(shape.dim(3), 7);
+}
+
+TEST(ADT_TENSOR_SHAPE, resize)
+{
+ nncc::core::ADT::tensor::Shape shape;
+
+ shape.resize(4);
+
+ ASSERT_EQ(shape.rank(), 4);
+}
+
+TEST(ADT_TENSOR_SHAPE, dim)
+{
+ nncc::core::ADT::tensor::Shape shape;
+
+ shape.resize(4);
+
+ uint32_t dims[4] = {3, 5, 2, 7};
+
+ for (uint32_t axis = 0; axis < 4; ++axis)
+ {
+ shape.dim(axis) = dims[axis];
+ ASSERT_EQ(shape.dim(axis), dims[axis]);
+ }
+}
+
+TEST(ADT_TENSOR_SHAPE, copy)
+{
+ const nncc::core::ADT::tensor::Shape original{3, 5, 2, 7};
+ const nncc::core::ADT::tensor::Shape copied{original};
+
+ ASSERT_EQ(original.rank(), copied.rank());
+
+ for (uint32_t axis = 0; axis < 4; ++axis)
+ {
+ ASSERT_EQ(original.dim(axis), copied.dim(axis));
+ }
+}
+
+TEST(ADT_TENSOR_SHAPE, num_elements_rank_0)
+{
+ using nncc::core::ADT::tensor::Shape;
+ using nncc::core::ADT::tensor::num_elements;
+
+ Shape rank_0_shape;
+
+ ASSERT_EQ(num_elements(rank_0_shape), 1);
+}
+
+TEST(ADT_TENSOR_SHAPE, num_elements_zero)
+{
+ using nncc::core::ADT::tensor::Shape;
+ using nncc::core::ADT::tensor::num_elements;
+
+ ASSERT_EQ(num_elements(Shape{0, 0, 0, 0}), 0);
+}
+
+TEST(ADT_TENSOR_SHAPE, num_elements_nonzero)
+{
+ using nncc::core::ADT::tensor::Shape;
+ using nncc::core::ADT::tensor::num_elements;
+
+ ASSERT_EQ(num_elements(Shape{2, 3}), 6);
+}
+
+TEST(ADT_TENSOR_SHAPE, num_elements_nulldim)
+{
+ using nncc::core::ADT::tensor::Shape;
+ using nncc::core::ADT::tensor::num_elements;
+
+ ASSERT_EQ(num_elements(Shape{2, 0, 3}), 0);
+}
+
+TEST(ADT_TENSOR_SHAPE, squeeze_neg)
+{
+ using nncc::core::ADT::tensor::Shape;
+ using nncc::core::ADT::tensor::squeeze;
+
+ auto squeezed = squeeze(Shape{3, 5, 2});
+
+ ASSERT_EQ(squeezed.rank(), 3);
+ ASSERT_EQ(squeezed.dim(0), 3);
+ ASSERT_EQ(squeezed.dim(1), 5);
+ ASSERT_EQ(squeezed.dim(2), 2);
+}
+
+TEST(ADT_TENSOR_SHAPE, squeeze_neg_0)
+{
+ using nncc::core::ADT::tensor::Shape;
+ using nncc::core::ADT::tensor::squeeze;
+
+ auto squeezed = squeeze(Shape{3, 0, 2});
+
+ ASSERT_EQ(squeezed.rank(), 3);
+ ASSERT_EQ(squeezed.dim(0), 3);
+ ASSERT_EQ(squeezed.dim(1), 0);
+ ASSERT_EQ(squeezed.dim(2), 2);
+}
+
+TEST(ADT_TENSOR_SHAPE, squeeze_pos)
+{
+ using nncc::core::ADT::tensor::Shape;
+ using nncc::core::ADT::tensor::squeeze;
+
+ auto squeezed = squeeze(Shape{3, 1, 2});
+
+ ASSERT_EQ(squeezed.rank(), 2);
+ ASSERT_EQ(squeezed.dim(0), 3);
+ ASSERT_EQ(squeezed.dim(1), 2);
+}
+
+TEST(ADT_TENSOR_SHAPE, squeeze_nested)
+{
+ using nncc::core::ADT::tensor::Shape;
+ using nncc::core::ADT::tensor::squeeze;
+
+ Shape shape{3, 1, 2};
+
+ shape.squeeze().squeeze();
+
+ ASSERT_EQ(shape.rank(), 2);
+ ASSERT_EQ(shape.dim(0), 3);
+ ASSERT_EQ(shape.dim(1), 2);
+}
+
+TEST(ADT_TENSOR_SHAPE, eq_negative_on_unmatched_rank)
+{
+ const nncc::core::ADT::tensor::Shape left{1, 1, 1};
+ const nncc::core::ADT::tensor::Shape right{1, 1, 1, 1};
+
+ ASSERT_FALSE(left == right);
+}
+
+TEST(ADT_TENSOR_SHAPE, eq_negative_on_unmatched_dim)
+{
+ const nncc::core::ADT::tensor::Shape left{2, 3};
+ const nncc::core::ADT::tensor::Shape right{2, 4};
+
+ ASSERT_FALSE(left == right);
+}
+
+TEST(ADT_TENSOR_SHAPE, eq_positive)
+{
+ const nncc::core::ADT::tensor::Shape left{2, 3};
+ const nncc::core::ADT::tensor::Shape right{2, 3};
+
+ ASSERT_TRUE(left == right);
+}
diff --git a/compiler/angkor/src/TensorIndex.test.cpp b/compiler/angkor/src/TensorIndex.test.cpp
new file mode 100644
index 000000000..68cf3917a
--- /dev/null
+++ b/compiler/angkor/src/TensorIndex.test.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "angkor/TensorIndex.h"
+
+#include <gtest/gtest.h>
+
+TEST(TensorIndexTest, ctor)
+{
+ angkor::TensorIndex index;
+
+ ASSERT_EQ(index.rank(), 0);
+}
+
+TEST(TensorIndexTest, ctor_initializer_list)
+{
+ const angkor::TensorIndex index{1, 3, 5, 7};
+
+ ASSERT_EQ(index.rank(), 4);
+
+ ASSERT_EQ(index.at(0), 1);
+ ASSERT_EQ(index.at(1), 3);
+ ASSERT_EQ(index.at(2), 5);
+ ASSERT_EQ(index.at(3), 7);
+}
+
+TEST(TensorIndexTest, resize)
+{
+ angkor::TensorIndex index;
+
+ index.resize(4);
+
+ ASSERT_EQ(index.rank(), 4);
+}
+
+TEST(TensorIndexTest, at)
+{
+ angkor::TensorIndex index;
+
+ index.resize(4);
+
+ uint32_t indices[4] = {3, 5, 2, 7};
+
+ for (uint32_t axis = 0; axis < 4; ++axis)
+ {
+ index.at(axis) = indices[axis];
+ ASSERT_EQ(index.at(axis), indices[axis]);
+ }
+}
+
+TEST(TensorIndexTest, copy)
+{
+ const angkor::TensorIndex original{3, 5, 2, 7};
+ const angkor::TensorIndex copied{original};
+
+ ASSERT_EQ(original.rank(), copied.rank());
+
+ for (uint32_t axis = 0; axis < 4; ++axis)
+ {
+ ASSERT_EQ(original.at(axis), copied.at(axis));
+ }
+}
+
+TEST(TensorIndexTest, fill)
+{
+ angkor::TensorIndex index{1, 6};
+
+ index.fill(3);
+
+ ASSERT_EQ(index.rank(), 2);
+
+ ASSERT_EQ(index.at(0), 3);
+ ASSERT_EQ(index.at(1), 3);
+}
diff --git a/compiler/angkor/src/TensorShape.test.cpp b/compiler/angkor/src/TensorShape.test.cpp
new file mode 100644
index 000000000..5e6766a96
--- /dev/null
+++ b/compiler/angkor/src/TensorShape.test.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "angkor/TensorShape.h"
+
+#include <gtest/gtest.h>
+
+TEST(TensorShapeTest, ctor)
+{
+ angkor::TensorShape shape;
+
+ ASSERT_EQ(shape.rank(), 0);
+}
+
+TEST(TensorShapeTest, ctor_initializer_list)
+{
+ angkor::TensorShape shape{1, 3, 5, 7};
+
+ ASSERT_EQ(shape.rank(), 4);
+
+ ASSERT_EQ(shape.dim(0), 1);
+ ASSERT_EQ(shape.dim(1), 3);
+ ASSERT_EQ(shape.dim(2), 5);
+ ASSERT_EQ(shape.dim(3), 7);
+}
+
+TEST(TensorShapeTest, resize)
+{
+ angkor::TensorShape shape;
+
+ shape.resize(4);
+
+ ASSERT_EQ(shape.rank(), 4);
+}
+
+TEST(TensorShapeTest, dim)
+{
+ angkor::TensorShape shape;
+
+ shape.resize(4);
+
+ uint32_t dims[4] = {3, 5, 2, 7};
+
+ for (uint32_t axis = 0; axis < 4; ++axis)
+ {
+ shape.dim(axis) = dims[axis];
+ ASSERT_EQ(shape.dim(axis), dims[axis]);
+ }
+}
+
+TEST(TensorShapeTest, copy)
+{
+ const angkor::TensorShape original{3, 5, 2, 7};
+ const angkor::TensorShape copied{original};
+
+ ASSERT_EQ(original.rank(), copied.rank());
+
+ for (uint32_t axis = 0; axis < 4; ++axis)
+ {
+ ASSERT_EQ(original.dim(axis), copied.dim(axis));
+ }
+}
+
+TEST(TensorShapeTest, eq_negative_on_unmatched_rank)
+{
+ const angkor::TensorShape left{1, 1, 1};
+ const angkor::TensorShape right{1, 1, 1, 1};
+
+ ASSERT_FALSE(left == right);
+}
+
+TEST(TensorShapeTest, eq_negative_on_unmatched_dim)
+{
+ const angkor::TensorShape left{2, 3};
+ const angkor::TensorShape right{2, 4};
+
+ ASSERT_FALSE(left == right);
+}
+
+TEST(TensorShapeTest, eq_positive)
+{
+ const angkor::TensorShape left{2, 3};
+ const angkor::TensorShape right{2, 3};
+
+ ASSERT_TRUE(left == right);
+}
diff --git a/compiler/ann-api/CMakeLists.txt b/compiler/ann-api/CMakeLists.txt
new file mode 100644
index 000000000..d2c45f9f0
--- /dev/null
+++ b/compiler/ann-api/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_library(ann_api INTERFACE)
+target_include_directories(ann_api INTERFACE include)
diff --git a/compiler/ann-api/include/.FORMATDENY b/compiler/ann-api/include/.FORMATDENY
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/ann-api/include/.FORMATDENY
diff --git a/compiler/ann-api/include/NeuralNetworks.h b/compiler/ann-api/include/NeuralNetworks.h
new file mode 100644
index 000000000..606156927
--- /dev/null
+++ b/compiler/ann-api/include/NeuralNetworks.h
@@ -0,0 +1,2075 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @addtogroup NeuralNetworks
+ * @{
+ */
+
+/**
+ * @file NeuralNetworks.h
+ */
+
+#ifndef ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+#define ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+
+/******************************************************************
+ *
+ * IMPORTANT NOTICE:
+ *
+ * This file is part of Android's set of stable system headers
+ * exposed by the Android NDK (Native Development Kit).
+ *
+ * Third-party source AND binary code relies on the definitions
+ * here to be FROZEN ON ALL UPCOMING PLATFORM RELEASES.
+ *
+ * - DO NOT MODIFY ENUMS (EXCEPT IF YOU ADD NEW 32-BIT VALUES)
+ * - DO NOT MODIFY CONSTANTS OR FUNCTIONAL MACROS
+ * - DO NOT CHANGE THE SIGNATURE OF FUNCTIONS IN ANY WAY
+ * - DO NOT CHANGE THE LAYOUT OR SIZE OF STRUCTURES
+ */
+
+#if __ANDROID_API__ >= __ANDROID_API_O_MR1__
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+
+/**
+ * Operand types.
+ *
+ * The type of operands that can be added to a model.
+ *
+ * Although we define many types, most operators accept just a few
+ * types. Most used are {@link ANEURALNETWORKS_TENSOR_FLOAT32},
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * and {@link ANEURALNETWORKS_INT32}.
+ */
+typedef enum {
+ /** The following entries are used to declare scalars. */
+
+ /** A 32 bit floating point scalar value. */
+ ANEURALNETWORKS_FLOAT32 = 0,
+ /** A signed 32 bit integer scalar value. */
+ ANEURALNETWORKS_INT32 = 1,
+ /** An unsigned 32 bit integer scalar value. */
+ ANEURALNETWORKS_UINT32 = 2,
+
+ /** The following entries are used to declare tensors. */
+
+ /** A tensor of 32 bit floating point values. */
+ ANEURALNETWORKS_TENSOR_FLOAT32 = 3,
+ /** A tensor of 32 bit integer values. */
+ ANEURALNETWORKS_TENSOR_INT32 = 4,
+ /** A tensor of 8 bit integers that represent real numbers.
+ *
+ * Attached to this tensor are two numbers that can be used to convert
+ * the 8 bit integer to the real value and vice versa. These two numbers are:
+ * - scale: a 32 bit non-negative floating point value.
+ * - zeroPoint: an 32 bit integer, in range [0, 255].
+ *
+ * The formula is:
+ * real_value = (integer_value - zeroPoint) * scale.
+ */
+ ANEURALNETWORKS_TENSOR_QUANT8_ASYMM = 5,
+} OperandCode;
+
+/**
+ * Operation types.
+ *
+ * The type of operations that can be added to a model.
+ */
+typedef enum {
+ /** Adds two tensors, element-wise.
+ *
+ * Takes two input tensors of identical type and compatible dimensions. The output
+ * is the sum of both input tensors, optionally modified by an activation function.
+ *
+ * Two dimensions are compatible when:
+ * 1. they are equal, or
+ * 2. one of them is 1
+ *
+ * The size of the output is the maximum size along each dimension of the input operands.
+ * It starts with the trailing dimensions, and works its way forward.
+ *
+ * Example:
+ *
+ * input1.dimension = {4, 1, 2}
+ * input2.dimension = {5, 4, 3, 1}
+ * output.dimension = {5, 4, 3, 2}
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0: A tensor.
+ * * 1: A tensor of the same type, and compatible dimensions as input0.
+ * * 2: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The sum, a tensor of the same type as input0.
+ */
+ ANEURALNETWORKS_ADD = 0,
+
+ /** Performs a 2-D average pooling operation.
+ *
+ * The output dimensions are functions of the filter dimensions, stride, and padding.
+ *
+ * The values in the output tensor are computed as:
+ *
+ * output[batch, row, col, channel] =
+ * sum_{i, j}(input[batch, row + i, col + j, channel]) / sum(1)
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 4, with "NHWC" (i.e., Num_samples, Height, Width, and Channels)
+ * data layout.
+ *
+ * Both explicit padding and implicit padding are supported.
+ *
+ * Inputs (explicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
+ * * 2: An INT32 value, specifying the padding on the right,in the ‘width’ dimension.
+ * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
+ * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
+ * * 5: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 6: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 7: An INT32 value, specifying the filter width.
+ * * 8: An INT32 value, specifying the filter height.
+ * * 9: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Inputs (implicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the
+ * {@link PaddingCode} values.
+ * * 2: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 3: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 4: An INT32 value, specifying the filter width.
+ * * 5: An INT32 value, specifying the filter height.
+ * * 6: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth].
+ */
+ ANEURALNETWORKS_AVERAGE_POOL_2D = 1,
+
+ /** Concatenates the input tensors along the given dimension.
+ *
+ * The input tensors must have identical type and the same dimensions except the
+ * dimension along the concatenation axis.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0 ~ n-1: The list of n input tensors, of shape [D0, D1, ..., Daxis(i), ..., Dm].
+ * For inputs of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, all
+ * input tensors must have the same scale and zeroPoint.
+ * * n: An INT32 value, specifying the concatenation axis.
+ *
+ * Outputs:
+ * * 0: The output, a tensor of the same type as the input tensors.
+ * The output shape is [D0, D1, ..., sum(Daxis(i)), ..., Dm].
+ */
+ ANEURALNETWORKS_CONCATENATION = 2,
+
+ /** Performs an 2-D convolution operation.
+ *
+ * The CONV_2D op sweeps a 2-D filter that can mix channels together over a batch of
+ * images, applying the filter to each window of each image of the appropriate size.
+ *
+ * The output dimensions are functions of the filter dimensions, stride, and padding.
+ *
+ * The values in the output tensor are computed as:
+ *
+ * output[batch, row, col, channel] =
+ * sum_{i, j} (
+ * input[batch, row + i, col + j, k] *
+ * filter[channel, row + i, col + j, k] +
+ * bias[channel]
+ * )
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Both explicit padding and implicit padding are supported.
+ *
+ * Inputs (explicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
+ * * 1: A 4-D tensor, of shape [depth_out, filter_height, filter_width, depth_in],
+ * specifying the filter.
+ * * 2: A 1-D tensor, of shape [depth_out], specifying the bias.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should
+ * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias
+ * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
+ * bias_scale == input_scale * filter_scale.
+ * * 3: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
+ * * 4: An INT32 value, specifying the padding on the right,in the ‘width’ dimension.
+ * * 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
+ * * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
+ * * 7: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 8: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 9: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Inputs (implicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
+ * * 1: A 4-D tensor, of shape [depth_out, filter_height, filter_width, depth_in],
+ * specifying the filter.
+ * * 2: A 1-D tensor, of shape [depth_out], specifying the bias.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should
+ * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias
+ * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
+ * bias_scale == input_scale * filter_scale.
+ * * 3: An INT32 value, specifying the implicit padding scheme, has to be one of the
+ * {@link PaddingCode} values.
+ * * 4: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 5: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 6: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out].
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the following
+ * condition must be satisfied: output_scale > input_scale * filter_scale.
+ */
+ ANEURALNETWORKS_CONV_2D = 3,
+
+ /** Performs a depthwise 2-D convolution operation.
+ *
+ * Given an input tensor of shape [batches, height, width, depth_in] and a filter
+ * tensor of shape [1, filter_height, filter_width, depth_out] containing
+ * depth_out convolutional filters of depth 1, DEPTHWISE_CONV applies a different
+ * filter to each input channel (expanding from 1 channel to channel_multiplier channels
+ * for each), then concatenates the results together.
+ *
+ * The output has depth_out = depth_in * depth_multiplier channels.
+ * The output dimensions are functions of the filter dimensions, stride, and padding.
+ *
+ * The values in the output tensor are computed as:
+ *
+ * output[b, i, j, k * channel_multiplier + q] =
+ * sum_{di, dj} (
+ * input[b, strides[1] * i + di, strides[2] * j + dj, k] *
+ * filter[1, di, dj, k * channel_multiplier + q]
+ * )
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Both explicit padding and implicit padding are supported.
+ *
+ * Inputs (explicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
+ * * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out],
+ * specifying the filter.
+ * * 2: A 1-D tensor, of shape [depth_out], specifying the bias.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should
+ * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias
+ * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
+ * bias_scale == input_scale * filter_scale.
+ * * 3: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
+ * * 4: An INT32 value, specifying the padding on the right,in the ‘width’ dimension.
+ * * 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
+ * * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
+ * * 7: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 8: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 9: An INT32 value, specifying the depthwise multiplier.
+ * * 10: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Inputs (explicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
+ * * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out],
+ * specifying the filter.
+ * * 2: A 1-D tensor, of shape [depth_out], specifying the bias.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should
+ * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias
+ * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
+ * bias_scale == input_scale * filter_scale.
+ * * 3: An INT32 value, specifying the implicit padding scheme, has to be one of the
+ * {@link PaddingCode} values.
+ * * 4: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 5: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 6: An INT32 value, specifying the depthwise multiplier.
+ * * 7: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out].
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the following
+ * condition must be satisfied: output_scale > input_scale * filter_scale.
+ */
+ ANEURALNETWORKS_DEPTHWISE_CONV_2D = 4,
+
+ /** Rearranges data from depth into blocks of spatial data.
+ *
+ * More specifically, this op outputs a copy of the input tensor where values from
+ * the depth dimension are moved in spatial blocks to the height and width dimensions.
+ * The value block_size indicates the input block size and how the data is moved.
+ *
+ * Chunks of data of size block_size * block_size from depth are rearranged into
+ * non-overlapping blocks of size block_size x block_size.
+ *
+ * The width of the output tensor is input_width * block_size, whereas the height is
+ * input_height * block_size.
+ * The depth of the input tensor must be divisible by block_size * block_size
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Inputs:
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
+ * * 1: An INT32 value, specifying the block_size. block_size must be >=1 and
+ * block_size * block_size must be a divisor of the input depth.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batch, height*block_size, width*block_size,
+ * depth/(block_size*block_size)].
+ */
+ ANEURALNETWORKS_DEPTH_TO_SPACE = 5,
+
+ /** Dequantizes the input tensor.
+ *
+ * The formula is:
+ *
+ * output = (input - zeroPoint) * scale.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0: A tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0, but with type
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ */
+ ANEURALNETWORKS_DEQUANTIZE = 6,
+
+ /** Looks up sub-tensors in the input tensor.
+ *
+ * This operator takes for input a tensor of values (Values) and
+ * a one-dimensional tensor of selection indices (Lookups).
+ * The output tensor is the concatenation of sub-tensors of Values as
+ * selected by Lookups.
+ *
+ * Think of Values as being sliced along its first dimension:
+ * The entries in Lookups select which slices are concatenated together
+ * to create the output tensor.
+ *
+ * For example, if Values has shape of [40, 200, 300] and
+ * Lookups has shape of [3], we would expect all three values
+ * found in Lookups to be between 0 and 39. The resulting tensor will
+ * have shape of [3, 200, 300].
+ *
+ * If a value in Lookups is out of bounds, the operation will fail
+ * and an error will be reported.
+ *
+ * Inputs:
+ * * 0: Lookups. A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_INT32} type.
+ * The values are indices into the first dimension of Values.
+ * * 1: Values. An n-D tensor, where n >= 2, from which sub-tensors are
+ * extracted.
+ *
+ * Output:
+ * * 0: A n-D tensor with the same rank and shape as the Values
+ * tensor, except for the first dimension which has the same size
+ * as Lookups' only dimension.
+ */
+ ANEURALNETWORKS_EMBEDDING_LOOKUP = 7,
+
+ /** Computes element-wise floor() on the input tensor.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0: A tensor.
+ *
+ * Outputs:
+ * * 0: The output tensor, of the same type and dimensions as the input tensor.
+ */
+ ANEURALNETWORKS_FLOOR = 8,
+
+ /** Denotes a fully (densely) connected layer, which connects all elements in the input
+ * tensor with each element in the output tensor.
+ *
+ * This layer implements the operation:
+ *
+ * outputs = activation(inputs * weights’ + bias)
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input. If rank is greater than 2, then it gets flattened to
+ * a 2-D Tensor. The 2-D Tensor is handled as if dimensions corresponded to shape
+ * [batch_size, input_size], where “batch_size” corresponds to the batching dimension,
+ * and “input_size” is the size of the input.
+ * * 1: A 2-D tensor, specifying the weights, of shape [num_units, input_size], where
+ * "num_units" corresponds to the number of output nodes.
+ * * 2: A 1-D tensor, of shape [num_units], specifying the bias.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should
+ * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias
+ * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
+ * bias_scale == input_scale * filter_scale.
+ * * 3: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The output tensor, of shape [batch_size, num_units].
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the following
+ * condition must be satisfied: output_scale > input_scale * filter_scale.
+ */
+ ANEURALNETWORKS_FULLY_CONNECTED = 9,
+
+ /** Looks up sub-tensors in the input tensor using a key-value map.
+ *
+ * This operator takes for input a tensor of values (Values),
+ * a one-dimensional tensor of selection values (Lookups) and
+ * a one-dimensional tensor that maps these values to Values
+ * indexes. The output tensor is the concatenation of sub-tensors of
+ * Values as selected by Lookups via Keys.
+ *
+ * Think of Values as being sliced along its outer-most dimension.
+ * The output is a concatenation of selected slices, with one slice
+ * for each entry of Lookups. The slice selected is the one at the
+ * same index as the Maps entry that matches the value in Lookups.
+ *
+ * For a hit, the corresponding sub-tensor of Values is included
+ * in the Output tensor. For a miss, the corresponding sub-tensor in
+ * Output will have zero values.
+ *
+ * For example, if Values has shape of [40, 200, 300],
+ * Keys should have a shape of [40]. If Lookups tensor has shape
+ * of [3], we're concatenating three slices, so the resulting tensor
+ * will have the shape of [3, 200, 300]. If the first entry in
+ * Lookups has the value 123456, we'll look for that value in Keys tensor.
+ * If the sixth entry of Keys contains 123456, we'll select the sixth
+ * slice of Values. If no entry in Keys has 123456, a slice of zeroes
+ * will be concatenated.
+ *
+ * Inputs:
+ * * 0: Lookups. A 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor with shape [ k ].
+ * * 1: Keys. A 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor with shape [ n ];
+ * Keys and Values pair represent a map, i.e., the ith element
+ * in Keys (Keys[i]) is the key to select the ith sub-tensor
+ * in Values (Values[i]), where 0 <= i <= n-1.
+ * Keys tensor *MUST* be sorted in ascending order.
+ * * 2: Values. A tensor with shape of [ n, … ]; i.e., the first dimension must be n.
+ *
+ * Outputs:
+ * * 0: Output. A tensor with shape [ k …].
+ * * 1: Hits. A boolean tensor with shape [ k ] indicates whether the lookup
+ * hits (True) or not (False).
+ * Stored as {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} with offset 0 and scale 1.0f.
+ * A non-zero byte represents True, a hit. A zero indicates otherwise.
+ */
+ ANEURALNETWORKS_HASHTABLE_LOOKUP = 10,
+
+ /** Applies L2 normalization along the depth dimension.
+ *
+ * The values in the output tensor are computed as:
+ *
+ * output[batch, row, col, channel] =
+ * input[batch, row, col, channel] /
+ * sqrt(sum_{c} pow(input[batch, row, col, c], 2))
+ *
+ * For input tensor with more dimensions, independently normalizes each 1-D slice along dimension dim.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout (i.e., Num_samples, Height, Width, and Channels).
+ *
+ * Inputs:
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth].
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth].
+ */
+ ANEURALNETWORKS_L2_NORMALIZATION = 11,
+
+ /** Performs an 2-D L2 pooling operation.
+ *
+ * The output dimensions are functions of the filter dimensions, stride, and padding.
+ *
+ * The values in the output tensor are computed as:
+ *
+ * output[batch, row, col, channel] =
+ * sqrt(sum_{i, j} pow(input[batch, row + i, col + j, channel], 2) / sum(1))
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Both explicit padding and implicit padding are supported.
+ *
+ * Inputs (explicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
+ * * 2: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
+ * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
+ * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
+ * * 5: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 6: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 7: An INT32 value, specifying the filter width.
+ * * 8: An INT32 value, specifying the filter height.
+ * * 9: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Inputs (implicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the
+ * {@link PaddingCode} values.
+ * * 2: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 3: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 4: An INT32 value, specifying the filter width.
+ * * 5: An INT32 value, specifying the filter height.
+ * * 6: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth].
+ */
+ ANEURALNETWORKS_L2_POOL_2D = 12,
+
+ /** Applies Local Response Normalization along the depth dimension.
+ *
+ * The 4-D input tensor is treated as a 3-D array of 1-D vectors (along the last
+ * dimension), and each vector is normalized independently. Within a given vector,
+ * each component is divided by the weighted, squared sum of inputs within depth_radius.
+ *
+ * The output is calculated using this formula:
+ *
+ * sqr_sum[a, b, c, d] =
+ * sum(pow(input[a, b, c, d - depth_radius : d + depth_radius + 1], 2))
+ * output = input / pow((bias + alpha * sqr_sum), beta)
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Inputs:
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the radius of the normalization window.
+ * * 2: A FLOAT32 value, specifying the bias, must not be zero.
+ * * 3: A FLOAT32 value, specifying the scale factor, alpha.
+ * * 4: A FLOAT32 value, specifying the exponent, beta.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ */
+ ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION = 13,
+
+ /** Computes sigmoid activation on the input tensor element-wise.
+ *
+ * The output is calculated using this formula:
+ *
+ * output = 1 / (1 + exp(-input))
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type,
+ * the scale must be 1.f / 256 and the zeroPoint must be 0.
+ */
+ ANEURALNETWORKS_LOGISTIC = 14,
+
+ /**
+ * Projects an input to a bit vector via locality sensitive hashing.
+ *
+ * Inputs:
+ * * 0: Hash functions. Dim.size == 2, DataType: Float.
+ * Tensor[0].Dim[0]: Number of hash functions.
+ * Tensor[0].Dim[1]: Number of seeds per hash functions.
+ * Tensor[0].Dim[1] <= 32 in sparse case.
+ *
+ * * 1: Input. Dim.size >= 1, no restriction on DataType.
+ * * 2: Weight. Optional. Dim.size == 1, DataType: Float.
+ * If not set, each input element is considered to have the same weight of
+ * 1.0.
+ * Tensor[1].Dim[0] == Tensor[2].Dim[0]
+ * * 3: Type:
+ * Sparse: Value LSHProjectionType_SPARSE(=1).
+ * Computed bit vector is considered to be sparse.
+ * Each output element is an int32 made up of multiple bits computed from
+ * hash functions.
+ *
+ * Dense: Value LSHProjectionType_DENSE(=2).
+ * Computed bit vector is considered to be dense. Each output element
+ * represents a bit and can take the value of either 0 or 1.
+ *
+ * Outputs:
+ * * 0: If the projection type is sparse:
+ * Output.Dim == { Tensor[0].Dim[0] }
+ * A tensor of int32 that represents hash signatures.
+ * If the projection type is Dense:
+ * Output.Dim == { Tensor[0].Dim[0] * Tensor[0].Dim[1] }
+ * A flattened tensor that represents projected bit vectors.
+ */
+ ANEURALNETWORKS_LSH_PROJECTION = 15,
+
+ /**
+ * Long short-term memory unit (LSTM) recurrent network layer.
+ *
+ * The default non-peephole implementation is based on:
+ * http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
+ * S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural
+ * Computation, 9(8):1735-1780, 1997.
+ *
+ * The peephole implementation is based on:
+ * https://research.google.com/pubs/archive/43905.pdf
+ * Hasim Sak, Andrew Senior, and Francoise Beaufays. "Long short-term memory
+ * recurrent neural network architectures for large scale acoustic modeling."
+ * INTERSPEECH, 2014.
+ *
+ * The coupling of input and forget gate (CIFG) is based on:
+ * http://arxiv.org/pdf/1503.04069.pdf
+ * Greff et al. "LSTM: A Search Space Odyssey"
+ *
+ * The class has the following independently optional inputs:
+ * * If input gate (if CIFG): “input_to_input_weights”,
+ * “recurrent_to_input_weights”, “cell_to_input_weights”, “input_gate_bias”.
+ * * If no peephole connections: “cell_to_input_weights”,
+ * “cell_to_forget_weights”, “cell_to_output_weights”.
+ * * If no projection layer: “projection_weights” and “projection_bias”.
+ * * If no projection bias: “projection_bias”.
+ *
+ * Supported tensor types (type T):
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Inputs:
+ * * 0: Input.
+ * A 2-D tensor of type T, of shape [batch_size, input_size], where
+ * “batch_size” corresponds to the batching dimension, and “input_size”
+ * is the size of the input.
+ * * 1: input_to_input_weights.
+ * A 2-D tensor of type T, of shape [num_units, input_size], where
+ * “num_units” corresponds to the number of cell units.
+ * * 2: input_to_forget_weights.
+ * A 2-D tensor of type T, of shape [num_units, input_size].
+ * * 3: input_to_cell_weights.
+ * A 2-D tensor of type T, of shape [num_units, input_size].
+ * * 4: input_to_output_weights.
+ * A 2-D tensor of type T, of shape [num_units, input_size].
+ * * 5: recurrent_to_input_weights.
+ * A 2-D tensor of type T, of shape [num_units, output_size], where
+ * “output_size” corresponds to either the number of cell units (i.e.,
+ * “num_units”), or the second dimension of the “projection_weights”, if
+ * defined.
+ * * 6: recurrent_to_forget_weights.
+ * A 2-D tensor of type T, of shape [num_units, output_size].
+ * * 7: recurrent_to_cell_weights.
+ * A 2-D tensor of type T, of shape [num_units, output_size].
+ * * 8: recurrent_to_output_weights.
+ * A 2-D tensor of type T, of shape [num_units, output_size].
+ * * 9: cell_to_input_weights.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 10:cell_to_forget_weights.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 11:cell_to_output_weights.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 12:input_gate_bias.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 13:forget_gate_bias.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 14:cell_bias.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 15:output_gate_bias.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 16:projection_weights.
+ * A 2-D tensor of type T, of shape [output_size, num_units].
+ * * 17:projection_bias.
+ * A 1-D tensor of type T, of shape [output_size].
+ * * 18: output_state (in).
+ * A 2-D tensor of type T, of shape [batch_size, output_size].
+ * * 19: cell_state (in).
+ * A 2-D tensor of type T, of shape [batch_size, num_units].
+ * * 20:fused_activation_function.
+ * An optional {@link FuseCode} value indicating the activation
+ * function.
+ * If “NONE” is specified then it results in a linear activation.
+ * * 21:cell_clip.
+ * A clipping threshold for the cell state, such that values are bound
+ * within [-cell_clip, cell_clip]. If set to 0.0 then clipping is
+ * disabled.
+ * * 22:proj_clip.
+ * A clipping threshold for the output from the projection layer, such
+ * that values are bound within [-proj_clip, proj_clip]. If set to 0.0
+ * then clipping is disabled.
+ *
+ * Outputs:
+ * * 0: scratch_buffer.
+ * A 3-D tensor of type T, of shape [batch_size, num_cell, 4].
+ * * 1: output_state (out).
+ * A 2-D tensor of type T, of shape [batch_size, output_size].
+ * * 2: cell_state (out).
+ * A 2-D tensor of type T, of shape [batch_size, num_units].
+ * * 3: output.
+ * A 2-D tensor of type T, of shape [batch_size, output_size]. This is
+ * effectively the same as the current “output_state” value.
+ */
+ ANEURALNETWORKS_LSTM = 16,
+
+ /** Performs an 2-D max pooling operation.
+ *
+ * The output dimensions are functions of the filter dimensions, stride, and padding.
+ *
+ * The values in the output tensor are computed as:
+ *
+ * output[batch, row, col, channel] =
+ * max_{i, j} (input[batch, row + i, col + j, channel])
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Both explicit padding and implicit padding are supported.
+ *
+ * Inputs (explicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
+ * * 2: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
+ * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
+ * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
+ * * 5: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 6: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 7: An INT32 value, specifying the filter width.
+ * * 8: An INT32 value, specifying the filter height.
+ * * 9: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Inputs (implicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the
+ * {@link PaddingCode} values.
+ * * 2: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 3: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 4: An INT32 value, specifying the filter width.
+ * * 5: An INT32 value, specifying the filter height.
+ * * 6: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth].
+ */
+ ANEURALNETWORKS_MAX_POOL_2D = 17,
+
+ /** Multiplies two tensors, element-wise.
+ *
+ * Takes two input tensors of identical type and compatible dimensions. The output
+ * is the product of both input tensors, optionally modified by an activation function.
+ *
+ * Two dimensions are compatible when:
+ * 1. they are equal, or
+ * 2. one of them is 1
+ *
+ * The size of the resulting output is the maximum size along each dimension of the
+ * input operands. It starts with the trailing dimensions, and works its way forward.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0: A tensor.
+ * * 1: A tensor of the same type, and compatible dimensions as input0.
+ * * 2: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The product, a tensor of the same type as input0.
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the following
+ * condition must be satisfied: output_scale > input1_scale * input2_scale.
+ */
+ ANEURALNETWORKS_MUL = 18,
+
+ /** Computes rectified linear activation on the input tensor element-wise.
+ *
+ * The output is calculated using this formula:
+ *
+ * output = max(0, input)
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ */
+ ANEURALNETWORKS_RELU = 19,
+
+ /** Computes rectified linear 1 activation on the input tensor element-wise.
+ *
+ * The output is calculated using this formula:
+ *
+ * output = min(1.f, max(-1.f, input))
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ */
+ ANEURALNETWORKS_RELU1 = 20,
+
+ /** Computes rectified linear 6 activation on the input tensor element-wise.
+ *
+ * The output is calculated using this formula:
+ *
+ * output = min(6, max(0, input))
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ */
+ ANEURALNETWORKS_RELU6 = 21,
+
+ /** Reshapes a tensor.
+ *
+ * Given tensor, this operation returns a tensor that has the same values as tensor,
+ * but with a newly specified shape.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the tensor to be reshaped.
+ * * 1: A 1-D tensor of type {@link ANEURALNETWORKS_TENSOR_INT32}, defining the shape
+ * of the output tensor. The number of elements implied by shape must be the same
+ * as the number of elements in the input tensor.
+ *
+ * Outputs:
+ * * 0: The output tensor, of shape specified by the input shape.
+ */
+ ANEURALNETWORKS_RESHAPE = 22,
+
+ /** Resizes images to given size using the bilinear interpolation.
+ *
+ * Resized images will be distorted if their output aspect ratio is not the same as
+ * input aspect ratio.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Inputs:
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the output height of the output tensor.
+ * * 2: An INT32 value, specifying the output width of the output tensor.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, new_height, new_width, depth].
+ */
+ ANEURALNETWORKS_RESIZE_BILINEAR = 23,
+
+ /**
+ * A basic recurrent neural network layer.
+ *
+ * This layer implements the operation:
+ * outputs = state = activation(inputs * input_weights + state * recurrent_weights + bias)
+ *
+ * Where:
+ * * “input_weights” is a weight matrix that multiplies the inputs;
+ * * “recurrent_weights” is a weight matrix that multiplies the current
+ * “state” which itself is the output from the previous time step
+ * computation;
+ * * “bias” is a bias vector (added to each output vector in the batch);
+ * * “activation” is the function passed as the “fused_activation_function”
+ * argument (if not “NONE”).
+ *
+ * Supported tensor types (Type T):
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Inputs:
+ * * 0: input.
+ * A 2-D tensor of type T, of shape [batch_size, input_size], where
+ * “batch_size” corresponds to the batching dimension, and “input_size” is
+ * the size of the input.
+ * * 1: weights.
+ * A 2-D tensor of type T, of shape [num_units, input_size], where
+ * “num_units” corresponds to the number of units.
+ * * 2: recurrent_weights.
+ * A 2-D tensor of type T, of shape [num_units, num_units], with columns
+ * corresponding to the weights from each unit.
+ * * 3: bias.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 4: hidden state (in).
+ * A 2-D tensor of type T, of shape [batch_size, num_units].
+ * * 5: fused_activation_function.
+ * An optional {@link FuseCode} value indicating the activation
+ * function. If “NONE” is specified then it results in a linear
+ * activation.
+ *
+ * Outputs:
+ * * 0: hidden state (out).
+ * A 2-D tensor of type T, of shape [batch_size, num_units].
+ *
+ * * 1: output.
+ * A 2-D tensor of type T, of shape [batch_size, num_units]. This is
+ * effectively the same as the current state value.
+ */
+ ANEURALNETWORKS_RNN = 24,
+
+ /** Computes the softmax activation on the input tensor element-wise, per batch, by
+ * normalizing the input vector so the maximum coefficient is zero.
+ *
+ * The output is calculated using this formula:
+ *
+ * output[batch, i] =
+ * exp((input[batch, i] - max(input[batch, :])) * beta) /
+ * sum_{k}{exp((input[batch, k] - max(input[batch, :])) * beta)}
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 2 or 4.
+ *
+ * Inputs:
+ * * 0: A 2-D or 4-D tensor, specifying the tensor to be normalized.
+ * * 1: A FLOAT32 value, specifying the positive scaling factor for the exponent, beta.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type,
+ * the scale must be 1.f / 256 and the zeroPoint must be 0.
+ */
+ ANEURALNETWORKS_SOFTMAX = 25,
+
+ /** Rearranges blocks of spatial data, into depth.
+ *
+ * More specifically, this op outputs a copy of the input tensor where values from
+ * the height and width dimensions are moved to the depth dimension.
+ * The value block_size indicates the input block size and how the data is moved.
+ *
+ * Non-overlapping blocks of size block_size x block_size in the height and width
+ * dimensions are rearranged into chunks of size block_size * block_size along depth.
+ *
+ * The depth of the output tensor is input_depth * block_size * block_size.
+ * The input tensor's height and width must be divisible by block_size.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Inputs:
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
+ * * 1: An INT32 value, specifying the block_size. block_size must be >=1 and
+ * block_size must be a divisor of both the input height and width.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batch, height/block_size, width/block_size,
+ * depth*block_size*block_size].
+ */
+ ANEURALNETWORKS_SPACE_TO_DEPTH = 26,
+
+ /**
+ * SVDF op is a kind of stateful layer derived from the notion that a
+ * densely connected layer that's processing a sequence of input frames can
+ * be approximated by using a singular value decomposition of each of its
+ * nodes. The implementation is based on:
+ *
+ * https://research.google.com/pubs/archive/43813.pdf
+ *
+ * P. Nakkiran, R. Alvarez, R. Prabhavalkar, C. Parada.
+ * “Compressing Deep Neural Networks using a Rank-Constrained Topology”.
+ * INTERSPEECH, 2015.
+ *
+ * It processes the incoming input using a 2-stage filtering mechanism:
+ * * stage 1 performs filtering on the "features" dimension, whose outputs get
+ * pushed into a memory of fixed-size memory_size.
+ * * stage 2 performs filtering on the "time" dimension of the memory_size
+ * memoized outputs of stage 1.
+ *
+ * Specifically, for rank 1, this layer implements the operation:
+ *
+ * memory = push(conv1d(inputs, weights_feature, feature_dim,
+ * "ANEURALNETWORKS_PADDING_VALID"));
+ * outputs = activation(memory * weights_time + bias);
+ *
+ * Where:
+ * * “weights_feature” is a weights matrix that processes the inputs (by
+ * convolving the input with every “feature filter”), and whose outputs get
+ * pushed, stacked in order, into the fixed-size “memory” (the oldest entry
+ * gets dropped);
+ * * “weights_time” is a weights matrix that processes the “memory” (by a
+ * batched matrix multiplication on the num_units);
+ * * “bias” is an optional bias vector (added to each output vector in the
+ * batch); and
+ * * “activation” is the function passed as the “fused_activation_function”
+ * argument (if not “NONE”).
+ *
+ * Each rank adds a dimension to the weights matrices by means of stacking
+ * the filters.
+ *
+ * Supported tensor types (type T):
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Inputs:
+ * * 0: input.
+ * A 2-D tensor of type T, of shape [batch_size, input_size], where
+ * “batch_size” corresponds to the batching dimension, and “input_size” is
+ * the size of the input.
+ * * 1: weights_feature.
+ * A 2-D tensor of type T, of shape [num_units, input_size], where
+ * “num_units” corresponds to the number of units.
+ * * 2: weights_time.
+ * A 2-D tensor of type T, of shape [num_units, memory_size], where
+ * “memory_size” corresponds to the fixed-size of the memory.
+ * * 3: bias.
+ * An optional 1-D tensor of type T, of shape [num_units].
+ * * 4: state (in).
+ * A 2-D tensor of type T, of shape [batch_size, (memory_size - 1) * num_units * rank].
+ * * 5: rank.
+ * The rank of the SVD approximation.
+ * * 6: fused_activation_function.
+ * An optional {@link FuseCode} value indicating the activation function.
+ * If “NONE” is specified then it results in a linear activation.
+ *
+ * Outputs:
+ * * 0: state (out).
+ * A 2-D tensor of type T, of shape [batch_size, (memory_size - 1) * num_units * rank].
+ * * 1: output.
+ * A 2-D tensor of type T, of shape [batch_size, num_units].
+ */
+ ANEURALNETWORKS_SVDF = 27,
+
+ /** Computes hyperbolic tangent of input tensor element-wise.
+ *
+ * The output is calculated using this formula:
+ *
+ * output = tanh(input)
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ */
+ ANEURALNETWORKS_TANH = 28,
+
+ /**
+ * Element-wise division of two tensors.
+ *
+ * Takes two input tensors of identical type and compatible dimensions. The output
+ * is the result of dividing the first input tensor by the second, optionally
+ * modified by an activation function.
+ *
+ * Two dimensions are compatible when:
+ * 1. they are equal, or
+ * 2. one of them is 1
+ *
+ * The size of the output is the maximum size along each dimension of the input operands.
+ * It starts with the trailing dimensions, and works its way forward.
+ *
+ * Example:
+ * input1.dimension = {4, 1, 2}
+ * input2.dimension = {5, 4, 3, 1}
+ * output.dimension = {5, 4, 3, 2}
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * 0: An n-D tensor, specifying the first input.
+ * 1: A tensor of the same type, and compatible dimensions as input0.
+ * 2: An INT32 value, and has to be one of the {@link FuseCode} values.
+ *    Specifies the activation to invoke on the result of the division.
+ *
+ * Outputs:
+ * 0: A tensor of the same type as input0.
+ */
+ ANEURALNETWORKS_DIV = 30,
+
+ /**
+ * Pads a tensor.
+ *
+ * This operation pads a tensor according to the specified paddings.
+ *
+ * Supported tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0: An n-D tensor, specifying the tensor to be padded.
+ * * 1: A 2-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the paddings
+ * for each spatial dimension of the input tensor. The shape of the
+ * tensor must be {rank(input0), 2}.
+ * padding[i, 0] specifies the number of elements to be padded in the
+ * front of dimension i.
+ * padding[i, 1] specifies the number of elements to be padded after the
+ * end of dimension i.
+ *
+ * Outputs:
+ * * 0: A tensor of the same {@link OperandCode} as input0. The
+ * output tensor has the same rank as input0, and each
+ * dimension of the output tensor has the same size as the
+ * corresponding dimension of the input tensor plus the size
+ * of the padding:
+ * output0.dimension[i] =
+ * padding[i, 0] + input0.dimension[i] + padding[i, 1]
+ *
+ * Available since API level 28.
+ */
+ ANEURALNETWORKS_PAD = 32,
+
+ /**
+ * Extracts a strided slice of a tensor.
+ *
+ * Roughly speaking, this op extracts a slice of size (end - begin) / stride
+ * from the given input tensor. Starting at the location specified by begin
+ * the slice continues by adding stride to the index until all dimensions
+ * are not less than end. Note that a stride can be negative, which causes a
+ * reverse slice.
+ *
+ * Supported tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0: An n-D tensor, specifying the tensor to be sliced.
+ * * 1: A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the starts of
+ * the dimensions of the input tensor to be sliced. The length must be
+ * of rank(input0).
+ * * 2: A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the ends of
+ * the dimensions of the input tensor to be sliced. The length must be
+ * of rank(input0).
+ * * 3: A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the strides of
+ * the dimensions of the input tensor to be sliced. The length must be
+ * of rank(input0).
+ * * 4: An {@link ANEURALNETWORKS_INT32} scalar, begin_mask. If the ith bit
+ * of begin_mask is set, begin[i] is ignored and the fullest possible
+ * range in that dimension is used instead.
+ * * 5: An {@link ANEURALNETWORKS_INT32} scalar, end_mask. If the ith bit of
+ * end_mask is set, end[i] is ignored and the fullest possible range in
+ * that dimension is used instead.
+ * * 6: An {@link ANEURALNETWORKS_INT32} scalar, shrink_axis_mask. An int32
+ * mask. If the ith bit of shrink_axis_mask is set, it implies that the
+ * ith specification shrinks the dimensionality by 1. A slice of size 1
+ * starting from begin[i] in the dimension must be preserved.
+ *
+ * Outputs:
+ * * 0: A tensor of the same {@link OperandCode} as input0.
+ */
+ ANEURALNETWORKS_STRIDED_SLICE = 35,
+
+ /**
+ * Element-wise subtraction of two tensors.
+ *
+ * Takes two input tensors of identical type and compatible dimensions. The output
+ * is the result of subtracting the second input tensor from the first one, optionally
+ * modified by an activation function.
+ *
+ * Two dimensions are compatible when:
+ * 1. they are equal, or
+ * 2. one of them is 1
+ *
+ * The size of the output is the maximum size along each dimension of the input operands.
+ * It starts with the trailing dimensions, and works its way forward.
+ *
+ * Example:
+ * input1.dimension = {4, 1, 2}
+ * input2.dimension = {5, 4, 3, 1}
+ * output.dimension = {5, 4, 3, 2}
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * 0: An n-D tensor, specifying the first input.
+ * 1: A tensor of the same type, and compatible dimensions as input0.
+ * 2: An INT32 value, and has to be one of the {@link FuseCode} values.
+ *    Specifies the activation to invoke on the result of the subtraction.
+ *
+ * Outputs:
+ * 0: A tensor of the same type as input0.
+ */
+ ANEURALNETWORKS_SUB = 36,
+} OperationCode;
+
+/**
+ * Fused activation function types, used as the "fused_activation_function"
+ * input of operations such as ANEURALNETWORKS_SVDF (see {@link OperationCode}).
+ */
+typedef enum {
+  /** No fused activation function; the result is passed through unchanged. */
+  ANEURALNETWORKS_FUSED_NONE = 0,
+  /** Fused ReLU activation function: max(0, x). */
+  ANEURALNETWORKS_FUSED_RELU = 1,
+  /** Fused ReLU1 activation function: clamps the result to [-1, 1]. */
+  ANEURALNETWORKS_FUSED_RELU1 = 2,
+  /** Fused ReLU6 activation function: clamps the result to [0, 6]. */
+  ANEURALNETWORKS_FUSED_RELU6 = 3,
+} FuseCode;
+
+/**
+ * Implicit padding algorithms, for operations that derive their padding
+ * from the input, stride and filter sizes instead of explicit pad amounts.
+ */
+typedef enum {
+  /**
+   * SAME padding.
+   * Padding on both ends are the "same":
+   * padding_to_beginning = total_padding / 2
+   * padding_to_end = (total_padding + 1)/2.
+   * i.e., for even number of padding, padding to both ends are exactly
+   * the same; for odd number of padding, padding to the ending is bigger
+   * than the padding to the beginning by 1.
+   *
+   * total_padding is a function of input, stride and filter size.
+   * It could be computed as follows:
+   * out_size = (input + stride - 1) / stride;
+   * needed_input = (out_size - 1) * stride + filter_size
+   * total_padding = max(0, needed_input - input_size)
+   * The computation is the same for the horizontal and vertical directions.
+   */
+  ANEURALNETWORKS_PADDING_SAME = 1,
+
+  /**
+   * VALID padding.
+   * No padding. When the input size is not evenly divisible by
+   * the filter size, the input at the end that could not fill
+   * the whole filter tile will simply be ignored.
+   */
+  ANEURALNETWORKS_PADDING_VALID = 2,
+} PaddingCode;
+
+/**
+ * Execution preferences, set via {@link ANeuralNetworksCompilation_setPreference}.
+ */
+typedef enum {
+  /**
+   * Prefer executing in a way that minimizes battery drain.
+   * This is desirable for compilations that will be executed often.
+   */
+  ANEURALNETWORKS_PREFER_LOW_POWER = 0,
+  /**
+   * Prefer returning a single answer as fast as possible, even if this causes
+   * more power consumption.
+   */
+  ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER = 1,
+  /**
+   * Prefer maximizing the throughput of successive frames, for example when
+   * processing successive frames coming from the camera.
+   */
+  ANEURALNETWORKS_PREFER_SUSTAINED_SPEED = 2,
+} PreferenceCode;
+
+/**
+ * Result codes returned by the ANeuralNetworks* functions in this header.
+ */
+typedef enum {
+  ANEURALNETWORKS_NO_ERROR = 0, /**< Operation completed successfully. */
+  ANEURALNETWORKS_OUT_OF_MEMORY = 1,
+  ANEURALNETWORKS_INCOMPLETE = 2,
+  ANEURALNETWORKS_UNEXPECTED_NULL = 3,
+  ANEURALNETWORKS_BAD_DATA = 4,
+  ANEURALNETWORKS_OP_FAILED = 5,
+  ANEURALNETWORKS_UNMAPPABLE = 5, /* NOTE(review): shares value 5 with ANEURALNETWORKS_OP_FAILED — confirm against upstream NNAPI before treating these as distinct codes. */
+  ANEURALNETWORKS_BAD_STATE = 6,
+} ResultCode;
+
+/**
+ * For {@link ANeuralNetworksModel_setOperandValue}, values whose length is
+ * smaller than or equal to this threshold are immediately copied into the
+ * model; larger values are referenced by pointer. The size is in bytes.
+ */
+enum {
+  ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES = 128
+};
+
+/**
+ * ANeuralNetworksMemory is an opaque type that represents memory.
+ *
+ * This type is used to represent shared memory, memory mapped files,
+ * and similar memories.
+ *
+ * By using shared memory, a program can efficiently communicate to the
+ * runtime and drivers the tensors that define a model. See
+ * {@link ANeuralNetworksModel_setOperandValueFromMemory}. An application
+ * should typically create one shared memory object that contains every tensor
+ * needed to define a model. {@link ANeuralNetworksMemory_createFromFd} can be
+ * used to create shared memory from a file handle. {@link ANeuralNetworksMemory_createShared}
+ * can be used to directly create shared memory.
+ *
+ * Memory objects can also be used to specify the input and output arguments of
+ * an execution. See {@link ANeuralNetworksExecution_setInputFromMemory}
+ * and {@link ANeuralNetworksExecution_setOutputFromMemory}.
+ */
+typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;
+
+/**
+ * ANeuralNetworksModel is an opaque type that contains a description of the
+ * mathematical operations that constitute the model.
+ *
+ * <p>The model will be built by calling<ul>
+ * <li>{@link ANeuralNetworksModel_create},</li>
+ * <li>{@link ANeuralNetworksModel_addOperation},</li>
+ * <li>{@link ANeuralNetworksModel_addOperand},</li>
+ * </ul>
+ *
+ * A model is completed by calling {@link ANeuralNetworksModel_finish}.
+ * A model is destroyed by calling {@link ANeuralNetworksModel_free}.
+ *
+ * <p>A model cannot be modified once {@link ANeuralNetworksModel_finish}
+ * has been called on it.</p>
+ *
+ * <p>It is the application's responsibility to make sure that only one thread
+ * modifies a model at a given time. It is however safe for more than one
+ * thread to use the model once {@link ANeuralNetworksModel_finish} has returned.</p>
+ *
+ * <p>It is also the application's responsibility to ensure that there are no other
+ * uses of the model after calling {@link ANeuralNetworksModel_free}.
+ * This includes any compilation or execution object created using the model.</p>
+ */
+typedef struct ANeuralNetworksModel ANeuralNetworksModel;
+
+/**
+ * ANeuralNetworksCompilation is an opaque type that can be used to compile
+ * a machine learning model.
+ *
+ * <p>To use:<ul>
+ * <li>Create a new compilation instance by calling the
+ * {@link ANeuralNetworksCompilation_create} function.</li>
+ * <li>Set any desired properties on the compilation (for example,
+ * {@link ANeuralNetworksCompilation_setPreference}).</li>
+ * <li>Complete the compilation with {@link ANeuralNetworksCompilation_finish}.</li>
+ * <li>Use the compilation as many times as needed
+ * with {@link ANeuralNetworksExecution_create}.</li>
+ * <li>Destroy the compilation with {@link ANeuralNetworksCompilation_free}
+ * once all executions using the compilation have completed.</li></ul></p>
+ *
+ * A compilation is completed by calling {@link ANeuralNetworksCompilation_finish}.
+ * A compilation is destroyed by calling {@link ANeuralNetworksCompilation_free}.
+ *
+ * <p>A compilation cannot be modified once {@link ANeuralNetworksCompilation_finish}
+ * has been called on it.</p>
+ *
+ * <p>It is the application's responsibility to make sure that only
+ * one thread modifies a compilation at a given time. It is however
+ * safe for more than one thread to use the compilation once
+ * {@link ANeuralNetworksCompilation_finish} has returned.</p>
+ *
+ * <p>It is also the application's responsibility to ensure that there are no other
+ * uses of the compilation after calling {@link ANeuralNetworksCompilation_free}.
+ * This includes any execution object created using the compilation.</p>
+ */
+typedef struct ANeuralNetworksCompilation ANeuralNetworksCompilation;
+
+/**
+ * ANeuralNetworksExecution is an opaque type that can be used to apply a machine
+ * learning model to a set of inputs.
+ *
+ * <p>To use:<ul>
+ * <li>Create a new execution instance by calling the
+ * {@link ANeuralNetworksExecution_create} function.</li>
+ * <li>Associate data to the model inputs with
+ * {@link ANeuralNetworksExecution_setInput} or
+ * {@link ANeuralNetworksExecution_setInputFromMemory}.</li>
+ * <li>Associate output buffers to the model outputs with
+ * {@link ANeuralNetworksExecution_setOutput} or
+ * {@link ANeuralNetworksExecution_setOutputFromMemory}.</li>
+ * <li>Apply the model with {@link ANeuralNetworksExecution_startCompute}.</li>
+ * <li>Wait for the execution to complete with {@link
+ * ANeuralNetworksEvent_wait}.</li>
+ * <li>Destroy the execution with
+ * {@link ANeuralNetworksExecution_free}.</li></ul></p>
+ *
+ * <p>An execution cannot be modified once {@link ANeuralNetworksExecution_startCompute}
+ * has been called on it.</p>
+ *
+ * <p>An execution can be applied to a model with
+ * {@link ANeuralNetworksExecution_startCompute} only once. Create new executions
+ * to do new evaluations of the model.</p>
+ *
+ * <p>It is the application's responsibility to make sure that only one thread
+ * modifies an execution at a given time. It is however safe for more than one
+ * thread to use {@link ANeuralNetworksEvent_wait} at the same time.</p>
+ *
+ * <p>It is also the application's responsibility to ensure that there are no other
+ * uses of the request after calling {@link ANeuralNetworksExecution_free}.</p>
+ */
+typedef struct ANeuralNetworksExecution ANeuralNetworksExecution;
+
+/**
+ * ANeuralNetworksOperandType describes the type of an operand.
+ * This structure is used to describe both scalars and tensors.
+ */
+typedef struct ANeuralNetworksOperandType {
+  /** The data type, e.g. ANEURALNETWORKS_INT8. */
+  int32_t type;
+  /** The number of dimensions. It should be 0 for scalars. */
+  uint32_t dimensionCount;
+  /** The dimensions of the tensor. It should be nullptr for scalars. A dimension of 0 means the size is provided at execution time (see {@link ANeuralNetworksModel_addOperand}). */
+  const uint32_t* dimensions;
+  /** These two fields are only used for quantized tensors.
+   * They should be zero for scalars and non-fixed point tensors.
+   * The dequantized value of each entry is (value - zeroPoint) * scale.
+   */
+  float scale;
+  int32_t zeroPoint;
+} ANeuralNetworksOperandType;
+
+typedef int32_t ANeuralNetworksOperationType;
+
+/**
+ * ANeuralNetworksEvent is an opaque type that represents an event
+ * that will be signaled once an execution completes.
+ */
+typedef struct ANeuralNetworksEvent ANeuralNetworksEvent;
+
+
+/**
+ * Creates a shared memory object from a file descriptor.
+ *
+ * The shared memory is backed by a file descriptor via mmap.
+ * See {@link ANeuralNetworksMemory} for a description on how to use
+ * this shared memory.
+ *
+ * @param size The requested size in bytes.
+ * Must not be larger than the file size.
+ * @param protect The desired memory protection for the mapping.
+ * It is either PROT_NONE or the bitwise OR of one or
+ * more of the following flags: PROT_READ, PROT_WRITE.
+ * @param fd The requested file descriptor.
+ * The file descriptor has to be mmap-able. The file
+ * descriptor will be duplicated.
+ * @param offset The offset to the beginning of the file of the area to map.
+ * The offset has to be aligned to a page size.
+ * @param memory The memory object to be created.
+ * Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if the request completed normally.
+ */
+int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset,
+ ANeuralNetworksMemory** memory);
+
+/**
+ * Delete a memory object.
+ *
+ * Destroys the object used by the run time to keep track of the memory.
+ * This will free the underlying actual memory if no other code has open
+ * handles to this memory.
+ *
+ * @param memory The memory object to be freed.
+ */
+void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory);
+
+/**
+ * Create an empty {@link ANeuralNetworksModel}.
+ *
+ * <p>This only creates the object. Computation is performed once
+ * {@link ANeuralNetworksExecution_startCompute} is invoked.
+ *
+ * The model should be constructed with calls to
+ * {@link ANeuralNetworksModel_addOperation} and
+ * {@link ANeuralNetworksModel_addOperand}
+ *
+ * <p>{@link ANeuralNetworksModel_finish} should be called once the model
+ * has been fully constructed.</p>
+ *
+ * <p>{@link ANeuralNetworksModel_free} should be called once the model
+ * is no longer needed.</p>
+ *
+ * @param model The {@link ANeuralNetworksModel} to be created.
+ * Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_create(ANeuralNetworksModel** model);
+
+/**
+ * Destroy a model.
+ *
+ * The model need not have been finished by a call to
+ * {@link ANeuralNetworksModel_finish}.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be destroyed. Passing NULL is acceptable and
+ * results in no operation.
+ */
+void ANeuralNetworksModel_free(ANeuralNetworksModel* model);
+
+/**
+ * Indicate that we have finished modifying a model. Required before
+ * calling {@link ANeuralNetworksCompilation_create}.
+ *
+ * An application is responsible to make sure that no other thread uses
+ * the model at the same time.
+ *
+ * This function must only be called once for a given model.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be finished.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_finish(ANeuralNetworksModel* model);
+
+/**
+ * Add an operand to a model.
+ *
+ * The order in which the operands are added is important. The first one added
+ * to a model will have the index value 0, the second 1, etc. These indexes are
+ * used as operand identifiers in {@link ANeuralNetworksModel_addOperation},
+ * {@link ANeuralNetworksExecution_setInput},
+ * {@link ANeuralNetworksExecution_setInputFromMemory},
+ * {@link ANeuralNetworksExecution_setOutput},
+ * {@link ANeuralNetworksExecution_setOutputFromMemory} and
+ * {@link ANeuralNetworksModel_setOperandValue}.
+ *
+ * To build a model that can accommodate inputs of various sizes, as you may want
+ * to do for a CNN, set the size of the dimensions that will vary at run time to 0.
+ * If you do so, provide the full dimensions when calling
+ * {@link ANeuralNetworksExecution_setInput} or {@link ANeuralNetworksExecution_setInputFromMemory}.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be modified.
+ * @param type The {@link ANeuralNetworksOperandType} that describes the shape
+ * of the operand.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_addOperand(ANeuralNetworksModel* model,
+ const ANeuralNetworksOperandType* type);
+
+/**
+ * Sets an operand to a constant value.
+ *
+ * Values of length smaller or equal to
+ * {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES}
+ * are immediately copied into the model.
+ *
+ * For values of length greater than {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES},
+ * a pointer to the buffer is stored within the model. The application is responsible
+ * for not changing the content of this region until all executions using this model
+ * have completed. As the data may be copied during processing, modifying the data
+ * after this call yields undefined results.
+ *
+ * For large tensors, using {@link ANeuralNetworksModel_setOperandValueFromMemory}
+ * is likely to be more efficient.
+ *
+ * To indicate that an optional operand should be considered missing,
+ * pass nullptr for buffer and 0 for length.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be modified.
+ * @param index The index of the model operand we're setting.
+ * @param buffer A pointer to the data to use.
+ * @param length The size in bytes of the data value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel* model, int32_t index,
+ const void* buffer, size_t length);
+
+/**
+ * Sets an operand to a value stored in a memory object.
+ *
+ * The content of the memory is not copied. A reference to that memory is stored
+ * inside the model. The application is responsible for not changing the content
+ * of the memory region until all executions using this model have completed.
+ * As the data may be copied during processing, modifying the data after this call
+ * yields undefined results.
+ *
+ * To indicate that an optional operand should be considered missing,
+ * use {@link ANeuralNetworksModel_setOperandValue} instead, passing nullptr for buffer.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be modified.
+ * @param index The index of the model operand we're setting.
+ * @param buffer A pointer to the data to use.
+ * @param memory The memory containing the data.
+ * @param offset This specifies the location of the data within the memory.
+ * The offset is in bytes from the start of memory.
+ * @param length The size in bytes of the data value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel* model, int32_t index,
+ const ANeuralNetworksMemory* memory,
+ size_t offset, size_t length);
+
+/**
+ * Add an operation to a model.
+ *
+ * @param model The model to be modified.
+ * @param type The type of the operation.
+ * @param inputCount The number of entries in the inputs array.
+ * @param inputs An array of indexes identifying each operand.
+ * @param outputCount The number of entries in the outputs array.
+ * @param outputs An array of indexes identifying each operand.
+ *
+ * The operands specified by inputs and outputs must have been
+ * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_addOperation(ANeuralNetworksModel* model,
+ ANeuralNetworksOperationType type, uint32_t inputCount,
+ const uint32_t* inputs, uint32_t outputCount,
+ const uint32_t* outputs);
+
+/**
+ * Specifies which operands will be the model's inputs and outputs.
+ *
+ * An operand cannot be used for both input and output. Doing so will
+ * return an error.
+ *
+ * @param model The model to be modified.
+ * @param inputCount The number of entries in the inputs array.
+ * @param inputs An array of indexes identifying the input operands.
+ * @param outputCount The number of entries in the outputs array.
+ * @param outputs An array of indexes identifying the output operands.
+ *
+ * The operands specified by inputs and outputs must have been
+ * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ */
+int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel* model, uint32_t inputCount,
+ const uint32_t* inputs, uint32_t outputCount,
+ const uint32_t* outputs);
+
+/**
+ * Create a {@link ANeuralNetworksCompilation} to compile the given model.
+ *
+ * <p>This only creates the object. Compilation is only performed once
+ * {@link ANeuralNetworksCompilation_finish} is invoked.</p>
+ *
+ * <p>{@link ANeuralNetworksCompilation_finish} should be called once
+ * all desired properties have been set on the compilation.</p>
+ *
+ * <p>{@link ANeuralNetworksCompilation_free} should be called once the compilation
+ * is no longer needed.</p>
+ *
+ * <p>The provided model must outlive the compilation.</p>
+ *
+ * The model must already have been finished by a call to
+ * {@link ANeuralNetworksModel_finish}.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
+ *
+ * @param model The {@link ANeuralNetworksModel} to be compiled.
+ * @param compilation The newly created object or NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
+ * if the model is invalid.
+ */
+int ANeuralNetworksCompilation_create(ANeuralNetworksModel* model,
+ ANeuralNetworksCompilation** compilation);
+
+/**
+ * Destroy a compilation.
+ *
+ * The compilation need not have been finished by a call to
+ * {@link ANeuralNetworksCompilation_finish}.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
+ *
+ * @param compilation The compilation to be destroyed. Passing NULL is acceptable and
+ * results in no operation.
+ */
+void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation* compilation);
+
+/**
+ * Sets the execution preference.
+ *
+ * <p>Provides guidance to the runtime when trade-offs are possible.</p>
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
+ *
+ * @param compilation The compilation to be modified.
+ * @param preference Either {@link ANEURALNETWORKS_PREFER_LOW_POWER},
+ *                   {@link ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER}, or
+ *                   {@link ANEURALNETWORKS_PREFER_SUSTAINED_SPEED}.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation* compilation,
+ int32_t preference);
+
+/**
+ * Indicate that we have finished modifying a compilation. Required before
+ * calling {@link ANeuralNetworksExecution_create}.
+ *
+ * An application is responsible to make sure that no other thread uses
+ * the compilation at the same time.
+ *
+ * This function must only be called once for a given compilation.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
+ *
+ * @param compilation The compilation to be finished.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation* compilation);
+
+/**
+ * Create a {@link ANeuralNetworksExecution} to apply the given compilation.
+ * This only creates the object. Computation is only performed once
+ * {@link ANeuralNetworksExecution_startCompute} is invoked.
+ *
+ * <p>The provided compilation must outlive the execution.</p>
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated.
+ * @param execution The newly created object or NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
+ * if the compilation is invalid.
+ */
+int ANeuralNetworksExecution_create(ANeuralNetworksCompilation* compilation,
+ ANeuralNetworksExecution** execution);
+
+/**
+ * Destroy an execution.
+ *
+ * <p>If called on an execution for which
+ * {@link ANeuralNetworksExecution_startCompute} has been called, the
+ * function will return immediately but will mark the execution to be deleted
+ * once the computation completes. The related {@link ANeuralNetworksEvent}
+ * will be signaled and the {@link ANeuralNetworksEvent_wait} will return
+ * ANEURALNETWORKS_ERROR_DELETED.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be destroyed. Passing NULL is acceptable and
+ * results in no operation.
+ */
+void ANeuralNetworksExecution_free(ANeuralNetworksExecution* execution);
+
+/**
+ * Associate a user buffer with an input of the model of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * <p>The provided buffer must outlive the execution.</p>
+ *
+ * If the input is optional, you can indicate that it is omitted by
+ * passing nullptr for buffer and 0 for length.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param index The index of the input argument we are setting. It is
+ * an index into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param type The type of the operand. This should be used to specify the
+ * dimensions that were set to 0 when the operand was added to the
+ * model. All other properties of the type must be the same as
+ * specified in the model. If the type is the same as specified
+ * when the model was built, NULL can be passed.
+ * @param buffer The buffer containing the data.
+ * @param length The length in bytes of the buffer.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the
+ * name is not recognized or the buffer is too small for the input.
+ */
+int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution* execution, int32_t index,
+ const ANeuralNetworksOperandType* type, const void* buffer,
+ size_t length);
+
+/**
+ * Associate part of a memory object with an input of the model of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * <p>The provided memory must outlive the execution.</p>
+ *
+ * If the input is optional, you can indicate that it is omitted by
+ * using {@link ANeuralNetworksExecution_setInput} instead, passing nullptr for buffer
+ * and 0 for length.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param index The index of the input argument we are setting. It is
+ * an index into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param type The type of the operand. This can be used to specify the
+ * dimensions that were set to 0 when the operand was added to the
+ * model. All other values must be the same as specified in the
+ * model. If the type is the same as specified when the model
+ * was built, NULL can be passed.
+ * @param memory The memory containing the data.
+ * @param offset This specifies the location of the data within the memory.
+ * The offset is in bytes from the start of memory.
+ * @param length The size in bytes of the data value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the
+ * name is not recognized or the buffer is too small for the input.
+ */
+int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution* execution, int32_t index,
+ const ANeuralNetworksOperandType* type,
+ const ANeuralNetworksMemory* memory, size_t offset,
+ size_t length);
+
+/**
+ * Associate a user buffer with an output of the model of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * If the output is optional, you can indicate that it is omitted by
+ * passing nullptr for buffer and 0 for length.
+ *
+ * <p>The provided buffer must outlive the execution.</p>
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param index The index of the output argument we are setting. It is
+ * an index into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param type The type of the operand. This can be used to specify the
+ * dimensions that were set to 0 when the operand was added to the
+ * model. All other values must be the same as specified in the
+ * model. If the type is the same as specified when the model
+ * was built, NULL can be passed.
+ * @param buffer The buffer where the data is to be written.
+ * @param length The length in bytes of the buffer.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the
+ * name is not recognized or the buffer is too small for the output.
+ */
+int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution* execution, int32_t index,
+ const ANeuralNetworksOperandType* type, void* buffer,
+ size_t length);
+
+/**
+ * Associate part of a memory object with an output of the model of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * If the output is optional, you can indicate that it is omitted by
+ * using {@link ANeuralNetworksExecution_setOutput} instead, passing nullptr for buffer
+ * and 0 for length.
+ *
+ * <p>The provided memory must outlive the execution.</p>
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param index The index of the output argument we are setting. It is
+ * an index into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param type The type of the operand. This can be used to specify the
+ * dimensions that were set to 0 when the operand was added to the
+ * model. All other values must be the same as specified in the
+ * model. If the type is the same as specified when the model
+ * was built, NULL can be passed.
+ * @param memory The memory where the data is to be stored.
+ * @param offset This specifies the location of the data within the memory.
+ * The offset is in bytes from the start of memory.
+ * @param length The length in bytes of the data value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the
+ * name is not recognized or the buffer is too small for the output.
+ */
+int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution* execution, int32_t index,
+ const ANeuralNetworksOperandType* type,
+ const ANeuralNetworksMemory* memory, size_t offset,
+ size_t length);
+
+/**
+ * Schedule evaluation of the execution.
+ *
+ * <p>Schedules evaluation of the execution. Once the model has been
+ * applied and the outputs are ready to be consumed, the returned event will be
+ * signaled. Use {@link ANeuralNetworksEvent_wait} to wait for that event.
+ * </p>
+ *
+ * Multiple executions can be scheduled and evaluated concurrently. The
+ * runtime makes no guarantee on the ordering of completion of
+ * executions. If it's important to the application, the application
+ * should enforce the ordering by using
+ * {@link ANeuralNetworksEvent_wait}.
+ *
+ * ANeuralNetworksEvent_wait must be called to recuperate the resources used
+ * by the execution.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be scheduled and executed.
+ * @param event The event that will be signaled on completion. event is set to
+ * NULL if there's an error.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution* execution,
+ ANeuralNetworksEvent** event);
+
+/**
+ * Waits until the execution completes.
+ *
+ * More than one thread can wait on an event. When the execution completes,
+ * all threads will be released.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
+ */
+int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event);
+
+/**
+ * Destroys the event.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ */
+void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event);
+
+__END_DECLS
+
+#endif // __ANDROID_API__ >= 27
+
+#endif // ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+
+/** @} */
diff --git a/compiler/ann-ref/.FORMATDENY b/compiler/ann-ref/.FORMATDENY
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/ann-ref/.FORMATDENY
diff --git a/compiler/ann-ref/CMakeLists.txt b/compiler/ann-ref/CMakeLists.txt
new file mode 100644
index 000000000..0f3822514
--- /dev/null
+++ b/compiler/ann-ref/CMakeLists.txt
@@ -0,0 +1,32 @@
+# Build configuration for ann-ref, the reference Android NN API implementation.
+# Eigen and GEMMLowp are hard requirements; if either is missing the whole
+# component is silently skipped.
+nnas_find_package(Eigen QUIET)
+
+if(NOT Eigen_FOUND)
+  return()
+endif(NOT Eigen_FOUND)
+
+nnas_find_package(GEMMLowp QUIET)
+
+if(NOT GEMMLowp_FOUND)
+  return()
+endif(NOT GEMMLowp_FOUND)
+
+nnas_include(TargetRequire)
+
+TargetRequire_Assert(ann_api eigen gemmlowp)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+# Shared include/link settings applied to both the static and shared variants.
+function(ann_ref_configure TARGET)
+  target_include_directories(${TARGET} PRIVATE src)
+  target_link_libraries(${TARGET} PRIVATE ann_api)
+  target_link_libraries(${TARGET} PRIVATE eigen)
+  target_link_libraries(${TARGET} PRIVATE gemmlowp)
+endfunction(ann_ref_configure)
+
+add_library(ann_ref_static STATIC ${SOURCES})
+set_target_properties(ann_ref_static PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ann_ref_configure(ann_ref_static)
+
+# The shared variant is named "libneuralnetworks" to mirror the Android NN
+# runtime library name.
+add_library(ann_ref_shared SHARED ${SOURCES})
+set_target_properties(ann_ref_shared PROPERTIES OUTPUT_NAME neuralnetworks)
+ann_ref_configure(ann_ref_shared)
diff --git a/compiler/ann-ref/README.md b/compiler/ann-ref/README.md
new file mode 100644
index 000000000..6b13b5fdd
--- /dev/null
+++ b/compiler/ann-ref/README.md
@@ -0,0 +1,7 @@
+# ann-ref
+
+_ann-ref_ is a reference Android NN API implementation for Linux.
+
+**DISCLAIMER**
+
+_ann-ref_ is incomplete in terms of its functionality.
diff --git a/compiler/ann-ref/requires.cmake b/compiler/ann-ref/requires.cmake
new file mode 100644
index 000000000..b6b647600
--- /dev/null
+++ b/compiler/ann-ref/requires.cmake
@@ -0,0 +1 @@
+require("ann-api")
diff --git a/compiler/ann-ref/src/Assert.h b/compiler/ann-ref/src/Assert.h
new file mode 100644
index 000000000..744305607
--- /dev/null
+++ b/compiler/ann-ref/src/Assert.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ASSERT_H__
+#define __ASSERT_H__
+
+#include "Logging.h"
+
+// Assert macro, as Android does not generally support assert.
+// Unlike assert(3), this is always active (never compiled out by NDEBUG):
+// a failed check logs the stringized expression with file/line, then abort()s.
+#define ASSERT(v) \
+  do \
+  { \
+    if (!(v)) \
+    { \
+      LOG(ERROR) << "'" << #v << "' failed at " << __FILE__ << ":" << __LINE__ << "'\n"; \
+      abort(); \
+    } \
+  } while (0)
+
+#endif // __ASSERT_H__
diff --git a/compiler/ann-ref/src/CompilationBuilder.cpp b/compiler/ann-ref/src/CompilationBuilder.cpp
new file mode 100644
index 000000000..a14dbc1b6
--- /dev/null
+++ b/compiler/ann-ref/src/CompilationBuilder.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutionBuilder.h"
+#include "CompilationBuilder.h"
+
+#include "Logging.h"
+
+// ann-ref performs no real compilation work, so construction only records
+// the (non-owning) model pointer and logs.
+CompilationBuilder::CompilationBuilder(const ModelBuilder *model) : mModel(model)
+{
+  VLOG(COMPILATION) << "CompilationBuilder::CompilationBuilder";
+}
+
+// Marks the compilation as finished. A second call is an error
+// (ANEURALNETWORKS_BAD_STATE), mirroring the contract of
+// ANeuralNetworksCompilation_finish.
+int CompilationBuilder::finish()
+{
+  if (mFinished)
+  {
+    LOG(ERROR) << "ANeuralNetworksCompilation_finish called more than once";
+    return ANEURALNETWORKS_BAD_STATE;
+  }
+  // TODO validate the rest
+
+  mFinished = true;
+
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Allocates a new ExecutionBuilder for this compilation into *execution.
+// Only valid after finish(); otherwise *execution is set to nullptr and
+// ANEURALNETWORKS_BAD_STATE is returned.
+int CompilationBuilder::createExecution(ExecutionBuilder **execution)
+{
+  if (!mFinished)
+  {
+    LOG(ERROR) << "ANeuralNetworksExecution_create passed an unfinished compilation";
+    *execution = nullptr;
+    return ANEURALNETWORKS_BAD_STATE;
+  }
+  *execution = new ExecutionBuilder(mModel);
+  // NOTE(review): a plain `new` expression throws std::bad_alloc rather than
+  // returning nullptr, so the OUT_OF_MEMORY branch below is effectively dead.
+  return (*execution ? ANEURALNETWORKS_NO_ERROR : ANEURALNETWORKS_OUT_OF_MEMORY);
+}
diff --git a/compiler/ann-ref/src/CompilationBuilder.h b/compiler/ann-ref/src/CompilationBuilder.h
new file mode 100644
index 000000000..92c1ab4bf
--- /dev/null
+++ b/compiler/ann-ref/src/CompilationBuilder.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COMPILATION_BUILDER_H__
+#define __COMPILATION_BUILDER_H__
+
+#include "NeuralNetworks.h"
+
+class ModelBuilder;
+class ExecutionBuilder;
+
+// Builder object behind ANeuralNetworksCompilation. Holds a non-owning
+// pointer to the model plus a finished flag; executions can only be created
+// once finish() has succeeded.
+class CompilationBuilder
+{
+public:
+  CompilationBuilder(const ModelBuilder *model);
+
+public:
+  // Marks the compilation finished; returns BAD_STATE on a second call.
+  int finish();
+
+  // Allocates a new ExecutionBuilder into *execution (nullptr on failure).
+  int createExecution(ExecutionBuilder **execution);
+
+private:
+  const ModelBuilder *mModel; // not owned
+
+  // Once the compilation has been finished, we should not allow further
+  // modifications to the compilation.
+  bool mFinished = false;
+};
+
+#endif // __COMPILATION_BUILDER_H__
diff --git a/compiler/ann-ref/src/ExecutionBuilder.cpp b/compiler/ann-ref/src/ExecutionBuilder.cpp
new file mode 100644
index 000000000..9df78bfc3
--- /dev/null
+++ b/compiler/ann-ref/src/ExecutionBuilder.cpp
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutionBuilder.h"
+#include "CompilationBuilder.h"
+#include "ModelBuilder.h"
+
+#include "Executor.h"
+
+#include "Logging.h"
+#include "Validation.h"
+
+// Converts the builder's per-argument records (ModelArgumentInfo) into the
+// RequestArgument array consumed by the Executor, copying the has-no-value
+// flag, the resolved buffer location, and any caller-overridden dimensions.
+static void setRequestArgumentArray(const std::vector<ModelArgumentInfo> &argumentInfos,
+                                    std::vector<RequestArgument> *ioInfos)
+{
+  size_t count = argumentInfos.size();
+  ioInfos->resize(count);
+  for (size_t i = 0; i < count; i++)
+  {
+    const auto &info = argumentInfos[i];
+    (*ioInfos)[i] = {
+        .hasNoValue = info.state == ModelArgumentInfo::HAS_NO_VALUE,
+        .location = info.locationAndLength,
+        .dimensions = info.dimensions,
+    };
+  }
+}
+
+// Maps each raw pool pointer into one RunTimePoolInfo slot. Fails fast
+// (returns false after logging) on the first pool that
+// RunTimePoolInfo::set() rejects.
+bool setRunTimePoolInfosFromMemories(std::vector<RunTimePoolInfo> *poolInfos,
+                                     const std::vector<uint8_t *> &pools)
+{
+  poolInfos->resize(pools.size());
+  for (size_t i = 0; i < pools.size(); i++)
+  {
+    auto &poolInfo = (*poolInfos)[i];
+    if (!poolInfo.set(pools[i]))
+    {
+      LOG(ERROR) << "Could not map pool";
+      return false;
+    }
+  }
+  return true;
+}
+
+// Sizes the input/output argument tables from the model's declared I/O
+// counts; arguments are populated later via setInput*/setOutput*.
+ExecutionBuilder::ExecutionBuilder(const ModelBuilder *model)
+    : mModel(model), mInputs(mModel->inputCount()), mOutputs(mModel->outputCount())
+{
+  VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder";
+}
+
+// Binds a caller-owned buffer as input `index` of this execution.
+// - `type`, when non-null, is checked via validateOperandType().
+// - `length` must fit in 32 bits since operand locations store uint32_t.
+// Returns an ANEURALNETWORKS_* status code.
+int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType *type,
+                               const void *buffer, size_t length)
+{
+  uint32_t count = static_cast<uint32_t>(mInputs.size());
+  if (index >= count)
+  {
+    LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count;
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  if (type != nullptr)
+  {
+    int n = validateOperandType(*type, "ANeuralNetworksExecution_setInput", false);
+    if (n != ANEURALNETWORKS_NO_ERROR)
+    {
+      return n;
+    }
+  }
+  if (length > 0xFFFFFFFF)
+  {
+    LOG(ERROR) << "ANeuralNetworksExecution_setInput input exceeds max length " << length;
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  uint32_t l = static_cast<uint32_t>(length);
+  // The buffer is caller-owned; const is cast away only for storage.
+  return mInputs[index].setFromPointer(mModel->getInputOperand(index), type,
+                                       const_cast<void *>(buffer), l);
+}
+
+// Binds a window of a registered Memory object as input `index`. The
+// (offset, length) window is validated against the Memory's size, and the
+// Memory is registered in mMemories to obtain its request pool index.
+// NOTE(review): unlike setInput(), `type` is forwarded without a
+// validateOperandType() check here.
+int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType *type,
+                                         const Memory *memory, size_t offset, size_t length)
+{
+  uint32_t count = static_cast<uint32_t>(mInputs.size());
+  if (index >= count)
+  {
+    LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " " << count;
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  if (!memory->validateSize(offset, length))
+  {
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  uint32_t poolIndex = mMemories.add(memory);
+  return mInputs[index].setFromMemory(mModel->getInputOperand(index), type, poolIndex, offset,
+                                      length);
+}
+
+// Binds a caller-owned buffer to receive output `index`. Validation mirrors
+// setInput(): optional type check plus a 32-bit length cap.
+int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType *type,
+                                void *buffer, size_t length)
+{
+  uint32_t count = static_cast<uint32_t>(mOutputs.size());
+  if (index >= count)
+  {
+    LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count;
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  if (type != nullptr)
+  {
+    int n = validateOperandType(*type, "ANeuralNetworksExecution_setOutput", false);
+    if (n != ANEURALNETWORKS_NO_ERROR)
+    {
+      return n;
+    }
+  }
+  if (length > 0xFFFFFFFF)
+  {
+    // NOTE(review): the message says "input" on the output path -- looks like
+    // a copy-paste from setInput(); the string itself is left untouched here.
+    LOG(ERROR) << "ANeuralNetworksExecution_setOutput input exceeds max length " << length;
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  uint32_t l = static_cast<uint32_t>(length);
+  return mOutputs[index].setFromPointer(mModel->getOutputOperand(index), type, buffer, l);
+}
+
+// Binds a window of a registered Memory object to receive output `index`.
+// Mirrors setInputFromMemory(): the window is validated against the Memory's
+// size and the Memory is tracked in mMemories for its pool index.
+int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType *type,
+                                          const Memory *memory, size_t offset, size_t length)
+{
+  // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()
+
+  uint32_t count = static_cast<uint32_t>(mOutputs.size());
+  if (index >= count)
+  {
+    LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " "
+               << count;
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  if (!memory->validateSize(offset, length))
+  {
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  // TODO validate the rest
+  uint32_t poolIndex = mMemories.add(memory);
+  return mOutputs[index].setFromMemory(mModel->getOutputOperand(index), type, poolIndex, offset,
+                                       length);
+}
+
+// Runs the execution. Despite the "start" in the name, ann-ref computes
+// synchronously: publish the model, map its constant pools, wrap
+// pointer-based arguments into ad-hoc request pools, then invoke the
+// reference Executor.
+int ExecutionBuilder::startCompute(void)
+{
+  Model model;
+  mModel->publish(&model);
+
+  // modelPoolInfos holds the information of pre-allocated memory pools during model construction
+  std::vector<RunTimePoolInfo> modelPoolInfos;
+  if (!setRunTimePoolInfosFromMemories(&modelPoolInfos, model.pools))
+  {
+    return ANEURALNETWORKS_UNMAPPABLE;
+  }
+
+  std::vector<RunTimePoolInfo> requestPoolInfos;
+  uint32_t count = mMemories.size();
+  // NOTE(review): these first `count` entries (one per registered Memory) are
+  // default-constructed and not filled from mMemories in this function;
+  // presumably Memory-backed arguments are resolved elsewhere -- confirm
+  // before relying on *FromMemory bindings.
+  requestPoolInfos.resize(count);
+  // Create as many pools as there are input / output
+  auto fixPointerArguments = [&requestPoolInfos](std::vector<ModelArgumentInfo> &argumentInfos) {
+    // Each pointer-based argument gets its own single-buffer pool appended
+    // to requestPoolInfos; the argument's location is rewritten to point at
+    // that pool with offset 0.
+    for (ModelArgumentInfo &argumentInfo : argumentInfos)
+    {
+      if (argumentInfo.state == ModelArgumentInfo::POINTER)
+      {
+        RunTimePoolInfo runTimeInfo;
+        runTimeInfo.buffer = static_cast<uint8_t *>(argumentInfo.buffer);
+        argumentInfo.locationAndLength.poolIndex = static_cast<uint32_t>(requestPoolInfos.size());
+        argumentInfo.locationAndLength.offset = 0;
+        requestPoolInfos.push_back(runTimeInfo);
+      }
+    }
+  };
+  fixPointerArguments(mInputs);
+  fixPointerArguments(mOutputs);
+
+  Request request;
+  setRequestArgumentArray(mInputs, &request.inputs);
+  setRequestArgumentArray(mOutputs, &request.outputs);
+
+  Executor executor;
+  return executor.run(model, request, modelPoolInfos, requestPoolInfos);
+}
diff --git a/compiler/ann-ref/src/ExecutionBuilder.h b/compiler/ann-ref/src/ExecutionBuilder.h
new file mode 100644
index 000000000..0bf5ef755
--- /dev/null
+++ b/compiler/ann-ref/src/ExecutionBuilder.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __EXECUTION_BUILDER_H__
+#define __EXECUTION_BUILDER_H__
+
+#include "NeuralNetworks.h"
+
+#include "ModelBuilder.h"
+#include "ModelArgumentInfo.h"
+
+#include "Memory.h"
+
+#include <vector>
+
+class ModelBuilder;
+
+// Builder object behind ANeuralNetworksExecution: accumulates input/output
+// bindings (raw pointers or Memory windows) for one run of a model, then
+// runs it via startCompute().
+class ExecutionBuilder
+{
+public:
+  ExecutionBuilder(const ModelBuilder *);
+
+public:
+  // Bind input `index` to a caller-owned buffer / a window of a Memory.
+  int setInput(uint32_t index, const ANeuralNetworksOperandType *type, const void *buffer,
+               size_t length);
+  int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType *type,
+                         const Memory *memory, size_t offset, size_t length);
+
+public:
+  // Bind output `index` to a caller-owned buffer / a window of a Memory.
+  int setOutput(uint32_t index, const ANeuralNetworksOperandType *type, void *buffer,
+                size_t length);
+  int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType *type,
+                          const Memory *memory, size_t offset, size_t length);
+
+public:
+  // Runs the model (synchronously in ann-ref); returns ANEURALNETWORKS_*.
+  int startCompute(void);
+
+private:
+  const ModelBuilder *mModel; // not owned
+
+private:
+  // The information we'll send to the driver about the inputs and outputs.
+  // Note that we build this in two steps:
+  // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
+  //    If set from a pointer, don't set the location in the RequestArgument but store it
+  //    instead in mInputBuffers or mOutputBuffers.
+  // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
+  //    the m*Buffers entries. Copy the input values into the shared memory.
+  // We do this to avoid creating a lot of shared memory objects if we have a lot of
+  // parameters specified via pointers. We also avoid copying in the case where
+  // some of the nodes will interpreted on the CPU anyway.
+  std::vector<ModelArgumentInfo> mInputs;
+  std::vector<ModelArgumentInfo> mOutputs;
+
+private:
+  MemoryTracker mMemories; // Memory objects referenced by *FromMemory bindings
+};
+
+#endif // __EXECUTION_BUILDER_H__
diff --git a/compiler/ann-ref/src/Executor.cpp b/compiler/ann-ref/src/Executor.cpp
new file mode 100644
index 000000000..888fc9c81
--- /dev/null
+++ b/compiler/ann-ref/src/Executor.cpp
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Executor.h"
+
+#include "NeuralNetworks.h"
+#include "Shape.h"
+
+#include "ops/Add.h"
+#include "ops/Add.float.h"
+#include "ops/Conv2D.h"
+#include "ops/Conv2D.float.h"
+#include "ops/DepthwiseConv2D.h"
+#include "ops/DepthwiseConv2D.float.h"
+#include "ops/AvgPool2D.h"
+#include "ops/AvgPool2D.float.h"
+#include "ops/MaxPool2D.h"
+#include "ops/MaxPool2D.float.h"
+#include "ops/Mul.h"
+#include "ops/Mul.float.h"
+#include "ops/ReLU.h"
+#include "ops/ReLU.float.h"
+#include "ops/ReLU6.h"
+#include "ops/ReLU6.float.h"
+#include "ops/Concatenation.h"
+#include "ops/Concatenation.float.h"
+#include "ops/Reshape.h"
+#include "ops/Softmax.h"
+#include "ops/Softmax.float.h"
+#include "ops/FullyConnected.h"
+#include "ops/FullyConnected.float.h"
+#include "ops/Pad.h"
+#include "ops/Sub.h"
+#include "ops/Sub.float.h"
+#include "ops/Div.h"
+#include "ops/Div.float.h"
+
+#include "Logging.h"
+#include "Assert.h"
+
+// Implicit padding schemes read from the model as scalar operands.
+// Values presumably mirror the NNAPI implicit-padding codes (SAME = 1,
+// VALID = 2) -- TODO confirm against NeuralNetworks.h.
+enum PaddingScheme
+{
+  kPaddingUnknown = 0,
+  kPaddingSame = 1,
+  kPaddingValid = 2,
+};
+
+// Translates an implicit padding scheme into explicit head/tail padding for
+// one spatial dimension. kPaddingValid (and unknown schemes) yield zero
+// padding; kPaddingSame splits the required total, with the tail receiving
+// the extra unit when the total is odd.
+inline void calculateExplicitPadding(int32_t in_size, int32_t stride, int32_t filter_size,
+                                     int32_t padding_implicit, int32_t *padding_head,
+                                     int32_t *padding_tail)
+{
+  *padding_head = 0;
+  *padding_tail = 0;
+
+  if (padding_implicit == kPaddingSame)
+  {
+    // SAME padding: output covers ceil(in_size / stride) positions.
+    int32_t out_size = (in_size + stride - 1) / stride;
+    int32_t tmp = (out_size - 1) * stride + filter_size;
+    if (tmp > in_size)
+    {
+      *padding_head = (tmp - in_size) / 2;
+      *padding_tail = (tmp - in_size) - *padding_head;
+    }
+  }
+}
+
+// Reads the first element of an operand's buffer, reinterpreted as a scalar
+// of type T. The caller is responsible for T matching the operand's type.
+template <typename T> static inline T getScalarData(const RunTimeOperandInfo &info)
+{
+  // TODO: Check buffer is at least as long as size of data.
+  T *data = reinterpret_cast<T *>(info.buffer);
+  return data[0];
+}
+
+// Updates the RunTimeOperandInfo with the newly calculated shape.
+// Allocate the buffer if we need to.
+// Returns false when a user-provided model output does not match the shape
+// computed by the prepare step.
+static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo *info, const Shape &shape)
+{
+  // For user-provided model output operands, the parameters must match the Shape
+  // calculated from the preparation step.
+  if (info->lifetime == OperandLifeTime::MODEL_OUTPUT)
+  {
+    if (info->type != shape.type || info->dimensions != shape.dimensions)
+    {
+      LOG(ERROR) << "Invalid type or dimensions for model output";
+      return false;
+    }
+    // Quantization parameters must also agree for quantized outputs.
+    if (info->type == OperandType::TENSOR_QUANT8_ASYMM &&
+        (info->scale != shape.scale || info->zeroPoint != shape.offset))
+    {
+      LOG(ERROR) << "Invalid scale or zeroPoint for model output";
+      return false;
+    }
+  }
+  info->type = shape.type;
+  info->dimensions = shape.dimensions;
+  info->scale = shape.scale;
+  info->zeroPoint = shape.offset;
+  // Temporaries are allocated lazily on first use.
+  if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr)
+  {
+    uint32_t length = sizeOfData(info->type, info->dimensions);
+    info->buffer = new uint8_t[length];
+    // NOTE(review): plain new[] throws std::bad_alloc rather than returning
+    // nullptr, so this check cannot fire as written.
+    if (info->buffer == nullptr)
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Ignore the .pools entry in model and request. This will have been taken care of
+// by the caller.
+// Executes every operation of `model` in the serialized (already ordered)
+// sequence, reading/writing through the pre-mapped pool infos. Returns the
+// first failing operation's status, or ANEURALNETWORKS_NO_ERROR.
+int Executor::run(const Model &model, const Request &request,
+                  const std::vector<RunTimePoolInfo> &modelPoolInfos,
+                  const std::vector<RunTimePoolInfo> &requestPoolInfos)
+{
+  VLOG(CPUEXE) << "Executor::run()";
+
+  mModel = &model;
+  mRequest = &request; // TODO check if mRequest is needed
+  // NOTE(review): the bool result of initializeRunTimeInfo is discarded; as
+  // written it always returns true, but a future failure path would be
+  // silently ignored here.
+  initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
+  // The model has serialized the operation in execution order.
+  for (const auto &operation : model.operations)
+  {
+    int n = executeOperation(operation);
+    if (n != ANEURALNETWORKS_NO_ERROR)
+    {
+      // NOTE(review): mModel/mRequest remain set on this early-error path.
+      return n;
+    }
+  }
+  mModel = nullptr;
+  mRequest = nullptr;
+  VLOG(CPUEXE) << "Completed run normally";
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Builds mOperands (one RunTimeOperandInfo per model operand): first from
+// the model's operand table -- constants resolve to their backing buffers,
+// temporaries start unallocated with a consumer count -- then patches model
+// inputs/outputs to point into the request pools, applying any
+// caller-supplied dimension overrides. Always returns true as written.
+bool Executor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo> &modelPoolInfos,
+                                     const std::vector<RunTimePoolInfo> &requestPoolInfos)
+{
+  VLOG(CPUEXE) << "Executor::initializeRunTimeInfo";
+  const size_t count = mModel->operands.size();
+  mOperands.resize(count);
+
+  // Start by setting the runtime info to what's in the model.
+  for (size_t i = 0; i < count; i++)
+  {
+    const Operand &from = mModel->operands[i];
+    RunTimeOperandInfo &to = mOperands[i];
+    to.type = from.type;
+    to.dimensions = from.dimensions;
+    to.scale = from.scale;
+    to.zeroPoint = from.zeroPoint;
+    to.length = from.location.length;
+    to.lifetime = from.lifetime;
+    // Only temporaries carry a use count (for early buffer release); every
+    // other lifetime either has a fixed backing buffer or gets one from the
+    // request arguments below.
+    switch (from.lifetime)
+    {
+      case OperandLifeTime::TEMPORARY_VARIABLE:
+        to.buffer = nullptr;
+        to.numberOfUsesLeft = from.numberOfConsumers;
+        break;
+      case OperandLifeTime::CONSTANT_COPY:
+        to.buffer = const_cast<uint8_t *>(&mModel->operandValues[from.location.offset]);
+        to.numberOfUsesLeft = 0;
+        break;
+      case OperandLifeTime::CONSTANT_REFERENCE:
+      {
+        auto poolIndex = from.location.poolIndex;
+        ASSERT(poolIndex < modelPoolInfos.size());
+        auto &r = modelPoolInfos[poolIndex];
+        to.buffer = r.buffer + from.location.offset;
+        to.numberOfUsesLeft = 0;
+        break;
+      }
+      case OperandLifeTime::MODEL_INPUT:
+      case OperandLifeTime::MODEL_OUTPUT:
+      case OperandLifeTime::NO_VALUE:
+        to.buffer = nullptr;
+        to.numberOfUsesLeft = 0;
+        break;
+      default:
+        ASSERT(false);
+        break;
+    }
+  }
+
+  // Adjust the runtime info for the arguments passed to the model,
+  // modifying the buffer location, and possibly the dimensions.
+  auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t> &indexes,
+                                                      const std::vector<RequestArgument> &arguments) {
+    ASSERT(indexes.size() == arguments.size());
+    for (size_t i = 0; i < indexes.size(); i++)
+    {
+      const uint32_t operandIndex = indexes[i];
+      const RequestArgument &from = arguments[i];
+      RunTimeOperandInfo &to = mOperands[operandIndex];
+      if (from.dimensions.size() > 0)
+      {
+        // It's the responsibility of the caller to validate that
+        // from.dimensions only modifies the dimensions that were
+        // unspecified in the model. That's the case in SampleDriver.cpp
+        // with the call to validateRequest().
+        // TODO make sure that's the case for the default CPU path.
+        to.dimensions = from.dimensions;
+      }
+      if (from.hasNoValue)
+      {
+        to.lifetime = OperandLifeTime::NO_VALUE;
+        ASSERT(to.buffer == nullptr);
+      }
+      else
+      {
+        auto poolIndex = from.location.poolIndex;
+        ASSERT(poolIndex < requestPoolInfos.size());
+        auto &r = requestPoolInfos[poolIndex];
+        to.buffer = r.buffer + from.location.offset;
+      }
+    }
+  };
+  updateForArguments(mModel->inputIndexes, mRequest->inputs);
+  updateForArguments(mModel->outputIndexes, mRequest->outputs);
+
+  return true;
+}
+
+// Decrements the remaining-use count of each listed operand and frees its
+// heap buffer once the count hits zero. Operands whose count is already zero
+// on entry (constants, model inputs/outputs) are skipped.
+void Executor::freeNoLongerUsedOperands(const std::vector<uint32_t> &inputs)
+{
+  for (uint32_t i : inputs)
+  {
+    auto &info = mOperands[i];
+    // Check if it's a static or model input/output.
+    if (info.numberOfUsesLeft == 0)
+    {
+      continue;
+    }
+    info.numberOfUsesLeft--;
+    if (info.numberOfUsesLeft == 0)
+    {
+      ASSERT(info.buffer != nullptr);
+      delete[] info.buffer;
+      info.buffer = nullptr;
+    }
+  }
+}
+
+int Executor::executeOperation(const Operation &operation)
+{
+ const std::vector<uint32_t> &ins = operation.inputs;
+ const std::vector<uint32_t> &outs = operation.outputs;
+ bool success = false;
+
+ // Function to verify that the number of input and output parameters
+ // matches what is expected. Also checks that all the parameters have
+ // values. This function is to be used only for operations that do not
+ // accept optional arguments.
+ // TODO Have a version that works for optional arguments.
+ auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
+ size_t requiredOuts) -> bool {
+ auto verify = [&operation, this](size_t requiredCount, const std::vector<uint32_t> &indexes,
+ const char *type) -> bool {
+ size_t actualCount = indexes.size();
+ if (actualCount != requiredCount)
+ {
+ LOG(ERROR) << getOperationName(operation.type) << ": Invalid number of " << type
+ << " operands. Got " << actualCount << " of " << requiredCount;
+ return false;
+ }
+ for (size_t i = 0; i < actualCount; i++)
+ {
+ if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE)
+ {
+ LOG(ERROR) << getOperationName(operation.type) << " " << type << " operand " << i
+ << " is required but missing.";
+ return false;
+ }
+ }
+ return true;
+ };
+ return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out");
+ };
+
+ switch (operation.type)
+ {
+ case OperationType::ADD:
+ {
+ if (!allParametersPresent(3, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &in1 = mOperands[ins[0]];
+ const RunTimeOperandInfo &in2 = mOperands[ins[1]];
+ int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+ RunTimeOperandInfo &out = mOperands[outs[0]];
+ Shape outShape = out.shape();
+
+ ASSERT(in1.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = addPrepare(in1.shape(), in2.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&out, outShape) &&
+ addFloat32(reinterpret_cast<const float *>(in1.buffer), in1.shape(),
+ reinterpret_cast<const float *>(in2.buffer), in2.shape(), activation,
+ reinterpret_cast<float *>(out.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::DEPTHWISE_CONV_2D:
+ {
+ const size_t inCount = ins.size();
+ if ((inCount != 11 && inCount != 8) || !allParametersPresent(inCount, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+ const RunTimeOperandInfo &filter = mOperands[ins[1]];
+ const RunTimeOperandInfo &bias = mOperands[ins[2]];
+
+ int32_t padding_left, padding_right;
+ int32_t padding_top, padding_bottom;
+ int32_t stride_width, stride_height;
+ int32_t depth_multiplier;
+ int32_t activation;
+
+ if (inCount == 11)
+ {
+ padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
+ padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
+ padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
+ padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
+ depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
+ activation = getScalarData<int32_t>(mOperands[ins[10]]);
+ }
+ else
+ {
+ int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
+ depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]);
+ activation = getScalarData<int32_t>(mOperands[ins[7]]);
+
+ Shape inputShape = input.shape();
+ Shape filterShape = filter.shape();
+ int32_t input_width = getSizeOfDimension(inputShape, 2);
+ int32_t input_height = getSizeOfDimension(inputShape, 1);
+ int32_t filter_width = getSizeOfDimension(filterShape, 2);
+ int32_t filter_height = getSizeOfDimension(filterShape, 1);
+ calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
+ &padding_left, &padding_right);
+ calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
+ &padding_top, &padding_bottom);
+ }
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success =
+ depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(), padding_left,
+ padding_right, padding_top, padding_bottom, stride_width,
+ stride_height, &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ depthwiseConvFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+ reinterpret_cast<const float *>(filter.buffer), filter.shape(),
+ reinterpret_cast<const float *>(bias.buffer), bias.shape(), padding_left,
+ padding_right, padding_top, padding_bottom, stride_width, stride_height,
+ depth_multiplier, activation, reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::CONV_2D:
+ {
+ const size_t inCount = ins.size();
+ if ((inCount != 10 && inCount != 7) || !allParametersPresent(inCount, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+ const RunTimeOperandInfo &filter = mOperands[ins[1]];
+ const RunTimeOperandInfo &bias = mOperands[ins[2]];
+
+ int32_t padding_left, padding_right;
+ int32_t padding_top, padding_bottom;
+ int32_t stride_width, stride_height;
+ int32_t activation;
+
+ if (inCount == 10)
+ {
+ padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
+ padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
+ padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
+ padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
+ activation = getScalarData<int32_t>(mOperands[ins[9]]);
+ }
+ else
+ {
+ int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
+ activation = getScalarData<int32_t>(mOperands[ins[6]]);
+
+ Shape inputShape = input.shape();
+ Shape filterShape = filter.shape();
+ int32_t input_width = getSizeOfDimension(inputShape, 2);
+ int32_t input_height = getSizeOfDimension(inputShape, 1);
+ int32_t filter_width = getSizeOfDimension(filterShape, 2);
+ int32_t filter_height = getSizeOfDimension(filterShape, 1);
+ calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
+ &padding_left, &padding_right);
+ calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
+ &padding_top, &padding_bottom);
+ }
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success =
+ convPrepare(input.shape(), filter.shape(), bias.shape(), padding_left, padding_right,
+ padding_top, padding_bottom, stride_width, stride_height, &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ convFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+ reinterpret_cast<const float *>(filter.buffer), filter.shape(),
+ reinterpret_cast<const float *>(bias.buffer), bias.shape(), padding_left,
+ padding_right, padding_top, padding_bottom, stride_width, stride_height,
+ activation, reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::AVERAGE_POOL_2D:
+ {
+ const size_t inCount = ins.size();
+ if ((inCount != 10 && inCount != 7) || !allParametersPresent(inCount, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+
+ int32_t padding_left, padding_right;
+ int32_t padding_top, padding_bottom;
+ int32_t stride_width, stride_height;
+ int32_t filter_width, filter_height;
+ int32_t activation;
+
+ if (inCount == 10)
+ {
+ padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
+ padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
+ padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
+ padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
+ filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
+ filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
+ activation = getScalarData<int32_t>(mOperands[ins[9]]);
+ }
+ else
+ {
+ int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
+ filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
+ filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
+ activation = getScalarData<int32_t>(mOperands[ins[6]]);
+
+ Shape inputShape = input.shape();
+ int32_t input_width = getSizeOfDimension(inputShape, 2);
+ int32_t input_height = getSizeOfDimension(inputShape, 1);
+ calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
+ &padding_left, &padding_right);
+ calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
+ &padding_top, &padding_bottom);
+ }
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = averagePoolPrepare(input.shape(), padding_left, padding_right, padding_top,
+ padding_bottom, stride_width, stride_height, filter_width,
+ filter_height, &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ averagePoolFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(), padding_left,
+ padding_right, padding_top, padding_bottom, stride_width, stride_height,
+ filter_width, filter_height, activation,
+ reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::MAX_POOL_2D:
+ {
+ const size_t inCount = ins.size();
+ if ((inCount != 10 && inCount != 7) || !allParametersPresent(inCount, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+
+ int32_t padding_left, padding_right;
+ int32_t padding_top, padding_bottom;
+ int32_t stride_width, stride_height;
+ int32_t filter_width, filter_height;
+ int32_t activation;
+
+ if (inCount == 10)
+ {
+ padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
+ padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
+ padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
+ padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
+ filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
+ filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
+ activation = getScalarData<int32_t>(mOperands[ins[9]]);
+ }
+ else
+ {
+ int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
+ filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
+ filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
+ activation = getScalarData<int32_t>(mOperands[ins[6]]);
+
+ Shape inputShape = input.shape();
+ int32_t input_width = getSizeOfDimension(inputShape, 2);
+ int32_t input_height = getSizeOfDimension(inputShape, 1);
+ calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
+ &padding_left, &padding_right);
+ calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
+ &padding_top, &padding_bottom);
+ }
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = maxPoolPrepare(input.shape(), padding_left, padding_right, padding_top,
+ padding_bottom, stride_width, stride_height, filter_width,
+ filter_height, &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ maxPoolFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(), padding_left,
+ padding_right, padding_top, padding_bottom, stride_width, stride_height,
+ filter_width, filter_height, activation,
+ reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::MUL:
+ {
+ if (!allParametersPresent(3, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &in1 = mOperands[ins[0]];
+ const RunTimeOperandInfo &in2 = mOperands[ins[1]];
+ int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+ RunTimeOperandInfo &out = mOperands[outs[0]];
+ Shape outShape = out.shape();
+
+ ASSERT(in1.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = mulPrepare(in1.shape(), in2.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&out, outShape) &&
+ mulFloat32(reinterpret_cast<const float *>(in1.buffer), in1.shape(),
+ reinterpret_cast<const float *>(in2.buffer), in2.shape(), activation,
+ reinterpret_cast<float *>(out.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::RELU:
+ {
+ if (!allParametersPresent(1, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = reluPrepare(input.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ reluFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+ reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::RELU6:
+ {
+ if (!allParametersPresent(1, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = relu6Prepare(input.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ relu6Float32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+ reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::SOFTMAX:
+ {
+ if (!allParametersPresent(2, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ RunTimeOperandInfo &input = mOperands[ins[0]];
+ float beta = getScalarData<float>(mOperands[ins[1]]);
+ if (beta <= 0.0f)
+ {
+ LOG(ERROR) << "beta must be positive for softmax";
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = softmaxPrepare(input.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ softmaxFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(), beta,
+ reinterpret_cast<float *>(output.buffer), output.shape());
+ }
+ }
+ break;
+ case OperationType::FULLY_CONNECTED:
+ {
+ if (!allParametersPresent(4, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ RunTimeOperandInfo &input = mOperands[ins[0]];
+ RunTimeOperandInfo &weights = mOperands[ins[1]];
+ RunTimeOperandInfo &bias = mOperands[ins[2]];
+
+ int32_t activation = getScalarData<int32_t>(mOperands[ins[3]]);
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ fullyConnectedFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+ reinterpret_cast<const float *>(weights.buffer), weights.shape(),
+ reinterpret_cast<const float *>(bias.buffer), bias.shape(), activation,
+ reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::CONCATENATION:
+ {
+ if (outs.size() != 1 || ins.size() < 2)
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ int numInputTensors = ins.size() - 1;
+ int32_t axis = getScalarData<int32_t>(mOperands[ins[numInputTensors]]);
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ const RunTimeOperandInfo &firstInput = mOperands[ins[0]];
+ ASSERT(firstInput.type == OperandType::TENSOR_FLOAT32);
+ {
+ std::vector<Shape> inputShapes(numInputTensors);
+ std::vector<const float *> inputDataPtrs(numInputTensors);
+
+ for (int i = 0; i < numInputTensors; i++)
+ {
+ RunTimeOperandInfo &input = mOperands[ins[i]];
+ inputShapes[i] = input.shape();
+ inputDataPtrs[i] = reinterpret_cast<const float *>(input.buffer);
+ }
+ success = concatenationPrepare(inputShapes, axis, &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ concatenationFloat32(inputDataPtrs, inputShapes, axis, reinterpret_cast<float *>(output.buffer),
+ outShape);
+ }
+ }
+ break;
+ case OperationType::RESHAPE:
+ {
+ if (!allParametersPresent(2, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+ const RunTimeOperandInfo &targetShape = mOperands[ins[1]];
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ success = reshapePrepare(input.shape(), reinterpret_cast<const int32_t *>(targetShape.buffer),
+ getNumberOfElements(targetShape.shape()), &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ reshapeGeneric(reinterpret_cast<const void *>(input.buffer), input.shape(),
+ reinterpret_cast<void *>(output.buffer), outShape);
+ }
+ break;
+ case OperationType::PAD:
+ {
+ if (!allParametersPresent(2, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo& input = mOperands[ins[0]];
+ const RunTimeOperandInfo& paddings = mOperands[ins[1]];
+
+ RunTimeOperandInfo& output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ success = padPrepare(input.shape(),
+ reinterpret_cast<const int32_t*>(paddings.buffer),
+ paddings.shape(),
+ &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ padGeneric(input.buffer,
+ input.shape(),
+ reinterpret_cast<const int32_t*>(paddings.buffer),
+ output.buffer,
+ outShape);
+ }
+ break;
+ case OperationType::SUB:
+ {
+ if (!allParametersPresent(3, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &in1 = mOperands[ins[0]];
+ const RunTimeOperandInfo &in2 = mOperands[ins[1]];
+ int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+ RunTimeOperandInfo &out = mOperands[outs[0]];
+ Shape outShape = out.shape();
+
+ ASSERT(in1.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = subPrepare(in1.shape(), in2.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&out, outShape) &&
+ subFloat32(reinterpret_cast<const float *>(in1.buffer), in1.shape(),
+ reinterpret_cast<const float *>(in2.buffer), in2.shape(), activation,
+ reinterpret_cast<float *>(out.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::DIV:
+ {
+ if (!allParametersPresent(3, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &in1 = mOperands[ins[0]];
+ const RunTimeOperandInfo &in2 = mOperands[ins[1]];
+ int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+ RunTimeOperandInfo &out = mOperands[outs[0]];
+ Shape outShape = out.shape();
+
+ ASSERT(in1.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = divPrepare(in1.shape(), in2.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&out, outShape) &&
+ divFloat32(reinterpret_cast<const float *>(in1.buffer), in1.shape(),
+ reinterpret_cast<const float *>(in2.buffer), in2.shape(), activation,
+ reinterpret_cast<float *>(out.buffer), outShape);
+ }
+ }
+ break;
+ default:
+ NYI(getOperationName(operation.type));
+ break;
+ }
+ if (!success)
+ {
+ LOG(ERROR) << getOperationName(operation.type) << " failed.";
+ return ANEURALNETWORKS_OP_FAILED;
+ }
+
+ freeNoLongerUsedOperands(ins);
+ return ANEURALNETWORKS_NO_ERROR;
+}
diff --git a/compiler/ann-ref/src/Executor.h b/compiler/ann-ref/src/Executor.h
new file mode 100644
index 000000000..66dcca116
--- /dev/null
+++ b/compiler/ann-ref/src/Executor.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __EXECUTOR_H__
+#define __EXECUTOR_H__
+
+#include "Model.h"
+
+#include "Shape.h"
+#include "Request.h"
+
+#include <vector>
+
+// Information we maintain about each operand during execution that
+// may change during execution.
+struct RunTimeOperandInfo
+{
+  // TODO Storing the type here is redundant, as it won't change during execution.
+  OperandType type;
+
+  // The type and dimensions of the operand. The dimensions can
+  // change at runtime. We include the type because it's useful
+  // to pass together with the dimension to the functions implementing
+  // the operators.
+  //
+  // Q: Is it possible??
+  std::vector<uint32_t> dimensions;
+  // Quantization parameters, forwarded into Shape below (only
+  // meaningful for quantized operand types).
+  float scale;
+  int32_t zeroPoint;
+
+  // Where the operand's data is stored. Check the corresponding
+  // location information in the model to figure out if this points
+  // to memory we have allocated for a temporary operand.
+  uint8_t *buffer;
+  // The length of the buffer.
+  uint32_t length;
+
+  // Whether this is a temporary variable, a model input, a constant, etc.
+  OperandLifeTime lifetime;
+
+  // Keeps track of how many operations have yet to make use
+  // of this temporary variable. When the count is decremented to 0,
+  // we free the buffer. For non-temporary variables, this count is
+  // always 0.
+  uint32_t numberOfUsesLeft;
+
+  // Packs type/dimensions/quantization into the Shape struct consumed
+  // by the operation implementations.
+  Shape shape() const
+  {
+    return Shape{.type = type, .dimensions = dimensions, .scale = scale, .offset = zeroPoint};
+  }
+};
+
+// Used to keep a pointer to each of the memory pools
+struct RunTimePoolInfo
+{
+  // Base address of the pool. The pointer is not owned by this struct.
+  uint8_t *buffer;
+
+  // Records the pool's base address; always reports success.
+  bool set(uint8_t *m)
+  {
+    buffer = m;
+    return true;
+  }
+};
+
+// This class is used to execute a model on the CPU.
+class Executor
+{
+public:
+  // Executes the model. The results will be stored at the locations
+  // specified in the constructor.
+  // The model must outlive the executor. We prevent it from being modified
+  // while this is executing.
+  // NOTE(review): this class declares no constructor, so the comment
+  // above may be outdated; outputs come from the request/pool arguments.
+  // Returns an ANEURALNETWORKS_* status code.
+  int run(const Model &model, const Request &request,
+          const std::vector<RunTimePoolInfo> &modelPoolInfos,
+          const std::vector<RunTimePoolInfo> &requestPoolInfos);
+
+private:
+  // Prepares per-operand runtime state from the given memory pools
+  // before execution (presumably fills mOperands — see Executor.cpp).
+  bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo> &modelPoolInfos,
+                             const std::vector<RunTimePoolInfo> &requestPoolInfos);
+  // Runs one operation of the graph.
+  int executeOperation(const Operation &entry);
+  // Decrement the usage count for the operands listed. Frees the memory
+  // allocated for any temporary variable with a count of zero.
+  void freeNoLongerUsedOperands(const std::vector<uint32_t> &inputs);
+
+  // The model and the request that we'll execute. Only valid while run()
+  // is being executed.
+  const Model *mModel = nullptr;
+  const Request *mRequest = nullptr;
+
+  // We're copying the list of all the dimensions from the model, as
+  // these may be modified when we run the operations. Since we're
+  // making a full copy, the indexes used in the operand description
+  // stay valid.
+  // std::vector<uint32_t> mDimensions;
+  // Runtime information about all the operands.
+  std::vector<RunTimeOperandInfo> mOperands;
+};
+
+#endif // __EXECUTOR_H__
diff --git a/compiler/ann-ref/src/Logging.cpp b/compiler/ann-ref/src/Logging.cpp
new file mode 100644
index 000000000..4f849efaa
--- /dev/null
+++ b/compiler/ann-ref/src/Logging.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Logging.h"
+
+// Verbose logging starts out disabled; no public toggle exists in this
+// reference implementation.
+VLogging::VLogging() : _enabled(false)
+{
+  // Nothing further to initialize.
+}
+
+// Returns the process-wide singleton instance, constructed lazily on
+// first use (C++11 guarantees thread-safe static initialization).
+VLogging &VLogging::access()
+{
+  static VLogging instance;
+  return instance;
+}
+
+// All verbose log output is directed to standard output.
+std::ostream &VLogging::stream() { return std::cout; }
diff --git a/compiler/ann-ref/src/Logging.h b/compiler/ann-ref/src/Logging.h
new file mode 100644
index 000000000..1f81ad6e3
--- /dev/null
+++ b/compiler/ann-ref/src/Logging.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGGING_H__
+#define __LOGGING_H__
+
+#include <iostream>
+
+// Singleton gate for verbose logging; consumed by the VLOG macro below.
+class VLogging
+{
+public:
+  // Accessor for the process-wide singleton instance.
+  static VLogging &access(void);
+  // True when verbose logging is turned on.
+  bool enabled() const { return _enabled; }
+  // Stream that verbose messages are written to.
+  std::ostream &stream(void);
+
+private:
+  VLogging();
+
+private:
+  // Verbose-logging flag; initialized to false in the constructor.
+  bool _enabled;
+};
+
+// LOG ignores its arguments and just starts a fresh line on stdout;
+// the actual message is appended by the caller via operator<<.
+#define LOG(...) std::cout << std::endl
+// VLOG streams only when verbose logging is enabled.
+// NOTE(review): the bare 'if' makes VLOG hazardous before an 'else'
+// (dangling-else); avoid 'if (...) VLOG(...); else ...' at call sites.
+#define VLOG(...) \
+  if (VLogging::access().enabled()) \
+  (VLogging::access().stream() << std::endl)
+// Reports an unimplemented module/operation by name.
+#define NYI(module) std::cout << "NYI : '" << module << "' is not supported now." << std::endl;
+
+#endif // __LOGGING_H__
diff --git a/compiler/ann-ref/src/Macro.h b/compiler/ann-ref/src/Macro.h
new file mode 100644
index 000000000..829c15425
--- /dev/null
+++ b/compiler/ann-ref/src/Macro.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MACRO_H__
+#define __MACRO_H__
+
+// Number of elements in a C-style array. Only valid for true arrays —
+// applied to a pointer, sizeof would yield a meaningless count.
+#define COUNT(X) (sizeof(X) / sizeof(X[0]))
+
+#endif // __MACRO_H__
diff --git a/compiler/ann-ref/src/Memory.cpp b/compiler/ann-ref/src/Memory.cpp
new file mode 100644
index 000000000..fd70f8db7
--- /dev/null
+++ b/compiler/ann-ref/src/Memory.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "Memory"
+
+#include "Memory.h"
+#include "NeuralNetworks.h" // ANEURALNETWORKS_XXX
+
+#include <sys/mman.h>
+#include <new> // std::nothrow
+
+// Unmaps the mapped region, if any. The file descriptor passed to
+// set() is not owned here and is not closed.
+MappedMemory::~MappedMemory()
+{
+  if (_base)
+  {
+    munmap(_base, _size);
+  }
+}
+
+// Maps 'size' bytes of 'fd' starting at 'offset' into this process.
+//
+// Returns ANEURALNETWORKS_NO_ERROR on success, or
+// ANEURALNETWORKS_UNEXPECTED_NULL when mmap fails.
+int MappedMemory::set(size_t size, int prot, int fd, size_t offset)
+{
+// Argument validation below is intentionally disabled; kept for reference.
+#if 0
+  if (fd < 0)
+  {
+    LOG(ERROR) << "ANeuralNetworksMemory_createFromFd invalid fd " << fd;
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+  if (size == 0 || fd < 0)
+  {
+    LOG(ERROR) << "Invalid size or fd";
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  int dupfd = dup(fd);
+  if (dupfd == -1)
+  {
+    LOG(ERROR) << "Failed to dup the fd";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+#endif
+  // MAP_PRIVATE: writes are copy-on-write and never reach the file.
+  void * const base = mmap(nullptr, size, prot, MAP_PRIVATE, fd, offset);
+
+  if (base == MAP_FAILED)
+  {
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+
+  _base = static_cast<uint8_t *>(base);
+  _size = size;
+
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Stores the mapped base address in '*buffer'. Always succeeds; the
+// result is nullptr if set() has not been called successfully.
+int MappedMemory::getPointer(uint8_t **buffer) const
+{
+  *buffer = _base;
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Stub: accepts any (offset, length) pair without checking it against
+// the mapped size; both parameters are deliberately unused.
+bool MappedMemory::validateSize(uint32_t offset, uint32_t length) const
+{
+  return true;
+}
+
+// Releases the heap buffer allocated by create(), if any.
+PrivateMemory::~PrivateMemory()
+{
+  // delete[] is a no-op on a null pointer, so no explicit check is needed.
+  delete[] _base;
+}
+
+// Allocates a private heap buffer of 'size' bytes (contents are left
+// uninitialized).
+//
+// Returns ANEURALNETWORKS_NO_ERROR on success, or
+// ANEURALNETWORKS_OUT_OF_MEMORY when the allocation fails.
+int PrivateMemory::create(uint32_t size)
+{
+  // Use the non-throwing form so allocation failure is reported as an
+  // error code instead of escaping as std::bad_alloc (resolves the
+  // previous "TODO Check allocation failure").
+  auto base = new (std::nothrow) uint8_t[size];
+
+  if (base == nullptr)
+  {
+    LOG(ERROR) << "Failed to allocate " << size << " bytes";
+    return ANEURALNETWORKS_OUT_OF_MEMORY;
+  }
+
+  _base = base;
+  _size = size;
+
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Stores the buffer's base address in '*buffer'. Always succeeds; the
+// result is nullptr if create() has not been called successfully.
+int PrivateMemory::getPointer(uint8_t **buffer) const
+{
+  *buffer = _base;
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Stub: accepts any (offset, length) pair without checking it against
+// the allocated size; both parameters are deliberately unused.
+bool PrivateMemory::validateSize(uint32_t offset, uint32_t length) const
+{
+  return true;
+}
diff --git a/compiler/ann-ref/src/Memory.h b/compiler/ann-ref/src/Memory.h
new file mode 100644
index 000000000..648b5c7d1
--- /dev/null
+++ b/compiler/ann-ref/src/Memory.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEMORY_H__
+#define __MEMORY_H__
+
+#include <cstdint>
+#include <cstddef>
+
+// Represents a memory region.
+struct Memory
+{
+  Memory() = default;
+  virtual ~Memory() = default;
+
+  // Disallow copy semantics to ensure the runtime object can only be freed
+  // once. Copy semantics could be enabled if some sort of reference counting
+  // or deep-copy system for runtime objects is added later.
+  Memory(const Memory &) = delete;
+  Memory &operator=(const Memory &) = delete;
+
+  // Returns a pointer to the underlying memory of this memory object.
+  virtual int getPointer(uint8_t **buffer) const = 0;
+  // Range-validation hook for (offset, length) accesses; the concrete
+  // implementations in Memory.cpp currently accept any range.
+  virtual bool validateSize(uint32_t offset, uint32_t length) const = 0;
+};
+
+// Memory region backed by an mmap'ed file descriptor (see Memory.cpp).
+class MappedMemory final : public Memory
+{
+public:
+  MappedMemory() = default;
+
+public:
+  // Unmaps the region, if one was mapped.
+  ~MappedMemory();
+
+public:
+  // Create the native_handle based on input size, prot, and fd.
+  // Existing native_handle will be deleted, and mHidlMemory will wrap
+  // the newly created native_handle.
+  // NOTE(review): comment inherited from Android; this implementation
+  // simply mmaps the fd and records the base address.
+  int set(size_t size, int prot, int fd, size_t offset);
+
+public:
+  int getPointer(uint8_t **buffer) const override;
+  bool validateSize(uint32_t offset, uint32_t length) const override;
+
+private:
+  // Mapped base address and size; nullptr/0 until set() succeeds.
+  uint8_t *_base = nullptr;
+  size_t _size = 0;
+};
+
+// Represents a memory region.
+// A Memory region that owns its backing storage, obtained via create().
+class AllocatedMemory : public Memory
+{
+public:
+  AllocatedMemory() = default;
+  virtual ~AllocatedMemory() = default;
+
+public:
+  // Allocates backing storage of the given size in bytes.
+  virtual int create(uint32_t size) = 0;
+
+public:
+  // Returns a pointer to the underlying memory of this memory object.
+  virtual int getPointer(uint8_t **buffer) const = 0;
+  virtual bool validateSize(uint32_t offset, uint32_t length) const = 0;
+};
+
+// Heap-allocated memory region owned by this object (see Memory.cpp).
+class PrivateMemory final : public AllocatedMemory
+{
+public:
+  PrivateMemory() = default;
+  // Frees the buffer allocated by create().
+  ~PrivateMemory();
+
+public:
+  // Disallow copy semantics to ensure the runtime object can only be freed
+  // once. Copy semantics could be enabled if some sort of reference counting
+  // or deep-copy system for runtime objects is added later.
+  PrivateMemory(const PrivateMemory &) = delete;
+  PrivateMemory &operator=(const PrivateMemory &) = delete;
+
+public:
+  // Allocates a heap buffer of 'size' bytes.
+  // 'override' added (replacing redundant 'virtual') for consistency
+  // with MappedMemory, so the compiler checks the base signatures.
+  int create(uint32_t size) override;
+
+public:
+  // Returns a pointer to the underlying memory of this memory object.
+  int getPointer(uint8_t **buffer) const override;
+  bool validateSize(uint32_t offset, uint32_t length) const override;
+
+private:
+  // Owned buffer and its size; nullptr/0 until create() succeeds.
+  uint8_t *_base = nullptr;
+  size_t _size = 0;
+};
+
+#endif // __MEMORY_H__
diff --git a/compiler/ann-ref/src/MemoryTracker.cpp b/compiler/ann-ref/src/MemoryTracker.cpp
new file mode 100644
index 000000000..3c65149c6
--- /dev/null
+++ b/compiler/ann-ref/src/MemoryTracker.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "Memory"
+
+#include "NeuralNetworks.h" // For ANEURALNETWORKS_...
+#include "MemoryTracker.h"
+
+#include "Logging.h"
+
+#include <unistd.h> // It's for 'close' and 'dup'
+ // TODO-NNRT : Remove this if included another header including this.
+
+// Returns the index assigned to 'memory', registering it on first sight.
+uint32_t MemoryTracker::add(const Memory *memory)
+{
+  VLOG(MODEL) << __func__ << " for " << memory;
+  // See if we already have this memory. If so,
+  // return its index.
+  auto i = mKnown.find(memory);
+  if (i != mKnown.end())
+  {
+    return i->second;
+  }
+  VLOG(MODEL) << "It's new";
+  // It's a new one. Save it and assign an index to it.
+  size_t next = mKnown.size();
+  if (next > 0xFFFFFFFF)
+  {
+    LOG(ERROR) << "ANeuralNetworks more than 2^32 memories.";
+    // NOTE(review): an error code is returned where callers expect an
+    // index; they cannot distinguish this from a real index value.
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  uint32_t idx = static_cast<uint32_t>(next);
+  mKnown[memory] = idx;
+  mMemories.push_back(memory);
+  return idx;
+}
diff --git a/compiler/ann-ref/src/MemoryTracker.h b/compiler/ann-ref/src/MemoryTracker.h
new file mode 100644
index 000000000..af687d183
--- /dev/null
+++ b/compiler/ann-ref/src/MemoryTracker.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEMORY_TRACKER_H__
+#define __MEMORY_TRACKER_H__
+
+#include "Memory.h"
+
+#include <vector>
+#include <unordered_map>
+
+// A utility class to accumulate multiple Memory objects and assign each
+// a distinct index number, starting with 0.
+//
+// The user of this class is responsible for avoiding concurrent calls
+// to this class from multiple threads.
+class MemoryTracker
+{
+public:
+  // Adds the memory, if it does not already exist. Returns its index.
+  // The memories should survive the tracker.
+  uint32_t add(const Memory *memory);
+  // Returns the number of memories contained.
+  uint32_t size() const { return static_cast<uint32_t>(mKnown.size()); }
+  // Returns the ith memory.
+  const Memory *operator[](size_t i) const { return mMemories[i]; }
+
+private:
+  // The vector of Memory pointers we are building.
+  std::vector<const Memory *> mMemories;
+  // A faster way to see if we already have a memory than doing find().
+  std::unordered_map<const Memory *, uint32_t> mKnown;
+};
+
+#endif // __MEMORY_TRACKER_H__
diff --git a/compiler/ann-ref/src/Model.h b/compiler/ann-ref/src/Model.h
new file mode 100644
index 000000000..dc6a0d3c9
--- /dev/null
+++ b/compiler/ann-ref/src/Model.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MODEL_H__
+#define __MODEL_H__
+
+#include "Operand.h"
+#include "Operation.h"
+
+#include <cstdint>
+#include <vector>
+
+// Plain-data container describing a complete model graph.
+struct Model final {
+  // All operands (tensors and scalars) referenced by the operations.
+  std::vector<Operand> operands;
+  // The operations making up the graph.
+  std::vector<Operation> operations;
+
+  // Indexes into 'operands' identifying model inputs and outputs.
+  std::vector<uint32_t> inputIndexes;
+  std::vector<uint32_t> outputIndexes;
+
+  // Raw byte storage — presumably backs small constant operand values;
+  // confirm against ModelBuilder.
+  std::vector<uint8_t> operandValues;
+
+  // Base pointers of the memory pools referenced by operands.
+  std::vector<uint8_t *> pools;
+};
+
+#endif // __MODEL_H__
diff --git a/compiler/ann-ref/src/ModelArgumentInfo.cpp b/compiler/ann-ref/src/ModelArgumentInfo.cpp
new file mode 100644
index 000000000..3c10cd0ea
--- /dev/null
+++ b/compiler/ann-ref/src/ModelArgumentInfo.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModelArgumentInfo.h"
+#include "NeuralNetworks.h" // For ANEURALNETWORKS_XXX
+#include "Logging.h"
+#include "Assert.h"
+
+// TODO-NNRT: Consider removing ModelArgumentInfo completely if it's not necessary
+// Binds this argument to a caller-provided buffer.
+//
+// 'data' may be nullptr only when 'length' is 0, which marks the
+// argument as HAS_NO_VALUE. Otherwise dimensions are resolved from
+// 'operand'/'type' and 'length' must equal the computed data size.
+// Returns ANEURALNETWORKS_NO_ERROR or ANEURALNETWORKS_BAD_DATA.
+int ModelArgumentInfo::setFromPointer(const Operand &operand,
+                                      const ANeuralNetworksOperandType *type, void *data,
+                                      uint32_t length)
+{
+  if ((data == nullptr) != (length == 0))
+  {
+    LOG(ERROR) << "Data pointer must be nullptr if and only if length is zero (data = " << data
+               << ", length = " << length << ")";
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  if (data == nullptr)
+  {
+    state = ModelArgumentInfo::HAS_NO_VALUE;
+  }
+  else
+  {
+    int n = updateDimensionInfo(operand, type);
+    if (n != ANEURALNETWORKS_NO_ERROR)
+    {
+      return n;
+    }
+    uint32_t neededLength = sizeOfData(operand.type, dimensions);
+    if (neededLength != length)
+    {
+      LOG(ERROR) << "Setting argument with invalid length: " << length
+                 << ", expected length: " << neededLength;
+      return ANEURALNETWORKS_BAD_DATA;
+    }
+    state = ModelArgumentInfo::POINTER;
+  }
+  buffer = data;
+  // Pointer arguments carry no pool location; only length is meaningful.
+  locationAndLength = {.poolIndex = 0, .offset = 0, .length = length};
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Binds this argument to the byte range [offset, offset + length) of
+// memory pool 'poolIndex'.
+//
+// Dimensions are resolved via updateDimensionInfo() and 'length' must
+// equal the size computed from them.
+// Returns ANEURALNETWORKS_NO_ERROR or ANEURALNETWORKS_BAD_DATA.
+int ModelArgumentInfo::setFromMemory(const Operand &operand, const ANeuralNetworksOperandType *type,
+                                     uint32_t poolIndex, uint32_t offset, uint32_t length)
+{
+  int n = updateDimensionInfo(operand, type);
+  if (n != ANEURALNETWORKS_NO_ERROR)
+  {
+    return n;
+  }
+  uint32_t neededLength = sizeOfData(operand.type, dimensions);
+  if (neededLength != length)
+  {
+    LOG(ERROR) << "Setting argument with invalid length: " << length
+               << ", expected length: " << neededLength;
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+
+  state = ModelArgumentInfo::MEMORY;
+  locationAndLength = {.poolIndex = poolIndex, .offset = offset, .length = length};
+  // Memory arguments are addressed through the pool, not a raw pointer.
+  buffer = nullptr;
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Resolves this argument's 'dimensions' from the model operand and the
+// optional caller-supplied 'newType'.
+//
+// With no newType, the operand's own dimensions are adopted and must be
+// fully specified (no zeros). With a newType, its type and rank must
+// match the operand's, and it may only fill in dimensions the operand
+// left unspecified (0) — overriding a nonzero dimension is rejected.
+// Returns ANEURALNETWORKS_NO_ERROR or ANEURALNETWORKS_BAD_DATA.
+int ModelArgumentInfo::updateDimensionInfo(const Operand &operand,
+                                           const ANeuralNetworksOperandType *newType)
+{
+  // Expected to be called at most once per argument.
+  ASSERT(dimensions.empty());
+  if (newType == nullptr)
+  {
+    for (auto i : operand.dimensions)
+    {
+      if (i == 0)
+      {
+        LOG(ERROR) << "Setting input/output with unspecified dimensions";
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+    }
+    dimensions = operand.dimensions;
+  }
+  else
+  {
+    uint32_t count = newType->dimensionCount;
+    if (static_cast<OperandType>(newType->type) != operand.type ||
+        count != operand.dimensions.size())
+    {
+      LOG(ERROR) << "Setting input/output with incompatible types";
+      return ANEURALNETWORKS_BAD_DATA;
+    }
+    dimensions = std::vector<uint32_t>(count);
+    for (uint32_t i = 0; i < count; i++)
+    {
+      if (operand.dimensions[i] != 0 && operand.dimensions[i] != newType->dimensions[i])
+      {
+        LOG(ERROR) << "Overriding a fully specified dimension is disallowed";
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      else
+      {
+        dimensions[i] = newType->dimensions[i];
+      }
+    }
+  }
+  return ANEURALNETWORKS_NO_ERROR;
+}
diff --git a/compiler/ann-ref/src/ModelArgumentInfo.h b/compiler/ann-ref/src/ModelArgumentInfo.h
new file mode 100644
index 000000000..5773da743
--- /dev/null
+++ b/compiler/ann-ref/src/ModelArgumentInfo.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MODEL_ARGUMENT_INFO_H__
+#define __MODEL_ARGUMENT_INFO_H__
+
+#include "NeuralNetworks.h"
+
+#include "Operand.h"
+
+#include <vector>
+
+// Per-input/output bookkeeping for one execution argument: where the data is
+// (pointer, memory pool, or absent) and what shape it has.
+struct ModelArgumentInfo
+{
+  // Whether the argument was specified as being in a Memory, as a pointer,
+  // has no value, or has not been specified.
+  // If POINTER then:
+  //   locationAndLength.length is valid.
+  //   dimensions is valid.
+  //   buffer is valid
+  // If MEMORY then:
+  //   locationAndLength.{poolIndex, offset, length} is valid.
+  //   dimensions is valid.
+  enum
+  {
+    POINTER,
+    MEMORY,
+    HAS_NO_VALUE,
+    UNSPECIFIED
+  } state = UNSPECIFIED;
+
+  // Pool index / byte offset / byte length; which fields are valid depends on `state`.
+  DataLocation locationAndLength;
+
+  // Effective dimensions (operand dimensions, possibly completed by the caller).
+  std::vector<uint32_t> dimensions;
+  // Raw data pointer; only meaningful when state == POINTER.
+  void *buffer;
+
+  int setFromPointer(const Operand &operand, const ANeuralNetworksOperandType *type, void *buffer,
+                     uint32_t length);
+  int setFromMemory(const Operand &operand, const ANeuralNetworksOperandType *type,
+                    uint32_t poolIndex, uint32_t offset, uint32_t length);
+  int updateDimensionInfo(const Operand &operand, const ANeuralNetworksOperandType *newType);
+};
+
+#endif // __MODEL_ARGUMENT_INFO_H__
diff --git a/compiler/ann-ref/src/ModelBuilder.cpp b/compiler/ann-ref/src/ModelBuilder.cpp
new file mode 100644
index 000000000..1f966bd2e
--- /dev/null
+++ b/compiler/ann-ref/src/ModelBuilder.cpp
@@ -0,0 +1,483 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModelBuilder.h"
+
+#include "CompilationBuilder.h"
+#include "Validation.h"
+#include "Logging.h"
+#include "Assert.h"
+
+#include <string.h>
+#include <map>
+
+// Copies `count` elements from a raw C array into `vec`, resizing it first.
+static inline void setFromIntList(std::vector<uint32_t> *vec, uint32_t count, const uint32_t *data)
+{
+  vec->resize(count);
+  for (uint32_t i = 0; i < count; i++)
+  {
+    (*vec)[i] = data[i];
+  }
+}
+
+// Returns the number of padding bytes needed to align data of the
+// specified length. It aligns object of length:
+// 2, 3 on a 2 byte boundary,
+// 4+ on a 4 byte boundary.
+// We may want to have different alignments for tensors.
+// TODO: This is arbitrary, more a proof of concept. We need
+// to determine what this should be.
+uint32_t alignBytesNeeded(uint32_t index, size_t length)
+{
+  // `pattern` is the alignment minus one (a bit mask).
+  uint32_t pattern;
+  if (length < 2)
+  {
+    pattern = 0; // No alignment necessary
+  }
+  else if (length < 4)
+  {
+    pattern = 1; // Align on 2-byte boundary
+  }
+  else
+  {
+    pattern = 3; // Align on 4-byte boundary
+  }
+  // (~(index - 1)) == -index in two's complement, so this is (-index) & pattern:
+  // the distance from `index` up to the next multiple of (pattern + 1).
+  uint32_t extra = (~(index - 1)) & pattern;
+  return extra;
+}
+
+
+// The maximum number of operands and operations that a model may have.
+const uint32_t MAX_NUMBER_OF_OPERANDS = 0xFFFFFFFE;
+const uint32_t MAX_NUMBER_OF_OPERATIONS = 0xFFFFFFFE;
+
+// Returns true (and logs, using `name` to identify the calling API) when the
+// model may no longer be modified: it has been finished or marked invalid.
+bool ModelBuilder::badState(const char *name)
+{
+  if (mCompletedModel)
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_" << name << " can't modify after model finished";
+    return true;
+  }
+  if (mInvalidModel)
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_" << name << " can't modify an invalid model";
+    return true;
+  }
+  return false;
+}
+
+// Appends a new operand described by `type` to the model.
+// The operand starts life as a TEMPORARY_VARIABLE with no backing storage;
+// setOperandValue*/identifyInputsAndOutputs may change its lifetime later.
+// Returns ANEURALNETWORKS_NO_ERROR, ANEURALNETWORKS_BAD_STATE, or
+// ANEURALNETWORKS_BAD_DATA.
+int ModelBuilder::addOperand(const ANeuralNetworksOperandType &type)
+{
+  if (badState("addOperand"))
+  {
+    return ANEURALNETWORKS_BAD_STATE;
+  }
+
+  int n = validateOperandType(type, "ANeuralNetworksModel_addOperand", true);
+  if (n != ANEURALNETWORKS_NO_ERROR)
+  {
+    return n;
+  }
+  size_t idx = mOperands.size();
+  if (idx >= MAX_NUMBER_OF_OPERANDS)
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_addOperand exceed max operands";
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  mOperands.resize(idx + 1);
+  auto &operand = mOperands[idx];
+  operand.type = static_cast<OperandType>(type.type);
+  setFromIntList(&operand.dimensions, type.dimensionCount, type.dimensions);
+  operand.numberOfConsumers = 0;
+  operand.scale = type.scale;
+  operand.zeroPoint = type.zeroPoint;
+  operand.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
+  operand.location = {.poolIndex = 0, .offset = 0, .length = 0};
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Assigns a constant value to operand `index`.
+// A null buffer with length 0 marks the operand as NO_VALUE. Small values
+// (<= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES bytes) are copied
+// immediately into mSmallOperandValues; larger values are only recorded here
+// and copied into shared memory later by copyLargeValuesToMemory(), so the
+// caller's buffer must stay alive until the model is finished.
+int ModelBuilder::setOperandValue(uint32_t index, const void *buffer, size_t length)
+{
+  if (badState("setOperandValue"))
+  {
+    return ANEURALNETWORKS_BAD_STATE;
+  }
+
+  VLOG(MODEL) << __func__ << " for operand " << index << " size " << length;
+  if (index >= operandCount())
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_setOperandValue setting operand " << index << " of "
+               << operandCount();
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  Operand &operand = mOperands[index];
+  if (buffer == nullptr)
+  {
+    if (length)
+    {
+      LOG(ERROR) << "ANeuralNetworksModel_setOperandValue buffer is nullptr but length is "
+                    "not 0";
+      return ANEURALNETWORKS_BAD_DATA;
+    }
+    operand.lifetime = OperandLifeTime::NO_VALUE;
+    // The location is unused and is set to zeros.
+    operand.location = {.poolIndex = 0, .offset = 0, .length = 0};
+  }
+  else
+  {
+    // Lengths are stored in uint32_t fields; reject anything larger.
+    if (length > 0xFFFFFFFF)
+    {
+      LOG(ERROR) << "ANeuralNetworksModel_setOperandValue value length of " << length
+                 << " exceeds max size";
+      return ANEURALNETWORKS_BAD_DATA;
+    }
+    uint32_t valueLength = static_cast<uint32_t>(length);
+    uint32_t neededLength = sizeOfData(operand.type, operand.dimensions);
+    if (neededLength != valueLength)
+    {
+      LOG(ERROR) << "ANeuralNetworksModel_setOperandValue setting " << valueLength
+                 << " bytes when needing " << neededLength;
+      return ANEURALNETWORKS_BAD_DATA;
+    }
+    if (valueLength <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES)
+    {
+      // Append to the small-values blob, padding first so the value is aligned.
+      uint32_t existingSize = static_cast<uint32_t>(mSmallOperandValues.size());
+      uint32_t extraBytes = alignBytesNeeded(existingSize, valueLength);
+      mSmallOperandValues.resize(existingSize + extraBytes + valueLength);
+      operand.lifetime = OperandLifeTime::CONSTANT_COPY;
+      operand.location = {
+          .poolIndex = 0, .offset = existingSize + extraBytes, .length = neededLength};
+      memcpy(&mSmallOperandValues[operand.location.offset], buffer, valueLength);
+      VLOG(MODEL) << "Copied small value to offset " << operand.location.offset;
+    }
+    else
+    {
+      VLOG(MODEL) << "Saving large value";
+      operand.lifetime = OperandLifeTime::CONSTANT_REFERENCE;
+      // The values for poolIndex and offset will be set when the model is finished.
+      operand.location = {.poolIndex = 0, .offset = 0, .length = valueLength};
+      // We keep track of the buffers. We'll allocate the shared memory only
+      // once we know the total size, to avoid needless copies.
+      mLargeOperandValues.push_back(LargeValue{.operandIndex = index, .buffer = buffer});
+    }
+  }
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Assigns a constant value to operand `index`, referencing a byte range inside
+// `memory` instead of copying it. The memory is registered in mMemories and the
+// operand becomes CONSTANT_REFERENCE.
+int ModelBuilder::setOperandValueFromMemory(uint32_t index, const Memory *memory, uint32_t offset,
+                                            size_t length)
+{
+  VLOG(MODEL) << __func__ << " for operand " << index << " offset " << offset << " size " << length;
+  if (badState("setOperandValueFromMemory"))
+  {
+    return ANEURALNETWORKS_BAD_STATE;
+  }
+
+  if (index >= operandCount())
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_setOperandValueFromMemory setting operand " << index
+               << " of " << operandCount();
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  Operand &operand = mOperands[index];
+  uint32_t neededLength = sizeOfData(operand.type, operand.dimensions);
+  if (neededLength != length)
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_setOperandValueFromMemory setting " << length
+               << " bytes when needing " << neededLength;
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  if (!memory->validateSize(offset, length))
+  {
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  // TODO validate does not exceed length of memory
+  operand.lifetime = OperandLifeTime::CONSTANT_REFERENCE;
+  operand.location = {.poolIndex = mMemories.add(memory), .offset = offset, .length = neededLength};
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Appends an operation of the given type, wired to the listed input/output
+// operand indices. Also bumps numberOfConsumers on each input operand, which
+// sortIntoRunOrder() relies on later.
+int ModelBuilder::addOperation(OperationType type, uint32_t inputCount, const uint32_t *inputs,
+                               uint32_t outputCount, const uint32_t *outputs)
+{
+
+  if (badState("addOperation"))
+  {
+    return ANEURALNETWORKS_BAD_STATE;
+  }
+
+  if (!validateOperationType(type))
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_addOperation invalid operations type "
+               << static_cast<uint32_t>(type);
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  // Every referenced operand index must be in range.
+  int n = validateOperandList(inputCount, inputs, operandCount(),
+                              "ANeuralNetworksModel_addOperation inputs");
+  if (n != ANEURALNETWORKS_NO_ERROR)
+  {
+    return n;
+  }
+  n = validateOperandList(outputCount, outputs, operandCount(),
+                          "ANeuralNetworksModel_addOperation outputs");
+  if (n != ANEURALNETWORKS_NO_ERROR)
+  {
+    return n;
+  }
+
+  uint32_t operationIndex = operationCount();
+  if (operationIndex >= MAX_NUMBER_OF_OPERATIONS)
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_addOperation exceed max operations";
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  mOperations.resize(operationIndex + 1);
+  auto &entry = mOperations[operationIndex];
+  entry.type = type;
+
+  setFromIntList(&entry.inputs, inputCount, inputs);
+  setFromIntList(&entry.outputs, outputCount, outputs);
+  for (uint32_t i : entry.inputs)
+  {
+    mOperands[i].numberOfConsumers++;
+    // TODO mOperands[i].consumers.push_back(operationIndex);
+  }
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Records which operands are the model's inputs and outputs.
+// Each listed operand must currently be a TEMPORARY_VARIABLE (i.e. neither a
+// constant nor already an input/output); its lifetime is changed to
+// MODEL_INPUT / MODEL_OUTPUT accordingly.
+// Returns ANEURALNETWORKS_NO_ERROR, ANEURALNETWORKS_BAD_STATE, or
+// ANEURALNETWORKS_BAD_DATA.
+int ModelBuilder::identifyInputsAndOutputs(uint32_t inputCount, const uint32_t *inputs,
+                                           uint32_t outputCount, const uint32_t *outputs)
+{
+  if (badState("identifyInputsAndOutputs"))
+  {
+    return ANEURALNETWORKS_BAD_STATE;
+  }
+
+  int n = validateOperandList(inputCount, inputs, operandCount(),
+                              "ANeuralNetworksModel_identifyInputsAndOutputs inputs");
+  if (n != ANEURALNETWORKS_NO_ERROR)
+  {
+    return n;
+  }
+  n = validateOperandList(outputCount, outputs, operandCount(),
+                          "ANeuralNetworksModel_identifyInputsAndOutputs outputs");
+  if (n != ANEURALNETWORKS_NO_ERROR)
+  {
+    return n;
+  }
+
+  // Makes a copy of the index list, validates the arguments, and changes
+  // the lifetime info of the corresponding operand.
+  auto setArguments = [&](std::vector<uint32_t> *indexVector, uint32_t indexCount,
+                          const uint32_t *indexList, OperandLifeTime lifetime) -> bool {
+    indexVector->resize(indexCount);
+    for (uint32_t i = 0; i < indexCount; i++)
+    {
+      const uint32_t operandIndex = indexList[i];
+      if (operandIndex >= mOperands.size())
+      {
+        // Fixed typo in the log message: "numbe" -> "number".
+        LOG(ERROR) << "ANeuralNetworksModel_identifyInputsAndOutputs Can't set input or output "
+                      "to be "
+                   << operandIndex << " as this exceeds the number of operands " << mOperands.size();
+        return false;
+      }
+      (*indexVector)[i] = operandIndex;
+      Operand &operand = mOperands[operandIndex];
+      if (operand.lifetime != OperandLifeTime::TEMPORARY_VARIABLE)
+      {
+        LOG(ERROR) << "ANeuralNetworksModel_identifyInputsAndOutputs Can't set operand "
+                   << operandIndex
+                   << " to be an input or output. Check that it's not a constant or "
+                      "already an input or output";
+        return false;
+      }
+      operand.lifetime = lifetime;
+    }
+    return true;
+  };
+
+  if (!setArguments(&mInputIndexes, inputCount, inputs, OperandLifeTime::MODEL_INPUT) ||
+      !setArguments(&mOutputIndexes, outputCount, outputs, OperandLifeTime::MODEL_OUTPUT))
+  {
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Creates a CompilationBuilder for a finished, valid model.
+// On failure, *compilation is set to nullptr.
+int ModelBuilder::createCompilation(CompilationBuilder **compilation)
+{
+  if (!mCompletedModel || mInvalidModel)
+  {
+    LOG(ERROR) << "ANeuralNetworksCompilation_create passed an unfinished model";
+    *compilation = nullptr;
+    return ANEURALNETWORKS_BAD_STATE;
+  }
+  *compilation = new CompilationBuilder(this);
+  return (*compilation ? ANEURALNETWORKS_NO_ERROR : ANEURALNETWORKS_OUT_OF_MEMORY);
+}
+
+// Finalizes the model: copies large constants into shared memory, validates
+// the whole graph, and sorts operations into execution order. After a
+// successful finish the model is immutable; a failed validation marks it
+// permanently invalid.
+int ModelBuilder::finish()
+{
+  if (mCompletedModel)
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_finish called more than once";
+    return ANEURALNETWORKS_BAD_STATE;
+  }
+  if (mInvalidModel)
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_finish called on an invalid model";
+    return ANEURALNETWORKS_BAD_STATE;
+  }
+
+  int n = copyLargeValuesToMemory();
+  if (n != ANEURALNETWORKS_NO_ERROR)
+  {
+    return n;
+  }
+
+  // Validate against a published snapshot, i.e. the exact representation
+  // consumers will see.
+  Model modelForValidation;
+  publish(&modelForValidation);
+  if (!validateModel(modelForValidation))
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_finish called on invalid model";
+    mInvalidModel = true;
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+
+  // We sort the operations so that they will be in the appropriate
+  // order for a single-threaded, op at a time execution.
+  // TODO: we don't need this if we always run the partitioner.
+  sortIntoRunOrder();
+  mCompletedModel = true;
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Topologically sorts mOperations so each operation appears after the
+// producers of all its non-constant inputs (Kahn-style: start with ops whose
+// inputs are all known, then release consumers as outputs become available).
+// NOTE(review): an operation caught in a dependency cycle would never become
+// ready and would be dropped from runOrder; presumably validateModel() in
+// finish() rejects cyclic graphs before this runs — confirm.
+void ModelBuilder::sortIntoRunOrder()
+{
+  // Tracks the operations that can be executed.
+  std::vector<uint32_t> opsReadyToRun;
+  std::vector<Operation> runOrder;
+
+  // Tracks how many inputs are needed for each operation to be ready to run.
+  std::multimap<uint32_t, uint32_t> operandToOperations;
+  std::vector<uint32_t> unknownInputCount(operationCount());
+  for (uint32_t operationIndex = 0; operationIndex < operationCount(); operationIndex++)
+  {
+    uint32_t &count = unknownInputCount[operationIndex];
+    count = 0;
+    for (uint32_t operandIndex : mOperations[operationIndex].inputs)
+    {
+      auto lifetime = mOperands[operandIndex].lifetime;
+      // Only values produced at run time count as unresolved dependencies;
+      // constants and model inputs are available from the start.
+      if (lifetime == OperandLifeTime::TEMPORARY_VARIABLE ||
+          lifetime == OperandLifeTime::MODEL_OUTPUT)
+      {
+        count++;
+        operandToOperations.insert(std::pair<uint32_t, uint32_t>(operandIndex, operationIndex));
+      }
+    }
+    if (count == 0)
+    {
+      opsReadyToRun.push_back(operationIndex);
+    }
+  }
+
+  while (opsReadyToRun.size() > 0)
+  {
+    // Execute the next op
+    int opIndex = opsReadyToRun.back();
+    opsReadyToRun.pop_back();
+    const Operation &operation = mOperations[opIndex];
+
+    runOrder.push_back(mOperations[opIndex]);
+
+    // Mark all its outputs as known.
+    for (uint32_t operandIndex : operation.outputs)
+    {
+      auto range = operandToOperations.equal_range(operandIndex);
+      for (auto i = range.first; i != range.second; i++)
+      {
+        uint32_t &count = unknownInputCount[i->second];
+        if (--count == 0)
+        {
+          opsReadyToRun.push_back(i->second);
+        }
+      }
+    }
+  }
+  mOperations = runOrder;
+}
+
+// Copies the builder's state into a flat Model snapshot, resolving each
+// registered Memory into a raw base pointer in model->pools.
+void ModelBuilder::publish(Model *model) const
+{
+  model->operands = mOperands;
+  model->operations = mOperations;
+  model->inputIndexes = mInputIndexes;
+  model->outputIndexes = mOutputIndexes;
+  model->operandValues = mSmallOperandValues;
+
+  uint32_t count = mMemories.size();
+  model->pools.resize(count);
+  for (uint32_t i = 0; i < count; i++)
+  {
+    uint8_t *buffer;
+    mMemories[i]->getPointer(&buffer);
+    model->pools[i] = buffer;
+  }
+}
+
+// Copies all deferred large constant values (recorded by setOperandValue)
+// into a single private shared memory, updating each operand's location to
+// point into that pool. No-op when there are no large values.
+int ModelBuilder::copyLargeValuesToMemory()
+{
+  if (!mLargeOperandValues.empty())
+  {
+    // Calculate the size of the shared memory needed for all the large values.
+    // Also sets the offset for each value within the memory.
+    size_t poolSize = 0;
+    for (LargeValue &l : mLargeOperandValues)
+    {
+      Operand &operand = mOperands[l.operandIndex];
+      ASSERT(operand.lifetime == OperandLifeTime::CONSTANT_REFERENCE);
+      poolSize += alignBytesNeeded(poolSize, operand.location.length);
+      operand.location.offset = poolSize;
+      poolSize += operand.location.length;
+    }
+
+    // Allocate the shared memory.
+    int n = mLargeValueMemory.create(poolSize);
+    if (n != ANEURALNETWORKS_NO_ERROR)
+    {
+      return n;
+    }
+    uint8_t *memoryPointer = nullptr;
+    n = mLargeValueMemory.getPointer(&memoryPointer);
+    if (n != ANEURALNETWORKS_NO_ERROR)
+    {
+      return n;
+    }
+    uint32_t poolIndex = mMemories.add(&mLargeValueMemory);
+    VLOG(MODEL) << "Allocated large value pool of size " << poolSize << " at index " << poolIndex;
+
+    // Copy the values to this memory.
+    for (LargeValue &l : mLargeOperandValues)
+    {
+      Operand &operand = mOperands[l.operandIndex];
+      operand.location.poolIndex = poolIndex;
+      memcpy(memoryPointer + operand.location.offset, l.buffer, operand.location.length);
+    }
+  }
+  return ANEURALNETWORKS_NO_ERROR;
+}
diff --git a/compiler/ann-ref/src/ModelBuilder.h b/compiler/ann-ref/src/ModelBuilder.h
new file mode 100644
index 000000000..ad50fad1d
--- /dev/null
+++ b/compiler/ann-ref/src/ModelBuilder.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MODEL_BUILDER_H__
+#define __MODEL_BUILDER_H__
+
+#include "NeuralNetworks.h"
+
+#include "Model.h"
+
+#include "Memory.h"
+#include "MemoryTracker.h"
+
+#include <vector>
+#include <memory>
+
+class CompilationBuilder;
+
+// Incrementally builds a neural-network model (operands, constant values,
+// operations, input/output bindings). Once finish() succeeds the builder
+// becomes immutable and publish() produces flat Model snapshots for
+// compilation/execution.
+class ModelBuilder
+{
+public:
+  virtual ~ModelBuilder() = default;
+
+public:
+  // Adds an operand to the model.
+  int addOperand(const ANeuralNetworksOperandType &type);
+
+public:
+  // Assign a constant value to an operand (copied or referenced by pointer).
+  int setOperandValue(uint32_t index, const void *buffer, size_t length);
+  // Assign a constant value located inside a registered Memory.
+  int setOperandValueFromMemory(uint32_t index, const Memory *memory, uint32_t offset,
+                                size_t length);
+
+public:
+  int addOperation(OperationType type, uint32_t inputCount, const uint32_t *inputs,
+                   uint32_t outputCount, const uint32_t *outputs);
+
+public:
+  int identifyInputsAndOutputs(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+                               const uint32_t *outputs);
+
+public:
+  int finish();
+  bool isFinished() const { return mCompletedModel; }
+
+public:
+  int createCompilation(CompilationBuilder **compilation);
+
+public:
+  // Copies the builder's state into a flat Model snapshot.
+  void publish(Model *model) const;
+
+public:
+  uint32_t operandCount() const
+  {
+    // We don't allow more than uint32_t worth of operands
+    return static_cast<uint32_t>(mOperands.size());
+  }
+  uint32_t operationCount() const
+  {
+    // We don't allow more than uint32_t worth of operations
+    return static_cast<uint32_t>(mOperations.size());
+  }
+
+public:
+  uint32_t inputCount() const { return static_cast<uint32_t>(mInputIndexes.size()); }
+  uint32_t getInputOperandIndex(uint32_t i) const { return mInputIndexes[i]; }
+  const Operand &getInputOperand(uint32_t i) const { return mOperands[getInputOperandIndex(i)]; }
+
+public:
+  uint32_t outputCount() const { return static_cast<uint32_t>(mOutputIndexes.size()); }
+  uint32_t getOutputOperandIndex(uint32_t i) const { return mOutputIndexes[i]; }
+  const Operand &getOutputOperand(uint32_t i) const { return mOperands[getOutputOperandIndex(i)]; }
+
+public:
+  const Operand &getOperand(uint32_t index) const { return mOperands[index]; }
+  const Operation &getOperation(uint32_t index) const { return mOperations[index]; }
+
+public:
+  const MemoryTracker &getMemories() const { return mMemories; }
+  const std::vector<Operation> &getOperations() const { return mOperations; }
+
+private:
+  // Return true if either mCompletedModel or mInvalidModel is true.
+  bool badState(const char *name);
+
+  // Sorts the operations to be in the correct order for single threaded
+  // node-at-a-time execution.
+  void sortIntoRunOrder();
+
+  // Copies the large values to a shared memory, if we have any.
+  int copyLargeValuesToMemory();
+
+private:
+  // The operations of the graph.
+  std::vector<Operation> mOperations;
+  // The description of the operands of the graph.
+  std::vector<Operand> mOperands;
+
+  // Specifies where to find the list of indexes identifying
+  // the inputs and outputs of the model. The offset is into
+  // the mOperandIndexes table.
+  std::vector<uint32_t> mInputIndexes;
+  std::vector<uint32_t> mOutputIndexes;
+
+  MemoryTracker mMemories;
+
+  // The value of the small operands that are defined at model
+  // creation time.
+  std::vector<uint8_t> mSmallOperandValues;
+
+  struct LargeValue
+  {
+    uint32_t operandIndex;
+    const void *buffer;
+  };
+  // Operand index and buffer pointer for all the large operand values of this model.
+  std::vector<LargeValue> mLargeOperandValues;
+  // Backing memory for large values, allocated by copyLargeValuesToMemory().
+  PrivateMemory mLargeValueMemory;
+
+  // Once the model has been finished, we should not allow further
+  // modifications to the model.
+  mutable bool mCompletedModel = false;
+
+  // Any invalid manipulation of the model will mark the model invalid.
+  // No further modifications are allowed to the model.
+  mutable bool mInvalidModel = false;
+};
+
+#endif // __MODEL_BUILDER_H__
diff --git a/compiler/ann-ref/src/NeuralNetworks.cpp b/compiler/ann-ref/src/NeuralNetworks.cpp
new file mode 100644
index 000000000..e43a82667
--- /dev/null
+++ b/compiler/ann-ref/src/NeuralNetworks.cpp
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NeuralNetworks.h"
+
+#include "CompilationBuilder.h"
+#include "ExecutionBuilder.h"
+#include "ModelBuilder.h"
+#include "Memory.h"
+
+#include "Logging.h"
+
+#include <memory>
+
+// Creates an ANeuralNetworksMemory backed by a mapping of the given fd range.
+// On success ownership passes to the caller (freed via ANeuralNetworksMemory_free);
+// on failure *memory is left as nullptr.
+int ANeuralNetworksMemory_createFromFd(size_t size, int prot, int fd, size_t offset,
+                                       ANeuralNetworksMemory **memory)
+{
+  *memory = nullptr;
+  auto m = std::make_unique<MappedMemory>();
+  if (m == nullptr)
+  {
+    return ANEURALNETWORKS_OUT_OF_MEMORY;
+  }
+  int n = m->set(size, prot, fd, offset);
+  if (n != ANEURALNETWORKS_NO_ERROR)
+  {
+    return n;
+  }
+  *memory = reinterpret_cast<ANeuralNetworksMemory *>(m.release());
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Destroys a memory object created by ANeuralNetworksMemory_createFromFd.
+void ANeuralNetworksMemory_free(ANeuralNetworksMemory *memory)
+{
+  // No validation. Free of nullptr is valid.
+  Memory *m = reinterpret_cast<Memory *>(memory);
+  delete m;
+}
+
+// Allocates a new, empty model. Caller owns it (ANeuralNetworksModel_free).
+int ANeuralNetworksModel_create(ANeuralNetworksModel **model)
+{
+  if (!model)
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_create passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+  ModelBuilder *m = new ModelBuilder();
+  if (m == nullptr)
+  {
+    *model = nullptr;
+    return ANEURALNETWORKS_OUT_OF_MEMORY;
+  }
+  *model = reinterpret_cast<ANeuralNetworksModel *>(m);
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Destroys a model created by ANeuralNetworksModel_create.
+void ANeuralNetworksModel_free(ANeuralNetworksModel *model)
+{
+  // No validation. Free of nullptr is valid.
+  ModelBuilder *m = reinterpret_cast<ModelBuilder *>(model);
+  delete m;
+}
+
+// Finalizes the model; see ModelBuilder::finish for the work performed.
+int ANeuralNetworksModel_finish(ANeuralNetworksModel *model)
+{
+  if (!model)
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_finish passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+  ModelBuilder *m = reinterpret_cast<ModelBuilder *>(model);
+  return m->finish();
+}
+
+// Thin C-API wrapper: null-checks, then forwards to ModelBuilder::addOperand.
+int ANeuralNetworksModel_addOperand(ANeuralNetworksModel *model,
+                                    const ANeuralNetworksOperandType *type)
+{
+  if (!model || !type)
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_addOperand passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+  ModelBuilder *m = reinterpret_cast<ModelBuilder *>(model);
+  return m->addOperand(*type);
+}
+
+// Thin C-API wrapper: null-checks, then forwards to ModelBuilder::setOperandValue.
+int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel *model, int32_t index,
+                                         const void *buffer, size_t length)
+{
+  if (!model || !buffer)
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_setOperandValue passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+  ModelBuilder *m = reinterpret_cast<ModelBuilder *>(model);
+  return m->setOperandValue(index, buffer, length);
+}
+
+// Thin C-API wrapper: null-checks, then forwards to
+// ModelBuilder::setOperandValueFromMemory.
+int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel *model, int32_t index,
+                                                   const ANeuralNetworksMemory *memory,
+                                                   size_t offset, size_t length)
+{
+  if (!model || !memory)
+  {
+    // Fixed: the message previously named a different API
+    // ("ANeuralNetworksModel_setOperandValue"), which would mislead debugging.
+    LOG(ERROR) << "ANeuralNetworksModel_setOperandValueFromMemory passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+  const Memory *mem = reinterpret_cast<const Memory *>(memory);
+  ModelBuilder *m = reinterpret_cast<ModelBuilder *>(model);
+  return m->setOperandValueFromMemory(index, mem, offset, length);
+}
+
+// Thin C-API wrapper: null-checks, then forwards to ModelBuilder::addOperation.
+int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
+                                      ANeuralNetworksOperationType type, uint32_t inputCount,
+                                      const uint32_t *inputs, uint32_t outputCount,
+                                      const uint32_t *outputs)
+{
+  if (!model || !inputs || !outputs)
+  {
+    LOG(ERROR) << "ANeuralNetworksModel_addOperation passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+  ModelBuilder *m = reinterpret_cast<ModelBuilder *>(model);
+  return m->addOperation(static_cast<OperationType>(type), inputCount, inputs, outputCount,
+                         outputs);
+}
+
+// Thin C-API wrapper: null-checks, then forwards to
+// ModelBuilder::identifyInputsAndOutputs.
+int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel *model, uint32_t inputCount,
+                                                  const uint32_t *inputs, uint32_t outputCount,
+                                                  const uint32_t *outputs)
+{
+  if (!model || !inputs || !outputs)
+  {
+    LOG(ERROR) << ("ANeuralNetworksModel_identifyInputsAndOutputs passed a nullptr");
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+  ModelBuilder *m = reinterpret_cast<ModelBuilder *>(model);
+  return m->identifyInputsAndOutputs(inputCount, inputs, outputCount, outputs);
+}
+
+// Creates a compilation for a finished model; *compilation receives the
+// result (or nullptr on failure).
+int ANeuralNetworksCompilation_create(ANeuralNetworksModel *model,
+                                      ANeuralNetworksCompilation **compilation)
+{
+  if (!model || !compilation)
+  {
+    LOG(ERROR) << "ANeuralNetworksCompilation_create passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+
+  ModelBuilder *m = reinterpret_cast<ModelBuilder *>(model);
+  CompilationBuilder *c = nullptr;
+  int result = m->createCompilation(&c);
+  *compilation = reinterpret_cast<ANeuralNetworksCompilation *>(c);
+  return result;
+}
+
+// Destroys a compilation object.
+void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation *compilation)
+{
+  // No validation. Free of nullptr is valid.
+  // TODO specification says that a compilation-in-flight can be deleted
+  CompilationBuilder *c = reinterpret_cast<CompilationBuilder *>(compilation);
+  delete c;
+}
+
+// Accepts but ignores the execution preference (this reference implementation
+// has a single execution path).
+int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation *compilation,
+                                             int32_t preference)
+{
+  if (!compilation)
+  {
+    LOG(ERROR) << "ANeuralNetworksCompilation_setPreference passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+  // NOTE Ignore preference
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Thin C-API wrapper: forwards to CompilationBuilder::finish.
+int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation)
+{
+  if (!compilation)
+  {
+    LOG(ERROR) << "ANeuralNetworksCompilation_finish passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+  CompilationBuilder *c = reinterpret_cast<CompilationBuilder *>(compilation);
+  return c->finish();
+}
+
+// Creates an execution from a finished compilation; *execution receives the
+// result (or nullptr on failure).
+int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
+                                    ANeuralNetworksExecution **execution)
+{
+  if (!compilation || !execution)
+  {
+    LOG(ERROR) << "ANeuralNetworksExecution_create passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+
+  CompilationBuilder *c = reinterpret_cast<CompilationBuilder *>(compilation);
+  ExecutionBuilder *r = nullptr;
+  int result = c->createExecution(&r);
+  *execution = reinterpret_cast<ANeuralNetworksExecution *>(r);
+  return result;
+}
+
+// Destroys an execution object.
+void ANeuralNetworksExecution_free(ANeuralNetworksExecution *execution)
+{
+  // TODO specification says that an execution-in-flight can be deleted
+  // No validation. Free of nullptr is valid.
+  ExecutionBuilder *r = reinterpret_cast<ExecutionBuilder *>(execution);
+  delete r;
+}
+
+// Binds input `index` to a caller-owned buffer. A null buffer is allowed only
+// with length 0 (operand has no value).
+int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32_t index,
+                                      const ANeuralNetworksOperandType *type, const void *buffer,
+                                      size_t length)
+{
+  if (!execution)
+  {
+    LOG(ERROR) << "ANeuralNetworksExecution_setInput passed execution with a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+  if (!buffer && length != 0)
+  {
+    LOG(ERROR) << "ANeuralNetworksExecution_setInput passed buffer with a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+
+  ExecutionBuilder *r = reinterpret_cast<ExecutionBuilder *>(execution);
+  return r->setInput(index, type, buffer, length);
+}
+
+// Binds input `index` to a byte range inside a memory object.
+int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution *execution, int32_t index,
+                                                const ANeuralNetworksOperandType *type,
+                                                const ANeuralNetworksMemory *memory, size_t offset,
+                                                size_t length)
+{
+  if (!execution || !memory)
+  {
+    LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+
+  const Memory *m = reinterpret_cast<const Memory *>(memory);
+  ExecutionBuilder *r = reinterpret_cast<ExecutionBuilder *>(execution);
+  return r->setInputFromMemory(index, type, m, offset, length);
+}
+
+// Binds output `index` to a caller-owned buffer.
+int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int32_t index,
+                                       const ANeuralNetworksOperandType *type, void *buffer,
+                                       size_t length)
+{
+  if (!execution || !buffer)
+  {
+    LOG(ERROR) << "ANeuralNetworksExecution_setOutput passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+  ExecutionBuilder *r = reinterpret_cast<ExecutionBuilder *>(execution);
+  return r->setOutput(index, type, buffer, length);
+}
+
+// Binds output `index` to a byte range inside a memory object.
+int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution *execution, int32_t index,
+                                                 const ANeuralNetworksOperandType *type,
+                                                 const ANeuralNetworksMemory *memory, size_t offset,
+                                                 size_t length)
+{
+  if (!execution || !memory)
+  {
+    LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+
+  ExecutionBuilder *r = reinterpret_cast<ExecutionBuilder *>(execution);
+  const Memory *m = reinterpret_cast<const Memory *>(memory);
+  return r->setOutputFromMemory(index, type, m, offset, length);
+}
+
+// Runs the execution. Despite the "start" name, startCompute() here returns
+// after the work is done (this reference implementation is synchronous), so
+// the returned event is a dummy heap allocation whose only purpose is to be
+// waited on and freed.
+int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution,
+                                          ANeuralNetworksEvent **event)
+{
+  if (!execution || !event)
+  {
+    LOG(ERROR) << "ANeuralNetworksExecution_startCompute passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+  // TODO validate the rest
+
+  ExecutionBuilder *r = reinterpret_cast<ExecutionBuilder *>(execution);
+
+  // Dynamically allocate an sp to wrap an ExecutionCallback, seen in the NN
+  // API as an abstract event object. The sp<ExecutionCallback> object is
+  // returned when the execution has been successfully launched, otherwise a
+  // nullptr is returned. The sp is used for ref-counting purposes. Without
+  // it, the HIDL service could attempt to communicate with a dead callback
+  // object.
+  *event = nullptr;
+
+  int n = r->startCompute();
+  if (n != ANEURALNETWORKS_NO_ERROR)
+  {
+    return n;
+  }
+  // Placeholder event: the computation is already complete at this point.
+  *event = reinterpret_cast<ANeuralNetworksEvent *>(new int);
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// No-op wait: computation completed inside startCompute already.
+int ANeuralNetworksEvent_wait(ANeuralNetworksEvent *event)
+{
+  if (event == nullptr)
+  {
+    LOG(ERROR) << "ANeuralNetworksEvent_wait passed a nullptr";
+    return ANEURALNETWORKS_UNEXPECTED_NULL;
+  }
+
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Frees the dummy event allocated by startCompute.
+void ANeuralNetworksEvent_free(ANeuralNetworksEvent *event)
+{
+  // No validation. Free of nullptr is valid.
+  if (event)
+  {
+    int *e = reinterpret_cast<int *>(event);
+    delete e;
+  }
+}
diff --git a/compiler/ann-ref/src/Operand.h b/compiler/ann-ref/src/Operand.h
new file mode 100644
index 000000000..870a05644
--- /dev/null
+++ b/compiler/ann-ref/src/Operand.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OPERAND_H__
+#define __OPERAND_H__
+
+#include "OperandType.h"
+
+#include <cstdint>
+#include <vector>
+
// How an operand's value is provided / owned during the model's lifetime.
enum class OperandLifeTime : int32_t {
  TEMPORARY_VARIABLE = 0, // intermediate value produced and consumed internally
  MODEL_INPUT = 1,        // value supplied by the caller per execution
  MODEL_OUTPUT = 2,       // value written back to the caller
  CONSTANT_COPY = 3,      // constant copied into the model
  CONSTANT_REFERENCE = 4, // constant referenced in an external memory pool
  NO_VALUE = 5,           // optional operand that was omitted
};
+
// Byte range describing where an operand's value lives inside a memory pool.
struct DataLocation final {
  uint32_t poolIndex; // which memory pool holds the value
  uint32_t offset;    // byte offset within that pool
  uint32_t length;    // size of the value in bytes
};
+
// Static description of one operand of a model: element type, quantization
// parameters, shape, storage location, and usage information.
struct Operand final {
  OperandType type;  // element/data type (see OperandType.h)
  float scale;       // quantization scale — meaningful for quantized types
  int32_t zeroPoint; // quantization zero point — meaningful for quantized types

  // Tensor dimensions; presumably empty for scalar types — confirm with callers.
  std::vector<uint32_t> dimensions;

  // Where the value is stored (pool/offset/length); see DataLocation.
  DataLocation location;

  uint32_t numberOfConsumers; // number of operations that read this operand
  OperandLifeTime lifetime;   // origin/ownership of the value
};
+
// Returns the amount of space needed to store a value of the dimensions and
// type of this operand.
inline uint32_t sizeOfData(const Operand &operand)
{
  // Delegates to the (type, dimensions) overload declared in OperandType.h.
  return sizeOfData(operand.type, operand.dimensions);
}
+
+#endif // __OPERAND_H__
diff --git a/compiler/ann-ref/src/OperandType.cpp b/compiler/ann-ref/src/OperandType.cpp
new file mode 100644
index 000000000..9f75fcc54
--- /dev/null
+++ b/compiler/ann-ref/src/OperandType.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperandType.h"
+#include "Macro.h"
+
// Human-readable names, indexed by the integer value of OperandType.
// Order must match the OperandType enumeration in OperandType.h.
const char *kTypeNames[] = {
    "FLOAT32", "INT32", "UINT32", "TENSOR_FLOAT32", "TENSOR_INT32", "TENSOR_QUANT8_ASYMM",
};

static_assert(COUNT(kTypeNames) == kNumberOfDataTypes, "kTypeNames is incorrect");
+
// Size in bytes of a single element of each data type, indexed by the
// integer value of OperandType.
const uint32_t kSizeOfDataType[]{
    4, // ANEURALNETWORKS_FLOAT32
    4, // ANEURALNETWORKS_INT32
    4, // ANEURALNETWORKS_UINT32
    4, // ANEURALNETWORKS_TENSOR_FLOAT32
    4, // ANEURALNETWORKS_TENSOR_INT32
    1 // ANEURALNETWORKS_TENSOR_SYMMETRICAL_QUANT8
};

static_assert(COUNT(kSizeOfDataType) == kNumberOfDataTypes, "kSizeOfDataType is incorrect");
+
+const char *getOperandTypeName(OperandType type)
+{
+ uint32_t n = static_cast<uint32_t>(type);
+ return kTypeNames[n];
+}
+
+uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions)
+{
+ int n = static_cast<int>(type);
+
+ uint32_t size = kSizeOfDataType[n];
+
+ for (auto d : dimensions)
+ {
+ size *= d;
+ }
+ return size;
+}
diff --git a/compiler/ann-ref/src/OperandType.h b/compiler/ann-ref/src/OperandType.h
new file mode 100644
index 000000000..3dfd2329b
--- /dev/null
+++ b/compiler/ann-ref/src/OperandType.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OPERAND_TYPES_H__
+#define __OPERAND_TYPES_H__
+
+#include <cstdint>
+#include <vector>
+
// Data types an operand can have. The numeric values must stay in sync with
// the OperandCode constants in NeuralNetworks.h (verified by the
// static_asserts in OperandType.probe.cpp).
enum class OperandType : int32_t {
  FLOAT32 = 0,             // 32-bit float scalar
  INT32 = 1,               // 32-bit signed integer scalar
  UINT32 = 2,              // 32-bit unsigned integer scalar
  TENSOR_FLOAT32 = 3,      // tensor of 32-bit floats
  TENSOR_INT32 = 4,        // tensor of 32-bit signed integers
  TENSOR_QUANT8_ASYMM = 5, // tensor of 8-bit asymmetrically quantized values
};
+
// The number of data types (OperandCode) defined in NeuralNetworks.h.
// Must equal the number of OperandType enumerators above.
const int kNumberOfDataTypes = 6;

// Returns the name of the operand type in ASCII.
const char *getOperandTypeName(OperandType type);

// Returns the amount of space needed to store a value of the specified
// dimensions and type.
uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions);
+
+#endif // __OPERAND_TYPES_H__
diff --git a/compiler/ann-ref/src/OperandType.probe.cpp b/compiler/ann-ref/src/OperandType.probe.cpp
new file mode 100644
index 000000000..2caffdeeb
--- /dev/null
+++ b/compiler/ann-ref/src/OperandType.probe.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperandType.h"
+#include "NeuralNetworks.h"
+
+static_assert(static_cast<int32_t>(OperandType::FLOAT32) == ANEURALNETWORKS_FLOAT32,
+ "FLOAT32 != ANEURALNETWORKS_FLOAT32");
+static_assert(static_cast<int32_t>(OperandType::INT32) == ANEURALNETWORKS_INT32,
+ "INT32 != ANEURALNETWORKS_INT32");
+static_assert(static_cast<int32_t>(OperandType::UINT32) == ANEURALNETWORKS_UINT32,
+ "UINT32 != ANEURALNETWORKS_UINT32");
+
+static_assert(static_cast<int32_t>(OperandType::TENSOR_FLOAT32) == ANEURALNETWORKS_TENSOR_FLOAT32,
+ "TENSOR_FLOAT32 != ANEURALNETWORKS_TENSOR_FLOAT32");
+static_assert(static_cast<int32_t>(OperandType::TENSOR_QUANT8_ASYMM) ==
+ ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
+ "TENSOR_QUANT8_ASYMM != ANEURALNETWORKS_TENSOR_QUANT8_ASYMM");
diff --git a/compiler/ann-ref/src/Operation.h b/compiler/ann-ref/src/Operation.h
new file mode 100644
index 000000000..37f6a8727
--- /dev/null
+++ b/compiler/ann-ref/src/Operation.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OPERATION_H__
+#define __OPERATION_H__
+
+#include "OperationType.h"
+
+#include <cstdint>
+#include <vector>
+
// One operation (node) of a model: its kind plus the indices of the model
// operands it reads and writes.
struct Operation final {
  OperationType type;
  // Indices into the model's operand table.
  std::vector<uint32_t> inputs;
  std::vector<uint32_t> outputs;
};
+
+#endif // __OPERATION_H__
diff --git a/compiler/ann-ref/src/OperationType.cpp b/compiler/ann-ref/src/OperationType.cpp
new file mode 100644
index 000000000..f938b4d1c
--- /dev/null
+++ b/compiler/ann-ref/src/OperationType.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationType.h"
+#include "Macro.h"
+
// Human-readable operation names, indexed by the integer value of
// OperationType, including the reserved V1_1 slots. The static_assert below
// pins the entry count to kNumberOfOperationTypes.
// NOTE(review): some names are shorter than their enumerators (e.g.
// "AVERAGE_POOL" for AVERAGE_POOL_2D) — presumably intentional; confirm.
const char *kOperationNames[kNumberOfOperationTypes] = {
    "ADD",
    "AVERAGE_POOL",
    "CONCATENATION",
    "CONV",
    "DEPTHWISE_CONV",
    "DEPTH_TO_SPACE",
    "DEQUANTIZE",
    "EMBEDDING_LOOKUP",
    "FLOOR",
    "FULLY_CONNECTED",
    "HASHTABLE_LOOKUP",
    "L2_NORMALIZATION",
    "L2_POOL",
    "LOCAL_RESPONSE_NORMALIZATION",
    "LOGISTIC",
    "LSH_PROJECTION",
    "LSTM",
    "MAX_POOL",
    "MUL",
    "RELU",
    "RELU1",
    "RELU6",
    "RESHAPE",
    "RESIZE_BILINEAR",
    "RNN",
    "SOFTMAX",
    "SPACE_TO_DEPTH",
    "SVDF",
    "TANH",
    "BATCH_TO_SPACE_ND", // V1_1, will not be merged till V1_1 is finalized
    "DIV",
    "MEAN", // V1_1, will not be merged till V1_1 is finalized
    "PAD",  // V1_1, will not be merged till V1_1 is finalized
    "SPACE_TO_BATCH_ND", // V1_1, will not be merged till V1_1 is finalized
    "SQUEEZE",           // V1_1, will not be merged till V1_1 is finalized
    "STRIDED_SLICE",
    "SUB",
};

static_assert(COUNT(kOperationNames) == kNumberOfOperationTypes, "kOperationNames is incorrect");
+
+const char *getOperationName(OperationType type)
+{
+ uint32_t n = static_cast<uint32_t>(type);
+ return kOperationNames[n];
+}
diff --git a/compiler/ann-ref/src/OperationType.h b/compiler/ann-ref/src/OperationType.h
new file mode 100644
index 000000000..fc66eeeab
--- /dev/null
+++ b/compiler/ann-ref/src/OperationType.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OPERATION_TYPE_H__
+#define __OPERATION_TYPE_H__
+
+#include <cstdint>
+
// Operation codes a model node can have. The numeric values must stay in
// sync with the OperationCode constants in NeuralNetworks.h (verified by the
// static_asserts in OperationType.probe.cpp). The gaps in the numbering
// (29, 31, 33, 34) correspond to V1_1 operations listed but not yet merged
// in kOperationNames (OperationType.cpp).
enum class OperationType : int32_t {
  ADD = 0,
  AVERAGE_POOL_2D = 1,
  CONCATENATION = 2,
  CONV_2D = 3,
  DEPTHWISE_CONV_2D = 4,
  DEPTH_TO_SPACE = 5,
  DEQUANTIZE = 6,
  EMBEDDING_LOOKUP = 7,
  FLOOR = 8,
  FULLY_CONNECTED = 9,
  HASHTABLE_LOOKUP = 10,
  L2_NORMALIZATION = 11,
  L2_POOL_2D = 12,
  LOCAL_RESPONSE_NORMALIZATION = 13,
  LOGISTIC = 14,
  LSH_PROJECTION = 15,
  LSTM = 16,
  MAX_POOL_2D = 17,
  MUL = 18,
  RELU = 19,
  RELU1 = 20,
  RELU6 = 21,
  RESHAPE = 22,
  RESIZE_BILINEAR = 23,
  RNN = 24,
  SOFTMAX = 25,
  SPACE_TO_DEPTH = 26,
  SVDF = 27,
  TANH = 28,
  DIV = 30,
  PAD = 32,
  STRIDED_SLICE = 35,
  SUB = 36,
  OEM_OPERATION = 10000, // vendor-specific operation; outside the normal range
};
+
// The number of operation types (OperationCode) defined in NeuralNetworks.h,
// counting the reserved V1_1 slots (valid codes are 0 .. 36).
const int kNumberOfOperationTypes = 37;

// Returns the name of the operation in ASCII.
const char *getOperationName(OperationType opCode);
+
+#endif // __OPERATION_TYPE_H__
diff --git a/compiler/ann-ref/src/OperationType.probe.cpp b/compiler/ann-ref/src/OperationType.probe.cpp
new file mode 100644
index 000000000..c9886f35b
--- /dev/null
+++ b/compiler/ann-ref/src/OperationType.probe.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationType.h"
+#include "NeuralNetworks.h"
+
+static_assert(static_cast<int32_t>(OperationType::ADD) == ANEURALNETWORKS_ADD,
+ "OperationType::ADD != ANEURALNETWORKS_ADD");
+static_assert(static_cast<int32_t>(OperationType::AVERAGE_POOL_2D) ==
+ ANEURALNETWORKS_AVERAGE_POOL_2D,
+ "OperationType::AVERAGE_POOL_2D != ANEURALNETWORKS_AVERAGE_POOL_2D");
+static_assert(static_cast<int32_t>(OperationType::CONV_2D) == ANEURALNETWORKS_CONV_2D,
+ "OperationType::CONV_2D != ANEURALNETWORKS_CONV_2D");
+static_assert(static_cast<int32_t>(OperationType::DEPTHWISE_CONV_2D) ==
+ ANEURALNETWORKS_DEPTHWISE_CONV_2D,
+ "OperationType::DEPTHWISE_CONV_2D != ANEURALNETWORKS_DEPTHWISE_CONV_2D");
+static_assert(static_cast<int32_t>(OperationType::DEPTH_TO_SPACE) == ANEURALNETWORKS_DEPTH_TO_SPACE,
+ "OperationType::DEPTH_TO_SPACE != ANEURALNETWORKS_DEPTH_TO_SPACE");
+static_assert(static_cast<int32_t>(OperationType::DEQUANTIZE) == ANEURALNETWORKS_DEQUANTIZE,
+ "OperationType::DEQUANTIZE != ANEURALNETWORKS_DEQUANTIZE");
+static_assert(static_cast<int32_t>(OperationType::EMBEDDING_LOOKUP) ==
+ ANEURALNETWORKS_EMBEDDING_LOOKUP,
+ "OperationType::EMBEDDING_LOOKUP != ANEURALNETWORKS_EMBEDDING_LOOKUP");
+static_assert(static_cast<int32_t>(OperationType::FLOOR) == ANEURALNETWORKS_FLOOR,
+ "OperationType::FLOOR != ANEURALNETWORKS_FLOOR");
+static_assert(static_cast<int32_t>(OperationType::FULLY_CONNECTED) ==
+ ANEURALNETWORKS_FULLY_CONNECTED,
+ "OperationType::FULLY_CONNECTED != ANEURALNETWORKS_FULLY_CONNECTED");
+static_assert(static_cast<int32_t>(OperationType::HASHTABLE_LOOKUP) ==
+ ANEURALNETWORKS_HASHTABLE_LOOKUP,
+ "OperationType::HASHTABLE_LOOKUP != ANEURALNETWORKS_HASHTABLE_LOOKUP");
+static_assert(static_cast<int32_t>(OperationType::L2_NORMALIZATION) ==
+ ANEURALNETWORKS_L2_NORMALIZATION,
+ "OperationType::L2_NORMALIZATION != ANEURALNETWORKS_L2_NORMALIZATION");
+static_assert(static_cast<int32_t>(OperationType::L2_POOL_2D) == ANEURALNETWORKS_L2_POOL_2D,
+ "OperationType::L2_POOL_2D != ANEURALNETWORKS_L2_POOL_2D");
+static_assert(static_cast<int32_t>(OperationType::LOCAL_RESPONSE_NORMALIZATION) ==
+ ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION,
+ "OperationType::LOCAL_RESPONSE_NORMALIZATION != "
+ "ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION");
+static_assert(static_cast<int32_t>(OperationType::LOGISTIC) == ANEURALNETWORKS_LOGISTIC,
+ "OperationType::LOGISTIC != ANEURALNETWORKS_LOGISTIC");
+static_assert(static_cast<int32_t>(OperationType::LSH_PROJECTION) == ANEURALNETWORKS_LSH_PROJECTION,
+ "OperationType::LSH_PROJECTION != ANEURALNETWORKS_LSH_PROJECTION");
+static_assert(static_cast<int32_t>(OperationType::LSTM) == ANEURALNETWORKS_LSTM,
+ "OperationType::LSTM != ANEURALNETWORKS_LSTM");
+static_assert(static_cast<int32_t>(OperationType::MAX_POOL_2D) == ANEURALNETWORKS_MAX_POOL_2D,
+ "OperationType::MAX_POOL_2D != ANEURALNETWORKS_MAX_POOL_2D");
+static_assert(static_cast<int32_t>(OperationType::MUL) == ANEURALNETWORKS_MUL,
+ "OperationType::MUL != ANEURALNETWORKS_MUL");
+static_assert(static_cast<int32_t>(OperationType::RELU) == ANEURALNETWORKS_RELU,
+ "OperationType::RELU != ANEURALNETWORKS_RELU");
+static_assert(static_cast<int32_t>(OperationType::RELU1) == ANEURALNETWORKS_RELU1,
+ "OperationType::RELU1 != ANEURALNETWORKS_RELU1");
+static_assert(static_cast<int32_t>(OperationType::RELU6) == ANEURALNETWORKS_RELU6,
+ "OperationType::RELU6 != ANEURALNETWORKS_RELU6");
+static_assert(static_cast<int32_t>(OperationType::RESHAPE) == ANEURALNETWORKS_RESHAPE,
+ "OperationType::RESHAPE != ANEURALNETWORKS_RESHAPE");
+static_assert(static_cast<int32_t>(OperationType::RESIZE_BILINEAR) ==
+ ANEURALNETWORKS_RESIZE_BILINEAR,
+ "OperationType::RESIZE_BILINEAR != ANEURALNETWORKS_RESIZE_BILINEAR");
+static_assert(static_cast<int32_t>(OperationType::RNN) == ANEURALNETWORKS_RNN,
+ "OperationType::RNN != ANEURALNETWORKS_RNN");
+static_assert(static_cast<int32_t>(OperationType::SOFTMAX) == ANEURALNETWORKS_SOFTMAX,
+ "OperationType::SOFTMAX != ANEURALNETWORKS_SOFTMAX");
+static_assert(static_cast<int32_t>(OperationType::SPACE_TO_DEPTH) == ANEURALNETWORKS_SPACE_TO_DEPTH,
+ "OperationType::SPACE_TO_DEPTH != ANEURALNETWORKS_SPACE_TO_DEPTH");
+static_assert(static_cast<int32_t>(OperationType::SVDF) == ANEURALNETWORKS_SVDF,
+ "OperationType::SVDF != ANEURALNETWORKS_SVDF");
+static_assert(static_cast<int32_t>(OperationType::TANH) == ANEURALNETWORKS_TANH,
+ "OperationType::TANH != ANEURALNETWORKS_TANH");
diff --git a/compiler/ann-ref/src/Probe.cpp b/compiler/ann-ref/src/Probe.cpp
new file mode 100644
index 000000000..3a085a19d
--- /dev/null
+++ b/compiler/ann-ref/src/Probe.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NeuralNetworks.h"
+
// Make sure the constants defined in the header files have not changed values.
// IMPORTANT: When adding new values, update kNumberOfDataTypes or kNumberOfDataTypesOEM
// in Utils.h.
// --- OperandCode values ---
static_assert(ANEURALNETWORKS_FLOAT32 == 0, "ANEURALNETWORKS_FLOAT32 has changed");
static_assert(ANEURALNETWORKS_INT32 == 1, "ANEURALNETWORKS_INT32 has changed");
static_assert(ANEURALNETWORKS_UINT32 == 2, "ANEURALNETWORKS_UINT32 has changed");
static_assert(ANEURALNETWORKS_TENSOR_FLOAT32 == 3, "ANEURALNETWORKS_TENSOR_FLOAT32 has changed");
static_assert(ANEURALNETWORKS_TENSOR_INT32 == 4, "ANEURALNETWORKS_TENSOR_INT32 has changed");
static_assert(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM == 5,
              "ANEURALNETWORKS_TENSOR_QUANT8_ASYMM has changed");

// IMPORTANT: When adding new values, update kNumberOfOperationTypes or
// kNumberOfOperationTypesOEM kNumberOfOperationTypesEx in Utils.h.
// --- OperationCode values ---
static_assert(ANEURALNETWORKS_ADD == 0, "ANEURALNETWORKS_ADD has changed");
static_assert(ANEURALNETWORKS_AVERAGE_POOL_2D == 1, "ANEURALNETWORKS_AVERAGE_POOL_2D has changed");
static_assert(ANEURALNETWORKS_CONCATENATION == 2, "ANEURALNETWORKS_CONCATENATION has changed");
static_assert(ANEURALNETWORKS_CONV_2D == 3, "ANEURALNETWORKS_CONV_2D has changed");
static_assert(ANEURALNETWORKS_DEPTHWISE_CONV_2D == 4,
              "ANEURALNETWORKS_DEPTHWISE_CONV_2D has changed");
static_assert(ANEURALNETWORKS_DEPTH_TO_SPACE == 5, "ANEURALNETWORKS_DEPTH_TO_SPACE has changed");
static_assert(ANEURALNETWORKS_DEQUANTIZE == 6, "ANEURALNETWORKS_DEQUANTIZE has changed");
static_assert(ANEURALNETWORKS_EMBEDDING_LOOKUP == 7,
              "ANEURALNETWORKS_EMBEDDING_LOOKUP has changed");
static_assert(ANEURALNETWORKS_FLOOR == 8, "ANEURALNETWORKS_FLOOR has changed");
static_assert(ANEURALNETWORKS_FULLY_CONNECTED == 9, "ANEURALNETWORKS_FULLY_CONNECTED has changed");
static_assert(ANEURALNETWORKS_HASHTABLE_LOOKUP == 10,
              "ANEURALNETWORKS_HASHTABLE_LOOKUP has changed");
static_assert(ANEURALNETWORKS_L2_NORMALIZATION == 11,
              "ANEURALNETWORKS_L2_NORMALIZATION has changed");
static_assert(ANEURALNETWORKS_L2_POOL_2D == 12, "ANEURALNETWORKS_L2_POOL has changed");
static_assert(ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION == 13,
              "ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION has changed");
static_assert(ANEURALNETWORKS_LOGISTIC == 14, "ANEURALNETWORKS_LOGISTIC has changed");
static_assert(ANEURALNETWORKS_LSH_PROJECTION == 15, "ANEURALNETWORKS_LSH_PROJECTION has changed");
static_assert(ANEURALNETWORKS_LSTM == 16, "ANEURALNETWORKS_LSTM has changed");
static_assert(ANEURALNETWORKS_MAX_POOL_2D == 17, "ANEURALNETWORKS_MAX_POOL has changed");
static_assert(ANEURALNETWORKS_MUL == 18, "ANEURALNETWORKS_MUL has changed");
static_assert(ANEURALNETWORKS_RELU == 19, "ANEURALNETWORKS_RELU has changed");
static_assert(ANEURALNETWORKS_RELU1 == 20, "ANEURALNETWORKS_RELU1 has changed");
static_assert(ANEURALNETWORKS_RELU6 == 21, "ANEURALNETWORKS_RELU6 has changed");
static_assert(ANEURALNETWORKS_RESHAPE == 22, "ANEURALNETWORKS_RESHAPE has changed");
static_assert(ANEURALNETWORKS_RESIZE_BILINEAR == 23, "ANEURALNETWORKS_RESIZE_BILINEAR has changed");
static_assert(ANEURALNETWORKS_RNN == 24, "ANEURALNETWORKS_RNN has changed");
static_assert(ANEURALNETWORKS_SOFTMAX == 25, "ANEURALNETWORKS_SOFTMAX has changed");
static_assert(ANEURALNETWORKS_SPACE_TO_DEPTH == 26, "ANEURALNETWORKS_SPACE_TO_DEPTH has changed");
static_assert(ANEURALNETWORKS_SVDF == 27, "ANEURALNETWORKS_SVDF has changed");
static_assert(ANEURALNETWORKS_TANH == 28, "ANEURALNETWORKS_TANH has changed");

// --- FuseCode values ---
static_assert(ANEURALNETWORKS_FUSED_NONE == 0, "ANEURALNETWORKS_FUSED_NONE has changed");
static_assert(ANEURALNETWORKS_FUSED_RELU == 1, "ANEURALNETWORKS_FUSED_RELU has changed");
static_assert(ANEURALNETWORKS_FUSED_RELU1 == 2, "ANEURALNETWORKS_FUSED_RELU1 has changed");
static_assert(ANEURALNETWORKS_FUSED_RELU6 == 3, "ANEURALNETWORKS_FUSED_RELU6 has changed");

// --- PreferenceCode values ---
static_assert(ANEURALNETWORKS_PREFER_LOW_POWER == 0,
              "ANEURALNETWORKS_PREFER_LOW_POWER has changed");
static_assert(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER == 1,
              "ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER has changed");
static_assert(ANEURALNETWORKS_PREFER_SUSTAINED_SPEED == 2,
              "ANEURALNETWORKS_PREFER_SUSTAINED_SPEED has changed");

// --- ResultCode values ---
static_assert(ANEURALNETWORKS_NO_ERROR == 0, "ANEURALNETWORKS_NO_ERROR has changed");
static_assert(ANEURALNETWORKS_OUT_OF_MEMORY == 1, "ANEURALNETWORKS_OUT_OF_MEMORY has changed");
static_assert(ANEURALNETWORKS_INCOMPLETE == 2, "ANEURALNETWORKS_INCOMPLETE has changed");
static_assert(ANEURALNETWORKS_UNEXPECTED_NULL == 3, "ANEURALNETWORKS_UNEXPECTED_NULL has changed");
static_assert(ANEURALNETWORKS_BAD_DATA == 4, "ANEURALNETWORKS_BAD_DATA has changed");
static_assert(ANEURALNETWORKS_OP_FAILED == 5, "ANEURALNETWORKS_OP_FAILED has changed");
static_assert(ANEURALNETWORKS_BAD_STATE == 6, "ANEURALNETWORKS_BAD_STATE has changed");

static_assert(ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES == 128,
              "ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES has changed");
diff --git a/compiler/ann-ref/src/Request.h b/compiler/ann-ref/src/Request.h
new file mode 100644
index 000000000..49f74fdf5
--- /dev/null
+++ b/compiler/ann-ref/src/Request.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __REQUEST_H__
+#define __REQUEST_H__
+
#include "Operand.h" // for DataLocation, used by RequestArgument below

#include <cstdint>
#include <vector>
+
// One input or output argument of an execution request.
struct RequestArgument final {
  // True when this optional argument was omitted by the caller.
  bool hasNoValue;
  // Where the argument's data lives (DataLocation is declared in Operand.h).
  DataLocation location;
  // NOTE(review): presumably per-request dimension overrides for operands
  // whose shape is only fixed at request time — confirm against callers.
  std::vector<uint32_t> dimensions;
};
+
// A single execution request: the concrete inputs to run a model with and
// the outputs to receive the results.
struct Request final {
  std::vector<RequestArgument> inputs;
  std::vector<RequestArgument> outputs;
};
+
+#endif // __REQUEST_H__
diff --git a/compiler/ann-ref/src/Shape.cpp b/compiler/ann-ref/src/Shape.cpp
new file mode 100644
index 000000000..37a54c213
--- /dev/null
+++ b/compiler/ann-ref/src/Shape.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Shape.h"
+
+#include <cstddef> // For 'size_t'
+
+bool SameShape(const Shape &in1, const Shape &in2)
+{
+ if (in1.type != in2.type || in1.dimensions.size() != in2.dimensions.size())
+ {
+ return false;
+ }
+ for (size_t i = 0; i < in1.dimensions.size(); i++)
+ {
+ if (in1.dimensions[i] != in2.dimensions[i])
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool SetShape(const Shape &in, Shape *out)
+{
+ if (in.type != out->type || in.dimensions.size() != out->dimensions.size())
+ {
+ return false;
+ }
+ out->dimensions = in.dimensions;
+ return true;
+}
+
+uint32_t getNumberOfElements(const Shape &shape)
+{
+ uint32_t count = 1;
+ for (size_t i = 0; i < shape.dimensions.size(); i++)
+ {
+ count *= shape.dimensions[i];
+ }
+ return count;
+}
+
+uint32_t getNumberOfDimensions(const Shape &shape) { return shape.dimensions.size(); }
+
+uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx)
+{
+ if (dimensionIdx >= shape.dimensions.size())
+ {
+ // TODO, log the error
+ return 0;
+ }
+ return shape.dimensions[dimensionIdx];
+}
diff --git a/compiler/ann-ref/src/Shape.h b/compiler/ann-ref/src/Shape.h
new file mode 100644
index 000000000..2e3d92e50
--- /dev/null
+++ b/compiler/ann-ref/src/Shape.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SHAPE_H__
+#define __SHAPE_H__
+
+#include "OperandType.h"
+
+#include <vector>
+#include <cstdint>
+
// The type and dimensions of an operand.
struct Shape
{
  OperandType type;                 // element/data type (see OperandType.h)
  std::vector<uint32_t> dimensions; // extent along each axis; empty for scalars
  float scale;                      // quantization scale — for quantized types
  int32_t offset;                   // quantization zero point — for quantized types
};

// Verifies that the two shapes are the same (type, rank, and every dimension).
bool SameShape(const Shape &in1, const Shape &in2);

// Sets out to the same shape as in. Fails if types or ranks differ.
bool SetShape(const Shape &in, Shape *out);

// Return the total number of elements, i.e. all the dimensions multiplied
// together. For a scalar, returns one.
uint32_t getNumberOfElements(const Shape &shape);
uint32_t getNumberOfDimensions(const Shape &shape);
uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx);
+
+#endif // __SHAPE_H__
diff --git a/compiler/ann-ref/src/Validation.cpp b/compiler/ann-ref/src/Validation.cpp
new file mode 100644
index 000000000..679b14a9a
--- /dev/null
+++ b/compiler/ann-ref/src/Validation.cpp
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Validation.h"
+#include "Macro.h"
+#include "Assert.h"
+
+// True iff 'code' is a valid enumerator, i.e. strictly below 'codeCount'.
+static inline bool validCode(uint32_t codeCount, uint32_t code)
+{
+ return (code < codeCount);
+}
+
+// Returns non-zero (true) when 'type' is a known operation type code.
+int validateOperationType(const OperationType &type)
+{
+ return validCode(kNumberOfOperationTypes, static_cast<uint32_t>(type));
+}
+
+// Validates the type. The used dimensions can be underspecified.
+// Validates the type. The used dimensions can be underspecified.
+// 'tag' is prepended to every error log line. When 'allowPartial' is false,
+// every dimension must be fully specified (non-zero).
+// Returns ANEURALNETWORKS_NO_ERROR on success, ANEURALNETWORKS_BAD_DATA otherwise.
+int validateOperandType(const ANeuralNetworksOperandType &type, const char *tag, bool allowPartial)
+{
+ if (!allowPartial)
+ {
+ // A zero extent marks an unspecified dimension, which is rejected here.
+ for (uint32_t i = 0; i < type.dimensionCount; i++)
+ {
+ if (type.dimensions[i] == 0)
+ {
+ LOG(ERROR) << tag << " OperandType invalid dimensions[" << i
+ << "] = " << type.dimensions[i];
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+ }
+ if (!validCode(kNumberOfDataTypes, type.type))
+ {
+ LOG(ERROR) << tag << " OperandType invalid type " << type.type;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ // Quantized tensors must carry a valid (zeroPoint, scale) pair:
+ // zeroPoint fits in uint8 and scale is non-negative.
+ if (type.type == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM)
+ {
+ if (type.zeroPoint < 0 || type.zeroPoint > 255)
+ {
+ LOG(ERROR) << tag << " OperandType invalid zeroPoint " << type.zeroPoint;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ if (type.scale < 0.f)
+ {
+ LOG(ERROR) << tag << " OperandType invalid scale " << type.scale;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+
+ // TODO-NNRT : add 'type.type == ANEURALNETWORKS_OEM_SCALAR' later.
+ // OEM operators are not supported now.
+ // Scalar types must not declare any dimensions.
+ if (type.type == ANEURALNETWORKS_FLOAT32 || type.type == ANEURALNETWORKS_INT32 ||
+ type.type == ANEURALNETWORKS_UINT32)
+ {
+ if (type.dimensionCount != 0 || type.dimensions != nullptr)
+ {
+ LOG(ERROR) << tag << " Invalid dimensions for scalar type";
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Checks that every entry of 'list' (of length 'count') is a valid operand
+// index, i.e. strictly below 'operandCount'. 'tag' is prepended to error logs.
+// Returns ANEURALNETWORKS_NO_ERROR on success, ANEURALNETWORKS_BAD_DATA otherwise.
+int validateOperandList(uint32_t count, const uint32_t *list, uint32_t operandCount,
+ const char *tag)
+{
+ for (uint32_t i = 0; i < count; i++)
+ {
+ if (list[i] >= operandCount)
+ {
+ LOG(ERROR) << tag << " invalid operand index at " << i << " = " << list[i]
+ << ", operandCount " << operandCount;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+// Checks that every index in 'indexes' refers to an existing operand,
+// i.e. is strictly below 'operandCount'. Logs and returns false on the
+// first out-of-range index.
+// Fix: take the vector by const reference instead of by value — the
+// original copied the whole index vector on every call for no benefit.
+static bool validOperandIndexes(const std::vector<uint32_t> &indexes, size_t operandCount)
+{
+ for (uint32_t i : indexes)
+ {
+ if (i >= operandCount)
+ {
+ LOG(ERROR) << "Index out of range " << i << "/" << operandCount;
+ return false;
+ }
+ }
+ return true;
+}
+
+// Validates every operand of a model: its data-type code and the coherence
+// of its lifetime with its data location. 'operandValues' is the model's
+// inline constant pool, used to bounds-check CONSTANT_COPY locations.
+// Returns false (after logging) on the first invalid operand.
+static bool validOperands(const std::vector<Operand> &operands, const std::vector<uint8_t> &operandValues)
+{
+ for (auto &operand : operands)
+ {
+ if (!validCode(kNumberOfDataTypes, static_cast<uint32_t>(operand.type)))
+ {
+ LOG(ERROR) << "Invalid operand type ";
+ return false;
+ }
+ /* TODO validate dim with type
+ if (!validOperandIndexes(operand.dimensions, mDimensions)) {
+ return false;
+ }
+ */
+ switch (operand.lifetime)
+ {
+ // Inline constants must lie entirely inside 'operandValues'.
+ case OperandLifeTime::CONSTANT_COPY:
+ if (operand.location.offset + operand.location.length > operandValues.size())
+ {
+ LOG(ERROR) << "OperandValue location out of range. Starts at " << operand.location.offset
+ << ", length " << operand.location.length << ", max " << operandValues.size();
+ return false;
+ }
+ break;
+ // Runtime-resolved operands carry no stored data, so their
+ // location must be empty (offset and length both zero).
+ case OperandLifeTime::TEMPORARY_VARIABLE:
+ case OperandLifeTime::MODEL_INPUT:
+ case OperandLifeTime::MODEL_OUTPUT:
+ case OperandLifeTime::NO_VALUE:
+ if (operand.location.offset != 0 || operand.location.length != 0)
+ {
+ LOG(ERROR) << "Unexpected offset " << operand.location.offset << " or length "
+ << operand.location.length << " for runtime location.";
+ return false;
+ }
+ break;
+ // Memory-pool references are not validated here: pools are not
+ // supported by this reference implementation (see disabled code).
+ case OperandLifeTime::CONSTANT_REFERENCE:
+#if 0
+ if (operand.location.poolIndex >= poolCount)
+ {
+ LOG(ERROR) << "Invalid poolIndex " << operand.location.poolIndex << "/" << poolCount;
+ return false;
+ }
+#endif
+ break;
+ // TODO: Validate that we are within the pool.
+ default:
+ LOG(ERROR) << "Invalid lifetime";
+ return false;
+ }
+ }
+ return true;
+}
+
+// Validates every operation of a model: its operation-type code and that all
+// of its input/output operand indexes are below 'operandCount'.
+// Returns false (after logging) on the first invalid operation.
+static bool validOperations(const std::vector<Operation> &operations, size_t operandCount)
+{
+ for (auto &op : operations)
+ {
+ if (!validCode(kNumberOfOperationTypes, static_cast<uint32_t>(op.type)))
+ {
+ LOG(ERROR) << "Invalid operation type ";
+ return false;
+ }
+ if (!validOperandIndexes(op.inputs, operandCount) ||
+ !validOperandIndexes(op.outputs, operandCount))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+// TODO doublecheck
+// Full-model validation: operands, operations, and the model-level
+// input/output index lists must all be internally consistent.
+bool validateModel(const Model &model)
+{
+ const size_t operandCount = model.operands.size();
+ return (validOperands(model.operands, model.operandValues) &&
+ validOperations(model.operations, operandCount) &&
+ validOperandIndexes(model.inputIndexes, operandCount) &&
+ validOperandIndexes(model.outputIndexes, operandCount));
+}
+
+// Validates one side (inputs or outputs) of an execution request against the
+// model. 'arguments' must match 'operandIndexes' one-to-one; each argument
+// either has no value (empty location and dimensions) or its dimensions must
+// be fully specified and agree with the corresponding model operand.
+// 'type' is "input" or "output", used only in error messages.
+// Returns false (after logging) on the first mismatch.
+bool validRequestArguments(const std::vector<RequestArgument> &arguments,
+ const std::vector<uint32_t> &operandIndexes,
+ const std::vector<Operand> &operands, size_t poolCount, const char *type)
+{
+ const size_t argumentCount = arguments.size();
+ if (argumentCount != operandIndexes.size())
+ {
+ LOG(ERROR) << "Request specifies " << argumentCount << " " << type << "s but the model has "
+ << operandIndexes.size();
+ return false;
+ }
+ for (size_t argumentIndex = 0; argumentIndex < argumentCount; argumentIndex++)
+ {
+ const RequestArgument &argument = arguments[argumentIndex];
+ const uint32_t operandIndex = operandIndexes[argumentIndex];
+ const Operand &operand = operands[operandIndex];
+ if (argument.hasNoValue)
+ {
+ if (argument.location.poolIndex != 0 || argument.location.offset != 0 ||
+ argument.location.length != 0 || argument.dimensions.size() != 0)
+ {
+ LOG(ERROR) << "Request " << type << " " << argumentIndex
+ << " has no value yet has details.";
+ return false;
+ }
+ }
+ // NOTE(review): this check also runs for hasNoValue arguments (there is
+ // no 'else'), and with poolCount forced to 0 by validateRequest every
+ // argument fails poolIndex >= poolCount — confirm intended behavior.
+ if (argument.location.poolIndex >= poolCount)
+ {
+ LOG(ERROR) << "Request " << type << " " << argumentIndex << " has an invalid poolIndex "
+ << argument.location.poolIndex << "/" << poolCount;
+ return false;
+ }
+ // TODO: Validate that we are within the pool.
+ uint32_t rank = argument.dimensions.size();
+ if (rank > 0)
+ {
+ if (rank != operand.dimensions.size())
+ {
+ LOG(ERROR) << "Request " << type << " " << argumentIndex << " has number of dimensions ("
+ << rank << ") different than the model's (" << operand.dimensions.size() << ")";
+ return false;
+ }
+ for (size_t i = 0; i < rank; i++)
+ {
+ // A model extent of 0 means "unspecified", so any request value is
+ // accepted for that dimension.
+ if (argument.dimensions[i] != operand.dimensions[i] && operand.dimensions[i] != 0)
+ {
+ // Fix: report the request's value first; the original printed
+ // operand.dimensions[i] twice, making the message useless.
+ LOG(ERROR) << "Request " << type << " " << argumentIndex << " has dimension " << i
+ << " of " << argument.dimensions[i] << " different than the model's "
+ << operand.dimensions[i];
+ return false;
+ }
+ if (argument.dimensions[i] == 0)
+ {
+ LOG(ERROR) << "Request " << type << " " << argumentIndex << " has dimension " << i
+ << " of zero";
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+}
+
+// TODO doublecheck
+// Validates a request's inputs and outputs against the model.
+// Memory pools are not supported here, so poolCount is forced to 0.
+bool validateRequest(const Request &request, const Model &model)
+{
+ //const size_t poolCount = request.pools.size();
+ const size_t poolCount = 0;
+ return (validRequestArguments(request.inputs, model.inputIndexes, model.operands, poolCount,
+ "input") &&
+ validRequestArguments(request.outputs, model.outputIndexes, model.operands, poolCount,
+ "output"));
+}
+
diff --git a/compiler/ann-ref/src/Validation.h b/compiler/ann-ref/src/Validation.h
new file mode 100644
index 000000000..dab426af4
--- /dev/null
+++ b/compiler/ann-ref/src/Validation.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __VALIDATION_H__
+#define __VALIDATION_H__
+
+#include "OperationType.h"
+#include "Model.h"
+#include "Request.h"
+#include "NeuralNetworks.h"
+
+int validateOperationType(const OperationType &);
+int validateOperandType(const ANeuralNetworksOperandType &type, const char *tag, bool allowPartial);
+int validateOperandList(uint32_t count, const uint32_t *list, uint32_t operandCount,
+ const char *tag);
+
+bool validateModel(const Model &model);
+bool validateRequest(const Request &request, const Model &model);
+
+#endif // __VALIDATION_H__
diff --git a/compiler/ann-ref/src/ops/Add.cpp b/compiler/ann-ref/src/ops/Add.cpp
new file mode 100644
index 000000000..0b826f05d
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Add.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Add.h"
+#include "Assert.h"
+
+// Shape inference for ADD: both inputs must have the same element type and at
+// most 4 dimensions. Equal shapes are copied to 'out'; otherwise the
+// broadcast output shape is computed numpy-style, aligning from the trailing
+// dimension and requiring each pair of extents to match or be 1.
+// Returns false (after logging) when the shapes are not broadcast-compatible.
+bool addPrepare(const Shape &in1, const Shape &in2, Shape *out)
+{
+ ASSERT(getNumberOfDimensions(in1) <= 4 && getNumberOfDimensions(in2) <= 4);
+ ASSERT(in1.type == in2.type);
+ if (SameShape(in1, in2))
+ {
+ return SetShape(in1, out);
+ }
+ else
+ {
+ // BroadcastAdd needed
+ uint32_t numberOfDims1 = getNumberOfDimensions(in1);
+ uint32_t numberOfDims2 = getNumberOfDimensions(in2);
+ uint32_t maxDims = std::max(numberOfDims1, numberOfDims2);
+ out->dimensions = std::vector<uint32_t>(maxDims);
+ // i counts dimensions from the trailing end (i == 1 is the last dim).
+ for (uint32_t i = 1; i <= maxDims; i++)
+ {
+ // Missing leading dimensions are treated as extent 1.
+ uint32_t dim1 = 1;
+ if (i <= numberOfDims1)
+ {
+ dim1 = getSizeOfDimension(in1, numberOfDims1 - i);
+ }
+ uint32_t dim2 = 1;
+ if (i <= numberOfDims2)
+ {
+ dim2 = getSizeOfDimension(in2, numberOfDims2 - i);
+ }
+ if (dim1 != dim2 && dim1 != 1 && dim2 != 1)
+ {
+ LOG(ERROR) << "Dimensions mismatch for BroadcastAdd";
+ return false;
+ }
+ out->dimensions[maxDims - i] = std::max(dim1, dim2);
+ }
+ }
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/Add.float.cpp b/compiler/ann-ref/src/ops/Add.float.cpp
new file mode 100644
index 000000000..ce825d43d
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Add.float.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Add.float.h"
+
+#include "internal/Array.h"
+#include "internal/NDArray.h"
+#include "internal/Matrix.h"
+#include "internal/Fused.h"
+#include "internal/ActivationUtils.h"
+
+// Elementwise ADD of two same-shape, densely-packed 4-D arrays, with the
+// fused activation Ac applied to each sum. All three arrays must match in
+// every dimension and be packed without strides, so a single flat loop
+// over 'size' elements suffices.
+template <FusedActivationFunctionType Ac>
+void Add(const float *input1_data, const Dims<4> &input1_dims, const float *input2_data,
+ const Dims<4> &input2_dims, float *output_data, const Dims<4> &output_dims)
+{
+ MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3);
+ MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2);
+ MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1);
+ MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0);
+ DCHECK(IsPackedWithoutStrides(input1_dims));
+ DCHECK(IsPackedWithoutStrides(input2_dims));
+ DCHECK(IsPackedWithoutStrides(output_dims));
+
+ int i = 0;
+ // Total element count: extent of the outermost dimension times its stride.
+ const int size = input1_dims.sizes[3] * input1_dims.strides[3];
+
+ for (; i < size; i++)
+ {
+ auto x = input1_data[i] + input2_data[i];
+ output_data[i] = ActivationFunction<Ac>(x);
+ }
+}
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// Broadcasting ADD over 4-D arrays: the NdArrayDesc pair maps each output
+// coordinate back to a (possibly repeated) element of each input, and the
+// fused activation Ac is applied to every sum.
+//
+// TODO: We can implement BroadcastAdd on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+// TODO: BroadcastAdd is intentionally duplicated from
+// reference_ops.h. Once an optimized version is implemented and NdArrayDesc<T>
+// is no longer referenced in this file, move NdArrayDesc<T> from types.h to
+// reference_ops.h.
+template <FusedActivationFunctionType Ac>
+void BroadcastAdd(const float *input1_data, const Dims<4> &input1_dims, const float *input2_data,
+ const Dims<4> &input2_dims, float *output_data, const Dims<4> &output_dims)
+{
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2);
+
+ // In Tensorflow, the dimensions are canonically named (batch_number, row,
+ // col, channel), with extents (batches, height, width, depth), with the
+ // trailing dimension changing most rapidly (channels has the smallest stride,
+ // typically 1 element).
+ //
+ // In generated C code, we store arrays with the dimensions reversed. The
+ // first dimension has smallest stride.
+ //
+ // We name our variables by their Tensorflow convention, but generate C code
+ // nesting loops such that the innermost loop has the smallest stride for the
+ // best cache behavior.
+ for (int b = 0; b < ArraySize(output_dims, 3); ++b)
+ {
+ for (int y = 0; y < ArraySize(output_dims, 2); ++y)
+ {
+ for (int x = 0; x < ArraySize(output_dims, 1); ++x)
+ {
+ for (int c = 0; c < ArraySize(output_dims, 0); ++c)
+ {
+ output_data[Offset(output_dims, c, x, y, b)] =
+ ActivationFunction<Ac>(input1_data[SubscriptToIndex(desc1, c, x, y, b)] +
+ input2_data[SubscriptToIndex(desc2, c, x, y, b)]);
+ }
+ }
+ }
+ }
+}
+
+// Entry point for float32 ADD: dispatches to the plain elementwise kernel
+// when the input shapes are identical, and to the broadcasting kernel
+// otherwise. 'activation' selects the fused activation function via
+// ANDROID_NN_MACRO_DISPATCH. Always returns true.
+bool addFloat32(const float *in1, const Shape &shape1, const float *in2, const Shape &shape2,
+ int32_t activation, float *out, const Shape &shapeOut)
+{
+ bool needBroadcast = !SameShape(shape1, shape2);
+
+#define ANDROID_NN_NORMAL_ADD(activation) \
+ Add<FusedActivationFunctionType::activation>(in1, convertShapeToDims(shape1), \
+ in2, convertShapeToDims(shape2), \
+ out, convertShapeToDims(shapeOut))
+
+#define ANDROID_NN_BROADCAST_ADD(activation) \
+ BroadcastAdd<FusedActivationFunctionType::activation>( \
+ in1, convertShapeToDims(shape1), in2, convertShapeToDims(shape2), out, \
+ convertShapeToDims(shapeOut))
+
+ if (needBroadcast)
+ {
+ ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_BROADCAST_ADD)
+ }
+ else
+ {
+ ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_NORMAL_ADD)
+ }
+
+#undef ANDROID_NN_NORMAL_ADD
+#undef ANDROID_NN_BROADCAST_ADD
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/Add.float.h b/compiler/ann-ref/src/ops/Add.float.h
new file mode 100644
index 000000000..3657a045d
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Add.float.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_ADD_FLOAT_H__
+#define __OP_ADD_FLOAT_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool addFloat32(const float *in1, const Shape &shape1, const float *in2, const Shape &shape2,
+ int32_t activation, float *out, const Shape &shapeOut);
+
+#endif // __OP_ADD_FLOAT_H__
diff --git a/compiler/ann-ref/src/ops/Add.h b/compiler/ann-ref/src/ops/Add.h
new file mode 100644
index 000000000..c6751fc00
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Add.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_ADD_H__
+#define __OP_ADD_H__
+
+#include "Shape.h"
+
+bool addPrepare(const Shape &in1, const Shape &in2, Shape *out1);
+
+#endif // __OP_ADD_H__
diff --git a/compiler/ann-ref/src/ops/AvgPool2D.cpp b/compiler/ann-ref/src/ops/AvgPool2D.cpp
new file mode 100644
index 000000000..cd9fcff66
--- /dev/null
+++ b/compiler/ann-ref/src/ops/AvgPool2D.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AvgPool2D.h"
+
+#include "internal/Pooling.h"
+
+// Shape inference for AVERAGE_POOL_2D: delegates to the shared pooling
+// shape-inference helper (same geometry for average and max pooling).
+bool averagePoolPrepare(const Shape &input, int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom, int32_t stride_width,
+ int32_t stride_height, int32_t filter_width, int32_t filter_height,
+ Shape *output)
+{
+ return genericPoolingPrepare(input, padding_left, padding_right, padding_top, padding_bottom,
+ stride_width, stride_height, filter_width, filter_height,
+ output);
+}
diff --git a/compiler/ann-ref/src/ops/AvgPool2D.float.cpp b/compiler/ann-ref/src/ops/AvgPool2D.float.cpp
new file mode 100644
index 000000000..21d3e977c
--- /dev/null
+++ b/compiler/ann-ref/src/ops/AvgPool2D.float.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AvgPool2D.float.h"
+
+#include "internal/Array.h"
+#include "internal/Matrix.h"
+#include "internal/FeatureMap.h"
+#include "internal/Fused.h"
+#include "internal/ActivationUtils.h"
+
+// From optimized_ops.h in TensorFlow Lite
+// Average pooling over a 4-D feature map using a scatter strategy: each
+// input pixel is added into every output cell whose window covers it, a
+// per-cell counter tracks how many inputs contributed, sums are divided by
+// those counts, and finally the fused activation Ac is applied.
+template <FusedActivationFunctionType Ac>
+void AveragePool(const float *input_data, const Dims<4> &input_dims, int stride_width,
+ int stride_height, int pad_width, int pad_height, int kwidth, int kheight,
+ float *output_data, const Dims<4> &output_dims)
+{
+ const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
+ const int input_height = ArraySize(input_dims, 2);
+ const int input_width = ArraySize(input_dims, 1);
+ const int output_height = ArraySize(output_dims, 2);
+ const int output_width = ArraySize(output_dims, 1);
+ const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
+
+ const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
+ auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+ // TODO: get rid of the dynamic memory allocation here!
+ // out_count(j) = number of input pixels accumulated into output column j.
+ Eigen::VectorXf out_count(out_mat.cols());
+ out_count.setZero();
+ // Prefill the output to 0.
+ out_mat.setZero();
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int h = 0; h < input_height; ++h)
+ {
+ for (int w = 0; w < input_width; ++w)
+ {
+ // (h_start, h_end) * (w_start, w_end) is the range that the input
+ // vector projects to.
+ int hpad = h + pad_height;
+ int wpad = w + pad_width;
+ int h_start = (hpad < kheight) ? 0 : (hpad - kheight) / stride_height + 1;
+ int h_end = std::min(hpad / stride_height + 1, output_height);
+ int w_start = (wpad < kwidth) ? 0 : (wpad - kwidth) / stride_width + 1;
+ int w_end = std::min(wpad / stride_width + 1, output_width);
+ // compute elementwise sum
+ for (int ph = h_start; ph < h_end; ++ph)
+ {
+ for (int pw = w_start; pw < w_end; ++pw)
+ {
+ int out_offset = NodeOffset(b, ph, pw, output_height, output_width);
+ out_mat.col(out_offset) += in_mat.col(NodeOffset(b, h, w, input_height, input_width));
+ out_count(out_offset)++;
+ }
+ }
+ }
+ }
+ }
+ // Divide the output by the actual number of elements being averaged over
+ DCHECK_GT(out_count.minCoeff(), 0);
+ out_mat.array().rowwise() /= out_count.transpose().array();
+
+ // Second pass: apply the fused activation to every output element.
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int y = 0; y < output_height; ++y)
+ {
+ for (int x = 0; x < output_width; ++x)
+ {
+ for (int c = 0; c < depth; ++c)
+ {
+ output_data[Offset(output_dims, c, x, y, b)] =
+ ActivationFunction<Ac>(output_data[Offset(output_dims, c, x, y, b)]);
+ }
+ }
+ }
+ }
+}
+
+// Shared local bindings for pooling entry points. NOTE(review): 'height' and
+// 'width' are bound but unused by averagePoolFloat32 below; the macro is
+// presumably shared verbatim with other pooling ops — confirm before trimming.
+#define ANDROID_NN_POOLING_PARAMETERS \
+ uint32_t height = getSizeOfDimension(inputShape, 1); \
+ uint32_t width = getSizeOfDimension(inputShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(outputShape, 2); \
+ \
+ uint32_t paddingHeight = (uint32_t)padding_top; \
+ uint32_t paddingWidth = (uint32_t)padding_left;
+
+// Entry point for float32 AVERAGE_POOL_2D: binds pooling parameters and
+// dispatches to the AveragePool kernel with the fused activation selected
+// by 'activation'. Always returns true.
+bool averagePoolFloat32(const float *inputData, const Shape &inputShape, int32_t padding_left,
+ int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height, int32_t filter_width,
+ int32_t filter_height, int32_t activation, float *outputData,
+ const Shape &outputShape)
+{
+
+ ANDROID_NN_POOLING_PARAMETERS
+
+#define ANDROID_NN_AVERAGE_POOL(activation) \
+ AveragePool<FusedActivationFunctionType::activation>( \
+ inputData, convertShapeToDims(inputShape), stride_width, stride_height, paddingWidth, \
+ paddingHeight, filter_width, filter_height, outputData, convertShapeToDims(outputShape))
+
+ ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_AVERAGE_POOL)
+#undef ANDROID_NN_AVERAGE_POOL
+
+ return true;
+}
+
+#undef ANDROID_NN_POOLING_PARAMETERS
diff --git a/compiler/ann-ref/src/ops/AvgPool2D.float.h b/compiler/ann-ref/src/ops/AvgPool2D.float.h
new file mode 100644
index 000000000..b980e004b
--- /dev/null
+++ b/compiler/ann-ref/src/ops/AvgPool2D.float.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_AVG_POOL_2D_FLOAT_H__
+#define __OP_AVG_POOL_2D_FLOAT_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool averagePoolFloat32(const float *inputData, const Shape &inputShape, int32_t padding_left,
+ int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height, int32_t filter_width,
+ int32_t filter_height, int32_t activation, float *outputData,
+ const Shape &outputShape);
+
+#endif // __OP_AVG_POOL_2D_FLOAT_H__
diff --git a/compiler/ann-ref/src/ops/AvgPool2D.h b/compiler/ann-ref/src/ops/AvgPool2D.h
new file mode 100644
index 000000000..c86385531
--- /dev/null
+++ b/compiler/ann-ref/src/ops/AvgPool2D.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_AVG_POOL_2D_H__
+#define __OP_AVG_POOL_2D_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool averagePoolPrepare(const Shape &input, int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom, int32_t stride_width,
+ int32_t stride_height, int32_t filter_width, int32_t filter_height,
+ Shape *output);
+
+#endif // __OP_AVG_POOL_2D_H__
diff --git a/compiler/ann-ref/src/ops/Concatenation.cpp b/compiler/ann-ref/src/ops/Concatenation.cpp
new file mode 100644
index 000000000..6bfe640b5
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Concatenation.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Concatenation.h"
+#include "Assert.h"
+
+// Shape inference for CONCATENATION: all inputs must share rank and element
+// type (and, for quantized tensors, scale and offset); every dimension
+// except 'axis' must match across inputs. The output copies input 0's shape
+// with the axis extent replaced by the sum over all inputs.
+bool concatenationPrepare(const std::vector<Shape> &inputShapes, int32_t axis, Shape *output)
+{
+
+ int num_inputs = inputShapes.size();
+ OperandType input_type = inputShapes[0].type;
+ uint32_t num_dimensions = getNumberOfDimensions(inputShapes[0]);
+
+ ASSERT(axis >= 0);
+ ASSERT(axis < (int32_t)num_dimensions);
+
+ // Running total of the concatenation-axis extent, seeded with input 0.
+ int sum_axis = getSizeOfDimension(inputShapes[0], axis);
+ for (int i = 1; i < num_inputs; ++i)
+ {
+ ASSERT(getNumberOfDimensions(inputShapes[i]) == num_dimensions);
+ ASSERT(inputShapes[i].type == inputShapes[0].type);
+ if (input_type == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ ASSERT(inputShapes[0].offset == inputShapes[i].offset);
+ ASSERT(inputShapes[0].scale == inputShapes[i].scale);
+ }
+ for (int d = 0; d < (int32_t)num_dimensions; ++d)
+ {
+ if (d == axis)
+ {
+ sum_axis += getSizeOfDimension(inputShapes[i], axis);
+ }
+ else
+ {
+ ASSERT(getSizeOfDimension(inputShapes[0], d) ==
+ getSizeOfDimension(inputShapes[i], d));
+ }
+ }
+ }
+
+ output->type = input_type;
+ output->dimensions = inputShapes[0].dimensions;
+ output->dimensions[axis] = sum_axis;
+
+ // Quantization parameters cannot change across a concatenation.
+ if (input_type == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ ASSERT(inputShapes[0].offset == output->offset);
+ ASSERT(inputShapes[0].scale == output->scale);
+ }
+
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/Concatenation.float.cpp b/compiler/ann-ref/src/ops/Concatenation.float.cpp
new file mode 100644
index 000000000..ac32aa0ff
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Concatenation.float.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Concatenation.float.h"
+
+#include "internal/Array.h"
+#include "internal/Matrix.h"
+#include "internal/Fused.h"
+
+// From optimized_ops.h in TensorFlow Lite
+// Concatenates 'inputs_count' packed 4-D arrays along 'concat_dim'
+// (expressed in reversed, stride-ordered Dims<4> axis numbering) by
+// memcpy-ing one contiguous slab per input per outer iteration.
+// Only the kNone fused activation is supported.
+template <FusedActivationFunctionType Ac, typename Scalar>
+void Concatenation(int concat_dim, const Scalar *const *input_data,
+ const Dims<4> *const *input_dims, int inputs_count, Scalar *output_data,
+ const Dims<4> &output_dims)
+{
+ DCHECK_GT(inputs_count, 1);
+ int concat_size = 0;
+ for (int i = 0; i < inputs_count; i++)
+ {
+ // All non-concatenated dimensions must agree with the output.
+ for (int j = 0; j < 4; j++)
+ {
+ if (j != concat_dim)
+ {
+ MatchingArraySize(*input_dims[i], j, output_dims, j);
+ }
+ }
+ concat_size += ArraySize(*input_dims[i], concat_dim);
+ }
+ DCHECK_EQ(concat_size, ArraySize(output_dims, concat_dim));
+ DCHECK(IsPackedWithoutStrides(output_dims));
+ // for now we dont have a model with a Concatenation
+ // with fused activation function.
+ DCHECK(Ac == FusedActivationFunctionType::kNone);
+ // Product of the dimensions outside concat_dim; one slab per outer index.
+ int outer_size = 1;
+ for (int i = concat_dim + 1; i < 4; i++)
+ {
+ outer_size *= output_dims.sizes[i];
+ }
+ Scalar *output_ptr = output_data;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ const int copy_size = input_dims[i]->sizes[concat_dim] * input_dims[i]->strides[concat_dim];
+ memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
+ output_ptr += copy_size;
+ }
+ }
+}
+
+// Entry point for float32 CONCATENATION: converts each Shape to Dims<4>,
+// translates the NN-API axis into the reversed Dims<4> axis numbering
+// (rank - axis - 1), and invokes the Concatenation kernel without a fused
+// activation. Always returns true.
+bool concatenationFloat32(const std::vector<const float *> &inputDataPtrs,
+ const std::vector<Shape> &inputShapes, int32_t axis, float *outputData,
+ const Shape &outputShape)
+{
+ int num_inputs = inputShapes.size();
+ // The kernel wants an array of Dims<4> pointers; keep the Dims alive in
+ // a parallel vector so the pointers stay valid for the call.
+ std::vector<Dims<4> *> inputDimsPtr(num_inputs);
+ std::vector<Dims<4>> inputDims(num_inputs);
+ for (int i = 0; i < num_inputs; i++)
+ {
+ inputDims[i] = convertShapeToDims(inputShapes[i]);
+ inputDimsPtr[i] = &inputDims[i];
+ }
+
+ Concatenation<FusedActivationFunctionType::kNone, float>(
+ getNumberOfDimensions(outputShape) - axis - 1, inputDataPtrs.data(), inputDimsPtr.data(),
+ num_inputs, outputData, convertShapeToDims(outputShape));
+
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/Concatenation.float.h b/compiler/ann-ref/src/ops/Concatenation.float.h
new file mode 100644
index 000000000..65bca1880
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Concatenation.float.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_CONCATENATION_FLOAT_H__
+#define __OP_CONCATENATION_FLOAT_H__
+
+#include "Shape.h"
+
+#include <vector>
+#include <cstdint>
+
+bool concatenationFloat32(const std::vector<const float *> &inputDataPtrs,
+ const std::vector<Shape> &inputShapes, int32_t axis, float *outputData,
+ const Shape &outputShape);
+
+#endif // __OP_CONCATENATION_FLOAT_H__
diff --git a/compiler/ann-ref/src/ops/Concatenation.h b/compiler/ann-ref/src/ops/Concatenation.h
new file mode 100644
index 000000000..b92071e45
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Concatenation.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_CONCATENATION_H__
+#define __OP_CONCATENATION_H__
+
+#include "Shape.h"
+
+#include <vector>
+#include <cstdint>
+
+bool concatenationPrepare(const std::vector<Shape> &inputShapes, int32_t axis, Shape *output);
+
+#endif // __OP_CONCATENATION_H__
diff --git a/compiler/ann-ref/src/ops/Conv2D.cpp b/compiler/ann-ref/src/ops/Conv2D.cpp
new file mode 100644
index 000000000..ef4407e00
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Conv2D.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conv2D.h"
+#include "Assert.h"
+
+#include "internal/Spatial.h"
+
+// Shape-inference / validation step for Conv2D.
+// Validates operand types and ranks, then computes the NHWC output shape
+// from input size, filter size, strides and explicit per-edge padding.
+// Dimension convention visible below: input is (batch, height, width, depth)
+// and filter is (channels_out, filterHeight, filterWidth, depth_in).
+// Returns true on success; type/rank violations fire the ASSERTs.
+bool convPrepare(const Shape &input, const Shape &filter, const Shape &bias, int32_t padding_left,
+                 int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
+                 int32_t stride_width, int32_t stride_height, Shape *output)
+{
+  ASSERT(input.type == filter.type);
+  // Quantized conv carries its bias as INT32; float conv uses the input type.
+  if (input.type == OperandType::TENSOR_QUANT8_ASYMM)
+  {
+    ASSERT(bias.type == OperandType::TENSOR_INT32);
+  }
+  else
+  {
+    ASSERT(input.type == bias.type);
+  }
+  ASSERT(getNumberOfDimensions(input) == 4);
+  ASSERT(getNumberOfDimensions(filter) == 4);
+  ASSERT(getNumberOfDimensions(bias) == 1);
+
+  // One bias value per output channel; filter input depth must match input.
+  ASSERT(getSizeOfDimension(filter, 0) == getSizeOfDimension(bias, 0));
+  ASSERT(getSizeOfDimension(filter, 3) == getSizeOfDimension(input, 3));
+
+  uint32_t channels_out = getSizeOfDimension(filter, 0);
+  uint32_t width = getSizeOfDimension(input, 2);
+  uint32_t height = getSizeOfDimension(input, 1);
+  uint32_t filterWidth = getSizeOfDimension(filter, 2);
+  uint32_t filterHeight = getSizeOfDimension(filter, 1);
+  uint32_t batches = getSizeOfDimension(input, 0);
+
+  uint32_t outWidth = computeOutSize(width, filterWidth, stride_width, padding_left, padding_right);
+  uint32_t outHeight =
+      computeOutSize(height, filterHeight, stride_height, padding_top, padding_bottom);
+
+  output->type = input.type;
+  output->dimensions = {batches, outHeight, outWidth, channels_out};
+  return true;
+}
diff --git a/compiler/ann-ref/src/ops/Conv2D.float.cpp b/compiler/ann-ref/src/ops/Conv2D.float.cpp
new file mode 100644
index 000000000..b47fcce27
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Conv2D.float.cpp
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conv2D.float.h"
+
+#include "internal/Spatial.h"
+#include "internal/Array.h"
+#include "internal/Matrix.h"
+#include "internal/Fused.h"
+#include "internal/GEMM.h"
+#include "internal/ActivationUtils.h"
+
+// From optimized_ops.h in TensorFlow Lite
+// Copies the input patch that produces output pixel (b, h, w) into column
+// 'buffer_id' of the im2col buffer, zero-filling (with 'byte_zero') the
+// parts of the patch that fall outside the input image. Dims<4> here is
+// indexed innermost-first: index 0 is depth, 1 is width, 2 is height,
+// 3 is batch (see the Offset(input_dims, 0, iw, ih, b) call below).
+template <typename T>
+inline void ExtractPatchIntoBufferColumn(const Dims<4> &input_dims, int w, int h, int b,
+                                         int kheight, int kwidth, int stride_width,
+                                         int stride_height, int pad_width, int pad_height,
+                                         int in_width, int in_height, int in_depth,
+                                         int single_buffer_length, int buffer_id, const T *in_data,
+                                         T *conv_buffer_data, uint8 byte_zero)
+{
+  // This chunk of code reshapes all the inputs corresponding to
+  // output (b, h, w) to a column vector in conv_buffer(:, buffer_id).
+  const int kwidth_times_indepth = kwidth * in_depth;
+  const int inwidth_times_indepth = in_width * in_depth;
+  // "ungated" = patch bounds before clamping to the input image.
+  const int ih_ungated_start = h * stride_height - pad_height;
+  const int ih_ungated_end = (ih_ungated_start + kheight);
+  const int ih_end = std::min(ih_ungated_end, in_height);
+  const int iw_ungated_start = w * stride_width - pad_width;
+  const int iw_ungated_end = (iw_ungated_start + kwidth);
+  const int iw_end = std::min(iw_ungated_end, in_width);
+  // If the patch is off the edge of the input image, skip writing those rows
+  // and columns from the patch into the output array.
+  const int h_offset = std::max(0, -ih_ungated_start);
+  const int w_offset = std::max(0, -iw_ungated_start);
+  const int ih_start = std::max(0, ih_ungated_start);
+  const int iw_start = std::max(0, iw_ungated_start);
+  const int single_row_num = std::min(kwidth - w_offset, in_width - iw_start) * in_depth;
+  const int output_row_offset = (buffer_id * single_buffer_length);
+  int out_offset = output_row_offset + (h_offset * kwidth + w_offset) * in_depth;
+  int in_offset = Offset(input_dims, 0, iw_start, ih_start, b);
+
+  // Express all of the calculations as padding around the input patch.
+  const int top_padding = h_offset;
+  const int bottom_padding = (ih_ungated_end - ih_end);
+  const int left_padding = w_offset;
+  const int right_padding = (iw_ungated_end - iw_end);
+  assert(single_row_num == ((kwidth - (left_padding + right_padding)) * in_depth));
+
+  // Write out zeroes to the elements representing the top rows of the input
+  // patch that are off the edge of the input image.
+  if (top_padding > 0)
+  {
+    const int top_row_elements = (top_padding * kwidth * in_depth);
+    memset(conv_buffer_data + output_row_offset, byte_zero, (top_row_elements * sizeof(T)));
+  }
+
+  // If the patch is on the interior of the input image horizontally, just copy
+  // over the rows sequentially, otherwise add zero padding at the start or end.
+  if ((left_padding == 0) && (right_padding == 0))
+  {
+    for (int ih = ih_start; ih < ih_end; ++ih)
+    {
+      memcpy(conv_buffer_data + out_offset, in_data + in_offset, single_row_num * sizeof(T));
+      out_offset += kwidth_times_indepth;
+      in_offset += inwidth_times_indepth;
+    }
+  }
+  else
+  {
+    for (int ih = ih_start; ih < ih_end; ++ih)
+    {
+      if (left_padding > 0)
+      {
+        const int left_start = (out_offset - (left_padding * in_depth));
+        memset(conv_buffer_data + left_start, byte_zero, (left_padding * in_depth * sizeof(T)));
+      }
+      memcpy(conv_buffer_data + out_offset, in_data + in_offset, single_row_num * sizeof(T));
+      if (right_padding > 0)
+      {
+        const int right_start = (out_offset + single_row_num);
+        memset(conv_buffer_data + right_start, byte_zero, (right_padding * in_depth * sizeof(T)));
+      }
+      out_offset += kwidth_times_indepth;
+      in_offset += inwidth_times_indepth;
+    }
+  }
+
+  // If the bottom of the patch falls off the input image, pad the values
+  // representing those input rows with zeroes.
+  if (bottom_padding > 0)
+  {
+    const int bottom_row_elements = (bottom_padding * kwidth * in_depth);
+    const int bottom_start =
+        output_row_offset + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth);
+    memset(conv_buffer_data + bottom_start, byte_zero, (bottom_row_elements * sizeof(T)));
+  }
+}
+
+// im2col: rewrites the input tensor so convolution becomes a single GEMM.
+// Each output pixel (b, h, w) gets one column of size
+// kheight * kwidth * input_depth (= output_depth of output_dims), filled by
+// ExtractPatchIntoBufferColumn; 'byte_zero' is the padding fill value.
+// Both tensors must be densely packed (checked below).
+template <typename T>
+void Im2col(const T *input_data, const Dims<4> &input_dims, int stride_width, int stride_height,
+            int pad_width, int pad_height, int kheight, int kwidth, uint8 byte_zero, T *output_data,
+            const Dims<4> &output_dims)
+{
+  DCHECK(IsPackedWithoutStrides(input_dims));
+  DCHECK(IsPackedWithoutStrides(output_dims));
+  // Dims<4> indexing is innermost-first: 0 = depth, 1 = width, 2 = height,
+  // 3 = batch.
+  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
+  const int input_depth = ArraySize(input_dims, 0);
+  const int input_width = ArraySize(input_dims, 1);
+  const int input_height = ArraySize(input_dims, 2);
+  const int output_depth = ArraySize(output_dims, 0);
+  const int output_width = ArraySize(output_dims, 1);
+  const int output_height = ArraySize(output_dims, 2);
+
+  int buffer_id = 0;
+  // Loop over the output nodes.
+  for (int b = 0; b < batches; ++b)
+  {
+    for (int h = 0; h < output_height; ++h)
+    {
+      for (int w = 0; w < output_width; ++w)
+      {
+        ExtractPatchIntoBufferColumn(input_dims, w, h, b, kheight, kwidth, stride_width,
+                                     stride_height, pad_width, pad_height, input_width,
+                                     input_height, input_depth, output_depth, buffer_id, input_data,
+                                     output_data, byte_zero);
+        ++buffer_id;
+      }
+    }
+  }
+}
+
+// From optimized_ops.h in TensorFlow Lite
+// Float Conv2D as im2col + GEMM, then bias add + fused activation 'Ac'.
+// The im2col step is skipped only for a 1x1 stride-1 filter, in which case
+// the GEMM reads the input tensor directly.
+template <FusedActivationFunctionType Ac>
+void Conv(const float *input_data, const Dims<4> &input_dims, const float *filter_data,
+          const Dims<4> &filter_dims, const float *bias_data, const Dims<4> &bias_dims,
+          int stride_width, int stride_height, int pad_width, int pad_height, float *output_data,
+          const Dims<4> &output_dims, float *im2col_data, const Dims<4> &im2col_dims)
+{
+  (void)im2col_data;
+  (void)im2col_dims;
+
+  const float *gemm_input_data = nullptr;
+  const Dims<4> *gemm_input_dims = nullptr;
+  const int filter_width = ArraySize(filter_dims, 1);
+  const int filter_height = ArraySize(filter_dims, 2);
+  const bool need_im2col =
+      stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1;
+  if (need_im2col)
+  {
+    DCHECK(im2col_data);
+    // byte_zero = 0: float padding is plain zero bytes.
+    Im2col(input_data, input_dims, stride_width, stride_height, pad_width, pad_height,
+           filter_height, filter_width, 0, im2col_data, im2col_dims);
+    gemm_input_data = im2col_data;
+    gemm_input_dims = &im2col_dims;
+  }
+  else
+  {
+#if 0 // TODO-NNRT : Check if it needs, 'im2col_data' seems to be always not null.
+    DCHECK(!im2col_data);
+#endif
+    gemm_input_data = input_data;
+    gemm_input_dims = &input_dims;
+  }
+
+  // Map tensors to Eigen-style matrices: filter rows x im2col columns.
+  const auto im2col_matrix_map = MapAsMatrixWithFirstDimAsRows(gemm_input_data, *gemm_input_dims);
+  const auto filter_matrix_map = MapAsMatrixWithLastDimAsCols(filter_data, filter_dims);
+  auto output_matrix_map = MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+
+  Gemm(filter_matrix_map.transpose(), im2col_matrix_map, &output_matrix_map);
+
+  // Bias and the fused activation are applied in-place on the GEMM result.
+  AddBiasAndEvalActivationFunction<Ac>(bias_data, bias_dims, output_data, output_dims);
+}
+
+// If possible we will use this static buffer for the tensor.
+// NOTE(review): a single file-static scratch buffer is shared by all calls,
+// so convFloat32 is presumably not reentrant/thread-safe — confirm callers
+// serialize conv execution.
+static constexpr int kStaticBufferSize = 1605632;
+static char static_scratch_buffer[kStaticBufferSize];
+
+// Expands to the local variables convFloat32 needs: spatial sizes, padding,
+// the im2col buffer shape (im2colDim, innermost-first with packed strides),
+// and the im2col buffer itself. Small buffers reuse static_scratch_buffer;
+// larger ones are heap-allocated and must be freed by the caller when
+// im2colByteSize > kStaticBufferSize.
+// NOTE(review): the new(std::nothrow) result is not null-checked here; an
+// allocation failure would propagate a null im2colData into Conv — verify.
+#define ANDROID_NN_CONV_PARAMETERS(Type)                                      \
+  uint32_t height = getSizeOfDimension(inputShape, 1);                        \
+  uint32_t width = getSizeOfDimension(inputShape, 2);                         \
+  uint32_t filterHeight = getSizeOfDimension(filterShape, 1);                 \
+  uint32_t filterWidth = getSizeOfDimension(filterShape, 2);                  \
+  uint32_t outHeight = getSizeOfDimension(outputShape, 1);                    \
+  uint32_t outWidth = getSizeOfDimension(outputShape, 2);                     \
+  uint32_t inDepth = getSizeOfDimension(inputShape, 3);                       \
+                                                                              \
+  uint32_t paddingHeight = (uint32_t)padding_top;                             \
+  uint32_t paddingWidth = (uint32_t)padding_left;                             \
+                                                                              \
+  Dims<4> im2colDim;                                                          \
+  im2colDim.sizes[3] = (int)getSizeOfDimension(outputShape, 0);               \
+  im2colDim.sizes[2] = (int)getSizeOfDimension(outputShape, 1);               \
+  im2colDim.sizes[1] = (int)getSizeOfDimension(outputShape, 2);               \
+  im2colDim.sizes[0] = (int)inDepth * filterHeight * filterWidth;             \
+                                                                              \
+  im2colDim.strides[0] = 1;                                                   \
+  for (int i = 1; i < 4; i++)                                                 \
+  {                                                                           \
+    im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \
+  }                                                                           \
+                                                                              \
+  Type *im2colData = nullptr;                                                 \
+  int im2colByteSize = sizeof(Type);                                          \
+  for (int i = 0; i < 4; i++)                                                 \
+  {                                                                           \
+    im2colByteSize *= im2colDim.sizes[i];                                     \
+  }                                                                           \
+  if (im2colByteSize <= kStaticBufferSize)                                    \
+  {                                                                           \
+    im2colData = reinterpret_cast<Type *>(static_scratch_buffer);             \
+  }                                                                           \
+  else                                                                        \
+  {                                                                           \
+    im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)];      \
+  }
+
+// NNAPI-style float Conv2D entry point. Sets up im2col scratch storage via
+// ANDROID_NN_CONV_PARAMETERS, dispatches on the fused-activation code via
+// ANDROID_NN_MACRO_DISPATCH_WITH_DELETE (which presumably frees im2colData
+// on a bad activation value — confirm against its definition), and frees
+// the heap-allocated scratch buffer when one was used. Returns true on
+// the normal path.
+bool convFloat32(const float *inputData, const Shape &inputShape, const float *filterData,
+                 const Shape &filterShape, const float *biasData, const Shape &biasShape,
+                 int32_t padding_left, int32_t padding_right, int32_t padding_top,
+                 int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
+                 int32_t activation, float *outputData, const Shape &outputShape)
+{
+
+  ANDROID_NN_CONV_PARAMETERS(float)
+
+#define ANDROID_NN_CONV(activation)                                                         \
+  Conv<FusedActivationFunctionType::activation>(                                            \
+      inputData, convertShapeToDims(inputShape), filterData, convertShapeToDims(filterShape), \
+      biasData, convertShapeToDims(biasShape), stride_width, stride_height, paddingWidth,   \
+      paddingHeight, outputData, convertShapeToDims(outputShape), im2colData, im2colDim)
+
+  ANDROID_NN_MACRO_DISPATCH_WITH_DELETE(ANDROID_NN_CONV)
+#undef ANDROID_NN_CONV
+
+  // Only the heap path allocated; the static scratch buffer is never freed.
+  if (im2colByteSize > kStaticBufferSize)
+  {
+    delete[] im2colData;
+  }
+  return true;
+}
diff --git a/compiler/ann-ref/src/ops/Conv2D.float.h b/compiler/ann-ref/src/ops/Conv2D.float.h
new file mode 100644
index 000000000..620263fc3
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Conv2D.float.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_CONV_2D_FLOAT_H__
+#define __OP_CONV_2D_FLOAT_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool convFloat32(const float *inputData, const Shape &inputShape, const float *filterData,
+ const Shape &filterShape, const float *biasData, const Shape &biasShape,
+ int32_t padding_left, int32_t padding_right, int32_t padding_top,
+ int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
+ int32_t activation, float *outputData, const Shape &outputShape);
+
+#endif // __OP_CONV_2D_FLOAT_H__
diff --git a/compiler/ann-ref/src/ops/Conv2D.h b/compiler/ann-ref/src/ops/Conv2D.h
new file mode 100644
index 000000000..7dc1e3424
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Conv2D.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_CONV_2D_H__
+#define __OP_CONV_2D_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool convPrepare(const Shape &input, const Shape &filter, const Shape &bias, int32_t padding_left,
+ int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height, Shape *output);
+
+#endif // __OP_CONV_2D_H__
diff --git a/compiler/ann-ref/src/ops/DepthwiseConv2D.cpp b/compiler/ann-ref/src/ops/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..4692564e7
--- /dev/null
+++ b/compiler/ann-ref/src/ops/DepthwiseConv2D.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConv2D.h"
+#include "Assert.h"
+
+#include "internal/Spatial.h"
+
+// Shape-inference / validation step for DepthwiseConv2D.
+// Like convPrepare, but the depthwise filter layout puts the output channel
+// count in dimension 3 (see the bias-size check below), and no
+// input-depth match is enforced here.
+bool depthwiseConvPrepare(const Shape &input, const Shape &filter, const Shape &bias,
+                          int32_t padding_left, int32_t padding_right, int32_t padding_top,
+                          int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
+                          Shape *output)
+{
+  ASSERT(input.type == filter.type);
+  // Quantized depthwise conv carries its bias as INT32.
+  if (input.type == OperandType::TENSOR_QUANT8_ASYMM)
+  {
+    ASSERT(bias.type == OperandType::TENSOR_INT32);
+  }
+  else
+  {
+    ASSERT(input.type == bias.type);
+  }
+  ASSERT(getNumberOfDimensions(input) == 4);
+  ASSERT(getNumberOfDimensions(filter) == 4);
+  ASSERT(getNumberOfDimensions(bias) == 1);
+
+  // One bias value per output channel (filter dim 3 for depthwise).
+  ASSERT(getSizeOfDimension(filter, 3) == getSizeOfDimension(bias, 0));
+
+  uint32_t channels_out = getSizeOfDimension(filter, 3);
+  uint32_t width = getSizeOfDimension(input, 2);
+  uint32_t height = getSizeOfDimension(input, 1);
+  uint32_t filterWidth = getSizeOfDimension(filter, 2);
+  uint32_t filterHeight = getSizeOfDimension(filter, 1);
+  uint32_t batches = getSizeOfDimension(input, 0);
+
+  uint32_t outWidth = computeOutSize(width, filterWidth, stride_width, padding_left, padding_right);
+  uint32_t outHeight =
+      computeOutSize(height, filterHeight, stride_height, padding_top, padding_bottom);
+
+  output->type = input.type;
+  output->dimensions = {batches, outHeight, outWidth, channels_out};
+  return true;
+}
diff --git a/compiler/ann-ref/src/ops/DepthwiseConv2D.float.cpp b/compiler/ann-ref/src/ops/DepthwiseConv2D.float.cpp
new file mode 100644
index 000000000..936b24ec7
--- /dev/null
+++ b/compiler/ann-ref/src/ops/DepthwiseConv2D.float.cpp
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConv2D.float.h"
+#include "Assert.h"
+
+#include "internal/Spatial.h"
+#include "internal/Array.h"
+#include "internal/Fused.h"
+#include "internal/ActivationUtils.h"
+
+#include <cstring> // 'memcpy'
+
+namespace optimized_ops
+{
+
+// Implementation of float DepthwiseConv
+
+// Primary template for the per-row depthwise kernel. It is intentionally
+// empty: only explicit specializations (e.g. NEON-optimized ones in the
+// upstream TensorFlow Lite code) define a Run() method. In this file no
+// specialization is visible, and DepthwiseConv below only ever uses the
+// generic non-templatized row function.
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+struct FloatDepthwiseConvKernel
+{
+};
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// Accumulates the effect of one row of the filter, on a segment of one row
+// of the output, accessing the corresponding one row of the input.
+// Accumulates the effect of one row of the filter, on a segment of one row
+// of the output, accessing the corresponding one row of the input.
+// NOTE(review): this calls FloatDepthwiseConvKernel<...>::Run, which the
+// empty primary template above does not define — instantiating this
+// function without a matching specialization would fail to compile. It is
+// currently unreferenced in this file (DepthwiseConv uses the generic row
+// function), so it only compiles because it is never instantiated.
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+void FloatDepthwiseConvAccumRow(int stride, int input_depth, int input_width,
+                                const float *input_data, int pad_width, int depth_multiplier,
+                                int filter_width, const float *filter_data, int out_x_buffer_start,
+                                int out_x_buffer_end, int output_depth, float *acc_buffer)
+{
+  // Sanity check parameters. This is important in particular to ensure
+  // that we keep the number of template instantiations minimal, so we don't
+  // increase binary size unnecessarily.
+  static_assert(kFixedDepthMultiplier || !kFixedInputDepth, "");
+  static_assert(kFixedInputDepth || kAllowStrided, "");
+  DCHECK(stride == 1 || kAllowStrided);
+  if (kFixedInputDepth)
+  {
+    DCHECK_EQ(input_depth, kFixedInputDepth);
+  }
+  if (kFixedDepthMultiplier)
+  {
+    DCHECK_EQ(depth_multiplier, kFixedDepthMultiplier);
+  }
+  DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+  const int input_ptr_increment = stride * input_depth;
+  const float *filter_base_ptr = filter_data;
+  for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+  {
+    // For the current (filter_x, filter_y) point in the filter,
+    // compute the boundaries of the corresponding output row segment.
+    int out_x_loop_start_unclampled = 0;
+    int out_x_loop_end_unclampled = 0;
+    if (kAllowStrided)
+    {
+      // Strides 2 and 4 get simplified divisions; other strides use the
+      // general ceiling-division form.
+      if (stride == 2)
+      {
+        out_x_loop_start_unclampled = (pad_width - filter_x + 1) / 2;
+        out_x_loop_end_unclampled = (pad_width + input_width - filter_x + 1) / 2;
+      }
+      else if (stride == 4)
+      {
+        out_x_loop_start_unclampled = (pad_width - filter_x + 3) / 4;
+        out_x_loop_end_unclampled = (pad_width + input_width - filter_x + 3) / 4;
+      }
+      else
+      {
+        out_x_loop_start_unclampled = (pad_width - filter_x + stride - 1) / stride;
+        out_x_loop_end_unclampled = (pad_width + input_width - filter_x + stride - 1) / stride;
+      }
+    }
+    else
+    {
+      out_x_loop_start_unclampled = pad_width - filter_x;
+      out_x_loop_end_unclampled = pad_width + input_width - filter_x;
+    }
+    // The kernel will have to iterate on the segment of the
+    // output row that starts at out_x_loop_start and out_x_loop_end.
+    const int out_x_loop_start = std::max(out_x_buffer_start, out_x_loop_start_unclampled);
+    const int out_x_loop_end = std::min(out_x_buffer_end, out_x_loop_end_unclampled);
+
+    float *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+    const int in_x_origin = (out_x_loop_start * stride) - pad_width + filter_x;
+    const float *input_ptr = input_data + in_x_origin * input_depth;
+    const int num_output_pixels = out_x_loop_end - out_x_loop_start;
+    FloatDepthwiseConvKernel<kAllowStrided, kFixedInputDepth, kFixedDepthMultiplier>::Run(
+        num_output_pixels, input_depth, depth_multiplier, input_ptr, input_ptr_increment,
+        filter_base_ptr, acc_buffer_ptr);
+    filter_base_ptr += output_depth;
+  }
+}
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// generic fallback of FloatDepthwiseConvAccumRow, portable, non-templatized.
+// generic fallback of FloatDepthwiseConvAccumRow, portable, non-templatized.
+// For one filter row, accumulates filter*input products into acc_buffer for
+// the output-x segment [out_x_buffer_start, out_x_buffer_end), clamped so
+// that reads stay inside [0, input_width). Accumulator layout is
+// output_depth values per output pixel, where
+// output channel = input channel * depth_multiplier + m.
+inline void FloatDepthwiseConvAccumRowGeneric(int stride, int input_depth, int input_width,
+                                              const float *input_data, int pad_width,
+                                              int depth_multiplier, int filter_width,
+                                              const float *filter_data, int out_x_buffer_start,
+                                              int out_x_buffer_end, int output_depth,
+                                              float *acc_buffer)
+{
+  const float *filter_base_ptr = filter_data;
+  for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+  {
+    // Clamp the output-x range so in_x stays within the input row.
+    const int out_x_loop_start =
+        std::max(out_x_buffer_start, (pad_width - filter_x + stride - 1) / stride);
+    const int out_x_loop_end =
+        std::min(out_x_buffer_end, (pad_width + input_width - filter_x + stride - 1) / stride);
+
+    float *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+    const int in_x_origin = (out_x_loop_start * stride) - pad_width + filter_x;
+    const float *input_ptr = input_data + in_x_origin * input_depth;
+    // input_ptr already advances by input_depth inside the ic loop, so the
+    // per-pixel increment only needs the remaining (stride - 1) * depth.
+    const int input_ptr_increment = (stride - 1) * input_depth;
+    for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++)
+    {
+      const float *filter_ptr = filter_base_ptr;
+      for (int ic = 0; ic < input_depth; ++ic)
+      {
+        const float input_val = *input_ptr++;
+        for (int m = 0; m < depth_multiplier; m++)
+        {
+          const float filter_val = *filter_ptr++;
+          *acc_buffer_ptr++ += filter_val * input_val;
+        }
+      }
+      input_ptr += input_ptr_increment;
+    }
+    filter_base_ptr += output_depth;
+  }
+}
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// Initializes the accumulator buffer with bias values.
+// Initializes the accumulator buffer with bias values, one output_depth-sized
+// bias vector copy per output pixel, so the accumulation loop does not need
+// a separate bias-add pass afterwards.
+inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth,
+                                       const float *bias_data, float *acc_buffer)
+{
+  for (int i = 0; i < num_output_pixels; i++)
+  {
+    memcpy(acc_buffer + i * output_depth, bias_data, sizeof(acc_buffer[0]) * output_depth);
+  }
+}
+
+// From optimized_ops.h in TensorFlow Lite
+// Float DepthwiseConv2D with fused activation 'Ac'. Processes the output in
+// x-segments that fit a fixed on-stack accumulator (kAccBufferMaxSize
+// floats): seed the accumulator with bias, accumulate one filter row at a
+// time via row_accum_func, then apply the activation while storing.
+template <FusedActivationFunctionType Ac>
+void DepthwiseConv(const float *input_data, const Dims<4> &input_dims, const float *filter_data,
+                   const Dims<4> &filter_dims, const float *bias_data, const Dims<4> &bias_dims,
+                   int stride_width, int stride_height, int pad_width, int pad_height,
+                   int depth_multiplier, float *output_data, const Dims<4> &output_dims)
+{
+  static_assert(
+      Ac == FusedActivationFunctionType::kNone || Ac == FusedActivationFunctionType::kRelu ||
+          Ac == FusedActivationFunctionType::kRelu6 || Ac == FusedActivationFunctionType::kRelu1,
+      "");
+  // Dims<4> indexing is innermost-first: 0 = depth, 1 = width, 2 = height,
+  // 3 = batch.
+  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
+  const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0);
+  const int input_height = ArraySize(input_dims, 2);
+  const int input_width = ArraySize(input_dims, 1);
+  const int input_depth = ArraySize(input_dims, 0);
+  const int filter_height = ArraySize(filter_dims, 2);
+  const int filter_width = ArraySize(filter_dims, 1);
+  const int output_height = ArraySize(output_dims, 2);
+  const int output_width = ArraySize(output_dims, 1);
+#if 0 // TODO-NNRT : Check if assertion is needed, output depth some times not equal to input *
+      // depthmultiplier
+  DCHECK(output_depth == input_depth * depth_multiplier);
+#endif
+
+  // Fixed-size stack accumulator; output_depth must fit (checked below).
+  static const int kAccBufferMaxSize = 1024;
+  float acc_buffer[kAccBufferMaxSize];
+  DCHECK_GE(kAccBufferMaxSize, output_depth);
+  const int kOutputPixelsInAccBuffer = kAccBufferMaxSize / output_depth;
+  const int kAccBufferActualSize = kOutputPixelsInAccBuffer * output_depth;
+  DCHECK_LE(kOutputPixelsInAccBuffer * output_depth, kAccBufferActualSize);
+  DCHECK_LE(kAccBufferActualSize, kAccBufferMaxSize);
+  DCHECK_GE(kOutputPixelsInAccBuffer, 1);
+
+  // row_accum_func will point to the core accumulation function to be used
+  // for this DepthwiseConv op.
+  // NOTE(review): unlike the upstream TFLite code this port never swaps in a
+  // specialized kernel — row_accum_func always stays the generic fallback,
+  // and the fixed_depth_multiplier / fixed_input_depth values computed below
+  // are effectively unused.
+  auto *row_accum_func = FloatDepthwiseConvAccumRowGeneric;
+
+  const int kMaxFixedDepthMultiplier = 16;
+  int fixed_depth_multiplier = 0;
+  if (depth_multiplier <= kMaxFixedDepthMultiplier)
+  {
+    fixed_depth_multiplier = depth_multiplier;
+  }
+  // kMaxUnrolling is the max number of output values that we aim to handle
+  // in one unrolled iteration of the inner loop. For practical performance
+  // reasons, it is limited by the number of available registers. We could
+  // fine-tune it depending on the architecture, but that's not worth doing
+  // since this whole code is not very optimized to begin with. The
+  // present value reflects what's realistic on ARM 32bit NEON with 16 128-bit
+  // vector registers.
+  const int kMaxUnrolling = 8;
+  int fixed_input_depth = 0;
+  if (fixed_depth_multiplier && input_depth * fixed_depth_multiplier <= kMaxUnrolling)
+  {
+    fixed_input_depth = input_depth;
+  }
+
+  // Now that we have determined row_accum_func, we can start work.
+  float *output_ptr = output_data;
+  for (int b = 0; b < batches; ++b)
+  {
+    for (int out_y = 0; out_y < output_height; ++out_y)
+    {
+      // Clip the filter's y-range to the rows that overlap the input image.
+      const int in_y_origin = (out_y * stride_height) - pad_height;
+      const int filter_y_start = std::max(0, -in_y_origin);
+      const int filter_y_end = std::min(filter_height, input_height - in_y_origin);
+      for (int out_x_buffer_start = 0; out_x_buffer_start < output_width;
+           out_x_buffer_start += kOutputPixelsInAccBuffer)
+      {
+        const int out_x_buffer_end =
+            std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer);
+        // We call a 'pixel' a group of activation that share all but the
+        // 'depth'/'channel' coordinate. num_output_pixels is the number of
+        // output pixels that we will accumulate in this loop iteration.
+        const int num_output_pixels = out_x_buffer_end - out_x_buffer_start;
+        // Initialize our local accumulator with the bias values, so we don't
+        // have to add them later.
+        DepthwiseConvInitAccBuffer(num_output_pixels, output_depth, bias_data, acc_buffer);
+        // Accumulation loop. Most of the time should be spent in here.
+        for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+        {
+          const int in_y = in_y_origin + filter_y;
+          row_accum_func(stride_width, input_depth, input_width,
+                         input_data + in_y * input_dims.strides[2] + b * input_dims.strides[3],
+                         pad_width, depth_multiplier, filter_width,
+                         filter_data + filter_y * filter_dims.strides[2], out_x_buffer_start,
+                         out_x_buffer_end, output_depth, acc_buffer);
+        }
+        // Finished accumulating. Now store to destination.
+        const int num_output_values = output_depth * num_output_pixels;
+        int i = 0;
+        // Handle leftover values, one by one. This is very slow.
+        for (; i < num_output_values; i++)
+        {
+          // Ac is a template parameter, so these branches resolve at
+          // compile time.
+          float acc = acc_buffer[i];
+          if (Ac == FusedActivationFunctionType::kRelu)
+          {
+            acc = std::max(0.f, acc);
+          }
+          else if (Ac == FusedActivationFunctionType::kRelu6)
+          {
+            acc = std::max(0.f, std::min(6.f, acc));
+          }
+          else if (Ac == FusedActivationFunctionType::kRelu1)
+          {
+            acc = std::max(-1.f, std::min(1.f, acc));
+          }
+          *output_ptr++ = acc;
+        }
+      }
+    }
+  }
+}
+
+} // namespace optimized_ops
+
+// Expands to the spatial-size and padding locals used by
+// depthwiseConvFloat32 (NHWC Shape indexing: 1 = height, 2 = width).
+// NOTE(review): several of these locals (height, width, outHeight, outWidth,
+// filterHeight, filterWidth) are not referenced by the dispatch macro below —
+// presumably kept for parity with the Android NN original.
+#define ANDROID_NN_DEPTHWISE_CONV_PARAMETERS                  \
+  uint32_t height = getSizeOfDimension(inputShape, 1);        \
+  uint32_t width = getSizeOfDimension(inputShape, 2);         \
+  uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \
+  uint32_t filterWidth = getSizeOfDimension(filterShape, 2);  \
+  uint32_t outHeight = getSizeOfDimension(outputShape, 1);    \
+  uint32_t outWidth = getSizeOfDimension(outputShape, 2);     \
+                                                              \
+  uint32_t paddingHeight = (uint32_t)padding_top;             \
+  uint32_t paddingWidth = (uint32_t)padding_left;
+
+// NNAPI-style float DepthwiseConv2D entry point. Converts Shapes to Dims<4>,
+// then dispatches on the fused-activation code via ANDROID_NN_MACRO_DISPATCH
+// to the matching DepthwiseConv<Ac> instantiation. No scratch allocation is
+// needed (unlike convFloat32); returns true on the normal path.
+bool depthwiseConvFloat32(const float *inputData, const Shape &inputShape, const float *filterData,
+                          const Shape &filterShape, const float *biasData, const Shape &biasShape,
+                          int32_t padding_left, int32_t padding_right, int32_t padding_top,
+                          int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
+                          int32_t depth_multiplier, int32_t activation, float *outputData,
+                          const Shape &outputShape)
+{
+
+  ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
+
+#define ANDROID_NN_DEPTHWISE_CONV(activation)                                                 \
+  optimized_ops::DepthwiseConv<FusedActivationFunctionType::activation>(                      \
+      inputData, convertShapeToDims(inputShape), filterData, convertShapeToDims(filterShape), \
+      biasData, convertShapeToDims(biasShape), stride_width, stride_height, paddingWidth,     \
+      paddingHeight, depth_multiplier, outputData, convertShapeToDims(outputShape))
+
+  ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_DEPTHWISE_CONV)
+#undef ANDROID_NN_DEPTHWISE_CONV
+
+  return true;
+}
diff --git a/compiler/ann-ref/src/ops/DepthwiseConv2D.float.h b/compiler/ann-ref/src/ops/DepthwiseConv2D.float.h
new file mode 100644
index 000000000..3fbfeae67
--- /dev/null
+++ b/compiler/ann-ref/src/ops/DepthwiseConv2D.float.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_DEPTHWISE_CONV_2D_FLOAT_H__
+#define __OP_DEPTHWISE_CONV_2D_FLOAT_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool depthwiseConvFloat32(const float *inputData, const Shape &inputShape, const float *filterData,
+ const Shape &filterShape, const float *biasData, const Shape &biasShape,
+ int32_t padding_left, int32_t padding_right, int32_t padding_top,
+ int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
+ int32_t depth_multiplier, int32_t activation, float *outputData,
+ const Shape &outputShape);
+
+#endif // __OP_DEPTHWISE_CONV_2D_FLOAT_H__
diff --git a/compiler/ann-ref/src/ops/DepthwiseConv2D.h b/compiler/ann-ref/src/ops/DepthwiseConv2D.h
new file mode 100644
index 000000000..13f520219
--- /dev/null
+++ b/compiler/ann-ref/src/ops/DepthwiseConv2D.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_DEPTHWISE_CONV_2D_H__
+#define __OP_DEPTHWISE_CONV_2D_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool depthwiseConvPrepare(const Shape &input, const Shape &filter, const Shape &bias,
+ int32_t padding_left, int32_t padding_right, int32_t padding_top,
+ int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
+ Shape *output);
+
+#endif // __OP_DEPTHWISE_CONV_2D_H__
diff --git a/compiler/ann-ref/src/ops/Div.cpp b/compiler/ann-ref/src/ops/Div.cpp
new file mode 100644
index 000000000..250e72b1d
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Div.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Div.h"
+#include "Assert.h"
+
+bool divPrepare(const Shape &in1, const Shape &in2, Shape *out)
+{
+ ASSERT(getNumberOfDimensions(in1) <= 4 && getNumberOfDimensions(in2) <= 4);
+ ASSERT(in1.type == in2.type);
+ if (SameShape(in1, in2))
+ {
+ return SetShape(in1, out);
+ }
+ else
+ {
+ // Broadcast needed
+ uint32_t numberOfDims1 = getNumberOfDimensions(in1);
+ uint32_t numberOfDims2 = getNumberOfDimensions(in2);
+ uint32_t maxDims = std::max(numberOfDims1, numberOfDims2);
+ out->dimensions = std::vector<uint32_t>(maxDims);
+ for (uint32_t i = 1; i <= maxDims; i++)
+ {
+ uint32_t dim1 = 1;
+ if (i <= numberOfDims1)
+ {
+ dim1 = getSizeOfDimension(in1, numberOfDims1 - i);
+ }
+ uint32_t dim2 = 1;
+ if (i <= numberOfDims2)
+ {
+ dim2 = getSizeOfDimension(in2, numberOfDims2 - i);
+ }
+ if (dim1 != dim2 && dim1 != 1 && dim2 != 1)
+ {
+ LOG(ERROR) << "Dimensions mismatch for BroadcastDiv";
+ return false;
+ }
+ out->dimensions[maxDims - i] = std::max(dim1, dim2);
+ }
+ }
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/Div.float.cpp b/compiler/ann-ref/src/ops/Div.float.cpp
new file mode 100644
index 000000000..a1a39e546
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Div.float.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Div.float.h"
+
+#include "internal/Array.h"
+#include "internal/NDArray.h"
+#include "internal/Matrix.h"
+#include "internal/Fused.h"
+#include "internal/ActivationUtils.h"
+
+template <FusedActivationFunctionType Ac>
+void Div(const float *input1_data, const Dims<4> &input1_dims, const float *input2_data,
+ const Dims<4> &input2_dims, float *output_data, const Dims<4> &output_dims)
+{
+ MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3);
+ MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2);
+ MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1);
+ MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0);
+ DCHECK(IsPackedWithoutStrides(input1_dims));
+ DCHECK(IsPackedWithoutStrides(input2_dims));
+ DCHECK(IsPackedWithoutStrides(output_dims));
+
+ const int size = input1_dims.sizes[3] * input1_dims.strides[3];
+
+ for (int i = 0; i < size; i++)
+ {
+ auto x = input1_data[i] / input2_data[i];
+ output_data[i] = ActivationFunction<Ac>(x);
+ }
+}
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// TODO: We can implement BroadcastDiv on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+// TODO: BroadcastDiv is intentionally duplicated from
+// reference_ops.h. Once an optimized version is implemented and NdArrayDesc<T>
+// is no longer referenced in this file, move NdArrayDesc<T> from types.h to
+// reference_ops.h.
+template <FusedActivationFunctionType Ac>
+void BroadcastDiv(const float *input1_data, const Dims<4> &input1_dims, const float *input2_data,
+ const Dims<4> &input2_dims, float *output_data, const Dims<4> &output_dims)
+{
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2);
+
+ // In Tensorflow, the dimensions are canonically named (batch_number, row,
+ // col, channel), with extents (batches, height, width, depth), with the
+ // trailing dimension changing most rapidly (channels has the smallest stride,
+ // typically 1 element).
+ //
+ // In generated C code, we store arrays with the dimensions reversed. The
+ // first dimension has smallest stride.
+ //
+ // We name our variables by their Tensorflow convention, but generate C code
+ // nesting loops such that the innermost loop has the smallest stride for the
+ // best cache behavior.
+ for (int b = 0; b < ArraySize(output_dims, 3); ++b)
+ {
+ for (int y = 0; y < ArraySize(output_dims, 2); ++y)
+ {
+ for (int x = 0; x < ArraySize(output_dims, 1); ++x)
+ {
+ for (int c = 0; c < ArraySize(output_dims, 0); ++c)
+ {
+ output_data[Offset(output_dims, c, x, y, b)] =
+ ActivationFunction<Ac>(input1_data[SubscriptToIndex(desc1, c, x, y, b)] /
+ input2_data[SubscriptToIndex(desc2, c, x, y, b)]);
+ }
+ }
+ }
+ }
+}
+
+bool divFloat32(const float *in1, const Shape &shape1, const float *in2, const Shape &shape2,
+ int32_t activation, float *out, const Shape &shapeOut)
+{
+ bool needBroadcast = !SameShape(shape1, shape2);
+
+#define ANDROID_NN_NORMAL_DIV(activation) \
+ Div<FusedActivationFunctionType::activation>(in1, convertShapeToDims(shape1), \
+ in2, convertShapeToDims(shape2), \
+ out, convertShapeToDims(shapeOut))
+
+#define ANDROID_NN_BROADCAST_DIV(activation) \
+ BroadcastDiv<FusedActivationFunctionType::activation>( \
+ in1, convertShapeToDims(shape1), \
+ in2, convertShapeToDims(shape2), \
+ out, convertShapeToDims(shapeOut))
+
+ if (needBroadcast)
+ {
+ ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_BROADCAST_DIV)
+ }
+ else
+ {
+ ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_NORMAL_DIV)
+ }
+
+#undef ANDROID_NN_NORMAL_ADD
+#undef ANDROID_NN_BROADCAST_ADD
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/Div.float.h b/compiler/ann-ref/src/ops/Div.float.h
new file mode 100644
index 000000000..a2aa7e1a9
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Div.float.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_DIV_FLOAT_H__
+#define __OP_DIV_FLOAT_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool divFloat32(const float *in1, const Shape &shape1, const float *in2, const Shape &shape2,
+ int32_t activation, float *out, const Shape &shapeOut);
+
+#endif // __OP_DIV_FLOAT_H__
diff --git a/compiler/ann-ref/src/ops/Div.h b/compiler/ann-ref/src/ops/Div.h
new file mode 100644
index 000000000..5eb98a3f2
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Div.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_DIV_H__
+#define __OP_DIV_H__
+
+#include "Shape.h"
+
+bool divPrepare(const Shape &in1, const Shape &in2, Shape *out);
+
+#endif // __OP_DIV_H__
diff --git a/compiler/ann-ref/src/ops/FullyConnected.cpp b/compiler/ann-ref/src/ops/FullyConnected.cpp
new file mode 100644
index 000000000..d21389e7e
--- /dev/null
+++ b/compiler/ann-ref/src/ops/FullyConnected.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnected.h"
+#include "Assert.h"
+
+#if 0
+#include "internal/Matrix.h"
+#include "internal/Fused.h"
+#include "internal/GEMM.h"
+#include "internal/ActivationUtils.h"
+#endif
+
+bool fullyConnectedPrepare(const Shape &input, const Shape &weights, const Shape &bias,
+ Shape *output)
+{
+ // Check all the parameters of tensor match within themselves and match the
+ // input configuration.
+ ASSERT(input.type == weights.type);
+ if (input.type == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ ASSERT(bias.type == OperandType::TENSOR_INT32);
+ }
+ else
+ {
+ ASSERT(input.type == bias.type);
+ }
+ ASSERT(getNumberOfDimensions(input) >= 2);
+ uint32_t input_size = getNumberOfElements(input);
+ uint32_t num_units = getSizeOfDimension(weights, 0);
+
+ // modified to resolve Coverity 118949 (Apr 25, 2018) by hyunsik.yoon
+ // Original Code:
+ // uint32_t batch_size = input_size / getSizeOfDimension(weights, 1);
+ //
+ // Coverity Detection: Division by zero
+ //
+ // Code below is modified code
+
+ uint32_t shape_size = getSizeOfDimension(weights, 1);
+ if (shape_size == 0)
+ {
+ return false;
+ }
+
+ uint32_t batch_size = input_size / shape_size;
+
+ ASSERT(getSizeOfDimension(bias, 0) == num_units);
+ ASSERT(getSizeOfDimension(weights, 1) * batch_size == input_size);
+ ASSERT(getNumberOfDimensions(weights) == 2);
+
+ output->type = input.type;
+ output->dimensions = {batch_size, num_units};
+
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/FullyConnected.float.cpp b/compiler/ann-ref/src/ops/FullyConnected.float.cpp
new file mode 100644
index 000000000..4d12382ca
--- /dev/null
+++ b/compiler/ann-ref/src/ops/FullyConnected.float.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnected.float.h"
+#include "Assert.h"
+
+#include "internal/Matrix.h"
+#include "internal/Fused.h"
+#include "internal/GEMM.h"
+#include "internal/ActivationUtils.h"
+
+// From optimized_ops.h in TensorFlow Lite
+template <FusedActivationFunctionType Ac>
+void FullyConnected(const float *input_data, const Dims<4> &input_dims, const float *weights_data,
+ const Dims<4> &weights_dims, const float *bias_data, const Dims<4> &bias_dims,
+ float *output_data, const Dims<4> &output_dims)
+{
+ // TODO(b/62193649): this convoluted shape computation (determining
+ // input_rows from the weights_dims, then MapAsMatrixWithGivenNumberOfRows)
+ // is because the current --variable_batch hack consists in overwriting the
+ // 3rd dimension with the runtime batch size, as we don't keep track for each
+ // array of which dimension is the batch dimension in it.
+ // When that is fixed, this should become:
+ // const auto input_matrix_map =
+ // MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
+ const int input_rows = ArraySize(weights_dims, 0);
+ const auto input_matrix_map =
+ MapAsMatrixWithGivenNumberOfRows(input_data, input_dims, input_rows);
+ const auto filter_matrix_map = MapAsMatrixWithFirstDimAsRows(weights_data, weights_dims);
+ auto output_matrix_map = MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+
+ Gemm(filter_matrix_map.transpose(), input_matrix_map, &output_matrix_map);
+ AddBiasAndEvalActivationFunction<Ac>(bias_data, bias_dims, output_data, output_dims);
+}
+
+bool fullyConnectedFloat32(const float *inputData, const Shape &inputShape,
+ const float *weightsData, const Shape &weightsShape,
+ const float *biasData, const Shape &biasShape, int32_t activation,
+ float *outputData, const Shape &outputShape)
+{
+
+#define ANDROID_NN_FULLY_CONNECTED(activation) \
+ FullyConnected<FusedActivationFunctionType::activation>( \
+ inputData, convertShapeToDims(inputShape), weightsData, convertShapeToDims(weightsShape), \
+ biasData, convertShapeToDims(biasShape), outputData, convertShapeToDims(outputShape))
+
+ ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_FULLY_CONNECTED)
+#undef ANDROID_NN_FULLY_CONNECTED
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/FullyConnected.float.h b/compiler/ann-ref/src/ops/FullyConnected.float.h
new file mode 100644
index 000000000..3412fdb06
--- /dev/null
+++ b/compiler/ann-ref/src/ops/FullyConnected.float.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_FULLY_CONNECTED_FLOAT_H__
+#define __OP_FULLY_CONNECTED_FLOAT_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool fullyConnectedFloat32(const float *inputData, const Shape &inputShape, const float *weights,
+ const Shape &weightsShape, const float *biasData, const Shape &biasShape,
+ int32_t activation, float *outputData, const Shape &outputShape);
+
+#endif // __OP_FULLY_CONNECTED_FLOAT_H__
diff --git a/compiler/ann-ref/src/ops/FullyConnected.h b/compiler/ann-ref/src/ops/FullyConnected.h
new file mode 100644
index 000000000..985fd7ec2
--- /dev/null
+++ b/compiler/ann-ref/src/ops/FullyConnected.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_FULLY_CONNECTED_H__
+#define __OP_FULLY_CONNECTED_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool fullyConnectedPrepare(const Shape &input, const Shape &weights, const Shape &bias,
+ Shape *output);
+
+#endif // __OP_FULLY_CONNECTED_H__
diff --git a/compiler/ann-ref/src/ops/MaxPool2D.cpp b/compiler/ann-ref/src/ops/MaxPool2D.cpp
new file mode 100644
index 000000000..405afbbdc
--- /dev/null
+++ b/compiler/ann-ref/src/ops/MaxPool2D.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPool2D.h"
+
+#include "internal/Pooling.h"
+
+bool maxPoolPrepare(const Shape &input, int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom, int32_t stride_width,
+ int32_t stride_height, int32_t filter_width, int32_t filter_height,
+ Shape *output)
+{
+ return genericPoolingPrepare(input, padding_left, padding_right, padding_top, padding_bottom,
+ stride_width, stride_height, filter_width, filter_height,
+ output);
+}
diff --git a/compiler/ann-ref/src/ops/MaxPool2D.float.cpp b/compiler/ann-ref/src/ops/MaxPool2D.float.cpp
new file mode 100644
index 000000000..d49b6aad8
--- /dev/null
+++ b/compiler/ann-ref/src/ops/MaxPool2D.float.cpp
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPool2D.float.h"
+
+#include "internal/Array.h"
+#include "internal/Matrix.h"
+#include "internal/FeatureMap.h"
+#include "internal/Fused.h"
+#include "internal/ActivationUtils.h"
+
+// From optimized_ops.h in TensorFlow Lite
+template <FusedActivationFunctionType Ac>
+void MaxPool(const float *input_data, const Dims<4> &input_dims, int stride_width,
+ int stride_height, int pad_width, int pad_height, int kwidth, int kheight,
+ float *output_data, const Dims<4> &output_dims)
+{
+ const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
+ const int input_height = ArraySize(input_dims, 2);
+ const int input_width = ArraySize(input_dims, 1);
+ const int output_height = ArraySize(output_dims, 2);
+ const int output_width = ArraySize(output_dims, 1);
+ const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
+
+ const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
+ auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+ // Prefill the output to minimum representable float value
+ out_mat.setConstant(std::numeric_limits<float>::lowest());
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int h = 0; h < input_height; ++h)
+ {
+ for (int w = 0; w < input_width; ++w)
+ {
+ // (h_start, h_end) * (w_start, w_end) is the range that the input
+ // vector projects to.
+ int hpad = h + pad_height;
+ int wpad = w + pad_width;
+ int h_start = (hpad < kheight) ? 0 : (hpad - kheight) / stride_height + 1;
+ int h_end = std::min(hpad / stride_height + 1, output_height);
+ int w_start = (wpad < kwidth) ? 0 : (wpad - kwidth) / stride_width + 1;
+ int w_end = std::min(wpad / stride_width + 1, output_width);
+ // compute elementwise sum
+ for (int ph = h_start; ph < h_end; ++ph)
+ {
+ for (int pw = w_start; pw < w_end; ++pw)
+ {
+ int out_offset = NodeOffset(b, ph, pw, output_height, output_width);
+ out_mat.col(out_offset) =
+ out_mat.col(out_offset)
+ .cwiseMax(in_mat.col(NodeOffset(b, h, w, input_height, input_width)));
+ }
+ }
+ }
+ }
+ }
+
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int y = 0; y < output_height; ++y)
+ {
+ for (int x = 0; x < output_width; ++x)
+ {
+ for (int c = 0; c < depth; ++c)
+ {
+ output_data[Offset(output_dims, c, x, y, b)] =
+ ActivationFunction<Ac>(output_data[Offset(output_dims, c, x, y, b)]);
+ }
+ }
+ }
+ }
+}
+
+#define ANDROID_NN_POOLING_PARAMETERS \
+ uint32_t height = getSizeOfDimension(inputShape, 1); \
+ uint32_t width = getSizeOfDimension(inputShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(outputShape, 2); \
+ \
+ uint32_t paddingHeight = (uint32_t)padding_top; \
+ uint32_t paddingWidth = (uint32_t)padding_left;
+
+bool maxPoolFloat32(const float *inputData, const Shape &inputShape, int32_t padding_left,
+ int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height, int32_t filter_width,
+ int32_t filter_height, int32_t activation, float *outputData,
+ const Shape &outputShape)
+{
+
+ ANDROID_NN_POOLING_PARAMETERS
+
+#define ANDROID_NN_MAX_POOL(activation) \
+ MaxPool<FusedActivationFunctionType::activation>( \
+ inputData, convertShapeToDims(inputShape), stride_width, stride_height, paddingWidth, \
+ paddingHeight, filter_width, filter_height, outputData, convertShapeToDims(outputShape))
+
+ ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_MAX_POOL)
+#undef ANDROID_NN_MAX_POOL
+
+ return true;
+}
+
+#undef ANDROID_NN_POOLING_PARAMETERS
diff --git a/compiler/ann-ref/src/ops/MaxPool2D.float.h b/compiler/ann-ref/src/ops/MaxPool2D.float.h
new file mode 100644
index 000000000..fd320f3b4
--- /dev/null
+++ b/compiler/ann-ref/src/ops/MaxPool2D.float.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_MAX_POOL_2D_FLOAT_H__
+#define __OP_MAX_POOL_2D_FLOAT_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool maxPoolFloat32(const float *inputData, const Shape &inputShape, int32_t padding_left,
+ int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height, int32_t filter_width,
+ int32_t filter_height, int32_t activation, float *outputData,
+ const Shape &outputShape);
+
+#endif // __OP_MAX_POOL_2D_FLOAT_H__
diff --git a/compiler/ann-ref/src/ops/MaxPool2D.h b/compiler/ann-ref/src/ops/MaxPool2D.h
new file mode 100644
index 000000000..e15a030bb
--- /dev/null
+++ b/compiler/ann-ref/src/ops/MaxPool2D.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_MAX_POOL_2D_H__
+#define __OP_MAX_POOL_2D_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool maxPoolPrepare(const Shape &input, int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom, int32_t stride_width,
+ int32_t stride_height, int32_t filter_width, int32_t filter_height,
+ Shape *output);
+
+#endif // __OP_MAX_POOL_2D_H__
diff --git a/compiler/ann-ref/src/ops/Mul.cpp b/compiler/ann-ref/src/ops/Mul.cpp
new file mode 100644
index 000000000..03ea9383a
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Mul.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Mul.h"
+#include "Assert.h"
+
+bool mulPrepare(const Shape &in1, const Shape &in2, Shape *out)
+{
+ ASSERT(getNumberOfDimensions(in1) <= 4 && getNumberOfDimensions(in2) <= 4);
+ ASSERT(in1.type == in2.type);
+ if (SameShape(in1, in2))
+ {
+ return SetShape(in1, out);
+ }
+ else
+ {
+ // Broadcast needed
+ uint32_t numberOfDims1 = getNumberOfDimensions(in1);
+ uint32_t numberOfDims2 = getNumberOfDimensions(in2);
+ uint32_t maxDims = std::max(numberOfDims1, numberOfDims2);
+ out->dimensions = std::vector<uint32_t>(maxDims);
+ for (uint32_t i = 1; i <= maxDims; i++)
+ {
+ uint32_t dim1 = 1;
+ if (i <= numberOfDims1)
+ {
+ dim1 = getSizeOfDimension(in1, numberOfDims1 - i);
+ }
+ uint32_t dim2 = 1;
+ if (i <= numberOfDims2)
+ {
+ dim2 = getSizeOfDimension(in2, numberOfDims2 - i);
+ }
+ if (dim1 != dim2 && dim1 != 1 && dim2 != 1)
+ {
+ LOG(ERROR) << "Dimensions mismatch for BroadcastAdd";
+ return false;
+ }
+ out->dimensions[maxDims - i] = std::max(dim1, dim2);
+ }
+ }
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/Mul.float.cpp b/compiler/ann-ref/src/ops/Mul.float.cpp
new file mode 100644
index 000000000..8a6f039d4
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Mul.float.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Mul.float.h"
+
+#include "internal/Array.h"
+#include "internal/NDArray.h"
+#include "internal/Matrix.h"
+#include "internal/Fused.h"
+#include "internal/ActivationUtils.h"
+
+template <FusedActivationFunctionType Ac>
+void Mul(const float *input1_data, const Dims<4> &input1_dims, const float *input2_data,
+ const Dims<4> &input2_dims, float *output_data, const Dims<4> &output_dims)
+{
+ MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3);
+ MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2);
+ MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1);
+ MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0);
+ DCHECK(IsPackedWithoutStrides(input1_dims));
+ DCHECK(IsPackedWithoutStrides(input2_dims));
+ DCHECK(IsPackedWithoutStrides(output_dims));
+
+ int i = 0;
+ const int size = input1_dims.sizes[3] * input1_dims.strides[3];
+
+ for (; i < size; i++)
+ {
+ auto x = input1_data[i] * input2_data[i];
+ output_data[i] = ActivationFunction<Ac>(x);
+ }
+}
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// TODO: We can implement BroadcastMul on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+// TODO: BroadcastMul is intentionally duplicated from
+// reference_ops.h. Once an optimized version is implemented and NdArrayDesc<T>
+// is no longer referenced in this file, move NdArrayDesc<T> from types.h to
+// reference_ops.h.
+template <FusedActivationFunctionType Ac>
+void BroadcastMul(const float *input1_data, const Dims<4> &input1_dims, const float *input2_data,
+ const Dims<4> &input2_dims, float *output_data, const Dims<4> &output_dims)
+{
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2);
+
+ // In Tensorflow, the dimensions are canonically named (batch_number, row,
+ // col, channel), with extents (batches, height, width, depth), with the
+ // trailing dimension changing most rapidly (channels has the smallest stride,
+ // typically 1 element).
+ //
+ // In generated C code, we store arrays with the dimensions reversed. The
+ // first dimension has smallest stride.
+ //
+ // We name our variables by their Tensorflow convention, but generate C code
+ // nesting loops such that the innermost loop has the smallest stride for the
+ // best cache behavior.
+ for (int b = 0; b < ArraySize(output_dims, 3); ++b)
+ {
+ for (int y = 0; y < ArraySize(output_dims, 2); ++y)
+ {
+ for (int x = 0; x < ArraySize(output_dims, 1); ++x)
+ {
+ for (int c = 0; c < ArraySize(output_dims, 0); ++c)
+ {
+ output_data[Offset(output_dims, c, x, y, b)] =
+ ActivationFunction<Ac>(input1_data[SubscriptToIndex(desc1, c, x, y, b)] *
+ input2_data[SubscriptToIndex(desc2, c, x, y, b)]);
+ }
+ }
+ }
+ }
+}
+
+bool mulFloat32(const float *in1, const Shape &shape1, const float *in2, const Shape &shape2,
+                int32_t activation, float *out, const Shape &shapeOut)
+{
+  bool needBroadcast = !SameShape(shape1, shape2);
+
+#define ANDROID_NN_NORMAL_MUL(activation)                                      \
+  Mul<FusedActivationFunctionType::activation>(in1, convertShapeToDims(shape1), \
+                                               in2, convertShapeToDims(shape2), \
+                                               out, convertShapeToDims(shapeOut))
+
+#define ANDROID_NN_BROADCAST_MUL(activation)                                        \
+  BroadcastMul<FusedActivationFunctionType::activation>(                            \
+      in1, convertShapeToDims(shape1), in2, convertShapeToDims(shape2), out,        \
+      convertShapeToDims(shapeOut))
+
+  if (needBroadcast)
+  {
+    ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_BROADCAST_MUL)
+  }
+  else
+  {
+    ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_NORMAL_MUL)
+  }
+
+#undef ANDROID_NN_NORMAL_MUL
+#undef ANDROID_NN_BROADCAST_MUL
+  return true;
+}
diff --git a/compiler/ann-ref/src/ops/Mul.float.h b/compiler/ann-ref/src/ops/Mul.float.h
new file mode 100644
index 000000000..bb6b9410b
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Mul.float.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_MUL_FLOAT_H__
+#define __OP_MUL_FLOAT_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool mulFloat32(const float *in1, const Shape &shape1, const float *in2, const Shape &shape2,
+ int32_t activation, float *out, const Shape &shapeOut);
+
+#endif // __OP_MUL_FLOAT_H__
diff --git a/compiler/ann-ref/src/ops/Mul.h b/compiler/ann-ref/src/ops/Mul.h
new file mode 100644
index 000000000..ed808062b
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Mul.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_MUL_H__
+#define __OP_MUL_H__
+
+#include "Shape.h"
+
+bool mulPrepare(const Shape &in1, const Shape &in2, Shape *out);
+
+#endif // __OP_MUL_H__
diff --git a/compiler/ann-ref/src/ops/Pad.cpp b/compiler/ann-ref/src/ops/Pad.cpp
new file mode 100644
index 000000000..91741762d
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Pad.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pad.h"
+#include "Assert.h"
+#include "Logging.h"
+
+#include "internal/Dims.h"
+
+#include <vector>
+#include <cstring> // For 'memset'
+
+bool padPrepare(const Shape& input, const int32_t* paddingsData, const Shape& paddingsShape,
+ Shape* output)
+{
+ // Currently only 4D tensors are supported.
+ uint32_t numInputDims = getNumberOfDimensions(input);
+ ASSERT(numInputDims == 4);
+
+ // paddings need to be provided as a 2-D int32 tensor.
+ ASSERT(paddingsShape.type == OperandType::TENSOR_INT32);
+ ASSERT(getNumberOfDimensions(paddingsShape) == 2);
+ ASSERT(getSizeOfDimension(paddingsShape, 0) == numInputDims);
+ ASSERT(getSizeOfDimension(paddingsShape, 1) == 2);
+
+ std::vector<uint32_t> outDims(numInputDims);
+ for (uint32_t i = 0; i < numInputDims; ++i)
+ {
+ int32_t beforePadding = *paddingsData++;
+ int32_t afterPadding = *paddingsData++;
+ // Pad value has to be greater than equal to 0.
+ ASSERT(beforePadding >= 0 && afterPadding >= 0);
+ outDims[i] = beforePadding + getSizeOfDimension(input, i) + afterPadding;
+ }
+ output->type = input.type;
+ output->dimensions = outDims;
+ output->offset = input.offset;
+ output->scale = input.scale;
+
+ return true;
+}
+
+namespace
+{
+
+// From optimized_ops.h in TensorFlow Lite
+template <typename T>
+inline void Pad(const T* input_data, const Dims<4>& input_dims,
+ const std::vector<int>& left_paddings,
+ const std::vector<int>& right_paddings, T* output_data,
+ const Dims<4>& output_dims) {
+ const int output_batch = ArraySize(output_dims, 3);
+ const int output_height = ArraySize(output_dims, 2);
+ const int output_width = ArraySize(output_dims, 1);
+ const int output_depth = ArraySize(output_dims, 0);
+
+ const int left_b_padding = left_paddings[3];
+ const int left_h_padding = left_paddings[2];
+ const int left_w_padding = left_paddings[1];
+ const int left_d_padding = left_paddings[0];
+
+ const int right_b_padding = right_paddings[3];
+ const int right_h_padding = right_paddings[2];
+ const int right_w_padding = right_paddings[1];
+ const int right_d_padding = right_paddings[0];
+
+ const int input_depth = ArraySize(input_dims, 0);
+
+ if (left_b_padding != 0)
+ {
+ memset(output_data, 0, left_b_padding * output_height * output_width * output_depth *
+ sizeof(T));
+ }
+ for (int out_b = left_b_padding; out_b < output_batch - right_b_padding; ++out_b)
+ {
+ if (left_h_padding != 0)
+ {
+ memset(output_data + Offset(output_dims, 0, 0, 0, out_b), 0,
+ left_h_padding * output_width * output_depth * sizeof(T));
+ }
+ for (int out_h = left_h_padding; out_h < output_height - right_h_padding; ++out_h)
+ {
+ if (left_w_padding != 0)
+ {
+ memset(output_data + Offset(output_dims, 0, 0, out_h, out_b), 0,
+ left_w_padding * output_depth * sizeof(T));
+ }
+ for (int out_w = left_w_padding; out_w < output_width - right_w_padding; ++out_w)
+ {
+ if (left_d_padding != 0)
+ {
+ memset(output_data + Offset(output_dims, 0, out_w, out_h, out_b), 0,
+ left_d_padding * sizeof(T));
+ }
+
+ T* out = output_data +
+ Offset(output_dims, left_d_padding, out_w, out_h, out_b);
+ const T* in =
+ input_data + Offset(input_dims, 0, out_w - left_w_padding,
+ out_h - left_h_padding, out_b - left_b_padding);
+ memcpy(out, in, input_depth * sizeof(T));
+
+ if (right_d_padding != 0)
+ {
+ memset(
+ output_data + Offset(output_dims, output_depth - right_d_padding,
+ out_w, out_h, out_b),
+ 0, right_d_padding * sizeof(T));
+ }
+ }
+ if (right_w_padding != 0)
+ {
+ memset(
+ output_data + Offset(output_dims, 0, output_width - right_w_padding,
+ out_h, out_b),
+ 0, right_w_padding * output_depth * sizeof(T));
+ }
+ }
+ if (right_h_padding != 0)
+ {
+ memset(output_data + Offset(output_dims, 0, 0,
+ output_height - right_h_padding, out_b),
+ 0, right_h_padding * output_width * output_depth * sizeof(T));
+ }
+ }
+ if (right_b_padding != 0)
+ {
+ memset(output_data +
+ Offset(output_dims, 0, 0, 0, output_batch - right_b_padding),
+ 0,
+ right_b_padding * output_height * output_width * output_depth *
+ sizeof(T));
+ }
+}
+
+} // namespace
+
+bool padGeneric(const uint8_t* inputData, const Shape& inputShape, const int32_t* paddings,
+ uint8_t* outputData, const Shape& outputShape)
+{
+ int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(inputShape));
+
+ std::vector<int> beforePadding;
+ std::vector<int> afterPadding;
+ // The lower level implementation expects the paddings in the reverse order.
+ for (int32_t i = numInputDims - 1; i >= 0; --i)
+ {
+ beforePadding.push_back(paddings[i * 2]);
+ afterPadding.push_back(paddings[i * 2 + 1]);
+ }
+
+ if (inputShape.type == OperandType::TENSOR_FLOAT32)
+ {
+ ::Pad(reinterpret_cast<const float*>(inputData),
+ convertShapeToDims(inputShape),
+ beforePadding, afterPadding,
+ reinterpret_cast<float*>(outputData),
+ convertShapeToDims(outputShape));
+ }
+ else if (inputShape.type == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ ::Pad(reinterpret_cast<const uint8_t*>(inputData),
+ convertShapeToDims(inputShape),
+ beforePadding, afterPadding,
+ reinterpret_cast<uint8_t*>(outputData),
+ convertShapeToDims(outputShape));
+ }
+ else
+ {
+ LOG(ERROR) << "Unsupported data type";
+ return false;
+ }
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/Pad.h b/compiler/ann-ref/src/ops/Pad.h
new file mode 100644
index 000000000..542ab8962
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Pad.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_PAD_H__
+#define __OP_PAD_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool padPrepare(const Shape& input, const int32_t* paddingsData, const Shape& paddingsShape,
+ Shape* output);
+
+bool padGeneric(const uint8_t* inputData, const Shape& inputShape, const int32_t* paddings,
+ uint8_t* outputData, const Shape& outputShape);
+
+#endif // __OP_PAD_H__
diff --git a/compiler/ann-ref/src/ops/ReLU.cpp b/compiler/ann-ref/src/ops/ReLU.cpp
new file mode 100644
index 000000000..334291ae5
--- /dev/null
+++ b/compiler/ann-ref/src/ops/ReLU.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU.h"
+
+#include "internal/Elementwise.h"
+
+bool reluPrepare(const Shape &input, Shape *output)
+{
+ return genericActivationPrepare(input, output);
+}
diff --git a/compiler/ann-ref/src/ops/ReLU.float.cpp b/compiler/ann-ref/src/ops/ReLU.float.cpp
new file mode 100644
index 000000000..df170e48e
--- /dev/null
+++ b/compiler/ann-ref/src/ops/ReLU.float.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU.float.h"
+
+#include <algorithm>
+
+bool reluFloat32(const float *inputData, const Shape &inputShape, float *outputData,
+ const Shape &outputShape)
+{
+ int numElements = getNumberOfElements(inputShape);
+ for (int i = 0; i < numElements; i++, inputData++, outputData++)
+ {
+ *outputData = std::max(0.f, *inputData);
+ }
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/ReLU.float.h b/compiler/ann-ref/src/ops/ReLU.float.h
new file mode 100644
index 000000000..4c6cf3833
--- /dev/null
+++ b/compiler/ann-ref/src/ops/ReLU.float.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_RELU_FLOAT_H__
+#define __OP_RELU_FLOAT_H__
+
+#include "Shape.h"
+
+bool reluFloat32(const float *inputData, const Shape &inputShape, float *outputData,
+ const Shape &outputShape);
+
+#endif // __OP_RELU_FLOAT_H__
diff --git a/compiler/ann-ref/src/ops/ReLU.h b/compiler/ann-ref/src/ops/ReLU.h
new file mode 100644
index 000000000..4b329fb8d
--- /dev/null
+++ b/compiler/ann-ref/src/ops/ReLU.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_RELU_H__
+#define __OP_RELU_H__
+
+#include "Shape.h"
+
+bool reluPrepare(const Shape &input, Shape *output);
+
+#endif // __OP_RELU_H__
diff --git a/compiler/ann-ref/src/ops/ReLU6.cpp b/compiler/ann-ref/src/ops/ReLU6.cpp
new file mode 100644
index 000000000..acaa58bda
--- /dev/null
+++ b/compiler/ann-ref/src/ops/ReLU6.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU6.h"
+
+#include "internal/Elementwise.h"
+
+bool relu6Prepare(const Shape &input, Shape *output)
+{
+ return genericActivationPrepare(input, output);
+}
diff --git a/compiler/ann-ref/src/ops/ReLU6.float.cpp b/compiler/ann-ref/src/ops/ReLU6.float.cpp
new file mode 100644
index 000000000..b8aa790b5
--- /dev/null
+++ b/compiler/ann-ref/src/ops/ReLU6.float.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU6.float.h"
+
+#include <algorithm>
+
+bool relu6Float32(const float *inputData, const Shape &inputShape, float *outputData,
+ const Shape &outputShape)
+{
+ int numElements = getNumberOfElements(inputShape);
+ for (int i = 0; i < numElements; i++, inputData++, outputData++)
+ {
+ *outputData = std::min(std::max(0.f, *inputData),6.f);
+ }
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/ReLU6.float.h b/compiler/ann-ref/src/ops/ReLU6.float.h
new file mode 100644
index 000000000..06c421a0b
--- /dev/null
+++ b/compiler/ann-ref/src/ops/ReLU6.float.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_RELU6_FLOAT_H__
+#define __OP_RELU6_FLOAT_H__
+
+#include "Shape.h"
+
+bool relu6Float32(const float *inputData, const Shape &inputShape, float *outputData,
+ const Shape &outputShape);
+
+#endif // __OP_RELU6_FLOAT_H__
diff --git a/compiler/ann-ref/src/ops/ReLU6.h b/compiler/ann-ref/src/ops/ReLU6.h
new file mode 100644
index 000000000..625db4b6e
--- /dev/null
+++ b/compiler/ann-ref/src/ops/ReLU6.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_RELU6_H__
+#define __OP_RELU6_H__
+
+#include "Shape.h"
+
+bool relu6Prepare(const Shape &input, Shape *output);
+
+#endif // __OP_RELU6_H__
diff --git a/compiler/ann-ref/src/ops/Reshape.cpp b/compiler/ann-ref/src/ops/Reshape.cpp
new file mode 100644
index 000000000..a88e81ae4
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Reshape.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reshape.h"
+#include "Operand.h"
+#include "Assert.h"
+
+#include <cstring>
+
+bool reshapePrepare(const Shape &input, const int32_t *targetDims, const int32_t targetDimsSize,
+ Shape *output)
+{
+ // Reshape allows one of the targetDims components to have the
+ // special -1 value, meaning it will be calculated automatically based on the
+ // input. Here we calculate what that dimension should be so that the number
+ // of output elements in the same as the number of input elements.
+ int32_t numInputElements = (int32_t)getNumberOfElements(input);
+
+ std::vector<uint32_t> outDims(targetDimsSize);
+ int32_t numOutputElements = 1;
+ int32_t strechDim = -1;
+ for (int32_t i = 0; i < targetDimsSize; ++i)
+ {
+ int32_t value = targetDims[i];
+ if (value == -1)
+ {
+ ASSERT(strechDim == -1);
+ strechDim = i;
+ }
+ else
+ {
+ numOutputElements *= value;
+ outDims[i] = (uint32_t)value;
+ }
+ }
+ if (strechDim != -1)
+ {
+ int32_t strechValue = numInputElements / numOutputElements;
+ outDims[strechDim] = (uint32_t)strechValue;
+ numOutputElements *= strechValue;
+ }
+
+ ASSERT(numInputElements == numOutputElements);
+
+ output->type = input.type;
+ output->dimensions = outDims;
+ output->offset = input.offset;
+ output->scale = input.scale;
+
+ return true;
+}
+
+bool reshapeGeneric(const void *inputData, const Shape &inputShape, void *outputData,
+ const Shape &outputShape)
+{
+ size_t count = sizeOfData(inputShape.type, inputShape.dimensions);
+ memcpy(outputData, inputData, count);
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/Reshape.h b/compiler/ann-ref/src/ops/Reshape.h
new file mode 100644
index 000000000..47609ff3c
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Reshape.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_RESHAPE_H__
+#define __OP_RESHAPE_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool reshapePrepare(const Shape &input, const int32_t *targetDims, const int32_t targetDimsSize,
+ Shape *output);
+
+bool reshapeGeneric(const void *inputData, const Shape &inputShape, void *outputData,
+ const Shape &outputShape);
+
+#endif // __OP_RESHAPE_H__
diff --git a/compiler/ann-ref/src/ops/Softmax.cpp b/compiler/ann-ref/src/ops/Softmax.cpp
new file mode 100644
index 000000000..9e9044636
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Softmax.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Softmax.h"
+
+#include "internal/Elementwise.h"
+
+bool softmaxPrepare(const Shape &input, Shape *output)
+{
+ return genericActivationPrepare(input, output);
+}
diff --git a/compiler/ann-ref/src/ops/Softmax.float.cpp b/compiler/ann-ref/src/ops/Softmax.float.cpp
new file mode 100644
index 000000000..31c29c0c6
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Softmax.float.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Softmax.float.h"
+#include "Logging.h"
+
+#include "internal/Array.h"
+#include "internal/Matrix.h"
+
+// From optimized_ops.h in TensorFlow Lite
+inline void Softmax(const float *input_data, const Dims<4> &input_dims, float beta,
+ float *output_data, const Dims<4> &output_dims)
+{
+ MatchingArraySize(input_dims, 3, output_dims, 3);
+ MatchingArraySize(input_dims, 2, output_dims, 2);
+ MatchingArraySize(input_dims, 1, output_dims, 1);
+ MatchingArraySize(input_dims, 0, output_dims, 0);
+
+ const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
+ auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+ // Compute the exponential first, removing the max coefficient for numerical
+ // stability.
+ out_mat = (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * beta;
+ // We are separating out the exp function so that exp can be vectorized.
+ out_mat = out_mat.array().exp();
+ // Normalize to get the activations.
+ Eigen::Array<float, 1, Eigen::Dynamic> scale = out_mat.array().colwise().sum().inverse();
+ out_mat.array().rowwise() *= scale;
+}
+
+bool softmaxFloat32(const float *inputData, const Shape &inputShape, const float beta,
+ float *outputData, const Shape &outputShape)
+{
+ Dims<4> dim;
+ if (getNumberOfDimensions(inputShape) == 2)
+ {
+ uint32_t batch_size = getSizeOfDimension(inputShape, 0);
+ uint32_t input_size = getNumberOfElements(inputShape) / batch_size;
+
+ Shape shapeIn4D;
+ shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
+ dim = convertShapeToDims(shapeIn4D);
+ }
+ else if (getNumberOfDimensions(inputShape) == 4)
+ {
+ dim = convertShapeToDims(inputShape);
+ }
+ else
+ {
+ LOG(ERROR) << "only 2D and 4D tensors supported";
+ return false;
+ }
+
+ Softmax(inputData, dim, beta, outputData, dim);
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/Softmax.float.h b/compiler/ann-ref/src/ops/Softmax.float.h
new file mode 100644
index 000000000..227b65807
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Softmax.float.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_SOFTMAX_FLOAT_H__
+#define __OP_SOFTMAX_FLOAT_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool softmaxFloat32(const float *inputData, const Shape &inputShape, const float beta,
+ float *outputData, const Shape &outputShape);
+
+#endif // __OP_SOFTMAX_FLOAT_H__
diff --git a/compiler/ann-ref/src/ops/Softmax.h b/compiler/ann-ref/src/ops/Softmax.h
new file mode 100644
index 000000000..a1e2e9c1b
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Softmax.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_SOFTMAX_H__
+#define __OP_SOFTMAX_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool softmaxPrepare(const Shape &input, Shape *output);
+
+#endif // __OP_SOFTMAX_H__
diff --git a/compiler/ann-ref/src/ops/Sub.cpp b/compiler/ann-ref/src/ops/Sub.cpp
new file mode 100644
index 000000000..accda9127
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Sub.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sub.h"
+#include "Assert.h"
+
+// Shape inference for SUB: validates the two input shapes and computes the
+// output shape, following NumPy-style broadcasting when the shapes differ.
+// Returns false (after logging) when the shapes cannot be broadcast together.
+bool subPrepare(const Shape &in1, const Shape &in2, Shape *out)
+{
+  // Only tensors of rank <= 4 with matching element types are supported.
+  ASSERT(getNumberOfDimensions(in1) <= 4 && getNumberOfDimensions(in2) <= 4);
+  ASSERT(in1.type == in2.type);
+  if (SameShape(in1, in2))
+  {
+    return SetShape(in1, out);
+  }
+  else
+  {
+    // BroadcastSub needed
+    uint32_t numberOfDims1 = getNumberOfDimensions(in1);
+    uint32_t numberOfDims2 = getNumberOfDimensions(in2);
+    uint32_t maxDims = std::max(numberOfDims1, numberOfDims2);
+    out->dimensions = std::vector<uint32_t>(maxDims);
+    // Walk from the innermost dimension (i == 1) outward, treating missing
+    // leading dimensions of the shorter shape as size 1.
+    for (uint32_t i = 1; i <= maxDims; i++)
+    {
+      uint32_t dim1 = 1;
+      if (i <= numberOfDims1)
+      {
+        dim1 = getSizeOfDimension(in1, numberOfDims1 - i);
+      }
+      uint32_t dim2 = 1;
+      if (i <= numberOfDims2)
+      {
+        dim2 = getSizeOfDimension(in2, numberOfDims2 - i);
+      }
+      // Broadcasting requires equal sizes, or one of the two to be 1.
+      if (dim1 != dim2 && dim1 != 1 && dim2 != 1)
+      {
+        LOG(ERROR) << "Dimensions mismatch for BroadcastSub";
+        return false;
+      }
+      out->dimensions[maxDims - i] = std::max(dim1, dim2);
+    }
+  }
+  return true;
+}
diff --git a/compiler/ann-ref/src/ops/Sub.float.cpp b/compiler/ann-ref/src/ops/Sub.float.cpp
new file mode 100644
index 000000000..deb5d9855
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Sub.float.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sub.float.h"
+
+#include "internal/Array.h"
+#include "internal/NDArray.h"
+#include "internal/Matrix.h"
+#include "internal/Fused.h"
+#include "internal/ActivationUtils.h"
+
+// Element-wise float subtraction (input1 - input2) with the fused activation
+// Ac applied to every result element. All three tensors must have identical
+// dimensions and packed (contiguous) layout.
+template <FusedActivationFunctionType Ac>
+void Sub(const float *input1_data, const Dims<4> &input1_dims, const float *input2_data,
+         const Dims<4> &input2_dims, float *output_data, const Dims<4> &output_dims)
+{
+  // Every dimension of the two inputs and the output must agree.
+  MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3);
+  MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2);
+  MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1);
+  MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0);
+  DCHECK(IsPackedWithoutStrides(input1_dims));
+  DCHECK(IsPackedWithoutStrides(input2_dims));
+  DCHECK(IsPackedWithoutStrides(output_dims));
+
+  int i = 0;
+  // For a packed tensor, total element count = outermost size * outermost stride.
+  const int size = input1_dims.sizes[3] * input1_dims.strides[3];
+
+  for (; i < size; i++)
+  {
+    auto x = input1_data[i] - input2_data[i];
+    output_data[i] = ActivationFunction<Ac>(x);
+  }
+}
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// TODO: We can implement BroadcastSub on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+// TODO: BroadcastSub is intentionally duplicated from
+// reference_ops.h. Once an optimized version is implemented and NdArrayDesc<T>
+// is no longer referenced in this file, move NdArrayDesc<T> from types.h to
+// reference_ops.h.
+// Subtraction with element-wise broadcasting: input1 - input2, activation Ac
+// applied per element. The inputs may have differing (broadcastable) shapes;
+// output_dims is the broadcast result shape computed by subPrepare().
+template <FusedActivationFunctionType Ac>
+void BroadcastSub(const float *input1_data, const Dims<4> &input1_dims, const float *input2_data,
+                  const Dims<4> &input2_dims, float *output_data, const Dims<4> &output_dims)
+{
+  // Descriptors mapping each input onto the 4-D output index space; the
+  // helper presumably flattens broadcast axes into the strides — see
+  // NdArrayDescsForElementwiseBroadcast (defined elsewhere).
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2);
+
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for the
+  // best cache behavior.
+  for (int b = 0; b < ArraySize(output_dims, 3); ++b)
+  {
+    for (int y = 0; y < ArraySize(output_dims, 2); ++y)
+    {
+      for (int x = 0; x < ArraySize(output_dims, 1); ++x)
+      {
+        for (int c = 0; c < ArraySize(output_dims, 0); ++c)
+        {
+          output_data[Offset(output_dims, c, x, y, b)] =
+              ActivationFunction<Ac>(input1_data[SubscriptToIndex(desc1, c, x, y, b)] -
+                                     input2_data[SubscriptToIndex(desc2, c, x, y, b)]);
+        }
+      }
+    }
+  }
+}
+
+// Entry point for float32 SUB: selects the plain or broadcasting kernel and
+// dispatches on the runtime `activation` code via ANDROID_NN_MACRO_DISPATCH.
+// Returns false when `activation` is not a supported fused activation.
+bool subFloat32(const float *in1, const Shape &shape1, const float *in2, const Shape &shape2,
+                int32_t activation, float *out, const Shape &shapeOut)
+{
+  // Broadcasting is needed whenever the two input shapes differ.
+  bool needBroadcast = !SameShape(shape1, shape2);
+
+#define ANDROID_NN_NORMAL_SUB(activation) \
+  Sub<FusedActivationFunctionType::activation>(in1, convertShapeToDims(shape1), \
+                                               in2, convertShapeToDims(shape2), \
+                                               out, convertShapeToDims(shapeOut))
+
+#define ANDROID_NN_BROADCAST_SUB(activation) \
+  BroadcastSub<FusedActivationFunctionType::activation>( \
+      in1, convertShapeToDims(shape1), in2, convertShapeToDims(shape2), out, \
+      convertShapeToDims(shapeOut))
+
+  if (needBroadcast)
+  {
+    ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_BROADCAST_SUB)
+  }
+  else
+  {
+    ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_NORMAL_SUB)
+  }
+
+#undef ANDROID_NN_NORMAL_SUB
+#undef ANDROID_NN_BROADCAST_SUB
+  return true;
+}
diff --git a/compiler/ann-ref/src/ops/Sub.float.h b/compiler/ann-ref/src/ops/Sub.float.h
new file mode 100644
index 000000000..d494f7576
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Sub.float.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_SUB_FLOAT_H__
+#define __OP_SUB_FLOAT_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool subFloat32(const float *in1, const Shape &shape1, const float *in2, const Shape &shape2,
+ int32_t activation, float *out, const Shape &shapeOut);
+
+#endif // __OP_SUB_FLOAT_H__
diff --git a/compiler/ann-ref/src/ops/Sub.h b/compiler/ann-ref/src/ops/Sub.h
new file mode 100644
index 000000000..d3626205b
--- /dev/null
+++ b/compiler/ann-ref/src/ops/Sub.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_SUB_H__
+#define __OP_SUB_H__
+
+#include "Shape.h"
+
+bool subPrepare(const Shape &in1, const Shape &in2, Shape *out1);
+
+#endif // __OP_SUB_H__
diff --git a/compiler/ann-ref/src/ops/internal/ActivationUtils.h b/compiler/ann-ref/src/ops/internal/ActivationUtils.h
new file mode 100644
index 000000000..9d413c6a4
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/ActivationUtils.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ACTIVATION_UTILS_H__
+#define __ACTIVATION_UTILS_H__
+
+#include "Logging.h"
+
+#define ANDROID_NN_MACRO_DISPATCH_INTERNAL(macro) \
+ case (int32_t)FusedActivationFunc::NONE: \
+ macro(kNone); \
+ break; \
+ case (int32_t)FusedActivationFunc::RELU: \
+ macro(kRelu); \
+ break; \
+ case (int32_t)FusedActivationFunc::RELU1: \
+ macro(kRelu1); \
+ break; \
+ case (int32_t)FusedActivationFunc::RELU6: \
+ macro(kRelu6); \
+ break;
+
+#define ANDROID_NN_MACRO_DISPATCH(macro) \
+ switch (activation) \
+ { \
+ ANDROID_NN_MACRO_DISPATCH_INTERNAL(macro) \
+ default: \
+ LOG(ERROR) << "Unsupported fused activation function type"; \
+ return false; \
+ }
+
+#define ANDROID_NN_MACRO_DISPATCH_WITH_DELETE(macro) \
+ switch (activation) \
+ { \
+ ANDROID_NN_MACRO_DISPATCH_INTERNAL(macro) \
+ default: \
+ LOG(ERROR) << "Unsupported fused activation function type"; \
+ if (im2colByteSize > kStaticBufferSize) \
+ { \
+ delete[] im2colData; \
+ } \
+ return false; \
+ }
+
+#endif // __ACTIVATION_UTILS_H__
diff --git a/compiler/ann-ref/src/ops/internal/Array.h b/compiler/ann-ref/src/ops/internal/Array.h
new file mode 100644
index 000000000..49a3e771b
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/Array.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ARRAY_H__
+#define __ARRAY_H__
+
+#include "Shape.h"
+#include "Dims.h"
+
+#include "Macro.h"
+
+// From types.h in TensorFlow Lite
+//
+// Get common array size, DCHECKing that they all agree.
+// Base case of the variadic overload below.
+template <typename ArrayType1, typename ArrayType2>
+int MatchingArraySize(const ArrayType1 &array1, int index1, const ArrayType2 &array2, int index2)
+{
+  DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2));
+  return ArraySize(array1, index1);
+}
+
+// From types.h in TensorFlow Lite
+//
+// Variadic overload: checks array1[index1] against every following
+// (array, index) pair by pairwise recursion, returning the common size.
+template <typename ArrayType1, typename ArrayType2, typename... Args>
+int MatchingArraySize(const ArrayType1 &array1, int index1, const ArrayType2 &array2, int index2,
+                      Args... args)
+{
+  DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2));
+  return MatchingArraySize(array1, index1, args...);
+}
+
+#endif // __ARRAY_H__
diff --git a/compiler/ann-ref/src/ops/internal/Dims.h b/compiler/ann-ref/src/ops/internal/Dims.h
new file mode 100644
index 000000000..2b3aaa65a
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/Dims.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DIMS_H__
+#define __DIMS_H__
+
+#include "Shape.h"
+#include "Macro.h"
+
+// Shape descriptor used by the TFLite-style kernels: per-dimension sizes and
+// element strides. convertShapeToDims() below fills it innermost-first with
+// strides[0] == 1.
+template <int N> struct Dims
+{
+  int sizes[N];   // extent of each dimension
+  int strides[N]; // elements (not bytes) between consecutive indices
+};
+
+// Converts an up-to-4-D Shape into a Dims<4>, reversing dimension order
+// (Shape's last dimension becomes sizes[0]) and padding missing dimensions
+// with size 1, then computing packed strides.
+inline Dims<4> convertShapeToDims(const Shape &shape)
+{
+  Dims<4> dims;
+  // Default every dimension to 1 so ranks < 4 are padded.
+  for (int i = 0; i < 4; i++)
+  {
+    dims.sizes[i] = 1;
+  }
+
+  if (shape.dimensions.size() == 1)
+  {
+    // 1-D: the single dimension becomes the innermost size.
+    dims.sizes[0] = (int)getSizeOfDimension(shape, 0);
+  }
+  else
+  {
+    // Reverse the order: shape dimension (rank - 1 - i) maps to sizes[i].
+    for (int i = 0; i < 4; i++)
+    {
+      int src = (int)shape.dimensions.size() - i - 1;
+      if (src >= 0)
+      {
+        dims.sizes[i] = (int)getSizeOfDimension(shape, src);
+      }
+    }
+  }
+
+  // Packed (contiguous) strides: stride of dim i is the product of all
+  // inner sizes.
+  dims.strides[0] = 1;
+  for (int i = 1; i < 4; i++)
+  {
+    dims.strides[i] = dims.strides[i - 1] * dims.sizes[i - 1];
+  }
+  return dims;
+}
+
+// From types.h in TensorFlow Lite
+//
+// Linear element offset of index (i0, i1, i2, i3), with per-axis bounds
+// DCHECKs.
+inline int Offset(const Dims<4> &dims, int i0, int i1, int i2, int i3)
+{
+  DCHECK(i0 >= 0 && i0 < dims.sizes[0]);
+  DCHECK(i1 >= 0 && i1 < dims.sizes[1]);
+  DCHECK(i2 >= 0 && i2 < dims.sizes[2]);
+  DCHECK(i3 >= 0 && i3 < dims.sizes[3]);
+  return i0 * dims.strides[0] + i1 * dims.strides[1] + i2 * dims.strides[2] + i3 * dims.strides[3];
+}
+
+// From types.h in TensorFlow Lite
+//
+// Get array size, DCHECKing that the dim index is in range.
+// `index` is 0-based and must be < N.
+template <int N> int ArraySize(const Dims<N> &array, int index)
+{
+  DCHECK(index >= 0 && index < N);
+  return array.sizes[index];
+}
+
+// From types.h in TensorFlow Lite
+//
+// Total element count of an N-dimensional array: the product of all sizes.
+template <int N> inline int FlatSize(const Dims<N> &dims)
+{
+  int product = 1;
+  for (int axis = 0; axis < N; ++axis)
+  {
+    product *= dims.sizes[axis];
+  }
+  return product;
+}
+
+// From types.h in TensorFlow Lite
+//
+// Smallest buffer length (in elements) that can hold every addressable index:
+// the offset of the last element plus one. Equals the flat size for packed
+// strides; may be larger when strides include padding.
+inline int RequiredBufferSizeForDims(const Dims<4> &dims)
+{
+  int max_offset = 0;
+  for (int i = 0; i < 4; i++)
+  {
+    max_offset += (dims.sizes[i] - 1) * dims.strides[i];
+  }
+  return max_offset + 1;
+}
+
+// From types.h in TensorFlow Lite
+//
+// Flat size calculation, checking that dimensions match with one or more other
+// arrays.
+// Single-check overload: DCHECKs every dimension of check_dims_0 against dims.
+template <int N> inline int MatchingFlatSize(const Dims<N> &dims, const Dims<N> &check_dims_0)
+{
+  for (int i = 0; i < N; ++i)
+  {
+    DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i));
+  }
+  return FlatSize(dims);
+}
+
+// From types.h in TensorFlow Lite
+//
+// Two-check overload: verifies check_dims_0 here, then recurses so that
+// check_dims_1 is verified by the single-check overload.
+template <int N>
+inline int MatchingFlatSize(const Dims<N> &dims, const Dims<N> &check_dims_0,
+                            const Dims<N> &check_dims_1)
+{
+  for (int i = 0; i < N; ++i)
+  {
+    DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i));
+  }
+  return MatchingFlatSize(dims, check_dims_1);
+}
+
+// From types.h in TensorFlow Lite
+//
+// Three-check overload: verifies check_dims_0 here, then recurses so the
+// remaining check arrays are verified by the smaller overloads, finally
+// returning the common flat size.
+template <int N>
+inline int MatchingFlatSize(const Dims<N> &dims, const Dims<N> &check_dims_0,
+                            const Dims<N> &check_dims_1, const Dims<N> &check_dims_2)
+{
+  for (int i = 0; i < N; ++i)
+  {
+    DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i));
+  }
+  // Fix: recurse into MatchingFlatSize. The original returned
+  // FlatSize(dims, check_dims_1, check_dims_2), which cannot compile —
+  // FlatSize takes a single Dims argument.
+  return MatchingFlatSize(dims, check_dims_1, check_dims_2);
+}
+
+// From types.h in TensorFlow Lite
+//
+// Four-check overload: verifies check_dims_0 here, then recurses so the
+// remaining check arrays are verified by the smaller overloads, finally
+// returning the common flat size.
+template <int N>
+inline int MatchingFlatSize(const Dims<N> &dims, const Dims<N> &check_dims_0,
+                            const Dims<N> &check_dims_1, const Dims<N> &check_dims_2,
+                            const Dims<N> &check_dims_3)
+{
+  for (int i = 0; i < N; ++i)
+  {
+    DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i));
+  }
+  // Fix: recurse into MatchingFlatSize. The original returned
+  // FlatSize(dims, check_dims_1, check_dims_2, check_dims_3), which cannot
+  // compile — FlatSize takes a single Dims argument.
+  return MatchingFlatSize(dims, check_dims_1, check_dims_2, check_dims_3);
+}
+
+// From types.h in TensorFlow Lite
+//
+// True when the strides describe a fully contiguous (packed) layout, i.e.
+// strides[d] equals the product of all inner sizes.
+template <int N> bool IsPackedWithoutStrides(const Dims<N> &dims)
+{
+  int expected_stride = 1;
+  for (int d = 0; d < N; d++)
+  {
+    if (dims.strides[d] != expected_stride)
+      return false;
+    expected_stride *= dims.sizes[d];
+  }
+  return true;
+}
+
+#endif // __DIMS_H__
diff --git a/compiler/ann-ref/src/ops/internal/Elementwise.cpp b/compiler/ann-ref/src/ops/internal/Elementwise.cpp
new file mode 100644
index 000000000..5615e309d
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/Elementwise.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Elementwise.h"
+#include "Assert.h"
+
+// Shape inference shared by element-wise activation ops: the output shape is
+// identical to the (rank <= 4) input shape.
+bool genericActivationPrepare(const Shape &input, Shape *output)
+{
+  ASSERT(getNumberOfDimensions(input) <= 4);
+  return SetShape(input, output);
+}
diff --git a/compiler/ann-ref/src/ops/internal/Elementwise.h b/compiler/ann-ref/src/ops/internal/Elementwise.h
new file mode 100644
index 000000000..732f9b8a2
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/Elementwise.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ELEMENTWISE_H__
+#define __ELEMENTWISE_H__
+
+#include "Shape.h"
+
+bool genericActivationPrepare(const Shape &input, Shape *output);
+
+#endif // __ELEMENTWISE_H__
diff --git a/compiler/ann-ref/src/ops/internal/FeatureMap.h b/compiler/ann-ref/src/ops/internal/FeatureMap.h
new file mode 100644
index 000000000..e4d323f62
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/FeatureMap.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FEATURE_MAP_H__
+#define __FEATURE_MAP_H__
+
+// Flattened offset of (batch b, row h, col w) in a [batches, height, width]
+// feature-map layout.
+inline int NodeOffset(int b, int h, int w, int height, int width)
+{
+  const int row_index = b * height + h;
+  return row_index * width + w;
+}
+
+#endif // __FEATURE_MAP_H__
diff --git a/compiler/ann-ref/src/ops/internal/Fused.cpp b/compiler/ann-ref/src/ops/internal/Fused.cpp
new file mode 100644
index 000000000..c50b9dea0
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/Fused.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Fused.h"
+#include "NeuralNetworks.h"
+
+static_assert(static_cast<int32_t>(FusedActivationFunc::NONE) == ANEURALNETWORKS_FUSED_NONE,
+ "FusedActivationFunc::NONE != ANEURALNETWORKS_FUSED_NONE");
+static_assert(static_cast<int32_t>(FusedActivationFunc::RELU) == ANEURALNETWORKS_FUSED_RELU,
+ "FusedActivationFunc::RELU != ANEURALNETWORKS_FUSED_RELU");
+static_assert(static_cast<int32_t>(FusedActivationFunc::RELU1) == ANEURALNETWORKS_FUSED_RELU1,
+ "FusedActivationFunc::RELU1 != ANEURALNETWORKS_FUSED_RELU1");
+static_assert(static_cast<int32_t>(FusedActivationFunc::RELU6) == ANEURALNETWORKS_FUSED_RELU6,
+ "FusedActivationFunc::RELU6 != ANEURALNETWORKS_FUSED_RELU6");
diff --git a/compiler/ann-ref/src/ops/internal/Fused.h b/compiler/ann-ref/src/ops/internal/Fused.h
new file mode 100644
index 000000000..fccd72cc3
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/Fused.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FUSED_H__
+#define __FUSED_H__
+
+#include "Dims.h"
+
+#include <cstdint>
+
+// Runtime fused-activation codes as they appear in NN API models; the values
+// are pinned to the ANEURALNETWORKS_FUSED_* constants by the static_asserts
+// in Fused.cpp.
+enum class FusedActivationFunc : int32_t {
+  NONE = 0,
+  RELU = 1,
+  RELU1 = 2,
+  RELU6 = 3,
+};
+
+// Compile-time activation selector used as a template parameter by kernels
+// (see ActivationFunction<Ac> below).
+enum class FusedActivationFunctionType
+{
+  kNone,
+  kRelu6,
+  kRelu1,
+  kRelu
+};
+
+// Primary template; only the four specializations below are defined, one per
+// FusedActivationFunctionType value.
+template <FusedActivationFunctionType Ac> struct ActivationFunctionImpl;
+
+template <> struct ActivationFunctionImpl<FusedActivationFunctionType::kNone>
+{
+  static float Eval(float x) { return x; } // identity
+};
+
+template <> struct ActivationFunctionImpl<FusedActivationFunctionType::kRelu>
+{
+  static float Eval(float x) { return x < 0.f ? 0.f : x; } // clamp below at 0
+};
+
+template <> struct ActivationFunctionImpl<FusedActivationFunctionType::kRelu1>
+{
+  static float Eval(float x) { return x > 1.f ? 1.f : x < -1.f ? -1.f : x; } // clamp to [-1, 1]
+};
+
+template <> struct ActivationFunctionImpl<FusedActivationFunctionType::kRelu6>
+{
+  static float Eval(float x) { return x > 6.f ? 6.f : x < 0.f ? 0.f : x; } // clamp to [0, 6]
+};
+
+// Applies the compile-time-selected fused activation to a single value.
+template <FusedActivationFunctionType Ac> float ActivationFunction(float x)
+{
+  return ActivationFunctionImpl<Ac>::Eval(x);
+}
+
+// Adds the bias vector to `array_data` in place, tiling the bias across the
+// array (array_size must be a multiple of bias_size, DCHECKed below), then
+// applies the fused activation Ac to each element.
+template <FusedActivationFunctionType Ac>
+void AddBiasAndEvalActivationFunction(const float *bias_data, const Dims<4> &bias_dims,
+                                      float *array_data, const Dims<4> &array_dims)
+{
+  // For packed tensors, size = outermost size * outermost stride.
+  const int bias_size = bias_dims.sizes[3] * bias_dims.strides[3];
+  const int array_size = array_dims.sizes[3] * array_dims.strides[3];
+  DCHECK_EQ((array_size % bias_size), 0);
+  for (int array_offset = 0; array_offset < array_size; array_offset += bias_size)
+  {
+    for (int i = 0; i < bias_size; i++)
+    {
+      array_data[array_offset + i] =
+          ActivationFunction<Ac>(array_data[array_offset + i] + bias_data[i]);
+    }
+  }
+}
+
+#endif // __FUSED_H__
diff --git a/compiler/ann-ref/src/ops/internal/GEMM.h b/compiler/ann-ref/src/ops/internal/GEMM.h
new file mode 100644
index 000000000..e94b35855
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/GEMM.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GEMM_H__
+#define __GEMM_H__
+
+#include "Eigen/Core"
+
+// Computes result = lhs * rhs without aliasing. A single-column right-hand
+// side is routed through the col(0) expression — presumably to hit Eigen's
+// matrix·vector product path; TODO(review) confirm.
+template <typename Lhs, typename Rhs, typename Result>
+void Gemm(const Eigen::MatrixBase<Lhs> &lhs, const Eigen::MatrixBase<Rhs> &rhs,
+          Eigen::MatrixBase<Result> *result)
+{
+  const bool rhs_is_vector = (rhs.cols() == 1);
+  if (rhs_is_vector)
+  {
+    result->col(0).noalias() = lhs * rhs.col(0);
+  }
+  else
+  {
+    result->noalias() = lhs * rhs;
+  }
+}
+
+
+#endif // __GEMM_H__
diff --git a/compiler/ann-ref/src/ops/internal/Macro.h b/compiler/ann-ref/src/ops/internal/Macro.h
new file mode 100644
index 000000000..b80a748bb
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/Macro.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MACRO_H__
+#define __MACRO_H__
+
+#include <cassert>
+#include <cstdint>
+
+// Debug-check macros in the style of TensorFlow Lite's compatibility.h.
+// They all reduce to assert(), so they compile out when NDEBUG is defined.
+// NOTE: the include guard was renamed from __COMPATIBILITY_H__ to match this
+// file's name (Macro.h) and avoid clashing with other headers copied from
+// compatibility.h.
+#ifndef DCHECK
+#define DCHECK(condition) assert((condition))
+#endif
+
+#ifndef DCHECK_EQ
+#define DCHECK_EQ(x, y) assert((x) == (y))
+#endif
+
+#ifndef DCHECK_GE
+#define DCHECK_GE(x, y) assert((x) >= (y))
+#endif
+
+#ifndef DCHECK_GT
+#define DCHECK_GT(x, y) assert((x) > (y))
+#endif
+
+#ifndef DCHECK_LE
+#define DCHECK_LE(x, y) assert((x) <= (y))
+#endif
+
+#ifndef DCHECK_LT
+#define DCHECK_LT(x, y) assert((x) < (y))
+#endif
+
+#ifndef CHECK_EQ
+#define CHECK_EQ(x, y) assert((x) == (y))
+#endif
+
+// Fixed-width integer aliases used by code ported from TensorFlow Lite.
+using uint8 = std::uint8_t;
+using int16 = std::int16_t;
+using uint16 = std::uint16_t;
+using int32 = std::int32_t;
+using uint32 = std::uint32_t;
+
+#endif // __MACRO_H__
diff --git a/compiler/ann-ref/src/ops/internal/Matrix.h b/compiler/ann-ref/src/ops/internal/Matrix.h
new file mode 100644
index 000000000..71b1fc5d7
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/Matrix.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MATRIX_H__
+#define __MATRIX_H__
+
+#include "Dims.h"
+#include "Eigen/Core"
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// Make a local VectorMap typedef allowing to map a float array
+// as a Eigen vector expression. The std::conditional here is to
+// construct the suitable Eigen type for the constness of the
+// data. Indeed, for const data, we need to produce
+// Eigen::Map<const Eigen::Matrix<float, ...>>
+// and not the more straightforward
+// Eigen::Map<Eigen::Matrix<const float, ...>>
+template <typename Scalar>
+using VectorMap = typename std::conditional<
+ std::is_const<Scalar>::value,
+ Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type, Eigen::Dynamic, 1>>,
+ Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, 1>>>::type;
+
+// Maps `data` as a flat Eigen column vector whose length is the buffer size
+// implied by `dims` (including any stride padding).
+template <typename Scalar, int N> VectorMap<Scalar> MapAsVector(Scalar *data, const Dims<N> &dims)
+{
+  const int size = RequiredBufferSizeForDims(dims);
+  return VectorMap<Scalar>(data, size, 1);
+}
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// Make a local VectorMap typedef allowing to map a float array
+// as a Eigen matrix expression. The same explanation as for VectorMap
+// above also applies here.
+template <typename Scalar>
+using MatrixMap = typename std::conditional<
+ std::is_const<Scalar>::value,
+ Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type, Eigen::Dynamic,
+ Eigen::Dynamic>>,
+ Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>>>::type;
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// Views `data` as a matrix with dims.sizes[0] rows; all remaining dimensions
+// are collapsed into the column count.
+template <typename Scalar, int N>
+MatrixMap<Scalar> MapAsMatrixWithFirstDimAsRows(Scalar *data, const Dims<N> &dims)
+{
+  const int rows = dims.sizes[0];
+  int cols = 1;
+  for (int d = 1; d < N; d++)
+  {
+    cols *= dims.sizes[d];
+  }
+  return MatrixMap<Scalar>(data, rows, cols);
+}
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// Views `data` as a matrix with dims.sizes[N-1] columns; all leading
+// dimensions are collapsed into the row count.
+template <typename Scalar, int N>
+MatrixMap<Scalar> MapAsMatrixWithLastDimAsCols(Scalar *data, const Dims<N> &dims)
+{
+  const int cols = dims.sizes[N - 1];
+  int rows = 1;
+  for (int d = 0; d < N - 1; d++)
+  {
+    rows *= dims.sizes[d];
+  }
+  return MatrixMap<Scalar>(data, rows, cols);
+}
+
+// From optimized_ops.h in TensorFlow Lite
+template <typename Scalar>
+using ArrayMap = typename std::conditional<
+ std::is_const<Scalar>::value,
+ Eigen::Map<const Eigen::Array<typename std::remove_const<Scalar>::type, Eigen::Dynamic,
+ Eigen::Dynamic>>,
+ Eigen::Map<Eigen::Array<Scalar, Eigen::Dynamic, Eigen::Dynamic>>>::type;
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// Same collapse as MapAsMatrixWithFirstDimAsRows, but returns an Eigen Array
+// map (element-wise semantics) instead of a Matrix map.
+template <typename Scalar, int N>
+ArrayMap<Scalar> MapAsArrayWithFirstDimAsRows(Scalar *data, const Dims<N> &dims)
+{
+  const int rows = dims.sizes[0];
+  int cols = 1;
+  for (int d = 1; d < N; d++)
+  {
+    cols *= dims.sizes[d];
+  }
+  return ArrayMap<Scalar>(data, rows, cols);
+}
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// TODO(b/62193649): this function is only needed as long
+// as we have the --variable_batch hack.
+//
+// Views `data` as a matrix with exactly `rows` rows. The running product of
+// dimension sizes is reset each time it reaches `rows`; whatever remains
+// after the last reset becomes the column count. DCHECK fails if no prefix
+// of the dimensions multiplies to exactly `rows`.
+template <typename Scalar, int N>
+MatrixMap<Scalar> MapAsMatrixWithGivenNumberOfRows(Scalar *data, const Dims<N> &dims, int rows)
+{
+  int cols = 1;
+  bool matched_rows = false;
+  for (int d = 0; d < N; d++)
+  {
+    cols *= dims.sizes[d];
+    if (cols == rows)
+    {
+      matched_rows = true;
+      cols = 1;
+    }
+  }
+  DCHECK(matched_rows);
+  return MatrixMap<Scalar>(data, rows, cols);
+}
+
+#endif // __MATRIX_H__
diff --git a/compiler/ann-ref/src/ops/internal/NDArray.h b/compiler/ann-ref/src/ops/internal/NDArray.h
new file mode 100644
index 000000000..14b160469
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/NDArray.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ND_ARRAY_H__
+#define __ND_ARRAY_H__
+
+#include "Dims.h"
+#include "Macro.h"
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING ELEMENT-WISE
+// BROADCASTING.
+//
+// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
+// rectangular array of numbers.
+//
+// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
+// However, as Dims<N> is to be deprecated, this class exists as an adaptor
+// to enable simple unoptimized implementations of element-wise broadcasting
+// operations.
+template <int N> struct NdArrayDesc
+{
+ // The "extent" of each dimension. Indices along dimension d must be in the
+ // half-open interval [0, extents[d]).
+ int extents[N];
+
+ // The number of *elements* (not bytes) between consecutive indices of each
+ // dimension.
+ int strides[N];
+};
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
+// ELEMENT-WISE BROADCASTING.
+//
+// Same as Offset(), except takes as NdArrayDesc<N> instead of Dims<N>.
+inline int SubscriptToIndex(const NdArrayDesc<4> &desc, int i0, int i1, int i2, int i3)
+{
+ DCHECK(i0 >= 0 && i0 < desc.extents[0]);
+ DCHECK(i1 >= 0 && i1 < desc.extents[1]);
+ DCHECK(i2 >= 0 && i2 < desc.extents[2]);
+ DCHECK(i3 >= 0 && i3 < desc.extents[3]);
+ return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] + i3 * desc.strides[3];
+}
+
+// From optimized_ops.h in TensorFlow Lite
+//
+// Given the dimensions of the operands for an element-wise binary broadcast,
+// adjusts them so that they can be directly iterated over with simple loops.
+// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and
+// 'desc1_out'. 'desc0_out' and 'desc1_out' cannot be nullptr.
+//
+// This function assumes that the two input shapes are compatible up to
+// broadcasting and the shorter one has already been prepended with 1s to be the
+// same length. E.g., if shape0 is (1, 16, 16, 64) and shape1 is (1, 64),
+// shape1 must already have been prepended to be (1, 1, 1, 64). Recall that
+// Dims<N> refer to shapes in reverse order. In this case, input0_dims will be
+// (64, 16, 16, 1) and input1_dims will be (64, 1, 1, 1).
+//
+// When two shapes are compatible up to broadcasting, for each dimension d,
+// the input extents are either equal, or one of them is 1.
+//
+// This function performs the following for each dimension d:
+// - If the extents are equal, then do nothing since the loop that walks over
+// both of the input arrays is correct.
+// - Otherwise, one (and only one) of the extents must be 1. Say extent0 is 1
+// and extent1 is e1. Then set extent0 to e1 and stride0 *to 0*. This allows
+// array0 to be referenced *at any index* in dimension d and still access the
+// same slice.
+template <int N>
+inline void
+NdArrayDescsForElementwiseBroadcast(const Dims<N> &input0_dims, const Dims<N> &input1_dims,
+ NdArrayDesc<N> *desc0_out, NdArrayDesc<N> *desc1_out)
+{
+ DCHECK(desc0_out != nullptr);
+ DCHECK(desc1_out != nullptr);
+
+ // Copy dims to desc.
+ for (int i = 0; i < N; ++i)
+ {
+ desc0_out->extents[i] = input0_dims.sizes[i];
+ desc0_out->strides[i] = input0_dims.strides[i];
+ desc1_out->extents[i] = input1_dims.sizes[i];
+ desc1_out->strides[i] = input1_dims.strides[i];
+ }
+
+ // Walk over each dimension. If the extents are equal do nothing.
+ // Otherwise, set the desc with extent 1 to have extent equal to the other and
+ // stride 0.
+ for (int i = 0; i < N; ++i)
+ {
+ const int extent0 = ArraySize(input0_dims, i);
+ const int extent1 = ArraySize(input1_dims, i);
+ if (extent0 != extent1)
+ {
+ if (extent0 == 1)
+ {
+ desc0_out->strides[i] = 0;
+ desc0_out->extents[i] = extent1;
+ }
+ else
+ {
+ DCHECK_EQ(extent1, 1);
+ desc1_out->strides[i] = 0;
+ desc1_out->extents[i] = extent0;
+ }
+ }
+ }
+}
+
+inline int NodeOffset(int b, int h, int w, int height, int width)
+{
+ return (b * height + h) * width + w;
+}
+
+#endif // __ND_ARRAY_H__
diff --git a/compiler/ann-ref/src/ops/internal/Pooling.cpp b/compiler/ann-ref/src/ops/internal/Pooling.cpp
new file mode 100644
index 000000000..a3b8cf326
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/Pooling.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pooling.h"
+#include "Spatial.h"
+
+#include "Assert.h"
+
+bool genericPoolingPrepare(const Shape &input, int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom, int32_t stride_width,
+ int32_t stride_height, int32_t filter_width, int32_t filter_height,
+ Shape *output)
+{
+ ASSERT(getNumberOfDimensions(input) == 4);
+
+ uint32_t batches = getSizeOfDimension(input, 0);
+ uint32_t width = getSizeOfDimension(input, 2);
+ uint32_t height = getSizeOfDimension(input, 1);
+ uint32_t channels_out = getSizeOfDimension(input, 3);
+
+ uint32_t outWidth =
+ computeOutSize(width, filter_width, stride_width, padding_left, padding_right);
+ uint32_t outHeight =
+ computeOutSize(height, filter_height, stride_height, padding_top, padding_bottom);
+
+ output->type = input.type;
+ output->dimensions = {batches, outHeight, outWidth, channels_out};
+ return true;
+}
diff --git a/compiler/ann-ref/src/ops/internal/Pooling.h b/compiler/ann-ref/src/ops/internal/Pooling.h
new file mode 100644
index 000000000..c55bc16cd
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/Pooling.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __POOLING_H__
+#define __POOLING_H__
+
+#include "Shape.h"
+
+#include <cstdint>
+
+bool genericPoolingPrepare(const Shape &input, int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom, int32_t stride_width,
+ int32_t stride_height, int32_t filter_width, int32_t filter_height,
+ Shape *output);
+
+
+#endif // __POOLING_H__
diff --git a/compiler/ann-ref/src/ops/internal/Spatial.h b/compiler/ann-ref/src/ops/internal/Spatial.h
new file mode 100644
index 000000000..6b8f0c11f
--- /dev/null
+++ b/compiler/ann-ref/src/ops/internal/Spatial.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SPATIAL_H__
+#define __SPATIAL_H__
+
+#include <cstdint>
+
+inline uint32_t computeOutSize(uint32_t imageSize, uint32_t filterSize, uint32_t stride,
+ uint32_t paddingHead, uint32_t paddingTail)
+{
+ return (imageSize - filterSize + stride + paddingHead + paddingTail) / stride;
+}
+
+#endif // __SPATIAL_H__
diff --git a/compiler/bino/CMakeLists.txt b/compiler/bino/CMakeLists.txt
new file mode 100644
index 000000000..519eecdc8
--- /dev/null
+++ b/compiler/bino/CMakeLists.txt
@@ -0,0 +1,14 @@
+add_library(bino INTERFACE)
+target_include_directories(bino INTERFACE include)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for internal testing
+nnas_find_package(GTest REQUIRED)
+
+file(GLOB_RECURSE TESTS "tests/*.cpp")
+
+GTest_AddTest(bino_test ${TESTS})
+target_link_libraries(bino_test bino)
diff --git a/compiler/bino/README.md b/compiler/bino/README.md
new file mode 100644
index 000000000..9d58c725d
--- /dev/null
+++ b/compiler/bino/README.md
@@ -0,0 +1,5 @@
+# bino
+
+Let's manipulate std::pair values with UNIX pipe-like syntax.
+
+**NOTE** The _bino_ originates from a binocular telescope.
diff --git a/compiler/bino/include/bino.h b/compiler/bino/include/bino.h
new file mode 100644
index 000000000..fc22d1285
--- /dev/null
+++ b/compiler/bino/include/bino.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BINO_H__
+#define __BINO_H__
+
+#include <utility>
+
+namespace bino
+{
+
+template <typename Callable> class UniformTransform
+{
+public:
+ UniformTransform(Callable &&cb) : f{std::forward<Callable>(cb)}
+ {
+ // DO NOTHING
+ }
+
+public:
+ template <typename T>
+ auto operator()(const std::pair<T, T> &p) const
+ -> decltype(std::make_pair(std::declval<Callable>()(p.first),
+ std::declval<Callable>()(p.second)))
+ {
+ return std::make_pair(f(p.first), f(p.second));
+ }
+
+private:
+ Callable f;
+};
+
+template <typename Callable> UniformTransform<Callable> transform_both(Callable &&f)
+{
+ return UniformTransform<Callable>{std::forward<Callable>(f)};
+}
+
+// TODO Implement transform_both(f, g)
+// TODO Implement transform_first(f)
+// TODO Implement transform_second(f)
+
+} // namespace bino
+
+#endif // __BINO_H__
diff --git a/compiler/bino/tests/Functional.tests.cpp b/compiler/bino/tests/Functional.tests.cpp
new file mode 100644
index 000000000..14dde6a45
--- /dev/null
+++ b/compiler/bino/tests/Functional.tests.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Let's test functionals in "bino".
+ *
+ * NOTE The tests in this file assume that operator overloading works well.
+ */
+
+#include "bino.h"
+
+#include <gtest/gtest.h>
+
+TEST(FunctionalTests, transform_both_uniform)
+{
+ auto inc = [](int n) { return n + 1; };
+ auto f = bino::transform_both(inc);
+ auto res = f(std::make_pair(1, 3));
+
+ ASSERT_EQ(res.first, 2);
+ ASSERT_EQ(res.second, 4);
+}
diff --git a/compiler/caffe2circle/CMakeLists.txt b/compiler/caffe2circle/CMakeLists.txt
new file mode 100644
index 000000000..eaf541705
--- /dev/null
+++ b/compiler/caffe2circle/CMakeLists.txt
@@ -0,0 +1,16 @@
+if(NOT TARGET mir_caffe_importer)
+ return()
+endif()
+
+if(NOT TARGET mir2loco)
+ return()
+endif()
+
+if(NOT TARGET exo)
+ return()
+endif()
+
+message(STATUS "Build caffe2circle: TRUE")
+
+add_executable(caffe2circle src/caffe2circle.cpp)
+target_link_libraries(caffe2circle PRIVATE mir_caffe_importer mir2loco exo)
diff --git a/compiler/caffe2circle/README.md b/compiler/caffe2circle/README.md
new file mode 100644
index 000000000..fe9ea26dd
--- /dev/null
+++ b/compiler/caffe2circle/README.md
@@ -0,0 +1,3 @@
+# caffe2circle
+
+_caffe2circle_ is a Caffe-to-Circle model converter.
diff --git a/compiler/caffe2circle/requires.cmake b/compiler/caffe2circle/requires.cmake
new file mode 100644
index 000000000..cc05edd84
--- /dev/null
+++ b/compiler/caffe2circle/requires.cmake
@@ -0,0 +1,3 @@
+require("mir-onnx-importer")
+require("mir2loco")
+require("exo")
diff --git a/compiler/caffe2circle/src/caffe2circle.cpp b/compiler/caffe2circle/src/caffe2circle.cpp
new file mode 100644
index 000000000..fb09c0a1c
--- /dev/null
+++ b/compiler/caffe2circle/src/caffe2circle.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <caffe_importer.h>
+#include <mir2loco.h>
+#include <exo/CircleExporter.h>
+
+#include <cstdlib>
+#include <iostream>
+
+int main(int argc, char *argv[])
+{
+ if (argc != 3)
+ {
+ std::cerr << "Usage: caffe2circle <path/to/caffe/model> <path/to/circle/model>\n";
+ return EXIT_FAILURE;
+ }
+
+ const char *caffe_path = argv[1];
+ const char *circle_path = argv[2];
+
+ std::unique_ptr<mir::Graph> mir_graph = mir_caffe::importModelFromBinaryFile(caffe_path);
+ std::unique_ptr<loco::Graph> loco_graph = mir2loco::Transformer().transform(mir_graph.get());
+ exo::CircleExporter(loco_graph.get()).dumpToFile(circle_path);
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/caffegen/CMakeLists.txt b/compiler/caffegen/CMakeLists.txt
new file mode 100644
index 000000000..334174dcd
--- /dev/null
+++ b/compiler/caffegen/CMakeLists.txt
@@ -0,0 +1,14 @@
+nnas_find_package(Caffe QUIET)
+
+if(NOT Caffe_FOUND)
+ return()
+endif(NOT Caffe_FOUND)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(caffegen ${SOURCES})
+target_link_libraries(caffegen stdex)
+target_link_libraries(caffegen cli)
+# NOTE "Caffe" package provides both caffe and caffeproto target
+# NOTE "caffeproto" is linked to "caffe"
+target_link_libraries(caffegen caffe)
diff --git a/compiler/caffegen/README.md b/compiler/caffegen/README.md
new file mode 100644
index 000000000..c322721b3
--- /dev/null
+++ b/compiler/caffegen/README.md
@@ -0,0 +1,45 @@
+# caffegen
+
+`caffegen` is a tool for generating a Caffe model and decoding the binary file of a Caffe model
+
+## How caffegen works
+
+Some of the commands in `caffegen` use standard input for reading data and standard output for exporting the result.
+In this case, we strongly recommend you to use a pipe, not copy & paste the content of the file itself.
+
+Otherwise, `caffegen` uses arguments to pass some directories.
+
+## Supported command
+
+Basically, the caffegen command is used as `caffegen [COMMAND]` and there are four `COMMAND` types.
+ - init : initialize parameters using prototxt.
+ - encode : make a binary file(caffemodel) using initialized data
+ - decode : decode a binary file(caffemodel) and reproduce the initialized data
+ - merge : copy the trained weights from a caffemodel into a prototxt file
+
+## How to use each command
+
+1. Init (Using stdin and stdout)
+ - `./build/compiler/caffegen/caffegen init`
+ - Type the prototxt by yourself
+ - Then you can get the result on the shell.
+ - `cat ./res/BVLCCaffeTests/Convolution_000/test.prototxt | ./build/compiler/caffegen/caffegen init`
+ - Prototxt will be automatically passed
+ - Then you can get the result on the shell.
+
+2. Encode (Using stdin and stdout)
+ - `./build/compiler/caffegen/caffegen encode`
+ - Type the initialized data by yourself
+ - Then you can get the result on the shell.
+ - `cat ./res/BVLCCaffeTests/Convolution_000/test.prototxt | ./build/compiler/caffegen/caffegen init | ./build/compiler/caffegen/caffegen encode > Convolution_000.caffemodel`
+ - The initialized data will be automatically passed
+ - The encoded result will be automatically saved in caffemodel file
+
+3. Decode (Using stdin and stdout)
+ - `cat Convolution_000.caffemodel | ./build/compiler/caffegen/caffegen decode`
+ - Caffemodel file will be automatically passed
+ - Then you can get the result on the shell
+
+4. Merge (Using arguments)
+ - `./build/compiler/caffegen/caffegen merge ./res/BVLCCaffeTests/Convolution_000/test.prototxt Convolution_000.caffemodel`
+ - `./build/compiler/caffegen/caffegen merge ./res/BVLCCaffeTests/Convolution_000/test.prototxt Convolution_000.caffemodel > Convolution_000.merged`
diff --git a/compiler/caffegen/src/DecodeCommand.cpp b/compiler/caffegen/src/DecodeCommand.cpp
new file mode 100644
index 000000000..02d044ed3
--- /dev/null
+++ b/compiler/caffegen/src/DecodeCommand.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DecodeCommand.h"
+
+#include <caffe/proto/caffe.pb.h>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <iostream>
+
+int DecodeCommand::run(int, const char *const *) const
+{
+ caffe::NetParameter param;
+
+ // Load binary from standard input
+ google::protobuf::io::IstreamInputStream is{&std::cin};
+ google::protobuf::io::CodedInputStream coded_is{&is};
+
+ if (!param.ParseFromCodedStream(&coded_is))
+ {
+ std::cerr << "ERROR: Failed to parse caffemodel" << std::endl;
+ return 255;
+ }
+
+ // Write text into standard output
+ google::protobuf::io::OstreamOutputStream os{&std::cout};
+ google::protobuf::TextFormat::Print(param, &os);
+
+ return 0;
+}
diff --git a/compiler/caffegen/src/DecodeCommand.h b/compiler/caffegen/src/DecodeCommand.h
new file mode 100644
index 000000000..4b43b465b
--- /dev/null
+++ b/compiler/caffegen/src/DecodeCommand.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DECODE_COMMAND_H__
+#define __DECODE_COMMAND_H__
+
+#include <cli/Command.h>
+
+struct DecodeCommand final : public cli::Command
+{
+ int run(int argc, const char *const *argv) const override;
+};
+
+#endif // __DECODE_COMMAND_H__
diff --git a/compiler/caffegen/src/Driver.cpp b/compiler/caffegen/src/Driver.cpp
new file mode 100644
index 000000000..81b01e6f1
--- /dev/null
+++ b/compiler/caffegen/src/Driver.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InitCommand.h"
+#include "EncodeCommand.h"
+#include "DecodeCommand.h"
+#include "MergeCommand.h"
+
+#include <cli/App.h>
+#include <stdex/Memory.h>
+
+#include <map>
+#include <string>
+
+using stdex::make_unique;
+
+int main(int argc, char **argv)
+{
+ cli::App app{argv[0]};
+
+ // all receive data from stdin
+ app.insert("init", make_unique<InitCommand>());
+ app.insert("encode", make_unique<EncodeCommand>());
+ app.insert("decode", make_unique<DecodeCommand>());
+ // takes 2 args: prototxt model and caffemodel weights in that order
+ app.insert("merge", make_unique<MergeCommand>());
+
+ return app.run(argc - 1, argv + 1);
+}
diff --git a/compiler/caffegen/src/EncodeCommand.cpp b/compiler/caffegen/src/EncodeCommand.cpp
new file mode 100644
index 000000000..4b35030bd
--- /dev/null
+++ b/compiler/caffegen/src/EncodeCommand.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EncodeCommand.h"
+
+#include <caffe/proto/caffe.pb.h>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <iostream>
+
+int EncodeCommand::run(int, const char *const *) const
+{
+ caffe::NetParameter param;
+
+ // Load text from standard input
+ google::protobuf::io::IstreamInputStream is{&std::cin};
+
+ if (!google::protobuf::TextFormat::Parse(&is, &param))
+ {
+ std::cerr << "ERROR: Failed to parse prototxt" << std::endl;
+ return 255;
+ }
+
+ // Write binary into standard output
+ google::protobuf::io::OstreamOutputStream os{&std::cout};
+ google::protobuf::io::CodedOutputStream coded_os{&os};
+
+ if (!param.SerializeToCodedStream(&coded_os))
+ {
+ std::cerr << "ERROR: Failed to serialize" << std::endl;
+ return 255;
+ }
+
+ return 0;
+}
diff --git a/compiler/caffegen/src/EncodeCommand.h b/compiler/caffegen/src/EncodeCommand.h
new file mode 100644
index 000000000..1115c2363
--- /dev/null
+++ b/compiler/caffegen/src/EncodeCommand.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCODE_COMMAND_H__
+#define __ENCODE_COMMAND_H__
+
+#include <cli/Command.h>
+
+struct EncodeCommand final : public cli::Command
+{
+ int run(int argc, const char *const *argv) const override;
+};
+
+#endif // __ENCODE_COMMAND_H__
diff --git a/compiler/caffegen/src/InitCommand.cpp b/compiler/caffegen/src/InitCommand.cpp
new file mode 100644
index 000000000..fd5b8a467
--- /dev/null
+++ b/compiler/caffegen/src/InitCommand.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InitCommand.h"
+
+#include <caffe/net.hpp>
+#include <caffe/util/upgrade_proto.hpp>
+#include <caffe/proto/caffe.pb.h>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <iostream>
+
+int InitCommand::run(int, const char *const *) const
+{
+ // Read prototxt from standard input
+ ::caffe::NetParameter in;
+ {
+ google::protobuf::io::IstreamInputStream is{&std::cin};
+ if (!google::protobuf::TextFormat::Parse(&is, &in))
+ {
+ std::cerr << "ERROR: Failed to parse prototxt" << std::endl;
+ return 255;
+ }
+ }
+
+ // Upgrade prototxt if necessary
+ if (::caffe::NetNeedsUpgrade(in))
+ {
+ if (!::caffe::UpgradeNetAsNeeded("<stdin>", &in))
+ {
+ std::cerr << "ERROR: Failed to upgrade prototxt" << std::endl;
+ return 255;
+ }
+ }
+
+ ::caffe::Net<float> net(in);
+
+ // Extract initialized parameters
+ ::caffe::NetParameter out;
+ {
+ net.ToProto(&out);
+ }
+
+ // Write initialized parameters to standard output
+ google::protobuf::io::OstreamOutputStream os{&std::cout};
+ google::protobuf::TextFormat::Print(out, &os);
+
+ return 0;
+}
diff --git a/compiler/caffegen/src/InitCommand.h b/compiler/caffegen/src/InitCommand.h
new file mode 100644
index 000000000..8d86a195f
--- /dev/null
+++ b/compiler/caffegen/src/InitCommand.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INIT_COMMAND_H__
+#define __INIT_COMMAND_H__
+
+#include <cli/Command.h>
+
+struct InitCommand final : public cli::Command
+{
+ int run(int argc, const char *const *argv) const override;
+};
+
+#endif // __INIT_COMMAND_H__
diff --git a/compiler/caffegen/src/MergeCommand.cpp b/compiler/caffegen/src/MergeCommand.cpp
new file mode 100644
index 000000000..4e1d863bc
--- /dev/null
+++ b/compiler/caffegen/src/MergeCommand.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MergeCommand.h"
+
+#include <caffe/proto/caffe.pb.h>
+#include <caffe/caffe.hpp>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <iostream>
+#include <string>
+
+int MergeCommand::run(int argc, const char *const *argv) const
+{
+ if (argc != 2)
+ {
+ std::cerr << "ERROR: this command requires exactly 2 arguments" << std::endl;
+ return 254;
+ }
+
+ std::string model_file = argv[0];
+ std::string trained_file = argv[1];
+
+ // Load the network
+ caffe::Net<float> caffe_test_net(model_file, caffe::TEST);
+ // Load the weights
+ caffe_test_net.CopyTrainedLayersFrom(trained_file);
+
+ caffe::NetParameter net_param;
+ caffe_test_net.ToProto(&net_param);
+
+ // Write binary with initialized params into standard output
+ google::protobuf::io::OstreamOutputStream os(&std::cout);
+ google::protobuf::io::CodedOutputStream coded_os{&os};
+
+ if (!net_param.SerializeToCodedStream(&coded_os))
+ {
+ std::cerr << "ERROR: Failed to serialize" << std::endl;
+ return 255;
+ }
+ return 0;
+}
diff --git a/compiler/caffegen/src/MergeCommand.h b/compiler/caffegen/src/MergeCommand.h
new file mode 100644
index 000000000..e0134626c
--- /dev/null
+++ b/compiler/caffegen/src/MergeCommand.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MERGE_COMMAND_H__
+#define __MERGE_COMMAND_H__
+
+#include <cli/Command.h>
+
+/**
+ * @brief Takes .prototxt and .caffemodel filenames from ARGV
+ * and fills the model with trained weights.
+ * The resulting binary model with weights to be consumed by nnc is printed to StdOut
+ * @return error code
+ */
+struct MergeCommand final : public cli::Command
+{
+ int run(int argc, const char *const *argv) const override;
+};
+
+#endif //__MERGE_COMMAND_H__
diff --git a/compiler/circle-inspect/CMakeLists.txt b/compiler/circle-inspect/CMakeLists.txt
new file mode 100644
index 000000000..222f8cb1a
--- /dev/null
+++ b/compiler/circle-inspect/CMakeLists.txt
@@ -0,0 +1,13 @@
+# circle-inspect needs the generated Circle schema; skip the tool entirely
+# when the mio_circle target is unavailable
+if(NOT TARGET mio_circle)
+  return()
+endif(NOT TARGET mio_circle)
+
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(circle-inspect ${DRIVER} ${SOURCES})
+target_include_directories(circle-inspect PRIVATE src)
+target_link_libraries(circle-inspect mio_circle)
+target_link_libraries(circle-inspect safemain)
+target_link_libraries(circle-inspect stdex)
diff --git a/compiler/circle-inspect/README.md b/compiler/circle-inspect/README.md
new file mode 100644
index 000000000..1f76c8ede
--- /dev/null
+++ b/compiler/circle-inspect/README.md
@@ -0,0 +1,22 @@
+# circle-inspect
+
+_circle-inspect_ allows users to retrieve various information from a Circle model file
+
+## Information to inspect
+
+Operators with `--operators`
+- show operator codes one line at a time in execution order
+
+Example
+```
+$ circle-inspect --operators model.circle
+```
+
+Result
+```
+RESHAPE
+DEPTHWISE_CONV_2D
+ADD
+```
+
+To get the count of a specific operator, use other tools such as sort, uniq, etc.
diff --git a/compiler/circle-inspect/driver/Driver.cpp b/compiler/circle-inspect/driver/Driver.cpp
new file mode 100644
index 000000000..d23cd0f8b
--- /dev/null
+++ b/compiler/circle-inspect/driver/Driver.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Model.h"
+#include "Dump.h"
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+#include <functional>
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+
+using OptionHook = std::function<std::unique_ptr<circleinspect::DumpInterface>(void)>;
+
+/**
+ * @brief circle-inspect entry point
+ *
+ * Parses one or more dump options from ARGV, loads the Circle model given as
+ * the last argument, and runs every requested dump against it.
+ *
+ * @return 0 on success, 255 on argument or model-load failure
+ */
+int entry(int argc, char **argv)
+{
+  if (argc < 3)
+  {
+    std::cerr << "ERROR: Failed to parse arguments" << std::endl;
+    std::cerr << std::endl;
+    std::cerr << "USAGE: " << argv[0] << " [options] [circle]" << std::endl;
+    std::cerr << "   --operators : dump operators in circle file" << std::endl;
+    std::cerr << "   --conv2d_weight : dump Conv2D series weight operators in circle file"
+              << std::endl;
+    return 255;
+  }
+
+  // Simple argument parser (based on map)
+  std::map<std::string, OptionHook> argparse;
+
+  argparse["--operators"] = [&](void) {
+    // dump all operators
+    // NOTE no std::move on return: the prvalue converts to the base-class
+    //      unique_ptr directly, and std::move would only inhibit copy elision
+    return stdex::make_unique<circleinspect::DumpOperators>();
+  };
+
+  argparse["--conv2d_weight"] = [&](void) {
+    // dump Conv2D, DepthwiseConv2D weight operators
+    return stdex::make_unique<circleinspect::DumpConv2DWeight>();
+  };
+
+  std::vector<std::unique_ptr<circleinspect::DumpInterface>> dumps;
+
+  // Every argument except the last one must be a known option
+  for (int n = 1; n < argc - 1; ++n)
+  {
+    const std::string tag{argv[n]};
+
+    auto it = argparse.find(tag);
+    if (it == argparse.end())
+    {
+      std::cerr << "Option '" << tag << "' is not supported" << std::endl;
+      return 255;
+    }
+    auto dump = it->second();
+    assert(dump != nullptr);
+    dumps.push_back(std::move(dump));
+  }
+
+  std::string model_file = argv[argc - 1];
+
+  // Load Circle model from a circle file
+  auto model = circleinspect::load_circle(model_file);
+  if (model == nullptr)
+  {
+    std::cerr << "ERROR: Failed to load circle '" << model_file << "'" << std::endl;
+    return 255;
+  }
+
+  const circle::Model *circlemodel = model->model();
+  if (circlemodel == nullptr)
+  {
+    std::cerr << "ERROR: Failed to load circle '" << model_file << "'" << std::endl;
+    return 255;
+  }
+
+  for (auto &dump : dumps)
+  {
+    dump->run(std::cout, circlemodel);
+  }
+
+  return 0;
+}
diff --git a/compiler/circle-inspect/requires.cmake b/compiler/circle-inspect/requires.cmake
new file mode 100644
index 000000000..b090dbd4d
--- /dev/null
+++ b/compiler/circle-inspect/requires.cmake
@@ -0,0 +1,3 @@
+require("mio-circle")
+require("safemain")
+require("stdex")
diff --git a/compiler/circle-inspect/src/Dump.cpp b/compiler/circle-inspect/src/Dump.cpp
new file mode 100644
index 000000000..fbc092b89
--- /dev/null
+++ b/compiler/circle-inspect/src/Dump.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dump.h"
+#include "Reader.h"
+
+#include <ostream>
+
+namespace circleinspect
+{
+
+// Print the opcode name of every operator, one per line, in execution order
+void DumpOperators::run(std::ostream &os, const circle::Model *model)
+{
+  circleinspect::Reader reader(model);
+
+  assert(reader.num_subgraph() == 1);
+  reader.select_subgraph(0);
+
+  auto operators = reader.operators();
+  const uint32_t count = operators->Length();
+
+  for (uint32_t idx = 0; idx < count; ++idx)
+  {
+    os << reader.opcode_name(operators->Get(idx)) << std::endl;
+  }
+}
+
+} // namespace circleinspect
+
+namespace
+{
+
+// Return the operator (if any) whose output list contains the given tensor index
+const circle::Operator *operator_match_output(circleinspect::Reader &reader, const int32_t tensor)
+{
+  auto all_ops = reader.operators();
+
+  for (uint32_t idx = 0; idx < all_ops->Length(); ++idx)
+  {
+    const auto candidate = all_ops->Get(idx);
+
+    for (auto out_index : circleinspect::as_index_vector(candidate->outputs()))
+    {
+      if (out_index == tensor)
+        return candidate;
+    }
+  }
+  return nullptr;
+}
+
+// Return the byte size of the buffer backing the given tensor.
+// Throws std::runtime_error when tensor_id is out of range.
+size_t tensor_buffer_size(circleinspect::Reader &reader, const int32_t tensor_id)
+{
+  auto all_tensors = reader.tensors();
+
+  if (tensor_id < 0 || tensor_id >= all_tensors->Length())
+  {
+    throw std::runtime_error("Invalid Tensor ID");
+  }
+
+  auto buffer_index = all_tensors->Get(tensor_id)->buffer();
+
+  // nullptr: only the size is needed, not the raw bytes
+  return reader.buffer_info(buffer_index, nullptr);
+}
+
+} // namespace
+
+namespace circleinspect
+{
+
+// For every Conv2D/DepthwiseConv2D operator, print "<opcode>,<weight-origin>"
+// where weight-origin is CONST (constant buffer), the producing opcode, or "?"
+void DumpConv2DWeight::run(std::ostream &os, const circle::Model *model)
+{
+  circleinspect::Reader reader(model);
+
+  assert(reader.num_subgraph() == 1);
+  reader.select_subgraph(0);
+
+  auto ops = reader.operators();
+
+  // dump Conv2D, DepthwiseConv2D and its weight input operator
+  for (uint32_t i = 0; i < ops->Length(); ++i)
+  {
+    const auto op = ops->Get(i);
+    auto bc = reader.builtin_code(op);
+
+    if (bc == circle::BuiltinOperator_CONV_2D || bc == circle::BuiltinOperator_DEPTHWISE_CONV_2D)
+    {
+      const std::vector<int32_t> &inputs = circleinspect::as_index_vector(op->inputs());
+      if (inputs.size() < 2)
+      {
+        throw std::runtime_error("Operator has invalid input");
+      }
+      auto weight_input = inputs[1]; // Tensor ID of weight input
+
+      const auto op_weight = operator_match_output(reader, weight_input);
+      const auto buffer_size = tensor_buffer_size(reader, weight_input);
+
+      std::string weight_op_name = "?";
+
+      // No producing operator but a non-empty buffer: the weight is a constant
+      if (op_weight == nullptr && buffer_size > 0)
+      {
+        weight_op_name = "CONST";
+      }
+      // Otherwise the weight comes from another operator's output
+      else if (op_weight != nullptr)
+      {
+        weight_op_name = reader.opcode_name(op_weight);
+      }
+
+      auto op_name = reader.opcode_name(op);
+      os << op_name << "," << weight_op_name << std::endl;
+    }
+  }
+}
+
+} // namespace circleinspect
diff --git a/compiler/circle-inspect/src/Dump.h b/compiler/circle-inspect/src/Dump.h
new file mode 100644
index 000000000..6afba83b3
--- /dev/null
+++ b/compiler/circle-inspect/src/Dump.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DUMP_H__
+#define __DUMP_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <ostream>
+
+namespace circleinspect
+{
+
+/**
+ * @brief Common interface of circle-inspect dumpers
+ */
+class DumpInterface
+{
+public:
+  virtual ~DumpInterface() = default;
+
+public:
+  // Write the requested information about the given model to os
+  virtual void run(std::ostream &os, const circle::Model *model) = 0;
+};
+
+/**
+ * @brief Dumps the opcode name of every operator, one per line
+ */
+class DumpOperators final : public DumpInterface
+{
+public:
+  DumpOperators() = default;
+
+public:
+  // NOTE 'override' added so signature drift from DumpInterface::run
+  //      becomes a compile error instead of a silent non-override
+  void run(std::ostream &os, const circle::Model *model) override;
+};
+
+/**
+ * @brief Dumps Conv2D/DepthwiseConv2D operators together with the origin
+ *        of their weight input
+ */
+class DumpConv2DWeight final : public DumpInterface
+{
+public:
+  DumpConv2DWeight() = default;
+
+public:
+  void run(std::ostream &os, const circle::Model *model) override;
+};
+
+} // namespace circleinspect
+
+#endif // __DUMP_H__
diff --git a/compiler/circle-inspect/src/Model.cpp b/compiler/circle-inspect/src/Model.cpp
new file mode 100644
index 000000000..1924bfafc
--- /dev/null
+++ b/compiler/circle-inspect/src/Model.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Model.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+namespace
+{
+
+// Model implementation backed by an mmap'ed file; owns both the mapping and
+// the file descriptor and releases them on destruction
+class MemoryMappedModel final : public circleinspect::Model
+{
+public:
+  /**
+   * @require fd and data SHOULD be valid
+   */
+  explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
+  {
+    // DO NOTHING
+  }
+
+public:
+  ~MemoryMappedModel()
+  {
+    munmap(_data, _size);
+    close(_fd);
+  }
+
+public:
+  // NOTE(review): only the copy/move constructors are deleted; a (deprecated)
+  //               implicit copy assignment is still generated and would
+  //               double-free — consider deleting the assignment operators too
+  MemoryMappedModel(const MemoryMappedModel &) = delete;
+  MemoryMappedModel(MemoryMappedModel &&) = delete;
+
+public:
+  // Root of the flatbuffer; valid for the lifetime of this object
+  const ::circle::Model *model(void) const override { return ::circle::GetModel(_data); }
+
+private:
+  int _fd = -1;          // owned file descriptor, closed in the destructor
+  void *_data = nullptr; // owned mmap region
+  size_t _size = 0;      // size of the mapping in bytes
+};
+
+// RAII wrapper for a POSIX file descriptor (move-only, closes on destruction)
+class FileDescriptor final
+{
+public:
+  FileDescriptor(int value) : _value{value}
+  {
+    // DO NOTHING
+  }
+
+public:
+  // NOTE Copy is not allowed
+  FileDescriptor(const FileDescriptor &) = delete;
+  FileDescriptor &operator=(const FileDescriptor &) = delete;
+
+public:
+  // NOTE Move is allowed
+  FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); }
+  FileDescriptor &operator=(FileDescriptor &&fd)
+  {
+    if (this != &fd)
+    {
+      // Close the currently owned descriptor before taking over the new one
+      if (_value != -1)
+      {
+        close(_value);
+      }
+      _value = fd.release();
+    }
+    return *this;
+  }
+
+public:
+  ~FileDescriptor()
+  {
+    if (_value != -1)
+    {
+      // Close on destruction
+      close(_value);
+    }
+  }
+
+public:
+  int value(void) const { return _value; }
+
+public:
+  // Give up ownership; the caller becomes responsible for closing
+  int release(void)
+  {
+    auto res = _value;
+    _value = -1;
+    return res;
+  }
+
+private:
+  int _value = -1;
+};
+
+} // namespace
+
+namespace circleinspect
+{
+
+/**
+ * @brief Memory-map the file at `path` and wrap it as a Model
+ *
+ * Returns nullptr when the file cannot be opened, stat'ed, or mapped,
+ * or when flatbuffers verification rejects the contents.
+ */
+std::unique_ptr<Model> load_circle(const std::string &path)
+{
+  FileDescriptor fd = open(path.c_str(), O_RDONLY);
+
+  if (fd.value() == -1)
+  {
+    // Return nullptr on open failure
+    return nullptr;
+  }
+
+  struct stat st;
+  if (fstat(fd.value(), &st) == -1)
+  {
+    // Return nullptr on fstat failure (fd closed by FileDescriptor's destructor)
+    return nullptr;
+  }
+
+  auto size = st.st_size;
+  auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0);
+
+  if (data == MAP_FAILED)
+  {
+    // Return nullptr on mmap failure
+    return nullptr;
+  }
+
+  // Check if file is a valid Flatbuffer file
+  const uint8_t *u8data = reinterpret_cast<const uint8_t *>(data);
+  flatbuffers::Verifier verifier{u8data, static_cast<size_t>(size)};
+  if (!circle::VerifyModelBuffer(verifier))
+  {
+    // Explicit cleanup: fd.release() hands the raw descriptor to close()
+    munmap(data, size);
+    close(fd.release());
+    return nullptr;
+  }
+
+  // Ownership of the descriptor and the mapping moves to MemoryMappedModel
+  return std::unique_ptr<circleinspect::Model>{new MemoryMappedModel(fd.release(), data, size)};
+}
+
+} // namespace circleinspect
diff --git a/compiler/circle-inspect/src/Model.h b/compiler/circle-inspect/src/Model.h
new file mode 100644
index 000000000..8206ed364
--- /dev/null
+++ b/compiler/circle-inspect/src/Model.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MODEL_H__
+#define __MODEL_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <memory>
+
+namespace circleinspect
+{
+
+// Abstract owner of a loaded Circle model's backing storage
+struct Model
+{
+  virtual ~Model() = default;
+
+  // Root of the flatbuffer; valid only while this Model instance is alive
+  virtual const ::circle::Model *model(void) const = 0;
+};
+
+/**
+ * @brief Load Circle model (as a raw Model) from a given path
+ *
+ * @note May return a nullptr
+ */
+std::unique_ptr<Model> load_circle(const std::string &path);
+
+} // namespace circleinspect
+
+#endif // __MODEL_H__
diff --git a/compiler/circle-inspect/src/Reader.cpp b/compiler/circle-inspect/src/Reader.cpp
new file mode 100644
index 000000000..dbbc7c75e
--- /dev/null
+++ b/compiler/circle-inspect/src/Reader.cpp
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reader.h"
+
+#include <sstream>
+#include <string>
+
+namespace circleinspect
+{
+
+// An opcode is valid when its builtin code lies inside the schema's enum range
+bool is_valid(const circle::OperatorCode *opcode)
+{
+  const auto code = opcode->builtin_code();
+  return (circle::BuiltinOperator_MIN <= code) && (code <= circle::BuiltinOperator_MAX);
+}
+
+// True when the opcode denotes a custom (non-builtin) operator
+bool is_custom(const circle::OperatorCode *opcode)
+{
+  return opcode->builtin_code() == circle::BuiltinOperator_CUSTOM;
+}
+
+/**
+ * @brief Return a printable name for the opcode
+ *
+ * "(invalid)" for out-of-range codes, the custom code string (or
+ * "(invalid custom)") for custom operators, and the builtin enum name otherwise.
+ */
+std::string opcode_name(const circle::OperatorCode *opcode)
+{
+  assert(opcode);
+
+  // NOTE no ostringstream needed to return a fixed literal
+  if (!is_valid(opcode))
+  {
+    return "(invalid)";
+  }
+
+  if (is_custom(opcode))
+  {
+    if (!opcode->custom_code())
+      return "(invalid custom)";
+
+    return opcode->custom_code()->c_str();
+  }
+
+  return circle::EnumNameBuiltinOperator(opcode->builtin_code());
+}
+
+// Return the printable name of the tensor's element type (schema enum name)
+const char *tensor_type(const circle::Tensor *tensor)
+{
+  return circle::EnumNameTensorType(tensor->type());
+}
+
+// Return the tensor's name, or a placeholder for anonymous tensors
+const char *tensor_name(const circle::Tensor *tensor)
+{
+  if (auto name = tensor->name())
+  {
+    return name->c_str();
+  }
+
+  // String literals have static storage, so returning it is safe
+  return "(noname)";
+}
+
+// Capture the subgraph/buffer tables and collect every OperatorCode up front
+// so later per-operator lookups are simple index accesses
+Reader::Reader(const circle::Model *model)
+{
+  _subgraphs = model->subgraphs();
+  _buffers = model->buffers();
+
+  auto opcodes = model->operator_codes();
+  for (const ::circle::OperatorCode *opcode : *opcodes)
+  {
+    _op_codes.push_back(opcode);
+  }
+}
+
+// Return the size (in bytes) of buffer `buf_idx`; when `buff_data` is non-null,
+// also store a pointer to the raw bytes (nullptr when the buffer is absent/empty).
+size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
+{
+  if (buff_data != nullptr)
+  {
+    *buff_data = nullptr;
+  }
+
+  // Buffer 0 is the schema's canonical "empty buffer"
+  if (buf_idx == 0)
+    return 0;
+
+  // Each level may be absent in the flatbuffer, hence the nested null checks
+  if (auto *buffer = (*_buffers)[buf_idx])
+  {
+    if (auto *array = buffer->data())
+    {
+      if (size_t size = array->size())
+      {
+        if (buff_data != nullptr)
+        {
+          *buff_data = reinterpret_cast<const uint8_t *>(array->data());
+        }
+        return size;
+      }
+    }
+  }
+
+  return 0;
+}
+
+// Resolve the operator's opcode index into its builtin operator code
+circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
+{
+  const auto index = op->opcode_index();
+  assert(index < _op_codes.size());
+
+  return _op_codes.at(index)->builtin_code();
+}
+
+// Resolve the operator's opcode index into a printable opcode name;
+// out-of-range/invalid opcodes yield "(invalid: <index>)"
+std::string Reader::opcode_name(const circle::Operator *op) const
+{
+  const auto index = op->opcode_index();
+  assert(index < _op_codes.size());
+
+  const auto opcode = _op_codes.at(index);
+  if (is_valid(opcode))
+  {
+    return circleinspect::opcode_name(opcode);
+  }
+
+  std::ostringstream oss;
+  oss << "(invalid: " << index << ")";
+  return oss.str();
+}
+
+// Make subgraph `sgindex` the current one. The cached tensor/operator tables
+// and I/O index vectors are reset first, so a failed selection leaves the
+// Reader empty. Returns false (after asserting in debug builds) when the
+// index is out of range.
+bool Reader::select_subgraph(uint32_t sgindex)
+{
+  _tensors = nullptr;
+  _operators = nullptr;
+
+  _inputs.clear();
+  _outputs.clear();
+
+  if (_subgraphs->Length() <= sgindex)
+  {
+    assert(false);
+    return false;
+  }
+
+  const circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
+
+  _tensors = subgraph->tensors();
+  _operators = subgraph->operators();
+
+  _inputs = as_index_vector(subgraph->inputs());
+  _outputs = as_index_vector(subgraph->outputs());
+
+  return true;
+}
+
+} // namespace circleinspect
diff --git a/compiler/circle-inspect/src/Reader.h b/compiler/circle-inspect/src/Reader.h
new file mode 100644
index 000000000..b5a99df3f
--- /dev/null
+++ b/compiler/circle-inspect/src/Reader.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __READER_H__
+#define __READER_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace circleinspect
+{
+
+// Copy a flatbuffers vector into a std::vector for convenient STL-style use
+template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
+{
+  const uint32_t len = flat_array->Length();
+
+  std::vector<T> ret;
+  ret.reserve(len);
+  for (uint32_t i = 0; i < len; i++)
+  {
+    ret.push_back(flat_array->Get(i));
+  }
+  return ret;
+}
+
+bool is_valid(const circle::OperatorCode *opcode);
+bool is_custom(const circle::OperatorCode *opcode);
+std::string opcode_name(const circle::OperatorCode *opcode);
+const char *tensor_type(const circle::Tensor *tensor);
+const char *tensor_name(const circle::Tensor *tensor);
+
+/**
+ * @brief Loads Circle file and provides helpers to access attributes
+ */
+class Reader
+{
+private:
+  using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
+  using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
+  using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
+  using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
+
+public:
+  Reader(const circle::Model *model);
+
+  Reader() = delete;
+
+public:
+  const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; }
+  const CircleBuffers_t *buffers() { return _buffers; }
+  // NOTE tensors()/operators()/inputs()/outputs() reflect the subgraph chosen
+  //      by select_subgraph(); tables are nullptr before a successful selection
+  const CircleTensors_t *tensors() { return _tensors; }
+  const CircleOperators_t *operators() { return _operators; }
+  const std::vector<int32_t> &inputs() const { return _inputs; }
+  const std::vector<int32_t> &outputs() const { return _outputs; }
+
+  uint32_t num_subgraph() const { return _subgraphs->Length(); }
+
+  // Size (and optionally a raw pointer) of a buffer; see Reader.cpp
+  size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
+  circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
+  std::string opcode_name(const circle::Operator *op) const;
+
+public:
+  // Returns false when the subgraph index is out of range
+  bool select_subgraph(uint32_t subgraph);
+
+private:
+  const CircleSubGraphs_t *_subgraphs{nullptr};
+  const CircleBuffers_t *_buffers{nullptr};
+  const CircleTensors_t *_tensors{nullptr};
+  const CircleOperators_t *_operators{nullptr};
+
+  std::vector<const circle::OperatorCode *> _op_codes;
+  std::vector<int32_t> _inputs;
+  std::vector<int32_t> _outputs;
+};
+
+} // namespace circleinspect
+
+#endif // __READER_H__
diff --git a/compiler/circle-verify/CMakeLists.txt b/compiler/circle-verify/CMakeLists.txt
new file mode 100644
index 000000000..2e19951e1
--- /dev/null
+++ b/compiler/circle-verify/CMakeLists.txt
@@ -0,0 +1,12 @@
+# circle-verify needs the generated Circle schema; skip the tool entirely
+# when the mio_circle target is unavailable
+if(NOT TARGET mio_circle)
+  return()
+endif(NOT TARGET mio_circle)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(circle-verify ${SOURCES})
+target_include_directories(circle-verify PRIVATE src)
+target_link_libraries(circle-verify mio_circle)
+target_link_libraries(circle-verify safemain)
+target_link_libraries(circle-verify cwrap)
+target_link_libraries(circle-verify stdex)
diff --git a/compiler/circle-verify/README.md b/compiler/circle-verify/README.md
new file mode 100644
index 000000000..1eda8a99e
--- /dev/null
+++ b/compiler/circle-verify/README.md
@@ -0,0 +1,23 @@
+# circle-verify
+
+_circle-verify_ allows users to verify Circle models.
+
+## Usage
+
+Provide _circle_ file as a parameter to verify validity.
+
+```
+$ circle-verify circlefile.circle
+```
+
+Result for valid file
+```
+[ RUN ] Check circlefile.circle
+[ PASS ] Check circlefile.circle
+```
+
+Result for invalid file
+```
+[ RUN ] Check circlefile.circle
+[ FAIL ] Check circlefile.circle
+```
diff --git a/compiler/circle-verify/requires.cmake b/compiler/circle-verify/requires.cmake
new file mode 100644
index 000000000..2509b6931
--- /dev/null
+++ b/compiler/circle-verify/requires.cmake
@@ -0,0 +1,4 @@
+require("mio-circle")
+require("safemain")
+require("cwrap")
+require("stdex")
diff --git a/compiler/circle-verify/src/Driver.cpp b/compiler/circle-verify/src/Driver.cpp
new file mode 100644
index 000000000..ad13e504f
--- /dev/null
+++ b/compiler/circle-verify/src/Driver.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VerifyFlatBuffers.h"
+
+#include <stdex/Memory.h>
+
+#include <iostream>
+#include <string>
+
+/**
+ * @brief circle-verify entry point
+ *
+ * Takes exactly one [circle] argument and reports PASS/FAIL of the
+ * flatbuffers verification; returns the verifier's result code.
+ */
+int entry(int argc, char **argv)
+{
+  if (argc != 2)
+  {
+    std::cerr << "ERROR: Failed to parse arguments" << std::endl;
+    std::cerr << std::endl;
+    std::cerr << "USAGE: " << argv[0] << " [circle]" << std::endl;
+    return 255;
+  }
+
+  const std::string model_file{argv[argc - 1]};
+  auto verifier = stdex::make_unique<VerifyFlatbuffers>();
+
+  std::cout << "[ RUN ] Check " << model_file << std::endl;
+
+  const int result = verifier->run(model_file);
+  const char *verdict = (result == 0) ? "[ PASS ] Check " : "[ FAIL ] Check ";
+
+  std::cout << verdict << model_file << std::endl;
+
+  return result;
+}
diff --git a/compiler/circle-verify/src/Model.cpp b/compiler/circle-verify/src/Model.cpp
new file mode 100644
index 000000000..efac1210d
--- /dev/null
+++ b/compiler/circle-verify/src/Model.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Model.h"
+
+#include <cwrap/Fildes.h>
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+namespace
+{
+
+// ModelData implementation backed by an mmap'ed file; owns both the mapping
+// and the file descriptor and releases them on destruction
+class MemoryMappedModel final : public ModelData
+{
+public:
+  /**
+   * @require fd and data SHOULD be valid
+   */
+  explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
+  {
+    // DO NOTHING
+  }
+
+public:
+  ~MemoryMappedModel()
+  {
+    munmap(_data, _size);
+    close(_fd);
+  }
+
+public:
+  MemoryMappedModel(const MemoryMappedModel &) = delete;
+  MemoryMappedModel(MemoryMappedModel &&) = delete;
+
+public:
+  // NOTE(review): 'const size_t' return type is meaningless for a by-value
+  //               size_t, but must stay to match the pure virtual in Model.h
+  const void *data(void) const override { return _data; };
+  const size_t size(void) const override { return _size; };
+
+private:
+  int _fd = -1;          // owned file descriptor, closed in the destructor
+  void *_data = nullptr; // owned mmap region
+  size_t _size = 0;      // size of the mapping in bytes
+};
+
+} // namespace
+
+/**
+ * @brief Memory-map the file at `path` and expose its bytes as ModelData
+ *
+ * Returns nullptr when the file cannot be opened, stat'ed, or mapped.
+ * NOTE No flatbuffers verification happens here; the caller (e.g.
+ *      VerifyFlatbuffers::run) is expected to verify the returned bytes.
+ */
+std::unique_ptr<ModelData> load_modeldata(const std::string &path)
+{
+  cwrap::Fildes fd(open(path.c_str(), O_RDONLY));
+
+  if (fd.get() == -1)
+  {
+    // Return nullptr on open failure
+    return nullptr;
+  }
+
+  struct stat st;
+  if (fstat(fd.get(), &st) == -1)
+  {
+    // Return nullptr on fstat failure (fd closed by Fildes' destructor)
+    return nullptr;
+  }
+
+  auto size = st.st_size;
+  auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.get(), 0);
+
+  if (data == MAP_FAILED)
+  {
+    // Return nullptr on mmap failure
+    return nullptr;
+  }
+
+  // fd.release() transfers ownership of the descriptor to MemoryMappedModel
+  return std::unique_ptr<ModelData>{new MemoryMappedModel(fd.release(), data, size)};
+}
diff --git a/compiler/circle-verify/src/Model.h b/compiler/circle-verify/src/Model.h
new file mode 100644
index 000000000..e1bd83971
--- /dev/null
+++ b/compiler/circle-verify/src/Model.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MODEL_H__
+#define __MODEL_H__
+
+#include <memory>
+#include <string>
+
+// Abstract owner of a model file's raw bytes (e.g. a memory mapping)
+struct ModelData
+{
+  virtual ~ModelData() = default;
+
+  // Pointer to the raw bytes; valid while this object is alive
+  virtual const void *data(void) const = 0;
+  // NOTE(review): 'const size_t' return type is meaningless for a by-value
+  //               size_t; kept because the override in Model.cpp must match
+  virtual const size_t size(void) const = 0;
+};
+
+/**
+ * @brief Load Circle model (as a raw data) from a given path
+ *
+ * @note May return a nullptr
+ */
+std::unique_ptr<ModelData> load_modeldata(const std::string &path);
+
+#endif // __MODEL_H__
diff --git a/compiler/circle-verify/src/VerifyFlatBuffers.cpp b/compiler/circle-verify/src/VerifyFlatBuffers.cpp
new file mode 100644
index 000000000..36b16685f
--- /dev/null
+++ b/compiler/circle-verify/src/VerifyFlatBuffers.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VerifyFlatBuffers.h"
+
+#include "Model.h"
+
+#include <mio/circle/schema_generated.h>
+
+/**
+ * @brief Verify that `model_file` contains a valid Circle flatbuffer
+ *
+ * @return 0 when the file verifies, -1 otherwise (including files that
+ *         cannot be opened or mapped)
+ */
+int VerifyFlatbuffers::run(const std::string &model_file)
+{
+  auto modeldata = load_modeldata(model_file);
+
+  // load_modeldata returns nullptr on open/fstat/mmap failure; report failure
+  // instead of dereferencing a null pointer
+  if (modeldata == nullptr)
+  {
+    return -1;
+  }
+
+  const uint8_t *data = reinterpret_cast<const uint8_t *>(modeldata->data());
+  flatbuffers::Verifier verifier{data, modeldata->size()};
+
+  if (!circle::VerifyModelBuffer(verifier))
+  {
+    return -1;
+  }
+
+  return 0;
+}
diff --git a/compiler/circle-verify/src/VerifyFlatBuffers.h b/compiler/circle-verify/src/VerifyFlatBuffers.h
new file mode 100644
index 000000000..c301b5b10
--- /dev/null
+++ b/compiler/circle-verify/src/VerifyFlatBuffers.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __VERIFY_FLATBUFFERS_H__
+#define __VERIFY_FLATBUFFERS_H__
+
+#include <ostream>
+#include <string>
+
+// Verifies that a file on disk is a valid Circle flatbuffer
+class VerifyFlatbuffers
+{
+public:
+  VerifyFlatbuffers() = default;
+
+public:
+  // Returns 0 when model_file verifies as a Circle model, non-zero otherwise
+  int run(const std::string &model_file);
+};
+
+#endif // __VERIFY_FLATBUFFERS_H__
diff --git a/compiler/circle2circle/CMakeLists.txt b/compiler/circle2circle/CMakeLists.txt
new file mode 100644
index 000000000..644179941
--- /dev/null
+++ b/compiler/circle2circle/CMakeLists.txt
@@ -0,0 +1,42 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+# Keep unit-test sources out of the tool binary; they are built separately below
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_executable(circle2circle "${SOURCES}")
+target_include_directories(circle2circle PRIVATE include)
+target_include_directories(circle2circle PRIVATE src)
+target_link_libraries(circle2circle nncc_common)
+target_link_libraries(circle2circle safemain)
+target_link_libraries(circle2circle stdex)
+target_link_libraries(circle2circle oops)
+target_link_libraries(circle2circle hermes)
+target_link_libraries(circle2circle hermes_std)
+target_link_libraries(circle2circle loco)
+target_link_libraries(circle2circle mio_circle)
+target_link_libraries(circle2circle luci_import)
+target_link_libraries(circle2circle luci_service)
+target_link_libraries(circle2circle luci_pass)
+target_link_libraries(circle2circle luci_export)
+
+install(TARGETS circle2circle DESTINATION bin)
+
+# Build the tests only when testing is enabled
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+# The test binary compiles the tool sources together with the *.test.cpp files
+GTest_AddTest(circle2circle_test ${TESTS} ${SOURCES})
+target_include_directories(circle2circle_test PRIVATE include)
+target_include_directories(circle2circle_test PRIVATE src)
+target_link_libraries(circle2circle_test nncc_common)
+target_link_libraries(circle2circle_test stdex)
+target_link_libraries(circle2circle_test oops)
+target_link_libraries(circle2circle_test hermes)
+target_link_libraries(circle2circle_test hermes_std)
+target_link_libraries(circle2circle_test loco)
+target_link_libraries(circle2circle_test mio_circle)
+target_link_libraries(circle2circle_test luci_import)
+target_link_libraries(circle2circle_test luci_service)
+target_link_libraries(circle2circle_test luci_pass)
+target_link_libraries(circle2circle_test luci_export)
diff --git a/compiler/circle2circle/README.md b/compiler/circle2circle/README.md
new file mode 100644
index 000000000..7bc1b7f59
--- /dev/null
+++ b/compiler/circle2circle/README.md
@@ -0,0 +1,3 @@
+# circle2circle
+
+_circle2circle_ provides Circle optimizations and quantizations as an executable tool
diff --git a/compiler/circle2circle/include/CircleExpContract.h b/compiler/circle2circle/include/CircleExpContract.h
new file mode 100644
index 000000000..313b16d22
--- /dev/null
+++ b/compiler/circle2circle/include/CircleExpContract.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
+#define __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
+
+#include <loco.h>
+#include <luci/CircleExporter.h>
+#include <luci/IR/Module.h>
+#include <mio/circle/schema_generated.h>
+
+#include <memory>
+#include <string>
+
+struct CircleExpContract : public luci::CircleExporter::Contract
+{
+public:
+ CircleExpContract(luci::Module *module, const std::string &filename)
+ : _module(module), _filepath(filename)
+ {
+ // NOTHING TO DO
+ }
+ virtual ~CircleExpContract() = default;
+
+public:
+ loco::Graph *graph(void) const final { return nullptr; }
+ luci::Module *module(void) const final { return _module; };
+
+public:
+ bool store(const char *ptr, const size_t size) const final;
+
+private:
+ luci::Module *_module;
+ const std::string _filepath;
+};
+
+#endif // __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
diff --git a/compiler/circle2circle/include/Model.h b/compiler/circle2circle/include/Model.h
new file mode 100644
index 000000000..35b7e3239
--- /dev/null
+++ b/compiler/circle2circle/include/Model.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE2CIRCLE_MODEL_H__
+#define __CIRCLE2CIRCLE_MODEL_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <memory>
+
+namespace luci
+{
+
+struct Model
+{
+ virtual ~Model() = default;
+
+ virtual const ::circle::Model *model(void) = 0;
+};
+
+/**
+ * @brief Load Circle model (as a raw Model) from a given path
+ *
+ * @note May return a nullptr
+ */
+std::unique_ptr<Model> load_model(const std::string &path);
+
+} // namespace luci
+
+#endif // __CIRCLE2CIRCLE_MODEL_H__
diff --git a/compiler/circle2circle/requires.cmake b/compiler/circle2circle/requires.cmake
new file mode 100644
index 000000000..5b1e657ca
--- /dev/null
+++ b/compiler/circle2circle/requires.cmake
@@ -0,0 +1,10 @@
+require("loco")
+require("locop")
+require("logo-core")
+require("stdex")
+require("safemain")
+require("mio-circle")
+require("oops")
+require("hermes")
+require("hermes-std")
+require("luci")
diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp
new file mode 100644
index 000000000..781825fdd
--- /dev/null
+++ b/compiler/circle2circle/src/Circle2Circle.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Model.h"
+#include "CircleExpContract.h"
+
+#include <luci/Importer.h>
+#include <luci/CircleOptimizer.h>
+#include <luci/Service/Validate.h>
+#include <luci/CircleExporter.h>
+
+#include <stdex/Memory.h>
+#include <oops/InternalExn.h>
+
+#include <functional>
+#include <iostream>
+#include <map>
+#include <string>
+
+using OptionHook = std::function<int(const char **)>;
+
+using Algorithms = luci::CircleOptimizer::Options::Algorithm;
+
+void print_help(const char *progname)
+{
+ std::cerr << "USAGE: " << progname << " [options] input output" << std::endl;
+ std::cerr << " --fuse_instnorm : Enable FuseInstanceNormalization Pass" << std::endl;
+ std::cerr << std::endl;
+}
+
+int entry(int argc, char **argv)
+{
+ if (argc < 3)
+ {
+ std::cerr << "ERROR: Failed to parse arguments" << std::endl;
+ std::cerr << std::endl;
+ print_help(argv[0]);
+ return 255;
+ }
+
+ // Simple argument parser (based on map)
+ std::map<std::string, OptionHook> argparse;
+ luci::CircleOptimizer optimizer;
+
+ auto options = optimizer.options();
+
+ // TODO merge this with help message
+ argparse["--fuse_instnorm"] = [&options](const char **) {
+ options->enable(Algorithms::FuseInstanceNorm);
+ return 0;
+ };
+
+ for (int n = 1; n < argc - 2; ++n)
+ {
+ const std::string tag{argv[n]};
+ auto it = argparse.find(tag);
+ if (it == argparse.end())
+ {
+ std::cerr << "Option '" << tag << "' is not supported" << std::endl;
+ std::cerr << std::endl;
+ print_help(argv[0]);
+ return 255;
+ }
+
+ n += it->second((const char **)&argv[n + 1]);
+ }
+
+ std::string input_path = argv[argc - 2];
+ std::string output_path = argv[argc - 1];
+
+ // Load model from the file
+ std::unique_ptr<luci::Model> model = luci::load_model(input_path);
+ if (model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load '" << input_path << "'" << std::endl;
+ return 255;
+ }
+
+ const circle::Model *input_model = model->model();
+ if (input_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to read '" << input_path << "'" << std::endl;
+ return 255;
+ }
+
+ // Import from input Circle file
+ luci::Importer importer;
+ auto module = importer.importModule(input_model);
+
+ for (size_t idx = 0; idx < module->size(); ++idx)
+ {
+ auto graph = module->graph(idx);
+
+ // call luci optimizations
+ optimizer.optimize(graph);
+
+ if (!luci::validate(graph))
+ return 255;
+ }
+
+ // Export to output Circle file
+ luci::CircleExporter exporter;
+
+ CircleExpContract contract(module.get(), output_path);
+
+ return exporter.invoke(&contract) ? 0 : 255;
+}
diff --git a/compiler/circle2circle/src/Circle2Circle.test.cpp b/compiler/circle2circle/src/Circle2Circle.test.cpp
new file mode 100644
index 000000000..015358ae7
--- /dev/null
+++ b/compiler/circle2circle/src/Circle2Circle.test.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+TEST(Circle2CircleTest, NoArg_NEG)
+{
+ Argv<1> argv;
+ argv.add("circle2circle");
+
+ ::testing::internal::CaptureStdout();
+ int result = entry(1, argv.argv());
+ ASSERT_EQ(result, 255);
+}
diff --git a/compiler/circle2circle/src/CircleExpContract.cpp b/compiler/circle2circle/src/CircleExpContract.cpp
new file mode 100644
index 000000000..b56b7eedc
--- /dev/null
+++ b/compiler/circle2circle/src/CircleExpContract.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleExpContract.h"
+
+#include <oops/InternalExn.h>
+
+#include <fstream>
+#include <iostream>
+
+bool CircleExpContract::store(const char *ptr, const size_t size) const
+{
+ if (!ptr)
+ INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
+
+ std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
+ fs.write(ptr, size);
+
+ return fs.good();
+}
diff --git a/compiler/circle2circle/src/Model.cpp b/compiler/circle2circle/src/Model.cpp
new file mode 100644
index 000000000..20d55a131
--- /dev/null
+++ b/compiler/circle2circle/src/Model.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Model.h"
+
+#include <fstream>
+#include <vector>
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+namespace
+{
+
+class FileModel final : public luci::Model
+{
+public:
+ explicit FileModel(const std::string &filename) : _filename(filename) {}
+
+public:
+ FileModel(const FileModel &) = delete;
+ FileModel(FileModel &&) = delete;
+
+public:
+ const ::circle::Model *model(void) override
+ {
+ std::ifstream file(_filename, std::ios::binary | std::ios::in);
+ if (!file.good())
+ return nullptr;
+
+ file.unsetf(std::ios::skipws);
+
+ std::streampos fileSize;
+ file.seekg(0, std::ios::end);
+ fileSize = file.tellg();
+ file.seekg(0, std::ios::beg);
+
+ // reserve capacity
+ _data.reserve(fileSize);
+
+ // read the data
+ file.read(_data.data(), fileSize);
+ if (file.fail())
+ return nullptr;
+
+ return ::circle::GetModel(_data.data());
+ }
+
+private:
+ const std::string _filename;
+ std::vector<char> _data;
+};
+
+} // namespace
+
+namespace luci
+{
+
+std::unique_ptr<Model> load_model(const std::string &path)
+{
+ return std::unique_ptr<Model>{new FileModel(path)};
+}
+
+} // namespace luci
diff --git a/compiler/circle2circle/src/TestHelper.h b/compiler/circle2circle/src/TestHelper.h
new file mode 100644
index 000000000..f4dbe23a9
--- /dev/null
+++ b/compiler/circle2circle/src/TestHelper.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE2CIRCLE_TEST_HELPER_H__
+#define __CIRCLE2CIRCLE_TEST_HELPER_H__
+
+#include <cassert>
+#include <cstdio>
+#include <string.h>
+
+int entry(int argc, char **argv);
+
+template <size_t N> class Argv
+{
+public:
+ typedef char *pchar_t;
+
+public:
+ ~Argv()
+ {
+ for (size_t n = 0; n < _ptr; ++n)
+ delete _argv[n];
+ }
+
+ void add(const char *in)
+ {
+ assert(_ptr < N);
+ _argv[_ptr] = new char[strlen(in) + 1];
+ strcpy(_argv[_ptr], in);
+ _ptr++;
+ }
+
+ pchar_t *argv(void) { return _argv; }
+
+private:
+ pchar_t _argv[N] = {
+ nullptr,
+ };
+ size_t _ptr = 0;
+};
+
+#endif // __CIRCLE2CIRCLE_TEST_HELPER_H__
diff --git a/compiler/circledump/CMakeLists.txt b/compiler/circledump/CMakeLists.txt
new file mode 100644
index 000000000..a117e7285
--- /dev/null
+++ b/compiler/circledump/CMakeLists.txt
@@ -0,0 +1,14 @@
+if(NOT TARGET mio_circle)
+ return()
+endif(NOT TARGET mio_circle)
+
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(circledump ${DRIVER} ${SOURCES})
+target_include_directories(circledump PRIVATE include)
+target_link_libraries(circledump mio_circle)
+target_link_libraries(circledump safemain)
+target_link_libraries(circledump stdex)
+target_link_libraries(circledump flatbuffers)
diff --git a/compiler/circledump/README.md b/compiler/circledump/README.md
new file mode 100644
index 000000000..686e918ac
--- /dev/null
+++ b/compiler/circledump/README.md
@@ -0,0 +1,71 @@
+# circledump
+
+### What is this?
+
+circledump is a tool that dumps a binary circle file as human-readable text to the console.
+
+circledump is implemented in C++, not Python. The same task would be much easier
+in Python, but this tool does not require installing the TensorFlow Python package.
+
+The FlatBuffer schema used is from the TensorFlow v1.13.1 release.
+
+### Design philosophy
+
+Make the code simple.
+
+### To do
+
+- Print weight values other than uint8_t
+- Add more operators
+
+### How to use
+
+Command argument format:
+```
+circledump circle_file
+```
+
+Example output of dump `readme.circle` file
+```
+Dump: readme.circle
+
+Data Format:
+CHANNEL_LAST (NHWC for 2d, NDHWC for 3d data)
+
+Operator Codes: [order] OpCodeName (OpCode Enum)
+[0] CONV_2D (code: 3)
+
+Buffers: B(index) (length) values, if any
+B(0) (0)
+B(1) (8) 0x94 0x5b 0x95 0xbf 0x42 0xa4 0x52 0xbf ...
+B(2) (4) 0xcd 0xcc 0x8c 0x3f
+
+Operands: T(tensor index) TYPE (shape) B(buffer index) OperandName
+T(0) FLOAT32 (1, 3, 3, 2) B(0) ifm
+T(1) FLOAT32 (1, 1, 1, 2) B(1) ker
+T(2) FLOAT32 (1) B(2) bias
+T(3) FLOAT32 (1, 3, 3, 1) B(0) ofm
+
+Operators: O(operator index) OpCodeName
+ Option(values) ... <-- depending on OpCode
+ I T(tensor index) OperandName <-- as input
+ O T(tensor index) OperandName <-- as output
+O(0) CONV_2D
+ Padding(1) Stride.W(1) Stride.H(1) Activation(0)
+ I T(0) ifm
+ I T(1) ker
+ I T(2) bias
+ O T(3) ofm
+
+Inputs/Outputs: I(input)/O(output) T(tensor index) OperandName
+I T(0) ifm
+I T(1) ker
+O T(3) ofm
+```
+
+### Dependency
+
+- mio-circle
+- safemain
+- stdex
+- FlatBuffers
diff --git a/compiler/circledump/driver/Driver.cpp b/compiler/circledump/driver/Driver.cpp
new file mode 100644
index 000000000..8ed88e1d8
--- /dev/null
+++ b/compiler/circledump/driver/Driver.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <circleread/Model.h>
+#include <circledump/Dump.h>
+
+#include <iostream>
+
+int entry(int argc, char **argv)
+{
+ if (argc != 2)
+ {
+ std::cerr << "ERROR: Failed to parse arguments" << std::endl;
+ std::cerr << std::endl;
+ std::cerr << "USAGE: " << argv[0] << " [circle]" << std::endl;
+ return 255;
+ }
+
+ // Load Circle model from a circle file
+ std::unique_ptr<circleread::Model> model = circleread::load_circle(argv[1]);
+ if (model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << argv[1] << "'" << std::endl;
+ return 255;
+ }
+
+ const circle::Model *circlemodel = model->model();
+ if (circlemodel == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << argv[1] << "'" << std::endl;
+ return 255;
+ }
+
+ std::cout << "Dump: " << argv[1] << std::endl << std::endl;
+
+ std::cout << circlemodel << std::endl;
+
+ return 0;
+}
diff --git a/compiler/circledump/include/circledump/Dump.h b/compiler/circledump/include/circledump/Dump.h
new file mode 100644
index 000000000..a129458f4
--- /dev/null
+++ b/compiler/circledump/include/circledump/Dump.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLEDUMP_DUMP_H__
+#define __CIRCLEDUMP_DUMP_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <ostream>
+
+namespace circledump
+{
+
+void dump_model(std::ostream &os, const circle::Model *model);
+}
+
+std::ostream &operator<<(std::ostream &os, const circle::Model *model);
+
+#endif // __CIRCLEDUMP_DUMP_H__
diff --git a/compiler/circledump/include/circleread/Model.h b/compiler/circledump/include/circleread/Model.h
new file mode 100644
index 000000000..234db8b4c
--- /dev/null
+++ b/compiler/circledump/include/circleread/Model.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLEREAD_MODEL_H__
+#define __CIRCLEREAD_MODEL_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <memory>
+
+namespace circleread
+{
+
+struct Model
+{
+ virtual ~Model() = default;
+
+ virtual const ::circle::Model *model(void) const = 0;
+};
+
+/**
+ * @brief Load Circle model (as a raw Model) from a given path
+ *
+ * @note May return a nullptr
+ */
+std::unique_ptr<Model> load_circle(const std::string &path);
+
+} // namespace circleread
+
+#endif // __CIRCLEREAD_MODEL_H__
diff --git a/compiler/circledump/requires.cmake b/compiler/circledump/requires.cmake
new file mode 100644
index 000000000..b090dbd4d
--- /dev/null
+++ b/compiler/circledump/requires.cmake
@@ -0,0 +1,3 @@
+require("mio-circle")
+require("safemain")
+require("stdex")
diff --git a/compiler/circledump/src/Dump.cpp b/compiler/circledump/src/Dump.cpp
new file mode 100644
index 000000000..3d99189f9
--- /dev/null
+++ b/compiler/circledump/src/Dump.cpp
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <circledump/Dump.h>
+
+#include "Read.h"
+#include "OpPrinter.h"
+
+#include <ostream>
+
+#include <algorithm> // min
+#include <iomanip> // setfill
+
+namespace circledump
+{
+
+void dump_buffer(std::ostream &os, const uint8_t *buffer, size_t size, size_t amount)
+{
+ std::ios_base::fmtflags saveflags(os.flags());
+
+ bool second = false;
+ bool ellipsis = amount > 0 && size > 4;
+ size_t count = ellipsis ? std::min(size, amount) : size;
+
+ for (size_t i = 0; i < count; i++)
+ {
+ if (second)
+ {
+ os << " ";
+ }
+
+ os << std::showbase << std::setfill('0') << std::setw(2);
+ os << std::hex << (uint32_t)buffer[i];
+
+ second = true;
+ }
+ if (ellipsis)
+ {
+ os << " ...";
+ }
+
+ os.flags(saveflags);
+}
+
+void dump_vector(std::ostream &os, const std::vector<int32_t> &vs)
+{
+ uint32_t seq = 0;
+ for (auto &v : vs)
+ {
+ if (seq)
+ os << ", ";
+ os << v;
+ seq++;
+ }
+}
+
+std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect)
+{
+ circledump::dump_vector(os, vect);
+ return os;
+}
+
+template <typename T> void dump_fbvect(std::ostream &os, const flatbuffers::Vector<T> *fbvect)
+{
+ if (fbvect == nullptr)
+ return;
+
+ bool ellipsis = (fbvect->size() > 4);
+ auto limit_size = ellipsis ? 4 : fbvect->size();
+
+ if (ellipsis)
+ {
+ os << "(" << fbvect->size() << ") ";
+ }
+ for (uint32_t q = 0; q < limit_size; q++)
+ {
+ if (q)
+ os << ", ";
+ os << fbvect->Get(q);
+ }
+ if (ellipsis)
+ {
+ os << " ... ";
+ }
+}
+
+template <typename T>
+std::ostream &operator<<(std::ostream &os, const flatbuffers::Vector<T> *fbvect)
+{
+ dump_fbvect(os, fbvect);
+ return os;
+}
+
+void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
+{
+ auto tensors = reader.tensors();
+ auto operators = reader.operators();
+ auto data_format = reader.data_format();
+
+ // dump data_format
+ os << "Data Format:" << std::endl;
+ if (data_format == circle::DataFormat::DataFormat_CHANNELS_LAST)
+ {
+ os << "CHANNEL_LAST (NHWC for 2d, NDHWC for 3d data)" << std::endl;
+ }
+ else if (data_format == circle::DataFormat::DataFormat_CHANNELS_FIRST)
+ {
+ os << "CHANNEL_FIRST (NCHW for 2d, NCDHW for 3d data)" << std::endl;
+ }
+ os << std::endl;
+
+ // dump operands(tensors)
+ os << "Operands: T(subgraph index : tensor index) TYPE (shape) B(buffer index) OperandName"
+ << std::endl;
+ for (uint32_t i = 0; i < tensors->Length(); ++i)
+ {
+ // TODO refactor to some better structure
+ auto tensor = tensors->Get(i);
+ std::vector<int32_t> dims = {-1};
+
+ if (tensor->shape())
+ dims = circleread::as_index_vector(tensor->shape());
+
+ os << "T(" << reader.subgraph_index() << ":" << i << ") " << circleread::tensor_type(tensor)
+ << " ";
+ os << "(" << dims << ") ";
+ os << "B(" << tensor->buffer() << ") ";
+ os << circleread::tensor_name(tensor) << std::endl;
+
+ if (auto q_params = tensor->quantization())
+ {
+ if ((q_params->min() && q_params->max()) || (q_params->scale() && q_params->zero_point()))
+ {
+ std::string strquantiz = " Quantization: ";
+ std::string strqindent(strquantiz.size(), ' ');
+ os << strquantiz;
+
+ if (q_params->min())
+ {
+ os << "min(" << q_params->min() << ") ";
+ if (q_params->min()->size() > 1)
+ os << std::endl << strqindent;
+ }
+ if (q_params->max())
+ {
+ os << "max(" << q_params->max() << ") ";
+ if (q_params->max()->size() > 1)
+ os << std::endl << strqindent;
+ }
+ if (q_params->scale())
+ {
+ os << "scale(" << q_params->scale() << ") ";
+ if (q_params->scale()->size() > 1)
+ os << std::endl << strqindent;
+ }
+ if (q_params->zero_point())
+ os << "zeropt(" << q_params->zero_point() << ") ";
+
+ os << std::endl;
+ }
+ }
+ }
+ os << std::endl;
+
+ // dump operators
+ os << "Operators: O(subgraph index : operator index) OpCodeName " << std::endl;
+ os << " Option(values) ... <-- depending on OpCode" << std::endl;
+ os << " I T(tensor index) OperandName <-- as input" << std::endl;
+ os << " O T(tensor index) OperandName <-- as output" << std::endl;
+ for (uint32_t i = 0; i < operators->Length(); ++i)
+ {
+ const auto op = operators->Get(i);
+ circle::BuiltinOperator builtincode = reader.builtin_code(op);
+
+ const std::vector<int32_t> &inputs = circleread::as_index_vector(op->inputs());
+ const std::vector<int32_t> &outputs = circleread::as_index_vector(op->outputs());
+ auto op_name = reader.opcode_name(op);
+
+ os << "O(" << reader.subgraph_index() << ":" << i << ") " << op_name << " ";
+ os << std::endl;
+
+ if (auto op_prn = OpPrinterRegistry::get().lookup(builtincode))
+ {
+ op_prn->options(op, os);
+ }
+
+ for (auto input : inputs)
+ {
+ os << " I T(" << input << ") ";
+ if (input >= 0)
+ {
+ auto tensor = tensors->Get(input);
+ os << circleread::tensor_name(tensor);
+ }
+ os << std::endl;
+ }
+ for (auto output : outputs)
+ {
+ os << " O T(" << output << ") ";
+ if (output >= 0)
+ {
+ auto tensor = tensors->Get(output);
+ os << circleread::tensor_name(tensor);
+ }
+ os << std::endl;
+ }
+ }
+ os << std::endl;
+
+ // dump network inputs/outputs
+ os << "Inputs/Outputs: I(input)/O(output) T(tensor index) OperandName" << std::endl;
+
+ for (const auto input : reader.inputs())
+ {
+ auto tensor = tensors->Get(input);
+ std::string name = circleread::tensor_name(tensor);
+ os << "I T(" << input << ") " << name << std::endl;
+ }
+
+ for (const auto output : reader.outputs())
+ {
+ auto tensor = tensors->Get(output);
+ std::string name = circleread::tensor_name(tensor);
+ os << "O T(" << output << ") " << name << std::endl;
+ }
+
+ os << std::endl;
+}
+
+void dump_model(std::ostream &os, const circle::Model *model)
+{
+ circleread::Reader reader(model);
+
+ uint32_t num_subgraph = reader.num_subgraph();
+
+ // dump model version
+ os << "===================================================================" << std::endl;
+ os << "Model version: " << reader.version() << std::endl;
+ os << " # sub graphs: " << num_subgraph << std::endl;
+ os << std::endl;
+
+ auto opcodes = reader.opcodes();
+ auto buffers = reader.buffers();
+
+ // dump operator_codes
+ os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl;
+ int32_t opcode_index = 0;
+ for (auto opcode : opcodes)
+ {
+ circle::BuiltinOperator op_code = opcode->builtin_code();
+ auto op_name = circleread::opcode_name(opcode);
+ auto op_version = opcode->version();
+
+ os << "[" << opcode_index << "] " << op_name << " (code: " << op_code
+ << ", version: " << op_version << ")" << std::endl;
+
+ opcode_index++;
+ }
+ os << std::endl;
+
+ // dump buffer
+ os << "Buffers: B(index) (length) values, if any" << std::endl;
+ for (uint32_t i = 0; i < buffers->Length(); ++i)
+ {
+ const uint8_t *buff_data;
+ size_t size = reader.buffer_info(i, &buff_data);
+
+ os << "B(" << i << ") (" << size << ") ";
+ if (buff_data != nullptr)
+ {
+ dump_buffer(os, buff_data, size, 16);
+ }
+ os << std::endl;
+ }
+ os << std::endl;
+
+ for (uint32_t sg = 0; sg < num_subgraph; ++sg)
+ {
+ reader.select_subgraph(sg);
+
+ os << "-------------------------------------------------------------------" << std::endl;
+ os << "Sub-Graph: #" << sg << " " << reader.subgraph_name() << std::endl;
+ os << std::endl;
+
+ dump_sub_graph(os, reader);
+ }
+
+ os << "===================================================================" << std::endl;
+}
+
+} // namespace circledump
+
+std::ostream &operator<<(std::ostream &os, const circle::Model *model)
+{
+ circledump::dump_model(os, model);
+ return os;
+}
diff --git a/compiler/circledump/src/Load.cpp b/compiler/circledump/src/Load.cpp
new file mode 100644
index 000000000..ec91ed189
--- /dev/null
+++ b/compiler/circledump/src/Load.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <circleread/Model.h>
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+namespace
+{
+
+class MemoryMappedModel final : public circleread::Model
+{
+public:
+ /**
+ * @require fd and data SHOULD be valid
+ */
+ explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ ~MemoryMappedModel()
+ {
+ munmap(_data, _size);
+ close(_fd);
+ }
+
+public:
+ MemoryMappedModel(const MemoryMappedModel &) = delete;
+ MemoryMappedModel(MemoryMappedModel &&) = delete;
+
+public:
+ const ::circle::Model *model(void) const override { return ::circle::GetModel(_data); }
+
+private:
+ int _fd = -1;
+ void *_data = nullptr;
+ size_t _size = 0;
+};
+
+class FileDescriptor final
+{
+public:
+ FileDescriptor(int value) : _value{value}
+ {
+ // DO NOTHING
+ }
+
+public:
+ // NOTE Copy is not allowed
+ FileDescriptor(const FileDescriptor &) = delete;
+
+public:
+ // NOTE Move is allowed
+ FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); }
+
+public:
+ ~FileDescriptor()
+ {
+ if (_value != -1)
+ {
+      // Close on destruction
+ close(_value);
+ }
+ }
+
+public:
+ int value(void) const { return _value; }
+
+public:
+ int release(void)
+ {
+ auto res = _value;
+ _value = -1;
+ return res;
+ }
+
+private:
+ int _value = -1;
+};
+
+} // namespace
+
+namespace circleread
+{
+
+std::unique_ptr<Model> load_circle(const std::string &path)
+{
+ FileDescriptor fd = open(path.c_str(), O_RDONLY);
+
+ if (fd.value() == -1)
+ {
+ // Return nullptr on open failure
+ return nullptr;
+ }
+
+ struct stat st;
+ if (fstat(fd.value(), &st) == -1)
+ {
+ // Return nullptr on fstat failure
+ return nullptr;
+ }
+
+ auto size = st.st_size;
+ auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0);
+
+ if (data == MAP_FAILED)
+ {
+ // Return nullptr on mmap failure
+ return nullptr;
+ }
+
+ return std::unique_ptr<circleread::Model>{new MemoryMappedModel(fd.release(), data, size)};
+}
+
+} // namespace circleread
diff --git a/compiler/circledump/src/OpPrinter.cpp b/compiler/circledump/src/OpPrinter.cpp
new file mode 100644
index 000000000..f9daab494
--- /dev/null
+++ b/compiler/circledump/src/OpPrinter.cpp
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OpPrinter.h"
+#include "Read.h"
+
+#include <stdex/Memory.h>
+
+#include <flatbuffers/flexbuffers.h>
+
+using stdex::make_unique;
+
+namespace circledump
+{
+
+// TODO move to some header
+std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect);
+
+// TODO Re-arrange in alphabetical order
+
+class AddPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_AddOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class ArgMaxPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_ArgMaxOptions())
+ {
+ os << " ";
+ os << "OutputType(" << EnumNameTensorType(params->output_type()) << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class Conv2DPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto conv_params = op->builtin_options_as_Conv2DOptions())
+ {
+ os << " ";
+ os << "Padding(" << conv_params->padding() << ") ";
+ os << "Stride.W(" << conv_params->stride_w() << ") ";
+ os << "Stride.H(" << conv_params->stride_h() << ") ";
+ os << "Activation("
+ << EnumNameActivationFunctionType(conv_params->fused_activation_function()) << ")";
+ os << std::endl;
+ }
+ }
+};
+
+class DivPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_DivOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class Pool2DPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto pool_params = op->builtin_options_as_Pool2DOptions())
+ {
+ os << " ";
+ os << "Padding(" << pool_params->padding() << ") ";
+ os << "Stride.W(" << pool_params->stride_w() << ") ";
+ os << "Stride.H(" << pool_params->stride_h() << ") ";
+ os << "Filter.W(" << pool_params->filter_width() << ") ";
+ os << "Filter.H(" << pool_params->filter_height() << ") ";
+ os << "Activation("
+ << EnumNameActivationFunctionType(pool_params->fused_activation_function()) << ")";
+ os << std::endl;
+ }
+ }
+};
+
+class ConcatenationPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *concatenation_params = op->builtin_options_as_ConcatenationOptions())
+ {
+ os << " ";
+ os << "Activation("
+ << EnumNameActivationFunctionType(concatenation_params->fused_activation_function())
+ << ") ";
+ os << "Axis(" << concatenation_params->axis() << ")";
+ os << std::endl;
+ }
+ }
+};
+
+class ReshapePrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *reshape_params = op->builtin_options_as_ReshapeOptions())
+ {
+ auto new_shape = circleread::as_index_vector(reshape_params->new_shape());
+ os << " ";
+ os << "NewShape(" << new_shape << ")";
+ os << std::endl;
+ }
+ }
+};
+
+class DepthwiseConv2DPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto conv_params = op->builtin_options_as_DepthwiseConv2DOptions())
+ {
+ os << " ";
+ os << "Padding(" << conv_params->padding() << ") ";
+ os << "Stride.W(" << conv_params->stride_w() << ") ";
+ os << "Stride.H(" << conv_params->stride_h() << ") ";
+ os << "DepthMultiplier(" << conv_params->depth_multiplier() << ") ";
+ os << "Dilation.W(" << conv_params->dilation_w_factor() << ") ";
+ os << "Dilation.H(" << conv_params->dilation_h_factor() << ")";
+ os << "Activation("
+ << EnumNameActivationFunctionType(conv_params->fused_activation_function()) << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class FullyConnectedPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_FullyConnectedOptions())
+ {
+ os << " ";
+ os << "WeightFormat(" << EnumNameFullyConnectedOptionsWeightsFormat(params->weights_format())
+ << ") ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+
+ os << std::endl;
+ }
+ }
+};
+
+class MulPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_MulOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class PackPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_PackOptions())
+ {
+ os << " ";
+ os << "ValuesCount(" << params->values_count() << ") ";
+ os << "Axis(" << params->axis() << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class SoftmaxPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *softmax_params = op->builtin_options_as_SoftmaxOptions())
+ {
+ os << " ";
+ os << "Beta(" << softmax_params->beta() << ")";
+ os << std::endl;
+ }
+ }
+};
+
+class SubPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_SubOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class CustomOpPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (op->custom_options_format() != circle::CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS)
+ {
+ os << " ";
+ os << "Unknown custom option format";
+ return;
+ }
+
+ const flatbuffers::Vector<uint8_t> *option_buf = op->custom_options();
+
+ if (option_buf == nullptr || option_buf->size() == 0)
+ {
+ os << "No attrs found." << std::endl;
+ return;
+ }
+
+ // printing attrs
+ // attrs of custom ops are encoded in flexbuffer format
+ auto attr_map = flexbuffers::GetRoot(option_buf->data(), option_buf->size()).AsMap();
+
+ os << " ";
+ auto keys = attr_map.Keys();
+ for (int i = 0; i < keys.size(); i++)
+ {
+ auto key = keys[i].ToString();
+ os << key << "(" << attr_map[key].ToString() << ") ";
+ }
+
+ // Note: attr in "Shape" type does not seem to be converted by circle_convert.
+ // When the converted circle file (with custom op) is opened with hexa editory,
+ // attrs names can be found but attr name in "Shape" type is not found.
+
+ os << std::endl;
+ }
+};
+
// Populate the builtin-operator -> printer table.
// Operators without an entry simply have no options to dump.
OpPrinterRegistry::OpPrinterRegistry()
{
  _op_map[circle::BuiltinOperator_ADD] = make_unique<AddPrinter>();
  _op_map[circle::BuiltinOperator_ARG_MAX] = make_unique<ArgMaxPrinter>();
  // Average-pool and max-pool share Pool2DOptions, hence one printer for both
  _op_map[circle::BuiltinOperator_AVERAGE_POOL_2D] = make_unique<Pool2DPrinter>();
  _op_map[circle::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>();
  _op_map[circle::BuiltinOperator_CONV_2D] = make_unique<Conv2DPrinter>();
  _op_map[circle::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>();
  _op_map[circle::BuiltinOperator_DIV] = make_unique<DivPrinter>();
  _op_map[circle::BuiltinOperator_FULLY_CONNECTED] = make_unique<FullyConnectedPrinter>();
  _op_map[circle::BuiltinOperator_MAX_POOL_2D] = make_unique<Pool2DPrinter>();
  _op_map[circle::BuiltinOperator_MUL] = make_unique<MulPrinter>();
  _op_map[circle::BuiltinOperator_PACK] = make_unique<PackPrinter>();
  // There is no Option for Pad
  // There is no Option for ReLU and ReLU6
  _op_map[circle::BuiltinOperator_RESHAPE] = make_unique<ReshapePrinter>();
  _op_map[circle::BuiltinOperator_SOFTMAX] = make_unique<SoftmaxPrinter>();
  _op_map[circle::BuiltinOperator_SUB] = make_unique<SubPrinter>();
  _op_map[circle::BuiltinOperator_CUSTOM] = make_unique<CustomOpPrinter>();
}
+
+} // namespace circledump
diff --git a/compiler/circledump/src/OpPrinter.h b/compiler/circledump/src/OpPrinter.h
new file mode 100644
index 000000000..6b978a4c7
--- /dev/null
+++ b/compiler/circledump/src/OpPrinter.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLEDUMP_OPPRINTER_H__
+#define __CIRCLEDUMP_OPPRINTER_H__
+
#include <mio/circle/schema_generated.h>

#include <map>
#include <memory>
#include <ostream>
+
+namespace circledump
+{
+
+class OpPrinter
+{
+public:
+ virtual void options(const circle::Operator *, std::ostream &) const {};
+};
+
+class OpPrinterRegistry
+{
+public:
+ OpPrinterRegistry();
+
+public:
+ const OpPrinter *lookup(circle::BuiltinOperator op) const
+ {
+ if (_op_map.find(op) == _op_map.end())
+ return nullptr;
+
+ return _op_map.at(op).get();
+ }
+
+public:
+ static OpPrinterRegistry &get()
+ {
+ static OpPrinterRegistry me;
+ return me;
+ }
+
+private:
+ std::map<circle::BuiltinOperator, std::unique_ptr<OpPrinter>> _op_map;
+};
+
+} // namespace circledump
+
+#endif // __CIRCLEDUMP_OPPRINTER_H__
diff --git a/compiler/circledump/src/Read.cpp b/compiler/circledump/src/Read.cpp
new file mode 100644
index 000000000..053225536
--- /dev/null
+++ b/compiler/circledump/src/Read.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include "Read.h"

#include <cassert>
#include <sstream>
#include <string>
+
+namespace circleread
+{
+
+bool is_valid(const circle::OperatorCode *opcode)
+{
+ circle::BuiltinOperator code = opcode->builtin_code();
+ return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
+}
+
+bool is_custom(const circle::OperatorCode *opcode)
+{
+ circle::BuiltinOperator code = opcode->builtin_code();
+ return (code == circle::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const circle::OperatorCode *opcode)
+{
+ assert(opcode);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ std::string custom_op = "CUSTOM(";
+ custom_op += opcode->custom_code()->c_str();
+ custom_op += ")";
+ return custom_op;
+ }
+
+ circle::BuiltinOperator code = opcode->builtin_code();
+ return circle::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const circle::Tensor *tensor)
+{
+ return circle::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const circle::Tensor *tensor)
+{
+ static const char *kEmptyTensorName = "(noname)";
+
+ auto name = tensor->name();
+ if (name)
+ return name->c_str();
+
+ return kEmptyTensorName;
+}
+
+Reader::Reader(const circle::Model *model)
+{
+ _version = model->version();
+ _subgraphs = model->subgraphs();
+ _buffers = model->buffers();
+
+ auto opcodes = model->operator_codes();
+ for (const ::circle::OperatorCode *opcode : *opcodes)
+ {
+ _op_codes.push_back(opcode);
+ }
+}
+
+size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
+{
+ *buff_data = nullptr;
+
+ if (buf_idx == 0)
+ return 0;
+
+ if (auto *buffer = (*_buffers)[buf_idx])
+ {
+ if (auto *array = buffer->data())
+ {
+ if (size_t size = array->size())
+ {
+ *buff_data = reinterpret_cast<const uint8_t *>(array->data());
+ return size;
+ }
+ }
+ }
+
+ return 0;
+}
+
+circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const circle::OperatorCode *opcode = _op_codes.at(index);
+
+ return opcode->builtin_code();
+}
+
+std::string Reader::opcode_name(const circle::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const circle::OperatorCode *opcode = _op_codes.at(index);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid: " << index << ")";
+ return oss.str();
+ }
+
+ return circleread::opcode_name(opcode);
+}
+
+bool Reader::select_subgraph(uint32_t sgindex)
+{
+ _subgraph_index = sgindex;
+ _tensors = nullptr;
+ _operators = nullptr;
+
+ _inputs.clear();
+ _outputs.clear();
+
+ if (_subgraphs->Length() <= sgindex)
+ {
+ assert(false);
+ return false;
+ }
+
+ const circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
+
+ auto name = subgraph->name();
+ _subgraph_name = name ? name->c_str() : "(noname)";
+
+ _tensors = subgraph->tensors();
+ _operators = subgraph->operators();
+ _data_format = subgraph->data_format();
+
+ _inputs = as_index_vector(subgraph->inputs());
+ _outputs = as_index_vector(subgraph->outputs());
+
+ return true;
+}
+
+} // namespace circleread
diff --git a/compiler/circledump/src/Read.h b/compiler/circledump/src/Read.h
new file mode 100644
index 000000000..dd1ef20b6
--- /dev/null
+++ b/compiler/circledump/src/Read.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLEREAD_READ_H__
+#define __CIRCLEREAD_READ_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace circleread
+{
+
+template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
+{
+ std::vector<T> ret(flat_array->Length());
+ for (uint32_t i = 0; i < flat_array->Length(); i++)
+ {
+ ret[i] = flat_array->Get(i);
+ }
+ return ret;
+}
+
+bool is_valid(const circle::OperatorCode *opcode);
+bool is_custom(const circle::OperatorCode *opcode);
+std::string opcode_name(const circle::OperatorCode *opcode);
+const char *tensor_type(const circle::Tensor *tensor);
+const char *tensor_name(const circle::Tensor *tensor);
+
+/**
+ * @brief Loads Circle file and provides helpers to access attributes
+ */
class Reader
{
private:
  // Aliases for the flatbuffer vector types stored in a circle model
  using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
  using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
  using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
  using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;

public:
  Reader(const circle::Model *model);

  Reader() = delete;

public:
  uint32_t version() const { return _version; }

  // Model-wide accessors (valid after construction)
  const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; }
  const CircleBuffers_t *buffers() { return _buffers; }
  // Per-subgraph accessors: tensors/operators/inputs/outputs/data_format are
  // only populated after a successful select_subgraph() call
  const CircleTensors_t *tensors() { return _tensors; }
  const CircleOperators_t *operators() { return _operators; }
  const std::vector<int32_t> &inputs() const { return _inputs; }
  const std::vector<int32_t> &outputs() const { return _outputs; }
  const circle::DataFormat &data_format() const { return _data_format; }

  uint32_t num_subgraph() const { return _subgraphs->Length(); }

  // Writes a pointer to buffer buf_idx's bytes into *buff_data (nullptr when
  // absent) and returns the byte count (0 when absent)
  size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
  circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
  std::string opcode_name(const circle::Operator *op) const;

public:
  // Switch the per-subgraph accessors to the given subgraph; false on bad index
  bool select_subgraph(uint32_t subgraph);
  const std::string &subgraph_name(void) const { return _subgraph_name; }
  uint32_t subgraph_index(void) const { return _subgraph_index; }

private:
  uint32_t _version;

  const CircleSubGraphs_t *_subgraphs{nullptr};
  const CircleBuffers_t *_buffers{nullptr};
  const CircleTensors_t *_tensors{nullptr};
  const CircleOperators_t *_operators{nullptr};

  uint32_t _subgraph_index;
  std::string _subgraph_name;
  std::vector<const circle::OperatorCode *> _op_codes;
  std::vector<int32_t> _inputs;
  std::vector<int32_t> _outputs;
  circle::DataFormat _data_format;
};
+
+} // namespace circleread
+
+#endif // __CIRCLEREAD_READ_H__
diff --git a/compiler/cli/CMakeLists.txt b/compiler/cli/CMakeLists.txt
new file mode 100644
index 000000000..22948fff9
--- /dev/null
+++ b/compiler/cli/CMakeLists.txt
@@ -0,0 +1,15 @@
list(APPEND SOURCES "src/App.cpp")
list(APPEND TESTS "src/App.test.cpp")

add_library(cli ${SOURCES})
target_include_directories(cli PUBLIC include)

# Tests are optional: skip them when Google Test is unavailable
nnas_find_package(GTest QUIET)

if(NOT GTest_FOUND)
  return()
endif(NOT GTest_FOUND)

# FIX: consistent function-name casing (was "GTest_AddTEst"; CMake commands
# are case-insensitive, so this is cosmetic but matches sibling projects)
GTest_AddTest(cli_test ${TESTS})
target_link_libraries(cli_test cli)
target_link_libraries(cli_test stdex)
diff --git a/compiler/cli/README.md b/compiler/cli/README.md
new file mode 100644
index 000000000..6095c73bf
--- /dev/null
+++ b/compiler/cli/README.md
@@ -0,0 +1,13 @@
+# cli
+
+`cli` is a CLI (Command Line Interface) application framework.
+
+# Background
+
+Many tools in `nncc` are command-line interface (CLI) applications. They generally need to handle command line parameters.
+`cli` was written to reduce code duplication across such applications.
+
+
+# How to use
+
+Please refer to `cli/src/App.test.cpp` for an example.
diff --git a/compiler/cli/include/cli/App.h b/compiler/cli/include/cli/App.h
new file mode 100644
index 000000000..61554e933
--- /dev/null
+++ b/compiler/cli/include/cli/App.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CLI_APP_H__
+#define __CLI_APP_H__
+
+#include "Command.h"
+
+#include <map>
+#include <memory>
+#include <string>
+
+namespace cli
+{
+
/**
 * @brief Dispatches a named sub-command (argv[0]) to a registered Command
 */
class App
{
public:
  explicit App(const std::string &name);

public:
  // Register "command" under "tag"; returns *this to allow chaining
  App &insert(const std::string &tag, std::unique_ptr<Command> &&command);

public:
  // Run the command named by argv[0] with the remaining arguments;
  // returns the command's exit status, or 255 on a missing/unknown command
  int run(int argc, const char *const *argv) const;

private:
  // Print the list of registered commands to "os"
  void usage(std::ostream &os) const;

private:
  const std::string _name;
  std::map<std::string, std::unique_ptr<Command>> _commands;
};
+
+} // namespace cli
+
#endif // __CLI_APP_H__
diff --git a/compiler/cli/include/cli/Command.h b/compiler/cli/include/cli/Command.h
new file mode 100644
index 000000000..2e264f9ef
--- /dev/null
+++ b/compiler/cli/include/cli/Command.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CLI_COMMAND_H__
+#define __CLI_COMMAND_H__
+
+namespace cli
+{
+
/**
 * @brief Interface for a runnable CLI sub-command
 */
struct Command
{
  virtual ~Command() = default;

  // Execute with the arguments that follow the command tag; returns exit status
  virtual int run(int argc, const char *const *argv) const = 0;
};
+
+} // namespace cli
+
+#endif // __CLI_COMMAND_H__
diff --git a/compiler/cli/include/cli/FunctionCommand.h b/compiler/cli/include/cli/FunctionCommand.h
new file mode 100644
index 000000000..585653099
--- /dev/null
+++ b/compiler/cli/include/cli/FunctionCommand.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CLI_FUNCTION_COMMAND_H__
+#define __CLI_FUNCTION_COMMAND_H__
+
+#include <cli/Command.h>
+
+namespace cli
+{
+
+class FunctionCommand final : public Command
+{
+public:
+ // NOTE The use of pure funtion pointer here is intended to disallow variable capture
+ using Entry = int (*)(int argc, const char *const *argv);
+
+public:
+ FunctionCommand(const Entry &entry) : _entry{entry}
+ {
+ // DO NOTHING
+ }
+
+public:
+ int run(int argc, const char *const *argv) const override { return _entry(argc, argv); };
+
+private:
+ Entry const _entry;
+};
+
+} // namespace cli
+
+#endif // __CLI_FUNCTION_COMMAND_H__
diff --git a/compiler/cli/src/App.cpp b/compiler/cli/src/App.cpp
new file mode 100644
index 000000000..5052f682a
--- /dev/null
+++ b/compiler/cli/src/App.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cli/App.h"
+
+#include <iostream>
+#include <cassert>
+
+namespace cli
+{
+
// Store the application name; it is used only in the usage message
App::App(const std::string &name) : _name{name}
{
  // DO NOTHING
}
+
+App &App::insert(const std::string &tag, std::unique_ptr<Command> &&command)
+{
+ assert(_commands.find(tag) == _commands.end());
+
+ _commands[tag] = std::move(command);
+
+ return (*this);
+}
+
+int App::run(int argc, const char *const *argv) const
+{
+ if (argc < 1)
+ {
+ std::cerr << "ERROR: COMMAND is not provided" << std::endl;
+ usage(std::cerr);
+ return 255;
+ }
+
+ const std::string command{argv[0]};
+
+ auto it = _commands.find(command);
+
+ if (it == _commands.end())
+ {
+ std::cerr << "ERROR: '" << command << "' is not a valid command" << std::endl;
+ usage(std::cerr);
+ return 255;
+ }
+
+ return it->second->run(argc - 1, argv + 1);
+}
+
+void App::usage(std::ostream &os) const
+{
+ os << std::endl;
+ os << "USAGE: " << _name << " [COMMAND] ..." << std::endl;
+ os << std::endl;
+ os << "SUPPORTED COMMANDS:" << std::endl;
+ for (auto it = _commands.begin(); it != _commands.end(); ++it)
+ {
+ os << " " << it->first << std::endl;
+ }
+}
+
+} // namespace cli
diff --git a/compiler/cli/src/App.test.cpp b/compiler/cli/src/App.test.cpp
new file mode 100644
index 000000000..fe2d44179
--- /dev/null
+++ b/compiler/cli/src/App.test.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cli/App.h"
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+class RecordCommand final : public cli::Command
+{
+public:
+ RecordCommand(int ret, std::string &out) : _ret{ret}, _out(out)
+ {
+ // DO NOTHING
+ }
+
+public:
+ int run(int argc, const char *const *argv) const override
+ {
+ _out += std::to_string(argc);
+
+ for (int n = 0; n < argc; ++n)
+ {
+ _out += ";";
+ _out += argv[n];
+ }
+
+ return _ret;
+ }
+
+private:
+ int const _ret;
+ std::string &_out;
+};
+
// End-to-end check: App::run consumes argv[0] as the command tag ("record"),
// passes the remaining two arguments to the command, and propagates the
// command's return value (3)
TEST(APP, run)
{
  cli::App app("test");

  std::string args;
  app.insert("record", stdex::make_unique<RecordCommand>(3, args));

  const char *argv[] = {"record", "hello", "world"};

  int ret = app.run(3, argv);

  ASSERT_EQ(ret, 3);
  ASSERT_EQ(args, "2;hello;world");
}
diff --git a/compiler/coco/CMakeLists.txt b/compiler/coco/CMakeLists.txt
new file mode 100644
index 000000000..4be53e8a8
--- /dev/null
+++ b/compiler/coco/CMakeLists.txt
@@ -0,0 +1,2 @@
# coco is split into the core IR and generic utilities
add_subdirectory(core)
add_subdirectory(generic)
diff --git a/compiler/coco/README.md b/compiler/coco/README.md
new file mode 100644
index 000000000..cfef8bafe
--- /dev/null
+++ b/compiler/coco/README.md
@@ -0,0 +1,3 @@
+# coco
+
+_coco_ is an experimental coarse-grained intermediate representation (IR) for NN compilers.
diff --git a/compiler/coco/core/CMakeLists.txt b/compiler/coco/core/CMakeLists.txt
new file mode 100644
index 000000000..8c6844733
--- /dev/null
+++ b/compiler/coco/core/CMakeLists.txt
@@ -0,0 +1,25 @@
# Collect all sources, then carve the *.test.cpp files out into TESTS
file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})

add_library(coco_core SHARED ${SOURCES})
target_include_directories(coco_core PUBLIC include)
# NOTE Some coco_core PUBLIC headers include angkor headers
target_link_libraries(coco_core PUBLIC angkor)
target_link_libraries(coco_core PRIVATE pepper_assert)
target_link_libraries(coco_core PRIVATE stdex)
# Let's apply nncc common compile options
# NOTE This will enable strict compilation (warnings as error).
#      Please refer to top-level CMakeLists.txt for details
target_link_libraries(coco_core PRIVATE nncc_common)

if(NOT ENABLE_TEST)
  return()
endif(NOT ENABLE_TEST)

# Google Test is required for internal testing
nnas_find_package(GTest REQUIRED)

GTest_AddTest(coco_core_test ${TESTS})
target_link_libraries(coco_core_test coco_core)
target_link_libraries(coco_core_test stdex)
diff --git a/compiler/coco/core/include/coco/ADT/DLinkedList.h b/compiler/coco/core/include/coco/ADT/DLinkedList.h
new file mode 100644
index 000000000..e3c275041
--- /dev/null
+++ b/compiler/coco/core/include/coco/ADT/DLinkedList.h
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_ADT_DLINKED_LIST_H__
+#define __COCO_ADT_DLINKED_LIST_H__
+
+#include <cassert>
+#include <type_traits>
+
+namespace coco
+{
+
// **CAUTION** Child SHOULD inherit DLinkedList<Child, Parent>::Node
//
// Intrusive doubly-linked list: each Child embeds its own prev/next links
// (via Node) and belongs to at most one Parent at a time. The Parent owns
// a Head which tracks the first/last Child.
template <typename Child, typename Parent> struct DLinkedList
{
  /// @brief A hook for Child-Join event
  static void joined(Parent *, Child *);
  /// @brief A hook for Child-Leave event
  static void leaving(Parent *, Child *);

  // List anchor stored in the Parent; maintains head/tail and the
  // parent-child relation, but NOT the sibling links (Node does that)
  class Head
  {
  public:
    Head(Parent *parent) : _parent{parent}
    {
      _head = nullptr;
      _tail = nullptr;
    }

  public:
    Head(const Head &) = delete;
    Head(Head &&) = delete;

  public:
    Child *head(void) const { return _head; }
    Child *tail(void) const { return _tail; }

  public:
    bool empty(void) const
    {
      // Invariant: _head and _tail are either both null or both non-null
      if (_head == nullptr)
      {
        assert(_head == _tail);
        return true;
      }

      assert(_head != nullptr);
      assert(_tail != nullptr);
      return false;
    }

  public:
    // Register "child" with this list. For a non-empty list the child's
    // prev/next links must already be wired (insertBefore/insertAfter do
    // this before calling enlist); Head only updates _head/_tail here.
    void enlist(Child *child)
    {
      assert((child->prev() == nullptr) || (child->prev()->parent() == _parent));
      assert((child->next() == nullptr) || (child->next()->parent() == _parent));

      if (empty())
      {
        _head = child;
        _tail = child;
      }
      else
      {
        if (child->next() == _head)
        {
          // _child is a new head
          assert(child->prev() == nullptr);
          _head = child;
        }

        if (child->prev() == _tail)
        {
          // _child is a new tail
          assert(child->next() == nullptr);
          _tail = child;
        }
      }

      // Update parent-child relation
      child->parent(_parent);

      // Notify Child-Joining event
      joined(_parent, child);
    }

  public:
    // Unregister "child". Sibling prev/next links are NOT touched here;
    // Node::detach fixes them after calling delist.
    void delist(Child *child)
    {
      assert(child->parent() == _parent);
      assert(!empty());

      // Notify Child-Leaving event
      leaving(_parent, child);

      if (child == _head)
      {
        _head = child->next();
      }

      if (child == _tail)
      {
        _tail = child->prev();
      }

      // Update parent-child relation
      child->parent(nullptr);
    }

  public:
    void prepend(Child *child)
    {
      if (empty())
      {
        enlist(child);
      }
      else
      {
        child->insertBefore(_head);
      }
    }

  public:
    void append(Child *child)
    {
      if (empty())
      {
        enlist(child);
      }
      else
      {
        child->insertAfter(_tail);
      }
    }

  private:
    Parent *const _parent;

  private:
    Child *_head;
    Child *_tail;
  };

  // NOTE Client SHOULD implement this static method
  static Head *head(Parent *);

  // Intrusive link node; Child derives from this (enforced below)
  class Node
  {
  public:
    friend class Head;

  public:
    Node()
    {
      static_assert(std::is_base_of<Node, Child>::value,
                    "Type `Child` must be subclass of `Node`.");

      _prev = nullptr;
      _next = nullptr;
    }

  public:
    virtual ~Node()
    {
      // Each Child should unlink itself on destruction
      //
      // NOTE detach invokes "leaving" hook which may access the internal of each Child,
      //      so it is not safe to invoke detach here
      assert(parent() == nullptr);
    }

  public:
    Parent *parent(void) const { return _parent; }

  private:
    // Downcast to Child; safe because of the static_assert in the ctor.
    // NOTE(review): relies on Node being a base of Child — reinterpret_cast
    // here assumes the usual single-inheritance layout; confirm no client
    // uses multiple inheritance that would break this.
    Child *curr(void) { return reinterpret_cast<Child *>(this); }
    const Child *curr(void) const { return reinterpret_cast<const Child *>(this); }

  public:
    Child *prev(void) const { return _prev; }
    Child *next(void) const { return _next; }

  public:
    // Link this (unlinked) node immediately before "next" in next's list
    void insertBefore(Node *next)
    {
      assert(next != nullptr);
      assert(next->parent() != nullptr);
      assert(head(next->parent()) != nullptr);

      assert(_prev == nullptr);
      assert(_next == nullptr);

      // Update the link of the current node
      _prev = next->prev();
      _next = next->curr();

      if (auto prev = next->prev())
      {
        prev->_next = curr();
      }
      next->_prev = curr();

      // Update parent-child relation
      assert(parent() == nullptr);
      head(next->parent())->enlist(curr());
      assert(parent() == next->parent());
    }

  public:
    // Link this (unlinked) node immediately after "prev" in prev's list
    void insertAfter(Node *prev)
    {
      assert(prev != nullptr);
      assert(prev->parent() != nullptr);
      assert(head(prev->parent()) != nullptr);

      assert(_prev == nullptr);
      assert(_next == nullptr);

      // Update the link of the current node
      _prev = prev->curr();
      _next = prev->next();

      // Update the link of the sibling nodes
      if (auto next = prev->next())
      {
        next->_prev = curr();
      }
      prev->_next = curr();

      // Update parent-child relation
      assert(parent() == nullptr);
      head(prev->parent())->enlist(curr());
      assert(parent() == prev->parent());
    };

  public:
    // Unlink this node from its list: delist first (fires "leaving" hook
    // and fixes head/tail), then repair sibling links, then clear own links
    void detach(void)
    {
      // Update parent-child relation
      assert(parent() != nullptr);
      assert(head(parent()) != nullptr);
      head(parent())->delist(curr());
      assert(parent() == nullptr);

      // Update the link of sibling nodes
      if (prev())
      {
        prev()->_next = next();
      }

      if (next())
      {
        next()->_prev = prev();
      }

      // Update the link of the current node
      _prev = nullptr;
      _next = nullptr;
    }

  private:
    // WARN Do NOT invoke this method outside Head::enlist
    void parent(Parent *p) { _parent = p; }

  private:
    // WARN Do NOT modify this field inside Node.
    Parent *_parent = nullptr;
    Child *_prev;
    Child *_next;
  };
};
+
+} // namespace coco
+
+#endif // __COCO_ADT_DLINKED_LIST_H__
diff --git a/compiler/coco/core/include/coco/ADT/PtrList.h b/compiler/coco/core/include/coco/ADT/PtrList.h
new file mode 100644
index 000000000..37fead728
--- /dev/null
+++ b/compiler/coco/core/include/coco/ADT/PtrList.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_ADT_PTR_LIST_H__
+#define __COCO_ADT_PTR_LIST_H__
+
+#include <vector>
+
+#include <cstdint>
+
+namespace coco
+{
+
+template <typename T> class PtrList
+{
+public:
+ PtrList() = default;
+
+public:
+ PtrList(const PtrList &) = delete;
+ PtrList(PtrList &&) = delete;
+
+public:
+ virtual ~PtrList() = default;
+
+public:
+ uint32_t size(void) const { return _ptrs.size(); }
+
+public:
+ T *at(uint32_t n) const { return _ptrs.at(n); }
+
+public:
+ void insert(T *ptr) { _ptrs.emplace_back(ptr); }
+
+private:
+ std::vector<T *> _ptrs;
+};
+
+} // namespace coco
+
+#endif // __COCO_ADT_PTR_LIST_H__
diff --git a/compiler/coco/core/include/coco/ADT/PtrManager.h b/compiler/coco/core/include/coco/ADT/PtrManager.h
new file mode 100644
index 000000000..2b254c70a
--- /dev/null
+++ b/compiler/coco/core/include/coco/ADT/PtrManager.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_ADT_PTR_MANAGER_H__
+#define __COCO_ADT_PTR_MANAGER_H__
+
+#include <vector>
+
+#include <memory>
+#include <stdexcept>
+
+namespace coco
+{
+
+template <typename T> class PtrManager
+{
+public:
+ /// @brief Return the number of managed objects
+ uint32_t size(void) const { return _ptrs.size(); }
+
+public:
+ T *at(uint32_t n) const { return _ptrs.at(n).get(); }
+
+protected:
+ template <typename U> U *take(std::unique_ptr<U> &&o)
+ {
+ auto res = o.get();
+ _ptrs.emplace_back(std::move(o));
+ return res;
+ }
+
+protected:
+ std::unique_ptr<T> release(T *ptr)
+ {
+ for (auto it = _ptrs.begin(); it != _ptrs.end(); ++it)
+ {
+ if (it->get() == ptr)
+ {
+ std::unique_ptr<T> res = std::move(*it);
+ _ptrs.erase(it);
+ return res;
+ }
+ }
+
+ throw std::invalid_argument{"ptr"};
+ }
+
+private:
+ std::vector<std::unique_ptr<T>> _ptrs;
+};
+
+} // namespace coco
+
+#endif // __COCO_ADT_PTR_MANAGER_H__
diff --git a/compiler/coco/core/include/coco/IR.h b/compiler/coco/core/include/coco/IR.h
new file mode 100644
index 000000000..aa7ad5727
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_H__
+#define __COCO_IR_H__
+
+#include "coco/IR/Bag.h"
+#include "coco/IR/Object.h"
+#include "coco/IR/FeatureLayouts.h"
+#include "coco/IR/KernelLayouts.h"
+
+#include "coco/IR/Op.h"
+#include "coco/IR/Instr.h"
+#include "coco/IR/Block.h"
+
+#include "coco/IR/Input.h"
+#include "coco/IR/Output.h"
+
+#include "coco/IR/Module.h"
+
+#endif // __COCO_IR_H__
diff --git a/compiler/coco/core/include/coco/IR/Arg.h b/compiler/coco/core/include/coco/IR/Arg.h
new file mode 100644
index 000000000..fc451a231
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Arg.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_ARG_H__
+#define __COCO_IR_ARG_H__
+
+#include "coco/IR/Bag.h"
+#include "coco/IR/ElemID.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/Layout.h>
+
+#include <string>
+#include <vector>
+
+namespace coco
+{
+
+/**
+ * @brief Base class for NN model arguments (Input/Output)
+ */
+class Arg
+{
+public:
+ explicit Arg(const nncc::core::ADT::tensor::Shape &shape);
+
+public:
+ virtual ~Arg() = default;
+
+public:
+ const nncc::core::ADT::tensor::Shape &shape(void) const { return _shape; }
+
+public:
+ const std::string &name(void) const { return _name; }
+ void name(const std::string &s) { _name = s; }
+
+protected:
+ virtual void onTake(Bag *) { return; }
+ virtual void onRelease(Bag *) { return; }
+
+public:
+ Bag *bag(void) const { return _bag; }
+ void bag(Bag *);
+
+public:
+ ElemID &at(const nncc::core::ADT::tensor::Index &);
+ const ElemID &at(const nncc::core::ADT::tensor::Index &) const;
+
+public:
+ void reorder(const nncc::core::ADT::tensor::Layout &l);
+ template <typename LayoutImpl> void reorder(void) { reorder(LayoutImpl{}); }
+
+private:
+ nncc::core::ADT::tensor::Shape const _shape;
+
+private:
+ std::string _name;
+
+private:
+ Bag *_bag;
+ std::vector<ElemID> _map;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_ARG_H__
diff --git a/compiler/coco/core/include/coco/IR/Bag.h b/compiler/coco/core/include/coco/IR/Bag.h
new file mode 100644
index 000000000..1c86899d7
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Bag.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_BAG_H__
+#define __COCO_IR_BAG_H__
+
+#include "coco/IR/Entity.h"
+#include "coco/IR/ObjectSet.h"
+#include "coco/IR/DepSet.h"
+#include "coco/IR/ReadSet.h"
+#include "coco/IR/UpdateSet.h"
+#include "coco/IR/Input.forward.h"
+#include "coco/IR/Output.forward.h"
+#include "coco/IR/Locatable.h"
+
+#include <set>
+
+#include <memory>
+
+namespace coco
+{
+
+/**
+ * @brief A collection of (abstracted) elements of the same type
+ *
+ * When there are N elements in a bag, we refer to N as the size of this bag, and every
+ * element in a bag has a unique numeric ID whose range is [0, N).
+ *
+ * NOTE 'Bag' is not a container (such as std::vector). 'Bag' just assures that there are
+ * N elements. It does not state about its value.
+ *
+ * NOTE coco IR treats Bag as virtual memory allocation
+ */
+class Bag final : public Entity
+{
+public:
+ struct Updater : public Locatable
+ {
+ virtual ~Updater() = default;
+ };
+
+ using UpdaterSet = std::set<Updater *>;
+
+ struct Reader : public Locatable
+ {
+ virtual ~Reader() = default;
+ };
+
+ using ReaderSet = std::set<Reader *>;
+
+public:
+ friend class Dep;
+ friend class Read;
+ friend class Update;
+ friend class Input;
+ friend class Output;
+
+public:
+ explicit Bag(uint32_t size);
+
+public:
+ ~Bag();
+
+public:
+ uint32_t size(void) const;
+
+public:
+ bool isInput(void) const;
+ bool isOutput(void) const;
+
+public:
+ /// @brief Return the set of Dep links that point to this bag
+ const DepSet *deps(void) const;
+ /// @brief Return the set of Read links that point to this bag
+ const ReadSet *reads(void) const;
+ /// @brief Return the set of Update links that point to this bag
+ const UpdateSet *updates(void) const;
+
+public:
+ /// @brief Return a valid pointer if this bag is marked as an input of the model
+ Input *input(void) const { return _input; }
+ /// @brief Return a valid pointer if this bag is marked as an output of the model
+ Output *output(void) const { return _output; }
+
+public:
+ /**
+ * @brief Replace all the occurence of a bag (except those in Input/Output) with another bag
+ *
+ * NOTE reaplceWith(b) works correctly only when b is neither Input nor Output
+ */
+ void replaceWith(Bag *b);
+
+ /**
+ * @brief Replace all the occurence of a bag in Object with another bag
+ *
+ * NOTE Unlike replaceWith(b), replaceAllDepsWith(b) has no restriction
+ */
+ void replaceAllDepsWith(Bag *);
+
+private:
+ // "mutable_" prefix is deliberately introduced below to avoid resolution issue.
+ //
+ // Let's assume that two "deps" are overloaded in Bag as follows:
+ // class Bag
+ // {
+ // private:
+ // DepSet *deps(void); <-- 1
+ // public:
+ // const DepSet *deps(void) const; <-- 2
+ // };
+ //
+ // C++ compiler tries to invoke method 1 unless a bag itself is const. Thus, any "deps" calls
+ // over non-const bags except those calls from friend classes will introduce build error.
+
+ // WARN Only Dep is allowed to access this method
+ DepSet *mutable_deps(void) { return &_deps; }
+ // WARN Only Read is allowed to access this method
+ ReadSet *mutable_reads(void) { return &_reads; }
+ // WARN Only Update is allowed to access this method
+ UpdateSet *mutable_updates(void) { return &_updates; }
+
+private:
+ // WARN Only Input is allowed to access this method
+ void input(Input *i) { _input = i; }
+ // WARN Only Output is allowed to access this method
+ void output(Output *o) { _output = o; }
+
+private:
+ uint32_t _size;
+
+ /** @brief Links to dependent Object(s) */
+ DepSet _deps;
+ /** @brief Direct reads (not through Object) */
+ ReadSet _reads;
+ /** @brief Direct updates (not through Object) */
+ UpdateSet _updates;
+
+ Input *_input = nullptr;
+ Output *_output = nullptr;
+};
+
+/// @brief Return a set of objects that depends on a given bag
+ObjectSet dependent_objects(const Bag *);
+/// @brief Return a set of readers that reads a given bag
+Bag::ReaderSet readers(const Bag *);
+/// @brief Return a set of updaters that updates a given bag
+Bag::UpdaterSet updaters(const Bag *);
+
+} // namespace coco
+
+#endif // __COCO_IR_BAG_H__
diff --git a/compiler/coco/core/include/coco/IR/BagManager.h b/compiler/coco/core/include/coco/IR/BagManager.h
new file mode 100644
index 000000000..6ba644101
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/BagManager.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_BAG_MANAGER_H__
+#define __COCO_IR_BAG_MANAGER_H__
+
+#include "coco/IR/Bag.h"
+#include "coco/IR/EntityBuilder.h"
+
+#include "coco/ADT/PtrManager.h"
+
+namespace coco
+{
+
+class BagManager final : public PtrManager<Bag>, public EntityBuilder
+{
+public:
+ BagManager(Module *m = nullptr) { module(m); }
+
+public:
+ Bag *create(uint32_t size);
+
+public:
+ /**
+ * @brief Destroy (= deallocate) a Bag entity
+ *
+ * NOTE A Bag SHOULD BE detached from IR before destruction
+ */
+ void destroy(Bag *b);
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_BAG_MANAGER_H__
diff --git a/compiler/coco/core/include/coco/IR/Block.forward.h b/compiler/coco/core/include/coco/IR/Block.forward.h
new file mode 100644
index 000000000..6d1793141
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Block.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_BLOCK_FORWARD_H__
+#define __COCO_IR_BLOCK_FORWARD_H__
+
+namespace coco
+{
+
+class Block;
+
+} // namespace coco
+
+#endif // __COCO_IR_BLOCK_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Block.h b/compiler/coco/core/include/coco/IR/Block.h
new file mode 100644
index 000000000..1bb3f47c7
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Block.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_BLOCK_H__
+#define __COCO_IR_BLOCK_H__
+
+#include "coco/IR/Module.forward.h"
+#include "coco/IR/Block.forward.h"
+#include "coco/IR/BlockIndex.h"
+#include "coco/IR/Instr.h"
+#include "coco/IR/Entity.h"
+
+#include "coco/ADT/DLinkedList.h"
+
+namespace coco
+{
+
+using BlockList = DLinkedList<Block, Module>::Head;
+
+/**
+ * @brief A unit of (grouped) instructions
+ *
+ * Block allows backend to manage a set of instructions as one unit, which is useful for H/W that
+ * has a restriction on code size
+ */
+class Block final : public DLinkedList<Block, Module>::Node, public Entity
+{
+public:
+ friend void DLinkedList<Block, Module>::joined(Module *, Block *);
+ friend void DLinkedList<Block, Module>::leaving(Module *, Block *);
+
+public:
+ Block() : _instr{this}
+ {
+ // DO NOTHING
+ }
+
+public:
+ Block(const Block &) = delete;
+ Block(Block &&) = delete;
+
+public:
+ ~Block()
+ {
+ if (parent())
+ {
+ detach();
+ }
+ }
+
+public:
+ InstrList *instr(void) { return &_instr; }
+ const InstrList *instr(void) const { return &_instr; }
+
+public:
+ const BlockIndex &index(void) const { return _index; }
+
+private:
+ BlockIndex _index;
+ DLinkedList<Instr, Block>::Head _instr;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_BLOCK_H__
diff --git a/compiler/coco/core/include/coco/IR/BlockIndex.h b/compiler/coco/core/include/coco/IR/BlockIndex.h
new file mode 100644
index 000000000..7deabf488
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/BlockIndex.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_BLOCK_INDEX_H__
+#define __COCO_IR_BLOCK_INDEX_H__
+
+#include <cstdint>
+
+namespace coco
+{
+
+/**
+ * @brief A BlockIndex denotes the index of a block in a block list
+ */
+class BlockIndex final
+{
+private:
+ static const uint32_t undefined = 0xffffffff;
+
+public:
+ BlockIndex() : _value{undefined}
+ {
+ // DO NOTHING
+ }
+
+public:
+ BlockIndex(uint32_t value) { set(value); }
+
+public:
+ bool valid(void) const { return _value != undefined; }
+
+public:
+ uint32_t value(void) const { return _value; }
+
+public:
+ void set(uint32_t value);
+ void reset(void) { _value = undefined; }
+
+private:
+ uint32_t _value;
+};
+
+static inline bool operator<(const BlockIndex &lhs, const BlockIndex &rhs)
+{
+ return lhs.value() < rhs.value();
+}
+
+} // namespace coco
+
+#endif // __COCO_IR_BLOCK_INDEX_H__
diff --git a/compiler/coco/core/include/coco/IR/BlockManager.h b/compiler/coco/core/include/coco/IR/BlockManager.h
new file mode 100644
index 000000000..f81f1f22b
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/BlockManager.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_BLOCK_MANAGER_H__
+#define __COCO_IR_BLOCK_MANAGER_H__
+
+#include "coco/IR/Block.h"
+#include "coco/IR/EntityBuilder.h"
+
+#include "coco/ADT/PtrManager.h"
+
+namespace coco
+{
+
+class BlockManager final : public PtrManager<Block>, public EntityBuilder
+{
+public:
+ BlockManager(Module *m = nullptr) { module(m); }
+
+public:
+ Block *create(void);
+
+public:
+ /**
+ * @brief Free 'Block' object
+ *
+ * NOTE Block SHOULD be detached from any list before it is destructed
+ */
+ void destroy(Block *);
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_BLOCK_MANAGER_H__
diff --git a/compiler/coco/core/include/coco/IR/Def.forward.h b/compiler/coco/core/include/coco/IR/Def.forward.h
new file mode 100644
index 000000000..93878c658
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Def.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_DEF_FORWARD_H__
+#define __COCO_IR_DEF_FORWARD_H__
+
+namespace coco
+{
+
+class Def;
+
+} // namespace coco
+
+#endif // __COCO_IR_DEF_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Def.h b/compiler/coco/core/include/coco/IR/Def.h
new file mode 100644
index 000000000..d9b1567e5
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Def.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_DEF_H__
+#define __COCO_IR_DEF_H__
+
+#include "coco/IR/Object.h"
+
+namespace coco
+{
+
+class Def final
+{
+public:
+ Def(Object::Producer *producer) : _producer{producer}
+ {
+ // DO NOTHING
+ }
+
+public:
+ ~Def() { value(nullptr); }
+
+public:
+ Object *value(void) const { return _value; }
+
+public:
+ void value(Object *value);
+
+public:
+ Object::Producer *producer(void) const { return _producer; }
+
+private:
+ Object *_value = nullptr;
+ Object::Producer *_producer = nullptr;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_DEF_H__
diff --git a/compiler/coco/core/include/coco/IR/Dep.forward.h b/compiler/coco/core/include/coco/IR/Dep.forward.h
new file mode 100644
index 000000000..596ee3126
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Dep.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_DEP_FORWARD_H__
+#define __COCO_IR_DEP_FORWARD_H__
+
+namespace coco
+{
+
+class Dep;
+
+} // namespace coco
+
+#endif // __COCO_IR_DEP_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Dep.h b/compiler/coco/core/include/coco/IR/Dep.h
new file mode 100644
index 000000000..645c3befe
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Dep.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_DEP_H__
+#define __COCO_IR_DEP_H__
+
+#include "coco/IR/Bag.h"
+#include "coco/IR/Object.forward.h"
+
+namespace coco
+{
+
+/**
+ * @brief A Dep represents the edge between a Bag and its dependent Object
+ *
+ * WARNING A Dep will update dependent Object set (stored BagInfo) only when
+ * users properly initialize object and link values.
+ */
+class Dep final
+{
+public:
+ Dep() = default;
+
+public:
+ Dep(const Dep &) = delete;
+ Dep(Dep &&) = delete;
+
+public:
+ ~Dep();
+
+public:
+ Bag *bag(void) const { return _bag; }
+ void bag(Bag *);
+
+public:
+ Object *object(void) const { return _object; }
+ void object(Object *object) { _object = object; }
+
+private:
+ Bag *_bag = nullptr;
+ Object *_object = nullptr;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_DEP_H__
diff --git a/compiler/coco/core/include/coco/IR/DepSet.h b/compiler/coco/core/include/coco/IR/DepSet.h
new file mode 100644
index 000000000..c4e2df979
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/DepSet.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_DEP_SET_H__
+#define __COCO_IR_DEP_SET_H__
+
+#include "coco/IR/Dep.forward.h"
+
+#include <set>
+
+namespace coco
+{
+
+using DepSet = std::set<Dep *>;
+
+} // namespace coco
+
+#endif // __COCO_IR_DEP_SET_H__
diff --git a/compiler/coco/core/include/coco/IR/ElemID.h b/compiler/coco/core/include/coco/IR/ElemID.h
new file mode 100644
index 000000000..7065d13eb
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/ElemID.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_ELEM_ID_H__
+#define __COCO_IR_ELEM_ID_H__
+
+#include <cstdint>
+
+namespace coco
+{
+
+class ElemID final
+{
+public:
+ ElemID() : _value{0xffffffff}
+ {
+ // DO NOTHING
+ }
+
+public:
+ explicit ElemID(uint32_t value) : _value{value}
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t value(void) const { return _value; }
+
+private:
+ uint32_t _value;
+};
+
+bool operator==(const ElemID &lhs, const ElemID &rhs);
+bool operator<(const ElemID &lhs, const ElemID &rhs);
+
+} // namespace coco
+
+#endif // __COCO_IR_ELEM_ID_H__
diff --git a/compiler/coco/core/include/coco/IR/Entity.h b/compiler/coco/core/include/coco/IR/Entity.h
new file mode 100644
index 000000000..4bf9df651
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Entity.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_ENTITY_H__
+#define __COCO_IR_ENTITY_H__
+
+#include "coco/IR/Module.forward.h"
+
+namespace coco
+{
+
+/**
+ * @brief A base class for IR entities
+ *
+ * NOTE Each IR entity has a link to a module that it belongs to
+ */
+class Entity
+{
+public:
+ friend class EntityBuilder;
+
+public:
+ virtual ~Entity() = default;
+
+public:
+ Module *module(void) const { return _module; }
+
+private:
+ // WARN Only EntityBuilder is allowed to access this method
+ void module(Module *m) { _module = m; }
+
+private:
+ Module *_module = nullptr;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_ENTITY_H__
diff --git a/compiler/coco/core/include/coco/IR/EntityBuilder.h b/compiler/coco/core/include/coco/IR/EntityBuilder.h
new file mode 100644
index 000000000..161f3f294
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/EntityBuilder.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_ENTITY_BUILDER_H__
+#define __COCO_IR_ENTITY_BUILDER_H__
+
+#include "coco/IR/Entity.h"
+#include "coco/IR/Module.forward.h"
+
+namespace coco
+{
+
+/**
+ * @brief A base class for IR entity builders
+ *
+ * NOTE Only EntityBuilder is allowed to update module field of each Entity
+ */
+class EntityBuilder
+{
+public:
+ virtual ~EntityBuilder() = default;
+
+protected:
+ Module *module(void) const { return _module; }
+
+ void module(Module *m) { _module = m; }
+ void modulize(Entity *entity) const { entity->module(_module); }
+
+private:
+ Module *_module = nullptr;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_ENTITY_BUILDER_H__
diff --git a/compiler/coco/core/include/coco/IR/EntityManager.h b/compiler/coco/core/include/coco/IR/EntityManager.h
new file mode 100644
index 000000000..e76dec7aa
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/EntityManager.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_ENTITY_MANAGER_H__
+#define __COCO_IR_ENTITY_MANAGER_H__
+
+#include "coco/IR/BagManager.h"
+#include "coco/IR/ObjectManager.h"
+
+#include "coco/IR/OpManager.h"
+#include "coco/IR/InstrManager.h"
+#include "coco/IR/BlockManager.h"
+
+#include "coco/IR/InputManager.h"
+#include "coco/IR/OutputManager.h"
+
+namespace coco
+{
+
+/**
+ * @brief Meta (lifetime) manager interface
+ *
+ * EntityManager is referred as meta manager as it is a gateway to other
+ * managers.
+ */
+struct EntityManager
+{
+ virtual ~EntityManager() = default;
+
+ virtual BagManager *bag(void) = 0;
+ virtual const BagManager *bag(void) const = 0;
+
+ virtual ObjectManager *object(void) = 0;
+ virtual const ObjectManager *object(void) const = 0;
+
+ virtual OpManager *op(void) = 0;
+ virtual const OpManager *op(void) const = 0;
+
+ virtual InstrManager *instr(void) = 0;
+ virtual const InstrManager *instr(void) const = 0;
+
+ virtual BlockManager *block(void) = 0;
+ virtual const BlockManager *block(void) const = 0;
+
+ virtual InputManager *input(void) = 0;
+ virtual const InputManager *input(void) const = 0;
+
+ virtual OutputManager *output(void) = 0;
+ virtual const OutputManager *output(void) const = 0;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_ENTITY_MANAGER_H__
diff --git a/compiler/coco/core/include/coco/IR/FeatureLayout.h b/compiler/coco/core/include/coco/IR/FeatureLayout.h
new file mode 100644
index 000000000..63f02c8ba
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/FeatureLayout.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_FEATURE_LAYOUT_H__
+#define __COCO_IR_FEATURE_LAYOUT_H__
+
+#include "coco/IR/ElemID.h"
+#include "coco/IR/FeatureShape.h"
+
+namespace coco
+{
+
+/**
+ * @brief A FeatureLayout connects each feature index to a Bag element
+ *
+ * NOTE FeatureLayout is an immutable interface
+ */
+struct FeatureLayout
+{
+ struct ID
+ {
+ virtual ~ID() = default;
+ };
+
+ virtual ~FeatureLayout() = default;
+
+ virtual const ID *id(void) const = 0;
+
+ virtual const FeatureShape &shape(void) const = 0;
+
+ uint32_t batch(void) const { return shape().batch(); }
+ uint32_t depth(void) const { return shape().depth(); }
+ uint32_t height(void) const { return shape().height(); }
+ uint32_t width(void) const { return shape().width(); }
+
+ virtual ElemID at(uint32_t b, uint32_t ch, uint32_t row, uint32_t col) const = 0;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_FEATURE_LAYOUT_H__
diff --git a/compiler/coco/core/include/coco/IR/FeatureLayouts.h b/compiler/coco/core/include/coco/IR/FeatureLayouts.h
new file mode 100644
index 000000000..23b9c4919
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/FeatureLayouts.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_FEATURE_LAYOUTS_H__
+#define __COCO_IR_FEATURE_LAYOUTS_H__
+
+#include "coco/IR/FeatureLayout.h"
+
+#include <nncc/core/ADT/feature/Layout.h>
+
+#include <vector>
+#include <memory>
+
+namespace coco
+{
+namespace FeatureLayouts
+{
+
+/**
+ * @brief BCHW Feature Layout
+ */
+class BCHW final : public FeatureLayout
+{
+private:
+ BCHW(const FeatureShape &shape) : _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ static const FeatureLayout::ID *uid(void);
+ const FeatureLayout::ID *id(void) const override { return uid(); }
+
+ const FeatureShape &shape(void) const override { return _shape; }
+
+ ElemID at(uint32_t b, uint32_t ch, uint32_t row, uint32_t col) const override;
+
+private:
+ FeatureShape _shape;
+
+public:
+ static std::unique_ptr<BCHW> create(const nncc::core::ADT::feature::Shape &shape);
+};
+
+/**
+ * @brief BHWC Feature Layout
+ */
+class BHWC : public coco::FeatureLayout
+{
+private:
+ BHWC(const FeatureShape &shape) : _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ static const FeatureLayout::ID *uid(void);
+ const FeatureLayout::ID *id(void) const override { return uid(); }
+
+ const FeatureShape &shape(void) const override { return _shape; }
+
+ coco::ElemID at(uint32_t b, uint32_t ch, uint32_t row, uint32_t col) const override;
+
+private:
+ FeatureShape _shape;
+
+public:
+ static std::unique_ptr<BHWC> create(const nncc::core::ADT::feature::Shape &shape);
+ static std::unique_ptr<BHWC> create(const FeatureShape &shape);
+};
+
+/**
+ * @brief BC (Channel-wise Channel-major) Feature Layout
+ *
+ * 1. A layout is said to be channel-wise if the following holds:
+ *
+ * For each pair of valid feature index I and J,
+ * at(I) == at(J) if batch(I) == batch(J) and channel(I) == channel(J)
+ *
+ * 2. A layout is said to be channel-major if the followings hold:
+ *
+ * For each pair of valid feature index I and J,
+ * at(I) + 1 == at(J) if batch(I) == batch(J) and channel(I) + 1 == channel(J)
+ *
+ * For each pair of valid feature index I and J,
+ * at(I) + 1 == at(J) if batch(I) + 1 == batch(J), channel(I) == depth - 1, and channel(J) == 0
+ */
+class BC : public coco::FeatureLayout
+{
+private:
+ BC(const FeatureShape &shape) : _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ static const FeatureLayout::ID *uid(void);
+ const FeatureLayout::ID *id(void) const override { return uid(); }
+
+ const FeatureShape &shape(void) const override { return _shape; }
+
+ coco::ElemID at(uint32_t b, uint32_t ch, uint32_t row, uint32_t col) const override;
+
+private:
+ FeatureShape _shape;
+
+public:
+ static std::unique_ptr<BC> create(const nncc::core::ADT::feature::Shape &shape);
+};
+
+/**
+ * @brief Generic Feature Layout
+ */
+class Generic final : public FeatureLayout
+{
+private:
+ Generic(const FeatureShape &shape);
+
+public:
+ static const FeatureLayout::ID *uid(void);
+ const FeatureLayout::ID *id(void) const override { return uid(); }
+
+ const FeatureShape &shape(void) const override { return _shape; }
+
+ ElemID &at(uint32_t b, uint32_t ch, uint32_t row, uint32_t col);
+ ElemID at(uint32_t b, uint32_t ch, uint32_t row, uint32_t col) const override;
+
+ void reorder(const nncc::core::ADT::feature::Layout &l);
+
+private:
+ uint32_t offset(uint32_t b, uint32_t ch, uint32_t row, uint32_t col) const;
+
+private:
+ FeatureShape _shape;
+
+private:
+ std::vector<ElemID> _content;
+
+public:
+ static std::unique_ptr<Generic> create(const nncc::core::ADT::feature::Shape &shape);
+};
+
+} // namespace FeatureLayouts
+} // namespace coco
+
+#endif // __COCO_IR_FEATURE_LAYOUTS_H__
diff --git a/compiler/coco/core/include/coco/IR/FeatureObject.forward.h b/compiler/coco/core/include/coco/IR/FeatureObject.forward.h
new file mode 100644
index 000000000..41477e853
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/FeatureObject.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_FEATURE_OBJECT_FORWARD_H__
+#define __COCO_IR_FEATURE_OBJECT_FORWARD_H__
+
+namespace coco
+{
+
+class FeatureObject;
+
+} // namespace coco
+
+#endif // __COCO_IR_FEATURE_OBJECT_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/FeatureObject.h b/compiler/coco/core/include/coco/IR/FeatureObject.h
new file mode 100644
index 000000000..f4244d9be
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/FeatureObject.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_FEATURE_OBJECT_H__
+#define __COCO_IR_FEATURE_OBJECT_H__
+
+#include "coco/IR/Object.h"
+#include "coco/IR/FeatureShape.h"
+#include "coco/IR/FeatureLayout.h"
+#include "coco/IR/ElemID.h"
+
+#include <nncc/core/ADT/feature/Layout.h>
+
+#include <vector>
+
+namespace coco
+{
+
+/**
+ * @brief FeatureMap values (used in CNN)
+ */
+class FeatureObject final : public Object
+{
+public:
+ FeatureObject() = default;
+
+public:
+ ~FeatureObject();
+
+public:
+ Object::Kind kind(void) const override { return Object::Kind::Feature; }
+
+public:
+ FeatureObject *asFeature(void) override { return this; }
+ const FeatureObject *asFeature(void) const override { return this; }
+
+public:
+ const FeatureShape &shape(void) const;
+
+public:
+ const FeatureLayout *layout(void) const { return _layout.get(); }
+ void layout(std::unique_ptr<FeatureLayout> &&l) { _layout = std::move(l); }
+
+private:
+ std::unique_ptr<FeatureLayout> _layout;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_FEATURE_OBJECT_H__
diff --git a/compiler/coco/core/include/coco/IR/FeatureShape.h b/compiler/coco/core/include/coco/IR/FeatureShape.h
new file mode 100644
index 000000000..015fc709d
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/FeatureShape.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_FEATURE_SHAPE_H__
+#define __COCO_IR_FEATURE_SHAPE_H__
+
+#include <nncc/core/ADT/feature/Shape.h>
+
+namespace coco
+{
+
+/**
+ * @brief The shape of a feature map
+ *
+ * TODO Implement coco's own FeatureShape without "nncc::core::ADT::feature::Shape"
+ */
+class FeatureShape : public nncc::core::ADT::feature::Shape
+{
+public:
+ FeatureShape(uint32_t depth, uint32_t height, uint32_t width)
+ : Shape{depth, height, width}, _batch{1}
+ {
+ // DO NOTHING
+ }
+
+ FeatureShape(uint32_t batch, uint32_t depth, uint32_t height, uint32_t width)
+ : Shape{depth, height, width}, _batch{batch}
+ {
+ // DO NOTHING
+ }
+
+ FeatureShape(const nncc::core::ADT::feature::Shape &shape) : Shape{shape}, _batch{1}
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t batch(void) const { return _batch; }
+
+private:
+ uint32_t _batch;
+};
+
+static inline bool operator==(const FeatureShape &lhs, const FeatureShape &rhs)
+{
+ return (lhs.batch() == rhs.batch()) && (lhs.depth() == rhs.depth()) &&
+ (lhs.height() == rhs.height()) && (lhs.width() == rhs.width());
+}
+
+static inline bool operator!=(const FeatureShape &lhs, const FeatureShape &rhs)
+{
+ return !(lhs == rhs);
+}
+
+} // namespace coco
+
+#endif // __COCO_IR_FEATURE_SHAPE_H__
diff --git a/compiler/coco/core/include/coco/IR/Input.forward.h b/compiler/coco/core/include/coco/IR/Input.forward.h
new file mode 100644
index 000000000..4b529cddf
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Input.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_INPUT_FORWARD_H__
+#define __COCO_IR_INPUT_FORWARD_H__
+
+namespace coco
+{
+
+class Input;
+
+} // namespace coco
+
+#endif // __COCO_IR_INPUT_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Input.h b/compiler/coco/core/include/coco/IR/Input.h
new file mode 100644
index 000000000..ef8e88c9d
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Input.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_INPUT_H__
+#define __COCO_IR_INPUT_H__
+
+#include "coco/IR/Arg.h"
+#include "coco/IR/Entity.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Index.h>
+
+#include <string>
+#include <vector>
+
+namespace coco
+{
+
+class Input final : public Arg, public Entity
+{
+public:
+ Input(const nncc::core::ADT::tensor::Shape &shape);
+
+private:
+ void onTake(Bag *) override;
+ void onRelease(Bag *) override;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_INPUT_H__
diff --git a/compiler/coco/core/include/coco/IR/InputList.h b/compiler/coco/core/include/coco/IR/InputList.h
new file mode 100644
index 000000000..cd6337a5a
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/InputList.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_INPUT_LIST_H__
+#define __COCO_IR_INPUT_LIST_H__
+
+#include "coco/IR/Input.h"
+
+#include "coco/ADT/PtrList.h"
+
+namespace coco
+{
+
+using InputList = PtrList<Input>;
+
+} // namespace coco
+
+#endif // __COCO_IR_INPUT_LIST_H__
diff --git a/compiler/coco/core/include/coco/IR/InputManager.h b/compiler/coco/core/include/coco/IR/InputManager.h
new file mode 100644
index 000000000..bfbd712b5
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/InputManager.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_INPUT_MANAGER_H__
+#define __COCO_IR_INPUT_MANAGER_H__
+
+#include "coco/IR/Input.h"
+#include "coco/IR/EntityBuilder.h"
+
+#include "coco/ADT/PtrManager.h"
+
+namespace coco
+{
+
+class InputManager final : public PtrManager<Input>, public EntityBuilder
+{
+public:
+ InputManager(Module *m = nullptr) { module(m); }
+
+public:
+ Input *create(const nncc::core::ADT::tensor::Shape &);
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_INPUT_MANAGER_H__
diff --git a/compiler/coco/core/include/coco/IR/Instr.forward.h b/compiler/coco/core/include/coco/IR/Instr.forward.h
new file mode 100644
index 000000000..4043970db
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Instr.forward.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_INSTR_FORWARD_H__
+#define __COCO_IR_INSTR_FORWARD_H__
+
+namespace coco
+{
+
+// WARNING This header should be aligned with Instr.h
+class Instr;
+
+} // namespace coco
+
+#endif // __COCO_IR_INSTR_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Instr.h b/compiler/coco/core/include/coco/IR/Instr.h
new file mode 100644
index 000000000..fc1cc332d
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Instr.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_INSTR_H__
+#define __COCO_IR_INSTR_H__
+
+#include "coco/IR/Bag.h"
+#include "coco/IR/Block.forward.h"
+#include "coco/IR/Instr.forward.h"
+#include "coco/IR/InstrIndex.h"
+#include "coco/IR/Entity.h"
+
+#include "coco/ADT/DLinkedList.h"
+
+#include <cassert>
+#include <stdexcept>
+
+namespace coco
+{
+
+#define INSTR(Name) class Name;
+#include "coco/IR/Instr.lst"
+#undef INSTR
+
+using InstrList = coco::DLinkedList<Instr, Block>::Head;
+
+/**
+ * @brief Base interface on explicit computation steps in coco IR
+ *
+ * NOTE Input/output is explicit in Instr, but implicit in Op
+ * NOTE An Instr may (but does not always) combine multiple NN operations
+ *
+ * One may find a set of supported instructions from "Instrs.h"
+ *
+ * >> How to add a new base instruction in coco IR <<
+ *
+ * To introduce a new instruction (whose name is INS),
+ * 1. Append "INSTR(INS)" to "Instr.lst"
+ * 2. Declare class INS which inherits Instr class in "Instrs.h"
+ * NOTE This class SHOULD be default constructible
+ *
+ */
+class Instr : public coco::DLinkedList<Instr, Block>::Node, public Entity
+{
+public:
+ friend void DLinkedList<Instr, Block>::joined(Block *, Instr *);
+ friend void DLinkedList<Instr, Block>::leaving(Block *, Instr *);
+
+public:
+ Instr() = default;
+
+public:
+ Instr(const Instr &) = delete;
+ Instr(Instr &&) = delete;
+
+public:
+ virtual ~Instr()
+ {
+ if (parent())
+ {
+ // NOTE It is safe to invoke detach here (although "Instr" is not a final class)
+ // as "leaving" hook accesses only the internal of "Instr" class
+ detach();
+ }
+ }
+
+public:
+#define INSTR(Name) \
+ virtual Name *as##Name(void) { return nullptr; } \
+ virtual const Name *as##Name(void) const { return nullptr; }
+#include "coco/IR/Instr.lst"
+#undef INSTR
+
+public:
+ /**
+ * @brief Instr visitor interface
+ *
+ * WARN Use this interface only for coco-internal classes
+ * (to minimize changes upon Instr extension)
+ */
+ template <typename T> struct IVisitor
+ {
+ virtual ~IVisitor() = default;
+
+#define INSTR(Name) virtual T visit(const Name *) = 0;
+#include "coco/IR/Instr.lst"
+#undef INSTR
+ };
+
+ template <typename T> struct Visitor : public IVisitor<T>
+ {
+ virtual ~Visitor() = default;
+
+#define INSTR(Name) \
+ T visit(const Name *) override { throw std::runtime_error{"NYI"}; }
+#include "coco/IR/Instr.lst"
+#undef INSTR
+ };
+
+public:
+ template <typename T> T accept(IVisitor<T> *v) const
+ {
+#define INSTR(Name) \
+ if (auto ins = as##Name()) \
+ { \
+ return v->visit(ins); \
+ }
+#include "coco/IR/Instr.lst"
+#undef INSTR
+ throw std::runtime_error{"unreachable"};
+ }
+
+ template <typename T> T accept(IVisitor<T> &v) const { return accept(&v); }
+ template <typename T> T accept(IVisitor<T> &&v) const { return accept(&v); }
+
+public:
+ const InstrIndex &index(void) const { return _index; }
+
+private:
+ InstrIndex _index;
+};
+
+/**
+ * @brief Return true if a given instruction is of T type
+ *
+ * @note "ins" cannot be a null pointer
+ */
+template <typename T> bool isa(const Instr *ins)
+{
+ assert(ins != nullptr);
+ return dynamic_cast<const T *>(ins) != nullptr;
+}
+
+/**
+ * @brief Cast as a derived instruction
+ *
+ * @note "safe_cast<T>(ins)" returns a null pointer if "ins" is not of T type
+ * @note "safe_cast<T>(ins)" returns a null pointer if "ins" is a null pointer
+ */
+template <typename T> T *safe_cast(Instr *ins)
+{
+ // NOTE dynamic_cast<T *>(nullptr) returns nullptr
+ return dynamic_cast<T *>(ins);
+}
+
+} // namespace coco
+
+#endif // __COCO_IR_INSTR_H__
diff --git a/compiler/coco/core/include/coco/IR/Instr.lst b/compiler/coco/core/include/coco/IR/Instr.lst
new file mode 100644
index 000000000..f13a65bf2
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Instr.lst
@@ -0,0 +1,9 @@
+#ifndef INSTR
+#error Define INSTR first
+#endif // INSTR
+
+// INSTR(Name)
+
+INSTR(Eval)
+INSTR(Shuffle)
+INSTR(Copy)
diff --git a/compiler/coco/core/include/coco/IR/InstrIndex.h b/compiler/coco/core/include/coco/IR/InstrIndex.h
new file mode 100644
index 000000000..a61d97cad
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/InstrIndex.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_INSTR_INDEX_H__
+#define __COCO_IR_INSTR_INDEX_H__
+
+#include <cstdint>
+
+namespace coco
+{
+
+/**
+ * @brief A InstrIndex denotes the index of an instruction in an instruction list
+ */
+class InstrIndex final
+{
+private:
+ static const uint32_t undefined = 0xffffffff;
+
+public:
+ InstrIndex() : _value{undefined}
+ {
+ // DO NOTHING
+ }
+
+public:
+ InstrIndex(uint32_t value) { set(value); }
+
+public:
+ bool valid(void) const { return _value != undefined; }
+
+public:
+ uint32_t value(void) const { return _value; }
+
+public:
+ void set(uint32_t value);
+ void reset(void) { _value = undefined; }
+
+private:
+ uint32_t _value;
+};
+
+static inline bool operator<(const InstrIndex &lhs, const InstrIndex &rhs)
+{
+ return lhs.value() < rhs.value();
+}
+
+} // namespace coco
+
+#endif // __COCO_IR_INSTR_INDEX_H__
diff --git a/compiler/coco/core/include/coco/IR/InstrManager.h b/compiler/coco/core/include/coco/IR/InstrManager.h
new file mode 100644
index 000000000..537467ae2
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/InstrManager.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_INSTR_MANAGER_H__
+#define __COCO_IR_INSTR_MANAGER_H__
+
+#include "coco/IR/Instr.h"
+#include "coco/IR/Instrs.h"
+
+#include "coco/IR/Op.forward.h"
+
+#include "coco/IR/Bag.h"
+
+#include "coco/IR/Object.forward.h"
+
+#include "coco/IR/EntityBuilder.h"
+
+#include "coco/ADT/PtrManager.h"
+
+namespace coco
+{
+
+class InstrManager final : public PtrManager<Instr>, public EntityBuilder
+{
+public:
+ InstrManager(Module *m = nullptr) { module(m); }
+
+public:
+ template <typename Ins> Ins *create(void);
+
+public:
+ /**
+ * @brief Destroy (= deallocate) an Instr instance
+ *
+   * NOTE destroy(ins) WILL NOT update ins->parent(). An Instruction SHOULD BE detached from a
+ * module before destroy call
+ */
+ void destroy(Instr *);
+};
+
+//
+// Every instruction class SHOULD be default constructible
+//
+template <typename Ins> Ins *InstrManager::create(void)
+{
+ auto ins = new Ins;
+ modulize(ins);
+ return take(std::unique_ptr<Ins>(ins));
+}
+
+} // namespace coco
+
+#endif // __COCO_IR_INSTR_MANAGER_H__
diff --git a/compiler/coco/core/include/coco/IR/Instrs.h b/compiler/coco/core/include/coco/IR/Instrs.h
new file mode 100644
index 000000000..9245443e9
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Instrs.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_INSTRS_H__
+#define __COCO_IR_INSTRS_H__
+
+#include "coco/IR/Instr.h"
+
+#include "coco/IR/ElemID.h"
+
+#include "coco/IR/Bag.h"
+#include "coco/IR/Object.h"
+
+#include "coco/IR/Def.h"
+#include "coco/IR/Use.h"
+#include "coco/IR/Read.h"
+#include "coco/IR/Update.h"
+
+#include "coco/IR/Step.h"
+
+#include <map>
+
+namespace coco
+{
+
+/**
+ * @brief Evaluate an Object from a given Op
+ */
+class Eval final : public Instr, public Object::Producer
+{
+public:
+ explicit Eval();
+
+public:
+ Eval *asEval(void) override { return this; }
+ const Eval *asEval(void) const override { return this; }
+
+public:
+ Instr *loc(void) override { return this; }
+
+public:
+ Object *out(void) const { return _out.value(); }
+ void out(Object *obj) { _out.value(obj); }
+
+public:
+ Op *op(void) const { return _step.op(); }
+ void op(Op *op) { _step.op(op); }
+
+private:
+ Def _out;
+ Step _step;
+};
+
+/**
+ * @brief Index-wise element transfer between two objects
+ *
+ * Given two objects "src" and "dst" of the same kind/shape, "copy(src, dst)"
+ * denotes index-wise element transfer.
+ *
+ * For example, the following pseudo-code describes "copy(src, dst)"
+ * when both src and dst are a feature map of the shape B x C x H x W:
+ *
+ * for each valid index b, ch, row, col:
+ * load the "src->at(b, ch, row, col)"-th element from bag(src)
+ * store it as the "dst->at(b, ch, row, col)"-th element of bag(dst)
+ *
+ * In principle, "copy" is unnecessary as it is always possible to rewrite "copy"
+ * as a "shuffle" below. However, "shuffle"-based optimization is too heavy as it
+ * requires much of iterations.
+ */
+class Copy final : public Instr, public Object::Producer, public Object::Consumer
+{
+public:
+ Copy() : _from{this}, _into{this}
+ {
+ // DO NOTHING
+ }
+
+public:
+ Copy *asCopy(void) override { return this; }
+ const Copy *asCopy(void) const override { return this; }
+
+public:
+ Instr *loc(void) override { return this; }
+
+public:
+ Object *from(void) const { return _from.value(); }
+ void from(Object *o) { _from.value(o); }
+
+public:
+ Object *into(void) const { return _into.value(); }
+ void into(Object *o) { _into.value(o); }
+
+private:
+ Use _from;
+ Def _into;
+};
+
+/**
+ * @brief Generic element transfer
+ */
+class Shuffle final : public Instr, public Bag::Reader, public Bag::Updater
+{
+public:
+ Shuffle() : _from{this}, _into{this}
+ {
+ // DO NOTHING
+ }
+
+public:
+ Shuffle *asShuffle(void) override { return this; }
+ const Shuffle *asShuffle(void) const override { return this; }
+
+public:
+ Instr *loc(void) override { return this; }
+
+public:
+ Bag *from(void) const { return _from.bag(); }
+ void from(Bag *bag);
+
+public:
+ Bag *into(void) const { return _into.bag(); }
+ void into(Bag *);
+
+public:
+ /**
+ * @brief Return the number of Element-wise transfers
+ *
+ * NOTE size() SHOULD BE identical to range().size()
+ */
+ uint32_t size(void) const;
+
+ /// @brief Return a set of elements in the destination bag that Shuffle will update
+ std::set<ElemID> range(void) const;
+
+public:
+ /// @brief Return true if a given elem is updated after execution
+ bool defined(const ElemID &dst) const { return _content.find(dst) != _content.end(); }
+
+public:
+ /**
+ * Let M be the return of at(N). This means that N-th element in the destination
+ * bag will be filled with the value of M-th element in the source bag.
+ *
+ * NOTE at(n) may be undefined on partial shuffle
+ */
+ const ElemID &at(const ElemID &dst) const { return _content.at(dst); }
+
+public:
+ void insert(const ElemID &from, const ElemID &into);
+
+private:
+ Read _from;
+ Update _into;
+
+private:
+ std::map<ElemID /* DST */, ElemID /* SRC */> _content;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_INSTRS_H__
diff --git a/compiler/coco/core/include/coco/IR/KernelLayout.h b/compiler/coco/core/include/coco/IR/KernelLayout.h
new file mode 100644
index 000000000..49aaf1a81
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/KernelLayout.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_KERNEL_LAYOUT_H__
+#define __COCO_IR_KERNEL_LAYOUT_H__
+
+#include "coco/IR/ElemID.h"
+
+#include <nncc/core/ADT/kernel/Shape.h>
+
+namespace coco
+{
+
+/**
+ * @brief A KernelLayout connects each kernel index to an element (in a bag)
+ *
+ * NOTE KernelLayout is an immutable interface
+ */
+struct KernelLayout
+{
+ struct ID
+ {
+ virtual ~ID() = default;
+ };
+
+ virtual ~KernelLayout() = default;
+
+ /**
+ * @brief Return the identifier of each layout
+ *
+ * REQUIRED
+ *
+ * Given l1 and l2 of KernelLayout * type,
+   * typeid(*l1) == typeid(*l2) SHOULD hold if l1->id() == l2->id() holds.
+ */
+ virtual const ID *id(void) const = 0;
+
+ virtual const nncc::core::ADT::kernel::Shape &shape(void) const = 0;
+
+ virtual ElemID at(uint32_t n, uint32_t ch, uint32_t row, uint32_t col) const = 0;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_KERNEL_LAYOUT_H__
diff --git a/compiler/coco/core/include/coco/IR/KernelLayouts.h b/compiler/coco/core/include/coco/IR/KernelLayouts.h
new file mode 100644
index 000000000..0a04cf163
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/KernelLayouts.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_KERNEL_LAYOUTS_H__
+#define __COCO_IR_KERNEL_LAYOUTS_H__
+
+#include "coco/IR/KernelLayout.h"
+
+#include <nncc/core/ADT/kernel/Layout.h>
+
+#include <vector>
+#include <memory>
+
+namespace coco
+{
+namespace KernelLayouts
+{
+
+/**
+ * @brief NCHW Kernel Layout
+ */
+class NCHW final : public KernelLayout
+{
+private:
+ NCHW(const nncc::core::ADT::kernel::Shape &shape) : _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ static const KernelLayout::ID *uid(void);
+ const KernelLayout::ID *id(void) const override { return uid(); }
+
+ const nncc::core::ADT::kernel::Shape &shape(void) const override { return _shape; }
+
+ ElemID at(uint32_t n, uint32_t ch, uint32_t row, uint32_t col) const override;
+
+private:
+ nncc::core::ADT::kernel::Shape _shape;
+
+public:
+ static std::unique_ptr<NCHW> create(const nncc::core::ADT::kernel::Shape &shape);
+};
+
+/**
+ * @brief NHWC Kernel Layout
+ */
+class NHWC final : public KernelLayout
+{
+private:
+ NHWC(const nncc::core::ADT::kernel::Shape &shape) : _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ static const KernelLayout::ID *uid(void);
+ const KernelLayout::ID *id(void) const override { return uid(); }
+
+ const nncc::core::ADT::kernel::Shape &shape(void) const override { return _shape; }
+
+ ElemID at(uint32_t n, uint32_t ch, uint32_t row, uint32_t col) const override;
+
+private:
+ nncc::core::ADT::kernel::Shape _shape;
+
+public:
+ static std::unique_ptr<NHWC> create(const nncc::core::ADT::kernel::Shape &shape);
+};
+
+/**
+ * @brief Generic Kernel Layout
+ */
+class Generic final : public KernelLayout
+{
+private:
+ Generic(const nncc::core::ADT::kernel::Shape &shape);
+
+public:
+ static const KernelLayout::ID *uid(void);
+ const KernelLayout::ID *id(void) const override { return uid(); }
+
+ const nncc::core::ADT::kernel::Shape &shape(void) const override { return _shape; }
+
+ ElemID &at(uint32_t n, uint32_t ch, uint32_t row, uint32_t col);
+ ElemID at(uint32_t n, uint32_t ch, uint32_t row, uint32_t col) const override;
+
+ void reorder(const nncc::core::ADT::kernel::Layout &l);
+ template <typename LayoutImpl> void reorder(void) { reorder(LayoutImpl{}); }
+
+private:
+ nncc::core::ADT::kernel::Shape _shape;
+
+private:
+ std::vector<ElemID> _content;
+
+public:
+ static std::unique_ptr<Generic> create(const nncc::core::ADT::kernel::Shape &shape);
+};
+
+} // namespace KernelLayouts
+} // namespace coco
+
+#endif // __COCO_IR_KERNEL_LAYOUTS_H__
diff --git a/compiler/coco/core/include/coco/IR/KernelObject.forward.h b/compiler/coco/core/include/coco/IR/KernelObject.forward.h
new file mode 100644
index 000000000..10fbac4ca
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/KernelObject.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_KERNEL_OBJECT_FORWARD_H__
+#define __COCO_IR_KERNEL_OBJECT_FORWARD_H__
+
+namespace coco
+{
+
+class KernelObject;
+
+} // namespace coco
+
+#endif // __COCO_IR_KERNEL_OBJECT_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/KernelObject.h b/compiler/coco/core/include/coco/IR/KernelObject.h
new file mode 100644
index 000000000..2ec0cee0b
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/KernelObject.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_KERNEL_OBJECT_H__
+#define __COCO_IR_KERNEL_OBJECT_H__
+
+#include "coco/IR/Object.h"
+#include "coco/IR/KernelLayout.h"
+#include "coco/IR/ElemID.h"
+
+#include <nncc/core/ADT/kernel/Shape.h>
+#include <nncc/core/ADT/kernel/Layout.h>
+
+namespace coco
+{
+
+/**
+ * @brief Convolution Kernel (in CNN) values
+ */
+class KernelObject final : public Object
+{
+public:
+ KernelObject() = default;
+ explicit KernelObject(const nncc::core::ADT::kernel::Shape &shape);
+
+public:
+ virtual ~KernelObject();
+
+public:
+ Object::Kind kind(void) const override { return Object::Kind::Kernel; }
+
+public:
+ KernelObject *asKernel(void) override { return this; }
+ const KernelObject *asKernel(void) const override { return this; }
+
+public:
+ const nncc::core::ADT::kernel::Shape &shape(void) const;
+
+public:
+ ElemID at(uint32_t n, uint32_t ch, uint32_t row, uint32_t col) const;
+
+public:
+ const KernelLayout *layout(void) const { return _layout.get(); }
+ void layout(std::unique_ptr<KernelLayout> &&l) { _layout = std::move(l); }
+
+private:
+ std::unique_ptr<KernelLayout> _layout;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_KERNEL_OBJECT_H__
diff --git a/compiler/coco/core/include/coco/IR/Locatable.h b/compiler/coco/core/include/coco/IR/Locatable.h
new file mode 100644
index 000000000..b80a4a360
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Locatable.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_LOCATABLE_H__
+#define __COCO_IR_LOCATABLE_H__
+
+#include "coco/IR/Instr.forward.h"
+
+namespace coco
+{
+
+/**
+ * @brief Return the associated instruction if exists.
+ */
+struct Locatable
+{
+ virtual ~Locatable() = default;
+
+ virtual Instr *loc(void) = 0;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_LOCATABLE_H__
diff --git a/compiler/coco/core/include/coco/IR/Module.forward.h b/compiler/coco/core/include/coco/IR/Module.forward.h
new file mode 100644
index 000000000..94f8cc7d2
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Module.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_MODULE_FORWARD_H__
+#define __COCO_IR_MODULE_FORWARD_H__
+
+namespace coco
+{
+
+class Module;
+
+} // namespace coco
+
+#endif // __COCO_IR_MODULE_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Module.h b/compiler/coco/core/include/coco/IR/Module.h
new file mode 100644
index 000000000..9eb0b248b
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Module.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_MODULE_H__
+#define __COCO_IR_MODULE_H__
+
+#include "coco/IR/EntityManager.h"
+#include "coco/IR/Block.h"
+#include "coco/IR/InputList.h"
+#include "coco/IR/OutputList.h"
+
+#include <memory>
+
+namespace coco
+{
+
+/**
+ * @brief Top-level element of coco IR which represents a neural network
+ */
+class Module
+{
+public:
+ Module() = default;
+
+public:
+ Module(const Module &) = delete;
+ Module(Module &&) = delete;
+
+public:
+ virtual ~Module() = default;
+
+public:
+ virtual EntityManager *entity(void) = 0;
+ virtual const EntityManager *entity(void) const = 0;
+
+public:
+ virtual BlockList *block(void) = 0;
+ virtual const BlockList *block(void) const = 0;
+
+public:
+ virtual InputList *input(void) = 0;
+ virtual const InputList *input(void) const = 0;
+
+public:
+ virtual OutputList *output(void) = 0;
+ virtual const OutputList *output(void) const = 0;
+
+public:
+ static std::unique_ptr<Module> create(void);
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_MODULE_H__
diff --git a/compiler/coco/core/include/coco/IR/Object.forward.h b/compiler/coco/core/include/coco/IR/Object.forward.h
new file mode 100644
index 000000000..d9a6c0422
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Object.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_OBJECT_FORWARD_H__
+#define __COCO_IR_OBJECT_FORWARD_H__
+
+namespace coco
+{
+
+class Object;
+
+} // namespace coco
+
+#endif // __COCO_IR_OBJECT_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Object.h b/compiler/coco/core/include/coco/IR/Object.h
new file mode 100644
index 000000000..617e8a198
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Object.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_OBJECT_H__
+#define __COCO_IR_OBJECT_H__
+
+#include "coco/IR/Entity.h"
+#include "coco/IR/Bag.h"
+#include "coco/IR/Dep.h"
+#include "coco/IR/Def.forward.h"
+#include "coco/IR/UseSet.h"
+
+#include "coco/IR/FeatureObject.forward.h"
+#include "coco/IR/KernelObject.forward.h"
+
+#include <set>
+
+namespace coco
+{
+
+/**
+ * @brief Base interface on all typed NN values
+ */
+class Object : public Entity
+{
+public:
+ friend class Def;
+ friend class Use;
+
+public:
+ enum class Kind
+ {
+ Unknown,
+ Feature,
+ Kernel,
+ };
+
+public:
+ struct Producer : public Bag::Updater
+ {
+ virtual ~Producer() = default;
+ };
+
+ struct Consumer : public Bag::Reader
+ {
+ virtual ~Consumer() = default;
+ };
+
+ using ConsumerSet = std::set<Consumer *>;
+
+public:
+ Object();
+
+public:
+ virtual ~Object() = default;
+
+public:
+ virtual Kind kind(void) const { return Kind::Unknown; }
+
+public:
+ coco::Bag *bag(void) const { return _dep.bag(); }
+ void bag(coco::Bag *bag) { _dep.bag(bag); }
+
+public:
+ virtual FeatureObject *asFeature(void) { return nullptr; }
+ virtual const FeatureObject *asFeature(void) const { return nullptr; }
+
+ virtual KernelObject *asKernel(void) { return nullptr; }
+ virtual const KernelObject *asKernel(void) const { return nullptr; }
+
+public:
+ Def *def(void) const;
+ const UseSet *uses(void) const;
+
+private:
+ /**
+ * @brief Update the link to a producer
+ *
+ * WARN Only Def class is allowed to access this method
+ */
+ void def(Def *d);
+
+ // NOTE "mutable_" prefix is introduced to avoid resolution issue similarly as in Bag
+ // WARN Only Use class is allowed to access this method
+ UseSet *mutable_uses(void);
+
+private:
+ Dep _dep;
+ Def *_def = nullptr;
+ UseSet _uses;
+};
+
+/**
+ * @brief Check whether a given object is of type T
+ *
+ * The example below shows how to use this "isa<T>" helper:
+ * auto obj = new FeatureObject{};
+ *
+ * if (isa<FeatureObject>(obj))
+ * {
+ * std::cout << "FeatureObject" << std::endl;
+ * }
+ */
+template <typename T> bool isa(const Object *);
+
+/**
+ * @brief Cast a generic object as a specific one
+ *
+ * "cast<T>(o)" accepts only a valid object pointer "o" that "isa<T>(o)" holds
+ * - Then, "cast<T>(o)" always returns a valid object pointer.
+ */
+template <typename T> T *cast(Object *);
+
+/**
+ * @brief Cast a generic object as a specific one
+ *
+ * Unlike "cast<T>", "safe_cast<T>" accepts any object pointer
+ * - "safe_cast<T>(nullptr)" returns "nullptr"
+ * - "safe_cast<T>(o)" returns "nullptr" if "isa<T>(o)" does not hold
+ */
+template <typename T> T *safe_cast(Object *);
+
+/// @brief Return the producer of a given object if it exists
+Object::Producer *producer(const Object *);
+
+/// @brief Return a set of consumers of a given object.
+Object::ConsumerSet consumers(const Object *);
+
+} // namespace coco
+
+#endif // __COCO_IR_OBJECT_H__
diff --git a/compiler/coco/core/include/coco/IR/ObjectManager.h b/compiler/coco/core/include/coco/IR/ObjectManager.h
new file mode 100644
index 000000000..a05b724ce
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/ObjectManager.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_OBJECT_MANAGER_H__
+#define __COCO_IR_OBJECT_MANAGER_H__
+
+#include "coco/IR/Object.h"
+#include "coco/IR/FeatureShape.h"
+#include "coco/IR/FeatureObject.h"
+#include "coco/IR/KernelObject.forward.h"
+#include "coco/IR/EntityBuilder.h"
+
+#include "coco/ADT/PtrManager.h"
+
+#include <nncc/core/ADT/kernel/Shape.h>
+
+namespace coco
+{
+
+class ObjectManager final : public PtrManager<Object>, public EntityBuilder
+{
+public:
+ ObjectManager(Module *m = nullptr) { module(m); }
+
+public:
+ template <typename T> T *create(void);
+
+public:
+ /**
+ * @brief Destroy (= deallocate) an Object entity
+ *
+ * NOTE An Object SHOULD HAVE NO DEF & USES to be destructed
+ * NOTE An Object WILL BE unlinked from its dependent bag (if has) on destruction
+ */
+ void destroy(Object *o);
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_OBJECT_MANAGER_H__
diff --git a/compiler/coco/core/include/coco/IR/ObjectSet.h b/compiler/coco/core/include/coco/IR/ObjectSet.h
new file mode 100644
index 000000000..d97781996
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/ObjectSet.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_OBJECT_SET_H__
+#define __COCO_IR_OBJECT_SET_H__
+
+#include "coco/IR/Object.forward.h"
+
+#include <set>
+
+namespace coco
+{
+
+using ObjectSet = std::set<Object *>;
+
+} // namespace coco
+
+#endif // __COCO_IR_OBJECT_SET_H__
diff --git a/compiler/coco/core/include/coco/IR/Op.forward.h b/compiler/coco/core/include/coco/IR/Op.forward.h
new file mode 100644
index 000000000..9ba3c94e3
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Op.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_OP_FORWARD_H__
+#define __COCO_IR_OP_FORWARD_H__
+
+namespace coco
+{
+
+struct Op;
+
+} // namespace coco
+
+#endif // __COCO_IR_OP_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Op.h b/compiler/coco/core/include/coco/IR/Op.h
new file mode 100644
index 000000000..090527e2f
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Op.h
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Op.h
+ * @brief This header file declares "Op" class and several traits related with "Op"
+ */
+#ifndef __COCO_IR_OP_H__
+#define __COCO_IR_OP_H__
+
+#include "coco/IR/Object.forward.h"
+#include "coco/IR/Instr.forward.h"
+#include "coco/IR/Step.forward.h"
+#include "coco/IR/Part.h"
+#include "coco/IR/Entity.h"
+
+#include <set>
+
+#include <stdexcept>
+
+namespace coco
+{
+
+#define OP(Name) class Name;
+#include "coco/IR/Op.lst"
+#undef OP
+
+/**
+ * @brief Base interface on all supported NN operations
+ */
+struct Op : public Entity
+{
+ friend class Step;
+ friend class Part;
+
+ virtual ~Op();
+
+ /**
+ * @brief Return the number of arguments (# of child Ops)
+ */
+ virtual uint32_t arity(void) const = 0;
+
+ /**
+ * @brief Return N-th argument
+ *
+ * @note The behavior of arg(n) is defined only when n < arity()
+ */
+ virtual Op *arg(uint32_t n) const = 0;
+
+ /**
+ * @brief Return a set of object(s) used during execution
+ *
+ * NOTE There is no 'def' method as Op is not allowed to define a new object
+ */
+ virtual std::set<Object *> uses(void) const = 0;
+
+#define OP(Name) \
+ virtual Name *as##Name(void) { return nullptr; } \
+ virtual const Name *as##Name(void) const { return nullptr; }
+#include "coco/IR/Op.lst"
+#undef OP
+
+ /**
+ * @brief Op visitor interface
+ *
+ * WARN Use this interface only for coco-internal classes
+ * (to minimize changes upon Op extension)
+ */
+ template <typename T> struct IVisitor
+ {
+ virtual ~IVisitor() = default;
+
+#define OP(Name) virtual T visit(const Name *) = 0;
+#include "coco/IR/Op.lst"
+#undef OP
+ };
+
+ template <typename T> struct Visitor : public IVisitor<T>
+ {
+ virtual ~Visitor() = default;
+
+#define OP(Name) \
+ T visit(const Name *) override { throw std::runtime_error{"NYI"}; }
+#include "coco/IR/Op.lst"
+#undef OP
+ };
+
+ template <typename T> T accept(IVisitor<T> *v) const
+ {
+#define OP(Name) \
+ if (auto op = as##Name()) \
+ { \
+ return v->visit(op); \
+ }
+#include "coco/IR/Op.lst"
+#undef OP
+ throw std::runtime_error{"unreachable"};
+ }
+
+ template <typename T> T accept(IVisitor<T> &v) const { return accept(&v); }
+ template <typename T> T accept(IVisitor<T> &&v) const { return accept(&v); }
+
+public:
+ /**
+ * @brief Op mutator interface
+ *
+ * WARN Use this interface only for coco-internal classes
+ * (to minimize changes upon Instr extension)
+ */
+ struct IMutator
+ {
+ virtual ~IMutator() = default;
+
+#define OP(Name) virtual void mutate(Name *) = 0;
+#include "coco/IR/Op.lst"
+#undef OP
+ };
+
+ struct Mutator : public IMutator
+ {
+ virtual ~Mutator() = default;
+
+#define OP(Name) \
+ void mutate(Name *) override { throw std::runtime_error{"NYI"}; }
+#include "coco/IR/Op.lst"
+#undef OP
+ };
+
+ void accept(IMutator *m)
+ {
+#define OP(Name) \
+ if (auto op = as##Name()) \
+ { \
+ return m->mutate(op); \
+ }
+#include "coco/IR/Op.lst"
+#undef OP
+ throw std::runtime_error{"unreachable"};
+ }
+
+ void accept(IMutator &m) { return accept(&m); }
+ void accept(IMutator &&m) { return accept(&m); }
+
+public:
+ Instr *parent(void) const;
+
+ /// @brief Return a pointer to the parent Op
+ Op *up(void) const;
+
+private:
+ /**
+ * @brief A link to Instr from Op
+ *
+ * WARN Update this field only through Step
+ */
+ Step *_step = nullptr;
+
+ /**
+ * @brief A link to a parent Op
+ *
+ * WARN Update this field only through Part
+ * NOTE An "Op" CANNOT have a link to a parent Op if it is linked to an "Instr"
+ */
+ Part *_part = nullptr;
+};
+
+/**
+ * @brief Op with a single argument
+ */
+class UnaryOp : public Op
+{
+public:
+ explicit UnaryOp();
+
+public:
+ UnaryOp(const UnaryOp &) = delete;
+ UnaryOp(UnaryOp &&) = delete;
+
+public:
+ virtual ~UnaryOp() = default;
+
+public:
+ uint32_t arity(void) const final;
+ Op *arg(uint32_t n) const final;
+
+ std::set<Object *> uses(void) const final;
+
+public:
+ Op *arg(void) const { return _arg.child(); }
+ void arg(Op *arg) { _arg.child(arg); }
+
+private:
+ /// @brief Link to Op's argument
+ Part _arg;
+};
+
+/**
+ * @brief Op with two arguments
+ */
+class BinaryOp : public Op
+{
+public:
+ explicit BinaryOp();
+
+public:
+ BinaryOp(const BinaryOp &) = delete;
+ BinaryOp(BinaryOp &&) = delete;
+
+public:
+ virtual ~BinaryOp() = default;
+
+public:
+ uint32_t arity(void) const final;
+ Op *arg(uint32_t n) const final;
+
+ std::set<Object *> uses(void) const final;
+
+public:
+ Op *left(void) const { return _left.child(); }
+ void left(Op *op) { _left.child(op); }
+
+public:
+ Op *right(void) const { return _right.child(); }
+ void right(Op *op) { _right.child(op); }
+
+private:
+ /// @brief Left-hand side (LHS) argument
+ Part _left;
+ /// @brief Right-hand side (RHS) argument
+ Part _right;
+};
+
+/**
+ * @brief Return the root Op from a given Op node
+ *
+ * @note root(op) == op holds for a root op
+ */
+Op *root(Op *);
+
+} // namespace coco
+
+#endif // __COCO_IR_OP_H__
diff --git a/compiler/coco/core/include/coco/IR/Op.lst b/compiler/coco/core/include/coco/IR/Op.lst
new file mode 100644
index 000000000..a3028bde2
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Op.lst
@@ -0,0 +1,19 @@
+#ifndef OP
+#error OP should be defined before including this file
+#endif // OP
+
+// OP(Name)
+
+OP(Load)
+OP(Conv2D)
+OP(MaxPool2D)
+OP(AvgPool2D)
+OP(PadF)
+OP(ReLU)
+OP(ReLU6)
+OP(Add)
+OP(Sqrt)
+OP(Sub)
+OP(Mul)
+OP(Div)
+OP(ConcatF)
diff --git a/compiler/coco/core/include/coco/IR/OpManager.h b/compiler/coco/core/include/coco/IR/OpManager.h
new file mode 100644
index 000000000..2c88867de
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/OpManager.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_OP_MANAGER_H__
+#define __COCO_IR_OP_MANAGER_H__
+
+#include "coco/IR/Op.h"
+#include "coco/IR/Ops.h"
+
+#include "coco/IR/Instr.forward.h"
+
+#include "coco/IR/Object.forward.h"
+
+#include "coco/IR/EntityBuilder.h"
+
+#include "coco/ADT/PtrManager.h"
+
+namespace coco
+{
+
+class OpManager final : public PtrManager<Op>, public EntityBuilder
+{
+public:
+ OpManager(Module *m = nullptr) { module(m); }
+
+public:
+ ~OpManager();
+
+public:
+ template <typename T> T *create(void);
+
+public:
+ /**
+ * @brief Destroy (= deallocate) an Op instance
+ *
+ * NOTE destroy(op) WILL NOT update op->parent(). Client SHOULD detach op before destroy(op) call
+ */
+ void destroy(Op *);
+
+ /**
+ * @brief Destroy an Op tree
+ *
+ * @require op->parent() == nullptr && op->up() == nullptr
+ */
+ void destroy_all(Op *);
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_OP_MANAGER_H__
diff --git a/compiler/coco/core/include/coco/IR/Ops.h b/compiler/coco/core/include/coco/IR/Ops.h
new file mode 100644
index 000000000..01ac92b7f
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Ops.h
@@ -0,0 +1,412 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_OPS_H__
+#define __COCO_IR_OPS_H__
+
+#include "coco/IR/Op.h"
+#include "coco/IR/Object.h"
+#include "coco/IR/KernelObject.h"
+
+#include "coco/IR/Use.h"
+#include "coco/IR/Part.h"
+
+#include "coco/IR/Padding2D.h"
+#include "coco/IR/Stride2D.h"
+#include "coco/IR/Window2D.h"
+
+namespace coco
+{
+
+/**
+ * @brief Load an Object
+ */
+class Load final : public Op, public Object::Consumer
+{
+public:
+ explicit Load();
+
+public:
+ Load(const Load &) = delete;
+ Load(Load &&) = delete;
+
+public:
+ uint32_t arity(void) const final;
+ Op *arg(uint32_t n) const final;
+
+ std::set<Object *> uses(void) const override;
+
+public:
+ Load *asLoad(void) override { return this; }
+ const Load *asLoad(void) const override { return this; }
+
+public:
+ Instr *loc(void) override { return parent(); }
+
+public:
+ void object(Object *o) { _obj.value(o); }
+ Object *object(void) const { return _obj.value(); }
+
+private:
+ Use _obj;
+};
+
+/**
+ * @brief 2D Convolution over 3D Feature Map with 4D kernel
+ *
+ * NOTE IFM and OFM are implicit. Only 4D kernel is explicit in this class
+ * TODO Decide source code layout policy and extract this class if necessary
+ */
+class Conv2D : public Op, public Object::Consumer
+{
+public:
+ explicit Conv2D();
+
+public:
+ uint32_t arity(void) const final;
+ Op *arg(uint32_t n) const final;
+
+ std::set<Object *> uses(void) const override;
+
+public:
+ Conv2D *asConv2D(void) override { return this; }
+ const Conv2D *asConv2D(void) const override { return this; }
+
+public:
+ Instr *loc(void) override { return parent(); }
+
+private:
+ Use _ker;
+
+public:
+ Op *arg(void) const { return _arg.child(); }
+ void arg(Op *arg) { _arg.child(arg); }
+
+public:
+ KernelObject *ker(void) const;
+ void ker(KernelObject *ker);
+
+public:
+ /**
+ * @brief Divide an input and kernel (= convolution filter) into G independent groups
+ *
+ * Given an input of shape(Ic, Ih, Iw), a kernel of shape(Kn, Kc, Kh, Kw), and group G,
+ * Conv2D is identical to G independent convolutions over G inputs of shape(Ic / G, Ih, Iw)
+ * and a kernel of shape(Kn / G, Kc, Kh, Kw) followed by concatenation.
+ *
+ * REQUIRED
+ * - "Ic" SHOULD BE a multiple of "G"
+ * - "Kc" SHOULD BE identical to "Ic / G"
+ *
+ * NOTE Depthwise convolution is a special case of group convolution where Ic == G.
+ */
+ uint32_t group(void) const { return _group; }
+ void group(uint32_t g) { _group = g; }
+
+public:
+ Padding2D *pad(void) { return &_pad; }
+ const Padding2D *pad(void) const { return &_pad; }
+
+public:
+ Stride2D *stride(void) { return &_stride; }
+ const Stride2D *stride(void) const { return &_stride; }
+
+private:
+ uint32_t _group = 1;
+
+ Padding2D _pad;
+ Stride2D _stride;
+
+private:
+ /// @brief Link to an argument of Conv2D operation (= IFM)
+ Part _arg;
+};
+
+/**
+ * @brief 2D Max Pooling
+ */
+class MaxPool2D final : public UnaryOp
+{
+public:
+ explicit MaxPool2D() = default;
+
+public:
+ MaxPool2D(const MaxPool2D &) = delete;
+ MaxPool2D(MaxPool2D &&) = delete;
+
+public:
+ MaxPool2D *asMaxPool2D(void) override { return this; }
+ const MaxPool2D *asMaxPool2D(void) const override { return this; }
+
+public:
+ Window2D *window(void) { return &_window; }
+ const Window2D *window(void) const { return &_window; }
+
+public:
+ Stride2D *stride(void) { return &_stride; }
+ const Stride2D *stride(void) const { return &_stride; }
+
+public:
+ Padding2D *pad(void) { return &_pad; }
+ const Padding2D *pad(void) const { return &_pad; }
+
+private:
+ Window2D _window;
+ Stride2D _stride;
+ Padding2D _pad;
+};
+
+/**
+ * @brief 2D Average Pooling
+ */
+class AvgPool2D final : public UnaryOp
+{
+public:
+ enum class Divisor
+ {
+ Unknown,
+ // Use the number of elements in each receptive field as a divisor
+ Static,
+ // Use the number of valid (non-padding) elements in each receptive field as a divisor
+ PaddingExcluded
+ };
+
+public:
+ explicit AvgPool2D() = default;
+
+public:
+ AvgPool2D(const AvgPool2D &) = delete;
+ AvgPool2D(AvgPool2D &&) = delete;
+
+public:
+ AvgPool2D *asAvgPool2D(void) override { return this; }
+ const AvgPool2D *asAvgPool2D(void) const override { return this; }
+
+public:
+ Divisor divisor(void) const { return _divisor; }
+ void divisor(const Divisor &divisor) { _divisor = divisor; }
+
+public:
+ Window2D *window(void) { return &_window; }
+ const Window2D *window(void) const { return &_window; }
+
+public:
+ Padding2D *pad(void) { return &_pad; }
+ const Padding2D *pad(void) const { return &_pad; }
+
+public:
+ Stride2D *stride(void) { return &_stride; }
+ const Stride2D *stride(void) const { return &_stride; }
+
+private:
+ Divisor _divisor = Divisor::Unknown;
+
+ Window2D _window;
+ Stride2D _stride;
+ Padding2D _pad;
+};
+
+/**
+ * @brief Introduce padding area
+ */
+class PadF final : public UnaryOp
+{
+public:
+ explicit PadF() = default;
+
+public:
+ PadF(const PadF &) = delete;
+ PadF(PadF &&) = delete;
+
+public:
+ PadF *asPadF(void) override { return this; }
+ const PadF *asPadF(void) const override { return this; }
+
+public:
+ Padding2D *pad(void) { return &_pad; }
+ const Padding2D *pad(void) const { return &_pad; }
+
+private:
+ Padding2D _pad;
+};
+
+/**
+ * @brief Apply ReLU over elements
+ */
+class ReLU final : public UnaryOp
+{
+public:
+ explicit ReLU() = default;
+
+public:
+ ReLU(const ReLU &) = delete;
+ ReLU(ReLU &&) = delete;
+
+public:
+ ReLU *asReLU(void) override { return this; }
+ const ReLU *asReLU(void) const override { return this; }
+};
+
+/**
+ * @brief Apply ReLU6 over elements
+ * @note ReLU6 is subject to change
+ */
+class ReLU6 final : public UnaryOp
+{
+public:
+ explicit ReLU6() = default;
+
+public:
+ ReLU6(const ReLU6 &) = delete;
+ ReLU6(ReLU6 &&) = delete;
+
+public:
+ ReLU6 *asReLU6(void) override { return this; }
+ const ReLU6 *asReLU6(void) const override { return this; }
+};
+
+/**
+ * @brief Element-wise addition
+ *
+ * Add(L, R) is valid only when L and R have identical kind/shape/dtype
+ */
+class Add final : public BinaryOp
+{
+public:
+ explicit Add() = default;
+
+public:
+ Add(const Add &) = delete;
+ Add(Add &&) = delete;
+
+public:
+ Add *asAdd(void) override { return this; }
+ const Add *asAdd(void) const override { return this; }
+};
+
+/**
+ * @brief Element-wise subtraction
+ *
+ * Sub(L, R) is valid only when L and R have identical kind/shape/dtype
+ */
+class Sub final : public BinaryOp
+{
+public:
+ explicit Sub() = default;
+
+public:
+ Sub(const Sub &) = delete;
+ Sub(Sub &&) = delete;
+
+public:
+ Sub *asSub(void) override { return this; }
+ const Sub *asSub(void) const override { return this; }
+};
+
+/**
+ * @brief Element-wise multiplication
+ *
+ * Mul(L, R) is valid only when L and R have identical kind/shape/dtype
+ */
+class Mul final : public BinaryOp
+{
+public:
+ explicit Mul() = default;
+
+public:
+ Mul(const Mul &) = delete;
+ Mul(Mul &&) = delete;
+
+public:
+ Mul *asMul(void) override { return this; }
+ const Mul *asMul(void) const override { return this; }
+};
+
+/**
+ * @brief Element-wise division
+ *
+ * Div(L, R) is valid only when L and R have identical kind/shape/dtype
+ */
+class Div final : public BinaryOp
+{
+public:
+ explicit Div() = default;
+
+public:
+ Div(const Div &) = delete;
+ Div(Div &&) = delete;
+
+public:
+ Div *asDiv(void) override { return this; }
+ const Div *asDiv(void) const override { return this; }
+};
+
+/**
+ * @brief Concatenate two feature maps
+ *
+ * ConcatF(L, R) requires L and R to agree on every dimension except the concatenation axis
+ */
+class ConcatF final : public BinaryOp
+{
+public:
+ enum class Axis
+ {
+ Unknown = 0,
+ Batch = 1,
+ Depth = 2,
+ Height = 3,
+ Width = 4,
+ };
+
+public:
+ explicit ConcatF() = default;
+
+public:
+ ConcatF(const ConcatF &) = delete;
+ ConcatF(ConcatF &&) = delete;
+
+public:
+ ConcatF *asConcatF(void) override { return this; }
+ const ConcatF *asConcatF(void) const override { return this; }
+
+public:
+ const Axis &axis(void) const { return _axis; }
+ void axis(const Axis &axis) { _axis = axis; }
+
+private:
+ Axis _axis = Axis::Unknown;
+};
+
+/**
+ * @brief Apply Sqrt over elements
+ */
+class Sqrt final : public UnaryOp
+{
+public:
+ explicit Sqrt() = default;
+
+public:
+ Sqrt(const Sqrt &) = delete;
+ Sqrt(Sqrt &&) = delete;
+
+public:
+ Sqrt *asSqrt(void) override { return this; }
+ const Sqrt *asSqrt(void) const override { return this; }
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_OPS_H__
diff --git a/compiler/coco/core/include/coco/IR/Output.forward.h b/compiler/coco/core/include/coco/IR/Output.forward.h
new file mode 100644
index 000000000..f011400c0
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Output.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_OUTPUT_FORWARD_H__
+#define __COCO_IR_OUTPUT_FORWARD_H__
+
+namespace coco
+{
+
+class Output;
+
+} // namespace coco
+
+#endif // __COCO_IR_OUTPUT_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Output.h b/compiler/coco/core/include/coco/IR/Output.h
new file mode 100644
index 000000000..3f77c131d
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Output.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_OUTPUT_H__
+#define __COCO_IR_OUTPUT_H__
+
+#include "coco/IR/Arg.h"
+#include "coco/IR/Entity.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Index.h>
+
+#include <string>
+#include <vector>
+
+namespace coco
+{
+
+class Output final : public Arg, public Entity
+{
+public:
+ Output(const nncc::core::ADT::tensor::Shape &shape);
+
+private:
+ void onTake(Bag *) override;
+ void onRelease(Bag *) override;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_OUTPUT_H__
diff --git a/compiler/coco/core/include/coco/IR/OutputList.h b/compiler/coco/core/include/coco/IR/OutputList.h
new file mode 100644
index 000000000..0e2abad75
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/OutputList.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_OUTPUT_LIST_H__
+#define __COCO_IR_OUTPUT_LIST_H__
+
+#include "coco/IR/Output.h"
+
+#include "coco/ADT/PtrList.h"
+
+namespace coco
+{
+
+using OutputList = PtrList<Output>;
+
+} // namespace coco
+
+#endif // __COCO_IR_OUTPUT_LIST_H__
diff --git a/compiler/coco/core/include/coco/IR/OutputManager.h b/compiler/coco/core/include/coco/IR/OutputManager.h
new file mode 100644
index 000000000..b40380388
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/OutputManager.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_OUTPUT_MANAGER_H__
+#define __COCO_IR_OUTPUT_MANAGER_H__
+
+#include "coco/IR/Output.h"
+#include "coco/IR/EntityBuilder.h"
+
+#include "coco/ADT/PtrManager.h"
+
+namespace coco
+{
+
+class OutputManager final : public PtrManager<Output>, public EntityBuilder
+{
+public:
+ OutputManager(Module *m = nullptr) { module(m); }
+
+public:
+ Output *create(const nncc::core::ADT::tensor::Shape &);
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_OUTPUT_MANAGER_H__
diff --git a/compiler/coco/core/include/coco/IR/Padding2D.h b/compiler/coco/core/include/coco/IR/Padding2D.h
new file mode 100644
index 000000000..b764656cc
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Padding2D.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_PADDING_2D_H__
+#define __COCO_IR_PADDING_2D_H__
+
+#include <cstdint>
+
+namespace coco
+{
+
+class Padding2D
+{
+public:
+ Padding2D() : _top{0}, _bottom{0}, _left{0}, _right{0}
+ {
+ // DO NOTHING
+ }
+
+public:
+ Padding2D(uint32_t top, uint32_t bottom, uint32_t left, uint32_t right)
+ : _top{top}, _bottom{bottom}, _left{left}, _right{right}
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t top(void) const { return _top; }
+ Padding2D &top(uint32_t value);
+
+public:
+ uint32_t bottom(void) const { return _bottom; }
+ Padding2D &bottom(uint32_t value);
+
+public:
+ uint32_t left(void) const { return _left; }
+ Padding2D &left(uint32_t value);
+
+public:
+ uint32_t right(void) const { return _right; }
+ Padding2D &right(uint32_t value);
+
+private:
+ uint32_t _top;
+ uint32_t _bottom;
+ uint32_t _left;
+ uint32_t _right;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_PADDING_2D_H__
diff --git a/compiler/coco/core/include/coco/IR/Part.forward.h b/compiler/coco/core/include/coco/IR/Part.forward.h
new file mode 100644
index 000000000..642ea56b5
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Part.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_PART_FORWARD_H__
+#define __COCO_IR_PART_FORWARD_H__
+
+namespace coco
+{
+
+class Part;
+
+} // namespace coco
+
+#endif // __COCO_IR_PART_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Part.h b/compiler/coco/core/include/coco/IR/Part.h
new file mode 100644
index 000000000..72af217cc
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Part.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_PART_H__
+#define __COCO_IR_PART_H__
+
+#include "coco/IR/Op.forward.h"
+
+namespace coco
+{
+
+/**
+ * @brief A Part represents the edge between a child Op and its parent Op
+ */
+class Part final
+{
+public:
+ Part(Op *parent) : _parent{parent}
+ {
+ // DO NOTHING
+ }
+
+public:
+ ~Part() { child(nullptr); }
+
+public:
+ Op *child(void) const { return _child; }
+ void child(Op *c);
+
+public:
+ Op *parent(void) const { return _parent; }
+
+private:
+ Op *_parent = nullptr;
+ Op *_child = nullptr;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_PART_H__
diff --git a/compiler/coco/core/include/coco/IR/Read.forward.h b/compiler/coco/core/include/coco/IR/Read.forward.h
new file mode 100644
index 000000000..7fd99e212
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Read.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_READ_FORWARD_H__
+#define __COCO_IR_READ_FORWARD_H__
+
+namespace coco
+{
+
+class Read;
+
+} // namespace coco
+
+#endif // __COCO_IR_READ_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Read.h b/compiler/coco/core/include/coco/IR/Read.h
new file mode 100644
index 000000000..9f62d8bf8
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Read.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_READ_H__
+#define __COCO_IR_READ_H__
+
+#include "coco/IR/Bag.h"
+
+namespace coco
+{
+
+/**
+ * @brief A Read represents an edge between a Bag and its Reader
+ */
+class Read final
+{
+public:
+ Read(Bag::Reader *r)
+ {
+ // Initialize link and reader
+ reader(r);
+ }
+
+public:
+ ~Read();
+
+public:
+ Bag *bag(void) const { return _bag; }
+ void bag(Bag *bag);
+
+public:
+ Bag::Reader *reader(void) const { return _reader; }
+ void reader(Bag::Reader *r) { _reader = r; }
+
+private:
+ Bag *_bag = nullptr;
+ Bag::Reader *_reader = nullptr;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_READ_H__
diff --git a/compiler/coco/core/include/coco/IR/ReadSet.h b/compiler/coco/core/include/coco/IR/ReadSet.h
new file mode 100644
index 000000000..c470c4bfd
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/ReadSet.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_READ_SET_H__
+#define __COCO_IR_READ_SET_H__
+
+#include "coco/IR/Read.forward.h"
+
+#include <set>
+
+namespace coco
+{
+
+using ReadSet = std::set<Read *>;
+
+} // namespace coco
+
+#endif // __COCO_IR_READ_SET_H__
diff --git a/compiler/coco/core/include/coco/IR/Step.forward.h b/compiler/coco/core/include/coco/IR/Step.forward.h
new file mode 100644
index 000000000..635069122
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Step.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_STEP_FORWARD_H__
+#define __COCO_IR_STEP_FORWARD_H__
+
+namespace coco
+{
+
+class Step;
+
+} // namespace coco
+
+#endif // __COCO_IR_STEP_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Step.h b/compiler/coco/core/include/coco/IR/Step.h
new file mode 100644
index 000000000..31dad4389
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Step.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_STEP_H__
+#define __COCO_IR_STEP_H__
+
+#include "coco/IR/Op.forward.h"
+#include "coco/IR/Instr.forward.h"
+
+namespace coco
+{
+
+/**
+ * @brief A Step denotes the edge between Op and Instr
+ */
+class Step final
+{
+public:
+ explicit Step(Instr *instr) : _instr{instr}
+ {
+ // DO NOTHING
+ }
+
+public:
+ ~Step() { op(nullptr); }
+
+public:
+ Op *op(void) const { return _op; }
+ void op(Op *o);
+
+public:
+ Instr *instr(void) const { return _instr; }
+
+private:
+ Op *_op = nullptr;
+ Instr *_instr = nullptr;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_STEP_H__
diff --git a/compiler/coco/core/include/coco/IR/Stride2D.h b/compiler/coco/core/include/coco/IR/Stride2D.h
new file mode 100644
index 000000000..9e69ffa40
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Stride2D.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_STRIDE_2D_H__
+#define __COCO_IR_STRIDE_2D_H__
+
+#include <cstdint>
+
+namespace coco
+{
+
+class Stride2D
+{
+public:
+ Stride2D() : _vertical{1}, _horizontal{1}
+ {
+ // DO NOTHING
+ }
+
+public:
+ Stride2D(uint32_t vertical, uint32_t horizontal) : _vertical{vertical}, _horizontal{horizontal}
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t vertical(void) const { return _vertical; }
+ Stride2D &vertical(uint32_t value);
+
+public:
+ uint32_t horizontal(void) const { return _horizontal; }
+ Stride2D &horizontal(uint32_t value);
+
+private:
+ uint32_t _vertical;
+ uint32_t _horizontal;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_STRIDE_2D_H__
diff --git a/compiler/coco/core/include/coco/IR/Update.forward.h b/compiler/coco/core/include/coco/IR/Update.forward.h
new file mode 100644
index 000000000..059f318c9
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Update.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_UPDATE_FORWARD_H__
+#define __COCO_IR_UPDATE_FORWARD_H__
+
+namespace coco
+{
+
+class Update;
+
+} // namespace coco
+
+#endif // __COCO_IR_UPDATE_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Update.h b/compiler/coco/core/include/coco/IR/Update.h
new file mode 100644
index 000000000..7cf876d74
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Update.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_UPDATE_H__
+#define __COCO_IR_UPDATE_H__
+
+#include "coco/IR/Bag.h"
+
+namespace coco
+{
+
+/**
+ * @brief An Update represents an edge between a Bag and its Updater
+ */
+class Update final
+{
+public:
+ Update(Bag::Updater *u) { updater(u); }
+
+public:
+ ~Update();
+
+public:
+ Bag *bag(void) const { return _bag; }
+ void bag(Bag *bag);
+
+public:
+ Bag::Updater *updater(void) const { return _updater; }
+ void updater(Bag::Updater *u) { _updater = u; }
+
+private:
+ Bag *_bag = nullptr;
+ Bag::Updater *_updater = nullptr;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_UPDATE_H__
diff --git a/compiler/coco/core/include/coco/IR/UpdateSet.h b/compiler/coco/core/include/coco/IR/UpdateSet.h
new file mode 100644
index 000000000..1e772adf3
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/UpdateSet.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_UPDATE_SET_H__
+#define __COCO_IR_UPDATE_SET_H__
+
+#include "coco/IR/Update.forward.h"
+
+#include <set>
+
+namespace coco
+{
+
+using UpdateSet = std::set<Update *>;
+
+} // namespace coco
+
+#endif // __COCO_IR_UPDATE_SET_H__
diff --git a/compiler/coco/core/include/coco/IR/Use.forward.h b/compiler/coco/core/include/coco/IR/Use.forward.h
new file mode 100644
index 000000000..329430bb3
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Use.forward.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_USE_FORWARD_H__
+#define __COCO_IR_USE_FORWARD_H__
+
+namespace coco
+{
+
+class Use;
+
+} // namespace coco
+
+#endif // __COCO_IR_USE_FORWARD_H__
diff --git a/compiler/coco/core/include/coco/IR/Use.h b/compiler/coco/core/include/coco/IR/Use.h
new file mode 100644
index 000000000..c4c9b98b4
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Use.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_USE_H__
+#define __COCO_IR_USE_H__
+
+#include "coco/IR/Object.h"
+
+namespace coco
+{
+
+class Use final
+{
+public:
+ Use(Object::Consumer *use) : _value{nullptr}, _consumer{use}
+ {
+ // DO NOTHING
+ }
+
+public:
+ ~Use() { value(nullptr); }
+
+public:
+ Object *value(void) const { return _value; }
+
+public:
+ void value(Object *value);
+
+public:
+ Object::Consumer *consumer(void) const { return _consumer; }
+
+private:
+ Object *_value;
+ Object::Consumer *_consumer = nullptr;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_USE_H__
diff --git a/compiler/coco/core/include/coco/IR/UseSet.h b/compiler/coco/core/include/coco/IR/UseSet.h
new file mode 100644
index 000000000..a698a733f
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/UseSet.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_USE_SET_H__
+#define __COCO_IR_USE_SET_H__
+
+#include "coco/IR/Use.forward.h"
+
+#include <set>
+
+namespace coco
+{
+
+using UseSet = std::set<Use *>;
+
+} // namespace coco
+
+#endif // __COCO_IR_USE_SET_H__
diff --git a/compiler/coco/core/include/coco/IR/Window2D.h b/compiler/coco/core/include/coco/IR/Window2D.h
new file mode 100644
index 000000000..a434538f3
--- /dev/null
+++ b/compiler/coco/core/include/coco/IR/Window2D.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_WINDOW_2D_H__
+#define __COCO_IR_WINDOW_2D_H__
+
+#include <cstdint>
+
+namespace coco
+{
+
+class Window2D
+{
+public:
+ Window2D() : _vertical{1}, _horizontal{1}
+ {
+ // DO NOTHING
+ }
+
+public:
+ Window2D(uint32_t vertical, uint32_t horizontal) : _vertical{vertical}, _horizontal{horizontal}
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t height(void) const { return _vertical; }
+ void height(uint32_t value) { _vertical = value; }
+
+public:
+ uint32_t width(void) const { return _horizontal; }
+ void width(uint32_t value) { _horizontal = value; }
+
+private:
+ // TODO Rename these fields as _height and _width, respectively
+ uint32_t _vertical;
+ uint32_t _horizontal;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_WINDOW_2D_H__
diff --git a/compiler/coco/core/src/ADT/DLinkedList.test.cpp b/compiler/coco/core/src/ADT/DLinkedList.test.cpp
new file mode 100644
index 000000000..563a39653
--- /dev/null
+++ b/compiler/coco/core/src/ADT/DLinkedList.test.cpp
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/ADT/DLinkedList.h"
+
+#include <set>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class Parent;
+class Child;
+
+using ChildList = coco::DLinkedList<Child, Parent>::Head;
+
+class Parent
+{
+public:
+ friend void coco::DLinkedList<Child, Parent>::joined(Parent *, Child *);
+ friend void coco::DLinkedList<Child, Parent>::leaving(Parent *, Child *);
+
+public:
+ Parent() : _list{this}, _count{0}
+ {
+ // DO NOTHING
+ }
+
+public:
+ ChildList *children(void) { return &_list; }
+ uint32_t count(void) const { return _count; }
+
+private:
+ ChildList _list;
+ uint32_t _count;
+};
+
+class Child final : public coco::DLinkedList<Child, Parent>::Node
+{
+public:
+ ~Child()
+ {
+ if (parent())
+ {
+ detach();
+ }
+ }
+};
+
+} // namespace
+
+namespace coco
+{
+
+template <> void DLinkedList<Child, Parent>::joined(Parent *p, Child *) { p->_count += 1; }
+template <> void DLinkedList<Child, Parent>::leaving(Parent *p, Child *) { p->_count -= 1; }
+
+template <> ChildList *DLinkedList<Child, Parent>::head(Parent *p) { return p->children(); }
+
+} // namespace coco
+
+namespace
+{
+class DLinkedListTest : public ::testing::Test
+{
+public:
+ virtual ~DLinkedListTest()
+ {
+ // NOTE Child SHOULD BE freed before parent
+ for (auto child : _children)
+ {
+ delete child;
+ }
+
+ for (auto parent : _parents)
+ {
+ delete parent;
+ }
+ }
+
+protected:
+ template <typename T> T *create(void);
+
+ void destroy(Child *);
+
+private:
+ std::set<::Parent *> _parents;
+ std::set<::Child *> _children;
+};
+
+template <>::Parent *DLinkedListTest::create(void)
+{
+ auto parent = new ::Parent;
+ _parents.insert(parent);
+ return parent;
+}
+
+template <>::Child *DLinkedListTest::create(void)
+{
+ auto child = new ::Child;
+ _children.insert(child);
+ return child;
+}
+
+void DLinkedListTest::destroy(Child *child)
+{
+ _children.erase(child);
+ delete child;
+}
+
+} // namespace
+
+TEST_F(DLinkedListTest, append)
+{
+ auto parent = create<::Parent>();
+ auto child = create<::Child>();
+
+ parent->children()->append(child);
+
+ ASSERT_EQ(child->parent(), parent);
+ ASSERT_EQ(child->prev(), nullptr);
+ ASSERT_EQ(child->next(), nullptr);
+
+ ASSERT_EQ(parent->children()->head(), child);
+ ASSERT_EQ(parent->children()->tail(), child);
+ ASSERT_EQ(parent->count(), 1);
+}
+
+TEST_F(DLinkedListTest, insert_two_elements)
+{
+ auto parent = create<::Parent>();
+
+ ASSERT_EQ(parent->children()->head(), nullptr);
+ ASSERT_EQ(parent->children()->tail(), nullptr);
+
+ auto child_1 = create<::Child>();
+
+ ASSERT_EQ(child_1->parent(), nullptr);
+ ASSERT_EQ(child_1->prev(), nullptr);
+ ASSERT_EQ(child_1->next(), nullptr);
+
+ parent->children()->append(child_1);
+
+ ASSERT_EQ(child_1->parent(), parent);
+ ASSERT_EQ(child_1->prev(), nullptr);
+ ASSERT_EQ(child_1->next(), nullptr);
+
+ ASSERT_EQ(parent->children()->head(), child_1);
+ ASSERT_EQ(parent->children()->tail(), child_1);
+
+ auto child_2 = create<::Child>();
+
+ ASSERT_EQ(child_2->parent(), nullptr);
+ ASSERT_EQ(child_2->prev(), nullptr);
+ ASSERT_EQ(child_2->next(), nullptr);
+
+ child_2->insertAfter(child_1);
+
+ ASSERT_EQ(child_2->parent(), parent);
+ ASSERT_EQ(child_2->prev(), child_1);
+ ASSERT_EQ(child_2->next(), nullptr);
+
+ ASSERT_EQ(child_1->parent(), parent);
+ ASSERT_EQ(child_1->prev(), nullptr);
+ ASSERT_EQ(child_1->next(), child_2);
+
+ ASSERT_EQ(parent->children()->head(), child_1);
+ ASSERT_EQ(parent->children()->tail(), child_2);
+}
+
+TEST_F(DLinkedListTest, insertBefore)
+{
+ auto parent = create<::Parent>();
+
+ auto child_1 = create<::Child>();
+ auto child_2 = create<::Child>();
+
+ parent->children()->append(child_1);
+ child_2->insertBefore(child_1);
+
+ ASSERT_EQ(child_2->parent(), parent);
+ ASSERT_EQ(child_2->prev(), nullptr);
+ ASSERT_EQ(child_2->next(), child_1);
+
+ ASSERT_EQ(child_1->parent(), parent);
+ ASSERT_EQ(child_1->prev(), child_2);
+ ASSERT_EQ(child_1->next(), nullptr);
+
+ ASSERT_EQ(parent->children()->head(), child_2);
+ ASSERT_EQ(parent->children()->tail(), child_1);
+}
+
+TEST_F(DLinkedListTest, prepend_after_append)
+{
+ auto parent = create<::Parent>();
+
+ auto child_1 = create<::Child>();
+ auto child_2 = create<::Child>();
+
+ parent->children()->append(child_1);
+ parent->children()->prepend(child_2);
+
+ ASSERT_EQ(child_2->next(), child_1);
+
+ ASSERT_EQ(child_1->parent(), parent);
+ ASSERT_EQ(child_1->prev(), child_2);
+ ASSERT_EQ(child_1->next(), nullptr);
+
+ ASSERT_EQ(parent->children()->head(), child_2);
+ ASSERT_EQ(parent->children()->tail(), child_1);
+}
+
+TEST_F(DLinkedListTest, detach)
+{
+ auto parent = create<::Parent>();
+
+ auto child_1 = create<::Child>();
+ auto child_2 = create<::Child>();
+
+ parent->children()->append(child_1);
+ parent->children()->append(child_2);
+
+ child_1->detach();
+
+ ASSERT_EQ(child_1->parent(), nullptr);
+ ASSERT_EQ(child_1->prev(), nullptr);
+ ASSERT_EQ(child_1->next(), nullptr);
+
+ ASSERT_EQ(child_2->parent(), parent);
+ ASSERT_EQ(child_2->prev(), nullptr);
+
+ ASSERT_EQ(parent->children()->head(), child_2);
+ ASSERT_EQ(parent->children()->tail(), child_2);
+
+ child_2->detach();
+
+ ASSERT_EQ(child_2->parent(), nullptr);
+ ASSERT_EQ(child_2->prev(), nullptr);
+ ASSERT_EQ(child_2->next(), nullptr);
+
+ ASSERT_TRUE(parent->children()->empty());
+ ASSERT_EQ(parent->children()->head(), nullptr);
+ ASSERT_EQ(parent->children()->tail(), nullptr);
+}
+
+TEST_F(DLinkedListTest, node_destructor)
+{
+ auto parent = create<::Parent>();
+
+ auto child_1 = create<::Child>();
+ auto child_2 = create<::Child>();
+
+ parent->children()->append(child_1);
+ parent->children()->append(child_2);
+
+ destroy(child_2);
+
+ ASSERT_EQ(parent->children()->head(), child_1);
+ ASSERT_EQ(parent->children()->tail(), child_1);
+ ASSERT_EQ(child_1->next(), nullptr);
+ ASSERT_EQ(child_1->prev(), nullptr);
+
+ destroy(child_1);
+
+ ASSERT_EQ(parent->children()->head(), nullptr);
+ ASSERT_EQ(parent->children()->tail(), nullptr);
+}
diff --git a/compiler/coco/core/src/ADT/PtrList.cpp b/compiler/coco/core/src/ADT/PtrList.cpp
new file mode 100644
index 000000000..ea2beb06b
--- /dev/null
+++ b/compiler/coco/core/src/ADT/PtrList.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/ADT/PtrList.h"
+
+// NOTE Do NOT delete this file; this file checks the completeness of 'PtrList.h'
diff --git a/compiler/coco/core/src/ADT/PtrList.test.cpp b/compiler/coco/core/src/ADT/PtrList.test.cpp
new file mode 100644
index 000000000..dcbad8b90
--- /dev/null
+++ b/compiler/coco/core/src/ADT/PtrList.test.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/ADT/PtrList.h"
+
+#include <memory>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct Object
+{
+};
+}
+
+TEST(ADT_PTR_LIST, ctor)
+{
+ coco::PtrList<Object> l;
+
+ ASSERT_EQ(l.size(), 0);
+}
+
+TEST(ADT_PTR_LIST, insert)
+{
+ coco::PtrList<Object> l;
+
+ std::unique_ptr<Object> ptr{new Object};
+
+ l.insert(ptr.get());
+
+ ASSERT_EQ(l.size(), 1);
+ ASSERT_EQ(l.at(0), ptr.get());
+}
diff --git a/compiler/coco/core/src/ADT/PtrManager.test.cpp b/compiler/coco/core/src/ADT/PtrManager.test.cpp
new file mode 100644
index 000000000..bb9056f29
--- /dev/null
+++ b/compiler/coco/core/src/ADT/PtrManager.test.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/ADT/PtrManager.h"
+
+#include <memory>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct Count
+{
+ uint32_t allocated;
+ uint32_t freed;
+
+ Count() : allocated{0}, freed{0}
+ {
+ // DO NOTHING
+ }
+};
+
+class Object
+{
+public:
+ Object(Count *count, uint32_t value) : _count{count}, _value{value} { _count->allocated += 1; }
+
+public:
+ ~Object() { _count->freed += 1; }
+
+public:
+ uint32_t value(void) const { return _value; }
+
+private:
+ Count *const _count;
+
+private:
+ uint32_t _value;
+};
+
+struct ObjectManager final : public coco::PtrManager<Object>
+{
+ Object *alloc(Count *count, uint32_t value)
+ {
+ std::unique_ptr<Object> o{new Object{count, value}};
+ return take(std::move(o));
+ }
+
+ void free(Object *o) { release(o); }
+};
+}
+
+TEST(ADT_PTR_MANAGER, usecase)
+{
+ Count c;
+
+ ASSERT_EQ(c.allocated, 0);
+ ASSERT_EQ(c.freed, 0);
+
+ {
+ ::ObjectManager mgr;
+
+ auto obj_1 = mgr.alloc(&c, 3);
+ auto obj_2 = mgr.alloc(&c, 4);
+
+ EXPECT_EQ(c.allocated, 2);
+ ASSERT_EQ(c.freed, 0);
+
+ EXPECT_EQ(mgr.size(), 2);
+ EXPECT_EQ(mgr.at(0), obj_1);
+ EXPECT_EQ(mgr.at(1), obj_2);
+
+ // Let's delete obj_1
+ mgr.free(obj_1);
+
+ EXPECT_EQ(c.allocated, 2);
+ ASSERT_EQ(c.freed, 1);
+
+ EXPECT_EQ(mgr.size(), 1);
+ EXPECT_EQ(mgr.at(0), obj_2);
+ }
+
+ // PtrManager SHOULD destruct all of the allocated objects when it is destructed.
+ ASSERT_EQ(c.allocated, 2);
+ ASSERT_EQ(c.freed, 2);
+}
diff --git a/compiler/coco/core/src/IR.test.cpp b/compiler/coco/core/src/IR.test.cpp
new file mode 100644
index 000000000..3f8c0ad34
--- /dev/null
+++ b/compiler/coco/core/src/IR.test.cpp
@@ -0,0 +1,303 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <nncc/core/ADT/feature/CHWLayout.h>
+
+#include <nncc/core/ADT/kernel/NCHWLayout.h>
+
+#include <gtest/gtest.h>
+
+#include <set>
+#include <map>
+#include <string>
+
+using nncc::core::ADT::feature::num_elements;
+
+using nncc::core::ADT::kernel::num_elements;
+
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::num_elements;
+
+//
+// 'caffe_conv' test demonstrates how to translate the following Caffe network into coco IR:
+//
+// layer {
+// name: "data"
+// type: "Input"
+// top: "data"
+// input_param: { shape: { dim: 1 dim: 1 dim: 3 dim: 3 } }
+// }
+//
+// layer {
+// name: "conv"
+// type: "Convolution"
+// bottom: "data"
+// top: "conv"
+// blobs {
+// ...
+// shape { dim: 1 dim: 1 dim: 3 dim: 3 }
+// }
+// convolution_param {
+// bias_term: false
+// num_output: 1
+// kernel_size: 3
+// }
+// }
+//
+TEST(IR, caffe_conv)
+{
+ // For inter-layer communication
+ std::map<std::string, coco::Bag *> bags;
+ std::map<std::string, nncc::core::ADT::tensor::Shape> shapes;
+
+ std::set<std::string> top_blobs;
+
+ // Create a module and block
+ auto m = coco::Module::create();
+ auto blk = m->entity()->block()->create();
+
+ // Next, append the block to the module
+ m->block()->append(blk);
+
+ // Now, the block belongs to the module (and has no sibling)
+ ASSERT_EQ(blk->parent(), m.get());
+ ASSERT_EQ(blk->next(), nullptr);
+ ASSERT_EQ(blk->prev(), nullptr);
+
+ // The head and tail points to the appended block
+ ASSERT_EQ(m->block()->head(), blk);
+ ASSERT_EQ(m->block()->tail(), blk);
+
+ // Let's translate the first 'Input' layer
+ {
+ using nncc::core::ADT::tensor::Shape;
+
+ const Shape shape{1, 1, 3, 3};
+
+ auto bag = m->entity()->bag()->create(num_elements(shape));
+ auto input = m->entity()->input()->create(shape);
+
+ input->bag(bag);
+ input->name("data");
+
+ // Caffe uses lexical layout for tensors
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ const static LexicalLayout l{};
+ const auto offset = static_cast<uint32_t>(l.offset(shape, e.current()));
+
+ input->at(e.current()) = coco::ElemID{offset};
+ }
+
+ m->input()->insert(input);
+
+ bags["data"] = bag;
+ shapes["data"] = shape;
+
+ top_blobs = {"data"};
+ }
+
+ // Next, translate 'Convolution' layer
+ {
+ using nncc::core::ADT::feature::CHWLayout;
+ using nncc::core::ADT::kernel::NCHWLayout;
+
+ const nncc::core::ADT::feature::Shape ifm_shape{1, 3, 3};
+ auto ifm_bag = bags["data"];
+ auto ifm_obj = m->entity()->object()->create<coco::FeatureObject>();
+ auto ifm_layout = coco::FeatureLayouts::BCHW::create(ifm_shape);
+
+ ifm_obj->bag(ifm_bag);
+ ifm_obj->layout(std::move(ifm_layout));
+
+ const nncc::core::ADT::kernel::Shape ker_shape{1, 1, 3, 3};
+ auto ker_bag = m->entity()->bag()->create(num_elements(ker_shape));
+ auto ker_layout = coco::KernelLayouts::Generic::create(ker_shape);
+
+ ker_layout->reorder<NCHWLayout>();
+
+ auto ker_obj = m->entity()->object()->create<coco::KernelObject>();
+
+ ker_obj->bag(ker_bag);
+ ker_obj->layout(std::move(ker_layout));
+
+ const nncc::core::ADT::feature::Shape ofm_shape{1, 1, 1};
+ auto ofm_bag = m->entity()->bag()->create(1 * 1 * 1);
+ auto ofm_obj = m->entity()->object()->create<coco::FeatureObject>();
+ auto ofm_layout = coco::FeatureLayouts::BCHW::create(ifm_shape);
+
+ ofm_obj->bag(ofm_bag);
+ ofm_obj->layout(std::move(ofm_layout));
+
+ // Create Load operation
+ auto load = m->entity()->op()->create<coco::Load>();
+
+ load->object(ifm_obj);
+
+ // Create Conv2D operation
+ //
+ // NOTE Conv2D op in coco IR does not perform BiasAdd
+ auto op = m->entity()->op()->create<coco::Conv2D>();
+
+ op->ker(ker_obj);
+
+ // Create UnitF instruction with Conv2D operation
+ auto ins = m->entity()->instr()->create<coco::Eval>();
+
+ ins->out(ofm_obj);
+ ins->op(op);
+
+ // Append the instruction (to the block)
+ blk->instr()->append(ins);
+
+ bags["conv"] = ofm_bag;
+ shapes["conv"] = nncc::core::ADT::tensor::Shape{1, 1, 1, 1};
+
+ top_blobs = {"conv"};
+ }
+
+ // Finalize
+ for (const auto &top_blob : top_blobs)
+ {
+ const auto &shape = shapes[top_blob];
+
+ auto output = m->entity()->output()->create(shape);
+
+ output->bag(bags[top_blob]);
+ output->name(top_blob);
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ const static LexicalLayout l{};
+ const auto offset = static_cast<uint32_t>(l.offset(shape, e.current()));
+
+ output->at(e.current()) = coco::ElemID{offset};
+ }
+
+ m->output()->insert(output);
+ }
+
+ // Let's validate the constructed IR
+ {
+ // There is one input whose name is 'data'
+ ASSERT_EQ(m->input()->size(), 1);
+ ASSERT_EQ(m->input()->at(0)->name(), "data");
+
+ // There is one output whose name is 'conv'
+ ASSERT_EQ(m->output()->size(), 1);
+ ASSERT_EQ(m->output()->at(0)->name(), "conv");
+
+ ASSERT_FALSE(m->block()->empty());
+
+ // There is one block in the module
+ auto blk = m->block()->head();
+
+ ASSERT_EQ(blk->next(), nullptr);
+ ASSERT_FALSE(blk->instr()->empty());
+
+ // There is one instruction in the block
+ auto ins = blk->instr()->head();
+
+ ASSERT_EQ(ins->next(), nullptr);
+
+ // That instruction is 'Eval'
+ // TODO Rename 'unit'
+ auto unit = ins->asEval();
+
+ ASSERT_NE(unit, nullptr);
+
+// TODO Rewrite below test
+#if 0
+ // Input #0 points to IFM
+ ASSERT_NE(unit->ifm(), nullptr);
+ ASSERT_EQ(unit->ifm()->bag(), m->input()->at(0)->bag());
+#endif
+
+ // Output #0 points to OFM
+ ASSERT_NE(unit->out(), nullptr);
+ ASSERT_EQ(unit->out()->bag(), m->output()->at(0)->bag());
+
+ // The actual operation is Conv2D
+ auto conv = unit->op()->asConv2D();
+
+ ASSERT_NE(conv, nullptr);
+
+ // Let's check Kernel Object
+ ASSERT_NE(conv->ker(), nullptr);
+// TODO Rewrite below test
+#if 0
+ ASSERT_NE(conv->ker()->bag(), unit->ifm()->bag());
+ ASSERT_NE(conv->ker()->bag(), unit->ofm()->bag());
+#endif
+
+// One may find the correspondence among Input, Output, and Objects through ElemID
+// TODO Rewrite below test
+#if 0
+ {
+ auto input_0 = m->input()->at(0);
+ auto ifm = unit->ifm();
+
+ nncc::core::ADT::tensor::Index input_index{0, 0, 2, 2};
+
+ // Here we can check that Input(0, 0, 2, 2) corresponds to IFM(0, 2, 2)
+ ASSERT_EQ(input_0->at(input_index).value(), ifm->at(0, 2, 2).value());
+ }
+#endif
+ }
+}
+
+//
+// This test demonstrates how to use 'replaceWith' method
+//
+TEST(IR, bag_replaceWith)
+{
+ auto m = coco::Module::create();
+
+ auto bag_1 = m->entity()->bag()->create(1);
+ auto bag_2 = m->entity()->bag()->create(1);
+
+ auto obj = m->entity()->object()->create<coco::FeatureObject>();
+ obj->bag(bag_1);
+
+ auto shuffle_1 = m->entity()->instr()->create<coco::Shuffle>();
+ shuffle_1->into(bag_1);
+
+ auto shuffle_2 = m->entity()->instr()->create<coco::Shuffle>();
+ shuffle_2->from(bag_1);
+
+ ASSERT_EQ(obj->bag(), bag_1);
+ ASSERT_EQ(shuffle_1->into(), bag_1);
+ ASSERT_EQ(shuffle_2->from(), bag_1);
+
+ bag_1->replaceAllDepsWith(bag_2);
+
+ ASSERT_EQ(obj->bag(), bag_2);
+ ASSERT_EQ(shuffle_1->into(), bag_1);
+ ASSERT_EQ(shuffle_2->from(), bag_1);
+
+ bag_1->replaceWith(bag_2);
+
+ ASSERT_EQ(obj->bag(), bag_2);
+ ASSERT_EQ(shuffle_1->into(), bag_2);
+ ASSERT_EQ(shuffle_2->from(), bag_2);
+}
diff --git a/compiler/coco/core/src/IR/Arg.cpp b/compiler/coco/core/src/IR/Arg.cpp
new file mode 100644
index 000000000..b6f9c4777
--- /dev/null
+++ b/compiler/coco/core/src/IR/Arg.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Arg.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <cassert>
+
+namespace
+{
+
+const nncc::core::ADT::tensor::LexicalLayout l;
+
+} // namespace
+
+namespace coco
+{
+
+Arg::Arg(const nncc::core::ADT::tensor::Shape &shape) : _shape{shape}, _bag{nullptr}
+{
+ _map.resize(nncc::core::ADT::tensor::num_elements(shape));
+}
+
+void Arg::bag(Bag *bag)
+{
+ if (_bag != nullptr)
+ {
+ onRelease(_bag);
+ _bag = nullptr;
+ }
+
+ assert(_bag == nullptr);
+
+ if (bag != nullptr)
+ {
+ _bag = bag;
+ onTake(_bag);
+ }
+}
+
+ElemID &Arg::at(const nncc::core::ADT::tensor::Index &index)
+{
+ return _map.at(l.offset(_shape, index));
+}
+
+const ElemID &Arg::at(const nncc::core::ADT::tensor::Index &index) const
+{
+ return _map.at(l.offset(_shape, index));
+}
+
+void Arg::reorder(const nncc::core::ADT::tensor::Layout &l)
+{
+ using nncc::core::ADT::tensor::IndexEnumerator;
+
+ for (IndexEnumerator e{shape()}; e.valid(); e.advance())
+ {
+ const auto offset = static_cast<uint32_t>(l.offset(shape(), e.current()));
+
+ at(e.current()) = coco::ElemID{offset};
+ }
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Arg.test.cpp b/compiler/coco/core/src/IR/Arg.test.cpp
new file mode 100644
index 000000000..391e05901
--- /dev/null
+++ b/compiler/coco/core/src/IR/Arg.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Arg.h"
+
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+
+namespace
+{
+class ArgTest : public ::testing::Test
+{
+protected:
+ coco::Arg *allocate(const Shape &shape)
+ {
+ auto arg = new coco::Arg{shape};
+ _allocated.emplace_back(arg);
+ return arg;
+ }
+
+private:
+ std::vector<std::unique_ptr<coco::Arg>> _allocated;
+};
+} // namespace
+
+TEST_F(ArgTest, constructor)
+{
+ const Shape shape{1, 3, 3, 1};
+
+ auto arg = allocate(shape);
+
+ ASSERT_EQ(arg->shape(), shape);
+ ASSERT_TRUE(arg->name().empty());
+ ASSERT_EQ(arg->bag(), nullptr);
+}
+
+TEST_F(ArgTest, name_update)
+{
+ const Shape shape{1, 3, 3, 1};
+
+ auto arg = allocate(shape);
+
+ arg->name("data");
+ ASSERT_EQ(arg->name(), "data");
+}
+
+TEST_F(ArgTest, at)
+{
+ const Shape shape{1, 3, 3, 1};
+
+ auto arg = allocate(shape);
+
+ coco::Arg *mutable_ptr = arg;
+ const coco::Arg *immutable_ptr = arg;
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ mutable_ptr->at(e.current()) = coco::ElemID{16};
+ }
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ ASSERT_EQ(immutable_ptr->at(e.current()).value(), 16);
+ }
+}
+
+TEST_F(ArgTest, reorder)
+{
+ const Shape shape{2, 2, 2, 2};
+
+ auto arg = allocate(shape);
+
+ arg->reorder<LexicalLayout>();
+
+ ASSERT_EQ(arg->at(Index{0, 0, 0, 0}).value(), 0);
+ ASSERT_EQ(arg->at(Index{0, 0, 0, 1}).value(), 1);
+}
diff --git a/compiler/coco/core/src/IR/AvgPool2D.test.cpp b/compiler/coco/core/src/IR/AvgPool2D.test.cpp
new file mode 100644
index 000000000..d62bc9b7d
--- /dev/null
+++ b/compiler/coco/core/src/IR/AvgPool2D.test.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Ops.h"
+
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct IsAvgPool2D : public coco::Op::Visitor<bool>
+{
+ bool visit(const coco::AvgPool2D *) override { return true; }
+};
+
+class AvgPool2DTest : public ::testing::Test
+{
+public:
+ AvgPool2DTest()
+ {
+ // DO NOTHING
+ }
+
+protected:
+ coco::AvgPool2D *allocate(void)
+ {
+ auto op = new coco::AvgPool2D;
+ _allocated.emplace_back(op);
+ return op;
+ }
+
+private:
+ std::vector<std::unique_ptr<coco::AvgPool2D>> _allocated;
+};
+} // namespace
+
+TEST_F(AvgPool2DTest, initialization)
+{
+ auto op = allocate();
+
+ coco::AvgPool2D *mutable_ptr = op;
+ const coco::AvgPool2D *immutable_ptr = op;
+
+ // uses() should be empty on construction
+ ASSERT_EQ(op->uses().size(), 0);
+ // parent() should be nullptr on construction
+ ASSERT_EQ(op->parent(), nullptr);
+
+ // arg() should be nullptr on construction
+ ASSERT_EQ(immutable_ptr->arg(), nullptr);
+
+ // divisor() SHOULD be unknown on construction
+ ASSERT_EQ(immutable_ptr->divisor(), coco::AvgPool2D::Divisor::Unknown);
+
+ // window() SHOULD return a valid pointer
+ ASSERT_NE(mutable_ptr->window(), nullptr);
+ ASSERT_EQ(mutable_ptr->window(), immutable_ptr->window());
+
+ // pad() SHOULD return a valid pointer
+ ASSERT_NE(mutable_ptr->pad(), nullptr);
+ ASSERT_EQ(mutable_ptr->pad(), immutable_ptr->pad());
+
+ // stride() SHOULD return a valid pointer
+ ASSERT_NE(mutable_ptr->stride(), nullptr);
+ ASSERT_EQ(mutable_ptr->stride(), immutable_ptr->stride());
+}
+
+TEST_F(AvgPool2DTest, asAvgPool2D)
+{
+ auto op = allocate();
+
+ coco::Op *mutable_base = op;
+ const coco::Op *immutable_base = op;
+
+ ASSERT_EQ(mutable_base->asAvgPool2D(), op);
+ ASSERT_EQ(mutable_base->asAvgPool2D(), immutable_base->asAvgPool2D());
+}
+
+TEST_F(AvgPool2DTest, accept)
+{
+ // Test 'AvgPool2D' class
+ auto op = allocate();
+
+ coco::AvgPool2D *mutable_ptr = op;
+ const coco::AvgPool2D *immutable_ptr = op;
+
+ ASSERT_TRUE(mutable_ptr->accept(IsAvgPool2D{}));
+ ASSERT_TRUE(immutable_ptr->accept(IsAvgPool2D{}));
+}
+
+TEST_F(AvgPool2DTest, disivor)
+{
+ auto op = allocate();
+
+ op->divisor(coco::AvgPool2D::Divisor::Static);
+
+ ASSERT_EQ(op->divisor(), coco::AvgPool2D::Divisor::Static);
+}
diff --git a/compiler/coco/core/src/IR/Bag.cpp b/compiler/coco/core/src/IR/Bag.cpp
new file mode 100644
index 000000000..7dce48587
--- /dev/null
+++ b/compiler/coco/core/src/IR/Bag.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Bag.h"
+
+#include "coco/IR/Object.h"
+#include "coco/IR/Read.h"
+#include "coco/IR/Update.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+Bag::Bag(uint32_t size) : _size{size}
+{
+ // DO NOTHING
+}
+
+Bag::~Bag()
+{
+ // All the references over a bag SHOULD be dropped before its destruction
+ assert(deps()->size() == 0);
+ assert(reads()->size() == 0);
+ assert(updates()->size() == 0);
+}
+
+uint32_t Bag::size(void) const { return _size; }
+
+bool Bag::isInput(void) const { return _input != nullptr; }
+bool Bag::isOutput(void) const { return _output != nullptr; }
+
+const DepSet *Bag::deps(void) const { return &_deps; }
+const ReadSet *Bag::reads(void) const { return &_reads; }
+const UpdateSet *Bag::updates(void) const { return &_updates; }
+
+void Bag::replaceWith(Bag *b)
+{
+  assert(!isInput() && !isOutput());
+
+  replaceAllDepsWith(b);
+  // Replace all the occurrences inside Read
+  while (!(reads()->empty()))
+  {
+    auto read = *(reads()->begin());
+    assert(read->bag() == this);
+    read->bag(b);
+  }
+
+  // Replace all the occurrences inside Update
+  while (!(updates()->empty()))
+  {
+    auto update = *(updates()->begin());
+    assert(update->bag() == this);
+    update->bag(b);
+  }
+
+  assert(deps()->empty());
+  assert(reads()->empty());
+  assert(updates()->empty());
+}
+
+void Bag::replaceAllDepsWith(Bag *b)
+{
+  // Replace all the occurrences inside Dep
+  while (!(deps()->empty()))
+  {
+    auto dep = *(deps()->begin());
+    assert(dep->bag() == this);
+    dep->bag(b);
+  }
+}
+
+ObjectSet dependent_objects(const Bag *b)
+{
+ ObjectSet res;
+
+ for (const auto &dep : *(b->deps()))
+ {
+ if (auto obj = dep->object())
+ {
+ res.insert(obj);
+ }
+ }
+
+ return res;
+}
+
+Bag::ReaderSet readers(const Bag *b)
+{
+ Bag::ReaderSet res;
+
+ for (auto obj : dependent_objects(b))
+ {
+ for (auto consumer : consumers(obj))
+ {
+ // NOTE Object::Consumer inherits Bag::Reader
+ res.insert(consumer);
+ }
+ }
+
+ for (auto read : *b->reads())
+ {
+ auto reader = read->reader();
+ assert(reader != nullptr);
+ res.insert(reader);
+ }
+
+ return res;
+}
+
+Bag::UpdaterSet updaters(const Bag *b)
+{
+ Bag::UpdaterSet res;
+
+ for (auto obj : dependent_objects(b))
+ {
+ if (auto p = producer(obj))
+ {
+ res.insert(p);
+ }
+ }
+
+ for (auto update : *b->updates())
+ {
+ auto updater = update->updater();
+ assert(updater != nullptr);
+ res.insert(updater);
+ }
+
+ return res;
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Bag.test.cpp b/compiler/coco/core/src/IR/Bag.test.cpp
new file mode 100644
index 000000000..9995e81ef
--- /dev/null
+++ b/compiler/coco/core/src/IR/Bag.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Bag.h"
+
+#include <gtest/gtest.h>
+
+TEST(IR_BAG, ctor_should_set_size)
+{
+ coco::Bag b{3};
+
+ ASSERT_EQ(b.size(), 3);
+
+ // Bag has no read/updates at the beginning
+ EXPECT_EQ(b.reads()->size(), 0);
+ EXPECT_EQ(b.updates()->size(), 0);
+}
diff --git a/compiler/coco/core/src/IR/BagManager.cpp b/compiler/coco/core/src/IR/BagManager.cpp
new file mode 100644
index 000000000..10fe69d57
--- /dev/null
+++ b/compiler/coco/core/src/IR/BagManager.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/BagManager.h"
+
+#include <stdex/Memory.h>
+
+namespace coco
+{
+
+Bag *BagManager::create(uint32_t size)
+{
+ auto bag = stdex::make_unique<Bag>(size);
+ modulize(bag.get());
+ return take(std::move(bag));
+}
+
+void BagManager::destroy(Bag *b) { release(b); }
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/BagManager.test.cpp b/compiler/coco/core/src/IR/BagManager.test.cpp
new file mode 100644
index 000000000..bf135a951
--- /dev/null
+++ b/compiler/coco/core/src/IR/BagManager.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/BagManager.h"
+
+#include <gtest/gtest.h>
+
+TEST(IR_BAG_MANAGER, create)
+{
+ coco::BagManager mgr;
+
+ auto bag = mgr.create(3);
+
+ ASSERT_EQ(bag->size(), 3);
+}
+
+TEST(IR_BAG_MANAGER, destruct)
+{
+ coco::BagManager mgr;
+
+ auto b = mgr.create(3);
+ mgr.destroy(b);
+
+ ASSERT_EQ(mgr.size(), 0);
+}
diff --git a/compiler/coco/core/src/IR/Block.cpp b/compiler/coco/core/src/IR/Block.cpp
new file mode 100644
index 000000000..14c026039
--- /dev/null
+++ b/compiler/coco/core/src/IR/Block.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Block.h"
+#include "coco/IR/Module.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+template <> void DLinkedList<Block, Module>::joined(Module *, Block *curr_blk)
+{
+ assert(!curr_blk->index().valid());
+ uint32_t value = 0;
+
+ if (auto prev_blk = curr_blk->prev())
+ {
+ value = prev_blk->index().value() + 1;
+ }
+
+ for (auto blk = curr_blk; blk; blk = blk->next())
+ {
+ blk->_index.set(value++);
+ }
+}
+
+template <> void DLinkedList<Block, Module>::leaving(Module *, Block *curr_blk)
+{
+ assert(curr_blk->index().valid());
+ uint32_t value = curr_blk->index().value();
+
+ for (auto blk = curr_blk->next(); blk; blk = blk->next())
+ {
+ blk->_index.set(value++);
+ }
+
+ curr_blk->_index.reset();
+}
+
+template <> BlockList *DLinkedList<Block, Module>::head(Module *m) { return m->block(); }
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Block.test.cpp b/compiler/coco/core/src/IR/Block.test.cpp
new file mode 100644
index 000000000..c2acc89f7
--- /dev/null
+++ b/compiler/coco/core/src/IR/Block.test.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Block.h"
+
+#include <gtest/gtest.h>
+
+TEST(IR_BLOCK, default_block_has_empty_instr_list)
+{
+ coco::Block blk;
+
+ ASSERT_TRUE(blk.instr()->empty());
+ ASSERT_EQ(blk.instr()->head(), nullptr);
+ ASSERT_EQ(blk.instr()->tail(), nullptr);
+}
diff --git a/compiler/coco/core/src/IR/BlockIndex.cpp b/compiler/coco/core/src/IR/BlockIndex.cpp
new file mode 100644
index 000000000..8cb56724c
--- /dev/null
+++ b/compiler/coco/core/src/IR/BlockIndex.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/BlockIndex.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+void BlockIndex::set(uint32_t value)
+{
+ assert(value != undefined);
+ _value = value;
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/BlockIndex.test.cpp b/compiler/coco/core/src/IR/BlockIndex.test.cpp
new file mode 100644
index 000000000..68afb889e
--- /dev/null
+++ b/compiler/coco/core/src/IR/BlockIndex.test.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/BlockIndex.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class BlockIndexTest : public ::testing::Test
+{
+};
+
+} // namespace
+
+TEST_F(BlockIndexTest, default_constructor)
+{
+ coco::BlockIndex blk_ind;
+
+ ASSERT_FALSE(blk_ind.valid());
+}
+
+TEST_F(BlockIndexTest, explicit_constructor)
+{
+ coco::BlockIndex blk_ind{3};
+
+ ASSERT_TRUE(blk_ind.valid());
+ ASSERT_EQ(blk_ind.value(), 3);
+}
+
+TEST_F(BlockIndexTest, operator_lt)
+{
+ // Valid index is always less than undefined one.
+ ASSERT_TRUE(coco::BlockIndex(3) < coco::BlockIndex());
+ ASSERT_TRUE(coco::BlockIndex(3) < coco::BlockIndex(4));
+}
diff --git a/compiler/coco/core/src/IR/BlockManager.cpp b/compiler/coco/core/src/IR/BlockManager.cpp
new file mode 100644
index 000000000..5e3b88173
--- /dev/null
+++ b/compiler/coco/core/src/IR/BlockManager.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/BlockManager.h"
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+namespace coco
+{
+
+Block *BlockManager::create(void)
+{
+ auto blk = stdex::make_unique<Block>();
+ modulize(blk.get());
+ return take(std::move(blk));
+}
+
+void BlockManager::destroy(Block *blk)
+{
+ assert(blk->parent() == nullptr);
+ assert(blk->prev() == nullptr);
+ assert(blk->next() == nullptr);
+ release(blk);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/BlockManager.test.cpp b/compiler/coco/core/src/IR/BlockManager.test.cpp
new file mode 100644
index 000000000..94f69b773
--- /dev/null
+++ b/compiler/coco/core/src/IR/BlockManager.test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/BlockManager.h"
+
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+class BlockManagerTest : public ::testing::Test
+{
+public:
+ // Create a coco::BlockManager for testing
+ coco::BlockManager *allocate(void)
+ {
+ auto p = new coco::BlockManager;
+ _allocated.emplace_back(p);
+ return p;
+ }
+
+private:
+ std::vector<std::unique_ptr<coco::BlockManager>> _allocated;
+};
+} // namespace
+
+TEST_F(BlockManagerTest, create)
+{
+ auto mgr = allocate();
+ auto blk = mgr->create();
+
+ ASSERT_NE(blk, nullptr);
+}
+
+TEST_F(BlockManagerTest, destroy)
+{
+ auto mgr = allocate();
+ auto blk_1 = mgr->create();
+ auto blk_2 = mgr->create();
+
+ mgr->destroy(blk_1);
+
+ ASSERT_EQ(mgr->size(), 1);
+ ASSERT_EQ(mgr->at(0), blk_2);
+}
diff --git a/compiler/coco/core/src/IR/Consumer.mock.h b/compiler/coco/core/src/IR/Consumer.mock.h
new file mode 100644
index 000000000..7d7cc492a
--- /dev/null
+++ b/compiler/coco/core/src/IR/Consumer.mock.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_CONSUMER_MOCK_H__
+#define __COCO_IR_CONSUMER_MOCK_H__
+
+#include "coco/IR/Object.h"
+
+namespace
+{
+namespace mock
+{
+struct Consumer final : public coco::Object::Consumer
+{
+ coco::Instr *loc(void) override { return nullptr; }
+};
+} // namespace mock
+} // namespace
+
+#endif // __COCO_IR_CONSUMER_MOCK_H__
diff --git a/compiler/coco/core/src/IR/Conv2D.cpp b/compiler/coco/core/src/IR/Conv2D.cpp
new file mode 100644
index 000000000..19395a158
--- /dev/null
+++ b/compiler/coco/core/src/IR/Conv2D.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Ops.h"
+
+#include <pepper/assert.h>
+
+namespace coco
+{
+
+Conv2D::Conv2D() : _ker{this}, _arg{this}
+{
+ // DO NOTHING
+}
+
+uint32_t Conv2D::arity(void) const
+{
+ // Conv2D has one argument (IFM)
+ // NOTE This design is subject to change
+ return 1;
+}
+
+Op *Conv2D::arg(DBGARG(uint32_t, n)) const
+{
+ assert(n < arity());
+ return arg();
+}
+
+std::set<Object *> Conv2D::uses(void) const
+{
+ std::set<Object *> res;
+
+ if (ker())
+ {
+ res.insert(ker());
+ }
+
+ if (auto ifm = arg())
+ {
+ for (auto obj : ifm->uses())
+ {
+ res.insert(obj);
+ }
+ }
+
+ return res;
+}
+
+void Conv2D::ker(KernelObject *ker) { _ker.value(ker); }
+
+KernelObject *Conv2D::ker(void) const
+{
+ if (auto obj = _ker.value())
+ {
+ assert(obj->asKernel() != nullptr);
+ return obj->asKernel();
+ }
+
+ return nullptr;
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Conv2D.test.cpp b/compiler/coco/core/src/IR/Conv2D.test.cpp
new file mode 100644
index 000000000..df0a2470b
--- /dev/null
+++ b/compiler/coco/core/src/IR/Conv2D.test.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Ops.h"
+#include "coco/IR/ObjectManager.h"
+
+#include <vector>
+#include <memory>
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+using stdex::make_unique;
+
+namespace
+{
+class Conv2DTest : public ::testing::Test
+{
+public:
+ Conv2DTest()
+ {
+ // DO NOTHING
+ }
+
+protected:
+ coco::Conv2D *allocate(void)
+ {
+ auto op = new coco::Conv2D;
+ _allocated.emplace_back(op);
+ return op;
+ }
+
+protected:
+ coco::ObjectManager obj_mgr;
+
+private:
+ std::vector<std::unique_ptr<coco::Conv2D>> _allocated;
+};
+} // namespace
+
+TEST_F(Conv2DTest, ctor)
+{
+ auto op = allocate();
+
+ // arg() should be initialized as nullptr on construction
+ ASSERT_EQ(op->arg(), nullptr);
+ // ker() should be initialized as nullptr on construction
+ ASSERT_EQ(op->ker(), nullptr);
+
+ // uses() should be empty on construction
+ ASSERT_EQ(op->uses().size(), 0);
+ // parent() should be nullptr on construction
+ ASSERT_EQ(op->parent(), nullptr);
+
+ ASSERT_EQ(op->group(), 1);
+
+ ASSERT_NE(op->pad(), nullptr);
+ ASSERT_EQ(op->pad()->top(), 0);
+ ASSERT_EQ(op->pad()->bottom(), 0);
+ ASSERT_EQ(op->pad()->left(), 0);
+ ASSERT_EQ(op->pad()->right(), 0);
+
+ ASSERT_NE(op->stride(), nullptr);
+ ASSERT_EQ(op->stride()->vertical(), 1);
+ ASSERT_EQ(op->stride()->horizontal(), 1);
+}
+
+TEST_F(Conv2DTest, asConv2D)
+{
+ auto op = allocate();
+
+ coco::Op *mutable_base = op;
+ const coco::Op *immutable_base = op;
+
+ ASSERT_EQ(mutable_base->asConv2D(), op);
+ ASSERT_EQ(mutable_base->asConv2D(), immutable_base->asConv2D());
+}
+
+namespace
+{
+struct IsConv2D : public coco::Op::Visitor<bool>
+{
+ bool visit(const coco::Conv2D *) override { return true; }
+};
+} // namespace
+
+TEST_F(Conv2DTest, ker_update)
+{
+ // Prepare a kernel object for testing
+ auto obj = obj_mgr.create<coco::KernelObject>();
+
+ // Test 'Conv2D' class
+ auto op = allocate();
+
+ op->ker(obj);
+ ASSERT_EQ(op->ker(), obj);
+
+ // Op now uses 'obj'
+ {
+ auto uses = op->uses();
+
+ ASSERT_NE(uses.find(obj), uses.end());
+ }
+
+ // ker method should enlist op itself as a consumer of a given kernel object
+ {
+ auto consumers = coco::consumers(obj);
+
+ ASSERT_EQ(consumers.size(), 1);
+ ASSERT_NE(consumers.find(op), consumers.end());
+ }
+}
+
+TEST_F(Conv2DTest, accept)
+{
+ // Test 'Conv2D' class
+ auto op = allocate();
+
+ coco::Conv2D *mutable_ptr = op;
+ const coco::Conv2D *immutable_ptr = op;
+
+ ASSERT_TRUE(mutable_ptr->accept(IsConv2D{}));
+ ASSERT_TRUE(immutable_ptr->accept(IsConv2D{}));
+}
+
+TEST_F(Conv2DTest, destructor)
+{
+ // Prepare a kernel object for testing
+ auto obj = obj_mgr.create<coco::KernelObject>();
+
+ // Create 'Conv2D' op
+ auto op = make_unique<coco::Conv2D>();
+
+ op->ker(obj);
+
+ // Destroy 'Conv2D' op
+ op.reset();
+
+ ASSERT_EQ(obj->uses()->size(), 0);
+}
diff --git a/compiler/coco/core/src/IR/Def.cpp b/compiler/coco/core/src/IR/Def.cpp
new file mode 100644
index 000000000..1546b6693
--- /dev/null
+++ b/compiler/coco/core/src/IR/Def.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Def.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+void Def::value(Object *value)
+{
+ if (_value)
+ {
+ _value->def(nullptr);
+ _value = nullptr;
+ }
+
+ assert(_value == nullptr);
+
+ if (value)
+ {
+ _value = value;
+ _value->def(this);
+ }
+
+ assert(_value == value);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Def.test.cpp b/compiler/coco/core/src/IR/Def.test.cpp
new file mode 100644
index 000000000..98455c09e
--- /dev/null
+++ b/compiler/coco/core/src/IR/Def.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Def.h"
+#include "coco/IR/ObjectManager.h"
+
+#include "coco/IR/FeatureObject.h"
+
+#include <stdex/Memory.h>
+
+#include "Producer.mock.h"
+
+#include <gtest/gtest.h>
+
+using stdex::make_unique;
+
+namespace
+{
+class DefTest : public ::testing::Test
+{
+protected:
+ coco::ObjectManager obj_mgr;
+};
+} // namespace
+
+TEST_F(DefTest, constructor)
+{
+ auto o = obj_mgr.create<coco::FeatureObject>();
+
+ ::mock::Producer producer;
+ coco::Def def{&producer};
+
+ ASSERT_EQ(def.value(), nullptr);
+}
+
+TEST_F(DefTest, value)
+{
+ auto o = obj_mgr.create<coco::FeatureObject>();
+
+ ::mock::Producer producer;
+ coco::Def def{&producer};
+
+ def.value(o);
+
+ ASSERT_EQ(def.value(), o);
+
+ ASSERT_EQ(o->def(), &def);
+
+ def.value(nullptr);
+
+ ASSERT_EQ(o->def(), nullptr);
+}
+
+TEST_F(DefTest, unlink_on_destruction)
+{
+ auto o = obj_mgr.create<coco::FeatureObject>();
+
+ ::mock::Producer producer;
+ auto def = make_unique<coco::Def>(&producer);
+
+ def->value(o);
+ ASSERT_EQ(o->def(), def.get());
+
+ // Let's destruct the allocated slot
+ def.reset(nullptr);
+
+ // The def of Object SHOULD BE updated
+ ASSERT_EQ(o->def(), nullptr);
+}
diff --git a/compiler/coco/core/src/IR/Dep.cpp b/compiler/coco/core/src/IR/Dep.cpp
new file mode 100644
index 000000000..6a5d3cafb
--- /dev/null
+++ b/compiler/coco/core/src/IR/Dep.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Dep.h"
+#include "coco/IR/Object.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+Dep::~Dep() { bag(nullptr); }
+
+void Dep::bag(Bag *bag)
+{
+ if (_bag != nullptr)
+ {
+ // Remove bag <-> dep link
+ assert(_bag->deps()->find(this) != _bag->deps()->end());
+ _bag->mutable_deps()->erase(this);
+
+ // Reset _bag
+ _bag = nullptr;
+ }
+
+ assert(_bag == nullptr);
+
+ if (bag != nullptr)
+ {
+ // Set _bag
+ _bag = bag;
+
+ // Create bag <-> dep link
+ _bag->mutable_deps()->insert(this);
+ }
+
+ assert(_bag == bag);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Dep.test.cpp b/compiler/coco/core/src/IR/Dep.test.cpp
new file mode 100644
index 000000000..e2104a8af
--- /dev/null
+++ b/compiler/coco/core/src/IR/Dep.test.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Dep.h"
+
+#include "coco/IR/BagManager.h"
+
+#include "coco/IR/ObjectManager.h"
+#include "coco/IR/FeatureObject.h"
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT;
+
+namespace
+{
+class DepTest : public ::testing::Test
+{
+protected:
+ coco::BagManager bag_mgr;
+ coco::ObjectManager obj_mgr;
+};
+} // namespace
+
+TEST_F(DepTest, default_constructor)
+{
+ coco::Dep dep;
+
+ ASSERT_EQ(dep.bag(), nullptr);
+ ASSERT_EQ(dep.object(), nullptr);
+}
+
+TEST_F(DepTest, bag_update)
+{
+ auto bag = bag_mgr.create(3);
+
+ coco::Dep dep;
+
+ // NOTE b->object() is not updated here
+ dep.bag(bag);
+
+ ASSERT_EQ(dep.bag(), bag);
+}
+
+TEST_F(DepTest, bag_update_with_link_and_object)
+{
+ auto bag = bag_mgr.create(3);
+ auto obj = obj_mgr.create<coco::FeatureObject>();
+
+ coco::Dep dep;
+
+ dep.object(obj);
+
+ dep.bag(bag);
+
+ auto deps = coco::dependent_objects(bag);
+
+ ASSERT_EQ(deps.size(), 1);
+ ASSERT_NE(deps.count(obj), 0);
+}
diff --git a/compiler/coco/core/src/IR/ElemID.cpp b/compiler/coco/core/src/IR/ElemID.cpp
new file mode 100644
index 000000000..145bb986a
--- /dev/null
+++ b/compiler/coco/core/src/IR/ElemID.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/ElemID.h"
+
+namespace coco
+{
+
+bool operator==(const ElemID &lhs, const ElemID &rhs) { return lhs.value() == rhs.value(); }
+bool operator<(const ElemID &lhs, const ElemID &rhs) { return lhs.value() < rhs.value(); }
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/ElemID.test.cpp b/compiler/coco/core/src/IR/ElemID.test.cpp
new file mode 100644
index 000000000..dff2fa27c
--- /dev/null
+++ b/compiler/coco/core/src/IR/ElemID.test.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/ElemID.h"
+
+#include <vector>
+
+#include <gtest/gtest.h>
+
+TEST(IR_ELEM_ID, constructor)
+{
+ coco::ElemID id{128};
+
+ ASSERT_EQ(id.value(), 128);
+}
+
+TEST(IR_ELEM_ID, copy)
+{
+ coco::ElemID src{16};
+ coco::ElemID dst{32};
+
+ dst = src;
+
+ ASSERT_EQ(dst.value(), 16);
+}
+
+TEST(IR_ELEM_ID, std_vector_compatible)
+{
+ // ElemID SHOULD be compatible with standard container (including std::vector)
+ std::vector<coco::ElemID> vec;
+
+ vec.resize(16);
+ vec.clear();
+ vec.emplace_back(coco::ElemID{128});
+
+ ASSERT_EQ(vec.at(0).value(), 128);
+}
+
+TEST(IR_ELEM_ID, operator_eq)
+{
+ ASSERT_TRUE(coco::ElemID{16} == coco::ElemID{16});
+ ASSERT_FALSE(coco::ElemID{16} == coco::ElemID{17});
+}
+
+TEST(IR_ELEM_ID, operator_lt)
+{
+ ASSERT_FALSE(coco::ElemID{16} < coco::ElemID{16});
+ ASSERT_TRUE(coco::ElemID{16} < coco::ElemID{17});
+}
diff --git a/compiler/coco/core/src/IR/EntityManager.cpp b/compiler/coco/core/src/IR/EntityManager.cpp
new file mode 100644
index 000000000..f6f2cb382
--- /dev/null
+++ b/compiler/coco/core/src/IR/EntityManager.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/EntityManager.h"
+
+// NOTE Do NOT delete this file; this file enforces compiler to check whether 'EntityManager.h' is
+// complete.
diff --git a/compiler/coco/core/src/IR/Eval.cpp b/compiler/coco/core/src/IR/Eval.cpp
new file mode 100644
index 000000000..dcf579049
--- /dev/null
+++ b/compiler/coco/core/src/IR/Eval.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Instrs.h"
+#include "coco/IR/Op.h"
+
+namespace coco
+{
+
+Eval::Eval() : _out{this}, _step{this}
+{
+ // DO NOTHING
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Eval.test.cpp b/compiler/coco/core/src/IR/Eval.test.cpp
new file mode 100644
index 000000000..6469f6763
--- /dev/null
+++ b/compiler/coco/core/src/IR/Eval.test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Instrs.h"
+#include "coco/IR/ObjectManager.h"
+#include "coco/IR/OpManager.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+class EvalTest : public ::testing::Test
+{
+public:
+ virtual ~EvalTest() = default;
+
+protected:
+ coco::Eval *allocate(void)
+ {
+ auto ins = new coco::Eval{};
+ _allocated.emplace_back(ins);
+ return ins;
+ }
+
+private:
+ std::vector<std::unique_ptr<coco::Instr>> _allocated;
+};
+} // namespace
+
+TEST_F(EvalTest, constructor)
+{
+ auto ins = allocate();
+
+ ASSERT_EQ(ins->out(), nullptr);
+ ASSERT_EQ(ins->op(), nullptr);
+}
+
+TEST_F(EvalTest, asEval)
+{
+ auto ins = allocate();
+
+ coco::Instr *mutable_ptr = ins;
+ const coco::Instr *immutable_ptr = ins;
+
+ ASSERT_NE(mutable_ptr->asEval(), nullptr);
+ ASSERT_EQ(mutable_ptr->asEval(), immutable_ptr->asEval());
+}
diff --git a/compiler/coco/core/src/IR/FeatureLayouts.cpp b/compiler/coco/core/src/IR/FeatureLayouts.cpp
new file mode 100644
index 000000000..98423e01f
--- /dev/null
+++ b/compiler/coco/core/src/IR/FeatureLayouts.cpp
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/FeatureLayouts.h"
+
+#include <nncc/core/ADT/feature/CHWLayout.h>
+#include <nncc/core/ADT/feature/HWCLayout.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT::feature;
+
+//
+// BCHW Layout
+//
+namespace coco
+{
+namespace FeatureLayouts
+{
+
+const FeatureLayout::ID *BCHW::uid(void)
+{
+ struct LayoutID final : public FeatureLayout::ID
+ {
+ };
+ static LayoutID id;
+ return &id;
+}
+
+ElemID BCHW::at(uint32_t b, uint32_t ch, uint32_t row, uint32_t col) const
+{
+ static CHWLayout l;
+
+ uint32_t offset = 0;
+ offset += b * num_elements(_shape);
+ offset += l.offset(_shape, ch, row, col);
+ return ElemID{offset};
+}
+
+std::unique_ptr<BCHW> BCHW::create(const nncc::core::ADT::feature::Shape &shape)
+{
+ // NOTE It is impossible to use make_unique here as the constructor is private
+ return std::unique_ptr<BCHW>{new BCHW{FeatureShape{shape}}};
+}
+
+} // namespace FeatureLayouts
+} // namespace coco
+
+//
+// BHWC Layout
+//
+namespace coco
+{
+namespace FeatureLayouts
+{
+
+const FeatureLayout::ID *BHWC::uid(void)
+{
+ struct LayoutID final : public FeatureLayout::ID
+ {
+ };
+ static LayoutID id;
+ return &id;
+}
+
+ElemID BHWC::at(uint32_t b, uint32_t ch, uint32_t row, uint32_t col) const
+{
+ static HWCLayout l;
+
+ uint32_t offset = 0;
+ offset += b * num_elements(_shape);
+ offset += l.offset(_shape, ch, row, col);
+
+ return ElemID{offset};
+}
+
+std::unique_ptr<BHWC> BHWC::create(const nncc::core::ADT::feature::Shape &shape)
+{
+ // NOTE It is impossible to use make_unique here as the constructor is private
+ return std::unique_ptr<BHWC>{new BHWC{FeatureShape{shape}}};
+}
+
+std::unique_ptr<BHWC> BHWC::create(const FeatureShape &shape)
+{
+ // NOTE It is impossible to use make_unique here as the constructor is private
+ return std::unique_ptr<BHWC>{new BHWC{shape}};
+}
+
+} // namespace FeatureLayouts
+} // namespace coco
+
+//
+// BC: Channel-major Channel-wise Layout
+//
+namespace coco
+{
+namespace FeatureLayouts
+{
+
+const FeatureLayout::ID *BC::uid(void)
+{
+ struct LayoutID final : public FeatureLayout::ID
+ {
+ };
+ static LayoutID id;
+ return &id;
+}
+
+// NOTE BC layout ignores row/col as its name suggests
+ElemID BC::at(uint32_t b, uint32_t ch, uint32_t /*row*/, uint32_t /*col*/) const
+{
+ assert(b < shape().batch());
+
+ uint32_t offset = 0;
+
+ offset += b * _shape.depth();
+ offset += ch;
+
+ return ElemID{offset};
+}
+
+std::unique_ptr<BC> BC::create(const nncc::core::ADT::feature::Shape &shape)
+{
+ // NOTE It is impossible to use make_unique here as the constructor is private
+ return std::unique_ptr<BC>{new BC{FeatureShape{shape}}};
+}
+
+} // namespace FeatureLayouts
+} // namespace coco
+
+//
+// Generic Layout
+//
+namespace coco
+{
+namespace FeatureLayouts
+{
+
+Generic::Generic(const FeatureShape &shape) : _shape{shape}
+{
+ _content.resize(_shape.batch() * num_elements(_shape));
+}
+
+const FeatureLayout::ID *Generic::uid(void)
+{
+ struct LayoutID final : public FeatureLayout::ID
+ {
+ };
+ static LayoutID id;
+ return &id;
+}
+
+uint32_t Generic::offset(uint32_t b, uint32_t ch, uint32_t row, uint32_t col) const
+{
+ static nncc::core::ADT::feature::CHWLayout l{};
+
+ uint32_t res = 0;
+
+ res += b * num_elements(_shape);
+ res += l.offset(shape(), ch, row, col);
+
+ return res;
+}
+
+ElemID &Generic::at(uint32_t b, uint32_t ch, uint32_t row, uint32_t col)
+{
+ return _content.at(offset(b, ch, row, col));
+}
+
+ElemID Generic::at(uint32_t b, uint32_t ch, uint32_t row, uint32_t col) const
+{
+ return _content.at(offset(b, ch, row, col));
+}
+
+void Generic::reorder(const nncc::core::ADT::feature::Layout &l)
+{
+ assert(shape().batch() == 1);
+
+ for (uint32_t ch = 0; ch < shape().depth(); ++ch)
+ {
+ for (uint32_t row = 0; row < shape().height(); ++row)
+ {
+ for (uint32_t col = 0; col < shape().width(); ++col)
+ {
+ at(0, ch, row, col) = ElemID{l.offset(shape(), ch, row, col)};
+ }
+ }
+ }
+}
+
+std::unique_ptr<Generic> Generic::create(const nncc::core::ADT::feature::Shape &shape)
+{
+ // NOTE It is impossible to use make_unique here as the constructor is private
+ return std::unique_ptr<Generic>{new Generic{shape}};
+}
+
+} // namespace FeatureLayouts
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/FeatureLayouts.test.cpp b/compiler/coco/core/src/IR/FeatureLayouts.test.cpp
new file mode 100644
index 000000000..9f9772dd8
--- /dev/null
+++ b/compiler/coco/core/src/IR/FeatureLayouts.test.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/FeatureLayouts.h"
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT;
+
+TEST(FeatureLayoutsTest, BC)
+{
+ // NOTE The current implementation uses a hard-coded "batch" value
+ const uint32_t B = 1;
+ const uint32_t C = 3;
+ const uint32_t H = 4;
+ const uint32_t W = 5;
+
+ auto l = coco::FeatureLayouts::BC::create(feature::Shape{C, H, W});
+
+ ASSERT_EQ(l->batch(), B);
+ ASSERT_EQ(l->depth(), C);
+ ASSERT_EQ(l->height(), H);
+ ASSERT_EQ(l->width(), W);
+
+ // Check whether BC layout is actually channel-wise
+ for (uint32_t b = 0; b < B; ++b)
+ {
+ for (uint32_t ch = 0; ch < C; ++ch)
+ {
+ for (uint32_t row = 0; row < H; ++row)
+ {
+ for (uint32_t col = 0; col < W; ++col)
+ {
+ ASSERT_EQ(l->at(b, ch, 0, 0), l->at(b, ch, row, col));
+ }
+ }
+ }
+ }
+
+ // Check whether BC layout is actually channel-major
+ for (uint32_t b = 0; b < B; ++b)
+ {
+ for (uint32_t ch = 1; ch < C; ++ch)
+ {
+ ASSERT_EQ(l->at(b, ch - 1, 0, 0).value() + 1, l->at(b, ch, 0, 0).value());
+ }
+ }
+
+ for (uint32_t b = 1; b < B; ++b)
+ {
+ ASSERT_EQ(l->at(b - 1, C - 1, 0, 0).value() + 1, l->at(b, 0, 0, 0).value());
+ }
+}
diff --git a/compiler/coco/core/src/IR/FeatureObject.cpp b/compiler/coco/core/src/IR/FeatureObject.cpp
new file mode 100644
index 000000000..46de98874
--- /dev/null
+++ b/compiler/coco/core/src/IR/FeatureObject.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/FeatureObject.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+FeatureObject::~FeatureObject()
+{
+ // DO NOTHING
+}
+
+const FeatureShape &FeatureObject::shape(void) const { return _layout->shape(); }
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/FeatureObject.test.cpp b/compiler/coco/core/src/IR/FeatureObject.test.cpp
new file mode 100644
index 000000000..23188f866
--- /dev/null
+++ b/compiler/coco/core/src/IR/FeatureObject.test.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/FeatureObject.h"
+#include "coco/IR/FeatureLayouts.h"
+
+#include <vector>
+#include <memory>
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT;
+
+namespace
+{
+class FeatureObjectTest : public ::testing::Test
+{
+protected:
+ coco::FeatureObject *allocate()
+ {
+ auto o = new coco::FeatureObject{};
+ _allocated.emplace_back(o);
+ return o;
+ }
+
+ // TODO Deprecate this method
+ coco::FeatureObject *allocate(const coco::FeatureShape &shape)
+ {
+ auto o = new coco::FeatureObject{};
+ o->layout(coco::FeatureLayouts::Generic::create(shape));
+ _allocated.emplace_back(o);
+ return o;
+ }
+
+private:
+ std::vector<std::unique_ptr<coco::FeatureObject>> _allocated;
+};
+} // namespace
+
+TEST_F(FeatureObjectTest, ctor)
+{
+ const coco::FeatureShape shape{1, 3, 3};
+
+ auto o = allocate(shape);
+
+ ASSERT_EQ(o->shape(), shape);
+ ASSERT_EQ(o->kind(), coco::Object::Kind::Feature);
+}
+
+// TODO Reimplement this test as a test for GenericFeatureLayout
+#if 0
+TEST_F(FeatureObjectTest, at)
+{
+ const uint32_t C = 1;
+ const uint32_t H = 3;
+ const uint32_t W = 3;
+
+ const coco::FeatureShape shape{C, H, W};
+
+ auto o = allocate(shape);
+
+ coco::FeatureObject *mutable_ptr = o;
+ const coco::FeatureObject *immutable_ptr = o;
+
+ for (uint32_t ch = 0; ch < C; ++ch)
+ {
+ for (uint32_t row = 0; row < H; ++row)
+ {
+ for (uint32_t col = 0; col < W; ++col)
+ {
+ mutable_ptr->at(ch, row, col) = coco::ElemID{16};
+ }
+ }
+ }
+
+ for (uint32_t ch = 0; ch < C; ++ch)
+ {
+ for (uint32_t row = 0; row < H; ++row)
+ {
+ for (uint32_t col = 0; col < W; ++col)
+ {
+ ASSERT_EQ(immutable_ptr->at(ch, row, col).value(), 16);
+ }
+ }
+ }
+}
+#endif
+
+TEST_F(FeatureObjectTest, asFeature)
+{
+ const coco::FeatureShape shape{1, 3, 3};
+
+ auto o = allocate(shape);
+
+ coco::Object *mutable_object = o;
+ const coco::Object *immutable_object = o;
+
+ ASSERT_NE(mutable_object->asFeature(), nullptr);
+ ASSERT_EQ(mutable_object->asFeature(), immutable_object->asFeature());
+}
+
+TEST_F(FeatureObjectTest, casting_helpers)
+{
+ auto obj = allocate();
+
+ ASSERT_TRUE(coco::isa<coco::FeatureObject>(obj));
+ ASSERT_EQ(coco::cast<coco::FeatureObject>(obj), obj);
+ ASSERT_EQ(coco::safe_cast<coco::FeatureObject>(obj), obj);
+}
diff --git a/compiler/coco/core/src/IR/FeatureShape.test.cpp b/compiler/coco/core/src/IR/FeatureShape.test.cpp
new file mode 100644
index 000000000..ceeab02b7
--- /dev/null
+++ b/compiler/coco/core/src/IR/FeatureShape.test.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/FeatureShape.h"
+
+#include <gtest/gtest.h>
+
+TEST(FeatureShapeTest, constructor_with_4_arguments)
+{
+ const coco::FeatureShape shape{1, 2, 3, 4};
+
+ ASSERT_EQ(shape.batch(), 1);
+ ASSERT_EQ(shape.depth(), 2);
+ ASSERT_EQ(shape.height(), 3);
+ ASSERT_EQ(shape.width(), 4);
+}
diff --git a/compiler/coco/core/src/IR/Input.cpp b/compiler/coco/core/src/IR/Input.cpp
new file mode 100644
index 000000000..4385ac26c
--- /dev/null
+++ b/compiler/coco/core/src/IR/Input.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Input.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+Input::Input(const nncc::core::ADT::tensor::Shape &shape) : Arg{shape}
+{
+ // DO NOTHING
+}
+
+void Input::onTake(Bag *bag)
+{
+ assert(bag->input() == nullptr);
+ bag->input(this);
+}
+
+void Input::onRelease(Bag *bag)
+{
+ assert(bag->input() == this);
+ bag->input(nullptr);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Input.test.cpp b/compiler/coco/core/src/IR/Input.test.cpp
new file mode 100644
index 000000000..7cc1731cc
--- /dev/null
+++ b/compiler/coco/core/src/IR/Input.test.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Input.h"
+#include "coco/IR/BagManager.h"
+
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+TEST(IR_INPUT, ctor_should_set_shape)
+{
+ const nncc::core::ADT::tensor::Shape shape{1, 3, 3, 1};
+ coco::Input input{shape};
+
+ ASSERT_EQ(input.shape(), shape);
+ ASSERT_TRUE(input.name().empty());
+}
+
+TEST(IR_INPUT, bag_update)
+{
+ // Create a bag
+ coco::BagManager bag_mgr;
+
+ auto bag = bag_mgr.create(9);
+
+ const nncc::core::ADT::tensor::Shape shape{1, 3, 3, 1};
+ coco::Input input{shape};
+
+ input.bag(bag);
+ ASSERT_EQ(input.bag(), bag);
+
+ // bag(...) method SHOULD update 'bag' type
+ ASSERT_TRUE(bag->isInput());
+}
+
+TEST(IR_INPUT, name_update)
+{
+ const nncc::core::ADT::tensor::Shape shape{1, 3, 3, 1};
+ coco::Input input{shape};
+
+ input.name("data");
+ ASSERT_EQ(input.name(), "data");
+}
+
+TEST(IR_INPUT, at)
+{
+ const Shape shape{1, 3, 3, 1};
+ coco::Input input{shape};
+
+ coco::Input *mutable_ptr = &input;
+ const coco::Input *immutable_ptr = &input;
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ mutable_ptr->at(e.current()) = coco::ElemID{16};
+ }
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ ASSERT_EQ(immutable_ptr->at(e.current()).value(), 16);
+ }
+}
diff --git a/compiler/coco/core/src/IR/InputManager.cpp b/compiler/coco/core/src/IR/InputManager.cpp
new file mode 100644
index 000000000..6d5b9470b
--- /dev/null
+++ b/compiler/coco/core/src/IR/InputManager.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/InputManager.h"
+
+#include <stdex/Memory.h>
+
+namespace coco
+{
+
+Input *InputManager::create(const nncc::core::ADT::tensor::Shape &shape)
+{
+ auto input = stdex::make_unique<Input>(shape);
+ modulize(input.get());
+ return take(std::move(input));
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/InputManager.test.cpp b/compiler/coco/core/src/IR/InputManager.test.cpp
new file mode 100644
index 000000000..be43113b4
--- /dev/null
+++ b/compiler/coco/core/src/IR/InputManager.test.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/InputManager.h"
+
+#include <gtest/gtest.h>
+
+TEST(IR_INPUT_MANAGER, make)
+{
+ coco::InputManager mgr;
+
+ const nncc::core::ADT::tensor::Shape shape{1, 3, 3, 1};
+ auto input = mgr.create(shape);
+
+ ASSERT_EQ(input->shape(), shape);
+}
diff --git a/compiler/coco/core/src/IR/Instr.cpp b/compiler/coco/core/src/IR/Instr.cpp
new file mode 100644
index 000000000..9f000ba1c
--- /dev/null
+++ b/compiler/coco/core/src/IR/Instr.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Instr.h"
+#include "coco/IR/Block.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+template <> void DLinkedList<Instr, Block>::joined(Block *, Instr *curr_ins)
+{
+ assert(!curr_ins->index().valid());
+ uint32_t value = 0;
+
+ if (auto prev_ins = curr_ins->prev())
+ {
+ value = prev_ins->index().value() + 1;
+ }
+
+ for (auto ins = curr_ins; ins; ins = ins->next())
+ {
+ ins->_index.set(value++);
+ }
+}
+
+template <> void DLinkedList<Instr, Block>::leaving(Block *, Instr *curr_ins)
+{
+ assert(curr_ins->index().valid());
+ uint32_t value = curr_ins->index().value();
+
+ for (auto ins = curr_ins->next(); ins; ins = ins->next())
+ {
+ ins->_index.set(value++);
+ }
+
+ curr_ins->_index.reset();
+}
+
+template <> InstrList *DLinkedList<Instr, Block>::head(Block *b) { return b->instr(); }
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/InstrIndex.cpp b/compiler/coco/core/src/IR/InstrIndex.cpp
new file mode 100644
index 000000000..c447cfc42
--- /dev/null
+++ b/compiler/coco/core/src/IR/InstrIndex.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/InstrIndex.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+void InstrIndex::set(uint32_t value)
+{
+ assert(value != undefined);
+ _value = value;
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/InstrIndex.test.cpp b/compiler/coco/core/src/IR/InstrIndex.test.cpp
new file mode 100644
index 000000000..40f5d49de
--- /dev/null
+++ b/compiler/coco/core/src/IR/InstrIndex.test.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/InstrIndex.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class InstrIndexTest : public ::testing::Test
+{
+};
+
+} // namespace
+
+TEST_F(InstrIndexTest, default_constructor)
+{
+ coco::InstrIndex ins_ind;
+
+ ASSERT_FALSE(ins_ind.valid());
+}
+
+TEST_F(InstrIndexTest, explicit_constructor)
+{
+ coco::InstrIndex ins_ind{3};
+
+ ASSERT_TRUE(ins_ind.valid());
+ ASSERT_EQ(ins_ind.value(), 3);
+}
+
+TEST_F(InstrIndexTest, operator_lt)
+{
+ // Valid index is always less than undefined one.
+ ASSERT_TRUE(coco::InstrIndex(3) < coco::InstrIndex());
+ ASSERT_TRUE(coco::InstrIndex(3) < coco::InstrIndex(4));
+}
diff --git a/compiler/coco/core/src/IR/InstrManager.cpp b/compiler/coco/core/src/IR/InstrManager.cpp
new file mode 100644
index 000000000..32f1cbf28
--- /dev/null
+++ b/compiler/coco/core/src/IR/InstrManager.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/InstrManager.h"
+
+#include "coco/IR/Op.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+void InstrManager::destroy(Instr *ins)
+{
+ // ins SHOULD BE detached from any block before destroy call
+ assert(ins->parent() == nullptr);
+ release(ins);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/InstrManager.test.cpp b/compiler/coco/core/src/IR/InstrManager.test.cpp
new file mode 100644
index 000000000..23d9f8e86
--- /dev/null
+++ b/compiler/coco/core/src/IR/InstrManager.test.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/InstrManager.h"
+#include "coco/IR/Op.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+// Dummy custom instruction for testing
+struct CustomInstr final : public coco::Instr
+{
+};
+
+class InstrManagerTest : public ::testing::Test
+{
+public:
+ virtual ~InstrManagerTest() = default;
+
+protected:
+ coco::InstrManager mgr;
+};
+} // namespace
+
+TEST_F(InstrManagerTest, create_Shuffle)
+{
+ auto ins = mgr.create<coco::Shuffle>();
+ ASSERT_NE(ins, nullptr);
+ mgr.destroy(ins);
+}
+
+TEST_F(InstrManagerTest, create_Custom)
+{
+ auto ins = mgr.create<CustomInstr>();
+ ASSERT_NE(ins, nullptr);
+ mgr.destroy(ins);
+}
diff --git a/compiler/coco/core/src/IR/KernelLayouts.cpp b/compiler/coco/core/src/IR/KernelLayouts.cpp
new file mode 100644
index 000000000..6e9a1575a
--- /dev/null
+++ b/compiler/coco/core/src/IR/KernelLayouts.cpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/KernelLayouts.h"
+
+#include <nncc/core/ADT/kernel/NCHWLayout.h>
+#include <nncc/core/ADT/kernel/NHWCLayout.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT::kernel;
+
+using nncc::core::ADT::kernel::num_elements;
+using nncc::core::ADT::kernel::Shape;
+
+//
+// NCHW Layout
+//
+namespace coco
+{
+namespace KernelLayouts
+{
+
+const KernelLayout::ID *NCHW::uid(void)
+{
+ struct LayoutID final : public KernelLayout::ID
+ {
+ };
+ static LayoutID id;
+ return &id;
+}
+
+ElemID NCHW::at(uint32_t n, uint32_t ch, uint32_t row, uint32_t col) const
+{
+ static NCHWLayout l;
+ return ElemID{l.offset(_shape, n, ch, row, col)};
+}
+
+std::unique_ptr<NCHW> NCHW::create(const nncc::core::ADT::kernel::Shape &shape)
+{
+ // NOTE It is impossible to use make_unique here as the constructor is private
+ return std::unique_ptr<NCHW>{new NCHW{shape}};
+}
+
+} // namespace KernelLayouts
+} // namespace coco
+
+//
+// NHWC Layout
+//
+namespace coco
+{
+namespace KernelLayouts
+{
+
+const KernelLayout::ID *NHWC::uid(void)
+{
+ struct LayoutID final : public KernelLayout::ID
+ {
+ };
+ static LayoutID id;
+ return &id;
+}
+
+ElemID NHWC::at(uint32_t n, uint32_t ch, uint32_t row, uint32_t col) const
+{
+ static NHWCLayout l;
+ return ElemID{l.offset(_shape, n, ch, row, col)};
+}
+
+std::unique_ptr<NHWC> NHWC::create(const nncc::core::ADT::kernel::Shape &shape)
+{
+ // NOTE It is impossible to use make_unique here as the constructor is private
+ return std::unique_ptr<NHWC>{new NHWC{shape}};
+}
+
+} // namespace KernelLayouts
+} // namespace coco
+
+//
+// Generic Layout
+//
+namespace
+{
+
+nncc::core::ADT::kernel::NCHWLayout l;
+
+} // namespace
+
+namespace coco
+{
+namespace KernelLayouts
+{
+
+Generic::Generic(const nncc::core::ADT::kernel::Shape &shape) : _shape{shape}
+{
+ _content.resize(num_elements(_shape));
+}
+
+const KernelLayout::ID *Generic::uid(void)
+{
+ struct LayoutID final : public KernelLayout::ID
+ {
+ };
+ static LayoutID id;
+ return &id;
+}
+
+ElemID &Generic::at(uint32_t n, uint32_t ch, uint32_t row, uint32_t col)
+{
+ return _content.at(l.offset(_shape, n, ch, row, col));
+}
+
+ElemID Generic::at(uint32_t n, uint32_t ch, uint32_t row, uint32_t col) const
+{
+ return _content.at(l.offset(_shape, n, ch, row, col));
+}
+
+void Generic::reorder(const nncc::core::ADT::kernel::Layout &l)
+{
+ for (uint32_t n = 0; n < shape().count(); ++n)
+ {
+ for (uint32_t ch = 0; ch < shape().depth(); ++ch)
+ {
+ for (uint32_t row = 0; row < shape().height(); ++row)
+ {
+ for (uint32_t col = 0; col < shape().width(); ++col)
+ {
+ at(n, ch, row, col) = ElemID{l.offset(shape(), n, ch, row, col)};
+ }
+ }
+ }
+ }
+}
+
+std::unique_ptr<Generic> Generic::create(const Shape &shape)
+{
+ return std::unique_ptr<Generic>{new Generic{shape}};
+}
+
+} // namespace KernelLayouts
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/KernelLayouts.test.cpp b/compiler/coco/core/src/IR/KernelLayouts.test.cpp
new file mode 100644
index 000000000..df13cb051
--- /dev/null
+++ b/compiler/coco/core/src/IR/KernelLayouts.test.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/KernelLayouts.h"
+
+#include <nncc/core/ADT/kernel/NCHWLayout.h>
+#include <nncc/core/ADT/kernel/NHWCLayout.h>
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT;
+
+// Verifies the stride pattern of the NCHW layout: adjacent IDs differ by
+// 1 / W / H*W / C*H*W along the col / row / ch / n axes respectively.
+TEST(KernelLayoutsTest, NCHW_increment)
+{
+ const uint32_t N = 2;
+ const uint32_t C = 3;
+ const uint32_t H = 4;
+ const uint32_t W = 4;
+
+ auto l = coco::KernelLayouts::NCHW::create(kernel::Shape{N, C, H, W});
+
+ // check NCHW order
+ ASSERT_EQ(l->at(0, 0, 0, 1).value(), l->at(0, 0, 0, 0).value() + 1);
+ ASSERT_EQ(l->at(0, 0, 1, 0).value(), l->at(0, 0, 0, 0).value() + W);
+ ASSERT_EQ(l->at(0, 1, 0, 0).value(), l->at(0, 0, 0, 0).value() + H * W);
+ ASSERT_EQ(l->at(1, 0, 0, 0).value(), l->at(0, 0, 0, 0).value() + C * H * W);
+}
+
+// Verifies the stride pattern of the NHWC layout: channel is the innermost
+// axis, so adjacent IDs differ by 1 / C / W*C / H*W*C along ch / col / row / n.
+TEST(KernelLayoutsTest, NHWC_increment)
+{
+ const uint32_t N = 2;
+ const uint32_t C = 3;
+ const uint32_t H = 4;
+ const uint32_t W = 4;
+
+ auto l = coco::KernelLayouts::NHWC::create(kernel::Shape{N, C, H, W});
+
+ // check NHWC order
+ ASSERT_EQ(l->at(0, 1, 0, 0).value(), l->at(0, 0, 0, 0).value() + 1);
+ ASSERT_EQ(l->at(0, 0, 0, 1).value(), l->at(0, 0, 0, 0).value() + C);
+ ASSERT_EQ(l->at(0, 0, 1, 0).value(), l->at(0, 0, 0, 0).value() + W * C);
+ ASSERT_EQ(l->at(1, 0, 0, 0).value(), l->at(0, 0, 0, 0).value() + H * W * C);
+}
+
+// Verifies that Generic::reorder makes element IDs follow the requested
+// layout: one Generic is reordered to NCHW and one to NHWC, then each is
+// checked against the corresponding stride pattern.
+TEST(KernelLayoutsTest, Generic_increment)
+{
+ const uint32_t N = 2;
+ const uint32_t C = 3;
+ const uint32_t H = 4;
+ const uint32_t W = 4;
+
+ auto nchw = coco::KernelLayouts::Generic::create(kernel::Shape{N, C, H, W});
+ auto nhwc = coco::KernelLayouts::Generic::create(kernel::Shape{N, C, H, W});
+
+ // reorder
+ nchw->reorder(kernel::NCHWLayout());
+ nhwc->reorder(kernel::NHWCLayout());
+
+ // check NCHW order
+ ASSERT_EQ(nchw->at(0, 0, 0, 1).value(), nchw->at(0, 0, 0, 0).value() + 1);
+ ASSERT_EQ(nchw->at(0, 0, 1, 0).value(), nchw->at(0, 0, 0, 0).value() + W);
+ ASSERT_EQ(nchw->at(0, 1, 0, 0).value(), nchw->at(0, 0, 0, 0).value() + H * W);
+ ASSERT_EQ(nchw->at(1, 0, 0, 0).value(), nchw->at(0, 0, 0, 0).value() + C * H * W);
+
+ // check NHWC order
+ ASSERT_EQ(nhwc->at(0, 1, 0, 0).value(), nhwc->at(0, 0, 0, 0).value() + 1);
+ ASSERT_EQ(nhwc->at(0, 0, 0, 1).value(), nhwc->at(0, 0, 0, 0).value() + C);
+ ASSERT_EQ(nhwc->at(0, 0, 1, 0).value(), nhwc->at(0, 0, 0, 0).value() + W * C);
+ ASSERT_EQ(nhwc->at(1, 0, 0, 0).value(), nhwc->at(0, 0, 0, 0).value() + H * W * C);
+}
+
+// Verifies that values written through the mutable Generic::at overload are
+// observable through the const overload for every coordinate.
+TEST(KernelLayoutsTest, Generic_at)
+{
+ const uint32_t N = 2;
+ const uint32_t C = 3;
+ const uint32_t H = 4;
+ const uint32_t W = 4;
+
+ auto l = coco::KernelLayouts::Generic::create(kernel::Shape{N, C, H, W});
+
+ ASSERT_NE(l.get(), nullptr);
+
+ // Two views of the same object exercise both const and non-const overloads.
+ coco::KernelLayouts::Generic *mutable_ptr = l.get();
+ const coco::KernelLayouts::Generic *immutable_ptr = l.get();
+
+ // Write a sentinel value into every slot ...
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ for (uint32_t ch = 0; ch < C; ++ch)
+ {
+ for (uint32_t row = 0; row < H; ++row)
+ {
+ for (uint32_t col = 0; col < W; ++col)
+ {
+ mutable_ptr->at(n, ch, row, col) = coco::ElemID{16};
+ }
+ }
+ }
+ }
+
+ // ... and read it back through the const accessor.
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ for (uint32_t ch = 0; ch < C; ++ch)
+ {
+ for (uint32_t row = 0; row < H; ++row)
+ {
+ for (uint32_t col = 0; col < W; ++col)
+ {
+ ASSERT_EQ(immutable_ptr->at(n, ch, row, col).value(), 16);
+ }
+ }
+ }
+ }
+}
diff --git a/compiler/coco/core/src/IR/KernelObject.cpp b/compiler/coco/core/src/IR/KernelObject.cpp
new file mode 100644
index 000000000..79c298b43
--- /dev/null
+++ b/compiler/coco/core/src/IR/KernelObject.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/KernelObject.h"
+#include "coco/IR/KernelLayouts.h"
+
+#include <nncc/core/ADT/kernel/NCHWLayout.h>
+
+namespace coco
+{
+
+// A KernelObject starts with a Generic layout of the given shape, so its
+// element mapping can be reordered later without changing the object itself.
+KernelObject::KernelObject(const nncc::core::ADT::kernel::Shape &shape)
+{
+ _layout = KernelLayouts::Generic::create(shape);
+}
+
+// Out-of-line empty destructor (keeps unique_ptr member destruction in this TU).
+KernelObject::~KernelObject()
+{
+ // DO NOTHING
+}
+
+// The shape is owned by the layout; KernelObject simply forwards to it.
+const nncc::core::ADT::kernel::Shape &KernelObject::shape(void) const { return _layout->shape(); }
+
+// Element lookup is delegated to the current layout.
+ElemID KernelObject::at(uint32_t n, uint32_t ch, uint32_t row, uint32_t col) const
+{
+ return _layout->at(n, ch, row, col);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/KernelObject.test.cpp b/compiler/coco/core/src/IR/KernelObject.test.cpp
new file mode 100644
index 000000000..f227764ca
--- /dev/null
+++ b/compiler/coco/core/src/IR/KernelObject.test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/KernelObject.h"
+
+#include <vector>
+#include <memory>
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT;
+
+namespace
+{
+// Test fixture that owns every allocated KernelObject, so each test body can
+// allocate freely without leaking (ownership is released at fixture teardown).
+class KernelObjectTest : public ::testing::Test
+{
+protected:
+ // Allocate a default-constructed KernelObject tracked by the fixture.
+ coco::KernelObject *allocate()
+ {
+ auto o = new coco::KernelObject{};
+ _allocated.emplace_back(o);
+ return o;
+ }
+
+ // Allocate a shaped KernelObject tracked by the fixture.
+ coco::KernelObject *allocate(const kernel::Shape &shape)
+ {
+ auto o = new coco::KernelObject{shape};
+ _allocated.emplace_back(o);
+ return o;
+ }
+
+private:
+ std::vector<std::unique_ptr<coco::KernelObject>> _allocated;
+};
+} // namespace
+
+// A shaped KernelObject reports the construction shape and the Kernel kind.
+TEST_F(KernelObjectTest, constructor)
+{
+ const nncc::core::ADT::kernel::Shape shape{1, 1, 3, 3};
+ auto o = allocate(shape);
+
+ ASSERT_EQ(o->shape(), shape);
+ ASSERT_EQ(o->kind(), coco::Object::Kind::Kernel);
+}
+
+// asKernel() must be non-null and agree between const and non-const Object views.
+TEST_F(KernelObjectTest, asKernel)
+{
+ const nncc::core::ADT::kernel::Shape shape{1, 1, 3, 3};
+ auto o = allocate(shape);
+
+ coco::Object *mutable_object = o;
+ const coco::Object *immutable_object = o;
+
+ ASSERT_NE(mutable_object->asKernel(), nullptr);
+ ASSERT_EQ(mutable_object->asKernel(), immutable_object->asKernel());
+}
+
+// isa / cast / safe_cast all recognize a KernelObject through its base pointer.
+TEST_F(KernelObjectTest, casting_helpers)
+{
+ auto obj = allocate();
+
+ ASSERT_TRUE(coco::isa<coco::KernelObject>(obj));
+ ASSERT_EQ(coco::cast<coco::KernelObject>(obj), obj);
+ ASSERT_EQ(coco::safe_cast<coco::KernelObject>(obj), obj);
+}
diff --git a/compiler/coco/core/src/IR/Load.cpp b/compiler/coco/core/src/IR/Load.cpp
new file mode 100644
index 000000000..4985e9254
--- /dev/null
+++ b/compiler/coco/core/src/IR/Load.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Ops.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+// The object link member is bound to this op on construction.
+Load::Load() : _obj{this}
+{
+ // DO NOTHING
+}
+
+// Load is a leaf in the op tree.
+uint32_t Load::arity(void) const
+{
+ // Load has no child Op
+ return 0;
+}
+
+// Any argument index is out of range for a leaf op; asserts in debug builds
+// and returns nullptr in release builds.
+Op *Load::arg(uint32_t) const
+{
+ assert(!"Load has no argument");
+ return nullptr;
+}
+
+// A Load uses exactly the object it reads (empty set while unbound).
+std::set<Object *> Load::uses(void) const
+{
+ std::set<Object *> res;
+
+ if (auto obj = object())
+ {
+ res.insert(obj);
+ }
+
+ return res;
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/MaxPool2D.test.cpp b/compiler/coco/core/src/IR/MaxPool2D.test.cpp
new file mode 100644
index 000000000..864edddb3
--- /dev/null
+++ b/compiler/coco/core/src/IR/MaxPool2D.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Ops.h"
+
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+// Visitor that answers true only for MaxPool2D nodes; used by the accept test.
+struct IsMaxPool2D : public coco::Op::Visitor<bool>
+{
+ bool visit(const coco::MaxPool2D *) override { return true; }
+};
+
+// Fixture that tracks allocated MaxPool2D ops so tests cannot leak them.
+class MaxPool2DTest : public ::testing::Test
+{
+public:
+ MaxPool2DTest()
+ {
+ // DO NOTHING
+ }
+
+protected:
+ // Allocate a MaxPool2D op owned by the fixture.
+ coco::MaxPool2D *allocate(void)
+ {
+ auto op = new coco::MaxPool2D;
+ _allocated.emplace_back(op);
+ return op;
+ }
+
+private:
+ std::vector<std::unique_ptr<coco::MaxPool2D>> _allocated;
+};
+} // namespace
+
+// Checks the default state of a freshly constructed MaxPool2D op.
+TEST_F(MaxPool2DTest, initialization)
+{
+ auto op = allocate();
+
+ coco::MaxPool2D *mutable_ptr = op;
+ const coco::MaxPool2D *immutable_ptr = op;
+
+ // uses() should be empty on construction
+ ASSERT_EQ(op->uses().size(), 0);
+ // parent() should be nullptr on construction
+ ASSERT_EQ(op->parent(), nullptr);
+
+ // arg() should be nullptr on construction
+ ASSERT_EQ(immutable_ptr->arg(), nullptr);
+
+ // window() SHOULD return a valid pointer
+ ASSERT_NE(mutable_ptr->window(), nullptr);
+ ASSERT_EQ(mutable_ptr->window(), immutable_ptr->window());
+
+ // stride() SHOULD return a valid pointer
+ ASSERT_NE(mutable_ptr->stride(), nullptr);
+ ASSERT_EQ(mutable_ptr->stride(), immutable_ptr->stride());
+
+ // pad() SHOULD return a valid pointer
+ ASSERT_NE(mutable_ptr->pad(), nullptr);
+ ASSERT_EQ(mutable_ptr->pad(), immutable_ptr->pad());
+}
+
+// asMaxPool2D() downcast agrees between const and non-const Op views.
+TEST_F(MaxPool2DTest, asMaxPool2D)
+{
+ auto op = allocate();
+
+ coco::Op *mutable_base = op;
+ const coco::Op *immutable_base = op;
+
+ ASSERT_EQ(mutable_base->asMaxPool2D(), op);
+ ASSERT_EQ(mutable_base->asMaxPool2D(), immutable_base->asMaxPool2D());
+}
+
+// The visitor dispatch reaches the MaxPool2D overload via accept().
+TEST_F(MaxPool2DTest, accept)
+{
+ // Test 'MaxPool2D' class
+ auto op = allocate();
+
+ coco::MaxPool2D *mutable_ptr = op;
+ const coco::MaxPool2D *immutable_ptr = op;
+
+ ASSERT_TRUE(mutable_ptr->accept(IsMaxPool2D{}));
+ ASSERT_TRUE(immutable_ptr->accept(IsMaxPool2D{}));
+}
diff --git a/compiler/coco/core/src/IR/Module.cpp b/compiler/coco/core/src/IR/Module.cpp
new file mode 100644
index 000000000..0b65ceedc
--- /dev/null
+++ b/compiler/coco/core/src/IR/Module.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Module.h"
+
+#include <stdex/Memory.h>
+
+using stdex::make_unique;
+
+namespace
+{
+
+// Concrete EntityManager: one manager per entity kind, exposed through the
+// coco::EntityManager interface. Managers are installed by Module::create.
+// Member declaration order matters for destruction (see WARN note below).
+struct EntityManagerImpl final : public coco::EntityManager
+{
+public:
+ std::unique_ptr<coco::BagManager> _bag;
+
+public:
+ coco::BagManager *bag(void) override { return _bag.get(); }
+ const coco::BagManager *bag(void) const override { return _bag.get(); }
+
+public:
+ std::unique_ptr<coco::ObjectManager> _object;
+
+public:
+ coco::ObjectManager *object(void) override { return _object.get(); }
+ const coco::ObjectManager *object(void) const override { return _object.get(); }
+
+public:
+ std::unique_ptr<coco::OpManager> _op;
+
+public:
+ coco::OpManager *op(void) override { return _op.get(); }
+ const coco::OpManager *op(void) const override { return _op.get(); }
+
+public:
+ // Accessors only here; _instr is declared at the bottom for destruction order.
+ coco::InstrManager *instr(void) override { return _instr.get(); }
+ const coco::InstrManager *instr(void) const override { return _instr.get(); }
+
+public:
+ coco::BlockManager *block(void) override { return _block.get(); }
+ const coco::BlockManager *block(void) const override { return _block.get(); }
+
+public:
+ std::unique_ptr<coco::InputManager> _input;
+
+public:
+ coco::InputManager *input(void) override { return _input.get(); }
+ const coco::InputManager *input(void) const override { return _input.get(); }
+
+public:
+ std::unique_ptr<coco::OutputManager> _output;
+
+public:
+ coco::OutputManager *output(void) override { return _output.get(); }
+ const coco::OutputManager *output(void) const override { return _output.get(); }
+
+public:
+ // WARN Do NOT change the order of these fields: _block -> _instr
+ //
+ // Note that each instruction may have a reference to a block, and
+ // the destructor of Instr accesses this 'block' reference.
+ //
+ // Thus, Instr entities SHOULD BE destructed before Block entities are destructed.
+ std::unique_ptr<coco::BlockManager> _block;
+ std::unique_ptr<coco::InstrManager> _instr;
+};
+
+} // namespace
+
+namespace
+{
+
+// Concrete Module: owns the block list, I/O lists, and the entity manager.
+// Member declaration order is destruction-order sensitive (see WARN below).
+class ModuleImpl final : public coco::Module
+{
+public:
+ coco::EntityManager *entity(void) override { return _entity.get(); }
+ const coco::EntityManager *entity(void) const override { return _entity.get(); }
+
+public:
+ std::unique_ptr<coco::BlockList> _block;
+
+public:
+ coco::BlockList *block(void) override { return _block.get(); }
+ const coco::BlockList *block(void) const override { return _block.get(); }
+
+public:
+ std::unique_ptr<coco::InputList> _input;
+
+public:
+ coco::InputList *input(void) override { return _input.get(); }
+ const coco::InputList *input(void) const override { return _input.get(); }
+
+public:
+ std::unique_ptr<coco::OutputList> _output;
+
+public:
+ coco::OutputList *output(void) override { return _output.get(); }
+ const coco::OutputList *output(void) const override { return _output.get(); }
+
+public:
+ // WARN _entity SHOULD BE declared after _block in order to allow each Block(s) to detach itself.
+ //
+ // If not, Block is destructed after its corresponding BlockList is destructed, which results
+ // in invalid memory access during the update on BlockList (inside Block's destructor).
+ std::unique_ptr<coco::EntityManager> _entity;
+};
+
+} // namespace
+
+namespace coco
+{
+
+// Builds a fully wired Module: every per-kind entity manager is created with
+// a back-pointer to the module, then installed, then the top-level lists are set.
+std::unique_ptr<Module> Module::create(void)
+{
+ auto m = make_unique<::ModuleImpl>();
+
+ auto mgr = make_unique<::EntityManagerImpl>();
+ {
+ mgr->_bag = make_unique<coco::BagManager>(m.get());
+ mgr->_object = make_unique<coco::ObjectManager>(m.get());
+ mgr->_op = make_unique<coco::OpManager>(m.get());
+ mgr->_instr = make_unique<coco::InstrManager>(m.get());
+ mgr->_block = make_unique<coco::BlockManager>(m.get());
+ mgr->_input = make_unique<coco::InputManager>(m.get());
+ mgr->_output = make_unique<coco::OutputManager>(m.get());
+ }
+ m->_entity = std::move(mgr);
+
+ m->_block = make_unique<coco::BlockList>(m.get());
+ m->_input = make_unique<coco::InputList>();
+ m->_output = make_unique<coco::OutputList>();
+
+ // NOTE std::move is required for the unique_ptr<ModuleImpl> -> unique_ptr<Module>
+ // conversion on pre-C++14 compilers.
+ return std::move(m);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Module.test.cpp b/compiler/coco/core/src/IR/Module.test.cpp
new file mode 100644
index 000000000..b55ceacb8
--- /dev/null
+++ b/compiler/coco/core/src/IR/Module.test.cpp
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Module.h"
+
+#include <gtest/gtest.h>
+
+// A freshly created Module exposes non-null managers and lists, and the const
+// and non-const accessors return the same instances.
+TEST(IR_MODULE, create)
+{
+ auto m = coco::Module::create();
+
+ ASSERT_NE(m.get(), nullptr);
+
+ coco::Module *mutable_m = m.get();
+ const coco::Module *immutable_m = m.get();
+
+ ASSERT_NE(mutable_m->entity(), nullptr);
+ ASSERT_NE(immutable_m->entity(), nullptr);
+
+ ASSERT_NE(mutable_m->entity()->bag(), nullptr);
+ ASSERT_EQ(immutable_m->entity()->bag(), mutable_m->entity()->bag());
+
+ ASSERT_NE(mutable_m->entity()->object(), nullptr);
+ ASSERT_EQ(immutable_m->entity()->object(), mutable_m->entity()->object());
+
+ ASSERT_NE(mutable_m->entity()->op(), nullptr);
+ ASSERT_EQ(immutable_m->entity()->op(), mutable_m->entity()->op());
+
+ ASSERT_NE(mutable_m->entity()->instr(), nullptr);
+ ASSERT_EQ(immutable_m->entity()->instr(), mutable_m->entity()->instr());
+
+ ASSERT_NE(mutable_m->entity()->block(), nullptr);
+ ASSERT_EQ(immutable_m->entity()->block(), mutable_m->entity()->block());
+
+ ASSERT_NE(mutable_m->entity()->input(), nullptr);
+ ASSERT_EQ(immutable_m->entity()->input(), mutable_m->entity()->input());
+
+ ASSERT_NE(mutable_m->entity()->output(), nullptr);
+ ASSERT_EQ(immutable_m->entity()->output(), mutable_m->entity()->output());
+
+ ASSERT_NE(mutable_m->block(), nullptr);
+ ASSERT_EQ(immutable_m->block(), mutable_m->block());
+
+ ASSERT_NE(mutable_m->input(), nullptr);
+ ASSERT_EQ(immutable_m->input(), mutable_m->input());
+
+ ASSERT_NE(mutable_m->output(), nullptr);
+ ASSERT_EQ(immutable_m->output(), mutable_m->output());
+}
+
+// Appending two blocks yields the expected head/tail, prev/next links, and
+// sequential indices.
+TEST(IR_MODULE, append_two_blocks)
+{
+ auto m = coco::Module::create();
+
+ auto blk_1 = m->entity()->block()->create();
+ m->block()->append(blk_1);
+
+ auto blk_2 = m->entity()->block()->create();
+ m->block()->append(blk_2);
+
+ ASSERT_EQ(m->block()->head(), blk_1);
+ ASSERT_EQ(m->block()->tail(), blk_2);
+
+ ASSERT_EQ(blk_1->prev(), nullptr);
+ ASSERT_EQ(blk_1->next(), blk_2);
+
+ ASSERT_EQ(blk_2->prev(), blk_1);
+ ASSERT_EQ(blk_2->next(), nullptr);
+
+ ASSERT_EQ(blk_1->index().value(), 0);
+ ASSERT_EQ(blk_2->index().value(), 1);
+}
+
+// Appending two instructions to a block sets parent, sibling links, and indices.
+TEST(IR_MODULE, append_two_instrs)
+{
+ auto m = coco::Module::create();
+
+ auto blk = m->entity()->block()->create();
+ auto ins_1 = m->entity()->instr()->create<coco::Eval>();
+ auto ins_2 = m->entity()->instr()->create<coco::Eval>();
+
+ blk->instr()->append(ins_1);
+ blk->instr()->append(ins_2);
+
+ ASSERT_EQ(blk->instr()->head(), ins_1);
+ ASSERT_EQ(blk->instr()->tail(), ins_2);
+
+ ASSERT_EQ(ins_1->parent(), blk);
+ ASSERT_EQ(ins_1->prev(), nullptr);
+ ASSERT_EQ(ins_1->next(), ins_2);
+
+ ASSERT_EQ(ins_2->parent(), blk);
+ ASSERT_EQ(ins_2->prev(), ins_1);
+ ASSERT_EQ(ins_2->next(), nullptr);
+
+ ASSERT_EQ(ins_1->index().value(), 0);
+ ASSERT_EQ(ins_2->index().value(), 1);
+}
+
+// The instruction list is traversable through a const Block pointer.
+TEST(IR_MODULE, iterate_constant_block)
+{
+ auto m = coco::Module::create();
+ auto blk = m->entity()->block()->create();
+ auto ins_1 = m->entity()->instr()->create<coco::Eval>();
+ auto ins_2 = m->entity()->instr()->create<coco::Eval>();
+
+ blk->instr()->append(ins_1);
+ blk->instr()->append(ins_2);
+
+ const coco::Block *immutable_blk = blk;
+
+ ASSERT_EQ(immutable_blk->instr()->head(), ins_1);
+ ASSERT_EQ(immutable_blk->instr()->head()->next(), ins_2);
+}
+
+// A bag may back both an Input and an Output at the same time, and dropping
+// the Output link clears only the output role.
+TEST(IR_MODULE, input_as_output)
+{
+ // Some NN frameworks allows users to use a network input as its output.
+ //
+ // For example, let us consider the following Caffe network
+ //
+ // name: "example"
+ // layer {
+ // name: "l"
+ // type: "Input"
+ // top: "data"
+ // input_param { shape: { dim: 1 dim: 1 dim: 3 dim: 3 } }
+ // }
+ //
+ // "data" blob is the input of this network, and it is also the output of this network.
+ const nncc::core::ADT::tensor::Shape shape{1, 1, 3, 3};
+
+ auto m = coco::Module::create();
+ auto bag = m->entity()->bag()->create(9);
+
+ auto input = m->entity()->input()->create(shape);
+ auto output = m->entity()->output()->create(shape);
+
+ input->name("data");
+ input->bag(bag);
+
+ output->name("data");
+ output->bag(bag);
+
+ ASSERT_TRUE(bag->isInput());
+ ASSERT_TRUE(bag->isOutput());
+
+ // Unlinking the output must not disturb the input role.
+ output->bag(nullptr);
+
+ ASSERT_TRUE(bag->isInput());
+ ASSERT_FALSE(bag->isOutput());
+}
+
+/**
+ * This test ensures that IR entities allocated via EntityManager have a correct module link
+ */
+// Every entity kind created through the EntityManager must link back to the
+// owning module; Op/Instr kinds are enumerated via the X-macro lists.
+// NOTE The test name 'create_entites' carries a typo for 'create_entities'.
+TEST(IR_Module, create_entites)
+{
+ using namespace coco;
+ using namespace nncc::core::ADT;
+
+ auto m = Module::create();
+ auto entity = m->entity();
+
+ ASSERT_EQ(entity->bag()->create(1)->module(), m.get());
+ ASSERT_EQ(entity->object()->create<coco::FeatureObject>()->module(), m.get());
+ ASSERT_EQ(entity->object()->create<coco::KernelObject>()->module(), m.get());
+#define OP(Name) ASSERT_EQ(entity->op()->create<Name>()->module(), m.get());
+#include "coco/IR/Op.lst"
+#undef OP
+#define INSTR(Name) \
+ { \
+ auto ins = entity->instr()->create<Name>(); \
+ ASSERT_EQ(ins->module(), m.get()); \
+ ASSERT_TRUE(coco::isa<Name>(ins)); \
+ ASSERT_NE(coco::safe_cast<Name>(ins), nullptr); \
+ }
+#include "coco/IR/Instr.lst"
+#undef INSTR
+ ASSERT_EQ(entity->block()->create()->module(), m.get());
+ ASSERT_EQ(entity->input()->create(tensor::Shape{1})->module(), m.get());
+ ASSERT_EQ(entity->output()->create(tensor::Shape{1})->module(), m.get());
+}
diff --git a/compiler/coco/core/src/IR/Object.cpp b/compiler/coco/core/src/IR/Object.cpp
new file mode 100644
index 000000000..6a51a61a3
--- /dev/null
+++ b/compiler/coco/core/src/IR/Object.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Object.h"
+#include "coco/IR/Def.h"
+#include "coco/IR/Use.h"
+
+#include <cassert>
+#include <stdexcept>
+
+namespace coco
+{
+
+// Wires the embedded Dep back to this object so bag links can be tracked.
+Object::Object()
+{
+ // Register self to Dep
+ _dep.object(this);
+}
+
+// Returns the Def slot currently producing this object (nullptr if undefined).
+Def *Object::def(void) const { return _def; }
+
+// Installs (or clears) the producing Def. Overwriting a live def without an
+// explicit reset first is a programming error (see assert rationale below).
+void Object::def(Def *d)
+{
+ // This assert enforces users to explicitly reset def before update.
+ //
+ // Let's consider an object o with def d0.
+ //
+ // The following code is allowed:
+ // o->def(nullptr);
+ // o->def(d1);
+ //
+ // However, the following code is not allowed:
+ // o->def(d1);
+ //
+ assert((_def == nullptr) || (d == nullptr));
+ _def = d;
+}
+
+// Read-only and mutable views over the set of Use slots referring to this object.
+const UseSet *Object::uses(void) const { return &_uses; }
+UseSet *Object::mutable_uses(void) { return &_uses; }
+
+// Convenience: the producer registered on the object's def, or nullptr when
+// the object has no def.
+Object::Producer *producer(const Object *obj)
+{
+ if (auto d = obj->def())
+ {
+ return d->producer();
+ }
+
+ return nullptr;
+}
+
+// Convenience: collects the consumer of every use of the object, skipping
+// uses that have no consumer attached.
+Object::ConsumerSet consumers(const Object *obj)
+{
+ Object::ConsumerSet res;
+
+ for (const auto &use : *(obj->uses()))
+ {
+ if (auto consumer = use->consumer())
+ {
+ res.insert(consumer);
+ }
+ }
+
+ return res;
+}
+
+/**
+ * Casting Helpers
+ *
+ * TODO Use Macro to reduce code duplication
+ */
+// isa<T>: type test via the virtual asFeature/asKernel hooks.
+template <> bool isa<FeatureObject>(const Object *o) { return o->asFeature() != nullptr; }
+template <> bool isa<KernelObject>(const Object *o) { return o->asKernel() != nullptr; }
+
+// cast<T>: checked downcast; both null input and type mismatch assert in
+// debug builds (undefined in release builds if the cast is invalid).
+template <> FeatureObject *cast(Object *o)
+{
+ assert(o != nullptr);
+ auto res = o->asFeature();
+ assert(res != nullptr);
+ return res;
+}
+
+template <> KernelObject *cast(Object *o)
+{
+ assert(o != nullptr);
+ auto res = o->asKernel();
+ assert(res != nullptr);
+ return res;
+}
+
+// safe_cast<T>: null-tolerant downcast; returns nullptr on mismatch or null.
+template <> FeatureObject *safe_cast(Object *o)
+{
+ // NOTE o may be nullptr
+ return (o == nullptr) ? nullptr : o->asFeature();
+}
+
+template <> KernelObject *safe_cast(Object *o)
+{
+ // NOTE o may be nullptr
+ return (o == nullptr) ? nullptr : o->asKernel();
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Object.test.cpp b/compiler/coco/core/src/IR/Object.test.cpp
new file mode 100644
index 000000000..2a2e4db23
--- /dev/null
+++ b/compiler/coco/core/src/IR/Object.test.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Object.h"
+#include "coco/IR/BagManager.h"
+
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+// Fixture providing a fresh BagManager per test for Object/Bag linkage tests.
+class ObjectTest : public ::testing::Test
+{
+protected:
+ coco::BagManager bag_mgr;
+};
+} // namespace
+
+namespace
+{
+namespace mock
+{
+// Minimal concrete subclass so the abstract coco::Object can be instantiated.
+struct Object : public coco::Object
+{
+public:
+ virtual ~Object() = default;
+};
+} // namespace mock
+} // namespace
+
+// A new object has no bag, no def, and no uses.
+TEST_F(ObjectTest, ctor)
+{
+ ::mock::Object obj;
+
+ // Newly created object should not have a backing bag
+ ASSERT_EQ(obj.bag(), nullptr);
+
+ // Newly created object should not have def and uses
+ ASSERT_EQ(obj.def(), nullptr);
+ ASSERT_TRUE(obj.uses()->empty());
+}
+
+// Linking/unlinking a bag updates both the object's accessor and the bag's
+// dependent-object set.
+TEST_F(ObjectTest, bag_update)
+{
+ // Prepare bag
+ auto bag = bag_mgr.create(1);
+
+ // Test 'Object' class through a mock-up object
+ ::mock::Object obj;
+
+ obj.bag(bag);
+
+ // 'bag(Bag *)' should affect the return of 'bag(void)'
+ ASSERT_EQ(obj.bag(), bag);
+
+ // User SHOULD be able to access dependent objects through 'bag'
+ {
+ auto deps = coco::dependent_objects(bag);
+ ASSERT_EQ(deps.size(), 1);
+ ASSERT_EQ(deps.count(&obj), 1);
+ }
+
+ // Unlink Object-Bag relation
+ obj.bag(nullptr);
+
+ ASSERT_EQ(obj.bag(), nullptr);
+
+ {
+ auto deps = coco::dependent_objects(bag);
+ ASSERT_EQ(deps.size(), 0);
+ }
+}
+
+// Destroying an object automatically removes it from its bag's dependents.
+TEST_F(ObjectTest, destructor)
+{
+ auto bag = bag_mgr.create(1);
+
+ // Destruct Object after proper initialization
+ {
+ ::mock::Object obj;
+
+ obj.bag(bag);
+ }
+
+ // Object SHOULD be unlinked from Bag on destruction
+ {
+ auto deps = coco::dependent_objects(bag);
+ ASSERT_EQ(deps.size(), 0);
+ }
+}
+
+// safe_cast must tolerate nullptr input for both object kinds.
+TEST_F(ObjectTest, safe_cast)
+{
+ ASSERT_EQ(coco::safe_cast<coco::FeatureObject>(nullptr), nullptr);
+ ASSERT_EQ(coco::safe_cast<coco::KernelObject>(nullptr), nullptr);
+}
diff --git a/compiler/coco/core/src/IR/ObjectManager.cpp b/compiler/coco/core/src/IR/ObjectManager.cpp
new file mode 100644
index 000000000..1b7215a04
--- /dev/null
+++ b/compiler/coco/core/src/IR/ObjectManager.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/ObjectManager.h"
+
+#include "coco/IR/FeatureObject.h"
+#include "coco/IR/KernelObject.h"
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+using stdex::make_unique;
+
+namespace coco
+{
+
+// Creates a FeatureObject, registers its module link, and takes ownership.
+template <> FeatureObject *ObjectManager::create(void)
+{
+ auto feature = make_unique<FeatureObject>();
+ modulize(feature.get());
+ return take(std::move(feature));
+}
+
+// Creates a KernelObject, registers its module link, and takes ownership.
+template <> KernelObject *ObjectManager::create(void)
+{
+ auto kernel = make_unique<KernelObject>();
+ modulize(kernel.get());
+ return take(std::move(kernel));
+}
+
+// Releases an object; callers must first detach its def and all uses
+// (enforced by the asserts in debug builds).
+void ObjectManager::destroy(Object *o)
+{
+ assert(o->def() == nullptr);
+ assert(o->uses()->size() == 0);
+ release(o);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/ObjectManager.test.cpp b/compiler/coco/core/src/IR/ObjectManager.test.cpp
new file mode 100644
index 000000000..781775f25
--- /dev/null
+++ b/compiler/coco/core/src/IR/ObjectManager.test.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/ObjectManager.h"
+#include "coco/IR/BagManager.h"
+
+#include "coco/IR/FeatureObject.h"
+#include "coco/IR/KernelObject.h"
+
+#include <gtest/gtest.h>
+
+// A FeatureObject created through the manager starts without a layout.
+TEST(IR_OBJECT_MANAGER, create_feature_with_template)
+{
+ coco::ObjectManager mgr;
+
+ auto feature = mgr.create<coco::FeatureObject>();
+
+ ASSERT_EQ(feature->layout(), nullptr);
+}
+
+// A KernelObject created through the manager starts without a layout.
+TEST(IR_OBJECT_MANAGER, create_kernel_with_template)
+{
+ coco::ObjectManager mgr;
+
+ auto kernel = mgr.create<coco::KernelObject>();
+
+ ASSERT_EQ(kernel->layout(), nullptr);
+}
+
+// Destroying an object through the manager unlinks it from its bag.
+TEST(IR_OBJECT_MANAGER, destroy)
+{
+ coco::BagManager bag_mgr;
+ coco::ObjectManager obj_mgr;
+
+ auto bag = bag_mgr.create(3);
+ auto feature = obj_mgr.create<coco::FeatureObject>();
+
+ feature->bag(bag);
+
+ obj_mgr.destroy(feature);
+
+ // Object SHOULD BE unlinked from its dependent bag on destruction
+ ASSERT_EQ(bag->deps()->size(), 0);
+}
diff --git a/compiler/coco/core/src/IR/Op.cpp b/compiler/coco/core/src/IR/Op.cpp
new file mode 100644
index 000000000..d3808a9d6
--- /dev/null
+++ b/compiler/coco/core/src/IR/Op.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Op.h"
+#include "coco/IR/Step.h"
+#include "coco/IR/Part.h"
+
+#include <pepper/assert.h>
+
+namespace coco
+{
+Op::~Op()
+{
+ // NOTE Op SHOULD NOT be referred by an instruction to be destructed
+ assert(_step == nullptr);
+}
+
+Instr *Op::parent(void) const
+{
+ // Get the parent instruction specified by _step for root nodes
+ if (_step)
+ {
+ // Op SHOULD BE a root node
+ assert(_part == nullptr);
+ assert(_step->instr() != nullptr);
+ return _step->instr();
+ }
+
+ // Get the parent instruction of its parent Op for non-root nodes
+ if (_part)
+ {
+ assert(_part->parent() != nullptr);
+ return _part->parent()->parent();
+ }
+
+ return nullptr;
+}
+
+Op *Op::up(void) const
+{
+ if (_part)
+ {
+ assert(_part->parent() != nullptr);
+ return _part->parent();
+ }
+ return nullptr;
+}
+
+//
+// UnaryOp trait
+//
+UnaryOp::UnaryOp() : _arg{this}
+{
+ // DO NOTHING
+}
+
+uint32_t UnaryOp::arity(void) const
+{
+ // There is only one argument
+ return 1;
+}
+
+Op *UnaryOp::arg(DBGARG(uint32_t, n)) const
+{
+ assert(n < 1);
+ return arg();
+}
+
+std::set<Object *> UnaryOp::uses(void) const
+{
+ std::set<Object *> res;
+
+ if (auto ifm = arg())
+ {
+ for (auto obj : ifm->uses())
+ {
+ res.insert(obj);
+ }
+ }
+
+ return res;
+}
+
+//
+// BinaryOp trait
+//
+BinaryOp::BinaryOp() : _left{this}, _right{this}
+{
+ // DO NOTHING
+}
+
+uint32_t BinaryOp::arity(void) const
+{
+ // There are two arguments
+ return 2;
+}
+
+Op *BinaryOp::arg(uint32_t n) const
+{
+ assert(n < arity());
+
+ return (n == 0) ? left() : right();
+}
+
+std::set<Object *> BinaryOp::uses(void) const
+{
+ std::set<Object *> res;
+
+ if (auto l = left())
+ {
+ for (auto obj : l->uses())
+ {
+ res.insert(obj);
+ }
+ }
+
+ if (auto r = right())
+ {
+ for (auto obj : r->uses())
+ {
+ res.insert(obj);
+ }
+ }
+
+ return res;
+}
+
+//
+// Additional Helpers
+//
+Op *root(Op *cur)
+{
+ while (cur->up())
+ {
+ cur = cur->up();
+ }
+ return cur;
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/OpManager.cpp b/compiler/coco/core/src/IR/OpManager.cpp
new file mode 100644
index 000000000..c87b704fe
--- /dev/null
+++ b/compiler/coco/core/src/IR/OpManager.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/OpManager.h"
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+#include <queue>
+#include <set>
+
+using stdex::make_unique;
+
+namespace coco
+{
+
+OpManager::~OpManager()
+{
+ std::set<coco::Op *> roots;
+
+ for (uint32_t n = 0; n < size(); ++n)
+ {
+ auto op = at(n);
+
+ if (op->up() != nullptr)
+ {
+ continue;
+ }
+
+ roots.insert(op);
+ }
+
+ for (const auto &op : roots)
+ {
+ destroy_all(op);
+ }
+}
+
+//
+// Each Op class SHOULD be default constructible
+//
+#define OP(Name) \
+ template <> Name *OpManager::create<Name>(void) \
+ { \
+ auto op = make_unique<Name>(); \
+ modulize(op.get()); \
+ return take(std::move(op)); \
+ }
+#include "coco/IR/Op.lst"
+#undef OP
+
+void OpManager::destroy(Op *op)
+{
+ assert(op->parent() == nullptr);
+ release(op);
+}
+
+void OpManager::destroy_all(Op *op)
+{
+ assert(op->parent() == nullptr);
+ assert(op->up() == nullptr);
+
+ std::queue<coco::Op *> q;
+
+ q.emplace(op);
+
+ while (q.size() > 0)
+ {
+ auto cur = q.front();
+ q.pop();
+
+ // Insert child op nodes
+ for (uint32_t n = 0; n < cur->arity(); ++n)
+ {
+ if (auto child = cur->arg(n))
+ {
+ q.emplace(child);
+ }
+ }
+
+ // Destroy the current op node
+ destroy(cur);
+ }
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/OpManager.test.cpp b/compiler/coco/core/src/IR/OpManager.test.cpp
new file mode 100644
index 000000000..9d463b3e4
--- /dev/null
+++ b/compiler/coco/core/src/IR/OpManager.test.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/OpManager.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class OpManagerTest : public ::testing::Test
+{
+protected:
+ coco::OpManager mgr;
+};
+
+} // namespace
+
+TEST(IR_OP_MANAGER, create_Conv2D)
+{
+ coco::OpManager mgr;
+
+ auto obj = mgr.create<coco::Conv2D>();
+
+ ASSERT_NE(obj, nullptr);
+}
+
+TEST(IR_OP_MANAGER, create_AvgPool2D)
+{
+ coco::OpManager mgr;
+
+ auto obj = mgr.create<coco::AvgPool2D>();
+
+ ASSERT_NE(obj, nullptr);
+}
+
+TEST_F(OpManagerTest, ReLU)
+{
+ auto obj = mgr.create<coco::ReLU>();
+
+ ASSERT_NE(obj, nullptr);
+}
+
+TEST_F(OpManagerTest, ReLU6)
+{
+ auto obj = mgr.create<coco::ReLU6>();
+
+ ASSERT_NE(obj, nullptr);
+}
+
+TEST_F(OpManagerTest, Sqrt)
+{
+ auto obj = mgr.create<coco::Sqrt>();
+
+ ASSERT_NE(obj, nullptr);
+}
+
+TEST_F(OpManagerTest, Sub)
+{
+ auto obj = mgr.create<coco::Sub>();
+
+ ASSERT_NE(obj, nullptr);
+}
+
+TEST_F(OpManagerTest, Div)
+{
+ auto obj = mgr.create<coco::Div>();
+
+ ASSERT_NE(obj, nullptr);
+}
+
+TEST_F(OpManagerTest, PadF)
+{
+ auto op = mgr.create<coco::PadF>();
+ ASSERT_NE(op, nullptr);
+ mgr.destroy(op);
+}
+
+TEST_F(OpManagerTest, destroy)
+{
+ auto op = mgr.create<coco::Conv2D>();
+ mgr.destroy(op);
+ ASSERT_EQ(mgr.size(), 0);
+}
+
+TEST_F(OpManagerTest, destroy_all)
+{
+ // Create a Op tree
+ auto load_op = mgr.create<coco::Load>();
+ auto conv_op = mgr.create<coco::Conv2D>();
+
+ conv_op->arg(load_op);
+
+ mgr.destroy_all(conv_op);
+
+ ASSERT_EQ(mgr.size(), 0);
+}
+
+TEST_F(OpManagerTest, destroy_all_partial_tree)
+{
+ // Create a (partial) Op tree
+ auto conv_op = mgr.create<coco::Conv2D>();
+
+ mgr.destroy_all(conv_op);
+
+ ASSERT_EQ(mgr.size(), 0);
+}
diff --git a/compiler/coco/core/src/IR/Ops.cpp b/compiler/coco/core/src/IR/Ops.cpp
new file mode 100644
index 000000000..1c1ef5d28
--- /dev/null
+++ b/compiler/coco/core/src/IR/Ops.cpp
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Ops.h"
+
+namespace coco
+{
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Ops.test.cpp b/compiler/coco/core/src/IR/Ops.test.cpp
new file mode 100644
index 000000000..ae979b2bf
--- /dev/null
+++ b/compiler/coco/core/src/IR/Ops.test.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Ops.h"
+#include "coco/IR/ObjectManager.h"
+#include "coco/IR/OpManager.h"
+
+#include <vector>
+#include <memory>
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+using stdex::make_unique;
+
+/**
+ * Section: Add Op
+ */
+namespace
+{
+
+class AddTest : public ::testing::Test
+{
+public:
+ AddTest()
+ {
+ // DO NOTHING
+ }
+
+protected:
+ coco::Add *allocate(void)
+ {
+ auto op = new coco::Add;
+ _allocated.emplace_back(op);
+ return op;
+ }
+
+protected:
+ coco::ObjectManager obj_mgr;
+
+private:
+ std::vector<std::unique_ptr<coco::Op>> _allocated;
+};
+
+} // namespace
+
+TEST_F(AddTest, constructor)
+{
+ auto op = allocate();
+
+ ASSERT_EQ(op->left(), nullptr);
+ ASSERT_EQ(op->right(), nullptr);
+}
+
+/**
+ * Section: Mul Op
+ */
+TEST(MulTest, constructor)
+{
+ auto op = make_unique<coco::Mul>();
+
+ ASSERT_EQ(op->left(), nullptr);
+ ASSERT_EQ(op->right(), nullptr);
+}
+
+/**
+ * Section: Div Op
+ */
+TEST(DivTest, constructor)
+{
+ auto op = make_unique<coco::Div>();
+
+ ASSERT_EQ(op->left(), nullptr);
+ ASSERT_EQ(op->right(), nullptr);
+}
+
+/**
+ * Section: Op Helpers
+ */
+namespace
+{
+
+class OpHelperTest : public ::testing::Test
+{
+public:
+ OpHelperTest()
+ {
+ // DO NOTHING
+ }
+
+protected:
+ template <typename Op> Op *allocate(void) { return op_mgr.create<Op>(); }
+
+protected:
+ coco::ObjectManager obj_mgr;
+
+private:
+ coco::OpManager op_mgr;
+};
+
+} // namespace
+
+TEST_F(OpHelperTest, root)
+{
+ auto load = allocate<coco::Load>();
+
+ ASSERT_EQ(root(load), load);
+
+ auto avgpool = allocate<coco::AvgPool2D>();
+
+ avgpool->arg(load);
+
+ ASSERT_EQ(root(load), avgpool);
+ ASSERT_EQ(root(avgpool), avgpool);
+}
diff --git a/compiler/coco/core/src/IR/Output.cpp b/compiler/coco/core/src/IR/Output.cpp
new file mode 100644
index 000000000..7b6d1870b
--- /dev/null
+++ b/compiler/coco/core/src/IR/Output.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Output.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+Output::Output(const nncc::core::ADT::tensor::Shape &shape) : Arg{shape}
+{
+ // DO NOTHING
+}
+
+void Output::onTake(Bag *bag)
+{
+ assert(bag->output() == nullptr);
+ bag->output(this);
+}
+
+void Output::onRelease(Bag *bag)
+{
+ assert(bag->output() == this);
+ bag->output(nullptr);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Output.test.cpp b/compiler/coco/core/src/IR/Output.test.cpp
new file mode 100644
index 000000000..715a83875
--- /dev/null
+++ b/compiler/coco/core/src/IR/Output.test.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Output.h"
+#include "coco/IR/BagManager.h"
+
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+TEST(IR_OUTPUT, ctor_should_set_shape)
+{
+ const nncc::core::ADT::tensor::Shape shape{1, 3, 3, 1};
+ coco::Output output{shape};
+
+ ASSERT_EQ(output.shape(), shape);
+}
+
+TEST(IR_OUTPUT, bag_update)
+{
+ // Create a bag for test
+ coco::BagManager bag_mgr;
+
+ auto bag = bag_mgr.create(9);
+
+ const nncc::core::ADT::tensor::Shape shape{1, 3, 3, 1};
+ coco::Output output{shape};
+
+ output.bag(bag);
+ ASSERT_EQ(output.bag(), bag);
+
+ // bag(...) method SHOULD update 'bag' type
+ ASSERT_TRUE(bag->isOutput());
+
+ output.bag(nullptr);
+
+ // bag(nullptr) SHOULD revert 'bag' type
+ ASSERT_FALSE(bag->isOutput());
+}
+
+TEST(IR_OUTPUT, name_update)
+{
+ const nncc::core::ADT::tensor::Shape shape{1, 3, 3, 1};
+ coco::Output output{shape};
+
+ output.name("softmax");
+ ASSERT_EQ(output.name(), "softmax");
+}
+
+TEST(IR_OUTPUT, at)
+{
+ const Shape shape{1, 3, 3, 1};
+ coco::Output input{shape};
+
+ coco::Output *mutable_ptr = &input;
+ const coco::Output *immutable_ptr = &input;
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ mutable_ptr->at(e.current()) = coco::ElemID{16};
+ }
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ ASSERT_EQ(immutable_ptr->at(e.current()).value(), 16);
+ }
+}
diff --git a/compiler/coco/core/src/IR/OutputManager.cpp b/compiler/coco/core/src/IR/OutputManager.cpp
new file mode 100644
index 000000000..86b9580ac
--- /dev/null
+++ b/compiler/coco/core/src/IR/OutputManager.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/OutputManager.h"
+
+#include <stdex/Memory.h>
+
+namespace coco
+{
+
+Output *OutputManager::create(const nncc::core::ADT::tensor::Shape &shape)
+{
+ auto output = stdex::make_unique<Output>(shape);
+ modulize(output.get());
+ return take(std::move(output));
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/OutputManager.test.cpp b/compiler/coco/core/src/IR/OutputManager.test.cpp
new file mode 100644
index 000000000..80b38b42c
--- /dev/null
+++ b/compiler/coco/core/src/IR/OutputManager.test.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/OutputManager.h"
+
+#include <gtest/gtest.h>
+
+TEST(IR_OUTPUT_MANAGER, make)
+{
+ coco::OutputManager mgr;
+
+ const nncc::core::ADT::tensor::Shape shape{1, 3, 3, 1};
+ auto output = mgr.create(shape);
+
+ ASSERT_EQ(output->shape(), shape);
+}
diff --git a/compiler/coco/core/src/IR/PadF.test.cpp b/compiler/coco/core/src/IR/PadF.test.cpp
new file mode 100644
index 000000000..b443d86fb
--- /dev/null
+++ b/compiler/coco/core/src/IR/PadF.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Ops.h"
+
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct IsPadF : public coco::Op::Visitor<bool>
+{
+ bool visit(const coco::PadF *) override { return true; }
+};
+
+class PadFTest : public ::testing::Test
+{
+public:
+ PadFTest()
+ {
+ // DO NOTHING
+ }
+
+protected:
+ coco::PadF *allocate(void)
+ {
+ auto op = new coco::PadF;
+ _allocated.emplace_back(op);
+ return op;
+ }
+
+private:
+ std::vector<std::unique_ptr<coco::PadF>> _allocated;
+};
+} // namespace
+
+TEST_F(PadFTest, initialization)
+{
+ auto op = allocate();
+
+ // uses() should be empty on construction
+ ASSERT_EQ(op->uses().size(), 0);
+ // parent() should be nullptr on construction
+ ASSERT_EQ(op->parent(), nullptr);
+
+ // arg() should be nullptr on construction
+ ASSERT_EQ(op->arg(), nullptr);
+
+ // pad() should be a valid (non-null) pointer
+ ASSERT_NE(op->pad(), nullptr);
+}
+
+TEST_F(PadFTest, asPadF)
+{
+ auto op = allocate();
+
+ coco::Op *mutable_base = op;
+ const coco::Op *immutable_base = op;
+
+ ASSERT_EQ(mutable_base->asPadF(), op);
+ ASSERT_EQ(mutable_base->asPadF(), immutable_base->asPadF());
+}
+
+TEST_F(PadFTest, accept)
+{
+ // Test 'PadF' class
+ auto op = allocate();
+
+ coco::PadF *mutable_ptr = op;
+ const coco::PadF *immutable_ptr = op;
+
+ ASSERT_TRUE(mutable_ptr->accept(IsPadF{}));
+ ASSERT_TRUE(immutable_ptr->accept(IsPadF{}));
+}
diff --git a/compiler/coco/core/src/IR/Padding2D.cpp b/compiler/coco/core/src/IR/Padding2D.cpp
new file mode 100644
index 000000000..8cdc42638
--- /dev/null
+++ b/compiler/coco/core/src/IR/Padding2D.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Padding2D.h"
+
+namespace coco
+{
+
+Padding2D &Padding2D::top(uint32_t value)
+{
+ _top = value;
+ return (*this);
+}
+
+Padding2D &Padding2D::bottom(uint32_t value)
+{
+ _bottom = value;
+ return (*this);
+}
+
+Padding2D &Padding2D::left(uint32_t value)
+{
+ _left = value;
+ return (*this);
+}
+
+Padding2D &Padding2D::right(uint32_t value)
+{
+ _right = value;
+ return (*this);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Padding2D.test.cpp b/compiler/coco/core/src/IR/Padding2D.test.cpp
new file mode 100644
index 000000000..292ce7d17
--- /dev/null
+++ b/compiler/coco/core/src/IR/Padding2D.test.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Padding2D.h"
+
+#include <gtest/gtest.h>
+
+TEST(IR_PADDING, default_constructor)
+{
+ coco::Padding2D pad;
+
+ ASSERT_EQ(pad.top(), 0);
+ ASSERT_EQ(pad.bottom(), 0);
+ ASSERT_EQ(pad.left(), 0);
+ ASSERT_EQ(pad.right(), 0);
+}
+
+TEST(IR_PADDING, explicit_constructor_4)
+{
+ coco::Padding2D pad{1, 2, 3, 4};
+
+ ASSERT_EQ(pad.top(), 1);
+ ASSERT_EQ(pad.bottom(), 2);
+ ASSERT_EQ(pad.left(), 3);
+ ASSERT_EQ(pad.right(), 4);
+}
+
+TEST(IR_PADDING, update)
+{
+ coco::Padding2D pad;
+
+ pad.top(1).bottom(2).left(3).right(4);
+
+ ASSERT_EQ(pad.top(), 1);
+ ASSERT_EQ(pad.bottom(), 2);
+ ASSERT_EQ(pad.left(), 3);
+ ASSERT_EQ(pad.right(), 4);
+}
diff --git a/compiler/coco/core/src/IR/Part.cpp b/compiler/coco/core/src/IR/Part.cpp
new file mode 100644
index 000000000..bf68c1feb
--- /dev/null
+++ b/compiler/coco/core/src/IR/Part.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Part.h"
+#include "coco/IR/Op.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+void Part::child(Op *c)
+{
+ if (_child != nullptr)
+ {
+ assert(_child->_part == this);
+ _child->_part = nullptr;
+ _child = nullptr;
+ }
+
+ assert(_child == nullptr);
+
+ if (c != nullptr)
+ {
+ assert(c->_part == nullptr);
+ assert(c->_step == nullptr);
+ _child = c;
+ _child->_part = this;
+ }
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Part.test.cpp b/compiler/coco/core/src/IR/Part.test.cpp
new file mode 100644
index 000000000..87e0e1516
--- /dev/null
+++ b/compiler/coco/core/src/IR/Part.test.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Part.h"
+#include "coco/IR/Op.h"
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+using stdex::make_unique;
+
+namespace
+{
+namespace mock
+{
+
+// TODO Inherit UnaryOp instead of Op
+struct Op final : public coco::Op
+{
+public:
+ Op() : _arg{this}
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t arity(void) const final { return 1; }
+ coco::Op *arg(uint32_t n) const final { return arg(); }
+
+ std::set<coco::Object *> uses() const override { throw std::runtime_error{"Not supported"}; }
+
+public:
+ ::coco::Op *arg(void) const { return _arg.child(); }
+ void arg(::coco::Op *child) { _arg.child(child); }
+
+private:
+ coco::Part _arg;
+};
+
+} // namespace mock
+} // namespace
+
+TEST(PartTest, destructor)
+{
+ auto parent = make_unique<::mock::Op>();
+ auto child = make_unique<::mock::Op>();
+
+ parent->arg(child.get());
+ ASSERT_EQ(parent->arg(), child.get());
+ ASSERT_EQ(child->up(), parent.get());
+
+ parent.reset();
+
+ // NOTE parent SHOULD unlink itself from child on destruction
+ ASSERT_EQ(child->up(), nullptr);
+}
diff --git a/compiler/coco/core/src/IR/Producer.mock.h b/compiler/coco/core/src/IR/Producer.mock.h
new file mode 100644
index 000000000..ffc343ee8
--- /dev/null
+++ b/compiler/coco/core/src/IR/Producer.mock.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_PRODUCER_MOCK_H__
+#define __COCO_IR_PRODUCER_MOCK_H__
+
+#include "coco/IR/Object.h"
+
+namespace
+{
+namespace mock
+{
+struct Producer final : public coco::Object::Producer
+{
+ coco::Instr *loc(void) override { return nullptr; }
+};
+} // namespace mock
+} // namespace
+
+#endif // __COCO_IR_PRODUCER_MOCK_H__
diff --git a/compiler/coco/core/src/IR/ReLU.test.cpp b/compiler/coco/core/src/IR/ReLU.test.cpp
new file mode 100644
index 000000000..22ef1730e
--- /dev/null
+++ b/compiler/coco/core/src/IR/ReLU.test.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Ops.h"
+
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct IsReLU : public coco::Op::Visitor<bool>
+{
+ bool visit(const coco::ReLU *) override { return true; }
+};
+
+class ReLUTest : public ::testing::Test
+{
+public:
+ ReLUTest()
+ {
+ // DO NOTHING
+ }
+
+protected:
+ coco::ReLU *allocate(void)
+ {
+ auto op = new coco::ReLU;
+ _allocated.emplace_back(op);
+ return op;
+ }
+
+private:
+ std::vector<std::unique_ptr<coco::ReLU>> _allocated;
+};
+} // namespace
+
+TEST_F(ReLUTest, initialization)
+{
+ auto op = allocate();
+
+ // uses() should be empty on construction
+ ASSERT_EQ(op->uses().size(), 0);
+ // parent() should be nullptr on construction
+ ASSERT_EQ(op->parent(), nullptr);
+
+ ASSERT_EQ(op->arg(), nullptr);
+}
+
+TEST_F(ReLUTest, asReLU)
+{
+ auto op = allocate();
+
+ coco::Op *mutable_base = op;
+ const coco::Op *immutable_base = op;
+
+ ASSERT_EQ(mutable_base->asReLU(), op);
+ ASSERT_EQ(mutable_base->asReLU(), immutable_base->asReLU());
+}
+
+TEST_F(ReLUTest, accept)
+{
+ // Test 'ReLU' class
+ auto op = allocate();
+
+ coco::ReLU *mutable_ptr = op;
+ const coco::ReLU *immutable_ptr = op;
+
+ ASSERT_TRUE(mutable_ptr->accept(IsReLU{}));
+ ASSERT_TRUE(immutable_ptr->accept(IsReLU{}));
+}
diff --git a/compiler/coco/core/src/IR/ReLU6.test.cpp b/compiler/coco/core/src/IR/ReLU6.test.cpp
new file mode 100644
index 000000000..dd148254f
--- /dev/null
+++ b/compiler/coco/core/src/IR/ReLU6.test.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Ops.h"
+
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct IsReLU6 : public coco::Op::Visitor<bool>
+{
+ bool visit(const coco::ReLU6 *) override { return true; }
+};
+
+class ReLU6Test : public ::testing::Test
+{
+public:
+ ReLU6Test()
+ {
+ // DO NOTHING
+ }
+
+protected:
+ coco::ReLU6 *allocate(void)
+ {
+ auto op = new coco::ReLU6;
+ _allocated.emplace_back(op);
+ return op;
+ }
+
+private:
+ std::vector<std::unique_ptr<coco::ReLU6>> _allocated;
+};
+} // namespace
+
+TEST_F(ReLU6Test, initialization)
+{
+ auto op = allocate();
+
+ // uses() should be empty on construction
+ ASSERT_EQ(op->uses().size(), 0);
+ // parent() should be nullptr on construction
+ ASSERT_EQ(op->parent(), nullptr);
+
+ ASSERT_EQ(op->arg(), nullptr);
+}
+
+TEST_F(ReLU6Test, asReLU6)
+{
+ auto op = allocate();
+
+ coco::Op *mutable_base = op;
+ const coco::Op *immutable_base = op;
+
+ ASSERT_EQ(mutable_base->asReLU6(), op);
+ ASSERT_EQ(mutable_base->asReLU6(), immutable_base->asReLU6());
+}
+
+TEST_F(ReLU6Test, accept)
+{
+ // Test 'ReLU6' class
+ auto op = allocate();
+
+ coco::ReLU6 *mutable_ptr = op;
+ const coco::ReLU6 *immutable_ptr = op;
+
+ ASSERT_TRUE(mutable_ptr->accept(IsReLU6{}));
+ ASSERT_TRUE(immutable_ptr->accept(IsReLU6{}));
+}
diff --git a/compiler/coco/core/src/IR/Read.cpp b/compiler/coco/core/src/IR/Read.cpp
new file mode 100644
index 000000000..ea01cce1d
--- /dev/null
+++ b/compiler/coco/core/src/IR/Read.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Read.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+Read::~Read()
+{
+ // Unlink self from Bag if there is a linked bag
+ bag(nullptr);
+}
+
+void Read::bag(Bag *bag)
+{
+ if (_bag)
+ {
+ _bag->mutable_reads()->erase(this);
+ _bag = nullptr;
+ }
+
+ assert(_bag == nullptr);
+
+ if (bag)
+ {
+ _bag = bag;
+ _bag->mutable_reads()->insert(this);
+ }
+
+ assert(_bag == bag);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Read.test.cpp b/compiler/coco/core/src/IR/Read.test.cpp
new file mode 100644
index 000000000..7c36820a6
--- /dev/null
+++ b/compiler/coco/core/src/IR/Read.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Read.h"
+#include "coco/IR/BagManager.h"
+
+#include "Reader.mock.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+class ReadTest : public ::testing::Test
+{
+protected:
+ coco::BagManager bag_mgr;
+};
+} // namespace
+
+TEST_F(ReadTest, constructor)
+{
+ // TODO Rename 'read' as 'reader'
+ ::mock::Reader read;
+
+ // TODO Rename 'slot'
+ coco::Read slot{&read};
+
+ ASSERT_EQ(slot.bag(), nullptr);
+}
+
+TEST_F(ReadTest, value)
+{
+ // TODO Rename 'read' as 'reader'
+ ::mock::Reader read;
+
+ // TODO Rename 'slot'
+ coco::Read slot{&read};
+
+ auto bag = bag_mgr.create(16);
+
+ slot.bag(bag);
+
+ ASSERT_EQ(slot.bag(), bag);
+
+ ASSERT_EQ(bag->reads()->size(), 1);
+ ASSERT_NE(bag->reads()->find(&slot), bag->reads()->end());
+
+ slot.bag(nullptr);
+
+ ASSERT_EQ(slot.bag(), nullptr);
+
+ ASSERT_EQ(bag->reads()->size(), 0);
+}
+
+TEST_F(ReadTest, unlink_on_destruction)
+{
+ // TODO Rename 'read' as 'reader'
+ ::mock::Reader reader;
+
+ auto bag = bag_mgr.create(1);
+
+ {
+ coco::Read read{&reader};
+ read.bag(bag);
+ }
+
+ ASSERT_EQ(bag->reads()->size(), 0);
+}
diff --git a/compiler/coco/core/src/IR/Reader.mock.h b/compiler/coco/core/src/IR/Reader.mock.h
new file mode 100644
index 000000000..0965abfeb
--- /dev/null
+++ b/compiler/coco/core/src/IR/Reader.mock.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_READER_MOCK_H__
+#define __COCO_IR_READER_MOCK_H__
+
+#include "coco/IR/Bag.h"
+
+namespace
+{
+namespace mock
+{
+struct Reader final : public coco::Bag::Reader
+{
+ coco::Instr *loc(void) override { return nullptr; }
+};
+} // namespace mock
+} // namespace
+
+#endif // __COCO_IR_READER_MOCK_H__
diff --git a/compiler/coco/core/src/IR/Shuffle.cpp b/compiler/coco/core/src/IR/Shuffle.cpp
new file mode 100644
index 000000000..f8007dd1b
--- /dev/null
+++ b/compiler/coco/core/src/IR/Shuffle.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Instrs.h"
+
+namespace coco
+{
+
+uint32_t Shuffle::size(void) const { return _content.size(); }
+
+std::set<ElemID> Shuffle::range(void) const
+{
+ std::set<ElemID> res;
+
+ for (auto it = _content.begin(); it != _content.end(); ++it)
+ {
+ res.insert(it->first);
+ }
+
+ return res;
+}
+
+void Shuffle::insert(const ElemID &from, const ElemID &into) { _content[into] = from; }
+
+void Shuffle::from(Bag *b) { _from.bag(b); }
+void Shuffle::into(Bag *b) { _into.bag(b); }
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Shuffle.test.cpp b/compiler/coco/core/src/IR/Shuffle.test.cpp
new file mode 100644
index 000000000..f564c08c3
--- /dev/null
+++ b/compiler/coco/core/src/IR/Shuffle.test.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Instrs.h"
+#include "coco/IR/ObjectManager.h"
+#include "coco/IR/OpManager.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+class ShuffleTest : public ::testing::Test
+{
+public:
+ virtual ~ShuffleTest() = default;
+
+protected:
+ coco::Shuffle *allocate(void)
+ {
+ auto ins = new coco::Shuffle;
+ _allocated.emplace_back(ins);
+ return ins;
+ }
+
+private:
+ std::vector<std::unique_ptr<coco::Instr>> _allocated;
+};
+} // namespace
+
+TEST_F(ShuffleTest, constructor)
+{
+ auto ins = allocate();
+
+ ASSERT_EQ(ins->from(), nullptr);
+ ASSERT_EQ(ins->into(), nullptr);
+}
+
+TEST_F(ShuffleTest, asShuffle)
+{
+ auto ins = allocate();
+
+ coco::Instr *mutable_ptr = ins;
+ const coco::Instr *immutable_ptr = ins;
+
+ ASSERT_NE(mutable_ptr->asShuffle(), nullptr);
+ ASSERT_EQ(mutable_ptr->asShuffle(), immutable_ptr->asShuffle());
+}
+
+TEST_F(ShuffleTest, size)
+{
+ auto shuffle = allocate();
+
+ shuffle->insert(coco::ElemID{3}, coco::ElemID{2});
+ shuffle->insert(coco::ElemID{3}, coco::ElemID{5});
+
+ ASSERT_EQ(shuffle->size(), 2);
+ ASSERT_EQ(shuffle->range().size(), shuffle->size());
+}
+
+TEST_F(ShuffleTest, range)
+{
+ auto shuffle = allocate();
+
+ shuffle->insert(coco::ElemID{3}, coco::ElemID{2});
+ shuffle->insert(coco::ElemID{3}, coco::ElemID{5});
+
+ auto range = shuffle->range();
+
+ EXPECT_EQ(range.size(), 2);
+ EXPECT_NE(range.count(coco::ElemID{2}), 0);
+ EXPECT_NE(range.count(coco::ElemID{5}), 0);
+}
+
+TEST_F(ShuffleTest, defined)
+{
+ auto shuffle = allocate();
+
+ shuffle->insert(coco::ElemID{3}, coco::ElemID{2});
+
+ EXPECT_TRUE(shuffle->defined(coco::ElemID{2}));
+ EXPECT_FALSE(shuffle->defined(coco::ElemID{3}));
+}
diff --git a/compiler/coco/core/src/IR/Sqrt.test.cpp b/compiler/coco/core/src/IR/Sqrt.test.cpp
new file mode 100644
index 000000000..cf9b232ea
--- /dev/null
+++ b/compiler/coco/core/src/IR/Sqrt.test.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Ops.h"
+
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct IsSqrt : public coco::Op::Visitor<bool>
+{
+ bool visit(const coco::Sqrt *) override { return true; }
+};
+
+class SqrtTest : public ::testing::Test
+{
+public:
+ SqrtTest()
+ {
+ // DO NOTHING
+ }
+
+protected:
+ coco::Sqrt *allocate(void)
+ {
+ auto op = new coco::Sqrt;
+ _allocated.emplace_back(op);
+ return op;
+ }
+
+private:
+ std::vector<std::unique_ptr<coco::Sqrt>> _allocated;
+};
+} // namespace
+
+TEST_F(SqrtTest, initialization)
+{
+ auto op = allocate();
+
+ // uses() should be empty on construction
+ ASSERT_EQ(op->uses().size(), 0);
+ // parent() should be nullptr on construction
+ ASSERT_EQ(op->parent(), nullptr);
+
+ ASSERT_EQ(op->arg(), nullptr);
+}
+
+TEST_F(SqrtTest, asSqrt)
+{
+ auto op = allocate();
+
+ coco::Op *mutable_base = op;
+ const coco::Op *immutable_base = op;
+
+ ASSERT_EQ(mutable_base->asSqrt(), op);
+ ASSERT_EQ(mutable_base->asSqrt(), immutable_base->asSqrt());
+}
+
+TEST_F(SqrtTest, accept)
+{
+ // Test 'Sqrt' class
+ auto op = allocate();
+
+ coco::Sqrt *mutable_ptr = op;
+ const coco::Sqrt *immutable_ptr = op;
+
+ ASSERT_TRUE(mutable_ptr->accept(IsSqrt{}));
+ ASSERT_TRUE(immutable_ptr->accept(IsSqrt{}));
+}
diff --git a/compiler/coco/core/src/IR/Step.cpp b/compiler/coco/core/src/IR/Step.cpp
new file mode 100644
index 000000000..04400d46b
--- /dev/null
+++ b/compiler/coco/core/src/IR/Step.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Step.h"
+#include "coco/IR/Op.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+void Step::op(Op *o)
+{
+ if (_op != nullptr)
+ {
+ // Unlink step from _op
+ assert(_op->_step == this);
+ _op->_step = nullptr;
+
+ // Reset _op
+ _op = nullptr;
+ }
+
+ assert(_op == nullptr);
+
+ if (o)
+ {
+ // Update _op
+ _op = o;
+
+ // Link step to _op
+ assert(_op->_step == nullptr);
+ _op->_step = this;
+ }
+
+ assert(_op == o);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Stride2D.cpp b/compiler/coco/core/src/IR/Stride2D.cpp
new file mode 100644
index 000000000..a034876ef
--- /dev/null
+++ b/compiler/coco/core/src/IR/Stride2D.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Stride2D.h"
+
+namespace coco
+{
+
+Stride2D &Stride2D::vertical(uint32_t value)
+{
+ _vertical = value;
+ return (*this);
+}
+
+Stride2D &Stride2D::horizontal(uint32_t value)
+{
+ _horizontal = value;
+ return (*this);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Stride2D.test.cpp b/compiler/coco/core/src/IR/Stride2D.test.cpp
new file mode 100644
index 000000000..43d159ee0
--- /dev/null
+++ b/compiler/coco/core/src/IR/Stride2D.test.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Stride2D.h"
+
+#include <gtest/gtest.h>
+
+TEST(IR_STRIDE_2D, default_constructor)
+{
+ coco::Stride2D stride;
+
+ ASSERT_EQ(stride.vertical(), 1);
+ ASSERT_EQ(stride.horizontal(), 1);
+}
+
+TEST(IR_STRIDE_2D, explicit_constructor_4)
+{
+ coco::Stride2D stride{2, 3};
+
+ ASSERT_EQ(stride.vertical(), 2);
+ ASSERT_EQ(stride.horizontal(), 3);
+}
+
+TEST(IR_STRIDE_2D, update)
+{
+ coco::Stride2D stride;
+
+ stride.vertical(2).horizontal(3);
+
+ ASSERT_EQ(stride.vertical(), 2);
+ ASSERT_EQ(stride.horizontal(), 3);
+}
diff --git a/compiler/coco/core/src/IR/Sub.test.cpp b/compiler/coco/core/src/IR/Sub.test.cpp
new file mode 100644
index 000000000..6c8b9ba54
--- /dev/null
+++ b/compiler/coco/core/src/IR/Sub.test.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Ops.h"
+
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct IsSub : public coco::Op::Visitor<bool>
+{
+ bool visit(const coco::Sub *) override { return true; }
+};
+
+class SubTest : public ::testing::Test
+{
+public:
+ SubTest()
+ {
+ // DO NOTHING
+ }
+
+protected:
+ coco::Sub *allocate(void)
+ {
+ auto op = new coco::Sub;
+ _allocated.emplace_back(op);
+ return op;
+ }
+
+private:
+ std::vector<std::unique_ptr<coco::Sub>> _allocated;
+};
+} // namespace
+
+TEST_F(SubTest, initialization)
+{
+ auto op = allocate();
+
+ // arguments should be empty on construction
+ ASSERT_EQ(op->left(), nullptr);
+ ASSERT_EQ(op->right(), nullptr);
+
+ // uses() should be empty on construction
+ ASSERT_EQ(op->uses().size(), 0);
+ // parent() should be nullptr on construction
+ ASSERT_EQ(op->parent(), nullptr);
+}
+
+TEST_F(SubTest, asSub)
+{
+ auto op = allocate();
+
+ coco::Op *mutable_base = op;
+ const coco::Op *immutable_base = op;
+
+ ASSERT_EQ(mutable_base->asSub(), op);
+ ASSERT_EQ(mutable_base->asSub(), immutable_base->asSub());
+}
+
+TEST_F(SubTest, accept)
+{
+ // Test 'Sub' class
+ auto op = allocate();
+
+ coco::Sub *mutable_ptr = op;
+ const coco::Sub *immutable_ptr = op;
+
+ ASSERT_TRUE(mutable_ptr->accept(IsSub{}));
+ ASSERT_TRUE(immutable_ptr->accept(IsSub{}));
+}
diff --git a/compiler/coco/core/src/IR/Update.cpp b/compiler/coco/core/src/IR/Update.cpp
new file mode 100644
index 000000000..8e81c85cf
--- /dev/null
+++ b/compiler/coco/core/src/IR/Update.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Update.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+Update::~Update()
+{
+ // Unlink self from a linked bag if it exists
+ bag(nullptr);
+}
+
+void Update::bag(Bag *bag)
+{
+ if (_bag)
+ {
+ _bag->mutable_updates()->erase(this);
+ _bag = nullptr;
+ }
+
+ assert(_bag == nullptr);
+
+ if (bag)
+ {
+ _bag = bag;
+ _bag->mutable_updates()->insert(this);
+ }
+
+ assert(_bag == bag);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Update.test.cpp b/compiler/coco/core/src/IR/Update.test.cpp
new file mode 100644
index 000000000..0bd355998
--- /dev/null
+++ b/compiler/coco/core/src/IR/Update.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Update.h"
+#include "coco/IR/BagManager.h"
+
+#include "Updater.mock.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+class UpdateTest : public ::testing::Test
+{
+protected:
+ coco::BagManager bag_mgr;
+};
+} // namespace
+
+TEST_F(UpdateTest, constructor)
+{
+ // TODO Rename 'update'
+ ::mock::Updater update;
+
+ // TODO Rename 'slot'
+ coco::Update slot{&update};
+
+ ASSERT_EQ(slot.bag(), nullptr);
+}
+
+TEST_F(UpdateTest, value)
+{
+ // TODO Rename 'update'
+ ::mock::Updater update;
+
+ // TODO Rename 'slot'
+ coco::Update slot{&update};
+
+ auto bag = bag_mgr.create(16);
+
+ slot.bag(bag);
+
+ ASSERT_EQ(slot.bag(), bag);
+
+ ASSERT_EQ(bag->updates()->size(), 1);
+ ASSERT_NE(bag->updates()->find(&slot), bag->updates()->end());
+
+ slot.bag(nullptr);
+
+ ASSERT_EQ(slot.bag(), nullptr);
+
+ ASSERT_EQ(bag->updates()->size(), 0);
+}
+
+TEST_F(UpdateTest, unlink_on_destruction)
+{
+ ::mock::Updater updater;
+
+ auto bag = bag_mgr.create(1);
+
+ {
+ coco::Update update{&updater};
+ update.bag(bag);
+ ASSERT_EQ(bag->updates()->size(), 1);
+ }
+
+ ASSERT_EQ(bag->updates()->size(), 0);
+}
diff --git a/compiler/coco/core/src/IR/Updater.mock.h b/compiler/coco/core/src/IR/Updater.mock.h
new file mode 100644
index 000000000..6441cdd02
--- /dev/null
+++ b/compiler/coco/core/src/IR/Updater.mock.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_UPDATER_MOCK_H__
+#define __COCO_IR_UPDATER_MOCK_H__
+
+#include "coco/IR/Bag.h"
+
+namespace
+{
+namespace mock
+{
+struct Updater final : public coco::Bag::Updater
+{
+ coco::Instr *loc(void) override { return nullptr; }
+};
+} // namespace mock
+} // namespace
+
+#endif // __COCO_IR_UPDATER_MOCK_H__
diff --git a/compiler/coco/core/src/IR/Use.cpp b/compiler/coco/core/src/IR/Use.cpp
new file mode 100644
index 000000000..cd9b68105
--- /dev/null
+++ b/compiler/coco/core/src/IR/Use.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Use.h"
+
+#include <cassert>
+
+namespace coco
+{
+
+void Use::value(Object *value)
+{
+ if (_value)
+ {
+ _value->mutable_uses()->erase(this);
+ _value = nullptr;
+ }
+
+ assert(_value == nullptr);
+
+ if (value)
+ {
+ _value = value;
+ _value->mutable_uses()->insert(this);
+ }
+
+ assert(_value == value);
+}
+
+} // namespace coco
diff --git a/compiler/coco/core/src/IR/Use.test.cpp b/compiler/coco/core/src/IR/Use.test.cpp
new file mode 100644
index 000000000..3191e9852
--- /dev/null
+++ b/compiler/coco/core/src/IR/Use.test.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Use.h"
+#include "coco/IR/ObjectManager.h"
+
+#include "coco/IR/FeatureObject.h"
+
+#include "Consumer.mock.h"
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+using stdex::make_unique;
+
+namespace
+{
+class UseTest : public ::testing::Test
+{
+protected:
+ coco::ObjectManager obj_mgr;
+};
+} // namespace
+
+TEST_F(UseTest, constructor)
+{
+ auto o = obj_mgr.create<coco::FeatureObject>();
+
+ // TODO Rename 'use'
+ ::mock::Consumer use;
+
+ coco::Use slot{&use};
+
+ ASSERT_EQ(slot.value(), nullptr);
+}
+
+TEST_F(UseTest, value)
+{
+ auto o = obj_mgr.create<coco::FeatureObject>();
+
+ // TODO Rename 'use'
+ ::mock::Consumer use;
+
+ coco::Use slot{&use};
+
+ slot.value(o);
+
+ ASSERT_EQ(slot.value(), o);
+
+ ASSERT_EQ(o->uses()->size(), 1);
+ ASSERT_NE(o->uses()->find(&slot), o->uses()->end());
+
+ slot.value(nullptr);
+
+ ASSERT_EQ(slot.value(), nullptr);
+
+ ASSERT_EQ(o->uses()->size(), 0);
+}
+
+TEST_F(UseTest, destructor)
+{
+ ::mock::Consumer consumer;
+
+ auto o = obj_mgr.create<coco::FeatureObject>();
+ auto use = make_unique<coco::Use>(&consumer);
+
+ use->value(o);
+ use.reset();
+
+ // ~Use SHOULD unlink itself from linked Object (if exists)
+ ASSERT_EQ(o->uses()->size(), 0);
+}
diff --git a/compiler/coco/core/src/IR/Window2D.test.cpp b/compiler/coco/core/src/IR/Window2D.test.cpp
new file mode 100644
index 000000000..c0e919237
--- /dev/null
+++ b/compiler/coco/core/src/IR/Window2D.test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Window2D.h"
+
+#include <gtest/gtest.h>
+
+TEST(IR_WINDOW_2D, default_constructor)
+{
+ coco::Window2D window;
+
+ ASSERT_EQ(window.height(), 1);
+ ASSERT_EQ(window.width(), 1);
+}
+
+TEST(IR_WINDOW_2D, explicit_constructor_4)
+{
+ coco::Window2D window{2, 3};
+
+ ASSERT_EQ(window.height(), 2);
+ ASSERT_EQ(window.width(), 3);
+}
+
+TEST(IR_WINDOW_2D, update)
+{
+ coco::Window2D window;
+
+ window.height(2);
+ window.width(3);
+
+ ASSERT_EQ(window.height(), 2);
+ ASSERT_EQ(window.width(), 3);
+}
diff --git a/compiler/coco/generic/CMakeLists.txt b/compiler/coco/generic/CMakeLists.txt
new file mode 100644
index 000000000..02fbf67f5
--- /dev/null
+++ b/compiler/coco/generic/CMakeLists.txt
@@ -0,0 +1,22 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(coco_generic SHARED ${SOURCES})
+target_include_directories(coco_generic PUBLIC include)
+target_link_libraries(coco_generic PUBLIC coco_core)
+target_link_libraries(coco_generic PRIVATE stdex)
+target_link_libraries(coco_generic PRIVATE nncc_common)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is required for internal testing
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(coco_generic_test ${TESTS})
+target_link_libraries(coco_generic_test coco_generic)
+# stdex is a PRIVATE dependency of coco_generic, and thus is not linked to coco_generic_test
+# even though coco_generic_test is linked to coco_generic
+target_link_libraries(coco_generic_test stdex)
diff --git a/compiler/coco/generic/include/coco/ADT/Span.h b/compiler/coco/generic/include/coco/ADT/Span.h
new file mode 100644
index 000000000..240e6afec
--- /dev/null
+++ b/compiler/coco/generic/include/coco/ADT/Span.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_ADT_SPAN_H__
+#define __COCO_ADT_SPAN_H__
+
+#include <cstdint>
+#include <cassert>
+
+namespace coco
+{
+
+/**
+ * @brief A Span is a non-owning reference to a memory chunk
+ *
+ * @note A Span DOES NOT OWN a memory chunk.
+ */
+template <typename T> class Span
+{
+public:
+ Span(T *data, uint32_t size) : _data{data}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ T *data(void) { return _data; }
+ const T *data(void) const { return _data; }
+
+public:
+ uint32_t size(void) const { return _size; }
+
+public:
+ T &operator[](uint32_t n)
+ {
+ assert(n < _size);
+ return *(_data + n);
+ }
+
+public:
+ const T &operator[](uint32_t n) const
+ {
+ assert(n < _size);
+ return *(_data + n);
+ }
+
+private:
+ T *_data;
+ uint32_t _size;
+};
+
+} // namespace coco
+
+#endif // __COCO_ADT_SPAN_H__
diff --git a/compiler/coco/generic/include/coco/IR/Data.h b/compiler/coco/generic/include/coco/IR/Data.h
new file mode 100644
index 000000000..0cbee85e9
--- /dev/null
+++ b/compiler/coco/generic/include/coco/IR/Data.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_DATA_H__
+#define __COCO_IR_DATA_H__
+
+#include "coco/IR/PlainWeightContext.h"
+
+#include <memory>
+
+namespace coco
+{
+
+/**
+ * @brief Core coco entity for constant weights
+ */
+struct Data
+{
+ virtual ~Data() = default;
+
+ /**
+ * @brief Return true if a given bag has an allocated weight data
+ */
+ virtual bool allocated(const coco::Bag *) const = 0;
+
+ /**
+ * @brief Release a memory chunk allocated for weight data of a given bag
+ *
+ * WARN Do NOT invoke release for a bag "b" for which allocated(b) does NOT hold
+ */
+ virtual void release(const coco::Bag *) = 0;
+
+ virtual PlainWeightContext<float> *f32(void) = 0;
+ virtual const PlainWeightContext<float> *f32(void) const = 0;
+
+ static std::unique_ptr<Data> create(void);
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_DATA_H__
diff --git a/compiler/coco/generic/include/coco/IR/PlainWeightContext.h b/compiler/coco/generic/include/coco/IR/PlainWeightContext.h
new file mode 100644
index 000000000..5100e9d90
--- /dev/null
+++ b/compiler/coco/generic/include/coco/IR/PlainWeightContext.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COCO_IR_PLAIN_WEIGHT_CONTEXT_H__
+#define __COCO_IR_PLAIN_WEIGHT_CONTEXT_H__
+
+#include "coco/IR/Bag.h"
+#include "coco/IR/KernelObject.h"
+
+#include "coco/ADT/Span.h"
+
+#include <nncc/core/ADT/kernel/Accessor.h>
+#include <nncc/core/ADT/kernel/Reader.h>
+
+#include <memory>
+
+namespace coco
+{
+
+/**
+ * @brief Non-quantized (plain) Weight Data Accessor
+ */
+template <typename T> struct PlainWeightContext
+{
+ virtual ~PlainWeightContext() = default;
+
+ /**
+ * @brief Allocate a weight space for a given blob
+ *
+ * @require the following code SHOULD work for any bag "b":
+ * PlainWeightContext<T> ctx;
+ *
+ * auto span = ctx.allocate(b);
+ * assert(span.data() != nullptr);
+ * assert(span.size() == b->size());
+ */
+ virtual Span<T> allocate(const Bag *) = 0;
+
+ /**
+ * @brief Return a pointer to the underlying storage
+ *
+ * @note weight returns a null-span S for an invalid bag
+ * i.e S.data() == nullptr and S.size() == 0
+ */
+ virtual Span<T> weight(const Bag *) = 0;
+
+ virtual std::unique_ptr<nncc::core::ADT::kernel::Accessor<T>> access(const KernelObject *) = 0;
+ virtual std::unique_ptr<nncc::core::ADT::kernel::Reader<T>> read(const KernelObject *) const = 0;
+};
+
+} // namespace coco
+
+#endif // __COCO_IR_PLAIN_WEIGHT_CONTEXT_H__
diff --git a/compiler/coco/generic/src/ADT/Span.test.cpp b/compiler/coco/generic/src/ADT/Span.test.cpp
new file mode 100644
index 000000000..c313233a2
--- /dev/null
+++ b/compiler/coco/generic/src/ADT/Span.test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/ADT/Span.h"
+
+#include <gtest/gtest.h>
+
+TEST(SpanTest, constructor)
+{
+ const uint32_t arr_size = 16;
+ int arr_data[arr_size];
+
+ coco::Span<int> span{arr_data, arr_size};
+
+ coco::Span<int> &ref = span;
+ const coco::Span<int> &cref = span;
+
+ ASSERT_EQ(ref.data(), arr_data);
+ ASSERT_EQ(cref.data(), arr_data);
+ ASSERT_EQ(ref.size(), arr_size);
+}
+
+TEST(SpanTest, array_subscript_operator)
+{
+ // Create a stack-allocated chunk
+ const uint32_t arr_size = 16;
+ int arr_data[arr_size];
+
+ for (uint32_t n = 0; n < arr_size; ++n)
+ {
+ arr_data[n] = n;
+ }
+
+ // Create a Span
+ coco::Span<int> span{arr_data, arr_size};
+
+ coco::Span<int> &ref = span;
+ const coco::Span<int> &cref = span;
+
+ ASSERT_EQ(ref[3], 3);
+ ASSERT_EQ(cref[3], 3);
+
+ arr_data[3] = 16;
+
+ ASSERT_EQ(ref[3], 16);
+ ASSERT_EQ(cref[3], 16);
+}
diff --git a/compiler/coco/generic/src/IR/Data.cpp b/compiler/coco/generic/src/IR/Data.cpp
new file mode 100644
index 000000000..b71947253
--- /dev/null
+++ b/compiler/coco/generic/src/IR/Data.cpp
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Data.h"
+
+#include <nncc/core/ADT/kernel/NCHWLayout.h>
+#include <nncc/core/ADT/kernel/Overlay.h>
+
+#include <stdex/Memory.h>
+
+#include <map>
+
+using namespace nncc::core::ADT;
+
+using stdex::make_unique;
+
+namespace
+{
+class BlobContext
+{
+public:
+ void allocate(const coco::Bag *b, uint32_t elemsize)
+ {
+ auto buffer = make_unique<std::vector<uint8_t>>();
+ buffer->resize(b->size() * elemsize);
+
+ _data[b] = std::move(buffer);
+ }
+
+ void release(const coco::Bag *b) { _data.erase(b); }
+
+public:
+ uint8_t *at(const coco::Bag *b)
+ {
+ auto it = _data.find(b);
+
+ if (it != _data.end())
+ {
+ return it->second->data();
+ }
+
+ return nullptr;
+ }
+
+public:
+ uint32_t size(const coco::Bag *b) const
+ {
+ auto it = _data.find(b);
+
+ if (it != _data.end())
+ {
+ return it->second->size();
+ }
+
+ return 0;
+ }
+
+private:
+ std::map<const coco::Bag *, std::unique_ptr<std::vector<uint8_t>>> _data;
+};
+}
+
+namespace
+{
+
+template <typename T> class KernelOverlay : public kernel::Reader<T>, public kernel::Accessor<T>
+{
+public:
+ KernelOverlay(T *base, const coco::KernelObject *object) : _base{base}, _object{object}
+ {
+ // DO NOTHING
+ }
+
+public:
+ T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ assert(_object->layout() != nullptr);
+ auto offset = _object->layout()->at(nth, ch, row, col);
+ return *(_base + offset.value());
+ }
+
+public:
+ T &at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) override
+ {
+ assert(_object->layout() != nullptr);
+ auto offset = _object->layout()->at(nth, ch, row, col);
+ return *(_base + offset.value());
+ }
+
+private:
+ T *_base;
+ const coco::KernelObject *_object;
+};
+
+} // namespace
+
+namespace
+{
+template <typename T> class PlainWeightContextImpl final : public coco::PlainWeightContext<T>
+{
+public:
+ PlainWeightContextImpl(BlobContext *blob) : _blob{blob}
+ {
+ // DO NOTHING
+ }
+
+public:
+ PlainWeightContextImpl(const PlainWeightContextImpl &) = delete;
+ PlainWeightContextImpl(PlainWeightContextImpl &&) = delete;
+
+public:
+ coco::Span<T> allocate(const coco::Bag *bag) override
+ {
+ assert(bag != nullptr);
+ _blob->allocate(bag, sizeof(T));
+ return weight(bag);
+ }
+
+ coco::Span<T> weight(const coco::Bag *b) override
+ {
+ // TODO Check type later
+ if (auto data = _blob->at(b))
+ {
+ uint32_t byte_size = _blob->size(b);
+ assert(byte_size % sizeof(T) == 0);
+ uint32_t elem_size = static_cast<uint32_t>(byte_size / sizeof(T));
+
+ return coco::Span<T>{reinterpret_cast<T *>(data), elem_size};
+ }
+
+ return coco::Span<T>{nullptr, 0};
+ }
+
+public:
+ std::unique_ptr<kernel::Accessor<T>> access(const coco::KernelObject *o) override
+ {
+ auto b = o->bag();
+ assert(b != nullptr);
+
+ if (auto base = reinterpret_cast<T *>(_blob->at(b)))
+ {
+ return make_unique<KernelOverlay<T>>(base, o);
+ }
+
+ return nullptr;
+ }
+
+public:
+ std::unique_ptr<kernel::Reader<T>> read(const coco::KernelObject *o) const override
+ {
+ auto b = o->bag();
+ assert(b != nullptr);
+
+ if (auto base = reinterpret_cast<T *>(_blob->at(b)))
+ {
+ return make_unique<KernelOverlay<T>>(base, o);
+ }
+
+ return nullptr;
+ }
+
+private:
+ BlobContext *const _blob;
+};
+} // namespace
+
+namespace
+{
+struct DataImpl final : public coco::Data
+{
+ std::unique_ptr<BlobContext> _blob;
+ std::unique_ptr<PlainWeightContextImpl<float>> _fp32;
+
+ bool allocated(const coco::Bag *b) const override { return _blob->at(b) != nullptr; }
+
+ void release(const coco::Bag *b) override
+ {
+ assert(allocated(b));
+ _blob->release(b);
+ }
+
+ coco::PlainWeightContext<float> *f32(void) override { return _fp32.get(); }
+ const coco::PlainWeightContext<float> *f32(void) const override { return _fp32.get(); }
+};
+} // namespace
+
+namespace coco
+{
+
+std::unique_ptr<Data> Data::create(void)
+{
+ auto blob = make_unique<BlobContext>();
+ auto fp32 = make_unique<PlainWeightContextImpl<float>>(blob.get());
+
+ auto data = make_unique<DataImpl>();
+
+ data->_blob = std::move(blob);
+ data->_fp32 = std::move(fp32);
+
+ // GCC 4.9 tries to copy data (while GCC 6.X doesn't)
+ return std::move(data);
+}
+
+} // namespace coco
diff --git a/compiler/coco/generic/src/IR/Data.test.cpp b/compiler/coco/generic/src/IR/Data.test.cpp
new file mode 100644
index 000000000..1029dfe9f
--- /dev/null
+++ b/compiler/coco/generic/src/IR/Data.test.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coco/IR/Data.h"
+#include "coco/IR/Module.h"
+#include "coco/IR/KernelLayouts.h"
+
+#include <nncc/core/ADT/kernel/NCHWLayout.h>
+
+#include <gtest/gtest.h>
+
+TEST(IR_DATA, construct)
+{
+ auto data = coco::Data::create();
+
+ coco::Data *mutable_ptr = data.get();
+ const coco::Data *immutable_ptr = data.get();
+
+ ASSERT_NE(mutable_ptr->f32(), nullptr);
+ ASSERT_EQ(mutable_ptr->f32(), immutable_ptr->f32());
+}
+
+TEST(IR_DATA, allocate_and_link_bag)
+{
+ auto m = coco::Module::create();
+ auto d = coco::Data::create();
+
+ // Create a bag
+ auto bag = m->entity()->bag()->create(9);
+
+ // weight(...) SHOULD return a null-span for an invalid bag
+ {
+ auto span = d->f32()->weight(bag);
+
+ ASSERT_EQ(span.data(), nullptr);
+ ASSERT_EQ(span.size(), 0);
+ }
+
+ // Allocate a weight space
+ {
+ auto allocated_span = d->f32()->allocate(bag);
+
+ ASSERT_NE(allocated_span.data(), nullptr);
+ ASSERT_EQ(allocated_span.size(), bag->size());
+
+ auto retrieved_span = d->f32()->weight(bag);
+
+ ASSERT_EQ(allocated_span.data(), retrieved_span.data());
+ ASSERT_EQ(allocated_span.size(), retrieved_span.size());
+ }
+}
diff --git a/compiler/coco/requires.cmake b/compiler/coco/requires.cmake
new file mode 100644
index 000000000..654db88c3
--- /dev/null
+++ b/compiler/coco/requires.cmake
@@ -0,0 +1 @@
+require("angkor")
diff --git a/compiler/cwrap/CMakeLists.txt b/compiler/cwrap/CMakeLists.txt
new file mode 100644
index 000000000..e1ae4d0b5
--- /dev/null
+++ b/compiler/cwrap/CMakeLists.txt
@@ -0,0 +1,22 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(cwrap STATIC ${SOURCES})
+set_target_properties(cwrap PROPERTIES POSITION_INDEPENDENT_CODE ON)
+set_target_properties(cwrap PROPERTIES LINKER_LANGUAGE CXX)
+target_include_directories(cwrap PUBLIC include)
+# Let's apply nncc common compile options
+# NOTE This will enable strict compilation (warnings as error).
+# Please refer to top-level CMakeLists.txt for details
+target_link_libraries(cwrap PRIVATE nncc_common)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for testing
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(cwrap_test ${TESTS})
+target_link_libraries(cwrap_test cwrap)
diff --git a/compiler/cwrap/README.md b/compiler/cwrap/README.md
new file mode 100644
index 000000000..5440ca3f9
--- /dev/null
+++ b/compiler/cwrap/README.md
@@ -0,0 +1,23 @@
+# cwrap
+
+_cwrap_ is a collection of C++ wrappers for POSIX C API.
+
+## How to use
+
+Currently it supports only file descriptor.
+
+## Example
+- File Descriptor
+
+```cpp
+cwrap::Fildes fildes{open(path.c_str(), O_RDONLY)};
+
+if (fildes.get() < 0)
+{
+ std::ostringstream ostr;
+ ostr << "Error: " << path << " not found" << std::endl;
+ throw std::runtime_error{ostr.str()};
+}
+
+google::protobuf::io::FileInputStream fis(fildes.get());
+```
diff --git a/compiler/cwrap/include/cwrap/Fildes.h b/compiler/cwrap/include/cwrap/Fildes.h
new file mode 100644
index 000000000..f1061cc57
--- /dev/null
+++ b/compiler/cwrap/include/cwrap/Fildes.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CWRAP_FILDES_H__
+#define __CWRAP_FILDES_H__
+
+namespace cwrap
+{
+
+/**
+ * @brief POSIX File Descriptor
+ *
+ * @note Fildes owns underlying file descriptor
+ */
+class Fildes final
+{
+public:
+ Fildes();
+ explicit Fildes(int value);
+
+ // NOTE Copy is not allowed
+ Fildes(const Fildes &) = delete;
+ Fildes(Fildes &&);
+
+ ~Fildes();
+
+public:
+ Fildes &operator=(Fildes &&);
+
+public:
+ int get(void) const;
+ void set(int value);
+
+ int release(void);
+
+private:
+ int _value;
+};
+
+bool valid(const Fildes &);
+
+} // namespace cwrap
+
+#endif // __CWRAP_FILDES_H__
diff --git a/compiler/cwrap/src/Fildes.cpp b/compiler/cwrap/src/Fildes.cpp
new file mode 100644
index 000000000..5ccb83f05
--- /dev/null
+++ b/compiler/cwrap/src/Fildes.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cwrap/Fildes.h"
+
+#include <cassert>
+#include <unistd.h>
+
+namespace
+{
+
+/**
+ * @note making this function inline prevents an unused-function error,
+ * as error_value() is used only inside assert()
+ */
+inline bool error_value(int fd) { return fd == -1; }
+
+inline bool valid_value(int fd) { return fd >= 0; }
+
+} // namespace
+
+namespace cwrap
+{
+
+Fildes::Fildes() : _value{-1}
+{
+ // DO NOTHING
+}
+
+Fildes::Fildes(int value) : _value{value}
+{
+ // DO NOTHING
+ assert(error_value(value) || valid_value(value));
+}
+
+Fildes::Fildes(Fildes &&fildes)
+{
+ set(fildes.release());
+ assert(error_value(fildes.get()));
+}
+
+Fildes::~Fildes()
+{
+ assert(error_value(_value) || valid_value(_value));
+
+ if (valid_value(_value))
+ {
+ close(_value);
+ _value = -1;
+ }
+
+ assert(error_value(_value));
+}
+
+Fildes &Fildes::operator=(Fildes &&fildes)
+{
+ set(fildes.release());
+ return (*this);
+}
+
+int Fildes::get(void) const { return _value; }
+
+void Fildes::set(int value)
+{
+ assert(error_value(_value) || valid_value(_value));
+
+ if (valid_value(_value))
+ {
+ close(_value);
+ _value = -1;
+ }
+ assert(error_value(_value));
+
+ _value = value;
+ assert(_value == value);
+}
+
+int Fildes::release(void)
+{
+ int res = get();
+ _value = -1;
+ return res;
+}
+
+bool valid(const Fildes &fildes) { return valid_value(fildes.get()); }
+
+} // namespace cwrap
diff --git a/compiler/cwrap/src/Fildes.test.cpp b/compiler/cwrap/src/Fildes.test.cpp
new file mode 100644
index 000000000..08e1e2a5e
--- /dev/null
+++ b/compiler/cwrap/src/Fildes.test.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cwrap/Fildes.h"
+
+#include <cstdlib>
+#include <string>
+#include <stdexcept>
+
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <gtest/gtest.h>
+
+#define DECLARE_TEMPLATE(NAME) char NAME[] = "FILDES-TEST-XXXXXX"
+
+namespace
+{
+
+int make_temp(char *name_template)
+{
+ int fd = mkstemp(name_template);
+
+ if (fd == -1)
+ {
+ throw std::runtime_error{"mkstemp failed"};
+ }
+
+ return fd;
+}
+
+} // namespace
+
+TEST(FildesTest, default_constructor)
+{
+ cwrap::Fildes fildes;
+
+ ASSERT_FALSE(cwrap::valid(fildes));
+}
+
+TEST(FildesTest, value_constructor)
+{
+ DECLARE_TEMPLATE(name_template);
+
+ cwrap::Fildes fildes{make_temp(name_template)};
+
+ ASSERT_TRUE(cwrap::valid(fildes));
+}
+
+TEST(FildesTest, move_constructor)
+{
+ DECLARE_TEMPLATE(src_template);
+ DECLARE_TEMPLATE(dst_template);
+
+ int src_fd = make_temp(src_template);
+ int dst_fd = make_temp(dst_template);
+
+ cwrap::Fildes src{src_fd};
+ cwrap::Fildes dst{dst_fd};
+
+ dst = std::move(src);
+
+ ASSERT_FALSE(cwrap::valid(src));
+ ASSERT_TRUE(cwrap::valid(dst));
+
+ ASSERT_EQ(dst.get(), src_fd);
+
+ // "src_fd" SHOULD be valid, and "dst_fd" SHOULD be closed
+ ASSERT_NE(fcntl(src_fd, F_GETFD), -1);
+ ASSERT_EQ(fcntl(dst_fd, F_GETFD), -1);
+}
+
+TEST(FildesTest, destructor)
+{
+ DECLARE_TEMPLATE(name_template);
+
+ int fd = make_temp(name_template);
+
+ ASSERT_NE(fcntl(fd, F_GETFD), -1);
+ {
+ cwrap::Fildes fildes{fd};
+ }
+ ASSERT_EQ(fcntl(fd, F_GETFD), -1);
+}
diff --git a/compiler/dredd-rule-lib/CMakeLists.txt b/compiler/dredd-rule-lib/CMakeLists.txt
new file mode 100644
index 000000000..b39d86272
--- /dev/null
+++ b/compiler/dredd-rule-lib/CMakeLists.txt
@@ -0,0 +1,21 @@
+#
+# copy rule-lib.sh (a library of shell script functions)
+#
+set(SOURCE_RULE_LIB "${CMAKE_CURRENT_SOURCE_DIR}/rule-lib.sh")
+set(TARGET_RULE_LIB "${CMAKE_CURRENT_BINARY_DIR}/rule-lib.sh")
+
+add_custom_command(
+ OUTPUT ${TARGET_RULE_LIB}
+ COMMAND ${CMAKE_COMMAND} -E copy "${SOURCE_RULE_LIB}" "${TARGET_RULE_LIB}"
+ DEPENDS ${SOURCE_RULE_LIB}
+ COMMENT "Generate rule lib"
+)
+
+# Generate dependencies
+add_custom_target(dredd_rule_lib ALL DEPENDS ${TARGET_RULE_LIB})
+
+# How to get the path of rule-lib.sh in other CMakeLists.txt
+#
+# get_target_property(DREDD_RULE_LIB_DIR
+# dredd_rule_lib BINARY_DIR)
+# set(RULE_LIB_PATH "${DREDD_RULE_LIB_DIR}/rule-lib.sh")
diff --git a/compiler/dredd-rule-lib/README.md b/compiler/dredd-rule-lib/README.md
new file mode 100644
index 000000000..348b0aefb
--- /dev/null
+++ b/compiler/dredd-rule-lib/README.md
@@ -0,0 +1,112 @@
+# dredd-rule-lib
+
+*dredd-rule-lib* is a library that defines functions to run *dredd* tests, which checks non-functional aspect of compiled files.
+
+## Terms
+
+Assume that we want to check the size of generated tflite file to be less than 1024 Bytes.
+In such case, we'd like to use the following terms:
+
+- "metric" : *file size*
+- "rule" : *file size < 1024*
+- "metric function": `file_size` that returns size of a compiled tflite file
+
+Models (input of test) exist in *model repo*, where
+
+- "model repo" : directory where models exist. For *tf2tflite-dredd-pbtxt-test*, model repo is
+ `res/TensorFlowTests`.
+
+## Metrics supported
+
+The following metric functions are provided:
+- `all_op_count` : the count of operations inside a compiled tflite file
+- `file_size` : the size of compiled tflite file
+- In addition, `op_count`, `conv2d_weight_not_constant`, etc.
+- Please refer to [`rule-lib.sh`](rule-lib.sh) for metric functions
+
+## Related projects - *dredd* tests
+
+Four *dredd* test projects use *dredd-rule-lib*:
+
+- *tf2tflite-dredd-pbtxt-test*
+ - Models in `pbtxt`, text file, are compiled into `tflite` file.
+ - Then `rule` file that each model has is checked against the `tflite` file.
+- *tf2tflite-dredd-pb-test*
+ - Models in `pb`, binary file, are compiled into `tflite` file.
+ - Then `rule` file that each model has is checked against the `tflite` file.
+- *tf2circle-dredd-pbtxt-test*
+ - Models in `pbtxt`, text file, are compiled into `circle` file.
+ - Then `rule` file that each model has is checked against the `circle` file.
+- *tf2circle-dredd-pb-test*
+ - Models in `pb`, binary file, are compiled into `circle` file.
+ - Then `rule` file that each model has is checked against the `circle` file.
+
+## Rule file
+
+To be a target of *dredd*-tests, a `.rule` file **must** exist in a model directory.
+Please refer to `res/TensorFlowTests/NET_0025/tflite_1.0_rel_requirement.rule` for an example.
+
+### Naming convention of rule file
+
+Note that the file name `tflite_1.0_rel_requirement.rule` is our convention containing the
+information below:
+- Generated file type (`tflite`)
+- SDK version (`1.0_rel`)
+- Purpose (`requirement`)
+
+## How do all these work?
+
+For *tf2tflite-dredd-pbtxt-test*, (*tf2circle-dredd-pbtxt-test* works similarly)
+
+```
+model repo tf2tflite-dredd-pbtxt-test
+-----------------------------------------------------------------------------------------------
+ NET_0025
+ ├── test.pbtxt ----------------------> converted to NET_0025.pb, and then NET_0025.tflite
+ | /|\
+ ├── test.info ---------------------------+
+ | (input/output info of model)
+ |
+ └── tflite_1.0_rel_requirement.rule --> running rule file against tflite --> pass or fail
+ /|\
+ dredd-rule-lib | (using)
+ ---------------------- |
+ rule-lib.sh |
+ - defining rule function --+
+```
+
+For *tf2tflite-dredd-pb-test*, (*tf2circle-dredd-pb-test* works similarly)
+
+```
+model repo tf2tflite-dredd-pb-test
+-----------------------------------------------------------------------------------------------
+ Inception_v3
+ ├── model.pb ------------------------> converted to Inception_v3.tflite
+ | /|\
+ ├── model.info --------------------------+
+ | (input/output info of model)
+ |
+ └── tflite_1.0_rel_requirement.rule --> running rule file against tflite --> pass or fail
+ /|\
+ dredd-rule-lib | (using)
+ ---------------------- |
+ rule-lib.sh |
+ - defining rule function --+
+```
+
+## Model repo and How to add a model as a target of a *dredd*-test.
+
+For *tf2tflite-dredd-pbtxt-test* and *tf2circle-dredd-pbtxt-test*,
+model repo is `res/TensorFlowTests`.
+
+To add a model into these tests, the model directory name should be added into one of the following files:
+- `test.lst` : This file resides in git
+- `test.local.lst` : This file is ignored by git. Use this for personal purpose.
+
+For *tf2tflite-dredd-pb-test* and *tf2circle-dredd-pb-test*,
+model repo is `tf2tflite-dredd-pb-test/contrib` and `tf2circle-dredd-pb-test/contrib` respectively.
+
+Use these tests for binary models in large size.
+
+To add a model into these tests, the model directory name should be added into the following file:
+- `contrib.lst` : This file is ignored by git.
diff --git a/compiler/dredd-rule-lib/rule-lib.sh b/compiler/dredd-rule-lib/rule-lib.sh
new file mode 100755
index 000000000..8ebe3d7af
--- /dev/null
+++ b/compiler/dredd-rule-lib/rule-lib.sh
@@ -0,0 +1,203 @@
+#!/bin/bash
+
+# the following env vars should be defined to call dredd function (except RULE):
+# COMPILED_FILE
+# INSPECT_PROG_PATH
+# VERIFY_PROG_PATH
+# ERROR_LOG
+
+# exit if unknown var is used
+set -u
+
+# ---------------
+# HELPER FUNCTION
+
+init_error_log()
+{
+ # create ${ERROR_LOG} that redirect stderr for pipe
+ exec 2>"${ERROR_LOG}"
+}
+
+argc_check()
+{
+ ACTUAL_ARGC=$1
+ EXPECTED_ARGC=$2
+
+ if [ "$#" -ne 2 ];then
+ echo "argc_check : param count must be 2" > ${ERROR_LOG}
+ echo "error" # return value of sub-shell
+ exit 1
+ fi
+
+ if [ ${ACTUAL_ARGC} -ne ${EXPECTED_ARGC} ];then
+ echo "arg count mismatch: actual = ${ACTUAL_ARGC} vs expected = ${EXPECTED_ARGC}" > ${ERROR_LOG}
+ echo "error" # return value of sub-shell
+ exit 1
+ fi
+}
+
+file_path_check()
+{
+ argc_check $# 1
+
+ if [ ! -f $1 ]; then
+ echo "$1 does not exist" > ${ERROR_LOG}
+ echo "error" # return value of sub-shell
+ exit 1
+ fi
+}
+
+check_success_exit_code()
+{
+ ACTUAL_EXIT_CODE=$1
+ EXPECTED_SUCCESS_CODE=$2
+
+ if [ ${ACTUAL_EXIT_CODE} -ne ${EXPECTED_SUCCESS_CODE} ];then
+ echo "error"
+ exit 1
+ fi
+}
+
+check_error_exit_code()
+{
+ ACTUAL_EXIT_CODE=$1
+ EXPECTED_ERROR_CODE=$2
+
+ if [ ${ACTUAL_EXIT_CODE} -eq ${EXPECTED_ERROR_CODE} ];then
+ echo "error"
+ exit 1
+ fi
+}
+
+# END of HELPER FUNCTION
+# ----------------------
+
+#
+# Define rule
+#
+# - Params: rule name (metric), actual value, condition, expected value
+# - condition is '=', '!=', '<', '>', '<=', '>='. Refer to "man expr"
+# - Return
+# - 0 : success
+# - 1 : fail (condition check fail)
+#
+
+RULE()
+{
+ argc_check $# 4
+
+ RULE_NAME=$1
+ ACTUAL=$2
+ COND=$3
+ EXPECTED=$4
+
+ # not to exit when expr result with 0
+ set +e
+
+ expr ${ACTUAL} ${COND} ${EXPECTED} > /dev/null
+ RESULT=$?
+
+ # roll-back
+ set -e
+
+ # Note: return value of 'expr'
+ # - 0 : result is true
+ # - 1 : result is false
+ # - 2 : error
+
+ if [ ${RESULT} -eq 0 ];then
+ echo -e "** [${RULE_NAME}] \t success \t ([actual: ${ACTUAL}] ${COND} [expected: ${EXPECTED}])"
+ elif [ ${RESULT} -eq 1 ];then
+ echo -e "** [${RULE_NAME}] \t ** fail \t ([actual: ${ACTUAL}] ${COND} [expected: ${EXPECTED}])"
+ else
+ echo -e "\t** Error in [expr ${ACTUAL} ${COND} ${EXPECTED}]"
+ fi
+
+ return ${RESULT}
+}
+
+#
+# Define each function to get quality value
+#
+
+# Note: These functions are called by a sub-shell.
+# So return value should be passed through "echo return_value"
+# tip: for debugging, surround the code with "set -x" and "set +x"
+
+file_size()
+{
+ file_path_check ${COMPILED_FILE}
+
+ set -o pipefail
+
+ ACTUAL=`init_error_log ; cat ${COMPILED_FILE} | wc -c`
+
+ check_success_exit_code $? 0
+
+ echo ${ACTUAL}
+}
+
+all_op_count()
+{
+ file_path_check ${COMPILED_FILE}
+ file_path_check ${INSPECT_PROG_PATH}
+
+ set -o pipefail
+
+ ACTUAL=`init_error_log ; ${INSPECT_PROG_PATH} --operators ${COMPILED_FILE} | wc -l`
+
+ check_success_exit_code $? 0
+
+ echo ${ACTUAL}
+}
+
+op_count()
+{
+ argc_check $# 1
+ file_path_check ${COMPILED_FILE}
+ file_path_check ${INSPECT_PROG_PATH}
+
+ set -o pipefail
+
+ RESULT=`init_error_log ; ${INSPECT_PROG_PATH} --operators ${COMPILED_FILE}`
+ check_success_exit_code $? 0
+
+ # note : grep's exit code is 2 in case of error.
+ ACTUAL=`init_error_log ; echo "${RESULT}" | grep -wc "$1"`
+ check_error_exit_code $? 2
+
+ echo ${ACTUAL}
+}
+
+conv2d_weight_not_constant()
+{
+ file_path_check ${COMPILED_FILE}
+ file_path_check ${INSPECT_PROG_PATH}
+
+ set -o pipefail
+
+ ACTUAL=`init_error_log ; \
+ ${INSPECT_PROG_PATH} --conv2d_weight ${COMPILED_FILE} | \
+ awk -F, '{ if ($2 != "CONST") print $0}' | wc -l`
+
+ check_success_exit_code $? 0
+
+ echo ${ACTUAL}
+}
+
+verify_file_format()
+{
+ file_path_check ${COMPILED_FILE}
+ file_path_check ${VERIFY_PROG_PATH}
+
+ set -o pipefail
+
+ ACTUAL=`init_error_log ; ${VERIFY_PROG_PATH} ${COMPILED_FILE} | grep -c "PASS"`
+
+ # note grep can exit with 1 ("PASS" not found) and this is treated as an error
+ check_success_exit_code $? 0
+
+ echo ${ACTUAL}
+}
+
+# TODO define more quality test functions
diff --git a/compiler/enco-intf/CMakeLists.txt b/compiler/enco-intf/CMakeLists.txt
new file mode 100644
index 000000000..6014512c8
--- /dev/null
+++ b/compiler/enco-intf/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(frontend)
+add_subdirectory(cmdline)
diff --git a/compiler/enco-intf/cmdline/CMakeLists.txt b/compiler/enco-intf/cmdline/CMakeLists.txt
new file mode 100644
index 000000000..91221ca1a
--- /dev/null
+++ b/compiler/enco-intf/cmdline/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_library(enco_intf_cmdline INTERFACE)
+target_include_directories(enco_intf_cmdline INTERFACE include)
diff --git a/compiler/enco-intf/cmdline/include/cmdline/View.h b/compiler/enco-intf/cmdline/include/cmdline/View.h
new file mode 100644
index 000000000..dd8d1d7eb
--- /dev/null
+++ b/compiler/enco-intf/cmdline/include/cmdline/View.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CMDLINE_VIEW_H__
+#define __CMDLINE_VIEW_H__
+
+#include <cstdint>
+
+namespace cmdline
+{
+
+struct View
+{
+ virtual ~View() = default;
+
+ virtual uint32_t size(void) const = 0;
+ virtual const char *at(uint32_t n) const = 0;
+};
+
+} // namespace cmdline
+
+#endif // __CMDLINE_VIEW_H__
diff --git a/compiler/enco-intf/frontend/CMakeLists.txt b/compiler/enco-intf/frontend/CMakeLists.txt
new file mode 100644
index 000000000..164dbd2b5
--- /dev/null
+++ b/compiler/enco-intf/frontend/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_library(enco_intf_frontend INTERFACE)
+target_include_directories(enco_intf_frontend INTERFACE include)
+target_link_libraries(enco_intf_frontend INTERFACE coco_core)
+target_link_libraries(enco_intf_frontend INTERFACE coco_generic)
diff --git a/compiler/enco-intf/frontend/include/enco/Bundle.h b/compiler/enco-intf/frontend/include/enco/Bundle.h
new file mode 100644
index 000000000..7c3dca88f
--- /dev/null
+++ b/compiler/enco-intf/frontend/include/enco/Bundle.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_BUNDLE_H__
+#define __ENCO_BUNDLE_H__
+
+#include "coco/IR/Module.h"
+#include "coco/IR/Data.h"
+
+#include <memory>
+
+namespace enco
+{
+
+class Bundle
+{
+public:
+ Bundle() = default;
+
+public:
+ coco::Module *module(void) const { return _m.get(); }
+ void module(std::unique_ptr<coco::Module> &&m) { _m = std::move(m); }
+
+public:
+ coco::Data *data(void) const { return _d.get(); }
+ void data(std::unique_ptr<coco::Data> &&d) { _d = std::move(d); }
+
+private:
+ std::unique_ptr<coco::Module> _m;
+ std::unique_ptr<coco::Data> _d;
+};
+
+} // namespace enco
+
+#endif // __ENCO_BUNDLE_H__
diff --git a/compiler/enco-intf/frontend/include/enco/Frontend.h b/compiler/enco-intf/frontend/include/enco/Frontend.h
new file mode 100644
index 000000000..d3a48183a
--- /dev/null
+++ b/compiler/enco-intf/frontend/include/enco/Frontend.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_FRONTEND_H__
+#define __ENCO_FRONTEND_H__
+
+#include "Bundle.h"
+
+namespace enco
+{
+
+struct Frontend
+{
+ virtual ~Frontend() = default;
+
+ virtual Bundle load(void) const = 0;
+};
+
+} // namespace enco
+
+#endif // __ENCO_FRONTEND_H__
diff --git a/compiler/enco/CMakeLists.txt b/compiler/enco/CMakeLists.txt
new file mode 100644
index 000000000..17300e25e
--- /dev/null
+++ b/compiler/enco/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_subdirectory(core)
+add_subdirectory(frontend)
+add_subdirectory(cli)
+add_subdirectory(test)
diff --git a/compiler/enco/README.md b/compiler/enco/README.md
new file mode 100644
index 000000000..d995a1e55
--- /dev/null
+++ b/compiler/enco/README.md
@@ -0,0 +1,25 @@
+# enco
+
+_enco_ is a tool which translates a NN model into a C++ source code that implements the following functions:
+```
+struct Network;
+
+Network *Network_construct();
+void Network_destruct(Network *net);
+
+unsigned Network_input_count(const Network *);
+const char *Network_input_name(const Network *, unsigned n);
+unsigned Network_input_rank(const Network *, unsigned n);
+unsigned Network_input_dim(const Network *, unsigned n, unsigned axis);
+void Network_input_bind(Network *net, unsigned n, const void *ptr, unsigned len);
+
+unsigned Network_output_count(const Network *net);
+const char *Network_output_name(const Network *, unsigned n);
+unsigned Network_output_rank(const Network *, unsigned n);
+unsigned Network_output_dim(const Network *, unsigned n, unsigned axis);
+void Network_output_bind(Network *net, unsigned n, void *ptr, unsigned len);
+
+void Network_invoke(Network *net);
+```
+
+Generated C++ code internally uses Android NN API for acceleration.
diff --git a/compiler/enco/cli/CMakeLists.txt b/compiler/enco/cli/CMakeLists.txt
new file mode 100644
index 000000000..5a43ab655
--- /dev/null
+++ b/compiler/enco/cli/CMakeLists.txt
@@ -0,0 +1,11 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(enco-cli ${SOURCES})
+target_include_directories(enco-cli PRIVATE src)
+target_link_libraries(enco-cli enco_intf_cmdline)
+target_link_libraries(enco-cli enco_intf_frontend)
+target_link_libraries(enco-cli enco_core)
+target_link_libraries(enco-cli stdex)
+target_link_libraries(enco-cli dl)
+# Let's use project-wide compile options
+target_link_libraries(enco-cli nncc_common)
diff --git a/compiler/enco/cli/src/Driver.cpp b/compiler/enco/cli/src/Driver.cpp
new file mode 100644
index 000000000..185bb13b9
--- /dev/null
+++ b/compiler/enco/cli/src/Driver.cpp
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <enco/Frontend.h>
+#include <enco/Backend.h>
+
+#include <cmdline/View.h>
+
+#include <string>
+#include <vector>
+
+#include <functional>
+
+namespace cmdline
+{
+
+// TODO Extract this helper class
+class Vector : public cmdline::View
+{
+public:
+ uint32_t size(void) const { return _args.size(); }
+
+public:
+ const char *at(uint32_t nth) const { return _args.at(nth).c_str(); }
+
+public:
+ Vector &append(const std::string &arg)
+ {
+ _args.emplace_back(arg);
+ return (*this);
+ }
+
+private:
+ std::vector<std::string> _args;
+};
+
+} // namespace cmdline
+
+namespace
+{
+
+class Zone
+{
+public:
+ Zone() = default;
+
+public:
+ const cmdline::View *args(void) const { return &_args; }
+
+public:
+ void append(const std::string &arg) { _args.append(arg); }
+
+private:
+ cmdline::Vector _args;
+};
+
+} // namespace
+
+#include <dlfcn.h>
+
+namespace
+{
+
+class FrontendFactory
+{
+public:
+ FrontendFactory(const std::string &path)
+ {
+ _handle = dlopen(path.c_str(), RTLD_LAZY);
+ assert(_handle != nullptr);
+ }
+
+public:
+ // Copy is not allowed to avoid double close
+ FrontendFactory(const FrontendFactory &) = delete;
+ FrontendFactory(FrontendFactory &&) = delete;
+
+public:
+ ~FrontendFactory() { dlclose(_handle); }
+
+private:
+ using Entry = std::unique_ptr<enco::Frontend> (*)(const cmdline::View &);
+
+private:
+ Entry entry(void) const
+ {
+ auto entry = reinterpret_cast<Entry>(dlsym(_handle, "make_frontend"));
+ assert(entry != nullptr);
+ return entry;
+ }
+
+public:
+ std::unique_ptr<enco::Frontend> make(const cmdline::View *args) const
+ {
+ auto fn = entry();
+ return fn(*args);
+ }
+
+private:
+ void *_handle;
+};
+
+} // namespace
+
+namespace
+{
+
+class FrontendZone : public Zone
+{
+public:
+ FrontendZone(const std::string &path) : _factory{path}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const FrontendFactory *factory(void) const { return &_factory; }
+
+private:
+ FrontendFactory _factory;
+};
+
+} // namespace
+
+#include <stdex/Memory.h>
+
+#include <map>
+
+#include <iostream>
+#include <stdexcept>
+
+static int entry(int argc, char **argv)
+{
+ // Usage:
+ // [Command] --frontend [Frontend .so path] --frontend-arg ...
+ std::unique_ptr<FrontendZone> frontend_zone;
+ cmdline::Vector backend_args;
+
+ // Simple argument parser (based on map)
+ std::map<std::string, std::function<void(const std::string &arg)>> argparse;
+
+ argparse["--frontend"] = [&](const std::string &path) {
+ frontend_zone = stdex::make_unique<FrontendZone>(path);
+ };
+
+ argparse["--frontend-arg"] = [&](const std::string &arg) { frontend_zone->append(arg); };
+ argparse["--backend-arg"] = [&](const std::string &arg) { backend_args.append(arg); };
+
+ if (argc < 2)
+ {
+ std::cerr << "Usage:" << std::endl;
+ std::cerr << "[Command] --frontend [.so path]" << std::endl;
+ std::cerr << " --frontend-arg [argument] ..." << std::endl;
+ std::cerr << " --backend-arg [argument] ..." << std::endl;
+ return 255;
+ }
+
+ for (int n = 1; n < argc; n += 2)
+ {
+ const std::string tag{argv[n]};
+ const std::string arg{argv[n + 1]};
+
+ auto it = argparse.find(tag);
+
+ if (it == argparse.end())
+ {
+ std::cerr << "Option '" << tag << "' is not supported" << std::endl;
+ return 255;
+ }
+
+ it->second(arg);
+ }
+
+ assert(frontend_zone != nullptr);
+
+ auto frontend = frontend_zone->factory()->make(frontend_zone->args());
+
+ auto bundle = frontend->load();
+
+ auto backend = make_backend(backend_args);
+
+ backend->compile(bundle.module(), bundle.data());
+
+ return 0;
+}
+
+#ifdef NDEBUG
+int main(int argc, char **argv)
+{
+ try
+ {
+ return entry(argc, argv);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "ERROR: " << e.what() << std::endl;
+ }
+
+ return 255;
+}
+#else // NDEBUG
+int main(int argc, char **argv)
+{
+ // NOTE main does not catch internal exceptions for debug build to make it easy to
+ // check the stacktrace with a debugger
+ return entry(argc, argv);
+}
+#endif // !NDEBUG
diff --git a/compiler/enco/core/CMakeLists.txt b/compiler/enco/core/CMakeLists.txt
new file mode 100644
index 000000000..f437e687a
--- /dev/null
+++ b/compiler/enco/core/CMakeLists.txt
@@ -0,0 +1,35 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+###
+### enco_core is built as a shared library to support "interactive debugging".
+###
+### interactive debugging helpers are stripped during linking when enco_core is
+### built as a static library
+###
+add_library(enco_core SHARED ${SOURCES})
+target_include_directories(enco_core PRIVATE src)
+target_include_directories(enco_core PUBLIC include)
+target_link_libraries(enco_core PUBLIC enco_intf_cmdline)
+target_link_libraries(enco_core PUBLIC coco_core)
+target_link_libraries(enco_core PUBLIC coco_generic)
+# These libraries are linked for internal use, and thus do not appear in public headers.
+target_link_libraries(enco_core PRIVATE pp)
+target_link_libraries(enco_core PRIVATE morph)
+target_link_libraries(enco_core PRIVATE stdex)
+# Let's use nncc project-wide build options
+target_link_libraries(enco_core PRIVATE nncc_common)
+
+nnas_find_package(GTest QUIET)
+
+if(NOT GTest_FOUND)
+ return()
+endif(NOT GTest_FOUND)
+
+add_executable(enco_core_test ${TESTS})
+target_include_directories(enco_core_test PRIVATE src)
+target_link_libraries(enco_core_test gtest_main)
+target_link_libraries(enco_core_test enco_core)
+target_link_libraries(enco_core_test morph)
+add_test(enco_core_test enco_core_test)
diff --git a/compiler/enco/core/include/enco/Backend.h b/compiler/enco/core/include/enco/Backend.h
new file mode 100644
index 000000000..5da903ed2
--- /dev/null
+++ b/compiler/enco/core/include/enco/Backend.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_BACKEND_H__
+#define __ENCO_BACKEND_H__
+
+#include "cmdline/View.h"
+
+#include "coco/IR/Module.h"
+#include "coco/IR/Data.h"
+
+#include <memory>
+
+namespace enco
+{
+
+struct Backend
+{
+ virtual ~Backend() = default;
+
+ virtual void compile(coco::Module *m, coco::Data *d) = 0;
+};
+
+} // namespace enco
+
+std::unique_ptr<enco::Backend> make_backend(const cmdline::View &);
+
+#endif // __ENCO_BACKEND_H__
diff --git a/compiler/enco/core/src/ANN/Binder.h b/compiler/enco/core/src/ANN/Binder.h
new file mode 100644
index 000000000..71b95676b
--- /dev/null
+++ b/compiler/enco/core/src/ANN/Binder.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANN_BINDER_H__
+#define __ANN_BINDER_H__
+
+#include "ANN/IR/Module.h"
+
+#include <coco/IR.h>
+
+#include <morph/nnapi.h>
+
+#include <type_traits>
+
+/**
+ * @brief A bridge between ann::Module and coco::Block
+ */
+class ANNBinder
+{
+public:
+ ANNBinder(coco::Block *block, std::unique_ptr<ann::Module> &&module)
+ : _block{block}, _module{std::move(module)}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const coco::Block *block(void) const { return _block; }
+ coco::Block *block(void) { return _block; }
+
+public:
+ const ann::Module *module(void) const { return _module.get(); }
+
+public:
+ /**
+ * @brief Return the set of bags that the current ANN subnet accesses
+ */
+ std::set<coco::Bag *> bags(void) const
+ {
+ std::set<coco::Bag *> res;
+
+ for (auto it = _operands.begin(); it != _operands.end(); ++it)
+ {
+ res.insert(it->first);
+ }
+
+ return res;
+ }
+
+public:
+ template <typename T> ann::OperandID addOperand(void)
+ {
+ return _module->operand()->create(ann::dtype<T>());
+ };
+
+ template <typename T> ann::OperandID addOperand(const nncc::core::ADT::tensor::Shape &shape)
+ {
+ return _module->operand()->create(ann::dtype<T>(), shape);
+ }
+
+public:
+ template <typename T> ann::OperandID addOperand(const coco::FeatureObject *obj)
+ {
+ auto bag = obj->bag();
+ assert(bag != nullptr);
+
+ auto it = _operands.find(bag);
+
+ if (it != _operands.end())
+ {
+ return it->second;
+ }
+
+ auto operand = addOperand<T>(morph::nnapi::as_tensor_shape(obj->shape()));
+ _operands[obj->bag()] = operand;
+ return operand;
+ };
+
+ template <typename T> ann::OperandID addOperand(const coco::KernelObject *obj)
+ {
+ auto bag = obj->bag();
+ assert(bag != nullptr);
+
+ auto it = _operands.find(bag);
+
+ if (it != _operands.end())
+ {
+ return it->second;
+ }
+
+ auto operand = addOperand<T>(morph::nnapi::as_tensor_shape(obj->shape()));
+ _operands[obj->bag()] = operand;
+ return operand;
+ };
+
+public:
+ /// @brief Set scalar weight
+ template <typename T> void setOperand(const ann::OperandID &id, const T &value)
+ {
+ static_assert(std::is_arithmetic<T>::value, "T should be arithmetic");
+ auto weight = _module->weight()->create();
+ weight->fill(value);
+ _module->operand()->at(id)->weight(weight);
+ }
+
+ /// @brief Set non-scalar weight
+ template <typename It> void setOperand(const ann::OperandID &id, It beg, It end)
+ {
+ auto weight = _module->weight()->create();
+ weight->fill(beg, end);
+ _module->operand()->at(id)->weight(weight);
+ }
+
+public:
+ void addOperation(ann::Operation::Code code, std::initializer_list<ann::OperandID> inputs,
+ std::initializer_list<ann::OperandID> outputs)
+ {
+ _module->operation()->create(code, inputs, outputs);
+ }
+
+public:
+ /**
+ * @brief Identify a sequence of coco::Bag * as subnet's inputs
+ *
+ * NOTE 1. This method takes an input iterator over coco::Bag * values
+ * NOTE 2. All the identifyInputs calls except the last one will be ignored if there are
+ * multiple identifyInputs calls
+ */
+ template <typename It> void identifyInputs(It beg, It end)
+ {
+ _inputs.clear();
+ _module->input()->clear();
+
+ for (auto it = beg; it != end; ++it)
+ {
+ auto const bag = *it;
+ _inputs.emplace_back(*it);
+ _module->input()->emplace_back(_operands.at(bag));
+ }
+ }
+
+ template <typename T> void identifyInputs(T &&values)
+ {
+ identifyInputs(std::begin(values), std::end(values));
+ }
+
+public:
+ /**
+ * @brief Identify a sequence of coco::Bag * as subnet's outputs
+ *
+ * NOTE 1. This method takes an input iterator over coco::Bag * values
+ * NOTE 2. All the identifyOutputs calls except the last one will be ignored if there are
+ * multiple identifyOutputs calls
+ */
+ template <typename It> void identifyOutputs(It beg, It end)
+ {
+ _outputs.clear();
+ _module->output()->clear();
+
+ for (auto it = beg; it != end; ++it)
+ {
+ auto const bag = *it;
+ _outputs.emplace_back(bag);
+ _module->output()->emplace_back(_operands.at(bag));
+ }
+ }
+
+ template <typename T> void identifyOutputs(T &&values)
+ {
+ identifyOutputs(std::begin(values), std::end(values));
+ }
+
+public:
+ coco::Bag *input(uint32_t n) const { return _inputs.at(n); }
+ coco::Bag *output(uint32_t n) const { return _outputs.at(n); }
+
+public:
+ /**
+ * @brief Return true if a given bag has an associated operand in ANN IR
+ */
+ bool associated(coco::Bag *b) const { return _operands.find(b) != _operands.end(); }
+
+ /**
+ * @brief Return operand ID associated with a given bag
+ * @note The behavior of operand(b) is defined only when associated(b) holds.
+ */
+ ann::OperandID operand(coco::Bag *b) const
+ {
+ assert(associated(b));
+ return _operands.at(b);
+ }
+
+private:
+ coco::Block *const _block;
+ std::unique_ptr<ann::Module> _module;
+
+private:
+ std::vector<coco::Bag *> _inputs;
+ std::vector<coco::Bag *> _outputs;
+
+private:
+ /// @brief Operand ID assigned for each coco::Bag
+ std::map<coco::Bag *, ann::OperandID> _operands;
+};
+
+#endif // __ANN_BINDER_H__
diff --git a/compiler/enco/core/src/ANN/Context.cpp b/compiler/enco/core/src/ANN/Context.cpp
new file mode 100644
index 000000000..d4d1882fa
--- /dev/null
+++ b/compiler/enco/core/src/ANN/Context.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ANN/Context.h"
+
+#include <stdex/Memory.h>
+
+ANNBinder *ANNContext::create(coco::Block *blk)
+{
+ auto mod = stdex::make_unique<ann::Module>();
+ auto obj = stdex::make_unique<ANNBinder>(blk, std::move(mod));
+ auto ptr = obj.get();
+
+ _binders.emplace_back(std::move(obj));
+ _map[blk] = ptr;
+
+ return ptr;
+}
diff --git a/compiler/enco/core/src/ANN/Context.h b/compiler/enco/core/src/ANN/Context.h
new file mode 100644
index 000000000..915651eb5
--- /dev/null
+++ b/compiler/enco/core/src/ANN/Context.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANN_CONTEXT_H__
+#define __ANN_CONTEXT_H__
+
+#include "ANN/Binder.h"
+
+#include <map>
+#include <vector>
+
+#include <memory>
+
+struct ANNContext
+{
+public:
+ ANNBinder *create(coco::Block *blk);
+
+public:
+ uint32_t count(void) const { return _binders.size(); }
+
+public:
+ ANNBinder *nth(uint32_t n) { return _binders.at(n).get(); }
+ const ANNBinder *nth(uint32_t n) const { return _binders.at(n).get(); }
+
+public:
+ ANNBinder *find(const coco::Block *blk) const
+ {
+ auto it = _map.find(blk);
+
+ if (it == _map.end())
+ {
+ return nullptr;
+ }
+
+ return it->second;
+ }
+
+private:
+ std::vector<std::unique_ptr<ANNBinder>> _binders;
+ std::map<const coco::Block *, ANNBinder *> _map;
+};
+
+#endif // __ANN_CONTEXT_H__
diff --git a/compiler/enco/core/src/ANN/Context.test.cpp b/compiler/enco/core/src/ANN/Context.test.cpp
new file mode 100644
index 000000000..7fd26f30c
--- /dev/null
+++ b/compiler/enco/core/src/ANN/Context.test.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Context.h"
+
+#include <set>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+class ANNContextTest : public ::testing::Test
+{
+public:
+ ANNContextTest() { m = coco::Module::create(); }
+
+public:
+ virtual ~ANNContextTest() = default;
+
+protected:
+ std::unique_ptr<coco::Module> m;
+};
+}
+
+TEST_F(ANNContextTest, constructor)
+{
+ ANNContext ann_ctx;
+
+ ASSERT_EQ(ann_ctx.count(), 0);
+}
+
+TEST_F(ANNContextTest, create)
+{
+ ANNContext ann_ctx;
+
+ auto blk = m->entity()->block()->create();
+ auto binder = ann_ctx.create(blk);
+
+ ASSERT_NE(binder, nullptr);
+}
+
+TEST_F(ANNContextTest, find)
+{
+ ANNContext ann_ctx;
+
+ // CASE: Corresponding binder does not exist
+ {
+ auto blk = m->entity()->block()->create();
+ ASSERT_EQ(ann_ctx.find(blk), nullptr);
+ }
+
+ // CASE: Corresponding binder does exist
+ {
+ auto blk = m->entity()->block()->create();
+ auto binder_created = ann_ctx.create(blk);
+ auto binder_found = ann_ctx.find(blk);
+
+ ASSERT_EQ(binder_created, binder_found);
+ }
+}
diff --git a/compiler/enco/core/src/ANN/IR/DType.cpp b/compiler/enco/core/src/ANN/IR/DType.cpp
new file mode 100644
index 000000000..7d4585a49
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/DType.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DType.h"
+
+namespace ann
+{
+
+template <> DType dtype<int32_t>(void) { return DType::S32; }
+template <> DType dtype<float>(void) { return DType::F32; }
+
+} // namespace ann
diff --git a/compiler/enco/core/src/ANN/IR/DType.h b/compiler/enco/core/src/ANN/IR/DType.h
new file mode 100644
index 000000000..b7583b09a
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/DType.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANN_IR_DTYPE_H__
+#define __ANN_IR_DTYPE_H__
+
+#include <cstdint>
+
+namespace ann
+{
+
+enum class DType
+{
+ UNK,
+ S32,
+ F32
+};
+
+template <typename T> DType dtype(void);
+
+} // namespace ann
+
+#endif // __ANN_IR_DTYPE_H__
diff --git a/compiler/enco/core/src/ANN/IR/DType.test.cpp b/compiler/enco/core/src/ANN/IR/DType.test.cpp
new file mode 100644
index 000000000..8184ece9b
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/DType.test.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DType.h"
+
+#include <gtest/gtest.h>
+
+TEST(ANN_IR_DTYPE, dtype)
+{
+ ASSERT_EQ(ann::dtype<int>(), ann::DType::S32);
+ ASSERT_EQ(ann::dtype<float>(), ann::DType::F32);
+}
diff --git a/compiler/enco/core/src/ANN/IR/InputList.h b/compiler/enco/core/src/ANN/IR/InputList.h
new file mode 100644
index 000000000..51f0fd95a
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/InputList.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANN_IR_INPUT_LIST_H__
+#define __ANN_IR_INPUT_LIST_H__
+
+#include "ANN/IR/OperandID.h"
+
+#include <vector>
+
+namespace ann
+{
+
+using InputList = std::vector<OperandID>;
+
+} // namespace ann
+
+#endif // __ANN_IR_INPUT_LIST_H__
diff --git a/compiler/enco/core/src/ANN/IR/Module.h b/compiler/enco/core/src/ANN/IR/Module.h
new file mode 100644
index 000000000..b443b4235
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/Module.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANN_IR_MODULE_H__
+#define __ANN_IR_MODULE_H__
+
+#include "ANN/IR/WeightInventory.h"
+#include "ANN/IR/OperandInventory.h"
+#include "ANN/IR/OperationInventory.h"
+#include "ANN/IR/InputList.h"
+#include "ANN/IR/OutputList.h"
+
+namespace ann
+{
+
+class Module
+{
+public:
+ Module() = default;
+
+public:
+ WeightInventory *weight(void) { return &_weight; }
+ const WeightInventory *weight(void) const { return &_weight; }
+
+ OperandInventory *operand(void) { return &_operand; }
+ const OperandInventory *operand(void) const { return &_operand; }
+
+ OperationInventory *operation(void) { return &_operation; }
+ const OperationInventory *operation(void) const { return &_operation; }
+
+ InputList *input(void) { return &_input; }
+ const InputList *input(void) const { return &_input; }
+
+ OutputList *output(void) { return &_output; }
+ const OutputList *output(void) const { return &_output; }
+
+private:
+ WeightInventory _weight;
+ OperandInventory _operand;
+ OperationInventory _operation;
+ InputList _input;
+ OutputList _output;
+};
+
+} // namespace ann
+
+#endif // __ANN_IR_MODULE_H__
diff --git a/compiler/enco/core/src/ANN/IR/Module.test.cpp b/compiler/enco/core/src/ANN/IR/Module.test.cpp
new file mode 100644
index 000000000..4b946c875
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/Module.test.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Module.h"
+
+#include <gtest/gtest.h>
+
+TEST(ANN_IR_MODULE, constructor)
+{
+ ann::Module m;
+
+ ann::Module *mutable_ptr = &m;
+ const ann::Module *immutable_ptr = &m;
+
+ ASSERT_NE(mutable_ptr->weight(), nullptr);
+ ASSERT_EQ(mutable_ptr->weight(), immutable_ptr->weight());
+
+ ASSERT_NE(mutable_ptr->operand(), nullptr);
+ ASSERT_EQ(mutable_ptr->operand(), immutable_ptr->operand());
+
+ ASSERT_NE(mutable_ptr->operation(), nullptr);
+ ASSERT_EQ(mutable_ptr->operation(), immutable_ptr->operation());
+}
diff --git a/compiler/enco/core/src/ANN/IR/Operand.h b/compiler/enco/core/src/ANN/IR/Operand.h
new file mode 100644
index 000000000..3b15ed739
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/Operand.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANN_IR_OPERAND_H__
+#define __ANN_IR_OPERAND_H__
+
+#include "ANN/IR/DType.h"
+#include "ANN/IR/Weight.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+namespace ann
+{
+
+class Operand
+{
+public:
+ virtual ~Operand() = default;
+
+public:
+ DType dtype(void) const { return _dtype; }
+ void dtype(const DType &dtype) { _dtype = dtype; }
+
+ const Weight *weight(void) const { return _weight; }
+ void weight(const Weight *weight) { _weight = weight; }
+
+private:
+ DType _dtype = DType::UNK;
+ const Weight *_weight = nullptr;
+};
+
+} // namespace ann
+
+namespace ann
+{
+
+/**
+ * @brief Plain (non-quantized) Scalar Operand
+ */
+struct ScalarOperand final : public Operand
+{
+};
+
+} // namespace ann
+
+namespace ann
+{
+
+/**
+ * @brief Plain (non-quantized) Tensor Operand
+ */
+struct TensorOperand final : public Operand
+{
+public:
+ TensorOperand(const nncc::core::ADT::tensor::Shape &shape) : _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const nncc::core::ADT::tensor::Shape &shape(void) const { return _shape; }
+
+private:
+ nncc::core::ADT::tensor::Shape _shape;
+};
+
+} // namespace ann
+
+#endif // __ANN_IR_OPERAND_H__
diff --git a/compiler/enco/core/src/ANN/IR/Operand.test.cpp b/compiler/enco/core/src/ANN/IR/Operand.test.cpp
new file mode 100644
index 000000000..98ac4ebd0
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/Operand.test.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Operand.h"
+
+#include <gtest/gtest.h>
+
+TEST(ANN_IR_SCALAR_OPERAND, constructor)
+{
+ const ann::ScalarOperand operand;
+
+ ASSERT_EQ(operand.dtype(), ann::DType::UNK);
+ ASSERT_EQ(operand.weight(), nullptr);
+}
+
+TEST(ANN_IR_TENSOR_OPERAND, constructor)
+{
+ const nncc::core::ADT::tensor::Shape shape{1, 2};
+ const ann::TensorOperand operand{shape};
+
+ ASSERT_EQ(operand.dtype(), ann::DType::UNK);
+ ASSERT_EQ(operand.weight(), nullptr);
+ ASSERT_EQ(operand.shape(), shape);
+}
diff --git a/compiler/enco/core/src/ANN/IR/OperandID.h b/compiler/enco/core/src/ANN/IR/OperandID.h
new file mode 100644
index 000000000..f1617aacb
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/OperandID.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANN_IR_OPERAND_ID_H__
+#define __ANN_IR_OPERAND_ID_H__
+
+#include <cstdint>
+
+namespace ann
+{
+
+class OperandID
+{
+public:
+ OperandID() : _value{0}
+ {
+ // DO NOTHING
+ }
+
+public:
+ explicit OperandID(uint32_t value) : _value{value}
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t value(void) const { return _value; }
+
+private:
+ uint32_t _value;
+};
+
+} // namespace ann
+
+#endif // __ANN_IR_OPERAND_ID_H__
diff --git a/compiler/enco/core/src/ANN/IR/OperandID.test.cpp b/compiler/enco/core/src/ANN/IR/OperandID.test.cpp
new file mode 100644
index 000000000..04c23b9c8
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/OperandID.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperandID.h"
+
+#include <gtest/gtest.h>
+
+// Default constructor yields ID 0
+TEST(ANN_IR_OPERAND_ID, default_constructor)
+{
+ ann::OperandID id;
+
+ ASSERT_EQ(id.value(), 0);
+}
+
+// Explicit constructor stores the given value
+TEST(ANN_IR_OPERAND_ID, explicit_constructor)
+{
+ ann::OperandID id{4};
+
+ ASSERT_EQ(id.value(), 4);
+}
diff --git a/compiler/enco/core/src/ANN/IR/OperandInventory.cpp b/compiler/enco/core/src/ANN/IR/OperandInventory.cpp
new file mode 100644
index 000000000..c7ad38811
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/OperandInventory.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ANN/IR/OperandInventory.h"
+
+#include <stdex/Memory.h>
+
+using stdex::make_unique;
+
+namespace ann
+{
+
+// Create a scalar operand of the given dtype; the returned ID is the operand's
+// index in '_operands', so IDs are assigned densely in creation order.
+OperandID OperandInventory::create(const DType &dtype)
+{
+ uint32_t id = _operands.size();
+
+ auto operand = make_unique<ScalarOperand>();
+ operand->dtype(dtype);
+
+ _operands.emplace_back(std::move(operand));
+
+ return OperandID{id};
+}
+
+// Create a tensor operand of the given dtype and shape (same ID scheme as above)
+OperandID OperandInventory::create(const DType &dtype, const nncc::core::ADT::tensor::Shape &shape)
+{
+ uint32_t id = _operands.size();
+
+ auto operand = make_unique<TensorOperand>(shape);
+ operand->dtype(dtype);
+
+ _operands.emplace_back(std::move(operand));
+
+ return OperandID{id};
+}
+
+// Look up an operand by ID; std::vector::at throws std::out_of_range for invalid IDs
+Operand *OperandInventory::at(const OperandID &id) { return _operands.at(id.value()).get(); }
+
+const Operand *OperandInventory::at(const OperandID &id) const
+{
+ return _operands.at(id.value()).get();
+}
+
+} // namespace ann
diff --git a/compiler/enco/core/src/ANN/IR/OperandInventory.h b/compiler/enco/core/src/ANN/IR/OperandInventory.h
new file mode 100644
index 000000000..23eb08119
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/OperandInventory.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANN_IR_OPERAND_INVENTORY_H__
+#define __ANN_IR_OPERAND_INVENTORY_H__
+
+#include "ANN/IR/OperandID.h"
+#include "ANN/IR/Operand.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <memory>
+#include <vector>
+
+namespace ann
+{
+
+/**
+ * @brief Owning container of all operands in a model; hands out dense OperandIDs
+ */
+class OperandInventory
+{
+public:
+ // Create a scalar operand / a tensor operand (see OperandInventory.cpp)
+ OperandID create(const DType &);
+ OperandID create(const DType &, const nncc::core::ADT::tensor::Shape &);
+
+public:
+ // Invoke 'cb(id, operand)' for every operand, in creation (ID) order
+ template <typename Callable> void each(Callable &&cb) const
+ {
+ for (uint32_t n = 0; n < _operands.size(); ++n)
+ {
+ cb(OperandID{n}, _operands.at(n).get());
+ }
+ }
+
+public:
+ Operand *at(const OperandID &id);
+ const Operand *at(const OperandID &id) const;
+
+private:
+ std::vector<std::unique_ptr<Operand>> _operands;
+};
+
+} // namespace ann
+
+#endif // __ANN_IR_OPERAND_INVENTORY_H__
diff --git a/compiler/enco/core/src/ANN/IR/OperandInventory.test.cpp b/compiler/enco/core/src/ANN/IR/OperandInventory.test.cpp
new file mode 100644
index 000000000..e576752bc
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/OperandInventory.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperandInventory.h"
+
+#include <gtest/gtest.h>
+
+// A fresh inventory is empty: 'each' must not invoke the callback at all
+TEST(ANN_IR_OPERAND_INVENTORY, constructor)
+{
+ ann::OperandInventory inven;
+
+ uint32_t count = 0;
+
+ inven.each([&](const ann::OperandID &, const ann::Operand *) { ++count; });
+
+ ASSERT_EQ(count, 0);
+}
diff --git a/compiler/enco/core/src/ANN/IR/Operation.def b/compiler/enco/core/src/ANN/IR/Operation.def
new file mode 100644
index 000000000..68fd394cf
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/Operation.def
@@ -0,0 +1,17 @@
+// X-macro list of Android NNAPI operations supported by the ANN IR.
+// Include this file with ANN_OPERATION(TAG, ENUM_VALUE) defined; TAG becomes
+// an enumerator of Operation::Code, ENUM_VALUE is the NNAPI operation code.
+#ifndef ANN_OPERATION
+#error Define ANN_OPERATION first
+#endif // ANN_OPERATION
+
+// ANN_OPERATION(TAG, ENUM_VALUE)
+ANN_OPERATION(ADD, ANEURALNETWORKS_ADD)
+ANN_OPERATION(MUL, ANEURALNETWORKS_MUL)
+ANN_OPERATION(CONV_2D, ANEURALNETWORKS_CONV_2D)
+ANN_OPERATION(DEPTHWISE_CONV_2D, ANEURALNETWORKS_DEPTHWISE_CONV_2D)
+ANN_OPERATION(MAX_POOL_2D, ANEURALNETWORKS_MAX_POOL_2D)
+ANN_OPERATION(AVG_POOL_2D, ANEURALNETWORKS_AVERAGE_POOL_2D)
+ANN_OPERATION(RELU, ANEURALNETWORKS_RELU)
+ANN_OPERATION(RELU6, ANEURALNETWORKS_RELU6)
+ANN_OPERATION(PAD, ANEURALNETWORKS_PAD)
+ANN_OPERATION(CONCAT, ANEURALNETWORKS_CONCATENATION)
+ANN_OPERATION(SUB, ANEURALNETWORKS_SUB)
+ANN_OPERATION(DIV, ANEURALNETWORKS_DIV)
diff --git a/compiler/enco/core/src/ANN/IR/Operation.h b/compiler/enco/core/src/ANN/IR/Operation.h
new file mode 100644
index 000000000..cacc2b794
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/Operation.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANN_IR_OPERATION_H__
+#define __ANN_IR_OPERATION_H__
+
+#include "ANN/IR/OperandID.h"
+
+#include <initializer_list>
+#include <vector>
+
+namespace ann
+{
+
+/**
+ * @brief An immutable ANN operation: an opcode plus input/output operand IDs
+ */
+class Operation
+{
+public:
+ // Enumerators are generated from the X-macro list in Operation.def
+ enum class Code
+ {
+#define ANN_OPERATION(TAG, VALUE) TAG,
+#include "Operation.def"
+#undef ANN_OPERATION
+ };
+
+public:
+ Operation(const Code &code, std::initializer_list<OperandID> inputs,
+ std::initializer_list<OperandID> outputs)
+ : _code{code}, _inputs{inputs}, _outputs{outputs}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const Code &code(void) const { return _code; }
+ const std::vector<OperandID> &inputs(void) const { return _inputs; }
+ const std::vector<OperandID> &outputs(void) const { return _outputs; }
+
+private:
+ Code _code;
+ std::vector<OperandID> _inputs;
+ std::vector<OperandID> _outputs;
+};
+
+} // namespace ann
+
+#endif // __ANN_IR_OPERATION_H__
diff --git a/compiler/enco/core/src/ANN/IR/Operation.test.cpp b/compiler/enco/core/src/ANN/IR/Operation.test.cpp
new file mode 100644
index 000000000..d1b716733
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/Operation.test.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Operation.h"
+
+#include <gtest/gtest.h>
+
+// Constructor stores the opcode; empty initializer lists yield empty operand vectors
+TEST(ANN_IR_OPERATION, constructor)
+{
+ ann::Operation op{ann::Operation::Code::CONV_2D, {}, {}};
+
+ ASSERT_EQ(op.code(), ann::Operation::Code::CONV_2D);
+ ASSERT_EQ(op.inputs().size(), 0);
+ ASSERT_EQ(op.outputs().size(), 0);
+}
diff --git a/compiler/enco/core/src/ANN/IR/OperationInventory.cpp b/compiler/enco/core/src/ANN/IR/OperationInventory.cpp
new file mode 100644
index 000000000..37d48c170
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/OperationInventory.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationInventory.h"
+
+#include <stdex/Memory.h>
+
+using stdex::make_unique;
+
+namespace ann
+{
+
+// Append a new operation; operations are kept in creation order (see at(n))
+void OperationInventory::create(Operation::Code code, std::initializer_list<OperandID> inputs,
+                                std::initializer_list<OperandID> outputs)
+{
+ _operations.emplace_back(make_unique<Operation>(code, inputs, outputs));
+}
+
+} // namespace ann
diff --git a/compiler/enco/core/src/ANN/IR/OperationInventory.h b/compiler/enco/core/src/ANN/IR/OperationInventory.h
new file mode 100644
index 000000000..11c6be98a
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/OperationInventory.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANN_IR_OPERATION_INVENTORY_H__
+#define __ANN_IR_OPERATION_INVENTORY_H__
+
+#include "ANN/IR/Operation.h"
+#include "ANN/IR/OperandID.h"
+
+#include <initializer_list>
+
+#include <memory>
+
+namespace ann
+{
+
+/**
+ * @brief Owning, ordered container of all operations in a model
+ */
+class OperationInventory
+{
+public:
+ void create(Operation::Code code, std::initializer_list<OperandID> inputs,
+             std::initializer_list<OperandID> outputs);
+
+public:
+ // Number of operations created so far
+ uint32_t count(void) const { return _operations.size(); }
+
+public:
+ // n-th operation in creation order; std::vector::at throws on out-of-range n
+ const Operation *at(uint32_t n) const { return _operations.at(n).get(); }
+
+private:
+ std::vector<std::unique_ptr<Operation>> _operations;
+};
+
+} // namespace ann
+
+#endif // __ANN_IR_OPERATION_INVENTORY_H__
diff --git a/compiler/enco/core/src/ANN/IR/OperationInventory.test.cpp b/compiler/enco/core/src/ANN/IR/OperationInventory.test.cpp
new file mode 100644
index 000000000..0e91a4f53
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/OperationInventory.test.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationInventory.h"
+
+#include <gtest/gtest.h>
+
+// A fresh inventory holds no operations
+TEST(ANN_IR_OPERATION_INVENTORY, constructor)
+{
+ ann::OperationInventory inven;
+
+ ASSERT_EQ(inven.count(), 0);
+}
+
+// 'create' appends one operation and preserves opcode and operand lists
+TEST(ANN_IR_OPERATION_INVENTORY, create)
+{
+ ann::OperationInventory inven;
+
+ inven.create(ann::Operation::Code::CONV_2D, {ann::OperandID{0}}, {ann::OperandID{3}});
+
+ ASSERT_EQ(inven.count(), 1);
+ ASSERT_NE(inven.at(0), nullptr);
+
+ ASSERT_EQ(inven.at(0)->code(), ann::Operation::Code::CONV_2D);
+ ASSERT_EQ(inven.at(0)->inputs().size(), 1);
+ ASSERT_EQ(inven.at(0)->outputs().size(), 1);
+}
diff --git a/compiler/enco/core/src/ANN/IR/OutputList.h b/compiler/enco/core/src/ANN/IR/OutputList.h
new file mode 100644
index 000000000..2dd891138
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/OutputList.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANN_IR_OUTPUT_LIST_H__
+#define __ANN_IR_OUTPUT_LIST_H__
+
+#include "ANN/IR/OperandID.h"
+
+#include <vector>
+
+namespace ann
+{
+
+// Ordered list of operand IDs that a model exposes as its outputs
+using OutputList = std::vector<OperandID>;
+
+} // namespace ann
+
+#endif // __ANN_IR_OUTPUT_LIST_H__
diff --git a/compiler/enco/core/src/ANN/IR/Weight.h b/compiler/enco/core/src/ANN/IR/Weight.h
new file mode 100644
index 000000000..062aa6d19
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/Weight.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANN_IR_WEIGHT_H__
+#define __ANN_IR_WEIGHT_H__
+
+#include <vector>
+
+#include <cstdint>
+#include <type_traits>
+
+namespace ann
+{
+
+/**
+ * @brief Constant weight data stored as a raw byte buffer (host byte order)
+ */
+class Weight
+{
+public:
+ // base() returns nullptr while the buffer is empty (std::vector::data on empty)
+ const uint8_t *base(void) const { return _buffer.data(); }
+ uint32_t size(void) const { return _buffer.size(); }
+
+public:
+ // Replace the buffer with the raw bytes of a single arithmetic value
+ template <typename T> void fill(const T &value)
+ {
+ static_assert(std::is_arithmetic<T>::value, "T should be arithmetic");
+ _buffer.clear();
+
+ // Copy the object representation byte-by-byte (native endianness)
+ auto arr = reinterpret_cast<const uint8_t *>(&value);
+
+ for (uint32_t b = 0; b < sizeof(T); ++b)
+ {
+ _buffer.emplace_back(arr[b]);
+ }
+ }
+
+ // Replace the buffer with the concatenated raw bytes of a value range
+ template <typename It> void fill(It beg, It end)
+ {
+ _buffer.clear();
+
+ for (auto it = beg; it != end; ++it)
+ {
+ // Copy into a local first so we take the address of a value, not of an
+ // iterator's proxy/reference
+ const auto value = *it;
+ auto arr = reinterpret_cast<const uint8_t *>(&value);
+
+ for (uint32_t b = 0; b < sizeof(value); ++b)
+ {
+ _buffer.emplace_back(arr[b]);
+ }
+ }
+ }
+
+private:
+ std::vector<uint8_t> _buffer;
+};
+
+} // namespace ann
+
+#endif // __ANN_IR_WEIGHT_H__
diff --git a/compiler/enco/core/src/ANN/IR/Weight.test.cpp b/compiler/enco/core/src/ANN/IR/Weight.test.cpp
new file mode 100644
index 000000000..53532114c
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/Weight.test.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Weight.h"
+
+#include <gtest/gtest.h>
+
+// A fresh Weight owns no bytes
+TEST(ANN_IR_WEIGHT, constructor)
+{
+ ann::Weight weight;
+
+ ASSERT_EQ(weight.base(), nullptr);
+ ASSERT_EQ(weight.size(), 0);
+}
+
+// Scalar fill stores the value's object representation, readable back as int
+TEST(ANN_IR_WEIGHT, fill_scalar_int)
+{
+ ann::Weight weight;
+
+ weight.fill(3);
+
+ ASSERT_NE(weight.base(), nullptr);
+ ASSERT_EQ(*reinterpret_cast<const int *>(weight.base()), 3);
+}
+
+// Range fill concatenates element representations in iteration order
+TEST(ANN_IR_WEIGHT, fill_vector_float)
+{
+ std::vector<float> values{1.0f, 2.0f};
+
+ ann::Weight weight;
+
+ weight.fill(values.begin(), values.end());
+
+ ASSERT_NE(weight.base(), nullptr);
+
+ auto arr = reinterpret_cast<const float *>(weight.base());
+
+ ASSERT_FLOAT_EQ(arr[0], 1.0f);
+ ASSERT_FLOAT_EQ(arr[1], 2.0f);
+}
diff --git a/compiler/enco/core/src/ANN/IR/WeightInventory.cpp b/compiler/enco/core/src/ANN/IR/WeightInventory.cpp
new file mode 100644
index 000000000..d8809ac08
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/WeightInventory.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WeightInventory.h"
+
+#include <stdex/Memory.h>
+
+using stdex::make_unique;
+
+namespace ann
+{
+
+// Create an empty Weight owned by this inventory; the returned raw pointer
+// stays valid for the inventory's lifetime.
+Weight *WeightInventory::create(void)
+{
+ auto hnd = make_unique<Weight>();
+ auto ptr = hnd.get();
+ _weights.push_back(std::move(hnd));
+ return ptr;
+}
+
+} // namespace ann
diff --git a/compiler/enco/core/src/ANN/IR/WeightInventory.h b/compiler/enco/core/src/ANN/IR/WeightInventory.h
new file mode 100644
index 000000000..fd166837f
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/WeightInventory.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __WEIGHT_INVENTORY_H__
+#define __WEIGHT_INVENTORY_H__
+
+#include "ANN/IR/Weight.h"
+
+#include <memory>
+
+namespace ann
+{
+
+/**
+ * @brief Owning container of all Weight objects in a model
+ */
+class WeightInventory
+{
+public:
+ // Returns a non-owning pointer to a freshly created empty Weight
+ Weight *create(void);
+
+private:
+ std::vector<std::unique_ptr<Weight>> _weights;
+};
+
+} // namespace ann
+
+#endif // __WEIGHT_INVENTORY_H__
diff --git a/compiler/enco/core/src/ANN/IR/WeightInventory.test.cpp b/compiler/enco/core/src/ANN/IR/WeightInventory.test.cpp
new file mode 100644
index 000000000..143bdfddf
--- /dev/null
+++ b/compiler/enco/core/src/ANN/IR/WeightInventory.test.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WeightInventory.h"
+
+#include <gtest/gtest.h>
+
+// 'create' returns a valid, empty Weight
+TEST(ANN_IR_WEIGHT_INVENTORY, create)
+{
+ ann::WeightInventory inven;
+
+ auto weight = inven.create();
+
+ ASSERT_EQ(weight->base(), nullptr);
+ ASSERT_EQ(weight->size(), 0);
+}
diff --git a/compiler/enco/core/src/AsmCode.cpp b/compiler/enco/core/src/AsmCode.cpp
new file mode 100644
index 000000000..70d6f30b3
--- /dev/null
+++ b/compiler/enco/core/src/AsmCode.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AsmCode.h"
+
+namespace enco
+{
+
+// Emit GNU-as directives that embed the file '_filename' into .rodata as a
+// global object symbol named '_varname' (the raw bytes come from .incbin)
+void AsmCode::dump(std::ostream &os) const
+{
+ os << ".section .rodata" << std::endl;
+ os << ".global " << _varname << std::endl;
+ // Please refer to https://www.sourceware.org/binutils/docs/as/Type.html#Type for details
+ os << ".type " << _varname << ", STT_OBJECT" << std::endl;
+ os << ".align " << 4 << std::endl;
+ os << _varname << ":" << std::endl;
+ os << ".incbin " << '"' << _filename << '"' << std::endl;
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/AsmCode.h b/compiler/enco/core/src/AsmCode.h
new file mode 100644
index 000000000..c43892888
--- /dev/null
+++ b/compiler/enco/core/src/AsmCode.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_ASM_CODE_H__
+#define __ENCO_ASM_CODE_H__
+
+#include <ostream>
+#include <string>
+
+namespace enco
+{
+
+/**
+ * @brief Assembly snippet that embeds a binary file as a named read-only
+ *        global symbol (see AsmCode::dump, which emits the .incbin directives)
+ *
+ * @param filename path of the binary file to embed
+ * @param varname  symbol name exposed to the linker
+ */
+class AsmCode
+{
+public:
+ // BUGFIX: '_filename' was initialized from an undeclared name 'unknown'
+ // instead of the 'filename' parameter; it must hold the path that
+ // AsmCode::dump writes into the .incbin directive.
+ AsmCode(const std::string &filename, const std::string &varname)
+ : _filename{filename}, _varname{varname}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void dump(std::ostream &) const;
+
+private:
+ std::string _filename;
+ std::string _varname;
+};
+
+} // namespace enco
+
+// Stream-insertion convenience: 'os << AsmCode{...}' delegates to dump()
+static inline std::ostream &operator<<(std::ostream &os, const enco::AsmCode &code)
+{
+ code.dump(os);
+ return os;
+}
+
+#endif // __ENCO_ASM_CODE_H__
diff --git a/compiler/enco/core/src/Backend.cpp b/compiler/enco/core/src/Backend.cpp
new file mode 100644
index 000000000..d4bec7447
--- /dev/null
+++ b/compiler/enco/core/src/Backend.cpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "enco/Backend.h"
+
+#include "IRValidator.h"
+
+#include "Session.h"
+#include "Pipeline.h"
+
+#include "Code.h"
+#include "AsmCode.h"
+#include "CppCode.h"
+
+#include "Transforms/Duplicate.h"
+#include "Transforms/FeatureUnification.h"
+#include "Transforms/AvgPoolLowering.h"
+#include "Transforms/IntrinsicSelection.h"
+#include "Transforms/DataLayoutConversion.h"
+#include "Transforms/IndirectCopyElimination.h"
+#include "Transforms/IdenticalObjectReduction.h"
+#include "Transforms/DuplicatedObjectReduction.h"
+#include "Transforms/DeadObjectElimination.h"
+#include "Transforms/ConstantFolding.h"
+#include "Transforms/CopyLowering.h"
+#include "Transforms/ConcatLowering.h"
+#include "Transforms/FreeInstrElimination.h"
+#include "Transforms/FreeOpElimination.h"
+#include "Transforms/DeadBagElimination.h"
+#include "Transforms/Optimizations.h"
+#include "Transforms/Split.h"
+#include "Transforms/GlobalDataGeneration.h"
+
+#include <stdex/Memory.h>
+
+#include <stdexcept>
+#include <iostream>
+#include <fstream>
+
+using stdex::make_unique;
+using namespace enco;
+
+namespace
+{
+
+// has_inout_bag(m) returns true if there is a pair of coco::Input and coco::Output that share
+// the same bag as their backing storage
+// has_inout_bag(m) returns true if there is a pair of coco::Input and coco::Output that share
+// the same bag as their backing storage
+//
+// @param m module to inspect (must be non-null; dereferenced unconditionally)
+// @return true iff some bag is both an input and an output of the module
+inline bool has_inout_bag(const coco::Module *m)
+{
+ for (uint32_t n = 0; n < m->entity()->bag()->size(); ++n)
+ {
+ auto bag = m->entity()->bag()->at(n);
+
+ if (bag->isInput() && bag->isOutput())
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+// Concrete enco backend: compiles a coco module/data pair into '<prefix>.bin',
+// '<prefix>.embed.S' and '<prefix>.cpp' output files (see compile() below)
+class BackendImpl final : public enco::Backend
+{
+public:
+ // 'prefix' is the path prefix used for all generated output files
+ BackendImpl(const std::string &prefix) : _prefix{prefix}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void compile(coco::Module *m, coco::Data *d) override;
+
+private:
+ std::string _prefix;
+};
+
+// Run the full transform pipeline over (m, d) and emit the generated artifacts.
+// NOTE The pass order below is significant (several passes depend on the
+// output of earlier ones, as the inline comments explain) — do not reorder.
+void BackendImpl::compile(coco::Module *m, coco::Data *d)
+{
+ auto sess = make_session(m, d);
+
+ // validate if IR from frontend is correct
+ // NOTE assert() is compiled out in NDEBUG builds, so validation only runs in debug
+ assert(validate(code(sess)));
+
+ enco::Pipeline pipeline;
+
+ // Configure pipeline
+
+ // As explained below, the current implementation does not work if there is a pair of input/output
+ // that share the same bag as their underlying bag.
+ //
+ // BagDuplicationPass creates a copy of such bags in order to eliminate such a pair.
+ pipeline.append(make_unique<BagDuplicationPass>());
+ pipeline.append(make_unique<FeatureUnificationPass>());
+ pipeline.append(make_unique<AvgPoolLoweringPass>());
+ pipeline.append(make_unique<IntrinsicSelectionPass>());
+ // Insert data ordering if necessary
+ pipeline.append(make_unique<DataLayoutConversionPass>());
+ pipeline.append(make_unique<IndirectCopyEliminationPass>());
+ pipeline.append(make_unique<IdenticalObjectReductionPass>());
+ pipeline.append(make_unique<DuplicatedObjectReductionPass>());
+ pipeline.append(make_unique<ConstantFoldingPass>());
+ // Eliminate dead object
+ //
+ // NOTE Dead Object Elimination (DOE) is performed before Copy lowering
+ // in order to reduce compilation overhead.
+ pipeline.append(make_unique<DeadObjectEliminationPass>());
+ // Lower Copy as Shuffle
+ pipeline.append(make_unique<CopyLoweringPass>());
+ // Lower ConcatF as Shuffle if it is not delegated to NNAPI yet
+ pipeline.append(make_unique<ConcatLoweringPass>());
+ pipeline.append(make_unique<BypassGenerationPass>());
+ pipeline.append(make_unique<FreeInstrEliminationPass>());
+ // NOTE Free Op Elimination should be applied after Free Instr Elimination
+ // - Free Instr Elimination may generate additional free Op(s)
+ pipeline.append(make_unique<FreeOpEliminationPass>());
+ pipeline.append(make_unique<DeadBagEliminationPass>());
+ // Split instructions into a set of phases (each block serves as a phase)
+ pipeline.append(make_unique<PhaseConstructionPass>());
+
+ // Apply transforms in the pipeline
+ for (uint32_t n = 0; n < pipeline.size(); ++n)
+ {
+ const auto &pass = pipeline.at(n);
+
+ pass.run(sess);
+ }
+
+ // The current implementation will assign memory region for each bag as follows:
+ // Bind input bag to the region provided by Network_input_bind
+ // Bind output bag to the region provided by Network_output_bind
+ // Bind intermediate bag to the region allocated during execution
+ //
+ // Note that this scheme does not work if there is a pair of input/output
+ // that share the same bag as their underlying bag
+ assert(!has_inout_bag(code(sess)->module()));
+
+ const std::string data_var = "data";
+ const std::string data_filename = _prefix + ".bin";
+
+ // Generate 'bin' file
+ {
+ std::ofstream ofs{data_filename, std::ios::binary};
+ generate_global_data(ofs, code(sess));
+ }
+
+ // Generate 'embed.S' file
+ {
+ std::ofstream ofs{_prefix + ".embed.S"};
+ ofs << AsmCode{data_filename, data_var};
+ }
+
+ // TODO Run various transforms over enco::Code
+
+ // Generate '<prefix>.cpp' file with the C++ code for the compiled network
+ std::ofstream ofs{_prefix + ".cpp"};
+ ofs << CppCode{data_var, code(sess)} << std::endl;
+}
+
+} // namespace enco
+
+#include <iostream>
+
+// Factory entry point: the first command-line argument is used as the output
+// file prefix for the backend (cmdline.at(0) throws/fails if absent —
+// NOTE(review): argument-count validation appears to happen elsewhere; confirm)
+std::unique_ptr<enco::Backend> make_backend(const cmdline::View &cmdline)
+{
+ return make_unique<::BackendImpl>(cmdline.at(0));
+}
diff --git a/compiler/enco/core/src/Code.h b/compiler/enco/core/src/Code.h
new file mode 100644
index 000000000..91756d5f8
--- /dev/null
+++ b/compiler/enco/core/src/Code.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_CODE_H__
+#define __ENCO_CODE_H__
+
+#include "ANN/Context.h"
+
+#include <coco/IR/Module.h>
+#include <coco/IR/Data.h>
+
+namespace enco
+{
+
+/**
+ * @brief Non-owning pair of a coco IR module and its associated constant data
+ *
+ * The pointers are stored as 'T *const': the bindings are fixed at
+ * construction, while the pointed-to objects remain mutable.
+ */
+struct Code
+{
+public:
+ Code(coco::Module *module, coco::Data *data) : _module{module}, _data{data}
+ {
+ // DO NOTHING
+ }
+
+public:
+ coco::Module *module(void) const { return _module; }
+ coco::Data *data(void) const { return _data; }
+
+private:
+ coco::Module *const _module;
+ coco::Data *const _data;
+};
+
+} // namespace enco
+
+#endif // __ENCO_CODE_H__
diff --git a/compiler/enco/core/src/Code.test.cpp b/compiler/enco/core/src/Code.test.cpp
new file mode 100644
index 000000000..8e96e4751
--- /dev/null
+++ b/compiler/enco/core/src/Code.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Code.h"
+
+#include <gtest/gtest.h>
+
+TEST(CODE, constructor)
+{
+ auto m = coco::Module::create();
+ auto d = coco::Data::create();
+
+ enco::Code code{m.get(), d.get()};
+
+ ASSERT_EQ(code.module(), m.get());
+ ASSERT_EQ(code.data(), d.get());
+}
diff --git a/compiler/enco/core/src/CodeIndex.h b/compiler/enco/core/src/CodeIndex.h
new file mode 100644
index 000000000..7f2da6463
--- /dev/null
+++ b/compiler/enco/core/src/CodeIndex.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CODE_INDEX_H__
+#define __CODE_INDEX_H__
+
+#include <coco/IR/Block.h>
+#include <coco/IR/Instr.h>
+
+/**
+ * @brief A CodeIndex denotes the index of an instruction inside the whole module
+ */
+class CodeIndex
+{
+public:
+ CodeIndex() = default;
+
+public:
+ CodeIndex(const coco::BlockIndex &blk_ind, const coco::InstrIndex &ins_ind)
+ : _blk_ind{blk_ind}, _ins_ind{ins_ind}
+ {
+ }
+
+public:
+ const coco::BlockIndex &block(void) const { return _blk_ind; }
+ const coco::InstrIndex &instr(void) const { return _ins_ind; }
+
+private:
+ coco::BlockIndex _blk_ind;
+ coco::InstrIndex _ins_ind;
+};
+
+static inline coco::BlockIndex block_index(const coco::Block *blk)
+{
+ if (blk == nullptr)
+ {
+ return coco::BlockIndex{};
+ }
+
+ return blk->index();
+}
+
+static inline CodeIndex code_index(const coco::Instr *ins)
+{
+ return CodeIndex{block_index(ins->parent()), ins->index()};
+}
+
+static inline bool operator<(const CodeIndex &lhs, const CodeIndex &rhs)
+{
+ if (lhs.block() < rhs.block())
+ {
+ return true;
+ }
+
+ if (lhs.block().value() > rhs.block().value())
+ {
+ return false;
+ }
+
+ return lhs.instr() < rhs.instr();
+}
+
+#endif // __CODE_INDEX_H__
diff --git a/compiler/enco/core/src/CppCode.cpp b/compiler/enco/core/src/CppCode.cpp
new file mode 100644
index 000000000..aa5ef3156
--- /dev/null
+++ b/compiler/enco/core/src/CppCode.cpp
@@ -0,0 +1,553 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CppCode.h"
+
+#include "Transforms/GlobalDataGeneration.h"
+#include "Transforms/Split.h"
+
+#include "CppGen/MemoryContext.h"
+
+#include "CppGen/Host.h"
+#include "CppGen/Subnet.h"
+
+#include "Dims.h"
+
+#include <pp/LinearDocument.h>
+#include <pp/MultiLineTextUtils.h>
+
+#include <map>
+#include <set>
+#include <string>
+#include <stdexcept>
+
+namespace
+{
+
+struct SubnetInfo
+{
+ std::string struct_name;
+ /// @brief The field name (in this subnet struct) of ANeuralNetworksCompilation value
+ std::string compilation_field;
+
+ /// @brief The field name (in Network struct) for this subnet
+ std::string field_name;
+};
+
+struct NetworkStruct
+{
+ pp::LinearDocument def;
+};
+
+struct InvokeFunction
+{
+ pp::LinearDocument head;
+ pp::LinearDocument body;
+ pp::LinearDocument tail{pp::LinearDocument::Direction::Reverse};
+
+public:
+ /** @brief Create a (fresh) local variable */
+ std::string local(void) { return pp::fmt("v_", ++_var_count); }
+
+private:
+ uint32_t _var_count = 0;
+};
+
+/**
+ * @brief Enumerate a set of Bag accessed by a given instruction
+ *
+ * Supported instruction:
+ * "Shuffle"
+ */
+class AccessedBagAccumulator : public coco::Instr::Visitor<void>
+{
+public:
+ AccessedBagAccumulator(std::set<coco::Bag *> *out) : _out{out}
+ {
+ // Validate "out"
+ assert(_out != nullptr);
+ }
+
+public:
+ void visit(const coco::Shuffle *shuffle) override
+ {
+ assert(shuffle->from() != nullptr);
+ assert(shuffle->into() != nullptr);
+
+ _out->insert(shuffle->from());
+ _out->insert(shuffle->into());
+ }
+
+private:
+ std::set<coco::Bag *> *_out;
+};
+
+/**
+ * @brief Return a set of bags that SHOULD have a host allocation
+ */
+std::set<coco::Bag *> hosted(const enco::Code *code)
+{
+ std::set<coco::Bag *> res;
+
+ auto m = code->module();
+ auto ann_ctx = enco::SubnetManager::context(m);
+
+ for (auto blk = m->block()->head(); blk; blk = blk->next())
+ {
+ if (auto ann_binder = ann_ctx->find(blk))
+ {
+ // Case: The current block is ANN-compatible
+
+ // Each ANN input SHOULD have a corresponding host allocation
+ for (uint32_t n = 0; n < ann_binder->module()->input()->size(); ++n)
+ {
+ res.insert(ann_binder->input(n));
+ }
+
+ // Each ANN output SHOULD have a corresponding host allocation
+ for (uint32_t n = 0; n < ann_binder->module()->output()->size(); ++n)
+ {
+ res.insert(ann_binder->output(n));
+ }
+ }
+ else
+ {
+ // Every bag that ANN-incompatible block accesses SHOULD have a corresponding host allocation
+ AccessedBagAccumulator acc{&res};
+
+ for (auto ins = blk->instr()->head(); ins; ins = ins->next())
+ {
+ ins->accept(acc);
+ }
+ }
+ }
+
+ return res;
+}
+} // namespace
+
+namespace enco
+{
+
+void CppCode::dump(std::ostream &os) const
+{
+ auto m = _code->module();
+ auto d = _code->data();
+ auto ann_ctx = enco::SubnetManager::context(m);
+
+ NetworkStruct network;
+ InvokeFunction invoke;
+ pp::LinearDocument internal;
+
+ auto data_exp = [this](const GlobalOffset &off) { return pp::fmt(_varname, " + ", off); };
+
+ // Record the subnet information
+ std::map<const ANNBinder *, SubnetInfo> subnet_ctx;
+
+ /**
+ * Create a struct for each android NN network of the following form:
+ *
+ * struct [Name]
+ * {
+ * ...
+ *
+ * [Name]() // constructor
+ * {
+ * ...
+ * }
+ *
+ * ~[Name]() // destructor
+ * {
+ * ...
+ * }
+ * };
+ *
+ */
+ for (uint32_t n = 0; n < ann_ctx->count(); ++n)
+ {
+ SubnetStructBuilder builder;
+
+ auto subnet_binder = ann_ctx->nth(n);
+ auto subnet_struct_name = pp::fmt("Subnet_", subnet_ctx.size());
+ auto subnet_field_name = pp::fmt("_subnet_", subnet_ctx.size());
+
+ // Create global data variable
+ auto emit_weight = [&](const ann::OperandID &, const ann::Operand *info) {
+ if (info->weight())
+ {
+ auto size = info->weight()->size();
+ auto off = enco::GlobalData::data_offset(info);
+ auto base_exp = pp::fmt("reinterpret_cast<const void *>(", data_exp(off), ")");
+ auto size_exp = pp::fmt(size);
+
+ builder.expr(info, base_exp, size_exp);
+ }
+ };
+ subnet_binder->module()->operand()->each(emit_weight);
+
+ auto subnet_struct_content = builder.build(subnet_binder);
+
+ // Emit C++ declaration
+ internal.append("struct ", subnet_struct_name);
+ internal.append("{");
+ internal.indent();
+
+ internal.append(subnet_struct_content->def());
+
+ internal.append(subnet_struct_name, "()");
+ internal.append("{");
+ internal.indent();
+ internal.append(subnet_struct_content->ctor());
+ internal.unindent();
+ internal.append("}");
+
+ internal.append("~", subnet_struct_name, "()");
+ internal.append("{");
+ internal.indent();
+ internal.append(subnet_struct_content->dtor());
+ internal.unindent();
+ internal.append("}");
+
+ internal.unindent();
+ internal.append("};");
+
+ // Declare subnet field
+ network.def.append(subnet_struct_name, " ", subnet_field_name, ";");
+
+ // Update subnet context
+ SubnetInfo subnet_info;
+
+ subnet_info.struct_name = subnet_struct_name;
+ subnet_info.compilation_field = subnet_struct_content->compilation();
+ subnet_info.field_name = subnet_field_name;
+
+ assert(subnet_ctx.find(subnet_binder) == subnet_ctx.end());
+ subnet_ctx[subnet_binder] = subnet_info;
+ }
+
+ MemoryContext mem;
+
+ // Set dedicated memory region for network inputs
+ for (uint32_t n = 0; n < m->input()->size(); ++n)
+ {
+ mem.base(m->input()->at(n)->bag(), pp::fmt("net->inputs[", n, "].ptr"));
+ mem.size(m->input()->at(n)->bag(), pp::fmt("net->inputs[", n, "].len"));
+ }
+
+ // Set dedicated memory region for network outputs
+ for (uint32_t n = 0; n < m->output()->size(); ++n)
+ {
+ mem.base(m->output()->at(n)->bag(), pp::fmt("net->outputs[", n, "].ptr"));
+ mem.size(m->output()->at(n)->bag(), pp::fmt("net->outputs[", n, "].len"));
+ }
+
+ // Set dedicated memory region for constant weight values
+ // TODO Support non-constant bags with initial values
+ for (uint32_t n = 0; n < m->entity()->bag()->size(); ++n)
+ {
+ auto bag = m->entity()->bag()->at(n);
+
+ if (!d->allocated(bag))
+ {
+ // Skip if no weight exists
+ continue;
+ }
+
+ // TODO Support non-float(fp32) weight
+ auto offset = enco::GlobalData::data_offset(bag);
+
+ auto base_expr = data_exp(offset);
+ auto size_expr = pp::fmt(bag->size() * sizeof(float));
+
+ mem.base(bag, base_expr);
+ mem.size(bag, size_expr);
+ }
+
+ // Set dedicated memory region for intermediate buffer(s)
+ for (const auto &bag : hosted(_code))
+ {
+ // Skip if a bag is already allocated
+ if (mem.member(bag))
+ {
+ continue;
+ }
+
+ auto name = invoke.local();
+
+ invoke.head.append("auto ", name, " = new uint8_t[", bag->size() * sizeof(float), "];");
+ invoke.tail.append("delete[] ", name, ";");
+
+ mem.base(bag, name);
+ mem.size(bag, pp::fmt(bag->size() * sizeof(float)));
+ }
+
+ // Create Code Block Builder
+ SubnetBlockCompiler subnet_compiler{mem};
+
+ for (auto it = subnet_ctx.begin(); it != subnet_ctx.end(); ++it)
+ {
+ // Specify how to access ANeuralNetworksCompilation
+ const auto &info = it->second;
+ subnet_compiler.bind(it->first, pp::fmt("net->", info.field_name, ".", info.compilation_field));
+ }
+
+ HostBlockCompiler host_compiler{mem};
+
+ for (auto blk = m->block()->head(); blk; blk = blk->next())
+ {
+ invoke.body.append("{");
+ invoke.body.indent();
+
+ if (auto binder = ann_ctx->find(blk))
+ {
+ // Generate code that invokes Android NN sub-network
+ auto lines = subnet_compiler.compile(binder);
+ invoke.body.append(*lines);
+ }
+ else
+ {
+ // Generate code on-the-fly for Android NN-incompatible blocks
+ auto lines = host_compiler.compile(blk);
+ invoke.body.append(*lines);
+ }
+
+ invoke.body.unindent();
+ invoke.body.append("}");
+ }
+
+ //
+ // Generate full C++ source code with code snippet
+ //
+ const std::string name{"Network"};
+
+ pp::LinearDocument includes;
+ {
+ // Include Android NN API header
+ includes.append("#include <NeuralNetworks.h>");
+ includes.append();
+
+ includes.append("#include <cstdint>");
+ includes.append("#include <cassert>");
+ includes.append("#include <array>");
+ }
+
+ pp::LinearDocument net_def;
+ {
+ net_def.append("struct ", name, " {");
+ net_def.indent();
+ net_def.append("struct Shape { uint32_t rank; const uint32_t *dims; };");
+ net_def.append("struct Input {");
+ net_def.indent();
+ net_def.append("const char *name;");
+ net_def.append("const uint8_t *ptr;");
+ net_def.append("unsigned len;");
+ net_def.append("Shape shape;");
+ net_def.unindent();
+ net_def.append("};");
+ net_def.append("struct Output {");
+ net_def.indent();
+ net_def.append("const char *name;");
+ net_def.append("uint8_t *ptr;");
+ net_def.append("unsigned len;");
+ net_def.append("Shape shape;");
+ net_def.unindent();
+ net_def.append("};");
+ net_def.append();
+ net_def.append(name, "();");
+ net_def.append("~", name, "();");
+
+ net_def.append();
+ net_def.append(network.def);
+ net_def.append();
+
+ net_def.append("std::array<Input, ", m->input()->size(), "> inputs;");
+ net_def.append("std::array<Output, ", m->output()->size(), "> outputs;");
+
+ net_def.unindent();
+ net_def.append("};");
+ }
+
+ pp::LinearDocument net_ctor;
+ {
+ net_ctor.append("Network::Network() {");
+ net_ctor.indent();
+
+ // Initialize input metadata
+ for (uint32_t n = 0; n < m->input()->size(); ++n)
+ {
+ auto input = m->input()->at(n);
+ auto dims = as_dims(input->shape());
+
+ auto name_off = enco::GlobalData::name_offset(input);
+ auto name_exp = pp::fmt("reinterpret_cast<const char *>(", data_exp(name_off), ")");
+ auto dims_off = enco::GlobalData::dims_offset(input);
+ auto dims_exp = pp::fmt("reinterpret_cast<const unsigned *>(", data_exp(dims_off), ")");
+
+ net_ctor.append("inputs.at(", n, ").name = ", name_exp, ";");
+ net_ctor.append("inputs.at(", n, ").shape.rank = ", dims.size(), ";");
+ net_ctor.append("inputs.at(", n, ").shape.dims = ", dims_exp, ";");
+ }
+
+ // Initialize output metadata
+ for (uint32_t n = 0; n < m->output()->size(); ++n)
+ {
+ auto output = m->output()->at(n);
+ auto dims = as_dims(output->shape());
+
+ auto name_off = enco::GlobalData::name_offset(output);
+ auto name_exp = pp::fmt("reinterpret_cast<const char *>(", data_exp(name_off), ")");
+ auto dims_off = enco::GlobalData::dims_offset(output);
+ auto dims_exp = pp::fmt("reinterpret_cast<const unsigned *>(", data_exp(dims_off), ")");
+
+ net_ctor.append("outputs.at(", n, ").name = ", name_exp, ";");
+ net_ctor.append("outputs.at(", n, ").shape.rank = ", dims.size(), ";");
+ net_ctor.append("outputs.at(", n, ").shape.dims = ", dims_exp, ";");
+ }
+
+ // TODO Implement this
+ net_ctor.unindent();
+ net_ctor.append("}");
+ }
+
+ pp::LinearDocument net_dtor;
+ {
+ net_dtor.append("Network::~Network() {");
+ net_dtor.indent();
+ // TODO Implement this
+ net_dtor.unindent();
+ net_dtor.append("}");
+ }
+
+ pp::LinearDocument source;
+
+ source.append(includes);
+ source.append();
+ source.append("extern uint8_t ", _varname, "[];");
+ source.append();
+
+ source.append("namespace");
+ source.append("{");
+ source.append(internal);
+ source.append("} // namespace");
+ source.append();
+ source.append(net_def);
+ source.append();
+ source.append(net_ctor);
+ source.append();
+ source.append(net_dtor);
+
+ source.append();
+ source.append(name, " *", name, "_construct() { return new ", name, "{}; }");
+ source.append("void ", name, "_destruct(", name, " *net) { delete net; }");
+
+ source.append();
+
+ // Emit Network_input_count function
+ source.append("unsigned ", name, "_input_count(const ", name, " *net) {");
+ source.indent();
+ source.append("return net->inputs.size();");
+ source.unindent();
+ source.append("}");
+
+ source.append();
+
+ // Emit Network_input_name function
+ source.append("const char *", name, "_input_name(const ", name, " *net, unsigned n) {");
+ source.indent();
+ source.append("return net->inputs.at(n).name;");
+ source.unindent();
+ source.append("}");
+
+ // Emit Network_input_rank function
+ source.append("unsigned ", name, "_input_rank(const ", name, " *net, unsigned n) {");
+ source.indent();
+ source.append("return net->inputs.at(n).shape.rank;");
+ source.unindent();
+ source.append("}");
+
+ // Emit Network_input_dim function
+ source.append("unsigned ", name, "_input_dim(const ", name, " *net, unsigned n, unsigned axe)");
+ source.append("{");
+ source.indent();
+ source.append("return net->inputs.at(n).shape.dims[axe];");
+ source.unindent();
+ source.append("}");
+
+ // Emit Network_input_bind function
+ source.append("void ", name, "_input_bind(", name,
+ " *net, unsigned n, const void *ptr, unsigned len) {");
+ source.indent();
+ source.append("net->inputs.at(n).ptr = reinterpret_cast<const uint8_t *>(ptr);");
+ source.append("net->inputs.at(n).len = len;");
+ source.unindent();
+ source.append("}");
+
+ source.append();
+
+ // Emit Network_output_count function
+ source.append("unsigned ", name, "_output_count(const ", name, " *net) {");
+ source.indent();
+ source.append("return net->outputs.size();");
+ source.unindent();
+ source.append("}");
+
+ source.append();
+
+ // Emit Network_output_name function
+ source.append("const char *", name, "_output_name(const ", name, " *net, unsigned n) {");
+ source.indent();
+ source.append("return net->outputs.at(n).name;");
+ source.unindent();
+ source.append("}");
+
+ // Emit Network_output_rank function
+ source.append("unsigned ", name, "_output_rank(const ", name, " *net, unsigned n) {");
+ source.indent();
+ source.append("return net->outputs.at(n).shape.rank;");
+ source.unindent();
+ source.append("}");
+
+ // Emit Network_output_dim function
+ source.append("unsigned ", name, "_output_dim(const ", name, " *net, unsigned n, unsigned axe)");
+ source.append("{");
+ source.indent();
+ source.append("return net->outputs.at(n).shape.dims[axe];");
+ source.unindent();
+ source.append("}");
+
+ // Emit Network_output_bind function
+ source.append("void ", name, "_output_bind(", name,
+ " *net, unsigned n, void *ptr, unsigned len) {");
+ source.indent();
+ source.append("net->outputs.at(n).ptr = reinterpret_cast<uint8_t *>(ptr);");
+ source.append("net->outputs.at(n).len = len;");
+ source.unindent();
+ source.append("}");
+
+ source.append();
+
+ source.append("void ", name, "_invoke(", name, " *net) {");
+ source.indent();
+ source.append(invoke.head);
+ source.append(invoke.body);
+ source.append(invoke.tail);
+ source.unindent();
+ source.append("}");
+
+ os << source;
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/CppCode.h b/compiler/enco/core/src/CppCode.h
new file mode 100644
index 000000000..c52ea1d5d
--- /dev/null
+++ b/compiler/enco/core/src/CppCode.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_CPP_CODE_H__
+#define __ENCO_CPP_CODE_H__
+
+#include "Code.h"
+
+#include <ostream>
+
+namespace enco
+{
+
+class CppCode
+{
+public:
+ CppCode(const std::string &varname, const Code *code) : _varname{varname}, _code{code}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void dump(std::ostream &) const;
+
+private:
+ const std::string _varname;
+ const Code *_code;
+};
+
+} // namespace enco
+
+static inline std::ostream &operator<<(std::ostream &os, const enco::CppCode &code)
+{
+ code.dump(os);
+ return os;
+}
+
+#endif // __ENCO_CPP_CODE_H__
diff --git a/compiler/enco/core/src/CppGen/Host.cpp b/compiler/enco/core/src/CppGen/Host.cpp
new file mode 100644
index 000000000..37e0583d7
--- /dev/null
+++ b/compiler/enco/core/src/CppGen/Host.cpp
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Host.h"
+
+#include <pp/EnclosedDocument.h>
+
+#include <stdex/Memory.h>
+
+#include <map>
+#include <string>
+
+namespace
+{
+
+/**
+ * @brief Data transfer between flat arrays
+ *
+ * Transfer(from, into) denotes the following C code:
+ * dst[into] = src[from];
+ */
+class Transfer
+{
+public:
+ Transfer(uint32_t from, uint32_t into) : _from{from}, _into{into}
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t from(void) const { return _from; }
+ uint32_t into(void) const { return _into; }
+
+private:
+ uint32_t _from;
+ uint32_t _into;
+};
+
+using TransferSequence = std::vector<Transfer>;
+
+/**
+ * @brief Convert a Shuffle instruction into a sequence of data transfers
+ */
+TransferSequence as_transfer_sequence(const coco::Shuffle *shuffle)
+{
+ TransferSequence seq;
+
+ for (const auto &dst : shuffle->range())
+ {
+ const auto src = shuffle->at(dst);
+ seq.emplace_back(src.value(), dst.value());
+ }
+
+ return seq;
+}
+
+/**
+ * Given a sequence of N data transfers,
+ * find_loop tries to compute count, src_step, dst_step that satisfies
+ * the following properties:
+ *
+ * First, N should be a multiple of count.
+ * Below we refer to that multiplier as 'window' (= N / count)
+ *
+ * Second,
+ * for all n in [0, count),
+ * for all k in [0, window),
+ * from[n * window + k] == from[k] + src_step, and
+ * into[n * window + k] == into[k] + dst_step
+ */
+bool find_loop(TransferSequence::const_iterator beg, TransferSequence::const_iterator end,
+ uint32_t *p_count, uint32_t *p_src_step, uint32_t *p_dst_step)
+{
+ assert(p_count != nullptr);
+ assert(p_src_step != nullptr);
+ assert(p_dst_step != nullptr);
+
+ const uint32_t size = end - beg;
+
+ for (uint32_t window = 1; window <= size; ++window)
+ {
+ if (size % window != 0)
+ {
+ continue;
+ }
+
+ auto src_step_at = [&beg, window](uint32_t n) {
+ return (beg + n)->from() - (beg + n - window)->from();
+ };
+
+ auto dst_step_at = [&beg, window](uint32_t n) {
+ return (beg + n)->into() - (beg + n - window)->into();
+ };
+
+ const uint32_t count = size / window;
+ const uint32_t src_step = src_step_at(window);
+ const uint32_t dst_step = dst_step_at(window);
+
+ bool consistent = true;
+
+ for (uint32_t n = window + 1; n < size; ++n)
+ {
+ if ((src_step_at(n) != src_step) || (dst_step_at(n) != dst_step))
+ {
+ consistent = false;
+ break;
+ }
+ }
+
+ if (consistent)
+ {
+ *p_count = count;
+ *p_src_step = src_step;
+ *p_dst_step = dst_step;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/**
+ * @brief Single transfer loop (a triple of count, source step, destination step)
+ */
+class TransferLoop
+{
+public:
+ class Step
+ {
+ public:
+ Step(uint32_t src, uint32_t dst) : _src{src}, _dst{dst}
+ {
+ // DO NOTHING
+ }
+
+ public:
+ uint32_t src(void) const { return _src; }
+ uint32_t dst(void) const { return _dst; }
+
+ private:
+ uint32_t _src;
+ uint32_t _dst;
+ };
+
+public:
+ TransferLoop(uint32_t count, uint32_t src_step, uint32_t dst_step)
+ : _count{count}, _step{src_step, dst_step}
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t count(void) const { return _count; }
+ const Step &step(void) const { return _step; }
+
+private:
+ uint32_t _count;
+ Step _step;
+};
+
+/**
+ * @brief Nested transfer loops
+ */
+using TransferNest = std::vector<TransferLoop>;
+
+/**
+ * @brief Construct a nested transfer loop-nest that corresponds to a given Shuffle instruction
+ */
+TransferNest as_nest(const TransferSequence &seq)
+{
+ TransferNest nest;
+
+ auto beg = seq.begin();
+ auto end = seq.end();
+
+ uint32_t window = end - beg;
+ uint32_t count = 0;
+ uint32_t src_step = 0;
+ uint32_t dst_step = 0;
+
+ while ((window > 1) && find_loop(beg, end, &count, &src_step, &dst_step))
+ {
+ assert(window % count == 0);
+
+ window /= count;
+ end = beg + window;
+
+ nest.emplace_back(count, src_step, dst_step);
+ }
+
+ return nest;
+};
+
+uint32_t loop_count(const TransferNest &nest)
+{
+ uint32_t count = 1;
+
+ for (const auto &loop : nest)
+ {
+ count *= loop.count();
+ }
+
+ return count;
+};
+
+class InstrPrinter : public coco::Instr::Visitor<pp::LinearDocument>
+{
+public:
+ InstrPrinter(const enco::MemoryContext &mem) : _mem(mem)
+ {
+ // DO NOTHING
+ }
+
+private:
+ pp::LinearDocument visit(const coco::Shuffle *shuffle) override
+ {
+ auto from = shuffle->from();
+ auto into = shuffle->into();
+
+ //
+ // Analyze 'Shuffle' pattern, and convert it as nested loops
+ //
+ auto tseq = as_transfer_sequence(shuffle);
+ auto nest = as_nest(tseq);
+ assert(tseq.size() % loop_count(nest) == 0);
+ uint32_t window = tseq.size() / loop_count(nest);
+
+ //
+ // Generate loop body
+ //
+ pp::EnclosedDocument loop_body;
+
+ auto var_at = [](uint32_t lv) { return pp::fmt("_", lv); };
+
+ for (uint32_t lv = 0; lv < nest.size(); ++lv)
+ {
+ auto var = var_at(lv);
+
+ loop_body.front().append("for (uint32_t ", var, " = 0; ", var, " < ", nest.at(lv).count(),
+ "; ++", var, ") {");
+ loop_body.front().indent();
+
+ loop_body.back().append("}");
+ loop_body.back().indent();
+ }
+
+ std::string src_index = "0";
+ std::string dst_index = "0";
+
+ for (uint32_t lv = 0; lv < nest.size(); ++lv)
+ {
+ src_index += pp::fmt(" + ", nest.at(lv).step().src(), " * ", var_at(lv));
+ dst_index += pp::fmt(" + ", nest.at(lv).step().dst(), " * ", var_at(lv));
+ }
+
+ for (uint32_t n = 0; n < window; ++n)
+ {
+ const auto src_base = pp::fmt("reinterpret_cast<const float *>(", _mem.base(from), ")");
+ const auto dst_base = pp::fmt("reinterpret_cast<float *>(", _mem.base(into), ")");
+
+ loop_body.front().append(dst_base, "[", dst_index, " + ", tseq.at(n).into(), "] = ", src_base,
+ "[", src_index, " + ", tseq.at(n).from(), "];");
+ }
+
+ pp::LinearDocument res;
+ res.append(loop_body);
+ return res;
+ }
+
+private:
+ const enco::MemoryContext &_mem;
+};
+
+} // namespace
+
+namespace enco
+{
+
+std::unique_ptr<pp::MultiLineText> HostBlockCompiler::compile(const coco::Block *blk) const
+{
+ InstrPrinter prn{_mem};
+
+ auto res = stdex::make_unique<pp::LinearDocument>();
+
+ for (auto ins = blk->instr()->head(); ins; ins = ins->next())
+ {
+ res->append(ins->accept(prn));
+ }
+
+ return std::move(res);
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/CppGen/Host.h b/compiler/enco/core/src/CppGen/Host.h
new file mode 100644
index 000000000..0adb7fe1f
--- /dev/null
+++ b/compiler/enco/core/src/CppGen/Host.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_CPP_GEN_HOST_H__
+#define __ENCO_CPP_GEN_HOST_H__
+
+#include "CppGen/MemoryContext.h"
+
+#include <coco/IR.h>
+#include <pp/MultiLineText.h>
+
+namespace enco
+{
+
+/***
+ * @brief Generate C++ code that does not depend on the Android NN API
+ */
+class HostBlockCompiler
+{
+public:
+ HostBlockCompiler(const enco::MemoryContext &mem) : _mem(mem)
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::unique_ptr<pp::MultiLineText> compile(const coco::Block *blk) const;
+
+private:
+ const enco::MemoryContext &_mem;
+};
+
+} // namespace enco
+
+#endif // __ENCO_CPP_GEN_HOST_H__
diff --git a/compiler/enco/core/src/CppGen/MemoryContext.cpp b/compiler/enco/core/src/CppGen/MemoryContext.cpp
new file mode 100644
index 000000000..e522968a8
--- /dev/null
+++ b/compiler/enco/core/src/CppGen/MemoryContext.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MemoryContext.h"
+
+#include <cassert>
+
+namespace enco
+{
+
+bool MemoryContext::member(const coco::Bag *bag) const
+{
+ // NOTE _base and _size SHOULD BE consistent
+ if (_base.find(bag) != _base.end())
+ {
+ assert(_size.find(bag) != _size.end());
+ return true;
+ }
+
+ assert(_size.find(bag) == _size.end());
+ return false;
+}
+
+void MemoryContext::base(const coco::Bag *bag, const std::string &exp) { _base[bag] = exp; }
+void MemoryContext::size(const coco::Bag *bag, const std::string &exp) { _size[bag] = exp; }
+
+} // namespace enco
diff --git a/compiler/enco/core/src/CppGen/MemoryContext.h b/compiler/enco/core/src/CppGen/MemoryContext.h
new file mode 100644
index 000000000..99c20f3e8
--- /dev/null
+++ b/compiler/enco/core/src/CppGen/MemoryContext.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_CPP_GEN_MEMORY_CONTEXT_H__
+#define __ENCO_CPP_GEN_MEMORY_CONTEXT_H__
+
+#include <coco/IR/Bag.h>
+
+#include <string>
+#include <map>
+
+namespace enco
+{
+
+/**
+ * @brief Record C/C++ expression that denotes the base and size of memory region
+ * dedicated to each bag
+ */
+class MemoryContext
+{
+public:
+ /**
+ * @brief Check whether a base/size expression is registered for a given bag
+ */
+ bool member(const coco::Bag *bag) const;
+
+public:
+ void base(const coco::Bag *bag, const std::string &exp);
+ void size(const coco::Bag *bag, const std::string &exp);
+
+public:
+ const std::string &base(const coco::Bag *bag) const { return _base.at(bag); }
+ const std::string &size(const coco::Bag *bag) const { return _size.at(bag); }
+
+private:
+ std::map<const coco::Bag *, std::string> _base;
+ std::map<const coco::Bag *, std::string> _size;
+};
+
+} // namespace enco
+
+#endif // __ENCO_CPP_GEN_MEMORY_CONTEXT_H__
diff --git a/compiler/enco/core/src/CppGen/Subnet.cpp b/compiler/enco/core/src/CppGen/Subnet.cpp
new file mode 100644
index 000000000..9a636c6ae
--- /dev/null
+++ b/compiler/enco/core/src/CppGen/Subnet.cpp
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CppGen/Subnet.h"
+
+#include "Dims.h"
+#include "String.h"
+
+#include <pp/LinearDocument.h>
+
+#include <stdex/Memory.h>
+
+#include <sstream>
+
+using stdex::make_unique;
+using enco::concat;
+
+#define S(content) #content
+
+namespace ann
+{
+static std::ostream &operator<<(std::ostream &os, const ann::OperandID &id)
+{
+ os << id.value();
+ return os;
+}
+} // namespace ann
+
+namespace
+{
+
+class SubnetStructImpl final : public enco::SubnetStruct
+{
+public:
+ SubnetStructImpl() : _dtor{pp::LinearDocument::Direction::Reverse}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::string model(void) const override { return "_model"; }
+ std::string compilation(void) const override { return "_compilation"; }
+
+public:
+ const pp::MultiLineText &def(void) const override { return _def; }
+ pp::LinearDocument *def(void) { return &_def; }
+
+public:
+ const pp::MultiLineText &ctor(void) const override { return _ctor; }
+ pp::LinearDocument *ctor(void) { return &_ctor; }
+
+public:
+ const pp::MultiLineText &dtor(void) const override { return _dtor; }
+ pp::LinearDocument *dtor(void) { return &_dtor; }
+
+private:
+ pp::LinearDocument _def;
+ pp::LinearDocument _ctor;
+ pp::LinearDocument _dtor;
+};
+
+struct CodeFragment
+{
+ virtual ~CodeFragment() = default;
+
+ virtual void dump(pp::LinearDocument *) const = 0;
+};
+
+pp::LinearDocument *operator<<(pp::LinearDocument *doc, const CodeFragment &fragment)
+{
+ fragment.dump(doc);
+ return doc;
+}
+
+const char *scalar_operand_code(const ann::DType &dtype)
+{
+ switch (dtype)
+ {
+ case ann::DType::S32:
+ return "ANEURALNETWORKS_INT32";
+ default:
+ break;
+ };
+
+ throw std::invalid_argument("dtype");
+}
+
+const char *tensor_operand_code(const ann::DType &dtype)
+{
+ switch (dtype)
+ {
+ case ann::DType::S32:
+ return "ANEURALNETWORKS_TENSOR_INT32";
+ case ann::DType::F32:
+ return "ANEURALNETWORKS_TENSOR_FLOAT32";
+ default:
+ break;
+ };
+
+ throw std::invalid_argument("dtype");
+}
+
+class ScalarOperandDecl final : public CodeFragment
+{
+public:
+ ScalarOperandDecl(const std::string &model, const ann::DType &dtype)
+ : _model{model}, _dtype{dtype}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void dump(pp::LinearDocument *doc) const override
+ {
+ doc->append("{");
+ doc->indent();
+ doc->append("ANeuralNetworksOperandType t;");
+ doc->append();
+ doc->append("t.type = ", scalar_operand_code(_dtype), ";");
+ doc->append("t.dimensionCount = 0;");
+ doc->append("t.dimensions = nullptr;");
+ doc->append("t.scale = 1.0f;");
+ doc->append("t.zeroPoint = 0;");
+ doc->append();
+ doc->append("ANeuralNetworksModel_addOperand(", _model, ", &t);");
+ doc->unindent();
+ doc->append("}");
+ }
+
+private:
+ std::string _model;
+ ann::DType _dtype;
+};
+
+class TensorOperandDecl final : public CodeFragment
+{
+public:
+ TensorOperandDecl(const std::string &model, const ann::DType &dtype,
+ const nncc::core::ADT::tensor::Shape &shape)
+ : _model{model}, _dtype{dtype}, _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void dump(pp::LinearDocument *doc) const override
+ {
+ const auto rank = _shape.rank();
+ const auto dims = as_dims(_shape);
+
+ assert(rank == dims.size());
+
+ doc->append("{");
+ doc->indent();
+ doc->append("uint32_t d[", rank, "] = { ", concat(", ", dims.begin(), dims.end()), " };");
+ doc->append();
+ doc->append("ANeuralNetworksOperandType t;");
+ doc->append();
+ doc->append("t.type = ", tensor_operand_code(_dtype), ";");
+ doc->append("t.dimensionCount = ", rank, ";");
+ doc->append("t.dimensions = d;");
+ doc->append("t.scale = 1.0f;");
+ doc->append("t.zeroPoint = 0;");
+ doc->append();
+ doc->append("ANeuralNetworksModel_addOperand(", _model, ", &t);");
+ doc->unindent();
+ doc->append("}");
+ }
+
+private:
+ std::string _model;
+ ann::DType _dtype;
+ nncc::core::ADT::tensor::Shape _shape;
+};
+
+/**
+ * @brief Code fragment that calls ANeuralNetworksModel_setOperandValue
+ */
+class WeightDecl final : public CodeFragment
+{
+public:
+ WeightDecl(const std::string &model, const ann::OperandID &id, const std::string &base,
+ const std::string &size)
+ : _model{model}, _id{id}, _base{base}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void dump(pp::LinearDocument *doc) const override
+ {
+ doc->append("ANeuralNetworksModel_setOperandValue(", _model, ", ", _id.value(), ", ", _base,
+ ", ", _size, ");");
+ }
+
+private:
+ std::string _model;
+ ann::OperandID _id;
+ std::string _base;
+ std::string _size;
+};
+
+/**
+ * @brief Code fragment that calls ANeuralNetworksModel_addOperation
+ */
+class OperationDecl final : public CodeFragment
+{
+public:
+ OperationDecl(const std::string &model, const ann::Operation *op) : _model{model}, _op{op}
+ {
+ // DO NOTHING
+ }
+
+private:
+ static std::string opcode(const ann::Operation::Code &code)
+ {
+ switch (code)
+ {
+#define ANN_OPERATION(TAG, ENUM) \
+ case ann::Operation::Code::TAG: \
+ return #ENUM;
+#include "ANN/IR/Operation.def"
+#undef ANN_OPERATION
+ default:
+ throw std::invalid_argument{"code"};
+ };
+ }
+
+public:
+ void dump(pp::LinearDocument *doc) const override
+ {
+ const auto in_count = _op->inputs().size();
+ auto in_beg = _op->inputs().begin();
+ auto in_end = _op->inputs().end();
+
+ const auto out_count = _op->outputs().size();
+ auto out_beg = _op->outputs().begin();
+ auto out_end = _op->outputs().end();
+
+ auto op = opcode(_op->code());
+
+ doc->append("{");
+ doc->indent();
+ doc->append("uint32_t inputs[", in_count, "] = { ", concat(", ", in_beg, in_end), " };");
+ doc->append("uint32_t outputs[", out_count, "] = { ", concat(", ", out_beg, out_end), " };");
+ doc->append();
+ doc->append("ANeuralNetworksModel_addOperation(", _model, ", ", op, ", ", in_count,
+ ", inputs, ", out_count, ", outputs);");
+ doc->unindent();
+ doc->append("}");
+ }
+
+private:
+ std::string _model;
+ const ann::Operation *_op;
+};
+
+/**
+ * @brief Code fragment that calls ANeuralNetworksModel_identifyInputsAndOutputs
+ */
+class ArgumentDecl final : public CodeFragment
+{
+public:
+ ArgumentDecl(const std::string &mname, const ANNBinder *binder) : _mname{mname}, _binder{binder}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void dump(pp::LinearDocument *doc) const override
+ {
+ doc->append("{");
+ doc->indent();
+
+ auto module = _binder->module();
+ const uint32_t input_count = module->input()->size();
+
+ doc->append("uint32_t inputs[", input_count, "];");
+ for (uint32_t n = 0; n < input_count; ++n)
+ {
+ doc->append("inputs[", n, "] = ", module->input()->at(n), ";");
+ }
+
+ const uint32_t output_count = module->output()->size();
+
+ doc->append("uint32_t outputs[", output_count, "];");
+ for (uint32_t n = 0; n < output_count; ++n)
+ {
+ doc->append("outputs[", n, "] = ", module->output()->at(n), ";");
+ }
+
+ doc->append("ANeuralNetworksModel_identifyInputsAndOutputs(", _mname, ", ", input_count,
+ ", inputs, ", output_count, ", outputs);");
+ doc->unindent();
+ doc->append("}");
+ }
+
+private:
+ std::string _mname;
+ const ANNBinder *_binder;
+};
+
+} // namespace
+
+namespace enco
+{
+
+std::unique_ptr<SubnetStruct> SubnetStructBuilder::build(const ANNBinder *binder) const
+{
+ auto res = make_unique<SubnetStructImpl>();
+
+ auto mname = res->model();
+ auto cname = res->compilation();
+
+ res->def()->append("ANeuralNetworksModel *", mname, ";");
+ res->def()->append("ANeuralNetworksCompilation *", cname, ";");
+
+ res->ctor()->append("ANeuralNetworksModel_create(&", mname, ");");
+ res->dtor()->append("ANeuralNetworksModel_free(", mname, ");");
+
+ binder->module()->operand()->each([&](const ann::OperandID &id, const ann::Operand *info) {
+ // TODO Remove dynamic cast
+ if (auto scalar = dynamic_cast<const ann::ScalarOperand *>(info))
+ {
+ res->ctor() << ScalarOperandDecl{mname, scalar->dtype()};
+ }
+ else if (auto tensor = dynamic_cast<const ann::TensorOperand *>(info))
+ {
+ res->ctor() << TensorOperandDecl{mname, tensor->dtype(), tensor->shape()};
+ }
+ else
+ {
+ throw std::runtime_error{"Unsupported"};
+ }
+
+ if (_weighted.find(info) != _weighted.end())
+ {
+ const auto &base_exp = _base_exprs.at(info);
+ const auto &size_exp = _size_exprs.at(info);
+
+ res->ctor() << WeightDecl{mname, id, base_exp, size_exp};
+ }
+ });
+
+ for (unsigned n = 0; n < binder->module()->operation()->count(); ++n)
+ {
+ auto op = binder->module()->operation()->at(n);
+ res->ctor() << OperationDecl{mname, op};
+ }
+
+ // Emit ANeuralNetworksModel_identifyInputsAndOutputs call
+ res->ctor() << ArgumentDecl{mname, binder};
+
+ // Emit ANeuralNetworksModel_finish call
+ res->ctor()->append("ANeuralNetworksModel_finish(", mname, ");");
+
+ // Create compilation
+ res->ctor()->append("ANeuralNetworksCompilation_create(", mname, ", &", cname, ");");
+ res->dtor()->append("ANeuralNetworksCompilation_free(", cname, ");");
+
+ // Finalize compilation
+ res->ctor()->append("ANeuralNetworksCompilation_finish(", cname, ");");
+
+ return std::move(res);
+}
+
+std::unique_ptr<pp::MultiLineText> SubnetBlockCompiler::compile(const ANNBinder *binder) const
+{
+ auto res = make_unique<pp::LinearDocument>();
+
+ const auto compilation = _compilation_ctx.at(binder);
+
+ res->append("ANeuralNetworksExecution *execution;");
+ res->append("ANeuralNetworksEvent *event;");
+ res->append();
+ res->append("ANeuralNetworksExecution_create(", compilation, ", &execution);");
+
+ // Emit ANeuralNetworksExecution_setInput call(s)
+ for (uint32_t n = 0; n < binder->module()->input()->size(); ++n)
+ {
+ auto bag = binder->input(n);
+ auto base = _mem.base(bag);
+ auto size = _mem.size(bag);
+
+ res->append("ANeuralNetworksExecution_setInput(execution, ", n, ", nullptr, ", base, ", ", size,
+ ");");
+ }
+
+ // Emit ANeuralNetworksExecution_setOutput call(s)
+ for (uint32_t n = 0; n < binder->module()->output()->size(); ++n)
+ {
+ auto bag = binder->output(n);
+ auto base = _mem.base(bag);
+ auto size = _mem.size(bag);
+
+ res->append("ANeuralNetworksExecution_setOutput(execution, ", n, ", nullptr, ", base, ", ",
+ size, ");");
+ }
+
+ res->append("ANeuralNetworksExecution_startCompute(execution, &event);");
+ res->append("ANeuralNetworksEvent_wait(event);");
+ res->append("ANeuralNetworksEvent_free(event);");
+
+ res->append("ANeuralNetworksExecution_free(execution);");
+
+ return std::move(res);
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/CppGen/Subnet.h b/compiler/enco/core/src/CppGen/Subnet.h
new file mode 100644
index 000000000..4a5738876
--- /dev/null
+++ b/compiler/enco/core/src/CppGen/Subnet.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_CPP_GEN_SUBNET_H__
+#define __ENCO_CPP_GEN_SUBNET_H__
+
+#include "ANN/Binder.h"
+#include "CppGen/MemoryContext.h"
+
+#include <pp/MultiLineText.h>
+#include <map>
+#include <set>
+
+namespace enco
+{
+
+/**
+ * @brief A C++ struct that provides Android NN model & compilation
+ */
+struct SubnetStruct
+{
+ virtual ~SubnetStruct() = default;
+
+ /// @brief Return the field name of ANeuralNetworksModel value
+ virtual std::string model(void) const = 0;
+  /// @brief Return the field name of ANeuralNetworksCompilation value
+ virtual std::string compilation(void) const = 0;
+
+ virtual const pp::MultiLineText &def(void) const = 0;
+ virtual const pp::MultiLineText &ctor(void) const = 0;
+ virtual const pp::MultiLineText &dtor(void) const = 0;
+};
+
+class SubnetStructBuilder
+{
+public:
+ std::unique_ptr<SubnetStruct> build(const ANNBinder *binder) const;
+
+public:
+ void expr(const ann::Operand *oper, const std::string &base, const std::string &size)
+ {
+ _weighted.insert(oper);
+ _base_exprs[oper] = base;
+ _size_exprs[oper] = size;
+ }
+
+private:
+ std::set<const ann::Operand *> _weighted;
+ std::map<const ann::Operand *, std::string> _base_exprs;
+ std::map<const ann::Operand *, std::string> _size_exprs;
+};
+
+/**
+ * @brief Generate C++ code that invokes Android NN subnet
+ */
+class SubnetBlockCompiler
+{
+public:
+ SubnetBlockCompiler(const enco::MemoryContext &mem) : _mem(mem)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /// @brief Specify how to access ANeuralNetworksCompilation value (C expression)
+ void bind(const ANNBinder *binder, const std::string &exp) { _compilation_ctx[binder] = exp; }
+
+public:
+ std::unique_ptr<pp::MultiLineText> compile(const ANNBinder *binder) const;
+
+private:
+ const enco::MemoryContext &_mem;
+ std::map<const ANNBinder *, std::string> _compilation_ctx;
+};
+
+} // namespace enco
+
+#endif // __ENCO_CPP_GEN_SUBNET_H__
diff --git a/compiler/enco/core/src/Dims.h b/compiler/enco/core/src/Dims.h
new file mode 100644
index 000000000..e0a4fd44d
--- /dev/null
+++ b/compiler/enco/core/src/Dims.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DIMS_H__
+#define __DIMS_H__
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+static inline std::vector<uint32_t> as_dims(const nncc::core::ADT::tensor::Shape &shape)
+{
+ std::vector<uint32_t> res;
+
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ res.emplace_back(shape.dim(axis));
+ }
+
+ return res;
+}
+
+#endif // __DIMS_H__
diff --git a/compiler/enco/core/src/IRUtils.cpp b/compiler/enco/core/src/IRUtils.cpp
new file mode 100644
index 000000000..59f6b0dbe
--- /dev/null
+++ b/compiler/enco/core/src/IRUtils.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IRUtils.h"
+
+#include <cassert>
+
+namespace enco
+{
+
+/**
+ * @brief Substitute all the USE occurrences of an object with another object
+ * @param from Object to be replaced
+ * @param into Object to be used instead
+ * NOTE This may be used when something like -- 'from' will be removed so we need
+ * to replace object Consumers that use 'from' to 'into'
+ * EXAMPLE
+ * {
+ * subst(child, bigone);
+ * m->entity()->object()->destroy(child);
+ * }
+ * This code will change all the Consumers that use 'child' to 'bigone' and
+ * destroy the 'child' object.
+ */
+void subst(coco::Object *from, coco::Object *into)
+{
+ assert(from != into);
+
+ while (!from->uses()->empty())
+ {
+ auto use = *(from->uses()->begin());
+
+ use->value(into);
+ }
+}
+
+std::vector<coco::Instr *> instr_sequence(coco::Module *m)
+{
+ std::vector<coco::Instr *> res;
+
+ for (auto B = m->block()->head(); B; B = B->next())
+ {
+ for (auto I = B->instr()->head(); I; I = I->next())
+ {
+ res.emplace_back(I);
+ }
+ }
+
+ return res;
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/IRUtils.h b/compiler/enco/core/src/IRUtils.h
new file mode 100644
index 000000000..da0754303
--- /dev/null
+++ b/compiler/enco/core/src/IRUtils.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_IR_UTILS_H__
+#define __ENCO_IR_UTILS_H__
+
+#include <coco/IR.h>
+
+#include <vector>
+
+namespace enco
+{
+
+/**
+ * @brief Replace all the "USE" of 'from' with 'into'
+ *
+ * NOTE subst(from, into) WILL NOT update 'DEF'
+ */
+void subst(coco::Object *from, coco::Object *into);
+
+/**
+ * @brief Return instructions in execution order
+ */
+std::vector<coco::Instr *> instr_sequence(coco::Module *m);
+
+} // namespace enco
+
+#endif // __ENCO_IR_UTILS_H__
diff --git a/compiler/enco/core/src/IRValidator.cpp b/compiler/enco/core/src/IRValidator.cpp
new file mode 100644
index 000000000..1337b88e4
--- /dev/null
+++ b/compiler/enco/core/src/IRValidator.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IRValidator.h"
+
+#include <cassert>
+
+namespace enco
+{
+
+coco::FeatureShape output_shape(coco::Conv2D *conv2D)
+{
+ auto load = conv2D->arg()->asLoad();
+ assert(load);
+
+ auto ifm = load->object()->asFeature();
+ assert(ifm);
+
+ auto ker = conv2D->ker();
+ auto stride = conv2D->stride();
+ auto pad = conv2D->pad();
+
+ auto striding_width = ifm->shape().width() + pad->left() + pad->right() - ker->shape().width();
+ auto striding_height = ifm->shape().height() + pad->top() + pad->bottom() - ker->shape().height();
+
+  // Normally the formula is round(striding_width) / stride->horizontal().
+  // In coco IR, striding_width should be a multiple of stride->horizontal(), so round(...) was
+  // removed. The same applies to striding_height.
+ assert(striding_width % stride->horizontal() == 0);
+ assert(striding_height % stride->vertical() == 0);
+
+ auto ofm_width = striding_width / stride->horizontal() + 1;
+ auto ofm_height = striding_height / stride->vertical() + 1;
+
+ return coco::FeatureShape(ifm->shape().batch(), ker->shape().count(), ofm_height, ofm_width);
+}
+
+bool validate_output_shape(Code *code)
+{
+ auto module = code->module();
+
+ // for each eval ( conv2d ( ... ) ), check the output shape of conv2D matches output of eval
+ for (auto blk = module->block()->head(); blk; blk = blk->next())
+ {
+ for (auto instr = blk->instr()->head(); instr; instr = instr->next())
+ {
+ auto eval = instr->asEval();
+ if (eval == nullptr)
+ continue;
+
+ auto op = eval->op();
+ if (!op->asConv2D())
+ continue;
+
+ auto conv2D = op->asConv2D();
+ auto expected_shape = output_shape(conv2D);
+
+ auto eval_out = eval->out()->asFeature();
+ assert(eval_out);
+
+ auto actual_shape = eval_out->shape();
+
+ if (actual_shape != expected_shape)
+ return false;
+ }
+ }
+ return true;
+}
+
+bool validate(Code *code) { return validate_output_shape(code); }
+
+} // namespace enco
diff --git a/compiler/enco/core/src/IRValidator.h b/compiler/enco/core/src/IRValidator.h
new file mode 100644
index 000000000..f4adb0a5e
--- /dev/null
+++ b/compiler/enco/core/src/IRValidator.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_IR_VALIDATOR_H__
+#define __ENCO_IR_VALIDATOR_H__
+
+#include "Code.h"
+
+namespace enco
+{
+
+bool validate(Code *code);
+
+} // namespace enco
+
+#endif // __ENCO_IR_VALIDATOR_H__
diff --git a/compiler/enco/core/src/IRValidator.test.cpp b/compiler/enco/core/src/IRValidator.test.cpp
new file mode 100644
index 000000000..14cda6173
--- /dev/null
+++ b/compiler/enco/core/src/IRValidator.test.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IRValidator.h"
+
+#include "Code.h"
+
+#include <gtest/gtest.h>
+
+#include <array>
+
+namespace
+{
+
+using IntList4 = std::array<int, 4>;
+using IntList2 = std::array<int, 2>;
+
+} // namespace
+
+// The layout of ifm, ker, ofm is NHWC, pad == {top, bottom, left, right}, and stride == {vertical,
+// horizontal}.
+std::unique_ptr<coco::Module> get_conv2D(IntList4 ifm, IntList4 ker, IntList4 ofm, IntList4 pad,
+ IntList2 stride)
+{
+ auto module = coco::Module::create();
+ auto block = module->entity()->block()->create();
+ auto eval = module->entity()->instr()->create<coco::Eval>();
+ auto load = module->entity()->op()->create<coco::Load>();
+ auto conv2D = module->entity()->op()->create<coco::Conv2D>();
+
+ auto ifm_obj = module->entity()->object()->create<coco::FeatureObject>();
+ coco::FeatureShape ifm_shape(ifm[0], ifm[3], ifm[1], ifm[2]);
+ ifm_obj->layout(coco::FeatureLayouts::BHWC::create(ifm_shape));
+
+ auto ofm_obj = module->entity()->object()->create<coco::FeatureObject>();
+ coco::FeatureShape ofm_shape(ofm[0], ofm[3], ofm[1], ofm[2]);
+ ofm_obj->layout(coco::FeatureLayouts::BHWC::create(ofm_shape));
+
+ auto ker_obj = module->entity()->object()->create<coco::KernelObject>();
+ nncc::core::ADT::kernel::Shape ker_shape(ker[0], ker[3], ker[1], ker[2]);
+ ker_obj->layout(coco::KernelLayouts::NHWC::create(ker_shape));
+
+ // linking entities
+ module->block()->append(block);
+ block->instr()->append(eval);
+ eval->op(conv2D);
+ eval->out(ofm_obj);
+ load->object(ifm_obj);
+ conv2D->ker(ker_obj);
+ conv2D->arg(load);
+
+ // param setting
+ conv2D->pad()->top(pad[0]).bottom(pad[1]).left(pad[2]).right(pad[3]);
+ conv2D->stride()->vertical(stride[0]).horizontal(stride[1]);
+
+ return std::move(module);
+}
+
+TEST(IRValidatorTest, conv2D_simple)
+{
+ auto ifm_nhwc = IntList4{1, 3, 3, 2};
+ auto ker_nhwc = IntList4{1, 1, 1, 2};
+ auto ofm_nhwc = IntList4{1, 3, 3, 1};
+
+ auto pad_tblr = IntList4{0, 0, 0, 0};
+ auto stride_vh = IntList2{1, 1};
+
+ auto module = get_conv2D(ifm_nhwc, ker_nhwc, ofm_nhwc, pad_tblr, stride_vh);
+ enco::Code code{module.get(), nullptr};
+
+ ASSERT_TRUE(enco::validate(&code));
+}
+
+TEST(IRValidatorTest, conv2D_stride_2)
+{
+ auto ifm_nhwc = IntList4{1, 4, 4, 3};
+ auto ker_nhwc = IntList4{2, 2, 2, 3};
+ auto ofm_nhwc = IntList4{1, 3, 3, 2};
+
+ auto pad_tblr = IntList4{1, 1, 1, 1};
+ auto stride_vh = IntList2{2, 2};
+
+ auto module = get_conv2D(ifm_nhwc, ker_nhwc, ofm_nhwc, pad_tblr, stride_vh);
+ enco::Code code{module.get(), nullptr};
+
+ ASSERT_TRUE(enco::validate(&code));
+}
+
+TEST(IRValidatorTest, conv2D_output_batch_check)
+{
+ auto ifm_nhwc = IntList4{1, 2, 2, 2};
+ auto ker_nhwc = IntList4{3, 1, 1, 2}; // expected output depth is 3
+ auto ofm_nhwc = IntList4{1, 2, 2, 1}; // but 1
+
+ auto pad_tblr = IntList4{0, 0, 0, 0};
+ auto stride_vh = IntList2{1, 1};
+
+ auto module = get_conv2D(ifm_nhwc, ker_nhwc, ofm_nhwc, pad_tblr, stride_vh);
+ enco::Code code{module.get(), nullptr};
+
+ ASSERT_FALSE(enco::validate(&code));
+}
+
+TEST(IRValidatorTest, conv2D_wrong_HW)
+{
+ auto ifm_nhwc = IntList4{1, 2, 2, 1};
+ auto ker_nhwc = IntList4{1, 2, 2, 1};
+ auto ofm_nhwc = IntList4{1, 1, 1, 1}; // HW should be 2, 2
+
+ auto pad_tblr = IntList4{1, 1, 1, 1};
+ auto stride_vh = IntList2{2, 2};
+
+ auto module = get_conv2D(ifm_nhwc, ker_nhwc, ofm_nhwc, pad_tblr, stride_vh);
+ enco::Code code{module.get(), nullptr};
+
+ ASSERT_FALSE(enco::validate(&code));
+}
diff --git a/compiler/enco/core/src/Pass.h b/compiler/enco/core/src/Pass.h
new file mode 100644
index 000000000..d78cfaad3
--- /dev/null
+++ b/compiler/enco/core/src/Pass.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_PASS_H__
+#define __ENCO_PASS_H__
+
+#include "Session.h"
+
+#include <string>
+
+namespace enco
+{
+
+class Pass
+{
+public:
+ class Name
+ {
+ public:
+ Name(const std::string &content) : _content{content}
+ {
+ // DO NOTHING
+ }
+
+ Name(const Name &) = default;
+ Name(Name &&) = default;
+
+ ~Name() = default;
+
+ public:
+ const std::string &content(void) const { return _content; }
+
+ private:
+ std::string _content;
+ };
+
+public:
+ Pass(const Name &name) : _name{name}
+ {
+ // DO NOTHING
+ }
+
+ Pass(const Pass &) = delete;
+ Pass(Pass &&) = delete;
+
+ virtual ~Pass() = default;
+
+public:
+ const Name &name(void) const { return _name; }
+
+public:
+ virtual void run(const SessionID &) const = 0;
+
+private:
+ Name _name;
+};
+
+static inline Pass::Name pass_name(const std::string &name) { return Pass::Name{name}; }
+
+} // namespace enco
+
+#define PASS_CTOR(NAME) \
+ NAME() : enco::Pass { enco::pass_name(#NAME) }
+
+#endif // __ENCO_PASS_H__
diff --git a/compiler/enco/core/src/Pass.test.cpp b/compiler/enco/core/src/Pass.test.cpp
new file mode 100644
index 000000000..112bd7478
--- /dev/null
+++ b/compiler/enco/core/src/Pass.test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pass.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+struct ExamplePass final : public enco::Pass
+{
+ PASS_CTOR(ExamplePass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const enco::SessionID &) const override { return; }
+};
+
+} // namespace
+
+TEST(PASS, ctor)
+{
+ ExamplePass pass;
+
+ ASSERT_EQ(pass.name().content(), "ExamplePass");
+}
diff --git a/compiler/enco/core/src/Pipeline.h b/compiler/enco/core/src/Pipeline.h
new file mode 100644
index 000000000..8ab43c16a
--- /dev/null
+++ b/compiler/enco/core/src/Pipeline.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_PIPELINE_H__
+#define __ENCO_PIPELINE_H__
+
+#include "Pass.h"
+
+#include <memory>
+#include <vector>
+#include <cstdint>
+
+namespace enco
+{
+
+class Pipeline
+{
+public:
+ uint32_t size(void) const { return _passes.size(); }
+
+public:
+ const Pass &at(uint32_t n) const { return *(_passes.at(n)); }
+
+public:
+ void append(std::unique_ptr<Pass> &&pass) { _passes.emplace_back(std::move(pass)); }
+
+private:
+ std::vector<std::unique_ptr<Pass>> _passes;
+};
+
+} // namespace enco
+
+#endif // __ENCO_PIPELINE_H__
diff --git a/compiler/enco/core/src/Pipeline.test.cpp b/compiler/enco/core/src/Pipeline.test.cpp
new file mode 100644
index 000000000..1cd730e98
--- /dev/null
+++ b/compiler/enco/core/src/Pipeline.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pipeline.h"
+
+#include <gtest/gtest.h>
+
+TEST(PIPELINE, default_ctor)
+{
+ enco::Pipeline pipeline;
+
+ ASSERT_EQ(pipeline.size(), 0);
+}
diff --git a/compiler/enco/core/src/Session.cpp b/compiler/enco/core/src/Session.cpp
new file mode 100644
index 000000000..034f23892
--- /dev/null
+++ b/compiler/enco/core/src/Session.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Session.h"
+
+#include <stdex/Memory.h>
+
+#include <map>
+#include <memory>
+
+using stdex::make_unique;
+
+namespace
+{
+
+std::map<enco::SessionID, std::unique_ptr<enco::Code>> sess_to_code;
+std::map<const coco::Module *, enco::SessionID> module_to_sess;
+std::map<const coco::Data *, enco::SessionID> data_to_sess;
+
+} // namespace
+
+namespace enco
+{
+
+SessionID make_session(coco::Module *m, coco::Data *d)
+{
+ static uint32_t sess = 0;
+ SessionID curr{sess++};
+
+ sess_to_code[curr] = make_unique<Code>(m, d);
+ module_to_sess[m] = curr;
+ data_to_sess[d] = curr;
+
+ return curr;
+}
+
+SessionID session(const coco::Module *m) { return module_to_sess.at(m); }
+SessionID session(const coco::Data *d) { return data_to_sess.at(d); }
+
+coco::Module *module(const SessionID &sess) { return sess_to_code.at(sess)->module(); }
+coco::Data *data(const SessionID &sess) { return sess_to_code.at(sess)->data(); }
+
+Code *code(const SessionID &sess) { return sess_to_code.at(sess).get(); }
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Session.h b/compiler/enco/core/src/Session.h
new file mode 100644
index 000000000..b6d502f3b
--- /dev/null
+++ b/compiler/enco/core/src/Session.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_SESSION_H__
+#define __ENCO_SESSION_H__
+
+#include "Code.h"
+
+namespace enco
+{
+
+// TODO Rewrite this definition
+using SessionID = uint32_t;
+
+SessionID make_session(coco::Module *m, coco::Data *d);
+
+SessionID session(const coco::Module *m);
+SessionID session(const coco::Data *d);
+
+coco::Module *module(const SessionID &);
+coco::Data *data(const SessionID &);
+
+static inline coco::Module *module(const coco::Data *d) { return module(session(d)); }
+static inline coco::Data *data(const coco::Module *m) { return data(session(m)); }
+
+// WARN This API is introduced just for backward compatibility
+// Do NOT use this anymore as it will be removed
+Code *code(const SessionID &);
+
+} // namespace enco
+
+#endif // __ENCO_SESSION_H__
diff --git a/compiler/enco/core/src/String.h b/compiler/enco/core/src/String.h
new file mode 100644
index 000000000..0f04f1ffe
--- /dev/null
+++ b/compiler/enco/core/src/String.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_STRING_H__
+#define __ENCO_STRING_H__
+
+//
+// String-manipulating routines
+//
+#include <ostream>
+#include <sstream>
+
+#include <string>
+
+namespace enco
+{
+
+template <typename It> void concat(std::ostream &os, const std::string &sep, It beg, It end)
+{
+ uint32_t count = 0;
+
+ for (auto it = beg; it != end; ++it, ++count)
+ {
+ if (count == 0)
+ {
+ os << *it;
+ }
+ else
+ {
+ os << sep << *it;
+ }
+ }
+}
+
+template <typename It> std::string concat(const std::string &sep, It beg, It end)
+{
+ std::stringstream ss;
+ concat(ss, sep, beg, end);
+ return ss.str();
+}
+
+} // namespace enco
+
+#endif // __ENCO_STRING_H__
diff --git a/compiler/enco/core/src/Support/Debugging.cpp b/compiler/enco/core/src/Support/Debugging.cpp
new file mode 100644
index 000000000..bd65a27d8
--- /dev/null
+++ b/compiler/enco/core/src/Support/Debugging.cpp
@@ -0,0 +1,533 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Debugging.h"
+
+#include <pp/LinearDocument.h>
+#include <pp/MultiLineTextUtils.h>
+
+#include <stack>
+
+#include <iostream>
+
+#define DEBUGGING_API_P(NAME, TYPE, VAR) \
+ static void _##NAME(const TYPE *); \
+ void NAME(long p) { NAME(reinterpret_cast<const TYPE *>(p)); } \
+ void NAME(const TYPE *p) \
+ { \
+ if (p == nullptr) \
+ { \
+ std::cout << "(nullptr)" << std::endl; \
+ } \
+ else \
+ { \
+ _##NAME(p); \
+ } \
+ } \
+ void _##NAME(const TYPE *VAR)
+
+namespace
+{
+
+class SectionBuilder
+{
+public:
+ SectionBuilder(const std::string &tag) : _tag{tag}
+ {
+ // DO NOTHING
+ }
+
+public:
+ template <typename Callback> pp::LinearDocument build(Callback cb) const
+ {
+ pp::LinearDocument res;
+
+ res.append(_tag, " {");
+ res.indent();
+
+ cb(res);
+
+ res.unindent();
+ res.append("}");
+
+ return res;
+ }
+
+private:
+ std::string _tag;
+};
+
+template <typename Callback>
+pp::LinearDocument operator<<(const SectionBuilder &builder, Callback cb)
+{
+ return builder.build(std::forward<Callback>(cb));
+}
+
+SectionBuilder section(const std::string &tag) { return SectionBuilder{tag}; }
+}
+
+/**
+ * SECTION: Bag
+ */
+namespace
+{
+
+pp::LinearDocument describe(const coco::Bag *bag)
+{
+ pp::LinearDocument doc;
+
+ doc.append("addr: ", bag);
+ doc.append("size: ", bag->size());
+ // TODO Print Read
+ // TODO Print Update
+ // TODO Print Dep
+ return doc;
+}
+
+} // namespace
+
+DEBUGGING_API_P(enco_dump_all_bags, coco::Module, m)
+{
+ for (uint32_t n = 0; n < m->entity()->bag()->size(); ++n)
+ {
+ auto bag = m->entity()->bag()->at(n);
+ assert(bag != nullptr);
+
+ auto set = [bag](pp::LinearDocument &doc) { doc.append(describe(bag)); };
+ auto desc = section("bag").build(set);
+
+ std::cout << desc << std::endl;
+ }
+}
+
+/**
+ * SECTION: Object
+ */
+namespace
+{
+std::string op_kind(const coco::Op *op);
+
+/**
+ * @brief Return the def(producer) type of object
+ */
+std::string def_kind(const coco::Def *def)
+{
+ if (def)
+ {
+ if (auto instr = dynamic_cast<coco::Instr *>(def->producer()))
+ {
+ std::stringstream ss;
+
+ if (auto eval = instr->asEval())
+ {
+ ss << op_kind(eval->op()) << "(" << instr << ")";
+ return ss.str();
+ }
+ else if (instr->asCopy())
+ {
+ ss << "Copy(" << instr << ")";
+ return ss.str();
+ }
+ else if (instr->asShuffle())
+ {
+ ss << "Shuffle(" << instr << ")";
+ return ss.str();
+ }
+ }
+ else
+ {
+ return "(unknown)";
+ }
+ }
+
+ return "(none)";
+}
+
+pp::LinearDocument describe(const coco::Object *obj)
+{
+ pp::LinearDocument doc;
+
+ doc.append("addr: ", obj);
+ doc.append("bag: ", obj->bag());
+ doc.append("producer: ", def_kind(obj->def()));
+ // TODO Show Uses
+  // TODO Show FeatureObject/KernelObject info
+
+ return doc;
+}
+
+} // namespace
+
+DEBUGGING_API_P(enco_dump_all_objects, coco::Module, m)
+{
+ for (uint32_t n = 0; n < m->entity()->object()->size(); ++n)
+ {
+ auto obj = m->entity()->object()->at(n);
+ assert(obj != nullptr);
+
+ auto set = [obj](pp::LinearDocument &doc) { doc.append(describe(obj)); };
+ auto desc = section("object").build(set);
+
+ std::cout << desc << std::endl;
+ }
+}
+
+/**
+ * SECTION: Op
+ */
+namespace
+{
+
+struct OpTree
+{
+public:
+ OpTree(const coco::Op *op) : _op{op}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const coco::Op *root(void) const { return _op; }
+
+private:
+ const coco::Op *_op;
+};
+
+std::string op_kind(const coco::Op *op)
+{
+ struct OpKind : public coco::Op::Visitor<std::string>
+ {
+ std::string visit(const coco::Load *) override { return "Load"; }
+ std::string visit(const coco::Conv2D *) override { return "Conv2D"; }
+ std::string visit(const coco::MaxPool2D *) override { return "MaxPool2D"; }
+ std::string visit(const coco::AvgPool2D *) override { return "AvgPool2D"; }
+ std::string visit(const coco::PadF *) override { return "PadF"; }
+ std::string visit(const coco::ReLU *) override { return "ReLU"; }
+ std::string visit(const coco::Add *) override { return "Add"; }
+ std::string visit(const coco::Mul *) override { return "Mul"; }
+ std::string visit(const coco::ConcatF *) override { return "ConcatF"; }
+ std::string visit(const coco::Sub *) override { return "Sub"; }
+ std::string visit(const coco::Sqrt *) override { return "Sqrt"; }
+ std::string visit(const coco::Div *) override { return "Div"; }
+ };
+
+ OpKind v;
+
+ return op->accept(v);
+}
+
+pp::LinearDocument describe(const coco::Padding2D *pad)
+{
+ pp::LinearDocument doc;
+
+ doc.append("top: ", pad->top());
+ doc.append("bottom: ", pad->bottom());
+ doc.append("left: ", pad->left());
+ doc.append("right: ", pad->right());
+
+ return doc;
+}
+
+pp::LinearDocument describe(const coco::Stride2D *stride)
+{
+ pp::LinearDocument doc;
+
+ doc.append("vertical: ", stride->vertical());
+ doc.append("horizontal ", stride->horizontal());
+
+ return doc;
+}
+
+pp::LinearDocument describe(const coco::Conv2D *conv)
+{
+ pp::LinearDocument doc;
+
+ doc.append("arg: ", conv->arg());
+ doc.append("ker: ", conv->ker());
+ doc.append("group: ", conv->group());
+
+ if (auto pad = conv->pad())
+ {
+ auto set = [pad](pp::LinearDocument &doc) { doc.append(describe(pad)); };
+ auto desc = section("pad").build(set);
+ doc.append(desc);
+ }
+
+ if (auto stride = conv->stride())
+ {
+ auto set = [stride](pp::LinearDocument &doc) { doc.append(describe(stride)); };
+ auto desc = section("stride").build(set);
+ doc.append(desc);
+ }
+
+ return doc;
+}
+
+pp::LinearDocument describe(const coco::Op *op)
+{
+ pp::LinearDocument doc;
+
+ doc.append("addr: ", op);
+ doc.append("kind: ", op_kind(op));
+ doc.append("parent(instr): ", op->parent());
+ doc.append("up(op): ", op->up());
+
+ if (auto conv = op->asConv2D())
+ {
+ auto set = [conv](pp::LinearDocument &doc) { doc.append(describe(conv)); };
+ auto desc = section("conv2d").build(set);
+ doc.append(desc);
+ }
+ else if (auto load = op->asLoad())
+ {
+ auto set = [load](pp::LinearDocument &doc) { doc.append(describe(load->object())); };
+ auto desc = section("load").build(set);
+ doc.append(desc);
+ }
+
+ return doc;
+}
+
+pp::LinearDocument describe(const OpTree &t, bool verbose = false)
+{
+ pp::LinearDocument doc;
+
+ struct Frame
+ {
+ public:
+ Frame(const coco::Op *op) : _op{op}, _indicator{0}
+ {
+ // op SHOULD BE valid
+ assert(_op != nullptr);
+ }
+
+ public:
+ /**
+ * @brief Return a pointer to coco::Op of interest
+ */
+ const coco::Op *op(void) const { return _op; }
+
+ /**
+ * @brief Return the indicator
+ *
+ * Let's assume that the arity of a coco::Op of interest is N
+ * INDICATOR 0 -> Print the op itself
+ * INDICATOR 1 -> Print the first argument
+ * ...
+ * INDICATOR N -> Print the N-th argument
+ * INDICATOR N + 1 -> Done
+ */
+ uint32_t indicator(void) const { return _indicator; }
+
+ public:
+ void advance(void) { _indicator += 1; }
+
+ private:
+ const coco::Op *_op;
+ uint32_t _indicator;
+ };
+
+ std::stack<Frame> stack;
+
+ stack.emplace(t.root());
+
+ while (stack.size() > 0)
+ {
+ auto op = stack.top().op();
+ uint32_t indicator = stack.top().indicator();
+
+ if (indicator == 0)
+ {
+ doc.append(op_kind(op), " (", op, ")");
+
+ doc.indent();
+ stack.top().advance();
+
+ // TODO Need to update it to better design for verbose flag
+ if (verbose)
+ {
+ auto set = [op](pp::LinearDocument &doc) { doc.append(describe(op)); };
+ auto desc = section("op").build(set);
+ doc.append(desc);
+ }
+ }
+ else if (indicator < op->arity() + 1)
+ {
+ stack.top().advance();
+ stack.emplace(op->arg(indicator - 1));
+ }
+ else
+ {
+ assert(indicator == op->arity() + 1);
+ doc.unindent();
+ stack.pop();
+ }
+ }
+
+ return doc;
+}
+
+} // namespace
+
+DEBUGGING_API_P(enco_dump_op, coco::Op, op)
+{
+ {
+ std::cout << describe(op) << std::endl;
+ }
+}
+
+DEBUGGING_API_P(enco_dump_op_tree, coco::Op, op)
+{
+ {
+ std::cout << describe(OpTree(op)) << std::endl;
+ }
+}
+
+DEBUGGING_API_P(enco_dump_all_ops, coco::Module, m)
+{
+ SectionBuilder section_builder{"op"};
+
+ for (uint32_t n = 0; n < m->entity()->op()->size(); ++n)
+ {
+ auto op = m->entity()->op()->at(n);
+ assert(op != nullptr);
+
+ auto desc = section("op").build([op](pp::LinearDocument &doc) { doc.append(describe(op)); });
+
+ std::cout << desc << std::endl;
+ }
+}
+
+/**
+ * SECTION: Instr
+ */
+namespace
+{
+
+std::string kind(const coco::Instr *ins)
+{
+ struct InstrKind : public coco::Instr::Visitor<std::string>
+ {
+ std::string visit(const coco::Eval *) override { return "Eval"; }
+ std::string visit(const coco::Copy *) override { return "Copy"; }
+ std::string visit(const coco::Shuffle *) override { return "Shuffle"; }
+ };
+
+ InstrKind v;
+
+ return ins->accept(v);
+}
+
+pp::LinearDocument describe(const coco::Instr *ins, bool verbose = false)
+{
+ pp::LinearDocument doc;
+
+ doc.append("addr: ", ins);
+ doc.append("kind: ", kind(ins));
+ doc.append("parent: ", ins->parent());
+
+ // TODO Need to update it to better design for verbose flag
+ if (verbose)
+ {
+ if (auto eval = ins->asEval())
+ {
+ auto optset = [eval, verbose](pp::LinearDocument &doc) {
+ doc.append(describe(OpTree(eval->op()), verbose));
+ };
+ auto optdesc = section("op").build(optset);
+ doc.append(optdesc);
+
+ auto outset = [eval](pp::LinearDocument &doc) { doc.append(describe(eval->out())); };
+ auto outdesc = section("out").build(outset);
+ doc.append(outdesc);
+ }
+ else if (auto copy = ins->asCopy())
+ {
+ auto from = [copy](pp::LinearDocument &doc) { doc.append(describe(copy->from())); };
+ auto into = [copy](pp::LinearDocument &doc) { doc.append(describe(copy->into())); };
+
+ auto fdesc = section("from").build(from);
+ doc.append(fdesc);
+
+ auto idesc = section("into").build(into);
+ doc.append(idesc);
+ }
+ }
+
+ return doc;
+}
+
+} // namespace
+
+DEBUGGING_API_P(enco_dump_all_instrs, coco::Module, m)
+{
+ for (uint32_t n = 0; n < m->entity()->instr()->size(); ++n)
+ {
+ auto ins = m->entity()->instr()->at(n);
+ assert(ins != nullptr);
+
+ auto setter = [ins](pp::LinearDocument &doc) { doc.append(describe(ins)); };
+ auto desc = section("instr").build(setter);
+
+ std::cout << desc << std::endl;
+ }
+}
+
+DEBUGGING_API_P(enco_dump_all_instrs_v, coco::Module, m)
+{
+ for (uint32_t n = 0; n < m->entity()->instr()->size(); ++n)
+ {
+ auto ins = m->entity()->instr()->at(n);
+ assert(ins != nullptr);
+
+ auto setter = [ins](pp::LinearDocument &doc) { doc.append(describe(ins, true)); };
+ auto desc = section("instr").build(setter);
+
+ std::cout << desc << std::endl;
+ }
+}
+
+DEBUGGING_API_P(enco_dump_instr, coco::Instr, ins)
+{
+ auto setter = [ins](pp::LinearDocument &doc) { doc.append(describe(ins, true)); };
+ auto desc = section("instr").build(setter);
+
+ std::cout << desc << std::endl;
+}
+
+/**
+ * SECTION: Block
+ */
+namespace
+{
+
+pp::LinearDocument describe(const coco::Block *blk)
+{
+ pp::LinearDocument doc;
+
+ for (auto ins = blk->instr()->head(); ins; ins = ins->next())
+ {
+ auto setter = [ins](pp::LinearDocument &doc) { doc.append(describe(ins)); };
+ auto desc = section("instr").build(setter);
+ doc.append(desc);
+ }
+
+ return doc;
+}
+
+} // namespace
+
+DEBUGGING_API_P(enco_dump_block, coco::Block, blk) { std::cout << describe(blk) << std::endl; }
diff --git a/compiler/enco/core/src/Support/Debugging.h b/compiler/enco/core/src/Support/Debugging.h
new file mode 100644
index 000000000..c28356e76
--- /dev/null
+++ b/compiler/enco/core/src/Support/Debugging.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Debugging.h
+ * @brief This file includes various interactive debugging helpers
+ */
+
+#ifndef __ENCO_SUPPORT_DEBUGGING_H__
+#define __ENCO_SUPPORT_DEBUGGING_H__
+
+#include <coco/IR.h>
+
+static_assert(sizeof(long) == sizeof(void *), "sizeof(long) == sizeof(pointer)");
+
+/**
+ * Debugging API with a single pointer argument
+ */
+#define DEBUGGING_API_P(NAME, TYPE) \
+ void NAME(const TYPE *); \
+ void NAME(long);
+
+/**
+ * Print the details of all the allocated coco::Bag in coco::Module
+ *
+ * (gdb) call enco_dump_all_bags(bag->module())
+ * (gdb) call enco_dump_all_bags(0x...)
+ */
+DEBUGGING_API_P(enco_dump_all_bags, coco::Module);
+
+/**
+ * Print the details of all the allocated coco::Object in coco::Module
+ *
+ * (gdb) call enco_dump_all_objects(obj->module())
+ * (gdb) call enco_dump_all_objects(0x...)
+ */
+DEBUGGING_API_P(enco_dump_all_objects, coco::Module);
+
+/**
+ * Print the details of coco::Op
+ *
+ * (gdb) call enco_dump_op(op)
+ * (gdb) call enco_dump_op(0x....)
+ */
+DEBUGGING_API_P(enco_dump_op, coco::Op);
+
+/**
+ * Print the (simplified) tree layout of coco::Op
+ *
+ * (gdb) call enco_dump_op_tree(op)
+ * (gdb) call enco_dump_op_tree(0x....)
+ */
+DEBUGGING_API_P(enco_dump_op_tree, coco::Op);
+
+/**
+ * Print the details of all the allocated coco::Op in coco::Module
+ *
+ * (gdb) call enco_dump_all_ops(op->module())
+ * (gdb) call enco_dump_all_ops(0x....)
+ */
+DEBUGGING_API_P(enco_dump_all_ops, coco::Module);
+
+/**
+ * Print the details of all the allocated coco::Instr in coco::Module
+ *
+ * (gdb) call enco_dump_all_instrs(instr->module())
+ * (gdb) call enco_dump_all_instrs(0x...)
+ */
+DEBUGGING_API_P(enco_dump_all_instrs, coco::Module);
+
+/**
+ * Print the more details of all the allocated coco::Instr in coco::Module
+ *
+ * (gdb) call enco_dump_all_instrs_v(instr->module())
+ * (gdb) call enco_dump_all_instrs_v(0x...)
+ */
+DEBUGGING_API_P(enco_dump_all_instrs_v, coco::Module);
+
+/**
+ * Print the details of a given coco::Instr
+ *
+ * (gdb) call enco_dump_instr(instr)
+ * (gdb) call enco_dump_instr(0x...)
+ */
+DEBUGGING_API_P(enco_dump_instr, coco::Instr);
+
+/**
+ * Print the details of all the instruction in a given block
+ *
+ * (gdb) call enco_dump_block(b)
+ * (gdb) call enco_dump_block(0x...)
+ */
+DEBUGGING_API_P(enco_dump_block, coco::Block);
+
+#undef DEBUGGING_API_P
+
+#endif // __ENCO_SUPPORT_DEBUGGING_H__
diff --git a/compiler/enco/core/src/Support/Debugging.test.cpp b/compiler/enco/core/src/Support/Debugging.test.cpp
new file mode 100644
index 000000000..49a2ad162
--- /dev/null
+++ b/compiler/enco/core/src/Support/Debugging.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Debugging.h"
+
+#include <gtest/gtest.h>
+
+// This test aims to check whether debugging API is actually defined
+TEST(DebuggingTest, defined)
+{
+ enco_dump_op(nullptr);
+ enco_dump_all_ops(nullptr);
+}
diff --git a/compiler/enco/core/src/Transforms/AvgPoolLowering.cpp b/compiler/enco/core/src/Transforms/AvgPoolLowering.cpp
new file mode 100644
index 000000000..17502fb1f
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/AvgPoolLowering.cpp
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AvgPoolLowering.h"
+#include "IRUtils.h"
+
+#include <coco/IR/FeatureLayouts.h>
+
+#include <nncc/core/ADT/feature/Shape.h>
+#include <nncc/core/ADT/feature/HWCLayout.h>
+
+#include <set>
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using nncc::core::ADT::feature::num_elements;
+
+namespace
+{
+
+bool empty(coco::Padding2D *pad)
+{
+ return (pad->top() == 0) && (pad->bottom() == 0) && (pad->left() == 0) && (pad->right() == 0);
+}
+
+/**
+ * @brief Return a set of AvgPool2D operations (in Eval instruction) that SHOULD be lowered
+ */
+std::set<coco::AvgPool2D *> candidates(coco::Module *m)
+{
+ std::set<coco::AvgPool2D *> res;
+
+ for (auto I : enco::instr_sequence(m))
+ {
+ if (auto eval = I->asEval())
+ {
+ if (auto avgpool = eval->op()->asAvgPool2D())
+ {
+ /* Originally it was preferred to use `auto load = avgpool->arg()->asLoad()' for
+         * consistent style with other if statements.
+ * Someone may think compiler will be happy because `load` in `if` statement can
+         * be considered as a use, however, it turned out that it is not the case.
+ */
+ if (avgpool->arg()->asLoad())
+ {
+ if (avgpool->divisor() == coco::AvgPool2D::Divisor::Static)
+ {
+ res.insert(avgpool);
+ }
+ }
+ }
+ }
+ }
+
+ return res;
+}
+
+} // namespace
+
+namespace
+{
+namespace ShapeTransform
+{
+
+class Pad
+{
+public:
+ Pad(const coco::Padding2D *pad) : _pad{pad}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /// @brief Return the expected OFM shape for a given IFM shape
+ feature::Shape forward(const feature::Shape &ifm_shape) const
+ {
+ const uint32_t OFM_C = ifm_shape.depth();
+ const uint32_t OFM_H = ifm_shape.height() + _pad->top() + _pad->bottom();
+ const uint32_t OFM_W = ifm_shape.width() + _pad->left() + _pad->right();
+
+ return feature::Shape{OFM_C, OFM_H, OFM_W};
+ }
+
+private:
+ const coco::Padding2D *_pad;
+};
+
+} // namespace ShapeTransform
+
+ShapeTransform::Pad shape_xform(const coco::Padding2D *pad) { return ShapeTransform::Pad{pad}; }
+
+} // namespace
+
+namespace
+{
+
+class PadInstrBuilder final
+{
+public:
+ PadInstrBuilder(const coco::Padding2D *pad) : _pad{pad}
+ {
+ // DO NOTHING
+ }
+
+public:
+ coco::Instr *build(coco::FeatureObject *ifm_obj, coco::FeatureObject *ofm_obj) const
+ {
+ assert(ifm_obj->module() == ofm_obj->module());
+ auto m = ifm_obj->module();
+ assert(m != nullptr);
+
+ auto load_op = m->entity()->op()->create<coco::Load>();
+
+ load_op->object(ifm_obj);
+
+ auto pad_op = m->entity()->op()->create<coco::PadF>();
+
+ pad_op->arg(load_op);
+
+ pad_op->pad()->top(_pad->top());
+ pad_op->pad()->bottom(_pad->bottom());
+ pad_op->pad()->left(_pad->left());
+ pad_op->pad()->right(_pad->right());
+
+ auto pad_instr = m->entity()->instr()->create<coco::Eval>();
+
+ pad_instr->out(ofm_obj);
+ pad_instr->op(pad_op);
+
+ return pad_instr;
+ }
+
+private:
+ const coco::Padding2D *_pad;
+};
+
+PadInstrBuilder pad_instr_builder(const coco::Padding2D *pad) { return PadInstrBuilder{pad}; }
+
+} // namespace
+
+namespace
+{
+
+class AvgPoolRewritePass
+{
+private:
+ void runOnModule(coco::Module *m) const;
+
+public:
+ void runOnCode(enco::Code *) const;
+};
+
+void AvgPoolRewritePass::runOnModule(coco::Module *m) const
+{
+ // Lower AvgPool2D op that resides in Eval instruction
+ for (auto avgpool : candidates(m))
+ {
+ auto ins = avgpool->parent();
+ auto load = avgpool->arg()->asLoad();
+
+ assert(ins != nullptr);
+ assert(load != nullptr);
+ assert(avgpool->divisor() == coco::AvgPool2D::Divisor::Static);
+
+ if (empty(avgpool->pad()))
+ {
+ // NOTE If there is no padding, Static and PaddingExcluded schemes are equivalent
+ avgpool->divisor(coco::AvgPool2D::Divisor::PaddingExcluded);
+ }
+ else
+ {
+ // Before: Static AvgPool2D with Padding
+ // After: PadF; PaddingExcluded AvgPool2D without Padding
+
+ // Create PadF
+ auto ifm_obj = load->object()->asFeature();
+ assert(ifm_obj != nullptr);
+
+ auto pad_shape = shape_xform(avgpool->pad()).forward(ifm_obj->shape());
+ auto pad_bag = m->entity()->bag()->create(num_elements(pad_shape));
+ auto pad_obj = m->entity()->object()->create<coco::FeatureObject>();
+
+ pad_obj->bag(pad_bag);
+ pad_obj->layout(coco::FeatureLayouts::BHWC::create(pad_shape));
+
+ auto pad_instr = pad_instr_builder(avgpool->pad()).build(ifm_obj, pad_obj);
+
+ // Insert PadF before AvgPool2D
+ pad_instr->insertBefore(ins);
+
+ // Rewrite AvgPool2D as PaddingExcluded AvgPool2D without Padding
+ load->object(pad_obj);
+
+ avgpool->divisor(coco::AvgPool2D::Divisor::PaddingExcluded);
+ avgpool->pad()->top(0);
+ avgpool->pad()->bottom(0);
+ avgpool->pad()->left(0);
+ avgpool->pad()->right(0);
+ }
+ }
+}
+
+void AvgPoolRewritePass::runOnCode(enco::Code *code) const { runOnModule(code->module()); }
+
+} // namespace
+
+namespace enco
+{
+
+void lower_avgpool(enco::Code *code)
+{
+ AvgPoolRewritePass pass;
+ pass.runOnCode(code);
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/AvgPoolLowering.h b/compiler/enco/core/src/Transforms/AvgPoolLowering.h
new file mode 100644
index 000000000..71a5253df
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/AvgPoolLowering.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Rewrite NN API-incompatible average pooling
+ */
+void lower_avgpool(enco::Code *);
+
+struct AvgPoolLoweringPass final : public Pass
+{
+ PASS_CTOR(AvgPoolLoweringPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { lower_avgpool(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __REWRITE_H__
diff --git a/compiler/enco/core/src/Transforms/ConcatLowering.cpp b/compiler/enco/core/src/Transforms/ConcatLowering.cpp
new file mode 100644
index 000000000..bf613c983
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/ConcatLowering.cpp
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CopyLowering.h"
+#include "IRUtils.h"
+
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <set>
+#include <cassert>
+
+using namespace nncc::core::ADT;
+
+namespace
+{
+
+inline uint32_t as_tensor_axis(const coco::ConcatF::Axis &axis)
+{
+ switch (axis)
+ {
+ case coco::ConcatF::Axis::Batch:
+ return 0;
+ case coco::ConcatF::Axis::Depth:
+ return 1;
+ case coco::ConcatF::Axis::Height:
+ return 2;
+ case coco::ConcatF::Axis::Width:
+ return 3;
+ default:
+ break;
+ };
+
+ throw std::invalid_argument{"axis is unknown value"};
+}
+
+tensor::Shape as_tensor_shape(const coco::FeatureLayout *l)
+{
+ assert(l != nullptr);
+
+ tensor::Shape res;
+
+ res.resize(4);
+
+ res.dim(as_tensor_axis(coco::ConcatF::Axis::Batch)) = l->batch();
+ res.dim(as_tensor_axis(coco::ConcatF::Axis::Depth)) = l->depth();
+ res.dim(as_tensor_axis(coco::ConcatF::Axis::Height)) = l->height();
+ res.dim(as_tensor_axis(coco::ConcatF::Axis::Width)) = l->width();
+
+ return res;
+}
+
+coco::ElemID as_element_index(const coco::FeatureLayout *l, const tensor::Index &idx)
+{
+ assert(l != nullptr);
+ assert(idx.rank() == 4);
+
+ const auto b = idx.at(as_tensor_axis(coco::ConcatF::Axis::Batch));
+ const auto ch = idx.at(as_tensor_axis(coco::ConcatF::Axis::Depth));
+ const auto row = idx.at(as_tensor_axis(coco::ConcatF::Axis::Height));
+ const auto col = idx.at(as_tensor_axis(coco::ConcatF::Axis::Width));
+
+ return l->at(b, ch, row, col);
+}
+
+std::set<coco::Eval *> candidates(coco::Module *m)
+{
+ std::set<coco::Eval *> res;
+
+ for (auto ins : enco::instr_sequence(m))
+ {
+ if (auto eval = ins->asEval())
+ {
+ if (eval->op()->asConcatF())
+ {
+ res.insert(eval);
+ }
+ }
+ }
+
+ return res;
+}
+
+} // namespace
+
+namespace enco
+{
+
+void lower_concat(enco::Code *code)
+{
+ auto m = code->module();
+
+ for (auto eval : candidates(m))
+ {
+ auto concat_f = eval->op()->asConcatF();
+ assert(concat_f != nullptr);
+
+ auto left_feature = concat_f->left()->asLoad()->object()->asFeature();
+ assert(left_feature != nullptr);
+ auto left_shape = as_tensor_shape(left_feature->layout());
+
+ auto right_feature = concat_f->right()->asLoad()->object()->asFeature();
+ assert(right_feature != nullptr);
+ auto right_shape = as_tensor_shape(right_feature->layout());
+
+ auto out_feature = eval->out()->asFeature();
+ assert(out_feature != nullptr);
+ auto out_shape = as_tensor_shape(out_feature->layout());
+
+ auto concat_axe = as_tensor_axis(concat_f->axis());
+
+ // Lower: Left -> Output
+ {
+ auto src_feature = left_feature;
+ auto src_shape = left_shape;
+
+ auto ins = m->entity()->instr()->create<coco::Shuffle>();
+
+ assert(src_feature->bag() != nullptr);
+ assert(out_feature->bag() != nullptr);
+
+ ins->from(src_feature->bag());
+ ins->into(out_feature->bag());
+
+ for (tensor::IndexEnumerator e{src_shape}; e.valid(); e.advance())
+ {
+ tensor::Index src_index = e.current();
+ tensor::Index out_index = e.current();
+
+ auto from = as_element_index(src_feature->layout(), src_index);
+ auto into = as_element_index(out_feature->layout(), out_index);
+
+ ins->insert(from, into);
+ }
+
+ ins->insertAfter(eval);
+ }
+
+ // Lower: Right -> Output
+ {
+ auto src_feature = right_feature;
+ auto src_shape = right_shape;
+
+ auto ins = m->entity()->instr()->create<coco::Shuffle>();
+
+ assert(src_feature->bag() != nullptr);
+ assert(out_feature->bag() != nullptr);
+
+ ins->from(src_feature->bag());
+ ins->into(out_feature->bag());
+
+ for (tensor::IndexEnumerator e{src_shape}; e.valid(); e.advance())
+ {
+ tensor::Index src_index = e.current();
+ tensor::Index out_index = e.current();
+
+ out_index.at(concat_axe) = out_index.at(concat_axe) + left_shape.dim(concat_axe);
+
+ auto from = as_element_index(src_feature->layout(), src_index);
+ auto into = as_element_index(out_feature->layout(), out_index);
+
+ ins->insert(from, into);
+ }
+
+ ins->insertAfter(eval);
+ }
+
+ // Unlink "Eval" and "ConcatF" op tree
+ eval->op(nullptr);
+
+ // Delete "Concat" op tree
+ m->entity()->op()->destroy(concat_f->left());
+ m->entity()->op()->destroy(concat_f->right());
+ m->entity()->op()->destroy(concat_f);
+
+    // Detach "Eval" instruction from the block
+ eval->detach();
+
+ // Delete "Eval" instruction
+ m->entity()->instr()->destroy(eval);
+ }
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/ConcatLowering.h b/compiler/enco/core/src/Transforms/ConcatLowering.h
new file mode 100644
index 000000000..5d20e627b
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/ConcatLowering.h
@@ -0,0 +1,43 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __ENCO_CONCAT_LOWERING_H__
#define __ENCO_CONCAT_LOWERING_H__

#include "Code.h"
#include "Pass.h"

namespace enco
{

/**
 * @brief Lower eval(Concat(...)) as a sequence of shuffle instructions
 *
 * Each lowered concat becomes per-input "Shuffle" instructions that move the
 * input elements into the (offset) positions of the concatenated output bag.
 */
void lower_concat(enco::Code *code);

/// @brief Session pass wrapper that applies lower_concat to a session's code
struct ConcatLoweringPass final : public Pass
{
  PASS_CTOR(ConcatLoweringPass)
  {
    // DO NOTHING
  }

  void run(const SessionID &sess) const override { lower_concat(code(sess)); }
};

} // namespace enco

#endif // __ENCO_CONCAT_LOWERING_H__
diff --git a/compiler/enco/core/src/Transforms/ConstantFolding.cpp b/compiler/enco/core/src/Transforms/ConstantFolding.cpp
new file mode 100644
index 000000000..cd6f22351
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/ConstantFolding.cpp
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstantFolding.h"
+#include "Session.h"
+
+#include <queue>
+#include <cmath>
+#include <cassert>
+
+namespace
+{
+
+/**
+ * @brief is_constant_bag(b) returns true if the bag "b" has corresponding weight
+ */
+bool is_constant_bag(coco::Bag *b)
+{
+ auto m = b->module();
+ auto d = enco::data(m);
+ return d->allocated(b);
+}
+
+class ConstantBagEnumerator
+{
+public:
+ ConstantBagEnumerator(enco::Code *code) : _code{code}
+ {
+ // DO NOTHING
+ }
+
+public:
+ template <typename Callable> void enumerate(Callable cb) const
+ {
+ auto m = _code->module();
+
+ for (uint32_t n = 0; n < m->entity()->bag()->size(); ++n)
+ {
+ auto b = m->entity()->bag()->at(n);
+
+ if (is_constant_bag(b))
+ {
+ cb(b);
+ }
+ }
+ }
+
+private:
+ enco::Code *_code;
+};
+
+template <typename Callable> void operator<<(const ConstantBagEnumerator &e, Callable &&cb)
+{
+ e.enumerate(std::forward<Callable>(cb));
+}
+
+ConstantBagEnumerator constant_bag_enumerator(enco::Code *code)
+{
+ return ConstantBagEnumerator{code};
+}
+
+} // namespace
+
+namespace
+{
+
+/**
+ * @brief Take the first element from the queue
+ * @note The queue SHOULD have at least one element.
+ */
+template <typename T> T take(std::queue<T> &q)
+{
+ assert(q.size() > 0);
+ auto res = q.front();
+ q.pop();
+ return res;
+}
+
+} // namespace
+
+namespace
+{
+
/**
 * @brief Fold a "Copy" instruction whose source bag holds constant data
 *
 * Precondition: the source bag was taken from the constant-bag worklist, so
 * it has compile-time weight data.
 *
 * On success, the destination bag gets its own weight allocation filled with
 * the copied values, the Copy instruction is detached from its block, and the
 * (now constant) destination bag is pushed onto "q" for further folding.
 */
void fold_constant(std::queue<coco::Bag *> &q, coco::Copy *copy)
{
  auto m = copy->module();
  auto d = enco::data(m);

  auto src_obj = copy->from();
  auto src_bag = src_obj->bag();

  auto dst_obj = copy->into();
  auto dst_bag = dst_obj->bag();

  // Output calculation should not be folded
  // TODO Reduce code duplication of this kind
  if (dst_bag->isOutput())
  {
    return;
  }

  // NOTE d->allocated(bag) returns true if bag has corresponding initial
  //      values (e.g. convolution kernel)
  assert(d->allocated(src_bag));
  assert(!d->allocated(dst_bag));

  // TODO Support other data type
  auto src_span = d->f32()->weight(src_bag);

  assert(src_span.data() != nullptr);

  auto src_feature = src_obj->asFeature();
  auto dst_feature = dst_obj->asFeature();

  // TODO Support other object type
  if (src_feature == nullptr || dst_feature == nullptr)
  {
    return;
  }

  assert(src_feature != nullptr);
  assert(dst_feature != nullptr);

  // Allocate weight for destination
  d->f32()->allocate(dst_bag);

  auto dst_span = d->f32()->weight(dst_bag);

  // Both sides must describe features of the same shape
  assert(src_feature->layout()->batch() == dst_feature->layout()->batch());
  assert(src_feature->layout()->depth() == dst_feature->layout()->depth());
  assert(src_feature->layout()->height() == dst_feature->layout()->height());
  assert(src_feature->layout()->width() == dst_feature->layout()->width());

  uint32_t const B = src_feature->layout()->batch();
  uint32_t const C = src_feature->layout()->depth();
  uint32_t const H = src_feature->layout()->height();
  uint32_t const W = src_feature->layout()->width();

  // Copy element-by-element, honoring each side's (possibly different) layout
  for (uint32_t b = 0; b < B; ++b)
  {
    for (uint32_t ch = 0; ch < C; ++ch)
    {
      for (uint32_t row = 0; row < H; ++row)
      {
        for (uint32_t col = 0; col < W; ++col)
        {
          auto src_ind = src_feature->layout()->at(b, ch, row, col);
          auto dst_ind = dst_feature->layout()->at(b, ch, row, col);

          dst_span[dst_ind.value()] = src_span[src_ind.value()];
        }
      }
    }
  }

  // Let's detach copy
  copy->from(nullptr);
  copy->into(nullptr);
  copy->detach();

  // Let's visit destination bag!
  q.push(dst_bag);
}
+
+template <typename Callable>
+void fold_constant_op(std::queue<coco::Bag *> &q, coco::UnaryOp *op, Callable evaluate)
+{
+ auto m = op->module();
+ auto d = enco::data(m);
+
+ auto ins = op->parent();
+ auto eval = ins->asEval();
+
+ // UnaryOp has only one arg
+ auto src_obj = *(op->uses().begin());
+ auto src_bag = src_obj->bag();
+
+ auto dst_obj = eval->out();
+ auto dst_bag = dst_obj->bag();
+
+ // Output calculation should not be folded
+ // TODO Reduce code duplication of this kind
+ if (dst_bag->isOutput())
+ {
+ return;
+ }
+
+ assert(d->allocated(src_bag));
+ assert(!d->allocated(dst_bag));
+
+ // TODO Support other data type
+ auto src_span = d->f32()->weight(src_bag);
+ assert(src_span.data() != nullptr);
+
+ auto src_feature = src_obj->asFeature();
+ auto dst_feature = dst_obj->asFeature();
+
+ // TODO Support other object type
+ if (src_feature == nullptr || dst_feature == nullptr)
+ {
+ return;
+ }
+
+ assert(src_feature != nullptr);
+ assert(dst_feature != nullptr);
+
+ // Allocate weight for destination
+ d->f32()->allocate(dst_bag);
+ auto dst_span = d->f32()->weight(dst_bag);
+
+ assert(src_feature->layout()->batch() == dst_feature->layout()->batch());
+ assert(src_feature->layout()->depth() == dst_feature->layout()->depth());
+ assert(src_feature->layout()->height() == dst_feature->layout()->height());
+ assert(src_feature->layout()->width() == dst_feature->layout()->width());
+
+ uint32_t const B = src_feature->layout()->batch();
+ uint32_t const C = src_feature->layout()->depth();
+ uint32_t const H = src_feature->layout()->height();
+ uint32_t const W = src_feature->layout()->width();
+
+ for (uint32_t b = 0; b < B; ++b)
+ {
+ for (uint32_t ch = 0; ch < C; ++ch)
+ {
+ for (uint32_t row = 0; row < H; ++row)
+ {
+ for (uint32_t col = 0; col < W; ++col)
+ {
+ auto src_ind = src_feature->layout()->at(b, ch, row, col);
+ auto dst_ind = dst_feature->layout()->at(b, ch, row, col);
+
+ evaluate(&dst_span[dst_ind.value()], src_span[src_ind.value()]);
+ }
+ }
+ }
+ }
+
+ // Let's detach eval
+ eval->out(nullptr);
+ eval->detach();
+
+ // Let's visit destination bag!
+ q.push(dst_bag);
+}
+
+template <typename Callable>
+void fold_constant_op(std::queue<coco::Bag *> &q, coco::BinaryOp *op, Callable evaluate)
+{
+ auto m = op->module();
+ auto d = enco::data(m);
+
+ auto ins = op->parent();
+ auto eval = ins->asEval();
+
+ // Already folded by the other bag
+ if (!eval->out())
+ {
+ return;
+ }
+
+ auto lhs_load = op->left()->asLoad();
+ auto lhs_obj = lhs_load->object();
+ auto lhs_bag = lhs_obj->bag();
+
+ auto rhs_load = op->right()->asLoad();
+ auto rhs_obj = rhs_load->object();
+ auto rhs_bag = rhs_obj->bag();
+
+ auto dst_obj = eval->out();
+ auto dst_bag = dst_obj->bag();
+
+ // Output calculation should not be folded
+ // TODO Reduce code duplication of this kind
+ if (dst_bag->isOutput())
+ {
+ return;
+ }
+
+ // The other bag is non-constant
+ if (!d->allocated(lhs_bag) || !d->allocated(rhs_bag))
+ {
+ return;
+ }
+
+ assert(d->allocated(lhs_bag));
+ assert(d->allocated(rhs_bag));
+ assert(!d->allocated(dst_bag));
+
+ // TODO Support other data type
+ auto lhs_span = d->f32()->weight(lhs_bag);
+ auto rhs_span = d->f32()->weight(rhs_bag);
+ assert(lhs_span.data() != nullptr);
+ assert(rhs_span.data() != nullptr);
+
+ auto lhs_feature = lhs_obj->asFeature();
+ auto rhs_feature = rhs_obj->asFeature();
+ auto dst_feature = dst_obj->asFeature();
+
+ // TODO Support other object type
+ if (lhs_feature == nullptr || rhs_feature == nullptr || dst_feature == nullptr)
+ {
+ return;
+ }
+
+ assert(lhs_feature != nullptr);
+ assert(rhs_feature != nullptr);
+ assert(dst_feature != nullptr);
+
+ // Allocate weight for destination
+ d->f32()->allocate(dst_bag);
+ auto dst_span = d->f32()->weight(dst_bag);
+
+ assert(lhs_feature->layout()->batch() == rhs_feature->layout()->batch());
+ assert(lhs_feature->layout()->depth() == rhs_feature->layout()->depth());
+ assert(lhs_feature->layout()->height() == rhs_feature->layout()->height());
+ assert(lhs_feature->layout()->width() == rhs_feature->layout()->width());
+
+ assert(lhs_feature->layout()->batch() == dst_feature->layout()->batch());
+ assert(lhs_feature->layout()->depth() == dst_feature->layout()->depth());
+ assert(lhs_feature->layout()->height() == dst_feature->layout()->height());
+ assert(lhs_feature->layout()->width() == dst_feature->layout()->width());
+
+ uint32_t const B = lhs_feature->layout()->batch();
+ uint32_t const C = lhs_feature->layout()->depth();
+ uint32_t const H = lhs_feature->layout()->height();
+ uint32_t const W = lhs_feature->layout()->width();
+
+ for (uint32_t b = 0; b < B; ++b)
+ {
+ for (uint32_t ch = 0; ch < C; ++ch)
+ {
+ for (uint32_t row = 0; row < H; ++row)
+ {
+ for (uint32_t col = 0; col < W; ++col)
+ {
+ auto lhs_ind = lhs_feature->layout()->at(b, ch, row, col);
+ auto rhs_ind = rhs_feature->layout()->at(b, ch, row, col);
+ auto dst_ind = dst_feature->layout()->at(b, ch, row, col);
+
+ evaluate(&dst_span[dst_ind.value()], lhs_span[lhs_ind.value()],
+ rhs_span[rhs_ind.value()]);
+ }
+ }
+ }
+ }
+
+ // Let's detach eval
+ eval->out(nullptr);
+ eval->detach();
+
+ // Let's visit destination bag!
+ q.push(dst_bag);
+}
+
+void fold_constant(std::queue<coco::Bag *> &q, coco::Eval *eval)
+{
+ // TODO Support other data types
+ if (auto op = eval->op()->asSqrt())
+ {
+ fold_constant_op(q, op, [](float *dst, float value) { *dst = std::sqrt(value); });
+ }
+ else if (auto op = eval->op()->asAdd())
+ {
+ fold_constant_op(q, op, [](float *dst, float lhs, float rhs) { *dst = lhs + rhs; });
+ }
+ else if (auto op = eval->op()->asSub())
+ {
+ fold_constant_op(q, op, [](float *dst, float lhs, float rhs) { *dst = lhs - rhs; });
+ }
+ else if (auto op = eval->op()->asMul())
+ {
+ fold_constant_op(q, op, [](float *dst, float lhs, float rhs) { *dst = lhs * rhs; });
+ }
+ else if (auto op = eval->op()->asDiv())
+ {
+ fold_constant_op(q, op, [](float *dst, float lhs, float rhs) { *dst = lhs / rhs; });
+ }
+ else
+ {
+ // Not supported opteration, do nothing
+ // TODO Support other operations
+ }
+}
+
+void fold_constant(std::queue<coco::Bag *> &q, coco::Instr *ins)
+{
+ if (auto copy = coco::safe_cast<coco::Copy>(ins))
+ {
+ fold_constant(q, copy);
+ return;
+ }
+ if (auto eval = coco::safe_cast<coco::Eval>(ins))
+ {
+ fold_constant(q, eval);
+ return;
+ }
+
+ // TODO Add more cases for constant folding
+}
+
+} // namespace
+
+namespace enco
+{
+
+void fold_constants(enco::Code *code)
+{
+ std::queue<coco::Bag *> q;
+
+ // Collect the initial set of "constant" bag
+ constant_bag_enumerator(code) << [&q](coco::Bag *bag) { q.push(bag); };
+
+ while (!q.empty())
+ {
+ auto candidate_bag = take(q);
+
+ // Scan the readers of each candidate bag
+ for (auto reader : coco::readers(candidate_bag))
+ {
+ // TODO Decide how to handle the reader with unknown instruction
+ if (auto ins = reader->loc())
+ {
+ fold_constant(q, ins);
+ }
+ }
+ }
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/ConstantFolding.h b/compiler/enco/core/src/Transforms/ConstantFolding.h
new file mode 100644
index 000000000..6faa9c876
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/ConstantFolding.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONSTANT_FOLDING_H__
+#define __CONSTANT_FOLDING_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Evaluate "constant" expressions at compile time
+ */
+void fold_constants(enco::Code *);
+
+struct ConstantFoldingPass final : public Pass
+{
+ PASS_CTOR(ConstantFoldingPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { fold_constants(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __CONSTANT_FOLDING_H__
diff --git a/compiler/enco/core/src/Transforms/ConstantFolding.test.cpp b/compiler/enco/core/src/Transforms/ConstantFolding.test.cpp
new file mode 100644
index 000000000..5ac71ac14
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/ConstantFolding.test.cpp
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstantFolding.h"
+#include "Session.h"
+
+#include <cmath>
+#include <gtest/gtest.h>
+
+namespace
+{
+
/**
 * @brief Test fixture helper: builds eval(Op(lhs, rhs)) -> out in a module
 *
 * Usage: construct, call build<OpType>(), fill lhs/rhs weights, then fold().
 */
class BinaryNetwork
{
public:
  BinaryNetwork(coco::Module *module, coco::Data *data) : _module{module}, _data{data}
  {
    // DO NOTHING
  }

  // Populate the module with a single eval(Op(load(lhs), load(rhs))) block
  template <typename Op> void build(void);

  // Run the constant-folding transform over the built network
  void fold(void)
  {
    // Execute constant folding
    enco::make_session(_module, _data);
    enco::Code code{_module, _data};
    enco::fold_constants(&code);
  }

public:
  // Handles published by build() for the test body
  coco::Bag *out;
  coco::Bag *lhs;
  coco::Bag *rhs;

  coco::Eval *eval;

private:
  coco::Module *_module;
  coco::Data *_data;
};
+
+template <typename Op> void BinaryNetwork::build(void)
+{
+ // Create lhs bag and object
+ auto lhs_bag = _module->entity()->bag()->create(12);
+ auto lhs_obj = _module->entity()->object()->template create<coco::FeatureObject>();
+ coco::FeatureShape lhs_shape(1, 2, 2, 3);
+ lhs_obj->bag(lhs_bag);
+ lhs_obj->layout(coco::FeatureLayouts::BHWC::create(lhs_shape));
+
+ // Create rhs bag and object
+ auto rhs_bag = _module->entity()->bag()->create(12);
+ auto rhs_obj = _module->entity()->object()->template create<coco::FeatureObject>();
+ coco::FeatureShape rhs_shape(1, 2, 2, 3);
+ rhs_obj->bag(rhs_bag);
+ rhs_obj->layout(coco::FeatureLayouts::BHWC::create(rhs_shape));
+
+ // Create output bag and object
+ auto output_bag = _module->entity()->bag()->create(12);
+ auto output_obj = _module->entity()->object()->template create<coco::FeatureObject>();
+ coco::FeatureShape ofm_shape(1, 2, 2, 3);
+ output_obj->bag(output_bag);
+ output_obj->layout(coco::FeatureLayouts::BHWC::create(ofm_shape));
+
+ // Create instruction and operations
+ auto block = _module->entity()->block()->create();
+ auto eval = _module->entity()->instr()->template create<coco::Eval>();
+ auto load_lhs = _module->entity()->op()->template create<coco::Load>();
+ auto load_rhs = _module->entity()->op()->template create<coco::Load>();
+ auto add_op = _module->entity()->op()->template create<Op>();
+
+ _module->block()->append(block);
+ block->instr()->append(eval);
+
+ load_lhs->object(lhs_obj);
+ load_rhs->object(rhs_obj);
+ add_op->left(load_lhs);
+ add_op->right(load_rhs);
+
+ eval->op(add_op);
+ eval->out(output_obj);
+
+ // Create a handle
+ this->lhs = lhs_bag;
+ this->rhs = rhs_bag;
+ this->out = output_bag;
+
+ this->eval = eval;
+}
+
+} // namespace
+
// Folds eval(sqrt(constant input)) and checks the output bag holds
// element-wise square roots with the Eval instruction detached.
TEST(ConstantFoldingTest, sqrt)
{
  auto module = coco::Module::create();
  auto data = coco::Data::create();

  // Create input bag and object
  auto input_bag = module->entity()->bag()->create(12);
  auto input_obj = module->entity()->object()->create<coco::FeatureObject>();
  coco::FeatureShape ifm_shape(1, 2, 2, 3);
  input_obj->bag(input_bag);
  input_obj->layout(coco::FeatureLayouts::BHWC::create(ifm_shape));

  // Create output bag and object
  auto output_bag = module->entity()->bag()->create(12);
  auto output_obj = module->entity()->object()->create<coco::FeatureObject>();
  coco::FeatureShape ofm_shape(1, 2, 2, 3);
  output_obj->bag(output_bag);
  output_obj->layout(coco::FeatureLayouts::BHWC::create(ofm_shape));

  // Insert values into input bag
  data->f32()->allocate(input_bag);
  auto input = data->f32()->weight(input_bag);
  for (uint32_t idx = 0; idx < input.size(); ++idx)
  {
    input[idx] = (float)idx;
  }

  // Create instruction and operations
  auto block = module->entity()->block()->create();
  auto eval = module->entity()->instr()->create<coco::Eval>();
  auto load = module->entity()->op()->create<coco::Load>();
  auto sqrt_op = module->entity()->op()->create<coco::Sqrt>();

  module->block()->append(block);
  block->instr()->append(eval);

  load->object(input_obj);
  sqrt_op->arg(load);

  eval->op(sqrt_op);
  eval->out(output_obj);

  // Execute constant folding
  enco::make_session(module.get(), data.get());
  enco::Code code{module.get(), data.get()};
  enco::fold_constants(&code);

  // Validate the result: output must now be constant, eval detached
  ASSERT_EQ(data->allocated(output_bag), true);
  ASSERT_EQ(eval->out(), nullptr);

  auto output = data->f32()->weight(output_bag);
  for (uint32_t idx = 0; idx < output.size(); ++idx)
  {
    ASSERT_FLOAT_EQ(output[idx], std::sqrt(input[idx]));
  }
}
+
// Folds eval(Add(constant, constant)) and checks element-wise sums.
TEST(ConstantFoldingTest, element_wise_add)
{
  auto module = coco::Module::create();
  auto data = coco::Data::create();

  BinaryNetwork net{module.get(), data.get()};

  // Build a network
  net.build<coco::Add>();

  // Create aliases
  auto lhs_bag = net.lhs;
  auto rhs_bag = net.rhs;
  auto output_bag = net.out;
  auto eval = net.eval;

  // Insert values into lhs and rhs bag
  data->f32()->allocate(lhs_bag);
  data->f32()->allocate(rhs_bag);
  auto lhs = data->f32()->weight(lhs_bag);
  auto rhs = data->f32()->weight(rhs_bag);
  for (uint32_t idx = 0; idx < lhs.size(); ++idx)
  {
    lhs[idx] = (float)idx;
    rhs[idx] = 1.5;
  }

  // Execute constant folding
  net.fold();

  // Validate the result: output must now be constant, eval detached
  ASSERT_EQ(data->allocated(output_bag), true);
  ASSERT_EQ(eval->out(), nullptr);

  auto output = data->f32()->weight(output_bag);
  for (uint32_t idx = 0; idx < output.size(); ++idx)
  {
    ASSERT_FLOAT_EQ(output[idx], lhs[idx] + rhs[idx]);
  }
}
+
// Folds eval(Sub(constant, constant)) and checks element-wise differences.
TEST(ConstantFoldingTest, element_wise_sub)
{
  auto module = coco::Module::create();
  auto data = coco::Data::create();

  BinaryNetwork net{module.get(), data.get()};

  // Build a network
  net.build<coco::Sub>();

  // Create aliases
  auto lhs_bag = net.lhs;
  auto rhs_bag = net.rhs;
  auto output_bag = net.out;
  auto eval = net.eval;

  // Insert values into lhs and rhs bag
  data->f32()->allocate(lhs_bag);
  data->f32()->allocate(rhs_bag);
  auto lhs = data->f32()->weight(lhs_bag);
  auto rhs = data->f32()->weight(rhs_bag);
  for (uint32_t idx = 0; idx < lhs.size(); ++idx)
  {
    lhs[idx] = (float)idx;
    rhs[idx] = 1.5;
  }

  // Execute constant folding
  net.fold();

  // Validate the result: output must now be constant, eval detached
  ASSERT_EQ(data->allocated(output_bag), true);
  ASSERT_EQ(eval->out(), nullptr);

  auto output = data->f32()->weight(output_bag);
  for (uint32_t idx = 0; idx < output.size(); ++idx)
  {
    ASSERT_FLOAT_EQ(output[idx], lhs[idx] - rhs[idx]);
  }
}
+
// Folds eval(Mul(constant, constant)) and checks element-wise products.
TEST(ConstantFoldingTest, element_wise_mul)
{
  auto module = coco::Module::create();
  auto data = coco::Data::create();

  BinaryNetwork net{module.get(), data.get()};

  // Build a network
  net.build<coco::Mul>();

  // Create aliases
  auto lhs_bag = net.lhs;
  auto rhs_bag = net.rhs;
  auto output_bag = net.out;
  auto eval = net.eval;

  // Insert values into lhs and rhs bag
  data->f32()->allocate(lhs_bag);
  data->f32()->allocate(rhs_bag);
  auto lhs = data->f32()->weight(lhs_bag);
  auto rhs = data->f32()->weight(rhs_bag);
  for (uint32_t idx = 0; idx < lhs.size(); ++idx)
  {
    lhs[idx] = (float)idx;
    rhs[idx] = 1.5;
  }

  // Execute constant folding
  net.fold();

  // Validate the result: output must now be constant, eval detached
  ASSERT_EQ(data->allocated(output_bag), true);
  ASSERT_EQ(eval->out(), nullptr);

  auto output = data->f32()->weight(output_bag);
  for (uint32_t idx = 0; idx < output.size(); ++idx)
  {
    ASSERT_FLOAT_EQ(output[idx], lhs[idx] * rhs[idx]);
  }
}
+
// Folds eval(Div(constant, constant)) and checks element-wise quotients.
TEST(ConstantFoldingTest, element_wise_div)
{
  auto module = coco::Module::create();
  auto data = coco::Data::create();

  BinaryNetwork net{module.get(), data.get()};

  // Build a network
  net.build<coco::Div>();

  // Create aliases
  auto lhs_bag = net.lhs;
  auto rhs_bag = net.rhs;
  auto output_bag = net.out;
  auto eval = net.eval;

  // Insert values into lhs and rhs bag
  data->f32()->allocate(lhs_bag);
  data->f32()->allocate(rhs_bag);
  auto lhs = data->f32()->weight(lhs_bag);
  auto rhs = data->f32()->weight(rhs_bag);
  for (uint32_t idx = 0; idx < lhs.size(); ++idx)
  {
    lhs[idx] = (float)idx;
    rhs[idx] = 1.5;
  }

  // Execute constant folding
  net.fold();

  // Validate the result: output must now be constant, eval detached
  ASSERT_EQ(data->allocated(output_bag), true);
  ASSERT_EQ(eval->out(), nullptr);

  auto output = data->f32()->weight(output_bag);
  for (uint32_t idx = 0; idx < output.size(); ++idx)
  {
    ASSERT_FLOAT_EQ(output[idx], lhs[idx] / rhs[idx]);
  }
}
diff --git a/compiler/enco/core/src/Transforms/CopyLowering.cpp b/compiler/enco/core/src/Transforms/CopyLowering.cpp
new file mode 100644
index 000000000..ceb3bbd5c
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/CopyLowering.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CopyLowering.h"
+
+#include <set>
+#include <cassert>
+
+//
+// Lower Copy as Shuffle
+//
+namespace enco
+{
+
/**
 * @brief Lower every feature-to-feature Copy instruction into a Shuffle
 *
 * For each bounded Copy whose "from"/"into" objects are both features, an
 * equivalent element-wise Shuffle (bag-index to bag-index mapping) is
 * inserted just before the Copy, and the Copy itself is destroyed.
 *
 * Copies of non-feature objects are left untouched (TODO below).
 */
void lower_copy(enco::Code *code)
{
  auto m = code->module();

  // Collected first, destroyed after the scan, so the entity table is not
  // mutated while being iterated
  std::set<coco::Copy *> lowered_copies;

  for (uint32_t n = 0; n < m->entity()->instr()->size(); ++n)
  {
    auto ins = m->entity()->instr()->at(n);

    assert(ins != nullptr);

    if (ins->parent() == nullptr)
    {
      // Skip if instruction does not belong to a list
      continue;
    }

    auto copy = ins->asCopy();

    if (copy == nullptr)
    {
      // Skip if instruction is not a copy
      continue;
    }

    // TODO Support non-Feature objects
    auto ifm = copy->from()->asFeature();
    auto ofm = copy->into()->asFeature();

    if ((ifm == nullptr) || (ofm == nullptr))
    {
      continue;
    }

    assert(ifm->layout()->batch() == ofm->layout()->batch());
    assert(ifm->layout()->shape() == ofm->layout()->shape());

    auto shuffle = m->entity()->instr()->create<coco::Shuffle>();

    shuffle->from(ifm->bag());
    shuffle->into(ofm->bag());

    const uint32_t B = ifm->layout()->batch();
    const uint32_t C = ifm->layout()->shape().depth();
    const uint32_t H = ifm->layout()->shape().height();
    const uint32_t W = ifm->layout()->shape().width();

    // Map each element's source bag index to its destination bag index
    for (uint32_t b = 0; b < B; ++b)
    {
      for (uint32_t ch = 0; ch < C; ++ch)
      {
        for (uint32_t row = 0; row < H; ++row)
        {
          for (uint32_t col = 0; col < W; ++col)
          {
            const auto from = ifm->layout()->at(b, ch, row, col);
            const auto into = ofm->layout()->at(b, ch, row, col);

            shuffle->insert(from, into);
          }
        }
      }
    }

    shuffle->insertBefore(copy);
    lowered_copies.insert(copy);
  }

  // Destroy lowered copy
  for (const auto &copy : lowered_copies)
  {
    copy->detach();
    m->entity()->instr()->destroy(copy);
  }
}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/CopyLowering.h b/compiler/enco/core/src/Transforms/CopyLowering.h
new file mode 100644
index 000000000..51f0f83e2
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/CopyLowering.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_LOWER_H__
+#define __ENCO_LOWER_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Lower copy(...) instruction into shuffle(...)
+ */
+void lower_copy(enco::Code *code);
+
+struct CopyLoweringPass final : public Pass
+{
+ PASS_CTOR(CopyLoweringPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { lower_copy(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __ENCO_LOWER_H__
diff --git a/compiler/enco/core/src/Transforms/DataLayoutConversion.cpp b/compiler/enco/core/src/Transforms/DataLayoutConversion.cpp
new file mode 100644
index 000000000..9d65d1c0b
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/DataLayoutConversion.cpp
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DataLayoutConversion.h"
+#include "Session.h"
+#include "IRUtils.h"
+
+#include "coex/IR.h"
+
+#include <coco/IR/FeatureLayouts.h>
+#include <coco/IR/KernelLayouts.h>
+
+#include <nncc/core/ADT/feature/Layout.h>
+#include <nncc/core/ADT/kernel/Layout.h>
+
+#include <nncc/core/ADT/feature/HWCLayout.h>
+#include <nncc/core/ADT/kernel/NHWCLayout.h>
+
+#include <set>
+
+namespace
+{
+
+coco::Copy *make_copy(coco::FeatureObject *from, coco::FeatureObject *into)
+{
+ auto m = from->module();
+ assert(m != nullptr);
+ assert(from->module() == into->module());
+
+ auto copy = m->entity()->instr()->create<coco::Copy>();
+
+ copy->from(from);
+ copy->into(into);
+
+ return copy;
+}
+
+coco::FeatureObject *clone_feature(const coco::FeatureObject *oldobj)
+{
+ auto module = oldobj->module();
+ auto newobj = module->entity()->object()->create<coco::FeatureObject>();
+ newobj->layout(coco::FeatureLayouts::BHWC::create(oldobj->shape()));
+
+ if (oldobj->bag() != nullptr)
+ {
+ using nncc::core::ADT::feature::num_elements;
+
+ // NOTE The size of bag should be at least "BxHxWxC" as "newobj" uses BHWC layout
+ const uint32_t batch = newobj->layout()->batch();
+ const uint32_t count = num_elements(newobj->layout()->shape());
+ const uint32_t bag_size = batch * count;
+
+ // Clone bag only when there is a backing bag for a given feature object
+ auto newbag = module->entity()->bag()->create(bag_size);
+ newobj->bag(newbag);
+ }
+
+ return newobj;
+}
+
+/**
+ * @brief Insert Copy before Load if necessary
+ *
+ * @require "load" should be bounded
+ */
+void insert_copy_before_load(coco::Load *load)
+{
+ assert(load->parent() != nullptr);
+ assert(load->parent()->parent() != nullptr);
+
+ if (auto obj = load->object())
+ {
+ if (auto ifm = obj->asFeature())
+ {
+ if (ifm->layout()->id() != coco::FeatureLayouts::BHWC::uid())
+ {
+ auto oldobj = ifm;
+ auto newobj = clone_feature(oldobj);
+
+ load->object(newobj);
+
+ auto copy = make_copy(oldobj, newobj);
+ copy->insertBefore(load->parent());
+ }
+ }
+ }
+}
+
+/**
+ * @brief Insert Copy after Eval if necessary
+ */
+void insert_copy_after_eval(coco::Eval *eval)
+{
+ if (auto out = eval->out())
+ {
+ if (auto ofm = out->asFeature())
+ {
+ if (ofm->layout()->id() != coco::FeatureLayouts::BHWC::uid())
+ {
+ auto oldobj = ofm;
+ auto newobj = clone_feature(oldobj);
+
+ eval->out(newobj);
+
+ auto copy = make_copy(newobj, oldobj);
+ copy->insertAfter(eval);
+ }
+ }
+ }
+}
+
/**
 * @brief Insert copy (for data layout change) before/after ANNDepthConcatF if necessary
 *
 * The output is redirected to a BHWC clone (with a copy back afterwards),
 * and each non-BHWC input gets a BHWC clone fed by a copy inserted before
 * the concat.
 */
void convert_data_layout(ANNDepthConcatF *concat)
{
  // Output side: concat writes into a BHWC clone, then copy back
  if (auto out = concat->out())
  {
    if (auto ofm = out->asFeature())
    {
      if (ofm->layout()->id() != coco::FeatureLayouts::BHWC::uid())
      {
        auto oldobj = ofm;
        auto newobj = clone_feature(oldobj);

        concat->out(newobj);

        auto copy = make_copy(newobj, oldobj);
        copy->insertAfter(concat);
      }
    }
  }

  // First input: reorder into a BHWC clone before the concat
  if (auto obj = concat->fst())
  {
    if (auto ifm = obj->asFeature())
    {
      if (ifm->layout()->id() != coco::FeatureLayouts::BHWC::uid())
      {
        auto oldobj = ifm;
        auto newobj = clone_feature(oldobj);

        concat->fst(newobj);

        auto copy = make_copy(oldobj, newobj);
        copy->insertBefore(concat);
      }
    }
  }

  // Second input: same treatment as the first input
  if (auto obj = concat->snd())
  {
    if (auto ifm = obj->asFeature())
    {
      if (ifm->layout()->id() != coco::FeatureLayouts::BHWC::uid())
      {
        auto oldobj = ifm;
        auto newobj = clone_feature(oldobj);

        concat->snd(newobj);

        auto copy = make_copy(oldobj, newobj);
        copy->insertBefore(concat);
      }
    }
  }
}
+
/**
 * @brief Update convolution kernel data layout
 *
 * Rewrites the kernel of "conv" into an NHWC-laid-out object backed by a
 * freshly-allocated bag, copies the weight data over element-by-element,
 * and releases the old bag's weight allocation.
 */
void change_conv2d_kernel_layout(coco::Conv2D *conv)
{
  auto m = conv->module();
  assert(m != nullptr);
  auto d = enco::data(enco::session(m));
  assert(d != nullptr);

  auto old_obj = conv->ker();
  assert(old_obj != nullptr);
  auto old_bag = old_obj->bag();
  assert(old_bag != nullptr);

  if (old_obj->layout()->id() == coco::KernelLayouts::NHWC::uid())
  {
    // Skip if kernel already uses NHWC layout
    return;
  }

  const auto &ker_shape = old_obj->shape();

  // Kernel weights must exist to be re-laid-out
  assert(d->allocated(old_bag));

  auto new_bag = m->entity()->bag()->create(old_bag->size());
  auto new_obj = m->entity()->object()->create<coco::KernelObject>();

  new_obj->bag(new_bag);
  new_obj->layout(coco::KernelLayouts::NHWC::create(ker_shape));

  d->f32()->allocate(new_bag);

  auto src = d->f32()->read(old_obj);
  auto dst = d->f32()->access(new_obj);

  const auto ker_N = ker_shape.count();
  const auto ker_C = ker_shape.depth();
  const auto ker_H = ker_shape.height();
  const auto ker_W = ker_shape.width();

  // Logical (n, ch, row, col) indexing; the layout objects translate to
  // the respective physical offsets
  for (uint32_t n = 0; n < ker_N; ++n)
  {
    for (uint32_t ch = 0; ch < ker_C; ++ch)
    {
      for (uint32_t row = 0; row < ker_H; ++row)
      {
        for (uint32_t col = 0; col < ker_W; ++col)
        {
          dst->at(n, ch, row, col) = src->at(n, ch, row, col);
        }
      }
    }
  }

  conv->ker(new_obj);
  // Drop the old weight allocation (the old bag is no longer read)
  d->release(old_bag);
}
+
+} // namespace
+
+namespace
+{
+
+/**
+ * @brief Return the set of all of bounded Load Op(s) in a given module
+ *
+ * @note 'bounded' means it will be exectuted
+ */
+std::set<coco::Load *> loads(coco::Module *m)
+{
+ std::set<coco::Load *> res;
+
+ for (uint32_t n = 0; n < m->entity()->op()->size(); ++n)
+ {
+ auto op = m->entity()->op()->at(n);
+
+ // Skip if this op is dangling
+ if (op->parent() == nullptr)
+ {
+ continue;
+ }
+
+ // Skip if eval instruction of this op is dangling
+ if (op->parent()->parent() == nullptr)
+ {
+ continue;
+ }
+
+ if (auto load = m->entity()->op()->at(n)->asLoad())
+ {
+ res.insert(load);
+ }
+ }
+
+ return res;
+}
+
+/**
+ * @brief Return the set of every (allocated) Eval instruction in a given module
+ */
+std::set<coco::Eval *> evals(coco::Module *m)
+{
+ std::set<coco::Eval *> res;
+
+ for (uint32_t n = 0; n < m->entity()->instr()->size(); ++n)
+ {
+ if (auto eval = m->entity()->instr()->at(n)->asEval())
+ {
+ res.insert(eval);
+ }
+ }
+
+ return res;
+}
+
+/**
+ * @brief Return the set of allocated Conv2D op in a given module
+ */
+std::set<coco::Conv2D *> convs(coco::Module *m)
+{
+ std::set<coco::Conv2D *> res;
+
+ for (uint32_t n = 0; n < m->entity()->op()->size(); ++n)
+ {
+ if (auto op = m->entity()->op()->at(n)->asConv2D())
+ {
+ res.insert(op);
+ }
+ }
+
+ return res;
+}
+
+/**
+ * @brief Return the set of "bounded" ANNDepthConcatF instructions
+ */
+std::set<ANNDepthConcatF *> depth_concats(coco::Module *m)
+{
+ std::set<ANNDepthConcatF *> res;
+
+ for (auto ins : enco::instr_sequence(m))
+ {
+ if (auto depth_concat_f = coco::safe_cast<ANNDepthConcatF>(ins))
+ {
+ res.insert(depth_concat_f);
+ }
+ }
+
+ return res;
+}
+
/// @brief Normalizes a module so all feature data flows through BHWC objects
class NormalizePass
{
private:
  // Performs the actual per-module normalization steps
  void runOnModule(coco::Module *m) const;

public:
  // Entry point: normalizes the module owned by the given code
  void runOnCode(enco::Code *) const;
};
+
+void NormalizePass::runOnModule(coco::Module *m) const
+{
+ // Insert Copy before all Load Op (if necessary)
+ for (auto load : loads(m))
+ {
+ insert_copy_before_load(load);
+ }
+
+ // Insert Copy after all Eval Instr (if necessary)
+ for (auto eval : evals(m))
+ {
+ insert_copy_after_eval(eval);
+ }
+
+ // Change Kernel Layout of Conv2D opertion (if necessary)
+ for (auto conv : convs(m))
+ {
+ change_conv2d_kernel_layout(conv);
+ }
+
+ // Insert Copy (for Layout Conversion) before/after ANNDepthConcatF instructions (if necessary)
+ for (auto depth_concat : depth_concats(m))
+ {
+ convert_data_layout(depth_concat);
+ }
+}
+
+void NormalizePass::runOnCode(enco::Code *code) const { runOnModule(code->module()); }
+
+} // namespace
+
+namespace enco
+{
+
+void convert_data_layout(enco::Code *code)
+{
+ NormalizePass pass;
+ pass.runOnCode(code);
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/DataLayoutConversion.h b/compiler/enco/core/src/Transforms/DataLayoutConversion.h
new file mode 100644
index 000000000..ac4052c8b
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/DataLayoutConversion.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_TRANSFORM_DATA_LAYOUT_CONVERSION_H__
+#define __ENCO_TRANSFORM_DATA_LAYOUT_CONVERSION_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Insert data reordering if necessary
+ */
+void convert_data_layout(enco::Code *code);
+
+struct DataLayoutConversionPass final : public enco::Pass
+{
+ PASS_CTOR(DataLayoutConversionPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { convert_data_layout(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __ENCO_TRANSFORM_DATA_LAYOUT_CONVERSION_H__
diff --git a/compiler/enco/core/src/Transforms/DataLayoutConversion.test.cpp b/compiler/enco/core/src/Transforms/DataLayoutConversion.test.cpp
new file mode 100644
index 000000000..812e38a78
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/DataLayoutConversion.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DataLayoutConversion.h"
+
+#include <gtest/gtest.h>
+
+TEST(DataLayoutConversionTest, case_000)
+{
+ auto m = coco::Module::create();
+
+ // Create a "free" Eval instruction
+ m->entity()->instr()->create<coco::Eval>();
+
+ enco::Code code{m.get(), nullptr};
+ ASSERT_EQ(m->entity()->instr()->size(), 1);
+
+ // "convert_data_layout" SHOULD NOT crash even if there is a "free" Eval instruction
+ enco::convert_data_layout(&code);
+}
diff --git a/compiler/enco/core/src/Transforms/DeadBagElimination.cpp b/compiler/enco/core/src/Transforms/DeadBagElimination.cpp
new file mode 100644
index 000000000..b3c598a55
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/DeadBagElimination.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DeadBagElimination.h"
+
+#include <set>
+
+namespace
+{
+
+/// @brief Return true if a given bag is marked as either input or output
+bool is_public(const coco::Bag *b) { return b->isInput() || b->isOutput(); }
+
+/// @brief Return the set of "dead" bags in a given module
+std::set<coco::Bag *> dead_bags(const coco::Module *m)
+{
+ std::set<coco::Bag *> res;
+
+ for (uint32_t n = 0; n < m->entity()->bag()->size(); ++n)
+ {
+ auto bag = m->entity()->bag()->at(n);
+
+ if (coco::readers(bag).empty() && !is_public(bag))
+ {
+ res.insert(bag);
+ }
+ }
+
+ return res;
+}
+
+} // namespace
+
+namespace enco
+{
+
+void eliminate_dead_bag(enco::Code *code)
+{
+ auto m = code->module();
+
+ // Destroy a dead bag and its updaters
+ for (auto bag : dead_bags(m))
+ {
+ for (auto updater : coco::updaters(bag))
+ {
+ auto ins = updater->loc();
+
+ assert(ins != nullptr);
+
+ ins->detach();
+ m->entity()->instr()->destroy(ins);
+ }
+
+ bag->replaceWith(nullptr);
+ m->entity()->bag()->destroy(bag);
+ }
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/DeadBagElimination.h b/compiler/enco/core/src/Transforms/DeadBagElimination.h
new file mode 100644
index 000000000..87e03e8ac
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/DeadBagElimination.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_TRANSFORM_DEAD_BAG_ELIMINATION_H__
+#define __ENCO_TRANSFORM_DEAD_BAG_ELIMINATION_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Eliminate dead bags
+ *
+ * A bag is referred to as dead if it is neither input nor output, and has no read. If a bag is
+ * dead, it is unnecessary to update its values as these values are never used.
+ *
+ * "eliminate_dead_bag" removes all the dead bags and their updaters from IR.
+ */
+void eliminate_dead_bag(enco::Code *code);
+
+struct DeadBagEliminationPass final : public Pass
+{
+ PASS_CTOR(DeadBagEliminationPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { eliminate_dead_bag(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __ENCO_TRANSFORM_DEAD_BAG_ELIMINATION_H__
diff --git a/compiler/enco/core/src/Transforms/DeadObjectElimination.cpp b/compiler/enco/core/src/Transforms/DeadObjectElimination.cpp
new file mode 100644
index 000000000..df8cc628a
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/DeadObjectElimination.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DeadObjectElimination.h"
+
+#include <set>
+
+namespace
+{
+
+std::set<coco::Object *> dead_objects(const coco::Module *m)
+{
+ std::set<coco::Object *> res;
+
+ for (uint32_t n = 0; n < m->entity()->object()->size(); ++n)
+ {
+ auto obj = m->entity()->object()->at(n);
+
+ if (auto bag = obj->bag())
+ {
+ if (coco::readers(bag).empty() && !(bag->isOutput()))
+ {
+ res.insert(obj);
+ }
+ }
+ else
+ {
+ // NOTE Just in case if there are Objects not related to Bags
+ if (obj->uses()->size() == 0)
+ {
+ res.insert(obj);
+ }
+ }
+ }
+
+ return res;
+}
+
+} // namespace
+
+namespace enco
+{
+
+void eliminate_dead_object(enco::Code *code)
+{
+ auto m = code->module();
+
+ // Destroy a dead object and its producer
+ for (auto obj : dead_objects(m))
+ {
+ if (auto producer = coco::producer(obj))
+ {
+ auto ins = producer->loc();
+ assert(ins != nullptr);
+
+ ins->detach();
+ m->entity()->instr()->destroy(ins);
+ }
+
+ m->entity()->object()->destroy(obj);
+ }
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/DeadObjectElimination.h b/compiler/enco/core/src/Transforms/DeadObjectElimination.h
new file mode 100644
index 000000000..4923e56fd
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/DeadObjectElimination.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_TRANSFORM_DEAD_OBJECT_ELIMINATION_H__
+#define __ENCO_TRANSFORM_DEAD_OBJECT_ELIMINATION_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Eliminate dead objects in IR
+ *
+ * An object whose backing bag is unused is referred to as a dead object.
+ *
+ * Dead Object Elimination (DOE) eliminates such dead objects along with their producer.
+ */
+void eliminate_dead_object(enco::Code *code);
+
+struct DeadObjectEliminationPass final : public Pass
+{
+ PASS_CTOR(DeadObjectEliminationPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { eliminate_dead_object(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __ENCO_TRANSFORM_DEAD_OBJECT_ELIMINATION_H__
diff --git a/compiler/enco/core/src/Transforms/Duplicate.cpp b/compiler/enco/core/src/Transforms/Duplicate.cpp
new file mode 100644
index 000000000..91f64a0ad
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/Duplicate.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Duplicate.h"
+
+#include <map>
+#include <set>
+
+#include <cassert>
+
+namespace
+{
+
+coco::Block *find_or_create_first_block(coco::Module *m)
+{
+ if (m->block()->empty())
+ {
+ auto blk = m->entity()->block()->create();
+ m->block()->append(blk);
+ return blk;
+ }
+
+ return m->block()->head();
+}
+
+} // namespace
+
+namespace
+{
+
+class DuplicatePass
+{
+private:
+ void runOnModule(coco::Module *m) const;
+
+public:
+ void runOnCode(enco::Code *) const;
+};
+
+void DuplicatePass::runOnModule(coco::Module *m) const
+{
+ // Let's find candidates
+ std::set<coco::Bag *> candidates;
+
+ for (uint32_t n = 0; n < m->entity()->bag()->size(); ++n)
+ {
+ auto bag = m->entity()->bag()->at(n);
+
+ if (bag->isInput() && bag->isOutput())
+ {
+ candidates.insert(bag);
+ }
+ }
+
+ // Return if there is no candidate
+ if (candidates.empty())
+ {
+ return;
+ }
+
+ std::map<const coco::Bag *, coco::Input *> input_map;
+ std::map<const coco::Bag *, coco::Output *> output_map;
+
+ for (uint32_t n = 0; n < m->input()->size(); ++n)
+ {
+ auto input = m->input()->at(n);
+ assert(input->bag() != nullptr);
+ input_map[input->bag()] = input;
+ }
+
+ for (uint32_t n = 0; n < m->output()->size(); ++n)
+ {
+ auto output = m->output()->at(n);
+ assert(output->bag() != nullptr);
+ output_map[output->bag()] = output;
+ }
+
+ // For each in/out bag,
+ // 1. Create a new bag of the same size
+ // 2. Copy the content from the original bag
+ // 3. Mark the newly created bag as an output
+ for (const auto &candidate : candidates)
+ {
+ assert(coco::updaters(candidate).empty());
+ assert(input_map.find(candidate) != input_map.end());
+ assert(output_map.find(candidate) != output_map.end());
+
+ auto src = candidate;
+ auto dst = m->entity()->bag()->create(src->size());
+
+ // Create a copy instruction
+ auto shuffle = m->entity()->instr()->create<coco::Shuffle>();
+
+ shuffle->from(src);
+ shuffle->into(dst);
+
+ for (uint32_t n = 0; n < src->size(); ++n)
+ {
+ shuffle->insert(coco::ElemID{n} /* FROM */, coco::ElemID{n} /* INTO */);
+ }
+
+ find_or_create_first_block(m)->instr()->prepend(shuffle);
+
+ // Let's use the new bag as an output
+ output_map.at(src)->bag(dst);
+ }
+}
+
+void DuplicatePass::runOnCode(enco::Code *code) const { runOnModule(code->module()); }
+
+} // namespace
+
+namespace enco
+{
+
+void duplicate_inout_bag(enco::Code *code)
+{
+ DuplicatePass duplicate;
+ duplicate.runOnCode(code);
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/Duplicate.h b/compiler/enco/core/src/Transforms/Duplicate.h
new file mode 100644
index 000000000..93baa4589
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/Duplicate.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DUPLICATE_H__
+#define __DUPLICATE_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Eliminate in/out bags by duplication
+ */
+void duplicate_inout_bag(enco::Code *code);
+
+struct BagDuplicationPass final : public Pass
+{
+ PASS_CTOR(BagDuplicationPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { duplicate_inout_bag(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __DUPLICATE_H__
diff --git a/compiler/enco/core/src/Transforms/DuplicatedObjectReduction.cpp b/compiler/enco/core/src/Transforms/DuplicatedObjectReduction.cpp
new file mode 100644
index 000000000..fa84c005c
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/DuplicatedObjectReduction.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DuplicatedObjectReduction.h"
+
+#include "CodeIndex.h"
+#include "IRUtils.h"
+
+#include <set>
+
+namespace
+{
+
+/**
+ * @brief Collect feature objects in coco IR
+ */
+std::set<coco::FeatureObject *> features(const coco::Module *m)
+{
+ std::set<coco::FeatureObject *> res;
+
+ for (uint32_t n = 0; n < m->entity()->object()->size(); ++n)
+ {
+ if (auto feature = m->entity()->object()->at(n)->asFeature())
+ {
+ res.insert(feature);
+ }
+ }
+
+ return res;
+}
+
+std::set<coco::FeatureObject *> candidates(const coco::FeatureObject *src)
+{
+ std::set<coco::FeatureObject *> res;
+
+ for (auto consumer : coco::consumers(src))
+ {
+ if (auto copy = consumer->loc()->asCopy())
+ {
+ auto dst = copy->into()->asFeature();
+ assert(dst != nullptr);
+
+ if (dst->layout()->id() == coco::FeatureLayouts::BHWC::uid())
+ {
+ res.insert(dst);
+ }
+ }
+ }
+
+ return res;
+}
+
+CodeIndex code_index(coco::Object::Producer *p)
+{
+ if (auto ins = p->loc())
+ {
+ return ::code_index(ins);
+ }
+
+ return CodeIndex{};
+}
+
+} // namespace
+
+namespace enco
+{
+
+void reduce_duplicated_object(enco::Code *code)
+{
+ auto m = code->module();
+
+ for (const auto &src : features(m))
+ {
+ auto copied = candidates(src);
+
+ if (copied.size() <= 1)
+ {
+ continue;
+ }
+
+ // Find the dominator
+ coco::FeatureObject *dominator = nullptr;
+
+ for (auto candidate : copied)
+ {
+ if (dominator == nullptr)
+ {
+ dominator = candidate;
+ }
+ else if (code_index(coco::producer(candidate)) < code_index(coco::producer(dominator)))
+ {
+ dominator = candidate;
+ }
+ }
+
+ // Replace all the occurrences of dominated objects with their dominator
+ copied.erase(dominator);
+
+ for (auto dominatee : copied)
+ {
+ subst(dominatee, dominator);
+ }
+ }
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/DuplicatedObjectReduction.h b/compiler/enco/core/src/Transforms/DuplicatedObjectReduction.h
new file mode 100644
index 000000000..3aa20058e
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/DuplicatedObjectReduction.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_TRANSFORM_DUPLICATED_OBJECT_REDUCTION_H__
+#define __ENCO_TRANSFORM_DUPLICATED_OBJECT_REDUCTION_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Reduce duplicated feature objects as its dominating feature object
+ *
+ * >>> BEFORE <<<
+ * %obj_0 = Feature(layout: ???) at ...
+ * %obj_1 = Feature(layout: BHWC) at ...
+ * %obj_2 = Feature(layout: BHWC) at ...
+ *
+ * copy(from: %obj_0, into: %obj_1)
+ * copy(from: %obj_0, into: %obj_2)
+ *
+ * ...
+ * Use(%obj_1)
+ * Use(%obj_2)
+ * ...
+ *
+ * >>> AFTER <<<
+ * %obj_0 = Feature(layout: ???) at ...
+ * %obj_1 = Feature(layout: BHWC) at ...
+ * %obj_2 = Feature(layout: BHWC) at ...
+ *
+ * copy(from: %obj_0, into: %obj_1)
+ * copy(from: %obj_0, into: %obj_2)
+ *
+ * ...
+ * Use(%obj_1)
+ * Use(%obj_1) <-- CHANGED
+ * ...
+ *
+ * NOTE Given a set of feature objects, a feature object is referred to as a dominating
+ * feature object if its producer precedes the producer of every feature object
+ * in the given set
+ */
+void reduce_duplicated_object(enco::Code *code);
+
+struct DuplicatedObjectReductionPass final : public Pass
+{
+ PASS_CTOR(DuplicatedObjectReductionPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { reduce_duplicated_object(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __ENCO_TRANSFORM_DUPLICATED_OBJECT_REDUCTION_H__
diff --git a/compiler/enco/core/src/Transforms/FeatureUnification.cpp b/compiler/enco/core/src/Transforms/FeatureUnification.cpp
new file mode 100644
index 000000000..1a7a0a8a4
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/FeatureUnification.cpp
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FeatureUnification.h"
+#include "IRUtils.h"
+
+#include <stdex/Memory.h>
+
+#include <set>
+#include <vector>
+
+#include <cassert>
+
+using stdex::make_unique;
+
+namespace
+{
+
+bool is_static_layout(const coco::FeatureLayout::ID *id)
+{
+ if (id == coco::FeatureLayouts::BHWC::uid())
+ {
+ return true;
+ }
+
+ if (id == coco::FeatureLayouts::BCHW::uid())
+ {
+ return true;
+ }
+
+ return false;
+}
+
+bool is_static_layout(const coco::FeatureLayout *l) { return is_static_layout(l->id()); }
+bool is_static_layout(const coco::FeatureObject *f) { return is_static_layout(f->layout()); }
+
+/**
+ * @brief Return true if a given 'feature' is the candidate of unification
+ */
+bool candidate(const coco::FeatureObject *f) { return is_static_layout(f); }
+
+/**
+ * @brief Return true if two features are compatible
+ *
+ * Two features are referred to as compatible if these feature are interchangeable.
+ *
+ * NOTE The current implementation of "compatible" is sound, but incomplete.
+ *
+ * Soundness:
+ * For all feature objects "lhs" and "rhs" that "compatible(lhs, rhs)" returns true,
+ * "lhs" and "rhs" are interchangeable.
+ *
+ * Completeness:
+ * For all interchangeable feature objects "lhs" and "rhs", "compatible(lhs, rhs)" returns true.
+ */
+bool compatible(const coco::FeatureObject *lhs, const coco::FeatureObject *rhs)
+{
+ assert(candidate(lhs) && candidate(rhs));
+
+ if (lhs->layout()->id() != rhs->layout()->id())
+ {
+ return false;
+ }
+
+ if (lhs->layout()->batch() != rhs->layout()->batch())
+ {
+ return false;
+ }
+
+ if (!(lhs->layout()->shape() == rhs->layout()->shape()))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * @brief A FeatureGroup denotes a group of FeatureObject(s)
+ *
+ * Each FeatureGroup includes at most 1 DEF FeatureObject (a FeatureObject that has a producer),
+ * and may include multiple USE FeatureObject(s) (a FeatureObject that has no producer).
+ *
+ * NOTE FeatureUnification pass internally uses this FeatureGroup to store a group of compatible
+ * FeatureObject(s)
+ */
+class FeatureGroup
+{
+public:
+ explicit FeatureGroup(coco::FeatureObject *feature) { insert(feature); }
+
+public:
+ uint32_t size(void) const { return _uses.size() + (_def ? 1 : 0); }
+
+public:
+ void insert(coco::FeatureObject *feature)
+ {
+ if (feature->def() != nullptr)
+ {
+ assert(_def == nullptr);
+ _def = feature;
+ }
+ else
+ {
+ _uses.insert(feature);
+ }
+ }
+
+public:
+ coco::FeatureObject *parent(void) const
+ {
+ if (_def)
+ {
+ return _def;
+ }
+
+ assert(_uses.size() > 0);
+ return *(_uses.begin());
+ }
+
+public:
+ std::set<coco::FeatureObject *> children(void) const
+ {
+ auto res = _uses;
+ res.erase(parent());
+ return res;
+ }
+
+private:
+ coco::FeatureObject *_def = nullptr;
+ std::set<coco::FeatureObject *> _uses;
+};
+
+} // namespace
+
+namespace enco
+{
+
+void unify_feature(enco::Code *code)
+{
+ auto m = code->module();
+
+ for (uint32_t n = 0; n < m->entity()->bag()->size(); ++n)
+ {
+ std::vector<std::unique_ptr<FeatureGroup>> groups;
+
+ auto assign_group = [&](coco::FeatureObject *feature) {
+ // Find a compatible FeatureGroup
+ FeatureGroup *group = nullptr;
+
+ for (const auto &g : groups)
+ {
+ FeatureGroup *candidate = g.get();
+
+ if (!compatible(candidate->parent(), feature))
+ {
+ continue;
+ }
+
+ group = candidate;
+ break;
+ }
+
+ if (group == nullptr)
+ {
+ // Insert FeatureObject into a new FeatureGroup
+ groups.emplace_back(make_unique<FeatureGroup>(feature));
+ }
+ else
+ {
+ // Insert FeatureObject into the compatible FeatureGroup
+ group->insert(feature);
+ }
+ };
+
+ auto bag = m->entity()->bag()->at(n);
+
+ for (auto o : coco::dependent_objects(bag))
+ {
+ if (auto feature = o->asFeature())
+ {
+ if (candidate(feature))
+ {
+ assign_group(feature);
+ }
+ }
+ }
+
+ for (const auto &g : groups)
+ {
+ auto group = g.get();
+ for (const auto child : group->children())
+ {
+ subst(child, group->parent());
+ assert(child->def() == nullptr);
+ assert(child->uses()->size() == 0);
+ m->entity()->object()->destroy(child);
+ }
+ }
+ }
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/FeatureUnification.h b/compiler/enco/core/src/Transforms/FeatureUnification.h
new file mode 100644
index 000000000..5ab0f9d7a
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/FeatureUnification.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_TRANSFORM_FEATURE_UNIFICATION_H__
+#define __ENCO_TRANSFORM_FEATURE_UNIFICATION_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Remove duplicated feature objects inside each bag
+ *
+ * >>> BEFORE <<<
+ * %b = Bag(...)
+ *
+ * %feature_0 = Feature(...) at %b
+ * %feature_1 = Feature(...) at %b
+ *
+ * ...
+ * Use(%feature_0)
+ * ...
+ * Use(%feature_1)
+ * ...
+ *
+ * >>> AFTER <<<
+ * %b = Bag(...)
+ *
+ * %feature_0 = Feature(...) at %b
+ * ~~%feature_1 = Feature(...) at %b~~ <- REMOVED
+ *
+ * ...
+ * Use(%feature_0)
+ * ...
+ * Use(%feature_0)
+ * ...
+ *
+ * Note that all the occurrences of "%feature_1" are replaced with "%feature_0"
+ */
+void unify_feature(enco::Code *code);
+
+struct FeatureUnificationPass final : public Pass
+{
+ PASS_CTOR(FeatureUnificationPass)
+ {
+ // DO NOTHING
+ }
+ void run(const SessionID &sess) const override { unify_feature(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __ENCO_TRANSFORM_FEATURE_UNIFICATION_H__
diff --git a/compiler/enco/core/src/Transforms/FreeInstrElimination.cpp b/compiler/enco/core/src/Transforms/FreeInstrElimination.cpp
new file mode 100644
index 000000000..a62324b28
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/FreeInstrElimination.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FreeInstrElimination.h"
+
+#include <cassert>
+#include <set>
+
+namespace
+{
+
+/**
+ * @brief Return the set of "free" instructions in a given module
+ */
+std::set<coco::Instr *> free_instrs(const coco::Module *m)
+{
+ std::set<coco::Instr *> res;
+
+ for (uint32_t n = 0; n < m->entity()->instr()->size(); ++n)
+ {
+ if (auto ins = m->entity()->instr()->at(n))
+ {
+ if (ins->parent() == nullptr)
+ {
+ res.insert(ins);
+ }
+ }
+ }
+
+ return res;
+}
+
+void destroy(coco::Instr *ins)
+{
+ auto m = ins->module();
+ m->entity()->instr()->destroy(ins);
+}
+
+} // namespace
+
+namespace enco
+{
+
+void eliminate_free_instr(coco::Module *m)
+{
+ for (auto ins : free_instrs(m))
+ {
+ destroy(ins);
+ }
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/FreeInstrElimination.h b/compiler/enco/core/src/Transforms/FreeInstrElimination.h
new file mode 100644
index 000000000..1d311cd35
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/FreeInstrElimination.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_TRANSFORM_FREE_INSTR_ELIMINATION_H__
+#define __ENCO_TRANSFORM_FREE_INSTR_ELIMINATION_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Eliminate free instructions
+ *
+ * An instruction is referred to as "free" if it is not bound to any "block"
+ */
+void eliminate_free_instr(coco::Module *mod);
+
+/**
+ * @brief Eliminate free instructions
+ */
+static inline void eliminate_free_instr(enco::Code *code)
+{
+ // This function is just a wrapper of the above "void eliminate_free_instr(coco::Module *mod)"
+ eliminate_free_instr(code->module());
+}
+
+struct FreeInstrEliminationPass final : public Pass
+{
+ PASS_CTOR(FreeInstrEliminationPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { eliminate_free_instr(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __ENCO_TRANSFORM_FREE_INSTR_ELIMINATION_H__
diff --git a/compiler/enco/core/src/Transforms/FreeInstrElimination.test.cpp b/compiler/enco/core/src/Transforms/FreeInstrElimination.test.cpp
new file mode 100644
index 000000000..c15f32e7d
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/FreeInstrElimination.test.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FreeInstrElimination.h"
+
+#include <gtest/gtest.h>
+
+TEST(FreeInstrEliminationTest, case_000)
+{
+ auto m = coco::Module::create();
+
+ // Create a "free" Eval instruction
+ m->entity()->instr()->create<coco::Eval>();
+
+ ASSERT_EQ(m->entity()->instr()->size(), 1);
+
+ // Apply "Free Instruction Elimination"
+ enco::eliminate_free_instr(m.get());
+
+ ASSERT_EQ(m->entity()->instr()->size(), 0);
+}
diff --git a/compiler/enco/core/src/Transforms/FreeOpElimination.cpp b/compiler/enco/core/src/Transforms/FreeOpElimination.cpp
new file mode 100644
index 000000000..25f2f44d0
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/FreeOpElimination.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FreeOpElimination.h"
+
+#include <cassert>
+#include <set>
+
+namespace
+{
+
+/**
+ * @brief Return the set of Free Op Elimination candidates
+ */
+std::set<coco::Op *> candidates(const coco::Module *m)
+{
+ std::set<coco::Op *> res;
+
+ for (uint32_t n = 0; n < m->entity()->op()->size(); ++n)
+ {
+ if (auto op = m->entity()->op()->at(n))
+ {
+ if ((op->parent() == nullptr) && (op->up() == nullptr))
+ {
+ res.insert(op);
+ }
+ }
+ }
+
+ return res;
+}
+
+} // namespace
+
+namespace enco
+{
+
+void eliminate_free_op(coco::Module *m)
+{
+ for (auto op : candidates(m))
+ {
+ m->entity()->op()->destroy_all(op);
+ }
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/FreeOpElimination.h b/compiler/enco/core/src/Transforms/FreeOpElimination.h
new file mode 100644
index 000000000..3aeacada5
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/FreeOpElimination.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_TRANSFORM_FREE_OP_ELIMINATION_H__
+#define __ENCO_TRANSFORM_FREE_OP_ELIMINATION_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Eliminate free op
+ *
+ * An op is referred to as "free" if it is not bound to any "instruction"
+ */
+void eliminate_free_op(coco::Module *mod);
+
+/**
+ * @brief Eliminate free op
+ */
+static inline void eliminate_free_op(enco::Code *code)
+{
+ // This function is just a wrapper of the above "void eliminate_free_op(coco::Module *mod)"
+ eliminate_free_op(code->module());
+}
+
+struct FreeOpEliminationPass final : public Pass
+{
+ PASS_CTOR(FreeOpEliminationPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { eliminate_free_op(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __ENCO_TRANSFORM_FREE_OP_ELIMINATION_H__
diff --git a/compiler/enco/core/src/Transforms/FreeOpElimination.test.cpp b/compiler/enco/core/src/Transforms/FreeOpElimination.test.cpp
new file mode 100644
index 000000000..41600526b
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/FreeOpElimination.test.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FreeOpElimination.h"
+
+#include <gtest/gtest.h>
+
+TEST(FreeOpEliminationTest, case_000)
+{
+ auto m = coco::Module::create();
+
+ // Create a "free" Load op
+ m->entity()->op()->create<coco::Load>();
+
+ ASSERT_EQ(m->entity()->op()->size(), 1);
+
+ // Apply "Free Op Elimination"
+ enco::eliminate_free_op(m.get());
+
+ ASSERT_EQ(m->entity()->op()->size(), 0);
+}
diff --git a/compiler/enco/core/src/Transforms/GlobalDataGeneration.cpp b/compiler/enco/core/src/Transforms/GlobalDataGeneration.cpp
new file mode 100644
index 000000000..152477a51
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/GlobalDataGeneration.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GlobalDataGeneration.h"
+#include "Split.h"
+#include "Dims.h"
+
+#include <stdex/Memory.h>
+
+#include <map>
+
+using stdex::make_unique;
+
+namespace
+{
+
+/**
+ * @brief Manage global variable declarations
+ */
+class Global
+{
+public:
+ Global(std::ostream &os) : _os(os)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /// @brief Create a global constant string (const char *) literal, and return variable name
+ enco::GlobalOffset constant(const std::string &value);
+
+ /// @brief Create a global constant array variable of type T
+ template <typename T> enco::GlobalOffset constant(const std::vector<T> &values);
+
+ /// @brief Create a global constant array variable of byte (uint8_t) type
+ enco::GlobalOffset constant(const uint8_t *base, uint32_t size);
+
+private:
+ uint32_t _offset = 0;
+ std::ostream &_os;
+};
+
+enco::GlobalOffset Global::constant(const std::string &s)
+{
+ auto const base = reinterpret_cast<const uint8_t *>(s.c_str());
+ auto const size = s.size() + 1 /* NUL */;
+ return constant(base, size);
+}
+
+template <> enco::GlobalOffset Global::constant(const std::vector<uint32_t> &values)
+{
+ auto const base = reinterpret_cast<const uint8_t *>(values.data());
+ auto const size = sizeof(uint32_t) * values.size();
+ return constant(base, size);
+}
+
+enco::GlobalOffset Global::constant(const uint8_t *base, uint32_t size)
+{
+ auto pos = _os.tellp();
+ assert(pos != -1);
+
+ _os.write(reinterpret_cast<const char *>(base), size);
+
+ return static_cast<enco::GlobalOffset>(pos);
+}
+
+} // namespace
+
+namespace
+{
+
+std::map<const ann::Operand *, enco::GlobalOffset> data_offset_ctx;
+std::map<const coco::Bag *, enco::GlobalOffset> bag_data_offset_ctx;
+
+std::map<const coco::Arg *, enco::GlobalOffset> name_offset_ctx;
+std::map<const coco::Arg *, enco::GlobalOffset> dims_offset_ctx;
+
+} // namespace
+
+namespace enco
+{
+
+GlobalOffset GlobalData::data_offset(const ann::Operand *o) { return data_offset_ctx.at(o); }
+
+GlobalOffset GlobalData::data_offset(const coco::Bag *bag)
+{
+ assert(bag_data_offset_ctx.find(bag) != bag_data_offset_ctx.end());
+ return bag_data_offset_ctx.at(bag);
+}
+
+GlobalOffset GlobalData::name_offset(const coco::Input *in) { return name_offset_ctx.at(in); }
+GlobalOffset GlobalData::dims_offset(const coco::Input *in) { return dims_offset_ctx.at(in); }
+
+GlobalOffset GlobalData::name_offset(const coco::Output *out) { return name_offset_ctx.at(out); }
+GlobalOffset GlobalData::dims_offset(const coco::Output *out) { return dims_offset_ctx.at(out); }
+
+void generate_global_data(std::ostream &os, enco::Code *code)
+{
+ auto m = code->module();
+ auto d = code->data();
+
+ auto ann_ctx = enco::SubnetManager::context(m);
+
+ auto global = make_unique<Global>(os);
+
+ //
+ // Emit Bag's weight
+ //
+ for (uint32_t n = 0; n < m->entity()->bag()->size(); ++n)
+ {
+ auto bag = m->entity()->bag()->at(n);
+
+ if (!d->allocated(bag))
+ {
+ // Skip if the weight value does not exist for a given bag
+ continue;
+ }
+
+ // NOTE The current implementation assumes that all the values are of float(fp32) type
+ // TODO Support non-float values
+ auto span = d->f32()->weight(bag);
+
+ assert(span.data() != nullptr);
+ assert(span.size() > 0);
+
+ auto const base = reinterpret_cast<const uint8_t *>(span.data());
+ uint32_t const size = span.size() * sizeof(float);
+
+ assert(bag_data_offset_ctx.find(bag) == bag_data_offset_ctx.end());
+ bag_data_offset_ctx[bag] = global->constant(base, size);
+ }
+
+ for (uint32_t n = 0; n < ann_ctx->count(); ++n)
+ {
+ auto binder = ann_ctx->nth(n);
+
+ auto emit = [&](const ann::OperandID & /*id*/, const ann::Operand *info) {
+ if (info->weight())
+ {
+ auto base = info->weight()->base();
+ auto size = info->weight()->size();
+
+ data_offset_ctx[info] = global->constant(base, size);
+ }
+ };
+ binder->module()->operand()->each(emit);
+ }
+
+ for (uint32_t n = 0; n < m->input()->size(); ++n)
+ {
+ auto input = m->input()->at(n);
+ auto dims = as_dims(input->shape());
+
+ name_offset_ctx[input] = global->constant(input->name());
+ dims_offset_ctx[input] = global->constant<uint32_t>(dims);
+ }
+
+ for (uint32_t n = 0; n < m->output()->size(); ++n)
+ {
+ auto output = m->output()->at(n);
+ auto dims = as_dims(output->shape());
+
+ name_offset_ctx[output] = global->constant(output->name());
+ dims_offset_ctx[output] = global->constant<uint32_t>(dims);
+ }
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/GlobalDataGeneration.h b/compiler/enco/core/src/Transforms/GlobalDataGeneration.h
new file mode 100644
index 000000000..433431401
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/GlobalDataGeneration.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_TRANSFORM_GLOBAL_DATA_GENERATION_H__
+#define __ENCO_TRANSFORM_GLOBAL_DATA_GENERATION_H__
+
+#include "Code.h"
+
+#include <ostream>
+
+namespace enco
+{
+
+using GlobalOffset = uint32_t;
+
+struct GlobalData
+{
+ static GlobalOffset data_offset(const ann::Operand *);
+ /**
+ * @brief Return the weight offset of a given bag
+ *
+ * @note The behavior of "data_offset" is undefined if a bag has no weight.
+ */
+ static GlobalOffset data_offset(const coco::Bag *);
+
+ static GlobalOffset name_offset(const coco::Input *);
+ static GlobalOffset dims_offset(const coco::Input *);
+ static GlobalOffset name_offset(const coco::Output *);
+ static GlobalOffset dims_offset(const coco::Output *);
+};
+
+/**
+ * @brief Generate 'Global' weight array.
+ *
+ * NOTE Succeeding passes can access offsets via "GlobalData"
+ */
+void generate_global_data(std::ostream &, enco::Code *);
+
+} // namespace enco
+
+#endif // __ENCO_TRANSFORM_GLOBAL_DATA_GENERATION_H__
diff --git a/compiler/enco/core/src/Transforms/IdenticalObjectReduction.cpp b/compiler/enco/core/src/Transforms/IdenticalObjectReduction.cpp
new file mode 100644
index 000000000..cb996d2ac
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/IdenticalObjectReduction.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IdenticalObjectReduction.h"
+#include "IRUtils.h"
+
+#include <set>
+
+namespace enco
+{
+
+void reduce_identical_object(enco::Code *code)
+{
+ auto m = code->module();
+
+ std::set<coco::Copy *> detached;
+
+ // Preceding optimizations may generate "free" instructions.
+ // - i.e. an instruction not linked to a block
+ //
+ // Let's iterate over only a sequence of "bounded" instructions.
+ for (auto ins : instr_sequence(m))
+ {
+ assert(ins != nullptr);
+ assert(ins->parent() != nullptr);
+
+ auto copy = ins->asCopy();
+
+ if (copy == nullptr)
+ {
+ // Skip if instruction is not a copy
+ continue;
+ }
+
+ // TODO Support non-Feature Objects
+ auto ifm = copy->from()->asFeature();
+ auto ofm = copy->into()->asFeature();
+
+ assert(ofm->bag() != nullptr);
+
+ if (ifm->layout()->id() != ofm->layout()->id())
+ {
+ continue;
+ }
+
+ if (ifm->layout()->id() != coco::FeatureLayouts::BHWC::uid())
+ {
+ continue;
+ }
+
+ // Skip if this copy produces network output
+ if (ofm->bag()->output())
+ {
+ // TODO Optimize this case
+ //
+ // Note that the code under optimization is of the following form:
+ //
+ // %ifm <- Instr(...)
+ // %ofm <- Copy(%ifm)
+ //
+ // Let's assume that "Copy" is the only reader of %ifm (to be precise, its bag).
+ //
+ // Then, it is possible to rewrite the above fragment as follows:
+ //
+ // %ofm <- Instr(...)
+ //
+ continue;
+ }
+
+ if (ofm->bag()->reads()->size() > 0)
+ {
+ // Let us consider the following code:
+ //
+ // Bag:
+ // %bag_0 = Bag(...)
+ // %bag_1 = Bag(...)
+ // %bag_2 = Bag(...)
+ //
+ // Object:
+ // %obj_0 = FeatureObject(bag: %bag_0)
+ // %obj_1 = FeatureObject(bag: %bag_1)
+ //
+ // Instr:
+ // copy an object from %obj_0 into %obj_1
+ // shuffle values from %bag_1 into %bag_2
+ // eval Conv2D with %obj_1
+ //
+ // Identical Object Reduction (IOR) tries to eliminate the first copy via
+ // substitution (substitute all the occurrence of %obj_1 as use with %obj_0).
+ //
+ // Here is the code transformed by IOR:
+ //
+ // Bag:
+ // %bag_0 = Bag(...)
+ // %bag_1 = Bag(...)
+ // %bag_2 = Bag(...)
+ //
+ // Object:
+ // %obj_0 = FeatureObject(bag: %bag_0)
+ // %obj_1 = FeatureObject(bag: %bag_1)
+ //
+ // Instr:
+ // shuffle values from %bag_1 into %bag_2
+ // eval Conv2D with %obj_0
+ //
+ // Note that there is no updater of %bag_1 after IOR, and thus the behavior
+ // of the first shuffle instruction has changed.
+ //
 + // This example shows that it is impossible to simply substitute %obj_1
+ // with %obj_0 in the presence of readers over its backing bag.
+ continue;
+ }
+
+ subst(copy->into(), copy->from());
+
+ copy->detach();
+ detached.insert(copy);
+ }
+
+ for (auto copy : detached)
+ {
+ m->entity()->instr()->destroy(copy);
+ }
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/IdenticalObjectReduction.h b/compiler/enco/core/src/Transforms/IdenticalObjectReduction.h
new file mode 100644
index 000000000..b5bb25d7c
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/IdenticalObjectReduction.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_TRANSFORM_IDENTICAL_OBJECT_REDUCTION_H__
+#define __ENCO_TRANSFORM_IDENTICAL_OBJECT_REDUCTION_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Reduce identically copied objects as its original object
+ *
+ * >>> BEFORE <<<
+ * %bag_0 = Bag(size: N)
+ * %bag_1 = Bag(size: N)
+ *
+ * %obj_0 = Feature(layout: BHWC) at %bag_0
+ * %obj_1 = Feature(layout: BHWC) at %bag_1
+ *
+ * copy(from: %obj_0, into: %obj_1)
+ * ...
+ * Use(%obj_0)
+ * Use(%obj_1)
+ * ...
+ *
+ * >>> AFTER <<<
+ * %bag_0 = Bag(size: N)
+ * %bag_1 = Bag(size: N)
+ *
+ * %obj_0 = Feature(layout: BHWC) at %bag_0
+ * %obj_1 = Feature(layout: BHWC) at %bag_1
+ *
+ * copy(from: %obj_0, into: %obj_1)
+ * ...
+ * Use(%obj_0)
+ * Use(%obj_0) <- %obj_1 is replaced
+ * ...
+ */
+void reduce_identical_object(enco::Code *code);
+
+struct IdenticalObjectReductionPass final : public Pass
+{
+ PASS_CTOR(IdenticalObjectReductionPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { reduce_identical_object(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __ENCO_TRANSFORM_IDENTICAL_OBJECT_REDUCTION_H__
diff --git a/compiler/enco/core/src/Transforms/IdenticalObjectReduction.test.cpp b/compiler/enco/core/src/Transforms/IdenticalObjectReduction.test.cpp
new file mode 100644
index 000000000..772bea08e
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/IdenticalObjectReduction.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IdenticalObjectReduction.h"
+
+#include <gtest/gtest.h>
+
+TEST(IdenticalObjectReductionTest, case_000)
+{
+ auto m = coco::Module::create();
+
+ // Create a "free" Eval instruction
+ m->entity()->instr()->create<coco::Eval>();
+
+ enco::Code code{m.get(), nullptr};
+
+ // NOTE This code SHOULD NOT crash
+ enco::reduce_identical_object(&code);
+}
diff --git a/compiler/enco/core/src/Transforms/IndirectCopyElimination.cpp b/compiler/enco/core/src/Transforms/IndirectCopyElimination.cpp
new file mode 100644
index 000000000..b36620f61
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/IndirectCopyElimination.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IndirectCopyElimination.h"
+
+#include <cassert>
+
+namespace
+{
+
+coco::Copy *as_copy(coco::Instr *ins) { return ins ? ins->asCopy() : nullptr; }
+
+/**
+ * @brief Return a set of copy instructions that are accessible from top-level module
+ */
+std::set<coco::Copy *> linked_copy_instrs(coco::Module *m)
+{
+ std::set<coco::Copy *> res;
+
+ for (uint32_t n = 0; n < m->entity()->instr()->size(); ++n)
+ {
+ auto ins = m->entity()->instr()->at(n);
+ assert(ins != nullptr);
+
+ if (ins->parent() && ins->parent()->parent())
+ {
+ if (auto copy = ins->asCopy())
+ {
+ res.insert(copy);
+ }
+ }
+ }
+
+ return res;
+}
+
+} // namespace
+
+namespace enco
+{
+
+void eliminate_indirect_copy(enco::Code *code)
+{
+ auto m = code->module();
+
+ for (auto child : linked_copy_instrs(m))
+ {
+ auto from = child->from();
+ assert(from != nullptr);
+
+ // Find the irreducible origin
+ while (true)
+ {
+ if (auto producer = coco::producer(from))
+ {
+ if (auto parent = as_copy(producer->loc()))
+ {
+ assert(parent->from() != nullptr);
+ from = parent->from();
+ continue;
+ }
+ }
+
+ break;
+ }
+
+ child->from(from);
+ }
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/IndirectCopyElimination.h b/compiler/enco/core/src/Transforms/IndirectCopyElimination.h
new file mode 100644
index 000000000..acfdf569b
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/IndirectCopyElimination.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_TRANSFORM_INDIRECT_COPY_ELIMINATION_H__
+#define __ENCO_TRANSFORM_INDIRECT_COPY_ELIMINATION_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Convert all the indirect copies as a direct copy
+ *
+ * >>> BEFORE <<<
+ * %obj_0 = ...
+ * %obj_1 = ...
+ * %obj_2 = ...
+ *
+ * copy(from: %obj_0, into: %obj_1)
+ * copy(from: %obj_1, into: %obj_2)
+ *
+ * >>> AFTER <<<
+ * %obj_0 = ...
+ * %obj_1 = ...
+ * %obj_2 = ...
+ *
+ * copy(from: %obj_0, into: %obj_1)
+ * copy(from: %obj_0, into: %obj_2)
+ *
+ */
+void eliminate_indirect_copy(enco::Code *code);
+
+struct IndirectCopyEliminationPass final : public enco::Pass
+{
+ PASS_CTOR(IndirectCopyEliminationPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { eliminate_indirect_copy(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __ENCO_TRANSFORM_INDIRECT_COPY_ELIMINATION_H__
diff --git a/compiler/enco/core/src/Transforms/IntrinsicSelection.cpp b/compiler/enco/core/src/Transforms/IntrinsicSelection.cpp
new file mode 100644
index 000000000..7bf1c4926
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/IntrinsicSelection.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IntrinsicSelection.h"
+
+#include "coex/IR.h"
+
+namespace
+{
+
+/**
 + * @brief Return a backend-specific coco (extended) instruction
+ *
+ * @note rewrite(ins) returns nullptr if selection fails
+ */
+coco::Instr *rewrite(coco::Instr *curr)
+{
+ auto m = curr->module();
+ assert(m != nullptr);
+
+ if (auto eval = coco::safe_cast<coco::Eval>(curr))
+ {
+ if (auto concat_f = eval->op()->asConcatF())
+ {
+ auto fst_load = concat_f->left()->asLoad();
+ auto snd_load = concat_f->right()->asLoad();
+
+ if (fst_load && snd_load && (concat_f->axis() == coco::ConcatF::Axis::Depth))
+ {
+ // Here is the pattern of interest
+ //
+ // %ofm = eval(ConcatF(Depth, Load(%left), Load(%right)))
+ //
+ auto fst_feature = fst_load->object()->asFeature();
+ auto snd_feature = snd_load->object()->asFeature();
+ assert((fst_feature != nullptr) && (snd_feature != nullptr));
+
+ auto out_feature = eval->out()->asFeature();
+ assert(out_feature != nullptr);
+
+ eval->out(nullptr);
+
+ auto depth_concat = m->entity()->instr()->create<ANNDepthConcatF>();
+
+ depth_concat->out(out_feature);
+ depth_concat->fst(fst_feature);
+ depth_concat->snd(snd_feature);
+
+ return depth_concat;
+ }
+
+ return nullptr;
+ }
+ }
+
+ return nullptr;
+}
+
+} // namespace
+
+namespace enco
+{
+
+void select_intrinsic(enco::Code *code)
+{
+ auto m = code->module();
+
+ for (auto blk = m->block()->head(); blk; blk = blk->next())
+ {
+ auto ins = blk->instr()->head();
+
+ while (ins)
+ {
+ if (auto rewritten_ins = rewrite(ins))
+ {
+ rewritten_ins->insertBefore(ins);
+ ins->detach();
+
+ ins = rewritten_ins;
+ }
+
+ ins = ins->next();
+ }
+ }
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/IntrinsicSelection.h b/compiler/enco/core/src/Transforms/IntrinsicSelection.h
new file mode 100644
index 000000000..67d38eaeb
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/IntrinsicSelection.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTRINSIC_SELECTION_H__
+#define __INTRINSIC_SELECTION_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
 + * @brief Select Intrinsic (API) to be used
+ *
 + * This pass is an analogue of the "Instruction Selection" pass. This "Intrinsic Selection" pass
+ * will replace a general coco IR instruction into a backend-specific coco (extended) IR
+ * instruction.
+ */
+void select_intrinsic(enco::Code *);
+
+struct IntrinsicSelectionPass final : public Pass
+{
+ PASS_CTOR(IntrinsicSelectionPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { select_intrinsic(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __INTRINSIC_SELECTION_H__
diff --git a/compiler/enco/core/src/Transforms/Optimizations.cpp b/compiler/enco/core/src/Transforms/Optimizations.cpp
new file mode 100644
index 000000000..7f0974dd0
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/Optimizations.cpp
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Optimizations.h"
+#include "CodeIndex.h"
+
+#include <cassert>
+
+namespace enco
+{
+
+void generate_bypass_shuffle(enco::Code *code)
+{
+ auto m = code->module();
+
+ for (uint32_t n = 0; n < m->entity()->bag()->size(); ++n)
+ {
+ auto bag = m->entity()->bag()->at(n);
+
 + // NOTE The current implementation assumes that all the updates occur before the first read
+ // TODO Remove this assumption
+ for (auto u : coco::updaters(bag))
+ {
+ if ((u->loc() == nullptr) || (u->loc()->asShuffle() == nullptr))
+ {
+ // Skip if updater is not a Shuffle instruction
+ continue;
+ }
+
+ for (auto r : coco::readers(bag))
+ {
+ if ((r->loc() == nullptr) || (r->loc()->asShuffle() == nullptr))
+ {
+ // Skip if reader is not a Shuffle instruction
+ continue;
+ }
+
+ auto shuffle_1 = u->loc()->asShuffle();
+ auto shuffle_2 = r->loc()->asShuffle();
+
+ // Construct a shuffle instruction
+ auto shuffle_3 = m->entity()->instr()->create<coco::Shuffle>();
+
+ shuffle_3->from(shuffle_1->from());
+ shuffle_3->into(shuffle_2->into());
+
+ // Attempt to construct a valid bypass shuffle instruction
+ bool valid = true;
+
+ for (const auto &C : shuffle_2->range())
+ {
+ auto B = shuffle_2->at(C);
+
+ if (!shuffle_1->defined(B))
+ {
+ valid = false;
+ break;
+ }
+
+ auto A = shuffle_1->at(B);
+
+ shuffle_3->insert(A, C);
+ }
+
+ if (valid)
+ {
+ // Insert shuffle_3 before shuffle_2 if shuffle_3 is a valid bypass of shuffle_2
+ shuffle_3->insertBefore(shuffle_2);
+
+ // NOTE shuffle_2 SHOULD BE detached and destroyed after shuffle_3 is inserted
+ shuffle_2->detach();
+ m->entity()->instr()->destroy(shuffle_2);
+ }
+ else
+ {
+ // Destroy shuffle_3 (bypass shuffle) if it is invalid
+ m->entity()->instr()->destroy(shuffle_3);
+ }
+ }
+ }
+ }
+}
+
+} // namespace enco
+
+//
+// Hoist Object
+//
+namespace
+{
+
+bool hoistable(const coco::Shuffle *shuffle)
+{
+ auto range = shuffle->range();
+
+ if (range.size() != shuffle->into()->size())
+ {
+ return false;
+ }
+
+ for (const auto &dst : range)
+ {
+ if (shuffle->at(dst).value() != dst.value())
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool complete(const coco::Shuffle *s) { return s->range().size() == s->into()->size(); }
+
+bool compatible(const coco::Shuffle *s1, const coco::Shuffle *s2)
+{
+ if (s1->from() != s2->from())
+ {
+ return false;
+ }
+
+ if (s1->into()->size() != s2->into()->size())
+ {
+ return false;
+ }
+
+ auto range_1 = s1->range();
+ auto range_2 = s2->range();
+
+ if (range_1.size() != range_2.size())
+ {
+ return false;
+ }
+
+ bool res = true;
+
+ for (const auto &dst : range_2)
+ {
+ if (!s1->defined(dst))
+ {
+ res = false;
+ break;
+ }
+
+ auto src_1 = s1->at(dst);
+ auto src_2 = s2->at(dst);
+
+ if (src_1.value() != src_2.value())
+ {
+ res = false;
+ break;
+ }
+ }
+
+ return res;
+}
+
+} // namespace
+
+namespace enco
+{
+
+void hoist_object(enco::Code *code)
+{
+ auto m = code->module();
+
+ //
+ // Case 1
+ //
+ for (uint32_t n = 0; n < m->entity()->instr()->size(); ++n)
+ {
+ if (auto shuffle = m->entity()->instr()->at(n)->asShuffle())
+ {
+ if (shuffle->parent() == nullptr)
+ {
+ continue;
+ }
+
+ if (hoistable(shuffle))
+ {
+ auto from = shuffle->from();
+ auto into = shuffle->into();
+
+ into->replaceAllDepsWith(from);
+ }
+ }
+ }
+
+ //
+ // Case 2
+ //
+ for (uint32_t n = 0; n < m->entity()->bag()->size(); ++n)
+ {
+ auto bag = m->entity()->bag()->at(n);
+
+ std::map<CodeIndex, coco::Shuffle *> collected;
+
+ for (auto reader : coco::readers(bag))
+ {
+ if (auto ins = reader->loc())
+ {
+ if (auto shuffle = ins->asShuffle())
+ {
+ collected[code_index(shuffle)] = shuffle;
+ }
+ }
+ }
+
+ std::vector<coco::Shuffle *> sorted;
+
+ for (auto it = collected.begin(); it != collected.end(); ++it)
+ {
+ sorted.emplace_back(it->second);
+ }
+
+ for (uint32_t curr = 0; curr < sorted.size(); ++curr)
+ {
+ auto const curr_ins = sorted.at(curr);
+ auto const curr_bag = curr_ins->into();
+
+ if (!complete(curr_ins))
+ {
+ continue;
+ }
+
+ for (uint32_t next = curr + 1; next < sorted.size(); ++next)
+ {
+ auto const next_ins = sorted.at(next);
+ auto const next_bag = next_ins->into();
+
+ if (!complete(next_ins))
+ {
+ continue;
+ }
+
+ if (compatible(curr_ins, next_ins))
+ {
+ next_bag->replaceAllDepsWith(curr_bag);
+ }
+ }
+ }
+ }
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/Optimizations.h b/compiler/enco/core/src/Transforms/Optimizations.h
new file mode 100644
index 000000000..7cfc2305c
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/Optimizations.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_OPTIMIZATIONS_H__
+#define __ENCO_OPTIMIZATIONS_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+/**
+ * @brief Add a bypass Shuffle if two continued Shuffles map same from-into
+ *
+ * %bag_1 = Bag(size: N)
+ * %bag_2 = Bag(size: N)
+ * %bag_3 = Bag(size: N)
+ *
+ * >>> BEFORE <<<
+ * Shuffle(from: %bag_1, into: %bag_2, [0 -> 0])
+ * Shuffle(from: %bag_2, into: %bag_3, [0 -> 0])
+ *
+ * Let's refer to the former shuffle as Shuffle 1 and the latter one as Shuffle 2.
+ * We can replace Shuffle 2 with new Shuffle 3 as follows when Shuffle 1 and
+ * Shuffle 2 map to the same position.
+ *
+ * >>> AFTER <<<
+ * Shuffle(from: %bag_1, into: %bag_2, [0 -> 0]) <- Shuffle 1
+ * Shuffle(from: %bag_1, into: %bag_3, [0 -> 0]) <- Shuffle 3
+ *
+ * Note that Shuffle 1 can be eliminated when %bag_2 is not used
+ */
+void generate_bypass_shuffle(enco::Code *code);
+
+struct BypassGenerationPass final : public Pass
+{
+ PASS_CTOR(BypassGenerationPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { generate_bypass_shuffle(code(sess)); }
+};
+
+/**
+ * @brief Update the base bag of each object if possible
+ *
+ * --- Case 1 ---
+ * Let us consider the following code:
+ *
+ * %bag_1 = Bag(size: 4)
+ * %bag_2 = Bag(size: 1)
+ *
+ * %obj_1 = ... at %bag_1
+ * %obj_2 = ... at %bag_2
+ *
+ * ...
+ * Shuffle(from: %bag_1, into: %bag_2, [0 -> 0]) <- shuffle
+ * ...
+ *
+ * Note that the content of %bag_2 after shuffle is identical to a part of %bag_1, so
+ * the following code is identical to the above code
+ *
+ * %bag_1 = Bag(size: 4)
+ * %bag_2 = Bag(size: 1)
+ *
+ * %obj_1 = ... at %bag_1
+ * %obj_2 = ... at %bag_1
+ *
+ * ...
+ * Shuffle(from: %bag_1, into: %bag_2, [0 -> 0])
+ * ...
+ *
+ * --- Case 2 ---
+ * Let us consider the following code:
+ *
+ * %bag_1 = Bag(size: 4)
+ * %bag_2 = Bag(size: 1)
+ * %bag_3 = Bag(size: 1)
+ *
+ * %obj_1 = ... at %bag_2
+ * %obj_2 = ... at %bag_3
+ *
+ * Shuffle(from: %bag_1, into: %bag_2, [0 -> 0]) <- shuffle_1
+ * Shuffle(from: %bag_1, into: %bag_3, [0 -> 0]) <- shuffle_2
+ *
+ * Note that the content of %bag_3 after shuffle_2 is identical to that of %bag_2 after shuffle_1,
+ * so the following code is identical to the above one:
+ *
+ * %bag_1 = Bag(size: 4)
+ * %bag_2 = Bag(size: 1)
+ * %bag_3 = Bag(size: 1)
+ *
+ * %obj_1 = ... at %bag_2
+ * %obj_2 = ... at %bag_2 <- HERE
+ *
+ * Shuffle(from: %bag_1, into: %bag_2, [0 -> 0]) <- shuffle_1
+ * Shuffle(from: %bag_1, into: %bag_3, [0 -> 0]) <- shuffle_2
+ *
+ * "hoist_object" optimization rewrites the former code as the latter one.
+ *
+ * NOTE "hoist_object" DOES NOT change any instruction. It just updates the base bag of objects of
+ * interest.
+ */
+void hoist_object(enco::Code *code);
+
+} // namespace enco
+
+#endif // __ENCO_OPTIMIZATIONS_H__
diff --git a/compiler/enco/core/src/Transforms/Split.cpp b/compiler/enco/core/src/Transforms/Split.cpp
new file mode 100644
index 000000000..b57b8f882
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/Split.cpp
@@ -0,0 +1,1233 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Split.h"
+#include "Usage.h"
+#include "Session.h"
+#include "coex/IR.h"
+
+#include <coco/IR.h>
+
+#include <nncc/core/ADT/kernel/NHWCLayout.h>
+#include <stdex/Memory.h>
+
+#include <map>
+#include <stdexcept>
+#include <functional>
+
+using stdex::make_unique;
+
+namespace
+{
+
+std::map<const coco::Module *, std::unique_ptr<ANNContext>> _subnet_contexts;
+
+} // namespace
+
+namespace enco
+{
+
+const ANNContext *SubnetManager::context(const coco::Module *m)
+{
+ return _subnet_contexts.at(m).get();
+}
+
+} // namespace enco
+
+namespace
+{
+
+using Appender = std::function<void(ANNBinder *binder)>;
+
+struct ANNOpAppender
+{
+ virtual ~ANNOpAppender() = default;
+
+ virtual void append(ANNBinder *binder) const = 0;
+};
+
+class ANNAddAppender final : public ANNOpAppender
+{
+public:
+ void left(coco::FeatureObject *o) { _left = o; }
+ void right(coco::FeatureObject *o) { _right = o; }
+ void out(coco::FeatureObject *o) { _out = o; }
+
+public:
+ void append(ANNBinder *binder) const override
+ {
+ auto left = binder->addOperand<float>(_left);
+ auto right = binder->addOperand<float>(_right);
+ auto fuse = binder->addOperand<int32_t>();
+ binder->setOperand(fuse, 0);
+
+ auto out = binder->addOperand<float>(_out);
+
+ binder->addOperation(ann::Operation::Code::ADD, {left, right, fuse}, {out});
+ }
+
+private:
+ coco::FeatureObject *_left = nullptr;
+ coco::FeatureObject *_right = nullptr;
+ coco::FeatureObject *_out = nullptr;
+};
+
+class ANNMulAppender final : public ANNOpAppender
+{
+public:
+ void left(coco::FeatureObject *o) { _left = o; }
+ void right(coco::FeatureObject *o) { _right = o; }
+ void out(coco::FeatureObject *o) { _out = o; }
+
+public:
+ void append(ANNBinder *binder) const override
+ {
+ auto left = binder->addOperand<float>(_left);
+ auto right = binder->addOperand<float>(_right);
+ auto fuse = binder->addOperand<int32_t>();
+ binder->setOperand(fuse, 0);
+
+ auto out = binder->addOperand<float>(_out);
+
+ binder->addOperation(ann::Operation::Code::MUL, {left, right, fuse}, {out});
+ }
+
+private:
+ coco::FeatureObject *_left = nullptr;
+ coco::FeatureObject *_right = nullptr;
+ coco::FeatureObject *_out = nullptr;
+};
+
+/**
+ * WARN The current implementation supports concatenation along depth only
+ */
+class ANNConcatAppender final : public ANNOpAppender
+{
+public:
+ void left(coco::FeatureObject *o) { _left = o; }
+ void right(coco::FeatureObject *o) { _right = o; }
+ void out(coco::FeatureObject *o) { _out = o; }
+
+public:
+ void append(ANNBinder *binder) const override
+ {
+ auto left = binder->addOperand<float>(_left);
+ auto right = binder->addOperand<float>(_right);
+ auto axis = binder->addOperand<int32_t>();
+ binder->setOperand(axis, 3 /* DEPTH */);
+
+ auto out = binder->addOperand<float>(_out);
+
+ binder->addOperation(ann::Operation::Code::CONCAT, {left, right, axis}, {out});
+ }
+
+private:
+ coco::FeatureObject *_left = nullptr;
+ coco::FeatureObject *_right = nullptr;
+ coco::FeatureObject *_out = nullptr;
+};
+
+class ANNConv2DAppender final : public ANNOpAppender
+{
+public:
+ void session(const enco::SessionID &sess) { _sess = sess; }
+
+ void pad(const coco::Padding2D *pad) { _pad = *pad; }
+ void stride(const coco::Stride2D *stride) { _stride = *stride; }
+
+ void ifm(coco::FeatureObject *ifm) { _ifm = ifm; }
+ void ker(coco::KernelObject *ker) { _ker = ker; }
+ // Q: Should we take a bias as a feature object?
+ // NOTE This interface is subject to change
+ void bias(coco::FeatureObject *bias) { _bias = bias; }
+ void ofm(coco::FeatureObject *ofm) { _ofm = ofm; }
+
+public:
+ void append(ANNBinder *binder) const override
+ {
+ auto data = enco::data(_sess);
+
+ auto ifm = binder->addOperand<float>(_ifm);
+ auto ker = binder->addOperand<float>(_ker);
+
+ // Fill kernel data
+ {
+ auto ker_bag = _ker->bag();
+ auto ker_weight = data->f32()->weight(ker_bag);
+
+ assert(ker_weight.data() != nullptr);
+
+ binder->setOperand(ker, ker_weight.data(), ker_weight.data() + ker_weight.size());
+ }
+
+ // Conv2D in coco IR has no bias, but bias is mandatory in Android NN API
+ auto bias = binder->addOperand<float>(nncc::core::ADT::tensor::Shape{_ker->shape().count()});
+
+ // Fill bias data
+ if (_bias == nullptr)
+ {
+ // Use a fresh empty bias if "bias" is not specified
+ auto length = _ker->shape().count();
+
+ std::vector<float> values;
+ values.resize(length, 0.0f);
+
+ binder->setOperand(bias, values.begin(), values.end());
+ }
+ else
+ {
+ // Use specified "bias"
+ auto bias_bag = _bias->bag();
+ auto bias_weight = data->f32()->weight(bias_bag);
+
+ assert(bias_weight.data() != nullptr);
+ assert(bias_weight.size() == _ker->shape().count());
+
+ binder->setOperand(bias, bias_weight.data(), bias_weight.data() + bias_weight.size());
+ }
+
+ auto left = binder->addOperand<int32_t>();
+ binder->setOperand(left, _pad.left());
+ auto right = binder->addOperand<int32_t>();
+ binder->setOperand(right, _pad.right());
+ auto top = binder->addOperand<int32_t>();
+ binder->setOperand(top, _pad.top());
+ auto bottom = binder->addOperand<int32_t>();
+ binder->setOperand(bottom, _pad.bottom());
+ auto hstride = binder->addOperand<int32_t>();
+ binder->setOperand(hstride, _stride.horizontal());
+ auto vstride = binder->addOperand<int32_t>();
+ binder->setOperand(vstride, _stride.vertical());
+ auto fuse = binder->addOperand<int32_t>();
+ binder->setOperand(fuse, 0);
+
+ auto ofm = binder->addOperand<float>(_ofm);
+
+ binder->addOperation(ann::Operation::Code::CONV_2D,
+ {ifm, ker, bias, left, right, top, bottom, hstride, vstride, fuse}, {ofm});
+ }
+
+private:
+ enco::SessionID _sess;
+
+private:
+ coco::Padding2D _pad;
+ coco::Stride2D _stride;
+
+private:
+ coco::FeatureObject *_ifm = nullptr;
+ coco::KernelObject *_ker = nullptr;
+ coco::FeatureObject *_bias = nullptr;
+ coco::FeatureObject *_ofm = nullptr;
+};
+
+class ANNDepthwiseConv2DAppender final : public ANNOpAppender
+{
+public:
+ void session(const enco::SessionID &sess) { _sess = sess; }
+
+ void multiplier(const uint32_t &multiplier) { _multiplier = multiplier; }
+ void pad(const coco::Padding2D *pad) { _pad = *pad; }
+ void stride(const coco::Stride2D *stride) { _stride = *stride; }
+
+ void ifm(coco::FeatureObject *ifm) { _ifm = ifm; }
+ void ker(coco::KernelObject *ker) { _ker = ker; }
+ void ofm(coco::FeatureObject *ofm) { _ofm = ofm; }
+
+public:
+ void append(ANNBinder *binder) const override
+ {
+ using namespace nncc::core::ADT;
+
+ auto data = enco::data(_sess);
+
+ const uint32_t ker_N = _ker->shape().count();
+ const uint32_t ker_H = _ker->shape().height();
+ const uint32_t ker_W = _ker->shape().width();
+
+ assert(ker_N % _multiplier == 0);
+ const uint32_t group = ker_N / _multiplier;
+
+ auto ifm = binder->addOperand<float>(_ifm);
+ auto ker = binder->addOperand<float>(tensor::Shape{1, ker_H, ker_W, ker_N});
+
+ // Fill kernel data
+ {
+ auto obj = _ker;
+ auto shape = obj->shape();
+
+ auto ovl = data->f32()->read(obj);
+ assert(ovl != nullptr);
+
+ // Flatten?
+ std::vector<float> values;
+
+ /**
+ * Android NN computes DEPTHWISE_CONV_2D as follows:
+ *
+ * output[b, i, j, k * channel_multiplier + q] =
+ * sum_{di, dj} (
+ * input[b, strides[1] * i + di, strides[2] * j + dj, k] *
+ * filter[1, di, dj, k * channel_multiplier + q]
+ * ) + bias[k * channel_multiplier + q]
+ *
+ */
+ for (uint32_t row = 0; row < shape.height(); ++row)
+ {
+ for (uint32_t col = 0; col < shape.width(); ++col)
+ {
+ for (uint32_t g = 0; g < group; ++g)
+ {
+ for (uint32_t m = 0; m < _multiplier; ++m)
+ {
+ const auto value = ovl->at(g * _multiplier + m, 0, row, col);
+ values.emplace_back(value);
+ }
+ }
+ }
+ }
+
+ assert(values.size() == nncc::core::ADT::kernel::num_elements(shape));
+ binder->setOperand(ker, values.begin(), values.end());
+ }
+
+ // Conv2D in coco IR has no bias, but bias is mandatory in Android NN API
+ auto bias = binder->addOperand<float>(nncc::core::ADT::tensor::Shape{_ker->shape().count()});
+
+ // Fill bias data
+ {
+ auto length = _ker->shape().count();
+
+ std::vector<float> values;
+ values.resize(length, 0.0f);
+
+ binder->setOperand(bias, values.begin(), values.end());
+ }
+
+ auto left = binder->addOperand<int32_t>();
+ binder->setOperand(left, _pad.left());
+ auto right = binder->addOperand<int32_t>();
+ binder->setOperand(right, _pad.right());
+ auto top = binder->addOperand<int32_t>();
+ binder->setOperand(top, _pad.top());
+ auto bottom = binder->addOperand<int32_t>();
+ binder->setOperand(bottom, _pad.bottom());
+ auto hstride = binder->addOperand<int32_t>();
+ binder->setOperand(hstride, _stride.horizontal());
+ auto vstride = binder->addOperand<int32_t>();
+ binder->setOperand(vstride, _stride.vertical());
+ auto multiplier = binder->addOperand<int32_t>();
+ binder->setOperand(multiplier, _multiplier);
+ auto fuse = binder->addOperand<int32_t>();
+ binder->setOperand(fuse, 0);
+
+ auto ofm = binder->addOperand<float>(_ofm);
+
+ binder->addOperation(
+ ann::Operation::Code::DEPTHWISE_CONV_2D,
+ {ifm, ker, bias, left, right, top, bottom, hstride, vstride, multiplier, fuse}, {ofm});
+ }
+
+private:
+ enco::SessionID _sess;
+
+private:
+ uint32_t _multiplier;
+ coco::Padding2D _pad;
+ coco::Stride2D _stride;
+
+private:
+ coco::FeatureObject *_ifm = nullptr;
+ coco::KernelObject *_ker = nullptr;
+ coco::FeatureObject *_ofm = nullptr;
+};
+
+class ANNReLUAppender final : public ANNOpAppender
+{
+public:
+ void ifm(coco::FeatureObject *ifm) { _ifm = ifm; }
+ void ofm(coco::FeatureObject *ofm) { _ofm = ofm; }
+
+public:
+ void append(ANNBinder *binder) const override
+ {
+ auto ifm = binder->addOperand<float>(_ifm);
+ auto ofm = binder->addOperand<float>(_ofm);
+
+ binder->addOperation(ann::Operation::Code::RELU, {ifm}, {ofm});
+ }
+
+private:
+ coco::FeatureObject *_ifm = nullptr;
+ coco::FeatureObject *_ofm = nullptr;
+};
+
+class ANNReLU6Appender final : public ANNOpAppender
+{
+public:
+ void ifm(coco::FeatureObject *ifm) { _ifm = ifm; }
+ void ofm(coco::FeatureObject *ofm) { _ofm = ofm; }
+
+public:
+ void append(ANNBinder *binder) const override
+ {
+ auto ifm = binder->addOperand<float>(_ifm);
+ auto ofm = binder->addOperand<float>(_ofm);
+
+ binder->addOperation(ann::Operation::Code::RELU6, {ifm}, {ofm});
+ }
+
+private:
+ coco::FeatureObject *_ifm = nullptr;
+ coco::FeatureObject *_ofm = nullptr;
+};
+
+class ANNMaxPool2DAppender final : public ANNOpAppender
+{
+public:
+ void pad(const coco::Padding2D *pad) { _pad = *pad; }
+ void stride(const coco::Stride2D *stride) { _stride = *stride; }
+ void window(const coco::Window2D *window) { _window = *window; }
+
+ void ifm(coco::FeatureObject *ifm) { _ifm = ifm; }
+ void ofm(coco::FeatureObject *ofm) { _ofm = ofm; }
+
+public:
+ void append(ANNBinder *binder) const override
+ {
+ auto ifm = binder->addOperand<float>(_ifm);
+
+ // Set padding
+ auto left = binder->addOperand<int32_t>();
+ binder->setOperand(left, _pad.left());
+ auto right = binder->addOperand<int32_t>();
+ binder->setOperand(right, _pad.right());
+ auto top = binder->addOperand<int32_t>();
+ binder->setOperand(top, _pad.top());
+ auto bottom = binder->addOperand<int32_t>();
+ binder->setOperand(bottom, _pad.bottom());
+
+ // Set horizontal/vertical stride
+ auto hstride = binder->addOperand<int32_t>();
+ binder->setOperand(hstride, _stride.horizontal());
+ auto vstride = binder->addOperand<int32_t>();
+ binder->setOperand(vstride, _stride.vertical());
+
+ // Set receptive field size
+ auto width = binder->addOperand<int32_t>();
+ binder->setOperand(width, _window.width());
+ auto height = binder->addOperand<int32_t>();
+ binder->setOperand(height, _window.height());
+
+ // Set fuse code
+ // TODO Support operation fusion
+ auto fuse = binder->addOperand<int32_t>();
+ binder->setOperand(fuse, 0);
+
+ auto ofm = binder->addOperand<float>(_ofm);
+
+ binder->addOperation(ann::Operation::Code::MAX_POOL_2D,
+ {ifm, left, right, top, bottom, hstride, vstride, width, height, fuse},
+ {ofm});
+ }
+
+private:
+ coco::Padding2D _pad;
+ coco::Stride2D _stride;
+ coco::Window2D _window;
+
+private:
+ coco::FeatureObject *_ifm = nullptr;
+ coco::FeatureObject *_ofm = nullptr;
+};
+
+class ANNAvgPool2DAppender final : public ANNOpAppender
+{
+public:
+ void pad(const coco::Padding2D *pad) { _pad = *pad; }
+ void stride(const coco::Stride2D *stride) { _stride = *stride; }
+ void window(const coco::Window2D *window) { _window = *window; }
+
+ void ifm(coco::FeatureObject *ifm) { _ifm = ifm; }
+ void ofm(coco::FeatureObject *ofm) { _ofm = ofm; }
+
+public:
+ void append(ANNBinder *binder) const override
+ {
+ auto ifm = binder->addOperand<float>(_ifm);
+
+ // Set padding
+ auto left = binder->addOperand<int32_t>();
+ binder->setOperand(left, _pad.left());
+ auto right = binder->addOperand<int32_t>();
+ binder->setOperand(right, _pad.right());
+ auto top = binder->addOperand<int32_t>();
+ binder->setOperand(top, _pad.top());
+ auto bottom = binder->addOperand<int32_t>();
+ binder->setOperand(bottom, _pad.bottom());
+
+ // Set horizontal/vertical stride
+ auto hstride = binder->addOperand<int32_t>();
+ binder->setOperand(hstride, _stride.horizontal());
+ auto vstride = binder->addOperand<int32_t>();
+ binder->setOperand(vstride, _stride.vertical());
+
+ // Set receptive field size
+ auto width = binder->addOperand<int32_t>();
+ binder->setOperand(width, _window.width());
+ auto height = binder->addOperand<int32_t>();
+ binder->setOperand(height, _window.height());
+
+ // Set fuse code
+ // TODO Support operation fusion
+ auto fuse = binder->addOperand<int32_t>();
+ binder->setOperand(fuse, 0);
+
+ auto ofm = binder->addOperand<float>(_ofm);
+
+ binder->addOperation(ann::Operation::Code::AVG_POOL_2D,
+ {ifm, left, right, top, bottom, hstride, vstride, width, height, fuse},
+ {ofm});
+ }
+
+private:
+ coco::Padding2D _pad;
+ coco::Stride2D _stride;
+ coco::Window2D _window;
+
+private:
+ coco::FeatureObject *_ifm = nullptr;
+ coco::FeatureObject *_ofm = nullptr;
+};
+
+class ANNPadFAppender final : public ANNOpAppender
+{
+public:
+ void pad(const coco::Padding2D *pad) { _pad = *pad; }
+
+public:
+ void ifm(coco::FeatureObject *ifm) { _ifm = ifm; }
+ void ofm(coco::FeatureObject *ofm) { _ofm = ofm; }
+
+public:
+ void append(ANNBinder *binder) const override
+ {
+ using nncc::core::ADT::tensor::Shape;
+
+ auto ifm = binder->addOperand<float>(_ifm);
+ auto pad = binder->addOperand<int32_t>(Shape{4, 2});
+ {
+ std::vector<int32_t> values;
+ values.resize(8);
+ // For 'N'
+ values.at(0) = values.at(1) = 0;
+ // For 'H'
+ values.at(2) = _pad.top();
+ values.at(3) = _pad.bottom();
+ // For 'W'
+ values.at(4) = _pad.left();
+ values.at(5) = _pad.right();
+ // For 'C'
+ values.at(6) = values.at(7) = 0;
+
+ binder->setOperand(pad, values.begin(), values.end());
+ }
+
+ auto ofm = binder->addOperand<float>(_ofm);
+
+ binder->addOperation(ann::Operation::Code::PAD, {ifm, pad}, {ofm});
+ }
+
+private:
+ coco::Padding2D _pad;
+
+private:
+ coco::FeatureObject *_ifm = nullptr;
+ coco::FeatureObject *_ofm = nullptr;
+};
+
+class ANNOpFunctionalAppender final : public ANNOpAppender
+{
+public:
+ ANNOpFunctionalAppender(const Appender &fun) : _fun{fun}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void append(ANNBinder *binder) const { _fun(binder); }
+
+private:
+ Appender _fun;
+};
+
+class ANNSubAppender final : public ANNOpAppender
+{
+public:
+ void left(coco::FeatureObject *o) { _left = o; }
+ void right(coco::FeatureObject *o) { _right = o; }
+ void out(coco::FeatureObject *o) { _out = o; }
+
+public:
+ void append(ANNBinder *binder) const override
+ {
+ auto left = binder->addOperand<float>(_left);
+ auto right = binder->addOperand<float>(_right);
+ auto fuse = binder->addOperand<int32_t>();
+ binder->setOperand(fuse, 0);
+
+ auto out = binder->addOperand<float>(_out);
+
+ binder->addOperation(ann::Operation::Code::SUB, {left, right, fuse}, {out});
+ }
+
+private:
+ coco::FeatureObject *_left = nullptr;
+ coco::FeatureObject *_right = nullptr;
+ coco::FeatureObject *_out = nullptr;
+};
+
+class ANNDivAppender final : public ANNOpAppender
+{
+public:
+ void left(coco::FeatureObject *o) { _left = o; }
+ void right(coco::FeatureObject *o) { _right = o; }
+ void out(coco::FeatureObject *o) { _out = o; }
+
+public:
+ void append(ANNBinder *binder) const override
+ {
+ auto left = binder->addOperand<float>(_left);
+ auto right = binder->addOperand<float>(_right);
+ auto fuse = binder->addOperand<int32_t>();
+ binder->setOperand(fuse, 0);
+
+ auto out = binder->addOperand<float>(_out);
+
+ binder->addOperation(ann::Operation::Code::DIV, {left, right, fuse}, {out});
+ }
+
+private:
+ coco::FeatureObject *_left = nullptr;
+ coco::FeatureObject *_right = nullptr;
+ coco::FeatureObject *_out = nullptr;
+};
+
+class ANNOpBuilder : public coco::Instr::Visitor<std::unique_ptr<ANNOpAppender>>
+{
+public:
+ std::unique_ptr<ANNOpAppender> visit(const coco::Eval *eval)
+ {
+ if (auto conv = eval->op()->asConv2D())
+ {
+ if (auto load = conv->arg()->asLoad())
+ {
+ auto sess = enco::session(eval->module());
+
+ auto ifm = load->object()->asFeature();
+ auto ker = conv->ker();
+ auto ofm = eval->out()->asFeature();
+
+ const auto group = conv->group();
+
+ if (group == 1)
+ {
+ auto app = make_unique<ANNConv2DAppender>();
+
+ app->session(sess);
+
+ app->pad(conv->pad());
+ app->stride(conv->stride());
+
+ app->ifm(ifm);
+ app->ofm(ofm);
+ app->ker(ker);
+
+ return std::move(app);
+ }
+ else
+ {
+ assert(ifm->shape().depth() == group);
+ assert(ker->shape().count() % group == 0);
+ assert(ker->shape().depth() == 1);
+
+ auto app = make_unique<ANNDepthwiseConv2DAppender>();
+
+ app->session(sess);
+
+ app->multiplier(ker->shape().count() / group);
+ app->pad(conv->pad());
+ app->stride(conv->stride());
+
+ app->ifm(ifm);
+ app->ofm(ofm);
+ app->ker(ker);
+
+ return std::move(app);
+ }
+ }
+ }
+ else if (auto op = eval->op()->asAdd())
+ {
+ auto left_load = op->left()->asLoad();
+ auto right_load = op->right()->asLoad();
+
+ if (left_load && right_load)
+ {
+ // Let's compile the following code fragment:
+ //
+ // %ofm = eval(Add(Load(%left), Load(%right)))
+ //
+ auto left = left_load->object()->asFeature();
+ auto right = right_load->object()->asFeature();
+ assert(left != nullptr && right != nullptr);
+
+ auto out = eval->out()->asFeature();
+ assert(out != nullptr);
+
+ auto app = make_unique<ANNAddAppender>();
+
+ app->left(left);
+ app->right(right);
+ app->out(out);
+
+ return std::move(app);
+ }
+ }
+ else if (auto op = eval->op()->asMul())
+ {
+ auto left_load = op->left()->asLoad();
+ auto right_load = op->right()->asLoad();
+
+ if (left_load && right_load)
+ {
+ // Let's compile the following code fragment:
+ //
+ // %ofm = eval(Mul(Load(%left), Load(%right)))
+ //
+ auto left = left_load->object()->asFeature();
+ auto right = right_load->object()->asFeature();
+ assert(left != nullptr && right != nullptr);
+
+ auto out = eval->out()->asFeature();
+ assert(out != nullptr);
+
+ auto app = make_unique<ANNMulAppender>();
+
+ app->left(left);
+ app->right(right);
+ app->out(out);
+
+ return std::move(app);
+ }
+ }
+ else if (auto op = eval->op()->asPadF())
+ {
+ if (auto load = op->arg()->asLoad())
+ {
+ // Let's compile the following code fragment:
+ //
+ // %ofm = eval(PadF(Load(%ifm))
+ //
+ auto ifm = load->object()->asFeature();
+ auto ofm = eval->out()->asFeature();
+
+ assert(ifm != nullptr && ofm != nullptr);
+
+ auto app = make_unique<ANNPadFAppender>();
+
+ app->pad(op->pad());
+
+ app->ifm(ifm);
+ app->ofm(ofm);
+
+ return std::move(app);
+ }
+ }
+ else if (auto maxpool = eval->op()->asMaxPool2D())
+ {
+ if (auto load = maxpool->arg()->asLoad())
+ {
+ // Let's compile the following code fragment:
+ //
+ // %ofm = eval(MaxPool2D(Load(%ifm))
+ //
+ auto ifm = load->object()->asFeature();
+ auto ofm = eval->out()->asFeature();
+
+ assert(ifm != nullptr && ofm != nullptr);
+
+ auto app = make_unique<ANNMaxPool2DAppender>();
+
+ app->pad(maxpool->pad());
+ app->stride(maxpool->stride());
+ app->window(maxpool->window());
+
+ app->ifm(ifm);
+ app->ofm(ofm);
+
+ return std::move(app);
+ }
+ }
+ else if (auto avgpool = eval->op()->asAvgPool2D())
+ {
+ if (auto load = avgpool->arg()->asLoad())
+ {
+ // Let's compile the following code fragment:
+ //
+ // %ofm = eval(AvgPool2D(Load(%ifm))
+ //
+ if (avgpool->divisor() == coco::AvgPool2D::Divisor::PaddingExcluded)
+ {
+ // When ANN runtime computes the average of each receptive field,
+ // it uses the number of valid(=non-padding) elements as a divisor.
+ auto ifm = load->object()->asFeature();
+ auto ofm = eval->out()->asFeature();
+
+ assert(ifm != nullptr && ofm != nullptr);
+
+ auto app = make_unique<ANNAvgPool2DAppender>();
+
+ app->pad(avgpool->pad());
+ app->stride(avgpool->stride());
+ app->window(avgpool->window());
+
+ app->ifm(ifm);
+ app->ofm(ofm);
+
+ return std::move(app);
+ }
+ }
+ }
+ else if (auto relu = eval->op()->asReLU())
+ {
+ if (auto load = relu->arg()->asLoad())
+ {
+ // Let's compile the following code fragment:
+ //
+ // %ofm = eval(ReLU(Load(%ifm))
+ //
+ // TODO Support objects of other kinds, such as Tensor
+ auto ifm = load->object()->asFeature();
+ auto ofm = eval->out()->asFeature();
+
+ assert(ifm != nullptr && ofm != nullptr);
+
+ auto app = make_unique<ANNReLUAppender>();
+
+ app->ifm(ifm);
+ app->ofm(ofm);
+
+ return std::move(app);
+ }
+ }
+ else if (auto relu6 = eval->op()->asReLU6())
+ {
+ if (auto load = relu6->arg()->asLoad())
+ {
+ // Let's compile the following code fragment:
+ //
+ // %ofm = eval(ReLU6(Load(%ifm))
+ //
+ // TODO Support objects of other kinds, such as Tensor
+ auto ifm = load->object()->asFeature();
+ auto ofm = eval->out()->asFeature();
+
+ assert(ifm != nullptr && ofm != nullptr);
+
+ auto app = make_unique<ANNReLU6Appender>();
+
+ app->ifm(ifm);
+ app->ofm(ofm);
+
+ return std::move(app);
+ }
+ }
+ else if (auto op = eval->op()->asConcatF())
+ {
+ auto left_load = op->left()->asLoad();
+ auto right_load = op->right()->asLoad();
+
+ if (left_load && right_load && (op->axis() == coco::ConcatF::Axis::Depth))
+ {
+ // Let's compile the following code fragment:
+ //
+ // %ofm = eval(ConcatF(Depth, Load(%left), Load(%right)))
+ //
+ auto left = left_load->object()->asFeature();
+ auto right = right_load->object()->asFeature();
+ assert(left != nullptr && right != nullptr);
+
+ auto out = eval->out()->asFeature();
+ assert(out != nullptr);
+
+ auto app = make_unique<ANNConcatAppender>();
+
+ app->left(left);
+ app->right(right);
+ app->out(out);
+
+ return std::move(app);
+ }
+ }
+ else if (auto op = eval->op()->asSub())
+ {
+ auto left_load = op->left()->asLoad();
+ auto right_load = op->right()->asLoad();
+
+ if (left_load && right_load)
+ {
+ // Let's compile the following code fragment:
+ //
+ // %out = eval(Sub(Load(%left), Load(%right)))
+ //
+ auto left = left_load->object()->asFeature();
+ auto right = right_load->object()->asFeature();
+ assert(left != nullptr && right != nullptr);
+
+ auto out = eval->out()->asFeature();
+ assert(out != nullptr);
+
+ auto app = make_unique<ANNSubAppender>();
+
+ app->left(left);
+ app->right(right);
+ app->out(out);
+
+ return std::move(app);
+ }
+ }
+ else if (auto op = eval->op()->asDiv())
+ {
+ auto left_load = op->left()->asLoad();
+ auto right_load = op->right()->asLoad();
+
+ if (left_load && right_load)
+ {
+ // Let's compile the following code fragment:
+ //
+ // %out = eval(Div(Load(%left), Load(%right)))
+ //
+ auto left = left_load->object()->asFeature();
+ auto right = right_load->object()->asFeature();
+ assert(left != nullptr && right != nullptr);
+
+ auto out = eval->out()->asFeature();
+ assert(out != nullptr);
+
+ auto app = make_unique<ANNDivAppender>();
+
+ app->left(left);
+ app->right(right);
+ app->out(out);
+
+ return std::move(app);
+ }
+ }
+
+ // Return nullptr if a given Eval instruction is incompatible
+ return nullptr;
+ }
+
+public:
+ std::unique_ptr<ANNOpAppender> visit(const coco::Shuffle *) { return nullptr; }
+};
+
+namespace
+{
+
+std::unique_ptr<ANNOpAppender> make_appender(coco::Instr *ins)
+{
+ ANNOpBuilder op_builder;
+
+ if (auto eval = coco::safe_cast<coco::Eval>(ins))
+ {
+ return eval->accept(op_builder);
+ }
+
+ if (auto depth_concat = coco::safe_cast<ANNDepthConcatF>(ins))
+ {
+ auto app = make_unique<ANNConcatAppender>();
+
+ app->out(depth_concat->out()->asFeature());
+
+ app->left(depth_concat->fst()->asFeature());
+ app->right(depth_concat->snd()->asFeature());
+
+ return std::move(app);
+ }
+
+ // Build ANN IR from ANNConv2D instruction
+ if (auto conv2d = coco::safe_cast<ANNConv2D>(ins))
+ {
+ auto sess = enco::session(conv2d->module());
+ auto app = make_unique<ANNConv2DAppender>();
+
+ app->session(sess);
+
+ app->pad(conv2d->pad());
+ app->stride(conv2d->stride());
+
+ app->ofm(conv2d->ofm()->asFeature());
+ app->ifm(conv2d->ifm()->asFeature());
+ app->ker(conv2d->ker()->asKernel());
+ app->bias(coco::safe_cast<coco::FeatureObject>(conv2d->bias()));
+
+ return std::move(app);
+ }
+
+ return nullptr;
+}
+
+enum Compatibility
+{
+ COMPATIBLE,
+ INCOMPATIBLE
+};
+
+class ANNGroupBuilder
+{
+public:
+ ANNGroupBuilder(ANNContext *ctx) : _ctx{ctx}
+ {
+ // DO NOTHING
+ }
+
+public:
+ Compatibility kind(const coco::Block *blk) const;
+ Compatibility kind(const std::unique_ptr<ANNOpAppender> &appender) const;
+
+public:
+ void build(enco::Code *code) const;
+
+private:
+ ANNContext *_ctx;
+};
+
+Compatibility ANNGroupBuilder::kind(const std::unique_ptr<ANNOpAppender> &app) const
+{
+ return app ? COMPATIBLE : INCOMPATIBLE;
+}
+
+Compatibility ANNGroupBuilder::kind(const coco::Block *blk) const
+{
+ return (_ctx->find(blk) != nullptr) ? COMPATIBLE : INCOMPATIBLE;
+}
+
+void ANNGroupBuilder::build(enco::Code *code) const
+{
+ auto m = code->module();
+
+ // ANNGroupBuilder will construct a sequence of blocks from the original block sequence, and
+ // a destination block (that dst_blk points to) is the tail of the generated sequence.
+ coco::Block *dst_blk = nullptr;
+
+ auto append = [&](const Compatibility &t) {
+ auto blk = m->entity()->block()->create();
+
+ if (dst_blk == nullptr)
+ {
+ m->block()->prepend(blk);
+ }
+ else
+ {
+ blk->insertAfter(dst_blk);
+ }
+
+ dst_blk = blk;
+
+ if (COMPATIBLE == t)
+ {
+ _ctx->create(blk);
+ }
+ };
+
+ for (auto blk = m->block()->head(); blk;)
+ {
+ // Let's move instructions from a block of interest (referred to as source block) into
+ // a destination block
+ auto src_blk = blk;
+ blk = src_blk->next();
+ src_blk->detach();
+
+ for (auto ins = src_blk->instr()->head(); ins;)
+ {
+ auto cur_ins = ins;
+ ins = cur_ins->next();
+ cur_ins->detach();
+
+ auto cur_append = make_appender(cur_ins);
+
+ // Create a new compatible block and use it as a destination block if the current
+ // destination block is absent or incompatible with the instruction of interest.
+ if ((dst_blk == nullptr) || (kind(cur_append) != kind(dst_blk)))
+ {
+ append(kind(cur_append));
+ }
+
+ assert(dst_blk != nullptr);
+ assert(kind(cur_append) == kind(dst_blk));
+
+ // Append ins to the dst_blk block
+ dst_blk->instr()->append(cur_ins);
+
+ if (cur_append)
+ {
+ // Update Android NN IR if the current instruction is compatible
+ auto binder = _ctx->find(dst_blk);
+ assert(binder != nullptr);
+ cur_append->append(binder);
+ }
+ }
+
+ // Destroy the source block
+ assert(src_blk->instr()->empty());
+ m->entity()->block()->destroy(src_blk);
+ }
+}
+
+} // namespace
+
+class ANNModuleBuilder
+{
+private:
+ std::set<coco::Bag *> inputs(ANNBinder *binder) const;
+ std::set<coco::Bag *> outputs(ANNBinder *binder) const;
+
+public:
+ void build(ANNContext *ann_ctx) const;
+};
+
+std::set<coco::Bag *> ANNModuleBuilder::inputs(ANNBinder *binder) const
+{
+ std::set<coco::Bag *> res;
+
+ for (auto bag : binder->bags())
+ {
+ auto u = enco::updaters(bag);
+ u.erase(binder->block());
+
+ /**
+ * A bag is the input of this block if
+ * 1. it is an input of the whole network, or
+ * 2. it is updated by preceding blocks during execution
+ */
+ if (bag->isInput() || (u.size() > 0))
+ {
+ res.insert(bag);
+ }
+ }
+
+ return res;
+}
+
+std::set<coco::Bag *> ANNModuleBuilder::outputs(ANNBinder *binder) const
+{
+ std::set<coco::Bag *> res;
+
+ for (auto bag : binder->bags())
+ {
+ auto u = enco::updaters(bag);
+ auto r = enco::readers(bag);
+ r.erase(binder->block());
+
+ /**
+ * Only a bag that this block updates can be the output of this block
+ */
+ if (u.find(binder->block()) == u.end())
+ {
+ continue;
+ }
+
+ /**
+ * A bag is the output of this block if
+ * 1. it is an output of the whole network, or
+ * 2. it is read by following blocks during execution
+ */
+ if (bag->isOutput() || (r.size() > 0))
+ {
+ res.insert(bag);
+ }
+ }
+
+ return res;
+}
+
+void ANNModuleBuilder::build(ANNContext *ann_ctx) const
+{
+ for (uint32_t n = 0; n < ann_ctx->count(); ++n)
+ {
+ auto binder = ann_ctx->nth(n);
+
+ // NOTE binder->module() returns an ANN IR module (not coco IR module)
+ auto m = binder->block()->module();
+ auto d = enco::data(m);
+
+ // Let's identify operands with initial values
+ for (auto bag : binder->bags())
+ {
+ if (binder->associated(bag) && d->allocated(bag))
+ {
+ // TODO Support other datatype
+ auto span = d->f32()->weight(bag);
+ assert(span.data() != nullptr);
+
+ binder->setOperand(binder->operand(bag), span.data(), span.data() + span.size());
+ }
+ }
+
+ // Let's identify input/output bags
+ binder->identifyInputs(inputs(binder));
+ binder->identifyOutputs(outputs(binder));
+ }
+}
+
+} // namespace
+
+namespace
+{
+
+class SplitPass
+{
+public:
+ void runOnCode(enco::Code *code) const;
+};
+
+void SplitPass::runOnCode(enco::Code *code) const
+{
+ auto ann_ctx = make_unique<ANNContext>();
+
+ ANNGroupBuilder group_builder{ann_ctx.get()};
+ group_builder.build(code);
+
+ ANNModuleBuilder module_builder;
+ module_builder.build(ann_ctx.get());
+
+ _subnet_contexts[code->module()] = std::move(ann_ctx);
+}
+
+} // namespace
+
+namespace enco
+{
+
+void split_into_phases(enco::Code *code)
+{
+ SplitPass split;
+ split.runOnCode(code);
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/Split.h b/compiler/enco/core/src/Transforms/Split.h
new file mode 100644
index 000000000..b4e1d7baf
--- /dev/null
+++ b/compiler/enco/core/src/Transforms/Split.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SPLIT_H__
+#define __SPLIT_H__
+
+#include "Code.h"
+#include "Pass.h"
+
+namespace enco
+{
+
+struct SubnetManager
+{
+ static const ANNContext *context(const coco::Module *m);
+};
+
+/**
+ * @brief Split instructions into a set of phases
+ */
+void split_into_phases(enco::Code *code);
+
+struct PhaseConstructionPass final : public Pass
+{
+ PASS_CTOR(PhaseConstructionPass)
+ {
+ // DO NOTHING
+ }
+
+ void run(const SessionID &sess) const override { split_into_phases(code(sess)); }
+};
+
+} // namespace enco
+
+#endif // __SPLIT_H__
diff --git a/compiler/enco/core/src/Usage.cpp b/compiler/enco/core/src/Usage.cpp
new file mode 100644
index 000000000..92ccba5a0
--- /dev/null
+++ b/compiler/enco/core/src/Usage.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Usage.h"
+
+namespace enco
+{
+
+std::set<coco::Block *> readers(const coco::Bag *bag)
+{
+ std::set<coco::Block *> res;
+
+ for (auto read : coco::readers(bag))
+ {
+ assert(read != nullptr);
+ auto instr = read->loc();
+ assert(instr != nullptr);
+ auto block = instr->parent();
+ assert(block != nullptr);
+
+ res.insert(block);
+ }
+
+ return res;
+}
+
+std::set<coco::Block *> updaters(const coco::Bag *bag)
+{
+ std::set<coco::Block *> res;
+
+ for (auto update : coco::updaters(bag))
+ {
+ assert(update != nullptr);
+ auto instr = update->loc();
+ assert(instr != nullptr);
+ auto block = instr->parent();
+ assert(block != nullptr);
+
+ res.insert(block);
+ }
+
+ return res;
+}
+
+} // namespace enco
diff --git a/compiler/enco/core/src/Usage.h b/compiler/enco/core/src/Usage.h
new file mode 100644
index 000000000..8fa05f9b9
--- /dev/null
+++ b/compiler/enco/core/src/Usage.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_USAGE_H__
+#define __ENCO_USAGE_H__
+
+#include "coco/IR.h"
+
+#include <set>
+
+namespace enco
+{
+
+/// @brief Returns the set of blocks that read a given bag
+std::set<coco::Block *> readers(const coco::Bag *bag);
+/// @brief Returns the set of blocks that update a given bag
+std::set<coco::Block *> updaters(const coco::Bag *bag);
+
+} // namespace enco
+
+#endif // __ENCO_USAGE_H__
diff --git a/compiler/enco/core/src/coex/IR.h b/compiler/enco/core/src/coex/IR.h
new file mode 100644
index 000000000..e81943f18
--- /dev/null
+++ b/compiler/enco/core/src/coex/IR.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCO_COEX_IR_H__
+#define __ENCO_COEX_IR_H__
+
+#include <coco/IR.h>
+
+/**
+ * @brief 2D Convolution through Android NN API
+ *
+ * TODO Support FusedActivation
+ */
+class ANNConv2D : public coco::Instr, public coco::Object::Producer, public coco::Object::Consumer
+{
+public:
+ ANNConv2D() : _ofm{this}, _ifm{this}, _ker{this}, _bias{this}
+ {
+ // DO NOTHING
+ }
+
+public:
+ coco::Instr *loc(void) override { return this; }
+
+public:
+ coco::Object *ofm(void) const { return _ofm.value(); }
+ void ofm(coco::Object *o) { _ofm.value(o); }
+
+ coco::Object *ifm(void) const { return _ifm.value(); }
+ void ifm(coco::Object *o) { _ifm.value(o); }
+
+ coco::Object *ker(void) const { return _ker.value(); }
+ void ker(coco::Object *o) { _ker.value(o); }
+
+ /**
+ * Currently, this "bias" is a Feature object with channel-wise layout
+ *
+ * NOTE This design is subject to change
+ */
+ coco::Object *bias(void) const { return _bias.value(); }
+ void bias(coco::Object *o) { _bias.value(o); }
+
+public:
+ coco::Padding2D *pad(void) { return &_pad; }
+ const coco::Padding2D *pad(void) const { return &_pad; }
+
+ coco::Stride2D *stride(void) { return &_stride; }
+ const coco::Stride2D *stride(void) const { return &_stride; }
+
+private:
+ coco::Def _ofm;
+
+ coco::Use _ifm;
+ coco::Use _ker;
+ coco::Use _bias;
+
+private:
+ coco::Padding2D _pad;
+ coco::Stride2D _stride;
+};
+
+/**
+ * @brief Concatenate feature maps along "depth" dimension through Android NN API
+ */
+class ANNDepthConcatF : public coco::Instr,
+ public coco::Object::Producer,
+ public coco::Object::Consumer
+{
+public:
+ ANNDepthConcatF() : _out{this}, _fst{this}, _snd{this}
+ {
+ // DO NOTHING
+ }
+
+public:
+ coco::Instr *loc(void) override { return this; }
+
+public:
+ coco::Object *out(void) const { return _out.value(); }
+ void out(coco::Object *o) { _out.value(o); }
+
+ coco::Object *fst(void) const { return _fst.value(); }
+ void fst(coco::Object *o) { _fst.value(o); }
+
+ coco::Object *snd(void) const { return _snd.value(); }
+ void snd(coco::Object *o) { _snd.value(o); }
+
+private:
+ coco::Def _out;
+
+ // TODO Support variadic-length inputs
+ coco::Use _fst;
+ coco::Use _snd;
+};
+
+#endif // __ENCO_COEX_IR_H__
diff --git a/compiler/enco/core/src/coex/IR.test.cpp b/compiler/enco/core/src/coex/IR.test.cpp
new file mode 100644
index 000000000..e20cbe4fd
--- /dev/null
+++ b/compiler/enco/core/src/coex/IR.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IR.h"
+
+#include <gtest/gtest.h>
+
+TEST(IRTest, ANNConv2D_default_constructor)
+{
+ ANNConv2D ins;
+
+ ASSERT_EQ(ins.ofm(), nullptr);
+ ASSERT_EQ(ins.ifm(), nullptr);
+ ASSERT_EQ(ins.ker(), nullptr);
+ ASSERT_EQ(ins.bias(), nullptr);
+}
+
+TEST(IRTest, ANNDepthConcatF_default_constructor)
+{
+ ANNDepthConcatF ins;
+
+ ASSERT_EQ(ins.out(), nullptr);
+ ASSERT_EQ(ins.fst(), nullptr);
+ ASSERT_EQ(ins.snd(), nullptr);
+}
diff --git a/runtime/neurun/frontend/CMakeLists.txt b/compiler/enco/frontend/CMakeLists.txt
index 5ea6cdadd..5ea6cdadd 100644
--- a/runtime/neurun/frontend/CMakeLists.txt
+++ b/compiler/enco/frontend/CMakeLists.txt
diff --git a/compiler/enco/frontend/caffe/CMakeLists.txt b/compiler/enco/frontend/caffe/CMakeLists.txt
new file mode 100644
index 000000000..ce43a41d3
--- /dev/null
+++ b/compiler/enco/frontend/caffe/CMakeLists.txt
@@ -0,0 +1,39 @@
+nnas_find_package(CaffeProto QUIET)
+
+if(NOT CaffeProto_FOUND)
+ return()
+endif(NOT CaffeProto_FOUND)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(enco_caffe_frontend SHARED ${SOURCES})
+target_include_directories(enco_caffe_frontend PRIVATE src)
+target_link_libraries(enco_caffe_frontend coco_core)
+target_link_libraries(enco_caffe_frontend coco_generic)
+target_link_libraries(enco_caffe_frontend enco_intf_frontend)
+target_link_libraries(enco_caffe_frontend enco_intf_cmdline)
+target_link_libraries(enco_caffe_frontend morph)
+target_link_libraries(enco_caffe_frontend caffeproto)
+target_link_libraries(enco_caffe_frontend stdex)
+
+nnas_find_package(GTest QUIET)
+
+if(NOT GTest_FOUND)
+ return()
+endif(NOT GTest_FOUND)
+
+nnas_find_package(Caffe QUIET)
+
+if(NOT Caffe_FOUND)
+ return()
+endif(NOT Caffe_FOUND)
+
+add_executable(enco_caffe_frontend_test ${TESTS})
+target_include_directories(enco_caffe_frontend_test PRIVATE src)
+target_link_libraries(enco_caffe_frontend_test gtest_main)
+target_link_libraries(enco_caffe_frontend_test enco_caffe_frontend)
+target_link_libraries(enco_caffe_frontend_test morph)
+target_link_libraries(enco_caffe_frontend_test caffe)
+add_test(enco_caffe_frontend_test enco_caffe_frontend_test)
diff --git a/compiler/enco/frontend/caffe/src/ConcatSpec.cpp b/compiler/enco/frontend/caffe/src/ConcatSpec.cpp
new file mode 100644
index 000000000..b83a1f902
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/ConcatSpec.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConcatSpec.h"
+
+#include <cassert>
+
+using namespace nncc::core::ADT::tensor;
+
+nncc::core::ADT::tensor::Shape ConcatSpec::forward(const ShapeList &inputs) const
+{
+ assert(inputs.size() > 0);
+
+ Shape output_shape = inputs.at(0);
+
+ for (uint32_t n = 1; n < inputs.size(); ++n)
+ {
+ // The current implementation assumes that "inputs" is well-formed
+ // TODO Verify whether "inputs" is really well-formed
+ const auto &input_shape = inputs.at(n);
+ output_shape.dim(_axis) += input_shape.dim(_axis);
+ }
+
+ return output_shape;
+}
+
+ConcatSpec concat_spec(uint32_t axis) { return ConcatSpec{axis}; }
diff --git a/compiler/enco/frontend/caffe/src/ConcatSpec.h b/compiler/enco/frontend/caffe/src/ConcatSpec.h
new file mode 100644
index 000000000..cc636c778
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/ConcatSpec.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONCAT_SPEC_H__
+#define __CONCAT_SPEC_H__
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <vector>
+
+using ShapeList = std::vector<nncc::core::ADT::tensor::Shape>;
+
+class ConcatSpec
+{
+public:
+ explicit ConcatSpec(uint32_t axis) : _axis{axis}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Return the output shape when inputs of given shape are
+ * concatenated along _axis
+ */
+ nncc::core::ADT::tensor::Shape forward(const ShapeList &) const;
+
+private:
+ uint32_t _axis;
+};
+
+ConcatSpec concat_spec(uint32_t axis);
+
+#endif // __CONCAT_SPEC_H__
diff --git a/compiler/enco/frontend/caffe/src/ConcatSpec.test.cpp b/compiler/enco/frontend/caffe/src/ConcatSpec.test.cpp
new file mode 100644
index 000000000..1cb2ea5af
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/ConcatSpec.test.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConcatSpec.h"
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+
+namespace
+{
+class ConcatSpecTest : public ::testing::Test
+{
+ // FOR FUTURE USE
+};
+} // namespace
+
+TEST_F(ConcatSpecTest, ifm_shape)
+{
+ const Shape in_1{1, 1, 4, 4};
+ const Shape in_2{1, 2, 4, 4};
+ const Shape in_3{1, 3, 4, 4};
+ const Shape in_4{1, 4, 4, 4};
+
+ auto expected = Shape{1, 10, 4, 4};
+ auto obtained = concat_spec(1).forward({in_1, in_2, in_3, in_4});
+
+ ASSERT_EQ(expected, obtained);
+}
diff --git a/compiler/enco/frontend/caffe/src/Context.cpp b/compiler/enco/frontend/caffe/src/Context.cpp
new file mode 100644
index 000000000..9f7204b25
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Context.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @note This cpp file exists to check compilation integrity
+ */
+
+#include "Context.h"
diff --git a/compiler/enco/frontend/caffe/src/Context.h b/compiler/enco/frontend/caffe/src/Context.h
new file mode 100644
index 000000000..aca57ce6f
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Context.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONTEXT_H__
+#define __CONTEXT_H__
+
+#include <caffe/proto/caffe.pb.h>
+
+#include <coco/IR.h>
+#include <coco/IR/Data.h>
+
+#include <cassert>
+#include <map>
+#include <string>
+
+namespace caffeimport
+{
+
+using LayerName = std::string;
+using BlobName = std::string;
+// Note: these two may be evolved into a class
+using ShapeContext = std::map<BlobName, nncc::core::ADT::tensor::Shape>;
+using StoreContext = std::map<BlobName, coco::Bag *>;
+
+class WeightContext
+{
+public:
+ WeightContext(::caffe::NetParameter *caffemodel) : _caffemodel(caffemodel)
+ {
+ for (uint32_t n = 0; n < _caffemodel->layer_size(); ++n)
+ {
+ auto layer = _caffemodel->mutable_layer(n);
+
+ if (layer->has_name())
+ {
+ _data[layer->name()] = layer;
+ }
+ }
+ }
+
+public:
+ int blob_count(const LayerName &name)
+ {
+ if (_data.find(name) != _data.end())
+ return _data.at(name)->blobs_size();
+
+ assert(false);
+ return 0;
+ }
+
+ ::caffe::BlobProto *blob_get(const LayerName &name, uint32_t n)
+ {
+ if (_data.find(name) != _data.end())
+ return _data.at(name)->mutable_blobs(n);
+
+ assert(false);
+ return nullptr;
+ };
+
+private:
+ ::caffe::NetParameter *_caffemodel;
+ std::map<LayerName, ::caffe::LayerParameter *> _data;
+};
+
+class GraphBuilderContext
+{
+public:
+ explicit GraphBuilderContext(coco::Module *module, coco::Data *data, coco::Block *block,
+ ShapeContext &shape_ctx, StoreContext &bag_ctx,
+ WeightContext &weight_ctx)
+ : _module(module), _data(data), _block(block), _shape_ctx(shape_ctx), _bag_ctx(bag_ctx),
+ _weight_ctx(weight_ctx)
+ {
+ // DO NOTHING
+ }
+
+ GraphBuilderContext(const GraphBuilderContext &) = delete;
+ GraphBuilderContext(GraphBuilderContext &&) = delete;
+
+public:
+ coco::Module *module() { return _module; }
+ coco::Data *data() { return _data; }
+ coco::Block *block() { return _block; }
+ ShapeContext &shape_ctx() { return _shape_ctx; }
+ StoreContext &bag_ctx() { return _bag_ctx; }
+ WeightContext &weight_ctx() { return _weight_ctx; }
+
+private:
+ coco::Module *_module;
+ coco::Data *_data;
+ coco::Block *_block;
+ ShapeContext &_shape_ctx;
+ StoreContext &_bag_ctx;
+ WeightContext &_weight_ctx;
+};
+
+} // namespace caffeimport
+
+#endif // __CONTEXT_H__
diff --git a/compiler/enco/frontend/caffe/src/Convert.cpp b/compiler/enco/frontend/caffe/src/Convert.cpp
new file mode 100644
index 000000000..d697b1bd8
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Convert.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+using namespace nncc::core::ADT;
+
+namespace caffeimport
+{
+
+tensor::Shape as_tensor_shape(const ::caffe::BlobShape &blob_shape)
+{
+ const uint32_t rank = blob_shape.dim_size();
+
+ tensor::Shape res;
+
+ res.resize(rank);
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ res.dim(axis) = blob_shape.dim(axis);
+ }
+
+ return res;
+}
+
+} // namespace caffeimport
diff --git a/compiler/enco/frontend/caffe/src/Convert.h b/compiler/enco/frontend/caffe/src/Convert.h
new file mode 100644
index 000000000..9f6f9f104
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Convert.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERT_H__
+#define __CONVERT_H__
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <caffe/proto/caffe.pb.h>
+
+namespace caffeimport
+{
+
+nncc::core::ADT::tensor::Shape as_tensor_shape(const ::caffe::BlobShape &blob_shape);
+
+inline nncc::core::ADT::tensor::Shape as_tensor_shape(const ::caffe::BlobProto *blob_proto)
+{
+ return as_tensor_shape(blob_proto->shape());
+}
+
+} // namespace caffeimport
+
+#endif // __CONVERT_H__
diff --git a/compiler/enco/frontend/caffe/src/ConvolutionSpec.cpp b/compiler/enco/frontend/caffe/src/ConvolutionSpec.cpp
new file mode 100644
index 000000000..e13ada836
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/ConvolutionSpec.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionSpec.h"
+#include "PaddingUtils.h"
+#include "ShapeQuery.h"
+
+#include <cassert>
+
+ConvolutionSpec::ConvolutionSpec(const ::caffe::ConvolutionParameter &param) : _param(param)
+{
+ // NOTE Dilation is not supported, yet
+ // TODO Support dilation
+ assert(param.dilation().size() == 0);
+}
+
+uint32_t ConvolutionSpec::group(void) const { return _param.group(); }
+
+uint32_t ConvolutionSpec::channel_axis(void) const
+{
+ return query_on(ifm_shape()).axis(axis_specifier(_param.axis()));
+}
+
+uint32_t ConvolutionSpec::pad(uint32_t spatial_axis) const
+{
+ assert(spatial_axis < num_spatial_axes());
+
+ auto raw_padding = build_raw_padding().with(_param);
+ auto spatial_padding = build_spatial_padding(num_spatial_axes()).with(raw_padding);
+
+ return spatial_padding.value(spatial_axis);
+}
+
+uint32_t ConvolutionSpec::stride(uint32_t spatial_axis) const
+{
+ assert(spatial_axis < num_spatial_axes());
+
+ // TODO Support stride_h/stride_w parameters
+ assert(!_param.has_stride_h());
+ assert(!_param.has_stride_w());
+
+ if (_param.stride().size() == 0)
+ {
+ // NOTE default stride is 1
+ return 1;
+ }
+
+ if (_param.stride().size() == 1)
+ {
+ return _param.stride(0);
+ }
+
+ assert(_param.stride().size() == num_spatial_axes());
+ return _param.stride(spatial_axis);
+}
+
+uint32_t ConvolutionSpec::ker_dim(uint32_t spatial_axis) const
+{
+ assert(spatial_axis < num_spatial_axes());
+ if (_param.kernel_size().size() == 0)
+ {
+ if (_param.has_kernel_h() && (spatial_axis == 0))
+ {
+ assert(num_spatial_axes() == 2);
+ return _param.kernel_h();
+ }
+
+ if (_param.has_kernel_w() && (spatial_axis == 1))
+ {
+ assert(num_spatial_axes() == 2);
+ return _param.kernel_w();
+ }
+
+ return 0;
+ }
+
+ assert(!_param.has_kernel_h());
+ assert(!_param.has_kernel_w());
+ if (_param.kernel_size().size() == 1)
+ {
+ return _param.kernel_size(0);
+ }
+ else
+ {
+ assert(_param.kernel_size().size() == num_spatial_axes());
+ return _param.kernel_size(spatial_axis);
+ }
+}
+
+nncc::core::ADT::tensor::Shape ConvolutionSpec::ker_shape(void) const
+{
+ nncc::core::ADT::tensor::Shape res;
+
+ res.resize(2 + num_spatial_axes());
+
+ res.dim(0) = ker_count();
+ assert(ifm_dim(channel_axis()) % group() == 0);
+ res.dim(1) = ifm_dim(channel_axis()) / group();
+ for (uint32_t axis = 0; axis < num_spatial_axes(); ++axis)
+ {
+ res.dim(2 + axis) = ker_dim(axis);
+ }
+
+ return res;
+}
+
+nncc::core::ADT::tensor::Shape ConvolutionSpec::ofm_shape(void) const
+{
+ nncc::core::ADT::tensor::Shape res;
+
+ res.resize(num_batch_axes() + 1 + num_spatial_axes());
+
+ for (uint32_t axis = 0; axis < num_batch_axes(); ++axis)
+ {
+ res.dim(axis) = ifm_dim(axis);
+ }
+
+ res.dim(num_batch_axes()) = ker_count();
+
+ for (uint32_t spatial_axis = 0; spatial_axis < num_spatial_axes(); ++spatial_axis)
+ {
+ const uint32_t full_axis = num_batch_axes() + 1 + spatial_axis;
+
+ uint32_t dim = 0;
+
+ dim += ifm_dim(full_axis) - ker_dim(spatial_axis) + 2 * pad(spatial_axis);
+ dim /= stride(spatial_axis);
+ dim += 1;
+
+ res.dim(full_axis) = dim;
+ }
+
+ return res;
+}
diff --git a/compiler/enco/frontend/caffe/src/ConvolutionSpec.h b/compiler/enco/frontend/caffe/src/ConvolutionSpec.h
new file mode 100644
index 000000000..c5c7c9024
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/ConvolutionSpec.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVOLUTION_SPEC_H__
+#define __CONVOLUTION_SPEC_H__
+
+#include <caffe/proto/caffe.pb.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+class ConvolutionSpec
+{
+public:
+ ConvolutionSpec(const ::caffe::ConvolutionParameter &param);
+
+public:
+ uint32_t ifm_rank(void) const { return _ifm_shape.rank(); }
+ uint32_t ifm_dim(uint32_t axis) const { return _ifm_shape.dim(axis); }
+
+ uint32_t group(void) const;
+
+ uint32_t channel_axis(void) const;
+
+ uint32_t num_batch_axes(void) const { return channel_axis(); }
+ uint32_t num_spatial_axes(void) const { return ifm_rank() - channel_axis() - 1; }
+
+ uint32_t pad(uint32_t spatial_axis) const;
+ uint32_t stride(uint32_t spatial_axis) const;
+ uint32_t ker_dim(uint32_t spatial_axis) const;
+
+public:
+ const nncc::core::ADT::tensor::Shape &ifm_shape(void) const { return _ifm_shape; }
+ void ifm_shape(const nncc::core::ADT::tensor::Shape &shape) { _ifm_shape = shape; }
+
+public:
+ uint32_t ker_count(void) const { return _param.num_output(); }
+ nncc::core::ADT::tensor::Shape ker_shape(void) const;
+
+public:
+ nncc::core::ADT::tensor::Shape ofm_shape(void) const;
+
+private:
+ const ::caffe::ConvolutionParameter &_param;
+ nncc::core::ADT::tensor::Shape _ifm_shape;
+};
+#endif // __CONVOLUTION_SPEC_H__
diff --git a/compiler/enco/frontend/caffe/src/ConvolutionSpec.test.cpp b/compiler/enco/frontend/caffe/src/ConvolutionSpec.test.cpp
new file mode 100644
index 000000000..02670b0cc
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/ConvolutionSpec.test.cpp
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionSpec.h"
+#include "Importer.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <caffe/net.hpp>
+
+#include <sstream>
+#include <stdexcept>
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT;
+
+#define STRING(content) #content
+
+namespace
+{
+class ConvolutionSpecTest : public ::testing::Test
+{
+protected:
+ tensor::Shape as_tensor_shape(const std::vector<int> &dims)
+ {
+ const uint32_t rank = dims.size();
+
+ tensor::Shape res;
+
+ res.resize(rank);
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ res.dim(axis) = dims.at(axis);
+ }
+
+ return res;
+ }
+
+ bool load(const std::string &prototxt, ::caffe::NetParameter &param)
+ {
+ std::stringstream ss{prototxt};
+
+ return from_txt(ss, param);
+ }
+};
+} // namespace
+
+TEST_F(ConvolutionSpecTest, ifm_shape)
+{
+ ::caffe::ConvolutionParameter param;
+ ConvolutionSpec spec{param};
+
+ const tensor::Shape ifm_shape{1, 3, 244, 244};
+
+ spec.ifm_shape(ifm_shape);
+
+ ASSERT_EQ(spec.ifm_shape(), ifm_shape);
+ ASSERT_EQ(spec.num_batch_axes(), 1);
+ ASSERT_EQ(spec.num_spatial_axes(), 2);
+}
+
+namespace
+{
+// clang-format off
+const char *conv_0 = STRING(
+layer {
+ name: "data"
+ type : "Input"
+ top : "data"
+ input_param { shape: { dim: 1 dim : 3 dim : 244 dim : 244 } }
+}
+layer{
+ name : "conv"
+ type : "Convolution"
+ bottom : "data"
+ top : "conv"
+ convolution_param {
+ bias_term : false
+ num_output : 1
+ kernel_size : 1
+ }
+});
+// clang-format on
+} // namespace
+
+TEST_F(ConvolutionSpecTest, conv_0)
+{
+ ::caffe::NetParameter param;
+
+ ASSERT_TRUE(load(conv_0, param));
+
+ ::caffe::Net<float> net{param};
+
+ const tensor::Shape ifm_shape{1, 3, 244, 244};
+ ConvolutionSpec spec{param.layer(1).convolution_param()};
+
+ spec.ifm_shape(ifm_shape);
+
+ // Check 'ker_shape'
+ {
+ auto expected = as_tensor_shape(net.layer_by_name("conv")->blobs().at(0)->shape());
+ auto obtained = spec.ker_shape();
+
+ ASSERT_EQ(expected, obtained);
+ }
+
+ // Check 'ofm_shape'
+ {
+ auto expected = as_tensor_shape(net.blob_by_name("conv")->shape());
+ auto obtained = spec.ofm_shape();
+
+ ASSERT_EQ(expected, obtained);
+ }
+}
+
+namespace
+{
+// clang-format off
+const char *conv_1 = STRING(
+layer {
+ name: "data"
+ type : "Input"
+ top : "data"
+ input_param { shape: { dim: 1 dim : 3 dim : 244 dim : 244 } }
+}
+layer{
+ name : "conv"
+ type : "Convolution"
+ bottom : "data"
+ top : "conv"
+ convolution_param {
+ bias_term : false
+ num_output : 1
+ kernel_size : 1
+ kernel_size : 3
+ }
+});
+// clang-format on
+} // namespace
+
+TEST_F(ConvolutionSpecTest, conv_1)
+{
+ ::caffe::NetParameter param;
+
+ ASSERT_TRUE(load(conv_1, param));
+
+ ::caffe::Net<float> net{param};
+
+ const tensor::Shape ifm_shape{1, 3, 244, 244};
+ ConvolutionSpec spec{param.layer(1).convolution_param()};
+
+ spec.ifm_shape(ifm_shape);
+
+ // Check 'ker_shape'
+ {
+ auto expected = as_tensor_shape(net.layer_by_name("conv")->blobs().at(0)->shape());
+ auto obtained = spec.ker_shape();
+
+ ASSERT_EQ(expected, obtained);
+ }
+
+ // Check 'ofm_shape'
+ {
+ auto expected = as_tensor_shape(net.blob_by_name("conv")->shape());
+ auto obtained = spec.ofm_shape();
+
+ ASSERT_EQ(expected, obtained);
+ }
+}
+
+namespace
+{
+// NOTE This example is derived from conv1_3x3_s2 layer in reference inception v3 layer
+// clang-format off
+const char *conv_2 = STRING(
+layer {
+ name: "data"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape: { dim: 1 dim: 3 dim: 299 dim: 299 }
+ }
+}
+layer {
+ name: "conv"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv"
+ convolution_param {
+ bias_term: false
+ num_output: 2
+ stride: 2
+ kernel_size: 3
+ }
+}
+);
+// clang-format on
+} // namespace
+
+TEST_F(ConvolutionSpecTest, conv_2)
+{
+ ::caffe::NetParameter param;
+
+ ASSERT_TRUE(load(conv_2, param));
+
+ ::caffe::Net<float> net{param};
+
+ const tensor::Shape ifm_shape{1, 3, 299, 299};
+ ConvolutionSpec spec{param.layer(1).convolution_param()};
+
+ spec.ifm_shape(ifm_shape);
+
+ // Check 'stride'
+ ASSERT_EQ(spec.stride(0), 2);
+ ASSERT_EQ(spec.stride(1), 2);
+
+ // Check 'ofm_shape'
+ {
+ auto expected = as_tensor_shape(net.blob_by_name("conv")->shape());
+ auto obtained = spec.ofm_shape();
+
+ ASSERT_EQ(expected, obtained);
+ }
+}
+
+namespace
+{
+// clang-format off
+const char *conv_pad = STRING(
+layer {
+ name: "data"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape: { dim: 1 dim: 3 dim: 16 dim: 16 }
+ }
+}
+layer {
+ name: "conv"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv"
+ convolution_param {
+ bias_term: false
+ num_output: 2
+ pad: 2
+ kernel_size: 3
+ }
+}
+);
+// clang-format on
+} // namespace
+
+TEST_F(ConvolutionSpecTest, conv_pad)
+{
+ ::caffe::NetParameter param;
+
+ ASSERT_TRUE(load(conv_pad, param));
+
+ ::caffe::Net<float> net{param};
+
+ const tensor::Shape ifm_shape{1, 3, 16, 16};
+ ConvolutionSpec spec{param.layer(1).convolution_param()};
+
+ spec.ifm_shape(ifm_shape);
+
+ // Check 'pad'
+ ASSERT_EQ(spec.pad(0), 2);
+ ASSERT_EQ(spec.pad(1), 2);
+
+ // Check 'ofm_shape'
+ {
+ auto expected = as_tensor_shape(net.blob_by_name("conv")->shape());
+ auto obtained = spec.ofm_shape();
+
+ ASSERT_EQ(expected, obtained);
+ }
+}
+
+namespace
+{
+// clang-format off
+const char *conv_ker_hw = STRING(
+layer {
+ name: "data"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape: { dim: 1 dim: 3 dim: 16 dim: 16 }
+ }
+}
+layer {
+ name: "conv"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv"
+ convolution_param {
+ bias_term: false
+ num_output: 2
+ kernel_h: 3
+ kernel_w: 1
+ }
+}
+);
+// clang-format on
+} // namespace
+
+TEST_F(ConvolutionSpecTest, conv_ker_hw)
+{
+ ::caffe::NetParameter param;
+
+ ASSERT_TRUE(load(conv_ker_hw, param));
+
+ ::caffe::Net<float> net{param};
+
+ const tensor::Shape ifm_shape{1, 3, 16, 16};
+ ConvolutionSpec spec{param.layer(1).convolution_param()};
+
+ spec.ifm_shape(ifm_shape);
+
+ // Check 'pad'
+ ASSERT_EQ(spec.ker_dim(0), 3);
+ ASSERT_EQ(spec.ker_dim(1), 1);
+
+ // Check 'ofm_shape'
+ {
+ auto expected = as_tensor_shape(net.blob_by_name("conv")->shape());
+ auto obtained = spec.ofm_shape();
+
+ ASSERT_EQ(expected, obtained);
+ }
+}
+
+namespace
+{
+// clang-format off
+const char *dconv = STRING(
+layer {
+ name: "data"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape: { dim: 1 dim: 3 dim: 16 dim: 16 }
+ }
+}
+layer {
+ name: "conv"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv"
+ convolution_param {
+ bias_term: false
+ num_output: 3
+ kernel_size: 3
+ group: 3
+ }
+}
+);
+// clang-format on
+} // namespace
+
+TEST_F(ConvolutionSpecTest, dconv)
+{
+ ::caffe::NetParameter param;
+
+ ASSERT_TRUE(load(dconv, param));
+
+ ::caffe::Net<float> net{param};
+
+ const tensor::Shape ifm_shape{1, 3, 16, 16};
+ ConvolutionSpec spec{param.layer(1).convolution_param()};
+
+ spec.ifm_shape(ifm_shape);
+
+ // Check 'ker_shape'
+ {
+ auto expected = as_tensor_shape(net.layer_by_name("conv")->blobs().at(0)->shape());
+ auto obtained = spec.ker_shape();
+
+ ASSERT_EQ(expected, obtained);
+ }
+
+ // Check 'ofm_shape'
+ {
+ auto expected = as_tensor_shape(net.blob_by_name("conv")->shape());
+ auto obtained = spec.ofm_shape();
+
+ ASSERT_EQ(expected, obtained);
+ }
+}
diff --git a/compiler/enco/frontend/caffe/src/Entry.cpp b/compiler/enco/frontend/caffe/src/Entry.cpp
new file mode 100644
index 000000000..2bdb73eac
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Entry.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Frontend.h"
+#include "Importer.h"
+
+#include <cmdline/View.h>
+
+#include <stdex/Memory.h>
+
+#include <fstream>
+#include <cassert>
+
+extern "C" std::unique_ptr<enco::Frontend> make_frontend(const cmdline::View &cmdline)
+{
+ assert(cmdline.size() == 2);
+
+ auto frontend = stdex::make_unique<Frontend>();
+
+ // Fill prototxt
+ {
+ std::ifstream ifs{cmdline.at(0)};
+ if (!ifs.is_open())
+ {
+ throw std::runtime_error("Prototxt file open fail");
+ }
+
+ if (!from_txt(ifs, *frontend->prototxt()))
+ {
+ throw std::runtime_error("Filling prototxt fail");
+ }
+ }
+
+ // Fill caffemodel
+ {
+ std::ifstream ifs{cmdline.at(1), std::ios::binary};
+ if (!ifs.is_open())
+ {
+ throw std::runtime_error("Caffemodel file open fail");
+ }
+
+ if (!from_bin(ifs, *frontend->caffemodel()))
+ {
+ throw std::runtime_error("Filling caffemodel fail");
+ }
+ }
+
+ return std::move(frontend);
+}
diff --git a/compiler/enco/frontend/caffe/src/Frontend.cpp b/compiler/enco/frontend/caffe/src/Frontend.cpp
new file mode 100644
index 000000000..7d2b3d36c
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Frontend.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Frontend.h"
+#include "Context.h"
+#include "GraphBuilderRegistry.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <map>
+#include <set>
+#include <string>
+
+#include <cassert>
+#include <stdexcept>
+
+using namespace nncc::core::ADT;
+
+using tensor::LexicalLayout;
+
+Frontend::Frontend() : _prototxt{new ::caffe::NetParameter}, _caffemodel{new ::caffe::NetParameter}
+{
+ // DO NOTHING
+}
+
+enco::Bundle Frontend::load(void) const
+{
+ auto module = coco::Module::create();
+ auto blk = module->entity()->block()->create();
+ module->block()->append(blk);
+
+ auto data = coco::Data::create();
+
+ // For weight access
+ caffeimport::WeightContext weight_ctx(_caffemodel.get());
+
+ // For inter-layer communication
+ std::map<std::string, tensor::Shape> shape_ctx;
+ std::map<std::string, coco::Bag *> bag_ctx;
+
+ std::set<std::string> bags;
+ std::map<std::string, uint32_t> def_count;
+ std::map<std::string, uint32_t> use_count;
+
+ auto def = [&bags, &def_count, &use_count](const std::string &name) {
+ if (bags.find(name) == bags.end())
+ {
+ bags.insert(name);
+ def_count[name] = 0;
+ use_count[name] = 0;
+ }
+
+ def_count.at(name) += 1;
+ };
+
+ auto use = [&use_count](const std::string &name) { use_count.at(name) += 1; };
+
+ auto outputs = [&bags, &def_count, &use_count](void) {
+ std::set<std::string> res;
+
+ for (const auto &bag : bags)
+ {
+ if (def_count.at(bag) > use_count.at(bag))
+ {
+ res.insert(bag);
+ }
+ }
+
+ return res;
+ };
+
+ caffeimport::GraphBuilderContext opbuilder_context(module.get(), data.get(), blk, shape_ctx,
+ bag_ctx, weight_ctx);
+
+ for (const auto &layer : _prototxt->layer())
+ {
+ assert(layer.has_name());
+ assert(layer.has_type());
+
+ for (uint32_t n = 0; n < layer.top().size(); ++n)
+ {
+ def(layer.top(n));
+ }
+
+ for (uint32_t n = 0; n < layer.bottom().size(); ++n)
+ {
+ use(layer.bottom(n));
+ }
+
+ if (const auto *graph_builder = caffeimport::GraphBuilderRegistry::get().lookup(layer.type()))
+ {
+ graph_builder->build(layer, &opbuilder_context);
+ }
+ else
+ {
+ throw std::runtime_error{"Not supported: " + layer.type()};
+ }
+ }
+
+ // Finalize: Create output for each top blob
+ for (const auto &name : outputs())
+ {
+ const auto &shape = shape_ctx.at(name);
+ auto bag = bag_ctx.at(name);
+
+ auto output = module->entity()->output()->create(shape);
+
+ output->bag(bag);
+ output->name(name);
+ output->reorder<LexicalLayout>();
+
+ module->output()->insert(output);
+ }
+
+ enco::Bundle bundle;
+
+ bundle.module(std::move(module));
+ bundle.data(std::move(data));
+
+ return std::move(bundle);
+}
diff --git a/compiler/enco/frontend/caffe/src/Frontend.h b/compiler/enco/frontend/caffe/src/Frontend.h
new file mode 100644
index 000000000..34fe90eba
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Frontend.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FRONTEND_H__
+#define __FRONTEND_H__
+
+#include <enco/Frontend.h>
+
+#include <caffe/proto/caffe.pb.h>
+
+#include <memory>
+
+class Frontend final : public enco::Frontend
+{
+public:
+ Frontend();
+
+public:
+ ::caffe::NetParameter *prototxt(void) { return _prototxt.get(); }
+ ::caffe::NetParameter *caffemodel(void) { return _caffemodel.get(); }
+
+public:
+ enco::Bundle load(void) const override;
+
+private:
+ std::unique_ptr<::caffe::NetParameter> _prototxt;
+ std::unique_ptr<::caffe::NetParameter> _caffemodel;
+};
+
+#endif // __FRONTEND_H__
diff --git a/compiler/enco/frontend/caffe/src/GraphBuilder.cpp b/compiler/enco/frontend/caffe/src/GraphBuilder.cpp
new file mode 100644
index 000000000..18ba10c08
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/GraphBuilder.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @note: This cpp file exist to check compilation integrity
+ */
+
+#include "GraphBuilder.h"
diff --git a/compiler/enco/frontend/caffe/src/GraphBuilder.h b/compiler/enco/frontend/caffe/src/GraphBuilder.h
new file mode 100644
index 000000000..04adb96f4
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/GraphBuilder.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GRAPH_BUILDER_H__
+#define __GRAPH_BUILDER_H__
+
+#include "Context.h"
+
+#include <caffe/proto/caffe.pb.h>
+
+namespace caffeimport
+{
+
+class GraphBuilder
+{
+public:
+ virtual void build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const = 0;
+ virtual ~GraphBuilder() {}
+};
+
+} // namespace caffeimport
+
+#endif // __GRAPH_BUILDER_H__
diff --git a/compiler/enco/frontend/caffe/src/GraphBuilderRegistry.cpp b/compiler/enco/frontend/caffe/src/GraphBuilderRegistry.cpp
new file mode 100644
index 000000000..e9db31177
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/GraphBuilderRegistry.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GraphBuilderRegistry.h"
+
+#include "Layer/Concatenation.h"
+#include "Layer/Convolution.h"
+#include "Layer/Eltwise.h"
+#include "Layer/Input.h"
+#include "Layer/Pooling.h"
+#include "Layer/ReLU.h"
+#include "Layer/Scale.h"
+#include "Layer/BatchNorm.h"
+
+#include <stdex/Memory.h>
+
+using stdex::make_unique;
+
+namespace caffeimport
+{
+
+GraphBuilderRegistry::GraphBuilderRegistry()
+{
+ _builder_map["Concat"] = make_unique<ConcatBuilder>();
+ _builder_map["Convolution"] = make_unique<ConvolutionBuilder>();
+ _builder_map["Eltwise"] = make_unique<EltwiseBuilder>();
+ _builder_map["Input"] = make_unique<InputBuilder>();
+ _builder_map["Pooling"] = make_unique<PoolingBuilder>();
+ _builder_map["ReLU"] = make_unique<ReLUBuilder>();
+ _builder_map["Scale"] = make_unique<ScaleBuilder>();
+ _builder_map["BatchNorm"] = make_unique<BatchNormBuilder>();
+}
+
+} // namespace caffeimport
diff --git a/compiler/enco/frontend/caffe/src/GraphBuilderRegistry.h b/compiler/enco/frontend/caffe/src/GraphBuilderRegistry.h
new file mode 100644
index 000000000..035d32a4b
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/GraphBuilderRegistry.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GRAPH_BUILDER_REGISTRY_H__
+#define __GRAPH_BUILDER_REGISTRY_H__
+
+#include "GraphBuilder.h"
+
+#include <map>
+#include <string>
+
+namespace caffeimport
+{
+
+class GraphBuilderRegistry
+{
+public:
+ const GraphBuilder *lookup(const std::string &layer) const
+ {
+ if (_builder_map.find(layer) == _builder_map.end())
+ return nullptr;
+
+ return _builder_map.at(layer).get();
+ }
+
+ static GraphBuilderRegistry &get()
+ {
+ static GraphBuilderRegistry me;
+ return me;
+ }
+
+private:
+ GraphBuilderRegistry();
+
+private:
+ std::map<std::string, std::unique_ptr<GraphBuilder>> _builder_map;
+};
+
+} // namespace caffeimport
+
+#endif // __GRAPH_BUILDER_REGISTRY_H__
diff --git a/compiler/enco/frontend/caffe/src/IRBuilder.h b/compiler/enco/frontend/caffe/src/IRBuilder.h
new file mode 100644
index 000000000..fe34328af
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/IRBuilder.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __IR_BUILDER_H__
+#define __IR_BUILDER_H__
+
+#include "coco/IR/Module.h"
+
+#include <deque>
+
+/**
+ * coco IR builders
+ */
+
+class OpBuilder
+{
+public:
+ OpBuilder(coco::Module *module) : _module{module}
+ {
+ // module SHOULD BE valid
+ assert(_module != nullptr);
+ }
+
+public:
+ /**
+ * @brief Return true if the internal stack is empty
+ */
+ bool empty(void) const { return _stack.empty(); }
+
+ /**
+ * @brief Return the operation at the top of the internal stack
+ */
+ coco::Op *top(void) const
+ {
+ assert(_stack.size() > 0);
+ return _stack.front();
+ }
+
+ /**
+ * @brief Push op onto the internal stack
+ *
+ * BEFORE| Stack
+ * AFTER | Op; Stack
+ */
+ OpBuilder &push(coco::Op *op)
+ {
+ _stack.push_front(op);
+ return (*this);
+ }
+
+ /**
+ * @brief Create "Load" op and push it onto the internal stack
+ *
+ * BEFORE| Stack
+ * AFTER | Load(obj); Stack
+ */
+ OpBuilder &load(coco::Object *obj)
+ {
+ auto op = _module->entity()->op()->create<coco::Load>();
+ op->object(obj);
+ push(op);
+ return (*this);
+ }
+
+ /**
+ * @brief Create "Add" op and push it onto the internal stack
+ *
+ * BEFORE| Left; Right; Stack
+ * AFTER | Add(Left, Right); Stack
+ */
+ OpBuilder &add(void) { return binary<coco::Add>(); }
+
+ /**
+ * @brief Create "Sub" op and push it onto the internal stack
+ *
+ * BEFORE| Left; Right; Stack
+ * AFTER | Sub(Left, Right); Stack
+ */
+ OpBuilder &sub(void) { return binary<coco::Sub>(); }
+
+ /**
+ * @brief Create "Mul" op and push it onto the internal stack
+ *
+ * BEFORE| Left; Right; Stack
+ * AFTER | Mul(Left, Right); Stack
+ */
+ OpBuilder &mul(void) { return binary<coco::Mul>(); }
+
+ /**
+ * @brief Create "Div" op and push it onto the internal stack
+ *
+ * BEFORE| Left; Right; Stack
+ * AFTER | Div(Left, Right); Stack
+ */
+ OpBuilder &div(void) { return binary<coco::Div>(); }
+
+ /**
+ * @brief Pop op from the internal stack
+ *
+ * BEFORE| Op; Stack
+ * AFTER | Stack
+ */
+ coco::Op *pop(void)
+ {
+ assert(_stack.size() > 0);
+ auto op = _stack.front();
+ _stack.pop_front();
+ return op;
+ }
+
+private:
+ template <typename ConcreteOp> OpBuilder &binary()
+ {
+ assert(_stack.size() >= 2);
+ auto left = pop();
+ auto right = pop();
+
+ auto op = _module->entity()->op()->create<ConcreteOp>();
+ op->left(left);
+ op->right(right);
+ push(op);
+
+ return (*this);
+ }
+
+private:
+ coco::Module *_module;
+ std::deque<coco::Op *> _stack;
+};
+
+inline OpBuilder op_builder(coco::Module *m) { return OpBuilder{m}; }
+inline OpBuilder op_builder(const std::unique_ptr<coco::Module> &m) { return op_builder(m.get()); }
+
+class InstrBuilder
+{
+public:
+ InstrBuilder(coco::Module *module) : _module{module}
+ {
+ // NOTE _module SHOULD be valid
+ assert(_module != nullptr);
+ }
+
+public:
+ /**
+ * @brief Create "Eval" instruction with a given "Object" and "Op"
+ *
+ * @note "eval(out, op)" will create "%out <- Eval(op)" instruction
+ */
+ coco::Eval *eval(coco::Object *out, coco::Op *op) const
+ {
+ auto ins = _module->entity()->instr()->create<coco::Eval>();
+ ins->op(op);
+ ins->out(out);
+ return ins;
+ }
+
+private:
+ coco::Module *_module;
+};
+
+inline InstrBuilder instr_builder(coco::Module *m) { return InstrBuilder{m}; }
+inline InstrBuilder instr_builder(const std::unique_ptr<coco::Module> &m)
+{
+ return instr_builder(m.get());
+}
+
+#endif // __IR_BUILDER_H__
diff --git a/compiler/enco/frontend/caffe/src/Importer.cpp b/compiler/enco/frontend/caffe/src/Importer.cpp
new file mode 100644
index 000000000..943a54e5d
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Importer.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Importer.h"
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+bool from_txt(std::istream &is, ::caffe::NetParameter &param)
+{
+ google::protobuf::io::IstreamInputStream iis{&is};
+
+ if (!google::protobuf::TextFormat::Parse(&iis, &param))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+bool from_bin(std::istream &is, ::caffe::NetParameter &param)
+{
+ google::protobuf::io::IstreamInputStream iis{&is};
+ google::protobuf::io::CodedInputStream cis{&iis};
+
+ if (!param.ParseFromCodedStream(&cis))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+bool from_txt(std::istream &is, ::caffe::PoolingParameter &param)
+{
+ ::google::protobuf::io::IstreamInputStream iis{&is};
+ return google::protobuf::TextFormat::Parse(&iis, &param);
+}
diff --git a/compiler/enco/frontend/caffe/src/Importer.h b/compiler/enco/frontend/caffe/src/Importer.h
new file mode 100644
index 000000000..ac83c0b27
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Importer.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __IMPORTER_H__
+#define __IMPORTER_H__
+
+#include <caffe/proto/caffe.pb.h>
+
+#include <istream>
+
+bool from_txt(std::istream &is, ::caffe::NetParameter &param);
+bool from_bin(std::istream &is, ::caffe::NetParameter &param);
+
+bool from_txt(std::istream &is, ::caffe::PoolingParameter &param);
+
+#endif // __IMPORTER_H__
diff --git a/compiler/enco/frontend/caffe/src/Layer/BatchNorm.cpp b/compiler/enco/frontend/caffe/src/Layer/BatchNorm.cpp
new file mode 100644
index 000000000..ff1e86570
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/BatchNorm.cpp
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchNorm.h"
+#include "IRBuilder.h"
+
+#include <morph/caffe.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::caffe;
+
+using tensor::num_elements;
+
+namespace caffeimport
+{
+
+void BatchNormBuilder::build(const ::caffe::LayerParameter &layer,
+ GraphBuilderContext *context) const
+{
+ coco::Module *module = context->module();
+ coco::Data *data = context->data();
+ coco::Block *blk = context->block();
+ std::map<std::string, tensor::Shape> &shape_ctx = context->shape_ctx();
+ std::map<std::string, coco::Bag *> &bag_ctx = context->bag_ctx();
+ WeightContext &weight_ctx = context->weight_ctx();
+
+ assert(layer.bottom().size() == 1);
+ assert(layer.top().size() == 1);
+
+ assert(layer.has_batch_norm_param());
+ const auto &param = layer.batch_norm_param();
+
+ // TODO Support training case
+ assert(param.use_global_stats() == true);
+
+ // Create an object for an input feature map
+ const auto ifm_name = layer.bottom(0);
+ const auto ifm_shape = shape_ctx.at(ifm_name);
+ auto ifm_bag = bag_ctx.at(ifm_name);
+ auto ifm_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ ifm_obj->bag(ifm_bag);
+ ifm_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(ifm_shape)));
+
+ // Create an object for an output feature map
+ const auto ofm_name = layer.top(0);
+ const auto ofm_shape = ifm_shape;
+ auto ofm_bag = module->entity()->bag()->create(num_elements(ofm_shape));
+ auto ofm_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ ofm_obj->bag(ofm_bag);
+ ofm_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(ofm_shape)));
+
+ // Create an object for the scaled mean estimates data
+ auto mean_bag = module->entity()->bag()->create(ofm_shape.dim(1));
+ auto mean_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ mean_obj->bag(mean_bag);
+ mean_obj->layout(coco::FeatureLayouts::BC::create(as_feature_shape(ofm_shape)));
+
+ // Create an object for the scaled variance estimates data
+ auto variance_bag = module->entity()->bag()->create(ofm_shape.dim(1));
+ auto variance_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ variance_obj->bag(variance_bag);
+ variance_obj->layout(coco::FeatureLayouts::BC::create(as_feature_shape(ofm_shape)));
+
+ if (param.use_global_stats())
+ {
+ // Use the stored mean/variance estimates.
+ assert(weight_ctx.blob_count(layer.name()) == 3);
+
+ // Create an object for scale factor data
+ auto factor_bag = module->entity()->bag()->create(ofm_shape.dim(1));
+ auto factor_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ factor_obj->bag(factor_bag);
+ factor_obj->layout(coco::FeatureLayouts::BC::create(as_feature_shape(ofm_shape)));
+
+ // Fill "scale factor" data
+ {
+ data->f32()->allocate(factor_bag);
+
+ auto dst = data->f32()->weight(factor_bag);
+ // Calculate scale factor
+ auto blob = weight_ctx.blob_get(layer.name(), 2);
+ const auto scale_factor = blob->data(0) == 0 ? 0.f : 1 / blob->data(0);
+
+ for (uint32_t ch = 0; ch < factor_obj->shape().depth(); ++ch)
+ {
+ dst[ch] = scale_factor;
+ }
+ }
+
+ // Create an object for saved mean data
+ auto saved_mean_bag = module->entity()->bag()->create(ofm_shape.dim(1));
+ auto saved_mean_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ saved_mean_obj->bag(saved_mean_bag);
+ saved_mean_obj->layout(coco::FeatureLayouts::BC::create(as_feature_shape(ofm_shape)));
+
+ // Fill "saved mean estimates" data
+ {
+ data->f32()->allocate(saved_mean_bag);
+
+ auto dst = data->f32()->weight(saved_mean_bag);
+ auto blob = weight_ctx.blob_get(layer.name(), 0);
+
+ for (uint32_t ch = 0; ch < saved_mean_obj->shape().depth(); ++ch)
+ {
+ dst[ch] = blob->data(ch);
+ }
+ }
+
+ // Multiply scale factor to mean data
+ {
+ auto mul_op = op_builder(module).load(factor_obj).load(saved_mean_obj).mul().pop();
+ auto mul_ins = instr_builder(module).eval(mean_obj, mul_op);
+
+ blk->instr()->append(mul_ins);
+ }
+
+ // Create an object for saved variance data
+ auto saved_variance_bag = module->entity()->bag()->create(ofm_shape.dim(1));
+ auto saved_variance_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ saved_variance_obj->bag(saved_variance_bag);
+ saved_variance_obj->layout(coco::FeatureLayouts::BC::create(as_feature_shape(ofm_shape)));
+
+ // Fill "saved variance estimates" data
+ {
+ data->f32()->allocate(saved_variance_bag);
+
+ auto dst = data->f32()->weight(saved_variance_bag);
+ auto blob = weight_ctx.blob_get(layer.name(), 1);
+
+ for (uint32_t ch = 0; ch < saved_variance_obj->shape().depth(); ++ch)
+ {
+ dst[ch] = blob->data(ch);
+ }
+ }
+
+ // Multiply scale factor to variance data
+ {
+ auto mul_op = op_builder(module).load(factor_obj).load(saved_variance_obj).mul().pop();
+ auto mul_ins = instr_builder(module).eval(variance_obj, mul_op);
+
+ blk->instr()->append(mul_ins);
+ }
+ }
+ else
+ {
+ // TODO use_global_stats() == false case
+ }
+
+ // Create an object for subtraction
+ auto sub_bag = module->entity()->bag()->create(num_elements(ofm_shape));
+ auto sub_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ sub_obj->bag(sub_bag);
+ sub_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(ofm_shape)));
+
+ // Subtract mean
+ {
+ auto sub_op = op_builder(module).load(mean_obj).load(ifm_obj).sub().pop();
+ auto sub_ins = instr_builder(module).eval(sub_obj, sub_op);
+
+ blk->instr()->append(sub_ins);
+ }
+
+ // Create an object for normalize variance data
+ auto norm_bag = module->entity()->bag()->create(ofm_shape.dim(1));
+ auto norm_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ norm_obj->bag(norm_bag);
+ norm_obj->layout(coco::FeatureLayouts::BC::create(as_feature_shape(ofm_shape)));
+
+ // Normalize variance
+ {
+ // Create an object for epsilon data
+ auto eps_bag = module->entity()->bag()->create(ofm_shape.dim(1));
+ auto eps_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ eps_obj->bag(eps_bag);
+ eps_obj->layout(coco::FeatureLayouts::BC::create(as_feature_shape(ofm_shape)));
+
+ // Fill "epsilon" data
+ {
+ data->f32()->allocate(eps_bag);
+
+ auto dst = data->f32()->weight(eps_bag);
+ auto eps = param.eps();
+
+ for (uint32_t ch = 0; ch < eps_obj->shape().depth(); ++ch)
+ {
+ dst[ch] = eps;
+ }
+ }
+
+ // Create a temp object
+ auto temp_bag = module->entity()->bag()->create(ofm_shape.dim(1));
+ auto temp_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ temp_obj->bag(temp_bag);
+ temp_obj->layout(coco::FeatureLayouts::BC::create(as_feature_shape(ofm_shape)));
+
+ // Add epsilon to variance
+ {
+ auto add_op = op_builder(module).load(variance_obj).load(eps_obj).add().pop();
+ auto add_ins = instr_builder(module).eval(temp_obj, add_op);
+
+ blk->instr()->append(add_ins);
+ }
+
+ // Sqrt variance
+ {
+ auto load = op_builder(module).load(temp_obj).pop();
+ auto sqrt_op = module->entity()->op()->create<coco::Sqrt>();
+ sqrt_op->arg(load);
+ auto sqrt_ins = instr_builder(module).eval(norm_obj, sqrt_op);
+
+ blk->instr()->append(sqrt_ins);
+ }
+ }
+
+ // Replicate variance to input size
+ {
+ auto div_op = op_builder(module).load(norm_obj).load(sub_obj).div().pop();
+ auto div_ins = instr_builder(module).eval(ofm_obj, div_op);
+
+ blk->instr()->append(div_ins);
+ }
+
+ // Update bag and shape context
+ bag_ctx[ofm_name] = ofm_bag;
+ shape_ctx[ofm_name] = ofm_shape;
+}
+
+} // namespace caffeimport
diff --git a/compiler/enco/frontend/caffe/src/Layer/BatchNorm.h b/compiler/enco/frontend/caffe/src/Layer/BatchNorm.h
new file mode 100644
index 000000000..613b6687e
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/BatchNorm.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BATCHNORM_BUILDER_H__
+#define __BATCHNORM_BUILDER_H__
+
+#include "GraphBuilder.h"
+
+#include "Context.h"
+
+namespace caffeimport
+{
+
+class BatchNormBuilder final : public GraphBuilder
+{
+public:
+ void build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const override;
+};
+
+} // namespace caffeimport
+
+#endif // __BATCHNORM_BUILDER_H__
diff --git a/compiler/enco/frontend/caffe/src/Layer/Concatenation.cpp b/compiler/enco/frontend/caffe/src/Layer/Concatenation.cpp
new file mode 100644
index 000000000..f05f5908a
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/Concatenation.cpp
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Concatenation.h"
+#include "IRBuilder.h"
+
+#include <coco/IR/FeatureLayouts.h>
+
+#include <morph/caffe.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::caffe;
+
+namespace caffeimport
+{
+
+void ConcatBuilder::build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const
+{
+ coco::Module *module = context->module();
+ coco::Block *blk = context->block();
+ std::map<std::string, tensor::Shape> &shape_ctx = context->shape_ctx();
+ std::map<std::string, coco::Bag *> &bag_ctx = context->bag_ctx();
+
+ assert(layer.bottom().size() > 0);
+ assert(layer.top().size() == 1);
+
+ // Assume default concat axis
+ // - Please refer to http://caffe.berkeleyvision.org/tutorial/layers/concat.html for details
+ // TODO Get concat axis from concat param
+ assert(!layer.has_concat_param());
+ const uint32_t concat_axis = 1;
+
+ // Construct a vector of input objects
+ std::vector<coco::FeatureObject *> input_objects;
+
+ for (const auto &input_name : layer.bottom())
+ {
+ const auto input_shape = as_feature_shape(shape_ctx.at(input_name));
+
+ auto input_bag = bag_ctx.at(input_name);
+ auto input_feature = module->entity()->object()->create<coco::FeatureObject>();
+
+ input_feature->bag(input_bag);
+ input_feature->layout(coco::FeatureLayouts::BCHW::create(input_shape));
+
+ input_objects.emplace_back(input_feature);
+ }
+
+ coco::FeatureObject *last_feature = input_objects.at(0);
+
+ assert(last_feature != nullptr);
+ assert(last_feature->bag() != nullptr);
+
+ // Update coco IR
+ //
+ // Given a sequence of input features %in[0] / %in[1] / ... / %in[N]
+ // the below code constructs a sequence of eval instructions
+ // - Load is omitted for simplicity
+ //
+ // %out[0] = eval(ConcatF(%in[0], %in[1]))
+ // %out[1] = eval(ConcatF(%out[0], %in[2]))
+ // ...
+ // %out[N - 1] = eval(ConcatF(%out[N - 2], %in[N]))
+ //
+ for (uint32_t n = 1; n < input_objects.size(); ++n)
+ {
+ auto const left_feature = last_feature;
+ auto const left_shape = left_feature->layout()->shape();
+
+ auto right_feature = input_objects.at(n);
+ auto right_shape = right_feature->layout()->shape();
+
+ // Batch is not supported, yet
+ assert(left_feature->layout()->batch() == 1);
+ assert(right_feature->layout()->batch() == 1);
+
+ // Height and Width SHOULD BE IDENTICAL for depth concat
+ assert(left_shape.height() == right_shape.height());
+ assert(left_shape.width() == right_shape.width());
+
+ const uint32_t C = left_shape.depth() + right_shape.depth();
+ const uint32_t H = left_shape.height();
+ const uint32_t W = left_shape.width();
+
+ const nncc::core::ADT::feature::Shape out_shape{C, H, W};
+
+ auto out_bag = module->entity()->bag()->create(num_elements(out_shape));
+ auto out_feature = module->entity()->object()->create<coco::FeatureObject>();
+
+ out_feature->bag(out_bag);
+ out_feature->layout(coco::FeatureLayouts::BCHW::create(out_shape));
+
+ auto left_load = op_builder(module).load(left_feature).pop();
+ auto right_load = op_builder(module).load(right_feature).pop();
+
+ auto concat_f = module->entity()->op()->create<coco::ConcatF>();
+
+ concat_f->axis(coco::ConcatF::Axis::Depth);
+ concat_f->left(left_load);
+ concat_f->right(right_load);
+
+ auto eval = instr_builder(module).eval(out_feature, concat_f);
+
+ // Append the constructed Shuffle instruction
+ blk->instr()->append(eval);
+
+ // Update 'last_feature'
+ last_feature = out_feature;
+ }
+
+ assert(last_feature != nullptr);
+ assert(last_feature->bag() != nullptr);
+
+ // Update bag and shape context
+ auto const out_name = layer.top(0);
+ auto const out_shape = as_tensor_shape(last_feature->layout()->shape());
+ auto const out_bag = last_feature->bag();
+
+ bag_ctx[out_name] = out_bag;
+ shape_ctx[out_name] = out_shape;
+}
+
+} // namespace caffeimport
diff --git a/compiler/enco/frontend/caffe/src/Layer/Concatenation.h b/compiler/enco/frontend/caffe/src/Layer/Concatenation.h
new file mode 100644
index 000000000..85e04000d
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/Concatenation.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONCAT_BUILDER_H__
+#define __CONCAT_BUILDER_H__
+
+#include "GraphBuilder.h"
+
+#include "Context.h"
+
+namespace caffeimport
+{
+
+class ConcatBuilder final : public GraphBuilder
+{
+public:
+ void build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const override;
+};
+
+} // namespace caffeimport
+
+#endif // __CONCAT_BUILDER_H__
diff --git a/compiler/enco/frontend/caffe/src/Layer/Convolution.cpp b/compiler/enco/frontend/caffe/src/Layer/Convolution.cpp
new file mode 100644
index 000000000..9fb096d49
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/Convolution.cpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convolution.h"
+#include "ConvolutionSpec.h"
+#include "Convert.h"
+#include "IRBuilder.h"
+
+#include <nncc/core/ADT/kernel/Overlay.h>
+#include <nncc/core/ADT/kernel/NCHWLayout.h>
+
+#include <morph/caffe.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::caffe;
+
+using tensor::num_elements;
+
+namespace caffeimport
+{
+
+void ConvolutionBuilder::build(const ::caffe::LayerParameter &layer,
+ GraphBuilderContext *context) const
+{
+ coco::Module *module = context->module();
+ coco::Data *data = context->data();
+ coco::Block *blk = context->block();
+ std::map<std::string, tensor::Shape> &shape_ctx = context->shape_ctx();
+ std::map<std::string, coco::Bag *> &bag_ctx = context->bag_ctx();
+ WeightContext &weight_ctx = context->weight_ctx();
+
+ assert(layer.bottom().size() == 1);
+ assert(layer.top().size() == 1);
+
+ assert(layer.has_convolution_param());
+ const auto &param = layer.convolution_param();
+
+ ConvolutionSpec spec{param};
+ {
+ const auto ifm_name = layer.bottom(0);
+ const auto ifm_shape = shape_ctx.at(ifm_name);
+ spec.ifm_shape(ifm_shape);
+ }
+
+ // NOTE The current implementation focuses on 2D convolution
+ // TODO Support general ND convolution
+ assert(spec.num_batch_axes() == 1);
+ assert(spec.num_spatial_axes() == 2);
+
+ // Create an object for an input feature map
+ const auto ifm_name = layer.bottom(0);
+ const auto ifm_shape = shape_ctx.at(ifm_name);
+ auto ifm_bag = bag_ctx.at(ifm_name);
+ auto ifm_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ ifm_obj->bag(ifm_bag);
+ ifm_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(ifm_shape)));
+
+ // Create an object for an output feature map
+ const auto ofm_name = layer.top(0);
+ const auto ofm_shape = spec.ofm_shape();
+ auto ofm_bag = module->entity()->bag()->create(num_elements(ofm_shape));
+ auto ofm_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ ofm_obj->bag(ofm_bag);
+ ofm_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(ofm_shape)));
+
+ // Create an object for kernel
+ using namespace coco::KernelLayouts;
+
+ const auto ker_shape = spec.ker_shape();
+ auto ker_bag = module->entity()->bag()->create(num_elements(ker_shape));
+ auto ker_obj = module->entity()->object()->create<coco::KernelObject>();
+
+ ker_obj->bag(ker_bag);
+ ker_obj->layout(NCHW::create(as_kernel_shape(ker_shape)));
+
+ // Create a kernel overlay for the kernel object
+ data->f32()->allocate(ker_bag);
+
+ // Initialize the kernel overlay
+ assert(weight_ctx.blob_count(layer.name()) >= 1);
+ auto ker_blob = weight_ctx.blob_get(layer.name(), 0);
+
+ assert(ker_shape == caffeimport::as_tensor_shape(ker_blob));
+
+ auto ker_dst = data->f32()->access(ker_obj);
+ auto ker_src = kernel::OverlayFactory<float, kernel::NCHWLayout>::make(
+ ker_obj->shape(), ker_blob->mutable_data()->begin());
+
+ for (uint32_t n = 0; n < ker_obj->shape().count(); ++n)
+ {
+ for (uint32_t ch = 0; ch < ker_obj->shape().depth(); ++ch)
+ {
+ for (uint32_t row = 0; row < ker_obj->shape().height(); ++row)
+ {
+ for (uint32_t col = 0; col < ker_obj->shape().width(); ++col)
+ {
+ ker_dst->at(n, ch, row, col) = ker_src.at(n, ch, row, col);
+ }
+ }
+ }
+ }
+
+ // Create a Load op
+ auto load = op_builder(module).load(ifm_obj).pop();
+
+ // Create a Conv2D op
+ auto op = module->entity()->op()->create<coco::Conv2D>();
+
+ op->group(spec.group());
+
+ op->ker(ker_obj);
+ op->stride()->vertical(spec.stride(0));
+ op->stride()->horizontal(spec.stride(1));
+
+ op->pad()->top(spec.pad(0));
+ op->pad()->bottom(spec.pad(0));
+ op->pad()->left(spec.pad(1));
+ op->pad()->right(spec.pad(1));
+
+ op->arg(load);
+
+ // Create an Eval instruction
+ auto ins = instr_builder(module).eval(ofm_obj, op);
+
+ // Append the instruction to the block
+ blk->instr()->append(ins);
+
+ //
+ // coco IR allows Conv2D fused with Add, but the current implementation of enco backend
+ // is unable to process such a tree.
+ //
+  // As a workaround, caffe frontend constructs an instruction for Conv2D and Add.
+ //
+ if (param.bias_term())
+ {
+ assert(weight_ctx.blob_count(layer.name()) >= 2);
+
+ // Create Bag & Object
+ auto bias_bag = module->entity()->bag()->create(ker_shape.dim(0));
+ auto bias_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ bias_obj->bag(bias_bag);
+ bias_obj->layout(coco::FeatureLayouts::BC::create(as_feature_shape(ofm_shape)));
+
+ auto added_bag = module->entity()->bag()->create(num_elements(ofm_shape));
+ auto added_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ added_obj->bag(added_bag);
+ added_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(ofm_shape)));
+
+ // Create Op
+ auto bias_add = op_builder(module).load(bias_obj).load(ofm_obj).add().pop();
+
+ // Create Instr
+ auto bias_add_ins = instr_builder(module).eval(added_obj, bias_add);
+
+ // Append the instruction
+ blk->instr()->append(bias_add_ins);
+
+ // Fill bias data
+ data->f32()->allocate(bias_bag);
+
+ auto bias_span = data->f32()->weight(bias_bag);
+ auto bias_blob = weight_ctx.blob_get(layer.name(), 1);
+
+ for (uint32_t ch = 0; ch < ker_obj->shape().count(); ++ch)
+ {
+ bias_span[ch] = bias_blob->data(ch);
+ }
+
+ // Update output
+ ofm_bag = added_bag;
+ }
+
+ // Update bag and shape context
+ bag_ctx[ofm_name] = ofm_bag;
+ shape_ctx[ofm_name] = ofm_shape;
+}
+
+} // namespace caffeimport
diff --git a/compiler/enco/frontend/caffe/src/Layer/Convolution.h b/compiler/enco/frontend/caffe/src/Layer/Convolution.h
new file mode 100644
index 000000000..a944f12a3
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/Convolution.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVOLUTION_BUILDER_H__
+#define __CONVOLUTION_BUILDER_H__
+
+#include "GraphBuilder.h"
+
+#include "Context.h"
+
+namespace caffeimport
+{
+
+class ConvolutionBuilder final : public GraphBuilder
+{
+public:
+ void build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const override;
+};
+
+} // namespace caffeimport
+
+#endif // __CONVOLUTION_BUILDER_H__
diff --git a/compiler/enco/frontend/caffe/src/Layer/Eltwise.cpp b/compiler/enco/frontend/caffe/src/Layer/Eltwise.cpp
new file mode 100644
index 000000000..6a5d4f196
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/Eltwise.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Eltwise.h"
+#include "IRBuilder.h"
+
+#include <coco/IR/FeatureLayouts.h>
+
+#include <morph/caffe.h>
+
+#include <cassert>
+#include <functional>
+
+using namespace nncc::core::ADT;
+using namespace morph::caffe;
+
+namespace caffeimport
+{
+
+void EltwiseBuilder::build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const
+{
+ coco::Module *module = context->module();
+ coco::Block *blk = context->block();
+ std::map<std::string, tensor::Shape> &shape_ctx = context->shape_ctx();
+ std::map<std::string, coco::Bag *> &bag_ctx = context->bag_ctx();
+
+ using coco::FeatureLayouts::BCHW;
+
+ assert(layer.bottom().size() > 1);
+ assert(layer.top().size() == 1);
+
+ assert(layer.has_eltwise_param());
+ const auto &param = layer.eltwise_param();
+
+ using ::caffe::EltwiseParameter_EltwiseOp;
+ using ::caffe::EltwiseParameter_EltwiseOp_SUM;
+ using ::caffe::EltwiseParameter_EltwiseOp_PROD;
+
+ using Reducer = std::function<coco::Op *(coco::Op * lhs, coco::Op * rhs)>;
+ using ReducerRegistry = std::map<EltwiseParameter_EltwiseOp, Reducer>;
+
+ ReducerRegistry registry;
+
+  // MAX is not supported, yet
+ registry[EltwiseParameter_EltwiseOp_SUM] = [](coco::Op *lhs, coco::Op *rhs) -> coco::Op * {
+ if (lhs == nullptr)
+ {
+ assert(rhs != nullptr);
+ return rhs;
+ }
+
+ assert(lhs != nullptr && rhs != nullptr);
+ assert(lhs->module() == rhs->module());
+ assert(lhs->module() != nullptr);
+
+ auto m = lhs->module();
+ return op_builder(m).push(rhs).push(lhs).add().pop();
+ };
+
+ registry[EltwiseParameter_EltwiseOp_PROD] = [](coco::Op *lhs, coco::Op *rhs) -> coco::Op * {
+ if (lhs == nullptr)
+ {
+ assert(rhs != nullptr);
+ return rhs;
+ }
+
+ assert(lhs != nullptr && rhs != nullptr);
+ assert(lhs->module() == rhs->module());
+ assert(lhs->module() != nullptr);
+
+ auto m = lhs->module();
+ return op_builder(m).push(rhs).push(lhs).mul().pop();
+ };
+
+ // coeff is not supported, yet
+ assert(!param.coeff().size());
+
+ // Decide appropriate reduce function
+ auto reduce = registry.at(param.operation());
+
+ coco::Op *op = nullptr;
+
+ for (const auto &ifm_name : layer.bottom())
+ {
+ auto ifm_shape = shape_ctx.at(ifm_name);
+
+ // NOTE The current implementation does not work in general
+ auto ifm_bag = bag_ctx.at(ifm_name);
+ auto ifm_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ ifm_obj->bag(ifm_bag);
+ ifm_obj->layout(BCHW::create(as_feature_shape(ifm_shape)));
+
+ auto load = op_builder(module).load(ifm_obj).pop();
+
+ op = reduce(op, load);
+ }
+
+ assert(op != nullptr);
+
+ const auto ofm_name = layer.top(0);
+ const auto ofm_shape = shape_ctx.at(layer.bottom(0));
+
+ auto ofm_bag = module->entity()->bag()->create(num_elements(ofm_shape));
+ auto ofm_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ ofm_obj->bag(ofm_bag);
+ ofm_obj->layout(BCHW::create(as_feature_shape(ofm_shape)));
+
+ // Create "Eval" instruction
+ auto eval = instr_builder(module).eval(ofm_obj, op);
+
+ // Append the instruction to the block
+ blk->instr()->append(eval);
+
+ // Update bag and shape context
+ bag_ctx[ofm_name] = ofm_bag;
+ shape_ctx[ofm_name] = ofm_shape;
+}
+
+} // namespace caffeimport
diff --git a/compiler/enco/frontend/caffe/src/Layer/Eltwise.h b/compiler/enco/frontend/caffe/src/Layer/Eltwise.h
new file mode 100644
index 000000000..e717077ec
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/Eltwise.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ELTWISE_BUILDER_H__
+#define __ELTWISE_BUILDER_H__
+
+#include "GraphBuilder.h"
+
+#include "Context.h"
+
+namespace caffeimport
+{
+
+class EltwiseBuilder final : public GraphBuilder
+{
+public:
+ void build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const override;
+};
+
+} // namespace caffeimport
+
+#endif // __ELTWISE_BUILDER_H__
diff --git a/compiler/enco/frontend/caffe/src/Layer/Input.cpp b/compiler/enco/frontend/caffe/src/Layer/Input.cpp
new file mode 100644
index 000000000..39e44fa31
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/Input.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Input.h"
+#include "Convert.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+
+using tensor::num_elements;
+using tensor::LexicalLayout;
+
+namespace caffeimport
+{
+
+void InputBuilder::build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const
+{
+ coco::Module *module = context->module();
+ std::map<std::string, tensor::Shape> &shape_ctx = context->shape_ctx();
+ std::map<std::string, coco::Bag *> &bag_ctx = context->bag_ctx();
+
+ assert(layer.has_input_param());
+ const auto &param = layer.input_param();
+
+ for (uint32_t n = 0; n < layer.top_size(); ++n)
+ {
+ const auto &name = layer.top(n);
+ const auto shape = as_tensor_shape(param.shape(n));
+
+ auto bag = module->entity()->bag()->create(num_elements(shape));
+ auto input = module->entity()->input()->create(shape);
+
+ input->bag(bag);
+ input->name(name);
+ input->reorder<LexicalLayout>();
+
+ module->input()->insert(input);
+
+ bag_ctx[name] = bag;
+ shape_ctx[name] = shape;
+ }
+}
+
+} // namespace caffeimport
diff --git a/compiler/enco/frontend/caffe/src/Layer/Input.h b/compiler/enco/frontend/caffe/src/Layer/Input.h
new file mode 100644
index 000000000..2f464748d
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/Input.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INPUT_BUILDER_H__
+#define __INPUT_BUILDER_H__
+
+#include "GraphBuilder.h"
+
+#include "Context.h"
+
+namespace caffeimport
+{
+
+class InputBuilder final : public GraphBuilder
+{
+public:
+ void build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const override;
+};
+
+} // namespace caffeimport
+
+#endif // __INPUT_BUILDER_H__
diff --git a/compiler/enco/frontend/caffe/src/Layer/Pooling.cpp b/compiler/enco/frontend/caffe/src/Layer/Pooling.cpp
new file mode 100644
index 000000000..36220d841
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/Pooling.cpp
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pooling.h"
+#include "PoolingSpec.h"
+#include "IRBuilder.h"
+
+#include <coco/IR/FeatureLayouts.h>
+
+#include <morph/caffe.h>
+
+#include <cassert>
+#include <functional>
+
+using namespace nncc::core::ADT;
+using namespace morph::caffe;
+
+namespace caffeimport
+{
+
+void PoolingBuilder::build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const
+{
+ coco::Module *module = context->module();
+ coco::Block *blk = context->block();
+ std::map<std::string, tensor::Shape> &shape_ctx = context->shape_ctx();
+ std::map<std::string, coco::Bag *> &bag_ctx = context->bag_ctx();
+
+ assert(layer.bottom().size() == 1);
+ assert(layer.top().size() == 1);
+
+ assert(layer.has_pooling_param());
+ const auto &param = layer.pooling_param();
+
+ PoolingSpec spec{param};
+ {
+ const auto ifm_name = layer.bottom(0);
+ const auto ifm_shape = shape_ctx.at(ifm_name);
+ spec.ifm_shape(ifm_shape);
+ }
+
+ // Create an object for an input feature map
+ const auto ifm_name = layer.bottom(0);
+ const auto ifm_shape = shape_ctx.at(ifm_name);
+ auto ifm_bag = bag_ctx.at(ifm_name);
+ auto ifm_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ ifm_obj->bag(ifm_bag);
+ ifm_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(ifm_shape)));
+
+ // Create an object for an output feature map
+ const auto ofm_name = layer.top(0);
+ const auto ofm_shape = spec.ofm_shape();
+ auto ofm_bag = module->entity()->bag()->create(num_elements(ofm_shape));
+ auto ofm_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ ofm_obj->bag(ofm_bag);
+ ofm_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(ofm_shape)));
+
+ using PoolingOpBuilder = std::function<coco::Op *(coco::Module * m, const PoolingSpec &spec)>;
+
+ std::map<PoolingMethod, PoolingOpBuilder> builders;
+
+ // MaxPool2D op builder
+ builders[PoolingMethod::Max] = [ifm_obj](coco::Module *module, const PoolingSpec &spec) {
+ auto load = op_builder(module).load(ifm_obj).pop();
+
+ auto op = module->entity()->op()->create<coco::MaxPool2D>();
+
+ op->arg(load);
+
+ op->window()->height(spec.window_height());
+ op->window()->width(spec.window_width());
+
+ op->stride()->vertical(spec.vertical_stride());
+ op->stride()->horizontal(spec.horizontal_stride());
+
+ op->pad()->top(spec.vertical_pad());
+ op->pad()->bottom(spec.vertical_pad());
+ op->pad()->left(spec.horizontal_pad());
+ op->pad()->right(spec.horizontal_pad());
+
+ return op;
+ };
+
+ // AvgPool2D op builder
+ builders[PoolingMethod::Avg] = [ifm_obj](coco::Module *module, const PoolingSpec &spec) {
+ auto load = op_builder(module).load(ifm_obj).pop();
+
+ auto op = module->entity()->op()->create<coco::AvgPool2D>();
+
+ op->arg(load);
+
+    // NOTE Caffe uses a static divisor on average pooling
+ op->divisor(coco::AvgPool2D::Divisor::Static);
+
+ op->window()->height(spec.window_height());
+ op->window()->width(spec.window_width());
+
+ op->stride()->vertical(spec.vertical_stride());
+ op->stride()->horizontal(spec.horizontal_stride());
+
+ op->pad()->top(spec.vertical_pad());
+ op->pad()->bottom(spec.vertical_pad());
+ op->pad()->left(spec.horizontal_pad());
+ op->pad()->right(spec.horizontal_pad());
+
+ return op;
+ };
+
+ // Create a pooling op
+ auto builder = builders.at(spec.method());
+ auto op = builder(module, spec);
+
+  // Create an Eval instruction
+ auto ins = instr_builder(module).eval(ofm_obj, op);
+
+ // Append the instruction to the block
+ blk->instr()->append(ins);
+
+ // Update bag and shape context
+ bag_ctx[ofm_name] = ofm_bag;
+ shape_ctx[ofm_name] = ofm_shape;
+}
+
+} // namespace caffeimport
diff --git a/compiler/enco/frontend/caffe/src/Layer/Pooling.h b/compiler/enco/frontend/caffe/src/Layer/Pooling.h
new file mode 100644
index 000000000..e72fd7aef
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/Pooling.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __POOLING_BUILDER_H__
+#define __POOLING_BUILDER_H__
+
+#include "GraphBuilder.h"
+
+#include "Context.h"
+
+namespace caffeimport
+{
+
+class PoolingBuilder final : public GraphBuilder
+{
+public:
+ void build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const override;
+};
+
+} // namespace caffeimport
+
+#endif // __POOLING_BUILDER_H__
diff --git a/compiler/enco/frontend/caffe/src/Layer/ReLU.cpp b/compiler/enco/frontend/caffe/src/Layer/ReLU.cpp
new file mode 100644
index 000000000..61e206dc2
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/ReLU.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU.h"
+#include "IRBuilder.h"
+
+#include <coco/IR/FeatureLayouts.h>
+
+#include <morph/caffe.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::caffe;
+
+namespace caffeimport
+{
+
+void ReLUBuilder::build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const
+{
+ coco::Module *module = context->module();
+ coco::Block *blk = context->block();
+ std::map<std::string, tensor::Shape> &shape_ctx = context->shape_ctx();
+ std::map<std::string, coco::Bag *> &bag_ctx = context->bag_ctx();
+
+ assert(layer.bottom().size() == 1);
+ assert(layer.top().size() == 1);
+
+ // PReLU is not supported, yet
+ // TODO Support PReLU
+ assert(!layer.has_relu_param());
+
+ // NOTE The current implementation treats ReLU as Feature op
+ // TODO Support ReLU over general tensor
+ const auto ifm_name = layer.bottom(0);
+ const auto ifm_shape = shape_ctx.at(ifm_name);
+ auto ifm_bag = bag_ctx.at(ifm_name);
+ auto ifm_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ ifm_obj->bag(ifm_bag);
+ ifm_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(ifm_shape)));
+
+ const auto ofm_name = layer.top(0);
+ const auto ofm_shape = ifm_shape;
+ auto ofm_bag = module->entity()->bag()->create(num_elements(ofm_shape));
+ auto ofm_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ ofm_obj->bag(ofm_bag);
+ ofm_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(ofm_shape)));
+
+ // Create a Load Op
+ auto load = op_builder(module).load(ifm_obj).pop();
+
+ // Create a ReLU op
+ auto op = module->entity()->op()->create<coco::ReLU>();
+
+ op->arg(load);
+
+ // Create a Eval instruction
+ auto ins = instr_builder(module).eval(ofm_obj, op);
+
+ // Append the instruction to the block
+ blk->instr()->append(ins);
+
+ // Update bag and shape context
+ bag_ctx[ofm_name] = ofm_bag;
+ shape_ctx[ofm_name] = ofm_shape;
+}
+
+} // namespace caffeimport
diff --git a/compiler/enco/frontend/caffe/src/Layer/ReLU.h b/compiler/enco/frontend/caffe/src/Layer/ReLU.h
new file mode 100644
index 000000000..94836fd8e
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/ReLU.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __RELU_BUILDER_H__
+#define __RELU_BUILDER_H__
+
+#include "GraphBuilder.h"
+
+#include "Context.h"
+
+namespace caffeimport
+{
+
+class ReLUBuilder final : public GraphBuilder
+{
+public:
+ void build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const override;
+};
+
+} // namespace caffeimport
+
+#endif // __RELU_BUILDER_H__
diff --git a/compiler/enco/frontend/caffe/src/Layer/Scale.cpp b/compiler/enco/frontend/caffe/src/Layer/Scale.cpp
new file mode 100644
index 000000000..b9925978c
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/Scale.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Scale.h"
+#include "IRBuilder.h"
+
+#include <coco/IR/FeatureLayouts.h>
+
+#include <morph/caffe.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::caffe;
+
+namespace caffeimport
+{
+
+void ScaleBuilder::build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const
+{
+ coco::Module *module = context->module();
+ coco::Data *data = context->data();
+ coco::Block *blk = context->block();
+ std::map<std::string, tensor::Shape> &shape_ctx = context->shape_ctx();
+ std::map<std::string, coco::Bag *> &bag_ctx = context->bag_ctx();
+ WeightContext &weight_ctx = context->weight_ctx();
+
+ // TODO Support Scale layer with 2 bottoms
+ assert(layer.bottom().size() == 1);
+ assert(layer.top().size() == 1);
+
+ assert(layer.has_scale_param());
+ const auto &param = layer.scale_param();
+
+ assert(param.axis() == 1);
+ assert(!param.has_num_axes());
+
+ assert(weight_ctx.blob_count(layer.name()) >= 1);
+
+ // NOTE The shape of "Scale" output is same as that of its input
+ // NOTE The current implementation assumes that input/output is of feature type
+ // TODO Support generic tensor arguments
+ auto shape = shape_ctx.at(layer.bottom(0));
+
+ coco::Bag *last_bag = bag_ctx.at(layer.bottom(0));
+
+ // Create channel-wise multiplication
+ {
+ auto in_bag = last_bag;
+ auto in_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ in_obj->bag(in_bag);
+ in_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(shape)));
+
+ auto factor_bag = module->entity()->bag()->create(num_elements(shape));
+ auto factor_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ factor_obj->bag(factor_bag);
+ factor_obj->layout(coco::FeatureLayouts::BC::create(as_feature_shape(shape)));
+
+ auto out_bag = module->entity()->bag()->create(num_elements(shape));
+ auto out_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ out_obj->bag(out_bag);
+ out_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(shape)));
+
+ auto mul_op = op_builder(module).load(factor_obj).load(in_obj).mul().pop();
+ auto mul_ins = instr_builder(module).eval(out_obj, mul_op);
+
+ blk->instr()->append(mul_ins);
+
+ // Fill "factor" data
+ {
+ data->f32()->allocate(factor_bag);
+
+ auto span = data->f32()->weight(factor_bag);
+ auto blob = weight_ctx.blob_get(layer.name(), 0);
+
+ for (uint32_t ch = 0; ch < factor_obj->shape().depth(); ++ch)
+ {
+ span[ch] = blob->data(ch);
+ }
+ }
+
+ // Update "last_bag"
+ last_bag = out_bag;
+ }
+
+ assert(last_bag != nullptr);
+
+ // Create bias addition (as channel-wise addition)
+ if (param.bias_term())
+ {
+ assert(weight_ctx.blob_count(layer.name()) >= 2);
+
+ auto in_bag = last_bag; /* Use the output of the last computation as an input */
+ auto in_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ in_obj->bag(in_bag);
+ in_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(shape)));
+
+ auto bias_bag = module->entity()->bag()->create(num_elements(shape));
+ auto bias_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ bias_obj->bag(bias_bag);
+ bias_obj->layout(coco::FeatureLayouts::BC::create(as_feature_shape(shape)));
+
+ auto out_bag = module->entity()->bag()->create(num_elements(shape));
+ auto out_obj = module->entity()->object()->create<coco::FeatureObject>();
+
+ out_obj->bag(out_bag);
+ out_obj->layout(coco::FeatureLayouts::BCHW::create(as_feature_shape(shape)));
+
+ auto add_op = op_builder(module).load(bias_obj).load(in_obj).add().pop();
+ auto add_ins = instr_builder(module).eval(out_obj, add_op);
+
+ blk->instr()->append(add_ins);
+
+ // Fill bias data
+ {
+ data->f32()->allocate(bias_bag);
+
+ auto bias_span = data->f32()->weight(bias_bag);
+ auto bias_blob = weight_ctx.blob_get(layer.name(), 1);
+
+ for (uint32_t ch = 0; ch < bias_obj->shape().depth(); ++ch)
+ {
+ bias_span[ch] = bias_blob->data(ch);
+ }
+ }
+
+ // Update "last_bag"
+ last_bag = out_bag;
+ }
+
+ // Update bag and shape context
+ {
+ const auto &out_name = layer.top(0);
+ const auto &out_bag = last_bag;
+ const auto &out_shape = shape;
+
+ bag_ctx[out_name] = out_bag;
+ shape_ctx[out_name] = out_shape;
+ }
+}
+
+} // namespace caffeimport
diff --git a/compiler/enco/frontend/caffe/src/Layer/Scale.h b/compiler/enco/frontend/caffe/src/Layer/Scale.h
new file mode 100644
index 000000000..491cc31cf
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Layer/Scale.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SCALE_BUILDER_H__
+#define __SCALE_BUILDER_H__
+
+#include "GraphBuilder.h"
+
+#include "Context.h"
+
+namespace caffeimport
+{
+
+class ScaleBuilder final : public GraphBuilder
+{
+public:
+ void build(const ::caffe::LayerParameter &layer, GraphBuilderContext *context) const override;
+};
+
+} // namespace caffeimport
+
+#endif // __SCALE_BUILDER_H__
diff --git a/compiler/enco/frontend/caffe/src/Padding.h b/compiler/enco/frontend/caffe/src/Padding.h
new file mode 100644
index 000000000..98b018117
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Padding.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Padding.h
+ * @brief This file declares padding-related data structures.
+ */
+#ifndef __PADDING_H__
+#define __PADDING_H__
+
+#include <cstdint>
+#include <vector>
+
+/**
+ * @brief A PaddingBase encapsulates common implementation for derived Padding classes
+ */
+template <typename Derived> class PaddingBase
+{
+public:
+ virtual ~PaddingBase() = default;
+
+public:
+ uint32_t count(void) const { return _values.size(); }
+
+public:
+ uint32_t &value(uint32_t n) { return _values.at(n); }
+ const uint32_t &value(uint32_t n) const { return _values.at(n); }
+
+public:
+ void resize(uint32_t len) { return _values.resize(len); }
+
+private:
+ std::vector<uint32_t> _values;
+};
+
+/**
+ * @brief A RawPadding denotes padding values stored in Caffe model
+ *
+ * @note There may be a mismatch between the number of values in RawPadding and spatial rank
+ */
+struct RawPadding final : public PaddingBase<RawPadding>
+{
+ // Empty
+};
+
+/**
+ * @brief A SpatialPadding denotes padding values for each "spatial" dimension
+ *
+ * @note The number of values in SpatialPadding should be matched with spatial rank
+ */
+struct SpatialPadding final : public PaddingBase<SpatialPadding>
+{
+ // Empty
+};
+
+#endif // __PADDING_H__
diff --git a/compiler/enco/frontend/caffe/src/Padding.test.cpp b/compiler/enco/frontend/caffe/src/Padding.test.cpp
new file mode 100644
index 000000000..cb2495d06
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/Padding.test.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Padding.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+struct DerivedPadding : PaddingBase<DerivedPadding>
+{
+ // Empty
+};
+
+} // namespace
+
+TEST(PaddingTest, PaddingBase)
+{
+ DerivedPadding pad;
+
+ ASSERT_EQ(pad.count(), 0);
+
+ pad.resize(2);
+
+ ASSERT_EQ(pad.count(), 2);
+ ASSERT_EQ(pad.value(0), 0);
+ ASSERT_EQ(pad.value(1), 0);
+
+ pad.value(1) = 4;
+
+ ASSERT_EQ(pad.count(), 2);
+ ASSERT_EQ(pad.value(0), 0);
+ ASSERT_EQ(pad.value(1), 4);
+}
diff --git a/compiler/enco/frontend/caffe/src/PaddingUtils.cpp b/compiler/enco/frontend/caffe/src/PaddingUtils.cpp
new file mode 100644
index 000000000..ffb4bfbfd
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/PaddingUtils.cpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PaddingUtils.h"
+
+#include <cassert>
+
+//
+// Section: Raw Padding Builder
+//
+RawPadding RawPaddingBuilder::with(const ::caffe::ConvolutionParameter &param) const
+{
+ RawPadding res;
+
+ if (param.has_pad_h() || param.has_pad_w())
+ {
+ assert(param.pad().size() == 0);
+ assert(param.has_pad_h() && param.has_pad_w());
+
+ res.resize(2);
+ res.value(0) = param.pad_h();
+ res.value(1) = param.pad_w();
+ }
+ else
+ {
+ // NOTE pad and pad_h/pad_w cannot be specified at the same time
+ //
+ // Reference: BaseConvolutionLayer<Dtype>::LayerSetUp in base_conv_layer.cpp
+ assert(!param.has_pad_h() && !param.has_pad_w());
+
+ uint32_t rank = param.pad().size();
+
+ res.resize(rank);
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ res.value(axis) = param.pad(axis);
+ }
+ }
+
+ return res;
+}
+
+RawPadding RawPaddingBuilder::with(const ::caffe::PoolingParameter &param) const
+{
+ RawPadding res;
+
+ if (param.has_pad_h() || param.has_pad_w())
+ {
+ assert(!param.has_pad());
+ assert(param.has_pad_h() && param.has_pad_w());
+
+ res.resize(2);
+ res.value(0) = param.pad_h();
+ res.value(1) = param.pad_w();
+ }
+ else
+ {
+ // NOTE pad and pad_h/pad_w cannot be specified at the same time
+ //
+ // Reference: PoolingLayer<Dtype>::LayerSetUp in pooling_layer.cpp
+ assert(!param.has_pad_h() && !param.has_pad_w());
+
+ if (param.has_pad())
+ {
+ res.resize(1);
+ res.value(0) = param.pad();
+ }
+ }
+
+ return res;
+}
+
+RawPaddingBuilder build_raw_padding(void) { return RawPaddingBuilder{}; }
+
+//
+// Section: Spatial Padding Builder
+//
+SpatialPadding SpatialPaddingBuilder::with(const RawPadding &raw) const
+{
+ const auto spatial_rank = _spatial_rank;
+
+ SpatialPadding res;
+
+ res.resize(spatial_rank);
+
+ if (raw.count() == 0)
+ {
+ // NOTE default padding is 0
+ for (uint32_t spatial_axis = 0; spatial_axis < spatial_rank; ++spatial_axis)
+ {
+ res.value(spatial_axis) = 0;
+ }
+ }
+ else if (raw.count() == 1)
+ {
+ // NOTE One-for-all scheme
+ for (uint32_t spatial_axis = 0; spatial_axis < spatial_rank; ++spatial_axis)
+ {
+ res.value(spatial_axis) = raw.value(0);
+ }
+ }
+ else
+ {
+ // NOTE One-to-one scheme
+ assert(raw.count() == spatial_rank);
+ for (uint32_t spatial_axis = 0; spatial_axis < spatial_rank; ++spatial_axis)
+ {
+ res.value(spatial_axis) = raw.value(spatial_axis);
+ }
+ }
+
+ return res;
+}
+
+SpatialPaddingBuilder build_spatial_padding(uint32_t spatial_rank)
+{
+ return SpatialPaddingBuilder{spatial_rank};
+}
diff --git a/compiler/enco/frontend/caffe/src/PaddingUtils.h b/compiler/enco/frontend/caffe/src/PaddingUtils.h
new file mode 100644
index 000000000..81f32aaa8
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/PaddingUtils.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PADDING_UTILS_H__
+#define __PADDING_UTILS_H__
+
+#include "Padding.h"
+
+#include <caffe/proto/caffe.pb.h>
+
+/**
+ * @brief Construct a raw padding from each Layer parameter
+ *
+ * @note This class is an auxiliary class for build_raw_padding function below
+ */
+class RawPaddingBuilder
+{
+public:
+ friend RawPaddingBuilder build_raw_padding(void);
+
+private:
+ RawPaddingBuilder() = default;
+
+public:
+ RawPadding with(const ::caffe::ConvolutionParameter &) const;
+ RawPadding with(const ::caffe::PoolingParameter &) const;
+};
+
+/**
+ * RawPaddingBuilder is introduced to support the following code pattern:
+ *
+ * auto raw_padding = build_raw_padding().with(conv_param);
+ * ...
+ */
+RawPaddingBuilder build_raw_padding(void);
+
+/**
+ * @brief Convert a raw padding to a spatial padding of a given spatial rank
+ *
+ * @note This class is an auxiliary class for build_raw_padding function below
+ */
+class SpatialPaddingBuilder
+{
+public:
+ friend SpatialPaddingBuilder build_spatial_padding(uint32_t spatial_rank);
+
+private:
+ SpatialPaddingBuilder(uint32_t spatial_rank) : _spatial_rank{spatial_rank}
+ {
+ // DO NOTHING
+ }
+
+public:
+ SpatialPadding with(const RawPadding &raw) const;
+
+private:
+ uint32_t _spatial_rank = 0;
+};
+
+/**
+ * SpatialPaddingBuilder is introduced to support the following code pattern:
+ *
+ * auto raw_padding = build_raw_padding().with(conv_param);
+ * auto spatial_padding = build_spatial_padding(4).with(raw_padding);
+ */
+SpatialPaddingBuilder build_spatial_padding(uint32_t spatial_rank);
+
+#endif // __PADDING_UTILS_H__
diff --git a/compiler/enco/frontend/caffe/src/PoolingSpec.cpp b/compiler/enco/frontend/caffe/src/PoolingSpec.cpp
new file mode 100644
index 000000000..36216a2da
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/PoolingSpec.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PoolingSpec.h"
+#include "PaddingUtils.h"
+
+#include <map>
+#include <cassert>
+
+PoolingSpec::PoolingSpec(const ::caffe::PoolingParameter &param) : _param(param)
+{
+ // DO NOTHING
+}
+
+PoolingMethod PoolingSpec::method(void) const
+{
+ if (!_param.has_pool())
+ {
+ // Default pooling method is MAX
+ // Reference: http://caffe.berkeleyvision.org/tutorial/layers/pooling.html
+ return PoolingMethod::Max;
+ }
+
+ std::map<::caffe::PoolingParameter_PoolMethod, PoolingMethod> methods;
+
+ // NOTE STOCHASTIC Pooling is not supported, yet
+ // TODO Support STOCHASTIC Pooling
+ methods[::caffe::PoolingParameter_PoolMethod_MAX] = PoolingMethod::Max;
+ methods[::caffe::PoolingParameter_PoolMethod_AVE] = PoolingMethod::Avg;
+
+ assert(_param.has_pool());
+ return methods.at(_param.pool());
+}
+
+uint32_t PoolingSpec::window_height(void) const
+{
+ // NOTE Global pooling is not supported, yet
+ // TODO Support global pooling
+ assert(!_param.global_pooling());
+
+ if (_param.has_kernel_h())
+ {
+ return _param.kernel_h();
+ }
+
+ assert(_param.has_kernel_size());
+ return _param.kernel_size();
+}
+
+uint32_t PoolingSpec::window_width(void) const
+{
+ // NOTE Global pooling is not supported, yet
+ // TODO Support global pooling
+ assert(!_param.global_pooling());
+
+ if (_param.has_kernel_w())
+ {
+ return _param.kernel_w();
+ }
+
+ assert(_param.has_kernel_size());
+ return _param.kernel_size();
+}
+
+uint32_t PoolingSpec::vertical_pad(void) const
+{
+ // NOTE The input of Pooling SHOULD BE a rank-4 tensor.
+ // Reference: PoolingLayer<Dtype>::Reshape in pooling_layer.cpp
+ auto raw_padding = build_raw_padding().with(_param);
+ auto spatial_padding = build_spatial_padding(2 /* SPATIAL RANK */).with(raw_padding);
+ return spatial_padding.value(0 /* H */);
+}
+
+uint32_t PoolingSpec::horizontal_pad(void) const
+{
+ // NOTE The input of Pooling SHOULD BE a rank-4 tensor.
+ // Reference: PoolingLayer<Dtype>::Reshape in pooling_layer.cpp
+ auto raw_padding = build_raw_padding().with(_param);
+ auto spatial_padding = build_spatial_padding(2 /* SPATIAL RANK */).with(raw_padding);
+ return spatial_padding.value(1 /* W */);
+}
+
+uint32_t PoolingSpec::vertical_stride(void) const
+{
+ if (_param.has_stride_h())
+ {
+ return _param.stride_h();
+ }
+
+ if (_param.has_stride())
+ {
+ return _param.stride();
+ }
+
+ return 1;
+}
+
+uint32_t PoolingSpec::horizontal_stride(void) const
+{
+ if (_param.has_stride_w())
+ {
+ return _param.stride_w();
+ }
+
+ if (_param.has_stride())
+ {
+ return _param.stride();
+ }
+
+ return 1;
+}
+
+nncc::core::ADT::tensor::Shape PoolingSpec::ofm_shape(void) const
+{
+ nncc::core::ADT::tensor::Shape res;
+
+ // NOTE Caffe supports only pooling over rank-4 tensor
+ assert(_ifm_shape.rank() == 4);
+ res.resize(4);
+
+ // N (= the number of bacths) SHOULD be same
+ res.dim(0) = _ifm_shape.dim(0);
+ // C (= the number of chaanels) SHOULD be same
+ res.dim(1) = _ifm_shape.dim(1);
+
+ // H and W are derived from IFM, Window, and Padding
+ const auto effective_input_height = _ifm_shape.dim(2) + 2 * vertical_pad() - window_height();
+ const auto effective_input_width = _ifm_shape.dim(3) + 2 * horizontal_pad() - window_width();
+ // TODO Remove the following asserts
+ assert(effective_input_height % vertical_stride() == 0);
+ assert(effective_input_width % horizontal_stride() == 0);
+ res.dim(2) = effective_input_height / vertical_stride() + 1;
+ res.dim(3) = effective_input_width / horizontal_stride() + 1;
+ return res;
+}
diff --git a/compiler/enco/frontend/caffe/src/PoolingSpec.h b/compiler/enco/frontend/caffe/src/PoolingSpec.h
new file mode 100644
index 000000000..655a773ba
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/PoolingSpec.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __POOLING_SPEC_H__
+#define __POOLING_SPEC_H__
+
+#include <caffe/proto/caffe.pb.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+enum class PoolingMethod
+{
+ Max,
+ Avg
+};
+
+class PoolingSpec
+{
+public:
+ PoolingSpec(const ::caffe::PoolingParameter &param);
+
+public:
+ const nncc::core::ADT::tensor::Shape &ifm_shape(void) const { return _ifm_shape; }
+ void ifm_shape(const nncc::core::ADT::tensor::Shape &shape) { _ifm_shape = shape; }
+
+public:
+ PoolingMethod method(void) const;
+
+public:
+ uint32_t window_height(void) const;
+ uint32_t window_width(void) const;
+
+public:
+ uint32_t vertical_pad(void) const;
+ uint32_t horizontal_pad(void) const;
+
+public:
+ uint32_t vertical_stride(void) const;
+ uint32_t horizontal_stride(void) const;
+
+public:
+ nncc::core::ADT::tensor::Shape ofm_shape(void) const;
+
+private:
+ const ::caffe::PoolingParameter &_param;
+ nncc::core::ADT::tensor::Shape _ifm_shape;
+};
+
+#endif // __POOLING_SPEC_H__
diff --git a/compiler/enco/frontend/caffe/src/PoolingSpec.test.cpp b/compiler/enco/frontend/caffe/src/PoolingSpec.test.cpp
new file mode 100644
index 000000000..26bcaa09b
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/PoolingSpec.test.cpp
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PoolingSpec.h"
+#include "Importer.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <caffe/net.hpp>
+
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <sstream>
+#include <stdexcept>
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT;
+
+#define STRING(content) #content
+
+bool from_txt(const std::string &txt, ::caffe::PoolingParameter &out)
+{
+ std::stringstream ss{txt};
+ return from_txt(ss, out);
+}
+
+namespace
+{
+
+class SequentialBuilder
+{
+public:
+ SequentialBuilder(::caffe::NetParameter *net) : _net{net}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool addLayer(const std::string &prototxt)
+ {
+ auto layer = _net->add_layer();
+ std::stringstream ss{prototxt};
+ ::google::protobuf::io::IstreamInputStream iis{&ss};
+ return google::protobuf::TextFormat::Parse(&iis, layer);
+ }
+
+ bool addInputLayer(const tensor::Shape &shape)
+ {
+ auto param = new ::caffe::InputParameter;
+ {
+ auto s = param->add_shape();
+ for (uint32_t n = 0; n < shape.rank(); ++n)
+ {
+ s->add_dim(shape.dim(n));
+ }
+ }
+
+ auto layer = _net->add_layer();
+
+ layer->set_name("data");
+ layer->set_type("Input");
+ layer->add_top("data");
+ layer->set_allocated_input_param(param);
+
+ return true;
+ }
+
+private:
+ ::caffe::NetParameter *_net;
+};
+
+} // namespace
+
+namespace
+{
+
+class PoolingSpecTest : public ::testing::Test
+{
+protected:
+ tensor::Shape as_tensor_shape(const std::vector<int> &dims)
+ {
+ const uint32_t rank = dims.size();
+
+ tensor::Shape res;
+
+ res.resize(rank);
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ res.dim(axis) = dims.at(axis);
+ }
+
+ return res;
+ }
+};
+} // namespace
+
+TEST_F(PoolingSpecTest, ifm_shape)
+{
+ ::caffe::PoolingParameter param;
+ PoolingSpec spec{param};
+
+ const tensor::Shape ifm_shape{1, 3, 244, 244};
+
+ spec.ifm_shape(ifm_shape);
+
+ ASSERT_EQ(spec.ifm_shape(), ifm_shape);
+}
+
+namespace
+{
+} // namespace
+
+TEST_F(PoolingSpecTest, kernel_size_same_for_all)
+{
+ const tensor::Shape ifm_shape{1, 3, 16, 16};
+
+ ::caffe::NetParameter param;
+ {
+ SequentialBuilder builder{&param};
+
+ builder.addInputLayer(ifm_shape);
+
+ // clang-format off
+ const char *prototxt = STRING(
+ name : "pool"
+ type : "Pooling"
+ bottom : "data"
+ top : "pool"
+ pooling_param { kernel_size : 3 }
+ );
+ // clang-format on
+
+ builder.addLayer(prototxt);
+ }
+
+ ::caffe::Net<float> net{param};
+
+ PoolingSpec spec{param.layer(1).pooling_param()};
+
+ spec.ifm_shape(ifm_shape);
+
+ ASSERT_EQ(spec.window_height(), 3);
+ ASSERT_EQ(spec.window_width(), 3);
+
+ // Check 'ofm_shape'
+ {
+ auto expected = as_tensor_shape(net.blob_by_name("pool")->shape());
+ auto obtained = spec.ofm_shape();
+
+ ASSERT_EQ(expected, obtained);
+ }
+}
+
+TEST_F(PoolingSpecTest, pad_for_all)
+{
+ const tensor::Shape ifm_shape{1, 3, 15, 15};
+
+ ::caffe::NetParameter param;
+ {
+ SequentialBuilder builder{&param};
+
+ builder.addInputLayer(ifm_shape);
+
+ // clang-format off
+ const char *prototxt = STRING(
+ name : "pool"
+ type : "Pooling"
+ bottom : "data"
+ top : "pool"
+ pooling_param {
+ pool: MAX
+ kernel_size : 3
+ pad: 2
+ }
+ );
+ // clang-format on
+
+ builder.addLayer(prototxt);
+ }
+
+ ::caffe::Net<float> net{param};
+
+ PoolingSpec spec{param.layer(1).pooling_param()};
+
+ spec.ifm_shape(ifm_shape);
+
+ ASSERT_EQ(spec.vertical_pad(), 2);
+ ASSERT_EQ(spec.horizontal_pad(), 2);
+
+ // Check 'ofm_shape'
+ {
+ auto expected = as_tensor_shape(net.blob_by_name("pool")->shape());
+ auto obtained = spec.ofm_shape();
+
+ ASSERT_EQ(expected, obtained);
+ }
+}
+
+TEST_F(PoolingSpecTest, stride_for_all)
+{
+ const tensor::Shape ifm_shape{1, 3, 15, 15};
+
+ ::caffe::NetParameter param;
+ {
+ SequentialBuilder builder{&param};
+
+ builder.addInputLayer(ifm_shape);
+
+ // clang-format off
+ const char *prototxt = STRING(
+ name : "pool"
+ type : "Pooling"
+ bottom : "data"
+ top : "pool"
+ pooling_param {
+ pool: MAX
+ kernel_size : 3
+ stride: 2
+ }
+ );
+ // clang-format on
+
+ builder.addLayer(prototxt);
+ }
+
+ ::caffe::Net<float> net{param};
+
+ PoolingSpec spec{param.layer(1).pooling_param()};
+
+ spec.ifm_shape(ifm_shape);
+
+ ASSERT_EQ(spec.vertical_stride(), 2);
+ ASSERT_EQ(spec.horizontal_stride(), 2);
+
+ // Check 'ofm_shape'
+ {
+ auto expected = as_tensor_shape(net.blob_by_name("pool")->shape());
+ auto obtained = spec.ofm_shape();
+
+ ASSERT_EQ(expected, obtained);
+ }
+}
+
+TEST_F(PoolingSpecTest, method_none)
+{
+ const char *prototxt = "";
+
+ ::caffe::PoolingParameter param;
+ from_txt(prototxt, param);
+
+ PoolingSpec spec{param};
+
+ ASSERT_EQ(spec.method(), PoolingMethod::Max);
+}
+
+TEST_F(PoolingSpecTest, method_max)
+{
+ const char *prototxt = "pool: MAX";
+
+ ::caffe::PoolingParameter param;
+ from_txt(prototxt, param);
+
+ PoolingSpec spec{param};
+
+ ASSERT_EQ(spec.method(), PoolingMethod::Max);
+}
+
+TEST_F(PoolingSpecTest, method_avg)
+{
+ const char *prototxt = "pool: AVE";
+
+ ::caffe::PoolingParameter param;
+ from_txt(prototxt, param);
+
+ PoolingSpec spec{param};
+
+ ASSERT_EQ(spec.method(), PoolingMethod::Avg);
+}
diff --git a/compiler/enco/frontend/caffe/src/ShapeQuery.cpp b/compiler/enco/frontend/caffe/src/ShapeQuery.cpp
new file mode 100644
index 000000000..1166453b6
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/ShapeQuery.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ShapeQuery.h"
+
+#include <cassert>
+
+//
+// AxisSpecifier
+//
+AxisSpecifier axis_specifier(int32_t value) { return AxisSpecifier{value}; }
+
+//
+// ShapeQuery
+//
+uint32_t ShapeQuery::axis(const AxisSpecifier &specifier) const
+{
+ if (specifier.value() > 0)
+ {
+ return static_cast<uint32_t>(specifier.value());
+ }
+
+ assert(_shape->rank() >= static_cast<uint32_t>(-specifier.value()));
+ return static_cast<uint32_t>(_shape->rank() + specifier.value());
+}
+
+ShapeQuery query_on(const nncc::core::ADT::tensor::Shape &shape) { return ShapeQuery{&shape}; }
diff --git a/compiler/enco/frontend/caffe/src/ShapeQuery.h b/compiler/enco/frontend/caffe/src/ShapeQuery.h
new file mode 100644
index 000000000..260b6ad4d
--- /dev/null
+++ b/compiler/enco/frontend/caffe/src/ShapeQuery.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SHAPE_QUERY_H__
+#define __SHAPE_QUERY_H__
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+/**
+ * @brief A wrapper class for an integer number that specifies axis
+ *
+ * Several Caffe layers includes 'axis' parameter (which may be negative) which specifies
+ * some axis required for operation.
+ *
+ * Here are several examples:
+ * - Convolution layer uses 'axis' parameter to specify "channel" axis
+ * (http://caffe.berkeleyvision.org/tutorial/layers/convolution.html)
+ * - Concat layer uses 'axis' parameter to specify axis to be concatenated
+ * (http://caffe.berkeleyvision.org/tutorial/layers/concat.html)
+ *
+ * AxisSpecifier class is introduced to distinguish this 'axis' parameter from other integers
+ * (to prevent possible mistake).
+ */
+class AxisSpecifier
+{
+public:
+ explicit AxisSpecifier(int32_t value) : _value{value}
+ {
+ // DO NOTHING
+ }
+
+public:
+ int32_t value(void) const { return _value; }
+
+private:
+ int32_t _value = 1;
+};
+
+AxisSpecifier axis_specifier(int32_t value);
+
+/**
+ * @brief A wrapper class that allows additional queries over tensor shape.
+ */
+class ShapeQuery
+{
+public:
+ explicit ShapeQuery(const nncc::core::ADT::tensor::Shape *shape) : _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /// @brief Return the dimension number (axis) specified by a given axis specifier
+ uint32_t axis(const AxisSpecifier &) const;
+
+private:
+ const nncc::core::ADT::tensor::Shape *_shape;
+};
+
+ShapeQuery query_on(const nncc::core::ADT::tensor::Shape &);
+
+#endif // __SHAPE_QUERY_H__
diff --git a/compiler/enco/frontend/tflite/CMakeLists.txt b/compiler/enco/frontend/tflite/CMakeLists.txt
new file mode 100644
index 000000000..77159879e
--- /dev/null
+++ b/compiler/enco/frontend/tflite/CMakeLists.txt
@@ -0,0 +1,36 @@
+nnas_find_package(FlatBuffers QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+FlatBuffers_Target(enco_tflite_schema
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated"
+ SCHEMA_DIR "${CMAKE_CURRENT_SOURCE_DIR}/schema"
+ SCHEMA_FILES schema.fbs)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(enco_tflite_frontend SHARED ${SOURCES})
+target_include_directories(enco_tflite_frontend PRIVATE src)
+target_link_libraries(enco_tflite_frontend enco_intf_frontend)
+target_link_libraries(enco_tflite_frontend enco_intf_cmdline)
+target_link_libraries(enco_tflite_frontend flatbuffers)
+target_link_libraries(enco_tflite_frontend enco_tflite_schema)
+target_link_libraries(enco_tflite_frontend stdex)
+target_link_libraries(enco_tflite_frontend morph)
+target_link_libraries(enco_tflite_frontend cwrap)
+
+nnas_find_package(GTest QUIET)
+
+if(NOT GTest_FOUND)
+ return()
+endif(NOT GTest_FOUND)
+
+add_executable(enco_tflite_frontend_test ${TESTS})
+target_include_directories(enco_tflite_frontend_test PRIVATE src)
+target_link_libraries(enco_tflite_frontend_test gtest_main)
+target_link_libraries(enco_tflite_frontend_test enco_tflite_frontend)
+add_test(enco_tflite_frontend_test enco_tflite_frontend_test)
diff --git a/compiler/enco/frontend/tflite/schema/schema.fbs b/compiler/enco/frontend/tflite/schema/schema.fbs
new file mode 100644
index 000000000..3045351f2
--- /dev/null
+++ b/compiler/enco/frontend/tflite/schema/schema.fbs
@@ -0,0 +1,734 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+}
+
+// Parameters for converting a quantized tensor back to float. Given a
+// quantized value q, the corresponding float value f should be:
+// f = scale * (q - zero_point)
+table QuantizationParameters {
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+enum BuiltinOperator : byte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ // DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+
+ // The version of the operator. The version need to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+  // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional input and output tensors are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator.(e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+  // Metadata about the model. Indirects into the existing buffers list.
+ metadata_buffer:[int];
+}
+
+root_type Model;
diff --git a/compiler/enco/frontend/tflite/schema/schema.meta b/compiler/enco/frontend/tflite/schema/schema.meta
new file mode 100644
index 000000000..8cc1f4e62
--- /dev/null
+++ b/compiler/enco/frontend/tflite/schema/schema.meta
@@ -0,0 +1,2 @@
+Commit: 24963954a84a3e85dc8dfe79a15a01dc33fedab4
+URL: https://github.com/tensorflow/tensorflow/blob/2496395/tensorflow/contrib/lite/schema/schema.fbs
diff --git a/compiler/enco/frontend/tflite/src/Context.cpp b/compiler/enco/frontend/tflite/src/Context.cpp
new file mode 100644
index 000000000..ef030dc5d
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Context.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Context.h"
+
+#include "Convert.h"
+
+#include <coco/IR/Data.h>
+#include <coco/IR/Module.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <schema_generated.h>
+
+#include <map>
+#include <sstream>
+
+using namespace nncc::core::ADT;
+
+namespace tflimport
+{
+
+void TensorContext::prepare(const tflite::SubGraph *graph)
+{
+ for (uint32_t tensor_id = 0; tensor_id < graph->tensors()->size(); ++tensor_id)
+ {
+ auto const tensor_info = graph->tensors()->Get(tensor_id);
+ auto const tensor_name = tensor_info->name()->str();
+ auto const tensor_shape = as_tensor_shape(tensor_info->shape());
+ auto const tensor_type = tensor_info->type();
+
+ _name_ctx[tensor_id] = tensor_name;
+ _shape_ctx[tensor_id] = tensor_shape;
+ _type_ctx[tensor_id] = tensor_type;
+ }
+}
+
+TflOpCodeContext::TflOpCodeContext(
+ const flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>> *opcodes)
+{
+ for (const tflite::OperatorCode *opcode : *opcodes)
+ {
+ _opcodes.push_back(opcode);
+ }
+}
+
+tflite::BuiltinOperator TflOpCodeContext::builtin_code(const tflite::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _opcodes.size());
+ const tflite::OperatorCode *opcode = _opcodes.at(index);
+ return opcode->builtin_code();
+}
+
+std::string TflOpCodeContext::opcode_name(const tflite::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _opcodes.size());
+ const tflite::OperatorCode *opcode = _opcodes.at(index);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid: " << index << ")";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ return opcode->custom_code()->c_str();
+ }
+
+ tflite::BuiltinOperator code = opcode->builtin_code();
+ return EnumNameBuiltinOperator(code);
+}
+
+bool TflOpCodeContext::is_valid(const tflite::OperatorCode *opcode)
+{
+ tflite::BuiltinOperator code = opcode->builtin_code();
+ return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
+}
+
+bool TflOpCodeContext::is_custom(const tflite::OperatorCode *opcode)
+{
+ tflite::BuiltinOperator code = opcode->builtin_code();
+ return (code == tflite::BuiltinOperator_CUSTOM);
+}
+
+TflBufferContext::TflBufferContext(const tflite::Model *tfl_model)
+{
+ const flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>> *tfl_buffers;
+
+ tfl_buffers = tfl_model->buffers();
+
+ for (uint32_t buffer_id = 0; buffer_id < tfl_buffers->size(); ++buffer_id)
+ {
+ _buffer_ctx[buffer_id] = (*tfl_buffers)[buffer_id];
+ }
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Context.h b/compiler/enco/frontend/tflite/src/Context.h
new file mode 100644
index 000000000..f72385f9a
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Context.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONTEXT_H__
+#define __CONTEXT_H__
+
+#include "Convert.h"
+#include "TensorBags.h"
+
+#include <coco/IR/Data.h>
+#include <coco/IR/Module.h>
+
+#include <schema_generated.h>
+
+#include <map>
+
+using namespace nncc::core::ADT;
+
+namespace tflimport
+{
+
+/**
+ * @brief Extracts and holds operand(tensor) information such as name, shape, and type
+ */
+class TensorContext
+{
+public:
+ void prepare(const tflite::SubGraph *graph);
+
+ const std::string &name(uint32_t tensor_id) { return _name_ctx[tensor_id]; }
+ const tensor::Shape &shape(uint32_t tensor_id) { return _shape_ctx[tensor_id]; }
+ const tflite::TensorType &type(uint32_t tensor_id) { return _type_ctx[tensor_id]; }
+
+private:
+ std::map<uint32_t, std::string> _name_ctx;
+ std::map<uint32_t, tensor::Shape> _shape_ctx;
+ std::map<uint32_t, tflite::TensorType> _type_ctx;
+};
+
+/**
+ * @brief Class that holds operator codes and related methods
+ */
+class TflOpCodeContext
+{
+public:
+ TflOpCodeContext(const flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>> *opcodes);
+
+ /**
+ * @brief Returns BuiltinOperator value of the operator
+ */
+ tflite::BuiltinOperator builtin_code(const tflite::Operator *op) const;
+
+ /**
+ * @brief Returns human readable name of the operator code of the operator
+ *
+ * @note TF lite InterpreterBuilder sets an error state and returns error code
+ * for invalid opcode. Here we just return human readable message as
+ * this method returns a name for the operator code.
+ */
+ std::string opcode_name(const tflite::Operator *op) const;
+
+public:
+ static bool is_valid(const tflite::OperatorCode *opcode);
+ static bool is_custom(const tflite::OperatorCode *opcode);
+
+private:
+ std::vector<const tflite::OperatorCode *> _opcodes;
+};
+
+/**
+ * @brief Class to read and provide buffer information of tflite
+ */
+class TflBufferContext
+{
+public:
+ template <typename T> struct TflBuffer
+ {
+ TflBuffer(const T *p, size_t s) : ptr{p}, len{s} {};
+ const T *ptr;
+ size_t len;
+ };
+
+public:
+ explicit TflBufferContext(const tflite::Model *tfl_model);
+
+public:
+ template <typename T>
+ TflBuffer<T> tensor_buffer(const tflite::SubGraph *graph, uint32_t tensor_idx) const
+ {
+ TflBufferContext::TflBuffer<T> res{nullptr, 0};
+ const auto *tensor = graph->tensors()->Get(tensor_idx);
+ uint32_t tfl_buf_id = tensor->buffer();
+
+ assert(_buffer_ctx.size() > tfl_buf_id);
+
+ const tflite::Buffer *tfl_buffer = _buffer_ctx.at(tfl_buf_id);
+
+ if (auto *array = tfl_buffer->data())
+ {
+ if (size_t size = array->size())
+ {
+ assert(size % sizeof(T) == 0);
+
+ res.len = size / sizeof(T);
+ res.ptr = reinterpret_cast<const T *>(array->data());
+ }
+ }
+
+ return res;
+ }
+
+private:
+ std::map<uint32_t /* Buffer ID */, const tflite::Buffer *> _buffer_ctx;
+};
+
+/**
+ * @brief Class to store context to build IR from tflite
+ */
+class GraphBuilderContext
+{
+public:
+ explicit GraphBuilderContext(coco::Module *m, coco::Data *d, coco::Block *block,
+ TensorBags &tensor_bags, TensorContext &tensor_context,
+ TflBufferContext &buffer_context, const tflite::SubGraph *graph)
+ : _m(m), _d(d), _block(block), _tensor_bags(tensor_bags), _tensor_context(tensor_context),
+ _buffer_context(buffer_context), _graph(graph)
+ {
+ // DO NOTHING
+ }
+
+ GraphBuilderContext() = delete;
+ GraphBuilderContext(const GraphBuilderContext &) = delete;
+ GraphBuilderContext(GraphBuilderContext &&) = delete;
+
+public:
+ coco::Module *m() { return _m; }
+ coco::Data *d() { return _d; }
+ coco::Block *block() { return _block; }
+ TensorContext &tensor() { return _tensor_context; }
+ TensorBags &bags() { return _tensor_bags; }
+ TflBufferContext &buffer() { return _buffer_context; }
+ const tflite::SubGraph *graph() { return _graph; }
+
+private:
+ coco::Module *_m;
+ coco::Data *_d;
+ coco::Block *_block;
+ TensorContext &_tensor_context;
+ TensorBags &_tensor_bags;
+ TflBufferContext &_buffer_context;
+ const tflite::SubGraph *_graph;
+};
+
+} // namespace tflimport
+
+#endif // __CONTEXT_H__
diff --git a/compiler/enco/frontend/tflite/src/Convert.cpp b/compiler/enco/frontend/tflite/src/Convert.cpp
new file mode 100644
index 000000000..ffae95d01
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Convert.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <schema_generated.h>
+
+using namespace nncc::core::ADT;
+
+namespace tflimport
+{
+
+IndexVector as_index_vector(const flatbuffers::Vector<int32_t> *array)
+{
+ const uint32_t size = array->size();
+
+ std::vector<int32_t> res(size);
+
+ for (uint32_t i = 0; i < size; i++)
+ {
+ res[i] = array->Get(i);
+ }
+
+ return res;
+}
+
+tensor::Shape as_tensor_shape(const flatbuffers::Vector<int32_t> *shape)
+{
+ const uint32_t rank = shape->size();
+
+ tensor::Shape res;
+
+ res.resize(rank);
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ res.dim(axis) = shape->Get(axis);
+ }
+
+ return res;
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Convert.h b/compiler/enco/frontend/tflite/src/Convert.h
new file mode 100644
index 000000000..fb4c248bf
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Convert.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERT_H__
+#define __CONVERT_H__
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <schema_generated.h>
+
+using namespace nncc::core::ADT;
+
+namespace tflimport
+{
+
+using IndexVector = std::vector<int32_t>;
+
+/**
+ * @brief Converts flatbuffers::Vector to IndexVector
+ */
+IndexVector as_index_vector(const flatbuffers::Vector<int32_t> *array);
+
+/**
+ * @brief Converts flatbuffers::Vector to nncc::core::ADT::tensor::Shape
+ */
+tensor::Shape as_tensor_shape(const flatbuffers::Vector<int32_t> *shape);
+
+} // namespace tflimport
+
+#endif // __CONVERT_H__
diff --git a/compiler/enco/frontend/tflite/src/Entry.cpp b/compiler/enco/frontend/tflite/src/Entry.cpp
new file mode 100644
index 000000000..c69e18074
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Entry.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Frontend.h"
+#include "RawModelLoader.h"
+
+#include <cmdline/View.h>
+
+#include <stdex/Memory.h>
+
+#include <fstream>
+#include <cassert>
+
+using stdex::make_unique;
+
+extern "C" std::unique_ptr<enco::Frontend> make_frontend(const cmdline::View &cmdline)
+{
+ assert(cmdline.size() == 1); // tflite file name
+
+ auto model = load_from(cmdline.at(0));
+
+ return make_unique<Frontend>(std::move(model));
+}
diff --git a/compiler/enco/frontend/tflite/src/Frontend.cpp b/compiler/enco/frontend/tflite/src/Frontend.cpp
new file mode 100644
index 000000000..c64f181f4
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Frontend.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Frontend.h"
+#include "Context.h"
+#include "Convert.h"
+#include "TensorBags.h"
+#include "GraphBuilderRegistry.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <iostream>
+
+using namespace nncc::core::ADT;
+
+namespace tflimport
+{
+
+/**
+ * @brief Register every graph input tensor as a module input
+ *
+ * For each tensor id in 'inputs', an input entity is created with the
+ * tensor's shape, named after the tensor, bound to its bag, laid out
+ * lexically, and appended to the module's input list.
+ */
+void set_module_inputs(coco::Module *m, TensorContext &ctx, TensorBags &bags,
+                       const IndexVector &inputs)
+{
+  for (auto const tensor_id : inputs)
+  {
+    auto input = m->entity()->input()->create(ctx.shape(tensor_id));
+
+    input->name(ctx.name(tensor_id));
+    input->bag(bags.bag(tensor_id));
+    input->reorder<tensor::LexicalLayout>();
+
+    m->input()->insert(input);
+  }
+}
+
+/**
+ * @brief Register every graph output tensor as a module output
+ *
+ * Mirror image of set_module_inputs: creates one output entity per tensor
+ * id in 'outputs' and appends it to the module's output list.
+ */
+void set_module_outputs(coco::Module *m, TensorContext &ctx, TensorBags &bags,
+                        const IndexVector &outputs)
+{
+  for (auto const tensor_id : outputs)
+  {
+    auto output = m->entity()->output()->create(ctx.shape(tensor_id));
+
+    output->name(ctx.name(tensor_id));
+    output->bag(bags.bag(tensor_id));
+    output->reorder<tensor::LexicalLayout>();
+
+    m->output()->insert(output);
+  }
+}
+
+/**
+ * @brief Copy values of tfl tensors into coco::Data if the data was not copied
+ *
+ * Walks every (tflite tensor id, coco::Bag) pair and, for bags that have no
+ * weight storage allocated yet but whose tflite tensor carries buffer data,
+ * allocates f32 storage and copies the values element by element.
+ */
+void copy_tensors(GraphBuilderContext *ctx)
+{
+  auto d = ctx->d();
+
+  // for each bag, check if bag is not allocated but tflite tensor has values
+  for (auto &iter : ctx->bags())
+  {
+    auto tfl_tensor_id = iter.first;
+    auto bag = iter.second;
+
+    auto tfl_buffer = ctx->buffer().tensor_buffer<float>(ctx->graph(), tfl_tensor_id);
+
+    // TODO remove this line when support int32 is ready
+    if (ctx->tensor().type(tfl_tensor_id) == tflite::TensorType::TensorType_INT32)
+    {
+      std::cout << "*** INT32 COPYING IS NOT SUPPORTED ***" << std::endl;
+      continue;
+    }
+
+    // Only FLOAT32 tensors are expected past this point
+    assert(ctx->tensor().type(tfl_tensor_id) == tflite::TensorType::TensorType_FLOAT32);
+
+    auto span = d->f32()->weight(bag); // TODO support other type
+
+    // A non-empty span means this bag's weight was copied on an earlier call
+    if (!(span.data() == nullptr && span.size() == 0)) // already allocated
+      continue;
+
+    if (tfl_buffer.ptr == nullptr || tfl_buffer.len == 0) // no data to copy
+      continue;
+
+    d->f32()->allocate(bag);
+
+    // Re-fetch the span: it is only valid after allocate() above
+    auto ifm_span = d->f32()->weight(bag);
+    for (uint32_t idx = 0; idx < tfl_buffer.len; ++idx)
+    {
+      ifm_span[idx] = tfl_buffer.ptr[idx];
+    }
+  }
+}
+
+} // namespace tflimport
+
+/**
+ * @brief Construct a Frontend that takes ownership of the raw T/F Lite model
+ */
+Frontend::Frontend(std::unique_ptr<RawModel> &&raw) : _raw{std::move(raw)}
+{
+  // DO NOTHING
+}
+
+/**
+ * @brief Translate the T/F Lite model into a coco IR "Bundle"
+ *
+ * Prepares tensor metadata and bags, registers module inputs/outputs, then
+ * walks every operator of the (single) subgraph and delegates IR emission
+ * to the GraphBuilder registered for its builtin opcode.
+ *
+ * @throw std::runtime_error for invalid or unsupported operators
+ */
+enco::Bundle Frontend::load(void) const
+{
+  auto model = _raw->model();
+
+  // Only TFLite schema v3 models with a single subgraph are supported
+  assert(model->version() == 3);
+  assert(model->subgraphs()->size() == 1);
+
+  auto graph = model->subgraphs()->Get(0);
+
+  auto m = coco::Module::create();
+  auto d = coco::Data::create();
+
+  tflimport::TensorContext tensor_context;
+  tflimport::TensorBags tensor_bags;
+
+  tensor_context.prepare(graph);
+  tensor_bags.prepare(graph, m);
+
+  auto inputs = tflimport::as_index_vector(graph->inputs());
+  auto outputs = tflimport::as_index_vector(graph->outputs());
+
+  tflimport::set_module_inputs(m.get(), tensor_context, tensor_bags, inputs);
+  tflimport::set_module_outputs(m.get(), tensor_context, tensor_bags, outputs);
+
+  auto blk = m->entity()->block()->create();
+  m->block()->append(blk);
+
+  auto opcodes = model->operator_codes();
+
+  tflimport::TflBufferContext buffer_context(model);
+  tflimport::TflOpCodeContext opcode_context(opcodes);
+
+  auto operators = graph->operators();
+
+  tflimport::GraphBuilderContext opbuilder_context(m.get(), d.get(), blk, tensor_bags,
+                                                   tensor_context, buffer_context, graph);
+
+  // NOTE flatbuffers::Vector::Length() is unsigned; use an unsigned index
+  //      to avoid a signed/unsigned comparison
+  for (uint32_t i = 0; i < operators->Length(); ++i)
+  {
+    const auto *op = operators->Get(i);
+    tflite::BuiltinOperator builtincode = opcode_context.builtin_code(op);
+
+    if (const auto *graph_builder = tflimport::GraphBuilderRegistry::get().lookup(builtincode))
+    {
+      if (!graph_builder->validate(op))
+      {
+        throw std::runtime_error{"Invalid operator"};
+      }
+
+      graph_builder->build(op, &opbuilder_context);
+    }
+    else
+    {
+      std::string opcodename = opcode_context.opcode_name(op);
+      throw std::runtime_error{"Not supported: " + opcodename};
+    }
+
+    // copying unfilled tensor value
+    copy_tensors(&opbuilder_context);
+  }
+
+  // Create "Bundle"
+  enco::Bundle bundle;
+
+  bundle.module(std::move(m));
+  bundle.data(std::move(d));
+
+  // NOTE Plain return enables NRVO; 'return std::move(bundle)' would
+  //      pessimize by forcing a move (C++ Core Guidelines F.48)
+  return bundle;
+}
diff --git a/compiler/enco/frontend/tflite/src/Frontend.h b/compiler/enco/frontend/tflite/src/Frontend.h
new file mode 100644
index 000000000..bb0c9cd2c
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Frontend.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FRONTEND_H__
+#define __FRONTEND_H__
+
+#include "RawModel.h"
+
+#include <enco/Frontend.h>
+
+#include <flatbuffers/flatbuffers.h>
+
+#include <memory>
+
+/**
+ * @brief enco frontend that translates a T/F Lite model into an enco::Bundle
+ */
+class Frontend final : public enco::Frontend
+{
+public:
+  /// @brief Take ownership of the raw T/F Lite model
+  Frontend(std::unique_ptr<RawModel> &&raw);
+
+public:
+  /// @brief Translate the owned model into coco IR and return it as a Bundle
+  enco::Bundle load(void) const override;
+
+private:
+  // Keeps the flatbuffer model's backing storage alive for the whole lifetime
+  std::unique_ptr<RawModel> _raw;
+};
+
+#endif // __FRONTEND_H__
diff --git a/compiler/enco/frontend/tflite/src/Frontend.test.cpp b/compiler/enco/frontend/tflite/src/Frontend.test.cpp
new file mode 100644
index 000000000..aee6099e7
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Frontend.test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Frontend.h"
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+using stdex::make_unique;
+
+namespace
+{
+
+// RawModel stub that yields no flatbuffer model at all - sufficient for
+// tests that only construct a Frontend without calling load()
+struct MockRawModel final : public RawModel
+{
+  const tflite::Model *model(void) const override { return nullptr; }
+};
+
+} // namespace
+
+TEST(FrontendTest, constructor)
+{
+  // Let's test whether Frontend is actually constructible.
+  // NOTE load() is deliberately not exercised - MockRawModel carries no model
+  auto frontend = make_unique<Frontend>(make_unique<MockRawModel>());
+
+  ASSERT_NE(frontend, nullptr);
+}
diff --git a/compiler/enco/frontend/tflite/src/GraphBuilder.h b/compiler/enco/frontend/tflite/src/GraphBuilder.h
new file mode 100644
index 000000000..f2cb57848
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/GraphBuilder.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GRAPH_BUILDER_H__
+#define __GRAPH_BUILDER_H__
+
+#include "Context.h"
+
+#include <schema_generated.h>
+
+namespace tflimport
+{
+
+/**
+ * @brief Parent class of tflite operation graph builders (e.g., Conv2DGraphBuilder)
+ */
+class GraphBuilder
+{
+public:
+  /**
+   * @brief Check whether 'op' can be lowered (default: accept everything)
+   *
+   * TODO Declare "validate" method as a pure virtual method
+   *
+   * Q: Is it possible to validate T/F Lite model only with this interface?
+   */
+  virtual bool validate(const tflite::Operator *) const { return true; }
+
+  /// @brief Emit coco IR for 'op' into the module held by 'context'
+  virtual void build(const tflite::Operator *op, GraphBuilderContext *context) const = 0;
+
+  // Virtual destructor: concrete builders are deleted via GraphBuilder pointers
+  virtual ~GraphBuilder() {}
+};
+
+} // namespace tflimport
+
+#endif // __GRAPH_BUILDER_H__
diff --git a/compiler/enco/frontend/tflite/src/GraphBuilderRegistry.h b/compiler/enco/frontend/tflite/src/GraphBuilderRegistry.h
new file mode 100644
index 000000000..1ae882e89
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/GraphBuilderRegistry.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GRAPH_BUILDER_REGISTRY_H__
+#define __GRAPH_BUILDER_REGISTRY_H__
+
+#include "Op/Conv2D.h"
+#include "Op/DepthwiseConv2D.h"
+#include "Op/AveragePool2D.h"
+#include "Op/MaxPool2D.h"
+#include "Op/Concatenation.h"
+#include "Op/ReLU.h"
+#include "Op/ReLU6.h"
+#include "Op/Reshape.h"
+#include "Op/Sub.h"
+#include "Op/Div.h"
+
+#include <schema_generated.h>
+#include <stdex/Memory.h>
+
+#include <map>
+
+using stdex::make_unique;
+
+namespace tflimport
+{
+
+/**
+ * @brief Singleton registry mapping each supported tflite::BuiltinOperator
+ *        to its GraphBuilder implementation
+ */
+class GraphBuilderRegistry
+{
+public:
+  /**
+   * @brief Returns registered GraphBuilder pointer for BuiltinOperator or
+   *        nullptr if not registered
+   */
+  const GraphBuilder *lookup(tflite::BuiltinOperator op) const
+  {
+    // Single map traversal; the previous find-then-at pattern looked the
+    // key up twice
+    auto it = _builder_map.find(op);
+
+    return (it == _builder_map.end()) ? nullptr : it->second.get();
+  }
+
+  /// @brief Access the process-wide registry instance
+  static GraphBuilderRegistry &get()
+  {
+    static GraphBuilderRegistry me;
+    return me;
+  }
+
+private:
+  GraphBuilderRegistry()
+  {
+    // add GraphBuilder for each tflite operation.
+    _builder_map[tflite::BuiltinOperator_CONV_2D] = make_unique<Conv2DGraphBuilder>();
+    _builder_map[tflite::BuiltinOperator_DEPTHWISE_CONV_2D] =
+        make_unique<DepthwiseConv2DGraphBuilder>();
+    _builder_map[tflite::BuiltinOperator_AVERAGE_POOL_2D] = make_unique<AvgPool2DGraphBuilder>();
+    _builder_map[tflite::BuiltinOperator_MAX_POOL_2D] = make_unique<MaxPool2DGraphBuilder>();
+    _builder_map[tflite::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationGraphBuilder>();
+    _builder_map[tflite::BuiltinOperator_RELU] = make_unique<ReLUGraphBuilder>();
+    _builder_map[tflite::BuiltinOperator_RELU6] = make_unique<ReLU6GraphBuilder>();
+    _builder_map[tflite::BuiltinOperator_RESHAPE] = make_unique<ReshapeGraphBuilder>();
+    _builder_map[tflite::BuiltinOperator_SUB] = make_unique<SubGraphBuilder>();
+    _builder_map[tflite::BuiltinOperator_DIV] = make_unique<DivGraphBuilder>();
+  }
+
+private:
+  std::map<tflite::BuiltinOperator, std::unique_ptr<GraphBuilder>> _builder_map;
+};
+
+} // namespace tflimport
+
+#endif // __GRAPH_BUILDER_REGISTRY_H__
diff --git a/compiler/enco/frontend/tflite/src/IRBuilder.h b/compiler/enco/frontend/tflite/src/IRBuilder.h
new file mode 100644
index 000000000..edfe247e1
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/IRBuilder.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file IRBuilder.h
+ * @brief coco IR builders. This code is copied from the enco caffe frontend.
+ */
+#ifndef __IR_BUILDER_H__
+#define __IR_BUILDER_H__
+
+#include "coco/IR/Module.h"
+
+#include <deque>
+
+using namespace nncc::core::ADT;
+
+/**
+ * @brief Builds trees of coco::Op values on top of an internal operand stack
+ *
+ * Operations are pushed to / popped from the front of a deque, which lets
+ * callers compose nested expressions (e.g. Add(Load(a), Load(b))) step by step.
+ */
+class OpBuilder
+{
+public:
+  OpBuilder(coco::Module *module) : _module{module}
+  {
+    // A valid module is mandatory - every op is allocated through it
+    assert(_module != nullptr);
+  }
+
+public:
+  /// @brief Check whether the internal stack holds no operation
+  bool empty(void) const { return _ops.empty(); }
+
+  /// @brief Peek at the operation on top of the internal stack
+  coco::Op *top(void) const
+  {
+    assert(!_ops.empty());
+    return _ops.front();
+  }
+
+  /**
+   * @brief Push op onto the internal stack
+   *
+   * BEFORE| Stack
+   * AFTER | Op; Stack
+   */
+  OpBuilder &push(coco::Op *op)
+  {
+    _ops.push_front(op);
+    return (*this);
+  }
+
+  /**
+   * @brief Create "Load" op and push it onto the internal stack
+   *
+   * BEFORE| Stack
+   * AFTER | Load(obj); Stack
+   */
+  OpBuilder &load(coco::Object *obj)
+  {
+    auto load_op = _module->entity()->op()->create<coco::Load>();
+    load_op->object(obj);
+    return push(load_op);
+  }
+
+  /**
+   * @brief Create "Add" op and push it onto the internal stack
+   *
+   * BEFORE| Left; Right; Stack
+   * AFTER | Add(Left, Right); Stack
+   */
+  OpBuilder &add(void) { return binary<coco::Add>(); }
+
+  /**
+   * @brief Create "Mul" op and push it onto the internal stack
+   *
+   * BEFORE| Left; Right; Stack
+   * AFTER | Mul(Left, Right); Stack
+   */
+  OpBuilder &mul(void) { return binary<coco::Mul>(); }
+
+  /**
+   * @brief Pop op from the internal stack
+   *
+   * BEFORE| Op; Stack
+   * AFTER | Stack
+   */
+  coco::Op *pop(void)
+  {
+    assert(!_ops.empty());
+    auto popped = _ops.front();
+    _ops.pop_front();
+    return popped;
+  }
+
+private:
+  /// @brief Pop two operands (left first) and push a fresh binary op over them
+  template <typename ConcreteOp> OpBuilder &binary()
+  {
+    assert(_ops.size() >= 2);
+    auto lhs = pop();
+    auto rhs = pop();
+
+    auto combined = _module->entity()->op()->create<ConcreteOp>();
+    combined->left(lhs);
+    combined->right(rhs);
+
+    return push(combined);
+  }
+
+private:
+  coco::Module *_module;
+  std::deque<coco::Op *> _ops;
+};
+
+/// @brief Create an OpBuilder for the given module
+inline OpBuilder op_builder(coco::Module *m) { return OpBuilder{m}; }
+/// @brief Convenience overload that unwraps a managed module handle
+inline OpBuilder op_builder(const std::unique_ptr<coco::Module> &m) { return op_builder(m.get()); }
+
+/**
+ * @brief Builds coco instructions (Eval / Copy) for a given module
+ */
+class InstrBuilder
+{
+public:
+  InstrBuilder(coco::Module *module) : _module{module}
+  {
+    // A valid module is mandatory - every instruction is allocated through it
+    assert(_module != nullptr);
+  }
+
+public:
+  /**
+   * @brief Create "Eval" instruction with a given "Object" and "Op"
+   *
+   * @note "eval(out, op)" will create "%out <- Eval(op)" instruction
+   */
+  coco::Eval *eval(coco::Object *out, coco::Op *op) const
+  {
+    auto eval_ins = _module->entity()->instr()->create<coco::Eval>();
+
+    eval_ins->out(out);
+    eval_ins->op(op);
+
+    return eval_ins;
+  }
+
+  /**
+   * @brief Create "Copy" instruction with given two "Object"
+   *
+   * @note "copy(into, from)" will create "%into <- Copy(%from)" instruction
+   */
+  coco::Copy *copy(coco::Object *into, coco::Object *from) const
+  {
+    auto copy_ins = _module->entity()->instr()->create<coco::Copy>();
+
+    copy_ins->into(into);
+    copy_ins->from(from);
+
+    return copy_ins;
+  }
+
+private:
+  coco::Module *_module;
+};
+
+/// @brief Owning handle of a coco module
+using ModuleHandle = std::unique_ptr<coco::Module>;
+
+/// @brief Create an InstrBuilder for the given module
+inline InstrBuilder instr_builder(coco::Module *m) { return InstrBuilder{m}; }
+/// @brief Convenience overload that unwraps a managed module handle
+inline InstrBuilder instr_builder(const ModuleHandle &m) { return instr_builder(m.get()); }
+
+#endif // __IR_BUILDER_H__
diff --git a/compiler/enco/frontend/tflite/src/Op/Activation.cpp b/compiler/enco/frontend/tflite/src/Op/Activation.cpp
new file mode 100644
index 000000000..d6215ba34
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Activation.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Activation.h"
+
+#include <IRBuilder.h>
+
+#include <coco/IR/Module.h>
+#include <coco/IR/FeatureLayouts.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <schema_generated.h>
+
+#include <cassert>
+#include <stdexcept>
+
+using namespace nncc::core::ADT;
+
+namespace tflimport
+{
+
+/**
+ * @brief Append IR that applies a fused activation to 'ifm'
+ *
+ * Creates a fresh BHWC output feature object of the same shape as 'ifm' and
+ * appends the instruction that produces it (Copy for NONE, Eval(ReLU/ReLU6)
+ * otherwise) to 'block'.
+ *
+ * @return the newly created output feature object
+ * @throw std::runtime_error when 'act' is not supported yet
+ */
+coco::FeatureObject *build_activation(tflite::ActivationFunctionType act, coco::Block *block,
+                                      coco::FeatureObject *ifm)
+{
+  assert(ifm != nullptr && ifm->asFeature() != nullptr); // support feature only in this version
+
+  coco::Module *m = block->module();
+
+  auto shape = ifm->asFeature()->shape();
+
+  // creates output object
+  auto output_obj = m->entity()->object()->create<coco::FeatureObject>();
+  auto output_bag = m->entity()->bag()->create(num_elements(shape));
+  output_obj->bag(output_bag);
+  output_obj->layout(coco::FeatureLayouts::BHWC::create(shape));
+
+  switch (act)
+  {
+    case tflite::ActivationFunctionType::ActivationFunctionType_NONE:
+    {
+      // Create Copy Instr (copying from ifm to output_obj),
+      // redundant layer but optimized by backend
+      auto copy_ins = instr_builder(m).copy(output_obj, ifm);
+
+      // Append the instruction to the block
+      block->instr()->append(copy_ins);
+      break;
+    }
+    case tflite::ActivationFunctionType::ActivationFunctionType_RELU:
+    {
+      // Create Eval(output_obj, ReLU(load(ifm)))
+      auto load_op = op_builder(m).load(ifm).pop();
+      auto relu_op = m->entity()->op()->create<coco::ReLU>();
+      relu_op->arg(load_op);
+
+      auto eval_ins = instr_builder(m).eval(output_obj, relu_op);
+
+      // Append the instruction to the block
+      block->instr()->append(eval_ins);
+      break;
+    }
+    case tflite::ActivationFunctionType::ActivationFunctionType_RELU6:
+    {
+      // Create Eval(output_obj, ReLU6(load(ifm)))
+      auto load_op = op_builder(m).load(ifm).pop();
+      auto relu6_op = m->entity()->op()->create<coco::ReLU6>();
+      relu6_op->arg(load_op);
+
+      auto eval_ins = instr_builder(m).eval(output_obj, relu6_op);
+
+      // Append the instruction to the block
+      block->instr()->append(eval_ins);
+      break;
+    }
+    default:
+      // TODO support other fused activations
+      // NOTE 'assert(false)' alone is compiled out under NDEBUG, which would
+      //      silently return an object no instruction ever produces; follow
+      //      the frontend convention of throwing for unsupported constructs
+      throw std::runtime_error{"Unsupported fused activation"};
+  }
+
+  return output_obj;
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Op/Activation.h b/compiler/enco/frontend/tflite/src/Op/Activation.h
new file mode 100644
index 000000000..05306dd41
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Activation.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_ACTIVATION_H__
+#define __OP_ACTIVATION_H__
+
+#include <coco/IR/Block.h>
+#include <coco/IR/FeatureObject.h>
+
+#include <schema_generated.h>
+
+namespace tflimport
+{
+
+/**
+ * @brief Add coco::Eval for fused activation.
+ * This method creates an ofm object, appends Eval(ofm object, RELU(...)) into block,
+ * and returns ofm object.
+ */
+coco::FeatureObject *build_activation(tflite::ActivationFunctionType act, coco::Block *block,
+ coco::FeatureObject *ifm);
+} // namespace tflimport
+
+#endif // __OP_ACTIVATION_H__
diff --git a/compiler/enco/frontend/tflite/src/Op/AveragePool2D.cpp b/compiler/enco/frontend/tflite/src/Op/AveragePool2D.cpp
new file mode 100644
index 000000000..16f68fcdb
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/AveragePool2D.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AveragePool2D.h"
+
+#include "Convert.h"
+#include "IRBuilder.h"
+#include "GraphBuilder.h"
+#include "Padding.h"
+#include "Activation.h"
+
+#include <morph/tflite.h>
+#include <coco/IR/Module.h>
+#include <coco/IR/FeatureLayouts.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <schema_generated.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::tflite;
+
+namespace tflimport
+{
+
+/**
+ * @brief Reject AVERAGE_POOL_2D operators that cannot be lowered safely
+ */
+bool AvgPool2DGraphBuilder::validate(const tflite::Operator *op) const
+{
+  auto const options = op->builtin_options_as_Pool2DOptions();
+
+  // A malformed operator may carry no Pool2DOptions at all; the original
+  // code dereferenced 'options' unconditionally
+  if (options == nullptr)
+  {
+    return false;
+  }
+
+  if ((options->stride_h() == 0) || (options->stride_w() == 0))
+  {
+    return false;
+  }
+
+  return true;
+}
+
+/**
+ * @brief Translate a tflite AVERAGE_POOL_2D operator into coco IR
+ *
+ * Creates BHWC feature objects for the input/output tensors and appends an
+ * Eval(ofm, AvgPool2D(Load(ifm))) instruction to the current block.
+ */
+void AvgPool2DGraphBuilder::build(const tflite::Operator *op, GraphBuilderContext *context) const
+{
+  assert(context != nullptr); // check if init(..) is called
+
+  coco::Module *m = context->m();
+  coco::Block *blk = context->block();
+  TensorContext &tensor_context = context->tensor();
+  TensorBags &bags = context->bags();
+
+  IndexVector opinputs = as_index_vector(op->inputs());
+  IndexVector opoutputs = as_index_vector(op->outputs());
+
+  // these are fixed in tflite
+  // input index 0 : input feature
+  // output index 0 : output feature
+  assert(opinputs.size() == 1);
+  assert(opoutputs.size() == 1);
+
+  int ifm_idx = opinputs.at(0);
+  int ofm_idx = opoutputs.at(0);
+
+  const tensor::Shape &ifm_shape = tensor_context.shape(ifm_idx);
+  const tensor::Shape &ofm_shape = tensor_context.shape(ofm_idx);
+
+  // Create an object for an input feature map
+  coco::FeatureObject *ifm_obj = m->entity()->object()->create<coco::FeatureObject>();
+  coco::Bag *ifm_bag = bags.bag(ifm_idx);
+  ifm_obj->bag(ifm_bag);
+  ifm_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ifm_shape)));
+
+  // Create an object for an output feature map
+  coco::FeatureObject *ofm_obj = m->entity()->object()->create<coco::FeatureObject>();
+  coco::Bag *ofm_bag = bags.bag(ofm_idx);
+  ofm_obj->bag(ofm_bag);
+  ofm_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ofm_shape)));
+
+  // Create a Load op
+  auto coco_load = op_builder(m).load(ifm_obj).pop();
+
+  // Create a AvgPool2D
+  auto coco_avgpool2d = m->entity()->op()->create<coco::AvgPool2D>();
+  // NOTE 'params' is not nullptr-checked here; validate() runs first and
+  //      (at minimum) dereferences the same options
+  auto *params = op->builtin_options_as_Pool2DOptions();
+
+  // NOTE For Tensorflow lite, PaddingExcluded is needed
+  coco_avgpool2d->divisor(coco::AvgPool2D::Divisor::PaddingExcluded);
+
+  coco_avgpool2d->window()->height(params->filter_height());
+  coco_avgpool2d->window()->width(params->filter_width());
+
+  coco_avgpool2d->stride()->vertical(params->stride_h());
+  coco_avgpool2d->stride()->horizontal(params->stride_w());
+
+  coco::Padding2D padding =
+      pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
+
+  coco_avgpool2d->pad()->top(padding.top());
+  coco_avgpool2d->pad()->bottom(padding.bottom());
+  coco_avgpool2d->pad()->left(padding.left());
+  coco_avgpool2d->pad()->right(padding.right());
+
+  // Link ops
+  coco_avgpool2d->arg(coco_load);
+
+  // Create an Eval instruction
+  auto ins = instr_builder(m).eval(ofm_obj, coco_avgpool2d);
+
+  // Append the instruction to the block
+  blk->instr()->append(ins);
+
+  // TODO activation, e.g., relu
+  assert(params->fused_activation_function() ==
+         tflite::ActivationFunctionType::ActivationFunctionType_NONE);
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Op/AveragePool2D.h b/compiler/enco/frontend/tflite/src/Op/AveragePool2D.h
new file mode 100644
index 000000000..3e37e3cad
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/AveragePool2D.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_AVERAGEPOOL2D_H__
+#define __OP_AVERAGEPOOL2D_H__
+
+#include "GraphBuilder.h"
+
+#include <schema_generated.h>
+
+namespace tflimport
+{
+
+/**
+ * @brief GraphBuilder for AvgPool2D operator
+ */
+class AvgPool2DGraphBuilder : public GraphBuilder
+{
+public:
+  /// @brief Check the operator's Pool2D options (e.g. stride values) before building
+  bool validate(const tflite::Operator *op) const override;
+  /// @brief Emit coco IR (Load -> AvgPool2D -> Eval) for the operator
+  void build(const tflite::Operator *op, GraphBuilderContext *) const override;
+};
+
+} // namespace tflimport
+
+#endif // __OP_AVERAGEPOOL2D_H__
diff --git a/compiler/enco/frontend/tflite/src/Op/Concatenation.cpp b/compiler/enco/frontend/tflite/src/Op/Concatenation.cpp
new file mode 100644
index 000000000..ce0f47b21
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Concatenation.cpp
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Concatenation.h"
+
+#include "IRBuilder.h"
+#include "GraphBuilder.h"
+
+#include <coco/IR/Module.h>
+#include <coco/IR/FeatureLayouts.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <schema_generated.h>
+
+#include <array>
+#include <cassert>
+
+using namespace nncc::core::ADT;
+
+namespace
+{
+
+/**
+ * @brief Map a numeric tensor axis onto the matching ConcatF feature axis
+ *
+ * Returns Axis::Unknown for anything outside [0, 3].
+ */
+coco::ConcatF::Axis as_ConcatF_axis(uint32_t axis)
+{
+  // NOTE The feature map (in TensorFlow) is a rank-4 (NHWC) tensor
+  assert(axis < 4);
+
+  switch (axis)
+  {
+    case 0:
+      return coco::ConcatF::Axis::Batch;
+    case 1:
+      return coco::ConcatF::Axis::Height;
+    case 2:
+      return coco::ConcatF::Axis::Width;
+    case 3:
+      return coco::ConcatF::Axis::Depth;
+    default:
+      return coco::ConcatF::Axis::Unknown;
+  }
+}
+
+/**
+ * @brief Flatten a coco FeatureShape into a {batch, height, width, depth} array
+ */
+std::array<uint32_t, 4> as_dims(const coco::FeatureShape &shape)
+{
+  return std::array<uint32_t, 4>{shape.batch(), shape.height(), shape.width(), shape.depth()};
+}
+
+/**
+ * @brief Convert a rank-4 NHWC tensor shape into a coco FeatureShape
+ */
+coco::FeatureShape as_feature_shape(const tensor::Shape &shape)
+{
+  assert(shape.rank() == 4);
+
+  // NHWC dimensions, reordered into FeatureShape's (B, C, H, W) constructor
+  return coco::FeatureShape{shape.dim(0), shape.dim(3), shape.dim(1), shape.dim(2)};
+}
+
+} // namespace
+
+namespace tflimport
+{
+
+/**
+ * @brief Translate a tflite CONCATENATION operator into coco IR
+ *
+ * Lowers an N-way concatenation into a chain of binary ConcatF evaluations
+ * (%tmp = ConcatF(%tmp, %in[k])) followed by a final Copy into the output.
+ */
+void ConcatenationGraphBuilder::build(const tflite::Operator *op,
+                                      GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  coco::Module *m = context->m();
+  coco::Data *d = context->d(); // NOTE currently unused in this builder
+  coco::Block *blk = context->block();
+  TensorContext &tensor_context = context->tensor();
+  TensorBags &bags = context->bags();
+  IndexVector opinputs = as_index_vector(op->inputs());
+  IndexVector opoutputs = as_index_vector(op->outputs());
+
+  // these are fixed in tflite
+  // input index 0 ~ N : any number of input features
+  // output index 0 : one output feature
+  assert(opinputs.size() > 0);
+  assert(opoutputs.size() == 1);
+
+  // Default parameter values are referenced from schema_generated.h
+  int32_t concat_axis = 0;
+  tflite::ActivationFunctionType activation = tflite::ActivationFunctionType_NONE;
+
+  if (auto *concatenation_params = op->builtin_options_as_ConcatenationOptions())
+  {
+    activation = concatenation_params->fused_activation_function();
+    concat_axis = concatenation_params->axis();
+
+    // A negative axis counts from the back, numpy-style
+    const int32_t rank = static_cast<int32_t>(tensor_context.shape(opinputs.at(0)).rank());
+    if (concat_axis < 0)
+    {
+      concat_axis += rank;
+    }
+    assert(concat_axis >= 0);
+    assert(concat_axis < rank);
+  }
+  // Only rank-4 axes and non-fused activation are supported for now
+  assert(as_ConcatF_axis(concat_axis) != coco::ConcatF::Axis::Unknown);
+  assert(activation == tflite::ActivationFunctionType_NONE);
+
+  // Construct a vector of input objects
+  std::vector<coco::FeatureObject *> input_objects;
+
+  for (auto &input_index : opinputs)
+  {
+    const tensor::Shape &input_shape = tensor_context.shape(input_index);
+    coco::FeatureObject *input_obj = m->entity()->object()->create<coco::FeatureObject>();
+    coco::Bag *input_bag = bags.bag(input_index);
+    input_obj->bag(input_bag);
+    input_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(input_shape)));
+
+    input_objects.emplace_back(input_obj);
+  }
+
+  coco::FeatureObject *last_feature = input_objects.at(0);
+
+  assert(last_feature != nullptr);
+  assert(last_feature->bag() != nullptr);
+
+  // Update coco IR
+  //
+  // Given a sequence of input features %in[0] / %in[1] / ... / %in[N]
+  // the below code constructs a sequence of eval instructions
+  // - Load is omitted for simplicity
+  //
+  // %tmp = eval(ConcatF(%in[0], %in[1]))
+  // %tmp = eval(ConcatF(%tmp, %in[2]))
+  // ...
+  // %tmp = eval(ConcatF(%tmp, %in[N]))
+  // %out[0] = copy(%tmp)
+  //
+  for (uint32_t n = 1; n < input_objects.size(); ++n)
+  {
+    auto const left_feature = last_feature;
+    auto const left_shape = left_feature->layout()->shape();
+
+    auto right_feature = input_objects.at(n);
+    auto right_shape = right_feature->layout()->shape();
+
+    // Compute output dimensionalities
+    auto compute_out_dims = [&left_shape, &right_shape, concat_axis](void) {
+      std::array<uint32_t, 4> out_dims;
+
+      const auto left_dims = as_dims(left_shape);
+      const auto right_dims = as_dims(right_shape);
+
+      for (uint32_t axis = 0; axis < 4 /* FEATURE MAP RANK */; ++axis)
+      {
+        // The dimensionality of all the axes except the 'concat' axis SHOULD BE IDENTICAL
+        assert((concat_axis == axis) || (left_dims[axis] == right_dims[axis]));
+
+        out_dims[axis] = left_dims[axis];
+        if (axis == concat_axis)
+        {
+          out_dims[axis] += right_dims[axis];
+        }
+      }
+
+      return out_dims;
+    };
+
+    const auto out_dims = compute_out_dims();
+
+    const uint32_t B = out_dims[0 /* BATCH */];
+    const uint32_t C = out_dims[3 /* DEPTH */];
+    const uint32_t H = out_dims[1 /* HEIGHT */];
+    const uint32_t W = out_dims[2 /* WIDTH */];
+
+    const coco::FeatureShape out_shape{B, C, H, W};
+
+    // NOTE(review): 'B * num_elements(out_shape)' looks like it may count the
+    //               batch dimension twice if num_elements already includes it
+    //               - confirm against coco's num_elements(FeatureShape)
+    auto out_bag = m->entity()->bag()->create(B * num_elements(out_shape));
+    auto out_feature = m->entity()->object()->create<coco::FeatureObject>();
+
+    out_feature->bag(out_bag);
+    out_feature->layout(coco::FeatureLayouts::BHWC::create(out_shape));
+
+    auto left_load = op_builder(m).load(left_feature).pop();
+    auto right_load = op_builder(m).load(right_feature).pop();
+
+    auto concat_f = m->entity()->op()->create<coco::ConcatF>();
+
+    concat_f->axis(as_ConcatF_axis(concat_axis));
+    concat_f->left(left_load);
+    concat_f->right(right_load);
+
+    auto eval = instr_builder(m).eval(out_feature, concat_f);
+
+    // Append the constructed Eval instruction
+    blk->instr()->append(eval);
+
+    // Update 'last_feature'
+    last_feature = out_feature;
+  }
+
+  // Insert copy instruction from last_feature to output operand
+  int const ofm_idx = opoutputs.at(0);
+  auto const ofm_shape = tensor_context.shape(ofm_idx);
+
+  auto ofm_bag = bags.bag(ofm_idx);
+  auto ofm_obj = m->entity()->object()->create<coco::FeatureObject>();
+
+  ofm_obj->bag(ofm_bag);
+  ofm_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ofm_shape)));
+
+  // Create a Copy instruction from last into ofm
+  auto copy_ins = instr_builder(m).copy(ofm_obj, last_feature);
+
+  // Append the instruction
+  blk->instr()->append(copy_ins);
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Op/Concatenation.h b/compiler/enco/frontend/tflite/src/Op/Concatenation.h
new file mode 100644
index 000000000..eb7625a85
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Concatenation.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_CONCATENATION_H__
+#define __OP_CONCATENATION_H__
+
+#include "GraphBuilder.h"
+
+#include <schema_generated.h>
+
+namespace tflimport
+{
+
+/**
+ * @brief GraphBuilder for Concatenation operator
+ */
+class ConcatenationGraphBuilder : public GraphBuilder
+{
+public:
+ void build(const tflite::Operator *op, GraphBuilderContext *) const override;
+};
+
+} // namespace tflimport
+
+#endif // __OP_CONCATENATION_H__
diff --git a/compiler/enco/frontend/tflite/src/Op/Conv2D.cpp b/compiler/enco/frontend/tflite/src/Op/Conv2D.cpp
new file mode 100644
index 000000000..e9516c0e9
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Conv2D.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conv2D.h"
+
+#include "Convert.h"
+#include "IRBuilder.h"
+#include "GraphBuilder.h"
+#include "Padding.h"
+#include "Activation.h"
+
+#include <morph/tflite.h>
+#include <coco/IR/Module.h>
+#include <coco/IR/KernelLayouts.h>
+#include <coco/IR/FeatureLayouts.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <schema_generated.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::tflite;
+
+namespace tflimport
+{
+
+bool Conv2DGraphBuilder::validate(const tflite::Operator *op) const
+{
+ auto const options = op->builtin_options_as_Conv2DOptions();
+
+ if ((options->stride_h() == 0) || (options->stride_w() == 0))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+void Conv2DGraphBuilder::build(const tflite::Operator *op, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ // preparation
+ coco::Module *m = context->m();
+ coco::Data *d = context->d();
+ coco::Block *blk = context->block();
+ TensorContext &tensor_context = context->tensor();
+ TensorBags &bags = context->bags();
+ TflBufferContext &buffer_context = context->buffer();
+ const tflite::SubGraph *graph = context->graph();
+ IndexVector opinputs = as_index_vector(op->inputs());
+ IndexVector opoutputs = as_index_vector(op->outputs());
+
+ // these are fixed in tflite
+ // input index 0 : input feature
+ // input index 1 : kernel
+ // input index 2 : bias (optional)
+ bool hasBias = (opinputs.size() == 3);
+ assert(opinputs.size() == 2 || hasBias);
+ assert(opoutputs.size() == 1);
+
+ int ifm_idx = opinputs.at(0);
+ int ker_idx = opinputs.at(1);
+ int ofm_idx = opoutputs.at(0);
+
+ const tensor::Shape &ifm_shape = tensor_context.shape(ifm_idx);
+ const tensor::Shape &ofm_shape = tensor_context.shape(ofm_idx);
+ const tensor::Shape &ker_shape = tensor_context.shape(ker_idx);
+
+ // Create an input feature map object
+ auto *ifm_obj = m->entity()->object()->create<coco::FeatureObject>();
+ auto *ifm_bag = bags.bag(ifm_idx);
+ ifm_obj->bag(ifm_bag);
+ ifm_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ifm_shape)));
+
+ // Create an output feature map object
+ auto *ofm_obj = m->entity()->object()->create<coco::FeatureObject>();
+ auto *ofm_bag = bags.bag(ofm_idx);
+ ofm_obj->bag(ofm_bag);
+ ofm_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ofm_shape)));
+
+ // Create a kernel object
+ auto *ker_obj = m->entity()->object()->create<coco::KernelObject>();
+ auto *ker_bag = bags.bag(ker_idx);
+ ker_obj->bag(ker_bag);
+ ker_obj->layout(coco::KernelLayouts::NHWC::create(as_kernel_shape(ker_shape)));
+
+ // Create a Load op
+ auto load = op_builder(m).load(ifm_obj).pop();
+
+ // Create a Conv2D op
+ auto coco_conv2d = m->entity()->op()->create<coco::Conv2D>();
+
+ // populating Conv2D objects and options such as stride and padding
+ coco_conv2d->ker(ker_obj);
+
+ auto *conv_params = op->builtin_options_as_Conv2DOptions();
+
+ coco_conv2d->stride()->vertical(conv_params->stride_h());
+ coco_conv2d->stride()->horizontal(conv_params->stride_w());
+
+ // conv_params->padding() to left, top, right, bottom
+ coco::Padding2D padding = conv2D_padding(conv_params, ifm_shape, ker_shape);
+
+ coco_conv2d->pad()->top(padding.top());
+ coco_conv2d->pad()->bottom(padding.bottom());
+ coco_conv2d->pad()->left(padding.left());
+ coco_conv2d->pad()->right(padding.right());
+
+ // Link ops
+ coco_conv2d->arg(load);
+
+ // Object to store Conv2D output
+ auto *conv2d_obj = m->entity()->object()->create<coco::FeatureObject>();
+ auto *conv2d_bag = m->entity()->bag()->create(num_elements(ofm_shape));
+ conv2d_obj->bag(conv2d_bag);
+ conv2d_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ofm_shape)));
+
+ // Create an Eval instruction for Conv2D
+ auto conv2d_ins = instr_builder(m).eval(conv2d_obj, coco_conv2d);
+
+ // Append the instruction to the block
+ blk->instr()->append(conv2d_ins);
+
+ // Last Object to make a copy to Output Object
+ coco::FeatureObject *last_obj = conv2d_obj;
+
+ if (hasBias)
+ {
+ // When there is a bias, use btmp_obj as bias add output
+ // Bias is adding last_obj with bias weight values
+ auto *btmp_obj = m->entity()->object()->create<coco::FeatureObject>();
+ auto *btmp_bag = m->entity()->bag()->create(num_elements(ofm_shape));
+ btmp_obj->bag(btmp_bag);
+ btmp_obj->layout(coco::FeatureLayouts::BHWC::create(ofm_obj->shape()));
+
+ int bias_idx = opinputs.at(2);
+
+ // Create an object for bias
+ auto bias_obj = m->entity()->object()->create<coco::FeatureObject>();
+ coco::Bag *bias_bag = bags.bag(bias_idx);
+ bias_obj->bag(bias_bag);
+ bias_obj->layout(coco::FeatureLayouts::BC::create(ofm_obj->shape()));
+
+ // Create Op of conv2d output (last_obj) + bias values(bias_obj)
+ auto bias_add = op_builder(m).load(last_obj).load(bias_obj).add().pop();
+
+ // Create Instr as bias add result write to btmp_obj
+ auto bias_add_ins = instr_builder(m).eval(btmp_obj, bias_add);
+
+ // Append the instruction
+ blk->instr()->append(bias_add_ins);
+
+ // Update last_obj to btmp_obj
+ last_obj = btmp_obj;
+ }
+
+ // fused activation
+ coco::FeatureObject *act_output =
+ build_activation(conv_params->fused_activation_function(), blk, last_obj);
+
+ // Create Copy Instr of activation output (act_output) to Output Object
+ auto copy_ins = instr_builder(m).copy(ofm_obj, act_output);
+ blk->instr()->append(copy_ins);
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Op/Conv2D.h b/compiler/enco/frontend/tflite/src/Op/Conv2D.h
new file mode 100644
index 000000000..018815bd4
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Conv2D.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_CONV2D_H__
+#define __OP_CONV2D_H__
+
+#include "GraphBuilder.h"
+
+#include <schema_generated.h>
+
+namespace tflimport
+{
+
+/**
+ * @brief GraphBuilder for Conv2D operator
+ */
+class Conv2DGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tflite::Operator *op) const override;
+ void build(const tflite::Operator *op, GraphBuilderContext *context) const override;
+};
+
+} // namespace tflimport
+
+#endif // __OP_CONV2D_H__
diff --git a/compiler/enco/frontend/tflite/src/Op/DepthwiseConv2D.cpp b/compiler/enco/frontend/tflite/src/Op/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..e3d7b263e
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/DepthwiseConv2D.cpp
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConv2D.h"
+
+#include "Convert.h"
+#include "IRBuilder.h"
+#include "GraphBuilder.h"
+#include "Padding.h"
+#include "Activation.h"
+
+#include <morph/tflite.h>
+
+#include <coco/IR/Module.h>
+#include <coco/IR/KernelLayouts.h>
+#include <coco/IR/FeatureLayouts.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <schema_generated.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::tflite;
+
+namespace tflimport
+{
+
+bool DepthwiseConv2DGraphBuilder::validate(const tflite::Operator *op) const
+{
+ auto const options = op->builtin_options_as_DepthwiseConv2DOptions();
+
+ if ((options->stride_h() == 0) || (options->stride_w() == 0))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+void DepthwiseConv2DGraphBuilder::build(const tflite::Operator *op,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ // preparation
+ coco::Module *m = context->m();
+ coco::Data *d = context->d();
+ coco::Block *blk = context->block();
+ TensorContext &tensor_context = context->tensor();
+ TensorBags &bags = context->bags();
+ TflBufferContext &buffer_context = context->buffer();
+ const tflite::SubGraph *graph = context->graph();
+ IndexVector opinputs = as_index_vector(op->inputs());
+ IndexVector opoutputs = as_index_vector(op->outputs());
+
+ // these are fixed in tflite
+ // input index 0 : input feature
+ // input index 1 : kernel
+ // input index 2 : bias (optional)
+ bool hasBias = (opinputs.size() == 3);
+ assert(opinputs.size() == 2 || hasBias);
+ assert(opoutputs.size() == 1);
+
+ int ifm_idx = opinputs.at(0);
+ int ker_idx = opinputs.at(1);
+ int ofm_idx = opoutputs.at(0);
+
+ const tensor::Shape &ifm_shape = tensor_context.shape(ifm_idx);
+ const tensor::Shape &ofm_shape = tensor_context.shape(ofm_idx);
+ tensor::Shape &ker_shape = const_cast<tensor::Shape &>(tensor_context.shape(ker_idx));
+
+ assert(ifm_shape.rank() == 4);
+ assert(ofm_shape.rank() == 4);
+ assert(ker_shape.rank() == 4);
+
+ assert(ker_shape.dim(0) == 1); // value > 1 was not tested. This value seems 1 in DepthwiseConv2D
+ assert(ifm_shape.dim(3) == ofm_shape.dim(3));
+ assert(ofm_shape.dim(3) == ker_shape.dim(3));
+
+ // Create an input feature map object
+ auto *ifm_obj = m->entity()->object()->create<coco::FeatureObject>();
+ auto *ifm_bag = bags.bag(ifm_idx);
+ ifm_obj->bag(ifm_bag);
+ ifm_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ifm_shape)));
+
+ // Create an output feature map object
+ auto *ofm_obj = m->entity()->object()->create<coco::FeatureObject>();
+ auto *ofm_bag = bags.bag(ofm_idx);
+ ofm_obj->bag(ofm_bag);
+ ofm_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ofm_shape)));
+
+ // Create a kernel object
+ auto *ker_obj = m->entity()->object()->create<coco::KernelObject>();
+ auto *ker_bag = bags.bag(ker_idx);
+ ker_obj->bag(ker_bag);
+
+ // Adjust tflite kernel shape [1, h, w, channel_out] for coco::Kernel.
+ // coco::Kernel will have kernel.count = channel_out, kernel.depth = 1 ( == ker_shape.dim(0))
+ kernel::Shape new_shape{ker_shape.dim(3), 1, ker_shape.dim(1), ker_shape.dim(2)};
+ ker_obj->layout(coco::KernelLayouts::NHWC::create(new_shape));
+
+ // Create a kernel overlay for the kernel object
+ // TODO : support for other types
+ d->f32()->allocate(ker_bag);
+
+ TflBufferContext::TflBuffer<float> buffer = buffer_context.tensor_buffer<float>(graph, ker_idx);
+
+ auto ker_spn = d->f32()->weight(ker_bag);
+
+ // Copy data from tflBuffer of [1, h, w, channel_out] shape to coco::Data, which will be accessed
+ // by coco::KernelLayouts::NHWC
+ for (auto n = 0; n < new_shape.count(); n++)
+ {
+ auto tfl_c = n;
+ for (auto h = 0; h < new_shape.height(); h++)
+ {
+ for (auto w = 0; w < new_shape.width(); w++)
+ {
+ auto hw = new_shape.height() * new_shape.width();
+ for (auto c = 0; c < new_shape.depth(); c++)
+ {
+ auto tfl_n = c;
+ auto hwc = hw * new_shape.depth();
+ auto wc = new_shape.width() * new_shape.depth();
+
+ ker_spn[n * hwc + h * wc + w * new_shape.depth() + c] =
+ buffer.ptr[tfl_n * hw * new_shape.count() + /* new_shape.count() is old c */
+ h * new_shape.width() * new_shape.count() + w * new_shape.count() + tfl_c];
+ }
+ }
+ }
+ }
+
+ // Create a Load op
+ auto load = op_builder(m).load(ifm_obj).pop();
+
+ // Create a coco::Conv2D op for DepthwiseConv2D
+ auto coco_dconv2d = m->entity()->op()->create<coco::Conv2D>();
+
+ // populating objects and options such as stride and padding for DepthwiseConv2D
+ coco_dconv2d->ker(ker_obj);
+
+ // setting params passed from TFLITE DepthwiseConv2DOptions
+ auto dconv_params = op->builtin_options_as_DepthwiseConv2DOptions();
+
+ assert(dconv_params->depth_multiplier() == 1); // other depth_multiplier was not tested
+
+ coco_dconv2d->group(ifm_obj->asFeature()->shape().depth());
+
+ coco_dconv2d->stride()->vertical(dconv_params->stride_h());
+ coco_dconv2d->stride()->horizontal(dconv_params->stride_w());
+
+ coco::Padding2D padding = depthwiseConv2D_padding(dconv_params, ifm_shape, ker_shape);
+ coco_dconv2d->pad()->top(padding.top());
+ coco_dconv2d->pad()->bottom(padding.bottom());
+ coco_dconv2d->pad()->left(padding.left());
+ coco_dconv2d->pad()->right(padding.right());
+
+ // Link ops
+ coco_dconv2d->arg(load);
+
+ // Object to store output for DepthwiseConv2D
+ auto *dconv2d_obj = m->entity()->object()->create<coco::FeatureObject>();
+ auto *dconv2d_bag = m->entity()->bag()->create(num_elements(ofm_shape));
+ dconv2d_obj->bag(dconv2d_bag);
+ dconv2d_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ofm_shape)));
+
+ // Create an Eval instruction for DepthwiseConv2D
+ auto dconv2d_ins = instr_builder(m).eval(dconv2d_obj, coco_dconv2d);
+
+ // Append the instruction to the block
+ blk->instr()->append(dconv2d_ins);
+
+ // Last Object to make a copy to Output Object
+ coco::FeatureObject *last_obj = dconv2d_obj;
+
+ if (hasBias)
+ {
+ // When there is a bias, use btmp_obj as bias add output
+ // Bias is adding last_obj with bias weight values
+ auto *btmp_obj = m->entity()->object()->create<coco::FeatureObject>();
+ auto *btmp_bag = m->entity()->bag()->create(num_elements(ofm_shape));
+ btmp_obj->bag(btmp_bag);
+ btmp_obj->layout(coco::FeatureLayouts::BHWC::create(ofm_obj->shape()));
+
+ int bias_idx = opinputs.at(2);
+
+ // Create an object for bias
+ auto bias_obj = m->entity()->object()->create<coco::FeatureObject>();
+ coco::Bag *bias_bag = bags.bag(bias_idx);
+ bias_obj->bag(bias_bag);
+ bias_obj->layout(coco::FeatureLayouts::BC::create(ofm_obj->shape()));
+
+ // Create Op of conv2d output (last_obj) + bias values(bias_obj)
+ auto bias_add = op_builder(m).load(last_obj).load(bias_obj).add().pop();
+
+ // Create Instr as bias add result write to btmp_obj
+ auto bias_add_ins = instr_builder(m).eval(btmp_obj, bias_add);
+
+ // Append the instruction
+ blk->instr()->append(bias_add_ins);
+
+ // Update last_obj to btmp_obj
+ last_obj = btmp_obj;
+ }
+
+ // fused activation
+ coco::FeatureObject *act_output =
+ build_activation(dconv_params->fused_activation_function(), blk, last_obj);
+
+ // Create Copy Instr of activation output (act_output) to Output Object
+ auto copy_ins = instr_builder(m).copy(ofm_obj, act_output);
+ blk->instr()->append(copy_ins);
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Op/DepthwiseConv2D.h b/compiler/enco/frontend/tflite/src/Op/DepthwiseConv2D.h
new file mode 100644
index 000000000..b36b36b8f
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/DepthwiseConv2D.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_DEPTHWISECONV2D_H__
+#define __OP_DEPTHWISECONV2D_H__
+
+#include "GraphBuilder.h"
+
+#include <schema_generated.h>
+
+namespace tflimport
+{
+
+/**
+ * @brief GraphBuilder for DepthwiseConv2D operator
+ */
+class DepthwiseConv2DGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tflite::Operator *op) const override;
+ void build(const tflite::Operator *op, GraphBuilderContext *context) const override;
+};
+
+} // namespace tflimport
+
+#endif // __OP_DEPTHWISECONV2D_H__
diff --git a/compiler/enco/frontend/tflite/src/Op/Div.cpp b/compiler/enco/frontend/tflite/src/Op/Div.cpp
new file mode 100644
index 000000000..6b71be2e6
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Div.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Div.h"
+
+#include "Convert.h"
+#include "IRBuilder.h"
+#include "GraphBuilder.h"
+#include "Padding.h"
+#include "Activation.h"
+
+#include <morph/tflite.h>
+#include <coco/IR/Module.h>
+#include <coco/IR/FeatureLayouts.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <schema_generated.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::tflite;
+
+namespace tflimport
+{
+
+void DivGraphBuilder::build(const tflite::Operator *op, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ coco::Module *m = context->m();
+ coco::Block *blk = context->block();
+ TensorContext &tensor_context = context->tensor();
+ TensorBags &bags = context->bags();
+
+ IndexVector opinputs = as_index_vector(op->inputs());
+ IndexVector opoutputs = as_index_vector(op->outputs());
+
+ // these are fixed in tflite
+ // input index 0 : numerator
+ // input index 1 : denominator
+ // output index 0 : result
+ assert(opinputs.size() == 2);
+ assert(opoutputs.size() == 1);
+
+ tflite::ActivationFunctionType activation;
+ if (auto *options = op->builtin_options_as_DivOptions())
+ {
+ activation = options->fused_activation_function();
+ }
+ else
+ {
+ activation = tflite::ActivationFunctionType_NONE;
+ }
+
+ // TODO activation, e.g. ReLU
+ assert(activation == tflite::ActivationFunctionType_NONE);
+
+ auto num_idx = opinputs.at(0);
+ auto denom_idx = opinputs.at(1);
+ auto out_idx = opoutputs.at(0);
+
+ const tensor::Shape &num_shape = tensor_context.shape(num_idx);
+ const tensor::Shape &denom_shape = tensor_context.shape(denom_idx);
+ const tensor::Shape &out_shape = tensor_context.shape(out_idx);
+
+ // TODO Now input/output assumes Feature map, but Div should support generic object type
+ // Create an object for an input
+ auto *num_obj = m->entity()->object()->create<coco::FeatureObject>();
+ auto *num_bag = bags.bag(num_idx);
+ num_obj->bag(num_bag);
+ num_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(num_shape)));
+
+ auto *denom_obj = m->entity()->object()->create<coco::FeatureObject>();
+ auto *denom_bag = bags.bag(denom_idx);
+ denom_obj->bag(denom_bag);
+ denom_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(denom_shape)));
+
+ // Create an object for an output
+ auto *out_obj = m->entity()->object()->create<coco::FeatureObject>();
+ auto *out_bag = bags.bag(out_idx);
+ out_obj->bag(out_bag);
+ out_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(out_shape)));
+
+ // Create a Load ops for each input
+ auto coco_load_num = op_builder(m).load(num_obj).pop();
+ auto coco_load_denom = op_builder(m).load(denom_obj).pop();
+
+ // Create a Div op
+ auto coco_div = m->entity()->op()->create<coco::Div>();
+
+ // Link ops
+ coco_div->left(coco_load_num);
+ coco_div->right(coco_load_denom);
+
+ // Create an Eval instruction
+ auto eval_ins = instr_builder(m).eval(out_obj, coco_div);
+
+ // Append the instruction to the block
+ blk->instr()->append(eval_ins);
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Op/Div.h b/compiler/enco/frontend/tflite/src/Op/Div.h
new file mode 100644
index 000000000..053d1a441
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Div.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_DIV_H__
+#define __OP_DIV_H__
+
+#include "GraphBuilder.h"
+
+#include <schema_generated.h>
+
+namespace tflimport
+{
+
+/**
+ * @brief GraphBuilder for Div operator
+ */
+class DivGraphBuilder : public GraphBuilder
+{
+public:
+ void build(const tflite::Operator *op, GraphBuilderContext *) const override;
+};
+
+} // namespace tflimport
+
+#endif // __OP_DIV_H__
diff --git a/compiler/enco/frontend/tflite/src/Op/MaxPool2D.cpp b/compiler/enco/frontend/tflite/src/Op/MaxPool2D.cpp
new file mode 100644
index 000000000..ee4406425
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/MaxPool2D.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPool2D.h"
+
+#include "Convert.h"
+#include "IRBuilder.h"
+#include "GraphBuilder.h"
+#include "Padding.h"
+#include "Activation.h"
+
+#include <morph/tflite.h>
+#include <coco/IR/Module.h>
+#include <coco/IR/FeatureLayouts.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <schema_generated.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::tflite;
+
+namespace tflimport
+{
+
+bool MaxPool2DGraphBuilder::validate(const tflite::Operator *op) const
+{
+ auto const options = op->builtin_options_as_Pool2DOptions();
+
+ if ((options->stride_h() == 0) || (options->stride_w() == 0))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+void MaxPool2DGraphBuilder::build(const tflite::Operator *op, GraphBuilderContext *context) const
+{
+ assert(context != nullptr); // check if init(..) is called
+
+ coco::Module *m = context->m();
+ coco::Block *blk = context->block();
+ TensorContext &tensor_context = context->tensor();
+ TensorBags &bags = context->bags();
+
+ IndexVector opinputs = as_index_vector(op->inputs());
+ IndexVector opoutputs = as_index_vector(op->outputs());
+
+ // these are fixed in tflite
+ // input index 0 : input feature
+ // output index 0 : output feature
+ assert(opinputs.size() == 1);
+ assert(opoutputs.size() == 1);
+
+ int ifm_idx = opinputs.at(0);
+ int ofm_idx = opoutputs.at(0);
+
+ const tensor::Shape &ifm_shape = tensor_context.shape(ifm_idx);
+ const tensor::Shape &ofm_shape = tensor_context.shape(ofm_idx);
+
+ // Create an object for an input feature map
+ coco::FeatureObject *ifm_obj = m->entity()->object()->create<coco::FeatureObject>();
+ coco::Bag *ifm_bag = bags.bag(ifm_idx);
+ ifm_obj->bag(ifm_bag);
+ ifm_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ifm_shape)));
+
+ // Create an object for an output feature map
+ coco::FeatureObject *ofm_obj = m->entity()->object()->create<coco::FeatureObject>();
+ coco::Bag *ofm_bag = bags.bag(ofm_idx);
+ ofm_obj->bag(ofm_bag);
+ ofm_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ofm_shape)));
+
+ // Create a Load op
+ coco::Op *coco_load = op_builder(m).load(ifm_obj).pop();
+
+ // Create a MaxPool2D
+ coco::MaxPool2D *coco_maxpool2d = m->entity()->op()->create<coco::MaxPool2D>();
+ const tflite::Pool2DOptions *params = op->builtin_options_as_Pool2DOptions();
+
+ coco_maxpool2d->window()->height(params->filter_height());
+ coco_maxpool2d->window()->width(params->filter_width());
+
+ coco_maxpool2d->stride()->vertical(params->stride_h());
+ coco_maxpool2d->stride()->horizontal(params->stride_w());
+
+ coco::Padding2D padding =
+ pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
+
+ coco_maxpool2d->pad()->top(padding.top());
+ coco_maxpool2d->pad()->bottom(padding.bottom());
+ coco_maxpool2d->pad()->left(padding.left());
+ coco_maxpool2d->pad()->right(padding.right());
+
+ // Link ops
+ coco_maxpool2d->arg(coco_load);
+
+ // Create an Eval instruction
+ coco::Eval *ins = instr_builder(m).eval(ofm_obj, coco_maxpool2d);
+
+ // Append the instruction to the block
+ blk->instr()->append(ins);
+
+ // TODO activation, e.g., relu
+ assert(params->fused_activation_function() ==
+ tflite::ActivationFunctionType::ActivationFunctionType_NONE);
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Op/MaxPool2D.h b/compiler/enco/frontend/tflite/src/Op/MaxPool2D.h
new file mode 100644
index 000000000..06a828528
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/MaxPool2D.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_MAXPOOL2D_H__
+#define __OP_MAXPOOL2D_H__
+
+#include "GraphBuilder.h"
+
+#include <schema_generated.h>
+
+namespace tflimport
+{
+
+/**
+ * @brief GraphBuilder for MaxPool2D operator
+ */
+class MaxPool2DGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tflite::Operator *op) const override;
+ void build(const tflite::Operator *op, GraphBuilderContext *) const override;
+};
+
+} // namespace tflimport
+
+#endif // __OP_MAXPOOL2D_H__
diff --git a/compiler/enco/frontend/tflite/src/Op/Padding.cpp b/compiler/enco/frontend/tflite/src/Op/Padding.cpp
new file mode 100644
index 000000000..9a0e4ef41
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Padding.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Padding.h"
+
+#include "Convert.h"
+#include "TensorBags.h"
+
+#include <coco/IR/Data.h>
+#include <coco/IR/Module.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <schema_generated.h>
+
+#include <map>
+#include <sstream>
+#include <algorithm>
+#include <cassert>
+
+using namespace nncc::core::ADT;
+
+namespace tflimport
+{
+
+coco::Padding2D get_padding(const tensor::Shape &ifm_shape, const int kernel_w, const int kernel_h,
+ tflite::Padding padding, int stride_w, int stride_h,
+ int dilation_w_factor, int dilation_h_factor)
+{
+ assert(stride_w != 0);
+ assert(stride_h != 0);
+ assert(ifm_shape.rank() == 4);
+
+ /**
+ * Compute [top padding + bottom padding] (or [left padding + right padding]).
+ * If this returns an even number, top = return value / 2 and bottom = return value - top
+ * If this returns an odd number, top = return value / 2 and bottom = return value - top (so,
+ * bottom = top + 1)
+ *
+ * Code based on https://www.tensorflow.org/api_guides/python/nn#Convolution
+ */
+ auto compute_padding = [](tflite::Padding padding, int stride, int dilation_rate, int in_size,
+ int filter_size) {
+ int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ if (padding == tflite::Padding_SAME)
+ {
+ if (in_size % stride == 0)
+ return std::max(effective_filter_size - stride, 0);
+ else
+ return std::max(effective_filter_size - (in_size % stride), 0);
+ }
+ else // padding == VALID
+ {
+ return 0;
+ }
+ };
+
+ // ifm shape is from order of NHWC. ifm W = dim(2), ifm H = dim(1)
+ int padding_w = compute_padding(padding, stride_w, dilation_w_factor, ifm_shape.dim(2), kernel_w);
+ int padding_h = compute_padding(padding, stride_h, dilation_h_factor, ifm_shape.dim(1), kernel_h);
+
+ coco::Padding2D coco_padding;
+ coco_padding.top(padding_h / 2).bottom(padding_h - padding_h / 2);
+ coco_padding.left(padding_w / 2).right(padding_w - padding_w / 2);
+
+ return coco_padding;
+}
+
+coco::Padding2D pool2D_padding(const tflite::Pool2DOptions *options, const tensor::Shape &ifm_shape,
+ const int filter_w, const int filter_h)
+{
+ return get_padding(ifm_shape, filter_w, filter_h, options->padding(), options->stride_w(),
+ options->stride_h(), 1, 1);
+}
+
+coco::Padding2D conv2D_padding(const tflite::Conv2DOptions *options, const tensor::Shape &ifm_shape,
+ const tensor::Shape &kernel_shape)
+{
+ return get_padding(ifm_shape, kernel_shape.dim(2), kernel_shape.dim(1), /* kernel layout: NHWC */
+ options->padding(), options->stride_w(), options->stride_h(),
+ options->dilation_w_factor(), options->dilation_h_factor());
+}
+
+coco::Padding2D depthwiseConv2D_padding(const tflite::DepthwiseConv2DOptions *options,
+ const tensor::Shape &ifm_shape,
+ const tensor::Shape &kernel_shape)
+{
+ return get_padding(ifm_shape, kernel_shape.dim(2), kernel_shape.dim(1), /* kernel layout: NHWC */
+ options->padding(), options->stride_w(), options->stride_h(),
+ options->dilation_w_factor(), options->dilation_h_factor());
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Op/Padding.h b/compiler/enco/frontend/tflite/src/Op/Padding.h
new file mode 100644
index 000000000..ac84adeb7
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Padding.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_PADDING_H__
+#define __OP_PADDING_H__
+
+#include <coco/IR/Padding2D.h>
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <schema_generated.h>
+
+using namespace nncc::core::ADT;
+
+namespace tflimport
+{
+
+coco::Padding2D pool2D_padding(const tflite::Pool2DOptions *options, const tensor::Shape &ifm_shape,
+ const int filter_w, const int filter_h);
+
+coco::Padding2D conv2D_padding(const tflite::Conv2DOptions *options, const tensor::Shape &ifm_shape,
+ const tensor::Shape &kernel_shape);
+
+coco::Padding2D depthwiseConv2D_padding(const tflite::DepthwiseConv2DOptions *options,
+ const tensor::Shape &ifm_shape,
+ const tensor::Shape &kernel_shape);
+
+} // namespace tflimport
+
+#endif // __OP_PADDING_H__
diff --git a/compiler/enco/frontend/tflite/src/Op/ReLU.cpp b/compiler/enco/frontend/tflite/src/Op/ReLU.cpp
new file mode 100644
index 000000000..4922f4d1f
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/ReLU.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU.h"
+
+#include "IRBuilder.h"
+#include "GraphBuilder.h"
+
+#include <morph/tflite.h>
+#include <coco/IR/Module.h>
+#include <coco/IR/FeatureLayouts.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <schema_generated.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::tflite;
+
+namespace tflimport
+{
+
+/**
+ * @brief Lower a tflite ReLU operator into a coco Eval(ReLU(Load(ifm))) instruction
+ *
+ * Creates BHWC-layout FeatureObjects over the pre-created input/output bags,
+ * builds a Load -> ReLU op chain, and appends an Eval instruction to the
+ * context's current block.
+ */
+void ReLUGraphBuilder::build(const tflite::Operator *op, GraphBuilderContext *context) const
+{
+  assert(context != nullptr); // check if init(..) is called
+
+  coco::Module *m = context->m();
+  coco::Block *blk = context->block();
+  TensorContext &tensor_context = context->tensor();
+  TensorBags &bags = context->bags();
+
+  IndexVector opinputs = as_index_vector(op->inputs());
+  IndexVector opoutputs = as_index_vector(op->outputs());
+
+  // these are fixed in tflite
+  // input index 0 : input feature
+  // output index 0 : output feature
+  assert(opinputs.size() == 1);
+  assert(opoutputs.size() == 1);
+
+  auto ifm_idx = opinputs.at(0);
+  auto ofm_idx = opoutputs.at(0);
+
+  const tensor::Shape &ifm_shape = tensor_context.shape(ifm_idx);
+  const tensor::Shape &ofm_shape = tensor_context.shape(ofm_idx);
+
+  // Create an object for an input feature map
+  coco::FeatureObject *ifm_obj = m->entity()->object()->create<coco::FeatureObject>();
+  coco::Bag *ifm_bag = bags.bag(ifm_idx);
+  ifm_obj->bag(ifm_bag);
+  ifm_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ifm_shape)));
+
+  // Create an object for an output feature map
+  coco::FeatureObject *ofm_obj = m->entity()->object()->create<coco::FeatureObject>();
+  coco::Bag *ofm_bag = bags.bag(ofm_idx);
+  ofm_obj->bag(ofm_bag);
+  ofm_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ofm_shape)));
+
+  // Create a Load op
+  auto coco_load = op_builder(m).load(ifm_obj).pop();
+
+  // Create a ReLU
+  auto coco_relu = m->entity()->op()->create<coco::ReLU>();
+
+  // Link ops
+  coco_relu->arg(coco_load);
+
+  // Create an Eval instruction
+  auto eval_ins = instr_builder(m).eval(ofm_obj, coco_relu);
+
+  // Append the instruction to the block
+  blk->instr()->append(eval_ins);
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Op/ReLU.h b/compiler/enco/frontend/tflite/src/Op/ReLU.h
new file mode 100644
index 000000000..c78400d7e
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/ReLU.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_RELU_H__
+#define __OP_RELU_H__
+
+#include "GraphBuilder.h"
+
+#include <schema_generated.h>
+
+namespace tflimport
+{
+
+/**
+ * @brief GraphBuilder for ReLU operator
+ */
+class ReLUGraphBuilder : public GraphBuilder
+{
+public:
+  // Append coco IR equivalent to the given tflite ReLU operator to the context's block
+  void build(const tflite::Operator *op, GraphBuilderContext *) const override;
+};
+
+} // namespace tflimport
+
+#endif // __OP_RELU_H__
diff --git a/compiler/enco/frontend/tflite/src/Op/ReLU6.cpp b/compiler/enco/frontend/tflite/src/Op/ReLU6.cpp
new file mode 100644
index 000000000..936fda3e2
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/ReLU6.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU6.h"
+
+#include "IRBuilder.h"
+#include "GraphBuilder.h"
+
+#include <morph/tflite.h>
+#include <coco/IR/Module.h>
+#include <coco/IR/FeatureLayouts.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <schema_generated.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::tflite;
+
+namespace tflimport
+{
+
+/**
+ * @brief Lower a tflite ReLU6 operator into a coco Eval(ReLU6(Load(ifm))) instruction
+ *
+ * Mirrors ReLUGraphBuilder::build, substituting the coco::ReLU6 op.
+ */
+void ReLU6GraphBuilder::build(const tflite::Operator *op, GraphBuilderContext *context) const
+{
+  assert(context != nullptr); // check if init(..) is called
+
+  coco::Module *m = context->m();
+  coco::Block *blk = context->block();
+  TensorContext &tensor_context = context->tensor();
+  TensorBags &bags = context->bags();
+
+  IndexVector opinputs = as_index_vector(op->inputs());
+  IndexVector opoutputs = as_index_vector(op->outputs());
+
+  // these are fixed in tflite
+  // input index 0 : input feature
+  // output index 0 : output feature
+  assert(opinputs.size() == 1);
+  assert(opoutputs.size() == 1);
+
+  int ifm_idx = opinputs.at(0);
+  int ofm_idx = opoutputs.at(0);
+
+  const tensor::Shape &ifm_shape = tensor_context.shape(ifm_idx);
+  const tensor::Shape &ofm_shape = tensor_context.shape(ofm_idx);
+
+  // Create an object for an input feature map
+  coco::FeatureObject *ifm_obj = m->entity()->object()->create<coco::FeatureObject>();
+  coco::Bag *ifm_bag = bags.bag(ifm_idx);
+  ifm_obj->bag(ifm_bag);
+  ifm_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ifm_shape)));
+
+  // Create an object for an output feature map
+  coco::FeatureObject *ofm_obj = m->entity()->object()->create<coco::FeatureObject>();
+  coco::Bag *ofm_bag = bags.bag(ofm_idx);
+  ofm_obj->bag(ofm_bag);
+  ofm_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(ofm_shape)));
+
+  // Create a Load op
+  auto coco_load = op_builder(m).load(ifm_obj).pop();
+
+  // Create a ReLU6
+  auto coco_relu6 = m->entity()->op()->create<coco::ReLU6>();
+
+  // Link ops
+  coco_relu6->arg(coco_load);
+
+  // Create an Eval instruction
+  auto eval_ins = instr_builder(m).eval(ofm_obj, coco_relu6);
+
+  // Append the instruction to the block
+  blk->instr()->append(eval_ins);
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Op/ReLU6.h b/compiler/enco/frontend/tflite/src/Op/ReLU6.h
new file mode 100644
index 000000000..10bcd4f71
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/ReLU6.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_RELU6_H__
+#define __OP_RELU6_H__
+
+#include "GraphBuilder.h"
+
+#include <schema_generated.h>
+
+namespace tflimport
+{
+
+/**
+ * @brief GraphBuilder for ReLU6 operator
+ */
+class ReLU6GraphBuilder : public GraphBuilder
+{
+public:
+  // Append coco IR equivalent to the given tflite ReLU6 operator to the context's block
+  void build(const tflite::Operator *op, GraphBuilderContext *) const override;
+};
+
+} // namespace tflimport
+
+#endif // __OP_RELU6_H__
diff --git a/compiler/enco/frontend/tflite/src/Op/Reshape.cpp b/compiler/enco/frontend/tflite/src/Op/Reshape.cpp
new file mode 100644
index 000000000..9bd473fa9
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Reshape.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reshape.h"
+
+#include "IRBuilder.h"
+#include "GraphBuilder.h"
+
+#include <morph/tflite.h>
+#include <coco/IR/Module.h>
+#include <coco/IR/FeatureLayouts.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <schema_generated.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::tflite;
+
+namespace tflimport
+{
+
+/**
+ * @brief Lower a tflite Reshape operator into a coco Shuffle instruction
+ *
+ * Reshape does not change any element value, so it is implemented as a 1:1
+ * element-wise copy (Shuffle) from the input bag into the output bag.
+ */
+void ReshapeGraphBuilder::build(const tflite::Operator *op, GraphBuilderContext *context) const
+{
+  assert(context != nullptr); // check if init(..) is called
+
+  coco::Module *m = context->m();
+  coco::Block *blk = context->block();
+  TensorBags &bags = context->bags();
+
+  IndexVector opinputs = as_index_vector(op->inputs());
+  IndexVector opoutputs = as_index_vector(op->outputs());
+
+  // these are fixed in tflite
+  // input index 0 : input feature
+  // input index 1 : output shape (int32_t), (optional or not, is not clear)
+  // output index 0 : output feature
+  assert(opinputs.size() == 1 || opinputs.size() == 2);
+  assert(opoutputs.size() == 1);
+
+  // Note: there are actually 3 places where we can get output shape from
+  // current TF lite implementation. From output operand shape, second input,
+  // and ReshapeOption (new_shape). Here we use output operand shape
+  int ifm_idx = opinputs.at(0);
+  int ofm_idx = opoutputs.at(0);
+
+  auto ifm_bag = bags.bag(ifm_idx);
+  auto ofm_bag = bags.bag(ofm_idx);
+
+  // TODO: move to InstrBuilder as 'shuffle_elements()'
+  // Create a 1:1 shuffle instruction from ifm into ofm
+  // Note: Reshape is change of shape information and there is no value change
+  // in the bag itself. We implement this as just make a element wise copy of
+  // the bag from input to output. So there is no need of 'reshape' operator
+  auto shuffle_ins = m->entity()->instr()->create<coco::Shuffle>();
+  auto num_elem = ifm_bag->size();
+
+  // Reshape must preserve the total number of elements
+  assert(num_elem == ofm_bag->size());
+
+  shuffle_ins->from(ifm_bag);
+  shuffle_ins->into(ofm_bag);
+
+  // Identity mapping: element n of the input bag goes to element n of the output bag
+  for (uint32_t n = 0; n < num_elem; ++n)
+  {
+    const auto from = coco::ElemID(n);
+    const auto into = coco::ElemID(n);
+
+    shuffle_ins->insert(from, into);
+  }
+
+  // Append the instruction
+  blk->instr()->append(shuffle_ins);
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Op/Reshape.h b/compiler/enco/frontend/tflite/src/Op/Reshape.h
new file mode 100644
index 000000000..7447b56c8
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Reshape.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_RESHAPE_H__
+#define __OP_RESHAPE_H__
+
+#include "GraphBuilder.h"
+
+#include <schema_generated.h>
+
+namespace tflimport
+{
+
+/**
+ * @brief GraphBuilder for Reshape operator
+ */
+class ReshapeGraphBuilder : public GraphBuilder
+{
+public:
+  // Append coco IR equivalent to the given tflite Reshape operator to the context's block
+  void build(const tflite::Operator *op, GraphBuilderContext *) const override;
+};
+
+} // namespace tflimport
+
+#endif // __OP_RESHAPE_H__
diff --git a/compiler/enco/frontend/tflite/src/Op/Sub.cpp b/compiler/enco/frontend/tflite/src/Op/Sub.cpp
new file mode 100644
index 000000000..62973bb22
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Sub.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sub.h"
+
+#include "Convert.h"
+#include "IRBuilder.h"
+#include "GraphBuilder.h"
+#include "Activation.h"
+
+#include <morph/tflite.h>
+#include <coco/IR/Module.h>
+#include <coco/IR/FeatureLayouts.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <schema_generated.h>
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+using namespace morph::tflite;
+
+namespace tflimport
+{
+
+/**
+ * @brief Lower a tflite Sub operator into a coco Eval(Sub(Load, Load)) instruction
+ *
+ * Creates BHWC-layout FeatureObjects for both inputs and the output, loads the
+ * two inputs, wires them into a coco::Sub, and appends an Eval instruction.
+ */
+void SubGraphBuilder::build(const tflite::Operator *op, GraphBuilderContext *context) const
+{
+  assert(context != nullptr); // check if init(..) is called
+
+  coco::Module *m = context->m();
+  coco::Block *blk = context->block();
+  TensorContext &tensor_context = context->tensor();
+  TensorBags &bags = context->bags();
+
+  IndexVector opinputs = as_index_vector(op->inputs());
+  IndexVector opoutputs = as_index_vector(op->outputs());
+
+  // these are fixed in tflite
+  // input index 0 : left input feature
+  // input index 1 : right input feature
+  // output index 0 : output feature
+  assert(opinputs.size() == 2);
+  assert(opoutputs.size() == 1);
+
+  // Default parameter values are referenced from schema_generated.h
+  //
+  // NOTE The previous code declared a second 'params' pointer shadowing an
+  //      outer one inside the 'if' below, and later dereferenced the outer
+  //      (possibly null) pointer in a trailing assert -- a null dereference
+  //      whenever SubOptions is absent. Keep a single scoped pointer and
+  //      assert on the extracted 'activation' value only.
+  tflite::ActivationFunctionType activation = tflite::ActivationFunctionType_NONE;
+
+  if (auto *params = op->builtin_options_as_SubOptions())
+  {
+    activation = params->fused_activation_function();
+  }
+
+  // TODO Support fused activation functions (e.g. relu)
+  assert(activation == tflite::ActivationFunctionType_NONE);
+
+  // Construct a vector of input objects
+  std::vector<coco::FeatureObject *> input_objects;
+
+  for (auto &input_index : opinputs)
+  {
+    // Add objects for input feature map
+    const tensor::Shape &input_shape = tensor_context.shape(input_index);
+    coco::FeatureObject *input_obj = m->entity()->object()->create<coco::FeatureObject>();
+    coco::Bag *input_bag = bags.bag(input_index);
+    input_obj->bag(input_bag);
+    input_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(input_shape)));
+
+    input_objects.emplace_back(input_obj);
+  }
+
+  // Create an object for an output feature map
+  int const output_index = opoutputs.at(0);
+  const tensor::Shape &output_shape = tensor_context.shape(output_index);
+  coco::FeatureObject *output_obj = m->entity()->object()->create<coco::FeatureObject>();
+  coco::Bag *output_bag = bags.bag(output_index);
+  output_obj->bag(output_bag);
+  output_obj->layout(coco::FeatureLayouts::BHWC::create(as_feature_shape(output_shape)));
+
+  // Create Load ops
+  auto left_load = op_builder(m).load(input_objects[0]).pop();
+  auto right_load = op_builder(m).load(input_objects[1]).pop();
+
+  // Create a Sub
+  auto coco_sub = m->entity()->op()->create<coco::Sub>();
+
+  coco_sub->left(left_load);
+  coco_sub->right(right_load);
+
+  // Create an Eval instruction
+  auto eval = instr_builder(m).eval(output_obj, coco_sub);
+
+  // Append the instruction to the block
+  blk->instr()->append(eval);
+}
+
+} // namespace tflimport
diff --git a/compiler/enco/frontend/tflite/src/Op/Sub.h b/compiler/enco/frontend/tflite/src/Op/Sub.h
new file mode 100644
index 000000000..580d8baa3
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/Op/Sub.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_SUB_H__
+#define __OP_SUB_H__
+
+#include "GraphBuilder.h"
+
+#include <schema_generated.h>
+
+namespace tflimport
+{
+
+/**
+ * @brief GraphBuilder for Sub operator
+ */
+class SubGraphBuilder : public GraphBuilder
+{
+public:
+  // Append coco IR equivalent to the given tflite Sub operator to the context's block
+  void build(const tflite::Operator *op, GraphBuilderContext *) const override;
+};
+
+} // namespace tflimport
+
+#endif // __OP_SUB_H__
diff --git a/compiler/enco/frontend/tflite/src/RawModel.h b/compiler/enco/frontend/tflite/src/RawModel.h
new file mode 100644
index 000000000..02946f1d7
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/RawModel.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __RAW_MODEL_H__
+#define __RAW_MODEL_H__
+
+#include "schema_generated.h"
+
+/**
+ * @brief Interface that exposes a loaded TensorFlow Lite model
+ */
+struct RawModel
+{
+  virtual ~RawModel() = default;
+
+  // Returns the root flatbuffer Model object of the loaded file
+  virtual const tflite::Model *model(void) const = 0;
+};
+
+#endif // __RAW_MODEL_H__
diff --git a/compiler/enco/frontend/tflite/src/RawModelLoader.cpp b/compiler/enco/frontend/tflite/src/RawModelLoader.cpp
new file mode 100644
index 000000000..5c127f37c
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/RawModelLoader.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RawModelLoader.h"
+
+#include "cwrap/Fildes.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+namespace
+{
+
+/**
+ * @brief RawModel backed by an mmap'ed file; owns both the mapping and the fd
+ */
+class MemoryMappedRawModel final : public RawModel
+{
+public:
+  /**
+   * @require fd and data SHOULD be valid
+   */
+  explicit MemoryMappedRawModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
+  {
+    // DO NOTHING
+  }
+
+public:
+  // Release the mapping and the file descriptor acquired by load_from
+  ~MemoryMappedRawModel()
+  {
+    munmap(_data, _size);
+    close(_fd);
+  }
+
+public:
+  // Non-copyable and non-movable: this class uniquely owns OS resources.
+  // NOTE Deleting only the constructors (as before) still left the
+  //      implicitly-generated copy assignment available, which would lead
+  //      to a double munmap/close -- delete the assignment operators too.
+  MemoryMappedRawModel(const MemoryMappedRawModel &) = delete;
+  MemoryMappedRawModel(MemoryMappedRawModel &&) = delete;
+  MemoryMappedRawModel &operator=(const MemoryMappedRawModel &) = delete;
+  MemoryMappedRawModel &operator=(MemoryMappedRawModel &&) = delete;
+
+public:
+  const tflite::Model *model(void) const override { return tflite::GetModel(_data); }
+
+private:
+  int _fd = -1;
+  void *_data = nullptr;
+  size_t _size = 0;
+};
+
+} // namespace
+
+/**
+ * @brief Load a TensorFlow Lite model file as a memory-mapped RawModel
+ *
+ * Opens the file read-only, queries its size with fstat, and maps it with
+ * PROT_READ. Ownership of the descriptor and the mapping is transferred to
+ * the returned MemoryMappedRawModel.
+ *
+ * @return nullptr on any failure (open, fstat, or mmap)
+ */
+std::unique_ptr<RawModel> load_from(const std::string &path)
+{
+  cwrap::Fildes fildes{open(path.c_str(), O_RDONLY)};
+
+  if (fildes.get() == -1)
+  {
+    // Return nullptr on open failure
+    return nullptr;
+  }
+
+  struct stat st;
+  if (fstat(fildes.get(), &st) == -1)
+  {
+    // Return nullptr on fstat failure
+    return nullptr;
+  }
+
+  auto size = st.st_size;
+  // NOTE(review): mmap fails with EINVAL for a zero-length file, so an empty
+  // file also yields nullptr here — presumably intended; confirm with callers
+  auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fildes.get(), 0);
+
+  if (data == MAP_FAILED)
+  {
+    // Return nullptr on mmap failure
+    return nullptr;
+  }
+
+  // release() transfers fd ownership from the RAII wrapper to the RawModel
+  return std::unique_ptr<RawModel>{new MemoryMappedRawModel(fildes.release(), data, size)};
+}
diff --git a/compiler/enco/frontend/tflite/src/RawModelLoader.h b/compiler/enco/frontend/tflite/src/RawModelLoader.h
new file mode 100644
index 000000000..5d93528de
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/RawModelLoader.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __RAW_MODEL_LOADER_H__
+#define __RAW_MODEL_LOADER_H__
+
+#include "RawModel.h"
+
+/**
+ * @brief Load TensorFlow Lite model (as a RawModel) from a given path
+ *
+ * @note May return a nullptr
+ */
+std::unique_ptr<RawModel> load_from(const std::string &path);
+
+#endif // __RAW_MODEL_LOADER_H__
diff --git a/compiler/enco/frontend/tflite/src/TensorBags.h b/compiler/enco/frontend/tflite/src/TensorBags.h
new file mode 100644
index 000000000..29558b85e
--- /dev/null
+++ b/compiler/enco/frontend/tflite/src/TensorBags.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TENSOR_BAGS_H__
+#define __TENSOR_BAGS_H__
+
+#include "Convert.h"
+
+#include <coco/IR/Data.h>
+#include <coco/IR/Module.h>
+
+#include <schema_generated.h>
+
+#include <map>
+
+using namespace nncc::core::ADT;
+
+namespace tflimport
+{
+
+/**
+ * @brief Pre-creates coco:Bags for each operands(tensors)
+ */
+class TensorBags
+{
+public:
+  // Create one coco::Bag per tensor of the subgraph, sized by the tensor's
+  // element count, and remember it under the tensor's index
+  void prepare(const tflite::SubGraph *graph, std::unique_ptr<coco::Module> &m)
+  {
+    for (uint32_t tensor_id = 0; tensor_id < graph->tensors()->size(); ++tensor_id)
+    {
+      auto const tensor_info = graph->tensors()->Get(tensor_id);
+      auto const tensor_shape = as_tensor_shape(tensor_info->shape());
+      auto const tensor_bag = m->entity()->bag()->create(num_elements(tensor_shape));
+
+      _bag_ctx[tensor_id] = tensor_bag;
+    }
+  }
+
+  // Look up the bag for a tensor index.
+  // NOTE(review): std::map::operator[] inserts a nullptr entry when the id is
+  // unknown — callers are presumably expected to pass only prepared ids
+  coco::Bag *bag(int32_t tensor_id) { return _bag_ctx[tensor_id]; }
+
+public:
+  // Iteration over (tensor_id, bag) pairs in tensor-id order
+  std::map<uint32_t, coco::Bag *>::iterator begin() { return _bag_ctx.begin(); }
+
+  std::map<uint32_t, coco::Bag *>::iterator end() { return _bag_ctx.end(); }
+
+private:
+  // tensor index -> pre-created bag (bags are owned by the coco::Module)
+  std::map<uint32_t, coco::Bag *> _bag_ctx;
+};
+
+} // namespace tflimport
+
+#endif // __TENSOR_BAGS_H__
diff --git a/compiler/enco/requires.cmake b/compiler/enco/requires.cmake
new file mode 100644
index 000000000..fee0e18e5
--- /dev/null
+++ b/compiler/enco/requires.cmake
@@ -0,0 +1,8 @@
+require("coco")
+require("caffegen")
+require("tflchef")
+require("ann-api")
+require("ann-ref")
+require("nnkit")
+require("cwrap")
+require("enco-intf")
diff --git a/compiler/enco/test/CMakeLists.txt b/compiler/enco/test/CMakeLists.txt
new file mode 100644
index 000000000..5ea6cdadd
--- /dev/null
+++ b/compiler/enco/test/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectories()
diff --git a/compiler/enco/test/basic/000/CMakeLists.txt b/compiler/enco/test/basic/000/CMakeLists.txt
new file mode 100644
index 000000000..20ba3c571
--- /dev/null
+++ b/compiler/enco/test/basic/000/CMakeLists.txt
@@ -0,0 +1,26 @@
+###
+### This test first generates C++ code from an empty model, and checks whether it has a compile error
+###
+set(PREFIX enco-basic-test-000)
+set(GENERATED_CPP ${PREFIX}.cpp)
+set(GENERATED_ASM ${PREFIX}.embed.S)
+set(GENERATED_BIN ${PREFIX}.bin)
+set(SOURCE_TARGET ${PREFIX}-src)
+set(LIB_TARGET ${PREFIX}-lib)
+
+add_library(${PREFIX}-frontend SHARED enco.test.cpp)
+target_link_libraries(${PREFIX}-frontend enco_intf_cmdline)
+target_link_libraries(${PREFIX}-frontend enco_intf_frontend)
+target_link_libraries(${PREFIX}-frontend stdex)
+
+# NOTE BYPRODUCTS are not specified in order to enforce source code generation
+add_custom_command(OUTPUT ${GENERATED_CPP} ${GENERATED_ASM} ${GENERATED_BIN}
+ COMMAND $<TARGET_FILE:enco-cli>
+ --frontend $<TARGET_FILE:${PREFIX}-frontend>
+ --backend-arg ${PREFIX}
+ DEPENDS enco-cli ${PREFIX}-frontend)
+set_source_files_properties(${GENERATED_ASM} PROPERTIES GENERATED TRUE LANGUAGE C)
+add_library(${LIB_TARGET} SHARED ${GENERATED_CPP} ${GENERATED_ASM})
+# NOTE This line is necessary to compile the generated assembly (it includes the generated bin file)
+target_include_directories(${LIB_TARGET} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+target_link_libraries(${LIB_TARGET} PRIVATE ann_api)
diff --git a/compiler/enco/test/basic/000/enco.test.cpp b/compiler/enco/test/basic/000/enco.test.cpp
new file mode 100644
index 000000000..3dbf96613
--- /dev/null
+++ b/compiler/enco/test/basic/000/enco.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <enco/Frontend.h>
+#include <cmdline/View.h>
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <stdex/Memory.h>
+
+using namespace nncc::core::ADT;
+
+namespace
+{
+
+//
+// Dummy frontend for testing
+//
+/**
+ * @brief Test frontend producing an empty model with one 1x3x3x1 input and output
+ */
+struct Frontend final : public enco::Frontend
+{
+  enco::Bundle load(void) const override
+  {
+    auto m = coco::Module::create();
+    auto d = coco::Data::create();
+
+    // Create an input
+    {
+      const tensor::Shape shape{1, 3, 3, 1};
+
+      auto bag = m->entity()->bag()->create(9);
+      auto input = m->entity()->input()->create(shape);
+
+      input->bag(bag);
+      input->name("input");
+      input->reorder<tensor::LexicalLayout>();
+
+      m->input()->insert(input);
+    }
+
+    // Create an output
+    {
+      const tensor::Shape shape{1, 3, 3, 1};
+
+      auto bag = m->entity()->bag()->create(9);
+      auto output = m->entity()->output()->create(shape);
+
+      output->bag(bag);
+      output->name("output");
+      output->reorder<tensor::LexicalLayout>();
+
+      m->output()->insert(output);
+    }
+
+    enco::Bundle bundle;
+
+    bundle.module(std::move(m));
+    bundle.data(std::move(d));
+
+    // Return the local by name: implicit move applies on return of a local,
+    // and the previous explicit 'return std::move(bundle)' disabled NRVO
+    return bundle;
+  }
+};
+
+} // namespace
+
+// C-linkage factory that enco-cli resolves from this shared library;
+// the command-line view is unused by this test frontend
+extern "C" std::unique_ptr<enco::Frontend> make_frontend(const cmdline::View &cmdline)
+{
+  return stdex::make_unique<Frontend>();
+}
diff --git a/compiler/enco/test/basic/CMakeLists.txt b/compiler/enco/test/basic/CMakeLists.txt
new file mode 100644
index 000000000..5ea6cdadd
--- /dev/null
+++ b/compiler/enco/test/basic/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectories()
diff --git a/compiler/enco/test/binder.cpp b/compiler/enco/test/binder.cpp
new file mode 100644
index 000000000..c8c72fc8b
--- /dev/null
+++ b/compiler/enco/test/binder.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Generated API
+//
+struct Network;
+
+Network *Network_construct();
+void Network_destruct(Network *net);
+
+unsigned Network_input_count(const Network *);
+const char *Network_input_name(const Network *, unsigned n);
+unsigned Network_input_rank(const Network *, unsigned n);
+unsigned Network_input_dim(const Network *, unsigned n, unsigned axis);
+void Network_input_bind(Network *net, unsigned n, const void *ptr, unsigned len);
+
+unsigned Network_output_count(const Network *net);
+const char *Network_output_name(const Network *, unsigned n);
+unsigned Network_output_rank(const Network *, unsigned n);
+unsigned Network_output_dim(const Network *, unsigned n, unsigned axis);
+void Network_output_bind(Network *net, unsigned n, void *ptr, unsigned len);
+
+void Network_invoke(Network *net);
+
+//
+// nnkit backend
+//
+#include <nnkit/Backend.h>
+#include <nnkit/TensorContext.h>
+#include <nnkit/CmdlineArguments.h>
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+
+#include <stdex/Memory.h>
+
+using stdex::make_unique;
+using namespace nncc::core::ADT;
+
+namespace
+{
+
+class TensorContext final : public nnkit::TensorContext
+{
+public:
+ TensorContext() = default;
+
+public:
+ void allocate(const std::string &name, const tensor::Shape &shape)
+ {
+ using nncc::core::ADT::tensor::num_elements;
+
+ auto blob = make_unique<std::vector<uint8_t>>();
+ blob->resize(num_elements(shape) * sizeof(float));
+
+ _names.emplace_back(name);
+ _shapes.emplace_back(shape);
+ _blobs.emplace_back(std::move(blob));
+ }
+
+public:
+ uint8_t *base(uint32_t n) const { return _blobs.at(n)->data(); }
+
+public:
+ uint32_t size(void) const override { return _blobs.size(); }
+
+public:
+ std::string name(uint32_t n) const override { return _names.at(n); }
+
+public:
+ tensor::Shape shape(uint32_t n) const override { return _shapes.at(n); }
+
+public:
+ uint32_t size(uint32_t n) const { return _blobs.at(n)->size(); }
+
+public:
+ // Float (fp32) tensor support
+ bool isFloatTensor(uint32_t n) const override { return true; }
+ void getMutableFloatTensor(uint32_t n, const TensorContext::TypedAccessor<float> &f) override
+ {
+ using nncc::core::ADT::tensor::LexicalLayout;
+ using nncc::core::ADT::tensor::make_overlay;
+
+ auto base = reinterpret_cast<float *>(this->base(n));
+ auto view = make_overlay<float, LexicalLayout>(shape(n), base);
+
+ f(*this, n, view);
+ }
+
+ void getConstFloatTensor(uint32_t n, const TensorContext::TypedReader<float> &f) const override
+ {
+ using nncc::core::ADT::tensor::LexicalLayout;
+ using nncc::core::ADT::tensor::make_overlay;
+
+ auto base = reinterpret_cast<float *>(this->base(n));
+ auto view = make_overlay<float, LexicalLayout>(shape(n), base);
+
+ f(*this, n, view);
+ }
+
+private:
+ std::vector<std::string> _names;
+ std::vector<tensor::Shape> _shapes;
+ std::vector<std::unique_ptr<std::vector<uint8_t>>> _blobs;
+};
+
+class Backend final : public nnkit::Backend
+{
+public:
+ Backend()
+ {
+ _net = Network_construct();
+
+ // Allocate and bind inputs
+ for (uint32_t n = 0; n < Network_input_count(_net); ++n)
+ {
+ const uint32_t rank = Network_input_rank(_net, n);
+ const std::string name = Network_input_name(_net, n);
+
+ tensor::Shape shape;
+
+ shape.resize(rank);
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ shape.dim(axis) = Network_input_dim(_net, n, axis);
+ }
+
+ _inputs.allocate(name, shape);
+
+ Network_input_bind(_net, n, reinterpret_cast<const void *>(_inputs.base(n)), _inputs.size(n));
+ }
+
+ // Allocate and bind outputs
+ for (uint32_t n = 0; n < Network_output_count(_net); ++n)
+ {
+ const uint32_t rank = Network_output_rank(_net, n);
+ const std::string name = Network_output_name(_net, n);
+
+ tensor::Shape shape;
+
+ shape.resize(rank);
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ shape.dim(axis) = Network_output_dim(_net, n, axis);
+ }
+
+ _outputs.allocate(name, shape);
+
+ Network_output_bind(_net, n, reinterpret_cast<void *>(_outputs.base(n)), _outputs.size(n));
+ }
+ }
+
+public:
+ ~Backend() { Network_destruct(_net); }
+
+public:
+ void prepare(const std::function<void(nnkit::TensorContext &)> &f) override { f(_inputs); }
+ void run(void) override { Network_invoke(_net); }
+ void teardown(const std::function<void(nnkit::TensorContext &)> &f) override { f(_outputs); }
+
+private:
+ Network *_net;
+
+private:
+ TensorContext _inputs;
+ TensorContext _outputs;
+};
+
+} // namespace
+
+extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
+{
+ return make_unique<::Backend>();
+}
diff --git a/compiler/enco/test/caffe/CMakeLists.txt b/compiler/enco/test/caffe/CMakeLists.txt
new file mode 100644
index 000000000..ee49b6b28
--- /dev/null
+++ b/compiler/enco/test/caffe/CMakeLists.txt
@@ -0,0 +1,141 @@
+option(ENCO_CAFFE_TEST "Enable enco test for caffe" ON)
+
+if(NOT ENCO_CAFFE_TEST)
+ return()
+endif(NOT ENCO_CAFFE_TEST)
+
+# TODO Use REQUIRED if supported
+nncc_find_resource(BVLCCaffeTests)
+
+if(NOT BVLCCaffeTests_FOUND)
+ message(FATAL_ERROR "Fail to find BVLCCaffeTests")
+endif(NOT BVLCCaffeTests_FOUND)
+
+# TESTCASE_BASE_DIR indicates where all the testcases are located
+set(TESTCASE_BASE_DIR "${BVLCCaffeTests_DIR}")
+
+###
+### Common function(s)
+###
+function(get_test_configuration PREFIX)
+ set(PROTOTXT_FILE "${PREFIX}.prototxt")
+ set(PROTOTXT_FILE "${PROTOTXT_FILE}" PARENT_SCOPE)
+ set(PROTOTXT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PROTOTXT_FILE}" PARENT_SCOPE)
+ set(CAFFEMODEL_FILE "${PREFIX}.caffemodel")
+ set(CAFFEMODEL_FILE "${CAFFEMODEL_FILE}" PARENT_SCOPE)
+ set(CAFFEMODEL_PATH "${CMAKE_CURRENT_BINARY_DIR}/${CAFFEMODEL_FILE}" PARENT_SCOPE)
+ set(SOURCE_FILE ${PREFIX}.cpp)
+ set(SOURCE_FILE "${SOURCE_FILE}" PARENT_SCOPE)
+ set(SOURCE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${SOURCE_FILE}" PARENT_SCOPE)
+ set(ASM_FILE ${PREFIX}.embed.S)
+ set(ASM_FILE "${ASM_FILE}" PARENT_SCOPE)
+ set(ASM_PATH "${CMAKE_CURRENT_BINARY_DIR}/${ASM_FILE}" PARENT_SCOPE)
+ set(BIN_FILE ${PREFIX}.bin)
+ set(BIN_FILE "${BIN_FILE}" PARENT_SCOPE)
+ set(BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${BIN_FILE}" PARENT_SCOPE)
+endfunction(get_test_configuration)
+
+###
+### Prepare test(s)
+###
+if(NOT TARGET caffegen)
+ return()
+endif(NOT TARGET caffegen)
+
+if(NOT TARGET enco_caffe_frontend)
+ return()
+endif(NOT TARGET enco_caffe_frontend)
+
+# TODO Use "whitelist" instead
+#
+# WHY?
+#
+# Tests are now shared by multiple frameworks (not private), and thus
+# some tests may be unsupported.
+#
+file(GLOB MODELS RELATIVE "${TESTCASE_BASE_DIR}" "${TESTCASE_BASE_DIR}/*/test.prototxt")
+
+foreach(MODEL IN ITEMS ${MODELS})
+ get_filename_component(PREFIX ${MODEL} DIRECTORY)
+ get_test_configuration(${PREFIX})
+
+ set(MODEL_FILE ${TESTCASE_BASE_DIR}/${MODEL})
+
+ # Copy prototxt
+ # TODO Fix indentation
+ add_custom_command(OUTPUT ${PROTOTXT_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${MODEL_FILE}" "${PROTOTXT_PATH}"
+ DEPENDS "${MODEL_FILE}"
+ COMMENT "Generating ${PROTOTXT_FILE}")
+
+ # Generate caffemodel
+ # TODO Fix indentation
+ add_custom_command(OUTPUT ${CAFFEMODEL_PATH}
+ COMMAND cat ${PROTOTXT_PATH}
+ | GLOG_minloglevel=2 $<TARGET_FILE:caffegen> init
+ | GLOG_minloglevel=2 $<TARGET_FILE:caffegen> encode
+ > ${CAFFEMODEL_PATH}
+ DEPENDS caffegen ${PROTOTXT_PATH}
+ COMMENT "Generating ${CAFFEMODEL_FILE}")
+
+ # Generate C++ code
+ # TODO Fix indentation
+ add_custom_command(OUTPUT ${SOURCE_PATH} ${ASM_PATH} ${BIN_PATH}
+ COMMAND $<TARGET_FILE:enco-cli>
+ --frontend $<TARGET_FILE:enco_caffe_frontend>
+ --frontend-arg ${PROTOTXT_FILE}
+ --frontend-arg ${CAFFEMODEL_FILE}
+ --backend-arg ${PREFIX}
+ DEPENDS enco-cli enco_caffe_frontend ${CAFFEMODEL_PATH}
+ COMMENT "Generating ${SOURCE_FILE}")
+ set_source_files_properties(${ASM_PATH} PROPERTIES GENERATED TRUE LANGUAGE C)
+
+ list(APPEND CANDIDATES ${PREFIX})
+endforeach(MODEL)
+
+###
+### Inference test
+###
+if(NOT TARGET ann_ref_static)
+ return()
+endif(NOT TARGET ann_ref_static)
+
+find_program(H5DIFF h5diff)
+
+if (NOT H5DIFF)
+ return()
+endif(NOT H5DIFF)
+
+message(STATUS "Enable enco(caffe) inference test")
+
+foreach(PREFIX IN ITEMS ${CANDIDATES})
+ if(NOT EXISTS "${TESTCASE_BASE_DIR}/${PREFIX}/INFERENCE")
+ continue()
+ endif()
+
+ get_test_configuration(${PREFIX})
+
+ set(BINDER_TARGET enco_caffe_test_${PREFIX}_binder)
+
+ # Compile nnkit binder (from generated C++ code)
+ add_library(${BINDER_TARGET} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/../binder.cpp ${SOURCE_PATH} ${ASM_PATH})
+ target_include_directories(${BINDER_TARGET} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+ target_link_libraries(${BINDER_TARGET} nnkit_intf_backend)
+ target_link_libraries(${BINDER_TARGET} ann_api)
+ target_link_libraries(${BINDER_TARGET} ann_ref_static)
+ target_link_libraries(${BINDER_TARGET} stdex)
+ set_target_properties(${BINDER_TARGET} PROPERTIES OUTPUT_NAME ${PREFIX})
+
+ list(APPEND TESTS ${PREFIX})
+endforeach(PREFIX)
+
+# Run tests
+add_test(NAME enco_test_caffe
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/runall.sh"
+ $<TARGET_FILE:nnkit-run>
+ $<TARGET_FILE:nnkit_caffe_backend>
+ $<TARGET_FILE:nnkit_randomize_action>
+ $<TARGET_FILE:nnkit_HDF5_export_action>
+ $<TARGET_FILE:nnkit_HDF5_import_action>
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ ${TESTS})
diff --git a/compiler/enco/test/caffe/runall.sh b/compiler/enco/test/caffe/runall.sh
new file mode 100755
index 000000000..3b18f1c6b
--- /dev/null
+++ b/compiler/enco/test/caffe/runall.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+if [[ $# -le 6 ]]; then
+ echo "USAGE: $0 [nnkit-run path] [reference backend path] [randomize action path] [HDF5 export action path] [HDF5 import action path] [WORKDIR] [Prefix1] [Prefix2]..."
+ exit 255
+fi
+
+NNKIT_RUN_PATH="$1"; shift
+REFERENCE_BACKEND_PATH="$1"; shift
+RANDOMIZE_ACTION_PATH="$1"; shift
+HDF5_EXPORT_ACTION_PATH="$1"; shift
+HDF5_IMPORT_ACTION_PATH="$1"; shift
+WORKDIR="$1"; shift
+
+echo "-- Found nnkit-run: ${NNKIT_RUN_PATH}"
+echo "-- Found reference backend: ${REFERENCE_BACKEND_PATH}"
+echo "-- Found randomize action: ${RANDOMIZE_ACTION_PATH}"
+echo "-- Found HDF5 export action: ${HDF5_EXPORT_ACTION_PATH}"
+echo "-- Found HDF5 import action: ${HDF5_IMPORT_ACTION_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ echo "-- Found prototxt: ${PREFIX}.prototxt"
+ echo "-- Found caffemodel: ${PREFIX}.caffemodel"
+ echo "-- Found backend: lib${PREFIX}.so"
+
+ "${NNKIT_RUN_PATH}" \
+ --backend "${REFERENCE_BACKEND_PATH}" \
+ --backend-arg "${WORKDIR}/${PREFIX}.prototxt" \
+ --backend-arg "${WORKDIR}/${PREFIX}.caffemodel" \
+ --pre "${RANDOMIZE_ACTION_PATH}" \
+ --pre "${HDF5_EXPORT_ACTION_PATH}" \
+ --pre-arg "${PREFIX}.input.h5" \
+ --post "${HDF5_EXPORT_ACTION_PATH}" \
+ --post-arg "${PREFIX}.expected.h5"
+
+ "${NNKIT_RUN_PATH}" \
+ --backend "./lib${PREFIX}.so" \
+ --pre "${HDF5_IMPORT_ACTION_PATH}" \
+ --pre-arg "${PREFIX}.input.h5" \
+ --post "${HDF5_EXPORT_ACTION_PATH}" \
+ --post-arg "${PREFIX}.obtained.h5"
+
+ h5diff -d 0.001 "${PREFIX}.expected.h5" "${PREFIX}.obtained.h5"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$PREFIX")
+ else
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/enco/test/tflite/AveragePool2D_000/INFERENCE b/compiler/enco/test/tflite/AveragePool2D_000/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/AveragePool2D_000/INFERENCE
diff --git a/compiler/enco/test/tflite/AveragePool2D_000/test.recipe b/compiler/enco/test/tflite/AveragePool2D_000/test.recipe
new file mode 100644
index 000000000..746c34334
--- /dev/null
+++ b/compiler/enco/test/tflite/AveragePool2D_000/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "AveragePool2D"
+ averagepool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/AveragePool2D_001/INFERENCE b/compiler/enco/test/tflite/AveragePool2D_001/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/AveragePool2D_001/INFERENCE
diff --git a/compiler/enco/test/tflite/AveragePool2D_001/test.recipe b/compiler/enco/test/tflite/AveragePool2D_001/test.recipe
new file mode 100644
index 000000000..36bbda78c
--- /dev/null
+++ b/compiler/enco/test/tflite/AveragePool2D_001/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 5 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 5 }
+}
+operation {
+ type: "AveragePool2D"
+ averagepool2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ filter_width: 3
+ filter_height: 3
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/CMakeLists.txt b/compiler/enco/test/tflite/CMakeLists.txt
new file mode 100644
index 000000000..d5a96a6da
--- /dev/null
+++ b/compiler/enco/test/tflite/CMakeLists.txt
@@ -0,0 +1,108 @@
+option(ENCO_TFLITE_TEST "Enable enco test for TFLite" ON)
+
+if(NOT ENCO_TFLITE_TEST)
+ return()
+endif(NOT ENCO_TFLITE_TEST)
+
+###
+### Common function(s)
+###
+function(get_test_configuration PREFIX)
+ set(RECIPE_FILE "${PREFIX}.recipe" PARENT_SCOPE)
+ set(TFLITEMODEL_FILE "${PREFIX}.tflite" PARENT_SCOPE)
+ set(SOURCE_FILE ${PREFIX}.cpp PARENT_SCOPE)
+ set(ASM_FILE ${PREFIX}.embed.S PARENT_SCOPE)
+ set(BIN_FILE ${PREFIX}.bin PARENT_SCOPE)
+endfunction(get_test_configuration)
+
+###
+### Prepare test(s)
+###
+if(NOT TARGET tflchef-file)
+ return()
+endif(NOT TARGET tflchef-file)
+
+if(NOT TARGET enco_tflite_frontend)
+ return()
+endif(NOT TARGET enco_tflite_frontend)
+
+file(GLOB MODELS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.recipe")
+
+foreach(MODEL IN ITEMS ${MODELS})
+ get_filename_component(PREFIX ${MODEL} DIRECTORY)
+ get_test_configuration(${PREFIX})
+
+ set(MODEL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${MODEL})
+
+ # Copy recipe
+ add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${RECIPE_FILE}
+ COMMAND ${CMAKE_COMMAND} -E copy "${MODEL_FILE}"
+ "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE_FILE}"
+ DEPENDS "${MODEL_FILE}"
+ COMMENT "Copying ${RECIPE_FILE}")
+
+ # Generate tflitemodel
+ add_custom_command(OUTPUT ${TFLITEMODEL_FILE}
+ COMMAND $<TARGET_FILE:tflchef-file> ${RECIPE_FILE} ${TFLITEMODEL_FILE}
+                     DEPENDS tflchef-file ${CMAKE_CURRENT_BINARY_DIR}/${RECIPE_FILE}
+ COMMENT "Generating ${TFLITEMODEL_FILE}")
+
+ # Generate C++ code
+ add_custom_command(OUTPUT ${SOURCE_FILE} ${ASM_FILE} ${BIN_FILE}
+ COMMAND $<TARGET_FILE:enco-cli>
+ --frontend $<TARGET_FILE:enco_tflite_frontend>
+ --frontend-arg ${TFLITEMODEL_FILE}
+ --backend-arg ${PREFIX}
+                     DEPENDS enco-cli enco_tflite_frontend ${TFLITEMODEL_FILE}
+ COMMENT "Generating ${SOURCE_FILE}")
+ set_source_files_properties(${ASM_FILE} PROPERTIES GENERATED TRUE LANGUAGE C)
+
+ list(APPEND CANDIDATES ${PREFIX})
+endforeach(MODEL)
+
+###
+### Inference test
+###
+if(NOT TARGET ann_ref_static)
+ return()
+endif(NOT TARGET ann_ref_static)
+
+find_program(H5DIFF h5diff)
+
+if (NOT H5DIFF)
+ return()
+endif(NOT H5DIFF)
+
+message(STATUS "Enable enco(tflite) inference test")
+
+foreach(PREFIX IN ITEMS ${CANDIDATES})
+ if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${PREFIX}/INFERENCE")
+ continue()
+ endif()
+
+ get_test_configuration(${PREFIX})
+
+ set(BINDER_TARGET enco_tflite_test_${PREFIX}_binder)
+
+ # Compile nnkit binder (from generated C++ code)
+ add_library(${BINDER_TARGET} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/../binder.cpp ${SOURCE_FILE} ${ASM_FILE})
+ target_include_directories(${BINDER_TARGET} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+ target_link_libraries(${BINDER_TARGET} nnkit_intf_backend)
+ target_link_libraries(${BINDER_TARGET} ann_api)
+ target_link_libraries(${BINDER_TARGET} ann_ref_static)
+ target_link_libraries(${BINDER_TARGET} stdex)
+ set_target_properties(${BINDER_TARGET} PROPERTIES OUTPUT_NAME ${PREFIX})
+
+ list(APPEND TESTS ${PREFIX})
+endforeach(PREFIX)
+
+# Run tests
+add_test(NAME enco_test_tflite
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/runall.sh"
+ $<TARGET_FILE:nnkit-run>
+ $<TARGET_FILE:nnkit_tflite_backend>
+ $<TARGET_FILE:nnkit_randomize_action>
+ $<TARGET_FILE:nnkit_HDF5_export_action>
+ $<TARGET_FILE:nnkit_HDF5_import_action>
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ ${TESTS})
diff --git a/compiler/enco/test/tflite/Concat_000/INFERENCE b/compiler/enco/test/tflite/Concat_000/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Concat_000/INFERENCE
diff --git a/compiler/enco/test/tflite/Concat_000/test.recipe b/compiler/enco/test/tflite/Concat_000/test.recipe
new file mode 100644
index 000000000..35641bd07
--- /dev/null
+++ b/compiler/enco/test/tflite/Concat_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Concatenation"
+ concatenation_options {
+ axis: 3
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/Concat_001/INFERENCE b/compiler/enco/test/tflite/Concat_001/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Concat_001/INFERENCE
diff --git a/compiler/enco/test/tflite/Concat_001/test.recipe b/compiler/enco/test/tflite/Concat_001/test.recipe
new file mode 100644
index 000000000..7adaf1645
--- /dev/null
+++ b/compiler/enco/test/tflite/Concat_001/test.recipe
@@ -0,0 +1,29 @@
+# Concatenate two feature maps along "width" dimension
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 1 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 2 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 dim: 1 }
+}
+operation {
+ type: "Concatenation"
+ concatenation_options {
+ axis: 2
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/Concat_002/INFERENCE b/compiler/enco/test/tflite/Concat_002/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Concat_002/INFERENCE
diff --git a/compiler/enco/test/tflite/Concat_002/test.recipe b/compiler/enco/test/tflite/Concat_002/test.recipe
new file mode 100644
index 000000000..918cb13d3
--- /dev/null
+++ b/compiler/enco/test/tflite/Concat_002/test.recipe
@@ -0,0 +1,29 @@
+# Concatenate two feature maps along "height" dimension
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 1 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 1 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 1 dim: 1 }
+}
+operation {
+ type: "Concatenation"
+ concatenation_options {
+ axis: 1
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/Concat_003/INFERENCE b/compiler/enco/test/tflite/Concat_003/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Concat_003/INFERENCE
diff --git a/compiler/enco/test/tflite/Concat_003/test.recipe b/compiler/enco/test/tflite/Concat_003/test.recipe
new file mode 100644
index 000000000..8f1b64ea6
--- /dev/null
+++ b/compiler/enco/test/tflite/Concat_003/test.recipe
@@ -0,0 +1,29 @@
+# Concatenate two feature maps along "batch" dimension
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 1 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 2 dim: 1 dim: 1 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 1 dim: 1 }
+}
+operation {
+ type: "Concatenation"
+ concatenation_options {
+ axis: 0
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/Conv2D_000/INFERENCE b/compiler/enco/test/tflite/Conv2D_000/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Conv2D_000/INFERENCE
diff --git a/compiler/enco/test/tflite/Conv2D_000/test.recipe b/compiler/enco/test/tflite/Conv2D_000/test.recipe
new file mode 100644
index 000000000..9f0841819
--- /dev/null
+++ b/compiler/enco/test/tflite/Conv2D_000/test.recipe
@@ -0,0 +1,45 @@
+# Test for basic case: VALID padding, no activation layer, stride=[1,1]
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/Conv2D_001/INFERENCE b/compiler/enco/test/tflite/Conv2D_001/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Conv2D_001/INFERENCE
diff --git a/compiler/enco/test/tflite/Conv2D_001/test.recipe b/compiler/enco/test/tflite/Conv2D_001/test.recipe
new file mode 100644
index 000000000..d9d4904da
--- /dev/null
+++ b/compiler/enco/test/tflite/Conv2D_001/test.recipe
@@ -0,0 +1,45 @@
+# Test for SAME padding
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 5 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 5 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/Conv2D_002/INFERENCE b/compiler/enco/test/tflite/Conv2D_002/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Conv2D_002/INFERENCE
diff --git a/compiler/enco/test/tflite/Conv2D_002/test.recipe b/compiler/enco/test/tflite/Conv2D_002/test.recipe
new file mode 100644
index 000000000..55976c9b9
--- /dev/null
+++ b/compiler/enco/test/tflite/Conv2D_002/test.recipe
@@ -0,0 +1,46 @@
+# Test for RELU activation layer
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: RELU
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/Conv2D_003/INFERENCE b/compiler/enco/test/tflite/Conv2D_003/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Conv2D_003/INFERENCE
diff --git a/compiler/enco/test/tflite/Conv2D_003/test.recipe b/compiler/enco/test/tflite/Conv2D_003/test.recipe
new file mode 100644
index 000000000..30c9473b7
--- /dev/null
+++ b/compiler/enco/test/tflite/Conv2D_003/test.recipe
@@ -0,0 +1,45 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: RELU6
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/Conv2D_004/INFERENCE b/compiler/enco/test/tflite/Conv2D_004/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Conv2D_004/INFERENCE
diff --git a/compiler/enco/test/tflite/Conv2D_004/test.recipe b/compiler/enco/test/tflite/Conv2D_004/test.recipe
new file mode 100644
index 000000000..20f4a9908
--- /dev/null
+++ b/compiler/enco/test/tflite/Conv2D_004/test.recipe
@@ -0,0 +1,45 @@
+# Conv2D with ifm w, h = 14, 14 && ofm w, h = 7, 7 && stride = 2, 2 && padding = SAME (similar case from Mobile)
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 14 dim: 14 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: SAME
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/DepthwiseConv2D_000/INFERENCE b/compiler/enco/test/tflite/DepthwiseConv2D_000/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/DepthwiseConv2D_000/INFERENCE
diff --git a/compiler/enco/test/tflite/DepthwiseConv2D_000/test.recipe b/compiler/enco/test/tflite/DepthwiseConv2D_000/test.recipe
new file mode 100644
index 000000000..27bc767fc
--- /dev/null
+++ b/compiler/enco/test/tflite/DepthwiseConv2D_000/test.recipe
@@ -0,0 +1,48 @@
+# SAME padding, stride = [1,1], activation=RELU6.
+# In mobilenet, there are two cases using depthwiseConv2D : A case like this one, and another case with stride=[2,2]
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 4 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 4 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ depth_multiplier: 1
+ activation: RELU6
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/DepthwiseConv2D_001/INFERENCE b/compiler/enco/test/tflite/DepthwiseConv2D_001/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/DepthwiseConv2D_001/INFERENCE
diff --git a/compiler/enco/test/tflite/DepthwiseConv2D_001/test.recipe b/compiler/enco/test/tflite/DepthwiseConv2D_001/test.recipe
new file mode 100644
index 000000000..0166474d8
--- /dev/null
+++ b/compiler/enco/test/tflite/DepthwiseConv2D_001/test.recipe
@@ -0,0 +1,46 @@
+# depthwiseConv2D with ifm w, h = 14, 14 && ofm w, h = 7, 7 && stride = 2, 2 && padding = SAME (similar case from Mobile)
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 14 dim: 14 dim: 5 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 5 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 5 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 5 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: SAME
+ stride_w: 2
+ stride_h: 2
+ activation: RELU6
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/Div_000/INFERENCE b/compiler/enco/test/tflite/Div_000/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Div_000/INFERENCE
diff --git a/compiler/enco/test/tflite/Div_000/test.recipe b/compiler/enco/test/tflite/Div_000/test.recipe
new file mode 100644
index 000000000..a6335de46
--- /dev/null
+++ b/compiler/enco/test/tflite/Div_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm0"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Div"
+ input: "ifm0"
+ input: "ifm1"
+ output: "ofm"
+ div_options {
+ activation: NONE
+ }
+}
+input: "ifm0"
+input: "ifm1"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/MaxPool2D_000/INFERENCE b/compiler/enco/test/tflite/MaxPool2D_000/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/MaxPool2D_000/INFERENCE
diff --git a/compiler/enco/test/tflite/MaxPool2D_000/test.recipe b/compiler/enco/test/tflite/MaxPool2D_000/test.recipe
new file mode 100644
index 000000000..718630f08
--- /dev/null
+++ b/compiler/enco/test/tflite/MaxPool2D_000/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "MaxPool2D"
+ maxpool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/ReLU6_000/INFERENCE b/compiler/enco/test/tflite/ReLU6_000/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/ReLU6_000/INFERENCE
diff --git a/compiler/enco/test/tflite/ReLU6_000/test.recipe b/compiler/enco/test/tflite/ReLU6_000/test.recipe
new file mode 100644
index 000000000..226593593
--- /dev/null
+++ b/compiler/enco/test/tflite/ReLU6_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU6"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/ReLU_000/INFERENCE b/compiler/enco/test/tflite/ReLU_000/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/ReLU_000/INFERENCE
diff --git a/compiler/enco/test/tflite/ReLU_000/test.recipe b/compiler/enco/test/tflite/ReLU_000/test.recipe
new file mode 100644
index 000000000..8eaa3602f
--- /dev/null
+++ b/compiler/enco/test/tflite/ReLU_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/Regression_0000/INFERENCE b/compiler/enco/test/tflite/Regression_0000/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Regression_0000/INFERENCE
diff --git a/compiler/enco/test/tflite/Regression_0000/test.recipe b/compiler/enco/test/tflite/Regression_0000/test.recipe
new file mode 100644
index 000000000..2f3c03670
--- /dev/null
+++ b/compiler/enco/test/tflite/Regression_0000/test.recipe
@@ -0,0 +1,84 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 2 }
+}
+operand {
+ name: "ker_0"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias_0"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "ofm_0"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+ input: "ifm"
+ input: "ker_0"
+ input: "bias_0"
+ output: "ofm_0"
+}
+operand {
+ name: "ker_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias_1"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "ofm_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+ input: "ifm"
+ input: "ker_1"
+ input: "bias_1"
+ output: "ofm_1"
+}
+input: "ifm"
+output: "ofm_0"
+output: "ofm_1"
diff --git a/compiler/enco/test/tflite/Regression_0001/INFERENCE b/compiler/enco/test/tflite/Regression_0001/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Regression_0001/INFERENCE
diff --git a/compiler/enco/test/tflite/Regression_0001/test.recipe b/compiler/enco/test/tflite/Regression_0001/test.recipe
new file mode 100644
index 000000000..e6f4eca8f
--- /dev/null
+++ b/compiler/enco/test/tflite/Regression_0001/test.recipe
@@ -0,0 +1,50 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler { tag: "gaussian" arg: "0.0" arg: "1.0" }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler { tag: "gaussian" arg: "0.0" arg: "1.0" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operand {
+ name: "arr"
+ type: FLOAT32
+ shape { dim: 1 dim: 9 }
+}
+operand {
+ name: "shape"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "-1" arg: "9" }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options { padding: VALID stride_w: 1 stride_h: 1 activation: RELU6 }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+operation {
+ type: "Reshape"
+ input: "ofm"
+ input: "shape"
+ output: "arr"
+ reshape_options { new_shape: [-1, 9] }
+}
+input: "ifm"
+output: "arr"
diff --git a/compiler/enco/test/tflite/Regression_0002/INFERENCE b/compiler/enco/test/tflite/Regression_0002/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Regression_0002/INFERENCE
diff --git a/compiler/enco/test/tflite/Regression_0002/test.recipe b/compiler/enco/test/tflite/Regression_0002/test.recipe
new file mode 100644
index 000000000..8234c7996
--- /dev/null
+++ b/compiler/enco/test/tflite/Regression_0002/test.recipe
@@ -0,0 +1,45 @@
+# Compilation SHOULD NOT fail even when there is no effective calculation
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
diff --git a/compiler/enco/test/tflite/Regression_0003/INFERENCE b/compiler/enco/test/tflite/Regression_0003/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Regression_0003/INFERENCE
diff --git a/compiler/enco/test/tflite/Regression_0003/test.recipe b/compiler/enco/test/tflite/Regression_0003/test.recipe
new file mode 100644
index 000000000..693c45543
--- /dev/null
+++ b/compiler/enco/test/tflite/Regression_0003/test.recipe
@@ -0,0 +1,33 @@
+# Compilation SHOULD NOT fail even if all the inputs are constant
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ filler { tag: "constant" arg: "0.1" }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler { tag: "constant" arg: "0.2" }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler { tag: "constant" arg: "0.3" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options { padding: VALID }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+output: "ofm"
diff --git a/compiler/enco/test/tflite/Regression_0004/INFERENCE b/compiler/enco/test/tflite/Regression_0004/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Regression_0004/INFERENCE
diff --git a/compiler/enco/test/tflite/Regression_0004/test.recipe b/compiler/enco/test/tflite/Regression_0004/test.recipe
new file mode 100644
index 000000000..80705efd5
--- /dev/null
+++ b/compiler/enco/test/tflite/Regression_0004/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm0"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ filler { tag: "constant" arg: "0.1" }
+}
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ filler { tag: "constant" arg: "0.1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Div"
+ input: "ifm0"
+ input: "ifm1"
+ output: "ofm"
+ div_options {
+ activation: NONE
+ }
+}
+output: "ofm"
diff --git a/compiler/enco/test/tflite/Reshape_000/INFERENCE b/compiler/enco/test/tflite/Reshape_000/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Reshape_000/INFERENCE
diff --git a/compiler/enco/test/tflite/Reshape_000/test.recipe b/compiler/enco/test/tflite/Reshape_000/test.recipe
new file mode 100644
index 000000000..bb7ce48a9
--- /dev/null
+++ b/compiler/enco/test/tflite/Reshape_000/test.recipe
@@ -0,0 +1,21 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 10 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 10 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: -1
+ new_shape: 10
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/Sub_000/INFERENCE b/compiler/enco/test/tflite/Sub_000/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/Sub_000/INFERENCE
diff --git a/compiler/enco/test/tflite/Sub_000/test.recipe b/compiler/enco/test/tflite/Sub_000/test.recipe
new file mode 100644
index 000000000..0397c9c2b
--- /dev/null
+++ b/compiler/enco/test/tflite/Sub_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim:2 dim:3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim:2 dim:3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim:2 dim:3 }
+}
+operation {
+ type: "Sub"
+ sub_options {
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/compiler/enco/test/tflite/empty/INFERENCE b/compiler/enco/test/tflite/empty/INFERENCE
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/empty/INFERENCE
diff --git a/compiler/enco/test/tflite/empty/test.recipe b/compiler/enco/test/tflite/empty/test.recipe
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/enco/test/tflite/empty/test.recipe
diff --git a/compiler/enco/test/tflite/runall.sh b/compiler/enco/test/tflite/runall.sh
new file mode 100755
index 000000000..c274f724b
--- /dev/null
+++ b/compiler/enco/test/tflite/runall.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+if [[ $# -le 6 ]]; then
+ echo "USAGE: $0 [nnkit-run path] [reference backend path] [randomize action path] [HDF5 export action path] [HDF5 import action path] [WORKDIR] [Prefix1] [Prefix2] ..."
+ exit 255
+fi
+
+NNKIT_RUN_PATH="$1"; shift
+REFERENCE_BACKEND_PATH="$1"; shift
+RANDOMIZE_ACTION_PATH="$1"; shift
+HDF5_EXPORT_ACTION_PATH="$1"; shift
+HDF5_IMPORT_ACTION_PATH="$1"; shift
+WORKDIR="$1"; shift
+
+echo "-- Found nnkit-run: ${NNKIT_RUN_PATH}"
+echo "-- Found reference backend: ${REFERENCE_BACKEND_PATH}"
+echo "-- Found randomize action: ${RANDOMIZE_ACTION_PATH}"
+echo "-- Found HDF5 export action: ${HDF5_EXPORT_ACTION_PATH}"
+echo "-- Found HDF5 import action: ${HDF5_IMPORT_ACTION_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ echo "-- Found tflite: ${PREFIX}.tflite"
+ echo "-- Found backend: lib${PREFIX}.so"
+
+ "${NNKIT_RUN_PATH}" \
+ --backend "${REFERENCE_BACKEND_PATH}" \
+ --backend-arg "${WORKDIR}/${PREFIX}.tflite" \
+ --pre "${RANDOMIZE_ACTION_PATH}" \
+ --pre "${HDF5_EXPORT_ACTION_PATH}" \
+ --pre-arg "${PREFIX}.input.h5" \
+ --post "${HDF5_EXPORT_ACTION_PATH}" \
+ --post-arg "${PREFIX}.expected.h5"
+
+ "${NNKIT_RUN_PATH}" \
+ --backend "./lib${PREFIX}.so" \
+ --pre "${HDF5_IMPORT_ACTION_PATH}" \
+ --pre-arg "${PREFIX}.input.h5" \
+ --post "${HDF5_EXPORT_ACTION_PATH}" \
+ --post-arg "${PREFIX}.obtained.h5"
+
+ h5diff -d 0.001 "${PREFIX}.expected.h5" "${PREFIX}.obtained.h5"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$PREFIX")
+ else
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/encodump/CMakeLists.txt b/compiler/encodump/CMakeLists.txt
new file mode 100644
index 000000000..58fe17a51
--- /dev/null
+++ b/compiler/encodump/CMakeLists.txt
@@ -0,0 +1,17 @@
+if(NOT TARGET enco_intf_frontend)
+ return()
+endif(NOT TARGET enco_intf_frontend)
+
+if(NOT TARGET enco_core)
+ return()
+endif(NOT TARGET enco_core)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(encodump ${SOURCES})
+target_include_directories(encodump PRIVATE src)
+target_link_libraries(encodump enco_intf_frontend)
+target_link_libraries(encodump enco_core)
+target_link_libraries(encodump safemain)
+target_link_libraries(encodump stdex)
+target_link_libraries(encodump dl)
diff --git a/compiler/encodump/README.md b/compiler/encodump/README.md
new file mode 100644
index 000000000..1a2b44969
--- /dev/null
+++ b/compiler/encodump/README.md
@@ -0,0 +1,69 @@
+# encodump
+
+_encodump_ is a dumper for coco IR generated by enco
+
+## How to use
+Sources for _encodump_ are:
+1. enco frontend library `*.so` file
+1. model description file for matching to enco frontend
+```
+$ path/to/encodump \
+ --frontend [enco frontend library .so file]
+ --frontend-arg [model file] ...
+```
+
+Currently supported enco frontends are Caffe and TensorFlow Lite. For Caffe, both `*.prototxt` and `*.caffemodel` are required, and for TFLite, a `*.tflite` flatbuffers file is required.
+
+Output is dumped into terminal.
+
+## Example
+```
+nncc$ ./build/compiler/encodump/encodump \
+ --frontend ./build/compiler/enco/frontend/tflite/libenco_tflite_frontend.so \
+ --frontend-arg ./build/compiler/enco/test/tflite/Conv2D_000.tflite
+```
+
+Output:
+```
+<Module>
+ <Block> (index: 0)
+ <Inst>:
+ Eval (0x10cfa90)
+ out: 0x10cf960
+ <op>:
+ Load(0x10cf600, obj: 0x10cd670)
+ Conv2D(0x10cf8a0, ker obj: 0x10cf2d0, padding [T/B/L/R=0,0,0,0], stride [V/H = 1,1])
+ <Inst>:
+ Eval (0x10cff80)
+ out: 0x10cfb20
+ <op>:
+ Load(0x10cfe70, obj: 0x10cfcc0)
+ Load(0x10cfdd0, obj: 0x10cf960)
+ Add
+ <Inst>:
+ Copy (0x10d0120)
+ from: 0x10cfb20
+ into: 0x10cfff0
+ <Inst>:
+ Copy (0x10d01f0)
+ from: 0x10cfff0
+ into: 0x10cf210
+ <Input>: bag 0x10ce650, name=ifm
+ <Output>: bag 0x10ce9c0, name=ofm
+ <Bag>:
+ 0x10ce650, obj: [0x10cd670], size: 18, input, const, reader: [x], updater: [x],
+ 0x10ce770, obj: [0x10cf2d0], size: 2, const, reader: [x], updater: [x],
+ 0x10ce890, obj: [0x10cfcc0], size: 1, const, reader: [x], updater: [x],
+ 0x10ce9c0, obj: [0x10cf210], size: 9, output, const, reader: [x], updater: [x],
+ 0x10cf9d0, obj: [0x10cf960], size: 9, const, reader: [x], updater: [x],
+ 0x10cfbe0, obj: [0x10cfb20], size: 9, const, reader: [x], updater: [x],
+ 0x10d0060, obj: [0x10cfff0], size: 9, const, reader: [x], updater: [x],
+ <Object>:
+ 0x10cd670, bag: 0x10ce650, kind: Feature, Shape [H/W/D=3,3,2], producer: x, comsumer: [op: 0x10cf600]
+ 0x10cf210, bag: 0x10ce9c0, kind: Feature, Shape [H/W/D=3,3,1], producer: instr: 0x10d01f0, comsumer: [x]
+ 0x10cf2d0, bag: 0x10ce770, kind: Kernel, Shape [N/H/W/D=1,1,1,2], producer: x, comsumer: [op: 0x10cf8a0]
+ 0x10cf960, bag: 0x10cf9d0, kind: Feature, Shape [H/W/D=3,3,1], producer: instr: 0x10cfa90, comsumer: [op: 0x10cfdd0]
+ 0x10cfb20, bag: 0x10cfbe0, kind: Feature, Shape [H/W/D=3,3,1], producer: instr: 0x10cff80, comsumer: [inst: 0x10d0120]
+ 0x10cfcc0, bag: 0x10ce890, kind: Feature, Shape [H/W/D=3,3,1], producer: x, comsumer: [op: 0x10cfe70]
+ 0x10cfff0, bag: 0x10d0060, kind: Feature, Shape [H/W/D=3,3,1], producer: instr: 0x10d0120, comsumer: [inst: 0x10d01f0]
+```
diff --git a/compiler/encodump/requires.cmake b/compiler/encodump/requires.cmake
new file mode 100644
index 000000000..3d1ca094e
--- /dev/null
+++ b/compiler/encodump/requires.cmake
@@ -0,0 +1 @@
+require("safemain")
diff --git a/compiler/encodump/src/Driver.cpp b/compiler/encodump/src/Driver.cpp
new file mode 100644
index 000000000..f27cbe904
--- /dev/null
+++ b/compiler/encodump/src/Driver.cpp
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <enco/Frontend.h>
+#include <enco/Backend.h>
+
+#include <cmdline/View.h>
+
+#include <string>
+#include <vector>
+
+#include <functional>
+
+#include "Dump.h"
+
+namespace cmdline
+{
+
+// TODO Extract this helper class
+class Vector : public cmdline::View
+{
+public:
+ uint32_t size(void) const { return _args.size(); }
+
+public:
+ const char *at(uint32_t nth) const { return _args.at(nth).c_str(); }
+
+public:
+ Vector &append(const std::string &arg)
+ {
+ _args.emplace_back(arg);
+ return (*this);
+ }
+
+private:
+ std::vector<std::string> _args;
+};
+
+} // namespace cmdline
+
+namespace
+{
+
+class Zone
+{
+public:
+ Zone() = default;
+
+public:
+ const cmdline::View *args(void) const { return &_args; }
+
+public:
+ void append(const std::string &arg) { _args.append(arg); }
+
+private:
+ cmdline::Vector _args;
+};
+
+} // namespace
+
+#include <dlfcn.h>
+
+namespace
+{
+
+class FrontendFactory
+{
+public:
+ FrontendFactory(const std::string &path)
+ {
+ _handle = dlopen(path.c_str(), RTLD_LAZY);
+ assert(_handle != nullptr);
+ }
+
+public:
+ // Copy is not allowed to avoid double close
+ FrontendFactory(const FrontendFactory &) = delete;
+ FrontendFactory(FrontendFactory &&) = delete;
+
+public:
+ ~FrontendFactory() { dlclose(_handle); }
+
+private:
+ using Entry = std::unique_ptr<enco::Frontend> (*)(const cmdline::View &);
+
+private:
+ Entry entry(void) const
+ {
+ auto entry = reinterpret_cast<Entry>(dlsym(_handle, "make_frontend"));
+ assert(entry != nullptr);
+ return entry;
+ }
+
+public:
+ std::unique_ptr<enco::Frontend> make(const cmdline::View *args) const
+ {
+ auto fn = entry();
+ return fn(*args);
+ }
+
+private:
+ void *_handle;
+};
+
+} // namespace
+
+namespace
+{
+
+class FrontendZone : public Zone
+{
+public:
+ FrontendZone(const std::string &path) : _factory{path}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const FrontendFactory *factory(void) const { return &_factory; }
+
+private:
+ FrontendFactory _factory;
+};
+
+} // namespace
+
+#include <stdex/Memory.h>
+
+#include <map>
+
+#include <iostream>
+#include <stdexcept>
+
+/**
+ * @brief Dump IR for given arguments
+ *
+ * Call example:
+ * $ ./build/compiler/encodump/encodump \
+ * --frontend build/compiler/enco/frontend/caffe/libenco_caffe_frontend.so \
+ * --frontend-arg build/compiler/enco/test/caffe/Convolution_003.prototxt \
+ * --frontend-arg build/compiler/enco/test/caffe/Convolution_003.caffemodel
+ */
+int entry(int argc, char **argv)
+{
+ // Usage:
+ // [Command] --frontend [Frontend .so path] --frontend-arg ...
+ std::unique_ptr<FrontendZone> frontend_zone;
+
+ // Simple argument parser (based on map)
+ std::map<std::string, std::function<void(const std::string &arg)>> argparse;
+
+ argparse["--frontend"] = [&](const std::string &path) {
+ frontend_zone = stdex::make_unique<FrontendZone>(path);
+ };
+
+ argparse["--frontend-arg"] = [&](const std::string &arg) { frontend_zone->append(arg); };
+
+ if (argc < 2)
+ {
+ std::cerr << "Usage:" << std::endl;
+ std::cerr << "[Command] --frontend [.so path]" << std::endl;
+ std::cerr << " --frontend-arg [argument] ..." << std::endl;
+ return 255;
+ }
+
+ for (int n = 1; n < argc; n += 2)
+ {
+ const std::string tag{argv[n]};
+ const std::string arg{argv[n + 1]};
+
+ auto it = argparse.find(tag);
+
+ if (it == argparse.end())
+ {
+ std::cerr << "Option '" << tag << "' is not supported" << std::endl;
+ return 255;
+ }
+
+ it->second(arg);
+ }
+
+ assert(frontend_zone != nullptr);
+
+ auto frontend = frontend_zone->factory()->make(frontend_zone->args());
+
+ auto bundle = frontend->load();
+
+ // dump
+ dump(bundle.module());
+
+ // todo : dump data
+
+ return 0;
+}
diff --git a/compiler/encodump/src/Dump.cpp b/compiler/encodump/src/Dump.cpp
new file mode 100644
index 000000000..7ec00e2e2
--- /dev/null
+++ b/compiler/encodump/src/Dump.cpp
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Dump.cpp
+ * @brief Print coco IR produced from enco frontend
+ *
+ * @note Some object inherits multiple parents.
+ * For example, coco:Conv2D inherits coco::Consumer and more. Assume that op is an instance
+ * of coco::Conv2D. In this case, the printing results of the following may be different:
+ * 1) cout << op; // printing address of type coco::Conv2D
+ * 2) cout << reinterpret_cast<const coco::Object::Consumer *>(op);
+ * 3) cout << object->consumer(); // assume that this object->consumer() returns op
+ * 4) cout << dynamic_cast<const coco::Object::Consumer *>(op);
+ * 1) and 2) prints same address. 3) and 4) prints same address but different from 1) and 2).
+ * For details, refer to
+ * https://stackoverflow.com/questions/22256620/why-pointers-to-the-same-object-have-different-values
+ * For dumping, we will use 3), 4)
+ */
+#include "Dump.h"
+
+#include <functional>
+#include <iostream>
+
+std::string tab(int n) { return std::string(n * 2, ' '); }
+
+struct OpPrinter final : public coco::Op::Visitor<void>
+{
+public:
+ OpPrinter(std::ostream &os, int indent) : _os(os), _indent(indent) {}
+
+public:
+ void visit(const coco::Load *op) override
+ {
+ _os << tab(_indent) << "Load(" << dynamic_cast<const coco::Op *>(op)
+ << ", obj: " << op->object() << ")" << std::endl;
+ }
+
+ void visit(const coco::PadF *op) override
+ {
+ op->arg()->accept(this);
+ _os << tab(_indent) << "PadF" << std::endl;
+ }
+
+ void visit(const coco::Conv2D *op) override
+ {
+ op->arg()->accept(this);
+ const coco::Padding2D *pad = op->pad();
+ const coco::Stride2D *stride = op->stride();
+
+ _os << tab(_indent) << "Conv2D(" << dynamic_cast<const coco::Op *>(op)
+ << ", ker obj: " << op->ker() << ", padding [T/B/L/R=" << pad->top() << "," << pad->bottom()
+ << "," << pad->left() << "," << pad->right() << "]"
+ << ", stride [V/H = " << stride->vertical() << "," << stride->horizontal() << "]"
+ << ")" << std::endl;
+ }
+
+ void visit(const coco::MaxPool2D *op) override
+ {
+ op->arg()->accept(this);
+ _os << tab(_indent) << "MaxPool2D" << std::endl;
+ }
+
+ void visit(const coco::AvgPool2D *op) override
+ {
+ op->arg()->accept(this);
+ _os << tab(_indent) << "AvgPool2D" << std::endl;
+ }
+
+ void visit(const coco::Add *op) override
+ {
+ op->left()->accept(this);
+ op->right()->accept(this);
+ _os << tab(_indent) << "Add" << std::endl;
+ }
+
+ void visit(const coco::Mul *op) override
+ {
+ op->left()->accept(this);
+ op->right()->accept(this);
+ _os << tab(_indent) << "Mul" << std::endl;
+ }
+
+ void visit(const coco::ReLU *op) override
+ {
+ op->arg()->accept(this);
+ _os << tab(_indent) << "ReLU" << std::endl;
+ }
+
+ void visit(const coco::ReLU6 *op) override
+ {
+ op->arg()->accept(this);
+ _os << tab(_indent) << "ReLU6" << std::endl;
+ }
+
+ void visit(const coco::Sub *op) override
+ {
+ op->left()->accept(this);
+ op->right()->accept(this);
+ _os << tab(_indent) << "Sub" << std::endl;
+ }
+
+ void visit(const coco::ConcatF *op) override
+ {
+ op->left()->accept(this);
+ op->right()->accept(this);
+ _os << tab(_indent) << "ConcatF" << std::endl;
+ }
+
+ void visit(const coco::Div *op) override
+ {
+ op->left()->accept(this);
+ op->right()->accept(this);
+ _os << tab(_indent) << "Div" << std::endl;
+ }
+
+private:
+ std::ostream &_os;
+
+private:
+ int _indent;
+};
+
+struct InstrPrinter final : public coco::Instr::Visitor<void>
+{
+public:
+ InstrPrinter() = delete;
+
+ InstrPrinter(int indent) : _indent(indent) {}
+
+ void visit(const coco::Eval *ins) override
+ {
+ std::cout << tab(_indent) << "Eval (" << dynamic_cast<const coco::Instr *>(ins) << ")"
+ << std::endl;
+ std::cout << tab(_indent + 1) << "out: " << ins->out() << std::endl;
+ std::cout << tab(_indent + 1) << "<op>: " << std::endl;
+ {
+ OpPrinter prn(std::cout, _indent + 2);
+ ins->op()->accept(prn);
+ }
+ }
+
+ void visit(const coco::Copy *ins) override
+ {
+    // Copy is a Producer and also a Consumer. We will use the address for Producer
+ std::cout << tab(_indent) << "Copy (" << dynamic_cast<const coco::Instr *>(ins) << ")"
+ << std::endl;
+ std::cout << tab(_indent) << " from: " << ins->from() << std::endl;
+ std::cout << tab(_indent) << " into: " << ins->into() << std::endl;
+ }
+
+ void visit(const coco::Shuffle *ins) override
+ {
+ std::cout << tab(_indent) << "Shuffle (" << dynamic_cast<const coco::Instr *>(ins) << ")"
+ << std::endl;
+ std::cout << tab(_indent) << " from: " << ins->from() << std::endl;
+ std::cout << tab(_indent) << " into: " << ins->into() << std::endl;
+ }
+
+private:
+ int _indent;
+};
+
+void dump(const coco::Op *op, int indent)
+{
+ OpPrinter prn(std::cout, indent);
+ op->accept(prn);
+}
+
+void dump(const coco::Instr *ins, int indent)
+{
+ std::cout << tab(indent) << "<Inst>:" << std::endl;
+
+ static InstrPrinter prn(indent + 1);
+
+ ins->accept(prn);
+}
+
+void dump(const coco::Block *B, int indent)
+{
+ std::cout << tab(indent) << "<Block> (index: " << B->index().value() << ")" << std::endl;
+ for (auto I = B->instr()->head(); I != nullptr; I = I->next())
+ {
+ dump(I, indent + 1);
+ }
+}
+
+void dump(const coco::BlockList *L, int indent)
+{
+ for (auto B = L->head(); B != nullptr; B = B->next())
+ {
+ dump(B, indent);
+ }
+}
+
+template <typename SetT, typename EntityF>
+void dump(std::string header, SetT set, EntityF print_addr_f)
+{
+ std::cout << header << ": [";
+ if (set->size() == 0)
+ std::cout << "x";
+ else
+ {
+ int idx = 0;
+ for (auto entity : *set)
+ {
+ if (idx++ != 0)
+ std::cout << ", ";
+ print_addr_f(entity);
+ }
+ }
+ std::cout << "]";
+}
+
+void dump(const coco::BagManager *l, int indent)
+{
+ std::cout << tab(indent) << "<Bag>:" << std::endl;
+
+ for (auto n = 0; n < l->size(); ++n)
+ {
+ auto bag = l->at(n);
+
+ std::cout << tab(indent + 1) << bag << ", ";
+
+ // print objects in bag->deps()
+ auto print_dep_object = [](coco::Dep *dep) { std::cout << dep->object(); };
+ dump("obj", bag->deps(), print_dep_object);
+ std::cout << ", ";
+
+ std::cout << "size: " << bag->size() << ", ";
+
+ if (bag->isInput())
+ std::cout << "input, ";
+ if (bag->isOutput())
+ std::cout << "output, ";
+ if ((!bag->isInput()) || (!bag->isOutput()))
+ std::cout << "const, ";
+
+ // print readers in bag->reads()
+ auto print_read_reader = [](coco::Read *read) {
+ if (coco::Op *op = dynamic_cast<coco::Op *>(read->reader()))
+ std::cout << "op: " << op;
+ else if (coco::Instr *instr = dynamic_cast<coco::Instr *>(read->reader()))
+ std::cout << "instr: " << instr;
+ else
+ std::cout << "x";
+ };
+ dump("reader", bag->reads(), print_read_reader);
+ std::cout << ", ";
+
+ // print updaters in bag->updates()
+ auto print_update_updater = [](coco::Update *update) {
+ if (coco::Op *op = dynamic_cast<coco::Op *>(update->updater()))
+ std::cout << "op: " << op;
+ else if (coco::Instr *instr = dynamic_cast<coco::Instr *>(update->updater()))
+ std::cout << "instr: " << instr;
+ else
+ std::cout << "x";
+ };
+ dump("updater", bag->updates(), print_update_updater);
+ std::cout << ", ";
+
+ std::cout << std::endl;
+ }
+}
+
+void dump(coco::FeatureObject *feature_ob)
+{
+ auto shape = feature_ob->shape();
+ std::cout << "kind: Feature, Shape [H/W/D=" << shape.height() << "," << shape.width() << ","
+ << shape.depth() << "]";
+}
+
+void dump(coco::KernelObject *kernel_ob)
+{
+ auto shape = kernel_ob->shape();
+ std::cout << "kind: Kernel, Shape [N/H/W/D=" << shape.count() << "," << shape.height() << ","
+ << shape.width() << "," << shape.depth() << "]";
+}
+
+void dump(const coco::ObjectManager *l, int indent)
+{
+ std::cout << tab(indent) << "<Object>:" << std::endl;
+ for (auto n = 0; n < l->size(); ++n)
+ {
+ auto obj = l->at(n);
+ std::cout << tab(indent + 1) << obj << ", bag: " << obj->bag() << ", ";
+
+ using ObDumpers = std::function<void(coco::Object * ob)>;
+
+ std::map<coco::Object::Kind, ObDumpers> ob_dumpers;
+
+ ob_dumpers[coco::Object::Kind::Feature] = [](coco::Object *ob) { dump(ob->asFeature()); };
+ ob_dumpers[coco::Object::Kind::Kernel] = [](coco::Object *ob) { dump(ob->asKernel()); };
+ ob_dumpers[coco::Object::Kind::Unknown] = [](coco::Object *ob) {
+ std::cout << "kind: Unknown";
+ };
+
+ ob_dumpers[obj->kind()](obj);
+
+ std::cout << ", producer: ";
+ auto def = obj->def();
+ if (def)
+ {
+ if (coco::Op *op = dynamic_cast<coco::Op *>(def->producer()))
+ std::cout << "op: " << op;
+ else if (coco::Instr *instr = dynamic_cast<coco::Instr *>(def->producer()))
+ std::cout << "instr: " << instr;
+ else
+ std::cout << "x";
+ }
+ else
+ std::cout << "x";
+ std::cout << ", ";
+
+ // print consumers in obj->uses()
+ auto print_consumer = [](coco::Use *use) {
+ if (coco::Op *op = dynamic_cast<coco::Op *>(use->consumer()))
+ std::cout << "op: " << op;
+ else if (coco::Instr *instr = dynamic_cast<coco::Instr *>(use->consumer()))
+ std::cout << "inst: " << instr;
+ else
+ std::cout << "x";
+ };
+ dump("comsumer", obj->uses(), print_consumer);
+ std::cout << std::endl;
+ }
+}
+
+template <typename T> void head(int indent);
+
+template <> void head<coco::Input>(int indent) { std::cout << tab(indent) << "<Input>: "; }
+
+template <> void head<coco::Output>(int indent) { std::cout << tab(indent) << "<Output>: "; }
+
+template <typename PtrItemT> void dump(const coco::PtrList<PtrItemT> *list, int indent)
+{
+ head<PtrItemT>(indent);
+ for (int n = 0; n < list->size(); n++)
+ {
+ const PtrItemT *item = list->at(n);
+ if (n != 0)
+ std::cout << ", ";
+ std::cout << "bag " << item->bag() << ", name=" << item->name();
+ }
+ std::cout << std::endl;
+}
+
+void dump(const coco::Module *module)
+{
+ std::cout << "<Module>" << std::endl;
+
+ dump(module->block(), 1);
+ dump(module->input(), 1);
+ dump(module->output(), 1);
+ dump(module->entity()->bag(), 1);
+ dump(module->entity()->object(), 1);
+}
diff --git a/compiler/encodump/src/Dump.h b/compiler/encodump/src/Dump.h
new file mode 100644
index 000000000..6ea69b978
--- /dev/null
+++ b/compiler/encodump/src/Dump.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DUMP_H__
+#define __DUMP_H__
+
+#include <coco/IR.h>
+
+void dump(const coco::Module *module);
+
+#endif // __DUMP_H__
diff --git a/compiler/exo/CMakeLists.txt b/compiler/exo/CMakeLists.txt
new file mode 100644
index 000000000..79c75ef2e
--- /dev/null
+++ b/compiler/exo/CMakeLists.txt
@@ -0,0 +1,73 @@
+nnas_find_package(FlatBuffers QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "Build exo: FALSE (missing FlatBuffers)")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+nnas_find_package(TensorFlowSource EXACT 1.14 QUIET)
+
+if(NOT TensorFlowSource_FOUND)
+ message(STATUS "Build exo: FALSE (missing TensorFlowSource)")
+ return()
+endif(NOT TensorFlowSource_FOUND)
+
+message(STATUS "Build exo: TRUE")
+
+set(TFLITE_SCHEMA_DIR "${TensorFlowSource_DIR}/tensorflow/lite/schema")
+set(CIRCLE_SCHEMA_DIR "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema")
+
+FlatBuffers_Target(exo_tflite_fbs
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${TFLITE_SCHEMA_DIR}"
+ SCHEMA_FILES schema.fbs
+)
+
+FlatBuffers_Target(exo_circle_fbs
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${CIRCLE_SCHEMA_DIR}"
+ SCHEMA_FILES circle_schema.fbs
+)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(exo SHARED ${SOURCES})
+target_include_directories(exo PUBLIC include)
+target_include_directories(exo PRIVATE src)
+target_link_libraries(exo PUBLIC exo_tflite_fbs)
+target_link_libraries(exo PUBLIC exo_circle_fbs)
+target_link_libraries(exo PUBLIC loco)
+target_link_libraries(exo PRIVATE stdex)
+target_link_libraries(exo PRIVATE pepper_str)
+target_link_libraries(exo PRIVATE pepper_strcast)
+target_link_libraries(exo PRIVATE locoex_customop)
+target_link_libraries(exo PRIVATE locop)
+target_link_libraries(exo PRIVATE hermes_std)
+target_link_libraries(exo PRIVATE logo)
+target_link_libraries(exo PRIVATE oops)
+install(TARGETS exo DESTINATION lib)
+
+# Let's apply nncc common compile options
+#
+# NOTE This will enable strict compilation (warnings as error).
+# Please refer to the top-level CMakeLists.txt for details
+target_link_libraries(exo PRIVATE nncc_common)
+
+if (NOT ENABLE_TEST)
+ return()
+endif (NOT ENABLE_TEST)
+
+# Google Test is mandatory for internal testing
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(exo_test ${TESTS})
+target_include_directories(exo_test PRIVATE src)
+target_link_libraries(exo_test stdex)
+target_link_libraries(exo_test pepper_str)
+target_link_libraries(exo_test exo)
+target_link_libraries(exo_test hermes_std)
+target_link_libraries(exo_test logo)
+target_link_libraries(exo_test oops)
+target_link_libraries(exo_test locoex_customop)
diff --git a/compiler/exo/README.md b/compiler/exo/README.md
new file mode 100644
index 000000000..bfa7fcfd3
--- /dev/null
+++ b/compiler/exo/README.md
@@ -0,0 +1,12 @@
+# exo
+
+_exo_ includes _loco_-to-_T/F Lite_ exporter (as a library).
+
+## How to add a new TFL node
+
+1. Add a new TFL node into `TFLNodes.lst` and `TFLNodes.h`
+1. Define a knob in `Knob.lst` if you need a knob.
+1. Add appropriate methods in `TFLShapeInferenceRule.cpp` and `TFLTypeInferenceRule.cpp`
+1. Add a new converter under `Conversion` directory
+1. Add an appropriate method in `OperationExporter.cpp`
+1. Register the converter into `Convert.cpp`
diff --git a/compiler/exo/include/exo/CircleExporter.h b/compiler/exo/include/exo/CircleExporter.h
new file mode 100644
index 000000000..7ec159303
--- /dev/null
+++ b/compiler/exo/include/exo/CircleExporter.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __EXO_CIRCLE_EXPORTER_H__
+#define __EXO_CIRCLE_EXPORTER_H__
+
+#include <loco.h>
+
+#include <memory>
+
+namespace exo
+{
+
+/**
+ * HOW TO USE:
+ *
+ * loco::Graph *g = ...;
+ *
+ * CircleExporter e(g);
+ * e.dumpToFile("model.circle");
+ *
+ * HOW TO USE (simplified):
+ *
+ * CircleExporter(g).dumpToFile("model.circle");
+ *
+ */
+class CircleExporter
+{
+public:
+ class Impl;
+
+public:
+ explicit CircleExporter(loco::Graph *graph);
+ ~CircleExporter();
+
+ /**
+ * @brief write to a file
+ * @param path path to file where to write data
+ * @throws any file related exceptions
+ */
+ void dumpToFile(const char *path) const;
+
+private:
+ std::unique_ptr<Impl> _impl;
+};
+
+} // namespace exo
+
+#endif // __EXO_CIRCLE_EXPORTER_H__
diff --git a/compiler/exo/include/exo/LoggingContext.h b/compiler/exo/include/exo/LoggingContext.h
new file mode 100644
index 000000000..5f10ceb93
--- /dev/null
+++ b/compiler/exo/include/exo/LoggingContext.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __EXO_LOGGING_CONTEXT_H__
+#define __EXO_LOGGING_CONTEXT_H__
+
+#include <hermes.h>
+
+namespace exo
+{
+
+/**
+ * @brief Global logging context
+ */
+struct LoggingContext
+{
+ static hermes::Context *get(void);
+};
+
+} // namespace exo
+
+#endif // __EXO_LOGGING_CONTEXT_H__
diff --git a/compiler/exo/include/exo/TFLExporter.h b/compiler/exo/include/exo/TFLExporter.h
new file mode 100644
index 000000000..49cce2af5
--- /dev/null
+++ b/compiler/exo/include/exo/TFLExporter.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __EXO_TFL_EXPORTER_H__
+#define __EXO_TFL_EXPORTER_H__
+
+#include <loco.h>
+
+#include <memory>
+
+namespace exo
+{
+
+/**
+ * HOW TO USE:
+ *
+ * loco::Graph *g = ...;
+ *
+ * TFLExporter e(g);
+ * e.dumpToFile("model.tflite");
+ *
+ * HOW TO USE (simplified):
+ *
+ * TFLExporter(g).dumpToFile("model.tflite");
+ *
+ */
+class TFLExporter
+{
+public:
+ class Impl;
+
+public:
+ explicit TFLExporter(loco::Graph *graph);
+ ~TFLExporter();
+
+ /**
+ * @brief write to a file
+ * @param path path to file where to write data
+ * @throws any file related exceptions
+ */
+ void dumpToFile(const char *path) const;
+
+private:
+ std::unique_ptr<Impl> _impl;
+};
+
+} // namespace exo
+
+#endif // __EXO_TFL_EXPORTER_H__
diff --git a/compiler/exo/requires.cmake b/compiler/exo/requires.cmake
new file mode 100644
index 000000000..6378b942d
--- /dev/null
+++ b/compiler/exo/requires.cmake
@@ -0,0 +1,6 @@
+require("stdex")
+require("loco")
+require("locoex-customop")
+require("logo")
+require("pepper-str")
+require("oops")
diff --git a/compiler/exo/src/Check.h b/compiler/exo/src/Check.h
new file mode 100644
index 000000000..79dac50dd
--- /dev/null
+++ b/compiler/exo/src/Check.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CHECK_H__
+#define __CHECK_H__
+
+#include <pepper/str.h>
+
+#include <stdexcept>
+#include <cassert>
+#include <iostream>
+
+// TODO Add macro for Release version
+
+#define EXO_ASSERT(condition, msg) \
+ { \
+ if (!(condition)) \
+ { \
+ std::cerr << "[assert failed] " << (msg) << ". " << std::endl; \
+ assert((condition)); \
+ } \
+ }
+
+#endif // __CHECK_H__
diff --git a/compiler/exo/src/Circle/CircleExporter.cpp b/compiler/exo/src/Circle/CircleExporter.cpp
new file mode 100644
index 000000000..797749090
--- /dev/null
+++ b/compiler/exo/src/Circle/CircleExporter.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exo/CircleExporter.h"
+
+#include "CircleExporterImpl.h"
+
+#include <stdex/Memory.h>
+
+#include <oops/InternalExn.h>
+
+#include <fstream>
+
+namespace exo
+{
+
+CircleExporter::CircleExporter(loco::Graph *graph) : _impl(stdex::make_unique<Impl>(graph))
+{
+ // NOTHING TO DO
+}
+
+CircleExporter::~CircleExporter() = default;
+
+void CircleExporter::dumpToFile(const char *path) const
+{
+ const char *ptr = _impl->getBufferPointer();
+ const size_t size = _impl->getBufferSize();
+
+ if (!ptr)
+ INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
+
+ std::ofstream file(path, std::ofstream::binary);
+ file.write(ptr, size);
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Circle/CircleExporterImpl.cpp b/compiler/exo/src/Circle/CircleExporterImpl.cpp
new file mode 100644
index 000000000..4cba33da1
--- /dev/null
+++ b/compiler/exo/src/Circle/CircleExporterImpl.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleExporterImpl.h"
+
+#include "Convert.h"
+#include "ExoOptimize.h"
+
+#include "CircleTensorExporter.h"
+#include "CircleOperationExporter.h"
+#include "CircleExporterUtils.h"
+
+#include "Log.h"
+#include "Knob.h"
+
+#include <oops/InternalExn.h>
+
+#include <cassert>
+#include <unordered_map>
+#include <string>
+#include <stdexcept>
+
+namespace
+{
+
+using namespace exo::circle_detail;
+
+void registerGraphInputTensors(loco::Graph *graph, SubGraphContext &ctx)
+{
+ for (uint32_t n = 0; n < graph->inputs()->size(); ++n)
+ {
+ auto node = loco::pull_node(graph, n);
+ assert(node != nullptr);
+ ctx._inputs.push_back(get_tensor_index(node));
+ }
+}
+
+void registerGraphOutputTensors(loco::Graph *graph, SubGraphContext &ctx)
+{
+ for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
+ {
+ auto push = loco::push_node(graph, n);
+ assert(push != nullptr);
+ auto node = push->from();
+ assert(node != nullptr);
+ ctx._outputs.push_back(get_tensor_index(node));
+ }
+}
+
+} // namespace
+
+namespace
+{
+
+using namespace circle;
+using namespace flatbuffers;
+
+Offset<Vector<Offset<OperatorCode>>>
+encodeOperatorCodes(FlatBufferBuilder &builder, std::unordered_map<OpCode, uint32_t> &opcodes,
+ std::unordered_map<OpCode, std::string> &custom_opcodes)
+{
+ std::vector<Offset<OperatorCode>> operator_codes_vec(opcodes.size());
+ for (auto it : opcodes)
+ {
+ uint32_t idx = it.second;
+ if (it.first.opcode != BuiltinOperator_CUSTOM)
+ {
+ operator_codes_vec[idx] = CreateOperatorCode(builder, it.first.opcode);
+ }
+ else // custom op
+ {
+ auto opCode = it.first;
+ auto custom_code = custom_opcodes.find(opCode);
+ if (custom_code == custom_opcodes.end())
+ INTERNAL_EXN("Cannot find code for customop even though opcode is BuiltinOperator_CUSTOM");
+
+ operator_codes_vec[idx] =
+ CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
+ }
+ }
+ return builder.CreateVector(operator_codes_vec);
+}
+
+} // namespace
+
+namespace exo
+{
+
+using namespace exo::circle_detail;
+using namespace circle;
+using namespace flatbuffers;
+
+CircleExporter::Impl::Impl(loco::Graph *graph) { exportGraph(graph); }
+
+::flatbuffers::Offset<::circle::SubGraph>
+CircleExporter::Impl::exportSubgraph(SerializedModelData &gd)
+{
+ auto tensors = _builder.CreateVector(gd._tensors);
+ auto inputs = _builder.CreateVector(gd._inputs);
+ auto outputs = _builder.CreateVector(gd._outputs);
+ auto operators = _builder.CreateVector(gd._operators);
+ auto df = gd._data_format;
+ auto subgraph = CreateSubGraph(_builder, tensors, inputs, outputs, operators, df);
+ return subgraph;
+}
+
+void CircleExporter::Impl::exportGraph(loco::Graph *graph)
+{
+ LOGGER(l);
+
+ // IR-level conversion and optimization
+ {
+ convert_to_TFLNodes(graph);
+ set(Dialect::CIRCLE);
+ optimize(graph);
+ }
+
+ _builder.Clear();
+
+ SerializedModelData gd;
+
+ // This version is taken from comment in fbs
+ constexpr uint32_t version = 0;
+
+ registerGraphIOName(graph, gd);
+
+ // parse graph into SerializedModelData structure
+ exportOpDefinedTensors(graph, _builder, gd);
+
+ // NOTE Invoke these register functions only after each node is annotated with its tensor_index
+ registerGraphInputTensors(graph, gd);
+ registerGraphOutputTensors(graph, gd);
+
+ exportNodes(graph, _builder, gd);
+
+ // encode operator codes
+ auto operator_codes =
+ encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
+
+ // Subgraphs
+ Offset<SubGraph> subgraph = exportSubgraph(gd);
+ auto subgraphs = _builder.CreateVector(std::vector<Offset<SubGraph>>{subgraph});
+
+ // Description
+ std::string description_str = "nnpackage";
+ auto description = _builder.CreateString(description_str);
+
+ // create array of buffers
+ auto buffers = _builder.CreateVector(gd._buffers);
+
+ // empty metadata
+ std::vector<int> metadata_buffer_vec;
+ auto metadata_buffer = _builder.CreateVector(metadata_buffer_vec);
+
+ // Model
+ auto model_offset = CreateModel(_builder, version, operator_codes, subgraphs, description,
+ buffers, metadata_buffer);
+ FinishModelBuffer(_builder, model_offset);
+}
+
+const char *CircleExporter::Impl::getBufferPointer() const
+{
+ return reinterpret_cast<const char *>(_builder.GetBufferPointer());
+}
+
+size_t CircleExporter::Impl::getBufferSize() const { return _builder.GetSize(); }
+
+} // namespace exo
diff --git a/compiler/exo/src/Circle/CircleExporterImpl.h b/compiler/exo/src/Circle/CircleExporterImpl.h
new file mode 100644
index 000000000..b1138fbad
--- /dev/null
+++ b/compiler/exo/src/Circle/CircleExporterImpl.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EXPORTER_IMPL_H__
+#define __CIRCLE_EXPORTER_IMPL_H__
+
+#include "exo/CircleExporter.h"
+#include "circle_schema_generated.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+namespace circle_detail
+{
+
+struct SerializedModelData;
+
+} // namespace circle_detail
+
+using namespace circle_detail;
+
+/**
+ * internal implementation of interface exporter class
+ */
+class CircleExporter::Impl
+{
+public:
+ Impl() = delete;
+ ~Impl() = default;
+
+ explicit Impl(loco::Graph *graph);
+
+ /**
+ * @return pointer to buffer with serialized graph
+ */
+ const char *getBufferPointer() const;
+
+ /**
+ * @return size of buffer with serialized graph
+ */
+ size_t getBufferSize() const;
+
+private:
+ /**
+ * @brief create Subgraph using data stored in SerializedModelData
+ * @param gd information about serializer parts of model
+ * @return offset in buffer corresponding to serialized subgraph
+ */
+ flatbuffers::Offset<circle::SubGraph> exportSubgraph(SerializedModelData &gd);
+
+ /**
+ * @brief root function that writes graph into internal buffer
+ * @param graph
+ */
+ void exportGraph(loco::Graph *graph);
+
+private:
+ flatbuffers::FlatBufferBuilder _builder;
+};
+
+} // namespace exo
+
+#endif // __CIRCLE_EXPORTER_IMPL_H__
diff --git a/compiler/exo/src/Circle/CircleExporterUtils.cpp b/compiler/exo/src/Circle/CircleExporterUtils.cpp
new file mode 100644
index 000000000..12b204ce7
--- /dev/null
+++ b/compiler/exo/src/Circle/CircleExporterUtils.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleExporterUtils.h"
+
+#include <oops/InternalExn.h>
+
+namespace exo
+{
+
+circle::ActivationFunctionType to_circle_actfunc(locoex::FusedActFunc func)
+{
+ switch (func)
+ {
+ case locoex::FusedActFunc::NONE:
+ return circle::ActivationFunctionType_NONE;
+ case locoex::FusedActFunc::RELU:
+ return circle::ActivationFunctionType_RELU;
+ case locoex::FusedActFunc::RELU6:
+ return circle::ActivationFunctionType_RELU6;
+ default:
+ INTERNAL_EXN_V("trying to convert unsupported locoex::FusedActFunc", oops::to_uint32(func));
+ }
+}
+
+} // namespace exo
+
+namespace exo
+{
+namespace circle_detail
+{
+
+uint32_t SerializedModelData::registerBuiltinOpcode(circle::BuiltinOperator builtin_code)
+{
+ auto it = _operator_codes.find(OpCode{builtin_code});
+ if (it != _operator_codes.end())
+ {
+ return it->second;
+ }
+ auto idx = static_cast<uint32_t>(_operator_codes.size());
+ _operator_codes.emplace(OpCode{builtin_code}, idx);
+ return idx;
+}
+
+uint32_t SerializedModelData::registerCustomOpcode(const std::string &custom_op)
+{
+ circle::BuiltinOperator custom_code = circle::BuiltinOperator_CUSTOM;
+ auto idx = registerBuiltinOpcode(custom_code);
+ _custom_operator_codes.emplace(OpCode{custom_code}, custom_op);
+ return idx;
+}
+
+circle::Padding getOpPadding(const loco::Padding2D *pad, const loco::Stride<2> *stride,
+ const ShapeDescription &ifm, const ShapeDescription &ofm)
+{
+ // VALID padding
+ if (pad->top() == 0 && pad->bottom() == 0 && pad->left() == 0 && pad->right() == 0)
+ return circle::Padding_VALID;
+
+ // SAME padding
+ //
+ // For same padding, by definition, following equation should hold:
+ // O = floor((I - 1) / S) + 1
+ // where input size I, output size O, stride S
+ //
+ // NOTE input and output 'feature' map are shape of NHWC
+ bool same_padding_criterion_1 =
+ (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
+ (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
+
+ // For same padding, rear padding is same or bigger than front padding by at most 1
+ bool same_padding_criterion_2 =
+ (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
+ (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
+
+ if (same_padding_criterion_1 && same_padding_criterion_2)
+ return circle::Padding_SAME;
+
+ INTERNAL_EXN("Unsupported padding criteria");
+}
+
+circle::Padding getOpPadding(const locoex::Padding pad)
+{
+ if (pad == locoex::Padding::VALID)
+ return circle::Padding_VALID;
+ if (pad == locoex::Padding::SAME)
+ return circle::Padding_SAME;
+
+ INTERNAL_EXN_V("Unsupported locoex::Padding", oops::to_uint32(pad));
+}
+
+void registerGraphIOName(loco::Graph *graph, SerializedModelData &gd)
+{
+ for (uint32_t in = 0; in < graph->inputs()->size(); ++in)
+ {
+ auto pull = loco::pull_node(graph, in);
+ auto name = graph->inputs()->at(in)->name();
+
+ gd._pull_to_name[pull] = name;
+ }
+ for (uint32_t out = 0; out < graph->outputs()->size(); ++out)
+ {
+ auto push = loco::push_node(graph, out);
+ auto name = graph->outputs()->at(out)->name();
+
+ gd._push_to_name[push] = name;
+ }
+
+ // TODO set this value properly
+ gd._data_format = circle::DataFormat::DataFormat_CHANNELS_LAST;
+}
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+namespace
+{
+
+class TFLTensorIndexAnnotation final : public loco::NodeAnnotation
+{
+public:
+ TFLTensorIndexAnnotation(const TFLTensorIndex &index) : _index{index}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const TFLTensorIndex &index(void) const { return _index; }
+
+private:
+ TFLTensorIndex _index;
+};
+
+} // namespace
+
+void set_tensor_index(loco::Node *node, const TFLTensorIndex &tensor_id)
+{
+ assert(node->annot<TFLTensorIndexAnnotation>() == nullptr);
+ node->annot(stdex::make_unique<TFLTensorIndexAnnotation>(tensor_id));
+}
+
+TFLTensorIndex get_tensor_index(loco::Node *node)
+{
+ assert(node->annot<TFLTensorIndexAnnotation>() != nullptr);
+ return node->annot<TFLTensorIndexAnnotation>()->index();
+}
+
+} // namespace circle_detail
+} // namespace exo
diff --git a/compiler/exo/src/Circle/CircleExporterUtils.h b/compiler/exo/src/Circle/CircleExporterUtils.h
new file mode 100644
index 000000000..fdd162bae
--- /dev/null
+++ b/compiler/exo/src/Circle/CircleExporterUtils.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EXPORTER_UTILS_H__
+#define __CIRCLE_EXPORTER_UTILS_H__
+
+#include "ExporterUtils.h"
+
+#include "circle_schema_generated.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include <loco.h>
+
+#include <unordered_map>
+
+namespace exo
+{
+namespace circle_detail
+{
+
+struct OpCode
+{
+ circle::BuiltinOperator opcode;
+
+ bool operator==(const OpCode &rhs) const { return opcode == rhs.opcode; }
+};
+
+} // namespace circle_detail
+} // namespace exo
+
+namespace exo
+{
+
+circle::ActivationFunctionType to_circle_actfunc(locoex::FusedActFunc func);
+
+} // namespace exo
+
+namespace std
+{
+
+template <> struct hash<exo::circle_detail::OpCode>
+{
+ size_t operator()(const exo::circle_detail::OpCode &x) const { return hash<int>()(x.opcode); }
+};
+
+} // namespace std
+
+namespace exo
+{
+namespace circle_detail
+{
+
+/**
+ * @brief Record the information of T/F Lite SubGraph and its mapping to loco
+ */
+struct SubGraphContext
+{
+ /// @brief SubGraph input tensor id
+ std::vector<int32_t> _inputs;
+ /// @brief SubGraph output tensor id
+ std::vector<int32_t> _outputs;
+  /// @brief DataFormat for SubGraph
+ circle::DataFormat _data_format{circle::DataFormat::DataFormat_CHANNELS_LAST};
+};
+
+// Prerequisites for circle::Model object creation
+struct SerializedModelData final : public SubGraphContext
+{
+ SerializedModelData() = default;
+ SerializedModelData(const SerializedModelData &) = delete;
+
+ std::unordered_map<OpCode, uint32_t> _operator_codes;
+ std::unordered_map<OpCode, std::string> _custom_operator_codes;
+ std::vector<flatbuffers::Offset<circle::Operator>> _operators;
+ std::vector<flatbuffers::Offset<circle::Tensor>> _tensors;
+ std::vector<flatbuffers::Offset<circle::Buffer>> _buffers;
+
+ // Graph input and output names
+ std::unordered_map<loco::Pull *, std::string> _pull_to_name;
+ std::unordered_map<loco::Push *, std::string> _push_to_name;
+
+ /**
+ * @brief if opcode is not registered in table of opcodes add it
+ * @param builtin_code
+ * @return idx of opcode in table of opcodes (see schema)
+ */
+ uint32_t registerBuiltinOpcode(circle::BuiltinOperator builtin_code);
+ uint32_t registerCustomOpcode(const std::string &custom_op);
+};
+
+circle::Padding getOpPadding(const loco::Padding2D *pad, const loco::Stride<2> *stride,
+ const ShapeDescription &ifm, const ShapeDescription &ofm);
+circle::Padding getOpPadding(const locoex::Padding pad);
+
+/// @brief Register graph input and output names to SerializedModelData
+void registerGraphIOName(loco::Graph *graph, SerializedModelData &gd);
+
+using TFLTensorIndex = int32_t;
+
+void set_tensor_index(loco::Node *node, const TFLTensorIndex &tensor_id);
+TFLTensorIndex get_tensor_index(loco::Node *node);
+
+} // namespace circle_detail
+} // namespace exo
+
+#endif // __CIRCLE_EXPORTER_UTILS_H__
diff --git a/compiler/exo/src/Circle/CircleOperationExporter.cpp b/compiler/exo/src/Circle/CircleOperationExporter.cpp
new file mode 100644
index 000000000..390e2ec99
--- /dev/null
+++ b/compiler/exo/src/Circle/CircleOperationExporter.cpp
@@ -0,0 +1,1228 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleOperationExporter.h"
+#include "CircleExporterUtils.h"
+#include "ShapeInference.h"
+
+#include "Dialect/IR/TFLNode.h"
+#include "Dialect/IR/TFLNodes.h"
+#include "Dialect/IR/TFLNodeVisitor.h"
+
+#include "Dialect/IR/CircleNode.h"
+#include "Dialect/IR/CircleNodes.h"
+#include "Dialect/IR/CircleNodeVisitor.h"
+
+#include "Check.h"
+
+#include <loco/IR/CanonicalNode.h>
+#include <loco/IR/CanonicalNodeVisitor.h>
+#include <loco/Service/ShapeInference.h>
+#include <locoex/COpCall.h>
+
+#include <oops/InternalExn.h>
+
+#include <flatbuffers/flexbuffers.h>
+
+using namespace flatbuffers;
+using namespace circle;
+
+namespace
+{
+
+using namespace exo;
+using namespace exo::circle_detail;
+
+class OperationExporter final : public locoex::TFLNodeMutableVisitor<void>,
+ public locoex::CircleNodeMutableVisitor<void>,
+ public loco::CanonicalNodeMutableVisitor<void>
+{
+public:
+ OperationExporter(FlatBufferBuilder &fbb, SerializedModelData &ctx) : builder{fbb}, gd{ctx}
+ {
+ // DO NOTHING
+ }
+
+public:
+ // FOR TFLNodes
+ void visit(locoex::TFLAdd *) final;
+ void visit(locoex::TFLAveragePool2D *) final;
+ void visit(locoex::TFLConcatenation *) final;
+ void visit(locoex::TFLConst *) final{/* skip, everything is done in exportOpDefinedTensors */};
+ void visit(locoex::TFLConv2D *) final;
+ void visit(locoex::TFLDepthwiseConv2D *) final;
+ void visit(locoex::TFLDiv *) final;
+ void visit(locoex::TFLFullyConnected *) final;
+ void visit(locoex::TFLMaximum *) final;
+ void visit(locoex::TFLMaxPool2D *) final;
+ void visit(locoex::TFLMean *) final;
+ void visit(locoex::TFLMul *) final;
+ void visit(locoex::TFLRelu *) final;
+ void visit(locoex::TFLRelu6 *) final;
+ // TODO TFLReshape
+ void visit(locoex::TFLRsqrt *) final;
+ // TODO TFLSoftmax
+ void visit(locoex::TFLSqrt *) final;
+ void visit(locoex::TFLSquaredDifference *) final;
+ void visit(locoex::TFLSub *) final;
+ // TODO TFLTanh
+ void visit(locoex::TFLTranspose *) final;
+ void visit(locoex::TFLTransposeConv *) final;
+
+ // FOR CircleNodes
+ void visit(locoex::CircleInstanceNorm *) final;
+
+ // FOR canonical nodes. These will be removed later
+ void visit(loco::ReLU *) final;
+ void visit(loco::ReLU6 *) final;
+ void visit(loco::Tanh *) final;
+ void visit(loco::Push *) final { /* DO NOTHING */}
+ void visit(loco::Pull *) final { /* DO NOTHING */}
+ void visit(loco::FeatureEncode *) final;
+ void visit(loco::FeatureDecode *) final;
+ void visit(loco::FilterEncode *) final;
+ void visit(loco::DepthwiseFilterEncode *) final;
+ void visit(loco::ConstGen *) final { /* skip, everything is done in exportOpDefinedTensors */}
+ void visit(loco::MaxPool2D *) final;
+ void visit(loco::AvgPool2D *) final;
+ void visit(loco::Conv2D *) final;
+ void visit(loco::TransposedConv2D *) final;
+ void visit(loco::DepthwiseConv2D *) final;
+ void visit(loco::TensorConcat *) final;
+ void visit(loco::TensorReduce *) final;
+ void visit(loco::TensorSoftmax *) final;
+ void visit(loco::BiasEncode *) final;
+ void visit(loco::TensorBiasAdd *) final;
+ void visit(loco::FeatureBiasAdd *) final;
+ void visit(loco::EltwiseAdd *) final;
+ void visit(loco::EltwiseMax *) final;
+ void visit(loco::EltwiseMul *) final;
+ void visit(loco::EltwiseSub *) final;
+ void visit(loco::EltwiseDiv *) final;
+ void visit(loco::EltwiseSqrt *) final;
+ void visit(loco::FixedReshape *) final;
+ void visit(loco::TensorBroadcast *) final;
+ void visit(loco::TensorConstantPad *) final;
+
+ void visit(locoex::COpCall *);
+
+private:
+ /**
+ * @brief Exports TFLMaxPool2D or TFLAveragePool2D
+ *
+ * @note TFLPool2D should be one of TFLMaxPool2D or TFLAveragePool2D
+ */
+ template <class TFLPool2D>
+ void export_pool_2d(TFLPool2D *node, circle::BuiltinOperator builtin_op);
+
+private:
+ FlatBufferBuilder &builder;
+ SerializedModelData &gd;
+};
+
+void OperationExporter::visit(locoex::TFLAdd *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_ADD);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateAddOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_AddOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::TFLAveragePool2D *node)
+{
+ export_pool_2d<locoex::TFLAveragePool2D>(node, circle::BuiltinOperator_AVERAGE_POOL_2D);
+}
+
+void OperationExporter::visit(locoex::TFLConcatenation *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION);
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+
+ for (uint32_t i = 0; i < node->numValues(); ++i)
+ inputs_vec.push_back(get_tensor_index(node->values(i)));
+
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateConcatenationOptions(builder, node->axis(),
+ to_circle_actfunc(node->fusedActivationFunction()));
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_ConcatenationOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::TFLConv2D *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_CONV_2D);
+
+ // Make input, output and options for operator
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->filter()),
+ get_tensor_index(node->bias())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ circle::Padding padding = getOpPadding(node->padding());
+ auto options = CreateConv2DOptions(builder, padding, node->stride()->w(), node->stride()->h(),
+ to_circle_actfunc(node->fusedActivationFunction()));
+
+ // Make CONV_2D operator
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_Conv2DOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::TFLDepthwiseConv2D *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_DEPTHWISE_CONV_2D);
+
+ // Make input, output and options for operator
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->filter()),
+ get_tensor_index(node->bias())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ circle::Padding padding = getOpPadding(node->padding());
+ auto options = CreateDepthwiseConv2DOptions(builder, padding, node->stride()->w(),
+ node->stride()->h(), node->depthMultiplier(),
+ to_circle_actfunc(node->fusedActivationFunction()));
+
+ // Make DEPTHWISE_CONV_2D operator
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_DepthwiseConv2DOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::TFLDiv *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_DIV);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateDivOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_DivOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::TFLFullyConnected *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_FULLY_CONNECTED);
+
+ // Make input, output and options for operator
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
+ get_tensor_index(node->weights()),
+ get_tensor_index(node->bias())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options =
+ CreateFullyConnectedOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
+
+ // Make FULLY_CONNECTED operator
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_FullyConnectedOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::TFLMaximum *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_MAXIMUM);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateMaximumMinimumOptions(builder);
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_MaximumMinimumOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::TFLMaxPool2D *node)
+{
+ export_pool_2d<locoex::TFLMaxPool2D>(node, circle::BuiltinOperator_MAX_POOL_2D);
+}
+
+void OperationExporter::visit(locoex::TFLMean *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_MEAN);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
+ get_tensor_index(node->reduction_indices())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateReducerOptions(builder, node->keep_dims());
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_ReducerOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::TFLMul *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_MUL);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateMulOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_MulOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::TFLRelu *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_RELU);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->features())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::TFLRelu6 *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_RELU6);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->features())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+ gd._operators.push_back(op_offset);
+}
+
+// TODO TFLReshape
+
+void OperationExporter::visit(locoex::TFLRsqrt *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_RSQRT);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+ gd._operators.push_back(op_offset);
+}
+
+// TODO TFLSoftmax
+
+void OperationExporter::visit(locoex::TFLSqrt *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_SQRT);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::TFLSquaredDifference *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_SQUARED_DIFFERENCE);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateSquaredDifferenceOptions(builder);
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_SquaredDifferenceOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::TFLSub *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_SUB);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateSubOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_SubOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+// TODO TFLTanh
+
+void OperationExporter::visit(locoex::TFLTranspose *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_TRANSPOSE);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->arg(0)), get_tensor_index(node->arg(1))};
+ std::vector<int32_t> outputs_vec{get_tensor_index(node)};
+
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateTransposeOptions(builder);
+
+ auto op_offset =
+ CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::TFLTransposeConv *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_TRANSPOSE_CONV);
+
+ // Make input, output and options for operator
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->inputSizes()),
+ get_tensor_index(node->filter()),
+ get_tensor_index(node->outBackprop())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ circle::Padding padding = getOpPadding(node->padding());
+ auto options =
+ CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
+
+ // Make TRANSPOSE_CONV operator
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_TransposeConvOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+template <class TFLPool2D>
+void OperationExporter::export_pool_2d(TFLPool2D *node, circle::BuiltinOperator builtin_op)
+{
+ EXO_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
+ builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
+ "should be maxpool or avgpool");
+ EXO_ASSERT(node->padding() != locoex::Padding::UNDEFINED, "Padding is not set");
+
+ uint32_t op_idx = gd.registerBuiltinOpcode(builtin_op);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+
+ circle::Padding padding = getOpPadding(node->padding());
+
+ auto options = CreatePool2DOptions(builder, padding, node->stride()->w(), node->stride()->h(),
+ node->filter()->w(), node->filter()->h(),
+ to_circle_actfunc(node->fusedActivationFunction()));
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_Pool2DOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(locoex::CircleInstanceNorm *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_INSTANCE_NORM);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->gamma()),
+ get_tensor_index(node->beta())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateInstanceNormOptions(builder, node->epsilon(),
+ to_circle_actfunc(node->fusedActivationFunction()));
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_InstanceNormOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::ReLU *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_RELU);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::ReLU6 *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_RELU6);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::Tanh *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_TANH);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::MaxPool2D *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_MAX_POOL_2D);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ circle::Padding padding = getOpPadding(
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
+ node->stride()->vertical(), node->window()->horizontal(),
+ node->window()->vertical());
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_Pool2DOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::AvgPool2D *node)
+{
+ // Circle only supports the Valid convention of average pooling
+ assert(node->convention() == loco::AvgPool2D::Convention::Valid);
+
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_AVERAGE_POOL_2D);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ circle::Padding padding = getOpPadding(
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
+ node->stride()->vertical(), node->window()->horizontal(),
+ node->window()->vertical());
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_Pool2DOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::Conv2D *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_CONV_2D);
+
+ // Third input of CONV_2D of Circle should be bias. We will make (and register to gd) dummy zero
+ // bias. Bias would be rank 1, have size of output kernel count, and have all zero values, i.e.
+ // zero bias.
+ auto *ker = dynamic_cast<loco::FilterEncode *>(node->ker());
+ assert(ker);
+ int32_t bias_vec_size = ShapeInference::get(ker)._dims[0]; // output kernel count
+
+ auto bias_vec_shape_offset = builder.CreateVector(std::vector<int32_t>{bias_vec_size});
+ size_t raw_bias_vec_size = bias_vec_size * sizeof(int32_t);
+
+ std::vector<float> bias_vec_data(bias_vec_size); // initialized as zero vector
+
+ auto bias_vec_offset =
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+
+ auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
+
+ const auto bias_buffer_id = static_cast<uint32_t>(gd._buffers.size());
+
+ gd._buffers.push_back(bias_buffer_offset);
+
+ auto bias_tensor_id = static_cast<int32_t>(gd._tensors.size());
+ auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
+
+ auto bias_tensor_offset =
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ gd._tensors.push_back(bias_tensor_offset);
+
+ // Make input, output and options for operator
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm()), get_tensor_index(node->ker()),
+ bias_tensor_id};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ circle::Padding padding = getOpPadding(
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options = CreateConv2DOptions(builder, padding, node->stride()->horizontal(),
+ node->stride()->vertical());
+
+ // Make CONV_2D operator
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_Conv2DOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::TransposedConv2D *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_TRANSPOSE_CONV);
+
+ // TRANSPOSE_CONV's first input is output shape array.
+ const int32_t outshape_vec_size = 4;
+ auto outshape_vec_shape_offset = builder.CreateVector(std::vector<int32_t>{outshape_vec_size});
+ size_t raw_outshape_vec_size = outshape_vec_size * sizeof(int32_t);
+
+ std::vector<int32_t> outshape_vec_data(outshape_vec_size);
+ {
+ // Copy inferred output shape of node
+ auto out_feature_shape = loco::shape_get(node).as<loco::FeatureShape>();
+
+ // Feature tensor in Circle is NHWC
+ outshape_vec_data.at(0) = out_feature_shape.count().value();
+ outshape_vec_data.at(1) = out_feature_shape.height().value();
+ outshape_vec_data.at(2) = out_feature_shape.width().value();
+ outshape_vec_data.at(3) = out_feature_shape.depth().value();
+ }
+
+ auto outshape_vec_offset = builder.CreateVector(
+ reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
+
+ auto outshape_buffer_offset = CreateBuffer(builder, outshape_vec_offset);
+
+ const auto outshape_buffer_id = static_cast<uint32_t>(gd._buffers.size());
+
+ gd._buffers.push_back(outshape_buffer_offset);
+
+ auto outshape_tensor_id = static_cast<int32_t>(gd._tensors.size());
+ auto name_offset = builder.CreateString("t_" + std::to_string(outshape_tensor_id));
+
+ auto outshape_tensor_offset = CreateTensor(builder, outshape_vec_shape_offset, TensorType_INT32,
+ outshape_buffer_id, name_offset);
+ gd._tensors.push_back(outshape_tensor_offset);
+
+ // Make input, output and options for operator
+ std::vector<int32_t> inputs_vec{outshape_tensor_id, get_tensor_index(node->ker()),
+ get_tensor_index(node->ifm())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ // NOTE input and output are swapped to reuse this function
+ circle::Padding padding = getOpPadding(node->pad(), node->stride(), ShapeInference::get(node),
+ ShapeInference::get(node->ifm()));
+ auto options = CreateTransposeConvOptions(builder, padding, node->stride()->horizontal(),
+ node->stride()->vertical());
+
+ // Make TRANSPOSE_CONV operator
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_TransposeConvOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::DepthwiseConv2D *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_DEPTHWISE_CONV_2D);
+
+ // Third input of DEPTHWISE_CONV2D of Circle should be bias. We will make (and register to gd)
+ // dummy zero bias. Bias would be rank 1, have size of output kernel count, and have all zero
+ // values, i.e. zero bias.
+ auto *ker = dynamic_cast<loco::DepthwiseFilterEncode *>(node->ker());
+ assert(ker);
+
+ int32_t bias_vec_size = ShapeInference::get(ker)._dims[3]; // output_size(C*M)
+ auto bias_vec_shape_offset = builder.CreateVector(std::vector<int32_t>{bias_vec_size});
+
+ size_t raw_bias_vec_size = bias_vec_size * sizeof(int32_t);
+ std::vector<float> bias_vec_data(bias_vec_size);
+ auto bias_vec_offset =
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+
+ auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
+
+ const auto bias_buffer_id = static_cast<uint32_t>(gd._buffers.size());
+
+ gd._buffers.push_back(bias_buffer_offset);
+
+ auto bias_tensor_id = static_cast<int32_t>(gd._tensors.size());
+ auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
+
+ auto bias_tensor_offset =
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ gd._tensors.push_back(bias_tensor_offset);
+
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm()), get_tensor_index(node->ker()),
+ bias_tensor_id};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ circle::Padding padding = getOpPadding(
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+
+ int32_t ifm_channel_size = ShapeInference::get(node->ifm())._dims[3];
+ // multiplier = bias_vec_size(output_size)/ifm_channel_size
+ auto options =
+ CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
+ node->stride()->vertical(), bias_vec_size / ifm_channel_size);
+
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_DepthwiseConv2DOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::TensorReduce *node)
+{
+ uint32_t op_idx;
+
+ switch (node->func())
+ {
+ case loco::ReduceFunc::Mean:
+ op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_MEAN);
+ break;
+
+ // TODO Support more reduce type operation
+ default:
+ INTERNAL_EXN_V("Unsupported reduce type", oops::to_uint32(node->func()));
+ }
+
+ // Create a vector for axes data
+ std::vector<int32_t> axes_vec;
+ auto rank = ShapeInference::get(node->input())._dims.size();
+ for (uint32_t i = 0; i < rank; ++i)
+ if (node->axes()->defined(i))
+ axes_vec.push_back(i);
+
+ int32_t axes_vec_size = axes_vec.size();
+ auto axes_vec_shape_offset = builder.CreateVector(std::vector<int32_t>{axes_vec_size});
+
+ size_t raw_axes_vec_size = axes_vec_size * sizeof(int32_t);
+ auto axes_vec_offset =
+ builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
+
+ auto axes_buffer_offset = CreateBuffer(builder, axes_vec_offset);
+
+ const auto axes_buffer_id = static_cast<uint32_t>(gd._buffers.size());
+
+ gd._buffers.push_back(axes_buffer_offset);
+
+ auto axes_tensor_id = static_cast<int32_t>(gd._tensors.size());
+ auto name_offset = builder.CreateString("t_" + std::to_string(axes_tensor_id));
+
+ auto axes_tensor_offset =
+ CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
+ gd._tensors.push_back(axes_tensor_offset);
+
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), axes_tensor_id};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateReducerOptions(builder, true); // true is for keep_dims option
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_ReducerOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::TensorSoftmax *node)
+{
+ // TODO Support when the input rank of TensorSoftmax is not 2
+ assert(ShapeInference::get(node->input())._dims.size() == 2);
+
+ // NOTE Circle only accepts axis when the value is last dimension
+ assert(node->axis() == ShapeInference::get(node->input())._dims.size() - 1);
+
+ uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_SOFTMAX);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateSoftmaxOptions(builder, 1.0f);
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_SoftmaxOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+/// @brief Export given node into identity, i.e. CONCATENATION with one input
+/// @note  A single-input CONCATENATION simply forwards its input tensor, so it
+///        acts as an identity operator for nodes with no Circle counterpart
+template <typename NodeT>
+void exportIdentity(NodeT *node, FlatBufferBuilder &builder, SerializedModelData &gd)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->arg(0))};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateConcatenationOptions(builder); // use dummy 0 axis and NONE activation
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_ConcatenationOptions, options.Union());
+
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Export loco nodes as TRANSPOSE
+/// @note  perm_vec_data must hold exactly 4 permutation indices; it is serialized
+///        as a constant INT32 tensor and fed as the second TRANSPOSE input
+void exportAsTranspose(loco::Node *node, FlatBufferBuilder &builder,
+                       std::vector<int32_t> &perm_vec_data, SerializedModelData &gd)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_TRANSPOSE);
+
+  auto options = CreateTransposeOptions(builder);
+
+  // Create constant tensor with perm vector
+  constexpr int perm_vec_size = 4;
+  assert(perm_vec_data.size() == perm_vec_size);
+  auto perm_vec_shape_offset = builder.CreateVector(std::vector<int32_t>{perm_vec_size});
+  constexpr size_t raw_perm_vec_size = perm_vec_size * sizeof(int32_t);
+
+  // Serialize the perm values as raw bytes into the buffer table
+  auto perm_vec_offset =
+      builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
+
+  auto perm_buffer_offset = CreateBuffer(builder, perm_vec_offset);
+
+  // Buffer/tensor ids are the next free slots in the respective tables
+  const auto perm_buffer_id = static_cast<uint32_t>(gd._buffers.size());
+
+  gd._buffers.push_back(perm_buffer_offset);
+
+  auto perm_tensor_id = static_cast<int32_t>(gd._tensors.size());
+  auto name_offset = builder.CreateString("t_" + std::to_string(perm_tensor_id));
+
+  auto perm_tensor_offset =
+      CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
+  gd._tensors.push_back(perm_tensor_offset);
+
+  // Create permutation node
+
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->arg(0)), perm_tensor_id};
+  std::vector<int32_t> outputs_vec{get_tensor_index(node)};
+
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+
+  constexpr auto options_type = circle::BuiltinOptions::BuiltinOptions_TransposeOptions;
+
+  auto transpose_offset =
+      CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
+  gd._operators.push_back(transpose_offset);
+}
+
+/// @brief Export loco::FeatureEncode as identity (already NHWC) or TRANSPOSE
+void OperationExporter::visit(loco::FeatureEncode *node)
+{
+  auto encoder = dynamic_cast<loco::PermutingEncoder<loco::Domain::Feature> *>(node->encoder());
+  // Only PermutingEncoder is supported here: dynamic_cast yields nullptr for any
+  // other encoder kind, which would otherwise be dereferenced below. Fail fast.
+  assert(encoder != nullptr);
+  auto perm = encoder->perm();
+
+  if (isNHWC(perm))
+  {
+    // Note that Circle represents feature as NHWC
+    exportIdentity(node, builder, gd);
+  }
+  else
+  {
+    // Build the TRANSPOSE perm vector mapping source axes to NHWC order
+    std::vector<int32_t> perm_vec_data(4);
+    perm_vec_data[0] = perm->axis(loco::FeatureAxis::Count);
+    perm_vec_data[1] = perm->axis(loco::FeatureAxis::Height);
+    perm_vec_data[2] = perm->axis(loco::FeatureAxis::Width);
+    perm_vec_data[3] = perm->axis(loco::FeatureAxis::Depth);
+
+    exportAsTranspose(node, builder, perm_vec_data, gd);
+  }
+}
+
+/// @brief Export loco::FeatureDecode as identity (already NHWC) or TRANSPOSE
+void OperationExporter::visit(loco::FeatureDecode *node)
+{
+  auto decoder = dynamic_cast<loco::PermutingDecoder<loco::Domain::Feature> *>(node->decoder());
+  // Only PermutingDecoder is supported here: dynamic_cast yields nullptr for any
+  // other decoder kind, which would otherwise be dereferenced below. Fail fast.
+  assert(decoder != nullptr);
+  auto perm = decoder->perm();
+
+  if (isNHWC(perm))
+  {
+    // Note that Circle represents feature as NHWC
+    exportIdentity(node, builder, gd);
+  }
+  else
+  {
+    // Inverse mapping of FeatureEncode: scatter NHWC axes back to the target layout
+    std::vector<int32_t> perm_vec_data(4);
+    perm_vec_data[perm->axis(loco::FeatureAxis::Count)] = 0;
+    perm_vec_data[perm->axis(loco::FeatureAxis::Height)] = 1;
+    perm_vec_data[perm->axis(loco::FeatureAxis::Width)] = 2;
+    perm_vec_data[perm->axis(loco::FeatureAxis::Depth)] = 3;
+
+    exportAsTranspose(node, builder, perm_vec_data, gd);
+  }
+}
+
+/// @brief Export loco::FilterEncode as identity (already NHWC) or TRANSPOSE
+void OperationExporter::visit(loco::FilterEncode *node)
+{
+  auto encoder = dynamic_cast<loco::PermutingEncoder<loco::Domain::Filter> *>(node->encoder());
+  // Only PermutingEncoder is supported here: dynamic_cast yields nullptr for any
+  // other encoder kind, which would otherwise be dereferenced below. Fail fast.
+  assert(encoder != nullptr);
+  auto perm = encoder->perm();
+
+  if (isNHWC(perm))
+  {
+    // Note that Circle represents filter as NHWC
+    exportIdentity(node, builder, gd);
+  }
+  else
+  {
+    std::vector<int32_t> perm_vec_data(4);
+    // NOTE In Circle, all tensors means NHWC, so 0 = N, 1 = H, 2 = W, 3 = C
+    perm_vec_data[0] = perm->axis(loco::FilterAxis::Count);
+    perm_vec_data[1] = perm->axis(loco::FilterAxis::Height);
+    perm_vec_data[2] = perm->axis(loco::FilterAxis::Width);
+    perm_vec_data[3] = perm->axis(loco::FilterAxis::Depth);
+
+    exportAsTranspose(node, builder, perm_vec_data, gd);
+  }
+}
+
+/// @brief Export given node as RESHAPE, passing new_shape_vec both as the
+///        'new_shape' attribute and as a constant 'shape' input tensor
+void exportAsReshape(loco::Node *node, FlatBufferBuilder &builder,
+                     std::vector<int32_t> &new_shape_vec, SerializedModelData &gd)
+{
+  // NOTE Circle currently follows TFLite for this.
+  // NOTE TFLite has two ways to get new shape parameter,
+  // one is by attribute 'new_shape' and the other is by input 'shape'.
+  // Therefore TFLite interpreter calculates Reshape operation correctly
+  // if one of them is valid.
+  // However, since NN runtime usually get new shape parameter by input 'shape',
+  // passing new shape only by attribute can cause some problems.
+  // Of course, the opposite situation may occur in the future.
+  // To prevent those problems, we pass new shape parameter not only by attribute
+  // but also by input.
+
+  auto input_shape_shape_vec_offset =
+      builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});
+
+  // Serialize the new shape values as raw bytes for the constant input tensor
+  size_t input_shape_vec_size = new_shape_vec.size() * sizeof(int32_t);
+  auto input_shape_input_vec_offset =
+      builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
+  auto input_shape_buffer_offset = CreateBuffer(builder, input_shape_input_vec_offset);
+
+  const auto input_shape_buffer_id = static_cast<uint32_t>(gd._buffers.size());
+  gd._buffers.push_back(input_shape_buffer_offset);
+
+  auto input_shape_tensor_id = static_cast<int32_t>(gd._tensors.size());
+  auto name_offset = builder.CreateString("t_" + std::to_string(input_shape_tensor_id));
+  auto input_shape_tensor_offset = CreateTensor(
+      builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
+  gd._tensors.push_back(input_shape_tensor_offset);
+
+  uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_RESHAPE);
+
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->arg(0)), input_shape_tensor_id};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+
+  // The same shape also goes into ReshapeOptions (the attribute path)
+  auto new_shape_vec_offset = builder.CreateVector(new_shape_vec);
+  auto options = CreateReshapeOptions(builder, new_shape_vec_offset);
+
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_ReshapeOptions, options.Union());
+
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Export loco::DepthwiseFilterEncode as RESHAPE from [H, W, C, M] to [1, H, W, C*M]
+void OperationExporter::visit(loco::DepthwiseFilterEncode *node)
+{
+  auto ker = node->input(); // [H, W, C, M]
+
+  // Circle represents filter as [1, H, W, C*M] where M is multiplier.
+  std::vector<int32_t> new_shape_vec(4);
+  new_shape_vec[0] = 1;
+  new_shape_vec[1] = ShapeInference::get(ker)._dims[0];
+  new_shape_vec[2] = ShapeInference::get(ker)._dims[1];
+  new_shape_vec[3] = ShapeInference::get(ker)._dims[2] * ShapeInference::get(ker)._dims[3];
+
+  exportAsReshape(node, builder, new_shape_vec, gd);
+}
+
+/// @brief Export loco::BiasAdd (tensor domain) as Circle ADD of value and bias
+void OperationExporter::visit(loco::BiasAdd<loco::Domain::Tensor> *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_ADD);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->value()), get_tensor_index(node->bias())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateAddOptions(builder); // dummy option
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_AddOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Export loco::FeatureBiasAdd as Circle ADD of feature value and bias
+void OperationExporter::visit(loco::FeatureBiasAdd *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_ADD);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->value()), get_tensor_index(node->bias())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateAddOptions(builder); // dummy option
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_AddOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Export CONCATENATION of **TWO** tensors only
+void OperationExporter::visit(loco::TensorConcat *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->lhs()), get_tensor_index(node->rhs())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  // Unlike exportIdentity, the real concat axis is carried into the options
+  auto options = CreateConcatenationOptions(builder, node->axis());
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_ConcatenationOptions, options.Union());
+
+  gd._operators.push_back(op_offset);
+}
+
+// BiasEncode has no Circle counterpart; emit it as an identity (single-input CONCATENATION)
+void OperationExporter::visit(loco::BiasEncode *encode) { exportIdentity(encode, builder, gd); }
+
+/// @brief Export loco::EltwiseAdd as Circle ADD with default AddOptions
+void OperationExporter::visit(loco::EltwiseAdd *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_ADD);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->lhs()), get_tensor_index(node->rhs())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateAddOptions(builder); // dummy option
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_AddOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Export loco::EltwiseMax as Circle MAXIMUM with default MaximumMinimumOptions
+void OperationExporter::visit(loco::EltwiseMax *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_MAXIMUM);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->lhs()), get_tensor_index(node->rhs())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateMaximumMinimumOptions(builder); // dummy option
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_MaximumMinimumOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Export loco::EltwiseMul as Circle MUL with default MulOptions
+void OperationExporter::visit(loco::EltwiseMul *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_MUL);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->lhs()), get_tensor_index(node->rhs())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateMulOptions(builder); // dummy option
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_MulOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Export loco::EltwiseSub as Circle SUB with default SubOptions
+void OperationExporter::visit(loco::EltwiseSub *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_SUB);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->lhs()), get_tensor_index(node->rhs())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateSubOptions(builder); // dummy option
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_SubOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Export loco::EltwiseDiv as Circle DIV with default DivOptions
+void OperationExporter::visit(loco::EltwiseDiv *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_DIV);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->lhs()), get_tensor_index(node->rhs())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateDivOptions(builder); // dummy option
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_DivOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Export loco::EltwiseSqrt as Circle SQRT (SQRT takes no builtin options)
+void OperationExporter::visit(loco::EltwiseSqrt *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_SQRT);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Export loco::FixedReshape as RESHAPE; all output dimensions must be known
+void OperationExporter::visit(loco::FixedReshape *node)
+{
+  std::vector<int32_t> new_shape_vec;
+  for (uint32_t axis = 0; axis < node->rank(); ++axis)
+  {
+    // Unknown dimensions cannot be serialized into a constant shape tensor
+    assert(node->dim(axis).known());
+    new_shape_vec.push_back(node->dim(axis).value());
+  }
+
+  exportAsReshape(node, builder, new_shape_vec, gd);
+}
+
+/// @brief TensorBroadcast must have been eliminated by earlier passes; exporting it is an error
+void OperationExporter::visit(loco::TensorBroadcast *)
+{
+  INTERNAL_EXN("loco graph has loco::TensorBroadcast, which should not exist in the graph");
+}
+
+/// @brief Export loco::TensorConstantPad as Circle PAD, materializing the padding
+///        attribute as a constant [rank, 2] INT32 input tensor
+void OperationExporter::visit(loco::TensorConstantPad *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_PAD);
+
+  // make padding attribute an input
+  auto padding = node->padding();
+  // get padding vector size
+  int32_t padding_vec_size = padding->rank();
+  // get byte size of vector
+  size_t padding_vec_byte_size = padding_vec_size * sizeof(int32_t) * 2; // [rank, 2]
+  // create vector for data
+  std::vector<int32_t> padding_vec_data(padding_vec_size * 2);
+  // set data: row i holds {front padding, back padding} for dimension i
+  for (int32_t i = 0; i < padding_vec_size; i++)
+  {
+    padding_vec_data.at(i * 2) = padding->front(i);
+    padding_vec_data.at(i * 2 + 1) = padding->back(i);
+  }
+  // create FlatBuffer vector
+  auto padding_vec_ptr = builder.CreateVector(reinterpret_cast<uint8_t *>(padding_vec_data.data()),
+                                              padding_vec_byte_size);
+
+  // create buffer
+  auto padding_buffer_ptr = CreateBuffer(builder, padding_vec_ptr);
+  // get buffer id
+  const auto padding_buffer_id = static_cast<uint32_t>(gd._buffers.size());
+
+  gd._buffers.push_back(padding_buffer_ptr);
+
+  // create padding shape vector
+  auto padding_shape_vec_ptr = builder.CreateVector(std::vector<int32_t>{padding_vec_size, 2});
+  // get tensor id up front so it can also be used for the tensor name
+  const auto padding_tensor_id = static_cast<int32_t>(gd._tensors.size());
+  // name the constant tensor "t_<id>", consistent with the tensors synthesized by
+  // exportAsTranspose / exportAsReshape (the original left this tensor unnamed)
+  auto name_offset = builder.CreateString("t_" + std::to_string(padding_tensor_id));
+  // create tensor
+  auto padding_tensor_ptr = CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32,
+                                         padding_buffer_id, name_offset);
+
+  gd._tensors.push_back(padding_tensor_ptr);
+
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), padding_tensor_id};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Serialize the attributes of a COpCall into a FlexBuffer blob suitable
+///        for the custom_options field of a custom Operator
+inline flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+CreateCOpCallOptions(flatbuffers::FlatBufferBuilder &fbb, locoex::COpCall *copCall)
+{
+  // read attrs in FlexBuffer format and pass them to FlatBuffer builder
+  flexbuffers::Builder flexbuf;
+  {
+    size_t map_start = flexbuf.StartMap();
+
+    // Note: among attrs of COpCall, 'op' and 'name' won't be included into the exported file
+    auto names = copCall->attr_names();
+    for (auto name : names)
+    {
+      // Only Int and Float attribute types are currently serializable
+      if (auto int_val = copCall->attr<locoex::COpAttrType::Int>(name))
+        flexbuf.Int(name.c_str(), int_val->val());
+      else if (auto float_val = copCall->attr<locoex::COpAttrType::Float>(name))
+        flexbuf.Float(name.c_str(), float_val->val());
+      else
+        // TODO Support more attribute types
+        INTERNAL_EXN_V("Unsupported dtype while writing flexbuffer for customop attr", name);
+    }
+
+    flexbuf.EndMap(map_start);
+    flexbuf.Finish();
+  }
+
+  auto offset = fbb.CreateVector(flexbuf.GetBuffer());
+
+  return offset;
+}
+
+/// @brief Export locoex::COpCall as a custom Operator with FlexBuffer-encoded attributes
+void OperationExporter::visit(locoex::COpCall *call)
+{
+  // Registering this custom op name into the Operator Codes table
+  uint32_t op_idx = gd.registerCustomOpcode(call->op());
+
+  // Gather tensor indices of all arguments, in argument order
+  std::vector<int32_t> inputs_vec;
+  {
+    inputs_vec.resize(call->arity());
+    for (uint32_t i = 0; i < call->arity(); i++)
+      inputs_vec[i] = get_tensor_index(call->arg(i));
+  }
+
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(call))};
+
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+
+  auto custom_options = CreateCOpCallOptions(builder, call);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_NONE, // builtin_options_type
+                                  0,                           // built-in option
+                                  custom_options,              // custom options
+                                  circle::CustomOptionsFormat_FLEXBUFFERS);
+
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Dispatch one node to the OperationExporter visitor of its dialect,
+///        skipping nodes already marked as no-ops (tensor index aliased to input)
+void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder,
+                SerializedModelData &data)
+{
+  // TODO Use explicit tagging to prevent possible mistake
+  auto isNoOp = [](loco::Node *node) {
+    if (node->arity() == 1)
+    {
+      assert(node->arg(0) != nullptr);
+      // A node sharing its tensor index with its only argument emits no operator
+      return get_tensor_index(node) == get_tensor_index(node->arg(0));
+    }
+    return false;
+  };
+
+  if (isNoOp(node))
+  {
+    // Skip if a given node is marked as NoOp (op with no effect) before
+    return;
+  }
+
+  if (auto canonical_node = dynamic_cast<loco::CanonicalNode *>(node))
+  { // TODO Consider removing this later
+    OperationExporter exporter{builder, data};
+    canonical_node->accept(&exporter);
+  }
+  else if (auto tfl_node = dynamic_cast<locoex::TFLNode *>(node))
+  {
+    OperationExporter exporter{builder, data};
+    tfl_node->accept(&exporter);
+  }
+  else if (auto circle_node = dynamic_cast<locoex::CircleNode *>(node))
+  {
+    OperationExporter exporter{builder, data};
+    circle_node->accept(&exporter);
+  }
+  else if (auto copcall = dynamic_cast<locoex::COpCall *>(node))
+  {
+    // Cast directly to COpCall: the original tested for COpNode and then passed an
+    // unchecked dynamic_cast<COpCall *> result (possibly nullptr) to visit().
+    // A COpNode that is not a COpCall now falls through to the error branch below.
+    OperationExporter exporter{builder, data};
+    exporter.visit(copcall);
+  }
+  else
+  {
+    INTERNAL_EXN("Node with unsupported dialect found");
+  }
+}
+
+} // namespace
+
+namespace exo
+{
+namespace circle_detail
+{
+
+/// @brief Export an Operator for every node, in postorder so each node's
+///        arguments are visited before the node itself
+void exportNodes(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &gd)
+{
+  for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+  {
+    exportNode(node, builder, gd);
+  }
+}
+
+} // namespace circle_detail
+} // namespace exo
diff --git a/compiler/exo/src/Circle/CircleOperationExporter.h b/compiler/exo/src/Circle/CircleOperationExporter.h
new file mode 100644
index 000000000..19dadbfd1
--- /dev/null
+++ b/compiler/exo/src/Circle/CircleOperationExporter.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPERATION_EXPORTER_H__
+#define __CIRCLE_OPERATION_EXPORTER_H__
+
+#include "CircleExporterUtils.h"
+
+#include <loco/IR/Graph.h>
+
+namespace exo
+{
+namespace circle_detail
+{
+
+/**
+ * @brief create Operators corresponding to model nodes
+ * @param g graph whose nodes are exported
+ * @param builder flatbuffer builder used for serialization
+ * @param gd information about serializer parts of model
+ */
+void exportNodes(loco::Graph *g, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &gd);
+
+} // namespace circle_detail
+} // namespace exo
+
+#endif // __CIRCLE_OPERATION_EXPORTER_H__
diff --git a/compiler/exo/src/Circle/CircleTensorExporter.cpp b/compiler/exo/src/Circle/CircleTensorExporter.cpp
new file mode 100644
index 000000000..efceae55d
--- /dev/null
+++ b/compiler/exo/src/Circle/CircleTensorExporter.cpp
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleTensorExporter.h"
+#include "CircleTypeInference.h"
+#include "ShapeInference.h"
+
+// TODO Fix include style
+#include "loco/IR/Algorithm.h"
+#include "loco/IR/CanonicalNode.h"
+#include "loco/IR/CanonicalNodeVisitor.h"
+#include "loco/IR/DataTypeTraits.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include <oops/InternalExn.h>
+
+using namespace circle;
+using namespace flatbuffers;
+
+namespace
+{
+
+using namespace exo;
+using namespace exo::circle_detail;
+
+/// @brief Value object collecting per-tensor serialization info (name, dtype,
+///        shape, and optional constant content) before circle::Tensor emission
+class TFLTensorInfo
+{
+public:
+  TFLTensorInfo() = default;
+
+public:
+  void name(const std::string &name) { _name = name; }
+  const std::string &name(void) const { return _name; }
+
+public:
+  const circle::TensorType &dtype(void) const { return _dtype; }
+  void dtype(const circle::TensorType &dtype) { _dtype = dtype; }
+
+  const ShapeDescription &shape(void) const { return _shape; }
+  void shape(const ShapeDescription &shape) { _shape = shape; }
+
+public:
+  // Non-null only when the tensor originates from a TFLConst node (constant data)
+  locoex::TFLConst *tfl_content(void) const { return _tfl_content; }
+  void tfl_content(locoex::TFLConst *c) { _tfl_content = c; }
+
+private:
+  std::string _name;
+
+  circle::TensorType _dtype;
+  ShapeDescription _shape;
+
+  // TODO Find a better design
+  loco::ConstGen *_content = nullptr; // TODO deprecate
+  locoex::TFLConst *_tfl_content = nullptr;
+};
+
+using TFLTensorContext = std::vector<TFLTensorInfo>;
+
+/// @brief Visitor answering whether a canonical node needs no Circle operator,
+///        i.e. its output tensor can simply alias its input tensor
+struct NoOpDetector final : public loco::CanonicalNodeMutableVisitor<bool>
+{
+  bool visit(loco::BiasEncode *) final
+  {
+    // BiasEncode is always noop
+    return true;
+  }
+
+  bool visit(loco::FilterEncode *node) final
+  {
+    // A permuting encoder that is already NHWC matches Circle's layout: no-op
+    auto encoder = dynamic_cast<loco::PermutingEncoder<loco::Domain::Filter> *>(node->encoder());
+    if (encoder != nullptr)
+    {
+      auto perm = encoder->perm();
+      return isNHWC(perm);
+    }
+    return false;
+  }
+
+  bool visit(loco::FeatureEncode *node) final
+  {
+    auto encoder = dynamic_cast<loco::PermutingEncoder<loco::Domain::Feature> *>(node->encoder());
+    if (encoder != nullptr)
+    {
+      auto perm = encoder->perm();
+      return isNHWC(perm);
+    }
+    return false;
+  }
+
+  bool visit(loco::FeatureDecode *node) final
+  {
+    auto decoder = dynamic_cast<loco::PermutingDecoder<loco::Domain::Feature> *>(node->decoder());
+    if (decoder != nullptr)
+    {
+      auto perm = decoder->perm();
+      return isNHWC(perm);
+    }
+    return false;
+  }
+
+  // Return false by default
+  bool visit(loco::Node *) final { return false; }
+};
+
+/// @brief Whether the node is a no-op for tensor allocation (canonical nodes only;
+///        nodes of other dialects are never treated as no-op here)
+bool isNoOp(loco::Node *node)
+{
+  if (auto canonical_node = dynamic_cast<loco::CanonicalNode *>(node))
+  {
+    NoOpDetector d;
+    return canonical_node->accept(&d);
+  }
+  return false;
+}
+
+/// @brief Assign a tensor index to the node and record its serialization info.
+///        No-op nodes reuse their argument's tensor index instead of a new slot.
+void allocateCircleTensor(loco::Node *node, TFLTensorContext &ctx)
+{
+  if (isNoOp(node))
+  {
+    assert(node->arity() == 1 && node->arg(0) != nullptr);
+    // Alias the argument's tensor: no new tensor entry is created
+    set_tensor_index(node, get_tensor_index(node->arg(0)));
+    return;
+  }
+
+  auto tensor_index = static_cast<TFLTensorIndex>(ctx.size());
+  // TODO Use Graph-level metadata for Input & Output
+  auto tensor_name = "t_" + std::to_string(tensor_index);
+
+  TFLTensorInfo tensor_info;
+
+  tensor_info.name(tensor_name);
+  tensor_info.dtype(TypeInference::get(node));
+  tensor_info.shape(ShapeInference::get(node));
+
+  // Stays nullptr unless the node is a TFLConst (then its data is serialized later)
+  tensor_info.tfl_content(dynamic_cast<locoex::TFLConst *>(node));
+
+  set_tensor_index(node, tensor_index);
+
+  ctx.emplace_back(tensor_info);
+}
+
+} // namespace
+
+namespace
+{
+
+/// @brief Serialize a known-rank shape as a flatbuffer vector of dimensions
+flatbuffers::Offset<Vector<int32_t>> encodeShape(FlatBufferBuilder &builder,
+                                                 const ShapeDescription &shape)
+{
+  assert(shape._rank_known && "unknown number of dimensions is not supported");
+  return builder.CreateVector(shape._dims);
+}
+
+/// @brief Create an empty buffer, for tensors whose data is produced at runtime
+flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder)
+{
+  return CreateBuffer(builder);
+}
+
+/// @brief Fallback for arbitrary node types: no constant content, so an empty buffer.
+///        TFLConst gets its own specialization below that serializes the data.
+template <typename NodeT>
+flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder, NodeT *)
+{
+  return CreateBuffer(builder);
+}
+
+/// @brief Copy the TFLConst's elements of data type DT and serialize them as the
+///        raw bytes of a circle::Buffer
+template <loco::DataType DT>
+flatbuffers::Offset<circle::Buffer> encodeOpBufferByDType(FlatBufferBuilder &builder,
+                                                          locoex::TFLConst *c)
+{
+  using NativeType = typename loco::DataTypeImpl<DT>::Type;
+
+  std::vector<NativeType> raw_data;
+  const uint32_t size = c->size<DT>();
+  raw_data.reserve(size);
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    raw_data.push_back(c->at<DT>(i));
+  }
+  const size_t raw_size = size * sizeof(NativeType);
+  auto array_offset = builder.CreateVector(reinterpret_cast<uint8_t *>(raw_data.data()), raw_size);
+  return CreateBuffer(builder, array_offset);
+}
+
+/// @brief Serialize a TFLConst's constant data; only FLOAT32 and S32 are supported
+template <>
+flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder, locoex::TFLConst *c)
+{
+  if (c->dtype() == loco::DataType::FLOAT32)
+  {
+    return encodeOpBufferByDType<loco::DataType::FLOAT32>(builder, c);
+  }
+  else if (c->dtype() == loco::DataType::S32)
+  {
+    return encodeOpBufferByDType<loco::DataType::S32>(builder, c);
+  }
+
+  INTERNAL_EXN_V("Unsupported datatype", oops::to_uint32(c->dtype()));
+}
+
+} // namespace
+
+namespace exo
+{
+namespace circle_detail
+{
+
+/// @brief Emit one circle::Tensor (shape, buffer, name) from collected tensor info
+void exportOpDefinedTensor(const TFLTensorInfo &info, FlatBufferBuilder &builder,
+                           SerializedModelData &gd)
+{
+  // Create and register output tensor shape
+  auto shape_offset = encodeShape(builder, info.shape());
+
+  // encode and register output tensor buffer
+  // (constant tensors carry their data; others get an empty buffer)
+  auto buffer = info.tfl_content() == nullptr ? encodeOpBuffer(builder)
+                                              : encodeOpBuffer(builder, info.tfl_content());
+
+  auto buffer_id = static_cast<uint32_t>(gd._buffers.size());
+  gd._buffers.push_back(buffer);
+
+  auto name_offset = builder.CreateString(info.name());
+  auto tensor_offset = CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset,
+                                    /*quantization*/ 0, /*is_variable*/ false);
+  gd._tensors.push_back(tensor_offset);
+}
+
+/// @brief Assign tensor indices to all graph nodes, then emit their circle::Tensor
+///        entries (after the mandatory empty sentinel buffer at index 0)
+void exportOpDefinedTensors(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &gd)
+{
+  TFLTensorContext tensor_ctx;
+
+  for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+  {
+    allocateCircleTensor(node, tensor_ctx);
+  }
+
+  // add one empty buffer
+  // note: this follows TFLite
+  // note: there's a comment in tflite fbs file
+  // - Note the 0th entry of this array must be an empty buffer (sentinel).
+  // - This is a convention so that tensors without a buffer can provide 0 as
+  // - their buffer.
+  auto buffer = encodeOpBuffer(builder);
+  gd._buffers.push_back(buffer);
+
+  for (const auto &tensor_info : tensor_ctx)
+  {
+    exportOpDefinedTensor(tensor_info, builder, gd);
+  }
+}
+
+} // namespace circle_detail
+} // namespace exo
diff --git a/compiler/exo/src/Circle/CircleTensorExporter.h b/compiler/exo/src/Circle/CircleTensorExporter.h
new file mode 100644
index 000000000..39d8e1b86
--- /dev/null
+++ b/compiler/exo/src/Circle/CircleTensorExporter.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_TENSOR_EXPORTER_H__
+#define __CIRCLE_TENSOR_EXPORTER_H__
+
+#include "CircleExporterUtils.h"
+
+#include <loco/IR/Graph.h>
+
+#include <flatbuffers/flatbuffers.h>
+
+namespace exo
+{
+namespace circle_detail
+{
+
+/**
+ * @brief create Tensors corresponding to results of all nodes in graph
+ * @param g computational graph
+ * @param builder flatbuffer builder used for serialization
+ * @param gd information about serialized parts of model
+ */
+void exportOpDefinedTensors(loco::Graph *g, flatbuffers::FlatBufferBuilder &builder,
+ SerializedModelData &gd);
+
+} // namespace circle_detail
+} // namespace exo
+
+#endif // __CIRCLE_TENSOR_EXPORTER_H__
diff --git a/compiler/exo/src/Circle/CircleTypeInference.cpp b/compiler/exo/src/Circle/CircleTypeInference.cpp
new file mode 100644
index 000000000..a1e92b884
--- /dev/null
+++ b/compiler/exo/src/Circle/CircleTypeInference.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleTypeInference.h"
+
+#include "circle_schema_generated.h"
+
+#include "Dialect/Service/TFLTypeInferenceRule.h"
+#include "Dialect/IR/TFLDialect.h"
+
+#include <loco/IR/CanonicalNode.h>
+#include <loco/IR/CanonicalNodeVisitor.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/TypeInference.h>
+
+#include <locoex/COpDialect.h>
+#include <locoex/Service/COpTypeInference.h>
+
+#include <oops/InternalExn.h>
+
+#include <stdex/Memory.h>
+
+#include <stdexcept>
+#include <type_traits>
+
+namespace
+{
+
+/// @brief Map a loco::DataType to the corresponding circle::TensorType
+/// @note  Throws INTERNAL_EXN for unsigned 16/32/64-bit and FLOAT64 types,
+///        which have no Circle counterpart here
+circle::TensorType translateLocoTypeToCircle(loco::DataType dtype)
+{
+  switch (dtype)
+  {
+    case loco::DataType::U8:
+      return circle::TensorType_UINT8;
+    // case loco::DataType::U16: unsupported
+    // case loco::DataType::U32: unsupported
+    // case loco::DataType::U64: unsupported
+    case loco::DataType::S8:
+      return circle::TensorType_INT8;
+    case loco::DataType::S16:
+      return circle::TensorType_INT16;
+    case loco::DataType::S32:
+      return circle::TensorType_INT32;
+    case loco::DataType::S64:
+      return circle::TensorType_INT64;
+    case loco::DataType::FLOAT16:
+      return circle::TensorType_FLOAT16;
+    case loco::DataType::FLOAT32:
+      return circle::TensorType_FLOAT32;
+    // case loco::DataType::FLOAT64: unsupported
+    default:
+      break;
+  }
+
+  INTERNAL_EXN_V("Invalid loco dtype", oops::to_uint32(dtype));
+}
+
+} // namespace
+
+namespace exo
+{
+namespace circle_detail
+{
+
+/// @brief Circle tensor type of the node; its loco dtype must already be inferred
+circle::TensorType TypeInference::get(loco::Node *node)
+{
+  assert(loco::dtype_known(node));
+  return translateLocoTypeToCircle(loco::dtype_get(node));
+}
+
+} // namespace circle_detail
+} // namespace exo
diff --git a/compiler/exo/src/Circle/CircleTypeInference.h b/compiler/exo/src/Circle/CircleTypeInference.h
new file mode 100644
index 000000000..9c1730233
--- /dev/null
+++ b/compiler/exo/src/Circle/CircleTypeInference.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_TYPE_INFERENCE_H__
+#define __CIRCLE_TYPE_INFERENCE_H__
+
+#include "CircleExporterUtils.h"
+
+#include <loco/IR/Nodes.h>
+
+namespace exo
+{
+namespace circle_detail
+{
+
+/**
+ * @brief Get the type of each node as NodeAnnotation
+ *
+ * HOW TO USE
+ *
+ * TypeInference::get(g->nodes()->at(0));
+ * TypeInference::get(g->nodes()->at(...));
+ */
+struct TypeInference
+{
+ static circle::TensorType get(loco::Node *node);
+};
+
+} // namespace circle_detail
+} // namespace exo
+
+#endif // __CIRCLE_TYPE_INFERENCE_H__
diff --git a/compiler/exo/src/Conversion/AvgPool2DConverter.cpp b/compiler/exo/src/Conversion/AvgPool2DConverter.cpp
new file mode 100644
index 000000000..a95518ac6
--- /dev/null
+++ b/compiler/exo/src/Conversion/AvgPool2DConverter.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AvgPool2DConverter.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include "GraphBlock.h"
+#include "Check.h"
+
+#include <loco.h>
+
+namespace exo
+{
+/**
+ * @brief Converts loco::AvgPool2D to locoex::TFLAveragePool2D
+ *
+ * How it works: (note: ten->fea means input: tensor, output: feature)
+ *
+ * Before:
+ * Foo ---- FeatureEncode ---- AvgPool2D ---- FeatureDecode ---- Bar
+ * ten->ten ten->fea fea->fea fea->ten ten->ten
+ *
+ * After: AvgPool2D
+ * /
+ * Foo -- FeatureEncode - FeatureDecode - TFLAvgPool2D - FeatureEncode - FeatureDecode -- Bar
+ * ten->ten ten->fea fea->ten ten->ten ten->fea fea->ten ten->ten
+ *
+ * @note This method replaces AvgPool2D with "FeatureDecode -- TFLAvgPool2D -- FeatureEncode".
+ * Redundant nodes will be removed during transforms.
+ */
+bool AvgPool2DConverter::convert(loco::AvgPool2D *origin)
+{
+ auto *graph = origin->graph();
+
+ auto dec = make_feature_decode<FeatureLayout::NHWC>(origin->ifm());
+ auto tfl_average = graph->nodes()->create<locoex::TFLAveragePool2D>();
+ {
+ tfl_average->value(dec);
+
+ // set attributes
+ tfl_average->stride()->w(origin->stride()->horizontal());
+ tfl_average->stride()->h(origin->stride()->vertical());
+
+ tfl_average->filter()->w(origin->window()->horizontal());
+ tfl_average->filter()->h(origin->window()->vertical());
+
+ auto pad = origin->pad();
+ if (pad->bottom() == 0 && pad->top() == 0 && pad->left() == 0 && pad->right() == 0)
+ tfl_average->padding(locoex::Padding::VALID);
+ else
+ // TODO This is necessary, but not sufficient condition. More rigorous check required
+ tfl_average->padding(locoex::Padding::SAME);
+
+ tfl_average->fusedActivationFunction(locoex::FusedActFunc::NONE);
+ }
+ auto enc = make_feature_encode<FeatureLayout::NHWC>(tfl_average);
+
+ // replace canonical node
+ loco::replace(origin).with(enc);
+ origin->ifm(nullptr);
+
+ return true;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/AvgPool2DConverter.h b/compiler/exo/src/Conversion/AvgPool2DConverter.h
new file mode 100644
index 000000000..f66d02eb6
--- /dev/null
+++ b/compiler/exo/src/Conversion/AvgPool2DConverter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_AVGPOOL2D_CONVERTER__
+#define __CONVERSION_AVGPOOL2D_CONVERTER__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::AvgPool2D to locoex::TFLAveragePool2D
+ */
+class AvgPool2DConverter : public CanonicalNodeConverter<loco::AvgPool2D>
+{
+public:
+ const char *name(void) const final { return "exo::AvgPool2DConverter"; }
+
+public:
+ bool convert(loco::AvgPool2D *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_AVGPOOL2D_CONVERTER__
diff --git a/compiler/exo/src/Conversion/CanonicalNodeConverter.cpp b/compiler/exo/src/Conversion/CanonicalNodeConverter.cpp
new file mode 100644
index 000000000..4daf905f8
--- /dev/null
+++ b/compiler/exo/src/Conversion/CanonicalNodeConverter.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CanonicalNodeConverter.h"
+
+// This file is to make sure compilation of "CanonicalNodeConverter.h"
diff --git a/compiler/exo/src/Conversion/CanonicalNodeConverter.h b/compiler/exo/src/Conversion/CanonicalNodeConverter.h
new file mode 100644
index 000000000..76f73d888
--- /dev/null
+++ b/compiler/exo/src/Conversion/CanonicalNodeConverter.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_CANONICAL_NODE_CONVERTER_H__
+#define __CONVERSION_CANONICAL_NODE_CONVERTER_H__
+
+#include "Convert.h"
+
+#include <loco.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <logo/Pass.h>
+
+namespace exo
+{
+
+/**
+ * @brief Class to convert a canonical node to TFL node
+ *
+ * TODO Find a better name
+ */
+template <typename CanonicalType> class CanonicalNodeConverter : public logo::Pass
+{
+public:
+ virtual const char *name(void) const { return nullptr; }
+
+public:
+ bool run(loco::Graph *graph);
+
+protected:
+ virtual bool convert(CanonicalType *node) = 0;
+};
+
+template <typename CanonicalType>
+bool CanonicalNodeConverter<CanonicalType>::run(loco::Graph *graph)
+{
+ auto active_nodes = loco::active_nodes(loco::output_nodes(graph));
+ bool changed = false;
+
+ for (auto node : active_nodes)
+ {
+ // TODO Generalize this to all loco dialects
+ if (node->dialect() == loco::CanonicalDialect::get())
+ {
+ auto the_node = dynamic_cast<CanonicalType *>(node);
+ if (the_node != nullptr)
+ {
+ if (convert(the_node))
+ changed = true;
+ }
+ }
+ }
+
+ return changed;
+}
+
+} // namespace exo
+
+#endif //__CONVERSION_CANONICAL_NODE_CONVERTER_H__
diff --git a/compiler/exo/src/Conversion/ConstGenConverter.cpp b/compiler/exo/src/Conversion/ConstGenConverter.cpp
new file mode 100644
index 000000000..b2e2b4bdb
--- /dev/null
+++ b/compiler/exo/src/Conversion/ConstGenConverter.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstGenConverter.h"
+
+#include "Dialect/IR/TFLNodes.h"
+#include "Check.h"
+
+#include <loco.h>
+
+#include <oops/InternalExn.h>
+
+namespace exo
+{
+
+bool ConstGenConverter::convert(loco::ConstGen *constgen)
+{
+ auto *graph = constgen->graph();
+
+ auto tfl_const = graph->nodes()->create<locoex::TFLConst>();
+ {
+ if (constgen->dtype() == loco::DataType::FLOAT32)
+ {
+ tfl_const->dtype(loco::DataType::FLOAT32);
+
+ tfl_const->rank(constgen->rank());
+ for (uint32_t axis = 0; axis < constgen->rank(); axis++)
+ tfl_const->dim(axis) = constgen->dim(axis);
+
+ auto size = constgen->size<loco::DataType::FLOAT32>();
+ tfl_const->size<loco::DataType::FLOAT32>(size);
+
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ tfl_const->at<loco::DataType::FLOAT32>(i) = constgen->at<loco::DataType::FLOAT32>(i);
+ }
+ }
+ else
+ INTERNAL_EXN_V("Unsupported DataType", oops::to_uint32(constgen->dtype()));
+ }
+
+ loco::replace(constgen).with(tfl_const);
+
+ return true;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/ConstGenConverter.h b/compiler/exo/src/Conversion/ConstGenConverter.h
new file mode 100644
index 000000000..613ccd0e6
--- /dev/null
+++ b/compiler/exo/src/Conversion/ConstGenConverter.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_CONSTGEN_CONVERTER_H__
+#define __CONVERSION_CONSTGEN_CONVERTER_H__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+class ConstGenConverter : public CanonicalNodeConverter<loco::ConstGen>
+{
+public:
+ const char *name(void) const final { return "exo::ConstGenConverter"; }
+
+public:
+ bool convert(loco::ConstGen *constgen) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_CONSTGEN_CONVERTER_H__
diff --git a/compiler/exo/src/Conversion/ConstGenConverter.test.cpp b/compiler/exo/src/Conversion/ConstGenConverter.test.cpp
new file mode 100644
index 000000000..f7a577242
--- /dev/null
+++ b/compiler/exo/src/Conversion/ConstGenConverter.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstGenConverter.h"
+#include "ReluConverter.h"
+
+#include "Dialect/IR/TFLNodes.h"
+#include "TestGraph.h"
+#include "TestHelper.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+TEST(TFLConstGenConverterTest, ConstGen_Relu)
+{
+ exo::test::ExampleGraph<exo::test::ExampleGraphType::ConstGen_ReLU> g;
+
+ // set constgen
+ {
+ g.constgen->dtype(loco::DataType::FLOAT32);
+ g.constgen->shape({2, 1});
+ g.constgen->size<loco::DataType::FLOAT32>(2);
+
+ g.constgen->at<loco::DataType::FLOAT32>(0) = 0.5;
+ g.constgen->at<loco::DataType::FLOAT32>(1) = -0.5;
+ }
+
+ // let's convert
+ {
+ exo::test::TypeShapeReadyPhase test_phase;
+
+ test_phase.add_pass<exo::ConstGenConverter>();
+ test_phase.add_pass<exo::ReluConverter>();
+
+ test_phase.run(g.graph());
+ }
+
+ auto tfl_const = exo::test::find_first_node_bytype<locoex::TFLConst>(g.graph());
+ auto tfl_relu = exo::test::find_first_node_bytype<locoex::TFLRelu>(g.graph());
+
+ ASSERT_TRUE(tfl_const != nullptr and tfl_relu != nullptr);
+ ASSERT_TRUE(tfl_relu->features() == tfl_const);
+
+ ASSERT_TRUE(tfl_const->rank() == g.constgen->rank());
+ ASSERT_TRUE(tfl_const->dim(0) == g.constgen->dim(0));
+ ASSERT_TRUE(tfl_const->dim(1) == g.constgen->dim(1));
+ ASSERT_TRUE(tfl_const->at<loco::DataType::FLOAT32>(0) ==
+ g.constgen->at<loco::DataType::FLOAT32>(0));
+ ASSERT_TRUE(tfl_const->at<loco::DataType::FLOAT32>(1) ==
+ g.constgen->at<loco::DataType::FLOAT32>(1));
+}
diff --git a/compiler/exo/src/Conversion/Conv2DConverter.cpp b/compiler/exo/src/Conversion/Conv2DConverter.cpp
new file mode 100644
index 000000000..c8120171d
--- /dev/null
+++ b/compiler/exo/src/Conversion/Conv2DConverter.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conv2DConverter.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include "GraphBlock.h"
+#include "Check.h"
+
+#include <loco.h>
+#include <loco/Service/TypeInference.h>
+#include <loco/Service/ShapeInference.h>
+
+namespace exo
+{
+/**
+ * @brief Converts loco::Conv2D to locoex::TFLConv2D
+ * @note Because TFLConv2D accepts input and filter of loco::Domain::Tensor,
+ * loco::FeatureDecode and loco::FilterDecode will be inserted as an inputs
+ * to meet domain invariant.
+ * Please refer to the comment in AvgPool2DConvert.
+ */
+bool Conv2DConverter::convert(loco::Conv2D *origin)
+{
+ auto *graph = origin->graph();
+
+ assert(origin->ifm());
+ assert(origin->ker());
+
+ auto tfl_conv2d = graph->nodes()->create<locoex::TFLConv2D>();
+ {
+ tfl_conv2d->stride()->w(origin->stride()->horizontal());
+ tfl_conv2d->stride()->h(origin->stride()->vertical());
+
+ auto pad = origin->pad();
+ if (pad->bottom() == 0 && pad->top() == 0 && pad->left() == 0 && pad->right() == 0)
+ tfl_conv2d->padding(locoex::Padding::VALID);
+ else
+ // TODO This is necessary, but not sufficient condition. More rigorous check required
+ tfl_conv2d->padding(locoex::Padding::SAME);
+
+ tfl_conv2d->fusedActivationFunction(locoex::FusedActFunc::NONE);
+ }
+
+ // let's create a new graph connection with tfl_conv2d
+ {
+ // input
+ auto feature_dec = make_feature_decode<FeatureLayout::NHWC>(origin->ifm());
+ tfl_conv2d->input(feature_dec);
+
+ // filter
+ auto filter_dec = make_filter_decode<FilterLayout::OHWI>(origin->ker());
+ tfl_conv2d->filter(filter_dec);
+
+ // bias
+ auto zero_const = graph->nodes()->create<locoex::TFLConst>();
+ {
+ assert(loco::shape_known(origin));
+ assert(loco::dtype_known(origin) && loco::dtype_get(origin) == loco::DataType::FLOAT32);
+
+ auto output_depth = loco::shape_get(origin->ker()).as<loco::FilterShape>().count();
+
+ zero_const->dtype(loco::DataType::FLOAT32);
+ zero_const->rank(1);
+ zero_const->dim(0) = output_depth;
+ zero_const->size<loco::DataType::FLOAT32>(output_depth.value());
+ for (uint32_t x = 0; x < output_depth.value(); x++)
+ zero_const->at<loco::DataType::FLOAT32>(x) = 0.0;
+ }
+ tfl_conv2d->bias(zero_const);
+
+ // output
+ auto feature_enc = make_feature_encode<FeatureLayout::NHWC>(tfl_conv2d);
+
+ // replace canonical node
+ loco::replace(origin).with(feature_enc);
+ origin->ifm(nullptr);
+ }
+
+ return true;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/Conv2DConverter.h b/compiler/exo/src/Conversion/Conv2DConverter.h
new file mode 100644
index 000000000..95b3fbfae
--- /dev/null
+++ b/compiler/exo/src/Conversion/Conv2DConverter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_CONV2D_CONVERTER__
+#define __CONVERSION_CONV2D_CONVERTER__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::Conv2D to locoex::TFLConv2D
+ */
+class Conv2DConverter : public CanonicalNodeConverter<loco::Conv2D>
+{
+public:
+ const char *name(void) const final { return "exo::Conv2DConverter"; }
+
+public:
+ bool convert(loco::Conv2D *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_CONV2D_CONVERTER__
diff --git a/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp
new file mode 100644
index 000000000..5959fcc45
--- /dev/null
+++ b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConv2DConverter.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include "GraphBlock.h"
+#include "Check.h"
+
+#include <loco.h>
+#include <loco/Service/TypeInference.h>
+#include <loco/Service/ShapeInference.h>
+
+namespace exo
+{
+
+bool DepthwiseConv2DConverter::convert(loco::DepthwiseConv2D *origin)
+{
+ // Filter shape is required
+ if (not loco::shape_known(origin->ker()))
+ return false;
+
+ auto filter_shape = loco::shape_get(origin->ker()).as<loco::DepthwiseFilterShape>();
+
+ if ((origin->ifm() == nullptr) or (origin->ker() == nullptr))
+ return false;
+
+ auto *graph = origin->graph();
+
+ auto tfl_dw_conv2d = graph->nodes()->create<locoex::TFLDepthwiseConv2D>();
+ {
+ tfl_dw_conv2d->stride()->w(origin->stride()->horizontal());
+ tfl_dw_conv2d->stride()->h(origin->stride()->vertical());
+
+ auto pad = origin->pad();
+ if (pad->bottom() == 0 && pad->top() == 0 && pad->left() == 0 && pad->right() == 0)
+ tfl_dw_conv2d->padding(locoex::Padding::VALID);
+ else
+ // TODO This is necessary, but not sufficient condition. More rigorous check required
+ tfl_dw_conv2d->padding(locoex::Padding::SAME);
+
+ tfl_dw_conv2d->fusedActivationFunction(locoex::FusedActFunc::NONE);
+
+ uint32_t multiplier = filter_shape.multiplier().value();
+ EXO_ASSERT(multiplier < std::numeric_limits<int32_t>::max(),
+ "Multiplier is too big that casting may occur unintended behavior")
+
+ tfl_dw_conv2d->depthMultiplier(static_cast<int32_t>(multiplier));
+ }
+
+ // let's create a new graph connection with tfl_dw_conv2d
+ {
+ // ifm --- feature_dec --- tfl_dw_conv2d
+ auto feature_dec = make_feature_decode<FeatureLayout::NHWC>(origin->ifm());
+ tfl_dw_conv2d->input(feature_dec);
+
+ // ker --- filter_dec(H x W x C x M) --- reshape(1 x H x W x CM) --- tfl_dw_conv2d
+ auto filter_dec = make_dw_filter_decode<DepthwiseFilterLayout::HWCM>(origin->ker());
+
+ auto reshape = graph->nodes()->create<locoex::TFLReshape>();
+ reshape->tensor(filter_dec);
+
+ int32_t new_shape[4] = {
+ 1, static_cast<int32_t>(filter_shape.height().value()),
+ static_cast<int32_t>(filter_shape.width().value()),
+ static_cast<int32_t>(filter_shape.depth().value() * filter_shape.multiplier().value())};
+ locoex::set_new_shape(reshape, new_shape, 4);
+
+ tfl_dw_conv2d->filter(reshape);
+
+ // bias
+ auto zero_const = graph->nodes()->create<locoex::TFLConst>();
+ {
+ assert(loco::shape_known(origin));
+ assert(loco::dtype_known(origin) && loco::dtype_get(origin) == loco::DataType::FLOAT32);
+
+ // bias size is C * M
+ uint32_t bias_size = filter_shape.depth().value() * filter_shape.multiplier().value();
+
+ zero_const->dtype(loco::DataType::FLOAT32);
+ zero_const->rank(1);
+ zero_const->dim(0) = bias_size;
+ zero_const->size<loco::DataType::FLOAT32>(bias_size);
+ for (uint32_t x = 0; x < bias_size; x++)
+ zero_const->at<loco::DataType::FLOAT32>(x) = 0.0;
+ }
+ tfl_dw_conv2d->bias(zero_const);
+
+ // output
+ auto feature_enc = make_feature_encode<FeatureLayout::NHWC>(tfl_dw_conv2d);
+
+ // replace canonical node
+ loco::replace(origin).with(feature_enc);
+ origin->ifm(nullptr);
+ }
+
+ return true;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/DepthwiseConv2DConverter.h b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.h
new file mode 100644
index 000000000..57cc01e5e
--- /dev/null
+++ b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_DEPTHWISECONV2D_CONVERTER__
+#define __CONVERSION_DEPTHWISECONV2D_CONVERTER__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::DepthwiseConv2D to locoex::TFLDepthwiseConv2D and auxiliary
+ *
+ *
+ * <BEFORE>
+ *
+ * IFM -------- DepthwiseConv2D --- Out
+ * [Feature] / [Feature]
+ * /
+ * KER -------
+ * [DWFilter]
+ *
+ *
+ * <AFTER>
+ * TFLConst (bias) ---------------------------
+ * \
+ * IFM ------ FeatureDecode ------------------ TFLDepthwiseConv2D --- FeatureEncode --- Out
+ * [Feature] [Tensor] / [Tensor] [Feature]
+ * /
+ * KER ------- DepthwiseFilterDecode --- TFLReshape
+ * [DWFilter] [Tensor / H W C M] [Tensor / 1 H W CM]
+ *
+ */
+class DepthwiseConv2DConverter : public CanonicalNodeConverter<loco::DepthwiseConv2D>
+{
+public:
+ const char *name(void) const final { return "exo::DepthwiseConv2DConverter"; }
+
+public:
+ bool convert(loco::DepthwiseConv2D *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_DEPTHWISECONV2D_CONVERTER__
diff --git a/compiler/exo/src/Conversion/EltwiseAddConverter.cpp b/compiler/exo/src/Conversion/EltwiseAddConverter.cpp
new file mode 100644
index 000000000..557f47944
--- /dev/null
+++ b/compiler/exo/src/Conversion/EltwiseAddConverter.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EltwiseAddConverter.h"
+
+#include "EltwiseBinaryConverter.h"
+
+namespace exo
+{
+
+bool EltwiseAddConverter::convert(loco::EltwiseAdd *origin)
+{
+ return EltwiseBinaryConvert<loco::EltwiseAdd, locoex::TFLAdd>(origin);
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/EltwiseAddConverter.h b/compiler/exo/src/Conversion/EltwiseAddConverter.h
new file mode 100644
index 000000000..97e1071b5
--- /dev/null
+++ b/compiler/exo/src/Conversion/EltwiseAddConverter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_ELTWISEADD_CONVERTER_H__
+#define __CONVERSION_ELTWISEADD_CONVERTER_H__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::EltwiseAdd to TFLAdd
+ */
+class EltwiseAddConverter : public CanonicalNodeConverter<loco::EltwiseAdd>
+{
+public:
+ const char *name(void) const final { return "exo::EltwiseAddConverter"; }
+
+public:
+ bool convert(loco::EltwiseAdd *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_ELTWISEADD_CONVERTER_H__
diff --git a/compiler/exo/src/Conversion/EltwiseBinaryConverter.h b/compiler/exo/src/Conversion/EltwiseBinaryConverter.h
new file mode 100644
index 000000000..095da9e5c
--- /dev/null
+++ b/compiler/exo/src/Conversion/EltwiseBinaryConverter.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_ELTWISEBINARY_CONVERTER_H__
+#define __CONVERSION_ELTWISEBINARY_CONVERTER_H__
+
+#include "GraphBlock.h"
+#include "Check.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include <loco/IR/Nodes.h>
+
+#include <loco/Service/ShapeInference.h>
+
+namespace
+{
+
+template <class ELTWISEBIN, class TFLBIN>
+class EltwiseBinInputHandler : public exo::InputHandler<ELTWISEBIN, TFLBIN>
+{
+public:
+ void handover(ELTWISEBIN *origin, TFLBIN *replacer) override
+ {
+ assert(origin && replacer);
+ replacer->x(origin->lhs());
+ replacer->y(origin->rhs());
+ }
+
+ std::vector<loco::Node *> getInputsToConvert(ELTWISEBIN *origin) override
+ {
+ assert(origin);
+ std::vector<loco::Node *> inputs({origin->lhs(), origin->rhs()});
+ return inputs;
+ }
+
+ void set(TFLBIN *replacer, std::vector<loco::Node *> &to) override
+ {
+ assert(to.size() == 2);
+
+ replacer->x(to.at(0));
+ replacer->y(to.at(1));
+ }
+
+ void nullify(ELTWISEBIN *origin) override
+ {
+ assert(origin);
+ origin->lhs(nullptr);
+ origin->rhs(nullptr);
+ }
+};
+
+template <class TFLBIN> void init_fused_act_func(TFLBIN *);
+
+template <> inline void init_fused_act_func(locoex::TFLAdd *node)
+{
+ node->fusedActivationFunction(locoex::FusedActFunc::NONE);
+}
+
+template <> inline void init_fused_act_func(locoex::TFLMul *node)
+{
+ node->fusedActivationFunction(locoex::FusedActFunc::NONE);
+}
+
+template <> inline void init_fused_act_func(locoex::TFLSub *node)
+{
+ node->fusedActivationFunction(locoex::FusedActFunc::NONE);
+}
+
+template <> inline void init_fused_act_func(locoex::TFLDiv *node)
+{
+ node->fusedActivationFunction(locoex::FusedActFunc::NONE);
+}
+
+} // namespace
+
+namespace exo
+{
+
+template <class ELTWISEBIN, class TFLBIN> bool EltwiseBinaryConvert(ELTWISEBIN *origin)
+{
+ EltwiseBinInputHandler<ELTWISEBIN, TFLBIN> input_handler;
+ exo::DomainConverter<ELTWISEBIN, TFLBIN> domain_converter;
+
+ auto tfl_node = domain_converter.template convert<FeatureLayout::NHWC>(origin, input_handler);
+
+ if (tfl_node == nullptr)
+ return false;
+
+ init_fused_act_func(tfl_node);
+
+ return true;
+}
+
+} // namespace exo
+
+#endif // __CONVERSION_ELTWISEBINARY_CONVERTER_H__
diff --git a/compiler/exo/src/Conversion/EltwiseDivConverter.cpp b/compiler/exo/src/Conversion/EltwiseDivConverter.cpp
new file mode 100644
index 000000000..dc8eae461
--- /dev/null
+++ b/compiler/exo/src/Conversion/EltwiseDivConverter.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EltwiseDivConverter.h"
+
+#include "EltwiseBinaryConverter.h"
+
+namespace exo
+{
+
+bool EltwiseDivConverter::convert(loco::EltwiseDiv *origin)
+{
+ return EltwiseBinaryConvert<loco::EltwiseDiv, locoex::TFLDiv>(origin);
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/EltwiseDivConverter.h b/compiler/exo/src/Conversion/EltwiseDivConverter.h
new file mode 100644
index 000000000..06b2d685b
--- /dev/null
+++ b/compiler/exo/src/Conversion/EltwiseDivConverter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_ELTWISEDIV_CONVERTER_H__
+#define __CONVERSION_ELTWISEDIV_CONVERTER_H__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::EltwiseDiv to TFLDiv
+ */
+class EltwiseDivConverter : public CanonicalNodeConverter<loco::EltwiseDiv>
+{
+public:
+  // Pass name used for identification/diagnostics
+  const char *name(void) const final { return "exo::EltwiseDivConverter"; }
+
+public:
+  // Returns true if 'origin' was replaced with a TFLDiv
+  bool convert(loco::EltwiseDiv *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_ELTWISEDIV_CONVERTER_H__
diff --git a/compiler/exo/src/Conversion/EltwiseMaxConverter.cpp b/compiler/exo/src/Conversion/EltwiseMaxConverter.cpp
new file mode 100644
index 000000000..dd7d34440
--- /dev/null
+++ b/compiler/exo/src/Conversion/EltwiseMaxConverter.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EltwiseMaxConverter.h"
+
+#include "GraphBlock.h"
+#include "Check.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include <loco/Service/ShapeInference.h>
+
+namespace
+{
+
+/**
+ * @brief InputHandler callbacks telling DomainConverter how to rewire
+ *        loco::EltwiseMax's (lhs, rhs) into locoex::TFLMaximum's (x, y)
+ */
+class EltwiseMaxInputHandler : public exo::InputHandler<loco::EltwiseMax, locoex::TFLMaximum>
+{
+public:
+  // Copy origin's operands to the replacer as-is
+  void handover(loco::EltwiseMax *origin, locoex::TFLMaximum *replacer) override
+  {
+    replacer->x(origin->lhs());
+    replacer->y(origin->rhs());
+  }
+
+  // Both operands need domain conversion
+  std::vector<loco::Node *> getInputsToConvert(loco::EltwiseMax *origin) override
+  {
+    std::vector<loco::Node *> inputs({origin->lhs(), origin->rhs()});
+    return inputs;
+  }
+
+  // Install converted operands; order matches getInputsToConvert (lhs, rhs)
+  void set(locoex::TFLMaximum *replacer, std::vector<loco::Node *> &to) override
+  {
+    assert(to.size() == 2);
+
+    replacer->x(to.at(0));
+    replacer->y(to.at(1));
+  }
+
+  // Detach origin from its operands so it drops out of the graph
+  void nullify(loco::EltwiseMax *origin) override
+  {
+    assert(origin);
+    origin->lhs(nullptr);
+    origin->rhs(nullptr);
+  }
+};
+
+} // namespace
+
+namespace exo
+{
+
+/**
+ * @brief Replace loco::EltwiseMax with locoex::TFLMaximum via DomainConverter
+ *
+ * @return true when a TFLMaximum node was created, false otherwise
+ */
+bool EltwiseMaxConverter::convert(loco::EltwiseMax *origin)
+{
+  EltwiseMaxInputHandler handler;
+  exo::DomainConverter<loco::EltwiseMax, locoex::TFLMaximum> converter;
+
+  // A null result means the conversion could not be performed
+  return converter.convert<FeatureLayout::NHWC>(origin, handler) != nullptr;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/EltwiseMaxConverter.h b/compiler/exo/src/Conversion/EltwiseMaxConverter.h
new file mode 100644
index 000000000..708745419
--- /dev/null
+++ b/compiler/exo/src/Conversion/EltwiseMaxConverter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_ELTWISEMAX_CONVERTER_H__
+#define __CONVERSION_ELTWISEMAX_CONVERTER_H__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::EltwiseMax to TFLMaximum
+ */
+class EltwiseMaxConverter : public CanonicalNodeConverter<loco::EltwiseMax>
+{
+public:
+  // Pass name used for identification/diagnostics
+  const char *name(void) const final { return "exo::EltwiseMaxConverter"; }
+
+public:
+  // Returns true if 'origin' was replaced with a TFLMaximum
+  bool convert(loco::EltwiseMax *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_ELTWISEMAX_CONVERTER_H__
diff --git a/compiler/exo/src/Conversion/EltwiseMulConverter.cpp b/compiler/exo/src/Conversion/EltwiseMulConverter.cpp
new file mode 100644
index 000000000..f7a4b8298
--- /dev/null
+++ b/compiler/exo/src/Conversion/EltwiseMulConverter.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EltwiseMulConverter.h"
+
+#include "EltwiseBinaryConverter.h"
+
+namespace exo
+{
+
+// Delegates to the shared element-wise binary conversion routine
+bool EltwiseMulConverter::convert(loco::EltwiseMul *origin)
+{
+  return EltwiseBinaryConvert<loco::EltwiseMul, locoex::TFLMul>(origin);
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/EltwiseMulConverter.h b/compiler/exo/src/Conversion/EltwiseMulConverter.h
new file mode 100644
index 000000000..4f73484c0
--- /dev/null
+++ b/compiler/exo/src/Conversion/EltwiseMulConverter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_ELTWISEMUL_CONVERTER_H__
+#define __CONVERSION_ELTWISEMUL_CONVERTER_H__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::EltwiseMul to TFLMul
+ */
+class EltwiseMulConverter : public CanonicalNodeConverter<loco::EltwiseMul>
+{
+public:
+  // Pass name used for identification/diagnostics
+  const char *name(void) const final { return "exo::EltwiseMulConverter"; }
+
+public:
+  // Returns true if 'origin' was replaced with a TFLMul
+  bool convert(loco::EltwiseMul *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_ELTWISEMUL_CONVERTER_H__
diff --git a/compiler/exo/src/Conversion/EltwiseSqrtConverter.cpp b/compiler/exo/src/Conversion/EltwiseSqrtConverter.cpp
new file mode 100644
index 000000000..6dead7dc6
--- /dev/null
+++ b/compiler/exo/src/Conversion/EltwiseSqrtConverter.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EltwiseSqrtConverter.h"
+
+#include "GraphBlock.h"
+#include "Check.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include <loco/Service/ShapeInference.h>
+
+namespace
+{
+
+/**
+ * @brief InputHandler callbacks telling DomainConverter how to rewire
+ *        loco::EltwiseSqrt's single input into locoex::TFLSqrt's x
+ */
+class EltwiseSqrtInputHandler : public exo::InputHandler<loco::EltwiseSqrt, locoex::TFLSqrt>
+{
+public:
+  // Copy origin's input to the replacer as-is
+  void handover(loco::EltwiseSqrt *origin, locoex::TFLSqrt *replacer) override
+  {
+    replacer->x(origin->input());
+  }
+
+  // The single input needs domain conversion
+  std::vector<loco::Node *> getInputsToConvert(loco::EltwiseSqrt *origin) override
+  {
+    std::vector<loco::Node *> inputs({origin->input()});
+    return inputs;
+  }
+
+  // Install the converted input
+  void set(locoex::TFLSqrt *replacer, std::vector<loco::Node *> &to) override
+  {
+    assert(to.size() == 1);
+
+    replacer->x(to.at(0));
+  }
+
+  // Detach origin from its input so it drops out of the graph
+  void nullify(loco::EltwiseSqrt *origin) override { origin->input(nullptr); }
+};
+
+} // namespace
+
+namespace exo
+{
+
+/**
+ * @brief Replace loco::EltwiseSqrt with locoex::TFLSqrt via DomainConverter
+ *
+ * @return true when a TFLSqrt node was created, false otherwise
+ */
+bool EltwiseSqrtConverter::convert(loco::EltwiseSqrt *origin)
+{
+  EltwiseSqrtInputHandler handler;
+  exo::DomainConverter<loco::EltwiseSqrt, locoex::TFLSqrt> converter;
+
+  // A null result means the conversion could not be performed
+  return converter.convert<FeatureLayout::NHWC>(origin, handler) != nullptr;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/EltwiseSqrtConverter.h b/compiler/exo/src/Conversion/EltwiseSqrtConverter.h
new file mode 100644
index 000000000..5ee3185ff
--- /dev/null
+++ b/compiler/exo/src/Conversion/EltwiseSqrtConverter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ELTWISE_SQRT_CONVERTER_H__
+#define __ELTWISE_SQRT_CONVERTER_H__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::EltwiseSqrt to TFLSqrt
+ */
+class EltwiseSqrtConverter : public CanonicalNodeConverter<loco::EltwiseSqrt>
+{
+public:
+  // Pass name used for identification/diagnostics
+  const char *name(void) const final { return "exo::EltwiseSqrtConverter"; }
+
+public:
+  // Returns true if 'origin' was replaced with a TFLSqrt
+  bool convert(loco::EltwiseSqrt *origin) final;
+};
+
+} // namespace exo
+
+#endif // __ELTWISE_SQRT_CONVERTER_H__
diff --git a/compiler/exo/src/Conversion/EltwiseSubConverter.cpp b/compiler/exo/src/Conversion/EltwiseSubConverter.cpp
new file mode 100644
index 000000000..5647c47a2
--- /dev/null
+++ b/compiler/exo/src/Conversion/EltwiseSubConverter.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EltwiseSubConverter.h"
+
+#include "EltwiseBinaryConverter.h"
+
+namespace exo
+{
+
+// Delegates to the shared element-wise binary conversion routine
+bool EltwiseSubConverter::convert(loco::EltwiseSub *origin)
+{
+  return EltwiseBinaryConvert<loco::EltwiseSub, locoex::TFLSub>(origin);
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/EltwiseSubConverter.h b/compiler/exo/src/Conversion/EltwiseSubConverter.h
new file mode 100644
index 000000000..d61b76ec0
--- /dev/null
+++ b/compiler/exo/src/Conversion/EltwiseSubConverter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_ELTWISESUB_CONVERTER_H__
+#define __CONVERSION_ELTWISESUB_CONVERTER_H__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::EltwiseSub to TFLSub
+ */
+class EltwiseSubConverter : public CanonicalNodeConverter<loco::EltwiseSub>
+{
+public:
+  // Pass name used for identification/diagnostics
+  const char *name(void) const final { return "exo::EltwiseSubConverter"; }
+
+public:
+  // Returns true if 'origin' was replaced with a TFLSub
+  bool convert(loco::EltwiseSub *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_ELTWISESUB_CONVERTER_H__
diff --git a/compiler/exo/src/Conversion/FeatureBiasAddConverter.cpp b/compiler/exo/src/Conversion/FeatureBiasAddConverter.cpp
new file mode 100644
index 000000000..b9aaf140b
--- /dev/null
+++ b/compiler/exo/src/Conversion/FeatureBiasAddConverter.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FeatureBiasAddConverter.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include "GraphBlock.h"
+
+#include <loco.h>
+#include <loco/Service/ShapeInference.h>
+
+#include <cassert>
+
+namespace
+{
+
+// A TFLAdd created by this converter carries no fused activation
+inline void init_fused_act_func(locoex::TFLAdd *node)
+{
+  node->fusedActivationFunction(locoex::FusedActFunc::NONE);
+}
+
+} // namespace
+
+namespace exo
+{
+
+/**
+ * @brief Converts loco::FeatureBiasAdd to locoex::TFLAdd
+ *
+ * Before:
+ * Foo ---+
+ * |
+ * loco::FeatureBiasAdd - FeatureDecode - ...
+ * |
+ * Bar - BiasEncode --+
+ *
+ * After:
+ *
+ * Foo - loco::FeatureDecode --+ loco::FeatureBiasAdd
+ * |(x)
+ * TFLAdd -- loco::FeatureEncode - FeatureDecode - ...
+ * |(y)
+ * Bar - BiasEncode - loco::BiasDecode --+
+ */
+bool FeatureBiasAddConverter::convert(loco::FeatureBiasAdd *origin)
+{
+  auto *graph = origin->graph();
+
+  auto tfl_add = graph->nodes()->create<locoex::TFLAdd>();
+
+  // handling input x: value must live in the Feature domain; decode it to
+  // NHWC so TFLAdd sees a plain tensor
+  assert(loco::shape_get(origin->value()).domain() == loco::Domain::Feature);
+
+  auto fea_dec = make_feature_decode<FeatureLayout::NHWC>(origin->value());
+  tfl_add->x(fea_dec);
+
+  // handling input y: insert a BiasDecode in front of the encoded bias
+  auto bias_dec = graph->nodes()->create<loco::BiasDecode>();
+  assert(bias_dec != nullptr);
+
+  bias_dec->input(origin->bias());
+
+  tfl_add->y(bias_dec);
+
+  // fused activation function: initialized to NONE
+  init_fused_act_func(tfl_add);
+
+  // handling output: re-encode to the Feature domain, then splice the new
+  // subgraph in and detach origin from its input
+  auto fea_enc = make_feature_encode<FeatureLayout::NHWC>(tfl_add);
+
+  loco::replace(origin).with(fea_enc);
+  origin->value(nullptr);
+
+  return true;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/FeatureBiasAddConverter.h b/compiler/exo/src/Conversion/FeatureBiasAddConverter.h
new file mode 100644
index 000000000..5c4f10213
--- /dev/null
+++ b/compiler/exo/src/Conversion/FeatureBiasAddConverter.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_FEATUREBIASADD_CONVERTER__
+#define __CONVERSION_FEATUREBIASADD_CONVERTER__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Converter pass replacing loco::FeatureBiasAdd with locoex::TFLAdd
+ */
+class FeatureBiasAddConverter : public CanonicalNodeConverter<loco::FeatureBiasAdd>
+{
+public:
+  // Pass name used for identification/diagnostics.
+  // Fix: was "exo::TFLAddConverter" (copy-paste slip) — every converter in
+  // this directory reports "exo::" followed by its own class name.
+  const char *name(void) const final { return "exo::FeatureBiasAddConverter"; }
+
+public:
+  // Returns true if 'origin' was replaced with a TFLAdd
+  bool convert(loco::FeatureBiasAdd *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_FEATUREBIASADD_CONVERTER__
diff --git a/compiler/exo/src/Conversion/FeatureBiasAddConverter.test.cpp b/compiler/exo/src/Conversion/FeatureBiasAddConverter.test.cpp
new file mode 100644
index 000000000..f3c4a5f81
--- /dev/null
+++ b/compiler/exo/src/Conversion/FeatureBiasAddConverter.test.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FeatureBiasAddConverter.h"
+
+#include "GraphBlock.h"
+#include "Dialect/IR/TFLNodes.h"
+
+#include "TestGraph.h"
+#include "TestHelper.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+// End-to-end check: run FeatureBiasAddConverter on an example graph and
+// verify the resulting topology around the inserted TFLAdd node.
+TEST(FeatureBiasAddConverterTest, basic_test)
+{
+  exo::test::ExampleGraph<exo::test::ExampleGraphType::FeatureBiasAdd> g;
+
+  { // attrib setting
+    // pull: 1x2x2x3 float input
+    g.pull->dtype(loco::DataType::FLOAT32);
+    g.pull->shape({1, 2, 2, 3});
+
+    // bias value: 3-element float constant {0.5, 1, 1.5}
+    g.constgen->dtype(loco::DataType::FLOAT32);
+    g.constgen->shape({3});
+    g.constgen->size<loco::DataType::FLOAT32>(3);
+
+    g.constgen->at<loco::DataType::FLOAT32>(0) = 0.5;
+    g.constgen->at<loco::DataType::FLOAT32>(1) = 1;
+    g.constgen->at<loco::DataType::FLOAT32>(2) = 1.5;
+  }
+
+  EXO_TEST_ASSERT_NODE_COUNT({g.push}, 7); // sanity check
+
+  // let's convert!!
+  {
+    exo::test::TypeShapeReadyPhase test_phase;
+
+    test_phase.add_pass<exo::FeatureBiasAddConverter>();
+
+    test_phase.run(g.graph());
+
+    /*
+      Expected:
+
+      Pull - FeatureEncoder - FeatureDecode - TFLAdd - FeatureEncode - FeatureDecode - Push
+                                                |
+               ConstGen - BiasEncode - BiasDecode ---+
+    */
+  }
+
+  // check surroundings: walk inputs and outputs of the new TFLAdd node
+  auto tfl_add = exo::test::find_first_node_bytype<locoex::TFLAdd>(g.graph());
+  {
+    ASSERT_TRUE(tfl_add != nullptr);
+
+    // input x and its pred: FeatureDecode fed by the original FeatureEncode
+    {
+      auto actual_fea_dec = dynamic_cast<loco::FeatureDecode *>(tfl_add->x());
+      ASSERT_TRUE(actual_fea_dec != nullptr);
+
+      auto actual_fea_enc = dynamic_cast<loco::FeatureEncode *>(actual_fea_dec->input());
+      ASSERT_TRUE(actual_fea_enc != nullptr);
+      ASSERT_TRUE(actual_fea_enc == g.fea_enc);
+    }
+
+    // input y and its pred: BiasDecode fed by the original BiasEncode
+    {
+      auto actual_bias_dec = dynamic_cast<loco::BiasDecode *>(tfl_add->y());
+      ASSERT_TRUE(actual_bias_dec != nullptr);
+
+      auto actual_bias_enc = dynamic_cast<loco::BiasEncode *>(actual_bias_dec->input());
+      ASSERT_TRUE(actual_bias_enc != nullptr);
+      ASSERT_TRUE(actual_bias_enc == g.bias_enc);
+    }
+
+    // output check: FeatureEncode -> original FeatureDecode
+    {
+      auto actual_fea_enc = exo::test::get_only_succ<loco::FeatureEncode>(tfl_add);
+      ASSERT_TRUE(actual_fea_enc != nullptr);
+
+      auto actual_fea_dec = exo::test::get_only_succ<loco::FeatureDecode>(actual_fea_enc);
+      ASSERT_TRUE(actual_fea_dec != nullptr);
+      ASSERT_TRUE(actual_fea_dec == g.fea_dec);
+    }
+  }
+}
diff --git a/compiler/exo/src/Conversion/MatMulConverter.cpp b/compiler/exo/src/Conversion/MatMulConverter.cpp
new file mode 100644
index 000000000..b1158b73d
--- /dev/null
+++ b/compiler/exo/src/Conversion/MatMulConverter.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MatMulConverter.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include "GraphBlock.h"
+#include "Check.h"
+
+#include <loco.h>
+#include <loco/Service/TypeInference.h>
+#include <loco/Service/ShapeInference.h>
+
+namespace exo
+{
+/**
+ * @brief Converts loco::MatMul to locoex::TFLFullyConnected
+ * @note Because TFLFullyConnected accepts input and weights of loco::Domain::Matrix,
+ * loco::MatrixDecode will be inserted as an input and weights
+ * to meet domain invariant.
+ *
+ * How it works:
+ *
+ * Before:
+ * Foo1 ---- MatrixEncode ---- MatMul ---- MatrixDecode ---- Bar
+ * Foo2 ---- MatrixEncode ----/
+ *
+ * After:
+ *
+ * Foo1 - MatrixEncode - MatrixDecode - TFLFullyConnected - MatrixEncode - MatrixDecode - Bar
+ * Foo2 - MatrixEncode - MatrixDecode -/
+ *
+ * @note This method replaces MatMul with "- MatrixDecode - TFLFullyConnected - MatrixEncode -".
+ * - MatrixDecode -/
+ * Redundant nodes will be removed during transforms.
+ *
+ * @ref
+ * https://github.com/tensorflow/tensorflow/blob/v1.13.1/tensorflow/lite/kernels/internal/reference/fully_connected.h
+ */
+bool MatMulConverter::convert(loco::MatMul *origin)
+{
+  auto *graph = origin->graph();
+
+  assert(origin->lhs());
+  assert(origin->rhs());
+
+  auto tfl_fc = graph->nodes()->create<locoex::TFLFullyConnected>();
+  tfl_fc->fusedActivationFunction(locoex::FusedActFunc::NONE);
+
+  // let's create a new graph connection with tfl_fc
+  {
+    // input: decode lhs from the Matrix domain in HW layout
+    auto lhs_matrix_dec = make_matrix_decode<MatrixLayout::HW>(origin->lhs());
+    tfl_fc->input(lhs_matrix_dec);
+
+    // weights (WH format on TFLite)
+    auto rhs_matrix_dec = make_matrix_decode<MatrixLayout::WH>(origin->rhs());
+    tfl_fc->weights(rhs_matrix_dec);
+
+    // bias: MatMul has no bias input, so feed TFLFullyConnected an all-zero
+    // 1-D constant sized to the output depth (width of rhs)
+    auto zero_const = graph->nodes()->create<locoex::TFLConst>();
+    { // TODO Create optimization pass which fuse additional Add into bias of Conv or FC
+      assert(loco::shape_known(origin));
+      assert(loco::dtype_known(origin) && loco::dtype_get(origin) == loco::DataType::FLOAT32);
+
+      auto output_depth = loco::shape_get(origin->rhs()).as<loco::MatrixShape>().width();
+      // TODO Fix it with type inference
+      zero_const->dtype(loco::DataType::FLOAT32);
+      zero_const->rank(1);
+      zero_const->dim(0) = output_depth;
+      zero_const->size<loco::DataType::FLOAT32>(output_depth.value());
+      for (uint32_t x = 0; x < output_depth.value(); x++)
+        zero_const->at<loco::DataType::FLOAT32>(x) = 0.0;
+    }
+    tfl_fc->bias(zero_const);
+
+    // output: re-encode into the Matrix domain
+    auto matrix_enc = make_matrix_encode<MatrixLayout::HW>(tfl_fc);
+
+    // replace canonical node, then detach origin from its operands
+    loco::replace(origin).with(matrix_enc);
+    origin->lhs(nullptr);
+    origin->rhs(nullptr);
+  }
+
+  return true;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/MatMulConverter.h b/compiler/exo/src/Conversion/MatMulConverter.h
new file mode 100644
index 000000000..e64c4a0f2
--- /dev/null
+++ b/compiler/exo/src/Conversion/MatMulConverter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_FULLY_CONNECTED_CONVERTER__
+#define __CONVERSION_FULLY_CONNECTED_CONVERTER__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::MatMul to locoex::TFLFullyConnected
+ */
+class MatMulConverter : public CanonicalNodeConverter<loco::MatMul>
+{
+public:
+  // Pass name used for identification/diagnostics
+  const char *name(void) const final { return "exo::MatMulConverter"; }
+
+public:
+  // Returns true if 'origin' was replaced with a TFLFullyConnected
+  bool convert(loco::MatMul *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_FULLY_CONNECTED_CONVERTER__
diff --git a/compiler/exo/src/Conversion/MaxPool2DConverter.cpp b/compiler/exo/src/Conversion/MaxPool2DConverter.cpp
new file mode 100644
index 000000000..67e5ab833
--- /dev/null
+++ b/compiler/exo/src/Conversion/MaxPool2DConverter.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPool2DConverter.h"
+
+#include "Dialect/IR/TFLNodes.h"
+#include "GraphBlock.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Converts loco::MaxPool2D to locoex::TFLMaxPool2D
+ *
+ * @note This works similar to AvgPool2DConverter. Please refer to the comment in
+ * AvgPool2DConverter.
+ */
+bool MaxPool2DConverter::convert(loco::MaxPool2D *origin)
+{
+  auto *graph = origin->graph();
+
+  // Decode the canonical feature input into NHWC for the TFLite node
+  auto input_dec = make_feature_decode<FeatureLayout::NHWC>(origin->ifm());
+
+  auto tfl_node = graph->nodes()->create<locoex::TFLMaxPool2D>();
+
+  tfl_node->value(input_dec);
+
+  // Carry over stride and window attributes
+  tfl_node->stride()->w(origin->stride()->horizontal());
+  tfl_node->stride()->h(origin->stride()->vertical());
+
+  tfl_node->filter()->w(origin->window()->horizontal());
+  tfl_node->filter()->h(origin->window()->vertical());
+
+  // Zero padding on all four sides maps to VALID; anything else to SAME.
+  // TODO This is necessary, but not sufficient condition. More rigorous check required
+  auto pad = origin->pad();
+  bool zero_pad = pad->bottom() == 0 && pad->top() == 0 && pad->left() == 0 && pad->right() == 0;
+  tfl_node->padding(zero_pad ? locoex::Padding::VALID : locoex::Padding::SAME);
+
+  tfl_node->fusedActivationFunction(locoex::FusedActFunc::NONE);
+
+  // Encode back to the canonical feature domain, splice the new subgraph in,
+  // and detach origin from its input
+  auto enc = make_feature_encode<FeatureLayout::NHWC>(tfl_node);
+
+  loco::replace(origin).with(enc);
+  origin->ifm(nullptr);
+
+  return true;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/MaxPool2DConverter.h b/compiler/exo/src/Conversion/MaxPool2DConverter.h
new file mode 100644
index 000000000..3f526d88f
--- /dev/null
+++ b/compiler/exo/src/Conversion/MaxPool2DConverter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_MAXPOOL2D_CONVERTER__
+#define __CONVERSION_MAXPOOL2D_CONVERTER__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::MaxPool2D to locoex::TFLMaxPool2D
+ */
+class MaxPool2DConverter : public CanonicalNodeConverter<loco::MaxPool2D>
+{
+public:
+  // Pass name used for identification/diagnostics
+  const char *name(void) const final { return "exo::MaxPool2DConverter"; }
+
+public:
+  // Returns true if 'origin' was replaced with a TFLMaxPool2D
+  bool convert(loco::MaxPool2D *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_MAXPOOL2D_CONVERTER__
diff --git a/compiler/exo/src/Conversion/Relu6Converter.cpp b/compiler/exo/src/Conversion/Relu6Converter.cpp
new file mode 100644
index 000000000..b694511f5
--- /dev/null
+++ b/compiler/exo/src/Conversion/Relu6Converter.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Relu6Converter.h"
+
+#include "GraphBlock.h"
+#include "Check.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include <loco/Service/ShapeInference.h>
+
+namespace
+{
+
+/**
+ * @brief InputHandler callbacks telling DomainConverter how to rewire
+ *        loco::ReLU6's single input into locoex::TFLRelu6's features
+ */
+class Relu6InputHandler : public exo::InputHandler<loco::ReLU6, locoex::TFLRelu6>
+{
+public:
+  // Copy origin's input to the replacer as-is
+  void handover(loco::ReLU6 *origin, locoex::TFLRelu6 *replacer) override
+  {
+    replacer->features(origin->input());
+  }
+
+  // The single input needs domain conversion
+  std::vector<loco::Node *> getInputsToConvert(loco::ReLU6 *origin) override
+  {
+    std::vector<loco::Node *> inputs({origin->input()});
+    return inputs;
+  }
+
+  // Install the converted input
+  void set(locoex::TFLRelu6 *replacer, std::vector<loco::Node *> &to) override
+  {
+    assert(to.size() == 1);
+
+    replacer->features(to.at(0));
+  }
+
+  // Detach origin from its input so it drops out of the graph
+  void nullify(loco::ReLU6 *origin) override { origin->input(nullptr); }
+};
+
+} // namespace
+
+namespace exo
+{
+
+/**
+ * @brief Replace loco::ReLU6 with locoex::TFLRelu6 via DomainConverter
+ *
+ * @return true when a TFLRelu6 node was created, false otherwise
+ */
+bool Relu6Converter::convert(loco::ReLU6 *origin)
+{
+  Relu6InputHandler handler;
+  exo::DomainConverter<loco::ReLU6, locoex::TFLRelu6> converter;
+
+  // A null result means the conversion could not be performed
+  return converter.convert<FeatureLayout::NHWC>(origin, handler) != nullptr;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/Relu6Converter.h b/compiler/exo/src/Conversion/Relu6Converter.h
new file mode 100644
index 000000000..d987b42d0
--- /dev/null
+++ b/compiler/exo/src/Conversion/Relu6Converter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_RELU6_CONVERTER_H__
+#define __CONVERSION_RELU6_CONVERTER_H__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::ReLU6 to TFLRelu6
+ */
+class Relu6Converter : public CanonicalNodeConverter<loco::ReLU6>
+{
+public:
+ const char *name(void) const final { return "exo::Relu6Converter"; }
+
+public:
+ bool convert(loco::ReLU6 *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_RELU6_CONVERTER_H__
diff --git a/compiler/exo/src/Conversion/ReluConverter.cpp b/compiler/exo/src/Conversion/ReluConverter.cpp
new file mode 100644
index 000000000..92adef94d
--- /dev/null
+++ b/compiler/exo/src/Conversion/ReluConverter.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReluConverter.h"
+
+#include "GraphBlock.h"
+#include "Check.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include <loco/Service/ShapeInference.h>
+
+namespace
+{
+
+class ReluInputHandler : public exo::InputHandler<loco::ReLU, locoex::TFLRelu>
+{
+public:
+ void handover(loco::ReLU *origin, locoex::TFLRelu *replacer) override
+ {
+ replacer->features(origin->input());
+ }
+
+ std::vector<loco::Node *> getInputsToConvert(loco::ReLU *origin) override
+ {
+ std::vector<loco::Node *> inputs({origin->input()});
+ return inputs;
+ }
+
+ void set(locoex::TFLRelu *replacer, std::vector<loco::Node *> &to) override
+ {
+ assert(to.size() == 1);
+
+ replacer->features(to.at(0));
+ }
+
+ void nullify(loco::ReLU *origin) override { origin->input(nullptr); }
+};
+
+} // namespace
+
+namespace exo
+{
+
+bool ReluConverter::convert(loco::ReLU *origin)
+{
+ ReluInputHandler input_handler;
+ exo::DomainConverter<loco::ReLU, locoex::TFLRelu> domain_converter;
+
+ auto tfl_node = domain_converter.convert<FeatureLayout::NHWC>(origin, input_handler);
+
+ return (tfl_node != nullptr);
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/ReluConverter.h b/compiler/exo/src/Conversion/ReluConverter.h
new file mode 100644
index 000000000..e1e82ae4b
--- /dev/null
+++ b/compiler/exo/src/Conversion/ReluConverter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_RELU_CONVERTER_H__
+#define __CONVERSION_RELU_CONVERTER_H__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::ReLU to TFLRelu
+ */
+class ReluConverter : public CanonicalNodeConverter<loco::ReLU>
+{
+public:
+ const char *name(void) const final { return "exo::ReluConverter"; }
+
+public:
+ bool convert(loco::ReLU *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_RELU_CONVERTER_H__
diff --git a/compiler/exo/src/Conversion/ReluConverter.test.cpp b/compiler/exo/src/Conversion/ReluConverter.test.cpp
new file mode 100644
index 000000000..f53d656b4
--- /dev/null
+++ b/compiler/exo/src/Conversion/ReluConverter.test.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReluConverter.h"
+
+#include "GraphBlock.h"
+#include "Dialect/IR/TFLNodes.h"
+
+#include "TestHelper.h"
+#include "TestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(ReluConverterTest, relu_tensor_inout)
+{
+ exo::test::TestGraph graph;
+ {
+ auto tanh = graph.append<loco::Tanh>(graph.pull);
+ auto relu = graph.append<loco::ReLU>(tanh);
+ auto relu6 = graph.append<loco::ReLU6>(relu);
+ graph.complete();
+
+ auto pull = graph.pull;
+ {
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->shape({2, 2});
+ }
+ }
+
+ // let's convert
+ exo::test::TypeShapeReadyPhase test_phase;
+ {
+ test_phase.add_pass<exo::ReluConverter>();
+ test_phase.run(graph.g.get());
+ }
+
+ loco::Node *node = exo::test::find_first_node_bytype<loco::Tanh>(graph.g.get());
+ ASSERT_TRUE(node != nullptr);
+ node = exo::test::get_only_succ<locoex::TFLRelu>(node);
+ ASSERT_TRUE(node != nullptr);
+ node = exo::test::get_only_succ<loco::ReLU6>(node);
+ ASSERT_TRUE(node != nullptr);
+}
+
+TEST(ReluConverterTest, relu_feature_inout)
+{
+ // g = Pull - FeatureEncode - Relu - FeatureDecode - Push
+ exo::test::TestGraph graph;
+ {
+ auto enc = exo::make_feature_encode<exo::FeatureLayout::NHWC>(graph.pull);
+ auto relu = graph.append<loco::ReLU>(enc);
+ auto dec = exo::make_feature_decode<exo::FeatureLayout::NHWC>(relu);
+ graph.complete(dec);
+ }
+
+ auto pull = graph.pull;
+ {
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->shape({1, 2, 3, 4});
+ }
+
+ exo::test::TypeShapeReadyPhase test_phase;
+ {
+ test_phase.add_pass<exo::ReluConverter>();
+ test_phase.run(graph.g.get());
+ }
+
+ // now, g = Pull - FeatureEncode - FeatureDecode - TFLRelu - FeatureEncode - FeatureDecode - Push
+
+ // Check
+ EXO_TEST_ASSERT_NODE_COUNT({graph.push}, 7);
+
+ // Check [FeatureEncode - FeatureDecode - TFLRelu - FeatureEncode - FeatureDecode] chunk
+ loco::Node *node = exo::test::find_first_node_bytype<loco::FeatureEncode>(graph.g.get());
+ ASSERT_TRUE(node != nullptr);
+ node = exo::test::get_only_succ<loco::FeatureDecode>(node);
+ ASSERT_TRUE(node != nullptr);
+ node = exo::test::get_only_succ<locoex::TFLRelu>(node);
+ ASSERT_TRUE(node != nullptr);
+ node = exo::test::get_only_succ<loco::FeatureEncode>(node);
+ ASSERT_TRUE(node != nullptr);
+ node = exo::test::get_only_succ<loco::FeatureDecode>(node);
+ ASSERT_TRUE(node != nullptr);
+}
diff --git a/compiler/exo/src/Conversion/TensorBroadcastConverter.cpp b/compiler/exo/src/Conversion/TensorBroadcastConverter.cpp
new file mode 100644
index 000000000..532332742
--- /dev/null
+++ b/compiler/exo/src/Conversion/TensorBroadcastConverter.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBroadcastConverter.h"
+
+#include "Dialect/IR/TFLDialect.h"
+#include "Dialect/IR/TFLNodeVisitor.h"
+
+#include <loco.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/IR/CanonicalNode.h>
+
+#include <set>
+
+namespace
+{
+
+template <class T> loco::TensorBroadcast *input_as_tbc(T *node)
+{
+ loco::TensorBroadcast *tbc = dynamic_cast<loco::TensorBroadcast *>(node->x());
+ if (tbc == nullptr)
+ tbc = dynamic_cast<loco::TensorBroadcast *>(node->y());
+
+ return tbc;
+}
+
+struct Collector final : public locoex::TFLNodeMutableVisitor<void>
+{
+ using NodePair = std::pair<loco::TensorBroadcast *, loco::Node *>;
+
+ void visit(locoex::TFLAdd *node) final
+ {
+ if (auto tbc = input_as_tbc<locoex::TFLAdd>(node))
+ {
+ NodePair pair(tbc, node);
+ candidates.insert(pair);
+ }
+ }
+
+ void visit(locoex::TFLDiv *node) final
+ {
+ if (auto tbc = input_as_tbc<locoex::TFLDiv>(node))
+ {
+ NodePair pair(tbc, node);
+ candidates.insert(pair);
+ }
+ }
+
+ void visit(locoex::TFLMul *node) final
+ {
+ if (auto tbc = input_as_tbc<locoex::TFLMul>(node))
+ {
+ NodePair pair(tbc, node);
+ candidates.insert(pair);
+ }
+ }
+
+ void visit(locoex::TFLSub *node) final
+ {
+ if (auto tbc = input_as_tbc<locoex::TFLSub>(node))
+ {
+ NodePair pair(tbc, node);
+ candidates.insert(pair);
+ }
+ }
+
+ void visit(locoex::TFLMaximum *node) final
+ {
+ if (auto tbc = input_as_tbc<locoex::TFLMaximum>(node))
+ {
+ NodePair pair(tbc, node);
+ candidates.insert(pair);
+ }
+ }
+
+ void visit(locoex::TFLNode *) final { return; }
+
+ std::set<NodePair> candidates;
+};
+
+bool mapping_condition(Collector::NodePair &)
+{
+ // TODO fill condition
+
+ return true;
+}
+
+template <class T> void jump_connection(loco::TensorBroadcast *tbc, T *tflnode)
+{
+ if (tflnode->x() == tbc)
+ tflnode->x(tbc->input());
+ else if (tflnode->y() == tbc)
+ tflnode->y(tbc->input());
+ else
+ assert(false);
+
+ tbc->input(nullptr);
+}
+
+} // namespace
+
+namespace exo
+{
+
+/**
+ * @brief Disconnects loco::TensorBroadcast from the graph if following node
+ * is one of binary node: TFLAdd, TFLSub, TFLMul, TFLDiv, TFLMaximum
+ * and meets condition (TBA)
+ * @note
+ * Before:
+ * x --- TensorBroadcast --- TFLXXX --- output
+ * y ----------------------/
+ *
+ * After:
+ * --- TensorBroadcast ---
+ * x --- TFLXXX --- output
+ * y --/
+ */
+bool TensorBroadcastConverter::run(loco::Graph *graph)
+{
+ Collector collector;
+
+ auto active_nodes = loco::active_nodes(loco::output_nodes(graph));
+
+ for (auto node : active_nodes)
+ {
+ if (node->dialect() == locoex::TFLDialect::get())
+ {
+ auto tfl_node = dynamic_cast<locoex::TFLNode *>(node);
+ tfl_node->accept(&collector);
+ }
+ }
+
+ bool changed = false;
+
+ for (auto pair : collector.candidates)
+ {
+ if (mapping_condition(pair))
+ {
+ loco::TensorBroadcast *tensorbroadcast = pair.first;
+ if (auto tfladd = dynamic_cast<locoex::TFLAdd *>(pair.second))
+ {
+ jump_connection<locoex::TFLAdd>(tensorbroadcast, tfladd);
+ changed = true;
+ }
+ else if (auto tfldiv = dynamic_cast<locoex::TFLDiv *>(pair.second))
+ {
+ jump_connection<locoex::TFLDiv>(tensorbroadcast, tfldiv);
+ changed = true;
+ }
+ else if (auto tflmul = dynamic_cast<locoex::TFLMul *>(pair.second))
+ {
+ jump_connection<locoex::TFLMul>(tensorbroadcast, tflmul);
+ changed = true;
+ }
+ else if (auto tflsub = dynamic_cast<locoex::TFLSub *>(pair.second))
+ {
+ jump_connection<locoex::TFLSub>(tensorbroadcast, tflsub);
+ changed = true;
+ }
+ else if (auto tflmaximum = dynamic_cast<locoex::TFLMaximum *>(pair.second))
+ {
+ jump_connection<locoex::TFLMaximum>(tensorbroadcast, tflmaximum);
+ changed = true;
+ }
+ else
+ {
+ assert(false);
+ }
+ }
+ }
+
+ return changed;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/TensorBroadcastConverter.h b/compiler/exo/src/Conversion/TensorBroadcastConverter.h
new file mode 100644
index 000000000..3cf79b0ba
--- /dev/null
+++ b/compiler/exo/src/Conversion/TensorBroadcastConverter.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TENSOR_BROADCAST_CONVERTER_H__
+#define __TENSOR_BROADCAST_CONVERTER_H__
+
+#include <loco.h>
+#include <logo/Pass.h>
+
+namespace exo
+{
+
+/**
+ * @brief Pass to resolve TensorBroadcast IR
+ */
+class TensorBroadcastConverter : public logo::Pass
+{
+public:
+ virtual const char *name(void) const { return "exo::TensorBroadcastConverter"; }
+
+public:
+ bool run(loco::Graph *graph);
+};
+
+} // namespace exo
+
+#endif //__TENSOR_BROADCAST_CONVERTER_H__
diff --git a/compiler/exo/src/Conversion/TensorConcatConverter.cpp b/compiler/exo/src/Conversion/TensorConcatConverter.cpp
new file mode 100644
index 000000000..1c36b11f8
--- /dev/null
+++ b/compiler/exo/src/Conversion/TensorConcatConverter.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorConcatConverter.h"
+
+#include "GraphBlock.h"
+#include "Check.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include <loco/Service/ShapeInference.h>
+
+namespace exo
+{
+/**
+ * @brief Converts loco::TensorConcat to locoex::TFLConcatenation
+ *
+ * Before:
+ * input:0 ----- loco::TensorConcat ------- C
+ * input:1 ----/
+ *
+ * After:
+ * input:0 ----- locoex::TFLConcatenate --- C
+ * input:1 ----/
+ *
+ * input:0 ----- loco::TensorConcat ---
+ * input:1 ----/
+ *
+ */
+bool TensorConcatConverter::convert(loco::TensorConcat *origin)
+{
+ assert(loco::shape_get(origin).domain() == loco::Domain::Tensor);
+
+ if (!loco::shape_known(origin))
+ {
+ return false;
+ }
+
+ auto tfl_concat = origin->graph()->nodes()->create<locoex::TFLConcatenation>(2);
+ tfl_concat->values(0, origin->lhs());
+ tfl_concat->values(1, origin->rhs());
+ tfl_concat->axis(origin->axis());
+ tfl_concat->fusedActivationFunction(locoex::FusedActFunc::NONE);
+
+ loco::replace(origin).with(tfl_concat);
+
+ origin->lhs(nullptr);
+ origin->rhs(nullptr);
+
+ return true;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/TensorConcatConverter.h b/compiler/exo/src/Conversion/TensorConcatConverter.h
new file mode 100644
index 000000000..6b90f4731
--- /dev/null
+++ b/compiler/exo/src/Conversion/TensorConcatConverter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_TENSORCONCAT_CONVERTER_H__
+#define __CONVERSION_TENSORCONCAT_CONVERTER_H__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::TensorConcat to TFLConcatenation
+ */
+class TensorConcatConverter : public CanonicalNodeConverter<loco::TensorConcat>
+{
+public:
+ const char *name(void) const final { return "exo::TensorConcatConverter"; }
+
+public:
+ bool convert(loco::TensorConcat *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_TENSORCONCAT_CONVERTER_H__
diff --git a/compiler/exo/src/Conversion/TensorReduceConverter.cpp b/compiler/exo/src/Conversion/TensorReduceConverter.cpp
new file mode 100644
index 000000000..8fcb1682d
--- /dev/null
+++ b/compiler/exo/src/Conversion/TensorReduceConverter.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorReduceConverter.h"
+
+#include "Dialect/IR/TFLNodes.h"
+#include "Check.h"
+
+#include <oops/InternalExn.h>
+
+#include <loco.h>
+#include <loco/Service/ShapeInference.h>
+
+namespace
+{
+
+/**
+ * @brief Convert given TensorReduce as TFLMean
+ *
+ * <Before>
+ * In --- loco::TensorReduce --- Out(s)
+ *
+ * <After>
+ * In -------- locoex::TFLMean --- Out(s)
+ * /
+ * TFLConst ---
+ * (reduction indices)
+ */
+bool convert_as_mean(loco::TensorReduce *origin)
+{
+ EXO_ASSERT(origin->func() == loco::ReduceFunc::Mean, "func should be Mean for this helper");
+ EXO_ASSERT(origin->input(), "TensorReduce has no input");
+
+ auto *graph = origin->graph();
+
+ // Make reduction indices TFLConst node
+ auto reduction = graph->nodes()->create<locoex::TFLConst>();
+ {
+ auto input_rank = loco::shape_get(origin->input()).as<loco::TensorShape>().rank();
+
+ std::vector<int32_t> red_vec;
+ for (uint32_t axis = 0; axis < input_rank; ++axis)
+ if (origin->axes()->defined(axis))
+ red_vec.push_back(static_cast<int32_t>(axis));
+
+ const loco::DataType S32 = loco::DataType::S32;
+
+ reduction->dtype(S32);
+ reduction->rank(1);
+ reduction->dim(0) = red_vec.size();
+ reduction->size<S32>(red_vec.size());
+ for (uint32_t i = 0; i < red_vec.size(); ++i)
+ reduction->at<S32>(i) = red_vec.at(i);
+ }
+
+ // Make TFLMean node to replace
+ auto mean = graph->nodes()->create<locoex::TFLMean>();
+ mean->input(origin->input());
+ mean->reduction_indices(reduction);
+ mean->keep_dims(true); // Canonical TensorReduce always keep dimensions
+
+ // replace canonical node
+ loco::replace(origin).with(mean);
+ origin->input(nullptr);
+
+ return true;
+}
+
+} // namespace
+
+namespace exo
+{
+
+bool TensorReduceConverter::convert(loco::TensorReduce *origin)
+{
+ if (origin->func() == loco::ReduceFunc::Mean)
+ return convert_as_mean(origin);
+ else
+ INTERNAL_EXN_V("Unsupported ReduceFunc", oops::to_uint32(origin->func()));
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/TensorReduceConverter.h b/compiler/exo/src/Conversion/TensorReduceConverter.h
new file mode 100644
index 000000000..dfd65ad2d
--- /dev/null
+++ b/compiler/exo/src/Conversion/TensorReduceConverter.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TENSOR_REDUCE_CONVERTER__
+#define __TENSOR_REDUCE_CONVERTER__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::TensorReduce to appropriate TFL reduce operation
+ * @note loco::TensorReduce always keep dimensions
+ *
+ * Currently support:
+ * - When loco::TensorReduce::func() == Mean, convert to TFLMean + TFLConst
+ * - TODO Support other cases
+ */
+class TensorReduceConverter : public CanonicalNodeConverter<loco::TensorReduce>
+{
+public:
+ const char *name(void) const final { return "exo::TensorReduceConverter"; }
+
+public:
+ bool convert(loco::TensorReduce *origin) final;
+};
+
+} // namespace exo
+
+#endif // __TENSOR_REDUCE_CONVERTER__
diff --git a/compiler/exo/src/Conversion/TensorTransposeConverter.cpp b/compiler/exo/src/Conversion/TensorTransposeConverter.cpp
new file mode 100644
index 000000000..25c27fe7e
--- /dev/null
+++ b/compiler/exo/src/Conversion/TensorTransposeConverter.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorTransposeConverter.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include <loco.h>
+#include <loco/Service/ShapeInference.h>
+
+#include <oops/InternalExn.h>
+
+#include <algorithm>
+#include <cassert>
+#include <vector>
+
+namespace
+{
+
+void validate_perm(loco::TensorTranspose *origin)
+{
+ // check perm values are correct
+ std::vector<uint32_t> base_perms; // such as {0, 1, 2, 3, ... }
+ std::vector<uint32_t> perms; // perm values in TensorTranspose
+
+ base_perms.resize(origin->perm()->size());
+ perms.resize(origin->perm()->size());
+ for (loco::TensorAxis x = 0; x < origin->perm()->size(); x++)
+ {
+ base_perms[x] = x;
+ perms[x] = origin->perm()->axis(x);
+ }
+
+ if (!std::is_permutation(base_perms.begin(), base_perms.end(), perms.begin()))
+ INTERNAL_EXN("wrong perm value");
+}
+
+} // namespace
+
+namespace exo
+{
+/**
+ * @brief Converts loco::TensorTranspose to locoex::TFLTranspose
+ */
+bool TensorTransposeConverter::convert(loco::TensorTranspose *origin)
+{
+ auto *graph = origin->graph();
+
+ auto tfl_transpose = graph->nodes()->create<locoex::TFLTranspose>();
+ {
+ // validation
+ {
+ assert(origin->input() != nullptr);
+
+ auto input_rank = loco::shape_get(origin->input()).as<loco::TensorShape>().rank();
+ if (input_rank != origin->perm()->size())
+ INTERNAL_EXN_V("perm size should be same with input rank",
+ oops::to_uint32(origin->perm()->size()));
+
+ validate_perm(origin);
+ }
+
+ tfl_transpose->a(origin->input());
+
+ // perm : set TFLConst
+ auto perm_const = graph->nodes()->create<locoex::TFLConst>();
+ {
+ perm_const->dtype(loco::DataType::S32);
+ perm_const->rank(1);
+ perm_const->dim(0) = origin->perm()->size();
+ perm_const->size<loco::DataType::S32>(origin->perm()->size());
+
+ // add perm values into perm TFLConst
+ for (loco::TensorAxis x = 0; x < origin->perm()->size(); x++)
+ {
+ perm_const->at<loco::DataType::S32>(x) = origin->perm()->axis(x);
+ }
+ }
+ tfl_transpose->perm(perm_const);
+ }
+
+ // replace canonical node
+ loco::replace(origin).with(tfl_transpose);
+ origin->input(nullptr);
+
+ return true;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/TensorTransposeConverter.h b/compiler/exo/src/Conversion/TensorTransposeConverter.h
new file mode 100644
index 000000000..9b61ff38d
--- /dev/null
+++ b/compiler/exo/src/Conversion/TensorTransposeConverter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_TENSORTRANSPOSE_CONVERTER__
+#define __CONVERSION_TENSORTRANSPOSE_CONVERTER__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::TensorTranspose to locoex::TFLTranspose
+ */
+class TensorTransposeConverter : public CanonicalNodeConverter<loco::TensorTranspose>
+{
+public:
+ const char *name(void) const final { return "exo::TensorTransposeConverter"; }
+
+public:
+ bool convert(loco::TensorTranspose *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_TENSORTRANSPOSE_CONVERTER__
diff --git a/compiler/exo/src/Conversion/TransposedConv2DConverter.cpp b/compiler/exo/src/Conversion/TransposedConv2DConverter.cpp
new file mode 100644
index 000000000..c03b64f48
--- /dev/null
+++ b/compiler/exo/src/Conversion/TransposedConv2DConverter.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TransposedConv2DConverter.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include "GraphBlock.h"
+
+#include <loco.h>
+#include <loco/Service/ShapeInference.h>
+
+namespace exo
+{
+
+bool TransposedConv2DConverter::convert(loco::TransposedConv2D *origin)
+{
+ // Shape is required to set origin->inputSizes()
+ if (not loco::shape_known(origin))
+ return false;
+
+ if ((origin->ifm() == nullptr) or (origin->ker() == nullptr))
+ return false;
+
+ auto *graph = origin->graph();
+
+ auto tfl_tr_conv = graph->nodes()->create<locoex::TFLTransposeConv>();
+ {
+ tfl_tr_conv->stride()->w(origin->stride()->horizontal());
+ tfl_tr_conv->stride()->h(origin->stride()->vertical());
+
+ auto pad = origin->pad();
+ if (pad->bottom() == 0 && pad->top() == 0 && pad->left() == 0 && pad->right() == 0)
+ tfl_tr_conv->padding(locoex::Padding::VALID);
+ else
+ // TODO This is necessary, but not sufficient condition. More rigorous check required
+ tfl_tr_conv->padding(locoex::Padding::SAME);
+ }
+
+ // let's create a new graph connection with tfl_tr_conv
+ {
+ // Make inputSizes from shape of origin
+ auto input_sizes_const = graph->nodes()->create<locoex::TFLConst>();
+ auto origin_shape = loco::shape_get(origin).as<loco::FeatureShape>();
+
+ const loco::DataType S32 = loco::DataType::S32;
+
+ input_sizes_const->dtype(S32);
+ input_sizes_const->rank(1);
+ input_sizes_const->dim(0) = 4;
+ input_sizes_const->size<S32>(4);
+ // Note that NHWC is layout for inputSizes determined by tflite format
+ input_sizes_const->at<S32>(0) = origin_shape.count().value(); // N
+ input_sizes_const->at<S32>(1) = origin_shape.height().value(); // H
+ input_sizes_const->at<S32>(2) = origin_shape.width().value(); // W
+ input_sizes_const->at<S32>(3) = origin_shape.depth().value(); // C
+
+ tfl_tr_conv->inputSizes(input_sizes_const);
+
+ // filter
+ auto filter_dec = make_filter_decode<FilterLayout::OHWI>(origin->ker());
+ tfl_tr_conv->filter(filter_dec);
+
+ // outBackprop
+ auto feature_dec = make_feature_decode<FeatureLayout::NHWC>(origin->ifm());
+ tfl_tr_conv->outBackprop(feature_dec);
+
+ // output
+ auto feature_enc = make_feature_encode<FeatureLayout::NHWC>(tfl_tr_conv);
+
+ // replace canonical node
+ loco::replace(origin).with(feature_enc);
+ origin->ifm(nullptr);
+ }
+
+ return true;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Conversion/TransposedConv2DConverter.h b/compiler/exo/src/Conversion/TransposedConv2DConverter.h
new file mode 100644
index 000000000..f51e0a5bc
--- /dev/null
+++ b/compiler/exo/src/Conversion/TransposedConv2DConverter.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSION_TRANSPOSEDCONV2D_CONVERTER__
+#define __CONVERSION_TRANSPOSEDCONV2D_CONVERTER__
+
+#include "CanonicalNodeConverter.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Convert loco::TransposedConv2D to locoex::TFLTransposeConv and auxiliary
+ *
+ *
+ * <BEFORE>
+ *
+ * IFM ------- TransposedConv2D --- OFM
+ * (Feature) / (Feature)
+ * /
+ * KER ------
+ * (Filter)
+ *
+ *
+ * <AFTER>
+ *
+ * out_backprop : IFM ------- FeatureDecode --- TFLTransposeConv --- FeatureEncode --- OFM
+ * [Feature] [Tensor] / / [Tensor] [Feature]
+ * / /
+ * filter: KER ------- FilterDecode --- /
+ * [Filter] [Tensor] /
+ * /
+ * input_sizes : TFLConst (new) ------------
+ * [Tensor]
+ */
+class TransposedConv2DConverter : public CanonicalNodeConverter<loco::TransposedConv2D>
+{
+public:
+ const char *name(void) const final { return "exo::TransposedConv2DConverter"; }
+
+public:
+ bool convert(loco::TransposedConv2D *origin) final;
+};
+
+} // namespace exo
+
+#endif // __CONVERSION_TRANSPOSEDCONV2D_CONVERTER__
diff --git a/compiler/exo/src/Conversions.h b/compiler/exo/src/Conversions.h
new file mode 100644
index 000000000..8eb4ed2e4
--- /dev/null
+++ b/compiler/exo/src/Conversions.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERSIONS_H__
+#define __CONVERSIONS_H__
+
+#include "Conversion/AvgPool2DConverter.h"
+#include "Conversion/ConstGenConverter.h"
+#include "Conversion/Conv2DConverter.h"
+#include "Conversion/DepthwiseConv2DConverter.h"
+// TODO loco::DepthwiseFilterEncode
+#include "Conversion/EltwiseAddConverter.h"
+#include "Conversion/EltwiseDivConverter.h"
+#include "Conversion/EltwiseMaxConverter.h"
+#include "Conversion/EltwiseMulConverter.h"
+#include "Conversion/EltwiseSqrtConverter.h"
+#include "Conversion/EltwiseSubConverter.h"
+#include "Conversion/FeatureBiasAddConverter.h"
+// TODO loco::FixedReshape
+#include "Conversion/MatMulConverter.h"
+#include "Conversion/MaxPool2DConverter.h"
+#include "Conversion/ReluConverter.h"
+#include "Conversion/Relu6Converter.h"
+// TODO loco::Tanh
+#include "Conversion/TensorConcatConverter.h"
+// TODO loco::TensorBiasAdd
+#include "Conversion/TensorBroadcastConverter.h"
+#include "Conversion/TensorReduceConverter.h"
+// TODO loco::TensorSoftmax
+#include "Conversion/TensorTransposeConverter.h"
+#include "Conversion/TransposedConv2DConverter.h"
+
+#endif // __CONVERSIONS_H__
diff --git a/compiler/exo/src/Convert.cpp b/compiler/exo/src/Convert.cpp
new file mode 100644
index 000000000..45f0481f4
--- /dev/null
+++ b/compiler/exo/src/Convert.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+#include "Conversions.h"
+#include "Pass/ShapeInferencePass.h"
+#include "Pass/TypeInferencePass.h"
+#include "ProgressReporter.h"
+#include "Knob.h"
+
+#include <loco.h>
+#include <loco/Service/ShapeInference.h>
+#include <loco/Service/CanonicalShapeInferenceRule.h>
+#include <loco/Service/TypeInference.h>
+
+#include <logo/SimplifyDomainConversionPass.h>
+#include <logo/RemoveDeadNodePass.h>
+#include <logo/RemoveForwardNodePass.h>
+
+#include <logo/Phase.h>
+#include <stdex/Memory.h>
+
+namespace exo
+{
+
+void convert_to_TFLNodes(loco::Graph *graph)
+{
+ // Shape and Type inference must be run before conversion
+ loco::CanonicalShapeInferenceRule shape_rule;
+ loco::apply(&shape_rule).to(graph);
+
+ loco::CanonicalTypeInferenceRule type_rule;
+ loco::apply(&type_rule).to(graph);
+
+ logo::Phase phase;
+ {
+ // prepare type and shape before conversion
+ phase.emplace_back(stdex::make_unique<TypeInferencePass>());
+ phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
+
+ // Add converters for canonical nodes. Note: Not all loco canonical nodes are listed.
+ phase.emplace_back(stdex::make_unique<AvgPool2DConverter>());
+ phase.emplace_back(stdex::make_unique<ConstGenConverter>());
+ phase.emplace_back(stdex::make_unique<Conv2DConverter>());
+ phase.emplace_back(stdex::make_unique<DepthwiseConv2DConverter>());
+ // TODO loco::DepthwiseFilterEncode
+ phase.emplace_back(stdex::make_unique<EltwiseAddConverter>());
+ phase.emplace_back(stdex::make_unique<EltwiseDivConverter>());
+ phase.emplace_back(stdex::make_unique<EltwiseMaxConverter>());
+ phase.emplace_back(stdex::make_unique<EltwiseMulConverter>());
+ phase.emplace_back(stdex::make_unique<EltwiseSqrtConverter>());
+ phase.emplace_back(stdex::make_unique<EltwiseSubConverter>());
+ phase.emplace_back(stdex::make_unique<FeatureBiasAddConverter>());
+ // TODO loco::FixedReshape
+ phase.emplace_back(stdex::make_unique<MatMulConverter>());
+ phase.emplace_back(stdex::make_unique<MaxPool2DConverter>());
+ phase.emplace_back(stdex::make_unique<ReluConverter>());
+ phase.emplace_back(stdex::make_unique<Relu6Converter>());
+ // TODO loco::Tanh
+ phase.emplace_back(stdex::make_unique<TensorConcatConverter>());
+ // TODO loco::TensorBiasAdd
+ phase.emplace_back(stdex::make_unique<TensorBroadcastConverter>());
+ phase.emplace_back(stdex::make_unique<TensorReduceConverter>());
+ // TODO loco::TensorSoftmax
+ phase.emplace_back(stdex::make_unique<TensorTransposeConverter>());
+ phase.emplace_back(stdex::make_unique<TransposedConv2DConverter>());
+
+ // Add optimization below
+ phase.emplace_back(stdex::make_unique<logo::SimplifyDomainConversionPass>());
+ phase.emplace_back(stdex::make_unique<logo::RemoveForwardNodePass>());
+ phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ }
+
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{graph};
+
+ ProgressReporter prog(graph, logo::PhaseStrategy::Restart);
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+
+ // TODO Assert that all canonical nodes are converted to TFL nodes
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Convert.h b/compiler/exo/src/Convert.h
new file mode 100644
index 000000000..7038f9cf7
--- /dev/null
+++ b/compiler/exo/src/Convert.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERT_H__
+#define __CONVERT_H__
+
+#include <loco.h>
+
+namespace exo
+{
+
+void convert_to_TFLNodes(loco::Graph *graph);
+
+} // namespace exo
+
+#endif // __CONVERT_H__
diff --git a/compiler/exo/src/Dialect/IR/CircleDialect.cpp b/compiler/exo/src/Dialect/IR/CircleDialect.cpp
new file mode 100644
index 000000000..ecd43b0a3
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleDialect.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleDialect.h"
+
+namespace locoex
+{
+
+loco::Dialect *CircleDialect::get(void)
+{
+ static CircleDialect d;
+ return &d;
+}
+
+} // namespace locoex
diff --git a/compiler/exo/src/Dialect/IR/CircleDialect.h b/compiler/exo/src/Dialect/IR/CircleDialect.h
new file mode 100644
index 000000000..9857d9e6d
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleDialect.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_CIRCLEDIALECT_H__
+#define __LOCOEX_IR_CIRCLEDIALECT_H__
+
+#include <loco/IR/Dialect.h>
+
+namespace locoex
+{
+
+class CircleDialect final : public loco::Dialect
+{
+private:
+ CircleDialect() = default;
+
+public:
+ CircleDialect(const CircleDialect &) = delete;
+ CircleDialect(CircleDialect &&) = delete;
+
+public:
+ static loco::Dialect *get(void);
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_CIRCLEDIALECT_H__
diff --git a/compiler/exo/src/Dialect/IR/CircleDialect.test.cpp b/compiler/exo/src/Dialect/IR/CircleDialect.test.cpp
new file mode 100644
index 000000000..6132eb361
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleDialect.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleDialectTest, get)
+{
+ using locoex::CircleDialect;
+
+ auto d = CircleDialect::get();
+
+ // get() SHOULD return a valid(non-null) pointer
+ ASSERT_NE(d, nullptr);
+ // The return value SHOULD be stable across multiple invocations
+ ASSERT_EQ(d, CircleDialect::get());
+}
diff --git a/compiler/exo/src/Dialect/IR/CircleNode.cpp b/compiler/exo/src/Dialect/IR/CircleNode.cpp
new file mode 100644
index 000000000..cdcd434ea
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleNode.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleNode.h"
+
+#include "CircleDialect.h"
+
+namespace locoex
+{
+
+const loco::Dialect *CircleNode::dialect(void) const { return CircleDialect::get(); }
+
+} // namespace locoex
diff --git a/compiler/exo/src/Dialect/IR/CircleNode.h b/compiler/exo/src/Dialect/IR/CircleNode.h
new file mode 100644
index 000000000..1ae9d38bd
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleNode.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_CIRCLENODE_H__
+#define __LOCOEX_IR_CIRCLENODE_H__
+
+#include "CircleNodeDecl.h"
+#include "CircleNodeImpl.h"
+
+#endif // __LOCOEX_IR_CIRCLENODE_H__
diff --git a/compiler/exo/src/Dialect/IR/CircleNodeDecl.h b/compiler/exo/src/Dialect/IR/CircleNodeDecl.h
new file mode 100644
index 000000000..358b1f0ce
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleNodeDecl.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_CIRCLENODEDECL_H__
+#define __LOCOEX_IR_CIRCLENODEDECL_H__
+
+#include <loco/IR/Node.h>
+#include <loco/IR/Dialect.h>
+
+#include "CircleOpcode.h"
+#include "CircleNodeVisitor.forward.h"
+
+namespace locoex
+{
+
+struct CircleNode : public loco::Node
+{
+ virtual ~CircleNode() = default;
+
+ const loco::Dialect *dialect(void) const final;
+ virtual CircleOpcode opcode(void) const = 0;
+
+ template <typename T> T accept(CircleNodeVisitorBase<T> *) const;
+ template <typename T> T accept(CircleNodeMutableVisitorBase<T> *);
+};
+
+template <CircleOpcode Code> struct CircleNodeImpl : public CircleNode
+{
+ virtual ~CircleNodeImpl() = default;
+
+ uint32_t opnum(void) const final { return static_cast<uint32_t>(Code); }
+ CircleOpcode opcode(void) const final { return Code; }
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_CIRCLENODEDECL_H__
diff --git a/compiler/exo/src/Dialect/IR/CircleNodeImpl.h b/compiler/exo/src/Dialect/IR/CircleNodeImpl.h
new file mode 100644
index 000000000..d9f487111
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleNodeImpl.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_CIRCLENODEIMPL_H__
+#define __LOCOEX_IR_CIRCLENODEIMPL_H__
+
+#include "CircleNodes.h"
+#include "CircleNodeVisitor.h"
+
+#include <oops/InternalExn.h>
+
+#include <cassert>
+
+namespace locoex
+{
+
+template <typename T> T CircleNode::accept(CircleNodeVisitorBase<T> *v) const
+{
+ switch (this->opcode())
+ {
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ \
+ case CircleOpcode::OPCODE: \
+ return v->visit(dynamic_cast<const CLASS *>(this));
+
+#include "CircleNodes.lst"
+#undef CIRCLE_NODE
+
+ default:
+ break;
+ }
+
+ INTERNAL_EXN("CircleNode::accept(CircleNodeVisitorBase) not handled");
+}
+
+template <typename T> T CircleNode::accept(CircleNodeMutableVisitorBase<T> *v)
+{
+ switch (this->opcode())
+ {
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ \
+ case CircleOpcode::OPCODE: \
+ return v->visit(dynamic_cast<CLASS *>(this));
+
+#include "CircleNodes.lst"
+#undef CIRCLE_NODE
+
+ default:
+ break;
+ }
+
+ INTERNAL_EXN("CircleNode::accept(CircleNodeMutableVisitorBase) not handled");
+}
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_CIRCLENODEIMPL_H__
diff --git a/compiler/exo/src/Dialect/IR/CircleNodeVisitor.forward.h b/compiler/exo/src/Dialect/IR/CircleNodeVisitor.forward.h
new file mode 100644
index 000000000..8ae28abf3
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleNodeVisitor.forward.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_CIRCLENODE_VISITOR_FORWARD_H__
+#define __LOCOEX_IR_CIRCLENODE_VISITOR_FORWARD_H__
+
+namespace locoex
+{
+
+// NOTE These forward declarations SHOULD BE aligned with Node declarations in
+// "CircleNodeVisitor.h"
+template <typename T> struct CircleNodeVisitorBase;
+template <typename T> struct CircleNodeMutableVisitorBase;
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_CIRCLENODE_VISITOR_FORWARD_H__
diff --git a/compiler/exo/src/Dialect/IR/CircleNodeVisitor.h b/compiler/exo/src/Dialect/IR/CircleNodeVisitor.h
new file mode 100644
index 000000000..fc70c9ebc
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleNodeVisitor.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_CIRCLENODE_VISITOR_H__
+#define __LOCOEX_IR_CIRCLENODE_VISITOR_H__
+
+#include "CircleNode.h"
+#include "CircleNodes.h"
+
+#include <oops/InternalExn.h>
+
+namespace locoex
+{
+
+/**
+ * DO NOT use this class. Use CircleNodeVisitor instead.
+ */
+template <typename T> struct CircleNodeVisitorBase
+{
+ virtual ~CircleNodeVisitorBase() = default;
+
+#define CIRCLE_NODE(OPCODE, Circle_CLASS) virtual T visit(const Circle_CLASS *) = 0;
+
+#include "CircleNodes.lst"
+#undef CIRCLE_NODE
+};
+
+template <typename T> struct CircleNodeVisitor : public CircleNodeVisitorBase<T>
+{
+ virtual ~CircleNodeVisitor() = default;
+
+#define CIRCLE_NODE(OPCODE, Circle_CLASS) \
+ \
+ virtual T visit(const Circle_CLASS *node) { return visit(static_cast<const CircleNode *>(node)); }
+
+#include "CircleNodes.lst"
+#undef CIRCLE_NODE
+
+ /// @brief Default fallback
+ virtual T visit(const CircleNode *) { INTERNAL_EXN("CircleNodeVisistor: NYI node"); }
+};
+
+/**
+ * DO NOT use this class. Use CircleNodeMutableVisitor instead.
+ */
+template <typename T> struct CircleNodeMutableVisitorBase
+{
+ virtual ~CircleNodeMutableVisitorBase() = default;
+
+#define CIRCLE_NODE(OPCODE, Circle_CLASS) virtual T visit(Circle_CLASS *) = 0;
+
+#include "CircleNodes.lst"
+#undef CIRCLE_NODE
+};
+
+template <typename T> struct CircleNodeMutableVisitor : public CircleNodeMutableVisitorBase<T>
+{
+ virtual ~CircleNodeMutableVisitor() = default;
+
+#define CIRCLE_NODE(OPCODE, Circle_CLASS) \
+ \
+ virtual T visit(Circle_CLASS *node) { return visit(static_cast<CircleNode *>(node)); }
+
+#include "CircleNodes.lst"
+#undef CIRCLE_NODE
+
+ /// @brief Default fallback
+ virtual T visit(CircleNode *) { INTERNAL_EXN("CircleMutableNodeVisistor: NYI node"); }
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_CIRCLENODE_VISITOR_H__
diff --git a/compiler/exo/src/Dialect/IR/CircleNodes.cpp b/compiler/exo/src/Dialect/IR/CircleNodes.cpp
new file mode 100644
index 000000000..bba59ff4d
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleNodes.cpp
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This is to validate CircleNodes.h
+#include "CircleNodes.h"
diff --git a/compiler/exo/src/Dialect/IR/CircleNodes.h b/compiler/exo/src/Dialect/IR/CircleNodes.h
new file mode 100644
index 000000000..7be093103
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleNodes.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_CIRCLENODES_H__
+#define __LOCOEX_IR_CIRCLENODES_H__
+
+#include "CircleNodeDecl.h"
+#include "CircleOpcode.h"
+
+#include "FusedActFunc.h"
+#include "NodeMixins.h" // FixedArityNode
+
+#include <loco/IR/Node.h>
+
+namespace locoex
+{
+
+/// @brief enumeration of mixin class
+enum class CircleNodeTrait
+{
+ FusedActFunc,
+};
+
+template <CircleNodeTrait T> class CircleNodeMixin;
+
+template <> class CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+{
+public:
+ CircleNodeMixin() = default;
+
+public:
+ FusedActFunc fusedActivationFunction() const { return _fused_act_fun; }
+ void fusedActivationFunction(FusedActFunc fused_act_fun) { _fused_act_fun = fused_act_fun; }
+
+private:
+ FusedActFunc _fused_act_fun = FusedActFunc::UNDEFINED;
+};
+
+/**
+ * @brief INSTANCE_NORM in circle
+ */
+class CircleInstanceNorm final
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+{
+public:
+ /// @note Currently only support FLOAT32 as input node
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *gamma(void) const { return at(1)->node(); }
+ void gamma(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *beta(void) const { return at(2)->node(); }
+ void beta(loco::Node *node) { at(2)->node(node); }
+
+ float epsilon() const { return _epsilon; }
+ void epsilon(float epsilon) { _epsilon = epsilon; }
+
+private:
+ float _epsilon = 1e-05;
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_CIRCLENODES_H__
diff --git a/compiler/exo/src/Dialect/IR/CircleNodes.lst b/compiler/exo/src/Dialect/IR/CircleNodes.lst
new file mode 100644
index 000000000..96baf2917
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleNodes.lst
@@ -0,0 +1,8 @@
+#ifndef CIRCLE_NODE
+#error "Define CIRCLE_NODE"
+#endif // CIRCLE_NODE
+
+//
+// PLEASE SORT NODE DECLS IN ALPHABETICAL ORDER
+//
+CIRCLE_NODE(INSTANCE_NORM, locoex::CircleInstanceNorm)
diff --git a/compiler/exo/src/Dialect/IR/CircleNodes.test.cpp b/compiler/exo/src/Dialect/IR/CircleNodes.test.cpp
new file mode 100644
index 000000000..b63e7ccae
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleNodes.test.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleNodes.h"
+
+#include "CircleDialect.h"
+#include "CircleOpcode.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleInstanceNormTest, constructor)
+{
+ locoex::CircleInstanceNorm instance_norm;
+
+ ASSERT_EQ(instance_norm.dialect(), locoex::CircleDialect::get());
+ ASSERT_EQ(instance_norm.opcode(), locoex::CircleOpcode::INSTANCE_NORM);
+
+ ASSERT_EQ(instance_norm.input(), nullptr);
+ ASSERT_EQ(instance_norm.gamma(), nullptr);
+ ASSERT_EQ(instance_norm.beta(), nullptr);
+ ASSERT_FLOAT_EQ(instance_norm.epsilon(), 1e-05);
+ ASSERT_EQ(instance_norm.fusedActivationFunction(), locoex::FusedActFunc::UNDEFINED);
+}
diff --git a/compiler/exo/src/Dialect/IR/CircleOpcode.h b/compiler/exo/src/Dialect/IR/CircleOpcode.h
new file mode 100644
index 000000000..264304049
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/CircleOpcode.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_CIRCLEOPCODE_H__
+#define __LOCOEX_IR_CIRCLEOPCODE_H__
+
+namespace locoex
+{
+
+enum class CircleOpcode
+{
+#define CIRCLE_NODE(OPCODE, CLASS) OPCODE,
+#include "CircleNodes.lst"
+#undef CIRCLE_NODE
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_CIRCLEOPCODE_H__
diff --git a/compiler/exo/src/Dialect/IR/FusedActFunc.h b/compiler/exo/src/Dialect/IR/FusedActFunc.h
new file mode 100644
index 000000000..b73a0799e
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/FusedActFunc.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DIALECT_IR_FUSEDACTFUNC_H__
+#define __DIALECT_IR_FUSEDACTFUNC_H__
+
+namespace locoex
+{
+
+// TODO Divide into TFL version and Circle version when their approaches diverge
+enum class FusedActFunc
+{
+ UNDEFINED, // This is not defined by TFLite or Circle. This was added to
+ // prevent programming error.
+ NONE,
+ RELU,
+ RELU6
+};
+
+} // namespace locoex
+
+#endif // __DIALECT_IR_FUSEDACTFUNC_H__
diff --git a/compiler/exo/src/Dialect/IR/NodeMixins.cpp b/compiler/exo/src/Dialect/IR/NodeMixins.cpp
new file mode 100644
index 000000000..cdfe0d8d1
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/NodeMixins.cpp
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This is to validate NodeMixins.h
+#include "NodeMixins.h"
diff --git a/compiler/exo/src/Dialect/IR/NodeMixins.h b/compiler/exo/src/Dialect/IR/NodeMixins.h
new file mode 100644
index 000000000..c35daebc6
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/NodeMixins.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DIALECT_IR_NODEMIXINS_H__
+#define __DIALECT_IR_NODEMIXINS_H__
+
+#include <loco/IR/Node.h>
+
+namespace locoex
+{
+
+/**
+ * @brief Nodes with the fixed number of inputs
+ *
+ * TODO Deprecate this class, and use loco::FixedArity instead
+ */
+template <unsigned N, typename Base> class FixedArityNode : public Base
+{
+public:
+ FixedArityNode()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args[n] = std::unique_ptr<loco::Use>(new loco::Use{this});
+ }
+ }
+
+ virtual ~FixedArityNode() = default;
+
+public:
+ unsigned arity(void) const final { return N; }
+
+ loco::Node *arg(uint32_t n) const final { return _args.at(n)->node(); }
+
+ void drop(void) final
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args.at(n)->node(nullptr);
+ }
+ }
+
+protected:
+ // This API allows inherited classes to access "_args" field.
+ loco::Use *at(unsigned n) const { return _args.at(n).get(); }
+
+private:
+ std::array<std::unique_ptr<loco::Use>, N> _args;
+};
+
+} // namespace locoex
+
+#endif // __DIALECT_IR_NODEMIXINS_H__
diff --git a/compiler/exo/src/Dialect/IR/TFLDialect.cpp b/compiler/exo/src/Dialect/IR/TFLDialect.cpp
new file mode 100644
index 000000000..8cbf9a364
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLDialect.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLDialect.h"
+
+namespace locoex
+{
+
+loco::Dialect *TFLDialect::get(void)
+{
+ static TFLDialect d;
+ return &d;
+}
+
+} // namespace locoex
diff --git a/compiler/exo/src/Dialect/IR/TFLDialect.h b/compiler/exo/src/Dialect/IR/TFLDialect.h
new file mode 100644
index 000000000..96463a9f9
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLDialect.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_TFLDIALECT_H__
+#define __LOCOEX_IR_TFLDIALECT_H__
+
+#include <loco/IR/Dialect.h>
+
+namespace locoex
+{
+
+class TFLDialect final : public loco::Dialect
+{
+private:
+ TFLDialect() = default;
+
+public:
+ TFLDialect(const TFLDialect &) = delete;
+ TFLDialect(TFLDialect &&) = delete;
+
+public:
+ static loco::Dialect *get(void);
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_TFLDIALECT_H__
diff --git a/compiler/exo/src/Dialect/IR/TFLDialect.test.cpp b/compiler/exo/src/Dialect/IR/TFLDialect.test.cpp
new file mode 100644
index 000000000..136721e2d
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLDialect.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFLDialectTest, get)
+{
+ using locoex::TFLDialect;
+
+ auto d = TFLDialect::get();
+
+ // get() SHOULD return a valid(non-null) pointer
+ ASSERT_NE(d, nullptr);
+ // The return value SHOULD be stable across multiple invocations
+ ASSERT_EQ(d, TFLDialect::get());
+}
diff --git a/compiler/exo/src/Dialect/IR/TFLNode.cpp b/compiler/exo/src/Dialect/IR/TFLNode.cpp
new file mode 100644
index 000000000..82d5f1eba
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLNode.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLNode.h"
+
+#include "TFLDialect.h"
+
+namespace locoex
+{
+
+const loco::Dialect *TFLNode::dialect(void) const { return TFLDialect::get(); }
+
+} // namespace locoex
diff --git a/compiler/exo/src/Dialect/IR/TFLNode.h b/compiler/exo/src/Dialect/IR/TFLNode.h
new file mode 100644
index 000000000..eff69b1a5
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLNode.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_TFLNODE_H__
+#define __LOCOEX_IR_TFLNODE_H__
+
+#include "TFLNodeDecl.h"
+#include "TFLNodeImpl.h"
+
+#endif // __LOCOEX_IR_TFLNODE_H__
diff --git a/compiler/exo/src/Dialect/IR/TFLNodeDecl.h b/compiler/exo/src/Dialect/IR/TFLNodeDecl.h
new file mode 100644
index 000000000..d13900ab3
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLNodeDecl.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_TFLNODEDECL_H__
+#define __LOCOEX_IR_TFLNODEDECL_H__
+
+#include <loco/IR/Node.h>
+#include <loco/IR/Dialect.h>
+
+#include "TFLOpcode.h"
+#include "TFLNodeVisitor.forward.h"
+
+namespace locoex
+{
+
+struct TFLNode : public loco::Node
+{
+ virtual ~TFLNode() = default;
+
+ const loco::Dialect *dialect(void) const final;
+ virtual TFLOpcode opcode(void) const = 0;
+
+ template <typename T> T accept(TFLNodeVisitorBase<T> *) const;
+ template <typename T> T accept(TFLNodeMutableVisitorBase<T> *);
+};
+
+template <TFLOpcode Code> struct TFLNodeImpl : public TFLNode
+{
+ virtual ~TFLNodeImpl() = default;
+
+ uint32_t opnum(void) const final { return static_cast<uint32_t>(Code); }
+ TFLOpcode opcode(void) const final { return Code; }
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_TFLNODEDECL_H__
diff --git a/compiler/exo/src/Dialect/IR/TFLNodeImpl.h b/compiler/exo/src/Dialect/IR/TFLNodeImpl.h
new file mode 100644
index 000000000..63388279a
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLNodeImpl.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_TFLNODEIMPL_H__
+#define __LOCOEX_IR_TFLNODEIMPL_H__
+
+#include "TFLNodes.h"
+#include "TFLNodeVisitor.h"
+
+#include <oops/InternalExn.h>
+
+#include <cassert>
+
+namespace locoex
+{
+
+template <typename T> T TFLNode::accept(TFLNodeVisitorBase<T> *v) const
+{
+ switch (this->opcode())
+ {
+#define TFL_NODE(OPCODE, CLASS) \
+ \
+ case TFLOpcode::OPCODE: \
+ return v->visit(dynamic_cast<const CLASS *>(this));
+
+#include "TFLNodes.lst"
+#undef TFL_NODE
+
+ default:
+ break;
+ }
+
+ INTERNAL_EXN("TFLNode::accept(TFLNodeVisitorBase) not handled");
+}
+
+template <typename T> T TFLNode::accept(TFLNodeMutableVisitorBase<T> *v)
+{
+ switch (this->opcode())
+ {
+#define TFL_NODE(OPCODE, CLASS) \
+ \
+ case TFLOpcode::OPCODE: \
+ return v->visit(dynamic_cast<CLASS *>(this));
+
+#include "TFLNodes.lst"
+#undef TFL_NODE
+
+ default:
+ break;
+ }
+
+ INTERNAL_EXN("TFLNode::accept(TFLNodeMutableVisitorBase) not handled");
+}
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_TFLNODEIMPL_H__
diff --git a/compiler/exo/src/Dialect/IR/TFLNodeVisitor.forward.h b/compiler/exo/src/Dialect/IR/TFLNodeVisitor.forward.h
new file mode 100644
index 000000000..e98057bc3
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLNodeVisitor.forward.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_TFLNODE_VISITOR_FORWARD_H__
+#define __LOCOEX_IR_TFLNODE_VISITOR_FORWARD_H__
+
+namespace locoex
+{
+
+// NOTE These forward declarations SHOULD BE aligned with Node declarations in
+// "TFLNodeVisitor.h"
+template <typename T> struct TFLNodeVisitorBase;
+template <typename T> struct TFLNodeMutableVisitorBase;
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_TFLNODE_VISITOR_FORWARD_H__
diff --git a/compiler/exo/src/Dialect/IR/TFLNodeVisitor.h b/compiler/exo/src/Dialect/IR/TFLNodeVisitor.h
new file mode 100644
index 000000000..e1f5959c0
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLNodeVisitor.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_TFLNODE_VISITOR_H__
+#define __LOCOEX_IR_TFLNODE_VISITOR_H__
+
+#include "TFLNode.h"
+#include "TFLNodes.h"
+
+#include <oops/InternalExn.h>
+
+namespace locoex
+{
+
+/**
+ * DO NOT use this class. Use TFLNodeVisitor instead.
+ */
+template <typename T> struct TFLNodeVisitorBase
+{
+ virtual ~TFLNodeVisitorBase() = default;
+
+#define TFL_NODE(OPCODE, TFL_CLASS) virtual T visit(const TFL_CLASS *) = 0;
+
+#include "TFLNodes.lst"
+#undef TFL_NODE
+};
+
+template <typename T> struct TFLNodeVisitor : public TFLNodeVisitorBase<T>
+{
+ virtual ~TFLNodeVisitor() = default;
+
+#define TFL_NODE(OPCODE, TFL_CLASS) \
+ \
+ virtual T visit(const TFL_CLASS *node) { return visit(static_cast<const TFLNode *>(node)); }
+
+#include "TFLNodes.lst"
+#undef TFL_NODE
+
+ /// @brief Default fallback
+ virtual T visit(const TFLNode *) { INTERNAL_EXN("TFLNodeVisitor: NYI node"); }
+};
+
+/**
+ * DO NOT use this class. Use TFLNodeMutableVisitor instead.
+ */
+template <typename T> struct TFLNodeMutableVisitorBase
+{
+ virtual ~TFLNodeMutableVisitorBase() = default;
+
+#define TFL_NODE(OPCODE, TFL_CLASS) virtual T visit(TFL_CLASS *) = 0;
+
+#include "TFLNodes.lst"
+#undef TFL_NODE
+};
+
+template <typename T> struct TFLNodeMutableVisitor : public TFLNodeMutableVisitorBase<T>
+{
+ virtual ~TFLNodeMutableVisitor() = default;
+
+#define TFL_NODE(OPCODE, TFL_CLASS) \
+ \
+ virtual T visit(TFL_CLASS *node) { return visit(static_cast<TFLNode *>(node)); }
+
+#include "TFLNodes.lst"
+#undef TFL_NODE
+
+ /// @brief Default fallback
+ virtual T visit(TFLNode *) { INTERNAL_EXN("TFLNodeMutableVisitor: NYI node"); }
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_TFLNODE_VISITOR_H__
diff --git a/compiler/exo/src/Dialect/IR/TFLNodes.cpp b/compiler/exo/src/Dialect/IR/TFLNodes.cpp
new file mode 100644
index 000000000..f385ce0d9
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLNodes.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLNodes.h"
+
+#include "Check.h"
+
+#include <loco.h>
+
+#include <cassert>
+
+namespace locoex
+{
+
+template <loco::DataType DT> uint32_t TFLConst::size(void) const
+{
+ assert(dtype() == DT);
+ assert(_data.size() % sizeof(typename loco::DataTypeImpl<DT>::Type) == 0);
+ return _data.size() / sizeof(typename loco::DataTypeImpl<DT>::Type);
+}
+
+template <loco::DataType DT> void TFLConst::size(uint32_t l)
+{
+ assert(dtype() == DT);
+ _data.resize(l * sizeof(typename loco::DataTypeImpl<DT>::Type));
+}
+
+template <loco::DataType DT>
+const typename loco::DataTypeImpl<DT>::Type &TFLConst::at(uint32_t n) const
+{
+ assert(dtype() == DT);
+ assert(n < size<DT>());
+ return *(reinterpret_cast<const typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &TFLConst::at(uint32_t n)
+{
+ assert(dtype() == DT);
+ assert(n < size<DT>());
+ return *(reinterpret_cast<typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+#define INSTANTIATE(DT) \
+ template uint32_t TFLConst::size<DT>(void) const; \
+ template void TFLConst::size<DT>(uint32_t); \
+ template const typename loco::DataTypeImpl<DT>::Type &TFLConst::at<DT>(uint32_t) const; \
+ template typename loco::DataTypeImpl<DT>::Type &TFLConst::at<DT>(uint32_t);
+
+INSTANTIATE(loco::DataType::S32);
+INSTANTIATE(loco::DataType::FLOAT32);
+
+#undef INSTANTIATE
+
+void set_new_shape(locoex::TFLReshape *node, int32_t *base, uint32_t size)
+{
+ // Check node does not have both of new shape infos
+ EXO_ASSERT(node->shape() == nullptr, "node already has shape input");
+ EXO_ASSERT(node->newShape()->rank() == 0, "node already has newShape attribute");
+
+ const loco::DataType S32 = loco::DataType::S32;
+
+ // Set 2nd input as TFLConst
+ auto const_shape_node = node->graph()->nodes()->create<locoex::TFLConst>();
+ const_shape_node->rank(1);
+ const_shape_node->dim(0) = size;
+ const_shape_node->dtype(S32);
+ const_shape_node->size<S32>(size);
+ for (uint32_t axis = 0; axis < size; ++axis)
+ const_shape_node->at<S32>(axis) = base[axis];
+ node->shape(const_shape_node);
+
+ // Set newShape attribute
+ node->newShape()->rank(size);
+ for (uint32_t axis = 0; axis < size; ++axis)
+ node->newShape()->dim(axis) = base[axis];
+}
+
+} // namespace locoex
diff --git a/compiler/exo/src/Dialect/IR/TFLNodes.h b/compiler/exo/src/Dialect/IR/TFLNodes.h
new file mode 100644
index 000000000..5f521a0a6
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLNodes.h
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_TFLNODES_H__
+#define __LOCOEX_IR_TFLNODES_H__
+
+#include "TFLNodeDecl.h"
+#include "TFLOpcode.h"
+
+#include "FusedActFunc.h"
+#include "NodeMixins.h"
+
+#include <loco/IR/Node.h>
+#include <loco/IR/NodeMixins.h>
+#include <loco/IR/DataTypeTraits.h>
+
+#include <locoex/VariadicArityNode.h>
+
+#include <array>
+
+namespace locoex
+{
+
+enum class Padding
+{
+  UNDEFINED, // This is not defined by TFLite. This was added to prevent programming errors.
+ SAME,
+ VALID,
+};
+
+class Filter final
+{
+public:
+ Filter() : _w(1), _h(1) {}
+
+ int32_t w() const { return _w; }
+ void w(int32_t w) { _w = w; }
+
+ int32_t h() const { return _h; }
+ void h(int32_t h) { _h = h; }
+
+private:
+ int32_t _w;
+ int32_t _h;
+};
+
+class Stride final
+{
+public:
+ Stride() : _w(1), _h(1) {}
+
+ int32_t w() const { return _w; }
+ void w(int32_t w) { _w = w; }
+
+ int32_t h() const { return _h; }
+ void h(int32_t h) { _h = h; }
+
+private:
+ int32_t _w;
+ int32_t _h;
+};
+
+/// @brief enumeration of mixin class
+enum class TFLNodeTrait
+{
+ FusedActFunc,
+ Bias
+};
+
+template <TFLNodeTrait T> class TFLNodeMixin;
+
+template <> class TFLNodeMixin<TFLNodeTrait::FusedActFunc>
+{
+public:
+ TFLNodeMixin() = default;
+
+public:
+ FusedActFunc fusedActivationFunction() const { return _fused_act_fun; }
+ void fusedActivationFunction(FusedActFunc fused_act_fun) { _fused_act_fun = fused_act_fun; }
+
+private:
+ FusedActFunc _fused_act_fun = FusedActFunc::UNDEFINED;
+};
+
+/**
+ * @brief Mixin class for nodes that has a bias input
+ */
+template <> class TFLNodeMixin<TFLNodeTrait::Bias>
+{
+public:
+ TFLNodeMixin() = default;
+
+public:
+ virtual loco::Node *bias(void) const = 0; /// @brief get the input for bias.
+ virtual void bias(loco::Node *node) = 0; /// @brief set the input for bias.
+};
+
+/**
+ * @brief ADD in TensorFlow Lite
+ */
+class TFLAdd final : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::ADD>>,
+ public TFLNodeMixin<TFLNodeTrait::FusedActFunc>
+{
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+/**
+ * @brief AVERAGE_POOL_2D in TensorFlow Lite
+ */
+class TFLAveragePool2D final : public FixedArityNode<1, TFLNodeImpl<TFLOpcode::AVERAGE_POOL_2D>>,
+ public TFLNodeMixin<TFLNodeTrait::FusedActFunc>
+{
+public:
+ TFLAveragePool2D() : _padding(Padding::UNDEFINED) { /* empty */}
+
+public:
+ loco::Node *value(void) const { return at(0)->node(); }
+ void value(loco::Node *node) { at(0)->node(node); }
+
+ Padding padding() const { return _padding; }
+ void padding(Padding padding) { _padding = padding; }
+
+ const Filter *filter(void) const { return &_filter; }
+ Filter *filter(void) { return &_filter; }
+
+ const Stride *stride(void) const { return &_stride; }
+ Stride *stride(void) { return &_stride; }
+
+private:
+ Padding _padding;
+ Stride _stride;
+ Filter _filter;
+};
+
+/**
+ * @brief CONCATENATION in TensorFlow Lite
+ */
+class TFLConcatenation final : public VariadicArityNode<TFLNodeImpl<TFLOpcode::CONCATENATION>>,
+ public TFLNodeMixin<TFLNodeTrait::FusedActFunc>
+{
+public:
+ TFLConcatenation(uint32_t arity) : VariadicArityNode<TFLNodeImpl<TFLOpcode::CONCATENATION>>(arity)
+ {
+ // TODO Support when arity is 0
+ assert(arity >= 1);
+ }
+
+public:
+ uint32_t numValues(void) const { return arity(); }
+
+public:
+ Node *values(uint32_t index) const
+ {
+ assert(index < numValues());
+ return at(index)->node();
+ }
+ void values(uint32_t index, Node *node)
+ {
+ assert(index < numValues());
+ at(index)->node(node);
+ }
+
+public:
+ uint32_t axis(void) const { return _axis; }
+ void axis(uint32_t axis) { _axis = axis; }
+
+private:
+ uint32_t _axis;
+};
+
+/**
+ * @brief Class to build tensor data
+ * @note This will not be exported as a specific op
+ */
+class TFLConst final : public FixedArityNode<0, TFLNodeImpl<TFLOpcode::CONST>>,
+ public loco::NodeMixin<loco::NodeTrait::DataType>,
+ public loco::NodeMixin<loco::NodeTrait::TensorShape>
+{
+public:
+ TFLConst() = default;
+
+public:
+ template <loco::DataType DT> uint32_t size(void) const;
+ template <loco::DataType DT> void size(uint32_t size);
+ template <loco::DataType DT> const typename loco::DataTypeImpl<DT>::Type &at(uint32_t n) const;
+ template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &at(uint32_t n);
+
+private:
+ std::vector<uint8_t> _data;
+};
+
+/**
+ * @brief CONV_2D in TensorFlow Lite
+ */
+class TFLConv2D final : public FixedArityNode<3, TFLNodeImpl<TFLOpcode::CONV_2D>>,
+ public TFLNodeMixin<TFLNodeTrait::FusedActFunc>,
+ public TFLNodeMixin<TFLNodeTrait::Bias>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *filter(void) const { return at(1)->node(); }
+ void filter(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *bias(void) const override { return at(2)->node(); }
+ void bias(loco::Node *node) override { at(2)->node(node); }
+
+public:
+ Padding padding() const { return _padding; }
+ void padding(Padding padding) { _padding = padding; }
+
+ const Stride *stride(void) const { return &_stride; }
+ Stride *stride(void) { return &_stride; }
+
+private:
+ Padding _padding = Padding::UNDEFINED;
+ Stride _stride;
+};
+
+/**
+ * @brief DEPTHWISE_CONV_2D in TensorFlow Lite
+ */
+class TFLDepthwiseConv2D final
+ : public FixedArityNode<3, TFLNodeImpl<TFLOpcode::DEPTHWISE_CONV_2D>>,
+ public TFLNodeMixin<TFLNodeTrait::FusedActFunc>,
+ public TFLNodeMixin<TFLNodeTrait::Bias>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *filter(void) const { return at(1)->node(); }
+ void filter(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *bias(void) const override { return at(2)->node(); }
+ void bias(loco::Node *node) override { at(2)->node(node); }
+
+public:
+ Padding padding() const { return _padding; }
+ void padding(Padding padding) { _padding = padding; }
+
+ const Stride *stride(void) const { return &_stride; }
+ Stride *stride(void) { return &_stride; }
+
+ int32_t depthMultiplier(void) const { return _depth_multiplier; }
+ void depthMultiplier(int32_t arg) { _depth_multiplier = arg; }
+
+private:
+ Padding _padding = Padding::UNDEFINED;
+ Stride _stride;
+ int32_t _depth_multiplier = 0;
+};
+
+/**
+ * @brief DIV in TensorFlow Lite
+ */
+class TFLDiv final : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::DIV>>,
+ public TFLNodeMixin<TFLNodeTrait::FusedActFunc>
+{
+public:
+ TFLDiv() = default;
+
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+/**
+ * @brief FULLY_CONNECTED in TensorFlow Lite
+ */
+class TFLFullyConnected final : public FixedArityNode<3, TFLNodeImpl<TFLOpcode::FULLY_CONNECTED>>,
+ public TFLNodeMixin<TFLNodeTrait::FusedActFunc>,
+ public TFLNodeMixin<TFLNodeTrait::Bias>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *weights(void) const { return at(1)->node(); }
+ void weights(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *bias(void) const override { return at(2)->node(); }
+ void bias(loco::Node *node) override { at(2)->node(node); }
+};
+
+/**
+ * @brief MAXIMUM in TensorFlow Lite
+ */
+class TFLMaximum final : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::MAXIMUM>>
+{
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+/**
+ * @brief MAX_POOL_2D in TensorFlow Lite
+ */
+class TFLMaxPool2D final : public FixedArityNode<1, TFLNodeImpl<TFLOpcode::MAX_POOL_2D>>,
+ public TFLNodeMixin<TFLNodeTrait::FusedActFunc>
+{
+public:
+ TFLMaxPool2D() : _padding(Padding::UNDEFINED) { /* empty */}
+
+public:
+ loco::Node *value(void) const { return at(0)->node(); }
+ void value(loco::Node *node) { at(0)->node(node); }
+
+ Padding padding() const { return _padding; }
+ void padding(Padding padding) { _padding = padding; }
+
+ const Filter *filter(void) const { return &_filter; }
+ Filter *filter(void) { return &_filter; }
+
+ const Stride *stride(void) const { return &_stride; }
+ Stride *stride(void) { return &_stride; }
+
+private:
+ Padding _padding;
+ Stride _stride;
+ Filter _filter;
+};
+
+class TFLMean final : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::MEAN>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *reduction_indices(void) const { return at(1)->node(); }
+ void reduction_indices(loco::Node *node) { at(1)->node(node); }
+
+public:
+ bool keep_dims(void) const { return _keep_dims; }
+ void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
+
+private:
+ bool _keep_dims = false;
+};
+
+/**
+ * @brief MUL in TensorFlow Lite
+ */
+class TFLMul final : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::MUL>>,
+ public TFLNodeMixin<TFLNodeTrait::FusedActFunc>
+{
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+class TFLRelu final : public FixedArityNode<1, TFLNodeImpl<TFLOpcode::RELU>>
+{
+public:
+ TFLRelu() = default;
+
+public:
+ loco::Node *features(void) const { return at(0)->node(); }
+ void features(loco::Node *node) { at(0)->node(node); }
+};
+
+class TFLRelu6 final : public FixedArityNode<1, TFLNodeImpl<TFLOpcode::RELU6>>
+{
+public:
+ TFLRelu6() = default;
+
+public:
+ loco::Node *features(void) const { return at(0)->node(); }
+ void features(loco::Node *node) { at(0)->node(node); }
+};
+
+class TFLReshape final : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::RESHAPE>>
+{
+public:
+ TFLReshape() = default;
+
+public:
+ loco::Node *tensor(void) const { return at(0)->node(); }
+ void tensor(loco::Node *node) { at(0)->node(node); }
+
+  // TODO Make this input optional. That is, the loco system does not emit an error
+  //      when this input is null
+ loco::Node *shape(void) const { return at(1)->node(); }
+ void shape(loco::Node *node) { at(1)->node(node); }
+
+public:
+ class Shape
+ {
+ public:
+ uint32_t rank(void) const { return _shape.size(); }
+ void rank(uint32_t rank) { _shape.resize(rank); }
+
+ int32_t dim(uint32_t n) const { return _shape.at(n); }
+ int32_t &dim(uint32_t n) { return _shape.at(n); }
+
+ private:
+ std::vector<int32_t> _shape;
+ };
+
+ const Shape *newShape(void) const { return &_new_shape; }
+ Shape *newShape(void) { return &_new_shape; }
+
+private:
+ Shape _new_shape;
+};
+
+/**
+ * @brief Set both TFLReshape's 2nd input as TFLConst, and newShape attribute
+ * with same value
+ * @note Shape inference for TFLReshape forces them to be same
+ * TODO find better place for this helper
+ */
+void set_new_shape(locoex::TFLReshape *node, int32_t *base, uint32_t size);
+
+class TFLRsqrt final : public FixedArityNode<1, TFLNodeImpl<TFLOpcode::RSQRT>>
+{
+public:
+ TFLRsqrt() = default;
+
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+};
+
+// TODO TFLSoftmax
+
+class TFLSqrt final : public FixedArityNode<1, TFLNodeImpl<TFLOpcode::SQRT>>
+{
+public:
+ TFLSqrt() = default;
+
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+};
+
+class TFLSquaredDifference final
+ : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::SQUARED_DIFFERENCE>>
+{
+public:
+ TFLSquaredDifference() = default;
+
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+/**
+ * @brief SUB in TensorFlow Lite
+ */
+class TFLSub final : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::SUB>>,
+ public TFLNodeMixin<TFLNodeTrait::FusedActFunc>
+{
+public:
+ TFLSub() = default;
+
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+// TODO TFLTanh
+
+/**
+ * @brief TRANSPOSE in TensorFlow Lite
+ */
+class TFLTranspose final : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::TRANSPOSE>>
+{
+public:
+ TFLTranspose() = default;
+
+public:
+ /// @brief Get the input node to transpose
+ loco::Node *a(void) const { return at(0)->node(); }
+
+ /// @brief Set the input node to transpose
+ void a(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *perm(void) const { return at(1)->node(); }
+ void perm(loco::Node *node) { at(1)->node(node); }
+};
+
+/**
+ * @brief TRANSPOSE_CONV in TensorFlow Lite
+ *
+ * @note Argument node function names are from TensorFlow. So referring to 'in' and
+ *       'out' actually means 'out' and 'in' of this node.
+ */
+class TFLTransposeConv final : public FixedArityNode<3, TFLNodeImpl<TFLOpcode::TRANSPOSE_CONV>>
+{
+public:
+ loco::Node *inputSizes(void) const { return at(0)->node(); }
+ void inputSizes(Node *node) { at(0)->node(node); }
+
+ loco::Node *filter(void) const { return at(1)->node(); }
+ void filter(Node *node) { at(1)->node(node); }
+
+ loco::Node *outBackprop(void) const { return at(2)->node(); }
+ void outBackprop(Node *node) { at(2)->node(node); }
+
+public:
+ const Padding &padding(void) const { return _padding; }
+ void padding(const Padding &padding) { _padding = padding; }
+
+ const Stride *stride(void) const { return &_stride; }
+ Stride *stride(void) { return &_stride; }
+
+private:
+ Padding _padding;
+ Stride _stride;
+};
+
+// TODO define more children of TFLNode
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_TFLNODES_H__
diff --git a/compiler/exo/src/Dialect/IR/TFLNodes.lst b/compiler/exo/src/Dialect/IR/TFLNodes.lst
new file mode 100644
index 000000000..225e2be3b
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLNodes.lst
@@ -0,0 +1,30 @@
+#ifndef TFL_NODE
+#error "Define TFL_NODE"
+#endif // TFL_NODE
+
+//
+// PLEASE SORT NODE DECLS IN ALPHABETICAL ORDER
+//
+TFL_NODE(ADD, locoex::TFLAdd)
+TFL_NODE(AVERAGE_POOL_2D, locoex::TFLAveragePool2D)
+TFL_NODE(CONCATENATION, locoex::TFLConcatenation)
+TFL_NODE(CONST, locoex::TFLConst)
+TFL_NODE(CONV_2D, locoex::TFLConv2D)
+TFL_NODE(DEPTHWISE_CONV_2D, locoex::TFLDepthwiseConv2D)
+TFL_NODE(DIV, locoex::TFLDiv)
+TFL_NODE(FULLY_CONNECTED, locoex::TFLFullyConnected)
+TFL_NODE(MAXIMUM, locoex::TFLMaximum)
+TFL_NODE(MAX_POOL_2D, locoex::TFLMaxPool2D)
+TFL_NODE(MEAN, locoex::TFLMean)
+TFL_NODE(MUL, locoex::TFLMul)
+TFL_NODE(RELU, locoex::TFLRelu)
+TFL_NODE(RELU6, locoex::TFLRelu6)
+TFL_NODE(RESHAPE, locoex::TFLReshape)
+TFL_NODE(RSQRT, locoex::TFLRsqrt)
+// TODO TFLSoftmax
+TFL_NODE(SQRT, locoex::TFLSqrt)
+TFL_NODE(SQUARED_DIFFERENCE, locoex::TFLSquaredDifference)
+TFL_NODE(SUB, locoex::TFLSub)
+// TODO TFLTanh
+TFL_NODE(TRANSPOSE, locoex::TFLTranspose)
+TFL_NODE(TRANSPOSE_CONV, locoex::TFLTransposeConv)
diff --git a/compiler/exo/src/Dialect/IR/TFLNodes.test.cpp b/compiler/exo/src/Dialect/IR/TFLNodes.test.cpp
new file mode 100644
index 000000000..09c5c83a0
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLNodes.test.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLNodes.h"
+
+#include "TFLDialect.h"
+#include "TFLOpcode.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFLAddTest, constructor)
+{
+ locoex::TFLAdd add_node;
+
+ ASSERT_EQ(add_node.dialect(), locoex::TFLDialect::get());
+ ASSERT_EQ(add_node.opcode(), locoex::TFLOpcode::ADD);
+
+ ASSERT_EQ(add_node.x(), nullptr);
+ ASSERT_EQ(add_node.y(), nullptr);
+}
+
+// TODO TFLAveragePool2D
+
+TEST(TFLConcatTest, constructor)
+{
+ locoex::TFLConcatenation concat_node(3);
+
+ ASSERT_EQ(concat_node.dialect(), locoex::TFLDialect::get());
+ ASSERT_EQ(concat_node.opcode(), locoex::TFLOpcode::CONCATENATION);
+
+ ASSERT_EQ(concat_node.numValues(), 3);
+ ASSERT_EQ(concat_node.values(0), nullptr);
+ ASSERT_EQ(concat_node.values(1), nullptr);
+ ASSERT_EQ(concat_node.values(2), nullptr);
+ ASSERT_EQ(concat_node.fusedActivationFunction(), locoex::FusedActFunc::UNDEFINED);
+}
+
+// TODO TFLConv2D
+
+TEST(TFLDepthwiseConv2DTest, constructor)
+{
+ locoex::TFLDepthwiseConv2D dw_conv2d_node;
+
+ ASSERT_EQ(dw_conv2d_node.dialect(), locoex::TFLDialect::get());
+ ASSERT_EQ(dw_conv2d_node.opcode(), locoex::TFLOpcode::DEPTHWISE_CONV_2D);
+
+ ASSERT_EQ(dw_conv2d_node.input(), nullptr);
+ ASSERT_EQ(dw_conv2d_node.filter(), nullptr);
+ ASSERT_EQ(dw_conv2d_node.bias(), nullptr);
+ ASSERT_EQ(dw_conv2d_node.padding(), locoex::Padding::UNDEFINED);
+ ASSERT_EQ(dw_conv2d_node.stride()->h(), 1);
+ ASSERT_EQ(dw_conv2d_node.stride()->w(), 1);
+ ASSERT_EQ(dw_conv2d_node.depthMultiplier(), 0);
+ ASSERT_EQ(dw_conv2d_node.fusedActivationFunction(), locoex::FusedActFunc::UNDEFINED);
+}
+
+TEST(TFLDivTest, constructor)
+{
+ locoex::TFLDiv div_node;
+
+ ASSERT_EQ(div_node.dialect(), locoex::TFLDialect::get());
+ ASSERT_EQ(div_node.opcode(), locoex::TFLOpcode::DIV);
+
+ ASSERT_EQ(div_node.x(), nullptr);
+ ASSERT_EQ(div_node.y(), nullptr);
+}
+
+// TODO TFLMaxPool2D
+
+TEST(TFLMulTest, constructor)
+{
+ locoex::TFLMul mul_node;
+
+ ASSERT_EQ(mul_node.dialect(), locoex::TFLDialect::get());
+ ASSERT_EQ(mul_node.opcode(), locoex::TFLOpcode::MUL);
+
+ ASSERT_EQ(mul_node.x(), nullptr);
+ ASSERT_EQ(mul_node.y(), nullptr);
+}
+
+TEST(TFLReluTest, constructor)
+{
+ locoex::TFLRelu relu_node;
+
+ ASSERT_EQ(relu_node.dialect(), locoex::TFLDialect::get());
+ ASSERT_EQ(relu_node.opcode(), locoex::TFLOpcode::RELU);
+
+ ASSERT_EQ(relu_node.features(), nullptr);
+}
+
+// TODO TFLRelu6
+
+TEST(TFLReshapeTest, constructor)
+{
+ locoex::TFLReshape reshape;
+
+ ASSERT_EQ(reshape.dialect(), locoex::TFLDialect::get());
+ ASSERT_EQ(reshape.opcode(), locoex::TFLOpcode::RESHAPE);
+
+ ASSERT_EQ(reshape.tensor(), nullptr);
+ ASSERT_EQ(reshape.shape(), nullptr);
+ ASSERT_EQ(reshape.newShape()->rank(), 0);
+}
+
+TEST(TFLReshapeTest, alloc_new_shape)
+{
+ locoex::TFLReshape reshape;
+
+ reshape.newShape()->rank(2);
+ ASSERT_EQ(reshape.newShape()->rank(), 2);
+
+ reshape.newShape()->dim(0) = 0;
+ reshape.newShape()->dim(1) = 1;
+
+ auto &const_reshape = const_cast<const locoex::TFLReshape &>(reshape);
+ ASSERT_EQ(const_reshape.newShape()->dim(0), 0);
+ ASSERT_EQ(const_reshape.newShape()->dim(1), 1);
+}
+
+// TODO TFLSoftmax
+
+// TODO TFLSqrt
+
+TEST(TFLSubTest, constructor)
+{
+ locoex::TFLSub sub_node;
+
+ ASSERT_EQ(sub_node.dialect(), locoex::TFLDialect::get());
+ ASSERT_EQ(sub_node.opcode(), locoex::TFLOpcode::SUB);
+
+ ASSERT_EQ(sub_node.x(), nullptr);
+ ASSERT_EQ(sub_node.y(), nullptr);
+}
+
+// TODO TFLTanh
+
+TEST(TFLTransposeTest, constructor)
+{
+ locoex::TFLTranspose tr_node;
+
+ ASSERT_EQ(tr_node.dialect(), locoex::TFLDialect::get());
+ ASSERT_EQ(tr_node.opcode(), locoex::TFLOpcode::TRANSPOSE);
+
+ ASSERT_EQ(tr_node.a(), nullptr);
+ ASSERT_EQ(tr_node.perm(), nullptr);
+}
diff --git a/compiler/exo/src/Dialect/IR/TFLOpcode.h b/compiler/exo/src/Dialect/IR/TFLOpcode.h
new file mode 100644
index 000000000..0c0ab64bd
--- /dev/null
+++ b/compiler/exo/src/Dialect/IR/TFLOpcode.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_IR_TFLOPCODE_H__
+#define __LOCOEX_IR_TFLOPCODE_H__
+
+namespace locoex
+{
+
+enum class TFLOpcode
+{
+#define TFL_NODE(OPCODE, CLASS) OPCODE,
+#include "TFLNodes.lst"
+#undef TFL_NODE
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_IR_TFLOPCODE_H__
diff --git a/compiler/exo/src/Dialect/Service/CircleShapeInferenceRule.cpp b/compiler/exo/src/Dialect/Service/CircleShapeInferenceRule.cpp
new file mode 100644
index 000000000..2e71aa000
--- /dev/null
+++ b/compiler/exo/src/Dialect/Service/CircleShapeInferenceRule.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleShapeInferenceRule.h"
+
+#include "Dialect/IR/CircleNodes.h"
+#include "Dialect/IR/CircleDialect.h"
+#include "Dialect/IR/CircleNodeVisitor.h"
+
+#include "Check.h"
+
+#include <cassert>
+
+namespace
+{
+
+/**
+ * @brief Class to infer the shape of CircleNode
+ *
+ * @note All CircleNode's inputs and outputs are always loco::Domain::Tensor
+ */
+class ShapeInferenceAlgorithm final : public locoex::CircleNodeVisitor<loco::NodeShape>
+{
+public:
+ loco::NodeShape visit(const locoex::CircleInstanceNorm *node) final
+ {
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
+ }
+};
+
+} // namespace
+
+namespace locoex
+{
+
+bool CircleShapeInferenceRule::recognize(const loco::Dialect *d) const
+{
+ return CircleDialect::get() == d;
+}
+
+bool CircleShapeInferenceRule::infer(const loco::Node *node, loco::NodeShape &shape) const
+{
+ assert(node->dialect() == CircleDialect::get());
+ assert(dynamic_cast<const CircleNode *>(node) != nullptr);
+
+ ShapeInferenceAlgorithm alg;
+ shape = dynamic_cast<const CircleNode *>(node)->accept(&alg);
+
+ return true;
+}
+
+} // namespace locoex
diff --git a/compiler/exo/src/Dialect/Service/CircleShapeInferenceRule.h b/compiler/exo/src/Dialect/Service/CircleShapeInferenceRule.h
new file mode 100644
index 000000000..92f23c9dd
--- /dev/null
+++ b/compiler/exo/src/Dialect/Service/CircleShapeInferenceRule.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_SERVICE_CIRCLESHAPE_INFERENCE_RULE_H__
+#define __LOCOEX_SERVICE_CIRCLESHAPE_INFERENCE_RULE_H__
+
+#include <loco/Service/ShapeInference.h>
+
+namespace locoex
+{
+
+struct CircleShapeInferenceRule final : public loco::ShapeInferenceRule
+{
+ bool recognize(const loco::Dialect *) const final;
+ bool infer(const loco::Node *, loco::NodeShape &) const final;
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_SERVICE_CIRCLESHAPE_INFERENCE_RULE_H__
diff --git a/compiler/exo/src/Dialect/Service/CircleTypeInferenceRule.cpp b/compiler/exo/src/Dialect/Service/CircleTypeInferenceRule.cpp
new file mode 100644
index 000000000..6bc95a1b5
--- /dev/null
+++ b/compiler/exo/src/Dialect/Service/CircleTypeInferenceRule.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleTypeInferenceRule.h"
+
+#include "Dialect/IR/CircleDialect.h"
+#include "Dialect/IR/CircleNodeVisitor.h"
+#include "Dialect/IR/CircleNodes.h"
+
+#include <cassert>
+
+namespace
+{
+
+struct TypeInferenceAlgorithm final : public locoex::CircleNodeVisitor<loco::DataType>
+{
+ loco::DataType visit(const locoex::CircleInstanceNorm *node) final
+ {
+ return loco::dtype_get(node->input());
+ }
+};
+
+} // namespace
+
+namespace locoex
+{
+
+bool CircleTypeInferenceRule::recognize(const loco::Dialect *d) const
+{
+ return CircleDialect::get() == d;
+}
+
+bool CircleTypeInferenceRule::infer(const loco::Node *node, loco::DataType &dtype) const
+{
+ assert(node->dialect() == CircleDialect::get());
+
+ TypeInferenceAlgorithm alg;
+
+ dtype = dynamic_cast<const CircleNode *>(node)->accept(&alg);
+ assert(dtype != loco::DataType::Unknown);
+
+ return true;
+}
+
+} // namespace locoex
diff --git a/compiler/exo/src/Dialect/Service/CircleTypeInferenceRule.h b/compiler/exo/src/Dialect/Service/CircleTypeInferenceRule.h
new file mode 100644
index 000000000..c073dfc54
--- /dev/null
+++ b/compiler/exo/src/Dialect/Service/CircleTypeInferenceRule.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_SERVICE_CIRCLETYPE_INFERENCE_RULE_H__
+#define __LOCOEX_SERVICE_CIRCLETYPE_INFERENCE_RULE_H__
+
+#include <loco/Service/TypeInference.h>
+
+namespace locoex
+{
+
+/**
+ * @brief Type Inference Rule for CircleDialect
+ */
+struct CircleTypeInferenceRule final : public loco::TypeInferenceRule
+{
+ bool recognize(const loco::Dialect *) const final;
+ bool infer(const loco::Node *, loco::DataType &) const final;
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_SERVICE_CIRCLETYPE_INFERENCE_RULE_H__
diff --git a/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp
new file mode 100644
index 000000000..f4bb10364
--- /dev/null
+++ b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp
@@ -0,0 +1,627 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLShapeInferenceRule.h"
+
+#include "Dialect/IR/TFLNodes.h"
+#include "Dialect/IR/TFLDialect.h"
+#include "Dialect/IR/TFLNodeVisitor.h"
+
+#include "Check.h"
+
+#include <oops/InternalExn.h>
+
+#include <algorithm>
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+// Call this for TFLAvgPool2D and TFLMaxPool2D only
+template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
+{
+ EXO_ASSERT(loco::shape_known(node->value()), "Shape must be known");
+
+ auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
+ assert(ifm_shape.rank() == 4);
+
+ uint32_t input_height = ifm_shape.dim(1).value();
+ uint32_t input_width = ifm_shape.dim(2).value();
+ uint32_t stride_height = node->stride()->h();
+ uint32_t stride_width = node->stride()->w();
+ uint32_t window_height = node->filter()->h();
+ uint32_t window_width = node->filter()->w();
+ uint32_t dilation_height = 1; // dilation for TFLAvgPool2D and TFLMaxPool2D is 1
+ uint32_t dilation_width = 1;
+ uint32_t effective_window_height = dilation_height * (window_height - 1) + 1;
+ uint32_t effective_window_width = dilation_width * (window_width - 1) + 1;
+
+ uint32_t output_height = 0;
+ uint32_t output_width = 0;
+
+ if (node->padding() == locoex::Padding::VALID)
+ {
+ output_height = (input_height + stride_height - effective_window_height) / stride_height;
+ output_width = (input_width + stride_width - effective_window_width) / stride_width;
+ }
+ else if (node->padding() == locoex::Padding::SAME)
+ {
+ output_height = (input_height + stride_height - 1) / stride_height;
+ output_width = (input_width + stride_width - 1) / stride_width;
+ }
+ else
+ EXO_ASSERT(false, "Wrong padding type");
+
+ loco::TensorShape ofm_shape;
+ ofm_shape.rank(4);
+ ofm_shape.dim(0) = ifm_shape.dim(0);
+ ofm_shape.dim(1) = output_height;
+ ofm_shape.dim(2) = output_width;
+ ofm_shape.dim(3) = ifm_shape.dim(3);
+
+ return loco::NodeShape{ofm_shape};
+}
+
+/**
+ * @brief Create a higher-rank TensorShape following NumPy broadcasting semantics
+ *
+ * HOW TO USE:
+ *
+ * auto expanded_tensor_shape = expand(tensor_shape).to(N);
+ */
+class TensorShapeExpander
+{
+public:
+ TensorShapeExpander(const loco::TensorShape &shape) : _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ loco::TensorShape to(uint32_t output_rank)
+ {
+ auto const &input_shape = _shape;
+ uint32_t const input_rank = input_shape.rank();
+
+ assert(input_rank <= output_rank && "Cannot shrink rank");
+ uint32_t const axis_shift = output_rank - input_rank;
+
+ loco::TensorShape output_shape;
+
+ output_shape.rank(output_rank);
+ for (uint32_t axis = 0; axis < output_rank; ++axis)
+ {
+ output_shape.dim(axis) = (axis < axis_shift) ? 1 : input_shape.dim(axis - axis_shift);
+ }
+
+ return output_shape;
+ }
+
+private:
+ const loco::TensorShape _shape;
+};
+
+/**
+ * @brief Expand shapes x and y to the same rank by right-aligning them and filling missing leading dims with 1
+ */
+void expand_rank(loco::TensorShape &x, loco::TensorShape &y)
+{
+ auto x_rank = x.rank();
+ auto y_rank = y.rank();
+
+ if (x_rank == y_rank)
+ return;
+
+ TensorShapeExpander x_exp(x);
+ TensorShapeExpander y_exp(y);
+
+ auto xy_rank = std::max(x_rank, y_rank);
+
+ x = x_rank > y_rank ? x : x_exp.to(xy_rank);
+ y = y_rank > x_rank ? y : y_exp.to(xy_rank);
+}
+
+/**
+ * @brief Returns the broadcast (dimension-wise expanded) shape of inputs x and y, which must have the same rank
+ */
+loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y)
+{
+ assert(x.rank() == y.rank());
+
+ auto rank = x.rank();
+
+ loco::TensorShape output_shape;
+
+ output_shape.rank(rank);
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ assert(x.dim(axis).known() && y.dim(axis).known());
+
+ auto x_dim = x.dim(axis).value();
+ auto y_dim = y.dim(axis).value();
+
+ // each dimension of x and y should be same or one must be 1 if different
+ if (!((x_dim == y_dim) || (x_dim == 1 || y_dim == 1)))
+ INTERNAL_EXN("Cannot produce expand_dimension of two shapes");
+
+ output_shape.dim(axis) = std::max(x_dim, y_dim);
+ }
+
+ return output_shape;
+}
+
+loco::TensorShape broadcast_shape(const loco::TensorShape &x, const loco::TensorShape &y)
+{
+ auto x_match = x;
+ auto y_match = y;
+
+ expand_rank(x_match, y_match);
+
+ auto output_shape = expand_dimension(x_match, y_match);
+
+ return output_shape;
+}
+
+/**
+ * @brief Class to infer the shape of TFLNode
+ *
+ * @note All TFLNode's inputs and outputs are always loco::Domain::Tensor
+ */
+class ShapeInferenceAlgorithm final : public locoex::TFLNodeVisitor<loco::NodeShape>
+{
+public:
+ loco::NodeShape visit(const locoex::TFLAdd *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(x_shape, y_shape);
+
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const locoex::TFLAveragePool2D *node) final
+ {
+ return infer_pool_2d_shape(node);
+ }
+
+ loco::NodeShape visit(const locoex::TFLConcatenation *node) final
+ {
+ // TODO Support when TFLConcatenation has 0 input
+ assert(node->numValues() > 0);
+
+ auto axis = node->axis();
+ auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+
+ loco::TensorShape output_shape;
+
+ output_shape.rank(first_shape.rank());
+ for (uint32_t i = 0; i < output_shape.rank(); ++i)
+ output_shape.dim(i) = first_shape.dim(i);
+
+ for (uint32_t i = 1; i < node->numValues(); ++i)
+ {
+ auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+
+ for (uint32_t j = 0; j < output_shape.rank(); ++j)
+ {
+ if (j == axis)
+ output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
+ else
+ assert(output_shape.dim(j) == input_shape.dim(j));
+ }
+ }
+
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const locoex::TFLConst *node) final
+ {
+ loco::TensorShape shape;
+
+ shape.rank(node->rank());
+ for (uint32_t axis = 0; axis < node->rank(); axis++)
+ shape.dim(axis) = node->dim(axis);
+
+ return loco::NodeShape{shape};
+ }
+
+ loco::NodeShape visit(const locoex::TFLConv2D *node) final
+ {
+ auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
+ auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
+
+ assert(ifm_shape.rank() == 4);
+ assert(ker_shape.rank() == 4);
+ assert(ifm_shape.dim(3) == ker_shape.dim(3));
+
+ uint32_t input_height = ifm_shape.dim(1).value();
+ uint32_t input_width = ifm_shape.dim(2).value();
+ uint32_t stride_height = node->stride()->h();
+ uint32_t stride_width = node->stride()->w();
+ uint32_t ker_height = ker_shape.dim(1).value();
+ uint32_t ker_width = ker_shape.dim(2).value();
+ uint32_t dilation_height = 1;
+ uint32_t dilation_width = 1;
+ uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
+ uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+
+ uint32_t output_height = 0;
+ uint32_t output_width = 0;
+
+ if (node->padding() == locoex::Padding::VALID)
+ {
+ output_height = (input_height + stride_height - effective_ker_height) / stride_height;
+ output_width = (input_width + stride_width - effective_ker_width) / stride_width;
+ }
+ else if (node->padding() == locoex::Padding::SAME)
+ {
+ output_height = (input_height + stride_height - 1) / stride_height;
+ output_width = (input_width + stride_width - 1) / stride_width;
+ }
+ else
+ EXO_ASSERT(false, "Wrong padding type");
+
+ loco::TensorShape ofm_shape;
+ ofm_shape.rank(4);
+ ofm_shape.dim(0) = ifm_shape.dim(0);
+ ofm_shape.dim(1) = output_height;
+ ofm_shape.dim(2) = output_width;
+ ofm_shape.dim(3) = ker_shape.dim(0);
+
+ return loco::NodeShape{ofm_shape};
+ }
+
+ loco::NodeShape visit(const locoex::TFLDepthwiseConv2D *node) final
+ {
+ auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
+ auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
+
+ assert(ifm_shape.rank() == 4);
+ assert(ker_shape.rank() == 4);
+ assert(ker_shape.dim(0).value() == 1);
+
+ uint32_t input_height = ifm_shape.dim(1).value();
+ uint32_t input_width = ifm_shape.dim(2).value();
+ uint32_t stride_height = node->stride()->h();
+ uint32_t stride_width = node->stride()->w();
+ uint32_t ker_height = ker_shape.dim(1).value();
+ uint32_t ker_width = ker_shape.dim(2).value();
+ uint32_t dilation_height = 1;
+ uint32_t dilation_width = 1;
+ uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
+ uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+
+ uint32_t output_height = 0;
+ uint32_t output_width = 0;
+
+ if (node->padding() == locoex::Padding::VALID)
+ {
+ output_height = (input_height + stride_height - effective_ker_height) / stride_height;
+ output_width = (input_width + stride_width - effective_ker_width) / stride_width;
+ }
+ else if (node->padding() == locoex::Padding::SAME)
+ {
+ output_height = (input_height + stride_height - 1) / stride_height;
+ output_width = (input_width + stride_width - 1) / stride_width;
+ }
+ else
+ EXO_ASSERT(false, "Wrong padding type");
+
+ loco::TensorShape ofm_shape;
+ ofm_shape.rank(4);
+ ofm_shape.dim(0) = ifm_shape.dim(0);
+ ofm_shape.dim(1) = output_height;
+ ofm_shape.dim(2) = output_width;
+ ofm_shape.dim(3) = ker_shape.dim(3);
+
+ return loco::NodeShape{ofm_shape};
+ }
+
+ loco::NodeShape visit(const locoex::TFLDiv *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(x_shape, y_shape);
+
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const locoex::TFLFullyConnected *node) final
+ {
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
+
+ // Checking shape capability for multiplication
+ EXO_ASSERT(input_shape.rank() == 2, "NYI for input shape rank > 2");
+ EXO_ASSERT(weights_shape.rank() == 2, "Incompatible weights rank for fully connected");
+ EXO_ASSERT(input_shape.dim(1) == weights_shape.dim(1),
+ "Incompatible shapes for fully connected");
+
+ loco::TensorShape out_shape;
+ out_shape.rank(2);
+
+ out_shape.dim(0) = input_shape.dim(0);
+ out_shape.dim(1) = weights_shape.dim(0);
+
+ return loco::NodeShape{out_shape};
+ }
+
+ loco::NodeShape visit(const locoex::TFLMaximum *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(x_shape, y_shape);
+
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const locoex::TFLMaxPool2D *node) final
+ {
+ return infer_pool_2d_shape(node);
+ }
+
+  loco::NodeShape visit(const locoex::TFLMean *node) final
+  {
+    const loco::DataType S32 = loco::DataType::S32;
+
+    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+    auto reduction_indices = dynamic_cast<locoex::TFLConst *>(node->reduction_indices());
+
+    { // Exceptions
+      // TODO support non-const case
+      EXO_ASSERT(reduction_indices, "Only support constant reduction_indices");
+      // TODO support other data type
+      EXO_ASSERT(reduction_indices->dtype() == S32, "Only support int 32");
+    }
+
+    std::vector<int32_t> reduction_values;
+
+    for (uint32_t i = 0; i < reduction_indices->size<S32>(); ++i)
+    {
+      int32_t axis = reduction_indices->at<S32>(i);
+      if (axis < 0)
+        axis += input_shape.rank();
+      if (not(0 <= axis and axis < static_cast<int32_t>(input_shape.rank())))
+        INTERNAL_EXN_V("Invalid reduction axis for MEAN", oops::to_uint32(axis));
+      reduction_values.push_back(axis);
+    }
+
+    loco::TensorShape output_shape;
+
+    if (node->keep_dims())
+    {
+      output_shape.rank(input_shape.rank());
+      for (uint32_t i = 0; i < input_shape.rank(); ++i)
+        output_shape.dim(i) = input_shape.dim(i);
+      for (uint32_t i = 0; i < reduction_values.size(); ++i)
+        output_shape.dim(reduction_values.at(i)) = 1;
+    }
+    else
+    {
+      std::vector<bool> check_reduce(input_shape.rank(), false);
+      for (uint32_t i = 0; i < reduction_values.size(); ++i)
+        check_reduce.at(reduction_values.at(i)) = true;
+
+      uint32_t reduce_cnt = 0;
+      for (uint32_t i = 0; i < check_reduce.size(); ++i)
+        if (check_reduce.at(i))
+          ++reduce_cnt;
+
+      output_shape.rank(input_shape.rank() - reduce_cnt);
+      for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i)
+        if (check_reduce.at(i) == false)
+          output_shape.dim(j++) = input_shape.dim(i); // copy surviving dim SIZE, not its index
+    }
+
+    return loco::NodeShape{output_shape};
+  }
+
+ loco::NodeShape visit(const locoex::TFLMul *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(x_shape, y_shape);
+
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const locoex::TFLRelu *node) final
+ {
+ auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
+ }
+
+ loco::NodeShape visit(const locoex::TFLRelu6 *node) final
+ {
+ auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
+ }
+
+ /**
+ * @note TFLReshape has new shape info in two places: 2nd input and attribute.
+ * This shape inference forces both to exist, and match each other.
+ * When this condition satisfied, it return the inferred shape
+ *
+ * TODO Change this policy when not appropriate
+ */
+ loco::NodeShape visit(const locoex::TFLReshape *node) final
+ {
+ const loco::DataType S32 = loco::DataType::S32;
+
+ loco::TensorShape shape_by_input;
+ {
+ EXO_ASSERT(node->shape(), "2nd input shape() should not be nullptr");
+
+ // Only support node's shape() is TFLConst with S32
+ // TODO support other node with other types
+ auto const_shape_node = dynamic_cast<locoex::TFLConst *>(node->shape());
+ EXO_ASSERT(const_shape_node, "Only support TFLConst for shape of TFLReshape");
+ EXO_ASSERT(const_shape_node->dtype() == S32, "Only support int32 TFLConst");
+
+ if (const_shape_node->rank() != 1)
+ INTERNAL_EXN_V("Only support rank 1 TFLConst", oops::to_uint32(const_shape_node->rank()));
+
+ shape_by_input.rank(const_shape_node->dim(0).value());
+
+ for (uint32_t axis = 0; axis < shape_by_input.rank(); ++axis)
+ {
+ EXO_ASSERT(const_shape_node->at<S32>(axis) > 0, "Dimension should be > 0")
+ shape_by_input.dim(axis) = const_shape_node->at<S32>(axis);
+ }
+ }
+
+ loco::TensorShape shape_by_attr;
+ {
+ shape_by_attr.rank(node->newShape()->rank());
+
+ for (uint32_t axis = 0; axis < shape_by_attr.rank(); ++axis)
+ {
+ EXO_ASSERT(node->newShape()->dim(axis) > 0, "Dimension should be > 0")
+ shape_by_attr.dim(axis) = node->newShape()->dim(axis);
+ }
+ }
+
+ EXO_ASSERT(shape_by_input == shape_by_attr,
+ "Warning: Two new shape information mismatched for TFLReshape");
+
+ return loco::NodeShape{shape_by_input};
+ }
+
+ loco::NodeShape visit(const locoex::TFLRsqrt *node) final
+ {
+ auto input_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
+ }
+
+ // TODO TFLSoftmax
+
+ loco::NodeShape visit(const locoex::TFLSqrt *node) final
+ {
+ auto input_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
+ }
+
+ loco::NodeShape visit(const locoex::TFLSquaredDifference *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(x_shape, y_shape);
+
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const locoex::TFLSub *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(x_shape, y_shape);
+
+ return loco::NodeShape{output_shape};
+ }
+
+ // TODO TFLTanh
+
+ /// @brief Returns output shape of transpose. Use loco::ConstGen and locoex::TFLConst for ConstT.
+ template <class ConstT>
+ loco::TensorShape output_shape_of_transpose(loco::TensorShape input_shape,
+ const ConstT *perm_node)
+ {
+ loco::TensorShape output_shape;
+ output_shape.rank(input_shape.rank());
+
+ assert(perm_node->dtype() == loco::DataType::S32);
+ assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>());
+
+ for (uint32_t out_axis = 0; out_axis < output_shape.rank(); out_axis++)
+ {
+ auto new_dim = perm_node->template at<loco::DataType::S32>(out_axis);
+ output_shape.dim(new_dim) = input_shape.dim(out_axis);
+ }
+
+ return output_shape;
+ }
+
+ loco::NodeShape visit(const locoex::TFLTranspose *node) final
+ {
+ auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
+
+ auto canon_perm = dynamic_cast<loco::ConstGen *>(node->perm());
+ auto tfl_perm = dynamic_cast<locoex::TFLConst *>(node->perm());
+
+ if (canon_perm)
+ {
+ return loco::NodeShape{output_shape_of_transpose(input_shape, canon_perm)};
+ }
+ else if (tfl_perm)
+ {
+ return loco::NodeShape{output_shape_of_transpose(input_shape, tfl_perm)};
+ }
+ else
+ INTERNAL_EXN("perm of TFLTranspose should be either ConstGen or TFLConst");
+ }
+
+ loco::NodeShape visit(const locoex::TFLTransposeConv *node) final
+ {
+ // TransposeConv's output shape is written in its 'inputSizes' argument
+ auto input_sizes_const = dynamic_cast<locoex::TFLConst *>(node->inputSizes());
+ EXO_ASSERT(input_sizes_const, "Only support when TFLTransposeConv's inputSizes is TFLConst")
+ EXO_ASSERT(input_sizes_const->dtype() == loco::DataType::S32, "Only support S32 dtype")
+ EXO_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4,
+ "Only support rank 1 with 4 entries")
+
+ loco::TensorShape shape;
+
+ shape.rank(4);
+ for (uint32_t axis = 0; axis < 4; ++axis)
+ shape.dim(axis) = input_sizes_const->at<loco::DataType::S32>(axis);
+
+ return loco::NodeShape{shape};
+ }
+};
+
+} // namespace
+
+namespace locoex
+{
+
+bool TFLShapeInferenceRule::recognize(const loco::Dialect *d) const
+{
+ return TFLDialect::get() == d;
+}
+
+bool TFLShapeInferenceRule::infer(const loco::Node *node, loco::NodeShape &shape) const
+{
+ assert(node->dialect() == TFLDialect::get());
+ assert(dynamic_cast<const TFLNode *>(node) != nullptr);
+
+ ShapeInferenceAlgorithm alg;
+ shape = dynamic_cast<const TFLNode *>(node)->accept(&alg);
+
+ return true;
+}
+
+} // namespace locoex
diff --git a/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.h b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.h
new file mode 100644
index 000000000..434a145cc
--- /dev/null
+++ b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_SERVICE_TFLSHAPE_INFERENCE_RULE_H__
+#define __LOCOEX_SERVICE_TFLSHAPE_INFERENCE_RULE_H__
+
+#include <loco/Service/ShapeInference.h>
+
+namespace locoex
+{
+
+struct TFLShapeInferenceRule final : public loco::ShapeInferenceRule
+{
+ bool recognize(const loco::Dialect *) const final;
+ bool infer(const loco::Node *, loco::NodeShape &) const final;
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_SERVICE_TFLSHAPE_INFERENCE_RULE_H__
diff --git a/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.test.cpp b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.test.cpp
new file mode 100644
index 000000000..35c8f0b2a
--- /dev/null
+++ b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.test.cpp
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestGraph.h"
+
+#include "Dialect/IR/TFLNodes.h"
+#include "Dialect/IR/TFLDialect.h"
+#include "Dialect/Service/TFLShapeInferenceRule.h"
+
+#include <loco.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/ShapeInference.h>
+#include <loco/Service/CanonicalShapeInferenceRule.h>
+#include <loco/Service/MultiDialectShapeInferenceRule.h>
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+TEST(TFLShapeInferenceRuleTest, minimal_with_TFLRelu)
+{
+ // Create a simple network
+ exo::test::TestGraph graph;
+ auto tfl_node = graph.append<locoex::TFLRelu>(graph.pull);
+ graph.complete(tfl_node);
+
+ // set shape
+ {
+ graph.pull->rank(2);
+ graph.pull->dim(0) = 3;
+ graph.pull->dim(1) = 4;
+ }
+
+ // pre-check
+ ASSERT_FALSE(loco::shape_known(tfl_node));
+
+ // shape inference
+ locoex::TFLShapeInferenceRule tfl_rule;
+ loco::CanonicalShapeInferenceRule canonical_rule;
+ loco::MultiDialectShapeInferenceRule rules;
+
+ rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
+ .bind(locoex::TFLDialect::get(), &tfl_rule);
+
+ loco::apply(&rules).to(graph.g.get());
+
+ // Verify
+ {
+ ASSERT_TRUE(loco::shape_known(tfl_node));
+ ASSERT_EQ(loco::shape_get(tfl_node).domain(), loco::Domain::Tensor);
+
+ auto shape = loco::shape_get(tfl_node).as<loco::TensorShape>();
+ ASSERT_EQ(shape.rank(), 2);
+ ASSERT_EQ(shape.dim(0), 3);
+ ASSERT_EQ(shape.dim(1), 4);
+ }
+}
+
+// based on the case shown in
+// https://www.corvil.com/kb/what-is-the-difference-between-same-and-valid-padding-in-tf-nn-max-pool-of-tensorflow
+TEST(TFLShapeInferenceRuleTest, avgpool2d_valid)
+{
+ exo::test::TestGraph graph;
+ auto tfl_node = graph.append<locoex::TFLAveragePool2D>(graph.pull);
+ graph.complete();
+
+ auto pull = graph.pull;
+ {
+ pull->shape({1, 4, 3, 1});
+ }
+ // setting TFLAveragePool2D
+ {
+ tfl_node->filter()->h(2);
+ tfl_node->filter()->w(2);
+ tfl_node->stride()->h(2);
+ tfl_node->stride()->w(2);
+ tfl_node->fusedActivationFunction(locoex::FusedActFunc::NONE);
+ tfl_node->padding(locoex::Padding::VALID);
+ }
+ ASSERT_FALSE(loco::shape_known(tfl_node));
+
+ // shape inference
+ locoex::TFLShapeInferenceRule tfl_rule;
+ loco::CanonicalShapeInferenceRule canonical_rule;
+ loco::MultiDialectShapeInferenceRule rules;
+
+ rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
+ .bind(locoex::TFLDialect::get(), &tfl_rule);
+
+ loco::apply(&rules).to(graph.g.get());
+
+ // Verify
+ {
+ ASSERT_TRUE(loco::shape_known(tfl_node));
+ ASSERT_EQ(loco::shape_get(tfl_node).domain(), loco::Domain::Tensor);
+
+ auto shape = loco::shape_get(tfl_node).as<loco::TensorShape>();
+ ASSERT_EQ(shape.rank(), 4);
+ ASSERT_EQ(shape.dim(0).value(), 1);
+ ASSERT_EQ(shape.dim(1).value(), 2);
+ ASSERT_EQ(shape.dim(2).value(), 1);
+ ASSERT_EQ(shape.dim(3).value(), 1);
+ }
+}
+
+TEST(TFLShapeInferenceRuleTest, avgpool2d_same)
+{
+ exo::test::TestGraph graph;
+ auto tfl_node = graph.append<locoex::TFLAveragePool2D>(graph.pull);
+ graph.complete();
+
+ auto pull = graph.pull;
+ {
+ pull->shape({1, 4, 3, 1});
+ }
+
+ // setting TFLAveragePool2D
+ {
+ tfl_node->filter()->h(2);
+ tfl_node->filter()->w(2);
+ tfl_node->stride()->h(2);
+ tfl_node->stride()->w(2);
+ tfl_node->fusedActivationFunction(locoex::FusedActFunc::NONE);
+ tfl_node->padding(locoex::Padding::SAME);
+ }
+
+ ASSERT_FALSE(loco::shape_known(tfl_node));
+
+ // shape inference
+ locoex::TFLShapeInferenceRule tfl_rule;
+ loco::CanonicalShapeInferenceRule canonical_rule;
+ loco::MultiDialectShapeInferenceRule rules;
+
+ rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
+ .bind(locoex::TFLDialect::get(), &tfl_rule);
+
+ loco::apply(&rules).to(graph.g.get());
+
+ // Verify
+ {
+ ASSERT_TRUE(loco::shape_known(tfl_node));
+ ASSERT_EQ(loco::shape_get(tfl_node).domain(), loco::Domain::Tensor);
+
+ auto shape = loco::shape_get(tfl_node).as<loco::TensorShape>();
+ ASSERT_EQ(shape.rank(), 4);
+ ASSERT_EQ(shape.dim(0).value(), 1);
+ ASSERT_EQ(shape.dim(1).value(), 2);
+ ASSERT_EQ(shape.dim(2).value(), 2);
+ ASSERT_EQ(shape.dim(3).value(), 1);
+ }
+}
+
+/**
+ * @note Function to test: Shape inference of two different input shapes
+ *
+ * Rank expansion to higher input side
+ * x(2,1,5) + y(3,5) --> x(2,1,5) + y(1,3,5)
+ * Do output shape inference like numpy
+ * x(2,1,5) + y(1,3,5) --> output(2,3,5)
+ * For each axis, dim value should be same OR one of them should be 1
+ */
+TEST(TFLShapeInferenceRuleTest, TFAdd_shapeinf_different)
+{
+ auto g = loco::make_graph();
+
+ auto x_node = g->nodes()->create<loco::Pull>();
+ {
+ x_node->rank(3);
+ x_node->dim(0) = 2;
+ x_node->dim(1) = 1;
+ x_node->dim(2) = 5;
+ }
+ auto y_node = g->nodes()->create<loco::Pull>();
+ {
+ y_node->rank(2);
+ y_node->dim(0) = 3;
+ y_node->dim(1) = 5;
+ }
+ auto tfl_node = g->nodes()->create<locoex::TFLAdd>();
+ {
+ tfl_node->x(x_node);
+ tfl_node->y(y_node);
+ }
+ auto push_node = g->nodes()->create<loco::Push>();
+ {
+ push_node->from(tfl_node);
+ }
+
+ auto x_input = g->inputs()->create();
+ {
+ x_input->name("x");
+ loco::link(x_input, x_node);
+ }
+ auto y_input = g->inputs()->create();
+ {
+ y_input->name("y");
+ loco::link(y_input, y_node);
+ }
+ auto output = g->outputs()->create();
+ {
+ output->name("output");
+ loco::link(output, push_node);
+ }
+
+ // pre-check
+ ASSERT_FALSE(loco::shape_known(tfl_node));
+
+ exo::ShapeInferencePass pass;
+ while (pass.run(g.get()) == true)
+ {
+ ;
+ }
+
+ // Verify
+ {
+ ASSERT_TRUE(loco::shape_known(tfl_node));
+ ASSERT_EQ(loco::shape_get(tfl_node).domain(), loco::Domain::Tensor);
+
+ auto shape = loco::shape_get(tfl_node).as<loco::TensorShape>();
+ ASSERT_EQ(shape.rank(), 3);
+ ASSERT_EQ(shape.dim(0), 2);
+ ASSERT_EQ(shape.dim(1), 3);
+ ASSERT_EQ(shape.dim(2), 5);
+ }
+}
+
+TEST(TFLShapeInferenceRuleTest, TFLTranspose_simple)
+{
+ exo::test::ExampleGraph<exo::test::ExampleGraphType::TFLTranspose> g;
+
+ g.pull->rank(4);
+ g.pull->dim(0) = 10;
+ g.pull->dim(1) = 20;
+ g.pull->dim(2) = 30;
+ g.pull->dim(3) = 40;
+
+ g.const_perm->dtype(loco::DataType::S32);
+ g.const_perm->rank(1);
+ g.const_perm->dim(0) = 4;
+ g.const_perm->size<loco::DataType::S32>(4);
+ g.const_perm->at<loco::DataType::S32>(0) = 2;
+ g.const_perm->at<loco::DataType::S32>(1) = 3;
+ g.const_perm->at<loco::DataType::S32>(2) = 0;
+ g.const_perm->at<loco::DataType::S32>(3) = 1;
+
+ // pre-check
+ ASSERT_FALSE(loco::shape_known(g.tfl_transpose));
+
+ exo::ShapeInferencePass pass;
+ while (pass.run(g.graph()) == true)
+ ;
+
+ // Verify
+ {
+ ASSERT_TRUE(loco::shape_known(g.tfl_transpose));
+
+ auto shape = loco::shape_get(g.tfl_transpose).as<loco::TensorShape>();
+ ASSERT_EQ(shape.rank(), 4);
+ ASSERT_EQ(shape.dim(0), 30);
+ ASSERT_EQ(shape.dim(1), 40);
+ ASSERT_EQ(shape.dim(2), 10);
+ ASSERT_EQ(shape.dim(3), 20);
+ }
+}
diff --git a/compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.cpp b/compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.cpp
new file mode 100644
index 000000000..3f123a6db
--- /dev/null
+++ b/compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.cpp
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLTypeInferenceRule.h"
+
+#include "Dialect/IR/TFLDialect.h"
+#include "Dialect/IR/TFLNodeVisitor.h"
+#include "Dialect/IR/TFLNodes.h"
+
+#include <cassert>
+
+namespace
+{
+
+struct TypeInferenceAlgorithm final : public locoex::TFLNodeVisitor<loco::DataType>
+{
+ loco::DataType visit(const locoex::TFLAdd *node) final { return loco::dtype_get(node->x()); }
+
+ loco::DataType visit(const locoex::TFLAveragePool2D *node) final
+ {
+ return loco::dtype_get(node->value());
+ }
+
+ loco::DataType visit(const locoex::TFLConcatenation *node) final
+ {
+ // TODO Support when TFLConcatenation has 0 input
+ assert(node->numValues() > 0);
+
+ for (uint32_t i = 1; i < node->numValues(); ++i)
+ assert(loco::dtype_get(node->values(i - 1)) == loco::dtype_get(node->values(i)));
+
+ return loco::dtype_get(node->values(0));
+ }
+
+ loco::DataType visit(const locoex::TFLConst *node) final { return node->dtype(); }
+
+ loco::DataType visit(const locoex::TFLConv2D *node) final
+ {
+ return loco::dtype_get(node->input());
+ }
+
+ loco::DataType visit(const locoex::TFLDepthwiseConv2D *node) final
+ {
+ return loco::dtype_get(node->input());
+ }
+
+ loco::DataType visit(const locoex::TFLDiv *node) final { return loco::dtype_get(node->x()); }
+
+ loco::DataType visit(const locoex::TFLFullyConnected *node) final
+ {
+ return loco::dtype_get(node->input());
+ }
+
+ loco::DataType visit(const locoex::TFLMaximum *node) final { return loco::dtype_get(node->x()); }
+
+ loco::DataType visit(const locoex::TFLMaxPool2D *node) final
+ {
+ return loco::dtype_get(node->value());
+ }
+
+ loco::DataType visit(const locoex::TFLMean *node) final { return loco::dtype_get(node->input()); }
+
+ loco::DataType visit(const locoex::TFLMul *node) final { return loco::dtype_get(node->x()); }
+
+ loco::DataType visit(const locoex::TFLRelu *node) final
+ {
+ return loco::dtype_get(node->features());
+ }
+
+ loco::DataType visit(const locoex::TFLRelu6 *node) final
+ {
+ return loco::dtype_get(node->features());
+ }
+
+ loco::DataType visit(const locoex::TFLReshape *node) final
+ {
+ return loco::dtype_get(node->tensor());
+ }
+
+ loco::DataType visit(const locoex::TFLRsqrt *node) final { return loco::dtype_get(node->x()); }
+
+ // TODO TFLSoftmax
+
+ loco::DataType visit(const locoex::TFLSqrt *node) final { return loco::dtype_get(node->x()); }
+
+ loco::DataType visit(const locoex::TFLSquaredDifference *node) final
+ {
+ return loco::dtype_get(node->x());
+ }
+
+ loco::DataType visit(const locoex::TFLSub *node) final { return loco::dtype_get(node->x()); }
+
+ // TODO TFLTanh
+
+ loco::DataType visit(const locoex::TFLTranspose *node) final
+ {
+ return loco::dtype_get(node->a());
+ }
+
+ loco::DataType visit(const locoex::TFLTransposeConv *node) final
+ {
+ return loco::dtype_get(node->outBackprop());
+ }
+};
+
+} // namespace
+
+namespace locoex
+{
+
+bool TFLTypeInferenceRule::recognize(const loco::Dialect *d) const
+{
+ return TFLDialect::get() == d;
+}
+
+bool TFLTypeInferenceRule::infer(const loco::Node *node, loco::DataType &dtype) const
+{
+ assert(node->dialect() == TFLDialect::get());
+
+ TypeInferenceAlgorithm alg;
+
+ dtype = dynamic_cast<const TFLNode *>(node)->accept(&alg);
+ assert(dtype != loco::DataType::Unknown);
+
+ return true;
+}
+
+} // namespace locoex
diff --git a/compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.h b/compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.h
new file mode 100644
index 000000000..31765dcba
--- /dev/null
+++ b/compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_SERVICE_TFLTYPE_INFERENCE_RULE_H__
+#define __LOCOEX_SERVICE_TFLTYPE_INFERENCE_RULE_H__
+
+#include <loco/Service/TypeInference.h>
+
+namespace locoex
+{
+
+/**
+ * @brief Type Inference Rule for TFLDialect
+ */
+struct TFLTypeInferenceRule final : public loco::TypeInferenceRule
+{
+ bool recognize(const loco::Dialect *) const final;
+
+ bool infer(const loco::Node *, loco::DataType &) const final;
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_SERVICE_TFLTYPE_INFERENCE_RULE_H__
diff --git a/compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.test.cpp b/compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.test.cpp
new file mode 100644
index 000000000..dd1f93c4d
--- /dev/null
+++ b/compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.test.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dialect/IR/TFLNodes.h"
+#include "Dialect/IR/TFLDialect.h"
+#include "Dialect/Service/TFLTypeInferenceRule.h"
+
+#include "TestGraph.h"
+
+#include <loco.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/TypeInference.h>
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+TEST(TFLTypeInferenceRuleTest, minimal_with_TFLRelu)
+{
+ // Create a simple network
+ exo::test::TestGraph graph;
+ auto tfl_node = graph.append<locoex::TFLRelu>(graph.pull);
+ graph.complete(tfl_node);
+
+ graph.pull->dtype(loco::DataType::S32);
+
+ // pre-check
+ ASSERT_FALSE(loco::dtype_known(tfl_node));
+
+ // type inference
+ locoex::TFLTypeInferenceRule tfl_rule;
+ loco::CanonicalTypeInferenceRule canon_rule;
+ loco::MultiDialectTypeInferenceRule rules;
+
+ rules.bind(loco::CanonicalDialect::get(), &canon_rule);
+ rules.bind(locoex::TFLDialect::get(), &tfl_rule);
+
+ loco::apply(&rules).to(graph.g.get());
+
+ // Verify
+ ASSERT_TRUE(loco::dtype_known(tfl_node));
+ auto type = loco::dtype_get(tfl_node);
+ ASSERT_EQ(type, loco::DataType::S32);
+}
diff --git a/compiler/exo/src/ExoFormattedGraph.cpp b/compiler/exo/src/ExoFormattedGraph.cpp
new file mode 100644
index 000000000..5d3b18be1
--- /dev/null
+++ b/compiler/exo/src/ExoFormattedGraph.cpp
@@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExoFormattedGraph.h"
+
+#include "Dialect/IR/TFLDialect.h"
+#include "Dialect/IR/TFLNodes.h"
+
+#include "Dialect/IR/CircleDialect.h"
+#include "Dialect/IR/CircleNodes.h"
+
+#include <locoex/Service/COpFormattedGraph.h>
+#include <pepper/str.h>
+
+#include <sstream>
+#include <cassert>
+
+// For TF lite
+namespace
+{
+
+const char *to_str(locoex::FusedActFunc fused)
+{
+ switch (fused)
+ {
+ case locoex::FusedActFunc::NONE:
+ return "NONE";
+ case locoex::FusedActFunc::RELU:
+ return "RELU";
+ case locoex::FusedActFunc::RELU6:
+ return "RELU6";
+ default:
+ return "Error";
+ }
+}
+
+const char *to_str(locoex::Padding padding)
+{
+ switch (padding)
+ {
+ case locoex::Padding::SAME:
+ return "SAME";
+ case locoex::Padding::VALID:
+ return "VALID";
+ default:
+ return "Error";
+ }
+}
+
+std::string to_str(const locoex::Stride *stride)
+{
+ return pepper::str(stride->h(), ",", stride->w());
+}
+
+std::string to_str(const locoex::Filter *filter)
+{
+ return pepper::str(filter->h(), ",", filter->w());
+}
+
+std::string tfl_opname(uint32_t opnum)
+{
+ static std::string prefix{"tfl."};
+
+ switch (static_cast<locoex::TFLOpcode>(opnum))
+ {
+#define TFL_NODE(OPCODE, CLASS) \
+ case locoex::TFLOpcode::OPCODE: \
+ return prefix + #OPCODE;
+#include "Dialect/IR/TFLNodes.lst"
+#undef TFL_NODE
+ default:
+ break;
+ };
+
+ return prefix + "Invalid";
+}
+
+// TFLNodeSummaryBuilder with default implementation
+class TFLNodeSummaryBuilderBase : public locop::NodeSummaryBuilder
+{
+public:
+ TFLNodeSummaryBuilderBase(const locop::SymbolTable *tbl) : _tbl{tbl}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool build(const loco::Node *, locop::NodeSummary &s) const final;
+
+protected:
+#define TFL_NODE(OPCODE, CLASS) \
+ virtual bool summary(const CLASS *, locop::NodeSummary &s) const \
+ { \
+ s.comments().append("Emitted by Default TFLNodeSummaryBuilder"); \
+ s.state(locop::NodeSummary::State::PartiallyKnown); \
+ return true; \
+ }
+#include "Dialect/IR/TFLNodes.lst"
+#undef TFL_NODE
+
+protected:
+ const locop::SymbolTable *tbl(void) const { return _tbl; }
+
+ // Please do not use _tbl directly and use tbl().
+ // This will be changed to private in near future.
+protected:
+ const locop::SymbolTable *_tbl;
+};
+
+class TFLNodeSummaryBuilder final : public TFLNodeSummaryBuilderBase
+{
+public:
+ TFLNodeSummaryBuilder(const locop::SymbolTable *tbl) : TFLNodeSummaryBuilderBase(tbl)
+ {
+ // DO NOTHING
+ }
+
+private:
+#define IMPLEMENT(CLASS) bool summary(const CLASS *, locop::NodeSummary &) const final;
+ IMPLEMENT(locoex::TFLAdd)
+ IMPLEMENT(locoex::TFLAveragePool2D)
+ IMPLEMENT(locoex::TFLConcatenation)
+ IMPLEMENT(locoex::TFLConst)
+ IMPLEMENT(locoex::TFLConv2D)
+ IMPLEMENT(locoex::TFLDepthwiseConv2D)
+ IMPLEMENT(locoex::TFLDiv)
+ IMPLEMENT(locoex::TFLMaximum)
+ IMPLEMENT(locoex::TFLMaxPool2D)
+ IMPLEMENT(locoex::TFLMean)
+ IMPLEMENT(locoex::TFLMul)
+ IMPLEMENT(locoex::TFLRelu)
+ IMPLEMENT(locoex::TFLRelu6)
+ IMPLEMENT(locoex::TFLReshape)
+ IMPLEMENT(locoex::TFLRsqrt)
+ IMPLEMENT(locoex::TFLSqrt)
+ IMPLEMENT(locoex::TFLSquaredDifference)
+ IMPLEMENT(locoex::TFLSub)
+ IMPLEMENT(locoex::TFLTranspose)
+ IMPLEMENT(locoex::TFLTransposeConv)
+#undef IMPLEMENT
+};
+
+bool TFLNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const
+{
+ if (node->dialect() != locoex::TFLDialect::get())
+ return false;
+
+#define TFL_NODE(OPCODE, CLASS) \
+ if (dynamic_cast<const CLASS *>(node)) \
+ { \
+ s.opname(tfl_opname(node->opnum())); \
+ return summary(dynamic_cast<const CLASS *>(node), s); \
+ }
+#include "Dialect/IR/TFLNodes.lst"
+#undef TFL_NODE
+
+ return false;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLAdd *node, locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != locoex::FusedActFunc::UNDEFINED);
+
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.args().append("y", tbl()->lookup(node->y()));
+ s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLAveragePool2D *node,
+ locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != locoex::FusedActFunc::UNDEFINED);
+
+ s.args().append("value", tbl()->lookup(node->value()));
+ s.args().append("filter(h,w)", to_str(node->filter()));
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLConcatenation *node,
+ locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != locoex::FusedActFunc::UNDEFINED);
+
+ for (uint32_t i = 0; i < node->numValues(); ++i)
+ s.args().append("values", tbl()->lookup(node->values(i)));
+ s.args().append("axis", pepper::str(node->axis()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLConst *, locop::NodeSummary &s) const
+{
+ s.state(locop::NodeSummary::State::PartiallyKnown);
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLConv2D *node, locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != locoex::FusedActFunc::UNDEFINED);
+ assert(node->padding() != locoex::Padding::UNDEFINED);
+
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("filter", tbl()->lookup(node->filter()));
+ s.args().append("bias", tbl()->lookup(node->bias()));
+
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLDepthwiseConv2D *node,
+ locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != locoex::FusedActFunc::UNDEFINED);
+ assert(node->padding() != locoex::Padding::UNDEFINED);
+
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("filter", tbl()->lookup(node->filter()));
+ s.args().append("bias", tbl()->lookup(node->bias()));
+
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("depthMultiplier", std::to_string(node->depthMultiplier()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLDiv *node, locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.args().append("y", tbl()->lookup(node->y()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLMaximum *node, locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.args().append("y", tbl()->lookup(node->y()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLMaxPool2D *node, locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != locoex::FusedActFunc::UNDEFINED);
+
+ s.args().append("value", tbl()->lookup(node->value()));
+ s.args().append("filter(h,w)", to_str(node->filter()));
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLMean *node, locop::NodeSummary &s) const
+{
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("reduction_indices", tbl()->lookup(node->reduction_indices()));
+ s.args().append("keep_dims", node->keep_dims() ? "true" : "false");
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLMul *node, locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != locoex::FusedActFunc::UNDEFINED);
+
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.args().append("y", tbl()->lookup(node->y()));
+ s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLRelu *node, locop::NodeSummary &s) const
+{
+ s.args().append("features", tbl()->lookup(node->features()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLRelu6 *node, locop::NodeSummary &s) const
+{
+ s.args().append("features", tbl()->lookup(node->features()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLReshape *node, locop::NodeSummary &s) const
+{
+ s.args().append("tensor", tbl()->lookup(node->tensor()));
+ s.args().append("shape", tbl()->lookup(node->shape()));
+ // TODO Show newShape info
+ s.state(locop::NodeSummary::State::PartiallyKnown);
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLRsqrt *node, locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+// TODO TFLSoftmax
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLSqrt *node, locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLSquaredDifference *node,
+ locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.args().append("y", tbl()->lookup(node->y()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLSub *node, locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.args().append("y", tbl()->lookup(node->y()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+// TODO TFLTanh
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLTranspose *node, locop::NodeSummary &s) const
+{
+ s.args().append("a", tbl()->lookup(node->a()));
+ s.args().append("perm", tbl()->lookup(node->perm()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool TFLNodeSummaryBuilder::summary(const locoex::TFLTransposeConv *node,
+ locop::NodeSummary &s) const
+{
+ assert(node->padding() != locoex::Padding::UNDEFINED);
+
+ s.args().append("inputSizes", tbl()->lookup(node->inputSizes()));
+ s.args().append("filter", tbl()->lookup(node->filter()));
+ s.args().append("outBackprop", tbl()->lookup(node->outBackprop()));
+
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+} // namespace
+
+// For Circle
+namespace
+{
+
+std::string circle_opname(uint32_t opnum)
+{
+ static std::string prefix{"circle."};
+
+ switch (static_cast<locoex::CircleOpcode>(opnum))
+ {
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ case locoex::CircleOpcode::OPCODE: \
+ return prefix + #OPCODE;
+#include "Dialect/IR/CircleNodes.lst"
+#undef CIRCLE_NODE
+ default:
+ break;
+ };
+
+ return prefix + "Invalid";
+}
+
+// CircleNodeSummaryBuilder with default implementation
+class CircleNodeSummaryBuilderBase : public locop::NodeSummaryBuilder
+{
+public:
+ CircleNodeSummaryBuilderBase(const locop::SymbolTable *tbl) : _tbl{tbl}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool build(const loco::Node *, locop::NodeSummary &s) const final;
+
+protected:
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ virtual bool summary(const CLASS *, locop::NodeSummary &s) const \
+ { \
+ s.comments().append("Emitted by Default CircleNodeSummaryBuilder"); \
+ s.state(locop::NodeSummary::State::PartiallyKnown); \
+ return true; \
+ }
+#include "Dialect/IR/CircleNodes.lst"
+#undef CIRCLE_NODE
+
+protected:
+ const locop::SymbolTable *tbl(void) const { return _tbl; }
+
+ // Please do not use _tbl directly and use tbl().
+ // This will be changed to private in near future.
+protected:
+ const locop::SymbolTable *_tbl;
+};
+
+class CircleNodeSummaryBuilder final : public CircleNodeSummaryBuilderBase
+{
+public:
+ CircleNodeSummaryBuilder(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
+ {
+ // DO NOTHING
+ }
+
+private:
+#define IMPLEMENT(CLASS) bool summary(const CLASS *, locop::NodeSummary &) const final;
+ IMPLEMENT(locoex::CircleInstanceNorm)
+#undef IMPLEMENT
+};
+
+bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const
+{
+ if (node->dialect() != locoex::CircleDialect::get())
+ return false;
+
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ if (dynamic_cast<const CLASS *>(node)) \
+ { \
+ s.opname(circle_opname(node->opnum())); \
+ return summary(dynamic_cast<const CLASS *>(node), s); \
+ }
+#include "Dialect/IR/CircleNodes.lst"
+#undef CIRCLE_NODE
+
+ return false;
+}
+
+bool CircleNodeSummaryBuilder::summary(const locoex::CircleInstanceNorm *node,
+ locop::NodeSummary &s) const
+{
+ auto fused = node->fusedActivationFunction();
+ assert(fused != locoex::FusedActFunc::UNDEFINED);
+
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("gamma", tbl()->lookup(node->gamma()));
+ s.args().append("beta", tbl()->lookup(node->beta()));
+ s.args().append("epsilon", pepper::str(node->epsilon()));
+ s.args().append("fused_activation_function", to_str(fused));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+} // namespace
+
+namespace exo
+{
+
+bool NodeSummaryBuilder::build(const loco::Node *node, locop::NodeSummary &s) const
+{
+ if (locop::CanonicalNodeSummaryBuilder(_tbl).build(node, s))
+ {
+ return true;
+ }
+
+ if (TFLNodeSummaryBuilder(_tbl).build(node, s))
+ {
+ return true;
+ }
+
+ if (CircleNodeSummaryBuilder(_tbl).build(node, s))
+ {
+ return true;
+ }
+
+ if (locoex::COpNodeSummaryBuilder(_tbl).build(node, s))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/ExoFormattedGraph.h b/compiler/exo/src/ExoFormattedGraph.h
new file mode 100644
index 000000000..714e483b5
--- /dev/null
+++ b/compiler/exo/src/ExoFormattedGraph.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __EXO_FORMATTED_GRAPH_H__
+#define __EXO_FORMATTED_GRAPH_H__
+
+#include <locop/FormattedGraph.h>
+
+#include <stdex/Memory.h>
+
+namespace exo
+{
+
+class NodeSummaryBuilder final : public locop::NodeSummaryBuilder
+{
+public:
+  NodeSummaryBuilder(const locop::SymbolTable *tbl) : _tbl{tbl}
+  {
+    // DO NOTHING
+  }
+
+public:
+  bool build(const loco::Node *node, locop::NodeSummary &s) const final; // tries each dialect builder in turn
+
+private:
+  const locop::SymbolTable *_tbl; // used to name node operands; not owned
+};
+
+class NodeSummaryBuilderFactory final : public locop::NodeSummaryBuilderFactory
+{
+public:
+  NodeSummaryBuilderFactory() = default;
+
+public:
+  // Create a NodeSummaryBuilder bound to the given symbol table ("tlb" was a typo for "tbl")
+  std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tbl) const final
+  {
+    return stdex::make_unique<NodeSummaryBuilder>(tbl);
+  }
+};
+
+} // namespace exo
+
+#endif // __EXO_FORMATTED_GRAPH_H__
diff --git a/compiler/exo/src/ExoOptimize.cpp b/compiler/exo/src/ExoOptimize.cpp
new file mode 100644
index 000000000..d7278e900
--- /dev/null
+++ b/compiler/exo/src/ExoOptimize.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExoOptimize.h"
+
+#include "Knob.h"
+#include "Passes.h"
+#include "ProgressReporter.h"
+
+#include <logo/Phase.h>
+
+#include <stdex/Memory.h>
+
+namespace exo
+{
+
+void optimize(loco::Graph *g)
+{
+  logo::Phase phase;
+  {
+    // prepare type and shape before optimization
+    phase.emplace_back(stdex::make_unique<TypeInferencePass>());
+    phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
+
+    phase.emplace_back(stdex::make_unique<FoldReshapeOfConstPass>());
+    phase.emplace_back(stdex::make_unique<FoldTransposeOfConstPass>());
+
+    if (get<Knob::UseFuseBiasAddPass>())
+    {
+      phase.emplace_back(stdex::make_unique<FuseBiasAddPass>());
+    }
+
+    if (get<Knob::UseFuseInstanceNormPass>())
+    {
+      phase.emplace_back(stdex::make_unique<FuseInstanceNormPass>());
+    }
+
+    if (get<Knob::UseFuseReluPass>())
+    {
+      phase.emplace_back(stdex::make_unique<FuseReluPass>());
+    }
+    phase.emplace_back(stdex::make_unique<FuseRsqrtPass>()); // NOTE(review): not knob-gated unlike sibling fuse passes -- confirm intended
+
+    if (get<Knob::UseFuseSquaredDifferencePass>())
+    {
+      phase.emplace_back(stdex::make_unique<FuseSquaredDifferencePass>());
+    }
+
+    phase.emplace_back(stdex::make_unique<MergeConcatNodesPass>());
+
+    phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>()); // drop nodes orphaned by the passes above
+  }
+
+  logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g}; // Restart strategy -- see logo::PhaseRunner
+
+  ProgressReporter prog(g, logo::PhaseStrategy::Restart);
+  phase_runner.attach(&prog);
+  phase_runner.run(phase);
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/ExoOptimize.h b/compiler/exo/src/ExoOptimize.h
new file mode 100644
index 000000000..4769c1193
--- /dev/null
+++ b/compiler/exo/src/ExoOptimize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OPTIMIZE_H__
+#define __OPTIMIZE_H__
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Run passes for a graph after completion of converting canonical nodes into TFL nodes.
+ *
+ * TODO Separate optimize pass dedicated to TFL and Circle dialect when necessary
+ */
+void optimize(loco::Graph *);
+
+} // namespace exo
+
+#endif // __OPTIMIZE_H__
diff --git a/compiler/exo/src/ExporterUtils.cpp b/compiler/exo/src/ExporterUtils.cpp
new file mode 100644
index 000000000..41ccdcd71
--- /dev/null
+++ b/compiler/exo/src/ExporterUtils.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExporterUtils.h"
+
+#include <oops/InternalExn.h>
+
+#include <cassert>
+
+namespace exo
+{
+
+ShapeDescription to_shape_description(const loco::TensorShape &shape)
+{
+  ShapeDescription res;
+
+  res._rank_known = true; // rank of a loco::TensorShape is always available
+
+  res._dims.resize(shape.rank());
+  for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+  {
+    // All the dimensions SHOULD be known
+    assert(shape.dim(axis).known());
+    res._dims.at(axis) = shape.dim(axis).value();
+  }
+
+  return res;
+}
+
+ShapeDescription to_shape_description(const loco::FeatureShape &shape)
+{
+  ShapeDescription res;
+
+  res._rank_known = true;
+
+  // T/F Lite encodes a feature map as a NHWC tensor
+  res._dims.resize(4);
+  res._dims.at(0) = shape.count().value();  // N
+  res._dims.at(1) = shape.height().value(); // H
+  res._dims.at(2) = shape.width().value();  // W
+  res._dims.at(3) = shape.depth().value();  // C
+
+  return res;
+}
+
+ShapeDescription to_shape_description(const loco::FilterShape &shape)
+{
+  ShapeDescription res;
+
+  res._rank_known = true;
+
+  // T/F Lite encodes a convolution filter as a NHWC tensor
+  res._dims.resize(4);
+  res._dims.at(0) = shape.count().value();  // N (output channels)
+  res._dims.at(1) = shape.height().value(); // H
+  res._dims.at(2) = shape.width().value();  // W
+  res._dims.at(3) = shape.depth().value();  // C (input channels)
+
+  return res;
+}
+
+ShapeDescription to_shape_description(const loco::DepthwiseFilterShape &shape)
+{
+  ShapeDescription res;
+
+  res._rank_known = true;
+
+  // T/F Lite encodes a depthwise convolution filter as a [1, H, W, C*M] tensor
+  res._dims.resize(4);
+  res._dims.at(0) = 1;
+  res._dims.at(1) = shape.height().value();
+  res._dims.at(2) = shape.width().value();
+  res._dims.at(3) = shape.depth().value() * shape.multiplier().value(); // channels x multiplier
+
+  return res;
+}
+
+ShapeDescription to_shape_description(const loco::BiasShape &shape)
+{
+  ShapeDescription res;
+
+  res._rank_known = true;
+
+  res._dims.resize(1); // bias is a rank-1 tensor
+  res._dims.at(0) = shape.length().value();
+
+  return res;
+}
+
+ShapeDescription to_shape_description(const loco::MatrixShape &shape)
+{
+  ShapeDescription res;
+
+  res._rank_known = true;
+
+  res._dims.resize(2); // matrix is encoded as a rank-2 [H, W] tensor
+  res._dims.at(0) = shape.height().value();
+  res._dims.at(1) = shape.width().value();
+
+  return res;
+}
+
+ShapeDescription to_shape_description(const loco::NodeShape &shape)
+{
+  switch (shape.domain())
+  {
+  case loco::Domain::Tensor:
+    return to_shape_description(shape.as<loco::TensorShape>());
+  case loco::Domain::Feature:
+    return to_shape_description(shape.as<loco::FeatureShape>());
+  case loco::Domain::Filter:
+    return to_shape_description(shape.as<loco::FilterShape>());
+  case loco::Domain::DepthwiseFilter:
+    return to_shape_description(shape.as<loco::DepthwiseFilterShape>());
+  case loco::Domain::Bias:
+    return to_shape_description(shape.as<loco::BiasShape>());
+  case loco::Domain::Matrix:
+    return to_shape_description(shape.as<loco::MatrixShape>());
+  default:
+    break; // unsupported domain -- fall through to the hard error below
+  }
+
+  INTERNAL_EXN_V("Unsupported loco domain", oops::to_uint32(shape.domain()));
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/ExporterUtils.h b/compiler/exo/src/ExporterUtils.h
new file mode 100644
index 000000000..e1f1f66a8
--- /dev/null
+++ b/compiler/exo/src/ExporterUtils.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __EXPORTER_UTILS_H__
+#define __EXPORTER_UTILS_H__
+
+#include "loco.h"
+
+#include "loco/IR/PermutingCodec.h"
+#include "loco/IR/NodeShape.h"
+
+namespace exo
+{
+
+struct ShapeDescription
+{
+  std::vector<int32_t> _dims; // dimension sizes, outermost first
+  bool _rank_known;           // set to true by every to_shape_description overload below
+};
+
+ShapeDescription to_shape_description(const loco::TensorShape &shape);
+ShapeDescription to_shape_description(const loco::FeatureShape &shape);
+ShapeDescription to_shape_description(const loco::FilterShape &shape);
+ShapeDescription to_shape_description(const loco::BiasShape &shape);
+ShapeDescription to_shape_description(const loco::MatrixShape &shape);
+ShapeDescription to_shape_description(const loco::NodeShape &shape); // NOTE(review): the DepthwiseFilterShape overload is defined in ExporterUtils.cpp but not declared here -- confirm whether it should be exposed
+
+template <typename Permutation> inline bool isNHWC(Permutation *perm); // true iff perm maps (Count, Height, Width, Depth) to axes (0, 1, 2, 3)
+
+template <> inline bool isNHWC(loco::Permutation<loco::Domain::Feature> *perm)
+{
+  return perm->axis(loco::FeatureAxis::Count) == 0 && perm->axis(loco::FeatureAxis::Height) == 1 &&
+         perm->axis(loco::FeatureAxis::Width) == 2 && perm->axis(loco::FeatureAxis::Depth) == 3;
+}
+
+template <> inline bool isNHWC(loco::Permutation<loco::Domain::Filter> *perm)
+{
+  return perm->axis(loco::FilterAxis::Count) == 0 && perm->axis(loco::FilterAxis::Height) == 1 &&
+         perm->axis(loco::FilterAxis::Width) == 2 && perm->axis(loco::FilterAxis::Depth) == 3;
+}
+
+} // namespace exo
+
+#endif // __EXPORTER_UTILS_H__
diff --git a/compiler/exo/src/GraphBlock.cpp b/compiler/exo/src/GraphBlock.cpp
new file mode 100644
index 000000000..0a45ce8ad
--- /dev/null
+++ b/compiler/exo/src/GraphBlock.cpp
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GraphBlock.h"
+
+#include "Check.h"
+
+#include <loco.h>
+#include <stdex/Memory.h>
+
+namespace
+{
+
+template <exo::FeatureLayout T> loco::Permutation<loco::Domain::Feature> perm(); // primary template -- only the specializations below exist
+
+template <> loco::Permutation<loco::Domain::Feature> perm<exo::FeatureLayout::NHWC>()
+{
+  // Make NHWC permutation for encoder and decoder
+  loco::Permutation<loco::Domain::Feature> NHWC;
+
+  NHWC.axis(loco::FeatureAxis::Count) = 0;
+  NHWC.axis(loco::FeatureAxis::Height) = 1;
+  NHWC.axis(loco::FeatureAxis::Width) = 2;
+  NHWC.axis(loco::FeatureAxis::Depth) = 3;
+
+  return NHWC;
+}
+
+template <exo::FilterLayout T> loco::Permutation<loco::Domain::Filter> perm(); // primary template -- only the specializations below exist
+
+template <> loco::Permutation<loco::Domain::Filter> perm<exo::FilterLayout::HWIO>()
+{
+  loco::Permutation<loco::Domain::Filter> HWIO; // a.k.a., HWCN
+
+  HWIO.axis(loco::FilterAxis::Height) = 0;
+  HWIO.axis(loco::FilterAxis::Width) = 1;
+  HWIO.axis(loco::FilterAxis::Depth) = 2;
+  HWIO.axis(loco::FilterAxis::Count) = 3;
+
+  return HWIO;
+}
+
+template <> loco::Permutation<loco::Domain::Filter> perm<exo::FilterLayout::OHWI>()
+{
+
+  // Make NHWC permutation for encoder and decoder
+  loco::Permutation<loco::Domain::Filter> OHWI; // a.k.a., NHWC
+
+  OHWI.axis(loco::FilterAxis::Count) = 0;
+  OHWI.axis(loco::FilterAxis::Height) = 1;
+  OHWI.axis(loco::FilterAxis::Width) = 2;
+  OHWI.axis(loco::FilterAxis::Depth) = 3;
+
+  return OHWI;
+}
+
+template <exo::DepthwiseFilterLayout T> loco::Permutation<loco::Domain::DepthwiseFilter> perm(); // primary template -- only the specialization below exists
+
+template <>
+loco::Permutation<loco::Domain::DepthwiseFilter> perm<exo::DepthwiseFilterLayout::HWCM>()
+{
+  loco::Permutation<loco::Domain::DepthwiseFilter> HWCM;
+
+  HWCM.axis(loco::DepthwiseFilterAxis::Height) = 0;
+  HWCM.axis(loco::DepthwiseFilterAxis::Width) = 1;
+  HWCM.axis(loco::DepthwiseFilterAxis::Depth) = 2;
+  HWCM.axis(loco::DepthwiseFilterAxis::Multiplier) = 3;
+
+  return HWCM;
+}
+
+template <exo::MatrixLayout T> loco::Permutation<loco::Domain::Matrix> perm(); // primary template -- only the specializations below exist
+
+template <> loco::Permutation<loco::Domain::Matrix> perm<exo::MatrixLayout::HW>()
+{
+  loco::Permutation<loco::Domain::Matrix> HW;
+
+  HW.axis(loco::MatrixAxis::Height) = 0;
+  HW.axis(loco::MatrixAxis::Width) = 1;
+
+  return HW;
+}
+
+template <> loco::Permutation<loco::Domain::Matrix> perm<exo::MatrixLayout::WH>()
+{
+  loco::Permutation<loco::Domain::Matrix> WH;
+
+  WH.axis(loco::MatrixAxis::Height) = 1;
+  WH.axis(loco::MatrixAxis::Width) = 0;
+
+  return WH;
+}
+
+} // namespace
+
+namespace exo
+{
+
+template <FeatureLayout T> loco::FeatureEncode *make_feature_encode(loco::Node *input_for_encode)
+{
+  EXO_ASSERT(input_for_encode != nullptr, "input should not be nullptr");
+  loco::Graph *g = input_for_encode->graph(); // new node goes into the same graph as its input
+
+  auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+
+  encoder->perm(perm<T>());
+
+  auto enc = g->nodes()->create<loco::FeatureEncode>(); // owned by the graph
+  enc->input(input_for_encode);
+  enc->encoder(std::move(encoder));
+
+  return enc;
+}
+
+template <FeatureLayout T> loco::FeatureDecode *make_feature_decode(loco::Node *input_for_decode)
+{
+  EXO_ASSERT(input_for_decode != nullptr, "input should not be nullptr");
+  loco::Graph *g = input_for_decode->graph(); // new node goes into the same graph as its input
+
+  auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+
+  decoder->perm(perm<T>());
+
+  auto dec = g->nodes()->create<loco::FeatureDecode>(); // owned by the graph
+  dec->input(input_for_decode);
+  dec->decoder(std::move(decoder));
+
+  return dec;
+}
+
+template <FilterLayout T> loco::FilterEncode *make_filter_encode(loco::Node *input_for_encode)
+{
+  EXO_ASSERT(input_for_encode != nullptr, "filter should not be nullptr");
+  loco::Graph *g = input_for_encode->graph(); // new node goes into the same graph as its input
+
+  auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+
+  encoder->perm(perm<T>());
+
+  auto enc = g->nodes()->create<loco::FilterEncode>(); // owned by the graph
+  enc->input(input_for_encode);
+  enc->encoder(std::move(encoder));
+
+  return enc;
+}
+
+template <FilterLayout T> loco::FilterDecode *make_filter_decode(loco::Node *input_for_decode)
+{
+  EXO_ASSERT(input_for_decode != nullptr, "filter should not be nullptr");
+  loco::Graph *g = input_for_decode->graph(); // new node goes into the same graph as its input
+
+  auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
+
+  decoder->perm(perm<T>());
+
+  auto dec = g->nodes()->create<loco::FilterDecode>(); // owned by the graph
+  dec->input(input_for_decode);
+  dec->decoder(std::move(decoder));
+
+  return dec;
+}
+
+template <DepthwiseFilterLayout T>
+loco::DepthwiseFilterDecode *make_dw_filter_decode(loco::Node *input_for_decode)
+{
+  EXO_ASSERT(input_for_decode != nullptr, "filter should not be nullptr");
+  loco::Graph *g = input_for_decode->graph(); // new node goes into the same graph as its input
+
+  auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::DepthwiseFilter>>();
+
+  decoder->perm(perm<T>());
+
+  auto dec = g->nodes()->create<loco::DepthwiseFilterDecode>(); // owned by the graph
+  dec->input(input_for_decode);
+  dec->decoder(std::move(decoder));
+
+  return dec;
+}
+
+template <MatrixLayout T> loco::MatrixEncode *make_matrix_encode(loco::Node *input_for_encode)
+{
+  EXO_ASSERT(input_for_encode != nullptr, "input should not be nullptr");
+  loco::Graph *g = input_for_encode->graph(); // new node goes into the same graph as its input
+
+  auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Matrix>>();
+
+  encoder->perm(perm<T>());
+
+  auto enc = g->nodes()->create<loco::MatrixEncode>(); // owned by the graph
+  enc->input(input_for_encode);
+  enc->encoder(std::move(encoder));
+
+  return enc;
+}
+
+template <MatrixLayout T> loco::MatrixDecode *make_matrix_decode(loco::Node *input_for_decode)
+{
+  EXO_ASSERT(input_for_decode != nullptr, "input should not be nullptr");
+  loco::Graph *g = input_for_decode->graph(); // new node goes into the same graph as its input
+
+  auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Matrix>>();
+
+  decoder->perm(perm<T>());
+
+  auto dec = g->nodes()->create<loco::MatrixDecode>(); // owned by the graph
+  dec->input(input_for_decode);
+  dec->decoder(std::move(decoder));
+
+  return dec;
+}
+
+// template instantiation -- explicit instantiations for every layout declared in GraphBlock.h
+template loco::FeatureEncode *
+make_feature_encode<FeatureLayout::NHWC>(loco::Node *input_for_encode);
+
+template loco::FeatureDecode *
+make_feature_decode<FeatureLayout::NHWC>(loco::Node *input_for_encode);
+
+template loco::FilterEncode *make_filter_encode<FilterLayout::HWIO>(loco::Node *input_for_encode);
+template loco::FilterDecode *make_filter_decode<FilterLayout::OHWI>(loco::Node *input_for_decode);
+
+template loco::DepthwiseFilterDecode *
+make_dw_filter_decode<DepthwiseFilterLayout::HWCM>(loco::Node *input_for_decode);
+
+template loco::MatrixEncode *make_matrix_encode<MatrixLayout::HW>(loco::Node *input_for_encode);
+template loco::MatrixEncode *make_matrix_encode<MatrixLayout::WH>(loco::Node *input_for_encode);
+template loco::MatrixDecode *make_matrix_decode<MatrixLayout::HW>(loco::Node *input_for_decode);
+template loco::MatrixDecode *make_matrix_decode<MatrixLayout::WH>(loco::Node *input_for_decode);
+
+} // namespace exo
diff --git a/compiler/exo/src/GraphBlock.h b/compiler/exo/src/GraphBlock.h
new file mode 100644
index 000000000..b771c821b
--- /dev/null
+++ b/compiler/exo/src/GraphBlock.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GRAPH_BLOCK_H__
+#define __GRAPH_BLOCK_H__
+
+#include <loco.h>
+#include <loco/Service/ShapeInference.h>
+
+#include <oops/InternalExn.h>
+
+#include <functional>
+
+namespace exo
+{
+
+/// @brief feature layout of TFLITE file
+enum class FeatureLayout
+{
+  NHWC, // the only feature layout T/F Lite uses
+};
+
+/// @brief Creates a loco::FeatureEncode with T layout (NHWC for tflite) and add it to graph.
+template <FeatureLayout T> loco::FeatureEncode *make_feature_encode(loco::Node *input_for_encode);
+
+/// @brief Creates a loco::FeatureDecode with T layout (NHWC for tflite) and add it to graph.
+template <FeatureLayout T> loco::FeatureDecode *make_feature_decode(loco::Node *input_for_decode);
+
+enum class FilterLayout
+{
+  OHWI, // a.k.a., NHWC, Tensorflow Lite uses this layout for filter
+  HWIO, // a.k.a., HWCN, Tensorflow uses this layout for filter
+};
+
+/// @brief Create a loco::FilterEncode of given layout
+template <FilterLayout T> loco::FilterEncode *make_filter_encode(loco::Node *input_for_encode);
+
+/// @brief Create a loco::FilterDecode of given layout
+template <FilterLayout T> loco::FilterDecode *make_filter_decode(loco::Node *input_for_decode);
+
+enum class DepthwiseFilterLayout
+{
+  HWCM, // Height, Width, Channel (Depth), Multiplier
+};
+
+/// @brief Create a loco::DepthwiseFilterDecode of given layout
+template <DepthwiseFilterLayout T>
+loco::DepthwiseFilterDecode *make_dw_filter_decode(loco::Node *input_for_decode);
+
+enum class MatrixLayout
+{
+  HW, // row-major: Height axis first
+  WH  // column-major: Width axis first
+};
+
+/// @brief Create a loco::MatrixEncode of given layout
+template <MatrixLayout T> loco::MatrixEncode *make_matrix_encode(loco::Node *input_for_encode);
+
+/// @brief Create a loco::MatrixDecode of given layout
+template <MatrixLayout T> loco::MatrixDecode *make_matrix_decode(loco::Node *input_for_decode);
+
+} // namespace exo
+
+//
+// DomainConverter
+//
+
+/**
+ * Some canonical nodes can have input of various loco::Domain, e.g., loco::Domain::Tensor,
+ * loco::Domain::Feature, etc. However, TFL node accepts only loco::Domain::Tensor.
+ * So, When converting such canonical node to TFL node and input(s) of a canonical node are not
+ * loco::Domain::Tensor, additional nodes need to be inserted.
+ *
+ * The following two classes helps this insertion.
+ *
+ * For example, in case of loco::Relu conversion,
+ *
+ * Before:
+ *
+ * A (output: feature) -- loco::ReLU --- B (input:feature)
+ *
+ * After:
+ *
+ * A -- loco::FeatureDecode -- locoex::TFLRelu -- loco::FeatureEncode --- B
+ *
+ * loco::ReLU (dead node)
+ */
+
+namespace exo
+{
+
+/**
+ * @brief Handles input(s) while converting a canonical node to TFL node(s).
+ * This class informs DomainConverter how to handle inputs of a specific canonical node.
+ */
+template <class CanonicalT, class TFLT> class InputHandler // implemented per canonical op; consumed by DomainConverter::convert
+{
+public:
+  /**
+   * @brief Assign origin's inputs to replacer's inputs.
+   *        (This is called when origin belongs in Tensor domain.)
+   */
+  virtual void handover(CanonicalT *origin, TFLT *replacer) = 0;
+
+  /**
+   * @brief Returns the list of inputs that needs to have FeatureDecode as its input.
+   *        (This is called when origin belongs in Feature domain.)
+   */
+  virtual std::vector<loco::Node *> getInputsToConvert(CanonicalT *origin) = 0;
+
+  /// @brief Set the inputs of replacer to new_inputs
+  virtual void set(TFLT *replacer, std::vector<loco::Node *> &new_inputs) = 0;
+
+  /// @brief Set the inputs to nullptr (so the dead origin no longer references them)
+  virtual void nullify(CanonicalT *origin) = 0;
+};
+
+/**
+ * @brief Class to handle domain conversion while converting a canonical node to TFL node(s)
+ */
+template <class CanonicalT, class TFLT> class DomainConverter
+{
+public:
+  template <FeatureLayout FeatureLayoutT>
+  TFLT *convert(CanonicalT *origin, InputHandler<CanonicalT, TFLT> &input_handler); // returns the new TFL node, or nullptr when origin's shape is unknown
+};
+
+/**
+ * @brief Performs domain conversion
+ *
+ * 1. if origin belong to loco::Domain::Tensor, and replace origin to a TFL node.
+ * 2. if origin belong to loco::Domain::Feature, insert loco::FeatureDecode for input(s) and
+ * insert loco::FeatureEncode for output. Then replace origin to a TFL node.
+ *
+ * @return new TFL node; nullptr if shape of origin cannot be known
+ */
+template <class CanonicalT, class TFLT>
+template <FeatureLayout FeatureLayoutT>
+TFLT *DomainConverter<CanonicalT, TFLT>::convert(CanonicalT *origin,
+                                                 InputHandler<CanonicalT, TFLT> &input_handler)
+{
+  static_assert(FeatureLayoutT == FeatureLayout::NHWC, "Feature layout should be NHWC"); // only layout T/F Lite supports
+
+  if (!loco::shape_known(origin))
+  {
+    return nullptr; // cannot decide the domain without a shape; caller must retry later
+  }
+
+  auto tfl_node = origin->graph()->nodes()->template create<TFLT>();
+
+  // when the input is Tensor, just replace canonical node to TFL node.
+  if (loco::shape_get(origin).domain() == loco::Domain::Tensor)
+  {
+    input_handler.handover(origin, tfl_node);
+
+    loco::replace(origin).with(tfl_node);
+    input_handler.nullify(origin); // origin becomes a dead node with no inputs
+
+    return tfl_node;
+  }
+  else if (loco::shape_get(origin).domain() == loco::Domain::Feature)
+  {
+    std::vector<loco::Node *> feature_decodes;
+
+    for (auto input : input_handler.getInputsToConvert(origin))
+    {
+      auto dec = make_feature_decode<FeatureLayoutT>(input); // Feature -> Tensor in front of each input
+      feature_decodes.emplace_back(dec);
+    }
+
+    input_handler.set(tfl_node, feature_decodes);
+
+    auto enc = make_feature_encode<FeatureLayoutT>(tfl_node); // Tensor -> Feature behind the output
+
+    loco::replace(origin).with(enc);
+    input_handler.nullify(origin); // origin becomes a dead node with no inputs
+
+    return tfl_node;
+  }
+  else
+    INTERNAL_EXN_V("Unsupported loco::Domain", oops::to_uint32(loco::shape_get(origin).domain()));
+}
+
+} // namespace exo
+
+#endif //__GRAPH_BLOCK_H__
diff --git a/compiler/exo/src/Knob.cpp b/compiler/exo/src/Knob.cpp
new file mode 100644
index 000000000..50d78f4b7
--- /dev/null
+++ b/compiler/exo/src/Knob.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Knob.h"
+
+#include <pepper/strcast.h>
+
+#include <iostream>
+#include <string>
+#include <map>
+
+// Basic Infrastructure to declare and access Knob values
+namespace
+{
+
+using KnobName = std::string;
+
+/**
+ * @brief Load configuration (from somewhere)
+ */
+struct KnobLoader
+{
+  virtual ~KnobLoader() = default;
+
+  virtual bool load(const KnobName &name, bool default_value) const = 0; // yields default_value when the knob is not configured
+};
+
+/**
+ * @brief Load configuration from environment variables
+ *
+ * Given a prefix P, EnvKnobLoader reads a configuration K from concat(P, K).
+ *
+ * For example, let us assume that P is "MY_" and K is "CONFIG".
+ *
+ * Then, EnvKnobLoader reads configuration CONFIG from environment variable MY_CONFIG.
+ */
+class EnvKnobLoader final : public KnobLoader
+{
+public:
+  EnvKnobLoader() = default;
+
+public:
+  bool load(const KnobName &knob_name, bool default_value) const override
+  {
+    auto envvar = _prefix + knob_name;
+    auto s = std::getenv(envvar.c_str()); // nullptr when the variable is unset
+
+    return pepper::safe_strcast<int>(s, default_value ? 1 : 0) != 0; // any non-zero value means true
+  }
+  void knob_set(const KnobName &knob_name, bool value) { _knob[knob_name] = value; } // record dialect default
+  void dialect_set(const exo::Dialect &dialect_name) { _prefix = _label[dialect_name]; } // select env-var prefix
+  bool knob_get(const KnobName &knob_name) { return load(knob_name, _knob[knob_name]); } // env var overrides stored default
+
+private:
+  /// @brief Environment variable prefix
+  std::string _prefix;
+  std::map<KnobName, bool> _knob; // per-knob defaults filled in by exo::set(Dialect)
+  std::map<exo::Dialect, KnobName> _label = {{exo::Dialect::TFLITE, "TFL_"},
+                                             {exo::Dialect::CIRCLE, "CIRCLE_"}};
+};
+
+} // namespace
+
+namespace
+{
+
+EnvKnobLoader &knob_loader(void)
+{
+  // TODO separate "EXOTFLITE_" and "EXOCIRCLE_" when necessary
+  static EnvKnobLoader loader; // process-wide singleton
+  return loader;
+}
+
+} // namespace
+
+namespace exo
+{
+
+#define KNOB_BOOL(NAME, TFL_DEFAULT, CIRCLE_DEFAULT, DESC) \
+ template <> typename KnobTrait<Knob::NAME>::ValueType get<Knob::NAME>(void) \
+ { \
+ return ::knob_loader().knob_get(#NAME); \
+ }
+#include "Knob.lst"
+#undef KNOB_BOOL
+
+// Select the active dialect: sets the env-var prefix and loads each knob's dialect default
+void set(Dialect d)
+{
+  ::knob_loader().dialect_set(d);
+  switch (d)
+  {
+  case Dialect::TFLITE:
+#define KNOB_BOOL(NAME, TFL_DEFAULT, CIRCLE_DEFAULT, DESC) \
+  ::knob_loader().knob_set(#NAME, TFL_DEFAULT);
+#include "Knob.lst"
+#undef KNOB_BOOL
+    break;
+  case Dialect::CIRCLE:
+#define KNOB_BOOL(NAME, TFL_DEFAULT, CIRCLE_DEFAULT, DESC) \
+  ::knob_loader().knob_set(#NAME, CIRCLE_DEFAULT);
+#include "Knob.lst"
+#undef KNOB_BOOL
+    break;
+  default:
+    throw std::runtime_error("UnKnown dialect"); // FIX: exception was constructed but never thrown
+  }
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Knob.h b/compiler/exo/src/Knob.h
new file mode 100644
index 000000000..98613120c
--- /dev/null
+++ b/compiler/exo/src/Knob.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KNOB_H__
+#define __KNOB_H__
+
+namespace exo
+{
+
+enum class Dialect // selects env-var prefix ("TFL_"/"CIRCLE_") and knob defaults; see set()
+{
+  TFLITE,
+  CIRCLE
+};
+
+enum class Knob
+{
+#define KNOB_BOOL(NAME, TFL_DEFAULT, CIRCLE_DEFAULT, DESC) NAME,
+#include "Knob.lst"
+#undef KNOB_BOOL
+};
+
+template <Knob K> struct KnobTrait;
+
+#define KNOB_BOOL(NAME, TFL_DEFAULT, CIRCLE_DEFAULT, DESC) \
+ template <> struct KnobTrait<Knob::NAME> \
+ { \
+ using ValueType = bool; \
+ };
+#include "Knob.lst"
+#undef KNOB_BOOL
+
+template <Knob K> typename KnobTrait<K>::ValueType get(void);
+void set(Dialect);
+
+} // namespace exo
+
+#endif // __KNOB_H__
diff --git a/compiler/exo/src/Knob.lst b/compiler/exo/src/Knob.lst
new file mode 100644
index 000000000..7f59c93f3
--- /dev/null
+++ b/compiler/exo/src/Knob.lst
@@ -0,0 +1,11 @@
+#ifndef KNOB_BOOL
+#error "KNOB_BOOL is not defined"
+#endif // KNOB_BOOL
+
+// KNOB_BOOL(KNOB_NAME, TFL_DEFAULT, CIRCLE_DEFAULT, DESCRIPTION)
+
+// Optimization pass
+KNOB_BOOL(UseFuseBiasAddPass, true, true, Fuse TFLAdd or TFLSub into TFLConv2D)
+KNOB_BOOL(UseFuseInstanceNormPass, false, true, Fuse InstanceNorm pattern)
+KNOB_BOOL(UseFuseReluPass, true, true, Fuse TFLAdd or TFLSub into TFLConv2D or so)
+KNOB_BOOL(UseFuseSquaredDifferencePass, false, true, Fuse SquaredDifference pattern)
diff --git a/compiler/exo/src/Log.cpp b/compiler/exo/src/Log.cpp
new file mode 100644
index 000000000..aa762968b
--- /dev/null
+++ b/compiler/exo/src/Log.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Log.h"
+
+#include <hermes/ConsoleReporter.h>
+#include <stdex/Memory.h>
+
+#include <cstdlib>
+#include <iostream>
+
+// TODO Extract these lexical conversion routines as a library
+namespace
+{
+
+/**
+ * @brief Convert C-string as a value of type T
+ *
+ * safecast(s, v) returns v if s is nullptr.
+ */
+template <typename T> T safecast(const char *, const T &);
+
+template <> bool safecast<bool>(const char *s, const bool &value)
+{
+  return (s == nullptr) ? value : (std::stoi(s) != 0); // NOTE(review): std::stoi throws on non-numeric input -- confirm acceptable for env vars
+}
+
+} // namespace
+
+namespace exo
+{
+
+//
+// Logger
+//
+Logger::Logger(hermes::Context *ctx) { activate(ctx->sources(), ctx->bus()); } // register this source with the logging context
+Logger::~Logger() { deactivate(); } // unregister on destruction
+
+//
+// LoggerConfig
+//
+LoggerConfig::LoggerConfig()
+{
+  // Turn on logging if EXO_LOG is set as non-zero value
+  _enabled = safecast<bool>(std::getenv("EXO_LOG"), false); // disabled by default when EXO_LOG is unset
+}
+
+void LoggerConfig::configure(const hermes::Source *source, hermes::Source::Setting &setting) const
+{
+  // Let's ignore hermes::Sources if that is not an exo logger
+  if (auto logger = dynamic_cast<const Logger *>(source))
+  {
+    configure(logger, setting);
+  }
+}
+
+void LoggerConfig::configure(const Logger *, hermes::Source::Setting &setting) const
+{
+  if (_enabled)
+  {
+    // Enable all categories
+    setting.accept_all();
+  }
+  else
+  {
+    // Disable all categories
+    setting.reject_all();
+  }
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Log.h b/compiler/exo/src/Log.h
new file mode 100644
index 000000000..8ca38c3ec
--- /dev/null
+++ b/compiler/exo/src/Log.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOG_H__
+#define __LOG_H__
+
+#include "exo/LoggingContext.h"
+
+#include <hermes.h>
+
+namespace exo
+{
+
+/**
+ * @brief Logger Implementation
+ */
+class Logger final : public hermes::Source
+{
+public:
+ Logger(hermes::Context *ctx);
+ ~Logger();
+};
+
+/**
+ * @brief Logger Configuration
+ *
+ * Users are able to turn logging on/off via EXO_LOG environment variable.
+ */
+class LoggerConfig final : public hermes::Config
+{
+public:
+ LoggerConfig();
+
+public:
+ void configure(const hermes::Source *, hermes::Source::Setting &) const final;
+ void configure(const Logger *, hermes::Source::Setting &) const;
+
+private:
+ bool _enabled;
+};
+
+} // namespace exo
+
+/**
+ * HOW TO USE:
+ *
+ * LOGGER(l);
+ *
+ * INFO(l) << "Hello, World" << std::endl;
+ *
+ */
+#define LOGGER(name) ::exo::Logger name{::exo::LoggingContext::get()};
+
+// TODO Support FATAL, ERROR, WARN, and VERBOSE
+#define INFO(name) HERMES_INFO(name)
+
+// WARNING!
+//
+// THE CURRENT IMPLEMENTATION IS NOT THREAD SAFE.
+//
+
+#endif // __LOG_H__
diff --git a/compiler/exo/src/LogHelper.cpp b/compiler/exo/src/LogHelper.cpp
new file mode 100644
index 000000000..7520b7ec8
--- /dev/null
+++ b/compiler/exo/src/LogHelper.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LogHelper.h"
+
+namespace loco
+{
+
+std::ostream &operator<<(std::ostream &os, const loco::FeatureShape &feature_shape)
+{
+ os << "[" << feature_shape.count().value() << "," << feature_shape.height().value() << ","
+ << feature_shape.width().value() << "," << feature_shape.depth().value() << "]";
+ return os;
+}
+
+std::ostream &operator<<(std::ostream &os, const loco::FilterShape &filter_shape)
+{
+ os << "[" << filter_shape.height().value() << "," << filter_shape.width().value() << ","
+ << filter_shape.depth().value() << "," << filter_shape.count().value() << "]";
+ return os;
+}
+
+std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape)
+{
+ os << "[";
+ for (uint32_t r = 0; r < tensor_shape.rank(); ++r)
+ {
+ if (r)
+ os << ",";
+ os << tensor_shape.dim(r).value();
+ }
+ os << "]";
+ return os;
+}
+
+std::ostream &operator<<(std::ostream &os, const loco::Padding2D &pad)
+{
+ os << "[TLBR " << pad.top() << "," << pad.left() << "," << pad.bottom() << "," << pad.right()
+ << "]";
+
+ return os;
+}
+
+} // namespace loco
+
+std::ostream &operator<<(std::ostream &os, const std::vector<int64_t> &vi64)
+{
+ for (auto vi : vi64)
+ {
+ os << vi << " ";
+ }
+ return os;
+}
+
+#include "ExoFormattedGraph.h"
+
+namespace exo
+{
+
+FormattedGraph fmt(loco::Graph *g)
+{
+ auto node_summary_builder = stdex::make_unique<NodeSummaryBuilderFactory>();
+ return std::move(locop::fmt<locop::LinearV1>(g).with(std::move(node_summary_builder)));
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/LogHelper.h b/compiler/exo/src/LogHelper.h
new file mode 100644
index 000000000..69d81af9e
--- /dev/null
+++ b/compiler/exo/src/LogHelper.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOG_HELPER_H__
+#define __LOG_HELPER_H__
+
+#include <locop/FormattedGraph.h>
+
+#include <loco/IR/FeatureShape.h>
+#include <loco/IR/FilterShape.h>
+#include <loco/IR/TensorShape.h>
+
+#include <sstream>
+#include <vector>
+
+namespace loco
+{
+
+/**
+ * @brief dump FeatureShape values to stream
+ */
+std::ostream &operator<<(std::ostream &os, const loco::FeatureShape &feature_shape);
+
+/**
+ * @brief dump FilterShape values to stream
+ */
+std::ostream &operator<<(std::ostream &os, const loco::FilterShape &filter_shape);
+
+/**
+ * @brief dump TensorShape values to stream
+ */
+std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape);
+
+/**
+ * @brief dump Padding2D values to stream
+ */
+std::ostream &operator<<(std::ostream &os, const loco::Padding2D &pad);
+
+} // namespace loco
+
+/**
+ * @brief dump std::vector<int64_t> values to stream
+ */
+std::ostream &operator<<(std::ostream &os, const std::vector<int64_t> &vi64);
+
+namespace exo
+{
+
+using FormattedGraph = locop::FormattedGraphImpl<locop::Formatter::LinearV1>;
+
+FormattedGraph fmt(loco::Graph *g);
+
+static inline FormattedGraph fmt(const std::unique_ptr<loco::Graph> &g) { return fmt(g.get()); }
+
+} // namespace exo
+
+#endif // __LOG_HELPER_H__
diff --git a/compiler/exo/src/LoggingContext.cpp b/compiler/exo/src/LoggingContext.cpp
new file mode 100644
index 000000000..1c14d97b9
--- /dev/null
+++ b/compiler/exo/src/LoggingContext.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exo/LoggingContext.h"
+#include "Log.h" // To use LoggerConfig
+
+#include <hermes/ConsoleReporter.h>
+#include <stdex/Memory.h>
+
+namespace exo
+{
+
+hermes::Context *LoggingContext::get(void)
+{
+ static hermes::Context *ctx = nullptr;
+
+ if (ctx == nullptr)
+ {
+ ctx = new hermes::Context;
+ ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
+ ctx->config(stdex::make_unique<LoggerConfig>());
+ }
+
+ return ctx;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Pass/FoldReshapeOfConstPass.cpp b/compiler/exo/src/Pass/FoldReshapeOfConstPass.cpp
new file mode 100644
index 000000000..0fdcea939
--- /dev/null
+++ b/compiler/exo/src/Pass/FoldReshapeOfConstPass.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FoldReshapeOfConstPass.h"
+
+#include "Check.h"
+
+#include "Dialect/IR/TFLNodes.h"
+#include "Dialect/IR/TFLNodeVisitor.h"
+
+#include <loco/Service/ShapeInference.h>
+
+#include <oops/InternalExn.h>
+
+namespace
+{
+
+/**
+ * @brief Check if node is TFLReshape and its input is TFLConst
+ * @return Casted TFLReshape for foldable candidate, nullptr otherwise
+ */
+locoex::TFLReshape *as_candidate(loco::Node *node)
+{
+ auto reshape = dynamic_cast<locoex::TFLReshape *>(node);
+ if (not reshape)
+ return nullptr;
+
+ // Only accept Constant input of Reshape
+ if (not dynamic_cast<locoex::TFLConst *>(reshape->tensor()))
+ return nullptr;
+
+ return reshape;
+}
+
+uint32_t volume(loco::Node *tensor_node)
+{
+ auto shape = loco::shape_get(tensor_node).as<loco::TensorShape>();
+
+ uint32_t vol = 1;
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ vol *= shape.dim(axis).value();
+
+ return vol;
+}
+
+void fold_reshape_of_const(locoex::TFLReshape *reshape)
+{
+ const loco::DataType FLOAT32 = loco::DataType::FLOAT32;
+
+ auto const_orig = dynamic_cast<locoex::TFLConst *>(reshape->tensor());
+
+ // Exceptions
+ {
+ EXO_ASSERT(const_orig, "Only support for Reshape-Const pair");
+ // TODO support other data types
+ if (const_orig->dtype() != FLOAT32)
+ INTERNAL_EXN_V("NYI for this data type", oops::to_uint32(const_orig->dtype()));
+
+ if (volume(const_orig) != volume(reshape))
+ INTERNAL_EXN("New shape of Reshape is not matched");
+ }
+
+ auto new_shape = loco::shape_get(reshape).as<loco::TensorShape>();
+
+ // TFLConst to replace
+ auto const_new = reshape->graph()->nodes()->create<locoex::TFLConst>();
+
+ const_new->dtype(FLOAT32);
+ const_new->rank(new_shape.rank());
+ const_new->size<FLOAT32>(const_orig->size<FLOAT32>());
+ for (uint32_t axis = 0; axis < new_shape.rank(); ++axis)
+ const_new->dim(axis) = new_shape.dim(axis);
+
+ for (uint32_t i = 0; i < const_new->size<FLOAT32>(); ++i)
+ {
+ const_new->at<FLOAT32>(i) = const_orig->at<FLOAT32>(i);
+ }
+
+ // replace
+ loco::replace(reshape).with(const_new);
+}
+
+} // namespace
+
+namespace exo
+{
+
+bool FoldReshapeOfConstPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto reshape = as_candidate(node))
+ {
+ fold_reshape_of_const(reshape);
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Pass/FoldReshapeOfConstPass.h b/compiler/exo/src/Pass/FoldReshapeOfConstPass.h
new file mode 100644
index 000000000..10f8004bf
--- /dev/null
+++ b/compiler/exo/src/Pass/FoldReshapeOfConstPass.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PASS_FOLD_RESHAPE_OF_CONST_PASS_H__
+#define __PASS_FOLD_RESHAPE_OF_CONST_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace exo
+{
+
+/**
+ * @brief Class to fuse TFLReshape + TFLConst into one equivalent TFLConst
+ *
+ * <before>
+ * TFLConst --- TFLReshape --- Out
+ *
+ * <after>
+ * TFLConst --- TFLReshape ---
+ * TFLConst (new) ------------ Out
+ *
+ * TODO This pass is temporary. Deprecate this pass.
+ */
+struct FoldReshapeOfConstPass final : public logo::Pass
+{
+ const char *name(void) const final { return "exo::FoldReshapeOfConstPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace exo
+
+#endif // __PASS_FOLD_RESHAPE_OF_CONST_PASS_H__
diff --git a/compiler/exo/src/Pass/FoldTransposeOfConstPass.cpp b/compiler/exo/src/Pass/FoldTransposeOfConstPass.cpp
new file mode 100644
index 000000000..005c42944
--- /dev/null
+++ b/compiler/exo/src/Pass/FoldTransposeOfConstPass.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FoldTransposeOfConstPass.h"
+
+#include "Check.h"
+
+#include "Dialect/IR/TFLNodes.h"
+#include "Dialect/IR/TFLNodeVisitor.h"
+
+// TODO remove dependency to angkor
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <oops/InternalExn.h>
+
+namespace
+{
+
+/**
+ * @brief Check if node is TFLTranspose and its input is TFLConst
+ * @return Casted TFLTranspose for foldable candidate, nullptr otherwise
+ */
+locoex::TFLTranspose *as_candidate(loco::Node *node)
+{
+ auto transpose = dynamic_cast<locoex::TFLTranspose *>(node);
+ if (not transpose)
+ return nullptr;
+
+ // Only accept Constant input of Transpose
+ if (not dynamic_cast<locoex::TFLConst *>(transpose->a()))
+ return nullptr;
+
+ // Only accept Constant permutation of Transpose
+ if (not dynamic_cast<locoex::TFLConst *>(transpose->perm()))
+ return nullptr;
+
+ return transpose;
+}
+
+nncc::core::ADT::tensor::Shape angkor_shape(locoex::TFLConst *node)
+{
+ nncc::core::ADT::tensor::Shape ret;
+
+ ret.resize(node->rank());
+ for (uint32_t axis = 0; axis < node->rank(); ++axis)
+ {
+ ret.dim(axis) = node->dim(axis).value();
+ }
+
+ return ret;
+}
+
+void fold_transpose_of_const(locoex::TFLTranspose *transpose)
+{
+ const loco::DataType FLOAT32 = loco::DataType::FLOAT32;
+ const loco::DataType S32 = loco::DataType::S32;
+
+ auto const_orig = dynamic_cast<locoex::TFLConst *>(transpose->a());
+ auto perm = dynamic_cast<locoex::TFLConst *>(transpose->perm());
+
+ // Exceptions
+ {
+ EXO_ASSERT(const_orig, "Only support for Transpose-Const pair");
+ // TODO support other data types
+ if (const_orig->dtype() != FLOAT32)
+ INTERNAL_EXN_V("NYI for this data type", oops::to_uint32(const_orig->dtype()));
+
+ EXO_ASSERT(perm, "Only support for constant permutation for Transpose");
+ // TODO support other data types
+ if (perm->dtype() != S32)
+ INTERNAL_EXN_V("NYI for this data type", oops::to_uint32(perm->dtype()));
+
+ auto okay = [&]() {
+ if (perm->rank() != 1)
+ return false;
+ if (perm->dim(0).value() != const_orig->rank())
+ return false;
+ return true;
+ };
+ if (not okay())
+ INTERNAL_EXN("Input and permutation for Transpose is not congruent");
+ }
+
+ uint32_t rank = const_orig->rank();
+
+ // TFLConst to replace
+ auto const_new = transpose->graph()->nodes()->create<locoex::TFLConst>();
+
+ const_new->dtype(FLOAT32);
+ const_new->rank(rank);
+ const_new->size<FLOAT32>(const_orig->size<FLOAT32>());
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ const_new->dim(axis) = const_orig->dim(perm->at<S32>(axis)).value();
+
+ // TODO remove dependency to angkor
+ auto shape_orig = angkor_shape(const_orig);
+ auto shape_new = angkor_shape(const_new);
+
+ nncc::core::ADT::tensor::LexicalLayout l;
+ nncc::core::ADT::tensor::IndexEnumerator e{shape_new};
+
+ for (; e.valid(); e.advance())
+ {
+ loco::TensorIndex index_new = e.current();
+ loco::TensorIndex index_orig;
+
+ // Set original index from matching new index
+ index_orig.resize(rank);
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ index_orig.at(perm->at<S32>(axis)) = index_new.at(axis);
+
+ const_new->at<FLOAT32>(l.offset(shape_new, index_new)) =
+ const_orig->at<FLOAT32>(l.offset(shape_orig, index_orig));
+ }
+
+ // replace
+ loco::replace(transpose).with(const_new);
+}
+
+} // namespace
+
+namespace exo
+{
+
+bool FoldTransposeOfConstPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto transpose = as_candidate(node))
+ {
+ fold_transpose_of_const(transpose);
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Pass/FoldTransposeOfConstPass.h b/compiler/exo/src/Pass/FoldTransposeOfConstPass.h
new file mode 100644
index 000000000..26656a118
--- /dev/null
+++ b/compiler/exo/src/Pass/FoldTransposeOfConstPass.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PASS_FOLD_TRANSPOSE_OF_CONST_PASS_H__
+#define __PASS_FOLD_TRANSPOSE_OF_CONST_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace exo
+{
+
+/**
+ * @brief Class to fuse TFLTranspose + TFLConst into one equivalent TFLConst
+ *
+ * <before>
+ * TFLConst --- TFLTranspose --- Out
+ *
+ * <after>
+ * TFLConst --- TFLTranspose ---
+ * TFLConst (new) -------------- Out
+ *
+ * TODO This pass is temporary. Deprecate this pass.
+ */
+struct FoldTransposeOfConstPass final : public logo::Pass
+{
+ const char *name(void) const final { return "exo::FoldTransposeOfConstPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace exo
+
+#endif // __PASS_FOLD_TRANSPOSE_OF_CONST_PASS_H__
diff --git a/compiler/exo/src/Pass/FuseBiasAddPass.cpp b/compiler/exo/src/Pass/FuseBiasAddPass.cpp
new file mode 100644
index 000000000..aab820995
--- /dev/null
+++ b/compiler/exo/src/Pass/FuseBiasAddPass.cpp
@@ -0,0 +1,362 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FuseBiasAddPass.h"
+
+#include "Dialect/IR/TFLNodes.h"
+#include "Dialect/IR/TFLDialect.h"
+#include "Dialect/IR/TFLNodeVisitor.h"
+
+#include <loco/Service/TypeInference.h>
+#include <loco/Service/ShapeInference.h>
+
+#include <oops/InternalExn.h>
+
+#include <set>
+
+/*
+ Note: Terms for variables in this implementation are as follows:
+
+ ex) subgraph handled: TFLConv2D -------- TFLAdd
+ (or TFLDepthwiseConv2D) (or TFLSub)
+ | |
+ \|/ \|/
+ variable name : former latter
+ Type : FormerT LatterT
+ (shortened name from Mixin) (template type)
+*/
+namespace
+{
+
+using FormerT = locoex::TFLNodeMixin<locoex::TFLNodeTrait::Bias>;
+
+loco::Node *as_loco_node(FormerT *former)
+{
+ auto loco_node = dynamic_cast<loco::Node *>(former);
+ assert(loco_node != nullptr);
+
+ return loco_node;
+}
+
+locoex::TFLConst *get_const(loco::Node *x, loco::Node *y)
+{
+ if (auto const_node = dynamic_cast<locoex::TFLConst *>(x))
+ return const_node;
+ else if (auto const_node = dynamic_cast<locoex::TFLConst *>(y))
+ return const_node;
+
+ return nullptr;
+}
+
+FormerT *get_former(loco::Node *x, loco::Node *y)
+{
+ if (auto node = dynamic_cast<FormerT *>(x))
+ return node;
+ else if (auto node = dynamic_cast<FormerT *>(y))
+ return node;
+
+ return nullptr;
+}
+
+/// @brief Finds input that is TFLConst and set it to new_input
+void set_const_input(locoex::TFLNode *node, locoex::TFLConst *new_input)
+{
+ if (auto add = dynamic_cast<locoex::TFLAdd *>(node))
+ {
+ if (dynamic_cast<locoex::TFLConst *>(add->x()))
+ add->x(new_input);
+ else if (dynamic_cast<locoex::TFLConst *>(add->y()))
+ add->y(new_input);
+ else
+ assert(false and "One node should be TFLConst");
+
+ return;
+ }
+
+ if (auto sub = dynamic_cast<locoex::TFLSub *>(node))
+ {
+ if (dynamic_cast<locoex::TFLConst *>(sub->x()))
+ sub->x(new_input);
+ else if (dynamic_cast<locoex::TFLConst *>(sub->y()))
+ sub->y(new_input);
+ else
+ assert(false and "One node should be TFLConst");
+
+ return;
+ }
+
+ assert(false and "Param should be TFLAdd or TFLSub");
+}
+
+/**
+ * @brief Creates a TFLConst whose shape is [to] and values are all const_node->at(0),
+ * where const_node has only one element(a scalar or a tensor of shape [1])
+ */
+locoex::TFLConst *create_widened(locoex::TFLConst *const_node, uint32_t to)
+{
+ auto const_shape = loco::shape_get(const_node).as<loco::TensorShape>();
+
+ assert(const_shape.rank() == 0 or (const_shape.rank() == 1 and const_shape.dim(0) == 1));
+
+ auto g = const_node->graph();
+
+ auto widened_const = g->nodes()->create<locoex::TFLConst>();
+ {
+ widened_const->dtype(loco::DataType::FLOAT32);
+ widened_const->rank(1);
+ widened_const->dim(0) = to;
+ widened_const->size<loco::DataType::FLOAT32>(to);
+ for (uint32_t x = 0; x < to; x++)
+ widened_const->at<loco::DataType::FLOAT32>(x) = const_node->at<loco::DataType::FLOAT32>(0);
+ }
+ return widened_const;
+}
+
+template <typename TFLType> float calc(float, float);
+
+template <> float calc<locoex::TFLAdd>(float x, float y) { return x + y; }
+template <> float calc<locoex::TFLSub>(float x, float y) { return x - y; }
+
+template <class LatterT> class Fuser
+{
+public:
+ Fuser(LatterT *latter)
+ {
+ static_assert(std::is_same<LatterT, locoex::TFLAdd>::value ||
+ std::is_same<LatterT, locoex::TFLSub>::value,
+ "wrong template type");
+
+ _latter = latter;
+ _graph = _latter->graph();
+ _const_node = get_const(_latter->x(), _latter->y());
+ _former = get_former(_latter->x(), _latter->y());
+
+ assert(_const_node && _former);
+ }
+
+ void fuse(void);
+
+private:
+ loco::Graph *_graph;
+ LatterT *_latter;
+ locoex::TFLConst *_const_node;
+ FormerT *_former;
+
+ locoex::TFLConst *create_fused_bias_const();
+};
+
+// instantiation
+template class Fuser<locoex::TFLAdd>;
+template class Fuser<locoex::TFLSub>;
+
+template <class LatterT> locoex::TFLConst *Fuser<LatterT>::create_fused_bias_const()
+{
+ // we have to create a new bias const by adding/subtracting bias and const node (of TFLAdd or
+ // TFLSub)
+ auto bias = dynamic_cast<locoex::TFLConst *>(_former->bias());
+ assert(bias->dtype() == loco::DataType::FLOAT32 &&
+ _const_node->dtype() == loco::DataType::FLOAT32);
+
+ assert(bias->rank() == 1 && _const_node->rank() == 1);
+ assert(bias->dim(0) == _const_node->dim(0));
+
+ // build a new bias const
+ auto new_bias = _graph->nodes()->create<locoex::TFLConst>();
+ {
+ new_bias->dtype(loco::DataType::FLOAT32);
+
+ new_bias->rank(1);
+ new_bias->dim(0) = bias->dim(0);
+
+ new_bias->size<loco::DataType::FLOAT32>(bias->dim(0).value());
+
+ for (uint32_t x = 0; x < bias->dim(0).value(); x++)
+ new_bias->at<loco::DataType::FLOAT32>(x) = calc<LatterT>(
+ bias->at<loco::DataType::FLOAT32>(x), _const_node->at<loco::DataType::FLOAT32>(x));
+ }
+
+ return new_bias;
+}
+
+// FuseBiasAddPass works when former->fusedActivationFunction() == NONE
+bool check_act_func(FormerT *former)
+{
+ using FusedActFuncMixin = locoex::TFLNodeMixin<locoex::TFLNodeTrait::FusedActFunc>;
+
+ if (auto node = dynamic_cast<FusedActFuncMixin *>(former))
+ return node->fusedActivationFunction() == locoex::FusedActFunc::NONE;
+ else
+ return true;
+}
+
+template <class LatterT> void set_act_func(FormerT *former, LatterT *latter)
+{
+ using FusedActFuncMixin = locoex::TFLNodeMixin<locoex::TFLNodeTrait::FusedActFunc>;
+
+ if (auto node = dynamic_cast<FusedActFuncMixin *>(former))
+ node->fusedActivationFunction(latter->fusedActivationFunction());
+}
+
+// instantiation
+template void set_act_func(FormerT *, locoex::TFLAdd *);
+template void set_act_func(FormerT *, locoex::TFLSub *);
+
+/**
+ * @brief Fuse TFLAdd or TFLSub (latter) into TFLConv2d or TFLDepthwiseConv2D (former).
+ * All conditions should be checked before calling this.
+ *
+ * @note TFLAdd can have fused activation function (let's call this FAF for simplicity).
+ *
+ * Conv2D's FAF | TFLAdd's FAF => FAF after fusing TFLAdd into TFLConv2D
+ * ----------------|--------------- --------------------------------------
+ * NONE | NONE, RELU or RELU6 => TFLAdd's FAF
+ * other than NONE | anything => cannot be fused
+ */
+template <class LatterT> void Fuser<LatterT>::fuse(void)
+{
+ // check fused activation function
+ {
+ assert(check_act_func(_former));
+
+ set_act_func<LatterT>(_former, _latter);
+ }
+
+ auto new_bias = create_fused_bias_const();
+
+ // replace node with new_bias
+ // note that loco::replace() is not used because bias could be input of other op just in case
+ _former->bias(new_bias);
+
+ // remove TFLAdd or TFLSub node
+ loco::replace(_latter).with(as_loco_node(_former));
+ _latter->x(nullptr);
+ _latter->y(nullptr);
+}
+
+struct Collector final : public locoex::TFLNodeMutableVisitor<void>
+{
+ template <class LatterT>
+ void setCandidate(FormerT *former, LatterT *latter, locoex::TFLConst *const_node)
+ {
+ static_assert(std::is_same<LatterT, locoex::TFLAdd>::value ||
+ std::is_same<LatterT, locoex::TFLSub>::value,
+ "wrong template type");
+
+ if (!check_act_func(former))
+ return;
+
+ auto depth =
+ loco::shape_get(as_loco_node(former)).template as<loco::TensorShape>().dim(3).value();
+ auto const_shape = loco::shape_get(const_node).template as<loco::TensorShape>();
+
+ if (const_shape.rank() == 1 and const_shape.dim(0) == depth)
+ {
+ candidates.insert(latter);
+ }
+ // when Const has only one value, create a new const with shape [depth]
+ else if (const_shape.rank() == 0 or (const_shape.rank() == 1 and const_shape.dim(0) == 1))
+ {
+ if (!(loco::dtype_get(as_loco_node(former)) == loco::DataType::FLOAT32))
+ INTERNAL_EXN_V("Unsupported data type",
+ oops::to_uint32(loco::dtype_get(as_loco_node(former))));
+ if (!(const_node->dtype() == loco::DataType::FLOAT32))
+ INTERNAL_EXN_V("Unsupported data type", oops::to_uint32(const_node->dtype()));
+
+ auto new_bias_node = create_widened(const_node, depth);
+
+ // Replacing TFLConst input of TFLAdd or TFLSub.
+ // Note that calling loco::replace(const_node).with(new_bias_node) could be dangerous
+ // because const_node could be the input of many nodes
+ set_const_input(latter, new_bias_node);
+
+ candidates.insert(latter);
+ }
+ }
+
+ void visit(locoex::TFLAdd *latter) final
+ {
+ auto former = get_former(latter->x(), latter->y());
+ auto const_node = get_const(latter->x(), latter->y());
+
+ if (former && const_node)
+ setCandidate<locoex::TFLAdd>(former, latter, const_node);
+ }
+
+ void visit(locoex::TFLSub *latter) final
+ {
+ // TFLSub, of which x() = TFLConv2D or TFLDepthwiseConv2D, y() = TFLConst, is fusing target
+ auto former = dynamic_cast<FormerT *>(latter->x());
+ auto const_node = dynamic_cast<locoex::TFLConst *>(latter->y());
+
+ if (former && const_node)
+ setCandidate<locoex::TFLSub>(former, latter, const_node);
+ }
+
+ void visit(locoex::TFLNode *) final { return; }
+
+ std::set<locoex::TFLNode *> candidates;
+};
+
+struct Performer final : public locoex::TFLNodeMutableVisitor<void>
+{
+ void visit(locoex::TFLAdd *latter) final
+ {
+ assert(get_former(latter->x(), latter->y()));
+
+ Fuser<locoex::TFLAdd> fuser(latter);
+ fuser.fuse();
+ }
+
+ void visit(locoex::TFLSub *latter) final
+ {
+ assert(get_former(latter->x(), latter->y()));
+
+ Fuser<locoex::TFLSub> fuser(latter);
+ fuser.fuse();
+ }
+
+ void visit(locoex::TFLNode *) final { assert(false && "should not be called"); }
+};
+
+} // namespace
+
+namespace exo
+{
+
+bool FuseBiasAddPass::run(loco::Graph *g)
+{
+ Collector collector;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (node->dialect() == locoex::TFLDialect::get())
+ {
+ auto tfl_node = dynamic_cast<locoex::TFLNode *>(node);
+ tfl_node->accept(&collector);
+ }
+ }
+
+ Performer performer;
+
+ for (auto node : collector.candidates)
+ {
+ node->accept(&performer);
+ }
+
+ return collector.candidates.size() > 0;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Pass/FuseBiasAddPass.h b/compiler/exo/src/Pass/FuseBiasAddPass.h
new file mode 100644
index 000000000..68e624c6b
--- /dev/null
+++ b/compiler/exo/src/Pass/FuseBiasAddPass.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PASS_FUSE_BIASADD_PASS_H__
+#define __PASS_FUSE_BIASADD_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace exo
+{
+
+/**
+ * @brief Class to fuse TFLAdd or TFLSub into Bias input of the following ops:
+ * - TFLConv2D, TFLDepthwiseConv2D
+ * - TODO Consider to add FullyConnected, etc.
+ *
+ * Case 1. Conv2D and TFLAdd
+ *
+ * BEFORE:
+ *
+ * TFLConst A (a scalar or a tensor of shape [1] or [depth of TFLConv2D])
+ * |
+ * Foo -- TFLConv2D -- TFLAdd (or TFLSub) -- Bar
+ * |
+ * TFLConst B --+ (bias)
+ *
+ * AFTER:
+ * Foo ----- TFLConv2D ----- Bar
+ * |
+ * TFLConst A' --+ (bias)
+ *
+ * TFLConst B (dead node)
+ *
+ * TFLAdd (or TFLSub) (dead node)
+ *
+ * @note TFLSub, of which x() == TFLConv2D and y() == TFLConst, will be fused.
+ * If x() == TFLConst and y() == TFLConv2D, it won't be fused.
+ */
+struct FuseBiasAddPass final : public logo::Pass
+{
+ const char *name(void) const final { return "exo::FuseBiasAddPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace exo
+
+#endif // __PASS_FUSE_BIASADD_PASS_H__
diff --git a/compiler/exo/src/Pass/FuseBiasAddPass.test.cpp b/compiler/exo/src/Pass/FuseBiasAddPass.test.cpp
new file mode 100644
index 000000000..6ba728de0
--- /dev/null
+++ b/compiler/exo/src/Pass/FuseBiasAddPass.test.cpp
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FuseBiasAddPass.h"
+
+#include "Dialect/IR/TFLNodes.h"
+#include "TestGraph.h"
+#include "TestHelper.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/// @brief Sets the graph input (Pull) to a FLOAT32 tensor of shape {2, 3, 3, 2}
+void init(loco::Pull *pull)
+{
+  pull->dtype(loco::DataType::FLOAT32);
+  pull->shape({2, 3, 3, 2});
+}
+
+/// @brief Initializes TFLConv2D and related filter and bias
+void init(locoex::TFLConv2D *conv2d, locoex::TFLConst *filter, locoex::TFLConst *bias)
+{
+  // set conv2d : no fused activation, VALID padding
+  {
+    conv2d->fusedActivationFunction(locoex::FusedActFunc::NONE);
+    conv2d->padding(locoex::Padding::VALID);
+  }
+
+  // set filter : shape {2, 3, 3, 2}, all elements zero
+  {
+    filter->dtype(loco::DataType::FLOAT32);
+    filter->shape({2, 3, 3, 2});
+    filter->size<loco::DataType::FLOAT32>(2 * 3 * 3 * 2);
+
+    for (uint32_t x = 0; x < 2 * 3 * 3 * 2; x++)
+      filter->at<loco::DataType::FLOAT32>(x) = 0.0;
+  }
+
+  // set bias : two zero elements, one per output channel
+  {
+    bias->dtype(loco::DataType::FLOAT32);
+    bias->shape({2});
+    bias->size<loco::DataType::FLOAT32>(2);
+
+    for (uint32_t x = 0; x < 2; x++)
+      bias->at<loco::DataType::FLOAT32>(x) = 0.0;
+  }
+}
+
+/// @brief Sets the fused activation function of a TFLAdd or TFLSub node
+template <class T> void init(T *node, locoex::FusedActFunc f)
+{
+  // Restrict T at compile time to the two node types this helper supports
+  static_assert(std::is_same<T, locoex::TFLAdd>::value || std::is_same<T, locoex::TFLSub>::value,
+                "wrong template type");
+
+  node->fusedActivationFunction(f);
+}
+
+/// @brief Initializes one param of TFLAdd or TFLSub
+void init(locoex::TFLConst *addsub_param)
+{
+  // set addsub_param : the y() operand of TFLAdd or TFLSub — shape {2}, values (1.5, 3)
+  addsub_param->dtype(loco::DataType::FLOAT32);
+  addsub_param->shape({2});
+  addsub_param->size<loco::DataType::FLOAT32>(2);
+
+  for (uint32_t x = 0; x < 2; x++)
+    addsub_param->at<loco::DataType::FLOAT32>(x) = (x + 1) * 1.5; // 1.5, 3
+}
+
+} // namespace
+
+// A case when
+// - TFLConv2D has bias (0, 0)
+// - TFLAdd, of which x() or y() == TFLConv2D
+// - Another param of TFLAdd is TFLConst, (1.5, 3)
+//
+// After fusion, bias should be (1.5, 3)
+TEST(FuseBiasAddPassTest, Conv2D_Add_01_basic)
+{
+  // Build: Pull -> Conv2D(filter, bias) -> Add(conv2d, add_y)
+  exo::test::TestGraph g;
+  auto filter = g.append<locoex::TFLConst>();
+  auto bias = g.append<locoex::TFLConst>();
+  auto conv2d = g.append<locoex::TFLConv2D>(g.pull, filter, bias);
+
+  auto add_y = g.append<locoex::TFLConst>();
+  auto add = g.append<locoex::TFLAdd>(conv2d, add_y);
+
+  g.complete(add);
+
+  init(g.pull);
+  init(conv2d, filter, bias);
+  init(add, locoex::FusedActFunc::NONE);
+  init(add_y);
+
+  // let's run fusion
+  {
+    exo::test::TypeShapeReadyPhase test_phase;
+
+    test_phase.add_pass<exo::FuseBiasAddPass>();
+    test_phase.run(g.graph());
+  }
+
+  auto a_conv2d = exo::test::find_first_node_bytype<locoex::TFLConv2D>(g.graph());
+  ASSERT_TRUE(a_conv2d != nullptr);
+
+  auto a_bias = dynamic_cast<locoex::TFLConst *>(a_conv2d->bias());
+  ASSERT_TRUE(a_bias != nullptr);
+
+  // The new bias must be the element-wise sum of the old bias and add_y
+  ASSERT_TRUE(a_bias->dim(0) == 2);
+  ASSERT_FLOAT_EQ(a_bias->at<loco::DataType::FLOAT32>(0),
+                  bias->at<loco::DataType::FLOAT32>(0) + add_y->at<loco::DataType::FLOAT32>(0));
+  ASSERT_FLOAT_EQ(a_bias->at<loco::DataType::FLOAT32>(1),
+                  bias->at<loco::DataType::FLOAT32>(1) + add_y->at<loco::DataType::FLOAT32>(1));
+}
+
+// A case when
+// - TFLConv2D has bias (0, 0)
+// - TFLAdd, of which x() or y() == TFLConv2D
+// - Another param of TFLAdd is TFLConst, (1.5) <-- scalar
+//
+// After fusion, bias should be (1.5, 1.5)
+TEST(FuseBiasAddPassTest, Conv2D_Add_02_TFLAdd_y_is_scalar)
+{
+  // Build: Pull -> Conv2D(filter, bias) -> Add(conv2d, add_y) where add_y has shape {1}
+  exo::test::TestGraph g;
+  auto filter = g.append<locoex::TFLConst>();
+  auto bias = g.append<locoex::TFLConst>();
+  auto conv2d = g.append<locoex::TFLConv2D>(g.pull, filter, bias);
+
+  auto add_y = g.append<locoex::TFLConst>();
+  auto add = g.append<locoex::TFLAdd>(conv2d, add_y);
+
+  g.complete(add);
+
+  init(g.pull);
+  init(conv2d, filter, bias); // channel of conv2d is 2
+
+  {
+    // Size of this TFLConst is 1.
+    // Note that this should be widened later to the shape of [channel of Conv2D], which is [2]
+    add_y->dtype(loco::DataType::FLOAT32);
+    add_y->shape({1});
+    add_y->size<loco::DataType::FLOAT32>(1);
+    add_y->at<loco::DataType::FLOAT32>(0) = 1.5;
+  }
+  init(add, locoex::FusedActFunc::NONE);
+
+  // let's run fusion
+  {
+    exo::test::TypeShapeReadyPhase test_phase;
+
+    test_phase.add_pass<exo::FuseBiasAddPass>();
+    test_phase.run(g.graph());
+  }
+
+  auto a_conv2d = exo::test::find_first_node_bytype<locoex::TFLConv2D>(g.graph());
+  ASSERT_TRUE(a_conv2d != nullptr);
+
+  auto a_bias = dynamic_cast<locoex::TFLConst *>(a_conv2d->bias());
+  ASSERT_TRUE(a_bias != nullptr);
+
+  // The scalar 1.5 must have been broadcast across both output channels
+  ASSERT_TRUE(a_bias->dim(0) == 2);
+  ASSERT_FLOAT_EQ(a_bias->at<loco::DataType::FLOAT32>(0),
+                  bias->at<loco::DataType::FLOAT32>(0) + 1.5);
+  ASSERT_FLOAT_EQ(a_bias->at<loco::DataType::FLOAT32>(1),
+                  bias->at<loco::DataType::FLOAT32>(1) + 1.5);
+}
+
+// A case when
+// - TFLConv2D has bias (0, 0)
+// - TFLSub.x() == TFLConv2D
+// - TFLSub.y() == TFLConst, (1.5, 3)
+//
+// After fusion, bias should be (-1.5, -3)
+TEST(FuseBiasAddPassTest, Conv2D_Sub_01_basic)
+{
+  // Build: Pull -> Conv2D(filter, bias) -> Sub(conv2d, sub_y)
+  exo::test::TestGraph g;
+  auto filter = g.append<locoex::TFLConst>();
+  auto bias = g.append<locoex::TFLConst>();
+  auto conv2d = g.append<locoex::TFLConv2D>(g.pull, filter, bias);
+
+  auto sub_y = g.append<locoex::TFLConst>();
+  auto sub = g.append<locoex::TFLSub>(conv2d, sub_y);
+
+  g.complete(sub);
+
+  init(g.pull);
+  init(conv2d, filter, bias);
+  init(sub, locoex::FusedActFunc::NONE);
+  init(sub_y);
+
+  // let's run fusion
+  {
+    exo::test::TypeShapeReadyPhase test_phase;
+
+    test_phase.add_pass<exo::FuseBiasAddPass>();
+    test_phase.run(g.graph());
+  }
+
+  auto a_conv2d = exo::test::find_first_node_bytype<locoex::TFLConv2D>(g.graph());
+  ASSERT_TRUE(a_conv2d != nullptr);
+
+  auto a_bias = dynamic_cast<locoex::TFLConst *>(a_conv2d->bias());
+  ASSERT_TRUE(a_bias != nullptr);
+
+  // For Sub, the new bias is old bias MINUS sub_y (element-wise)
+  ASSERT_TRUE(a_bias->dim(0) == 2);
+  ASSERT_FLOAT_EQ(a_bias->at<loco::DataType::FLOAT32>(0),
+                  bias->at<loco::DataType::FLOAT32>(0) - sub_y->at<loco::DataType::FLOAT32>(0));
+  ASSERT_FLOAT_EQ(a_bias->at<loco::DataType::FLOAT32>(1),
+                  bias->at<loco::DataType::FLOAT32>(1) - sub_y->at<loco::DataType::FLOAT32>(1));
+}
+
+// A case when TFLConv2D is input of TFLSub but fusion cannot be performed.
+// - TFLSub.x() == TFLConst
+// - TFLSub.y() == TFLConv2D
+//
+// Here, TFLSub cannot be fused into TFLConst. To be fused, TFLSub.x() should be TFLConv2D and
+// TFLSub.y() should be TFLConst. So fusion will NOT happen.
+TEST(FuseBiasAddPassTest, Conv2D_Sub_02_fusing_will_not_performed)
+{
+  // Build: Pull -> Conv2D(filter, bias) -> Sub(sub_y, conv2d) — Conv2D on y(), not x()
+  exo::test::TestGraph g;
+  auto filter = g.append<locoex::TFLConst>();
+  auto bias = g.append<locoex::TFLConst>();
+  auto conv2d = g.append<locoex::TFLConv2D>(g.pull, filter, bias);
+
+  auto sub_y = g.append<locoex::TFLConst>();
+  auto sub = g.append<locoex::TFLSub>(sub_y, conv2d); // This WON'T be fused
+
+  g.complete(sub);
+
+  init(g.pull);
+  init(conv2d, filter, bias);
+  init(sub, locoex::FusedActFunc::NONE);
+  init(sub_y);
+
+  // let's run fusion
+  {
+    exo::test::TypeShapeReadyPhase test_phase;
+
+    test_phase.add_pass<exo::FuseBiasAddPass>();
+    test_phase.run(g.graph());
+  }
+
+  auto a_conv2d = exo::test::find_first_node_bytype<locoex::TFLConv2D>(g.graph());
+  ASSERT_TRUE(a_conv2d != nullptr);
+
+  auto a_bias = dynamic_cast<locoex::TFLConst *>(a_conv2d->bias());
+  ASSERT_TRUE(a_bias != nullptr);
+
+  // Bias must remain untouched (still all zero)
+  ASSERT_TRUE(a_bias->dim(0) == 2);
+  ASSERT_FLOAT_EQ(a_bias->at<loco::DataType::FLOAT32>(0), 0);
+  ASSERT_FLOAT_EQ(a_bias->at<loco::DataType::FLOAT32>(1), 0);
+
+  // The Sub node must still exist and still consume Conv2D through y()
+  auto a_sub = exo::test::find_first_node_bytype<locoex::TFLSub>(g.graph());
+  ASSERT_TRUE(a_sub != nullptr);
+  ASSERT_TRUE(a_sub->y() == a_conv2d); // Checking 'not-fused' state
+}
+
+// A case when
+// - TFLConv2D has an activation function with Relu
+// - TFLAdd, has no activation function
+//
+// No fusion should happen
+TEST(FuseBiasAddPassTest, Regression_Conv2D_Add_fused_action_00)
+{
+  // Build: Pull -> Conv2D(filter, bias) -> Add(conv2d, add_y)
+  exo::test::TestGraph g;
+  auto filter = g.append<locoex::TFLConst>();
+  auto bias = g.append<locoex::TFLConst>();
+  auto conv2d = g.append<locoex::TFLConv2D>(g.pull, filter, bias);
+
+  auto add_y = g.append<locoex::TFLConst>();
+  auto add = g.append<locoex::TFLAdd>(conv2d, add_y);
+
+  g.complete(add);
+
+  init(g.pull);
+  init(conv2d, filter, bias);
+  init(add, locoex::FusedActFunc::NONE);
+  init(add_y);
+
+  // Updating Fused Activation for this test: Conv2D already has RELU,
+  // which must block the fusion
+  conv2d->fusedActivationFunction(locoex::FusedActFunc::RELU);
+
+  // let's run fusion
+  {
+    exo::test::TypeShapeReadyPhase test_phase;
+
+    test_phase.add_pass<exo::FuseBiasAddPass>();
+    test_phase.run(g.graph());
+  }
+
+  // Both nodes must survive with their activations unchanged
+  auto a_conv2d = exo::test::find_first_node_bytype<locoex::TFLConv2D>(g.graph());
+  ASSERT_TRUE(a_conv2d != nullptr);
+  ASSERT_TRUE(a_conv2d->fusedActivationFunction() == locoex::FusedActFunc::RELU);
+
+  auto an_add = exo::test::find_first_node_bytype<locoex::TFLAdd>(g.graph());
+  ASSERT_TRUE(an_add != nullptr);
+  ASSERT_TRUE(an_add->fusedActivationFunction() == locoex::FusedActFunc::NONE);
+
+  ASSERT_TRUE(an_add->x() == a_conv2d or an_add->y() == a_conv2d);
+}
+
+// A case when
+// - TFLConv2D has NONE activation function
+// - TFLAdd has Relu activation function
+//
+// TFLConv2D should have Relu activation function, TFLAdd is fused into bias input
+TEST(FuseBiasAddPassTest, Regression_Conv2D_Add_fused_action_01)
+{
+  // Build: Pull -> Conv2D(filter, bias) -> Add(conv2d, add_y) with RELU on the Add
+  exo::test::TestGraph g;
+  auto filter = g.append<locoex::TFLConst>();
+  auto bias = g.append<locoex::TFLConst>();
+  auto conv2d = g.append<locoex::TFLConv2D>(g.pull, filter, bias);
+
+  auto add_y = g.append<locoex::TFLConst>();
+  auto add = g.append<locoex::TFLAdd>(conv2d, add_y);
+
+  g.complete(add);
+
+  init(g.pull);
+  init(conv2d, filter, bias);
+  init(add, locoex::FusedActFunc::RELU);
+  init(add_y);
+
+  // let's run fusion
+  {
+    exo::test::TypeShapeReadyPhase test_phase;
+
+    test_phase.add_pass<exo::FuseBiasAddPass>();
+    test_phase.run(g.graph());
+  }
+
+  auto a_conv2d = exo::test::find_first_node_bytype<locoex::TFLConv2D>(g.graph());
+  ASSERT_TRUE(a_conv2d != nullptr);
+
+  auto a_bias = dynamic_cast<locoex::TFLConst *>(a_conv2d->bias());
+  ASSERT_TRUE(a_bias != nullptr);
+
+  // Bias must absorb add_y, and the Add's RELU must migrate onto Conv2D
+  ASSERT_TRUE(a_bias->dim(0) == 2);
+  ASSERT_FLOAT_EQ(a_bias->at<loco::DataType::FLOAT32>(0),
+                  bias->at<loco::DataType::FLOAT32>(0) + add_y->at<loco::DataType::FLOAT32>(0));
+  ASSERT_FLOAT_EQ(a_bias->at<loco::DataType::FLOAT32>(1),
+                  bias->at<loco::DataType::FLOAT32>(1) + add_y->at<loco::DataType::FLOAT32>(1));
+
+  ASSERT_TRUE(a_conv2d->fusedActivationFunction() == locoex::FusedActFunc::RELU);
+}
diff --git a/compiler/exo/src/Pass/FuseInstanceNormPass.cpp b/compiler/exo/src/Pass/FuseInstanceNormPass.cpp
new file mode 100644
index 000000000..04d4a62cd
--- /dev/null
+++ b/compiler/exo/src/Pass/FuseInstanceNormPass.cpp
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FuseInstanceNormPass.h"
+
+#include "Dialect/IR/TFLNodes.h"
+#include "Dialect/IR/CircleNodes.h"
+
+#include <loco/Service/ShapeInference.h>
+
+#include <cassert>
+#include <set>
+
+// Helper to find commutative node's arguments
+namespace
+{
+
+/**
+ * INTRODUCTION
+ * Binary operation f(x,y) is 'commutative' when
+ * f(x,y) == f(y,x) holds for all x, y.
+ * For examples, ADD, MUL and SQUARED_DIFFERENCE are commutative.
+ *  These helpers make it easy to find the commutative arguments of a commutative node.
+ *
+ * HOW TO USE
+ * COMM_NODE *node;
+ * ARG_TYPE_1 *arg1;
+ * ARG_TYPE_2 *arg2;
+ *
+ * bool ok = fill(&arg1, &arg2).with_commutative_args_of(node);
+ *
+ * Result
+ * If 'node's commutative argument types are actually {ARG_TYPE_1, ARG_TYPE_2}
+ *    (as a set), 'arg1' and 'arg2' are set to the actual 'node's arguments with matching
+ * type, and return value 'ok' is true.
+ * Otherwise, 'arg1' and 'arg2' not changed, 'ok' is false.
+ */
+
+// Holds two output pointers; with_commutative_args_of() fills them from a
+// commutative binary node's x()/y(), in either order, matched by dynamic type.
+template <class ARG_TYPE_1, class ARG_TYPE_2> class NodeFiller final
+{
+public:
+  NodeFiller(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2) : _arg_1(arg_1), _arg_2(arg_2)
+  {
+    // DO NOTHING
+  }
+
+  /**
+   * @return true When 'node's argument types are 'ARG_TYPE_1' and 'ARG_TYPE_2'
+   *              In such case, it assign '_arg_1' and '_arg_2' to actual arguments
+   *
+   * @return false When 'node's argument types are NOT matched with 'ARG_TYPE_*'
+   *               In such case, it does not amend '_arg_1' and '_arg_2'
+   *
+   * @require COMM_NODE has member x() and y()
+   */
+  template <class COMM_NODE> bool with_commutative_args_of(const COMM_NODE *node);
+
+private:
+  // Destinations for the matched arguments (pointers-to-pointers, owned by caller)
+  ARG_TYPE_1 **_arg_1;
+  ARG_TYPE_2 **_arg_2;
+};
+
+// Convenience factory so call sites can write fill(&a, &b).with_commutative_args_of(n)
+// with template arguments deduced from the pointer types.
+template <class ARG_TYPE_1, class ARG_TYPE_2>
+inline NodeFiller<ARG_TYPE_1, ARG_TYPE_2> fill(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2)
+{
+  return NodeFiller<ARG_TYPE_1, ARG_TYPE_2>{arg_1, arg_2};
+}
+
+template <class ARG_TYPE_1, class ARG_TYPE_2>
+template <class COMM_NODE>
+bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_commutative_args_of(const COMM_NODE *node)
+{
+  // Case 1) X == ARG_TYPE_1 / Y == ARG_TYPE_2
+  {
+    auto x = dynamic_cast<ARG_TYPE_1 *>(node->x());
+    auto y = dynamic_cast<ARG_TYPE_2 *>(node->y());
+
+    if (x && y)
+    {
+      *_arg_1 = x;
+      *_arg_2 = y;
+      return true;
+    }
+  }
+
+  // Case 2) X == ARG_TYPE_2 / Y == ARG_TYPE_1 (swapped order — the node is commutative)
+  {
+    auto x = dynamic_cast<ARG_TYPE_2 *>(node->x());
+    auto y = dynamic_cast<ARG_TYPE_1 *>(node->y());
+
+    if (x && y)
+    {
+      *_arg_1 = y;
+      *_arg_2 = x;
+      return true;
+    }
+  }
+
+  // Neither orientation matched; outputs untouched
+  return false;
+}
+
+} // namespace
+
+// Helper to check detail
+namespace
+{
+
+/// @return true When node has shape of '1 x .. x 1 x depth'
+// NOTE(review): assumes rank >= 1 — if 'node' has rank 0, 'rank - 1' underflows
+// (uint32_t) and dim(axis) would be read out of range. TODO confirm callers
+// only pass ranked constants.
+bool is_1D_with_dummy_dim(locoex::TFLConst *node, uint32_t depth)
+{
+  auto rank = node->rank();
+  uint32_t axis;
+  // All leading dimensions must be 1 ...
+  for (axis = 0; axis < rank - 1; ++axis)
+  {
+    if (node->dim(axis).value() != 1)
+      return false;
+  }
+  // ... and the last dimension must equal 'depth'
+  return node->dim(axis).value() == depth;
+}
+
+/// @brief Checks whether 'mean' looks like the per-instance mean used by instance
+///        normalization: rank-4 input, reduction over axes {1, 2} (H and W of NHWC),
+///        and keep_dims enabled.
+bool is_instance_mean(locoex::TFLMean *mean)
+{
+  //
+  // CHECK 1) input is rank 4
+  //
+  auto input = mean->input();
+  if (not loco::shape_known(input))
+    return false;
+  auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
+  if (input_shape.rank() != 4)
+    return false;
+
+  //
+  // CHECK 2) 'reduction indices' is TFLConst of value [1,2], that is HW of NHWC
+  //
+  // TODO Support equivalent case, like [-3,-2]
+  // TODO Support non-Const case?
+  // TODO What if input is NCHW format in Circle?
+  auto red_indices = dynamic_cast<locoex::TFLConst *>(mean->reduction_indices());
+  if (not red_indices)
+    return false;
+  if (red_indices->rank() != 1)
+    return false;
+  // Collect indices into a set so duplicates/order do not matter
+  std::set<int32_t> red_indices_set;
+  {
+    // TODO Currently only support S32, support other types
+    assert(red_indices->dtype() == loco::DataType::S32);
+    for (uint32_t i = 0; i < red_indices->dim(0).value(); ++i)
+      red_indices_set.insert(red_indices->at<loco::DataType::S32>(i));
+  }
+  if (red_indices_set.size() != 2)
+    return false;
+  if (red_indices_set.find(1) == red_indices_set.end())
+    return false;
+  if (red_indices_set.find(2) == red_indices_set.end())
+    return false;
+
+  //
+  // CHECK 3) keep_dims == true (?)
+  //
+  // We only have case of 'keep_dims == true' so far, but it might be okay with 'keep_dims == false'
+  // TODO Check this fact, and if true, return true regardless of keep_dims
+  return mean->keep_dims();
+}
+
+} // namespace
+
+// Helper to fuse Instance Norm
+namespace
+{
+
+/**
+ * SUBGRAPH PATTERN
+ *
+ * - Below diagram shows Instance Norm pattern to fuse.
+ * - Execution dependency order is top to the bottom.
+ * - Node name is matched with variable name of InstanceNormPattern class.
+ * - Usually, first word of node name (variable name) is node type. For e.g.
+ * variable 'mean_as_variance' is pointer to TFLMean.
+ * - (Item in parenthesis) means actually exist, but not having a name and
+ * not a variable of InstanceNormPattern class.
+ *
+ * TODO support other semantically same patterns for instance norm
+ *
+ * [In]
+ * |
+ * V
+ * +----------- ifm -----+ (reduction indicies)
+ * | | | |
+ * | | V V
+ * | | mean_of_ifm ----------------+
+ * | V | |
+ * | sqdiff <--+ (reduction indicies) |
+ * | | | |
+ * | V | |
+ * | mean_as_variance <---+ const_as_epsilon |
+ * | | | |
+ * | V | |
+ * | add_as_variance <--------+ |
+ * | | |
+ * | V |
+ * | rsqrt const_as_gamma |
+ * | | | |
+ * | V | |
+ * | mul_gamma <--+ |
+ * | | | |
+ * V V V |
+ * mul_as_scaled_ifm mul_as_scaled_mean <-------------+
+ * | |
+ * | const_as_beta |
+ * | | V
+ * | +------> sub
+ * V |
+ * add_as_terminal <----------+
+ * |
+ * V
+ * [Out]
+ */
+// Matcher for the instance-norm subgraph rooted at its final TFLAdd
+// (see the diagram above for node roles).
+class InstanceNormPattern final
+{
+public:
+  // 'candidate' is the terminal TFLAdd of a potential instance-norm subgraph
+  InstanceNormPattern(locoex::TFLAdd *candidate)
+  {
+    assert(candidate);
+    add_as_terminal = candidate;
+  }
+
+public:
+  // Runs the match (memoized); the const overload only reports the cached result
+  bool matched();
+  bool matched() const { return _matched; }
+
+public:
+  // Context — each member is filled in by matched() with the corresponding
+  // node of the diagram above; all remain nullptr until a match succeeds.
+  loco::Node *ifm = nullptr;
+  locoex::TFLMean *mean_of_ifm = nullptr;
+  locoex::TFLSquaredDifference *sqdiff = nullptr;
+  locoex::TFLMean *mean_as_variance = nullptr;
+  locoex::TFLConst *const_as_epsilon = nullptr;
+  locoex::TFLAdd *add_as_variance = nullptr;
+  locoex::TFLRsqrt *rsqrt = nullptr;
+  locoex::TFLConst *const_as_gamma = nullptr;
+  locoex::TFLMul *mul_gamma = nullptr;
+  locoex::TFLMul *mul_as_scaled_ifm = nullptr;
+  locoex::TFLMul *mul_as_scaled_mean = nullptr;
+  locoex::TFLConst *const_as_beta = nullptr;
+  locoex::TFLSub *sub = nullptr;
+  locoex::TFLAdd *add_as_terminal = nullptr;
+
+private:
+  // Cached match result; set to true only after all checks in matched() pass
+  bool _matched = false;
+};
+
+// Walks the graph upward from add_as_terminal, filling the member pointers;
+// any failed structural check aborts the match and leaves _matched false.
+bool InstanceNormPattern::matched()
+{
+  if (_matched)
+    return true;
+
+#define CHECK_OR_FALSE(condition) \
+  if (not(condition))             \
+    return false;
+
+  // Check order is DFS
+
+  CHECK_OR_FALSE(fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
+  CHECK_OR_FALSE(fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
+
+  // ifm must be a known rank-4 tensor; its last dimension is the channel depth
+  CHECK_OR_FALSE(loco::shape_known(ifm));
+  auto ifm_shape = loco::shape_get(ifm);
+  CHECK_OR_FALSE(ifm_shape.domain() == loco::Domain::Tensor);
+  auto ifm_tensor_shape = ifm_shape.as<loco::TensorShape>();
+  CHECK_OR_FALSE(ifm_tensor_shape.rank() == 4);
+  uint32_t ifm_channel_depth = ifm_tensor_shape.dim(3).value();
+
+  CHECK_OR_FALSE(fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
+  CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth));
+
+  add_as_variance = dynamic_cast<locoex::TFLAdd *>(rsqrt->x());
+  CHECK_OR_FALSE(add_as_variance);
+
+  CHECK_OR_FALSE(
+      fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+
+  // epsilon must be a single FLOAT32 scalar
+  CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
+  // TODO Support regarding broadcast
+  CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
+
+  CHECK_OR_FALSE(is_instance_mean(mean_as_variance));
+  sqdiff = dynamic_cast<locoex::TFLSquaredDifference *>(mean_as_variance->input());
+  CHECK_OR_FALSE(sqdiff);
+
+  // The sqdiff and the mean must both hang off the very same ifm node
+  loco::Node *ifm_should_be = nullptr;
+  CHECK_OR_FALSE(fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff));
+  CHECK_OR_FALSE(ifm == ifm_should_be);
+  CHECK_OR_FALSE(is_instance_mean(mean_of_ifm));
+  CHECK_OR_FALSE(ifm == mean_of_ifm->input());
+
+  const_as_beta = dynamic_cast<locoex::TFLConst *>(sub->x());
+  CHECK_OR_FALSE(const_as_beta);
+  CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth));
+
+  mul_as_scaled_mean = dynamic_cast<locoex::TFLMul *>(sub->y());
+  CHECK_OR_FALSE(mul_as_scaled_mean);
+
+  // The scaled-mean branch must reuse the already-matched mul_gamma and mean_of_ifm
+  locoex::TFLMul *mul_gamma_should_be = nullptr;
+  locoex::TFLMean *mean_of_ifm_should_be = nullptr;
+  CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
+                     .with_commutative_args_of(mul_as_scaled_mean));
+  CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
+  CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
+#undef CHECK_OR_FALSE
+  _matched = true;
+  return true;
+}
+
+/**
+ * Instance norm pattern would be fused like following diagram:
+ *
+ * [In] --------------------------- CircleInstanceNorm --- [Out]
+ * / /
+ * const_as_gamma --- TFLReshape --- /
+ * /
+ * const_as_beta ---- TFLReshape ---
+ *
+ * Note
+ * - 'const_as_gamma' and 'const_as_beta' are from original graph
+ * - Value of 'const_as_epsilon' would be copied to CircleInstanceNorm's attribute
+ * - TFLReshape is added as CircleInstanceNorm only accept 1D tensor
+ * - 'TFLConst --- TFLReshape' is expected to be fused in constant folding for Reshape
+ */
+// Replaces a fully matched pattern with a CircleInstanceNorm node
+// (see the diagram in the comment above for the resulting wiring).
+void fuse_instance_norm(const InstanceNormPattern &p)
+{
+  // Caller must have run (and passed) p.matched() first
+  assert(p.matched());
+
+  auto graph = p.add_as_terminal->graph();
+
+  // Make reshape for gamma & beta — CircleInstanceNorm expects 1-D [depth] tensors
+  auto reshape_gamma = graph->nodes()->create<locoex::TFLReshape>();
+  auto reshape_beta = graph->nodes()->create<locoex::TFLReshape>();
+  {
+    auto ifm_shape = loco::shape_get(p.ifm).as<loco::TensorShape>();
+    uint32_t ifm_channel_depth = ifm_shape.dim(3).value();
+
+    int32_t new_shape[1] = {static_cast<int32_t>(ifm_channel_depth)};
+
+    reshape_gamma->tensor(p.const_as_gamma);
+    reshape_beta->tensor(p.const_as_beta);
+
+    locoex::set_new_shape(reshape_gamma, new_shape, 1);
+    locoex::set_new_shape(reshape_beta, new_shape, 1);
+  }
+
+  // Make Instance Norm to replace; epsilon value is copied, and the terminal
+  // Add's fused activation is carried over so behavior is preserved
+  auto instance_norm = graph->nodes()->create<locoex::CircleInstanceNorm>();
+  instance_norm->input(p.ifm);
+  instance_norm->gamma(reshape_gamma);
+  instance_norm->beta(reshape_beta);
+  float epsilon = p.const_as_epsilon->at<loco::DataType::FLOAT32>(0);
+  instance_norm->epsilon(epsilon);
+  instance_norm->fusedActivationFunction(p.add_as_terminal->fusedActivationFunction());
+
+  // Reroute all users of the terminal Add to the new node; the old subgraph
+  // becomes dead and is expected to be removed by a later cleanup pass
+  replace(p.add_as_terminal).with(instance_norm);
+}
+
+} // namespace
+
+namespace exo
+{
+
+// Scans every active node for a terminal TFLAdd of the instance-norm pattern
+// and fuses each match; returns true iff at least one fusion happened.
+bool FuseInstanceNormPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    // Only a TFLAdd can be the terminal node of the pattern
+    auto add = dynamic_cast<locoex::TFLAdd *>(node);
+    if (not add)
+      continue;
+
+    InstanceNormPattern pattern(add);
+    if (not pattern.matched())
+      continue;
+
+    fuse_instance_norm(pattern);
+    changed = true;
+  }
+
+  return changed;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Pass/FuseInstanceNormPass.h b/compiler/exo/src/Pass/FuseInstanceNormPass.h
new file mode 100644
index 000000000..e6361021c
--- /dev/null
+++ b/compiler/exo/src/Pass/FuseInstanceNormPass.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FUSE_INSTANCE_NORM_PASS_H__
+#define __FUSE_INSTANCE_NORM_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace exo
+{
+
+/**
+ * @brief Class to fuse certain pattern of subgraph into CircleInstanceNorm
+ * with auxiliary nodes
+ *
+ * For detailed subgraph pattern to be fused, please check its implementation.
+ */
+struct FuseInstanceNormPass final : public logo::Pass
+{
+  // Identifier reported to the logo pass framework (e.g. for logging).
+  const char *name(void) const final { return "exo::FuseInstanceNormPass"; }
+
+  // Runs the fusion over graph 'g'; returns true iff any pattern was fused.
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace exo
+
+#endif // __FUSE_INSTANCE_NORM_PASS_H__
diff --git a/compiler/exo/src/Pass/FuseReluPass.cpp b/compiler/exo/src/Pass/FuseReluPass.cpp
new file mode 100644
index 000000000..d7af0c506
--- /dev/null
+++ b/compiler/exo/src/Pass/FuseReluPass.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FuseReluPass.h"
+
+#include "Dialect/IR/TFLNodes.h"
+#include "Dialect/IR/TFLDialect.h"
+#include "Dialect/IR/TFLNodeVisitor.h"
+
+#include <set>
+
+namespace
+{
+
+/// @brief Returns true when 'node' can absorb an activation: it supports a
+///        fused activation function and currently has NONE set.
+bool is_pred_fusable(loco::Node *node)
+{
+  using namespace locoex;
+
+  // nullptr when the node type does not carry the FusedActFunc trait
+  auto fusable_node = dynamic_cast<TFLNodeMixin<TFLNodeTrait::FusedActFunc> *>(node);
+
+  return (fusable_node and fusable_node->fusedActivationFunction() == FusedActFunc::NONE);
+}; // NOTE(review): trailing ';' after the function body is redundant
+
+// Visitor that collects TFLRelu/TFLRelu6 nodes whose predecessor can absorb
+// the activation; all other node types are ignored.
+struct Collector final : public locoex::TFLNodeMutableVisitor<void>
+{
+  void visit(locoex::TFLRelu *node) final
+  {
+    if (is_pred_fusable(node->features()))
+      candidates.insert(node);
+  }
+
+  void visit(locoex::TFLRelu6 *node) final
+  {
+    if (is_pred_fusable(node->features()))
+      candidates.insert(node);
+  }
+
+  // Fallback for every other TFL node: not a fusion candidate
+  void visit(locoex::TFLNode *) final { return; }
+
+  // Relu/Relu6 nodes selected for fusion
+  std::set<locoex::TFLNode *> candidates;
+};
+
+/// @brief Sets the fused activation function 'f' on 'node'; the node must
+///        carry the FusedActFunc trait (guaranteed by the Collector filter).
+void set_activation_fusion(loco::Node *node, locoex::FusedActFunc f)
+{
+  using namespace locoex;
+
+  if (auto fusable_node = dynamic_cast<TFLNodeMixin<TFLNodeTrait::FusedActFunc> *>(node))
+    fusable_node->fusedActivationFunction(f);
+  else
+    assert(false); // unreachable for nodes that passed is_pred_fusable()
+}
+
+// Visitor that performs the fusion on a collected candidate: moves the
+// activation onto the predecessor, reroutes users, and detaches the Relu node
+// (which becomes dead and is removed by a later dead-node pass).
+struct Performer final : public locoex::TFLNodeMutableVisitor<void>
+{
+  void visit(locoex::TFLRelu *the_relu) final
+  {
+    set_activation_fusion(the_relu->features(), locoex::FusedActFunc::RELU);
+
+    loco::replace(the_relu).with(the_relu->features());
+    the_relu->features(nullptr);
+  }
+
+  void visit(locoex::TFLRelu6 *the_relu6) final
+  {
+    set_activation_fusion(the_relu6->features(), locoex::FusedActFunc::RELU6);
+
+    loco::replace(the_relu6).with(the_relu6->features());
+    the_relu6->features(nullptr);
+  }
+
+  // Only Relu/Relu6 candidates are ever dispatched here
+  void visit(locoex::TFLNode *) final { assert(false && "should not be called"); }
+};
+
+} // namespace
+
+namespace exo
+{
+
+// Two-phase fusion: first collect fusable Relu/Relu6 nodes, then mutate them.
+// Collecting up front avoids mutating the graph while iterating it.
+// Returns true iff any node was fused.
+bool FuseReluPass::run(loco::Graph *g)
+{
+  Collector collector;
+
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    if (node->dialect() == locoex::TFLDialect::get())
+    {
+      // NOTE(review): cast result is used unchecked — assumed non-null because
+      // the node's dialect is TFL; confirm every TFL-dialect node is a TFLNode.
+      auto tfl_node = dynamic_cast<locoex::TFLNode *>(node);
+      tfl_node->accept(&collector);
+    }
+  }
+
+  Performer performer;
+
+  for (auto node : collector.candidates)
+  {
+    node->accept(&performer);
+  }
+
+  return collector.candidates.size() > 0;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Pass/FuseReluPass.h b/compiler/exo/src/Pass/FuseReluPass.h
new file mode 100644
index 000000000..1cd276b29
--- /dev/null
+++ b/compiler/exo/src/Pass/FuseReluPass.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PASS_FUSE_RELU_PASS_H__
+#define __PASS_FUSE_RELU_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace exo
+{
+
+/**
+ * @brief Class to fuse TFLRelu or TFLRelu6 into the TensorFlow Lite ops below:
+ *
+ * ADD, AVERAGE_POOL_2D, CONCATENATION, CONV_2D, DEPTHWISE_CONV_2D,
+ * FULLY_CONNECTED, L2_NORMALIZATION, L2_POOL_2D, MAX_POOL_2D, MUL
+ */
+struct FuseReluPass final : public logo::Pass
+{
+  // Identifier reported to the logo pass framework (e.g. for logging).
+  const char *name(void) const final { return "exo::FuseReluPass"; }
+
+  // Runs the fusion over graph 'g'; returns true iff any Relu/Relu6 was fused.
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace exo
+
+#endif // __PASS_FUSE_RELU_PASS_H__
diff --git a/compiler/exo/src/Pass/FuseReluPass.test.cpp b/compiler/exo/src/Pass/FuseReluPass.test.cpp
new file mode 100644
index 000000000..6f83d4dd0
--- /dev/null
+++ b/compiler/exo/src/Pass/FuseReluPass.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FuseReluPass.h"
+
+#include "Dialect/IR/TFLNodes.h"
+#include "TestGraph.h"
+
+#include <loco.h>
+#include <logo/RemoveDeadNodePass.h>
+
+#include <gtest/gtest.h>
+
+#include <type_traits> // for std::is_same
+
+namespace
+{
+
+/// @brief Sets the graph input (Pull) to a FLOAT32 tensor of shape {2, 3, 3, 2}
+void init(loco::Pull *pull)
+{
+  pull->dtype(loco::DataType::FLOAT32);
+  pull->shape({2, 3, 3, 2});
+}
+
+/// @brief Initializes TFLConv2D and related filter and bias
+void init(locoex::TFLConv2D *conv2d, locoex::TFLConst *filter, locoex::TFLConst *bias)
+{
+  // set conv2d : no fused activation (precondition for the fusion under test),
+  // VALID padding
+  {
+    conv2d->fusedActivationFunction(locoex::FusedActFunc::NONE);
+    conv2d->padding(locoex::Padding::VALID);
+  }
+
+  // set filter : shape {2, 3, 3, 2}, all elements zero
+  {
+    filter->dtype(loco::DataType::FLOAT32);
+    filter->shape({2, 3, 3, 2});
+    filter->size<loco::DataType::FLOAT32>(2 * 3 * 3 * 2);
+
+    for (uint32_t x = 0; x < 2 * 3 * 3 * 2; x++)
+      filter->at<loco::DataType::FLOAT32>(x) = 0.0;
+  }
+
+  // set bias : two zero elements
+  {
+    bias->dtype(loco::DataType::FLOAT32);
+    bias->shape({2});
+    bias->size<loco::DataType::FLOAT32>(2);
+
+    for (uint32_t x = 0; x < 2; x++)
+      bias->at<loco::DataType::FLOAT32>(x) = 0.0;
+  }
+}
+
+} // namespace
+
+/// Test code called by TEST(..)
+/// This tests whether Conv2D - FusedTFLType is fused.
+template <class FusedTFLType, locoex::FusedActFunc FusedActFunc> void test()
+{
+  // Only the two valid (node type, activation) pairings are allowed
+  static_assert((std::is_same<FusedTFLType, locoex::TFLRelu>::value &&
+                 FusedActFunc == locoex::FusedActFunc::RELU) ||
+                    (std::is_same<FusedTFLType, locoex::TFLRelu6>::value &&
+                     FusedActFunc == locoex::FusedActFunc::RELU6),
+                "wrong template type");
+
+  // Build: Pull -> Conv2D(filter, bias) -> Relu/Relu6
+  exo::test::TestGraph g;
+  {
+    auto filter = g.append<locoex::TFLConst>();
+    auto bias = g.append<locoex::TFLConst>();
+    auto conv2d = g.append<locoex::TFLConv2D>(g.pull, filter, bias);
+
+    auto fusable_node = g.append<FusedTFLType>(conv2d);
+
+    g.complete(fusable_node);
+
+    init(g.pull);
+    init(conv2d, filter, bias);
+  }
+
+  // let's run fusion
+  {
+    exo::test::TypeShapeReadyPhase test_phase;
+
+    test_phase.add_pass<exo::FuseReluPass>();
+    test_phase.add_pass<logo::RemoveDeadNodePass>(); // to remove TFLRelu
+    test_phase.run(g.graph());
+  }
+
+  // Conv2D must have absorbed the activation ...
+  auto a_conv2d = exo::test::find_first_node_bytype<locoex::TFLConv2D>(g.graph());
+  ASSERT_TRUE(a_conv2d != nullptr);
+  ASSERT_TRUE(a_conv2d->fusedActivationFunction() == FusedActFunc);
+
+  // ... and the Relu/Relu6 node must be gone from the graph
+  auto removed_fusable_node = exo::test::find_first_node_bytype<FusedTFLType>(g.graph());
+  ASSERT_TRUE(removed_fusable_node == nullptr);
+}
+
+// Verifies that a Conv2D-Relu pair is fused into Conv2D with RELU activation
+TEST(FuseReluTest, Conv2D_Relu_basic) { test<locoex::TFLRelu, locoex::FusedActFunc::RELU>(); }
+
+// Verifies that a Conv2D-Relu6 pair is fused into Conv2D with RELU6 activation
+TEST(FuseReluTest, Conv2D_Relu6_basic) { test<locoex::TFLRelu6, locoex::FusedActFunc::RELU6>(); }
diff --git a/compiler/exo/src/Pass/FuseRsqrtPass.cpp b/compiler/exo/src/Pass/FuseRsqrtPass.cpp
new file mode 100644
index 000000000..08d704139
--- /dev/null
+++ b/compiler/exo/src/Pass/FuseRsqrtPass.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FuseRsqrtPass.h"
+
+#include "Check.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+namespace
+{
+
+/**
+ * @return Casted TFLDiv for fusable candidate, nullptr otherwise
+ *
+ * This helper checks fusability with the following conditions:
+ * - TFLDiv has no activation
+ * - TFLDiv's first argument is TFLConst with all value 1
+ * - TFLDiv's second argument is TFLSqrt
+ */
+locoex::TFLDiv *as_candidate(loco::Node *node)
+{
+  auto div = dynamic_cast<locoex::TFLDiv *>(node);
+  if (not div)
+    return nullptr;
+
+  // Cannot fuse Div with activation function
+  if (div->fusedActivationFunction() != locoex::FusedActFunc::NONE)
+    return nullptr;
+
+  auto const_one = dynamic_cast<locoex::TFLConst *>(div->x());
+  if (not const_one)
+    return nullptr;
+
+  const loco::DataType FLOAT32 = loco::DataType::FLOAT32;
+  // TODO Support other dtype
+  // NOTE Skip (rather than assert) on unsupported dtypes so that a graph
+  //      containing such a Div is simply left unfused instead of aborting
+  //      the whole compilation
+  if (const_one->dtype() != FLOAT32)
+    return nullptr;
+  // Every element of the constant must be exactly 1.0 for Div to act as 1/x
+  for (uint32_t i = 0; i < const_one->size<FLOAT32>(); ++i)
+    if (const_one->at<FLOAT32>(i) != 1.0f)
+      return nullptr;
+
+  auto sqrt = dynamic_cast<locoex::TFLSqrt *>(div->y());
+  if (not sqrt)
+    return nullptr;
+
+  return div;
+}
+
+// Replace a fusable Div node (validated by as_candidate) with a new TFLRsqrt
+// node that reads the Sqrt's input directly
+void fuse_rsqrt(locoex::TFLDiv *div)
+{
+  auto sqrt = dynamic_cast<locoex::TFLSqrt *>(div->y());
+  EXO_ASSERT(sqrt, "sqrt should be valid at this point");
+
+  // TFLRsqrt to replace
+  auto rsqrt = div->graph()->nodes()->create<locoex::TFLRsqrt>();
+  rsqrt->x(sqrt->x());
+
+  // replace; the old Div (and the now-unreferenced Sqrt) become dead nodes
+  loco::replace(div).with(rsqrt);
+}
+
+} // namespace
+
+namespace exo
+{
+
+bool FuseRsqrtPass::run(loco::Graph *g)
+{
+  // Visit every node reachable from the graph outputs and rewrite each
+  // Div(1, Sqrt(x)) pattern into a single Rsqrt(x) node
+  bool fused_any = false;
+
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto div = as_candidate(node);
+    if (div == nullptr)
+      continue;
+
+    fuse_rsqrt(div);
+    fused_any = true;
+  }
+
+  return fused_any;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Pass/FuseRsqrtPass.h b/compiler/exo/src/Pass/FuseRsqrtPass.h
new file mode 100644
index 000000000..1e60e4a49
--- /dev/null
+++ b/compiler/exo/src/Pass/FuseRsqrtPass.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FUSE_RSQRT_PASS_H__
+#define __FUSE_RSQRT_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace exo
+{
+
+/**
+ * @brief Class to fuse TFLSqrt that is divided(TFLDiv) by 1, into TFLRsqrt
+ *
+ * <BEFORE>
+ *
+ * TFLConst(1) ------
+ * \
+ * A --- TFLSqrt --- TFLDiv --- B
+ *
+ * <AFTER>
+ *
+ * A --- TFLRsqrt --- B
+ */
+struct FuseRsqrtPass final : public logo::Pass
+{
+  // Name used by the phase runner / progress reporting to identify this pass
+  const char *name(void) const final { return "exo::FuseRsqrtPass"; }
+
+  // Returns true when at least one fusion was performed
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace exo
+
+#endif // __FUSE_RSQRT_PASS_H__
diff --git a/compiler/exo/src/Pass/FuseSquaredDifferencePass.cpp b/compiler/exo/src/Pass/FuseSquaredDifferencePass.cpp
new file mode 100644
index 000000000..3f985a505
--- /dev/null
+++ b/compiler/exo/src/Pass/FuseSquaredDifferencePass.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FuseSquaredDifferencePass.h"
+
+#include "Check.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+namespace
+{
+
+/**
+ * @return Casted TFLMul for fusable candidate, nullptr otherwise
+ *
+ * This helper checks fusability with the following conditions:
+ * - TFLMul has no activation
+ * - TFLMul's first and second arguments are equal and TFLSub
+ */
+// Return mul as a fusable candidate, or nullptr when the pattern does not
+// match. Note that the x() == y() check below requires both multiplicands to
+// be the very same node, i.e. mul computes sub * sub (a square).
+locoex::TFLMul *as_candidate(loco::Node *node)
+{
+  auto mul = dynamic_cast<locoex::TFLMul *>(node);
+  if (not mul)
+    return nullptr;
+
+  // Cannot fuse mul with activation function
+  if (mul->fusedActivationFunction() != locoex::FusedActFunc::NONE)
+    return nullptr;
+
+  // Both operands must be one and the same node ...
+  if (mul->x() != mul->y())
+    return nullptr;
+
+  // ... and that node must be a TFLSub
+  if (not dynamic_cast<locoex::TFLSub *>(mul->x()))
+    return nullptr;
+
+  return mul;
+}
+
+// Replace Mul(Sub(a, b), Sub(a, b)) with TFLSquaredDifference(a, b)
+void fuse_squared_difference(locoex::TFLMul *mul)
+{
+  auto sub = dynamic_cast<locoex::TFLSub *>(mul->x());
+  EXO_ASSERT(sub, "sub should be valid at this point");
+
+  // TFLSquaredDifference to replace
+  auto sq_diff = mul->graph()->nodes()->create<locoex::TFLSquaredDifference>();
+  sq_diff->x(sub->x());
+  sq_diff->y(sub->y());
+
+  // replace; the old Mul (and the now-unreferenced Sub) become dead nodes
+  loco::replace(mul).with(sq_diff);
+}
+
+} // namespace
+
+namespace exo
+{
+
+bool FuseSquaredDifferencePass::run(loco::Graph *g)
+{
+  // Visit every node reachable from the graph outputs and rewrite each
+  // Mul(Sub(a, b), Sub(a, b)) pattern into a single TFLSquaredDifference
+  bool fused_any = false;
+
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto mul = as_candidate(node);
+    if (mul == nullptr)
+      continue;
+
+    fuse_squared_difference(mul);
+    fused_any = true;
+  }
+
+  return fused_any;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Pass/FuseSquaredDifferencePass.h b/compiler/exo/src/Pass/FuseSquaredDifferencePass.h
new file mode 100644
index 000000000..dbc15149f
--- /dev/null
+++ b/compiler/exo/src/Pass/FuseSquaredDifferencePass.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FUSE_SQUARED_DIFFERENCE_PASS_H__
+#define __FUSE_SQUARED_DIFFERENCE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace exo
+{
+
+/**
+ * @brief Class to fuse SquaredDifference pattern
+ *
+ * <BEFORE>
+ *
+ * A --- TFLSub --- TFLMul --- C
+ * / \ /
+ * B ---- -----
+ *
+ * <AFTER>
+ *
+ * A --- TFLSquaredDifference --- C
+ * /
+ * B ----
+ */
+struct FuseSquaredDifferencePass final : public logo::Pass
+{
+  // Name used by the phase runner / progress reporting to identify this pass
+  const char *name(void) const final { return "exo::FuseSquaredDifferencePass"; }
+
+  // Returns true when at least one fusion was performed
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace exo
+
+#endif // __FUSE_SQUARED_DIFFERENCE_PASS_H__
diff --git a/compiler/exo/src/Pass/MergeConcatNodesPass.cpp b/compiler/exo/src/Pass/MergeConcatNodesPass.cpp
new file mode 100644
index 000000000..8945fcfce
--- /dev/null
+++ b/compiler/exo/src/Pass/MergeConcatNodesPass.cpp
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MergeConcatNodesPass.h"
+#include "Dialect/IR/TFLNodes.h"
+
+#include <oops/InternalExn.h>
+
+#include <vector>
+
+namespace
+{
+
+// Return true when two TFLConcatenation nodes may be merged into one,
+// i.e. they concatenate along the same axis and share the same fused
+// activation function (and that activation is one we know to be mergeable)
+bool canMerge(locoex::TFLConcatenation *node1, locoex::TFLConcatenation *node2)
+{
+  if (node1->fusedActivationFunction() != node2->fusedActivationFunction())
+    return false;
+
+  if (node1->axis() != node2->axis())
+    return false;
+
+  // Deliberately no 'default: return false' shortcut: an unknown activation
+  // value indicates a broken graph, which is reported via INTERNAL_EXN below
+  switch (node1->fusedActivationFunction())
+  {
+    case locoex::FusedActFunc::NONE:
+    case locoex::FusedActFunc::RELU:
+    case locoex::FusedActFunc::RELU6:
+      return true;
+
+    // case locoex::FusedActFunc::TANH:
+    //   return false;
+
+    default:
+      INTERNAL_EXN_V("Unknown FusedActFunc", oops::to_uint32(node1->fusedActivationFunction()));
+  }
+}
+
+/**
+ * @brief Collect all the inputs of newly created TFLConcatenation nodes
+ *
+ * in:0 -------------------------------\
+ * in:1 ---- TFLConcatenation:0 -------- TFLConcatenation:3 --- C
+ * (axis = 0, NONE) (axis = 0, NONE)
+ * in:2 ---/ /
+ * in:3 ---- TFLConcatenation:1 ------/
+ * (axis = 1, NONE) /
+ * in:4 ---/ /
+ * in:5 ---- TFLConcatenation:2 ---/
+ * (axis = 0, RELU)
+ * in:6 ---/
+ *
+ * For example, if graph is like above, dfs(TFLConcatenation:3) will
+ * return [in:0, in:1, in:2, TFLConcatenation:1, TFLConcatenation:2]
+ *
+ * TFLConcatenation:0 can be merged to TFLConcatenation:3,
+ * because axis and fusedActivationFunction are same.
+ * It means that [in:1, in:2] will be linked as inputs of new TFLConcatenation.
+ *
+ * However, TFLConcatenation:1 and TFLConcatenation:2 cannot be merged to
+ * TFLConcatenation:3 because axis and fusedActivationFunction of each are different.
+ * So [in:3, in:4, in:5, in:6] will not be linked as inputs of new TFLConcatenation
+ * and [TFLConcatenation:1, TFLConcatenation:2] will be linked instead.
+ *
+ * Therefore, inputs of newly created TFLConcatenation node for merging
+ * TFLConcatenation:3 will be [in:0, in:1, in:2, TFLConcatenation:1, TFLConcatenation:2]
+ * and dfs(TFLConcatenation:3) will return it.
+ *
+ *
+ * @note The input nodes should be traversed by LRV,
+ * which is from left to right (input:0 --> input:N)
+ */
+std::vector<loco::Node *> dfs(locoex::TFLConcatenation *root)
+{
+  std::vector<loco::Node *> inputs;
+
+  for (uint32_t n = 0; n < root->numValues(); ++n)
+  {
+    auto value = root->values(n);
+    auto concat = dynamic_cast<locoex::TFLConcatenation *>(value);
+
+    if (concat == nullptr || !canMerge(concat, root))
+    {
+      // Not a mergeable concatenation - keep this input as-is
+      inputs.push_back(value);
+      continue;
+    }
+
+    // Mergeable concatenation - splice in its recursively flattened inputs,
+    // preserving the left-to-right (input:0 --> input:N) visit order
+    auto nested = dfs(concat);
+    inputs.insert(inputs.end(), nested.begin(), nested.end());
+  }
+
+  return inputs;
+}
+
+} // namespace
+
+namespace exo
+{
+
+/**
+ * @brief Merge TFLConcatenate nodes whose axis and fusedActivationFunction are same
+ *
+ * [Before]
+ * in:0 -------------------------------\
+ * in:1 ---- TFLConcatenation:0 -------- TFLConcatenation:3 --- C
+ * (axis = 0, NONE) (axis = 0, NONE)
+ * in:2 ---/ /
+ * in:3 ---- TFLConcatenation:1 ------/
+ * (axis = 1, NONE) /
+ * in:4 ---/ /
+ * in:5 ---- TFLConcatenation:2 ---/
+ * (axis = 0, RELU)
+ * in:6 ---/
+ *
+ * [After]
+ * in:0 -------------------------------\
+ * in:1 -------------------------------- TFLConcatenation:4 --- C
+ * (axis = 0, NONE)
+ * in:2 -------------------------------/
+ * in:3 ---- TFLConcatenation:1 ------/
+ * (axis = 1, NONE) /
+ * in:4 ---/ /
+ * in:5 ---- TFLConcatenation:2 ---/
+ * (axis = 0, RELU)
+ * in:6 ---/
+ *
+ *
+ * in:1 ---- TFLConcatenation:0 ----
+ * (axis = 0, NONE)
+ * in:2 ---/
+ *
+ *
+ * ---- TFLConcatenation:3 ----
+ * (axis = 0, NONE)
+ */
+bool MergeConcatNodesPass::run(loco::Graph *graph)
+{
+  // Let's enumerate nodes required to compute output nodes
+  auto active_nodes = loco::active_nodes(loco::output_nodes(graph));
+
+  // Find TFLConcatenation nodes which have another TFLConcatenation nodes
+  // as inputs, with same axis and same fusedActivationFunction
+  std::vector<locoex::TFLConcatenation *> candidates;
+  for (auto node : active_nodes)
+  {
+    if (auto concat = dynamic_cast<locoex::TFLConcatenation *>(node))
+    {
+      for (uint32_t i = 0; i < concat->numValues(); ++i)
+      {
+        auto input = dynamic_cast<locoex::TFLConcatenation *>(concat->values(i));
+        if (input != nullptr && canMerge(input, concat))
+        {
+          // One mergeable input is enough; break to avoid duplicate entries
+          candidates.push_back(concat);
+          break;
+        }
+      }
+    }
+  }
+
+  // Merge multiple TFLConcatenation nodes as one TFLConcatenation node
+  for (auto node : candidates)
+  {
+    // Flattened input list for the merged node (see dfs above for ordering)
+    auto inputs = dfs(node);
+
+    auto new_concat = graph->nodes()->create<locoex::TFLConcatenation>(inputs.size());
+    new_concat->axis(node->axis());
+    new_concat->fusedActivationFunction(node->fusedActivationFunction());
+
+    for (uint32_t i = 0; i < inputs.size(); ++i)
+      new_concat->values(i, inputs.at(i));
+
+    loco::replace(node).with(new_concat);
+    // Unlink the old node from its inputs so it no longer keeps them alive
+    // and can be collected by a subsequent dead-node removal
+    for (uint32_t i = 0; i < node->numValues(); ++i)
+      node->values(i, nullptr);
+  }
+
+  return candidates.size() > 0;
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Pass/MergeConcatNodesPass.h b/compiler/exo/src/Pass/MergeConcatNodesPass.h
new file mode 100644
index 000000000..823214f43
--- /dev/null
+++ b/compiler/exo/src/Pass/MergeConcatNodesPass.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PASS_MERGE_CONCAT_NODES_H__
+#define __PASS_MERGE_CONCAT_NODES_H__
+
+#include <loco.h>
+#include <logo/Pass.h>
+
+namespace exo
+{
+
+/**
+ * @brief Merge concat nodes whose axis and fusedActivationFunction are same
+ *
+ */
+class MergeConcatNodesPass : public logo::Pass
+{
+public:
+  // Name used by the phase runner / progress reporting to identify this pass.
+  // 'override' (instead of re-declaring 'virtual') matches the sibling passes
+  // and lets the compiler verify the base-class signature.
+  const char *name(void) const override { return "exo::MergeConcatNodesPass"; }
+
+public:
+  // Returns true when at least one merge was performed
+  bool run(loco::Graph *graph) override;
+};
+
+} // namespace exo
+
+#endif // __PASS_MERGE_CONCAT_NODES_H__
diff --git a/compiler/exo/src/Pass/ShapeInferencePass.cpp b/compiler/exo/src/Pass/ShapeInferencePass.cpp
new file mode 100644
index 000000000..bc60f91c4
--- /dev/null
+++ b/compiler/exo/src/Pass/ShapeInferencePass.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ShapeInferencePass.h"
+
+#include "Dialect/IR/TFLDialect.h"
+#include "Dialect/Service/TFLShapeInferenceRule.h"
+
+#include "Dialect/IR/CircleDialect.h"
+#include "Dialect/Service/CircleShapeInferenceRule.h"
+
+#include <loco.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/CanonicalShapeInferenceRule.h>
+#include <loco/Service/ShapeInference.h>
+#include <loco/Service/MultiDialectShapeInferenceRule.h>
+
+#include <locoex/COpDialect.h>
+#include <locoex/Service/COpShapeInferenceRule.h>
+
+namespace exo
+{
+
+/**
+ * @note Currently, TFL and Circle backend share this inference. However, TFL
+ * backend does not require rule for Circle dialect.
+ * TODO Make dedicated inference pass for Circle Dialect.
+ */
+bool ShapeInferencePass::run(loco::Graph *g)
+{
+  // One shape-inference rule per dialect that may appear in the graph
+  loco::CanonicalShapeInferenceRule canonical_rule;
+  locoex::TFLShapeInferenceRule tfl_rule;
+  locoex::CircleShapeInferenceRule circle_rule;
+  locoex::COpShapeInferenceRule cop_rule;
+
+  // Dispatch each node to the rule of its own dialect
+  loco::MultiDialectShapeInferenceRule rules;
+
+  rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
+      .bind(locoex::TFLDialect::get(), &tfl_rule)
+      .bind(locoex::CircleDialect::get(), &circle_rule)
+      .bind(locoex::COpDialect::get(), &cop_rule);
+
+  // NOTE(review): presumably returns whether any annotation changed, per the
+  // logo::Pass contract - confirm against loco::apply documentation
+  return loco::apply(&rules).to(g);
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Pass/ShapeInferencePass.h b/compiler/exo/src/Pass/ShapeInferencePass.h
new file mode 100644
index 000000000..518c87403
--- /dev/null
+++ b/compiler/exo/src/Pass/ShapeInferencePass.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PASS_SHAPE_INFERENCE_PASS_H__
+#define __PASS_SHAPE_INFERENCE_PASS_H__
+
+#include <loco.h>
+#include <logo/Pass.h>
+
+namespace exo
+{
+
+/**
+ * @brief Pass to infer shape of nodes
+ */
+class ShapeInferencePass : public logo::Pass
+{
+public:
+  // Name used by the phase runner / progress reporting to identify this pass.
+  // 'override' (instead of re-declaring 'virtual') matches the sibling passes
+  // and lets the compiler verify the base-class signature.
+  const char *name(void) const override { return "exo::ShapeInferencePass"; }
+
+public:
+  // Returns true when the inference changed any annotation
+  bool run(loco::Graph *graph) override;
+};
+
+} // namespace exo
+
+#endif //__PASS_SHAPE_INFERENCE_PASS_H__
diff --git a/compiler/exo/src/Pass/TypeInferencePass.cpp b/compiler/exo/src/Pass/TypeInferencePass.cpp
new file mode 100644
index 000000000..31d4f13b6
--- /dev/null
+++ b/compiler/exo/src/Pass/TypeInferencePass.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TypeInferencePass.h"
+
+#include "Dialect/IR/TFLDialect.h"
+#include "Dialect/Service/TFLTypeInferenceRule.h"
+
+#include "Dialect/IR/CircleDialect.h"
+#include "Dialect/Service/CircleTypeInferenceRule.h"
+
+#include <loco.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/TypeInference.h>
+
+#include <locoex/COpDialect.h>
+#include <locoex/Service/COpTypeInference.h>
+
+namespace exo
+{
+
+/**
+ * @note Currently, TFL and Circle backend share this inference. However, TFL
+ * backend does not require rule for Circle dialect.
+ * TODO Make dedicated inference pass for Circle Dialect.
+ */
+bool TypeInferencePass::run(loco::Graph *g)
+{
+  // One type-inference rule per dialect that may appear in the graph
+  loco::CanonicalTypeInferenceRule canonical_rule;
+  locoex::TFLTypeInferenceRule tfl_rule;
+  locoex::CircleTypeInferenceRule circle_rule;
+  locoex::COpTypeInferenceRule cop_rule;
+
+  // Dispatch each node to the rule of its own dialect
+  loco::MultiDialectTypeInferenceRule rules;
+
+  rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
+      .bind(locoex::TFLDialect::get(), &tfl_rule)
+      .bind(locoex::CircleDialect::get(), &circle_rule)
+      .bind(locoex::COpDialect::get(), &cop_rule);
+
+  // NOTE(review): presumably returns whether any annotation changed, per the
+  // logo::Pass contract - confirm against loco::apply documentation
+  return loco::apply(&rules).to(g);
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/Pass/TypeInferencePass.h b/compiler/exo/src/Pass/TypeInferencePass.h
new file mode 100644
index 000000000..3ede587a0
--- /dev/null
+++ b/compiler/exo/src/Pass/TypeInferencePass.h
@@ -0,0 +1,42 @@
+
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PASS_TYPE_INFERENCE_PASS_H__
+#define __PASS_TYPE_INFERENCE_PASS_H__
+
+#include <loco.h>
+
+#include <logo/Pass.h>
+
+namespace exo
+{
+
+/**
+ * @brief Pass to infer type of nodes
+ */
+class TypeInferencePass : public logo::Pass
+{
+public:
+  // Name used by the phase runner / progress reporting to identify this pass.
+  // 'override' (instead of re-declaring 'virtual') matches the sibling passes
+  // and lets the compiler verify the base-class signature.
+  const char *name(void) const override { return "exo::TypeInferencePass"; }
+
+public:
+  // Returns true when the inference changed any annotation
+  bool run(loco::Graph *graph) override;
+};
+
+} // namespace exo
+
+#endif //__PASS_TYPE_INFERENCE_PASS_H__
diff --git a/compiler/exo/src/Passes.cpp b/compiler/exo/src/Passes.cpp
new file mode 100644
index 000000000..99d229c9c
--- /dev/null
+++ b/compiler/exo/src/Passes.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Passes.h"
+
+// This file is to make sure that Passes.h be compiled
diff --git a/compiler/exo/src/Passes.h b/compiler/exo/src/Passes.h
new file mode 100644
index 000000000..2a702d01d
--- /dev/null
+++ b/compiler/exo/src/Passes.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PASSES_H__
+#define __PASSES_H__
+
+// Please add in alphabetical order
+// Please append 'Pass' suffix to Pass class and file names
+
+#include "Pass/FoldReshapeOfConstPass.h"
+#include "Pass/FoldTransposeOfConstPass.h"
+#include "Pass/FuseBiasAddPass.h"
+#include "Pass/FuseInstanceNormPass.h"
+#include "Pass/FuseReluPass.h"
+#include "Pass/FuseRsqrtPass.h"
+#include "Pass/FuseSquaredDifferencePass.h"
+#include "Pass/MergeConcatNodesPass.h"
+#include "Pass/ShapeInferencePass.h"
+#include "Pass/TypeInferencePass.h"
+
+#include <logo/RemoveDeadNodePass.h>
+#include <logo/RemoveForwardNodePass.h>
+#include <logo/SimplifyDomainConversionPass.h>
+
+#endif // __PASSES_H__
diff --git a/compiler/exo/src/ProgressReporter.cpp b/compiler/exo/src/ProgressReporter.cpp
new file mode 100644
index 000000000..ff919dae8
--- /dev/null
+++ b/compiler/exo/src/ProgressReporter.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ProgressReporter.h"
+
+#include "Log.h"
+#include "LogHelper.h"
+
+#include <logo/Phase.h>
+#include <logo/Pass.h>
+
+#include <cassert>
+
+namespace
+{
+
+// Render a boolean as a single-character flag ('Y'/'N') for log output
+char to_char(bool b)
+{
+  if (b)
+    return 'Y';
+
+  return 'N';
+}
+
+// Convert a PhaseStrategy enumerator to a human-readable string for logging
+const char *to_str(logo::PhaseStrategy s)
+{
+  switch (s)
+  {
+    case logo::PhaseStrategy::Saturate:
+      return "Saturate";
+    case logo::PhaseStrategy::Restart:
+      return "Restart";
+  }
+  // Every enumerator is handled above; reaching here means a corrupted value
+  assert(false);
+  return "";
+}
+
+} // namespace
+
+namespace exo
+{
+
+// Called once before a phase starts: log the strategy and the initial graph
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *)
+{
+  LOGGER(prime);
+
+  INFO(prime) << "==============================================================";
+  INFO(prime) << "exo::PhaseRunner<" << to_str(strategy()) << ">";
+  INFO(prime) << "Initial graph";
+  INFO(prime) << fmt(graph());
+}
+
+// Called once after a phase finishes: log completion
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *)
+{
+  LOGGER(prime);
+
+  INFO(prime) << "exo::PhaseRunner<" << to_str(strategy()) << "> - done";
+}
+
+
+// Called before each individual pass: log a separator and the pass name
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *info)
+{
+  LOGGER(prime);
+
+  INFO(prime) << "--------------------------------------------------------------";
+  INFO(prime) << "Before " << logo::pass_name(info->pass());
+}
+
+// Called after each individual pass: log whether it changed the graph,
+// then dump the resulting graph
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *info)
+{
+  LOGGER(prime);
+
+  INFO(prime) << "After " << logo::pass_name(info->pass())
+              << " (changed: " << to_char(info->changed()) << ")";
+  INFO(prime) << fmt(graph());
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/ProgressReporter.h b/compiler/exo/src/ProgressReporter.h
new file mode 100644
index 000000000..b0f420df9
--- /dev/null
+++ b/compiler/exo/src/ProgressReporter.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PROGRESSREPORTER_H__
+#define __PROGRESSREPORTER_H__
+
+#include <logo/Phase.h>
+
+#include <loco.h>
+
+namespace exo
+{
+
+/**
+ * @brief Phase event listener that logs phase/pass progress and graph dumps
+ */
+class ProgressReporter : public logo::PhaseEventListener
+{
+public:
+  ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
+      : _graph{graph}, _strategy{strategy}
+  {
+    // DO NOTHING
+  }
+
+public:
+  // Callbacks invoked by the phase runner at the respective events
+  void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *) override;
+  void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *) override;
+  void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *) override;
+  void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *) override;
+
+public:
+  // Graph under optimization (not owned by this reporter)
+  loco::Graph *graph(void) const { return _graph; }
+  logo::PhaseStrategy strategy(void) const { return _strategy; }
+
+private:
+  loco::Graph *_graph;
+  logo::PhaseStrategy _strategy;
+};
+
+} // namespace exo
+
+#endif // __PROGRESSREPORTER_H__
diff --git a/compiler/exo/src/ShapeInference.cpp b/compiler/exo/src/ShapeInference.cpp
new file mode 100644
index 000000000..bceb1495f
--- /dev/null
+++ b/compiler/exo/src/ShapeInference.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ShapeInference.h"
+#include "Dialect/IR/TFLDialect.h"
+#include "Dialect/Service/TFLShapeInferenceRule.h"
+
+#include <loco/IR/CanonicalNode.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/IR/CanonicalNodeVisitor.h>
+#include <loco/Service/ShapeInference.h>
+#include <loco/Service/CanonicalShapeInferenceRule.h>
+#include <loco/Service/MultiDialectShapeInferenceRule.h>
+
+#include <locoex/COpCall.h>
+#include <locoex/COpDialect.h>
+#include <locoex/Service/COpShapeInferenceRule.h>
+
+namespace exo
+{
+
+ShapeDescription ShapeInference::get(loco::Node *node)
+{
+  // The shape must already be inferred (e.g. by ShapeInferencePass); this
+  // helper only converts the annotation into a ShapeDescription.
+  // (Resolves the former "TODO Adjust indentation level" by dropping the
+  // redundant brace scope.)
+  assert(loco::shape_known(node));
+  return to_shape_description(loco::shape_get(node));
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/ShapeInference.h b/compiler/exo/src/ShapeInference.h
new file mode 100644
index 000000000..ec141ccfc
--- /dev/null
+++ b/compiler/exo/src/ShapeInference.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SHAPE_INFERENCE_H__
+#define __SHAPE_INFERENCE_H__
+
+#include "ExporterUtils.h"
+
+#include <loco/IR/Nodes.h>
+
+namespace exo
+{
+
+/**
+ * @brief Get the shape of each node as a node annotation
+ *
+ * HOW TO USE
+ *
+ * ShapeInference::get(g->nodes()->at(..));
+ */
+struct ShapeInference
+{
+  // Return the (already inferred) shape of the given node as a ShapeDescription
+  static ShapeDescription get(loco::Node *node);
+};
+
+} // namespace exo
+
+#endif // __SHAPE_INFERENCE_H__
diff --git a/compiler/exo/src/TFLite/TFLExporter.cpp b/compiler/exo/src/TFLite/TFLExporter.cpp
new file mode 100644
index 000000000..cf002b3e1
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLExporter.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exo/TFLExporter.h"
+
+#include "TFLExporterImpl.h"
+
+#include <stdex/Memory.h>
+
+#include <oops/InternalExn.h>
+
+#include <fstream>
+
+namespace exo
+{
+
+TFLExporter::TFLExporter(loco::Graph *graph) : _impl(stdex::make_unique<Impl>(graph))
+{
+ // NOTHING TO DO
+}
+
+TFLExporter::~TFLExporter() = default;
+
+void TFLExporter::dumpToFile(const char *path) const
+{
+ const char *ptr = _impl->getBufferPointer();
+ const size_t size = _impl->getBufferSize();
+
+ if (!ptr)
+ INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
+
+ std::ofstream file(path, std::ofstream::binary);
+ file.write(ptr, size);
+}
+
+} // namespace exo
diff --git a/compiler/exo/src/TFLite/TFLExporterImpl.cpp b/compiler/exo/src/TFLite/TFLExporterImpl.cpp
new file mode 100644
index 000000000..07adbfb9d
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLExporterImpl.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLExporterImpl.h"
+
+#include "Convert.h"
+#include "ExoOptimize.h"
+
+#include "TFLTensorExporter.h"
+#include "TFLOperationExporter.h"
+#include "TFLExporterUtils.h"
+
+#include "Log.h"
+#include "Knob.h"
+
+#include <oops/InternalExn.h>
+
+#include <cassert>
+#include <unordered_map>
+#include <string>
+#include <stdexcept>
+
+namespace
+{
+
+using namespace exo;
+using namespace exo::tflite_detail;
+
+void registerGraphInputTensors(loco::Graph *graph, SubGraphContext &ctx)
+{
+ for (uint32_t n = 0; n < graph->inputs()->size(); ++n)
+ {
+ auto node = loco::pull_node(graph, n);
+ assert(node != nullptr);
+ ctx._inputs.push_back(get_tensor_index(node));
+ }
+}
+
+void registerGraphOutputTensors(loco::Graph *graph, SubGraphContext &ctx)
+{
+ for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
+ {
+ auto push = loco::push_node(graph, n);
+ assert(push != nullptr);
+ auto node = push->from();
+ assert(node != nullptr);
+ ctx._outputs.push_back(get_tensor_index(node));
+ }
+}
+
+} // namespace
+
+namespace
+{
+using namespace tflite;
+using namespace flatbuffers;
+
+Offset<Vector<Offset<OperatorCode>>>
+encodeOperatorCodes(FlatBufferBuilder &builder, std::unordered_map<OpCode, uint32_t> &opcodes,
+ std::unordered_map<OpCode, std::string> &custom_opcodes)
+{
+ std::vector<Offset<OperatorCode>> operator_codes_vec(opcodes.size());
+ for (auto it : opcodes)
+ {
+ uint32_t idx = it.second;
+ if (it.first.opcode != BuiltinOperator_CUSTOM)
+ {
+ operator_codes_vec[idx] = CreateOperatorCode(builder, it.first.opcode);
+ }
+ else // custom op
+ {
+ auto opCode = it.first;
+ auto custom_code = custom_opcodes.find(opCode);
+ if (custom_code == custom_opcodes.end())
+ INTERNAL_EXN("Cannot find code for custom op");
+
+ operator_codes_vec[idx] =
+ CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
+ }
+ }
+ return builder.CreateVector(operator_codes_vec);
+}
+
+} // namespace
+
+namespace exo
+{
+
+using namespace exo::tflite_detail;
+using namespace tflite;
+using namespace flatbuffers;
+
+TFLExporter::Impl::Impl(loco::Graph *graph) { exportGraph(graph); }
+
+::flatbuffers::Offset<::tflite::SubGraph> TFLExporter::Impl::exportSubgraph(SerializedModelData &gd)
+{
+ auto tensors = _builder.CreateVector(gd._tensors);
+ auto inputs = _builder.CreateVector(gd._inputs);
+ auto outputs = _builder.CreateVector(gd._outputs);
+ auto operators = _builder.CreateVector(gd._operators);
+ auto subgraph = CreateSubGraph(_builder, tensors, inputs, outputs, operators);
+ return subgraph;
+}
+
+void TFLExporter::Impl::exportGraph(loco::Graph *graph)
+{
+ LOGGER(l);
+
+ // IR-level conversion and optimization
+ {
+ convert_to_TFLNodes(graph);
+ set(Dialect::TFLITE);
+ optimize(graph);
+ }
+
+ _builder.Clear();
+
+ SerializedModelData gd;
+
+ // This version is taken from comment in fbs
+ constexpr uint32_t version = 3;
+
+ registerGraphIOName(graph, gd);
+
+ // parse graph into SerializedModelData structure
+ exportOpDefinedTensors(graph, _builder, gd);
+
+ // NOTE Invoke these register functions only after each node is annotated with its tensor_index
+ registerGraphInputTensors(graph, gd);
+ registerGraphOutputTensors(graph, gd);
+
+ exportNodes(graph, _builder, gd);
+
+ // encode operator codes
+ auto operator_codes =
+ encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
+
+ // Subgraphs
+ Offset<SubGraph> subgraph = exportSubgraph(gd);
+ auto subgraphs = _builder.CreateVector(std::vector<Offset<SubGraph>>{subgraph});
+
+ // Description
+ std::string description_str = "nnpackage";
+ auto description = _builder.CreateString(description_str);
+
+ // create array of buffers
+ auto buffers = _builder.CreateVector(gd._buffers);
+
+ // empty metadata
+ std::vector<int> metadata_buffer_vec;
+ auto metadata_buffer = _builder.CreateVector(metadata_buffer_vec);
+
+ // Model
+ auto model_offset = CreateModel(_builder, version, operator_codes, subgraphs, description,
+ buffers, metadata_buffer);
+ FinishModelBuffer(_builder, model_offset);
+}
+
+const char *TFLExporter::Impl::getBufferPointer() const
+{
+ return reinterpret_cast<const char *>(_builder.GetBufferPointer());
+}
+
+size_t TFLExporter::Impl::getBufferSize() const { return _builder.GetSize(); }
+
+} // namespace exo
diff --git a/compiler/exo/src/TFLite/TFLExporterImpl.h b/compiler/exo/src/TFLite/TFLExporterImpl.h
new file mode 100644
index 000000000..01c549a43
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLExporterImpl.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFL_EXPORTER_IMPL_H__
+#define __TFL_EXPORTER_IMPL_H__
+
+#include "exo/TFLExporter.h"
+#include "schema_generated.h"
+
+#include <loco.h>
+
+namespace exo
+{
+
+namespace tflite_detail
+{
+
+struct SerializedModelData;
+
+} // namespace tflite_detail
+
+using namespace tflite_detail;
+
+/**
+ * internal implementation of interface exporter class
+ */
+class TFLExporter::Impl
+{
+public:
+ Impl() = delete;
+ ~Impl() = default;
+
+ explicit Impl(loco::Graph *graph);
+
+ /**
+ * @return pointer to buffer with serialized graph
+ */
+ const char *getBufferPointer() const;
+
+ /**
+ * @return size of buffer with serialized graph
+ */
+ size_t getBufferSize() const;
+
+private:
+ /**
+ * @brief create Subgraph using data stored in SerializedModelData
+ * @param gd information about serialized parts of the model
+ * @return offset in buffer corresponding to serialized subgraph
+ */
+ flatbuffers::Offset<tflite::SubGraph> exportSubgraph(SerializedModelData &gd);
+
+ /**
+ * @brief root function that writes graph into internal buffer
+ * @param graph
+ */
+ void exportGraph(loco::Graph *graph);
+
+private:
+ flatbuffers::FlatBufferBuilder _builder;
+};
+
+} // namespace exo
+
+#endif // __TFL_EXPORTER_IMPL_H__
diff --git a/compiler/exo/src/TFLite/TFLExporterImpl.test.cpp b/compiler/exo/src/TFLite/TFLExporterImpl.test.cpp
new file mode 100644
index 000000000..7d74223c5
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLExporterImpl.test.cpp
@@ -0,0 +1,413 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLExporterImpl.h"
+
+#include "schema_generated.h"
+
+#include "TestGraph.h"
+#include "GraphBlock.h"
+#include "Knob.h"
+
+#include <loco/IR/PermutingCodec.h>
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class TFLExporterImplTests : public ::testing::Test
+{
+public:
+ TFLExporterImplTests() { _graph = loco::make_graph(); }
+
+public:
+ virtual ~TFLExporterImplTests() = default;
+
+protected:
+ loco::Graph *graph(void) { return _graph.get(); }
+
+ template <typename NodeT> NodeT *make_node(void);
+
+private:
+ std::unique_ptr<loco::Graph> _graph;
+};
+
+template <typename NodeT> NodeT *TFLExporterImplTests::make_node(void)
+{
+ return graph()->nodes()->create<NodeT>();
+}
+
+template <> loco::FeatureEncode *TFLExporterImplTests::make_node(void)
+{
+ loco::FeatureEncode *encode_layer = graph()->nodes()->create<loco::FeatureEncode>();
+
+ auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+ (*encoder->perm())[loco::FeatureAxis::Count] = 0;
+ (*encoder->perm())[loco::FeatureAxis::Depth] = 1;
+ (*encoder->perm())[loco::FeatureAxis::Height] = 2;
+ (*encoder->perm())[loco::FeatureAxis::Width] = 3;
+ encode_layer->encoder(std::move(encoder));
+
+ return encode_layer;
+}
+
+template <> loco::FeatureDecode *TFLExporterImplTests::make_node(void)
+{
+ loco::FeatureDecode *decode_layer = graph()->nodes()->create<loco::FeatureDecode>();
+
+ auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+ (*decoder->perm())[loco::FeatureAxis::Count] = 0;
+ (*decoder->perm())[loco::FeatureAxis::Depth] = 1;
+ (*decoder->perm())[loco::FeatureAxis::Height] = 2;
+ (*decoder->perm())[loco::FeatureAxis::Width] = 3;
+ decode_layer->decoder(std::move(decoder));
+
+ return decode_layer;
+}
+
+} // namespace
+
+// TODO TFLAdd
+
+// TODO TFLAveragePool2D
+
+TEST_F(TFLExporterImplTests, Concatenate)
+{
+ auto pull1 = make_node<loco::Pull>();
+ {
+ pull1->dtype(loco::DataType::FLOAT32);
+ pull1->shape({1, 2, 3, 4});
+ }
+ auto pull2 = make_node<loco::Pull>();
+ {
+ pull2->dtype(loco::DataType::FLOAT32);
+ pull2->shape({1, 2, 3, 4});
+ }
+ auto concat = make_node<loco::TensorConcat>();
+ {
+ concat->lhs(pull1);
+ concat->rhs(pull2);
+ }
+ auto push = make_node<loco::Push>();
+ {
+ push->from(concat);
+ }
+
+ auto input1 = graph()->inputs()->create();
+ {
+ input1->name("input1");
+ loco::link(input1, pull1);
+ }
+ auto input2 = graph()->inputs()->create();
+ {
+ input2->name("input2");
+ loco::link(input2, pull2);
+ }
+ auto output = graph()->outputs()->create();
+ {
+ output->name("output");
+ loco::link(output, push);
+ }
+
+ exo::TFLExporter::Impl exporter{graph()};
+
+ // TODO Add more checks
+ SUCCEED();
+}
+
+// TODO TFLConv2D
+
+// TODO TFLDepthwiseConv2D
+
+// TODO TFLDiv
+
+// TODO TFLMaxPool2D
+
+// TODO TFLMul
+
+TEST_F(TFLExporterImplTests, Relu6)
+{
+ auto pull = make_node<loco::Pull>();
+ {
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->shape({1, 8, 8, 3});
+ }
+ auto relu6 = make_node<loco::ReLU6>();
+ {
+ relu6->input(pull);
+ }
+ auto push = make_node<loco::Push>();
+ {
+ push->from(relu6);
+ }
+
+ auto input = graph()->inputs()->create();
+ {
+ input->name("input");
+ loco::link(input, pull);
+ }
+ auto output = graph()->outputs()->create();
+ {
+ output->name("output");
+ loco::link(output, push);
+ }
+
+ exo::TFLExporter::Impl exporter{graph()};
+
+ // TODO Add more checks
+ SUCCEED();
+}
+
+// TODO TFLRelu6
+
+// TODO TFLReshape
+
+// TODO TFLSoftmax
+
+// TODO TFLSqrt
+
+// TODO TFLSub
+
+// TODO TFLTanh
+
+TEST(TFLExporterImplTest, Transpose_simple)
+{
+ exo::test::ExampleGraph<exo::test::ExampleGraphType::Transpose> g;
+
+ // pull attribute
+ {
+ g.pull->dtype(loco::DataType::FLOAT32);
+ g.pull->shape({1, 2, 2, 3});
+ }
+
+ // transpose attribute
+ {
+ g.transpose->perm()->size(4);
+ g.transpose->perm()->axis(0) = 1;
+ g.transpose->perm()->axis(1) = 2;
+ g.transpose->perm()->axis(2) = 3;
+ g.transpose->perm()->axis(3) = 0;
+ }
+
+ exo::TFLExporter::Impl exporter{g.graph()};
+ {
+ auto model = tflite::GetModel(exporter.getBufferPointer());
+ auto operators = model->subgraphs()->Get(0)->operators();
+
+ assert(operators->Length() == 1);
+
+ int n = 0; // op index of Transpose in tflite file
+
+ auto opcode_index = operators->Get(n)->opcode_index();
+
+ ASSERT_EQ(model->operator_codes()->Get(opcode_index)->builtin_code(),
+ tflite::BuiltinOperator_TRANSPOSE);
+
+ auto perm = operators->Get(n)->inputs()->Get(1);
+
+ auto perm_tensor = model->subgraphs()->Get(0)->tensors()->Get(perm);
+ ASSERT_EQ(perm_tensor->type(), tflite::TensorType::TensorType_INT32);
+ ASSERT_EQ(perm_tensor->shape()->size(), 1);
+ ASSERT_EQ(perm_tensor->shape()->Get(0), 4);
+
+ auto bufs = (model->buffers());
+ auto *perm_buf =
+ reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
+
+ ASSERT_EQ(perm_buf[0], 1);
+ ASSERT_EQ(perm_buf[1], 2);
+ ASSERT_EQ(perm_buf[2], 3);
+ ASSERT_EQ(perm_buf[3], 0);
+ }
+}
+
+/*
+ test case:
+ Pull ----- FeatureEncode ---- FeatureDecode --- Push
+ 0 -----------> H ---------+ O 0
+ 1 W +----> H -----------> 1
+ 2 I(depth) W 2
+ 3 O(count) I 3
+
+ axis 0 ----------> H --------------> H -----------> 1
+ axis 1 ----------> W --------------> W -----------> 2
+ axis 2 ----------> I --------------> I -----------> 3
+ axis 3 ----------> O --------------> O -----------> 0
+
+ So, perm vector of Transpose = [3, 0, 1, 2].
+ Please refer to loco::TensorTranspose about the definition of perm vector.
+*/
+TEST(TFLExporterImplTest, Transpose_from_FilterEncode_FilterDecode)
+{
+ exo::test::ExampleGraph<exo::test::ExampleGraphType::FilterEncode_FilterDecode> g;
+
+ // pull attribute
+ {
+ g.pull->dtype(loco::DataType::FLOAT32);
+ g.pull->shape({1, 2, 3, 4}); // whatever value of rank 4
+ }
+
+ exo::TFLExporter::Impl exporter{g.graph()};
+ {
+ auto model = tflite::GetModel(exporter.getBufferPointer());
+ auto operators = model->subgraphs()->Get(0)->operators();
+
+ assert(operators->Length() == 1);
+
+ int n = 0; // op index of Transpose in tflite file
+
+ auto opcode_index = operators->Get(n)->opcode_index();
+
+ ASSERT_EQ(model->operator_codes()->Get(opcode_index)->builtin_code(),
+ tflite::BuiltinOperator_TRANSPOSE);
+
+ auto perm = operators->Get(n)->inputs()->Get(1);
+
+ auto perm_tensor = model->subgraphs()->Get(0)->tensors()->Get(perm);
+ ASSERT_EQ(perm_tensor->type(), tflite::TensorType::TensorType_INT32);
+ ASSERT_EQ(perm_tensor->shape()->size(), 1);
+ ASSERT_EQ(perm_tensor->shape()->Get(0), 4);
+
+ auto bufs = (model->buffers());
+ auto *perm_buf =
+ reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
+ ASSERT_EQ(perm_buf[0], 3);
+ ASSERT_EQ(perm_buf[1], 0);
+ ASSERT_EQ(perm_buf[2], 1);
+ ASSERT_EQ(perm_buf[3], 2);
+ }
+}
+
+/**
+ * What happens when there is a mismatch between generation and execution order!?
+ */
+TEST_F(TFLExporterImplTests, Regression_0000)
+{
+ // This test was written without considering fusion.
+ // For this reason, this check is needed.
+ // TODO Rewrite this test
+ if (exo::get<exo::Knob::UseFuseReluPass>())
+ return;
+
+ // Execution Order: MaxPool2D -> ReLU
+ // Generation Order: ReLU -> MaxPool2D
+ auto pull = make_node<loco::Pull>();
+ {
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->shape({1, 8, 8, 3});
+ }
+ auto relu = make_node<loco::ReLU>();
+ auto encode = exo::make_feature_encode<exo::FeatureLayout::NHWC>(pull);
+ auto maxpool = make_node<loco::MaxPool2D>();
+ auto decode = exo::make_feature_decode<exo::FeatureLayout::NHWC>(relu);
+ auto push = make_node<loco::Push>();
+
+ ASSERT_EQ(maxpool->window()->vertical(), 1);
+ ASSERT_EQ(maxpool->window()->horizontal(), 1);
+
+ maxpool->ifm(encode);
+ relu->input(maxpool);
+ push->from(decode);
+
+ auto input = graph()->inputs()->create();
+ {
+ input->name("input");
+ loco::link(input, pull);
+ }
+ auto output = graph()->outputs()->create();
+ {
+ output->name("output");
+ loco::link(output, push);
+ }
+
+ exo::TFLExporter::Impl exporter{graph()};
+ {
+ int64_t maxpool_execution_index = -1;
+ int64_t relu_exeuction_index = -1;
+
+ auto model = tflite::GetModel(exporter.getBufferPointer());
+ auto operators = model->subgraphs()->Get(0)->operators();
+
+ for (uint32_t n = 0; n < operators->Length(); ++n)
+ {
+ auto opcode_index = operators->Get(n)->opcode_index();
+
+ switch (model->operator_codes()->Get(opcode_index)->builtin_code())
+ {
+ case tflite::BuiltinOperator_RELU:
+ ASSERT_EQ(relu_exeuction_index, -1);
+ relu_exeuction_index = static_cast<int64_t>(n);
+ break;
+ case tflite::BuiltinOperator_MAX_POOL_2D:
+ ASSERT_EQ(maxpool_execution_index, -1);
+ maxpool_execution_index = static_cast<int64_t>(n);
+ break;
+ default:
+ break;
+ }
+ }
+
+ ASSERT_NE(maxpool_execution_index, -1);
+ ASSERT_NE(relu_exeuction_index, -1);
+ // maxpool SHOULD precede ReLU
+ ASSERT_LT(maxpool_execution_index, relu_exeuction_index);
+ }
+}
+
+/**
+ * @brief Test exporter buffer generation
+ */
+TEST_F(TFLExporterImplTests, Regression_0001)
+{
+ auto cgen = make_node<loco::ConstGen>();
+ cgen->rank(1);
+ cgen->dim(0) = 2;
+ cgen->dtype(loco::DataType::FLOAT32);
+ cgen->size<loco::DataType::FLOAT32>(2);
+ cgen->at<loco::DataType::FLOAT32>(0) = 3.3f;
+ cgen->at<loco::DataType::FLOAT32>(1) = 1.1f;
+
+ auto push = make_node<loco::Push>();
+ push->from(cgen);
+
+ auto output = graph()->outputs()->create();
+ {
+ output->name("output");
+ loco::link(output, push);
+ }
+
+ exo::TFLExporter::Impl exporter{graph()};
+ {
+ auto model = tflite::GetModel(exporter.getBufferPointer());
+ auto buffers = model->buffers();
+
+ // 0'th empty buffer + ConstGen data + ConstGen node output
+ ASSERT_EQ(buffers->Length(), 3);
+
+ // 0'th should be empty buffer
+ auto buffer_0 = (*buffers)[0];
+ auto array_0 = buffer_0->data();
+ ASSERT_EQ(array_0, nullptr);
+
+ // 1'st should be ConstGen data which is two float
+ auto buffer_1 = (*buffers)[1];
+ auto array_1 = buffer_1->data();
+ size_t size_1 = array_1->size();
+ ASSERT_EQ(size_1, 2 * sizeof(float));
+ }
+}
diff --git a/compiler/exo/src/TFLite/TFLExporterUtils.cpp b/compiler/exo/src/TFLite/TFLExporterUtils.cpp
new file mode 100644
index 000000000..d35afc9aa
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLExporterUtils.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLExporterUtils.h"
+
+#include <oops/InternalExn.h>
+
+namespace exo
+{
+
+tflite::ActivationFunctionType to_tflite_actfunc(locoex::FusedActFunc func)
+{
+ switch (func)
+ {
+ case locoex::FusedActFunc::NONE:
+ return tflite::ActivationFunctionType_NONE;
+ case locoex::FusedActFunc::RELU:
+ return tflite::ActivationFunctionType_RELU;
+ case locoex::FusedActFunc::RELU6:
+ return tflite::ActivationFunctionType_RELU6;
+ default:
+ INTERNAL_EXN_V("Unsupported locoex FusedActFunc Type", oops::to_uint32(func));
+ }
+}
+
+} // namespace exo
+
+namespace exo
+{
+namespace tflite_detail
+{
+
+uint32_t SerializedModelData::registerBuiltinOpcode(tflite::BuiltinOperator builtin_code)
+{
+ auto it = _operator_codes.find(OpCode{builtin_code});
+ if (it != _operator_codes.end())
+ {
+ return it->second;
+ }
+ auto idx = static_cast<uint32_t>(_operator_codes.size());
+ _operator_codes.emplace(OpCode{builtin_code}, idx);
+ return idx;
+}
+
+uint32_t SerializedModelData::registerCustomOpcode(const std::string &custom_op)
+{
+ tflite::BuiltinOperator custom_code = tflite::BuiltinOperator_CUSTOM;
+ auto idx = registerBuiltinOpcode(custom_code);
+ _custom_operator_codes.emplace(OpCode{custom_code}, custom_op);
+ return idx;
+}
+
+tflite::Padding getOpPadding(const loco::Padding2D *pad, const loco::Stride<2> *stride,
+ const ShapeDescription &ifm, const ShapeDescription &ofm)
+{
+ // VALID padding
+ if (pad->top() == 0 && pad->bottom() == 0 && pad->left() == 0 && pad->right() == 0)
+ return tflite::Padding_VALID;
+
+ // SAME padding
+ //
+ // For same padding, by definition, following equation should hold:
+ // O = floor((I - 1) / S) + 1
+ // where input size I, output size O, stride S
+ //
+ // NOTE input and output 'feature' map are shape of NHWC
+ bool same_padding_criterion_1 =
+ (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
+ (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
+
+ // For same padding, rear padding is same or bigger than front padding by at most 1
+ bool same_padding_criterion_2 =
+ (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
+ (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
+
+ if (same_padding_criterion_1 && same_padding_criterion_2)
+ return tflite::Padding_SAME;
+
+ INTERNAL_EXN("NYI for custom PAD");
+}
+
+tflite::Padding getOpPadding(const locoex::Padding pad)
+{
+ if (pad == locoex::Padding::VALID)
+ return tflite::Padding_VALID;
+ if (pad == locoex::Padding::SAME)
+ return tflite::Padding_SAME;
+
+ INTERNAL_EXN_V("Unknown padding", oops::to_uint32(pad));
+}
+
+void registerGraphIOName(loco::Graph *graph, SerializedModelData &gd)
+{
+ for (uint32_t in = 0; in < graph->inputs()->size(); ++in)
+ {
+ auto pull = loco::pull_node(graph, in);
+ auto name = graph->inputs()->at(in)->name();
+
+ gd._pull_to_name[pull] = name;
+ }
+ for (uint32_t out = 0; out < graph->outputs()->size(); ++out)
+ {
+ auto push = loco::push_node(graph, out);
+ auto name = graph->outputs()->at(out)->name();
+
+ gd._push_to_name[push] = name;
+ }
+}
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+namespace
+{
+
+class TFLTensorIndexAnnotation final : public loco::NodeAnnotation
+{
+public:
+ TFLTensorIndexAnnotation(const TFLTensorIndex &index) : _index{index}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const TFLTensorIndex &index(void) const { return _index; }
+
+private:
+ TFLTensorIndex _index;
+};
+
+} // namespace
+
+void set_tensor_index(loco::Node *node, const TFLTensorIndex &tensor_id)
+{
+ assert(node->annot<TFLTensorIndexAnnotation>() == nullptr);
+ node->annot(stdex::make_unique<TFLTensorIndexAnnotation>(tensor_id));
+}
+
+TFLTensorIndex get_tensor_index(loco::Node *node)
+{
+ assert(node->annot<TFLTensorIndexAnnotation>() != nullptr);
+ return node->annot<TFLTensorIndexAnnotation>()->index();
+}
+
+} // namespace tflite_detail
+} // namespace exo
diff --git a/compiler/exo/src/TFLite/TFLExporterUtils.h b/compiler/exo/src/TFLite/TFLExporterUtils.h
new file mode 100644
index 000000000..dbd7a52fb
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLExporterUtils.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFL_EXPORTER_UTILS_H__
+#define __TFL_EXPORTER_UTILS_H__
+
+#include "ExporterUtils.h"
+
+#include "schema_generated.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include <loco.h>
+
+#include <unordered_map>
+
+namespace exo
+{
+namespace tflite_detail
+{
+
+struct OpCode
+{
+ tflite::BuiltinOperator opcode;
+
+ bool operator==(const OpCode &rhs) const { return opcode == rhs.opcode; }
+};
+
+} // namespace tflite_detail
+} // namespace exo
+
+namespace exo
+{
+
+tflite::ActivationFunctionType to_tflite_actfunc(locoex::FusedActFunc func);
+
+} // namespace exo
+
+namespace std
+{
+
+template <> struct hash<exo::tflite_detail::OpCode>
+{
+ size_t operator()(const exo::tflite_detail::OpCode &x) const { return hash<int>()(x.opcode); }
+};
+
+} // namespace std
+
+namespace exo
+{
+namespace tflite_detail
+{
+
+/**
+ * @brief Record the information of T/F Lite SubGraph and its mapping to loco
+ */
+struct SubGraphContext
+{
+ /// @brief SubGraph input tensor id
+ std::vector<int32_t> _inputs;
+ /// @brief SubGraph output tensor id
+ std::vector<int32_t> _outputs;
+};
+
+// Prerequisites for tflite::Model object creation
+struct SerializedModelData final : public SubGraphContext
+{
+ SerializedModelData() = default;
+ SerializedModelData(const SerializedModelData &) = delete;
+
+ std::unordered_map<OpCode, uint32_t> _operator_codes;
+ std::unordered_map<OpCode, std::string> _custom_operator_codes;
+ std::vector<flatbuffers::Offset<tflite::Operator>> _operators;
+ std::vector<flatbuffers::Offset<tflite::Tensor>> _tensors;
+ std::vector<flatbuffers::Offset<tflite::Buffer>> _buffers;
+
+ // Graph input and output names
+ std::unordered_map<loco::Pull *, std::string> _pull_to_name;
+ std::unordered_map<loco::Push *, std::string> _push_to_name;
+
+ /**
+ * @brief if opcode is not registered in table of opcodes add it
+ * @param builtin_code
+ * @return idx of opcode in table of opcodes (see schema)
+ */
+ uint32_t registerBuiltinOpcode(tflite::BuiltinOperator builtin_code);
+ uint32_t registerCustomOpcode(const std::string &custom_op);
+};
+
+tflite::Padding getOpPadding(const loco::Padding2D *pad, const loco::Stride<2> *stride,
+ const ShapeDescription &ifm, const ShapeDescription &ofm);
+tflite::Padding getOpPadding(const locoex::Padding pad);
+
+/// @brief Register graph input and output names to SerializedModelData
+void registerGraphIOName(loco::Graph *graph, SerializedModelData &gd);
+
+using TFLTensorIndex = int32_t;
+
+void set_tensor_index(loco::Node *node, const TFLTensorIndex &tensor_id);
+TFLTensorIndex get_tensor_index(loco::Node *node);
+
+} // namespace tflite_detail
+} // namespace exo
+
+#endif // __TFL_EXPORTER_UTILS_H__
diff --git a/compiler/exo/src/TFLite/TFLExporterUtils.test.cpp b/compiler/exo/src/TFLite/TFLExporterUtils.test.cpp
new file mode 100644
index 000000000..d19f87d25
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLExporterUtils.test.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLExporterUtils.h"
+
+#include <gtest/gtest.h>
+
+using namespace exo::tflite_detail;
+
+TEST(ExporterUtilsTests, getOpPadding)
+{
+ loco::Padding2D pad;
+ loco::Stride<2> stride;
+ exo::ShapeDescription ifm;
+ exo::ShapeDescription ofm;
+
+ ifm._dims.resize(4);
+ ofm._dims.resize(4);
+
+ // VALID padding
+ {
+ pad.top(0);
+ pad.bottom(0);
+ pad.left(0);
+ pad.right(0);
+
+ stride.vertical(2);
+ stride.horizontal(2);
+
+ ifm._dims[1] = 5;
+ ifm._dims[2] = 5;
+
+ ofm._dims[1] = 2;
+ ofm._dims[2] = 2;
+
+ ASSERT_EQ(getOpPadding(&pad, &stride, ifm, ofm), tflite::Padding_VALID);
+ }
+
+ // SAME padding
+ {
+ pad.top(1);
+ pad.bottom(1);
+ pad.left(1);
+ pad.right(1);
+
+ stride.vertical(2);
+ stride.horizontal(2);
+
+ ifm._dims[1] = 5;
+ ifm._dims[2] = 5;
+
+ ofm._dims[1] = 3;
+ ofm._dims[2] = 3;
+
+ ASSERT_EQ(getOpPadding(&pad, &stride, ifm, ofm), tflite::Padding_SAME);
+ }
+
+ // Custom padding 1 - Not supported by tflite
+ {
+ pad.top(2);
+ pad.bottom(0);
+ pad.left(1);
+ pad.right(1);
+
+ stride.vertical(2);
+ stride.horizontal(2);
+
+ ifm._dims[1] = 5;
+ ifm._dims[2] = 5;
+
+ ofm._dims[1] = 3;
+ ofm._dims[2] = 3;
+
+ ASSERT_ANY_THROW(getOpPadding(&pad, &stride, ifm, ofm));
+ }
+
+ // Custom padding 2 - Not supported by tflite
+ {
+ pad.top(2);
+ pad.bottom(2);
+ pad.left(2);
+ pad.right(2);
+
+ stride.vertical(2);
+ stride.horizontal(2);
+
+ ifm._dims[1] = 5;
+ ifm._dims[2] = 5;
+
+ ofm._dims[1] = 4;
+ ofm._dims[2] = 4;
+
+ ASSERT_ANY_THROW(getOpPadding(&pad, &stride, ifm, ofm));
+ }
+}
diff --git a/compiler/exo/src/TFLite/TFLOperationExporter.cpp b/compiler/exo/src/TFLite/TFLOperationExporter.cpp
new file mode 100644
index 000000000..79b5b6287
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLOperationExporter.cpp
@@ -0,0 +1,1199 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLOperationExporter.h"
+#include "TFLExporterUtils.h"
+#include "ShapeInference.h"
+
+#include "Dialect/IR/TFLNode.h"
+#include "Dialect/IR/TFLNodes.h"
+#include "Dialect/IR/TFLNodeVisitor.h"
+
+#include "Check.h"
+
+#include <loco/IR/CanonicalNode.h>
+#include <loco/IR/CanonicalNodeVisitor.h>
+#include <loco/Service/ShapeInference.h>
+#include <locoex/COpCall.h>
+
+#include <oops/InternalExn.h>
+
+#include <flatbuffers/flexbuffers.h>
+
+using namespace flatbuffers;
+using namespace tflite;
+
+namespace
+{
+
+using namespace exo;
+using namespace exo::tflite_detail;
+
+// Visitor that serializes each supported graph node into a tflite flatbuffer
+// Operator and appends it to SerializedModelData (gd._operators).
+// Handles both the locoex TFL dialect and loco canonical nodes; per the
+// comment below, the canonical handlers are slated for removal.
+class OperationExporter final : public locoex::TFLNodeMutableVisitor<void>,
+                                public loco::CanonicalNodeMutableVisitor<void>
+{
+public:
+  // Stores references only: both fbb and ctx must outlive this exporter.
+  OperationExporter(FlatBufferBuilder &fbb, SerializedModelData &ctx) : builder{fbb}, gd{ctx}
+  {
+    // DO NOTHING
+  }
+
+public:
+  // FOR TFLNodes
+  void visit(locoex::TFLAdd *) final;
+  void visit(locoex::TFLAveragePool2D *) final;
+  void visit(locoex::TFLConcatenation *) final;
+  void visit(locoex::TFLConst *) final{/* skip, everything is done in exportOpDefinedTensors */};
+  void visit(locoex::TFLConv2D *) final;
+  void visit(locoex::TFLDepthwiseConv2D *) final;
+  void visit(locoex::TFLDiv *) final;
+  void visit(locoex::TFLFullyConnected *) final;
+  void visit(locoex::TFLMaximum *) final;
+  void visit(locoex::TFLMaxPool2D *) final;
+  void visit(locoex::TFLMean *) final;
+  void visit(locoex::TFLMul *) final;
+  void visit(locoex::TFLRelu *) final;
+  void visit(locoex::TFLRelu6 *) final;
+  // TODO TFLReshape
+  void visit(locoex::TFLRsqrt *) final;
+  // TODO TFLSoftmax
+  void visit(locoex::TFLSqrt *) final;
+  void visit(locoex::TFLSquaredDifference *) final;
+  void visit(locoex::TFLSub *) final;
+  // TODO TFLTanh
+  void visit(locoex::TFLTranspose *) final;
+  void visit(locoex::TFLTransposeConv *) final;
+
+  // FOR canonical nodes. These will be removed later
+  void visit(loco::ReLU *) final;
+  void visit(loco::ReLU6 *) final;
+  void visit(loco::Tanh *) final;
+  void visit(loco::Push *) final { /* DO NOTHING */}
+  void visit(loco::Pull *) final { /* DO NOTHING */}
+  void visit(loco::FeatureEncode *) final;
+  void visit(loco::FeatureDecode *) final;
+  void visit(loco::FilterEncode *) final;
+  void visit(loco::DepthwiseFilterEncode *) final;
+  void visit(loco::ConstGen *) final { /* skip, everything is done in exportOpDefinedTensors */}
+  void visit(loco::MaxPool2D *) final;
+  void visit(loco::AvgPool2D *) final;
+  void visit(loco::Conv2D *) final;
+  void visit(loco::TransposedConv2D *) final;
+  void visit(loco::DepthwiseConv2D *) final;
+  void visit(loco::TensorConcat *) final;
+  void visit(loco::TensorReduce *) final;
+  void visit(loco::TensorSoftmax *) final;
+  void visit(loco::BiasEncode *) final;
+  void visit(loco::TensorBiasAdd *) final;
+  void visit(loco::FeatureBiasAdd *) final;
+  void visit(loco::EltwiseAdd *) final;
+  void visit(loco::EltwiseMax *) final;
+  void visit(loco::EltwiseMul *) final;
+  void visit(loco::EltwiseSub *) final;
+  void visit(loco::EltwiseDiv *) final;
+  void visit(loco::EltwiseSqrt *) final;
+  void visit(loco::FixedReshape *) final;
+  void visit(loco::TensorBroadcast *) final;
+  void visit(loco::TensorConstantPad *) final;
+
+  // NOTE(review): not marked 'final' like the others — presumably intentional
+  // since COpCall is an extension node, but worth confirming.
+  void visit(locoex::COpCall *);
+
+private:
+  /**
+   * @brief Exports TFLMaxPool2D or TFLAveragePool2D
+   *
+   * @note TFLPool2D should be one of TFLMaxPool2D or TFLAveragePool2D
+   */
+  template <class TFLPool2D>
+  void export_pool_2d(TFLPool2D *node, tflite::BuiltinOperator builtin_op);
+
+private:
+  FlatBufferBuilder &builder; // flatbuffer under construction
+  SerializedModelData &gd;    // accumulates opcodes/tensors/buffers/operators
+};
+
+// Export ADD: two inputs (x, y), one output, fused activation in AddOptions.
+void OperationExporter::visit(locoex::TFLAdd *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_ADD);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateAddOptions(builder, to_tflite_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_AddOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Export AVERAGE_POOL_2D via the shared pool-2d helper.
+void OperationExporter::visit(locoex::TFLAveragePool2D *node)
+{
+  export_pool_2d<locoex::TFLAveragePool2D>(node, tflite::BuiltinOperator_AVERAGE_POOL_2D);
+}
+
+// Export CONCATENATION: variadic inputs gathered from values(i), axis and
+// fused activation carried in ConcatenationOptions.
+void OperationExporter::visit(locoex::TFLConcatenation *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_CONCATENATION);
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+
+  for (uint32_t i = 0; i < node->numValues(); ++i)
+    inputs_vec.push_back(get_tensor_index(node->values(i)));
+
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateConcatenationOptions(builder, node->axis(),
+                                            to_tflite_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_ConcatenationOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Export CONV_2D: inputs are (input, filter, bias) in tflite order; padding
+// comes directly from the node (TFL dialect already carries tflite semantics).
+void OperationExporter::visit(locoex::TFLConv2D *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_CONV_2D);
+
+  // Make input, output and options for operator
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->filter()),
+                                  get_tensor_index(node->bias())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  tflite::Padding padding = getOpPadding(node->padding());
+  auto options = CreateConv2DOptions(builder, padding, node->stride()->w(), node->stride()->h(),
+                                     to_tflite_actfunc(node->fusedActivationFunction()));
+
+  // Make CONV_2D operator
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_Conv2DOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Export DEPTHWISE_CONV_2D: like CONV_2D plus the depth multiplier option.
+void OperationExporter::visit(locoex::TFLDepthwiseConv2D *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_DEPTHWISE_CONV_2D);
+
+  // Make input, output and options for operator
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->filter()),
+                                  get_tensor_index(node->bias())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  tflite::Padding padding = getOpPadding(node->padding());
+  auto options = CreateDepthwiseConv2DOptions(builder, padding, node->stride()->w(),
+                                              node->stride()->h(), node->depthMultiplier(),
+                                              to_tflite_actfunc(node->fusedActivationFunction()));
+
+  // Make DEPTHWISE_CONV_2D operator
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_DepthwiseConv2DOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Export DIV: binary op with fused activation.
+void OperationExporter::visit(locoex::TFLDiv *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_DIV);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateDivOptions(builder, to_tflite_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_DivOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Export FULLY_CONNECTED: inputs are (input, weights, bias).
+void OperationExporter::visit(locoex::TFLFullyConnected *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_FULLY_CONNECTED);
+
+  // Make input, output and options for operator
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
+                                  get_tensor_index(node->weights()),
+                                  get_tensor_index(node->bias())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options =
+      CreateFullyConnectedOptions(builder, to_tflite_actfunc(node->fusedActivationFunction()));
+
+  // Make FULLY_CONNECTED operator
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_FullyConnectedOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Export MAXIMUM: MaximumMinimumOptions carries no fields.
+void OperationExporter::visit(locoex::TFLMaximum *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_MAXIMUM);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateMaximumMinimumOptions(builder);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_MaximumMinimumOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Export MAX_POOL_2D via the shared pool-2d helper.
+void OperationExporter::visit(locoex::TFLMaxPool2D *node)
+{
+  export_pool_2d<locoex::TFLMaxPool2D>(node, tflite::BuiltinOperator_MAX_POOL_2D);
+}
+
+// Export MEAN: inputs are (input, reduction_indices); keep_dims is forwarded
+// from the node via ReducerOptions.
+void OperationExporter::visit(locoex::TFLMean *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_MEAN);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
+                                  get_tensor_index(node->reduction_indices())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateReducerOptions(builder, node->keep_dims());
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_ReducerOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Export MUL: binary op with fused activation.
+void OperationExporter::visit(locoex::TFLMul *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_MUL);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateMulOptions(builder, to_tflite_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_MulOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Export RELU: unary op, no builtin options.
+void OperationExporter::visit(locoex::TFLRelu *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_RELU);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->features())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+  gd._operators.push_back(op_offset);
+}
+
+// Export RELU6: unary op, no builtin options.
+void OperationExporter::visit(locoex::TFLRelu6 *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_RELU6);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->features())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+  gd._operators.push_back(op_offset);
+}
+
+// TODO TFLReshape
+
+// Export RSQRT: unary op, no builtin options.
+void OperationExporter::visit(locoex::TFLRsqrt *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_RSQRT);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+  gd._operators.push_back(op_offset);
+}
+
+// TODO TFLSoftmax
+
+// Export SQRT: unary op, no builtin options.
+void OperationExporter::visit(locoex::TFLSqrt *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_SQRT);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+  gd._operators.push_back(op_offset);
+}
+
+// Export SQUARED_DIFFERENCE: options struct carries no fields.
+void OperationExporter::visit(locoex::TFLSquaredDifference *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_SQUARED_DIFFERENCE);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateSquaredDifferenceOptions(builder);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_SquaredDifferenceOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Export SUB: binary op with fused activation.
+void OperationExporter::visit(locoex::TFLSub *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_SUB);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateSubOptions(builder, to_tflite_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_SubOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// TODO TFLTanh
+
+// Export TRANSPOSE: arg(0) is the tensor, arg(1) the permutation tensor.
+// NOTE(review): uses raw arg() indices rather than named accessors like the
+// other handlers — confirm arg(0)/arg(1) ordering matches the node definition.
+void OperationExporter::visit(locoex::TFLTranspose *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_TRANSPOSE);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->arg(0)), get_tensor_index(node->arg(1))};
+  std::vector<int32_t> outputs_vec{get_tensor_index(node)};
+
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateTransposeOptions(builder);
+
+  auto op_offset =
+      CreateOperator(builder, op_idx, inputs, outputs,
+                     tflite::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Export TRANSPOSE_CONV: tflite input order is (output shape, filter,
+// out-backprop / gradient input).
+void OperationExporter::visit(locoex::TFLTransposeConv *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_TRANSPOSE_CONV);
+
+  // Make input, output and options for operator
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->inputSizes()),
+                                  get_tensor_index(node->filter()),
+                                  get_tensor_index(node->outBackprop())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  tflite::Padding padding = getOpPadding(node->padding());
+  auto options =
+      CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
+
+  // Make TRANSPOSE_CONV operator
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_TransposeConvOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Shared body for MAX_POOL_2D / AVERAGE_POOL_2D export; the two ops differ
+// only by builtin opcode, and both use Pool2DOptions.
+template <class TFLPool2D>
+void OperationExporter::export_pool_2d(TFLPool2D *node, tflite::BuiltinOperator builtin_op)
+{
+  EXO_ASSERT(builtin_op == tflite::BuiltinOperator_MAX_POOL_2D ||
+                 builtin_op == tflite::BuiltinOperator_AVERAGE_POOL_2D,
+             "should be maxpool or avgpool");
+  EXO_ASSERT(node->padding() != locoex::Padding::UNDEFINED, "Padding is not set");
+
+  uint32_t op_idx = gd.registerBuiltinOpcode(builtin_op);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+
+  tflite::Padding padding = getOpPadding(node->padding());
+
+  auto options = CreatePool2DOptions(builder, padding, node->stride()->w(), node->stride()->h(),
+                                     node->filter()->w(), node->filter()->h(),
+                                     to_tflite_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_Pool2DOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Canonical loco::ReLU -> tflite RELU (unary, no options).
+void OperationExporter::visit(loco::ReLU *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_RELU);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+  gd._operators.push_back(op_offset);
+}
+
+// Canonical loco::ReLU6 -> tflite RELU6 (unary, no options).
+void OperationExporter::visit(loco::ReLU6 *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_RELU6);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+  gd._operators.push_back(op_offset);
+}
+
+// Canonical loco::Tanh -> tflite TANH (unary, no options).
+void OperationExporter::visit(loco::Tanh *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_TANH);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+  gd._operators.push_back(op_offset);
+}
+
+// Canonical loco::MaxPool2D -> MAX_POOL_2D. Padding must be derived here:
+// the canonical node keeps explicit pad values, so the 4-arg getOpPadding
+// classifies them as VALID/SAME using inferred ifm/ofm shapes (and throws on
+// paddings tflite cannot represent).
+void OperationExporter::visit(loco::MaxPool2D *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_MAX_POOL_2D);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  tflite::Padding padding = getOpPadding(
+      node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+  auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
+                                     node->stride()->vertical(), node->window()->horizontal(),
+                                     node->window()->vertical());
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_Pool2DOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Canonical loco::AvgPool2D -> AVERAGE_POOL_2D. Same padding derivation as
+// MaxPool2D above.
+void OperationExporter::visit(loco::AvgPool2D *node)
+{
+  // TFlite only support Valid convention of average pooling
+  assert(node->convention() == loco::AvgPool2D::Convention::Valid);
+
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_AVERAGE_POOL_2D);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  tflite::Padding padding = getOpPadding(
+      node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+  auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
+                                     node->stride()->vertical(), node->window()->horizontal(),
+                                     node->window()->vertical());
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_Pool2DOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Canonical loco::Conv2D -> CONV_2D. The canonical node has no bias input,
+// but tflite CONV_2D requires one, so a constant all-zero FLOAT32 bias tensor
+// (rank 1, length = output kernel count) is synthesized and registered in gd.
+void OperationExporter::visit(loco::Conv2D *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_CONV_2D);
+
+  // Third input of CONV_2D of tflite should be bias. We will make (and register to gd) dummy zero
+  // bias. Bias would be rank 1, have size of output kernel count, and have all zero values, i.e.
+  // zero bias.
+  auto *ker = dynamic_cast<loco::FilterEncode *>(node->ker());
+  assert(ker);
+  int32_t bias_vec_size = ShapeInference::get(ker)._dims[0]; // output kernel count
+
+  auto bias_vec_shape_offset = builder.CreateVector(std::vector<int32_t>{bias_vec_size});
+  // NOTE(review): the data below is float, so sizeof(float) would state the
+  // intent better; both are 4 bytes, so the computed size is still correct.
+  size_t raw_bias_vec_size = bias_vec_size * sizeof(int32_t);
+
+  std::vector<float> bias_vec_data(bias_vec_size); // initialized as zero vector
+
+  auto bias_vec_offset =
+      builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+
+  auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
+
+  const auto bias_buffer_id = static_cast<uint32_t>(gd._buffers.size());
+
+  gd._buffers.push_back(bias_buffer_offset);
+
+  auto bias_tensor_id = static_cast<int32_t>(gd._tensors.size());
+  auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
+
+  auto bias_tensor_offset =
+      CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+  gd._tensors.push_back(bias_tensor_offset);
+
+  // Make input, output and options for operator
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm()), get_tensor_index(node->ker()),
+                                  bias_tensor_id};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  tflite::Padding padding = getOpPadding(
+      node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+  auto options = CreateConv2DOptions(builder, padding, node->stride()->horizontal(),
+                                     node->stride()->vertical());
+
+  // Make CONV_2D operator
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_Conv2DOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Canonical loco::TransposedConv2D -> TRANSPOSE_CONV. tflite's first input is
+// a constant INT32 output-shape tensor (NHWC), which is synthesized from the
+// inferred feature shape and registered in gd.
+void OperationExporter::visit(loco::TransposedConv2D *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_TRANSPOSE_CONV);
+
+  // TRANSPOSE_CONV's first input is output shape array.
+  const int32_t outshape_vec_size = 4;
+  auto outshape_vec_shape_offset = builder.CreateVector(std::vector<int32_t>{outshape_vec_size});
+  size_t raw_outshape_vec_size = outshape_vec_size * sizeof(int32_t);
+
+  std::vector<int32_t> outshape_vec_data(outshape_vec_size);
+  {
+    // Copy inferred output shape of node
+    auto out_feature_shape = loco::shape_get(node).as<loco::FeatureShape>();
+
+    // Feature tensor in TFlite is NHWC
+    outshape_vec_data.at(0) = out_feature_shape.count().value();
+    outshape_vec_data.at(1) = out_feature_shape.height().value();
+    outshape_vec_data.at(2) = out_feature_shape.width().value();
+    outshape_vec_data.at(3) = out_feature_shape.depth().value();
+  }
+
+  auto outshape_vec_offset = builder.CreateVector(
+      reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
+
+  auto outshape_buffer_offset = CreateBuffer(builder, outshape_vec_offset);
+
+  const auto outshape_buffer_id = static_cast<uint32_t>(gd._buffers.size());
+
+  gd._buffers.push_back(outshape_buffer_offset);
+
+  auto outshape_tensor_id = static_cast<int32_t>(gd._tensors.size());
+  auto name_offset = builder.CreateString("t_" + std::to_string(outshape_tensor_id));
+
+  auto outshape_tensor_offset = CreateTensor(builder, outshape_vec_shape_offset, TensorType_INT32,
+                                             outshape_buffer_id, name_offset);
+  gd._tensors.push_back(outshape_tensor_offset);
+
+  // Make input, output and options for operator
+  std::vector<int32_t> inputs_vec{outshape_tensor_id, get_tensor_index(node->ker()),
+                                  get_tensor_index(node->ifm())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  // NOTE input and output is inversed to use this function
+  // (deconvolution's padding relationship is the forward convolution's, with
+  // the roles of ifm/ofm swapped)
+  tflite::Padding padding = getOpPadding(node->pad(), node->stride(), ShapeInference::get(node),
+                                         ShapeInference::get(node->ifm()));
+  auto options = CreateTransposeConvOptions(builder, padding, node->stride()->horizontal(),
+                                            node->stride()->vertical());
+
+  // Make TRANSPOSE_CONV operator
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_TransposeConvOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Canonical loco::DepthwiseConv2D -> DEPTHWISE_CONV_2D. Synthesizes the
+// required zero bias (like Conv2D above) and computes the depth multiplier
+// as output channels / input channels.
+void OperationExporter::visit(loco::DepthwiseConv2D *node)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_DEPTHWISE_CONV_2D);
+
+  // Third input of DEPTHWISE_CONV2D of tflite should be bias. We will make (and register to gd)
+  // dummy zero bias. Bias would be rank 1, have size of output kernel count, and have all zero
+  // values, i.e. zero bias.
+  auto *ker = dynamic_cast<loco::DepthwiseFilterEncode *>(node->ker());
+  assert(ker);
+
+  int32_t bias_vec_size = ShapeInference::get(ker)._dims[3]; // output_size(C*M)
+  auto bias_vec_shape_offset = builder.CreateVector(std::vector<int32_t>{bias_vec_size});
+
+  // NOTE(review): data is float; sizeof(float) would state the intent better,
+  // though both are 4 bytes so the size is correct.
+  size_t raw_bias_vec_size = bias_vec_size * sizeof(int32_t);
+  std::vector<float> bias_vec_data(bias_vec_size);
+  auto bias_vec_offset =
+      builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+
+  auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
+
+  const auto bias_buffer_id = static_cast<uint32_t>(gd._buffers.size());
+
+  gd._buffers.push_back(bias_buffer_offset);
+
+  auto bias_tensor_id = static_cast<int32_t>(gd._tensors.size());
+  auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
+
+  auto bias_tensor_offset =
+      CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+  gd._tensors.push_back(bias_tensor_offset);
+
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm()), get_tensor_index(node->ker()),
+                                  bias_tensor_id};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  tflite::Padding padding = getOpPadding(
+      node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+
+  int32_t ifm_channel_size = ShapeInference::get(node->ifm())._dims[3];
+  // multiplier = bias_vec_size(output_size)/ifm_channel_size
+  auto options =
+      CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
+                                   node->stride()->vertical(), bias_vec_size / ifm_channel_size);
+
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_DepthwiseConv2DOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Canonical loco::TensorReduce -> tflite reducer op (currently only MEAN).
+// The reduction axes are materialized as a constant INT32 tensor built from
+// the node's axis set.
+void OperationExporter::visit(loco::TensorReduce *node)
+{
+  uint32_t op_idx;
+
+  switch (node->func())
+  {
+    case loco::ReduceFunc::Mean:
+      op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_MEAN);
+      break;
+
+    // TODO Support more reduce type operation
+    default:
+      INTERNAL_EXN_V("Not supported reduce type", oops::to_uint32(node->func()));
+  }
+
+  // Create a vector for axes data
+  std::vector<int32_t> axes_vec;
+  auto rank = ShapeInference::get(node->input())._dims.size();
+  for (uint32_t i = 0; i < rank; ++i)
+    if (node->axes()->defined(i))
+      axes_vec.push_back(i);
+
+  int32_t axes_vec_size = axes_vec.size();
+  auto axes_vec_shape_offset = builder.CreateVector(std::vector<int32_t>{axes_vec_size});
+
+  size_t raw_axes_vec_size = axes_vec_size * sizeof(int32_t);
+  auto axes_vec_offset =
+      builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
+
+  auto axes_buffer_offset = CreateBuffer(builder, axes_vec_offset);
+
+  const auto axes_buffer_id = static_cast<uint32_t>(gd._buffers.size());
+
+  gd._buffers.push_back(axes_buffer_offset);
+
+  auto axes_tensor_id = static_cast<int32_t>(gd._tensors.size());
+  auto name_offset = builder.CreateString("t_" + std::to_string(axes_tensor_id));
+
+  auto axes_tensor_offset =
+      CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
+  gd._tensors.push_back(axes_tensor_offset);
+
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), axes_tensor_id};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  // NOTE(review): keep_dims is hard-coded true here (unlike TFLMean which
+  // forwards the node's flag) — confirm canonical TensorReduce always keeps dims.
+  auto options = CreateReducerOptions(builder, true); // true is for keep_dims option
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_ReducerOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Canonical loco::TensorSoftmax -> SOFTMAX with beta fixed at 1.0.
+// Restricted to rank-2 inputs with the softmax applied on the last axis,
+// which is what tflite SOFTMAX supports.
+void OperationExporter::visit(loco::TensorSoftmax *node)
+{
+  // TODO Support when the input rank of TensorSoftmax is not 2
+  assert(ShapeInference::get(node->input())._dims.size() == 2);
+
+  // NOTE TFLite only accepts axis when the value is last dimension
+  assert(node->axis() == ShapeInference::get(node->input())._dims.size() - 1);
+
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_SOFTMAX);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateSoftmaxOptions(builder, 1.0f); // beta = 1.0 (plain softmax)
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_SoftmaxOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Export given node into identity, i.e. CONCATENATION with one input
+/// Used when an encode/decode node needs no data movement (layout already
+/// matches tflite's); a 1-input CONCATENATION is tflite's cheapest pass-through.
+template <typename NodeT>
+void exportIdentity(NodeT *node, FlatBufferBuilder &builder, SerializedModelData &gd)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_CONCATENATION);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->arg(0))};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateConcatenationOptions(builder); // use dummy 0 axis and NONE activation
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  tflite::BuiltinOptions_ConcatenationOptions, options.Union());
+
+  gd._operators.push_back(op_offset);
+}
+
+/// @brief Export loco nodes as TRANSPOSE
+/// Builds a constant rank-1 INT32 perm tensor from perm_vec_data (must have
+/// exactly 4 elements) and emits a TRANSPOSE op reading node->arg(0).
+void exportAsTranspose(loco::Node *node, FlatBufferBuilder &builder,
+                       std::vector<int32_t> &perm_vec_data, SerializedModelData &gd)
+{
+  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_TRANSPOSE);
+
+  auto options = CreateTransposeOptions(builder);
+
+  // Create constant tensor with perm vector
+  constexpr int perm_vec_size = 4;
+  assert(perm_vec_data.size() == perm_vec_size);
+  auto perm_vec_shape_offset = builder.CreateVector(std::vector<int32_t>{perm_vec_size});
+  constexpr size_t raw_perm_vec_size = perm_vec_size * sizeof(int32_t);
+
+  auto perm_vec_offset =
+      builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
+
+  auto perm_buffer_offset = CreateBuffer(builder, perm_vec_offset);
+
+  const auto perm_buffer_id = static_cast<uint32_t>(gd._buffers.size());
+
+  gd._buffers.push_back(perm_buffer_offset);
+
+  auto perm_tensor_id = static_cast<int32_t>(gd._tensors.size());
+  auto name_offset = builder.CreateString("t_" + std::to_string(perm_tensor_id));
+
+  auto perm_tensor_offset =
+      CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
+  gd._tensors.push_back(perm_tensor_offset);
+
+  // Create permutation node
+
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->arg(0)), perm_tensor_id};
+  std::vector<int32_t> outputs_vec{get_tensor_index(node)};
+
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+
+  constexpr auto options_type = tflite::BuiltinOptions::BuiltinOptions_TransposeOptions;
+
+  auto transpose_offset =
+      CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
+  gd._operators.push_back(transpose_offset);
+}
+
+void OperationExporter::visit(loco::FeatureEncode *node)
+{
+ auto encoder = dynamic_cast<loco::PermutingEncoder<loco::Domain::Feature> *>(node->encoder());
+ auto perm = encoder->perm();
+
+ if (isNHWC(perm))
+ {
+ // Note that tflite represents feature as NHWC
+ exportIdentity(node, builder, gd);
+ }
+ else
+ {
+ std::vector<int32_t> perm_vec_data(4);
+ perm_vec_data[0] = perm->axis(loco::FeatureAxis::Count);
+ perm_vec_data[1] = perm->axis(loco::FeatureAxis::Height);
+ perm_vec_data[2] = perm->axis(loco::FeatureAxis::Width);
+ perm_vec_data[3] = perm->axis(loco::FeatureAxis::Depth);
+
+ exportAsTranspose(node, builder, perm_vec_data, gd);
+ }
+}
+
+void OperationExporter::visit(loco::FeatureDecode *node)
+{
+ auto decoder = dynamic_cast<loco::PermutingDecoder<loco::Domain::Feature> *>(node->decoder());
+ auto perm = decoder->perm();
+
+ if (isNHWC(perm))
+ {
+ // Note that tflite represents feature as NHWC
+ exportIdentity(node, builder, gd);
+ }
+ else
+ {
+ std::vector<int32_t> perm_vec_data(4);
+ perm_vec_data[perm->axis(loco::FeatureAxis::Count)] = 0;
+ perm_vec_data[perm->axis(loco::FeatureAxis::Height)] = 1;
+ perm_vec_data[perm->axis(loco::FeatureAxis::Width)] = 2;
+ perm_vec_data[perm->axis(loco::FeatureAxis::Depth)] = 3;
+
+ exportAsTranspose(node, builder, perm_vec_data, gd);
+ }
+}
+
+void OperationExporter::visit(loco::FilterEncode *node)
+{
+ auto encoder = dynamic_cast<loco::PermutingEncoder<loco::Domain::Filter> *>(node->encoder());
+ auto perm = encoder->perm();
+
+ if (isNHWC(perm))
+ {
+ // Note that tflite represents filter as NHWC
+ exportIdentity(node, builder, gd);
+ }
+ else
+ {
+ std::vector<int32_t> perm_vec_data(4);
+ // NOTE In tflite, all tensors means NHWC, so 0 = N, 1 = H, 2 = W, 3 = C
+ perm_vec_data[0] = perm->axis(loco::FilterAxis::Count);
+ perm_vec_data[1] = perm->axis(loco::FilterAxis::Height);
+ perm_vec_data[2] = perm->axis(loco::FilterAxis::Width);
+ perm_vec_data[3] = perm->axis(loco::FilterAxis::Depth);
+
+ exportAsTranspose(node, builder, perm_vec_data, gd);
+ }
+}
+
/// @brief Export @p node as RESHAPE, passing the new shape both as an attribute and as
///        a constant 'shape' input tensor
void exportAsReshape(loco::Node *node, FlatBufferBuilder &builder,
                     std::vector<int32_t> &new_shape_vec, SerializedModelData &gd)
{
  // NOTE TFLite has two ways to get new shape paramter,
  // one is by attribute 'new_shape' and the other is by input 'shape'.
  // Therefore TFLite interpreter calculates Reshape operation correctly
  // if one of them is valid.
  // However, since NN runtime usually get new shape parameter by input 'shape',
  // passing new shape only by attribute can cause some problems.
  // Of course, the opposite situation can be occurred in the future.
  // To prevent those problems, we pass new shape parameter not only by attribute
  // but also by input.

  // 1-D shape of the 'shape' input tensor itself: [rank]
  auto input_shape_shape_vec_offset =
      builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});

  // Serialize the new-shape values as raw bytes (tflite buffers are byte arrays)
  size_t input_shape_vec_size = new_shape_vec.size() * sizeof(int32_t);
  auto input_shape_input_vec_offset =
      builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
  auto input_shape_buffer_offset = CreateBuffer(builder, input_shape_input_vec_offset);

  // Buffer/tensor ids are their positions in the model-level tables
  const auto input_shape_buffer_id = static_cast<uint32_t>(gd._buffers.size());
  gd._buffers.push_back(input_shape_buffer_offset);

  auto input_shape_tensor_id = static_cast<int32_t>(gd._tensors.size());
  auto name_offset = builder.CreateString("t_" + std::to_string(input_shape_tensor_id));
  auto input_shape_tensor_offset = CreateTensor(
      builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
  gd._tensors.push_back(input_shape_tensor_offset);

  uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_RESHAPE);

  std::vector<int32_t> inputs_vec{get_tensor_index(node->arg(0)), input_shape_tensor_id};
  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
  auto inputs = builder.CreateVector(inputs_vec);
  auto outputs = builder.CreateVector(outputs_vec);

  // Also carry the new shape in the builtin options (the attribute path)
  auto new_shape_vec_offset = builder.CreateVector(new_shape_vec);
  auto options = CreateReshapeOptions(builder, new_shape_vec_offset);

  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
                                  tflite::BuiltinOptions_ReshapeOptions, options.Union());

  gd._operators.push_back(op_offset);
}
+
+void OperationExporter::visit(loco::DepthwiseFilterEncode *node)
+{
+ auto ker = node->input(); // [H, W, C, M]
+
+ // tflite represents filter as [1, H, W, C*M] where M is multiplier.
+ std::vector<int32_t> new_shape_vec(4);
+ new_shape_vec[0] = 1;
+ new_shape_vec[1] = ShapeInference::get(ker)._dims[0];
+ new_shape_vec[2] = ShapeInference::get(ker)._dims[1];
+ new_shape_vec[3] = ShapeInference::get(ker)._dims[2] * ShapeInference::get(ker)._dims[3];
+
+ exportAsReshape(node, builder, new_shape_vec, gd);
+}
+
+void OperationExporter::visit(loco::BiasAdd<loco::Domain::Tensor> *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_ADD);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->value()), get_tensor_index(node->bias())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateAddOptions(builder); // dummy option
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ tflite::BuiltinOptions_AddOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::FeatureBiasAdd *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_ADD);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->value()), get_tensor_index(node->bias())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateAddOptions(builder); // dummy option
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ tflite::BuiltinOptions_AddOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+/// @brief Export CONCATENATION of **TWO** tensors only
+void OperationExporter::visit(loco::TensorConcat *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_CONCATENATION);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->lhs()), get_tensor_index(node->rhs())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateConcatenationOptions(builder, node->axis());
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ tflite::BuiltinOptions_ConcatenationOptions, options.Union());
+
+ gd._operators.push_back(op_offset);
+}
+
// BiasEncode has no tflite counterpart; export it as an identity operation
void OperationExporter::visit(loco::BiasEncode *encode) { exportIdentity(encode, builder, gd); }
+
+void OperationExporter::visit(loco::EltwiseAdd *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_ADD);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->lhs()), get_tensor_index(node->rhs())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateAddOptions(builder); // dummy option
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ tflite::BuiltinOptions_AddOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::EltwiseMax *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_MAXIMUM);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->lhs()), get_tensor_index(node->rhs())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateMaximumMinimumOptions(builder); // dummy option
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ tflite::BuiltinOptions_MaximumMinimumOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::EltwiseMul *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_MUL);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->lhs()), get_tensor_index(node->rhs())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateMulOptions(builder); // dummy option
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ tflite::BuiltinOptions_MulOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::EltwiseSub *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_SUB);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->lhs()), get_tensor_index(node->rhs())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateSubOptions(builder); // dummy option
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ tflite::BuiltinOptions_SubOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::EltwiseDiv *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_DIV);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->lhs()), get_tensor_index(node->rhs())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateDivOptions(builder); // dummy option
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ tflite::BuiltinOptions_DivOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::EltwiseSqrt *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_SQRT);
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+ gd._operators.push_back(op_offset);
+}
+
+void OperationExporter::visit(loco::FixedReshape *node)
+{
+ std::vector<int32_t> new_shape_vec;
+ for (uint32_t axis = 0; axis < node->rank(); ++axis)
+ {
+ assert(node->dim(axis).known());
+ new_shape_vec.push_back(node->dim(axis).value());
+ }
+
+ exportAsReshape(node, builder, new_shape_vec, gd);
+}
+
// TensorBroadcast has no tflite counterpart; it is expected to be eliminated by an
// earlier pass, so reaching this visitor is an internal error.
void OperationExporter::visit(loco::TensorBroadcast *)
{
  INTERNAL_EXN("TensorBroadcast should not exist in the graph");
}
+
+void OperationExporter::visit(loco::TensorConstantPad *node)
+{
+ uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_PAD);
+
+ // make padding attribute an input
+ auto padding = node->padding();
+ // get padding vector size
+ int32_t padding_vec_size = padding->rank();
+ // get byte size of vector
+ size_t padding_vec_byte_size = padding_vec_size * sizeof(int32_t) * 2; // [rank, 2]
+ // create vector for data
+ std::vector<int32_t> padding_vec_data(padding_vec_size * 2);
+ // set data
+ for (int32_t i = 0; i < padding_vec_size; i++)
+ {
+ padding_vec_data.at(i * 2) = padding->front(i);
+ padding_vec_data.at(i * 2 + 1) = padding->back(i);
+ }
+ // create FlatBuffer vector
+ auto padding_vec_ptr = builder.CreateVector(reinterpret_cast<uint8_t *>(padding_vec_data.data()),
+ padding_vec_byte_size);
+
+ // create buffer
+ auto padding_buffer_ptr = CreateBuffer(builder, padding_vec_ptr);
+ // get buffer id
+ const auto padding_buffer_id = static_cast<uint32_t>(gd._buffers.size());
+
+ gd._buffers.push_back(padding_buffer_ptr);
+
+ // create padding shape vector
+ auto padding_shape_vec_ptr = builder.CreateVector(std::vector<int32_t>{padding_vec_size, 2});
+ // create tensor
+ auto padding_tensor_ptr =
+ CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32, padding_buffer_id);
+ // get tensor id
+ const auto padding_tensor_id = static_cast<int32_t>(gd._tensors.size());
+
+ gd._tensors.push_back(padding_tensor_ptr);
+
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), padding_tensor_id};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+ gd._operators.push_back(op_offset);
+}
+
// Serialize the attributes of @p copCall as a FlexBuffer map and return it as a
// flatbuffer byte vector suitable for the 'custom_options' field of a tflite Operator.
inline flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
CreateCOpCallOptions(flatbuffers::FlatBufferBuilder &fbb, locoex::COpCall *copCall)
{
  // read attrs in FlexBuffer format and pass them to FlatBuffer builder
  flexbuffers::Builder flexbuf;
  {
    size_t map_start = flexbuf.StartMap();

    // Note: among attrs of COpCall, 'op' and 'name' won't be included into tflite file
    auto names = copCall->attr_names();
    for (auto name : names)
    {
      // attr<T>() presumably returns null for a non-matching type, so these act as a
      // type dispatch: try Int first, then Float
      if (auto int_val = copCall->attr<locoex::COpAttrType::Int>(name))
        flexbuf.Int(name.c_str(), int_val->val());
      else if (auto float_val = copCall->attr<locoex::COpAttrType::Float>(name))
        flexbuf.Float(name.c_str(), float_val->val());
      else
        // TODO Support more attribute types
        INTERNAL_EXN("Not supported type while writing flexbuffer");
    }

    flexbuf.EndMap(map_start);
    flexbuf.Finish();
  }

  auto offset = fbb.CreateVector(flexbuf.GetBuffer());

  return offset;
}
+
// Export a custom-op call: the opcode is registered by name rather than as a builtin,
// and the attributes travel as FlexBuffer-encoded custom options.
void OperationExporter::visit(locoex::COpCall *call)
{
  // Registering this custom op name into tflite Operator Codes table
  uint32_t op_idx = gd.registerCustomOpcode(call->op());

  // Custom ops take an arbitrary number of inputs
  std::vector<int32_t> inputs_vec;
  {
    inputs_vec.resize(call->arity());
    for (uint32_t i = 0; i < call->arity(); i++)
      inputs_vec[i] = get_tensor_index(call->arg(i));
  }

  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(call))};

  auto inputs = builder.CreateVector(inputs_vec);
  auto outputs = builder.CreateVector(outputs_vec);

  auto custom_options = CreateCOpCallOptions(builder, call);
  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
                                  tflite::BuiltinOptions_NONE, // builtin_options_type
                                  0,                           // built-in option
                                  custom_options,              // custom options
                                  tflite::CustomOptionsFormat_FLEXBUFFERS);

  gd._operators.push_back(op_offset);
}
+
+void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder,
+ SerializedModelData &data)
+{
+ // TODO Use explicit tagging to prevent possible mistake
+ auto isNoOp = [](loco::Node *node) {
+ if (node->arity() == 1)
+ {
+ assert(node->arg(0) != nullptr);
+ return get_tensor_index(node) == get_tensor_index(node->arg(0));
+ }
+ return false;
+ };
+
+ if (isNoOp(node))
+ {
+ // Skip if a given node is marked as NoOp (op with no effect) before
+ return;
+ }
+
+ if (auto canonical_node = dynamic_cast<loco::CanonicalNode *>(node))
+ { // TODO Consider removing this later
+ OperationExporter exporter{builder, data};
+ canonical_node->accept(&exporter);
+ }
+ else if (auto tfl_node = dynamic_cast<locoex::TFLNode *>(node))
+ {
+ OperationExporter exporter{builder, data};
+ tfl_node->accept(&exporter);
+ }
+ else if (dynamic_cast<locoex::COpNode *>(node))
+ {
+ OperationExporter exporter{builder, data};
+ exporter.visit(dynamic_cast<locoex::COpCall *>(node));
+ }
+ else
+ {
+ assert(false && "unsupported node found");
+ }
+}
+
+} // namespace
+
+namespace exo
+{
+namespace tflite_detail
+{
+
+void exportNodes(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &gd)
+{
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+ {
+ exportNode(node, builder, gd);
+ }
+}
+
+} // namespace tflite_detail
+} // namespace exo
diff --git a/compiler/exo/src/TFLite/TFLOperationExporter.h b/compiler/exo/src/TFLite/TFLOperationExporter.h
new file mode 100644
index 000000000..60f2b5eb2
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLOperationExporter.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFL_OPERATION_EXPORTER_H__
+#define __TFL_OPERATION_EXPORTER_H__
+
+#include "TFLExporterUtils.h"
+
+#include <loco/IR/Graph.h>
+
+namespace exo
+{
+namespace tflite_detail
+{
+
+/**
+ * @brief create Operators corresponding to model nodes
+ * @param nodes container with nodes
+ * @param gd information about serializer parts of model
+ */
+void exportNodes(loco::Graph *g, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &gd);
+
+} // namespace tflite_detail
+} // namespace exo
+
+#endif // __TFL_OPERATION_EXPORTER_H__
diff --git a/compiler/exo/src/TFLite/TFLTensorExporter.cpp b/compiler/exo/src/TFLite/TFLTensorExporter.cpp
new file mode 100644
index 000000000..66854ef87
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLTensorExporter.cpp
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLTensorExporter.h"
+#include "TFLTypeInference.h"
+#include "ShapeInference.h"
+
+// TODO Fix include style
+#include "loco/IR/Algorithm.h"
+#include "loco/IR/CanonicalNode.h"
+#include "loco/IR/CanonicalNodeVisitor.h"
+#include "loco/IR/DataTypeTraits.h"
+
+#include "Dialect/IR/TFLNodes.h"
+
+#include <oops/InternalExn.h>
+
+using namespace tflite;
+using namespace flatbuffers;
+
+namespace
+{
+
+using namespace exo;
+using namespace exo::tflite_detail;
+
// Aggregates everything needed to serialize one tflite tensor:
// name, element type, shape, and (for constants) the node holding the content.
class TFLTensorInfo
{
public:
  TFLTensorInfo() = default;

public:
  // Tensor name as it will appear in the serialized model
  void name(const std::string &name) { _name = name; }
  const std::string &name(void) const { return _name; }

public:
  // tflite element type of the tensor
  const tflite::TensorType &dtype(void) const { return _dtype; }
  void dtype(const tflite::TensorType &dtype) { _dtype = dtype; }

  // Shape description (dims plus rank-known flag) of the tensor
  const ShapeDescription &shape(void) const { return _shape; }
  void shape(const ShapeDescription &shape) { _shape = shape; }

public:
  // Constant content node, or nullptr when the tensor has no constant content
  locoex::TFLConst *tfl_content(void) const { return _tfl_content; }
  void tfl_content(locoex::TFLConst *c) { _tfl_content = c; }

private:
  std::string _name;

  tflite::TensorType _dtype;
  ShapeDescription _shape;

  // TODO Find a better design
  loco::ConstGen *_content = nullptr; // TODO deprecate
  locoex::TFLConst *_tfl_content = nullptr;
};
+
+using TFLTensorContext = std::vector<TFLTensorInfo>;
+
+struct NoOpDetector final : public loco::CanonicalNodeMutableVisitor<bool>
+{
+ bool visit(loco::BiasEncode *) final
+ {
+ // BiasEncode is always noop
+ return true;
+ }
+
+ bool visit(loco::FilterEncode *node) final
+ {
+ auto encoder = dynamic_cast<loco::PermutingEncoder<loco::Domain::Filter> *>(node->encoder());
+ auto perm = encoder->perm();
+
+ return isNHWC(perm);
+ }
+
+ bool visit(loco::FeatureEncode *node) final
+ {
+ auto encoder = dynamic_cast<loco::PermutingEncoder<loco::Domain::Feature> *>(node->encoder());
+ auto perm = encoder->perm();
+ return isNHWC(perm);
+ }
+
+ bool visit(loco::FeatureDecode *node) final
+ {
+ auto decoder = dynamic_cast<loco::PermutingDecoder<loco::Domain::Feature> *>(node->decoder());
+ auto perm = decoder->perm();
+ return isNHWC(perm);
+ }
+
+ // Return false by default
+ bool visit(loco::Node *) final { return false; }
+};
+
+bool isNoOp(loco::Node *node)
+{
+ if (auto canonical_node = dynamic_cast<loco::CanonicalNode *>(node))
+ {
+ NoOpDetector d;
+ return canonical_node->accept(&d);
+ }
+ return false;
+}
+
// Assign a tensor index to @p node and, unless the node is a no-op, append a
// corresponding TFLTensorInfo entry to @p ctx.
void allocateTFLiteTensor(loco::Node *node, TFLTensorContext &ctx)
{
  if (isNoOp(node))
  {
    // No-op nodes get no tensor of their own; they alias the argument's tensor index
    assert(node->arity() == 1 && node->arg(0) != nullptr);
    set_tensor_index(node, get_tensor_index(node->arg(0)));
    return;
  }

  // Tensor index is the entry's position in the context (and later the tensor table)
  auto tensor_index = static_cast<TFLTensorIndex>(ctx.size());
  // TODO Use Graph-level metadata for Input & Output
  auto tensor_name = "t_" + std::to_string(tensor_index);

  TFLTensorInfo tensor_info;

  tensor_info.name(tensor_name);
  tensor_info.dtype(TypeInference::get(node));
  tensor_info.shape(ShapeInference::get(node));

  // Non-null only when the node is a TFLConst; used later to emit the constant buffer
  tensor_info.tfl_content(dynamic_cast<locoex::TFLConst *>(node));

  set_tensor_index(node, tensor_index);

  ctx.emplace_back(tensor_info);
}
+
+} // namespace
+
+namespace
+{
+
// Serialize the dimension list of @p shape as a flatbuffer int32 vector
flatbuffers::Offset<Vector<int32_t>> encodeShape(FlatBufferBuilder &builder,
                                                 const ShapeDescription &shape)
{
  // Tensors of unknown rank cannot be serialized into the tflite shape field
  assert(shape._rank_known && "unknown number of dimensions is not supported");
  return builder.CreateVector(shape._dims);
}
+
// Encode an empty buffer, used for tensors whose content is produced at runtime
flatbuffers::Offset<tflite::Buffer> encodeOpBuffer(FlatBufferBuilder &builder)
{
  return CreateBuffer(builder);
}
+
// Generic fallback: nodes without serializable constant content get an empty buffer.
// The locoex::TFLConst specialization below overrides this for constants.
template <typename NodeT>
flatbuffers::Offset<tflite::Buffer> encodeOpBuffer(FlatBufferBuilder &builder, NodeT *)
{
  return CreateBuffer(builder);
}
+
+template <loco::DataType DT>
+flatbuffers::Offset<tflite::Buffer> encodeOpBufferByDType(FlatBufferBuilder &builder,
+ locoex::TFLConst *c)
+{
+ using NativeType = typename loco::DataTypeImpl<DT>::Type;
+
+ std::vector<NativeType> raw_data;
+ const uint32_t size = c->size<DT>();
+ raw_data.reserve(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ raw_data.push_back(c->at<DT>(i));
+ }
+ const size_t raw_size = size * sizeof(NativeType);
+ auto array_offset = builder.CreateVector(reinterpret_cast<uint8_t *>(raw_data.data()), raw_size);
+ return CreateBuffer(builder, array_offset);
+}
+
+template <>
+flatbuffers::Offset<tflite::Buffer> encodeOpBuffer(FlatBufferBuilder &builder, locoex::TFLConst *c)
+{
+ if (c->dtype() == loco::DataType::FLOAT32)
+ {
+ return encodeOpBufferByDType<loco::DataType::FLOAT32>(builder, c);
+ }
+ else if (c->dtype() == loco::DataType::S32)
+ {
+ return encodeOpBufferByDType<loco::DataType::S32>(builder, c);
+ }
+
+ INTERNAL_EXN_V("Unsupported datatype", oops::to_uint32(c->dtype()));
+}
+
+} // namespace
+
+namespace exo
+{
+namespace tflite_detail
+{
+
// Serialize one tensor described by @p info: register its shape, content buffer
// (empty unless the tensor is a constant), and the Tensor table entry itself.
void exportOpDefinedTensor(const TFLTensorInfo &info, FlatBufferBuilder &builder,
                           SerializedModelData &gd)
{
  // Create and register output tensor shape
  auto shape_offset = encodeShape(builder, info.shape());

  // encode and register output tensor buffer
  auto buffer = info.tfl_content() == nullptr ? encodeOpBuffer(builder)
                                              : encodeOpBuffer(builder, info.tfl_content());

  // Buffer id is its position in the model-level buffer table
  auto buffer_id = static_cast<uint32_t>(gd._buffers.size());
  gd._buffers.push_back(buffer);

  auto name_offset = builder.CreateString(info.name());
  auto tensor_offset = CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset,
                                    /*quantization*/ 0, /*is_variable*/ false);
  gd._tensors.push_back(tensor_offset);
}
+
// Allocate tensor indices for every node in the graph, then serialize the
// corresponding Tensor entries (plus the mandatory empty sentinel buffer).
void exportOpDefinedTensors(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &gd)
{
  TFLTensorContext tensor_ctx;

  for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
  {
    allocateTFLiteTensor(node, tensor_ctx);
  }

  // add one empty buffer
  // note: there's a comment in tflite fbs file
  // - Note the 0th entry of this array must be an empty buffer (sentinel).
  // - This is a convention so that tensors without a buffer can provide 0 as
  // - their buffer.
  // NOTE(review): this assumes gd._buffers is still empty at this point so the sentinel
  // lands at index 0 — confirm against the caller's ordering of the export steps.
  auto buffer = encodeOpBuffer(builder);
  gd._buffers.push_back(buffer);

  for (const auto &tensor_info : tensor_ctx)
  {
    exportOpDefinedTensor(tensor_info, builder, gd);
  }
}
+
+} // namespace tflite_detail
+} // namespace exo
diff --git a/compiler/exo/src/TFLite/TFLTensorExporter.h b/compiler/exo/src/TFLite/TFLTensorExporter.h
new file mode 100644
index 000000000..97e702665
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLTensorExporter.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFL_TENSOR_EXPORTER_H__
+#define __TFL_TENSOR_EXPORTER_H__
+
+#include "TFLExporterUtils.h"
+
+#include <loco/IR/Graph.h>
+
+#include <flatbuffers/flatbuffers.h>
+
+namespace exo
+{
+namespace tflite_detail
+{
+
+/**
+ * @brief create Tensors corresponding to results of all nodes in graph
+ * @param computational graph
+ * @param gd information about serialized parts of model
+ */
+void exportOpDefinedTensors(loco::Graph *g, flatbuffers::FlatBufferBuilder &builder,
+ SerializedModelData &gd);
+
+} // namespace tflite_detail
+} // namespace exo
+
+#endif // __TFL_TENSOR_EXPORTER_H__
diff --git a/compiler/exo/src/TFLite/TFLTypeInference.cpp b/compiler/exo/src/TFLite/TFLTypeInference.cpp
new file mode 100644
index 000000000..8d6bb8d8c
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLTypeInference.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLTypeInference.h"
+
+#include "schema_generated.h"
+
+#include "Dialect/Service/TFLTypeInferenceRule.h"
+#include "Dialect/IR/TFLDialect.h"
+
+#include <loco/IR/CanonicalNode.h>
+#include <loco/IR/CanonicalNodeVisitor.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/TypeInference.h>
+
+#include <locoex/COpDialect.h>
+#include <locoex/Service/COpTypeInference.h>
+
+#include <oops/InternalExn.h>
+
+#include <stdex/Memory.h>
+
+#include <stdexcept>
+#include <type_traits>
+
+namespace
+{
+
+tflite::TensorType translateLocoTypeToTFLite(loco::DataType dtype)
+{
+ switch (dtype)
+ {
+ case loco::DataType::U8:
+ return tflite::TensorType_UINT8;
+ // case loco::DataType::U16: unsupported
+ // case loco::DataType::U32: unsupported
+ // case loco::DataType::U64: unsupported
+ case loco::DataType::S8:
+ return tflite::TensorType_INT8;
+ case loco::DataType::S16:
+ return tflite::TensorType_INT16;
+ case loco::DataType::S32:
+ return tflite::TensorType_INT32;
+ case loco::DataType::S64:
+ return tflite::TensorType_INT64;
+ case loco::DataType::FLOAT16:
+ return tflite::TensorType_FLOAT16;
+ case loco::DataType::FLOAT32:
+ return tflite::TensorType_FLOAT32;
+ // case loco::DataType::FLOAT64: unsupported
+ default:
+ break;
+ }
+
+ INTERNAL_EXN_V("Trying to converte unsupported loco dtype", oops::to_uint32(dtype));
+}
+
+} // namespace
+
+namespace exo
+{
+
// Return the tflite TensorType for @p node; type inference must already have run
tflite::TensorType TypeInference::get(loco::Node *node)
{
  // A node without an inferred dtype cannot be exported
  assert(loco::dtype_known(node));
  return translateLocoTypeToTFLite(loco::dtype_get(node));
}
+
+} // namespace exo
diff --git a/compiler/exo/src/TFLite/TFLTypeInference.h b/compiler/exo/src/TFLite/TFLTypeInference.h
new file mode 100644
index 000000000..3d3a2e480
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLTypeInference.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFL_TYPE_INFERENCE_H__
+#define __TFL_TYPE_INFERENCE_H__
+
+#include "TFLExporterUtils.h"
+
+#include <loco/IR/Nodes.h>
+
+namespace exo
+{
+
+/**
+ * @brief Get the type of each node as NodeAnnotation
+ *
+ * HOW TO USE
+ *
+ * TypeInference::get(g->nodes()->at(0));
+ * TypeInference::get(g->nodes()->at(...));
+ */
struct TypeInference
{
  // Returns the tflite TensorType corresponding to the (already inferred) dtype of @p node
  static tflite::TensorType get(loco::Node *node);
};
+
+} // namespace exo
+
+#endif // __TFL_TYPE_INFERENCE_H__
diff --git a/compiler/exo/src/TFLite/TFLTypeInference.test.cpp b/compiler/exo/src/TFLite/TFLTypeInference.test.cpp
new file mode 100644
index 000000000..0712f0a25
--- /dev/null
+++ b/compiler/exo/src/TFLite/TFLTypeInference.test.cpp
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFLTypeInference.h"
+#include "Pass/TypeInferencePass.h"
+
+#include <loco/IR/PermutingCodec.h>
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+using stdex::make_unique;
+
+namespace
+{
+
+class Sequential
+{
+public:
+ loco::Pull *addPullLayer(const loco::DataType &dtype = loco::DataType::FLOAT32)
+ {
+ loco::Pull *pull = _graph.nodes()->create<loco::Pull>();
+
+ auto graph_input = _graph.inputs()->create();
+ graph_input->name("graph_input");
+ loco::link(graph_input, pull);
+
+ pull->dtype(dtype);
+ setSampleShape(pull);
+
+ return last(pull);
+ }
+
+ loco::ReLU *addReLULayer(void)
+ {
+ loco::ReLU *relu = _graph.nodes()->create<loco::ReLU>();
+
+ relu->input(_last);
+
+ return last(relu);
+ }
+
+ loco::Push *addPushLayer(void)
+ {
+ loco::Push *push = _graph.nodes()->create<loco::Push>();
+
+ auto graph_output = _graph.outputs()->create();
+ graph_output->name("graph_output");
+ loco::link(graph_output, push);
+
+ push->from(_last);
+
+ return last(push);
+ }
+
+ loco::Graph *graph() { return &_graph; }
+
+private:
+ template <typename T> uint32_t setSampleShape(T *op)
+ {
+ const uint32_t n = 1;
+ const uint32_t h = 100;
+ const uint32_t w = 100;
+ const uint32_t c = 3;
+ op->rank(4);
+ op->dim(0).set(n);
+ op->dim(1).set(c);
+ op->dim(2).set(h);
+ op->dim(3).set(w);
+ return n * h * w * c;
+ }
+
+ template <typename T> T *last(T *node)
+ {
+ _last = node;
+ return node;
+ }
+
+private:
+ loco::Graph _graph;
+ loco::Node *_last;
+};
+
+struct TypeInferenceTest : public Sequential, public ::testing::Test
+{
+ virtual ~TypeInferenceTest() = default;
+};
+
+} // namespace
+
+// TypeInference SHOULD PROPAGATE type information properly
+TEST_F(TypeInferenceTest, Regression_0000)
+{
+ auto pull = addPullLayer(loco::DataType::S8);
+ auto relu = addReLULayer();
+ auto push = addPushLayer();
+
+ using namespace exo;
+
+ TypeInferencePass type_inf_pass;
+ type_inf_pass.run(graph());
+
+ ASSERT_EQ(TypeInference::get(relu), tflite::TensorType_INT8);
+ ASSERT_EQ(TypeInference::get(push), tflite::TensorType_INT8);
+}
diff --git a/compiler/exo/src/TestGraph.h b/compiler/exo/src/TestGraph.h
new file mode 100644
index 000000000..f919cc9ae
--- /dev/null
+++ b/compiler/exo/src/TestGraph.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TEST_GRAPH_H__
+#define __TEST_GRAPH_H__
+
+#include "Dialect/IR/TFLNodes.h"
+#include "GraphBlock.h"
+#include "TestHelper.h"
+
+#include <loco.h>
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+namespace exo
+{
+namespace test
+{
+
+class TestGraph
+{
+public:
+ std::unique_ptr<loco::Graph> g;
+ loco::Pull *pull;
+ loco::Push *push;
+
+ TestGraph() // creates Pull and Push
+ {
+ g = loco::make_graph();
+
+ pull = g->nodes()->create<loco::Pull>();
+
+ push = g->nodes()->create<loco::Push>();
+
+ auto input = g->inputs()->create();
+ {
+ input->name("input");
+ loco::link(input, pull);
+ }
+ auto output = g->outputs()->create();
+ {
+ output->name("output");
+ loco::link(output, push);
+ }
+
+ _next_input = pull;
+ }
+
+ loco::Graph *graph() { return g.get(); }
+
+ /// @brief Creates node with NO arg and appends it to graph
+ template <class T> T *append()
+ {
+ auto node = g->nodes()->create<T>();
+ _next_input = node;
+
+ return node;
+ }
+
+ /// @brief Creates op T (arity=1) with arg1 as an input and appends it to graph
+ template <class T> T *append(loco::Node *arg1)
+ {
+ auto node = g->nodes()->create<T>();
+ setInput(node, arg1);
+ _next_input = node;
+
+ return node;
+ }
+
+ /// @brief Creates op T (arity=2) with arg1, arg2 as inputs and appends it to graph
+ template <class T> T *append(loco::Node *arg1, loco::Node *arg2)
+ {
+ auto node = g->nodes()->create<T>();
+ setInput(node, arg1, arg2);
+ _next_input = node;
+
+ return node;
+ }
+
+ /// @brief Creates op T (arity=3) with arg1, arg2, arg3 as inputs and appends it to graph
+ template <class T> T *append(loco::Node *arg1, loco::Node *arg2, loco::Node *arg3)
+ {
+ auto node = g->nodes()->create<T>();
+ setInput(node, arg1, arg2, arg3);
+ _next_input = node;
+
+ return node;
+ }
+
+ // push will get the last appended node
+ void complete() { push->from(_next_input); }
+
+ void complete(loco::Node *last_node) { push->from(last_node); }
+
+private:
+ // arity 1
+ void setInput(loco::Node *node, loco::Node *) { assert(false && "NYI"); };
+
+ void setInput(loco::AvgPool2D *node, loco::Node *input) { node->ifm(input); }
+ void setInput(loco::BiasDecode *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::BiasEncode *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::FeatureDecode *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::FeatureEncode *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::MaxPool2D *node, loco::Node *input) { node->ifm(input); }
+ void setInput(loco::Push *node, loco::Node *input) { node->from(input); };
+ void setInput(loco::ReLU *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::ReLU6 *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::Tanh *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::TensorTranspose *node, loco::Node *input) { node->input(input); };
+
+ void setInput(locoex::TFLAveragePool2D *node, loco::Node *input) { node->value(input); };
+ void setInput(locoex::TFLMaxPool2D *node, loco::Node *input) { node->value(input); };
+ void setInput(locoex::TFLRelu *node, loco::Node *input) { node->features(input); };
+ void setInput(locoex::TFLRelu6 *node, loco::Node *input) { node->features(input); };
+
+ // arity 2
+ void setInput(loco::Node *node, loco::Node *, loco::Node *) { assert(false && "NYI"); };
+
+ void setInput(loco::Conv2D *node, loco::Node *input, loco::Node *filter)
+ {
+ node->ifm(input);
+ node->ker(filter);
+ }
+
+ void setInput(loco::EltwiseAdd *node, loco::Node *arg1, loco::Node *arg2)
+ {
+ node->lhs(arg1);
+ node->rhs(arg2);
+ };
+
+ void setInput(loco::FeatureBiasAdd *node, loco::Node *arg1, loco::Node *arg2)
+ {
+ node->value(arg1);
+ node->bias(arg2);
+ };
+
+ void setInput(locoex::TFLAdd *node, loco::Node *arg1, loco::Node *arg2)
+ {
+ node->x(arg1);
+ node->y(arg2);
+ };
+
+ void setInput(locoex::TFLMul *node, loco::Node *arg1, loco::Node *arg2)
+ {
+ node->x(arg1);
+ node->y(arg2);
+ };
+
+ void setInput(locoex::TFLSub *node, loco::Node *arg1, loco::Node *arg2)
+ {
+ node->x(arg1);
+ node->y(arg2);
+ };
+
+ void setInput(locoex::TFLTranspose *node, loco::Node *arg1, loco::Node *arg2)
+ {
+ node->a(arg1);
+ node->perm(arg2);
+ };
+
+ // arity 3
+ void setInput(loco::Node *node, loco::Node *, loco::Node *, loco::Node *)
+ {
+ assert(false && "NYI");
+ };
+
+ void setInput(locoex::TFLConv2D *node, loco::Node *input, loco::Node *filter, loco::Node *bias)
+ {
+ node->input(input);
+ node->filter(filter);
+ node->bias(bias);
+ }
+
+private:
+ loco::Node *_next_input;
+};
+
+enum class ExampleGraphType
+{
+ FeatureBiasAdd,
+ ConstGen_ReLU,
+ FilterEncode_FilterDecode,
+ Transpose,
+
+ TFLTranspose,
+};
+
+template <ExampleGraphType T> class ExampleGraph;
+
+/**
+ * @brief Class to create the following:
+ *
+ * Pull - FeatureEncoder - FeatureBiasAdd - FeatureDecode - Push
+ * |
+ * ConstGen - BiasEncode --+
+ */
+template <> class ExampleGraph<ExampleGraphType::FeatureBiasAdd> : public TestGraph
+{
+public:
+ loco::FeatureEncode *fea_enc = nullptr;
+ loco::ConstGen *constgen = nullptr;
+ loco::BiasEncode *bias_enc = nullptr;
+ loco::FeatureBiasAdd *fea_bias_add = nullptr;
+ loco::FeatureDecode *fea_dec = nullptr;
+
+public:
+ ExampleGraph()
+ {
+ fea_enc = exo::make_feature_encode<exo::FeatureLayout::NHWC>(pull);
+ constgen = append<loco::ConstGen>();
+ bias_enc = append<loco::BiasEncode>(constgen);
+ fea_bias_add = append<loco::FeatureBiasAdd>(fea_enc, bias_enc);
+ fea_dec = exo::make_feature_decode<exo::FeatureLayout::NHWC>(fea_bias_add);
+ complete(fea_dec);
+ }
+};
+
+/**
+ * @brief Class to create the following:
+ *
+ * ConstGen -- ReLU -- Push
+ */
+template <> class ExampleGraph<ExampleGraphType::ConstGen_ReLU> : public TestGraph
+{
+public:
+ loco::ConstGen *constgen = nullptr;
+ loco::ReLU *relu = nullptr;
+
+public:
+ ExampleGraph()
+ {
+ constgen = append<loco::ConstGen>();
+ relu = append<loco::ReLU>(constgen);
+ complete(relu);
+ }
+};
+
+/**
+ * @brief Class to create the following:
+ *
+ * Pull -- Transpose -- Push
+ */
+template <> class ExampleGraph<ExampleGraphType::Transpose> : public TestGraph
+{
+public:
+ loco::TensorTranspose *transpose = nullptr;
+
+public:
+ ExampleGraph()
+ {
+ transpose = append<loco::TensorTranspose>(pull);
+ complete(transpose);
+ }
+};
+
+/**
+ * @brief Class to create the following:
+ *
+ * Pull -- FilterEncode -- FilterDecode -- Push
+ */
+template <> class ExampleGraph<ExampleGraphType::FilterEncode_FilterDecode> : public TestGraph
+{
+public:
+ loco::FilterEncode *filterEncode = nullptr;
+ loco::FilterDecode *filterDecode = nullptr;
+
+public:
+ ExampleGraph()
+ {
+ filterEncode = exo::make_filter_encode<exo::FilterLayout::HWIO>(pull); // from Tensorflow
+ filterDecode =
+ exo::make_filter_decode<exo::FilterLayout::OHWI>(filterEncode); // to Tensorflow Lite
+ complete(filterDecode);
+ }
+};
+
+/**
+ * @brief Class to create the following:
+ *
+ * Pull -- TFLTranspose -- Push
+ */
+template <> class ExampleGraph<ExampleGraphType::TFLTranspose> : public TestGraph
+{
+public:
+ loco::ConstGen *const_perm = nullptr;
+ locoex::TFLTranspose *tfl_transpose = nullptr;
+
+public:
+ ExampleGraph()
+ {
+ const_perm = append<loco::ConstGen>();
+ tfl_transpose = append<locoex::TFLTranspose>(pull, const_perm);
+ complete(tfl_transpose);
+ }
+};
+
+} // namespace test
+} // namespace exo
+
+#endif // __TEST_GRAPH_H__
diff --git a/compiler/exo/src/TestHelper.h b/compiler/exo/src/TestHelper.h
new file mode 100644
index 000000000..1a3de50f5
--- /dev/null
+++ b/compiler/exo/src/TestHelper.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TEST_HELPER_H__
+#define __TEST_HELPER_H__
+
+#include "Check.h"
+#include "ProgressReporter.h"
+#include "Passes.h"
+
+#include <logo/Pass.h>
+#include <logo/Phase.h>
+
+#include <loco.h>
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+/**
+ * @brief Check the number of nodes in a graph starting from OUTPUTS
+ */
+#define EXO_TEST_ASSERT_NODE_COUNT(OUTPUTS, COUNT) \
+ { \
+ auto v = loco::postorder_traversal(OUTPUTS); \
+ ASSERT_EQ(v.size(), (COUNT)); \
+ }
+
+namespace exo
+{
+namespace test
+{
+
+/**
+ * @brief Phase for tests, used to test passes. This phase initially adds TypeInferencePass
+ * and ShapeInferencePass
+ */
+class TypeShapeReadyPhase
+{
+public:
+ TypeShapeReadyPhase()
+ {
+ // Type and Shape inference is a prerequisite for running other tests
+ _phase.emplace_back(stdex::make_unique<::exo::TypeInferencePass>());
+ _phase.emplace_back(stdex::make_unique<::exo::ShapeInferencePass>());
+ }
+
+ template <typename PassT> void add_pass() { _phase.emplace_back(stdex::make_unique<PassT>()); }
+
+ void run(loco::Graph *g)
+ {
+ const auto restart = logo::PhaseStrategy::Restart;
+ logo::PhaseRunner<restart> phase_runner{g};
+
+ ::exo::ProgressReporter prog(g, restart);
+ phase_runner.attach(&prog);
+ phase_runner.run(_phase);
+ }
+
+private:
+ logo::Phase _phase;
+};
+
+/**
+ * @brief Get the only succ object of type LocoNodeT. (The name `only succ` comes from the English word
+ * `only child`.)
+ * parent must have 1 succ only.
+ * When there is no succ of type LocoNodeT, nullptr will be returned.
+ */
+template <typename LocoNodeT> inline LocoNodeT *get_only_succ(loco::Node *parent)
+{
+ auto succs = loco::succs(parent);
+ EXO_ASSERT(succs.size() == 1, "parent has more than 1 succs.");
+
+ return dynamic_cast<LocoNodeT *>(*succs.begin());
+}
+
+template <typename T> inline T *find_first_node_bytype(loco::Graph *g)
+{
+ T *first_node = nullptr;
+ loco::Graph::NodeContext *nodes = g->nodes();
+ uint32_t count = nodes->size();
+
+ for (uint32_t i = 0; i < count; ++i)
+ {
+ first_node = dynamic_cast<T *>(nodes->at(i));
+ if (first_node != nullptr)
+ break;
+ }
+
+ return first_node;
+}
+
+} // namespace test
+} // namespace exo
+
+#endif // __TEST_HELPER_H__
diff --git a/compiler/fipe/CMakeLists.txt b/compiler/fipe/CMakeLists.txt
new file mode 100644
index 000000000..2cabf6279
--- /dev/null
+++ b/compiler/fipe/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_library(fipe INTERFACE)
+target_include_directories(fipe INTERFACE include)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(fipe_test fipe.test.cpp)
+target_link_libraries(fipe_test fipe)
diff --git a/compiler/fipe/fipe.test.cpp b/compiler/fipe/fipe.test.cpp
new file mode 100644
index 000000000..347f26f9b
--- /dev/null
+++ b/compiler/fipe/fipe.test.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fipe.h"
+
+#include <string>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+int dec(int n) { return n - 1; }
+
+} // namespace
+
+TEST(FunctionPipeTests, top_level_function)
+{
+ // GCC rejects this code if dec is not wrapped by "fipe::wrap"
+ // TODO Find a better way
+ ASSERT_EQ(4 | fipe::wrap(dec), 3);
+}
+
+TEST(FunctionPipeTests, static_method)
+{
+ struct Sample
+ {
+ static int dbl(int n) { return n * 2; }
+ };
+
+ ASSERT_EQ(4 | fipe::wrap(Sample::dbl), 8);
+}
+
+TEST(FunctionPipeTests, normal_method)
+{
+ struct Sample
+ {
+ public:
+ int shift(int n) { return n + shiftamt; }
+
+ private:
+ int shiftamt = 6;
+ };
+
+ using namespace std::placeholders;
+
+ Sample s;
+
+ auto value = 4 | std::bind(&Sample::shift, &s, _1);
+
+ ASSERT_EQ(value, 10);
+}
+
+TEST(FunctionPipeTests, lambda)
+{
+ auto inc = [](int n) { return n + 1; };
+ ASSERT_EQ(4 | inc, 5);
+}
+
+TEST(FunctionPipeTests, functor) { ASSERT_EQ(4 | std::negate<int>(), -4); }
diff --git a/compiler/fipe/include/fipe.h b/compiler/fipe/include/fipe.h
new file mode 100644
index 000000000..0a661aa04
--- /dev/null
+++ b/compiler/fipe/include/fipe.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIPE_H__
+#define __FIPE_H__
+
+#include <functional>
+#include <utility>
+
+namespace fipe
+{
+
+/**
+ * @brief Convert a function pointer as a callable std::function
+ *
+ * NOTE "fipe" works only for unary functions.
+ */
+template <typename Ret, typename Arg> std::function<Ret(Arg)> wrap(Ret (*p)(Arg)) { return p; }
+
+} // namespace fipe
+
+template <typename T, typename Callable> auto operator|(T &&v, Callable &&f) -> decltype(f(v))
+{
+ return std::forward<Callable>(f)(v);
+}
+
+#endif // __FIPE_H__
diff --git a/compiler/gen-core/CMakeLists.txt b/compiler/gen-core/CMakeLists.txt
new file mode 100644
index 000000000..3732f493b
--- /dev/null
+++ b/compiler/gen-core/CMakeLists.txt
@@ -0,0 +1,17 @@
+find_package(HDF5 COMPONENTS CXX QUIET)
+
+if(NOT HDF5_FOUND)
+ return()
+endif(NOT HDF5_FOUND)
+
+nnas_find_package(TensorFlow QUIET)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(gen_core STATIC ${SOURCES})
+set_target_properties(gen_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(gen_core PUBLIC include)
+target_include_directories(gen_core PRIVATE ${HDF5_INCLUDE_DIRS})
+target_link_libraries(gen_core ${HDF5_CXX_LIBRARIES})
+target_link_libraries(gen_core tfinfo_v2)
+target_link_libraries(gen_core angkor)
diff --git a/compiler/gen-core/README.md b/compiler/gen-core/README.md
new file mode 100644
index 000000000..cc98ef00b
--- /dev/null
+++ b/compiler/gen-core/README.md
@@ -0,0 +1,3 @@
+# gen-core
+
+_gen-core_ is a common library used by _gen-tf-input_, _gen-tf-output_, and _gen-tflite-output_.
diff --git a/compiler/gen-core/include/gencore/HDF5Common.h b/compiler/gen-core/include/gencore/HDF5Common.h
new file mode 100644
index 000000000..87367c99c
--- /dev/null
+++ b/compiler/gen-core/include/gencore/HDF5Common.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HDF5COMMON_H__
+#define __HDF5COMMON_H__
+
+#include <string>
+
+namespace gencore
+{
+
+/**
+ * @brief Construct HDF5-compatible dataset name from a given string
+ *
+ * When someone attempts to access 'A/B/C' dataset, HDF5 tries to open
+ * dataset C in group B in top-level group A, which means that dataset
+ * names SHOULD NOT contain '/' in it.
+ *
+ * This mangle function replaces all the occurrences of '/' in a given
+ * string with '_' to construct HDF5-compatible dataset name.
+ */
+std::string mangle(const std::string &);
+
+#if 0
+Let us assume that a tensor context includes N + 1 tensors.
+
+Then, HDF5 export will generate a HDF5 file whose structure is given as follows:
+[value group]/
+ [file 0] <- A dataset that contains the value of 1st (=0) tensor
+ [file 1]
+ ...
+ [file N]
+[name group]/
+ [file 0] <- An attribute that contains the name of 1st (=0) tensor
+ [file 1]
+ ...
+ [file N]
+#endif
+
+/// @brief Return the name of "value group"
+std::string value_grpname(void);
+/// @brief Return the name of n-th tensor dataset
+std::string value_filename(uint32_t n);
+
+/// @brief Return the name of "name group"
+std::string name_grpname(void);
+/// @brief Return the name of n-th tensor attribute
+std::string name_filename(uint32_t n);
+
+} // namespace gencore
+
+#endif // __HDF5COMMON_H__
diff --git a/compiler/gen-core/include/gencore/HDF5Exporter.h b/compiler/gen-core/include/gencore/HDF5Exporter.h
new file mode 100644
index 000000000..10cc1c613
--- /dev/null
+++ b/compiler/gen-core/include/gencore/HDF5Exporter.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GENCORE_HDF5EXPORTER_H__
+#define __GENCORE_HDF5EXPORTER_H__
+
+#include "HDF5Common.h"
+
+#include <angkor/TensorShape.h>
+#include <nncc/core/ADT/tensor/Reader.h>
+
+#include <H5Cpp.h>
+
+namespace gencore
+{
+
+class H5Exporter
+{
+public:
+ H5Exporter(const std::string &path) : _file{path.c_str(), H5F_ACC_TRUNC}
+ {
+ _value_grp = _file.createGroup(value_grpname());
+ _name_grp = _file.createGroup(name_grpname());
+ }
+
+public:
+ template <typename DT>
+ void write(uint32_t nth, const std::string &name, const angkor::TensorShape &shape,
+ const nncc::core::ADT::tensor::Reader<DT> &buf_reader);
+
+private:
+ H5::H5File _file;
+ H5::Group _value_grp;
+ H5::Group _name_grp;
+};
+
+} // namespace gencore
+
+#endif // __GENCORE_HDF5EXPORTER_H__
diff --git a/compiler/gen-core/include/gencore/HDF5Importer.h b/compiler/gen-core/include/gencore/HDF5Importer.h
new file mode 100644
index 000000000..853744199
--- /dev/null
+++ b/compiler/gen-core/include/gencore/HDF5Importer.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GENCORE_HDF5IMPORTER_H__
+#define __GENCORE_HDF5IMPORTER_H__
+
+#include "HDF5Common.h"
+
+#include <tfinfo-v2/TensorSignature.h>
+
+#include <angkor/TensorShape.h>
+#include <nncc/core/ADT/tensor/Accessor.h>
+
+#include <H5Cpp.h>
+
+namespace gencore
+{
+
+class HDF5Importer
+{
+public:
+ HDF5Importer(const std::string &path) : _file{path, H5F_ACC_RDONLY}
+ {
+ _value_grp = _file.openGroup(value_grpname());
+ }
+
+public:
+ /**
+ * @brief Reads tensor data from file and store it into buf_accessor
+ */
+ template <typename DT>
+ void read(uint32_t nth, const std::string &name, const angkor::TensorShape &shape,
+ nncc::core::ADT::tensor::Accessor<DT> *buf_accessor);
+
+private:
+ H5::H5File _file;
+ H5::Group _value_grp;
+};
+
+} // namespace gencore
+
+#endif // __GENCORE_HDF5IMPORTER_H__
diff --git a/compiler/gen-core/requires.cmake b/compiler/gen-core/requires.cmake
new file mode 100644
index 000000000..a424f1f4a
--- /dev/null
+++ b/compiler/gen-core/requires.cmake
@@ -0,0 +1,2 @@
+require("tfinfo-v2")
+require("angkor")
diff --git a/compiler/gen-core/src/HDF5Common.cpp b/compiler/gen-core/src/HDF5Common.cpp
new file mode 100644
index 000000000..c254d9e1e
--- /dev/null
+++ b/compiler/gen-core/src/HDF5Common.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gencore/HDF5Common.h"
+
+namespace gencore
+{
+
+std::string mangle(const std::string &name)
+{
+ std::string res = name;
+
+ for (uint32_t n = 0; n < res.size(); ++n)
+ {
+ if (res.at(n) == '/')
+ {
+ res.at(n) = '_';
+ }
+ }
+
+ return res;
+}
+
+std::string value_grpname(void) { return "value"; }
+std::string value_filename(uint32_t n) { return std::to_string(n); }
+
+std::string name_grpname(void) { return "name"; }
+std::string name_filename(uint32_t n) { return std::to_string(n); }
+
+} // namespace gencore
diff --git a/compiler/gen-core/src/HDF5Exporter.cpp b/compiler/gen-core/src/HDF5Exporter.cpp
new file mode 100644
index 000000000..6b77710c4
--- /dev/null
+++ b/compiler/gen-core/src/HDF5Exporter.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gencore/HDF5Exporter.h"
+
+#include <angkor/TensorShape.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Reader.h>
+
+#include <H5Cpp.h>
+
+namespace
+{
+
+template <typename DT> H5::PredType get_h5_datatype();
+
+template <> H5::PredType get_h5_datatype<float>() { return H5::PredType::NATIVE_FLOAT; }
+
+template <typename DT> H5::PredType get_h5_store_format();
+
+template <> H5::PredType get_h5_store_format<float>() { return H5::PredType::IEEE_F32BE; }
+
+} // namespace
+
+namespace gencore
+{
+
+template <typename DT>
+void H5Exporter::write(uint32_t nth, const std::string &name, const angkor::TensorShape &shape,
+ const nncc::core::ADT::tensor::Reader<DT> &buf_reader)
+{
+ // Record tensor values
+ {
+ const auto rank = shape.rank();
+
+ hsize_t dims[rank];
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ dims[axis] = shape.dim(axis);
+ }
+
+ H5::DataSpace dataspace(rank, dims);
+
+ auto dataset =
+ _value_grp.createDataSet(value_filename(nth), get_h5_store_format<DT>(), dataspace);
+
+ DT *h5_data = new DT[nncc::core::ADT::tensor::num_elements(shape)];
+ {
+ using nncc::core::ADT::tensor::IndexEnumerator;
+ using nncc::core::ADT::tensor::LexicalLayout;
+
+ LexicalLayout layout{};
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ auto i = e.current();
+ h5_data[layout.offset(shape, i)] = buf_reader.at(i);
+ }
+ }
+
+ dataset.write(h5_data, get_h5_datatype<DT>());
+
+ delete[] h5_data;
+ }
+
+ // Record name
+ {
+ H5::DataSpace name_dataspace(H5S_SCALAR);
+ H5::StrType name_datatype(H5::PredType::C_S1, name.size());
+
+ auto name_attr = _name_grp.createAttribute(value_filename(nth), name_datatype, name_dataspace);
+
+ name_attr.write(name_datatype, name);
+ }
+}
+
+// template instantiation
+template void H5Exporter::write<float>(uint32_t, const std::string &, const angkor::TensorShape &,
+ const nncc::core::ADT::tensor::Reader<float> &);
+
+} // namespace gencore
diff --git a/compiler/gen-core/src/HDF5Importer.cpp b/compiler/gen-core/src/HDF5Importer.cpp
new file mode 100644
index 000000000..83691b20b
--- /dev/null
+++ b/compiler/gen-core/src/HDF5Importer.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gencore/HDF5Importer.h"
+#include "gencore/HDF5Common.h"
+
+#include <angkor/TensorShape.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/Accessor.h>
+
+#include <H5Cpp.h>
+
+#include <cassert>
+
+namespace
+{
+
+template <typename DT> H5::PredType get_h5_datatype();
+
+template <> H5::PredType get_h5_datatype<float>() { return H5::PredType::NATIVE_FLOAT; }
+
+template <typename DT> H5::PredType get_h5_store_format();
+
+template <> H5::PredType get_h5_store_format<float>() { return H5::PredType::IEEE_F32BE; }
+
+} // namespace
+
+namespace gencore
+{
+
+template <typename DT>
+void HDF5Importer::read(uint32_t nth, const std::string &name, const angkor::TensorShape &shape,
+ nncc::core::ADT::tensor::Accessor<DT> *buf_accessor)
+{
+ assert(buf_accessor != nullptr);
+
+ try
+ {
+ auto dataset = _value_grp.openDataSet(value_filename(nth));
+
+ assert(dataset.getDataType() == get_h5_store_format<DT>());
+
+ std::vector<DT> file_buf;
+ {
+ file_buf.resize(nncc::core::ADT::tensor::num_elements(shape));
+ dataset.read(file_buf.data(), get_h5_datatype<DT>());
+ }
+
+ using nncc::core::ADT::tensor::IndexEnumerator;
+ using nncc::core::ADT::tensor::LexicalLayout;
+
+ LexicalLayout layout{};
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ auto i = e.current();
+ buf_accessor->at(i) = file_buf[layout.offset(shape, i)];
+ }
+ }
+ catch (const H5::FileIException &)
+ {
+ // Skip if data is not present in HDF5 file
+ }
+}
+
+// template instantiation
+template void HDF5Importer::read<float>(uint32_t, const std::string &, const angkor::TensorShape &,
+ nncc::core::ADT::tensor::Accessor<float> *);
+
+} // namespace gencore
diff --git a/compiler/gen-tf-input/CMakeLists.txt b/compiler/gen-tf-input/CMakeLists.txt
new file mode 100644
index 000000000..12b78b5b3
--- /dev/null
+++ b/compiler/gen-tf-input/CMakeLists.txt
@@ -0,0 +1,4 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+# making gen-tf-input
+add_executable(gen-tf-input ${SOURCES})
diff --git a/compiler/gen-tf-input/README.md b/compiler/gen-tf-input/README.md
new file mode 100644
index 000000000..2ea6f71b4
--- /dev/null
+++ b/compiler/gen-tf-input/README.md
@@ -0,0 +1,11 @@
+# gen-tf-input
+
+_gen-tf-input_ generates random input data for testing in HDF5 format.
+
+# How to use
+
+Use the following to generate a file that contains random values of input tensors:
+
+```
+$ gen-tf-input <info_v2_path> <pb_path> <file_path_to_generate>
+```
diff --git a/compiler/gen-tf-input/src/Driver.cpp b/compiler/gen-tf-input/src/Driver.cpp
new file mode 100644
index 000000000..f2ce20f16
--- /dev/null
+++ b/compiler/gen-tf-input/src/Driver.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cassert>
+#include <iostream>
+
+namespace
+{
+
+void print_help()
+{
+ std::cerr << "This generates a file that contains random values of input tensors" << std::endl
+ << "Usage:" << std::endl
+ << " gen-tf-input <info_v2_path> <pb_path> <file_path_to_generate>" << std::endl;
+}
+
+} // namespace
+
+namespace
+{
+
+void gen_input(const std::string info_v2_path, const std::string pb_path,
+ const std::string input_path)
+{
+ // TODO write code
+ assert("Not yet written" && nullptr);
+}
+
+} // namespace
+
+int main(int argc, char **argv)
+{
+ // TODO We need better args parsing in future
+ if (argc != 4)
+ {
+ print_help();
+ return 255;
+ }
+
+ gen_input(argv[1], argv[2], argv[3]);
+
+ return 0;
+}
diff --git a/compiler/gen-tf-output/CMakeLists.txt b/compiler/gen-tf-output/CMakeLists.txt
new file mode 100644
index 000000000..c2b91a9cd
--- /dev/null
+++ b/compiler/gen-tf-output/CMakeLists.txt
@@ -0,0 +1,3 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(gen-tf-output ${SOURCES})
diff --git a/compiler/gen-tf-output/README.md b/compiler/gen-tf-output/README.md
new file mode 100644
index 000000000..ca54c75d5
--- /dev/null
+++ b/compiler/gen-tf-output/README.md
@@ -0,0 +1,13 @@
+# gen-tf-output
+
+_gen-tf-output_ generates a file containing the result of running TensorFlow in HDF5 format.
+
+# How to use
+
+Use the following:
+
+```
+$ gen-tf-output <info_v2_path> <pb_path> <input_of_TensorFlow_path> <output_path_to_generate>
+```
+
+Use _gen-tf-input_ to generate `<input_of_TensorFlow_path>` file.
diff --git a/compiler/gen-tf-output/src/Driver.cpp b/compiler/gen-tf-output/src/Driver.cpp
new file mode 100644
index 000000000..209651987
--- /dev/null
+++ b/compiler/gen-tf-output/src/Driver.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <stdexcept>
+#include <string>
+
+namespace
+{
+
+void print_help()
+{
+ std::cerr << "This generates a file that contains result of running TensorFlow" << std::endl
+ << "Usage:" << std::endl
+ << "\t"
+ << "gen-tf-output <info_v2_path> <pb_path> <input_of_TensorFlow_path> "
+ "<output_path_to_generate>"
+ << std::endl;
+}
+
+void gen_tf_output(const std::string info_v2_path, const std::string pb_path,
+ const std::string input_path, const std::string output_path)
+{
+ throw std::runtime_error("Not Yet Implemented");
+}
+
+} // namespace
+
+int main(int argc, char **argv)
+{
+ // TODO We need better args parsing in future
+ if (argc != 5)
+ {
+ print_help();
+ return 255;
+ }
+
+ gen_tf_output(argv[1], argv[2], argv[3], argv[4]);
+
+ return 0;
+}
diff --git a/compiler/gen-tflite-output/CMakeLists.txt b/compiler/gen-tflite-output/CMakeLists.txt
new file mode 100644
index 000000000..1c9d2601d
--- /dev/null
+++ b/compiler/gen-tflite-output/CMakeLists.txt
@@ -0,0 +1,3 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(gen-tflite-output ${SOURCES})
diff --git a/compiler/gen-tflite-output/README.md b/compiler/gen-tflite-output/README.md
new file mode 100644
index 000000000..a9c985006
--- /dev/null
+++ b/compiler/gen-tflite-output/README.md
@@ -0,0 +1,14 @@
+# gen-tflite-output
+
+_gen-tflite-output_ generates a file containing the result of running TensorFlow Lite interpreter
+in HDF5 format.
+
+# How to use
+
+Use the following:
+
+```
+$ gen-tflite-output <tflite_file_path> <input_file_path> <output_path_to_generate>
+```
+
+Use _gen-tf-input_ to generate `<input_file_path>` file.
diff --git a/compiler/gen-tflite-output/src/Driver.cpp b/compiler/gen-tflite-output/src/Driver.cpp
new file mode 100644
index 000000000..90559ec2f
--- /dev/null
+++ b/compiler/gen-tflite-output/src/Driver.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <stdexcept>
+#include <string>
+
+namespace
+{
+
+void print_help()
+{
+ std::cerr << "This generates a file that contains result of running TensorFlow Lite interpreter"
+ << std::endl
+ << "Usage:" << std::endl
+ << "\t"
+ << "$ gen-tflite-output <tflite_file_path> <input_file_path> <output_path_to_generate>"
+ << std::endl;
+}
+
+void gen_tflite_output(const std::string tflite_path, const std::string input_path,
+ const std::string output_path)
+{
+ throw std::runtime_error("Not Yet Implemented");
+}
+
+} // namespace
+
+int main(int argc, char **argv)
+{
+ // TODO We need better args parsing in future
+ if (argc != 4)
+ {
+ print_help();
+ return 255;
+ }
+
+ gen_tflite_output(argv[1], argv[2], argv[3]);
+
+ return 0;
+}
diff --git a/compiler/hermes-std/CMakeLists.txt b/compiler/hermes-std/CMakeLists.txt
new file mode 100644
index 000000000..c7b02e14c
--- /dev/null
+++ b/compiler/hermes-std/CMakeLists.txt
@@ -0,0 +1,27 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(hermes_std STATIC ${SOURCES})
+set_target_properties(hermes_std PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(hermes_std PUBLIC include)
+target_link_libraries(hermes_std PUBLIC hermes)
+target_link_libraries(hermes_std PRIVATE stdex)
+target_link_libraries(hermes_std PRIVATE pepper_strcast)
+# Let's apply nncc common compile options
+#
+# NOTE This will enable strict compilation (warnings as error).
+# Please refer to the top-level CMakeLists.txt for details
+target_link_libraries(hermes_std PRIVATE nncc_common)
+target_link_libraries(hermes_std PUBLIC nncc_coverage)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for internal testing
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(hermes_std_test ${TESTS})
+target_link_libraries(hermes_std_test stdex)
+target_link_libraries(hermes_std_test hermes_std)
diff --git a/compiler/hermes-std/README.md b/compiler/hermes-std/README.md
new file mode 100644
index 000000000..f5f4b860f
--- /dev/null
+++ b/compiler/hermes-std/README.md
@@ -0,0 +1,3 @@
+# hermes-std
+
+_hermes-std_ is a collection of **primitive** _hermes_ extensions.
diff --git a/compiler/hermes-std/include/hermes/ConsoleReporter.h b/compiler/hermes-std/include/hermes/ConsoleReporter.h
new file mode 100644
index 000000000..e09dd5785
--- /dev/null
+++ b/compiler/hermes-std/include/hermes/ConsoleReporter.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HERMES_STD_CONSOLE_REPORTER_H__
+#define __HERMES_STD_CONSOLE_REPORTER_H__
+
+#include <hermes.h>
+
+namespace hermes
+{
+
+/**
+ * @brief Print messages into standard console
+ */
+struct ConsoleReporter final : public hermes::Sink
+{
+ void notify(const Message *m) final;
+};
+
+} // namespace hermes
+
+#endif // __HERMES_STD_CONSOLE_REPORTER_H__
diff --git a/compiler/hermes-std/include/hermes/EnvConfig.h b/compiler/hermes-std/include/hermes/EnvConfig.h
new file mode 100644
index 000000000..e4c392fd6
--- /dev/null
+++ b/compiler/hermes-std/include/hermes/EnvConfig.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HERMES_STD_ENV_CONFIG_H__
+#define __HERMES_STD_ENV_CONFIG_H__
+
+#include <hermes.h>
+
+#include <string>
+
+namespace hermes
+{
+
+using EnvName = std::string;
+
+enum class EnvFormat
+{
+ // Non-zero -> Enable
+  // Zero -> Disable
+ BooleanNumber,
+};
+
+template <EnvFormat Format> class EnvConfig;
+
+template <> class EnvConfig<EnvFormat::BooleanNumber> : public Config
+{
+public:
+ EnvConfig(const EnvName &name);
+
+public:
+ virtual ~EnvConfig() = default;
+
+public:
+ void configure(const Source *, SourceSetting &) const final;
+
+private:
+ bool _enabled = false;
+};
+
+} // namespace hermes
+
+#endif // __HERMES_STD_ENV_CONFIG_H__
diff --git a/compiler/hermes-std/requires.cmake b/compiler/hermes-std/requires.cmake
new file mode 100644
index 000000000..4aa6b1528
--- /dev/null
+++ b/compiler/hermes-std/requires.cmake
@@ -0,0 +1 @@
+require("pepper-strcast")
diff --git a/compiler/hermes-std/src/ConsoleReporter.cpp b/compiler/hermes-std/src/ConsoleReporter.cpp
new file mode 100644
index 000000000..3cc9f09ed
--- /dev/null
+++ b/compiler/hermes-std/src/ConsoleReporter.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/ConsoleReporter.h"
+
+#include <iostream>
+
+namespace hermes
+{
+
+void ConsoleReporter::notify(const hermes::Message *m)
+{
+ for (uint32_t n = 0; n < m->text()->lines(); ++n)
+ {
+ std::cout << m->text()->line(n) << std::endl;
+ }
+}
+
+} // namespace hermes
diff --git a/compiler/hermes-std/src/ConsoleReporter.test.cpp b/compiler/hermes-std/src/ConsoleReporter.test.cpp
new file mode 100644
index 000000000..c2e1f1c85
--- /dev/null
+++ b/compiler/hermes-std/src/ConsoleReporter.test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/ConsoleReporter.h"
+
+#include <stdex/Memory.h>
+
+#include <sstream>
+
+#include <gtest/gtest.h>
+
+TEST(ConsoleReporterTest, constructor)
+{
+ hermes::ConsoleReporter r;
+
+ SUCCEED();
+}
+
+TEST(ConsoleReporterTest, notify)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "Hello" << std::endl;
+
+ m.text(stdex::make_unique<hermes::MessageText>(ss));
+ }
+
+ hermes::ConsoleReporter r;
+
+ ASSERT_NO_THROW(r.notify(&m));
+}
diff --git a/compiler/hermes-std/src/EnvConfig.cpp b/compiler/hermes-std/src/EnvConfig.cpp
new file mode 100644
index 000000000..e8f7fcda4
--- /dev/null
+++ b/compiler/hermes-std/src/EnvConfig.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/EnvConfig.h"
+
+#include <pepper/strcast.h>
+
+namespace hermes
+{
+
+EnvConfig<EnvFormat::BooleanNumber>::EnvConfig(const EnvName &name)
+{
+ auto s = std::getenv(name.c_str());
+ _enabled = (pepper::safe_strcast<int>(s, 0 /* DISABLE BY DEFAULT */) != 0);
+}
+
+void EnvConfig<EnvFormat::BooleanNumber>::configure(const Source *, SourceSetting &setting) const
+{
+ if (_enabled)
+ {
+ // Enable all the sources
+ setting.accept_all();
+ }
+ else
+ {
+ // Disable all the sources
+ setting.reject_all();
+ }
+}
+
+} // namespace hermes
diff --git a/compiler/hermes/CMakeLists.txt b/compiler/hermes/CMakeLists.txt
new file mode 100644
index 000000000..5debfbca0
--- /dev/null
+++ b/compiler/hermes/CMakeLists.txt
@@ -0,0 +1,28 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(hermes STATIC ${SOURCES})
+set_target_properties(hermes PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(hermes PUBLIC include)
+target_link_libraries(hermes PRIVATE stdex)
+# Let's apply nncc common compile options
+#
+# NOTE This will enable strict compilation (warnings as error).
+# Please refer to the top-level CMakeLists.txt for details
+target_link_libraries(hermes PRIVATE nncc_common)
+target_link_libraries(hermes PUBLIC nncc_coverage)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for internal testing
+nnas_find_package(GTest REQUIRED)
+
+add_executable(hermes_test ${TESTS})
+target_link_libraries(hermes_test gtest_main)
+target_link_libraries(hermes_test stdex)
+target_link_libraries(hermes_test hermes)
+
+add_test(hermes_test hermes_test)
diff --git a/compiler/hermes/README.md b/compiler/hermes/README.md
new file mode 100644
index 000000000..c896abf6c
--- /dev/null
+++ b/compiler/hermes/README.md
@@ -0,0 +1,3 @@
+# hermes
+
+An **extensible** logging framework
diff --git a/compiler/hermes/include/hermes.h b/compiler/hermes/include/hermes.h
new file mode 100644
index 000000000..13202e621
--- /dev/null
+++ b/compiler/hermes/include/hermes.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HERMES_H__
+#define __HERMES_H__
+
+#include "hermes/core/Severity.h"
+#include "hermes/core/Message.h"
+#include "hermes/core/Context.h"
+// TO BE FILLED
+
+#endif // __HERMES_H__
diff --git a/compiler/hermes/include/hermes/core/Config.h b/compiler/hermes/include/hermes/core/Config.h
new file mode 100644
index 000000000..d937a36b8
--- /dev/null
+++ b/compiler/hermes/include/hermes/core/Config.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HERMES_CONFIG_H__
+#define __HERMES_CONFIG_H__
+
+#include "hermes/core/Severity.h" // TODO Put this into SourceSetting.h
+#include "hermes/core/SourceSetting.h"
+
+namespace hermes
+{
+
+// TODO Introduce Source.forward.h
+class Source;
+
+/**
+ * @brief Top-level configuration interface
+ *
+ * All Hermes configurations SHOULD inherit this interface.
+ */
+struct Config
+{
+ virtual ~Config() = default;
+
+ virtual void configure(const Source *, SourceSetting &) const = 0;
+};
+
+} // namespace hermes
+
+#endif // __HERMES_CONFIG_H__
diff --git a/compiler/hermes/include/hermes/core/Context.h b/compiler/hermes/include/hermes/core/Context.h
new file mode 100644
index 000000000..4054587a4
--- /dev/null
+++ b/compiler/hermes/include/hermes/core/Context.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HERMES_CONTEXT_H__
+#define __HERMES_CONTEXT_H__
+
+#include "hermes/core/Config.h"
+#include "hermes/core/Source.h"
+#include "hermes/core/Sink.h"
+#include "hermes/core/MessageBus.h"
+
+#include <memory>
+#include <set>
+
+namespace hermes
+{
+
+/**
+ * @brief Logging controller
+ *
+ * This "Context" serves as a controller for associated logging source/sink.
+ *
+ * WARNING This "Context" is not yet thread-safe.
+ * TODO Support multi-threaded application logging
+ */
+class Context final : private MessageBus, private Source::Registry, private Sink::Registry
+{
+public:
+ /// @brief Get the global configuration
+ const Config *config(void) const;
+ /// @brief Update the global configuration
+ void config(std::unique_ptr<Config> &&);
+
+public:
+ MessageBus *bus(void) { return this; }
+
+private:
+ /// This implements "post" method that MessageBus interface requires.
+ void post(std::unique_ptr<Message> &&msg) override;
+
+public:
+ Source::Registry *sources(void) { return this; }
+
+private:
+ /// This implements "attach" method that "Source::Registry" interface requires.
+ void attach(Source *source) override;
+ /// This implements "detach" method that "Source::Registry" interface requires.
+ void detach(Source *source) override;
+
+public:
+ Sink::Registry *sinks(void) { return this; }
+
+private:
+ /// This implements "append" method that "Sink::Registry" interface requires.
+ void append(std::unique_ptr<Sink> &&sink) override;
+
+private:
+ std::unique_ptr<Config> _config;
+ std::set<Source *> _sources;
+ std::set<std::unique_ptr<Sink>> _sinks;
+};
+
+} // namespace hermes
+
+#endif // __HERMES_CONTEXT_H__
diff --git a/compiler/hermes/include/hermes/core/Message.h b/compiler/hermes/include/hermes/core/Message.h
new file mode 100644
index 000000000..28cfd7942
--- /dev/null
+++ b/compiler/hermes/include/hermes/core/Message.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HERMES_MESSAGE_H__
+#define __HERMES_MESSAGE_H__
+
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace hermes
+{
+
+/**
+ * @brief Multi-line text message
+ */
+class MessageText
+{
+public:
+ /// WARNING! Be careful. This constructor updates "ss".
+ MessageText(std::stringstream &ss);
+
+public:
+ /// @brief The number of lines
+ uint32_t lines(void) const { return _lines.size(); }
+  /// @brief The content of a specific line
+ const std::string &line(uint32_t n) const { return _lines.at(n); }
+
+private:
+ std::vector<std::string> _lines;
+};
+
+/**
+ * @brief Message with metadata
+ *
+ * TODO Add "Timestamp" field
+ * TODO Add "Severity" field
+ * TODO Support extensible "attribute" annotation
+ */
+class Message final
+{
+public:
+ Message() = default;
+
+public:
+ void text(std::unique_ptr<MessageText> &&text) { _text = std::move(text); }
+ const MessageText *text(void) const { return _text.get(); }
+
+private:
+ std::unique_ptr<MessageText> _text;
+};
+
+} // namespace hermes
+
+#endif // __HERMES_MESSAGE_H__
diff --git a/compiler/hermes/include/hermes/core/MessageBuffer.h b/compiler/hermes/include/hermes/core/MessageBuffer.h
new file mode 100644
index 000000000..a2f1de74d
--- /dev/null
+++ b/compiler/hermes/include/hermes/core/MessageBuffer.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HERMES_MESSAGE_BUFFER_H__
+#define __HERMES_MESSAGE_BUFFER_H__
+
+#include "hermes/core/MessageBus.h"
+
+#include <ostream>
+#include <sstream>
+
+namespace hermes
+{
+
+/**
+ * @brief A buffer for a message under construction
+ *
+ * MessageBuffer will post the buffered message on destruction.
+ */
+class MessageBuffer final
+{
+public:
+ MessageBuffer(MessageBus *);
+ ~MessageBuffer();
+
+public:
+ std::ostream &os(void) { return _ss; }
+
+private:
+ MessageBus *_bus;
+
+ /// @brief Content buffer
+ std::stringstream _ss;
+};
+
+} // namespace hermes
+
+#endif // __HERMES_MESSAGE_BUFFER_H__
diff --git a/compiler/hermes/include/hermes/core/MessageBus.h b/compiler/hermes/include/hermes/core/MessageBus.h
new file mode 100644
index 000000000..4ec272352
--- /dev/null
+++ b/compiler/hermes/include/hermes/core/MessageBus.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HERMES_MESSAGE_BUS_H__
+#define __HERMES_MESSAGE_BUS_H__
+
+#include "hermes/core/Message.h"
+
+#include <memory>
+
+namespace hermes
+{
+
+/**
+ * @brief A bridge between Source and Sink
+ */
+struct MessageBus
+{
+ virtual ~MessageBus() = default;
+
+ // "post" takes the ownership of posted messages.
+ virtual void post(std::unique_ptr<Message> &&msg) = 0;
+};
+
+} // namespace hermes
+
+#endif // __HERMES_MESSAGE_BUS_H__
diff --git a/compiler/hermes/include/hermes/core/Severity.h b/compiler/hermes/include/hermes/core/Severity.h
new file mode 100644
index 000000000..25de35d80
--- /dev/null
+++ b/compiler/hermes/include/hermes/core/Severity.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HERMES_SEVERITY_H__
+#define __HERMES_SEVERITY_H__
+
+#include <cstdint>
+
+namespace hermes
+{
+
+/**
+ * FATAL > ERROR > WARN > INFO > VERBOSE
+ *
+ * Hermes deliberately declares SeverityCategory as "enum" (instead of "enum class")
+ * in order to reduce namespace nesting.
+ */
+enum SeverityCategory : uint16_t
+{
+ FATAL = 0,
+ ERROR = 1,
+ WARN = 2,
+ INFO = 3,
+ VERBOSE = 4,
+};
+
+class Severity final
+{
+public:
+ friend Severity fatal(void);
+ friend Severity error(void);
+ friend Severity warn(void);
+ friend Severity info(void);
+ friend Severity verbose(uint16_t level);
+
+private:
+ /**
+ * Use below "factory" helpers.
+ */
+ Severity(SeverityCategory cat, uint16_t lvl) : _cat{cat}, _lvl{lvl}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const SeverityCategory &category(void) const { return _cat; }
+
+ /**
+ * @brief Verbose level
+ *
+ * "level" is fixed as 0 for all the categories except VERBOSE.
+ *
+ * 0 (most significant) <--- level ---> 65535 (least significant)
+ */
+ const uint16_t &level(void) const { return _lvl; }
+
+private:
+ SeverityCategory _cat;
+ uint16_t _lvl;
+};
+
+inline Severity fatal(void) { return Severity{FATAL, 0}; }
+inline Severity error(void) { return Severity{ERROR, 0}; }
+inline Severity warn(void) { return Severity{WARN, 0}; }
+inline Severity info(void) { return Severity{INFO, 0}; }
+inline Severity verbose(uint16_t level) { return Severity{VERBOSE, level}; }
+
+} // namespace hermes
+
+#endif // __HERMES_SEVERITY_H__
diff --git a/compiler/hermes/include/hermes/core/Sink.h b/compiler/hermes/include/hermes/core/Sink.h
new file mode 100644
index 000000000..f53aff9fc
--- /dev/null
+++ b/compiler/hermes/include/hermes/core/Sink.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HERMES_SINK_H__
+#define __HERMES_SINK_H__
+
+#include "hermes/core/Message.h"
+
+#include <memory>
+
+namespace hermes
+{
+
+/**
+ * @brief Message consumer interface
+ *
+ * All message consumers should inherit this interface.
+ */
+struct Sink
+{
+ struct Registry
+ {
+ virtual ~Registry() = default;
+
+ // NOTE SinkRegistry takes the ownership of all the appended Sink objects
+ virtual void append(std::unique_ptr<Sink> &&) = 0;
+ };
+
+ virtual ~Sink() = default;
+
+ virtual void notify(const Message *) = 0;
+};
+
+} // namespace hermes
+
+#endif // __HERMES_SINK_H__
diff --git a/compiler/hermes/include/hermes/core/Source.h b/compiler/hermes/include/hermes/core/Source.h
new file mode 100644
index 000000000..b28532b2d
--- /dev/null
+++ b/compiler/hermes/include/hermes/core/Source.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HERMES_SOURCE_H__
+#define __HERMES_SOURCE_H__
+
+#include "hermes/core/Config.h"
+#include "hermes/core/Severity.h"
+#include "hermes/core/MessageBus.h"
+#include "hermes/core/MessageBuffer.h"
+#include "hermes/core/SourceSetting.h"
+
+namespace hermes
+{
+
+/**
+ * @brief Message Source
+ *
+ * "Source" is the actual interface for users. "Source" accepts log messages from client.
+ */
+class Source
+{
+public:
+ struct Registry
+ {
+ virtual ~Registry() = default;
+
+ // NOTE Each "Source" SHOULD outlive "Registry"
+ virtual void attach(Source *) = 0;
+ virtual void detach(Source *) = 0;
+ };
+
+ // NOTE This using statement is introduced for backward compatibility
+ // TODO Remove this using declaration after migration
+ using Setting = SourceSetting;
+
+protected:
+ Source();
+ virtual ~Source();
+
+protected:
+ // Each "Source" implementation SHOULD invoke activate/deactivate appropriately
+ void activate(Registry *, MessageBus *);
+ void deactivate(void);
+
+protected:
+ Setting &setting(void) { return _setting; }
+
+public:
+ /**
+ * @brief Check whether a message with a given severity is acceptable or not
+ *
+ *
+ * NOTE This routine is performance critical as app always invokes this routine
+ * (even when logging is disabled).
+ */
+ inline bool check(const Severity &s) const
+ {
+ return static_cast<int32_t>(s.level()) < _setting.limit(s.category()).level();
+ }
+
+public:
+ /**
+ * @brief Update Source with a given configuration
+ *
+ * WARNING Do NOT invoke this manually.
+ *
+ * TODO Remove virtual after migration
+ */
+ virtual void reload(const Config *);
+
+public:
+ std::unique_ptr<MessageBuffer> buffer(const Severity &) const;
+
+private:
+ Setting _setting;
+
+private:
+ Registry *_reg = nullptr;
+ MessageBus *_bus = nullptr;
+};
+
+} // namespace hermes
+
+#define HERMES_FATAL(s) \
+ if ((s).check(::hermes::fatal())) \
+ (s).buffer(::hermes::fatal())->os()
+
+#define HERMES_ERROR(s) \
+ if ((s).check(::hermes::error())) \
+ (s).buffer(::hermes::error())->os()
+
+#define HERMES_WARN(s) \
+ if ((s).check(::hermes::warn())) \
+ (s).buffer(::hermes::warn())->os()
+
+#define HERMES_INFO(s) \
+ if ((s).check(::hermes::info())) \
+ (s).buffer(::hermes::info())->os()
+
+#define HERMES_VERBOSE(s, lv) \
+ if ((s).check(::hermes::verbose((lv)))) \
+ (s).buffer(::hermes::verbose((lv)))->os()
+
+#endif // __HERMES_SOURCE_H__
diff --git a/compiler/hermes/include/hermes/core/SourceSetting.h b/compiler/hermes/include/hermes/core/SourceSetting.h
new file mode 100644
index 000000000..3beaaa196
--- /dev/null
+++ b/compiler/hermes/include/hermes/core/SourceSetting.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HERMES_SOURCE_SETTING_H__
+#define __HERMES_SOURCE_SETTING_H__
+
+#include <array>
+#include <cstdint>
+
+namespace hermes
+{
+
+class Filter final
+{
+public:
+ Filter(int32_t *ptr) : _ptr{ptr}
+ {
+ // DO NOTHING
+ }
+
+public:
+ inline void reject_all(void) { *_ptr = -1; }
+ inline void accept_upto(uint16_t lv) { *_ptr = static_cast<int32_t>(lv); }
+ inline void accept_all(void) { *_ptr = 65536; }
+
+private:
+ int32_t *_ptr;
+};
+
+class Limit final
+{
+public:
+ Limit(const int32_t *ptr) : _ptr{ptr}
+ {
+ // DO NOTHING
+ }
+
+public:
+ inline int32_t level(void) const { return *_ptr; }
+
+private:
+ const int32_t *_ptr;
+};
+
+class SourceSetting final
+{
+public:
+ SourceSetting()
+ {
+ // Reject all the messages by default
+ reject_all();
+ }
+
+public:
+ void reject_all(void)
+ {
+ filter(FATAL).reject_all();
+ filter(ERROR).reject_all();
+ filter(WARN).reject_all();
+ filter(INFO).reject_all();
+ filter(VERBOSE).reject_all();
+ }
+
+ void accept_all(void)
+ {
+ filter(FATAL).accept_all();
+ filter(ERROR).accept_all();
+ filter(WARN).accept_all();
+ filter(INFO).accept_all();
+ filter(VERBOSE).accept_all();
+ }
+
+ inline Filter filter(const SeverityCategory &cat)
+ {
+ return _ulimits.data() + static_cast<uint32_t>(cat);
+ }
+
+ inline Limit limit(const SeverityCategory &cat) const
+ {
+ return _ulimits.data() + static_cast<uint32_t>(cat);
+ }
+
+private:
+ /**
+ * @brief Allowed message level for each category
+ *
+ * This source will accept all the messages whose level belongs to [0, ulimit)
+ * where ulimit corresponds to "limit(cat).value()"
+ */
+ std::array<int32_t, 5> _ulimits;
+};
+
+} // namespace hermes
+
+#endif // __HERMES_SOURCE_SETTING_H__
diff --git a/compiler/hermes/requires.cmake b/compiler/hermes/requires.cmake
new file mode 100644
index 000000000..a4855289c
--- /dev/null
+++ b/compiler/hermes/requires.cmake
@@ -0,0 +1 @@
+require("stdex")
diff --git a/compiler/hermes/src/core/Context.cpp b/compiler/hermes/src/core/Context.cpp
new file mode 100644
index 000000000..a6970f093
--- /dev/null
+++ b/compiler/hermes/src/core/Context.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/core/Context.h"
+
+#include <cassert>
+
+namespace hermes
+{
+
+const Config *Context::config(void) const
+{
+ // Return the current configuration
+ return _config.get();
+}
+
+void Context::config(std::unique_ptr<Config> &&config)
+{
+ _config = std::move(config);
+
+ // Apply updated configurations
+ for (auto source : _sources)
+ {
+ source->reload(_config.get());
+ }
+}
+
+void Context::post(std::unique_ptr<Message> &&msg)
+{
+ // Validate message
+ assert((msg != nullptr) && "invalid message");
+ assert((msg->text() != nullptr) && "missing text");
+
+ // Take the ownership of a given message
+ auto m = std::move(msg);
+
+ // Notify appended sinks
+ for (const auto &sink : _sinks)
+ {
+ sink->notify(m.get());
+ }
+
+ // TODO Stop the process if "FATAL" message is posted
+}
+
+void Context::attach(Source *source)
+{
+ // Configure source first
+ source->reload(config());
+ // Insert source
+ _sources.insert(source);
+}
+
+void Context::detach(Source *source)
+{
+ // Remove source
+ _sources.erase(source);
+}
+
+void Context::append(std::unique_ptr<Sink> &&sink)
+{
+ // Append sink
+ _sinks.insert(std::move(sink));
+}
+
+} // namespace hermes
diff --git a/compiler/hermes/src/core/Context.test.cpp b/compiler/hermes/src/core/Context.test.cpp
new file mode 100644
index 000000000..0c8defd2a
--- /dev/null
+++ b/compiler/hermes/src/core/Context.test.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/core/Context.h"
+
+#include <gtest/gtest.h>
+
+TEST(ContextTest, constructor)
+{
+ hermes::Context ctx;
+
+ ASSERT_NE(ctx.bus(), nullptr);
+ ASSERT_NE(ctx.sources(), nullptr);
+ ASSERT_NE(ctx.sinks(), nullptr);
+}
diff --git a/compiler/hermes/src/core/Message.cpp b/compiler/hermes/src/core/Message.cpp
new file mode 100644
index 000000000..63fe12b5f
--- /dev/null
+++ b/compiler/hermes/src/core/Message.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/core/Message.h"
+
+#include <cassert>
+
+namespace hermes
+{
+
+MessageText::MessageText(std::stringstream &ss)
+{
+ while (!ss.eof())
+ {
+ assert(ss.good());
+
+ std::string line;
+ std::getline(ss, line);
+
+ // Trim the last empty line (by std::endl)
+ if (ss.eof() && line.empty())
+ {
+ break;
+ }
+
+ _lines.emplace_back(line);
+ }
+}
+
+} // namespace hermes
diff --git a/compiler/hermes/src/core/Message.test.cpp b/compiler/hermes/src/core/Message.test.cpp
new file mode 100644
index 000000000..1db88c711
--- /dev/null
+++ b/compiler/hermes/src/core/Message.test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/core/Message.h"
+
+#include <gtest/gtest.h>
+
+TEST(MessageTextTest, multiline)
+{
+ std::stringstream ss;
+
+ ss << "Hello, World" << std::endl;
+ ss << "Nice to meet you" << std::endl;
+
+ hermes::MessageText text{ss};
+
+ ASSERT_EQ(text.lines(), 2);
+ ASSERT_EQ(text.line(0), "Hello, World");
+ ASSERT_EQ(text.line(1), "Nice to meet you");
+}
+
+TEST(MessageTest, ctor)
+{
+ hermes::Message msg;
+
+ // Text is empty at the beginning
+ ASSERT_EQ(msg.text(), nullptr);
+}
diff --git a/compiler/hermes/src/core/MessageBuffer.cpp b/compiler/hermes/src/core/MessageBuffer.cpp
new file mode 100644
index 000000000..175a45d3f
--- /dev/null
+++ b/compiler/hermes/src/core/MessageBuffer.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/core/MessageBuffer.h"
+
+#include <stdex/Memory.h>
+
+namespace hermes
+{
+
+MessageBuffer::MessageBuffer(MessageBus *bus) : _bus{bus}
+{
+ // DO NOTHING
+}
+
+MessageBuffer::~MessageBuffer()
+{
+ // NOTE The current implementation is unsafe as it may throw an exception.
+ // TODO Find a better safe implementation.
+ auto msg = stdex::make_unique<Message>();
+
+ msg->text(stdex::make_unique<MessageText>(_ss));
+
+ _bus->post(std::move(msg));
+}
+
+} // namespace hermes
diff --git a/compiler/hermes/src/core/MessageBuffer.test.cpp b/compiler/hermes/src/core/MessageBuffer.test.cpp
new file mode 100644
index 000000000..ff08eaa98
--- /dev/null
+++ b/compiler/hermes/src/core/MessageBuffer.test.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/core/MessageBuffer.h"
+
+#include <cassert>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class MockMessageBus final : public hermes::MessageBus
+{
+public:
+ MockMessageBus() = default;
+
+public:
+ void post(std::unique_ptr<hermes::Message> &&msg) override
+ {
+ _count += 1;
+ _message = std::move(msg);
+ }
+
+public:
+ uint32_t count(void) const { return _count; }
+ const hermes::Message *message(void) const { return _message.get(); }
+
+private:
+ unsigned _count = 0;
+ std::unique_ptr<hermes::Message> _message = nullptr;
+};
+
+} // namespace
+
+TEST(MessageBufferTest, pass_constructed_message_on_destruction)
+{
+ MockMessageBus bus;
+
+ {
+ hermes::MessageBuffer buf{&bus};
+
+ buf.os() << "Hello" << std::endl;
+ buf.os() << "Nice to meet you" << std::endl;
+ }
+
+ ASSERT_EQ(bus.count(), 1);
+ ASSERT_NE(bus.message(), nullptr);
+ ASSERT_NE(bus.message()->text(), nullptr);
+ ASSERT_EQ(bus.message()->text()->lines(), 2);
+ ASSERT_EQ(bus.message()->text()->line(0), "Hello");
+ ASSERT_EQ(bus.message()->text()->line(1), "Nice to meet you");
+}
diff --git a/compiler/hermes/src/core/MessageBus.cpp b/compiler/hermes/src/core/MessageBus.cpp
new file mode 100644
index 000000000..05101089e
--- /dev/null
+++ b/compiler/hermes/src/core/MessageBus.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/core/MessageBus.h"
+
+// NOTE This empty file validates "MessageBus.h"
diff --git a/compiler/hermes/src/core/Severity.test.cpp b/compiler/hermes/src/core/Severity.test.cpp
new file mode 100644
index 000000000..44fb800cb
--- /dev/null
+++ b/compiler/hermes/src/core/Severity.test.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/core/Severity.h"
+
+#include <gtest/gtest.h>
+
+TEST(SeverityTest, fatal)
+{
+ auto severity = hermes::fatal();
+
+ ASSERT_EQ(severity.category(), hermes::FATAL);
+ ASSERT_EQ(severity.level(), 0);
+}
+
+TEST(SeverityTest, error)
+{
+ auto severity = hermes::error();
+
+ ASSERT_EQ(severity.category(), hermes::ERROR);
+ ASSERT_EQ(severity.level(), 0);
+}
+
+TEST(SeverityTest, warn)
+{
+ auto severity = hermes::warn();
+
+ ASSERT_EQ(severity.category(), hermes::WARN);
+ ASSERT_EQ(severity.level(), 0);
+}
+
+TEST(SeverityTest, info)
+{
+ auto severity = hermes::info();
+
+ ASSERT_EQ(severity.category(), hermes::INFO);
+ ASSERT_EQ(severity.level(), 0);
+}
+
+TEST(SeverityTest, verbose)
+{
+ auto severity = hermes::verbose(100);
+
+ ASSERT_EQ(severity.category(), hermes::VERBOSE);
+ ASSERT_EQ(severity.level(), 100);
+}
diff --git a/compiler/hermes/src/core/Sink.cpp b/compiler/hermes/src/core/Sink.cpp
new file mode 100644
index 000000000..1677073b1
--- /dev/null
+++ b/compiler/hermes/src/core/Sink.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/core/Sink.h"
+
+// NOTE This empty file validates "Sink.h"
diff --git a/compiler/hermes/src/core/Source.cpp b/compiler/hermes/src/core/Source.cpp
new file mode 100644
index 000000000..33f8b0570
--- /dev/null
+++ b/compiler/hermes/src/core/Source.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/core/Source.h"
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+namespace hermes
+{
+
+Source::Source()
+{
+ assert(_reg == nullptr);
+ assert(_bus == nullptr);
+}
+
+Source::~Source()
+{
+ assert(_bus == nullptr);
+ assert(_reg == nullptr);
+}
+
+void Source::activate(Registry *reg, MessageBus *bus)
+{
+ assert((_reg == nullptr) && (_bus == nullptr));
+
+ _reg = reg;
+ _bus = bus;
+
+ _reg->attach(this);
+
+ assert((_bus != nullptr) && (_reg != nullptr));
+}
+
+void Source::deactivate(void)
+{
+ assert((_bus != nullptr) && (_reg != nullptr));
+
+ _reg->detach(this);
+
+ _bus = nullptr;
+ _reg = nullptr;
+
+ assert((_reg == nullptr) && (_bus == nullptr));
+}
+
+void Source::reload(const Config *c) { c->configure(this, _setting); }
+
+std::unique_ptr<MessageBuffer> Source::buffer(const Severity &) const
+{
+ // TODO Pass Severity
+ return stdex::make_unique<MessageBuffer>(_bus);
+}
+
+} // namespace hermes
diff --git a/compiler/hermes/src/core/Source.test.cpp b/compiler/hermes/src/core/Source.test.cpp
new file mode 100644
index 000000000..f98a64509
--- /dev/null
+++ b/compiler/hermes/src/core/Source.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/core/Source.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+struct MockSourceRegistry final : public hermes::Source::Registry
+{
+ void attach(hermes::Source *) override { return; }
+ void detach(hermes::Source *) override { return; }
+};
+
+struct MockMessageBus final : public hermes::MessageBus
+{
+ void post(std::unique_ptr<hermes::Message> &&msg) override
+ {
+ msg.reset();
+ ++cnt;
+ }
+
+ uint32_t cnt = 0;
+};
+
+struct MockSource final : public hermes::Source
+{
+ MockSource(hermes::Source::Registry *r, hermes::MessageBus *b) { activate(r, b); }
+ ~MockSource() { deactivate(); }
+
+ void reload(const hermes::Config *) override { return; }
+
+ void enable(void) { setting().accept_all(); }
+};
+
+} // namespace
+
+TEST(SourceTest, construct)
+{
+ MockSourceRegistry registry;
+ MockMessageBus bus;
+
+ MockSource source{&registry, &bus};
+
+ // Source are off at the beginning
+ ASSERT_FALSE(source.check(::hermes::fatal()));
+ ASSERT_FALSE(source.check(::hermes::error()));
+ ASSERT_FALSE(source.check(::hermes::warn()));
+ ASSERT_FALSE(source.check(::hermes::info()));
+ ASSERT_FALSE(source.check(::hermes::verbose(100)));
+}
+
+TEST(SourceTest, macro)
+{
+ MockSourceRegistry registry;
+
+ MockMessageBus bus;
+
+ MockSource source{&registry, &bus};
+
+ source.enable();
+
+ uint32_t expected_count = 0;
+
+ // No message at the beginning
+ ASSERT_EQ(bus.cnt, 0);
+
+ HERMES_ERROR(source) << "A";
+ ASSERT_EQ(bus.cnt, ++expected_count);
+
+ HERMES_WARN(source) << "A";
+ ASSERT_EQ(bus.cnt, ++expected_count);
+
+ HERMES_INFO(source) << "A";
+ ASSERT_EQ(bus.cnt, ++expected_count);
+
+ HERMES_VERBOSE(source, 100) << "A";
+ ASSERT_EQ(bus.cnt, ++expected_count);
+
+// FATAL message should terminate the execution. Let's check how to check this!
+// TODO Enable FATAL feature and enable this test
+#if 0
+ HERMES_FATAL(source) << "A";
+ ASSERT_EQ(bus.cnt, 1);
+#endif
+}
diff --git a/compiler/hermes/src/hermes.cpp b/compiler/hermes/src/hermes.cpp
new file mode 100644
index 000000000..048521a32
--- /dev/null
+++ b/compiler/hermes/src/hermes.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes.h"
+
+// NOTE This empty file validates "hermes.h"
diff --git a/compiler/hermes/src/hermes.test.cpp b/compiler/hermes/src/hermes.test.cpp
new file mode 100644
index 000000000..2cbc0939d
--- /dev/null
+++ b/compiler/hermes/src/hermes.test.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes.h"
+
+#include <gtest/gtest.h>
+
+TEST(HermesTest, simple_usecase)
+{
+ // TO BE FILLED
+}
diff --git a/compiler/i5diff/CMakeLists.txt b/compiler/i5diff/CMakeLists.txt
new file mode 100644
index 000000000..321ae49a0
--- /dev/null
+++ b/compiler/i5diff/CMakeLists.txt
@@ -0,0 +1,15 @@
+find_package(HDF5 COMPONENTS CXX QUIET)
+
+if(NOT HDF5_FOUND)
+ return()
+endif(NOT HDF5_FOUND)
+
+message(STATUS "Enable i5diff: TRUE")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(i5diff ${SOURCES})
+target_include_directories(i5diff PRIVATE ${HDF5_INCLUDE_DIRS})
+target_link_libraries(i5diff PRIVATE ${HDF5_CXX_LIBRARIES})
+target_link_libraries(i5diff PRIVATE angkor)
+target_link_libraries(i5diff PRIVATE safemain)
diff --git a/compiler/i5diff/README.md b/compiler/i5diff/README.md
new file mode 100644
index 000000000..35d81884a
--- /dev/null
+++ b/compiler/i5diff/README.md
@@ -0,0 +1,20 @@
+# i5diff
+
+_i5diff_ compares two HDF5 files that _nnkit_ HDF5 export action generates.
+
+**DISCLAIMER** _i5diff_ is not designed as a general diff tool.
+It works only for HDF5 files that _nnkit_ HDF5 export action generates.
+
+## Yet Another Diff?
+
+_i5diff_ is able to detect _shape mismatch_ that _h5diff_ cannot detect.
+
+To be precise, _h5diff_ is also able to detect _shape mismatch_.
+Unfortunately, however, _h5diff_ ends with 0 exitcode in the presence of _shape mismatch_, and thus
+it is impossible to use _h5diff_ for continuous integration.
+
+## How to use
+
+```
+$ /path/to/i5diff -d 0.001 /path/to/fst.h5 /path/to/snd.h5
+```
diff --git a/compiler/i5diff/requires.cmake b/compiler/i5diff/requires.cmake
new file mode 100644
index 000000000..a6222db76
--- /dev/null
+++ b/compiler/i5diff/requires.cmake
@@ -0,0 +1,2 @@
+require("angkor")
+require("safemain")
diff --git a/compiler/i5diff/src/entry.cpp b/compiler/i5diff/src/entry.cpp
new file mode 100644
index 000000000..456467f54
--- /dev/null
+++ b/compiler/i5diff/src/entry.cpp
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <H5Cpp.h>
+
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <set>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+enum class ErrorCode
+{
+ CountMismatch,
+ TypeMismatch,
+ ShapeMismatch,
+ ValueMismatch,
+};
+
+template <ErrorCode EC> class ErrorDetail;
+
+// TODO Record the details
+template <> class ErrorDetail<ErrorCode::CountMismatch>
+{
+public:
+ ErrorDetail() = default;
+};
+
+// TODO Record the details
+template <> class ErrorDetail<ErrorCode::TypeMismatch>
+{
+public:
+ ErrorDetail() = default;
+};
+
+// TODO Record the details
+template <> class ErrorDetail<ErrorCode::ShapeMismatch>
+{
+public:
+ ErrorDetail() = default;
+};
+
+// TODO Record the details
+template <> class ErrorDetail<ErrorCode::ValueMismatch>
+{
+public:
+ ErrorDetail() = default;
+};
+
+struct Observer
+{
+ virtual ~Observer() = default;
+
+ virtual void notify(const ErrorDetail<ErrorCode::CountMismatch> &) = 0;
+ virtual void notify(const ErrorDetail<ErrorCode::TypeMismatch> &) = 0;
+ virtual void notify(const ErrorDetail<ErrorCode::ShapeMismatch> &) = 0;
+ virtual void notify(const ErrorDetail<ErrorCode::ValueMismatch> &) = 0;
+};
+
+class Mux final : public Observer
+{
+public:
+ Mux() = default;
+
+public:
+ void attach(Observer *o) { _observers.insert(o); }
+
+private:
+ template <ErrorCode EC> void notify_all(const ErrorDetail<EC> &e)
+ {
+ for (auto o : _observers)
+ {
+ o->notify(e);
+ }
+ }
+
+public:
+ void notify(const ErrorDetail<ErrorCode::CountMismatch> &e) final { notify_all(e); }
+ void notify(const ErrorDetail<ErrorCode::TypeMismatch> &e) final { notify_all(e); }
+ void notify(const ErrorDetail<ErrorCode::ShapeMismatch> &e) final { notify_all(e); }
+ void notify(const ErrorDetail<ErrorCode::ValueMismatch> &e) final { notify_all(e); }
+
+public:
+ std::set<Observer *> _observers;
+};
+
+class ExitcodeTracker final : public Observer
+{
+public:
+ const int &exitcode(void) const { return _exitcode; }
+
+public:
+ void notify(const ErrorDetail<ErrorCode::CountMismatch> &) { _exitcode = 1; }
+ void notify(const ErrorDetail<ErrorCode::TypeMismatch> &) { _exitcode = 1; }
+ void notify(const ErrorDetail<ErrorCode::ShapeMismatch> &) { _exitcode = 1; }
+ void notify(const ErrorDetail<ErrorCode::ValueMismatch> &) { _exitcode = 1; }
+
+public:
+ int _exitcode = 0;
+};
+
+} // namespace
+
+//
+// HDF5 helpers
+//
+namespace
+{
+
+enum class DataType
+{
+ UNKNOWN,
+ FLOAT32,
+ /* TO BE ADDED */
+};
+
+DataType to_internal_dtype(const H5::DataType &dtype)
+{
+ if (dtype == H5::PredType::IEEE_F32BE)
+ {
+ return DataType::FLOAT32;
+ }
+ return DataType::UNKNOWN;
+}
+
+using TensorShape = nncc::core::ADT::tensor::Shape;
+
+TensorShape to_internal_shape(const H5::DataSpace &dataspace)
+{
+ int rank = dataspace.getSimpleExtentNdims();
+
+ std::vector<hsize_t> dims;
+
+ dims.resize(rank, 0);
+
+ dataspace.getSimpleExtentDims(dims.data());
+
+ TensorShape res;
+
+ res.resize(rank);
+ for (int axis = 0; axis < rank; ++axis)
+ {
+ res.dim(axis) = dims[axis];
+ }
+
+ return res;
+}
+
+uint32_t element_count(const H5::DataSpace &dataspace)
+{
+ return nncc::core::ADT::tensor::num_elements(to_internal_shape(dataspace));
+}
+
+std::vector<float> as_float_vector(const H5::DataSet &dataset)
+{
+ std::vector<float> buffer;
+
+ buffer.resize(element_count(dataset.getSpace()));
+ dataset.read(buffer.data(), H5::PredType::NATIVE_FLOAT);
+
+ return buffer;
+}
+
+using LexicalLayout = nncc::core::ADT::tensor::LexicalLayout;
+using TensorIndexEnumerator = nncc::core::ADT::tensor::IndexEnumerator;
+
+} // namespace
+
+// TODO Report the details
+int entry(int argc, char **argv)
+{
+ // The current implementation works only for command-line of the following form:
+ //
+ // i5diff -d 0.001 /path/to/left.h5 /path/to/right.h5
+ //
+ // TODO Support more options
+ assert(argc == 5);
+ assert(std::string(argv[1]) == "-d");
+ assert(std::string(argv[2]) == "0.001");
+
+ H5::H5File lhs{argv[3], H5F_ACC_RDONLY};
+ H5::H5File rhs{argv[4], H5F_ACC_RDONLY};
+
+ ExitcodeTracker exitcode_tracker;
+
+ Mux mux;
+ mux.attach(&exitcode_tracker);
+
+ // Compare values
+ do
+ {
+ // NOTE The name of value group SHOULD BE aligned with nnkit HDF5 actions
+ const std::string value_grpname{"value"};
+
+ H5::Group lhs_value_grp = lhs.openGroup(value_grpname);
+ H5::Group rhs_value_grp = rhs.openGroup(value_grpname);
+
+ // Compare value count
+ int64_t value_count = -1;
+ {
+ uint32_t lhs_value_count = static_cast<uint32_t>(lhs_value_grp.getNumObjs());
+ uint32_t rhs_value_count = static_cast<uint32_t>(rhs_value_grp.getNumObjs());
+
+ if (lhs_value_count != rhs_value_count)
+ {
+ ErrorDetail<ErrorCode::CountMismatch> error{};
+ mux.notify(error);
+ break;
+ }
+
+ value_count = std::max<int64_t>(lhs_value_count, rhs_value_count);
+ }
+ assert(value_count >= 0);
+
+ // Compare each dataset
+ for (int64_t n = 0; n < value_count; ++n)
+ {
+ // NOTE The name of dataset SHOULD BE aligned with nnkit HDF5 actions
+ const std::string dataset_name = std::to_string(n);
+
+ auto lhs_dataset = lhs_value_grp.openDataSet(dataset_name);
+ auto rhs_dataset = rhs_value_grp.openDataSet(dataset_name);
+
+ auto lhs_dtype = to_internal_dtype(lhs_dataset.getDataType());
+ auto rhs_dtype = to_internal_dtype(rhs_dataset.getDataType());
+
+ // TODO Support other data types
+ assert(rhs_dtype == DataType::FLOAT32);
+ assert(lhs_dtype == DataType::FLOAT32);
+
+ if (lhs_dtype != rhs_dtype)
+ {
+ ErrorDetail<ErrorCode::TypeMismatch> error{};
+ mux.notify(error);
+ continue;
+ }
+
+ auto lhs_shape = to_internal_shape(lhs_dataset.getSpace());
+ auto rhs_shape = to_internal_shape(rhs_dataset.getSpace());
+
+ if (!(lhs_shape == rhs_shape))
+ {
+ ErrorDetail<ErrorCode::ShapeMismatch> error{};
+ mux.notify(error);
+ continue;
+ }
+
+ assert(lhs_shape == rhs_shape);
+ assert(lhs_dtype == rhs_dtype);
+ const auto &shape = lhs_shape;
+ const auto &dtype = lhs_dtype;
+
+ switch (dtype)
+ {
+ case DataType::FLOAT32:
+ {
+ auto lhs_vector = as_float_vector(lhs_dataset);
+ auto rhs_vector = as_float_vector(rhs_dataset);
+
+ assert(lhs_vector.size() == rhs_vector.size());
+
+ LexicalLayout layout;
+
+ for (TensorIndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ const auto &ind = e.current();
+ auto lhs_value = lhs_vector.at(layout.offset(shape, ind));
+ auto rhs_value = rhs_vector.at(layout.offset(shape, ind));
+
+ // TODO Abstract equality criterion
+ if (std::abs(lhs_value - rhs_value) >= 0.001f)
+ {
+ ErrorDetail<ErrorCode::ValueMismatch> error{};
+ mux.notify(error);
+ continue;
+ }
+ }
+
+ break;
+ }
+ default:
+ throw std::runtime_error{"Not supported, yet"};
+ };
+ }
+ } while (false);
+
+ // TODO Compare names (if requested)
+
+ return exitcode_tracker.exitcode();
+}
diff --git a/compiler/kuma/CMakeLists.txt b/compiler/kuma/CMakeLists.txt
new file mode 100644
index 000000000..e705bfedb
--- /dev/null
+++ b/compiler/kuma/CMakeLists.txt
@@ -0,0 +1,19 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(kuma STATIC ${SOURCES})
+set_target_properties(kuma PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(kuma PUBLIC include)
+target_link_libraries(kuma PRIVATE nncc_common)
+target_link_libraries(kuma PUBLIC nncc_coverage)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for test
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(kuma_test ${TESTS})
+target_link_libraries(kuma_test kuma)
diff --git a/compiler/kuma/README.md b/compiler/kuma/README.md
new file mode 100644
index 000000000..7e5123968
--- /dev/null
+++ b/compiler/kuma/README.md
@@ -0,0 +1,7 @@
+# kuma
+
+_kuma_ is a collection of offline memory allocators.
+
+## What does "kuma" mean?
+
+_kuma_ originates from _cooma_ which is an abbreviation of **C**ollection **O**f **O**ffline **M**emory **A**llocators.
diff --git a/compiler/kuma/include/kuma.h b/compiler/kuma/include/kuma.h
new file mode 100644
index 000000000..a3d9a2e91
--- /dev/null
+++ b/compiler/kuma/include/kuma.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KUMA_H__
+#define __KUMA_H__
+
+#include <cstdint>
+#include <set>
+
+namespace kuma
+{
+
+// Supported algorithms
+enum Algorithm
+{
+ // No reuse
+ Greedy,
+ LinearScanFirstFit,
+};
+
+/**
+ * Each algorithm defines its own context. The context describes its in and out.
+ */
+template <Algorithm Alg> class Context;
+
+using ItemID = uint32_t;
+using ItemSize = uint32_t;
+
+using MemoryOffset = uint32_t;
+using MemorySize = uint32_t;
+
+//
+// Greedy Algorithm
+//
+template <> class Context<Algorithm::Greedy>
+{
+public:
+ virtual ~Context() = default;
+
+public: // Inputs
+ // count() returns the number of items to be allocated
+ virtual uint32_t item_count(void) const = 0;
+
+ // size(N) returns the size of the N-th item
+ virtual ItemSize item_size(const ItemID &) const = 0;
+
+public: // Outputs
+ virtual void mem_offset(const ItemID &, const MemoryOffset &) = 0;
+ virtual void mem_total(const MemorySize &) = 0;
+};
+
+void solve(Context<Greedy> *);
+
+//
+// Linear Scan First-Fit Algorithm
+//
+template <> class Context<Algorithm::LinearScanFirstFit>
+{
+public:
+ virtual ~Context() = default;
+
+public: // Inputs
+ // count() returns the number of items to be allocated
+ virtual uint32_t item_count(void) const = 0;
+
+ // size(N) returns the size of the N-th item
+ virtual ItemSize item_size(const ItemID &) const = 0;
+
+ // conflict_with(N) returns all the items that are in conflict with item N
+ // - An item N is said to be in conflict with item M if item M and N cannot have overlap
+ //
+ // NOTE
+ // - conflict_with(N) SHOULD NOT include N itself
+ virtual std::set<ItemID> conflict_with(const ItemID &) const = 0;
+
+public: // Outputs
+ virtual void mem_offset(const ItemID &, const MemoryOffset &) = 0;
+ virtual void mem_total(const MemorySize &) = 0;
+};
+
+void solve(Context<Algorithm::LinearScanFirstFit> *);
+
+} // namespace kuma
+
+#endif // __KUMA_H__
diff --git a/compiler/kuma/src/IntervalSet.cpp b/compiler/kuma/src/IntervalSet.cpp
new file mode 100644
index 000000000..f6790c654
--- /dev/null
+++ b/compiler/kuma/src/IntervalSet.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IntervalSet.h"
+
+#include <cassert>
+#include <stdexcept>
+
+namespace kuma
+{
+namespace details
+{
+
+IntervalSet::IntervalSet(uint32_t len)
+{
+ // Update _content
+ _content[len] = len;
+}
+
+void IntervalSet::insert(const IntervalMask &m)
+{
+ auto s = m.s;
+ auto e = m.e;
+
+ assert(s <= e);
+
+ if (s == e)
+ {
+ // Empty region, nothing to do
+ return;
+ }
+
+ // lower_bound() returns an iterator to the first element not less than the given key
+ auto lb = _content.lower_bound(s);
+
+ // NOTE 1. "lower_bound" ensures "prev_s < s <= curr_e"
+ // NOTE 2. "e" points to somewhere after "s"
+ auto curr_s = lb->first - lb->second;
+ auto curr_e = lb->first;
+
+ if (curr_s < s)
+ {
+ // Split the current interval
+ _content[s] = s - curr_s;
+ // NOTE The invariant over "_content" is temporarily broken here.
+ }
+
+ if (e < curr_e)
+ {
+ // Adjust the current interval
+ _content[curr_e] = curr_e - e;
+ }
+ else
+ {
+ // Remove the current interval
+ _content.erase(curr_e);
+ // Check the next interval (e > curr_e)
+ //
+ // TODO Remove this recursive call (to prevent stack overflow issue)
+ insert(mask(curr_e, e));
+ }
+}
+
+uint32_t IntervalSet::firstfit(uint32_t len) const
+{
+ for (auto it = _content.begin(); it != _content.end(); ++it)
+ {
+ if (it->second >= len)
+ {
+ // Got it! This interval is larger than "len".
+ return it->first - it->second;
+ }
+ }
+
+ throw std::runtime_error{"infeasible"};
+}
+
+} // namespace details
+} // namespace kuma
diff --git a/compiler/kuma/src/IntervalSet.h b/compiler/kuma/src/IntervalSet.h
new file mode 100644
index 000000000..3b6c5f666
--- /dev/null
+++ b/compiler/kuma/src/IntervalSet.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KUMA_DETAILS_LIVE_INTERVAL_SET_H__
+#define __KUMA_DETAILS_LIVE_INTERVAL_SET_H__
+
+#include <map>
+
+namespace kuma
+{
+namespace details
+{
+
+struct IntervalMask
+{
+ uint32_t s;
+ uint32_t e;
+};
+
+inline IntervalMask mask(uint32_t s, uint32_t e)
+{
+ IntervalMask mask;
+
+ mask.s = s;
+ mask.e = e;
+
+ return mask;
+}
+
+class IntervalSet
+{
+public:
+ // [0, len) is live at the beginning
+ IntervalSet(uint32_t len = 0xffffffff);
+
+public:
+ void insert(const IntervalMask &);
+
+ /**
+ * "firstfit(l)" returns the offset of an interval whose length is larger than "l".
+ *
+ * When multiple intervals meet this condition, "firstfit(l)" chooses the interval
+ * with the smallest offset as its name suggests.
+ *
+ * NOTE This method throws std::runtime_error if fails to find a proper region
+ */
+ uint32_t firstfit(uint32_t len) const;
+
+private:
+ using End = uint32_t;
+ using Len = uint32_t;
+
+ // If [e -> l] is in _content, it means that [e - l, e) is a valid interval.
+ //
+ // INVARIANT
+ //
+  // If keys m and n (m <= n) are consecutive in _content, "m <= n - _content.at(n)" holds.
+ //
+ std::map<End, Len> _content;
+};
+
+} // namespace details
+} // namespace kuma
+
+#endif // __KUMA_DETAILS_LIVE_INTERVAL_SET_H__
diff --git a/compiler/kuma/src/IntervalSet.test.cpp b/compiler/kuma/src/IntervalSet.test.cpp
new file mode 100644
index 000000000..848ddee03
--- /dev/null
+++ b/compiler/kuma/src/IntervalSet.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IntervalSet.h"
+
+#include <gtest/gtest.h>
+
+using namespace kuma::details;
+
+TEST(IntervalSetTests, mask_and_firstfit)
+{
+ IntervalSet intervals;
+
+ // Exclude [0, 16) from available region
+ intervals.insert(mask(0, 16));
+
+ ASSERT_EQ(intervals.firstfit(4), 16);
+}
diff --git a/compiler/kuma/src/kuma.cpp b/compiler/kuma/src/kuma.cpp
new file mode 100644
index 000000000..6fad96b26
--- /dev/null
+++ b/compiler/kuma/src/kuma.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kuma.h"
+
+//
+// Greedy Allocation Algorithm
+//
+namespace kuma
+{
+
+void solve(Context<Algorithm::Greedy> *ctx)
+{
+  // Greedy allocation: place each item right after the previous one (no reuse)
+  uint32_t next = 0;
+
+  for (uint32_t n = 0; n < ctx->item_count(); ++n)
+  {
+    ctx->mem_offset(n, next);
+    next += ctx->item_size(n);
+  }
+
+  ctx->mem_total(next);
+}
+
+} // namespace kuma
+
+//
+// Linear Scan First Fit Algorithm
+//
+#include "IntervalSet.h"
+
+namespace kuma
+{
+
+void solve(Context<Algorithm::LinearScanFirstFit> *ctx)
+{
+ using namespace kuma::details;
+
+ uint32_t upper_bound = 0;
+ std::map<ItemID, std::pair<uint32_t /* BEGIN */, uint32_t /* END */>> committed_items;
+
+ // Allocate items in linear order (from item 0, item 1, ...)
+ //
+ // The implementor of Context is responsible for item ordering.
+ for (uint32_t n = 0; n < ctx->item_count(); ++n)
+ {
+ IntervalSet intervals;
+
+ for (auto item_in_conflict : ctx->conflict_with(n))
+ {
+ auto it = committed_items.find(item_in_conflict);
+
+ // Skip if item_in_conflict is not committed yet
+ if (it == committed_items.end())
+ {
+ continue;
+ }
+
+ auto const alloc_s = it->second.first;
+ auto const alloc_e = it->second.second;
+ intervals.insert(mask(alloc_s, alloc_e));
+ }
+
+ uint32_t const item_size = ctx->item_size(n);
+ uint32_t const item_alloc_s = intervals.firstfit(item_size);
+ uint32_t const item_alloc_e = item_alloc_s + item_size;
+
+ // Notify "mem_offset"
+ ctx->mem_offset(n, item_alloc_s);
+
+ // Update "upper bound" and commit allocation
+ upper_bound = std::max(upper_bound, item_alloc_e);
+ committed_items[n] = std::make_pair(item_alloc_s, item_alloc_e);
+ }
+
+ // Notify "mem_total"
+ ctx->mem_total(upper_bound);
+}
+
+} // namespace kuma
diff --git a/compiler/kuma/src/kuma.test.cpp b/compiler/kuma/src/kuma.test.cpp
new file mode 100644
index 000000000..5d947ea6b
--- /dev/null
+++ b/compiler/kuma/src/kuma.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kuma.h"
+
+#include <gtest/gtest.h>
+
+using namespace kuma;
+
+TEST(GreedyAlgorithmTests, empty)
+{
+ struct ContextImpl : public Context<Algorithm::Greedy>
+ {
+ uint32_t item_count(void) const final { return 0; }
+ ItemSize item_size(const ItemID &) const final { throw std::runtime_error{"error"}; }
+
+ void mem_offset(const ItemID &, const MemoryOffset &) { throw std::runtime_error{"error"}; };
+ void mem_total(const MemorySize &total) final { _total = total; }
+
+ uint32_t _total = 0xffffffff;
+ };
+
+ ContextImpl ctx;
+
+ solve(&ctx);
+
+ ASSERT_EQ(ctx._total, 0);
+}
+
+TEST(LinearScanFirstFitTests, reuse)
+{
+ struct ContextImpl : public Context<Algorithm::LinearScanFirstFit>
+ {
+ uint32_t item_count(void) const final { return 3; }
+ ItemSize item_size(const ItemID &) const final { return 4; }
+
+ std::set<ItemID> conflict_with(const ItemID &id) const
+ {
+ // 0 <-> 1 <-> 2
+ switch (id)
+ {
+ case 0:
+ return std::set<ItemID>({1});
+ case 1:
+ return std::set<ItemID>({0, 2});
+ case 2:
+ return std::set<ItemID>({1});
+ default:
+ break;
+ };
+
+ throw std::runtime_error{"Invalid"};
+ }
+
+ void mem_offset(const ItemID &id, const MemoryOffset &offset) { _offsets[id] = offset; };
+ void mem_total(const MemorySize &total) final { _total = total; }
+
+ uint32_t _offsets[3];
+ uint32_t _total = 0xffffffff;
+ };
+
+ ContextImpl ctx;
+
+ solve(&ctx);
+
+ // EXPECTED MEMORY LAYOUT:
+ // ------------------ 0
+ // | ITEM 0, ITEM 2 |
+ // ------------------ 4
+ // | ITEM 1 |
+ // ------------------ 8
+ ASSERT_EQ(ctx._total, 8);
+ ASSERT_EQ(ctx._offsets[0], 0);
+ ASSERT_EQ(ctx._offsets[1], 4);
+ ASSERT_EQ(ctx._offsets[2], 0);
+}
diff --git a/compiler/loco/CMakeLists.txt b/compiler/loco/CMakeLists.txt
new file mode 100644
index 000000000..f94052840
--- /dev/null
+++ b/compiler/loco/CMakeLists.txt
@@ -0,0 +1,28 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(loco SHARED ${SOURCES})
+target_include_directories(loco PUBLIC include)
+# TODO Remove dependencies on angkor library
+target_link_libraries(loco PUBLIC angkor)
+target_link_libraries(loco PRIVATE stdex)
+# Let's apply nncc common compile options
+#
+# NOTE This will enable strict compilation (warnings as error).
+# Please refer to the top-level CMakeLists.txt for details
+target_link_libraries(loco PRIVATE nncc_common)
+target_link_libraries(loco PUBLIC nncc_coverage)
+# Q. HOW TO MAKE DEV PACKAGE(?)
+install(TARGETS loco DESTINATION lib)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for internal testing
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(loco_test ${TESTS})
+target_link_libraries(loco_test stdex)
+target_link_libraries(loco_test loco)
diff --git a/compiler/loco/README.md b/compiler/loco/README.md
new file mode 100644
index 000000000..a388d8e48
--- /dev/null
+++ b/compiler/loco/README.md
@@ -0,0 +1,3 @@
+# loco
+
+_loco_ is a graph-based intermediate representation (IR) for neural network compilers.
diff --git a/compiler/loco/doc/LEP_000_Dialect_Service.md b/compiler/loco/doc/LEP_000_Dialect_Service.md
new file mode 100644
index 000000000..f6f6dc809
--- /dev/null
+++ b/compiler/loco/doc/LEP_000_Dialect_Service.md
@@ -0,0 +1,116 @@
+# Dialect Service
+
+This loco enhancement proposal (_LEP_) discusses how to permit a _loco_ graph without canonical dialect.
+
+## Revision
+
+| Date | Status |
+| --- | --- |
+| 2019/09/03 | Proposed |
+
+## Motivation
+
+One of key design principles behind _loco_ is to allow users (= NN compiler writers) to easily define their own intermediate representation (IR) on top of shared infrastructure.
+
+Unfortunately, however, there is a gap between dream and reality.
+It is currently impossible to create a _loco_ graph only with non-canonical dialects;
+there is no way to express the interaction between graph-level output without _canonical.Push_ node.
+
+This proposal aims to remove this restriction in order to bridge the gap between dream and reality.
+
+## Design
+
+Each dialect is now allowed to expose its internal to its client (such as transformations and core algorithms) through a so-called "Service" interface.
+
+Although this proposal focuses on ``output_nodes`` helper in _loco.core_, its coverage is not limited to this helper.
+Any pass and algorithm can take advantage of this generic infrastructure.
+
+Let us dive into some details.
+
+### What is "service"?
+
+A service declares a collection of APIs that each **client** (not dialect) needs.
+
+Let us consider ``output_nodes``. ``output_nodes`` needs to check whether a node is associated with any graph-level output.
+
+Here is one possible service design that satisfies this need.
+```cxx
+virtual bool associated(const Node *node) const = 0;
+virtual GraphOutputIndex index(const Node *node) const = 0;
+```
+
+### How to declare a service
+
+All of these service interfaces should inherit ``loco::DialectService`` interface that _loco.core_ defines.
+```cxx
+struct DialectService
+{
+ virtual ~DialectService() = default;
+};
+```
+
+For example, it is possible to declare the service that ``output_nodes`` needs as follows:
+```cxx
+struct GraphOutputIndexQueryService : public DialectService
+{
+ virtual ~GraphOutputIndexQueryService() = default;
+
+ virtual bool associated(const Node *node) const = 0;
+ virtual GraphOutputIndex index(const Node *node) const = 0;
+};
+```
+
+### How to access a service
+
+This proposal extends ``Dialect`` class with ``service`` method.
+
+Each dialect SHOULD return a valid pointer on ``service<Service>`` method call if it implements that service. Otherwise, it SHOULD return a null pointer.
+
+**WARNING** It is impossible to use ``get``. ``get`` is currently reserved for singleton accessor.
+
+Given a ``GraphOutputIndexQueryService``, it is possible to revise ``output_nodes`` as follows:
+```cxx
+std::vector<loco::Node *> output_nodes(loco::Graph *g)
+{
+ std::map<GraphOutputIndex, loco::Node *> table;
+
+ for (uint32_t n = 0; n < g->nodes()->size(); ++n)
+ {
+ auto node = g->nodes()->at(n);
+
+ if (auto service = node->dialect()->service<GraphOutputIndexQueryService>())
+ {
+ if (service->associated(node))
+ {
+ auto output_index = service->index(node);
+ assert(table.find(output_index) == table.end());
+ table[output_index] = node;
+ }
+ }
+ }
+
+ std::vector<loco::Node *> res;
+
+ for (uint32_t n = 0; n < g->outputs()->size(); ++n)
+ {
+ auto it = table.find(n);
+ // NOTE This behavior originates from the current implementation of output_nodes
+ res.emplace_back(it == table.end() ? nullptr : it->second);
+ }
+
+ return res;
+}
+```
+
+**PLEASE NOTE THAT** ``output_nodes`` now works with all the dialects that implement ``GraphOutputIndexQueryService``.
+
+### How to register a service
+
+Each dialect should invoke protected ``service`` method during its construction.
+```cxx
+AwesomeDialect::AwesomeDialect()
+{
+ std::unique_ptr<Impl> impl = ...;
+ service<GraphOutputIndexQueryService>(std::move(impl));
+}
+```
diff --git a/compiler/loco/include/loco.h b/compiler/loco/include/loco.h
new file mode 100644
index 000000000..5cc4487ea
--- /dev/null
+++ b/compiler/loco/include/loco.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_H__
+#define __LOCO_H__
+
+#include "loco/IR/Graph.h"
+#include "loco/IR/Algorithm.h"
+#include "loco/IR/Verifier.h"
+
+#include "loco/IR/PermutingCodec.h"
+
+#endif // __LOCO_H__
diff --git a/compiler/loco/include/loco/ADT/AnnotatedItem.h b/compiler/loco/include/loco/ADT/AnnotatedItem.h
new file mode 100644
index 000000000..be0d9ac1d
--- /dev/null
+++ b/compiler/loco/include/loco/ADT/AnnotatedItem.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_ADT_ANNOTATED_ITEM_H__
+#define __LOCO_ADT_ANNOTATED_ITEM_H__
+
+#include <map>
+#include <memory>
+#include <typeindex>
+
+namespace loco
+{
+
+template <typename Annotation> class AnnotatedItem
+{
+public:
+ AnnotatedItem() = default;
+
+public:
+ virtual ~AnnotatedItem() = default;
+
+public:
+ /**
+ * @brief Retrieve a stored annotation of type T
+ *
+ * @note This method returns nullptr if annotation does not exist
+ */
+ template <typename T> const T *annot(void) const
+ {
+ // TODO Insert static_assert(T derives Annotation);
+
+ auto it = _attrs.find(typeid(T));
+
+ if (it == _attrs.end())
+ {
+ return nullptr;
+ }
+
+ // TODO Insert null check
+ return dynamic_cast<T *>(it->second.get());
+ }
+
+ /**
+ * @brief Attach or remove a new annotation of type T
+ *
+ * @note annot<T>(nullptr) removes an attached annotation if it exists
+ */
+ template <typename T> void annot(std::unique_ptr<T> &&p)
+ {
+ // TODO: Insert static_assert(T derives Annotation);
+
+ if (p == nullptr)
+ {
+ _attrs.erase(typeid(T));
+ }
+ else
+ {
+ // TODO: assert(_attribs.find(typeid(T)) == _attribs.end());
+ _attrs[typeid(T)] = std::move(p);
+ }
+ }
+
+private:
+ std::map<std::type_index, std::unique_ptr<Annotation>> _attrs;
+};
+
+} // namespace loco
+
+#endif // __LOCO_ADT_ANNOTATED_ITEM_H__
diff --git a/compiler/loco/include/loco/ADT/ObjectPool.h b/compiler/loco/include/loco/ADT/ObjectPool.h
new file mode 100644
index 000000000..3f3a25c16
--- /dev/null
+++ b/compiler/loco/include/loco/ADT/ObjectPool.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_ADT_OBJECT_POOL_H__
+#define __LOCO_ADT_OBJECT_POOL_H__
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+namespace loco
+{
+
+/**
+ * @brief Object Pool
+ * @note ObjectPool owns registered objects.
+ */
+template <typename T> class ObjectPool
+{
+public:
+ virtual ~ObjectPool() = default;
+
+public:
+ /// @brief Return the number of objects
+ uint32_t size(void) const { return _pool.size(); }
+
+ /// @brief Access N-th object
+ T *at(uint32_t n) const { return _pool.at(n).get(); }
+
+protected:
+ /// @brief Take the ownership of a given object and returns its raw pointer
+ template <typename U> U *take(std::unique_ptr<U> &&o)
+ {
+ auto res = o.get();
+ _pool.emplace_back(std::move(o));
+ return res;
+ }
+
+ /**
+ * @brief Erase an object from the pool
+ *
+ * erase(p) returns false if p does not belong to this object pool.
+ */
+ bool erase(T *ptr)
+ {
+ auto pred = [ptr](const std::unique_ptr<T> &o) { return o.get() == ptr; };
+ auto it = std::find_if(_pool.begin(), _pool.end(), pred);
+
+ if (it == _pool.end())
+ {
+ return false;
+ }
+
+ _pool.erase(it);
+ return true;
+ }
+
+private:
+ std::vector<std::unique_ptr<T>> _pool;
+};
+
+} // namespace loco
+
+#endif // __LOCO_ADT_OBJECT_POOL_H__
diff --git a/compiler/loco/include/loco/IR/Algorithm.h b/compiler/loco/include/loco/IR/Algorithm.h
new file mode 100644
index 000000000..f7812e85d
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Algorithm.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_ALGORITHM_H__
+#define __LOCO_IR_ALGORITHM_H__
+
+#include "loco/IR/Node.h"
+
+#include <set>
+#include <vector>
+
+namespace loco
+{
+
+/**
+ * @brief Generate postorder traversal sequence starting from "roots"
+ *
+ * HOW TO USE
+ *
+ * for (auto node : postorder_traversal(...))
+ * {
+ * ... node->do_something() ...
+ * }
+ *
+ */
+std::vector<loco::Node *> postorder_traversal(const std::vector<loco::Node *> &roots);
+
+/**
+ * @brief Enumerate all the nodes required to compute "roots"
+ */
+std::set<loco::Node *> active_nodes(const std::vector<loco::Node *> &roots);
+
+} // namespace loco
+
+#endif // __LOCO_IR_ALGORITHM_H__
diff --git a/compiler/loco/include/loco/IR/BiasShape.h b/compiler/loco/include/loco/IR/BiasShape.h
new file mode 100644
index 000000000..037b0873e
--- /dev/null
+++ b/compiler/loco/include/loco/IR/BiasShape.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_BIAS_SHAPE_H__
+#define __LOCO_IR_BIAS_SHAPE_H__
+
+#include "loco/IR/Dimension.h"
+
+namespace loco
+{
+
+/**
+ * \brief Bias Shape
+ */
+class BiasShape final
+{
+public:
+ BiasShape() = default;
+
+public:
+ const Dimension &length(void) const { return _length; }
+ Dimension &length(void) { return _length; }
+
+private:
+ Dimension _length;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_BIAS_SHAPE_H__
diff --git a/compiler/loco/include/loco/IR/CanonicalDialect.h b/compiler/loco/include/loco/IR/CanonicalDialect.h
new file mode 100644
index 000000000..940d29a59
--- /dev/null
+++ b/compiler/loco/include/loco/IR/CanonicalDialect.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_CANONICAL_DIALECT_H__
+#define __LOCO_IR_CANONICAL_DIALECT_H__
+
+#include "loco/IR/Dialect.h"
+
+namespace loco
+{
+
+/**
+ * @brief A singleton for Canonical Dialect
+ *
+ * CanonicalDialect serves as an in-memory unique identifier.
+ */
+class CanonicalDialect final : public Dialect
+{
+private:
+ CanonicalDialect();
+
+public:
+ CanonicalDialect(const CanonicalDialect &) = delete;
+ CanonicalDialect(CanonicalDialect &&) = delete;
+
+public:
+ static Dialect *get(void);
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_CANONICAL_DIALECT_H__
diff --git a/compiler/loco/include/loco/IR/CanonicalNode.h b/compiler/loco/include/loco/IR/CanonicalNode.h
new file mode 100644
index 000000000..2dcc02e5d
--- /dev/null
+++ b/compiler/loco/include/loco/IR/CanonicalNode.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_CANONICAL_NODE_H__
+#define __LOCO_IR_CANONICAL_NODE_H__
+
+#include "loco/IR/CanonicalNodeDecl.h"
+#include "loco/IR/CanonicalNodeImpl.h"
+
+#endif // __LOCO_IR_CANONICAL_NODE_H__
diff --git a/compiler/loco/include/loco/IR/CanonicalNodeDecl.h b/compiler/loco/include/loco/IR/CanonicalNodeDecl.h
new file mode 100644
index 000000000..872edbb3e
--- /dev/null
+++ b/compiler/loco/include/loco/IR/CanonicalNodeDecl.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_CANONICAL_NODE_DECL_H__
+#define __LOCO_IR_CANONICAL_NODE_DECL_H__
+
+#include "loco/IR/Node.h"
+#include "loco/IR/Dialect.h"
+#include "loco/IR/CanonicalOpcode.h"
+#include "loco/IR/CanonicalNodeVisitor.forward.h"
+
+namespace loco
+{
+
+struct CanonicalNode : public Node
+{
+ virtual ~CanonicalNode() = default;
+
+ const Dialect *dialect(void) const final;
+ virtual CanonicalOpcode opcode(void) const = 0;
+
+ template <typename T> T accept(CanonicalNodeVisitorBase<T> *) const;
+ template <typename T> T accept(CanonicalNodeMutableVisitorBase<T> *);
+};
+
+template <CanonicalOpcode Code, template <typename T> class... Mixins>
+struct CanonicalNodeDef : public virtual CanonicalNode, public Mixins<CanonicalNode>...
+{
+ virtual ~CanonicalNodeDef() = default;
+
+ uint32_t opnum(void) const final { return static_cast<uint32_t>(Code); }
+ CanonicalOpcode opcode(void) const final { return Code; }
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_CANONICAL_NODE_DECL_H__
diff --git a/compiler/loco/include/loco/IR/CanonicalNodeImpl.h b/compiler/loco/include/loco/IR/CanonicalNodeImpl.h
new file mode 100644
index 000000000..73aa4caa5
--- /dev/null
+++ b/compiler/loco/include/loco/IR/CanonicalNodeImpl.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_CANONICAL_NODE_IMPL_H__
+#define __LOCO_IR_CANONICAL_NODE_IMPL_H__
+
+#include "loco/IR/Nodes.h"
+#include "loco/IR/CanonicalNodeVisitor.h"
+
+#include <stdexcept>
+
+namespace loco
+{
+
+template <typename T> T CanonicalNode::accept(CanonicalNodeVisitorBase<T> *v) const
+{
+ switch (this->opcode())
+ {
+#define CANONICAL_NODE(OPCODE, CLASS) \
+ case CanonicalOpcode::OPCODE: \
+ return v->visit(dynamic_cast<const CLASS *>(this));
+
+#include "CanonicalNodes.lst"
+#undef CANONICAL_NODE
+ default:
+ break;
+ }
+
+ throw std::runtime_error{"NYI"};
+}
+
+template <typename T> T CanonicalNode::accept(CanonicalNodeMutableVisitorBase<T> *v)
+{
+ switch (this->opcode())
+ {
+#define CANONICAL_NODE(OPCODE, CLASS) \
+ case CanonicalOpcode::OPCODE: \
+ return v->visit(dynamic_cast<CLASS *>(this));
+
+#include "CanonicalNodes.lst"
+#undef CANONICAL_NODE
+ default:
+ break;
+ }
+
+ throw std::runtime_error{"NYI"};
+}
+
+} // namespace loco
+
+#endif // __LOCO_IR_CANONICAL_NODE_IMPL_H__
diff --git a/compiler/loco/include/loco/IR/CanonicalNodeVisitor.forward.h b/compiler/loco/include/loco/IR/CanonicalNodeVisitor.forward.h
new file mode 100644
index 000000000..425d77997
--- /dev/null
+++ b/compiler/loco/include/loco/IR/CanonicalNodeVisitor.forward.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_CANONICAL_NODE_VISITOR_FORWARD_H__
+#define __LOCO_IR_CANONICAL_NODE_VISITOR_FORWARD_H__
+
+namespace loco
+{
+
+// NOTE These forward declarations SHOULD BE aligned with "CanonicalNodeVisitor.h"
+template <typename T> struct CanonicalNodeVisitorBase;
+template <typename T> struct CanonicalNodeMutableVisitorBase;
+
+} // namespace loco
+
+#endif // __LOCO_IR_CANONICAL_NODE_VISITOR_FORWARD_H__
diff --git a/compiler/loco/include/loco/IR/CanonicalNodeVisitor.h b/compiler/loco/include/loco/IR/CanonicalNodeVisitor.h
new file mode 100644
index 000000000..b9ffd5472
--- /dev/null
+++ b/compiler/loco/include/loco/IR/CanonicalNodeVisitor.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_CANONICAL_NODE_VISITOR_H__
+#define __LOCO_IR_CANONICAL_NODE_VISITOR_H__
+
+#include "loco/IR/Nodes.h"
+
+#include <stdexcept>
+
+namespace loco
+{
+
+/**
+ * DO NOT use this class. Use CanonicalNodeVisitor instead.
+ */
+template <typename T> struct CanonicalNodeVisitorBase
+{
+ virtual ~CanonicalNodeVisitorBase() = default;
+
+#define CANONICAL_NODE(OPCODE, CLASS) virtual T visit(const CLASS *) = 0;
+#include "CanonicalNodes.lst"
+#undef CANONICAL_NODE
+};
+
+template <typename T> struct CanonicalNodeVisitor : public CanonicalNodeVisitorBase<T>
+{
+ virtual ~CanonicalNodeVisitor() = default;
+
+#define CANONICAL_NODE(OPCODE, CLASS) \
+ virtual T visit(const CLASS *node) { return visit(static_cast<const Node *>(node)); }
+#include "CanonicalNodes.lst"
+#undef CANONICAL_NODE
+
+ /// @brief Default fallback
+ virtual T visit(const Node *) { throw std::runtime_error{"Not implemented, yet"}; }
+};
+
+/**
+ * DO NOT use this class. Use CanonicalNodeMutableVisitor instead.
+ */
+template <typename T> struct CanonicalNodeMutableVisitorBase
+{
+ virtual ~CanonicalNodeMutableVisitorBase() = default;
+
+#define CANONICAL_NODE(OPCODE, CLASS) virtual T visit(CLASS *) = 0;
+#include "CanonicalNodes.lst"
+#undef CANONICAL_NODE
+};
+
+template <typename T> struct CanonicalNodeMutableVisitor : public CanonicalNodeMutableVisitorBase<T>
+{
+ virtual ~CanonicalNodeMutableVisitor() = default;
+
+#define CANONICAL_NODE(OPCODE, CLASS) \
+ virtual T visit(CLASS *node) { return visit(static_cast<Node *>(node)); }
+#include "CanonicalNodes.lst"
+#undef CANONICAL_NODE
+
+ /// @brief Default fallback
+ virtual T visit(Node *) { throw std::runtime_error{"Not implemented, yet"}; }
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_CANONICAL_NODE_VISITOR_H__
diff --git a/compiler/loco/include/loco/IR/CanonicalNodes.lst b/compiler/loco/include/loco/IR/CanonicalNodes.lst
new file mode 100644
index 000000000..527856fbe
--- /dev/null
+++ b/compiler/loco/include/loco/IR/CanonicalNodes.lst
@@ -0,0 +1,49 @@
+#ifndef CANONICAL_NODE
+#error "Define CANONICAL_NODE"
+#endif // CANONICAL_NODE
+
+//
+// PLEASE SORT NODE DECLS IN ALPHABETICAL ORDER
+//
+
+// CANONICAL_NODE(OPCODE, CLASS)
+CANONICAL_NODE(AvgPool2D, AvgPool2D)
+CANONICAL_NODE(BiasDecode, BiasDecode)
+CANONICAL_NODE(BiasEncode, BiasEncode)
+CANONICAL_NODE(ConstGen, ConstGen)
+CANONICAL_NODE(Conv2D, Conv2D)
+CANONICAL_NODE(DepthwiseConv2D, DepthwiseConv2D)
+CANONICAL_NODE(DepthwiseFilterDecode, DepthwiseFilterDecode)
+CANONICAL_NODE(DepthwiseFilterEncode, DepthwiseFilterEncode)
+CANONICAL_NODE(EltwiseAdd, EltwiseAdd)
+CANONICAL_NODE(EltwiseDiv, EltwiseDiv)
+CANONICAL_NODE(EltwiseMax, EltwiseMax)
+CANONICAL_NODE(EltwiseMul, EltwiseMul)
+CANONICAL_NODE(EltwiseSqrt, EltwiseSqrt)
+CANONICAL_NODE(EltwiseSub, EltwiseSub)
+CANONICAL_NODE(FeatureBiasAdd, BiasAdd<Domain::Feature>)
+CANONICAL_NODE(FeatureDecode, FeatureDecode)
+CANONICAL_NODE(FeatureEncode, FeatureEncode)
+CANONICAL_NODE(FilterDecode, FilterDecode)
+CANONICAL_NODE(FilterEncode, FilterEncode)
+CANONICAL_NODE(FixedReshape, Reshape<ReshapeType::Fixed>)
+CANONICAL_NODE(Forward, Forward)
+CANONICAL_NODE(MaxPool2D, MaxPool2D)
+// WARN Push may be excluded from canonical dialect in the future
+CANONICAL_NODE(Push, Push)
+// WARN Pull may be excluded from canonical dialect in the future
+CANONICAL_NODE(Pull, Pull)
+CANONICAL_NODE(ReLU, ReLU)
+CANONICAL_NODE(ReLU6, ReLU6)
+CANONICAL_NODE(Tanh, Tanh)
+CANONICAL_NODE(TensorConcat, TensorConcat)
+CANONICAL_NODE(TensorConstantPad, TensorConstantPad)
+CANONICAL_NODE(TensorBiasAdd, BiasAdd<Domain::Tensor>)
+CANONICAL_NODE(TensorBroadcast, TensorBroadcast)
+CANONICAL_NODE(TensorReduce, TensorReduce)
+CANONICAL_NODE(TensorTranspose, TensorTranspose)
+CANONICAL_NODE(TensorSoftmax, Softmax<Domain::Tensor>)
+CANONICAL_NODE(TransposedConv2D, TransposedConv2D)
+CANONICAL_NODE(MatrixEncode, MatrixEncode)
+CANONICAL_NODE(MatrixDecode, MatrixDecode)
+CANONICAL_NODE(MatMul, MatMul)
diff --git a/compiler/loco/include/loco/IR/CanonicalOpcode.h b/compiler/loco/include/loco/IR/CanonicalOpcode.h
new file mode 100644
index 000000000..58aa7de6d
--- /dev/null
+++ b/compiler/loco/include/loco/IR/CanonicalOpcode.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_CANONICAL_OPCODE_H__
+#define __LOCO_IR_CANONICAL_OPCODE_H__
+
+namespace loco
+{
+
+/**
+ * @brief Canonical Node Opcode
+ *
+ * WARNING The order is subject to change. DO NOT serialize this value.
+ */
+enum class CanonicalOpcode
+{
+#define CANONICAL_NODE(OPCODE, CLASS) OPCODE,
+#include "CanonicalNodes.lst"
+#undef CANONICAL_NODE
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_CANONICAL_OPCODE_H__
diff --git a/compiler/loco/include/loco/IR/DataType.h b/compiler/loco/include/loco/IR/DataType.h
new file mode 100644
index 000000000..b07022bf5
--- /dev/null
+++ b/compiler/loco/include/loco/IR/DataType.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_DATA_TYPE_H__
+#define __LOCO_IR_DATA_TYPE_H__
+
+namespace loco
+{
+
+/**
+ * @brief "scalar" value type
+ */
+enum class DataType
+{
+ Unknown, // Unknown type (serves as a default value)
+
+ U8, // 8-bit unsigned integer
+ U16, // 16-bit unsigned integer
+ U32, // 32-bit unsigned integer
+ U64, // 64-bit unsigned integer
+
+ S8, // 8-bit signed integer
+ S16, // 16-bit signed integer
+ S32, // 32-bit signed integer
+ S64, // 64-bit signed integer
+
+ FLOAT16, // IEEE 16-bit floating-point
+ FLOAT32, // IEEE 32-bit floating-point
+ FLOAT64, // IEEE 64-bit floating-point
+
+ // WARNING the size of Bool may vary for NN frameworks
+ // TODO we need to find a way to resolve this issue
+ BOOL, // Boolean
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_DATA_TYPE_H__
diff --git a/compiler/loco/include/loco/IR/DataTypeTraits.h b/compiler/loco/include/loco/IR/DataTypeTraits.h
new file mode 100644
index 000000000..c4479e545
--- /dev/null
+++ b/compiler/loco/include/loco/IR/DataTypeTraits.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_DATA_TYPE_TRAITS_H__
+#define __LOCO_IR_DATA_TYPE_TRAITS_H__
+
+#include "loco/IR/DataType.h"
+
+#include <cassert>
+#include <cstdint>
+
+namespace loco
+{
+
+/**
+ * @brief C++ scalar type corresponding to each DataType
+ */
+template <DataType DT> struct DataTypeImpl
+{
+ // using Type = ...
+};
+
+// TODO Support other enum values
+template <> struct DataTypeImpl<DataType::S8>
+{
+ // Use C++ int8_t type for 8bit integer
+ using Type = int8_t;
+};
+
+template <> struct DataTypeImpl<DataType::U8>
+{
+ // Use C++ uint8_t type for unsigned 8bit integer
+ using Type = uint8_t;
+};
+
+template <> struct DataTypeImpl<DataType::S32>
+{
+ // Use C++ int32_t type for 32bit integer
+ using Type = int32_t;
+};
+
+template <> struct DataTypeImpl<DataType::FLOAT32>
+{
+ // Use C++ float type for IEEE 32-bit floating-point numbers
+ using Type = float;
+};
+
+/**
+ * @brief Returns the size of the data type.
+ * @note If you need the size at compile time, use `sizeof(typename DataTypeImpl<DT>::Type)`.
+ */
+inline uint32_t size(DataType data_type)
+{
+ switch (data_type)
+ {
+ case DataType::S8:
+ return sizeof(DataTypeImpl<DataType::S8>::Type);
+ case DataType::U8:
+ return sizeof(DataTypeImpl<DataType::U8>::Type);
+ case DataType::S32:
+ return sizeof(DataTypeImpl<DataType::S32>::Type);
+ case DataType::FLOAT32:
+ return sizeof(DataTypeImpl<DataType::FLOAT32>::Type);
+ default:
+ // TODO Support remaining data types.
+ assert(false);
+ return UINT32_MAX; // Avoid compiler warning.
+ }
+}
+
+} // namespace loco
+
+#endif // __LOCO_IR_DATA_TYPE_TRAITS_H__
diff --git a/compiler/loco/include/loco/IR/DepthwiseFilterAxis.h b/compiler/loco/include/loco/IR/DepthwiseFilterAxis.h
new file mode 100644
index 000000000..eb4650ec9
--- /dev/null
+++ b/compiler/loco/include/loco/IR/DepthwiseFilterAxis.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_DEPTHWISE_FILTER_AXIS_H__
+#define __LOCO_IR_DEPTHWISE_FILTER_AXIS_H__
+
+namespace loco
+{
+
+enum class DepthwiseFilterAxis
+{
+ Depth,
+ Multiplier,
+ Height,
+ Width
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_DEPTHWISE_FILTER_AXIS_H__
diff --git a/compiler/loco/include/loco/IR/DepthwiseFilterCodec.h b/compiler/loco/include/loco/IR/DepthwiseFilterCodec.h
new file mode 100644
index 000000000..0d9286b46
--- /dev/null
+++ b/compiler/loco/include/loco/IR/DepthwiseFilterCodec.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_DEPTHWISE_FILTER_CODEC_H__
+#define __LOCO_IR_DEPTHWISE_FILTER_CODEC_H__
+
+#include "loco/IR/DepthwiseFilterShape.h"
+#include "loco/IR/DepthwiseFilterIndex.h"
+
+#include "loco/IR/TensorShape.h"
+#include "loco/IR/TensorIndex.h"
+
+namespace loco
+{
+
+/**
+ * @brief Describe how to build a depthwise convolution filter from a tensor
+ *
+ * Let us assume that "enc" is a depthwise filter encoder.
+ *
+ * Given a tensor "inp" and its shape "inp.shape", "enc" builds a depthwise filter
+ * "out" as follows:
+ *
+ * for each valid filter_index for enc.shape(inp.shape)
+ * out.at(filter_index) = inp.at(enc.value(filter_index))
+ */
+struct DepthwiseFilterEncoder
+{
+ virtual ~DepthwiseFilterEncoder() = default;
+
+ virtual DepthwiseFilterShape shape(const TensorShape &shape) const = 0;
+ virtual TensorIndex value(const DepthwiseFilterIndex &index) const = 0;
+};
+
+/**
+ * @brief Describe how to build a tensor from a depthwise convolution filter
+ *
+ * Let us assume that "dec" is a depthwise filter decoder.
+ *
+ * Given a depthwise filter "inp" and its shape "inp.shape", "dec" builds a tensor
+ * "out" as follows:
+ *
+ * for each valid tensor_index for dec.shape(inp.shape)
+ * out.at(tensor_index) = inp.at(dec.value(tensor_index))
+ */
+struct DepthwiseFilterDecoder
+{
+ virtual ~DepthwiseFilterDecoder() = default;
+
+ virtual TensorShape shape(const DepthwiseFilterShape &shape) const = 0;
+ virtual DepthwiseFilterIndex value(const TensorIndex &index) const = 0;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_DEPTHWISE_FILTER_CODEC_H__
diff --git a/compiler/loco/include/loco/IR/DepthwiseFilterIndex.h b/compiler/loco/include/loco/IR/DepthwiseFilterIndex.h
new file mode 100644
index 000000000..884e50a56
--- /dev/null
+++ b/compiler/loco/include/loco/IR/DepthwiseFilterIndex.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_DEPTHWISE_FILTER_INDEX_H__
+#define __LOCO_IR_DEPTHWISE_FILTER_INDEX_H__
+
+#include <cstdint>
+
+namespace loco
+{
+
+/**
+ * @brief DepthwiseFilter Index
+ *
+ * DepthwiseFilter Index indicates an "element" in a given Depthwise convolution filter.
+ *
+ * Assume there is a filter K where KS denotes its shape (of DepthwiseFilterShape type).
+ *
+ * Then, any valid filter index I satisfies the following invariants:
+ * - 0 <= I.channel() < KS.depth()
+ * - 0 <= I.nth() < KS.multiplier()
+ * - 0 <= I.row() < KS.height()
+ * - 0 <= I.column() < KS.width()
+ */
+class DepthwiseFilterIndex final
+{
+public:
+ DepthwiseFilterIndex() = default;
+
+public:
+ const uint32_t &channel(void) const { return _channel; }
+ uint32_t &channel(void) { return _channel; }
+
+ const uint32_t &nth(void) const { return _nth; }
+ uint32_t &nth(void) { return _nth; }
+
+ const uint32_t &row(void) const { return _row; }
+ uint32_t &row(void) { return _row; }
+
+ const uint32_t &column(void) const { return _column; }
+ uint32_t &column(void) { return _column; }
+
+private:
+ uint32_t _channel = 0;
+ uint32_t _nth = 0;
+ uint32_t _row = 0;
+ uint32_t _column = 0;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_DEPTHWISE_FILTER_INDEX_H__
diff --git a/compiler/loco/include/loco/IR/DepthwiseFilterShape.h b/compiler/loco/include/loco/IR/DepthwiseFilterShape.h
new file mode 100644
index 000000000..eb1a1e335
--- /dev/null
+++ b/compiler/loco/include/loco/IR/DepthwiseFilterShape.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_DEPTHWISE_FILTER_SHAPE_H__
+#define __LOCO_IR_DEPTHWISE_FILTER_SHAPE_H__
+
+#include "loco/IR/Dimension.h"
+
+namespace loco
+{
+
+/**
+ * @brief DepthwiseFilter Shape
+ *
+ * This class describes the shape of depthwise filter, which is an input of depthwise 2D
+ * convolutional operation.
+ *
+ * depth() refers to expected channel depth of matching input
+ * multiplier() refers to number of traverse for one input
+ * height() refers to the height of 2D weights
+ * width() refers to the width of 2D weights
+ */
+class DepthwiseFilterShape final
+{
+public:
+ DepthwiseFilterShape() = default;
+
+public:
+ const Dimension &depth(void) const { return _depth; }
+ Dimension &depth(void) { return _depth; }
+
+ const Dimension &multiplier(void) const { return _multiplier; }
+ Dimension &multiplier(void) { return _multiplier; }
+
+ const Dimension &height(void) const { return _height; }
+ Dimension &height(void) { return _height; }
+
+ const Dimension &width(void) const { return _width; }
+ Dimension &width(void) { return _width; }
+
+private:
+ Dimension _depth;
+ Dimension _multiplier;
+ Dimension _height;
+ Dimension _width;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_DEPTHWISE_FILTER_SHAPE_H__
diff --git a/compiler/loco/include/loco/IR/Dialect.h b/compiler/loco/include/loco/IR/Dialect.h
new file mode 100644
index 000000000..b8942bfb4
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Dialect.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_DIALECT_H__
+#define __LOCO_IR_DIALECT_H__
+
+#include "loco/IR/DialectService.h"
+
+#include <map>
+#include <memory>
+#include <typeindex>
+#include <typeinfo>
+
+namespace loco
+{
+
+/**
+ * @brief Dialect interface
+ *
+ * Each dialect implementation is expected to have static "get" method
+ * which returns "const Dialect *" value.
+ */
+class Dialect
+{
+public:
+ virtual ~Dialect() = default;
+
+protected:
+ template <typename ConcreteService> void service(std::unique_ptr<ConcreteService> &&s)
+ {
+ _services[typeid(ConcreteService)] = std::move(s);
+ }
+
+public:
+ template <typename ConcreteService> ConcreteService *service(void) const
+ {
+ auto it = _services.find(typeid(ConcreteService));
+
+ if (it == _services.end())
+ {
+ return nullptr;
+ }
+
+ return dynamic_cast<ConcreteService *>(it->second.get());
+ }
+
+private:
+ std::map<std::type_index, std::unique_ptr<DialectService>> _services;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_DIALECT_H__
diff --git a/compiler/loco/include/loco/IR/DialectService.h b/compiler/loco/include/loco/IR/DialectService.h
new file mode 100644
index 000000000..54a3fac74
--- /dev/null
+++ b/compiler/loco/include/loco/IR/DialectService.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_DIALECT_SERVICE_H__
+#define __LOCO_IR_DIALECT_SERVICE_H__
+
+namespace loco
+{
+
+/**
+ * @brief Dialect Service interface
+ *
+ * Every service that each dialect exposes should inherit this interface.
+ */
+struct DialectService
+{
+ virtual ~DialectService() = default;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_DIALECT_SERVICE_H__
diff --git a/compiler/loco/include/loco/IR/Dimension.h b/compiler/loco/include/loco/IR/Dimension.h
new file mode 100644
index 000000000..7b5d5943f
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Dimension.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_DIMENSION_H__
+#define __LOCO_IR_DIMENSION_H__
+
+#include <cstdint>
+
+namespace loco
+{
+
+/**
+ * @brief The value of one dimension in a tensor shape
+ * @note The value may be unknown
+ */
+class Dimension final
+{
+private:
+ enum class Kind
+ {
+ Known,
+ Unknown
+ };
+
+public:
+ // @brief Construct an "unknown" dimension
+ Dimension() = default;
+
+ // @brief Construct a "known" dimension
+ Dimension(uint32_t value) { set(value); }
+
+public:
+ // @brief Return whether the value is known (or not)
+ bool known(void) const { return _kind == Kind::Known; }
+
+ // @brief Return the value
+ // @note This value is meaningful only for known dimension
+ uint32_t value(void) const { return _value; }
+
+ void set(uint32_t value)
+ {
+ _kind = Kind::Known;
+ _value = value;
+ }
+
+ void unset(void)
+ {
+ _kind = Kind::Unknown;
+ _value = 0;
+ }
+
+private:
+ Kind _kind{Kind::Unknown};
+ uint32_t _value{0};
+};
+
+/**
+ * @brief Equality operator between two Dimensions
+ *
+ * @note Refer to the definition of equality of dimension at
+ * https://www.tensorflow.org/api_docs/python/tf/Dimension#__eq__
+ */
+bool operator==(const Dimension &, const Dimension &);
+bool operator==(const Dimension &, uint32_t);
+bool operator==(uint32_t, const Dimension &);
+
+// @brief Make an "unknown" dimension
+Dimension make_dimension(void);
+
+} // namespace loco
+
+#endif // __LOCO_IR_DIMENSION_H__
diff --git a/compiler/loco/include/loco/IR/Domain.h b/compiler/loco/include/loco/IR/Domain.h
new file mode 100644
index 000000000..823bc1833
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Domain.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_DOMAIN_H__
+#define __LOCO_IR_DOMAIN_H__
+
+namespace loco
+{
+
+/**
+ * @brief Describe the kind of (N-dimensional) loco values
+ *
+ * loco is an intermediate representation for neural network compiler, which mainly focuses on
+ * N-dimensional values (usually referred to as Tensor).
+ *
+ * There are several special cases for N-dimensional values according to its usage. For example,
+ * vision community often refers to 4D array as "FeatureMap".
+ *
+ * It is definitely possible to represent all of these special cases using Tensor, but that scheme
+ * may introduce some confusion (e.g. NCHW vs NHWC issue).
+ *
+ * loco distinguishes these special cases from Tensor in order to reduce such confusion.
+ *
+ * This "Domain" enum class enumerates all of these special cases that loco supports.
+ */
+enum class Domain
+{
+ Unknown,
+ Tensor,
+ Feature,
+ Filter, /* 2D Convolution Filter */
+ DepthwiseFilter, /* Depthwise 2D Convolution Filter */
+ Bias,
+ Matrix,
+ /* ... */
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_DOMAIN_H__
diff --git a/compiler/loco/include/loco/IR/FeatureAxis.h b/compiler/loco/include/loco/IR/FeatureAxis.h
new file mode 100644
index 000000000..cf020edd2
--- /dev/null
+++ b/compiler/loco/include/loco/IR/FeatureAxis.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_FEATURE_AXIS_H__
+#define __LOCO_IR_FEATURE_AXIS_H__
+
+namespace loco
+{
+
+enum class FeatureAxis
+{
+ Count,
+ Depth,
+ Height,
+ Width
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_FEATURE_AXIS_H__
diff --git a/compiler/loco/include/loco/IR/FeatureCodec.h b/compiler/loco/include/loco/IR/FeatureCodec.h
new file mode 100644
index 000000000..93094e13a
--- /dev/null
+++ b/compiler/loco/include/loco/IR/FeatureCodec.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_FEATURE_CODEC_H__
+#define __LOCO_IR_FEATURE_CODEC_H__
+
+#include "loco/IR/FeatureShape.h"
+#include "loco/IR/FeatureIndex.h"
+
+#include "loco/IR/TensorShape.h"
+#include "loco/IR/TensorIndex.h"
+
+#include <memory>
+
+namespace loco
+{
+
+/**
+ * @brief Describe how to build a (convolution) feature map from a tensor
+ *
+ * Let us assume that "enc" is a feature encoder.
+ *
+ * Given a tensor "inp" and its shape "inp.shape", "enc" builds a feature map
+ * "out" as follows:
+ *
+ * for each valid feature index (referred to as feature_index below) for enc.shape(inp.shape)
+ * out.at(feature_index) = inp.at(enc.value(feature_index))
+ */
+struct FeatureEncoder
+{
+ virtual ~FeatureEncoder() = default;
+
+ virtual FeatureShape shape(const TensorShape &shape) const = 0;
+ virtual TensorIndex value(const FeatureIndex &index) const = 0;
+
+ virtual std::unique_ptr<FeatureEncoder> clone(void) const = 0;
+};
+
+/**
+ * @brief Describe how to build a tensor from a (convolution) feature map
+ *
+ * Let us assume that "dec" is a feature decoder.
+ *
+ * Given a feature map "inp" and its shape "inp.shape", "dec" builds a tensor
+ * "out" as follows:
+ *
+ * for each valid tensor index (referred to as tensor_index below) for dec.shape(inp.shape)
+ * out.at(tensor_index) = inp.at(dec.value(tensor_index))
+ *
+ * NOTE "inp" is a feature value and "out" is a tensor value in this example.
+ */
+struct FeatureDecoder
+{
+ virtual ~FeatureDecoder() = default;
+
+ virtual TensorShape shape(const FeatureShape &) const = 0;
+ virtual FeatureIndex value(const TensorIndex &) const = 0;
+
+ virtual std::unique_ptr<FeatureDecoder> clone(void) const = 0;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_FEATURE_CODEC_H__
diff --git a/compiler/loco/include/loco/IR/FeatureIndex.h b/compiler/loco/include/loco/IR/FeatureIndex.h
new file mode 100644
index 000000000..007f94491
--- /dev/null
+++ b/compiler/loco/include/loco/IR/FeatureIndex.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_FEATURE_INDEX_H__
+#define __LOCO_IR_FEATURE_INDEX_H__
+
+#include <cstdint>
+
+namespace loco
+{
+
+/**
+ * \brief Feature Index
+ *
+ * Feature Index indicates an "element" in a given feature map.
+ *
+ * Let us assume that there is a feature map F and S denotes its shape (of FeatureShape type).
+ *
+ * Then, any valid feature index I satisfies the following invariants:
+ * - 0 <= I.batch() < S.count()
+ * - 0 <= I.channel() < S.depth()
+ * - 0 <= I.row() < S.height()
+ * - 0 <= I.column() < S.width()
+ */
+class FeatureIndex final
+{
+public:
+ FeatureIndex() = default;
+
+public:
+ const uint32_t &batch(void) const { return _batch; }
+ uint32_t &batch(void) { return _batch; }
+
+ const uint32_t &channel(void) const { return _channel; }
+ uint32_t &channel(void) { return _channel; }
+
+ const uint32_t &row(void) const { return _row; }
+ uint32_t &row(void) { return _row; }
+
+ const uint32_t &column(void) const { return _column; }
+ uint32_t &column(void) { return _column; }
+
+private:
+ uint32_t _batch = 0;
+ uint32_t _channel = 0;
+ uint32_t _row = 0;
+ uint32_t _column = 0;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_FEATURE_INDEX_H__
diff --git a/compiler/loco/include/loco/IR/FeatureShape.h b/compiler/loco/include/loco/IR/FeatureShape.h
new file mode 100644
index 000000000..d09a2b2b8
--- /dev/null
+++ b/compiler/loco/include/loco/IR/FeatureShape.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_FEATURE_SHAPE_H__
+#define __LOCO_IR_FEATURE_SHAPE_H__
+
+#include "loco/IR/Dimension.h"
+
+namespace loco
+{
+
+/**
+ * \brief Feature Map Shape
+ *
+ * This class describes the shape of feature maps, which serves as the input/output of 2D
+ * convolutional operations (e.g. Convolution).
+ *
+ * Each feature map is a collection of 3D features conceptually.
+ * Each feature has depth, height, width.
+ *
+ * count() refers to the number of features in a feature map
+ * depth() refers to the depth of features in a given feature map
+ * height() refers to the height of features in a given feature map
+ * width() refers to the width of features in a given feature map
+ */
+class FeatureShape final
+{
+public:
+ FeatureShape() = default;
+
+public:
+ const Dimension &count(void) const { return _count; }
+ Dimension &count(void) { return _count; }
+
+ const Dimension &depth(void) const { return _depth; }
+ Dimension &depth(void) { return _depth; }
+
+ const Dimension &height(void) const { return _height; }
+ Dimension &height(void) { return _height; }
+
+ const Dimension &width(void) const { return _width; }
+ Dimension &width(void) { return _width; }
+
+private:
+ Dimension _count;
+ Dimension _depth;
+ Dimension _height;
+ Dimension _width;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_FEATURE_SHAPE_H__
diff --git a/compiler/loco/include/loco/IR/FilterAxis.h b/compiler/loco/include/loco/IR/FilterAxis.h
new file mode 100644
index 000000000..269e2aecc
--- /dev/null
+++ b/compiler/loco/include/loco/IR/FilterAxis.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_FILTER_AXIS_H__
+#define __LOCO_IR_FILTER_AXIS_H__
+
+namespace loco
+{
+
+enum class FilterAxis
+{
+ Count,
+ Depth,
+ Height,
+ Width
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_FILTER_AXIS_H__
diff --git a/compiler/loco/include/loco/IR/FilterCodec.h b/compiler/loco/include/loco/IR/FilterCodec.h
new file mode 100644
index 000000000..3ff548d6d
--- /dev/null
+++ b/compiler/loco/include/loco/IR/FilterCodec.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_FILTER_CODEC_H__
+#define __LOCO_IR_FILTER_CODEC_H__
+
+#include "loco/IR/FilterShape.h"
+#include "loco/IR/FilterIndex.h"
+
+#include "loco/IR/TensorShape.h"
+#include "loco/IR/TensorIndex.h"
+
+namespace loco
+{
+
+/**
+ * @brief Describe how to build a (convolution) filter from a tensor
+ *
+ * Let us assume that "enc" is a filter encoder.
+ *
+ * Given a tensor "inp" and its shape "inp.shape", "enc" builds a filter
+ * "out" as follows:
+ *
+ * for each valid filter index (referred to as filter_index below) for enc.shape(inp.shape)
+ * out.at(filter_index) = inp.at(enc.value(filter_index))
+ */
+struct FilterEncoder
+{
+ virtual ~FilterEncoder() = default;
+
+ virtual FilterShape shape(const TensorShape &shape) const = 0;
+ virtual TensorIndex value(const FilterIndex &index) const = 0;
+};
+
+/**
+ * @brief Describe how to build a tensor from a filter
+ */
+struct FilterDecoder
+{
+ virtual ~FilterDecoder() = default;
+
+ virtual TensorShape shape(const FilterShape &shape) const = 0;
+ virtual FilterIndex value(const TensorIndex &index) const = 0;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_FILTER_CODEC_H__
diff --git a/compiler/loco/include/loco/IR/FilterIndex.h b/compiler/loco/include/loco/IR/FilterIndex.h
new file mode 100644
index 000000000..5765ea764
--- /dev/null
+++ b/compiler/loco/include/loco/IR/FilterIndex.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_FILTER_INDEX_H__
+#define __LOCO_IR_FILTER_INDEX_H__
+
+#include <cstdint>
+
+namespace loco
+{
+
+/**
+ * \brief Filter Index
+ *
+ * Filter Index indicates an "element" in a given (convolutional) filter.
+ *
+ * Let us assume that there is a filter K where KS denotes its shape (of FilterShape type).
+ *
+ * Then, any valid filter index I satisfies the following invariants:
+ * - 0 <= I.nth() < KS.count()
+ * - 0 <= I.channel() < KS.depth()
+ * - 0 <= I.row() < KS.height()
+ * - 0 <= I.column() < KS.width()
+ */
+class FilterIndex final
+{
+public:
+ FilterIndex() = default;
+
+public:
+ const uint32_t &nth(void) const { return _nth; }
+ uint32_t &nth(void) { return _nth; }
+
+ const uint32_t &channel(void) const { return _channel; }
+ uint32_t &channel(void) { return _channel; }
+
+ const uint32_t &row(void) const { return _row; }
+ uint32_t &row(void) { return _row; }
+
+ const uint32_t &column(void) const { return _column; }
+ uint32_t &column(void) { return _column; }
+
+private:
+ uint32_t _nth = 0;
+ uint32_t _channel = 0;
+ uint32_t _row = 0;
+ uint32_t _column = 0;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_FILTER_INDEX_H__
diff --git a/compiler/loco/include/loco/IR/FilterShape.h b/compiler/loco/include/loco/IR/FilterShape.h
new file mode 100644
index 000000000..00e44892a
--- /dev/null
+++ b/compiler/loco/include/loco/IR/FilterShape.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_FILTER_SHAPE_H__
+#define __LOCO_IR_FILTER_SHAPE_H__
+
+#include "loco/IR/Dimension.h"
+
+namespace loco
+{
+
+/**
+ * \brief Filter Shape
+ *
+ * This class describes the shape of filter, which is an input of 2D
+ * convolutional operations (e.g. Convolution).
+ *
+ * count() refers to the number of 3D weights in a filter
+ * depth() refers to the depth of 3D weights
+ * height() refers to the height of 3D weights
+ * width() refers to the width of 3D weights
+ *
+ * NOTE
+ *
+ * The definition of FilterShape is almost same as that of FeatureShape, but loco
+ * distinguishes FeatureShape and FilterShape in class-level in order to prevent
+ * potential errors by type check.
+ */
+class FilterShape final
+{
+public:
+ FilterShape() = default;
+
+public:
+ const Dimension &count(void) const { return _count; }
+ Dimension &count(void) { return _count; }
+
+ const Dimension &depth(void) const { return _depth; }
+ Dimension &depth(void) { return _depth; }
+
+ const Dimension &height(void) const { return _height; }
+ Dimension &height(void) { return _height; }
+
+ const Dimension &width(void) const { return _width; }
+ Dimension &width(void) { return _width; }
+
+private:
+ Dimension _count;
+ Dimension _depth;
+ Dimension _height;
+ Dimension _width;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_FILTER_SHAPE_H__
diff --git a/compiler/loco/include/loco/IR/Graph.forward.h b/compiler/loco/include/loco/IR/Graph.forward.h
new file mode 100644
index 000000000..2a43be93a
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Graph.forward.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_GRAPH_FORWARD_H__
+#define __LOCO_IR_GRAPH_FORWARD_H__
+
+namespace loco
+{
+
+// This forward declaration SHOULD BE aligned with the actual declaration in "Graph.h".
+class Graph;
+
+} // namespace loco
+
+#endif // __LOCO_IR_GRAPH_FORWARD_H__
diff --git a/compiler/loco/include/loco/IR/Graph.h b/compiler/loco/include/loco/IR/Graph.h
new file mode 100644
index 000000000..a820aba91
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Graph.h
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_GRAPH_H__
+#define __LOCO_IR_GRAPH_H__
+
+#include "loco/IR/DataType.h"
+// TODO Include "Node.h" instead
+#include "loco/IR/Nodes.h"
+#include "loco/IR/NodePool.h"
+#include "loco/IR/GraphInputIndex.h"
+#include "loco/IR/GraphOutputIndex.h"
+
+#include "loco/ADT/ObjectPool.h"
+
+#include <initializer_list>
+#include <set>
+#include <string>
+#include <memory>
+#include <vector>
+
+namespace loco
+{
+
+// TODO Introduce Named trait
+enum class Trait
+{
+ // Any "DataTyped" class has the following methods
+ // - DataType dtype(void) const;
+ // - void dtype(const DataType &value);
+ DataTyped,
+ // Any "TensorShaped" class has the following methods
+ // - const TensorShape *shape(void) const;
+ // - void shape(std::unique_ptr<TensorShape> &&);
+ // - void shape(std::initializer_list<Dimension> &&);
+ //
+ // TODO Rename NodeMixin::TensorShape as NodeMixin::NDShape
+ TensorShaped,
+};
+
+template <Trait T> class Mixin;
+
+// TODO Re-implement NodeMixin<NodeTrait::DataType> using this mixin
+template <> class Mixin<Trait::DataTyped>
+{
+public:
+ Mixin() = default;
+
+public:
+ const DataType &dtype(void) const { return _dtype; }
+ void dtype(const DataType &value) { _dtype = value; }
+
+private:
+ DataType _dtype = DataType::Unknown;
+};
+
+template <> class Mixin<Trait::TensorShaped>
+{
+public:
+ Mixin() = default;
+
+public:
+ const TensorShape *shape(void) const { return _shape.get(); }
+ void shape(std::unique_ptr<TensorShape> &&shape) { _shape = std::move(shape); }
+ void shape(std::initializer_list<Dimension> dims);
+
+private:
+ std::unique_ptr<TensorShape> _shape = nullptr;
+};
+
+/**
+ * @brief Trait for elements with name
+ */
+class NamedEntity
+{
+public:
+ const std::string &name(void) const { return _name; }
+ void name(const std::string &name) { _name = name; }
+
+/// If new interface methods are added to this class they also will need to
+/// be added in `using` of this macro to get them visible from inherited classes
+#define LOCO_NAMED_ENTITY_EXPOSE using NamedEntity::name
+
+private:
+ std::string _name;
+};
+
+/**
+ * @brief Graph-level Input Metadata
+ */
+class GraphInput final : private NamedEntity,
+ public Mixin<Trait::DataTyped>,
+ public Mixin<Trait::TensorShaped>
+{
+public:
+ LOCO_NAMED_ENTITY_EXPOSE;
+
+ // TODO Use GraphInputIndex (instead of uint32_t)
+ GraphInput(uint32_t index) : _index{index}
+ {
+ // DO NOTHING
+ }
+
+ GraphInput(const GraphInput &) = delete;
+ GraphInput(GraphInput &&) = delete;
+
+ ~GraphInput() = default;
+
+public:
+ GraphInputIndex index(void) const { return _index; }
+
+private:
+ uint32_t _index;
+};
+
+/**
+ * @brief Graph-level Output Metadata
+ */
+class GraphOutput final : private NamedEntity,
+ public Mixin<Trait::DataTyped>,
+ public Mixin<Trait::TensorShaped>
+{
+public:
+ LOCO_NAMED_ENTITY_EXPOSE;
+
+ // TODO Use GraphOutputIndex (instead of uint32_t)
+ GraphOutput(uint32_t index) : _index{index}
+ {
+ // DO NOTHING
+ }
+
+ GraphOutput(const GraphOutput &) = delete;
+ GraphOutput(GraphOutput &&) = delete;
+
+ ~GraphOutput() = default;
+
+public:
+ GraphOutputIndex index(void) const { return _index; }
+
+private:
+ uint32_t _index;
+};
+
+/**
+ * @brief A neural network graph
+ */
+class Graph final : public NamedEntity
+{
+public:
+ /**
+ * @brief Node Pool
+ *
+ * This alias confines the impact of changes to loco internals.
+ *
+ * TODO Remove this alias
+ */
+ using NodeContext = NodePool;
+
+ /**
+ * @brief Object Pool with Simple Factory Method
+ *
+ * TODO Remove this unused class
+ */
+ template <typename T> struct SimpleFactoryObjectPool : public ObjectPool<T>
+ {
+ virtual ~SimpleFactoryObjectPool() = default;
+
+ T *create(void)
+ {
+ std::unique_ptr<T> ptr{new T};
+ return ObjectPool<T>::take(std::move(ptr));
+ }
+ };
+
+ /**
+ * @brief GraphInput Pool
+ */
+ struct InputContext final : public ObjectPool<GraphInput>
+ {
+ GraphInput *create(void);
+ };
+
+ /**
+ * @brief GraphOutput Pool
+ */
+ struct OutputContext final : public ObjectPool<GraphOutput>
+ {
+ GraphOutput *create(void);
+ };
+
+public:
+ Graph()
+ {
+ // Associate "NodeContext" and the current "Graph"
+ _node_ctx.graph(this);
+ }
+
+ // Copy/Move is not allowed for Graph
+ Graph(const Graph &) = delete;
+ Graph(Graph &&) = delete;
+
+ ~Graph() = default;
+
+public:
+ NodeContext *nodes(void) { return &_node_ctx; }
+ const NodeContext *nodes(void) const { return &_node_ctx; }
+ InputContext *inputs(void) { return &_input_ctx; }
+ const InputContext *inputs(void) const { return &_input_ctx; }
+ OutputContext *outputs(void) { return &_output_ctx; }
+ const OutputContext *outputs(void) const { return &_output_ctx; }
+
+private:
+ NodeContext _node_ctx;
+ InputContext _input_ctx;
+ OutputContext _output_ctx;
+};
+
+struct GraphInputIndexQueryService : public DialectService
+{
+ virtual ~GraphInputIndexQueryService() = default;
+
+ /**
+ * @brief Check whether a given node is associated with any Graph-level input
+ */
+ virtual bool associated(const Node *node) const = 0;
+
+ /**
+ * Exceptions
+ * - index SHOULD throw std::invalid_argument exception if a given node is not associated with
+   *   any input (i.e. associated above returns false).
+ */
+ virtual GraphInputIndex index(const Node *node) const = 0;
+};
+
+std::vector<Node *> input_nodes(const Graph *);
+
+struct GraphOutputIndexQueryService : public DialectService
+{
+ virtual ~GraphOutputIndexQueryService() = default;
+
+ /**
+ * @brief Check whether a given node is associated with any Graph-level output
+ */
+ virtual bool associated(const Node *node) const = 0;
+
+ /**
+ * Exceptions
+ * - index SHOULD throw std::invalid_argument exception if a given node is not associated with
+   *   any output (i.e. associated above returns false).
+ */
+ virtual GraphOutputIndex index(const Node *node) const = 0;
+};
+
+// TODO Use "const Graph *"
+std::vector<Node *> output_nodes(Graph *);
+
+/**
+ * @brief Enumerate all the nodes in a given graph
+ *
+ * NOTE This method returns std::set<Node *> unlike input_nodes and output_nodes.
+ *
+ * Please use traverse algorithms that "Algorithm.h" provides (such as postorder_traversal)
+ * if order is relevant for implementation.
+ */
+std::set<Node *> all_nodes(Graph *);
+
+std::unique_ptr<Graph> make_graph(void);
+
+} // namespace loco
+
+#endif // __LOCO_IR_GRAPH_H__
diff --git a/compiler/loco/include/loco/IR/GraphInputIndex.h b/compiler/loco/include/loco/IR/GraphInputIndex.h
new file mode 100644
index 000000000..3c7ae98ef
--- /dev/null
+++ b/compiler/loco/include/loco/IR/GraphInputIndex.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_GRAPH_INPUT_INDEX_H__
+#define __LOCO_IR_GRAPH_INPUT_INDEX_H__
+
+#include <cstdint>
+
+namespace loco
+{
+
+using GraphInputIndex = uint32_t;
+
+} // namespace loco
+
+#endif // __LOCO_IR_GRAPH_INPUT_INDEX_H__
diff --git a/compiler/loco/include/loco/IR/GraphOutputIndex.h b/compiler/loco/include/loco/IR/GraphOutputIndex.h
new file mode 100644
index 000000000..3231cbd95
--- /dev/null
+++ b/compiler/loco/include/loco/IR/GraphOutputIndex.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_GRAPH_OUTPUT_INDEX_H__
+#define __LOCO_IR_GRAPH_OUTPUT_INDEX_H__
+
+#include <cstdint>
+
+namespace loco
+{
+
+using GraphOutputIndex = uint32_t;
+
+} // namespace loco
+
+#endif // __LOCO_IR_GRAPH_OUTPUT_INDEX_H__
diff --git a/compiler/loco/include/loco/IR/MatrixAxis.h b/compiler/loco/include/loco/IR/MatrixAxis.h
new file mode 100644
index 000000000..8a1689bb3
--- /dev/null
+++ b/compiler/loco/include/loco/IR/MatrixAxis.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_MATRIX_AXIS_H__
+#define __LOCO_IR_MATRIX_AXIS_H__
+
+namespace loco
+{
+
+enum class MatrixAxis
+{
+ Height,
+ Width
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_MATRIX_AXIS_H__
diff --git a/compiler/loco/include/loco/IR/MatrixCodec.h b/compiler/loco/include/loco/IR/MatrixCodec.h
new file mode 100644
index 000000000..40312641a
--- /dev/null
+++ b/compiler/loco/include/loco/IR/MatrixCodec.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_MATRIX_CODEC_H__
+#define __LOCO_IR_MATRIX_CODEC_H__
+
+#include "loco/IR/MatrixShape.h"
+#include "loco/IR/MatrixIndex.h"
+
+#include "loco/IR/TensorShape.h"
+#include "loco/IR/TensorIndex.h"
+
+#include <memory>
+
+namespace loco
+{
+
+/**
+ * @brief Describe how to build a matrix from a tensor
+ *
+ * Let us assume that "enc" is a matrix encoder.
+ *
+ * Given a tensor "inp" and its shape "inp.shape", "enc" builds a matrix
+ * "out" as follows:
+ *
+ * for each valid matrix index (referred to as matrix_index below) for enc.shape(inp.shape)
+ * out.at(matrix_index) = inp.at(enc.value(matrix_index))
+ */
+struct MatrixEncoder
+{
+ virtual ~MatrixEncoder() = default;
+
+ virtual MatrixShape shape(const TensorShape &shape) const = 0;
+ virtual TensorIndex value(const MatrixIndex &index) const = 0;
+};
+
+/**
+ * @brief Describe how to build a tensor from a matrix
+ *
+ * Let us assume that "dec" is a matrix decoder.
+ *
+ * Given a matrix "inp" and its shape "inp.shape", "dec" builds a tensor
+ * "out" as follows:
+ *
+ * for each valid tensor index (referred to as tensor_index below) for dec.shape(inp.shape)
+ * out.at(tensor_index) = inp.at(dec.value(tensor_index))
+ *
+ * NOTE "inp" is a matrix value and "out" is a tensor value in this example.
+ */
+struct MatrixDecoder
+{
+ virtual ~MatrixDecoder() = default;
+
+ virtual TensorShape shape(const MatrixShape &) const = 0;
+ virtual MatrixIndex value(const TensorIndex &) const = 0;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_MATRIX_CODEC_H__
diff --git a/compiler/loco/include/loco/IR/MatrixIndex.h b/compiler/loco/include/loco/IR/MatrixIndex.h
new file mode 100644
index 000000000..eb6d65580
--- /dev/null
+++ b/compiler/loco/include/loco/IR/MatrixIndex.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_MATRIX_INDEX_H__
+#define __LOCO_IR_MATRIX_INDEX_H__
+
+#include <cstdint>
+
+namespace loco
+{
+
+/**
+ * @brief Matrix Index
+ *
+ * Matrix Index indicates an "element" in a given Matrix
+ *
+ * Let us assume that there is a Matrix F and S denotes its shape (of MatrixShape type).
+ *
+ * Then, any valid Matrix index I satisfies the following invariants:
+ * - 0 <= I.row() < S.height()
+ * - 0 <= I.column() < S.width()
+ */
+class MatrixIndex final
+{
+public:
+ MatrixIndex() = default;
+
+public:
+ const uint32_t &row(void) const { return _row; }
+ uint32_t &row(void) { return _row; }
+
+ const uint32_t &column(void) const { return _column; }
+ uint32_t &column(void) { return _column; }
+
+private:
+ uint32_t _row = 0;
+ uint32_t _column = 0;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_MATRIX_INDEX_H__
diff --git a/compiler/loco/include/loco/IR/MatrixShape.h b/compiler/loco/include/loco/IR/MatrixShape.h
new file mode 100644
index 000000000..512691beb
--- /dev/null
+++ b/compiler/loco/include/loco/IR/MatrixShape.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_MATRIX_SHAPE_H__
+#define __LOCO_IR_MATRIX_SHAPE_H__
+
+#include "loco/IR/Dimension.h"
+
+namespace loco
+{
+
+/**
+ * @brief Matrix Shape
+ *
+ * This class describes the shape of matrix, which serves as the input/output of
+ * matrix operations (e.g. Matrix Multiplication).
+ *
+ * Each matrix is a collection of 2D features conceptually.
+ * Each matrix has height, width.
+ *
+ * height() refers to the height of each matrix
+ * width() refers to the width of each matrix
+ */
+class MatrixShape final
+{
+public:
+ MatrixShape() = default;
+
+public:
+ const Dimension &height(void) const { return _height; }
+ Dimension &height(void) { return _height; }
+
+ const Dimension &width(void) const { return _width; }
+ Dimension &width(void) { return _width; }
+
+private:
+ Dimension _height;
+ Dimension _width;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_MATRIX_SHAPE_H__
diff --git a/compiler/loco/include/loco/IR/Node.forward.h b/compiler/loco/include/loco/IR/Node.forward.h
new file mode 100644
index 000000000..425b28aff
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Node.forward.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_NODE_FORWARD_H__
+#define __LOCO_IR_NODE_FORWARD_H__
+
+namespace loco
+{
+
+// NOTE This forward declaration SHOULD BE aligned with Node declaration in "Node.h"
+class Node;
+
+} // namespace loco
+
+#endif // __LOCO_IR_NODE_FORWARD_H__
diff --git a/compiler/loco/include/loco/IR/Node.h b/compiler/loco/include/loco/IR/Node.h
new file mode 100644
index 000000000..ef0bf238d
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Node.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_NODE_H__
+#define __LOCO_IR_NODE_H__
+
+#include "loco/ADT/AnnotatedItem.h"
+
+#include "loco/IR/Use.h"
+#include "loco/IR/Dialect.h"
+#include "loco/IR/NodePool.forward.h"
+#include "loco/IR/Graph.forward.h"
+
+#include <array>
+#include <memory>
+#include <set>
+
+namespace loco
+{
+
+/**
+ * @brief Extensible Node Metadata
+ */
+struct NodeAnnotation
+{
+ virtual ~NodeAnnotation() = default;
+};
+
+enum class SubstQualifier
+{
+ Default, // Replace all the occurrences as "Use" (by default)
+};
+
+template <SubstQualifier Q> class Subst;
+
+/**
+ * @brief Logical unit of computation
+ */
+class Node : public AnnotatedItem<NodeAnnotation>
+{
+public:
+ friend class Use;
+ friend class Subst<SubstQualifier::Default>;
+ friend class NodePool;
+ friend std::set<Node *> succs(const Node *node);
+
+public:
+ Node() = default;
+
+ Node(const Node &) = delete;
+ Node(Node &&) = delete;
+
+ virtual ~Node();
+
+public:
+ Graph *graph(void) { return _graph; }
+ const Graph *graph(void) const { return _graph; }
+
+private:
+ /**
+ * @brief Set associated "Graph"
+ *
+ * @note Only "NodePool" class is permitted to invoke this private method.
+ */
+ void graph(Graph *g) { _graph = g; }
+
+public:
+ /**
+ * @brief Return "Dialect" identifier that this node belongs to
+ *
+ * dialect() SHOULD return a valid pointer.
+ */
+ virtual const Dialect *dialect(void) const = 0;
+
+ virtual uint32_t opnum(void) const = 0;
+
+public:
+ /// @brief Return the number of arguments
+ virtual uint32_t arity(void) const = 0;
+
+ /// @brief Access N-th argument node
+ virtual Node *arg(uint32_t N) const = 0;
+
+ /**
+ * @brief Drop all the reference of arguments
+ *
+ * arg(n) SHOULD return nullptr for every valid n after drop() call.
+ */
+ virtual void drop(void) = 0;
+
+private:
+ /**
+ * @brief Associated Graph
+ *
+ * May be nullptr if no associated Graph exists.
+ */
+ Graph *_graph = nullptr;
+
+ /**
+ * @brief The edges to a node that uses this node as its argument
+ *
+ * @note "succs" function below accesses this private field.
+ */
+ std::set<Use *> _uses;
+};
+
+/// @brief Enumerate all the predecessors of a given node
+std::set<Node *> preds(const Node *node);
+/// @brief Enumerate all the successors of a given node
+std::set<Node *> succs(const Node *node);
+
+/**
+ * @brief A helper for below "replace" helper
+ */
+template <> class Subst<SubstQualifier::Default>
+{
+public:
+ friend Subst<SubstQualifier::Default> replace(Node *node);
+
+private:
+ explicit Subst(Node *from);
+
+public:
+ void with(Node *into) const;
+
+private:
+ Node *_from;
+};
+
+Subst<SubstQualifier::Default> replace(Node *node);
+
+} // namespace loco
+
+#endif // __LOCO_IR_NODE_H__
diff --git a/compiler/loco/include/loco/IR/NodeMixins.h b/compiler/loco/include/loco/IR/NodeMixins.h
new file mode 100644
index 000000000..f0e34b0ba
--- /dev/null
+++ b/compiler/loco/include/loco/IR/NodeMixins.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_NODE_MIXINS_H__
+#define __LOCO_IR_NODE_MIXINS_H__
+
+#include "loco/IR/Node.h"
+#include "loco/IR/DataType.h"
+#include "loco/IR/Dimension.h"
+
+#include <vector>
+#include <initializer_list>
+
+namespace loco
+{
+
+enum class NodeTrait
+{
+ DataType,
+ // Nodes with TensorShape trait will provide the following methods:
+ // - rank()
+ // - rank(value)
+ // - dim()
+ // - dim(value)
+ // - shape({...})
+ TensorShape,
+};
+
+template <NodeTrait T> class NodeMixin;
+
+template <> class NodeMixin<NodeTrait::DataType>
+{
+public:
+ NodeMixin() = default;
+
+public:
+ const DataType &dtype(void) const { return _dtype; }
+ void dtype(const DataType &dtype) { _dtype = dtype; }
+
+private:
+ /// @brief Data type
+ DataType _dtype{DataType::Unknown};
+};
+
+template <> class NodeMixin<NodeTrait::TensorShape>
+{
+public:
+ NodeMixin() = default;
+
+public:
+ uint32_t rank(void) const { return _dims.size(); }
+ void rank(uint32_t value) { _dims.resize(value); }
+
+ const Dimension &dim(uint32_t axis) const { return _dims.at(axis); }
+ Dimension &dim(uint32_t axis) { return _dims.at(axis); }
+
+ void shape(std::initializer_list<uint32_t> dims)
+ {
+ rank(dims.size());
+
+ uint32_t axis = 0;
+ for (auto d : dims)
+ {
+ dim(axis++) = d;
+ }
+ }
+
+private:
+ /// @brief Data shape (as tensor)
+ std::vector<Dimension> _dims;
+};
+
+template <unsigned N> struct FixedArity
+{
+ template <typename Base> class Mixin : public virtual Base
+ {
+ public:
+ Mixin()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args[n] = std::unique_ptr<Use>{new Use{this}};
+ }
+ }
+
+ virtual ~Mixin() = default;
+
+ public:
+ unsigned arity(void) const final { return N; }
+
+ Node *arg(uint32_t n) const final { return _args.at(n)->node(); }
+
+ void drop(void) final
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args.at(n)->node(nullptr);
+ }
+ }
+
+ protected:
+ // This API allows inherited classes to access "_args" field.
+ Use *at(unsigned n) const { return _args.at(n).get(); }
+
+ private:
+ std::array<std::unique_ptr<Use>, N> _args{};
+ };
+};
+
+template <NodeTrait Trait> struct With
+{
+ template <typename Base> struct Mixin : public virtual Base, public NodeMixin<Trait>
+ {
+ // DO NOTHING
+ };
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_NODE_MIXINS_H__
diff --git a/compiler/loco/include/loco/IR/NodePool.forward.h b/compiler/loco/include/loco/IR/NodePool.forward.h
new file mode 100644
index 000000000..87bf01311
--- /dev/null
+++ b/compiler/loco/include/loco/IR/NodePool.forward.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_NODE_POOL_FORWARD_H__
+#define __LOCO_IR_NODE_POOL_FORWARD_H__
+
+namespace loco
+{
+
+// This forward declaration SHOULD BE aligned with the actual declaration in "NodePool.h".
+class NodePool;
+
+} // namespace loco
+
+#endif // __LOCO_IR_NODE_POOL_FORWARD_H__
diff --git a/compiler/loco/include/loco/IR/NodePool.h b/compiler/loco/include/loco/IR/NodePool.h
new file mode 100644
index 000000000..4db4caae3
--- /dev/null
+++ b/compiler/loco/include/loco/IR/NodePool.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_NODE_POOL_H__
+#define __LOCO_IR_NODE_POOL_H__
+
+#include "loco/IR/Node.h"
+#include "loco/IR/Graph.forward.h"
+
+#include "loco/ADT/ObjectPool.h"
+
+namespace loco
+{
+
+class NodePool final : public ObjectPool<Node>
+{
+public:
+ friend class Graph;
+
+public:
+ ~NodePool();
+
+public:
+ template <typename Derived, typename... Args> Derived *create(Args &&... args)
+ {
+ std::unique_ptr<Derived> ptr{new Derived(std::forward<Args>(args)...)};
+ ptr->graph(_graph);
+ return ObjectPool<Node>::take<Derived>(std::move(ptr));
+ }
+
+ void destroy(Node *node)
+ {
+ if (!ObjectPool<Node>::erase(node))
+ {
+ throw std::invalid_argument{"node"};
+ }
+ }
+
+private:
+ /// Only "Graph" is permitted to invoke this private method.
+ void graph(Graph *g) { _graph = g; }
+
+private:
+ Graph *_graph = nullptr;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_NODE_POOL_H__
diff --git a/compiler/loco/include/loco/IR/NodeShape.h b/compiler/loco/include/loco/IR/NodeShape.h
new file mode 100644
index 000000000..5eefd3c19
--- /dev/null
+++ b/compiler/loco/include/loco/IR/NodeShape.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_NODE_SHAPE_H__
+#define __LOCO_IR_NODE_SHAPE_H__
+
+#include "loco/IR/Domain.h"
+
+#include "loco/IR/BiasShape.h"
+#include "loco/IR/DepthwiseFilterShape.h"
+#include "loco/IR/FeatureShape.h"
+#include "loco/IR/FilterShape.h"
+#include "loco/IR/MatrixShape.h"
+#include "loco/IR/TensorShape.h"
+
+#include <vector>
+
+namespace loco
+{
+
+class NodeShape final
+{
+public:
+ NodeShape() = default;
+
+public:
+ NodeShape(const BiasShape &shape) { set(shape); }
+ NodeShape(const DepthwiseFilterShape &shape) { set(shape); }
+ NodeShape(const FeatureShape &shape) { set(shape); }
+ NodeShape(const FilterShape &shape) { set(shape); }
+ NodeShape(const MatrixShape &shape) { set(shape); }
+ NodeShape(const TensorShape &shape) { set(shape); }
+
+public:
+ const Domain &domain(void) const { return _domain; }
+
+public:
+ void set(const BiasShape &);
+ void set(const DepthwiseFilterShape &);
+ void set(const FeatureShape &);
+ void set(const FilterShape &);
+ void set(const MatrixShape &);
+ void set(const TensorShape &);
+
+public:
+ template <typename ShapeType> ShapeType as(void) const;
+
+private:
+ Domain _domain = Domain::Unknown;
+ std::vector<Dimension> _dims;
+};
+
+bool operator==(const NodeShape &lhs, const NodeShape &rhs);
+
+} // namespace loco
+
+#endif // __LOCO_IR_NODE_SHAPE_H__
diff --git a/compiler/loco/include/loco/IR/Nodes.h b/compiler/loco/include/loco/IR/Nodes.h
new file mode 100644
index 000000000..9aac48b6e
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Nodes.h
@@ -0,0 +1,1123 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_NODES_H__
+#define __LOCO_IR_NODES_H__
+
+#include "loco/IR/Node.h"
+#include "loco/IR/Use.h"
+#include "loco/IR/Domain.h"
+#include "loco/IR/DataType.h"
+#include "loco/IR/DataTypeTraits.h"
+#include "loco/IR/Dimension.h"
+#include "loco/IR/Window.h"
+#include "loco/IR/Stride.h"
+#include "loco/IR/Padding2D.h"
+#include "loco/IR/PaddingND.h"
+#include "loco/IR/TensorAxis.h"
+#include "loco/IR/TensorAxisSet.h"
+#include "loco/IR/FeatureCodec.h"
+#include "loco/IR/FilterCodec.h"
+#include "loco/IR/DepthwiseFilterCodec.h"
+#include "loco/IR/MatrixCodec.h"
+#include "loco/IR/NodeMixins.h"
+#include "loco/IR/CanonicalNodeDecl.h"
+#include "loco/IR/GraphInputIndex.h"
+#include "loco/IR/GraphOutputIndex.h"
+
+namespace loco
+{
+
+class Graph;
+class GraphInput;
+class GraphOutput;
+
+/**
+ * @brief Make a value visible to user
+ */
+class Push /* to user */ final
+ : public CanonicalNodeDef<CanonicalOpcode::Push, FixedArity<1>::Mixin>
+{
+public:
+ Push() = default;
+
+public:
+ Node *from(void) const { return at(0)->node(); }
+ void from(Node *node) { at(0)->node(node); }
+
+public:
+ void index(const GraphOutputIndex &index);
+
+ /**
+ * @brief Get associated output index
+ *
+ * The behavior of this method is undefined when "index" is not set before.
+ *
+ * NOTE This method intentionally returns "GraphOutputIndex" instead of "const GraphOutputIndex &"
+ * not to expose the internal implementation details.
+ */
+ GraphOutputIndex index(void) const;
+
+ /**
+ * @brief Check whether index is initialized
+ *
+ * NOTE "indexed" method does not validate whether index is in a valid range
+ */
+ bool indexed(void) const { return _index != -1; }
+
+private:
+ int64_t _index = -1; // Uninitialized
+};
+
+void link(GraphOutput *, Push *push);
+
+/// @brief Find a Push node with a given output index
+Push *push_node(Graph *g, const GraphOutputIndex &index);
+
+/**
+ * @brief Create a value from user data
+ */
+class Pull /* from user */ final
+ : public CanonicalNodeDef<CanonicalOpcode::Pull, FixedArity<0>::Mixin,
+ With<NodeTrait::TensorShape>::Mixin>
+{
+public:
+ Pull() = default;
+
+public:
+ void index(const GraphInputIndex &index);
+
+ /**
+ * @brief Get associated input index
+ *
+ * The behavior of this method is undefined when "index" is not set before.
+ *
+ * NOTE This method intentionally returns "GraphInputIndex" instead of "const GraphInputIndex &"
+ * not to expose the internal implementation details.
+ */
+ GraphInputIndex index(void) const;
+
+ /**
+ * @brief Check whether index is initialized
+ *
+ * NOTE "indexed" method does not validate whether index is in a valid range
+ */
+ bool indexed(void) const { return _index != -1; }
+
+public:
+ void dtype(const DataType &d);
+ DataType dtype(void) const;
+
+private:
+ int64_t _index = -1; // Uninitialized
+
+ /**
+ * @brief Locally cached data type attribute
+ *
+ * TODO Remove this cache once all the clients are updated
+ */
+ DataType _dtype = DataType::Unknown;
+};
+
+void link(GraphInput *, Pull *pull);
+
+/// @brief Find a Pull node with a given input index
+Pull *pull_node(Graph *g, const GraphInputIndex &index);
+
+/**
+ * @brief Create a new value identical to its input
+ *
+ * This node may encode memory transfer (such as CPU -> GPU or GPU -> CPU)
+ */
+class Forward final : public CanonicalNodeDef<CanonicalOpcode::Forward, FixedArity<1>::Mixin>
+{
+public:
+ Forward() = default;
+
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+};
+
+/**
+ * @brief Create a new value that rectifies its input
+ */
+class ReLU final : public CanonicalNodeDef<CanonicalOpcode::ReLU, FixedArity<1>::Mixin>
+{
+public:
+ ReLU() = default;
+
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+};
+
+/**
+ * @brief Create a new value that rectifies its input capping the units at 6.
+ */
+class ReLU6 final : public CanonicalNodeDef<CanonicalOpcode::ReLU6, FixedArity<1>::Mixin>
+{
+public:
+ ReLU6() = default;
+
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+};
+
+/**
+ * @brief Create a new value that rectifies its input by tanh
+ */
+class Tanh final : public CanonicalNodeDef<CanonicalOpcode::Tanh, FixedArity<1>::Mixin>
+{
+public:
+ Tanh() = default;
+
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+};
+
+/**
+ * @brief Create a value from constant byte array
+ *
+ * @note ConstGen assumes "lexical memory layout".
+ *
+ * Let us assume that a 'ConstGen' generates a constant tensor of shape "S".
+ * for each valid index I, the corresponding value comes from offset(S, I)
+ * where the implementation of "offset" is given as follows:
+ *
+ * uint32_t stride(TensorShape shape, uint32_t axis) {
+ * uint32_t res = 1;
+ * for (uint32_t n = rank(shape) - 1; n > axis; --n) { res *= shape.dim(n); }
+ * return res;
+ * }
+ *
+ * uint32_t offset(TensorShape shape, TensorIndex index) {
+ * uint32_t res = 0;
+ * for (uint32_t n = 0; n < rank(shape); ++n) { res += index.at(n) * stride(shape, n); }
+ * return res;
+ * }
+ */
+class ConstGen final
+ : public CanonicalNodeDef<CanonicalOpcode::ConstGen, FixedArity<0>::Mixin,
+ With<NodeTrait::DataType>::Mixin, With<NodeTrait::TensorShape>::Mixin>
+{
+public:
+ ConstGen() = default;
+
+public:
+ /**
+ * @brief Return the number of reserved elements
+ * @note This method returns the number of ELEMENT (not BYTE).
+ */
+ template <DataType DT> uint32_t size(void) const;
+
+ /**
+ * @brief Adjust the number of reserved elements
+ */
+ template <DataType DT> void size(uint32_t size);
+
+ /**
+ * @brief Get the element at a given position
+ * @require at(n) is valid only when n < size()
+ */
+ template <DataType DT> const typename DataTypeImpl<DT>::Type &at(uint32_t n) const;
+
+ /**
+ * @brief Update the element at a given position
+ * @require at(n) is valid only when n < size()
+ */
+ template <DataType DT> typename DataTypeImpl<DT>::Type &at(uint32_t n);
+
+private:
+ /// @brief Data
+ std::vector<uint8_t> _data;
+};
+
+/**
+ * @brief 2D Max Pooling
+ *
+ * MaxPool2D takes as input a feature map, and produces another feature map
+ *
+ * ---
+ * Any valid MaxPool2D nodes SHOULD satisfy the following conditions.
+ *
+ * Let us define several helper functions that takes a MaxPool2D nodes first:
+ * - IFM_DOMAIN returns the domain of its input
+ * - IFM_H returns the height of its input.
+ * - IFM_W returns the width of its input.
+ * - PAD_T returns the top padding required over its input
+ * - PAD_B returns the bottom padding required over its input
+ * - PAD_L returns the left padding required over its input
+ * - PAD_R returns the right padding required over its input
+ * - WIN_H returns the height of its receptive field.
+ * - WIN_W returns the width of its receptive field.
+ * - STRIDE_H returns the vertical(= on height) stride.
+ * - STRIDE_W returns the horizontal(= on width) stride.
+ *
+ * Condition 1
+ * Statement
+ *
+ * A valid MaxPool2D node M SHOULD satisfy the following condition:
+ * - IFM_DOMAIN(M) == Feature
+ *
+ * Motivation
+ *
+ * There are many possible ways to encode a feature map as a tensor.
+ * - e.g. NCHW/NHWC/...
+ *
+ * In order to give some freedom on memory layout to backend, loco requires a feature map
+ * value to be explicitly encoded via FeatureEncode.
+ *
+ * Condition 2:
+ * Statement
+ *
+ * A valid MaxPool2D node M SHOULD satisfy the following conditions:
+ * - (IFM_H(M) + PAD_T(M) + PAD_B(M) - WIN_H(M)) % STRIDE_H(M) == 0
+ * - (IFM_W(M) + PAD_L(M) + PAD_R(M) - WIN_W(M)) % STRIDE_W(M) == 0
+ *
+ * Motivation
+ *
+ * The output shape may differ for each NN framework when these conditions do not hold.
+ *
+ * In order to mitigate such a difference among NN frameworks, loco requires these conditions
+ * for MaxPool2D nodes.
+ *
+ * This means that each frontend implementation SHOULD insert appropriate padding/trimming node
+ * before/after MaxPool2D node according to the semantics of the corresponding NN framework.
+ * ---
+ */
+class MaxPool2D final : public CanonicalNodeDef<CanonicalOpcode::MaxPool2D, FixedArity<1>::Mixin>
+{
+public:
+ Node *ifm(void) const { return at(0)->node(); }
+ void ifm(Node *node) { at(0)->node(node); }
+
+public:
+ const Padding2D *pad(void) const { return &_pad; }
+ Padding2D *pad(void) { return &_pad; }
+
+public:
+ const Window<2> *window(void) const { return &_window; }
+ Window<2> *window(void) { return &_window; }
+
+public:
+ const Stride<2> *stride(void) const { return &_stride; }
+ Stride<2> *stride(void) { return &_stride; }
+
+private:
+ // Pad
+ Padding2D _pad;
+ // Window
+ Window<2> _window;
+ // Stride
+ Stride<2> _stride;
+};
+
+/**
+ * @brief 2D Average Pooling
+ *
+ * @note Follows MaxPool2D (TODO: describe difference)
+ */
+class AvgPool2D final : public CanonicalNodeDef<CanonicalOpcode::AvgPool2D, FixedArity<1>::Mixin>
+{
+public:
+ enum class Convention
+ {
+ Unknown,
+ // Use the number of elements in each receptive field as a divisor
+ Full,
+ // Use the number of valid (non-padding) elements in each receptive field as a divisor
+ Valid
+ };
+
+public:
+ Node *ifm(void) const { return at(0)->node(); }
+ void ifm(Node *node) { at(0)->node(node); }
+
+public:
+ Convention convention(void) const { return _convention; }
+ void convention(const Convention &convention) { _convention = convention; }
+
+public:
+ const Padding2D *pad(void) const { return &_pad; }
+ Padding2D *pad(void) { return &_pad; }
+
+public:
+ const Window<2> *window(void) const { return &_window; }
+ Window<2> *window(void) { return &_window; }
+
+public:
+ const Stride<2> *stride(void) const { return &_stride; }
+ Stride<2> *stride(void) { return &_stride; }
+
+private:
+ Convention _convention = Convention::Unknown;
+ Padding2D _pad;
+ Window<2> _window;
+ Stride<2> _stride;
+};
+
+/**
+ * @brief Create a feature map from a tensor
+ */
+class FeatureEncode final
+ : public CanonicalNodeDef<CanonicalOpcode::FeatureEncode, FixedArity<1>::Mixin>
+{
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+
+public:
+ FeatureEncoder *encoder(void) const { return _enc.get(); }
+ void encoder(std::unique_ptr<FeatureEncoder> &&enc) { _enc = std::move(enc); }
+
+private:
+ /// @note "encoder" is mandatory
+ std::unique_ptr<FeatureEncoder> _enc{nullptr};
+};
+
+/**
+ * @brief Create a tensor from a feature map
+ */
+class FeatureDecode final
+ : public CanonicalNodeDef<CanonicalOpcode::FeatureDecode, FixedArity<1>::Mixin>
+{
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+
+public:
+ FeatureDecoder *decoder(void) const { return _dec.get(); }
+ void decoder(std::unique_ptr<FeatureDecoder> &&dec) { _dec = std::move(dec); }
+
+private:
+  /// @note "decoder" is mandatory
+ std::unique_ptr<FeatureDecoder> _dec{nullptr};
+};
+
+/**
+ * @brief Create a filter from a tensor
+ */
+class FilterEncode final
+ : public CanonicalNodeDef<CanonicalOpcode::FilterEncode, FixedArity<1>::Mixin>
+{
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+
+public:
+ FilterEncoder *encoder(void) const { return _enc.get(); }
+ void encoder(std::unique_ptr<FilterEncoder> &&enc) { _enc = std::move(enc); }
+
+private:
+ /// @note "encoder" is mandatory
+ std::unique_ptr<FilterEncoder> _enc{nullptr};
+};
+
+/**
+ * @brief Create a tensor from a filter
+ */
+class FilterDecode final
+ : public CanonicalNodeDef<CanonicalOpcode::FilterDecode, FixedArity<1>::Mixin>
+{
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+
+public:
+ FilterDecoder *decoder(void) const { return _dec.get(); }
+ void decoder(std::unique_ptr<FilterDecoder> &&dec) { _dec = std::move(dec); }
+
+private:
+ /// @note "decoder" is mandatory
+ std::unique_ptr<FilterDecoder> _dec{nullptr};
+};
+
+/**
+ * @brief Create a depthwise filter from a tensor
+ */
+class DepthwiseFilterEncode final
+ : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterEncode, FixedArity<1>::Mixin>
+{
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+
+public:
+ DepthwiseFilterEncoder *encoder(void) const { return _enc.get(); }
+ void encoder(std::unique_ptr<DepthwiseFilterEncoder> &&enc) { _enc = std::move(enc); }
+
+private:
+ /// @note "encoder" is mandatory
+ std::unique_ptr<DepthwiseFilterEncoder> _enc{nullptr};
+};
+
+/**
+ * @brief Create a tensor from a depthwise filter
+ */
+class DepthwiseFilterDecode final
+ : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterDecode, FixedArity<1>::Mixin>
+{
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+
+public:
+ DepthwiseFilterDecoder *decoder(void) const { return _dec.get(); }
+ void decoder(std::unique_ptr<DepthwiseFilterDecoder> &&dec) { _dec = std::move(dec); }
+
+private:
+ /// @note "decoder" is mandatory
+ std::unique_ptr<DepthwiseFilterDecoder> _dec{nullptr};
+};
+
+enum class ReshapeType
+{
+ Fixed, // shape is known at compile time
+ // Add another type for a case when shape is not known at compile time
+};
+
+template <ReshapeType RT> class Reshape;
+
+/**
+ * @brief Reshape a tensor to another tensor whose shape is known at compile time
+ *
+ * @note This class reshapes the shape of an input tensor to _shape.
+ * Each dimension of _shape should be known at compile time.
+ * Any dimension of _shape should be greater than 0.
+ *
+ * Interpreter or runtime should lexicographically copy an input tensor into an output tensor.
+ *       For example, values of an input tensor of shape [2, 2, 2, 2] will be copied into an output
+ * tensor of new shape [4, 4] like the following:
+ * input[0, 0, 0, 0] => output [0, 0]
+ * input[0, 0, 0, 1] => output [0, 1]
+ * input[0, 0, 1, 0] => output [0, 2]
+ * ...
+ * input[1, 1, 1, 1] => output [3, 3]
+ */
+template <>
+class Reshape<ReshapeType::Fixed> final
+ : public CanonicalNodeDef<CanonicalOpcode::FixedReshape, FixedArity<1>::Mixin,
+ With<NodeTrait::TensorShape>::Mixin>
+{
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+};
+
+using FixedReshape = Reshape<ReshapeType::Fixed>;
+
+/**
+ * @brief Concatenate two tensors
+ *
+ * Given an axis, TensorConcat takes as input two tensors and produces a tensor
+ * concatenated along the given axis.
+ */
+class TensorConcat final
+ : public CanonicalNodeDef<CanonicalOpcode::TensorConcat, FixedArity<2>::Mixin>
+{
+public:
+ Node *lhs(void) const { return at(0)->node(); }
+ void lhs(Node *node) { at(0)->node(node); }
+
+ Node *rhs(void) const { return at(1)->node(); }
+ void rhs(Node *node) { at(1)->node(node); }
+
+public:
+ uint32_t axis(void) const { return _axis; }
+ void axis(uint32_t val) { _axis = val; }
+
+private:
+ // Axis
+ uint32_t _axis{0};
+};
+
+/**
+ * @brief 2D Spatial Convolution
+ */
+class Conv2D final : public CanonicalNodeDef<CanonicalOpcode::Conv2D, FixedArity<2>::Mixin>
+{
+public:
+ Node *ifm(void) const { return at(0)->node(); }
+ void ifm(Node *node) { at(0)->node(node); }
+
+ Node *ker(void) const { return at(1)->node(); }
+ void ker(Node *node) { at(1)->node(node); }
+
+public:
+ const Padding2D *pad(void) const { return &_pad; }
+ Padding2D *pad(void) { return &_pad; }
+
+public:
+ const Stride<2> *stride(void) const { return &_stride; }
+ Stride<2> *stride(void) { return &_stride; }
+
+private:
+ Padding2D _pad;
+ Stride<2> _stride;
+
+ // TODO Support "Dilation"
+};
+
+/**
+ * @brief Depthwise 2D Convolution
+ */
+class DepthwiseConv2D final
+ : public CanonicalNodeDef<CanonicalOpcode::DepthwiseConv2D, FixedArity<2>::Mixin>
+{
+public:
+ Node *ifm(void) const { return at(0)->node(); }
+ void ifm(Node *node) { at(0)->node(node); }
+
+ Node *ker(void) const { return at(1)->node(); }
+ void ker(Node *node) { at(1)->node(node); }
+
+public:
+ const Padding2D *pad(void) const { return &_pad; }
+ Padding2D *pad(void) { return &_pad; }
+
+public:
+ const Stride<2> *stride(void) const { return &_stride; }
+ Stride<2> *stride(void) { return &_stride; }
+
+private:
+ Padding2D _pad;
+ Stride<2> _stride;
+
+ // TODO Support "Dilation"
+};
+
+/**
+ * @brief Reduce type functions
+ */
+enum class ReduceFunc
+{
+ Mean, // ReduceMean
+ // TODO Support other reduce operations
+};
+
+/**
+ * @brief Computes ReduceFunc operations for Tensor domain
+ * @note All the reduce functions always keep dimensions
+ */
+class TensorReduce final
+ : public CanonicalNodeDef<CanonicalOpcode::TensorReduce, FixedArity<1>::Mixin>
+{
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+
+public:
+ const TensorAxisSet *axes(void) const { return &_axes; }
+ TensorAxisSet *axes(void) { return &_axes; }
+
+public:
+ ReduceFunc func(void) const { return _func; }
+ void func(ReduceFunc func) { _func = func; }
+
+private:
+ TensorAxisSet _axes;
+ ReduceFunc _func;
+};
+
+/**
+ * @brief 2D Transposed Convolution
+ *
+ * @note TransposedConv2D have a few important conventions that IR users should
+ * understand and follow, so please check below notice carefully.
+ *
+ *
+ * 1. What is 'input' and 'output'
+ *
+ * For loco canonical TransposedConv2D, 'input' and 'output' mean actual input
+ * and output node of TransposedConv2D node. Be careful that some other
+ * frameworks may use opposite sense, especially TensorFlow which is inspired by
+ * backpropagation of convolution.
+ * For example, loco::TransposedConv2D::ifm() means actual input feature map
+ * node that is sourced into TransposedConv2D.
+ *
+ * 2. How to read kernel representation
+ *
+ * TransposedConv2D::ker() should be a node of Filter domain. Following is what
+ * each FilterAxis means as a kernel of TransposedConv2D:
+ * - FilterAxis::Height : kernel's height
+ * - FilterAxis::Width : kernel's width
+ * - FilterAxis::Depth : IFM's channel depth
+ * - FilterAxis::Count : OFM's channel depth
+ * TODO We may refactor FilterAxis as follow to reduce ambiguity:
+ * - FilterAxis::Height -> FilterAxis::H
+ * - FilterAxis::Width -> FilterAxis::W
+ * - FilterAxis::Depth -> FilterAxis::I
+ * - FilterAxis::Count -> FilterAxis::O
+ *
+ *
+ * 3. Tight fit rule
+ *
+ * TransposedConv2D have no information about its output shape. Instead, it
+ * always satisfy following 'tight fit' rule for horizontal and vertical
+ * dimension:
+ *
+ * O = S * ( I - 1 ) + F - P
+ *
+ * where
+ * O: output size
+ * S: stride
+ * I: input size
+ *     F: effective kernel(filter) size
+ * P: whole pad size (= front + rear pad)
+ *
+ * With this, output shape is uniquely determined by all inputs and attributes.
+ */
+class TransposedConv2D final
+    : public CanonicalNodeDef<CanonicalOpcode::TransposedConv2D, FixedArity<2>::Mixin>
+{
+public:
+  /// @brief Input feature map (argument 0); actual input, NOT TensorFlow's output-grad sense
+  Node *ifm(void) const { return at(0)->node(); }
+  void ifm(Node *node) { at(0)->node(node); }
+
+  /// @brief Kernel node on Filter domain (argument 1)
+  Node *ker(void) const { return at(1)->node(); }
+  void ker(Node *node) { at(1)->node(node); }
+
+public:
+  /// @brief 2D padding (top/bottom/left/right) applied under the 'tight fit' rule
+  const Padding2D *pad(void) const { return &_pad; }
+  Padding2D *pad(void) { return &_pad; }
+
+public:
+  /// @brief Vertical/horizontal stride
+  const Stride<2> *stride(void) const { return &_stride; }
+  Stride<2> *stride(void) { return &_stride; }
+
+private:
+  Padding2D _pad;
+  Stride<2> _stride;
+
+  // TODO Support "Dilation"
+};
+
+/**
+ * @brief Computes softmax activations
+ */
+template <Domain D> class Softmax;
+
+/**
+* @brief Computes softmax activations for Tensor domain
+*/
+template <>
+class Softmax<Domain::Tensor> final
+    : public CanonicalNodeDef<CanonicalOpcode::TensorSoftmax, FixedArity<1>::Mixin>
+{
+public:
+  Softmax() = default;
+
+public:
+  /// @brief Input tensor (argument 0)
+  Node *input(void) const { return at(0)->node(); }
+  // Fixed: dropped spurious 'return' of a void expression for consistency with
+  // the other setters in this header (e.g. BiasDecode::input)
+  void input(Node *node) { at(0)->node(node); }
+
+  /// @brief Axis along which softmax is computed (default: 0)
+  uint32_t axis(void) const { return _axis; }
+  void axis(uint32_t axis) { _axis = axis; }
+
+private:
+  uint32_t _axis = 0;
+};
+
+using TensorSoftmax = Softmax<Domain::Tensor>;
+
+/**
+ * @brief Create a "Tensor" from a "Bias"
+ */
+class BiasDecode final : public CanonicalNodeDef<CanonicalOpcode::BiasDecode, FixedArity<1>::Mixin>
+{
+public:
+  BiasDecode() = default;
+
+public:
+  /// @brief Input node on Bias domain (argument 0)
+  Node *input(void) const { return at(0)->node(); }
+  void input(Node *node) { at(0)->node(node); }
+};
+
+/**
+ * @brief Create a "Bias" from a "Tensor"
+ *
+ * BiasEncode currently requires a rank-1 tensor as its input.
+ */
+class BiasEncode final : public CanonicalNodeDef<CanonicalOpcode::BiasEncode, FixedArity<1>::Mixin>
+{
+public:
+  BiasEncode() = default;
+
+public:
+  /// @brief Input node (argument 0); per the class doc, expected to be a rank-1 tensor
+  Node *input(void) const { return at(0)->node(); }
+  void input(Node *node) { at(0)->node(node); }
+};
+
+/**
+ * @brief Produce a value of domain D from an input value (of domain D) and a bias
+ */
+template <Domain D> class BiasAdd;
+
+/**
+ * @brief Add Tensor and Bias
+ *
+ * for each valid tensor index I
+ * out(I) = value(I) + bias(I.at(axis))
+ */
+template <>
+class BiasAdd<Domain::Tensor> final
+    : public CanonicalNodeDef<CanonicalOpcode::TensorBiasAdd, FixedArity<2>::Mixin>
+{
+public:
+  BiasAdd() = default;
+
+public:
+  /// @brief Value operand (argument 0)
+  Node *value(void) const { return at(0)->node(); }
+  // Fixed: dropped spurious 'return' of a void expression (consistency with other setters)
+  void value(Node *node) { at(0)->node(node); }
+
+  /// @brief Bias operand (argument 1)
+  Node *bias(void) const { return at(1)->node(); }
+  void bias(Node *node) { at(1)->node(node); }
+
+  /// @brief Tensor axis whose index selects the bias element (default: 0)
+  uint32_t axis(void) const { return _axis; }
+  void axis(uint32_t axis) { _axis = axis; }
+
+private:
+  uint32_t _axis = 0;
+};
+
+//
+// Alias for external users
+//
+// loco::TensorBiasAdd
+// vs.
+// loco::BiasAdd<loco::Domain::Tensor>
+//
+using TensorBiasAdd = BiasAdd<Domain::Tensor>;
+
+/**
+ * @brief Add Feature and Bias along "depth" axis
+ *
+ * for each valid feature index (b, ch, row, col)
+ * out(b, ch, row, col) = value(b, ch, row, col) + bias(ch)
+ */
+template <>
+class BiasAdd<Domain::Feature> final
+    : public CanonicalNodeDef<CanonicalOpcode::FeatureBiasAdd, FixedArity<2>::Mixin>
+{
+public:
+  BiasAdd() = default;
+
+public:
+  /// @brief Value operand on Feature domain (argument 0)
+  Node *value(void) const { return at(0)->node(); }
+  // Fixed: dropped spurious 'return' of a void expression (consistency with other setters)
+  void value(Node *node) { at(0)->node(node); }
+
+  /// @brief Bias operand, indexed by channel (argument 1)
+  Node *bias(void) const { return at(1)->node(); }
+  void bias(Node *node) { at(1)->node(node); }
+};
+
+using FeatureBiasAdd = BiasAdd<Domain::Feature>;
+
+/**
+ * @brief Pads a tensor with constant value
+ *
+ * Pads a input tensor according to the padding with constant value.
+ *
+ * The dimension of each axis n of the output is
+ * output.dim(n) = padding.front(n) + input.dim(n) + padding.back(n)
+ *
+ * For example, input tensor of shape [1, 2] with
+ *
+ * padding.front(0) = 1;
+ * padding.back(0) = 2;
+ *
+ * padding.front(1) = 3;
+ * padding.back(1) = 4;
+ *
+ * will be an output tensor of shape
+ * [padding.front(0) + 1 + padding.back(0), padding.front(1) + 2 + padding.back(1)] = [4,9].
+ */
+class TensorConstantPad final
+    : public CanonicalNodeDef<CanonicalOpcode::TensorConstantPad, FixedArity<2>::Mixin>
+{
+public:
+  /// @brief Input tensor to be padded (argument 0)
+  Node *input(void) const { return at(0)->node(); }
+  void input(Node *node) { at(0)->node(node); }
+
+  /// @brief Node providing the constant fill value (argument 1)
+  Node *constant(void) const { return at(1)->node(); }
+  void constant(Node *node) { at(1)->node(node); }
+
+public:
+  /// @brief Per-axis front/back pad amounts
+  const PaddingND *padding(void) const { return &_padding; }
+  PaddingND *padding(void) { return &_padding; }
+
+private:
+  PaddingND _padding;
+};
+
+/**
+ * @brief Elementwise Add lhs and rhs
+ */
+class EltwiseAdd final : public CanonicalNodeDef<CanonicalOpcode::EltwiseAdd, FixedArity<2>::Mixin>
+{
+public:
+  EltwiseAdd() = default;
+
+public:
+  /// @brief Left-hand side operand (argument 0)
+  Node *lhs(void) const { return at(0)->node(); }
+  // Fixed: dropped spurious 'return' of a void expression (consistency with other setters)
+  void lhs(Node *node) { at(0)->node(node); }
+
+  /// @brief Right-hand side operand (argument 1)
+  Node *rhs(void) const { return at(1)->node(); }
+  void rhs(Node *node) { at(1)->node(node); }
+};
+
+/**
+ * @brief Elementwise Maximum of lhs and rhs
+ *
+ * o = (l > r) ? l : r (element-wise)
+ */
+class EltwiseMax final : public CanonicalNodeDef<CanonicalOpcode::EltwiseMax, FixedArity<2>::Mixin>
+{
+public:
+  EltwiseMax() = default;
+
+public:
+  /// @brief Left-hand side operand (argument 0)
+  Node *lhs(void) const { return at(0)->node(); }
+  // Fixed: dropped spurious 'return' of a void expression (consistency with other setters)
+  void lhs(Node *node) { at(0)->node(node); }
+
+  /// @brief Right-hand side operand (argument 1)
+  Node *rhs(void) const { return at(1)->node(); }
+  void rhs(Node *node) { at(1)->node(node); }
+};
+
+/**
+ * @brief Elementwise Mul lhs and rhs
+ */
+class EltwiseMul final : public CanonicalNodeDef<CanonicalOpcode::EltwiseMul, FixedArity<2>::Mixin>
+{
+public:
+  EltwiseMul() = default;
+
+public:
+  /// @brief Left-hand side operand (argument 0)
+  Node *lhs(void) const { return at(0)->node(); }
+  // Fixed: dropped spurious 'return' of a void expression (consistency with other setters)
+  void lhs(Node *node) { at(0)->node(node); }
+
+  /// @brief Right-hand side operand (argument 1)
+  Node *rhs(void) const { return at(1)->node(); }
+  void rhs(Node *node) { at(1)->node(node); }
+};
+
+/**
+ * @brief Elementwise Sub lhs and rhs
+ */
+class EltwiseSub final : public CanonicalNodeDef<CanonicalOpcode::EltwiseSub, FixedArity<2>::Mixin>
+{
+public:
+  EltwiseSub() = default;
+
+public:
+  /// @brief Left-hand side operand (argument 0)
+  Node *lhs(void) const { return at(0)->node(); }
+  // Fixed: dropped spurious 'return' of a void expression (consistency with other setters)
+  void lhs(Node *node) { at(0)->node(node); }
+
+  /// @brief Right-hand side operand (argument 1)
+  Node *rhs(void) const { return at(1)->node(); }
+  void rhs(Node *node) { at(1)->node(node); }
+};
+
+/**
+ * @brief Elementwise Div lhs and rhs
+ */
+class EltwiseDiv final : public CanonicalNodeDef<CanonicalOpcode::EltwiseDiv, FixedArity<2>::Mixin>
+{
+public:
+  EltwiseDiv() = default;
+
+public:
+  /// @brief Left-hand side operand, i.e. the dividend (argument 0)
+  Node *lhs(void) const { return at(0)->node(); }
+  // Fixed: dropped spurious 'return' of a void expression (consistency with other setters)
+  void lhs(Node *node) { at(0)->node(node); }
+
+  /// @brief Right-hand side operand, i.e. the divisor (argument 1)
+  Node *rhs(void) const { return at(1)->node(); }
+  void rhs(Node *node) { at(1)->node(node); }
+};
+
+/**
+ * @brief Elementwise Sqrt of input
+ */
+class EltwiseSqrt final
+    : public CanonicalNodeDef<CanonicalOpcode::EltwiseSqrt, FixedArity<1>::Mixin>
+{
+public:
+  EltwiseSqrt() = default;
+
+public:
+  /// @brief Input node (argument 0)
+  Node *input(void) const { return at(0)->node(); }
+  void input(Node *node) { at(0)->node(node); }
+};
+
+/**
+ * @brief Duplicate elements along specified axes
+ *
+ * TensorBroadcast takes a tensor and produces another tensor with the same rank but HIGHER
+ * dimensionality.
+ *
+ * To create such a tensor, TensorBroadcast duplicates the element along the specified axes.
+ *
+ * It is possible to control the degree of duplication with a partial map from TensorAxis to
+ * Dimension.
+ *
+ * TODO Explain the constraints (The dimension of inputs for specified axes SHOULD BE 1).
+ * TODO Explain the operation semantics
+ */
+class TensorBroadcast final
+    : public CanonicalNodeDef<CanonicalOpcode::TensorBroadcast, FixedArity<1>::Mixin>
+{
+public:
+  TensorBroadcast() = default;
+
+public:
+  /// @brief Input tensor (argument 0)
+  Node *input(void) const { return at(0)->node(); }
+  void input(Node *node) { at(0)->node(node); }
+
+public:
+  /// @brief Partial map from TensorAxis to the target Dimension for that axis
+  class Mapping final
+  {
+  public:
+    Mapping() = default;
+
+  public:
+    /// @brief Return whether a target dimension is registered for the given axis
+    bool defined(const TensorAxis &axis) const;
+
+    // Read/write access to the target dimension for 'axis' (declarations only;
+    // defined in the corresponding .cpp)
+    const Dimension &dim(const TensorAxis &axis) const;
+    Dimension &dim(const TensorAxis &axis);
+
+  private:
+    std::map<TensorAxis, Dimension> _content;
+  };
+
+  Mapping *mapping(void) { return &_mapping; }
+  const Mapping *mapping(void) const { return &_mapping; }
+
+private:
+  Mapping _mapping;
+};
+
+/**
+ * @brief Create Matrix from Tensor
+ *
+ * MatrixEncode currently requires a rank-2 Tensor as its input.
+ */
+class MatrixEncode final
+    : public CanonicalNodeDef<CanonicalOpcode::MatrixEncode, FixedArity<1>::Mixin>
+{
+public:
+  MatrixEncode() = default;
+
+public:
+  /// @brief Input node (argument 0); per the class doc, expected to be a rank-2 tensor
+  Node *input(void) const { return at(0)->node(); }
+  void input(Node *node) { at(0)->node(node); }
+
+public:
+  /// @brief Tensor-to-Matrix codec; ownership is transferred to this node
+  MatrixEncoder *encoder(void) const { return _enc.get(); }
+  void encoder(std::unique_ptr<MatrixEncoder> &&enc) { _enc = std::move(enc); }
+
+private:
+  /// @note "encoder" is mandatory
+  std::unique_ptr<MatrixEncoder> _enc{nullptr};
+};
+
+/**
+ * @brief Create Tensor from Matrix
+ *
+ * MatrixDecode currently requires a Matrix as its input.
+ */
+class MatrixDecode final
+    : public CanonicalNodeDef<CanonicalOpcode::MatrixDecode, FixedArity<1>::Mixin>
+{
+public:
+  MatrixDecode() = default;
+
+public:
+  /// @brief Input node on Matrix domain (argument 0)
+  Node *input(void) const { return at(0)->node(); }
+  void input(Node *node) { at(0)->node(node); }
+
+public:
+  /// @brief Matrix-to-Tensor codec; ownership is transferred to this node
+  MatrixDecoder *decoder(void) const { return _dec.get(); }
+  void decoder(std::unique_ptr<MatrixDecoder> &&dec) { _dec = std::move(dec); }
+
+private:
+  /// @note "decoder" is mandatory
+  std::unique_ptr<MatrixDecoder> _dec{nullptr};
+};
+
+/**
+ * @brief Matrix Multiplication lhs and rhs
+ *
+ * LHS and RHS must be on Matrix domain
+ */
+class MatMul final : public CanonicalNodeDef<CanonicalOpcode::MatMul, FixedArity<2>::Mixin>
+{
+public:
+  MatMul() = default;
+
+public:
+  /// @brief Left-hand side operand on Matrix domain (argument 0)
+  Node *lhs(void) const { return at(0)->node(); }
+  // Fixed: dropped spurious 'return' of a void expression (consistency with other setters)
+  void lhs(Node *node) { at(0)->node(node); }
+
+  /// @brief Right-hand side operand on Matrix domain (argument 1)
+  Node *rhs(void) const { return at(1)->node(); }
+  void rhs(Node *node) { at(1)->node(node); }
+};
+
+/**
+ * @brief Permute an input
+ *
+ * In the following case,
+ *
+ * output = loco::TensorTranspose(input)
+ *
+ * perm()->axis(output's axis) = input's axis
+ *
+ * Input and output belong to tensor domain.
+ */
+class TensorTranspose final
+    : public CanonicalNodeDef<CanonicalOpcode::TensorTranspose, FixedArity<1>::Mixin>
+{
+public:
+  TensorTranspose() = default;
+
+public:
+  /// @brief Input tensor (argument 0)
+  Node *input(void) const { return at(0)->node(); }
+  // Fixed: dropped spurious 'return' of a void expression (consistency with other setters)
+  void input(Node *node) { at(0)->node(node); }
+
+  /// @brief Axis permutation: axis(output axis) yields the corresponding input axis
+  class Perm final
+  {
+  public:
+    Perm() = default;
+
+  public:
+    /// @brief Number of axes in the permutation; setting it resizes the table
+    uint32_t size() const { return _vals.size(); }
+    void size(uint32_t size) { _vals.resize(size); }
+
+    const TensorAxis &axis(TensorAxis n) const { return _vals[n]; }
+    TensorAxis &axis(TensorAxis n) { return _vals[n]; }
+
+  private:
+    std::vector<TensorAxis> _vals;
+  };
+
+  Perm *perm(void) { return &_perm; }
+  const Perm *perm(void) const { return &_perm; }
+
+private:
+  Perm _perm;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_NODES_H__
diff --git a/compiler/loco/include/loco/IR/Padding2D.h b/compiler/loco/include/loco/IR/Padding2D.h
new file mode 100644
index 000000000..30557a891
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Padding2D.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_PADDING2D_H__
+#define __LOCO_IR_PADDING2D_H__
+
+#include <cstdint>
+
+namespace loco
+{
+
+class Padding2D final
+{
+public:
+  /// @brief Construct with zero padding on all four sides
+  Padding2D() : _top{0}, _bottom{0}, _left{0}, _right{0}
+  {
+    // DO NOTHING
+  }
+
+public:
+  /// @brief Construct with explicit per-side pad amounts
+  Padding2D(uint32_t top, uint32_t bottom, uint32_t left, uint32_t right)
+      : _top{top}, _bottom{bottom}, _left{left}, _right{right}
+  {
+    // DO NOTHING
+  }
+
+public:
+  uint32_t top(void) const { return _top; }
+  void top(uint32_t value) { _top = value; }
+
+public:
+  uint32_t bottom(void) const { return _bottom; }
+  void bottom(uint32_t value) { _bottom = value; }
+
+public:
+  uint32_t left(void) const { return _left; }
+  void left(uint32_t value) { _left = value; }
+
+public:
+  uint32_t right(void) const { return _right; }
+  void right(uint32_t value) { _right = value; }
+
+private:
+  uint32_t _top;
+  uint32_t _bottom;
+  uint32_t _left;
+  uint32_t _right;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_PADDING2D_H__
diff --git a/compiler/loco/include/loco/IR/PaddingND.h b/compiler/loco/include/loco/IR/PaddingND.h
new file mode 100644
index 000000000..59be73943
--- /dev/null
+++ b/compiler/loco/include/loco/IR/PaddingND.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_PADDINGND_H__
+#define __LOCO_IR_PADDINGND_H__
+
+#include <cstdint>
+#include <vector>
+
+namespace loco
+{
+
+/**
+ * This class indicates how many pads to add before(front) and after(back) the contents of
+ * tensor in that dimension.
+ */
+class PaddingND final
+{
+
+public:
+  /// @brief Pad amount added before the contents along dimension 'dim' (bounds-checked)
+  const uint32_t &front(uint32_t dim) const { return _front.at(dim); }
+  uint32_t &front(uint32_t dim) { return _front.at(dim); }
+
+public:
+  /// @brief Pad amount added after the contents along dimension 'dim' (bounds-checked)
+  const uint32_t &back(uint32_t dim) const { return _back.at(dim); }
+  uint32_t &back(uint32_t dim) { return _back.at(dim); }
+
+public:
+  /// @brief Number of dimensions; setting it resizes both pad vectors
+  uint32_t rank(void) const { return _front.size(); }
+  void rank(uint32_t s)
+  {
+    _front.resize(s);
+    _back.resize(s);
+  }
+
+private:
+  std::vector<uint32_t> _front;
+  std::vector<uint32_t> _back;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_PADDINGND_H__
diff --git a/compiler/loco/include/loco/IR/PermutingCodec.h b/compiler/loco/include/loco/IR/PermutingCodec.h
new file mode 100644
index 000000000..60b05dcbb
--- /dev/null
+++ b/compiler/loco/include/loco/IR/PermutingCodec.h
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_PERMUTING_CODEC_H__
+#define __LOCO_IR_PERMUTING_CODEC_H__
+
+#include "loco/IR/Domain.h"
+
+#include "loco/IR/FeatureAxis.h"
+#include "loco/IR/FeatureCodec.h"
+#include "loco/IR/FilterAxis.h"
+#include "loco/IR/FilterCodec.h"
+#include "loco/IR/DepthwiseFilterAxis.h"
+#include "loco/IR/DepthwiseFilterCodec.h"
+#include "loco/IR/MatrixAxis.h"
+#include "loco/IR/MatrixCodec.h"
+#include "loco/IR/TensorAxis.h"
+
+#include <map>
+
+namespace loco
+{
+
+template <Domain D> class Permutation;
+template <Domain D> class PermutingEncoder;
+template <Domain D> class PermutingDecoder;
+
+/**
+ * @brief Mapping between Feature/Tensor Axis
+ */
+template <> class Permutation<Domain::Feature>
+{
+public:
+  Permutation() = default;
+
+public:
+  /**
+   * @brief Return whether a tensor axis is specified for a given feature axis
+   *
+   * This method does not validate the corresponding value.
+   */
+  bool mapped(const FeatureAxis &axis_f) const;
+
+  /**
+   * @brief Get the tensor axis corresponding to a given feature axis
+   *
+   * This method works correctly only when the feature axis was mapped before.
+   */
+  TensorAxis axis(const FeatureAxis &axis_f) const;
+
+  /**
+   * @brief Set the tensor axis corresponding to a given feature axis
+   */
+  TensorAxis &axis(const FeatureAxis &axis_f);
+
+  // Operator sugar over axis(); same preconditions apply
+  TensorAxis operator[](const FeatureAxis &axis_f) const { return axis(axis_f); }
+  TensorAxis &operator[](const FeatureAxis &axis_f) { return axis(axis_f); }
+
+private:
+  std::map<FeatureAxis, TensorAxis> _map;
+};
+
+template <> class PermutingEncoder<Domain::Feature> final : public FeatureEncoder
+{
+public:
+  PermutingEncoder() = default;
+
+public:
+  // NOTE(review): not 'explicit', unlike the Filter-domain codec constructors
+  // below — implicit conversion from a Permutation is allowed; confirm intended
+  PermutingEncoder(const Permutation<Domain::Feature> &perm) : _perm{perm}
+  {
+    // DO NOTHING
+  }
+
+public:
+  /// @brief Return whether the current permutation is usable (declaration only)
+  bool valid(void) const;
+
+public:
+  FeatureShape shape(const TensorShape &tensor_shape) const override;
+  TensorIndex value(const FeatureIndex &index) const override;
+
+  std::unique_ptr<FeatureEncoder> clone(void) const override;
+
+public:
+  /// @brief Access/replace the underlying Feature-to-Tensor axis permutation
+  const Permutation<Domain::Feature> *perm(void) const { return &_perm; }
+  Permutation<Domain::Feature> *perm(void) { return &_perm; }
+  void perm(const Permutation<Domain::Feature> &p) { _perm = p; }
+
+private:
+  Permutation<Domain::Feature> _perm;
+};
+
+template <> class PermutingDecoder<Domain::Feature> final : public FeatureDecoder
+{
+public:
+  PermutingDecoder() = default;
+
+public:
+  PermutingDecoder(const Permutation<Domain::Feature> &perm) : _perm{perm}
+  {
+    // DO NOTHING
+  }
+
+public:
+  /// @brief Return whether the current permutation is usable (declaration only)
+  bool valid(void) const;
+
+public:
+  // NOTE(review): parameter name 'tensor_shape' is misleading — the argument is a
+  // FeatureShape being decoded to a TensorShape
+  TensorShape shape(const FeatureShape &tensor_shape) const override;
+  FeatureIndex value(const TensorIndex &index) const override;
+
+  std::unique_ptr<FeatureDecoder> clone(void) const override;
+
+public:
+  /// @brief Access/replace the underlying axis permutation
+  const Permutation<Domain::Feature> *perm(void) const { return &_perm; }
+  Permutation<Domain::Feature> *perm(void) { return &_perm; }
+  void perm(const Permutation<Domain::Feature> &p) { _perm = p; }
+
+private:
+  Permutation<Domain::Feature> _perm;
+};
+
+/**
+ * @brief Mapping between Filter/Tensor Axis
+ */
+template <> class Permutation<Domain::Filter>
+{
+public:
+  Permutation() = default;
+
+public:
+  /**
+   * @brief Return whether a given filter axis has a corresponding tensor axis
+   *
+   * This method does not validate the corresponding value.
+   */
+  bool mapped(const FilterAxis &axis_f) const;
+
+  /**
+   * @brief Get the tensor axis corresponding to a given filter axis
+   *
+   * This method works correctly only for mapped filter axes.
+   */
+  // NOTE(review): const getter returns a reference here, unlike the Feature/Matrix
+  // specializations which return by value — confirm the asymmetry is intended
+  const TensorAxis &axis(const FilterAxis &axis_f) const;
+
+  /**
+   * @brief Set the tensor axis corresponding to a given filter axis
+   */
+  TensorAxis &axis(const FilterAxis &axis_f);
+
+  TensorAxis operator[](const FilterAxis &axis_f) const { return axis(axis_f); }
+  TensorAxis &operator[](const FilterAxis &axis_f) { return axis(axis_f); }
+
+private:
+  std::map<FilterAxis, TensorAxis> _map;
+};
+
+/**
+ * @brief Permutation-based Tensor-to-Filter converter
+ */
+template <> class PermutingEncoder<Domain::Filter> final : public FilterEncoder
+{
+public:
+  PermutingEncoder() = default;
+
+public:
+  explicit PermutingEncoder(const Permutation<Domain::Filter> &perm) : _perm{perm}
+  {
+    // DO NOTHING
+  }
+
+public:
+  /// @brief Return whether the current permutation is usable (declaration only)
+  bool valid(void) const;
+
+public:
+  // NOTE(review): unlike the Feature codec, no clone() is declared here — confirm
+  FilterShape shape(const TensorShape &tensor_shape) const override;
+  TensorIndex value(const FilterIndex &index) const override;
+
+public:
+  /// @brief Access/replace the underlying axis permutation
+  const Permutation<Domain::Filter> *perm(void) const { return &_perm; }
+  Permutation<Domain::Filter> *perm(void) { return &_perm; }
+  void perm(const Permutation<Domain::Filter> &p) { _perm = p; }
+
+private:
+  Permutation<Domain::Filter> _perm;
+};
+
+/**
+ * @brief Permutation-based Filter-to-Tensor converter
+ */
+template <> class PermutingDecoder<Domain::Filter> final : public FilterDecoder
+{
+public:
+  PermutingDecoder() = default;
+
+public:
+  explicit PermutingDecoder(const Permutation<Domain::Filter> &perm) : _perm{perm}
+  {
+    // DO NOTHING
+  }
+
+public:
+  /// @brief Return whether the current permutation is usable (declaration only)
+  bool valid(void) const;
+
+public:
+  // NOTE(review): parameter name 'tensor_shape' is misleading — it is a FilterShape
+  TensorShape shape(const FilterShape &tensor_shape) const override;
+  FilterIndex value(const TensorIndex &index) const override;
+
+public:
+  /// @brief Access/replace the underlying axis permutation
+  const Permutation<Domain::Filter> *perm(void) const { return &_perm; }
+  Permutation<Domain::Filter> *perm(void) { return &_perm; }
+  void perm(const Permutation<Domain::Filter> &p) { _perm = p; }
+
+private:
+  Permutation<Domain::Filter> _perm;
+};
+
+/**
+ * @brief Mapping between DepthwiseFilter/Tensor Axis
+ */
+template <> class Permutation<Domain::DepthwiseFilter>
+{
+public:
+  Permutation() = default;
+
+public:
+  /**
+   * @brief Return whether a given depthwise filter axis has a corresponding tensor axis
+   *
+   * This method does not validate the corresponding value.
+   */
+  bool mapped(const DepthwiseFilterAxis &axis_f) const;
+
+  /**
+   * @brief Get the tensor axis corresponding to a given depthwise filter axis
+   *
+   * This method works correctly only for mapped depthwise filter axes.
+   */
+  const TensorAxis &axis(const DepthwiseFilterAxis &axis_f) const;
+
+  /**
+   * @brief Set the tensor axis corresponding to a given depthwise filter axis
+   */
+  TensorAxis &axis(const DepthwiseFilterAxis &axis_f);
+
+  // Operator sugar over axis(); same preconditions apply
+  TensorAxis operator[](const DepthwiseFilterAxis &axis_f) const { return axis(axis_f); }
+  TensorAxis &operator[](const DepthwiseFilterAxis &axis_f) { return axis(axis_f); }
+
+private:
+  std::map<DepthwiseFilterAxis, TensorAxis> _map;
+};
+
+/**
+ * @brief Permutation-based Tensor-to-DepthwiseFilter converter
+ */
+template <> class PermutingEncoder<Domain::DepthwiseFilter> final : public DepthwiseFilterEncoder
+{
+public:
+  PermutingEncoder() = default;
+
+public:
+  // NOTE(review): not 'explicit', unlike the Filter-domain codecs — confirm intended
+  PermutingEncoder(const Permutation<Domain::DepthwiseFilter> &perm) : _perm{perm}
+  {
+    // DO NOTHING
+  }
+
+public:
+  /// @brief Return whether the current permutation is usable (declaration only)
+  bool valid(void) const;
+
+public:
+  DepthwiseFilterShape shape(const TensorShape &tensor_shape) const override;
+  TensorIndex value(const DepthwiseFilterIndex &index) const override;
+
+public:
+  /// @brief Access/replace the underlying axis permutation
+  const Permutation<Domain::DepthwiseFilter> *perm(void) const { return &_perm; }
+  Permutation<Domain::DepthwiseFilter> *perm(void) { return &_perm; }
+  void perm(const Permutation<Domain::DepthwiseFilter> &p) { _perm = p; }
+
+private:
+  Permutation<Domain::DepthwiseFilter> _perm;
+};
+
+/**
+ * @brief Permutation-based DepthwiseFilter-to-Tensor converter
+ */
+template <> class PermutingDecoder<Domain::DepthwiseFilter> final : public DepthwiseFilterDecoder
+{
+public:
+  PermutingDecoder() = default;
+
+public:
+  // NOTE(review): not 'explicit', unlike the Filter-domain codecs — confirm intended
+  PermutingDecoder(const Permutation<Domain::DepthwiseFilter> &perm) : _perm{perm}
+  {
+    // DO NOTHING
+  }
+
+public:
+  /// @brief Return whether the current permutation is usable (declaration only)
+  bool valid(void) const;
+
+public:
+  TensorShape shape(const DepthwiseFilterShape &shape) const override;
+  DepthwiseFilterIndex value(const TensorIndex &index) const override;
+
+public:
+  /// @brief Access/replace the underlying axis permutation
+  const Permutation<Domain::DepthwiseFilter> *perm(void) const { return &_perm; }
+  Permutation<Domain::DepthwiseFilter> *perm(void) { return &_perm; }
+  void perm(const Permutation<Domain::DepthwiseFilter> &p) { _perm = p; }
+
+private:
+  Permutation<Domain::DepthwiseFilter> _perm;
+};
+
+/**
+ * @brief Mapping between Matrix/Tensor Axis
+ */
+template <> class Permutation<Domain::Matrix>
+{
+public:
+  Permutation() = default;
+
+public:
+  /**
+   * @brief Return whether a given matrix axis has a corresponding tensor axis
+   *
+   * This method does not validate the corresponding value.
+   */
+  bool mapped(const MatrixAxis &axis_f) const;
+
+  /**
+   * @brief Get the tensor axis corresponding to a given matrix axis
+   *
+   * This method works correctly only for mapped matrix axes.
+   */
+  TensorAxis axis(const MatrixAxis &axis_f) const;
+
+  /**
+   * @brief Set the tensor axis corresponding to a given matrix axis
+   */
+  TensorAxis &axis(const MatrixAxis &axis_f);
+
+  // Operator sugar over axis(); same preconditions apply
+  TensorAxis operator[](const MatrixAxis &axis_f) const { return axis(axis_f); }
+  TensorAxis &operator[](const MatrixAxis &axis_f) { return axis(axis_f); }
+
+private:
+  std::map<MatrixAxis, TensorAxis> _map;
+};
+
+/**
+ * @brief Permutation-based Tensor-to-Matrix converter
+ */
+template <> class PermutingEncoder<Domain::Matrix> final : public MatrixEncoder
+{
+public:
+  PermutingEncoder() = default;
+
+public:
+  // NOTE(review): not 'explicit', unlike the Filter-domain codecs — confirm intended
+  PermutingEncoder(const Permutation<Domain::Matrix> &perm) : _perm{perm}
+  {
+    // DO NOTHING
+  }
+
+public:
+  /// @brief Return whether the current permutation is usable (declaration only)
+  bool valid(void) const;
+
+public:
+  MatrixShape shape(const TensorShape &tensor_shape) const override;
+  TensorIndex value(const MatrixIndex &index) const override;
+
+public:
+  /// @brief Access/replace the underlying axis permutation
+  const Permutation<Domain::Matrix> *perm(void) const { return &_perm; }
+  Permutation<Domain::Matrix> *perm(void) { return &_perm; }
+  void perm(const Permutation<Domain::Matrix> &p) { _perm = p; }
+
+private:
+  Permutation<Domain::Matrix> _perm;
+};
+
+/**
+ * @brief Permutation-based Matrix-to-Tensor converter
+ */
+template <> class PermutingDecoder<Domain::Matrix> final : public MatrixDecoder
+{
+public:
+  PermutingDecoder() = default;
+
+public:
+  // NOTE(review): not 'explicit', unlike the Filter-domain codecs — confirm intended
+  PermutingDecoder(const Permutation<Domain::Matrix> &perm) : _perm{perm}
+  {
+    // DO NOTHING
+  }
+
+public:
+  /// @brief Return whether the current permutation is usable (declaration only)
+  bool valid(void) const;
+
+public:
+  // NOTE(review): parameter name 'tensor_shape' is misleading — it is a MatrixShape
+  TensorShape shape(const MatrixShape &tensor_shape) const override;
+  MatrixIndex value(const TensorIndex &index) const override;
+
+public:
+  /// @brief Access/replace the underlying axis permutation
+  const Permutation<Domain::Matrix> *perm(void) const { return &_perm; }
+  Permutation<Domain::Matrix> *perm(void) { return &_perm; }
+  void perm(const Permutation<Domain::Matrix> &p) { _perm = p; }
+
+private:
+  Permutation<Domain::Matrix> _perm;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_PERMUTING_CODEC_H__
diff --git a/compiler/loco/include/loco/IR/Stride.h b/compiler/loco/include/loco/IR/Stride.h
new file mode 100644
index 000000000..eb9d47115
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Stride.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_STRIDE_H__
+#define __LOCO_IR_STRIDE_H__
+
+#include <cstdint>
+
+namespace loco
+{
+
+/**
+ * @brief Stride configuration for N-dimensional spatial operations
+ */
+template <unsigned N> class Stride;
+
+/**
+ * @brief Stride configuration for 2D spatial operations
+ */
+template <> class Stride<2> final
+{
+public:
+  /// @brief Vertical stride; defaults to 1
+  uint32_t vertical(void) const { return _vertical; }
+  void vertical(uint32_t value) { _vertical = value; }
+
+public:
+  /// @brief Horizontal stride; defaults to 1
+  uint32_t horizontal(void) const { return _horizontal; }
+  void horizontal(uint32_t value) { _horizontal = value; }
+
+private:
+  uint32_t _vertical = 1;
+  uint32_t _horizontal = 1;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_STRIDE_H__
diff --git a/compiler/loco/include/loco/IR/TensorAxis.h b/compiler/loco/include/loco/IR/TensorAxis.h
new file mode 100644
index 000000000..c41da512e
--- /dev/null
+++ b/compiler/loco/include/loco/IR/TensorAxis.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_TENSOR_AXIS_H__
+#define __LOCO_IR_TENSOR_AXIS_H__
+
+#include <cstdint>
+
+namespace loco
+{
+
+using TensorAxis = uint32_t;
+
+} // namespace loco
+
+#endif // __LOCO_IR_TENSOR_AXIS_H__
diff --git a/compiler/loco/include/loco/IR/TensorAxisSet.h b/compiler/loco/include/loco/IR/TensorAxisSet.h
new file mode 100644
index 000000000..240dcc556
--- /dev/null
+++ b/compiler/loco/include/loco/IR/TensorAxisSet.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_TENSOR_AXIS_SET_H__
+#define __LOCO_IR_TENSOR_AXIS_SET_H__
+
+#include "loco/IR/TensorAxis.h"
+
+#include <set>
+
+namespace loco
+{
+
+class TensorAxisSet final
+{
+public:
+  TensorAxisSet() = default;
+
+public:
+  /// @brief Return whether 'axis' is a member of this set
+  bool defined(const TensorAxis &axis) const { return _axes.find(axis) != _axes.end(); }
+  /// @brief Add 'axis' to this set (no-op if already present)
+  void insert(const TensorAxis &axis) { _axes.insert(axis); }
+
+private:
+  std::set<TensorAxis> _axes;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_TENSOR_AXIS_SET_H__
diff --git a/compiler/loco/include/loco/IR/TensorIndex.h b/compiler/loco/include/loco/IR/TensorIndex.h
new file mode 100644
index 000000000..8f2385104
--- /dev/null
+++ b/compiler/loco/include/loco/IR/TensorIndex.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_TENSOR_INDEX_H__
+#define __LOCO_IR_TENSOR_INDEX_H__
+
+#include <nncc/core/ADT/tensor/Index.h>
+
+namespace loco
+{
+
+// TODO Remove dependencies on angkor
+using TensorIndex = nncc::core::ADT::tensor::Index;
+
+} // namespace loco
+
+#endif // __LOCO_IR_TENSOR_INDEX_H__
diff --git a/compiler/loco/include/loco/IR/TensorShape.h b/compiler/loco/include/loco/IR/TensorShape.h
new file mode 100644
index 000000000..af1066d52
--- /dev/null
+++ b/compiler/loco/include/loco/IR/TensorShape.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_TENSOR_SHAPE_H__
+#define __LOCO_IR_TENSOR_SHAPE_H__
+
+#include "loco/IR/Dimension.h"
+
+#include <initializer_list>
+#include <vector>
+
+namespace loco
+{
+
+class TensorShape
+{
+public:
+  TensorShape() = default;
+  /// @brief Construct from a dimension list, e.g. TensorShape{1, 2, 3}
+  TensorShape(std::initializer_list<Dimension> dims) : _dims(dims.begin(), dims.end()) {}
+
+public:
+  /// @brief Number of dimensions; setting it resizes the dimension vector
+  uint32_t rank(void) const { return _dims.size(); }
+  void rank(uint32_t r) { _dims.resize(r); }
+
+  /// @brief Access the dimension of a given axis (bounds-checked via at())
+  const Dimension &dim(uint32_t axis) const { return _dims.at(axis); }
+  Dimension &dim(uint32_t axis) { return _dims.at(axis); }
+
+private:
+  std::vector<Dimension> _dims;
+};
+
+/**
+ * @brief Return the number of elements in a tensor of given shape
+ *
+ * NOTE 1.
+ *
+ * "volume" returns 1 if the rank is 0.
+ *
+ * NOTE 2.
+ *
+ * "caller" SHOULD pass a valid shape that has no unknown dimension.
+ * - The behavior of "volume" on invalid is undefined.
+ *
+ */
+uint32_t element_count(const loco::TensorShape *tensor_shape);
+
+} // namespace loco
+
+#endif // __LOCO_IR_TENSOR_SHAPE_H__
diff --git a/compiler/loco/include/loco/IR/Use.h b/compiler/loco/include/loco/IR/Use.h
new file mode 100644
index 000000000..a4db924e4
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Use.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_USE_H__
+#define __LOCO_IR_USE_H__
+
+#include "loco/IR/Node.forward.h"
+
+namespace loco
+{
+
+/**
+ * @brief The edge between a node definition and its user.
+ *
+ * Note that this "Use" denotes **one** edge between a node and its users,
+ * and thus there are unique node and user for each Use.
+ *
+ * There will be multiple "Use" edges for the same node if there are multiple
+ * users.
+ *
+ * This class design is heavily inspired from "Use" class in LLVM.
+ */
+class Use final
+{
+public:
+ /**
+ * @brief Construct Use with its user
+ * @note user SHOULD BE set on construction.
+ */
+ Use(Node *user) : _user{user}
+ {
+ // DO NOTHING
+ }
+
+ Use(const Use &) = delete;
+ Use(Use &&) = delete;
+
+ ~Use()
+ {
+ // Unlink itself from the node
+ node(nullptr);
+ }
+
+public:
+ Node *node(void) const { return _node; }
+ void node(Node *node);
+
+public:
+ Node *user(void) const { return _user; }
+
+private:
+ Node *_node{nullptr};
+ Node *_user{nullptr};
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_USE_H__
diff --git a/compiler/loco/include/loco/IR/Verifier.h b/compiler/loco/include/loco/IR/Verifier.h
new file mode 100644
index 000000000..8ff85e16f
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Verifier.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_VERIFIER_H__
+#define __LOCO_IR_VERIFIER_H__
+
+#include "loco/IR/Graph.h"
+
+#include <memory>
+
+namespace loco
+{
+
+/**
+ * @brief Possible error categories
+ *
+ * This enum class enumerates all the possible validation failure reasons.
+ *
+ * WARN DO NOT serialize this code. The tag value is subject to change.
+ */
+enum class ErrorCategory
+{
+ MissingArgument,
+ /* TO BE ADDED */
+};
+
+/**
+ * @brief The details of each error
+ */
+template <ErrorCategory Code> class ErrorDetail;
+
+/**
+ * @brief The details of MissingArgument error
+ */
+template <> class ErrorDetail<ErrorCategory::MissingArgument>
+{
+public:
+ ErrorDetail(loco::Node *node, uint32_t index) : _node{node}, _index{index}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /// @brief The node with missing arguments
+ loco::Node *node(void) const { return _node; }
+ /// @brief The missing argument index
+ uint32_t index(void) const { return _index; }
+
+private:
+ loco::Node *_node;
+ uint32_t _index;
+};
+
+/**
+ * @brief Error listener interface
+ *
+ * Do NOT inherit this interface. Use ErrorListener instead.
+ */
+struct IErrorListener
+{
+ virtual ~IErrorListener() = default;
+
+ virtual void notify(const ErrorDetail<ErrorCategory::MissingArgument> &) = 0;
+};
+
+/**
+ * @brief Error listener (with default implementation)
+ */
+struct ErrorListener : public IErrorListener
+{
+ virtual ~ErrorListener() = default;
+
+ void notify(const ErrorDetail<ErrorCategory::MissingArgument> &) override { return; }
+};
+
+/**
+ * @brief Validate a loco graph
+ *
+ * "valid" returns true if a given graph has no error.
+ *
+ * NOTE Given a valid (non-null) listener, "valid" notifies error details to the listener.
+ */
+bool valid(Graph *g, std::unique_ptr<ErrorListener> &&l = nullptr);
+
+} // namespace loco
+
+#endif // __LOCO_IR_VERIFIER_H__
diff --git a/compiler/loco/include/loco/IR/Window.h b/compiler/loco/include/loco/IR/Window.h
new file mode 100644
index 000000000..604fea868
--- /dev/null
+++ b/compiler/loco/include/loco/IR/Window.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_WINDOW_H__
+#define __LOCO_IR_WINDOW_H__
+
+#include <cstdint>
+
+namespace loco
+{
+
+/**
+ * @brief ND Receptive Field Shape
+ *
+ * Window<N> describes the shape of N-dimensional receptive field.
+ */
+template <unsigned N> class Window;
+
+/**
+ * @brief 2D Receptive Field Shape
+ */
+template <> class Window<2> final
+{
+public:
+ uint32_t vertical(void) const { return _vertical; }
+ void vertical(uint32_t value) { _vertical = value; }
+
+public:
+ uint32_t horizontal(void) const { return _horizontal; }
+ void horizontal(uint32_t value) { _horizontal = value; }
+
+private:
+ uint32_t _vertical = 1;
+ uint32_t _horizontal = 1;
+};
+
+} // namespace loco
+
+#endif // __LOCO_IR_WINDOW_H__
diff --git a/compiler/loco/include/loco/Service/CanonicalShapeInferenceRule.h b/compiler/loco/include/loco/Service/CanonicalShapeInferenceRule.h
new file mode 100644
index 000000000..cd3bed405
--- /dev/null
+++ b/compiler/loco/include/loco/Service/CanonicalShapeInferenceRule.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_SERVICE_CANONICAL_SHAPE_INFERENCE_RULE_H__
+#define __LOCO_SERVICE_CANONICAL_SHAPE_INFERENCE_RULE_H__
+
+#include "loco/Service/ShapeInferenceRule.h"
+
+namespace loco
+{
+
+/**
+ * @brief Shape inference rule for canonical dialect
+ */
+struct CanonicalShapeInferenceRule final : public ShapeInferenceRule
+{
+ bool support(const API &ver) const final;
+ bool recognize(const Dialect *) const final;
+ bool infer(const Node *, NodeShape &) const final;
+ void infer(const Context *, const Node *, Sink *) const final;
+};
+
+} // namespace loco
+
+#endif // __LOCO_SERVICE_CANONICAL_SHAPE_INFERENCE_RULE_H__
diff --git a/compiler/loco/include/loco/Service/MultiDialectShapeInferenceRule.h b/compiler/loco/include/loco/Service/MultiDialectShapeInferenceRule.h
new file mode 100644
index 000000000..1a6c85b42
--- /dev/null
+++ b/compiler/loco/include/loco/Service/MultiDialectShapeInferenceRule.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_SERVICE_MULTI_DIALECT_SHAPE_INFERENCE_RULE_H__
+#define __LOCO_SERVICE_MULTI_DIALECT_SHAPE_INFERENCE_RULE_H__
+
+#include "loco/Service/ShapeInferenceRule.h"
+
+#include <map>
+
+namespace loco
+{
+
+/**
+ * @brief Shape inference rule for multiple dialects
+ */
+class MultiDialectShapeInferenceRule final : public ShapeInferenceRule
+{
+public:
+ bool recognize(const Dialect *) const final;
+ bool infer(const Node *, NodeShape &) const final;
+
+ /// @brief Bind a specific rule to a Dialect
+ MultiDialectShapeInferenceRule &bind(const Dialect *d, const ShapeInferenceRule *rule);
+
+private:
+ std::map<const Dialect *, const ShapeInferenceRule *> _rules;
+};
+
+} // namespace loco
+
+#endif // __LOCO_SERVICE_MULTI_DIALECT_SHAPE_INFERENCE_RULE_H__
diff --git a/compiler/loco/include/loco/Service/ShapeInference.h b/compiler/loco/include/loco/Service/ShapeInference.h
new file mode 100644
index 000000000..f7bc5d4d6
--- /dev/null
+++ b/compiler/loco/include/loco/Service/ShapeInference.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_SERVICE_SHAPE_INFERENCE_H__
+#define __LOCO_SERVICE_SHAPE_INFERENCE_H__
+
+#include "loco/Service/ShapeInferenceRule.h"
+#include "loco/IR/Graph.h"
+
+/**
+ * @file This file implements dialect-agnostic shape inference framework
+ *
+ * HOW TO USE:
+ *
+ * loco::Graph *g = ...;
+ * loco::ShapeInferenceRule *rule = ...;
+ * loco::apply(rule).to(g);
+ *
+ */
+namespace loco
+{
+
+class ShapeInferenceSession
+{
+public:
+ ShapeInferenceSession(const ShapeInferenceRule *rule) : _rule{rule}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool to(Graph *g) const;
+
+private:
+ const ShapeInferenceRule *_rule;
+};
+
+inline ShapeInferenceSession apply(ShapeInferenceRule *r) { return ShapeInferenceSession{r}; }
+
+struct ShapeInference
+{
+ static bool known(const Node *);
+ static NodeShape get(const Node *);
+ static void erase(Node *);
+};
+
+inline bool shape_known(const Node *node) { return ShapeInference::known(node); }
+inline NodeShape shape_get(const Node *node) { return ShapeInference::get(node); }
+inline void shape_erase(Node *node) { ShapeInference::erase(node); }
+
+} // namespace loco
+
+#endif // __LOCO_SERVICE_SHAPE_INFERENCE_H__
diff --git a/compiler/loco/include/loco/Service/ShapeInferenceRule.h b/compiler/loco/include/loco/Service/ShapeInferenceRule.h
new file mode 100644
index 000000000..889f0b6b2
--- /dev/null
+++ b/compiler/loco/include/loco/Service/ShapeInferenceRule.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_SERVICE_SHAPE_INFERENCE_RULE_H__
+#define __LOCO_SERVICE_SHAPE_INFERENCE_RULE_H__
+
+#include "loco/IR/Domain.h"
+#include "loco/IR/Dialect.h"
+#include "loco/IR/Node.h"
+#include "loco/IR/NodeShape.h"
+
+namespace loco
+{
+
+struct ShapeInferenceRule
+{
+ virtual ~ShapeInferenceRule() = default;
+
+ enum class API
+ {
+ /**
+ * API v1
+ *
+ * This API uses "shape_get" method to query the shape of other nodes.
+ */
+ V1,
+
+ /**
+ * API v2
+ *
+ * This API uses a given context (defined below) to query the shape of other nodes.
+ */
+ V2,
+ };
+
+ /// @brief Check whether a given API is available or not
+ virtual bool support(const API &api) const
+ {
+ // To be backward compatible
+ return api == API::V1;
+ }
+
+ /// @brief Return true if this rule recognizes a given dialect
+ virtual bool recognize(const Dialect *) const = 0;
+
+ /**
+ * @brief Infer node's shape
+ *
+ * WARNING!!
+ *
+ * Implementation SHOULD return true only when it succeeds in inference!
+ *
+ */
+ virtual bool infer(const Node *, NodeShape &) const = 0;
+
+ //
+ // API v2
+ //
+ struct Context
+ {
+ virtual ~Context() = default;
+
+ virtual bool known(const Node *node) const = 0;
+ virtual NodeShape get(const Node *node) const = 0;
+ };
+
+ struct Sink
+ {
+ virtual ~Sink() = default;
+
+ // TODO Add methods for error reporting
+
+ // Each ShapeInferenceRule SHOULD invoke one of okay and fail before it returns
+ virtual void okay(const NodeShape &) = 0;
+ virtual void fail(void) = 0;
+ };
+
+ // WARNING! Invoke this method only when API v2 is supported
+ virtual void infer(const Context *, const Node *, Sink *) const;
+};
+
+} // namespace loco
+
+#endif // __LOCO_SERVICE_SHAPE_INFERENCE_RULE_H__
diff --git a/compiler/loco/include/loco/Service/TypeInference.h b/compiler/loco/include/loco/Service/TypeInference.h
new file mode 100644
index 000000000..c2ce1a4c7
--- /dev/null
+++ b/compiler/loco/include/loco/Service/TypeInference.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_SERVICE_TYPE_INFERENCE_H__
+#define __LOCO_SERVICE_TYPE_INFERENCE_H__
+
+#include "loco/IR/DataType.h"
+
+#include "loco/IR/Node.h"
+#include "loco/IR/Dialect.h"
+#include "loco/IR/Graph.h"
+
+#include <map>
+
+/**
+ * @file This file implements dialect-agnostic type inference framework.
+ *
+ * HOW TO USE:
+ *
+ * loco::Graph *g = ...;
+ * loco::TypeInferenceRule *rule = ...;
+ * loco::apply(rule).to(g);
+ *
+ */
+namespace loco
+{
+
+struct TypeInferenceRule
+{
+ virtual ~TypeInferenceRule() = default;
+
+ /// @brief Return true if this rule recognizes a given dialect
+ virtual bool recognize(const Dialect *) const = 0;
+
+ /**
+   * Framework guarantees the following:
+ *
+ * 1. Framework tries to infer the data type of each node only after the data type of all of
+ * its valid (= non-nullptr) argument nodes is inferred.
+ * 2. The result of preceding "infer" is accessible through below dtype_get method.
+ * - This holds only when preceding "infer" returns true.
+ */
+ virtual bool infer(const Node *, DataType &) const = 0;
+};
+
+/**
+ * @brief Type Inference Rule for Canonical Dialect
+ */
+struct CanonicalTypeInferenceRule final : public TypeInferenceRule
+{
+ bool recognize(const Dialect *) const final;
+ bool infer(const Node *, DataType &) const final;
+};
+
+/**
+ * @brief Type Inference Rule for multiple dialects
+ */
+class MultiDialectTypeInferenceRule final : public TypeInferenceRule
+{
+public:
+ bool recognize(const Dialect *) const final;
+ bool infer(const Node *, DataType &) const final;
+
+ /// @brief Bind a specific rule to a Dialect
+ MultiDialectTypeInferenceRule &bind(const Dialect *d, const TypeInferenceRule *rule);
+
+private:
+ std::map<const Dialect *, const TypeInferenceRule *> _rules;
+};
+
+class TypeInferenceSession
+{
+public:
+ TypeInferenceSession(const TypeInferenceRule *rule) : _rule{rule}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool to(Graph *g) const;
+
+private:
+ const TypeInferenceRule *_rule;
+};
+
+inline TypeInferenceSession apply(TypeInferenceRule *r) { return TypeInferenceSession{r}; }
+
+struct TypeInference
+{
+ static bool known(const Node *);
+ static DataType get(const Node *);
+ static void erase(Node *);
+};
+
+inline bool dtype_known(const Node *node) { return TypeInference::known(node); }
+inline DataType dtype_get(const Node *node) { return TypeInference::get(node); }
+inline void dtype_erase(Node *node) { TypeInference::erase(node); }
+
+} // namespace loco
+
+#endif // __LOCO_SERVICE_TYPE_INFERENCE_H__
diff --git a/compiler/loco/src/ADT/AnnotatedItem.test.cpp b/compiler/loco/src/ADT/AnnotatedItem.test.cpp
new file mode 100644
index 000000000..42113ff7b
--- /dev/null
+++ b/compiler/loco/src/ADT/AnnotatedItem.test.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/ADT/AnnotatedItem.h"
+
+#include <gtest/gtest.h>
+#include <stdex/Memory.h>
+
+namespace
+{
+
+struct Annotation
+{
+ virtual ~Annotation() = default;
+};
+
+template <int N> struct DerivedAnnotation final : public Annotation
+{
+ static std::unique_ptr<DerivedAnnotation<N>> make(void)
+ {
+ return stdex::make_unique<DerivedAnnotation<N>>();
+ }
+};
+
+} // namespace
+
+TEST(AnnotatedItemTest, annotation)
+{
+ loco::AnnotatedItem<::Annotation> item;
+
+ ASSERT_EQ(item.annot<DerivedAnnotation<0>>(), nullptr);
+
+ item.annot(DerivedAnnotation<0>::make());
+
+ ASSERT_NE(item.annot<DerivedAnnotation<0>>(), nullptr);
+ ASSERT_EQ(item.annot<DerivedAnnotation<1>>(), nullptr);
+
+ item.annot<DerivedAnnotation<0>>(nullptr);
+ ASSERT_EQ(item.annot<DerivedAnnotation<0>>(), nullptr);
+
+ // Below check guarantees that "annot<T>(nullptr)" is allowed even when there is no annotation.
+ // This guarantee allows us to simplify code for some cases.
+ //
+ // Let us consider the following example:
+ //
+ // void f(loco::AnnotatedItem<T> *item)
+ // {
+ // /* DO SOMETHING */
+  //   if (cond) { item->annot<T>(nullptr); }
+ // }
+ //
+ // void g(loco::AnnotatedItem<T> *item)
+ // {
+ // f(item);
+ // item->annot<T>(nullptr);
+ // }
+ //
+ // The implementation of "g" gets complicated if annot<T>(nullptr) is not allowed if there is
+ // no annotation.
+ //
+ item.annot<DerivedAnnotation<0>>(nullptr);
+}
diff --git a/compiler/loco/src/ADT/ObjectPool.cpp b/compiler/loco/src/ADT/ObjectPool.cpp
new file mode 100644
index 000000000..d15a30a99
--- /dev/null
+++ b/compiler/loco/src/ADT/ObjectPool.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/ADT/ObjectPool.h"
+
+// This file validates "ObjectPool.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/Algorithm.cpp b/compiler/loco/src/IR/Algorithm.cpp
new file mode 100644
index 000000000..712e29975
--- /dev/null
+++ b/compiler/loco/src/IR/Algorithm.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Algorithm.h"
+
+#include <cassert>
+#include <set>
+#include <stack>
+
+namespace
+{
+
+class Frame final
+{
+public:
+ Frame(loco::Node *ptr) : _ptr{ptr}, _pos{-1}
+ {
+ // DO NOTHING
+ }
+
+public:
+ loco::Node *ptr(void) const { return _ptr; }
+ int64_t pos(void) const { return _pos; }
+
+ loco::Node &node(void) const { return *_ptr; }
+
+ void advance(void) { _pos += 1; }
+
+private:
+ loco::Node *_ptr = nullptr;
+ int64_t _pos = -1;
+};
+
+} // namespace
+
+namespace loco
+{
+
+// TODO Support cyclic graphs
+std::vector<loco::Node *> postorder_traversal(const std::vector<loco::Node *> &roots)
+{
+ std::vector<loco::Node *> res;
+
+ std::set<loco::Node *> visited_nodes;
+ std::stack<Frame> frames;
+
+ auto visited = [&visited_nodes](loco::Node *node) {
+ return visited_nodes.find(node) != visited_nodes.end();
+ };
+
+ // NOTE There is not much difference between "auto" and "auto &" as node is of "loco::Node *"
+ // type.
+ for (auto node : roots)
+ {
+ assert((node != nullptr) && "root is invalid");
+ frames.push(Frame{node});
+ }
+
+ while (!frames.empty())
+ {
+ auto &top_frame = frames.top();
+
+ if (top_frame.pos() == -1)
+ {
+ if (visited(top_frame.ptr()))
+ {
+ frames.pop();
+ continue;
+ }
+ visited_nodes.insert(top_frame.ptr());
+ }
+
+ top_frame.advance();
+
+ assert(top_frame.pos() >= 0);
+
+ if (top_frame.pos() < static_cast<int64_t>(top_frame.node().arity()))
+ {
+ // Let's visit the next argument
+ //
+ // NOTE "next" may be nullptr if a graph is under construction.
+ if (auto next = top_frame.node().arg(top_frame.pos()))
+ {
+ frames.push(Frame{next});
+ }
+ }
+ else
+ {
+ // Let's visit the current argument (all the arguments are already visited)
+ auto curr = top_frame.ptr();
+ res.emplace_back(curr);
+ frames.pop();
+ }
+ }
+
+ return res;
+}
+
+std::set<loco::Node *> active_nodes(const std::vector<loco::Node *> &roots)
+{
+ // This implementation works but may be inefficient
+ //
+ // TODO Use efficient implementation if necessary
+ auto nodes = postorder_traversal(roots);
+ return std::set<loco::Node *>{nodes.begin(), nodes.end()};
+}
+
+} // namespace loco
diff --git a/compiler/loco/src/IR/Algorithm.test.cpp b/compiler/loco/src/IR/Algorithm.test.cpp
new file mode 100644
index 000000000..f0a3585c0
--- /dev/null
+++ b/compiler/loco/src/IR/Algorithm.test.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Algorithm.h"
+#include "loco/IR/Graph.h"
+
+#include <algorithm>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+bool contains(const std::vector<loco::Node *> &vec, loco::Node *val)
+{
+ return std::any_of(vec.begin(), vec.end(), [val](loco::Node *node) { return node == val; });
+}
+
+bool contains(const std::set<loco::Node *> &s, loco::Node *val)
+{
+ return std::any_of(s.begin(), s.end(), [val](loco::Node *node) { return node == val; });
+}
+
+} // namespace
+
+TEST(AlgorithmTest, postorder_traversal)
+{
+ auto g = loco::make_graph();
+
+ auto pull_1 = g->nodes()->create<loco::Pull>();
+ auto push = g->nodes()->create<loco::Push>();
+
+ push->from(pull_1);
+
+ // Create a dummy node unreachable from the above "push" node
+ g->nodes()->create<loco::Pull>();
+
+ auto seq = loco::postorder_traversal({push});
+
+ ASSERT_EQ(seq.size(), 2);
+ ASSERT_EQ(seq.at(0), pull_1);
+ ASSERT_EQ(seq.at(1), push);
+}
+
+TEST(AlgorithmTest, postorder_traversal_visit_once)
+{
+ auto g = loco::make_graph();
+
+ // Create a network of the following form:
+ //
+ // Push1 Push2 <-- outputs
+ // \ /
+ // Pull <-- input
+ //
+ auto pull = g->nodes()->create<loco::Pull>();
+ auto push_1 = g->nodes()->create<loco::Push>();
+ auto push_2 = g->nodes()->create<loco::Push>();
+
+ push_1->from(pull);
+ push_2->from(pull);
+
+ auto seq = loco::postorder_traversal({push_1, push_2});
+
+ ASSERT_EQ(seq.size(), 3);
+ ASSERT_TRUE(contains(seq, pull));
+ ASSERT_TRUE(contains(seq, push_1));
+ ASSERT_TRUE(contains(seq, push_2));
+}
+
+TEST(AlgorithmTest, postorder_traversal_incomplte_graph)
+{
+ auto g = loco::make_graph();
+
+ // Create a network of the following form:
+ //
+ // TensorConcat
+ // / \
+ // Pull X
+ //
+ auto pull = g->nodes()->create<loco::Pull>();
+ auto concat = g->nodes()->create<loco::TensorConcat>();
+
+ concat->lhs(pull);
+
+ auto seq = loco::postorder_traversal({concat});
+
+ ASSERT_EQ(seq.size(), 2);
+ ASSERT_EQ(seq.at(0), pull);
+ ASSERT_EQ(seq.at(1), concat);
+}
+
+TEST(AlgorithmTest, active_nodes)
+{
+ auto g = loco::make_graph();
+
+ auto pull = g->nodes()->create<loco::Pull>();
+ auto push = g->nodes()->create<loco::Push>();
+
+ push->from(pull);
+
+ // NOTE This new Push node is unnecessary to compute "push"
+ g->nodes()->create<loco::Push>();
+
+ auto s = loco::active_nodes({push});
+
+ ASSERT_EQ(s.size(), 2);
+ ASSERT_TRUE(contains(s, pull));
+ ASSERT_TRUE(contains(s, push));
+}
diff --git a/compiler/loco/src/IR/BiasShape.test.cpp b/compiler/loco/src/IR/BiasShape.test.cpp
new file mode 100644
index 000000000..7f9b8dfed
--- /dev/null
+++ b/compiler/loco/src/IR/BiasShape.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/BiasShape.h"
+
+#include <gtest/gtest.h>
+
+TEST(BiasShapeTest, default_constructor)
+{
+ loco::BiasShape shape;
+
+ ASSERT_FALSE(shape.length().known());
+}
diff --git a/compiler/loco/src/IR/CanonicalDialect.cpp b/compiler/loco/src/IR/CanonicalDialect.cpp
new file mode 100644
index 000000000..ea956b80e
--- /dev/null
+++ b/compiler/loco/src/IR/CanonicalDialect.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/CanonicalDialect.h"
+#include "loco/IR/Graph.h"
+#include "loco/IR/Nodes.h"
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+struct GraphOutputIndexQueryServiceImpl final : public loco::GraphOutputIndexQueryService
+{
+ bool associated(const loco::Node *node) const final
+ {
+ if (auto push = dynamic_cast<const loco::Push *>(node))
+ {
+ return push->indexed();
+ }
+ return false;
+ }
+
+ loco::GraphOutputIndex index(const loco::Node *node) const final
+ {
+ assert(associated(node));
+ if (auto push = dynamic_cast<const loco::Push *>(node))
+ {
+ return push->index();
+ }
+ throw std::invalid_argument("node");
+ }
+};
+
+} // namespace
+
+namespace loco
+{
+
+CanonicalDialect::CanonicalDialect()
+{
+ service<GraphOutputIndexQueryService>(stdex::make_unique<GraphOutputIndexQueryServiceImpl>());
+}
+
+Dialect *CanonicalDialect::get(void)
+{
+ static CanonicalDialect d;
+ return &d;
+}
+
+} // namespace loco
diff --git a/compiler/loco/src/IR/CanonicalDialect.test.cpp b/compiler/loco/src/IR/CanonicalDialect.test.cpp
new file mode 100644
index 000000000..96b48218d
--- /dev/null
+++ b/compiler/loco/src/IR/CanonicalDialect.test.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/CanonicalDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CanonicalDialectTest, get)
+{
+ auto d = loco::CanonicalDialect::get();
+
+ // get() SHOULD return a valid(non-null) pointer
+ ASSERT_NE(d, nullptr);
+ // The return value SHOULD be stable across multiple invocations
+ ASSERT_EQ(d, loco::CanonicalDialect::get());
+}
diff --git a/compiler/loco/src/IR/CanonicalNode.cpp b/compiler/loco/src/IR/CanonicalNode.cpp
new file mode 100644
index 000000000..d5e13a415
--- /dev/null
+++ b/compiler/loco/src/IR/CanonicalNode.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/CanonicalNode.h"
+#include "loco/IR/CanonicalDialect.h"
+
+namespace loco
+{
+
+const Dialect *CanonicalNode::dialect(void) const { return CanonicalDialect::get(); }
+
+} // namespace loco
diff --git a/compiler/loco/src/IR/CanonicalNode.test.cpp b/compiler/loco/src/IR/CanonicalNode.test.cpp
new file mode 100644
index 000000000..cb61b5e83
--- /dev/null
+++ b/compiler/loco/src/IR/CanonicalNode.test.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/CanonicalNode.h"
+
+#include <gtest/gtest.h>
+
+TEST(CanonicalNodeTest, visitor_with_user_default_impl)
+{
+ struct MyVisitor final : public loco::CanonicalNodeVisitor<uint32_t>
+ {
+ // This visitor returns 128 if it visits a Forward node.
+ uint32_t visit(const loco::Forward *) final { return 128; }
+
+ // Otherwise, this visitor returns 256.
+ uint32_t visit(const loco::Node *) final { return 256; }
+ };
+
+ loco::Forward forward;
+ loco::ConstGen constgen;
+
+ MyVisitor v;
+
+ ASSERT_EQ(forward.accept(&v), 128);
+ ASSERT_EQ(constgen.accept(&v), 256);
+}
+
+TEST(CanonicalNodeTest, visitor)
+{
+ struct CountingVisitor final : public loco::CanonicalNodeVisitor<uint32_t>
+ {
+ uint32_t visit(const loco::Forward *) final { return 1; }
+ };
+
+ // Visitor can visit constant nodes
+ const loco::Forward node;
+
+ CountingVisitor v;
+
+ ASSERT_EQ(node.accept(&v), 1);
+}
+
+TEST(CanonicalNodeTest, mutable_visitor)
+{
+ struct ResetForward final : public loco::CanonicalNodeMutableVisitor<void>
+ {
+ void visit(loco::Forward *node) final { node->input(nullptr); }
+ };
+
+ loco::Pull pull_node;
+ loco::Forward forward_node;
+
+ forward_node.input(&pull_node);
+
+ ResetForward v;
+ forward_node.accept(&v);
+
+ ASSERT_EQ(forward_node.input(), nullptr);
+}
diff --git a/compiler/loco/src/IR/CanonicalOpcode.cpp b/compiler/loco/src/IR/CanonicalOpcode.cpp
new file mode 100644
index 000000000..6355ecf1f
--- /dev/null
+++ b/compiler/loco/src/IR/CanonicalOpcode.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/CanonicalOpcode.h"
+
+// NOTE This file validates "CanonicalOpcode.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/DataType.cpp b/compiler/loco/src/IR/DataType.cpp
new file mode 100644
index 000000000..56794dac7
--- /dev/null
+++ b/compiler/loco/src/IR/DataType.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/DataType.h"
+
+// This file validates "DataType.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/DataTypeTraits.test.cpp b/compiler/loco/src/IR/DataTypeTraits.test.cpp
new file mode 100644
index 000000000..76d2515a9
--- /dev/null
+++ b/compiler/loco/src/IR/DataTypeTraits.test.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/DataTypeTraits.h"
+
+#include <typeindex>
+
+#include <gtest/gtest.h>
+
+TEST(DataTypeTraitsTest, FLOAT32)
+{
+ auto obtained = std::type_index(typeid(loco::DataTypeImpl<loco::DataType::FLOAT32>::Type));
+ auto expected = std::type_index(typeid(float));
+
+ ASSERT_EQ(obtained, expected);
+}
diff --git a/compiler/loco/src/IR/DepthwiseFilterAxis.cpp b/compiler/loco/src/IR/DepthwiseFilterAxis.cpp
new file mode 100644
index 000000000..9d58795b2
--- /dev/null
+++ b/compiler/loco/src/IR/DepthwiseFilterAxis.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/DepthwiseFilterAxis.h"
+
+// NOTE This file validates "DepthwiseFilterAxis.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/DepthwiseFilterCodec.cpp b/compiler/loco/src/IR/DepthwiseFilterCodec.cpp
new file mode 100644
index 000000000..05a7fd723
--- /dev/null
+++ b/compiler/loco/src/IR/DepthwiseFilterCodec.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/DepthwiseFilterCodec.h"
+
+// NOTE This file validates "DepthwiseFilterCodec.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/DepthwiseFilterIndex.test.cpp b/compiler/loco/src/IR/DepthwiseFilterIndex.test.cpp
new file mode 100644
index 000000000..202647cfc
--- /dev/null
+++ b/compiler/loco/src/IR/DepthwiseFilterIndex.test.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/DepthwiseFilterIndex.h"
+
+#include <gtest/gtest.h>
+
+TEST(DepthwiseFilterIndexTest, default_constructor)
+{
+ loco::DepthwiseFilterIndex index;
+
+ // All the values are 0 at the beginning
+ ASSERT_EQ(index.channel(), 0);
+ ASSERT_EQ(index.nth(), 0);
+ ASSERT_EQ(index.row(), 0);
+ ASSERT_EQ(index.column(), 0);
+}
+
+TEST(DepthwiseFilterIndexTest, setter_and_getter)
+{
+ loco::DepthwiseFilterIndex index;
+
+ // Set depth
+ index.channel() = 2;
+
+ ASSERT_EQ(index.channel(), 2);
+ ASSERT_EQ(index.nth(), 0);
+ ASSERT_EQ(index.row(), 0);
+ ASSERT_EQ(index.column(), 0);
+
+ // Set multiplier
+ index.nth() = 3;
+
+ ASSERT_EQ(index.channel(), 2);
+ ASSERT_EQ(index.nth(), 3);
+ ASSERT_EQ(index.row(), 0);
+ ASSERT_EQ(index.column(), 0);
+
+ // Set height
+ index.row() = 4;
+
+ ASSERT_EQ(index.channel(), 2);
+ ASSERT_EQ(index.nth(), 3);
+ ASSERT_EQ(index.row(), 4);
+ ASSERT_EQ(index.column(), 0);
+
+ // Set width
+ index.column() = 5;
+
+ ASSERT_EQ(index.channel(), 2);
+ ASSERT_EQ(index.nth(), 3);
+ ASSERT_EQ(index.row(), 4);
+ ASSERT_EQ(index.column(), 5);
+}
diff --git a/compiler/loco/src/IR/DepthwiseFilterShape.test.cpp b/compiler/loco/src/IR/DepthwiseFilterShape.test.cpp
new file mode 100644
index 000000000..2b9518c1f
--- /dev/null
+++ b/compiler/loco/src/IR/DepthwiseFilterShape.test.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/DepthwiseFilterShape.h"
+
+#include <gtest/gtest.h>
+
+TEST(DepthwiseFilterShapeTest, default_constructor)
+{
+ loco::DepthwiseFilterShape shape;
+
+ ASSERT_FALSE(shape.depth().known());
+ ASSERT_FALSE(shape.multiplier().known());
+ ASSERT_FALSE(shape.height().known());
+ ASSERT_FALSE(shape.width().known());
+}
+
+TEST(DepthwiseFilterShapeTest, setter_and_getter)
+{
+ loco::DepthwiseFilterShape shape;
+
+ // Set depth
+ shape.depth() = 2;
+
+ ASSERT_TRUE(shape.depth().known());
+ ASSERT_FALSE(shape.multiplier().known());
+ ASSERT_FALSE(shape.height().known());
+ ASSERT_FALSE(shape.width().known());
+
+ ASSERT_EQ(shape.depth(), 2);
+
+ // Set multiplier
+ shape.multiplier() = 3;
+
+ ASSERT_TRUE(shape.depth().known());
+ ASSERT_TRUE(shape.multiplier().known());
+ ASSERT_FALSE(shape.height().known());
+ ASSERT_FALSE(shape.width().known());
+
+ ASSERT_EQ(shape.depth(), 2);
+ ASSERT_EQ(shape.multiplier(), 3);
+
+ // Set height
+ shape.height() = 4;
+
+ ASSERT_TRUE(shape.depth().known());
+ ASSERT_TRUE(shape.multiplier().known());
+ ASSERT_TRUE(shape.height().known());
+ ASSERT_FALSE(shape.width().known());
+
+ ASSERT_EQ(shape.depth(), 2);
+ ASSERT_EQ(shape.multiplier(), 3);
+ ASSERT_EQ(shape.height(), 4);
+
+ // Set width
+ shape.width() = 5;
+
+ ASSERT_TRUE(shape.depth().known());
+ ASSERT_TRUE(shape.multiplier().known());
+ ASSERT_TRUE(shape.height().known());
+ ASSERT_TRUE(shape.width().known());
+
+ ASSERT_EQ(shape.depth(), 2);
+ ASSERT_EQ(shape.multiplier(), 3);
+ ASSERT_EQ(shape.height(), 4);
+ ASSERT_EQ(shape.width(), 5);
+}
diff --git a/compiler/loco/src/IR/Dialect.cpp b/compiler/loco/src/IR/Dialect.cpp
new file mode 100644
index 000000000..a381b47eb
--- /dev/null
+++ b/compiler/loco/src/IR/Dialect.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Dialect.h"
+
+// NOTE This file validates "Dialect.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/Dialect.test.cpp b/compiler/loco/src/IR/Dialect.test.cpp
new file mode 100644
index 000000000..312bb52ef
--- /dev/null
+++ b/compiler/loco/src/IR/Dialect.test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Dialect.h"
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+TEST(DialectTest, service)
+{
+ struct S0 final : public loco::DialectService
+ {
+ };
+ struct S1 final : public loco::DialectService
+ {
+ };
+
+ struct MockDialect final : public loco::Dialect
+ {
+ MockDialect() { service<S1>(stdex::make_unique<S1>()); }
+ };
+
+ MockDialect dialect;
+
+ ASSERT_EQ(dialect.service<S0>(), nullptr);
+ ASSERT_NE(dialect.service<S1>(), nullptr);
+}
diff --git a/compiler/loco/src/IR/DialectService.cpp b/compiler/loco/src/IR/DialectService.cpp
new file mode 100644
index 000000000..fb8041e47
--- /dev/null
+++ b/compiler/loco/src/IR/DialectService.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/DialectService.h"
+
+// NOTE This file validates "DialectService.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/Dimension.cpp b/compiler/loco/src/IR/Dimension.cpp
new file mode 100644
index 000000000..0d11c83e8
--- /dev/null
+++ b/compiler/loco/src/IR/Dimension.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Dimension.h"
+
+namespace loco
+{
+
+bool operator==(const Dimension &lhs, const Dimension &rhs)
+{
+ return lhs.known() && rhs.known() && lhs.value() == rhs.value();
+}
+
+bool operator==(const Dimension &lhs, uint32_t rhs) { return lhs.known() && lhs.value() == rhs; }
+bool operator==(uint32_t lhs, const Dimension &rhs) { return rhs.known() && lhs == rhs.value(); }
+
+Dimension make_dimension(void) { return Dimension{}; }
+
+} // namespace loco
diff --git a/compiler/loco/src/IR/Dimension.test.cpp b/compiler/loco/src/IR/Dimension.test.cpp
new file mode 100644
index 000000000..4faf78ac8
--- /dev/null
+++ b/compiler/loco/src/IR/Dimension.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Dimension.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+struct DimensionTest : public ::testing::Test
+{
+protected:
+ uint32_t value(void) const { return _value; }
+
+private:
+ uint32_t const _value{3};
+};
+
+} // namespace
+
+TEST_F(DimensionTest, default_constructor)
+{
+ loco::Dimension dim;
+
+ ASSERT_FALSE(dim.known());
+}
+
+TEST_F(DimensionTest, value_constructor)
+{
+ loco::Dimension dim{value()};
+
+ ASSERT_TRUE(dim.known());
+ ASSERT_EQ(dim.value(), value());
+}
+
+TEST_F(DimensionTest, set)
+{
+ loco::Dimension dim;
+
+ dim.set(value());
+
+ ASSERT_TRUE(dim.known());
+ ASSERT_EQ(dim.value(), value());
+}
+
+TEST_F(DimensionTest, unset)
+{
+ loco::Dimension dim{value()};
+
+ dim.unset();
+
+ ASSERT_FALSE(dim.known());
+}
+
+TEST_F(DimensionTest, operator_eq)
+{
+ loco::Dimension unknown;
+ loco::Dimension known{3};
+
+ // Compare uint32_t and an unknown dimension
+ ASSERT_FALSE(unknown == 3);
+ ASSERT_FALSE(3 == unknown);
+
+ // Compare uint32_t and a known dimension
+ ASSERT_TRUE(known == 3);
+ ASSERT_TRUE(3 == known);
+
+ ASSERT_FALSE(known == 4);
+ ASSERT_FALSE(4 == known);
+
+ // Compare two known dimensions
+ loco::Dimension another_known{3};
+ ASSERT_TRUE(known == another_known);
+
+ // Compare two unknown dimensions
+ loco::Dimension unknown_a, unknown_b;
+ ASSERT_TRUE(unknown_a.known() == false && unknown_b.known() == false);
+ ASSERT_FALSE(unknown_a == unknown_b);
+}
+
+TEST_F(DimensionTest, make_unknown_dimension)
+{
+ auto dim = loco::make_dimension();
+
+ ASSERT_FALSE(dim.known());
+}
diff --git a/compiler/loco/src/IR/Domain.cpp b/compiler/loco/src/IR/Domain.cpp
new file mode 100644
index 000000000..7bad04750
--- /dev/null
+++ b/compiler/loco/src/IR/Domain.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Domain.h"
+
+// NOTE This file validates "Domain.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/FeatureAxis.cpp b/compiler/loco/src/IR/FeatureAxis.cpp
new file mode 100644
index 000000000..b0f560677
--- /dev/null
+++ b/compiler/loco/src/IR/FeatureAxis.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/FeatureAxis.h"
+
+// NOTE This file validates "FeatureAxis.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/FeatureCodec.cpp b/compiler/loco/src/IR/FeatureCodec.cpp
new file mode 100644
index 000000000..99d39a489
--- /dev/null
+++ b/compiler/loco/src/IR/FeatureCodec.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/FeatureCodec.h"
+
+// NOTE This file validates "FeatureCodec.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/FeatureIndex.test.cpp b/compiler/loco/src/IR/FeatureIndex.test.cpp
new file mode 100644
index 000000000..82b563986
--- /dev/null
+++ b/compiler/loco/src/IR/FeatureIndex.test.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/FeatureIndex.h"
+
+#include <gtest/gtest.h>
+
+TEST(FeatureIndexTest, default_constructor)
+{
+ loco::FeatureIndex index;
+
+ // All the values are 0 at the beginning
+ ASSERT_EQ(index.batch(), 0);
+ ASSERT_EQ(index.channel(), 0);
+ ASSERT_EQ(index.row(), 0);
+ ASSERT_EQ(index.column(), 0);
+}
+
+TEST(FeatureIndexTest, setter_and_getter)
+{
+ loco::FeatureIndex index;
+
+ // Set count
+ index.batch() = 2;
+
+ ASSERT_EQ(index.batch(), 2);
+ ASSERT_EQ(index.channel(), 0);
+ ASSERT_EQ(index.row(), 0);
+ ASSERT_EQ(index.column(), 0);
+
+ // Set channel
+ index.channel() = 3;
+
+ ASSERT_EQ(index.batch(), 2);
+ ASSERT_EQ(index.channel(), 3);
+ ASSERT_EQ(index.row(), 0);
+ ASSERT_EQ(index.column(), 0);
+
+ // Set height
+ index.row() = 4;
+
+ ASSERT_EQ(index.batch(), 2);
+ ASSERT_EQ(index.channel(), 3);
+ ASSERT_EQ(index.row(), 4);
+ ASSERT_EQ(index.column(), 0);
+
+ // Set width
+ index.column() = 5;
+
+ ASSERT_EQ(index.batch(), 2);
+ ASSERT_EQ(index.channel(), 3);
+ ASSERT_EQ(index.row(), 4);
+ ASSERT_EQ(index.column(), 5);
+}
diff --git a/compiler/loco/src/IR/FeatureShape.test.cpp b/compiler/loco/src/IR/FeatureShape.test.cpp
new file mode 100644
index 000000000..59e25ac23
--- /dev/null
+++ b/compiler/loco/src/IR/FeatureShape.test.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/FeatureShape.h"
+
+#include <gtest/gtest.h>
+
+TEST(FeatureShapeTest, default_constructor)
+{
+ loco::FeatureShape shape;
+
+ ASSERT_FALSE(shape.count().known());
+ ASSERT_FALSE(shape.depth().known());
+ ASSERT_FALSE(shape.height().known());
+ ASSERT_FALSE(shape.width().known());
+}
+
+TEST(FeatureShapeTest, setter_and_getter)
+{
+ loco::FeatureShape shape;
+
+ // Set count
+ shape.count() = 2;
+
+ ASSERT_TRUE(shape.count().known());
+ ASSERT_FALSE(shape.depth().known());
+ ASSERT_FALSE(shape.height().known());
+ ASSERT_FALSE(shape.width().known());
+
+ ASSERT_EQ(shape.count(), 2);
+
+ // Set depth
+ shape.depth() = 3;
+
+ ASSERT_TRUE(shape.count().known());
+ ASSERT_TRUE(shape.depth().known());
+ ASSERT_FALSE(shape.height().known());
+ ASSERT_FALSE(shape.width().known());
+
+ ASSERT_EQ(shape.count(), 2);
+ ASSERT_EQ(shape.depth(), 3);
+
+ // Set height
+ shape.height() = 4;
+
+ ASSERT_TRUE(shape.count().known());
+ ASSERT_TRUE(shape.depth().known());
+ ASSERT_TRUE(shape.height().known());
+ ASSERT_FALSE(shape.width().known());
+
+ ASSERT_EQ(shape.count(), 2);
+ ASSERT_EQ(shape.depth(), 3);
+ ASSERT_EQ(shape.height(), 4);
+
+ // Set width
+ shape.width() = 5;
+
+ ASSERT_TRUE(shape.count().known());
+ ASSERT_TRUE(shape.depth().known());
+ ASSERT_TRUE(shape.height().known());
+ ASSERT_TRUE(shape.width().known());
+
+ ASSERT_EQ(shape.count(), 2);
+ ASSERT_EQ(shape.depth(), 3);
+ ASSERT_EQ(shape.height(), 4);
+ ASSERT_EQ(shape.width(), 5);
+}
diff --git a/compiler/loco/src/IR/FilterAxis.cpp b/compiler/loco/src/IR/FilterAxis.cpp
new file mode 100644
index 000000000..be4234e6a
--- /dev/null
+++ b/compiler/loco/src/IR/FilterAxis.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/FilterAxis.h"
+
+// NOTE This file validates "FilterAxis.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/FilterCodec.cpp b/compiler/loco/src/IR/FilterCodec.cpp
new file mode 100644
index 000000000..f48cf1821
--- /dev/null
+++ b/compiler/loco/src/IR/FilterCodec.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/FilterCodec.h"
+
+// NOTE This file validates "FilterCodec.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/FilterIndex.test.cpp b/compiler/loco/src/IR/FilterIndex.test.cpp
new file mode 100644
index 000000000..58f38718e
--- /dev/null
+++ b/compiler/loco/src/IR/FilterIndex.test.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/FilterIndex.h"
+
+#include <gtest/gtest.h>
+
+TEST(FilterIndexTest, default_constructor)
+{
+ loco::FilterIndex index;
+
+ // All the values are 0 at the beginning
+ ASSERT_EQ(index.nth(), 0);
+ ASSERT_EQ(index.channel(), 0);
+ ASSERT_EQ(index.row(), 0);
+ ASSERT_EQ(index.column(), 0);
+}
+
+TEST(FilterIndexTest, setter_and_getter)
+{
+ loco::FilterIndex index;
+
+ // Set count
+ index.nth() = 2;
+
+ ASSERT_EQ(index.nth(), 2);
+ ASSERT_EQ(index.channel(), 0);
+ ASSERT_EQ(index.row(), 0);
+ ASSERT_EQ(index.column(), 0);
+
+ // Set channel
+ index.channel() = 3;
+
+ ASSERT_EQ(index.nth(), 2);
+ ASSERT_EQ(index.channel(), 3);
+ ASSERT_EQ(index.row(), 0);
+ ASSERT_EQ(index.column(), 0);
+
+ // Set height
+ index.row() = 4;
+
+ ASSERT_EQ(index.nth(), 2);
+ ASSERT_EQ(index.channel(), 3);
+ ASSERT_EQ(index.row(), 4);
+ ASSERT_EQ(index.column(), 0);
+
+ // Set width
+ index.column() = 5;
+
+ ASSERT_EQ(index.nth(), 2);
+ ASSERT_EQ(index.channel(), 3);
+ ASSERT_EQ(index.row(), 4);
+ ASSERT_EQ(index.column(), 5);
+}
diff --git a/compiler/loco/src/IR/FilterShape.test.cpp b/compiler/loco/src/IR/FilterShape.test.cpp
new file mode 100644
index 000000000..ccb60ed76
--- /dev/null
+++ b/compiler/loco/src/IR/FilterShape.test.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/FilterShape.h"
+
+#include <gtest/gtest.h>
+
+TEST(FilterShapeTest, default_constructor)
+{
+ loco::FilterShape shape;
+
+ ASSERT_FALSE(shape.count().known());
+ ASSERT_FALSE(shape.depth().known());
+ ASSERT_FALSE(shape.height().known());
+ ASSERT_FALSE(shape.width().known());
+}
+
+TEST(FilterShapeTest, setter_and_getter)
+{
+ loco::FilterShape shape;
+
+ // Set count
+ shape.count() = 2;
+
+ ASSERT_TRUE(shape.count().known());
+ ASSERT_FALSE(shape.depth().known());
+ ASSERT_FALSE(shape.height().known());
+ ASSERT_FALSE(shape.width().known());
+
+ ASSERT_EQ(shape.count(), 2);
+
+ // Set depth
+ shape.depth() = 3;
+
+ ASSERT_TRUE(shape.count().known());
+ ASSERT_TRUE(shape.depth().known());
+ ASSERT_FALSE(shape.height().known());
+ ASSERT_FALSE(shape.width().known());
+
+ ASSERT_EQ(shape.count(), 2);
+ ASSERT_EQ(shape.depth(), 3);
+
+ // Set height
+ shape.height() = 4;
+
+ ASSERT_TRUE(shape.count().known());
+ ASSERT_TRUE(shape.depth().known());
+ ASSERT_TRUE(shape.height().known());
+ ASSERT_FALSE(shape.width().known());
+
+ ASSERT_EQ(shape.count(), 2);
+ ASSERT_EQ(shape.depth(), 3);
+ ASSERT_EQ(shape.height(), 4);
+
+ // Set width
+ shape.width() = 5;
+
+ ASSERT_TRUE(shape.count().known());
+ ASSERT_TRUE(shape.depth().known());
+ ASSERT_TRUE(shape.height().known());
+ ASSERT_TRUE(shape.width().known());
+
+ ASSERT_EQ(shape.count(), 2);
+ ASSERT_EQ(shape.depth(), 3);
+ ASSERT_EQ(shape.height(), 4);
+ ASSERT_EQ(shape.width(), 5);
+}
diff --git a/compiler/loco/src/IR/Graph.cpp b/compiler/loco/src/IR/Graph.cpp
new file mode 100644
index 000000000..1d8752252
--- /dev/null
+++ b/compiler/loco/src/IR/Graph.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Graph.h"
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+namespace
+{
+
+std::unique_ptr<loco::TensorShape> make_tensor_shape(std::initializer_list<loco::Dimension> dims)
+{
+ auto tensor_shape = stdex::make_unique<loco::TensorShape>();
+
+ tensor_shape->rank(dims.size());
+ {
+ uint32_t axis = 0;
+ for (auto it = dims.begin(); it != dims.end(); ++it)
+ {
+ tensor_shape->dim(axis++) = *it;
+ }
+ assert(axis == dims.size());
+ }
+
+ return std::move(tensor_shape);
+}
+
+} // namespace
+
+namespace loco
+{
+
+void Mixin<Trait::TensorShaped>::shape(std::initializer_list<Dimension> dims)
+{
+ shape(make_tensor_shape(dims));
+}
+
+GraphInput *Graph::InputContext::create(void)
+{
+ return take(stdex::make_unique<GraphInput>(size()));
+}
+
+GraphOutput *Graph::OutputContext::create(void)
+{
+ return take(stdex::make_unique<GraphOutput>(size()));
+}
+
+std::set<loco::Node *> all_nodes(loco::Graph *g)
+{
+ std::set<loco::Node *> res;
+
+ for (uint32_t n = 0; n < g->nodes()->size(); ++n)
+ {
+ res.insert(g->nodes()->at(n));
+ }
+
+ return res;
+}
+
+std::vector<Node *> input_nodes(const Graph *g)
+{
+ std::map<GraphInputIndex, loco::Node *> table;
+
+ for (uint32_t n = 0; n < g->nodes()->size(); ++n)
+ {
+ auto node = g->nodes()->at(n);
+
+ if (auto service = node->dialect()->service<GraphInputIndexQueryService>())
+ {
+ if (service->associated(node))
+ {
+ auto input_index = service->index(node);
+ assert(table.find(input_index) == table.end());
+ table[input_index] = node;
+ }
+ }
+ }
+
+ std::vector<loco::Node *> res;
+
+ for (uint32_t n = 0; n < g->inputs()->size(); ++n)
+ {
+ auto it = table.find(n);
+ res.emplace_back(it == table.end() ? nullptr : it->second);
+ }
+
+ return res;
+}
+
+std::vector<loco::Node *> output_nodes(loco::Graph *g)
+{
+ std::map<GraphOutputIndex, loco::Node *> table;
+
+ for (uint32_t n = 0; n < g->nodes()->size(); ++n)
+ {
+ auto node = g->nodes()->at(n);
+
+ if (auto service = node->dialect()->service<GraphOutputIndexQueryService>())
+ {
+ if (service->associated(node))
+ {
+ auto output_index = service->index(node);
+ assert(table.find(output_index) == table.end());
+ table[output_index] = node;
+ }
+ }
+ }
+
+ std::vector<loco::Node *> res;
+
+ for (uint32_t n = 0; n < g->outputs()->size(); ++n)
+ {
+ auto it = table.find(n);
+ res.emplace_back(it == table.end() ? nullptr : it->second);
+ }
+
+ return res;
+}
+
+std::unique_ptr<Graph> make_graph(void) { return std::unique_ptr<Graph>{new Graph}; }
+
+} // namespace loco
diff --git a/compiler/loco/src/IR/Graph.test.cpp b/compiler/loco/src/IR/Graph.test.cpp
new file mode 100644
index 000000000..6df630b0f
--- /dev/null
+++ b/compiler/loco/src/IR/Graph.test.cpp
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Graph.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+// @brief Mockup class for loco::NamedEntity
+struct NamedElement final : private loco::NamedEntity
+{
+ LOCO_NAMED_ENTITY_EXPOSE;
+};
+
+} // namespace
+
+TEST(NamedTest, constructor)
+{
+ NamedElement elem;
+
+ ASSERT_EQ(elem.name(), "");
+}
+
+TEST(NamedTest, setter_and_getter)
+{
+ NamedElement elem;
+
+ elem.name("name");
+ ASSERT_EQ(elem.name(), "name");
+}
+
+TEST(DataTypedMixinTest, constructor)
+{
+ loco::Mixin<loco::Trait::DataTyped> mixin;
+
+ ASSERT_EQ(mixin.dtype(), loco::DataType::Unknown);
+}
+
+TEST(DataTypedMixinTest, setter_and_getter)
+{
+ loco::Mixin<loco::Trait::DataTyped> mixin;
+
+ mixin.dtype(loco::DataType::FLOAT32);
+ ASSERT_EQ(mixin.dtype(), loco::DataType::FLOAT32);
+}
+
+TEST(TensorShapedMixinTest, setter_and_getter)
+{
+ loco::Mixin<loco::Trait::TensorShaped> mixin;
+
+ mixin.shape({1, 2, 3, 4});
+ ASSERT_NE(mixin.shape(), nullptr);
+ ASSERT_EQ(mixin.shape()->rank(), 4);
+ ASSERT_EQ(mixin.shape()->dim(0), 1);
+ ASSERT_EQ(mixin.shape()->dim(1), 2);
+ ASSERT_EQ(mixin.shape()->dim(2), 3);
+ ASSERT_EQ(mixin.shape()->dim(3), 4);
+}
+
+TEST(GraphTest, create_and_destroy_node)
+{
+ auto g = loco::make_graph();
+
+ auto pull = g->nodes()->create<loco::Pull>();
+
+ ASSERT_NO_THROW(g->nodes()->destroy(pull));
+ ASSERT_THROW(g->nodes()->destroy(pull), std::invalid_argument);
+}
+
+TEST(GraphTest, create_input)
+{
+ auto g = loco::make_graph();
+
+ auto input = g->inputs()->create();
+
+ // TODO Add more checks
+ ASSERT_EQ(input->shape(), nullptr);
+ ASSERT_EQ(input->index(), 0);
+}
+
+TEST(GraphTest, create_output)
+{
+ auto g = loco::make_graph();
+
+ auto output = g->outputs()->create();
+
+ // TODO Add more checks
+ ASSERT_EQ(output->shape(), nullptr);
+ ASSERT_EQ(output->index(), 0);
+}
+
+namespace
+{
+// Temporary node with multiple params for its ctor. loco::CanonicalOpcode::ReLU is used for simplicity
+class ParamCtorNode
+ : public loco::CanonicalNodeDef<loco::CanonicalOpcode::ReLU, loco::FixedArity<0>::Mixin>
+{
+public:
+ ParamCtorNode(int i, float f)
+ {
+ _i = i;
+ _f = f;
+ }
+
+ int i() { return _i; }
+ float f() { return _f; }
+
+private:
+ int _i;
+ float _f;
+};
+} // namespace
+
+TEST(GraphTest, consturctor_with_param_node)
+{
+ auto g = loco::make_graph();
+
+ auto test_node = g->nodes()->create<ParamCtorNode>(22, 11.11);
+
+ ASSERT_EQ(test_node->graph(), g.get());
+ ASSERT_EQ(const_cast<const ParamCtorNode *>(test_node)->graph(), g.get());
+
+ ASSERT_EQ(test_node->i(), 22);
+ ASSERT_FLOAT_EQ(test_node->f(), 11.11);
+
+ ASSERT_NO_THROW(g->nodes()->destroy(test_node));
+ ASSERT_THROW(g->nodes()->destroy(test_node), std::invalid_argument);
+}
+
+TEST(GraphTest, getters_over_const_instance)
+{
+ auto g = loco::make_graph();
+
+ auto pull = g->nodes()->create<loco::Pull>();
+ auto push = g->nodes()->create<loco::Push>();
+
+ loco::link(g->inputs()->create(), pull);
+ loco::link(g->outputs()->create(), push);
+
+ auto ptr = const_cast<const loco::Graph *>(g.get());
+
+ EXPECT_EQ(ptr->nodes()->size(), 2);
+ EXPECT_EQ(ptr->inputs()->size(), 1);
+}
+
+TEST(GraphTest, graph_node_enumeration)
+{
+ auto g = loco::make_graph();
+
+ auto pull_1 = g->nodes()->create<loco::Pull>();
+ auto push_1 = g->nodes()->create<loco::Push>();
+
+ auto nodes = loco::all_nodes(g.get());
+
+ // Returns true if "nodes" includes a given node
+ auto member = [&nodes](loco::Node *node) { return nodes.find(node) != nodes.end(); };
+
+ ASSERT_EQ(nodes.size(), 2);
+ ASSERT_TRUE(member(pull_1));
+ ASSERT_TRUE(member(push_1));
+}
+
+TEST(GraphTest, graph_inout_enumeration)
+{
+ auto g = loco::make_graph();
+
+ std::vector<loco::Pull *> pull_nodes;
+
+ auto pull_1 = g->nodes()->create<loco::Pull>();
+ auto pull_2 = g->nodes()->create<loco::Pull>();
+ auto pull_3 = g->nodes()->create<loco::Pull>();
+
+ auto push_1 = g->nodes()->create<loco::Push>();
+ auto push_2 = g->nodes()->create<loco::Push>();
+ auto push_3 = g->nodes()->create<loco::Push>();
+
+ loco::link(g->inputs()->create(), pull_2);
+ loco::link(g->inputs()->create(), pull_1);
+
+ loco::link(g->outputs()->create(), push_1);
+ loco::link(g->outputs()->create(), push_3);
+
+ auto output_nodes = loco::output_nodes(g.get());
+
+ ASSERT_EQ(output_nodes.size(), 2);
+ ASSERT_EQ(output_nodes.at(0), push_1);
+ ASSERT_EQ(output_nodes.at(1), push_3);
+}
+
+TEST(GraphTest, graph_name)
+{
+ auto g = loco::make_graph();
+
+ g->name("HelloGraph");
+ ASSERT_TRUE(g->name() == "HelloGraph");
+}
+
+TEST(GraphTest, graph_name_nullptr_NEG)
+{
+ auto g = loco::make_graph();
+
+ EXPECT_ANY_THROW(g->name(nullptr));
+}
diff --git a/compiler/loco/src/IR/GraphInputIndex.cpp b/compiler/loco/src/IR/GraphInputIndex.cpp
new file mode 100644
index 000000000..0c94d704c
--- /dev/null
+++ b/compiler/loco/src/IR/GraphInputIndex.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/GraphInputIndex.h"
+
+// NOTE This file validates "GraphInputIndex.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/GraphOutputIndex.cpp b/compiler/loco/src/IR/GraphOutputIndex.cpp
new file mode 100644
index 000000000..e6fdb9f94
--- /dev/null
+++ b/compiler/loco/src/IR/GraphOutputIndex.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/GraphOutputIndex.h"
+
+// NOTE This file validates "GraphOutputIndex.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/MatrixAxis.cpp b/compiler/loco/src/IR/MatrixAxis.cpp
new file mode 100644
index 000000000..d0773f758
--- /dev/null
+++ b/compiler/loco/src/IR/MatrixAxis.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/MatrixAxis.h"
+
+// NOTE This file validates "MatrixAxis.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/MatrixCodec.cpp b/compiler/loco/src/IR/MatrixCodec.cpp
new file mode 100644
index 000000000..87ae42610
--- /dev/null
+++ b/compiler/loco/src/IR/MatrixCodec.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/MatrixCodec.h"
+
+// NOTE This file validates "MatrixCodec.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/MockupNode.h b/compiler/loco/src/IR/MockupNode.h
new file mode 100644
index 000000000..ec56c90e2
--- /dev/null
+++ b/compiler/loco/src/IR/MockupNode.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_IR_MOCKUP_NODE_H__
+#define __LOCO_IR_MOCKUP_NODE_H__
+
+#include "loco/IR/Use.h"
+#include "loco/IR/Node.h"
+
+namespace
+{
+
+struct MockDialect final : public loco::Dialect
+{
+ static loco::Dialect *get(void)
+ {
+ static MockDialect d;
+ return &d;
+ }
+};
+
+// @brief Mockup node for internal testing
+class MockupNode final : public loco::Node
+{
+public:
+ MockupNode() = default;
+
+public:
+ const loco::Dialect *dialect(void) const final { return MockDialect::get(); }
+ uint32_t opnum(void) const final { return 0; }
+
+ uint32_t arity(void) const final { return 1; }
+ Node *arg(uint32_t N) const final { return _arg.node(); }
+ void drop(void) final { _arg.node(nullptr); }
+
+ Node *in(void)const { return _arg.node(); }
+ void in(Node *node) { _arg.node(node); }
+
+private:
+ loco::Use _arg{this};
+};
+
+} // namespace
+
+#endif // __LOCO_IR_MOCKUP_NODE_H__
diff --git a/compiler/loco/src/IR/Node.cpp b/compiler/loco/src/IR/Node.cpp
new file mode 100644
index 000000000..90ec5c997
--- /dev/null
+++ b/compiler/loco/src/IR/Node.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Node.h"
+#include "loco/IR/Use.h"
+
+#include <cassert>
+
+namespace loco
+{
+
+Node::~Node()
+{
+ // To detect dangling references
+ assert(_uses.size() == 0);
+}
+
+std::set<Node *> preds(const Node *node)
+{
+ std::set<Node *> res;
+
+ for (uint32_t n = 0; n < node->arity(); ++n)
+ {
+ if (auto pred = node->arg(n))
+ {
+ res.insert(pred);
+ }
+ }
+
+ return res;
+}
+
+std::set<Node *> succs(const Node *node)
+{
+ std::set<Node *> res;
+
+ for (auto use : node->_uses)
+ {
+ auto user = use->user();
+ assert(user != nullptr);
+ res.insert(user);
+ }
+
+ return res;
+}
+
+Subst<SubstQualifier::Default>::Subst(Node *from) : _from{from}
+{
+ // _from SHOULD be valid
+ assert(_from != nullptr);
+}
+
+void Subst<SubstQualifier::Default>::with(Node *into) const
+{
+ if (_from == into)
+ {
+ return;
+ }
+
+ auto *uses = &(_from->_uses);
+
+ while (!uses->empty())
+ {
+ auto use = *(uses->begin());
+ use->node(into);
+ }
+}
+
+Subst<SubstQualifier::Default> replace(Node *node)
+{
+ // Let's create Subst<SubstQualifier::Default>!
+ return Subst<SubstQualifier::Default>{node};
+}
+
+} // namespace loco
diff --git a/compiler/loco/src/IR/Node.test.cpp b/compiler/loco/src/IR/Node.test.cpp
new file mode 100644
index 000000000..00e444465
--- /dev/null
+++ b/compiler/loco/src/IR/Node.test.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Node.h"
+
+#include "MockupNode.h"
+
+#include <gtest/gtest.h>
+
+TEST(NodeTest, preds)
+{
+ ::MockupNode arg;
+ ::MockupNode node;
+
+ node.in(&arg);
+
+ auto preds = loco::preds(&node);
+
+ ASSERT_EQ(preds.size(), 1);
+ ASSERT_NE(preds.find(&arg), preds.end());
+}
+
+TEST(NodeTest, succs)
+{
+ ::MockupNode node;
+ ::MockupNode succ_1;
+ ::MockupNode succ_2;
+
+ succ_1.in(&node);
+ succ_2.in(&node);
+
+ auto succs = loco::succs(&node);
+
+ ASSERT_EQ(succs.size(), 2);
+ ASSERT_NE(succs.find(&succ_1), succs.end());
+ ASSERT_NE(succs.find(&succ_2), succs.end());
+}
+
+TEST(NodeTest, replace_with)
+{
+ ::MockupNode node_1;
+ ::MockupNode node_2;
+
+ ::MockupNode node_3;
+ ::MockupNode node_4;
+
+ node_3.in(&node_1);
+ node_4.in(&node_2);
+
+ // The following holds at this point
+ // - node_3 USE node_1
+ // - node_4 USE node_2
+ ASSERT_EQ(node_3.in(), &node_1);
+ ASSERT_EQ(node_4.in(), &node_2);
+
+ // Replace all the usage of node_1 with node_2
+ replace(&node_1).with(&node_2);
+
+ // The following holds at this point
+ // - node_3 USE node_2
+ // - node_4 USE node_2
+ ASSERT_EQ(node_3.in(), &node_2);
+ ASSERT_EQ(node_4.in(), &node_2);
+}
+
+TEST(NodeTest, constructor)
+{
+ MockupNode node;
+
+ // graph() SHOULD return nullptr if node is not constructed through "Graph"
+ ASSERT_EQ(node.graph(), nullptr);
+}
+
+// TODO Rewrite this as a FixedArity mix-in test
+#if 0
+TEST(FixedArityNodeTest, constructor)
+{
+ struct DerivedNode final : public loco::FixedArityNode<1, loco::Node>
+ {
+ loco::Dialect *dialect(void) const final { return MockDialect::get(); }
+ uint32_t opnum(void) const final { return 0; }
+ };
+
+ DerivedNode node;
+
+ ASSERT_EQ(node.arity(), 1);
+ ASSERT_EQ(node.arg(0), nullptr);
+}
+#endif
diff --git a/compiler/loco/src/IR/NodeMixins.cpp b/compiler/loco/src/IR/NodeMixins.cpp
new file mode 100644
index 000000000..66037b17a
--- /dev/null
+++ b/compiler/loco/src/IR/NodeMixins.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/NodeMixins.h"
+
+// NOTE This file validates "NodeMixins.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/NodePool.cpp b/compiler/loco/src/IR/NodePool.cpp
new file mode 100644
index 000000000..553f15eb5
--- /dev/null
+++ b/compiler/loco/src/IR/NodePool.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/NodePool.h"
+
+namespace loco
+{
+
+NodePool::~NodePool()
+{
+ // Drop all the references before deallocation
+ for (uint32_t n = 0; n < size(); ++n)
+ {
+ at(n)->drop();
+ }
+}
+
+} // namespace loco
diff --git a/compiler/loco/src/IR/NodeShape.cpp b/compiler/loco/src/IR/NodeShape.cpp
new file mode 100644
index 000000000..0130cfbdb
--- /dev/null
+++ b/compiler/loco/src/IR/NodeShape.cpp
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/NodeShape.h"
+
+#include <cassert>
+#include <stdexcept>
+
+//
+// BiasShape Support
+//
+namespace loco
+{
+
+void NodeShape::set(const BiasShape &shape)
+{
+ _domain = Domain::Bias;
+
+ _dims.resize(1);
+ _dims.at(0) = shape.length();
+}
+
+template <> BiasShape NodeShape::as(void) const
+{
+ assert(_domain == Domain::Bias);
+
+ BiasShape res;
+
+ res.length() = _dims.at(0);
+
+ return res;
+}
+
+} // namespace loco
+
+//
+// DepthwiseFilterShape Support
+//
+namespace loco
+{
+
+void NodeShape::set(const DepthwiseFilterShape &shape)
+{
+ _domain = Domain::DepthwiseFilter;
+
+ _dims.resize(4);
+ _dims.at(0) = shape.multiplier();
+ _dims.at(1) = shape.depth();
+ _dims.at(2) = shape.height();
+ _dims.at(3) = shape.width();
+}
+
+template <> DepthwiseFilterShape NodeShape::as(void) const
+{
+ assert(_domain == Domain::DepthwiseFilter);
+
+ DepthwiseFilterShape res;
+
+ res.multiplier() = _dims.at(0);
+ res.depth() = _dims.at(1);
+ res.height() = _dims.at(2);
+ res.width() = _dims.at(3);
+
+ return res;
+}
+
+} // namespace loco
+
+//
+// FeatureShape Support
+//
+namespace loco
+{
+
+void NodeShape::set(const FeatureShape &shape)
+{
+ _domain = Domain::Feature;
+
+ _dims.resize(4);
+ _dims.at(0) = shape.count();
+ _dims.at(1) = shape.depth();
+ _dims.at(2) = shape.height();
+ _dims.at(3) = shape.width();
+}
+
+template <> FeatureShape NodeShape::as(void) const
+{
+ assert(_domain == Domain::Feature);
+
+ FeatureShape res;
+
+ res.count() = _dims.at(0);
+ res.depth() = _dims.at(1);
+ res.height() = _dims.at(2);
+ res.width() = _dims.at(3);
+
+ return res;
+}
+
+} // namespace loco
+
+//
+// FilterShape Support
+//
+namespace loco
+{
+
+void NodeShape::set(const FilterShape &shape)
+{
+ _domain = Domain::Filter;
+
+ _dims.resize(4);
+ _dims.at(0) = shape.count();
+ _dims.at(1) = shape.depth();
+ _dims.at(2) = shape.height();
+ _dims.at(3) = shape.width();
+}
+
+template <> FilterShape NodeShape::as(void) const
+{
+ assert(_domain == Domain::Filter);
+
+ FilterShape res;
+
+ res.count() = _dims.at(0);
+ res.depth() = _dims.at(1);
+ res.height() = _dims.at(2);
+ res.width() = _dims.at(3);
+
+ return res;
+}
+
+} // namespace loco
+
+//
+// MatrixShape Support
+//
+namespace loco
+{
+
+void NodeShape::set(const MatrixShape &shape)
+{
+ _domain = Domain::Matrix;
+
+ _dims.resize(2);
+ _dims.at(0) = shape.height();
+ _dims.at(1) = shape.width();
+}
+
+template <> MatrixShape NodeShape::as(void) const
+{
+ assert(_domain == Domain::Matrix);
+
+ MatrixShape res;
+
+ res.height() = _dims.at(0);
+ res.width() = _dims.at(1);
+
+ return res;
+}
+
+} // namespace loco
+
+//
+// TensorShape Support
+//
+namespace loco
+{
+
+void NodeShape::set(const TensorShape &shape)
+{
+ _domain = Domain::Tensor;
+
+ _dims.resize(shape.rank());
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ _dims.at(axis) = shape.dim(axis);
+ }
+}
+
+template <> TensorShape NodeShape::as(void) const
+{
+ assert(_domain == Domain::Tensor);
+
+ TensorShape res;
+
+ res.rank(_dims.size());
+ for (uint32_t axis = 0; axis < _dims.size(); ++axis)
+ {
+ res.dim(axis) = _dims.at(axis);
+ }
+
+ return res;
+}
+
+} // namespace loco
+
+namespace loco
+{
+
+bool operator==(const NodeShape &lhs, const NodeShape &rhs)
+{
+ if (lhs.domain() != rhs.domain())
+ return false;
+
+ switch (lhs.domain())
+ {
+ case loco::Domain::Tensor:
+ {
+ auto lhs_t = lhs.as<TensorShape>();
+ auto rhs_t = rhs.as<TensorShape>();
+ if (lhs_t.rank() != rhs_t.rank())
+ return false;
+ for (uint32_t axis = 0; axis < lhs_t.rank(); ++axis)
+ {
+ if (!(lhs_t.dim(axis) == rhs_t.dim(axis)))
+ return false;
+ }
+ return true;
+ }
+
+ case loco::Domain::Feature:
+ {
+ auto lhs_f = lhs.as<FeatureShape>();
+ auto rhs_f = rhs.as<FeatureShape>();
+
+ return (lhs_f.count() == rhs_f.count() && lhs_f.depth() == rhs_f.depth() &&
+ lhs_f.height() == rhs_f.height() && lhs_f.width() == rhs_f.width());
+ }
+
+ case loco::Domain::Filter:
+ {
+ auto lhs_f = lhs.as<FilterShape>();
+ auto rhs_f = rhs.as<FilterShape>();
+
+ return (lhs_f.count() == rhs_f.count() && lhs_f.depth() == rhs_f.depth() &&
+ lhs_f.height() == rhs_f.height() && lhs_f.width() == rhs_f.width());
+ }
+
+ case loco::Domain::DepthwiseFilter:
+ {
+ auto lhs_f = lhs.as<DepthwiseFilterShape>();
+ auto rhs_f = rhs.as<DepthwiseFilterShape>();
+
+ return (lhs_f.multiplier() == rhs_f.multiplier() && lhs_f.depth() == rhs_f.depth() &&
+ lhs_f.height() == rhs_f.height() && lhs_f.width() == rhs_f.width());
+ }
+
+ case loco::Domain::Bias:
+ {
+ auto lhs_f = lhs.as<BiasShape>();
+ auto rhs_f = rhs.as<BiasShape>();
+
+ return (lhs_f.length() == rhs_f.length());
+ }
+
+ case loco::Domain::Matrix:
+ {
+ auto lhs_f = lhs.as<MatrixShape>();
+ auto rhs_f = rhs.as<MatrixShape>();
+
+ return (lhs_f.height() == rhs_f.height() && lhs_f.width() == rhs_f.width());
+ }
+
+ default:
+ throw std::runtime_error("Not supported domain for NodeShape equality");
+ }
+ return false;
+}
+
+} // namespace loco
diff --git a/compiler/loco/src/IR/NodeShape.test.cpp b/compiler/loco/src/IR/NodeShape.test.cpp
new file mode 100644
index 000000000..4f092e024
--- /dev/null
+++ b/compiler/loco/src/IR/NodeShape.test.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/NodeShape.h"
+
+#include <gtest/gtest.h>
+
+TEST(NodeShapeTest, default_constructor)
+{
+ loco::NodeShape node_shape;
+
+ ASSERT_EQ(node_shape.domain(), loco::Domain::Unknown);
+}
+
+TEST(NodeShapeTest, bias_shape_constructor)
+{
+ loco::BiasShape bias_shape;
+
+ bias_shape.length() = 4;
+
+ loco::NodeShape node_shape{bias_shape};
+
+ ASSERT_EQ(node_shape.domain(), loco::Domain::Bias);
+ ASSERT_EQ(node_shape.as<loco::BiasShape>().length(), 4);
+}
+
+TEST(NodeShapeTest, dwfilter_shape_constructor)
+{
+ loco::DepthwiseFilterShape dwfilter_shape;
+
+ dwfilter_shape.depth() = 2;
+ dwfilter_shape.multiplier() = 3;
+ dwfilter_shape.height() = 4;
+ dwfilter_shape.width() = 5;
+
+ loco::NodeShape node_shape{dwfilter_shape};
+
+ ASSERT_EQ(node_shape.domain(), loco::Domain::DepthwiseFilter);
+ ASSERT_EQ(node_shape.as<loco::DepthwiseFilterShape>().depth(), 2);
+ ASSERT_EQ(node_shape.as<loco::DepthwiseFilterShape>().multiplier(), 3);
+ ASSERT_EQ(node_shape.as<loco::DepthwiseFilterShape>().height(), 4);
+ ASSERT_EQ(node_shape.as<loco::DepthwiseFilterShape>().width(), 5);
+}
+
+TEST(NodeShapeTest, feature_shape_constructor)
+{
+ loco::FeatureShape feature_shape;
+
+ feature_shape.count() = 2;
+ feature_shape.depth() = 3;
+ feature_shape.height() = 4;
+ feature_shape.width() = 5;
+
+ loco::NodeShape node_shape{feature_shape};
+
+ ASSERT_EQ(node_shape.domain(), loco::Domain::Feature);
+ ASSERT_EQ(node_shape.as<loco::FeatureShape>().count(), 2);
+ ASSERT_EQ(node_shape.as<loco::FeatureShape>().depth(), 3);
+ ASSERT_EQ(node_shape.as<loco::FeatureShape>().height(), 4);
+ ASSERT_EQ(node_shape.as<loco::FeatureShape>().width(), 5);
+}
+
+TEST(NodeShapeTest, filter_shape_constructor)
+{
+ loco::FilterShape filter_shape;
+
+ filter_shape.count() = 2;
+ filter_shape.depth() = 3;
+ filter_shape.height() = 4;
+ filter_shape.width() = 5;
+
+ loco::NodeShape node_shape{filter_shape};
+
+ ASSERT_EQ(node_shape.domain(), loco::Domain::Filter);
+ ASSERT_EQ(node_shape.as<loco::FilterShape>().count(), 2);
+ ASSERT_EQ(node_shape.as<loco::FilterShape>().depth(), 3);
+ ASSERT_EQ(node_shape.as<loco::FilterShape>().height(), 4);
+ ASSERT_EQ(node_shape.as<loco::FilterShape>().width(), 5);
+}
+
+TEST(NodeShapeTest, tensor_shape_constructor)
+{
+ loco::TensorShape tensor_shape;
+
+ tensor_shape.rank(2);
+ tensor_shape.dim(0) = 4;
+ tensor_shape.dim(1) = 5;
+
+ loco::NodeShape node_shape{tensor_shape};
+
+ ASSERT_EQ(node_shape.domain(), loco::Domain::Tensor);
+ ASSERT_EQ(node_shape.as<loco::TensorShape>().rank(), 2);
+ ASSERT_EQ(node_shape.as<loco::TensorShape>().dim(0), 4);
+ ASSERT_EQ(node_shape.as<loco::TensorShape>().dim(1), 5);
+}
+
+TEST(NodeShapeTest, copy_constructible)
+{
+ loco::TensorShape tensor_shape;
+
+ tensor_shape.rank(2);
+ tensor_shape.dim(0) = 4;
+ tensor_shape.dim(1) = 5;
+
+ loco::NodeShape orig{tensor_shape};
+ loco::NodeShape copy{orig}; // Call Copy Constructor
+
+ ASSERT_EQ(copy.domain(), loco::Domain::Tensor);
+ ASSERT_EQ(copy.as<loco::TensorShape>().rank(), 2);
+ ASSERT_EQ(copy.as<loco::TensorShape>().dim(0), 4);
+ ASSERT_EQ(copy.as<loco::TensorShape>().dim(1), 5);
+}
diff --git a/compiler/loco/src/IR/Nodes.cpp b/compiler/loco/src/IR/Nodes.cpp
new file mode 100644
index 000000000..133b69430
--- /dev/null
+++ b/compiler/loco/src/IR/Nodes.cpp
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Nodes.h"
+#include "loco/IR/Graph.h"
+
+#include <cassert>
+#include <limits>
+
+// This file validates "Nodes.h". Please DO NOT remove this file.
+namespace
+{
+
+/**
+ * @note This function is currently only used in assert. Compiler will
+ * warn/error this function as unused in Release build.
+ * Making inline will make compiler happy.
+ */
+// Is it possible to update lhs as rhs?
+inline bool dtype_assignable(loco::DataType lhs, loco::DataType rhs)
+{
+ if (lhs == loco::DataType::Unknown)
+ {
+ return true;
+ }
+
+ // lhs is already known, and thus rhs should be matched
+ return lhs == rhs;
+}
+
+} // namespace
+
+/**
+ * Push
+ */
+namespace loco
+{
+
+void Push::index(const GraphOutputIndex &index)
+{
+ // Push internally stores "GraphOutputIndex" as int64_t
+ _index = static_cast<int64_t>(index);
+}
+
+GraphOutputIndex Push::index(void) const
+{
+ assert(_index >= std::numeric_limits<GraphOutputIndex>::min());
+ assert(_index <= std::numeric_limits<GraphOutputIndex>::max());
+ return static_cast<GraphOutputIndex>(_index);
+}
+
+void link(GraphOutput *output, Push *push) { push->index(output->index()); }
+
+Push *push_node(Graph *g, const GraphOutputIndex &index)
+{
+ for (uint32_t n = 0; n < g->nodes()->size(); ++n)
+ {
+ if (auto push = dynamic_cast<Push *>(g->nodes()->at(n)))
+ {
+ if (push->indexed() && push->index() == index)
+ {
+ return push;
+ }
+ }
+ }
+ return nullptr;
+}
+
+} // namespace loco
+
+/**
+ * Pull
+ */
+namespace loco
+{
+
+void Pull::index(const GraphInputIndex &index)
+{
+ // ASSUMPTION
+ //
+ // It is possible to update index multiple times, but only with the same value!
+ assert(!indexed() or _index == index);
+
+ if (indexed())
+ {
+ assert(_index == index);
+ return;
+ }
+
+ // Push internally stores "GraphInputIndex" as int64_t
+ _index = static_cast<int64_t>(index);
+
+ // ASSUMPTION: The return value of graph() never changes!
+ if (graph() != nullptr && _dtype != loco::DataType::Unknown)
+ {
+ // Update Graph-level input only if it is not yet specified
+ if (graph()->inputs()->at(_index)->dtype() == DataType::Unknown)
+ {
+ graph()->inputs()->at(_index)->dtype(_dtype);
+ }
+ assert(graph()->inputs()->at(_index)->dtype() == _dtype);
+ graph()->inputs()->at(_index)->dtype(_dtype);
+
+ // Reset the locally cached data
+ _dtype = DataType::Unknown;
+ }
+}
+
+GraphInputIndex Pull::index(void) const
+{
+ assert(_index >= std::numeric_limits<GraphInputIndex>::min());
+ assert(_index <= std::numeric_limits<GraphInputIndex>::max());
+ return static_cast<GraphInputIndex>(_index);
+}
+
+void Pull::dtype(const DataType &dt)
+{
+ // ASSUMPTION: "dtype" is never invalidated!
+ assert(dt != loco::DataType::Unknown);
+ // ASSUMPTION
+ //
+ // It is possible to update index multiple times, but only with the same value!
+ if (indexed())
+ {
+ assert(dtype_assignable(graph()->inputs()->at(_index)->dtype(), dt));
+ graph()->inputs()->at(_index)->dtype(dt);
+ return;
+ }
+
+ // Use local cache
+ _dtype = dt;
+}
+
+DataType Pull::dtype(void) const
+{
+ if (graph() != nullptr and _index >= 0)
+ {
+ assert(_dtype == DataType::Unknown);
+ return graph()->inputs()->at(_index)->dtype();
+ }
+ else
+ {
+ return _dtype;
+ }
+}
+
+void link(GraphInput *input, Pull *pull) { pull->index(input->index()); }
+
+Pull *pull_node(Graph *g, const GraphInputIndex &index)
+{
+ for (uint32_t n = 0; n < g->nodes()->size(); ++n)
+ {
+ if (auto pull = dynamic_cast<Pull *>(g->nodes()->at(n)))
+ {
+ if (pull->indexed() && pull->index() == index)
+ {
+ return pull;
+ }
+ }
+ }
+ return nullptr;
+}
+
+} // namespace loco
+
+/**
+ * ConstGen
+ */
+namespace loco
+{
+
// Typed accessors over the untyped byte buffer (_data) that backs ConstGen.
// All of them require that the node's dtype() matches the template argument DT.

/// @brief Number of DT-typed elements currently stored in the buffer.
template <DataType DT> uint32_t ConstGen::size(void) const
{
  assert(dtype() == DT);
  // The byte buffer must hold a whole number of DT elements.
  assert(_data.size() % sizeof(typename DataTypeImpl<DT>::Type) == 0);
  return _data.size() / sizeof(typename DataTypeImpl<DT>::Type);
}

/// @brief Resize the buffer to hold exactly "l" DT-typed elements.
template <DataType DT> void ConstGen::size(uint32_t l)
{
  assert(dtype() == DT);
  _data.resize(l * sizeof(typename DataTypeImpl<DT>::Type));
}

/// @brief Read-only access to the n-th DT-typed element.
template <DataType DT> const typename DataTypeImpl<DT>::Type &ConstGen::at(uint32_t n) const
{
  assert(dtype() == DT);
  assert(n < size<DT>());
  return *(reinterpret_cast<const typename DataTypeImpl<DT>::Type *>(_data.data()) + n);
}

/// @brief Mutable access to the n-th DT-typed element.
template <DataType DT> typename DataTypeImpl<DT>::Type &ConstGen::at(uint32_t n)
{
  assert(dtype() == DT);
  assert(n < size<DT>());
  return *(reinterpret_cast<typename DataTypeImpl<DT>::Type *>(_data.data()) + n);
}

// Explicitly instantiate the accessors for each supported dtype.
#define INSTANTIATE(DT)                                                                  \
  template uint32_t ConstGen::size<DT>(void) const;                                      \
  template void ConstGen::size<DT>(uint32_t);                                            \
  template const typename DataTypeImpl<DT>::Type &ConstGen::at<DT>(uint32_t) const;      \
  template typename DataTypeImpl<DT>::Type &ConstGen::at<DT>(uint32_t);

INSTANTIATE(DataType::S32);
INSTANTIATE(DataType::FLOAT32);

#undef INSTANTIATE
+
+} // namespace loco
+
+/**
+ * TensorBroadcast
+ */
+namespace loco
+{
+
+bool TensorBroadcast::Mapping::defined(const TensorAxis &axis) const
+{
+ return _content.find(axis) != _content.end();
+}
+
+const Dimension &TensorBroadcast::Mapping::dim(const TensorAxis &axis) const
+{
+ return _content.at(axis);
+}
+
+Dimension &TensorBroadcast::Mapping::dim(const TensorAxis &axis) { return _content[axis]; }
+
+} // namespace loco
diff --git a/compiler/loco/src/IR/Nodes.test.cpp b/compiler/loco/src/IR/Nodes.test.cpp
new file mode 100644
index 000000000..cd51f46c0
--- /dev/null
+++ b/compiler/loco/src/IR/Nodes.test.cpp
@@ -0,0 +1,588 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Nodes.h"
+#include "loco/IR/CanonicalDialect.h"
+
+#include <gtest/gtest.h>
+
TEST(PushTest, constructor)
{
  loco::Push push_node;

  // A freshly created Push belongs to the canonical dialect ...
  ASSERT_EQ(push_node.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(push_node.opcode(), loco::CanonicalOpcode::Push);

  // ... and is not yet bound to any graph-level output
  ASSERT_FALSE(push_node.indexed());
}

TEST(PushTest, shape)
{
  const std::vector<uint32_t> dims{1, 8, 16, 3};

  // NOTE(review): this test instantiates loco::Pull although it is named
  // "PushTest" - presumably because the TensorShape mixin lives on Pull.
  // Confirm whether loco::Push supports shape() and update accordingly.
  loco::Pull push_node;

  push_node.shape({dims[0], dims[1], dims[2], dims[3]});

  ASSERT_EQ(push_node.rank(), dims.size());
  ASSERT_EQ(push_node.dim(0), dims[0]);
  ASSERT_EQ(push_node.dim(1), dims[1]);
  ASSERT_EQ(push_node.dim(2), dims[2]);
  ASSERT_EQ(push_node.dim(3), dims[3]);
}
+
// Pull (graph-level input node): default state and shape round-trip.
TEST(PullTest, constructor)
{
  loco::Pull pull_node;

  ASSERT_EQ(pull_node.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(pull_node.opcode(), loco::CanonicalOpcode::Pull);

  // Unbound, dtype unknown, rank 0 until configured
  ASSERT_FALSE(pull_node.indexed());

  ASSERT_EQ(pull_node.dtype(), loco::DataType::Unknown);
  ASSERT_EQ(pull_node.rank(), 0);
}

TEST(PullTest, shape)
{
  const std::vector<uint32_t> dims{1, 8, 16, 3};

  loco::Pull pull_node;

  pull_node.shape({dims[0], dims[1], dims[2], dims[3]});

  ASSERT_EQ(pull_node.rank(), dims.size());
  ASSERT_EQ(pull_node.dim(0), dims[0]);
  ASSERT_EQ(pull_node.dim(1), dims[1]);
  ASSERT_EQ(pull_node.dim(2), dims[2]);
  ASSERT_EQ(pull_node.dim(3), dims[3]);
}
+
// Single-input pass-through / activation nodes: dialect, opcode, and a null
// input link right after construction.
TEST(ForwardTest, constructor)
{
  loco::Forward forward_node;

  ASSERT_EQ(forward_node.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(forward_node.opcode(), loco::CanonicalOpcode::Forward);

  ASSERT_EQ(forward_node.input(), nullptr);
}

TEST(ReLUTest, constructor)
{
  loco::ReLU relu_node;

  ASSERT_EQ(relu_node.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(relu_node.opcode(), loco::CanonicalOpcode::ReLU);

  ASSERT_EQ(relu_node.input(), nullptr);
}

TEST(ReLU6Test, constructor)
{
  loco::ReLU6 relu6_node;

  ASSERT_EQ(relu6_node.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(relu6_node.opcode(), loco::CanonicalOpcode::ReLU6);

  ASSERT_EQ(relu6_node.input(), nullptr);
}
+
// ConstGen: configure dtype/shape, size the backing buffer, then write and
// read back every element through the typed at<DT>() accessors (FLOAT32 case).
TEST(ConstGenTest, constructor)
{
  loco::ConstGen constgen_node;

  ASSERT_EQ(constgen_node.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(constgen_node.opcode(), loco::CanonicalOpcode::ConstGen);

  ASSERT_EQ(constgen_node.dtype(), loco::DataType::Unknown);
  ASSERT_EQ(constgen_node.rank(), 0);

  constgen_node.dtype(loco::DataType::FLOAT32);
  ASSERT_EQ(constgen_node.dtype(), loco::DataType::FLOAT32);

  constgen_node.rank(2);
  ASSERT_EQ(constgen_node.rank(), 2);

  constgen_node.dim(0) = 2;
  constgen_node.dim(1) = 3;

  ASSERT_TRUE(constgen_node.dim(0).known());
  ASSERT_TRUE(constgen_node.dim(1).known());

  ASSERT_EQ(constgen_node.dim(0), 2);
  ASSERT_EQ(constgen_node.dim(1), 3);

  constgen_node.size<loco::DataType::FLOAT32>(6);

  ASSERT_EQ(constgen_node.size<loco::DataType::FLOAT32>(), 6);

  constgen_node.at<loco::DataType::FLOAT32>(0) = 0.0f; // Set 0,0
  constgen_node.at<loco::DataType::FLOAT32>(1) = 1.0f; // Set 0,1
  constgen_node.at<loco::DataType::FLOAT32>(2) = 2.0f; // Set 0,2
  constgen_node.at<loco::DataType::FLOAT32>(3) = 3.0f; // Set 1,0
  constgen_node.at<loco::DataType::FLOAT32>(4) = 4.0f; // Set 1,1
  constgen_node.at<loco::DataType::FLOAT32>(5) = 5.0f; // Set 1,2

  ASSERT_EQ(constgen_node.at<loco::DataType::FLOAT32>(0), 0.0f);
  ASSERT_EQ(constgen_node.at<loco::DataType::FLOAT32>(1), 1.0f);
  ASSERT_EQ(constgen_node.at<loco::DataType::FLOAT32>(2), 2.0f);
  ASSERT_EQ(constgen_node.at<loco::DataType::FLOAT32>(3), 3.0f);
  ASSERT_EQ(constgen_node.at<loco::DataType::FLOAT32>(4), 4.0f);
  ASSERT_EQ(constgen_node.at<loco::DataType::FLOAT32>(5), 5.0f);
}

// Same round-trip as above for the S32 dtype, including negative values.
TEST(ConstGenTest, constructor_s32)
{
  loco::ConstGen constgen_node;

  ASSERT_EQ(constgen_node.dtype(), loco::DataType::Unknown);
  ASSERT_EQ(constgen_node.rank(), 0);

  constgen_node.dtype(loco::DataType::S32);
  ASSERT_EQ(constgen_node.dtype(), loco::DataType::S32);

  constgen_node.rank(2);
  ASSERT_EQ(constgen_node.rank(), 2);

  constgen_node.dim(0) = 2;
  constgen_node.dim(1) = 3;

  ASSERT_TRUE(constgen_node.dim(0).known());
  ASSERT_TRUE(constgen_node.dim(1).known());

  ASSERT_EQ(constgen_node.dim(0), 2);
  ASSERT_EQ(constgen_node.dim(1), 3);

  constgen_node.size<loco::DataType::S32>(6);

  ASSERT_EQ(constgen_node.size<loco::DataType::S32>(), 6);

  constgen_node.at<loco::DataType::S32>(0) = 0;  // Set 0,0
  constgen_node.at<loco::DataType::S32>(1) = 1;  // Set 0,1
  constgen_node.at<loco::DataType::S32>(2) = 2;  // Set 0,2
  constgen_node.at<loco::DataType::S32>(3) = -3; // Set 1,0
  constgen_node.at<loco::DataType::S32>(4) = -4; // Set 1,1
  constgen_node.at<loco::DataType::S32>(5) = -5; // Set 1,2

  ASSERT_EQ(constgen_node.at<loco::DataType::S32>(0), 0);
  ASSERT_EQ(constgen_node.at<loco::DataType::S32>(1), 1);
  ASSERT_EQ(constgen_node.at<loco::DataType::S32>(2), 2);
  ASSERT_EQ(constgen_node.at<loco::DataType::S32>(3), -3);
  ASSERT_EQ(constgen_node.at<loco::DataType::S32>(4), -4);
  ASSERT_EQ(constgen_node.at<loco::DataType::S32>(5), -5);
}
+
// Pooling nodes: default pad is 0 on all sides, window and stride default to 1x1.
TEST(MaxPool2DTest, constructor)
{
  loco::MaxPool2D maxpool_node;

  ASSERT_EQ(maxpool_node.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(maxpool_node.opcode(), loco::CanonicalOpcode::MaxPool2D);

  ASSERT_EQ(maxpool_node.ifm(), nullptr);

  ASSERT_EQ(maxpool_node.pad()->top(), 0);
  ASSERT_EQ(maxpool_node.pad()->bottom(), 0);
  ASSERT_EQ(maxpool_node.pad()->left(), 0);
  ASSERT_EQ(maxpool_node.pad()->right(), 0);

  ASSERT_EQ(maxpool_node.window()->vertical(), 1);
  ASSERT_EQ(maxpool_node.window()->horizontal(), 1);

  ASSERT_EQ(maxpool_node.stride()->vertical(), 1);
  ASSERT_EQ(maxpool_node.stride()->horizontal(), 1);
}

// Each pad side is settable independently.
TEST(MaxPool2DTest, pad)
{
  const uint32_t t = 1;
  const uint32_t b = 2;
  const uint32_t l = 3;
  const uint32_t r = 4;

  loco::MaxPool2D maxpool_node;

  maxpool_node.pad()->top(t);
  ASSERT_EQ(maxpool_node.pad()->top(), t);

  maxpool_node.pad()->bottom(b);
  ASSERT_EQ(maxpool_node.pad()->bottom(), b);

  maxpool_node.pad()->left(l);
  ASSERT_EQ(maxpool_node.pad()->left(), l);

  maxpool_node.pad()->right(r);
  ASSERT_EQ(maxpool_node.pad()->right(), r);
}

// AvgPool2D additionally carries a padding "convention", Unknown by default.
TEST(AvgPool2DTest, constructor)
{
  loco::AvgPool2D avgpool_node;

  ASSERT_EQ(avgpool_node.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(avgpool_node.opcode(), loco::CanonicalOpcode::AvgPool2D);

  ASSERT_EQ(avgpool_node.ifm(), nullptr);

  ASSERT_EQ(avgpool_node.convention(), loco::AvgPool2D::Convention::Unknown);

  ASSERT_EQ(avgpool_node.pad()->top(), 0);
  ASSERT_EQ(avgpool_node.pad()->bottom(), 0);
  ASSERT_EQ(avgpool_node.pad()->left(), 0);
  ASSERT_EQ(avgpool_node.pad()->right(), 0);

  ASSERT_EQ(avgpool_node.window()->vertical(), 1);
  ASSERT_EQ(avgpool_node.window()->horizontal(), 1);

  ASSERT_EQ(avgpool_node.stride()->vertical(), 1);
  ASSERT_EQ(avgpool_node.stride()->horizontal(), 1);
}
+
// Encode/Decode nodes start with null input and null codec; FixedReshape
// starts with rank 0 and takes its target shape via shape({...}).
TEST(FeatureEncodeTest, constructor)
{
  loco::FeatureEncode feature_encode;

  ASSERT_EQ(feature_encode.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(feature_encode.opcode(), loco::CanonicalOpcode::FeatureEncode);

  ASSERT_EQ(feature_encode.input(), nullptr);
  ASSERT_EQ(feature_encode.encoder(), nullptr);
}

TEST(FeatureDecodeTest, constructor)
{
  loco::FeatureDecode feature_decode;

  ASSERT_EQ(feature_decode.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(feature_decode.opcode(), loco::CanonicalOpcode::FeatureDecode);

  ASSERT_EQ(feature_decode.input(), nullptr);
  ASSERT_EQ(feature_decode.decoder(), nullptr);
}

TEST(Reshape_Fixed_Test, constructor)
{
  loco::Reshape<loco::ReshapeType::Fixed> reshape;

  ASSERT_EQ(reshape.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(reshape.opcode(), loco::CanonicalOpcode::FixedReshape);

  ASSERT_EQ(reshape.rank(), 0);
}

TEST(Reshape_Fixed_Test, shape)
{
  loco::Reshape<loco::ReshapeType::Fixed> reshape;
  reshape.shape({2, 3});

  ASSERT_EQ(reshape.rank(), 2);
  ASSERT_EQ(reshape.dim(0), 2);
  ASSERT_EQ(reshape.dim(1), 3);
}

TEST(FilterEncodeTest, constructor)
{
  loco::FilterEncode filter_encode;

  ASSERT_EQ(filter_encode.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(filter_encode.opcode(), loco::CanonicalOpcode::FilterEncode);

  ASSERT_EQ(filter_encode.input(), nullptr);
  ASSERT_EQ(filter_encode.encoder(), nullptr);
}

TEST(FilterDecodeTest, constructor)
{
  loco::FilterDecode filter_decode;

  ASSERT_EQ(filter_decode.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(filter_decode.opcode(), loco::CanonicalOpcode::FilterDecode);

  ASSERT_EQ(filter_decode.input(), nullptr);
  ASSERT_EQ(filter_decode.decoder(), nullptr);
}
+
// Depthwise filter codecs, tensor concatenation, and Conv2D defaults.
TEST(DepthwiseFilterEncodeTest, constructor)
{
  loco::DepthwiseFilterEncode dw_filter_encode;

  ASSERT_EQ(dw_filter_encode.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(dw_filter_encode.opcode(), loco::CanonicalOpcode::DepthwiseFilterEncode);

  ASSERT_EQ(dw_filter_encode.input(), nullptr);
  ASSERT_EQ(dw_filter_encode.encoder(), nullptr);
}

TEST(DepthwiseFilterDecodeTest, constructor)
{
  loco::DepthwiseFilterDecode dw_filter_decode;

  ASSERT_EQ(dw_filter_decode.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(dw_filter_decode.opcode(), loco::CanonicalOpcode::DepthwiseFilterDecode);

  ASSERT_EQ(dw_filter_decode.input(), nullptr);
  ASSERT_EQ(dw_filter_decode.decoder(), nullptr);
}

// TensorConcat: two inputs, concat axis defaults to 0 and is mutable.
TEST(TensorConcatTest, constructor)
{
  loco::TensorConcat tensor_concat;

  ASSERT_EQ(tensor_concat.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(tensor_concat.opcode(), loco::CanonicalOpcode::TensorConcat);

  ASSERT_EQ(tensor_concat.lhs(), nullptr);
  ASSERT_EQ(tensor_concat.rhs(), nullptr);
  ASSERT_EQ(tensor_concat.axis(), 0);

  tensor_concat.axis(3);
  ASSERT_EQ(tensor_concat.axis(), 3);
}

TEST(Conv2DTest, constructor)
{
  loco::Conv2D conv2d;

  ASSERT_EQ(conv2d.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(conv2d.opcode(), loco::CanonicalOpcode::Conv2D);

  ASSERT_EQ(conv2d.ifm(), nullptr);
  ASSERT_EQ(conv2d.ker(), nullptr);

  ASSERT_NE(conv2d.pad(), nullptr);
  ASSERT_EQ(conv2d.pad()->top(), 0);
  ASSERT_EQ(conv2d.pad()->bottom(), 0);
  ASSERT_EQ(conv2d.pad()->left(), 0);
  ASSERT_EQ(conv2d.pad()->right(), 0);

  ASSERT_NE(conv2d.stride(), nullptr);
  ASSERT_EQ(conv2d.stride()->vertical(), 1);
  ASSERT_EQ(conv2d.stride()->horizontal(), 1);
}
+
// Convolution variants and bias nodes: default pads are 0, strides are 1;
// the BiasAdd aliases exist per domain (Tensor / Feature).
TEST(DepthwiseConv2DTest, constructor)
{
  loco::DepthwiseConv2D dw_conv2d;

  ASSERT_EQ(dw_conv2d.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(dw_conv2d.opcode(), loco::CanonicalOpcode::DepthwiseConv2D);

  ASSERT_EQ(dw_conv2d.ifm(), nullptr);
  ASSERT_EQ(dw_conv2d.ker(), nullptr);

  ASSERT_NE(dw_conv2d.pad(), nullptr);
  ASSERT_EQ(dw_conv2d.pad()->top(), 0);
  ASSERT_EQ(dw_conv2d.pad()->bottom(), 0);
  ASSERT_EQ(dw_conv2d.pad()->left(), 0);
  ASSERT_EQ(dw_conv2d.pad()->right(), 0);

  ASSERT_NE(dw_conv2d.stride(), nullptr);
  ASSERT_EQ(dw_conv2d.stride()->vertical(), 1);
  ASSERT_EQ(dw_conv2d.stride()->horizontal(), 1);
}

TEST(TransposedConv2DTest, constructor)
{
  loco::TransposedConv2D tr_conv2d;

  ASSERT_EQ(tr_conv2d.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(tr_conv2d.opcode(), loco::CanonicalOpcode::TransposedConv2D);

  ASSERT_EQ(tr_conv2d.ifm(), nullptr);
  ASSERT_EQ(tr_conv2d.ker(), nullptr);

  ASSERT_NE(tr_conv2d.pad(), nullptr);
  ASSERT_EQ(tr_conv2d.pad()->top(), 0);
  ASSERT_EQ(tr_conv2d.pad()->bottom(), 0);
  ASSERT_EQ(tr_conv2d.pad()->left(), 0);
  ASSERT_EQ(tr_conv2d.pad()->right(), 0);

  ASSERT_NE(tr_conv2d.stride(), nullptr);
  ASSERT_EQ(tr_conv2d.stride()->vertical(), 1);
  ASSERT_EQ(tr_conv2d.stride()->horizontal(), 1);
}

TEST(BiasEncodeTest, constructor)
{
  loco::BiasEncode bias_encode;

  ASSERT_EQ(bias_encode.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(bias_encode.opcode(), loco::CanonicalOpcode::BiasEncode);

  ASSERT_EQ(bias_encode.input(), nullptr);
}

TEST(TensorBiasAddTest, constructor)
{
  loco::BiasAdd<loco::Domain::Tensor> bias_add;

  ASSERT_EQ(bias_add.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(bias_add.opcode(), loco::CanonicalOpcode::TensorBiasAdd);

  ASSERT_EQ(bias_add.value(), nullptr);
  ASSERT_EQ(bias_add.bias(), nullptr);
  ASSERT_EQ(bias_add.axis(), 0);
}

// Compile-time check: the TensorBiasAdd alias resolves to BiasAdd<Tensor>.
TEST(TensorBiasAddTest, alias)
{
  loco::TensorBiasAdd bias_add;

  SUCCEED();
}

TEST(FeatureBiasAddTest, constructor)
{
  loco::BiasAdd<loco::Domain::Feature> bias_add;

  ASSERT_EQ(bias_add.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(bias_add.opcode(), loco::CanonicalOpcode::FeatureBiasAdd);

  ASSERT_EQ(bias_add.value(), nullptr);
  ASSERT_EQ(bias_add.bias(), nullptr);
}

// Compile-time check: the FeatureBiasAdd alias resolves to BiasAdd<Feature>.
TEST(FeatureBiasAddTest, alias)
{
  loco::FeatureBiasAdd bias_add;

  SUCCEED();
}
+
// Element-wise binary nodes: construction-only (compile/link) checks.
TEST(EltwiseAddTest, constructor)
{
  loco::EltwiseAdd eltwise_add;

  SUCCEED();
}

TEST(EltwiseMaxTest, constructor)
{
  loco::EltwiseMax eltwise_max;

  SUCCEED();
}

TEST(EltwiseMulTest, constructor)
{
  loco::EltwiseMul eltwise_mul;

  SUCCEED();
}

TEST(EltwiseSubTest, constructor)
{
  loco::EltwiseSub eltwise_sub;

  SUCCEED();
}

TEST(EltwiseDivTest, constructor)
{
  loco::EltwiseDiv eltwise_div;

  SUCCEED();
}

// EltwiseSqrt is unary and exposes an input link.
TEST(EltwiseSqrtTest, constructor)
{
  loco::EltwiseSqrt sqrt_node;

  ASSERT_EQ(sqrt_node.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(sqrt_node.opcode(), loco::CanonicalOpcode::EltwiseSqrt);

  ASSERT_EQ(sqrt_node.input(), nullptr);
}
+
// TensorBroadcast, Matrix codecs, MatMul, and TensorTranspose.
TEST(TensorBroadcastTest, constructor)
{
  loco::TensorBroadcast tensor_broadcast_node;

  ASSERT_EQ(tensor_broadcast_node.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(tensor_broadcast_node.opcode(), loco::CanonicalOpcode::TensorBroadcast);

  ASSERT_EQ(tensor_broadcast_node.input(), nullptr);
}

// Writing through mapping()->dim(axis) defines the axis as a side effect.
TEST(TensorBroadcastTest, mapping)
{
  loco::TensorBroadcast tensor_broadcast_node;

  ASSERT_EQ(tensor_broadcast_node.mapping()->defined(0), false);

  tensor_broadcast_node.mapping()->dim(0) = 3;

  ASSERT_EQ(tensor_broadcast_node.mapping()->defined(0), true);
  ASSERT_EQ(tensor_broadcast_node.mapping()->dim(0), 3);
}

TEST(MatrixEncodeTest, constructor)
{
  loco::MatrixEncode matrix_encode;

  ASSERT_EQ(matrix_encode.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(matrix_encode.opcode(), loco::CanonicalOpcode::MatrixEncode);

  ASSERT_EQ(matrix_encode.input(), nullptr);
}

TEST(MatrixDecodeTest, constructor)
{
  loco::MatrixDecode matrix_decode;

  ASSERT_EQ(matrix_decode.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(matrix_decode.opcode(), loco::CanonicalOpcode::MatrixDecode);

  ASSERT_EQ(matrix_decode.input(), nullptr);
}

TEST(MatMulTest, constructor)
{
  loco::MatMul mat_mul;

  ASSERT_EQ(mat_mul.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(mat_mul.opcode(), loco::CanonicalOpcode::MatMul);

  ASSERT_EQ(mat_mul.lhs(), nullptr);
  ASSERT_EQ(mat_mul.rhs(), nullptr);
}

TEST(TransposeTest, constructor)
{
  loco::TensorTranspose transpose;

  ASSERT_EQ(transpose.dialect(), loco::CanonicalDialect::get());
  ASSERT_EQ(transpose.opcode(), loco::CanonicalOpcode::TensorTranspose);

  ASSERT_EQ(transpose.input(), nullptr);
  ASSERT_EQ(transpose.perm()->size(), 0);
}

// The permutation is sized first, then each axis slot is assigned.
TEST(TransposeTest, perm)
{
  loco::TensorTranspose transpose;

  transpose.perm()->size(3);
  transpose.perm()->axis(0) = 1;
  transpose.perm()->axis(1) = 2;
  transpose.perm()->axis(2) = 0;

  ASSERT_EQ(transpose.perm()->axis(0), 1);
  ASSERT_EQ(transpose.perm()->axis(1), 2);
  ASSERT_EQ(transpose.perm()->axis(2), 0);
}
diff --git a/compiler/loco/src/IR/Padding2D.test.cpp b/compiler/loco/src/IR/Padding2D.test.cpp
new file mode 100644
index 000000000..2e3d4af87
--- /dev/null
+++ b/compiler/loco/src/IR/Padding2D.test.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Padding2D.h"
+
+#include <gtest/gtest.h>
+
// Padding2D: all four sides default to zero.
TEST(PadTest, default_constructor_2D)
{
  loco::Padding2D pad;

  ASSERT_EQ(pad.top(), 0);
  ASSERT_EQ(pad.bottom(), 0);
  ASSERT_EQ(pad.left(), 0);
  ASSERT_EQ(pad.right(), 0);
}
diff --git a/compiler/loco/src/IR/PaddingND.test.cpp b/compiler/loco/src/IR/PaddingND.test.cpp
new file mode 100644
index 000000000..0e20406ff
--- /dev/null
+++ b/compiler/loco/src/IR/PaddingND.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/PaddingND.h"
+
+#include <gtest/gtest.h>
+
// PaddingND: rank is set first, then front/back padding per dimension.
TEST(PaddingNDTest, default_constructor_ND)
{
  loco::PaddingND padding;

  padding.rank(1);
  padding.front(0) = 1;
  padding.back(0) = 2;

  ASSERT_EQ(padding.rank(), 1);
  ASSERT_EQ(padding.front(0), 1);
  ASSERT_EQ(padding.back(0), 2);
}
diff --git a/compiler/loco/src/IR/PermutingCodec.cpp b/compiler/loco/src/IR/PermutingCodec.cpp
new file mode 100644
index 000000000..2857e5e28
--- /dev/null
+++ b/compiler/loco/src/IR/PermutingCodec.cpp
@@ -0,0 +1,630 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/PermutingCodec.h"
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+#include <set>
+#include <stdexcept>
+
+/**
+ * Feature Domain
+ */
+namespace
+{
+
+using loco::FeatureAxis;
+
+inline bool valid(const FeatureAxis &axis)
+{
+ switch (axis)
+ {
+ case FeatureAxis::Count:
+ return true;
+ case FeatureAxis::Depth:
+ return true;
+ case FeatureAxis::Height:
+ return true;
+ case FeatureAxis::Width:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+inline bool valid(const loco::Permutation<loco::Domain::Feature> &perm)
+{
+ auto check = [&perm](FeatureAxis axis_f) {
+ if (!perm.mapped(axis_f))
+ return false;
+ return perm.axis(axis_f) < 4;
+ };
+
+ if (!check(FeatureAxis::Count))
+ return false;
+ if (!check(FeatureAxis::Depth))
+ return false;
+ if (!check(FeatureAxis::Height))
+ return false;
+ if (!check(FeatureAxis::Width))
+ return false;
+
+ // Check whether tensor axes are all distinct
+ std::set<loco::TensorAxis> values;
+
+ values.insert(perm[FeatureAxis::Count]);
+ values.insert(perm[FeatureAxis::Depth]);
+ values.insert(perm[FeatureAxis::Height]);
+ values.insert(perm[FeatureAxis::Width]);
+
+ return values.size() == 4;
+}
+
+} // namespace
+
+namespace loco
+{
+
//
// Permutation
//

// A feature axis is "mapped" once an entry for it exists in _map.
bool Permutation<Domain::Feature>::mapped(const FeatureAxis &axis_f) const
{
  assert(valid(axis_f) && "invalid feature axis");
  return _map.find(axis_f) != _map.end();
}

// Read-only lookup; the axis must be valid and already mapped.
uint32_t Permutation<Domain::Feature>::axis(const FeatureAxis &axis_f) const
{
  assert(valid(axis_f) && "invalid feature axis");
  assert(mapped(axis_f) && "unmapped feature axis");
  return _map.at(axis_f);
}

// Mutable lookup; creates (and thereby maps) the entry on first use.
uint32_t &Permutation<Domain::Feature>::axis(const FeatureAxis &axis_f)
{
  assert(valid(axis_f) && "invalid feature axis");
  return _map[axis_f];
}
+
//
// Permuting Encoder
//

// Map a tensor shape into the corresponding feature shape via the permutation.
FeatureShape PermutingEncoder<Domain::Feature>::shape(const TensorShape &in) const
{
  assert(valid() && "invalid permutation");

  FeatureShape out;

  out.count() = in.dim(_perm[FeatureAxis::Count]);
  out.depth() = in.dim(_perm[FeatureAxis::Depth]);
  out.height() = in.dim(_perm[FeatureAxis::Height]);
  out.width() = in.dim(_perm[FeatureAxis::Width]);

  return out;
}

// Map a feature index into the corresponding rank-4 tensor index.
TensorIndex PermutingEncoder<Domain::Feature>::value(const FeatureIndex &in) const
{
  assert(valid() && "invalid permutation");

  TensorIndex out;

  out.resize(4);

  out.at(_perm[FeatureAxis::Count]) = in.batch();
  out.at(_perm[FeatureAxis::Depth]) = in.channel();
  out.at(_perm[FeatureAxis::Height]) = in.row();
  out.at(_perm[FeatureAxis::Width]) = in.column();

  return out;
}

// Deep-copy this encoder (the permutation is copied by value).
std::unique_ptr<FeatureEncoder> PermutingEncoder<Domain::Feature>::clone(void) const
{
  return stdex::make_unique<PermutingEncoder<Domain::Feature>>(_perm);
}

bool PermutingEncoder<Domain::Feature>::valid(void) const { return ::valid(_perm); }
+
+//
+// Permuting Decoder
+//
+TensorShape PermutingDecoder<Domain::Feature>::shape(const FeatureShape &in) const
+{
+ assert(valid() && "invalid permuation");
+
+ TensorShape out;
+
+ out.rank(4);
+
+ out.dim(_perm[FeatureAxis::Count]) = in.count();
+ out.dim(_perm[FeatureAxis::Depth]) = in.depth();
+ out.dim(_perm[FeatureAxis::Height]) = in.height();
+ out.dim(_perm[FeatureAxis::Width]) = in.width();
+
+ return out;
+}
+
+FeatureIndex PermutingDecoder<Domain::Feature>::value(const TensorIndex &in) const
+{
+ assert(valid() && "invalid permutation");
+
+ FeatureIndex out;
+
+ out.batch() = in.at(_perm[FeatureAxis::Count]);
+ out.channel() = in.at(_perm[FeatureAxis::Depth]);
+ out.row() = in.at(_perm[FeatureAxis::Height]);
+ out.column() = in.at(_perm[FeatureAxis::Width]);
+
+ return out;
+}
+
+std::unique_ptr<FeatureDecoder> PermutingDecoder<Domain::Feature>::clone(void) const
+{
+ return stdex::make_unique<PermutingDecoder<Domain::Feature>>(_perm);
+}
+
+bool PermutingDecoder<Domain::Feature>::valid(void) const { return ::valid(_perm); }
+
+} // namespace loco
+
+/**
+ * Filter Domain
+ */
namespace
{

using loco::FilterAxis;

// Return true iff "axis" is one of the four known filter axes.
inline bool valid(const FilterAxis &axis)
{
  switch (axis)
  {
    case FilterAxis::Count:
      return true;
    case FilterAxis::Depth:
      return true;
    case FilterAxis::Height:
      return true;
    case FilterAxis::Width:
      return true;
    default:
      break;
  }

  return false;
}

// A filter permutation is valid iff every filter axis is mapped onto a
// distinct tensor axis below rank 4.
inline bool valid(const loco::Permutation<loco::Domain::Filter> &perm)
{
  auto check = [&perm](FilterAxis axis_f) {
    if (!perm.mapped(axis_f))
      return false;
    return perm.axis(axis_f) < 4;
  };

  if (!check(FilterAxis::Count))
    return false;
  if (!check(FilterAxis::Depth))
    return false;
  if (!check(FilterAxis::Height))
    return false;
  if (!check(FilterAxis::Width))
    return false;

  // Check whether tensor axes are all distinct
  std::set<loco::TensorAxis> values;

  values.insert(perm[FilterAxis::Count]);
  values.insert(perm[FilterAxis::Depth]);
  values.insert(perm[FilterAxis::Height]);
  values.insert(perm[FilterAxis::Width]);

  return values.size() == 4;
}

} // namespace
+
+namespace loco
+{
+
//
// Permutation
//

// A filter axis is "mapped" once an entry for it exists in _map.
bool Permutation<Domain::Filter>::mapped(const FilterAxis &axis_f) const
{
  assert(valid(axis_f) && "invalid filter axis");
  return _map.find(axis_f) != _map.end();
}

// Read-only lookup; the axis must be valid and already mapped.
// NOTE(review): returns "const uint32_t &" while the Feature-domain
// counterpart returns "uint32_t" by value - confirm whether this
// inconsistency is intentional.
const uint32_t &Permutation<Domain::Filter>::axis(const FilterAxis &axis_f) const
{
  assert(valid(axis_f) && "invalid filter axis");
  assert(mapped(axis_f) && "unmapped filter axis");
  return _map.at(axis_f);
}

// Mutable lookup; creates (and thereby maps) the entry on first use.
uint32_t &Permutation<Domain::Filter>::axis(const FilterAxis &axis_f)
{
  assert(valid(axis_f) && "invalid filter axis");
  return _map[axis_f];
}
+
//
// Permuting Encoder
//

// Map a tensor shape into the corresponding filter shape via the permutation.
FilterShape PermutingEncoder<Domain::Filter>::shape(const TensorShape &in) const
{
  assert(valid() && "invalid permutation");

  FilterShape out;

  out.count() = in.dim(_perm[FilterAxis::Count]);
  out.depth() = in.dim(_perm[FilterAxis::Depth]);
  out.height() = in.dim(_perm[FilterAxis::Height]);
  out.width() = in.dim(_perm[FilterAxis::Width]);

  return out;
}

// Map a filter index into the corresponding rank-4 tensor index.
TensorIndex PermutingEncoder<Domain::Filter>::value(const FilterIndex &in) const
{
  assert(valid() && "invalid permutation");

  TensorIndex out;

  out.resize(4);

  out.at(_perm[FilterAxis::Count]) = in.nth();
  out.at(_perm[FilterAxis::Depth]) = in.channel();
  out.at(_perm[FilterAxis::Height]) = in.row();
  out.at(_perm[FilterAxis::Width]) = in.column();

  return out;
}

bool PermutingEncoder<Domain::Filter>::valid(void) const { return ::valid(_perm); }
+
+//
+// Permuting Decoder
+//
+TensorShape PermutingDecoder<Domain::Filter>::shape(const FilterShape &in) const
+{
+ assert(valid() && "invalid permutation");
+
+ TensorShape out;
+
+ out.rank(4);
+ out.dim(_perm[FilterAxis::Count]) = in.count();
+ out.dim(_perm[FilterAxis::Depth]) = in.depth();
+ out.dim(_perm[FilterAxis::Height]) = in.height();
+ out.dim(_perm[FilterAxis::Width]) = in.width();
+
+ return out;
+}
+
+FilterIndex PermutingDecoder<Domain::Filter>::value(const TensorIndex &in) const
+{
+ assert(valid() && "invalid permutation");
+
+ FilterIndex out;
+
+ out.nth() = in.at(_perm[FilterAxis::Count]);
+ out.channel() = in.at(_perm[FilterAxis::Depth]);
+ out.row() = in.at(_perm[FilterAxis::Height]);
+ out.column() = in.at(_perm[FilterAxis::Width]);
+
+ return out;
+}
+
+bool PermutingDecoder<Domain::Filter>::valid(void) const { return ::valid(_perm); }
+
+} // namespace loco
+
+/**
+ * DepthwiseFilter Domain
+ */
+namespace
+{
+
+using loco::DepthwiseFilterAxis;
+
+// Whitelist of depthwise filter axes (rejects out-of-range enum values)
+inline bool valid(const DepthwiseFilterAxis &axis)
+{
+  switch (axis)
+  {
+    case DepthwiseFilterAxis::Depth:
+      return true;
+    case DepthwiseFilterAxis::Multiplier:
+      return true;
+    case DepthwiseFilterAxis::Height:
+      return true;
+    case DepthwiseFilterAxis::Width:
+      return true;
+    default:
+      break;
+  }
+
+  return false;
+}
+
+// A permutation is valid iff every depthwise filter axis is mapped to a
+// distinct tensor axis in [0, 4)
+inline bool valid(const loco::Permutation<loco::Domain::DepthwiseFilter> &perm)
+{
+  auto check = [&perm](DepthwiseFilterAxis axis_f) {
+    if (!perm.mapped(axis_f))
+      return false;
+    return perm.axis(axis_f) < 4;
+  };
+
+  if (!check(DepthwiseFilterAxis::Depth))
+    return false;
+  if (!check(DepthwiseFilterAxis::Multiplier))
+    return false;
+  if (!check(DepthwiseFilterAxis::Height))
+    return false;
+  if (!check(DepthwiseFilterAxis::Width))
+    return false;
+
+  // Check whether tensor axes are all distinct
+  std::set<loco::TensorAxis> values;
+
+  values.insert(perm[DepthwiseFilterAxis::Depth]);
+  values.insert(perm[DepthwiseFilterAxis::Multiplier]);
+  values.insert(perm[DepthwiseFilterAxis::Height]);
+  values.insert(perm[DepthwiseFilterAxis::Width]);
+
+  return values.size() == 4;
+}
+
+} // namespace
+
+namespace loco
+{
+
+//
+// Permutation
+//
+
+// Check whether a tensor axis has been assigned for the given depthwise filter axis
+bool Permutation<Domain::DepthwiseFilter>::mapped(const DepthwiseFilterAxis &axis_f) const
+{
+  assert(valid(axis_f) && "invalid depthwise filter axis");
+  return _map.find(axis_f) != _map.end();
+}
+
+// Read-only access to the tensor axis assigned to "axis_f" (the axis MUST be mapped)
+const uint32_t &Permutation<Domain::DepthwiseFilter>::axis(const DepthwiseFilterAxis &axis_f) const
+{
+  assert(valid(axis_f) && "invalid depthwise filter axis");
+  assert(mapped(axis_f) && "unmapped depthwise filter axis");
+  return _map.at(axis_f);
+}
+
+// Mutable access (creates a value-initialized entry when "axis_f" is not mapped yet)
+uint32_t &Permutation<Domain::DepthwiseFilter>::axis(const DepthwiseFilterAxis &axis_f)
+{
+  assert(valid(axis_f) && "invalid depthwise filter axis");
+  return _map[axis_f];
+}
+
+//
+// Permuting Encoder
+//
+
+// Infer the depthwise filter shape from a rank-4 tensor shape through the permutation
+DepthwiseFilterShape PermutingEncoder<Domain::DepthwiseFilter>::shape(const TensorShape &in) const
+{
+  assert(valid() && "invalid permutation");
+
+  DepthwiseFilterShape out;
+
+  out.depth() = in.dim(_perm[DepthwiseFilterAxis::Depth]);
+  out.multiplier() = in.dim(_perm[DepthwiseFilterAxis::Multiplier]);
+  out.height() = in.dim(_perm[DepthwiseFilterAxis::Height]);
+  out.width() = in.dim(_perm[DepthwiseFilterAxis::Width]);
+
+  return out;
+}
+
+// Map a depthwise filter index to the corresponding rank-4 tensor index
+TensorIndex PermutingEncoder<Domain::DepthwiseFilter>::value(const DepthwiseFilterIndex &in) const
+{
+  assert(valid() && "invalid permutation");
+
+  TensorIndex out;
+
+  out.resize(4);
+
+  out.at(_perm[DepthwiseFilterAxis::Depth]) = in.channel();
+  out.at(_perm[DepthwiseFilterAxis::Multiplier]) = in.nth();
+  out.at(_perm[DepthwiseFilterAxis::Height]) = in.row();
+  out.at(_perm[DepthwiseFilterAxis::Width]) = in.column();
+
+  return out;
+}
+
+bool PermutingEncoder<Domain::DepthwiseFilter>::valid(void) const { return ::valid(_perm); }
+
+//
+// Permuting Decoder
+//
+
+// Infer the rank-4 tensor shape from a depthwise filter shape through the permutation
+TensorShape PermutingDecoder<Domain::DepthwiseFilter>::shape(const DepthwiseFilterShape &in) const
+{
+  assert(valid() && "invalid permutation");
+
+  TensorShape out;
+  out.rank(4);
+
+  out.dim(_perm[DepthwiseFilterAxis::Depth]) = in.depth();
+  out.dim(_perm[DepthwiseFilterAxis::Multiplier]) = in.multiplier();
+  out.dim(_perm[DepthwiseFilterAxis::Height]) = in.height();
+  out.dim(_perm[DepthwiseFilterAxis::Width]) = in.width();
+
+  return out;
+}
+
+// Map a rank-4 tensor index back to the corresponding depthwise filter index
+DepthwiseFilterIndex PermutingDecoder<Domain::DepthwiseFilter>::value(const TensorIndex &in) const
+{
+  assert(valid() && "invalid permutation");
+  assert(in.rank() == 4);
+
+  DepthwiseFilterIndex out;
+
+  out.channel() = in.at(_perm[DepthwiseFilterAxis::Depth]);
+  out.nth() = in.at(_perm[DepthwiseFilterAxis::Multiplier]);
+  out.row() = in.at(_perm[DepthwiseFilterAxis::Height]);
+  out.column() = in.at(_perm[DepthwiseFilterAxis::Width]);
+
+  return out;
+}
+
+bool PermutingDecoder<Domain::DepthwiseFilter>::valid(void) const { return ::valid(_perm); }
+
+} // namespace loco
+
+/**
+ * Matrix Domain
+ */
+namespace
+{
+
+using loco::MatrixAxis;
+
+// Whitelist of matrix axes (rejects out-of-range enum values)
+inline bool valid(const MatrixAxis &axis)
+{
+  switch (axis)
+  {
+    case MatrixAxis::Height:
+      return true;
+    case MatrixAxis::Width:
+      return true;
+    default:
+      break;
+  }
+
+  return false;
+}
+
+// A permutation is valid iff both matrix axes are mapped to a distinct
+// tensor axis in [0, 2)
+inline bool valid(const loco::Permutation<loco::Domain::Matrix> &perm)
+{
+  auto check = [&perm](MatrixAxis axis_f) {
+    if (!perm.mapped(axis_f))
+      return false;
+    return perm.axis(axis_f) < 2;
+  };
+
+  if (!check(MatrixAxis::Height))
+    return false;
+  if (!check(MatrixAxis::Width))
+    return false;
+
+  // Check whether tensor axes are all distinct
+  std::set<loco::TensorAxis> values;
+
+  values.insert(perm[MatrixAxis::Height]);
+  values.insert(perm[MatrixAxis::Width]);
+
+  return values.size() == 2;
+}
+
+} // namespace
+
+namespace loco
+{
+
+//
+// Permutation
+//
+
+// Check whether a tensor axis has been assigned for the given matrix axis
+bool Permutation<Domain::Matrix>::mapped(const MatrixAxis &axis_f) const
+{
+  assert(valid(axis_f) && "invalid matrix axis");
+  return _map.find(axis_f) != _map.end();
+}
+
+// Read-only access to the tensor axis assigned to "axis_f" (the axis MUST be mapped)
+uint32_t Permutation<Domain::Matrix>::axis(const MatrixAxis &axis_f) const
+{
+  assert(valid(axis_f) && "invalid matrix axis");
+  assert(mapped(axis_f) && "unmapped matrix axis");
+  return _map.at(axis_f);
+}
+
+// Mutable access (creates a value-initialized entry when "axis_f" is not mapped yet)
+uint32_t &Permutation<Domain::Matrix>::axis(const MatrixAxis &axis_f)
+{
+  assert(valid(axis_f) && "invalid matrix axis");
+  return _map[axis_f];
+}
+
+//
+// Permuting Encoder
+//
+
+// Infer the matrix shape from a rank-2 tensor shape through the permutation
+MatrixShape PermutingEncoder<Domain::Matrix>::shape(const TensorShape &in) const
+{
+  assert(valid() && "invalid permutation");
+
+  MatrixShape out;
+
+  out.height() = in.dim(_perm[MatrixAxis::Height]);
+  out.width() = in.dim(_perm[MatrixAxis::Width]);
+
+  return out;
+}
+
+// Map a matrix index to the corresponding rank-2 tensor index
+TensorIndex PermutingEncoder<Domain::Matrix>::value(const MatrixIndex &in) const
+{
+  assert(valid() && "invalid permutation");
+
+  TensorIndex out;
+
+  out.resize(2);
+
+  out.at(_perm[MatrixAxis::Height]) = in.row();
+  out.at(_perm[MatrixAxis::Width]) = in.column();
+
+  return out;
+}
+
+bool PermutingEncoder<Domain::Matrix>::valid(void) const { return ::valid(_perm); }
+
+//
+// Permuting Decoder
+//
+
+// Infer the rank-2 tensor shape from a matrix shape through the permutation
+TensorShape PermutingDecoder<Domain::Matrix>::shape(const MatrixShape &in) const
+{
+  // NOTE Typo fix: the original message read "invalid permuation"
+  assert(valid() && "invalid permutation");
+
+  TensorShape out;
+
+  out.rank(2);
+
+  out.dim(_perm[MatrixAxis::Height]) = in.height();
+  out.dim(_perm[MatrixAxis::Width]) = in.width();
+
+  return out;
+}
+
+// Map a rank-2 tensor index back to the corresponding matrix index
+MatrixIndex PermutingDecoder<Domain::Matrix>::value(const TensorIndex &in) const
+{
+  assert(valid() && "invalid permutation");
+  // Debug-build rank check, consistent with PermutingDecoder<Domain::DepthwiseFilter>::value
+  assert(in.rank() == 2);
+
+  MatrixIndex out;
+
+  out.row() = in.at(_perm[MatrixAxis::Height]);
+  out.column() = in.at(_perm[MatrixAxis::Width]);
+
+  return out;
+}
+
+bool PermutingDecoder<Domain::Matrix>::valid(void) const { return ::valid(_perm); }
+
+} // namespace loco
diff --git a/compiler/loco/src/IR/PermutingCodec.test.cpp b/compiler/loco/src/IR/PermutingCodec.test.cpp
new file mode 100644
index 000000000..2eff286d0
--- /dev/null
+++ b/compiler/loco/src/IR/PermutingCodec.test.cpp
@@ -0,0 +1,553 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/PermutingCodec.h"
+
+#include <gtest/gtest.h>
+
+using namespace loco;
+
+// Permutation<Domain::Feature>: starts unmapped; operator[] both maps and reads axes.
+// NOTE Suite name fixed: "PemutationTest" -> "PermutationTest"
+TEST(PermutationTest, feature)
+{
+  Permutation<Domain::Feature> perm;
+
+  // All values are invalid at the beginning
+  ASSERT_FALSE(perm.mapped(FeatureAxis::Count));
+  ASSERT_FALSE(perm.mapped(FeatureAxis::Depth));
+  ASSERT_FALSE(perm.mapped(FeatureAxis::Height));
+  ASSERT_FALSE(perm.mapped(FeatureAxis::Width));
+
+  // Update mapping
+  perm[FeatureAxis::Count] = 5;
+  perm[FeatureAxis::Depth] = 6;
+  perm[FeatureAxis::Height] = 7;
+  perm[FeatureAxis::Width] = 8;
+
+  // Now perm has a mapping for all the axes
+  ASSERT_TRUE(perm.mapped(FeatureAxis::Count));
+  ASSERT_TRUE(perm.mapped(FeatureAxis::Depth));
+  ASSERT_TRUE(perm.mapped(FeatureAxis::Height));
+  ASSERT_TRUE(perm.mapped(FeatureAxis::Width));
+
+  // Check the value
+  ASSERT_EQ(perm[FeatureAxis::Count], 5);
+  ASSERT_EQ(perm[FeatureAxis::Depth], 6);
+  ASSERT_EQ(perm[FeatureAxis::Height], 7);
+  ASSERT_EQ(perm[FeatureAxis::Width], 8);
+}
+
+// Permutation<Domain::Filter>: starts unmapped; operator[] both maps and reads axes.
+// NOTE Suite name fixed: "PemutationTest" -> "PermutationTest"
+TEST(PermutationTest, filter)
+{
+  Permutation<Domain::Filter> perm;
+
+  // All values are invalid at the beginning
+  ASSERT_FALSE(perm.mapped(FilterAxis::Count));
+  ASSERT_FALSE(perm.mapped(FilterAxis::Depth));
+  ASSERT_FALSE(perm.mapped(FilterAxis::Height));
+  ASSERT_FALSE(perm.mapped(FilterAxis::Width));
+
+  // Update mapping
+  perm[FilterAxis::Count] = 5;
+  perm[FilterAxis::Depth] = 6;
+  perm[FilterAxis::Height] = 7;
+  perm[FilterAxis::Width] = 8;
+
+  // Now perm has a mapping for all the axes
+  ASSERT_TRUE(perm.mapped(FilterAxis::Count));
+  ASSERT_TRUE(perm.mapped(FilterAxis::Depth));
+  ASSERT_TRUE(perm.mapped(FilterAxis::Height));
+  ASSERT_TRUE(perm.mapped(FilterAxis::Width));
+
+  // Check the value
+  ASSERT_EQ(perm[FilterAxis::Count], 5);
+  ASSERT_EQ(perm[FilterAxis::Depth], 6);
+  ASSERT_EQ(perm[FilterAxis::Height], 7);
+  ASSERT_EQ(perm[FilterAxis::Width], 8);
+}
+
+// Permutation<Domain::DepthwiseFilter>: starts unmapped; operator[] both maps and reads axes.
+// NOTE Suite name fixed: "PemutationTest" -> "PermutationTest"
+TEST(PermutationTest, depthwise_filter)
+{
+  Permutation<Domain::DepthwiseFilter> perm;
+
+  // All values are invalid at the beginning
+  ASSERT_FALSE(perm.mapped(DepthwiseFilterAxis::Depth));
+  ASSERT_FALSE(perm.mapped(DepthwiseFilterAxis::Multiplier));
+  ASSERT_FALSE(perm.mapped(DepthwiseFilterAxis::Height));
+  ASSERT_FALSE(perm.mapped(DepthwiseFilterAxis::Width));
+
+  // Update mapping
+  perm[DepthwiseFilterAxis::Depth] = 5;
+  perm[DepthwiseFilterAxis::Multiplier] = 6;
+  perm[DepthwiseFilterAxis::Height] = 7;
+  perm[DepthwiseFilterAxis::Width] = 8;
+
+  // Now perm has a mapping for all the axes
+  ASSERT_TRUE(perm.mapped(DepthwiseFilterAxis::Depth));
+  ASSERT_TRUE(perm.mapped(DepthwiseFilterAxis::Multiplier));
+  ASSERT_TRUE(perm.mapped(DepthwiseFilterAxis::Height));
+  ASSERT_TRUE(perm.mapped(DepthwiseFilterAxis::Width));
+
+  // Check the value
+  ASSERT_EQ(perm[DepthwiseFilterAxis::Depth], 5);
+  ASSERT_EQ(perm[DepthwiseFilterAxis::Multiplier], 6);
+  ASSERT_EQ(perm[DepthwiseFilterAxis::Height], 7);
+  ASSERT_EQ(perm[DepthwiseFilterAxis::Width], 8);
+}
+
+// Feature encoder: validity transitions (duplicate/out-of-range mappings are
+// invalid), then NHWC tensor -> feature shape/index translation
+TEST(PermutingEncoderTest, feature)
+{
+  PermutingEncoder<Domain::Feature> enc;
+
+  // Encoder is invalid at the beginning
+  ASSERT_FALSE(enc.valid());
+
+  // Set "invalid" mapping (Depth = 6 is out of the rank-4 range)
+  enc.perm()->axis(FeatureAxis::Count) = 0;
+  enc.perm()->axis(FeatureAxis::Depth) = 6;
+  enc.perm()->axis(FeatureAxis::Height) = 1;
+  enc.perm()->axis(FeatureAxis::Width) = 2;
+
+  // Encoder is still invalid
+  ASSERT_FALSE(enc.valid());
+
+  // Set another "invalid" mapping (Depth = 1 collides with Height)
+  enc.perm()->axis(FeatureAxis::Depth) = 1;
+
+  // Encoder is still invalid
+  ASSERT_FALSE(enc.valid());
+
+  // Set "valid" mapping
+  enc.perm()->axis(FeatureAxis::Depth) = 3;
+
+  // Encoder is now valid
+  ASSERT_TRUE(enc.valid());
+
+  // Let's test with a HD (1280x720) RGB image
+  TensorShape tensor_shape;
+
+  tensor_shape.rank(4);
+  tensor_shape.dim(0) = 1;    // COUNT
+  tensor_shape.dim(1) = 720;  // HEIGHT
+  tensor_shape.dim(2) = 1280; // WIDTH
+  tensor_shape.dim(3) = 3;    // DEPTH
+
+  // Get the feature shape corresponding to a given image
+  auto feature_shape = enc.shape(tensor_shape);
+
+  ASSERT_EQ(feature_shape.count(), 1);
+  ASSERT_EQ(feature_shape.depth(), 3);
+  ASSERT_EQ(feature_shape.height(), 720);
+  ASSERT_EQ(feature_shape.width(), 1280);
+
+  // Let's find a source tensor index!
+  FeatureIndex feature_index;
+
+  feature_index.batch() = 0;
+  feature_index.channel() = 1;
+  feature_index.row() = 2;
+  feature_index.column() = 3;
+
+  auto tensor_index = enc.value(feature_index);
+
+  ASSERT_EQ(tensor_index.at(0), 0); // BATCH(COUNT)
+  ASSERT_EQ(tensor_index.at(1), 2); // ROW(HEIGHT)
+  ASSERT_EQ(tensor_index.at(2), 3); // COLUMN(WIDTH)
+  ASSERT_EQ(tensor_index.at(3), 1); // CHANNEL(DEPTH)
+}
+
+// clone() must deep-copy the permutation (no state shared with the original)
+TEST(PermutingEncoderTest, feature_clone)
+{
+  PermutingEncoder<Domain::Feature> src_enc;
+
+  auto src_perm = src_enc.perm();
+
+  src_perm->axis(FeatureAxis::Count) = 0;
+  src_perm->axis(FeatureAxis::Depth) = 3;
+  src_perm->axis(FeatureAxis::Height) = 1;
+  src_perm->axis(FeatureAxis::Width) = 2;
+
+  auto dst_enc = src_enc.clone();
+  auto dst_perm = dynamic_cast<PermutingEncoder<Domain::Feature> *>(dst_enc.get())->perm();
+
+  EXPECT_EQ(dst_perm->axis(FeatureAxis::Count), src_perm->axis(FeatureAxis::Count));
+  EXPECT_EQ(dst_perm->axis(FeatureAxis::Depth), src_perm->axis(FeatureAxis::Depth));
+  EXPECT_EQ(dst_perm->axis(FeatureAxis::Height), src_perm->axis(FeatureAxis::Height));
+  EXPECT_EQ(dst_perm->axis(FeatureAxis::Width), src_perm->axis(FeatureAxis::Width));
+
+  // Update on cloned encoder SHOULD NOT affect the original encoder
+  dst_perm->axis(FeatureAxis::Height) += 1;
+
+  EXPECT_EQ(src_perm->axis(FeatureAxis::Height), 1);
+  EXPECT_EQ(dst_perm->axis(FeatureAxis::Height), 2);
+}
+
+// Filter encoder: validity transitions, then NHWC tensor -> filter shape/index translation
+TEST(PermutingEncoderTest, filter)
+{
+  PermutingEncoder<Domain::Filter> enc;
+
+  // Encoder is invalid at the beginning
+  ASSERT_FALSE(enc.valid());
+
+  // Set "invalid" mapping (Depth = 6 is out of the rank-4 range)
+  enc.perm()->axis(FilterAxis::Count) = 0;
+  enc.perm()->axis(FilterAxis::Depth) = 6;
+  enc.perm()->axis(FilterAxis::Height) = 1;
+  enc.perm()->axis(FilterAxis::Width) = 2;
+
+  // Encoder is still invalid
+  ASSERT_FALSE(enc.valid());
+
+  // Set another "invalid" mapping (Depth = 1 collides with Height)
+  enc.perm()->axis(FilterAxis::Depth) = 1;
+
+  // Encoder is still invalid
+  ASSERT_FALSE(enc.valid());
+
+  // Set "valid" mapping
+  enc.perm()->axis(FilterAxis::Depth) = 3;
+
+  // Encoder is now valid
+  ASSERT_TRUE(enc.valid());
+
+  TensorShape tensor_shape;
+
+  tensor_shape.rank(4);
+  tensor_shape.dim(0) = 8; // COUNT
+  tensor_shape.dim(1) = 1; // HEIGHT
+  tensor_shape.dim(2) = 7; // WIDTH
+  tensor_shape.dim(3) = 4; // DEPTH
+
+  // Get the corresponding filter shape
+  auto filter_shape = enc.shape(tensor_shape);
+
+  ASSERT_EQ(filter_shape.count(), 8);
+  ASSERT_EQ(filter_shape.depth(), 4);
+  ASSERT_EQ(filter_shape.height(), 1);
+  ASSERT_EQ(filter_shape.width(), 7);
+
+  // Let's find a source tensor index!
+  FilterIndex filter_index;
+
+  filter_index.nth() = 1;
+  filter_index.channel() = 2;
+  filter_index.row() = 0;
+  filter_index.column() = 3;
+
+  auto tensor_index = enc.value(filter_index);
+
+  ASSERT_EQ(tensor_index.at(0), 1); // NTH(COUNT)
+  ASSERT_EQ(tensor_index.at(1), 0); // ROW(HEIGHT)
+  ASSERT_EQ(tensor_index.at(2), 3); // COLUMN(WIDTH)
+  ASSERT_EQ(tensor_index.at(3), 2); // CHANNEL(DEPTH)
+}
+
+// Depthwise filter encoder: validity transitions, then tensor -> depthwise
+// filter shape/index translation
+TEST(PermutingEncoderTest, depthwise_filter)
+{
+  PermutingEncoder<Domain::DepthwiseFilter> enc;
+
+  // Encoder is invalid at the beginning
+  ASSERT_FALSE(enc.valid());
+
+  // Set "invalid" mapping (Multiplier = 6 is out of the rank-4 range)
+  enc.perm()->axis(DepthwiseFilterAxis::Depth) = 0;
+  enc.perm()->axis(DepthwiseFilterAxis::Multiplier) = 6;
+  enc.perm()->axis(DepthwiseFilterAxis::Height) = 1;
+  enc.perm()->axis(DepthwiseFilterAxis::Width) = 2;
+
+  // Encoder is still invalid
+  ASSERT_FALSE(enc.valid());
+
+  // Set another "invalid" mapping (Multiplier = 1 collides with Height)
+  enc.perm()->axis(DepthwiseFilterAxis::Multiplier) = 1;
+
+  // Encoder is still invalid
+  ASSERT_FALSE(enc.valid());
+
+  // Set "valid" mapping
+  enc.perm()->axis(DepthwiseFilterAxis::Multiplier) = 3;
+
+  // Encoder is now valid
+  ASSERT_TRUE(enc.valid());
+
+  TensorShape tensor_shape;
+
+  tensor_shape.rank(4);
+  tensor_shape.dim(0) = 8; // DEPTH
+  tensor_shape.dim(1) = 1; // HEIGHT
+  tensor_shape.dim(2) = 7; // WIDTH
+  tensor_shape.dim(3) = 4; // MULTIPLIER
+
+  // Get the corresponding depthwise filter shape
+  auto filter_shape = enc.shape(tensor_shape);
+
+  ASSERT_EQ(filter_shape.depth(), 8);
+  ASSERT_EQ(filter_shape.multiplier(), 4);
+  ASSERT_EQ(filter_shape.height(), 1);
+  ASSERT_EQ(filter_shape.width(), 7);
+
+  // Let's find a source tensor index!
+  DepthwiseFilterIndex filter_index;
+
+  filter_index.channel() = 1;
+  filter_index.nth() = 2;
+  filter_index.row() = 0;
+  filter_index.column() = 3;
+
+  auto tensor_index = enc.value(filter_index);
+
+  ASSERT_EQ(tensor_index.at(0), 1); // CHANNEL(DEPTH)
+  ASSERT_EQ(tensor_index.at(1), 0); // ROW(HEIGHT)
+  ASSERT_EQ(tensor_index.at(2), 3); // COLUMN(WIDTH)
+  ASSERT_EQ(tensor_index.at(3), 2); // NTH(MULTIPLIER)
+}
+
+// Constructing an encoder from a Permutation must copy it (no state sharing)
+// NOTE Test name fixed to match the sibling "depthwise_filter" naming style
+TEST(PermutingEncoderTest, depthwise_filter_init)
+{
+  Permutation<Domain::DepthwiseFilter> src_perm;
+
+  src_perm.axis(DepthwiseFilterAxis::Multiplier) = 0;
+  src_perm.axis(DepthwiseFilterAxis::Depth) = 3;
+  src_perm.axis(DepthwiseFilterAxis::Height) = 1;
+  src_perm.axis(DepthwiseFilterAxis::Width) = 2;
+
+  PermutingEncoder<Domain::DepthwiseFilter> dst_enc{src_perm};
+  auto dst_perm = dst_enc.perm();
+
+  EXPECT_EQ(dst_perm->axis(DepthwiseFilterAxis::Multiplier),
+            src_perm.axis(DepthwiseFilterAxis::Multiplier));
+  EXPECT_EQ(dst_perm->axis(DepthwiseFilterAxis::Depth), src_perm.axis(DepthwiseFilterAxis::Depth));
+  EXPECT_EQ(dst_perm->axis(DepthwiseFilterAxis::Height),
+            src_perm.axis(DepthwiseFilterAxis::Height));
+  EXPECT_EQ(dst_perm->axis(DepthwiseFilterAxis::Width), src_perm.axis(DepthwiseFilterAxis::Width));
+
+  // Update on dst perm SHOULD NOT affect the src perm
+  dst_perm->axis(DepthwiseFilterAxis::Height) += 1;
+
+  EXPECT_EQ(src_perm.axis(DepthwiseFilterAxis::Height), 1);
+  EXPECT_EQ(dst_perm->axis(DepthwiseFilterAxis::Height), 2);
+}
+
+// Feature decoder: validity transitions, then feature -> NHWC tensor
+// shape/index translation (inverse of the encoder test above)
+TEST(PermutingDecoderTest, feature)
+{
+  PermutingDecoder<Domain::Feature> dec;
+
+  // Decoder is invalid at the beginning
+  ASSERT_FALSE(dec.valid());
+
+  // Set "invalid" mapping (Depth = 6 is out of the rank-4 range)
+  dec.perm()->axis(FeatureAxis::Count) = 0;
+  dec.perm()->axis(FeatureAxis::Depth) = 6;
+  dec.perm()->axis(FeatureAxis::Height) = 1;
+  dec.perm()->axis(FeatureAxis::Width) = 2;
+
+  // Decoder is still invalid
+  ASSERT_FALSE(dec.valid());
+
+  // Set another "invalid" mapping (Depth = 1 collides with Height)
+  dec.perm()->axis(FeatureAxis::Depth) = 1;
+
+  // Decoder is still invalid
+  ASSERT_FALSE(dec.valid());
+
+  // Set "valid" mapping
+  dec.perm()->axis(FeatureAxis::Depth) = 3;
+
+  // Decoder is now valid
+  ASSERT_TRUE(dec.valid());
+
+  // Let's test with a HD (1280x720) RGB image
+  FeatureShape feature_shape;
+
+  feature_shape.count() = 1;
+  feature_shape.depth() = 3;
+  feature_shape.height() = 720;
+  feature_shape.width() = 1280;
+
+  // Get the tensor shape corresponding to a given image
+  auto tensor_shape = dec.shape(feature_shape);
+
+  ASSERT_EQ(tensor_shape.rank(), 4);
+  ASSERT_EQ(tensor_shape.dim(0), 1);    // COUNT
+  ASSERT_EQ(tensor_shape.dim(1), 720);  // HEIGHT
+  ASSERT_EQ(tensor_shape.dim(2), 1280); // WIDTH
+  ASSERT_EQ(tensor_shape.dim(3), 3);    // DEPTH
+
+  // Let's find a source feature index!
+  TensorIndex tensor_index;
+
+  tensor_index.resize(4);
+
+  tensor_index.at(0) = 0; // BATCH(COUNT)
+  tensor_index.at(3) = 1; // CHANNEL(DEPTH)
+  tensor_index.at(1) = 2; // ROW(HEIGHT)
+  tensor_index.at(2) = 3; // COLUMN(WIDTH)
+
+  auto feature_index = dec.value(tensor_index);
+
+  ASSERT_EQ(feature_index.batch(), 0);
+  ASSERT_EQ(feature_index.channel(), 1);
+  ASSERT_EQ(feature_index.row(), 2);
+  ASSERT_EQ(feature_index.column(), 3);
+}
+
+// clone() must deep-copy the permutation (no state shared with the original)
+// NOTE Locals renamed src_enc/dst_enc -> src_dec/dst_dec: this test exercises a Decoder
+TEST(PermutingDecoderTest, feature_clone)
+{
+  PermutingDecoder<Domain::Feature> src_dec;
+
+  auto src_perm = src_dec.perm();
+
+  src_perm->axis(FeatureAxis::Count) = 0;
+  src_perm->axis(FeatureAxis::Depth) = 3;
+  src_perm->axis(FeatureAxis::Height) = 1;
+  src_perm->axis(FeatureAxis::Width) = 2;
+
+  auto dst_dec = src_dec.clone();
+  auto dst_perm = dynamic_cast<PermutingDecoder<Domain::Feature> *>(dst_dec.get())->perm();
+
+  EXPECT_EQ(dst_perm->axis(FeatureAxis::Count), src_perm->axis(FeatureAxis::Count));
+  EXPECT_EQ(dst_perm->axis(FeatureAxis::Depth), src_perm->axis(FeatureAxis::Depth));
+  EXPECT_EQ(dst_perm->axis(FeatureAxis::Height), src_perm->axis(FeatureAxis::Height));
+  EXPECT_EQ(dst_perm->axis(FeatureAxis::Width), src_perm->axis(FeatureAxis::Width));
+
+  // Update on cloned decoder SHOULD NOT affect the original decoder
+  dst_perm->axis(FeatureAxis::Height) += 1;
+
+  EXPECT_EQ(src_perm->axis(FeatureAxis::Height), 1);
+  EXPECT_EQ(dst_perm->axis(FeatureAxis::Height), 2);
+}
+
+// Filter decoder: validity transitions, then filter -> NHWC tensor
+// shape/index translation
+TEST(PermutingDecoderTest, filter)
+{
+  PermutingDecoder<Domain::Filter> dec;
+
+  // Decoder is invalid at the beginning
+  ASSERT_FALSE(dec.valid());
+
+  // Set "invalid" mapping (Depth = 6 is out of the rank-4 range)
+  dec.perm()->axis(FilterAxis::Count) = 0;
+  dec.perm()->axis(FilterAxis::Depth) = 6;
+  dec.perm()->axis(FilterAxis::Height) = 1;
+  dec.perm()->axis(FilterAxis::Width) = 2;
+
+  // Decoder is still invalid
+  ASSERT_FALSE(dec.valid());
+
+  // Set another "invalid" mapping (Depth = 1 collides with Height)
+  dec.perm()->axis(FilterAxis::Depth) = 1;
+
+  // Decoder is still invalid
+  ASSERT_FALSE(dec.valid());
+
+  // Set "valid" mapping
+  dec.perm()->axis(FilterAxis::Depth) = 3;
+
+  // Decoder is now valid
+  ASSERT_TRUE(dec.valid());
+
+  // Let's test with a small filter
+  FilterShape filter_shape;
+
+  filter_shape.count() = 10;
+  filter_shape.depth() = 3;
+  filter_shape.height() = 6;
+  filter_shape.width() = 8;
+
+  // Get the tensor shape corresponding to a given image
+  auto tensor_shape = dec.shape(filter_shape);
+
+  ASSERT_EQ(tensor_shape.rank(), 4);
+  ASSERT_EQ(tensor_shape.dim(0), 10); // COUNT
+  ASSERT_EQ(tensor_shape.dim(1), 6);  // HEIGHT
+  ASSERT_EQ(tensor_shape.dim(2), 8);  // WIDTH
+  ASSERT_EQ(tensor_shape.dim(3), 3);  // DEPTH
+
+  // Let's find a source filter index!
+  TensorIndex tensor_index;
+
+  tensor_index.resize(4);
+
+  tensor_index.at(0) = 0; // BATCH(COUNT)
+  tensor_index.at(3) = 1; // CHANNEL(DEPTH)
+  tensor_index.at(1) = 2; // ROW(HEIGHT)
+  tensor_index.at(2) = 3; // COLUMN(WIDTH)
+
+  auto filter_index = dec.value(tensor_index);
+
+  ASSERT_EQ(filter_index.nth(), 0);
+  ASSERT_EQ(filter_index.channel(), 1);
+  ASSERT_EQ(filter_index.row(), 2);
+  ASSERT_EQ(filter_index.column(), 3);
+}
+
+// Depthwise filter decoder: validity transitions, then depthwise filter ->
+// tensor shape/index translation
+TEST(PermutingDecoderTest, depthwise_filter)
+{
+  PermutingDecoder<Domain::DepthwiseFilter> dec;
+
+  // Decoder is invalid at the beginning
+  ASSERT_FALSE(dec.valid());
+
+  // Set "invalid" mapping (Multiplier = 6 is out of the rank-4 range)
+  dec.perm()->axis(DepthwiseFilterAxis::Depth) = 0;
+  dec.perm()->axis(DepthwiseFilterAxis::Multiplier) = 6;
+  dec.perm()->axis(DepthwiseFilterAxis::Height) = 1;
+  dec.perm()->axis(DepthwiseFilterAxis::Width) = 2;
+
+  // Decoder is still invalid
+  ASSERT_FALSE(dec.valid());
+
+  // Set another "invalid" mapping (Multiplier = 1 collides with Height)
+  dec.perm()->axis(DepthwiseFilterAxis::Multiplier) = 1;
+
+  // Decoder is still invalid
+  ASSERT_FALSE(dec.valid());
+
+  // Set "valid" mapping
+  dec.perm()->axis(DepthwiseFilterAxis::Multiplier) = 3;
+
+  // Decoder is now valid
+  ASSERT_TRUE(dec.valid());
+
+  DepthwiseFilterShape dw_filter_shape;
+
+  dw_filter_shape.depth() = 8;
+  dw_filter_shape.multiplier() = 1;
+  dw_filter_shape.height() = 7;
+  dw_filter_shape.width() = 4;
+
+  // Get the corresponding depthwise filter shape
+  auto tensor_shape = dec.shape(dw_filter_shape);
+
+  ASSERT_EQ(tensor_shape.dim(0).value(), 8); // DEPTH
+  ASSERT_EQ(tensor_shape.dim(1).value(), 7); // HEIGHT
+  ASSERT_EQ(tensor_shape.dim(2).value(), 4); // WIDTH
+  ASSERT_EQ(tensor_shape.dim(3).value(), 1); // MULTIPLIER
+
+  // Let's find a source tensor index!
+  TensorIndex tensor_index;
+  tensor_index.resize(4);
+
+  tensor_index.at(0) = 4; // CHANNEL(DEPTH)
+  tensor_index.at(1) = 2; // ROW(HEIGHT)
+  tensor_index.at(2) = 1; // COLUMN(WIDTH)
+  tensor_index.at(3) = 0; // NTH(MULTIPLIER)
+
+  auto dw_filter_index = dec.value(tensor_index);
+
+  ASSERT_EQ(dw_filter_index.channel(), 4);
+  ASSERT_EQ(dw_filter_index.nth(), 0);
+  ASSERT_EQ(dw_filter_index.row(), 2);
+  ASSERT_EQ(dw_filter_index.column(), 1);
+}
diff --git a/compiler/loco/src/IR/Stride.test.cpp b/compiler/loco/src/IR/Stride.test.cpp
new file mode 100644
index 000000000..60deb5c6f
--- /dev/null
+++ b/compiler/loco/src/IR/Stride.test.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Stride.h"
+
+#include <gtest/gtest.h>
+
+// A default-constructed 2D stride is 1x1
+TEST(StrideTest, default_constructor_2D)
+{
+  loco::Stride<2> stride;
+
+  ASSERT_EQ(stride.vertical(), 1);
+  ASSERT_EQ(stride.horizontal(), 1);
+}
+
+// Vertical and horizontal strides are independent; updating one leaves the
+// other untouched
+TEST(StrideTest, setter_and_getter_2D)
+{
+  loco::Stride<2> stride;
+
+  stride.vertical(2);
+
+  ASSERT_EQ(stride.vertical(), 2);
+  ASSERT_EQ(stride.horizontal(), 1);
+
+  stride.horizontal(3);
+
+  ASSERT_EQ(stride.vertical(), 2);
+  ASSERT_EQ(stride.horizontal(), 3);
+}
diff --git a/compiler/loco/src/IR/TensorAxis.cpp b/compiler/loco/src/IR/TensorAxis.cpp
new file mode 100644
index 000000000..b083847fc
--- /dev/null
+++ b/compiler/loco/src/IR/TensorAxis.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/TensorAxis.h"
+
+// NOTE This file validates "TensorAxis.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/TensorAxisSet.cpp b/compiler/loco/src/IR/TensorAxisSet.cpp
new file mode 100644
index 000000000..c58237bf7
--- /dev/null
+++ b/compiler/loco/src/IR/TensorAxisSet.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/TensorAxisSet.h"
+
+// NOTE This file validates "TensorAxisSet.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/TensorIndex.cpp b/compiler/loco/src/IR/TensorIndex.cpp
new file mode 100644
index 000000000..cbd3698eb
--- /dev/null
+++ b/compiler/loco/src/IR/TensorIndex.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/TensorIndex.h"
+
+// NOTE This file validates "TensorIndex.h". Please DO NOT remove this file.
diff --git a/compiler/loco/src/IR/TensorShape.cpp b/compiler/loco/src/IR/TensorShape.cpp
new file mode 100644
index 000000000..ad30dcbc0
--- /dev/null
+++ b/compiler/loco/src/IR/TensorShape.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/TensorShape.h"
+
+#include <cassert>
+
+namespace loco
+{
+
+/**
+ * @brief Return the total number of elements in a tensor of the given shape
+ *
+ * Computed as the product of all dimension values; a rank-0 (scalar) shape
+ * therefore yields 1 (the empty product).
+ */
+uint32_t element_count(const loco::TensorShape *tensor_shape)
+{
+  uint32_t res = 1;
+
+  for (uint32_t axis = 0; axis < tensor_shape->rank(); ++axis)
+  {
+    // Let's use "assert" here as "caller" is responsible for this check.
+    // Please refer to the header for details.
+    assert(tensor_shape->dim(axis).known());
+    res *= tensor_shape->dim(axis).value();
+  }
+
+  return res;
+}
+
+} // namespace loco
diff --git a/compiler/loco/src/IR/TensorShape.test.cpp b/compiler/loco/src/IR/TensorShape.test.cpp
new file mode 100644
index 000000000..ce03ccbd4
--- /dev/null
+++ b/compiler/loco/src/IR/TensorShape.test.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/TensorShape.h"
+
+#include <gtest/gtest.h>
+
+// A default-constructed tensor shape has rank 0
+TEST(TensorShapeTest, default_constructor)
+{
+  loco::TensorShape tensor_shape;
+
+  ASSERT_EQ(tensor_shape.rank(), 0);
+}
+
+// Initializer-list construction sets rank and marks every dimension as known
+TEST(TensorShapeTest, initializer_list_constructor)
+{
+  loco::TensorShape tensor_shape{3, 5};
+
+  ASSERT_EQ(tensor_shape.rank(), 2);
+
+  ASSERT_TRUE(tensor_shape.dim(0).known());
+  ASSERT_TRUE(tensor_shape.dim(1).known());
+
+  ASSERT_EQ(tensor_shape.dim(0).value(), 3);
+  ASSERT_EQ(tensor_shape.dim(1).value(), 5);
+}
+
+// Setting the rank creates unknown dimensions
+TEST(TensorShapeTest, rank)
+{
+  loco::TensorShape tensor_shape;
+
+  tensor_shape.rank(2);
+
+  ASSERT_EQ(tensor_shape.rank(), 2);
+  ASSERT_FALSE(tensor_shape.dim(0).known());
+  ASSERT_FALSE(tensor_shape.dim(1).known());
+}
+
+// Assigning a value to a dimension makes that dimension (and only that
+// dimension) known
+TEST(TensorShapeTest, dim)
+{
+  loco::TensorShape tensor_shape;
+
+  tensor_shape.rank(2);
+
+  tensor_shape.dim(0) = 3;
+
+  ASSERT_TRUE(tensor_shape.dim(0).known());
+  ASSERT_FALSE(tensor_shape.dim(1).known());
+
+  ASSERT_EQ(tensor_shape.dim(0), 3);
+}
+
+// Growing the rank preserves already-known dimensions; new dimensions start unknown
+TEST(TensorShapeTest, rank_update)
+{
+  loco::TensorShape tensor_shape;
+
+  tensor_shape.rank(2);
+
+  tensor_shape.dim(1) = 3;
+
+  tensor_shape.rank(4);
+
+  ASSERT_FALSE(tensor_shape.dim(0).known());
+  ASSERT_TRUE(tensor_shape.dim(1).known());
+  ASSERT_FALSE(tensor_shape.dim(2).known());
+  ASSERT_FALSE(tensor_shape.dim(3).known());
+
+  ASSERT_EQ(tensor_shape.dim(1), 3);
+}
+
+// Copy assignment replicates rank and per-dimension known/unknown state
+TEST(TensorShapeTest, copy)
+{
+  loco::TensorShape src;
+
+  src.rank(2);
+  src.dim(1) = 3;
+
+  loco::TensorShape dst;
+
+  dst = src;
+
+  ASSERT_EQ(dst.rank(), 2);
+
+  ASSERT_FALSE(dst.dim(0).known());
+  ASSERT_TRUE(dst.dim(1).known());
+
+  ASSERT_EQ(dst.dim(1), 3);
+}
+
+// element_count returns 1 (the empty product) for a rank-0 shape
+TEST(TensorShapeTest, element_count)
+{
+  // Check Rank-0 case
+  loco::TensorShape src;
+
+  ASSERT_EQ(loco::element_count(&src), 1);
+}
diff --git a/compiler/loco/src/IR/Use.cpp b/compiler/loco/src/IR/Use.cpp
new file mode 100644
index 000000000..fed562c65
--- /dev/null
+++ b/compiler/loco/src/IR/Use.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Use.h"
+#include "loco/IR/Node.h"
+
+#include <cassert>
+
+namespace loco
+{
+
+/**
+ * Re-target this Use to a (possibly different) defining node.
+ *
+ * Detaches this Use from the current definition (if any), removing it from
+ * that node's use set, and then registers it with the new definition.
+ * Passing nullptr simply detaches the Use.
+ */
+void Use::node(Node *node)
+{
+  if (_node != nullptr)
+  {
+    // Invariant: an attached Use is always present in its definition's use set
+    assert(_node->_uses.find(this) != _node->_uses.end());
+    _node->_uses.erase(this);
+    _node = nullptr;
+  }
+
+  assert(_node == nullptr);
+
+  if (node != nullptr)
+  {
+    _node = node;
+    _node->_uses.insert(this);
+  }
+
+  assert(_node == node);
+}
+
+} // namespace loco
diff --git a/compiler/loco/src/IR/Use.test.cpp b/compiler/loco/src/IR/Use.test.cpp
new file mode 100644
index 000000000..4a2f1cc25
--- /dev/null
+++ b/compiler/loco/src/IR/Use.test.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Use.h"
+
+#include "MockupNode.h"
+
+#include <gtest/gtest.h>
+
+// A freshly-constructed Use records its user but points at no definition yet
+TEST(UseTest, constructor)
+{
+  MockupNode user;
+  loco::Use use{&user};
+
+  ASSERT_EQ(use.user(), &user);
+  ASSERT_EQ(use.node(), nullptr);
+}
+
+// Linking a definition updates node() and leaves user() untouched
+TEST(UseTest, link_node)
+{
+  MockupNode def;
+  MockupNode user;
+  loco::Use use{&user};
+
+  use.node(&def);
+
+  ASSERT_EQ(use.user(), &user);
+  ASSERT_EQ(use.node(), &def);
+}
diff --git a/compiler/loco/src/IR/Verifier.cpp b/compiler/loco/src/IR/Verifier.cpp
new file mode 100644
index 000000000..42735a327
--- /dev/null
+++ b/compiler/loco/src/IR/Verifier.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Verifier.h"
+
+#include <set>
+#include <cassert>
+
+namespace
+{
+
+using namespace loco;
+
+/**
+ * Walks every node in a graph and reports structural errors (currently only
+ * missing arguments) to all enrolled ErrorListeners.
+ */
+struct GraphVerifier final
+{
+public:
+  GraphVerifier(loco::Graph *graph) : _graph{graph}
+  {
+    // graph SHOULD NOT BE null
+    assert(_graph != nullptr);
+  }
+
+public:
+  // Enroll a borrowed listener (caller keeps ownership).
+  // ErrorListener SHOULD outlive GraphVerifier
+  GraphVerifier &enroll(ErrorListener *l)
+  {
+    if (l != nullptr)
+    {
+      _listeners.insert(l);
+    }
+    return (*this);
+  }
+
+  // Enroll an owned listener (verifier keeps it alive for its own lifetime)
+  GraphVerifier &enroll(std::unique_ptr<ErrorListener> &&l)
+  {
+    if (l != nullptr)
+    {
+      _listeners.insert(l.get());
+      // Take the ownership of a given listener
+      _owned_listeners.insert(std::move(l));
+    }
+    return (*this);
+  }
+
+public:
+  // Visit all nodes and notify listeners of every null argument found
+  void run(void) const
+  {
+    for (auto node : loco::all_nodes(_graph))
+    {
+      // Verify nodes
+      for (uint32_t n = 0; n < node->arity(); ++n)
+      {
+        if (node->arg(n) == nullptr)
+        {
+          notify(ErrorDetail<ErrorCategory::MissingArgument>{node, n});
+        }
+      }
+    }
+  }
+
+private:
+  // Broadcast one error to every enrolled listener
+  template <typename Error> void notify(const Error &error) const
+  {
+    for (const auto &listener : _listeners)
+    {
+      listener->notify(error);
+    }
+  }
+
+private:
+  loco::Graph *_graph = nullptr;
+
+  // All active error listeners
+  std::set<ErrorListener *> _listeners;
+
+  // Owned error listeners
+  std::set<std::unique_ptr<ErrorListener>> _owned_listeners;
+};
+
+// Convenience factory enabling the fluent graph_verifier(g).enroll(...).run() style
+inline GraphVerifier graph_verifier(loco::Graph *graph) { return GraphVerifier{graph}; }
+
+} // namespace
+
+namespace loco
+{
+
+/**
+ * Returns true iff the graph contains no verification errors.
+ *
+ * An internal counting listener is always enrolled; the optional caller-
+ * supplied listener 'l' additionally receives every reported error.
+ */
+bool valid(Graph *g, std::unique_ptr<ErrorListener> &&l)
+{
+  // Counts MissingArgument errors reported during verification
+  class ErrorCounter final : public ErrorListener
+  {
+  public:
+    uint32_t count(void) const { return _count; }
+
+  public:
+    void notify(const ErrorDetail<ErrorCategory::MissingArgument> &) { _count += 1; }
+
+  private:
+    uint32_t _count = 0;
+  };
+
+  ErrorCounter counter;
+  graph_verifier(g).enroll(&counter).enroll(std::move(l)).run();
+  return counter.count() == 0;
+}
+
+} // namespace loco
diff --git a/compiler/loco/src/IR/Verifier.test.cpp b/compiler/loco/src/IR/Verifier.test.cpp
new file mode 100644
index 000000000..247a59390
--- /dev/null
+++ b/compiler/loco/src/IR/Verifier.test.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Verifier.h"
+
+#include <gtest/gtest.h>
+
+#include <stdex/Memory.h>
+#include <vector>
+
+using stdex::make_unique;
+
+// A Push node with no "from" argument makes the graph invalid
+TEST(VerifierTest, valid_minimal)
+{
+  auto g = loco::make_graph();
+  // NOTE(review): 'push' is intentionally left with a null argument to trigger
+  // a MissingArgument error; the variable itself is otherwise unused
+  auto push = g->nodes()->create<loco::Push>();
+
+  ASSERT_FALSE(loco::valid(g.get()));
+}
+
+// A custom ErrorListener receives the detail (node + argument index) of each error
+TEST(VerifierTest, valid_error_reporter)
+{
+  using namespace loco;
+
+  auto g = loco::make_graph();
+  auto push = g->nodes()->create<loco::Push>();
+
+  // Collects every reported MissingArgument detail into a caller-owned vector
+  class Collector final : public loco::ErrorListener
+  {
+  public:
+    Collector(std::vector<ErrorDetail<ErrorCategory::MissingArgument>> *out) : _out{out}
+    {
+      // DO NOTHING
+    }
+
+  public:
+    void notify(const ErrorDetail<ErrorCategory::MissingArgument> &d) override
+    {
+      _out->emplace_back(d);
+    }
+
+  private:
+    std::vector<ErrorDetail<ErrorCategory::MissingArgument>> *_out;
+  };
+
+  std::vector<ErrorDetail<ErrorCategory::MissingArgument>> errors;
+  ASSERT_FALSE(loco::valid(g.get(), make_unique<Collector>(&errors)));
+  ASSERT_EQ(errors.size(), 1);
+  ASSERT_EQ(errors.at(0).node(), push);
+  ASSERT_EQ(errors.at(0).index(), 0);
+}
diff --git a/compiler/loco/src/IR/Window.test.cpp b/compiler/loco/src/IR/Window.test.cpp
new file mode 100644
index 000000000..c112e0f96
--- /dev/null
+++ b/compiler/loco/src/IR/Window.test.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/IR/Window.h"
+
+#include <gtest/gtest.h>
+
+// A default-constructed 2D window is 1x1
+TEST(WindowTest, default_constructor_2D)
+{
+  loco::Window<2> window;
+
+  ASSERT_EQ(window.vertical(), 1);
+  ASSERT_EQ(window.horizontal(), 1);
+}
+
+// vertical() and horizontal() setters are independent of each other
+TEST(WindowTest, setter_and_getter_2D)
+{
+  loco::Window<2> window;
+
+  window.vertical(2);
+
+  ASSERT_EQ(window.vertical(), 2);
+  ASSERT_EQ(window.horizontal(), 1);
+
+  window.horizontal(3);
+
+  ASSERT_EQ(window.vertical(), 2);
+  ASSERT_EQ(window.horizontal(), 3);
+}
diff --git a/compiler/loco/src/Service/CanonicalShapeInferenceRule.cpp b/compiler/loco/src/Service/CanonicalShapeInferenceRule.cpp
new file mode 100644
index 000000000..d30a8279a
--- /dev/null
+++ b/compiler/loco/src/Service/CanonicalShapeInferenceRule.cpp
@@ -0,0 +1,774 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/Service/CanonicalShapeInferenceRule.h"
+#include "loco/Service/ShapeInference.h"
+
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/IR/CanonicalNode.h>
+#include <loco/IR/CanonicalNodeVisitor.h>
+
+#include <cassert>
+
+namespace
+{
+
+// Spatial (height/width) part of a 2D feature map shape
+struct PlaneShape
+{
+  loco::Dimension height;
+  loco::Dimension width;
+};
+
+// Extracts the spatial part of a feature shape (count/depth are dropped)
+PlaneShape make_plane_shape(const loco::FeatureShape &feature_shape)
+{
+  PlaneShape plane_shape;
+
+  plane_shape.height = feature_shape.height();
+  plane_shape.width = feature_shape.width();
+
+  return plane_shape;
+}
+
+// Writes a PlaneShape back into the height/width of a FeatureShape
+class FeatureShapeUpdater final
+{
+public:
+  FeatureShapeUpdater(loco::FeatureShape *ptr) : _feature_shape_ptr{ptr}
+  {
+    // DO NOTHING
+  }
+
+public:
+  void with(const PlaneShape &plane_shape) const
+  {
+    _feature_shape_ptr->height() = plane_shape.height;
+    _feature_shape_ptr->width() = plane_shape.width;
+  }
+
+private:
+  loco::FeatureShape *_feature_shape_ptr;
+};
+
+/**
+ * HOW TO USE
+ *
+ *   loco::FeatureShape feature_shape = ...;
+ *
+ *   update(feature_shape).with(...)
+ */
+FeatureShapeUpdater update(loco::FeatureShape &feature_shape)
+{
+  return FeatureShapeUpdater{&feature_shape};
+}
+
+// Converts a filter shape into the equivalent 2D sliding window
+loco::Window<2> window_of(const loco::FilterShape &filter_shape)
+{
+  loco::Window<2> window;
+
+  window.vertical(filter_shape.height().value());
+  window.horizontal(filter_shape.width().value());
+
+  return window;
+}
+
+// Converts a depthwise filter shape into the equivalent 2D sliding window
+loco::Window<2> window_of(const loco::DepthwiseFilterShape &depthwise_filter_shape)
+{
+  loco::Window<2> window;
+
+  window.vertical(depthwise_filter_shape.height().value());
+  window.horizontal(depthwise_filter_shape.width().value());
+
+  return window;
+}
+
+// Direction of spatial inference:
+//  - Forward: compute the output plane from the input plane (e.g. Conv2D)
+//  - Backward: compute the input plane from the output plane (e.g. TransposedConv2D)
+enum class Direction
+{
+  Forward,
+  Backward,
+};
+
+template <Direction> class PlaneInference;
+
+/**
+ * Forward spatial inference for sliding-window operations.
+ *
+ * Computes: out = (in + pad_total - window) / stride + 1 (per axis).
+ * pad/window/stride MUST all be configured before operator() is called.
+ */
+template <> class PlaneInference<Direction::Forward> final
+{
+public:
+  PlaneShape operator()(const PlaneShape &in) const
+  {
+    assert(_pad != nullptr);
+    assert(_window != nullptr);
+    assert(_stride != nullptr);
+
+    uint32_t const raw_input_height = in.height.value();
+    uint32_t const raw_input_width = in.width.value();
+
+    uint32_t const raw_window_height = _window->vertical();
+    uint32_t const raw_window_width = _window->horizontal();
+
+    uint32_t const vertical_padding = _pad->top() + _pad->bottom();
+    uint32_t const horizontal_padding = _pad->left() + _pad->right();
+
+    uint32_t const effective_input_height = raw_input_height + vertical_padding;
+    uint32_t const effective_input_width = raw_input_width + horizontal_padding;
+
+    // NOTE To support "dilation" later
+    uint32_t const effective_window_height = raw_window_height;
+    uint32_t const effective_window_width = raw_window_width;
+
+    uint32_t const vertical_stride = _stride->vertical();
+    uint32_t const horizontal_stride = _stride->horizontal();
+
+    // NOTE(review): requires the window to tile the padded input exactly;
+    // shapes where the stride does not divide evenly will trip these asserts
+    assert((effective_input_height - effective_window_height) % vertical_stride == 0);
+    assert((effective_input_width - effective_window_width) % horizontal_stride == 0);
+
+    PlaneShape res;
+
+    res.height = (effective_input_height - effective_window_height) / vertical_stride + 1;
+    res.width = (effective_input_width - effective_window_width) / horizontal_stride + 1;
+
+    return res;
+  }
+
+public:
+  void pad(const loco::Padding2D *value) { _pad = value; }
+  void window(const loco::Window<2> *value) { _window = value; }
+  void stride(const loco::Stride<2> *value) { _stride = value; }
+
+private:
+  const loco::Padding2D *_pad = nullptr;
+  const loco::Window<2> *_window = nullptr;
+  const loco::Stride<2> *_stride = nullptr;
+};
+
+/**
+ * Backward spatial inference (inverse of the Forward formula).
+ *
+ * Computes: out = stride * (in - 1) + window - pad_total (per axis),
+ * as used by transposed convolution. pad/window/stride MUST all be
+ * configured before operator() is called.
+ */
+template <> class PlaneInference<Direction::Backward> final
+{
+public:
+  PlaneShape operator()(const PlaneShape &in) const
+  {
+    assert(_pad != nullptr);
+    assert(_window != nullptr);
+    assert(_stride != nullptr);
+
+    uint32_t const input_height = in.height.value();
+    uint32_t const input_width = in.width.value();
+
+    uint32_t const vertical_padding = _pad->top() + _pad->bottom();
+    uint32_t const horizontal_padding = _pad->left() + _pad->right();
+
+    uint32_t const raw_window_height = _window->vertical();
+    uint32_t const raw_window_width = _window->horizontal();
+
+    // TODO Support "dilation"
+    uint32_t const effective_window_height = raw_window_height;
+    uint32_t const effective_window_width = raw_window_width;
+
+    uint32_t const vertical_stride = _stride->vertical();
+    uint32_t const horizontal_stride = _stride->horizontal();
+
+    PlaneShape res;
+
+    res.height = vertical_stride * (input_height - 1) + effective_window_height - vertical_padding;
+    res.width = horizontal_stride * (input_width - 1) + effective_window_width - horizontal_padding;
+
+    return res;
+  }
+
+public:
+  void pad(const loco::Padding2D *value) { _pad = value; }
+  void window(const loco::Window<2> *value) { _window = value; }
+  void stride(const loco::Stride<2> *value) { _stride = value; }
+
+private:
+  const loco::Padding2D *_pad = nullptr;
+  const loco::Window<2> *_window = nullptr;
+  const loco::Stride<2> *_stride = nullptr;
+};
+
+/**
+ * There are two possible maintenance policies.
+ * - Introduce a new canonical node first, and then extend this algorithm later
+ * - Introduce a new canonical node and extend this algorithm at the same time
+ *
+ * The current implementation assumes the former one (for historical reason).
+ *
+ * TODO Evaluate the impact of the latter one
+ *
+ * NOTE "Forward" means that this algorithm computes the output shape from input shapes
+ */
+class ForwardShapeInferenceAlgorithm final : public loco::CanonicalNodeVisitor<loco::NodeShape>
+{
+public:
+  ForwardShapeInferenceAlgorithm(const loco::ShapeInferenceRule::Context *ctx) : _ctx{ctx}
+  {
+    // DO NOTHING
+  }
+
+private:
+  // Shapes already inferred for other nodes, provided by the caller
+  const loco::ShapeInferenceRule::Context *_ctx;
+
+private:
+  bool shape_known(const loco::Node *node) const { return _ctx->known(node); }
+  loco::NodeShape node_shape(const loco::Node *node) const { return _ctx->get(node); }
+
+private:
+  // The shape of an element-wise binary op is the (common) shape of its arguments
+  loco::NodeShape eltwise_binary_node_shape(const loco::Node *node)
+  {
+    // This helper works only for binary node.
+    assert(node->arity() == 2);
+
+    auto lhs_shape = node_shape(node->arg(0));
+    auto rhs_shape = node_shape(node->arg(1));
+
+    // ASSERT: lhs_shape == rhs_shape
+    // NOTE(review): the equality above is assumed, not checked
+
+    return lhs_shape;
+  }
+
+public:
+  // CASE: AvgPool2D
+  loco::NodeShape visit(const loco::AvgPool2D *node) final
+  {
+    PlaneInference<Direction::Forward> infer_plane_shape;
+
+    infer_plane_shape.pad(node->pad());
+    infer_plane_shape.window(node->window());
+    infer_plane_shape.stride(node->stride());
+
+    auto input_feature_shape = node_shape(node->ifm()).as<loco::FeatureShape>();
+    auto input_plane_shape = make_plane_shape(input_feature_shape);
+    auto output_plane_shape = infer_plane_shape(input_plane_shape);
+    auto output_feature_shape = input_feature_shape; // AvgPool2D does not change count/depth
+
+    // Update the height/width of output_feature_shape with that of output_plane_shape
+    update(output_feature_shape).with(output_plane_shape);
+
+    return loco::NodeShape{output_feature_shape};
+  }
+
+  // CASE: BiasDecode
+  loco::NodeShape visit(const loco::BiasDecode *node) final
+  {
+    // The input of BiasDecode SHOULD BE a bias!
+    assert(node_shape(node->input()).domain() == loco::Domain::Bias);
+    auto input_bias_shape = node_shape(node->input()).as<loco::BiasShape>();
+
+    loco::TensorShape output_tensor_shape;
+
+    // A decoded bias is a rank-1 tensor whose length matches the bias
+    output_tensor_shape.rank(1);
+    output_tensor_shape.dim(0) = input_bias_shape.length();
+
+    return loco::NodeShape{output_tensor_shape};
+  }
+
+  // CASE: BiasEncode
+  loco::NodeShape visit(const loco::BiasEncode *node) final
+  {
+    // The input of BiasEncode SHOULD BE a tensor!
+    assert(node_shape(node->input()).domain() == loco::Domain::Tensor);
+    auto input_tensor_shape = node_shape(node->input()).as<loco::TensorShape>();
+
+    loco::BiasShape output_bias_shape;
+
+    output_bias_shape.length() = input_tensor_shape.dim(0);
+
+    return loco::NodeShape{output_bias_shape};
+  }
+
+  // CASE: ConstGen
+  loco::NodeShape visit(const loco::ConstGen *node) final
+  {
+    // The shape of a constant is exactly the shape it was declared with
+    loco::TensorShape tensor_shape;
+
+    tensor_shape.rank(node->rank());
+    for (uint32_t axis = 0; axis < node->rank(); ++axis)
+    {
+      tensor_shape.dim(axis) = node->dim(axis);
+    }
+
+    return loco::NodeShape{tensor_shape};
+  }
+
+  // CASE: Conv2D
+  loco::NodeShape visit(const loco::Conv2D *node) final
+  {
+    auto filter_shape = node_shape(node->ker()).as<loco::FilterShape>();
+    auto filter_window = window_of(filter_shape);
+
+    PlaneInference<Direction::Forward> infer_plane_shape;
+
+    infer_plane_shape.pad(node->pad());
+    infer_plane_shape.window(&filter_window);
+    infer_plane_shape.stride(node->stride());
+
+    auto input_feature_shape = node_shape(node->ifm()).as<loco::FeatureShape>();
+    auto input_plane_shape = make_plane_shape(input_feature_shape);
+    auto output_plane_shape = infer_plane_shape(input_plane_shape);
+
+    loco::FeatureShape output_feature_shape;
+
+    // "COUNT" does not change
+    output_feature_shape.count() = input_feature_shape.count();
+    // "DEPTH" depends on # of filters
+    output_feature_shape.depth() = filter_shape.count();
+    // Update the height/width of output_feature_shape with that of output_plane_shape
+    update(output_feature_shape).with(output_plane_shape);
+
+    return loco::NodeShape{output_feature_shape};
+  }
+
+  // CASE: DepthwiseConv2D
+  loco::NodeShape visit(const loco::DepthwiseConv2D *node) final
+  {
+    auto depthwise_filter_shape = node_shape(node->ker()).as<loco::DepthwiseFilterShape>();
+    // NOTE(review): 'dpethwise' is a typo for 'depthwise' (rename when touching this file)
+    auto dpethwise_filter_window = window_of(depthwise_filter_shape);
+
+    PlaneInference<Direction::Forward> infer_plane_shape;
+
+    infer_plane_shape.pad(node->pad());
+    infer_plane_shape.window(&dpethwise_filter_window);
+    infer_plane_shape.stride(node->stride());
+
+    auto input_feature_shape = node_shape(node->ifm()).as<loco::FeatureShape>();
+    auto input_plane_shape = make_plane_shape(input_feature_shape);
+    auto output_plane_shape = infer_plane_shape(input_plane_shape);
+
+    loco::FeatureShape output_feature_shape;
+
+    // "COUNT" does not change
+    output_feature_shape.count() = input_feature_shape.count();
+    // "DEPTH" depends on [in_channels * channel_multiplier] of filters
+    output_feature_shape.depth() = loco::Dimension(depthwise_filter_shape.depth().value() *
+                                                   depthwise_filter_shape.multiplier().value());
+    // Update the height/width of output_feature_shape with that of output_plane_shape
+    update(output_feature_shape).with(output_plane_shape);
+
+    return loco::NodeShape{output_feature_shape};
+  }
+
+  // CASE: DepthwiseFilterEncode
+  loco::NodeShape visit(const loco::DepthwiseFilterEncode *node) final
+  {
+    // Delegate the tensor -> depthwise-filter shape mapping to the encoder
+    auto input_tensor_shape = node_shape(node->input()).as<loco::TensorShape>();
+    return loco::NodeShape{node->encoder()->shape(input_tensor_shape)};
+  }
+
+  // CASE: DepthwiseFilterDecode
+  loco::NodeShape visit(const loco::DepthwiseFilterDecode *node) final
+  {
+    // Delegate the depthwise-filter -> tensor shape mapping to the decoder
+    auto input_dw_filter_shape = node_shape(node->input()).as<loco::DepthwiseFilterShape>();
+    return loco::NodeShape{node->decoder()->shape(input_dw_filter_shape)};
+  }
+
+  // CASE: EltwiseAdd
+  loco::NodeShape visit(const loco::EltwiseAdd *node) final
+  {
+    return eltwise_binary_node_shape(node);
+  }
+
+  // CASE: EltwiseDiv
+  loco::NodeShape visit(const loco::EltwiseDiv *node) final
+  {
+    return eltwise_binary_node_shape(node);
+  }
+
+  // CASE: EltwiseMax
+  loco::NodeShape visit(const loco::EltwiseMax *node) final
+  {
+    return eltwise_binary_node_shape(node);
+  }
+
+  // CASE: EltwiseMul
+  loco::NodeShape visit(const loco::EltwiseMul *node) final
+  {
+    return eltwise_binary_node_shape(node);
+  }
+
+  // CASE: EltwiseSqrt
+  loco::NodeShape visit(const loco::EltwiseSqrt *node) final { return node_shape(node->input()); }
+
+  // CASE: EltwiseSub
+  loco::NodeShape visit(const loco::EltwiseSub *node) final
+  {
+    return eltwise_binary_node_shape(node);
+  }
+
+  // CASE: Forward
+  loco::NodeShape visit(const loco::Forward *node) final { return node_shape(node->input()); }
+
+  // CASE: FeatureBiasAdd
+  loco::NodeShape visit(const loco::FeatureBiasAdd *node) final
+  {
+    assert(node_shape(node->value()).domain() == loco::Domain::Feature);
+    assert(node_shape(node->bias()).domain() == loco::Domain::Bias);
+
+    // Q. What to do when there is a mismatch between value's depth and bias's length?
+
+    return node_shape(node->value());
+  }
+
+  // CASE: FeatureDecode
+  loco::NodeShape visit(const loco::FeatureDecode *node) final
+  {
+    auto input_node_shape = node_shape(node->input());
+    return loco::NodeShape{node->decoder()->shape(input_node_shape.as<loco::FeatureShape>())};
+  }
+
+  // CASE: FeatureEncode
+  loco::NodeShape visit(const loco::FeatureEncode *node) final
+  {
+    auto input_node_shape = node_shape(node->input());
+    return loco::NodeShape{node->encoder()->shape(input_node_shape.as<loco::TensorShape>())};
+  }
+
+  // CASE: FilterDecode
+  loco::NodeShape visit(const loco::FilterDecode *node) final
+  {
+    auto input_filter_shape = node_shape(node->input()).as<loco::FilterShape>();
+    return loco::NodeShape{node->decoder()->shape(input_filter_shape)};
+  }
+
+  // CASE: FilterEncode
+  loco::NodeShape visit(const loco::FilterEncode *node) final
+  {
+    auto input_tensor_shape = node_shape(node->input()).as<loco::TensorShape>();
+    return loco::NodeShape{node->encoder()->shape(input_tensor_shape)};
+  }
+
+  // CASE: FixedReshape
+  loco::NodeShape visit(const loco::FixedReshape *node) final
+  {
+    // The output shape is the shape stored in the node itself
+    loco::TensorShape tensor_shape;
+
+    tensor_shape.rank(node->rank());
+    for (uint32_t axis = 0; axis < node->rank(); ++axis)
+    {
+      tensor_shape.dim(axis) = node->dim(axis);
+    }
+
+    return loco::NodeShape{tensor_shape};
+  }
+
+  // CASE: MatMul
+  loco::NodeShape visit(const loco::MatMul *node) final
+  {
+    assert(shape_known(node->lhs()));
+    assert(shape_known(node->rhs()));
+    auto const lhs_shape = node_shape(node->lhs()).as<loco::MatrixShape>();
+    auto const rhs_shape = node_shape(node->rhs()).as<loco::MatrixShape>();
+
+    loco::MatrixShape out_shape;
+
+    // Checking shape capability for multiplication
+    assert(lhs_shape.width() == rhs_shape.height());
+
+    // [H x W] x [W x W'] -> [H x W']
+    out_shape.height() = lhs_shape.height();
+    out_shape.width() = rhs_shape.width();
+
+    // NOTE(review): relies on implicit MatrixShape -> NodeShape conversion,
+    // unlike the explicit loco::NodeShape{...} used by the other cases
+    return out_shape;
+  }
+
+  // CASE: MatrixDecode
+  loco::NodeShape visit(const loco::MatrixDecode *node) final
+  {
+    auto input_node_shape = node_shape(node->input());
+    return loco::NodeShape{node->decoder()->shape(input_node_shape.as<loco::MatrixShape>())};
+  }
+
+  // CASE: MatrixEncode
+  loco::NodeShape visit(const loco::MatrixEncode *node) final
+  {
+    auto input_node_shape = node_shape(node->input());
+    return loco::NodeShape{node->encoder()->shape(input_node_shape.as<loco::TensorShape>())};
+  }
+
+  // CASE: MaxPool2D
+  loco::NodeShape visit(const loco::MaxPool2D *node) final
+  {
+    PlaneInference<Direction::Forward> infer_plane_shape;
+
+    infer_plane_shape.pad(node->pad());
+    infer_plane_shape.window(node->window());
+    infer_plane_shape.stride(node->stride());
+
+    auto input_feature_shape = node_shape(node->ifm()).as<loco::FeatureShape>();
+    auto input_plane_shape = make_plane_shape(input_feature_shape);
+    auto output_plane_shape = infer_plane_shape(input_plane_shape);
+    auto output_feature_shape = input_feature_shape; // MaxPool2D does not change count/depth
+
+    // Update the height/width of output_feature_shape with that of output_plane_shape
+    update(output_feature_shape).with(output_plane_shape);
+
+    return loco::NodeShape{output_feature_shape};
+  }
+
+  // CASE: Push
+  loco::NodeShape visit(const loco::Push *node) final
+  {
+    assert(shape_known(node->from()));
+    return node_shape(node->from());
+  }
+
+  // CASE: Pull
+  loco::NodeShape visit(const loco::Pull *node) final
+  {
+    // Build a tensor shape from "Pull" node
+    loco::TensorShape tensor_shape;
+
+    tensor_shape.rank(node->rank());
+    for (uint32_t axis = 0; axis < node->rank(); ++axis)
+    {
+      tensor_shape.dim(axis) = node->dim(axis);
+    }
+
+    return loco::NodeShape{tensor_shape};
+  }
+
+  // CASE: ReLU
+  loco::NodeShape visit(const loco::ReLU *node) final { return node_shape(node->input()); }
+
+  // CASE: ReLU6
+  loco::NodeShape visit(const loco::ReLU6 *node) final { return node_shape(node->input()); }
+
+  // CASE: Tanh
+  loco::NodeShape visit(const loco::Tanh *node) final { return node_shape(node->input()); }
+
+  // CASE: TensorBiasAdd
+  loco::NodeShape visit(const loco::TensorBiasAdd *node) final
+  {
+    assert(node_shape(node->value()).domain() == loco::Domain::Tensor);
+    assert(node_shape(node->bias()).domain() == loco::Domain::Bias);
+
+    // Q. What to do when there is a mismatch between value's dim and bias's length?
+
+    return node_shape(node->value());
+  }
+
+  // CASE: TensorConcat
+  // NOTE(review): unlike every other 'visit' overload here, this one is not
+  // marked 'final' -- confirm whether that is intentional
+  loco::NodeShape visit(const loco::TensorConcat *node)
+  {
+    auto const lhs_shape = node_shape(node->lhs()).as<loco::TensorShape>();
+    auto const rhs_shape = node_shape(node->rhs()).as<loco::TensorShape>();
+
+    assert(lhs_shape.rank() == rhs_shape.rank());
+    uint32_t const out_rank = lhs_shape.rank();
+
+    loco::TensorShape out_shape;
+
+    out_shape.rank(out_rank);
+
+    for (uint32_t axis = 0; axis < out_rank; ++axis)
+    {
+      if (axis == node->axis())
+      {
+        // Concatenation axis: dimensions add up
+        out_shape.dim(axis) = lhs_shape.dim(axis).value() + rhs_shape.dim(axis).value();
+      }
+      else
+      {
+        // Every other axis must agree between the two inputs
+        assert(lhs_shape.dim(axis) == rhs_shape.dim(axis));
+        out_shape.dim(axis) = lhs_shape.dim(axis);
+      }
+    }
+
+    return loco::NodeShape{out_shape};
+  }
+
+  // CASE: TensorBroadcast
+  loco::NodeShape visit(const loco::TensorBroadcast *node) final
+  {
+    auto tensor_shape = node_shape(node->input()).as<loco::TensorShape>();
+    auto const tensor_rank = tensor_shape.rank();
+
+    // Axes present in the broadcast mapping take the mapped dimension
+    for (uint32_t axis = 0; axis < tensor_rank; ++axis)
+    {
+      if (node->mapping()->defined(axis))
+      {
+        tensor_shape.dim(axis) = node->mapping()->dim(axis);
+      }
+    }
+
+    return loco::NodeShape{tensor_shape};
+  }
+
+  // CASE: TensorReduce
+  loco::NodeShape visit(const loco::TensorReduce *node) final
+  {
+    auto tensor_shape = node_shape(node->input()).as<loco::TensorShape>();
+    auto const tensor_rank = tensor_shape.rank();
+
+    // Reduced axes collapse to 1 (rank is preserved, i.e. "keep dims")
+    for (uint32_t d = 0; d < tensor_rank; ++d)
+      if (node->axes()->defined(d))
+        tensor_shape.dim(d) = 1;
+
+    return loco::NodeShape{tensor_shape};
+  }
+
+  // CASE: TensorSoftmax
+  loco::NodeShape visit(const loco::TensorSoftmax *node) final { return node_shape(node->input()); }
+
+  // CASE: TensorTranspose
+  loco::NodeShape visit(const loco::TensorTranspose *node) final
+  {
+    loco::TensorShape output_shape;
+
+    auto input_shape = node_shape(node->input()).as<loco::TensorShape>();
+    assert(input_shape.rank() == node->perm()->size());
+
+    output_shape.rank(input_shape.rank());
+
+    // Output axis 'i' takes the input dimension selected by the permutation
+    for (uint32_t output_axis = 0; output_axis < output_shape.rank(); output_axis++)
+    {
+      auto new_dim = input_shape.dim(node->perm()->axis(output_axis));
+      output_shape.dim(output_axis) = new_dim;
+    }
+
+    return loco::NodeShape(output_shape);
+  }
+
+  // CASE: TransposedConv2D
+  loco::NodeShape visit(const loco::TransposedConv2D *node) final
+  {
+    auto filter_shape = node_shape(node->ker()).as<loco::FilterShape>();
+    auto filter_window = window_of(filter_shape);
+
+    // Backward inference: spatial size grows with the stride
+    PlaneInference<Direction::Backward> infer_plane_shape;
+
+    infer_plane_shape.pad(node->pad());
+    infer_plane_shape.window(&filter_window);
+    infer_plane_shape.stride(node->stride());
+
+    auto input_feature_shape = node_shape(node->ifm()).as<loco::FeatureShape>();
+    auto input_plane_shape = make_plane_shape(input_feature_shape);
+    auto output_plane_shape = infer_plane_shape(input_plane_shape);
+
+    loco::FeatureShape output_feature_shape;
+
+    // "COUNT" does not change
+    output_feature_shape.count() = input_feature_shape.count();
+    // Output "DEPTH" depends on count of filters
+    output_feature_shape.depth() = filter_shape.count();
+    // Update the height/width of output_feature_shape with that of output_plane_shape
+    update(output_feature_shape).with(output_plane_shape);
+
+    return loco::NodeShape{output_feature_shape};
+  }
+
+  // CASE: TensorConstantPad
+  loco::NodeShape visit(const loco::TensorConstantPad *node) final
+  {
+    // NOTE(review): this case reads loco::shape_get directly instead of going
+    // through 'node_shape' (i.e. the injected _ctx) like every other case --
+    // confirm whether bypassing the Context here is intentional
+    auto const tensor_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+    auto padding = node->padding();
+
+    loco::TensorShape out_shape;
+    out_shape.rank(tensor_shape.rank());
+    // Each axis grows by the front and back padding amounts
+    for (uint32_t axis = 0; axis < out_shape.rank(); ++axis)
+    {
+      out_shape.dim(axis) =
+          tensor_shape.dim(axis).value() + padding->front(axis) + padding->back(axis);
+    }
+
+    return loco::NodeShape{out_shape};
+  }
+};
+
+} // namespace
+
+namespace
+{
+namespace compat
+{
+
+// Adapts the global shape annotations (loco::shape_known / loco::shape_get)
+// to the ShapeInferenceRule::Context interface, so the V2 implementation can
+// serve V1 callers.
+struct Context final : public loco::ShapeInferenceRule::Context
+{
+  bool known(const loco::Node *node) const final { return loco::shape_known(node); }
+  loco::NodeShape get(const loco::Node *node) const final { return loco::shape_get(node); }
+};
+
+// Captures the outcome of a single shape-inference request
+class Sink final : public loco::ShapeInferenceRule::Sink
+{
+public:
+  enum Status
+  {
+    Unknown, // Neither okay() nor fail() has been called yet
+    Okay,
+    Fail,
+  };
+
+public:
+  const Status &status(void) const { return _status; }
+  // Only meaningful when status() == Okay
+  const loco::NodeShape &shape(void) const { return _shape; }
+
+public:
+  void okay(const loco::NodeShape &shape) final
+  {
+    _status = Okay;
+    _shape = shape;
+  }
+
+  void fail(void) final
+  {
+    // Notify failure
+    _status = Fail;
+  }
+
+private:
+  Status _status = Unknown;
+  loco::NodeShape _shape;
+};
+
+} // namespace compat
+} // namespace
+
+namespace loco
+{
+
+// This rule implements both shape-inference API versions
+bool CanonicalShapeInferenceRule::support(const API &api) const
+{
+  return api == API::V1 or api == API::V2;
+}
+
+// This rule handles canonical-dialect nodes only
+bool CanonicalShapeInferenceRule::recognize(const Dialect *d) const
+{
+  return CanonicalDialect::get() == d;
+}
+
+// V1 entry point: infers 'shape' for 'node' using the global shape
+// annotations as context. Returns false when inference fails.
+bool CanonicalShapeInferenceRule::infer(const Node *node, NodeShape &shape) const
+{
+  ::compat::Context ctx;
+  ::compat::Sink sink;
+
+  infer(&ctx, node, &sink);
+
+  // The sink must have been notified one way or the other
+  assert(sink.status() == ::compat::Sink::Okay or sink.status() == ::compat::Sink::Fail);
+
+  if (sink.status() == ::compat::Sink::Fail)
+  {
+    return false;
+  }
+
+  shape = sink.shape();
+  return true;
+}
+
+// V2 entry point: infers the shape of 'node' against a caller-provided
+// context and reports the result through 'sink'.
+void CanonicalShapeInferenceRule::infer(const Context *ctx, const Node *node, Sink *sink) const
+{
+  assert(node->dialect() == loco::CanonicalDialect::get());
+  assert(dynamic_cast<const loco::CanonicalNode *>(node) != nullptr);
+
+  ForwardShapeInferenceAlgorithm alg{ctx};
+  auto shape = dynamic_cast<const loco::CanonicalNode *>(node)->accept(&alg);
+
+  // NOTE(review): this path always reports success; sink->fail() is never
+  // invoked here, so the Fail branch in the V1 wrapper is currently unreachable
+  sink->okay(shape);
+}
+
+} // namespace loco
diff --git a/compiler/loco/src/Service/CanonicalShapeInferenceRule.test.cpp b/compiler/loco/src/Service/CanonicalShapeInferenceRule.test.cpp
new file mode 100644
index 000000000..5cc8c3808
--- /dev/null
+++ b/compiler/loco/src/Service/CanonicalShapeInferenceRule.test.cpp
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/Service/CanonicalShapeInferenceRule.h"
+#include "loco/Service/ShapeInference.h"
+
+#include "GraphTestcase.h"
+
+#include <vector>
+
+#include <gtest/gtest.h>
+
+// Identity graph: the input shape should reach the output node unchanged.
+TEST(CanonicalShapeInferenceRuleTest, minimal)
+{
+  // Create a simple identity network, which takes Tensor<1x2x3x4> as input.
+  GraphTestcase<GraphCode::Identity> testcase{1, 2, 3, 4};
+
+  // Run Inference
+  loco::CanonicalShapeInferenceRule rule;
+
+  loco::apply(&rule).to(testcase.graph());
+
+  // Verify that the 1x2x3x4 input shape propagated to the push node
+  ASSERT_TRUE(loco::shape_known(testcase.push_node));
+  ASSERT_EQ(loco::shape_get(testcase.push_node).domain(), loco::Domain::Tensor);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().rank(), 4);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(0), 1);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(1), 2);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(2), 3);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(3), 4);
+}
+
+// ConstGen: the shape set on the constant node itself should be inferred downstream.
+TEST(CanonicalShapeInferenceRuleTest, const_gen)
+{
+  // Create a sample network
+  GraphTestcase<GraphCode::ConstGen> testcase;
+
+  testcase.const_node->dtype(loco::DataType::FLOAT32);
+  testcase.const_node->shape({1, 2});
+
+  // Run Inference
+  loco::CanonicalShapeInferenceRule rule;
+
+  loco::apply(&rule).to(testcase.graph());
+
+  // Verify that the 1x2 constant shape reached the push node
+  ASSERT_TRUE(loco::shape_known(testcase.push_node));
+  ASSERT_EQ(loco::shape_get(testcase.push_node).domain(), loco::Domain::Tensor);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().rank(), 2);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(0), 1);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(1), 2);
+}
+
+// ReLU is shape-preserving: output shape equals the pull node's shape.
+TEST(CanonicalShapeInferenceRuleTest, relu)
+{
+  // Create a sample network
+  GraphTestcase<GraphCode::Relu> testcase;
+
+  testcase.pull_node->shape({1, 2, 3, 4});
+
+  // Run Inference
+  loco::CanonicalShapeInferenceRule rule;
+
+  loco::apply(&rule).to(testcase.graph());
+
+  // Verify that the input shape passed through ReLU unchanged
+  ASSERT_TRUE(loco::shape_known(testcase.push_node));
+  ASSERT_EQ(loco::shape_get(testcase.push_node).domain(), loco::Domain::Tensor);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().rank(), 4);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(0), 1);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(1), 2);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(2), 3);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(3), 4);
+}
+
+// FeatureEncode/FeatureDecode round-trip: encode enters the Feature domain,
+// decode returns to Tensor domain with the original shape.
+TEST(CanonicalShapeInferenceRuleTest, feature_codec)
+{
+  // Create a sample network
+  GraphTestcase<GraphCode::FeatureCodec> testcase;
+
+  testcase.pull_node->shape({1, 2, 3, 4});
+
+  // Run Inference
+  loco::CanonicalShapeInferenceRule rule;
+
+  loco::apply(&rule).to(testcase.graph());
+
+  // Encode node lives in the Feature domain
+  ASSERT_TRUE(loco::shape_known(testcase.encode_node));
+  ASSERT_EQ(loco::shape_get(testcase.encode_node).domain(), loco::Domain::Feature);
+
+  // Decode node restores the original 1x2x3x4 tensor shape
+  ASSERT_TRUE(loco::shape_known(testcase.decode_node));
+  ASSERT_EQ(loco::shape_get(testcase.decode_node).domain(), loco::Domain::Tensor);
+  ASSERT_EQ(loco::shape_get(testcase.decode_node).as<loco::TensorShape>().rank(), 4);
+  ASSERT_EQ(loco::shape_get(testcase.decode_node).as<loco::TensorShape>().dim(0), 1);
+  ASSERT_EQ(loco::shape_get(testcase.decode_node).as<loco::TensorShape>().dim(1), 2);
+  ASSERT_EQ(loco::shape_get(testcase.decode_node).as<loco::TensorShape>().dim(2), 3);
+  ASSERT_EQ(loco::shape_get(testcase.decode_node).as<loco::TensorShape>().dim(3), 4);
+}
+
+// AvgPool2D with 2x2 window / 2x2 stride over an NHWC 1x8x4x3 input:
+// H: 8 -> 4, W: 4 -> 2 (count and depth unchanged).
+TEST(CanonicalShapeInferenceRuleTest, avgpool2d)
+{
+  using namespace loco;
+
+  // Create a sample network
+  GraphTestcase<GraphCode::AvgPool2D> testcase;
+
+  auto perm = make_NHWC_perm<Domain::Feature>();
+
+  testcase.pull_node->shape({1, 8, 4, 3});
+
+  testcase.encode_node->encoder(stdex::make_unique<PermutingEncoder<Domain::Feature>>(perm));
+
+  testcase.avgpool2d_node->window()->vertical(2);
+  testcase.avgpool2d_node->window()->horizontal(2);
+
+  testcase.avgpool2d_node->stride()->vertical(2);
+  testcase.avgpool2d_node->stride()->horizontal(2);
+
+  testcase.decode_node->decoder(stdex::make_unique<PermutingDecoder<Domain::Feature>>(perm));
+
+  // Run Inference
+  loco::CanonicalShapeInferenceRule rule;
+
+  loco::apply(&rule).to(testcase.graph());
+
+  // Verify!
+  //
+  // NOTE AvgPool2D testcase assumes NHWC layout
+  ASSERT_TRUE(loco::shape_known(testcase.avgpool2d_node));
+  ASSERT_EQ(loco::shape_get(testcase.avgpool2d_node).domain(), loco::Domain::Feature);
+  ASSERT_EQ(loco::shape_get(testcase.avgpool2d_node).as<FeatureShape>().count(), 1);
+  ASSERT_EQ(loco::shape_get(testcase.avgpool2d_node).as<FeatureShape>().depth(), 3);
+  ASSERT_EQ(loco::shape_get(testcase.avgpool2d_node).as<FeatureShape>().height(), 4);
+  ASSERT_EQ(loco::shape_get(testcase.avgpool2d_node).as<FeatureShape>().width(), 2);
+}
+
+// DepthwiseConv2D: 1x4x4x3 input with a 2x2x3x2 (HxWxCxM) kernel, stride 1.
+// Expected output: H,W: 4-2+1 = 3, depth: C*M = 3*2 = 6.
+TEST(CanonicalShapeInferenceRuleTest, depthwiseconv2d)
+{
+  using namespace loco;
+
+  // Create a sample network
+  GraphTestcase<GraphCode::DepthwiseConv2D> testcase;
+
+  testcase.pull_node->shape({1, 4, 4, 3});
+
+  testcase.const_node->dtype(loco::DataType::FLOAT32);
+  testcase.const_node->shape({2, 2, 3, 2});
+
+  testcase.depthwiseconv2d_node->stride()->vertical(1);
+  testcase.depthwiseconv2d_node->stride()->horizontal(1);
+
+  // Run Inference
+  loco::CanonicalShapeInferenceRule rule;
+
+  loco::apply(&rule).to(testcase.graph());
+
+  // Verify!
+  //
+  // NOTE DepthwiseConv2D testcase assumes NHWC layout
+  ASSERT_TRUE(loco::shape_known(testcase.depthwiseconv2d_node));
+  ASSERT_EQ(loco::shape_get(testcase.depthwiseconv2d_node).domain(), loco::Domain::Feature);
+  ASSERT_EQ(loco::shape_get(testcase.depthwiseconv2d_node).as<FeatureShape>().count(), 1);
+  ASSERT_EQ(loco::shape_get(testcase.depthwiseconv2d_node).as<FeatureShape>().depth(), 6);
+  ASSERT_EQ(loco::shape_get(testcase.depthwiseconv2d_node).as<FeatureShape>().height(), 3);
+  ASSERT_EQ(loco::shape_get(testcase.depthwiseconv2d_node).as<FeatureShape>().width(), 3);
+}
+
+// TransposedConv2D: stride-2 upsampling of a 270x480 feature map with a 3x3
+// kernel and asymmetric (0,1)x(0,1) padding yields a 540x960 output with the
+// kernel's output depth (12).
+TEST(CanonicalShapeInferenceRuleTest, transposedconv2d)
+{
+  using namespace loco;
+
+  // Create a sample network
+  GraphTestcase<GraphCode::TransposedConv2D> testcase;
+
+  testcase.pull_node->shape({1, 270, 480, 24}); // NHWC
+
+  testcase.const_node->dtype(loco::DataType::FLOAT32);
+  testcase.const_node->shape({3, 3, 24, 12}); // HWCN (or HWIO)
+
+  testcase.tr_conv2d_node->stride()->vertical(2);
+  testcase.tr_conv2d_node->stride()->horizontal(2);
+
+  testcase.tr_conv2d_node->pad()->top(0);
+  testcase.tr_conv2d_node->pad()->bottom(1);
+  testcase.tr_conv2d_node->pad()->left(0);
+  testcase.tr_conv2d_node->pad()->right(1);
+
+  // Run Inference
+  loco::CanonicalShapeInferenceRule rule;
+
+  loco::apply(&rule).to(testcase.graph());
+
+  // Verify!
+  ASSERT_TRUE(loco::shape_known(testcase.tr_conv2d_node));
+  ASSERT_EQ(loco::shape_get(testcase.tr_conv2d_node).domain(), loco::Domain::Feature);
+  ASSERT_EQ(loco::shape_get(testcase.tr_conv2d_node).as<FeatureShape>().count(), 1);
+  ASSERT_EQ(loco::shape_get(testcase.tr_conv2d_node).as<FeatureShape>().height(), 540);
+  ASSERT_EQ(loco::shape_get(testcase.tr_conv2d_node).as<FeatureShape>().width(), 960);
+  ASSERT_EQ(loco::shape_get(testcase.tr_conv2d_node).as<FeatureShape>().depth(), 12);
+}
+
+// MaxPool2D mirrors the avgpool2d case: 2x2 window / 2x2 stride on NHWC
+// 1x8x4x3 gives height 4 and width 2.
+TEST(CanonicalShapeInferenceRuleTest, maxpool2d)
+{
+  using namespace loco;
+
+  // Create a sample network
+  GraphTestcase<GraphCode::MaxPool2D> testcase;
+
+  auto perm = make_NHWC_perm<Domain::Feature>();
+
+  testcase.pull_node->shape({1, 8, 4, 3});
+
+  testcase.encode_node->encoder(stdex::make_unique<PermutingEncoder<Domain::Feature>>(perm));
+
+  testcase.maxpool2d_node->window()->vertical(2);
+  testcase.maxpool2d_node->window()->horizontal(2);
+
+  testcase.maxpool2d_node->stride()->vertical(2);
+  testcase.maxpool2d_node->stride()->horizontal(2);
+
+  testcase.decode_node->decoder(stdex::make_unique<PermutingDecoder<Domain::Feature>>(perm));
+
+  // Run Inference
+  loco::CanonicalShapeInferenceRule rule;
+
+  loco::apply(&rule).to(testcase.graph());
+
+  // Verify!
+  //
+  // NOTE MaxPool2D testcase assumes NHWC layout
+  ASSERT_TRUE(loco::shape_known(testcase.maxpool2d_node));
+  ASSERT_EQ(loco::shape_get(testcase.maxpool2d_node).domain(), loco::Domain::Feature);
+  ASSERT_EQ(loco::shape_get(testcase.maxpool2d_node).as<FeatureShape>().count(), 1);
+  ASSERT_EQ(loco::shape_get(testcase.maxpool2d_node).as<FeatureShape>().depth(), 3);
+  ASSERT_EQ(loco::shape_get(testcase.maxpool2d_node).as<FeatureShape>().height(), 4);
+  ASSERT_EQ(loco::shape_get(testcase.maxpool2d_node).as<FeatureShape>().width(), 2);
+}
+
+// TensorConcat along axis 1: 1x2x3 ++ 1x4x3 -> 1x6x3 (axis-1 dims add up).
+TEST(CanonicalShapeInferenceRuleTest, tensor_concat)
+{
+  using namespace loco;
+
+  // Create a sample network
+  GraphTestcase<GraphCode::TensorConcat> testcase;
+
+  testcase.lhs_node->shape({1, 2, 3});
+  testcase.rhs_node->shape({1, 4, 3});
+  testcase.concat_node->axis(1);
+
+  // Run Inference
+  loco::CanonicalShapeInferenceRule rule;
+
+  loco::apply(&rule).to(testcase.graph());
+
+  // Verify!
+  ASSERT_TRUE(loco::shape_known(testcase.concat_node));
+  ASSERT_EQ(loco::shape_get(testcase.concat_node).domain(), loco::Domain::Tensor);
+  ASSERT_EQ(loco::shape_get(testcase.concat_node).as<TensorShape>().rank(), 3);
+  ASSERT_EQ(loco::shape_get(testcase.concat_node).as<TensorShape>().dim(0), 1);
+  ASSERT_EQ(loco::shape_get(testcase.concat_node).as<TensorShape>().dim(1), 6);
+  ASSERT_EQ(loco::shape_get(testcase.concat_node).as<TensorShape>().dim(2), 3);
+}
+
+// FixedReshape: 6x6 input reshaped to 4x9 (same element count, 36).
+TEST(CanonicalShapeInferenceRuleTest, fixed_reshape)
+{
+  // Create a sample network
+  GraphTestcase<GraphCode::FixedReshape> testcase;
+
+  testcase.pull_node->shape({6, 6});
+  testcase.reshape_node->shape({4, 9});
+
+  // Run Inference
+  loco::CanonicalShapeInferenceRule rule;
+
+  loco::apply(&rule).to(testcase.graph());
+
+  // Verify that the reshape's target shape reaches the push node
+  ASSERT_TRUE(loco::shape_known(testcase.push_node));
+  ASSERT_EQ(loco::shape_get(testcase.push_node).domain(), loco::Domain::Tensor);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().rank(), 2);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(0), 4);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(1), 9);
+}
+
+// TensorBroadcast: dim 0 of a 1x2 input is broadcast to 4, giving 4x2.
+TEST(CanonicalShapeInferenceRuleTest, tensor_broadcast)
+{
+  // Create a sample network
+  GraphTestcase<GraphCode::TensorBroadcast> testcase{1, 2};
+
+  testcase.broadcast_node->mapping()->dim(0) = 4;
+
+  // Run Inference
+  loco::CanonicalShapeInferenceRule rule;
+
+  loco::apply(&rule).to(testcase.graph());
+
+  // Verify!
+  ASSERT_TRUE(loco::shape_known(testcase.push_node));
+  ASSERT_EQ(loco::shape_get(testcase.push_node).domain(), loco::Domain::Tensor);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().rank(), 2);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(0), 4);
+  ASSERT_EQ(loco::shape_get(testcase.push_node).as<loco::TensorShape>().dim(1), 2);
+}
+
+// TensorTranspose with perm (2,3,0,1): 10x20x30x40 -> 30x40x10x20
+// (output dim i takes input dim perm[i]).
+TEST(CanonicalShapeInferenceRuleTest, tensor_transpose)
+{
+  // Create a sample network
+  GraphTestcase<GraphCode::TensorTranspose> tc;
+
+  tc.pull_node->shape({10, 20, 30, 40});
+
+  tc.transpose_node->perm()->size(4);
+  tc.transpose_node->perm()->axis(0) = 2;
+  tc.transpose_node->perm()->axis(1) = 3;
+  tc.transpose_node->perm()->axis(2) = 0;
+  tc.transpose_node->perm()->axis(3) = 1;
+
+  // Run Inference
+  loco::CanonicalShapeInferenceRule rule;
+
+  loco::apply(&rule).to(tc.graph());
+
+  // Verify!
+  ASSERT_TRUE(loco::shape_known(tc.push_node));
+  ASSERT_EQ(loco::shape_get(tc.push_node).domain(), loco::Domain::Tensor);
+  ASSERT_EQ(loco::shape_get(tc.push_node).as<loco::TensorShape>().rank(), 4);
+  ASSERT_EQ(loco::shape_get(tc.push_node).as<loco::TensorShape>().dim(0), 30);
+  ASSERT_EQ(loco::shape_get(tc.push_node).as<loco::TensorShape>().dim(1), 40);
+  ASSERT_EQ(loco::shape_get(tc.push_node).as<loco::TensorShape>().dim(2), 10);
+  ASSERT_EQ(loco::shape_get(tc.push_node).as<loco::TensorShape>().dim(3), 20);
+}
+
+namespace
+{
+
+// Test double for the V2 Context: shapes are looked up in a plain map that
+// the test populates directly via the public _content member.
+struct MockContext final : public loco::ShapeInferenceRule::Context
+{
+  bool known(const loco::Node *node) const final { return _content.find(node) != _content.end(); }
+  // Throws std::out_of_range (via map::at) if the node is not in _content
+  loco::NodeShape get(const loco::Node *node) const final { return _content.at(node); }
+
+  std::map<const loco::Node *, loco::NodeShape> _content;
+};
+
+// Test double for the V2 Sink: records the reported shape; failures are ignored.
+struct MockSink final : public loco::ShapeInferenceRule::Sink
+{
+  void okay(const loco::NodeShape &res) final { shape = res; }
+  void fail(void) final { return; }
+
+  loco::NodeShape shape;
+};
+
+} // namespace
+
+// Exercises the V2 (Context/Sink) API directly: relu_1's shape is supplied
+// only through the mock context (not annotated on the graph), and inference
+// on relu_2 must propagate it to the sink.
+TEST(CanonicalShapeInferenceRuleTest, infer_v2)
+{
+  auto g = loco::make_graph();
+
+  // Create an incomplete graph
+  auto relu_1 = g->nodes()->create<loco::ReLU>();
+  auto relu_2 = g->nodes()->create<loco::ReLU>();
+
+  relu_2->input(relu_1);
+
+  // Set up Context
+  MockContext ctx;
+
+  loco::TensorShape tensor_shape;
+
+  tensor_shape.rank(2);
+  tensor_shape.dim(0) = 4;
+  tensor_shape.dim(1) = 5;
+
+  ctx._content[relu_1] = tensor_shape;
+
+  // Create a Sink
+  MockSink sink;
+
+  loco::CanonicalShapeInferenceRule rule;
+
+  rule.infer(&ctx, relu_2, &sink);
+
+  // relu_2 inherits relu_1's 4x5 tensor shape from the context
+  ASSERT_EQ(sink.shape.domain(), loco::Domain::Tensor);
+  ASSERT_EQ(sink.shape.as<loco::TensorShape>().rank(), 2);
+  ASSERT_EQ(sink.shape.as<loco::TensorShape>().dim(0), 4);
+  ASSERT_EQ(sink.shape.as<loco::TensorShape>().dim(1), 5);
+}
diff --git a/compiler/loco/src/Service/GraphBuilder.h b/compiler/loco/src/Service/GraphBuilder.h
new file mode 100644
index 000000000..71084673c
--- /dev/null
+++ b/compiler/loco/src/Service/GraphBuilder.h
@@ -0,0 +1,547 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GRAPH_BUILDER_H__
+#define __GRAPH_BUILDER_H__
+
+// loco-internal headers
+#include "loco/IR/Graph.h"
+
+// repo-internal headers
+#include <stdex/Memory.h>
+
+// C++ standard headers
+#include <stack>
+
+//
+// This file includes a stack-based loco graph builder
+//
+// HOW TO USE
+//
+// loco::Graph *g = ...
+// auto builder = make_graph_builder(g);
+//
+// builder->push<YourAwesomeLayer>(...);
+//
+
+// Stack-based builder: each "Layer" pops its operands from the shared stack,
+// creates nodes in the graph, and pushes its result back.
+class GraphBuilder final
+{
+public:
+  // Thin wrapper around std::stack<loco::Node *> shared by all layers
+  class Stack final
+  {
+  public:
+    Stack() = default;
+
+  public:
+    loco::Node *top(void) const { return _content.top(); }
+
+  public:
+    // Remove and return the top node (undefined on an empty stack)
+    loco::Node *pop(void)
+    {
+      auto ret = top();
+      _content.pop();
+      return ret;
+    }
+
+  public:
+    void push(loco::Node *node) { _content.push(node); }
+
+  private:
+    std::stack<loco::Node *> _content;
+  };
+
+  // Bundles the graph under construction with the operand stack
+  class Context final
+  {
+  public:
+    Context(loco::Graph *graph) : _graph{graph}
+    {
+      // DO NOTHING
+    }
+
+  public:
+    loco::Graph *graph(void) { return _graph; }
+    Stack *stack(void) { return &_stack; }
+
+  private:
+    loco::Graph *_graph = nullptr;
+    Stack _stack;
+  };
+
+public:
+  GraphBuilder(loco::Graph *graph) : _context{graph}
+  {
+    // DO NOTHING
+  }
+
+public:
+  // "Layer" is in theory a subgraph builder.
+  // The return type is whatever Layer::operator()(Context *) returns
+  // (typically a std::unique_ptr<Layer::Return> for fluent configuration).
+  template <typename Layer, typename... Args>
+  auto push(Args &&... args)
+      -> decltype(static_cast<Layer *>(nullptr)->operator()(static_cast<Context *>(nullptr)))
+  {
+    Layer layer{std::forward<Args>(args)...};
+    return layer(ctx());
+  }
+
+public:
+  // Pop the most recently built node off the operand stack
+  loco::Node *pop(void) { return ctx()->stack()->pop(); }
+
+private:
+  Context *ctx(void) { return &_context; }
+
+private:
+  Context _context;
+};
+
+// Convenience factory; the builder does NOT take ownership of the graph.
+static inline std::unique_ptr<GraphBuilder> make_graph_builder(loco::Graph *g)
+{
+  return stdex::make_unique<GraphBuilder>(g);
+}
+
+// "InputLayer" creates both GraphInput and Pull node at once
+struct InputLayer final
+{
+ class Return
+ {
+ public:
+ Return(loco::GraphInput *input, loco::Pull *node) : _input{input}, _node{node}
+ {
+ // DO NOTHING
+ }
+
+ public:
+ loco::Pull *node(void) { return _node; }
+
+ public:
+ Return *name(const std::string &value)
+ {
+ _input->name(value);
+ return this;
+ }
+
+ public:
+ Return *shape(std::initializer_list<uint32_t> dims)
+ {
+ // TODO Uncomment this line when GraphInput is ready
+ // _graph_input->shape(dims)
+ _node->shape(dims);
+ return this;
+ }
+
+ private:
+ loco::GraphInput *_input = nullptr;
+ loco::Pull *_node = nullptr;
+ };
+
+ std::unique_ptr<Return> operator()(GraphBuilder::Context *ctx)
+ {
+ auto input_index = ctx->graph()->inputs()->size();
+ auto graph_input = ctx->graph()->inputs()->create();
+
+ auto pull_node = ctx->graph()->nodes()->create<loco::Pull>();
+
+ pull_node->index(input_index);
+
+ loco::link(graph_input, pull_node);
+
+ ctx->stack()->push(pull_node);
+
+ return stdex::make_unique<Return>(graph_input, pull_node);
+ }
+};
+
+// "OutputLayer" creates both GraphOutput and Push node at once.
+struct OutputLayer final
+{
+ class Return
+ {
+ public:
+ Return(loco::GraphOutput *output, loco::Push *node) : _output{output}, _node{node}
+ {
+ // DO NOTHING
+ }
+
+ public:
+ loco::Push *node(void) { return _node; }
+
+ public:
+ Return *name(const std::string &value)
+ {
+ // TODO Uncomment this line when GraphOutput is ready
+ // _graph_output->shape(dims)
+ _output->name(value);
+ return this;
+ }
+
+ private:
+ loco::GraphOutput *_output = nullptr;
+ loco::Push *_node = nullptr;
+ };
+
+ std::unique_ptr<Return> operator()(GraphBuilder::Context *ctx)
+ {
+ auto output_index = ctx->graph()->outputs()->size();
+ auto graph_output = ctx->graph()->outputs()->create();
+
+ auto push_node = ctx->graph()->nodes()->create<loco::Push>();
+
+ push_node->from(ctx->stack()->pop());
+ push_node->index(output_index);
+
+ loco::link(graph_output, push_node);
+
+ ctx->stack()->push(push_node);
+
+ return stdex::make_unique<Return>(graph_output, push_node);
+ }
+};
+
+struct ReLULayer final
+{
+  // This "Return" is unnecessary for ReLU (as ReLU has no attributes), but
+  // introduced for consistency with the other layers.
+  class Return
+  {
+  public:
+    Return(loco::ReLU *node) : _node{node}
+    {
+      // DO NOTHING
+    }
+
+  public:
+    loco::ReLU *node(void) { return _node; }
+
+  private:
+    loco::ReLU *_node = nullptr;
+  };
+
+  // Pops the input operand, wires it into a new ReLU node, and pushes
+  // the ReLU node back onto the stack.
+  std::unique_ptr<Return> operator()(GraphBuilder::Context *ctx)
+  {
+    auto relu_node = ctx->graph()->nodes()->create<loco::ReLU>();
+
+    relu_node->input(ctx->stack()->pop());
+
+    ctx->stack()->push(relu_node);
+
+    return stdex::make_unique<Return>(relu_node);
+  }
+};
+
+// Creates a ConstGen node; unlike the other layers it consumes no operand.
+struct ConstGenLayer final
+{
+  class Return
+  {
+  public:
+    Return(loco::ConstGen *node) : _node{node}
+    {
+      // DO NOTHING
+    }
+
+  public:
+    loco::ConstGen *node(void) { return _node; }
+
+  private:
+    loco::ConstGen *_node = nullptr;
+  };
+
+  std::unique_ptr<Return> operator()(GraphBuilder::Context *ctx)
+  {
+    auto const_node = ctx->graph()->nodes()->create<loco::ConstGen>();
+
+    ctx->stack()->push(const_node);
+
+    return stdex::make_unique<Return>(const_node);
+  }
+};
+
+#include "loco/IR/PermutingCodec.h"
+
+// Creates a FeatureEncode node; the permutation-based encoder can be set
+// afterwards through Return::perm.
+struct FeatureEncodeLayer final
+{
+  class Return
+  {
+  public:
+    Return(loco::FeatureEncode *node) : _node{node}
+    {
+      // DO NOTHING
+    }
+
+  public:
+    // Install a PermutingEncoder built from the given Feature permutation
+    Return *perm(const loco::Permutation<loco::Domain::Feature> &perm)
+    {
+      using namespace loco;
+      _node->encoder(stdex::make_unique<PermutingEncoder<Domain::Feature>>(perm));
+      return this;
+    }
+
+  public:
+    loco::FeatureEncode *node(void) { return _node; }
+
+  private:
+    loco::FeatureEncode *_node;
+  };
+
+  std::unique_ptr<Return> operator()(GraphBuilder::Context *ctx)
+  {
+    auto encode_node = ctx->graph()->nodes()->create<loco::FeatureEncode>();
+
+    encode_node->input(ctx->stack()->pop());
+
+    ctx->stack()->push(encode_node);
+
+    return stdex::make_unique<Return>(encode_node);
+  }
+};
+
+// Counterpart of FeatureEncodeLayer: creates a FeatureDecode node whose
+// permutation-based decoder can be set via Return::perm.
+struct FeatureDecodeLayer final
+{
+  class Return
+  {
+  public:
+    Return(loco::FeatureDecode *node) : _node{node}
+    {
+      // DO NOTHING
+    }
+
+  public:
+    Return *perm(const loco::Permutation<loco::Domain::Feature> &perm)
+    {
+      using namespace loco;
+      _node->decoder(stdex::make_unique<PermutingDecoder<Domain::Feature>>(perm));
+      return this;
+    }
+
+  public:
+    loco::FeatureDecode *node(void) { return _node; }
+
+  private:
+    loco::FeatureDecode *_node;
+  };
+
+  std::unique_ptr<Return> operator()(GraphBuilder::Context *ctx)
+  {
+    using namespace loco;
+
+    auto decode_node = ctx->graph()->nodes()->create<FeatureDecode>();
+
+    decode_node->input(ctx->stack()->pop());
+
+    ctx->stack()->push(decode_node);
+
+    return stdex::make_unique<Return>(decode_node);
+  }
+};
+
+// Creates a FilterEncode node; Return::perm installs a PermutingEncoder
+// for the Filter domain.
+struct FilterEncodeLayer final
+{
+  class Return
+  {
+  public:
+    Return(loco::FilterEncode *node) : _node{node}
+    {
+      // DO NOTHING
+    }
+
+  public:
+    // NOTE Unlike the Feature layers, this builds the encoder first and
+    //      sets the permutation via setter rather than the constructor
+    Return *perm(const loco::Permutation<loco::Domain::Filter> &perm)
+    {
+      auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+      encoder->perm(perm);
+      _node->encoder(std::move(encoder));
+      return this;
+    }
+
+  public:
+    loco::FilterEncode *node(void) { return _node; }
+
+  private:
+    loco::FilterEncode *_node;
+  };
+
+  std::unique_ptr<Return> operator()(GraphBuilder::Context *ctx)
+  {
+    auto encode_node = ctx->graph()->nodes()->create<loco::FilterEncode>();
+
+    encode_node->input(ctx->stack()->pop());
+
+    ctx->stack()->push(encode_node);
+
+    return stdex::make_unique<Return>(encode_node);
+  }
+};
+
+// Creates a DepthwiseFilterEncode node; Return::perm installs a
+// PermutingEncoder for the DepthwiseFilter domain.
+struct DepthwiseFilterEncodeLayer final
+{
+  class Return
+  {
+  public:
+    Return(loco::DepthwiseFilterEncode *node) : _node{node}
+    {
+      // DO NOTHING
+    }
+
+  public:
+    Return *perm(const loco::Permutation<loco::Domain::DepthwiseFilter> &perm)
+    {
+      using namespace loco;
+      _node->encoder(stdex::make_unique<PermutingEncoder<Domain::DepthwiseFilter>>(perm));
+      return this;
+    }
+
+  public:
+    loco::DepthwiseFilterEncode *node(void) { return _node; }
+
+  private:
+    loco::DepthwiseFilterEncode *_node;
+  };
+
+  std::unique_ptr<Return> operator()(GraphBuilder::Context *ctx)
+  {
+    auto encode_node = ctx->graph()->nodes()->create<loco::DepthwiseFilterEncode>();
+
+    encode_node->input(ctx->stack()->pop());
+
+    ctx->stack()->push(encode_node);
+
+    return stdex::make_unique<Return>(encode_node);
+  }
+};
+
+// Creates a DepthwiseConv2D node from the top two stack entries.
+// Stack discipline: the kernel is popped first (pushed last), then the ifm,
+// so callers must push ifm before ker.
+struct DepthwiseConv2DLayer final
+{
+  class Return
+  {
+  public:
+    Return(loco::DepthwiseConv2D *node) : _node{node}
+    {
+      // DO NOTHING
+    }
+
+  public:
+    loco::DepthwiseConv2D *node(void) { return _node; }
+
+  private:
+    loco::DepthwiseConv2D *_node;
+  };
+
+  std::unique_ptr<Return> operator()(GraphBuilder::Context *ctx)
+  {
+    auto depthwiseconv2d_node = ctx->graph()->nodes()->create<loco::DepthwiseConv2D>();
+
+    depthwiseconv2d_node->ker(ctx->stack()->pop());
+    depthwiseconv2d_node->ifm(ctx->stack()->pop());
+
+    ctx->stack()->push(depthwiseconv2d_node);
+
+    return stdex::make_unique<Return>(depthwiseconv2d_node);
+  }
+};
+
+// Creates a TransposedConv2D node from the top two stack entries
+// (ker popped first, then ifm — push ifm before ker).
+struct TransposedConv2DLayer final
+{
+  class Return
+  {
+  public:
+    Return(loco::TransposedConv2D *node) : _node{node}
+    {
+      // DO NOTHING
+    }
+
+  public:
+    loco::TransposedConv2D *node(void) { return _node; }
+
+  private:
+    loco::TransposedConv2D *_node;
+  };
+
+  std::unique_ptr<Return> operator()(GraphBuilder::Context *ctx)
+  {
+    auto tr_conv2d_node = ctx->graph()->nodes()->create<loco::TransposedConv2D>();
+
+    tr_conv2d_node->ker(ctx->stack()->pop());
+    tr_conv2d_node->ifm(ctx->stack()->pop());
+
+    ctx->stack()->push(tr_conv2d_node);
+
+    return stdex::make_unique<Return>(tr_conv2d_node);
+  }
+};
+
+// Creates a FixedReshape node; the target shape is set via Return::shape.
+struct FixedReshapeLayer final
+{
+  class Return
+  {
+  public:
+    Return(loco::FixedReshape *node) : _node{node}
+    {
+      // DO NOTHING
+    }
+
+  public:
+    Return *shape(std::initializer_list<uint32_t> dims)
+    {
+      _node->shape(dims);
+      return this;
+    }
+
+  public:
+    loco::FixedReshape *node(void) { return _node; }
+
+  private:
+    loco::FixedReshape *_node = nullptr;
+  };
+
+  std::unique_ptr<Return> operator()(GraphBuilder::Context *ctx)
+  {
+    auto reshape_node = ctx->graph()->nodes()->create<loco::FixedReshape>();
+
+    reshape_node->input(ctx->stack()->pop());
+
+    ctx->stack()->push(reshape_node);
+
+    return stdex::make_unique<Return>(reshape_node);
+  }
+};
+
+// Creates a TensorBroadcast node; broadcast dimensions are configured by the
+// caller through node()->mapping() afterwards.
+struct TensorBroadcastLayer final
+{
+  class Return
+  {
+  public:
+    Return(loco::TensorBroadcast *node) : _node{node}
+    {
+      // DO NOTHING
+    }
+
+  public:
+    loco::TensorBroadcast *node(void) { return _node; }
+
+  private:
+    loco::TensorBroadcast *_node = nullptr;
+  };
+
+  std::unique_ptr<Return> operator()(GraphBuilder::Context *ctx)
+  {
+    auto broadcast_node = ctx->graph()->nodes()->create<loco::TensorBroadcast>();
+
+    broadcast_node->input(ctx->stack()->pop());
+    ctx->stack()->push(broadcast_node);
+
+    return stdex::make_unique<Return>(broadcast_node);
+  }
+};
+
+#endif // __GRAPH_BUILDER_H__
diff --git a/compiler/loco/src/Service/GraphBuilder.test.cpp b/compiler/loco/src/Service/GraphBuilder.test.cpp
new file mode 100644
index 000000000..7b2ea5198
--- /dev/null
+++ b/compiler/loco/src/Service/GraphBuilder.test.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GraphBuilder.h"
+
+#include "loco/IR/Nodes.h"
+#include "loco/IR/CanonicalDialect.h"
+#include "loco/IR/CanonicalOpcode.h"
+
+#include <gtest/gtest.h>
+
+// Smoke test: a user-defined Layer pushed through GraphBuilder creates one
+// node in the graph, and pop() hands back that same (ConstGen) node.
+TEST(GraphBuilderTest, Usecase_000)
+{
+  // Minimal layer: creates a ConstGen node and leaves it on the stack
+  struct SampleLayer final
+  {
+    loco::Node *operator()(GraphBuilder::Context *ctx)
+    {
+      auto node = ctx->graph()->nodes()->create<loco::ConstGen>();
+      ctx->stack()->push(node);
+      return node;
+    }
+  };
+
+  auto g = loco::make_graph();
+  auto gbuilder = make_graph_builder(g.get());
+
+  gbuilder->push<SampleLayer>();
+
+  auto node = gbuilder->pop();
+
+  ASSERT_EQ(g->nodes()->size(), 1);
+  ASSERT_EQ(node->dialect(), loco::CanonicalDialect::get());
+  ASSERT_EQ(node->opnum(), static_cast<uint32_t>(loco::CanonicalOpcode::ConstGen));
+}
diff --git a/compiler/loco/src/Service/GraphTestcase.h b/compiler/loco/src/Service/GraphTestcase.h
new file mode 100644
index 000000000..6743b9a14
--- /dev/null
+++ b/compiler/loco/src/Service/GraphTestcase.h
@@ -0,0 +1,541 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GRAPH_TESTCASE_H__
+#define __GRAPH_TESTCASE_H__
+
+#include "loco/IR/Graph.h"
+#include "loco/IR/PermutingCodec.h"
+
+#include "GraphBuilder.h"
+
+#include <stdex/Memory.h>
+
+enum class GraphCode
+{
+ Identity,
+ ConstGen,
+ Relu,
+ FeatureCodec,
+ AvgPool2D,
+ DepthwiseConv2D,
+ TransposedConv2D,
+ MaxPool2D,
+ TensorBroadcast,
+ TensorConcat,
+ TensorTranspose,
+ FixedReshape,
+};
+
+// Helpers building the axis permutations shared by the testcases below.
+// Each maps a loco axis to its position in the named memory layout.
+namespace
+{
+
+template <loco::Domain D> loco::Permutation<D> make_NHWC_perm(void);
+
+// Feature permutation for NHWC (count, height, width, depth) layout
+template <> loco::Permutation<loco::Domain::Feature> make_NHWC_perm(void)
+{
+  loco::Permutation<loco::Domain::Feature> perm;
+
+  perm[loco::FeatureAxis::Count] = 0;
+  perm[loco::FeatureAxis::Height] = 1;
+  perm[loco::FeatureAxis::Width] = 2;
+  perm[loco::FeatureAxis::Depth] = 3;
+
+  return perm;
+}
+
+template <loco::Domain D> loco::Permutation<D> make_HWCN_perm(void);
+
+// @note Also known as HWIO permutation
+template <> loco::Permutation<loco::Domain::Filter> make_HWCN_perm(void)
+{
+  loco::Permutation<loco::Domain::Filter> perm;
+
+  perm[loco::FilterAxis::Height] = 0;
+  perm[loco::FilterAxis::Width] = 1;
+  perm[loco::FilterAxis::Depth] = 2;
+  perm[loco::FilterAxis::Count] = 3;
+
+  return perm;
+}
+
+template <loco::Domain D> loco::Permutation<D> make_HWCM_perm(void);
+
+// Depthwise-filter permutation: height, width, depth(channel), multiplier
+template <> loco::Permutation<loco::Domain::DepthwiseFilter> make_HWCM_perm(void)
+{
+  loco::Permutation<loco::Domain::DepthwiseFilter> perm;
+
+  perm[loco::DepthwiseFilterAxis::Height] = 0;
+  perm[loco::DepthwiseFilterAxis::Width] = 1;
+  perm[loco::DepthwiseFilterAxis::Depth] = 2;
+  perm[loco::DepthwiseFilterAxis::Multiplier] = 3;
+
+  return perm;
+}
+
+} // namespace
+
+template <GraphCode Code> class GraphTestcase;
+
+// Testcase graph: Pull("input") -> Push("output"), with configurable input shape.
+template <> class GraphTestcase<GraphCode::Identity> final
+{
+private:
+  void init(std::initializer_list<uint32_t> dims)
+  {
+    // Create a sample network
+    _graph = loco::make_graph();
+
+    auto graph_builder = make_graph_builder(_graph.get());
+
+    pull_node = graph_builder->push<InputLayer>()->name("input")->shape(dims)->node();
+    push_node = graph_builder->push<OutputLayer>()->name("output")->node();
+  }
+
+public:
+  // NOTE This default constructor guarantees backward compatibility.
+  GraphTestcase() { init({1, 4, 8, 3}); }
+  GraphTestcase(std::initializer_list<uint32_t> dims) { init(dims); }
+
+public:
+  loco::Graph *graph() { return _graph.get(); }
+
+  // Handles into the created graph, exposed for test assertions
+  loco::Pull *pull_node = nullptr;
+  loco::Push *push_node = nullptr;
+
+private:
+  std::unique_ptr<loco::Graph> _graph;
+};
+
+// Testcase graph: ConstGen -> Push("output")
+template <> class GraphTestcase<GraphCode::ConstGen> final
+{
+public:
+  GraphTestcase()
+  {
+    _graph = loco::make_graph();
+
+    auto graph_builder = make_graph_builder(_graph.get());
+
+    const_node = graph_builder->push<ConstGenLayer>()->node();
+
+    push_node = graph_builder->push<OutputLayer>()->name("output")->node();
+  }
+
+public:
+  loco::Graph *graph() { return _graph.get(); }
+
+  loco::ConstGen *const_node = nullptr;
+  loco::Push *push_node = nullptr;
+
+private:
+  std::unique_ptr<loco::Graph> _graph;
+};
+
+// Testcase graph: Pull("input") -> ReLU -> Push("output")
+template <> class GraphTestcase<GraphCode::Relu> final
+{
+public:
+  GraphTestcase()
+  {
+    // Create a sample network
+    _graph = loco::make_graph();
+
+    auto graph_builder = make_graph_builder(_graph.get());
+
+    pull_node = graph_builder->push<InputLayer>()->name("input")->node();
+    relu_node = graph_builder->push<ReLULayer>()->node();
+    push_node = graph_builder->push<OutputLayer>()->name("output")->node();
+  }
+
+public:
+  loco::Graph *graph() { return _graph.get(); }
+
+  loco::Pull *pull_node = nullptr;
+  loco::ReLU *relu_node = nullptr;
+  loco::Push *push_node = nullptr;
+
+private:
+  std::unique_ptr<loco::Graph> _graph;
+};
+
+// Testcase graph: Pull -> FeatureEncode -> FeatureDecode -> Push,
+// where encode/decode share the same NHWC-style permutation.
+template <> class GraphTestcase<GraphCode::FeatureCodec> final
+{
+public:
+  GraphTestcase()
+  {
+    using namespace loco;
+
+    // NHWC permutation (count, height, width, depth)
+    Permutation<Domain::Feature> perm;
+
+    perm[FeatureAxis::Count] = 0;
+    perm[FeatureAxis::Height] = 1;
+    perm[FeatureAxis::Width] = 2;
+    perm[FeatureAxis::Depth] = 3;
+
+    // Create a sample network
+    _graph = make_graph();
+
+    auto graph_builder = make_graph_builder(_graph.get());
+
+    pull_node = graph_builder->push<InputLayer>()->name("input")->node();
+    encode_node = graph_builder->push<FeatureEncodeLayer>()->perm(perm)->node();
+    decode_node = graph_builder->push<FeatureDecodeLayer>()->perm(perm)->node();
+    push_node = graph_builder->push<OutputLayer>()->name("output")->node();
+  }
+
+public:
+  loco::Graph *graph() { return _graph.get(); }
+
+  loco::Pull *pull_node = nullptr;
+  loco::FeatureEncode *encode_node = nullptr;
+  loco::FeatureDecode *decode_node = nullptr;
+  loco::Push *push_node = nullptr;
+
+private:
+  std::unique_ptr<loco::Graph> _graph;
+};
+
+// Testcase graph: Pull -> FeatureEncode -> AvgPool2D -> FeatureDecode -> Push.
+// Built with raw node creation (no GraphBuilder) to wire inputs explicitly.
+template <> class GraphTestcase<GraphCode::AvgPool2D> final
+{
+public:
+  GraphTestcase()
+  {
+    using namespace loco;
+
+    // Create a sample network
+    _graph = make_graph();
+
+    // Create Graph Input/Output
+    auto graph_input = _graph->inputs()->create();
+    auto graph_output = _graph->outputs()->create();
+
+    graph_input->name("input");
+    graph_output->name("output");
+
+    // Create and connect nodes
+    pull_node = _graph->nodes()->create<Pull>();
+    pull_node->index(0);
+
+    encode_node = _graph->nodes()->create<FeatureEncode>();
+    encode_node->input(pull_node);
+
+    avgpool2d_node = _graph->nodes()->create<AvgPool2D>();
+    avgpool2d_node->ifm(encode_node);
+
+    decode_node = _graph->nodes()->create<FeatureDecode>();
+    decode_node->input(avgpool2d_node);
+
+    push_node = _graph->nodes()->create<loco::Push>();
+    push_node->index(0);
+    push_node->from(decode_node);
+
+    // Create a link between input/output and corresponding nodes
+    loco::link(graph_input, pull_node);
+    loco::link(graph_output, push_node);
+  }
+
+public:
+  loco::Graph *graph() { return _graph.get(); }
+
+  loco::Pull *pull_node = nullptr;
+  loco::FeatureEncode *encode_node = nullptr;
+  loco::AvgPool2D *avgpool2d_node = nullptr;
+  loco::FeatureDecode *decode_node = nullptr;
+  loco::Push *push_node = nullptr;
+
+private:
+  std::unique_ptr<loco::Graph> _graph;
+};
+
+// Testcase graph: Pull -> FeatureEncode --\
+//                 ConstGen -> DepthwiseFilterEncode --> DepthwiseConv2D
+//                 -> FeatureDecode -> Push
+template <> class GraphTestcase<GraphCode::DepthwiseConv2D> final
+{
+public:
+  GraphTestcase()
+  {
+    using namespace loco;
+
+    _graph = make_graph();
+
+    auto graph_builder = make_graph_builder(_graph.get());
+
+    Permutation<Domain::Feature> perm = make_NHWC_perm<Domain::Feature>();
+    Permutation<Domain::DepthwiseFilter> filter_perm = make_HWCM_perm<Domain::DepthwiseFilter>();
+
+    pull_node = graph_builder->push<InputLayer>()->name("input")->node();
+    encode_node = graph_builder->push<FeatureEncodeLayer>()->perm(perm)->node();
+
+    const_node = graph_builder->push<ConstGenLayer>()->node();
+
+    filter_encode_node =
+        graph_builder->push<DepthwiseFilterEncodeLayer>()->perm(filter_perm)->node();
+
+    depthwiseconv2d_node = graph_builder->push<DepthwiseConv2DLayer>()->node();
+
+    decode_node = graph_builder->push<FeatureDecodeLayer>()->perm(perm)->node();
+    push_node = graph_builder->push<OutputLayer>()->name("output")->node();
+  }
+
+public:
+  loco::Graph *graph() { return _graph.get(); }
+
+  loco::Pull *pull_node = nullptr;
+  loco::FeatureEncode *encode_node = nullptr;
+  loco::ConstGen *const_node = nullptr;
+  loco::DepthwiseFilterEncode *filter_encode_node = nullptr;
+  loco::DepthwiseConv2D *depthwiseconv2d_node = nullptr;
+  loco::FeatureDecode *decode_node = nullptr;
+  loco::Push *push_node = nullptr;
+
+private:
+  std::unique_ptr<loco::Graph> _graph;
+};
+
+// Testcase graph: Pull -> FeatureEncode --\
+//                 ConstGen -> FilterEncode --> TransposedConv2D
+//                 -> FeatureDecode -> Push
+template <> class GraphTestcase<GraphCode::TransposedConv2D> final
+{
+public:
+  GraphTestcase()
+  {
+    using namespace loco;
+
+    // Prepare permutations
+    Permutation<Domain::Feature> feature_perm = make_NHWC_perm<Domain::Feature>();
+    Permutation<Domain::Filter> filter_perm = make_HWCN_perm<Domain::Filter>();
+
+    // Build graph
+    _graph = make_graph();
+    auto graph_builder = make_graph_builder(_graph.get());
+
+    pull_node = graph_builder->push<InputLayer>()->name("input")->node();
+    encode_node = graph_builder->push<FeatureEncodeLayer>()->perm(feature_perm)->node();
+    const_node = graph_builder->push<ConstGenLayer>()->node();
+    filter_encode_node = graph_builder->push<FilterEncodeLayer>()->perm(filter_perm)->node();
+    tr_conv2d_node = graph_builder->push<TransposedConv2DLayer>()->node();
+    decode_node = graph_builder->push<FeatureDecodeLayer>()->perm(feature_perm)->node();
+    push_node = graph_builder->push<OutputLayer>()->name("output")->node();
+  }
+
+public:
+  loco::Graph *graph() { return _graph.get(); }
+
+  loco::Pull *pull_node = nullptr;
+  loco::FeatureEncode *encode_node = nullptr;
+  loco::ConstGen *const_node = nullptr;
+  loco::FilterEncode *filter_encode_node = nullptr;
+  loco::TransposedConv2D *tr_conv2d_node = nullptr;
+  loco::FeatureDecode *decode_node = nullptr;
+  loco::Push *push_node = nullptr;
+
+private:
+  std::unique_ptr<loco::Graph> _graph;
+};
+
+// Testcase graph: Pull -> FeatureEncode -> MaxPool2D -> FeatureDecode -> Push.
+// Mirrors the AvgPool2D testcase with MaxPool2D in the middle.
+template <> class GraphTestcase<GraphCode::MaxPool2D> final
+{
+public:
+  GraphTestcase()
+  {
+    using namespace loco;
+
+    // Create a sample network
+    _graph = make_graph();
+
+    // Create Graph Input/Output
+    auto graph_input = _graph->inputs()->create();
+    auto graph_output = _graph->outputs()->create();
+
+    graph_input->name("input");
+    graph_output->name("output");
+
+    // Create and connect nodes
+    pull_node = _graph->nodes()->create<Pull>();
+    pull_node->index(0);
+
+    encode_node = _graph->nodes()->create<FeatureEncode>();
+    encode_node->input(pull_node);
+
+    maxpool2d_node = _graph->nodes()->create<MaxPool2D>();
+    maxpool2d_node->ifm(encode_node);
+
+    decode_node = _graph->nodes()->create<FeatureDecode>();
+    decode_node->input(maxpool2d_node);
+
+    push_node = _graph->nodes()->create<loco::Push>();
+    push_node->index(0);
+    push_node->from(decode_node);
+
+    // Create a link between input/output and corresponding nodes
+    loco::link(graph_input, pull_node);
+    loco::link(graph_output, push_node);
+  }
+
+public:
+  loco::Graph *graph() { return _graph.get(); }
+
+  loco::Pull *pull_node = nullptr;
+  loco::FeatureEncode *encode_node = nullptr;
+  loco::MaxPool2D *maxpool2d_node = nullptr;
+  loco::FeatureDecode *decode_node = nullptr;
+  loco::Push *push_node = nullptr;
+
+private:
+  std::unique_ptr<loco::Graph> _graph;
+};
+
+// Testcase graph: Pull("lhs") --\
+//                 Pull("rhs") --> TensorConcat -> Push("output")
+template <> class GraphTestcase<GraphCode::TensorConcat> final
+{
+public:
+  GraphTestcase()
+  {
+    using namespace loco;
+
+    // Create a sample network
+    _graph = make_graph();
+
+    // Create Graph Input/Output (two inputs, one output)
+    auto graph_lhs = _graph->inputs()->create();
+    auto graph_rhs = _graph->inputs()->create();
+    auto graph_out = _graph->outputs()->create();
+
+    graph_lhs->name("lhs");
+    graph_rhs->name("rhs");
+    graph_out->name("output");
+
+    // Create and connect nodes
+    lhs_node = _graph->nodes()->create<Pull>();
+    lhs_node->index(0);
+
+    rhs_node = _graph->nodes()->create<Pull>();
+    rhs_node->index(1);
+
+    concat_node = _graph->nodes()->create<TensorConcat>();
+    concat_node->lhs(lhs_node);
+    concat_node->rhs(rhs_node);
+
+    push_node = _graph->nodes()->create<loco::Push>();
+    push_node->index(0);
+    push_node->from(concat_node);
+
+    // Create a link between input/output and corresponding nodes
+    loco::link(graph_lhs, lhs_node);
+    loco::link(graph_rhs, rhs_node);
+    loco::link(graph_out, push_node);
+  }
+
+public:
+  loco::Graph *graph() { return _graph.get(); }
+
+  loco::Pull *lhs_node = nullptr;
+  loco::Pull *rhs_node = nullptr;
+  loco::TensorConcat *concat_node = nullptr;
+  loco::Push *push_node = nullptr;
+
+private:
+  std::unique_ptr<loco::Graph> _graph;
+};
+
+// Testcase graph: Pull("input") -> FixedReshape -> Push("output")
+template <> class GraphTestcase<GraphCode::FixedReshape> final
+{
+public:
+  GraphTestcase()
+  {
+    _graph = loco::make_graph();
+
+    auto graph_builder = make_graph_builder(_graph.get());
+
+    pull_node = graph_builder->push<InputLayer>()->name("input")->node();
+    reshape_node = graph_builder->push<FixedReshapeLayer>()->node();
+    push_node = graph_builder->push<OutputLayer>()->name("output")->node();
+  }
+
+public:
+  loco::Graph *graph() { return _graph.get(); }
+
+  loco::Pull *pull_node = nullptr;
+  loco::FixedReshape *reshape_node = nullptr;
+  loco::Push *push_node = nullptr;
+
+private:
+  std::unique_ptr<loco::Graph> _graph;
+};
+
+// Testcase graph: Pull("input", dims) -> TensorBroadcast -> Push("output").
+// Input shape is caller-provided; no default constructor exists.
+template <> class GraphTestcase<GraphCode::TensorBroadcast> final
+{
+public:
+  GraphTestcase(std::initializer_list<uint32_t> dims)
+  {
+    _graph = loco::make_graph();
+
+    auto graph_builder = make_graph_builder(_graph.get());
+
+    pull_node = graph_builder->push<InputLayer>()->name("input")->shape(dims)->node();
+    broadcast_node = graph_builder->push<TensorBroadcastLayer>()->node();
+    push_node = graph_builder->push<OutputLayer>()->name("output")->node();
+  }
+
+public:
+  loco::Graph *graph(void) { return _graph.get(); }
+
+  loco::Pull *pull_node = nullptr;
+  loco::TensorBroadcast *broadcast_node = nullptr;
+  loco::Push *push_node = nullptr;
+
+private:
+  std::unique_ptr<loco::Graph> _graph;
+};
+
+// Testcase graph: Pull("input") -> TensorTranspose -> Push("output")
+template <> class GraphTestcase<GraphCode::TensorTranspose> final
+{
+public:
+  GraphTestcase()
+  {
+    using namespace loco;
+
+    // Create a sample network
+    _graph = make_graph();
+
+    // Create Graph Input/Output
+    auto graph_input = _graph->inputs()->create();
+    auto graph_output = _graph->outputs()->create();
+
+    graph_input->name("input");
+    graph_output->name("output");
+
+    // Create and connect nodes
+    pull_node = _graph->nodes()->create<Pull>();
+    pull_node->index(0);
+
+    transpose_node = _graph->nodes()->create<TensorTranspose>();
+    transpose_node->input(pull_node);
+
+    push_node = _graph->nodes()->create<loco::Push>();
+    push_node->index(0);
+    push_node->from(transpose_node);
+
+    // Create a link between input/output and corresponding nodes
+    loco::link(graph_input, pull_node);
+    loco::link(graph_output, push_node);
+  }
+
+public:
+  loco::Graph *graph() { return _graph.get(); }
+
+  loco::Pull *pull_node = nullptr;
+  loco::TensorTranspose *transpose_node = nullptr;
+  loco::Push *push_node = nullptr;
+
+private:
+  std::unique_ptr<loco::Graph> _graph;
+};
+
+#endif // __GRAPH_TESTCASE_H__
diff --git a/compiler/loco/src/Service/MultiDialectShapeInferenceRule.cpp b/compiler/loco/src/Service/MultiDialectShapeInferenceRule.cpp
new file mode 100644
index 000000000..2178f5d05
--- /dev/null
+++ b/compiler/loco/src/Service/MultiDialectShapeInferenceRule.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/Service/MultiDialectShapeInferenceRule.h"
+#include "loco/Service/ShapeInferenceRule.h"
+
+#include <loco/IR/Dialect.h>
+#include <loco/IR/Node.h>
+#include <loco/IR/NodeShape.h>
+
+#include <cassert>
+
+namespace loco
+{
+
+// A dialect is recognized iff a rule was bound for it AND that rule itself
+// recognizes the dialect.
+bool MultiDialectShapeInferenceRule::recognize(const Dialect *d) const
+{
+  const auto found = _rules.find(d);
+
+  if (found == _rules.cend())
+    return false;
+
+  auto rule = found->second;
+  auto result = rule->recognize(d);
+
+  return result;
+}
+
+// Dispatch shape inference to the rule bound for the node's dialect.
+// Returns false when no rule is bound or the bound rule fails.
+bool MultiDialectShapeInferenceRule::infer(const Node *node, NodeShape &shape) const
+{
+  const auto found = _rules.find(node->dialect());
+
+  if (found == _rules.cend())
+    return false;
+
+  auto rule = found->second;
+  if (rule->infer(node, shape))
+    return true;
+
+  return false;
+}
+
+// Register "rule" for dialect "d". Each dialect may be bound at most once,
+// and the rule must recognize the dialect it is bound to (debug-checked).
+// Returns *this so bindings can be chained.
+MultiDialectShapeInferenceRule &MultiDialectShapeInferenceRule::bind(const Dialect *d,
+                                                                     const ShapeInferenceRule *rule)
+{
+  assert(_rules.find(d) == _rules.end());
+  assert(rule->recognize(d));
+
+  _rules[d] = rule;
+
+  return (*this);
+}
+
+} // namespace loco
diff --git a/compiler/loco/src/Service/MultiDialectShapeInferenceRule.test.cpp b/compiler/loco/src/Service/MultiDialectShapeInferenceRule.test.cpp
new file mode 100644
index 000000000..ffa9ee5ca
--- /dev/null
+++ b/compiler/loco/src/Service/MultiDialectShapeInferenceRule.test.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/Service/CanonicalShapeInferenceRule.h"
+#include "loco/Service/MultiDialectShapeInferenceRule.h"
+#include "loco/Service/ShapeInference.h"
+
+#include <loco/IR/Dialect.h>
+#include <loco/IR/CanonicalDialect.h>
+
+#include <gtest/gtest.h>
+
+#include <cassert>
+#include <vector>
+
+// mockup for MultiDialectShapeInferenceRule
+// Each class is dedicated for handling shape { D1, D2 } and D1, D2 are declared as a template
+namespace
+{
+
+// Mock dialect; each <D1, D2> instantiation is a distinct dialect singleton.
+template <uint32_t D1, uint32_t D2> class TestDialect final : public loco::Dialect
+{
+public:
+  static Dialect *get(void)
+  {
+    static TestDialect<D1, D2> d;
+    return &d;
+  }
+};
+
+// Mock single-input node belonging to TestDialect<D1, D2>
+template <uint32_t D1, uint32_t D2>
+struct TestOpNode final : public loco::FixedArity<1>::Mixin<loco::Node>,
+                          public loco::NodeMixin<loco::NodeTrait::TensorShape>
+{
+  void input(Node *node) { at(0)->node(node); }
+  const loco::Dialect *dialect(void) const final { return TestDialect<D1, D2>::get(); }
+  uint32_t opnum(void) const final { return static_cast<uint32_t>(D1); /* not used */ }
+};
+
+// Mock rule: always infers the fixed tensor shape { D1, D2 } for
+// nodes of TestDialect<D1, D2>.
+template <uint32_t D1, uint32_t D2>
+struct TestShapeInferenceRule final : public loco::ShapeInferenceRule
+{
+public:
+  bool recognize(const loco::Dialect *d) const final { return (d == TestDialect<D1, D2>::get()); }
+
+  bool infer(const loco::Node *node, loco::NodeShape &node_shape) const final
+  {
+    assert(recognize(node->dialect()));
+    auto test_node = dynamic_cast<const TestOpNode<D1, D2> *>(node);
+    assert(test_node != nullptr);
+
+    loco::TensorShape ts;
+    {
+      ts.rank(2);
+      ts.dim(0) = D1;
+      ts.dim(1) = D2; // making shape : { D1, D2 }
+    }
+
+    node_shape.set(ts);
+
+    return true;
+  }
+};
+
+} // namespace
+
+// Verifies that MultiDialectShapeInferenceRule dispatches per-dialect:
+// two mock dialects each get their own rule, and each node ends up with
+// the shape its dialect's rule produces.
+TEST(MultiDialectShapeInferenceRuleTest, test1)
+{
+  // Create a simple network : Pull ------- t23<2,3> ------------ t45<4,5> ---------- Push
+  //                                TensorShape({2, 3})   TensorShape({4, 5})
+  auto g = loco::make_graph();
+
+  auto pull_node = g->nodes()->create<loco::Pull>();
+  auto t23_node = g->nodes()->create<TestOpNode<2, 3>>();
+  auto t45_node = g->nodes()->create<TestOpNode<4, 5>>();
+  auto push_node = g->nodes()->create<loco::Push>();
+
+  t23_node->input(pull_node);
+  t45_node->input(t23_node);
+  push_node->from(t45_node);
+
+  auto graph_input = g->inputs()->create();
+  graph_input->name("input");
+  loco::link(graph_input, pull_node);
+
+  auto graph_output = g->outputs()->create();
+  graph_output->name("output");
+  loco::link(graph_output, push_node);
+
+  // initially they don't have shape info
+  ASSERT_FALSE(loco::shape_known(t23_node));
+  ASSERT_FALSE(loco::shape_known(t45_node));
+
+  // Run Type Inference
+  loco::CanonicalShapeInferenceRule canonical_rule;
+  TestShapeInferenceRule<2, 3> t23_rule;
+  TestShapeInferenceRule<4, 5> t45_rule;
+
+  loco::MultiDialectShapeInferenceRule rules;
+
+  // Bind one rule per dialect; chaining relies on bind() returning *this
+  rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
+      .bind(TestDialect<2, 3>::get(), &t23_rule)
+      .bind(TestDialect<4, 5>::get(), &t45_rule);
+
+  loco::apply(&rules).to(g.get());
+
+  // Verify!
+  ASSERT_TRUE(loco::shape_known(t23_node));
+  auto t23_shape = loco::shape_get(t23_node);
+  ASSERT_EQ(t23_shape.domain(), loco::Domain::Tensor);
+  ASSERT_EQ(t23_shape.as<loco::TensorShape>().rank(), 2);
+  ASSERT_EQ(t23_shape.as<loco::TensorShape>().dim(0), 2);
+  ASSERT_EQ(t23_shape.as<loco::TensorShape>().dim(1), 3);
+
+  ASSERT_TRUE(loco::shape_known(t45_node));
+  auto t45_shape = loco::shape_get(t45_node);
+  ASSERT_EQ(t45_shape.domain(), loco::Domain::Tensor);
+  ASSERT_EQ(t45_shape.as<loco::TensorShape>().rank(), 2);
+  ASSERT_EQ(t45_shape.as<loco::TensorShape>().dim(0), 4);
+  ASSERT_EQ(t45_shape.as<loco::TensorShape>().dim(1), 5);
+}
diff --git a/compiler/loco/src/Service/ShapeInference.cpp b/compiler/loco/src/Service/ShapeInference.cpp
new file mode 100644
index 000000000..84eb10963
--- /dev/null
+++ b/compiler/loco/src/Service/ShapeInference.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/Service/ShapeInference.h"
+#include "loco/IR/Algorithm.h"
+
+#include <cassert>
+
+#include <stdex/Memory.h>
+
+namespace
+{
+
+// Returns true iff every argument of "node" already has a known shape,
+// i.e. the node is ready for shape inference.
+bool inputs_shape_ready(loco::Node *node)
+{
+  assert(node != nullptr);
+
+  for (uint32_t arity = 0; arity < node->arity(); ++arity)
+  {
+    if (!loco::ShapeInference::known(node->arg(arity)))
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+} // namespace
+
+//
+// Infrastructure
+//
+namespace
+{
+
+// Node annotation carrying an inferred shape; its presence on a node is
+// what ShapeInference::known() checks.
+struct ShapeAnnotation : public loco::NodeAnnotation
+{
+public:
+  ShapeAnnotation(const loco::NodeShape &shape) : _shape{shape}
+  {
+    // DO NOTHING
+  }
+
+public:
+  const loco::NodeShape &shape(void) const { return _shape; }
+
+private:
+  loco::NodeShape _shape;
+};
+
+} // namespace
+
+namespace loco
+{
+
+// Run shape inference over "g" in postorder (inputs before users).
+// A node is annotated only when its dialect is recognized, its shape is not
+// yet known, and all of its inputs' shapes are known.
+// Returns true iff at least one node was newly annotated.
+bool ShapeInferenceSession::to(Graph *g) const
+{
+  assert(_rule->support(ShapeInferenceRule::API::V1) && "API v1 is unavailable");
+
+  bool changed = false;
+
+  for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+  {
+    if (_rule->recognize(node->dialect()))
+    {
+      loco::NodeShape shape;
+
+      if (!shape_known(node) && inputs_shape_ready(node))
+      {
+        if (_rule->infer(node, shape))
+        {
+          node->annot(stdex::make_unique<ShapeAnnotation>(shape));
+          changed = true;
+        }
+      }
+    }
+  }
+
+  return changed;
+}
+
+// A node's shape is "known" iff it carries a ShapeAnnotation
+bool ShapeInference::known(const Node *node) { return node->annot<ShapeAnnotation>() != nullptr; }
+
+// Precondition: known(node) holds
+NodeShape ShapeInference::get(const Node *node)
+{
+  assert(known(node));
+  return node->annot<ShapeAnnotation>()->shape();
+}
+
+// Drop the shape annotation (if any); afterwards known(node) is false
+void ShapeInference::erase(Node *node) { node->annot<ShapeAnnotation>(nullptr); }
+
+} // namespace loco
diff --git a/compiler/loco/src/Service/ShapeInference.test.cpp b/compiler/loco/src/Service/ShapeInference.test.cpp
new file mode 100644
index 000000000..e10b98844
--- /dev/null
+++ b/compiler/loco/src/Service/ShapeInference.test.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/Service/ShapeInference.h"
+#include "GraphTestcase.h"
+
+#include <vector>
+
+#include <gtest/gtest.h>
+
+// This test validates whether framework works as expected.
+// Exercises the inference framework itself (not any concrete rule):
+// visit order, annotation on success, and no annotation on failure.
+TEST(ShapeInferenceTest, framework)
+{
+  // Mock-up Shape Inference Rule
+  struct SampleShapeInferenceRule final : public loco::ShapeInferenceRule
+  {
+  public:
+    SampleShapeInferenceRule(std::vector<const loco::Node *> *nodes) : _nodes{nodes}
+    {
+      // DO NOTHING
+    }
+
+  public:
+    // Accept all the dialects
+    bool recognize(const loco::Dialect *) const final { return true; }
+
+    bool infer(const loco::Node *node, loco::NodeShape &shape) const final
+    {
+      // Record the order of inference
+      _nodes->emplace_back(node);
+
+      // Succeed only for the first visited node; fail for the rest
+      if (_nodes->size() != 1)
+      {
+        return false;
+      }
+
+      // Set the first node as Tensor<1>
+      loco::TensorShape tensor_shape;
+
+      tensor_shape.rank(1);
+      tensor_shape.dim(0) = 4;
+
+      shape.set(tensor_shape);
+
+      return true;
+    }
+
+  private:
+    std::vector<const loco::Node *> *_nodes;
+  };
+
+  GraphTestcase<GraphCode::Identity> testcase;
+
+  std::vector<const loco::Node *> nodes;
+
+  SampleShapeInferenceRule rule{&nodes};
+
+  loco::apply(&rule).to(testcase.graph());
+
+  // Framework SHOULD visit all the nodes
+  ASSERT_EQ(nodes.size(), 2);
+  // Framework SHOULD visit "pull" before "push"
+  ASSERT_EQ(nodes.at(0), testcase.pull_node);
+  ASSERT_EQ(nodes.at(1), testcase.push_node);
+
+  // Framework SHOULD make an annotation if "rule" returns TRUE
+  ASSERT_TRUE(loco::shape_known(testcase.pull_node));
+  ASSERT_EQ(loco::shape_get(testcase.pull_node).domain(), loco::Domain::Tensor);
+  ASSERT_EQ(loco::shape_get(testcase.pull_node).as<loco::TensorShape>().rank(), 1);
+  ASSERT_EQ(loco::shape_get(testcase.pull_node).as<loco::TensorShape>().dim(0), 4);
+
+  // Framework SHOULD NOT make any annotation if "rule" returns FALSE
+  ASSERT_FALSE(loco::shape_known(testcase.push_node));
+}
diff --git a/compiler/loco/src/Service/ShapeInferenceRule.cpp b/compiler/loco/src/Service/ShapeInferenceRule.cpp
new file mode 100644
index 000000000..bed841260
--- /dev/null
+++ b/compiler/loco/src/Service/ShapeInferenceRule.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/Service/ShapeInferenceRule.h"
+
+#include <stdexcept>
+
+// This file validates "ShapeInferenceRule.h". Please DO NOT remove this file.
+
+namespace loco
+{
+
+// Default implementation of the (Context, Node, Sink) overload: rules that do
+// not opt into API v2 reject it at runtime. V2-capable rules must override.
+void ShapeInferenceRule::infer(const Context *, const Node *, Sink *) const
+{
+  throw std::runtime_error{"API v2 is not supported"};
+}
+
+} // namespace loco
diff --git a/compiler/loco/src/Service/TypeInference.cpp b/compiler/loco/src/Service/TypeInference.cpp
new file mode 100644
index 000000000..fbf0033ee
--- /dev/null
+++ b/compiler/loco/src/Service/TypeInference.cpp
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/Service/TypeInference.h"
+
+#include "loco/IR/Algorithm.h"
+
+#include <cassert>
+
+#include <stdex/Memory.h>
+
+namespace
+{
+
+// Node annotation carrying an inferred data type; its presence on a node
+// is what TypeInference::known() checks.
+struct DataTypeAnnotation : public loco::NodeAnnotation
+{
+public:
+  DataTypeAnnotation(const loco::DataType &dtype) : _dtype{dtype}
+  {
+    // DO NOTHING
+  }
+
+public:
+  const loco::DataType &dtype(void) const { return _dtype; }
+
+private:
+  loco::DataType _dtype;
+};
+
+// Returns true iff every argument of "node" already has a known data type,
+// i.e. the node is ready for type inference.
+bool inputs_dtype_ready(loco::Node *node)
+{
+  assert(node != nullptr);
+
+  for (uint32_t arity = 0; arity < node->arity(); ++arity)
+  {
+    if (!loco::TypeInference::known(node->arg(arity)))
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+} // namespace
+
+namespace loco
+{
+
+// Run data-type inference over "g" in postorder (inputs before users).
+// A node is annotated only when its dialect is recognized, its dtype is not
+// yet known, and all of its inputs' dtypes are known.
+// Returns true iff at least one node was newly annotated.
+bool TypeInferenceSession::to(Graph *g) const
+{
+  bool changed = false;
+
+  for (auto node : postorder_traversal(output_nodes(g)))
+  {
+    if (_rule->recognize(node->dialect()))
+    {
+      DataType dtype = DataType::Unknown;
+
+      if (!dtype_known(node) && inputs_dtype_ready(node))
+      {
+        if (_rule->infer(node, dtype))
+        {
+          node->annot(stdex::make_unique<DataTypeAnnotation>(dtype));
+          changed = true;
+        }
+      }
+    }
+  }
+
+  return changed;
+}
+
+// A node's dtype is "known" iff it carries a DataTypeAnnotation
+bool TypeInference::known(const Node *node) { return node->annot<DataTypeAnnotation>() != nullptr; }
+
+// Precondition: known(node) holds
+DataType TypeInference::get(const Node *node)
+{
+  assert(known(node));
+  return node->annot<DataTypeAnnotation>()->dtype();
+}
+
+// Drop the dtype annotation (if any); afterwards known(node) is false.
+// NOTE(review): "return" on a void expression is legal but inconsistent
+// with ShapeInference::erase, which has no "return".
+void TypeInference::erase(Node *node) { return node->annot<DataTypeAnnotation>(nullptr); }
+
+} // namespace loco
+
+//
+// Canonical (Data) Type Inference Rule
+//
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/IR/CanonicalNode.h>
+#include <loco/IR/CanonicalNodeVisitor.h>
+
+namespace
+{
+
+/**
+ * There are two possible maintenance policies.
+ * - Introduce a new canonical node first, and then extend this algorithm later
+ * - Introduce a new canonical node and extend this algorithm at the same time
+ *
+ * The current implementation assumes the former one (for historical reason).
+ *
+ * TODO Evaluate the impact of the latter one
+ */
+/**
+ * Forwards the data type through each canonical node: most nodes inherit the
+ * dtype of their (first) input, while ConstGen/Pull report their own dtype.
+ *
+ * There are two possible maintenance policies.
+ * - Introduce a new canonical node first, and then extend this algorithm later
+ * - Introduce a new canonical node and extend this algorithm at the same time
+ *
+ * The current implementation assumes the former one (for historical reason).
+ *
+ * TODO Evaluate the impact of the latter one
+ */
+struct CanonicalTypeForwardAlgorithm final : public loco::CanonicalNodeVisitor<loco::DataType>
+{
+  loco::DataType visit(const loco::AvgPool2D *node) { return loco::dtype_get(node->ifm()); }
+  loco::DataType visit(const loco::BiasDecode *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::BiasEncode *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::ConstGen *node) { return node->dtype(); }
+  loco::DataType visit(const loco::Conv2D *node) { return loco::dtype_get(node->ifm()); }
+  loco::DataType visit(const loco::DepthwiseConv2D *node) { return loco::dtype_get(node->ifm()); }
+  loco::DataType visit(const loco::DepthwiseFilterEncode *node)
+  {
+    return loco::dtype_get(node->input());
+  }
+  loco::DataType visit(const loco::DepthwiseFilterDecode *node)
+  {
+    return loco::dtype_get(node->input());
+  }
+  loco::DataType visit(const loco::EltwiseAdd *node) { return loco::dtype_get(node->lhs()); }
+  loco::DataType visit(const loco::EltwiseDiv *node) { return loco::dtype_get(node->lhs()); }
+  loco::DataType visit(const loco::EltwiseMax *node) { return loco::dtype_get(node->lhs()); }
+  loco::DataType visit(const loco::EltwiseMul *node) { return loco::dtype_get(node->lhs()); }
+  loco::DataType visit(const loco::EltwiseSqrt *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::EltwiseSub *node) { return loco::dtype_get(node->lhs()); }
+  loco::DataType visit(const loco::Forward *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::FeatureBiasAdd *node) { return loco::dtype_get(node->value()); }
+  loco::DataType visit(const loco::FeatureDecode *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::FeatureEncode *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::FilterDecode *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::FilterEncode *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::FixedReshape *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::MatrixDecode *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::MatrixEncode *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::MatMul *node) { return loco::dtype_get(node->lhs()); }
+  loco::DataType visit(const loco::MaxPool2D *node) { return loco::dtype_get(node->ifm()); }
+  loco::DataType visit(const loco::Push *node) { return loco::dtype_get(node->from()); }
+  loco::DataType visit(const loco::Pull *node) { return node->dtype(); }
+  loco::DataType visit(const loco::ReLU *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::ReLU6 *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::Tanh *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::TensorConcat *node) { return loco::dtype_get(node->lhs()); }
+  loco::DataType visit(const loco::TensorConstantPad *node)
+  {
+    return loco::dtype_get(node->input());
+  }
+  loco::DataType visit(const loco::TensorBiasAdd *node) { return loco::dtype_get(node->value()); }
+  loco::DataType visit(const loco::TensorBroadcast *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::TensorReduce *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::TensorSoftmax *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::TensorTranspose *node) { return loco::dtype_get(node->input()); }
+  loco::DataType visit(const loco::TransposedConv2D *node) { return loco::dtype_get(node->ifm()); }
+};
+
+} // namespace
+
+namespace loco
+{
+
+bool CanonicalTypeInferenceRule::recognize(const Dialect *d) const
+{
+  // This rule recognizes only "loco.canonical" dialect!
+  return CanonicalDialect::get() == d;
+}
+
+// Infer the dtype of a canonical node by forwarding from its inputs
+// (see CanonicalTypeForwardAlgorithm). Always succeeds for canonical nodes.
+bool CanonicalTypeInferenceRule::infer(const Node *node, DataType &dtype) const
+{
+  assert(node->dialect() == loco::CanonicalDialect::get());
+  assert(dynamic_cast<const loco::CanonicalNode *>(node) != nullptr);
+
+  CanonicalTypeForwardAlgorithm alg;
+  dtype = dynamic_cast<const loco::CanonicalNode *>(node)->accept(&alg);
+
+  return true;
+}
+
+// A dialect is recognized iff a rule was bound for it AND that rule itself
+// recognizes the dialect (mirrors MultiDialectShapeInferenceRule).
+bool MultiDialectTypeInferenceRule::recognize(const Dialect *d) const
+{
+  const auto found = _rules.find(d);
+
+  if (found == _rules.cend())
+    return false;
+
+  auto rule = found->second;
+  auto result = rule->recognize(d);
+
+  return result;
+}
+
+// Dispatch type inference to the rule bound for the node's dialect.
+// Returns false when no rule is bound or the bound rule fails.
+bool MultiDialectTypeInferenceRule::infer(const Node *node, DataType &dtype) const
+{
+  const auto found = _rules.find(node->dialect());
+
+  if (found == _rules.cend())
+    return false;
+
+  auto rule = found->second;
+  if (rule->infer(node, dtype))
+    return true;
+
+  return false;
+}
+
+// Register "rule" for dialect "d". Each dialect may be bound at most once,
+// and the rule must recognize the dialect it is bound to (debug-checked).
+// Returns *this so bindings can be chained.
+MultiDialectTypeInferenceRule &MultiDialectTypeInferenceRule::bind(const Dialect *d,
+                                                                   const TypeInferenceRule *rule)
+{
+  assert(_rules.find(d) == _rules.end());
+  assert(rule->recognize(d));
+
+  _rules[d] = rule;
+
+  return (*this);
+}
+
+} // namespace loco
diff --git a/compiler/loco/src/Service/TypeInference.test.cpp b/compiler/loco/src/Service/TypeInference.test.cpp
new file mode 100644
index 000000000..4660401db
--- /dev/null
+++ b/compiler/loco/src/Service/TypeInference.test.cpp
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco/Service/TypeInference.h"
+
+#include "GraphTestcase.h"
+
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/TypeInference.h>
+
+#include <vector>
+
+#include <gtest/gtest.h>
+
+// This test validates whether framework works as expected.
+TEST(TypeInferenceTest, framework)
+{
+ // Create a sample network
+ auto g = loco::make_graph();
+
+ auto pull_node = g->nodes()->create<loco::Pull>();
+ auto push_node = g->nodes()->create<loco::Push>();
+
+ push_node->from(pull_node);
+
+ // Create Graph Input & Output
+ auto graph_input = g->inputs()->create();
+
+ graph_input->name("input");
+ loco::link(graph_input, pull_node);
+
+ auto graph_output = g->outputs()->create();
+
+ graph_output->name("output");
+ loco::link(graph_output, push_node);
+
+ // Mock-up Type Inference Rule
+ struct SampleTypeInferenceRule final : public loco::TypeInferenceRule
+ {
+ public:
+ SampleTypeInferenceRule(std::vector<const loco::Node *> *nodes) : _nodes{nodes}
+ {
+ // DO NOTHING
+ }
+
+ public:
+ bool recognize(const loco::Dialect *) const final
+ {
+ // Accept all the dialects
+ return true;
+ }
+
+ bool infer(const loco::Node *node, loco::DataType &dtype) const final
+ {
+ // Record the order of inference
+ _nodes->emplace_back(node);
+
+ if (_nodes->size() != 1)
+ {
+ return false;
+ }
+
+ // Annotate the first node as "U8"
+ dtype = loco::DataType::U8;
+ return true;
+ }
+
+ private:
+ std::vector<const loco::Node *> *_nodes;
+ };
+
+ std::vector<const loco::Node *> nodes;
+
+ SampleTypeInferenceRule rule{&nodes};
+
+ loco::apply(&rule).to(g.get());
+
+ ASSERT_EQ(nodes.size(), 2); // Framework SHOULD visit all the nodes
+ ASSERT_EQ(nodes.at(0), pull_node); // Framework SHOULD visit "pull" before "push"
+ ASSERT_EQ(nodes.at(1), push_node);
+
+ // Framework SHOULD make an annotation if "rule" returns TRUE
+ ASSERT_TRUE(loco::dtype_known(pull_node));
+ ASSERT_EQ(loco::dtype_get(pull_node), loco::DataType::U8);
+ // Framework SHOULD NOT make any annotation if "rule" returns FALSE
+ ASSERT_FALSE(loco::dtype_known(push_node));
+}
+
+TEST(CanonicalTypeInferenceRuleTest, minimal)
+{
+ // Create a simple network
+ auto g = loco::make_graph();
+
+ auto pull_node = g->nodes()->create<loco::Pull>();
+
+ pull_node->dtype(loco::DataType::U8);
+
+ auto push_node = g->nodes()->create<loco::Push>();
+
+ push_node->from(pull_node);
+
+ auto graph_input = g->inputs()->create();
+
+ graph_input->name("input");
+ loco::link(graph_input, pull_node);
+
+ auto graph_output = g->outputs()->create();
+
+ graph_output->name("output");
+ loco::link(graph_output, push_node);
+
+ // Run Type Inference
+ loco::CanonicalTypeInferenceRule rule;
+
+ loco::apply(&rule).to(g.get());
+
+ // Verify!
+ ASSERT_TRUE(loco::dtype_known(push_node));
+ ASSERT_EQ(loco::dtype_get(push_node), loco::DataType::U8);
+}
+
+TEST(CanonicalTypeInferenceRuleTest, relu6)
+{
+ // Create a simple Relu6 network
+ auto g = loco::make_graph();
+
+ auto pull_node = g->nodes()->create<loco::Pull>();
+
+ pull_node->dtype(loco::DataType::FLOAT32);
+
+ auto relu6_node = g->nodes()->create<loco::ReLU6>();
+
+ relu6_node->input(pull_node);
+
+ auto push_node = g->nodes()->create<loco::Push>();
+
+ push_node->from(relu6_node);
+
+ auto graph_input = g->inputs()->create();
+
+ graph_input->name("input");
+ loco::link(graph_input, pull_node);
+
+ auto graph_output = g->outputs()->create();
+
+ graph_output->name("output");
+ loco::link(graph_output, push_node);
+
+ // Run Type Inference
+ loco::CanonicalTypeInferenceRule rule;
+
+ loco::apply(&rule).to(g.get());
+
+ // Verify!
+ ASSERT_TRUE(loco::dtype_known(relu6_node));
+ ASSERT_EQ(loco::dtype_get(relu6_node), loco::DataType::FLOAT32);
+}
+
+TEST(CanonicalTypeInferenceRuleTest, tensor_broadcast)
+{
+ // Create a sample network
+ GraphTestcase<GraphCode::TensorBroadcast> testcase{1, 2};
+
+ testcase.graph()->inputs()->at(0)->dtype(loco::DataType::U8);
+
+ // Run Type Inference
+ loco::CanonicalTypeInferenceRule rule;
+
+ loco::apply(&rule).to(testcase.graph());
+
+ // Verify!
+ ASSERT_TRUE(loco::dtype_known(testcase.push_node));
+ ASSERT_EQ(loco::dtype_get(testcase.push_node), loco::DataType::U8);
+}
+
+// mockup for MultiDialectTypeInferenceRule
+// OpNode of a specific loco datatype (defined in template) will be used.
+// And a Dialect for the OpNode and its inference rules are created.
+#include <loco/IR/Dialect.h>
+
+namespace
+{
+
+template <loco::DataType N> class TestDialect final : public loco::Dialect
+{
+public:
+ static Dialect *get(void)
+ {
+ static TestDialect<N> d;
+ return &d;
+ }
+};
+
+template <loco::DataType N>
+struct TestOpNode final : public loco::FixedArity<1>::Mixin<loco::Node>,
+ public loco::NodeMixin<loco::NodeTrait::DataType>
+{
+ void input(Node *node) { at(0)->node(node); }
+ const loco::Dialect *dialect(void) const final { return TestDialect<N>::get(); }
+ uint32_t opnum(void) const final { return static_cast<uint32_t>(N); }
+};
+
+template <loco::DataType N> struct TestTypeInferenceRule final : public loco::TypeInferenceRule
+{
+public:
+ bool recognize(const loco::Dialect *d) const final { return (d == TestDialect<N>::get()); }
+
+ bool infer(const loco::Node *node, loco::DataType &dtype) const final
+ {
+ assert(node->dialect() == TestDialect<N>::get());
+ auto test_node = dynamic_cast<const TestOpNode<N> *>(node);
+ assert(test_node != nullptr);
+
+ dtype = N;
+ return true;
+ }
+};
+
+} // namespace
+
+TEST(MultiDialectTypeInferenceRuleTest, test1)
+{
+ // Create a simple network : Pull - S8 - U8 - Push
+ auto g = loco::make_graph();
+
+ auto pull_node = g->nodes()->create<loco::Pull>();
+ pull_node->dtype(loco::DataType::FLOAT32);
+
+ auto s8_node = g->nodes()->create<TestOpNode<loco::DataType::S8>>();
+ s8_node->input(pull_node);
+
+ auto u8_node = g->nodes()->create<TestOpNode<loco::DataType::U8>>();
+ u8_node->input(s8_node);
+
+ auto push_node = g->nodes()->create<loco::Push>();
+ push_node->from(u8_node);
+
+ auto graph_input = g->inputs()->create();
+ graph_input->name("input");
+ loco::link(graph_input, pull_node);
+
+ auto graph_output = g->outputs()->create();
+ graph_output->name("output");
+ loco::link(graph_output, push_node);
+
+ // initially they don't have type info
+ ASSERT_FALSE(loco::dtype_known(s8_node));
+ ASSERT_FALSE(loco::dtype_known(u8_node));
+
+ // Run Type Inference
+ TestTypeInferenceRule<loco::DataType::U8> u8_rule;
+ TestTypeInferenceRule<loco::DataType::S8> s8_rule;
+ loco::CanonicalTypeInferenceRule canon_rule;
+
+ loco::MultiDialectTypeInferenceRule rules;
+
+ rules.bind(TestDialect<loco::DataType::S8>::get(), &s8_rule)
+ .bind(TestDialect<loco::DataType::U8>::get(), &u8_rule)
+ .bind(loco::CanonicalDialect::get(), &canon_rule);
+
+ loco::apply(&rules).to(g.get());
+
+ // Verify!
+ ASSERT_TRUE(loco::dtype_known(s8_node));
+ ASSERT_EQ(loco::dtype_get(s8_node), loco::DataType::S8);
+
+ ASSERT_TRUE(loco::dtype_known(u8_node));
+ ASSERT_EQ(loco::dtype_get(u8_node), loco::DataType::U8);
+}
diff --git a/compiler/loco/src/loco.test.cpp b/compiler/loco/src/loco.test.cpp
new file mode 100644
index 000000000..4c4f51aa5
--- /dev/null
+++ b/compiler/loco/src/loco.test.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loco.h"
+
+#include <gtest/gtest.h>
+
+// This test shows how to create an "identity" network with loco.
+//
+// What is "identity" network?
+// - A network simply passes its input as its output
+//
+// TODO Create "Output" first and then create "Push" later
+TEST(LOCO, identity_network)
+{
+ auto g = loco::make_graph();
+
+ // Create a "pull" node as an input
+ auto pull_node = g->nodes()->create<loco::Pull>();
+
+ // Set "data type"
+ pull_node->dtype(loco::DataType::FLOAT32);
+
+ // Set "data shape"
+ pull_node->rank(2);
+ pull_node->dim(0) = 3;
+ pull_node->dim(1) = 4;
+
+ // Create a "push" node as an output
+ auto push_node = g->nodes()->create<loco::Push>();
+
+ // Set "source"
+ push_node->from(pull_node);
+
+ // Create Graph Input & Output
+ auto graph_input = g->inputs()->create();
+
+ graph_input->name("input");
+ loco::link(graph_input, pull_node);
+ graph_input->dtype(loco::DataType::FLOAT32);
+
+ auto graph_output = g->outputs()->create();
+
+ graph_output->name("output");
+ loco::link(graph_output, push_node);
+
+ // loco::link SHOULD update "index"
+ ASSERT_EQ(pull_node->index(), 0);
+ ASSERT_EQ(graph_input->dtype(), loco::DataType::FLOAT32);
+
+ // loco::link SHOULD update "index"
+ ASSERT_EQ(push_node->index(), 0);
+}
+
+#if 0
+"identity_network_V2" test shows how to use loco when loco.core and loco.canonical are decoupled.
+
+NOTE "identity_network" test is left for backward compatibility check
+TODO Remove "identity_network" test once all the clients are migrated.
+#endif
+TEST(LOCO, identity_network_V2)
+{
+ auto g = loco::make_graph();
+
+ // Create Graph Input & Output
+ auto graph_input = g->inputs()->create();
+
+ graph_input->name("input");
+ graph_input->dtype(loco::DataType::FLOAT32);
+ // TODO Set Shape
+
+ auto graph_output = g->outputs()->create();
+
+ graph_output->name("output");
+ graph_output->dtype(loco::DataType::FLOAT32);
+ // TODO Set Shape
+
+ // Create a "pull" node as an input
+ auto pull_node = g->nodes()->create<loco::Pull>();
+
+ pull_node->index(0);
+
+ // Create a "push" node as an output
+ auto push_node = g->nodes()->create<loco::Push>();
+
+ push_node->index(0);
+ push_node->from(pull_node);
+
+ ASSERT_EQ(pull_node->dtype(), loco::DataType::FLOAT32);
+ // TODO Check Shape of pull_node
+ // TODO Check Shape of push_node
+
+ ASSERT_EQ(loco::pull_node(g.get(), 0), pull_node);
+ ASSERT_EQ(loco::push_node(g.get(), 0), push_node);
+}
diff --git a/compiler/loco/src/tensorflow.test.cpp b/compiler/loco/src/tensorflow.test.cpp
new file mode 100644
index 000000000..f534aee7b
--- /dev/null
+++ b/compiler/loco/src/tensorflow.test.cpp
@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @brief This file includes various tests that show how to encode TensorFlow models using loco.
+ *
+ * @note All the python examples below assume TensorFlow v1.13
+ */
+#include "loco.h"
+
+#include <gtest/gtest.h>
+
+#include <stdex/Memory.h>
+
+using stdex::make_unique;
+
+namespace
+{
+
+loco::Permutation<loco::Domain::Feature> make_NHWC_permutation(void)
+{
+ loco::Permutation<loco::Domain::Feature> NHWC;
+
+ NHWC.axis(loco::FeatureAxis::Count) = 0;
+ NHWC.axis(loco::FeatureAxis::Height) = 1;
+ NHWC.axis(loco::FeatureAxis::Width) = 2;
+ NHWC.axis(loco::FeatureAxis::Depth) = 3;
+
+ return NHWC;
+}
+
+/**
+ * @brief Create a HxWxIxO (or HxWxCxN) permutation which tf.nn.conv2d uses
+ *
+ * Reference: [tf.nn.conv2d](https://www.tensorflow.org/api_docs/python/tf/nn/conv2d)
+ * > Given an input tensor of shape [batch, in_height, in_width, in_channels] and a filter /
+ * > kernel tensor of shape [filter_height, filter_width, in_channels, out_channels], ...
+ *
+ * NOTE "HWIO" is borrowed from TensorFlow Lite Converter
+ *
+ * https://github.com/tensorflow/tensorflow/blob/v1.13.1/tensorflow/lite/toco/model.h#L169
+ */
+loco::Permutation<loco::Domain::Filter> make_HWIO_permutation(void)
+{
+ loco::Permutation<loco::Domain::Filter> HWIO;
+
+ HWIO.axis(loco::FilterAxis::Height) = 0; // H
+ HWIO.axis(loco::FilterAxis::Width) = 1; // W
+ HWIO.axis(loco::FilterAxis::Depth) = 2; // I, a.k.a. C
+ HWIO.axis(loco::FilterAxis::Count) = 3; // O, a.k.a. N
+
+ return HWIO;
+}
+
+} // namespace
+
+#if 0
+>>> MaxPool_Float_000 testcase
+
+MaxPool_Float_000 test guarantees that loco is expressive enough to encode the following example.
+
+Python:
+```
+import tensorflow as tf
+value = tf.placeholder(dtype=tf.float32, shape=[1, 16, 16, 2], name="value")
+maxpool = tf.nn.max_pool(value, [1, 3, 3, 1], [1, 1, 1, 1], 'VALID', name="maxpool")
+tf.get_default_graph().as_graph_def()
+```
+
+The above code produces the following TensorFlow GraphDef:
+
+node {
+ name: "value"
+ op: "Placeholder"
+ attr {
+ key: "dtype"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim { size: 1 }
+ dim { size: 16 }
+ dim { size: 16 }
+ dim { size: 2 }
+ }
+ }
+ }
+}
+node {
+ name: "maxpool"
+ op: "MaxPool"
+ input: "value"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "data_format"
+ value { s: "NHWC" }
+ }
+ attr {
+ key: "ksize"
+ value { list { i: 1 i: 3 i: 3 i: 1 } }
+ }
+ attr {
+ key: "padding"
+ value { s: "VALID" }
+ }
+ attr {
+ key: "strides"
+ value { list { i: 1 i: 1 i: 1 i: 1 } }
+ }
+}
+
+Below test guarantees that loco is expressive enough to encode this example.
+#endif
+TEST(TensorFlowTest, MaxPool_Float_000)
+{
+ auto g = loco::make_graph();
+
+ // The first "value" node corresponds to the following "Pull" node.
+ //
+ // %value = Pull(dtype: FLOAT32, shape: [1, 16, 16, 2])
+ auto value = g->nodes()->create<loco::Pull>();
+
+ value->dtype(loco::DataType::FLOAT32);
+ value->shape({1, 16, 16, 2});
+
+ // The next "maxpool" node corresponds to a sequence of the following loco nodes:
+ // - "FeatureEncode"
+ // - "MaxPool2D"
+ // - "FeatureDecode"
+ //
+ // "maxpool.data_format" is 'NHWC' which corresponds to the following permutation
+ // Count <-> 0
+ // Height <-> 1
+ // Width <-> 2
+ // Depth <-> 3
+ loco::Permutation<loco::Domain::Feature> NHWC;
+
+ NHWC.axis(loco::FeatureAxis::Count) = 0;
+ NHWC.axis(loco::FeatureAxis::Height) = 1;
+ NHWC.axis(loco::FeatureAxis::Width) = 2;
+ NHWC.axis(loco::FeatureAxis::Depth) = 3;
+
+ auto encoder = make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+
+ encoder->perm(NHWC);
+
+ auto decoder = make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+
+ decoder->perm(NHWC);
+
+ // %node_0 = FeatureEncode(%value, perm { Count = 0, Height = 1, Width = 2, Depth = 3 })
+ auto node_0 = g->nodes()->create<loco::FeatureEncode>();
+
+ node_0->input(value);
+ node_0->encoder(std::move(encoder));
+
+ // %node_1 = MaxPool(%node_0, window.H: 3, window.W: 3, stride.H: 1, stride.W : 1)
+ auto node_1 = g->nodes()->create<loco::MaxPool2D>();
+
+ node_1->ifm(node_0);
+
+ // From "ksize" attributes
+ node_1->window()->horizontal(3);
+ node_1->window()->vertical(3);
+
+ // From "strides" attributes
+ node_1->stride()->horizontal(1);
+ node_1->stride()->vertical(1);
+
+ // %output = FeatureDecode(%node_1, perm { Count = 0, Height = 1, Width = 2, Depth = 3 })
+ auto output = g->nodes()->create<loco::FeatureDecode>();
+
+ output->input(node_1);
+ output->decoder(std::move(decoder));
+
+ // %push = Push(%output)
+ auto push = g->nodes()->create<loco::Push>();
+
+ push->from(output);
+
+ //
+ // Mark network-level input/output
+ //
+ auto input_0 = g->inputs()->create();
+ loco::link(input_0, value);
+
+ auto output_0 = g->outputs()->create();
+ loco::link(output_0, push);
+
+ // NOTE This example SHOULD BE valid.
+ ASSERT_TRUE(loco::valid(g.get()));
+}
+
+#if 0
+>>> Conv2D_Float_000 testcase
+
+Conv2D_Float_000 test guarantees that loco is expressive enough to encode the following example.
+
+Python:
+```
+import tensorflow as tf
+inp = tf.placeholder(dtype=tf.float32, shape=[1, 16, 16, 2], name="inp")
+ker = tf.constant(value=[1.0], dtype=tf.float32, shape=[7, 1, 2, 4], name="ker")
+conv2d = tf.nn.conv2d(input=inp, filter=ker, strides=[1, 1, 1, 1], padding='VALID', name="conv2d")
+tf.get_default_graph().as_graph_def()
+```
+
+TensorFlow GraphDef:
+```
+node {
+ name: "inp"
+ op: "Placeholder"
+ attr {
+ key: "dtype"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim { size: 1 }
+ dim { size: 16 }
+ dim { size: 16 }
+ dim { size: 2 }
+ }
+ }
+ }
+}
+node {
+ name: "ker"
+ op: "Const"
+ attr {
+ key: "dtype"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim { size: 7 }
+ dim { size: 1 }
+ dim { size: 2 }
+ dim { size: 4 }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+node {
+ name: "conv2d"
+ op: "Conv2D"
+ input: "inp"
+ input: "ker"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "data_format"
+ value { s: "NHWC" }
+ }
+ attr {
+ key: "dilations"
+ value { list { i: 1 i: 1 i: 1 i: 1 } }
+ }
+ attr {
+ key: "padding"
+ value { s: "VALID" }
+ }
+ attr {
+ key: "strides"
+ value { list { i: 1 i: 1 i: 1 i: 1 } }
+ }
+}
+```
+#endif
+TEST(TensorFlowTest, Conv2D_Float_000)
+{
+ auto g = loco::make_graph();
+
+ // The first "inp" node corresponds to "Pull"
+ auto inp = g->nodes()->create<loco::Pull>();
+ {
+ inp->dtype(loco::DataType::FLOAT32);
+ inp->shape({1, 16, 16, 2});
+ }
+
+ // The second "ker" node corresponds to "ConstGen"
+ auto ker = g->nodes()->create<loco::ConstGen>();
+ {
+ ker->dtype(loco::DataType::FLOAT32);
+ // 'I' denotes IFM DEPTH, and 'O' denotes OFM DEPTH
+ ker->shape({7 /*H*/, 1 /*W*/, 2 /*I*/, 3 /*O*/});
+ ker->size<loco::DataType::FLOAT32>(7 * 1 * 2 * 3);
+ for (uint32_t n = 0; n < 7 * 1 * 2 * 3; ++n)
+ {
+ // NOTE TensorFlow uses the last value to fill unspecified region
+ ker->at<loco::DataType::FLOAT32>(n) = 1.0f;
+ }
+ }
+
+ // The next "conv2d" node is decomposed into the following loco nodes
+ // - "FeatureEncode"
+ // - "FilterEncode"
+ // - "Conv2D"
+ // - "FeatureDecode"
+ auto encoded_ifm = g->nodes()->create<loco::FeatureEncode>();
+ {
+ // From "conv2d.data_format" attribute
+ auto encoder = make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+ encoder->perm(make_NHWC_permutation());
+
+ encoded_ifm->input(inp);
+ encoded_ifm->encoder(std::move(encoder));
+ }
+
+ auto encoded_ker = g->nodes()->create<loco::FilterEncode>();
+ {
+ // From "tf.nn.conv2d" specification
+ auto encoder = make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ encoder->perm(make_HWIO_permutation());
+
+ encoded_ker->input(ker);
+ encoded_ker->encoder(std::move(encoder));
+ }
+
+ auto conv2d = g->nodes()->create<loco::Conv2D>();
+ {
+ conv2d->ifm(encoded_ifm);
+ conv2d->ker(encoded_ker);
+
+ // From "stride" attribute
+ conv2d->stride()->horizontal(1);
+ conv2d->stride()->vertical(1);
+ }
+
+ // "decoded_ofm" corresponds to the output of "conv2d" node.
+ auto decoded_ofm = g->nodes()->create<loco::FeatureDecode>();
+ {
+ // From "conv2d.data_format" attribute
+ auto decoder = make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+ decoder->perm(make_NHWC_permutation());
+
+ decoded_ofm->input(conv2d);
+ decoded_ofm->decoder(std::move(decoder));
+ }
+
+ // Mark "conv2d" as a network-level output with Push
+ auto push = g->nodes()->create<loco::Push>();
+ {
+ push->from(decoded_ofm);
+ }
+
+ //
+ // Mark network-level input/output
+ //
+ auto input_0 = g->inputs()->create();
+ loco::link(input_0, inp);
+
+ auto output_0 = g->outputs()->create();
+ loco::link(output_0, push);
+
+ // NOTE This example SHOULD BE valid.
+ ASSERT_TRUE(loco::valid(g.get()));
+}
diff --git a/compiler/locoex-customop/CMakeLists.txt b/compiler/locoex-customop/CMakeLists.txt
new file mode 100644
index 000000000..df1e01526
--- /dev/null
+++ b/compiler/locoex-customop/CMakeLists.txt
@@ -0,0 +1,18 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(locoex_customop SHARED ${SOURCES})
+target_include_directories(locoex_customop PUBLIC include)
+target_link_libraries(locoex_customop PUBLIC loco)
+target_link_libraries(locoex_customop PRIVATE stdex locop pepper_str)
+install(TARGETS locoex_customop DESTINATION lib)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(locoex_customop_test ${TESTS})
+target_link_libraries(locoex_customop_test loco locoex_customop stdex)
diff --git a/compiler/locoex-customop/README.md b/compiler/locoex-customop/README.md
new file mode 100644
index 000000000..3f71140f9
--- /dev/null
+++ b/compiler/locoex-customop/README.md
@@ -0,0 +1,9 @@
+# locoex
+
+_locoex_ is an extension of loco. Classes with `COp` prefix enable *Custom Operation*.
+In this version, a *custom operation* means one of the following:
+
+1. an op that is supported by Tensorflow but not supported both by the moco and the onert
+1. an op that is not supported by Tensorflow, moco, and the onert
+
+`COpCall` node will represent IR entity that calls custom operations and kernels.
diff --git a/compiler/locoex-customop/include/locoex/COpAttrTypes.h b/compiler/locoex-customop/include/locoex/COpAttrTypes.h
new file mode 100644
index 000000000..9fbd125d9
--- /dev/null
+++ b/compiler/locoex-customop/include/locoex/COpAttrTypes.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_COPATTRTYPES_H__
+#define __LOCOEX_COPATTRTYPES_H__
+
+#include <stdexcept>
+
+namespace locoex
+{
+
+/**
+ * @brief Tensorflow attribute type
+ * Refer to https://www.tensorflow.org/guide/extend/op#attr_types
+ */
+enum class COpAttrType
+{
+ Int,
+ Float,
+ // TODO Support more attr types such as String, Bool, DataType, Tensor, Shape, List
+};
+
+/**
+ * @brief Struct that holds attr type
+ */
+struct COpAttrData
+{
+protected:
+ COpAttrData(COpAttrType attr_type) : _type(attr_type) {}
+
+public:
+ virtual ~COpAttrData() = default;
+
+public:
+ COpAttrType type() const { return _type; }
+ void type(COpAttrType attr_type) { _type = attr_type; }
+
+private:
+ COpAttrType _type;
+};
+
+/**
+ * @brief Struct that holds attr data of int type
+ */
+struct COpAttrInt final : public COpAttrData
+{
+public:
+ COpAttrInt(int tf_val) : COpAttrData(COpAttrType::Int) { _val = tf_val; }
+
+ int val() const { return _val; }
+ void val(int val) { _val = val; }
+
+private:
+ int _val;
+};
+
+/**
+ * @brief Struct that holds attr data of float type
+ */
+struct COpAttrFloat final : public COpAttrData
+{
+public:
+ COpAttrFloat(float tf_val) : COpAttrData(COpAttrType::Float) { _val = tf_val; }
+
+ float val() const { return _val; }
+ void val(float val) { _val = val; }
+
+private:
+ float _val;
+};
+
+template <COpAttrType AT> struct AttrTypeTrait;
+
+template <> struct AttrTypeTrait<COpAttrType::Float>
+{
+ using Type = COpAttrFloat;
+};
+
+template <> struct AttrTypeTrait<COpAttrType::Int>
+{
+ using Type = COpAttrInt;
+};
+
+// TODO support more attr types
+
+} // namespace locoex
+
+#endif // __LOCOEX_COPATTRTYPES_H__
diff --git a/compiler/locoex-customop/include/locoex/COpCall.h b/compiler/locoex-customop/include/locoex/COpCall.h
new file mode 100644
index 000000000..197fd8d0c
--- /dev/null
+++ b/compiler/locoex-customop/include/locoex/COpCall.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_COPCALL_H__
+#define __LOCOEX_COPCALL_H__
+
+#include "VariadicArityNode.h"
+#include "locoex/COpAttrTypes.h"
+#include "locoex/COpNode.h"
+
+#include <loco/IR/NodeMixins.h>
+
+#include <map>
+#include <memory>
+
+namespace locoex
+{
+
+/**
+ * @brief Class to call custom operations
+ */
+class COpCall final : public VariadicArityNode<COpNode>,
+ public loco::NodeMixin<loco::NodeTrait::TensorShape>,
+ public loco::NodeMixin<loco::NodeTrait::DataType>
+{
+public:
+ COpCall(unsigned arity) : VariadicArityNode<COpNode>(arity) {}
+
+public:
+ void op(const std::string &op) { _op.assign(op); }
+ const std::string &op() { return _op; }
+
+ void name(const std::string &name) { _name.assign(name); }
+ const std::string &name() { return _name; }
+
+ void input(uint32_t nth, loco::Node *node) { at(nth)->node(node); }
+ loco::Node *input(uint32_t nth) const { return at(nth)->node(); }
+
+ /// @brief Store [attr_name, attr_data]
+ void attr(const std::string &attr_name, std::unique_ptr<COpAttrData> &&attr_data);
+
+ /// @brief Retrieve attr_data stored with attr_name
+ template <COpAttrType AT>
+ const typename AttrTypeTrait<AT>::Type *attr(const std::string &attr_name) const;
+
+ /// @brief get all the names of attr
+ std::vector<std::string> attr_names() const;
+
+private:
+ std::string _op;
+ std::string _name;
+
+ std::map<std::string, std::unique_ptr<COpAttrData>> _attrs;
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_COPCALL_H__
diff --git a/compiler/locoex-customop/include/locoex/COpDialect.h b/compiler/locoex-customop/include/locoex/COpDialect.h
new file mode 100644
index 000000000..86ca5a7a1
--- /dev/null
+++ b/compiler/locoex-customop/include/locoex/COpDialect.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_COPDIALECT_H__
+#define __LOCOEX_COPDIALECT_H__
+
+#include <loco/IR/Dialect.h>
+
+namespace locoex
+{
+
+/**
+ * @brief A singleton for locoex custom op Dialect
+ */
+class COpDialect final : public loco::Dialect
+{
+private:
+ COpDialect() = default;
+
+public:
+ COpDialect(const Dialect &) = delete;
+ COpDialect(Dialect &&) = delete;
+
+public:
+ static loco::Dialect *get(void);
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_COPDIALECT_H__
diff --git a/compiler/locoex-customop/include/locoex/COpNode.h b/compiler/locoex-customop/include/locoex/COpNode.h
new file mode 100644
index 000000000..fce99d2d9
--- /dev/null
+++ b/compiler/locoex-customop/include/locoex/COpNode.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_COPNODE_DECL_H__
+#define __LOCOEX_COPNODE_DECL_H__
+
+#include <loco/IR/Node.h>
+#include <loco/IR/Dialect.h>
+
+namespace locoex
+{
+
+struct COpNode : public loco::Node
+{
+ virtual ~COpNode() = default;
+
+ const loco::Dialect *dialect(void) const final;
+
+ uint32_t opnum(void) const final { return 0; /* opnum for custom op */ }
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_COPNODE_DECL_H__
diff --git a/compiler/locoex-customop/include/locoex/Service/COpFormattedGraph.h b/compiler/locoex-customop/include/locoex/Service/COpFormattedGraph.h
new file mode 100644
index 000000000..5decf4ecc
--- /dev/null
+++ b/compiler/locoex-customop/include/locoex/Service/COpFormattedGraph.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_SERVICE_FORMATTED_GRAPH_H__
+#define __LOCOEX_SERVICE_FORMATTED_GRAPH_H__
+
+#include <locop/FormattedGraph.h>
+
+#include <locoex/COpCall.h>
+
+namespace locoex
+{
+
+class COpNodeSummaryBuilder final : public locop::NodeSummaryBuilder
+{
+public:
+ COpNodeSummaryBuilder(const locop::SymbolTable *tbl) : _tbl{tbl}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool build(const loco::Node *node, locop::NodeSummary &s) const final;
+
+private:
+ bool summary(const locoex::COpCall *, locop::NodeSummary &) const;
+
+private:
+ const locop::SymbolTable *_tbl;
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_SERVICE_FORMATTED_GRAPH_H__
diff --git a/compiler/locoex-customop/include/locoex/Service/COpShapeInferenceRule.h b/compiler/locoex-customop/include/locoex/Service/COpShapeInferenceRule.h
new file mode 100644
index 000000000..d2a332da4
--- /dev/null
+++ b/compiler/locoex-customop/include/locoex/Service/COpShapeInferenceRule.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_SERVICE_COP_SHAPE_INFERENCE_RULE_H__
+#define __LOCOEX_SERVICE_COP_SHAPE_INFERENCE_RULE_H__
+
+#include <loco/Service/ShapeInferenceRule.h>
+#include <loco/IR/Dialect.h>
+#include <loco/IR/Node.h>
+#include <loco/IR/NodeShape.h>
+
+namespace locoex
+{
+
+/**
+ * @brief Shape inference rule for COpDialect
+ *
+ * @note the shape of inputs and output of COpCall must belong to loco::Domain::Tensor
+ */
+struct COpShapeInferenceRule final : public loco::ShapeInferenceRule
+{
+ bool recognize(const loco::Dialect *) const final;
+ bool infer(const loco::Node *, loco::NodeShape &) const final;
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_SERVICE_COP_SHAPE_INFERENCE_RULE_H__
diff --git a/compiler/locoex-customop/include/locoex/Service/COpTypeInference.h b/compiler/locoex-customop/include/locoex/Service/COpTypeInference.h
new file mode 100644
index 000000000..13163a5de
--- /dev/null
+++ b/compiler/locoex-customop/include/locoex/Service/COpTypeInference.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_SERVICE_TYPE_INFERENCE_H__
+#define __LOCOEX_SERVICE_TYPE_INFERENCE_H__
+
+#include <loco/Service/TypeInference.h>
+
+namespace locoex
+{
+
+/**
+ * @brief Type Inference Rule for COpDialect
+ */
+struct COpTypeInferenceRule final : public loco::TypeInferenceRule
+{
+ bool recognize(const loco::Dialect *) const final;
+ bool infer(const loco::Node *, loco::DataType &) const final;
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_SERVICE_TYPE_INFERENCE_H__
diff --git a/compiler/locoex-customop/include/locoex/VariadicArityNode.h b/compiler/locoex-customop/include/locoex/VariadicArityNode.h
new file mode 100644
index 000000000..fce754cde
--- /dev/null
+++ b/compiler/locoex-customop/include/locoex/VariadicArityNode.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOEX_VARIADICARITYNODES_OP_H__
+#define __LOCOEX_VARIADICARITYNODES_OP_H__
+
+#include <loco/IR/Node.h>
+#include <loco/IR/Use.h>
+
+#include <vector>
+#include <memory>
+#include <cassert>
+
+namespace locoex
+{
+
+/**
+ * @brief Nodes with the variadic inputs
+ */
+template <typename Base> class VariadicArityNode : public Base
+{
+public:
+ VariadicArityNode(uint32_t arity)
+ {
+ for (uint32_t n = 0; n < arity; ++n)
+ {
+ _args.emplace_back(std::move(std::unique_ptr<loco::Use>{new loco::Use{this}}));
+ }
+ };
+
+ virtual ~VariadicArityNode() = default;
+
+public:
+ uint32_t arity(void) const final { return _args.size(); }
+
+ loco::Node *arg(uint32_t n) const final
+ {
+ assert(n < _args.size());
+ return _args.at(n)->node();
+ }
+
+ void drop(void) final
+ {
+ for (uint32_t n = 0; n < _args.size(); ++n)
+ {
+ _args.at(n)->node(nullptr);
+ }
+ }
+
+protected:
+ // This API allows inherited classes to access "_args" field.
+ loco::Use *at(uint32_t n) const
+ {
+ assert(n < _args.size());
+ return _args.at(n).get();
+ }
+
+private:
+ std::vector<std::unique_ptr<loco::Use>> _args;
+};
+
+} // namespace locoex
+
+#endif // __LOCOEX_VARIADICARITYNODES_OP_H__
diff --git a/compiler/locoex-customop/requires.cmake b/compiler/locoex-customop/requires.cmake
new file mode 100644
index 000000000..9127144f2
--- /dev/null
+++ b/compiler/locoex-customop/requires.cmake
@@ -0,0 +1,4 @@
+require("loco")
+require("stdex")
+require("locop")
+require("pepper-str")
diff --git a/compiler/locoex-customop/src/COpCall.cpp b/compiler/locoex-customop/src/COpCall.cpp
new file mode 100644
index 000000000..029914758
--- /dev/null
+++ b/compiler/locoex-customop/src/COpCall.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locoex/COpCall.h"
+
+#include "locoex/COpAttrTypes.h"
+
+namespace locoex
+{
+
+template <COpAttrType AT>
+const typename AttrTypeTrait<AT>::Type *COpCall::attr(const std::string &attr_name) const
+{
+ COpAttrData *attr_data;
+ auto found = _attrs.find(attr_name);
+ if (found != _attrs.end())
+ {
+ attr_data = found->second.get();
+ return dynamic_cast<const typename AttrTypeTrait<AT>::Type *>(attr_data);
+ }
+ else
+ throw std::runtime_error("Cannot find requested attr");
+}
+
+void COpCall::attr(const std::string &attr_name, std::unique_ptr<COpAttrData> &&attr_data)
+{
+ if (_attrs.find(attr_name) == _attrs.end())
+ _attrs[attr_name] = std::move(attr_data);
+ else
+ throw std::runtime_error("Attr already inserted");
+}
+
+std::vector<std::string> COpCall::attr_names() const
+{
+ std::vector<std::string> attr_names;
+
+ for (auto it = _attrs.cbegin(); it != _attrs.cend(); ++it)
+ {
+ attr_names.emplace_back(it->first);
+ }
+
+ return attr_names;
+}
+
+#define INSTANTIATE(AT) \
+ template const typename AttrTypeTrait<AT>::Type *COpCall::attr<AT>(const std::string &attr_name) \
+ const;
+
+INSTANTIATE(COpAttrType::Float)
+INSTANTIATE(COpAttrType::Int)
+
+#undef INSTANTIATE
+
+} // namespace locoex
diff --git a/compiler/locoex-customop/src/COpCall.test.cpp b/compiler/locoex-customop/src/COpCall.test.cpp
new file mode 100644
index 000000000..d5f01d22d
--- /dev/null
+++ b/compiler/locoex-customop/src/COpCall.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locoex/COpCall.h"
+#include "locoex/COpAttrTypes.h"
+
+#include <loco/IR/Graph.h>
+#include <loco/IR/Nodes.h>
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+TEST(CallTest, Test_01)
+{
+ using namespace locoex;
+
+ // attr name
+ std::string int_attr = "my_int";
+ std::string float_attr = "my_float";
+
+ int int_val = 100;
+ float float_val = 3.14;
+
+ // building loco test graph
+ auto g = loco::make_graph();
+
+ // generating input
+ auto inp = g->nodes()->create<loco::Pull>();
+ {
+ inp->dtype(loco::DataType::FLOAT32);
+ inp->shape({1, 2});
+ }
+
+ // generating custom op
+ auto custom = g->nodes()->create<COpCall>(2U);
+ {
+ custom->input(0, inp);
+ custom->input(1, inp);
+
+ custom->attr(int_attr, stdex::make_unique<COpAttrInt>(int_val));
+ custom->attr(float_attr, stdex::make_unique<COpAttrFloat>(float_val));
+ }
+
+ // access custom op input
+ loco::Node *input0 = custom->input(0);
+ loco::Node *input1 = custom->input(1);
+
+ ASSERT_EQ(custom->arity(), 2);
+ ASSERT_EQ(dynamic_cast<loco::Pull *>(input0), inp);
+ ASSERT_EQ(dynamic_cast<loco::Pull *>(input1), inp);
+
+ // access custom op attrs
+ auto names = custom->attr_names();
+
+ bool int_cheched = false, float_cheched = false;
+
+ for (const auto &name : names)
+ {
+ if (auto int_attr = custom->attr<COpAttrType::Int>(name))
+ {
+ ASSERT_EQ(int_attr->val(), int_val);
+ int_cheched = true;
+ }
+ else if (auto float_attr = custom->attr<COpAttrType::Float>(name))
+ {
+ ASSERT_FLOAT_EQ(float_attr->val(), float_val);
+ float_cheched = true;
+ }
+ else
+ {
+ FAIL();
+ }
+ }
+
+ ASSERT_TRUE(int_cheched && float_cheched);
+}
diff --git a/compiler/locoex-customop/src/COpDialect.cpp b/compiler/locoex-customop/src/COpDialect.cpp
new file mode 100644
index 000000000..46b7f8dd8
--- /dev/null
+++ b/compiler/locoex-customop/src/COpDialect.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locoex/COpDialect.h"
+
+namespace locoex
+{
+
+loco::Dialect *COpDialect::get(void)
+{
+ static COpDialect d;
+ return &d;
+}
+
+} // namespace locoex
diff --git a/compiler/locoex-customop/src/COpDialect.test.cpp b/compiler/locoex-customop/src/COpDialect.test.cpp
new file mode 100644
index 000000000..b00bf21a9
--- /dev/null
+++ b/compiler/locoex-customop/src/COpDialect.test.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locoex/COpDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(COpDialectTest, get)
+{
+ auto d = locoex::COpDialect::get();
+
+ // get() SHOULD return a valid(non-null) pointer
+ ASSERT_NE(d, nullptr);
+ // The return value SHOULD be stable across multiple invocations
+ ASSERT_EQ(d, locoex::COpDialect::get());
+}
diff --git a/compiler/locoex-customop/src/COpNode.cpp b/compiler/locoex-customop/src/COpNode.cpp
new file mode 100644
index 000000000..c489eedbc
--- /dev/null
+++ b/compiler/locoex-customop/src/COpNode.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locoex/COpNode.h"
+#include "locoex/COpDialect.h"
+
+namespace locoex
+{
+
+const loco::Dialect *COpNode::dialect(void) const { return COpDialect::get(); }
+
+} // namespace locoex
diff --git a/compiler/locoex-customop/src/Service/COpFormattedGraph.cpp b/compiler/locoex-customop/src/Service/COpFormattedGraph.cpp
new file mode 100644
index 000000000..916663ec0
--- /dev/null
+++ b/compiler/locoex-customop/src/Service/COpFormattedGraph.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locoex/Service/COpFormattedGraph.h"
+
+#include <locoex/COpCall.h>
+#include <locoex/COpAttrTypes.h>
+#include <locoex/COpDialect.h>
+
+#include <pepper/str.h>
+
+#include <sstream>
+#include <stdexcept>
+
+namespace locoex
+{
+
+bool COpNodeSummaryBuilder::build(const loco::Node *node, locop::NodeSummary &s) const
+{
+ if (node->dialect() != locoex::COpDialect::get())
+ return false;
+
+ if (auto call_node = dynamic_cast<const locoex::COpCall *>(node))
+ {
+ return summary(call_node, s);
+ }
+
+ return false;
+}
+
+bool COpNodeSummaryBuilder::summary(const locoex::COpCall *node, locop::NodeSummary &s) const
+{
+ assert(node != nullptr);
+
+ s.opname("COp.Call");
+ for (uint32_t i = 0; i < node->arity(); i++)
+ s.args().append(pepper::str("input_", i), _tbl->lookup(node->arg(i)));
+
+ for (auto name : node->attr_names())
+ {
+ if (auto int_attr = node->attr<locoex::COpAttrType::Int>(name))
+ s.args().append(name, pepper::str(int_attr->val()));
+ else if (auto float_attr = node->attr<locoex::COpAttrType::Float>(name))
+ s.args().append(name, pepper::str(float_attr->val()));
+ else
+ throw std::runtime_error("Not yet supported Attr Type");
+ }
+
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+} // namespace locoex
diff --git a/compiler/locoex-customop/src/Service/COpShapeInferenceRule.cpp b/compiler/locoex-customop/src/Service/COpShapeInferenceRule.cpp
new file mode 100644
index 000000000..4dc8f461f
--- /dev/null
+++ b/compiler/locoex-customop/src/Service/COpShapeInferenceRule.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locoex/Service/COpShapeInferenceRule.h"
+
+#include "locoex/COpDialect.h"
+#include "locoex/COpNode.h"
+#include "locoex/COpCall.h"
+
+#include <loco/Service/ShapeInference.h>
+
+#include <cassert>
+
+namespace locoex
+{
+
+bool COpShapeInferenceRule::recognize(const loco::Dialect *d) const
+{
+ return COpDialect::get() == d;
+}
+
+bool COpShapeInferenceRule::infer(const loco::Node *node, loco::NodeShape &shape) const
+{
+ assert(node->dialect() == COpDialect::get());
+ assert(dynamic_cast<const COpNode *>(node) != nullptr);
+
+ auto cop_call = dynamic_cast<const COpCall *>(node);
+
+ // Note that the shape of custom op is considered as TensorShape
+ // TODO Decide how to deal with this shape error cases
+ for (uint32_t n = 0; n < cop_call->arity(); n++)
+ if (loco::shape_get(cop_call->input(n)).domain() != loco::Domain::Tensor)
+ throw std::runtime_error("Input of custom op must belong to Tensor domain.");
+
+ loco::TensorShape out_shape;
+
+ out_shape.rank(cop_call->rank());
+ for (uint32_t d = 0; d < cop_call->rank(); d++)
+ out_shape.dim(d) = cop_call->dim(d);
+
+ shape.set(out_shape);
+
+ return true;
+}
+
+} // namespace locoex
diff --git a/compiler/locoex-customop/src/Service/COpShapeInferenceRule.test.cpp b/compiler/locoex-customop/src/Service/COpShapeInferenceRule.test.cpp
new file mode 100644
index 000000000..c86931ba7
--- /dev/null
+++ b/compiler/locoex-customop/src/Service/COpShapeInferenceRule.test.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locoex/Service/COpShapeInferenceRule.h"
+#include "locoex/COpCall.h"
+#include <loco/Service/ShapeInference.h>
+
+#include <gtest/gtest.h>
+
+TEST(COpShapeInferenceRuleTest, minimal)
+{
+ // Create a simple network
+ auto g = loco::make_graph();
+
+ auto call_node = g->nodes()->create<locoex::COpCall>(0);
+ call_node->shape({1, 3});
+
+ auto push_node = g->nodes()->create<loco::Push>();
+ push_node->from(call_node);
+
+ auto graph_output = g->outputs()->create();
+ graph_output->name("output");
+ loco::link(graph_output, push_node);
+
+ // pre-check
+ ASSERT_FALSE(loco::shape_known(call_node));
+
+ // Run Shape Inference
+ locoex::COpShapeInferenceRule rule;
+
+ loco::apply(&rule).to(g.get());
+
+ // Verify!
+ ASSERT_TRUE(loco::shape_known(call_node));
+ ASSERT_EQ(loco::shape_get(call_node).domain(), loco::Domain::Tensor);
+
+ auto shape = loco::shape_get(call_node).as<loco::TensorShape>();
+ ASSERT_EQ(shape.rank(), 2);
+ ASSERT_EQ(shape.dim(0), 1);
+ ASSERT_EQ(shape.dim(1), 3);
+}
diff --git a/compiler/locoex-customop/src/Service/COpTypeInference.cpp b/compiler/locoex-customop/src/Service/COpTypeInference.cpp
new file mode 100644
index 000000000..b41454eb2
--- /dev/null
+++ b/compiler/locoex-customop/src/Service/COpTypeInference.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locoex/Service/COpTypeInference.h"
+
+#include "locoex/COpDialect.h"
+#include "locoex/COpCall.h"
+
+#include <cassert>
+
+namespace locoex
+{
+
+bool COpTypeInferenceRule::recognize(const loco::Dialect *d) const
+{
+ // This rule recognizes only "COpDialect" dialect!
+ return COpDialect::get() == d;
+}
+
+bool COpTypeInferenceRule::infer(const loco::Node *node, loco::DataType &dtype) const
+{
+ assert(node->dialect() == COpDialect::get());
+
+ auto customop = dynamic_cast<const COpCall *>(node);
+
+ assert(customop != nullptr);
+ assert(customop->dtype() != loco::DataType::Unknown);
+
+ dtype = customop->dtype();
+
+ return true;
+}
+
+} // namespace locoex
diff --git a/compiler/locoex-customop/src/Service/COpTypeInference.test.cpp b/compiler/locoex-customop/src/Service/COpTypeInference.test.cpp
new file mode 100644
index 000000000..97ddd8618
--- /dev/null
+++ b/compiler/locoex-customop/src/Service/COpTypeInference.test.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <locoex/Service/COpTypeInference.h>
+#include <locoex/COpCall.h>
+#include <locoex/COpDialect.h>
+
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/TypeInference.h>
+
+#include <gtest/gtest.h>
+
+TEST(TypeInferenceRuleTest, COpTypeInference)
+{
+  // Create a simple network containing a custom op call
+ auto g = loco::make_graph();
+
+ auto pull_node = g->nodes()->create<loco::Pull>();
+ pull_node->dtype(loco::DataType::FLOAT32);
+
+ auto call_node = g->nodes()->create<locoex::COpCall>(1);
+ call_node->input(0, pull_node);
+ call_node->dtype(loco::DataType::FLOAT32);
+
+ auto push_node = g->nodes()->create<loco::Push>();
+ push_node->from(call_node);
+
+ auto graph_input = g->inputs()->create();
+
+ graph_input->name("input");
+ loco::link(graph_input, pull_node);
+
+ auto graph_output = g->outputs()->create();
+
+ graph_output->name("output");
+ loco::link(graph_output, push_node);
+
+ // Run Type Inference
+ locoex::COpTypeInferenceRule cop_rule;
+ loco::CanonicalTypeInferenceRule canon_rule;
+ loco::MultiDialectTypeInferenceRule rules;
+
+ rules.bind(locoex::COpDialect::get(), &cop_rule).bind(loco::CanonicalDialect::get(), &canon_rule);
+
+ loco::apply(&rules).to(g.get());
+
+ // Verify!
+ ASSERT_TRUE(loco::dtype_known(call_node));
+ ASSERT_EQ(loco::dtype_get(call_node), loco::DataType::FLOAT32);
+}
diff --git a/compiler/locoex-customop/src/VariadicArityNode.test.cpp b/compiler/locoex-customop/src/VariadicArityNode.test.cpp
new file mode 100644
index 000000000..a618824e5
--- /dev/null
+++ b/compiler/locoex-customop/src/VariadicArityNode.test.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locoex/VariadicArityNode.h"
+
+#include <loco/IR/Nodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace locoex;
+
+class TestNode : public VariadicArityNode<loco::Node>
+{
+public:
+ TestNode(uint32_t arity) : VariadicArityNode<loco::Node>(arity) {}
+
+ void input(uint32_t idx, loco::Node *node) { at(idx)->node(node); }
+ loco::Node *input(uint32_t idx) const { return at(idx)->node(); }
+
+ const loco::Dialect *dialect(void) const { return nullptr; } // this won't be called for testing
+ uint32_t opnum(void) const { return -1; } // this won't be called for testing
+};
+
+class ZeroInputNode : public TestNode
+{
+public:
+ ZeroInputNode() : TestNode(0) {}
+};
+
+class BinaryInputNode : public TestNode
+{
+public:
+ BinaryInputNode() : TestNode(2) {}
+};
+}
+
+TEST(CustomOpTest, VariadicArityNode_arity_0)
+{
+ loco::Pull pull;
+
+ ZeroInputNode z_node;
+
+ ASSERT_EQ(z_node.arity(), 0);
+}
+
+TEST(CustomOpTest, VariadicArityNode_arity_2)
+{
+ loco::Pull pull_00, pull_01;
+
+ BinaryInputNode b_node;
+ b_node.input(0, &pull_00);
+ b_node.input(1, &pull_01);
+
+ ASSERT_EQ(b_node.arity(), 2);
+ ASSERT_EQ(b_node.input(0), &pull_00);
+ ASSERT_EQ(b_node.input(1), &pull_01);
+}
diff --git a/compiler/locomotiv/CMakeLists.txt b/compiler/locomotiv/CMakeLists.txt
new file mode 100644
index 000000000..5c0156b78
--- /dev/null
+++ b/compiler/locomotiv/CMakeLists.txt
@@ -0,0 +1,29 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(locomotiv STATIC ${SOURCES})
+set_target_properties(locomotiv PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(locomotiv PUBLIC include)
+target_include_directories(locomotiv PRIVATE src)
+target_link_libraries(locomotiv PUBLIC loco)
+target_link_libraries(locomotiv PUBLIC angkor)
+target_link_libraries(locomotiv PRIVATE stdex)
+# Let's apply nncc common compile options
+#
+# NOTE This will enable strict compilation (warnings as error).
+# Please refer to the top-level CMakeLists.txt for details
+target_link_libraries(locomotiv PRIVATE nncc_common)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for internal testing
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(locomotiv_test ${TESTS})
+target_include_directories(locomotiv_test PRIVATE src)
+target_link_libraries(locomotiv_test locomotiv)
+
+add_test(locomotiv_test locomotiv_test)
diff --git a/compiler/locomotiv/README.md b/compiler/locomotiv/README.md
new file mode 100644
index 000000000..9569f6ea3
--- /dev/null
+++ b/compiler/locomotiv/README.md
@@ -0,0 +1,90 @@
+# locomotiv
+_locomotiv_ is a reference interpreter for _loco_ IR.
+
+# Purpose
+- _locomotiv_ would serve as code level specification and reference implementation for loco IR.
+- _locomotiv_ is required for loco-related tools to be tested.
+
+# Sample code to use locomotiv library
+This sample code shows how to use locomotiv. Please refer to `src/Session.test.cpp` as well for actual usage.
+```cpp
+template <typename T> using Buffer = nncc::core::ADT::tensor::Buffer<T>
+
+loco::Graph *graph;
+// ... building graph ...
+
+// Open interpreter session
+locomotiv::Session sess(graph);
+
+for (uint32_t i = 0; i < sess.input_size(); ++i)
+{
+ Buffer<type> buffer;
+ // ... building buffer ...
+
+ locomotiv::NodeData input_data = locomotiv::make_data(buffer);
+
+ sess.set_input(i, input_data);
+}
+
+// Run inference
+sess.infer();
+
+// Query inferred output
+locomotiv::NodeData *output_data = sess.get_output(query_index);
+
+// Get buffer according to data type
+switch(output_data->dtype())
+{
+case loco::DataType::S32:
+{
+ Buffer<int32_t> output_buffer = output_data->as_s32_bufptr();
+ // Do something
+ break;
+}
+case loco::DataType::FLOAT32:
+{
+ Buffer<float> output_buffer = output_data->as_f32_bufptr();
+ // Do something
+ break;
+}
+// ...
+}
+```
+
+# How to support new loco node execution: recommended guide
+
+## Steps to support new loco node
+1. First of all, understand semantics of the node to newly support, especially on calculation spec and valid use cases.
+2. Add the node to `locomotiv/src/Node.lst`. Please keep alphabetical order. This automatically declares `NodeExecution::execute(TheNode *)` and updates `NodeExecution::run()` to deal with the node.
+3. Define `execute(loco::TheNode *)` at `locomotiv/src/Node/TheNode.cpp`.
+4. Test new node execution at `locomotiv/src/Node/TheNode.test.cpp` if possible.
+
+### Note on internal data layout rule
+For each domain(see `loco::Domain`), `locomotiv` has fixed layout rule on how to store its data in memory.
+- Feature is represented as NHWC layout
+ - That is number of batch(N), height(H), width(W) and channel depth(C)
+- Filter is represented as NHWC layout
+ - That is number of filter(N), height(H), width(W) and input channel depth(C)
+- DepthwiseFilter is represented as HWCM layout
+ - That is height(H), width(W), input channel depth(C) and depth multiplier(M)
+- Matrix is represented as HW layout
+ - That is height(H), width(W)
+
+### Notes on step 3
+- Mocking TensorFlow Lite `reference_op.h` might be a good place to start.
+- `execute()` can be called multiple times. It just recalculates and updates annotated data. So it should `erase_annot_data()` before calling `annot_data()` again.
+- Most node execution behaviour would be implemented for each data type.
+- `execute()` should throw runtime error on invalid cases. Some of these cases are explained:
+ - Invalid argument node
+ - e.g.) Pull -> MaxPool2D is invalid as MaxPool2D requires feature map as its argument.
+ - Lack of argument data
+ - e.g.) Given 'Pull -> Push' graph. On execution of Push, if no NodeData annotated to Pull, it is invalid.
+ - Mismatch of argument shapes
+ - e.g.) Addition between 2x2 and 3x3 tensor is invalid
+ - e.g.) MaxPool2D expects its ifm to be 4D feature, otherwise invalid.
+ - Mismatch between node's own information and inferred information
+  - Some nodes already have attributes like shape or data type. If inferred information is different from the existing node's, it is invalid.
+
+### Recommendation on step 4 (test)
+- If the node has no arguments, create a node object and `NodeExecution::run()` on it. Check whether it operates correctly.
+- If the node has N(>= 1) arguments, make N pull node inputs, source them to the node to be tested. FeatureEncode or FilterEncode node may be required in between depending on the node's argument type. Then annotate N pull nodes with its data, `NodeExecution::run()` on the node to test, and check whether it operates correctly.
diff --git a/compiler/locomotiv/include/locomotiv/NodeData.h b/compiler/locomotiv/include/locomotiv/NodeData.h
new file mode 100644
index 000000000..c9960db46
--- /dev/null
+++ b/compiler/locomotiv/include/locomotiv/NodeData.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _LOCOMOTIV_NODEDATA_H_
+#define _LOCOMOTIV_NODEDATA_H_
+
+#include <loco.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <memory>
+
+namespace locomotiv
+{
+
+/**
+ * @brief Read-only no-template wrapper for 'Buffer'. Serves interface for input
+ * and output of 'Session'.
+ *
+ * @note Once NodeData is created, it is not modifiable.
+ */
+struct NodeData
+{
+ template <typename T> using Buffer = nncc::core::ADT::tensor::Buffer<T>;
+ using Shape = nncc::core::ADT::tensor::Shape;
+
+ virtual ~NodeData() = default;
+
+ virtual const loco::DataType &dtype() const = 0;
+
+ virtual const Shape *shape() const = 0;
+
+ // TODO Support more data types
+ virtual const Buffer<int32_t> *as_s32_bufptr() const = 0;
+ virtual const Buffer<float> *as_f32_bufptr() const = 0;
+};
+
+/**
+ * @brief Copy buffer to make NodeData
+ *
+ * @note NodeData is read-only. You may prepare buffer with ALL data, then call
+ * this function to make data.
+ */
+template <typename DT> std::unique_ptr<NodeData> make_data(const NodeData::Buffer<DT> &buffer);
+
+} // namespace locomotiv
+
+#endif // _LOCOMOTIV_NODEDATA_H_
diff --git a/compiler/locomotiv/include/locomotiv/Session.h b/compiler/locomotiv/include/locomotiv/Session.h
new file mode 100644
index 000000000..3268d60b3
--- /dev/null
+++ b/compiler/locomotiv/include/locomotiv/Session.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _LOCOMOTIV_SESSION_H_
+#define _LOCOMOTIV_SESSION_H_
+
+#include "locomotiv/NodeData.h"
+
+#include <loco.h>
+
+#include <memory>
+#include <vector>
+
+namespace locomotiv
+{
+
+/**
+ * @brief Session for loco graph inference
+ */
+class Session final
+{
+public:
+ Session() = delete;
+
+ /// @brief Make Session for graph with graph outputs themselves
+ Session(loco::Graph *g) : Session(g, loco::output_nodes(g))
+ {
+ // DO NOTHING
+ }
+
+ /**
+ * @brief Make Session for graph with selective custom outputs. Only
+ * subgraph to calculate given outputs would be executed.
+ *
+ * @note Set required inputs for given outputs, or inference may fail.
+ * @note custom_outputs don't need to be graph output, but can be any nodes
+ * in the middle of the graph.
+   * @warning This approach may fail in case of a graph with control flow
+ */
+ Session(loco::Graph *g, const std::vector<loco::Node *> &custom_outputs)
+ : _graph(g), _outputs(custom_outputs)
+ {
+ // DO NOTHING
+ }
+
+ /// @brief Make Session by range
+ template <typename InputIt>
+ Session(loco::Graph *g, InputIt begin, InputIt end) : _graph(g), _outputs(begin, end)
+ {
+ // DO NOTHING
+ }
+
+ /// @brief Free all node annotations of the graph assigned by this Session
+ ~Session();
+
+ /// @brief Get number of graph inputs held by this Session
+ uint32_t input_size() const { return _graph->inputs()->size(); }
+
+ /**
+ * @brief Set graph input at specific index by NodeData.
+ *
+ * @throw runtime_error In case when another NodeData already annotated for the
+ * input, and when given data type or shape are not
+ * congruent with loco node information.
+ */
+ void set_input(uint32_t index, std::unique_ptr<NodeData> &&data);
+
+ /**
+ * @brief Do inference for this session and graph
+ *
+ * @note Multiple run is possible. Abort program when inputs are not fully set
+ * or invalid calculation found in the middle.
+ */
+ void infer();
+
+ /// @brief Get number of graph outputs held by this Session
+ uint32_t output_size() const { return _outputs.size(); }
+
+ /**
+ * @brief Get output of graph as NodeData
+ *
+ * @note May return nullptr, for example, when graph output not yet calculated
+ */
+ const NodeData *get_output(uint32_t index);
+
+ const loco::Node *get_output_node(uint32_t index) { return _outputs.at(index); }
+
+private:
+ loco::Graph *_graph;
+ std::vector<loco::Node *> _outputs;
+};
+
+} // namespace locomotiv
+
+#endif // _LOCOMOTIV_SESSION_H_
diff --git a/compiler/locomotiv/requires.cmake b/compiler/locomotiv/requires.cmake
new file mode 100644
index 000000000..1c09aa13d
--- /dev/null
+++ b/compiler/locomotiv/requires.cmake
@@ -0,0 +1,2 @@
+require("angkor")
+require("stdex")
diff --git a/compiler/locomotiv/src/Node.lst b/compiler/locomotiv/src/Node.lst
new file mode 100644
index 000000000..be3b10520
--- /dev/null
+++ b/compiler/locomotiv/src/Node.lst
@@ -0,0 +1,40 @@
+#ifndef NODE
+#error Define NODE first
+#endif // NODE
+
+// NODE(Name) : alphabetic order please
+
+NODE(AvgPool2D)
+NODE(BiasAdd<loco::Domain::Feature>)
+NODE(BiasAdd<loco::Domain::Tensor>)
+NODE(BiasEncode)
+NODE(ConstGen)
+NODE(Conv2D)
+NODE(DepthwiseConv2D)
+NODE(DepthwiseFilterEncode)
+NODE(EltwiseAdd)
+NODE(EltwiseDiv)
+NODE(EltwiseMax)
+NODE(EltwiseMul)
+NODE(EltwiseSqrt)
+NODE(EltwiseSub)
+NODE(FeatureDecode)
+NODE(FeatureEncode)
+NODE(FilterEncode)
+NODE(Forward)
+NODE(MatrixDecode)
+NODE(MatrixEncode)
+NODE(MatMul)
+NODE(MaxPool2D)
+NODE(Pull)
+NODE(Push)
+NODE(ReLU)
+NODE(ReLU6)
+NODE(Reshape<loco::ReshapeType::Fixed>)
+NODE(Tanh)
+NODE(TensorBroadcast)
+NODE(TensorConcat)
+NODE(TensorConstantPad)
+NODE(TensorReduce)
+NODE(TensorSoftmax)
+NODE(TransposedConv2D)
diff --git a/compiler/locomotiv/src/Node/AvgPool2D.cpp b/compiler/locomotiv/src/Node/AvgPool2D.cpp
new file mode 100644
index 000000000..ad603badf
--- /dev/null
+++ b/compiler/locomotiv/src/Node/AvgPool2D.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Compute 1D output size based on given 1D arguments.
+ *
+ * @param whole_pad Sum of front and back pad
+ */
+inline uint32_t compute_out_size(uint32_t image_size, uint32_t whole_pad, uint32_t filter_size,
+ uint32_t stride)
+{
+ assert((image_size + whole_pad - filter_size) % stride == 0);
+ return (image_size + whole_pad - filter_size) / stride + 1;
+}
+
+template <typename T>
+nncc::core::ADT::tensor::Buffer<T> avgPool2D(const loco::AvgPool2D *avgpool2d,
+ const Buffer<T> *ifm_buf)
+{
+ assert(avgpool2d->convention() == loco::AvgPool2D::Convention::Valid ||
+ avgpool2d->convention() == loco::AvgPool2D::Convention::Full);
+
+ auto ifm_shape = ifm_buf->shape();
+
+ const uint32_t batches = ifm_shape.dim(0);
+ const uint32_t depth = ifm_shape.dim(3);
+
+ const uint32_t ifm_height = ifm_shape.dim(1);
+ const uint32_t ifm_width = ifm_shape.dim(2);
+
+ const uint32_t window_height = avgpool2d->window()->vertical();
+ const uint32_t window_width = avgpool2d->window()->horizontal();
+
+ const uint32_t stride_height = avgpool2d->stride()->vertical();
+ const uint32_t stride_width = avgpool2d->stride()->horizontal();
+
+ const uint32_t pad_top = avgpool2d->pad()->top();
+ const uint32_t pad_bottom = avgpool2d->pad()->bottom();
+
+ const uint32_t pad_left = avgpool2d->pad()->left();
+ const uint32_t pad_right = avgpool2d->pad()->right();
+
+ const uint32_t output_height =
+ compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
+ const uint32_t output_width =
+ compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
+
+ // prepare output buffer
+ Shape output_shape{batches, output_height, output_width, depth};
+ auto output_buf = make_buffer<T, LexicalLayout>(output_shape);
+
+ for (uint32_t batch = 0; batch < batches; ++batch)
+ {
+ for (uint32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (uint32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (uint32_t channel = 0; channel < depth; ++channel)
+ {
+ const int in_x_origin = (out_x * stride_width) - pad_left;
+ const int in_y_origin = (out_y * stride_height) - pad_top;
+
+ uint32_t f_x0, f_x1, f_y0, f_y1;
+ if (avgpool2d->convention() == loco::AvgPool2D::Convention::Valid)
+ {
+ f_x0 = std::max(0, -in_x_origin);
+ f_x1 = std::min(window_width, ifm_width - in_x_origin);
+ f_y0 = std::max(0, -in_y_origin);
+ f_y1 = std::min(window_height, ifm_height - in_y_origin);
+ }
+ else
+ {
+ throw std::runtime_error("TODO support AvgPool2D::Convention::Full");
+ }
+ const uint32_t filter_x_start = f_x0;
+ const uint32_t filter_x_end = f_x1;
+
+ const uint32_t filter_y_start = f_y0;
+ const uint32_t filter_y_end = f_y1;
+
+ T total = 0;
+ uint32_t filter_ele_count = 0;
+
+ for (uint32_t filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ {
+ for (uint32_t filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
+ {
+ const uint32_t in_x = in_x_origin + filter_x;
+ const uint32_t in_y = in_y_origin + filter_y;
+ total += ifm_buf->at(Index({batch, in_y, in_x, channel}));
+ filter_ele_count++;
+ }
+ }
+
+ assert(filter_ele_count > 0);
+ output_buf.at(Index({batch, out_y, out_x, channel})) = total / filter_ele_count;
+ }
+ }
+ }
+ }
+
+ return output_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::AvgPool2D *avgpool2d)
+{
+ auto ifm_data = annot_data(avgpool2d->ifm());
+
+ validate(ifm_data, "Can't find input data of AvgPool2D");
+ validate(ifm_data->shape()->rank() == 4, "IFM rank should be 4");
+ validate(annot_domain(avgpool2d->ifm()) == loco::Domain::Feature,
+ "ifm of AvgPool2D is not Feature");
+
+ std::unique_ptr<NodeData> avgpool2d_data = nullptr;
+
+ switch (ifm_data->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ auto ifm_buf = ifm_data->as_f32_bufptr();
+
+ auto avgpool2d_buf = avgPool2D<float>(avgpool2d, ifm_buf);
+
+ avgpool2d_data = make_data(avgpool2d_buf);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+
+ assert(avgpool2d_data != nullptr);
+
+ annot_data(avgpool2d, std::move(avgpool2d_data));
+ annot_domain(avgpool2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/AvgPool2D.test.cpp b/compiler/locomotiv/src/Node/AvgPool2D.test.cpp
new file mode 100644
index 000000000..89e10a35e
--- /dev/null
+++ b/compiler/locomotiv/src/Node/AvgPool2D.test.cpp
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+void run_test(const float *ifm, const float *expected_ofm, const Shape &ifm_shape,
+ const Shape &ofm_shape, const uint32_t window_v, const uint32_t window_h,
+ const uint32_t stride_v, const uint32_t stride_h, const uint32_t pad_top,
+ const uint32_t pad_bottom, const uint32_t pad_left, const uint32_t pad_right)
+{
+ // Let's make FeatureEncode-AvgPool2D graph
+ auto g = loco::make_graph();
+ auto enc = g->nodes()->create<loco::FeatureEncode>();
+
+ // Fill output data of FeatureEncode from ifm
+ auto enc_buf = make_buffer<float, LexicalLayout>(ifm_shape);
+
+ auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm));
+ for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance())
+ {
+ const auto &ind = e.current();
+ enc_buf.at(ind) = ifm_overlay.at(ind);
+ }
+
+ auto enc_data = locomotiv::make_data(enc_buf);
+ locomotiv::annot_data(enc, std::move(enc_data));
+ locomotiv::annot_domain(enc, loco::Domain::Feature);
+
+  // build loco AvgPool2D
+ auto avgpool2d = g->nodes()->create<loco::AvgPool2D>();
+ avgpool2d->ifm(enc);
+ avgpool2d->convention(loco::AvgPool2D::Convention::Valid);
+ avgpool2d->window()->vertical(window_v);
+ avgpool2d->window()->horizontal(window_h);
+ avgpool2d->stride()->vertical(stride_v);
+ avgpool2d->stride()->horizontal(stride_h);
+ avgpool2d->pad()->top(pad_top);
+ avgpool2d->pad()->bottom(pad_bottom);
+ avgpool2d->pad()->left(pad_left);
+ avgpool2d->pad()->right(pad_right);
+
+ // run interpreter
+ locomotiv::NodeExecution::get().run(avgpool2d);
+
+ // get result of calculation
+ auto avgpool2d_data = locomotiv::annot_data(avgpool2d);
+
+ // check the result
+ ASSERT_NE(avgpool2d_data, nullptr);
+ ASSERT_TRUE(avgpool2d_data->dtype() == loco::DataType::FLOAT32);
+ ASSERT_TRUE(*(avgpool2d_data->shape()) == ofm_shape);
+
+ auto ofm_overlay =
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
+ {
+ const auto &ind = e.current();
+ ASSERT_FLOAT_EQ(avgpool2d_data->as_f32_bufptr()->at(ind), ofm_overlay.at(ind));
+ }
+
+ ASSERT_EQ(locomotiv::annot_domain(avgpool2d), loco::Domain::Feature);
+}
+
+} // namespace
+
+// clang-format off
+/* ifm and ofm are from the code below:
+import tensorflow as tf
+
+value = tf.constant([[[[-0.281157], [-1.0601869], [-0.622261], [-1.1777412]],
+ [[1.4411974], [0.01408334], [0.06958964], [-0.08663343]],
+ [[1.3424183], [-0.89015573], [0.2520576], [0.04843695]],
+ [[-1.6668711], [-0.02187406], [1.9362065], [1.3341236]]]])
+avgpool = tf.nn.avg_pool(value, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding= 'VALID',
+ data_format="NHWC")
+with tf.Session() as sess:
+ print(sess.run(avgpool))
+*/
+TEST(NodeExecution_AvgPool2D, f32_1x4x4x1_calculation)
+{
+ using nncc::core::ADT::tensor::Shape;
+
+ const float ifm[] =
+ {
+ -0.281157, -1.0601869, -0.622261, -1.1777412,
+ 1.4411974, 0.01408334, 0.06958964, -0.08663343,
+ 1.3424183, -0.89015573, 0.2520576, 0.04843695,
+ -1.6668711, -0.02187406, 1.9362065, 1.3341236
+ };
+
+ const float ofm[] =
+ {
+ 0.02848421, -0.45426148,
+ -0.30912063, 0.89270616
+ };
+
+ run_test(ifm, ofm,
+ Shape{1, 4, 4, 1}, Shape{1, 2, 2, 1}, // input shape , output shape
+ 2, 2, // kernel
+ 2, 2, // stride
+ 0, 0, 0, 0 // padding
+ );
+}
+// clang-format on
+
+// clang-format off
+/* ifm and ofm are from the code below:
+import tensorflow as tf
+
+value = tf.constant([[[[-0.281157], [-1.0601869], [-0.622261]],
+ [[1.4411974], [0.01408334], [0.06958964]],
+ [[1.3424183], [-0.89015573], [0.2520576]]]])
+avgpool = tf.nn.avg_pool(value, ksize = [1, 2, 2, 1], strides = [1, 1, 1, 1], padding= 'SAME',
+ data_format="NHWC")
+with tf.Session() as sess:
+ print(sess.run(avgpool))
+*/
+TEST(NodeExecution_AvgPool2D, f32_1x3x3x1_calculation)
+{
+ using nncc::core::ADT::tensor::Shape;
+
+ const float ifm[] =
+ {
+ -0.281157, -1.0601869, -0.622261,
+ 1.4411974, 0.01408334, 0.06958964,
+ 1.3424183, -0.89015573, 0.2520576
+ };
+
+ const float ofm[] =
+ {
+ 0.02848421, -0.39969373, -0.2763357,
+ 0.4768858, -0.13860628, 0.16082363,
+ 0.22613129, -0.31904906, 0.2520576
+ };
+
+ run_test(ifm, ofm,
+ Shape{1, 3, 3, 1}, Shape{1, 3, 3, 1}, // input shape , output shape
+ 2, 2, // kernel
+ 1, 1, // stride
+ 0, 1, 0, 1 // padding
+ );
+}
+// clang-format on
diff --git a/compiler/locomotiv/src/Node/BiasAdd.cpp b/compiler/locomotiv/src/Node/BiasAdd.cpp
new file mode 100644
index 000000000..0724fb728
--- /dev/null
+++ b/compiler/locomotiv/src/Node/BiasAdd.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+using locomotiv::NodeData;
+
+std::unique_ptr<NodeData> calc(const NodeData *input_data, const NodeData *bias_data,
+ uint32_t axis);
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
+{
+ auto input_data = locomotiv::annot_data(bias_add->value());
+ auto bias_data = locomotiv::annot_data(bias_add->bias());
+
+ validate(input_data && bias_data, "Input not ready");
+ validate(locomotiv::annot_domain(bias_add->value()) == loco::Domain::Tensor &&
+ locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
+ "Wrong input domain");
+
+ std::unique_ptr<NodeData> bias_add_data = calc(input_data, bias_data, bias_add->axis());
+
+ assert(bias_add_data != nullptr);
+ annot_data(bias_add, std::move(bias_add_data));
+ annot_domain(bias_add, annot_domain(bias_add->value()));
+}
+
+void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add)
+{
+ auto input_data = locomotiv::annot_data(bias_add->value());
+ auto bias_data = locomotiv::annot_data(bias_add->bias());
+
+ validate(input_data && bias_data, "Input not ready");
+ validate(locomotiv::annot_domain(bias_add->value()) == loco::Domain::Feature &&
+ locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
+ "Wrong input domain");
+
+ std::unique_ptr<NodeData> bias_add_data = calc(input_data, bias_data, 3);
+
+ assert(bias_add_data != nullptr);
+ annot_data(bias_add, std::move(bias_add_data));
+ annot_domain(bias_add, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
+
+namespace
+{
+using locomotiv::NodeData;
+using locomotiv::validate;
+using locomotiv::make_data;
+
+std::unique_ptr<NodeData> calc(const NodeData *input_data, const NodeData *bias_data, uint32_t axis)
+{
+ validate(input_data->shape()->dim(axis) == bias_data->shape()->dim(0), "Bias size mismatch");
+
+ std::unique_ptr<NodeData> bias_add_data = nullptr;
+
+ switch (input_data->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ auto input_bufptr = input_data->as_f32_bufptr();
+ auto bias_bufptr = bias_data->as_f32_bufptr();
+ auto bias_add_buf = make_buffer<float, LexicalLayout>(*input_data->shape());
+
+ auto *shape = input_data->shape();
+
+ for (IndexEnumerator e{*shape}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ nncc::core::ADT::tensor::Index bias_index({index.at(axis)});
+ bias_add_buf.at(index) = input_bufptr->at(index) + bias_bufptr->at(bias_index);
+ }
+
+ bias_add_data = make_data(bias_add_buf);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+ return bias_add_data;
+}
+
+} // namespace
diff --git a/compiler/locomotiv/src/Node/BiasAdd.test.cpp b/compiler/locomotiv/src/Node/BiasAdd.test.cpp
new file mode 100644
index 000000000..0ca826673
--- /dev/null
+++ b/compiler/locomotiv/src/Node/BiasAdd.test.cpp
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/*
+test case generated from the following:
+
+ inp = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+ bias = tf.constant([1.1, 2.1], shape=[2], dtype=tf.float32)
+ out = tf.nn.bias_add(inp, bias)
+
+ with tf.Session() as sess:
+ print(sess.run(out))
+ */
+
+TEST(NodeExecution_TensorBiasAdd, f32)
+{
+ float in_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ float bias_val[] = {1.1, 2.1};
+ float out_val[] = {2.1, 4.1, 4.1, 6.1, 6.1, 8.1, 8.1, 10.1, 10.1,
+ 12.1, 12.1, 14.1, 14.1, 16.1, 16.1, 18.1, 18.1, 20.1};
+
+ // make BiasAdd(Pull, Const)
+ auto g = loco::make_graph();
+ Shape input_shape{1, 3, 3, 2}; // NHWC
+
+ auto inp = g->nodes()->create<loco::Pull>();
+ {
+ inp->dtype(loco::DataType::FLOAT32);
+ inp->shape({1, 3, 3, 2});
+ }
+
+ auto bias = g->nodes()->create<loco::BiasEncode>();
+ {
+ // nothing to do
+ }
+
+ auto bias_add = g->nodes()->create<loco::BiasAdd<loco::Domain::Tensor>>();
+ {
+ bias_add->value(inp);
+ bias_add->bias(bias);
+ bias_add->axis(3); // axis(3) means C in NHWC
+ }
+
+ // Make and assign data to pull node
+ auto inp_buf = make_buffer<float, LexicalLayout>(input_shape);
+ {
+ int n = 0;
+ for (IndexEnumerator e{inp_buf.shape()}; e.valid(); e.advance())
+ {
+ inp_buf.at(e.current()) = in_val[n++];
+ }
+ }
+
+ auto bias_buf = make_buffer<float, LexicalLayout>(Shape{2});
+ {
+ int n = 0;
+ for (IndexEnumerator e{bias_buf.shape()}; e.valid(); e.advance())
+ {
+ bias_buf.at(e.current()) = bias_val[n++];
+ }
+ }
+
+ auto inp_data = locomotiv::make_data(inp_buf);
+ locomotiv::annot_data(inp, std::move(inp_data));
+ locomotiv::annot_domain(inp, loco::Domain::Tensor);
+
+ auto bias_data = locomotiv::make_data(bias_buf);
+ locomotiv::annot_data(bias, std::move(bias_data));
+ locomotiv::annot_domain(bias, loco::Domain::Bias);
+
+ locomotiv::NodeExecution::get().run(bias_add);
+
+ auto bias_add_data = locomotiv::annot_data(bias_add);
+
+ // comparing the result
+ ASSERT_NE(bias_add_data, nullptr);
+ ASSERT_EQ(bias_add_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(bias_add_data->shape()), Shape({1, 3, 3, 2}));
+
+ uint32_t n = 0;
+ for (IndexEnumerator e{*(bias_add_data->shape())}; e.valid(); e.advance())
+ {
+ ASSERT_FLOAT_EQ(bias_add_data->as_f32_bufptr()->at(e.current()), out_val[n++]);
+ }
+
+ ASSERT_EQ(locomotiv::annot_domain(bias_add), loco::Domain::Tensor);
+}
+
+/*
+test case generated from the following:
+
+ inp = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+ bias = tf.constant([1.1, 2.1], shape=[2], dtype=tf.float32)
+ out = tf.nn.bias_add(inp, bias)
+
+ with tf.Session() as sess:
+ print(sess.run(out))
+ */
+
+TEST(NodeExecution_FeatureBiasAdd, f32)
+{
+ float in_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ float bias_val[] = {1.1, 2.1};
+ float out_val[] = {2.1, 4.1, 4.1, 6.1, 6.1, 8.1, 8.1, 10.1, 10.1,
+ 12.1, 12.1, 14.1, 14.1, 16.1, 16.1, 18.1, 18.1, 20.1};
+
+ // make FeatureBiasAdd(FeatureEncode, BiasEncode)
+ auto g = loco::make_graph();
+ Shape input_shape{1, 3, 3, 2}; // NHWC
+
+ auto feature_encode = g->nodes()->create<loco::FeatureEncode>();
+ {
+ // setting values is ignored for testing
+ }
+
+ auto bias = g->nodes()->create<loco::BiasEncode>();
+ {
+ // nothing to do
+ }
+
+ auto feature_bias_add = g->nodes()->create<loco::BiasAdd<loco::Domain::Feature>>();
+ {
+ feature_bias_add->value(feature_encode);
+ feature_bias_add->bias(bias);
+ }
+
+ // Make and assign data to pull node
+ auto inp_buf = make_buffer<float, LexicalLayout>(input_shape);
+ {
+ int n = 0;
+ for (IndexEnumerator e{inp_buf.shape()}; e.valid(); e.advance())
+ {
+ inp_buf.at(e.current()) = in_val[n++];
+ }
+ }
+
+ auto bias_buf = make_buffer<float, LexicalLayout>(Shape{2});
+ {
+ int n = 0;
+ for (IndexEnumerator e{bias_buf.shape()}; e.valid(); e.advance())
+ {
+ bias_buf.at(e.current()) = bias_val[n++];
+ }
+ }
+
+ auto inp_data = locomotiv::make_data(inp_buf);
+ locomotiv::annot_data(feature_encode, std::move(inp_data));
+ locomotiv::annot_domain(feature_encode, loco::Domain::Feature);
+
+ auto bias_data = locomotiv::make_data(bias_buf);
+ locomotiv::annot_data(bias, std::move(bias_data));
+ locomotiv::annot_domain(bias, loco::Domain::Bias);
+
+ locomotiv::NodeExecution::get().run(feature_bias_add);
+
+ auto bias_add_data = locomotiv::annot_data(feature_bias_add);
+
+ // comparing the result
+ ASSERT_NE(bias_add_data, nullptr);
+ ASSERT_EQ(bias_add_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(bias_add_data->shape()), Shape({1, 3, 3, 2}));
+
+ uint32_t n = 0;
+ for (IndexEnumerator e{*(bias_add_data->shape())}; e.valid(); e.advance())
+ {
+ ASSERT_FLOAT_EQ(bias_add_data->as_f32_bufptr()->at(e.current()), out_val[n++]);
+ }
+
+ ASSERT_EQ(locomotiv::annot_domain(feature_bias_add), loco::Domain::Feature);
+}
diff --git a/compiler/locomotiv/src/Node/BiasEncode.cpp b/compiler/locomotiv/src/Node/BiasEncode.cpp
new file mode 100644
index 000000000..c2f2b44c0
--- /dev/null
+++ b/compiler/locomotiv/src/Node/BiasEncode.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::BiasEncode *bias_enc)
+{
+ auto input_data = annot_data(bias_enc->input());
+
+ validate(input_data, "Input not ready");
+ validate(annot_domain(bias_enc->input()) == loco::Domain::Tensor,
+ "Input domain should be Tensor");
+ validate(input_data->shape()->rank() == 1, "Input data rank must be 1");
+
+ std::unique_ptr<NodeData> bias_enc_data = nullptr;
+
+ switch (input_data->dtype())
+ {
+ case loco::DataType::S32:
+ {
+ auto input_bufptr = input_data->as_s32_bufptr();
+ bias_enc_data = make_data(*input_bufptr);
+ break;
+ }
+ case loco::DataType::FLOAT32:
+ {
+ auto input_bufptr = input_data->as_f32_bufptr();
+ bias_enc_data = make_data(*input_bufptr);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+
+ assert(bias_enc_data != nullptr);
+ annot_data(bias_enc, std::move(bias_enc_data));
+ annot_domain(bias_enc, loco::Domain::Bias);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/BiasEncode.test.cpp b/compiler/locomotiv/src/Node/BiasEncode.test.cpp
new file mode 100644
index 000000000..73e2af8a8
--- /dev/null
+++ b/compiler/locomotiv/src/Node/BiasEncode.test.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::Buffer;
+
+namespace
+{
+// Map a C++ scalar type to the corresponding loco::DataType.
+// The primary template throws so unsupported types fail loudly at runtime.
+template <typename T> loco::DataType loco_dtype() { throw std::runtime_error("Not supported yet"); }
+template <> loco::DataType loco_dtype<int32_t>() { return loco::DataType::S32; }
+template <> loco::DataType loco_dtype<float>() { return loco::DataType::FLOAT32; }
+
+// Fetch the typed buffer pointer from NodeData for scalar type T.
+template <typename T> const Buffer<T> *as_bufptr(const locomotiv::NodeData *data)
+{
+  throw std::runtime_error("Not supported yet");
+}
+template <> const Buffer<int32_t> *as_bufptr<int32_t>(const locomotiv::NodeData *data)
+{
+  return data->as_s32_bufptr();
+}
+template <> const Buffer<float> *as_bufptr<float>(const locomotiv::NodeData *data)
+{
+  return data->as_f32_bufptr();
+}
+
+// Shared scenario: build a Pull -> BiasEncode graph, feed a rank-1 tensor
+// holding a single value, run the interpreter, and verify that BiasEncode
+// forwards the data unchanged while switching the domain to Bias.
+template <typename T> void test()
+{
+  // Make pull-BiasEncode graph
+  auto g = loco::make_graph();
+
+  auto pull = g->nodes()->create<loco::Pull>();
+  {
+    pull->dtype(loco_dtype<T>());
+    pull->shape({1});
+  }
+
+  auto bias_enc = g->nodes()->create<loco::BiasEncode>();
+  {
+    bias_enc->input(pull);
+  }
+
+  // Make and assign data to pull node
+  auto pull_buf = make_buffer<T, LexicalLayout>(Shape{1});
+  {
+    pull_buf.at(Index{0}) = static_cast<T>(100);
+    auto pull_data = locomotiv::make_data(pull_buf);
+    locomotiv::annot_data(pull, std::move(pull_data));
+    locomotiv::annot_domain(pull, loco::Domain::Tensor);
+  }
+
+  locomotiv::NodeExecution::get().run(bias_enc);
+
+  // check: data must be forwarded byte-for-byte, domain must become Bias
+  auto bias_enc_data = locomotiv::annot_data(bias_enc);
+
+  ASSERT_NE(bias_enc_data, nullptr);
+  ASSERT_EQ(bias_enc_data->dtype(), loco_dtype<T>());
+  ASSERT_EQ(*(bias_enc_data->shape()), Shape{1});
+  ASSERT_EQ(as_bufptr<T>(bias_enc_data)->at(Index{0}), pull_buf.at(Index{0}));
+
+  ASSERT_EQ(locomotiv::annot_domain(bias_enc), loco::Domain::Bias);
+}
+} // namespace
+
+// Exercise the shared scenario for each dtype BiasEncode supports.
+TEST(NodeExecution_BiasEncode, s32) { test<int32_t>(); }
+
+TEST(NodeExecution_BiasEncode, f32) { test<float>(); }
diff --git a/compiler/locomotiv/src/Node/ConstGen.cpp b/compiler/locomotiv/src/Node/ConstGen.cpp
new file mode 100644
index 000000000..0360b9fef
--- /dev/null
+++ b/compiler/locomotiv/src/Node/ConstGen.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <stdexcept>
+#include <cassert>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+namespace
+{
+
+/**
+ * @brief Get offset based on given shape and index. Assume lexical layout.
+ *
+ * examples)
+ * For shape = {3, 4} and index = {1, 2},
+ * offset would be 6 ( = 1 * (4) + 2 )
+ * For shape = {2, 3, 4} and index = {1, 0, 2},
+ * offset would be 14 ( = 1 * (3*4) + 0 *(4) + 2 )
+ */
+inline uint32_t offset_by_index(const Shape &shape, const Index &index)
+{
+  // A single static instance suffices: LexicalLayout carries no per-call
+  // state, only the offset computation.
+  static const nncc::core::ADT::tensor::LexicalLayout l;
+  return l.offset(shape, index);
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute ConstGen: materialize the node's constant payload into a
+ *        lexically-laid-out tensor buffer and annotate it as Domain::Tensor.
+ *
+ * ConstGen stores its values as a flat array; each multi-dimensional index
+ * is mapped back to a flat offset (lexical layout) to read the right value.
+ */
+void NodeExecution::execute(loco::ConstGen *constgen)
+{
+  uint32_t volume = 1;
+
+  // Convert loco dimensions into an ADT Shape, accumulating the element
+  // count to cross-check against the declared payload size below.
+  Shape shape;
+  shape.resize(constgen->rank());
+  for (uint32_t i = 0; i < shape.rank(); ++i)
+  {
+    shape.dim(i) = constgen->dim(i).value();
+    volume *= shape.dim(i);
+  }
+
+  std::unique_ptr<NodeData> data = nullptr;
+
+  switch (constgen->dtype())
+  {
+    case loco::DataType::S32:
+    {
+      assert(volume == constgen->size<loco::DataType::S32>());
+
+      auto buf = make_buffer<int32_t, LexicalLayout>(shape);
+
+      for (IndexEnumerator e{shape}; e.valid(); e.advance())
+      {
+        const auto &index = e.current();
+        uint32_t offset = ::offset_by_index(shape, index);
+        buf.at(index) = constgen->at<loco::DataType::S32>(offset);
+      }
+
+      data = locomotiv::make_data(buf);
+      break;
+    }
+    case loco::DataType::FLOAT32:
+    {
+      assert(volume == constgen->size<loco::DataType::FLOAT32>());
+
+      auto buf = make_buffer<float, LexicalLayout>(shape);
+
+      for (IndexEnumerator e{shape}; e.valid(); e.advance())
+      {
+        const auto &index = e.current();
+        uint32_t offset = ::offset_by_index(shape, index);
+        buf.at(index) = constgen->at<loco::DataType::FLOAT32>(offset);
+      }
+
+      data = locomotiv::make_data(buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(data != nullptr);
+  annot_data(constgen, std::move(data));
+  annot_domain(constgen, loco::Domain::Tensor);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/ConstGen.test.cpp b/compiler/locomotiv/src/Node/ConstGen.test.cpp
new file mode 100644
index 000000000..838f4c11d
--- /dev/null
+++ b/compiler/locomotiv/src/Node/ConstGen.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+// ConstGen must copy its flat S32 payload verbatim into a row-major 2x3
+// tensor and annotate the node with Domain::Tensor.
+TEST(NodeExecution_ConstGen, s32)
+{
+  // Make ConstGen node
+  loco::ConstGen constgen;
+
+  constgen.dtype(loco::DataType::S32);
+  constgen.shape({2, 3});
+  constgen.size<loco::DataType::S32>(6);
+
+  constgen.at<loco::DataType::S32>(0) = 0;  // Set 0,0
+  constgen.at<loco::DataType::S32>(1) = 1;  // Set 0,1
+  constgen.at<loco::DataType::S32>(2) = 2;  // Set 0,2
+  constgen.at<loco::DataType::S32>(3) = -3; // Set 1,0
+  constgen.at<loco::DataType::S32>(4) = -4; // Set 1,1
+  constgen.at<loco::DataType::S32>(5) = -5; // Set 1,2
+
+  // run execution
+  locomotiv::NodeExecution::get().run(&constgen);
+
+  // test
+  auto data = locomotiv::annot_data(&constgen);
+  ASSERT_NE(data, nullptr);
+  ASSERT_EQ(data->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*data->shape(), Shape({2, 3}));
+  ASSERT_EQ(data->as_s32_bufptr()->at(Index{0, 0}), 0);
+  ASSERT_EQ(data->as_s32_bufptr()->at(Index{0, 1}), 1);
+  ASSERT_EQ(data->as_s32_bufptr()->at(Index{0, 2}), 2);
+  ASSERT_EQ(data->as_s32_bufptr()->at(Index{1, 0}), -3);
+  ASSERT_EQ(data->as_s32_bufptr()->at(Index{1, 1}), -4);
+  ASSERT_EQ(data->as_s32_bufptr()->at(Index{1, 2}), -5);
+
+  ASSERT_EQ(locomotiv::annot_domain(&constgen), loco::Domain::Tensor);
+}
+
+// Same scenario as the s32 case above, for the FLOAT32 payload path.
+TEST(NodeExecution_ConstGen, f32)
+{
+  // Make ConstGen node
+  loco::ConstGen constgen;
+
+  constgen.dtype(loco::DataType::FLOAT32);
+  constgen.shape({2, 3});
+  constgen.size<loco::DataType::FLOAT32>(6);
+
+  constgen.at<loco::DataType::FLOAT32>(0) = 0.0f; // Set 0,0
+  constgen.at<loco::DataType::FLOAT32>(1) = 1.0f; // Set 0,1
+  constgen.at<loco::DataType::FLOAT32>(2) = 2.0f; // Set 0,2
+  constgen.at<loco::DataType::FLOAT32>(3) = 3.0f; // Set 1,0
+  constgen.at<loco::DataType::FLOAT32>(4) = 4.0f; // Set 1,1
+  constgen.at<loco::DataType::FLOAT32>(5) = 5.0f; // Set 1,2
+
+  // run execution
+  locomotiv::NodeExecution::get().run(&constgen);
+
+  // test
+  auto data = locomotiv::annot_data(&constgen);
+  ASSERT_NE(data, nullptr);
+  ASSERT_EQ(data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*data->shape(), Shape({2, 3}));
+  ASSERT_FLOAT_EQ(data->as_f32_bufptr()->at(Index{0, 0}), 0.0f);
+  ASSERT_FLOAT_EQ(data->as_f32_bufptr()->at(Index{0, 1}), 1.0f);
+  ASSERT_FLOAT_EQ(data->as_f32_bufptr()->at(Index{0, 2}), 2.0f);
+  ASSERT_FLOAT_EQ(data->as_f32_bufptr()->at(Index{1, 0}), 3.0f);
+  ASSERT_FLOAT_EQ(data->as_f32_bufptr()->at(Index{1, 1}), 4.0f);
+  ASSERT_FLOAT_EQ(data->as_f32_bufptr()->at(Index{1, 2}), 5.0f);
+
+  ASSERT_EQ(locomotiv::annot_domain(&constgen), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/Conv2D.cpp b/compiler/locomotiv/src/Node/Conv2D.cpp
new file mode 100644
index 000000000..2e4185574
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Conv2D.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+// image size includes padding.
+// Requires the padded image to be exactly covered by filter applications
+// (the assert); equivalent to (image_size - filter_size) / stride + 1
+// when the division is exact.
+inline uint32_t compute_out_size(uint32_t image_size, uint32_t filter_size, uint32_t stride)
+{
+  assert((image_size + stride - filter_size) % stride == 0);
+  return (image_size + stride - filter_size) / stride;
+}
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Calculates Conv2D
+ * @note Both input_buf and filter_buf have NHWC format.
+ *       For the filter, dim(0) is the output-channel count and dim(3)
+ *       must equal the input's channel count (dim(3) of the ifm).
+ */
+template <typename RET_T, typename IFM_T, typename FIL_T>
+Buffer<RET_T> calc_conv2D(const loco::Conv2D *conv2d, const Buffer<IFM_T> *input_buf,
+                          const Buffer<FIL_T> *filter_buf)
+{
+  auto input_shape = input_buf->shape();
+  auto filter_shape = filter_buf->shape();
+
+  locomotiv::validate(input_shape.rank() == 4, "ifm rank must be 4");
+  locomotiv::validate(filter_shape.rank() == 4, "filter rank must be 4");
+  locomotiv::validate(input_shape.dim(3) == filter_shape.dim(3),
+                      "channel value mismatch"); // should have same channel values
+
+  const uint32_t input_height = input_shape.dim(1);
+  const uint32_t input_width = input_shape.dim(2);
+
+  const uint32_t filter_height = filter_shape.dim(1);
+  const uint32_t filter_width = filter_shape.dim(2);
+
+  const uint32_t stride_width = conv2d->stride()->horizontal();
+  const uint32_t stride_height = conv2d->stride()->vertical();
+
+  // TODO Enable dilations. Let's set these to 1 for now.
+  const uint32_t dilation_width_factor = 1;
+  const uint32_t dilation_height_factor = 1;
+
+  const uint32_t pad_top = conv2d->pad()->top();
+  const uint32_t pad_bottom = conv2d->pad()->bottom();
+
+  const uint32_t pad_left = conv2d->pad()->left();
+  const uint32_t pad_right = conv2d->pad()->right();
+
+  // Padding is folded into the image extent before computing output size
+  // (compute_out_size expects the padded size).
+  const uint32_t output_height =
+      compute_out_size(input_height + pad_top + pad_bottom, filter_height, stride_height);
+  const uint32_t output_width =
+      compute_out_size(input_width + pad_left + pad_right, filter_width, stride_width);
+
+  const uint32_t batches = input_shape.dim(0);
+  const uint32_t input_depth = input_shape.dim(3);
+  const uint32_t output_depth = filter_shape.dim(0);
+
+  Shape output_shape{batches, output_height, output_width, output_depth};
+  auto output_buf = make_buffer<RET_T, LexicalLayout>(output_shape);
+
+  for (uint32_t batch = 0; batch < batches; ++batch)
+  {
+    for (uint32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (uint32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (uint32_t out_channel = 0; out_channel < output_depth; ++out_channel)
+        {
+          // Top-left corner of the receptive field in input coordinates;
+          // may be negative when the window starts in the padded region.
+          const int in_x_origin = (out_x * stride_width) - pad_left;
+          const int in_y_origin = (out_y * stride_height) - pad_top;
+
+          RET_T total = static_cast<RET_T>(0);
+
+          for (uint32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (uint32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              for (uint32_t in_channel = 0; in_channel < input_depth; ++in_channel)
+              {
+                const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+
+                // If the location is outside the bounds of the input image,
+                // use zero as a default value.
+                if ((in_x >= 0) && ((unsigned)in_x < input_width) && (in_y >= 0) &&
+                    ((unsigned)in_y < input_height))
+                {
+                  auto input_value =
+                      input_buf->at(Index({batch, (unsigned)in_y, (unsigned)in_x, in_channel}));
+                  auto filter_value =
+                      filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
+                  total += (input_value * filter_value);
+                }
+              }
+            }
+          }
+          output_buf.at(Index({batch, out_y, out_x, out_channel})) = total;
+        }
+      }
+    }
+  }
+  return output_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute Conv2D on FLOAT32 feature/filter data.
+ *
+ * Validates that both inputs carry data annotations, are rank-4, and live
+ * in the Feature / Filter domains respectively, then delegates the actual
+ * arithmetic to calc_conv2D and annotates the result as a Feature.
+ */
+void NodeExecution::execute(loco::Conv2D *conv2d)
+{
+  auto ifm_data = annot_data(conv2d->ifm());
+  auto ker_data = annot_data(conv2d->ker());
+
+  validate(ifm_data, "Can't find input data of Conv2D");
+  validate(ifm_data->shape()->rank() == 4, "ifm rank must be 4");
+
+  validate(ker_data, "Can't find kernel data of Conv2D");
+  validate(ker_data->shape()->rank() == 4, "Kernel rank must be 4");
+
+  validate(annot_domain(conv2d->ifm()) == loco::Domain::Feature, "IFM of Conv2D is not feature");
+  validate(annot_domain(conv2d->ker()) == loco::Domain::Filter, "Kernel of Conv2D is not filter");
+
+  std::unique_ptr<NodeData> conv2d_result = nullptr;
+
+  // Only the f32 x f32 combination is implemented so far.
+  if (ifm_data->dtype() == loco::DataType::FLOAT32 && ker_data->dtype() == loco::DataType::FLOAT32)
+  {
+    auto ifm_buf = ifm_data->as_f32_bufptr();
+    auto ker_buf = ker_data->as_f32_bufptr();
+
+    auto conv2d_buf = calc_conv2D<float, float, float>(conv2d, ifm_buf, ker_buf);
+
+    conv2d_result = make_data(conv2d_buf);
+  }
+  else
+    throw std::runtime_error("NYI for these DataTypes");
+
+  assert(conv2d_result != nullptr);
+
+  annot_data(conv2d, std::move(conv2d_result));
+  annot_domain(conv2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Conv2D.test.cpp b/compiler/locomotiv/src/Node/Conv2D.test.cpp
new file mode 100644
index 000000000..83d7fc268
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Conv2D.test.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+/**
+ * @brief Build a FeatureEncode/FilterEncode -> Conv2D graph, run the
+ *        interpreter, and compare the result against expected_ofm.
+ *
+ * Raw arrays are read through lexical-layout overlays using the given
+ * shapes. Padding defaults to 0 on every side.
+ *
+ * NOTE All three shapes are now taken by const reference; ker_shape and
+ * ofm_shape were previously passed by value, inconsistently with ifm_shape
+ * and with a needless copy per call. Call sites are unchanged.
+ */
+void run_test(const float *ifm, const float *ker, const float *expected_ofm, const Shape &ifm_shape,
+              const Shape &ker_shape, const Shape &ofm_shape, const uint32_t stride_v,
+              const uint32_t stride_h, const uint32_t pad_top = 0, const uint32_t pad_bottom = 0,
+              const uint32_t pad_left = 0, const uint32_t pad_right = 0)
+{
+  auto g = loco::make_graph();
+
+  // Fill output data of FeatureEncode from ifm
+  auto ifm_enc = g->nodes()->create<loco::FeatureEncode>();
+  {
+    auto ifm_enc_buf = make_buffer<float, LexicalLayout>(ifm_shape);
+    auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      ifm_enc_buf.at(ind) = ifm_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(ifm_enc_buf);
+    locomotiv::annot_data(ifm_enc, std::move(enc_data));
+    locomotiv::annot_domain(ifm_enc, loco::Domain::Feature);
+  }
+
+  // Fill output data of FilterEncode from ker
+  auto ker_enc = g->nodes()->create<loco::FilterEncode>();
+  {
+    auto ker_enc_buf = make_buffer<float, LexicalLayout>(ker_shape);
+    auto ker_overlay = make_overlay<float, LexicalLayout>(ker_shape, const_cast<float *>(ker));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{ker_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      ker_enc_buf.at(ind) = ker_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(ker_enc_buf);
+    locomotiv::annot_data(ker_enc, std::move(enc_data));
+    locomotiv::annot_domain(ker_enc, loco::Domain::Filter);
+  }
+
+  // build Conv2D
+  auto conv2d = g->nodes()->create<loco::Conv2D>();
+  conv2d->ifm(ifm_enc);
+  conv2d->ker(ker_enc);
+  conv2d->stride()->vertical(stride_v);
+  conv2d->stride()->horizontal(stride_h);
+  conv2d->pad()->top(pad_top);
+  conv2d->pad()->bottom(pad_bottom);
+  conv2d->pad()->left(pad_left);
+  conv2d->pad()->right(pad_right);
+
+  // run interpreter
+  locomotiv::NodeExecution::get().run(conv2d);
+
+  // get result of calculation
+  auto conv2d_result = locomotiv::annot_data(conv2d);
+
+  // check the result
+  ASSERT_NE(conv2d_result, nullptr);
+  ASSERT_TRUE(conv2d_result->dtype() == loco::DataType::FLOAT32);
+  ASSERT_TRUE(*(conv2d_result->shape()) == ofm_shape);
+
+  auto ofm_overlay =
+      make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+  for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
+  {
+    const auto &ind = e.current();
+    ASSERT_FLOAT_EQ(conv2d_result->as_f32_bufptr()->at(ind), ofm_overlay.at(ind));
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(conv2d), loco::Domain::Feature);
+}
+
+} // namespace
+
+// clang-format off
+/* ifm and ofm are from the code below:
+
+ifm = tf.random_normal([1, 5, 5, 1], stddev=1)
+ker = tf.random_normal([3, 3, 1, 1], stddev=1)
+out = tf.nn.conv2d(ifm, ker, strides = [1, 2, 2, 1], padding= 'VALID')
+
+with tf.Session() as sess:
+ print(sess.run(out))
+*/
+// Single-batch, single-channel 5x5 input, 3x3 kernel, stride 2, no padding
+// ('VALID'); expected values come from the TensorFlow snippet above.
+TEST(NodeExecution_Conv2D, f32_1x5x5x1_calculation)
+{
+  using nncc::core::ADT::tensor::Shape;
+
+  const float ifm[] =
+  {
+    -0.48850584,  1.4292705,  -1.3424522, -0.7441476,  -1.8964586,
+     1.7021934,  -0.39246717,  0.6248314,  0.12724274,  1.3915083,
+     0.382255,    0.7725081,   0.9171561, -1.1847119,   0.61858755,
+     1.1530193,  -0.476239,   -0.9038663, -0.48764458,  0.339963,
+     2.2817912,  -0.8464133,  -1.0598192,  0.8361126,   1.2344601
+  };
+
+  const float ker[] =
+  {
+    -0.0830195,  0.21088193, -0.11781317,
+     0.07755677, 1.6337638,   1.0792778,
+    -1.6922939, -1.5437212,   0.96667504
+  };
+
+  const float ofm[] =
+  {
+    -0.28752697, 2.8108592,
+    -5.220376  , 0.7973861
+  };
+
+  run_test(ifm, ker, ofm,
+           Shape{1, 5, 5, 1}, Shape{1, 3, 3, 1}, Shape{1, 2, 2, 1}, // shapes of input, ker, output
+           2, 2 // stride
+  );
+}
+
+// Multi-channel case with constant data: every output element is the sum of
+// 2*2*3 = 12 products of 2.2 * 1.1 = 2.42, i.e. 29.04.
+TEST(NodeExecution_Conv2D, f32_multiple_channel)
+{
+  // testing channel != 1, stride = [1,1]
+  using nncc::core::ADT::tensor::Shape;
+
+  float ifm[1*5*5*3];
+  for (int n = 0; n < 5*5*3; n++) ifm[n] = 2.2;
+
+  float ker[2*2*2*3]; // nhwc
+  for (int n = 0; n < 2*2*2*3; n++) ker[n] = 1.1;
+
+  float ofm[1*4*4*2];
+  for (int n = 0; n < 1*4*4*2; n++) ofm[n] = 29.04;
+
+  run_test(ifm, ker, ofm,
+           Shape{1, 5, 5, 3}, Shape{2, 2, 2, 3}, Shape{1, 4, 4, 2}, // shapes of input, ker, output
+           1, 1 // stride
+  );
+}
+
+/* ifm and ofm are from the code below:
+tensorflow version : 1.12.0
+
+import tensorflow as tf
+
+ifm = tf.constant([-1.3653529, 0.4160791, 0.5059157, 0.7649683, 0.39364856,
+ -1.0164733, 1.506766, -1.1413091, 1.2766701, -0.9253511,
+ 1.3570246, 0.32089928, -0.9898171, 1.983792, -0.3423274,
+ -1.1901658, 1.2288222, -0.47401968, -0.01369802, 0.4136331,
+ 0.06960588, -0.16537654, -0.65015996, -0.555224, 0.7140603
+], shape=[1, 5, 5, 1])
+
+ker = tf.constant([2.3490515, -0.4572366, 0.05790535,
+ 0.3672005, 0.52679914, 0.74607974,
+ -1.7211207, 1.1174419, -0.59663385
+], shape=[3, 3, 1, 1])
+
+ofm = tf.nn.conv2d(ifm, ker, strides=[1, 1, 1, 1], padding='SAME')
+
+with tf.Session() as sess:
+ print(sess.run(ofm))
+*/
+// 'SAME'-style convolution: stride 1 with explicit padding of 1 on every
+// side keeps the 5x5 output size; expected values from the TF snippet above.
+TEST(NodeExecution_Conv2D, with_padding)
+{
+  using nncc::core::ADT::tensor::Shape;
+
+  const float ifm[] =
+  {
+    -1.3653529,  0.4160791,   0.5059157,   0.7649683,   0.39364856,
+    -1.0164733,  1.506766,   -1.1413091,   1.2766701,  -0.9253511,
+     1.3570246,  0.32089928, -0.9898171,   1.983792,   -0.3423274,
+    -1.1901658,  1.2288222,  -0.47401968, -0.01369802,  0.4136331,
+     0.06960588, -0.16537654, -0.65015996, -0.555224,   0.7140603
+  };
+
+  const float ker[] =
+  {
+     2.3490515, -0.4572366,  0.05790535,
+     0.3672005,  0.52679914, 0.74607974,
+    -1.7211207,  1.1174419, -0.59663385
+  };
+
+  const float ofm[] =
+  {
+    -2.443676,   4.2094254, -3.6403496,  4.8254814, -2.743059,
+     2.5620093, -5.185688,  -1.1470609,  4.54913,   -2.1985974,
+    -0.5567835,  0.49045527, 2.5752437, -2.3383713,  4.455967,
+    -0.13562866, 2.9236434,  1.4019353, -3.0521483,  6.782954,
+     0.5286269, -3.9317036,  2.285041,  -1.0817666, -0.04901773
+  };
+
+  run_test(ifm, ker, ofm,
+           Shape{1, 5, 5, 1}, Shape{1, 3, 3, 1}, Shape{1, 5, 5, 1}, // shapes of input, ker, output
+           1, 1, // stride
+           1, 1, 1, 1 // padding
+  );
+}
+// clang-format on
diff --git a/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..92d5aa161
--- /dev/null
+++ b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+/**
+ * @brief Compute 1D output size based on given 1D arguments.
+ *
+ * @param whole_pad Sum of front and back pad
+ */
+inline uint32_t compute_out_size(uint32_t image_size, uint32_t whole_pad, uint32_t filter_size,
+                                 uint32_t stride)
+{
+  assert((image_size + whole_pad - filter_size) % stride == 0);
+  return (image_size + whole_pad - filter_size) / stride + 1;
+}
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Calculates DepthwiseConv2D
+ * @note ifm_buf has NHWC and ker_buf HWCM format
+ *       (Please check locomotiv README for further information)
+ *
+ * Output channel for input channel `ch` and multiplier index `nth` is
+ * `ch * multiplier + nth`, so ofm depth = ifm depth * multiplier.
+ */
+template <typename RET_T, typename IFM_T, typename KER_T>
+Buffer<RET_T> calc_dw_conv2d(const loco::DepthwiseConv2D *dw_conv2d, const Buffer<IFM_T> *ifm_buf,
+                             const Buffer<KER_T> *ker_buf)
+{
+  auto ifm_shape = ifm_buf->shape();
+  auto ker_shape = ker_buf->shape();
+
+  locomotiv::validate(ifm_shape.rank() == 4, "ifm rank must be 4");
+  locomotiv::validate(ker_shape.rank() == 4, "depthwise filter rank must be 4");
+  locomotiv::validate(ifm_shape.dim(3 /* of NHWC */) == ker_shape.dim(2 /* of HWCM */),
+                      "channel value mismatch"); // should have same channel values
+
+  const uint32_t ifm_height = ifm_shape.dim(1);
+  const uint32_t ifm_width = ifm_shape.dim(2);
+
+  const uint32_t ker_height = ker_shape.dim(0);
+  const uint32_t ker_width = ker_shape.dim(1);
+
+  const uint32_t stride_width = dw_conv2d->stride()->horizontal();
+  const uint32_t stride_height = dw_conv2d->stride()->vertical();
+
+  // TODO Enable dilations. Let's set these to 1 for now.
+  const uint32_t dilation_width_factor = 1;
+  const uint32_t dilation_height_factor = 1;
+
+  const uint32_t pad_top = dw_conv2d->pad()->top();
+  const uint32_t pad_bottom = dw_conv2d->pad()->bottom();
+
+  const uint32_t pad_left = dw_conv2d->pad()->left();
+  const uint32_t pad_right = dw_conv2d->pad()->right();
+
+  const uint32_t ofm_height =
+      compute_out_size(ifm_height, pad_top + pad_bottom, ker_height, stride_height);
+  const uint32_t ofm_width =
+      compute_out_size(ifm_width, pad_left + pad_right, ker_width, stride_width);
+
+  const uint32_t batches = ifm_shape.dim(0);
+  const uint32_t ifm_depth = ifm_shape.dim(3);
+  const uint32_t multiplier = ker_shape.dim(3);
+  const uint32_t ofm_depth = ifm_depth * multiplier;
+
+  Shape ofm_shape{batches, ofm_height, ofm_width, ofm_depth};
+  auto ofm_buf = make_buffer<RET_T, LexicalLayout>(ofm_shape);
+
+  for (uint32_t batch = 0; batch < batches; ++batch)
+  {
+    for (uint32_t ofm_y = 0; ofm_y < ofm_height; ++ofm_y)
+    {
+      for (uint32_t ofm_x = 0; ofm_x < ofm_width; ++ofm_x)
+      {
+        for (uint32_t ch = 0; ch < ifm_depth; ++ch)
+        {
+          for (uint32_t nth = 0; nth < multiplier; nth++)
+          {
+            const int in_x_origin = (ofm_x * stride_width) - pad_left;
+            const int in_y_origin = (ofm_y * stride_height) - pad_top;
+            // Accumulate in the result type. Was hard-coded `float`, which
+            // would silently truncate for non-float RET_T instantiations;
+            // now consistent with calc_conv2D.
+            RET_T total = static_cast<RET_T>(0);
+            for (uint32_t ker_y = 0; ker_y < ker_height; ++ker_y)
+            {
+              for (uint32_t ker_x = 0; ker_x < ker_width; ++ker_x)
+              {
+                const int in_x = in_x_origin + dilation_width_factor * ker_x;
+                const int in_y = in_y_origin + dilation_height_factor * ker_y;
+                // If the location is outside the bounds of the input image,
+                // use zero as a default value.
+                if ((in_x >= 0) && ((unsigned)in_x < ifm_width) && (in_y >= 0) &&
+                    ((unsigned)in_y < ifm_height))
+                {
+                  auto ifm_value = ifm_buf->at(Index({batch, (unsigned)in_y, (unsigned)in_x, ch}));
+                  auto ker_value = ker_buf->at(Index({ker_y, ker_x, ch, nth}));
+                  total += (ifm_value * ker_value);
+                }
+              }
+            }
+            uint32_t ofm_channel = ch * multiplier + nth;
+            ofm_buf.at(Index({batch, ofm_y, ofm_x, ofm_channel})) = total;
+          }
+        }
+      }
+    }
+  }
+  return ofm_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute DepthwiseConv2D on FLOAT32 feature/filter data.
+ *
+ * Validates that both inputs carry data annotations, are rank-4, and live
+ * in the Feature / DepthwiseFilter domains respectively, then delegates the
+ * arithmetic to calc_dw_conv2d and annotates the result as a Feature.
+ */
+void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d)
+{
+  auto ifm_data = annot_data(dw_conv2d->ifm());
+  auto ker_data = annot_data(dw_conv2d->ker());
+
+  validate(ifm_data, "Can't find input data of DepthwiseConv2D");
+  validate(ifm_data->shape()->rank() == 4, "ifm rank must be 4");
+
+  validate(ker_data, "Can't find kernel data of DepthwiseConv2D");
+  validate(ker_data->shape()->rank() == 4, "Kernel rank must be 4");
+
+  validate(annot_domain(dw_conv2d->ifm()) == loco::Domain::Feature,
+           "IFM of DepthwiseConv2D is not feature");
+  validate(annot_domain(dw_conv2d->ker()) == loco::Domain::DepthwiseFilter,
+           "Kernel of DepthwiseConv2D is not depthwise filter");
+
+  std::unique_ptr<NodeData> dw_conv2d_result = nullptr;
+
+  // Only the f32 x f32 combination is implemented so far.
+  if (ifm_data->dtype() == loco::DataType::FLOAT32 && ker_data->dtype() == loco::DataType::FLOAT32)
+  {
+    auto ifm_buf = ifm_data->as_f32_bufptr();
+    auto ker_buf = ker_data->as_f32_bufptr();
+
+    auto dw_conv2d_buf = calc_dw_conv2d<float, float, float>(dw_conv2d, ifm_buf, ker_buf);
+
+    dw_conv2d_result = make_data(dw_conv2d_buf);
+  }
+  else
+    throw std::runtime_error("NYI for these DataTypes");
+
+  assert(dw_conv2d_result != nullptr);
+
+  annot_data(dw_conv2d, std::move(dw_conv2d_result));
+  annot_domain(dw_conv2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp b/compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp
new file mode 100644
index 000000000..48824c2e0
--- /dev/null
+++ b/compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+/**
+ * @brief Build a DepthwiseConv2D graph, run the interpreter, verify output
+ *
+ * Attaches pre-annotated data (copied from the raw ifm/ker arrays) to
+ * FeatureEncode and DepthwiseFilterEncode nodes, wires a DepthwiseConv2D
+ * with the given stride and padding, runs locomotiv on it, and compares the
+ * annotated result element-wise against expected_ofm.
+ *
+ * NOTE(review): shapes are assumed to already be in locomotiv's internal
+ * layouts (ifm: NHWC feature, ker: HWCM depthwise filter) — confirm with
+ * callers.
+ */
+void run_test(const float *ifm, const float *ker, const float *expected_ofm, const Shape &ifm_shape,
+              const Shape ker_shape, const Shape ofm_shape, const uint32_t stride_v,
+              const uint32_t stride_h, const uint32_t pad_top = 0, const uint32_t pad_bottom = 0,
+              const uint32_t pad_left = 0, const uint32_t pad_right = 0)
+{
+  auto g = loco::make_graph();
+
+  // Fill output data of FeatureEncode from ifm
+  auto ifm_enc = g->nodes()->create<loco::FeatureEncode>();
+  {
+    // Overlay gives index-based access to the raw array without copying it
+    auto ifm_enc_buf = make_buffer<float, LexicalLayout>(ifm_shape);
+    auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      ifm_enc_buf.at(ind) = ifm_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(ifm_enc_buf);
+    locomotiv::annot_data(ifm_enc, std::move(enc_data));
+    locomotiv::annot_domain(ifm_enc, loco::Domain::Feature);
+  }
+
+  // Fill output data of DepthwiseFilterEncode from ker
+  auto ker_enc = g->nodes()->create<loco::DepthwiseFilterEncode>();
+  {
+    auto ker_enc_buf = make_buffer<float, LexicalLayout>(ker_shape);
+    auto ker_overlay = make_overlay<float, LexicalLayout>(ker_shape, const_cast<float *>(ker));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{ker_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      ker_enc_buf.at(ind) = ker_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(ker_enc_buf);
+    locomotiv::annot_data(ker_enc, std::move(enc_data));
+    locomotiv::annot_domain(ker_enc, loco::Domain::DepthwiseFilter);
+  }
+
+  // build DepthwiseConv2D
+  auto dw_conv2d = g->nodes()->create<loco::DepthwiseConv2D>();
+  dw_conv2d->ifm(ifm_enc);
+  dw_conv2d->ker(ker_enc);
+  dw_conv2d->stride()->vertical(stride_v);
+  dw_conv2d->stride()->horizontal(stride_h);
+  dw_conv2d->pad()->top(pad_top);
+  dw_conv2d->pad()->bottom(pad_bottom);
+  dw_conv2d->pad()->left(pad_left);
+  dw_conv2d->pad()->right(pad_right);
+
+  // run interpreter
+  locomotiv::NodeExecution::get().run(dw_conv2d);
+
+  // get result of calculation
+  auto dw_conv2d_result = locomotiv::annot_data(dw_conv2d);
+
+  // check the result
+  ASSERT_NE(dw_conv2d_result, nullptr);
+  ASSERT_TRUE(dw_conv2d_result->dtype() == loco::DataType::FLOAT32);
+  ASSERT_TRUE(*(dw_conv2d_result->shape()) == ofm_shape);
+
+  // Compare every output element against the expected value
+  auto ofm_overlay =
+      make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+  for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
+  {
+    const auto &ind = e.current();
+    ASSERT_FLOAT_EQ(dw_conv2d_result->as_f32_bufptr()->at(ind), ofm_overlay.at(ind));
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(dw_conv2d), loco::Domain::Feature);
+}
+
+} // namespace
+
+// clang-format off
+
+/* ifm, ker and ofm are from the code below:
+
+ifm = tf.random_normal([1, 5, 5, 2], stddev=1.1)
+ker = tf.random_normal([4, 4, 2, 3], stddev=1.1)
+out = tf.nn.depthwise_conv2d(ifm, ker, strides = [1, 1, 1, 1], padding= 'VALID')
+
+with tf.Session() as sess:
+ print(sess.run(out))
+*/
+/* Fixture values were generated by the TensorFlow snippet above:
+   ifm is NHWC [1,5,5,2], ker is [4,4,2,3] (H,W,C,M — channel multiplier 3),
+   so VALID (no-padding) output is NHWC [1,2,2,6]. */
+TEST(NodeExecution_DepthwiseConv2D, f32_random_valid)
+{
+  using nncc::core::ADT::tensor::Shape;
+
+  const float ifm[] = {0.8122538, 1.209147, 0.6903842, -0.26646265, 1.516799, -1.8540707,
+                       -0.74240327, 1.7811562, -0.03699546, -0.44468504, -1.4982721, -1.1858582,
+                       -0.21140318, -0.974522, 1.0000849, -1.294535, -0.6108882, 0.25827602,
+                       1.3631831, -0.5180266, 0.20870179, 0.18333802, -0.42263857, -1.6694735,
+                       0.0415236, -0.3903758, 2.0933757, -0.29660916, 2.1218338, -1.1599928,
+                       0.57163256, 0.48865932, -1.3622656, 0.35924262, 1.2951899, -0.1769997,
+                       0.74513537, -0.31920406, -1.2902768, -0.7095059, 1.9157801, -0.41028237,
+                       1.2502829, 0.3354887, 1.4199319, -0.20366786, -0.8828556, 0.5173567,
+                       1.7708117, -0.30096334};
+  const float ker[] = {
+      -0.19805557, 0.58464956, -0.7804337, 0.06974592, 0.45790604, 0.24833807, 0.43393376,
+      0.2541043, -0.04406675, -0.32167575, 1.0546446, -1.4978354, 0.20829494, 1.1659569,
+      0.37908667, -0.94137955, 0.293349, -1.1023049, 0.76133233, 0.55595005, 1.4458209,
+      1.6128604, 1.5655615, -2.183877, -0.90535915, -0.49858555, 1.7168728, -1.1590382,
+      0.6706056, 1.2215618, -0.06603386, 0.16559464, 0.541991, -0.44488335, 0.766181,
+      1.0227629, -0.6352362, -1.670828, -0.63334507, 0.0313305, -0.6721083, 0.50112915,
+      -0.15218066, 0.67222077, -0.3613627, -0.08516614, -0.5024078, -0.9503976, -2.1892295,
+      1.8308185, -0.15187284, 1.5761136, 0.24869336, -1.7378871, -0.22518761, 1.0175673,
+      0.7084485, -0.74157554, -1.8185995, -1.3330095, -0.04427439, 1.0556892, -0.68243974,
+      0.32001218, 2.0901792, -1.1612813, 0.7294674, 0.05740008, -0.00832882, 1.0446658,
+      0.4477195, -0.09174404, -1.0176039, 1.5066665, -2.148343, 0.29421416, 0.93011874,
+      -0.15737922, -1.6444012, 0.25780794, -0.6545867, -0.3488956, 0.26167992, -0.154414,
+      0.2798124, -0.8590068, 2.0494444, 0.48268002, 0.81941164, -0.4848027, 0.76870304,
+      0.7102261, 0.45778143, 0.23214905, -0.17742023, -0.75016516};
+  const float ofm[] = {4.474646, 0.6792067, -1.9799856, 7.484751, 4.3087378, -1.905938,
+                       1.4887369, 0.4361322, 0.79539883, -3.8583446, -4.502204, 4.356392,
+                       -5.3030324, 3.493003, -4.349277, 2.3069482, -3.8881323, -0.73901534,
+                       -0.6629516, 2.1247253, -4.9229584, 1.6716996, -3.0208125, 1.0597891};
+
+  run_test(ifm, ker, ofm,
+           Shape{1, 5, 5, 2}, Shape{4, 4, 2, 3}, Shape{1, 2, 2, 6}, // shapes of input, ker, output
+           1, 1 // stride
+  );
+}
+
+// TODO Add same padding test
+
+// clang-format on
diff --git a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
new file mode 100644
index 000000000..17004901f
--- /dev/null
+++ b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <stdexcept>
+#include <cassert>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/**
+ * @brief Encode input tensor into depthwise filter represented in "HWCM" layout
+ *
+ * (Please check locomotiv README for further information)
+ */
+/**
+ * @brief Encode input tensor into depthwise filter represented in "HWCM" layout
+ *
+ * (Please check locomotiv README for further information)
+ */
+template <typename T>
+std::unique_ptr<locomotiv::NodeData> dw_filter_encode(const loco::DepthwiseFilterEncode *node,
+                                                      const Buffer<T> *input_buf)
+{
+  auto encoder = node->encoder();
+
+  // Reconstruct a loco::TensorShape from the raw input buffer
+  loco::TensorShape input_shape;
+  input_shape.rank(input_buf->shape().rank());
+  assert(input_shape.rank() == 4);
+  for (uint32_t axis = 0; axis < input_shape.rank(); ++axis)
+  {
+    input_shape.dim(axis) = input_buf->shape().dim(axis);
+  }
+
+  // Ask the encoder how the depthwise filter is shaped for this input
+  const loco::DepthwiseFilterShape node_shape = encoder->shape(input_shape);
+
+  // Destination buffer uses HWCM (height, width, depth, multiplier) order
+  const Shape hwcm_shape{node_shape.height().value(), node_shape.width().value(),
+                         node_shape.depth().value(), node_shape.multiplier().value()};
+  auto node_buf = make_buffer<T, LexicalLayout>(hwcm_shape);
+
+  // Fill the HWCM buffer; the encoder translates every destination position
+  // back to the matching index in the source tensor
+  for (IndexEnumerator it{node_buf.shape()}; it.valid(); it.advance())
+  {
+    const auto &cur = it.current();
+
+    loco::DepthwiseFilterIndex index;
+    index.row() = cur.at(0);
+    index.column() = cur.at(1);
+    index.channel() = cur.at(2);
+    index.nth() = cur.at(3);
+
+    node_buf.at(cur) = input_buf->at(encoder->value(index));
+  }
+
+  return locomotiv::make_data(node_buf);
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::DepthwiseFilterEncode *enc)
+{
+  auto input_data = annot_data(enc->input());
+
+  // Input must be an annotated rank-4 tensor in the Tensor domain
+  validate(input_data, "Input of DepthwiseFilterEncode not ready");
+  validate(annot_domain(enc->input()) == loco::Domain::Tensor,
+           "Input of DepthwiseFilterEncode is not Tensor");
+  validate(input_data->shape()->rank() == 4, "Input shape mismatch");
+
+  std::unique_ptr<NodeData> enc_data = nullptr;
+
+  if (input_data->dtype() == loco::DataType::FLOAT32)
+  {
+    enc_data = dw_filter_encode<float>(enc, input_data->as_f32_bufptr());
+  }
+  else
+  {
+    throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(enc_data != nullptr);
+  annot_data(enc, std::move(enc_data));
+  annot_domain(enc, loco::Domain::DepthwiseFilter);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp
new file mode 100644
index 000000000..db828c08b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <loco/IR/PermutingCodec.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/**
+ * @brief DepthwiseFilterEncode should permute an MHWC tensor into HWCM
+ *
+ * A Pull node is annotated with sequentially increasing float data laid out
+ * as MHWC; a PermutingEncoder mapping Multiplier->0, Height->1, Width->2,
+ * Depth->3 is attached, and the encoded output is checked to contain the
+ * same values re-indexed as HWCM.
+ */
+TEST(NodeExecution_DepthwiseFilterEncode, f32)
+{
+  const uint32_t H = 2;
+  const uint32_t W = 3;
+  const uint32_t C = 4;
+  const uint32_t M = 5;
+
+  auto g = loco::make_graph();
+
+  // Pull
+  auto pull = g->nodes()->create<loco::Pull>();
+  pull->dtype(loco::DataType::FLOAT32);
+
+  // Make and assign "MHWC" data to pull node
+  auto pull_buf = make_buffer<float, LexicalLayout>(Shape{M, H, W, C});
+  float f = 1;
+  for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance())
+  {
+    pull_buf.at(e.current()) = f;
+    f += 0.1f; // Doesn't matter what it is
+  }
+  auto pull_data = locomotiv::make_data(pull_buf);
+  locomotiv::annot_data(pull, std::move(pull_data));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  // Encoder to correctly read input tensor as MHWC
+  auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>(
+      new loco::PermutingEncoder<loco::Domain::DepthwiseFilter>);
+  encoder->perm()->axis(loco::DepthwiseFilterAxis::Multiplier) = 0;
+  encoder->perm()->axis(loco::DepthwiseFilterAxis::Height) = 1;
+  encoder->perm()->axis(loco::DepthwiseFilterAxis::Width) = 2;
+  encoder->perm()->axis(loco::DepthwiseFilterAxis::Depth) = 3;
+
+  // DepthwiseFilterEncode
+  auto enc = g->nodes()->create<loco::DepthwiseFilterEncode>();
+  enc->input(pull);
+  enc->encoder(std::move(encoder));
+
+  locomotiv::NodeExecution::get().run(enc);
+
+  auto enc_data = locomotiv::annot_data(enc);
+  ASSERT_NE(enc_data, nullptr);
+  ASSERT_EQ(enc_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(enc_data->shape()), (Shape{H, W, C, M})); // locomotiv depthwise filter is HWCM
+  auto enc_buf = enc_data->as_f32_bufptr();
+  // Every (m,h,w,c) source element must appear at (h,w,c,m) in the output
+  for (uint32_t h = 0; h < H; ++h)
+    for (uint32_t w = 0; w < W; ++w)
+      for (uint32_t c = 0; c < C; ++c)
+        for (uint32_t m = 0; m < M; ++m)
+          ASSERT_FLOAT_EQ(pull_buf.at(Index{m, h, w, c}), enc_buf->at(Index{h, w, c, m}));
+
+  ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::DepthwiseFilter);
+}
diff --git a/compiler/locomotiv/src/Node/EltwiseAdd.cpp b/compiler/locomotiv/src/Node/EltwiseAdd.cpp
new file mode 100644
index 000000000..e5e2d67c7
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseAdd.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::EltwiseAdd *eltwise_add)
+{
+  // Element-wise addition is the shared binary-op kernel specialized to '+'
+  struct Add final : public BinaryFunc
+  {
+    float apply(float lhs, float rhs) const { return lhs + rhs; }
+  };
+
+  Add add;
+  eltwise_binary(eltwise_add, add);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/EltwiseAdd.test.cpp b/compiler/locomotiv/src/Node/EltwiseAdd.test.cpp
new file mode 100644
index 000000000..2899dccdd
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseAdd.test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/*
+test case generated from the following:
+
+x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+y = tf.constant([-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+out = tf.math.add(x, y)
+
+with tf.Session() as sess:
+ print(sess.run(out))
+*/
+/**
+ * @brief EltwiseAdd(x, y) must compute element-wise x + y
+ *
+ * Two 1x3x3x2 Pull nodes carry data of equal magnitude and opposite sign,
+ * so every output element is expected to be exactly 0 (fixture generated by
+ * the TensorFlow snippet above).
+ */
+TEST(NodeExecution_EltwiseAdd, f32)
+{
+  float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  float y_val[] = {-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18};
+  float out_val[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+  // Build EltwiseAdd(Pull, Pull)
+  auto g = loco::make_graph();
+  Shape input_shape{1, 3, 3, 2}; // NHWC
+
+  auto inp_lhs = g->nodes()->create<loco::Pull>();
+  inp_lhs->dtype(loco::DataType::FLOAT32);
+  inp_lhs->shape({1, 3, 3, 2});
+
+  auto inp_rhs = g->nodes()->create<loco::Pull>();
+  inp_rhs->dtype(loco::DataType::FLOAT32);
+  inp_rhs->shape({1, 3, 3, 2});
+
+  auto eltwise_add = g->nodes()->create<loco::EltwiseAdd>();
+  eltwise_add->lhs(inp_lhs);
+  eltwise_add->rhs(inp_rhs);
+
+  // Fill both input buffers in one pass over the shared shape
+  auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+  auto inp_rhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+  {
+    int n = 0;
+    for (IndexEnumerator e{input_shape}; e.valid(); e.advance())
+    {
+      inp_lhs_buf.at(e.current()) = x_val[n];
+      inp_rhs_buf.at(e.current()) = y_val[n];
+      ++n;
+    }
+  }
+
+  // Annotate the Pull nodes with their data
+  locomotiv::annot_data(inp_lhs, locomotiv::make_data(inp_lhs_buf));
+  locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor);
+  locomotiv::annot_data(inp_rhs, locomotiv::make_data(inp_rhs_buf));
+  locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor);
+
+  // Run the interpreter and fetch the annotated result
+  locomotiv::NodeExecution::get().run(eltwise_add);
+  auto eltwise_add_data = locomotiv::annot_data(eltwise_add);
+
+  // Compare against the expected output
+  ASSERT_NE(eltwise_add_data, nullptr);
+  ASSERT_EQ(eltwise_add_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(eltwise_add_data->shape()), Shape({1, 3, 3, 2}));
+
+  uint32_t n = 0;
+  for (IndexEnumerator e{*(eltwise_add_data->shape())}; e.valid(); e.advance())
+  {
+    ASSERT_FLOAT_EQ(eltwise_add_data->as_f32_bufptr()->at(e.current()), out_val[n++]);
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(eltwise_add), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/EltwiseDiv.cpp b/compiler/locomotiv/src/Node/EltwiseDiv.cpp
new file mode 100644
index 000000000..a054d9a97
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseDiv.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::EltwiseDiv *eltwise_div)
+{
+  // Element-wise quotient via the shared binary-op kernel. There is no
+  // zero-divisor guard — IEEE-754 float division semantics apply.
+  struct Div final : public BinaryFunc
+  {
+    float apply(float lhs, float rhs) const { return lhs / rhs; }
+  };
+
+  Div div;
+  eltwise_binary(eltwise_div, div);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/EltwiseDiv.test.cpp b/compiler/locomotiv/src/Node/EltwiseDiv.test.cpp
new file mode 100644
index 000000000..60950c15b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseDiv.test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/*
+test case generated from the following:
+
+x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+y = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+out = tf.div(x, y)
+
+with tf.Session() as sess:
+ print(sess.run(out))
+*/
+/**
+ * @brief EltwiseDiv(x, y) must compute element-wise x / y
+ *
+ * Both 1x3x3x2 Pull nodes carry identical data, so every output element is
+ * expected to be exactly 1 (fixture generated by the TensorFlow snippet
+ * above).
+ */
+TEST(NodeExecution_EltwiseDiv, f32)
+{
+  float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  float y_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  float out_val[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+
+  // make EltwiseDiv(Pull, Pull)
+  auto g = loco::make_graph();
+  Shape input_shape{1, 3, 3, 2}; // NHWC
+
+  auto inp_lhs = g->nodes()->create<loco::Pull>();
+  {
+    inp_lhs->dtype(loco::DataType::FLOAT32);
+    inp_lhs->shape({1, 3, 3, 2});
+  }
+
+  auto inp_rhs = g->nodes()->create<loco::Pull>();
+  {
+    inp_rhs->dtype(loco::DataType::FLOAT32);
+    inp_rhs->shape({1, 3, 3, 2});
+  }
+
+  auto eltwise_div = g->nodes()->create<loco::EltwiseDiv>();
+  {
+    eltwise_div->lhs(inp_lhs);
+    eltwise_div->rhs(inp_rhs);
+  }
+
+  // Make and assign data to two pull nodes
+  auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+  {
+    int n = 0;
+    for (IndexEnumerator e{inp_lhs_buf.shape()}; e.valid(); e.advance())
+    {
+      inp_lhs_buf.at(e.current()) = x_val[n++];
+    }
+  }
+
+  auto inp_rhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+  {
+    int n = 0;
+    for (IndexEnumerator e{inp_rhs_buf.shape()}; e.valid(); e.advance())
+    {
+      inp_rhs_buf.at(e.current()) = y_val[n++];
+    }
+  }
+
+  auto inp_lhs_data = locomotiv::make_data(inp_lhs_buf);
+  locomotiv::annot_data(inp_lhs, std::move(inp_lhs_data));
+  locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor);
+
+  auto inp_rhs_data = locomotiv::make_data(inp_rhs_buf);
+  locomotiv::annot_data(inp_rhs, std::move(inp_rhs_data));
+  locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor);
+
+  // run the network
+  locomotiv::NodeExecution::get().run(eltwise_div);
+
+  // get result
+  auto eltwise_div_data = locomotiv::annot_data(eltwise_div);
+
+  // comparing the result
+  ASSERT_NE(eltwise_div_data, nullptr);
+  ASSERT_EQ(eltwise_div_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(eltwise_div_data->shape()), Shape({1, 3, 3, 2}));
+
+  uint32_t n = 0;
+  for (IndexEnumerator e{*(eltwise_div_data->shape())}; e.valid(); e.advance())
+  {
+    ASSERT_FLOAT_EQ(eltwise_div_data->as_f32_bufptr()->at(e.current()), out_val[n++]);
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(eltwise_div), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/EltwiseMax.cpp b/compiler/locomotiv/src/Node/EltwiseMax.cpp
new file mode 100644
index 000000000..ec44fd6fa
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseMax.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include <algorithm>
+#include <cstdlib>
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::EltwiseMax *eltwise_max)
+{
+  // Element-wise maximum, delegated to the shared binary-op kernel.
+  // NOTE std::max is declared in <algorithm>; the file previously relied on
+  // <cstdlib> pulling it in transitively, which is not portable (fixed by
+  // including <algorithm> at the top of this file).
+  struct Func final : public BinaryFunc
+  {
+    float apply(float lhs, float rhs) const { return std::max(lhs, rhs); }
+  };
+
+  Func f;
+
+  eltwise_binary(eltwise_max, f);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/EltwiseMax.test.cpp b/compiler/locomotiv/src/Node/EltwiseMax.test.cpp
new file mode 100644
index 000000000..c64db8994
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseMax.test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/*
+test case generated from the following:
+
+x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+y = tf.constant([18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+out = tf.math.maximum(x, y)
+
+with tf.Session() as sess:
+ print(sess.run(out))
+*/
+/**
+ * @brief EltwiseMax(x, y) must compute element-wise max(x, y)
+ *
+ * One 1x3x3x2 Pull node counts up 1..18 while the other counts down 18..1,
+ * so the expected output is the pointwise larger of the two sequences
+ * (fixture generated by the TensorFlow snippet above).
+ */
+TEST(NodeExecution_EltwiseMax, f32)
+{
+  float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  float y_val[] = {18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
+  float out_val[] = {18, 17, 16, 15, 14, 13, 12, 11, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+
+  // make EltwiseMax(Pull, Pull)
+  auto g = loco::make_graph();
+  Shape input_shape{1, 3, 3, 2}; // NHWC
+
+  auto inp_lhs = g->nodes()->create<loco::Pull>();
+  {
+    inp_lhs->dtype(loco::DataType::FLOAT32);
+    inp_lhs->shape({1, 3, 3, 2});
+  }
+
+  auto inp_rhs = g->nodes()->create<loco::Pull>();
+  {
+    inp_rhs->dtype(loco::DataType::FLOAT32);
+    inp_rhs->shape({1, 3, 3, 2});
+  }
+
+  auto eltwise_max = g->nodes()->create<loco::EltwiseMax>();
+  {
+    eltwise_max->lhs(inp_lhs);
+    eltwise_max->rhs(inp_rhs);
+  }
+
+  // Make and assign data to two pull nodes
+  auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+  {
+    int n = 0;
+    for (IndexEnumerator e{inp_lhs_buf.shape()}; e.valid(); e.advance())
+    {
+      inp_lhs_buf.at(e.current()) = x_val[n++];
+    }
+  }
+
+  auto inp_rhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+  {
+    int n = 0;
+    for (IndexEnumerator e{inp_rhs_buf.shape()}; e.valid(); e.advance())
+    {
+      inp_rhs_buf.at(e.current()) = y_val[n++];
+    }
+  }
+
+  auto inp_lhs_data = locomotiv::make_data(inp_lhs_buf);
+  locomotiv::annot_data(inp_lhs, std::move(inp_lhs_data));
+  locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor);
+
+  auto inp_rhs_data = locomotiv::make_data(inp_rhs_buf);
+  locomotiv::annot_data(inp_rhs, std::move(inp_rhs_data));
+  locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor);
+
+  // run the network
+  locomotiv::NodeExecution::get().run(eltwise_max);
+
+  // get result
+  auto eltwise_max_data = locomotiv::annot_data(eltwise_max);
+
+  // comparing the result
+  ASSERT_NE(eltwise_max_data, nullptr);
+  ASSERT_EQ(eltwise_max_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(eltwise_max_data->shape()), Shape({1, 3, 3, 2}));
+
+  uint32_t n = 0;
+  for (IndexEnumerator e{*(eltwise_max_data->shape())}; e.valid(); e.advance())
+  {
+    ASSERT_FLOAT_EQ(eltwise_max_data->as_f32_bufptr()->at(e.current()), out_val[n++]);
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(eltwise_max), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/EltwiseMul.cpp b/compiler/locomotiv/src/Node/EltwiseMul.cpp
new file mode 100644
index 000000000..6720ab92f
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseMul.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::EltwiseMul *eltwise_mul)
+{
+  // Element-wise product via the shared binary-op kernel
+  struct Mul final : public BinaryFunc
+  {
+    float apply(float lhs, float rhs) const { return lhs * rhs; }
+  };
+
+  Mul mul;
+  eltwise_binary(eltwise_mul, mul);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/EltwiseMul.test.cpp b/compiler/locomotiv/src/Node/EltwiseMul.test.cpp
new file mode 100644
index 000000000..b76888300
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseMul.test.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/*
+test case generated from the following:
+
+x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+y = tf.constant([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], shape=[1, 3, 3, 2],
+ dtype=tf.float32)
+out = tf.math.multiply(x, y)
+
+with tf.Session() as sess:
+ print(sess.run(out))
+*/
+TEST(NodeExecution_EltwiseMul, f32)
+{
+  // Reference values produced by the TensorFlow snippet above.
+  float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  float y_val[] = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+                   0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
+  float out_val[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
+                     1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8};
+
+  // make EltwiseMul(Pull, Pull)
+  auto g = loco::make_graph();
+  Shape input_shape{1, 3, 3, 2}; // NHWC
+
+  auto inp_lhs = g->nodes()->create<loco::Pull>();
+  {
+    inp_lhs->dtype(loco::DataType::FLOAT32);
+    inp_lhs->shape({1, 3, 3, 2});
+  }
+
+  auto inp_rhs = g->nodes()->create<loco::Pull>();
+  {
+    inp_rhs->dtype(loco::DataType::FLOAT32);
+    inp_rhs->shape({1, 3, 3, 2});
+  }
+
+  auto eltwise_mul = g->nodes()->create<loco::EltwiseMul>();
+  {
+    eltwise_mul->lhs(inp_lhs);
+    eltwise_mul->rhs(inp_rhs);
+  }
+
+  // Make and assign data to two pull nodes
+  auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+  {
+    int n = 0;
+    for (IndexEnumerator e{inp_lhs_buf.shape()}; e.valid(); e.advance())
+    {
+      inp_lhs_buf.at(e.current()) = x_val[n++];
+    }
+  }
+
+  auto inp_rhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+  {
+    int n = 0;
+    for (IndexEnumerator e{inp_rhs_buf.shape()}; e.valid(); e.advance())
+    {
+      inp_rhs_buf.at(e.current()) = y_val[n++];
+    }
+  }
+
+  auto inp_lhs_data = locomotiv::make_data(inp_lhs_buf);
+  locomotiv::annot_data(inp_lhs, std::move(inp_lhs_data));
+  locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor);
+
+  auto inp_rhs_data = locomotiv::make_data(inp_rhs_buf);
+  locomotiv::annot_data(inp_rhs, std::move(inp_rhs_data));
+  locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor);
+
+  // run the network
+  locomotiv::NodeExecution::get().run(eltwise_mul);
+
+  // get result
+  auto eltwise_mul_data = locomotiv::annot_data(eltwise_mul);
+
+  // comparing the result
+  ASSERT_NE(eltwise_mul_data, nullptr);
+  ASSERT_EQ(eltwise_mul_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(eltwise_mul_data->shape()), Shape({1, 3, 3, 2}));
+
+  // Inputs were filled via the same enumeration order (LexicalLayout), so
+  // enumerating the output shape lines up with the flat out_val array.
+  uint32_t n = 0;
+  for (IndexEnumerator e{*(eltwise_mul_data->shape())}; e.valid(); e.advance())
+  {
+    ASSERT_FLOAT_EQ(eltwise_mul_data->as_f32_bufptr()->at(e.current()), out_val[n++]);
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(eltwise_mul), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/EltwiseSqrt.cpp b/compiler/locomotiv/src/Node/EltwiseSqrt.cpp
new file mode 100644
index 000000000..b4625a757
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseSqrt.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include <cmath>
+
+namespace
+{
+
+// Qualify as std::sqrt: with only <cmath> included, an unqualified global
+// ::sqrt is not guaranteed by the C++ standard. Negative inputs yield NaN
+// (pinned by the unit test).
+inline float sqrt_ew(float val) { return std::sqrt(val); }
+
+struct Func final : public locomotiv::UnaryFunc
+{
+  float apply(float v) const final { return sqrt_ew(v); }
+};
+
+} // namespace
+
+namespace locomotiv
+{
+
+// Executes EltwiseSqrt: element-wise square root of the input tensor.
+void NodeExecution::execute(loco::EltwiseSqrt *sqrt_node)
+{
+  Func f;
+
+  // eltwise_unary performs input validation and the per-element traversal.
+  eltwise_unary(sqrt_node, f);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/EltwiseSqrt.test.cpp b/compiler/locomotiv/src/Node/EltwiseSqrt.test.cpp
new file mode 100644
index 000000000..adb1b853e
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseSqrt.test.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+#include <cmath>
+#include <limits>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_EltwiseSqrt, f32)
+{
+  // Make Pull-EltwiseSqrt graph
+  auto g = loco::make_graph();
+  auto pull = g->nodes()->create<loco::Pull>();
+  pull->dtype(loco::DataType::FLOAT32);
+  pull->shape({4});
+  auto sqrt = g->nodes()->create<loco::EltwiseSqrt>();
+  sqrt->input(pull);
+
+  // Make and assign data to Pull node
+  auto pull_buf = make_buffer<float, LexicalLayout>(Shape{4});
+  pull_buf.at(Index{0}) = 4.0f;
+  pull_buf.at(Index{1}) = 9.0f;
+  pull_buf.at(Index{2}) = 0.0f;  // boundary: sqrt(0) == 0
+  pull_buf.at(Index{3}) = -1.0f; // negative input: sqrt must produce NaN
+  auto pull_data = locomotiv::make_data(pull_buf);
+  locomotiv::annot_data(pull, std::move(pull_data));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(sqrt);
+
+  auto sqrt_data = locomotiv::annot_data(sqrt);
+  ASSERT_NE(sqrt_data, nullptr);
+  ASSERT_EQ(sqrt_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(sqrt_data->shape()), Shape{4});
+  ASSERT_FLOAT_EQ(sqrt_data->as_f32_bufptr()->at(Index{0}), 2.0f);
+  ASSERT_FLOAT_EQ(sqrt_data->as_f32_bufptr()->at(Index{1}), 3.0f);
+  ASSERT_FLOAT_EQ(sqrt_data->as_f32_bufptr()->at(Index{2}), 0.0f);
+  // NaN never compares equal, so use std::isnan instead of ASSERT_FLOAT_EQ.
+  ASSERT_TRUE(std::isnan(sqrt_data->as_f32_bufptr()->at(Index{3})));
+
+  ASSERT_EQ(locomotiv::annot_domain(sqrt), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/EltwiseSub.cpp b/compiler/locomotiv/src/Node/EltwiseSub.cpp
new file mode 100644
index 000000000..7943f950b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseSub.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+namespace locomotiv
+{
+
+// Executes EltwiseSub: element-wise difference (lhs - rhs) of the inputs.
+void NodeExecution::execute(loco::EltwiseSub *eltwise_sub)
+{
+  struct Func final : public BinaryFunc
+  {
+    // 'final' added for consistency with the other element-wise kernels
+    // (see Func::apply in EltwiseSqrt.cpp).
+    float apply(float lhs, float rhs) const final { return lhs - rhs; }
+  };
+
+  Func f;
+
+  // eltwise_binary performs input validation and the per-element traversal.
+  eltwise_binary(eltwise_sub, f);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/EltwiseSub.test.cpp b/compiler/locomotiv/src/Node/EltwiseSub.test.cpp
new file mode 100644
index 000000000..7eff90f9e
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseSub.test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/*
+test case generated from the following:
+
+x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+y = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+out = tf.math.subtract(x, y)
+
+with tf.Session() as sess:
+ print(sess.run(out))
+*/
+TEST(NodeExecution_EltwiseSub, f32)
+{
+  // x - x == 0 everywhere, so the expected output is all zeros.
+  float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  float y_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  float out_val[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+  // make EltwiseSub(Pull, Pull)
+  auto g = loco::make_graph();
+  Shape input_shape{1, 3, 3, 2}; // NHWC
+
+  auto inp_lhs = g->nodes()->create<loco::Pull>();
+  {
+    inp_lhs->dtype(loco::DataType::FLOAT32);
+    inp_lhs->shape({1, 3, 3, 2});
+  }
+
+  auto inp_rhs = g->nodes()->create<loco::Pull>();
+  {
+    inp_rhs->dtype(loco::DataType::FLOAT32);
+    inp_rhs->shape({1, 3, 3, 2});
+  }
+
+  auto eltwise_sub = g->nodes()->create<loco::EltwiseSub>();
+  {
+    eltwise_sub->lhs(inp_lhs);
+    eltwise_sub->rhs(inp_rhs);
+  }
+
+  // Make and assign data to two pull nodes
+  auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+  {
+    int n = 0;
+    for (IndexEnumerator e{inp_lhs_buf.shape()}; e.valid(); e.advance())
+    {
+      inp_lhs_buf.at(e.current()) = x_val[n++];
+    }
+  }
+
+  auto inp_rhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+  {
+    int n = 0;
+    for (IndexEnumerator e{inp_rhs_buf.shape()}; e.valid(); e.advance())
+    {
+      inp_rhs_buf.at(e.current()) = y_val[n++];
+    }
+  }
+
+  auto inp_lhs_data = locomotiv::make_data(inp_lhs_buf);
+  locomotiv::annot_data(inp_lhs, std::move(inp_lhs_data));
+  locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor);
+
+  auto inp_rhs_data = locomotiv::make_data(inp_rhs_buf);
+  locomotiv::annot_data(inp_rhs, std::move(inp_rhs_data));
+  locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor);
+
+  // run the network
+  locomotiv::NodeExecution::get().run(eltwise_sub);
+
+  // get result
+  auto eltwise_sub_data = locomotiv::annot_data(eltwise_sub);
+
+  // comparing the result
+  ASSERT_NE(eltwise_sub_data, nullptr);
+  ASSERT_EQ(eltwise_sub_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(eltwise_sub_data->shape()), Shape({1, 3, 3, 2}));
+
+  // Inputs were filled via the same enumeration order (LexicalLayout), so
+  // enumerating the output shape lines up with the flat out_val array.
+  uint32_t n = 0;
+  for (IndexEnumerator e{*(eltwise_sub_data->shape())}; e.valid(); e.advance())
+  {
+    ASSERT_FLOAT_EQ(eltwise_sub_data->as_f32_bufptr()->at(e.current()), out_val[n++]);
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(eltwise_sub), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/FeatureCodec.test.cpp b/compiler/locomotiv/src/Node/FeatureCodec.test.cpp
new file mode 100644
index 000000000..c35f0e69a
--- /dev/null
+++ b/compiler/locomotiv/src/Node/FeatureCodec.test.cpp
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <loco/IR/PermutingCodec.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::Buffer;
+
+// This file is intended to test FeatureEncode and FeatureDecode at once
+namespace
+{
+
+// Test fixture owning the graph and providing helpers to build
+// Pull -> FeatureEncode -> FeatureDecode chains.
+class NodeExecution_FeatureCodec : public ::testing::Test
+{
+private:
+  loco::Graph g;
+
+protected:
+  /// @brief Make Pull node and set data by given buffer and data type.
+  ///        Note: only dtype is set on the node itself; the tensor shape is
+  ///        carried by the annotated buffer data.
+  template <typename DT> loco::Pull *pull_layer(Buffer<DT> &pull_buf, loco::DataType dtype)
+  {
+    auto pull = g.nodes()->create<loco::Pull>();
+    pull->dtype(dtype);
+
+    auto pull_data = locomotiv::make_data(pull_buf);
+    locomotiv::annot_data(pull, std::move(pull_data));
+    locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+    return pull;
+  }
+
+  /// @brief Make FeatureEncode node with given input and encoding permutation
+  loco::FeatureEncode *feature_encode_layer(loco::Node *input,
+                                            const loco::Permutation<loco::Domain::Feature> &perm)
+  {
+    auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Feature>>(
+        new loco::PermutingEncoder<loco::Domain::Feature>);
+
+    encoder->perm(perm);
+
+    auto enc = g.nodes()->create<loco::FeatureEncode>();
+    enc->input(input);
+    enc->encoder(std::move(encoder));
+
+    return enc;
+  }
+
+  /// @brief Make FeatureDecode node with given input and decoding permutation
+  loco::FeatureDecode *feature_decode_layer(loco::Node *input,
+                                            const loco::Permutation<loco::Domain::Feature> &perm)
+  {
+    auto decoder = std::unique_ptr<loco::PermutingDecoder<loco::Domain::Feature>>(
+        new loco::PermutingDecoder<loco::Domain::Feature>);
+
+    decoder->perm(perm);
+
+    auto dec = g.nodes()->create<loco::FeatureDecode>();
+    dec->input(input);
+    dec->decoder(std::move(decoder));
+
+    return dec;
+  }
+};
+
+} // namespace
+
+TEST_F(NodeExecution_FeatureCodec, s32)
+{
+  const uint32_t N = 2;
+  const uint32_t H = 3;
+  const uint32_t W = 4;
+  const uint32_t C = 5;
+
+  // Make "NCHW" data for pull node
+  auto pull_buf = make_buffer<int32_t, LexicalLayout>(Shape{N, C, H, W});
+  int32_t i = 0;
+  for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance())
+  {
+    pull_buf.at(e.current()) = i;
+    ++i; // Doesn't matter what it is
+  }
+
+  // Make NCHW permutation for encoder and decoder
+  // (maps each feature axis to its position in the NCHW tensor)
+  loco::Permutation<loco::Domain::Feature> NCHW;
+
+  NCHW.axis(loco::FeatureAxis::Count) = 0;
+  NCHW.axis(loco::FeatureAxis::Depth) = 1;
+  NCHW.axis(loco::FeatureAxis::Height) = 2;
+  NCHW.axis(loco::FeatureAxis::Width) = 3;
+
+  // Pull
+  auto pull = pull_layer(pull_buf, loco::DataType::S32);
+
+  // FeatureEncode
+  auto enc = feature_encode_layer(pull, NCHW);
+  locomotiv::NodeExecution::get().run(enc);
+
+  // Test FeatureEncode: every NCHW input element must land at the
+  // corresponding NHWC position.
+  auto enc_data = locomotiv::annot_data(enc);
+  ASSERT_NE(enc_data, nullptr);
+  ASSERT_EQ(enc_data->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*(enc_data->shape()), (Shape{N, H, W, C})); // locomotiv feature is NHWC
+  auto enc_buf = enc_data->as_s32_bufptr();
+  for (uint32_t n = 0; n < N; ++n)
+    for (uint32_t h = 0; h < H; ++h)
+      for (uint32_t w = 0; w < W; ++w)
+        for (uint32_t c = 0; c < C; ++c)
+          ASSERT_EQ(pull_buf.at(Index{n, c, h, w}), enc_buf->at(Index{n, h, w, c}));
+
+  ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Feature);
+
+  // FeatureDecode
+  auto dec = feature_decode_layer(enc, NCHW);
+  locomotiv::NodeExecution::get().run(dec);
+
+  // Test FeatureDecode: Encode -> Decode == identity
+  auto dec_data = locomotiv::annot_data(dec);
+  ASSERT_NE(dec_data, nullptr);
+  ASSERT_EQ(dec_data->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*(dec_data->shape()), (Shape{N, C, H, W}));
+  auto dec_buf = dec_data->as_s32_bufptr();
+  for (uint32_t n = 0; n < N; ++n)
+    for (uint32_t h = 0; h < H; ++h)
+      for (uint32_t w = 0; w < W; ++w)
+        for (uint32_t c = 0; c < C; ++c)
+          ASSERT_EQ(pull_buf.at(Index{n, c, h, w}), dec_buf->at(Index{n, c, h, w}));
+
+  ASSERT_EQ(locomotiv::annot_domain(dec), loco::Domain::Tensor);
+}
+
+TEST_F(NodeExecution_FeatureCodec, f32)
+{
+  const uint32_t N = 2;
+  const uint32_t H = 3;
+  const uint32_t W = 4;
+  const uint32_t C = 5;
+
+  // Make crazy "CHNW" data for pull node
+  // (a deliberately unusual layout to exercise arbitrary permutations)
+  auto pull_buf = make_buffer<float, LexicalLayout>(Shape{C, H, N, W});
+  float f = 0.0f;
+  for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance())
+  {
+    pull_buf.at(e.current()) = f;
+    f += 0.1f; // Doesn't matter what it is
+  }
+
+  // Make CHNW permutation for encoder and decoder
+  loco::Permutation<loco::Domain::Feature> CHNW;
+
+  CHNW.axis(loco::FeatureAxis::Depth) = 0;
+  CHNW.axis(loco::FeatureAxis::Height) = 1;
+  CHNW.axis(loco::FeatureAxis::Count) = 2;
+  CHNW.axis(loco::FeatureAxis::Width) = 3;
+
+  // Pull
+  auto pull = pull_layer(pull_buf, loco::DataType::FLOAT32);
+
+  // FeatureEncode
+  auto enc = feature_encode_layer(pull, CHNW);
+  locomotiv::NodeExecution::get().run(enc);
+
+  // Test FeatureEncode: every CHNW input element must land at the
+  // corresponding NHWC position.
+  auto enc_data = locomotiv::annot_data(enc);
+  ASSERT_NE(enc_data, nullptr);
+  ASSERT_EQ(enc_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(enc_data->shape()), (Shape{N, H, W, C})); // locomotiv feature is NHWC
+  auto enc_buf = enc_data->as_f32_bufptr();
+  for (uint32_t n = 0; n < N; ++n)
+    for (uint32_t h = 0; h < H; ++h)
+      for (uint32_t w = 0; w < W; ++w)
+        for (uint32_t c = 0; c < C; ++c)
+          ASSERT_FLOAT_EQ(pull_buf.at(Index{c, h, n, w}), enc_buf->at(Index{n, h, w, c}));
+
+  ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Feature);
+
+  // FeatureDecode
+  auto dec = feature_decode_layer(enc, CHNW);
+  locomotiv::NodeExecution::get().run(dec);
+
+  // Test FeatureDecode: Encode -> Decode == identity
+  auto dec_data = locomotiv::annot_data(dec);
+  ASSERT_NE(dec_data, nullptr);
+  ASSERT_EQ(dec_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(dec_data->shape()), (Shape{C, H, N, W}));
+  auto dec_buf = dec_data->as_f32_bufptr();
+  for (uint32_t n = 0; n < N; ++n)
+    for (uint32_t h = 0; h < H; ++h)
+      for (uint32_t w = 0; w < W; ++w)
+        for (uint32_t c = 0; c < C; ++c)
+          ASSERT_FLOAT_EQ(pull_buf.at(Index{c, h, n, w}), dec_buf->at(Index{c, h, n, w}));
+
+  ASSERT_EQ(locomotiv::annot_domain(dec), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/FeatureDecode.cpp b/compiler/locomotiv/src/Node/FeatureDecode.cpp
new file mode 100644
index 000000000..8a56a56b2
--- /dev/null
+++ b/compiler/locomotiv/src/Node/FeatureDecode.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <stdexcept>
+#include <cassert>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::Index;
+
+/// @brief Rearrange an NHWC feature buffer into the tensor layout described
+///        by the node's decoder, returning the result as NodeData.
+template <typename T>
+std::unique_ptr<locomotiv::NodeData> feature_decode(const loco::FeatureDecode *node,
+                                                    const Buffer<T> *input_buf)
+{
+  auto decoder = node->decoder();
+
+  // Make FeatureShape from input. Note that feature in locomotiv represented as NHWC
+  loco::FeatureShape input_shape;
+  assert(input_buf->shape().rank() == 4);
+  input_shape.count() = input_buf->shape().dim(0);
+  input_shape.height() = input_buf->shape().dim(1);
+  input_shape.width() = input_buf->shape().dim(2);
+  input_shape.depth() = input_buf->shape().dim(3);
+
+  loco::TensorShape node_shape = decoder->shape(input_shape);
+
+  // The buffer construction below addresses exactly dims 0..3, so guard
+  // against a decoder reporting a different output rank.
+  assert(node_shape.rank() == 4);
+
+  // Make tensor buffer from TensorShape
+  Buffer<T> node_buf =
+      make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value(),
+                                          node_shape.dim(2).value(), node_shape.dim(3).value()});
+
+  // Copy buffer in an order arranged by decoder
+  for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
+  {
+    // Map each output (tensor) index back to the NHWC feature index that
+    // supplies its value, then copy that element.
+    loco::FeatureIndex feature_index = decoder->value(e.current());
+    Index buf_index({feature_index.batch(), feature_index.row(), feature_index.column(),
+                     feature_index.channel()});
+
+    node_buf.at(e.current()) = input_buf->at(buf_index);
+  }
+
+  return locomotiv::make_data(node_buf);
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+// Executes FeatureDecode: converts a Feature-domain (NHWC) buffer back into
+// a Tensor-domain buffer as dictated by the node's decoder.
+void NodeExecution::execute(loco::FeatureDecode *dec)
+{
+  auto input_data = annot_data(dec->input());
+
+  validate(input_data, "Input of FeatureDecode not ready");
+  validate(annot_domain(dec->input()) == loco::Domain::Feature,
+           "Input of FeatureDecode is not Feature");
+  validate(input_data->shape()->rank() == 4, "Input shape mismatch");
+
+  std::unique_ptr<NodeData> dec_data = nullptr;
+
+  // Dispatch on element type; only S32 and FLOAT32 are implemented.
+  switch (input_data->dtype())
+  {
+    case loco::DataType::S32:
+    {
+      auto input_buf = input_data->as_s32_bufptr();
+      dec_data = feature_decode<int32_t>(dec, input_buf);
+      break;
+    }
+    case loco::DataType::FLOAT32:
+    {
+      auto input_buf = input_data->as_f32_bufptr();
+      dec_data = feature_decode<float>(dec, input_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(dec_data != nullptr);
+  annot_data(dec, std::move(dec_data));
+  annot_domain(dec, loco::Domain::Tensor);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/FeatureEncode.cpp b/compiler/locomotiv/src/Node/FeatureEncode.cpp
new file mode 100644
index 000000000..406de76ff
--- /dev/null
+++ b/compiler/locomotiv/src/Node/FeatureEncode.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <stdexcept>
+#include <cassert>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/// @brief Rearrange a rank-4 tensor buffer into the NHWC feature layout
+///        described by the node's encoder, returning the result as NodeData.
+template <typename T>
+std::unique_ptr<locomotiv::NodeData> feature_encode(const loco::FeatureEncode *node,
+                                                    const Buffer<T> *input_buf)
+{
+  auto encoder = node->encoder();
+
+  // Make TensorShape from input
+  loco::TensorShape input_shape;
+  input_shape.rank(input_buf->shape().rank());
+  assert(input_shape.rank() == 4);
+  for (uint32_t i = 0; i < input_shape.rank(); ++i)
+  {
+    input_shape.dim(i) = input_buf->shape().dim(i);
+  }
+
+  loco::FeatureShape node_shape = encoder->shape(input_shape);
+
+  // Make NHWC buffer from FeatureShape
+  Buffer<T> node_buf =
+      make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
+                                          node_shape.width().value(), node_shape.depth().value()});
+
+  // Copy buffer in an order arranged by encoder
+  for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
+  {
+    // Interpret the current NHWC output position as a FeatureIndex, then ask
+    // the encoder which input (tensor) index supplies its value.
+    loco::FeatureIndex index;
+    index.batch() = e.current().at(0);
+    index.row() = e.current().at(1);
+    index.column() = e.current().at(2);
+    index.channel() = e.current().at(3);
+
+    node_buf.at(e.current()) = input_buf->at(encoder->value(index));
+  }
+
+  return locomotiv::make_data(node_buf);
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+// Executes FeatureEncode: converts a rank-4 Tensor-domain buffer into the
+// Feature domain (NHWC internal layout).
+void NodeExecution::execute(loco::FeatureEncode *enc)
+{
+  auto input_data = annot_data(enc->input());
+
+  validate(input_data, "Input of FeatureEncode not ready");
+  validate(annot_domain(enc->input()) == loco::Domain::Tensor,
+           "Input of FeatureEncode is not Tensor");
+  validate(input_data->shape()->rank() == 4, "Input shape mismatch");
+
+  std::unique_ptr<NodeData> enc_data = nullptr;
+
+  // Dispatch on element type; only S32 and FLOAT32 are implemented.
+  switch (input_data->dtype())
+  {
+    case loco::DataType::S32:
+    {
+      auto input_buf = input_data->as_s32_bufptr();
+      enc_data = feature_encode<int32_t>(enc, input_buf);
+      break;
+    }
+    case loco::DataType::FLOAT32:
+    {
+      auto input_buf = input_data->as_f32_bufptr();
+      enc_data = feature_encode<float>(enc, input_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(enc_data != nullptr);
+  annot_data(enc, std::move(enc_data));
+  annot_domain(enc, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/FilterEncode.cpp b/compiler/locomotiv/src/Node/FilterEncode.cpp
new file mode 100644
index 000000000..cd9d708dc
--- /dev/null
+++ b/compiler/locomotiv/src/Node/FilterEncode.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <stdexcept>
+#include <cassert>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/// @brief Rearrange a rank-4 tensor buffer into the NHWC filter layout
+///        described by the node's encoder, returning the result as NodeData.
+template <typename T>
+std::unique_ptr<locomotiv::NodeData> filter_encode(const loco::FilterEncode *node,
+                                                   const Buffer<T> *input_buf)
+{
+  auto encoder = node->encoder();
+
+  // Make TensorShape from input
+  loco::TensorShape input_shape;
+  input_shape.rank(input_buf->shape().rank());
+  assert(input_shape.rank() == 4);
+  for (uint32_t i = 0; i < input_shape.rank(); ++i)
+  {
+    input_shape.dim(i) = input_buf->shape().dim(i);
+  }
+
+  loco::FilterShape node_shape = encoder->shape(input_shape);
+
+  // Make NHWC buffer from FilterShape
+  Buffer<T> node_buf =
+      make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
+                                          node_shape.width().value(), node_shape.depth().value()});
+
+  // Copy buffer in an order arranged by encoder
+  for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
+  {
+    // Interpret the current NHWC output position as a FilterIndex (nth() is
+    // the filter number), then ask the encoder which input index supplies it.
+    loco::FilterIndex index;
+    index.nth() = e.current().at(0);
+    index.row() = e.current().at(1);
+    index.column() = e.current().at(2);
+    index.channel() = e.current().at(3);
+
+    node_buf.at(e.current()) = input_buf->at(encoder->value(index));
+  }
+
+  return locomotiv::make_data(node_buf);
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+// Executes FilterEncode: converts a rank-4 Tensor-domain buffer into the
+// Filter domain (NHWC internal layout).
+void NodeExecution::execute(loco::FilterEncode *enc)
+{
+  auto input_data = annot_data(enc->input());
+
+  validate(input_data, "Input of FilterEncode not ready");
+  validate(annot_domain(enc->input()) == loco::Domain::Tensor,
+           "Input of FilterEncode is not Tensor");
+  validate(input_data->shape()->rank() == 4, "Input shape mismatch");
+
+  std::unique_ptr<NodeData> enc_data = nullptr;
+
+  // Dispatch on element type; only S32 and FLOAT32 are implemented.
+  switch (input_data->dtype())
+  {
+    case loco::DataType::S32:
+    {
+      auto input_buf = input_data->as_s32_bufptr();
+      enc_data = filter_encode<int32_t>(enc, input_buf);
+      break;
+    }
+    case loco::DataType::FLOAT32:
+    {
+      auto input_buf = input_data->as_f32_bufptr();
+      enc_data = filter_encode<float>(enc, input_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(enc_data != nullptr);
+  annot_data(enc, std::move(enc_data));
+  annot_domain(enc, loco::Domain::Filter);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/FilterEncode.test.cpp b/compiler/locomotiv/src/Node/FilterEncode.test.cpp
new file mode 100644
index 000000000..79b8308e2
--- /dev/null
+++ b/compiler/locomotiv/src/Node/FilterEncode.test.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <loco/IR/PermutingCodec.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+// Feed an NCHW-ordered S32 tensor through FilterEncode (with a permutation
+// declaring the input as NCHW) and verify the output is the same data
+// rearranged into locomotiv's canonical NHWC filter layout.
+TEST(NodeExecution_FilterEncode, s32)
+{
+  const uint32_t N = 2;
+  const uint32_t H = 3;
+  const uint32_t W = 4;
+  const uint32_t C = 5;
+
+  auto g = loco::make_graph();
+
+  // Pull
+  auto pull = g->nodes()->create<loco::Pull>();
+  pull->dtype(loco::DataType::S32);
+
+  // Make and assign "NCHW" data to pull node
+  auto pull_buf = make_buffer<int32_t, LexicalLayout>(Shape{N, C, H, W});
+  int32_t i = 1;
+  for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance())
+  {
+    pull_buf.at(e.current()) = i;
+    ++i; // Doesn't matter what it is
+  }
+  auto pull_data = locomotiv::make_data(pull_buf);
+  locomotiv::annot_data(pull, std::move(pull_data));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  // Encoder to correctly read input tensor as NCHW
+  auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Filter>>(
+      new loco::PermutingEncoder<loco::Domain::Filter>);
+  encoder->perm()->axis(loco::FilterAxis::Count) = 0;
+  encoder->perm()->axis(loco::FilterAxis::Depth) = 1;
+  encoder->perm()->axis(loco::FilterAxis::Height) = 2;
+  encoder->perm()->axis(loco::FilterAxis::Width) = 3;
+
+  // FilterEncode
+  auto enc = g->nodes()->create<loco::FilterEncode>();
+  enc->input(pull);
+  enc->encoder(std::move(encoder));
+
+  locomotiv::NodeExecution::get().run(enc);
+
+  auto enc_data = locomotiv::annot_data(enc);
+  ASSERT_NE(enc_data, nullptr);
+  ASSERT_EQ(enc_data->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*(enc_data->shape()), (Shape{N, H, W, C})); // locomotiv filter is NHWC
+  auto enc_buf = enc_data->as_s32_bufptr();
+  // Every element must have moved from input index (n,c,h,w) to output (n,h,w,c)
+  for (uint32_t n = 0; n < N; ++n)
+    for (uint32_t h = 0; h < H; ++h)
+      for (uint32_t w = 0; w < W; ++w)
+        for (uint32_t c = 0; c < C; ++c)
+          ASSERT_EQ(pull_buf.at(Index{n, c, h, w}), enc_buf->at(Index{n, h, w, c}));
+
+  ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Filter);
+}
+
+// Same as the s32 case, but with FLOAT32 data stored in a deliberately
+// unusual "CHNW" order, to check that an arbitrary input permutation is
+// still decoded into canonical NHWC output.
+TEST(NodeExecution_FilterEncode, f32)
+{
+  const uint32_t N = 2;
+  const uint32_t H = 3;
+  const uint32_t W = 4;
+  const uint32_t C = 5;
+
+  auto g = loco::make_graph();
+
+  // Pull
+  auto pull = g->nodes()->create<loco::Pull>();
+  pull->dtype(loco::DataType::FLOAT32);
+
+  // Make and assign crazy "CHNW" data to pull node
+  auto pull_buf = make_buffer<float, LexicalLayout>(Shape{C, H, N, W});
+  float f = 1;
+  for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance())
+  {
+    pull_buf.at(e.current()) = f;
+    f += 0.1f; // Doesn't matter what it is
+  }
+  auto pull_data = locomotiv::make_data(pull_buf);
+  locomotiv::annot_data(pull, std::move(pull_data));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  // Encoder to correctly read input tensor as CHNW
+  auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Filter>>(
+      new loco::PermutingEncoder<loco::Domain::Filter>);
+  encoder->perm()->axis(loco::FilterAxis::Depth) = 0;
+  encoder->perm()->axis(loco::FilterAxis::Height) = 1;
+  encoder->perm()->axis(loco::FilterAxis::Count) = 2;
+  encoder->perm()->axis(loco::FilterAxis::Width) = 3;
+
+  // FilterEncode
+  auto enc = g->nodes()->create<loco::FilterEncode>();
+  enc->input(pull);
+  enc->encoder(std::move(encoder));
+
+  locomotiv::NodeExecution::get().run(enc);
+
+  auto enc_data = locomotiv::annot_data(enc);
+  ASSERT_NE(enc_data, nullptr);
+  ASSERT_EQ(enc_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(enc_data->shape()), (Shape{N, H, W, C})); // locomotiv filter is NHWC
+  auto enc_buf = enc_data->as_f32_bufptr();
+  // Every element must have moved from input index (c,h,n,w) to output (n,h,w,c)
+  for (uint32_t n = 0; n < N; ++n)
+    for (uint32_t h = 0; h < H; ++h)
+      for (uint32_t w = 0; w < W; ++w)
+        for (uint32_t c = 0; c < C; ++c)
+          ASSERT_FLOAT_EQ(pull_buf.at(Index{c, h, n, w}), enc_buf->at(Index{n, h, w, c}));
+
+  ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Filter);
+}
diff --git a/compiler/locomotiv/src/Node/Forward.cpp b/compiler/locomotiv/src/Node/Forward.cpp
new file mode 100644
index 000000000..eb7d44a59
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Forward.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace locomotiv
+{
+
+// Evaluate a Forward node: an identity pass-through. The input's buffer is
+// copied into a fresh NodeData and the input's domain annotation is
+// propagated unchanged. Only S32 and FLOAT32 inputs are handled.
+void NodeExecution::execute(loco::Forward *forward)
+{
+  auto input_data = annot_data(forward->input());
+
+  // Input must be computed and belong to some known domain.
+  validate(input_data, "Input not ready");
+  validate(annot_domain(forward->input()) != loco::Domain::Unknown,
+           "Input domain must not Unknown");
+
+  std::unique_ptr<NodeData> forward_data = nullptr;
+
+  // Dispatch on element type; make_data(*bufptr) deep-copies the buffer.
+  switch (input_data->dtype())
+  {
+    case loco::DataType::S32:
+    {
+      auto input_bufptr = input_data->as_s32_bufptr();
+      forward_data = make_data(*input_bufptr);
+      break;
+    }
+    case loco::DataType::FLOAT32:
+    {
+      auto input_bufptr = input_data->as_f32_bufptr();
+      forward_data = make_data(*input_bufptr);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(forward_data != nullptr);
+  annot_data(forward, std::move(forward_data));
+  // Forward preserves the domain of its input (Tensor, Feature, ...).
+  annot_domain(forward, annot_domain(forward->input()));
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Forward.test.cpp b/compiler/locomotiv/src/Node/Forward.test.cpp
new file mode 100644
index 000000000..73d37139a
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Forward.test.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+// Forward with a one-element S32 tensor: output must equal the input value,
+// keep dtype/shape, and stay in the Tensor domain.
+TEST(NodeExecution_Forward, s32)
+{
+  // Make pull-forward graph
+  auto g = loco::make_graph();
+  auto pull = g->nodes()->create<loco::Pull>();
+  pull->dtype(loco::DataType::S32);
+  pull->shape({1});
+  auto forward = g->nodes()->create<loco::Forward>();
+  forward->input(pull);
+
+  // Make and assign data to pull node
+  auto pull_buf = make_buffer<int32_t, LexicalLayout>(Shape{1});
+  pull_buf.at(Index{0}) = 42;
+  auto pull_data = locomotiv::make_data(pull_buf);
+  locomotiv::annot_data(pull, std::move(pull_data));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(forward);
+
+  auto forward_data = locomotiv::annot_data(forward);
+  ASSERT_NE(forward_data, nullptr);
+  ASSERT_EQ(forward_data->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*(forward_data->shape()), Shape{1});
+  ASSERT_EQ(forward_data->as_s32_bufptr()->at(Index{0}), pull_buf.at(Index{0}));
+
+  ASSERT_EQ(locomotiv::annot_domain(forward), loco::Domain::Tensor);
+}
+
+// Same identity check as the s32 case, for a one-element FLOAT32 tensor.
+TEST(NodeExecution_Forward, f32)
+{
+  // Make pull-forward graph
+  auto g = loco::make_graph();
+  auto pull = g->nodes()->create<loco::Pull>();
+  pull->dtype(loco::DataType::FLOAT32);
+  pull->shape({1});
+  auto forward = g->nodes()->create<loco::Forward>();
+  forward->input(pull);
+
+  // Make and assign data to pull node
+  auto pull_buf = make_buffer<float, LexicalLayout>(Shape{1});
+  pull_buf.at(Index{0}) = 3.14f;
+  auto pull_data = locomotiv::make_data(pull_buf);
+  locomotiv::annot_data(pull, std::move(pull_data));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(forward);
+
+  auto forward_data = locomotiv::annot_data(forward);
+  ASSERT_NE(forward_data, nullptr);
+  ASSERT_EQ(forward_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(forward_data->shape()), Shape{1});
+  ASSERT_FLOAT_EQ(forward_data->as_f32_bufptr()->at(Index{0}), pull_buf.at(Index{0}));
+
+  ASSERT_EQ(locomotiv::annot_domain(forward), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/MatMul.cpp b/compiler/locomotiv/src/Node/MatMul.cpp
new file mode 100644
index 000000000..77b7315a9
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MatMul.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Calculate Matrix Multiplication
+ */
+/**
+ * @brief Calculate Matrix Multiplication
+ *
+ * Naive triple-loop matrix product. Preconditions (checked by assert):
+ * both operands are rank-2 and lhs width equals rhs height.
+ *
+ * @param lhs_buf left operand, shape [lhs_height, lhs_width]
+ * @param rhs_buf right operand, shape [lhs_width, rhs_width]
+ * @return newly allocated LexicalLayout buffer of shape
+ *         [lhs_height, rhs_width] holding lhs x rhs
+ */
+template <typename T> Buffer<T> calc_mat_mul(const Buffer<T> *lhs_buf, const Buffer<T> *rhs_buf)
+{
+  const auto lhs_shape = lhs_buf->shape();
+  const auto rhs_shape = rhs_buf->shape();
+
+  assert(lhs_shape.rank() == 2 && "lhs rank must be 2");
+  assert(rhs_shape.rank() == 2 && "rhs rank must be 2");
+  // lhs width should be the same as rhs height
+  assert(lhs_shape.dim(1) == rhs_shape.dim(0) && "height/width mismatch");
+
+  const uint32_t lhs_height = lhs_shape.dim(0);
+  const uint32_t lhs_width = lhs_shape.dim(1);
+
+  const uint32_t rhs_width = rhs_shape.dim(1);
+
+  const uint32_t output_height = lhs_height;
+  const uint32_t output_width = rhs_width;
+
+  Shape output_shape{output_height, output_width};
+  auto output_buf = make_buffer<T, LexicalLayout>(output_shape);
+
+  for (uint32_t out_y = 0; out_y < output_height; ++out_y)
+  {
+    for (uint32_t out_x = 0; out_x < output_width; ++out_x)
+    {
+      T total = static_cast<T>(0); // accumulator
+      // Accumulate through axis
+      for (uint32_t axis = 0; axis < lhs_width; ++axis)
+      {
+        total += lhs_buf->at(Index({out_y, axis})) * rhs_buf->at(Index({axis, out_x}));
+      }
+      // Set output value
+      output_buf.at(Index({out_y, out_x})) = total;
+    }
+  }
+
+  return output_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+// Evaluate a MatMul node: validate both Matrix-domain rank-2 operands, run
+// calc_mat_mul for the matching element type, and annotate the node with the
+// product. Mixed or unsupported dtype combinations throw.
+void NodeExecution::execute(loco::MatMul *mat_mul)
+{
+  auto lhs_data = annot_data(mat_mul->lhs());
+  auto rhs_data = annot_data(mat_mul->rhs());
+
+  validate(lhs_data, "Can't find left matrix data of MatMul");
+  validate(lhs_data->shape()->rank() == 2, "lhs rank must be 2");
+
+  validate(rhs_data, "Can't find right matrix data of MatMul");
+  validate(rhs_data->shape()->rank() == 2, "rhs rank must be 2");
+
+  // Both operands must already be encoded into the Matrix domain.
+  validate(annot_domain(mat_mul->lhs()) == loco::Domain::Matrix,
+           "Left matrix of MatMul is not a Matrix");
+  validate(annot_domain(mat_mul->rhs()) == loco::Domain::Matrix,
+           "Right matrix of MatMul is not a Matrix");
+
+  std::unique_ptr<NodeData> mat_mul_result = nullptr;
+
+  // Both operands must share the same dtype; dispatch accordingly.
+  if (lhs_data->dtype() == loco::DataType::FLOAT32 && rhs_data->dtype() == loco::DataType::FLOAT32)
+  {
+    const auto lhs_buf = lhs_data->as_f32_bufptr();
+    const auto rhs_buf = rhs_data->as_f32_bufptr();
+
+    auto mat_mul_buf = calc_mat_mul<float>(lhs_buf, rhs_buf);
+
+    mat_mul_result = make_data(mat_mul_buf);
+  }
+  else if (lhs_data->dtype() == loco::DataType::S32 && rhs_data->dtype() == loco::DataType::S32)
+  {
+    const auto lhs_buf = lhs_data->as_s32_bufptr();
+    const auto rhs_buf = rhs_data->as_s32_bufptr();
+
+    auto mat_mul_buf = calc_mat_mul<int32_t>(lhs_buf, rhs_buf);
+
+    mat_mul_result = make_data(mat_mul_buf);
+  }
+  else
+    throw std::runtime_error("NYI for these DataTypes");
+
+  assert(mat_mul_result != nullptr);
+
+  annot_data(mat_mul, std::move(mat_mul_result));
+  annot_domain(mat_mul, loco::Domain::Matrix);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MatMul.test.cpp b/compiler/locomotiv/src/Node/MatMul.test.cpp
new file mode 100644
index 000000000..bd480f7c7
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MatMul.test.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+/**
+ * @brief Drive one MatMul interpreter run and check the result.
+ *
+ * Wraps the raw lhs/rhs arrays in MatrixEncode nodes annotated as
+ * Matrix-domain data, runs MatMul, then compares every output element
+ * against expected_output (exact for S32, ASSERT_FLOAT_EQ for FLOAT32).
+ *
+ * @param lhs/rhs           raw row-major input arrays
+ * @param expected_output   raw row-major expected product
+ * @param lhs_shape/rhs_shape/out_shape  rank-2 shapes of the above
+ * @param expected_datatype S32 or FLOAT32; must match T
+ */
+template <typename T>
+void run_test(const T *lhs, const T *rhs, const T *expected_output, const Shape &lhs_shape,
+              const Shape &rhs_shape, const Shape &out_shape, loco::DataType expected_datatype)
+{
+  auto g = loco::make_graph();
+  // Fill lhs MatrixEncode
+  auto lhs_enc = g->nodes()->create<loco::MatrixEncode>();
+  {
+    // Copy the raw array into a buffer via a zero-copy overlay view.
+    auto lhs_enc_buf = make_buffer<T, LexicalLayout>(lhs_shape);
+    auto lhs_overlay = make_overlay<T, LexicalLayout>(lhs_shape, const_cast<T *>(lhs));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{lhs_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      lhs_enc_buf.at(ind) = lhs_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(lhs_enc_buf);
+    locomotiv::annot_data(lhs_enc, std::move(enc_data));
+    locomotiv::annot_domain(lhs_enc, loco::Domain::Matrix);
+  }
+  // Fill rhs MatrixEncode
+  auto rhs_enc = g->nodes()->create<loco::MatrixEncode>();
+  {
+    auto rhs_enc_buf = make_buffer<T, LexicalLayout>(rhs_shape);
+    auto rhs_overlay = make_overlay<T, LexicalLayout>(rhs_shape, const_cast<T *>(rhs));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{rhs_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      rhs_enc_buf.at(ind) = rhs_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(rhs_enc_buf);
+    locomotiv::annot_data(rhs_enc, std::move(enc_data));
+    locomotiv::annot_domain(rhs_enc, loco::Domain::Matrix);
+  }
+
+  // build MatMul
+  auto mat_mul = g->nodes()->create<loco::MatMul>();
+  mat_mul->lhs(lhs_enc);
+  mat_mul->rhs(rhs_enc);
+
+  // run interpreter
+  locomotiv::NodeExecution::get().run(mat_mul);
+
+  // get result of calculation
+  auto mat_mul_result = locomotiv::annot_data(mat_mul);
+
+  // check the result
+  ASSERT_NE(mat_mul_result, nullptr);
+  ASSERT_TRUE(mat_mul_result->dtype() == expected_datatype);
+  ASSERT_TRUE(*(mat_mul_result->shape()) == out_shape);
+
+  auto out_overlay = make_overlay<T, LexicalLayout>(out_shape, const_cast<T *>(expected_output));
+  for (nncc::core::ADT::tensor::IndexEnumerator e{out_shape}; e.valid(); e.advance())
+  {
+    const auto &ind = e.current();
+    if (expected_datatype == loco::DataType::FLOAT32)
+      ASSERT_FLOAT_EQ(mat_mul_result->as_f32_bufptr()->at(ind), out_overlay.at(ind));
+    else if (expected_datatype == loco::DataType::S32)
+      ASSERT_EQ(mat_mul_result->as_s32_bufptr()->at(ind), out_overlay.at(ind));
+    else
+      throw std::runtime_error("NYI for these DataTypes");
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(mat_mul), loco::Domain::Matrix);
+}
+
+} // namespace
+
+// clang-format off
+/* from the code below:
+
+import numpy as np
+
+a = [[-0.48850584, 1.4292705, -1.3424522],
+ [1.7021934, -0.39246717, 0.6248314]]
+
+b = [[-0.0830195, 0.21088193, -0.11781317],
+ [0.07755677, 1.6337638, 1.0792778],
+ [-1.6922939, -1.5437212, 0.96667504]]
+
+print(np.array(a) @ np.array(b))
+*/
+// FLOAT32 2x3 @ 3x3 product; expected values come from the numpy snippet
+// in the comment above this test.
+TEST(NodeExecution_MatMul, f32_2x3_3x3)
+{
+  using nncc::core::ADT::tensor::Shape;
+
+  const float lhs[] =
+  {
+    -0.48850584,  1.4292705,  -1.3424522,
+     1.7021934,  -0.39246717,  0.6248314
+  };
+
+  const float rhs[] =
+  {
+    -0.0830195,  0.21088193, -0.11781317,
+     0.07755677, 1.6337638,   1.0792778,
+    -1.6922939, -1.5437212,   0.96667504
+  };
+
+  const float out[] =
+  {
+     2.42322878,  4.30444527,  0.30241731,
+    -1.2291521,  -1.2468023,  -0.02011299
+  };
+
+  run_test<float>(lhs, rhs, out, Shape{2, 3}, Shape{3, 3}, Shape{2, 3}, loco::DataType::FLOAT32);
+}
+
+/* from the code below:
+
+import numpy as np
+
+a = np.random.randint(10000, size=(4, 2))
+
+b = np.random.randint(10000, size=(2, 6))
+
+print(a)
+print(b)
+print(np.array(a) @ np.array(b))
+*/
+// S32 4x2 @ 2x6 product; expected values come from the numpy snippet in the
+// comment above this test.
+TEST(NodeExecution_MatMul, s32_4x2_2x6)
+{
+  using nncc::core::ADT::tensor::Shape;
+
+  const int32_t lhs[] =
+  {
+    6392, 4993,
+      54, 9037,
+    3947, 5820,
+    5800, 4181
+  };
+
+  const int32_t rhs[] =
+  {
+    2694, 8376, 8090, 1285, 7492, 1652,
+    5427, 8798, 7634, 2229, 5439, 6999
+  };
+
+  const int32_t out[] =
+  {
+    44317059, 97467806, 89827842, 19343117, 75045791, 45505591,
+    49189275, 79959830, 69425318, 20212863, 49556811, 63339171,
+    42218358, 84264432, 76361110, 18044675, 61225904, 47254624,
+    38315487, 85365238, 78839754, 16772449, 66194059, 38844419
+  };
+
+  run_test<int32_t>(lhs, rhs, out, Shape{4, 2}, Shape{2, 6}, Shape{4, 6}, loco::DataType::S32);
+}
+
+// clang-format on
diff --git a/compiler/locomotiv/src/Node/MatrixCodec.test.cpp b/compiler/locomotiv/src/Node/MatrixCodec.test.cpp
new file mode 100644
index 000000000..8fc5d593b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MatrixCodec.test.cpp
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <loco/IR/PermutingCodec.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::Buffer;
+
+// This file is intended to test MatrixEncode and MatrixDecode at once
+namespace
+{
+
+// Test fixture providing small graph-building helpers so each TEST_F can
+// assemble a Pull -> MatrixEncode -> MatrixDecode chain with arbitrary
+// encode/decode permutations.
+class NodeExecution_MatrixCodec : public ::testing::Test
+{
+private:
+  // Graph owning all nodes created by the helper methods below.
+  loco::Graph g;
+
+protected:
+  /// @brief Make Pull node and set data by given buffer and data type
+  template <typename DT> loco::Pull *pull_layer(Buffer<DT> &pull_buf, loco::DataType dtype)
+  {
+    auto pull = g.nodes()->create<loco::Pull>();
+    pull->dtype(dtype);
+
+    auto pull_data = locomotiv::make_data(pull_buf);
+    locomotiv::annot_data(pull, std::move(pull_data));
+    locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+    return pull;
+  }
+
+  /// @brief Make MatrixEncode node with given input and encoding permutation
+  loco::MatrixEncode *matrix_encode_layer(loco::Node *input,
+                                          const loco::Permutation<loco::Domain::Matrix> &perm)
+  {
+    auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Matrix>>(
+        new loco::PermutingEncoder<loco::Domain::Matrix>);
+
+    encoder->perm(perm);
+
+    auto enc = g.nodes()->create<loco::MatrixEncode>();
+    enc->input(input);
+    enc->encoder(std::move(encoder));
+
+    return enc;
+  }
+
+  /// @brief Make MatrixDecode node with given input and decoding permutation
+  loco::MatrixDecode *matrix_decode_layer(loco::Node *input,
+                                          const loco::Permutation<loco::Domain::Matrix> &perm)
+  {
+    auto decoder = std::unique_ptr<loco::PermutingDecoder<loco::Domain::Matrix>>(
+        new loco::PermutingDecoder<loco::Domain::Matrix>);
+
+    decoder->perm(perm);
+
+    auto dec = g.nodes()->create<loco::MatrixDecode>();
+    dec->input(input);
+    dec->decoder(std::move(decoder));
+
+    return dec;
+  }
+};
+
+} // namespace
+
+// Encode then decode an S32 matrix using the identity (HW) permutation:
+// the encoded buffer equals the input, and decode(encode(x)) == x.
+TEST_F(NodeExecution_MatrixCodec, HW_s32)
+{
+  const uint32_t H = 3;
+  const uint32_t W = 4;
+
+  // Make HW data for pull node
+  auto pull_buf = make_buffer<int32_t, LexicalLayout>(Shape{H, W});
+  int32_t i = 0;
+  for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance())
+  {
+    pull_buf.at(e.current()) = i;
+    ++i; // Doesn't matter what it is
+  }
+
+  // Make HW permutation for encoder and decoder
+  loco::Permutation<loco::Domain::Matrix> HW;
+
+  HW.axis(loco::MatrixAxis::Height) = 0;
+  HW.axis(loco::MatrixAxis::Width) = 1;
+
+  // Pull
+  auto pull = pull_layer(pull_buf, loco::DataType::S32);
+
+  // MatrixEncode
+  auto enc = matrix_encode_layer(pull, HW);
+  locomotiv::NodeExecution::get().run(enc);
+
+  // Test MatrixEncode
+  auto enc_data = locomotiv::annot_data(enc);
+  ASSERT_NE(enc_data, nullptr);
+  ASSERT_EQ(enc_data->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*(enc_data->shape()), (Shape{H, W})); // locomotiv matrix is HW
+  auto enc_buf = enc_data->as_s32_bufptr();
+  for (uint32_t h = 0; h < H; ++h)
+    for (uint32_t w = 0; w < W; ++w)
+      ASSERT_EQ(pull_buf.at(Index{h, w}), enc_buf->at(Index{h, w}));
+
+  ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Matrix);
+
+  // MatrixDecode
+  auto dec = matrix_decode_layer(enc, HW);
+  locomotiv::NodeExecution::get().run(dec);
+
+  // Test MatrixDecode: Encode -> Decode == identity
+  auto dec_data = locomotiv::annot_data(dec);
+  ASSERT_NE(dec_data, nullptr);
+  ASSERT_EQ(dec_data->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*(dec_data->shape()), (Shape{H, W}));
+  auto dec_buf = dec_data->as_s32_bufptr();
+  for (uint32_t h = 0; h < H; ++h)
+    for (uint32_t w = 0; w < W; ++w)
+      ASSERT_EQ(pull_buf.at(Index{h, w}), dec_buf->at(Index{h, w}));
+
+  ASSERT_EQ(locomotiv::annot_domain(dec), loco::Domain::Tensor);
+}
+
+// Encode then decode a FLOAT32 matrix stored transposed (WH): encode must
+// produce canonical HW data, and decode must restore the original WH layout.
+TEST_F(NodeExecution_MatrixCodec, WH_f32)
+{
+  const uint32_t W = 6;
+  const uint32_t H = 5;
+
+  // Make crazy WH data for pull node
+  auto pull_buf = make_buffer<float, LexicalLayout>(Shape{W, H});
+  float f = 0.0f;
+  for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance())
+  {
+    pull_buf.at(e.current()) = f;
+    f += 0.1f; // Doesn't matter what it is
+  }
+
+  // Make WH permutation for encoder and decoder
+  loco::Permutation<loco::Domain::Matrix> WH;
+
+  WH.axis(loco::MatrixAxis::Width) = 0;
+  WH.axis(loco::MatrixAxis::Height) = 1;
+
+  // Pull
+  auto pull = pull_layer(pull_buf, loco::DataType::FLOAT32);
+
+  // MatrixEncode
+  auto enc = matrix_encode_layer(pull, WH);
+  locomotiv::NodeExecution::get().run(enc);
+
+  // Test MatrixEncode
+  auto enc_data = locomotiv::annot_data(enc);
+  ASSERT_NE(enc_data, nullptr);
+  ASSERT_EQ(enc_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(enc_data->shape()), (Shape{H, W})); // locomotiv matrix is HW
+  auto enc_buf = enc_data->as_f32_bufptr();
+  for (uint32_t h = 0; h < H; ++h)
+    for (uint32_t w = 0; w < W; ++w)
+      ASSERT_FLOAT_EQ(pull_buf.at(Index{w, h}), enc_buf->at(Index{h, w}));
+
+  ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Matrix);
+
+  // MatrixDecode
+  auto dec = matrix_decode_layer(enc, WH);
+  locomotiv::NodeExecution::get().run(dec);
+
+  // Test MatrixDecode: Encode -> Decode == identity
+  auto dec_data = locomotiv::annot_data(dec);
+  ASSERT_NE(dec_data, nullptr);
+  ASSERT_EQ(dec_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(dec_data->shape()), (Shape{W, H}));
+  auto dec_buf = dec_data->as_f32_bufptr();
+  for (uint32_t h = 0; h < H; ++h)
+    for (uint32_t w = 0; w < W; ++w)
+      ASSERT_FLOAT_EQ(pull_buf.at(Index{w, h}), dec_buf->at(Index{w, h}));
+
+  ASSERT_EQ(locomotiv::annot_domain(dec), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/MatrixDecode.cpp b/compiler/locomotiv/src/Node/MatrixDecode.cpp
new file mode 100644
index 000000000..c591676ae
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MatrixDecode.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <stdexcept>
+#include <cassert>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::Index;
+
+/**
+ * @brief Re-arrange a canonical HW matrix buffer into tensor order.
+ *
+ * Builds a MatrixShape from the rank-2 input, asks the node's decoder for
+ * the resulting TensorShape, and copies each element to the position the
+ * decoder's permutation dictates.
+ *
+ * @param node      MatrixDecode node supplying the decoder/permutation
+ * @param input_buf rank-2 buffer in locomotiv's canonical HW order
+ * @return NodeData holding the decoded tensor buffer
+ */
+template <typename T>
+std::unique_ptr<locomotiv::NodeData> matrix_decode(const loco::MatrixDecode *node,
+                                                   const Buffer<T> *input_buf)
+{
+  auto decoder = node->decoder();
+
+  // Make MatrixShape from input. Note that matrix in locomotiv represented as HW
+  loco::MatrixShape input_shape;
+  assert(input_buf->shape().rank() == 2);
+  input_shape.height() = input_buf->shape().dim(0);
+  input_shape.width() = input_buf->shape().dim(1);
+
+  loco::TensorShape node_shape = decoder->shape(input_shape);
+
+  // Make tensor buffer from TensorShape
+  // NOTE(review): only dims 0 and 1 are read here, i.e. the decoded shape is
+  // assumed to be rank-2 — presumably guaranteed by MatrixDecode; confirm.
+  Buffer<T> node_buf =
+      make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value()});
+
+  // Copy buffer in an order arranged by decoder
+  for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
+  {
+    // Map each output tensor index back to the (row, column) it came from.
+    loco::MatrixIndex matrix_index = decoder->value(e.current());
+    Index buf_index({matrix_index.row(), matrix_index.column()});
+
+    node_buf.at(e.current()) = input_buf->at(buf_index);
+  }
+
+  return locomotiv::make_data(node_buf);
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+// Evaluate a MatrixDecode node: convert canonical HW matrix data back to a
+// tensor per the node's decoder permutation. Only S32 and FLOAT32 inputs
+// are handled; anything else throws.
+void NodeExecution::execute(loco::MatrixDecode *matrix_dec)
+{
+  auto input_data = annot_data(matrix_dec->input());
+
+  // Input must be computed, in the Matrix domain, and rank-2.
+  validate(input_data, "Input not ready");
+  validate(annot_domain(matrix_dec->input()) == loco::Domain::Matrix,
+           "Input domain should be Matrix");
+  validate(input_data->shape()->rank() == 2, "Input data rank must be 2");
+
+  std::unique_ptr<NodeData> matrix_dec_data = nullptr;
+
+  // Dispatch on the runtime element type of the input buffer.
+  switch (input_data->dtype())
+  {
+    case loco::DataType::S32:
+    {
+      auto input_buf = input_data->as_s32_bufptr();
+      matrix_dec_data = matrix_decode<int32_t>(matrix_dec, input_buf);
+      break;
+    }
+    case loco::DataType::FLOAT32:
+    {
+      auto input_buf = input_data->as_f32_bufptr();
+      matrix_dec_data = matrix_decode<float>(matrix_dec, input_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(matrix_dec_data != nullptr);
+
+  // Decoding leaves the Matrix domain and re-enters the Tensor domain.
+  annot_data(matrix_dec, std::move(matrix_dec_data));
+  annot_domain(matrix_dec, loco::Domain::Tensor);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MatrixEncode.cpp b/compiler/locomotiv/src/Node/MatrixEncode.cpp
new file mode 100644
index 000000000..e3554e15a
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MatrixEncode.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <stdexcept>
+#include <cassert>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+template <typename T>
+std::unique_ptr<locomotiv::NodeData> matrix_encode(const loco::MatrixEncode *node,
+ const Buffer<T> *input_buf)
+{
+ auto encoder = node->encoder();
+
+ // Make TensorShape from input
+ loco::TensorShape input_shape;
+ input_shape.rank(input_buf->shape().rank());
+ assert(input_shape.rank() == 2);
+ for (uint32_t i = 0; i < input_shape.rank(); ++i)
+ {
+ input_shape.dim(i) = input_buf->shape().dim(i);
+ }
+
+ loco::MatrixShape node_shape = encoder->shape(input_shape);
+
+ // Make HW buffer from MatrixShape
+ Buffer<T> node_buf =
+ make_buffer<T, LexicalLayout>(Shape{node_shape.height().value(), node_shape.width().value()});
+
+ // Copy buffer in an order arranged by encoder
+ for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
+ {
+ loco::MatrixIndex index;
+ index.row() = e.current().at(0);
+ index.column() = e.current().at(1);
+
+ node_buf.at(e.current()) = input_buf->at(encoder->value(index));
+ }
+
+ return locomotiv::make_data(node_buf);
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MatrixEncode *matrix_enc)
+{
+ auto input_data = annot_data(matrix_enc->input());
+
+ validate(input_data, "Input not ready");
+ validate(annot_domain(matrix_enc->input()) == loco::Domain::Tensor,
+ "Input domain should be Tensor");
+ validate(input_data->shape()->rank() == 2, "Input data rank must be 2");
+
+ std::unique_ptr<NodeData> matrix_enc_data = nullptr;
+
+ switch (input_data->dtype())
+ {
+ case loco::DataType::S32:
+ {
+ auto input_buf = input_data->as_s32_bufptr();
+ matrix_enc_data = matrix_encode<int32_t>(matrix_enc, input_buf);
+ break;
+ }
+ case loco::DataType::FLOAT32:
+ {
+ auto input_buf = input_data->as_f32_bufptr();
+ matrix_enc_data = matrix_encode<float>(matrix_enc, input_buf);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+
+ assert(matrix_enc_data != nullptr);
+
+ annot_data(matrix_enc, std::move(matrix_enc_data));
+ annot_domain(matrix_enc, loco::Domain::Matrix);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MaxPool2D.cpp b/compiler/locomotiv/src/Node/MaxPool2D.cpp
new file mode 100644
index 000000000..5d92f89f5
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MaxPool2D.cpp
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <limits>
+#include <cassert>
+#include <algorithm>
+#include <stdexcept>
+
+namespace
+{
+
+/**
+ * @brief Compute 1D output size based on given 1D arguments.
+ *
+ * @param whole_pad Sum of front and back pad (top+bottom for height, left+right for width)
+ */
+inline uint32_t compute_out_size(uint32_t image_size, uint32_t whole_pad, uint32_t filter_size,
+ uint32_t stride)
+{
+ assert((image_size + whole_pad - filter_size) % stride == 0);
+ return (image_size + whole_pad - filter_size) / stride + 1;
+}
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+template <typename T>
+nncc::core::ADT::tensor::Buffer<T> maxPool2D(const loco::MaxPool2D *maxpool2d,
+ const Buffer<T> *ifm_buf)
+{
+ auto ifm_shape = ifm_buf->shape();
+
+ const uint32_t batches = ifm_shape.dim(0);
+ const uint32_t depth = ifm_shape.dim(3);
+
+ const uint32_t ifm_height = ifm_shape.dim(1);
+ const uint32_t ifm_width = ifm_shape.dim(2);
+
+ const uint32_t window_height = maxpool2d->window()->vertical();
+ const uint32_t window_width = maxpool2d->window()->horizontal();
+
+ const uint32_t stride_height = maxpool2d->stride()->vertical();
+ const uint32_t stride_width = maxpool2d->stride()->horizontal();
+
+ const uint32_t pad_top = maxpool2d->pad()->top();
+ const uint32_t pad_bottom = maxpool2d->pad()->bottom();
+
+ const uint32_t pad_left = maxpool2d->pad()->left();
+ const uint32_t pad_right = maxpool2d->pad()->right();
+
+ const uint32_t output_height =
+ compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
+ const uint32_t output_width =
+ compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
+
+ // prepare output buffer
+ Shape output_shape{batches, output_height, output_width, depth};
+ auto output_buf = make_buffer<T, LexicalLayout>(output_shape);
+
+ for (uint32_t batch = 0; batch < batches; ++batch)
+ {
+ for (uint32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (uint32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (uint32_t channel = 0; channel < depth; ++channel)
+ {
+ const int in_x_origin = (out_x * stride_width) - pad_left;
+ const int in_y_origin = (out_y * stride_height) - pad_top;
+
+ // Compute the boundaries of the filter region clamped so as to
+ // ensure that the filter window fits in the input array.
+ const uint32_t filter_x_start = std::max(0, -in_x_origin);
+ const uint32_t filter_x_end = std::min(window_width, ifm_width - in_x_origin);
+
+ const uint32_t filter_y_start = std::max(0, -in_y_origin);
+ const uint32_t filter_y_end = std::min(window_height, ifm_height - in_y_origin);
+
+ T max = std::numeric_limits<T>::lowest();
+
+ for (uint32_t filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ {
+ for (uint32_t filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
+ {
+ const uint32_t in_x = in_x_origin + filter_x;
+ const uint32_t in_y = in_y_origin + filter_y;
+ max = std::max(max, ifm_buf->at(Index({batch, in_y, in_x, channel})));
+ }
+ }
+
+ output_buf.at(Index({batch, out_y, out_x, channel})) = max;
+ }
+ }
+ }
+ }
+
+ return output_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MaxPool2D *maxpool2d)
+{
+ auto ifm_data = annot_data(maxpool2d->ifm());
+
+ validate(ifm_data, "Can't find input data of MaxPool2D");
+ validate(ifm_data->shape()->rank() == 4, "IFM rank should be 4");
+ validate(annot_domain(maxpool2d->ifm()) == loco::Domain::Feature,
+ "ifm of MaxPool2D is not Feature");
+
+ std::unique_ptr<NodeData> maxpool2d_data = nullptr;
+
+ switch (ifm_data->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ auto ifm_buf = ifm_data->as_f32_bufptr();
+
+ auto maxpool2d_buf = maxPool2D<float>(maxpool2d, ifm_buf);
+
+ maxpool2d_data = make_data(maxpool2d_buf);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+
+ assert(maxpool2d_data != nullptr);
+
+ annot_data(maxpool2d, std::move(maxpool2d_data));
+ annot_domain(maxpool2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MaxPool2D.test.cpp b/compiler/locomotiv/src/Node/MaxPool2D.test.cpp
new file mode 100644
index 000000000..9d877a96b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MaxPool2D.test.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+void run_test(const float *ifm, const float *expected_ofm, const Shape &ifm_shape,
+ const Shape &ofm_shape, const uint32_t window_v, const uint32_t window_h,
+ const uint32_t stride_v, const uint32_t stride_h, const uint32_t pad_top,
+ const uint32_t pad_bottom, const uint32_t pad_left, const uint32_t pad_right)
+{
+ // Let's make FeatureEncode-MaxPool2D graph
+ auto g = loco::make_graph();
+ auto enc = g->nodes()->create<loco::FeatureEncode>();
+
+ // Fill output data of FeatureEncode from ifm
+ auto enc_buf = make_buffer<float, LexicalLayout>(ifm_shape);
+
+ auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm));
+ for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance())
+ {
+ const auto &ind = e.current();
+ enc_buf.at(ind) = ifm_overlay.at(ind);
+ }
+
+ auto enc_data = locomotiv::make_data(enc_buf);
+ locomotiv::annot_data(enc, std::move(enc_data));
+ locomotiv::annot_domain(enc, loco::Domain::Feature);
+
+ // build MaxPool2D
+ auto maxpool2d = g->nodes()->create<loco::MaxPool2D>();
+ maxpool2d->ifm(enc);
+ maxpool2d->window()->vertical(window_v);
+ maxpool2d->window()->horizontal(window_h);
+ maxpool2d->stride()->vertical(stride_v);
+ maxpool2d->stride()->horizontal(stride_h);
+ maxpool2d->pad()->top(pad_top);
+ maxpool2d->pad()->bottom(pad_bottom);
+ maxpool2d->pad()->left(pad_left);
+ maxpool2d->pad()->right(pad_right);
+
+ // run interpreter
+ locomotiv::NodeExecution::get().run(maxpool2d);
+
+ // get result of calculation
+ auto maxpool2d_data = locomotiv::annot_data(maxpool2d);
+
+ // check the result
+ ASSERT_NE(maxpool2d_data, nullptr);
+ ASSERT_TRUE(maxpool2d_data->dtype() == loco::DataType::FLOAT32);
+ ASSERT_TRUE(*(maxpool2d_data->shape()) == ofm_shape);
+
+ auto ofm_overlay =
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
+ {
+ const auto &ind = e.current();
+ ASSERT_FLOAT_EQ(maxpool2d_data->as_f32_bufptr()->at(ind), ofm_overlay.at(ind));
+ }
+
+ ASSERT_EQ(locomotiv::annot_domain(maxpool2d), loco::Domain::Feature);
+}
+
+} // namespace
+
+// clang-format off
+/* ifm and ofm are from the code below:
+
+ value = tf.random_normal([1, 3, 3, 1], stddev=1)
+ maxpool = tf.nn.max_pool(value, ksize = [1, 2, 2, 1], strides = [1, 1, 1, 1], padding= 'VALID',
+ data_format="NHWC")
+ with tf.Session() as sess:
+ print(sess.run(maxpool))
+*/
+
+TEST(NodeExecution_MaxPool2D, f32_1x3x3x1_calculation)
+{
+ using nncc::core::ADT::tensor::Shape;
+
+ const float ifm[] =
+ {
+ -1.5510627, 0.3653609, 1.9002001,
+ -0.15861237, -0.32944828, 1.2053918,
+ 0.50054574, -0.8533826, 0.131492,
+ };
+
+ const float ofm[] =
+ {
+ 0.3653609, 1.9002001,
+ 0.50054574, 1.2053918
+ };
+
+ run_test(ifm, ofm,
+ Shape{1, 3, 3, 1}, Shape{1, 2, 2, 1}, // input shape, output shape
+ 2, 2, // kernel
+ 1, 1, // stride
+ 0, 0, 0, 0 // padding
+ );
+}
+
+TEST(NodeExecution_MaxPool2D, with_padding)
+{
+ using nncc::core::ADT::tensor::Shape;
+
+ const float ifm[] =
+ {
+ 1, 2, 3, 4, 5,
+ 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20,
+ 21, 22, 23, 24, 25
+ };
+
+ const float ofm[] =
+ {
+ 7, 9, 10,
+ 17, 19, 20,
+ 22, 24, 25
+ };
+
+ run_test(ifm, ofm,
+ Shape{1, 5, 5, 1}, Shape{1, 3, 3, 1}, // input shape, output shape
+ 3, 3, // kernel
+ 2, 2, // stride
+ 1, 1, 1, 1 // padding - this mimics SAME padding
+ );
+}
+// clang-format on
diff --git a/compiler/locomotiv/src/Node/Pull.cpp b/compiler/locomotiv/src/Node/Pull.cpp
new file mode 100644
index 000000000..c482d8b04
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Pull.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "UserData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <cassert>
+#include <stdexcept>
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Pull *pull)
+{
+// TODO Remove deprecated code
+#if 0
+ validate(annot_data(pull), "Data for Pull is not ready");
+
+ validate(annot_domain(pull) == loco::Domain::Tensor, "Domain for Pull is not Tensor");
+
+ // DO NOTHING
+#endif
+
+ auto input_data = user_data(pull);
+
+ validate(input_data, "Input not ready");
+ // User always passes a "Tensor"
+
+ std::unique_ptr<NodeData> pull_data = nullptr;
+
+ // Q. Is it possible to use a generic one?
+ switch (input_data->dtype())
+ {
+ case loco::DataType::S32:
+ {
+ auto input_bufptr = input_data->as_s32_bufptr();
+ pull_data = make_data(*input_bufptr);
+ break;
+ }
+ case loco::DataType::FLOAT32:
+ {
+ auto input_bufptr = input_data->as_f32_bufptr();
+ pull_data = make_data(*input_bufptr);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+
+ assert(pull_data != nullptr);
+ annot_data(pull, std::move(pull_data));
+ annot_domain(pull, loco::Domain::Tensor);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Pull.test.cpp b/compiler/locomotiv/src/Node/Pull.test.cpp
new file mode 100644
index 000000000..53e78776b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Pull.test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "UserData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_Pull, check_data_ready)
+{
+ // Make graph with Pull node only
+ auto g = loco::make_graph();
+ auto pull = g->nodes()->create<loco::Pull>();
+
+ // Data not ready yet
+ ASSERT_ANY_THROW(locomotiv::NodeExecution::get().run(pull));
+
+ // Make and assign data to pull node
+ auto pull_buf = make_buffer<float, LexicalLayout>(Shape{1});
+ auto pull_data = locomotiv::make_data(pull_buf);
+ locomotiv::user_data(pull, std::move(pull_data));
+
+// The behavior of Pull is now consistent with that of other nodes.
+// - annot_data and annot_domain are available after evaluating that "pull" node.
+// TODO Remove this
+#if 0
+ // Domain not ready yet
+ ASSERT_ANY_THROW(locomotiv::NodeExecution::get().run(pull));
+
+ // Set Domain
+ locomotiv::annot_domain(pull, loco::Domain::Tensor);
+#endif
+
+ // Valid run
+ ASSERT_NO_THROW(locomotiv::NodeExecution::get().run(pull));
+}
diff --git a/compiler/locomotiv/src/Node/Push.cpp b/compiler/locomotiv/src/Node/Push.cpp
new file mode 100644
index 000000000..fc5808b15
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Push.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Push *push)
+{
+ auto from_data = annot_data(push->from());
+
+ validate(from_data, "Ingredient not ready");
+ validate(annot_domain(push->from()) == loco::Domain::Tensor, "Ingredient of Push is not tensor");
+
+ std::unique_ptr<NodeData> push_data = nullptr;
+
+ switch (from_data->dtype())
+ {
+ case loco::DataType::S32:
+ {
+ auto from_bufptr = from_data->as_s32_bufptr();
+ push_data = make_data(*from_bufptr);
+ break;
+ }
+ case loco::DataType::FLOAT32:
+ {
+ auto from_bufptr = from_data->as_f32_bufptr();
+ push_data = make_data(*from_bufptr);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+
+ assert(push_data != nullptr);
+ annot_data(push, std::move(push_data));
+ annot_domain(push, loco::Domain::Tensor);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Push.test.cpp b/compiler/locomotiv/src/Node/Push.test.cpp
new file mode 100644
index 000000000..be8f1e4e9
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Push.test.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_Push, s32)
+{
+ // Make pull-push graph
+ auto g = loco::make_graph();
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->dtype(loco::DataType::S32);
+ pull->shape({1});
+ auto push = g->nodes()->create<loco::Push>();
+ push->from(pull);
+
+ // Make and assign data to pull node
+ auto pull_buf = make_buffer<int32_t, LexicalLayout>(Shape{1});
+ pull_buf.at(Index{0}) = 42;
+ auto pull_data = locomotiv::make_data(pull_buf);
+ locomotiv::annot_data(pull, std::move(pull_data));
+ locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+ locomotiv::NodeExecution::get().run(push);
+
+ auto push_data = locomotiv::annot_data(push);
+ ASSERT_NE(push_data, nullptr);
+ ASSERT_EQ(push_data->dtype(), loco::DataType::S32);
+ ASSERT_EQ(*(push_data->shape()), Shape{1});
+ ASSERT_EQ(push_data->as_s32_bufptr()->at(Index{0}), pull_buf.at(Index{0}));
+
+ ASSERT_EQ(locomotiv::annot_domain(push), loco::Domain::Tensor);
+}
+
+TEST(NodeExecution_Push, f32)
+{
+ // Make pull-push graph
+ auto g = loco::make_graph();
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->shape({1});
+ auto push = g->nodes()->create<loco::Push>();
+ push->from(pull);
+
+ // Make and assign data to pull node
+ auto pull_buf = make_buffer<float, LexicalLayout>(Shape{1});
+ pull_buf.at(Index{0}) = 3.14f;
+ auto pull_data = locomotiv::make_data(pull_buf);
+ locomotiv::annot_data(pull, std::move(pull_data));
+ locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+ locomotiv::NodeExecution::get().run(push);
+
+ auto push_data = locomotiv::annot_data(push);
+ ASSERT_NE(push_data, nullptr);
+ ASSERT_EQ(push_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(push_data->shape()), Shape{1});
+ ASSERT_FLOAT_EQ(push_data->as_f32_bufptr()->at(Index{0}), pull_buf.at(Index{0}));
+
+ ASSERT_EQ(locomotiv::annot_domain(push), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/ReLU.cpp b/compiler/locomotiv/src/Node/ReLU.cpp
new file mode 100644
index 000000000..c0f8620e7
--- /dev/null
+++ b/compiler/locomotiv/src/Node/ReLU.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+namespace
+{
+
+inline float relu_ew(float val) { return val > 0.0f ? val : 0.0f; }
+
+struct Func final : public locomotiv::UnaryFunc
+{
+ float apply(float v) const final { return relu_ew(v); }
+};
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::ReLU *relu)
+{
+ Func f;
+
+ eltwise_unary(relu, f);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/ReLU.test.cpp b/compiler/locomotiv/src/Node/ReLU.test.cpp
new file mode 100644
index 000000000..0ddd01d0f
--- /dev/null
+++ b/compiler/locomotiv/src/Node/ReLU.test.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_ReLU, f32)
+{
+ // Make pull-relu graph
+ auto g = loco::make_graph();
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->shape({2});
+ auto relu = g->nodes()->create<loco::ReLU>();
+ relu->input(pull);
+
+ // Make and assign data to pull node
+ auto pull_buf = make_buffer<float, LexicalLayout>(Shape{2});
+ pull_buf.at(Index{0}) = -10.0f;
+ pull_buf.at(Index{1}) = 10.0f;
+ auto pull_data = locomotiv::make_data(pull_buf);
+ locomotiv::annot_data(pull, std::move(pull_data));
+ locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+ locomotiv::NodeExecution::get().run(relu);
+
+ auto relu_data = locomotiv::annot_data(relu);
+ ASSERT_NE(relu_data, nullptr);
+ ASSERT_EQ(relu_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(relu_data->shape()), Shape{2});
+ ASSERT_FLOAT_EQ(relu_data->as_f32_bufptr()->at(Index{0}), 0.0f);
+ ASSERT_FLOAT_EQ(relu_data->as_f32_bufptr()->at(Index{1}), 10.0f);
+
+ ASSERT_EQ(locomotiv::annot_domain(relu), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/ReLU6.cpp b/compiler/locomotiv/src/Node/ReLU6.cpp
new file mode 100644
index 000000000..586c015fc
--- /dev/null
+++ b/compiler/locomotiv/src/Node/ReLU6.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+// TODO Remove deprecated code
+#if 0
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+#include <cassert>
+#include <stdexcept>
+#endif
+
+namespace
+{
+
+inline float relu6_ew(float val) { return val < 0.0f ? 0.0f : (val < 6.0f ? val : 6.0f); }
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::ReLU6 *relu6)
+{
+// TODO Remove deprecated code
+#if 0
+ auto input_data = annot_data(relu6->input());
+
+ validate(input_data, "Input not ready");
+ validate(annot_domain(relu6->input()) != loco::Domain::Unknown,
+ "Input domain of ReLU is Unknown");
+
+ std::unique_ptr<NodeData> relu6_data = nullptr;
+
+ switch (input_data->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ auto input_bufptr = input_data->as_f32_bufptr();
+ auto *shape = input_data->shape();
+ auto relu6_buf = make_buffer<float, LexicalLayout>(*shape);
+
+ for (IndexEnumerator e{*shape}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ relu6_buf.at(index) = relu6_ew(input_bufptr->at(index));
+ }
+
+ relu6_data = make_data(relu6_buf);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+
+ assert(relu6_data != nullptr);
+ annot_data(relu6, std::move(relu6_data));
+ annot_domain(relu6, annot_domain(relu6->input()));
+#endif
+
+ struct Func final : public UnaryFunc
+ {
+ float apply(float v) const final { return relu6_ew(v); }
+ };
+
+ Func f;
+
+ eltwise_unary(relu6, f);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/ReLU6.test.cpp b/compiler/locomotiv/src/Node/ReLU6.test.cpp
new file mode 100644
index 000000000..07f6af23f
--- /dev/null
+++ b/compiler/locomotiv/src/Node/ReLU6.test.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_ReLU6, f32)
+{
+ // Make pull-relu6 graph
+ auto g = loco::make_graph();
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->shape({2, 2});
+ auto relu6 = g->nodes()->create<loco::ReLU6>();
+ relu6->input(pull);
+
+ // Make and assign data to pull node
+ auto pull_buf = make_buffer<float, LexicalLayout>(Shape{2, 2});
+ pull_buf.at(Index{0, 0}) = -5.0f;
+ pull_buf.at(Index{0, 1}) = 6.0f;
+ pull_buf.at(Index{1, 0}) = 7.0f;
+ pull_buf.at(Index{1, 1}) = -8.0f;
+ auto pull_data = locomotiv::make_data(pull_buf);
+ locomotiv::annot_data(pull, std::move(pull_data));
+ locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+ locomotiv::NodeExecution::get().run(relu6);
+
+ auto relu6_data = locomotiv::annot_data(relu6);
+ ASSERT_NE(relu6_data, nullptr);
+ ASSERT_EQ(relu6_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(relu6_data->shape()), Shape({2, 2}));
+ ASSERT_FLOAT_EQ(relu6_data->as_f32_bufptr()->at(Index{0, 0}), 0.0f);
+ ASSERT_FLOAT_EQ(relu6_data->as_f32_bufptr()->at(Index{0, 1}), 6.0f);
+ ASSERT_FLOAT_EQ(relu6_data->as_f32_bufptr()->at(Index{1, 0}), 6.0f);
+ ASSERT_FLOAT_EQ(relu6_data->as_f32_bufptr()->at(Index{1, 1}), 0.0f);
+
+ ASSERT_EQ(locomotiv::annot_domain(relu6), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/Reshape.cpp b/compiler/locomotiv/src/Node/Reshape.cpp
new file mode 100644
index 000000000..ac1672024
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Reshape.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::num_elements;
+
+#include <cassert>
+#include <stdexcept>
+#include <cstring>
+#include <vector>
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
+{
+ auto input_data = annot_data(reshape->input());
+
+ validate(input_data, "Input not ready");
+ validate(annot_domain(reshape->input()) == loco::Domain::Tensor,
+ "Input domain of Reshape is not Tensor");
+
+ std::unique_ptr<NodeData> reshape_data = nullptr;
+
+ switch (input_data->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ auto input_bufptr = input_data->as_f32_bufptr();
+ auto *input_shape = input_data->shape();
+
+ using Shape = nncc::core::ADT::tensor::Shape;
+ std::unique_ptr<Shape> output_shape(new Shape());
+
+ output_shape->resize(reshape->rank());
+ for (uint32_t axis = 0; axis < output_shape->rank(); ++axis)
+ {
+ output_shape->dim(axis) = reshape->dim(axis).value();
+ }
+
+ auto reshape_bufptr = make_buffer<float, LexicalLayout>(*output_shape);
+
+ float *input_ptr = const_cast<float *>(input_bufptr->base());
+ uint64_t input_len = num_elements(*input_shape) * sizeof(float);
+
+ float *output_ptr = reshape_bufptr.base();
+
+ assert(input_len == num_elements(*output_shape) * sizeof(float));
+ memcpy(output_ptr, input_ptr, input_len);
+
+ reshape_data = make_data(reshape_bufptr);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+
+ assert(reshape_data != nullptr);
+ annot_data(reshape, std::move(reshape_data));
+ annot_domain(reshape, annot_domain(reshape->input()));
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Reshape.test.cpp b/compiler/locomotiv/src/Node/Reshape.test.cpp
new file mode 100644
index 000000000..8e54a16df
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Reshape.test.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_Reshape, f32)
+{
+ // Make pull-reshape graph
+ auto g = loco::make_graph();
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->shape({4});
+ auto reshape = g->nodes()->create<loco::Reshape<loco::ReshapeType::Fixed>>();
+ reshape->input(pull);
+ reshape->shape({2, 2});
+
+ // Make and assign data to pull node
+ auto pull_buf = make_buffer<float, LexicalLayout>(Shape{4});
+ pull_buf.at(Index{0}) = 0.0f;
+ pull_buf.at(Index{1}) = 1.1f;
+ pull_buf.at(Index{2}) = 2.2f;
+ pull_buf.at(Index{3}) = 3.3f;
+ auto pull_data = locomotiv::make_data(pull_buf);
+ locomotiv::annot_data(pull, std::move(pull_data));
+ locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+ locomotiv::NodeExecution::get().run(reshape);
+
+ auto reshape_data = locomotiv::annot_data(reshape);
+ ASSERT_NE(reshape_data, nullptr);
+ ASSERT_EQ(reshape_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(reshape_data->shape()), (Shape{2, 2}));
+ ASSERT_FLOAT_EQ(reshape_data->as_f32_bufptr()->at(Index{0, 0}), 0.0f);
+ ASSERT_FLOAT_EQ(reshape_data->as_f32_bufptr()->at(Index{0, 1}), 1.1f);
+ ASSERT_FLOAT_EQ(reshape_data->as_f32_bufptr()->at(Index{1, 0}), 2.2f);
+ ASSERT_FLOAT_EQ(reshape_data->as_f32_bufptr()->at(Index{1, 1}), 3.3f);
+
+ ASSERT_EQ(locomotiv::annot_domain(reshape), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/Softmax.cpp b/compiler/locomotiv/src/Node/Softmax.cpp
new file mode 100644
index 000000000..352598b27
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Softmax.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::Shape;
+
+#include <cassert>
+#include <stdexcept>
+#include <cmath>
+
+namespace
+{
+
// Map a full tensor index to its accumulator index: every coordinate is kept
// except the reduced axis, which collapses to 0.
Index reduce_index(const Index &index, uint32_t axis)
{
  Index r_index;

  r_index.resize(index.rank());
  for (uint32_t i = 0; i < index.rank(); ++i)
  {
    r_index.at(i) = (i == axis) ? 0 : index.at(i);
  }

  return r_index;
}
+
// Shape of the reduction accumulator: same as the input shape, with the
// reduced axis collapsed to extent 1.
Shape reduce_shape(const Shape &shape, uint32_t axis)
{
  Shape r_shape;

  r_shape.resize(shape.rank());
  for (uint32_t i = 0; i < shape.rank(); ++i)
  {
    r_shape.dim(i) = (i == axis) ? 1 : shape.dim(i);
  }

  return r_shape;
}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorSoftmax *softmax)
+{
+ auto input_data = annot_data(softmax->input());
+
+ validate(input_data, "Input not ready");
+ validate(annot_domain(softmax->input()) == loco::Domain::Tensor,
+ "Input domain of TensorSoftmax is not Tensor");
+
+ std::unique_ptr<NodeData> softmax_data = nullptr;
+
+ switch (input_data->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ auto axis = softmax->axis();
+
+ auto *input_shape = input_data->shape();
+ auto input_bufptr = input_data->as_f32_bufptr();
+ auto softmax_buf = make_buffer<float, LexicalLayout>(*input_data->shape());
+
+ auto reduce_sum_shape = reduce_shape(*input_shape, axis);
+ auto reduce_sum_bufptr = make_buffer<float, LexicalLayout>(reduce_sum_shape);
+
+ for (IndexEnumerator e{*input_shape}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ const auto r_index = reduce_index(index, axis);
+
+ reduce_sum_bufptr.at(r_index) += exp(input_bufptr->at(index));
+ }
+
+ for (IndexEnumerator e{*input_shape}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ const auto r_index = reduce_index(index, axis);
+
+ softmax_buf.at(index) = exp(input_bufptr->at(index)) / reduce_sum_bufptr.at(r_index);
+ }
+
+ softmax_data = make_data(softmax_buf);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+
+ assert(softmax_data != nullptr);
+ annot_data(softmax, std::move(softmax_data));
+ annot_domain(softmax, annot_domain(softmax->input()));
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Softmax.test.cpp b/compiler/locomotiv/src/Node/Softmax.test.cpp
new file mode 100644
index 000000000..21d240275
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Softmax.test.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_Softmax, f32)
+{
+ // Make pull-softmax graph
+ auto g = loco::make_graph();
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->shape({2, 2});
+ auto softmax = g->nodes()->create<loco::TensorSoftmax>();
+ softmax->input(pull);
+ softmax->axis(1);
+
+ // Make and assign data to pull node
+ auto pull_buf = make_buffer<float, LexicalLayout>({2, 2});
+ pull_buf.at(Index{0, 0}) = 1.1f;
+ pull_buf.at(Index{0, 1}) = 1.1f;
+ pull_buf.at(Index{1, 0}) = 3.3f;
+ pull_buf.at(Index{1, 1}) = 3.3f;
+ auto pull_data = locomotiv::make_data(pull_buf);
+ locomotiv::annot_data(pull, std::move(pull_data));
+ locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+ locomotiv::NodeExecution::get().run(softmax);
+
+ auto kShape = Shape{2, 2};
+ auto softmax_data = locomotiv::annot_data(softmax);
+ ASSERT_NE(softmax_data, nullptr);
+ ASSERT_EQ(softmax_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(softmax_data->shape()), kShape);
+ ASSERT_FLOAT_EQ(softmax_data->as_f32_bufptr()->at(Index{0, 0}), 0.5f);
+ ASSERT_FLOAT_EQ(softmax_data->as_f32_bufptr()->at(Index{0, 1}), 0.5f);
+ ASSERT_FLOAT_EQ(softmax_data->as_f32_bufptr()->at(Index{1, 0}), 0.5f);
+ ASSERT_FLOAT_EQ(softmax_data->as_f32_bufptr()->at(Index{1, 1}), 0.5f);
+
+ ASSERT_EQ(locomotiv::annot_domain(softmax), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/Tanh.cpp b/compiler/locomotiv/src/Node/Tanh.cpp
new file mode 100644
index 000000000..78d329e7c
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Tanh.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include <cmath>
+
+namespace
+{
+
// Elementwise hyperbolic-tangent functor, plugged into the generic
// eltwise_unary evaluator by NodeExecution::execute(loco::Tanh *).
struct Func final : public locomotiv::UnaryFunc
{
  float apply(float v) const final { return std::tanh(v); }
};
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Tanh *tanh)
+{
+ Func f;
+
+ eltwise_unary(tanh, f);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Tanh.test.cpp b/compiler/locomotiv/src/Node/Tanh.test.cpp
new file mode 100644
index 000000000..78c3a13ba
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Tanh.test.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_Tanh, f32)
+{
+ // Make pull-Tanh graph
+ auto g = loco::make_graph();
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->shape({3});
+ auto tanh = g->nodes()->create<loco::Tanh>();
+ tanh->input(pull);
+
+ // Make and assign data to pull node
+ auto pull_buf = make_buffer<float, LexicalLayout>(Shape{3});
+ pull_buf.at(Index{0}) = 0.0f;
+ pull_buf.at(Index{1}) = 1.0f;
+ pull_buf.at(Index{2}) = -1.0f;
+ auto pull_data = locomotiv::make_data(pull_buf);
+ locomotiv::annot_data(pull, std::move(pull_data));
+ locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+ locomotiv::NodeExecution::get().run(tanh);
+
+ auto tanh_data = locomotiv::annot_data(tanh);
+ ASSERT_NE(tanh_data, nullptr);
+ ASSERT_EQ(tanh_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(tanh_data->shape()), Shape{3});
+ ASSERT_FLOAT_EQ(tanh_data->as_f32_bufptr()->at(Index{0}), 0.0f);
+ ASSERT_FLOAT_EQ(tanh_data->as_f32_bufptr()->at(Index{1}), 0.761594f);
+ ASSERT_FLOAT_EQ(tanh_data->as_f32_bufptr()->at(Index{2}), -0.761594f);
+
+ ASSERT_EQ(locomotiv::annot_domain(tanh), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/TensorBroadcast.cpp b/compiler/locomotiv/src/Node/TensorBroadcast.cpp
new file mode 100644
index 000000000..010ca6821
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorBroadcast.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::Shape;
+
+#include <cassert>
+#include <stdexcept>
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast)
+{
+ auto input_data = annot_data(tensor_broadcast->input());
+
+ // Calculate output shape
+ Shape input_shape = *(input_data->shape());
+
+ // TODO Reuse "ShapeInferenceService"
+ Shape output_shape;
+
+ output_shape.resize(input_shape.rank());
+ for (uint32_t axis = 0; axis < input_shape.rank(); ++axis)
+ {
+ if (tensor_broadcast->mapping()->defined(axis))
+ {
+ assert(input_shape.dim(axis) == 1); // Required by TensorBroadcast definition
+ output_shape.dim(axis) = tensor_broadcast->mapping()->dim(axis).value();
+ }
+ else
+ {
+ output_shape.dim(axis) = input_shape.dim(axis);
+ }
+ }
+
+ assert(input_shape.rank() == output_shape.rank());
+
+ uint32_t const rank = input_shape.rank();
+
+ std::unique_ptr<NodeData> output_data = nullptr;
+
+ switch (input_data->dtype())
+ {
+ // TODO Use type-generic implementation!
+ case loco::DataType::FLOAT32:
+ {
+ auto input_bufptr = input_data->as_f32_bufptr();
+ auto output_buf = make_buffer<float, LexicalLayout>(output_shape);
+
+ for (IndexEnumerator e{output_shape}; e.valid(); e.advance())
+ {
+ auto input_index = e.current();
+ const auto &output_index = e.current();
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ if (tensor_broadcast->mapping()->defined(axis))
+ {
+ input_index.at(axis) = 0;
+ }
+ }
+
+ output_buf.at(output_index) = input_bufptr->at(input_index);
+ }
+
+ output_data = make_data(output_buf);
+ break;
+ }
+ default:
+ throw std::runtime_error("Not yet supported");
+ }
+
+ assert(output_data != nullptr);
+ annot_data(tensor_broadcast, std::move(output_data));
+ annot_domain(tensor_broadcast, loco::Domain::Tensor);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorBroadcast.test.cpp b/compiler/locomotiv/src/Node/TensorBroadcast.test.cpp
new file mode 100644
index 000000000..e8347d737
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorBroadcast.test.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_TensorBroadcast, f32)
+{
+ // Create a sample graph w/ TensorBroadcast
+ auto g = loco::make_graph();
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->shape({1, 1});
+ auto broadcast = g->nodes()->create<loco::TensorBroadcast>();
+ broadcast->input(pull);
+ broadcast->mapping()->dim(0) = 2;
+
+ // Make and assign data to pull node
+ auto pull_buf = make_buffer<float, LexicalLayout>(Shape{1, 1});
+ pull_buf.at(Index{0, 0}) = -1.0f;
+
+ auto pull_data = locomotiv::make_data(pull_buf);
+ locomotiv::annot_data(pull, std::move(pull_data));
+ locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+ locomotiv::NodeExecution::get().run(broadcast);
+
+ auto broadcast_data = locomotiv::annot_data(broadcast);
+ ASSERT_NE(broadcast_data, nullptr);
+ ASSERT_EQ(broadcast_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ((*(broadcast_data->shape())), (Shape{2, 1}));
+ ASSERT_FLOAT_EQ(broadcast_data->as_f32_bufptr()->at(Index{0, 0}), -1.0f);
+ ASSERT_FLOAT_EQ(broadcast_data->as_f32_bufptr()->at(Index{1, 0}), -1.0f);
+
+ ASSERT_EQ(locomotiv::annot_domain(broadcast), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/TensorConcat.cpp b/compiler/locomotiv/src/Node/TensorConcat.cpp
new file mode 100644
index 000000000..5097e55c6
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorConcat.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::Shape;
+
+#include <cassert>
+#include <stdexcept>
+
+namespace locomotiv
+{
+
// Evaluate TensorConcat: join lhs and rhs along the node's axis.
// Both inputs must share dtype and rank, and agree on every dimension
// except the concatenation axis.
void NodeExecution::execute(loco::TensorConcat *tensor_concat)
{
  auto lhs_data = annot_data(tensor_concat->lhs());
  auto rhs_data = annot_data(tensor_concat->rhs());
  auto axis = tensor_concat->axis();

  validate(lhs_data && rhs_data, "Ingredient not ready");
  validate(lhs_data->dtype() == rhs_data->dtype(), "lhs and rhs of Concat should have same dtype");

  validate(annot_domain(tensor_concat->lhs()) == loco::Domain::Tensor &&
               annot_domain(tensor_concat->rhs()) == loco::Domain::Tensor,
           "Some ingredients of TensorConcat is not Tensor");

  // Calculate output shape: dims are summed on the concat axis and must
  // match everywhere else
  Shape lhs_shape = *lhs_data->shape();
  Shape rhs_shape = *rhs_data->shape();
  Shape concat_shape;

  assert(lhs_shape.rank() == rhs_shape.rank());
  concat_shape.resize(lhs_shape.rank());
  for (uint32_t index = 0; index < lhs_shape.rank(); ++index)
  {
    if (index == axis)
      concat_shape.dim(index) = lhs_shape.dim(index) + rhs_shape.dim(index);
    else
    {
      assert(lhs_shape.dim(index) == rhs_shape.dim(index));
      concat_shape.dim(index) = lhs_shape.dim(index);
    }
  }
  // Output coordinates below this value on the axis come from LHS,
  // the rest from RHS
  auto left_dim_size = lhs_shape.dim(axis);

  // Copy data from two inputs LHS and RHS to Concat
  std::unique_ptr<NodeData> concat_data = nullptr;
  switch (lhs_data->dtype())
  {
    case loco::DataType::FLOAT32:
    {
      auto lhs_bufptr = lhs_data->as_f32_bufptr();
      auto rhs_bufptr = rhs_data->as_f32_bufptr();
      auto concat_buf = make_buffer<float, LexicalLayout>(concat_shape);

      for (IndexEnumerator e{concat_shape}; e.valid(); e.advance())
      {
        const auto &e_index = e.current();

        if (e_index.at(axis) < left_dim_size)
        {
          // Left index is same as output index
          concat_buf.at(e_index) = lhs_bufptr->at(e_index);
        }
        else
        {
          // Adjust right index to valid range
          Index r_index = e_index;
          r_index.at(axis) -= left_dim_size;
          concat_buf.at(e_index) = rhs_bufptr->at(r_index);
        }
      }

      concat_data = make_data(concat_buf);
      break;
    }
    default:
      throw std::runtime_error("NYI for this DataType");
  }

  assert(concat_data != nullptr);
  annot_data(tensor_concat, std::move(concat_data));
  annot_domain(tensor_concat, loco::Domain::Tensor);
}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorConcat.test.cpp b/compiler/locomotiv/src/Node/TensorConcat.test.cpp
new file mode 100644
index 000000000..d71b51524
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorConcat.test.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_TensorConcat, f32)
+{
+ // Make (pull, pull)-concat graph
+ auto g = loco::make_graph();
+ auto pull_l = g->nodes()->create<loco::Pull>();
+ pull_l->dtype(loco::DataType::FLOAT32);
+ pull_l->shape({1, 2});
+ auto pull_r = g->nodes()->create<loco::Pull>();
+ pull_r->dtype(loco::DataType::FLOAT32);
+ pull_r->shape({1, 2});
+ auto tconcat = g->nodes()->create<loco::TensorConcat>();
+ tconcat->lhs(pull_l);
+ tconcat->rhs(pull_r);
+ tconcat->axis(0);
+
+ // Make and assign data to pull node
+ auto pull_l_buf = make_buffer<float, LexicalLayout>(Shape{1, 2});
+ pull_l_buf.at(Index{0, 0}) = -1.0f;
+ pull_l_buf.at(Index{0, 1}) = -2.0f;
+ auto pull_r_buf = make_buffer<float, LexicalLayout>(Shape{1, 2});
+ pull_r_buf.at(Index{0, 0}) = 3.0f;
+ pull_r_buf.at(Index{0, 1}) = 4.0f;
+
+ auto pull_l_data = locomotiv::make_data(pull_l_buf);
+ locomotiv::annot_data(pull_l, std::move(pull_l_data));
+ locomotiv::annot_domain(pull_l, loco::Domain::Tensor);
+ auto pull_r_data = locomotiv::make_data(pull_r_buf);
+ locomotiv::annot_data(pull_r, std::move(pull_r_data));
+ locomotiv::annot_domain(pull_r, loco::Domain::Tensor);
+
+ locomotiv::NodeExecution::get().run(tconcat);
+
+ auto concat_data = locomotiv::annot_data(tconcat);
+ ASSERT_NE(concat_data, nullptr);
+ ASSERT_EQ(concat_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ((*(concat_data->shape())), (Shape{2, 2}));
+ ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{0, 0}), -1.0f);
+ ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{0, 1}), -2.0f);
+ ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{1, 0}), 3.0f);
+ ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{1, 1}), 4.0f);
+
+ ASSERT_EQ(locomotiv::annot_domain(tconcat), loco::Domain::Tensor);
+}
+
+TEST(NodeExecution_TensorConcat, f32_2)
+{
+ // Make (pull, pull)-concat graph
+ auto g = loco::make_graph();
+ auto pull_l = g->nodes()->create<loco::Pull>();
+ pull_l->dtype(loco::DataType::FLOAT32);
+ pull_l->shape({1, 2});
+ auto pull_r = g->nodes()->create<loco::Pull>();
+ pull_r->dtype(loco::DataType::FLOAT32);
+ pull_r->shape({3, 2});
+ auto tconcat = g->nodes()->create<loco::TensorConcat>();
+ tconcat->lhs(pull_l);
+ tconcat->rhs(pull_r);
+ tconcat->axis(0);
+
+ // Make and assign data to pull node
+ auto pull_l_buf = make_buffer<float, LexicalLayout>(Shape{1, 2});
+ pull_l_buf.at(Index{0, 0}) = -1.0f;
+ pull_l_buf.at(Index{0, 1}) = -2.0f;
+ auto pull_r_buf = make_buffer<float, LexicalLayout>(Shape{3, 2});
+ pull_r_buf.at(Index{0, 0}) = 3.0f;
+ pull_r_buf.at(Index{0, 1}) = 4.0f;
+ pull_r_buf.at(Index{1, 0}) = -3.0f;
+ pull_r_buf.at(Index{1, 1}) = -4.0f;
+ pull_r_buf.at(Index{2, 0}) = 5.0f;
+ pull_r_buf.at(Index{2, 1}) = 6.0f;
+
+ auto pull_l_data = locomotiv::make_data(pull_l_buf);
+ locomotiv::annot_data(pull_l, std::move(pull_l_data));
+ locomotiv::annot_domain(pull_l, loco::Domain::Tensor);
+ auto pull_r_data = locomotiv::make_data(pull_r_buf);
+ locomotiv::annot_data(pull_r, std::move(pull_r_data));
+ locomotiv::annot_domain(pull_r, loco::Domain::Tensor);
+
+ locomotiv::NodeExecution::get().run(tconcat);
+
+ auto concat_data = locomotiv::annot_data(tconcat);
+ ASSERT_NE(concat_data, nullptr);
+ ASSERT_EQ(concat_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ((*(concat_data->shape())), (Shape{4, 2}));
+ ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{0, 0}), -1.0f);
+ ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{0, 1}), -2.0f);
+ ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{1, 0}), 3.0f);
+ ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{1, 1}), 4.0f);
+ ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{2, 0}), -3.0f);
+ ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{2, 1}), -4.0f);
+ ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{3, 0}), 5.0f);
+ ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{3, 1}), 6.0f);
+
+ ASSERT_EQ(locomotiv::annot_domain(tconcat), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/TensorConstantPad.cpp b/compiler/locomotiv/src/Node/TensorConstantPad.cpp
new file mode 100644
index 000000000..989afaf94
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorConstantPad.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorConstantPad *pad)
+{
+ auto input_data = annot_data(pad->input());
+ auto input_domain = annot_domain(pad->input());
+ validate(input_data, "Input not ready");
+ validate(input_domain == loco::Domain::Tensor, "Input domain of TensorConstantPad is not Tensor");
+
+ auto input_shape = input_data->shape();
+ const uint32_t input_rank = input_shape->rank();
+
+ auto padding = pad->padding();
+ validate(input_rank == padding->rank(), "input and padding should have same rank");
+
+ auto constant_node = pad->constant();
+ auto constant_data = annot_data(constant_node);
+ validate(constant_data->dtype() == input_data->dtype(), "constant and input have same data type");
+ validate(constant_data->shape()->rank() == 1 && constant_data->shape()->dim(0) == 1,
+ "constant should have one rank with one dimension at zero axis");
+
+ std::unique_ptr<NodeData> pad_data = nullptr;
+ Index base_index;
+ base_index.resize(input_rank);
+
+ // Tensor is padded by relocating its base.
+ // padded output index = input index + base index
+ for (uint32_t axis = 0; axis < padding->rank(); axis++)
+ {
+ base_index.at(axis) = padding->front(axis);
+ }
+
+ // calculate output shape
+ Shape output_shape;
+ output_shape.resize(input_rank);
+ for (uint32_t i = 0; i < input_rank; i++)
+ {
+ output_shape.dim(i) = input_shape->dim(i) + padding->front(i) + padding->back(i);
+ }
+
+ switch (input_data->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ auto input_buf = input_data->as_f32_bufptr();
+ auto constant_data_buf = constant_data->as_f32_bufptr();
+ const auto constant_value = constant_data_buf->at(Index{0});
+
+ auto output_buf = make_buffer<float, LexicalLayout>(output_shape);
+
+ for (IndexEnumerator ie{*input_shape}, oe{output_shape}; oe.valid(); oe.advance())
+ {
+ auto input_index = ie.current();
+ auto output_index = oe.current();
+
+ if ((input_index + base_index) == output_index)
+ {
+ output_buf.at(output_index) = input_buf->at(input_index);
+ ie.advance();
+ }
+ else
+ {
+ output_buf.at(output_index) = constant_value;
+ }
+ }
+
+ pad_data = make_data(output_buf);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+
+ assert(pad_data != nullptr);
+ annot_data(pad, std::move(pad_data));
+ annot_domain(pad, annot_domain(pad->input()));
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorConstantPad.test.cpp b/compiler/locomotiv/src/Node/TensorConstantPad.test.cpp
new file mode 100644
index 000000000..0f60c5f85
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorConstantPad.test.cpp
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::Shape;
+
+TEST(NodeExecution_Pad, tensor_constant_pad_4_dim)
+{
+ auto g = loco::make_graph();
+
+ auto inputTensor = g->nodes()->create<loco::Pull>();
+ inputTensor->dtype(loco::DataType::FLOAT32);
+ inputTensor->shape({1, 2, 2, 1});
+ auto inputTensor_buf = make_buffer<float, LexicalLayout>(Shape{1, 2, 2, 1});
+ inputTensor_buf.at(Index{0, 0, 0, 0}) = 1.0f;
+ inputTensor_buf.at(Index{0, 0, 1, 0}) = 2.0f;
+ inputTensor_buf.at(Index{0, 1, 0, 0}) = 3.0f;
+ inputTensor_buf.at(Index{0, 1, 1, 0}) = 4.0f;
+ auto inputTensor_data = locomotiv::make_data(inputTensor_buf);
+ locomotiv::annot_data(inputTensor, std::move(inputTensor_data));
+ locomotiv::annot_domain(inputTensor, loco::Domain::Tensor);
+
+ auto constant = g->nodes()->create<loco::ConstGen>();
+ constant->dtype(loco::DataType::FLOAT32);
+ constant->shape({1});
+ auto constant_buf = make_buffer<float, LexicalLayout>(Shape{1});
+ constant_buf.at(Index{0}) = 0.0f;
+ auto constant_data = locomotiv::make_data(constant_buf);
+ locomotiv::annot_data(constant, std::move(constant_data));
+ locomotiv::annot_domain(constant, loco::Domain::Tensor);
+
+ auto pad = g->nodes()->create<loco::TensorConstantPad>();
+ pad->input(inputTensor);
+ pad->constant(constant);
+
+ auto padding = pad->padding();
+ padding->rank(4);
+ padding->front(0) = 0;
+ padding->back(0) = 0;
+ padding->front(1) = 3;
+ padding->back(1) = 1;
+ padding->front(2) = 1;
+ padding->back(2) = 1;
+ padding->front(3) = 0;
+ padding->back(3) = 0;
+
+ locomotiv::NodeExecution::get().run(pad);
+
+ auto pad_data = locomotiv::annot_data(pad);
+ ASSERT_NE(pad_data, nullptr);
+ ASSERT_EQ(pad_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(pad_data->shape()), Shape({1, 6, 4, 1}));
+
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{0, 3, 1, 0}), 1.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{0, 3, 2, 0}), 2.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{0, 4, 1, 0}), 3.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{0, 4, 2, 0}), 4.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{0, 0, 0, 0}), 0.0f);
+
+ ASSERT_EQ(locomotiv::annot_domain(pad), loco::Domain::Tensor);
+}
+
+TEST(NodeExecution_Pad, tensor_constant_pad_1_dim)
+{
+ auto g = loco::make_graph();
+
+ auto inputTensor = g->nodes()->create<loco::Pull>();
+ inputTensor->dtype(loco::DataType::FLOAT32);
+ inputTensor->shape({3});
+ auto inputTensor_buf = make_buffer<float, LexicalLayout>(Shape{3});
+ inputTensor_buf.at(Index{0}) = 1.0f;
+ inputTensor_buf.at(Index{1}) = 5.0f;
+ inputTensor_buf.at(Index{2}) = 3.0f;
+ auto inputTensor_data = locomotiv::make_data(inputTensor_buf);
+ locomotiv::annot_data(inputTensor, std::move(inputTensor_data));
+ locomotiv::annot_domain(inputTensor, loco::Domain::Tensor);
+
+ auto constant = g->nodes()->create<loco::ConstGen>();
+ constant->dtype(loco::DataType::FLOAT32);
+ constant->shape({1});
+ auto constant_buf = make_buffer<float, LexicalLayout>(Shape{1});
+ constant_buf.at(Index{0}) = 0.0f;
+ auto constant_data = locomotiv::make_data(constant_buf);
+ locomotiv::annot_data(constant, std::move(constant_data));
+ locomotiv::annot_domain(constant, loco::Domain::Tensor);
+
+ auto pad = g->nodes()->create<loco::TensorConstantPad>();
+ pad->input(inputTensor);
+ pad->constant(constant);
+ auto padding = pad->padding();
+ padding->rank(1);
+ padding->front(0) = 2;
+ padding->back(0) = 1;
+
+ locomotiv::NodeExecution::get().run(pad);
+
+ auto pad_data = locomotiv::annot_data(pad);
+ ASSERT_NE(pad_data, nullptr);
+ ASSERT_EQ(pad_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(pad_data->shape()), Shape({6}));
+
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{0}), 0.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1}), 0.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{2}), 1.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{3}), 5.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{4}), 3.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{5}), 0.0f);
+
+ ASSERT_EQ(locomotiv::annot_domain(pad), loco::Domain::Tensor);
+}
+
+TEST(NodeExecution_Pad, tensor_constant_pad_6_dim)
+{
+ auto g = loco::make_graph();
+
+ auto inputTensor = g->nodes()->create<loco::Pull>();
+ inputTensor->dtype(loco::DataType::FLOAT32);
+ inputTensor->shape({2, 1, 3, 2, 1, 2});
+ auto inputTensor_buf = make_buffer<float, LexicalLayout>(Shape{2, 1, 3, 2, 1, 2});
+ int a, b, c, d, e, f;
+ float dummy = 1.0f;
+ for (uint32_t a = 0; a < 2; a++)
+ {
+ for (uint32_t b = 0; b < 1; b++)
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ for (uint32_t d = 0; d < 2; d++)
+ {
+ for (uint32_t e = 0; e < 1; e++)
+ {
+ for (uint32_t f = 0; f < 2; f++)
+ {
+ inputTensor_buf.at(Index{a, b, c, d, e, f}) = dummy++;
+ }
+ }
+ }
+ }
+ }
+ }
+ auto inputTensor_data = locomotiv::make_data(inputTensor_buf);
+ locomotiv::annot_data(inputTensor, std::move(inputTensor_data));
+ locomotiv::annot_domain(inputTensor, loco::Domain::Tensor);
+
+ auto constant = g->nodes()->create<loco::ConstGen>();
+ constant->dtype(loco::DataType::FLOAT32);
+ constant->shape({1});
+ auto constant_buf = make_buffer<float, LexicalLayout>(Shape{1});
+ constant_buf.at(Index{0}) = 0.0f;
+ auto constant_data = locomotiv::make_data(constant_buf);
+ locomotiv::annot_data(constant, std::move(constant_data));
+ locomotiv::annot_domain(constant, loco::Domain::Tensor);
+
+ auto pad = g->nodes()->create<loco::TensorConstantPad>();
+ pad->input(inputTensor);
+ pad->constant(constant);
+ auto padding = pad->padding();
+
+ padding->rank(6);
+ padding->front(0) = 1;
+ padding->back(0) = 1;
+ padding->front(1) = 0;
+ padding->back(1) = 0;
+ padding->front(2) = 1;
+ padding->back(2) = 2;
+ padding->front(3) = 2;
+ padding->back(3) = 1;
+ padding->front(4) = 0;
+ padding->back(4) = 0;
+ padding->front(5) = 1;
+ padding->back(5) = 2;
+
+ locomotiv::NodeExecution::get().run(pad);
+
+ auto pad_data = locomotiv::annot_data(pad);
+ ASSERT_NE(pad_data, nullptr);
+ ASSERT_EQ(pad_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(pad_data->shape()), Shape({4, 1, 6, 5, 1, 5}));
+
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 1, 2, 0, 1}), 1.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 1, 2, 0, 2}), 2.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 1, 3, 0, 1}), 3.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 1, 3, 0, 2}), 4.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 2, 2, 0, 1}), 5.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 2, 2, 0, 2}), 6.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 2, 3, 0, 1}), 7.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 2, 3, 0, 2}), 8.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 3, 2, 0, 1}), 9.0f);
+ ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 3, 2, 0, 2}), 10.0f);
+
+ ASSERT_EQ(locomotiv::annot_domain(pad), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/TensorReduce.cpp b/compiler/locomotiv/src/Node/TensorReduce.cpp
new file mode 100644
index 000000000..fae7a75c5
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorReduce.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Buffer;
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+Index reduced_index(const Index &index, const loco::TensorAxisSet &axes)
+{
+ Index r_index;
+
+ r_index.resize(index.rank());
+ for (uint32_t i = 0; i < index.rank(); ++i)
+ r_index.at(i) = (axes.defined(i)) ? 0 : index.at(i);
+
+ return r_index;
+}
+
+Shape reduced_shape(const Shape &shape, const loco::TensorAxisSet &axes)
+{
+ Shape r_shape;
+
+ r_shape.resize(shape.rank());
+ for (uint32_t i = 0; i < shape.rank(); ++i)
+ r_shape.dim(i) = (axes.defined(i)) ? 1 : shape.dim(i);
+
+ return r_shape;
+}
+
+} // namespace
+
+namespace
+{
+
/**
 * @brief Reduction kernel dispatched on (element type, ReduceFunc).
 *
 * This primary template is the fallback for ReduceFunc values that have no
 * specialization; it always throws. Supported functions (currently Mean) are
 * implemented as specializations below.
 */
template <typename T, loco::ReduceFunc F> struct ReduceFunction
{
  // lhs: output buffer with the reduced shape; rhs: input buffer
  static void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorAxisSet &axes)
  {
    throw std::runtime_error("Not supported ReduceFunc type");
  }
};
+
+template <typename T> struct ReduceFunction<T, loco::ReduceFunc::Mean>
+{
+ static void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorAxisSet &axes)
+ {
+ for (IndexEnumerator e{rhs.shape()}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ const auto r_index = reduced_index(index, axes);
+
+ lhs.at(r_index) += rhs.at(index);
+ }
+
+ uint32_t r_cnt = 1;
+ for (uint32_t i = 0; i < rhs.shape().rank(); ++i)
+ if (axes.defined(i))
+ r_cnt *= rhs.shape().dim(i);
+
+ for (IndexEnumerator e{lhs.shape()}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ lhs.at(index) /= static_cast<T>(r_cnt);
+ }
+ }
+};
+
+template <typename T>
+void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorReduce &node)
+{
+ switch (node.func())
+ {
+ case loco::ReduceFunc::Mean:
+ ReduceFunction<T, loco::ReduceFunc::Mean>::apply(lhs, rhs, *node.axes());
+ break;
+
+ // TODO Support more ReduceFunc type
+ default:
+ break;
+ }
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorReduce *node)
+{
+ auto input_data = annot_data(node->input());
+ auto input_shape = input_data->shape();
+
+ validate(input_data, "Input not ready");
+ validate(annot_domain(node->input()) == loco::Domain::Tensor,
+ "Input domain of TensorReduce is not Tensor");
+
+ std::unique_ptr<NodeData> reduce_data = nullptr;
+ Shape r_shape = reduced_shape(*input_shape, *node->axes());
+ switch (input_data->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ auto input_bufptr = input_data->as_f32_bufptr();
+ auto reduce_buf = make_buffer<float, LexicalLayout>(r_shape);
+
+ apply(reduce_buf, *input_bufptr, *node);
+
+ reduce_data = make_data(reduce_buf);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+
+ assert(reduce_data != nullptr);
+ annot_data(node, std::move(reduce_data));
+ annot_domain(node, annot_domain(node->input()));
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorReduce.test.cpp b/compiler/locomotiv/src/Node/TensorReduce.test.cpp
new file mode 100644
index 000000000..68398cacd
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorReduce.test.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_Fixed_Reduce_Mean, f32_0)
+{
+ // Make pull-TensorReduce(Mean) graph
+ auto g = loco::make_graph();
+ auto pull_input = g->nodes()->create<loco::Pull>();
+ pull_input->dtype(loco::DataType::FLOAT32);
+ pull_input->shape({1, 2, 2});
+ auto reduce_node = g->nodes()->create<loco::TensorReduce>();
+ reduce_node->input(pull_input);
+ reduce_node->axes()->insert(0);
+ reduce_node->axes()->insert(1);
+ reduce_node->func(loco::ReduceFunc::Mean);
+
+ // Make and assign data to pull node
+ auto pull_input_buf = make_buffer<float, LexicalLayout>({1, 2, 2});
+ pull_input_buf.at(Index{0, 0, 0}) = 1.1f;
+ pull_input_buf.at(Index{0, 0, 1}) = 2.2f;
+ pull_input_buf.at(Index{0, 1, 0}) = 5.5f;
+ pull_input_buf.at(Index{0, 1, 1}) = 6.6f;
+ auto pull_input_data = locomotiv::make_data(pull_input_buf);
+ locomotiv::annot_data(pull_input, std::move(pull_input_data));
+ locomotiv::annot_domain(pull_input, loco::Domain::Tensor);
+
+ locomotiv::NodeExecution::get().run(reduce_node);
+
+ auto kShape = Shape{1, 1, 2};
+ auto reduce_data = locomotiv::annot_data(reduce_node);
+ ASSERT_NE(reduce_data, nullptr);
+ ASSERT_EQ(reduce_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(reduce_data->shape()), kShape);
+ ASSERT_FLOAT_EQ(reduce_data->as_f32_bufptr()->at(Index{0, 0, 0}), 3.3f);
+ ASSERT_FLOAT_EQ(reduce_data->as_f32_bufptr()->at(Index{0, 0, 1}), 4.4f);
+
+ ASSERT_EQ(locomotiv::annot_domain(reduce_node), loco::Domain::Tensor);
+}
+
+TEST(NodeExecution_Fixed_Reduce_Mean, f32_1)
+{
+ // Make pull-TensorReduce(Mean) graph
+ auto g = loco::make_graph();
+ auto pull_input = g->nodes()->create<loco::Pull>();
+ pull_input->dtype(loco::DataType::FLOAT32);
+ pull_input->shape({1, 2, 2});
+ auto reduce_node = g->nodes()->create<loco::TensorReduce>();
+ reduce_node->input(pull_input);
+ reduce_node->axes()->insert(1);
+ reduce_node->axes()->insert(2);
+ reduce_node->func(loco::ReduceFunc::Mean);
+
+ // Make and assign data to pull node
+ auto pull_input_buf = make_buffer<float, LexicalLayout>({1, 2, 2});
+ pull_input_buf.at(Index{0, 0, 0}) = 1.1f;
+ pull_input_buf.at(Index{0, 0, 1}) = 2.2f;
+ pull_input_buf.at(Index{0, 1, 0}) = 5.5f;
+ pull_input_buf.at(Index{0, 1, 1}) = 6.6f;
+ auto pull_input_data = locomotiv::make_data(pull_input_buf);
+ locomotiv::annot_data(pull_input, std::move(pull_input_data));
+ locomotiv::annot_domain(pull_input, loco::Domain::Tensor);
+
+ locomotiv::NodeExecution::get().run(reduce_node);
+
+ auto kShape = Shape{1, 1, 1};
+ auto reduce_data = locomotiv::annot_data(reduce_node);
+ ASSERT_NE(reduce_data, nullptr);
+ ASSERT_EQ(reduce_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(reduce_data->shape()), kShape);
+ ASSERT_FLOAT_EQ(reduce_data->as_f32_bufptr()->at(Index{0, 0, 0}), 3.85f);
+
+ ASSERT_EQ(locomotiv::annot_domain(reduce_node), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/TransposedConv2D.cpp b/compiler/locomotiv/src/Node/TransposedConv2D.cpp
new file mode 100644
index 000000000..3ea4f071d
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TransposedConv2D.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Compute 1D output size for transposed convolution based on given 1D arguments.
+ *
+ * @param whole_pad Sum of front and rear pad
+ */
/**
 * @brief Compute 1D output size for transposed convolution based on given 1D arguments.
 *
 * @param whole_pad Sum of front and rear pad
 */
inline uint32_t compute_transposed_out_size(uint32_t input_size, uint32_t whole_pad,
                                            uint32_t filter_size, uint32_t stride)
{
  // Upsampled extent before cropping, then crop by the total padding
  const uint32_t expanded = stride * (input_size - 1) + filter_size;
  return expanded - whole_pad;
}
+
+/**
+ * @brief Calculates TransposedConv2D
+ * @note Both input_buf and filter_buf have NHWC format
+ */
+template <typename RET_T, typename IFM_T, typename FIL_T>
+Buffer<RET_T> calc_tr_conv2D(const loco::TransposedConv2D *tr_conv2d,
+ const Buffer<IFM_T> *input_buf, const Buffer<FIL_T> *filter_buf)
+{
+ auto input_shape = input_buf->shape();
+ auto filter_shape = filter_buf->shape();
+
+ locomotiv::validate(input_shape.rank() == 4, "ifm rank must be 4");
+ locomotiv::validate(filter_shape.rank() == 4, "filter rank must be 4");
+ locomotiv::validate(input_shape.dim(3) /* depth of input */ ==
+ filter_shape.dim(3) /* depth of filter */,
+ "channel value mismatch");
+
+ const uint32_t input_height = input_shape.dim(1);
+ const uint32_t input_width = input_shape.dim(2);
+
+ const uint32_t filter_height = filter_shape.dim(1);
+ const uint32_t filter_width = filter_shape.dim(2);
+
+ const uint32_t stride_width = tr_conv2d->stride()->horizontal();
+ const uint32_t stride_height = tr_conv2d->stride()->vertical();
+
+ const uint32_t pad_top = tr_conv2d->pad()->top();
+ const uint32_t pad_bottom = tr_conv2d->pad()->bottom();
+
+ const uint32_t pad_left = tr_conv2d->pad()->left();
+ const uint32_t pad_right = tr_conv2d->pad()->right();
+
+ // TODO Support dilations
+
+ const uint32_t output_height =
+ compute_transposed_out_size(input_height, pad_top + pad_bottom, filter_height, stride_height);
+ const uint32_t output_width =
+ compute_transposed_out_size(input_width, pad_left + pad_right, filter_width, stride_width);
+
+ const uint32_t batches = input_shape.dim(0);
+ const uint32_t input_depth = input_shape.dim(3);
+ const uint32_t output_depth = filter_shape.dim(0); // count of filter
+
+ Shape output_shape{batches, output_height, output_width, output_depth};
+ auto output_buf = make_buffer<RET_T, LexicalLayout>(output_shape);
+
+ // initialize output
+ for (IndexEnumerator e{output_shape}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ output_buf.at(index) = static_cast<RET_T>(0);
+ }
+
+ // Loop through input elements one at a time.
+ for (uint32_t batch = 0; batch < batches; ++batch)
+ {
+ for (uint32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (uint32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (uint32_t in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ // Loop through the output elements it will influence
+ const int out_x_origin = (in_x * stride_width) - pad_left;
+ const int out_y_origin = (in_y * stride_height) - pad_top;
+ for (uint32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (uint32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ for (uint32_t out_channel = 0; out_channel < output_depth; ++out_channel)
+ {
+ // Compute output element location
+ const int out_x = out_x_origin + filter_x;
+ const int out_y = out_y_origin + filter_y;
+ // We cannot accumulate out of bounds
+ if ((out_x >= 0) && ((unsigned)out_x < output_width) && (out_y >= 0) &&
+ ((unsigned)out_y < output_height))
+ {
+ auto input_value = input_buf->at(Index({batch, in_y, in_x, in_channel}));
+ auto filter_value =
+ filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
+ output_buf.at(Index({batch, (unsigned)out_y, (unsigned)out_x, out_channel})) +=
+ input_value * filter_value;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return output_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d)
+{
+ auto ifm_data = annot_data(tr_conv2d->ifm());
+ auto ker_data = annot_data(tr_conv2d->ker());
+
+ validate(ifm_data, "Can't find input data of TransposedConv2D");
+ validate(ifm_data->shape()->rank() == 4, "ifm rank must be 4");
+
+ validate(ker_data, "Can't find kernel data of TransposedConv2D");
+ validate(ker_data->shape()->rank() == 4, "Kernel rank must be 4");
+
+ validate(annot_domain(tr_conv2d->ifm()) == loco::Domain::Feature,
+ "IFM of TransposedConv2D is not feature");
+ validate(annot_domain(tr_conv2d->ker()) == loco::Domain::Filter,
+ "Kernel of TransposedConv2D is not filter");
+
+ std::unique_ptr<NodeData> tr_conv2d_result = nullptr;
+
+ if (ifm_data->dtype() == loco::DataType::FLOAT32 && ker_data->dtype() == loco::DataType::FLOAT32)
+ {
+ auto ifm_buf = ifm_data->as_f32_bufptr();
+ auto ker_buf = ker_data->as_f32_bufptr();
+
+ auto tr_conv2d_buf = calc_tr_conv2D<float, float, float>(tr_conv2d, ifm_buf, ker_buf);
+
+ tr_conv2d_result = make_data(tr_conv2d_buf);
+ }
+ else
+ throw std::runtime_error("NYI for these DataTypes");
+
+ assert(tr_conv2d_result != nullptr);
+
+ annot_data(tr_conv2d, std::move(tr_conv2d_result));
+ annot_domain(tr_conv2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TransposedConv2D.test.cpp b/compiler/locomotiv/src/Node/TransposedConv2D.test.cpp
new file mode 100644
index 000000000..bd955a06b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TransposedConv2D.test.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
/**
 * @brief Build a FeatureEncode/FilterEncode -> TransposedConv2D graph, run the
 *        interpreter, and compare the result against expected_ofm element-wise.
 *
 * @param ifm          raw input feature data, lexical (row-major) order over ifm_shape
 * @param ker          raw kernel data, lexical order over ker_shape
 * @param expected_ofm raw expected output data, lexical order over ofm_shape
 * @param stride_v/h   vertical/horizontal stride
 * @param pad_*        per-side paddings (default 0)
 */
void run_test(const float *ifm, const float *ker, const float *expected_ofm, const Shape &ifm_shape,
              const Shape ker_shape, const Shape ofm_shape, const uint32_t stride_v,
              const uint32_t stride_h, const uint32_t pad_top = 0, const uint32_t pad_bottom = 0,
              const uint32_t pad_left = 0, const uint32_t pad_right = 0)
{
  auto g = loco::make_graph();

  // Fill output data of FeatureEncode from ifm
  // (overlay gives an indexed view over the raw array without copying)
  auto ifm_enc = g->nodes()->create<loco::FeatureEncode>();
  {
    auto ifm_enc_buf = make_buffer<float, LexicalLayout>(ifm_shape);
    auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm));
    for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance())
    {
      const auto &ind = e.current();
      ifm_enc_buf.at(ind) = ifm_overlay.at(ind);
    }

    auto enc_data = locomotiv::make_data(ifm_enc_buf);
    locomotiv::annot_data(ifm_enc, std::move(enc_data));
    locomotiv::annot_domain(ifm_enc, loco::Domain::Feature);
  }

  // Fill output data of FilterEncode from ker
  auto ker_enc = g->nodes()->create<loco::FilterEncode>();
  {
    auto ker_enc_buf = make_buffer<float, LexicalLayout>(ker_shape);
    auto ker_overlay = make_overlay<float, LexicalLayout>(ker_shape, const_cast<float *>(ker));
    for (nncc::core::ADT::tensor::IndexEnumerator e{ker_shape}; e.valid(); e.advance())
    {
      const auto &ind = e.current();
      ker_enc_buf.at(ind) = ker_overlay.at(ind);
    }

    auto enc_data = locomotiv::make_data(ker_enc_buf);
    locomotiv::annot_data(ker_enc, std::move(enc_data));
    locomotiv::annot_domain(ker_enc, loco::Domain::Filter);
  }

  // build TransposedConv2D
  auto tr_conv2d = g->nodes()->create<loco::TransposedConv2D>();
  tr_conv2d->ifm(ifm_enc);
  tr_conv2d->ker(ker_enc);
  tr_conv2d->stride()->vertical(stride_v);
  tr_conv2d->stride()->horizontal(stride_h);
  tr_conv2d->pad()->top(pad_top);
  tr_conv2d->pad()->bottom(pad_bottom);
  tr_conv2d->pad()->left(pad_left);
  tr_conv2d->pad()->right(pad_right);

  // run interpreter
  locomotiv::NodeExecution::get().run(tr_conv2d);

  // get result of calculation
  auto conv2d_result = locomotiv::annot_data(tr_conv2d);

  // check the result
  ASSERT_NE(conv2d_result, nullptr);
  ASSERT_TRUE(conv2d_result->dtype() == loco::DataType::FLOAT32);
  ASSERT_TRUE(*(conv2d_result->shape()) == ofm_shape);

  auto ofm_overlay =
      make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
  for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
  {
    const auto &ind = e.current();
    ASSERT_FLOAT_EQ(conv2d_result->as_f32_bufptr()->at(ind), ofm_overlay.at(ind));
  }

  ASSERT_EQ(locomotiv::annot_domain(tr_conv2d), loco::Domain::Feature);
}
+
+} // namespace
+
+// clang-format off
+/*
+ifm = tf.constant(1.1, shape = [1, 2, 2, 4])
+ker = tf.constant(2.2, shape = [3, 3, 2, 4])
+tr_conv = tf.nn.conv2d_transpose(ifm, ker, output_shape = (1, 5, 5, 2), strides = [1, 2, 2, 1], padding = "VALID")
+
+with tf.Session() as session:
+ tr_conv_data = session.run(tr_conv)
+ */
+TEST(NodeExecution_TransposedConv2D, f32)
+{
+ using nncc::core::ADT::tensor::Shape;
+
+ float ifm[1 * 2 * 2 * 4];
+ for (int n = 0; n < 1 * 2 * 2 * 4; n++)
+ ifm[n] = 1.1;
+
+ float ker[2 * 3 * 3 * 4]; // NHWC
+ for (int n = 0; n < 2 * 3 * 3 * 4; n++)
+ ker[n] = 2.2;
+
+ float ofm[1 * 5 * 5 * 2] = {9.68, 9.68, 9.68, 9.68, 19.36, 19.36, 9.68, 9.68, 9.68, 9.68,
+ 9.68, 9.68, 9.68, 9.68, 19.36, 19.36, 9.68, 9.68, 9.68, 9.68,
+ 19.36, 19.36, 19.36, 19.36, 38.72, 38.72, 19.36, 19.36, 19.36, 19.36,
+ 9.68, 9.68, 9.68, 9.68, 19.36, 19.36, 9.68, 9.68, 9.68, 9.68,
+ 9.68, 9.68, 9.68, 9.68, 19.36, 19.36, 9.68, 9.68, 9.68, 9.68};
+
+ run_test(ifm, ker, ofm,
+ Shape{1, 2, 2, 4}, Shape{2, 3, 3, 4}, Shape{1, 5, 5, 2}, // shapes of ifm, ker, ofm
+ 2, 2 // stride
+ );
+}
+// clang-format on
diff --git a/compiler/locomotiv/src/NodeData.cpp b/compiler/locomotiv/src/NodeData.cpp
new file mode 100644
index 000000000..69ba4a1c2
--- /dev/null
+++ b/compiler/locomotiv/src/NodeData.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+
+namespace locomotiv
+{
+
+template <> std::unique_ptr<NodeData> make_data(const NodeData::Buffer<int32_t> &buf)
+{
+ return std::unique_ptr<NodeDataImpl>(new NodeDataImpl(buf));
+}
+
+template <> std::unique_ptr<NodeData> make_data(const NodeData::Buffer<float> &buf)
+{
+ return std::unique_ptr<NodeDataImpl>(new NodeDataImpl(buf));
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/NodeData.test.cpp b/compiler/locomotiv/src/NodeData.test.cpp
new file mode 100644
index 000000000..b1c9832d5
--- /dev/null
+++ b/compiler/locomotiv/src/NodeData.test.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locomotiv/NodeData.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeData, as_s32_buffer_wrapper)
+{
+ const Shape shape{1};
+ auto buf = make_buffer<int32_t, LexicalLayout>(shape);
+ buf.at(Index{0}) = 42;
+
+ auto data = locomotiv::make_data(buf);
+
+ ASSERT_EQ(data->dtype(), loco::DataType::S32);
+ ASSERT_EQ(*(data->shape()), shape);
+ ASSERT_EQ(data->as_s32_bufptr()->at(Index{0}), 42);
+}
+
+TEST(NodeData, as_f32_buffer_wrapper)
+{
+ const Shape shape{1};
+ auto buf = make_buffer<float, LexicalLayout>(shape);
+ buf.at(Index{0}) = 3.14f;
+
+ auto data = locomotiv::make_data(buf);
+
+ ASSERT_EQ(data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(data->shape()), shape);
+ ASSERT_FLOAT_EQ(data->as_f32_bufptr()->at(Index{0}), 3.14f);
+}
diff --git a/compiler/locomotiv/src/NodeDataImpl.cpp b/compiler/locomotiv/src/NodeDataImpl.cpp
new file mode 100644
index 000000000..2efebe5a9
--- /dev/null
+++ b/compiler/locomotiv/src/NodeDataImpl.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeDataImpl.h"
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+namespace
+{
+
+class NodeDataAnnotation final : public loco::NodeAnnotation
+{
+public:
+ NodeDataAnnotation(std::unique_ptr<locomotiv::NodeData> &&data) : _data{std::move(data)}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const locomotiv::NodeData *data(void) const { return _data.get(); }
+
+private:
+ std::unique_ptr<locomotiv::NodeData> _data;
+};
+
+} // namespace
+
+namespace locomotiv
+{
+
+template <> NodeDataImpl::NodeDataImpl(const Buffer<int32_t> &buf)
+{
+ _dtype = loco::DataType::S32;
+ _s32.reset(new Buffer<int32_t>(buf));
+ _shape = const_cast<Shape *>(&(_s32->shape()));
+}
+
+template <> NodeDataImpl::NodeDataImpl(const Buffer<float> &buf)
+{
+ _dtype = loco::DataType::FLOAT32;
+ _f32.reset(new Buffer<float>(buf));
+ _shape = const_cast<Shape *>(&(_f32->shape()));
+}
+
+void annot_data(loco::Node *node, std::unique_ptr<NodeData> &&data)
+{
+ node->annot(stdex::make_unique<NodeDataAnnotation>(std::move(data)));
+}
+
+const NodeData *annot_data(const loco::Node *node)
+{
+ if (auto annot = node->annot<NodeDataAnnotation>())
+ {
+ return annot->data();
+ }
+
+ return nullptr;
+}
+
+void erase_annot_data(loco::Node *node) { node->annot<NodeDataAnnotation>(nullptr); }
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/NodeDataImpl.h b/compiler/locomotiv/src/NodeDataImpl.h
new file mode 100644
index 000000000..bdd9db386
--- /dev/null
+++ b/compiler/locomotiv/src/NodeDataImpl.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _LOCOMOTIV_NODEDATAIMPL_H_
+#define _LOCOMOTIV_NODEDATAIMPL_H_
+
+#include "locomotiv/NodeData.h"
+
+namespace locomotiv
+{
+
+/**
+ * @brief An implementation of NodeData interface
+ */
+class NodeDataImpl final : public NodeData
+{
+public:
+ template <typename T> using Buffer = nncc::core::ADT::tensor::Buffer<T>;
+ using Shape = nncc::core::ADT::tensor::Shape;
+
+ template <typename DT> NodeDataImpl(const Buffer<DT> &buf);
+
+ const loco::DataType &dtype() const override { return _dtype; }
+
+ const Shape *shape() const override { return _shape; }
+
+ const Buffer<int32_t> *as_s32_bufptr() const override { return _s32.get(); }
+
+ const Buffer<float> *as_f32_bufptr() const override { return _f32.get(); }
+
+private:
+ loco::DataType _dtype = loco::DataType::Unknown;
+ Shape *_shape = nullptr;
+ std::unique_ptr<Buffer<int32_t>> _s32 = nullptr;
+ std::unique_ptr<Buffer<float>> _f32 = nullptr;
+};
+
+/// @brief Bind "NodeData" to "Node"
+void annot_data(loco::Node *node, std::unique_ptr<NodeData> &&data);
+
+/**
+ * @brief Get "NodeData" for a given node
+ *
+ * NOTE Returns nullptr if "NodeData" is not bound yet
+ */
+const NodeData *annot_data(const loco::Node *node);
+
+/// @brief Release "NodeData" bound to a given node
+void erase_annot_data(loco::Node *node);
+
+} // namespace locomotiv
+
+#endif // _LOCOMOTIV_NODEDATAIMPL_H_
diff --git a/compiler/locomotiv/src/NodeDataImpl.test.cpp b/compiler/locomotiv/src/NodeDataImpl.test.cpp
new file mode 100644
index 000000000..b85956063
--- /dev/null
+++ b/compiler/locomotiv/src/NodeDataImpl.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeDataImpl, as_annotation)
+{
+ const Shape shape{1};
+ auto buf = make_buffer<float, LexicalLayout>(shape);
+ buf.at(Index{0}) = 3.14f;
+
+ std::unique_ptr<locomotiv::NodeData> data = locomotiv::make_data(buf);
+
+ auto g = loco::make_graph();
+ auto node = g->nodes()->create<loco::Pull>();
+
+ ASSERT_EQ(locomotiv::annot_data(node), nullptr);
+
+ // Set annotation
+ locomotiv::annot_data(node, std::move(data));
+
+ // Get annotation
+ const locomotiv::NodeData *obtained = locomotiv::annot_data(node);
+ ASSERT_NE(obtained, nullptr);
+
+ ASSERT_EQ(obtained->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(obtained->shape()), shape);
+ ASSERT_FLOAT_EQ(obtained->as_f32_bufptr()->at(Index{0}), 3.14f);
+
+ // Erase annotation
+ locomotiv::erase_annot_data(node);
+ ASSERT_EQ(locomotiv::annot_data(node), nullptr);
+}
diff --git a/compiler/locomotiv/src/NodeDomain.cpp b/compiler/locomotiv/src/NodeDomain.cpp
new file mode 100644
index 000000000..709b9fe34
--- /dev/null
+++ b/compiler/locomotiv/src/NodeDomain.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeDomain.h"
+
+#include <cassert>
+
+namespace locomotiv
+{
+
+struct NodeDomain final : public loco::NodeAnnotation
+{
+ NodeDomain(const loco::Domain &domain) : value(domain)
+ {
+ // DO NOTHING
+ }
+
+ loco::Domain value = loco::Domain::Unknown;
+};
+
+void annot_domain(loco::Node *node, const loco::Domain &domain)
+{
+ assert(domain != loco::Domain::Unknown);
+ auto node_domain = std::unique_ptr<NodeDomain>(new NodeDomain(domain));
+ assert(node_domain);
+ node->annot(std::move(node_domain));
+}
+
+loco::Domain annot_domain(const loco::Node *node)
+{
+ auto node_domain = node->annot<NodeDomain>();
+ if (node_domain)
+ return node_domain->value;
+ else
+ return loco::Domain::Unknown;
+}
+
+void erase_annot_domain(loco::Node *node) { node->annot<NodeDomain>(nullptr); }
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/NodeDomain.h b/compiler/locomotiv/src/NodeDomain.h
new file mode 100644
index 000000000..fc93f77f7
--- /dev/null
+++ b/compiler/locomotiv/src/NodeDomain.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _LOCOMOTIV_NODEDOMAIN_H_
+#define _LOCOMOTIV_NODEDOMAIN_H_
+
+#include <loco.h>
+#include <loco/IR/Domain.h>
+
+namespace locomotiv
+{
+
+/// @brief Wrapper to annotate domain to node. Cannot annotate unknown domain.
+void annot_domain(loco::Node *node, const loco::Domain &domain);
+
+/// @brief Wrapper to get domain annotation of node
+loco::Domain annot_domain(const loco::Node *node);
+
+/// @brief Erase already annotated node domain
+void erase_annot_domain(loco::Node *node);
+
+} // namespace locomotiv
+
+#endif // _LOCOMOTIV_NODEDOMAIN_H_
diff --git a/compiler/locomotiv/src/NodeDomain.test.cpp b/compiler/locomotiv/src/NodeDomain.test.cpp
new file mode 100644
index 000000000..9cfcf2eb8
--- /dev/null
+++ b/compiler/locomotiv/src/NodeDomain.test.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeDomain.h"
+
+#include <gtest/gtest.h>
+
+TEST(NodeDomain, as_annotation)
+{
+ loco::Pull node;
+
+ ASSERT_EQ(locomotiv::annot_domain(&node), loco::Domain::Unknown);
+
+ // Set annotation
+ locomotiv::annot_domain(&node, loco::Domain::Tensor);
+
+ // Get annotation
+ const loco::Domain obtained = locomotiv::annot_domain(&node);
+ ASSERT_EQ(obtained, loco::Domain::Tensor);
+
+ // Erase annotation
+ locomotiv::erase_annot_domain(&node);
+ ASSERT_EQ(locomotiv::annot_domain(&node), loco::Domain::Unknown);
+}
diff --git a/compiler/locomotiv/src/NodeExecution.cpp b/compiler/locomotiv/src/NodeExecution.cpp
new file mode 100644
index 000000000..e532b5af6
--- /dev/null
+++ b/compiler/locomotiv/src/NodeExecution.cpp
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDomain.h"
+#include "NodeDataImpl.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+#include <stdexcept>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+namespace locomotiv
+{
+
+float UnaryFunc::apply(float) const { throw std::runtime_error{"F32 is not supported yet"}; }
+int32_t UnaryFunc::apply(int32_t) const { throw std::runtime_error{"S32 is not supported yet"}; }
+
+float BinaryFunc::apply(float, float) const
+{
+ throw std::runtime_error{"F32 is not supported yet"};
+}
+
+int32_t BinaryFunc::apply(int32_t, int32_t) const
+{
+ throw std::runtime_error{"S32 is not supported yet"};
+}
+
+// TODO Use visitor pattern of loco when available
+void NodeExecution::run(loco::Node *node)
+{
+ erase_annot_data(node);
+
+#define NODE(Name) \
+ if (as<loco::Name>(node)) \
+ { \
+ execute(as<loco::Name>(node)); \
+ return; \
+ }
+#include "Node.lst"
+#undef NODE
+
+ throw std::runtime_error("Not supported loco::Node type");
+}
+
+void NodeExecution::eltwise_unary(loco::Node *node, const UnaryFunc &f)
+{
+ auto input_node = node->arg(0);
+ auto input_domain = annot_domain(input_node);
+ auto input_data = annot_data(input_node);
+ auto input_dtype = input_data->dtype();
+
+ validate(input_data, "Input is not ready");
+ validate(input_domain != loco::Domain::Unknown, "Input domain is unknown");
+
+ auto output_node = node;
+ // Element-wise Unary Operation does not affect Domain
+ auto output_domain = input_domain;
+  // Element-wise Unary Operation does not affect Data Type (ASSUMPTION)
+ //
+ // TODO Check this assumption
+ auto output_dtype = input_dtype;
+ std::unique_ptr<NodeData> output_data = nullptr;
+
+ switch (output_dtype)
+ {
+ case loco::DataType::FLOAT32:
+ {
+ auto input_bufptr = input_data->as_f32_bufptr();
+ auto output_buf = make_buffer<float, LexicalLayout>(*input_data->shape());
+ auto *shape = input_data->shape();
+
+ for (IndexEnumerator e{*shape}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ output_buf.at(index) = f.apply(input_bufptr->at(index));
+ }
+
+ output_data = make_data(output_buf);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+
+ assert(output_data != nullptr);
+ annot_data(output_node, std::move(output_data));
+ annot_domain(output_node, output_domain);
+}
+
+void NodeExecution::eltwise_binary(loco::Node *node, const BinaryFunc &f)
+{
+ auto lhs_node = node->arg(0);
+ auto rhs_node = node->arg(1);
+ auto lhs_data = annot_data(lhs_node);
+ auto rhs_data = annot_data(rhs_node);
+
+ validate(lhs_data && rhs_data, "Input not ready");
+ validate(annot_domain(lhs_node) == annot_domain(rhs_node), "Wrong input domain");
+ validate(lhs_data->dtype() == rhs_data->dtype(), "Wrong input type");
+ validate(*lhs_data->shape() == *rhs_data->shape(), "Wrong input shape");
+
+ auto out_node = node;
+ std::unique_ptr<NodeData> out_data = nullptr;
+
+ switch (lhs_data->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ auto lhs_bufptr = lhs_data->as_f32_bufptr();
+ auto rhs_bufptr = rhs_data->as_f32_bufptr();
+ auto out_bufptr = make_buffer<float, LexicalLayout>(*lhs_data->shape());
+
+ auto *shape = lhs_data->shape();
+
+ for (IndexEnumerator e{*shape}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ out_bufptr.at(index) = f.apply(lhs_bufptr->at(index), rhs_bufptr->at(index));
+ }
+
+ out_data = make_data(out_bufptr);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+
+ assert(out_data != nullptr);
+ annot_data(out_node, std::move(out_data));
+ annot_domain(out_node, annot_domain(lhs_node));
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/NodeExecution.h b/compiler/locomotiv/src/NodeExecution.h
new file mode 100644
index 000000000..363188d38
--- /dev/null
+++ b/compiler/locomotiv/src/NodeExecution.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _LOCOMOTIV_NODEEXECUTION_H_
+#define _LOCOMOTIV_NODEEXECUTION_H_
+
+#include <loco.h>
+
+namespace locomotiv
+{
+
+struct UnaryFunc
+{
+ virtual ~UnaryFunc() = default;
+
+ virtual float apply(float) const;
+ virtual int32_t apply(int32_t) const;
+};
+
+// Q. How to support mixed precision binary operators?
+struct BinaryFunc
+{
+ virtual ~BinaryFunc() = default;
+
+ virtual float apply(float, float) const;
+ virtual int32_t apply(int32_t, int32_t) const;
+};
+
+/**
+ * @brief Helper class for Session, responsible to process one node calculation.
+ */
+class NodeExecution
+{
+public:
+ /// @brief Run calculation for one unspecified Node
+ void run(loco::Node *node);
+
+ static NodeExecution &get()
+ {
+ static NodeExecution me;
+ return me;
+ }
+
+private:
+ NodeExecution() {}
+
+ template <typename Derived> Derived *as(loco::Node *node)
+ {
+ return dynamic_cast<Derived *>(node);
+ }
+
+// clang-format off
+ /**
+ * @brief Calculate for one specified node and update its result as NodeData.
+ * Abort program when its ingredients are not ready or not supported.
+ *
+ * @note Definitions of overloaded execute() are in 'Node/' directory
+ */
+// clang-format on
+#define NODE(Name) void execute(loco::Name *);
+#include "Node.lst"
+#undef NODE
+
+ void eltwise_unary(loco::Node *node, const UnaryFunc &f);
+ void eltwise_binary(loco::Node *node, const BinaryFunc &f);
+};
+
+} // namespace locomotiv
+
+#endif // _LOCOMOTIV_NODEEXECUTION_H_
diff --git a/compiler/locomotiv/src/Session.cpp b/compiler/locomotiv/src/Session.cpp
new file mode 100644
index 000000000..841a14a5c
--- /dev/null
+++ b/compiler/locomotiv/src/Session.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locomotiv/Session.h"
+#include "locomotiv/NodeData.h"
+
+#include "UserData.h"
+#include "NodeDataImpl.h"
+#include "NodeExecution.h"
+#include "NodeDomain.h"
+
+#include <cassert>
+
+namespace locomotiv
+{
+
+Session::~Session()
+{
+ for (uint32_t i = 0; i < _graph->nodes()->size(); ++i)
+ {
+ auto node = _graph->nodes()->at(i);
+ erase_user_data(node);
+ erase_annot_data(node);
+ erase_annot_domain(node);
+ }
+}
+
+void Session::set_input(uint32_t index, std::unique_ptr<NodeData> &&data)
+{
+ assert(index < input_size());
+
+ // Check whether already annotated
+ auto pull = loco::pull_node(_graph, index);
+ if (user_data(pull))
+ {
+ throw std::runtime_error("Graph input already has NodeData");
+ }
+
+ // Check data type match
+ if (pull->dtype() != data->dtype())
+ {
+ throw std::runtime_error("Data type mismatch");
+ }
+
+ // Check shape match
+ auto shape = data->shape();
+ if (pull->rank() != shape->rank())
+ {
+ throw std::runtime_error("Shape rank mismatch");
+ }
+ for (uint32_t i = 0; i < pull->rank(); ++i)
+ {
+ if (pull->dim(i).known() && pull->dim(i).value() != shape->dim(i))
+ {
+ throw std::runtime_error("Shape dimension mismatch");
+ }
+ }
+
+ user_data(pull, std::move(data));
+}
+
+void Session::infer()
+{
+ auto schedules = loco::postorder_traversal(_outputs);
+
+ for (auto node : schedules)
+ {
+ NodeExecution::get().run(node);
+ }
+}
+
+const NodeData *Session::get_output(uint32_t index)
+{
+ assert(index < output_size());
+
+ auto output_node = _outputs.at(index);
+ return annot_data(output_node);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Session.test.cpp b/compiler/locomotiv/src/Session.test.cpp
new file mode 100644
index 000000000..6d4a2414f
--- /dev/null
+++ b/compiler/locomotiv/src/Session.test.cpp
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locomotiv/Session.h"
+#include "locomotiv/NodeData.h"
+
+#include "UserData.h"
+
+#include <loco.h>
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <array>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(Session, graph_IO_size)
+{
+ // Make graph
+ auto g = loco::make_graph();
+
+ // inputs
+ const uint32_t inputs = 2;
+ for (uint32_t i = 0; i < inputs; ++i)
+ {
+ auto pull = g->nodes()->create<loco::Pull>();
+ loco::link(g->inputs()->create(), pull);
+ }
+
+ // outputs
+ const uint32_t outputs = 3;
+ for (uint32_t o = 0; o < outputs; ++o)
+ {
+ auto push = g->nodes()->create<loco::Push>();
+ loco::link(g->outputs()->create(), push);
+ }
+
+ // Make session
+ locomotiv::Session s(g.get());
+
+ ASSERT_EQ(s.input_size(), inputs);
+ ASSERT_EQ(s.output_size(), outputs);
+}
+
+TEST(Session, set_input)
+{
+ // Make graph
+ auto g = loco::make_graph();
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->rank(1);
+ pull->dim(0) = 1;
+ loco::link(g->inputs()->create(), pull);
+
+ // Make good data
+ auto buf = make_buffer<float, LexicalLayout>(Shape{1});
+ auto data = locomotiv::make_data(buf);
+
+ // Make data with different data type
+ auto buf_not_dtype = make_buffer<int32_t, LexicalLayout>(Shape{1});
+ auto data_not_dtype = locomotiv::make_data(buf_not_dtype);
+
+ // Make data with different rank
+ auto buf_not_rank = make_buffer<float, LexicalLayout>(Shape{1, 1});
+ auto data_not_rank = locomotiv::make_data(buf_not_rank);
+
+ // Make data with different dimension
+ auto buf_not_dim = make_buffer<float, LexicalLayout>(Shape{2});
+ auto data_not_dim = locomotiv::make_data(buf_not_dim);
+
+ // Make session
+ locomotiv::Session s(g.get());
+
+ ASSERT_ANY_THROW(s.set_input(0, std::move(data_not_dtype)));
+ ASSERT_ANY_THROW(s.set_input(0, std::move(data_not_rank)));
+ ASSERT_ANY_THROW(s.set_input(0, std::move(data_not_dim)));
+ ASSERT_NO_THROW(s.set_input(0, std::move(data)));
+ ASSERT_ANY_THROW(s.set_input(0, std::move(data)));
+}
+
+TEST(Session, inference_identity)
+{
+ std::vector<std::unique_ptr<loco::Graph>> graphs;
+
+ // pull-push / f32 / known shape
+ {
+ auto g = loco::make_graph();
+
+ // Pull node
+ auto pull_node = g->nodes()->create<loco::Pull>();
+ pull_node->dtype(loco::DataType::FLOAT32);
+ pull_node->rank(1);
+ pull_node->dim(0) = 1;
+
+ // Push node
+ auto push_node = g->nodes()->create<loco::Push>();
+ push_node->from(pull_node);
+
+ // Input
+ auto graph_input = g->inputs()->create();
+ loco::link(graph_input, pull_node);
+
+ // Output
+ auto graph_output = g->outputs()->create();
+ loco::link(graph_output, push_node);
+
+ graphs.push_back(std::move(g));
+ }
+
+ // pull-push / f32 / unknown shape
+ {
+ auto g = loco::make_graph();
+
+ // Pull node
+ auto pull_node = g->nodes()->create<loco::Pull>();
+ pull_node->dtype(loco::DataType::FLOAT32);
+ pull_node->rank(1);
+ pull_node->dim(0) = loco::make_dimension();
+
+ // Push node
+ auto push_node = g->nodes()->create<loco::Push>();
+ push_node->from(pull_node);
+
+ // Input
+ auto graph_input = g->inputs()->create();
+ loco::link(graph_input, pull_node);
+
+ // Output
+ auto graph_output = g->outputs()->create();
+ loco::link(graph_output, push_node);
+
+ graphs.push_back(std::move(g));
+ }
+
+ for (auto it = graphs.begin(); it != graphs.end(); ++it)
+ {
+ auto g = it->get();
+ locomotiv::Session s(g);
+
+ const Shape shape{1};
+ auto buf = make_buffer<float, LexicalLayout>(shape);
+ buf.at(Index{0}) = 3.14f;
+ auto data = locomotiv::make_data(buf);
+
+ // Input not ready
+ ASSERT_ANY_THROW(s.infer());
+
+ s.set_input(0, std::move(data));
+
+ // Valid run
+ ASSERT_NO_THROW(s.infer());
+ // Multiple run is possible
+ ASSERT_NO_THROW(s.infer());
+
+ auto output_data = s.get_output(0);
+ ASSERT_NE(output_data, nullptr);
+ ASSERT_EQ(output_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(output_data->shape()), Shape{1});
+ ASSERT_EQ(output_data->as_f32_bufptr()->at(Index{0}), 3.14f);
+ }
+}
+
+TEST(Session, session_for_subgraph)
+{
+ /*
+ * Make following graph:
+ * ConstGen_1 --
+ * \
+ * ConstGen_2 --- TensorConcat_1 --- TensorConcat_3 --- Push
+ * /
+ * ConstGen_3 --- TensorConcat_2 --
+ * /
+ * ConstGen_4 --
+ */
+ auto g = loco::make_graph();
+
+ auto c1 = g->nodes()->create<loco::ConstGen>();
+ auto c2 = g->nodes()->create<loco::ConstGen>();
+ auto c3 = g->nodes()->create<loco::ConstGen>();
+ auto c4 = g->nodes()->create<loco::ConstGen>();
+
+ c1->dtype(loco::DataType::FLOAT32);
+ c2->dtype(loco::DataType::FLOAT32);
+ c3->dtype(loco::DataType::FLOAT32);
+ c4->dtype(loco::DataType::FLOAT32);
+ c1->shape({1});
+ c2->shape({1});
+ c3->shape({1});
+ c4->shape({1});
+ c1->size<loco::DataType::FLOAT32>(1);
+ c2->size<loco::DataType::FLOAT32>(1);
+ c3->size<loco::DataType::FLOAT32>(1);
+ c4->size<loco::DataType::FLOAT32>(1);
+
+ c1->at<loco::DataType::FLOAT32>(0) = 0.1f;
+ c2->at<loco::DataType::FLOAT32>(0) = 0.2f;
+ c3->at<loco::DataType::FLOAT32>(0) = 0.3f;
+ c4->at<loco::DataType::FLOAT32>(0) = 0.4f;
+
+ auto t1 = g->nodes()->create<loco::TensorConcat>();
+ auto t2 = g->nodes()->create<loco::TensorConcat>();
+ auto t3 = g->nodes()->create<loco::TensorConcat>();
+
+ // Note: default concat axis is 0
+ t1->lhs(c1);
+ t1->rhs(c2);
+ t2->lhs(c3);
+ t2->rhs(c4);
+ t3->lhs(t1);
+ t3->rhs(t2);
+
+ auto push = g->nodes()->create<loco::Push>();
+ push->from(t3);
+
+ {
+ // Session to get t1 only
+ locomotiv::Session s(g.get(), {t1});
+ ASSERT_EQ(s.output_size(), 1);
+ ASSERT_EQ(s.get_output_node(0), dynamic_cast<loco::Node *>(t1));
+
+ s.infer();
+
+ auto t1_data = s.get_output(0);
+ ASSERT_NE(t1_data, nullptr);
+ ASSERT_EQ(*(t1_data->shape()), Shape{2});
+
+ auto t1_buf = t1_data->as_f32_bufptr();
+ ASSERT_EQ(t1_buf->at({0}), 0.1f);
+ ASSERT_EQ(t1_buf->at({1}), 0.2f);
+ }
+
+ {
+ // Session to get t2 only
+ locomotiv::Session s(g.get(), {t2});
+ ASSERT_EQ(s.output_size(), 1);
+ ASSERT_EQ(s.get_output_node(0), dynamic_cast<loco::Node *>(t2));
+
+ s.infer();
+
+ auto t2_data = s.get_output(0);
+ ASSERT_NE(t2_data, nullptr);
+ ASSERT_EQ(*(t2_data->shape()), Shape{2});
+
+ auto t2_buf = t2_data->as_f32_bufptr();
+ ASSERT_EQ(t2_buf->at({0}), 0.3f);
+ ASSERT_EQ(t2_buf->at({1}), 0.4f);
+ }
+
+ {
+ // Session to get t2 and push
+ locomotiv::Session s(g.get(), {t2, push});
+ ASSERT_EQ(s.output_size(), 2);
+ ASSERT_EQ(s.get_output_node(0), dynamic_cast<loco::Node *>(t2));
+ ASSERT_EQ(s.get_output_node(1), dynamic_cast<loco::Node *>(push));
+
+ s.infer();
+
+ auto t2_data = s.get_output(0);
+ ASSERT_NE(t2_data, nullptr);
+ ASSERT_EQ(*(t2_data->shape()), Shape{2});
+
+ auto t2_buf = t2_data->as_f32_bufptr();
+ ASSERT_EQ(t2_buf->at({0}), 0.3f);
+ ASSERT_EQ(t2_buf->at({1}), 0.4f);
+
+ auto push_data = s.get_output(1);
+ ASSERT_NE(push_data, nullptr);
+ ASSERT_EQ(*(push_data->shape()), Shape{4});
+
+ auto push_buf = push_data->as_f32_bufptr();
+ ASSERT_EQ(push_buf->at({0}), 0.1f);
+ ASSERT_EQ(push_buf->at({1}), 0.2f);
+ ASSERT_EQ(push_buf->at({2}), 0.3f);
+ ASSERT_EQ(push_buf->at({3}), 0.4f);
+ }
+}
+
+TEST(Session, ctor_by_range)
+{
+ // Make graph
+ auto g = loco::make_graph();
+
+ auto constgen = g->nodes()->create<loco::ConstGen>();
+ auto relu = g->nodes()->create<loco::ReLU>();
+ auto push = g->nodes()->create<loco::Push>();
+
+ constgen->dtype(loco::DataType::FLOAT32);
+ constgen->shape({2});
+ constgen->size<loco::DataType::FLOAT32>(2);
+ constgen->at<loco::DataType::FLOAT32>(0) = 0.1f;
+ constgen->at<loco::DataType::FLOAT32>(1) = -0.1f;
+
+ relu->input(constgen);
+ push->from(relu);
+
+ std::array<loco::Node *, 2> custom_outputs = {constgen, push};
+
+ // Make Session by range
+ locomotiv::Session s(g.get(), custom_outputs.begin(), custom_outputs.end());
+
+ s.infer();
+
+ auto constgen_data = s.get_output(0);
+ ASSERT_NE(constgen_data, nullptr);
+ ASSERT_EQ(*(constgen_data->shape()), Shape{2});
+
+ auto constgen_buf = constgen_data->as_f32_bufptr();
+ ASSERT_EQ(constgen_buf->at({0}), 0.1f);
+ ASSERT_EQ(constgen_buf->at({1}), -0.1f);
+
+ auto push_data = s.get_output(1);
+ ASSERT_NE(push_data, nullptr);
+ ASSERT_EQ(*(push_data->shape()), Shape{2});
+
+ auto push_buf = push_data->as_f32_bufptr();
+ ASSERT_EQ(push_buf->at({0}), 0.1f);
+ ASSERT_EQ(push_buf->at({1}), 0.0f);
+}
+
+// Below here is internal test for locomotiv, i.e. not public usage of locomotiv
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+TEST(Session, dtor)
+{
+ auto g = loco::make_graph();
+
+ // Pull node
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->rank(1);
+ pull->dim(0) = 1;
+
+ // Input
+ auto input = g->inputs()->create();
+ loco::link(input, pull);
+
+ {
+ locomotiv::Session s(g.get());
+
+ auto buf = make_buffer<float, LexicalLayout>(Shape{1});
+ auto data = locomotiv::make_data(buf);
+
+ s.set_input(0, std::move(data));
+
+ auto data_annotated = locomotiv::annot_data(pull);
+ ASSERT_EQ(data_annotated, nullptr);
+ auto user_data_annotated = locomotiv::user_data(pull);
+ ASSERT_NE(user_data_annotated, nullptr);
+ auto domain_annotated = locomotiv::annot_domain(pull);
+ ASSERT_EQ(domain_annotated, loco::Domain::Unknown);
+ }
+
+ auto data_annotated = locomotiv::annot_data(pull);
+ ASSERT_EQ(data_annotated, nullptr);
+ auto user_data_annotated = locomotiv::user_data(pull);
+ ASSERT_EQ(user_data_annotated, nullptr);
+ auto domain_annotated = locomotiv::annot_domain(pull);
+ ASSERT_EQ(domain_annotated, loco::Domain::Unknown);
+}
diff --git a/compiler/locomotiv/src/UserData.cpp b/compiler/locomotiv/src/UserData.cpp
new file mode 100644
index 000000000..b658ada9b
--- /dev/null
+++ b/compiler/locomotiv/src/UserData.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UserData.h"
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+namespace
+{
+
+class UserDataAnnotation final : public loco::NodeAnnotation
+{
+public:
+ UserDataAnnotation(std::unique_ptr<locomotiv::NodeData> &&data) : _data{std::move(data)}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const locomotiv::NodeData *data(void) const { return _data.get(); }
+
+private:
+ std::unique_ptr<locomotiv::NodeData> _data;
+};
+
+} // namespace
+
+namespace locomotiv
+{
+
+const NodeData *user_data(const loco::Node *node)
+{
+ if (auto annot = node->annot<UserDataAnnotation>())
+ {
+ return annot->data();
+ }
+
+ return nullptr;
+}
+
+void user_data(loco::Node *node, std::unique_ptr<NodeData> &&data)
+{
+ node->annot(stdex::make_unique<UserDataAnnotation>(std::move(data)));
+}
+
+void erase_user_data(loco::Node *node) { node->annot<UserDataAnnotation>(nullptr); }
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/UserData.h b/compiler/locomotiv/src/UserData.h
new file mode 100644
index 000000000..661d02140
--- /dev/null
+++ b/compiler/locomotiv/src/UserData.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _LOCOMOTIV_USERDATA_H_
+#define _LOCOMOTIV_USERDATA_H_
+
+#include "locomotiv/NodeData.h"
+
+namespace locomotiv
+{
+
+const NodeData *user_data(const loco::Node *node);
+void user_data(loco::Node *node, std::unique_ptr<NodeData> &&data);
+void erase_user_data(loco::Node *node);
+
+} // namespace locomotiv
+
+#endif // _LOCOMOTIV_USERDATA_H_
diff --git a/compiler/locomotiv/src/Validation.h b/compiler/locomotiv/src/Validation.h
new file mode 100644
index 000000000..59b8c40c7
--- /dev/null
+++ b/compiler/locomotiv/src/Validation.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _LOCOMOTIV_VALIDATION_H_
+#define _LOCOMOTIV_VALIDATION_H_
+
+#include <string>
+#include <stdexcept>
+
+namespace locomotiv
+{
+
+inline void validate(bool true_cond, const std::string &&exception_msg)
+{
+ if (!true_cond)
+ throw std::runtime_error(exception_msg);
+}
+
+} // namespace locomotiv
+
+#endif // _LOCOMOTIV_VALIDATION_H_
diff --git a/compiler/locop/CMakeLists.txt b/compiler/locop/CMakeLists.txt
new file mode 100644
index 000000000..107ee8be8
--- /dev/null
+++ b/compiler/locop/CMakeLists.txt
@@ -0,0 +1,27 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(locop STATIC ${SOURCES})
+set_target_properties(locop PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(locop PUBLIC include)
+target_link_libraries(locop PUBLIC loco)
+# Let's apply nncc common compile options
+#
+# NOTE This will enable strict compilation (warnings as error).
+# Please refer to the top-level CMakeLists.txt for details
+target_link_libraries(locop PRIVATE nncc_common)
+target_link_libraries(locop PUBLIC nncc_coverage)
+target_link_libraries(locop PRIVATE pp)
+target_link_libraries(locop PRIVATE stdex)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for internal testing
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(locop_test ${TESTS})
+target_link_libraries(locop_test stdex)
+target_link_libraries(locop_test locop)
diff --git a/compiler/locop/README.md b/compiler/locop/README.md
new file mode 100644
index 000000000..81ef41ca2
--- /dev/null
+++ b/compiler/locop/README.md
@@ -0,0 +1,3 @@
+# locop
+
+_locop_ is a collection of _loco_ pretty printers.
diff --git a/compiler/locop/include/locop/CanonicalNodeSummaryBuilder.h b/compiler/locop/include/locop/CanonicalNodeSummaryBuilder.h
new file mode 100644
index 000000000..e9ced3f17
--- /dev/null
+++ b/compiler/locop/include/locop/CanonicalNodeSummaryBuilder.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOP_CANONICAL_NODE_SUMMARY_BUILDER_H__
+#define __LOCOP_CANONICAL_NODE_SUMMARY_BUILDER_H__
+
+#include "locop/NodeSummaryBuilder.h"
+
+namespace locop
+{
+
+/**
+ * @brief Built-in Node Summary Builder for Canonical Dialect
+ */
+class CanonicalNodeSummaryBuilder final : public NodeSummaryBuilder
+{
+public:
+ CanonicalNodeSummaryBuilder(const SymbolTable *tbl) : _tbl{tbl}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool build(const loco::Node *node, locop::NodeSummary &out) const final;
+
+private:
+ const SymbolTable *_tbl;
+};
+
+} // namespace locop
+
+#endif // __LOCOP_CANONICAL_NODE_SUMMARY_BUILDER_H__
diff --git a/compiler/locop/include/locop/FormattedGraph.h b/compiler/locop/include/locop/FormattedGraph.h
new file mode 100644
index 000000000..0805c0e39
--- /dev/null
+++ b/compiler/locop/include/locop/FormattedGraph.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOP_FORMATTED_GRAPH_H__
+#define __LOCOP_FORMATTED_GRAPH_H__
+
+#include "locop/SymbolTable.h"
+#include "locop/NodeSummary.h"
+#include "locop/NodeSummaryBuilder.h"
+// TODO Remove this redundant include
+#include "locop/CanonicalNodeSummaryBuilder.h"
+
+#include <loco.h>
+
+#include <memory>
+#include <ostream>
+#include <string>
+#include <vector>
+
+namespace locop
+{
+
+struct FormattedGraph
+{
+ virtual ~FormattedGraph() = default;
+
+ virtual void dump(std::ostream &os) const = 0;
+};
+
+std::ostream &operator<<(std::ostream &, const FormattedGraph &);
+
+enum Formatter
+{
+ LinearV1,
+ // TO BE ADDED
+};
+
+template <Formatter F> class FormattedGraphImpl;
+
+template <> class FormattedGraphImpl<Formatter::LinearV1> final : public FormattedGraph
+{
+public:
+ FormattedGraphImpl(loco::Graph *graph) : _graph{graph} {}
+
+public:
+ void dump(std::ostream &os) const final;
+
+public:
+ FormattedGraphImpl<Formatter::LinearV1> &with(std::unique_ptr<NodeSummaryBuilderFactory> &&f)
+ {
+ _factory = std::move(f);
+ return (*this);
+ }
+
+private:
+ loco::Graph *_graph;
+
+ /**
+ * @brief User-provided NodeSummaryBuilderFactory
+ */
+ std::unique_ptr<NodeSummaryBuilderFactory> _factory = nullptr;
+};
+
+template <Formatter F> FormattedGraphImpl<F> fmt(loco::Graph *g)
+{
+ return FormattedGraphImpl<F>{g};
+}
+
+template <Formatter F> FormattedGraphImpl<F> fmt(const std::unique_ptr<loco::Graph> &g)
+{
+ return fmt<F>(g.get());
+}
+
+} // namespace locop
+
+#endif // __LOCOP_FORMATTED_GRAPH_H__
diff --git a/compiler/locop/include/locop/FormattedTensorShape.h b/compiler/locop/include/locop/FormattedTensorShape.h
new file mode 100644
index 000000000..25621d6c3
--- /dev/null
+++ b/compiler/locop/include/locop/FormattedTensorShape.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOP_FORMATTED_TENSOR_SHAPE_H__
+#define __LOCOP_FORMATTED_TENSOR_SHAPE_H__
+
+#include "locop/Interfaces.h"
+
+#include <loco/IR/TensorShape.h>
+
+namespace locop
+{
+
+enum class TensorShapeFormat
+{
+ // D_0 x D_1 x ... D_N
+ Plain,
+ // [ D_0 x D_1 x D_2 x ... ]
+ Bracket,
+};
+
+template <TensorShapeFormat Format> class FormattedTensorShape;
+
+template <>
+class FormattedTensorShape<TensorShapeFormat::Plain> final : public Spec<Interface::Formatted>
+{
+public:
+ FormattedTensorShape(const loco::TensorShape *ptr) : _ptr{ptr}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void dump(std::ostream &os) const final;
+
+private:
+ const loco::TensorShape *_ptr = nullptr;
+};
+
+template <>
+class FormattedTensorShape<TensorShapeFormat::Bracket> final : public Spec<Interface::Formatted>
+{
+public:
+ FormattedTensorShape(const loco::TensorShape *ptr) : _ptr{ptr}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void dump(std::ostream &os) const final;
+
+private:
+ const loco::TensorShape *_ptr = nullptr;
+};
+
+template <TensorShapeFormat F> FormattedTensorShape<F> fmt(loco::TensorShape *ptr)
+{
+ return FormattedTensorShape<F>{ptr};
+}
+
+} // namespace locop
+
+#endif // __LOCOP_FORMATTED_TENSOR_SHAPE_H__
diff --git a/compiler/locop/include/locop/GenericNodeSummaryBuilder.h b/compiler/locop/include/locop/GenericNodeSummaryBuilder.h
new file mode 100644
index 000000000..cdfe45a2b
--- /dev/null
+++ b/compiler/locop/include/locop/GenericNodeSummaryBuilder.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOP_GENERIC_NODE_SUMMARY_BUILDER_H__
+#define __LOCOP_GENERIC_NODE_SUMMARY_BUILDER_H__
+
+#include "locop/NodeSummaryBuilder.h"
+
+namespace locop
+{
+
+/**
+ * @brief Dialect-agnostic Node Summary Builder
+ */
+class GenericNodeSummaryBuilder final : public NodeSummaryBuilder
+{
+public:
+ GenericNodeSummaryBuilder(const SymbolTable *tbl) : _tbl{tbl}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool build(const loco::Node *node, locop::NodeSummary &out) const final;
+
+private:
+ const SymbolTable *_tbl;
+};
+
+} // namespace locop
+
+#endif // __LOCOP_GENERIC_NODE_SUMMARY_BUILDER_H__
diff --git a/compiler/locop/include/locop/Interfaces.h b/compiler/locop/include/locop/Interfaces.h
new file mode 100644
index 000000000..0b4974d0f
--- /dev/null
+++ b/compiler/locop/include/locop/Interfaces.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOP_INTERFACES_H__
+#define __LOCOP_INTERFACES_H__
+
+#include <ostream>
+
+namespace locop
+{
+
+enum class Interface
+{
+ Formatted,
+};
+
+template <Interface I> struct Spec;
+
+template <> struct Spec<Interface::Formatted>
+{
+ virtual ~Spec() = default;
+
+ virtual void dump(std::ostream &os) const = 0;
+};
+
+std::ostream &operator<<(std::ostream &, const Spec<Interface::Formatted> &);
+
+} // namespace locop
+
+#endif // __LOCOP_INTERFACES_H__
diff --git a/compiler/locop/include/locop/NodeSummary.h b/compiler/locop/include/locop/NodeSummary.h
new file mode 100644
index 000000000..59fe66357
--- /dev/null
+++ b/compiler/locop/include/locop/NodeSummary.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCO_NODE_SUMMARY_H__
+#define __LOCO_NODE_SUMMARY_H__
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace locop
+{
+
+using OpName = std::string;
+using ArgName = std::string;
+using ArgValue = std::string;
+using ArgElem = std::pair<ArgName, ArgValue>;
+
+class ArgDesc
+{
+public:
+ ArgDesc() = default;
+
+public:
+ /// @brief The number of presented arguments
+ uint32_t count(void) const { return _args.size(); }
+
+ const ArgElem &at(uint32_t n) const { return _args.at(n); }
+ void append(const ArgName &name, const ArgValue &value) { _args.emplace_back(name, value); }
+
+private:
+ std::vector<ArgElem> _args;
+};
+
+struct NodeDesc
+{
+public:
+ /**
+ * @brief Multi-line comments
+ */
+ class Comments final
+ {
+ public:
+ Comments() = default;
+
+ public:
+ uint32_t count(void) const { return _lines.size(); }
+ const std::string &at(uint32_t n) const { return _lines.at(n); }
+ void append(const std::string &s);
+
+ private:
+ std::vector<std::string> _lines;
+ };
+
+public:
+ enum class State
+ {
+ // All the node descriptions are "Invalid" at the beginning.
+ //
+ // Any valid node description SHOULD NOT be at this state.
+ Invalid,
+ // This state means that the producer is **NOT** confident about the information that
+ // it generates.
+ //
+ // There may be some missing information.
+ PartiallyKnown,
+ // This state means that the producer is confident about the information that it
+ // generates.
+ Complete,
+ };
+
+public:
+ NodeDesc() = default;
+ NodeDesc(const OpName &opname) { this->opname(opname); }
+
+public:
+ const OpName &opname(void) const;
+ void opname(const OpName &value);
+
+ const ArgDesc &args(void) const { return _args; }
+ ArgDesc &args(void) { return _args; }
+
+ const Comments &comments(void) const { return _comments; }
+ Comments &comments(void) { return _comments; }
+
+ const State &state(void) const { return _state; }
+ void state(const State &s) { _state = s; }
+
+private:
+ std::unique_ptr<OpName> _name = nullptr;
+ ArgDesc _args;
+ Comments _comments;
+ State _state = State::Invalid;
+};
+
+using NodeSummary = NodeDesc;
+
+} // namespace locop
+
+#endif // __LOCO_NODE_SUMMARY_H__
diff --git a/compiler/locop/include/locop/NodeSummaryBuilder.h b/compiler/locop/include/locop/NodeSummaryBuilder.h
new file mode 100644
index 000000000..b84bc71cd
--- /dev/null
+++ b/compiler/locop/include/locop/NodeSummaryBuilder.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOP_NODE_SUMMARY_BUILDER_H__
+#define __LOCOP_NODE_SUMMARY_BUILDER_H__
+
+#include "locop/SymbolTable.h"
+#include "locop/NodeSummary.h"
+
+#include <loco.h>
+
+namespace locop
+{
+
+/**
+ * @brief Build a summary from loco Node
+ */
+struct NodeSummaryBuilder
+{
+ virtual ~NodeSummaryBuilder() = default;
+
+ virtual bool build(const loco::Node *, NodeSummary &) const = 0;
+};
+
+struct NodeSummaryBuilderFactory
+{
+ virtual ~NodeSummaryBuilderFactory() = default;
+
+ virtual std::unique_ptr<NodeSummaryBuilder> create(const SymbolTable *) const = 0;
+};
+
+} // namespace locop
+
+#endif // __LOCOP_NODE_SUMMARY_BUILDER_H__
diff --git a/compiler/locop/include/locop/SymbolTable.h b/compiler/locop/include/locop/SymbolTable.h
new file mode 100644
index 000000000..ee9fc78e2
--- /dev/null
+++ b/compiler/locop/include/locop/SymbolTable.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOCOP_SYMBOL_TABLE_H__
+#define __LOCOP_SYMBOL_TABLE_H__
+
+#include <loco.h>
+
+#include <string>
+
+namespace locop
+{
+
+/**
+ * @brief Symbol Table Interface
+ *
+ * Symbol Table gives a name for each node.
+ */
+struct SymbolTable
+{
+ virtual ~SymbolTable() = default;
+
+ virtual std::string lookup(const loco::Node *) const = 0;
+};
+
+} // namespace locop
+
+#endif // __LOCOP_SYMBOL_TABLE_H__
diff --git a/compiler/locop/src/CanonicalNodeSummaryBuilder.cpp b/compiler/locop/src/CanonicalNodeSummaryBuilder.cpp
new file mode 100644
index 000000000..b962f490b
--- /dev/null
+++ b/compiler/locop/src/CanonicalNodeSummaryBuilder.cpp
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locop/CanonicalNodeSummaryBuilder.h"
+
+#include "locop/FormattedTensorShape.h"
+
+#include <loco/IR/CanonicalOpcode.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/IR/CanonicalNodeVisitor.h>
+#include <loco/IR/CanonicalNodeImpl.h>
+
+#include <pp/Format.h>
+
+#include <stdex/Memory.h>
+
+#include <map>
+#include <set>
+
+#include <cassert>
+
+using locop::SymbolTable;
+
+namespace
+{
+
+// TODO Move this into loco
+loco::TensorShape tensor_shape(const loco::NodeMixin<loco::NodeTrait::TensorShape> *m)
+{
+ loco::TensorShape res;
+
+ res.rank(m->rank());
+
+ for (uint32_t axis = 0; axis < m->rank(); ++axis)
+ {
+ res.dim(axis) = m->dim(axis);
+ }
+
+ return res;
+}
+
+using PrettyTensorShape = locop::FormattedTensorShape<locop::TensorShapeFormat::Bracket>;
+
+inline PrettyTensorShape pretty(const loco::TensorShape &shape)
+{
+ return PrettyTensorShape{&shape};
+}
+
+} // namespace
+
+namespace
+{
+
+/**
+ * @brief Return the opname as "<dialect>.<op>"
+ */
+std::string opname(const loco::Node *node)
+{
+ if (node->dialect() == loco::CanonicalDialect::get())
+ {
+ auto canonical_node = dynamic_cast<const loco::CanonicalNode *>(node);
+
+ assert(canonical_node != nullptr);
+
+ switch (canonical_node->opcode())
+ {
+#define CANONICAL_NODE(OPCODE, CLASS) \
+ case loco::CanonicalOpcode::OPCODE: \
+ return "canonical." #OPCODE;
+#include "loco/IR/CanonicalNodes.lst"
+#undef CANONICAL_NODE
+ default:
+ break;
+ };
+
+ return "canonical."
+ "Invalid";
+ }
+
+ return "unknown."
+ "Unknown";
+}
+
+struct NodeDesc : public locop::NodeDesc
+{
+public:
+ NodeDesc() = default;
+ NodeDesc(const locop::OpName &opname) : locop::NodeDesc{opname}
+ {
+ // DO NOTHING
+ }
+
+public:
+ // DEPRECATED
+ const locop::OpName &name(void) const { return opname(); }
+
+ // DEPRECATED
+ uint32_t arg_size(void) const { return args().count(); }
+ // DEPRECATED
+ const locop::ArgElem &arg(uint32_t n) const { return args().at(n); }
+ // DEPRECATED
+ void arg(const locop::ArgName &name, const locop::ArgValue &value) { args().append(name, value); }
+};
+
+NodeDesc default_node_desc(const SymbolTable &tbl, const loco::Node *node)
+{
+ NodeDesc res{opname(node)};
+
+ for (uint32_t n = 0; n < node->arity(); ++n)
+ {
+ res.arg(std::string{"arg"} + std::to_string(n), tbl.lookup(node->arg(n)));
+ }
+ res.state(NodeDesc::State::PartiallyKnown);
+
+ return res;
+}
+
+class CanonicalNodeDescBuilder final : public loco::CanonicalNodeVisitor<NodeDesc>
+{
+public:
+ CanonicalNodeDescBuilder(const SymbolTable *symtbl) : _symtbl{symtbl}
+ {
+ // DO NOTHING
+ }
+
+private:
+ std::string nodename(const loco::Node *node) const { return _symtbl->lookup(node); }
+
+public:
+ // TODO Build a node description for each canonical node
+ NodeDesc visit(const loco::Push *node) final
+ {
+ NodeDesc res{opname(node)};
+
+ res.arg("index", node->indexed() ? pp::fmt(node->index()) : pp::fmt('?'));
+ res.arg("from", nodename(node->from()));
+ res.state(NodeDesc::State::Complete);
+
+ return res;
+ }
+
+ NodeDesc visit(const loco::Pull *node) final
+ {
+ NodeDesc res{opname(node)};
+
+ res.arg("index", node->indexed() ? pp::fmt(node->index()) : pp::fmt('?'));
+ res.state(NodeDesc::State::Complete);
+
+ return res;
+ }
+
+ NodeDesc visit(const loco::Forward *node) final
+ {
+ NodeDesc res{opname(node)};
+
+ res.arg("input", nodename(node->input()));
+ res.state(NodeDesc::State::Complete);
+
+ return res;
+ }
+
+ NodeDesc visit(const loco::ConstGen *node) final
+ {
+ NodeDesc res{opname(node)};
+
+ // TODO Print data type
+ res.arg("shape", pp::fmt(pretty(tensor_shape(node))));
+ res.state(NodeDesc::State::PartiallyKnown);
+
+ return res;
+ }
+
+ NodeDesc visit(const loco::TensorConcat *node) final
+ {
+ NodeDesc res{opname(node)};
+
+ res.arg("lhs", nodename(node->lhs()));
+ res.arg("rhs", nodename(node->rhs()));
+ res.arg("axis", pp::fmt(node->axis()));
+ res.state(NodeDesc::State::Complete);
+
+ return res;
+ }
+
+ NodeDesc visit(const loco::EltwiseAdd *node) final
+ {
+ NodeDesc res{opname(node)};
+
+ res.arg("lhs", nodename(node->lhs()));
+ res.arg("rhs", nodename(node->rhs()));
+ res.state(NodeDesc::State::Complete);
+
+ return res;
+ }
+
+ NodeDesc visit(const loco::EltwiseMul *node) final
+ {
+ NodeDesc res{opname(node)};
+
+ res.arg("lhs", nodename(node->lhs()));
+ res.arg("rhs", nodename(node->rhs()));
+ res.state(NodeDesc::State::Complete);
+
+ return res;
+ }
+
+ NodeDesc visit(const loco::TensorReduce *node) final
+ {
+ NodeDesc res{opname(node)};
+
+ // TODO Print TensorAxisSet
+ res.arg("input", nodename(node->input()));
+ res.arg("func", pp::fmt((int32_t)node->func()));
+
+ res.state(NodeDesc::State::PartiallyKnown);
+
+ return res;
+ }
+
+ NodeDesc visit(const loco::Reshape<loco::ReshapeType::Fixed> *node) final
+ {
+ NodeDesc res{opname(node)};
+
+ res.arg("input", nodename(node->input()));
+ res.arg("shape", pp::fmt(pretty(tensor_shape(node))));
+ res.state(NodeDesc::State::Complete);
+
+ return res;
+ }
+
+ NodeDesc visit(const loco::Tanh *node) final
+ {
+ NodeDesc res{opname(node)};
+
+ res.arg("input", nodename(node->input()));
+ res.state(NodeDesc::State::Complete);
+
+ return res;
+ }
+
+ NodeDesc visit(const loco::TensorSoftmax *node) final
+ {
+ NodeDesc res{opname(node)};
+
+ res.arg("input", nodename(node->input()));
+ res.arg("axis", pp::fmt(node->axis()));
+ res.state(NodeDesc::State::Complete);
+
+ return res;
+ }
+
+public:
+ NodeDesc visit(const loco::Node *node) final { return default_node_desc(*_symtbl, node); }
+
+private:
+ const SymbolTable *_symtbl;
+};
+
+NodeDesc canonical_node_desc(const SymbolTable &tbl, const loco::CanonicalNode *canonical_node)
+{
+ CanonicalNodeDescBuilder builder{&tbl};
+ return canonical_node->accept(&builder);
+}
+
+} // namespace
+
+namespace locop
+{
+
+bool CanonicalNodeSummaryBuilder::build(const loco::Node *node, locop::NodeSummary &out) const
+{
+ // Skip if a given node does not belong to loco.canonical
+ if (node->dialect() != loco::CanonicalDialect::get())
+ {
+ return false;
+ }
+
+ auto canonical_node = dynamic_cast<const loco::CanonicalNode *>(node);
+ assert(canonical_node != nullptr);
+ out = canonical_node_desc(*_tbl, canonical_node);
+ return true;
+}
+
+} // namespace locop
diff --git a/compiler/locop/src/ExampleGraph.h b/compiler/locop/src/ExampleGraph.h
new file mode 100644
index 000000000..76813bcd8
--- /dev/null
+++ b/compiler/locop/src/ExampleGraph.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __EXAMPLE_GRAPH_H__
+#define __EXAMPLE_GRAPH_H__
+
+#include <loco.h>
+
+#include <stdex/Memory.h>
+
+namespace
+{
+
+enum GraphCode
+{
+ PullPush, /* Pull - Push network */
+};
+
+template <GraphCode Code> struct Bundle;
+template <GraphCode Code> std::unique_ptr<Bundle<Code>> make_bundle(void);
+
+template <> struct Bundle<PullPush>
+{
+ std::unique_ptr<loco::Graph> g;
+ loco::Pull *pull;
+ loco::Push *push;
+
+ loco::Graph *graph(void) { return g.get(); }
+};
+
+template <> std::unique_ptr<Bundle<PullPush>> make_bundle(void)
+{
+ auto g = loco::make_graph();
+
+ auto pull = g->nodes()->create<loco::Pull>();
+
+ pull->rank(2);
+ pull->dim(0) = loco::make_dimension(); // Mark dim 0 as unknown
+ pull->dim(1) = 4;
+
+ auto push = g->nodes()->create<loco::Push>();
+
+ push->from(pull);
+
+ auto res = stdex::make_unique<Bundle<PullPush>>();
+
+ res->g = std::move(g);
+ res->pull = pull;
+ res->push = push;
+
+ return std::move(res);
+}
+
+} // namespace
+
+#endif // __EXAMPLE_GRAPH_H__
diff --git a/compiler/locop/src/FormattedGraph.cpp b/compiler/locop/src/FormattedGraph.cpp
new file mode 100644
index 000000000..84de1e888
--- /dev/null
+++ b/compiler/locop/src/FormattedGraph.cpp
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locop/FormattedGraph.h"
+#include "locop/FormattedTensorShape.h"
+#include "locop/GenericNodeSummaryBuilder.h"
+
+#include <loco/Service/TypeInference.h>
+#include <loco/Service/ShapeInference.h>
+
+#include <pp/Format.h>
+
+#include <stdex/Memory.h>
+
+#include <map>
+#include <set>
+
+#include <cassert>
+
+using locop::SymbolTable;
+
+namespace
+{
+
+std::string str(const loco::DataType &dtype)
+{
+ switch (dtype)
+ {
+ case loco::DataType::Unknown:
+ return "Unknown";
+
+ case loco::DataType::U8:
+ return "U8";
+ case loco::DataType::U16:
+ return "U16";
+ case loco::DataType::U32:
+ return "U32";
+ case loco::DataType::U64:
+ return "U64";
+
+ case loco::DataType::S8:
+ return "S8";
+ case loco::DataType::S16:
+ return "S16";
+ case loco::DataType::S32:
+ return "S32";
+ case loco::DataType::S64:
+ return "S64";
+
+ case loco::DataType::FLOAT16:
+ return "FLOAT16";
+ case loco::DataType::FLOAT32:
+ return "FLOAT32";
+ case loco::DataType::FLOAT64:
+ return "FLOAT64";
+
+ default:
+ break;
+ };
+
+ throw std::invalid_argument{"dtype"};
+}
+
+std::string str(const loco::Domain &domain)
+{
+ // TODO Generate!
+ switch (domain)
+ {
+ case loco::Domain::Unknown:
+ return "Unknown";
+ case loco::Domain::Tensor:
+ return "Tensor";
+ case loco::Domain::Feature:
+ return "Feature";
+ case loco::Domain::Filter:
+ return "Filter";
+ case loco::Domain::DepthwiseFilter:
+ return "DWFilter";
+ case loco::Domain::Bias:
+ return "Bias";
+ default:
+ break;
+ }
+
+ throw std::invalid_argument{"domain"};
+}
+
+std::string str(const loco::NodeShape &node_shape)
+{
+ using namespace locop;
+
+ switch (node_shape.domain())
+ {
+ case loco::Domain::Tensor:
+ {
+ auto tensor_shape = node_shape.as<loco::TensorShape>();
+ return pp::fmt(locop::fmt<TensorShapeFormat::Plain>(&tensor_shape));
+ }
+ // TODO Show details
+ case loco::Domain::Feature:
+ case loco::Domain::Filter:
+ case loco::Domain::DepthwiseFilter:
+ case loco::Domain::Bias:
+ return "...";
+
+ default:
+ break;
+ }
+
+ throw std::invalid_argument{"domain"};
+}
+
+// TODO Use locop::fmt<TensorShapeFormat ...>
+locop::FormattedTensorShape<locop::TensorShapeFormat::Bracket>
+formatted_tensor_shape(const loco::TensorShape *ptr)
+{
+ return locop::FormattedTensorShape<locop::TensorShapeFormat::Bracket>{ptr};
+}
+
+} // namespace
+
+namespace
+{
+
+struct NodeDesc : public locop::NodeDesc
+{
+public:
+ NodeDesc() = default;
+ NodeDesc(const locop::OpName &opname) : locop::NodeDesc{opname}
+ {
+ // DO NOTHING
+ }
+
+public:
+ // DEPRECATED
+ const locop::OpName &name(void) const { return opname(); }
+
+ // DEPRECATED
+ uint32_t arg_size(void) const { return args().count(); }
+ // DEPRECATED
+ const locop::ArgElem &arg(uint32_t n) const { return args().at(n); }
+ // DEPRECATED
+ void arg(const locop::ArgName &name, const locop::ArgValue &value) { args().append(name, value); }
+};
+
+} // namespace
+
+// TODO Remove this workaround
+namespace locop
+{
+
+std::ostream &operator<<(std::ostream &os, const NodeDesc &d)
+{
+ assert(d.state() != NodeDesc::State::Invalid);
+
+ std::vector<std::string> values;
+
+ for (uint32_t n = 0; n < d.args().count(); ++n)
+ {
+ values.emplace_back(d.args().at(n).first + ": " + d.args().at(n).second);
+ }
+
+ if (d.state() == NodeDesc::State::PartiallyKnown)
+ {
+ values.emplace_back("...");
+ }
+
+ os << d.opname();
+ os << "(";
+ if (values.size() > 0)
+ {
+ os << values.at(0);
+ for (uint32_t n = 1; n < values.size(); ++n)
+ {
+ os << ", " << values.at(n);
+ }
+ }
+ os << ")";
+
+ return os;
+}
+
+} // namespace locop
+
+namespace locop
+{
+
+std::ostream &operator<<(std::ostream &os, const FormattedGraph &fmt)
+{
+ fmt.dump(os);
+ return os;
+}
+
+} // namespace locop
+
+namespace locop
+{
+
+void FormattedGraphImpl<Formatter::LinearV1>::dump(std::ostream &os) const
+{
+ struct SymbolTableImpl final : public SymbolTable
+ {
+ std::string lookup(const loco::Node *node) const final
+ {
+ if (node == nullptr)
+ {
+ return "(null)";
+ }
+
+ return _content.at(node);
+ }
+
+ std::map<const loco::Node *, std::string> _content;
+ };
+
+ SymbolTableImpl symbols;
+
+ auto symbol = [&symbols](const loco::Node *node) { return symbols.lookup(node); };
+
+ for (uint32_t n = 0; n < _graph->nodes()->size(); ++n)
+ {
+ symbols._content[_graph->nodes()->at(n)] = pp::fmt("%", n);
+ }
+
+ // Find the disjoint node clusters
+ //
+ // TODO Move this implementation into loco Algorithms.h
+ std::map<loco::Node *, loco::Node *> parents;
+
+ for (auto node : loco::all_nodes(_graph))
+ {
+ parents[node] = nullptr;
+ }
+
+ for (auto node : loco::all_nodes(_graph))
+ {
+ for (uint32_t n = 0; n < node->arity(); ++n)
+ {
+ if (auto arg = node->arg(n))
+ {
+ parents[arg] = node;
+ }
+ }
+ }
+
+ auto find = [&parents](loco::Node *node) {
+ loco::Node *cur = node;
+
+ while (parents.at(cur) != nullptr)
+ {
+ cur = parents.at(cur);
+ }
+
+ return cur;
+ };
+
+ std::set<loco::Node *> roots;
+
+ for (auto node : loco::all_nodes(_graph))
+ {
+ roots.insert(find(node));
+ }
+
+ std::map<loco::Node *, std::set<loco::Node *>> clusters;
+
+ // Create clusters
+ for (auto root : roots)
+ {
+ clusters[root] = std::set<loco::Node *>{};
+ }
+
+ for (auto node : loco::all_nodes(_graph))
+ {
+ clusters.at(find(node)).insert(node);
+ }
+
+ std::unique_ptr<locop::NodeSummaryBuilder> node_summary_builder;
+
+ if (_factory)
+ {
+ // Use User-defined NodeSummaryBuilder if NodeSummaryBuilderFactory is present
+ node_summary_builder = _factory->create(&symbols);
+ }
+ else
+ {
+ // Use Built-in NodeSummaryBuilder otherwise
+ node_summary_builder = stdex::make_unique<GenericNodeSummaryBuilder>(&symbols);
+ }
+
+ // Print Graph Input(s)
+ for (uint32_t n = 0; n < _graph->inputs()->size(); ++n)
+ {
+ auto input = _graph->inputs()->at(n);
+
+ std::string name = input->name();
+
+ std::string shape = "?";
+ if (input->shape() != nullptr)
+ {
+ shape = pp::fmt(formatted_tensor_shape(input->shape()));
+ }
+
+ // TODO Print dtype
+ os << pp::fmt("In #", n, " { name: ", name, ", shape: ", shape, " }") << std::endl;
+ }
+
+ // Print Graph Output(s)
+ for (uint32_t n = 0; n < _graph->outputs()->size(); ++n)
+ {
+ auto output = _graph->outputs()->at(n);
+
+ std::string name = output->name();
+
+ std::string shape = "?";
+ if (output->shape() != nullptr)
+ {
+ shape = pp::fmt(formatted_tensor_shape(output->shape()));
+ }
+
+ // TODO Print dtype
+ os << pp::fmt("Out #", n, " { name: ", name, ", shape: ", shape, " }") << std::endl;
+ }
+
+ if (_graph->inputs()->size() + _graph->outputs()->size() != 0)
+ {
+ os << std::endl;
+ }
+
+ for (auto it = clusters.begin(); it != clusters.end(); ++it)
+ {
+ std::vector<loco::Node *> cluster_outputs;
+
+ for (auto node : it->second)
+ {
+ // NOTE This is inefficient but anyway working :)
+ if (loco::succs(node).empty())
+ {
+ cluster_outputs.emplace_back(node);
+ }
+ }
+
+ for (auto node : loco::postorder_traversal(cluster_outputs))
+ {
+ locop::NodeSummary node_summary;
+
+ // Build a node summary
+ if (!node_summary_builder->build(node, node_summary))
+ {
+ throw std::runtime_error{"Fail to build a node summary"};
+ }
+
+ for (uint32_t n = 0; n < node_summary.comments().count(); ++n)
+ {
+ os << "; " << node_summary.comments().at(n) << std::endl;
+ }
+
+ os << symbol(node);
+
+ if (loco::shape_known(node))
+ {
+ auto node_shape = loco::shape_get(node);
+ os << " : " << str(node_shape.domain());
+ os << "<";
+ os << str(node_shape);
+ os << ", ";
+ // Show DataType
+ os << (loco::dtype_known(node) ? str(loco::dtype_get(node)) : std::string{"?"});
+ os << ">";
+ }
+
+ os << " = " << node_summary << std::endl;
+ }
+ os << std::endl;
+ }
+}
+
+} // namespace locop
diff --git a/compiler/locop/src/FormattedGraph.test.cpp b/compiler/locop/src/FormattedGraph.test.cpp
new file mode 100644
index 000000000..c9808d3a2
--- /dev/null
+++ b/compiler/locop/src/FormattedGraph.test.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locop/FormattedGraph.h"
+#include "ExampleGraph.h"
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+TEST(LinearV1FormatterTest, simple)
+{
+ auto bundle = make_bundle<PullPush>();
+ auto g = bundle->graph();
+
+ // TODO Validate the output (when the implementation becomes stable)
+ std::cout << locop::fmt<locop::LinearV1>(g) << std::endl;
+}
+
+TEST(LinearV1FormatterTest, user_defined_node_summary_builder)
+{
+ struct MyAnnotation final : public loco::NodeAnnotation
+ {
+ // DO NOTHING
+ };
+
+ auto bundle = make_bundle<PullPush>();
+ auto g = bundle->graph();
+ {
+ bundle->push->annot(stdex::make_unique<MyAnnotation>());
+ }
+
+ struct MyBuilder final : public locop::NodeSummaryBuilder
+ {
+ bool build(const loco::Node *node, locop::NodeSummary &s) const final
+ {
+ s.opname("my.op");
+ if (node->annot<MyAnnotation>())
+ {
+ s.comments().append("annotated");
+ }
+ s.state(locop::NodeSummary::State::PartiallyKnown);
+ return true;
+ }
+ };
+
+ struct MyFactory final : public locop::NodeSummaryBuilderFactory
+ {
+ std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *) const final
+ {
+ return stdex::make_unique<MyBuilder>();
+ }
+ };
+
+ std::cout << locop::fmt<locop::LinearV1>(g).with(stdex::make_unique<MyFactory>()) << std::endl;
+
+ // TODO Check whether MyBuilder actually sees all the nodes in a graph
+ SUCCEED();
+}
+
+// This test shows how to compose two node summary builders.
+TEST(LinearV1FormatterTest, node_summary_builder_composition)
+{
+ struct MyNode : public loco::FixedArity<0>::Mixin<loco::Node>
+ {
+ uint32_t opnum(void) const final { return 0; }
+ const loco::Dialect *dialect(void) const final { return nullptr; };
+ };
+
+ auto g = loco::make_graph();
+ {
+ auto user = g->nodes()->create<MyNode>();
+
+ auto push = g->nodes()->create<loco::Push>();
+
+ push->from(user);
+ }
+
+ // TODO Reuse MyBuilder above
+ struct MyBuilder final : public locop::NodeSummaryBuilder
+ {
+ bool build(const loco::Node *node, locop::NodeSummary &s) const final
+ {
+ s.opname("my.op");
+ s.state(locop::NodeSummary::State::PartiallyKnown);
+ return true;
+ }
+ };
+
+ class CompositeBuilder final : public locop::NodeSummaryBuilder
+ {
+ public:
+ CompositeBuilder(const locop::SymbolTable *tbl) : _tbl{tbl}
+ {
+ // DO NOTHING
+ }
+
+ public:
+ bool build(const loco::Node *node, locop::NodeSummary &s) const final
+ {
+ if (locop::CanonicalNodeSummaryBuilder(_tbl).build(node, s))
+ {
+ return true;
+ }
+
+ if (MyBuilder().build(node, s))
+ {
+ return true;
+ }
+
+ return false;
+ }
+
+ private:
+ const locop::SymbolTable *_tbl;
+ };
+
+ struct MyFactory final : public locop::NodeSummaryBuilderFactory
+ {
+ std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tbl) const final
+ {
+ return stdex::make_unique<CompositeBuilder>(tbl);
+ }
+ };
+
+ std::cout << locop::fmt<locop::LinearV1>(g).with(stdex::make_unique<MyFactory>()) << std::endl;
+
+ // TODO Check whether MyBuilder actually sees all the nodes in a graph
+ SUCCEED();
+}
diff --git a/compiler/locop/src/FormattedTensorShape.cpp b/compiler/locop/src/FormattedTensorShape.cpp
new file mode 100644
index 000000000..b2b6ea074
--- /dev/null
+++ b/compiler/locop/src/FormattedTensorShape.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locop/FormattedTensorShape.h"
+
+namespace loco
+{
+
+std::ostream &operator<<(std::ostream &os, const loco::Dimension &d)
+{
+ os << (d.known() ? std::to_string(d.value()) : std::string{"?"});
+ return os;
+}
+
+} // namespace loco
+
+namespace locop
+{
+
+void FormattedTensorShape<TensorShapeFormat::Plain>::dump(std::ostream &os) const
+{
+ if (_ptr->rank() > 0)
+ {
+ os << _ptr->dim(0);
+
+ for (uint32_t axis = 1; axis < _ptr->rank(); ++axis)
+ {
+ os << " x " << _ptr->dim(axis);
+ }
+ }
+}
+
+} // namespace locop
+
+namespace locop
+{
+
+void FormattedTensorShape<TensorShapeFormat::Bracket>::dump(std::ostream &os) const
+{
+ os << "[";
+
+ if (_ptr->rank() > 0)
+ {
+ os << " " << _ptr->dim(0);
+
+ for (uint32_t axis = 1; axis < _ptr->rank(); ++axis)
+ {
+ os << " x " << _ptr->dim(axis);
+ }
+ }
+
+ os << " ]";
+}
+
+} // namespace locop
diff --git a/compiler/locop/src/FormattedTensorShape.test.cpp b/compiler/locop/src/FormattedTensorShape.test.cpp
new file mode 100644
index 000000000..0f0017ab4
--- /dev/null
+++ b/compiler/locop/src/FormattedTensorShape.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locop/FormattedTensorShape.h"
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+using namespace locop;
+
+TEST(FormattedTensorShapeTest, BracketFormat)
+{
+ auto tensor_shape = stdex::make_unique<loco::TensorShape>();
+
+ tensor_shape->rank(2);
+ tensor_shape->dim(0) = 4;
+
+ std::cout << fmt<TensorShapeFormat::Bracket>(tensor_shape.get()) << std::endl;
+}
diff --git a/compiler/locop/src/GenericNodeSummaryBuilder.cpp b/compiler/locop/src/GenericNodeSummaryBuilder.cpp
new file mode 100644
index 000000000..e3bbe5aad
--- /dev/null
+++ b/compiler/locop/src/GenericNodeSummaryBuilder.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locop/GenericNodeSummaryBuilder.h"
+
+#include <pp/Format.h>
+
+namespace locop
+{
+
+bool GenericNodeSummaryBuilder::build(const loco::Node *node, locop::NodeSummary &out) const
+{
+ out.opname(pp::fmt(node->dialect(), ".op_", node->opnum()));
+
+ for (uint32_t n = 0; n < node->arity(); ++n)
+ {
+ out.args().append(pp::fmt("arg", n), _tbl->lookup(node->arg(n)));
+ }
+
+ out.state(NodeDesc::State::PartiallyKnown);
+
+ return true;
+}
+
+} // namespace locop
diff --git a/compiler/locop/src/GenericNodeSummaryBuilder.test.cpp b/compiler/locop/src/GenericNodeSummaryBuilder.test.cpp
new file mode 100644
index 000000000..d688b5490
--- /dev/null
+++ b/compiler/locop/src/GenericNodeSummaryBuilder.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locop/GenericNodeSummaryBuilder.h"
+#include "locop/FormattedGraph.h"
+
+#include <stdex/Memory.h>
+
+#include <stdexcept>
+
+#include <gtest/gtest.h>
+
+TEST(GenericNodeSummaryBuilderTest, simple)
+{
+ struct MockDialect final : public loco::Dialect
+ {
+ static Dialect *get(void)
+ {
+ static MockDialect d;
+ return &d;
+ }
+ };
+
+ struct MockNode : public loco::FixedArity<0>::Mixin<loco::Node>
+ {
+ const loco::Dialect *dialect(void) const final { return MockDialect::get(); };
+ uint32_t opnum(void) const final { return 0; }
+ };
+
+ struct MockFactory final : public locop::NodeSummaryBuilderFactory
+ {
+ std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tbl) const final
+ {
+ return stdex::make_unique<locop::GenericNodeSummaryBuilder>(tbl);
+ }
+ };
+
+ auto g = loco::make_graph();
+
+ g->nodes()->create<MockNode>();
+
+ std::cout << locop::fmt<locop::LinearV1>(g).with(stdex::make_unique<MockFactory>()) << std::endl;
+
+ SUCCEED();
+}
diff --git a/compiler/locop/src/Interfaces.cpp b/compiler/locop/src/Interfaces.cpp
new file mode 100644
index 000000000..14e0211ba
--- /dev/null
+++ b/compiler/locop/src/Interfaces.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locop/Interfaces.h"
+
+namespace locop
+{
+
+std::ostream &operator<<(std::ostream &os, const Spec<Interface::Formatted> &formatted)
+{
+ formatted.dump(os);
+ return os;
+}
+
+} // namespace locop
diff --git a/compiler/locop/src/NodeSummary.cpp b/compiler/locop/src/NodeSummary.cpp
new file mode 100644
index 000000000..3f8856997
--- /dev/null
+++ b/compiler/locop/src/NodeSummary.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locop/NodeSummary.h"
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+namespace locop
+{
+
+void NodeDesc::Comments::append(const std::string &s)
+{
+ // TODO Check whether s contains any newline character
+ _lines.emplace_back(s);
+}
+
+const std::string &NodeDesc::opname(void) const
+{
+ // _name SHOULD BE set before use
+ assert(_name != nullptr);
+ return *_name;
+}
+
+void NodeDesc::opname(const std::string &v) { _name = stdex::make_unique<std::string>(v); }
+
+} // namespace locop
diff --git a/compiler/locop/src/NodeSummaryBuilder.cpp b/compiler/locop/src/NodeSummaryBuilder.cpp
new file mode 100644
index 000000000..6610bf71f
--- /dev/null
+++ b/compiler/locop/src/NodeSummaryBuilder.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locop/NodeSummaryBuilder.h"
+
+// This file checks whether "NodeSummaryBuilder.h" is self-complete or not.
+//
+// WARNING!! Do NOT remove this file.
diff --git a/compiler/logo-core/CMakeLists.txt b/compiler/logo-core/CMakeLists.txt
new file mode 100644
index 000000000..3bc71dbd0
--- /dev/null
+++ b/compiler/logo-core/CMakeLists.txt
@@ -0,0 +1,19 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(logo_core STATIC ${SOURCES})
+set_target_properties(logo_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(logo_core PRIVATE src)
+target_include_directories(logo_core PUBLIC include)
+target_link_libraries(logo_core PUBLIC loco)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(logo_core_test ${TESTS})
+target_include_directories(logo_core_test PRIVATE src)
+target_link_libraries(logo_core_test logo_core)
diff --git a/compiler/logo-core/README.md b/compiler/logo-core/README.md
new file mode 100644
index 000000000..0dee3954b
--- /dev/null
+++ b/compiler/logo-core/README.md
@@ -0,0 +1,3 @@
+# logo-core
+
+_logo-core_ provides the core framework of general _loco_ graph passes for transformation and optimization.
diff --git a/compiler/logo-core/include/logo/Pass.h b/compiler/logo-core/include/logo/Pass.h
new file mode 100644
index 000000000..4f667f156
--- /dev/null
+++ b/compiler/logo-core/include/logo/Pass.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_PASS_H__
+#define __LOGO_PASS_H__
+
+#include <loco.h>
+
+#include <string>
+
+namespace logo
+{
+
+class Pass
+{
+public:
+ virtual ~Pass() = default;
+
+public:
+ virtual const char *name(void) const { return nullptr; }
+
+public:
+ /**
+ * @brief Run the pass
+ *
+ * @return false if there was nothing changed
+ */
+ virtual bool run(loco::Graph *graph) = 0;
+};
+
+std::string pass_name(const Pass *);
+
+} // namespace logo
+
+#endif // __LOGO_PASS_H__
diff --git a/compiler/logo-core/include/logo/Phase.h b/compiler/logo-core/include/logo/Phase.h
new file mode 100644
index 000000000..d1b7ccd5f
--- /dev/null
+++ b/compiler/logo-core/include/logo/Phase.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_PHASE_H__
+#define __LOGO_PHASE_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+#include <vector>
+#include <memory>
+
+namespace logo
+{
+
+// Phase is a collection of Pass(es)
+using Phase = std::vector<std::unique_ptr<Pass>>;
+
+enum class PhaseEvent
+{
+ PhaseBegin,
+ PhaseEnd,
+
+ PassBegin,
+ PassEnd,
+};
+
+template <PhaseEvent E> struct PhaseEventInfo;
+
+template <> class PhaseEventInfo<PhaseEvent::PhaseBegin>
+{
+ // Empty
+};
+
+template <> class PhaseEventInfo<PhaseEvent::PhaseEnd>
+{
+ // Empty
+};
+
+template <> class PhaseEventInfo<PhaseEvent::PassBegin>
+{
+public:
+ void pass(const Pass *pass) { _pass = pass; }
+ const Pass *pass(void) const { return _pass; }
+
+private:
+ const Pass *_pass;
+};
+
+template <> class PhaseEventInfo<PhaseEvent::PassEnd>
+{
+public:
+ void pass(const Pass *pass) { _pass = pass; }
+ const Pass *pass(void) const { return _pass; }
+
+ void changed(bool changed) { _changed = changed; }
+ bool changed(void) const { return _changed; }
+
+private:
+ const Pass *_pass;
+ bool _changed;
+};
+
+struct PhaseEventListener
+{
+ virtual ~PhaseEventListener() = default;
+
+ virtual void notify(const PhaseEventInfo<PhaseEvent::PhaseBegin> *) { return; };
+ virtual void notify(const PhaseEventInfo<PhaseEvent::PhaseEnd> *) { return; };
+ virtual void notify(const PhaseEventInfo<PhaseEvent::PassBegin> *) { return; };
+ virtual void notify(const PhaseEventInfo<PhaseEvent::PassEnd> *) { return; };
+};
+
+// TODO Will be other mix-ins for Phase Runners?
+class PhaseRunnerMixinObservable
+{
+public:
+ PhaseRunnerMixinObservable() = default;
+
+public:
+ virtual ~PhaseRunnerMixinObservable() = default;
+
+public:
+ void attach(PhaseEventListener *listener) { _listener = listener; }
+
+public:
+ void notifyPhaseBegin(void) const
+ {
+ if (_listener)
+ {
+ PhaseEventInfo<PhaseEvent::PhaseBegin> info;
+
+ _listener->notify(&info);
+ }
+ }
+
+ void notifyPhaseEnd(void) const
+ {
+ if (_listener)
+ {
+ PhaseEventInfo<PhaseEvent::PhaseEnd> info;
+
+ _listener->notify(&info);
+ }
+ }
+
+ void notifyPassBegin(Pass *pass) const
+ {
+ if (_listener)
+ {
+ PhaseEventInfo<PhaseEvent::PassBegin> info;
+
+ info.pass(pass);
+
+ _listener->notify(&info);
+ }
+ }
+
+ void notifyPassEnd(Pass *pass, bool changed) const
+ {
+ if (_listener)
+ {
+ PhaseEventInfo<PhaseEvent::PassEnd> info;
+
+ info.pass(pass);
+ info.changed(changed);
+
+ _listener->notify(&info);
+ }
+ }
+
+private:
+ PhaseEventListener *_listener = nullptr;
+};
+
+enum class PhaseStrategy
+{
+ // Run all the passes until there is no pass that makes a change
+ Saturate,
+ // Same as Saturate but will restart from the first when there is a change
+ Restart,
+};
+
+template <PhaseStrategy S> class PhaseRunner;
+
+template <> class PhaseRunner<PhaseStrategy::Saturate> final : public PhaseRunnerMixinObservable
+{
+public:
+ PhaseRunner(loco::Graph *graph) : _graph{graph}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void run(const Phase &) const;
+
+private:
+ loco::Graph *_graph;
+};
+
+template <> class PhaseRunner<PhaseStrategy::Restart> final : public PhaseRunnerMixinObservable
+{
+public:
+ PhaseRunner(loco::Graph *graph) : _graph{graph}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void run(const Phase &) const;
+
+private:
+ loco::Graph *_graph;
+};
+
+} // namespace logo
+
+#endif // __LOGO_PHASE_H__
diff --git a/compiler/logo-core/requires.cmake b/compiler/logo-core/requires.cmake
new file mode 100644
index 000000000..44f6870da
--- /dev/null
+++ b/compiler/logo-core/requires.cmake
@@ -0,0 +1 @@
+require("loco")
diff --git a/compiler/logo-core/src/Pass.cpp b/compiler/logo-core/src/Pass.cpp
new file mode 100644
index 000000000..a44010760
--- /dev/null
+++ b/compiler/logo-core/src/Pass.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/Pass.h>
+
+namespace logo
+{
+
+std::string pass_name(const Pass *t)
+{
+ if (t->name() == nullptr)
+ {
+ return "(unknown)";
+ }
+
+ return t->name();
+}
+
+} // namespace logo
diff --git a/compiler/logo-core/src/Pass.test.cpp b/compiler/logo-core/src/Pass.test.cpp
new file mode 100644
index 000000000..b6bebff62
--- /dev/null
+++ b/compiler/logo-core/src/Pass.test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+TEST(LogoPassTests, pass_name_over_unnamed_pass)
+{
+ struct Bumblebee final : public logo::Pass
+ {
+ bool run(loco::Graph *) final { return false; }
+ };
+
+ Bumblebee bumblebee;
+
+ ASSERT_EQ(logo::pass_name(&bumblebee), "(unknown)");
+}
+
+TEST(LogoPassTests, pass_name_over_named_pass)
+{
+ struct Bumblebee final : public logo::Pass
+ {
+ const char *name(void) const final { return "Bee"; }
+ bool run(loco::Graph *) final { return false; }
+ };
+
+ Bumblebee bumblebee;
+
+ ASSERT_EQ(logo::pass_name(&bumblebee), "Bee");
+}
diff --git a/compiler/logo-core/src/Phase.cpp b/compiler/logo-core/src/Phase.cpp
new file mode 100644
index 000000000..b929a31ba
--- /dev/null
+++ b/compiler/logo-core/src/Phase.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/Phase.h>
+
+namespace logo
+{
+
+void PhaseRunner<PhaseStrategy::Saturate>::run(const Phase &phase) const
+{
+  notifyPhaseBegin();
+
+  // Saturate strategy: keep sweeping the whole phase until a full sweep
+  // completes with no pass reporting a change.
+  bool changed = true;
+
+  while (changed)
+  {
+    changed = false;
+
+    for (auto &pass : phase)
+    {
+      notifyPassBegin(pass.get());
+
+      const bool pass_changed = pass->run(_graph);
+      changed = changed || pass_changed;
+
+      notifyPassEnd(pass.get(), pass_changed);
+    }
+  }
+
+  notifyPhaseEnd();
+}
+
+void PhaseRunner<PhaseStrategy::Restart>::run(const Phase &phase) const
+{
+  notifyPhaseBegin();
+
+  // Restart strategy: as soon as any pass changes the graph, abandon the
+  // current sweep and restart the phase from its first pass.
+  bool changed = true;
+
+  while (changed)
+  {
+    changed = false;
+
+    for (auto &pass : phase)
+    {
+      notifyPassBegin(pass.get());
+
+      const bool pass_changed = pass->run(_graph);
+      changed = changed || pass_changed;
+
+      notifyPassEnd(pass.get(), pass_changed);
+
+      if (changed)
+      {
+        break; // restart from the beginning of the phase
+      }
+    }
+  }
+
+  notifyPhaseEnd();
+}
+
+} // namespace logo
diff --git a/compiler/logo/CMakeLists.txt b/compiler/logo/CMakeLists.txt
new file mode 100644
index 000000000..399cb7586
--- /dev/null
+++ b/compiler/logo/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Build the "logo" library from all sources under src/, excluding unit tests
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(logo STATIC ${SOURCES})
+# PIC so this static archive can be linked into shared objects
+set_target_properties(logo PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(logo PRIVATE src)
+target_include_directories(logo PUBLIC include)
+target_link_libraries(logo PUBLIC loco)
+target_link_libraries(logo PUBLIC logo_core)
+target_link_libraries(logo PRIVATE locomotiv)
+target_link_libraries(logo PRIVATE stdex)
+
+# Unit tests are built only when ENABLE_TEST is set
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(logo_test ${TESTS})
+target_include_directories(logo_test PRIVATE src)
+target_link_libraries(logo_test logo)
+target_link_libraries(logo_test stdex)
diff --git a/compiler/logo/README.md b/compiler/logo/README.md
new file mode 100644
index 000000000..0cf1ba313
--- /dev/null
+++ b/compiler/logo/README.md
@@ -0,0 +1,3 @@
+# logo
+
+_logo_ provides general _loco_ graph passes for transformation and optimization.
diff --git a/compiler/logo/include/logo/ConstantFoldingPass.h b/compiler/logo/include/logo/ConstantFoldingPass.h
new file mode 100644
index 000000000..99ccdc315
--- /dev/null
+++ b/compiler/logo/include/logo/ConstantFoldingPass.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_CONSTANT_FOLDING_PASS_H__
+#define __LOGO_CONSTANT_FOLDING_PASS_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace logo
+{
+
+/**
+ * @brief Performs constant folding optimization
+ */
+class ConstantFoldingPass : public Pass
+{
+public:
+  const char *name(void) const final { return "ConstantFoldingPass"; }
+
+public:
+  // Folds nodes whose arguments are all constants into ConstGen nodes.
+  // Returns true iff the graph was modified.
+  bool run(loco::Graph *graph) override;
+};
+
+} // namespace logo
+
+#endif // __LOGO_CONSTANT_FOLDING_PASS_H__
diff --git a/compiler/logo/include/logo/Passes.h b/compiler/logo/include/logo/Passes.h
new file mode 100644
index 000000000..636251e45
--- /dev/null
+++ b/compiler/logo/include/logo/Passes.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_PASSES_H__
+#define __LOGO_PASSES_H__
+
+// Please keep this in alphabetical order
+
+#include <logo/ConstantFoldingPass.h>
+#include <logo/RemoveDeadNodePass.h>
+#include <logo/RemoveForwardNodePass.h>
+#include <logo/ReorderDecodePass.h>
+#include <logo/ResolveDuplicateReshapePass.h>
+#include <logo/ResolveRedundantReshapePass.h>
+#include <logo/SimplifyDomainConversionPass.h>
+
+#endif // __LOGO_PASSES_H__
diff --git a/compiler/logo/include/logo/RemoveDeadNodePass.h b/compiler/logo/include/logo/RemoveDeadNodePass.h
new file mode 100644
index 000000000..ae1c67feb
--- /dev/null
+++ b/compiler/logo/include/logo/RemoveDeadNodePass.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_REMOVE_DEAD_NODE_PASS_H__
+#define __LOGO_REMOVE_DEAD_NODE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace logo
+{
+
+/**
+ * @brief Erase every node that does not contribute to any graph output
+ */
+struct RemoveDeadNodePass final : public Pass
+{
+  const char *name(void) const final { return "RemoveDeadNodePass"; }
+
+  // "final" matches the sibling pass headers and lets the compiler verify
+  // that this actually overrides Pass::run. Returns true iff nodes were removed.
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace logo
+
+#endif // __LOGO_REMOVE_DEAD_NODE_PASS_H__
diff --git a/compiler/logo/include/logo/RemoveForwardNodePass.h b/compiler/logo/include/logo/RemoveForwardNodePass.h
new file mode 100644
index 000000000..12437c43f
--- /dev/null
+++ b/compiler/logo/include/logo/RemoveForwardNodePass.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_REMOVE_FORWARD_NODE_PASS_H__
+#define __LOGO_REMOVE_FORWARD_NODE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace logo
+{
+
+/**
+ * @brief Use the input of "Forward" node instead
+ *
+ * BEFORE:
+ * [X] -> [Forward] -> [Y]
+ *
+ * AFTER:
+ * [X] -> [Y]
+ * [Forward]
+ *
+ * NOTE This transform does not remove "Forward" node
+ */
+struct RemoveForwardNodePass final : public Pass
+{
+  const char *name(void) const final { return "RemoveForwardNodePass"; }
+
+  // Returns true iff at least one Forward node was bypassed
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace logo
+
+#endif // __LOGO_REMOVE_FORWARD_NODE_PASS_H__
diff --git a/compiler/logo/include/logo/ReorderDecodePass.h b/compiler/logo/include/logo/ReorderDecodePass.h
new file mode 100644
index 000000000..2f74c6afa
--- /dev/null
+++ b/compiler/logo/include/logo/ReorderDecodePass.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_REORDER_DECODE_PASS_H__
+#define __LOGO_REORDER_DECODE_PASS_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+#include <loco/IR/Nodes.h>
+
+namespace logo
+{
+
+/**
+ * @brief Reorder XXXDecode -> ? as ? -> XXXDecode if possible
+ *
+ * This transformation increases the chance of domain conversion simplification.
+ */
+template <typename T> struct ReorderDecodePass;
+
+template <> struct ReorderDecodePass<loco::TensorBiasAdd> final : public Pass
+{
+  // NOTE(review): the returned name carries a trailing space — presumably
+  //               accidental; confirm no consumer depends on it before removing
+  const char *name(void) const final { return "ReorderDecodePass<TensorBiasAdd> "; }
+
+  // "final" added for consistency with the other pass headers; returns true
+  // iff the graph was modified
+  bool run(loco::Graph *g) final;
+};
+
+template <> struct ReorderDecodePass<loco::ReLU> final : public Pass
+{
+  const char *name(void) const final { return "ReorderDecodePass<ReLU> "; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace logo
+
+#endif // __LOGO_REORDER_DECODE_PASS_H__
diff --git a/compiler/logo/include/logo/ResolveDuplicateReshapePass.h b/compiler/logo/include/logo/ResolveDuplicateReshapePass.h
new file mode 100644
index 000000000..7e6c67fcd
--- /dev/null
+++ b/compiler/logo/include/logo/ResolveDuplicateReshapePass.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_RESOLVE_DUPLICATE_RESHAPE_PASS_H__
+#define __LOGO_RESOLVE_DUPLICATE_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace logo
+{
+
+/**
+ * @brief Resolve duplicated Reshape nodes in a row
+ */
+class ResolveDuplicateReshapePass final : public Pass
+{
+public:
+  const char *name(void) const final { return "ResolveDuplicateReshapePass"; }
+
+public:
+  // Returns true iff consecutive Reshape nodes were merged
+  bool run(loco::Graph *graph) override;
+};
+
+} // namespace logo
+
+#endif // __LOGO_RESOLVE_DUPLICATE_RESHAPE_PASS_H__
diff --git a/compiler/logo/include/logo/ResolveRedundantReshapePass.h b/compiler/logo/include/logo/ResolveRedundantReshapePass.h
new file mode 100644
index 000000000..3a2dc4f3d
--- /dev/null
+++ b/compiler/logo/include/logo/ResolveRedundantReshapePass.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_RESOLVE_REDUNDANT_RESHAPE_PASS_H__
+#define __LOGO_RESOLVE_REDUNDANT_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace logo
+{
+
+/**
+ * @brief Remove redundant canonical FixedReshape
+ *
+ * @note To effectively run this transform, canonical shape inference should be
+ * done ahead
+ */
+class ResolveRedundantReshapePass final : public Pass
+{
+public:
+  const char *name(void) const final { return "ResolveRedundantReshapePass"; }
+
+public:
+  // Returns true iff a no-op FixedReshape was removed
+  bool run(loco::Graph *graph) override;
+};
+
+} // namespace logo
+
+#endif // __LOGO_RESOLVE_REDUNDANT_RESHAPE_PASS_H__
diff --git a/compiler/logo/include/logo/SimplifyDomainConversionPass.h b/compiler/logo/include/logo/SimplifyDomainConversionPass.h
new file mode 100644
index 000000000..551806f60
--- /dev/null
+++ b/compiler/logo/include/logo/SimplifyDomainConversionPass.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_SIMPLIFY_DOMAIN_CONVERSION_H__
+#define __LOGO_SIMPLIFY_DOMAIN_CONVERSION_H__
+
+#include <logo/Pass.h>
+
+namespace logo
+{
+
+/**
+ * @brief Simplify redundant domain conversion
+ *
+ * SimplifyDomainConversionPass recognizes the following patterns:
+ * - FeatureDecode followed by FeatureEncode (Feature -> Tensor -> Feature)
+ * - FeatureEncode followed by FeatureDecode (Tensor -> Feature -> Tensor)
+ * - FilterEncode followed by FilterDecode (Tensor -> Filter -> Tensor)
+ * - BiasEncode followed by BiasDecode (Tensor -> Bias -> Tensor)
+ * - DepthwiseFilterEncode followed by DepthwiseFilterDecode (Tensor -> DepthwiseFilter -> Tensor)
+ * - MatrixDecode followed by MatrixEncode (Matrix -> Tensor -> Matrix)
+ * - MatrixEncode followed by MatrixDecode (Tensor -> Matrix -> Tensor)
+ * - (TO BE ADDED)
+ */
+struct SimplifyDomainConversionPass final : public Pass
+{
+  const char *name(void) const final { return "SimplifyDomainConversionPass"; }
+
+  // Returns true iff an encode/decode pair was eliminated
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace logo
+
+#endif // __LOGO_SIMPLIFY_DOMAIN_CONVERSION_H__
diff --git a/compiler/logo/requires.cmake b/compiler/logo/requires.cmake
new file mode 100644
index 000000000..9a7d14788
--- /dev/null
+++ b/compiler/logo/requires.cmake
@@ -0,0 +1,4 @@
+# Inter-package dependencies resolved by the nnas/nncc build infrastructure
+require("loco")
+require("logo-core")
+require("locomotiv")
+require("stdex")
diff --git a/compiler/logo/src/Passes/ConstantFoldingPass.cpp b/compiler/logo/src/Passes/ConstantFoldingPass.cpp
new file mode 100644
index 000000000..e038e7140
--- /dev/null
+++ b/compiler/logo/src/Passes/ConstantFoldingPass.cpp
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ConstantFoldingPass.h>
+
+#include <loco.h>
+#include <loco/IR/CanonicalDialect.h>
+
+#include <stdex/Memory.h>
+
+#include <locomotiv/Session.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+// Total element count of a tensor-shaped node.
+// NOTE A rank-0 shape is treated as having zero elements here.
+uint64_t num_elements(const loco::NodeMixin<loco::NodeTrait::TensorShape> &shape)
+{
+  const uint32_t rank = shape.rank();
+
+  if (rank == 0)
+  {
+    return 0;
+  }
+
+  uint64_t product = 1;
+
+  for (uint32_t axis = 0; axis < rank; ++axis)
+  {
+    assert(shape.dim(axis).known());
+    product *= shape.dim(axis).value();
+  }
+
+  return product;
+}
+
+/// @brief For some op, constant folding should not be performed. This returns true if node is such
+/// op.
+bool skip(const loco::Node *node)
+{
+  // Only canonical-dialect nodes can be skipped (non-canonical nodes are
+  // rejected by foldable() instead)
+  if (node->dialect() != loco::CanonicalDialect::get())
+  {
+    return false;
+  }
+
+  // NOTE A switch replaces the former std::set lookup: this file never
+  //      included <set> (it compiled only transitively), and the switch also
+  //      avoids a function-local static
+  switch (static_cast<loco::CanonicalOpcode>(node->opnum()))
+  {
+    // TODO Current implementation works for 'Tensor' domain only. Support other domains such as
+    //      `Feature`, `Filter`, `Bias`, etc.
+    case loco::CanonicalOpcode::FilterEncode:
+    case loco::CanonicalOpcode::FeatureEncode:
+    case loco::CanonicalOpcode::BiasEncode:
+    case loco::CanonicalOpcode::DepthwiseFilterEncode:
+
+    // We don't perform constant folding for Push
+    case loco::CanonicalOpcode::Push:
+
+    // TensorBroadcast is a good hint for optimization
+    // TODO Let this option be controlled by driver using logo
+    case loco::CanonicalOpcode::TensorBroadcast:
+      return true;
+
+    default:
+      return false;
+  }
+}
+
+/// @brief Checks if a node is a target of constant folding transform
+bool foldable(const loco::Node *node)
+{
+  // Only canonical-dialect nodes are considered
+  if (node->dialect() != loco::CanonicalDialect::get())
+    return false;
+
+  // Some canonical ops are explicitly excluded from folding
+  if (skip(node))
+    return false;
+
+  // Nodes without arguments (e.g. ConstGen or Pull) have nothing to fold
+  if (node->arity() == 0)
+    return false;
+
+  // Foldable only when every argument is already a ConstGen
+  for (uint32_t i = 0; i < node->arity(); ++i)
+  {
+    if (node->arg(i)->opnum() != static_cast<uint32_t>(loco::CanonicalOpcode::ConstGen))
+      return false;
+  }
+
+  return true;
+}
+
+/// @brief Evaluate @a node with the locomotiv interpreter and replace it with
+///        a ConstGen holding the computed value
+void fold(loco::Graph *graph, loco::Node *node)
+{
+  assert(foldable(node)); // sanity check to find a mistake when this function is reused later
+
+  // calculate foldable node
+  locomotiv::Session sess(graph, std::vector<loco::Node *>{node});
+  sess.infer();
+  auto data = sess.get_output(0);
+
+  assert(data != nullptr);
+
+  auto shape = data->shape();
+  auto dtype = data->dtype();
+
+  // build ConstGen
+  auto new_const = graph->nodes()->create<loco::ConstGen>();
+  {
+    new_const->dtype(dtype);
+
+    new_const->rank(shape->rank());
+    for (uint32_t d = 0; d < shape->rank(); d++)
+      new_const->dim(d) = shape->dim(d);
+
+    auto count = num_elements(*new_const);
+
+    if (dtype == loco::DataType::FLOAT32)
+    {
+      new_const->size<loco::DataType::FLOAT32>(count);
+
+      auto const_buf = data->as_f32_bufptr()->base();
+      for (uint64_t x = 0; x < count; x++)
+        new_const->at<loco::DataType::FLOAT32>(x) = const_buf[x];
+    }
+    else if (dtype == loco::DataType::S32)
+    {
+      new_const->size<loco::DataType::S32>(count);
+
+      auto const_buf = data->as_s32_bufptr()->base();
+      for (uint64_t x = 0; x < count; x++)
+        new_const->at<loco::DataType::S32>(x) = const_buf[x];
+    }
+    else
+    {
+      // Previously, an unsupported dtype silently produced a ConstGen with no
+      // data backing it; fail loudly instead
+      throw std::runtime_error{"ConstantFolding: unsupported data type"};
+    }
+  }
+
+  // replace node with new_const
+  loco::replace(node).with(new_const);
+}
+
+} // namespace
+
+namespace logo
+{
+
+bool ConstantFoldingPass::run(loco::Graph *graph)
+{
+  bool changed = false;
+
+  // Visit nodes in postorder so arguments are folded before their users
+  for (auto node : loco::postorder_traversal(loco::output_nodes(graph)))
+  {
+    if (!foldable(node))
+      continue;
+
+    fold(graph, node);
+    changed = true;
+  }
+
+  return changed;
+}
+
+} // namespace logo
diff --git a/compiler/logo/src/Passes/ConstantFoldingPass.test.cpp b/compiler/logo/src/Passes/ConstantFoldingPass.test.cpp
new file mode 100644
index 000000000..824027762
--- /dev/null
+++ b/compiler/logo/src/Passes/ConstantFoldingPass.test.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ConstantFoldingPass.h>
+
+#include "TestHelper.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+using namespace logo::test;
+
+namespace
+{
+
+/*
+ test case:
+ ConstGen ---- Relu ---- Push
+ (-3.14, 3.14) (0, 3.14)
+
+ after constant folding:
+ ConstGen ------Push
+ (0, 3.14)
+*/
+// Builds ConstGen(-3.14, 3.14) -> ReLU -> Push into *graph (see diagram above)
+void create_net_const_relu(loco::Graph *graph)
+{
+  assert(graph);
+
+  auto const_node = graph->nodes()->create<loco::ConstGen>();
+  {
+    const_node->dtype(loco::DataType::FLOAT32);
+    const_node->rank(1);
+    const_node->dim(0) = 2;
+    const_node->size<loco::DataType::FLOAT32>(2);
+    const_node->at<loco::DataType::FLOAT32>(0) = -3.14f;
+    const_node->at<loco::DataType::FLOAT32>(1) = 3.14f;
+  }
+
+  auto relu_node = graph->nodes()->create<loco::ReLU>();
+  {
+    relu_node->input(const_node);
+  }
+
+  auto push_node = graph->nodes()->create<loco::Push>();
+  {
+    push_node->from(relu_node);
+  }
+
+  // Register the Push as a graph output so folding keeps it alive
+  auto graph_output = graph->outputs()->create();
+  {
+    graph_output->name("output");
+    graph_output->dtype(loco::DataType::FLOAT32);
+    loco::link(graph_output, push_node);
+  }
+}
+
+} // namespace
+
+TEST(ConstantFolding, const_relu_to_const)
+{
+  auto graph = loco::make_graph();
+  create_net_const_relu(graph.get());
+
+  // Run the pass to a fixed point (run() returns true while it changes the graph)
+  logo::ConstantFoldingPass pass;
+  while (pass.run(graph.get()) == true)
+  {
+    ;
+  }
+
+  // After folding, Push must be fed directly by a ConstGen
+  auto push = logo::test::find_first_node_by_type<loco::Push>(graph.get());
+  auto const_gen = dynamic_cast<loco::ConstGen *>(push->from());
+  ASSERT_NE(const_gen, nullptr);
+
+  ASSERT_EQ(const_gen->size<loco::DataType::FLOAT32>(), 2);
+  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(0), 0); // result of relu(-3.14)
+  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(1), 3.14f);
+}
+
+namespace
+{
+
+/*
+ test case:
+ ConstGen ---- Relu ---+
+ (-1, 1) (0, 1) |
+ ConstGen ---+-- ConcatV2 ----- Push
+ (2, 3) | (0, 1, 2, 3)
+ axis(0) ---+
+
+ after constant folding:
+ ConstGen ----- Push
+ (0, 1, 2, 3)
+*/
+// Builds {ConstGen -> ReLU, ConstGen} -> TensorConcat(axis 0) -> Push into *graph
+// (see diagram above)
+void create_net_const_relu_concat(loco::Graph *graph)
+{
+  assert(graph);
+
+  auto const_1_node = graph->nodes()->create<loco::ConstGen>();
+  {
+    const_1_node->dtype(loco::DataType::FLOAT32);
+    const_1_node->rank(1);
+    const_1_node->dim(0) = 2;
+    const_1_node->size<loco::DataType::FLOAT32>(2);
+    const_1_node->at<loco::DataType::FLOAT32>(0) = -1.0f;
+    const_1_node->at<loco::DataType::FLOAT32>(1) = 1.0f;
+  }
+
+  auto relu_node = graph->nodes()->create<loco::ReLU>();
+  {
+    relu_node->input(const_1_node);
+  }
+
+  auto const_2_node = graph->nodes()->create<loco::ConstGen>();
+  {
+    const_2_node->dtype(loco::DataType::FLOAT32);
+    const_2_node->rank(1);
+    const_2_node->dim(0) = 2;
+    const_2_node->size<loco::DataType::FLOAT32>(2);
+    const_2_node->at<loco::DataType::FLOAT32>(0) = 2.0f;
+    const_2_node->at<loco::DataType::FLOAT32>(1) = 3.0f;
+  }
+
+  auto concat_node = graph->nodes()->create<loco::TensorConcat>();
+  {
+    concat_node->lhs(relu_node);
+    concat_node->rhs(const_2_node);
+    concat_node->axis(0);
+  }
+
+  auto push_node = graph->nodes()->create<loco::Push>();
+  {
+    push_node->from(concat_node);
+  }
+
+  // Register the Push as a graph output so folding keeps it alive
+  auto graph_output = graph->outputs()->create();
+  {
+    graph_output->name("output");
+    graph_output->dtype(loco::DataType::FLOAT32);
+    loco::link(graph_output, push_node);
+  }
+}
+
+} // namespace
+
+TEST(ConstantFolding, const_relu_to_concat)
+{
+  auto graph = loco::make_graph();
+  create_net_const_relu_concat(graph.get());
+
+  // Run the pass to a fixed point (ReLU folds first, then the Concat)
+  logo::ConstantFoldingPass pass;
+  while (pass.run(graph.get()) == true)
+  {
+    ;
+  }
+
+  // After folding, Push must be fed directly by a single ConstGen
+  auto push = logo::test::find_first_node_by_type<loco::Push>(graph.get());
+  auto const_gen = dynamic_cast<loco::ConstGen *>(push->from());
+  ASSERT_NE(const_gen, nullptr);
+
+  ASSERT_EQ(const_gen->size<loco::DataType::FLOAT32>(), 4);
+  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(0), 0);
+  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(1), 1);
+  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(2), 2);
+  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(3), 3);
+}
diff --git a/compiler/logo/src/Passes/RemoveDeadNodePass.cpp b/compiler/logo/src/Passes/RemoveDeadNodePass.cpp
new file mode 100644
index 000000000..9b6ed6ab0
--- /dev/null
+++ b/compiler/logo/src/Passes/RemoveDeadNodePass.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/RemoveDeadNodePass.h>
+
+#include <loco/IR/Algorithm.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/IR/CanonicalNode.h>
+
+#include <set>
+
+namespace logo
+{
+
+bool RemoveDeadNodePass::run(loco::Graph *g)
+{
+  // Nodes reachable from the graph outputs are "active"; everything else is dead
+  auto active_nodes = loco::active_nodes(loco::output_nodes(g));
+
+  std::set<loco::Node *> dead_nodes;
+
+  for (auto node : loco::all_nodes(g))
+  {
+    if (active_nodes.find(node) == active_nodes.end())
+    {
+      dead_nodes.insert(node);
+    }
+  }
+
+  // Sever every reference held by a dead node BEFORE destroying any of them.
+  //
+  // A dead node may refer to another dead node (e.g. a dead Forward whose input
+  // is a dead ConstGen). Destroying nodes naively would therefore leave dangling
+  // references; the alternative would be to destroy in topological order, but
+  // dropping all links first is simpler.
+  for (auto node : dead_nodes)
+  {
+    node->drop();
+  }
+
+  for (auto node : dead_nodes)
+  {
+    g->nodes()->destroy(node);
+  }
+
+  return !dead_nodes.empty();
+}
+
+} // namespace logo
diff --git a/compiler/logo/src/Passes/RemoveForwardNodePass.cpp b/compiler/logo/src/Passes/RemoveForwardNodePass.cpp
new file mode 100644
index 000000000..c951cfac4
--- /dev/null
+++ b/compiler/logo/src/Passes/RemoveForwardNodePass.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/RemoveForwardNodePass.h>
+
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/IR/CanonicalNode.h>
+
+#include <set>
+
+namespace logo
+{
+
+bool RemoveForwardNodePass::run(loco::Graph *g)
+{
+  // Visitor that records every Forward node whose input is connected
+  struct Collector final : public loco::CanonicalNodeMutableVisitor<void>
+  {
+    void visit(loco::Forward *node) final
+    {
+      if (node->input() != nullptr)
+      {
+        candidates.insert(node);
+      }
+    }
+
+    void visit(loco::Node *) final { return; }
+
+    std::set<loco::Forward *> candidates;
+  };
+
+  Collector collector;
+
+  for (auto node : loco::all_nodes(g))
+  {
+    if (node->dialect() == loco::CanonicalDialect::get())
+    {
+      // NOTE(review): the dynamic_cast result is dereferenced without a null
+      //               check; presumably every canonical-dialect node is a
+      //               CanonicalNode - confirm
+      auto canonical_node = dynamic_cast<loco::CanonicalNode *>(node);
+      canonical_node->accept(&collector);
+    }
+  }
+
+  // Bypass each Forward: rewire its users to the Forward's input, then detach
+  // the input. The Forward node itself is NOT removed (see header comment).
+  for (auto node : collector.candidates)
+  {
+    replace(node).with(node->input());
+    node->input(nullptr);
+  }
+
+  return collector.candidates.size() > 0;
+}
+
+} // namespace logo
diff --git a/compiler/logo/src/Passes/ReorderDecodePass.cpp b/compiler/logo/src/Passes/ReorderDecodePass.cpp
new file mode 100644
index 000000000..724db5780
--- /dev/null
+++ b/compiler/logo/src/Passes/ReorderDecodePass.cpp
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ReorderDecodePass.h>
+
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/IR/CanonicalNode.h>
+
+#include <loco/IR/Nodes.h>
+
+#include <cassert>
+#include <queue>
+
+namespace
+{
+
+bool isTensorBiasAdd(const loco::Node *node)
+{
+ return node->opnum() == static_cast<uint32_t>(loco::CanonicalOpcode::TensorBiasAdd);
+}
+
+bool isReLU(const loco::Node *node)
+{
+ return node->opnum() == static_cast<uint32_t>(loco::CanonicalOpcode::ReLU);
+}
+
+} // namespace
+
+namespace logo
+{
+
+bool ReorderDecodePass<loco::TensorBiasAdd>::run(loco::Graph *g)
+{
+ std::queue<loco::FeatureDecode *> q;
+
+ // Update queue
+ class Collector final : public loco::CanonicalNodeMutableVisitor<void>
+ {
+ public:
+ Collector(std::queue<loco::FeatureDecode *> *out) : _out{out}
+ {
+ // DO NOTHING
+ }
+
+ void visit(loco::FeatureDecode *node) final
+ {
+ if (node->input() != nullptr)
+ {
+ _out->push(node);
+ }
+ }
+
+ void visit(loco::Node *) final { return; }
+
+ private:
+ // TODO This definition should be revised to support other decode operations
+ std::queue<loco::FeatureDecode *> *_out;
+ };
+
+ Collector collector{&q};
+
+ for (auto node : loco::all_nodes(g))
+ {
+ if (node->dialect() == loco::CanonicalDialect::get())
+ {
+ auto canonical_node = dynamic_cast<loco::CanonicalNode *>(node);
+ canonical_node->accept(&collector);
+ }
+ }
+
+ bool changed = false;
+
+ while (!q.empty())
+ {
+ auto cur_decode = q.front();
+ q.pop();
+
+ // Collector IS EXPECTED TO guarantee this property
+ assert(cur_decode->input() != nullptr);
+
+ for (auto u : loco::succs(cur_decode))
+ {
+ /**
+ * Let us consider the following graph:
+ *
+ * A ---> FeatureDecode(1) ---> ReLU(2)
+ *
+ * ReorderDecodeTransform rewrites this graph as follows:
+ *
+ * A -+-> FeatureDecode(1) ---> ReLU(2)
+ * |
+ * +-> ReLU(2') ---> FeatureDecode(1')
+ *
+       * Let us feed this updated graph to ReorderDecodeTransform.
+ *
+ * The naive implementation will create a new ReLU->FeatureDecode
+ * chain again, and results in unbounded graph blow-up.
+ *
+       *   A -+-> FeatureDecode(1) ---> ReLU(2)
+ * |
+ * +-> ReLU(2') ---> FeatureDecode(1')
+ * |
+ * +-> ReLU(2'') ---> FeatureDecode(1'')
+ *
+ * This check prevents such unbounded graph blow-up.
+ */
+ if (loco::succs(u).empty())
+ {
+ continue;
+ }
+
+ // Q. Is it better to create an independent transform for this rewriting rule?
+ if (isTensorBiasAdd(u))
+ {
+ auto old_badd = dynamic_cast<loco::TensorBiasAdd *>(u);
+
+ assert(old_badd != nullptr);
+
+ /**
+ * Let us consider the following example:
+ *
+ * A -=-> FeatureDecode(1) -+-> TensorBiasAdd(2) -+-> B1
+ * | |
+ * | +-> B2
+ * | |
+ * | +-> ...
+ * |
+ * +-> ...
+ *
+ * At this point, "cur_decode" points to (1) and "u" points to (2).
+ *
+ * First rewrite the graph as follows:
+ *
+ * A -+-> FeatureBiasAdd(2') ---> FeatureDecode(1') -+-> B1
+ * | |
+ * | +-> B2
+ * | |
+ * | +-> ...
+ * |
+ * +-> FeatureDecode(1) -+-> TensorBiasAdd(2) ; NO USE
+ * |
+ * +-> ...
+ *
+ * Q. Is it safe to apply this transform without "decoder" check?
+ */
+ auto new_badd = g->nodes()->create<loco::FeatureBiasAdd>();
+ auto new_decode = g->nodes()->create<loco::FeatureDecode>();
+
+ new_badd->value(cur_decode->input());
+ new_badd->bias(old_badd->bias());
+
+ new_decode->input(new_badd);
+ new_decode->decoder(cur_decode->decoder()->clone());
+
+ loco::replace(u).with(new_decode);
+
+        // Enqueue FeatureDecode(1') for further optimization.
+ q.push(new_decode);
+
+ changed = true;
+ }
+ }
+ }
+
+ return changed;
+}
+
+bool ReorderDecodePass<loco::ReLU>::run(loco::Graph *g)
+{
+ std::queue<loco::FeatureDecode *> q;
+
+ // Update queue
+ class Collector final : public loco::CanonicalNodeMutableVisitor<void>
+ {
+ public:
+ Collector(std::queue<loco::FeatureDecode *> *out) : _out{out}
+ {
+ // DO NOTHING
+ }
+
+ void visit(loco::FeatureDecode *node) final
+ {
+ if (node->input() != nullptr)
+ {
+ _out->push(node);
+ }
+ }
+
+ void visit(loco::Node *) final { return; }
+
+ private:
+ // TODO This definition should be revised to support other decode operations
+ std::queue<loco::FeatureDecode *> *_out;
+ };
+
+ Collector collector{&q};
+
+ for (auto node : loco::all_nodes(g))
+ {
+ if (node->dialect() == loco::CanonicalDialect::get())
+ {
+ auto canonical_node = dynamic_cast<loco::CanonicalNode *>(node);
+ canonical_node->accept(&collector);
+ }
+ }
+
+ bool changed = false;
+
+ while (!q.empty())
+ {
+ auto cur_decode = q.front();
+ q.pop();
+
+ // Collector IS EXPECTED TO guarantee this property
+ assert(cur_decode->input() != nullptr);
+
+ for (auto u : loco::succs(cur_decode))
+ {
+ /**
+ * Let us consider the following graph:
+ *
+ * A ---> FeatureDecode(1) ---> ReLU(2)
+ *
+ * ReorderDecodeTransform rewrites this graph as follows:
+ *
+ * A -+-> FeatureDecode(1) ---> ReLU(2)
+ * |
+ * +-> ReLU(2') ---> FeatureDecode(1')
+ *
+       * Let us feed this updated graph to ReorderDecodeTransform.
+ *
+ * The naive implementation will create a new ReLU->FeatureDecode
+ * chain again, and results in unbounded graph blow-up.
+ *
+       *   A -+-> FeatureDecode(1) ---> ReLU(2)
+ * |
+ * +-> ReLU(2') ---> FeatureDecode(1')
+ * |
+ * +-> ReLU(2'') ---> FeatureDecode(1'')
+ *
+ * This check prevents such unbounded graph blow-up.
+ */
+ if (loco::succs(u).empty())
+ {
+ continue;
+ }
+
+ if (isReLU(u))
+ {
+ /**
+ * Let us consider the following example:
+ *
+ * A -=-> FeatureDecode(1) -+-> ReLU(2) -+-> B1
+ * | |
+ * | +-> B2
+ * | |
+ * | +-> ...
+ * |
+ * +-> ...
+ *
+ * At this point, "cur_decode" points to FeatureDecode(1) and "u" points to ReLU(2).
+ *
+ * First rewrite the graph as follows:
+ *
+ * A -+-> ReLU(2') ---> FeatureDecode(1') -+-> B1
+ * | |
+ * | +-> B2
+ * | |
+ * | +-> ...
+ * |
+ * +-> FeatureDecode -+-> ReLU(2) ; NO USE
+ * |
+ * +-> ...
+ */
+ auto new_relu = g->nodes()->create<loco::ReLU>();
+ auto new_decode = g->nodes()->create<loco::FeatureDecode>();
+
+ new_relu->input(cur_decode->input());
+
+ new_decode->input(new_relu);
+ new_decode->decoder(cur_decode->decoder()->clone());
+
+ loco::replace(u).with(new_decode);
+
+ /**
+         * Enqueue FeatureDecode(1') for further optimization.
+ */
+ q.push(new_decode);
+
+ changed = true;
+ }
+ }
+ }
+
+ return changed;
+}
+
+} // namespace logo
diff --git a/compiler/logo/src/Passes/ResolveDuplicateReshapePass.cpp b/compiler/logo/src/Passes/ResolveDuplicateReshapePass.cpp
new file mode 100644
index 000000000..d3c74cb77
--- /dev/null
+++ b/compiler/logo/src/Passes/ResolveDuplicateReshapePass.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ResolveDuplicateReshapePass.h>
+
+#include <loco.h>
+
+#include <cassert>
+
+namespace
+{
+
+/// @return true when 'node' and its input node are both FixedReshapes
+bool is_duplicate_reshape(loco::Node *node)
+{
+ auto node_as_reshape = dynamic_cast<loco::FixedReshape *>(node);
+
+ if (!node_as_reshape)
+ return false;
+
+ auto input_as_reshape = dynamic_cast<loco::FixedReshape *>(node_as_reshape->input());
+
+ if (!input_as_reshape)
+ return false;
+
+ return true;
+}
+
+/**
+ * @brief Remap reshape's input to its input's input, i.e. bypass input reshape
+ *
+ * Before:
+ *
+ * In ----- FixedReshape_1 ----- [Out_1]*
+ * \
+ * ------- FixedReshape_2 --- [Out_2]*
+ * ('reshape' arg)
+ *
+ * After:
+ *
+ * In ----- FixedReshape_1 ----- [Out_1]*
+ * \
+ * --------------------------- FixedReshape_2 --- [Out_2]*
+ *
+ * Note: In case of no Out_1, FixedReshape_1 becomes dead node.
+ * Out_1 can be another FixedReshape as well, which would be resolved in
+ * another occurrence of this transform pass.
+ */
+void remap_input(loco::FixedReshape *reshape)
+{
+ auto input_reshape = dynamic_cast<loco::FixedReshape *>(reshape->input());
+
+ auto volume = [](loco::FixedReshape *node) {
+ uint32_t vol = 1;
+ for (uint32_t axis = 0; axis < node->rank(); ++axis)
+ {
+ assert(node->dim(axis).known());
+ vol *= node->dim(axis).value();
+ }
+ return vol;
+ };
+
+ // Volume mismatch between duplicate reshapes is pointless
+ assert(volume(reshape) == volume(input_reshape));
+
+ // Set node's input as input's input, i.e. bypass
+ reshape->input(input_reshape->input());
+}
+
+} // namespace
+
+namespace logo
+{
+
+bool ResolveDuplicateReshapePass::run(loco::Graph *graph)
+{
+ auto outputs = loco::output_nodes(graph);
+
+ bool changed = false;
+ for (auto node : loco::postorder_traversal(outputs))
+ {
+ if (is_duplicate_reshape(node))
+ {
+ auto node_as_reshape = dynamic_cast<loco::FixedReshape *>(node);
+
+ remap_input(node_as_reshape);
+
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace logo
diff --git a/compiler/logo/src/Passes/ResolveRedundantReshapePass.cpp b/compiler/logo/src/Passes/ResolveRedundantReshapePass.cpp
new file mode 100644
index 000000000..da4af15c1
--- /dev/null
+++ b/compiler/logo/src/Passes/ResolveRedundantReshapePass.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ResolveRedundantReshapePass.h>
+
+#include <loco/Service/ShapeInference.h>
+
+#include <loco.h>
+
+#include <cassert>
+
+namespace
+{
+
+bool shape_inference_done(loco::FixedReshape *reshape)
+{
+ return loco::shape_known(reshape) && loco::shape_known(reshape->input());
+}
+
+bool are_same_tensor_shapes(const loco::NodeShape &lhs, const loco::NodeShape &rhs)
+{
+ assert(lhs.domain() == loco::Domain::Tensor);
+ assert(rhs.domain() == loco::Domain::Tensor);
+
+ auto lts = lhs.as<loco::TensorShape>();
+ auto rts = rhs.as<loco::TensorShape>();
+
+ if (lts.rank() != rts.rank())
+ return false;
+
+ for (uint32_t axis = 0; axis < lts.rank(); ++axis)
+ {
+ assert(lts.dim(axis).known());
+ assert(rts.dim(axis).known());
+ if (lts.dim(axis).value() != rts.dim(axis).value())
+ return false;
+ }
+ return true;
+}
+
+/// @return true when 'reshape' has same input and output shape
+bool is_redundant_reshape(loco::FixedReshape *reshape)
+{
+ auto input_shape = loco::shape_get(reshape->input());
+ auto output_shape = loco::shape_get(reshape);
+
+ // Note that FixedReshape's input and output are always tensor
+ return are_same_tensor_shapes(input_shape, output_shape);
+}
+
+} // namespace
+
+namespace logo
+{
+
+/**
+ * @brief Bypass redundant FixedReshape
+ *
+ * Before:
+ *
+ * In ----- FixedReshape ----- [Out]*
+ *
+ * After:
+ *
+ * In ------------------------ [Out]*
+ * \
+ * ------ FixedReshape
+ */
+bool ResolveRedundantReshapePass::run(loco::Graph *graph)
+{
+ bool changed = false;
+ for (auto node : loco::postorder_traversal(loco::output_nodes(graph)))
+ {
+ if (auto reshape = dynamic_cast<loco::FixedReshape *>(node))
+ {
+ if (shape_inference_done(reshape))
+ {
+ if (is_redundant_reshape(reshape))
+ {
+ replace(reshape).with(reshape->input());
+ changed = true;
+ }
+ }
+ }
+ }
+
+ return changed;
+}
+
+} // namespace logo
diff --git a/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp b/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp
new file mode 100644
index 000000000..9b7a8d1c7
--- /dev/null
+++ b/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/SimplifyDomainConversionPass.h>
+
+#include <loco/IR/Algorithm.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/IR/CanonicalNode.h>
+
+#include <stdex/Memory.h>
+
+#include <set>
+#include <vector>
+#include <cassert>
+
+namespace
+{
+
+using namespace loco;
+
+// TODO Move this helper into loco
+bool equal(const Permutation<Domain::Feature> *lhs, const Permutation<Domain::Feature> *rhs)
+{
+ for (const auto &axis :
+ {FeatureAxis::Count, FeatureAxis::Depth, FeatureAxis::Height, FeatureAxis::Width})
+ {
+ if (lhs->axis(axis) != rhs->axis(axis))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool equal(const Permutation<Domain::Filter> *lhs, const Permutation<Domain::Filter> *rhs)
+{
+ for (const auto &axis :
+ {FilterAxis::Count, FilterAxis::Depth, FilterAxis::Height, FilterAxis::Width})
+ {
+ if (lhs->axis(axis) != rhs->axis(axis))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool equal(const Permutation<Domain::DepthwiseFilter> *lhs,
+ const Permutation<Domain::DepthwiseFilter> *rhs)
+{
+ for (const auto &axis : {DepthwiseFilterAxis::Depth, DepthwiseFilterAxis::Multiplier,
+ DepthwiseFilterAxis::Height, DepthwiseFilterAxis::Width})
+ {
+ if (lhs->axis(axis) != rhs->axis(axis))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool equal(const Permutation<Domain::Matrix> *lhs, const Permutation<Domain::Matrix> *rhs)
+{
+ for (const auto &axis : {MatrixAxis::Height, MatrixAxis::Width})
+ {
+ if (lhs->axis(axis) != rhs->axis(axis))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+void set_input_null(loco::Node *node)
+{
+ if (auto casted = dynamic_cast<loco::FeatureEncode *>(node))
+ casted->input(nullptr);
+ else if (auto casted = dynamic_cast<loco::FeatureDecode *>(node))
+ casted->input(nullptr);
+ else if (auto casted = dynamic_cast<loco::BiasDecode *>(node))
+ casted->input(nullptr);
+ else if (auto casted = dynamic_cast<loco::FilterEncode *>(node))
+ casted->input(nullptr);
+ else if (auto casted = dynamic_cast<loco::FilterDecode *>(node))
+ casted->input(nullptr);
+ else if (auto casted = dynamic_cast<loco::DepthwiseFilterEncode *>(node))
+ casted->input(nullptr);
+ else if (auto casted = dynamic_cast<loco::DepthwiseFilterDecode *>(node))
+ casted->input(nullptr);
+ else if (auto casted = dynamic_cast<loco::MatrixEncode *>(node))
+ casted->input(nullptr);
+ else if (auto casted = dynamic_cast<loco::MatrixDecode *>(node))
+ casted->input(nullptr);
+ else
+ assert(false && "not supported node type");
+}
+
+} // namespace
+
+namespace logo
+{
+
+bool SimplifyDomainConversionPass::run(loco::Graph *g)
+{
+ // TODO Introduce and Use "Pattern Match"
+ struct Collector final : public loco::CanonicalNodeMutableVisitor<void>
+ {
+ // Let's find FeatureDecode followed by FeatureEncode
+ void visit(loco::FeatureEncode *encode_node) final
+ {
+ using namespace loco;
+
+ auto encoder = encode_node->encoder();
+ assert(encoder != nullptr);
+
+ auto decode_node = dynamic_cast<loco::FeatureDecode *>(encode_node->input());
+ if (decode_node == nullptr)
+ {
+ return;
+ }
+ assert(decode_node->input() != nullptr);
+
+ auto decoder = decode_node->decoder();
+ assert(decoder != nullptr);
+
+ // NOTE Work only for permuting codec
+ auto perm_decoder = dynamic_cast<const PermutingDecoder<Domain::Feature> *>(decoder);
+ auto perm_encoder = dynamic_cast<const PermutingEncoder<Domain::Feature> *>(encoder);
+
+ if (perm_encoder == nullptr || perm_decoder == nullptr)
+ {
+ return;
+ }
+
+ if (equal(perm_encoder->perm(), perm_decoder->perm()))
+ {
+ forwardCandidates.insert({encode_node, decode_node->input()});
+ }
+ }
+
+ // Let's find `FeatureEncode -- FeatureDecode` pattern
+ void visit(loco::FeatureDecode *decode_node) final
+ {
+ using namespace loco;
+
+ auto encode_node = dynamic_cast<loco::FeatureEncode *>(decode_node->input());
+ if (encode_node == nullptr)
+ {
+ return;
+ }
+ assert(encode_node->input() != nullptr);
+
+ auto encoder = encode_node->encoder();
+ assert(encoder != nullptr);
+
+ auto decoder = decode_node->decoder();
+ assert(decoder != nullptr);
+
+ // NOTE Work only for permuting codec
+ auto perm_decoder = dynamic_cast<const PermutingDecoder<Domain::Feature> *>(decoder);
+ auto perm_encoder = dynamic_cast<const PermutingEncoder<Domain::Feature> *>(encoder);
+
+ if (perm_encoder == nullptr || perm_decoder == nullptr)
+ {
+ return;
+ }
+
+ if (equal(perm_encoder->perm(), perm_decoder->perm()))
+ {
+ forwardCandidates.insert({decode_node, encode_node->input()});
+ }
+ }
+
+ // Let's find `FilterEncode -- FilterDecode` pattern
+ void visit(loco::FilterDecode *decode_node) final
+ {
+ using namespace loco;
+
+ auto encode_node = dynamic_cast<loco::FilterEncode *>(decode_node->input());
+ if (encode_node == nullptr)
+ {
+ return;
+ }
+ assert(encode_node->input() != nullptr);
+
+ auto encoder = encode_node->encoder();
+ assert(encoder != nullptr);
+
+ auto decoder = decode_node->decoder();
+ assert(decoder != nullptr);
+
+ // NOTE Work only for permuting codec
+ auto perm_decoder = dynamic_cast<const PermutingDecoder<Domain::Filter> *>(decoder);
+ auto perm_encoder = dynamic_cast<const PermutingEncoder<Domain::Filter> *>(encoder);
+
+ if (perm_encoder == nullptr || perm_decoder == nullptr)
+ {
+ return;
+ }
+
+ if (equal(perm_encoder->perm(), perm_decoder->perm()))
+ {
+ forwardCandidates.insert({decode_node, encode_node->input()});
+ }
+ else
+ {
+ std::vector<loco::TensorAxis> perm_vec;
+ perm_vec.resize(4);
+
+ auto enc_perm = perm_encoder->perm();
+ auto dec_perm = perm_decoder->perm();
+
+ for (const auto &axis :
+ {FilterAxis::Count, FilterAxis::Height, FilterAxis::Width, FilterAxis::Depth})
+ {
+ auto from = enc_perm->axis(axis);
+ auto to = dec_perm->axis(axis);
+ perm_vec[to] = from;
+ }
+
+ transposeCandidates.insert(stdex::make_unique<TransposeCtx>(
+ encode_node, decode_node, encode_node->input(), perm_vec));
+ }
+ }
+
+ // Let's find `BiasEncode -- BiasDecode` pattern
+ void visit(loco::BiasDecode *decode_node) final
+ {
+ if (auto encode_node = dynamic_cast<loco::BiasEncode *>(decode_node->input()))
+ {
+ assert(encode_node->input() != nullptr);
+ forwardCandidates.insert({decode_node, encode_node->input()});
+ }
+ }
+
+ // Let's find `DepthwiseFilterEncode -- DepthwiseFilterDecode` pattern
+ void visit(loco::DepthwiseFilterDecode *decode_node) final
+ {
+ using namespace loco;
+
+ auto encode_node = dynamic_cast<loco::DepthwiseFilterEncode *>(decode_node->input());
+ if (encode_node == nullptr)
+ {
+ return;
+ }
+ assert(encode_node->input() != nullptr);
+
+ auto encoder = encode_node->encoder();
+ assert(encoder != nullptr);
+
+ auto decoder = decode_node->decoder();
+ assert(decoder != nullptr);
+
+ // NOTE Work only for permuting codec
+ auto perm_decoder = dynamic_cast<const PermutingDecoder<Domain::DepthwiseFilter> *>(decoder);
+ auto perm_encoder = dynamic_cast<const PermutingEncoder<Domain::DepthwiseFilter> *>(encoder);
+
+ if (perm_encoder == nullptr || perm_decoder == nullptr)
+ {
+ return;
+ }
+
+ if (equal(perm_encoder->perm(), perm_decoder->perm()))
+ {
+ forwardCandidates.insert({decode_node, encode_node->input()});
+ }
+ else
+ {
+ std::vector<TensorAxis> perm_vec;
+ perm_vec.resize(4);
+
+ auto enc_perm = perm_encoder->perm();
+ auto dec_perm = perm_decoder->perm();
+
+ for (const auto &axis : {DepthwiseFilterAxis::Depth, DepthwiseFilterAxis::Height,
+ DepthwiseFilterAxis::Width, DepthwiseFilterAxis::Multiplier})
+ {
+ auto from = enc_perm->axis(axis);
+ auto to = dec_perm->axis(axis);
+ perm_vec[to] = from;
+ }
+
+ transposeCandidates.insert(stdex::make_unique<TransposeCtx>(
+ encode_node, decode_node, encode_node->input(), perm_vec));
+ }
+ }
+
+ // Let's find MatrixDecode followed by MatrixEncode
+ void visit(loco::MatrixEncode *encode_node) final
+ {
+ using namespace loco;
+
+ auto encoder = encode_node->encoder();
+ assert(encoder != nullptr);
+
+ auto decode_node = dynamic_cast<loco::MatrixDecode *>(encode_node->input());
+ if (decode_node == nullptr)
+ {
+ return;
+ }
+ assert(decode_node->input() != nullptr);
+
+ auto decoder = decode_node->decoder();
+ assert(decoder != nullptr);
+
+ // NOTE Work only for permuting codec
+ auto perm_decoder = dynamic_cast<const PermutingDecoder<Domain::Matrix> *>(decoder);
+ auto perm_encoder = dynamic_cast<const PermutingEncoder<Domain::Matrix> *>(encoder);
+
+ if (perm_encoder == nullptr || perm_decoder == nullptr)
+ {
+ return;
+ }
+
+ if (equal(perm_encoder->perm(), perm_decoder->perm()))
+ {
+ forwardCandidates.insert({encode_node, decode_node->input()});
+ }
+ }
+
+ // Let's find MatrixEncode followed by MatrixDecode
+ void visit(loco::MatrixDecode *decode_node) final
+ {
+ using namespace loco;
+
+ auto encode_node = dynamic_cast<loco::MatrixEncode *>(decode_node->input());
+ if (encode_node == nullptr)
+ {
+ return;
+ }
+ assert(encode_node->input() != nullptr);
+
+ auto encoder = encode_node->encoder();
+ assert(encoder != nullptr);
+
+ auto decoder = decode_node->decoder();
+ assert(decoder != nullptr);
+
+ // NOTE Work only for permuting codec
+ auto perm_decoder = dynamic_cast<const PermutingDecoder<Domain::Matrix> *>(decoder);
+ auto perm_encoder = dynamic_cast<const PermutingEncoder<Domain::Matrix> *>(encoder);
+
+ if (perm_encoder == nullptr || perm_decoder == nullptr)
+ {
+ return;
+ }
+
+ if (equal(perm_encoder->perm(), perm_decoder->perm()))
+ {
+ forwardCandidates.insert({decode_node, encode_node->input()});
+ }
+ else
+ {
+ std::vector<loco::TensorAxis> perm_vec;
+ perm_vec.resize(2);
+
+ auto enc_perm = perm_encoder->perm();
+ auto dec_perm = perm_decoder->perm();
+
+ for (const auto &axis : {MatrixAxis::Height, MatrixAxis::Width})
+ {
+ auto from = enc_perm->axis(axis);
+ auto to = dec_perm->axis(axis);
+ perm_vec[to] = from;
+ }
+
+ transposeCandidates.insert(stdex::make_unique<TransposeCtx>(
+ encode_node, decode_node, encode_node->input(), perm_vec));
+ }
+ }
+
+ void visit(loco::Node *) final { return; }
+
+ using SimplifyingInfo = std::pair<loco::Node * /* end node of subgraph that will be replaced*/,
+ loco::Node * /* input of subgraph */>;
+ std::set<SimplifyingInfo> forwardCandidates;
+
+ struct TransposeCtx
+ {
+ loco::Node *first_node; // starting node of subgraph that will be replaced
+ loco::Node *last_node; // end node of subgraph that will be replaced
+ loco::Node *input_node; // input of subgraph
+ std::vector<loco::TensorAxis> perm_vec; // perm vector for transpose
+
+ TransposeCtx(loco::Node *first, loco::Node *last, loco::Node *input,
+ std::vector<loco::TensorAxis> perm)
+ : first_node(first), last_node(last), input_node(input), perm_vec(perm)
+ { /* empty */
+ }
+ };
+
+ std::set<std::unique_ptr<TransposeCtx>> transposeCandidates;
+ };
+
+ Collector collector;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (node->dialect() == loco::CanonicalDialect::get())
+ {
+ auto canonical_node = dynamic_cast<loco::CanonicalNode *>(node);
+ canonical_node->accept(&collector);
+ }
+ }
+
+ for (auto p : collector.forwardCandidates)
+ {
+ auto forward_node = g->nodes()->create<loco::Forward>();
+ forward_node->input(p.second);
+ replace(p.first).with(forward_node);
+ set_input_null(p.first);
+ }
+
+ for (auto &ctx : collector.transposeCandidates)
+ {
+ auto transpose_node = g->nodes()->create<loco::TensorTranspose>();
+ {
+ transpose_node->perm()->size(ctx->perm_vec.size());
+
+ for (loco::TensorAxis axis = 0; axis < ctx->perm_vec.size(); axis++)
+ transpose_node->perm()->axis(axis) = ctx->perm_vec[axis];
+ }
+
+ transpose_node->input(ctx->input_node);
+ replace(ctx->last_node).with(transpose_node);
+ set_input_null(ctx->first_node);
+ }
+
+ return (collector.forwardCandidates.size() > 0 or collector.transposeCandidates.size() > 0);
+}
+
+} // namespace logo
diff --git a/compiler/logo/src/Passes/SimplifyDomainConversionPass.test.cpp b/compiler/logo/src/Passes/SimplifyDomainConversionPass.test.cpp
new file mode 100644
index 000000000..6bd93c1b2
--- /dev/null
+++ b/compiler/logo/src/Passes/SimplifyDomainConversionPass.test.cpp
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/SimplifyDomainConversionPass.h>
+
+#include "TestHelper.h"
+
+#include <loco.h>
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+// code borrowed from GraphBlock.h/cpp in exo-tflite
+enum class FilterLayout
+{
+ OHWI, // a.k.a., NHWC, Tensorflow Lite uses this layout
+ HWIO, // Tensorflow format
+};
+
+template <FilterLayout T> loco::Permutation<loco::Domain::Filter> perm();
+
+template <> loco::Permutation<loco::Domain::Filter> perm<FilterLayout::OHWI>()
+{
+ // Make NHWC permutation for encoder and decoder
+ loco::Permutation<loco::Domain::Filter> OHWI; // a.k.a., NHWC
+
+ OHWI.axis(loco::FilterAxis::Count) = 0;
+ OHWI.axis(loco::FilterAxis::Height) = 1;
+ OHWI.axis(loco::FilterAxis::Width) = 2;
+ OHWI.axis(loco::FilterAxis::Depth) = 3;
+
+ return OHWI;
+}
+
+template <> loco::Permutation<loco::Domain::Filter> perm<FilterLayout::HWIO>()
+{
+  // Make HWIO permutation for encoder and decoder
+ loco::Permutation<loco::Domain::Filter> HWIO;
+
+ HWIO.axis(loco::FilterAxis::Height) = 0;
+ HWIO.axis(loco::FilterAxis::Width) = 1;
+ HWIO.axis(loco::FilterAxis::Depth) = 2;
+ HWIO.axis(loco::FilterAxis::Count) = 3;
+
+ return HWIO;
+}
+
+template <FilterLayout T> loco::FilterDecode *make_filter_decode(loco::Node *input_for_decode)
+{
+ loco::Graph *g = input_for_decode->graph();
+
+ auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
+
+ decoder->perm(perm<T>());
+
+ auto dec = g->nodes()->create<loco::FilterDecode>();
+ dec->input(input_for_decode);
+ dec->decoder(std::move(decoder));
+
+ return dec;
+}
+
+template <FilterLayout T> loco::FilterEncode *make_filter_encode(loco::Node *input_for_encode)
+{
+ loco::Graph *g = input_for_encode->graph();
+
+ auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+
+ encoder->perm(perm<T>());
+
+ auto enc = g->nodes()->create<loco::FilterEncode>();
+ enc->input(input_for_encode);
+ enc->encoder(std::move(encoder));
+
+ return enc;
+}
+
+/*
+ test case:
+  ConstGen (2x3x4x5) ---- FilterEncode ---- FilterDecode --- Push
+ 0 H O 0
+ 1 W H 1
+ 2 I(depth) W 2
+ 3 O(count) I 3
+
+ axis 0 ---------------------> H --------------> H -----------> 1
+ axis 1 ---------------------> W --------------> W -----------> 2
+ axis 2 ---------------------> I --------------> I -----------> 3
+ axis 3 ---------------------> O --------------> O -----------> 0
+
+  so perm vector of Transpose = [3, 0, 1, 2]
+*/
+void create_net_FilterEncode_FilterDecode_different_perms(loco::Graph *graph)
+{
+ assert(graph);
+
+ auto const_node = graph->nodes()->create<loco::ConstGen>();
+ {
+ const_node->dtype(loco::DataType::FLOAT32);
+ const_node->rank(4);
+ int count = 1;
+ for (int i = 0; i < 4; ++i)
+ {
+ const_node->dim(i) = i + 2;
+ count *= i + 2;
+ }
+ const_node->size<loco::DataType::FLOAT32>(count);
+ for (uint32_t i = 0; i < count; i++)
+ const_node->at<loco::DataType::FLOAT32>(i) = 3.14f; // any number
+ }
+
+ auto encoder = make_filter_encode<FilterLayout::HWIO>(const_node);
+ auto decoder = make_filter_decode<FilterLayout::OHWI>(encoder);
+
+ auto push_node = graph->nodes()->create<loco::Push>();
+ {
+ push_node->from(decoder);
+ }
+
+ auto graph_output = graph->outputs()->create();
+ {
+ graph_output->name("output");
+ graph_output->dtype(loco::DataType::FLOAT32);
+ loco::link(graph_output, push_node);
+ }
+}
+
+/*
+ test case:
+  ConstGen (2x3x4x5) ---- FilterEncode ---- FilterDecode --- Push
+ 0 H H 0
+ 1 W W 1
+ 2 I(depth) I 2
+ 3 O(count) O 3
+
+ axis 0 ---------------------> H --------------> H -----------> 0
+ axis 1 ---------------------> W --------------> W -----------> 1
+ axis 2 ---------------------> I --------------> I -----------> 2
+ axis 3 ---------------------> O --------------> O -----------> 3
+
+  so perm vector of Transpose = [0, 1, 2, 3] and transposes should be eliminated
+*/
+void create_net_FilterEncode_FilterDecode_equal_perms(loco::Graph *graph)
+{
+ assert(graph);
+
+ auto const_node = graph->nodes()->create<loco::ConstGen>();
+ {
+ const_node->dtype(loco::DataType::FLOAT32);
+ const_node->rank(4);
+ int count = 1;
+ for (int i = 0; i < 4; ++i)
+ {
+ const_node->dim(i) = i + 2;
+ count *= i + 2;
+ }
+ const_node->size<loco::DataType::FLOAT32>(count);
+ for (uint32_t i = 0; i < count; i++)
+ const_node->at<loco::DataType::FLOAT32>(i) = 3.14f; // any number
+ }
+
+ auto encoder = make_filter_encode<FilterLayout::HWIO>(const_node);
+ auto decoder = make_filter_decode<FilterLayout::HWIO>(encoder);
+
+ auto push_node = graph->nodes()->create<loco::Push>();
+ {
+ push_node->from(decoder);
+ }
+
+ auto graph_output = graph->outputs()->create();
+ {
+ graph_output->name("output");
+ graph_output->dtype(loco::DataType::FLOAT32);
+ loco::link(graph_output, push_node);
+ }
+}
+
+} // namespace
+
+TEST(SimplifyDomainConversionPass, FilterEncode_FilterDecode_different_perms)
+{
+ auto graph = loco::make_graph();
+ create_net_FilterEncode_FilterDecode_different_perms(graph.get());
+
+ logo::SimplifyDomainConversionPass pass;
+ while (pass.run(graph.get()) == true)
+ ;
+
+ auto tr = logo::test::find_first_node_by_type<loco::TensorTranspose>(graph.get());
+ {
+ ASSERT_EQ(tr->perm()->size(), 4);
+ ASSERT_EQ(tr->perm()->axis(0), 3);
+ ASSERT_EQ(tr->perm()->axis(1), 0);
+ ASSERT_EQ(tr->perm()->axis(2), 1);
+ ASSERT_EQ(tr->perm()->axis(3), 2);
+ }
+
+ auto const_gen = dynamic_cast<loco::ConstGen *>(tr->input());
+ ASSERT_NE(const_gen, nullptr);
+}
+
+TEST(SimplifyDomainConversionPass, FilterEncode_FilterDecode_equal_perms)
+{
+ auto graph = loco::make_graph();
+ create_net_FilterEncode_FilterDecode_equal_perms(graph.get());
+
+ logo::SimplifyDomainConversionPass pass;
+ while (pass.run(graph.get()) == true)
+ ;
+
+ ASSERT_EQ(loco::output_nodes(graph.get()).size(), 1);
+ loco::Node *output_node = loco::output_nodes(graph.get())[0];
+
+ auto forward = dynamic_cast<loco::Forward *>(output_node->arg(0));
+ ASSERT_NE(forward, nullptr);
+ auto const_gen = dynamic_cast<loco::ConstGen *>(forward->arg(0));
+ ASSERT_NE(const_gen, nullptr);
+}
diff --git a/compiler/logo/src/TestHelper.h b/compiler/logo/src/TestHelper.h
new file mode 100644
index 000000000..43631efa9
--- /dev/null
+++ b/compiler/logo/src/TestHelper.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TEST_HELPER_H__
+#define __TEST_HELPER_H__
+
+#include <loco.h>
+
+namespace logo
+{
+namespace test
+{
+
+template <typename T> T *find_first_node_by_type(loco::Graph *g)
+{
+ T *first_node = nullptr;
+
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+ {
+ first_node = dynamic_cast<T *>(node);
+ if (first_node != nullptr)
+ break;
+ }
+
+ return first_node;
+}
+
+} // namespace test
+} // namespace logo
+
+#endif // __TEST_HELPER_H__
diff --git a/compiler/luci/CMakeLists.txt b/compiler/luci/CMakeLists.txt
new file mode 100644
index 000000000..387c22487
--- /dev/null
+++ b/compiler/luci/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_subdirectory(log)
+add_subdirectory(lang)
+add_subdirectory(service)
+add_subdirectory(pass)
+add_subdirectory(logex)
+add_subdirectory(import)
+add_subdirectory(export)
+add_subdirectory(tester)
+
+add_subdirectory(tests)
diff --git a/compiler/luci/README.md b/compiler/luci/README.md
new file mode 100644
index 000000000..49c833121
--- /dev/null
+++ b/compiler/luci/README.md
@@ -0,0 +1,3 @@
+# luci
+
+_luci_ provides the IR for TFLite/Circle and builds a Graph from a FlatBuffer model.
diff --git a/compiler/luci/export/CMakeLists.txt b/compiler/luci/export/CMakeLists.txt
new file mode 100644
index 000000000..e32eca366
--- /dev/null
+++ b/compiler/luci/export/CMakeLists.txt
@@ -0,0 +1,29 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+# TODO enable tests
+#file(GLOB_RECURSE TESTS "src/*.test.cpp")
+#list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(luci_export SHARED ${SOURCES})
+target_include_directories(luci_export PRIVATE src)
+target_include_directories(luci_export PUBLIC include)
+target_link_libraries(luci_export PRIVATE luci_lang)
+target_link_libraries(luci_export PRIVATE luci_service)
+target_link_libraries(luci_export PRIVATE luci_pass)
+target_link_libraries(luci_export PRIVATE mio_circle)
+target_link_libraries(luci_export PRIVATE luci_log)
+target_link_libraries(luci_export PRIVATE luci_logex)
+target_link_libraries(luci_export PRIVATE nncc_common)
+target_link_libraries(luci_export PRIVATE locop)
+target_link_libraries(luci_export PRIVATE oops)
+install(TARGETS luci_export DESTINATION lib)
+
+#if(NOT ENABLE_TEST)
+# return()
+#endif(NOT ENABLE_TEST)
+#
+#nnas_find_package(GTest REQUIRED)
+#
+#GTest_AddTest(luci_export_test ${TESTS})
+#target_include_directories(luci_export_test PRIVATE src)
+#target_link_libraries(luci_export_test luci_export)
+#target_link_libraries(luci_export_test oops)
diff --git a/compiler/luci/export/README.md b/compiler/luci/export/README.md
new file mode 100644
index 000000000..12b190a2f
--- /dev/null
+++ b/compiler/luci/export/README.md
@@ -0,0 +1,3 @@
+# luci-export
+
+_luci-export_ exports a _loco_ graph of Circle IR to a Circle model file
diff --git a/compiler/luci/export/include/luci/CircleExporter.h b/compiler/luci/export/include/luci/CircleExporter.h
new file mode 100644
index 000000000..0584c623c
--- /dev/null
+++ b/compiler/luci/export/include/luci/CircleExporter.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLEEXPORTER_H__
+#define __LUCI_CIRCLEEXPORTER_H__
+
+#include <luci/IR/Module.h>
+
+#include <loco.h>
+
+#include <memory>
+
+namespace luci
+{
+
+class CircleExporter
+{
+public:
+ // This contract class describes the interaction between an exporter and its client.
+ struct Contract
+ {
+ public:
+ virtual ~Contract() = default;
+
+ public: // Client -> Exporter
+ // Input Graph (to be exported)
+ // Exporter expects a loco graph that consists of Circle nodes
+ virtual loco::Graph *graph(void) const = 0;
+
+ // Input Module (to be exported)
+ // Exporter expects a luci module that consists of loco graphs
+ // TODO make this pure virtual
+ virtual luci::Module *module(void) const;
+
+ public: // Exporter -> Client
+ // Exporter calls store for export data
+ // Notice: Please DO NOT STORE ptr and size when implementing this in Client
+ virtual bool store(const char *ptr, const size_t size) const = 0;
+ };
+
+public:
+ explicit CircleExporter();
+
+public:
+ // invoke(...) returns false on failure.
+ bool invoke(Contract *) const;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLEEXPORTER_H__
diff --git a/compiler/luci/export/src/Check.h b/compiler/luci/export/src/Check.h
new file mode 100644
index 000000000..e05ec904a
--- /dev/null
+++ b/compiler/luci/export/src/Check.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CHECK_H__
+#define __CHECK_H__
+
+#include <stdexcept>
+#include <cassert>
+#include <iostream>
+
+// TODO Add macro for Release version
+
+#define LUCI_ASSERT(condition, msg) \
+ { \
+ if (!(condition)) \
+ { \
+ std::cerr << "[assert failed] " << (msg) << ". " << std::endl; \
+ assert((condition)); \
+ } \
+ }
+
+#endif // __CHECK_H__
diff --git a/compiler/luci/export/src/CircleExporter.cpp b/compiler/luci/export/src/CircleExporter.cpp
new file mode 100644
index 000000000..125df7802
--- /dev/null
+++ b/compiler/luci/export/src/CircleExporter.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleExporter.h"
+#include "luci/IR/Module.h"
+#include "CircleExporterImpl.h"
+
+#include <oops/InternalExn.h>
+
+#include <fstream>
+#include <memory>
+
+namespace luci
+{
+
+// TODO remove this
+Module *CircleExporter::Contract::module(void) const { return nullptr; }
+
+CircleExporter::CircleExporter()
+{
+ // NOTHING TO DO
+}
+
+bool CircleExporter::invoke(Contract *contract) const
+{
+ auto module = contract->module();
+ if (module != nullptr)
+ {
+ CircleExporterImpl impl(module);
+
+ const char *ptr = impl.getBufferPointer();
+ const size_t size = impl.getBufferSize();
+
+ // the serialized buffer is sent to the client exactly once
+ return contract->store(ptr, size);
+ }
+
+ auto graph = contract->graph();
+ if (graph == nullptr)
+ return false;
+
+ CircleExporterImpl impl(graph);
+
+ const char *ptr = impl.getBufferPointer();
+ const size_t size = impl.getBufferSize();
+
+ // the serialized buffer is sent to the client exactly once
+ return contract->store(ptr, size);
+}
+
+} // namespace luci
diff --git a/compiler/luci/export/src/CircleExporterImpl.cpp b/compiler/luci/export/src/CircleExporterImpl.cpp
new file mode 100644
index 000000000..81109ee62
--- /dev/null
+++ b/compiler/luci/export/src/CircleExporterImpl.cpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleExporterImpl.h"
+#include "Optimize.h"
+#include "CircleTensorExporter.h"
+#include "CircleOperationExporter.h"
+#include "CircleExporterUtils.h"
+
+#include <oops/InternalExn.h>
+#include <mio/circle/schema_generated.h>
+#include <flatbuffers/flatbuffers.h>
+
+#include <cassert>
+#include <unordered_map>
+#include <string>
+#include <stdexcept>
+
+namespace
+{
+
+luci::CircleInput *input_node(loco::Graph *g, const loco::GraphInputIndex &index)
+{
+ for (uint32_t n = 0; n < g->nodes()->size(); ++n)
+ {
+ if (auto pull = dynamic_cast<luci::CircleInput *>(g->nodes()->at(n)))
+ {
+ if (pull->indexed() && pull->index() == index)
+ {
+ return pull;
+ }
+ }
+ }
+ return nullptr;
+}
+
+luci::CircleOutput *output_node(loco::Graph *g, const loco::GraphOutputIndex &index)
+{
+ for (uint32_t n = 0; n < g->nodes()->size(); ++n)
+ {
+ if (auto push = dynamic_cast<luci::CircleOutput *>(g->nodes()->at(n)))
+ {
+ if (push->indexed() && push->index() == index)
+ {
+ return push;
+ }
+ }
+ }
+ return nullptr;
+}
+
+void registerGraphInputTensors(loco::Graph *graph, luci::SubGraphContext &ctx)
+{
+ for (uint32_t n = 0; n < graph->inputs()->size(); ++n)
+ {
+ auto node = input_node(graph, n);
+ assert(node != nullptr);
+ ctx._inputs.push_back(luci::get_tensor_index(node));
+ }
+}
+
+void registerGraphOutputTensors(loco::Graph *graph, luci::SubGraphContext &ctx)
+{
+ for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
+ {
+ auto push = output_node(graph, n);
+ assert(push != nullptr);
+ auto node = push->from();
+ assert(node != nullptr);
+ ctx._outputs.push_back(luci::get_tensor_index(node));
+ }
+}
+
+} // namespace
+
+namespace
+{
+
+using namespace circle;
+using namespace flatbuffers;
+
+Offset<Vector<Offset<OperatorCode>>>
+encodeOperatorCodes(FlatBufferBuilder &builder, std::unordered_map<luci::OpCode, uint32_t> &opcodes,
+ std::unordered_map<luci::OpCode, std::string> &custom_opcodes)
+{
+ std::vector<Offset<OperatorCode>> operator_codes_vec(opcodes.size());
+ for (auto it : opcodes)
+ {
+ uint32_t idx = it.second;
+ if (it.first.opcode != BuiltinOperator_CUSTOM)
+ {
+ operator_codes_vec[idx] = CreateOperatorCode(builder, it.first.opcode);
+ }
+ else // custom op
+ {
+ auto opCode = it.first;
+ auto custom_code = custom_opcodes.find(opCode);
+ if (custom_code == custom_opcodes.end())
+ INTERNAL_EXN("Cannot find code for customop even though opcode is BuiltinOperator_CUSTOM");
+
+ operator_codes_vec[idx] =
+ CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
+ }
+ }
+ return builder.CreateVector(operator_codes_vec);
+}
+
+} // namespace
+
+namespace luci
+{
+
+using namespace circle;
+using namespace flatbuffers;
+
+CircleExporterImpl::CircleExporterImpl(loco::Graph *graph) { exportGraph(graph); }
+CircleExporterImpl::CircleExporterImpl(Module *module) { exportModule(module); }
+
+::flatbuffers::Offset<::circle::SubGraph>
+CircleExporterImpl::exportSubgraph(SerializedGraphData &gd)
+{
+ auto tensors = _builder.CreateVector(gd._tensors);
+ auto inputs = _builder.CreateVector(gd._inputs);
+ auto outputs = _builder.CreateVector(gd._outputs);
+ auto operators = _builder.CreateVector(gd._operators);
+ auto df = gd._data_format;
+ auto subgraph = CreateSubGraph(_builder, tensors, inputs, outputs, operators, df);
+ return subgraph;
+}
+
+void CircleExporterImpl::exportGraph(loco::Graph *graph)
+{
+ // do graph optimization
+ optimize(graph);
+
+ _builder.Clear();
+
+ SerializedModelData md;
+ SerializedGraphData gd;
+
+ // This version is taken from comment in fbs
+ constexpr uint32_t version = 0;
+
+ // TODO set this value properly
+ gd._data_format = circle::DataFormat::DataFormat_CHANNELS_LAST;
+
+ // prepare model data
+ prepareModelData(_builder, md);
+
+ // parse graph into SerializedModelData structure
+ exportOpDefinedTensors(graph, _builder, md, gd);
+
+ // NOTE Invoke these register functions only after each node is annotated with its tensor_index
+ registerGraphInputTensors(graph, gd);
+ registerGraphOutputTensors(graph, gd);
+
+ exportNodes(graph, _builder, md, gd);
+
+ // encode operator codes
+ auto operator_codes =
+ encodeOperatorCodes(_builder, md._operator_codes, md._custom_operator_codes);
+
+ // Subgraphs
+ Offset<SubGraph> subgraph = exportSubgraph(gd);
+ auto subgraphs = _builder.CreateVector(std::vector<Offset<SubGraph>>{subgraph});
+
+ // Description
+ std::string description_str = "nnpackage";
+ auto description = _builder.CreateString(description_str);
+
+ // create array of buffers
+ auto buffers = _builder.CreateVector(md._buffers);
+
+ // empty metadata
+ std::vector<int> metadata_buffer_vec;
+ auto metadata_buffer = _builder.CreateVector(metadata_buffer_vec);
+
+ // Model
+ auto model_offset = CreateModel(_builder, version, operator_codes, subgraphs, description,
+ buffers, metadata_buffer);
+ FinishModelBuffer(_builder, model_offset);
+}
+
+void CircleExporterImpl::exportModule(Module *module)
+{
+ assert(module->size() > 0);
+ // do graph optimization
+
+ SerializedModelData md;
+
+ _builder.Clear();
+
+ std::vector<flatbuffers::Offset<circle::SubGraph>> subgraph_vec;
+
+ for (size_t g = 0; g < module->size(); ++g)
+ {
+ auto graph = module->graph(g);
+
+ optimize(graph);
+
+ SerializedGraphData gd;
+
+ // TODO set this value properly
+ gd._data_format = circle::DataFormat::DataFormat_CHANNELS_LAST;
+
+ // parse graph into SerializedModelData structure
+ exportOpDefinedTensors(graph, _builder, md, gd);
+
+ // NOTE Invoke these register functions only after each node is annotated with its tensor_index
+ registerGraphInputTensors(graph, gd);
+ registerGraphOutputTensors(graph, gd);
+
+ exportNodes(graph, _builder, md, gd);
+
+ // Subgraphs
+ Offset<SubGraph> subgraph = exportSubgraph(gd);
+ subgraph_vec.push_back(subgraph);
+ }
+
+ auto subgraphs = _builder.CreateVector(std::vector<Offset<SubGraph>>{subgraph_vec});
+
+ // encode operator codes
+ auto operator_codes =
+ encodeOperatorCodes(_builder, md._operator_codes, md._custom_operator_codes);
+
+ // Description
+ std::string description_str = "nnpackage";
+ auto description = _builder.CreateString(description_str);
+
+ // create array of buffers
+ auto buffers = _builder.CreateVector(md._buffers);
+
+ // empty metadata
+ std::vector<int> metadata_buffer_vec;
+ auto metadata_buffer = _builder.CreateVector(metadata_buffer_vec);
+
+ // This version is taken from comment in fbs
+ constexpr uint32_t version = 0;
+
+ // Model
+ auto model_offset = CreateModel(_builder, version, operator_codes, subgraphs, description,
+ buffers, metadata_buffer);
+ FinishModelBuffer(_builder, model_offset);
+}
+
+const char *CircleExporterImpl::getBufferPointer() const
+{
+ return reinterpret_cast<const char *>(_builder.GetBufferPointer());
+}
+
+size_t CircleExporterImpl::getBufferSize() const { return _builder.GetSize(); }
+
+} // namespace luci
diff --git a/compiler/luci/export/src/CircleExporterImpl.h b/compiler/luci/export/src/CircleExporterImpl.h
new file mode 100644
index 000000000..e5d5b5a00
--- /dev/null
+++ b/compiler/luci/export/src/CircleExporterImpl.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EXPORTER_IMPL_H__
+#define __CIRCLE_EXPORTER_IMPL_H__
+
+#include "luci/CircleExporter.h"
+#include "luci/IR/Module.h"
+
+#include "SerializedData.h"
+
+#include "SerializedData.h"
+
+#include <mio/circle/schema_generated.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+/**
+ * internal implementation of interface exporter class
+ */
+class CircleExporterImpl
+{
+public:
+ CircleExporterImpl() = delete;
+ ~CircleExporterImpl() = default;
+
+ explicit CircleExporterImpl(loco::Graph *graph);
+ explicit CircleExporterImpl(Module *module);
+
+ /**
+ * @return pointer to buffer with serialized graph
+ */
+ const char *getBufferPointer() const;
+
+ /**
+ * @return size of buffer with serialized graph
+ */
+ size_t getBufferSize() const;
+
+private:
+ /**
+ * @brief create Subgraph using data stored in SerializedGraphData
+ * @param gd information about serialized parts of the model
+ * @return offset in buffer corresponding to serialized subgraph
+ */
+ flatbuffers::Offset<circle::SubGraph> exportSubgraph(SerializedGraphData &gd);
+
+ /**
+ * @brief root function that writes graph into internal buffer
+ * @param graph
+ */
+ void exportGraph(loco::Graph *graph);
+
+ /**
+ * @brief root function that writes Module into internal buffer
+ * @param module
+ */
+ void exportModule(Module *module);
+
+private:
+ flatbuffers::FlatBufferBuilder _builder;
+};
+
+} // namespace luci
+
+#endif // __CIRCLE_EXPORTER_IMPL_H__
diff --git a/compiler/luci/export/src/CircleExporterUtils.cpp b/compiler/luci/export/src/CircleExporterUtils.cpp
new file mode 100644
index 000000000..1272facb2
--- /dev/null
+++ b/compiler/luci/export/src/CircleExporterUtils.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleExporterUtils.h"
+
+#include <oops/InternalExn.h>
+
+#include <cassert>
+#include <memory>
+
+namespace luci
+{
+
+circle::ActivationFunctionType to_circle_actfunc(luci::FusedActFunc func)
+{
+ switch (func)
+ {
+ case luci::FusedActFunc::NONE:
+ return circle::ActivationFunctionType_NONE;
+ case luci::FusedActFunc::RELU:
+ return circle::ActivationFunctionType_RELU;
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ return circle::ActivationFunctionType_RELU_N1_TO_1;
+ case luci::FusedActFunc::RELU6:
+ return circle::ActivationFunctionType_RELU6;
+ default:
+ INTERNAL_EXN_V("trying to convert unsupported luci::FusedActFunc", oops::to_uint32(func));
+ }
+}
+
+circle::TensorType to_circle_tensortype(loco::DataType type)
+{
+ switch (type)
+ {
+ case loco::DataType::U8:
+ return circle::TensorType_UINT8;
+
+ case loco::DataType::S8:
+ return circle::TensorType_INT8;
+ case loco::DataType::S16:
+ return circle::TensorType_INT16;
+ case loco::DataType::S32:
+ return circle::TensorType_INT32;
+ case loco::DataType::S64:
+ return circle::TensorType_INT64;
+
+ case loco::DataType::FLOAT16:
+ return circle::TensorType_FLOAT16;
+ case loco::DataType::FLOAT32:
+ return circle::TensorType_FLOAT32;
+
+ case loco::DataType::BOOL:
+ return circle::TensorType_BOOL;
+
+ default:
+ INTERNAL_EXN_V("failed to convert unsupported loco::DataType", oops::to_uint32(type));
+ }
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+uint32_t SerializedModelData::registerBuiltinOpcode(circle::BuiltinOperator builtin_code)
+{
+ auto it = _operator_codes.find(OpCode{builtin_code});
+ if (it != _operator_codes.end())
+ {
+ return it->second;
+ }
+ auto idx = static_cast<uint32_t>(_operator_codes.size());
+ _operator_codes.emplace(OpCode{builtin_code}, idx);
+ return idx;
+}
+
+uint32_t SerializedModelData::registerCustomOpcode(const std::string &custom_op)
+{
+ circle::BuiltinOperator custom_code = circle::BuiltinOperator_CUSTOM;
+ auto idx = registerBuiltinOpcode(custom_code);
+ _custom_operator_codes.emplace(OpCode{custom_code}, custom_op);
+ return idx;
+}
+
+circle::Padding getOpPadding(const loco::Padding2D *pad, const loco::Stride<2> *stride,
+ const ShapeDescription &ifm, const ShapeDescription &ofm)
+{
+ // VALID padding
+ if (pad->top() == 0 && pad->bottom() == 0 && pad->left() == 0 && pad->right() == 0)
+ return circle::Padding_VALID;
+
+ // SAME padding
+ //
+ // For same padding, by definition, following equation should hold:
+ // O = floor((I - 1) / S) + 1
+ // where input size I, output size O, stride S
+ //
+ // NOTE input and output 'feature' map are shape of NHWC
+ bool same_padding_criterion_1 =
+ (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
+ (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
+
+ // For same padding, rear padding is same or bigger than front padding by at most 1
+ bool same_padding_criterion_2 =
+ (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
+ (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
+
+ if (same_padding_criterion_1 && same_padding_criterion_2)
+ return circle::Padding_SAME;
+
+ INTERNAL_EXN("Unsupported padding criteria");
+}
+
+circle::Padding getOpPadding(const luci::Padding pad)
+{
+ if (pad == luci::Padding::VALID)
+ return circle::Padding_VALID;
+ if (pad == luci::Padding::SAME)
+ return circle::Padding_SAME;
+
+ INTERNAL_EXN_V("Unsupported luci::Padding", oops::to_uint32(pad));
+}
+
+namespace
+{
+
+class CircleTensorIndexAnnotation final : public loco::NodeAnnotation
+{
+public:
+ CircleTensorIndexAnnotation(const CircleTensorIndex &index) : _index{index}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const CircleTensorIndex &index(void) const { return _index; }
+
+private:
+ CircleTensorIndex _index;
+};
+
+} // namespace
+
+void set_tensor_index(loco::Node *node, const CircleTensorIndex &tensor_id)
+{
+ assert(node->annot<CircleTensorIndexAnnotation>() == nullptr);
+ node->annot(std::make_unique<CircleTensorIndexAnnotation>(tensor_id));
+}
+
+CircleTensorIndex get_tensor_index(loco::Node *node)
+{
+ assert(node->annot<CircleTensorIndexAnnotation>() != nullptr);
+ return node->annot<CircleTensorIndexAnnotation>()->index();
+}
+
+} // namespace luci
diff --git a/compiler/luci/export/src/CircleExporterUtils.h b/compiler/luci/export/src/CircleExporterUtils.h
new file mode 100644
index 000000000..6b970fd3c
--- /dev/null
+++ b/compiler/luci/export/src/CircleExporterUtils.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EXPORTER_UTILS_H__
+#define __CIRCLE_EXPORTER_UTILS_H__
+
+#include "SerializedData.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/ShapeDescription.h>
+
+#include <loco.h>
+
+#include <mio/circle/schema_generated.h>
+
+namespace luci
+{
+
+circle::ActivationFunctionType to_circle_actfunc(luci::FusedActFunc func);
+circle::TensorType to_circle_tensortype(loco::DataType type);
+
+} // namespace luci
+
+namespace luci
+{
+
+circle::Padding getOpPadding(const loco::Padding2D *pad, const loco::Stride<2> *stride,
+ const ShapeDescription &ifm, const ShapeDescription &ofm);
+circle::Padding getOpPadding(const luci::Padding pad);
+
+using CircleTensorIndex = int32_t;
+
+void set_tensor_index(loco::Node *node, const CircleTensorIndex &tensor_id);
+CircleTensorIndex get_tensor_index(loco::Node *node);
+
+} // namespace luci
+
+#endif // __CIRCLE_EXPORTER_UTILS_H__
diff --git a/compiler/luci/export/src/CircleOperationExporter.cpp b/compiler/luci/export/src/CircleOperationExporter.cpp
new file mode 100644
index 000000000..ad9c7fd4b
--- /dev/null
+++ b/compiler/luci/export/src/CircleOperationExporter.cpp
@@ -0,0 +1,643 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleOperationExporter.h"
+#include "CircleExporterUtils.h"
+#include "Check.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/CanonicalNodeVisitor.h>
+#include <oops/InternalExn.h>
+
+#include <flatbuffers/flexbuffers.h>
+
+using namespace flatbuffers;
+using namespace circle;
+
+namespace
+{
+
+using namespace luci;
+
+/**
+ * @brief Visitor that serializes each luci/loco node as a circle Operator
+ *        into SerializedGraphData, registering opcodes in SerializedModelData.
+ */
+class OperationExporter final : public luci::CircleNodeMutableVisitor<void>,
+                                public loco::CanonicalNodeMutableVisitor<void>
+{
+public:
+  OperationExporter(FlatBufferBuilder &fbb, SerializedModelData &m, SerializedGraphData &g)
+      : builder{fbb}, md{m}, gd{g}
+  {
+    // DO NOTHING
+  }
+
+public:
+  void visit(luci::CircleAbs *) final;
+  void visit(luci::CircleAdd *) final;
+  void visit(luci::CircleArgMax *) final;
+  void visit(luci::CircleAveragePool2D *) final;
+  void visit(luci::CircleBatchToSpaceND *) final;
+  void visit(luci::CircleConcatenation *) final;
+  void visit(luci::CircleConst *) final{/* skip, everything is done in exportOpDefinedTensors */};
+  void visit(luci::CircleConv2D *) final;
+  void visit(luci::CircleCos *) final;
+  void visit(luci::CircleDepthwiseConv2D *) final;
+  void visit(luci::CircleDiv *) final;
+  void visit(luci::CircleExp *) final;
+  void visit(luci::CircleEqual *) final;
+  void visit(luci::CircleFullyConnected *) final;
+  void visit(luci::CircleLogicalNot *) final;
+  void visit(luci::CircleLogicalOr *) final;
+  void visit(luci::CircleMaximum *) final;
+  void visit(luci::CircleMaxPool2D *) final;
+  void visit(luci::CircleMean *) final;
+  void visit(luci::CircleMul *) final;
+  void visit(luci::CirclePack *) final;
+  void visit(luci::CirclePad *) final;
+  void visit(luci::CircleRelu *) final;
+  void visit(luci::CircleRelu6 *) final;
+  void visit(luci::CircleReshape *) final;
+  void visit(luci::CircleRsqrt *) final;
+  void visit(luci::CircleSoftmax *) final;
+  void visit(luci::CircleSqrt *) final;
+  void visit(luci::CircleSquaredDifference *) final;
+  void visit(luci::CircleSub *) final;
+  // TODO CircleTanh
+  void visit(luci::CircleTranspose *) final;
+  void visit(luci::CircleTransposeConv *) final;
+  // Circle only
+  void visit(luci::CircleInstanceNorm *) final;
+  // Virtual
+  void visit(luci::CircleInput *) final {}
+  void visit(luci::CircleOutput *) final {}
+
+private:
+  /**
+   * @brief Exports CircleMaxPool2D or CircleAveragePool2D
+   *
+   * @note CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D
+   */
+  template <class CirclePool2D>
+  void export_pool_2d(CirclePool2D *node, circle::BuiltinOperator builtin_op);
+
+private:
+  FlatBufferBuilder &builder; // FlatBuffer under construction
+  SerializedModelData &md;    // model-wide data (operator codes, buffers)
+  SerializedGraphData &gd;    // per-graph data that receives created Operators
+};
+
+// Serialize a pooling node (MaxPool2D or AveragePool2D) sharing Pool2DOptions.
+template <class CirclePool2D>
+void OperationExporter::export_pool_2d(CirclePool2D *node, circle::BuiltinOperator builtin_op)
+{
+  // Only the two pooling builtins share Pool2DOptions; reject anything else early.
+  LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
+                  builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
+              "Should be MaxPool or AvgPool");
+  LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
+
+  uint32_t opcode_index = md.registerBuiltinOpcode(builtin_op);
+  std::vector<int32_t> input_ids{get_tensor_index(node->value())};
+  std::vector<int32_t> output_ids{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto fb_inputs = builder.CreateVector(input_ids);
+  auto fb_outputs = builder.CreateVector(output_ids);
+
+  circle::Padding padding = getOpPadding(node->padding());
+
+  auto pool_options =
+      CreatePool2DOptions(builder, padding, node->stride()->w(), node->stride()->h(),
+                          node->filter()->w(), node->filter()->h(),
+                          to_circle_actfunc(node->fusedActivationFunction()));
+  auto op = CreateOperator(builder, opcode_index, fb_inputs, fb_outputs,
+                           circle::BuiltinOptions_Pool2DOptions, pool_options.Union());
+  gd._operators.push_back(op);
+}
+
+// Serialize CircleAbs as an ABS operator with (empty) AbsOptions.
+void OperationExporter::visit(luci::CircleAbs *node)
+{
+  uint32_t opcode_index = md.registerBuiltinOpcode(circle::BuiltinOperator_ABS);
+  std::vector<int32_t> input_ids{get_tensor_index(node->x())};
+  std::vector<int32_t> output_ids{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto fb_inputs = builder.CreateVector(input_ids);
+  auto fb_outputs = builder.CreateVector(output_ids);
+  auto abs_options = CreateAbsOptions(builder);
+  auto op = CreateOperator(builder, opcode_index, fb_inputs, fb_outputs,
+                           circle::BuiltinOptions_AbsOptions, abs_options.Union());
+  gd._operators.push_back(op);
+}
+
+// Serialize CircleAdd as an ADD operator; the fused activation goes into AddOptions.
+void OperationExporter::visit(luci::CircleAdd *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_ADD);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateAddOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_AddOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleArgMax as an ARG_MAX operator; output dtype goes into ArgMaxOptions.
+void OperationExporter::visit(luci::CircleArgMax *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_ARG_MAX);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
+                                  get_tensor_index(node->dimension())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateArgMaxOptions(builder, to_circle_tensortype(node->output_type()));
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_ArgMaxOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// AVERAGE_POOL_2D shares its serialization with MAX_POOL_2D via export_pool_2d.
+void OperationExporter::visit(luci::CircleAveragePool2D *node)
+{
+  export_pool_2d<luci::CircleAveragePool2D>(node, circle::BuiltinOperator_AVERAGE_POOL_2D);
+}
+
+// Serialize CircleConcatenation as CONCATENATION; variadic inputs are gathered
+// in order, and axis/activation go into ConcatenationOptions.
+void OperationExporter::visit(luci::CircleConcatenation *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION);
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+
+  for (uint32_t i = 0; i < node->numValues(); ++i)
+    inputs_vec.push_back(get_tensor_index(node->values(i)));
+
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateConcatenationOptions(builder, node->axis(),
+                                            to_circle_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_ConcatenationOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleBatchToSpaceND as BATCH_TO_SPACE_ND with inputs
+// {input, block_shape, crops} and empty options.
+void OperationExporter::visit(luci::CircleBatchToSpaceND *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_BATCH_TO_SPACE_ND);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
+                                  get_tensor_index(node->block_shape()),
+                                  get_tensor_index(node->crops())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateBatchToSpaceNDOptions(builder);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_BatchToSpaceNDOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleConv2D as CONV_2D with inputs {input, filter, bias};
+// padding/stride/activation go into Conv2DOptions.
+void OperationExporter::visit(luci::CircleConv2D *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_CONV_2D);
+
+  // Make input, output and options for operator
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->filter()),
+                                  get_tensor_index(node->bias())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  circle::Padding padding = getOpPadding(node->padding());
+  auto options = CreateConv2DOptions(builder, padding, node->stride()->w(), node->stride()->h(),
+                                     to_circle_actfunc(node->fusedActivationFunction()));
+
+  // Make CONV_2D operator
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_Conv2DOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleCos as a COS operator with (empty) CosOptions.
+void OperationExporter::visit(luci::CircleCos *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_COS);
+
+  // Make input, output and options for operator
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateCosOptions(builder);
+
+  // Make COS operator
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_CosOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleDepthwiseConv2D as DEPTHWISE_CONV_2D with inputs
+// {input, filter, bias}; padding/stride/depth-multiplier/activation go into options.
+void OperationExporter::visit(luci::CircleDepthwiseConv2D *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_DEPTHWISE_CONV_2D);
+
+  // Make input, output and options for operator
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->filter()),
+                                  get_tensor_index(node->bias())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  circle::Padding padding = getOpPadding(node->padding());
+  auto options = CreateDepthwiseConv2DOptions(builder, padding, node->stride()->w(),
+                                              node->stride()->h(), node->depthMultiplier(),
+                                              to_circle_actfunc(node->fusedActivationFunction()));
+
+  // Make DEPTHWISE_CONV_2D operator
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_DepthwiseConv2DOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleDiv as a DIV operator; the fused activation goes into DivOptions.
+void OperationExporter::visit(luci::CircleDiv *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_DIV);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateDivOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_DivOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleExp as an EXP operator with (empty) ExpOptions.
+void OperationExporter::visit(luci::CircleExp *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_EXP);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  // FIX: was CreateAbsOptions, which stored an AbsOptions table while the
+  // operator below declares its union type as BuiltinOptions_ExpOptions.
+  auto options = CreateExpOptions(builder);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_ExpOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleFullyConnected as FULLY_CONNECTED with inputs
+// {input, weights, bias}; the fused activation goes into FullyConnectedOptions.
+void OperationExporter::visit(luci::CircleFullyConnected *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_FULLY_CONNECTED);
+
+  // Make input, output and options for operator
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
+                                  get_tensor_index(node->weights()),
+                                  get_tensor_index(node->bias())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options =
+      CreateFullyConnectedOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
+
+  // Make FULLY_CONNECTED operator
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_FullyConnectedOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleLogicalNot as LOGICAL_NOT with (empty) LogicalNotOptions.
+void OperationExporter::visit(luci::CircleLogicalNot *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_LOGICAL_NOT);
+
+  // Make input, output and options for operator
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateLogicalNotOptions(builder);
+
+  // Make LOGICAL_NOT operator
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_LogicalNotOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleLogicalOr as LOGICAL_OR with (empty) LogicalOrOptions.
+void OperationExporter::visit(luci::CircleLogicalOr *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_LOGICAL_OR);
+
+  // Make input, output and options for operator
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateLogicalOrOptions(builder);
+
+  // Make LOGICAL_OR operator
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_LogicalOrOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleMaximum as MAXIMUM; MAXIMUM and MINIMUM share MaximumMinimumOptions.
+void OperationExporter::visit(luci::CircleMaximum *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_MAXIMUM);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateMaximumMinimumOptions(builder);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_MaximumMinimumOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// MAX_POOL_2D shares its serialization with AVERAGE_POOL_2D via export_pool_2d.
+void OperationExporter::visit(luci::CircleMaxPool2D *node)
+{
+  export_pool_2d<luci::CircleMaxPool2D>(node, circle::BuiltinOperator_MAX_POOL_2D);
+}
+
+// Serialize CircleMean as MEAN; reducers share ReducerOptions (keep_dims flag).
+void OperationExporter::visit(luci::CircleMean *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_MEAN);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
+                                  get_tensor_index(node->reduction_indices())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateReducerOptions(builder, node->keep_dims());
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_ReducerOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleMul as a MUL operator; the fused activation goes into MulOptions.
+void OperationExporter::visit(luci::CircleMul *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_MUL);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateMulOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_MulOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CirclePack as PACK; variadic inputs are gathered in order and
+// values_count/axis go into PackOptions.
+void OperationExporter::visit(luci::CirclePack *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_PACK);
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+
+  for (uint32_t i = 0; i < node->values_count(); ++i)
+    inputs_vec.push_back(get_tensor_index(node->values(i)));
+
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreatePackOptions(builder, node->values_count(), node->axis());
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_PackOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CirclePad as PAD with inputs {input, paddings} and empty PadOptions.
+void OperationExporter::visit(luci::CirclePad *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_PAD);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
+                                  get_tensor_index(node->paddings())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreatePadOptions(builder);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_PadOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleRelu as RELU; RELU carries no builtin options.
+void OperationExporter::visit(luci::CircleRelu *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_RELU);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->features())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleRelu6 as RELU6; RELU6 carries no builtin options.
+void OperationExporter::visit(luci::CircleRelu6 *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_RELU6);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->features())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleReshape as RESHAPE; the static new_shape attribute is copied
+// into ReshapeOptions in addition to the runtime shape input tensor.
+void OperationExporter::visit(luci::CircleReshape *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_RESHAPE);
+
+  // Create inputs and outputs.
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()),
+                                  get_tensor_index(node->shape())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(node)};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+
+  // Create options.
+  auto new_shape = builder.CreateVector<int32_t>(
+      node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
+  auto options = CreateReshapeOptions(builder, new_shape);
+
+  // Create the operator.
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_ReshapeOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleRsqrt as RSQRT; RSQRT carries no builtin options.
+void OperationExporter::visit(luci::CircleRsqrt *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_RSQRT);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleSoftmax as SOFTMAX; the beta scaling factor goes into SoftmaxOptions.
+void OperationExporter::visit(luci::CircleSoftmax *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SOFTMAX);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->logits())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateSoftmaxOptions(builder, node->beta());
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_SoftmaxOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleSqrt as SQRT; SQRT carries no builtin options.
+void OperationExporter::visit(luci::CircleSqrt *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SQRT);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleSquaredDifference as SQUARED_DIFFERENCE with empty options.
+void OperationExporter::visit(luci::CircleSquaredDifference *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SQUARED_DIFFERENCE);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateSquaredDifferenceOptions(builder);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_SquaredDifferenceOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleSub as a SUB operator; the fused activation goes into SubOptions.
+void OperationExporter::visit(luci::CircleSub *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SUB);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateSubOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_SubOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// TODO CircleTanh
+
+// Serialize CircleTranspose as TRANSPOSE with (empty) TransposeOptions.
+// NOTE(review): unlike the other visitors this uses raw arg(0)/arg(1) rather
+// than named accessors; presumably arg(0) is the tensor and arg(1) the
+// permutation — confirm against CircleTranspose's IR definition.
+void OperationExporter::visit(luci::CircleTranspose *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_TRANSPOSE);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->arg(0)), get_tensor_index(node->arg(1))};
+  std::vector<int32_t> outputs_vec{get_tensor_index(node)};
+
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateTransposeOptions(builder);
+
+  auto op_offset =
+      CreateOperator(builder, op_idx, inputs, outputs,
+                     circle::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleTransposeConv as TRANSPOSE_CONV with inputs
+// {inputSizes, filter, outBackprop}; padding/stride go into TransposeConvOptions.
+void OperationExporter::visit(luci::CircleTransposeConv *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_TRANSPOSE_CONV);
+
+  // Make input, output and options for operator
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->inputSizes()),
+                                  get_tensor_index(node->filter()),
+                                  get_tensor_index(node->outBackprop())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  circle::Padding padding = getOpPadding(node->padding());
+  auto options =
+      CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
+
+  // Make TRANSPOSE_CONV operator
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_TransposeConvOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize the circle-only CircleInstanceNorm as INSTANCE_NORM with inputs
+// {input, gamma, beta}; epsilon/activation go into InstanceNormOptions.
+void OperationExporter::visit(luci::CircleInstanceNorm *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_INSTANCE_NORM);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->gamma()),
+                                  get_tensor_index(node->beta())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateInstanceNormOptions(builder, node->epsilon(),
+                                           to_circle_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_InstanceNormOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Serialize CircleEqual as an EQUAL operator with (empty) EqualOptions.
+// NOTE Locals renamed to match the naming convention used by the other visit()
+//      methods in this file (op_idx / inputs_vec / inputs / ...).
+void OperationExporter::visit(luci::CircleEqual *node)
+{
+  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_EQUAL);
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x()), get_tensor_index(node->y())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(node)};
+  auto inputs = builder.CreateVector(inputs_vec);
+  auto outputs = builder.CreateVector(outputs_vec);
+  auto options = CreateEqualOptions(builder);
+  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_EqualOptions, options.Union());
+  gd._operators.push_back(op_offset);
+}
+
+// Append the circle Operator for 'node' to gd. Single-input nodes whose output
+// tensor index equals their input's are treated as no-ops and skipped; nodes
+// outside the luci dialect are rejected with an internal error.
+void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md,
+                SerializedGraphData &gd)
+{
+  // TODO Use explicit tagging to prevent possible mistake
+  auto isNoOp = [](loco::Node *node) {
+    // If there is only one input and the TensorIndex for the input is same
+    // as the TensorIndex of the output then this node is just a dummy node
+    if (node->arity() == 1)
+    {
+      assert(node->arg(0) != nullptr);
+      return get_tensor_index(node) == get_tensor_index(node->arg(0));
+    }
+    return false;
+  };
+
+  if (isNoOp(node))
+  {
+    // Skip if a given node is marked as NoOp (op with no effect) before
+    return;
+  }
+
+  if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
+  {
+    OperationExporter exporter{builder, md, gd};
+    circle_node->accept(&exporter);
+  }
+  else
+  {
+    INTERNAL_EXN("Node with unsupported dialect found");
+  }
+}
+
+} // namespace
+
+namespace luci
+{
+
+// Export every node reachable from the graph outputs, in post-order so that a
+// node's producers are serialized before the node itself.
+void exportNodes(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &md,
+                 SerializedGraphData &gd)
+{
+  for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+  {
+    exportNode(node, builder, md, gd);
+  }
+}
+
+} // namespace luci
diff --git a/compiler/luci/export/src/CircleOperationExporter.h b/compiler/luci/export/src/CircleOperationExporter.h
new file mode 100644
index 000000000..de6abfc54
--- /dev/null
+++ b/compiler/luci/export/src/CircleOperationExporter.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPERATION_EXPORTER_H__
+#define __CIRCLE_OPERATION_EXPORTER_H__
+
+#include "CircleExporterUtils.h"
+
+#include <loco/IR/Graph.h>
+
+namespace luci
+{
+
+/**
+ * @brief create Operators corresponding to the nodes of a graph
+ * @param g graph whose nodes are exported
+ * @param builder FlatBufferBuilder that owns the serialized output
+ * @param md model-wide data (e.g. operator codes) collected during export
+ * @param gd per-graph data that receives the created Operators
+ */
+void exportNodes(loco::Graph *g, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md,
+                 SerializedGraphData &gd);
+
+} // namespace luci
+
+#endif // __CIRCLE_OPERATION_EXPORTER_H__
diff --git a/compiler/luci/export/src/CircleTensorExporter.cpp b/compiler/luci/export/src/CircleTensorExporter.cpp
new file mode 100644
index 000000000..ef9b9d7d9
--- /dev/null
+++ b/compiler/luci/export/src/CircleTensorExporter.cpp
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleTensorExporter.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Service/CircleTypeInference.h>
+#include <luci/Service/CircleShapeInference.h>
+#include <luci/Log.h>
+
+#include <loco/IR/Algorithm.h>
+#include <loco/IR/CanonicalNode.h>
+#include <loco/IR/CanonicalNodeVisitor.h>
+#include <loco/IR/DataTypeTraits.h>
+#include <oops/InternalExn.h>
+
+using namespace circle;
+using namespace flatbuffers;
+
+namespace
+{
+
+using namespace luci;
+
+class CircleTensoInfo
+{
+public:
+ CircleTensoInfo() = default;
+
+public:
+ void name(const std::string &name) { _name = name; }
+ const std::string &name(void) const { return _name; }
+
+public:
+ const circle::TensorType &dtype(void) const { return _dtype; }
+ void dtype(const circle::TensorType &dtype) { _dtype = dtype; }
+
+ const ShapeDescription &shape(void) const { return _shape; }
+ void shape(const ShapeDescription &shape) { _shape = shape; }
+
+public:
+ luci::CircleConst *content(void) const { return _content; }
+ void content(luci::CircleConst *c) { _content = c; }
+
+ luci::CircleQuantParam *quantparam(void) const { return _quantparam; }
+ void quantparam(luci::CircleQuantParam *qp) { _quantparam = qp; }
+
+private:
+ std::string _name;
+
+ circle::TensorType _dtype;
+ ShapeDescription _shape;
+
+ luci::CircleConst *_content = nullptr;
+ luci::CircleQuantParam *_quantparam = nullptr;
+};
+
+using CircleTensorContext = std::vector<CircleTensoInfo>;
+
+struct NoOpDetector final : public luci::CircleNodeMutableVisitor<bool>
+{
+ // Input is Virtual but does produce a Tensor
+ // Output is Virtual that does not produce any Tensor
+ bool visit(luci::CircleOutput *) final { return true; }
+
+ // Return false by default
+ bool visit(luci::CircleNode *) final { return false; }
+};
+
+void allocateCircleTensor(CircleNode *node, CircleTensorContext &ctx)
+{
+ LOGGER(l);
+
+ auto isNoOp = [](loco::Node *node) {
+ if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
+ {
+ NoOpDetector d;
+ return circle_node->accept(&d);
+ }
+ return false;
+ };
+
+ if (isNoOp(node))
+ {
+ set_tensor_index(node, get_tensor_index(node->arg(0)));
+ return;
+ }
+
+ auto tensor_index = static_cast<CircleTensorIndex>(ctx.size());
+ // TODO Use Graph-level metadata for Input & Output
+ // auto tensor_name = "t_" + std::to_string(tensor_index);
+ std::string tensor_name = node->name();
+ if (tensor_name.empty())
+ tensor_name = "t_" + std::to_string(tensor_index);
+ INFO(l) << "[luci] Tensor for " << tensor_name << ": " << tensor_index << std::endl;
+
+ CircleTensoInfo tensor_info;
+
+ tensor_info.name(tensor_name);
+ tensor_info.dtype(TypeInference::get(node));
+ tensor_info.shape(ShapeInference::get(node));
+
+ tensor_info.content(dynamic_cast<luci::CircleConst *>(node));
+ tensor_info.quantparam(node->quantparam());
+
+ set_tensor_index(node, tensor_index);
+
+ ctx.emplace_back(tensor_info);
+}
+
+} // namespace
+
+namespace
+{
+
+flatbuffers::Offset<Vector<int32_t>> encodeShape(FlatBufferBuilder &builder,
+ const ShapeDescription &shape)
+{
+ assert(shape._rank_known && "unknown number of dimensions is not supported");
+ return builder.CreateVector(shape._dims);
+}
+
+flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder)
+{
+ return CreateBuffer(builder);
+}
+
+template <typename NodeT>
+flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder, NodeT *)
+{
+ return CreateBuffer(builder);
+}
+
+template <loco::DataType DT>
+flatbuffers::Offset<circle::Buffer> encodeOpBufferByDType(FlatBufferBuilder &builder,
+ luci::CircleConst *c)
+{
+ using NativeType = typename loco::DataTypeImpl<DT>::Type;
+
+ std::vector<NativeType> raw_data;
+ const uint32_t size = c->size<DT>();
+ raw_data.reserve(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ raw_data.push_back(c->at<DT>(i));
+ }
+ const size_t raw_size = size * sizeof(NativeType);
+ auto array_offset = builder.CreateVector(reinterpret_cast<uint8_t *>(raw_data.data()), raw_size);
+ return CreateBuffer(builder, array_offset);
+}
+
+template <>
+flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder, luci::CircleConst *c)
+{
+ // TODO use switch
+ if (c->dtype() == loco::DataType::FLOAT32)
+ {
+ return encodeOpBufferByDType<loco::DataType::FLOAT32>(builder, c);
+ }
+ else if (c->dtype() == loco::DataType::S32)
+ {
+ return encodeOpBufferByDType<loco::DataType::S32>(builder, c);
+ }
+ else if (c->dtype() == loco::DataType::U8)
+ {
+ return encodeOpBufferByDType<loco::DataType::U8>(builder, c);
+ }
+
+ INTERNAL_EXN_V("Unsupported datatype", oops::to_uint32(c->dtype()));
+}
+
+flatbuffers::Offset<circle::QuantizationParameters>
+encodeQuantizationParameters(FlatBufferBuilder &builder, luci::CircleQuantParam *quantparam)
+{
+ if (quantparam == nullptr)
+ return 0;
+
+ flatbuffers::Offset<flatbuffers::Vector<float>> min;
+ flatbuffers::Offset<flatbuffers::Vector<float>> max;
+ flatbuffers::Offset<flatbuffers::Vector<float>> scale;
+ flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point;
+ if (quantparam->min.size() && quantparam->max.size())
+ {
+ min = builder.CreateVector(quantparam->min);
+ max = builder.CreateVector(quantparam->max);
+ }
+ if (quantparam->scale.size() && quantparam->zerop.size())
+ {
+ scale = builder.CreateVector(quantparam->scale);
+ zero_point = builder.CreateVector(quantparam->zerop);
+ }
+ return circle::CreateQuantizationParameters(builder, min, max, scale, zero_point);
+}
+
+void exportOpDefinedTensor(const CircleTensoInfo &info, FlatBufferBuilder &builder,
+ SerializedModelData &md, SerializedGraphData &gd)
+{
+ // Create and register output tensor shape
+ auto shape_offset = encodeShape(builder, info.shape());
+
+ // encode and register output tensor buffer
+ auto buffer =
+ info.content() == nullptr ? encodeOpBuffer(builder) : encodeOpBuffer(builder, info.content());
+
+ auto quantparam = encodeQuantizationParameters(builder, info.quantparam());
+
+ auto buffer_id = static_cast<uint32_t>(md._buffers.size());
+ md._buffers.push_back(buffer);
+
+ auto name_offset = builder.CreateString(info.name());
+ auto tensor_offset = CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset,
+ quantparam, /*is_variable*/ false);
+ gd._tensors.push_back(tensor_offset);
+}
+
+} // namespace
+
+namespace luci
+{
+
+void prepareModelData(FlatBufferBuilder &builder, SerializedModelData &md)
+{
+ // add one empty buffer
+ // note: this follows TFLite
+ // note: there's a comment in tflite fbs file
+ // - Note the 0th entry of this array must be an empty buffer (sentinel).
+ // - This is a convention so that tensors without a buffer can provide 0 as
+ // - their buffer.
+ auto buffer = encodeOpBuffer(builder);
+ md._buffers.push_back(buffer);
+}
+
+void exportOpDefinedTensors(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &md,
+ SerializedGraphData &gd)
+{
+ CircleTensorContext tensor_ctx;
+
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+ {
+ CircleNode *circle_node = dynamic_cast<luci::CircleNode *>(node);
+ allocateCircleTensor(circle_node, tensor_ctx);
+ }
+
+ for (const auto &tensor_info : tensor_ctx)
+ {
+ exportOpDefinedTensor(tensor_info, builder, md, gd);
+ }
+}
+
+} // namespace luci
diff --git a/compiler/luci/export/src/CircleTensorExporter.h b/compiler/luci/export/src/CircleTensorExporter.h
new file mode 100644
index 000000000..f9d6107b4
--- /dev/null
+++ b/compiler/luci/export/src/CircleTensorExporter.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_TENSOR_EXPORTER_H__
+#define __CIRCLE_TENSOR_EXPORTER_H__
+
+#include "CircleExporterUtils.h"
+
+#include <loco/IR/Graph.h>
+
+#include <flatbuffers/flatbuffers.h>
+
+namespace luci
+{
+
+/**
+ * @brief one time preparation for SerializedModelData
+ */
+void prepareModelData(flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md);
+
+/**
+ * @brief create Tensors corresponding to results of all nodes in graph
+ * @param g computational graph
+ * @param gd information about serialized parts of model
+ */
+void exportOpDefinedTensors(loco::Graph *g, flatbuffers::FlatBufferBuilder &builder,
+ SerializedModelData &md, SerializedGraphData &gd);
+
+} // namespace luci
+
+#endif // __CIRCLE_TENSOR_EXPORTER_H__
diff --git a/compiler/luci/export/src/Optimize.cpp b/compiler/luci/export/src/Optimize.cpp
new file mode 100644
index 000000000..6fa50b564
--- /dev/null
+++ b/compiler/luci/export/src/Optimize.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Optimize.h"
+#include "ProgressReporter.h"
+
+#include <luci/Pass/ShapeInferencePass.h>
+#include <luci/Pass/TypeInferencePass.h>
+
+#include <logo/Phase.h>
+
+#include <memory>
+
+namespace luci
+{
+
+void optimize(loco::Graph *g)
+{
+ logo::Phase phase;
+ {
+ // prepare type and shape before optimization
+ phase.emplace_back(std::make_unique<TypeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeInferencePass>());
+
+ // TODO add more optimization passes (with a knob)
+ }
+
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Restart);
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+}
+
+} // namespace luci
diff --git a/compiler/luci/export/src/Optimize.h b/compiler/luci/export/src/Optimize.h
new file mode 100644
index 000000000..c3af7a04c
--- /dev/null
+++ b/compiler/luci/export/src/Optimize.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OPTIMIZE_H__
+#define __OPTIMIZE_H__
+
+#include <loco.h>
+
+namespace luci
+{
+
+/**
+ * @brief Run passes of graph transformations
+ *
+ */
+void optimize(loco::Graph *);
+
+} // namespace luci
+
+#endif // __OPTIMIZE_H__
diff --git a/compiler/luci/export/src/ProgressReporter.cpp b/compiler/luci/export/src/ProgressReporter.cpp
new file mode 100644
index 000000000..ac9c3d9a8
--- /dev/null
+++ b/compiler/luci/export/src/ProgressReporter.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ProgressReporter.h"
+
+#include "luci/Log.h"
+#include "luci/LogHelper.h"
+
+#include <logo/Phase.h>
+#include <logo/Pass.h>
+
+#include <cassert>
+
+namespace
+{
+
+char to_char(bool b) { return b ? 'Y' : 'N'; }
+
+const char *to_str(logo::PhaseStrategy s)
+{
+ switch (s)
+ {
+ case logo::PhaseStrategy::Saturate:
+ return "Saturate";
+ case logo::PhaseStrategy::Restart:
+ return "Restart";
+ }
+ assert(false);
+ return "";
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "==============================================================";
+ INFO(prime) << "luci::PhaseRunner<" << to_str(strategy()) << ">";
+ INFO(prime) << "Initial graph";
+ INFO(prime) << fmt(graph());
+}
+
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "luci::PhaseRunner<" << to_str(strategy()) << "> - done";
+}
+
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *info)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "--------------------------------------------------------------";
+ INFO(prime) << "Before " << logo::pass_name(info->pass());
+}
+
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *info)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "After " << logo::pass_name(info->pass())
+ << " (changed: " << to_char(info->changed()) << ")";
+ INFO(prime) << fmt(graph());
+}
+
+} // namespace luci
diff --git a/compiler/luci/export/src/ProgressReporter.h b/compiler/luci/export/src/ProgressReporter.h
new file mode 100644
index 000000000..e91f42592
--- /dev/null
+++ b/compiler/luci/export/src/ProgressReporter.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PROGRESSREPORTER_H__
+#define __PROGRESSREPORTER_H__
+
+#include <logo/Phase.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+class ProgressReporter : public logo::PhaseEventListener
+{
+public:
+ ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
+ : _graph{graph}, _strategy{strategy}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *) override;
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *) override;
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *) override;
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *) override;
+
+public:
+ loco::Graph *graph(void) const { return _graph; }
+ logo::PhaseStrategy strategy(void) const { return _strategy; }
+
+private:
+ loco::Graph *_graph;
+ logo::PhaseStrategy _strategy;
+};
+
+} // namespace luci
+
+#endif // __PROGRESSREPORTER_H__
diff --git a/compiler/luci/export/src/SerializedData.h b/compiler/luci/export/src/SerializedData.h
new file mode 100644
index 000000000..84249653c
--- /dev/null
+++ b/compiler/luci/export/src/SerializedData.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SERIALIZED_DATA_H__
+#define __SERIALIZED_DATA_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <vector>
+
+#include <unordered_map>
+
+namespace luci
+{
+
+struct OpCode
+{
+ circle::BuiltinOperator opcode;
+
+ bool operator==(const OpCode &rhs) const { return opcode == rhs.opcode; }
+};
+
+} // namespace luci
+
+namespace std
+{
+
+template <> struct hash<luci::OpCode>
+{
+ size_t operator()(const luci::OpCode &x) const { return hash<int>()(x.opcode); }
+};
+
+} // namespace std
+
+namespace luci
+{
+
+/**
+ * @brief Record the information of T/F Lite SubGraph and its mapping to loco
+ */
+struct SubGraphContext
+{
+ /// @brief SubGraph input tensor id
+ std::vector<int32_t> _inputs;
+ /// @brief SubGraph output tensor id
+ std::vector<int32_t> _outputs;
+ /// @brief DataFormat for SubGraph
+ circle::DataFormat _data_format{circle::DataFormat::DataFormat_CHANNELS_LAST};
+};
+
+// Prerequisites for circle::Model object creation
+struct SerializedModelData final
+{
+ SerializedModelData() = default;
+ SerializedModelData(const SerializedModelData &) = delete;
+
+ std::unordered_map<OpCode, uint32_t> _operator_codes;
+ std::unordered_map<OpCode, std::string> _custom_operator_codes;
+ std::vector<flatbuffers::Offset<circle::Buffer>> _buffers;
+
+ /**
+ * @brief if opcode is not registered in table of opcodes add it
+ * @param builtin_code
+ * @return idx of opcode in table of opcodes (see schema)
+ */
+ uint32_t registerBuiltinOpcode(circle::BuiltinOperator builtin_code);
+ uint32_t registerCustomOpcode(const std::string &custom_op);
+};
+
+// Prerequisites for circle::Model object creation
+struct SerializedGraphData final : public SubGraphContext
+{
+ SerializedGraphData() = default;
+ SerializedGraphData(const SerializedModelData &) = delete;
+
+ std::vector<flatbuffers::Offset<circle::Operator>> _operators;
+ std::vector<flatbuffers::Offset<circle::Tensor>> _tensors;
+};
+
+} // namespace luci
+
+#endif // __SERIALIZED_DATA_H__
diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt
new file mode 100644
index 000000000..bc9a9152a
--- /dev/null
+++ b/compiler/luci/import/CMakeLists.txt
@@ -0,0 +1,26 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(luci_import SHARED ${SOURCES})
+target_include_directories(luci_import PRIVATE src)
+target_include_directories(luci_import PUBLIC include)
+target_link_libraries(luci_import PUBLIC luci_lang)
+target_link_libraries(luci_import PUBLIC mio_circle)
+target_link_libraries(luci_import PRIVATE luci_log)
+target_link_libraries(luci_import PRIVATE luci_logex)
+target_link_libraries(luci_import PRIVATE nncc_common)
+target_link_libraries(luci_import PRIVATE locop)
+target_link_libraries(luci_import PRIVATE oops)
+install(TARGETS luci_import DESTINATION lib)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_import_test ${TESTS})
+target_include_directories(luci_import_test PRIVATE src)
+target_link_libraries(luci_import_test luci_import)
+target_link_libraries(luci_import_test oops)
diff --git a/compiler/luci/import/README.md b/compiler/luci/import/README.md
new file mode 100644
index 000000000..4ae81ff67
--- /dev/null
+++ b/compiler/luci/import/README.md
@@ -0,0 +1,3 @@
+# luci-import
+
+_luci-import_ provides importing Circle model file to _loco_ graph of _luci_ Circle Dialect IR
diff --git a/compiler/luci/import/include/luci/Import/CircleReader.h b/compiler/luci/import/include/luci/Import/CircleReader.h
new file mode 100644
index 000000000..fcbe09ceb
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/CircleReader.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_GRAPHREADER_H__
+#define __LUCI_IMPORT_GRAPHREADER_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <luci/IR/AttrFusedActFunc.h>
+#include <luci/IR/AttrPadding.h>
+#include <luci/IR/CircleQuantParam.h>
+
+#include <loco.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
+bool is_valid(const circle::OperatorCodeT &opcode);
+bool is_custom(const circle::OperatorCodeT &opcode);
+std::string opcode_name(const circle::OperatorCodeT &opcode);
+const char *tensor_name(const circle::TensorT &tensor);
+const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor);
+
+loco::DataType luci_datatype(circle::TensorType type);
+FusedActFunc luci_actfunc(const circle::ActivationFunctionType type);
+Padding luci_padding(const circle::Padding padding);
+std::unique_ptr<CircleQuantParam>
+luci_quantparam(const circle::QuantizationParametersT *quantization);
+
+/**
+ * @brief Loads Circle file and provides helpers to access attributes
+ */
+class CircleReader
+{
+private:
+ using CircleBuffers_t = std::vector<std::unique_ptr<circle::BufferT>>;
+ using CircleTensors_t = std::vector<std::unique_ptr<circle::TensorT>>;
+ using CircleOperators_t = std::vector<std::unique_ptr<circle::OperatorT>>;
+ using CircleOperatorCodes_t = std::vector<std::unique_ptr<circle::OperatorCodeT>>;
+
+public:
+ CircleReader() = default;
+
+public:
+ const CircleOperatorCodes_t &opcodes() const { return _model->operator_codes; }
+ const CircleBuffers_t &buffers() const { return _model->buffers; }
+ const CircleTensors_t &tensors() const { return _current_subgraph->tensors; }
+ const CircleOperators_t &operators() const { return _current_subgraph->operators; }
+ const std::vector<int32_t> &inputs() const { return _current_subgraph->inputs; }
+ const std::vector<int32_t> &outputs() const { return _current_subgraph->outputs; }
+ const std::string &name() const { return _current_subgraph->name; }
+
+ uint32_t num_subgraph() const { return _model->subgraphs.size(); }
+
+ circle::BuiltinOperator builtin_code(const circle::OperatorT &op) const;
+ std::string opcode_name(const circle::OperatorT &op) const;
+
+public:
+ bool parse(const circle::Model *model);
+ bool select_subgraph(uint32_t subgraph);
+
+private:
+ std::unique_ptr<const circle::ModelT> _model;
+ const circle::SubGraphT *_current_subgraph{nullptr};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_GRAPHREADER_H__
diff --git a/compiler/luci/import/include/luci/Import/GraphBuilder.h b/compiler/luci/import/include/luci/Import/GraphBuilder.h
new file mode 100644
index 000000000..61f673fb6
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/GraphBuilder.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_GRAPH_BUILDER_H__
+#define __LUCI_IMPORT_GRAPH_BUILDER_H__
+
+#include "GraphBuilderContext.h"
+
+#include <mio/circle/schema_generated.h>
+
+namespace luci
+{
+
+/**
+ * @brief Interface to convert a circle::OperatorT to a loco::Node (e.g., Conv2DGraphBuilder)
+ */
+class GraphBuilder
+{
+public:
+ struct ValidateArgs
+ {
+ ValidateArgs(const circle::OperatorT &o, const CircleReader &r) : op(o), reader(r) {}
+
+ const circle::OperatorT &op;
+ const CircleReader &reader;
+ };
+
+public:
+ virtual ~GraphBuilder() = default;
+
+ virtual bool validate(const ValidateArgs &) const = 0;
+
+ void build(const circle::OperatorT &op, GraphBuilderContext *context) const;
+
+private:
+ virtual CircleNode *build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const = 0;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_GRAPH_BUILDER_H__
diff --git a/compiler/luci/import/include/luci/Import/GraphBuilderContext.h b/compiler/luci/import/include/luci/Import/GraphBuilderContext.h
new file mode 100644
index 000000000..8d464181d
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/GraphBuilderContext.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_GRAPH_BUILDER_CONTEXT_H__
+#define __LUCI_IMPORT_GRAPH_BUILDER_CONTEXT_H__
+
+#include "CircleReader.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <loco.h>
+
+#include <map>
+
+namespace luci
+{
+
+using TensorIndex = int32_t;
+
+/*
+ * @brief Tensor Index to CircleNode
+ * To find CircleNode from TensorIndex
+ */
+class IndexNodeFinder
+{
+public:
+ void enroll(TensorIndex idx, CircleNode *node);
+
+ CircleNode *node(TensorIndex idx) const;
+
+private:
+ using MapIndexNode_t = std::map<TensorIndex, CircleNode *>;
+
+ MapIndexNode_t _table;
+};
+
+/**
+ * @brief Class to store context to build loco graph IR from a circle model
+ */
+class GraphBuilderContext
+{
+public:
+ GraphBuilderContext(loco::Graph *g, CircleReader *reader, IndexNodeFinder *nodefinder)
+ : _g(g), _reader(reader), _indexnodefinder(nodefinder)
+ {
+ // DO NOTHING
+ }
+
+ GraphBuilderContext(const GraphBuilderContext &) = delete;
+ GraphBuilderContext(GraphBuilderContext &&) = delete;
+
+public:
+ loco::Graph *graph() { return _g; }
+ CircleReader *reader() { return _reader; }
+
+ IndexNodeFinder *nodefinder() { return _indexnodefinder; }
+
+private:
+ loco::Graph *_g;
+ CircleReader *_reader;
+ IndexNodeFinder *_indexnodefinder;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_GRAPH_BUILDER_CONTEXT_H__
diff --git a/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h b/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h
new file mode 100644
index 000000000..99054e7b6
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_GRAPH_BUILDER_REGISTRY_H__
+#define __LUCI_IMPORT_GRAPH_BUILDER_REGISTRY_H__
+
+#include "GraphBuilder.h"
+
+#include <map>
+
+namespace luci
+{
+
+struct GraphBuilderSource
+{
+ virtual ~GraphBuilderSource() = default;
+
+ /**
+ * @brief Returns registered GraphBuilder pointer for operator (nullptr if not present)
+ */
+ virtual const GraphBuilder *lookup(const circle::BuiltinOperator &op) const = 0;
+};
+
+/**
+ * @brief Class to return graph builder for circle nodes
+ */
+class GraphBuilderRegistry final : public GraphBuilderSource
+{
+public:
+ GraphBuilderRegistry();
+
+public:
+ GraphBuilderRegistry(const GraphBuilderSource *parent) : _parent{parent}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Returns registered GraphBuilder pointer for operator or
+ * nullptr if not registered
+ */
+ const GraphBuilder *lookup(const circle::BuiltinOperator &op) const final
+ {
+ if (_builder_map.find(op) == _builder_map.end())
+ return (_parent == nullptr) ? nullptr : _parent->lookup(op);
+
+ return _builder_map.at(op).get();
+ }
+
+ static GraphBuilderRegistry &get()
+ {
+ static GraphBuilderRegistry me;
+ return me;
+ }
+
+public:
+ void add(const circle::BuiltinOperator op, std::unique_ptr<GraphBuilder> &&builder)
+ {
+ _builder_map[op] = std::move(builder);
+ }
+
+private:
+ const GraphBuilderSource *_parent = nullptr;
+
+private:
+ std::map<const circle::BuiltinOperator, std::unique_ptr<GraphBuilder>> _builder_map;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_GRAPH_BUILDER_REGISTRY_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes.h b/compiler/luci/import/include/luci/Import/Nodes.h
new file mode 100644
index 000000000..381d02b97
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_NODES_H__
+#define __LUCI_IMPORT_NODES_H__
+
+#include "Nodes/CircleAbs.h"
+#include "Nodes/CircleAdd.h"
+#include "Nodes/CircleArgMax.h"
+#include "Nodes/CircleAveragePool2D.h"
+#include "Nodes/CircleBatchToSpaceND.h"
+#include "Nodes/CircleConcatenation.h"
+#include "Nodes/CircleConst.h"
+#include "Nodes/CircleConv2D.h"
+#include "Nodes/CircleCos.h"
+#include "Nodes/CircleDepthwiseConv2D.h"
+#include "Nodes/CircleDiv.h"
+#include "Nodes/CircleEqual.h"
+#include "Nodes/CircleExp.h"
+#include "Nodes/CircleFullyConnected.h"
+#include "Nodes/CircleLogicalNot.h"
+#include "Nodes/CircleLogicalOr.h"
+#include "Nodes/CircleMaxPool2D.h"
+#include "Nodes/CircleMean.h"
+#include "Nodes/CircleMul.h"
+#include "Nodes/CirclePack.h"
+#include "Nodes/CirclePad.h"
+#include "Nodes/CircleRelu.h"
+#include "Nodes/CircleReshape.h"
+#include "Nodes/CircleRsqrt.h"
+#include "Nodes/CircleSoftmax.h"
+#include "Nodes/CircleSub.h"
+#include "Nodes/CircleTranspose.h"
+
+#endif // __LUCI_IMPORT_NODES_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleAbs.h b/compiler/luci/import/include/luci/Import/Nodes/CircleAbs.h
new file mode 100644
index 000000000..e0cec26d9
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleAbs.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_ABS_H__
+#define __LUCI_IMPORT_OP_CIRCLE_ABS_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleAbsGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_ABS_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleAdd.h b/compiler/luci/import/include/luci/Import/Nodes/CircleAdd.h
new file mode 100644
index 000000000..d852ee8b3
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleAdd.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_ADD_H__
+#define __LUCI_IMPORT_OP_CIRCLE_ADD_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleAddGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_ADD_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleArgMax.h b/compiler/luci/import/include/luci/Import/Nodes/CircleArgMax.h
new file mode 100644
index 000000000..dae4691dc
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleArgMax.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_ARGMAX_H__
+#define __LUCI_IMPORT_OP_CIRCLE_ARGMAX_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleArgMaxGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_ARGMAX_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleAveragePool2D.h b/compiler/luci/import/include/luci/Import/Nodes/CircleAveragePool2D.h
new file mode 100644
index 000000000..07f6565bc
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleAveragePool2D.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_AVERAGEPOOL2D_H__
+#define __LUCI_IMPORT_OP_CIRCLE_AVERAGEPOOL2D_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleAveragePool2DGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_AVERAGEPOOL2D_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleBatchToSpaceND.h b/compiler/luci/import/include/luci/Import/Nodes/CircleBatchToSpaceND.h
new file mode 100644
index 000000000..4168d248e
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_BATCHTOSPACEND_H__
+#define __LUCI_IMPORT_OP_CIRCLE_BATCHTOSPACEND_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleBatchToSpaceNDGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_BATCHTOSPACEND_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleConcatenation.h b/compiler/luci/import/include/luci/Import/Nodes/CircleConcatenation.h
new file mode 100644
index 000000000..9b4c9ffd1
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleConcatenation.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_CONCATENATION_H__
+#define __LUCI_IMPORT_OP_CIRCLE_CONCATENATION_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleConcatenationGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_CONCATENATION_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h b/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h
new file mode 100644
index 000000000..7d4f10a59
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_CONST_H__
+#define __LUCI_IMPORT_OP_CIRCLE_CONST_H__
+
+#include "luci/Import/GraphBuilderContext.h"
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+/**
+ * @note Circle does not have a Const operator.
+ *       Methods here provide helpers that create CircleConst from
+ *       Tensor and Buffer in a circle flatbuffer file.
+ */
+
+namespace luci
+{
+
+CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_index);
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_CONST_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleConv2D.h b/compiler/luci/import/include/luci/Import/Nodes/CircleConv2D.h
new file mode 100644
index 000000000..4529a4f11
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleConv2D.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_CONV_2D_H__
+#define __LUCI_IMPORT_OP_CIRCLE_CONV_2D_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleConv2DGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_CONV_2D_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleCos.h b/compiler/luci/import/include/luci/Import/Nodes/CircleCos.h
new file mode 100644
index 000000000..fb472977e
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleCos.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_COS_H__
+#define __LUCI_IMPORT_OP_CIRCLE_COS_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleCosGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_COS_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleDepthwiseConv2D.h b/compiler/luci/import/include/luci/Import/Nodes/CircleDepthwiseConv2D.h
new file mode 100644
index 000000000..1953cb76c
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleDepthwiseConv2D.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_DEPTHWISECONV_2D_H__
+#define __LUCI_IMPORT_OP_CIRCLE_DEPTHWISECONV_2D_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleDepthwiseConv2DGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_DEPTHWISECONV_2D_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleDiv.h b/compiler/luci/import/include/luci/Import/Nodes/CircleDiv.h
new file mode 100644
index 000000000..6a38118fe
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleDiv.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_DIV_H__
+#define __LUCI_IMPORT_OP_CIRCLE_DIV_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+// GraphBuilder for the Circle DIV operator.
+class CircleDivGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_DIV_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleEqual.h b/compiler/luci/import/include/luci/Import/Nodes/CircleEqual.h
new file mode 100644
index 000000000..a98adcd08
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleEqual.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_EQUAL_H__
+#define __LUCI_IMPORT_OP_CIRCLE_EQUAL_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleEqualGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_EQUAL_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleExp.h b/compiler/luci/import/include/luci/Import/Nodes/CircleExp.h
new file mode 100644
index 000000000..521809fe4
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleExp.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_EXP_H__
+#define __LUCI_IMPORT_OP_CIRCLE_EXP_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleExpGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_EXP_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleFullyConnected.h b/compiler/luci/import/include/luci/Import/Nodes/CircleFullyConnected.h
new file mode 100644
index 000000000..b7798c688
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleFullyConnected.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_FULLYCONNECTED_H__
+#define __LUCI_IMPORT_OP_CIRCLE_FULLYCONNECTED_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleFullyConnectedGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_FULLYCONNECTED_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleLogicalNot.h b/compiler/luci/import/include/luci/Import/Nodes/CircleLogicalNot.h
new file mode 100644
index 000000000..ec890ecf7
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleLogicalNot.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_LOGICALNOT_H__
+#define __LUCI_IMPORT_OP_CIRCLE_LOGICALNOT_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleLogicalNotGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_LOGICALNOT_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleLogicalOr.h b/compiler/luci/import/include/luci/Import/Nodes/CircleLogicalOr.h
new file mode 100644
index 000000000..9fb0086c1
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleLogicalOr.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_LOGICALOR_H__
+#define __LUCI_IMPORT_OP_CIRCLE_LOGICALOR_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleLogicalOrGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_LOGICALOR_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleMaxPool2D.h b/compiler/luci/import/include/luci/Import/Nodes/CircleMaxPool2D.h
new file mode 100644
index 000000000..bcd2acb30
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleMaxPool2D.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_MAXPOOL2D_H__
+#define __LUCI_IMPORT_OP_CIRCLE_MAXPOOL2D_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleMaxPool2DGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_MAXPOOL2D_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleMean.h b/compiler/luci/import/include/luci/Import/Nodes/CircleMean.h
new file mode 100644
index 000000000..a7919a57c
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleMean.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_MEAN_H__
+#define __LUCI_IMPORT_OP_CIRCLE_MEAN_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleMeanGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_MEAN_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleMul.h b/compiler/luci/import/include/luci/Import/Nodes/CircleMul.h
new file mode 100644
index 000000000..13027a155
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleMul.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_MUL_H__
+#define __LUCI_IMPORT_OP_CIRCLE_MUL_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleMulGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_MUL_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CirclePack.h b/compiler/luci/import/include/luci/Import/Nodes/CirclePack.h
new file mode 100644
index 000000000..8e4b71995
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CirclePack.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_PACK_H__
+#define __LUCI_IMPORT_OP_CIRCLE_PACK_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CirclePackGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_PACK_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CirclePad.h b/compiler/luci/import/include/luci/Import/Nodes/CirclePad.h
new file mode 100644
index 000000000..e333ee912
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CirclePad.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_PAD_H__
+#define __LUCI_IMPORT_OP_CIRCLE_PAD_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CirclePadGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_PAD_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleRelu.h b/compiler/luci/import/include/luci/Import/Nodes/CircleRelu.h
new file mode 100644
index 000000000..deb913243
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleRelu.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_RELU_H__
+#define __LUCI_IMPORT_OP_CIRCLE_RELU_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleReluGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_RELU_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleReshape.h b/compiler/luci/import/include/luci/Import/Nodes/CircleReshape.h
new file mode 100644
index 000000000..eb4fb13ba
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleReshape.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_RESHAPE_H__
+#define __LUCI_IMPORT_OP_CIRCLE_RESHAPE_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleReshapeGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_RESHAPE_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleRsqrt.h b/compiler/luci/import/include/luci/Import/Nodes/CircleRsqrt.h
new file mode 100644
index 000000000..90d568f1f
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleRsqrt.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_RSQRT_H__
+#define __LUCI_IMPORT_OP_CIRCLE_RSQRT_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleRsqrtGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_RSQRT_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleSoftmax.h b/compiler/luci/import/include/luci/Import/Nodes/CircleSoftmax.h
new file mode 100644
index 000000000..b93846d67
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleSoftmax.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_SOFTMAX_H__
+#define __LUCI_IMPORT_OP_CIRCLE_SOFTMAX_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleSoftmaxGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_SOFTMAX_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleSub.h b/compiler/luci/import/include/luci/Import/Nodes/CircleSub.h
new file mode 100644
index 000000000..315d1c2f9
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleSub.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_SUB_H__
+#define __LUCI_IMPORT_OP_CIRCLE_SUB_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleSubGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_SUB_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleTranspose.h b/compiler/luci/import/include/luci/Import/Nodes/CircleTranspose.h
new file mode 100644
index 000000000..ac0f1fb41
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleTranspose.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_TRANSPOSE_H__
+#define __LUCI_IMPORT_OP_CIRCLE_TRANSPOSE_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleTransposeGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const override;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_TRANSPOSE_H__
diff --git a/compiler/luci/import/include/luci/Importer.h b/compiler/luci/import/include/luci/Importer.h
new file mode 100644
index 000000000..246df9f27
--- /dev/null
+++ b/compiler/luci/import/include/luci/Importer.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORTER_H__
+#define __LUCI_IMPORTER_H__
+
+#include "luci/Import/GraphBuilderRegistry.h"
+
+#include "luci/IR/Module.h"
+
+#include <loco.h>
+
+#include <mio/circle/schema_generated.h>
+
+#include <memory>
+
+namespace luci
+{
+
+class Importer final
+{
+public:
+ Importer();
+
+public:
+ explicit Importer(const GraphBuilderSource *source) : _source{source}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::unique_ptr<loco::Graph> import(const circle::Model *model) const;
+ std::unique_ptr<Module> importModule(const circle::Model *model) const;
+
+private:
+ const GraphBuilderSource *_source = nullptr;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORTER_H__
diff --git a/compiler/luci/import/src/CircleReader.cpp b/compiler/luci/import/src/CircleReader.cpp
new file mode 100644
index 000000000..ead0093b8
--- /dev/null
+++ b/compiler/luci/import/src/CircleReader.cpp
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/CircleReader.h"
+
+#include <memory>
+#include <sstream>
+#include <string>
+
+namespace luci
+{
+
+bool is_valid(const circle::OperatorCodeT &opcode)
+{
+ circle::BuiltinOperator code = opcode.builtin_code;
+ return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
+}
+
+bool is_custom(const circle::OperatorCodeT &opcode)
+{
+ circle::BuiltinOperator code = opcode.builtin_code;
+ return (code == circle::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const circle::OperatorCodeT &opcode)
+{
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (opcode.custom_code.empty())
+ return "(invalid custom)";
+
+ return opcode.custom_code;
+ }
+
+ circle::BuiltinOperator code = opcode.builtin_code;
+ return circle::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_name(const circle::TensorT &tensor)
+{
+ static const char *kEmptyTensorName = "(noname)";
+
+ if (!tensor.name.empty())
+ return tensor.name.c_str();
+
+ return kEmptyTensorName;
+}
+
+const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor)
+{
+ return tensor.quantization.get();
+}
+
+loco::DataType luci_datatype(const circle::TensorType type)
+{
+ switch (type)
+ {
+ case circle::TensorType_FLOAT32:
+ return loco::DataType::FLOAT32;
+ case circle::TensorType_FLOAT16:
+ return loco::DataType::FLOAT16;
+ case circle::TensorType_INT32:
+ return loco::DataType::S32;
+ case circle::TensorType_UINT8:
+ return loco::DataType::U8;
+ case circle::TensorType_INT64:
+ return loco::DataType::S64;
+ case circle::TensorType_STRING:
+ break;
+ case circle::TensorType_BOOL:
+ return loco::DataType::BOOL;
+ case circle::TensorType_INT16:
+ return loco::DataType::S16;
+ case circle::TensorType_COMPLEX64:
+ break;
+ case circle::TensorType_INT8:
+ return loco::DataType::S8;
+ default:
+ break;
+ }
+ assert(false);
+ return loco::DataType::Unknown;
+}
+
+FusedActFunc luci_actfunc(const circle::ActivationFunctionType type)
+{
+ switch (type)
+ {
+ case circle::ActivationFunctionType::ActivationFunctionType_NONE:
+ return luci::FusedActFunc::NONE;
+ case circle::ActivationFunctionType::ActivationFunctionType_RELU:
+ return luci::FusedActFunc::RELU;
+ case circle::ActivationFunctionType::ActivationFunctionType_RELU_N1_TO_1:
+ return luci::FusedActFunc::RELU_N1_TO_1;
+ case circle::ActivationFunctionType::ActivationFunctionType_RELU6:
+ return luci::FusedActFunc::RELU6;
+ case circle::ActivationFunctionType::ActivationFunctionType_TANH:
+ break;
+ default:
+ break;
+ }
+ assert(false);
+ return luci::FusedActFunc::UNDEFINED;
+}
+
+Padding luci_padding(const circle::Padding padding)
+{
+ switch (padding)
+ {
+ case circle::Padding::Padding_SAME:
+ return Padding::SAME;
+ case circle::Padding::Padding_VALID:
+ return Padding::VALID;
+ }
+ assert(false);
+ return Padding::UNDEFINED;
+}
+
+std::unique_ptr<CircleQuantParam>
+luci_quantparam(const circle::QuantizationParametersT *quantization)
+{
+ const auto &min = quantization->min;
+ const auto &max = quantization->max;
+ const auto &scale = quantization->scale;
+ const auto &zero_point = quantization->zero_point;
+
+ if ((!min.empty() && !max.empty()) || (!scale.empty() && !zero_point.empty()))
+ {
+ auto quantparam = std::make_unique<CircleQuantParam>();
+
+ quantparam->min = min;
+ quantparam->max = max;
+ quantparam->scale = scale;
+ quantparam->zerop = zero_point;
+
+ return quantparam;
+ }
+
+ return nullptr;
+}
+
+circle::BuiltinOperator CircleReader::builtin_code(const circle::OperatorT &op) const
+{
+ const auto &op_codes = opcodes();
+ uint32_t index = op.opcode_index;
+ assert(index < op_codes.size());
+ const circle::OperatorCodeT &opcode = *op_codes[index];
+
+ return opcode.builtin_code;
+}
+
+std::string CircleReader::opcode_name(const circle::OperatorT &op) const
+{
+ const auto &op_codes = opcodes();
+ uint32_t index = op.opcode_index;
+ assert(index < op_codes.size());
+ const circle::OperatorCodeT &opcode = *op_codes[index];
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid: " << index << ")";
+ return oss.str();
+ }
+
+ return ::luci::opcode_name(opcode);
+}
+
+bool CircleReader::parse(const circle::Model *model)
+{
+ assert(model != nullptr);
+
+ _model.reset(model->UnPack());
+
+ return true;
+}
+
+bool CircleReader::select_subgraph(uint32_t sgindex)
+{
+ if (_model->subgraphs.size() <= sgindex)
+ {
+ assert(false);
+ return false;
+ }
+
+ _current_subgraph = _model->subgraphs[sgindex].get();
+
+ return true;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/GraphBuilder.cpp b/compiler/luci/import/src/GraphBuilder.cpp
new file mode 100644
index 000000000..e0ec9ded5
--- /dev/null
+++ b/compiler/luci/import/src/GraphBuilder.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+void GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ const std::vector<int32_t> &inputs = op.inputs;
+ const std::vector<int32_t> &outputs = op.outputs;
+ const auto &tensors = context->reader()->tensors();
+
+ std::vector<CircleNode *> input_nodes;
+ for (const int32_t input_tensor_index : inputs)
+ {
+ input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
+ }
+
+ CircleNode *node = build_node(op, input_nodes, context->graph());
+
+ // Set up node parameters.
+ assert(outputs.size() == 1);
+ {
+ const circle::TensorT &output_tensor = *tensors[outputs[0]];
+
+ node->name(tensor_name(output_tensor));
+
+ auto quantization = tensor_quantization(output_tensor);
+ if (quantization)
+ {
+ auto quantparam = luci_quantparam(quantization);
+ if (quantparam)
+ node->quantparam(std::move(quantparam));
+ }
+ }
+
+ // Register node's only output.
+ assert(outputs.size() == 1);
+ {
+ context->nodefinder()->enroll(outputs[0], node);
+ }
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/GraphBuilderContext.cpp b/compiler/luci/import/src/GraphBuilderContext.cpp
new file mode 100644
index 000000000..a5162ce83
--- /dev/null
+++ b/compiler/luci/import/src/GraphBuilderContext.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/GraphBuilderContext.h"
+
+#include <luci/Log.h>
+
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+void IndexNodeFinder::enroll(TensorIndex idx, CircleNode *node)
+{
+ if (_table.find(idx) != _table.end())
+ {
+ LOGGER(l);
+ INFO(l) << "[luci] NodeFinder SKIP (" << idx << ") " << node << std::endl;
+ return;
+ }
+
+ _table[idx] = node;
+}
+
+CircleNode *IndexNodeFinder::node(TensorIndex idx) const
+{
+ MapIndexNode_t::const_iterator iter = _table.find(idx);
+
+ assert(iter != _table.end() && iter->second != nullptr);
+
+ return iter->second;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/GraphBuilderRegistry.cpp b/compiler/luci/import/src/GraphBuilderRegistry.cpp
new file mode 100644
index 000000000..929b71a7d
--- /dev/null
+++ b/compiler/luci/import/src/GraphBuilderRegistry.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/GraphBuilderRegistry.h"
+
+#include "luci/Import/Nodes.h"
+
+#include <memory>
+
+namespace luci
+{
+
+GraphBuilderRegistry::GraphBuilderRegistry()
+{
+#define CIRCLE_NODE(OPCODE, CLASS) add(circle::BuiltinOperator_##OPCODE, std::make_unique<CLASS>());
+
+ CIRCLE_NODE(ABS, CircleAbsGraphBuilder); // 101
+ CIRCLE_NODE(ADD, CircleAddGraphBuilder); // 0
+ CIRCLE_NODE(ARG_MAX, CircleArgMaxGraphBuilder); // 56
+ CIRCLE_NODE(AVERAGE_POOL_2D, CircleAveragePool2DGraphBuilder); // 1
+ CIRCLE_NODE(BATCH_TO_SPACE_ND, CircleBatchToSpaceNDGraphBuilder); // 37
+ CIRCLE_NODE(CONCATENATION, CircleConcatenationGraphBuilder); // 2
+ CIRCLE_NODE(CONV_2D, CircleConv2DGraphBuilder); // 3
+ CIRCLE_NODE(COS, CircleCosGraphBuilder); // 108
+ CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DGraphBuilder); // 4
+ CIRCLE_NODE(DIV, CircleDivGraphBuilder); // 42
+ CIRCLE_NODE(EQUAL, CircleEqualGraphBuilder); // 71
+ CIRCLE_NODE(EXP, CircleExpGraphBuilder); // 47
+ CIRCLE_NODE(FULLY_CONNECTED, CircleFullyConnectedGraphBuilder); // 9
+ CIRCLE_NODE(LOGICAL_NOT, CircleLogicalNotGraphBuilder); // 87
+ CIRCLE_NODE(LOGICAL_OR, CircleLogicalOrGraphBuilder); // 84
+ CIRCLE_NODE(MAX_POOL_2D, CircleMaxPool2DGraphBuilder); // 17
+ CIRCLE_NODE(MEAN, CircleMeanGraphBuilder); // 40
+ CIRCLE_NODE(MUL, CircleMulGraphBuilder); // 18
+ CIRCLE_NODE(PACK, CirclePackGraphBuilder); // 83
+ CIRCLE_NODE(PAD, CirclePadGraphBuilder); // 34
+ CIRCLE_NODE(RELU, CircleReluGraphBuilder); // 19
+ CIRCLE_NODE(RESHAPE, CircleReshapeGraphBuilder); // 22
+ CIRCLE_NODE(RSQRT, CircleRsqrtGraphBuilder); // 76
+ CIRCLE_NODE(SOFTMAX, CircleSoftmaxGraphBuilder); // 25
+ CIRCLE_NODE(SUB, CircleSubGraphBuilder); // 41
+ CIRCLE_NODE(TRANSPOSE, CircleTransposeGraphBuilder); // 39
+
+#undef CIRCLE_NODE
+
+ // BuiltinOperator_DEQUANTIZE = 6,
+ // BuiltinOperator_EMBEDDING_LOOKUP = 7,
+ // BuiltinOperator_FLOOR = 8,
+ // BuiltinOperator_HASHTABLE_LOOKUP = 10,
+ // BuiltinOperator_L2_NORMALIZATION = 11,
+ // BuiltinOperator_L2_POOL_2D = 12,
+ // BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION = 13,
+ // BuiltinOperator_LOGISTIC = 14,
+ // BuiltinOperator_LSH_PROJECTION = 15,
+ // BuiltinOperator_LSTM = 16,
+ // BuiltinOperator_RELU_N1_TO_1 = 20,
+ // BuiltinOperator_RELU6 = 21,
+ // BuiltinOperator_RESIZE_BILINEAR = 23,
+ // BuiltinOperator_RNN = 24,
+ // BuiltinOperator_SPACE_TO_DEPTH = 26,
+ // BuiltinOperator_SVDF = 27,
+ // BuiltinOperator_TANH = 28,
+ // BuiltinOperator_CONCAT_EMBEDDINGS = 29,
+ // BuiltinOperator_SKIP_GRAM = 30,
+ // BuiltinOperator_CALL = 31,
+ // BuiltinOperator_CUSTOM = 32,
+ // BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33,
+ // BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ // BuiltinOperator_GATHER = 36,
+ // BuiltinOperator_SPACE_TO_BATCH_ND = 38,
+ // BuiltinOperator_SQUEEZE = 43,
+ // BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ // BuiltinOperator_STRIDED_SLICE = 45,
+ // BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ // BuiltinOperator_TOPK_V2 = 48,
+ // BuiltinOperator_SPLIT = 49,
+ // BuiltinOperator_LOG_SOFTMAX = 50,
+ // BuiltinOperator_DELEGATE = 51,
+ // BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ // BuiltinOperator_CAST = 53,
+ // BuiltinOperator_PRELU = 54,
+ // BuiltinOperator_MAXIMUM = 55,
+ // BuiltinOperator_ARG_MAX = 56,
+ // BuiltinOperator_MINIMUM = 57,
+ // BuiltinOperator_LESS = 58,
+ // BuiltinOperator_NEG = 59,
+ // BuiltinOperator_PADV2 = 60,
+ // BuiltinOperator_GREATER = 61,
+ // BuiltinOperator_GREATER_EQUAL = 62,
+ // BuiltinOperator_LESS_EQUAL = 63,
+ // BuiltinOperator_SELECT = 64,
+ // BuiltinOperator_SLICE = 65,
+ // BuiltinOperator_SIN = 66,
+ // BuiltinOperator_TRANSPOSE_CONV = 67,
+ // BuiltinOperator_SPARSE_TO_DENSE = 68,
+ // BuiltinOperator_TILE = 69,
+ // BuiltinOperator_EXPAND_DIMS = 70,
+ // BuiltinOperator_NOT_EQUAL = 72,
+ // BuiltinOperator_LOG = 73,
+ // BuiltinOperator_SUM = 74,
+ // BuiltinOperator_SQRT = 75,
+ // BuiltinOperator_SHAPE = 77,
+ // BuiltinOperator_POW = 78,
+ // BuiltinOperator_ARG_MIN = 79,
+ // BuiltinOperator_FAKE_QUANT = 80,
+ // BuiltinOperator_REDUCE_PROD = 81,
+ // BuiltinOperator_REDUCE_MAX = 82,
+ // BuiltinOperator_ONE_HOT = 85,
+ // BuiltinOperator_LOGICAL_AND = 86,
+ // BuiltinOperator_UNPACK = 88,
+ // BuiltinOperator_REDUCE_MIN = 89,
+ // BuiltinOperator_FLOOR_DIV = 90,
+ // BuiltinOperator_REDUCE_ANY = 91,
+ // BuiltinOperator_SQUARE = 92,
+ // BuiltinOperator_ZEROS_LIKE = 93,
+ // BuiltinOperator_FILL = 94,
+ // BuiltinOperator_FLOOR_MOD = 95,
+ // BuiltinOperator_RANGE = 96,
+ // BuiltinOperator_RESIZE_NEAREST_NEIGHBOR = 97,
+ // BuiltinOperator_LEAKY_RELU = 98,
+ // BuiltinOperator_SQUARED_DIFFERENCE = 99,
+ // BuiltinOperator_MIRROR_PAD = 100,
+ // BuiltinOperator_SPLIT_V = 102,
+ // BuiltinOperator_UNIQUE = 103,
+ // BuiltinOperator_CEIL = 104,
+ // BuiltinOperator_REVERSE_V2 = 105,
+ // BuiltinOperator_ADD_N = 106,
+ // BuiltinOperator_GATHER_ND = 107,
+ // BuiltinOperator_WHERE = 109,
+ // BuiltinOperator_RANK = 110,
+ // BuiltinOperator_ELU = 111,
+ // BuiltinOperator_REVERSE_SEQUENCE = 112,
+ // BuiltinOperator_MATRIX_DIAG = 113,
+ // BuiltinOperator_QUANTIZE = 114,
+ // BuiltinOperator_MATRIX_SET_DIAG = 115,
+ // BuiltinOperator_ROUND = 116,
+ // BuiltinOperator_HARD_SWISH = 117,
+ // BuiltinOperator_IF = 118,
+ // BuiltinOperator_WHILE = 119,
+ // BuiltinOperator_NON_MAX_SUPPRESSION_V4 = 120,
+ // BuiltinOperator_NON_MAX_SUPPRESSION_V5 = 121,
+ // BuiltinOperator_SCATTER_ND = 122,
+ // BuiltinOperator_SELECT_V2 = 123,
+ // BuiltinOperator_DENSIFY = 124,
+ // BuiltinOperator_SEGMENT_SUM = 125,
+ // BuiltinOperator_BATCH_MATMUL = 126,
+ // BuiltinOperator_INSTANCE_NORM = 254,
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Importer.cpp b/compiler/luci/import/src/Importer.cpp
new file mode 100644
index 000000000..964c47633
--- /dev/null
+++ b/compiler/luci/import/src/Importer.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Importer.h"
+
+#include "luci/Import/GraphBuilder.h"
+#include "luci/Import/GraphBuilderContext.h"
+#include "luci/Import/GraphBuilderRegistry.h"
+#include "luci/Import/CircleReader.h"
+#include "luci/Import/Nodes/CircleConst.h"
+
+#include <luci/IR/Module.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/Log.h>
+#include <luci/LogHelper.h>
+
+#include <oops/UserExn.h>
+
+#include <memory>
+
+namespace
+{
+
+void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &reader,
+ loco::Graph *graph)
+{
+ LOGGER(l);
+
+ auto nodefinder = std::make_unique<luci::IndexNodeFinder>();
+
+ luci::GraphBuilderContext gb_context(graph, &reader, nodefinder.get());
+
+ const auto &operators = reader.operators();
+ const auto &tensors = reader.tensors();
+
+ // graph inputs; there are no input nodes in TFlite but just Tensors
+ // creating virtual input nodes will make possible to connect nodes that uses them
+ // all attributes of tensor should be copied to CircleInput node
+ for (const auto input : reader.inputs())
+ {
+ auto input_node = graph->nodes()->create<luci::CircleInput>();
+ assert(input_node != nullptr);
+ const circle::TensorT &tensor = *tensors[input];
+
+ auto tname = luci::tensor_name(tensor);
+ input_node->name(tname);
+ auto quantization = luci::tensor_quantization(tensor);
+ if (quantization)
+ {
+ auto quantparam = luci::luci_quantparam(quantization);
+ if (quantparam.get())
+ input_node->quantparam(std::move(quantparam));
+ }
+
+ INFO(l) << "[luci] NodeFinder INPUT(" << input << ") = " << input_node << std::endl;
+ nodefinder->enroll(input, input_node);
+
+ // Shape of Input
+ const std::vector<int32_t> &input_dims = tensor.shape; // in NHWC
+ input_node->rank(input_dims.size());
+ for (uint32_t r = 0; r < input_dims.size(); ++r)
+ input_node->dim(r) = loco::Dimension(input_dims[r]);
+
+ // Data type of Input
+ auto dtype = luci::luci_datatype(tensor.type);
+ input_node->dtype(dtype);
+
+ // Name
+ auto graph_input = graph->inputs()->create();
+ graph_input->name(tname);
+
+ // Set GraphInputOutputIndex for graph
+ input_node->index(graph_input->index());
+
+ // Data type
+ graph_input->dtype(dtype);
+ }
+
+ // Create CircleConst nodes for constant tensors.
+ const auto &buffers = reader.buffers();
+ for (uint32_t i = 0; i < tensors.size(); ++i)
+ {
+ const circle::TensorT &tensor = *tensors[i];
+ const std::vector<uint8_t> &buffer = buffers[tensor.buffer]->data;
+ if (!buffer.empty())
+ {
+ luci::CircleConst *const_node = luci::create_circleconst(&gb_context, i);
+ nodefinder->enroll(i, const_node);
+ }
+ }
+
+ // Import the operators.
+ // Note that operators in model are stored in execution order. This means that when importing
+ // an operator, its input operators have already been imported. We exploit this fact to set up
+ // node's inputs right after creating the node.
+ for (uint32_t i = 0; i < operators.size(); ++i)
+ {
+ const circle::OperatorT &op = *operators[i];
+ circle::BuiltinOperator builtincode = reader.builtin_code(op);
+
+ if (const auto *builder = source.lookup(builtincode))
+ {
+ luci::GraphBuilder::ValidateArgs args(op, reader);
+ if (!builder->validate(args))
+ {
+ throw oops::UserExn("Invalid operator", reader.opcode_name(op));
+ }
+
+ builder->build(op, &gb_context);
+ }
+ else
+ {
+ throw oops::UserExn("Not supported", reader.opcode_name(op));
+ }
+ }
+
+ // graph outputs
+ for (auto output : reader.outputs())
+ {
+ auto output_node = graph->nodes()->create<luci::CircleOutput>();
+ assert(output_node != nullptr);
+ output_node->from(nodefinder->node(output));
+
+ INFO(l) << "[luci] NodeFinder OUTPUT(" << output << ") = " << output_node << std::endl;
+
+ // set the graph output name and node object
+ const circle::TensorT &tensor = *tensors[output];
+ auto graph_output = graph->outputs()->create();
+ std::string tname = luci::tensor_name(tensor);
+ graph_output->name("output_" + tname);
+
+ // Set GraphInputOutputIndex for graph
+ output_node->index(graph_output->index());
+
+ // Shape of Output
+ auto output_shape = std::make_unique<loco::TensorShape>();
+ const std::vector<int32_t> &output_dims = tensor.shape; // in NHWC
+ output_shape->rank(output_dims.size());
+ for (uint32_t r = 0; r < output_dims.size(); ++r)
+ output_shape->dim(r) = loco::Dimension(output_dims[r]);
+ graph_output->shape(std::move(output_shape));
+
+ // Data type
+ auto dtype = luci::luci_datatype(tensor.type);
+ graph_output->dtype(dtype);
+ }
+}
+
+class ValidateCollector final : public loco::ErrorListener
+{
+public:
+ void notify(const loco::ErrorDetail<loco::ErrorCategory::MissingArgument> &d) override
+ {
+ LOGGER(l);
+ INFO(l) << "[luci] GraphValidate error " << d.node() << "(" << d.index() << ")" << std::endl;
+ }
+};
+
+} // namespace
+
+namespace luci
+{
+
+Importer::Importer()
+{
+ // DO NOTHING
+}
+
+std::unique_ptr<loco::Graph> Importer::import(const circle::Model *model) const
+{
+ auto graph = loco::make_graph();
+
+ const GraphBuilderSource *source_ptr = &GraphBuilderRegistry::get();
+
+ if (_source != nullptr)
+ {
+ // Use user-defined GraphBuilderSource
+ source_ptr = _source;
+ }
+
+ CircleReader reader;
+ if (!reader.parse(model))
+ return nullptr;
+
+ // TODO support multiple subgraph when Circle supports
+ assert(reader.num_subgraph() == 1);
+ if (!reader.select_subgraph(0))
+ return nullptr;
+
+ // Convert circle::Model to loco::Graph
+ convert_graph(*source_ptr, reader, graph.get());
+
+ LOGGER(l);
+ INFO(l) << fmt(graph.get());
+
+ assert(loco::valid(graph.get(), std::make_unique<ValidateCollector>()));
+
+ return std::move(graph);
+}
+
+std::unique_ptr<Module> Importer::importModule(const circle::Model *model) const
+{
+ auto module = make_module();
+
+ const GraphBuilderSource *source_ptr = &GraphBuilderRegistry::get();
+
+ if (_source != nullptr)
+ {
+ // Use user-defined GraphBuilderSource
+ source_ptr = _source;
+ }
+
+ CircleReader reader;
+ if (!reader.parse(model))
+ return nullptr;
+
+ for (uint32_t g = 0; g < reader.num_subgraph(); ++g)
+ {
+ auto graph = loco::make_graph();
+
+ if (!reader.select_subgraph(g))
+ return nullptr;
+
+ graph->name(reader.name());
+
+ // Convert circle::Model to loco::Graph
+ convert_graph(*source_ptr, reader, graph.get());
+
+ LOGGER(l);
+ INFO(l) << fmt(graph.get());
+
+ assert(loco::valid(graph.get(), std::make_unique<ValidateCollector>()));
+
+ module->add(std::move(graph));
+ }
+
+ return std::move(module);
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Importer.test.cpp b/compiler/luci/import/src/Importer.test.cpp
new file mode 100644
index 000000000..4426e15fd
--- /dev/null
+++ b/compiler/luci/import/src/Importer.test.cpp
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Importer.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+TEST(TensorFlowLiteImport, Dummy) { luci::Importer import; }
diff --git a/compiler/luci/import/src/Nodes/CircleAbs.cpp b/compiler/luci/import/src/Nodes/CircleAbs.cpp
new file mode 100644
index 000000000..9054986bd
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleAbs.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleAbs.h"
+
+#include <luci/IR/Nodes/CircleAbs.h>
+
+#include <loco.h>
+
+namespace luci
+{
+bool CircleAbsGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 1)
+ return false;
+
+ // TODO Support type check
+ return true;
+}
+
+CircleNode *CircleAbsGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleAbs>();
+ node->x(inputs[0]);
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleAdd.cpp b/compiler/luci/import/src/Nodes/CircleAdd.cpp
new file mode 100644
index 000000000..3b1bb734f
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleAdd.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleAdd.h"
+
+#include <luci/IR/Nodes/CircleAdd.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleAddGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 2)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleAddGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleAdd>();
+ node->x(inputs[0]);
+ node->y(inputs[1]);
+
+ const auto *options = op.builtin_options.AsAddOptions();
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleArgMax.cpp b/compiler/luci/import/src/Nodes/CircleArgMax.cpp
new file mode 100644
index 000000000..2679827e2
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleArgMax.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleArgMax.h"
+
+#include <luci/IR/Nodes/CircleArgMax.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleArgMaxGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 2)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleArgMaxGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleArgMax>();
+ node->input(inputs[0]);
+ node->dimension(inputs[1]);
+
+ const auto *options = op.builtin_options.AsArgMaxOptions();
+ node->output_type(luci_datatype(options->output_type));
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleAveragePool2D.cpp b/compiler/luci/import/src/Nodes/CircleAveragePool2D.cpp
new file mode 100644
index 000000000..cfc3cf126
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleAveragePool2D.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleAveragePool2D.h"
+
+#include <luci/IR/Nodes/CircleAveragePool2D.h>
+
+namespace luci
+{
+
+bool CircleAveragePool2DGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleAveragePool2DGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleAveragePool2D>();
+ node->value(inputs[0]);
+
+ const auto *options = op.builtin_options.AsPool2DOptions();
+ node->padding(luci_padding(options->padding));
+ node->stride()->w(options->stride_w);
+ node->stride()->h(options->stride_h);
+ node->filter()->w(options->filter_width);
+ node->filter()->h(options->filter_height);
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp b/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp
new file mode 100644
index 000000000..4bbfadf64
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleBatchToSpaceND.h"
+
+#include <luci/IR/Nodes/CircleBatchToSpaceND.h>
+
+#include <loco.h>
+
+#include <cassert>
+
+namespace luci
+{
+
+bool CircleBatchToSpaceNDGraphBuilder::validate(const ValidateArgs &args) const
+{
+ const auto &inputs = args.op.inputs;
+ if (inputs.size() != 3)
+ return false;
+
+ // input 1 and 2 should have INT32/INT64 type
+ const auto &tensors = args.reader.tensors();
+ const auto &tensor_1 = tensors.at(inputs[1]);
+ switch (tensor_1->type)
+ {
+ case circle::TensorType_INT32:
+ case circle::TensorType_INT64:
+ break;
+ default:
+ return false;
+ }
+ const auto &tensor_2 = tensors.at(inputs[2]);
+ switch (tensor_2->type)
+ {
+ case circle::TensorType_INT32:
+ case circle::TensorType_INT64:
+ break;
+ default:
+ return false;
+ }
+
+ // Only support input shape dimension 3 and 4 only
+ const auto &tensor_0 = tensors.at(inputs[0]);
+ const auto t_0_s = tensor_0->shape.size();
+ if (t_0_s != 3 && t_0_s != 4)
+ return false;
+
+ // TODO check input shape
+
+ return true;
+}
+
+CircleNode *CircleBatchToSpaceNDGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleBatchToSpaceND>();
+ node->input(inputs[0]);
+ node->block_shape(inputs[1]);
+ node->crops(inputs[2]);
+
+ // No options for BatchToSpaceND
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleConcatenation.cpp b/compiler/luci/import/src/Nodes/CircleConcatenation.cpp
new file mode 100644
index 000000000..7fc616aa0
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleConcatenation.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleConcatenation.h"
+
+#include <luci/IR/Nodes/CircleConcatenation.h>
+
+namespace luci
+{
+
+bool CircleConcatenationGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() < 1)
+ return false;
+
+ if (args.op.outputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleConcatenationGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleConcatenation>(inputs.size());
+ for (uint32_t i = 0; i < inputs.size(); ++i)
+ {
+ node->values(i, inputs[i]);
+ }
+
+ const auto *options = op.builtin_options.AsConcatenationOptions();
+ node->axis(options->axis);
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp b/compiler/luci/import/src/Nodes/CircleConst.cpp
new file mode 100644
index 000000000..1d798983b
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleConst.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleConst.h"
+
+#include <luci/IR/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <loco.h>
+#include <oops/UserExn.h>
+
+#include <cassert>
+
+namespace luci
+{
+
+template <loco::DataType DT>
+static void copy_data(const std::vector<uint8_t> &raw_data, uint32_t num_elements,
+ CircleConst *const_node)
+{
+ using T = typename loco::DataTypeImpl<DT>::Type;
+
+ assert(raw_data.size() == num_elements * sizeof(T));
+ const auto *data = reinterpret_cast<const T *>(raw_data.data());
+
+ const_node->size<DT>(num_elements);
+ for (uint32_t i = 0; i < num_elements; ++i)
+ {
+ const_node->at<DT>(i) = data[i];
+ }
+}
+
+//
+// circleconst_from_tensor() ?
+//
+CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_index)
+{
+ LOGGER(l);
+
+ auto graph = context->graph();
+ auto reader = context->reader();
+ const auto &tensors = reader->tensors();
+
+ // (1) create CircleConst
+ auto const_node = graph->nodes()->create<CircleConst>();
+ const circle::TensorT &const_tensor = *tensors[tensor_index];
+ const_node->name(tensor_name(const_tensor));
+ auto quantization = luci::tensor_quantization(const_tensor);
+ if (quantization)
+ {
+ auto quantparam = luci::luci_quantparam(quantization);
+ if (quantparam.get())
+ const_node->quantparam(std::move(quantparam));
+ }
+
+ INFO(l) << "[luci] NodeFinder const_node(" << tensor_index << ") -> " << const_node << std::endl;
+
+ // (2) set data_type to CircleConst
+ const_node->dtype(luci_datatype(const_tensor.type));
+
+ // (3) set shape to CicleConst
+ std::vector<int32_t> const_dims = const_tensor.shape; // in NHWC
+ const_node->rank(const_dims.size());
+ uint32_t num_elements = 1;
+ for (uint32_t r = 0; r < const_dims.size(); ++r)
+ {
+ const_node->dim(r) = loco::Dimension(const_dims[r]);
+ num_elements = num_elements * const_dims[r];
+ }
+
+ // (4) constant values from circle buffer
+ const std::vector<uint8_t> &buffer = reader->buffers()[const_tensor.buffer]->data;
+ if (buffer.empty())
+ throw oops::UserExn("Empty buffer");
+
+ switch (luci_datatype(const_tensor.type))
+ {
+ case loco::DataType::FLOAT32:
+ copy_data<loco::DataType::FLOAT32>(buffer, num_elements, const_node);
+ break;
+
+ case loco::DataType::U8:
+ copy_data<loco::DataType::U8>(buffer, num_elements, const_node);
+ break;
+
+ case loco::DataType::S32:
+ copy_data<loco::DataType::S32>(buffer, num_elements, const_node);
+ break;
+
+ default:
+ throw oops::UserExn("Unsupported tensor type", circle::EnumNameTensorType(const_tensor.type));
+ }
+
+ return const_node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleConv2D.cpp b/compiler/luci/import/src/Nodes/CircleConv2D.cpp
new file mode 100644
index 000000000..ec9dce0d2
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleConv2D.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleConv2D.h"
+
+#include <luci/IR/Nodes/CircleConv2D.h>
+
+#include <loco.h>
+
+#include <cassert>
+
+namespace luci
+{
+
+bool CircleConv2DGraphBuilder::validate(const ValidateArgs &args) const
+{
+ // Circle Conv2D may not have a bias but we won't support this
+ if (args.op.inputs.size() != 3)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleConv2DGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleConv2D>();
+ node->input(inputs[0]);
+ node->filter(inputs[1]);
+ // For now, bias is required (checked in `verify` method).
+ assert(inputs.size() == 3);
+ node->bias(inputs[2]);
+
+ const auto *options = op.builtin_options.AsConv2DOptions();
+ node->padding(luci_padding(options->padding));
+ node->stride()->w(options->stride_w);
+ node->stride()->h(options->stride_h);
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+ // FIXME Check dilation_w_factor, dilation_h_factor.
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleCos.cpp b/compiler/luci/import/src/Nodes/CircleCos.cpp
new file mode 100644
index 000000000..5f61cc7f6
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleCos.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleCos.h"
+
+#include <luci/IR/Nodes/CircleCos.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleCosGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleCosGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleCos>();
+ node->x(inputs[0]);
+
+ // No options for Cos
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp b/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp
new file mode 100644
index 000000000..c6d3b1f1e
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleDepthwiseConv2D.h"
+
+#include <luci/IR/Nodes/CircleDepthwiseConv2D.h>
+
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+bool CircleDepthwiseConv2DGraphBuilder::validate(const ValidateArgs &args) const
+{
+ // Circle DepthwiseConv2D may not have a bias but we won't support this
+ if (args.op.inputs.size() != 3 && args.op.inputs.size() != 2)
+ return false;
+
+ if (args.op.outputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleDepthwiseConv2DGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleDepthwiseConv2D>();
+ node->input(inputs[0]);
+ node->filter(inputs[1]);
+ if (inputs.size() != 3)
+ throw oops::UserExn("DepthwiseConv2d without bias is unsupported");
+ node->bias(inputs[2]);
+
+ const auto *options = op.builtin_options.AsDepthwiseConv2DOptions();
+ node->padding(luci_padding(options->padding));
+ node->stride()->w(options->stride_w);
+ node->stride()->h(options->stride_h);
+ node->depthMultiplier(options->depth_multiplier);
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+ // FIXME Check dilation_w_factor, dilation_h_factor.
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleDiv.cpp b/compiler/luci/import/src/Nodes/CircleDiv.cpp
new file mode 100644
index 000000000..d09cfb815
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleDiv.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleDiv.h"
+
+#include <luci/IR/Nodes/CircleDiv.h>
+
+namespace luci
+{
+
+bool CircleDivGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 2)
+ return false;
+
+ if (args.op.outputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleDivGraphBuilder::build_node(const circle::OperatorT &op,
+                                              const std::vector<CircleNode *> &inputs,
+                                              loco::Graph *graph) const
+{
+  auto *node = graph->nodes()->create<CircleDiv>(); // auto * for consistency with sibling builders
+  node->x(inputs[0]);
+  node->y(inputs[1]);
+
+  const auto *options = op.builtin_options.AsDivOptions();
+  node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+
+  return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleEqual.cpp b/compiler/luci/import/src/Nodes/CircleEqual.cpp
new file mode 100644
index 000000000..a53f6e94b
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleEqual.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleEqual.h"
+
+#include <luci/IR/Nodes/CircleEqual.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleEqualGraphBuilder::validate(const ValidateArgs &args) const
+{
+ const auto &inputs = args.op.inputs;
+
+ if (inputs.size() != 2)
+ {
+ return false;
+ }
+
+ const auto &tensors = args.reader.tensors();
+
+ return tensors[inputs[0]]->type == tensors[inputs[1]]->type;
+}
+
+CircleNode *CircleEqualGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleEqual>();
+ node->x(inputs[0]);
+ node->y(inputs[1]);
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleExp.cpp b/compiler/luci/import/src/Nodes/CircleExp.cpp
new file mode 100644
index 000000000..44fc93d09
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleExp.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleExp.h"
+
+#include <luci/IR/Nodes/CircleExp.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleExpGraphBuilder::validate(const ValidateArgs &args) const
+{
+  const auto &inputs = args.op.inputs;
+  if (inputs.size() != 1)
+    return false;
+
+  // input type check
+  const auto &tensors = args.reader.tensors();
+  const auto &tensor = tensors.at(inputs[0]);
+  switch (tensor->type)
+  {
+    case circle::TensorType_FLOAT16:
+    case circle::TensorType_FLOAT32:
+    case circle::TensorType_FLOAT64:
+      break;
+    // TODO support TensorType_COMPLEX64, complex128, bfloat16
+    default:
+      return false;
+  }
+
+  return true;
+}
+
+CircleNode *CircleExpGraphBuilder::build_node(const circle::OperatorT &,
+                                              const std::vector<CircleNode *> &inputs,
+                                              loco::Graph *graph) const
+{
+  auto *node = graph->nodes()->create<CircleExp>(); // was CircleAbs: copy-paste bug, EXP must build CircleExp
+  node->x(inputs[0]);
+
+  return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp b/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp
new file mode 100644
index 000000000..8f74fe9ce
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleFullyConnected.h"
+
+#include <luci/IR/Nodes/CircleFullyConnected.h>
+
+#include <loco.h>
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+bool CircleFullyConnectedGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 3)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleFullyConnectedGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleFullyConnected>();
+ node->input(inputs[0]);
+ node->weights(inputs[1]);
+ node->bias(inputs[2]);
+
+ const auto *options = op.builtin_options.AsFullyConnectedOptions();
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+ if (options->weights_format != circle::FullyConnectedOptionsWeightsFormat_DEFAULT)
+ {
+ throw oops::UserExn(
+ "Unsupported weights format",
+ circle::EnumNameFullyConnectedOptionsWeightsFormat(options->weights_format));
+ }
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp b/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp
new file mode 100644
index 000000000..b1ed3ea37
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleLogicalNot.h"
+
+#include <luci/IR/Nodes/CircleLogicalNot.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleLogicalNotGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 1)
+ return false;
+
+ // Only BOOL type is allowed for the input
+ const auto &inputs = args.op.inputs;
+ const auto &tensors = args.reader.tensors();
+ const auto &tensor = tensors.at(inputs[0]);
+ if (tensor->type != circle::TensorType::TensorType_BOOL)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleLogicalNotGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleLogicalNot>();
+ node->x(inputs[0]);
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp b/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp
new file mode 100644
index 000000000..00eb9c5df
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleLogicalOr.h"
+
+#include <luci/IR/Nodes/CircleLogicalOr.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleLogicalOrGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 2)
+ return false;
+
+ // Only BOOL type is allowed for inputs
+ const auto &inputs = args.op.inputs;
+ const auto &tensors = args.reader.tensors();
+ for (auto input : inputs)
+ {
+ const auto &tensor = tensors.at(input);
+ if (tensor->type != circle::TensorType::TensorType_BOOL)
+ return false;
+ }
+
+ return true;
+}
+
+CircleNode *CircleLogicalOrGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleLogicalOr>();
+ node->x(inputs[0]);
+ node->y(inputs[1]);
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleMaxPool2D.cpp b/compiler/luci/import/src/Nodes/CircleMaxPool2D.cpp
new file mode 100644
index 000000000..1798819cf
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleMaxPool2D.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleMaxPool2D.h"
+
+#include <luci/IR/Nodes/CircleMaxPool2D.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleMaxPool2DGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleMaxPool2DGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleMaxPool2D>();
+ node->value(inputs[0]);
+
+ const auto *options = op.builtin_options.AsPool2DOptions();
+ node->padding(luci_padding(options->padding));
+ node->stride()->w(options->stride_w);
+ node->stride()->h(options->stride_h);
+ node->filter()->w(options->filter_width);
+ node->filter()->h(options->filter_height);
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleMean.cpp b/compiler/luci/import/src/Nodes/CircleMean.cpp
new file mode 100644
index 000000000..8261c7b38
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleMean.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleMean.h"
+
+#include <luci/IR/Nodes/CircleMean.h>
+
+namespace luci
+{
+
+bool CircleMeanGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 2)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleMeanGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleMean>();
+ node->input(inputs[0]);
+ node->reduction_indices(inputs[1]);
+
+ const auto *options = op.builtin_options.AsReducerOptions();
+ node->keep_dims(options->keep_dims);
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleMul.cpp b/compiler/luci/import/src/Nodes/CircleMul.cpp
new file mode 100644
index 000000000..d4412b96b
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleMul.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleMul.h"
+
+#include <luci/IR/Nodes/CircleMul.h>
+
+namespace luci
+{
+
+bool CircleMulGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 2)
+ return false;
+
+ if (args.op.outputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleMulGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleMul>();
+ node->x(inputs[0]);
+ node->y(inputs[1]);
+
+ const auto *options = op.builtin_options.AsMulOptions();
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CirclePack.cpp b/compiler/luci/import/src/Nodes/CirclePack.cpp
new file mode 100644
index 000000000..6ba6fae11
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CirclePack.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CirclePack.h"
+
+#include <luci/IR/Nodes/CirclePack.h>
+
+#include <loco.h>
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+bool CirclePackGraphBuilder::validate(const ValidateArgs &args) const
+{
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto *options = args.op.builtin_options.AsPackOptions();
+
+ if (options->values_count < 1)
+ return false;
+
+ if (inputs.size() != static_cast<uint32_t>(options->values_count))
+ return false;
+
+ if (outputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+CircleNode *CirclePackGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CirclePack>(inputs.size());
+ for (uint32_t i = 0; i < inputs.size(); ++i)
+ {
+ node->values(i, inputs[i]);
+ }
+
+ const auto *options = op.builtin_options.AsPackOptions();
+ node->axis(options->axis);
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CirclePad.cpp b/compiler/luci/import/src/Nodes/CirclePad.cpp
new file mode 100644
index 000000000..6abcf2d6c
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CirclePad.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CirclePad.h"
+
+#include <luci/IR/Nodes/CirclePad.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CirclePadGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 2)
+ return false;
+
+ // TODO do attribute checks
+
+ return true;
+}
+
+CircleNode *CirclePadGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CirclePad>();
+ node->input(inputs[0]);
+ node->paddings(inputs[1]);
+
+ const auto *options = op.builtin_options.AsPadOptions();
+ (void)options; // There are no options.
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleRelu.cpp b/compiler/luci/import/src/Nodes/CircleRelu.cpp
new file mode 100644
index 000000000..056268a5b
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleRelu.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleRelu.h"
+
+#include <luci/IR/Nodes/CircleRelu.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleReluGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 1)
+ return false;
+
+ if (args.op.outputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleReluGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleRelu>();
+ node->features(inputs[0]);
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleReshape.cpp b/compiler/luci/import/src/Nodes/CircleReshape.cpp
new file mode 100644
index 000000000..c83f143a6
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleReshape.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleReshape.h"
+
+#include <luci/IR/Nodes/CircleConst.h>
+#include <luci/IR/Nodes/CircleReshape.h>
+
+namespace luci
+{
+
+bool CircleReshapeGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 1 && args.op.inputs.size() != 2)
+ return false;
+
+ if (args.op.outputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+static void setup_shape_attribute(const std::vector<int32_t> &shape, CircleReshape *node)
+{
+ node->newShape()->rank(shape.size());
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ node->newShape()->dim(i) = shape[i];
+ }
+}
+
+static CircleNode *create_shape_node(const std::vector<int32_t> &shape, loco::Graph *graph)
+{
+ auto *shape_node = graph->nodes()->create<luci::CircleConst>();
+ shape_node->dtype(loco::DataType::S32);
+ shape_node->rank(1);
+ shape_node->dim(0) = shape.size();
+ shape_node->size<loco::DataType::S32>(shape.size());
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ shape_node->at<loco::DataType::S32>(i) = shape[i];
+ }
+ return shape_node;
+}
+
+CircleNode *CircleReshapeGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ // If the second input is not provided, generate it based on the value of the attribute.
+ // TODO Presence of the second input is the current requirement of the IR.
+ auto *shape_node = (inputs.size() == 2) ? inputs[1] : nullptr;
+ if (shape_node == nullptr)
+ {
+ const auto *options = op.builtin_options.AsReshapeOptions();
+ shape_node = create_shape_node(options->new_shape, graph);
+ }
+
+ auto *node = graph->nodes()->create<CircleReshape>();
+ node->tensor(inputs[0]);
+ node->shape(shape_node);
+
+ const auto *options = op.builtin_options.AsReshapeOptions();
+ setup_shape_attribute(options->new_shape, node);
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleRsqrt.cpp b/compiler/luci/import/src/Nodes/CircleRsqrt.cpp
new file mode 100644
index 000000000..b5de0b575
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleRsqrt.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleRsqrt.h"
+
+#include <luci/IR/Nodes/CircleRsqrt.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleRsqrtGraphBuilder::validate(const ValidateArgs &args) const
+{
+ const auto &inputs = args.op.inputs;
+ if (inputs.size() != 1)
+ return false;
+
+ // Must be one of the following types
+ // bfloat16, half (float16), float32, float64, complex64, complex128
+ // Currently, circle supports float16, float32, complex64
+ const auto &tensors = args.reader.tensors();
+ const auto &tensor = tensors.at(inputs[0]);
+ switch (tensor->type)
+ {
+ case circle::TensorType_FLOAT16:
+ case circle::TensorType_FLOAT32:
+ case circle::TensorType_COMPLEX64:
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+CircleNode *CircleRsqrtGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleRsqrt>();
+ node->x(inputs[0]);
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleSoftmax.cpp b/compiler/luci/import/src/Nodes/CircleSoftmax.cpp
new file mode 100644
index 000000000..0d316e18c
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleSoftmax.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleSoftmax.h"
+
+#include <luci/IR/Nodes/CircleSoftmax.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleSoftmaxGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 1)
+ return false;
+
+ // TODO do attribute checks
+
+ return true;
+}
+
+CircleNode *CircleSoftmaxGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleSoftmax>();
+ node->logits(inputs[0]);
+
+ const auto *options = op.builtin_options.AsSoftmaxOptions();
+ node->beta(options->beta);
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleSub.cpp b/compiler/luci/import/src/Nodes/CircleSub.cpp
new file mode 100644
index 000000000..968e9f51f
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleSub.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleSub.h"
+
+#include <luci/IR/Nodes/CircleSub.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleSubGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 2)
+ return false;
+
+ if (args.op.outputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleSubGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleSub>();
+ node->x(inputs[0]);
+ node->y(inputs[1]);
+
+ const auto *options = op.builtin_options.AsSubOptions();
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleTranspose.cpp b/compiler/luci/import/src/Nodes/CircleTranspose.cpp
new file mode 100644
index 000000000..8622c8b80
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleTranspose.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleTranspose.h"
+
+#include <luci/IR/Nodes/CircleTranspose.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleTransposeGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 2)
+ return false;
+
+ if (args.op.outputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleTransposeGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleTranspose>();
+ node->a(inputs[0]);
+ node->perm(inputs[1]);
+
+ const auto *options = op.builtin_options.AsTransposeOptions();
+ (void)options;
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/lang/CMakeLists.txt b/compiler/luci/lang/CMakeLists.txt
new file mode 100644
index 000000000..564e777fb
--- /dev/null
+++ b/compiler/luci/lang/CMakeLists.txt
@@ -0,0 +1,22 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(luci_lang SHARED ${SOURCES})
+target_include_directories(luci_lang PRIVATE src)
+target_include_directories(luci_lang PUBLIC include)
+target_link_libraries(luci_lang PUBLIC loco)
+target_link_libraries(luci_lang PUBLIC oops)
+target_link_libraries(luci_lang PRIVATE nncc_common)
+
+install(TARGETS luci_lang DESTINATION lib)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_lang_test ${TESTS})
+target_include_directories(luci_lang_test PRIVATE src)
+target_link_libraries(luci_lang_test luci_lang)
diff --git a/compiler/luci/lang/README.md b/compiler/luci/lang/README.md
new file mode 100644
index 000000000..ea0e3d5da
--- /dev/null
+++ b/compiler/luci/lang/README.md
@@ -0,0 +1,3 @@
+# luci-lang
+
+`luci-lang` provides TensorFlow Lite and Circle Dialect IR
diff --git a/compiler/luci/lang/include/luci/IR/AttrFilter.h b/compiler/luci/lang/include/luci/IR/AttrFilter.h
new file mode 100644
index 000000000..7909fa523
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/AttrFilter.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_ATTRFILTER_H__
+#define __LUCI_IR_ATTRFILTER_H__
+
+#include <stdint.h>
+
+namespace luci
+{
+
+class Filter final
+{
+public:
+ Filter() : _w(1), _h(1) {}
+
+ int32_t w() const { return _w; }
+ void w(int32_t w) { _w = w; }
+
+ int32_t h() const { return _h; }
+ void h(int32_t h) { _h = h; }
+
+private:
+ int32_t _w;
+ int32_t _h;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_ATTRFILTER_H__
diff --git a/compiler/luci/lang/include/luci/IR/AttrFusedActFunc.h b/compiler/luci/lang/include/luci/IR/AttrFusedActFunc.h
new file mode 100644
index 000000000..2abae604b
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/AttrFusedActFunc.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_ATTRFUSEDACTFUNC_H__
+#define __LUCI_IR_ATTRFUSEDACTFUNC_H__
+
+namespace luci
+{
+
+// TODO Divide into TFL version and Circle version when they take different approaches
+enum class FusedActFunc
+{
+ UNDEFINED, // This is not defined by TFLite or Circle. This was added to
+ // prevent programming error.
+ NONE,
+ RELU,
+ RELU_N1_TO_1,
+ RELU6
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_ATTRFUSEDACTFUNC_H__
diff --git a/compiler/luci/lang/include/luci/IR/AttrPadding.h b/compiler/luci/lang/include/luci/IR/AttrPadding.h
new file mode 100644
index 000000000..5c295e0cd
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/AttrPadding.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_ATTRPADDING_H__
+#define __LUCI_IR_ATTRPADDING_H__
+
+namespace luci
+{
+
+enum class Padding
+{
+ UNDEFINED, // This is not defined by TFLite. This was added to prevent programming error.
+
+ SAME,
+ VALID,
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_ATTRPADDING_H__
diff --git a/compiler/luci/lang/include/luci/IR/AttrStride.h b/compiler/luci/lang/include/luci/IR/AttrStride.h
new file mode 100644
index 000000000..654967d73
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/AttrStride.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_ATTRSTRIDE_H__
+#define __LUCI_IR_ATTRSTRIDE_H__
+
+#include <stdint.h>
+
+namespace luci
+{
+
+class Stride final
+{
+public:
+ Stride() : _w(1), _h(1) {}
+
+ int32_t w() const { return _w; }
+ void w(int32_t w) { _w = w; }
+
+ int32_t h() const { return _h; }
+ void h(int32_t h) { _h = h; }
+
+private:
+ int32_t _w;
+ int32_t _h;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_ATTRSTRIDE_H__
diff --git a/compiler/luci/lang/include/luci/IR/CircleDialect.h b/compiler/luci/lang/include/luci/IR/CircleDialect.h
new file mode 100644
index 000000000..1b25dc9c2
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/CircleDialect.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEDIALECT_H__
+#define __LUCI_IR_CIRCLEDIALECT_H__
+
+#include <loco/IR/Dialect.h>
+
+namespace luci
+{
+
+/**
+ * @brief A singleton for Circle Dialect
+ */
+class CircleDialect final : public loco::Dialect
+{
+private:
+ CircleDialect();
+
+public:
+ CircleDialect(const CircleDialect &) = delete;
+ CircleDialect(CircleDialect &&) = delete;
+
+public:
+ static loco::Dialect *get(void);
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEDIALECT_H__
diff --git a/compiler/luci/lang/include/luci/IR/CircleNode.h b/compiler/luci/lang/include/luci/IR/CircleNode.h
new file mode 100644
index 000000000..92816ef04
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/CircleNode.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLENODE_H__
+#define __LUCI_IR_CIRCLENODE_H__
+
+#include "CircleNodeDecl.h"
+#include "CircleNodeImpl.h"
+
+#endif // __LUCI_IR_CIRCLENODE_H__
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodeDecl.h b/compiler/luci/lang/include/luci/IR/CircleNodeDecl.h
new file mode 100644
index 000000000..b87bdf9d0
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/CircleNodeDecl.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLENODEDECL_H__
+#define __LUCI_IR_CIRCLENODEDECL_H__
+
+#include <loco/IR/Node.h>
+#include <loco/IR/Dialect.h>
+
+#include "CircleOpcode.h"
+#include "CircleNodeVisitor.forward.h"
+#include "CircleQuantParam.h"
+
+#include <memory>
+
+namespace luci
+{
+
+using NodeName = std::string;
+
+struct CircleNode : public loco::Node
+{
+ virtual ~CircleNode() = default;
+
+ const loco::Dialect *dialect(void) const final;
+ virtual CircleOpcode opcode(void) const = 0;
+
+ template <typename T> T accept(CircleNodeVisitorBase<T> *) const;
+ template <typename T> T accept(CircleNodeMutableVisitorBase<T> *);
+
+ NodeName name(void) const { return _name; }
+ void name(const NodeName &name) { _name = name; }
+
+ CircleQuantParam *quantparam(void) const { return _quantparam.get(); }
+ void quantparam(std::unique_ptr<CircleQuantParam> &&quantparam)
+ {
+ _quantparam = std::move(quantparam);
+ }
+
+private:
+ NodeName _name;
+ std::unique_ptr<CircleQuantParam> _quantparam;
+};
+
+template <CircleOpcode Code> struct CircleNodeImpl : public CircleNode
+{
+ virtual ~CircleNodeImpl() = default;
+
+ uint32_t opnum(void) const final { return static_cast<uint32_t>(Code); }
+ CircleOpcode opcode(void) const final { return Code; }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLENODEDECL_H__
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodeImpl.h b/compiler/luci/lang/include/luci/IR/CircleNodeImpl.h
new file mode 100644
index 000000000..bdcfc9c9d
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/CircleNodeImpl.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLENODEIMPL_H__
+#define __LUCI_IR_CIRCLENODEIMPL_H__
+
+#include "CircleNodes.h"
+#include "CircleNodeVisitor.h"
+
+#include <oops/InternalExn.h>
+
+#include <cassert>
+
+namespace luci
+{
+
+template <typename T> T CircleNode::accept(CircleNodeVisitorBase<T> *v) const
+{
+ switch (this->opcode())
+ {
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ \
+ case CircleOpcode::OPCODE: \
+ return v->visit(dynamic_cast<const CLASS *>(this));
+
+#include "CircleNodes.lst"
+#undef CIRCLE_NODE
+
+ default:
+ break;
+ }
+
+ INTERNAL_EXN("CircleNode::accept(CircleNodeVisitorBase) not handled");
+}
+
+template <typename T> T CircleNode::accept(CircleNodeMutableVisitorBase<T> *v)
+{
+ switch (this->opcode())
+ {
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ \
+ case CircleOpcode::OPCODE: \
+ return v->visit(dynamic_cast<CLASS *>(this));
+
+#include "CircleNodes.lst"
+#undef CIRCLE_NODE
+
+ default:
+ break;
+ }
+
+ INTERNAL_EXN("CircleNode::accept(CircleNodeMutableVisitorBase) not handled");
+}
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLENODEIMPL_H__
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodeVisitor.forward.h b/compiler/luci/lang/include/luci/IR/CircleNodeVisitor.forward.h
new file mode 100644
index 000000000..70901ca87
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/CircleNodeVisitor.forward.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLENODE_VISITOR_FORWARD_H__
+#define __LUCI_IR_CIRCLENODE_VISITOR_FORWARD_H__
+
+namespace luci
+{
+
+// NOTE These forward declarations SHOULD BE aligned with Node declarations in
+// "CircleNodeVisitor.h"
+template <typename T> struct CircleNodeVisitorBase;
+template <typename T> struct CircleNodeMutableVisitorBase;
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLENODE_VISITOR_FORWARD_H__
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodeVisitor.h b/compiler/luci/lang/include/luci/IR/CircleNodeVisitor.h
new file mode 100644
index 000000000..43339fe84
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/CircleNodeVisitor.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLENODE_VISITOR_H__
+#define __LUCI_IR_CIRCLENODE_VISITOR_H__
+
+#include "CircleNode.h"
+#include "CircleNodes.h"
+
+#include <oops/InternalExn.h>
+
+namespace luci
+{
+
+/**
+ * DO NOT use this class. Use CircleNodeVisitor instead.
+ */
+template <typename T> struct CircleNodeVisitorBase
+{
+ virtual ~CircleNodeVisitorBase() = default;
+
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) virtual T visit(const CIRCLE_CLASS *) = 0;
+
+#include "CircleNodes.lst"
+#undef CIRCLE_NODE
+};
+
+template <typename T> struct CircleNodeVisitor : public CircleNodeVisitorBase<T>
+{
+ virtual ~CircleNodeVisitor() = default;
+
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) \
+ virtual T visit(const CIRCLE_CLASS *node) { return visit(static_cast<const CircleNode *>(node)); }
+
+#include "CircleNodes.lst"
+
+#undef CIRCLE_NODE
+
+ /// @brief Default fallback
+ virtual T visit(const CircleNode *) { INTERNAL_EXN("CircleNodeVisitor: NYI node"); }
+};
+
+/**
+ * DO NOT use this class. Use CircleNodeMutableVisitor instead.
+ */
+template <typename T> struct CircleNodeMutableVisitorBase
+{
+ virtual ~CircleNodeMutableVisitorBase() = default;
+
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) virtual T visit(CIRCLE_CLASS *) = 0;
+
+#include "CircleNodes.lst"
+
+#undef CIRCLE_NODE
+};
+
+template <typename T> struct CircleNodeMutableVisitor : public CircleNodeMutableVisitorBase<T>
+{
+ virtual ~CircleNodeMutableVisitor() = default;
+
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) \
+ virtual T visit(CIRCLE_CLASS *node) { return visit(static_cast<CircleNode *>(node)); }
+
+#include "CircleNodes.lst"
+
+#undef CIRCLE_NODE
+
+ /// @brief Default fallback
+ virtual T visit(CircleNode *) { INTERNAL_EXN("CircleNodeMutableVisitor: NYI node"); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLENODE_VISITOR_H__
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.h b/compiler/luci/lang/include/luci/IR/CircleNodes.h
new file mode 100644
index 000000000..cc822842b
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLENODES_H__
+#define __LUCI_IR_CIRCLENODES_H__
+
+#include "Nodes/CircleAbs.h"
+#include "Nodes/CircleAdd.h"
+#include "Nodes/CircleArgMax.h"
+#include "Nodes/CircleAveragePool2D.h"
+#include "Nodes/CircleBatchToSpaceND.h"
+#include "Nodes/CircleConcatenation.h"
+#include "Nodes/CircleConst.h"
+#include "Nodes/CircleConv2D.h"
+#include "Nodes/CircleCos.h"
+#include "Nodes/CircleDepthwiseConv2D.h"
+#include "Nodes/CircleDiv.h"
+#include "Nodes/CircleEqual.h"
+#include "Nodes/CircleExp.h"
+#include "Nodes/CircleFullyConnected.h"
+#include "Nodes/CircleGather.h"
+#include "Nodes/CircleLogicalNot.h"
+#include "Nodes/CircleLogicalOr.h"
+#include "Nodes/CircleMaximum.h"
+#include "Nodes/CircleMaxPool2D.h"
+#include "Nodes/CircleMean.h"
+#include "Nodes/CircleMul.h"
+#include "Nodes/CirclePack.h"
+#include "Nodes/CirclePad.h"
+#include "Nodes/CircleRelu6.h"
+#include "Nodes/CircleRelu.h"
+#include "Nodes/CircleReshape.h"
+#include "Nodes/CircleRsqrt.h"
+#include "Nodes/CircleSoftmax.h"
+#include "Nodes/CircleSqrt.h"
+#include "Nodes/CircleSquaredDifference.h"
+#include "Nodes/CircleSub.h"
+#include "Nodes/CircleTransposeConv.h"
+#include "Nodes/CircleTranspose.h"
+// Circle only
+#include "Nodes/CircleInstanceNorm.h"
+// Virtual nodes
+#include "Nodes/CircleInput.h"
+#include "Nodes/CircleOutput.h"
+
+namespace luci
+{
+
+/**
+ * @brief Set both CircleReshape's 2nd input (as CircleConst) and its newShape
+ *        attribute to the same value
+ * @note Shape inference for CircleReshape forces them to be the same
+ *
+ * TODO find better place for this helper
+ */
+void set_new_shape(CircleReshape *node, int32_t *base, uint32_t size);
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLENODES_H__
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.lst b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
new file mode 100644
index 000000000..ca3f7fb0f
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
@@ -0,0 +1,52 @@
+#ifndef CIRCLE_NODE
+#error "Define CIRCLE_NODE"
+#endif // CIRCLE_NODE
+
+//
+// PLEASE SORT NODE DECLS IN ALPHABETICAL ORDER
+//
+// Naming rule: Follow names in TensorFlow C++ source; same as TFDialect
+// ex) for AvgPool, tensorflow/core/ops/nn_ops.cc
+// REGISTER_OP("AvgPool") <-- OPCODE: AvgPool. Prefix `Circle` for CLASS name
+// .Input("value: T") <-- Input name is 'value'
+//
+
+CIRCLE_NODE(ABS, luci::CircleAbs)
+CIRCLE_NODE(ADD, luci::CircleAdd)
+CIRCLE_NODE(ARG_MAX, luci::CircleArgMax)
+CIRCLE_NODE(AVERAGE_POOL_2D, luci::CircleAveragePool2D)
+CIRCLE_NODE(BATCH_TO_SPACE_ND, luci::CircleBatchToSpaceND)
+CIRCLE_NODE(CONCATENATION, luci::CircleConcatenation)
+CIRCLE_NODE(CONST, luci::CircleConst)
+CIRCLE_NODE(CONV_2D, luci::CircleConv2D)
+CIRCLE_NODE(COS, luci::CircleCos)
+CIRCLE_NODE(DEPTHWISE_CONV_2D, luci::CircleDepthwiseConv2D)
+CIRCLE_NODE(DIV, luci::CircleDiv)
+CIRCLE_NODE(EQUAL, luci::CircleEqual)
+CIRCLE_NODE(EXP, luci::CircleExp)
+CIRCLE_NODE(FULLY_CONNECTED, luci::CircleFullyConnected)
+CIRCLE_NODE(GATHER, luci::CircleGather)
+CIRCLE_NODE(LOGICAL_NOT, luci::CircleLogicalNot)
+CIRCLE_NODE(LOGICAL_OR, luci::CircleLogicalOr)
+CIRCLE_NODE(MAXIMUM, luci::CircleMaximum)
+CIRCLE_NODE(MAX_POOL_2D, luci::CircleMaxPool2D)
+CIRCLE_NODE(MEAN, luci::CircleMean)
+CIRCLE_NODE(MUL, luci::CircleMul)
+CIRCLE_NODE(PACK, luci::CirclePack)
+CIRCLE_NODE(PAD, luci::CirclePad)
+CIRCLE_NODE(RELU, luci::CircleRelu)
+CIRCLE_NODE(RELU6, luci::CircleRelu6)
+CIRCLE_NODE(RESHAPE, luci::CircleReshape)
+CIRCLE_NODE(RSQRT, luci::CircleRsqrt)
+CIRCLE_NODE(SOFTMAX, luci::CircleSoftmax)
+CIRCLE_NODE(SQRT, luci::CircleSqrt)
+CIRCLE_NODE(SQUARED_DIFFERENCE, luci::CircleSquaredDifference)
+CIRCLE_NODE(SUB, luci::CircleSub)
+// TODO TFLTanh
+CIRCLE_NODE(TRANSPOSE, luci::CircleTranspose)
+CIRCLE_NODE(TRANSPOSE_CONV, luci::CircleTransposeConv)
+// Circle Only
+CIRCLE_NODE(INSTANCE_NORM, luci::CircleInstanceNorm)
+// Virtual node(s)
+CIRCLE_NODE(CIRCLEINPUT, luci::CircleInput)
+CIRCLE_NODE(CIRCLEOUTPUT, luci::CircleOutput)
diff --git a/compiler/luci/lang/include/luci/IR/CircleOpcode.h b/compiler/luci/lang/include/luci/IR/CircleOpcode.h
new file mode 100644
index 000000000..703b70da2
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/CircleOpcode.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEOPCODE_H__
+#define __LUCI_IR_CIRCLEOPCODE_H__
+
+namespace luci
+{
+
+enum class CircleOpcode
+{
+#define CIRCLE_NODE(OPCODE, CLASS) OPCODE,
+#include "CircleNodes.lst"
+#undef CIRCLE_NODE
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEOPCODE_H__
diff --git a/compiler/luci/lang/include/luci/IR/CircleQuantParam.h b/compiler/luci/lang/include/luci/IR/CircleQuantParam.h
new file mode 100644
index 000000000..7253e657b
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/CircleQuantParam.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEQUANTPARAM_H__
+#define __LUCI_IR_CIRCLEQUANTPARAM_H__
+
+#include <cstdint>
+#include <vector>
+
+namespace luci
+{
+
+struct CircleQuantParam
+{
+ std::vector<float> min;
+ std::vector<float> max;
+ std::vector<float> scale;
+ std::vector<int64_t> zerop;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEQUANTPARAM_H__
diff --git a/compiler/luci/lang/include/luci/IR/LuciNodeMixins.h b/compiler/luci/lang/include/luci/IR/LuciNodeMixins.h
new file mode 100644
index 000000000..b18ac5dc4
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/LuciNodeMixins.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_LUCINODEMIXINS_H__
+#define __LUCI_IR_LUCINODEMIXINS_H__
+
+#include "luci/IR/AttrFusedActFunc.h"
+
+#include <loco/IR/Node.h>
+#include <loco/IR/NodeMixins.h>
+
+namespace luci
+{
+
+/// @brief enumeration of mixin class
+enum class LuciNodeTrait
+{
+ FusedActFunc,
+ Bias
+};
+
+template <LuciNodeTrait T> class LuciNodeMixin;
+
+template <> class LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+{
+public:
+ LuciNodeMixin() = default;
+
+public:
+ FusedActFunc fusedActivationFunction() const { return _fused_act_fun; }
+ void fusedActivationFunction(FusedActFunc fused_act_fun) { _fused_act_fun = fused_act_fun; }
+
+private:
+ FusedActFunc _fused_act_fun = FusedActFunc::UNDEFINED;
+};
+
+/**
+ * @brief Mixin class for nodes that has a bias input
+ */
+template <> class LuciNodeMixin<LuciNodeTrait::Bias>
+{
+public:
+ LuciNodeMixin() = default;
+
+public:
+ virtual loco::Node *bias(void) const = 0; /// @brief get the input for bias.
+ virtual void bias(loco::Node *node) = 0; /// @brief set the input for bias.
+};
+
+/**
+ * @brief Nodes with the fixed number of inputs
+ *
+ * TODO Deprecate this class, and use loco::FixedArity instead
+ */
+template <unsigned N, typename Base> class FixedArityNode : public Base
+{
+public:
+ FixedArityNode()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args[n] = std::make_unique<loco::Use>(this);
+ }
+ }
+
+ virtual ~FixedArityNode() = default;
+
+public:
+ unsigned arity(void) const final { return N; }
+
+ loco::Node *arg(uint32_t n) const final { return _args.at(n)->node(); }
+
+ void drop(void) final
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args.at(n)->node(nullptr);
+ }
+ }
+
+protected:
+ // This API allows inherited classes to access "_args" field.
+ loco::Use *at(unsigned n) const { return _args.at(n).get(); }
+
+private:
+ std::array<std::unique_ptr<loco::Use>, N> _args;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_LUCINODEMIXINS_H__
diff --git a/compiler/luci/lang/include/luci/IR/Module.h b/compiler/luci/lang/include/luci/IR/Module.h
new file mode 100644
index 000000000..30eac59ce
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Module.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_MODULE_H__
+#define __LUCI_MODULE_H__
+
+#include <loco/IR/Graph.h>
+
+#include <memory>
+#include <vector>
+
+namespace luci
+{
+
+/**
+ * @brief Collection of 'loco::Graph's
+ */
+class Module final
+{
+public:
+ Module() = default;
+
+ // Copy/Move is not allowed for Module
+ Module(const Module &) = delete;
+ Module(Module &&) = delete;
+
+ ~Module() = default;
+
+public:
+ size_t size(void) const { return _graphs.size(); }
+
+public:
+ void add(std::unique_ptr<loco::Graph> &&g);
+
+ /**
+ * @brief provide main graph
+ */
+ loco::Graph *graph(void) const;
+
+ /**
+ * @brief provide graph with an index
+ *
+ * @note graph(0) is interpreted as a main graph
+ */
+ loco::Graph *graph(size_t idx) const;
+
+ // TODO provide graph accessor with a name
+
+private:
+ std::vector<std::unique_ptr<loco::Graph>> _graphs;
+};
+
+std::unique_ptr<Module> make_module(void);
+
+} // namespace luci
+
+#endif // __LUCI_MODULE_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleAbs.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleAbs.h
new file mode 100644
index 000000000..45dba15bf
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleAbs.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCELABS_H__
+#define __LUCI_IR_CIRCELABS_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief ABS in Circle
+ */
+class CircleAbs final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::ABS>>
+{
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCELABS_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleAdd.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleAdd.h
new file mode 100644
index 000000000..f26eccd1a
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleAdd.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCELADD_H__
+#define __LUCI_IR_CIRCELADD_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief ADD in Circle
+ */
+class CircleAdd final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::ADD>>,
+ public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+{
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCELADD_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleArgMax.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleArgMax.h
new file mode 100644
index 000000000..dbc4b2b3a
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleArgMax.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCELARGMAX_H__
+#define __LUCI_IR_CIRCELARGMAX_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief ARG_MAX in Circle
+ */
+class CircleArgMax final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::ARG_MAX>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *dimension(void) const { return at(1)->node(); }
+ void dimension(loco::Node *node) { at(1)->node(node); }
+
+public:
+ loco::DataType output_type(void) const { return _output_type; }
+ void output_type(loco::DataType ot) { _output_type = ot; }
+
+private:
+ loco::DataType _output_type{loco::DataType::S64};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCELARGMAX_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleAveragePool2D.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleAveragePool2D.h
new file mode 100644
index 000000000..0b43b40c8
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleAveragePool2D.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEAVERAGEPOOL2D_H__
+#define __LUCI_IR_CIRCLEAVERAGEPOOL2D_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFilter.h"
+#include "luci/IR/AttrPadding.h"
+#include "luci/IR/AttrStride.h"
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief AVERAGE_POOL_2D in Circle
+ */
+class CircleAveragePool2D final
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::AVERAGE_POOL_2D>>,
+ public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+{
+public:
+ CircleAveragePool2D() : _padding(Padding::UNDEFINED) { /* empty */}
+
+public:
+ loco::Node *value(void) const { return at(0)->node(); }
+ void value(loco::Node *node) { at(0)->node(node); }
+
+ Padding padding() const { return _padding; }
+ void padding(Padding padding) { _padding = padding; }
+
+ const Filter *filter(void) const { return &_filter; }
+ Filter *filter(void) { return &_filter; }
+
+ const Stride *stride(void) const { return &_stride; }
+ Stride *stride(void) { return &_stride; }
+
+private:
+ Padding _padding;
+ Stride _stride;
+ Filter _filter;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEAVERAGEPOOL2D_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleBatchToSpaceND.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleBatchToSpaceND.h
new file mode 100644
index 000000000..67c0a2102
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleBatchToSpaceND.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEBATCHTOSPACEND_H__
+#define __LUCI_IR_CIRCLEBATCHTOSPACEND_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief BATCH_TO_SPACE_ND in Circle
+ */
+class CircleBatchToSpaceND final
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::BATCH_TO_SPACE_ND>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *block_shape(void) const { return at(1)->node(); }
+ void block_shape(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *crops(void) const { return at(2)->node(); }
+ void crops(loco::Node *node) { at(2)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEBATCHTOSPACEND_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleConcatenation.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleConcatenation.h
new file mode 100644
index 000000000..8a6778a2f
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleConcatenation.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLECONCATENATION_H__
+#define __LUCI_IR_CIRCLECONCATENATION_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/VariadicArityNode.h"
+
+#include <cassert>
+
+namespace luci
+{
+
+/**
+ * @brief CONCATENATION in Circle
+ */
+class CircleConcatenation final
+ : public VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>,
+ public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+{
+public:
+ CircleConcatenation(uint32_t arity)
+ : VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>(arity)
+ {
+ // TODO Support when arity is 0
+ assert(arity >= 1);
+ }
+
+public:
+ uint32_t numValues(void) const { return arity(); }
+
+public:
+ Node *values(uint32_t index) const
+ {
+ assert(index < numValues());
+ return at(index)->node();
+ }
+ void values(uint32_t index, Node *node)
+ {
+ assert(index < numValues());
+ at(index)->node(node);
+ }
+
+public:
+ int32_t axis(void) const { return _axis; }
+ void axis(int32_t axis) { _axis = axis; }
+
+private:
+ int32_t _axis;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLECONCATENATION_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h
new file mode 100644
index 000000000..089836eb9
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLECONST_H__
+#define __LUCI_IR_CIRCLECONST_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+#include <loco/IR/DataTypeTraits.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to build tensor data
+ * @note This will not be exported as a specific op
+ */
+class CircleConst final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CONST>>,
+ public loco::NodeMixin<loco::NodeTrait::DataType>,
+ public loco::NodeMixin<loco::NodeTrait::TensorShape>
+{
+public:
+ CircleConst() = default;
+
+public:
+ template <loco::DataType DT> uint32_t size(void) const;
+ template <loco::DataType DT> void size(uint32_t size);
+ template <loco::DataType DT> const typename loco::DataTypeImpl<DT>::Type &at(uint32_t n) const;
+ template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &at(uint32_t n);
+
+ template <loco::DataType DT> const typename loco::DataTypeImpl<DT>::Type &scalar(void) const;
+ template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &scalar(void);
+
+private:
+ std::vector<uint8_t> _data;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLECONST_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleConv2D.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleConv2D.h
new file mode 100644
index 000000000..54318e65c
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleConv2D.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLECONV2D_H__
+#define __LUCI_IR_CIRCLECONV2D_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrPadding.h"
+#include "luci/IR/AttrStride.h"
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief CONV_2D in Circle
+ */
+class CircleConv2D final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::CONV_2D>>,
+ public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
+ public LuciNodeMixin<LuciNodeTrait::Bias>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *filter(void) const { return at(1)->node(); }
+ void filter(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *bias(void) const override { return at(2)->node(); }
+ void bias(loco::Node *node) override { at(2)->node(node); }
+
+public:
+ Padding padding() const { return _padding; }
+ void padding(Padding padding) { _padding = padding; }
+
+ const Stride *stride(void) const { return &_stride; }
+ Stride *stride(void) { return &_stride; }
+
+private:
+ Padding _padding = Padding::UNDEFINED;
+ Stride _stride;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLECONV2D_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleCos.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleCos.h
new file mode 100644
index 000000000..07ced620a
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleCos.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_COS_H__
+#define __LUCI_IR_CIRCLE_COS_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief COS in Circle
+ */
+class CircleCos final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::COS>>
+{
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_COS_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleDepthwiseConv2D.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleDepthwiseConv2D.h
new file mode 100644
index 000000000..15ee62ba7
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleDepthwiseConv2D.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEDEPTHWISECONV2D_H__
+#define __LUCI_IR_CIRCLEDEPTHWISECONV2D_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFilter.h"
+#include "luci/IR/AttrPadding.h"
+#include "luci/IR/AttrStride.h"
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief DEPTHWISE_CONV_2D in Circle
+ */
+class CircleDepthwiseConv2D final
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::DEPTHWISE_CONV_2D>>,
+ public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
+ public LuciNodeMixin<LuciNodeTrait::Bias>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *filter(void) const { return at(1)->node(); }
+ void filter(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *bias(void) const override { return at(2)->node(); }
+ void bias(loco::Node *node) override { at(2)->node(node); }
+
+public:
+ Padding padding() const { return _padding; }
+ void padding(Padding padding) { _padding = padding; }
+
+ const Stride *stride(void) const { return &_stride; }
+ Stride *stride(void) { return &_stride; }
+
+ int32_t depthMultiplier(void) const { return _depth_multiplier; }
+ void depthMultiplier(int32_t arg) { _depth_multiplier = arg; }
+
+private:
+ Padding _padding = Padding::UNDEFINED;
+ Stride _stride;
+ int32_t _depth_multiplier = 0;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEDEPTHWISECONV2D_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleDiv.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleDiv.h
new file mode 100644
index 000000000..1d4d3a239
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleDiv.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEDIV_H__
+#define __LUCI_IR_CIRCLEDIV_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFilter.h"
+#include "luci/IR/AttrPadding.h"
+#include "luci/IR/AttrStride.h"
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief DIV in Circle
+ */
+class CircleDiv final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::DIV>>,
+ public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+{
+public:
+ CircleDiv() = default;
+
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEDIV_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleEqual.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleEqual.h
new file mode 100644
index 000000000..2087d097a
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleEqual.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_EQUAL_H__
+#define __LUCI_IR_CIRCLE_EQUAL_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief EQUAL in Circle
+ */
+class CircleEqual final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::EQUAL>>
+{
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_EQUAL_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleExp.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleExp.h
new file mode 100644
index 000000000..97aecb30a
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleExp.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_EXP_H__
+#define __LUCI_IR_CIRCLE_EXP_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief EXP in Circle
+ */
+class CircleExp final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::EXP>>
+{
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_EXP_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h
new file mode 100644
index 000000000..d78f39494
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEFULLYCONNECTED_H__
+#define __LUCI_IR_CIRCLEFULLYCONNECTED_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief FULLY_CONNECTED in Circle
+ */
+class CircleFullyConnected final
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::FULLY_CONNECTED>>,
+ public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
+ public LuciNodeMixin<LuciNodeTrait::Bias>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *weights(void) const { return at(1)->node(); }
+ void weights(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *bias(void) const override { return at(2)->node(); }
+ void bias(loco::Node *node) override { at(2)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEFULLYCONNECTED_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleGather.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleGather.h
new file mode 100644
index 000000000..489596c04
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleGather.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEGATHER_H__
+#define __LUCI_IR_CIRCLEGATHER_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief GATHER in Circle
+ */
+class CircleGather final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::GATHER>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *positions(void) const { return at(1)->node(); }
+ void positions(loco::Node *node) { at(1)->node(node); }
+
+public:
+ int32_t axis(void) const { return _axis; }
+ void axis(int32_t axis) { _axis = axis; }
+
+private:
+ int32_t _axis = 0;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEGATHER_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleInput.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleInput.h
new file mode 100644
index 000000000..2c4d60253
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleInput.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEINPUT_H__
+#define __LUCI_IR_CIRCLEINPUT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+#include <loco/IR/DataTypeTraits.h>
+#include <loco/IR/GraphInputIndex.h>
+
+namespace luci
+{
+
/**
 * @brief CircleNode used for Input of the Graph
 * @note This will not be exported as a specific op
 */
class CircleInput final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEINPUT>>,
                          public loco::NodeMixin<loco::NodeTrait::DataType>,
                          public loco::NodeMixin<loco::NodeTrait::TensorShape>
{
public:
  CircleInput() = default;

public:
  // Bind/query the graph-input slot this node represents.
  // Definitions live out of this header; presumably the setter stores the
  // index into _index — TODO confirm against the .cpp implementation.
  void index(const loco::GraphInputIndex &index);
  loco::GraphInputIndex index(void) const;

  // True once an input index has been assigned (default _index is -1)
  bool indexed(void) const { return _index != -1; }

private:
  int64_t _index = -1; // Uninitialized
};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEINPUT_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleInstanceNorm.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleInstanceNorm.h
new file mode 100644
index 000000000..db0faa05e
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleInstanceNorm.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEINSTANCENORM_H__
+#define __LUCI_IR_CIRCLEINSTANCENORM_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief INSTANCE_NORM in Circle
+ */
+class CircleInstanceNorm final
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
+ public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+{
+public:
+ /// @note Currently only support FLOAT32 as input node
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *gamma(void) const { return at(1)->node(); }
+ void gamma(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *beta(void) const { return at(2)->node(); }
+ void beta(loco::Node *node) { at(2)->node(node); }
+
+ float epsilon() const { return _epsilon; }
+ void epsilon(float epsilon) { _epsilon = epsilon; }
+
+private:
+ float _epsilon = 1e-05;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEINSTANCENORM_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalNot.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalNot.h
new file mode 100644
index 000000000..749dbe518
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalNot.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_LOGICALNOT_H__
+#define __LUCI_IR_CIRCLE_LOGICALNOT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief LOGICAL_NOT in Circle
+ */
+class CircleLogicalNot final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::LOGICAL_NOT>>
+{
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_LOGICALNOT_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalOr.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalOr.h
new file mode 100644
index 000000000..570be57af
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalOr.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_LOGICALOR_H__
+#define __LUCI_IR_CIRCLE_LOGICALOR_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief LOGICAL_OR in Circle
+ */
+class CircleLogicalOr final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::LOGICAL_OR>>
+{
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_LOGICALOR_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleMaxPool2D.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleMaxPool2D.h
new file mode 100644
index 000000000..1eb6532ff
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleMaxPool2D.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEMAXPOOL2D_H__
+#define __LUCI_IR_CIRCLEMAXPOOL2D_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFilter.h"
+#include "luci/IR/AttrPadding.h"
+#include "luci/IR/AttrStride.h"
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief MAX_POOL_2D in Circle
+ */
+class CircleMaxPool2D final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::MAX_POOL_2D>>,
+ public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+{
+public:
+ CircleMaxPool2D() : _padding(Padding::UNDEFINED) { /* empty */}
+
+public:
+ loco::Node *value(void) const { return at(0)->node(); }
+ void value(loco::Node *node) { at(0)->node(node); }
+
+ Padding padding() const { return _padding; }
+ void padding(Padding padding) { _padding = padding; }
+
+ const Filter *filter(void) const { return &_filter; }
+ Filter *filter(void) { return &_filter; }
+
+ const Stride *stride(void) const { return &_stride; }
+ Stride *stride(void) { return &_stride; }
+
+private:
+ Padding _padding;
+ Stride _stride;
+ Filter _filter;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEMAXPOOL2D_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleMaximum.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleMaximum.h
new file mode 100644
index 000000000..cf7305e3a
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleMaximum.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEMAXIMUM_H__
+#define __LUCI_IR_CIRCLEMAXIMUM_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief MAXIMUM in Circle
+ */
+class CircleMaximum final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MAXIMUM>>
+{
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEMAXIMUM_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleMean.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleMean.h
new file mode 100644
index 000000000..6fd791450
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleMean.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEMEAN_H__
+#define __LUCI_IR_CIRCLEMEAN_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief MEAN in Circle
+ */
+class CircleMean final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MEAN>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *reduction_indices(void) const { return at(1)->node(); }
+ void reduction_indices(loco::Node *node) { at(1)->node(node); }
+
+public:
+ bool keep_dims(void) const { return _keep_dims; }
+ void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
+
+private:
+ bool _keep_dims = false;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEMEAN_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleMul.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleMul.h
new file mode 100644
index 000000000..67e897170
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEMUL_H__
+#define __LUCI_IR_CIRCLEMUL_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief MUL in Circle
+ */
+class CircleMul final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MUL>>,
+ public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+{
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEMUL_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleOutput.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleOutput.h
new file mode 100644
index 000000000..c65317ad1
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleOutput.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEOUTPUT_H__
+#define __LUCI_IR_CIRCLEOUTPUT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+#include <loco/IR/GraphOutputIndex.h>
+
+namespace luci
+{
+
/**
 * @brief CircleNode for Output of the Graph
 * @note This will not be exported as a specific op
 */
class CircleOutput final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUT>>
{
public:
  CircleOutput() = default;

  // Bind/query the graph-output slot this node represents.
  // Definitions live out of this header; presumably the setter stores the
  // index into _index — TODO confirm against the .cpp implementation.
  void index(const loco::GraphOutputIndex &index);
  loco::GraphOutputIndex index(void) const;

  // True once an output index has been assigned (default _index is -1)
  bool indexed(void) const { return _index != -1; }

public:
  // The node whose value is exposed as this graph output (operand #0)
  loco::Node *from(void) const { return at(0)->node(); }
  void from(loco::Node *node) { at(0)->node(node); }

private:
  int64_t _index = -1; // Uninitialized
};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEOUTPUT_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CirclePack.h b/compiler/luci/lang/include/luci/IR/Nodes/CirclePack.h
new file mode 100644
index 000000000..8330b585a
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CirclePack.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEPACK_H__
+#define __LUCI_IR_CIRCLEPACK_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/VariadicArityNode.h"
+
+#include <cassert>
+
+namespace luci
+{
+
/**
 * @brief PACK in Circle
 *
 * Variadic-arity node: packs 'arity' value operands along 'axis'.
 */
class CirclePack final : public VariadicArityNode<CircleNodeImpl<CircleOpcode::PACK>>
{
public:
  // @param arity number of value operands; must be >= 1 for now
  CirclePack(uint32_t arity) : VariadicArityNode<CircleNodeImpl<CircleOpcode::PACK>>(arity)
  {
    // TODO Support when arity is 0
    assert(arity >= 1);
  }

public:
  // Number of packed values (same as node arity)
  uint32_t values_count(void) const { return arity(); }

public:
  // Accessor for the index-th value operand; index must be in range
  Node *values(uint32_t index) const
  {
    assert(index < values_count());
    return at(index)->node();
  }
  // Setter for the index-th value operand; index must be in range
  void values(uint32_t index, Node *node)
  {
    assert(index < values_count());
    at(index)->node(node);
  }

public:
  // Axis attribute along which values are packed
  int32_t axis(void) const { return _axis; }
  void axis(int32_t axis) { _axis = axis; }

private:
  int32_t _axis{0};
};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEPACK_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CirclePad.h b/compiler/luci/lang/include/luci/IR/Nodes/CirclePad.h
new file mode 100644
index 000000000..31599bda0
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CirclePad.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEPAD_H__
+#define __LUCI_IR_CIRCLEPAD_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief PAD in Circle
+ */
+class CirclePad final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::PAD>>
+{
+public:
+ CirclePad() = default;
+
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *paddings(void) const { return at(1)->node(); }
+ void paddings(loco::Node *node) { at(1)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEPAD_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu.h
new file mode 100644
index 000000000..afb2c667a
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLERELU_H__
+#define __LUCI_IR_CIRCLERELU_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief RELU in Circle
+ */
+class CircleRelu final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RELU>>
+{
+public:
+ CircleRelu() = default;
+
+public:
+ loco::Node *features(void) const { return at(0)->node(); }
+ void features(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLERELU_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu6.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu6.h
new file mode 100644
index 000000000..b313a5557
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu6.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLERELU6_H__
+#define __LUCI_IR_CIRCLERELU6_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief RELU6 in Circle
+ */
+class CircleRelu6 final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RELU6>>
+{
+public:
+ CircleRelu6() = default;
+
+public:
+ loco::Node *features(void) const { return at(0)->node(); }
+ void features(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLERELU6_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleReshape.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleReshape.h
new file mode 100644
index 000000000..a3a2a3f31
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleReshape.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLERESHAPE_H__
+#define __LUCI_IR_CIRCLERESHAPE_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief RESHAPE in Circle
+ */
+class CircleReshape final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESHAPE>>
+{
+public:
+ CircleReshape() = default;
+
+public:
+ loco::Node *tensor(void) const { return at(0)->node(); }
+ void tensor(loco::Node *node) { at(0)->node(node); }
+
+ // TODO Make this input optional. That is, loco system does not emit error
+ // with this input being null
+ loco::Node *shape(void) const { return at(1)->node(); }
+ void shape(loco::Node *node) { at(1)->node(node); }
+
+public:
+ class Shape
+ {
+ public:
+ uint32_t rank(void) const { return _shape.size(); }
+ void rank(uint32_t rank) { _shape.resize(rank); }
+
+ int32_t dim(uint32_t n) const { return _shape.at(n); }
+ int32_t &dim(uint32_t n) { return _shape.at(n); }
+
+ private:
+ std::vector<int32_t> _shape;
+ };
+
+ const Shape *newShape(void) const { return &_new_shape; }
+ Shape *newShape(void) { return &_new_shape; }
+
+private:
+ Shape _new_shape;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLERESHAPE_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleRsqrt.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleRsqrt.h
new file mode 100644
index 000000000..44d22ef22
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleRsqrt.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLERSQRT_H__
+#define __LUCI_IR_CIRCLERSQRT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief RSQRT in Circle
+ */
+class CircleRsqrt final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RSQRT>>
+{
+public:
+ CircleRsqrt() = default;
+
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLERSQRT_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSoftmax.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSoftmax.h
new file mode 100644
index 000000000..4ea3c4b0e
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSoftmax.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLESOFTMAX_H__
+#define __LUCI_IR_CIRCLESOFTMAX_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief SOFTMAX in Circle
+ */
+class CircleSoftmax final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SOFTMAX>>
+{
+public:
+ loco::Node *logits(void) const { return at(0)->node(); }
+ void logits(loco::Node *node) { at(0)->node(node); }
+
+public:
+ float beta(void) const { return _beta; }
+ void beta(float beta) { _beta = beta; }
+
+private:
+ float _beta;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLESOFTMAX_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSqrt.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSqrt.h
new file mode 100644
index 000000000..bc1f39d90
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSqrt.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLESQRT_H__
+#define __LUCI_IR_CIRCLESQRT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief SQRT in Circle
+ */
+class CircleSqrt final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SQRT>>
+{
+public:
+ CircleSqrt() = default;
+
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLESQRT_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSquaredDifference.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSquaredDifference.h
new file mode 100644
index 000000000..ff337dfbe
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSquaredDifference.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLESQUAREDIFFERENCE_H__
+#define __LUCI_IR_CIRCLESQUAREDIFFERENCE_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief SQUARED_DIFFERENCE in Circle
+ */
+class CircleSquaredDifference final
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SQUARED_DIFFERENCE>>
+{
+public:
+ CircleSquaredDifference() = default;
+
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLESQUAREDIFFERENCE_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSub.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSub.h
new file mode 100644
index 000000000..08208f942
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSub.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLESUB_H__
+#define __LUCI_IR_CIRCLESUB_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief SUB in Circle
+ */
+class CircleSub final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SUB>>,
+ public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+{
+public:
+ CircleSub() = default;
+
+public:
+ loco::Node *x(void) const { return at(0)->node(); }
+ void x(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *y(void) const { return at(1)->node(); }
+ void y(loco::Node *node) { at(1)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLESUB_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleTranspose.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleTranspose.h
new file mode 100644
index 000000000..198b56afd
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleTranspose.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLETRANSPOSE_H__
+#define __LUCI_IR_CIRCLETRANSPOSE_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief TRANSPOSE in Circle
+ */
+class CircleTranspose final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::TRANSPOSE>>
+{
+public:
+ CircleTranspose() = default;
+
+public:
+ /// @brief Get the input node to transpose
+ loco::Node *a(void) const { return at(0)->node(); }
+
+ /// @brief Set the input node to transpose
+ void a(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *perm(void) const { return at(1)->node(); }
+ void perm(loco::Node *node) { at(1)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLETRANSPOSE_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h
new file mode 100644
index 000000000..54a0d010c
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLETRANSPOSECONV_H__
+#define __LUCI_IR_CIRCLETRANSPOSECONV_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrPadding.h"
+#include "luci/IR/AttrStride.h"
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief TRANSPOSE_CONV in Circle
+ *
+ * @note Argument node function names are from TensorFlow. So referring 'in' and
+ * 'out' actually means 'out' and 'in' of this node.
+ */
+class CircleTransposeConv final
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>
+{
+public:
+ loco::Node *inputSizes(void) const { return at(0)->node(); }
+ void inputSizes(Node *node) { at(0)->node(node); }
+
+ loco::Node *filter(void) const { return at(1)->node(); }
+ void filter(Node *node) { at(1)->node(node); }
+
+ loco::Node *outBackprop(void) const { return at(2)->node(); }
+ void outBackprop(Node *node) { at(2)->node(node); }
+
+public:
+ const Padding &padding(void) const { return _padding; }
+ void padding(const Padding &padding) { _padding = padding; }
+
+ const Stride *stride(void) const { return &_stride; }
+ Stride *stride(void) { return &_stride; }
+
+private:
+ Padding _padding;
+ Stride _stride;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLETRANSPOSECONV_H__
diff --git a/compiler/luci/lang/include/luci/IR/VariadicArityNode.h b/compiler/luci/lang/include/luci/IR/VariadicArityNode.h
new file mode 100644
index 000000000..a4814ee48
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/VariadicArityNode.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_VARIADICARITYNODES_H__
+#define __LUCI_IR_VARIADICARITYNODES_H__
+
+#include <loco/IR/Node.h>
+#include <loco/IR/Use.h>
+
+#include <vector>
+#include <memory>
+#include <cassert>
+
+namespace luci
+{
+
+/**
+ * @brief Nodes with the variadic inputs
+ */
+template <typename Base> class VariadicArityNode : public Base
+{
+public:
+ VariadicArityNode(uint32_t arity)
+ {
+ for (uint32_t n = 0; n < arity; ++n)
+ {
+ _args.push_back(std::make_unique<loco::Use>(this));
+ }
+ };
+
+ virtual ~VariadicArityNode() = default;
+
+public:
+ uint32_t arity(void) const final { return _args.size(); }
+
+ loco::Node *arg(uint32_t n) const final
+ {
+ assert(n < _args.size());
+ return _args.at(n)->node();
+ }
+
+ void drop(void) final
+ {
+ for (uint32_t n = 0; n < _args.size(); ++n)
+ {
+ _args.at(n)->node(nullptr);
+ }
+ }
+
+protected:
+ // This API allows inherited classes to access "_args" field.
+ loco::Use *at(uint32_t n) const
+ {
+ assert(n < _args.size());
+ return _args.at(n).get();
+ }
+
+private:
+ std::vector<std::unique_ptr<loco::Use>> _args;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_VARIADICARITYNODES_H__
diff --git a/compiler/luci/lang/src/Check.h b/compiler/luci/lang/src/Check.h
new file mode 100644
index 000000000..e05ec904a
--- /dev/null
+++ b/compiler/luci/lang/src/Check.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CHECK_H__
+#define __CHECK_H__
+
+#include <stdexcept>
+#include <cassert>
+#include <iostream>
+
+// TODO Add macro for Release version
+
+#define LUCI_ASSERT(condition, msg) \
+ { \
+ if (!(condition)) \
+ { \
+ std::cerr << "[assert failed] " << (msg) << ". " << std::endl; \
+ assert((condition)); \
+ } \
+ }
+
+#endif // __CHECK_H__
diff --git a/compiler/luci/lang/src/CircleDialect.cpp b/compiler/luci/lang/src/CircleDialect.cpp
new file mode 100644
index 000000000..e1c925de4
--- /dev/null
+++ b/compiler/luci/lang/src/CircleDialect.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/Nodes/CircleInput.h"
+#include "luci/IR/Nodes/CircleOutput.h"
+
+#include <loco/IR/Graph.h>
+#include <loco/IR/GraphInputIndex.h>
+#include <loco/IR/GraphOutputIndex.h>
+
+#include <cassert>
+#include <memory>
+
+namespace
+{
+
+struct GiiQueryServiceImpl final : public loco::GraphInputIndexQueryService
+{
+ bool associated(const loco::Node *node) const final
+ {
+ if (auto circleinput = dynamic_cast<const luci::CircleInput *>(node))
+ {
+ return circleinput->indexed();
+ }
+ return false;
+ }
+
+ loco::GraphOutputIndex index(const loco::Node *node) const final
+ {
+ assert(associated(node));
+ auto circleinput = dynamic_cast<const luci::CircleInput *>(node);
+ assert(circleinput != nullptr);
+ return circleinput->index();
+ }
+};
+
+struct GoiQueryServiceImpl final : public loco::GraphOutputIndexQueryService
+{
+ bool associated(const loco::Node *node) const final
+ {
+ if (auto circleoutput = dynamic_cast<const luci::CircleOutput *>(node))
+ {
+ return circleoutput->indexed();
+ }
+ return false;
+ }
+
+ loco::GraphOutputIndex index(const loco::Node *node) const final
+ {
+ assert(associated(node));
+ auto circleoutput = dynamic_cast<const luci::CircleOutput *>(node);
+ assert(circleoutput != nullptr);
+ return circleoutput->index();
+ }
+};
+
+} // namespace
+
+namespace luci
+{
+
+CircleDialect::CircleDialect()
+{
+ service<loco::GraphInputIndexQueryService>(std::make_unique<GiiQueryServiceImpl>());
+ service<loco::GraphOutputIndexQueryService>(std::make_unique<GoiQueryServiceImpl>());
+}
+
+loco::Dialect *CircleDialect::get(void)
+{
+ static CircleDialect d;
+ return &d;
+}
+
+} // namespace luci
diff --git a/compiler/luci/lang/src/CircleDialect.test.cpp b/compiler/luci/lang/src/CircleDialect.test.cpp
new file mode 100644
index 000000000..78221f199
--- /dev/null
+++ b/compiler/luci/lang/src/CircleDialect.test.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleDialectTest, get_P)
+{
+ auto d = luci::CircleDialect::get();
+
+ // get() SHOULD return a valid(non-null) pointer
+ ASSERT_NE(d, nullptr);
+ // The return value SHOULD be stable across multiple invocations
+ ASSERT_EQ(d, luci::CircleDialect::get());
+}
+
+TEST(CircleDialectTest, get_N)
+{
+ // TBD
+}
diff --git a/compiler/luci/lang/src/CircleNode.cpp b/compiler/luci/lang/src/CircleNode.cpp
new file mode 100644
index 000000000..cc273ba91
--- /dev/null
+++ b/compiler/luci/lang/src/CircleNode.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/CircleNode.h"
+#include "luci/IR/CircleDialect.h"
+
+namespace luci
+{
+
+const loco::Dialect *CircleNode::dialect(void) const { return CircleDialect::get(); }
+
+} // namespace luci
diff --git a/compiler/luci/lang/src/CircleNodes.cpp b/compiler/luci/lang/src/CircleNodes.cpp
new file mode 100644
index 000000000..76ff7ec5a
--- /dev/null
+++ b/compiler/luci/lang/src/CircleNodes.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/CircleNodes.h"
+
+#include "Check.h"
+
+#include <loco.h>
+
+namespace luci
+{
+
+void set_new_shape(CircleReshape *node, int32_t *base, uint32_t size)
+{
+ // Check node does not have both of new shape infos
+ LUCI_ASSERT(node->shape() == nullptr, "node already has shape input");
+ LUCI_ASSERT(node->newShape()->rank() == 0, "node already has newShape attribute");
+
+ const loco::DataType S32 = loco::DataType::S32;
+
+ // Set 2nd input as CircleConst
+ auto const_shape_node = node->graph()->nodes()->create<CircleConst>();
+ const_shape_node->rank(1);
+ const_shape_node->dim(0) = size;
+ const_shape_node->dtype(S32);
+ const_shape_node->size<S32>(size);
+ for (uint32_t axis = 0; axis < size; ++axis)
+ const_shape_node->at<S32>(axis) = base[axis];
+ node->shape(const_shape_node);
+
+ // Set newShape attribute
+ node->newShape()->rank(size);
+ for (uint32_t axis = 0; axis < size; ++axis)
+ node->newShape()->dim(axis) = base[axis];
+}
+
+} // namespace luci
diff --git a/compiler/luci/lang/src/LuciNodeMixins.cpp b/compiler/luci/lang/src/LuciNodeMixins.cpp
new file mode 100644
index 000000000..660cbe1a5
--- /dev/null
+++ b/compiler/luci/lang/src/LuciNodeMixins.cpp
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This is to validate LuciNodeMixins.h
+#include "luci/IR/LuciNodeMixins.h"
diff --git a/compiler/luci/lang/src/Module.cpp b/compiler/luci/lang/src/Module.cpp
new file mode 100644
index 000000000..e52d897a5
--- /dev/null
+++ b/compiler/luci/lang/src/Module.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Module.h"
+
+#include <stdexcept>
+
+namespace luci
+{
+
+void Module::add(std::unique_ptr<loco::Graph> &&g)
+{
+ if (g.get() == nullptr)
+ throw std::invalid_argument("Module: Graph cannot be null");
+
+ _graphs.emplace_back(std::move(g));
+}
+
+loco::Graph *Module::graph(void) const
+{
+ auto &graph = _graphs.at(0);
+ return graph.get();
+}
+
+loco::Graph *Module::graph(size_t idx) const
+{
+ auto &graph = _graphs.at(idx);
+ return graph.get();
+}
+
+std::unique_ptr<Module> make_module(void) { return std::make_unique<Module>(); }
+
+} // namespace luci
diff --git a/compiler/luci/lang/src/Module.test.cpp b/compiler/luci/lang/src/Module.test.cpp
new file mode 100644
index 000000000..f60319944
--- /dev/null
+++ b/compiler/luci/lang/src/Module.test.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Module.h"
+
+#include <gtest/gtest.h>
+
+TEST(ModuleTest, consturctor)
+{
+ auto gs = luci::make_module();
+
+ GTEST_SUCCEED();
+}
+
+TEST(ModuleTest, add)
+{
+ auto m = luci::make_module();
+ auto g = loco::make_graph();
+ auto g_ptr = g.get();
+
+ m->add(std::move(g));
+
+ ASSERT_EQ(m->graph(), g_ptr);
+ ASSERT_EQ(m->graph(0), g_ptr);
+}
+
+TEST(ModuleTest, add_more)
+{
+ auto m = luci::make_module();
+ auto g1 = loco::make_graph();
+ auto g2 = loco::make_graph();
+ auto g3 = loco::make_graph();
+ auto g1_ptr = g1.get();
+ auto g2_ptr = g2.get();
+ auto g3_ptr = g3.get();
+
+ m->add(std::move(g1));
+ m->add(std::move(g2));
+ m->add(std::move(g3));
+
+ ASSERT_EQ(m->size(), 3);
+ ASSERT_EQ(m->graph(), g1_ptr);
+ ASSERT_EQ(m->graph(0), g1_ptr);
+ ASSERT_EQ(m->graph(1), g2_ptr);
+ ASSERT_EQ(m->graph(2), g3_ptr);
+}
+
+TEST(ModuleTest, add_nullptr_NEG)
+{
+ auto m = luci::make_module();
+
+ EXPECT_THROW(m->add(nullptr), std::invalid_argument);
+}
+
+TEST(ModuleTest, graph_index_overflow_NEG)
+{
+ auto m = luci::make_module();
+
+ EXPECT_ANY_THROW(m->graph(100));
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleAbs.test.cpp b/compiler/luci/lang/src/Nodes/CircleAbs.test.cpp
new file mode 100644
index 000000000..847f1500b
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleAbs.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleAbs.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleAbsTest, constructor)
+{
+ luci::CircleAbs abs_node;
+
+ ASSERT_EQ(abs_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(abs_node.opcode(), luci::CircleOpcode::ABS);
+
+ ASSERT_EQ(abs_node.x(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleAdd.test.cpp b/compiler/luci/lang/src/Nodes/CircleAdd.test.cpp
new file mode 100644
index 000000000..a7701963d
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleAdd.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleAdd.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleAddTest, constructor_P)
+{
+ luci::CircleAdd add_node;
+
+ ASSERT_EQ(add_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(add_node.opcode(), luci::CircleOpcode::ADD);
+
+ ASSERT_EQ(add_node.x(), nullptr);
+ ASSERT_EQ(add_node.y(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleArgMax.test.cpp b/compiler/luci/lang/src/Nodes/CircleArgMax.test.cpp
new file mode 100644
index 000000000..6b2cff11c
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleArgMax.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleArgMax.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleArgMaxTest, constructor_P)
+{
+ luci::CircleArgMax argmax_node;
+
+ ASSERT_EQ(argmax_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(argmax_node.opcode(), luci::CircleOpcode::ARG_MAX);
+
+ ASSERT_EQ(argmax_node.input(), nullptr);
+ ASSERT_EQ(argmax_node.dimension(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleBatchToSpaceND.test.cpp b/compiler/luci/lang/src/Nodes/CircleBatchToSpaceND.test.cpp
new file mode 100644
index 000000000..e995718a1
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleBatchToSpaceND.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleBatchToSpaceND.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleBatchToSpaceNDTest, constructor)
+{
+ luci::CircleBatchToSpaceND bts_node;
+
+ ASSERT_EQ(bts_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(bts_node.opcode(), luci::CircleOpcode::BATCH_TO_SPACE_ND);
+
+ ASSERT_EQ(bts_node.input(), nullptr);
+ ASSERT_EQ(bts_node.block_shape(), nullptr);
+ ASSERT_EQ(bts_node.crops(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleConcatenation.test.cpp b/compiler/luci/lang/src/Nodes/CircleConcatenation.test.cpp
new file mode 100644
index 000000000..7167682b2
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleConcatenation.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleConcatenation.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleConcatenationTest, constructor_P)
+{
+ luci::CircleConcatenation concat_node(3);
+
+ ASSERT_EQ(concat_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(concat_node.opcode(), luci::CircleOpcode::CONCATENATION);
+
+ ASSERT_EQ(concat_node.numValues(), 3);
+ ASSERT_EQ(concat_node.values(0), nullptr);
+ ASSERT_EQ(concat_node.values(1), nullptr);
+ ASSERT_EQ(concat_node.values(2), nullptr);
+ ASSERT_EQ(concat_node.fusedActivationFunction(), luci::FusedActFunc::UNDEFINED);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleConst.cpp b/compiler/luci/lang/src/Nodes/CircleConst.cpp
new file mode 100644
index 000000000..1c46884d8
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleConst.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleConst.h"
+
+#include <cassert>
+
+namespace luci
+{
+
+template <loco::DataType DT> uint32_t CircleConst::size(void) const
+{
+ assert(dtype() == DT);
+ assert(_data.size() % sizeof(typename loco::DataTypeImpl<DT>::Type) == 0);
+ return _data.size() / sizeof(typename loco::DataTypeImpl<DT>::Type);
+}
+
+template <loco::DataType DT> void CircleConst::size(uint32_t l)
+{
+ assert(dtype() == DT);
+ _data.resize(l * sizeof(typename loco::DataTypeImpl<DT>::Type));
+}
+
+template <loco::DataType DT>
+const typename loco::DataTypeImpl<DT>::Type &CircleConst::at(uint32_t n) const
+{
+ assert(dtype() == DT);
+ assert(n < size<DT>());
+ return *(reinterpret_cast<const typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &CircleConst::at(uint32_t n)
+{
+ assert(dtype() == DT);
+ assert(n < size<DT>());
+ return *(reinterpret_cast<typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+template <loco::DataType DT>
+const typename loco::DataTypeImpl<DT>::Type &CircleConst::scalar(void) const
+{
+ assert(dtype() == DT);
+ return *(reinterpret_cast<const typename loco::DataTypeImpl<DT>::Type *>(_data.data()));
+}
+
+template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &CircleConst::scalar(void)
+{
+ assert(dtype() == DT);
+ return *(reinterpret_cast<typename loco::DataTypeImpl<DT>::Type *>(_data.data()));
+}
+
+#define INSTANTIATE(DT) \
+ template uint32_t CircleConst::size<DT>(void) const; \
+ template void CircleConst::size<DT>(uint32_t); \
+ template const typename loco::DataTypeImpl<DT>::Type &CircleConst::at<DT>(uint32_t) const; \
+ template typename loco::DataTypeImpl<DT>::Type &CircleConst::at<DT>(uint32_t); \
+ template const typename loco::DataTypeImpl<DT>::Type &CircleConst::scalar<DT>(void) const; \
+ template typename loco::DataTypeImpl<DT>::Type &CircleConst::scalar<DT>(void);
+
+INSTANTIATE(loco::DataType::S32);
+INSTANTIATE(loco::DataType::FLOAT32);
+INSTANTIATE(loco::DataType::U8);
+
+#undef INSTANTIATE
+
+} // namespace luci
diff --git a/compiler/luci/lang/src/Nodes/CircleConv2D.test.cpp b/compiler/luci/lang/src/Nodes/CircleConv2D.test.cpp
new file mode 100644
index 000000000..7931c7eba
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleConv2D.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleConv2D.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleConv2DTest, constructor_P)
+{
+ luci::CircleConv2D conv2d_node;
+
+ ASSERT_EQ(conv2d_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(conv2d_node.opcode(), luci::CircleOpcode::CONV_2D);
+
+ ASSERT_EQ(conv2d_node.input(), nullptr);
+ ASSERT_EQ(conv2d_node.filter(), nullptr);
+ ASSERT_EQ(conv2d_node.bias(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleCos.test.cpp b/compiler/luci/lang/src/Nodes/CircleCos.test.cpp
new file mode 100644
index 000000000..34c2cfdf0
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleCos.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleCos.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleCosTest, constructor_P)
+{
+ luci::CircleCos cos_node;
+
+ ASSERT_EQ(cos_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(cos_node.opcode(), luci::CircleOpcode::COS);
+
+ ASSERT_EQ(cos_node.x(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleDepthwiseConv2D.test.cpp b/compiler/luci/lang/src/Nodes/CircleDepthwiseConv2D.test.cpp
new file mode 100644
index 000000000..bbc1ea543
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleDepthwiseConv2D.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleDepthwiseConv2D.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleDepthwiseConv2DTest, constructor_P)
+{
+ luci::CircleDepthwiseConv2D dw_conv2d_node;
+
+ ASSERT_EQ(dw_conv2d_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(dw_conv2d_node.opcode(), luci::CircleOpcode::DEPTHWISE_CONV_2D);
+
+ ASSERT_EQ(dw_conv2d_node.input(), nullptr);
+ ASSERT_EQ(dw_conv2d_node.filter(), nullptr);
+ ASSERT_EQ(dw_conv2d_node.bias(), nullptr);
+ ASSERT_EQ(dw_conv2d_node.padding(), luci::Padding::UNDEFINED);
+ ASSERT_EQ(dw_conv2d_node.stride()->h(), 1);
+ ASSERT_EQ(dw_conv2d_node.stride()->w(), 1);
+ ASSERT_EQ(dw_conv2d_node.depthMultiplier(), 0);
+ ASSERT_EQ(dw_conv2d_node.fusedActivationFunction(), luci::FusedActFunc::UNDEFINED);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleDiv.test.cpp b/compiler/luci/lang/src/Nodes/CircleDiv.test.cpp
new file mode 100644
index 000000000..e950cc6be
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleDiv.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleDiv.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleDivTest, constructor_P)
+{
+ luci::CircleDiv div_node;
+
+ ASSERT_EQ(div_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(div_node.opcode(), luci::CircleOpcode::DIV);
+
+ ASSERT_EQ(div_node.x(), nullptr);
+ ASSERT_EQ(div_node.y(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleEqual.test.cpp b/compiler/luci/lang/src/Nodes/CircleEqual.test.cpp
new file mode 100644
index 000000000..e2757f094
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleEqual.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleEqual.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleEqualTest, constructor_P)
+{
+ luci::CircleEqual equal_node;
+
+ ASSERT_EQ(equal_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(equal_node.opcode(), luci::CircleOpcode::EQUAL);
+
+ ASSERT_EQ(equal_node.x(), nullptr);
+ ASSERT_EQ(equal_node.y(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleExp.test.cpp b/compiler/luci/lang/src/Nodes/CircleExp.test.cpp
new file mode 100644
index 000000000..db10d0b03
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleExp.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleExp.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleExpTest, constructor)
+{
+ luci::CircleExp exp_node;
+
+ ASSERT_EQ(exp_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(exp_node.opcode(), luci::CircleOpcode::EXP);
+
+ ASSERT_EQ(exp_node.x(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp b/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp
new file mode 100644
index 000000000..994dcd239
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleFullyConnected.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleFullyConnectedTest, constructor)
+{
+ luci::CircleFullyConnected fc_node;
+
+ ASSERT_EQ(fc_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(fc_node.opcode(), luci::CircleOpcode::FULLY_CONNECTED);
+
+ ASSERT_EQ(fc_node.input(), nullptr);
+ ASSERT_EQ(fc_node.weights(), nullptr);
+ ASSERT_EQ(fc_node.bias(), nullptr);
+ ASSERT_EQ(fc_node.fusedActivationFunction(), luci::FusedActFunc::UNDEFINED);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleGather.test.cpp b/compiler/luci/lang/src/Nodes/CircleGather.test.cpp
new file mode 100644
index 000000000..4eace9a02
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleGather.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleGather.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleGatherTest, constructor)
+{
+ luci::CircleGather gather_node;
+
+ ASSERT_EQ(gather_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(gather_node.opcode(), luci::CircleOpcode::GATHER);
+
+ ASSERT_EQ(gather_node.input(), nullptr);
+ ASSERT_EQ(gather_node.positions(), nullptr);
+ ASSERT_EQ(gather_node.axis(), 0);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleInput.cpp b/compiler/luci/lang/src/Nodes/CircleInput.cpp
new file mode 100644
index 000000000..dcf54f3b0
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleInput.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleInput.h"
+
+#include <cassert>
+#include <limits>
+
+namespace luci
+{
+
+void CircleInput::index(const loco::GraphInputIndex &index)
+{
+ // CircleInput internally stores "GraphInputIndex" as int64_t
+ _index = static_cast<int64_t>(index);
+}
+
+loco::GraphInputIndex CircleInput::index(void) const
+{
+ assert(_index >= std::numeric_limits<loco::GraphInputIndex>::min());
+ assert(_index <= std::numeric_limits<loco::GraphInputIndex>::max());
+ return static_cast<loco::GraphInputIndex>(_index);
+}
+
+} // namespace luci
diff --git a/compiler/luci/lang/src/Nodes/CircleInstanceNorm.test.cpp b/compiler/luci/lang/src/Nodes/CircleInstanceNorm.test.cpp
new file mode 100644
index 000000000..b87e81791
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleInstanceNorm.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleInstanceNorm.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleInstanceNormTest, constructor)
+{
+ luci::CircleInstanceNorm instance_norm;
+
+ ASSERT_EQ(instance_norm.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(instance_norm.opcode(), luci::CircleOpcode::INSTANCE_NORM);
+
+ ASSERT_EQ(instance_norm.input(), nullptr);
+ ASSERT_EQ(instance_norm.gamma(), nullptr);
+ ASSERT_EQ(instance_norm.beta(), nullptr);
+ ASSERT_FLOAT_EQ(instance_norm.epsilon(), 1e-05);
+ ASSERT_EQ(instance_norm.fusedActivationFunction(), luci::FusedActFunc::UNDEFINED);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleLogicalNot.test.cpp b/compiler/luci/lang/src/Nodes/CircleLogicalNot.test.cpp
new file mode 100644
index 000000000..360dd4711
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleLogicalNot.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleLogicalNot.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleLogicalNotTest, constructor_P)
+{
+ luci::CircleLogicalNot not_node;
+
+ ASSERT_EQ(not_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(not_node.opcode(), luci::CircleOpcode::LOGICAL_NOT);
+
+ ASSERT_EQ(not_node.x(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleLogicalOr.test.cpp b/compiler/luci/lang/src/Nodes/CircleLogicalOr.test.cpp
new file mode 100644
index 000000000..039db4afc
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleLogicalOr.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleLogicalOr.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleLogicalOrTest, constructor_P)
+{
+ luci::CircleLogicalOr or_node;
+
+ ASSERT_EQ(or_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(or_node.opcode(), luci::CircleOpcode::LOGICAL_OR);
+
+ ASSERT_EQ(or_node.x(), nullptr);
+ ASSERT_EQ(or_node.y(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleMaxPool2D.test.cpp b/compiler/luci/lang/src/Nodes/CircleMaxPool2D.test.cpp
new file mode 100644
index 000000000..874ecec0e
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleMaxPool2D.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleMaxPool2D.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleMaxPool2DTest, constructor_P)
+{
+ luci::CircleMaxPool2D maxpool2d_node;
+
+ ASSERT_EQ(maxpool2d_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(maxpool2d_node.opcode(), luci::CircleOpcode::MAX_POOL_2D);
+
+ ASSERT_EQ(maxpool2d_node.value(), nullptr);
+ ASSERT_NE(maxpool2d_node.filter(), nullptr);
+ ASSERT_NE(maxpool2d_node.stride(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleMaximum.test.cpp b/compiler/luci/lang/src/Nodes/CircleMaximum.test.cpp
new file mode 100644
index 000000000..efe62f11a
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleMaximum.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleMaximum.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleMaximumTest, constructor_P)
+{
+ luci::CircleMaximum max_node;
+
+ ASSERT_EQ(max_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(max_node.opcode(), luci::CircleOpcode::MAXIMUM);
+
+ ASSERT_EQ(max_node.x(), nullptr);
+ ASSERT_EQ(max_node.y(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleMul.test.cpp b/compiler/luci/lang/src/Nodes/CircleMul.test.cpp
new file mode 100644
index 000000000..f9eca42f9
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleMul.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleMul.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleMulTest, constructor_P)
+{
+ luci::CircleMul mul_node;
+
+ ASSERT_EQ(mul_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(mul_node.opcode(), luci::CircleOpcode::MUL);
+
+ ASSERT_EQ(mul_node.x(), nullptr);
+ ASSERT_EQ(mul_node.y(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleOutput.cpp b/compiler/luci/lang/src/Nodes/CircleOutput.cpp
new file mode 100644
index 000000000..31380456f
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleOutput.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleOutput.h"
+
+#include <cassert>
+#include <limits>
+
+namespace luci
+{
+
+void CircleOutput::index(const loco::GraphOutputIndex &index)
+{
+ // CircleOutput internally stores "GraphOutputIndex" as int64_t
+ _index = static_cast<int64_t>(index);
+}
+
+loco::GraphOutputIndex CircleOutput::index(void) const
+{
+ assert(_index >= std::numeric_limits<loco::GraphOutputIndex>::min());
+ assert(_index <= std::numeric_limits<loco::GraphOutputIndex>::max());
+ return static_cast<loco::GraphOutputIndex>(_index);
+}
+
+} // namespace luci
diff --git a/compiler/luci/lang/src/Nodes/CirclePack.test.cpp b/compiler/luci/lang/src/Nodes/CirclePack.test.cpp
new file mode 100644
index 000000000..5c9a96f7c
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CirclePack.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CirclePack.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CirclePackTest, constructor)
+{
+ luci::CirclePack pack_node(3);
+
+ ASSERT_EQ(pack_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(pack_node.opcode(), luci::CircleOpcode::PACK);
+
+ ASSERT_EQ(pack_node.axis(), 0);
+ ASSERT_EQ(pack_node.values_count(), 3);
+ ASSERT_EQ(pack_node.values(0), nullptr);
+ ASSERT_EQ(pack_node.values(1), nullptr);
+ ASSERT_EQ(pack_node.values(2), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CirclePad.test.cpp b/compiler/luci/lang/src/Nodes/CirclePad.test.cpp
new file mode 100644
index 000000000..3a23fa0f0
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CirclePad.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CirclePad.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CirclePadTest, constructor_P)
+{
+ luci::CirclePad pad_node;
+
+ ASSERT_EQ(pad_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(pad_node.opcode(), luci::CircleOpcode::PAD);
+
+ ASSERT_EQ(pad_node.input(), nullptr);
+ ASSERT_EQ(pad_node.paddings(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleRelu.test.cpp b/compiler/luci/lang/src/Nodes/CircleRelu.test.cpp
new file mode 100644
index 000000000..19ea88aa6
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleRelu.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleRelu.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleReluTest, constructor_P)
+{
+ luci::CircleRelu relu_node;
+
+ ASSERT_EQ(relu_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(relu_node.opcode(), luci::CircleOpcode::RELU);
+
+ ASSERT_EQ(relu_node.features(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleRelu6.test.cpp b/compiler/luci/lang/src/Nodes/CircleRelu6.test.cpp
new file mode 100644
index 000000000..74bf2e86a
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleRelu6.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleRelu6.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleRelu6Test, constructor_P)
+{
+ luci::CircleRelu6 relu6_node;
+
+ ASSERT_EQ(relu6_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(relu6_node.opcode(), luci::CircleOpcode::RELU6);
+
+ ASSERT_EQ(relu6_node.features(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleReshape.test.cpp b/compiler/luci/lang/src/Nodes/CircleReshape.test.cpp
new file mode 100644
index 000000000..7bc2d32a4
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleReshape.test.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleReshape.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleReshapeTest, constructor_P)
+{
+ luci::CircleReshape reshape;
+
+ ASSERT_EQ(reshape.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(reshape.opcode(), luci::CircleOpcode::RESHAPE);
+
+ ASSERT_EQ(reshape.tensor(), nullptr);
+ ASSERT_EQ(reshape.shape(), nullptr);
+ ASSERT_EQ(reshape.newShape()->rank(), 0);
+}
+
+TEST(CircleReshapeTest, alloc_new_shape_P)
+{
+ luci::CircleReshape reshape;
+
+ reshape.newShape()->rank(2);
+ ASSERT_EQ(reshape.newShape()->rank(), 2);
+
+ reshape.newShape()->dim(0) = 0;
+ reshape.newShape()->dim(1) = 1;
+
+ auto &const_reshape = const_cast<const luci::CircleReshape &>(reshape);
+ ASSERT_EQ(const_reshape.newShape()->dim(0), 0);
+ ASSERT_EQ(const_reshape.newShape()->dim(1), 1);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleRsqrt.test.cpp b/compiler/luci/lang/src/Nodes/CircleRsqrt.test.cpp
new file mode 100644
index 000000000..51f6bab36
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleRsqrt.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleRsqrt.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleRsqrtTest, constructor)
+{
+ luci::CircleRsqrt rsqrt_node;
+
+ ASSERT_EQ(rsqrt_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(rsqrt_node.opcode(), luci::CircleOpcode::RSQRT);
+
+ ASSERT_EQ(rsqrt_node.x(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleSoftmax.test.cpp b/compiler/luci/lang/src/Nodes/CircleSoftmax.test.cpp
new file mode 100644
index 000000000..7e994490c
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleSoftmax.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleSoftmax.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleSoftmaxTest, constructor_P)
+{
+ luci::CircleSoftmax softmax_node;
+
+ ASSERT_EQ(softmax_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(softmax_node.opcode(), luci::CircleOpcode::SOFTMAX);
+
+ ASSERT_EQ(softmax_node.logits(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleSqrt.test.cpp b/compiler/luci/lang/src/Nodes/CircleSqrt.test.cpp
new file mode 100644
index 000000000..6cfb3bc94
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleSqrt.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleSqrt.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleSqrtTest, constructor_P)
+{
+ luci::CircleSqrt sqrt_node;
+
+ ASSERT_EQ(sqrt_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(sqrt_node.opcode(), luci::CircleOpcode::SQRT);
+
+ ASSERT_EQ(sqrt_node.x(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleSquaredDifference.test.cpp b/compiler/luci/lang/src/Nodes/CircleSquaredDifference.test.cpp
new file mode 100644
index 000000000..71df189b9
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleSquaredDifference.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleSquaredDifference.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleSquaredDifferenceTest, constructor_P)
+{
+ luci::CircleSquaredDifference sd_node;
+
+ ASSERT_EQ(sd_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(sd_node.opcode(), luci::CircleOpcode::SQUARED_DIFFERENCE);
+
+ ASSERT_EQ(sd_node.x(), nullptr);
+ ASSERT_EQ(sd_node.y(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleSub.test.cpp b/compiler/luci/lang/src/Nodes/CircleSub.test.cpp
new file mode 100644
index 000000000..ebb29446a
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleSub.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleSub.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleSubTest, constructor_P)
+{
+ luci::CircleSub sub_node;
+
+ ASSERT_EQ(sub_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(sub_node.opcode(), luci::CircleOpcode::SUB);
+
+ ASSERT_EQ(sub_node.x(), nullptr);
+ ASSERT_EQ(sub_node.y(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleTranspose.test.cpp b/compiler/luci/lang/src/Nodes/CircleTranspose.test.cpp
new file mode 100644
index 000000000..7233869e6
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleTranspose.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleTranspose.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleTransposeTest, constructor_P)
+{
+ luci::CircleTranspose tr_node;
+
+ ASSERT_EQ(tr_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(tr_node.opcode(), luci::CircleOpcode::TRANSPOSE);
+
+ ASSERT_EQ(tr_node.a(), nullptr);
+ ASSERT_EQ(tr_node.perm(), nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp b/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp
new file mode 100644
index 000000000..9615082d9
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleTransposeConv.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleTransposeConvTest, constructor_P)
+{
+ luci::CircleTransposeConv trc_node;
+
+ ASSERT_EQ(trc_node.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(trc_node.opcode(), luci::CircleOpcode::TRANSPOSE_CONV);
+
+ ASSERT_EQ(trc_node.inputSizes(), nullptr);
+ ASSERT_EQ(trc_node.filter(), nullptr);
+ ASSERT_EQ(trc_node.outBackprop(), nullptr);
+}
diff --git a/compiler/luci/log/CMakeLists.txt b/compiler/luci/log/CMakeLists.txt
new file mode 100644
index 000000000..af2e7a768
--- /dev/null
+++ b/compiler/luci/log/CMakeLists.txt
@@ -0,0 +1,9 @@
+# TODO Find how to test logging framework
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(luci_log SHARED ${SOURCES})
+target_include_directories(luci_log PUBLIC include)
+target_link_libraries(luci_log PUBLIC hermes)
+target_link_libraries(luci_log PRIVATE hermes_std)
+target_link_libraries(luci_log PRIVATE nncc_common)
+install(TARGETS luci_log DESTINATION lib)
diff --git a/compiler/luci/log/README.md b/compiler/luci/log/README.md
new file mode 100644
index 000000000..512bc96d2
--- /dev/null
+++ b/compiler/luci/log/README.md
@@ -0,0 +1,3 @@
+# luci-log
+
+_luci-log_ is a logging framework for _luci_ compiler framework.
diff --git a/compiler/luci/log/include/luci/Log.h b/compiler/luci/log/include/luci/Log.h
new file mode 100644
index 000000000..51299a082
--- /dev/null
+++ b/compiler/luci/log/include/luci/Log.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_LOG_H__
+#define __LUCI_LOG_H__
+
+#include <hermes.h>
+
+namespace luci
+{
+
+/**
+ * @brief Logger Implementation
+ */
+class Logger final : public hermes::Source
+{
+public:
+ Logger(hermes::Context *ctx);
+ ~Logger();
+};
+
+/**
+ * @brief Logger Configuration
+ *
+ * Users are able to turn logging on/off via MOCO_LOG environment variable.
+ */
+class LoggerConfig final : public hermes::Config
+{
+public:
+ LoggerConfig();
+
+public:
+ void configure(const hermes::Source *, hermes::Source::Setting &) const final;
+ void configure(const Logger *, hermes::Source::Setting &) const;
+
+private:
+ bool _enabled;
+};
+
+} // namespace luci
+
+#include "luci/LoggingContext.h"
+
+/**
+ * HOW TO USE:
+ *
+ * LOGGER(l);
+ *
+ * INFO(l) << "Hello, World" << std::endl;
+ *
+ */
+#define LOGGER(name) ::luci::Logger name{::luci::LoggingContext::get()};
+
+// TODO Support FATAL, ERROR, WARN, and VERBOSE
+#define INFO(name) HERMES_INFO(name)
+
+// WARNING!
+//
+// THE CURRENT IMPLEMENTATION IS NOT THREAD SAFE.
+//
+
+#endif // __LUCI_LOG_H__
diff --git a/compiler/luci/log/include/luci/LoggingContext.h b/compiler/luci/log/include/luci/LoggingContext.h
new file mode 100644
index 000000000..f5091099f
--- /dev/null
+++ b/compiler/luci/log/include/luci/LoggingContext.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_LOGGING_CONTEXT_H__
+#define __LUCI_LOGGING_CONTEXT_H__
+
+#include <hermes.h>
+
+namespace luci
+{
+
+/**
+ * @brief Global logging context
+ */
+struct LoggingContext
+{
+ static hermes::Context *get(void);
+};
+
+} // namespace luci
+
+#endif // __LUCI_LOGGING_CONTEXT_H__
diff --git a/compiler/luci/log/src/Log.cpp b/compiler/luci/log/src/Log.cpp
new file mode 100644
index 000000000..7e1634009
--- /dev/null
+++ b/compiler/luci/log/src/Log.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Log.h"
+
+#include <cassert>
+#include <cstdlib>
+#include <iostream>
+
+// TODO Extract these lexical conversion routines as a library
+namespace
+{
+
+/**
+ * @brief Convert C-string as a value of type T
+ *
+ * safecast(s, v) returns v if s is nullptr.
+ */
+template <typename T> T safecast(const char *, const T &);
+
+template <> bool safecast<bool>(const char *s, const bool &value)
+{
+ return (s == nullptr) ? value : (std::stoi(s) != 0);
+}
+
+} // namespace
+
+//
+// Logger
+//
+namespace luci
+{
+
+Logger::Logger(hermes::Context *ctx) { activate(ctx->sources(), ctx->bus()); }
+Logger::~Logger() { deactivate(); }
+
+} // namespace luci
+
+//
+// LoggerConfig
+//
+namespace luci
+{
+
+LoggerConfig::LoggerConfig()
+{
+ // Turn on logging if LUCI_LOG is set as non-zero value
+ _enabled = safecast<bool>(std::getenv("LUCI_LOG"), false);
+}
+
+void LoggerConfig::configure(const hermes::Source *source, hermes::Source::Setting &setting) const
+{
+ // Let's ignore hermes::Sources if that is not a moco logger
+ if (auto logger = dynamic_cast<const Logger *>(source))
+ {
+ configure(logger, setting);
+ }
+}
+
+void LoggerConfig::configure(const Logger *, hermes::Source::Setting &setting) const
+{
+ if (_enabled)
+ {
+ // Enable all catagories
+ setting.accept_all();
+ }
+ else
+ {
+ // Disable all catagories
+ setting.reject_all();
+ }
+}
+
+} // namespace luci
diff --git a/compiler/luci/log/src/LoggingContext.cpp b/compiler/luci/log/src/LoggingContext.cpp
new file mode 100644
index 000000000..8d7997869
--- /dev/null
+++ b/compiler/luci/log/src/LoggingContext.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/LoggingContext.h"
+#include "luci/Log.h"
+
+#include <hermes/ConsoleReporter.h>
+
+#include <memory>
+
+namespace luci
+{
+
+hermes::Context *LoggingContext::get(void)
+{
+ static hermes::Context *ctx = nullptr;
+
+ if (ctx == nullptr)
+ {
+ ctx = new hermes::Context;
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<LoggerConfig>());
+ }
+
+ return ctx;
+}
+
+} // namespace luci
diff --git a/compiler/luci/logex/CMakeLists.txt b/compiler/luci/logex/CMakeLists.txt
new file mode 100644
index 000000000..fa2ea123c
--- /dev/null
+++ b/compiler/luci/logex/CMakeLists.txt
@@ -0,0 +1,13 @@
+# TODO Find how to test logging-ex utility
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(luci_logex SHARED ${SOURCES})
+target_include_directories(luci_logex PUBLIC include)
+target_link_libraries(luci_logex PUBLIC loco)
+target_link_libraries(luci_logex PUBLIC locop)
+target_link_libraries(luci_logex PRIVATE luci_log)
+target_link_libraries(luci_logex PRIVATE luci_lang)
+target_link_libraries(luci_logex PRIVATE hermes_std)
+target_link_libraries(luci_logex PRIVATE nncc_common)
+target_link_libraries(luci_logex PRIVATE pepper_str)
+install(TARGETS luci_logex DESTINATION lib)
diff --git a/compiler/luci/logex/README.md b/compiler/luci/logex/README.md
new file mode 100644
index 000000000..03b6baf35
--- /dev/null
+++ b/compiler/luci/logex/README.md
@@ -0,0 +1,3 @@
+# luci-logex
+
+_luci-logex_ is a extended logging utility for _luci_ compiler framework.
diff --git a/compiler/luci/logex/include/luci/FormattedGraph.h b/compiler/luci/logex/include/luci/FormattedGraph.h
new file mode 100644
index 000000000..da4af3bfa
--- /dev/null
+++ b/compiler/luci/logex/include/luci/FormattedGraph.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FORMATTED_GRAPH_H__
+#define __LUCI_FORMATTED_GRAPH_H__
+
+#include <locop/FormattedGraph.h>
+
+#include <memory>
+
+namespace luci
+{
+
+class NodeSummaryBuilder final : public locop::NodeSummaryBuilder
+{
+public:
+ NodeSummaryBuilder(const locop::SymbolTable *tbl) : _tbl{tbl}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool build(const loco::Node *node, locop::NodeSummary &s) const final;
+
+private:
+ const locop::SymbolTable *_tbl;
+};
+
+class NodeSummaryBuilderFactory final : public locop::NodeSummaryBuilderFactory
+{
+public:
+ NodeSummaryBuilderFactory() = default;
+
+public:
+ std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tlb) const final
+ {
+ return std::make_unique<NodeSummaryBuilder>(tlb);
+ }
+};
+
+} // namespace luci
+
+#endif // __LUCI_FORMATTED_GRAPH_H__
diff --git a/compiler/luci/logex/include/luci/LogHelper.h b/compiler/luci/logex/include/luci/LogHelper.h
new file mode 100644
index 000000000..37cdd735b
--- /dev/null
+++ b/compiler/luci/logex/include/luci/LogHelper.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_LOG_HELPER_H__
+#define __LUCI_LOG_HELPER_H__
+
+#include <locop/FormattedGraph.h>
+#include <loco.h>
+
+#include <memory>
+
+namespace luci
+{
+
+using FormattedGraph = locop::FormattedGraphImpl<locop::Formatter::LinearV1>;
+
+FormattedGraph fmt(loco::Graph *g);
+
+static inline FormattedGraph fmt(const std::unique_ptr<loco::Graph> &g) { return fmt(g.get()); }
+
+} // namespace luci
+
+#endif // __LUCI_LOG_HELPER_H__
diff --git a/compiler/luci/logex/src/FormattedGraph.cpp b/compiler/luci/logex/src/FormattedGraph.cpp
new file mode 100644
index 000000000..894ebc151
--- /dev/null
+++ b/compiler/luci/logex/src/FormattedGraph.cpp
@@ -0,0 +1,606 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/FormattedGraph.h"
+
+#include <luci/IR/CircleDialect.h>
+#include <luci/IR/CircleNodes.h>
+
+#include <pepper/str.h>
+
+#include <cassert>
+#include <sstream>
+#include <vector>
+
+/**
+ * @brief dump std::vector<int64_t> values to stream
+ */
+std::ostream &operator<<(std::ostream &os, const std::vector<int64_t> &vi64)
+{
+ for (auto vi : vi64)
+ {
+ os << vi << " ";
+ }
+ return os;
+}
+
+// For TF lite
+namespace
+{
+
+const char *to_str(loco::DataType type)
+{
+ switch (type)
+ {
+ case loco::DataType::U8:
+ return "UINT8";
+ case loco::DataType::U16:
+ return "UINT16";
+ case loco::DataType::U32:
+ return "UINT32";
+ case loco::DataType::U64:
+ return "UINT64";
+
+ case loco::DataType::S8:
+ return "INT8";
+ case loco::DataType::S16:
+ return "INT16";
+ case loco::DataType::S32:
+ return "INT32";
+ case loco::DataType::S64:
+ return "INT64";
+
+ case loco::DataType::FLOAT16:
+ return "FLOAT16";
+ case loco::DataType::FLOAT32:
+ return "FLOAT32";
+ case loco::DataType::FLOAT64:
+ return "FLOAT64";
+
+ case loco::DataType::BOOL:
+ return "BOOL";
+
+ default:
+ return "Error";
+ }
+}
+
+const char *to_str(luci::FusedActFunc fused)
+{
+ switch (fused)
+ {
+ case luci::FusedActFunc::NONE:
+ return "NONE";
+ case luci::FusedActFunc::RELU:
+ return "RELU";
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ return "RELU_N1_TO_1";
+ case luci::FusedActFunc::RELU6:
+ return "RELU6";
+ default:
+ return "Error";
+ }
+}
+
+const char *to_str(luci::Padding padding)
+{
+ switch (padding)
+ {
+ case luci::Padding::SAME:
+ return "SAME";
+ case luci::Padding::VALID:
+ return "VALID";
+ default:
+ return "Error";
+ }
+}
+
+std::string to_str(const luci::Stride *stride)
+{
+ return pepper::str(stride->h(), ",", stride->w());
+}
+
+std::string to_str(const luci::Filter *filter)
+{
+ return pepper::str(filter->h(), ",", filter->w());
+}
+
+std::string circle_opname(uint32_t opnum)
+{
+ static const std::string prefix{"circle."};
+
+ switch (static_cast<luci::CircleOpcode>(opnum))
+ {
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ case luci::CircleOpcode::OPCODE: \
+ return prefix + #OPCODE;
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_NODE
+ default:
+ break;
+ };
+
+ return prefix + "Invalid";
+}
+
+// CircleNodeSummaryBuilder with default implementation
+class CircleNodeSummaryBuilderBase : public locop::NodeSummaryBuilder
+{
+public:
+ CircleNodeSummaryBuilderBase(const locop::SymbolTable *tbl) : _tbl{tbl}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool build(const loco::Node *, locop::NodeSummary &s) const final;
+
+protected:
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ virtual bool summary(const CLASS *, locop::NodeSummary &s) const \
+ { \
+ s.comments().append("Emitted by Default CircleNodeSummaryBuilder"); \
+ s.state(locop::NodeSummary::State::PartiallyKnown); \
+ return true; \
+ }
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_NODE
+
+protected:
+ const locop::SymbolTable *tbl(void) const { return _tbl; }
+
+ // Please do not use _tbl directly and use tbl().
+ // This will be changed to private in near future.
+protected:
+ const locop::SymbolTable *_tbl;
+};
+
+class CircleNodeSummaryBuilder final : public CircleNodeSummaryBuilderBase
+{
+public:
+ CircleNodeSummaryBuilder(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
+ {
+ // DO NOTHING
+ }
+
+private:
+#define IMPLEMENT(CLASS) bool summary(const CLASS *, locop::NodeSummary &) const final;
+ IMPLEMENT(luci::CircleAbs)
+ IMPLEMENT(luci::CircleAdd)
+ IMPLEMENT(luci::CircleArgMax)
+ IMPLEMENT(luci::CircleAveragePool2D)
+ IMPLEMENT(luci::CircleBatchToSpaceND)
+ IMPLEMENT(luci::CircleConcatenation)
+ IMPLEMENT(luci::CircleConst)
+ IMPLEMENT(luci::CircleConv2D)
+ IMPLEMENT(luci::CircleCos)
+ IMPLEMENT(luci::CircleDepthwiseConv2D)
+ IMPLEMENT(luci::CircleDiv)
+ IMPLEMENT(luci::CircleExp)
+ IMPLEMENT(luci::CircleFullyConnected)
+ IMPLEMENT(luci::CircleLogicalNot)
+ IMPLEMENT(luci::CircleLogicalOr)
+ IMPLEMENT(luci::CircleMaximum)
+ IMPLEMENT(luci::CircleMaxPool2D)
+ IMPLEMENT(luci::CircleMean)
+ IMPLEMENT(luci::CircleMul)
+ IMPLEMENT(luci::CirclePack)
+ IMPLEMENT(luci::CirclePad)
+ IMPLEMENT(luci::CircleRelu)
+ IMPLEMENT(luci::CircleRelu6)
+ IMPLEMENT(luci::CircleReshape)
+ IMPLEMENT(luci::CircleRsqrt)
+ IMPLEMENT(luci::CircleSoftmax)
+ IMPLEMENT(luci::CircleSqrt)
+ IMPLEMENT(luci::CircleSquaredDifference)
+ IMPLEMENT(luci::CircleSub)
+ IMPLEMENT(luci::CircleTranspose)
+ IMPLEMENT(luci::CircleTransposeConv)
+ // Circle Only
+ IMPLEMENT(luci::CircleInstanceNorm)
+ // Virtual nodes
+ IMPLEMENT(luci::CircleInput)
+ IMPLEMENT(luci::CircleOutput)
+#undef IMPLEMENT
+};
+
+bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const
+{
+ if (node->dialect() != luci::CircleDialect::get())
+ return false;
+
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ if (dynamic_cast<const CLASS *>(node)) \
+ { \
+ s.opname(circle_opname(node->opnum())); \
+ return summary(dynamic_cast<const CLASS *>(node), s); \
+ }
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_NODE
+
+ return false;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleAbs *node, locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleAdd *node, locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.args().append("y", tbl()->lookup(node->y()));
+ s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMax *node, locop::NodeSummary &s) const
+{
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("dimension", tbl()->lookup(node->dimension()));
+ s.args().append("output_type", to_str(node->output_type()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleAveragePool2D *node,
+ locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+ s.args().append("value", tbl()->lookup(node->value()));
+ s.args().append("filter(h,w)", to_str(node->filter()));
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchToSpaceND *node,
+ locop::NodeSummary &s) const
+{
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("block_shape", tbl()->lookup(node->block_shape()));
+ s.args().append("crops", tbl()->lookup(node->crops()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleConcatenation *node,
+ locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+ for (uint32_t i = 0; i < node->numValues(); ++i)
+ s.args().append("values", tbl()->lookup(node->values(i)));
+ s.args().append("axis", pepper::str(node->axis()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleConst *, locop::NodeSummary &s) const
+{
+ s.state(locop::NodeSummary::State::PartiallyKnown);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleConv2D *node, locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+ assert(node->padding() != luci::Padding::UNDEFINED);
+
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("filter", tbl()->lookup(node->filter()));
+ s.args().append("bias", tbl()->lookup(node->bias()));
+
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleCos *node, locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthwiseConv2D *node,
+ locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+ assert(node->padding() != luci::Padding::UNDEFINED);
+
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("filter", tbl()->lookup(node->filter()));
+ s.args().append("bias", tbl()->lookup(node->bias()));
+
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("depthMultiplier", std::to_string(node->depthMultiplier()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleDiv *node, locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.args().append("y", tbl()->lookup(node->y()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleExp *node, locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleFullyConnected *node,
+ locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("weights", tbl()->lookup(node->weights()));
+ s.args().append("bias", tbl()->lookup(node->bias()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleLogicalNot *node,
+ locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleLogicalOr *node,
+ locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.args().append("y", tbl()->lookup(node->y()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleMaximum *node, locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.args().append("y", tbl()->lookup(node->y()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleMaxPool2D *node,
+ locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+ s.args().append("value", tbl()->lookup(node->value()));
+ s.args().append("filter(h,w)", to_str(node->filter()));
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleMean *node, locop::NodeSummary &s) const
+{
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("reduction_indices", tbl()->lookup(node->reduction_indices()));
+ s.args().append("keep_dims", node->keep_dims() ? "true" : "false");
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleMul *node, locop::NodeSummary &s) const
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.args().append("y", tbl()->lookup(node->y()));
+ s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CirclePack *node, locop::NodeSummary &s) const
+{
+ for (uint32_t i = 0; i < node->values_count(); ++i)
+ s.args().append("values", tbl()->lookup(node->values(i)));
+ s.args().append("values_count", pepper::str(node->values_count()));
+ s.args().append("axis", pepper::str(node->axis()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CirclePad *node, locop::NodeSummary &s) const
+{
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("paddings", tbl()->lookup(node->paddings()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleRelu *node, locop::NodeSummary &s) const
+{
+ s.args().append("features", tbl()->lookup(node->features()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleRelu6 *node, locop::NodeSummary &s) const
+{
+ s.args().append("features", tbl()->lookup(node->features()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleReshape *node, locop::NodeSummary &s) const
+{
+ s.args().append("tensor", tbl()->lookup(node->tensor()));
+ s.args().append("shape", tbl()->lookup(node->shape()));
+ // TODO Show newShape info
+ s.state(locop::NodeSummary::State::PartiallyKnown);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleRsqrt *node, locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleSoftmax *node, locop::NodeSummary &s) const
+{
+ s.args().append("logits", tbl()->lookup(node->logits()));
+ s.args().append("beta", pepper::str(node->beta()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleSqrt *node, locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleSquaredDifference *node,
+ locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.args().append("y", tbl()->lookup(node->y()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleSub *node, locop::NodeSummary &s) const
+{
+ s.args().append("x", tbl()->lookup(node->x()));
+ s.args().append("y", tbl()->lookup(node->y()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+// TODO TFLTanh
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleTranspose *node,
+ locop::NodeSummary &s) const
+{
+ s.args().append("a", tbl()->lookup(node->a()));
+ s.args().append("perm", tbl()->lookup(node->perm()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleTransposeConv *node,
+ locop::NodeSummary &s) const
+{
+ assert(node->padding() != luci::Padding::UNDEFINED);
+
+ s.args().append("inputSizes", tbl()->lookup(node->inputSizes()));
+ s.args().append("filter", tbl()->lookup(node->filter()));
+ s.args().append("outBackprop", tbl()->lookup(node->outBackprop()));
+
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleInput *, locop::NodeSummary &s) const
+{
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleOutput *node, locop::NodeSummary &s) const
+{
+ s.args().append("from", tbl()->lookup(node->from()));
+
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleInstanceNorm *node,
+ locop::NodeSummary &s) const
+{
+ auto fused = node->fusedActivationFunction();
+ assert(fused != luci::FusedActFunc::UNDEFINED);
+
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("gamma", tbl()->lookup(node->gamma()));
+ s.args().append("beta", tbl()->lookup(node->beta()));
+ s.args().append("epsilon", pepper::str(node->epsilon()));
+ s.args().append("fused_activation_function", to_str(fused));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool NodeSummaryBuilder::build(const loco::Node *node, locop::NodeSummary &s) const
+{
+ if (locop::CanonicalNodeSummaryBuilder(_tbl).build(node, s))
+ {
+ return true;
+ }
+
+ if (CircleNodeSummaryBuilder(_tbl).build(node, s))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+} // namespace luci
diff --git a/compiler/luci/logex/src/LogHelper.cpp b/compiler/luci/logex/src/LogHelper.cpp
new file mode 100644
index 000000000..caf659906
--- /dev/null
+++ b/compiler/luci/logex/src/LogHelper.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/LogHelper.h"
+#include "luci/FormattedGraph.h"
+
+namespace luci
+{
+
+FormattedGraph fmt(loco::Graph *g)
+{
+ auto node_summary_builder = std::make_unique<NodeSummaryBuilderFactory>();
+ return std::move(locop::fmt<locop::LinearV1>(g).with(std::move(node_summary_builder)));
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/CMakeLists.txt b/compiler/luci/pass/CMakeLists.txt
new file mode 100644
index 000000000..93130ce60
--- /dev/null
+++ b/compiler/luci/pass/CMakeLists.txt
@@ -0,0 +1,29 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+#file(GLOB_RECURSE TESTS "src/*.test.cpp")
+#list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(luci_pass SHARED ${SOURCES})
+target_include_directories(luci_pass PRIVATE src)
+target_include_directories(luci_pass PUBLIC include)
+target_link_libraries(luci_pass PUBLIC loco)
+target_link_libraries(luci_pass PUBLIC logo_core)
+target_link_libraries(luci_pass PRIVATE logo)
+target_link_libraries(luci_pass PRIVATE luci_lang)
+target_link_libraries(luci_pass PRIVATE luci_log)
+target_link_libraries(luci_pass PRIVATE luci_service)
+target_link_libraries(luci_pass PRIVATE luci_logex)
+target_link_libraries(luci_pass PRIVATE nncc_common)
+target_link_libraries(luci_pass PRIVATE oops)
+install(TARGETS luci_pass DESTINATION lib)
+
+# TODO enable for tests
+#if(NOT ENABLE_TEST)
+# return()
+#endif(NOT ENABLE_TEST)
+#
+#nnas_find_package(GTest REQUIRED)
+#
+#GTest_AddTest(luci_pass_test ${TESTS})
+#target_include_directories(luci_pass_test PRIVATE src)
+#target_link_libraries(luci_pass_test luci_pass)
+#target_link_libraries(luci_pass_test oops)
diff --git a/compiler/luci/pass/README.md b/compiler/luci/pass/README.md
new file mode 100644
index 000000000..9b6cdebd3
--- /dev/null
+++ b/compiler/luci/pass/README.md
@@ -0,0 +1,3 @@
+# luci-pass
+
+_luci-pass_ provides Circle Dialect transformation passes
diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h
new file mode 100644
index 000000000..a969cca85
--- /dev/null
+++ b/compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_OPTIMIZER_H__
+#define __LUCI_CIRCLE_OPTIMIZER_H__
+
+#include <loco.h>
+
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
+class CircleOptimizer final
+{
+public:
+ struct Options
+ {
+ enum Algorithm
+ {
+ FuseInstanceNorm,
+ };
+
+ virtual void enable(Algorithm) = 0;
+ virtual bool query(Algorithm) = 0;
+ };
+
+public:
+ // TODO maybe caller can provide Options as ctor parameters
+ Options *options(void);
+
+public:
+ void optimize(loco::Graph *) const;
+
+private:
+ std::unique_ptr<Options> _options;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_OPTIMIZER_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseInstanceNormPass.h b/compiler/luci/pass/include/luci/Pass/FuseInstanceNormPass.h
new file mode 100644
index 000000000..800a5f789
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseInstanceNormPass.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_INSTANCE_NORM_PASS_H__
+#define __LUCI_FUSE_INSTANCE_NORM_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse certain pattern of subgraph into CircleInstanceNorm
+ * with auxiliary nodes
+ *
+ * For detailed subgraph pattern to be fused, please check its implementation.
+ */
+struct FuseInstanceNormPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseInstanceNormPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_INSTANCE_NORM_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h b/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h
new file mode 100644
index 000000000..86bb2ab42
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SHAPE_INFERENCE_PASS_H__
+#define __LUCI_SHAPE_INFERENCE_PASS_H__
+
+#include <loco.h>
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to infer shape of nodes
+ */
+class ShapeInferencePass : public logo::Pass
+{
+public:
+ virtual const char *name(void) const { return "luci::ShapeInferencePass"; }
+
+public:
+ bool run(loco::Graph *graph);
+};
+
+} // namespace luci
+
+#endif //__LUCI_SHAPE_INFERENCE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h b/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h
new file mode 100644
index 000000000..c607ac63f
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h
@@ -0,0 +1,42 @@
+
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TYPE_INFERENCE_PASS_H__
+#define __LUCI_TYPE_INFERENCE_PASS_H__
+
+#include <loco.h>
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to infer type of nodes
+ */
+class TypeInferencePass : public logo::Pass
+{
+public:
+ virtual const char *name(void) const { return "luci::TypeInferencePass"; }
+
+public:
+ bool run(loco::Graph *graph);
+};
+
+} // namespace luci
+
+#endif //__LUCI_TYPE_INFERENCE_PASS_H__
diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp
new file mode 100644
index 000000000..dcb05a0b5
--- /dev/null
+++ b/compiler/luci/pass/src/CircleOptimizer.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleOptimizer.h"
+
+#include "luci/Pass/FuseInstanceNormPass.h"
+// TODO add more passes
+
+#include "luci/Pass/ShapeInferencePass.h"
+#include "luci/Pass/TypeInferencePass.h"
+
+// logo passes
+#include <logo/RemoveDeadNodePass.h>
+
+#include "ProgressReporter.h"
+
+#include <logo/Phase.h>
+
+#include <memory>
+
+namespace
+{
+
+using namespace luci;
+
+class OptimizeOptionsImpl : public luci::CircleOptimizer::Options
+{
+public:
+ void enable(Algorithm) final;
+ bool query(Algorithm) final;
+
+private:
+ std::vector<Algorithm> _algorithms;
+};
+
+void OptimizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); }
+
+bool OptimizeOptionsImpl::query(Algorithm algo)
+{
+ std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo);
+ if (it == _algorithms.end())
+ return false;
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+CircleOptimizer::Options *CircleOptimizer::options(void)
+{
+ if (_options == nullptr)
+ {
+ _options = std::make_unique<OptimizeOptionsImpl>();
+ }
+
+ return _options.get();
+}
+
+void CircleOptimizer::optimize(loco::Graph *g) const
+{
+ logo::Phase phase;
+
+ /* TRANSFORM DECLARATION BEGIN */
+ if (_options->query(Options::Algorithm::FuseInstanceNorm))
+ {
+ phase.emplace_back(std::make_unique<FuseInstanceNormPass>());
+ }
+ // Shape inference is needed for added nodes doing above transformations
+ phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
+ /* TRANSFORM DECLARATION END */
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseInstanceNormPass.cpp b/compiler/luci/pass/src/FuseInstanceNormPass.cpp
new file mode 100644
index 000000000..180b5bbef
--- /dev/null
+++ b/compiler/luci/pass/src/FuseInstanceNormPass.cpp
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseInstanceNormPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <loco/Service/ShapeInference.h>
+
+#include <cassert>
+#include <set>
+
+// Helper to find commutative node's arguments
+namespace
+{
+
+/**
+ * INTRODUCTION
+ * Binary operation f(x,y) is 'commutative' when
+ * f(x,y) == f(y,x) holds for all x, y.
+ * For examples, ADD, MUL and SQUARED_DIFFERENCE are commutative.
+ * These helpers make it easy to find commutative arguments of a commutative node.
+ *
+ * HOW TO USE
+ * COMM_NODE *node;
+ * ARG_TYPE_1 *arg1;
+ * ARG_TYPE_2 *arg2;
+ *
+ * bool ok = fill(&arg1, &arg2).with_commutative_args_of(node);
+ *
+ * Result
+ * If 'node's commutative argument types are actually {ARG_TYPE_1, ARG_TYPE_2}
+ * (as a set), 'arg1' and 'arg2' are set as actual 'node's arguments with matching
+ * type, and return value 'ok' is true.
+ * Otherwise, 'arg1' and 'arg2' not changed, 'ok' is false.
+ */
+
+template <class ARG_TYPE_1, class ARG_TYPE_2> class NodeFiller final
+{
+public:
+ NodeFiller(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2) : _arg_1(arg_1), _arg_2(arg_2)
+ {
+ // DO NOTHING
+ }
+
+ /**
+ * @return true When 'node's argument types are 'ARG_TYPE_1' and 'ARG_TYPE_2'
+ *           In such case, it assigns '_arg_1' and '_arg_2' to actual arguments
+ *
+ * @return false When 'node's argument types are NOT matched with 'ARG_TYPE_*'
+ * In such case, it does not amend '_arg_1' and '_arg_2'
+ *
+ * @require COMM_NODE has member x() and y()
+ */
+ template <class COMM_NODE> bool with_commutative_args_of(const COMM_NODE *node);
+
+private:
+ ARG_TYPE_1 **_arg_1;
+ ARG_TYPE_2 **_arg_2;
+};
+
+template <class ARG_TYPE_1, class ARG_TYPE_2>
+inline NodeFiller<ARG_TYPE_1, ARG_TYPE_2> fill(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2)
+{
+ return NodeFiller<ARG_TYPE_1, ARG_TYPE_2>{arg_1, arg_2};
+}
+
+template <class ARG_TYPE_1, class ARG_TYPE_2>
+template <class COMM_NODE>
+bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_commutative_args_of(const COMM_NODE *node)
+{
+ // Case 1) X == ARG_TYPE_1 / Y == ARG_TYPE_2
+ {
+ auto x = dynamic_cast<ARG_TYPE_1 *>(node->x());
+ auto y = dynamic_cast<ARG_TYPE_2 *>(node->y());
+
+ if (x && y)
+ {
+ *_arg_1 = x;
+ *_arg_2 = y;
+ return true;
+ }
+ }
+
+ // Case 2) X == ARG_TYPE_2 / Y == ARG_TYPE_1
+ {
+ auto x = dynamic_cast<ARG_TYPE_2 *>(node->x());
+ auto y = dynamic_cast<ARG_TYPE_1 *>(node->y());
+
+ if (x && y)
+ {
+ *_arg_1 = y;
+ *_arg_2 = x;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+} // namespace
+
+// Helper to check detail
+namespace
+{
+
+/// @return true When node has shape of '1 x .. x 1 x depth'
+bool is_1D_with_dummy_dim(luci::CircleConst *node, uint32_t depth)
+{
+ auto rank = node->rank();
+ uint32_t axis;
+ for (axis = 0; axis < rank - 1; ++axis)
+ {
+ if (node->dim(axis).value() != 1)
+ return false;
+ }
+ return node->dim(axis).value() == depth;
+}
+
+bool is_instance_mean(luci::CircleMean *mean)
+{
+ //
+ // CHECK 1) input is rank 4
+ //
+ auto input = mean->input();
+ if (not loco::shape_known(input))
+ return false;
+ auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
+ if (input_shape.rank() != 4)
+ return false;
+
+ //
+ // CHECK 2) 'reduction indices' is CircleConst of value [1,2], that is HW of NHWC
+ //
+ // TODO Support equivalent case, like [-3,-2]
+ // TODO Support non-Const case?
+ // TODO What if input is NCHW format in Circle?
+ auto red_indices = dynamic_cast<luci::CircleConst *>(mean->reduction_indices());
+ if (not red_indices)
+ return false;
+ if (red_indices->rank() != 1)
+ return false;
+ std::set<int32_t> red_indices_set;
+ {
+ // TODO Currently only support S32, support other types
+ assert(red_indices->dtype() == loco::DataType::S32);
+ for (uint32_t i = 0; i < red_indices->dim(0).value(); ++i)
+ red_indices_set.insert(red_indices->at<loco::DataType::S32>(i));
+ }
+ if (red_indices_set.size() != 2)
+ return false;
+ if (red_indices_set.find(1) == red_indices_set.end())
+ return false;
+ if (red_indices_set.find(2) == red_indices_set.end())
+ return false;
+
+ //
+ // CHECK 3) keep_dims == true (?)
+ //
+ // We only have case of 'keep_dims == true' so far, but it might be okay with 'keep_dims == false'
+ // TODO Check this fact, and if true, return true regardless of keep_dims
+ return mean->keep_dims();
+}
+
+} // namespace
+
+// Helper to fuse Instance Norm
+namespace
+{
+
+/**
+ * SUBGRAPH PATTERN
+ *
+ * - Below diagram shows Instance Norm pattern to fuse.
+ * - Execution dependency order is top to the bottom.
+ * - Node name is matched with variable name of InstanceNormPattern class.
+ * - Usually, first word of node name (variable name) is node type. For e.g.
+ *   variable 'mean_as_variance' is a pointer to luci::CircleMean.
+ * - (Item in parenthesis) means actually exist, but not having a name and
+ * not a variable of InstanceNormPattern class.
+ *
+ * TODO support other semantically same patterns for instance norm
+ *
+ * [In]
+ * |
+ * V
+ *     +----------- ifm -----+   (reduction indices)
+ * | | | |
+ * | | V V
+ * | | mean_of_ifm ----------------+
+ * | V | |
+ *     |    sqdiff <--+   (reduction indices)           |
+ * | | | |
+ * | V | |
+ * | mean_as_variance <---+ const_as_epsilon |
+ * | | | |
+ * | V | |
+ * | add_as_variance <--------+ |
+ * | | |
+ * | V |
+ * | rsqrt const_as_gamma |
+ * | | | |
+ * | V | |
+ * | mul_gamma <--+ |
+ * | | | |
+ * V V V |
+ * mul_as_scaled_ifm mul_as_scaled_mean <-------------+
+ * | |
+ * | const_as_beta |
+ * | | V
+ * | +------> sub
+ * V |
+ * add_as_terminal <----------+
+ * |
+ * V
+ * [Out]
+ */
+class InstanceNormPattern final
+{
+public:
+ InstanceNormPattern(luci::CircleAdd *candidate)
+ {
+ assert(candidate);
+ add_as_terminal = candidate;
+ }
+
+public:
+ bool matched();
+ bool matched() const { return _matched; }
+
+public:
+ // Context
+ loco::Node *ifm = nullptr;
+ luci::CircleMean *mean_of_ifm = nullptr;
+ luci::CircleSquaredDifference *sqdiff = nullptr;
+ luci::CircleMean *mean_as_variance = nullptr;
+ luci::CircleConst *const_as_epsilon = nullptr;
+ luci::CircleAdd *add_as_variance = nullptr;
+ luci::CircleRsqrt *rsqrt = nullptr;
+ luci::CircleConst *const_as_gamma = nullptr;
+ luci::CircleMul *mul_gamma = nullptr;
+ luci::CircleMul *mul_as_scaled_ifm = nullptr;
+ luci::CircleMul *mul_as_scaled_mean = nullptr;
+ luci::CircleConst *const_as_beta = nullptr;
+ luci::CircleSub *sub = nullptr;
+ luci::CircleAdd *add_as_terminal = nullptr;
+
+private:
+ bool _matched = false;
+};
+
+bool InstanceNormPattern::matched()
+{
+ if (_matched)
+ return true;
+
+#define CHECK_OR_FALSE(condition) \
+ if (not(condition)) \
+ return false;
+
+ // Check order is DFS
+
+ CHECK_OR_FALSE(fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
+
+ CHECK_OR_FALSE(loco::shape_known(ifm));
+ auto ifm_shape = loco::shape_get(ifm);
+ CHECK_OR_FALSE(ifm_shape.domain() == loco::Domain::Tensor);
+ auto ifm_tensor_shape = ifm_shape.as<loco::TensorShape>();
+ CHECK_OR_FALSE(ifm_tensor_shape.rank() == 4);
+ uint32_t ifm_channel_depth = ifm_tensor_shape.dim(3).value();
+
+ CHECK_OR_FALSE(fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
+ CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth));
+
+ add_as_variance = dynamic_cast<luci::CircleAdd *>(rsqrt->x());
+ CHECK_OR_FALSE(add_as_variance);
+
+ CHECK_OR_FALSE(
+ fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+
+ CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
+ // TODO Support regarding broadcast
+ CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
+
+ CHECK_OR_FALSE(is_instance_mean(mean_as_variance));
+ sqdiff = dynamic_cast<luci::CircleSquaredDifference *>(mean_as_variance->input());
+ CHECK_OR_FALSE(sqdiff);
+
+ loco::Node *ifm_should_be = nullptr;
+ CHECK_OR_FALSE(fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff));
+ CHECK_OR_FALSE(ifm == ifm_should_be);
+ CHECK_OR_FALSE(is_instance_mean(mean_of_ifm));
+ CHECK_OR_FALSE(ifm == mean_of_ifm->input());
+
+ const_as_beta = dynamic_cast<luci::CircleConst *>(sub->x());
+ CHECK_OR_FALSE(const_as_beta);
+ CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth));
+
+ mul_as_scaled_mean = dynamic_cast<luci::CircleMul *>(sub->y());
+ CHECK_OR_FALSE(mul_as_scaled_mean);
+
+ luci::CircleMul *mul_gamma_should_be = nullptr;
+ luci::CircleMean *mean_of_ifm_should_be = nullptr;
+ CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
+ .with_commutative_args_of(mul_as_scaled_mean));
+ CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
+ CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
+#undef CHECK_OR_FALSE
+ _matched = true;
+ return true;
+}
+
+/**
+ * Instance norm pattern would be fused like following diagram:
+ *
+ * [In] --------------------------- CircleInstanceNorm --- [Out]
+ * / /
+ * const_as_gamma --- TFLReshape --- /
+ * /
+ * const_as_beta ---- TFLReshape ---
+ *
+ * Note
+ * - 'const_as_gamma' and 'const_as_beta' are from original graph
+ * - Value of 'const_as_epsilon' would be copied to CircleInstanceNorm's attribute
+ *      - TFLReshape is added as CircleInstanceNorm only accepts 1D tensors
+ * - 'CircleConst --- TFLReshape' is expected to be fused in constant folding for Reshape
+ */
+void fuse_instance_norm(const InstanceNormPattern &p)
+{
+ assert(p.matched());
+
+ auto graph = p.add_as_terminal->graph();
+
+ // Make reshape for gamma & beta
+ auto reshape_gamma = graph->nodes()->create<luci::CircleReshape>();
+ auto reshape_beta = graph->nodes()->create<luci::CircleReshape>();
+ {
+ auto ifm_shape = loco::shape_get(p.ifm).as<loco::TensorShape>();
+ uint32_t ifm_channel_depth = ifm_shape.dim(3).value();
+
+ int32_t new_shape[1] = {static_cast<int32_t>(ifm_channel_depth)};
+
+ reshape_gamma->tensor(p.const_as_gamma);
+ reshape_beta->tensor(p.const_as_beta);
+
+ luci::set_new_shape(reshape_gamma, new_shape, 1);
+ luci::set_new_shape(reshape_beta, new_shape, 1);
+ }
+
+ // Make Instance Norm to replace
+ auto instance_norm = graph->nodes()->create<luci::CircleInstanceNorm>();
+ instance_norm->input(p.ifm);
+ instance_norm->gamma(reshape_gamma);
+ instance_norm->beta(reshape_beta);
+ float epsilon = p.const_as_epsilon->at<loco::DataType::FLOAT32>(0);
+ instance_norm->epsilon(epsilon);
+ instance_norm->fusedActivationFunction(p.add_as_terminal->fusedActivationFunction());
+
+ replace(p.add_as_terminal).with(instance_norm);
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseInstanceNormPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto add = dynamic_cast<luci::CircleAdd *>(node);
+ if (not add)
+ continue;
+
+ InstanceNormPattern pattern(add);
+ if (not pattern.matched())
+ continue;
+
+ fuse_instance_norm(pattern);
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ProgressReporter.cpp b/compiler/luci/pass/src/ProgressReporter.cpp
new file mode 100644
index 000000000..dcf47aba6
--- /dev/null
+++ b/compiler/luci/pass/src/ProgressReporter.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ProgressReporter.h"
+
+#include <luci/Log.h>
+#include <luci/LogHelper.h>
+
+#include <logo/Phase.h>
+#include <logo/Pass.h>
+
+#include <cassert>
+
+namespace
+{
+
+char to_char(bool b) { return b ? 'Y' : 'N'; }
+
+const char *to_str(logo::PhaseStrategy s)
+{
+ switch (s)
+ {
+ case logo::PhaseStrategy::Saturate:
+ return "Saturate";
+ case logo::PhaseStrategy::Restart:
+ return "Restart";
+ }
+ assert(false);
+ return "";
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "==============================================================";
+ INFO(prime) << "PhaseRunner<" << to_str(strategy()) << ">";
+ INFO(prime) << "Initial graph";
+ INFO(prime) << luci::fmt(graph());
+}
+
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "PhaseRunner<" << to_str(strategy()) << "> - done";
+}
+
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *info)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "--------------------------------------------------------------";
+ INFO(prime) << "Before " << logo::pass_name(info->pass());
+}
+
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *info)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "After " << logo::pass_name(info->pass())
+ << " (changed: " << to_char(info->changed()) << ")";
+ INFO(prime) << luci::fmt(graph());
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ProgressReporter.h b/compiler/luci/pass/src/ProgressReporter.h
new file mode 100644
index 000000000..bd2ba9849
--- /dev/null
+++ b/compiler/luci/pass/src/ProgressReporter.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PROGRESSREPORTER_H__
+#define __LUCI_PROGRESSREPORTER_H__
+
+#include <logo/Phase.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+class ProgressReporter : public logo::PhaseEventListener
+{
+public:
+ ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
+ : _graph{graph}, _strategy{strategy}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *) override;
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *) override;
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *) override;
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *) override;
+
+public:
+ loco::Graph *graph(void) const { return _graph; }
+ logo::PhaseStrategy strategy(void) const { return _strategy; }
+
+private:
+ loco::Graph *_graph;
+ logo::PhaseStrategy _strategy;
+};
+
+} // namespace luci
+
+#endif // __LUCI_PROGRESSREPORTER_H__
diff --git a/compiler/luci/pass/src/ShapeInferencePass.cpp b/compiler/luci/pass/src/ShapeInferencePass.cpp
new file mode 100644
index 000000000..f681b3d5f
--- /dev/null
+++ b/compiler/luci/pass/src/ShapeInferencePass.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ShapeInferencePass.h"
+
+#include <luci/IR/CircleDialect.h>
+#include <luci/Service/CircleShapeInferenceRule.h>
+
+#include <loco.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/CanonicalShapeInferenceRule.h>
+#include <loco/Service/ShapeInference.h>
+#include <loco/Service/MultiDialectShapeInferenceRule.h>
+
+namespace luci
+{
+
+bool ShapeInferencePass::run(loco::Graph *g)
+{
+ loco::CanonicalShapeInferenceRule canonical_rule;
+ luci::CircleShapeInferenceRule circle_rule;
+
+ loco::MultiDialectShapeInferenceRule rules;
+
+ rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
+ .bind(luci::CircleDialect::get(), &circle_rule);
+
+ return loco::apply(&rules).to(g);
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/TypeInferencePass.cpp b/compiler/luci/pass/src/TypeInferencePass.cpp
new file mode 100644
index 000000000..2c7b3a897
--- /dev/null
+++ b/compiler/luci/pass/src/TypeInferencePass.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/TypeInferencePass.h"
+
+#include <luci/IR/CircleDialect.h>
+#include <luci/Service/CircleTypeInferenceRule.h>
+
+#include <loco.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/TypeInference.h>
+
+namespace luci
+{
+
+bool TypeInferencePass::run(loco::Graph *g)
+{
+ loco::CanonicalTypeInferenceRule canonical_rule;
+ luci::CircleTypeInferenceRule circle_rule;
+
+ loco::MultiDialectTypeInferenceRule rules;
+
+ rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
+ .bind(luci::CircleDialect::get(), &circle_rule);
+
+ return loco::apply(&rules).to(g);
+}
+
+} // namespace luci
diff --git a/compiler/luci/requires.cmake b/compiler/luci/requires.cmake
new file mode 100644
index 000000000..e88dabd24
--- /dev/null
+++ b/compiler/luci/requires.cmake
@@ -0,0 +1,9 @@
+require("loco")
+require("locop")
+require("logo-core")
+require("mio-circle")
+require("oops")
+require("hermes")
+require("hermes-std")
+require("tflchef")
+require("tflite2circle")
diff --git a/compiler/luci/service/CMakeLists.txt b/compiler/luci/service/CMakeLists.txt
new file mode 100644
index 000000000..9f50c9c4f
--- /dev/null
+++ b/compiler/luci/service/CMakeLists.txt
@@ -0,0 +1,25 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(luci_service SHARED ${SOURCES})
+target_include_directories(luci_service PRIVATE src)
+target_include_directories(luci_service PUBLIC include)
+target_link_libraries(luci_service PUBLIC luci_lang)
+target_link_libraries(luci_service PUBLIC mio_circle)
+target_link_libraries(luci_service PUBLIC logo_core)
+target_link_libraries(luci_service PRIVATE luci_log)
+target_link_libraries(luci_service PRIVATE nncc_common)
+target_link_libraries(luci_service PRIVATE oops)
+install(TARGETS luci_service DESTINATION lib)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_service_test ${TESTS})
+target_include_directories(luci_service_test PRIVATE src)
+target_link_libraries(luci_service_test luci_service)
+target_link_libraries(luci_service_test oops)
diff --git a/compiler/luci/service/README.md b/compiler/luci/service/README.md
new file mode 100644
index 000000000..ac3583145
--- /dev/null
+++ b/compiler/luci/service/README.md
@@ -0,0 +1,3 @@
+# luci-service
+
+_luci-service_ provides Circle Dialect Services
diff --git a/compiler/luci/service/include/luci/Service/CircleShapeInference.h b/compiler/luci/service/include/luci/Service/CircleShapeInference.h
new file mode 100644
index 000000000..fb934c2cf
--- /dev/null
+++ b/compiler/luci/service/include/luci/Service/CircleShapeInference.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_H__
+#define __LUCI_CIRCLE_SHAPE_INFERENCE_H__
+
+#include "ShapeDescription.h"
+
+#include <loco/IR/Nodes.h>
+
+namespace luci
+{
+
+/**
+ * @brief Get the shape of each node as a node annotation
+ *
+ * HOW TO USE
+ *
+ * ShapeInference::get(g->nodes()->at(..));
+ */
+struct ShapeInference
+{
+ static ShapeDescription get(loco::Node *node);
+};
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_SHAPE_INFERENCE_H__
diff --git a/compiler/luci/service/include/luci/Service/CircleShapeInferenceRule.h b/compiler/luci/service/include/luci/Service/CircleShapeInferenceRule.h
new file mode 100644
index 000000000..3f63c9633
--- /dev/null
+++ b/compiler/luci/service/include/luci/Service/CircleShapeInferenceRule.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_RULE_H__
+#define __LUCI_CIRCLE_SHAPE_INFERENCE_RULE_H__
+
+#include <loco/Service/ShapeInference.h>
+
+namespace luci
+{
+
+struct CircleShapeInferenceRule final : public loco::ShapeInferenceRule
+{
+ bool recognize(const loco::Dialect *) const final;
+ bool infer(const loco::Node *, loco::NodeShape &) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_SHAPE_INFERENCE_RULE_H__
diff --git a/compiler/luci/service/include/luci/Service/CircleTypeInference.h b/compiler/luci/service/include/luci/Service/CircleTypeInference.h
new file mode 100644
index 000000000..ea7a3c5ed
--- /dev/null
+++ b/compiler/luci/service/include/luci/Service/CircleTypeInference.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_H__
+#define __LUCI_CIRCLE_TYPE_INFERENCE_H__
+
+#include <loco/IR/Nodes.h>
+
+#include <mio/circle/schema_generated.h>
+
+namespace luci
+{
+
+/**
+ * @brief Get the type of each node as NodeAnnotation
+ *
+ * HOW TO USE
+ *
+ * TypeInference::get(g->nodes()->at(0));
+ * TypeInference::get(g->nodes()->at(...));
+ */
+struct TypeInference
+{
+ static circle::TensorType get(loco::Node *node);
+};
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_TYPE_INFERENCE_H__
diff --git a/compiler/luci/service/include/luci/Service/CircleTypeInferenceRule.h b/compiler/luci/service/include/luci/Service/CircleTypeInferenceRule.h
new file mode 100644
index 000000000..3b21081ef
--- /dev/null
+++ b/compiler/luci/service/include/luci/Service/CircleTypeInferenceRule.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_RULE_H__
+#define __LUCI_CIRCLE_TYPE_INFERENCE_RULE_H__
+
+#include <loco/Service/TypeInference.h>
+
+namespace luci
+{
+
+/**
+ * @brief Type Inference Rule for CircleDialect
+ */
+struct CircleTypeInferenceRule final : public loco::TypeInferenceRule
+{
+ bool recognize(const loco::Dialect *) const final;
+ bool infer(const loco::Node *, loco::DataType &) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_TYPE_INFERENCE_RULE_H__
diff --git a/compiler/luci/service/include/luci/Service/ShapeDescription.h b/compiler/luci/service/include/luci/Service/ShapeDescription.h
new file mode 100644
index 000000000..949cce535
--- /dev/null
+++ b/compiler/luci/service/include/luci/Service/ShapeDescription.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SHAPE_DESCRIPTION_H__
+#define __LUCI_SHAPE_DESCRIPTION_H__
+
+#include <loco/IR/PermutingCodec.h>
+#include <loco/IR/NodeShape.h>
+
+#include <cstdint>
+#include <vector>
+
+namespace luci
+{
+
+struct ShapeDescription
+{
+ std::vector<int32_t> _dims;
+ bool _rank_known;
+};
+
+// TODO remove these when CircleDialect is fully functional
+ShapeDescription to_shape_description(const loco::TensorShape &shape);
+ShapeDescription to_shape_description(const loco::FeatureShape &shape);
+ShapeDescription to_shape_description(const loco::FilterShape &shape);
+ShapeDescription to_shape_description(const loco::BiasShape &shape);
+ShapeDescription to_shape_description(const loco::MatrixShape &shape);
+ShapeDescription to_shape_description(const loco::NodeShape &shape);
+
+template <typename Permutation> inline bool isNHWC(Permutation *perm);
+
+template <> inline bool isNHWC(loco::Permutation<loco::Domain::Feature> *perm)
+{
+ return perm->axis(loco::FeatureAxis::Count) == 0 && perm->axis(loco::FeatureAxis::Height) == 1 &&
+ perm->axis(loco::FeatureAxis::Width) == 2 && perm->axis(loco::FeatureAxis::Depth) == 3;
+}
+
+template <> inline bool isNHWC(loco::Permutation<loco::Domain::Filter> *perm)
+{
+ return perm->axis(loco::FilterAxis::Count) == 0 && perm->axis(loco::FilterAxis::Height) == 1 &&
+ perm->axis(loco::FilterAxis::Width) == 2 && perm->axis(loco::FilterAxis::Depth) == 3;
+}
+
+} // namespace luci
+
+#endif // __LUCI_SHAPE_DESCRIPTION_H__
diff --git a/compiler/luci/service/include/luci/Service/Validate.h b/compiler/luci/service/include/luci/Service/Validate.h
new file mode 100644
index 000000000..4b80d1d16
--- /dev/null
+++ b/compiler/luci/service/include/luci/Service/Validate.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SERVICE_VALIDATE_H__
+#define __LUCI_SERVICE_VALIDATE_H__
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool validate(loco::Graph *);
+
+} // namespace luci
+
+#endif // __LUCI_SERVICE_VALIDATE_H__
diff --git a/compiler/luci/service/src/Check.h b/compiler/luci/service/src/Check.h
new file mode 100644
index 000000000..e05ec904a
--- /dev/null
+++ b/compiler/luci/service/src/Check.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CHECK_H__
+#define __CHECK_H__
+
+#include <stdexcept>
+#include <cassert>
+#include <iostream>
+
+// TODO Add macro for Release version
+
+#define LUCI_ASSERT(condition, msg) \
+ { \
+ if (!(condition)) \
+ { \
+ std::cerr << "[assert failed] " << (msg) << ". " << std::endl; \
+ assert((condition)); \
+ } \
+ }
+
+#endif // __CHECK_H__
diff --git a/compiler/luci/service/src/CircleShapeInference.cpp b/compiler/luci/service/src/CircleShapeInference.cpp
new file mode 100644
index 000000000..fdcfa76bc
--- /dev/null
+++ b/compiler/luci/service/src/CircleShapeInference.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleShapeInference.h"
+#include "luci/Service/ShapeDescription.h"
+
+#include <loco.h>
+#include <loco/Service/ShapeInference.h>
+
+#include <cassert>
+
+namespace luci
+{
+
+// Returns the shape of `node` as a ShapeDescription.
+// Precondition: shape inference has already run for `node` (asserted below);
+// the loco shape is converted via to_shape_description().
+ShapeDescription ShapeInference::get(loco::Node *node)
+{
+  // TODO Adjust indentation level
+  {
+    assert(loco::shape_known(node));
+    return to_shape_description(loco::shape_get(node));
+  }
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
new file mode 100644
index 000000000..c8e872b1e
--- /dev/null
+++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
@@ -0,0 +1,907 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleShapeInferenceRule.h"
+#include "Check.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleDialect.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+
+#include <oops/InternalExn.h>
+
+#include <algorithm>
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+// Call this for CircleAvgPool2D and CircleMaxPool2D only
+// Infers the output shape of a 2D pooling node from its rank-4 input shape,
+// filter size, stride and padding. dim(1)/dim(2) are treated as height/width
+// and dim(0)/dim(3) are passed through unchanged (NHWC-style layout).
+template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
+{
+  LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known");
+
+  auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
+  assert(ifm_shape.rank() == 4);
+
+  uint32_t input_height = ifm_shape.dim(1).value();
+  uint32_t input_width = ifm_shape.dim(2).value();
+  uint32_t stride_height = node->stride()->h();
+  uint32_t stride_width = node->stride()->w();
+  uint32_t window_height = node->filter()->h();
+  uint32_t window_width = node->filter()->w();
+  uint32_t dilation_height = 1; // dilation for CircleAvgPool2D and CircleMaxPool2D is 1
+  uint32_t dilation_width = 1;
+  uint32_t effective_window_height = dilation_height * (window_height - 1) + 1;
+  uint32_t effective_window_width = dilation_width * (window_width - 1) + 1;
+
+  uint32_t output_height = 0;
+  uint32_t output_width = 0;
+
+  if (node->padding() == luci::Padding::VALID)
+  {
+    // VALID: ceil((input - effective_window + 1) / stride), written with the
+    // usual "(a + stride - 1) / stride" integer ceiling trick
+    output_height = (input_height + stride_height - effective_window_height) / stride_height;
+    output_width = (input_width + stride_width - effective_window_width) / stride_width;
+  }
+  else if (node->padding() == luci::Padding::SAME)
+  {
+    // SAME: ceil(input / stride)
+    output_height = (input_height + stride_height - 1) / stride_height;
+    output_width = (input_width + stride_width - 1) / stride_width;
+  }
+  else
+    LUCI_ASSERT(false, "Wrong padding type");
+
+  loco::TensorShape ofm_shape;
+  ofm_shape.rank(4);
+  ofm_shape.dim(0) = ifm_shape.dim(0); // batch preserved
+  ofm_shape.dim(1) = output_height;
+  ofm_shape.dim(2) = output_width;
+  ofm_shape.dim(3) = ifm_shape.dim(3); // channels preserved
+
+  return loco::NodeShape{ofm_shape};
+}
+
+/**
+ * @brief Create a higher-rank TensorShape following NumPy broadcasting semantics
+ *
+ * HOW TO USE:
+ *
+ *   auto expanded_tensor_shape = expand(tensor_shape).to(N);
+ */
+class TensorShapeExpander
+{
+public:
+  TensorShapeExpander(const loco::TensorShape &shape) : _shape{shape}
+  {
+    // DO NOTHING
+  }
+
+public:
+  // Returns a rank-`output_rank` shape: the original dims are right-aligned
+  // and the new leading axes are filled with 1 (NumPy broadcasting rule).
+  // Precondition: output_rank >= the wrapped shape's rank (asserted).
+  loco::TensorShape to(uint32_t output_rank)
+  {
+    auto const &input_shape = _shape;
+    uint32_t const input_rank = input_shape.rank();
+
+    assert(input_rank <= output_rank && "Cannot shrink rank");
+    uint32_t const axis_shift = output_rank - input_rank;
+
+    loco::TensorShape output_shape;
+
+    output_shape.rank(output_rank);
+    for (uint32_t axis = 0; axis < output_rank; ++axis)
+    {
+      output_shape.dim(axis) = (axis < axis_shift) ? 1 : input_shape.dim(axis - axis_shift);
+    }
+
+    return output_shape;
+  }
+
+private:
+  const loco::TensorShape _shape;
+};
+
+/**
+ * @brief Expand shape x and y to same rank by align right and filling with 1
+ */
+void expand_rank(loco::TensorShape &x, loco::TensorShape &y)
+{
+  auto x_rank = x.rank();
+  auto y_rank = y.rank();
+
+  if (x_rank == y_rank)
+    return;
+
+  TensorShapeExpander x_exp(x);
+  TensorShapeExpander y_exp(y);
+
+  auto xy_rank = std::max(x_rank, y_rank);
+
+  // Only the lower-rank operand is actually expanded; the other keeps its shape
+  x = x_rank > y_rank ? x : x_exp.to(xy_rank);
+  y = y_rank > x_rank ? y : y_exp.to(xy_rank);
+}
+
+/**
+ * @brief Returns shape of expanded dimension of input x and y having same rank
+ *
+ * Each output dimension is max(x_dim, y_dim); raises INTERNAL_EXN when the
+ * two dims differ and neither is 1 (i.e. they are not broadcast-compatible).
+ */
+loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y)
+{
+  assert(x.rank() == y.rank());
+
+  auto rank = x.rank();
+
+  loco::TensorShape output_shape;
+
+  output_shape.rank(rank);
+  for (uint32_t axis = 0; axis < rank; ++axis)
+  {
+    assert(x.dim(axis).known() && y.dim(axis).known());
+
+    auto x_dim = x.dim(axis).value();
+    auto y_dim = y.dim(axis).value();
+
+    // each dimension of x and y should be same or one must be 1 if different
+    if (!((x_dim == y_dim) || (x_dim == 1 || y_dim == 1)))
+      INTERNAL_EXN("Cannot produce expand_dimension of two shapes");
+
+    output_shape.dim(axis) = std::max(x_dim, y_dim);
+  }
+
+  return output_shape;
+}
+
+// NumPy-style broadcast of two shapes: right-align, pad missing leading axes
+// with 1 (expand_rank), then take the per-axis max (expand_dimension).
+loco::TensorShape broadcast_shape(const loco::TensorShape &x, const loco::TensorShape &y)
+{
+  auto x_match = x;
+  auto y_match = y;
+
+  expand_rank(x_match, y_match);
+
+  auto output_shape = expand_dimension(x_match, y_match);
+
+  return output_shape;
+}
+
+/**
+ * @brief Class to infer the shape of CircleNode
+ *
+ * @note All CircleNode's inputs and outputs are always loco::Domain::Tensor
+ */
+class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeShape>
+{
+public:
+ loco::NodeShape visit(const luci::CircleAbs *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ return loco::NodeShape{x_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleAdd *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(x_shape, y_shape);
+
+ return loco::NodeShape{output_shape};
+ }
+
+  // ArgMax: output shape is the input shape with the selected axis removed.
+  // The axis must come from a rank-0/1 S32 CircleConst; negative axes are not
+  // supported yet (see TODO below).
+  loco::NodeShape visit(const luci::CircleArgMax *node) final
+  {
+    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+    // NOTE(review): dimension_shape is never used below; kept as-is since
+    // shape_get() asserts the shape is known, which acts as an implicit check
+    auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
+
+    int64_t select_axis = 0;
+    {
+      LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
+
+      // Only support node's shape() is CircleConst with S32/S64
+      // Support S32 for now.
+      auto const_shape_node = dynamic_cast<luci::CircleConst *>(node->dimension());
+      LUCI_ASSERT(const_shape_node, "Only support CircleConst for shape of CircleArgMax");
+      LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
+                  "Only support int32 CircleConst for CircleArgMax");
+
+      if (const_shape_node->rank() > 1)
+        INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
+                       oops::to_uint32(const_shape_node->rank()));
+
+      select_axis = const_shape_node->scalar<loco::DataType::S32>();
+    }
+    assert(select_axis < input_shape.rank());
+    assert(select_axis >= 0); // TODO support minus of this breaks
+
+    // NOTE select_axis is removed
+    loco::TensorShape shape_output;
+    uint32_t rank = input_shape.rank();
+    uint32_t shrink = static_cast<uint32_t>(select_axis);
+    assert(rank > 0);
+    shape_output.rank(rank - 1);
+    // Copy every input dim except the reduced one, compacting with `d`
+    for (uint32_t r = 0, d = 0; r < rank; ++r)
+    {
+      if (r == shrink)
+        continue;
+      shape_output.dim(d++) = input_shape.dim(r);
+    }
+    return loco::NodeShape{shape_output};
+  }
+
+ loco::NodeShape visit(const luci::CircleAveragePool2D *node) final
+ {
+ return infer_pool_2d_shape(node);
+ }
+
+  // BatchToSpaceND: each spatial dim is multiplied by its block size and then
+  // reduced by the begin/end crops; the batch dim is divided by the product of
+  // the block sizes. Only rank-3/4 inputs with S32 const block_shape/crops
+  // are supported.
+  loco::NodeShape visit(const luci::CircleBatchToSpaceND *node) final
+  {
+    const loco::DataType S32 = loco::DataType::S32;
+
+    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+    // Support only input rank is 3 and 4
+    assert(input_shape.rank() == 3 || input_shape.rank() == 4);
+
+    // Only support block_shape() with S32 type CircleConst for now
+    auto const_block_shape = dynamic_cast<luci::CircleConst *>(node->block_shape());
+    LUCI_ASSERT(const_block_shape, "Only support CircleConst for block_shape");
+    LUCI_ASSERT(const_block_shape->dtype() == loco::DataType::S32,
+                "Only support int32 block_shape");
+
+    // Only support crops() with S32 type CircleConst for now
+    auto const_crops = dynamic_cast<luci::CircleConst *>(node->crops());
+    LUCI_ASSERT(const_crops, "Only support CircleConst for crops");
+    LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops");
+
+    auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
+    auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>();
+    assert(const_block_shape_shape.rank() == 1);
+    assert(const_crops_shape.rank() == 2);
+
+    // Spatial dims are everything between batch (first) and channel (last)
+    int32_t input_spatial_dim = input_shape.rank() - 2;
+    assert(const_block_shape_shape.dim(0) == input_spatial_dim);
+    assert(const_crops_shape.dim(0) == input_spatial_dim);
+    assert(const_crops_shape.dim(1) == 2);
+
+    loco::TensorShape shape_output;
+
+    shape_output.rank(input_shape.rank());
+
+    int32_t output_batch_size = input_shape.dim(0).value();
+    for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
+    {
+      int dim_size = input_shape.dim(dim + 1).value() * const_block_shape->at<S32>(dim);
+      dim_size -= const_crops->at<S32>(dim * 2);
+      dim_size -= const_crops->at<S32>(dim * 2 + 1);
+      shape_output.dim(dim + 1) = dim_size;
+
+      // Batch must divide evenly by each block size
+      assert(output_batch_size % const_block_shape->at<S32>(dim) == 0);
+      output_batch_size = output_batch_size / const_block_shape->at<S32>(dim);
+    }
+    shape_output.dim(0) = output_batch_size;
+    shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
+
+    return loco::NodeShape{shape_output};
+  }
+
+ loco::NodeShape visit(const luci::CircleConcatenation *node) final
+ {
+ // TODO Support when CircleConcatenation has 0 input
+ assert(node->numValues() > 0);
+
+ auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+ auto axis = node->axis();
+ if (axis < 0)
+ axis += first_shape.rank();
+
+ assert(0 <= axis);
+ assert(first_shape.rank() > static_cast<uint32_t>(axis));
+
+ loco::TensorShape output_shape;
+
+ output_shape.rank(first_shape.rank());
+ for (uint32_t i = 0; i < output_shape.rank(); ++i)
+ output_shape.dim(i) = first_shape.dim(i);
+
+ for (uint32_t i = 1; i < node->numValues(); ++i)
+ {
+ auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+
+ for (uint32_t j = 0; j < output_shape.rank(); ++j)
+ {
+ if (j == static_cast<uint32_t>(axis))
+ output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
+ else
+ assert(output_shape.dim(j) == input_shape.dim(j));
+ }
+ }
+
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleConst *node) final
+ {
+ loco::TensorShape shape;
+
+ shape.rank(node->rank());
+ for (uint32_t axis = 0; axis < node->rank(); axis++)
+ shape.dim(axis) = node->dim(axis);
+
+ return loco::NodeShape{shape};
+ }
+
+  // Conv2D: spatial dims follow the same VALID/SAME formulas as pooling
+  // (see infer_pool_2d_shape); output channel count comes from the kernel's
+  // first (O) dimension.
+  loco::NodeShape visit(const luci::CircleConv2D *node) final
+  {
+    LOGGER(l);
+
+    auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>();  // in NHWC
+    auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
+
+    INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker("
+            << ker_shape.rank() << ")" << std::endl;
+
+    assert(ifm_shape.rank() == 4);
+    assert(ker_shape.rank() == 4);
+    assert(ifm_shape.dim(3) == ker_shape.dim(3)); // input channels must match kernel I
+
+    uint32_t input_height = ifm_shape.dim(1).value();
+    uint32_t input_width = ifm_shape.dim(2).value();
+    uint32_t stride_height = node->stride()->h();
+    uint32_t stride_width = node->stride()->w();
+    uint32_t ker_height = ker_shape.dim(1).value();
+    uint32_t ker_width = ker_shape.dim(2).value();
+    // NOTE(review): dilation is hard-coded to 1 here — node dilation
+    // attributes (if any) are not consulted; confirm intended
+    uint32_t dilation_height = 1;
+    uint32_t dilation_width = 1;
+    uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
+    uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+
+    uint32_t output_height = 0;
+    uint32_t output_width = 0;
+
+    if (node->padding() == luci::Padding::VALID)
+    {
+      // ceil((input - effective_kernel + 1) / stride)
+      output_height = (input_height + stride_height - effective_ker_height) / stride_height;
+      output_width = (input_width + stride_width - effective_ker_width) / stride_width;
+    }
+    else if (node->padding() == luci::Padding::SAME)
+    {
+      // ceil(input / stride)
+      output_height = (input_height + stride_height - 1) / stride_height;
+      output_width = (input_width + stride_width - 1) / stride_width;
+    }
+    else
+      LUCI_ASSERT(false, "Wrong padding type");
+
+    loco::TensorShape ofm_shape;
+    ofm_shape.rank(4);
+    ofm_shape.dim(0) = ifm_shape.dim(0);
+    ofm_shape.dim(1) = output_height;
+    ofm_shape.dim(2) = output_width;
+    ofm_shape.dim(3) = ker_shape.dim(0); // O of OHWI
+
+    return loco::NodeShape{ofm_shape};
+  }
+
+ loco::NodeShape visit(const luci::CircleCos *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+
+ return loco::NodeShape{x_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleDepthwiseConv2D *node) final
+ {
+ auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
+ auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
+
+ assert(ifm_shape.rank() == 4);
+ assert(ker_shape.rank() == 4);
+ assert(ker_shape.dim(0).value() == 1);
+
+ uint32_t input_height = ifm_shape.dim(1).value();
+ uint32_t input_width = ifm_shape.dim(2).value();
+ uint32_t stride_height = node->stride()->h();
+ uint32_t stride_width = node->stride()->w();
+ uint32_t ker_height = ker_shape.dim(1).value();
+ uint32_t ker_width = ker_shape.dim(2).value();
+ uint32_t dilation_height = 1;
+ uint32_t dilation_width = 1;
+ uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
+ uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+
+ uint32_t output_height = 0;
+ uint32_t output_width = 0;
+
+ if (node->padding() == luci::Padding::VALID)
+ {
+ output_height = (input_height + stride_height - effective_ker_height) / stride_height;
+ output_width = (input_width + stride_width - effective_ker_width) / stride_width;
+ }
+ else if (node->padding() == luci::Padding::SAME)
+ {
+ output_height = (input_height + stride_height - 1) / stride_height;
+ output_width = (input_width + stride_width - 1) / stride_width;
+ }
+ else
+ LUCI_ASSERT(false, "Wrong padding type");
+
+ loco::TensorShape ofm_shape;
+ ofm_shape.rank(4);
+ ofm_shape.dim(0) = ifm_shape.dim(0);
+ ofm_shape.dim(1) = output_height;
+ ofm_shape.dim(2) = output_width;
+ ofm_shape.dim(3) = ker_shape.dim(3);
+
+ return loco::NodeShape{ofm_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleDiv *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(x_shape, y_shape);
+
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleEqual *node) final
+ {
+ const auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ const auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+ loco::TensorShape output_shape = broadcast_shape(x_shape, y_shape);
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleExp *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ return loco::NodeShape{x_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleFullyConnected *node) final
+ {
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
+
+ // Checking shape capability for fully connected layer
+ // Input: a tensor of at least rank 2 [D1, D2, ... Dn]
+ // Weight: [# of units, K]
+ // Output: [D1 * D2 * ... * Dn / K, # of units]
+ LUCI_ASSERT(input_shape.rank() >= 2, "Input rank should be at least 2");
+ LUCI_ASSERT(weights_shape.rank() == 2, "Incompatible weights rank for fully connected");
+
+ uint32_t input_size = 1;
+ for (uint32_t i = 0; i < input_shape.rank(); i++)
+ {
+ input_size = input_size * input_shape.dim(i).value();
+ }
+ const uint32_t batch_size = input_size / weights_shape.dim(1).value();
+ loco::TensorShape out_shape;
+ out_shape.rank(2);
+ out_shape.dim(0) = batch_size;
+ out_shape.dim(1) = weights_shape.dim(0);
+
+ return loco::NodeShape{out_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleLogicalNot *node) final
+ {
+ const auto input_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ return loco::NodeShape{input_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleLogicalOr *node) final
+ {
+ const auto input_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ return loco::NodeShape{input_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleMaximum *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(x_shape, y_shape);
+
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleMaxPool2D *node) final
+ {
+ return infer_pool_2d_shape(node);
+ }
+
+  // Mean (reduction): reduced axes become 1 when keep_dims is set, and are
+  // removed from the shape otherwise. reduction_indices must be an S32
+  // CircleConst; negative axes are normalized to [0, rank).
+  loco::NodeShape visit(const luci::CircleMean *node) final
+  {
+    const loco::DataType S32 = loco::DataType::S32;
+
+    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+    auto reduction_indices = dynamic_cast<luci::CircleConst *>(node->reduction_indices());
+
+    { // Exceptions
+      // TODO support non-const case
+      LUCI_ASSERT(reduction_indices, "Only support constant reduction_indices");
+      // TODO support other data type
+      LUCI_ASSERT(reduction_indices->dtype() == S32, "Only support int 32");
+    }
+
+    // Normalize each axis to [0, rank) and reject out-of-range values
+    std::vector<int32_t> reduction_values;
+
+    for (uint32_t i = 0; i < reduction_indices->size<S32>(); ++i)
+    {
+      int32_t axis = reduction_indices->at<S32>(i);
+      if (axis < 0)
+        axis += input_shape.rank();
+      if (not(0 <= axis and axis < static_cast<int32_t>(input_shape.rank())))
+        INTERNAL_EXN_V("Invalid reduction axis for MEAN", oops::to_uint32(axis));
+      reduction_values.push_back(axis);
+    }
+
+    loco::TensorShape output_shape;
+
+    if (node->keep_dims())
+    {
+      // Same rank; reduced axes collapse to extent 1
+      output_shape.rank(input_shape.rank());
+      for (uint32_t i = 0; i < input_shape.rank(); ++i)
+        output_shape.dim(i) = input_shape.dim(i);
+      for (uint32_t i = 0; i < reduction_values.size(); ++i)
+        output_shape.dim(reduction_values.at(i)) = 1;
+    }
+    else
+    {
+      // Reduced axes are dropped; surviving input dims are copied in order
+      std::vector<bool> check_reduce(input_shape.rank(), false);
+      for (uint32_t i = 0; i < reduction_values.size(); ++i)
+        check_reduce.at(reduction_values.at(i)) = true;
+
+      uint32_t reduce_cnt = 0;
+      for (uint32_t i = 0; i < check_reduce.size(); ++i)
+        if (check_reduce.at(i))
+          ++reduce_cnt;
+
+      output_shape.rank(input_shape.rank() - reduce_cnt);
+      for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i)
+        if (check_reduce.at(i) == false)
+          // Fix: copy the input dimension's extent; the original assigned the
+          // axis index `i` itself, producing wrong output shapes
+          output_shape.dim(j++) = input_shape.dim(i);
+    }
+
+    return loco::NodeShape{output_shape};
+  }
+
+ loco::NodeShape visit(const luci::CircleMul *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(x_shape, y_shape);
+
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const luci::CirclePack *node) final
+ {
+ LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs");
+
+ auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+ // Make sure all inputs have the same shape.
+ for (uint32_t i = 1; i < node->values_count(); ++i)
+ {
+ auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+ LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape},
+ "All inputs must have the same shape");
+ }
+
+ // Checking shape capability for pack layer
+ // Input: tensors [D1, D2, ... Dn]
+ // Axis: K
+ // Output: [D1, D2, ... , D_K-1, n, D_K+1, ... Dn]
+ auto axis = node->axis();
+ if (axis < 0)
+ axis += first_shape.rank() + 1;
+
+ LUCI_ASSERT(0 <= axis, "Axis is out of range");
+ LUCI_ASSERT(static_cast<uint32_t>(axis) <= first_shape.rank(), "Axis is out of range");
+
+ loco::TensorShape output_shape;
+ output_shape.rank(first_shape.rank() + 1);
+
+ uint32_t j = 0;
+ for (uint32_t i = 0; i < output_shape.rank(); ++i)
+ {
+ if (i == static_cast<uint32_t>(axis))
+ {
+ output_shape.dim(i) = node->values_count();
+ }
+ else
+ {
+ output_shape.dim(i) = first_shape.dim(j++);
+ }
+ }
+
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const luci::CirclePad *node) final
+ {
+ const loco::DataType S32 = loco::DataType::S32;
+
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto paddings = dynamic_cast<luci::CircleConst *>(node->paddings());
+
+ // TODO support non-const case
+ LUCI_ASSERT(paddings, "Only support constant reduction_indices");
+ // TODO support other data type
+ LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
+ LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")
+
+ int32_t n = paddings->dim(0).value();
+ int32_t v = paddings->dim(1).value();
+
+ LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
+ LUCI_ASSERT(n == int32_t(input_shape.rank()),
+ "paddings [n, 2] should have same value of input rank");
+
+ loco::TensorShape output_shape;
+
+ output_shape.rank(input_shape.rank());
+ for (int32_t ni = 0; ni < n; ++ni)
+ {
+ int32_t idx = ni * 2;
+ int value = input_shape.dim(ni).value();
+ value += paddings->at<S32>(idx + 0); // left
+ value += paddings->at<S32>(idx + 1); // right
+ output_shape.dim(ni) = value;
+ }
+
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleRelu *node) final
+ {
+ auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleRelu6 *node) final
+ {
+ auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
+ }
+
+  /**
+   * @note CircleReshape has new shape info in two places: 2nd input and attribute.
+   *       This shape inference forces both to exist, and match each other.
+   *       When this condition satisfied, it return the inferred shape
+   *
+   * TODO Change this policy when not appropriate
+   */
+  loco::NodeShape visit(const luci::CircleReshape *node) final
+  {
+    const loco::DataType S32 = loco::DataType::S32;
+
+    // Shape requested via the 2nd input (must be a rank-1 S32 CircleConst)
+    loco::TensorShape shape_by_input;
+    {
+      LUCI_ASSERT(node->shape(), "2nd input shape() should not be nullptr");
+
+      // Only support node's shape() is CircleConst with S32
+      // TODO support other node with other types
+      auto const_shape_node = dynamic_cast<luci::CircleConst *>(node->shape());
+      LUCI_ASSERT(const_shape_node, "Only support CircleConst for shape of CircleReshape");
+      LUCI_ASSERT(const_shape_node->dtype() == S32, "Only support int32 CircleConst");
+
+      if (const_shape_node->rank() != 1)
+        INTERNAL_EXN_V("Only support rank 1 CircleConst",
+                       oops::to_uint32(const_shape_node->rank()));
+
+      shape_by_input.rank(const_shape_node->dim(0).value());
+
+      for (uint32_t axis = 0; axis < shape_by_input.rank(); ++axis)
+      {
+        shape_by_input.dim(axis) = const_shape_node->at<S32>(axis);
+      }
+    }
+
+    // Shape requested via the newShape() attribute
+    loco::TensorShape shape_by_attr;
+    {
+      shape_by_attr.rank(node->newShape()->rank());
+
+      for (uint32_t axis = 0; axis < shape_by_attr.rank(); ++axis)
+      {
+        shape_by_attr.dim(axis) = node->newShape()->dim(axis);
+      }
+    }
+
+    LUCI_ASSERT(shape_by_input == shape_by_attr,
+                "Warning: Two new shape information mismatched for CircleReshape");
+
+    loco::TensorShape output_shape = shape_by_input;
+
+    // One of the dimensions can have special value -1, meaning its actual value should be inferred.
+    const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
+    const uint32_t input_element_count = loco::element_count(&input_shape);
+    uint32_t output_element_count = 1;
+    uint32_t unknown_dim_index = UINT32_MAX;
+    for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index)
+    {
+      const uint32_t dim_value = output_shape.dim(dim_index).value();
+      if (static_cast<int>(dim_value) == -1)
+      {
+        LUCI_ASSERT(unknown_dim_index == UINT32_MAX, "More than one unknown dimension");
+        unknown_dim_index = dim_index;
+      }
+      else
+      {
+        output_element_count *= dim_value;
+      }
+    }
+    if (unknown_dim_index != UINT32_MAX)
+    {
+      // NOTE(review): if any explicit output dim is 0, output_element_count is
+      // 0 and this divides by zero — confirm 0-sized dims cannot reach here
+      output_shape.dim(unknown_dim_index) = input_element_count / output_element_count;
+    }
+
+    return loco::NodeShape{output_shape};
+  }
+
+ loco::NodeShape visit(const luci::CircleRsqrt *node) final
+ {
+ auto input_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleSoftmax *node) final
+ {
+ auto input_shape = loco::shape_get(node->logits()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleSqrt *node) final
+ {
+ auto input_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleSquaredDifference *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(x_shape, y_shape);
+
+ return loco::NodeShape{output_shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleSub *node) final
+ {
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(x_shape, y_shape);
+
+ return loco::NodeShape{output_shape};
+ }
+
+ // TODO CircleTanh
+
+  /// @brief Returns output shape of transpose. Use loco::ConstGen and luci::CircleConst for ConstT.
+  /// perm_node must be S32 with exactly one permutation entry per input axis
+  /// (both asserted); output dim(out_axis) = input dim(perm[out_axis]).
+  template <class ConstT>
+  loco::TensorShape output_shape_of_transpose(loco::TensorShape input_shape,
+                                              const ConstT *perm_node)
+  {
+    loco::TensorShape output_shape;
+    output_shape.rank(input_shape.rank());
+
+    assert(perm_node->dtype() == loco::DataType::S32);
+    assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>());
+
+    for (uint32_t out_axis = 0; out_axis < output_shape.rank(); out_axis++)
+    {
+      auto in_axis = perm_node->template at<loco::DataType::S32>(out_axis);
+      output_shape.dim(out_axis) = input_shape.dim(in_axis);
+    }
+
+    return output_shape;
+  }
+
+  // Transpose: the permutation may arrive as either a canonical loco::ConstGen
+  // or a luci::CircleConst; any other node kind is rejected.
+  loco::NodeShape visit(const luci::CircleTranspose *node) final
+  {
+    auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
+
+    auto canon_perm = dynamic_cast<loco::ConstGen *>(node->perm());
+    auto circle_perm = dynamic_cast<luci::CircleConst *>(node->perm());
+
+    if (canon_perm)
+    {
+      return loco::NodeShape{output_shape_of_transpose(input_shape, canon_perm)};
+    }
+    else if (circle_perm)
+    {
+      return loco::NodeShape{output_shape_of_transpose(input_shape, circle_perm)};
+    }
+    else
+      INTERNAL_EXN("perm of CircleTranspose should be either ConstGen or CircleConst");
+  }
+
+  loco::NodeShape visit(const luci::CircleTransposeConv *node) final
+  {
+    // TransposeConv's output shape is written in its 'inputSizes' argument
+    // (must be a rank-1, 4-entry S32 CircleConst); nothing is computed from
+    // the filter or stride here — the shape is read verbatim.
+    auto input_sizes_const = dynamic_cast<luci::CircleConst *>(node->inputSizes());
+    LUCI_ASSERT(input_sizes_const,
+                "Only support when CircleTransposeConv's inputSizes is CircleConst")
+    LUCI_ASSERT(input_sizes_const->dtype() == loco::DataType::S32, "Only support S32 dtype")
+    LUCI_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4,
+                "Only support rank 1 with 4 entries")
+
+    loco::TensorShape shape;
+
+    shape.rank(4);
+    for (uint32_t axis = 0; axis < 4; ++axis)
+      shape.dim(axis) = input_sizes_const->at<loco::DataType::S32>(axis);
+
+    return loco::NodeShape{shape};
+  }
+
+ // Circle Only
+ loco::NodeShape visit(const luci::CircleInstanceNorm *node) final
+ {
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
+ }
+
+ // Virtual
+ loco::NodeShape visit(const luci::CircleInput *node) final
+ {
+ loco::TensorShape shape;
+
+ shape.rank(node->rank());
+ for (uint32_t axis = 0; axis < node->rank(); axis++)
+ shape.dim(axis) = node->dim(axis);
+
+ return loco::NodeShape{shape};
+ }
+
+ loco::NodeShape visit(const luci::CircleOutput *node) final
+ {
+ auto from_shape = loco::shape_get(node->from()).as<loco::TensorShape>();
+
+ return loco::NodeShape{from_shape};
+ }
+};
+
+} // namespace
+
+namespace luci
+{
+
+// This rule applies only to nodes of the Circle dialect
+bool CircleShapeInferenceRule::recognize(const loco::Dialect *d) const
+{
+  return CircleDialect::get() == d;
+}
+
+/**
+ * @brief Infer the shape of a Circle-dialect node via ShapeInferenceAlgorithm
+ *
+ * @return always true; the inferred shape is written to `shape`
+ */
+bool CircleShapeInferenceRule::infer(const loco::Node *node, loco::NodeShape &shape) const
+{
+  assert(node->dialect() == CircleDialect::get());
+
+  // Cast once and reuse the result: the original repeated the dynamic_cast
+  // and, with NDEBUG defined (assert stripped), dereferenced the second cast
+  // result unchecked
+  auto circle_node = dynamic_cast<const CircleNode *>(node);
+  assert(circle_node != nullptr);
+
+  ShapeInferenceAlgorithm alg;
+  shape = circle_node->accept(&alg);
+
+  return true;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.test.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.test.cpp
new file mode 100644
index 000000000..0374251a0
--- /dev/null
+++ b/compiler/luci/service/src/CircleShapeInferenceRule.test.cpp
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestGraph.h"
+#include "luci/Service/CircleShapeInferenceRule.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleDialect.h>
+
+#include <loco.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/ShapeInference.h>
+#include <loco/Service/CanonicalShapeInferenceRule.h>
+#include <loco/Service/MultiDialectShapeInferenceRule.h>
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+namespace
+{
+
+bool shape_pass(loco::Graph *g)
+{
+ loco::CanonicalShapeInferenceRule canonical_rule;
+ luci::CircleShapeInferenceRule circle_rule;
+ loco::MultiDialectShapeInferenceRule rules;
+
+ rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
+ .bind(luci::CircleDialect::get(), &circle_rule);
+
+ return loco::apply(&rules).to(g);
+}
+
+} // namespace
+
+TEST(CircleShapeInferenceRuleTest, minimal_with_CircleRelu)
+{
+ // Create a simple network
+ luci::test::TestGraph graph;
+ auto tfl_node = graph.append<luci::CircleRelu>(graph.pull);
+ graph.complete(tfl_node);
+
+ // set shape
+ {
+ graph.pull->rank(2);
+ graph.pull->dim(0) = 3;
+ graph.pull->dim(1) = 4;
+ }
+
+ // pre-check
+ ASSERT_FALSE(loco::shape_known(tfl_node));
+
+ // shape inference
+ luci::CircleShapeInferenceRule tfl_rule;
+ loco::CanonicalShapeInferenceRule canonical_rule;
+ loco::MultiDialectShapeInferenceRule rules;
+
+ rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
+ .bind(luci::CircleDialect::get(), &tfl_rule);
+
+ loco::apply(&rules).to(graph.g.get());
+
+ // Verify
+ {
+ ASSERT_TRUE(loco::shape_known(tfl_node));
+ ASSERT_EQ(loco::shape_get(tfl_node).domain(), loco::Domain::Tensor);
+
+ auto shape = loco::shape_get(tfl_node).as<loco::TensorShape>();
+ ASSERT_EQ(shape.rank(), 2);
+ ASSERT_EQ(shape.dim(0), 3);
+ ASSERT_EQ(shape.dim(1), 4);
+ }
+}
+
+// based on the case shown in
+// https://www.corvil.com/kb/what-is-the-difference-between-same-and-valid-padding-in-tf-nn-max-pool-of-tensorflow
+TEST(CircleShapeInferenceRuleTest, avgpool2d_valid)
+{
+ luci::test::TestGraph graph;
+ auto tfl_node = graph.append<luci::CircleAveragePool2D>(graph.pull);
+ graph.complete();
+
+ auto pull = graph.pull;
+ {
+ pull->shape({1, 4, 3, 1});
+ }
+ // setting CircleAveragePool2D
+ {
+ tfl_node->filter()->h(2);
+ tfl_node->filter()->w(2);
+ tfl_node->stride()->h(2);
+ tfl_node->stride()->w(2);
+ tfl_node->fusedActivationFunction(luci::FusedActFunc::NONE);
+ tfl_node->padding(luci::Padding::VALID);
+ }
+ ASSERT_FALSE(loco::shape_known(tfl_node));
+
+ // shape inference
+ luci::CircleShapeInferenceRule tfl_rule;
+ loco::CanonicalShapeInferenceRule canonical_rule;
+ loco::MultiDialectShapeInferenceRule rules;
+
+ rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
+ .bind(luci::CircleDialect::get(), &tfl_rule);
+
+ loco::apply(&rules).to(graph.g.get());
+
+ // Verify
+ {
+ ASSERT_TRUE(loco::shape_known(tfl_node));
+ ASSERT_EQ(loco::shape_get(tfl_node).domain(), loco::Domain::Tensor);
+
+ auto shape = loco::shape_get(tfl_node).as<loco::TensorShape>();
+ ASSERT_EQ(shape.rank(), 4);
+ ASSERT_EQ(shape.dim(0).value(), 1);
+ ASSERT_EQ(shape.dim(1).value(), 2);
+ ASSERT_EQ(shape.dim(2).value(), 1);
+ ASSERT_EQ(shape.dim(3).value(), 1);
+ }
+}
+
+TEST(CircleShapeInferenceRuleTest, avgpool2d_same)
+{
+ luci::test::TestGraph graph;
+ auto tfl_node = graph.append<luci::CircleAveragePool2D>(graph.pull);
+ graph.complete();
+
+ auto pull = graph.pull;
+ {
+ pull->shape({1, 4, 3, 1});
+ }
+
+ // setting CircleAveragePool2D
+ {
+ tfl_node->filter()->h(2);
+ tfl_node->filter()->w(2);
+ tfl_node->stride()->h(2);
+ tfl_node->stride()->w(2);
+ tfl_node->fusedActivationFunction(luci::FusedActFunc::NONE);
+ tfl_node->padding(luci::Padding::SAME);
+ }
+
+ ASSERT_FALSE(loco::shape_known(tfl_node));
+
+ // shape inference
+ shape_pass(graph.g.get());
+
+ // Verify
+ {
+ ASSERT_TRUE(loco::shape_known(tfl_node));
+ ASSERT_EQ(loco::shape_get(tfl_node).domain(), loco::Domain::Tensor);
+
+ auto shape = loco::shape_get(tfl_node).as<loco::TensorShape>();
+ ASSERT_EQ(shape.rank(), 4);
+ ASSERT_EQ(shape.dim(0).value(), 1);
+ ASSERT_EQ(shape.dim(1).value(), 2);
+ ASSERT_EQ(shape.dim(2).value(), 2);
+ ASSERT_EQ(shape.dim(3).value(), 1);
+ }
+}
+
+/**
+ * @note Function to test: Shape inference of two different input shapes
+ *
+ * Rank expansion to higher input side
+ * x(2,1,5) + y(3,5) --> x(2,1,5) + y(1,3,5)
+ * Do output shape inference like numpy
+ * x(2,1,5) + y(1,3,5) --> output(2,3,5)
+ * For each axis, dim value should be same OR one of them should be 1
+ */
TEST(CircleShapeInferenceRuleTest, TFAdd_shapeinf_different)
{
  auto g = loco::make_graph();

  // x: (2, 1, 5)
  auto x_node = g->nodes()->create<loco::Pull>();
  {
    x_node->rank(3);
    x_node->dim(0) = 2;
    x_node->dim(1) = 1;
    x_node->dim(2) = 5;
  }
  // y: (3, 5) -- lower rank; should be expanded to (1, 3, 5) then broadcast
  auto y_node = g->nodes()->create<loco::Pull>();
  {
    y_node->rank(2);
    y_node->dim(0) = 3;
    y_node->dim(1) = 5;
  }
  auto tfl_node = g->nodes()->create<luci::CircleAdd>();
  {
    tfl_node->x(x_node);
    tfl_node->y(y_node);
  }
  auto push_node = g->nodes()->create<loco::Push>();
  {
    push_node->from(tfl_node);
  }

  auto x_input = g->inputs()->create();
  {
    x_input->name("x");
    loco::link(x_input, x_node);
  }
  auto y_input = g->inputs()->create();
  {
    y_input->name("y");
    loco::link(y_input, y_node);
  }
  auto output = g->outputs()->create();
  {
    output->name("output");
    loco::link(output, push_node);
  }

  // pre-check
  ASSERT_FALSE(loco::shape_known(tfl_node));

  // shape inference
  // iterate until shape inference reaches a fixed point
  while (shape_pass(g.get()) == true)
    ;

  // Verify: broadcast of (2,1,5) and (3,5) yields (2,3,5)
  {
    ASSERT_TRUE(loco::shape_known(tfl_node));
    ASSERT_EQ(loco::shape_get(tfl_node).domain(), loco::Domain::Tensor);

    auto shape = loco::shape_get(tfl_node).as<loco::TensorShape>();
    ASSERT_EQ(shape.rank(), 3);
    ASSERT_EQ(shape.dim(0), 2);
    ASSERT_EQ(shape.dim(1), 3);
    ASSERT_EQ(shape.dim(2), 5);
  }
}
+
TEST(CircleShapeInferenceRuleTest, CircleTranspose_simple)
{
  luci::test::ExampleGraph<luci::test::ExampleGraphType::CircleTranspose> g;

  // input: (3, 8, 1)
  g.pull->rank(3);
  g.pull->dim(0) = 3;
  g.pull->dim(1) = 8;
  g.pull->dim(2) = 1;

  // perm = [1, 2, 0] : output dim i takes input dim perm[i]
  g.const_perm->dtype(loco::DataType::S32);
  g.const_perm->rank(1);
  g.const_perm->dim(0) = 3;
  g.const_perm->size<loco::DataType::S32>(3);
  g.const_perm->at<loco::DataType::S32>(0) = 1;
  g.const_perm->at<loco::DataType::S32>(1) = 2;
  g.const_perm->at<loco::DataType::S32>(2) = 0;

  // pre-check
  ASSERT_FALSE(loco::shape_known(g.transpose_node));

  // shape inference
  // iterate until shape inference reaches a fixed point
  while (shape_pass(g.graph()) == true)
    ;

  // Verify: (3, 8, 1) permuted by [1, 2, 0] -> (8, 1, 3)
  {
    ASSERT_TRUE(loco::shape_known(g.transpose_node));

    auto shape = loco::shape_get(g.transpose_node).as<loco::TensorShape>();
    ASSERT_EQ(shape.rank(), 3);
    ASSERT_EQ(shape.dim(0), 8);
    ASSERT_EQ(shape.dim(1), 1);
    ASSERT_EQ(shape.dim(2), 3);
  }
}
diff --git a/compiler/luci/service/src/CircleTypeInference.cpp b/compiler/luci/service/src/CircleTypeInference.cpp
new file mode 100644
index 000000000..669906159
--- /dev/null
+++ b/compiler/luci/service/src/CircleTypeInference.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleTypeInference.h"
+#include "luci/Service/CircleTypeInferenceRule.h"
+
+#include <luci/IR/CircleDialect.h>
+
+#include <loco/IR/CanonicalNode.h>
+#include <loco/IR/CanonicalNodeVisitor.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/TypeInference.h>
+#include <mio/circle/schema_generated.h>
+#include <oops/InternalExn.h>
+
+#include <memory>
+#include <stdexcept>
+#include <type_traits>
+
+namespace
+{
+
/**
 * @brief Map a loco::DataType onto the equivalent circle::TensorType.
 *
 * Throws an internal exception for loco dtypes with no Circle counterpart
 * (U16/U32/U64/FLOAT64 and anything unknown).
 */
circle::TensorType translateLocoTypeToCircle(loco::DataType dtype)
{
  switch (dtype)
  {
    case loco::DataType::U8:
      return circle::TensorType_UINT8;
    // case loco::DataType::U16: unsupported
    // case loco::DataType::U32: unsupported
    // case loco::DataType::U64: unsupported
    case loco::DataType::S8:
      return circle::TensorType_INT8;
    case loco::DataType::S16:
      return circle::TensorType_INT16;
    case loco::DataType::S32:
      return circle::TensorType_INT32;
    case loco::DataType::S64:
      return circle::TensorType_INT64;
    case loco::DataType::FLOAT16:
      return circle::TensorType_FLOAT16;
    case loco::DataType::FLOAT32:
      return circle::TensorType_FLOAT32;
    // case loco::DataType::FLOAT64: unsupported
    case loco::DataType::BOOL:
      return circle::TensorType_BOOL;
    default:
      break;
  }

  INTERNAL_EXN_V("Invalid loco dtype", oops::to_uint32(dtype));
}
+
+} // namespace
+
+namespace luci
+{
+
+circle::TensorType TypeInference::get(loco::Node *node)
+{
+ assert(loco::dtype_known(node));
+ return translateLocoTypeToCircle(loco::dtype_get(node));
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
new file mode 100644
index 000000000..21a28c1b6
--- /dev/null
+++ b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleTypeInferenceRule.h"
+
+#include <luci/IR/CircleDialect.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/IR/CircleNodes.h>
+
+#include <cassert>
+
+namespace
+{
+
/**
 * @brief Compute the output data type of each Circle node.
 *
 * Most operations propagate the dtype of (one of) their input(s);
 * the exceptions (ArgMax, Equal, Const, Input) are noted inline.
 */
struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataType>
{
  // TODO Given a tensor x of complex numbers, Abs operation returns a tensor of type float32 or
  // float64.
  loco::DataType visit(const luci::CircleAbs *node) final { return loco::dtype_get(node->x()); }

  loco::DataType visit(const luci::CircleAdd *node) final { return loco::dtype_get(node->x()); }

  // ArgMax carries its output dtype as a node attribute
  loco::DataType visit(const luci::CircleArgMax *node) final { return node->output_type(); }

  loco::DataType visit(const luci::CircleAveragePool2D *node) final
  {
    return loco::dtype_get(node->value());
  }

  loco::DataType visit(const luci::CircleBatchToSpaceND *node) final
  {
    return loco::dtype_get(node->input());
  }

  // All inputs of a concatenation are expected to share one dtype
  loco::DataType visit(const luci::CircleConcatenation *node) final
  {
    // TODO Support when CircleConcatenation has 0 input
    assert(node->numValues() > 0);

    for (uint32_t i = 1; i < node->numValues(); ++i)
      assert(loco::dtype_get(node->values(i - 1)) == loco::dtype_get(node->values(i)));

    return loco::dtype_get(node->values(0));
  }

  loco::DataType visit(const luci::CircleConst *node) final { return node->dtype(); }

  loco::DataType visit(const luci::CircleConv2D *node) final
  {
    return loco::dtype_get(node->input());
  }

  loco::DataType visit(const luci::CircleCos *node) final { return loco::dtype_get(node->x()); }

  loco::DataType visit(const luci::CircleDepthwiseConv2D *node) final
  {
    return loco::dtype_get(node->input());
  }

  loco::DataType visit(const luci::CircleDiv *node) final { return loco::dtype_get(node->x()); }

  // Comparison always yields BOOL regardless of the operand dtype
  loco::DataType visit(const luci::CircleEqual *) final { return loco::DataType::BOOL; }

  loco::DataType visit(const luci::CircleExp *node) final { return loco::dtype_get(node->x()); }

  loco::DataType visit(const luci::CircleFullyConnected *node) final
  {
    return loco::dtype_get(node->input());
  }

  loco::DataType visit(const luci::CircleLogicalNot *node) final
  {
    return loco::dtype_get(node->x());
  }

  loco::DataType visit(const luci::CircleLogicalOr *node) final
  {
    return loco::dtype_get(node->x());
  }

  loco::DataType visit(const luci::CircleMaximum *node) final { return loco::dtype_get(node->x()); }

  loco::DataType visit(const luci::CircleMaxPool2D *node) final
  {
    return loco::dtype_get(node->value());
  }

  loco::DataType visit(const luci::CircleMean *node) final
  {
    return loco::dtype_get(node->input());
  }

  // All stacked values are expected to share one dtype
  loco::DataType visit(const luci::CirclePack *node) final
  {
    // Only support CirclePack with one or more inputs
    assert(node->values_count() > 0);

    auto first_value_type = loco::dtype_get(node->values(0));
    for (uint32_t i = 1; i < node->values_count(); ++i)
      assert(first_value_type == loco::dtype_get(node->values(i)));

    return first_value_type;
  }

  loco::DataType visit(const luci::CirclePad *node) final { return loco::dtype_get(node->input()); }

  loco::DataType visit(const luci::CircleMul *node) final { return loco::dtype_get(node->x()); }

  loco::DataType visit(const luci::CircleRelu *node) final
  {
    return loco::dtype_get(node->features());
  }

  loco::DataType visit(const luci::CircleRelu6 *node) final
  {
    return loco::dtype_get(node->features());
  }

  loco::DataType visit(const luci::CircleReshape *node) final
  {
    return loco::dtype_get(node->tensor());
  }

  loco::DataType visit(const luci::CircleRsqrt *node) final { return loco::dtype_get(node->x()); }

  loco::DataType visit(const luci::CircleSoftmax *node) final
  {
    return loco::dtype_get(node->logits());
  }

  loco::DataType visit(const luci::CircleSqrt *node) final { return loco::dtype_get(node->x()); }

  loco::DataType visit(const luci::CircleSquaredDifference *node) final
  {
    return loco::dtype_get(node->x());
  }

  loco::DataType visit(const luci::CircleSub *node) final { return loco::dtype_get(node->x()); }

  // TODO CircleTanh

  loco::DataType visit(const luci::CircleTranspose *node) final
  {
    return loco::dtype_get(node->a());
  }

  loco::DataType visit(const luci::CircleTransposeConv *node) final
  {
    return loco::dtype_get(node->outBackprop());
  }

  // Circle Only
  loco::DataType visit(const luci::CircleInstanceNorm *node) final
  {
    return loco::dtype_get(node->input());
  }

  // Virtual
  loco::DataType visit(const luci::CircleInput *node) final { return node->dtype(); }

  loco::DataType visit(const luci::CircleOutput *node) final
  {
    return loco::dtype_get(node->from());
  }
};
+
+} // namespace
+
+namespace luci
+{
+
+bool CircleTypeInferenceRule::recognize(const loco::Dialect *d) const
+{
+ return CircleDialect::get() == d;
+}
+
+bool CircleTypeInferenceRule::infer(const loco::Node *node, loco::DataType &dtype) const
+{
+ assert(node->dialect() == CircleDialect::get());
+
+ TypeInferenceAlgorithm alg;
+
+ dtype = dynamic_cast<const CircleNode *>(node)->accept(&alg);
+ assert(dtype != loco::DataType::Unknown);
+
+ return true;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.test.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.test.cpp
new file mode 100644
index 000000000..29f45173e
--- /dev/null
+++ b/compiler/luci/service/src/CircleTypeInferenceRule.test.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestGraph.h"
+#include <luci/Service/CircleTypeInferenceRule.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleDialect.h>
+
+#include <loco.h>
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/TypeInference.h>
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+TEST(CircleTypeInferenceRuleTest, minimal_with_CircleRelu)
+{
+ // Create a simple network
+ luci::test::TestGraph graph;
+ auto tfl_node = graph.append<luci::CircleRelu>(graph.pull);
+ graph.complete(tfl_node);
+
+ graph.pull->dtype(loco::DataType::S32);
+
+ // pre-check
+ ASSERT_FALSE(loco::dtype_known(tfl_node));
+
+ // type inference
+ luci::CircleTypeInferenceRule tfl_rule;
+ loco::CanonicalTypeInferenceRule canon_rule;
+ loco::MultiDialectTypeInferenceRule rules;
+
+ rules.bind(loco::CanonicalDialect::get(), &canon_rule);
+ rules.bind(luci::CircleDialect::get(), &tfl_rule);
+
+ loco::apply(&rules).to(graph.g.get());
+
+ // Verify
+ ASSERT_TRUE(loco::dtype_known(tfl_node));
+ auto type = loco::dtype_get(tfl_node);
+ ASSERT_EQ(type, loco::DataType::S32);
+}
diff --git a/compiler/luci/service/src/GraphBlock.h b/compiler/luci/service/src/GraphBlock.h
new file mode 100644
index 000000000..2a455888a
--- /dev/null
+++ b/compiler/luci/service/src/GraphBlock.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GRAPH_BLOCK_H__
+#define __GRAPH_BLOCK_H__
+
+#include <loco.h>
+#include <loco/Service/ShapeInference.h>
+
+#include <oops/InternalExn.h>
+
+#include <functional>
+
+// TODO Change all Canonical nodes to Circle nodes
+
+namespace luci
+{
+
+/// @brief feature layout of TFlite/Circle file
+enum class FeatureLayout
+{
+ NHWC,
+};
+
+/// @brief Creates a loco::FeatureEncode with T layout (NHWC for tflite) and add it to graph.
+template <FeatureLayout T> loco::FeatureEncode *make_feature_encode(loco::Node *input_for_encode);
+
+/// @brief Creates a loco::FeatureDecode with T layout (NHWC for tflite) and add it to graph.
+template <FeatureLayout T> loco::FeatureDecode *make_feature_decode(loco::Node *input_for_decode);
+
+enum class FilterLayout
+{
+ OHWI, // a.k.a., NHWC, Tensorflow Lite uses this layout for filter
+ HWIO, // a.k.a., HWCN, Tensorflow uses this layout for filter
+};
+
+/// @brief Create a loco::FilterEncode of given layout
+template <FilterLayout T> loco::FilterEncode *make_filter_encode(loco::Node *input_for_encode);
+
+/// @brief Create a loco::FilterDecode of given layout
+template <FilterLayout T> loco::FilterDecode *make_filter_decode(loco::Node *input_for_decode);
+
+enum class DepthwiseFilterLayout
+{
+ HWCM,
+};
+
+/// @brief Create a loco::DepthwiseFilterDecode of given layout
+template <DepthwiseFilterLayout T>
+loco::DepthwiseFilterDecode *make_dw_filter_decode(loco::Node *input_for_decode);
+
+enum class MatrixLayout
+{
+ HW,
+ WH
+};
+
+/// @brief Create a loco::MatrixEncode of given layout
+template <MatrixLayout T> loco::MatrixEncode *make_matrix_encode(loco::Node *input_for_encode);
+
+/// @brief Create a loco::MatrixDecode of given layout
+template <MatrixLayout T> loco::MatrixDecode *make_matrix_decode(loco::Node *input_for_decode);
+
} // namespace luci
+
+//
+// DomainConverter
+//
+
+/**
+ * Some canonical nodes can have input of various loco::Domain, e.g., loco::Domain::Tensor,
+ * loco::Domain::Feature, etc. However, TFL node accepts only loco::Domain::Tensor.
+ * So, When converting such canonical node to TFL node and input(s) of a canonical node are not
+ * loco::Domain::Tensor, additional nodes need to be inserted.
+ *
+ * The following two classes helps this insertion.
+ *
+ * For example, in case of loco::Relu conversion,
+ *
+ * Before:
+ *
+ * A (output: feature) -- loco::ReLU --- B (input:feature)
+ *
+ * After:
+ *
+ * A -- loco::FeatureDecode -- locoex::TFLRelu -- loco::FeatureEncode --- B
+ *
+ * loco::ReLU (dead node)
+ */
+
+namespace luci
+{
+
+/**
+ * @brief Handles input(s) while converting a canonical node to TFL node(s).
+ * This class informs DomainConverter how to handle inputs of a specific canonical node.
+ */
+template <class CanonicalT, class TFLT> class InputHandler
+{
+public:
+ /**
+ * @brief Assign origin's inputs to replacer's inputs.
+ * (This is called when origin belongs in Tensor domain.)
+ */
+ virtual void handover(CanonicalT *origin, TFLT *replacer) = 0;
+
+ /**
+ * @brief Returns the list of inputs that needs to have FeatureDecode as its input.
+ * (This is called when origin belongs in Feature domain.)
+ */
+ virtual std::vector<loco::Node *> getInputsToConvert(CanonicalT *origin) = 0;
+
+ /// @brief Set the inputs of replacer to new_inputs
+ virtual void set(TFLT *replacer, std::vector<loco::Node *> &new_inputs) = 0;
+
+ /// @brief Set the inputs to nullptr
+ virtual void nullify(CanonicalT *origin) = 0;
+};
+
/**
 * @brief Class to handle domain conversion while converting a canonical node to TFL node(s)
 */
template <class CanonicalT, class TFLT> class DomainConverter
{
public:
  // Convert origin (canonical) into a TFLT node, inserting Feature
  // encode/decode nodes when origin is not in the Tensor domain.
  // Definition appears later in this header.
  template <FeatureLayout FeatureLayoutT>
  TFLT *convert(CanonicalT *origin, InputHandler<CanonicalT, TFLT> &input_handler);
};
+
/**
 * @brief Performs domain conversion
 *
 * 1. If origin belongs to loco::Domain::Tensor, replace origin with a TFL node.
 * 2. If origin belongs to loco::Domain::Feature, insert loco::FeatureDecode for input(s) and
 *    insert loco::FeatureEncode for output. Then replace origin with a TFL node.
 *
 * @return new TFL node; nullptr if shape of origin cannot be known
 */
template <class CanonicalT, class TFLT>
template <FeatureLayout FeatureLayoutT>
TFLT *DomainConverter<CanonicalT, TFLT>::convert(CanonicalT *origin,
                                                 InputHandler<CanonicalT, TFLT> &input_handler)
{
  static_assert(FeatureLayoutT == FeatureLayout::NHWC, "Feature layout should be NHWC");

  // Shape must already be inferred; the caller retries later otherwise
  if (!loco::shape_known(origin))
  {
    return nullptr;
  }

  auto tfl_node = origin->graph()->nodes()->template create<TFLT>();

  // when the input is Tensor, just replace canonical node to TFL node.
  if (loco::shape_get(origin).domain() == loco::Domain::Tensor)
  {
    input_handler.handover(origin, tfl_node);

    loco::replace(origin).with(tfl_node);
    input_handler.nullify(origin);

    return tfl_node;
  }
  else if (loco::shape_get(origin).domain() == loco::Domain::Feature)
  {
    // Feature domain: put a FeatureDecode before each converted input and a
    // FeatureEncode after the TFL node, then splice the TFL node in
    std::vector<loco::Node *> feature_decodes;

    for (auto input : input_handler.getInputsToConvert(origin))
    {
      auto dec = make_feature_decode<FeatureLayoutT>(input);
      feature_decodes.emplace_back(dec);
    }

    input_handler.set(tfl_node, feature_decodes);

    auto enc = make_feature_encode<FeatureLayoutT>(tfl_node);

    loco::replace(origin).with(enc);
    input_handler.nullify(origin);

    return tfl_node;
  }
  else
    INTERNAL_EXN_V("Unsupported loco::Domain", oops::to_uint32(loco::shape_get(origin).domain()));
}
+
+} // namespace luci
+
+#endif //__GRAPH_BLOCK_H__
diff --git a/compiler/luci/service/src/GraphBlock.test.cpp b/compiler/luci/service/src/GraphBlock.test.cpp
new file mode 100644
index 000000000..1da8c18fa
--- /dev/null
+++ b/compiler/luci/service/src/GraphBlock.test.cpp
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GraphBlock.h"
+
+#include "Check.h"
+
+#include <loco.h>
+
+#include <memory>
+
+// TODO Change all Canonical nodes to Circle nodes
+
+namespace
+{
+
+template <luci::FeatureLayout T> loco::Permutation<loco::Domain::Feature> perm();
+
+template <> loco::Permutation<loco::Domain::Feature> perm<luci::FeatureLayout::NHWC>()
+{
+ // Make NHWC permutation for encoder and decoder
+ loco::Permutation<loco::Domain::Feature> NHWC;
+
+ NHWC.axis(loco::FeatureAxis::Count) = 0;
+ NHWC.axis(loco::FeatureAxis::Height) = 1;
+ NHWC.axis(loco::FeatureAxis::Width) = 2;
+ NHWC.axis(loco::FeatureAxis::Depth) = 3;
+
+ return NHWC;
+}
+
+template <luci::FilterLayout T> loco::Permutation<loco::Domain::Filter> perm();
+
+template <> loco::Permutation<loco::Domain::Filter> perm<luci::FilterLayout::HWIO>()
+{
+ loco::Permutation<loco::Domain::Filter> HWIO; // a.k.a., HWCN
+
+ HWIO.axis(loco::FilterAxis::Height) = 0;
+ HWIO.axis(loco::FilterAxis::Width) = 1;
+ HWIO.axis(loco::FilterAxis::Depth) = 2;
+ HWIO.axis(loco::FilterAxis::Count) = 3;
+
+ return HWIO;
+}
+
+template <> loco::Permutation<loco::Domain::Filter> perm<luci::FilterLayout::OHWI>()
+{
+
+ // Make NHWC permutation for encoder and decoder
+ loco::Permutation<loco::Domain::Filter> OHWI; // a.k.a., NHWC
+
+ OHWI.axis(loco::FilterAxis::Count) = 0;
+ OHWI.axis(loco::FilterAxis::Height) = 1;
+ OHWI.axis(loco::FilterAxis::Width) = 2;
+ OHWI.axis(loco::FilterAxis::Depth) = 3;
+
+ return OHWI;
+}
+
+template <luci::DepthwiseFilterLayout T> loco::Permutation<loco::Domain::DepthwiseFilter> perm();
+
+template <>
+loco::Permutation<loco::Domain::DepthwiseFilter> perm<luci::DepthwiseFilterLayout::HWCM>()
+{
+ loco::Permutation<loco::Domain::DepthwiseFilter> HWCM;
+
+ HWCM.axis(loco::DepthwiseFilterAxis::Height) = 0;
+ HWCM.axis(loco::DepthwiseFilterAxis::Width) = 1;
+ HWCM.axis(loco::DepthwiseFilterAxis::Depth) = 2;
+ HWCM.axis(loco::DepthwiseFilterAxis::Multiplier) = 3;
+
+ return HWCM;
+}
+
+template <luci::MatrixLayout T> loco::Permutation<loco::Domain::Matrix> perm();
+
+template <> loco::Permutation<loco::Domain::Matrix> perm<luci::MatrixLayout::HW>()
+{
+ loco::Permutation<loco::Domain::Matrix> HW;
+
+ HW.axis(loco::MatrixAxis::Height) = 0;
+ HW.axis(loco::MatrixAxis::Width) = 1;
+
+ return HW;
+}
+
+template <> loco::Permutation<loco::Domain::Matrix> perm<luci::MatrixLayout::WH>()
+{
+ loco::Permutation<loco::Domain::Matrix> WH;
+
+ WH.axis(loco::MatrixAxis::Height) = 1;
+ WH.axis(loco::MatrixAxis::Width) = 0;
+
+ return WH;
+}
+
+} // namespace
+
+namespace luci
+{
+
+template <FeatureLayout T> loco::FeatureEncode *make_feature_encode(loco::Node *input_for_encode)
+{
+ LUCI_ASSERT(input_for_encode != nullptr, "input should not be nullptr");
+ loco::Graph *g = input_for_encode->graph();
+
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+
+ encoder->perm(perm<T>());
+
+ auto enc = g->nodes()->create<loco::FeatureEncode>();
+ enc->input(input_for_encode);
+ enc->encoder(std::move(encoder));
+
+ return enc;
+}
+
+template <FeatureLayout T> loco::FeatureDecode *make_feature_decode(loco::Node *input_for_decode)
+{
+ LUCI_ASSERT(input_for_decode != nullptr, "input should not be nullptr");
+ loco::Graph *g = input_for_decode->graph();
+
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+
+ decoder->perm(perm<T>());
+
+ auto dec = g->nodes()->create<loco::FeatureDecode>();
+ dec->input(input_for_decode);
+ dec->decoder(std::move(decoder));
+
+ return dec;
+}
+
+template <FilterLayout T> loco::FilterEncode *make_filter_encode(loco::Node *input_for_encode)
+{
+ LUCI_ASSERT(input_for_encode != nullptr, "filter should not be nullptr");
+ loco::Graph *g = input_for_encode->graph();
+
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+
+ encoder->perm(perm<T>());
+
+ auto enc = g->nodes()->create<loco::FilterEncode>();
+ enc->input(input_for_encode);
+ enc->encoder(std::move(encoder));
+
+ return enc;
+}
+
+template <FilterLayout T> loco::FilterDecode *make_filter_decode(loco::Node *input_for_decode)
+{
+ LUCI_ASSERT(input_for_decode != nullptr, "filter should not be nullptr");
+ loco::Graph *g = input_for_decode->graph();
+
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
+
+ decoder->perm(perm<T>());
+
+ auto dec = g->nodes()->create<loco::FilterDecode>();
+ dec->input(input_for_decode);
+ dec->decoder(std::move(decoder));
+
+ return dec;
+}
+
+template <DepthwiseFilterLayout T>
+loco::DepthwiseFilterDecode *make_dw_filter_decode(loco::Node *input_for_decode)
+{
+ LUCI_ASSERT(input_for_decode != nullptr, "filter should not be nullptr");
+ loco::Graph *g = input_for_decode->graph();
+
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::DepthwiseFilter>>();
+
+ decoder->perm(perm<T>());
+
+ auto dec = g->nodes()->create<loco::DepthwiseFilterDecode>();
+ dec->input(input_for_decode);
+ dec->decoder(std::move(decoder));
+
+ return dec;
+}
+
+template <MatrixLayout T> loco::MatrixEncode *make_matrix_encode(loco::Node *input_for_encode)
+{
+ LUCI_ASSERT(input_for_encode != nullptr, "input should not be nullptr");
+ loco::Graph *g = input_for_encode->graph();
+
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Matrix>>();
+
+ encoder->perm(perm<T>());
+
+ auto enc = g->nodes()->create<loco::MatrixEncode>();
+ enc->input(input_for_encode);
+ enc->encoder(std::move(encoder));
+
+ return enc;
+}
+
+template <MatrixLayout T> loco::MatrixDecode *make_matrix_decode(loco::Node *input_for_decode)
+{
+ LUCI_ASSERT(input_for_decode != nullptr, "input should not be nullptr");
+ loco::Graph *g = input_for_decode->graph();
+
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Matrix>>();
+
+ decoder->perm(perm<T>());
+
+ auto dec = g->nodes()->create<loco::MatrixDecode>();
+ dec->input(input_for_decode);
+ dec->decoder(std::move(decoder));
+
+ return dec;
+}
+
+// template instantiation
+template loco::FeatureEncode *
+make_feature_encode<FeatureLayout::NHWC>(loco::Node *input_for_encode);
+
+template loco::FeatureDecode *
+make_feature_decode<FeatureLayout::NHWC>(loco::Node *input_for_encode);
+
+template loco::FilterEncode *make_filter_encode<FilterLayout::HWIO>(loco::Node *input_for_encode);
+template loco::FilterDecode *make_filter_decode<FilterLayout::OHWI>(loco::Node *input_for_decode);
+
+template loco::DepthwiseFilterDecode *
+make_dw_filter_decode<DepthwiseFilterLayout::HWCM>(loco::Node *input_for_decode);
+
+template loco::MatrixEncode *make_matrix_encode<MatrixLayout::HW>(loco::Node *input_for_encode);
+template loco::MatrixEncode *make_matrix_encode<MatrixLayout::WH>(loco::Node *input_for_encode);
+template loco::MatrixDecode *make_matrix_decode<MatrixLayout::HW>(loco::Node *input_for_decode);
+template loco::MatrixDecode *make_matrix_decode<MatrixLayout::WH>(loco::Node *input_for_decode);
+
+} // namespace luci
diff --git a/compiler/luci/service/src/ShapeDescription.cpp b/compiler/luci/service/src/ShapeDescription.cpp
new file mode 100644
index 000000000..cbc302f70
--- /dev/null
+++ b/compiler/luci/service/src/ShapeDescription.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/ShapeDescription.h"
+
+#include <oops/InternalExn.h>
+
+#include <cassert>
+
+namespace luci
+{
+
+ShapeDescription to_shape_description(const loco::TensorShape &shape)
+{
+ ShapeDescription res;
+
+ res._rank_known = true;
+
+ res._dims.resize(shape.rank());
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ // All the dimensions SHOULD be known
+ assert(shape.dim(axis).known());
+ res._dims.at(axis) = shape.dim(axis).value();
+ }
+
+ return res;
+}
+
+ShapeDescription to_shape_description(const loco::FeatureShape &shape)
+{
+ ShapeDescription res;
+
+ res._rank_known = true;
+
+ // T/F Lite encodes a feature map as a NHWC tensor
+ res._dims.resize(4);
+ res._dims.at(0) = shape.count().value();
+ res._dims.at(1) = shape.height().value();
+ res._dims.at(2) = shape.width().value();
+ res._dims.at(3) = shape.depth().value();
+
+ return res;
+}
+
+ShapeDescription to_shape_description(const loco::FilterShape &shape)
+{
+ ShapeDescription res;
+
+ res._rank_known = true;
+
+ // T/F Lite encodes a convolution filter as a NHWC tensor
+ res._dims.resize(4);
+ res._dims.at(0) = shape.count().value();
+ res._dims.at(1) = shape.height().value();
+ res._dims.at(2) = shape.width().value();
+ res._dims.at(3) = shape.depth().value();
+
+ return res;
+}
+
+ShapeDescription to_shape_description(const loco::DepthwiseFilterShape &shape)
+{
+ ShapeDescription res;
+
+ res._rank_known = true;
+
+ // T/F Lite encodes a depthwise convolution filter as a [1, H, W, C*M] tensor
+ res._dims.resize(4);
+ res._dims.at(0) = 1;
+ res._dims.at(1) = shape.height().value();
+ res._dims.at(2) = shape.width().value();
+ res._dims.at(3) = shape.depth().value() * shape.multiplier().value();
+
+ return res;
+}
+
+ShapeDescription to_shape_description(const loco::BiasShape &shape)
+{
+ ShapeDescription res;
+
+ res._rank_known = true;
+
+ res._dims.resize(1);
+ res._dims.at(0) = shape.length().value();
+
+ return res;
+}
+
+ShapeDescription to_shape_description(const loco::MatrixShape &shape)
+{
+ ShapeDescription res;
+
+ res._rank_known = true;
+
+ res._dims.resize(2);
+ res._dims.at(0) = shape.height().value();
+ res._dims.at(1) = shape.width().value();
+
+ return res;
+}
+
+ShapeDescription to_shape_description(const loco::NodeShape &shape)
+{
+ switch (shape.domain())
+ {
+ case loco::Domain::Tensor:
+ return to_shape_description(shape.as<loco::TensorShape>());
+ case loco::Domain::Feature:
+ return to_shape_description(shape.as<loco::FeatureShape>());
+ case loco::Domain::Filter:
+ return to_shape_description(shape.as<loco::FilterShape>());
+ case loco::Domain::DepthwiseFilter:
+ return to_shape_description(shape.as<loco::DepthwiseFilterShape>());
+ case loco::Domain::Bias:
+ return to_shape_description(shape.as<loco::BiasShape>());
+ case loco::Domain::Matrix:
+ return to_shape_description(shape.as<loco::MatrixShape>());
+ default:
+ break;
+ }
+
+ INTERNAL_EXN_V("Unsupported loco domain", oops::to_uint32(shape.domain()));
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/TestGraph.h b/compiler/luci/service/src/TestGraph.h
new file mode 100644
index 000000000..73562040f
--- /dev/null
+++ b/compiler/luci/service/src/TestGraph.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TEST_GRAPH_H__
+#define __TEST_GRAPH_H__
+
+#include <luci/IR/CircleNodes.h>
+#include "GraphBlock.h"
+
+#include <loco.h>
+
+#include <cassert>
+#include <memory>
+
+// TODO Change all Canonical nodes to Circle nodes
+
+namespace luci
+{
+namespace test
+{
+
+class TestGraph
+{
+public:
+ std::unique_ptr<loco::Graph> g;
+ loco::Pull *pull;
+ loco::Push *push;
+
+ TestGraph() // creates Pull and Push
+ {
+ g = loco::make_graph();
+
+ pull = g->nodes()->create<loco::Pull>();
+
+ push = g->nodes()->create<loco::Push>();
+
+ auto input = g->inputs()->create();
+ {
+ input->name("input");
+ loco::link(input, pull);
+ }
+ auto output = g->outputs()->create();
+ {
+ output->name("output");
+ loco::link(output, push);
+ }
+
+ _next_input = pull;
+ }
+
+ loco::Graph *graph() { return g.get(); }
+
+ /// @brief Creates node with NO arg and appends it to graph
+ template <class T> T *append()
+ {
+ auto node = g->nodes()->create<T>();
+ _next_input = node;
+
+ return node;
+ }
+
+ /// @brief Creates op T (arity=1) with arg1 as an input and appends it to graph
+ template <class T> T *append(loco::Node *arg1)
+ {
+ auto node = g->nodes()->create<T>();
+ setInput(node, arg1);
+ _next_input = node;
+
+ return node;
+ }
+
+ /// @brief Creates op T (arity=2) with arg1, arg2 as inputs and appends it to graph
+ template <class T> T *append(loco::Node *arg1, loco::Node *arg2)
+ {
+ auto node = g->nodes()->create<T>();
+ setInput(node, arg1, arg2);
+ _next_input = node;
+
+ return node;
+ }
+
+ /// @brief Creates op T (arity=3) with arg1, arg2, arg3 as inputs and appends it to graph
+ template <class T> T *append(loco::Node *arg1, loco::Node *arg2, loco::Node *arg3)
+ {
+ auto node = g->nodes()->create<T>();
+ setInput(node, arg1, arg2, arg3);
+ _next_input = node;
+
+ return node;
+ }
+
+ // push will get the last appended node
+ void complete() { push->from(_next_input); }
+
+ void complete(loco::Node *last_node) { push->from(last_node); }
+
+private:
+ // arity 1
+ void setInput(loco::Node *node, loco::Node *) { assert(false && "NYI"); };
+
+ void setInput(loco::AvgPool2D *node, loco::Node *input) { node->ifm(input); }
+ void setInput(loco::BiasDecode *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::BiasEncode *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::FeatureDecode *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::FeatureEncode *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::MaxPool2D *node, loco::Node *input) { node->ifm(input); }
+ void setInput(loco::Push *node, loco::Node *input) { node->from(input); };
+ void setInput(loco::ReLU *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::ReLU6 *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::Tanh *node, loco::Node *input) { node->input(input); };
+ void setInput(loco::TensorTranspose *node, loco::Node *input) { node->input(input); };
+
+ void setInput(luci::CircleAveragePool2D *node, loco::Node *input) { node->value(input); };
+ void setInput(luci::CircleMaxPool2D *node, loco::Node *input) { node->value(input); };
+ void setInput(luci::CircleRelu *node, loco::Node *input) { node->features(input); };
+ void setInput(luci::CircleRelu6 *node, loco::Node *input) { node->features(input); };
+
+ // arity 2
+ void setInput(loco::Node *node, loco::Node *, loco::Node *) { assert(false && "NYI"); };
+
+ void setInput(loco::Conv2D *node, loco::Node *input, loco::Node *filter)
+ {
+ node->ifm(input);
+ node->ker(filter);
+ }
+
+ void setInput(loco::EltwiseAdd *node, loco::Node *arg1, loco::Node *arg2)
+ {
+ node->lhs(arg1);
+ node->rhs(arg2);
+ };
+
+ void setInput(loco::FeatureBiasAdd *node, loco::Node *arg1, loco::Node *arg2)
+ {
+ node->value(arg1);
+ node->bias(arg2);
+ };
+
+ void setInput(luci::CircleAdd *node, loco::Node *arg1, loco::Node *arg2)
+ {
+ node->x(arg1);
+ node->y(arg2);
+ };
+
+ void setInput(luci::CircleMul *node, loco::Node *arg1, loco::Node *arg2)
+ {
+ node->x(arg1);
+ node->y(arg2);
+ };
+
+ void setInput(luci::CircleSub *node, loco::Node *arg1, loco::Node *arg2)
+ {
+ node->x(arg1);
+ node->y(arg2);
+ };
+
+ void setInput(luci::CircleTranspose *node, loco::Node *arg1, loco::Node *arg2)
+ {
+ node->a(arg1);
+ node->perm(arg2);
+ };
+
+ // arity 3
+ void setInput(loco::Node *node, loco::Node *, loco::Node *, loco::Node *)
+ {
+ assert(false && "NYI");
+ };
+
+ void setInput(luci::CircleConv2D *node, loco::Node *input, loco::Node *filter, loco::Node *bias)
+ {
+ node->input(input);
+ node->filter(filter);
+ node->bias(bias);
+ }
+
+private:
+ loco::Node *_next_input;
+};
+
+enum class ExampleGraphType
+{
+ FeatureBiasAdd,
+ ConstGen_ReLU,
+ FilterEncode_FilterDecode,
+ Transpose,
+
+ CircleTranspose,
+};
+
+template <ExampleGraphType T> class ExampleGraph;
+
+/**
+ * @brief Class to create the following:
+ *
+ * Pull - FeatureEncoder - FeatureBiasAdd - FeatureDecode - Push
+ * |
+ * ConstGen - BiasEncode --+
+ */
+template <> class ExampleGraph<ExampleGraphType::FeatureBiasAdd> : public TestGraph
+{
+public:
+ loco::FeatureEncode *fea_enc = nullptr;
+ loco::ConstGen *constgen = nullptr;
+ loco::BiasEncode *bias_enc = nullptr;
+ loco::FeatureBiasAdd *fea_bias_add = nullptr;
+ loco::FeatureDecode *fea_dec = nullptr;
+
+public:
+ ExampleGraph()
+ {
+ fea_enc = luci::make_feature_encode<luci::FeatureLayout::NHWC>(pull);
+ constgen = append<loco::ConstGen>();
+ bias_enc = append<loco::BiasEncode>(constgen);
+ fea_bias_add = append<loco::FeatureBiasAdd>(fea_enc, bias_enc);
+ fea_dec = luci::make_feature_decode<luci::FeatureLayout::NHWC>(fea_bias_add);
+ complete(fea_dec);
+ }
+};
+
+/**
+ * @brief Class to create the following:
+ *
+ * ConstGen -- ReLU -- Push
+ */
+template <> class ExampleGraph<ExampleGraphType::ConstGen_ReLU> : public TestGraph
+{
+public:
+ loco::ConstGen *constgen = nullptr;
+ loco::ReLU *relu = nullptr;
+
+public:
+ ExampleGraph()
+ {
+ constgen = append<loco::ConstGen>();
+ relu = append<loco::ReLU>(constgen);
+ complete(relu);
+ }
+};
+
+/**
+ * @brief Class to create the following:
+ *
+ * Pull -- Transpose -- Push
+ */
+template <> class ExampleGraph<ExampleGraphType::Transpose> : public TestGraph
+{
+public:
+ loco::TensorTranspose *transpose = nullptr;
+
+public:
+ ExampleGraph()
+ {
+ transpose = append<loco::TensorTranspose>(pull);
+ complete(transpose);
+ }
+};
+
+/**
+ * @brief Class to create the following:
+ *
+ * Pull -- FilterEncode -- FilterDecode -- Push
+ */
+template <> class ExampleGraph<ExampleGraphType::FilterEncode_FilterDecode> : public TestGraph
+{
+public:
+ loco::FilterEncode *filterEncode = nullptr;
+ loco::FilterDecode *filterDecode = nullptr;
+
+public:
+ ExampleGraph()
+ {
+ filterEncode = luci::make_filter_encode<luci::FilterLayout::HWIO>(pull); // from Tensorflow
+ filterDecode =
+ luci::make_filter_decode<luci::FilterLayout::OHWI>(filterEncode); // to Tensorflow Lite
+ complete(filterDecode);
+ }
+};
+
+/**
+ * @brief Class to create the following:
+ *
+ * Pull -- CircleTranspose -- Push
+ */
+template <> class ExampleGraph<ExampleGraphType::CircleTranspose> : public TestGraph
+{
+public:
+ loco::ConstGen *const_perm = nullptr;
+ luci::CircleTranspose *transpose_node = nullptr;
+
+public:
+ ExampleGraph()
+ {
+ const_perm = append<loco::ConstGen>();
+ transpose_node = append<luci::CircleTranspose>(pull, const_perm);
+ complete(transpose_node);
+ }
+};
+
+} // namespace test
+} // namespace luci
+
+#endif // __TEST_GRAPH_H__
diff --git a/compiler/luci/service/src/Validate.cpp b/compiler/luci/service/src/Validate.cpp
new file mode 100644
index 000000000..65b82c2b4
--- /dev/null
+++ b/compiler/luci/service/src/Validate.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/Validate.h"
+
+#include <luci/IR/Nodes/CircleOutput.h>
+#include <luci/Log.h>
+
+#include <loco/IR/NodeShape.h>
+#include <loco/Service/ShapeInference.h>
+#include <loco/Service/TypeInference.h>
+
+#include <cassert>
+#include <vector>
+
+namespace
+{
+
+/**
+ * @brief returns a CircleOutput node in nodes whose index is out_index
+ */
+luci::CircleOutput *find_node(std::vector<loco::Node *> nodes, loco::GraphOutputIndex out_index)
+{
+ for (auto node : nodes)
+ {
+ auto circle_output = dynamic_cast<luci::CircleOutput *>(node);
+ if (circle_output != nullptr)
+ {
+ if (circle_output->indexed() && circle_output->index() == out_index)
+ return circle_output;
+ }
+ }
+ return nullptr;
+}
+
+bool validate_shape_type(loco::Graph *g)
+{
+ LOGGER(l);
+
+ auto output_nodes = loco::output_nodes(g);
+
+ auto count = g->outputs()->size();
+ for (uint32_t out = 0; out < count; ++out)
+ {
+ auto graph_out = g->outputs()->at(out);
+ auto out_index = graph_out->index();
+
+ auto circle_output = find_node(output_nodes, out_index);
+ assert(circle_output != nullptr);
+ assert(circle_output->from() != nullptr);
+ auto circle_node = dynamic_cast<luci::CircleNode *>(circle_output->from());
+ assert(circle_node != nullptr);
+ assert(loco::shape_known(circle_node));
+
+ // check if output node shape is same as graph output shape
+ auto co_shape = loco::shape_get(circle_node);
+ auto go_tensor_shape = graph_out->shape();
+ assert(go_tensor_shape);
+ auto go_shape = loco::NodeShape(*go_tensor_shape);
+ if (!(co_shape == go_shape))
+ {
+ INFO(l) << "Shape for #" << out_index << " not same " << std::endl;
+ return false;
+ }
+
+ // check if data type match
+ assert(loco::dtype_known(circle_node));
+ if (graph_out->dtype() != loco::dtype_get(circle_node))
+ {
+ INFO(l) << "Type for #" << out_index << " not same " << std::endl;
+ return false;
+ }
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool validate(loco::Graph *g)
+{
+ if (!loco::valid(g))
+ return false;
+
+ if (!validate_shape_type(g))
+ return false;
+
+ // TODO add more validation
+
+ return true;
+}
+
+} // namespace luci
diff --git a/compiler/luci/tester/CMakeLists.txt b/compiler/luci/tester/CMakeLists.txt
new file mode 100644
index 000000000..bcb47183e
--- /dev/null
+++ b/compiler/luci/tester/CMakeLists.txt
@@ -0,0 +1,22 @@
+set(SRCS_READ_TESTER
+ src/ReadTester.cpp
+ src/Model.cpp
+ )
+
+add_executable(luci_readtester "${SRCS_READ_TESTER}")
+target_link_libraries(luci_readtester PRIVATE luci_import)
+target_link_libraries(luci_readtester PRIVATE luci_service)
+target_link_libraries(luci_readtester PRIVATE luci_pass)
+target_link_libraries(luci_readtester PRIVATE oops)
+
+set(SRCS_WRITE_TESTER
+ src/WriteTester.cpp
+ src/Model.cpp
+ )
+
+add_executable(luci_writetester "${SRCS_WRITE_TESTER}")
+target_link_libraries(luci_writetester PRIVATE luci_import)
+target_link_libraries(luci_writetester PRIVATE luci_service)
+target_link_libraries(luci_writetester PRIVATE luci_pass)
+target_link_libraries(luci_writetester PRIVATE luci_export)
+target_link_libraries(luci_writetester PRIVATE oops)
diff --git a/compiler/luci/tester/src/Model.cpp b/compiler/luci/tester/src/Model.cpp
new file mode 100644
index 000000000..b02c19161
--- /dev/null
+++ b/compiler/luci/tester/src/Model.cpp
@@ -0,0 +1,62 @@
+#include "Model.h"
+
+#include <fstream>
+#include <vector>
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+namespace
+{
+
+class FileModel final : public luci::Model
+{
+public:
+  explicit FileModel(const std::string &filename) : _filename(filename) {}
+
+public:
+  FileModel(const FileModel &) = delete;
+  FileModel(FileModel &&) = delete;
+
+public:
+  const ::circle::Model *model(void) override
+  {
+    std::ifstream file(_filename, std::ios::binary | std::ios::in);
+    if (!file.good())
+      return nullptr;
+
+    file.unsetf(std::ios::skipws);
+
+    std::streampos fileSize;
+    file.seekg(0, std::ios::end);
+    fileSize = file.tellg();
+    file.seekg(0, std::ios::beg);
+
+    // NOTE resize (not reserve): read() writes through data(), so elements must exist
+    _data.resize(fileSize);
+
+    // read the data
+    file.read(_data.data(), fileSize);
+    if (file.fail())
+      return nullptr;
+
+    return ::circle::GetModel(_data.data());
+  }
+
+private:
+  const std::string _filename;
+  std::vector<char> _data;
+};
+
+} // namespace
+
+namespace luci
+{
+
+std::unique_ptr<Model> load_model(const std::string &path)
+{
+ return std::make_unique<FileModel>(path);
+}
+
+} // namespace luci
diff --git a/compiler/luci/tester/src/Model.h b/compiler/luci/tester/src/Model.h
new file mode 100644
index 000000000..e40faf33e
--- /dev/null
+++ b/compiler/luci/tester/src/Model.h
@@ -0,0 +1,27 @@
+#ifndef __TESTER_MODEL_H__
+#define __TESTER_MODEL_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <memory>
+
+namespace luci
+{
+
+struct Model
+{
+ virtual ~Model() = default;
+
+ virtual const ::circle::Model *model(void) = 0;
+};
+
+/**
+ * @brief Load Circle model (as a raw Model) from a given path
+ *
+ * @note May return a nullptr
+ */
+std::unique_ptr<Model> load_model(const std::string &path);
+
+} // namespace luci
+
+#endif // __TESTER_MODEL_H__
diff --git a/compiler/luci/tester/src/ReadTester.cpp b/compiler/luci/tester/src/ReadTester.cpp
new file mode 100644
index 000000000..c105d6ce3
--- /dev/null
+++ b/compiler/luci/tester/src/ReadTester.cpp
@@ -0,0 +1,92 @@
+#include "Model.h"
+
+#include <luci/Importer.h>
+#include <luci/Service/Validate.h>
+#include <luci/Pass/ShapeInferencePass.h>
+#include <luci/Pass/TypeInferencePass.h>
+
+#include <iostream>
+#include <map>
+#include <string>
+
+namespace
+{
+
+void show_help_message(const char *progname, std::ostream &os)
+{
+ os << "USAGE: " << progname << " circlefile" << std::endl << std::endl;
+}
+
+void show_error_message(const char *progname, std::ostream &os, const std::string &msg)
+{
+ os << "ERROR: " << msg << std::endl;
+ os << std::endl;
+
+ show_help_message(progname, os);
+}
+
+} // namespace
+
+/*
+ * @brief ReadTest main
+ *
+ * Give one Circle file as an argument
+ *
+ * This will use luci_import to read the file and get loco graph
+ * In luci_import, LUCI_LOG environment will be checked and will
+ * dump graph to console if set.
+ * i.e. "LUCI_LOG=1 luci_readtester mymodel.circle"
+ */
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ {
+ show_error_message(argv[0], std::cerr, "Circle file is not specified");
+ return 255;
+ }
+
+ std::string input_path = argv[1];
+
+ std::cout << "[INFO] Circle is '" << input_path << "'" << std::endl;
+
+ // Load model from the file
+ std::unique_ptr<luci::Model> model = luci::load_model(input_path);
+ if (model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load '" << input_path << "'" << std::endl;
+ return 255;
+ }
+
+ const circle::Model *input_model = model->model();
+ if (input_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to read '" << input_path << "'" << std::endl;
+ return 255;
+ }
+
+ luci::Importer importer;
+ auto module = importer.importModule(input_model);
+ assert(module->size() > 0);
+
+ for (size_t g = 0; g < module->size(); ++g)
+ {
+ auto graph = module->graph(g);
+ if (graph == nullptr)
+ return 255;
+
+ {
+ luci::ShapeInferencePass pass;
+ while (pass.run(graph) == true)
+ ;
+ }
+ {
+ luci::TypeInferencePass pass;
+ while (pass.run(graph) == true)
+ ;
+ }
+
+ if (!luci::validate(graph))
+ return 255;
+ }
+ return 0;
+}
diff --git a/compiler/luci/tester/src/WriteTester.cpp b/compiler/luci/tester/src/WriteTester.cpp
new file mode 100644
index 000000000..80019d1b1
--- /dev/null
+++ b/compiler/luci/tester/src/WriteTester.cpp
@@ -0,0 +1,142 @@
+#include "Model.h"
+
+#include <luci/Importer.h>
+#include <luci/Pass/ShapeInferencePass.h>
+#include <luci/Pass/TypeInferencePass.h>
+#include <luci/Service/Validate.h>
+#include <luci/CircleExporter.h>
+#include <oops/InternalExn.h>
+
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <string>
+
+namespace
+{
+
+void show_help_message(const char *progname, std::ostream &os)
+{
+ os << "USAGE: " << progname << " circlefile_in circlefile_out" << std::endl << std::endl;
+}
+
+void show_error_message(const char *progname, std::ostream &os, const std::string &msg)
+{
+ os << "ERROR: " << msg << std::endl;
+ os << std::endl;
+
+ show_help_message(progname, os);
+}
+
+struct CircleExpContract : public luci::CircleExporter::Contract
+{
+public:
+ CircleExpContract(loco::Graph *graph, const std::string &filename)
+ : _graph(graph), _filepath(filename)
+ {
+ // NOTHING TO DO
+ }
+ CircleExpContract(luci::Module *module, const std::string &filename)
+ : _module(module), _filepath(filename)
+ {
+ // NOTHING TO DO
+ }
+ virtual ~CircleExpContract() = default;
+
+public:
+ loco::Graph *graph(void) const final { return _graph; }
+
+ luci::Module *module(void) const final { return _module; }
+
+public:
+ bool store(const char *ptr, const size_t size) const final;
+
+private:
+ loco::Graph *_graph;
+ luci::Module *_module;
+ const std::string _filepath;
+};
+
+bool CircleExpContract::store(const char *ptr, const size_t size) const
+{
+ if (!ptr)
+ INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
+
+ std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
+ fs.write(ptr, size);
+
+ return fs.good();
+}
+
+} // namespace
+
+/*
+ * @brief WriteTester main
+ *
+ * Give two Circle files as arguments
+ *
+ * This will use luci_import to read the first file and get loco graph
+ * With the graph, this will use luci_export to write to the second file
+ * Like ReadTester, LUCI_LOG=1 environment variable is available to dump the graph
+ */
+int main(int argc, char **argv)
+{
+ if (argc != 3)
+ {
+ show_error_message(argv[0], std::cerr, "In/Out Circle file path is not specified");
+ return 255;
+ }
+
+ std::string input_path = argv[1];
+ std::string output_path = argv[2];
+
+ std::cout << "[INFO] Circle from '" << input_path << "' to '" << output_path << "'" << std::endl;
+
+ // Load model from the file
+ std::unique_ptr<luci::Model> model = luci::load_model(input_path);
+ if (model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load '" << input_path << "'" << std::endl;
+ return 255;
+ }
+
+ const circle::Model *input_model = model->model();
+ if (input_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to read '" << input_path << "'" << std::endl;
+ return 255;
+ }
+
+ // Import from input Circle file
+ luci::Importer importer;
+ auto module = importer.importModule(input_model);
+ assert(module->size() > 0);
+
+ for (size_t g = 0; g < module->size(); ++g)
+ {
+ auto graph = module->graph(g);
+ if (graph == nullptr)
+ return 255;
+
+ {
+ luci::ShapeInferencePass pass;
+ while (pass.run(graph) == true)
+ ;
+ }
+ {
+ luci::TypeInferencePass pass;
+ while (pass.run(graph) == true)
+ ;
+ }
+
+ if (!luci::validate(graph))
+ return 255;
+ }
+
+ // Export to output Circle file
+ luci::CircleExporter exporter;
+
+ CircleExpContract contract(module.get(), output_path);
+
+ return exporter.invoke(&contract) ? 0 : 255;
+}
diff --git a/compiler/luci/tests/.gitignore b/compiler/luci/tests/.gitignore
new file mode 100644
index 000000000..8dbfa9012
--- /dev/null
+++ b/compiler/luci/tests/.gitignore
@@ -0,0 +1 @@
+/test.local.lst
diff --git a/compiler/luci/tests/CMakeLists.txt b/compiler/luci/tests/CMakeLists.txt
new file mode 100644
index 000000000..4e5639047
--- /dev/null
+++ b/compiler/luci/tests/CMakeLists.txt
@@ -0,0 +1,97 @@
+# TODO use local test.recipe files for small networks
+file(GLOB RECIPES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.recipe")
+
+foreach(RECIPE IN ITEMS ${RECIPES})
+ get_filename_component(RECIPE_PREFIX ${RECIPE} DIRECTORY)
+
+ set(RECIPE_SOURCE_FILE "${RECIPE_PREFIX}.recipe")
+ set(RECIPE_OUTPUT_FILE "${RECIPE_PREFIX}.tflite")
+ set(CIRCLE_OUTPUT_FILE "${RECIPE_PREFIX}.circle")
+
+ # Copy .recipe
+ add_custom_command(OUTPUT "${RECIPE_SOURCE_FILE}"
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ "${CMAKE_CURRENT_SOURCE_DIR}/${RECIPE}" "${RECIPE_SOURCE_FILE}"
+ DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RECIPE}"
+ COMMENT "Generating ${RECIPE_SOURCE_FILE}")
+
+ # Generate .tflite
+ add_custom_command(OUTPUT "${RECIPE_OUTPUT_FILE}"
+ COMMAND tflchef-file "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
+ DEPENDS tflchef-file "${RECIPE_SOURCE_FILE}"
+ COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
+
+ # Generate .circle
+ add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}"
+ COMMAND tflite2circle "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
+ DEPENDS tflite2circle "${RECIPE_OUTPUT_FILE}"
+ COMMENT "Generating ${CIRCLE_OUTPUT_FILE}")
+
+ list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}")
+endforeach(RECIPE)
+
+# Generate from res/TensorFlowLiteRecipes
+nncc_find_resource(TensorFlowLiteRecipes)
+set(TENSORFLOWLITERECIPES_DIR "${TensorFlowLiteRecipes_DIR}")
+
+file(GLOB RECIPES RELATIVE ${TENSORFLOWLITERECIPES_DIR} "${TENSORFLOWLITERECIPES_DIR}/*/test.recipe")
+
+foreach(RECIPE IN ITEMS ${RECIPES})
+ get_filename_component(RECIPE_PREFIX ${RECIPE} DIRECTORY)
+
+ set(RECIPE_SOURCE_FILE "${RECIPE_PREFIX}.recipe")
+ set(RECIPE_OUTPUT_FILE "${RECIPE_PREFIX}.tflite")
+ set(CIRCLE_OUTPUT_FILE "${RECIPE_PREFIX}.circle")
+
+ # Copy .recipe
+ add_custom_command(OUTPUT "${RECIPE_SOURCE_FILE}"
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ "${TENSORFLOWLITERECIPES_DIR}/${RECIPE}" "${RECIPE_SOURCE_FILE}"
+ DEPENDS "${TENSORFLOWLITERECIPES_DIR}/${RECIPE}"
+ COMMENT "Generating ${RECIPE_SOURCE_FILE}")
+
+ # Generate .tflite
+ add_custom_command(OUTPUT "${RECIPE_OUTPUT_FILE}"
+ COMMAND tflchef-file "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
+ DEPENDS tflchef-file "${RECIPE_SOURCE_FILE}"
+ COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
+
+ # Generate .circle
+ add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}"
+ COMMAND tflite2circle "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
+ DEPENDS tflite2circle "${RECIPE_OUTPUT_FILE}"
+ COMMENT "Generating ${CIRCLE_OUTPUT_FILE}")
+
+ list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}")
+endforeach(RECIPE)
+
+# Add a dummy target to create a target-level dependency.
+# TODO Find a way to create dependency between CTest tests (added below) and generated testfiles.
+add_custom_target(luci_testfiles ALL DEPENDS ${TESTFILES})
+
+macro(addread NAME)
+ list(APPEND DAILY_READ_TESTS ${NAME})
+endmacro(addread)
+
+macro(addwrite NAME)
+ list(APPEND DAILY_WRITE_TESTS ${NAME})
+endmacro(addwrite)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+add_test(NAME luci_unit_readtest
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/readverify.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "$<TARGET_FILE:luci_readtester>"
+ ${DAILY_READ_TESTS}
+)
+
+add_test(NAME luci_unit_writetest
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/writeverify.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "$<TARGET_FILE:luci_writetester>"
+ ${DAILY_WRITE_TESTS}
+)
diff --git a/compiler/luci/tests/readverify.sh b/compiler/luci/tests/readverify.sh
new file mode 100755
index 000000000..3403e9c19
--- /dev/null
+++ b/compiler/luci/tests/readverify.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# This script verifies the basic behavior of luci frontend
+#
+# HOW TO USE
+#
+# ./readverify.sh <WORKDIR> <path/to/luci_readtester> <TEST 1> <TEST 2> ...
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+WORKDIR="$1"; shift
+VERIFY_BINARY_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TESTCASE_FILE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TESTCASE_FILE}.log" <(
+ exec 2>&1
+ set -ex
+
+ "${VERIFY_BINARY_PATH}" "${TESTCASE_FILE}.circle"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/luci/tests/test.lst b/compiler/luci/tests/test.lst
new file mode 100644
index 000000000..08cbd6b1a
--- /dev/null
+++ b/compiler/luci/tests/test.lst
@@ -0,0 +1,91 @@
+addread(Add_000)
+addread(Add_U8_000)
+addread(ArgMax_000)
+addread(ArgMax_001)
+addread(ArgMax_002)
+addread(ArgMax_003)
+addread(ArgMax_U8_000)
+addread(ArgMax_U8_001)
+addread(ArgMax_U8_002)
+addread(ArgMax_U8_003)
+addread(BatchToSpaceND_000)
+addread(Concatenation_000)
+addread(Concatenation_U8_000)
+addread(Conv2D_000)
+addread(Conv2D_U8_000)
+addread(Conv2D_002)
+addread(Cos_000)
+addread(DepthwiseConv2D_000)
+addread(DepthwiseConv2D_U8_000)
+addread(Div_000)
+addread(Equal_000)
+addread(Exp_000)
+addread(FullyConnected_000)
+addread(FullyConnected_001)
+addread(FullyConnected_U8_000)
+addread(LogicalNot_000)
+addread(LogicalOr_000)
+addread(MaxPool2D_000)
+addread(MaxPool2D_U8_000)
+addread(Mean_000)
+addread(Mul_000)
+addread(Mul_U8_000)
+addread(Pack_000)
+addread(Pack_U8_000)
+addread(Pad_000)
+addread(ReLU_000)
+addread(Reshape_000)
+addread(Reshape_001)
+addread(Reshape_U8_000)
+addread(Rsqrt_000)
+addread(Softmax_000)
+addread(Softmax_U8_000)
+addread(Sub_000)
+addread(Sub_U8_000)
+addread(Transpose_000)
+
+addwrite(Add_000)
+addwrite(Add_U8_000)
+addwrite(ArgMax_000)
+addwrite(ArgMax_001)
+addwrite(ArgMax_002)
+addwrite(ArgMax_003)
+addwrite(ArgMax_U8_000)
+addwrite(ArgMax_U8_001)
+addwrite(ArgMax_U8_002)
+addwrite(ArgMax_U8_003)
+addwrite(BatchToSpaceND_000)
+addwrite(Concatenation_000)
+addwrite(Concatenation_U8_000)
+addwrite(Conv2D_000)
+addwrite(Conv2D_U8_000)
+addwrite(Conv2D_002)
+addwrite(Cos_000)
+addwrite(DepthwiseConv2D_000)
+addwrite(DepthwiseConv2D_U8_000)
+addwrite(Div_000)
+addwrite(Equal_000)
+addwrite(Exp_000)
+addwrite(FullyConnected_000)
+addwrite(FullyConnected_001)
+addwrite(FullyConnected_U8_000)
+addwrite(LogicalNot_000)
+addwrite(LogicalOr_000)
+addwrite(MaxPool2D_000)
+addwrite(MaxPool2D_U8_000)
+addwrite(Mean_000)
+addwrite(Mul_000)
+addwrite(Mul_U8_000)
+addwrite(Pack_000)
+addwrite(Pack_U8_000)
+addwrite(Pad_000)
+addwrite(ReLU_000)
+addwrite(Reshape_000)
+addwrite(Reshape_001)
+addwrite(Reshape_U8_000)
+addwrite(Rsqrt_000)
+addwrite(Softmax_000)
+addwrite(Softmax_U8_000)
+addwrite(Sub_000)
+addwrite(Sub_U8_000)
+addwrite(Transpose_000)
diff --git a/compiler/luci/tests/writeverify.sh b/compiler/luci/tests/writeverify.sh
new file mode 100755
index 000000000..6980bac44
--- /dev/null
+++ b/compiler/luci/tests/writeverify.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# This script verifies the basic behavior of luci frontend
+#
+# HOW TO USE
+#
+# ./writeverify.sh <WORKDIR> <path/to/luci_writetester> <TEST 1> <TEST 2> ...
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+WORKDIR="$1"; shift
+VERIFY_BINARY_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TESTCASE_FILE}_w.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TESTCASE_FILE}_w.log" <(
+ exec 2>&1
+ set -ex
+
+ "${VERIFY_BINARY_PATH}" "${TESTCASE_FILE}.circle" "${TESTCASE_FILE}_w.circle"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/mio-circle/CMakeLists.txt b/compiler/mio-circle/CMakeLists.txt
new file mode 100644
index 000000000..f97ec2b99
--- /dev/null
+++ b/compiler/mio-circle/CMakeLists.txt
@@ -0,0 +1,28 @@
+nnas_find_package(FlatBuffers QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+message(STATUS "Build mio-circle: TRUE")
+
+# TODO Find a better way
+set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs")
+
+# NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.fbs" instead of "circle_schema_generated.fbs"
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+ COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+ DEPENDS "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs"
+)
+
+FlatBuffers_Target(mio_circle
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/circle"
+ INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+ SCHEMA_FILES "schema.fbs"
+)
+
+# This example shows how to use "mio-circle" library
+add_executable(mio_circle_example example.cpp)
+target_link_libraries(mio_circle_example mio_circle)
diff --git a/compiler/mio-circle/README.md b/compiler/mio-circle/README.md
new file mode 100644
index 000000000..e90ec513f
--- /dev/null
+++ b/compiler/mio-circle/README.md
@@ -0,0 +1,3 @@
+# mio-circle
+
+Let's make it easy to read and write Circle models.
diff --git a/compiler/mio-circle/example.cpp b/compiler/mio-circle/example.cpp
new file mode 100644
index 000000000..6418e0411
--- /dev/null
+++ b/compiler/mio-circle/example.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-circle"
+//
+#include <mio/circle/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char **argv)
+{
+ std::ifstream ifs(argv[1], std::ios_base::binary);
+ std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cout << "Fail" << std::endl;
+ return 255;
+ }
+
+ std::cout << "Pass" << std::endl;
+ return 0;
+}
diff --git a/compiler/mio-tf/CMakeLists.txt b/compiler/mio-tf/CMakeLists.txt
new file mode 100644
index 000000000..d670f6bab
--- /dev/null
+++ b/compiler/mio-tf/CMakeLists.txt
@@ -0,0 +1,48 @@
+nnas_find_package(Protobuf QUIET)
+# TensorFlowSource package is used to use ~.proto files
+nnas_find_package(TensorFlowSource EXACT 1.12 QUIET)
+
+if(NOT Protobuf_FOUND)
+ return()
+endif(NOT Protobuf_FOUND)
+
+if(NOT TensorFlowSource_FOUND)
+ return()
+endif(NOT TensorFlowSource_FOUND)
+
+message(STATUS "Build mio-tf: TRUE")
+
+# Minimal Protocol Buffer specification for GraphDef file (.pb) encoding/decoding
+unset(PROTO_FILES)
+list(APPEND PROTO_FILES tensorflow/core/framework/versions.proto)
+list(APPEND PROTO_FILES tensorflow/core/framework/resource_handle.proto)
+list(APPEND PROTO_FILES tensorflow/core/framework/types.proto)
+list(APPEND PROTO_FILES tensorflow/core/framework/tensor.proto)
+list(APPEND PROTO_FILES tensorflow/core/framework/tensor_shape.proto)
+list(APPEND PROTO_FILES tensorflow/core/framework/attr_value.proto)
+list(APPEND PROTO_FILES tensorflow/core/framework/op_def.proto)
+list(APPEND PROTO_FILES tensorflow/core/framework/node_def.proto)
+list(APPEND PROTO_FILES tensorflow/core/framework/function.proto)
+list(APPEND PROTO_FILES tensorflow/core/framework/graph.proto)
+
+Protobuf_Generate(GRAPHDEF_PROTO
+ "${CMAKE_CURRENT_BINARY_DIR}/generated"
+ "${TensorFlowSource_DIR}"
+ ${PROTO_FILES})
+
+add_library(mio_tf STATIC ${GRAPHDEF_PROTO_SOURCES})
+set_target_properties(mio_tf PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(mio_tf PUBLIC ${GRAPHDEF_PROTO_INCLUDE_DIRS})
+target_link_libraries(mio_tf PUBLIC ${GRAPHDEF_PROTO_LIBRARIES})
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+
+GTest_AddTest(mio_tf_test ${TESTS})
+target_include_directories(mio_tf_test PRIVATE src)
+target_link_libraries(mio_tf_test mio_tf)
diff --git a/compiler/mio-tf/README.md b/compiler/mio-tf/README.md
new file mode 100644
index 000000000..18f475a85
--- /dev/null
+++ b/compiler/mio-tf/README.md
@@ -0,0 +1,3 @@
+# mio-tf
+
+_mio-tf_ provides a library to access TensorFlow model files
diff --git a/compiler/mio-tf/src/mio_tf.test.cpp b/compiler/mio-tf/src/mio_tf.test.cpp
new file mode 100644
index 000000000..013dc2d54
--- /dev/null
+++ b/compiler/mio-tf/src/mio_tf.test.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+#include <gtest/gtest.h>
+
+TEST(MIO_TF_Test, instance)
+{
+ tensorflow::GraphDef gd;
+ tensorflow::NodeDef nd;
+
+ SUCCEED();
+}
diff --git a/compiler/mio-tflite/CMakeLists.txt b/compiler/mio-tflite/CMakeLists.txt
new file mode 100644
index 000000000..cb0795a08
--- /dev/null
+++ b/compiler/mio-tflite/CMakeLists.txt
@@ -0,0 +1,37 @@
+nnas_find_package(FlatBuffers QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+nnas_find_package(TensorFlowSource EXACT 2.1.0 QUIET)
+
+if(NOT TensorFlowSource_FOUND)
+ return()
+endif(NOT TensorFlowSource_FOUND)
+
+message(STATUS "Build mio-tflite: TRUE")
+
+set(SCHEMA_FILE "${TensorFlowSource_DIR}/tensorflow/lite/schema/schema.fbs")
+
+# NOTE Use copy of schema.fbs as to provide unified way for circle also
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+ COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+ DEPENDS "${SCHEMA_FILE}"
+)
+
+FlatBuffers_Target(mio_tflite
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/tflite"
+ INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+ SCHEMA_FILES "schema.fbs"
+)
+
+add_executable(mio_tflite_example example.cpp)
+target_link_libraries(mio_tflite_example mio_tflite)
+
+# Temporay tflite validation tool to replace nnkit-tflite
+# TODO provide full tflite validation with runtime/interpreter
+add_executable(mio_tflite_validate example.cpp)
+target_link_libraries(mio_tflite_validate mio_tflite)
diff --git a/compiler/mio-tflite/README.md b/compiler/mio-tflite/README.md
new file mode 100644
index 000000000..187b1a5c6
--- /dev/null
+++ b/compiler/mio-tflite/README.md
@@ -0,0 +1,3 @@
+# mio-tflite
+
+_mio-tflite_ provides a library to access TensorFlow lite model files
diff --git a/compiler/mio-tflite/example.cpp b/compiler/mio-tflite/example.cpp
new file mode 100644
index 000000000..54d15103c
--- /dev/null
+++ b/compiler/mio-tflite/example.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-tflite"
+//
+#include <mio/tflite/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char **argv)
+{
+ std::ifstream ifs(argv[1], std::ios_base::binary);
+ std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+ if (!tflite::VerifyModelBuffer(verifier))
+ {
+ std::cout << "Fail" << std::endl;
+ return 255;
+ }
+
+ std::cout << "Pass" << std::endl;
+ return 0;
+}
diff --git a/compiler/mir-caffe-importer/CMakeLists.txt b/compiler/mir-caffe-importer/CMakeLists.txt
new file mode 100644
index 000000000..83176510e
--- /dev/null
+++ b/compiler/mir-caffe-importer/CMakeLists.txt
@@ -0,0 +1,17 @@
+nnas_find_package(CaffeProto QUIET)
+
+if (NOT CaffeProto_FOUND)
+ return()
+endif ()
+
+set(MIR_CAFFE_IMPORTER_SOURCES
+ caffe_importer.cpp
+ caffe_importer.h
+ caffe_op_creator.cpp
+ caffe_op_creator.h
+ caffe_op_types.h)
+
+add_library(mir_caffe_importer STATIC ${MIR_CAFFE_IMPORTER_SOURCES})
+set_target_properties(mir_caffe_importer PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(mir_caffe_importer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_link_libraries(mir_caffe_importer PUBLIC mir caffeproto PRIVATE stdex)
diff --git a/compiler/mir-caffe-importer/caffe_importer.cpp b/compiler/mir-caffe-importer/caffe_importer.cpp
new file mode 100644
index 000000000..8e5ebda15
--- /dev/null
+++ b/compiler/mir-caffe-importer/caffe_importer.cpp
@@ -0,0 +1,439 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "caffe_importer.h"
+#include "caffe/proto/caffe.pb.h"
+#include "caffe_op_creator.h"
+#include "caffe_op_types.h"
+
+#include "mir/ops/OutputOp.h"
+
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/text_format.h>
+
+#include <fcntl.h>
+
+#include <cassert>
+#include <cerrno>
+#include <cstring>
+#include <stdex/Memory.h>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+#include <set>
+
+namespace mir_caffe
+{
+
+namespace
+{
+
+class CaffeImporter
+{
+public:
+ /// @brief Load the model and convert it into a MIR Graph.
+ std::unique_ptr<mir::Graph> importModelFromBinaryFile(const std::string &filename);
+ std::unique_ptr<mir::Graph> importModelFromTextFile(const std::string &filename);
+
+private:
+ std::unique_ptr<mir::Graph> importModel();
+
+ std::unique_ptr<caffe::NetParameter> _net;
+ std::unique_ptr<CaffeOpCreator> _opCreator;
+
+ // Maps Caffe blob names to corresponding MIR operation outputs.
+ std::map<std::string, mir::Operation::Output *> _blobNameToOpOutput;
+
+ static const std::map<std::string, CaffeOpType> _operatorTypes;
+
+ /**
+ * @brief Mark output MIR nodes
+ */
+ void setGraphOutputs(mir::Graph *graph);
+
+ /**
+ * @brief Pass through caffe graph and collect unsupported by NNC layers
+ * @throw PassException with message, containing detected problems
+ */
+ void collectUnsupportedLayers();
+
+ /**
+ * @brief Create MIR node from single caffe layer
+ */
+ void createMIRNodesFromLayer(const caffe::LayerParameter &layer);
+
+ mir::Operation::Output *getOutputForBlob(const std::string &blob_name) const;
+ void setOutputForBlob(const std::string &blob_name, mir::Operation::Output *output);
+
+ /**
+ * @brief Collect unsupported parts of caffe layer
+ */
+ void collectUnsupportedOp(const caffe::LayerParameter &layer, std::set<std::string> &problems);
+
+ /**
+ * @brief Returns MIR operation outputs corresponding to the inputs of the given layer.
+ */
+ std::vector<mir::Operation::Output *> getMIRInputsForLayer(const caffe::LayerParameter &layer);
+
+ void processDeprecatedInput();
+};
+
+void loadModelFromBinaryFile(const std::string &filename, caffe::NetParameter *net)
+{
+ GOOGLE_PROTOBUF_VERIFY_VERSION;
+
+ int file_handle = open(filename.c_str(), O_RDONLY);
+
+ if (file_handle == -1)
+ throw std::runtime_error("Couldn't open file \"" + filename + "\": " + std::strerror(errno) +
+ ".");
+
+ google::protobuf::io::FileInputStream file_stream(file_handle);
+ file_stream.SetCloseOnDelete(true);
+
+ google::protobuf::io::CodedInputStream coded_stream(&file_stream);
+ coded_stream.SetTotalBytesLimit(INT_MAX, INT_MAX);
+
+ if (!net->ParseFromCodedStream(&coded_stream))
+ throw std::runtime_error("Couldn't parse file \"" + filename + "\".");
+
+ // If the file has not been consumed entirely, assume that the file is in the wrong format.
+ if (!coded_stream.ConsumedEntireMessage())
+ throw std::runtime_error("File \"" + filename + "\" has not been consumed entirely.");
+}
+
+void loadModelFromTextFile(const std::string &filename, caffe::NetParameter *net)
+{
+ GOOGLE_PROTOBUF_VERIFY_VERSION;
+
+ int file_handle = open(filename.c_str(), O_RDONLY);
+
+ if (file_handle == -1)
+ throw std::runtime_error("Couldn't open file \"" + filename + "\": " + std::strerror(errno) +
+ ".");
+
+ google::protobuf::io::FileInputStream file_stream(file_handle);
+ file_stream.SetCloseOnDelete(true);
+
+ if (!google::protobuf::TextFormat::Parse(&file_stream, net))
+ throw std::runtime_error("Couldn't parse file \"" + filename + "\".");
+}
+
+std::unique_ptr<mir::Graph> CaffeImporter::importModel()
+{
+ auto graph = stdex::make_unique<mir::Graph>();
+ _opCreator = stdex::make_unique<CaffeOpCreator>(graph.get());
+
+ collectUnsupportedLayers();
+
+ for (int i = 0; i < _net->layer_size(); ++i)
+ createMIRNodesFromLayer(_net->layer(i));
+
+ setGraphOutputs(graph.get());
+
+ return std::move(graph);
+}
+
+std::unique_ptr<mir::Graph> CaffeImporter::importModelFromBinaryFile(const std::string &filename)
+{
+ _net = stdex::make_unique<caffe::NetParameter>();
+ loadModelFromBinaryFile(filename, _net.get());
+
+ return importModel();
+}
+
+std::unique_ptr<mir::Graph> CaffeImporter::importModelFromTextFile(const std::string &filename)
+{
+ _net = stdex::make_unique<caffe::NetParameter>();
+ loadModelFromTextFile(filename, _net.get());
+
+ return importModel();
+}
+
+void CaffeImporter::collectUnsupportedLayers()
+{
+ processDeprecatedInput();
+
+ std::set<std::string> problems;
+
+ for (const caffe::LayerParameter &layer : _net->layer())
+ collectUnsupportedOp(layer, problems);
+
+ if (!problems.empty())
+ {
+ std::string msg("NNC can't load model. Detected problems:");
+ for (const auto &problemStr : problems)
+ msg.append("\n * " + problemStr);
+ throw std::runtime_error(msg);
+ }
+}
+
+void CaffeImporter::createMIRNodesFromLayer(const caffe::LayerParameter &layer)
+{
+ std::vector<mir::Operation::Output *> inputs = getMIRInputsForLayer(layer);
+ std::vector<mir::Operation::Output *> outputs;
+
+ switch (_operatorTypes.at(layer.type()))
+ {
+ case CaffeOpType::input:
+ outputs = _opCreator->convertInput(layer);
+ break;
+ case CaffeOpType::convolution:
+ outputs = _opCreator->convertConvolution(layer, inputs);
+ break;
+ case CaffeOpType::innerProduct:
+ outputs = _opCreator->convertInnerProduct(layer, inputs);
+ break;
+ case CaffeOpType::pooling:
+ outputs = _opCreator->convertPooling(layer, inputs);
+ break;
+ case CaffeOpType::concat:
+ outputs = _opCreator->convertConcat(layer, inputs);
+ break;
+ case CaffeOpType::reshape:
+ outputs = _opCreator->convertReshape(layer, inputs);
+ break;
+ case CaffeOpType::ReLU:
+ outputs = _opCreator->convertReLU(layer, inputs);
+ break;
+ case CaffeOpType::softmax:
+ outputs = _opCreator->convertSoftmax(layer, inputs);
+ break;
+ case CaffeOpType::scale:
+ outputs = _opCreator->convertScale(layer, inputs);
+ break;
+ case CaffeOpType::batchNorm:
+ outputs = _opCreator->convertBatchNorm(layer, inputs);
+ break;
+ case CaffeOpType::dropout:
+ outputs = _opCreator->convertDropout(layer, inputs);
+ break;
+ case CaffeOpType::tanh:
+ outputs = _opCreator->convertTanH(layer, inputs);
+ break;
+ case CaffeOpType::ELU:
+ outputs = _opCreator->convertELU(layer, inputs);
+ break;
+ case CaffeOpType::eltwise:
+ outputs = _opCreator->convertEltwise(layer, inputs);
+ break;
+ case CaffeOpType::embed:
+ outputs = _opCreator->convertEmbed(layer, inputs);
+ break;
+ case CaffeOpType::deconvolution:
+ outputs = _opCreator->convertDeconvolution(layer, inputs);
+ break;
+ case CaffeOpType::split:
+ outputs = _opCreator->convertSplit(layer, inputs);
+ break;
+ case CaffeOpType::sigmoid:
+ outputs = _opCreator->convertSigmoid(layer, inputs);
+ break;
+ case CaffeOpType::LSTM:
+ outputs = _opCreator->convertLSTM(layer, inputs);
+ break;
+ default:
+ assert(false && "All unsupported types should have been found before this pass.");
+ }
+
+ assert(static_cast<int>(outputs.size()) == layer.top_size() && "Number of outputs differs.");
+ for (int i = 0; i < layer.top_size(); ++i)
+ setOutputForBlob(layer.top(i), outputs[i]);
+}
+
+void CaffeImporter::collectUnsupportedOp(const caffe::LayerParameter &layer,
+ std::set<std::string> &problems)
+{
+ auto it = _operatorTypes.find(layer.type());
+ if (it == _operatorTypes.end())
+ {
+ problems.insert(layer.type() + ": unknown layer");
+ return;
+ }
+
+ CaffeOpType op_type = it->second;
+
+ switch (op_type)
+ {
+ case CaffeOpType::concat:
+ case CaffeOpType::input:
+ case CaffeOpType::softmax:
+ case CaffeOpType::scale:
+ case CaffeOpType::dropout:
+ case CaffeOpType::split:
+ case CaffeOpType::eltwise:
+ case CaffeOpType::ELU:
+ case CaffeOpType::ReLU:
+ case CaffeOpType::embed:
+ case CaffeOpType::sigmoid:
+ case CaffeOpType::tanh:
+ case CaffeOpType::innerProduct:
+ // No checks
+ break;
+ case CaffeOpType::deconvolution:
+ case CaffeOpType::convolution:
+ _opCreator->checkConvolution(layer, problems);
+ break;
+ case CaffeOpType::pooling:
+ _opCreator->checkPooling(layer, problems);
+ break;
+ case CaffeOpType::reshape:
+ _opCreator->checkReshape(layer, problems);
+ break;
+ case CaffeOpType::batchNorm:
+ _opCreator->checkBatchNorm(layer, problems);
+ break;
+ case CaffeOpType::LSTM:
+ _opCreator->checkLSTM(layer, problems);
+ break;
+ default:
+ problems.insert(layer.type() + ": unsupported layer");
+ break;
+ }
+}
+
+void CaffeImporter::processDeprecatedInput()
+{
+ if (_net->input_dim_size() != 0 || _net->input_shape_size() != 0)
+ throw std::runtime_error("Deprecated Caffe input types are not supported");
+}
+
+std::vector<mir::Operation::Output *>
+CaffeImporter::getMIRInputsForLayer(const caffe::LayerParameter &layer)
+{
+ std::vector<mir::Operation::Output *> inputs;
+
+ for (const auto &input_name : layer.bottom())
+ inputs.push_back(getOutputForBlob(input_name));
+
+ return inputs;
+}
+
+mir::Operation::Output *CaffeImporter::getOutputForBlob(const std::string &blob_name) const
+{
+ return _blobNameToOpOutput.at(blob_name);
+}
+
+void CaffeImporter::setOutputForBlob(const std::string &blob_name, mir::Operation::Output *output)
+{
+ const auto it = _blobNameToOpOutput.find(blob_name);
+ if (it != _blobNameToOpOutput.cend())
+ {
+ // caffe input blob name could be same as output blob name, and next line will overwrite
+ // '_blobNameToOpOutput' element, but in all networks that I saw it was not a problem
+ it->second->setName("");
+ }
+
+ // Do not overwrite the name in case of fall-through layers (ex. Dropout, Split).
+ // TODO Find a way to handle it properly.
+ if (output->getName().empty())
+ output->setName(blob_name);
+
+ _blobNameToOpOutput[blob_name] = output;
+}
+
+void CaffeImporter::setGraphOutputs(mir::Graph *graph)
+{
+ // TODO For now, we assume that:
+ // - there is exactly one output;
+ // - the output is from the last layer.
+ const auto &last_layer = *_net->layer().rbegin();
+ auto output = getOutputForBlob(last_layer.top(0));
+ graph->create<mir::ops::OutputOp>(output);
+}
+
+const std::map<std::string, CaffeOpType> CaffeImporter::_operatorTypes = {
+ {"AbsVal", CaffeOpType::absVal},
+ {"Accuracy", CaffeOpType::accuracy},
+ {"ArgMax", CaffeOpType::argMax},
+ {"BatchNorm", CaffeOpType::batchNorm},
+ {"BatchReindex", CaffeOpType::batchReindex},
+ {"Bias", CaffeOpType::bias},
+ {"BNLL", CaffeOpType::BNLL},
+ {"Clip", CaffeOpType::clip},
+ {"Concat", CaffeOpType::concat},
+ {"ContrastiveLoss", CaffeOpType::contrastiveLoss},
+ {"Convolution", CaffeOpType::convolution},
+ {"Crop", CaffeOpType::crop},
+ {"Data", CaffeOpType::data},
+ {"Deconvolution", CaffeOpType::deconvolution},
+ {"Dropout", CaffeOpType::dropout},
+ {"DummyData", CaffeOpType::dummyData},
+ {"Eltwise", CaffeOpType::eltwise},
+ {"ELU", CaffeOpType::ELU},
+ {"Embed", CaffeOpType::embed},
+ {"EuclidianLoss", CaffeOpType::euclidianLoss},
+ {"Exp", CaffeOpType::exp},
+ {"Filter", CaffeOpType::filter},
+ {"Flatten", CaffeOpType::flatten},
+ {"HDF5Data", CaffeOpType::HDF5Data},
+ {"HDF5Output", CaffeOpType::HDF5Output},
+ {"HingeLoss", CaffeOpType::hingeLoss},
+ {"Im2Col", CaffeOpType::im2Col},
+ {"ImageData", CaffeOpType::imageData},
+ {"InfogainLoss", CaffeOpType::infogainLoss},
+ {"InnerProduct", CaffeOpType::innerProduct},
+ {"Input", CaffeOpType::input},
+ {"Log", CaffeOpType::log},
+ {"LRN", CaffeOpType::LRN},
+ {"LSTM", CaffeOpType::LSTM},
+ {"MemoryData", CaffeOpType::memoryData},
+ {"MultinomialLogisticLoss", CaffeOpType::multinomialLogisticLoss},
+ {"MVN", CaffeOpType::MVN},
+ {"Parameter", CaffeOpType::parameter},
+ {"Pooling", CaffeOpType::pooling},
+ {"Power", CaffeOpType::power},
+ {"PReLU", CaffeOpType::PReLU},
+ {"Python", CaffeOpType::python},
+ {"Recurrent", CaffeOpType::recurrent},
+ {"Reduction", CaffeOpType::reduction},
+ {"ReLU", CaffeOpType::ReLU},
+ {"Reshape", CaffeOpType::reshape},
+ {"RNN", CaffeOpType::RNN},
+ {"Scale", CaffeOpType::scale},
+ {"SigmoidCrossEntropyLoss", CaffeOpType::sigmoidCrossEntropyLoss},
+ {"Sigmoid", CaffeOpType::sigmoid},
+ {"Silence", CaffeOpType::silence},
+ {"Softmax", CaffeOpType::softmax},
+ {"SoftmaxWithLoss", CaffeOpType::softmaxWithLoss},
+ {"SPP", CaffeOpType::SPP},
+ {"Split", CaffeOpType::split},
+ {"Slice", CaffeOpType::slice},
+ {"TanH", CaffeOpType::tanh},
+ {"Threshold", CaffeOpType::threshold},
+ {"Tile", CaffeOpType::tile},
+ {"WindowData", CaffeOpType::windowData}};
+} // namespace
+
+std::unique_ptr<mir::Graph> importModelFromBinaryFile(const std::string &filename)
+{
+ CaffeImporter importer;
+ return importer.importModelFromBinaryFile(filename);
+}
+
+std::unique_ptr<mir::Graph> importModelFromTextFile(const std::string &filename)
+{
+ CaffeImporter importer;
+ return importer.importModelFromTextFile(filename);
+}
+
+std::unique_ptr<mir::Graph> loadModel(const std::string &filename)
+{
+ return importModelFromBinaryFile(filename);
+}
+
+} // namespace mir_caffe
diff --git a/compiler/mir-caffe-importer/caffe_importer.h b/compiler/mir-caffe-importer/caffe_importer.h
new file mode 100644
index 000000000..cf2c055bc
--- /dev/null
+++ b/compiler/mir-caffe-importer/caffe_importer.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_CAFFE_IMPORTER_H
+#define MIR_CAFFE_IMPORTER_H
+
+#include <string>
+#include <memory>
+
+#include "mir/Graph.h"
+
+namespace mir_caffe
+{
+
+std::unique_ptr<mir::Graph> importModelFromBinaryFile(const std::string &filename);
+std::unique_ptr<mir::Graph> importModelFromTextFile(const std::string &filename);
+// TODO Remove after changing all uses.
+std::unique_ptr<mir::Graph> loadModel(const std::string &filename);
+
+} // namespace mir_caffe
+
+#endif // MIR_CAFFE_IMPORTER_H
diff --git a/compiler/mir-caffe-importer/caffe_op_creator.cpp b/compiler/mir-caffe-importer/caffe_op_creator.cpp
new file mode 100644
index 000000000..5d43d248e
--- /dev/null
+++ b/compiler/mir-caffe-importer/caffe_op_creator.cpp
@@ -0,0 +1,834 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "caffe_op_creator.h"
+
+#include "mir/ops/AddOp.h"
+#include "mir/ops/AvgPool2DOp.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/Deconv2DOp.h"
+#include "mir/ops/EluOp.h"
+#include "mir/ops/FullyConnectedOp.h"
+#include "mir/ops/GatherOp.h"
+#include "mir/ops/LeakyReluOp.h"
+#include "mir/ops/MaxOp.h"
+#include "mir/ops/MaxPool2DOp.h"
+#include "mir/ops/MulOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/ops/ReshapeOp.h"
+#include "mir/ops/SigmoidOp.h"
+#include "mir/ops/SliceOp.h"
+#include "mir/ops/SoftmaxOp.h"
+#include "mir/ops/TanhOp.h"
+#include "mir/ops/TransposeOp.h"
+#include "mir/Index.h"
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+#include <cmath>
+#include <iostream>
+#include <set>
+
+namespace mir_caffe
+{
+
+static mir::Shape convertBlobShape(const caffe::BlobShape &shape)
+{
+ mir::Shape mir_shape(shape.dim_size());
+
+ for (int i = 0; i < shape.dim_size(); ++i)
+ {
+ mir_shape.dim(i) = shape.dim(i);
+ }
+
+ return mir_shape;
+}
+
+using namespace mir;
+
+/// @brief Split arg into @p num_parts equal parts along @p axis axis.
+std::vector<mir::Operation::Output *> CaffeOpCreator::createSplit(mir::Operation::Output *arg,
+ int32_t num_parts, int32_t axis)
+{
+ const auto &arg_shape = arg->getShape();
+
+ assert(axis >= 0 && axis < arg_shape.rank());
+ int32_t part_size = arg_shape.dim(axis) / num_parts;
+ assert(part_size * num_parts == arg_shape.dim(axis));
+
+ Shape starts(arg_shape.rank());
+ Shape sizes(arg_shape);
+ sizes.dim(axis) = part_size;
+
+ std::vector<mir::Operation::Output *> outputs(num_parts);
+ for (int32_t i = 0; i < num_parts; ++i)
+ {
+ outputs[i] = createOp<ops::SliceOp>(arg, starts, sizes)->getOutput(0);
+ starts.dim(axis) += part_size;
+ }
+
+ return outputs;
+}
+
+/// @brief Helper function for creating FullyConnected operation with non-square input.
+mir::Operation::Output *CaffeOpCreator::createFullyConnected(mir::Operation::Output *input,
+ mir::Operation::Output *weights,
+ int32_t axis)
+{
+ const auto &input_shape = input->getShape();
+ const auto &weights_shape = weights->getShape();
+
+ assert(axis >= 0 && axis < input_shape.rank());
+ assert(weights_shape.rank() == 2);
+
+ // Result shape is: input.shape[0:axis] + weights.shape[1].
+ Shape result_shape = input_shape;
+ result_shape.resize(axis + 1);
+ result_shape.dim(axis) = weights_shape.dim(1);
+
+ // Flatten input to 2-D shape.
+ int32_t outer_size = 1;
+ for (int32_t i = 0; i < axis; ++i)
+ outer_size *= input_shape.dim(i);
+ int32_t inner_size = 1;
+ for (int32_t i = axis; i < input_shape.rank(); ++i)
+ inner_size *= input_shape.dim(i);
+
+ auto flatten = createOp<ops::ReshapeOp>(input, Shape{outer_size, inner_size})->getOutput(0);
+ auto fc = createOp<ops::FullyConnectedOp>(flatten, weights)->getOutput(0);
+ return createOp<ops::ReshapeOp>(fc, result_shape)->getOutput(0);
+}
+
+TensorVariant CaffeOpCreator::convertBlob(const caffe::BlobProto &blob)
+{
+ const void *src_data;
+
+ mir::DataType dtype;
+ if (blob.data_size() != 0)
+ {
+ assert(blob.double_data_size() == 0);
+ dtype = mir::DataType::FLOAT32;
+ src_data = blob.data().data();
+ }
+ else if (blob.double_data_size() != 0)
+ {
+ dtype = mir::DataType::FLOAT64;
+ src_data = blob.double_data().data();
+ }
+ else
+ {
+ throw std::runtime_error("No data in Caffe BlobProto, investigate");
+ }
+
+ const mir::Shape shape = convertBlobShape(blob.shape());
+ return TensorVariant({dtype, shape}, src_data);
+}
+
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertInput(const caffe::LayerParameter &layer)
+{
+ const auto &params = layer.input_param();
+ const auto num_inputs = layer.top_size();
+ const auto num_shapes = params.shape_size();
+ std::vector<mir::Operation::Output *> outputs;
+
+ assert((num_shapes == 1 || num_shapes == num_inputs) && "Unsupported number of shapes.");
+
+ for (int i = 0; i < num_inputs; ++i)
+ {
+ const auto &blob_shape = params.shape(num_shapes == 1 ? 0 : i);
+ mir::TensorType input_type(DataType::FLOAT32, convertBlobShape(blob_shape));
+ auto input = createOp<ops::InputOp>(input_type)->getOutput(0);
+ outputs.push_back(input);
+ }
+
+ return outputs;
+}
+
+template <class OperationAttributes>
+static void convertConvolutionParam(const caffe::ConvolutionParameter &conv_param,
+ OperationAttributes &attributes)
+{
+ std::int32_t stride_h, stride_w;
+ if (conv_param.has_stride_h() || conv_param.has_stride_w())
+ {
+ // If stride_h or stride_w are set, they take precedence.
+ stride_h = conv_param.stride_h();
+ stride_w = conv_param.stride_w();
+ }
+ else if (conv_param.stride_size() == 0)
+ {
+ // If no strides specified, they defaults to 1.
+ stride_h = stride_w = 1;
+ }
+ else if (conv_param.stride_size() == 1)
+ {
+ // If only one stride specified, all strides take the same value.
+ stride_h = stride_w = conv_param.stride(0);
+ }
+ else
+ {
+ // Otherwise, there must be a stride for each dimension.
+ assert(conv_param.stride_size() == 2);
+ stride_h = conv_param.stride(0);
+ stride_w = conv_param.stride(1);
+ }
+ attributes.strides = {stride_h, stride_w};
+
+ std::int32_t pad_h, pad_w;
+ if (conv_param.has_pad_h() || conv_param.has_pad_w())
+ {
+ // If pad_h or pad_w are set, they take precedence.
+ pad_h = conv_param.pad_h();
+ pad_w = conv_param.pad_w();
+ }
+ else if (conv_param.pad_size() == 0)
+ {
+ // If no pads specified, they defaults to 0.
+ pad_h = pad_w = 0;
+ }
+ else if (conv_param.pad_size() == 1)
+ {
+ // If only one pad specified, all pads take the same value.
+ pad_h = pad_w = conv_param.pad(0);
+ }
+ else
+ {
+ // Otherwise, there must be a pad for each dimension.
+ assert(conv_param.pad_size() == 2);
+ pad_h = conv_param.pad(0);
+ pad_w = conv_param.pad(1);
+ }
+ attributes.padding_after = attributes.padding_before = {pad_h, pad_w};
+}
+
+// Collects human-readable diagnostics for Convolution configurations that
+// this importer cannot convert yet; results go into 'problems_ops_set'.
+void CaffeOpCreator::checkConvolution(const caffe::LayerParameter &layer,
+ std::set<std::string> &problems_ops_set)
+{
+ const caffe::ConvolutionParameter &params = layer.convolution_param();
+
+ assert(params.stride_size() <= 2);
+
+ // Only the canonical channel axis (1, i.e. NCHW) is supported.
+ if (params.axis() != 1)
+ problems_ops_set.insert("Conv2D: Unsupported axis");
+
+ // The repeated 'pad' field must not be combined with 'pad_h'/'pad_w'.
+ if (params.pad_size() != 0 && (params.has_pad_h() || params.has_pad_w()))
+ problems_ops_set.insert("Conv2D: Conflicting padding properties");
+
+ if (params.pad_size() > 2)
+ problems_ops_set.insert("Conv2D: Unsupported number of pads")
+}
+
+// Converts a Caffe Convolution layer into a MIR Conv2D operation, followed by
+// an optional bias addition when 'bias_term' is set.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertConvolution(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ const auto &params = layer.convolution_param();
+ Conv2DOpAttributes attributes;
+
+ convertConvolutionParam(params, attributes);
+ attributes.num_groups = params.group();
+ attributes.data_format = DataFormat::NCHW;
+
+ // blobs(0) is the kernel; it must be a 4-D tensor.
+ assert(layer.blobs(0).shape().dim_size() == 4);
+ auto kernel = createOp<ops::ConstantOp>(convertBlob(layer.blobs(0)))->getOutput(0);
+ std::vector<std::size_t> perm{0, 2, 3, 1}; // OIHW -> OHWI
+ kernel = createOp<ops::TransposeOp>(kernel, perm)->getOutput(0);
+ auto result = createOp<ops::Conv2DOp>(inputs[0], kernel, attributes)->getOutput(0);
+
+ // Add the bias, if any. blobs(1) is a 1-D bias; reshape it to {1, C, 1, 1}
+ // so that the addition broadcasts over the NCHW result.
+ if (params.bias_term())
+ {
+ auto bias = createOp<ops::ConstantOp>(convertBlob(layer.blobs(1)))->getOutput(0);
+ bias = createOp<ops::ReshapeOp>(bias, Shape{1, bias->getShape().dim(0), 1, 1})->getOutput(0);
+ result = createOp<ops::AddOp>(result, bias)->getOutput(0);
+ }
+
+ return {result};
+}
+
+// Converts a Caffe Deconvolution layer into a MIR DeConv2D operation,
+// followed by an optional bias addition. Grouped deconvolution is rejected.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertDeconvolution(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ const caffe::ConvolutionParameter &params = layer.convolution_param();
+ Deconv2DOpAttributes attributes;
+
+ convertConvolutionParam(params, attributes);
+ attributes.data_format = DataFormat::NCHW;
+
+ if (params.group() != 1)
+ {
+ throw std::runtime_error("Deconvolution: 'group' != 1 is not supported.");
+ }
+
+ // blobs(0) is the kernel, stored by Caffe as IOHW; MIR expects HWOI.
+ auto kernel = createOp<ops::ConstantOp>(convertBlob(layer.blobs(0)))->getOutput(0);
+ std::vector<std::size_t> perm{2, 3, 1, 0}; // IOHW -> HWOI
+ kernel = createOp<ops::TransposeOp>(kernel, perm)->getOutput(0);
+ auto result = createOp<ops::DeConv2DOp>(inputs[0], kernel, attributes)->getOutput(0);
+
+ // bias_term is optional (so might not be present) and defaults to true.
+ // The 1-D bias is reshaped to {1, C, 1, 1} to broadcast over NCHW.
+ if (params.bias_term())
+ {
+ auto bias = createOp<ops::ConstantOp>(convertBlob(layer.blobs(1)))->getOutput(0);
+ bias = createOp<ops::ReshapeOp>(bias, Shape{1, bias->getShape().dim(0), 1, 1})->getOutput(0);
+ result = createOp<ops::AddOp>(result, bias)->getOutput(0);
+ }
+
+ return {result};
+}
+
+// Converts a Caffe InnerProduct (fully connected) layer. The weight blob is
+// transposed unless the layer's 'transpose' flag says it is already in the
+// orientation the fully-connected helper expects.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertInnerProduct(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ const auto &params = layer.inner_product_param();
+ auto weights = createOp<ops::ConstantOp>(convertBlob(layer.blobs(0)))->getOutput(0);
+
+ if (!params.transpose())
+ weights = createOp<ops::TransposeOp>(weights, std::vector<std::size_t>{1, 0})->getOutput(0);
+
+ auto result = createFullyConnected(inputs[0], weights, params.axis());
+
+ // Add the bias, if any.
+ if (params.bias_term())
+ {
+ auto bias = createOp<ops::ConstantOp>(convertBlob(layer.blobs(1)))->getOutput(0);
+ result = createOp<ops::AddOp>(result, bias)->getOutput(0);
+ }
+
+ return {result};
+}
+
+// Converts a Caffe Concat layer: all inputs are concatenated along the axis
+// given by the layer's concat_param.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertConcat(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ const auto &params = layer.concat_param();
+ auto concat = createOp<ops::ConcatOp>(inputs, params.axis());
+ return {concat->getOutput(0)};
+}
+
+// Translates kernel, stride and pad fields of a Caffe PoolingParameter into a
+// MIR pooling attribute struct (works for both average and max pooling), and
+// adjusts the trailing padding so that MIR reproduces Caffe's output shape.
+template <class PoolingAttributes>
+static void convertPoolingParam(const caffe::PoolingParameter &params,
+ const mir::Shape &input_shape, PoolingAttributes &attributes)
+{
+ std::int32_t kernel_h, kernel_w;
+ // Global pooling must have been rejected earlier by checkPooling.
+ assert(!params.global_pooling());
+ if (params.has_kernel_size())
+ {
+ kernel_h = kernel_w = params.kernel_size();
+ }
+ else
+ {
+ kernel_h = params.kernel_h();
+ kernel_w = params.kernel_w();
+ }
+ attributes.window = {kernel_h, kernel_w};
+
+ std::int32_t stride_h, stride_w;
+ if (params.has_stride_h() || params.has_stride_w())
+ {
+ stride_h = params.stride_h();
+ stride_w = params.stride_w();
+ }
+ else
+ {
+ stride_h = stride_w = params.stride();
+ }
+ attributes.strides = {stride_h, stride_w};
+
+ std::int32_t pad_h, pad_w;
+ if (params.has_pad_h() || params.has_pad_w())
+ {
+ pad_h = params.pad_h();
+ pad_w = params.pad_w();
+ }
+ else
+ {
+ pad_h = pad_w = params.pad();
+ }
+
+ attributes.padding_before = attributes.padding_after = {pad_h, pad_w};
+
+ // Caffe uses a different formula for computing output shape than MIR
+ // (Caffe rounds the spatial extent up, MIR rounds down). Adjust padding so
+ // that the output shape stays the same.
+ constexpr int num_spatial_dims = 2;
+ for (int i = 0; i < num_spatial_dims; ++i)
+ {
+ // Assuming NCHW format.
+ const std::int32_t padded_input =
+ input_shape.dim(2 + i) + attributes.padding_before[i] + attributes.padding_after[i];
+ if ((padded_input - attributes.window[i]) % attributes.strides[i] != 0)
+ ++attributes.padding_after[i];
+ }
+}
+
+// Collects human-readable diagnostics for Pooling configurations that this
+// importer cannot convert yet.
+void CaffeOpCreator::checkPooling(const caffe::LayerParameter &layer,
+ std::set<std::string> &problems_ops_set)
+{
+ const caffe::PoolingParameter &params = layer.pooling_param();
+
+ if (params.has_global_pooling() && params.global_pooling())
+ problems_ops_set.insert("Pooling: pooling layer global_pooling param is not supported yet");
+
+ // Only average and max pooling are convertible.
+ if (params.pool() != caffe::PoolingParameter::AVE &&
+ params.pool() != caffe::PoolingParameter::MAX)
+ problems_ops_set.insert("Pooling: unsupported pooling type");
+
+ // 'pad' must not be combined with 'pad_h'/'pad_w'.
+ if (params.has_pad() && (params.has_pad_h() || params.has_pad_w()))
+ problems_ops_set.insert("Pooling: conflicting padding properties in pooling")
+}
+
+// Converts a Caffe Pooling layer into either a MIR AvgPool2D or MaxPool2D
+// operation, depending on the layer's pool type.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertPooling(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ const auto &params = layer.pooling_param();
+
+ assert(inputs.size() == 1);
+ auto input = inputs[0];
+
+ mir::Operation::Output *result;
+
+ switch (params.pool())
+ {
+ case caffe::PoolingParameter::AVE:
+ {
+ AvgPool2DOpAttributes attributes_avg;
+ attributes_avg.data_format = DataFormat::NCHW;
+ convertPoolingParam(params, input->getShape(), attributes_avg);
+ result = createOp<ops::AvgPool2DOp>(input, attributes_avg)->getOutput(0);
+ break;
+ }
+ case caffe::PoolingParameter::MAX:
+ {
+ MaxPool2DOpAttributes attributes_max;
+ attributes_max.data_format = DataFormat::NCHW;
+ convertPoolingParam(params, input->getShape(), attributes_max);
+ result = createOp<ops::MaxPool2DOp>(input, attributes_max)->getOutput(0);
+ break;
+ }
+ default:
+ // Other pooling types are filtered out by checkPooling beforehand.
+ assert(false);
+ }
+
+ return {result};
+}
+
+// Converts a Caffe Softmax layer into a MIR Softmax operation. For 4-D
+// inputs the data is transposed so the softmax runs along the last axis,
+// then transposed back.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertSoftmax(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ const auto &params = layer.softmax_param();
+
+ // CPP and ACL backends are able to perform Softmax only along the last axis.
+ // FIXME Do it in backends.
+ if (inputs[0]->getShape().rank() == 4)
+ {
+ // For now, we only account for the most common case (channel axis).
+ if (params.axis() != 1)
+ throw std::runtime_error("Softmax: unsupported axis");
+ int32_t axis = 3;
+ // NCHW -> NHWC, softmax over the (now last) channel axis, then NHWC -> NCHW.
+ auto input = createOp<ops::TransposeOp>(inputs[0], std::vector<std::size_t>{0, 2, 3, 1});
+ auto softmax = createOp<ops::SoftmaxOp>(input->getOutput(0), axis);
+ auto result =
+ createOp<ops::TransposeOp>(softmax->getOutput(0), std::vector<std::size_t>{0, 3, 1, 2});
+ return {result->getOutput(0)};
+ }
+
+ auto softmax = createOp<ops::SoftmaxOp>(inputs[0], params.axis());
+ return {softmax->getOutput(0)};
+}
+
+// Collects human-readable diagnostics for Reshape configurations that this
+// importer cannot convert yet ('axis'/'num_axes', missing shape, zero dims).
+void CaffeOpCreator::checkReshape(const caffe::LayerParameter &layer,
+ std::set<std::string> &problems_ops_set)
+{
+ const caffe::ReshapeParameter &params = layer.reshape_param();
+
+ if (params.has_axis() || params.has_num_axes())
+ problems_ops_set.insert("Reshape layer axis and num_axes params are not supported yet");
+
+ if (!params.has_shape())
+ problems_ops_set.insert("Reshape layer doesn't have shape parameter");
+
+ const mir::Shape newShape = convertBlobShape(params.shape());
+
+ // In Caffe, a zero dimension means "copy from input", which MIR cannot express yet.
+ for (int32_t i = 0; i < newShape.rank(); ++i)
+ if (newShape.dim(i) == 0)
+ problems_ops_set.insert("Reshape layer zero shape values are not supported yet")
+}
+
+/**
+ * @brief Converts Caffe Reshape layer to Model IR Reshape operation.
+ * @todo Support "axis" and "num_axes" parameters as needed.
+ * @todo Decide how to react to the absence of "shape" parameter.
+ * @todo Support zero values in "shape" parameter.
+ */
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertReshape(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ const caffe::ReshapeParameter &params = layer.reshape_param();
+
+ // The target shape is taken verbatim from the layer's 'shape' parameter;
+ // unsupported variants are filtered out earlier by checkReshape.
+ const mir::Shape new_shape = convertBlobShape(params.shape());
+ auto reshape = createOp<ops::ReshapeOp>(inputs[0], new_shape);
+ return {reshape->getOutput(0)};
+}
+
+// Converts a Caffe ReLU layer. A present 'negative_slope' parameter selects
+// LeakyReLU; otherwise a plain ReLU is emitted.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertReLU(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ mir::Operation *relu;
+ if (layer.relu_param().has_negative_slope())
+ {
+ float alpha = layer.relu_param().negative_slope();
+ relu = createOp<ops::LeakyReluOp>(inputs[0], alpha);
+ }
+ else
+ {
+ relu = createOp<ops::ReluOp>(inputs[0]);
+ }
+
+ return {relu->getOutput(0)};
+}
+
+// Converts a Caffe Scale layer into an elementwise multiply by a learned
+// per-channel factor (blobs(0)), plus an optional per-channel bias (blobs(1)).
+// Both 1-D blobs are reshaped to {1, C, 1, 1} to broadcast over NCHW input.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertScale(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ const auto &params = layer.scale_param();
+ auto scale = createOp<ops::ConstantOp>(convertBlob(layer.blobs(0)))->getOutput(0);
+ scale = createOp<ops::ReshapeOp>(scale, Shape{1, scale->getShape().dim(0), 1, 1})->getOutput(0);
+ auto result = createOp<ops::MulOp>(inputs[0], scale)->getOutput(0);
+
+ // Add the bias, if any.
+ if (params.bias_term())
+ {
+ auto bias = createOp<ops::ConstantOp>(convertBlob(layer.blobs(1)))->getOutput(0);
+ bias = createOp<ops::ReshapeOp>(bias, Shape{1, bias->getShape().dim(0), 1, 1})->getOutput(0);
+ result = createOp<ops::AddOp>(result, bias)->getOutput(0);
+ }
+
+ return {result};
+}
+
+// Collects diagnostics for BatchNorm layers whose scale-factor blob does not
+// have the expected shape.
+void CaffeOpCreator::checkBatchNorm(const caffe::LayerParameter &layer,
+ std::set<std::string> &problems_ops_set)
+{
+ const auto &scale_shape = layer.blobs(2).shape();
+
+ // Check that the last blob (with the scale factor) contains exactly one number.
+ if (scale_shape.dim_size() != 1 || scale_shape.dim(0) != 1)
+ problems_ops_set.insert("Unexpected shape of scale parameter in batch norm")
+}
+
+// Converts a Caffe BatchNorm (inference mode) into two elementwise constants:
+// Y = (X + C1) * C2, where C1 and C2 are precomputed from the stored mean,
+// variance and scale-factor blobs.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertBatchNorm(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ const caffe::BatchNormParameter &params = layer.batch_norm_param();
+
+ auto input = inputs[0];
+ auto mean_tensor = convertBlob(layer.blobs(0));
+ auto var_tensor = convertBlob(layer.blobs(1));
+ auto scale_tensor = convertBlob(layer.blobs(2));
+ const float eps = params.eps();
+
+ // blobs(2) holds a single scalar (validated in checkBatchNorm).
+ float scale_factor = *reinterpret_cast<float *>(scale_tensor.at(mir::Index{0}));
+
+ // See https://github.com/BVLC/caffe/blob/master/src/caffe/layers/batch_norm_layer.cpp#L100
+ // Y = (X - mean / scale_factor) / sqrt(var / scale_factor + epsilon) =
+ // = (X + C1) * C2
+ // Guard against division by zero; Caffe does the same.
+ if (scale_factor != 0.0f)
+ scale_factor = 1.0f / scale_factor;
+
+ // C1 = -mean / scale_factor
+ Tensor<float> mean_accessor(mean_tensor);
+ for (const auto &idx : ShapeRange(mean_accessor.getShape()))
+ mean_accessor.at(idx) *= -scale_factor;
+ auto c1 = createOp<ops::ConstantOp>(mean_tensor)->getOutput(0);
+
+ // C2 = 1 / sqrt(var / scale_factor + epsilon)
+ Tensor<float> var_accessor(var_tensor);
+ for (const auto &idx : ShapeRange(var_accessor.getShape()))
+ var_accessor.at(idx) = 1.0f / std::sqrt(var_accessor.at(idx) * scale_factor + eps);
+ auto c2 = createOp<ops::ConstantOp>(var_tensor)->getOutput(0);
+
+ // Reshape the per-channel constants to {1, C, 1, 1} to broadcast over NCHW.
+ c1 = createOp<ops::ReshapeOp>(c1, Shape{1, c1->getShape().dim(0), 1, 1})->getOutput(0);
+ c2 = createOp<ops::ReshapeOp>(c2, Shape{1, c2->getShape().dim(0), 1, 1})->getOutput(0);
+
+ // Y = (X + C1) * C2
+ auto result = createOp<ops::AddOp>(input, c1)->getOutput(0);
+ result = createOp<ops::MulOp>(result, c2)->getOutput(0);
+
+ return {result};
+}
+
+// Converts a Caffe Dropout layer by forwarding the input unchanged.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertDropout(const caffe::LayerParameter &,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ // This is a no-op in inference mode.
+ return {inputs[0]};
+}
+
+// Converts a Caffe ELU layer into a MIR Elu operation with the layer's alpha.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertELU(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ const caffe::ELUParameter &params = layer.elu_param();
+
+ auto elu = createOp<ops::EluOp>(inputs[0], params.alpha());
+ return {elu->getOutput(0)};
+}
+
+// Converts a Caffe Embed layer into a Gather over the learned embedding
+// table (blobs(0)) indexed by the input, plus an optional bias addition.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertEmbed(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ const auto &params = layer.embed_param();
+ auto data = createOp<ops::ConstantOp>(convertBlob(layer.blobs(0)));
+ // Gather along axis 0: each input index selects one embedding row.
+ auto result = createOp<ops::GatherOp>(data->getOutput(0), inputs[0], 0)->getOutput(0);
+
+ // Add the bias, if any.
+ if (params.bias_term())
+ {
+ auto bias = createOp<ops::ConstantOp>(convertBlob(layer.blobs(1)))->getOutput(0);
+ result = createOp<ops::AddOp>(result, bias)->getOutput(0);
+ }
+
+ return {result};
+}
+
+// Converts a Caffe Sigmoid layer into a MIR Sigmoid operation.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertSigmoid(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ auto result = createOp<ops::SigmoidOp>(inputs[0]);
+ return {result->getOutput(0)};
+}
+
+// Converts a Caffe TanH layer into a MIR Tanh operation.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertTanH(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ auto tanh = createOp<ops::TanhOp>(inputs[0]);
+ return {tanh->getOutput(0)};
+}
+
+// Converts a Caffe Eltwise layer (PROD / SUM / MAX) into a chain of binary
+// MIR operations folded left over all bottoms. The indexing of inputs[0] and
+// inputs[1] implies at least two bottoms are expected.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertEltwise(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ auto &params = layer.eltwise_param();
+
+ mir::Operation::Output *result;
+ switch (params.operation())
+ {
+ case caffe::EltwiseParameter::PROD:
+ {
+ result = createOp<ops::MulOp>(inputs[0], inputs[1])->getOutput(0);
+ for (int i = 2; i < layer.bottom_size(); ++i)
+ {
+ result = createOp<ops::MulOp>(result, inputs[i])->getOutput(0);
+ }
+ break;
+ }
+ case caffe::EltwiseParameter::SUM:
+ {
+ // SUM supports optional per-input coefficients; inputs with a
+ // coefficient other than 1.0 are pre-multiplied by a scalar constant.
+ std::vector<mir::Operation::Output *> scaled_inputs = inputs;
+ if (params.coeff_size() > 0)
+ {
+ assert(params.coeff_size() == layer.bottom_size());
+ for (int i = 0; i < layer.bottom_size(); i++)
+ {
+ if (params.coeff(i) != 1.0f)
+ {
+ const float coeff_val = params.coeff(i);
+ TensorVariant coeff_tensor({DataType::FLOAT32, {}}, &coeff_val);
+ auto coeff_const = createOp<ops::ConstantOp>(coeff_tensor)->getOutput(0);
+ scaled_inputs[i] = createOp<ops::MulOp>(coeff_const, inputs[i])->getOutput(0);
+ }
+ }
+ }
+ result = createOp<ops::AddOp>(scaled_inputs[0], scaled_inputs[1])->getOutput(0);
+ for (int i = 2; i < layer.bottom_size(); ++i)
+ {
+ result = createOp<ops::AddOp>(result, scaled_inputs[i])->getOutput(0);
+ }
+ break;
+ }
+ case caffe::EltwiseParameter::MAX:
+ {
+ result = createOp<ops::MaxOp>(inputs[0], inputs[1])->getOutput(0);
+ for (int i = 2; i < layer.bottom_size(); ++i)
+ {
+ result = createOp<ops::MaxOp>(result, inputs[i])->getOutput(0);
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("Unknown element-wise operation.");
+ }
+ return {result};
+}
+
+// Converts a Caffe Split layer: the single input is simply replicated to
+// every top blob (no data is copied or sliced).
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertSplit(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ std::vector<mir::Operation::Output *> outputs(layer.top_size(), inputs.at(0));
+ return outputs;
+}
+
+// Collects a diagnostic when an LSTM layer uses the unsupported
+// 'expose_hidden' recurrent parameter.
+void CaffeOpCreator::checkLSTM(const caffe::LayerParameter &layer,
+ std::set<std::string> &problems_ops_set)
+{
+ const auto &params = layer.recurrent_param();
+ if (params.expose_hidden())
+ problems_ops_set.insert("LSTM: parameter 'expose_hidden' has unsupported value: " +
+ std::to_string(params.expose_hidden()))
+}
+
+// Creates a float32 TensorVariant of the given shape filled with zeros; used
+// to initialize LSTM cell and hidden states.
+static TensorVariant createZeroedTensor(const mir::Shape &shape)
+{
+ // TODO For now it is hardcoded float32.
+ auto elem_type = mir::DataType::FLOAT32;
+ std::vector<float> zeros(static_cast<std::size_t>(shape.numElements()), 0.0f);
+ return TensorVariant({elem_type, shape}, zeros.data());
+}
+
+/* See the following links for details on implementation:
+ * https://github.com/BVLC/caffe/blob/master/src/caffe/layers/recurrent_layer.cpp
+ * https://github.com/BVLC/caffe/blob/master/src/caffe/layers/lstm_layer.cpp
+ * https://github.com/BVLC/caffe/blob/master/src/caffe/layers/lstm_unit_layer.cpp
+ *
+ * Inputs:
+ * x -- The time-varying input. Shape: [T, N, d0, d1, ..., dn].
+ * cont -- The sequence continuation indicators. Shape: [T, N].
+ * x_static -- The static (non-time-varying) input. Shape: [N, ...].
+ * This parameter is optional and not currently supported.
+ *
+ * Additional inputs when parameter "expose_hidden" is true (not currently supported):
+ * h_0 -- The initial value of the hidden state. Shape: [1, N, D].
+ * c_0 -- The initial value of the cell state. Shape: [1, N, D].
+ *
+ * Learned parameters:
+ * xw -- x weights for input, output, forget and cell gates concatenated.
+ * Shape: [4 * D, d0 * d1 * ... * dn].
+ * xb -- x biases for input, output, forget and cell gates concatenated. Shape: [4 * D].
+ * hw -- h weights for input, output, forget and cell gates concatenated. Shape: [4 * D, D].
+ *
+ * Outputs:
+ * h -- The time-varying output. Shape: [T, N, D].
+ *
+ * Additional outputs when parameter "expose_hidden" is true (not currently supported):
+ * h_T -- The value of the hidden state at the last timestep. Shape: [1, N, D].
+ * c_T -- The value of the cell state at the last timestep. Shape: [1, N, D].
+ *
+ * Here:
+ * T - the number of timesteps,
+ * N - the number of independent streams.
+ * D - the number of hidden parameters.
+ *
+ * Formulas:
+ * c_cont = c[t-1] * cont[t]
+ * h_cont = h[t-1] * cont[t]
+ * i[t] = Sigmoid(x[t] . xw_i + xb_i + h_cont . hw_i)
+ * f[t] = Sigmoid(x[t] . xw_f + xb_f + h_cont . hw_f)
+ * o[t] = Sigmoid(x[t] . xw_o + xb_o + h_cont . hw_o)
+ * g[t] = Tanh(x[t] . xw_g + xb_g + h_cont . hw_g)
+ * c[t] = c_cont * f[t] + i[t] * g[t]
+ * h[t] = o[t] * Tanh(c[t])
+ *
+ * Here:
+ * t -- the timestep (ranges from 1 to T),
+ * * -- the inner product,
+ * . -- the Hadamard product (elementwise product).
+ *
+ * In this implementation the inner products for all gates are performed as single inner product for
+ * efficiency.
+ */
+// Unrolls a Caffe LSTM layer into per-timestep MIR operations; see the
+// comment block above for the notation and formulas being implemented.
+std::vector<mir::Operation::Output *>
+CaffeOpCreator::convertLSTM(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ const auto &params = layer.recurrent_param();
+
+ // Inputs to the layer: x (time-varying input) and cont (continuation flags).
+ auto x = inputs[0];
+ auto cont = inputs[1];
+ assert(inputs.size() == 2);
+
+ const auto &x_shape = x->getShape();
+ const int32_t seq_length = x_shape.dim(0);
+ const int32_t batch_size = x_shape.dim(1);
+ const int32_t hidden_size = params.num_output();
+
+ // Learned parameters of the layer. Tensors are transposed to match the ModelIR.
+ auto xw = createOp<ops::ConstantOp>(convertBlob(layer.blobs(0)))->getOutput(0);
+ auto xb = createOp<ops::ConstantOp>(convertBlob(layer.blobs(1)))->getOutput(0);
+ auto hw = createOp<ops::ConstantOp>(convertBlob(layer.blobs(2)))->getOutput(0);
+ xw = createOp<ops::TransposeOp>(xw, std::vector<std::size_t>{1, 0})->getOutput(0);
+ hw = createOp<ops::TransposeOp>(hw, std::vector<std::size_t>{1, 0})->getOutput(0);
+
+ // Add a dummy dimension so that element-wise operations perform properly.
+ cont = createOp<ops::ReshapeOp>(cont, Shape{seq_length, batch_size, 1})->getOutput(0);
+
+ // Initialize cell and hidden states with zeros.
+ auto zero_tensor = createZeroedTensor(Shape{1, batch_size, hidden_size});
+ auto c_t = createOp<ops::ConstantOp>(zero_tensor)->getOutput(0);
+ auto h_t = createOp<ops::ConstantOp>(zero_tensor)->getOutput(0);
+
+ // Precompute x[t] . xw + xb for all timesteps in one fully-connected op.
+ auto x_xw = createFullyConnected(x, xw, 2);
+ auto x_xw_b = createOp<ops::AddOp>(x_xw, xb)->getOutput(0);
+
+ // Split input and continuation tensors into seq_length slices.
+ std::vector<mir::Operation::Output *> x_xw_b_slices = createSplit(x_xw_b, seq_length, 0);
+ std::vector<mir::Operation::Output *> cont_slices = createSplit(cont, seq_length, 0);
+ std::vector<mir::Operation::Output *> h_slices(seq_length);
+
+ for (int32_t t = 0; t < seq_length; t++)
+ {
+ // Reset the carried states where the sequence does not continue.
+ auto c_cont_t = createOp<ops::MulOp>(c_t, cont_slices[t])->getOutput(0);
+ auto h_cont_t = createOp<ops::MulOp>(h_t, cont_slices[t])->getOutput(0);
+
+ auto x_xw_b_t = x_xw_b_slices[t];
+ auto h_hw_t = createFullyConnected(h_cont_t, hw, 2);
+ auto activation_inputs_concat = createOp<ops::AddOp>(x_xw_b_t, h_hw_t)->getOutput(0);
+ // Split the concatenated gate pre-activations into i, f, o, g.
+ auto activation_inputs = createSplit(activation_inputs_concat, 4, 2);
+
+ auto i_t = createOp<ops::SigmoidOp>(activation_inputs[0])->getOutput(0);
+ auto f_t = createOp<ops::SigmoidOp>(activation_inputs[1])->getOutput(0);
+ auto o_t = createOp<ops::SigmoidOp>(activation_inputs[2])->getOutput(0);
+ auto g_t = createOp<ops::TanhOp>(activation_inputs[3])->getOutput(0);
+
+ // c[t] = c_cont * f[t] + i[t] * g[t];  h[t] = o[t] * Tanh(c[t])
+ c_t = createOp<ops::AddOp>(createOp<ops::MulOp>(c_cont_t, f_t)->getOutput(0),
+ createOp<ops::MulOp>(i_t, g_t)->getOutput(0))
+ ->getOutput(0);
+ h_t = createOp<ops::MulOp>(createOp<ops::TanhOp>(c_t)->getOutput(0), o_t)->getOutput(0);
+
+ h_slices[t] = h_t;
+ }
+
+ // Concatenate per-timestep hidden states into the [T, N, D] output.
+ return {createOp<ops::ConcatOp>(h_slices, 0)->getOutput(0)};
+}
+
+} // namespace mir_caffe
diff --git a/compiler/mir-caffe-importer/caffe_op_creator.h b/compiler/mir-caffe-importer/caffe_op_creator.h
new file mode 100644
index 000000000..721bb90b8
--- /dev/null
+++ b/compiler/mir-caffe-importer/caffe_op_creator.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_CAFFE_OP_CREATOR_H
+#define MIR_CAFFE_OP_CREATOR_H
+
+#include <set>
+#include <map>
+#include <vector>
+#include <memory>
+
+#include "mir/Graph.h"
+#include "mir/TensorVariant.h"
+#include "mir/Shape.h"
+
+#include "caffe/proto/caffe.pb.h"
+
+namespace mir_caffe
+{
+
+// Translates individual Caffe layers into MIR operations on the given graph.
+// One convert* method exists per supported layer type; the check* methods
+// collect human-readable diagnostics for unsupported layer configurations.
+class CaffeOpCreator
+{
+public:
+ explicit CaffeOpCreator(mir::Graph *g) : _graph(g){};
+
+ std::vector<mir::Operation::Output *> convertInput(const caffe::LayerParameter &layer);
+
+ std::vector<mir::Operation::Output *>
+ convertConvolution(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertInnerProduct(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertConcat(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertPooling(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertSoftmax(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertReshape(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertReLU(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertScale(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertBatchNorm(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertDropout(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertDeconvolution(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertELU(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertEmbed(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertSigmoid(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertTanH(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertEltwise(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertSplit(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertLSTM(const caffe::LayerParameter &layer,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ // Validation helpers: append a diagnostic string for every unsupported
+ // configuration found in the given layer.
+ void checkConvolution(const caffe::LayerParameter &layer,
+ std::set<std::string> &problems_ops_set);
+
+ void checkPooling(const caffe::LayerParameter &layer, std::set<std::string> &problems_ops_set);
+
+ void checkReshape(const caffe::LayerParameter &layer, std::set<std::string> &problems_ops_set);
+
+ void checkBatchNorm(const caffe::LayerParameter &layer, std::set<std::string> &problems_ops_set);
+
+ void checkLSTM(const caffe::LayerParameter &layer, std::set<std::string> &problems_ops_set);
+
+private:
+ // Graph that receives the created operations; not owned by this class.
+ mir::Graph *_graph = nullptr;
+
+ std::vector<mir::Operation::Output *> createSplit(mir::Operation::Output *arg, int32_t num_parts,
+ int32_t axis);
+
+ mir::Operation::Output *createFullyConnected(mir::Operation::Output *input,
+ mir::Operation::Output *weights, int32_t axis);
+
+ mir::TensorVariant convertBlob(const caffe::BlobProto &blob);
+
+ template <typename OpType, typename... Types> mir::Operation *createOp(Types &&... args);
+};
+
+// Forwards construction of any MIR operation to the underlying graph.
+template <typename OpType, typename... Types>
+mir::Operation *CaffeOpCreator::createOp(Types &&... args)
+{
+ return _graph->create<OpType>(std::forward<Types>(args)...);
+}
+
+} // namespace mir_caffe
+
+#endif // MIR_CAFFE_OP_CREATOR_H
diff --git a/compiler/mir-caffe-importer/caffe_op_types.h b/compiler/mir-caffe-importer/caffe_op_types.h
new file mode 100644
index 000000000..30fce7d5f
--- /dev/null
+++ b/compiler/mir-caffe-importer/caffe_op_types.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_CAFFE_OP_TYPES_H
+#define MIR_CAFFE_OP_TYPES_H
+
+namespace mir_caffe
+{
+
+// Layer types defined by the Caffe proto; used by the importer to dispatch
+// layers to the appropriate converter (or to report unsupported ones).
+enum class CaffeOpType
+{
+ absVal,
+ accuracy,
+ argMax,
+ batchNorm,
+ batchReindex,
+ bias,
+ BNLL,
+ clip,
+ concat,
+ contrastiveLoss,
+ convolution,
+ crop,
+ data,
+ deconvolution,
+ dropout,
+ dummyData,
+ eltwise,
+ ELU,
+ embed,
+ euclidianLoss,
+ exp,
+ filter,
+ flatten,
+ HDF5Data,
+ HDF5Output,
+ hingeLoss,
+ im2Col,
+ imageData,
+ infogainLoss,
+ innerProduct,
+ input,
+ log,
+ LRN,
+ LSTM,
+ memoryData,
+ multinomialLogisticLoss,
+ MVN,
+ parameter,
+ pooling,
+ power,
+ PReLU,
+ python,
+ recurrent,
+ reduction,
+ ReLU,
+ reshape,
+ RNN,
+ scale,
+ sigmoidCrossEntropyLoss,
+ sigmoid,
+ silence,
+ slice,
+ softmax,
+ softmaxWithLoss,
+ split,
+ SPP,
+ tanh,
+ threshold,
+ tile,
+ windowData
+};
+
+} // namespace mir_caffe
+
+#endif // MIR_CAFFE_OP_TYPES_H
diff --git a/compiler/mir-caffe-importer/requires.cmake b/compiler/mir-caffe-importer/requires.cmake
new file mode 100644
index 000000000..1059c50d3
--- /dev/null
+++ b/compiler/mir-caffe-importer/requires.cmake
@@ -0,0 +1 @@
+require("mir")
diff --git a/compiler/mir-caffe2-importer/CMakeLists.txt b/compiler/mir-caffe2-importer/CMakeLists.txt
new file mode 100644
index 000000000..da55839a7
--- /dev/null
+++ b/compiler/mir-caffe2-importer/CMakeLists.txt
@@ -0,0 +1,29 @@
+nnas_find_package(PytorchSource QUIET)
+nnas_find_package(Protobuf QUIET)
+
+if (NOT PytorchSource_FOUND OR NOT Protobuf_FOUND)
+ return()
+endif()
+
+Protobuf_Generate(CAFFE2_PROTO "${CMAKE_CURRENT_BINARY_DIR}/generated/caffe2"
+ "${PytorchSource_DIR}" "caffe2/proto/caffe2.proto")
+
+add_library(caffe2proto STATIC ${CAFFE2_PROTO_SOURCES})
+set_target_properties(caffe2proto PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(caffe2proto PUBLIC ${CAFFE2_PROTO_INCLUDE_DIRS})
+target_link_libraries(caffe2proto PUBLIC libprotobuf)
+
+
+set(MIR_CAFFE2_IMPORTER_SOURCES
+ caffe2_importer.cpp
+ caffe2_importer.h
+ caffe2_op_creator.cpp
+ caffe2_op_creator.h
+ caffe2_op_types.h
+ caffe2_proto_helper.cpp
+ caffe2_proto_helper.h)
+
+add_library(mir_caffe2_importer STATIC ${MIR_CAFFE2_IMPORTER_SOURCES})
+set_target_properties(mir_caffe2_importer PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(mir_caffe2_importer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_link_libraries(mir_caffe2_importer PUBLIC mir caffe2proto PRIVATE stdex)
diff --git a/compiler/mir-caffe2-importer/caffe2_importer.cpp b/compiler/mir-caffe2-importer/caffe2_importer.cpp
new file mode 100644
index 000000000..5a6eef0aa
--- /dev/null
+++ b/compiler/mir-caffe2-importer/caffe2_importer.cpp
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "caffe2_importer.h"
+#include "caffe2/proto/caffe2.pb.h"
+#include "caffe2_op_types.h"
+#include "caffe2_op_creator.h"
+#include "caffe2_proto_helper.h"
+
+#include "mir/ops/InputOp.h"
+#include "mir/ops/OutputOp.h"
+
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/io/coded_stream.h>
+
+#include <fcntl.h>
+
+#include <cassert>
+#include <cerrno>
+#include <cstring>
+#include <stdex/Memory.h>
+#include <stdexcept>
+#include <utility>
+#include <set>
+
+namespace
+{
+
+using namespace mir_caffe2;
+
+// Converts a Caffe2 model (a 'predict_net'/'init_net' pair of serialized
+// NetDef protobufs) into a MIR graph. Internal to this translation unit;
+// callers use mir_caffe2::loadModel() instead.
+class Caffe2Importer
+{
+public:
+ explicit Caffe2Importer(std::string predict_net, std::string init_net,
+ const std::vector<std::vector<int>> &input_shapes);
+
+ /// @brief Load the model and convert it into a MIR Graph.
+ std::unique_ptr<mir::Graph> importModel();
+
+ ~Caffe2Importer();
+
+private:
+ // Paths to the serialized 'predict_net' and 'init_net' protobuf files.
+ std::string _predictNet;
+ std::string _initNet;
+ // Graph under construction; ownership is transferred to the caller by createIR().
+ std::unique_ptr<mir::Graph> _graph;
+ // Parsed NetDefs, populated by import().
+ std::unique_ptr<caffe2::NetDef> _predict_net;
+ std::unique_ptr<caffe2::NetDef> _init_net;
+ std::unique_ptr<Caffe2OpCreator> _opCreator;
+ // Shapes of the model inputs, in the order supplied by the caller.
+ std::vector<mir::Shape> _inputShapes;
+
+ // Mapping from Caffe2 operator type names to the internal op enumeration.
+ static const std::map<std::string, SupportedCaffe2OpType> _operatorTypes;
+
+ // Maps Caffe2 operator input names to corresponding MIR operation outputs.
+ std::unordered_map<std::string, mir::Operation::Output *> _blobNameToOutput;
+
+ // Parses both NetDef files and checks for unsupported operators.
+ void import();
+ // Builds the MIR graph from the parsed NetDefs and releases it to the caller.
+ std::unique_ptr<mir::Graph> createIR();
+
+ /**
+ * @brief Pass through caffe2 graph and collect ops unsupported by NNC
+ * @throw PassException with message, containing detected problems
+ */
+ void collectUnsupportedOps();
+
+ /**
+ * @brief Creating MIR node from single caffe2 operator
+ */
+ void createMIRNodesFromOp(const ::caffe2::OperatorDef &op);
+
+ /**
+ * @brief Returns MIR operation outputs corresponding to the inputs of the given operator.
+ */
+ std::vector<mir::Operation::Output *> getInputMIROps(const ::caffe2::OperatorDef &op);
+
+ // Records 'output' as the producer of blob 'tensor_name' (see _blobNameToOutput).
+ void setOutputForTensor(const std::string &tensor_name, Operation::Output *output);
+ // Looks up the MIR output previously registered for blob 'name'; throws if absent.
+ mir::Operation::Output *getOutputForTensor(const std::string &name) const;
+
+ /**
+ * @brief Mark output MIR nodes
+ */
+ void setGraphOutputs();
+};
+
+using namespace ::caffe2;
+using mir::Shape;
+
+// Stores the file paths, converts the user-supplied integer shape vectors
+// into mir::Shape, and prepares an empty graph plus the op creator bound to it.
+Caffe2Importer::Caffe2Importer(std::string predict_net, std::string init_net,
+ const std::vector<std::vector<int>> &input_shapes)
+ : _predictNet(std::move(predict_net)), _initNet(std::move(init_net))
+{
+ for (auto &shape : input_shapes)
+ _inputShapes.emplace_back(shape);
+
+ _graph = stdex::make_unique<mir::Graph>();
+ _opCreator = stdex::make_unique<Caffe2OpCreator>(_graph.get());
+}
+
+Caffe2Importer::~Caffe2Importer() = default;
+
+// Parses a serialized NetDef protobuf from 'filename' into 'net'.
+// Throws std::runtime_error if the file cannot be opened, cannot be parsed,
+// or contains trailing bytes after the message.
+static void loadModelFile(const std::string &filename, caffe2::NetDef *net)
+{
+ GOOGLE_PROTOBUF_VERIFY_VERSION;
+
+ int file_handle = open(filename.c_str(), O_RDONLY);
+
+ if (file_handle == -1)
+ throw std::runtime_error("Couldn't open file \"" + filename + "\": " + std::strerror(errno) +
+ ".");
+
+ // The stream takes ownership of the descriptor and closes it on destruction.
+ google::protobuf::io::FileInputStream file_stream(file_handle);
+ file_stream.SetCloseOnDelete(true);
+
+ // Lift protobuf's default message size limit so large models parse.
+ // NOTE(review): INT_MAX needs <climits>, which is not among this file's
+ // includes (fcntl.h/cassert/cerrno/cstring/...); presumably pulled in
+ // transitively — confirm. The two-argument SetTotalBytesLimit overload is
+ // deprecated in newer protobuf releases.
+ google::protobuf::io::CodedInputStream coded_stream(&file_stream);
+ coded_stream.SetTotalBytesLimit(INT_MAX, INT_MAX);
+
+ if (!net->ParseFromCodedStream(&coded_stream))
+ throw std::runtime_error("Couldn't parse file \"" + filename + "\".");
+
+ // If the file has not been consumed entirely, assume that the file is in the wrong format.
+ if (!coded_stream.ConsumedEntireMessage())
+ throw std::runtime_error("File \"" + filename + "\" has not been consumed entirely.");
+}
+
+// Parses both NetDef files and verifies every predict-net operator is supported.
+void Caffe2Importer::import()
+{
+ _predict_net = stdex::make_unique<NetDef>();
+ loadModelFile(_predictNet, _predict_net.get());
+
+ _init_net = stdex::make_unique<NetDef>();
+ loadModelFile(_initNet, _init_net.get());
+
+ collectUnsupportedOps();
+}
+
+// Translates the parsed NetDefs into a MIR graph and hands ownership of the
+// graph to the caller (the importer holds no graph afterwards).
+std::unique_ptr<mir::Graph> Caffe2Importer::createIR()
+{
+ // Load initializers.
+ for (const auto &op : _init_net->op())
+ createMIRNodesFromOp(op);
+
+ // Create inputs. This has to be done after processing initializers, because they may contain
+ // fake inputs.
+ // TODO Caffe2 does not provide a way to detect model inputs and outputs. For now assume that:
+ // - there is exactly one input;
+ // - the input is for the first layer;
+ // - the input has 'float' element type.
+ const auto &input_name = _predict_net->op(0).input(0);
+ mir::TensorType input_type(mir::DataType::FLOAT32, _inputShapes[0]);
+ auto input = _graph->create<mir::ops::InputOp>(input_type)->getOutput(0);
+ setOutputForTensor(input_name, input);
+
+ for (const auto &op : _predict_net->op())
+ createMIRNodesFromOp(op);
+
+ setGraphOutputs();
+
+ // Releases _graph; the importer must not be reused after this point.
+ return std::move(_graph);
+}
+
+// Convenience entry point: parse, validate, then build the graph.
+std::unique_ptr<mir::Graph> Caffe2Importer::importModel()
+{
+ import();
+ return createIR();
+}
+
+// Scans the predict net for operator types missing from _operatorTypes and
+// throws one std::runtime_error listing all of them (a std::set is used so
+// each unsupported type is reported once, in sorted order).
+void Caffe2Importer::collectUnsupportedOps()
+{
+ std::set<std::string> unsupportedOps;
+ for (const auto &op : _predict_net->op())
+ {
+ if (_operatorTypes.find(op.type()) == _operatorTypes.end())
+ unsupportedOps.insert(op.type());
+ }
+
+ if (!unsupportedOps.empty())
+ {
+ std::string exceptionMsg("Can't load model, unsupported operators:");
+ for (const auto &op : unsupportedOps)
+ exceptionMsg.append("\n * " + op);
+ throw std::runtime_error(exceptionMsg);
+ }
+}
+
+// Dispatches one Caffe2 operator to the matching Caffe2OpCreator conversion
+// and registers the produced MIR outputs under the operator's output blob
+// names. collectUnsupportedOps() guarantees the type is present in
+// _operatorTypes, so the default branch is unreachable.
+void Caffe2Importer::createMIRNodesFromOp(const OperatorDef &op)
+{
+ std::vector<mir::Operation::Output *> outputs;
+
+ auto inputs = getInputMIROps(op);
+
+ SupportedCaffe2OpType opType = _operatorTypes.at(op.type());
+ switch (opType)
+ {
+ case SupportedCaffe2OpType::constantFill:
+ case SupportedCaffe2OpType::givenTensorFill:
+ case SupportedCaffe2OpType::givenTensorInt64Fill:
+ outputs = _opCreator->convertConstant(inputs, op);
+ break;
+ case SupportedCaffe2OpType::add:
+ outputs = _opCreator->convertAdd(inputs, op);
+ break;
+ case SupportedCaffe2OpType::averagePool:
+ outputs = _opCreator->convertAveragePool(inputs, op);
+ break;
+ case SupportedCaffe2OpType::conv:
+ outputs = _opCreator->convertConv(inputs, op);
+ break;
+ case SupportedCaffe2OpType::concat:
+ outputs = _opCreator->convertConcat(inputs, op);
+ break;
+ case SupportedCaffe2OpType::dropout:
+ outputs = _opCreator->convertDropout(inputs, op);
+ break;
+ case SupportedCaffe2OpType::FC:
+ outputs = _opCreator->convertFC(inputs, op);
+ break;
+ case SupportedCaffe2OpType::maxPool:
+ outputs = _opCreator->convertMaxPool(inputs, op);
+ break;
+ case SupportedCaffe2OpType::mul:
+ outputs = _opCreator->convertMul(inputs, op);
+ break;
+ case SupportedCaffe2OpType::relu:
+ outputs = _opCreator->convertRelu(inputs);
+ break;
+ case SupportedCaffe2OpType::resizeNearest:
+ outputs = _opCreator->convertResizeNearest(inputs, op);
+ break;
+ case SupportedCaffe2OpType::sigmoid:
+ outputs = _opCreator->convertSigmoid(inputs);
+ break;
+ case SupportedCaffe2OpType::softmax:
+ outputs = _opCreator->convertSoftmax(inputs, op);
+ break;
+ case SupportedCaffe2OpType::spatialBN:
+ outputs = _opCreator->convertSpatialBN(inputs, op);
+ break;
+ case SupportedCaffe2OpType::sum:
+ outputs = _opCreator->convertSum(inputs);
+ break;
+ case SupportedCaffe2OpType::clip:
+ outputs = _opCreator->convertClip(inputs, op);
+ break;
+ case SupportedCaffe2OpType::reshape:
+ outputs = _opCreator->convertReshape(inputs, op);
+ break;
+ default:
+ assert(false && "All unsupported types should have been found before this pass.");
+ }
+
+ // A converter may emit fewer outputs than the operator declares (e.g.
+ // convertConstant returns none for fake inputs); only the produced ones
+ // are registered. Assumes op.output(i) exists for each produced output.
+ for (size_t i = 0; i < outputs.size(); ++i)
+ {
+ setOutputForTensor(op.output(i), outputs[i]);
+ }
+}
+
+// Resolves each input blob name of 'op' to the MIR output previously
+// registered for it (throws via getOutputForTensor if a name is unknown).
+std::vector<mir::Operation::Output *> Caffe2Importer::getInputMIROps(const OperatorDef &op)
+{
+ std::vector<mir::Operation::Output *> inputs;
+
+ for (const auto &input_name : op.input())
+ {
+ inputs.push_back(getOutputForTensor(input_name));
+ }
+
+ return inputs;
+}
+
+// Registers 'output' as the current producer of blob 'tensor_name', clearing
+// the name from any previous producer of the same blob.
+void Caffe2Importer::setOutputForTensor(const std::string &tensor_name, Operation::Output *output)
+{
+ auto it = _blobNameToOutput.find(tensor_name);
+ if (it != _blobNameToOutput.cend())
+ {
+ // caffe2 input blob name could be same as output blob name, and next line will overwrite
+ // '_blobNameToOpOutput' element, but in all networks that I saw it was not a problem
+ it->second->setName("");
+ }
+ output->setName(tensor_name);
+ _blobNameToOutput[tensor_name] = output;
+}
+
+// Lookup helper; std::map::at throws std::out_of_range for unknown blobs.
+mir::Operation::Output *Caffe2Importer::getOutputForTensor(const std::string &name) const
+{
+ return _blobNameToOutput.at(name);
+}
+
+// Attaches an OutputOp to the producer of the last operator's first output.
+void Caffe2Importer::setGraphOutputs()
+{
+ // Create outputs.
+ // TODO Caffe2 does not provide a way to detect model inputs and outputs. For now assume that:
+ // - there is exactly one output;
+ // - the output is from the last layer.
+ const auto &output_name = _predict_net->op().rbegin()->output(0);
+ auto output = getOutputForTensor(output_name);
+ _graph->create<mir::ops::OutputOp>(output);
+}
+
+// Registry of the Caffe2 operator type names this importer understands,
+// mapped to the internal SupportedCaffe2OpType enumeration. Any predict-net
+// operator whose type is absent here is rejected by collectUnsupportedOps().
+const std::map<std::string, SupportedCaffe2OpType> Caffe2Importer::_operatorTypes = {
+ {"Add", SupportedCaffe2OpType::add},
+ {"AveragePool", SupportedCaffe2OpType::averagePool},
+ {"Conv", SupportedCaffe2OpType::conv},
+ {"Concat", SupportedCaffe2OpType::concat},
+ {"ConstantFill", SupportedCaffe2OpType::constantFill},
+ {"Dropout", SupportedCaffe2OpType::dropout},
+ {"FC", SupportedCaffe2OpType::FC},
+ {"GivenTensorFill", SupportedCaffe2OpType::givenTensorFill},
+ {"MaxPool", SupportedCaffe2OpType::maxPool},
+ {"Mul", SupportedCaffe2OpType::mul},
+ {"Relu", SupportedCaffe2OpType::relu},
+ {"ResizeNearest", SupportedCaffe2OpType::resizeNearest},
+ {"Sigmoid", SupportedCaffe2OpType::sigmoid},
+ {"Softmax", SupportedCaffe2OpType::softmax},
+ {"SpatialBN", SupportedCaffe2OpType::spatialBN},
+ {"Sum", SupportedCaffe2OpType::sum},
+ {"Clip", SupportedCaffe2OpType::clip},
+ {"Reshape", SupportedCaffe2OpType::reshape},
+ {"GivenTensorInt64Fill", SupportedCaffe2OpType::givenTensorInt64Fill},
+};
+}
+
+namespace mir_caffe2
+{
+
+// Public entry point: builds a one-shot Caffe2Importer and runs the full
+// parse + convert pipeline. Exceptions from parsing/conversion propagate.
+std::unique_ptr<mir::Graph> loadModel(std::string predict_net, std::string init_net,
+ const std::vector<std::vector<int>> &input_shapes)
+{
+ Caffe2Importer importer(std::move(predict_net), std::move(init_net), input_shapes);
+ return importer.importModel();
+}
+
+} // namespace mir_caffe2
diff --git a/compiler/mir-caffe2-importer/caffe2_importer.h b/compiler/mir-caffe2-importer/caffe2_importer.h
new file mode 100644
index 000000000..213fbe98d
--- /dev/null
+++ b/compiler/mir-caffe2-importer/caffe2_importer.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_CAFFE2_IMPORTER_H
+#define MIR_CAFFE2_IMPORTER_H
+
+#include <string>
+#include <memory>
+#include <vector>
+
+#include "mir/Graph.h"
+
+namespace mir_caffe2
+{
+
+/// @brief Parse the given 'predict_net'/'init_net' protobuf files and convert
+/// them into a MIR graph; 'input_shapes' supplies the shape of each model input.
+/// @throw std::runtime_error on I/O or parse failure, or on unsupported operators.
+std::unique_ptr<mir::Graph> loadModel(std::string predict_net, std::string init_net,
+ const std::vector<std::vector<int>> &input_shapes);
+
+} // namespace mir_caffe2
+
+#endif // MIR_CAFFE2_IMPORTER_H
diff --git a/compiler/mir-caffe2-importer/caffe2_op_creator.cpp b/compiler/mir-caffe2-importer/caffe2_op_creator.cpp
new file mode 100644
index 000000000..d279fb1ed
--- /dev/null
+++ b/compiler/mir-caffe2-importer/caffe2_op_creator.cpp
@@ -0,0 +1,547 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "caffe2_op_creator.h"
+#include "caffe2_proto_helper.h"
+
+#include "mir/ops/AddOp.h"
+#include "mir/ops/AvgPool2DOp.h"
+#include "mir/ops/CappedReluOp.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/FullyConnectedOp.h"
+#include "mir/ops/MaxPool2DOp.h"
+#include "mir/ops/MulOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/ops/ReshapeOp.h"
+#include "mir/ops/ResizeOp.h"
+#include "mir/ops/SigmoidOp.h"
+#include "mir/ops/SoftmaxOp.h"
+#include "mir/ops/TransposeOp.h"
+
+#include "mir/Index.h"
+#include "mir/Shape.h"
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+#include "mir/TensorUtil.h"
+
+#include <cmath>
+#include <vector>
+
+namespace mir_caffe2
+{
+
+using namespace ::caffe2;
+using namespace mir;
+
+//
+// Helper functions
+//
+
+// Extracts 2-D spatial padding from a conv/pool operator as the pair
+// {padding_before{top,left}, padding_after{bottom,right}}. Priority:
+// a 4-element 'pads' argument, then the per-side 'pad_t/pad_l/pad_b/pad_r'
+// arguments, then the scalar 'pad' (default 0) applied to all sides.
+static std::pair<std::vector<int32_t>, std::vector<int32_t>>
+getPadding(const ::caffe2::OperatorDef &op)
+{
+
+ if (hasArgument(op.arg(), "pads"))
+ {
+ // pads order: t l b r
+ auto pads_arg = findArgumentByName(op.arg(), "pads");
+
+ std::vector<int32_t> paddings;
+ for (const auto &pad : pads_arg.ints())
+ paddings.push_back(static_cast<int32_t>(pad));
+
+ assert(paddings.size() == 4);
+
+ int32_t pad_t = paddings[0];
+ int32_t pad_l = paddings[1];
+ int32_t pad_b = paddings[2];
+ int32_t pad_r = paddings[3];
+
+ std::vector<int32_t> padding_before{pad_t, pad_l};
+ std::vector<int32_t> padding_after{pad_b, pad_r};
+ return {padding_before, padding_after};
+ }
+
+ bool has_custom_pad = hasArgument(op.arg(), "pad_l") || hasArgument(op.arg(), "pad_r") ||
+ hasArgument(op.arg(), "pad_t") || hasArgument(op.arg(), "pad_b");
+
+ if (has_custom_pad)
+ {
+ int32_t pad_l = getSingleArgument(op, "pad_l", 0);
+ int32_t pad_t = getSingleArgument(op, "pad_t", 0);
+ int32_t pad_r = getSingleArgument(op, "pad_r", 0);
+ int32_t pad_b = getSingleArgument(op, "pad_b", 0);
+
+ std::vector<int32_t> padding_before{pad_t, pad_l};
+ std::vector<int32_t> padding_after{pad_b, pad_r};
+ return {padding_before, padding_after};
+ }
+
+ // Single scalar pad applied symmetrically to all four sides.
+ int32_t pad = getSingleArgument(op, "pad", 0);
+ return {{pad, pad}, {pad, pad}};
+}
+
+// Extracts the {h, w} strides of a conv/pool operator from the scalar
+// 'stride' argument (default 1, duplicated for both axes) or the vector
+// 'strides' argument; if both are present, 'strides' values are appended
+// after the duplicated scalar. Asserts that at least one form was given.
+static std::vector<std::int32_t> getStrides(const ::caffe2::OperatorDef &op)
+{
+ std::vector<std::int32_t> strides;
+
+ if (hasArgument(op.arg(), "stride"))
+ {
+ std::int32_t stride = getSingleArgument(op, "stride", 1);
+ strides = {stride, stride};
+ }
+
+ if (hasArgument(op.arg(), "strides"))
+ {
+ // strides order: h w
+ auto strides_arg = findArgumentByName(op.arg(), "strides");
+ for (const auto &s : strides_arg.ints())
+ strides.push_back(s);
+ }
+
+ assert(!strides.empty() && "Strides not found");
+
+ return strides;
+}
+
+// Determines the {h, w} pooling window: the whole spatial extent of the
+// (rank-4, assumed NCHW) input when 'global_pooling' is set, otherwise
+// 'kernel_h'/'kernel_w', the 'kernels' vector, or the scalar 'kernel'.
+static std::vector<std::int32_t> getWindowSize(const ::caffe2::OperatorDef &op,
+ const std::vector<mir::Operation::Output *> &inputs)
+{
+ int is_global_pooling = getSingleArgument(op, "global_pooling", 0);
+ bool has_custom_kernel_size =
+ hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
+ bool has_custom_kernels_size = hasArgument(op.arg(), "kernels");
+
+ int kernel_h(0), kernel_w(0);
+ if (is_global_pooling)
+ {
+ const auto &input_shape = inputs[0]->getShape();
+ assert(input_shape.rank() == 4 && "getWindowSize() inputs must be of rank 4");
+ // Dims 2 and 3 are the spatial axes under the NCHW assumption.
+ kernel_h = input_shape.dim(2);
+ kernel_w = input_shape.dim(3);
+ }
+ else
+ {
+ if (has_custom_kernel_size)
+ {
+ kernel_h = getSingleArgument(op, "kernel_h", 0);
+ kernel_w = getSingleArgument(op, "kernel_w", 0);
+ }
+ else
+ {
+ if (has_custom_kernels_size)
+ {
+ // kernels order: h w
+ std::vector<int32_t> kernels;
+ auto kernels_arg = findArgumentByName(op.arg(), "kernels");
+ for (const auto &ker : kernels_arg.ints())
+ kernels.push_back(static_cast<int32_t>(ker));
+ assert(kernels.size() == 2);
+ kernel_h = kernels[0];
+ kernel_w = kernels[1];
+ }
+ else
+ {
+ // Square window from the scalar 'kernel' argument.
+ kernel_h = kernel_w = getSingleArgument(op, "kernel", 0);
+ }
+ }
+ }
+ return {kernel_h, kernel_w};
+}
+
+//
+// Check functions
+//
+
+// Rejects operators whose 'order' argument requests anything but NCHW.
+static void checkLayout(const OperatorDef &op)
+{
+ if (getSingleArgument(op, "order", "NCHW") != "NCHW")
+ throw std::runtime_error(op.type() + ": only 'NCHW' axis order is supported");
+}
+
+// Validates layout plus padding/kernel argument combinations shared by
+// convolution and pooling operators: per-side arguments may not be mixed
+// with the scalar form, and when per-side/per-axis arguments are used, all
+// of them must be present.
+static void checkConvLikeOp(const ::caffe2::OperatorDef &op)
+{
+ checkLayout(op);
+
+ // Padding
+ bool has_custom_pad = hasArgument(op.arg(), "pad_l") || hasArgument(op.arg(), "pad_r") ||
+ hasArgument(op.arg(), "pad_t") || hasArgument(op.arg(), "pad_b");
+
+ if (has_custom_pad && hasArgument(op.arg(), "pad"))
+ throw std::runtime_error("Custom pad can't be combined with overall pad");
+
+ if (has_custom_pad &&
+ !(hasArgument(op.arg(), "pad_l") && hasArgument(op.arg(), "pad_r") &&
+ hasArgument(op.arg(), "pad_t") && hasArgument(op.arg(), "pad_b")))
+ throw std::runtime_error("If one custom pad specified - all custom pads must be specified");
+
+ // Kernel size
+ bool has_custom_kernel_size =
+ hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
+
+ if (has_custom_kernel_size && hasArgument(op.arg(), "kernel"))
+ throw std::runtime_error("Custom kernel size can't be combined with overall kernel size");
+
+ if (has_custom_kernel_size &&
+ !(hasArgument(op.arg(), "kernel_h") && hasArgument(op.arg(), "kernel_w")))
+ throw std::runtime_error(
+ "If one custom kernel size specified - all custom kernel sizes must be specified");
+}
+
+// Builds a mir::TensorVariant from a GivenTensorFill-style operator: the
+// 'shape' argument gives dimensions, 'values' the payload. Element type is
+// FLOAT32 when 'values' holds floats, INT64 for GivenTensorInt64Fill,
+// otherwise INT32. NOTE(review): 'src_data' points into the protobuf
+// message; this presumably relies on TensorVariant copying the data during
+// construction — confirm against mir::TensorVariant.
+static mir::TensorVariant createTensor(const OperatorDef &op)
+{
+ assert(hasArgument(op.arg(), "shape") && hasArgument(op.arg(), "values"));
+
+ const auto &shape = findArgumentByName(op.arg(), "shape");
+ const auto &values = findArgumentByName(op.arg(), "values");
+
+ mir::DataType element_type;
+ const void *src_data;
+ // if values on floats
+ if (!values.floats().empty())
+ {
+ element_type = mir::DataType::FLOAT32;
+ src_data = values.floats().data();
+ }
+ else
+ {
+ assert(!values.ints().empty());
+ if (op.type() == "GivenTensorInt64Fill")
+ {
+ element_type = mir::DataType::INT64;
+ }
+ else
+ {
+ element_type = mir::DataType::INT32;
+ }
+ src_data = values.ints().data();
+ }
+
+ mir::Shape tensor_shape(shape.ints_size());
+
+ for (int i = 0; i < shape.ints_size(); ++i)
+ {
+ tensor_shape.dim(i) = shape.ints(i);
+ }
+
+ return mir::TensorVariant({element_type, tensor_shape}, src_data);
+}
+
+//
+// Convert functions
+//
+
+// Converts ConstantFill/GivenTensorFill/GivenTensorInt64Fill into a MIR
+// ConstantOp. Returns no outputs for data-less constants (fake inputs),
+// which createMIRNodesFromOp then simply skips.
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertConstant(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
+{
+ // Constant may not contain any data if it is a fake input.
+ if (!hasArgument(op.arg(), "values"))
+ return {};
+
+ return {createOp<ops::ConstantOp>(createTensor(op))->getOutput(0)};
+}
+
+// Converts Caffe2 Add. With 'broadcast' set, the 1-D right-hand side is
+// reshaped to {1, C, 1, 1} so MIR's AddOp broadcasts it over NCHW.
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertAdd(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
+{
+ assert(inputs.size() == 2);
+ auto lhs = inputs[0];
+ auto rhs = inputs[1];
+
+ if (getSingleArgument(op, "broadcast", 0) != 0)
+ {
+ // FIXME This only works when 'axis' == 1 and the second input is 1-D.
+ rhs = createOp<ops::ReshapeOp>(rhs, Shape{1, rhs->getShape().dim(0), 1, 1})->getOutput(0);
+ auto result = createOp<ops::AddOp>(lhs, rhs)->getOutput(0);
+ return {result};
+ }
+
+ auto result = createOp<ops::AddOp>(lhs, rhs)->getOutput(0);
+ return {result};
+}
+
+// Converts Caffe2 AveragePool to MIR AvgPool2DOp (NCHW). Padding, window
+// and strides come from the shared helpers; padded cells are excluded from
+// the average (include_pad = false).
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertAveragePool(const std::vector<mir::Operation::Output *> &inputs,
+ const OperatorDef &op)
+{
+ checkConvLikeOp(op);
+
+ assert(inputs.size() == 1);
+ auto input = inputs[0];
+
+ AvgPool2DOpAttributes attributes;
+ std::tie(attributes.padding_before, attributes.padding_after) = getPadding(op);
+ attributes.window = getWindowSize(op, inputs);
+ attributes.strides = getStrides(op);
+ attributes.include_pad = false;
+ attributes.data_format = DataFormat::NCHW;
+ auto result = createOp<ops::AvgPool2DOp>(input, attributes)->getOutput(0);
+ return {result};
+}
+
+// Converts Caffe2 Conv to MIR Conv2DOp. The kernel is transposed from
+// Caffe2's OIHW layout to MIR's OHWI; an optional third input becomes a
+// channel-wise bias added after the convolution.
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertConv(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
+{
+ // dilation order: h w (not used)
+ mir::Conv2DOpAttributes attributes;
+ attributes.strides = getStrides(op);
+ std::tie(attributes.padding_before, attributes.padding_after) = getPadding(op);
+ attributes.num_groups = getSingleArgument(op, "group", 1);
+ attributes.data_format = DataFormat::NCHW;
+
+ std::vector<std::size_t> perm{0, 2, 3, 1}; // OIHW -> OHWI
+ auto kernel = createOp<ops::TransposeOp>(inputs[1], perm)->getOutput(0);
+ auto result = createOp<ops::Conv2DOp>(inputs[0], kernel, attributes)->getOutput(0);
+
+ if (op.input_size() > 2)
+ {
+ auto bias = inputs[2];
+ // Reshape the 1-D bias to {1, C, 1, 1} so it broadcasts over NCHW.
+ bias = createOp<ops::ReshapeOp>(bias, Shape{1, bias->getShape().dim(0), 1, 1})->getOutput(0);
+ result = createOp<ops::AddOp>(result, bias)->getOutput(0);
+ }
+
+ return {result};
+}
+
+// Converts Caffe2 Concat to MIR ConcatOp along the 'axis' argument
+// (default 1, the channel axis in NCHW).
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertConcat(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
+{
+ checkLayout(op);
+
+ // `1` corresponds to the default (channels) axis.
+ int axis = getSingleArgument(op, "axis", 1);
+ auto result = createOp<ops::ConcatOp>(inputs, axis);
+ return {result->getOutput(0)};
+}
+
+// Dropout is identity at inference time, so the input passes through
+// unchanged and no MIR node is created.
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertDropout(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &)
+{
+ // This is a no-op in inference mode.
+ return {inputs[0]};
+}
+
+// Converts Caffe2 FC: flatten input to 2-D, multiply by the transposed
+// weights, then add the bias. Non-default 'axis'/'axis_w'/'float16_compute'
+// arguments are rejected. NOTE(review): inputs[2] (bias) is read
+// unconditionally — assumes FC always carries a bias input; confirm.
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertFC(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
+{
+ for (auto &s : {"axis", "axis_w", "float16_compute"})
+ if (hasArgument(op.arg(), s))
+ throw std::runtime_error(std::string("FC: only default '") + s + "' value is supported");
+
+ const auto &input_shape = inputs[0]->getShape();
+ // Transform input into 2-D tensor by flattening axes
+ Shape shape{input_shape.dim(0), input_shape.numElements() / input_shape.dim(0)};
+
+ auto reshape = createOp<ops::ReshapeOp>(inputs[0], shape)->getOutput(0);
+ auto weights =
+ createOp<ops::TransposeOp>(inputs[1], std::vector<std::size_t>{1, 0})->getOutput(0);
+ auto result = createOp<ops::FullyConnectedOp>(reshape, weights)->getOutput(0);
+ result = createOp<ops::AddOp>(result, inputs[2])->getOutput(0);
+
+ return {result};
+}
+
+// Converts Caffe2 MaxPool to MIR MaxPool2DOp (NCHW), mirroring
+// convertAveragePool's handling of padding, window and strides.
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertMaxPool(const std::vector<mir::Operation::Output *> &inputs,
+ const OperatorDef &op)
+{
+ checkConvLikeOp(op);
+
+ assert(inputs.size() == 1);
+ auto input = inputs[0];
+
+ MaxPool2DOpAttributes attributes;
+ std::tie(attributes.padding_before, attributes.padding_after) = getPadding(op);
+ attributes.window = getWindowSize(op, inputs);
+ attributes.strides = getStrides(op);
+ attributes.data_format = DataFormat::NCHW;
+ auto result = createOp<ops::MaxPool2DOp>(input, attributes)->getOutput(0);
+ return {result};
+}
+
+// Converts Caffe2 Mul; identical broadcast handling to convertAdd, with
+// MulOp instead of AddOp.
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertMul(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
+{
+ assert(inputs.size() == 2);
+ auto lhs = inputs[0];
+ auto rhs = inputs[1];
+
+ if (getSingleArgument(op, "broadcast", 0) != 0)
+ {
+ // FIXME This only works when `axis` == 1 and the second input is 1-D.
+ rhs = createOp<ops::ReshapeOp>(rhs, Shape{1, rhs->getShape().dim(0), 1, 1})->getOutput(0);
+ auto result = createOp<ops::MulOp>(lhs, rhs)->getOutput(0);
+ return {result};
+ }
+
+ auto result = createOp<ops::MulOp>(lhs, rhs)->getOutput(0);
+ return {result};
+}
+
+// Converts Caffe2 Relu to MIR ReluOp (element-wise, no attributes).
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertRelu(const std::vector<mir::Operation::Output *> &inputs)
+{
+ auto relu = createOp<ops::ReluOp>(inputs[0]);
+ return {relu->getOutput(0)};
+}
+
+// Converts Caffe2 ResizeNearest to MIR ResizeOp (nearest-neighbor).
+// Scales are {1, 1, height_scale, width_scale} under the NCHW assumption;
+// input must be rank 4.
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertResizeNearest(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
+{
+ std::vector<float> scales(4);
+ assert(inputs[0]->getShape().rank() == 4 && "only 4d tensors is supported");
+ // Assuming NCHW format.
+ scales[0] = 1.0f;
+ scales[1] = 1.0f;
+ scales[2] = getSingleArgument(op, "height_scale", 1.0f);
+ scales[3] = getSingleArgument(op, "width_scale", 1.0f);
+ auto result =
+ createOp<ops::ResizeOp>(inputs[0], ops::ResizeOp::ResizeMethod::nearestNeighbor, scales)
+ ->getOutput(0);
+ return {result};
+}
+
+// Converts Caffe2 Sigmoid to MIR SigmoidOp (element-wise).
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertSigmoid(const std::vector<mir::Operation::Output *> &inputs)
+{
+ auto result = createOp<ops::SigmoidOp>(inputs[0]);
+ return {result->getOutput(0)};
+}
+
+// Converts Caffe2 Softmax to MIR SoftmaxOp along 'axis' (default 1).
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertSoftmax(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
+{
+ int axis = getSingleArgument(op, "axis", 1);
+ auto softmax = createOp<ops::SoftmaxOp>(inputs[0], axis);
+ return {softmax->getOutput(0)};
+}
+
+// Converts Caffe2 SpatialBN (inference mode only) into the decomposition
+// (X - mean) * (scale / sqrt(var + eps)) + bias, built from Add/Mul with
+// channel-wise {1, C, 1, 1} reshapes. All four statistics inputs must be
+// ConstantOps so the folding below can read their values.
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertSpatialBN(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
+{
+ checkLayout(op);
+
+ // Sanity checks
+ if (op.input_size() != 5)
+ throw std::runtime_error(
+ "SpatialBN must have exactly 5 inputs ('sums' and 'sumsq' are not supported yet)");
+ if (getSingleArgument(op, "is_test", 1) != 1)
+ throw std::runtime_error("SpatialBN: only test mode supported");
+
+ // overall_res = (X - mean) / sqrt(var + epsilon) * scale + bias
+
+ auto scale_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[1]->getNode());
+ auto bias_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[2]->getNode());
+ auto mean_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[3]->getNode());
+ auto var_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[4]->getNode());
+ if (scale_op == nullptr || bias_op == nullptr || mean_op == nullptr || var_op == nullptr)
+ throw std::runtime_error(
+ "SpatialBN: non-constant 'scale', 'bias', 'mean' and 'var' inputs are not supported yet.");
+
+ const auto &scale_tensor = scale_op->getValue();
+ const auto &bias_tensor = bias_op->getValue();
+ const auto &mean_tensor = mean_op->getValue();
+ const auto &var_tensor = var_op->getValue();
+ float eps = getSingleArgument(op, "epsilon", 1e-5f);
+
+ // res1 = X - mean
+ // NOTE(review): Tensor<float> appears to wrap mean_tensor's storage, so
+ // this loop negates mean_tensor in place before it is used below as the
+ // additive term — confirm against mir::Tensor semantics.
+ Tensor<float> bias_data(mean_tensor);
+ for (auto &idx : ShapeRange(bias_data.getShape()))
+ bias_data.at(idx) *= -1;
+
+ auto mean = createOp<ops::ConstantOp>(mean_tensor)->getOutput(0);
+ mean = createOp<ops::ReshapeOp>(mean, Shape{1, mean->getShape().dim(0), 1, 1})->getOutput(0);
+ auto result = createOp<ops::AddOp>(inputs[0], mean)->getOutput(0);
+
+ // res2 = res1 * scale / (var + epsilon)
+ // NOTE(review): same in-place pattern — 'multiplier' presumably aliases
+ // scale_tensor, which is then wrapped into the multiplicative constant.
+ // Assumes scale and var share the same (1-D, per-channel) shape.
+ Tensor<float> multiplier(scale_tensor);
+ for (auto &idx : ShapeRange(scale_tensor.getShape()))
+ multiplier.at(idx) /= std::sqrt(*reinterpret_cast<float *>(var_tensor.at(idx)) + eps);
+ auto scale = createOp<ops::ConstantOp>(scale_tensor)->getOutput(0);
+ scale = createOp<ops::ReshapeOp>(scale, Shape{1, scale->getShape().dim(0), 1, 1})->getOutput(0);
+ result = createOp<ops::MulOp>(result, scale)->getOutput(0);
+
+ // overall_res = res2 + bias
+ auto bias = createOp<ops::ConstantOp>(bias_tensor)->getOutput(0);
+ bias = createOp<ops::ReshapeOp>(bias, Shape{1, bias->getShape().dim(0), 1, 1})->getOutput(0);
+ result = createOp<ops::AddOp>(result, bias)->getOutput(0);
+
+ return {result};
+}
+
+// Converts Caffe2 Sum (N-ary) into a left-associated chain of binary AddOps.
+// Requires at least two inputs.
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertSum(const std::vector<mir::Operation::Output *> &inputs)
+{
+ auto result = createOp<ops::AddOp>(inputs[0], inputs[1])->getOutput(0);
+ for (int i = 2; i < static_cast<int>(inputs.size()); ++i)
+ {
+ result = createOp<ops::AddOp>(result, inputs[i])->getOutput(0);
+ }
+ return {result};
+}
+
+// Converts Caffe2 Clip, but only the CappedRelu special case
+// (min == 0, max > 0). NOTE(review): this precondition is enforced with
+// assert(), which disappears under NDEBUG — a runtime_error would reject
+// unsupported clips in release builds too.
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertClip(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
+{
+
+ float max = getSingleArgument(op, "max", float(0));
+ float min = getSingleArgument(op, "min", float(0));
+
+ assert(max > 0.0 && min == 0.0 && "Support only if clip is CappedRelu");
+ auto cap_relu = createOp<ops::CappedReluOp>(inputs[0], max);
+
+ return {cap_relu->getOutput(0)};
+}
+
+// Converts Caffe2 Reshape. The target shape must come from a ConstantOp
+// (second input); its int64 elements are narrowed to int32 dimensions.
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertReshape(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
+{
+ auto shape_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[1]->getNode());
+ if (shape_op == nullptr)
+ throw std::runtime_error("Reshape: non-constant shape is not supported yet.");
+
+ const auto &shape_tensor = shape_op->getValue();
+
+ Tensor<int64_t> out_shape_tensor(shape_tensor);
+
+ ShapeRange range(out_shape_tensor.getShape());
+ std::vector<int32_t> shape_vec;
+ for (const auto &index : range)
+ {
+ shape_vec.push_back(static_cast<int32_t>(out_shape_tensor.at(index)));
+ }
+ Shape out_shape(shape_vec);
+
+ auto reshape = createOp<ops::ReshapeOp>(inputs[0], out_shape);
+
+ return {reshape->getOutput(0)};
+}
+
+} // namespace mir_caffe2
diff --git a/compiler/mir-caffe2-importer/caffe2_op_creator.h b/compiler/mir-caffe2-importer/caffe2_op_creator.h
new file mode 100644
index 000000000..2b29378e9
--- /dev/null
+++ b/compiler/mir-caffe2-importer/caffe2_op_creator.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_CAFFE2_OP_CREATOR_H
+#define MIR_CAFFE2_OP_CREATOR_H
+
+#include <set>
+#include <unordered_map>
+#include <vector>
+#include <memory>
+
+#include "mir/Graph.h"
+#include "mir/Operation.h"
+#include "mir/TensorVariant.h"
+#include "mir/Shape.h"
+
+#include "caffe2/proto/caffe2.pb.h"
+
+namespace mir_caffe2
+{
+
+using mir::Operation;
+using mir::Shape;
+
+// Translates individual Caffe2 operators into MIR operations created in the
+// graph passed at construction. Each convert* method takes the MIR outputs
+// feeding the operator (and usually the OperatorDef for its arguments) and
+// returns the MIR outputs produced; it may return fewer outputs than the
+// Caffe2 operator declares (e.g. Constant without data returns none).
+class Caffe2OpCreator
+{
+public:
+ explicit Caffe2OpCreator(mir::Graph *g) : _graph(g) {}
+
+ std::vector<mir::Operation::Output *>
+ convertConstant(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertAdd(const std::vector<mir::Operation::Output *> &inputs, const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertAveragePool(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertConv(const std::vector<mir::Operation::Output *> &inputs, const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertConcat(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertDropout(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertFC(const std::vector<mir::Operation::Output *> &inputs, const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertMaxPool(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertMul(const std::vector<mir::Operation::Output *> &inputs, const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertRelu(const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertResizeNearest(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertSigmoid(const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertSoftmax(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertSpatialBN(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertSum(const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertClip(const std::vector<mir::Operation::Output *> &inputs, const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertReshape(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op);
+
+private:
+ // Target graph; not owned.
+ mir::Graph *_graph = nullptr;
+
+ // Forwarding helper around mir::Graph::create for the op type 'OpType'.
+ template <typename OpType, typename... Types> mir::Operation *createOp(Types &&... args);
+};
+
+template <typename OpType, typename... Types>
+mir::Operation *Caffe2OpCreator::createOp(Types &&... args)
+{
+ return _graph->create<OpType>(std::forward<Types>(args)...);
+}
+
+} // namespace mir_caffe2
+
+#endif // MIR_CAFFE2_OP_CREATOR_H
diff --git a/compiler/mir-caffe2-importer/caffe2_op_types.h b/compiler/mir-caffe2-importer/caffe2_op_types.h
new file mode 100644
index 000000000..b5e7e7631
--- /dev/null
+++ b/compiler/mir-caffe2-importer/caffe2_op_types.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_CAFFE2_OP_TYPES_H
+#define MIR_CAFFE2_OP_TYPES_H
+
+namespace mir_caffe2
+{
+
+enum class SupportedCaffe2OpType
+{
+ add,
+ averagePool,
+ clip,
+ concat,
+ conv,
+ constantFill,
+ dropout,
+ FC,
+ givenTensorFill,
+ givenTensorInt64Fill,
+ maxPool,
+ mul,
+ relu,
+ reshape,
+ resizeNearest,
+ sigmoid,
+ softmax,
+ spatialBN,
+ sum,
+};
+
+} // namespace mir_caffe2
+
+#endif // MIR_CAFFE2_OP_TYPES_H
diff --git a/compiler/mir-caffe2-importer/caffe2_proto_helper.cpp b/compiler/mir-caffe2-importer/caffe2_proto_helper.cpp
new file mode 100644
index 000000000..a7cde64cf
--- /dev/null
+++ b/compiler/mir-caffe2-importer/caffe2_proto_helper.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "caffe2_proto_helper.h"
+
+namespace mir_caffe2
+{
+
+const ::caffe2::Argument &findArgumentByName(RepArgument args, const std::string &name)
+{
+ for (auto &arg : args)
+ if (arg.name() == name)
+ return arg;
+ throw std::runtime_error("Can't find argument with name: " + name);
+}
+
+const bool hasArgument(RepArgument args, const std::string &name)
+{
+ for (auto &arg : args)
+ if (arg.name() == name)
+ return true;
+ return false;
+}
+
+int getSingleArgument(const ::caffe2::OperatorDef &op, const std::string &argument_name,
+ const int default_value)
+{
+ if (hasArgument(op.arg(), argument_name))
+ return static_cast<int>(findArgumentByName(op.arg(), argument_name).i());
+ return default_value;
+}
+
+float getSingleArgument(const ::caffe2::OperatorDef &op, const std::string &argument_name,
+ const float default_value)
+{
+ if (hasArgument(op.arg(), argument_name))
+ return findArgumentByName(op.arg(), argument_name).f();
+ return default_value;
+}
+
+std::string getSingleArgument(const ::caffe2::OperatorDef &op, const std::string &argument_name,
+ const std::string &default_value)
+{
+ if (hasArgument(op.arg(), argument_name))
+ return findArgumentByName(op.arg(), argument_name).s();
+ return default_value;
+}
+
+} // namespace mir_caffe2
diff --git a/compiler/mir-caffe2-importer/caffe2_proto_helper.h b/compiler/mir-caffe2-importer/caffe2_proto_helper.h
new file mode 100644
index 000000000..4c47edec8
--- /dev/null
+++ b/compiler/mir-caffe2-importer/caffe2_proto_helper.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_CAFFE2_PROTO_HELPER_H
+#define MIR_CAFFE2_PROTO_HELPER_H
+
+#include "caffe2/proto/caffe2.pb.h"
+
+namespace mir_caffe2
+{
+
+using RepArgument = const ::google::protobuf::RepeatedPtrField<::caffe2::Argument> &;
+
+const ::caffe2::Argument &findArgumentByName(RepArgument args, const std::string &name);
+
+const bool hasArgument(RepArgument args, const std::string &name);
+
+int getSingleArgument(const ::caffe2::OperatorDef &op, const std::string &argument_name,
+ int default_value);
+float getSingleArgument(const ::caffe2::OperatorDef &op, const std::string &argument_name,
+ float default_value);
+std::string getSingleArgument(const ::caffe2::OperatorDef &op, const std::string &argument_name,
+ const std::string &default_value);
+
+} // namespace mir_caffe2
+
+#endif // MIR_CAFFE2_PROTO_HELPER_H
diff --git a/compiler/mir-caffe2-importer/requires.cmake b/compiler/mir-caffe2-importer/requires.cmake
new file mode 100644
index 000000000..1059c50d3
--- /dev/null
+++ b/compiler/mir-caffe2-importer/requires.cmake
@@ -0,0 +1 @@
+require("mir")
diff --git a/compiler/mir-interpreter/CMakeLists.txt b/compiler/mir-interpreter/CMakeLists.txt
new file mode 100644
index 000000000..814612ae9
--- /dev/null
+++ b/compiler/mir-interpreter/CMakeLists.txt
@@ -0,0 +1,4 @@
+file(GLOB_RECURSE interp_src ./*.cpp ./*.h)
+add_library(mir_interpreter SHARED ${interp_src})
+target_link_libraries(mir_interpreter PUBLIC mir)
+target_include_directories(mir_interpreter PUBLIC include)
diff --git a/compiler/mir-interpreter/include/MirInterpreter.h b/compiler/mir-interpreter/include/MirInterpreter.h
new file mode 100644
index 000000000..c3d971716
--- /dev/null
+++ b/compiler/mir-interpreter/include/MirInterpreter.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_INTERPRETER_
+#define _MIR_INTERPRETER_
+
+#include "mir/Visitor.h"
+#include "mir/Operation.h"
+#include "mir/TensorVariant.h"
+#include <unordered_map>
+#include <vector>
+
+namespace mir_interpreter
+{
+
+class MIRInterpreter : public mir::Visitor
+{
+public:
+ explicit MIRInterpreter() = default;
+
+ ~MIRInterpreter() override = default;
+
+ /// @brief Set tensor to the interpreter environment.
+ void setTensor(const mir::Operation::Output *output, mir::TensorVariant tensor);
+
+ /// @brief Get tensor from the interpreter environment.
+ const mir::TensorVariant &getTensor(const mir::Operation::Output *) const;
+
+ void visit(mir::ops::AddOp &op) override;
+ void visit(mir::ops::AbsOp &op) override;
+ void visit(mir::ops::AvgPool2DOp &op) override;
+ void visit(mir::ops::CappedReluOp &op) override;
+ void visit(mir::ops::ConcatOp &op) override;
+ void visit(mir::ops::ConstantOp &op) override;
+ void visit(mir::ops::Conv2DOp &op) override;
+ void visit(mir::ops::DeConv2DOp &op) override;
+ void visit(mir::ops::DepthwiseConv2DOp &op) override;
+ void visit(mir::ops::DequantizeOp &op) override;
+ void visit(mir::ops::DivOp &op) override;
+ void visit(mir::ops::EluOp &op) override;
+ void visit(mir::ops::EqualOp &op) override;
+ void visit(mir::ops::FullyConnectedOp &op) override;
+ void visit(mir::ops::GatherOp &op) override;
+ void visit(mir::ops::GreaterOp &op) override;
+ void visit(mir::ops::HardSwishOp &op) override;
+ void visit(mir::ops::InputOp &op) override;
+ void visit(mir::ops::LeakyReluOp &op) override;
+ void visit(mir::ops::LessOp &op) override;
+ void visit(mir::ops::MaxOp &op) override;
+ void visit(mir::ops::MaxPool2DOp &op) override;
+ void visit(mir::ops::MulOp &op) override;
+ void visit(mir::ops::OutputOp &op) override;
+ void visit(mir::ops::PadOp &op) override;
+ void visit(mir::ops::QuantizeOp &op) override;
+ void visit(mir::ops::ReduceMeanOp &op) override;
+ void visit(mir::ops::ReluOp &op) override;
+ void visit(mir::ops::ReshapeOp &op) override;
+ void visit(mir::ops::ResizeOp &op) override;
+ void visit(mir::ops::SigmoidOp &op) override;
+ void visit(mir::ops::SliceOp &op) override;
+ void visit(mir::ops::SoftmaxOp &op) override;
+ void visit(mir::ops::SqrtOp &op) override;
+ void visit(mir::ops::SqueezeOp &op) override;
+ void visit(mir::ops::SubOp &op) override;
+ void visit(mir::ops::TanhOp &op) override;
+ void visit(mir::ops::TransposeOp &op) override;
+ void visit(mir::ops::BroadcastOp &op) override;
+
+protected:
+ void visit_fallback(mir::Operation &op) override;
+
+private:
+ mir::TensorVariant &allocateTensor(const mir::Operation::Output *output);
+
+ /// @brief Gets the computed inputs for the operation.
+ std::vector<std::reference_wrapper<const mir::TensorVariant>>
+ getInputTensors(const mir::Operation &op);
+
+ std::vector<std::reference_wrapper<mir::TensorVariant>>
+ allocateOutputTensors(const mir::Operation &op);
+
+ /// @brief Mapping of operation outputs to corresponding tensors.
+ std::unordered_map<const mir::Operation::Output *, mir::TensorVariant> _tensors;
+};
+
+} // namespace mir_interpreter
+
+#endif // _MIR_INTERPRETER_
diff --git a/compiler/mir-interpreter/requires.cmake b/compiler/mir-interpreter/requires.cmake
new file mode 100644
index 000000000..1059c50d3
--- /dev/null
+++ b/compiler/mir-interpreter/requires.cmake
@@ -0,0 +1 @@
+require("mir")
diff --git a/compiler/mir-interpreter/src/MirInterpreter.cpp b/compiler/mir-interpreter/src/MirInterpreter.cpp
new file mode 100644
index 000000000..245f7ddab
--- /dev/null
+++ b/compiler/mir-interpreter/src/MirInterpreter.cpp
@@ -0,0 +1,420 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MirInterpreter.h"
+
+#include "ops/Add.h"
+#include "ops/Abs.h"
+#include "ops/AvgPool2D.h"
+#include "ops/CappedReLU.h"
+#include "ops/Concat.h"
+#include "ops/Conv2D.h"
+#include "ops/DeConv2D.h"
+#include "ops/DepthwiseConv2D.h"
+#include "ops/Div.h"
+#include "ops/ELU.h"
+#include "ops/Equal.h"
+#include "ops/Fill.h"
+#include "ops/FullyConnected.h"
+#include "ops/Gather.h"
+#include "ops/Greater.h"
+#include "ops/HardSwish.h"
+#include "ops/LeakyReLU.h"
+#include "ops/Less.h"
+#include "ops/Max.h"
+#include "ops/MaxPool2D.h"
+#include "ops/Mul.h"
+#include "ops/Pad.h"
+#include "ops/Quantization.h"
+#include "ops/ReduceMean.h"
+#include "ops/ReLU.h"
+#include "ops/Reshape.h"
+#include "ops/Sigmoid.h"
+#include "ops/Slice.h"
+#include "ops/Softmax.h"
+#include "ops/Sqrt.h"
+#include "ops/Sub.h"
+#include "ops/Tanh.h"
+#include "ops/Transpose.h"
+
+#include "ops/Common.h"
+
+#include "mir/OpDefs.h"
+
+#include <cassert>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+void MIRInterpreter::setTensor(const Operation::Output *output, TensorVariant tensor)
+{
+ const auto result = _tensors.emplace(output, std::move(tensor));
+ if (!result.second)
+ {
+ const std::string &name = output->getName();
+ throw std::runtime_error("Attempt to overwrite data for tensor \"" + name + "\".");
+ }
+}
+
+TensorVariant &MIRInterpreter::allocateTensor(const Operation::Output *output)
+{
+ const auto result = _tensors.emplace(output, output->getType());
+ if (!result.second)
+ {
+ const std::string &name = output->getName();
+ throw std::runtime_error("Attempt to overwrite data for tensor \"" + name + "\".");
+ }
+ return result.first->second;
+}
+
+const TensorVariant &MIRInterpreter::getTensor(const Operation::Output *output) const
+{
+ const auto it = _tensors.find(output);
+ if (it == _tensors.end())
+ {
+ const std::string &name = output->getName();
+ throw std::runtime_error("Can't find data for tensor \"" + name + "\".");
+ }
+ return it->second;
+}
+
+std::vector<std::reference_wrapper<const TensorVariant>>
+MIRInterpreter::getInputTensors(const Operation &op)
+{
+ std::vector<std::reference_wrapper<const TensorVariant>> tensors;
+ for (const Operation::Output *input : op.getInputs())
+ {
+ tensors.emplace_back(getTensor(input));
+ }
+ return tensors;
+}
+
+std::vector<std::reference_wrapper<TensorVariant>>
+MIRInterpreter::allocateOutputTensors(const Operation &op)
+{
+ std::vector<std::reference_wrapper<TensorVariant>> tensors;
+ for (const Operation::Output &output : op.getOutputs())
+ {
+ tensors.emplace_back(allocateTensor(&output));
+ }
+ return tensors;
+}
+
+void MIRInterpreter::visit(ops::InputOp &op)
+{
+ assert(_tensors.find(op.getOutput(0)) != _tensors.end());
+}
+
+void MIRInterpreter::visit(ops::AvgPool2DOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ AvgPool2D(op, inputs[0], outputs[0]);
+}
+
+void MIRInterpreter::visit(ops::ConstantOp &op) { setTensor(op.getOutput(0), op.getValue()); }
+
+void MIRInterpreter::visit(ops::ConcatOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Concat(inputs, op.getAxis(), outputs[0]);
+}
+
+void MIRInterpreter::visit(ops::Conv2DOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ const mir::TensorVariant *bias = nullptr;
+ if (inputs.size() > 2)
+ {
+ bias = &(inputs[2].get());
+ }
+ Conv2D(inputs[0], inputs[1], op.getAttributes(), outputs[0], bias);
+}
+
+void MIRInterpreter::visit(ops::MaxPool2DOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ MaxPool2D(inputs[0], op, outputs[0]);
+}
+
+void MIRInterpreter::visit(ops::ReshapeOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Reshape(inputs[0], outputs[0]);
+}
+
+void MIRInterpreter::visit(ops::ReluOp &op)
+{
+ auto args = getInputTensors(op);
+ auto results = allocateOutputTensors(op);
+ ReLU(args[0], results[0]);
+}
+
+void MIRInterpreter::visit(ops::SigmoidOp &op)
+{
+ auto args = getInputTensors(op);
+ auto results = allocateOutputTensors(op);
+ Sigmoid(args[0], results[0]);
+}
+
+void MIRInterpreter::visit(ops::SoftmaxOp &op)
+{
+ auto inputs = getInputTensors(op);
+ assert(inputs.size() == 1);
+ auto outputs = allocateOutputTensors(op);
+ Softmax(inputs[0], op.getAxis(), outputs[0]);
+}
+
+void MIRInterpreter::visit(ops::FullyConnectedOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ const mir::TensorVariant *bias = nullptr;
+ if (inputs.size() > 2)
+ {
+ bias = &(inputs[3].get());
+ }
+ FullyConnected(inputs[0], inputs[1], op, outputs[0], bias);
+}
+
+void MIRInterpreter::visit(ops::CappedReluOp &op)
+{
+ auto args = getInputTensors(op);
+ auto results = allocateOutputTensors(op);
+ CappedReLU(args[0], op.getCap(), results[0]);
+}
+
+void MIRInterpreter::visit(ops::DepthwiseConv2DOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ const mir::TensorVariant *bias = nullptr;
+ if (inputs.size() > 2)
+ {
+ bias = &inputs[3].get();
+ }
+ DepthwiseConv2D(op, inputs[0], inputs[1], outputs[0], bias);
+}
+
+void MIRInterpreter::visit(ops::SliceOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto input = inputs[0];
+ auto outputs = allocateOutputTensors(op);
+ Slice(input, op.getStarts(), outputs[0]);
+}
+
+void MIRInterpreter::visit(ops::TanhOp &op)
+{
+ auto args = getInputTensors(op);
+ auto results = allocateOutputTensors(op);
+ Tanh(args[0], results[0]);
+}
+
+void MIRInterpreter::visit(ops::DeConv2DOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ DeConv2D(inputs[0], inputs[1], op.getAttributes(), outputs[0]);
+}
+
+void MIRInterpreter::visit(ops::EluOp &op)
+{
+ auto args = getInputTensors(op);
+ auto results = allocateOutputTensors(op);
+ ELU(args[0], op.getAlpha(), results[0]);
+}
+
+void MIRInterpreter::visit(ops::SqueezeOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ // Squeeze is just a special case of reshape.
+ Reshape(inputs[0], outputs[0]);
+}
+
+void MIRInterpreter::visit(ops::PadOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Pad(inputs[0], op, outputs[0]);
+}
+
+void MIRInterpreter::visit(ops::SqrtOp &op)
+{
+ auto args = getInputTensors(op);
+ auto results = allocateOutputTensors(op);
+ Sqrt(args[0], results[0]);
+}
+
+void MIRInterpreter::visit(ops::ResizeOp &op)
+{
+ // TODO support types other than float32
+ auto inputs = getInputTensors(op);
+ assert(inputs[0].get().getElementType() == mir::DataType::FLOAT32);
+ auto outputs = allocateOutputTensors(op);
+
+ Tensor<float> input(inputs[0]);
+ assert(op.getMode() == ops::ResizeOp::ResizeMethod::nearestNeighbor);
+
+ auto scales = op.getScales();
+ Fill(outputs[0], [&scales, &input](const Index &id) {
+ Index in_idx;
+ in_idx.resize(4);
+ for (int i = 0; i < input.getShape().rank(); i++)
+ {
+ in_idx.at(i) = static_cast<int>(floorf(id.at(i) / scales[i]));
+ }
+ return input.at(in_idx);
+ });
+}
+
+void MIRInterpreter::visit(ops::ReduceMeanOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ ReduceMean(inputs[0], op, outputs[0]);
+}
+
+void MIRInterpreter::visit(ops::TransposeOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Transpose(inputs[0], op, outputs[0]);
+}
+
+void MIRInterpreter::visit(ops::GatherOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Gather(inputs[0], inputs[1], op, outputs[0]);
+}
+
+void MIRInterpreter::visit(ops::LeakyReluOp &op)
+{
+ auto args = getInputTensors(op);
+ auto results = allocateOutputTensors(op);
+ LeakyReLU(args[0], op.getAlpha(), results[0]);
+}
+
+void MIRInterpreter::visit(ops::OutputOp &op)
+{
+ assert(_tensors.find(op.getInput(0)) != _tensors.end());
+}
+
+void MIRInterpreter::visit(ops::AddOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Add(inputs[0], inputs[1], outputs[0]);
+}
+
+void MIRInterpreter::visit(mir::ops::DivOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Div(inputs[0], inputs[1], outputs[0]);
+}
+
+void MIRInterpreter::visit(mir::ops::MaxOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Max(inputs[0], inputs[1], outputs[0]);
+}
+
+void MIRInterpreter::visit(mir::ops::MulOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Mul(inputs[0], inputs[1], outputs[0]);
+}
+
+void MIRInterpreter::visit(mir::ops::SubOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Sub(inputs[0], inputs[1], outputs[0]);
+}
+
+void MIRInterpreter::visit(mir::ops::DequantizeOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Dequantize(inputs[0], outputs[0]);
+}
+
+void MIRInterpreter::visit(mir::ops::QuantizeOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Quantize(inputs[0], outputs[0]);
+}
+
+void MIRInterpreter::visit(mir::ops::HardSwishOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ HardSwish(inputs[0], outputs[0]);
+}
+
+void MIRInterpreter::visit(mir::ops::GreaterOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Greater(inputs[0], inputs[1], outputs[0]);
+}
+
+void MIRInterpreter::visit(mir::ops::LessOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Less(inputs[0], inputs[1], outputs[0]);
+}
+
+void MIRInterpreter::visit(mir::ops::EqualOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Equal(inputs[0], inputs[1], outputs[0]);
+}
+
+void MIRInterpreter::visit(mir::ops::AbsOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ Abs(inputs[0], outputs[0]);
+}
+
+void MIRInterpreter::visit(mir::ops::BroadcastOp &op)
+{
+ auto inputs = getInputTensors(op);
+ auto outputs = allocateOutputTensors(op);
+ outputs[0].get() = TensorVariant{inputs[0], op.getOutputShape(0)};
+}
+
+void MIRInterpreter::visit_fallback(mir::Operation &) { throw std::runtime_error("NYI operation"); }
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Abs.cpp b/compiler/mir-interpreter/src/ops/Abs.cpp
new file mode 100644
index 000000000..547009ffd
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Abs.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Abs.h"
+#include "Common.h"
+
+#include <mir/ShapeRange.h>
+#include <mir/Tensor.h>
+
+#include <cmath>
+
+namespace mir_interpreter
+{
+
+template <typename T> struct AbsImpl
+{
+ static void run(const mir::TensorVariant &arg, mir::TensorVariant &result)
+ {
+ mir::Tensor<T> arg_accessor(arg);
+ mir::Tensor<T> res_accessor(result);
+
+ for (const auto &index : mir::ShapeRange(result.getShape()))
+ {
+ res_accessor.at(index) = std::abs(arg_accessor.at(index));
+ }
+ }
+};
+
+template <> struct AbsImpl<uint8_t>
+{
+ static void run(const mir::TensorVariant &arg, mir::TensorVariant &result)
+ {
+ throw std::runtime_error{"NYI"};
+ }
+};
+
+void Abs(const mir::TensorVariant &arg, mir::TensorVariant &result)
+{
+ dispatch<AbsImpl>(arg.getElementType(), arg, result);
+};
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Abs.h b/compiler/mir-interpreter/src/ops/Abs.h
new file mode 100644
index 000000000..1ba59e647
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Abs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_ABS_
+#define _NNC_CORE_BACKEND_INTERPRETER_ABS_
+
+#include <mir/TensorVariant.h>
+
+namespace mir_interpreter
+{
+
+void Abs(const mir::TensorVariant &arg, mir::TensorVariant &result);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_ABS_
diff --git a/compiler/mir-interpreter/src/ops/Add.cpp b/compiler/mir-interpreter/src/ops/Add.cpp
new file mode 100644
index 000000000..631b854b7
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Add.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Add.h"
+#include "Common.h"
+
+#include "QuantizationHelpers.h"
+#include "mir/Tensor.h"
+#include "mir/ShapeRange.h"
+
+#include <cmath>
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+template <typename T> struct AddImpl
+{
+ static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res);
+};
+
+template <typename T>
+void AddImpl<T>::run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+ TensorVariant broadcasted_lhs(lhs, res.getShape());
+ TensorVariant broadcasted_rhs(rhs, res.getShape());
+ Tensor<T> lhs_accessor(broadcasted_lhs);
+ Tensor<T> rhs_accessor(broadcasted_rhs);
+ Tensor<T> res_accessor(res);
+
+ for (const auto &index : ShapeRange(res.getShape()))
+ {
+ res_accessor.at(index) = lhs_accessor.at(index) + rhs_accessor.at(index);
+ }
+}
+
+template <> struct AddImpl<uint8_t>
+{
+ static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res);
+};
+
+void AddImpl<uint8_t>::run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+ const auto &lhs_type = lhs.getType();
+ const auto &rhs_type = rhs.getType();
+ const auto &res_type = res.getType();
+
+ assert(lhs_type.isQuantized());
+ assert(rhs_type.isQuantized());
+ assert(res_type.isQuantized());
+
+ int32_t lhs_offset = -lhs_type.getQuantization().getZeroPoint();
+ int32_t rhs_offset = -rhs_type.getQuantization().getZeroPoint();
+ int32_t output_offset = res_type.getQuantization().getZeroPoint();
+
+ double lhs_scale = lhs_type.getQuantization().getScale();
+ double rhs_scale = rhs_type.getQuantization().getScale();
+ double output_scale = res_type.getQuantization().getScale();
+
+ int left_shift = 20;
+ const double twice_max_input_scale = 2 * std::max(lhs_scale, rhs_scale);
+ const double real_lhs_multiplier = lhs_scale / twice_max_input_scale;
+ const double real_rhs_multiplier = rhs_scale / twice_max_input_scale;
+ const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+ int32_t lhs_multiplier = 0;
+ int32_t rhs_multiplier = 0;
+ int32_t output_multiplier = 0;
+ int lhs_shift = 0;
+ int rhs_shift = 0;
+ int output_shift = 0;
+
+ QuantizeMultiplierSmallerThanOneExp(real_lhs_multiplier, &lhs_multiplier, &lhs_shift);
+ QuantizeMultiplierSmallerThanOneExp(real_rhs_multiplier, &rhs_multiplier, &rhs_shift);
+ QuantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+ TensorVariant broadcasted_lhs(lhs, res_type.getShape());
+ TensorVariant broadcasted_rhs(rhs, res_type.getShape());
+
+ Tensor<uint8_t> lhs_accessor(broadcasted_lhs);
+ Tensor<uint8_t> rhs_accessor(broadcasted_rhs);
+ Tensor<uint8_t> res_accessor(res);
+
+ int32_t output_min = std::numeric_limits<uint8_t>::min();
+ int32_t output_max = std::numeric_limits<uint8_t>::max();
+
+ for (const auto &index : ShapeRange(res_type.getShape()))
+ {
+ const int32_t lhs_val = lhs_accessor.at(index) + lhs_offset;
+ const int32_t rhs_val = rhs_accessor.at(index) + rhs_offset;
+ const int32_t shifted_lhs_val = lhs_val * (1 << left_shift);
+ const int32_t shifted_rhs_val = rhs_val * (1 << left_shift);
+ const int32_t scaled_lhs_val =
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_lhs_val, lhs_multiplier, lhs_shift);
+ const int32_t scaled_rhs_val =
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_rhs_val, rhs_multiplier, rhs_shift);
+ const int32_t raw_sum = scaled_lhs_val + scaled_rhs_val;
+ const int32_t raw_output =
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(raw_sum, output_multiplier, output_shift) +
+ output_offset;
+ const int32_t clamped_output = std::min(output_max, std::max(output_min, raw_output));
+ res_accessor.at(index) = static_cast<uint8_t>(clamped_output);
+ }
+}
+
+void Add(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+ if (lhs.getElementType() != rhs.getElementType())
+ {
+ throw std::runtime_error{"Add with different input types is unsupported"};
+ }
+ dispatch<AddImpl>(res.getElementType(), lhs, rhs, res);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Add.h b/compiler/mir-interpreter/src/ops/Add.h
new file mode 100644
index 000000000..48508226f
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Add.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_ADD_
+#define _NNC_CORE_BACKEND_INTERPRETER_ADD_
+
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void Add(const mir::TensorVariant &lhs, const mir::TensorVariant &rhs, mir::TensorVariant &res);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_ADD_
diff --git a/compiler/mir-interpreter/src/ops/AvgPool2D.cpp b/compiler/mir-interpreter/src/ops/AvgPool2D.cpp
new file mode 100644
index 000000000..3f1d65100
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/AvgPool2D.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AvgPool2D.h"
+#include "Common.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+template <typename T> class AvgPool2DImpl
+{
+public:
+ static void run(const mir::ops::AvgPool2DOp &op, const mir::TensorVariant &input_var,
+ mir::TensorVariant &output);
+};
+
+template <typename T>
+void AvgPool2DImpl<T>::run(const ops::AvgPool2DOp &op, const TensorVariant &input_var,
+ TensorVariant &output)
+{
+ const auto &input_shape = op.getInputShape(0);
+ const auto &output_shape = op.getOutputShape(0);
+ const auto &window_size = op.getWindowSize();
+ const auto &strides = op.getStrides();
+ const auto &padding_before = op.getPaddingBefore();
+ const auto &padding_after = op.getPaddingAfter();
+ (void)padding_after;
+
+ constexpr int num_spatial_dims = 2;
+ assert(input_var.getShape().rank() == 4);
+ assert(window_size.size() == num_spatial_dims);
+ assert(strides.size() == num_spatial_dims);
+ assert(padding_before.size() == num_spatial_dims);
+ assert(padding_after.size() == num_spatial_dims);
+
+ Tensor<T> res_accessor(output);
+ Tensor<T> input(input_var);
+
+ ShapeRange in_range(input_shape);
+ Index in_index(input_shape.rank());
+
+ for (const auto &out_index : ShapeRange(output_shape))
+ {
+ T result = 0;
+ size_t num_elements = 0;
+
+ // Assuming NHWC format.
+ in_index.at(0) = out_index.at(0);
+ in_index.at(3) = out_index.at(3);
+
+ for (const auto &window_index : ShapeRange(Shape(window_size)))
+ {
+ // Assuming NHWC format.
+ for (int i = 0; i < num_spatial_dims; ++i)
+ in_index.at(1 + i) =
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+
+ if (in_range.contains(in_index))
+ {
+ num_elements++;
+ result += input.at(in_index);
+ }
+ else if (op.getIncludePad())
+ {
+ num_elements++;
+ }
+ }
+
+ result /= num_elements;
+ res_accessor.at(out_index) = result;
+ }
+}
+
+template <> struct AvgPool2DImpl<uint8_t>
+{
+ static void run(const mir::ops::AvgPool2DOp &op, const mir::TensorVariant &input,
+ mir::TensorVariant &output);
+};
+
+void AvgPool2DImpl<uint8_t>::run(const ops::AvgPool2DOp &op, const TensorVariant &input,
+ TensorVariant &output)
+{
+ const auto &input_type = input.getType();
+ const auto &output_type = op.getOutput(0)->getType();
+ (void)input_type;
+
+ assert(input_type.isQuantized());
+ assert(output_type.isQuantized());
+ assert(input_type.getElementType() == DataType::UINT8);
+
+ const auto &input_shape = op.getInputShape(0);
+ const auto &output_shape = op.getOutputShape(0);
+ const auto &window_size = op.getWindowSize();
+ const auto &strides = op.getStrides();
+ const auto &padding_before = op.getPaddingBefore();
+ const auto &padding_after = op.getPaddingAfter();
+ (void)padding_after;
+
+ constexpr int num_spatial_dims = 2;
+ assert(input.getShape().rank() == 4);
+ assert(window_size.size() == num_spatial_dims);
+ assert(strides.size() == num_spatial_dims);
+ assert(padding_before.size() == num_spatial_dims);
+ assert(padding_after.size() == num_spatial_dims);
+
+ Tensor<uint8_t> input_accessor(input);
+ Tensor<uint8_t> res_accessor(output);
+
+ ShapeRange in_range(input_shape);
+ Index in_index(input_shape.rank());
+
+ int32_t output_min = std::numeric_limits<uint8_t>::min();
+ int32_t output_max = std::numeric_limits<uint8_t>::max();
+
+ for (const auto &out_index : ShapeRange(output_shape))
+ {
+ int32_t result = 0;
+ size_t num_elements = 0;
+
+ // Assuming NHWC format.
+ in_index.at(0) = out_index.at(0);
+ in_index.at(3) = out_index.at(3);
+
+ for (const auto &window_index : ShapeRange(Shape(window_size)))
+ {
+ // Assuming NHWC format.
+ for (int i = 0; i < num_spatial_dims; ++i)
+ in_index.at(1 + i) =
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+
+ if (in_range.contains(in_index))
+ {
+ num_elements++;
+ result += input_accessor.at(in_index);
+ }
+ else if (op.getIncludePad())
+ {
+ num_elements++;
+ }
+ }
+ result = (result + num_elements / 2) / num_elements;
+ result = std::max(result, output_min);
+ result = std::min(result, output_max);
+ res_accessor.at(out_index) = static_cast<uint8_t>(result);
+ }
+}
+
+void AvgPool2D(const mir::ops::AvgPool2DOp &op, const mir::TensorVariant &input,
+ mir::TensorVariant &output)
+{
+ dispatch<AvgPool2DImpl>(output.getElementType(), op, input, output);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/AvgPool2D.h b/compiler/mir-interpreter/src/ops/AvgPool2D.h
new file mode 100644
index 000000000..b30574cee
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/AvgPool2D.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_AVG_POOL_2D_
+#define _NNC_CORE_BACKEND_INTERPRETER_AVG_POOL_2D_
+
+#include "mir/ops/AvgPool2DOp.h"
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void AvgPool2D(const mir::ops::AvgPool2DOp &op, const mir::TensorVariant &input,
+ mir::TensorVariant &output);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_AVG_POOL_2D_
diff --git a/compiler/mir-interpreter/src/ops/CappedReLU.cpp b/compiler/mir-interpreter/src/ops/CappedReLU.cpp
new file mode 100644
index 000000000..1ac95ac16
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/CappedReLU.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CappedReLU.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+#include "Common.h"
+
+#include <algorithm>
+#include <cstdint>
+
+namespace mir_interpreter
+{
+
+template <typename T> struct CappedReLUImpl
+{
+ static void run(const mir::TensorVariant &arg, float cap, mir::TensorVariant &result);
+};
+
+template <typename T>
+void CappedReLUImpl<T>::run(const mir::TensorVariant &arg, float cap, mir::TensorVariant &result)
+{
+ mir::Tensor<T> arg_accessor(arg);
+ mir::Tensor<T> res_accessor(result);
+
+ for (const auto &index : mir::ShapeRange(result.getShape()))
+ {
+ res_accessor.at(index) = std::min(std::max(arg_accessor.at(index), T(0)), static_cast<T>(cap));
+ }
+}
+
+static float dequantize(uint8_t x, const mir::AffineQuantization &q)
+{
+ return (static_cast<int>(x) - q.getZeroPoint()) * q.getScale();
+}
+
+static uint8_t quantize(float x, const mir::AffineQuantization &q)
+{
+ return (static_cast<float>(x) / q.getScale() + q.getZeroPoint());
+}
+
+template <> struct CappedReLUImpl<uint8_t>
+{
+ static void run(const mir::TensorVariant &arg, float cap, mir::TensorVariant &result)
+ {
+ mir::Tensor<uint8_t> arg_accessor(arg);
+ mir::Tensor<uint8_t> res_accessor(result);
+
+ auto quant_info = arg.getType().getQuantization();
+ assert(!quant_info.empty());
+
+ for (const auto &index : mir::ShapeRange(result.getShape()))
+ {
+ auto value = dequantize(arg_accessor.at(index), quant_info);
+ auto out_value =
+ quantize(std::min(std::max(value, 0.0f), cap), result.getType().getQuantization());
+ res_accessor.at(index) = out_value;
+ }
+ }
+};
+
+void CappedReLU(const mir::TensorVariant &arg, float cap, mir::TensorVariant &result)
+{
+ dispatch<CappedReLUImpl>(arg.getElementType(), arg, cap, result);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/CappedReLU.h b/compiler/mir-interpreter/src/ops/CappedReLU.h
new file mode 100644
index 000000000..ffb756d2a
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/CappedReLU.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_CAPPEDRELU_
+#define _NNC_CORE_BACKEND_INTERPRETER_CAPPEDRELU_
+
+#include <mir/TensorVariant.h>
+
+namespace mir_interpreter
+{
+
+void CappedReLU(const mir::TensorVariant &arg, float cap, mir::TensorVariant &result);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_CAPPEDRELU_
diff --git a/compiler/mir-interpreter/src/ops/Common.cpp b/compiler/mir-interpreter/src/ops/Common.cpp
new file mode 100644
index 000000000..dae207f2e
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Common.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cassert>
+
+#include "Common.h"
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+Index shift(const Index &in_index, const Shape &shift_from)
+{
+ Index index = in_index;
+ assert(index.rank() == shift_from.rank());
+ for (int32_t d = 0; d < in_index.rank(); ++d)
+ {
+ index.at(d) = index.at(d) + shift_from.dim(d);
+ }
+ return index;
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Common.h b/compiler/mir-interpreter/src/ops/Common.h
new file mode 100644
index 000000000..43336216e
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Common.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_COMMON_
+#define _NNC_CORE_BACKEND_INTERPRETER_COMMON_
+
+#include "mir/Tensor.h"
+#include "mir/TensorVariant.h"
+#include "mir/DataType.h"
+#include "mir/Shape.h"
+#include "mir/Index.h"
+
+namespace mir_interpreter
+{
+
+template <template <typename> class F, typename... Args>
+void dispatch(mir::DataType dt, Args &&... args)
+{
+ switch (dt)
+ {
+ case mir::DataType::FLOAT32:
+ return F<float>::run(std::forward<Args>(args)...);
+ case mir::DataType::FLOAT64:
+ return F<double>::run(std::forward<Args>(args)...);
+ case mir::DataType::INT32:
+ return F<int32_t>::run(std::forward<Args>(args)...);
+ case mir::DataType::INT64:
+ return F<int64_t>::run(std::forward<Args>(args)...);
+ case mir::DataType::UINT8:
+ return F<uint8_t>::run(std::forward<Args>(args)...);
+ case mir::DataType::UNKNOWN:
+ throw std::runtime_error{"Unknown datatype met during operation execution"};
+ default:
+ throw std::runtime_error{"mir::DataType enum mismatch"};
+ }
+}
+
+template <typename T> void erase(mir::TensorVariant &tv)
+{
+ size_t element_count = tv.getShape().numElements();
+ for (size_t i = 0; i < element_count; ++i)
+ {
+ auto ptr = tv.atOffset(i);
+ *reinterpret_cast<T *>(ptr) = 0;
+ }
+}
+
+mir::Index shift(const mir::Index &in_index, const mir::Shape &shift_from);
+
+} // namespace mir_interpreter
+
+#endif // _NNC_CORE_BACKEND_INTERPRETER_COMMON_
diff --git a/compiler/mir-interpreter/src/ops/Concat.cpp b/compiler/mir-interpreter/src/ops/Concat.cpp
new file mode 100644
index 000000000..99fe00c31
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Concat.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Concat.h"
+#include "Common.h"
+
+#include <cmath>
+#include <cstring>
+
+namespace mir_interpreter
+{
+
+template <typename T> struct ConcatImpl
+{
+ static void run(const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs,
+ int axis, mir::TensorVariant &output);
+};
+
+template <typename T>
+void ConcatImpl<T>::run(const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs,
+ int axis, mir::TensorVariant &output)
+{
+ const auto &output_shape = output.getShape();
+ const size_t inputs_count = inputs.size();
+ const int32_t concat_dims = output_shape.rank();
+ int64_t concat_size = 0;
+ for (size_t i = 0; i < inputs_count; i++)
+ {
+ const auto &input_shape = inputs[i].get().getShape();
+ assert(input_shape.rank() == concat_dims);
+ for (int32_t j = 0; j < concat_dims; j++)
+ {
+ if (j != axis)
+ {
+ assert(input_shape.dim(j) == output_shape.dim(j));
+ }
+ }
+ concat_size += input_shape.dim(axis);
+ }
+ assert(concat_size == output_shape.dim(axis));
+ // Outer size before axis
+ int32_t outer_size = 1;
+ for (int32_t i = 0; i < axis; i++)
+ outer_size *= output_shape.dim(i);
+ // Inner size after axis
+ int32_t base_inner_size = 1;
+ for (int32_t i = axis + 1; i < concat_dims; i++)
+ base_inner_size *= output_shape.dim(i);
+ // flatten = outer_size * dim(axis) * base_inner_size;
+ std::vector<int32_t> copy_sizes;
+ std::vector<char *> input_ptrs;
+ for (size_t i = 0; i < inputs_count; i++)
+ {
+ const auto input_shape = inputs[i].get().getShape();
+ copy_sizes.push_back(input_shape.dim(axis) * base_inner_size);
+ input_ptrs.push_back(inputs[i].get().atOffset(0));
+ }
+
+ char *output_ptr = output.atOffset(0);
+ const size_t elem_size = inputs[0].get().getElementSize();
+ for (int32_t i = 0; i < outer_size; i++)
+ {
+ for (size_t j = 0; j < inputs_count; j++)
+ {
+ std::memcpy(output_ptr, input_ptrs[j], copy_sizes[j] * elem_size);
+ output_ptr += copy_sizes[j] * elem_size;
+ input_ptrs[j] += copy_sizes[j] * elem_size;
+ }
+ }
+}
+
+template <> struct ConcatImpl<uint8_t>
+{
+ static void run(const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs,
+ int axis, mir::TensorVariant &output);
+};
+
+void ConcatImpl<uint8_t>::run(
+ const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs, int axis,
+ mir::TensorVariant &output)
+{
+ const size_t inputs_count = inputs.size();
+ std::vector<int32_t> input_zeropoints(inputs_count);
+ std::vector<float> input_scales(inputs_count);
+ const auto &output_shape = output.getShape();
+ const int32_t concat_dimensions = output_shape.rank();
+ int64_t concat_size = 0;
+ for (size_t i = 0; i < inputs_count; i++)
+ {
+ const auto &input_type = inputs[i].get().getType();
+ assert(input_type.isQuantized());
+ assert(input_type.getElementType() == mir::DataType::UINT8);
+ const auto &input_shape = input_type.getShape();
+ assert(input_shape.rank() == concat_dimensions);
+
+ for (int32_t j = 0; j < concat_dimensions; j++)
+ if (j != axis)
+ assert(input_shape.dim(j) == output_shape.dim(j));
+
+ concat_size += input_shape.dim(axis);
+ input_zeropoints[i] = input_type.getQuantization().getZeroPoint();
+ input_scales[i] = input_type.getQuantization().getScale();
+ }
+ assert(concat_size == output_shape.dim(axis));
+
+ const auto &output_type = output.getType();
+ assert(output_type.isQuantized());
+ int32_t output_zeropoint = output_type.getQuantization().getZeroPoint();
+ float output_scale = output_type.getQuantization().getScale();
+
+ // Outer size before axis
+ int32_t outer_size = 1;
+ for (int32_t i = 0; i < axis; i++)
+ outer_size *= output_shape.dim(i);
+ // Inner size after axis
+ int32_t base_inner_size = 1;
+ for (int32_t i = axis + 1; i < concat_dimensions; i++)
+ base_inner_size *= output_shape.dim(i);
+ // flatten = outer_size * dim(axis) * base_inner_size;
+
+ uint8_t *output_ptr = reinterpret_cast<uint8_t *>(output.atOffset(0));
+
+ const float inverse_output_scale = 1.f / output_scale;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (size_t i = 0; i < inputs_count; ++i)
+ {
+ const mir::TensorVariant &input = inputs[i];
+ const int copy_size = input.getShape().dim(axis) * base_inner_size;
+ const char *input_data = input.atOffset(0) + k * copy_size;
+ const uint8_t *input_ptr = reinterpret_cast<const uint8_t *>(input_data);
+ if (input_zeropoints[i] == output_zeropoint && input_scales[i] == output_scale)
+ {
+ std::memcpy(output_ptr, input_ptr, copy_size);
+ }
+ else
+ {
+ const float scale = input_scales[i] * inverse_output_scale;
+ const float bias = -input_zeropoints[i] * scale;
+ for (int j = 0; j < copy_size; ++j)
+ {
+ const int32_t value =
+ static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+ output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
+ }
+ }
+ output_ptr += copy_size;
+ }
+ }
+}
+
+void Concat(const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs, int axis,
+ mir::TensorVariant &output)
+{
+ dispatch<ConcatImpl>(inputs[0].get().getElementType(), inputs, axis, output);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Concat.h b/compiler/mir-interpreter/src/ops/Concat.h
new file mode 100644
index 000000000..587a97809
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Concat.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_CONCAT_IMPL_
+#define _NNC_CORE_BACKEND_INTERPRETER_CONCAT_IMPL_
+
+#include <mir/TensorVariant.h>
+
+namespace mir_interpreter
+{
+
+void Concat(const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs, int axis,
+ mir::TensorVariant &output);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_CONCAT_IMPL_
diff --git a/compiler/mir-interpreter/src/ops/Conv2D.cpp b/compiler/mir-interpreter/src/ops/Conv2D.cpp
new file mode 100644
index 000000000..c9b98a56f
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Conv2D.cpp
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conv2D.h"
+#include "QuantizationHelpers.h"
+#include "Common.h"
+
+#include "mir/Tensor.h"
+
+#include <cmath>
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+static std::int32_t calcOffset(const Shape &shape, std::int32_t i0, std::int32_t i1,
+ std::int32_t i2, std::int32_t i3)
+{
+ return ((i0 * shape.dim(1) + i1) * shape.dim(2) + i2) * shape.dim(3) + i3;
+}
+
+template <typename T> struct Conv2DImpl
+{
+ static void run(const TensorVariant &input, const TensorVariant &kernel,
+ const Conv2DOpAttributes &attributes, TensorVariant &result,
+ const TensorVariant *fused_bias);
+};
+
+template <typename T>
+void Conv2DImpl<T>::run(const TensorVariant &input, const TensorVariant &kernel,
+ const Conv2DOpAttributes &attributes, TensorVariant &result,
+ const TensorVariant *fused_bias)
+{
+ const auto *input_data = reinterpret_cast<const T *>(input.atOffset(0));
+ const auto *kernel_data = reinterpret_cast<const T *>(kernel.atOffset(0));
+ auto *result_data = reinterpret_cast<T *>(result.atOffset(0));
+
+ const Shape &input_shape = input.getShape();
+ const Shape &output_shape = result.getShape();
+ const Shape &kernel_shape = kernel.getShape();
+
+ const std::vector<std::int32_t> &strides = attributes.strides;
+ const std::vector<std::int32_t> &padding_before = attributes.padding_before;
+ const std::int32_t num_groups = attributes.num_groups;
+ assert(attributes.data_format == DataFormat::NHWC);
+
+ const std::int32_t batch_size = output_shape.dim(0);
+ const std::int32_t output_height = output_shape.dim(1);
+ const std::int32_t output_width = output_shape.dim(2);
+ const std::int32_t kernel_height = kernel_shape.dim(1);
+ const std::int32_t kernel_width = kernel_shape.dim(2);
+ const std::int32_t input_height = input_shape.dim(1);
+ const std::int32_t input_width = input_shape.dim(2);
+
+ const std::int32_t num_in_channels = input_shape.dim(3);
+ const std::int32_t num_out_channels = output_shape.dim(3);
+
+ assert(num_in_channels % num_groups == 0);
+ assert(num_out_channels % num_groups == 0);
+
+ const std::int32_t out_group_size = num_out_channels / num_groups;
+ const std::int32_t in_group_size = num_in_channels / num_groups;
+
+ assert(kernel_shape.dim(3) == in_group_size);
+ assert(kernel_shape.dim(0) == num_out_channels);
+
+ for (std::int32_t batch = 0; batch < batch_size; ++batch)
+ {
+ for (std::int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (std::int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (std::int32_t group = 0; group < num_groups; ++group)
+ {
+ const std::int32_t out_group_offset = group * out_group_size;
+ const std::int32_t in_group_offset = group * in_group_size;
+
+ for (std::int32_t out_c = 0; out_c < out_group_size; ++out_c)
+ {
+ const std::int32_t in_y_origin = (out_y * strides[0]) - padding_before[0];
+ const std::int32_t in_x_origin = (out_x * strides[1]) - padding_before[1];
+
+ T sum = 0.0f;
+
+ for (std::int32_t kernel_y = 0; kernel_y < kernel_height; ++kernel_y)
+ {
+ for (std::int32_t kernel_x = 0; kernel_x < kernel_width; ++kernel_x)
+ {
+ for (std::int32_t in_c = 0; in_c < in_group_size; ++in_c)
+ {
+ const std::int32_t in_y = in_y_origin + kernel_y;
+ const std::int32_t in_x = in_x_origin + kernel_x;
+
+ if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+ {
+ const std::int32_t in_offset =
+ calcOffset(input_shape, batch, in_y, in_x, in_group_offset + in_c);
+ const std::int32_t kernel_offset = calcOffset(
+ kernel_shape, out_group_offset + out_c, kernel_y, kernel_x, in_c);
+ const T input_val = input_data[in_offset];
+ const T kernel_val = kernel_data[kernel_offset];
+ sum += kernel_val * input_val;
+ }
+ }
+ }
+ }
+
+ const std::int32_t out_offset =
+ calcOffset(output_shape, batch, out_y, out_x, out_group_offset + out_c);
+ result_data[out_offset] = sum;
+ }
+ }
+ }
+ }
+ }
+}
+
+template <> struct Conv2DImpl<uint8_t>
+{
+ static void run(const TensorVariant &input, const TensorVariant &kernel,
+ const Conv2DOpAttributes &attributes, TensorVariant &result,
+ const TensorVariant *fused_bias);
+};
+
+void Conv2DImpl<uint8_t>::run(const TensorVariant &input, const TensorVariant &kernel,
+ const Conv2DOpAttributes &attributes, TensorVariant &result,
+ const TensorVariant *fused_bias)
+{
+ if (!fused_bias)
+ {
+ throw std::runtime_error{"Quantized Conv2D cannot be executed without fused bias"};
+ }
+
+ const auto &input_type = input.getType();
+ const auto &kernel_type = kernel.getType();
+ const auto &bias_type = fused_bias->getType();
+ const auto &output_type = result.getType();
+ (void)bias_type;
+
+ assert(input_type.isQuantized());
+ assert(kernel_type.isQuantized());
+ assert(bias_type.isQuantized());
+ assert(output_type.isQuantized());
+ assert(input_type.getElementType() == DataType::UINT8);
+ assert(kernel_type.getElementType() == DataType::UINT8);
+ assert(bias_type.getElementType() == DataType::INT32);
+ assert(output_type.getElementType() == DataType::UINT8);
+
+ int32_t input_offset = -input_type.getQuantization().getZeroPoint();
+ int32_t kernel_offset = -kernel_type.getQuantization().getZeroPoint();
+ int32_t output_offset = output_type.getQuantization().getZeroPoint();
+
+ double input_scale = input_type.getQuantization().getScale();
+ double kernel_scale = kernel_type.getQuantization().getScale();
+ double output_scale = output_type.getQuantization().getScale();
+
+ double real_multiplier = input_scale * kernel_scale / output_scale;
+ int32_t output_multiplier = 0;
+ int output_shift = 0;
+ QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ const Shape &in_shape = input.getShape();
+ const Shape &kernel_shape = kernel.getShape();
+ const Shape &out_shape = result.getShape();
+ const auto &strides = attributes.strides;
+ const std::vector<int32_t> &pads = attributes.padding_before;
+ assert(attributes.num_groups == 1);
+ assert(attributes.data_format == DataFormat::NHWC);
+
+ assert(in_shape.rank() == 4);
+ assert(kernel_shape.rank() == 4);
+ assert(kernel_shape.dim(3) == in_shape.dim(3));
+ assert(kernel_shape.dim(0) == out_shape.dim(3));
+ assert(strides.size() == 2);
+ assert(pads.size() == 2);
+
+ int32_t stride_height = strides[0];
+ int32_t stride_width = strides[1];
+
+ int32_t pad_height = pads[0];
+ int32_t pad_width = pads[1];
+
+ int32_t input_height = in_shape.dim(1);
+ int32_t input_width = in_shape.dim(2);
+
+ Tensor<uint8_t> input_accessor(input);
+ Tensor<uint8_t> kernel_accessor(kernel);
+ Tensor<int32_t> bias_accessor(*fused_bias);
+ Tensor<uint8_t> res_accessor(result);
+
+ int32_t output_min = std::numeric_limits<uint8_t>::min();
+ int32_t output_max = std::numeric_limits<uint8_t>::max();
+
+ for (int batch = 0; batch < out_shape.dim(0); ++batch)
+ {
+ for (int out_y = 0; out_y < out_shape.dim(1); ++out_y)
+ {
+ for (int out_x = 0; out_x < out_shape.dim(2); ++out_x)
+ {
+ for (int out_channel = 0; out_channel < out_shape.dim(3); ++out_channel)
+ {
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < kernel_shape.dim(1); ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < kernel_shape.dim(2); ++filter_x)
+ {
+ for (int in_channel = 0; in_channel < kernel_shape.dim(3); ++in_channel)
+ {
+ const int in_x = in_x_origin + filter_x;
+ const int in_y = in_y_origin + filter_y;
+ // If the location is outside the bounds of the input image,
+ // use zero as a default value.
+ if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
+ {
+ Index in_index{batch, in_y, in_x, in_channel};
+ Index ker_index{out_channel, filter_y, filter_x, in_channel};
+ int32_t input_val = input_accessor.at(in_index);
+ int32_t kernel_val = kernel_accessor.at(ker_index);
+ acc += (kernel_val + kernel_offset) * (input_val + input_offset);
+ }
+ }
+ }
+ }
+ acc += bias_accessor.at(Index{out_channel});
+ acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ acc += output_offset;
+ acc = std::max(acc, output_min);
+ acc = std::min(acc, output_max);
+ Index out_index{batch, out_y, out_x, out_channel};
+ res_accessor.at(out_index) = static_cast<uint8_t>(acc);
+ }
+ }
+ }
+ }
+}
+
+void Conv2D(const mir::TensorVariant &input, const mir::TensorVariant &kernel,
+ const mir::Conv2DOpAttributes &attributes, mir::TensorVariant &result,
+ const mir::TensorVariant *fused_bias)
+{
+ dispatch<Conv2DImpl>(result.getElementType(), input, kernel, attributes, result, fused_bias);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Conv2D.h b/compiler/mir-interpreter/src/ops/Conv2D.h
new file mode 100644
index 000000000..ebb550816
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Conv2D.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_CONV2D_IMPL_
+#define _NNC_CORE_BACKEND_INTERPRETER_CONV2D_IMPL_
+
+#include "mir/ops/Conv2DOp.h"
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void Conv2D(const mir::TensorVariant &input, const mir::TensorVariant &kernel,
+ const mir::Conv2DOpAttributes &attributes, mir::TensorVariant &result,
+ const mir::TensorVariant *fused_bias);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_CONV2D_IMPL_
diff --git a/compiler/mir-interpreter/src/ops/DeConv2D.cpp b/compiler/mir-interpreter/src/ops/DeConv2D.cpp
new file mode 100644
index 000000000..746d8c87c
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/DeConv2D.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DeConv2D.h"
+#include "Common.h"
+
+#include "mir/TensorUtil.h"
+
+#include <cstdint>
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+using std::int32_t;
+
+static int32_t calcOffset(const Shape &shape, int32_t i0, int32_t i1, int32_t i2, int32_t i3)
+{
+ return ((i0 * shape.dim(1) + i1) * shape.dim(2) + i2) * shape.dim(3) + i3;
+}
+
+template <typename T> struct DeConv2DImpl
+{
+ static void run(const TensorVariant &input, const TensorVariant &kernel,
+ const Deconv2DOpAttributes &attributes, TensorVariant &output);
+};
+
+template <typename T>
+void DeConv2DImpl<T>::run(const TensorVariant &input, const TensorVariant &kernel,
+ const Deconv2DOpAttributes &attributes, TensorVariant &output)
+{
+ // [H, W, Co, Ci] -> [Ci, H, W, Co]
+ TensorVariant transposed_kernel = transposeTensor<3, 0, 1, 2>(kernel);
+
+ const auto *input_data = reinterpret_cast<const T *>(input.atOffset(0));
+ const auto *kernel_data = reinterpret_cast<const T *>(transposed_kernel.atOffset(0));
+ auto *output_data = reinterpret_cast<T *>(output.atOffset(0));
+
+ const Shape &input_shape = input.getShape();
+ const Shape &output_shape = output.getShape();
+ const Shape &kernel_shape = transposed_kernel.getShape();
+
+ const std::vector<int32_t> &strides = attributes.strides;
+ const std::vector<int32_t> &padding_before = attributes.padding_before;
+ assert(attributes.data_format == DataFormat::NHWC);
+
+ const int32_t batch_size = output_shape.dim(0);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+ const int32_t kernel_height = kernel_shape.dim(1);
+ const int32_t kernel_width = kernel_shape.dim(2);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+
+ const int32_t num_in_channels = input_shape.dim(3);
+ const int32_t num_out_channels = output_shape.dim(3);
+
+ assert(kernel_shape.dim(0) == num_in_channels);
+ assert(kernel_shape.dim(3) == num_out_channels);
+
+ erase<T>(output);
+
+ for (int32_t batch = 0; batch < batch_size; ++batch)
+ {
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int32_t in_c = 0; in_c < num_in_channels; ++in_c)
+ {
+ const T input_val = input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const int32_t out_y_origin = in_y * strides[0] - padding_before[0];
+ const int32_t out_x_origin = in_x * strides[1] - padding_before[1];
+
+ for (int32_t kernel_y = 0; kernel_y < kernel_height; ++kernel_y)
+ {
+ for (int32_t kernel_x = 0; kernel_x < kernel_width; ++kernel_x)
+ {
+ const int32_t out_y = out_y_origin + kernel_y;
+ const int32_t out_x = out_x_origin + kernel_x;
+
+ if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+ {
+ for (int32_t out_c = 0; out_c < num_out_channels; ++out_c)
+ {
+ const int32_t kernel_offset =
+ calcOffset(kernel_shape, in_c, kernel_y, kernel_x, out_c);
+ const int32_t output_offset =
+ calcOffset(output_shape, batch, out_y, out_x, out_c);
+ const T kernel_val = kernel_data[kernel_offset];
+ output_data[output_offset] += input_val * kernel_val;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+void DeConv2D(const TensorVariant &input, const TensorVariant &kernel,
+ const Deconv2DOpAttributes &attributes, TensorVariant &output)
+{
+ dispatch<DeConv2DImpl>(output.getElementType(), input, kernel, attributes, output);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/DeConv2D.h b/compiler/mir-interpreter/src/ops/DeConv2D.h
new file mode 100644
index 000000000..be797fcef
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/DeConv2D.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_DECONV2D_IMPL_
+#define _NNC_CORE_BACKEND_INTERPRETER_DECONV2D_IMPL_
+
+#include "mir/Attributes.h"
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+/**
+ * @brief Transposed convolution (or Deconvolution)
+ * @param input The input tensor
+ * @param kernel The kernel tensor
+ *
+ * This is basically the backward pass for the convolution operation,
+ * hence all the indexing can be deduced by expressing the input index
+ * of Conv in terms of its output index.
+ */
+
+void DeConv2D(const mir::TensorVariant &input, const mir::TensorVariant &kernel,
+ const mir::Deconv2DOpAttributes &attributes, mir::TensorVariant &output);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_DECONV2D_IMPL_
diff --git a/compiler/mir-interpreter/src/ops/DepthwiseConv2D.cpp b/compiler/mir-interpreter/src/ops/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..4b6df3478
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/DepthwiseConv2D.cpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConv2D.h"
+#include "QuantizationHelpers.h"
+#include "Common.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+#include <cmath>
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+template <typename T> struct DepthwiseConv2DImpl
+{
+ static void run(const mir::ops::DepthwiseConv2DOp &op, const mir::TensorVariant &inputv,
+ const mir::TensorVariant &kernelv, const mir::TensorVariant *biasv,
+ mir::TensorVariant &output);
+};
+
+template <typename T>
+void DepthwiseConv2DImpl<T>::run(const mir::ops::DepthwiseConv2DOp &op,
+ const mir::TensorVariant &inputv,
+ const mir::TensorVariant &kernelv, const mir::TensorVariant *biasv,
+ mir::TensorVariant &output)
+{
+ const Shape &in_shape = op.getInputShape(0);
+ const Shape &kernel_shape = op.getInputShape(1);
+ const Shape &out_shape = op.getOutputShape(0);
+ const auto &strides = op.getStrides();
+ const std::vector<int32_t> &pads = op.getPaddingBefore();
+
+ assert(in_shape.rank() == 4);
+ assert(kernel_shape.rank() == 4);
+ assert(kernel_shape.dim(2) == in_shape.dim(3));
+ assert(in_shape.dim(3) * kernel_shape.dim(3) == out_shape.dim(3));
+ assert(strides.size() == 2);
+ assert(pads.size() == 2);
+
+ int32_t channel_multiplier = kernel_shape.dim(3);
+
+ Tensor<T> res_accessor(output);
+ Tensor<T> input(inputv);
+ Tensor<T> bias(*biasv);
+ Tensor<T> kernel(kernelv);
+
+ ShapeRange in_range(in_shape);
+ ShapeRange kernel_range(kernel_shape);
+ ShapeRange out_range(Shape{out_shape.dim(0), out_shape.dim(1), out_shape.dim(2), 1});
+
+ Index in_index;
+ in_index.resize(4);
+
+ erase<T>(output);
+
+ for (const auto &out_index : out_range)
+ {
+ Index out_index_k = out_index;
+ for (const auto &kernel_index : kernel_range)
+ {
+ in_index.at(0) = out_index.at(0);
+ for (int i = 0; i < 2; ++i)
+ in_index.at(1 + i) = out_index.at(1 + i) * strides[i] + kernel_index.at(i) - pads[i];
+ in_index.at(3) = kernel_index.at(2);
+
+ if (in_range.contains(in_index))
+ {
+ out_index_k.at(3) = kernel_index.at(2) * channel_multiplier + kernel_index.at(3);
+ res_accessor.at(out_index_k) += input.at(in_index) * kernel.at(kernel_index);
+ }
+ }
+ }
+}
+
+template <> struct DepthwiseConv2DImpl<uint8_t>
+{
+ static void run(const mir::ops::DepthwiseConv2DOp &op, const mir::TensorVariant &inputv,
+ const mir::TensorVariant &kernelv, const mir::TensorVariant *biasv,
+ mir::TensorVariant &output);
+};
+
+void DepthwiseConv2DImpl<uint8_t>::run(const mir::ops::DepthwiseConv2DOp &op,
+ const mir::TensorVariant &inputv,
+ const mir::TensorVariant &kernelv,
+ const mir::TensorVariant *biasv, mir::TensorVariant &output)
+{
+ if (!biasv)
+ {
+ throw std::runtime_error{"Unsupported quantized DepthwiseConv2D without fused bias"};
+ }
+
+ const auto &input_type = inputv.getType();
+ const auto &kernel_type = kernelv.getType();
+ const auto &bias_type = biasv->getType();
+ const auto &output_type = op.getOutput(0)->getType();
+ (void)bias_type;
+
+ assert(input_type.isQuantized());
+ assert(kernel_type.isQuantized());
+ assert(bias_type.isQuantized());
+ assert(output_type.isQuantized());
+ assert(input_type.getElementType() == DataType::UINT8);
+ assert(kernel_type.getElementType() == DataType::UINT8);
+ assert(bias_type.getElementType() == DataType::INT32);
+
+ int32_t input_offset = -input_type.getQuantization().getZeroPoint();
+ int32_t kernel_offset = -kernel_type.getQuantization().getZeroPoint();
+ int32_t output_offset = output_type.getQuantization().getZeroPoint();
+
+ double input_scale = input_type.getQuantization().getScale();
+ double kernel_scale = kernel_type.getQuantization().getScale();
+ double output_scale = output_type.getQuantization().getScale();
+
+ double real_multiplier = input_scale * kernel_scale / output_scale;
+ int32_t output_multiplier = 0;
+ int output_shift = 0;
+ QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ const Shape &in_shape = inputv.getShape();
+ const Shape &kernel_shape = kernelv.getShape();
+ const Shape &out_shape = op.getOutputShape(0);
+ const auto &strides = op.getStrides();
+ const std::vector<int32_t> &pads = op.getPaddingBefore();
+
+ assert(in_shape.rank() == 4);
+ assert(kernel_shape.rank() == 4);
+ assert(kernel_shape.dim(2) == in_shape.dim(3)); // HWIO
+ assert(in_shape.dim(3) * kernel_shape.dim(3) == out_shape.dim(3));
+ assert(strides.size() == 2);
+ assert(pads.size() == 2);
+
+ int32_t stride_height = strides[0];
+ int32_t stride_width = strides[1];
+
+ int32_t pad_height = pads[0];
+ int32_t pad_width = pads[1];
+
+ int32_t input_height = in_shape.dim(1);
+ int32_t input_width = in_shape.dim(2);
+
+ Tensor<uint8_t> input_accessor(inputv);
+ Tensor<uint8_t> kernel_accessor(kernelv);
+ Tensor<int32_t> bias_accessor(*biasv);
+ Tensor<uint8_t> res_accessor(output);
+
+ int32_t output_min = std::numeric_limits<uint8_t>::min();
+ int32_t output_max = std::numeric_limits<uint8_t>::max();
+
+ int batches = out_shape.dim(0);
+ int output_height = out_shape.dim(1);
+ int output_width = out_shape.dim(2);
+ int input_depth = in_shape.dim(3);
+
+ int filter_height = kernel_shape.dim(0); // HWIO
+ int filter_width = kernel_shape.dim(1); // HWIO
+
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int ic = 0; ic < input_depth; ++ic)
+ {
+ const int oc = ic;
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + filter_x;
+ const int in_y = in_y_origin + filter_y;
+ // If the location is outside the bounds of the input image,
+ // use zero as a default value.
+ if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
+ {
+ Index in_index{b, in_y, in_x, ic};
+ Index ker_index{filter_y, filter_x, oc, 0}; // HWIO
+ int32_t input_val = input_accessor.at(in_index);
+ int32_t kernel_val = kernel_accessor.at(ker_index);
+ acc += (kernel_val + kernel_offset) * (input_val + input_offset);
+ }
+ }
+ }
+ acc += bias_accessor.at(Index{oc});
+ acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ acc += output_offset;
+ acc = std::max(acc, output_min);
+ acc = std::min(acc, output_max);
+ Index out_index{b, out_y, out_x, oc};
+ res_accessor.at(out_index) = static_cast<uint8_t>(acc);
+ }
+ }
+ }
+ }
+}
+
+void DepthwiseConv2D(const mir::ops::DepthwiseConv2DOp &op, const mir::TensorVariant &input,
+ const mir::TensorVariant &kernel, mir::TensorVariant &output,
+ const mir::TensorVariant *bias)
+{
+ dispatch<DepthwiseConv2DImpl>(output.getElementType(), op, input, kernel, bias, output);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/DepthwiseConv2D.h b/compiler/mir-interpreter/src/ops/DepthwiseConv2D.h
new file mode 100644
index 000000000..d89529fc9
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/DepthwiseConv2D.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_DEPTHWISE_CONV2D_IMPL_
+#define _NNC_CORE_BACKEND_INTERPRETER_DEPTHWISE_CONV2D_IMPL_
+
+#include "mir/ops/DepthwiseConv2DOp.h"
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void DepthwiseConv2D(const mir::ops::DepthwiseConv2DOp &op, const mir::TensorVariant &input,
+ const mir::TensorVariant &kernel, mir::TensorVariant &output,
+ const mir::TensorVariant *bias);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_DEPTHWISE_CONV2D_IMPL_
diff --git a/compiler/mir-interpreter/src/ops/Div.cpp b/compiler/mir-interpreter/src/ops/Div.cpp
new file mode 100644
index 000000000..00553e7e0
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Div.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Div.h"
+#include "Common.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+template <typename T> struct DivImpl
+{
+ static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res);
+};
+
+template <typename T>
+void DivImpl<T>::run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+ TensorVariant broadcasted_lhs(lhs, res.getShape());
+ TensorVariant broadcasted_rhs(rhs, res.getShape());
+ Tensor<T> lhs_accessor(broadcasted_lhs);
+ Tensor<T> rhs_accessor(broadcasted_rhs);
+ Tensor<T> res_accessor(res);
+
+ for (const auto &index : ShapeRange(res.getShape()))
+ {
+ res_accessor.at(index) = lhs_accessor.at(index) / rhs_accessor.at(index);
+ }
+}
+
+template <> struct DivImpl<uint8_t>
+{
+ static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+ {
+ // No support for quantized elementwise div yet
+ throw std::runtime_error{"NYI"};
+ }
+};
+
+void Div(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+ dispatch<DivImpl>(res.getElementType(), lhs, rhs, res);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Div.h b/compiler/mir-interpreter/src/ops/Div.h
new file mode 100644
index 000000000..558e299ec
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Div.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_DIV_
+#define _NNC_CORE_BACKEND_INTERPRETER_DIV_
+
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void Div(const mir::TensorVariant &lhs, const mir::TensorVariant &rhs, mir::TensorVariant &res);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_DIV_
diff --git a/compiler/mir-interpreter/src/ops/ELU.cpp b/compiler/mir-interpreter/src/ops/ELU.cpp
new file mode 100644
index 000000000..0cd76baf4
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/ELU.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ELU.h"
+#include "Common.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+#include <cmath>
+
+namespace mir_interpreter
+{
+
+template <typename T> struct ELUImpl
+{
+  static void run(const mir::TensorVariant &arg, float alpha, mir::TensorVariant &result);
+};
+
+template <typename T>
+void ELUImpl<T>::run(const mir::TensorVariant &arg, float alpha, mir::TensorVariant &result)
+{
+  mir::Tensor<T> arg_accessor(arg);
+  mir::Tensor<T> res_accessor(result);
+
+  for (const auto &index : mir::ShapeRange(result.getShape()))
+  {
+    const T x = arg_accessor.at(index);
+    res_accessor.at(index) = x < 0 ? alpha * (std::exp(x) - 1) : x;
+  }
+}
+
+void ELU(const mir::TensorVariant &arg, float alpha, mir::TensorVariant &result)
+{
+  dispatch<ELUImpl>(result.getElementType(), arg, alpha, result);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/ELU.h b/compiler/mir-interpreter/src/ops/ELU.h
new file mode 100644
index 000000000..c6ebae1a7
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/ELU.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_ELU_
+#define _NNC_CORE_BACKEND_INTERPRETER_ELU_
+
+#include <mir/TensorVariant.h>
+
+namespace mir_interpreter
+{
+
+void ELU(const mir::TensorVariant &arg, float alpha, mir::TensorVariant &result);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_ELU_
diff --git a/compiler/mir-interpreter/src/ops/Equal.cpp b/compiler/mir-interpreter/src/ops/Equal.cpp
new file mode 100644
index 000000000..b75ea5543
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Equal.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Equal.h"
+#include "Common.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+template <typename T> struct EqualImpl
+{
+ static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res);
+};
+
+template <typename T>
+void EqualImpl<T>::run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+ TensorVariant broadcasted_lhs(lhs, res.getShape());
+ TensorVariant broadcasted_rhs(rhs, res.getShape());
+ Tensor<T> lhs_accessor(broadcasted_lhs);
+ Tensor<T> rhs_accessor(broadcasted_rhs);
+ Tensor<uint8_t> res_accessor(res);
+
+ for (const auto &index : ShapeRange(res.getShape()))
+ {
+ res_accessor.at(index) = (lhs_accessor.at(index) == rhs_accessor.at(index));
+ }
+}
+
+void Equal(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+ if (lhs.getElementType() != rhs.getElementType())
+ {
+ throw std::runtime_error{"Equal with different input types is unsupported"};
+ }
+
+ dispatch<EqualImpl>(lhs.getElementType(), lhs, rhs, res);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Equal.h b/compiler/mir-interpreter/src/ops/Equal.h
new file mode 100644
index 000000000..2d112a2f1
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Equal.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_EQUAL_
+#define _NNC_CORE_BACKEND_INTERPRETER_EQUAL_
+
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void Equal(const mir::TensorVariant &lhs, const mir::TensorVariant &rhs, mir::TensorVariant &res);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_EQUAL_
diff --git a/compiler/mir-interpreter/src/ops/Fill.h b/compiler/mir-interpreter/src/ops/Fill.h
new file mode 100644
index 000000000..6dee25b8a
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Fill.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_FILL_
+#define _NNC_CORE_BACKEND_INTERPRETER_FILL_
+
+#include "Common.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+namespace mir_interpreter
+{
+
+template <typename T> struct FillImpl
+{
+ template <typename F> static void run(mir::TensorVariant &res, F f)
+ {
+ mir::Tensor<T> res_accessor(res);
+
+ for (const auto &index : mir::ShapeRange(res.getShape()))
+ {
+ res_accessor.at(index) = f(index);
+ }
+ }
+};
+
+template <typename F> void Fill(mir::TensorVariant &t, F f)
+{
+ dispatch<FillImpl>(t.getElementType(), t, f);
+}
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_FILL_
diff --git a/compiler/mir-interpreter/src/ops/FullyConnected.cpp b/compiler/mir-interpreter/src/ops/FullyConnected.cpp
new file mode 100644
index 000000000..9c6ef8dc8
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/FullyConnected.cpp
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnected.h"
+#include "Common.h"
+
+#include "QuantizationHelpers.h"
+
+#include "mir/Tensor.h"
+
+namespace mir_interpreter
+{
+
+template <typename T>
+static void fullyConnected2D(const mir::TensorVariant &input, const mir::TensorVariant &weights,
+ mir::TensorVariant &output)
+{
+ assert(input.getShape().rank() == 2);
+ assert(weights.getShape().rank() == 2);
+ assert(input.getShape().dim(1) == weights.getShape().dim(0));
+
+ auto in_raw = reinterpret_cast<T *>(input.atOffset(0));
+ auto weight_raw = reinterpret_cast<T *>(weights.atOffset(0));
+ auto output_raw = reinterpret_cast<T *>(output.atOffset(0));
+
+ auto rows = output.getShape().dim(0);
+ auto cols = output.getShape().dim(1);
+ auto N = input.getShape().dim(1);
+ auto wcols = weights.getShape().dim(1);
+
+ for (int32_t r = 0; r < rows; ++r)
+ {
+ for (int32_t k = 0; k < N; ++k)
+ {
+ auto in = in_raw[r * N + k];
+
+ for (int32_t c = 0; c < cols; ++c)
+ {
+ output_raw[r * cols + c] += in * weight_raw[k * wcols + c];
+ }
+ }
+ }
+}
+
+template <typename T> struct FullyConnectedImpl
+{
+ static void run(const mir::TensorVariant &inputv, const mir::TensorVariant &weightsv,
+ const mir::ops::FullyConnectedOp &op, mir::TensorVariant &res,
+ const mir::TensorVariant *biasv);
+};
+
+template <typename T>
+void FullyConnectedImpl<T>::run(const mir::TensorVariant &inputv,
+ const mir::TensorVariant &weightsv,
+ const mir::ops::FullyConnectedOp &op, mir::TensorVariant &res,
+ const mir::TensorVariant *biasv)
+{
+ if (biasv)
+ {
+ throw std::runtime_error("non-quantized FullyConnected with fused bias is unsupported");
+ }
+
+ mir::Tensor<T> input{inputv};
+ mir::Tensor<T> weights{weightsv};
+
+ erase<T>(res);
+
+ if (input.getShape().rank() == 2 && weights.getShape().rank() == 2 && res.getShape().rank() == 2)
+ {
+ // optimized case for 2d matrix multiplication
+ fullyConnected2D<T>(inputv, weightsv, res);
+ return;
+ }
+
+ mir::Tensor<T> accessor(res);
+
+ const mir::Shape &in_shape = input.getShape();
+ int32_t in_rank = in_shape.rank();
+
+ const mir::Shape &w_shape = weights.getShape();
+ int32_t w_rank = w_shape.rank();
+
+ assert(in_shape.dim(in_rank - 1) == w_shape.dim(w_rank - 2));
+ (void)in_rank;
+
+ mir::ShapeRange out_range(res.getShape());
+
+ int32_t len = w_shape.dim(w_rank - 2);
+
+ for (auto &out_index : out_range)
+ {
+ mir::Index t_index = out_index;
+ T &output_element = accessor.at(out_index);
+ int32_t col = t_index.at(w_rank - 1);
+ int32_t row = t_index.at(w_rank - 2);
+ for (int32_t i = 0; i < len; ++i)
+ {
+ t_index.at(w_rank - 1) = i;
+ T in = input.at(t_index);
+ t_index.at(w_rank - 1) = col;
+ t_index.at(w_rank - 2) = i;
+ T w = weights.at(t_index);
+ t_index.at(w_rank - 2) = row;
+ output_element += in * w;
+ }
+ }
+}
+
+template <> struct FullyConnectedImpl<uint8_t>
+{
+ static void run(const mir::TensorVariant &inputv, const mir::TensorVariant &weightsv,
+ const mir::ops::FullyConnectedOp &op, mir::TensorVariant &res,
+ const mir::TensorVariant *biasv);
+};
+
+void FullyConnectedImpl<uint8_t>::run(const mir::TensorVariant &inputv,
+ const mir::TensorVariant &weightsv,
+ const mir::ops::FullyConnectedOp &op, mir::TensorVariant &res,
+ const mir::TensorVariant *biasv)
+{
+ if (!biasv)
+ {
+ throw std::runtime_error{"Quantized FullyConnected cannot be executed without fused bias"};
+ }
+
+ const auto &input_type = inputv.getType();
+ const auto &weights_type = weightsv.getType();
+ const auto &bias_type = biasv->getType();
+ const auto &output_type = op.getOutput(0)->getType();
+ (void)bias_type;
+
+ assert(input_type.isQuantized());
+ assert(weights_type.isQuantized());
+ assert(bias_type.isQuantized());
+ assert(output_type.isQuantized());
+ assert(input_type.getElementType() == mir::DataType::UINT8);
+ assert(weights_type.getElementType() == mir::DataType::UINT8);
+ assert(bias_type.getElementType() == mir::DataType::INT32);
+
+ int32_t input_offset = -input_type.getQuantization().getZeroPoint();
+ int32_t weights_offset = -weights_type.getQuantization().getZeroPoint();
+ int32_t output_offset = output_type.getQuantization().getZeroPoint();
+
+ double input_scale = input_type.getQuantization().getScale();
+ double weights_scale = weights_type.getQuantization().getScale();
+ double output_scale = output_type.getQuantization().getScale();
+
+ double real_multiplier = input_scale * weights_scale / output_scale;
+ int32_t output_multiplier = 0;
+ int output_shift = 0;
+ QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ const mir::Shape &in_shape = inputv.getShape();
+ const mir::Shape &weights_shape = weightsv.getShape();
+ const mir::Shape &out_shape = op.getOutputShape(0);
+
+ const int32_t batches = in_shape.dim(0);
+ assert(in_shape.rank() == 2);
+ assert(weights_shape.rank() == 2);
+ assert(in_shape.dim(1) == weights_shape.dim(0));
+ const int32_t accum_depth = weights_shape.dim(0);
+ const int32_t output_depth = weights_shape.dim(1);
+
+ uint8_t *input_data = reinterpret_cast<uint8_t *>(inputv.atOffset(0));
+ uint8_t *weights_data = reinterpret_cast<uint8_t *>(weightsv.atOffset(0));
+ int32_t *bias_data = reinterpret_cast<int32_t *>(biasv->atOffset(0));
+
+ uint8_t *output_data = reinterpret_cast<uint8_t *>(res.atOffset(0));
+
+ int32_t output_min = std::numeric_limits<uint8_t>::min();
+ int32_t output_max = std::numeric_limits<uint8_t>::max();
+
+ for (int32_t b = 0; b < batches; ++b)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ int32_t acc = 0;
+ for (int d = 0; d < accum_depth; ++d)
+ {
+ int32_t input_val = input_data[b * accum_depth + d];
+ int32_t weights_val = weights_data[d * output_depth + out_c];
+ acc += (weights_val + weights_offset) * (input_val + input_offset);
+ }
+ acc += bias_data[out_c];
+ acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ acc += output_offset;
+ acc = std::max(acc, output_min);
+ acc = std::min(acc, output_max);
+ output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
+ }
+ }
+}
+
+void FullyConnected(const mir::TensorVariant &input, const mir::TensorVariant &weights,
+ const mir::ops::FullyConnectedOp &op, mir::TensorVariant &res,
+ const mir::TensorVariant *bias)
+{
+ dispatch<FullyConnectedImpl>(res.getElementType(), input, weights, op, res, bias);
+}
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/FullyConnected.h b/compiler/mir-interpreter/src/ops/FullyConnected.h
new file mode 100644
index 000000000..fdfe64265
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/FullyConnected.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_FULLYCONNECTED_
+#define _NNC_CORE_BACKEND_INTERPRETER_FULLYCONNECTED_
+
+#include "mir/ops/FullyConnectedOp.h"
+#include "mir/ShapeRange.h"
+
+namespace mir_interpreter
+{
+
+void FullyConnected(const mir::TensorVariant &input, const mir::TensorVariant &weights,
+ const mir::ops::FullyConnectedOp &op, mir::TensorVariant &res,
+ const mir::TensorVariant *bias);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_FULLYCONNECTED_
diff --git a/compiler/mir-interpreter/src/ops/Gather.cpp b/compiler/mir-interpreter/src/ops/Gather.cpp
new file mode 100644
index 000000000..4328c26b2
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Gather.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Gather.h"
+#include "Common.h"
+
+#include "mir/Tensor.h"
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+// Gather: picks slices of `datav` along the op's axis according to the integer
+// indices in `indicesv`. T is the data element type, IndicesT the index type.
+template <typename T, typename IndicesT> struct GatherImpl
+{
+  static void run(const TensorVariant &datav, const TensorVariant &indicesv,
+                  const ops::GatherOp &op, mir::TensorVariant &res);
+};
+
+template <typename T, typename IndicesT>
+void GatherImpl<T, IndicesT>::run(const TensorVariant &datav, const TensorVariant &indicesv,
+                                  const ops::GatherOp &op, TensorVariant &res)
+{
+  const auto &data_shape = datav.getShape();
+  const auto &indices_shape = indicesv.getShape();
+  Tensor<T> data(datav);
+  Tensor<T> output(res);
+  Tensor<IndicesT> indices(indicesv);
+
+  // A negative axis counts from the end of the shape.
+  int32_t axis = op.getAxis();
+  if (axis < 0)
+    axis += data_shape.rank();
+  assert(axis >= 0 && axis < data_shape.rank());
+  int32_t axis_size = data_shape.dim(axis);
+  int32_t num_indices = indices_shape.numElements();
+
+  // View the data as a logical 3-D tensor (outer, axis, inner): `outer_size`
+  // collapses every dimension before `axis`, `inner_size` every one after it.
+  int32_t outer_size = 1;
+  for (int32_t i = 0; i < axis; ++i)
+    outer_size *= data_shape.dim(i);
+
+  int32_t inner_size = 1;
+  for (int32_t i = axis + 1; i < data_shape.rank(); ++i)
+    inner_size *= data_shape.dim(i);
+
+  for (int32_t outer = 0; outer < outer_size; ++outer)
+  {
+    for (int32_t i = 0; i < num_indices; ++i)
+    {
+      // Each index selects one contiguous run of `inner_size` elements.
+      auto index = indices.atOffset(i);
+      assert(index >= 0 && index < axis_size);
+      for (int32_t inner = 0; inner < inner_size; inner++)
+      {
+        output.atOffset((outer * num_indices + i) * inner_size + inner) =
+            data.atOffset((outer * axis_size + index) * inner_size + inner);
+      }
+    }
+  }
+}
+
+// A hack to reuse the dispatch function.
+// Adapter that fixes the data element type T so that `dispatch` (which selects
+// on a single type parameter) can be applied a second time for the index type.
+template <typename T> struct GatherByT
+{
+
+  template <typename IndicesT> using GatherWithFixedT = GatherImpl<T, IndicesT>;
+
+  static void run(const TensorVariant &data, const TensorVariant &indices, const ops::GatherOp &op,
+                  TensorVariant &res)
+  {
+    // Second dispatch: on the element type of the indices tensor.
+    dispatch<GatherWithFixedT>(indices.getElementType(), data, indices, op, res);
+  }
+};
+
+// Entry point: double dispatch — first on the data element type, then (inside
+// GatherByT) on the indices element type.
+void Gather(const TensorVariant &data, const TensorVariant &indices, const ops::GatherOp &op,
+            TensorVariant &res)
+{
+  dispatch<GatherByT>(data.getElementType(), data, indices, op, res);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Gather.h b/compiler/mir-interpreter/src/ops/Gather.h
new file mode 100644
index 000000000..0f9648323
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Gather.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_GATHER_
+#define _NNC_CORE_BACKEND_INTERPRETER_GATHER_
+
+#include "mir/ops/GatherOp.h"
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void Gather(const mir::TensorVariant &data, const mir::TensorVariant &indices,
+ const mir::ops::GatherOp &op, mir::TensorVariant &res);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_GATHER_
diff --git a/compiler/mir-interpreter/src/ops/Greater.cpp b/compiler/mir-interpreter/src/ops/Greater.cpp
new file mode 100644
index 000000000..36400292f
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Greater.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Greater.h"
+#include "Common.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+template <typename T> struct GreaterImpl
+{
+  static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res);
+};
+
+template <typename T>
+void GreaterImpl<T>::run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+  // Broadcast both operands to the output shape, then compare element-wise.
+  // The result tensor holds 0/1 values stored as uint8_t.
+  const Shape &out_shape = res.getShape();
+  TensorVariant lhs_bcast(lhs, out_shape);
+  TensorVariant rhs_bcast(rhs, out_shape);
+  Tensor<T> left(lhs_bcast);
+  Tensor<T> right(rhs_bcast);
+  Tensor<uint8_t> out(res);
+
+  for (const auto &idx : ShapeRange(out_shape))
+    out.at(idx) = (left.at(idx) > right.at(idx));
+}
+
+// Entry point: element-wise `lhs > rhs` with broadcasting; the result stores
+// 0/1 as uint8_t. Mixed operand element types are rejected up front.
+void Greater(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+  if (lhs.getElementType() != rhs.getElementType())
+  {
+    throw std::runtime_error{"Greater with different input types is unsupported"};
+  }
+  dispatch<GreaterImpl>(lhs.getElementType(), lhs, rhs, res);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Greater.h b/compiler/mir-interpreter/src/ops/Greater.h
new file mode 100644
index 000000000..812245ecd
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Greater.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_GREATER_
+#define _NNC_CORE_BACKEND_INTERPRETER_GREATER_
+
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void Greater(const mir::TensorVariant &lhs, const mir::TensorVariant &rhs, mir::TensorVariant &res);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_GREATER_
diff --git a/compiler/mir-interpreter/src/ops/HardSwish.cpp b/compiler/mir-interpreter/src/ops/HardSwish.cpp
new file mode 100644
index 000000000..20f7820c2
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/HardSwish.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HardSwish.h"
+#include "Common.h"
+
+namespace mir_interpreter
+{
+
+// Hard-swish activation: y = x * relu6(x + 3) / 6, applied element-wise over
+// the flat buffer of the input tensor.
+template <typename T> struct HardSwishImpl
+{
+  static void run(const mir::TensorVariant &input, mir::TensorVariant &result);
+};
+
+template <typename T>
+void HardSwishImpl<T>::run(const mir::TensorVariant &input, mir::TensorVariant &result)
+{
+  // Raw pointer walk over the contiguous element buffers of both tensors.
+  auto output_data = reinterpret_cast<T *>(result.atOffset(0));
+  auto input_data = reinterpret_cast<T *>(input.atOffset(0));
+  auto in_end = input_data + input.getShape().numElements();
+  for (; input_data < in_end; input_data++, output_data++)
+  {
+    const auto in = *input_data;
+    // relu6(x + 3) == min(6, max(0, x + 3)).
+    // NOTE(review): std::min/std::max need <algorithm>, which this file only
+    // receives transitively — consider including it explicitly.
+    *output_data = in * std::min<T>(6.f, std::max<T>(0.f, in + 3.f)) / 6.f;
+  }
+}
+
+// Quantized (uint8) hard-swish is not implemented yet.
+template <> struct HardSwishImpl<uint8_t>
+{
+  static void run(const mir::TensorVariant &input, mir::TensorVariant &result)
+  {
+    throw std::runtime_error{"NYI"};
+  }
+};
+
+// Entry point: dispatches on the input's element type.
+void HardSwish(const mir::TensorVariant &input, mir::TensorVariant &result)
+{
+  dispatch<HardSwishImpl>(input.getElementType(), input, result);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/HardSwish.h b/compiler/mir-interpreter/src/ops/HardSwish.h
new file mode 100644
index 000000000..9b39bb164
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/HardSwish.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_HARDSWISH_IMPL_
+#define _NNC_CORE_BACKEND_INTERPRETER_HARDSWISH_IMPL_
+
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void HardSwish(const mir::TensorVariant &input, mir::TensorVariant &result);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_HARDSWISH_IMPL_
diff --git a/compiler/mir-interpreter/src/ops/LeakyReLU.cpp b/compiler/mir-interpreter/src/ops/LeakyReLU.cpp
new file mode 100644
index 000000000..5b265f9f5
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/LeakyReLU.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU.h"
+#include "Common.h"
+
+#include <mir/ShapeRange.h>
+#include <mir/Tensor.h>
+
+namespace mir_interpreter
+{
+
+template <typename T> struct LeakyReLUImpl
+{
+  static void run(const mir::TensorVariant &arg, float alpha, mir::TensorVariant &result);
+};
+
+template <typename T>
+void LeakyReLUImpl<T>::run(const mir::TensorVariant &arg, float alpha, mir::TensorVariant &result)
+{
+  // Element-wise: y = x for x >= 0, y = alpha * x otherwise.
+  mir::Tensor<T> in(arg);
+  mir::Tensor<T> out(result);
+
+  for (const auto &idx : mir::ShapeRange(result.getShape()))
+  {
+    const T value = in.at(idx);
+    out.at(idx) = value < 0 ? value * alpha : value;
+  }
+}
+
+// Entry point for leaky ReLU.
+// NOTE(review): this dispatches on the *result* element type, while the other
+// ops in this directory dispatch on the input's — confirm this is intended
+// (the two should normally coincide).
+void LeakyReLU(const mir::TensorVariant &arg, float alpha, mir::TensorVariant &result)
+{
+  dispatch<LeakyReLUImpl>(result.getElementType(), arg, alpha, result);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/LeakyReLU.h b/compiler/mir-interpreter/src/ops/LeakyReLU.h
new file mode 100644
index 000000000..6bf9b78ac
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/LeakyReLU.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_LEAKYRELU_
+#define _NNC_CORE_BACKEND_INTERPRETER_LEAKYRELU_
+
+#include <mir/TensorVariant.h>
+
+namespace mir_interpreter
+{
+
+void LeakyReLU(const mir::TensorVariant &arg, float cap, mir::TensorVariant &result);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_LEAKYRELU_
diff --git a/compiler/mir-interpreter/src/ops/Less.cpp b/compiler/mir-interpreter/src/ops/Less.cpp
new file mode 100644
index 000000000..8da351915
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Less.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Less.h"
+#include "Common.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+template <typename T> struct LessImpl
+{
+  static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res);
+};
+
+template <typename T>
+void LessImpl<T>::run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+  // Broadcast both operands to the output shape, then compare element-wise.
+  // The result tensor holds 0/1 values stored as uint8_t.
+  const Shape &out_shape = res.getShape();
+  TensorVariant lhs_bcast(lhs, out_shape);
+  TensorVariant rhs_bcast(rhs, out_shape);
+  Tensor<T> left(lhs_bcast);
+  Tensor<T> right(rhs_bcast);
+  Tensor<uint8_t> out(res);
+
+  for (const auto &idx : ShapeRange(out_shape))
+    out.at(idx) = (left.at(idx) < right.at(idx));
+}
+
+// Entry point: element-wise `lhs < rhs` with broadcasting; the result stores
+// 0/1 as uint8_t. Mixed operand element types are rejected up front.
+void Less(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+  if (lhs.getElementType() != rhs.getElementType())
+  {
+    throw std::runtime_error{"Less with different input types is unsupported"};
+  }
+  dispatch<LessImpl>(lhs.getElementType(), lhs, rhs, res);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Less.h b/compiler/mir-interpreter/src/ops/Less.h
new file mode 100644
index 000000000..fa3edd2d0
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Less.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_LESS_
+#define _NNC_CORE_BACKEND_INTERPRETER_LESS_
+
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void Less(const mir::TensorVariant &lhs, const mir::TensorVariant &rhs, mir::TensorVariant &res);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_LESS_
diff --git a/compiler/mir-interpreter/src/ops/Max.cpp b/compiler/mir-interpreter/src/ops/Max.cpp
new file mode 100644
index 000000000..eb284c77c
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Max.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Max.h"
+#include "Common.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+#include <algorithm>
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+template <typename T> struct MaxImpl
+{
+  static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res);
+};
+
+template <typename T>
+void MaxImpl<T>::run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+  // Element-wise maximum of the operands after broadcasting both to the
+  // output shape.
+  const Shape &out_shape = res.getShape();
+  TensorVariant lhs_bcast(lhs, out_shape);
+  TensorVariant rhs_bcast(rhs, out_shape);
+  Tensor<T> left(lhs_bcast);
+  Tensor<T> right(rhs_bcast);
+  Tensor<T> out(res);
+
+  for (const auto &idx : ShapeRange(out_shape))
+    out.at(idx) = std::max(left.at(idx), right.at(idx));
+}
+// Quantized (uint8) Max is not implemented yet.
+template <> struct MaxImpl<uint8_t>
+{
+  static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+  {
+    throw std::runtime_error{"NYI"};
+  };
+};
+
+// Entry point: element-wise maximum with broadcasting; mixed operand element
+// types are rejected up front.
+void Max(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+  if (lhs.getElementType() != rhs.getElementType())
+  {
+    throw std::runtime_error{"Max with different input types is unsupported"};
+  }
+  dispatch<MaxImpl>(lhs.getElementType(), lhs, rhs, res);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Max.h b/compiler/mir-interpreter/src/ops/Max.h
new file mode 100644
index 000000000..b49d0602d
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Max.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_MAX_
+#define _NNC_CORE_BACKEND_INTERPRETER_MAX_
+
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void Max(const mir::TensorVariant &lhs, const mir::TensorVariant &rhs, mir::TensorVariant &res);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_MAX_
diff --git a/compiler/mir-interpreter/src/ops/MaxPool2D.cpp b/compiler/mir-interpreter/src/ops/MaxPool2D.cpp
new file mode 100644
index 000000000..cec2f5984
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/MaxPool2D.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPool2D.h"
+#include "Common.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+#include <limits>
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+// Generic 2-D max pooling over a rank-4 (assumed NHWC) tensor.
+template <typename T> struct MaxPool2DImpl
+{
+  static void run(const mir::TensorVariant &inputv, const mir::ops::MaxPool2DOp &op,
+                  mir::TensorVariant &result);
+};
+
+template <typename T>
+void MaxPool2DImpl<T>::run(const TensorVariant &inputv, const ops::MaxPool2DOp &op,
+                           TensorVariant &result)
+{
+  const auto &input_shape = op.getInputShape(0);
+  const auto &output_shape = op.getOutputShape(0);
+  const auto &window_size = op.getWindowSize();
+  const auto &strides = op.getStrides();
+  const auto &padding_before = op.getPaddingBefore();
+  const auto &padding_after = op.getPaddingAfter();
+  (void)padding_after; // only referenced by the asserts below
+
+  Tensor<T> input(inputv);
+
+  constexpr int num_spatial_dims = 2;
+  assert(input.getShape().rank() == 4);
+  assert(window_size.size() == num_spatial_dims);
+  assert(strides.size() == num_spatial_dims);
+  assert(padding_before.size() == num_spatial_dims);
+  assert(padding_after.size() == num_spatial_dims);
+
+  Tensor<T> res_accessor(result);
+
+  ShapeRange in_range(input_shape);
+  Index in_index(input_shape.rank());
+
+  for (const auto &out_index : ShapeRange(output_shape))
+  {
+    // Renamed from `result` to stop shadowing the output parameter.
+    T max_value = std::numeric_limits<T>::lowest();
+
+    // Assuming NHWC format.
+    in_index.at(0) = out_index.at(0);
+    in_index.at(3) = out_index.at(3);
+
+    for (const auto &window_index : ShapeRange(Shape(window_size)))
+    {
+      // Assuming NHWC format.
+      for (int i = 0; i < num_spatial_dims; ++i)
+        in_index.at(1 + i) =
+            out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+
+      // Positions outside the input are padding and contribute nothing.
+      if (in_range.contains(in_index))
+      {
+        max_value = std::max(max_value, input.at(in_index));
+      }
+    }
+
+    res_accessor.at(out_index) = max_value;
+  }
+}
+
+// Quantized (uint8) 2-D max pooling.
+template <> struct MaxPool2DImpl<uint8_t>
+{
+  static void run(const mir::TensorVariant &input, const mir::ops::MaxPool2DOp &op,
+                  mir::TensorVariant &result);
+};
+
+void MaxPool2DImpl<uint8_t>::run(const TensorVariant &input, const ops::MaxPool2DOp &op,
+                                 TensorVariant &result)
+{
+  const auto &input_type = input.getType();
+  const auto &output_type = op.getOutput(0)->getType();
+  (void)input_type;
+  (void)output_type; // only referenced by the asserts below
+
+  assert(input_type.isQuantized());
+  assert(output_type.isQuantized());
+  assert(input_type.getElementType() == DataType::UINT8);
+
+  const auto &input_shape = op.getInputShape(0);
+  const auto &output_shape = op.getOutputShape(0);
+  const auto &window_size = op.getWindowSize();
+  const auto &strides = op.getStrides();
+  const auto &padding_before = op.getPaddingBefore();
+  const auto &padding_after = op.getPaddingAfter();
+  (void)padding_after;
+
+  constexpr int num_spatial_dims = 2;
+  assert(input.getShape().rank() == 4);
+  assert(window_size.size() == num_spatial_dims);
+  assert(strides.size() == num_spatial_dims);
+  assert(padding_before.size() == num_spatial_dims);
+  assert(padding_after.size() == num_spatial_dims);
+
+  Tensor<uint8_t> input_accessor(input);
+
+  // BUG FIX: the original built a fresh local TensorVariant, wrote the pooled
+  // values into it and then discarded it, leaving `result` untouched. Write
+  // through `result` directly, as the generic MaxPool2DImpl<T> does.
+  Tensor<uint8_t> res_accessor(result);
+
+  ShapeRange in_range(input_shape);
+  Index in_index(input_shape.rank());
+
+  for (const auto &out_index : ShapeRange(output_shape))
+  {
+    // Assuming NHWC format.
+    in_index.at(0) = out_index.at(0);
+    in_index.at(3) = out_index.at(3);
+
+    // 0 is the smallest uint8 value, so it is a safe identity for max.
+    // Renamed from `result` to stop shadowing the output parameter.
+    uint8_t max_value = 0;
+    for (const auto &window_index : ShapeRange(Shape(window_size)))
+    {
+      // Assuming NHWC format.
+      for (int i = 0; i < num_spatial_dims; ++i)
+        in_index.at(1 + i) =
+            out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+
+      if (in_range.contains(in_index))
+      {
+        max_value = std::max(max_value, input_accessor.at(in_index));
+      }
+    }
+    res_accessor.at(out_index) = max_value;
+  }
+}
+
+// Entry point: dispatches on the input's element type (the generic template,
+// or the uint8_t specialization for quantized tensors).
+void MaxPool2D(const mir::TensorVariant &input, const mir::ops::MaxPool2DOp &op,
+               mir::TensorVariant &result)
+{
+  dispatch<MaxPool2DImpl>(input.getElementType(), input, op, result);
+};
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/MaxPool2D.h b/compiler/mir-interpreter/src/ops/MaxPool2D.h
new file mode 100644
index 000000000..564def207
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/MaxPool2D.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_MAX_POOL_2D_
+#define _NNC_CORE_BACKEND_INTERPRETER_MAX_POOL_2D_
+
+#include "mir/ops/MaxPool2DOp.h"
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void MaxPool2D(const mir::TensorVariant &input, const mir::ops::MaxPool2DOp &op,
+ mir::TensorVariant &result);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_MAX_POOL_2D_
diff --git a/compiler/mir-interpreter/src/ops/Mul.cpp b/compiler/mir-interpreter/src/ops/Mul.cpp
new file mode 100644
index 000000000..446577c58
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Mul.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Mul.h"
+#include "Common.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+template <typename T> struct MulImpl
+{
+  static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res);
+};
+
+template <typename T>
+void MulImpl<T>::run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+  // Element-wise product of the operands after broadcasting both to the
+  // output shape.
+  const Shape &out_shape = res.getShape();
+  TensorVariant lhs_bcast(lhs, out_shape);
+  TensorVariant rhs_bcast(rhs, out_shape);
+  Tensor<T> left(lhs_bcast);
+  Tensor<T> right(rhs_bcast);
+  Tensor<T> out(res);
+
+  for (const auto &idx : ShapeRange(out_shape))
+    out.at(idx) = left.at(idx) * right.at(idx);
+}
+
+// Quantized (uint8) Mul is not implemented yet.
+template <> struct MulImpl<uint8_t>
+{
+  static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+  {
+    throw std::runtime_error{"NYI"};
+  }
+};
+
+// Entry point: element-wise multiplication with broadcasting.
+// Added the same mixed-element-type guard used by Greater/Less/Max in this
+// directory: previously mismatched operands would dispatch on the lhs type and
+// read the rhs buffer through an accessor of the wrong element type. Also
+// dropped the stray trailing ';' after the function body.
+void Mul(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+  if (lhs.getElementType() != rhs.getElementType())
+  {
+    throw std::runtime_error{"Mul with different input types is unsupported"};
+  }
+  dispatch<MulImpl>(lhs.getElementType(), lhs, rhs, res);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Mul.h b/compiler/mir-interpreter/src/ops/Mul.h
new file mode 100644
index 000000000..b2e71fa85
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Mul.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_MUL_
+#define _NNC_CORE_BACKEND_INTERPRETER_MUL_
+
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void Mul(const mir::TensorVariant &lhs, const mir::TensorVariant &rhs, mir::TensorVariant &res);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_MUL_
diff --git a/compiler/mir-interpreter/src/ops/Pad.cpp b/compiler/mir-interpreter/src/ops/Pad.cpp
new file mode 100644
index 000000000..054a1b68a
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Pad.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pad.h"
+#include "Common.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+template <typename T> struct PadImpl
+{
+ static void run(const mir::TensorVariant &inputv, const mir::ops::PadOp &op,
+ mir::TensorVariant &result);
+};
+
+template <typename T>
+void PadImpl<T>::run(const TensorVariant &inputv, const ops::PadOp &op, TensorVariant &result)
+{
+ Tensor<T> result_accessor(result);
+ Tensor<T> input(inputv);
+
+ Shape out_shape = result_accessor.getShape();
+
+ ShapeRange out_range(out_shape);
+ const int rank = op.getInputShape(0).rank();
+ const auto &padding_before = op.getPaddingBefore();
+ const auto &padding_after = op.getPaddingAfter();
+
+ Index temp_index;
+ temp_index.resize(rank);
+
+ bool index_on_padding(false);
+ for (const Index &ind : out_range)
+ {
+ index_on_padding = false;
+
+ for (int32_t i = 0; i < rank; i++)
+ {
+ // index on input values
+ if (ind.at(i) >= padding_before[i] && ind.at(i) < out_shape.dim(i) - padding_after[i])
+ {
+ temp_index.at(i) = ind.at(i) - padding_before[i];
+ }
+ else
+ { // not in input
+ index_on_padding = true;
+ break;
+ }
+ }
+ if (index_on_padding)
+ {
+ result_accessor.at(ind) = op.getPaddingValue();
+ }
+ else
+ {
+ result_accessor.at(ind) = input.at(temp_index);
+ }
+ }
+}
+
+void Pad(const mir::TensorVariant &input, const mir::ops::PadOp &op, mir::TensorVariant &result)
+{
+ dispatch<PadImpl>(input.getElementType(), input, op, result);
+};
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Pad.h b/compiler/mir-interpreter/src/ops/Pad.h
new file mode 100644
index 000000000..cd72b8afd
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Pad.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_PAD_IMPL_
+#define _NNC_CORE_BACKEND_INTERPRETER_PAD_IMPL_
+
+#include "mir/ops/PadOp.h"
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+/**
+ * @brief Implements PadOp for interpreter backend
+ *
+ * This operation pads a tensor according to the paddings
+ * you specify. For each dimension of input add values
+ * before and after of contents.
+ */
+void Pad(const mir::TensorVariant &input, const mir::ops::PadOp &op, mir::TensorVariant &result);
+
+} // namespace mir_interpreter
+
+#endif // _NNC_CORE_BACKEND_INTERPRETER_PAD_IMPL_
diff --git a/compiler/mir-interpreter/src/ops/Quantization.cpp b/compiler/mir-interpreter/src/ops/Quantization.cpp
new file mode 100644
index 000000000..283a7c751
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Quantization.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Quantization.h"
+#include "mir/Tensor.h"
+#include "mir/ShapeRange.h"
+
+#include <cmath>
+#include <limits>
+
+namespace mir_interpreter
+{
+using namespace mir;
+
+void Dequantize(const TensorVariant &input, TensorVariant &output)
+{
+ const TensorType &input_type = input.getType();
+ assert(input_type.isQuantized());
+ assert(input_type.getElementType() == DataType::UINT8);
+
+ const float scale = input_type.getQuantization().getScale();
+ const int32_t zero_point = input_type.getQuantization().getZeroPoint();
+
+ Tensor<uint8_t> input_accessor(input);
+ Tensor<float> res_accessor(output);
+
+ for (const auto &index : ShapeRange(output.getShape()))
+ {
+ const int32_t value = input_accessor.at(index);
+ res_accessor.at(index) = scale * static_cast<float>(value - zero_point);
+ }
+}
+
+void Quantize(const TensorVariant &input, TensorVariant &output)
+{
+ const TensorType &output_type = output.getType();
+ assert(output_type.isQuantized());
+ assert(input.getElementType() == DataType::FLOAT32);
+
+ const float scale = output_type.getQuantization().getScale();
+ const int32_t zero_point = output_type.getQuantization().getZeroPoint();
+
+ const int32_t min_val = std::numeric_limits<uint8_t>::min();
+ const int32_t max_val = std::numeric_limits<uint8_t>::max();
+
+ Tensor<float> input_accessor(input);
+ Tensor<uint8_t> res_accessor(output);
+
+ for (const auto &index : ShapeRange(output.getShape()))
+ {
+ const float value = input_accessor.at(index);
+ int32_t unclamped = static_cast<int32_t>(std::round(value / scale)) + zero_point;
+ int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+ res_accessor.at(index) = static_cast<uint8_t>(clamped);
+ }
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Quantization.h b/compiler/mir-interpreter/src/ops/Quantization.h
new file mode 100644
index 000000000..23388d4d8
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Quantization.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_QUANTIZATION_IMPL_
+#define _NNC_CORE_BACKEND_INTERPRETER_QUANTIZATION_IMPL_
+
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+using namespace mir;
+
+void Dequantize(const TensorVariant &input, TensorVariant &output);
+
+void Quantize(const TensorVariant &input, TensorVariant &output);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_QUANTIZATION_IMPL_
diff --git a/compiler/mir-interpreter/src/ops/QuantizationHelpers.h b/compiler/mir-interpreter/src/ops/QuantizationHelpers.h
new file mode 100644
index 000000000..8faeffbd3
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/QuantizationHelpers.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_QUANTIZATION_HELPERS_
+#define _NNC_CORE_BACKEND_INTERPRETER_QUANTIZATION_HELPERS_
+
+#include <cmath>
+#include <limits>
+
+namespace mir_interpreter
+{
+
+inline void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
+{
+ if (double_multiplier == 0.)
+ {
+ *quantized_multiplier = 0;
+ *shift = 0;
+ return;
+ }
+
+ const double q = std::frexp(double_multiplier, shift);
+ auto q_fixed = static_cast<int64_t>(round(q * (1ll << 31)));
+
+ assert(q_fixed <= (1ll << 31));
+ if (q_fixed == (1ll << 31))
+ {
+ q_fixed /= 2;
+ ++*shift;
+ }
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ // A shift amount smaller than -31 would cause all bits to be shifted out
+ // and thus all results would be zero. We implement that instead with
+ // q_fixed==0, so as to avoid hitting issues with right-shift
+ // operations with shift amounts greater than 31. Note that this happens
+ // roughly when abs(double_multiplier) < 2^-31 and the present handling means
+ // that we're effectively flushing tiny double_multiplier's to zero.
+ // We could conceivably handle values in the range (roughly) [32, 63]
+ // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
+ // the present handling is just doing 'flush denormals to zero'. We could
+ // reconsider and actually generate nonzero denormals if a need arises.
+ if (*shift < -31)
+ {
+ *shift = 0;
+ q_fixed = 0;
+ }
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
+
+inline void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
+ int32_t *quantized_multiplier, int *left_shift)
+{
+ assert(double_multiplier < 1.0);
+ assert(double_multiplier > 0.0);
+ int shift;
+ QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
+ assert(shift <= 0);
+ *left_shift = shift;
+}
+
+inline int32_t MaskIfNonZero(int32_t a)
+{
+ static const int32_t zero = 0;
+ return a ? ~zero : zero;
+}
+
+inline int32_t MaskIfZero(int32_t a) { return MaskIfNonZero(!a); }
+
+inline int32_t MaskIfLessThan(int32_t a, int32_t b) { return MaskIfNonZero(a < b); }
+
+inline int32_t MaskIfGreaterThan(int32_t a, int32_t b) { return MaskIfNonZero(a > b); }
+
+inline int32_t RoundingDivideByPOT(int32_t x, int exponent)
+{
+ assert(exponent >= 0);
+ assert(exponent <= 31);
+ const int32_t mask = (1ll << exponent) - 1;
+ const int32_t remainder = x & mask;
+ const int32_t threshold = (mask >> 1) + (MaskIfLessThan(x, 0) & 1);
+ return (x >> exponent) + (MaskIfGreaterThan(remainder, threshold) & 1);
+}
+
+inline std::int32_t SaturatingRoundingDoublingHighMul(std::int32_t a, std::int32_t b)
+{
+ bool overflow = a == b && a == std::numeric_limits<std::int32_t>::min();
+ std::int64_t a_64(a);
+ std::int64_t b_64(b);
+ std::int64_t ab_64 = a_64 * b_64;
+ std::int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
+ std::int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
+ return overflow ? std::numeric_limits<std::int32_t>::max() : ab_x2_high32;
+}
+
+inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
+{
+ int left_shift = shift > 0 ? shift : 0;
+ int right_shift = shift > 0 ? 0 : -shift;
+ return RoundingDivideByPOT(
+ SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
+}
+
+inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(int32_t x,
+ int32_t quantized_multiplier,
+ int left_shift)
+{
+ return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(x, quantized_multiplier),
+ -left_shift);
+}
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_QUANTIZATION_HELPERS_
diff --git a/compiler/mir-interpreter/src/ops/ReLU.cpp b/compiler/mir-interpreter/src/ops/ReLU.cpp
new file mode 100644
index 000000000..92d3ded5e
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/ReLU.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU.h"
+#include "Common.h"
+
+#include <mir/ShapeRange.h>
+#include <mir/Tensor.h>
+
+#include <algorithm>
+
+namespace mir_interpreter
+{
+
+template <typename T> struct ReLUImpl
+{
+ static void run(const mir::TensorVariant &arg, mir::TensorVariant &result);
+};
+
+template <typename T>
+void ReLUImpl<T>::run(const mir::TensorVariant &arg, mir::TensorVariant &result)
+{
+ mir::Tensor<T> arg_accessor(arg);
+ mir::Tensor<T> res_accessor(result);
+
+ for (const auto &index : mir::ShapeRange(result.getShape()))
+ {
+ res_accessor.at(index) = std::max(arg_accessor.at(index), static_cast<T>(0));
+ }
+}
+
+template <> struct ReLUImpl<uint8_t>
+{
+ static void run(const mir::TensorVariant &arg, mir::TensorVariant &result)
+ {
+ throw std::runtime_error{"NYI"};
+ }
+};
+
+void ReLU(const mir::TensorVariant &arg, mir::TensorVariant &result)
+{
+ dispatch<ReLUImpl>(arg.getElementType(), arg, result);
+};
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/ReLU.h b/compiler/mir-interpreter/src/ops/ReLU.h
new file mode 100644
index 000000000..9edabb9d9
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/ReLU.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_RELU_
+#define _NNC_CORE_BACKEND_INTERPRETER_RELU_
+
+#include <mir/TensorVariant.h>
+
+namespace mir_interpreter
+{
+
+void ReLU(const mir::TensorVariant &arg, mir::TensorVariant &result);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_RELU_
diff --git a/compiler/mir-interpreter/src/ops/ReduceMean.cpp b/compiler/mir-interpreter/src/ops/ReduceMean.cpp
new file mode 100644
index 000000000..ebaa3b48f
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/ReduceMean.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_REDUCE_MEAN_
+#define _NNC_CORE_BACKEND_INTERPRETER_REDUCE_MEAN_
+
+#include "ReduceMean.h"
+#include "Common.h"
+
+#include "mir/ops/ReduceMeanOp.h"
+#include "mir/Tensor.h"
+#include "mir/ShapeRange.h"
+
+namespace mir_interpreter
+{
+
+template <typename T> struct ReduceMeanImpl
+{
+ static void run(const mir::TensorVariant &inputv, const mir::ops::ReduceMeanOp &op,
+ mir::TensorVariant &output);
+};
+
+template <typename T>
+void ReduceMeanImpl<T>::run(const mir::TensorVariant &inputv, const mir::ops::ReduceMeanOp &op,
+ mir::TensorVariant &output)
+{
+ const auto &input_shape = op.getInputShape(0);
+ const auto &output_shape = op.getOutputShape(0);
+ const auto &reduction_dims = op.getReductionDims();
+ const bool keep_dims = op.getKeepDims();
+
+ const auto reductor = [](T result, T x) { return result + x; };
+
+ mir::Tensor<T> input(inputv);
+ mir::Tensor<T> res_accessor(output);
+
+ erase<T>(output);
+
+ // This mask contains 'true' for dimensions that should be reduced. For example, if we want
+ // to reduce dimensions 1 and 3 with total number of dimensions of 4, the mask will be
+ // [false, true, false, true].
+ std::vector<bool> reduction_dims_mask(input_shape.rank(), false);
+ for (const int dim : reduction_dims)
+ {
+ reduction_dims_mask[dim] = true;
+ }
+
+ mir::Index out_index(output_shape.rank());
+ for (const mir::Index &in_index : mir::ShapeRange(input_shape))
+ {
+ int out_index_dim = 0;
+ for (int dim = 0; dim < input_shape.rank(); ++dim)
+ {
+ if (keep_dims)
+ {
+ out_index.at(out_index_dim++) = reduction_dims_mask[dim] ? 0 : in_index.at(dim);
+ }
+ else
+ {
+ if (!reduction_dims_mask[dim])
+ {
+ out_index.at(out_index_dim++) = in_index.at(dim);
+ }
+ }
+ }
+ res_accessor.at(out_index) = reductor(res_accessor.at(out_index), input.at(in_index));
+ }
+
+ const std::int32_t reduction_factor = input_shape.numElements() / output_shape.numElements();
+
+ for (const auto &index : mir::ShapeRange(output_shape))
+ {
+ res_accessor.at(index) /= reduction_factor;
+ }
+}
+
+void ReduceMean(const mir::TensorVariant &input, const mir::ops::ReduceMeanOp &op,
+ mir::TensorVariant &output)
+{
+ dispatch<ReduceMeanImpl>(input.getElementType(), input, op, output);
+};
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_REDUCE_MEAN_
diff --git a/compiler/mir-interpreter/src/ops/ReduceMean.h b/compiler/mir-interpreter/src/ops/ReduceMean.h
new file mode 100644
index 000000000..178563b2c
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/ReduceMean.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_REDUCE_MEAN_
+#define _NNC_CORE_BACKEND_INTERPRETER_REDUCE_MEAN_
+
+#include "mir/ops/ReduceMeanOp.h"
+
+namespace mir_interpreter
+{
+
+void ReduceMean(const mir::TensorVariant &input, const mir::ops::ReduceMeanOp &op,
+ mir::TensorVariant &output);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_REDUCE_MEAN_
diff --git a/compiler/mir-interpreter/src/ops/Reshape.cpp b/compiler/mir-interpreter/src/ops/Reshape.cpp
new file mode 100644
index 000000000..f29b261ce
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Reshape.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reshape.h"
+
+#include "mir/ShapeRange.h"
+
+#include <cstring>
+
+namespace mir_interpreter
+{
+
+void Reshape(const mir::TensorVariant &input, mir::TensorVariant &output)
+{
+ assert(input.getShape().numElements() == output.getShape().numElements());
+
+ mir::ShapeRange input_range(input.getShape());
+ auto in_iter = input_range.begin();
+ const size_t elem_size = input.getElementSize();
+
+ for (const auto &out_index : mir::ShapeRange(output.getShape()))
+ std::memcpy(output.at(out_index), input.at(*in_iter++), elem_size);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Reshape.h b/compiler/mir-interpreter/src/ops/Reshape.h
new file mode 100644
index 000000000..2da6411f6
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Reshape.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_RESHAPE_IMPL_
+#define _NNC_CORE_BACKEND_INTERPRETER_RESHAPE_IMPL_
+
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void Reshape(const mir::TensorVariant &input, mir::TensorVariant &output);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_RESHAPE_IMPL_
diff --git a/compiler/mir-interpreter/src/ops/Sigmoid.cpp b/compiler/mir-interpreter/src/ops/Sigmoid.cpp
new file mode 100644
index 000000000..23718f935
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Sigmoid.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sigmoid.h"
+#include "Common.h"
+
+#include <mir/ShapeRange.h>
+#include <mir/Tensor.h>
+
+#include <cmath>
+
+namespace mir_interpreter
+{
+
+template <typename T> struct SigmoidImpl
+{
+ static void run(const mir::TensorVariant &arg, mir::TensorVariant &result);
+};
+
+template <typename T>
+void SigmoidImpl<T>::run(const mir::TensorVariant &arg, mir::TensorVariant &result)
+{
+ mir::Tensor<T> arg_accessor(arg);
+ mir::Tensor<T> res_accessor(result);
+
+ for (const auto &index : mir::ShapeRange(result.getShape()))
+ {
+ res_accessor.at(index) = 1.0f / (1.0f + std::exp(-arg_accessor.at(index)));
+ }
+}
+
+template <> struct SigmoidImpl<uint8_t>
+{
+ static void run(const mir::TensorVariant &arg, mir::TensorVariant &result)
+ {
+ throw std::runtime_error{"NYI"};
+ }
+};
+
+void Sigmoid(const mir::TensorVariant &arg, mir::TensorVariant &result)
+{
+ dispatch<SigmoidImpl>(arg.getElementType(), arg, result);
+};
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Sigmoid.h b/compiler/mir-interpreter/src/ops/Sigmoid.h
new file mode 100644
index 000000000..81c614c89
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Sigmoid.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_SIGMOID_
+#define _NNC_CORE_BACKEND_INTERPRETER_SIGMOID_
+
+#include <mir/TensorVariant.h>
+
+namespace mir_interpreter
+{
+
+void Sigmoid(const mir::TensorVariant &arg, mir::TensorVariant &result);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_SIGMOID_
diff --git a/compiler/mir-interpreter/src/ops/Slice.cpp b/compiler/mir-interpreter/src/ops/Slice.cpp
new file mode 100644
index 000000000..df24d49cd
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Slice.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Slice.h"
+
+#include "Fill.h"
+#include "Common.h"
+
+#include "mir/Tensor.h"
+#include "mir/ShapeRange.h"
+
+namespace mir_interpreter
+{
+
+template <typename T> struct SliceImpl
+{
+ static void run(const mir::TensorVariant &arg, const mir::Shape &starts, mir::TensorVariant &res);
+};
+
+template <typename T>
+void SliceImpl<T>::run(const mir::TensorVariant &arg, const mir::Shape &starts,
+ mir::TensorVariant &res)
+{
+ mir::Tensor<T> input(arg);
+ mir::Tensor<T> output(res);
+
+ for (auto id : mir::ShapeRange(res.getShape()))
+ {
+ mir::Index idx = mir_interpreter::shift(id, starts);
+ output.at(id) = input.at(idx);
+ }
+}
+
+void Slice(const mir::TensorVariant &arg, const mir::Shape &starts, mir::TensorVariant &res)
+{
+ dispatch<SliceImpl>(arg.getElementType(), arg, starts, res);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Slice.h b/compiler/mir-interpreter/src/ops/Slice.h
new file mode 100644
index 000000000..9e5e3bb0e
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Slice.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_SLICE_
+#define _NNC_CORE_BACKEND_INTERPRETER_SLICE_
+
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void Slice(const mir::TensorVariant &arg, const mir::Shape &starts, mir::TensorVariant &res);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_DIV_
diff --git a/compiler/mir-interpreter/src/ops/Softmax.cpp b/compiler/mir-interpreter/src/ops/Softmax.cpp
new file mode 100644
index 000000000..f263f967d
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Softmax.cpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Softmax.h"
+#include "Common.h"
+#include "QuantizationHelpers.h"
+
+#include <mir/ShapeRange.h>
+#include <mir/Tensor.h>
+
+#include <cmath>
+
+namespace mir_interpreter
+{
+
+static inline void PopulateSoftmaxLookupTable(float *table, float input_scale, float beta)
+{
+ const float scale = -input_scale * beta;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ for (int32_t val = 0; val <= max_uint8; ++val)
+ table[max_uint8 - val] = expf(scale * val);
+}
+
+template <typename T> struct SoftmaxImpl
+{
+ static void run(const mir::TensorVariant &arg, int axis, mir::TensorVariant &result);
+};
+
+template <typename T>
+void SoftmaxImpl<T>::run(const mir::TensorVariant &arg, int axis, mir::TensorVariant &result)
+{
+ mir::Tensor<T> arg_accessor(arg);
+ mir::Tensor<T> res_accessor(result);
+
+ mir::Shape expsum_shape = arg.getShape();
+ expsum_shape.dim(axis) = 1;
+ mir::TensorType expsum_type(arg.getElementType(), expsum_shape);
+ mir::TensorVariant expsum(expsum_type);
+ mir::Tensor<T> expsum_accessor(expsum);
+
+ for (const auto &expsum_index : mir::ShapeRange(expsum_shape))
+ {
+ T sum = 0;
+ mir::Index arg_index = expsum_index;
+ std::int32_t axis_size = arg.getShape().dim(axis);
+ for (std::int32_t i = 0; i < axis_size; ++i)
+ {
+ arg_index.at(axis) = i;
+ sum += std::exp(arg_accessor.at(arg_index));
+ }
+ expsum_accessor.at(expsum_index) = sum;
+ }
+
+ for (const auto &res_index : mir::ShapeRange(result.getShape()))
+ {
+ mir::Index expsum_index = res_index;
+ expsum_index.at(axis) = 0;
+ res_accessor.at(res_index) =
+ std::exp(arg_accessor.at(res_index)) / expsum_accessor.at(expsum_index);
+ }
+}
+
+template <> struct SoftmaxImpl<uint8_t>
+{
+ static void run(const mir::TensorVariant &input, int axis, mir::TensorVariant &output);
+};
+
+void SoftmaxImpl<uint8_t>::run(const mir::TensorVariant &input, int axis,
+ mir::TensorVariant &output)
+{
+ const auto &input_type = input.getType();
+ const auto &output_type = output.getType();
+
+ assert(input_type.isQuantized());
+ assert(output_type.isQuantized());
+
+ const auto input_shape = input_type.getShape();
+
+ assert(input_type.getElementType() == mir::DataType::UINT8);
+ assert(axis == input_shape.rank() - 1); // supported only last dim axis
+ (void)axis;
+
+ double input_scale = input_type.getQuantization().getScale();
+ double output_scale = output_type.getQuantization().getScale();
+
+ const int trailing_dim = input_shape.rank() - 1;
+ int excluding_last_dim = 1;
+ for (int32_t i = 0; i < input_shape.rank() - 1; i++)
+ {
+ excluding_last_dim *= input_shape.dim(i);
+ }
+ const int last_dim = input_shape.dim(trailing_dim);
+
+ const int32_t clamp_max = std::numeric_limits<uint8_t>::max();
+ const int32_t clamp_min = std::numeric_limits<uint8_t>::min();
+
+ uint8_t *input_data = reinterpret_cast<uint8_t *>(input.atOffset(0));
+
+ float table[256];
+ PopulateSoftmaxLookupTable(table, input_scale, 1.f);
+
+ uint8_t *output_data = reinterpret_cast<uint8_t *>(output.atOffset(0));
+
+ for (int i = 0; i < excluding_last_dim; ++i)
+ {
+ int32_t max_val = std::numeric_limits<uint8_t>::min();
+ // Find max quantized value.
+ for (int j = 0; j < last_dim; ++j)
+ {
+ max_val = std::max(max_val, static_cast<int32_t>(input_data[j]));
+ }
+
+ float sum_exp = 0.0f;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ const float *table_offset = &table[max_uint8 - max_val];
+ // Calculate normalizer sum(exp(x)).
+ for (int j = 0; j < last_dim; ++j)
+ {
+ sum_exp += table_offset[input_data[j]];
+ }
+
+ const float inv_sum_exp = 1.0f / (sum_exp * output_scale);
+ // Normalize and quantize probabilities.
+ for (int j = 0; j < last_dim; ++j)
+ {
+ const float prob_rescaled = table_offset[input_data[j]] * inv_sum_exp;
+ const int32_t prob_quantized = static_cast<int32_t>(prob_rescaled + 0.5);
+ output_data[j] =
+ static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
+ }
+ input_data += last_dim;
+ output_data += last_dim;
+ }
+}
+
+void Softmax(const mir::TensorVariant &arg, int axis, mir::TensorVariant &result)
+{
+ dispatch<SoftmaxImpl>(arg.getElementType(), arg, axis, result);
+};
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Softmax.h b/compiler/mir-interpreter/src/ops/Softmax.h
new file mode 100644
index 000000000..9c9818c70
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Softmax.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_SOFTMAX_
+#define _NNC_CORE_BACKEND_INTERPRETER_SOFTMAX_
+
+#include <mir/TensorVariant.h>
+
+namespace mir_interpreter
+{
+
+void Softmax(const mir::TensorVariant &arg, int axis, mir::TensorVariant &result);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_SOFTMAX_
diff --git a/compiler/mir-interpreter/src/ops/Sqrt.cpp b/compiler/mir-interpreter/src/ops/Sqrt.cpp
new file mode 100644
index 000000000..7a2ca49c8
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Sqrt.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sqrt.h"
+#include "Common.h"
+
+#include <mir/ShapeRange.h>
+#include <mir/Tensor.h>
+
+#include <cmath>
+
+namespace mir_interpreter
+{
+
+template <typename T> struct SqrtImpl
+{
+ static void run(const mir::TensorVariant &arg, mir::TensorVariant &result);
+};
+
+template <typename T>
+void SqrtImpl<T>::run(const mir::TensorVariant &arg, mir::TensorVariant &result)
+{
+ mir::Tensor<T> arg_accessor(arg);
+ mir::Tensor<T> res_accessor(result);
+
+ for (const auto &index : mir::ShapeRange(result.getShape()))
+ {
+ res_accessor.at(index) = std::sqrt(arg_accessor.at(index));
+ }
+}
+
+template <> struct SqrtImpl<uint8_t>
+{
+ static void run(const mir::TensorVariant &arg, mir::TensorVariant &result)
+ {
+ throw std::runtime_error{"NYI"};
+ }
+};
+
+void Sqrt(const mir::TensorVariant &arg, mir::TensorVariant &result)
+{
+ dispatch<SqrtImpl>(arg.getElementType(), arg, result);
+};
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Sqrt.h b/compiler/mir-interpreter/src/ops/Sqrt.h
new file mode 100644
index 000000000..fef2bf0fe
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Sqrt.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_SQRT_
+#define _NNC_CORE_BACKEND_INTERPRETER_SQRT_
+
+#include <mir/TensorVariant.h>
+
+namespace mir_interpreter
+{
+
+void Sqrt(const mir::TensorVariant &arg, mir::TensorVariant &result);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_SQRT_
diff --git a/compiler/mir-interpreter/src/ops/Sub.cpp b/compiler/mir-interpreter/src/ops/Sub.cpp
new file mode 100644
index 000000000..6c03cff82
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Sub.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sub.h"
+#include "Common.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+namespace mir_interpreter
+{
+
+using namespace mir;
+
+template <typename T> struct SubImpl
+{
+ static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res);
+};
+
+template <typename T>
+void SubImpl<T>::run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+ TensorVariant broadcasted_lhs(lhs, res.getShape());
+ TensorVariant broadcasted_rhs(rhs, res.getShape());
+ Tensor<T> lhs_accessor(broadcasted_lhs);
+ Tensor<T> rhs_accessor(broadcasted_rhs);
+ Tensor<T> res_accessor(res);
+
+ for (const auto &index : ShapeRange(res.getShape()))
+ {
+ res_accessor.at(index) = lhs_accessor.at(index) - rhs_accessor.at(index);
+ }
+}
+
+template <> struct SubImpl<uint8_t>
+{
+ static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+ {
+ throw std::runtime_error{"NYI"};
+ }
+};
+
+void Sub(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
+{
+ dispatch<SubImpl>(lhs.getElementType(), lhs, rhs, res);
+};
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Sub.h b/compiler/mir-interpreter/src/ops/Sub.h
new file mode 100644
index 000000000..53991596f
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Sub.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_SUB_
+#define _NNC_CORE_BACKEND_INTERPRETER_SUB_
+
+#include "mir/TensorVariant.h"
+
+namespace mir_interpreter
+{
+
+void Sub(const mir::TensorVariant &lhs, const mir::TensorVariant &rhs, mir::TensorVariant &res);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_SUB_
diff --git a/compiler/mir-interpreter/src/ops/Tanh.cpp b/compiler/mir-interpreter/src/ops/Tanh.cpp
new file mode 100644
index 000000000..49a3461bf
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Tanh.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sigmoid.h"
+#include "Common.h"
+
+#include <mir/ShapeRange.h>
+#include <mir/Tensor.h>
+
+#include <cmath>
+
+namespace mir_interpreter
+{
+
+template <typename T> struct TanhImpl
+{
+ static void run(const mir::TensorVariant &arg, mir::TensorVariant &result);
+};
+
+template <typename T>
+void TanhImpl<T>::run(const mir::TensorVariant &arg, mir::TensorVariant &result)
+{
+ mir::Tensor<T> arg_accessor(arg);
+ mir::Tensor<T> res_accessor(result);
+
+ for (const auto &index : mir::ShapeRange(result.getShape()))
+ {
+ res_accessor.at(index) = std::tanh(arg_accessor.at(index));
+ }
+}
+
+template <> struct TanhImpl<uint8_t>
+{
+ static void run(const mir::TensorVariant &arg, mir::TensorVariant &result)
+ {
+ throw std::runtime_error{"NYI"};
+ }
+};
+
+void Tanh(const mir::TensorVariant &arg, mir::TensorVariant &result)
+{
+ dispatch<TanhImpl>(arg.getElementType(), arg, result);
+};
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Tanh.h b/compiler/mir-interpreter/src/ops/Tanh.h
new file mode 100644
index 000000000..2f376f5bd
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Tanh.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_TANH_
+#define _NNC_CORE_BACKEND_INTERPRETER_TANH_
+
+#include <mir/TensorVariant.h>
+
+namespace mir_interpreter
+{
+
+void Tanh(const mir::TensorVariant &arg, mir::TensorVariant &result);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_TANH_
diff --git a/compiler/mir-interpreter/src/ops/Transpose.cpp b/compiler/mir-interpreter/src/ops/Transpose.cpp
new file mode 100644
index 000000000..1f0ad56c3
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Transpose.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Transpose.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+#include "Common.h"
+
+namespace mir_interpreter
+{
+
+template <typename T> struct TransposeImpl
+{
+ static void run(const mir::TensorVariant &input, const mir::ops::TransposeOp &op,
+ mir::TensorVariant &output);
+};
+
+template <typename T>
+void TransposeImpl<T>::run(const mir::TensorVariant &inputv, const mir::ops::TransposeOp &op,
+ mir::TensorVariant &outputv)
+{
+ const auto &output_shape = op.getOutputShape(0);
+ const auto &axis_order = op.getAxisOrder();
+ const int32_t num_axis = static_cast<int32_t>(axis_order.size());
+ assert(num_axis == inputv.getShape().rank());
+ assert(num_axis == output_shape.rank());
+
+ mir::Index output_index;
+ output_index.resize(num_axis);
+
+ mir::Tensor<T> input(inputv);
+ mir::Tensor<T> output(outputv);
+
+ for (auto &input_index : mir::ShapeRange(input.getShape()))
+ {
+ for (int32_t i = 0; i < num_axis; i++)
+ output_index.at(i) = input_index.at(axis_order[i]);
+
+ output.at(output_index) = input.at(input_index);
+ }
+}
+
+void Transpose(const mir::TensorVariant &input, const mir::ops::TransposeOp &op,
+ mir::TensorVariant &output)
+{
+ dispatch<TransposeImpl>(input.getElementType(), input, op, output);
+}
+
+} // namespace mir_interpreter
diff --git a/compiler/mir-interpreter/src/ops/Transpose.h b/compiler/mir-interpreter/src/ops/Transpose.h
new file mode 100644
index 000000000..f60ed7295
--- /dev/null
+++ b/compiler/mir-interpreter/src/ops/Transpose.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_TRANSPOSE_
+#define _NNC_CORE_BACKEND_INTERPRETER_TRANSPOSE_
+
+#include "mir/TensorVariant.h"
+#include "mir/ops/TransposeOp.h"
+
+namespace mir_interpreter
+{
+
+void Transpose(const mir::TensorVariant &input, const mir::ops::TransposeOp &op,
+ mir::TensorVariant &output);
+
+} // namespace mir_interpreter
+
+#endif //_NNC_CORE_BACKEND_INTERPRETER_TRANSPOSE_
diff --git a/compiler/mir-onnx-importer/AttributeHelpers.h b/compiler/mir-onnx-importer/AttributeHelpers.h
new file mode 100644
index 000000000..d5cc1501a
--- /dev/null
+++ b/compiler/mir-onnx-importer/AttributeHelpers.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_ATTRIBUTE_HELPERS_H
+#define MIR_ONNX_ATTRIBUTE_HELPERS_H
+
+#include "onnx/onnx.pb.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace mir_onnx
+{
+
+template <typename T> T getAttributeValue(const onnx::AttributeProto &attribute) = delete;
+
+template <> inline float getAttributeValue(const onnx::AttributeProto &attribute)
+{
+ assert(attribute.type() == onnx::AttributeProto::FLOAT);
+ return attribute.f();
+}
+
+template <> inline std::int64_t getAttributeValue(const onnx::AttributeProto &attribute)
+{
+ assert(attribute.type() == onnx::AttributeProto::INT);
+ return attribute.i();
+}
+
+template <> inline std::string getAttributeValue(const onnx::AttributeProto &attribute)
+{
+ assert(attribute.type() == onnx::AttributeProto::STRING);
+ return attribute.s();
+}
+
+template <> inline onnx::TensorProto getAttributeValue(const onnx::AttributeProto &attribute)
+{
+ assert(attribute.type() == onnx::AttributeProto::TENSOR);
+ return attribute.t();
+}
+
+template <>
+inline std::vector<std::int32_t> getAttributeValue(const onnx::AttributeProto &attribute)
+{
+ assert(attribute.type() == onnx::AttributeProto::INTS);
+ // TODO Check that values fit.
+ return {attribute.ints().cbegin(), attribute.ints().cend()};
+}
+
+template <>
+inline std::vector<std::int64_t> getAttributeValue(const onnx::AttributeProto &attribute)
+{
+ assert(attribute.type() == onnx::AttributeProto::INTS);
+ return {attribute.ints().cbegin(), attribute.ints().cend()};
+}
+
+inline const onnx::AttributeProto *findAttribute(const onnx::NodeProto &node,
+ const std::string &name)
+{
+ const auto &attributes = node.attribute();
+ const auto it = std::find_if(
+ attributes.cbegin(), attributes.cend(),
+ [&name](const onnx::AttributeProto &attribute) { return attribute.name() == name; });
+ if (it == attributes.cend())
+ return nullptr;
+ return &*it;
+}
+
+template <typename T> T getAttributeValue(const onnx::NodeProto &node, const std::string &name)
+{
+ const auto *attribute = findAttribute(node, name);
+ if (attribute == nullptr)
+ throw std::runtime_error("Cannot find attribute '" + name + "' in node '" + node.name() + "'.");
+ return getAttributeValue<T>(*attribute);
+}
+
+template <typename T>
+T getAttributeValue(const onnx::NodeProto &node, const std::string &name, T default_value)
+{
+ const auto *attribute = findAttribute(node, name);
+ if (attribute == nullptr)
+ return std::move(default_value);
+ return getAttributeValue<T>(*attribute);
+}
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_ATTRIBUTE_HELPERS_H
diff --git a/compiler/mir-onnx-importer/CMakeLists.txt b/compiler/mir-onnx-importer/CMakeLists.txt
new file mode 100644
index 000000000..5f27bc041
--- /dev/null
+++ b/compiler/mir-onnx-importer/CMakeLists.txt
@@ -0,0 +1,119 @@
+nnas_find_package(ONNXSource EXACT 1.6.0 QUIET)
+nnas_find_package(Protobuf QUIET)
+
+if (NOT ONNXSource_FOUND)
+ return()
+endif ()
+
+if (NOT Protobuf_FOUND)
+ return()
+endif ()
+
+Protobuf_Generate(MIR_ONNX_PROTO
+ ${CMAKE_CURRENT_BINARY_DIR}/generated
+ ${ONNXSource_DIR}
+ onnx/onnx.proto)
+
+add_library(mir_onnx_proto STATIC ${MIR_ONNX_PROTO_SOURCES})
+set_target_properties(mir_onnx_proto PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(mir_onnx_proto PUBLIC ${MIR_ONNX_PROTO_INCLUDE_DIRS})
+target_link_libraries(mir_onnx_proto PUBLIC libprotobuf)
+
+set(MIR_ONNX_IMPORTER_SOURCES
+ AttributeHelpers.h
+ ConvPoolHelpers.cpp
+ ConvPoolHelpers.h
+ ONNXHelpers.cpp
+ ONNXHelpers.h
+ ONNXImporterImpl.cpp
+ ONNXImporterImpl.h
+ ONNXNodeConverterRegistry.h
+ ONNXNodeConverterRegistry.cpp
+ ONNXOpRegistration.h
+ Op/Abs.cpp
+ Op/Abs.h
+ Op/Add.cpp
+ Op/Add.h
+ Op/AveragePool.cpp
+ Op/AveragePool.h
+ Op/BatchNormalization.cpp
+ Op/BatchNormalization.h
+ Op/Concat.cpp
+ Op/Concat.h
+ Op/Constant.cpp
+ Op/Constant.h
+ Op/Conv.cpp
+ Op/Conv.h
+ Op/ConvTranspose.cpp
+ Op/ConvTranspose.h
+ Op/Div.cpp
+ Op/Div.h
+ Op/Dropout.cpp
+ Op/Dropout.h
+ Op/Equal.cpp
+ Op/Equal.h
+ Op/Expand.cpp
+ Op/Expand.h
+ Op/Flatten.cpp
+ Op/Flatten.h
+ Op/Gather.cpp
+ Op/Gather.h
+ Op/Greater.cpp
+ Op/Greater.h
+ Op/Gemm.cpp
+ Op/Gemm.h
+ Op/Identity.cpp
+ Op/Identity.h
+ Op/Less.cpp
+ Op/Less.h
+ Op/MatMul.cpp
+ Op/MatMul.h
+ Op/GlobalAveragePool.cpp
+ Op/GlobalAveragePool.h
+ Op/Max.cpp
+ Op/Max.h
+ Op/MaxPool.cpp
+ Op/MaxPool.h
+ Op/Mul.cpp
+ Op/Mul.h
+ Op/Pad.cpp
+ Op/Pad.h
+ Op/Reciprocal.cpp
+ Op/Reciprocal.h
+ Op/ReduceMean.cpp
+ Op/ReduceMean.h
+ Op/Relu.cpp
+ Op/Relu.h
+ Op/Reshape.cpp
+ Op/Reshape.h
+ Op/Shape.cpp
+ Op/Shape.h
+ Op/Sigmoid.cpp
+ Op/Sigmoid.h
+ Op/Softmax.cpp
+ Op/Softmax.h
+ Op/Sqrt.cpp
+ Op/Sqrt.h
+ Op/Sub.cpp
+ Op/Sub.h
+ Op/Sum.cpp
+ Op/Sum.h
+ Op/Tanh.cpp
+ Op/Tanh.h
+ Op/Transpose.cpp
+ Op/Transpose.h
+ Op/Unsqueeze.cpp
+ Op/Unsqueeze.h
+ Op/Upsample.cpp
+ Op/Upsample.h)
+
+add_library(mir_onnx_importer STATIC ${MIR_ONNX_IMPORTER_SOURCES})
+set_target_properties(mir_onnx_importer PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(mir_onnx_importer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_link_libraries(mir_onnx_importer PUBLIC mir_onnx_proto mir PRIVATE stdex mir_interpreter)
+
+nnas_find_package(GTest REQUIRED)
+
+file(GLOB_RECURSE TEST_SOURCES "*.test.cpp")
+GTest_AddTest(mir_onnx_importer_test ${TEST_SOURCES})
+target_link_libraries(mir_onnx_importer_test mir_onnx_importer)
diff --git a/compiler/mir-onnx-importer/ConvPoolHelpers.cpp b/compiler/mir-onnx-importer/ConvPoolHelpers.cpp
new file mode 100644
index 000000000..d98e6deae
--- /dev/null
+++ b/compiler/mir-onnx-importer/ConvPoolHelpers.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvPoolHelpers.h"
+
+#include <algorithm>
+#include <cassert>
+
+namespace mir_onnx
+{
+
+void inferAutoPadding(const std::string &pad_type, const mir::Shape &input_shape,
+ const std::vector<std::int32_t> &dilations,
+ const std::vector<std::int32_t> &strides,
+ const std::vector<std::int32_t> &window_size,
+ std::vector<std::int32_t> &padding_before,
+ std::vector<std::int32_t> &padding_after)
+{
+ constexpr int num_spatial_dims = 2;
+
+ if (pad_type == "NOTSET")
+ {
+ // Do nothing.
+ }
+ else if (pad_type == "VALID")
+ {
+ padding_before.assign(num_spatial_dims, 0);
+ padding_after.assign(num_spatial_dims, 0);
+ }
+ else
+ {
+ padding_before.resize(num_spatial_dims);
+ padding_after.resize(num_spatial_dims);
+
+ assert(dilations.size() == num_spatial_dims);
+ assert(strides.size() == num_spatial_dims);
+ assert(window_size.size() == num_spatial_dims);
+
+ for (int i = 0; i < num_spatial_dims; ++i)
+ {
+ const std::int32_t eff_window_size = (window_size[i] - 1) * dilations[i] + 1;
+ // Assuming input has NCHW format.
+ const std::int32_t residual = input_shape.dim(2 + i) % strides[i];
+ const std::int32_t total_pad = std::max(
+ INT32_C(0), residual == 0 ? eff_window_size - strides[i] : eff_window_size - residual);
+ if (pad_type == "SAME_UPPER")
+ {
+ padding_before[i] = total_pad / 2;
+ padding_after[i] = (total_pad + 1) / 2;
+ }
+ else
+ {
+ assert(pad_type == "SAME_LOWER");
+ padding_before[i] = (total_pad + 1) / 2;
+ padding_after[i] = total_pad / 2;
+ }
+ }
+ }
+}
+
+std::vector<std::int32_t> fixPads(const mir::Shape &input_shape,
+ const std::vector<std::int32_t> &pads,
+ const std::vector<std::int32_t> &strides,
+ const std::vector<std::int32_t> &dilation,
+ const std::vector<std::int32_t> &kernel_shape)
+{
+ assert(pads.size() % 2 == 0);
+ int spatial_dimensions = pads.size() / 2;
+ std::vector<std::int32_t> fixed_pads(pads);
+ for (int i = 0; i < spatial_dimensions; ++i)
+ {
+ auto effective_window_dim = (kernel_shape[i] - 1) * dilation[i] + 1;
+ auto effective_input_dim = input_shape.dim(i + 2) + pads[i] + pads[i + spatial_dimensions];
+ // Computing number of "redundant" elements at the end of input dimension
+ // for example we have effective_input_dim == 8, effective_window)dim == 3 and stride == 2:
+ // [1][2][3][4][5][6][7][8] - input
+ // * * * . . . . - first kernel application
+ // . . * * * . . - second kernel application
+ // . . . . * * * - third kernel application
+ // element 8 is unused (remainder should be 1)
+ //
+ // glossary:
+ // i - effective input size
+ // w - effective window size
+ // s - stride
+ // n - number of kernel applications (3 in example)
+ //
+ // i = s * (n-1) + w + r
+ // r = i - w - s * (n-1)
+ // n - is the maximum number of windows we can fit into input, so this formula is equal to
+ // r = (i - w) % s
+ auto remainder = (effective_input_dim - effective_window_dim) % strides[i];
+
+ // remove redundant pad, but no more than there are padding
+ fixed_pads[i + spatial_dimensions] -= std::min(remainder, pads[i + spatial_dimensions]);
+ }
+ return fixed_pads;
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/ConvPoolHelpers.h b/compiler/mir-onnx-importer/ConvPoolHelpers.h
new file mode 100644
index 000000000..099392f4f
--- /dev/null
+++ b/compiler/mir-onnx-importer/ConvPoolHelpers.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_CONV_POOL_HELPERS_H
+#define MIR_ONNX_CONV_POOL_HELPERS_H
+
+#include "mir/Shape.h"
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace mir_onnx
+{
+
+void inferAutoPadding(const std::string &pad_type, const mir::Shape &input_shape,
+ const std::vector<std::int32_t> &dilations,
+ const std::vector<std::int32_t> &strides,
+ const std::vector<std::int32_t> &window_size,
+ std::vector<std::int32_t> &padding_before,
+ std::vector<std::int32_t> &padding_after);
+
+std::vector<std::int32_t> fixPads(const mir::Shape &input_shape,
+ const std::vector<std::int32_t> &pads,
+ const std::vector<std::int32_t> &strides,
+ const std::vector<std::int32_t> &dilation,
+ const std::vector<std::int32_t> &kernel_shape);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_CONV_POOL_HELPERS_H
diff --git a/compiler/mir-onnx-importer/ONNXHelpers.cpp b/compiler/mir-onnx-importer/ONNXHelpers.cpp
new file mode 100644
index 000000000..f3a9d182d
--- /dev/null
+++ b/compiler/mir-onnx-importer/ONNXHelpers.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "MirInterpreter.h"
+#include "mir/ops/ConstantOp.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+#include "mir/TensorVariant.h"
+#include "mir/Index.h"
+
+namespace mir_onnx
+{
+
+const int64_t firstUnknownOpset = 13;
+
+template <typename T> static mir::Shape constantToShapeT(const mir::TensorVariant &t)
+{
+ const mir::Shape &t_shape = t.getShape();
+ mir::Tensor<T> input(t);
+ if (t_shape.rank() != 1)
+ throw std::runtime_error("only 1-d tensors supported as a shape input");
+
+ mir::Shape target_shape;
+ std::int32_t rank = t_shape.dim(0);
+ target_shape.resize(rank);
+ for (int i = 0; i < rank; ++i)
+ target_shape.dim(i) = static_cast<std::int32_t>(input.at(mir::Index{i}));
+ return target_shape;
+}
+
+mir::Shape constantToShape(const mir::ops::ConstantOp *op)
+{
+ const auto &t = op->getValue();
+ mir::DataType d_type = t.getElementType();
+
+ if (t.getType().isQuantized())
+ throw std::runtime_error("unsupported data type of shape operator");
+
+ switch (d_type)
+ {
+ case mir::DataType::FLOAT32:
+ return constantToShapeT<float>(t);
+ break;
+ case mir::DataType::FLOAT64:
+ return constantToShapeT<double>(t);
+ break;
+ case mir::DataType::INT32:
+ return constantToShapeT<int32_t>(t);
+ break;
+ case mir::DataType::INT64:
+ return constantToShapeT<int64_t>(t);
+ break;
+ case mir::DataType::UINT8:
+ return constantToShapeT<uint8_t>(t);
+ break;
+ default:
+ throw std::runtime_error{"Unknown datatype in constant"};
+ break;
+ }
+}
+
+mir::DataType onnxDataTypeToMirDataType(onnx::TensorProto::DataType type)
+{
+ switch (type)
+ {
+ case onnx::TensorProto_DataType_UINT8:
+ return mir::DataType::UINT8;
+ break;
+ case onnx::TensorProto_DataType_INT32:
+ return mir::DataType::INT32;
+ break;
+ case onnx::TensorProto_DataType_INT64:
+ return mir::DataType::INT64;
+ break;
+ case onnx::TensorProto_DataType_DOUBLE:
+ return mir::DataType::FLOAT64;
+ break;
+ case onnx::TensorProto_DataType_FLOAT:
+ return mir::DataType::FLOAT32;
+ break;
+ case onnx::TensorProto_DataType_UNDEFINED:
+ throw std::runtime_error{"Undefined input data type not supported"};
+ break;
+ default:
+ throw std::runtime_error{"Unsupported tensor element data type"};
+ }
+}
+
+mir::TensorVariant createTensor(const onnx::TensorProto *tensor)
+{
+ mir::DataType type;
+ const void *src_data;
+ mir::Shape shape(tensor->dims_size());
+ for (int i = 0; i < tensor->dims_size(); ++i)
+ {
+ shape.dim(i) = tensor->dims(i);
+ }
+
+ if (tensor->float_data_size() != 0)
+ {
+ assert(tensor->data_type() == onnx::TensorProto::FLOAT);
+ type = mir::DataType::FLOAT32;
+ src_data = tensor->float_data().data();
+ }
+ else if (tensor->double_data_size() != 0)
+ {
+ assert(tensor->data_type() == onnx::TensorProto::DOUBLE);
+ type = mir::DataType::FLOAT64;
+ src_data = tensor->double_data().data();
+ }
+ else if (tensor->int32_data_size() != 0)
+ {
+ assert(tensor->data_type() == onnx::TensorProto::INT32);
+ type = mir::DataType::INT32;
+ src_data = tensor->int32_data().data();
+ }
+ else if (tensor->int64_data_size() != 0)
+ {
+ assert(tensor->data_type() == onnx::TensorProto::INT64);
+ type = mir::DataType::INT64;
+ src_data = tensor->int64_data().data();
+ }
+ else if (tensor->has_raw_data())
+ {
+ type = onnxDataTypeToMirDataType((onnx::TensorProto_DataType)tensor->data_type());
+ src_data = tensor->raw_data().data();
+ }
+ else
+ {
+ throw std::runtime_error("Invalid data in Proto file, investigate");
+ }
+
+ return mir::TensorVariant({type, shape}, src_data);
+}
+
+mir::Operation *foldConstants(mir::Graph *graph, mir::Operation *op)
+{
+ if (op->getType() == mir::Operation::Type::constant ||
+ op->getType() == mir::Operation::Type::input || op->getType() == mir::Operation::Type::output)
+ {
+ // don't fold input, output and constant nodes
+ return op;
+ }
+
+ if (op->getNumOutputs() != 1)
+ {
+ // this operation either have more than 1 output or none at all
+ return op;
+ }
+
+ bool is_foldable =
+ std::all_of(op->getInputs().begin(), op->getInputs().end(), [](mir::Operation::Output *out) {
+ return out->getNode()->getType() == mir::Operation::Type::constant;
+ });
+
+ if (!is_foldable)
+ return op;
+
+ mir_interpreter::MIRInterpreter interpreter;
+ for (mir::Operation::Output *out : op->getInputs())
+ {
+ auto *constant = static_cast<mir::ops::ConstantOp *>(out->getNode());
+ interpreter.setTensor(out, constant->getValue());
+ }
+ op->accept(&interpreter);
+ const mir::TensorVariant &output = interpreter.getTensor(op->getOutput(0));
+
+ return graph->create<mir::ops::ConstantOp>(output);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/ONNXHelpers.h b/compiler/mir-onnx-importer/ONNXHelpers.h
new file mode 100644
index 000000000..1367ab82a
--- /dev/null
+++ b/compiler/mir-onnx-importer/ONNXHelpers.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIR_ONNX_HELPERS_H__
+#define __MIR_ONNX_HELPERS_H__
+
+#include "mir/Graph.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/TensorVariant.h"
+#include "mir/ops/TransposeOp.h"
+
+#include "onnx/onnx.pb.h"
+
+namespace mir_onnx
+{
+
+extern const int64_t firstUnknownOpset;
+
+mir::DataType onnxDataTypeToMirDataType(onnx::TensorProto::DataType type);
+
+mir::Shape constantToShape(const mir::ops::ConstantOp *op);
+
+mir::TensorVariant createTensor(const onnx::TensorProto *tensor);
+
+mir::Operation *foldConstants(mir::Graph *graph, mir::Operation *op);
+
+// Creates an OpType node in `graph` and immediately attempts constant folding
+// on it (see foldConstants). NOTE: the returned node may therefore be a
+// ConstantOp rather than an OpType instance.
+template <typename OpType, typename... Types>
+mir::Operation *createOp(mir::Graph *graph, Types &&... args)
+{
+ auto op = graph->create<OpType>(std::forward<Types>(args)...);
+ op = foldConstants(graph, op);
+ return op;
+}
+
+} // namespace mir_onnx
+
+#endif // __MIR_ONNX_HELPERS_H__
diff --git a/compiler/mir-onnx-importer/ONNXImporterImpl.cpp b/compiler/mir-onnx-importer/ONNXImporterImpl.cpp
new file mode 100644
index 000000000..c33104198
--- /dev/null
+++ b/compiler/mir-onnx-importer/ONNXImporterImpl.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ONNXImporterImpl.h"
+#include "ONNXHelpers.h"
+#include "ONNXOpRegistration.h"
+#include "onnx/onnx.pb.h"
+
+#include "mir/Shape.h"
+#include "mir/TensorUtil.h"
+
+#include "mir/ops/ConstantOp.h"
+
+#include <fcntl.h>
+
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/text_format.h>
+#include <functional>
+#include <iostream>
+#include <stdex/Memory.h>
+#include <utility>
+
+namespace mir_onnx
+{
+
+namespace
+{
+
+// Private implementation (anonymous namespace): loads an onnx::ModelProto
+// from disk and converts it into a mir::Graph using the per-opset node
+// converters registered in NodeConverterRegistry.
+class ONNXImporterImpl final
+{
+public:
+ ONNXImporterImpl();
+ ~ONNXImporterImpl();
+ /// @brief Load the model and convert it into a MIR Graph.
+ std::unique_ptr<mir::Graph> importModelFromBinaryFile(const std::string &filename);
+ std::unique_ptr<mir::Graph> importModelFromTextFile(const std::string &filename);
+
+private:
+ // Builds the mir::Graph from the loaded _model (inputs, nodes, outputs).
+ std::unique_ptr<mir::Graph> createIR();
+ // Creates ConstantOp/InputOp nodes for initializers and graph inputs.
+ void createGraphInputs();
+ // Throws if the model contains any (op_type, opset) without a converter.
+ void collectUnsupportedOps();
+ std::unique_ptr<onnx::ModelProto> _model;     // parsed ONNX model
+ std::unique_ptr<ConverterContext> _converterCtx; // tensor-name -> MIR output map
+ std::unique_ptr<ModelContext> _modelCtx;      // per-domain opset versions
+ std::unique_ptr<mir::Graph> _graph;           // graph under construction
+};
+
+// Converter registration is idempotent enough to repeat per importer instance.
+ONNXImporterImpl::ONNXImporterImpl() { registerSupportedOps(); }
+
+ONNXImporterImpl::~ONNXImporterImpl() = default;
+
+// Parses a binary-serialized ONNX model from `filename` into `model`.
+// Throws std::runtime_error when the file cannot be opened, cannot be parsed,
+// or contains trailing bytes (taken as a wrong-format indicator).
+void loadModelFromBinaryFile(const std::string &filename, onnx::ModelProto *model)
+{
+ GOOGLE_PROTOBUF_VERIFY_VERSION;
+
+ int file_handle = open(filename.c_str(), O_RDONLY);
+
+ if (file_handle == -1)
+ // NOTE(review): std::strerror/errno rely on transitive <cstring>/<cerrno>
+ // includes - confirm they are pulled in on all toolchains.
+ throw std::runtime_error("Couldn't open file \"" + filename + "\": " + std::strerror(errno) +
+ ".");
+
+ google::protobuf::io::FileInputStream file_stream(file_handle);
+ file_stream.SetCloseOnDelete(true); // stream now owns the descriptor
+
+ google::protobuf::io::CodedInputStream coded_stream(&file_stream);
+ // Lift protobuf's default message-size limit so large models parse.
+ // NOTE(review): the two-argument overload is deprecated/removed in newer
+ // protobuf releases - verify against the pinned protobuf version.
+ coded_stream.SetTotalBytesLimit(INT_MAX, INT_MAX);
+
+ if (!model->ParseFromCodedStream(&coded_stream))
+ throw std::runtime_error("Couldn't parse file \"" + filename + "\".");
+
+ // If the file has not been consumed entirely, assume that the file is in the wrong format.
+ if (!coded_stream.ConsumedEntireMessage())
+ throw std::runtime_error("File \"" + filename + "\" has not been consumed entirely.");
+}
+
+// Parses a text-format (protobuf TextFormat) ONNX model from `filename` into
+// `model`. Throws std::runtime_error on open or parse failure.
+void loadModelFromTextFile(const std::string &filename, onnx::ModelProto *model)
+{
+ GOOGLE_PROTOBUF_VERIFY_VERSION;
+
+ int file_handle = open(filename.c_str(), O_RDONLY);
+
+ if (file_handle == -1)
+ throw std::runtime_error("Couldn't open file \"" + filename + "\": " + std::strerror(errno) +
+ ".");
+
+ google::protobuf::io::FileInputStream file_stream(file_handle);
+ file_stream.SetCloseOnDelete(true); // stream now owns the descriptor
+
+ if (!google::protobuf::TextFormat::Parse(&file_stream, model))
+ throw std::runtime_error("Couldn't parse file \"" + filename + "\".");
+}
+
+// Loads a binary ONNX model, validates that every operator is convertible,
+// then builds and returns the MIR graph.
+std::unique_ptr<mir::Graph> ONNXImporterImpl::importModelFromBinaryFile(const std::string &filename)
+{
+ _model = stdex::make_unique<onnx::ModelProto>();
+ loadModelFromBinaryFile(filename, _model.get());
+ _modelCtx = stdex::make_unique<ModelContext>(_model.get());
+ collectUnsupportedOps(); // fail fast before any conversion work
+ return createIR();
+}
+
+// Same as importModelFromBinaryFile, but reads a TextFormat model file.
+std::unique_ptr<mir::Graph> ONNXImporterImpl::importModelFromTextFile(const std::string &filename)
+{
+ _model = stdex::make_unique<onnx::ModelProto>();
+ loadModelFromTextFile(filename, _model.get());
+ _modelCtx = stdex::make_unique<ModelContext>(_model.get());
+ collectUnsupportedOps(); // fail fast before any conversion work
+ return createIR();
+}
+
+// Scans every node of the model and collects (op_type, opset) pairs that have
+// no registered converter; if any are found, prints them all to stderr and
+// throws, so conversion never starts on a partially-supported model.
+void ONNXImporterImpl::collectUnsupportedOps()
+{
+ std::set<std::pair<std::string, int64_t>> problems_op_set;
+
+ for (int i = 0; i < _model->graph().node_size(); i++)
+ {
+ const auto &onnx_node = _model->graph().node(i);
+ assert(onnx_node.has_op_type());
+ const auto &op_type = onnx_node.op_type();
+ // Opset is resolved per operator domain (empty string = default domain).
+ auto opset = _modelCtx->getDomainOpsetVersion(onnx_node.domain());
+
+ NodeConverterRegistry::ConverterFunc converter =
+ NodeConverterRegistry::getInstance().lookup(op_type, opset);
+
+ if (converter == nullptr)
+ problems_op_set.emplace(op_type, opset);
+ }
+ if (!problems_op_set.empty())
+ {
+ // Report every unsupported operator at once instead of failing one by one.
+ std::cerr << "The following operators are not supported:\n";
+ for (const auto &op : problems_op_set)
+ std::cerr << op.first << " opset " << op.second << std::endl;
+ throw std::runtime_error("Unsupported operators found");
+ }
+}
+
+// Materializes graph entry points: every initializer becomes a ConstantOp,
+// and every graph input that is not backed by an initializer becomes an
+// InputOp with a fully static shape.
+void ONNXImporterImpl::createGraphInputs()
+{
+ const auto &graph = _model->graph();
+ const auto &initializer = graph.initializer();
+ const auto &value_info = graph.value_info();
+
+ // Create all initializer Tensors
+ for (const auto &tensor : initializer)
+ {
+ const auto mir_tensor = createTensor(&tensor);
+ auto *op = _graph->create<mir::ops::ConstantOp>(mir_tensor);
+ _converterCtx->setOutput(tensor.name(), op->getOutput(0));
+ }
+
+ for (const auto &input : graph.input())
+ {
+ assert(input.has_name());
+
+ // Inputs already registered above are initializers, not real inputs.
+ if (_converterCtx->getOutput(input.name()) == nullptr)
+ {
+ const auto &onnx_input_shape = input.type().tensor_type().shape();
+ mir::Shape shape(onnx_input_shape.dim_size());
+ for (int i = 0; i < onnx_input_shape.dim_size(); i++)
+ {
+ // Dynamic (symbolic) dimensions are not supported - only dim_value.
+ assert(onnx_input_shape.dim(i).has_dim_value());
+ shape.dim(i) = static_cast<int32_t>(onnx_input_shape.dim(i).dim_value());
+ }
+
+ auto elem_type = onnxDataTypeToMirDataType(
+ (onnx::TensorProto_DataType)input.type().tensor_type().elem_type());
+ mir::TensorType type{elem_type, shape};
+ auto *op = _graph->create<mir::ops::InputOp>(type);
+ _converterCtx->setOutput(input.name(), op->getOutput(0));
+ }
+ }
+}
+
+// Builds the MIR graph: creates inputs/constants, converts every ONNX node in
+// model order (ONNX graphs are topologically sorted), then wires up outputs.
+// Throws when a declared graph output has no producing node.
+std::unique_ptr<mir::Graph> ONNXImporterImpl::createIR()
+{
+ _graph = stdex::make_unique<mir::Graph>();
+ _converterCtx = stdex::make_unique<ConverterContext>(_graph.get());
+
+ createGraphInputs();
+
+ // Forming partially ordered computation graph
+ for (const auto &onnx_node : _model->graph().node())
+ {
+ assert(onnx_node.has_op_type());
+ auto &op_type = onnx_node.op_type();
+ auto opset = _modelCtx->getDomainOpsetVersion(onnx_node.domain());
+ // Get converter
+ NodeConverterRegistry::ConverterFunc converter =
+ NodeConverterRegistry::getInstance().lookup(op_type, opset);
+ // Non-null guaranteed: collectUnsupportedOps() already validated the model.
+ assert(converter != nullptr);
+ converter(onnx_node, _converterCtx.get());
+ }
+ // Set graph outputs
+ const auto &outputs = _model->graph().output();
+ for (const auto &output : outputs)
+ {
+ assert(output.has_name());
+ auto mir_output = _converterCtx->getOutput(output.name());
+ if (mir_output == nullptr)
+ throw std::runtime_error("Bad output name!");
+
+ _graph->create<mir::ops::OutputOp>(mir_output);
+ }
+
+ return std::move(_graph);
+}
+
+} // namespace
+
+// Public entry point: import a binary-serialized ONNX model as a MIR graph.
+std::unique_ptr<mir::Graph> importModelFromBinaryFile(const std::string &filename)
+{
+ ONNXImporterImpl importer;
+ return importer.importModelFromBinaryFile(filename);
+}
+
+// Public entry point: import a TextFormat ONNX model as a MIR graph.
+std::unique_ptr<mir::Graph> importModelFromTextFile(const std::string &filename)
+{
+ ONNXImporterImpl importer;
+ return importer.importModelFromTextFile(filename);
+}
+
+// Deprecated alias for importModelFromBinaryFile (see header TODO: remove
+// after all callers are migrated).
+std::unique_ptr<mir::Graph> loadModel(const std::string &filename)
+{
+ return importModelFromBinaryFile(filename);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/ONNXImporterImpl.h b/compiler/mir-onnx-importer/ONNXImporterImpl.h
new file mode 100644
index 000000000..02a49b330
--- /dev/null
+++ b/compiler/mir-onnx-importer/ONNXImporterImpl.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_ONNX_IMPORTER_H
+#define _MIR_ONNX_IMPORTER_H
+
+#include "mir/Graph.h"
+
+#include <memory>
+#include <string>
+
+namespace mir_onnx
+{
+
+std::unique_ptr<mir::Graph> importModelFromBinaryFile(const std::string &filename);
+std::unique_ptr<mir::Graph> importModelFromTextFile(const std::string &filename);
+// TODO Remove after changing all uses.
+std::unique_ptr<mir::Graph> loadModel(const std::string &filename);
+
+} // namespace mir_onnx
+
+#endif // _MIR_ONNX_IMPORTER_H
diff --git a/compiler/mir-onnx-importer/ONNXNodeConverterRegistry.cpp b/compiler/mir-onnx-importer/ONNXNodeConverterRegistry.cpp
new file mode 100644
index 000000000..a11b18e89
--- /dev/null
+++ b/compiler/mir-onnx-importer/ONNXNodeConverterRegistry.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ONNXNodeConverterRegistry.h"
+
+#include <memory>
+
+namespace mir_onnx
+{
+
+// Records the opset version declared for an operator domain.
+// NOTE(review): map::emplace is a no-op when the domain already exists, so a
+// duplicate opset_import entry silently keeps the first version - confirm
+// this is the intended behavior.
+void ModelContext::setDomainOpsetVersion(const std::string &domain, const int64_t opset_version)
+{
+ _domainToOpsetVersion.emplace(domain, opset_version);
+}
+
+// Returns the opset version for `domain`; throws if the model never declared
+// an opset_import entry for it.
+int64_t ModelContext::getDomainOpsetVersion(const std::string &domain) const
+{
+ auto iter = _domainToOpsetVersion.find(domain);
+ if (iter == _domainToOpsetVersion.end())
+ throw std::runtime_error("Didn't have domain " + domain + "!");
+ return iter->second;
+}
+
+// Validates the model's IR version and captures the per-domain opset
+// versions from its opset_import list. Throws on null model or an IR version
+// newer than the bundled onnx library supports.
+ModelContext::ModelContext(const onnx::ModelProto *model)
+{
+ if (model == nullptr)
+ {
+ throw std::runtime_error{"Model should be imported before importer prepare"};
+ }
+
+ if (model->ir_version() > onnx::IR_VERSION)
+ {
+ throw std::runtime_error("IR version " + std::to_string(model->ir_version()) +
+ " is not supported yet.");
+ }
+
+ // Set Opset Version for each domain
+ for (const auto &op_set : model->opset_import())
+ {
+ setDomainOpsetVersion(op_set.domain(), op_set.version());
+ }
+}
+
+// ConverterContext
+
+// ConverterContext: maps ONNX tensor names to MIR operation outputs while the
+// graph is being built; does not own the graph.
+ConverterContext::ConverterContext(mir::Graph *graph) : _graph(graph) {}
+
+// Registers `output` under the ONNX tensor name `name` (also naming the MIR
+// output accordingly). Throws on duplicate names - ONNX tensor names are
+// required to be unique within a graph.
+void ConverterContext::setOutput(const std::string &name, mir::Operation::Output *output)
+{
+ output->setName(name);
+ auto result = _tensorNameToOutput.emplace(name, output);
+ if (!result.second)
+ throw std::runtime_error("Name duplication: " + name);
+}
+
+// Returns the MIR output registered under `name`, or nullptr if unknown.
+mir::Operation::Output *ConverterContext::getOutput(const std::string &name) const
+{
+ auto iter = _tensorNameToOutput.find(name);
+ if (iter == _tensorNameToOutput.end())
+ return nullptr;
+ else
+ return iter->second;
+}
+
+// Resolves the MIR outputs feeding `onnx_node`, in input order. Empty input
+// names (ONNX's way of encoding omitted optional inputs) are skipped, so the
+// result may be shorter than the node's input list.
+std::vector<mir::Operation::Output *>
+ConverterContext::getNodeInputs(const onnx::NodeProto &onnx_node) const
+{
+ const auto &input_names = onnx_node.input();
+ std::vector<mir::Operation::Output *> outputs;
+
+ for (const auto &input_name : input_names)
+ {
+ if (!input_name.empty())
+ {
+ auto *mir_output = getOutput(input_name);
+ assert(mir_output != nullptr);
+ outputs.emplace_back(mir_output);
+ }
+ }
+ return outputs;
+}
+
+// Registers the produced MIR outputs under the node's declared output names.
+// NOTE(review): assumes onnx_node.output_size() >= outputs.size() - confirm
+// converters never produce more outputs than the node declares.
+void ConverterContext::setNodeOutputs(const onnx::NodeProto &onnx_node,
+ const std::vector<mir::Operation::Output *> &outputs)
+{
+ assert(!outputs.empty());
+ for (std::size_t i = 0; i < outputs.size(); ++i)
+ {
+ setOutput(onnx_node.output(i), outputs[i]);
+ }
+}
+
+// NodeConverterRegistry
+
+// Looks up the converter for `optype` applicable at `opset`: the one
+// registered with the largest version <= opset. Returns nullptr when the
+// op type is unknown, when opset predates the earliest registration, or when
+// the matched registration is an explicit nullptr (deliberately unsupported).
+NodeConverterRegistry::ConverterFunc NodeConverterRegistry::lookup(const std::string &optype,
+ int64_t opset) const
+{
+ auto it = _converter_map.find(optype);
+ if (it == _converter_map.end())
+ {
+ return nullptr;
+ }
+
+ const VersionMap &conv_map = it->second;
+
+ // Walk versions in descending order; lower_bound finds the first entry whose
+ // version is <= opset. NOTE(review): std::lower_bound needs <algorithm>,
+ // which is not included here - presumably pulled in transitively; confirm.
+ auto res = std::lower_bound(
+ conv_map.crbegin(), conv_map.crend(), opset,
+ [](const VersionMap::value_type &pair, int64_t opset) { return pair.first > opset; });
+
+ if (res == conv_map.crend())
+ {
+ return nullptr;
+ }
+ return res->second;
+}
+
+// Meyers-singleton accessor for the process-wide registry.
+NodeConverterRegistry &NodeConverterRegistry::getInstance()
+{
+ static NodeConverterRegistry instance;
+ return instance;
+}
+
+// Registers `conv` (possibly nullptr, meaning "known but unsupported") as the
+// converter for `op_type` starting at version `opset`.
+void NodeConverterRegistry::registerConverter(const std::string &op_type, int64_t opset,
+ NodeConverterRegistry::ConverterFunc conv)
+{
+ _converter_map[op_type].emplace(opset, conv);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/ONNXNodeConverterRegistry.h b/compiler/mir-onnx-importer/ONNXNodeConverterRegistry.h
new file mode 100644
index 000000000..ea712ad23
--- /dev/null
+++ b/compiler/mir-onnx-importer/ONNXNodeConverterRegistry.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONNX_NODE_CONVERTER_REGISTRY_H__
+#define __ONNX_NODE_CONVERTER_REGISTRY_H__
+
+#include "onnx/onnx.pb.h"
+#include "mir/Graph.h"
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace mir_onnx
+{
+
+// Holds model-wide metadata needed during conversion: the opset version
+// declared for each operator domain.
+class ModelContext
+{
+public:
+ explicit ModelContext(const onnx::ModelProto *model);
+
+ void setDomainOpsetVersion(const std::string &domain, const int64_t opset_version);
+ int64_t getDomainOpsetVersion(const std::string &domain) const;
+
+private:
+ std::map<std::string, int64_t> _domainToOpsetVersion;
+};
+
+// Per-conversion state shared between node converters: the graph being built
+// and the mapping from ONNX tensor names to MIR operation outputs.
+class ConverterContext
+{
+public:
+ explicit ConverterContext(mir::Graph *graph);
+ ~ConverterContext() = default;
+
+ void setOutput(const std::string &name, mir::Operation::Output *output);
+ mir::Operation::Output *getOutput(const std::string &name) const;
+ std::vector<mir::Operation::Output *> getNodeInputs(const onnx::NodeProto &onnx_node) const;
+ void setNodeOutputs(const onnx::NodeProto &onnx_node,
+ const std::vector<mir::Operation::Output *> &outputs);
+ mir::Graph *getGraph() const { return _graph; }
+
+private:
+ std::map<std::string, mir::Operation::Output *> _tensorNameToOutput;
+ mir::Graph *_graph; // not owned
+};
+
+// Global registry mapping (op_type, opset version) to converter functions;
+// see lookup() for the version-resolution rule.
+class NodeConverterRegistry
+{
+public:
+ using ConverterFunc = void (*)(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+ NodeConverterRegistry() = default;
+
+ ConverterFunc lookup(const std::string &optype, int64_t opset) const;
+ void registerConverter(const std::string &op_type, int64_t opset, ConverterFunc conv);
+
+ static NodeConverterRegistry &getInstance();
+
+private:
+ // Sorted by opset version so lookup can take the closest version <= opset.
+ using VersionMap = std::map<int64_t, ConverterFunc>;
+
+ // NOTE(review): std::unordered_map is used but <unordered_map> is not in
+ // this header's include list - relies on a transitive include; should be
+ // added explicitly.
+ std::unordered_map<std::string, VersionMap> _converter_map;
+};
+
+} // namespace mir_onnx
+
+#endif // __ONNX_NODE_CONVERTER_REGISTRY_H__
diff --git a/compiler/mir-onnx-importer/ONNXNodeConverterRegistry.test.cpp b/compiler/mir-onnx-importer/ONNXNodeConverterRegistry.test.cpp
new file mode 100644
index 000000000..dfc3e4216
--- /dev/null
+++ b/compiler/mir-onnx-importer/ONNXNodeConverterRegistry.test.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ONNXNodeConverterRegistry.h"
+#include "ONNXHelpers.h"
+
+#include "gtest/gtest.h"
+
+using namespace mir_onnx;
+
+// Dummy converters used only as distinct registry values in the tests below.
+void converterV1(const onnx::NodeProto &node, ConverterContext *ctx) {}
+void converterV3(const onnx::NodeProto &node, ConverterContext *ctx) {}
+void converterV7(const onnx::NodeProto &node, ConverterContext *ctx) {}
+
+// Fixture with a "dummy" op registered at opsets 1, 3, 7 and an explicit
+// unsupported marker at firstUnknownOpset.
+// NOTE(review): "Regsitry" is a typo for "Registry" (test-only identifier).
+class NodeConverterRegsitryTest : public ::testing::Test
+{
+protected:
+ void SetUp() override
+ {
+ registry.registerConverter("dummy", 1, converterV1);
+ registry.registerConverter("dummy", 3, converterV3);
+ registry.registerConverter("dummy", 7, converterV7);
+ registry.registerConverter("dummy", firstUnknownOpset, nullptr);
+ }
+
+ NodeConverterRegistry registry;
+};
+
+// Exact-version lookup returns the converter registered at that version.
+TEST_F(NodeConverterRegsitryTest, existing_lookup_works)
+{
+ auto res = registry.lookup("dummy", 1);
+ ASSERT_EQ(res, &converterV1);
+}
+
+// A version between registrations resolves to the closest lower one.
+TEST_F(NodeConverterRegsitryTest, skipped_lookup_works)
+{
+ auto res = registry.lookup("dummy", 2);
+ ASSERT_EQ(res, &converterV1);
+}
+
+// Versions at/after firstUnknownOpset hit the explicit nullptr marker.
+TEST_F(NodeConverterRegsitryTest, first_unknown_version_works)
+{
+ auto res = registry.lookup("dummy", 14);
+ ASSERT_EQ(res, nullptr);
+}
+
+// Versions before the earliest registration have no converter.
+TEST_F(NodeConverterRegsitryTest, lower_than_first_version)
+{
+ auto res = registry.lookup("dummy", 0);
+ ASSERT_EQ(res, nullptr);
+}
diff --git a/compiler/mir-onnx-importer/ONNXOpRegistration.h b/compiler/mir-onnx-importer/ONNXOpRegistration.h
new file mode 100644
index 000000000..e3001b000
--- /dev/null
+++ b/compiler/mir-onnx-importer/ONNXOpRegistration.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONNX_OP_REGISTRATION_H__
+#define __ONNX_OP_REGISTRATION_H__
+
+#include "ONNXNodeConverterRegistry.h"
+
+#include "Op/Abs.h"
+#include "Op/Add.h"
+#include "Op/AveragePool.h"
+#include "Op/BatchNormalization.h"
+#include "Op/Concat.h"
+#include "Op/Constant.h"
+#include "Op/Conv.h"
+#include "Op/ConvTranspose.h"
+#include "Op/Div.h"
+#include "Op/Dropout.h"
+#include "Op/Equal.h"
+#include "Op/Expand.h"
+#include "Op/Flatten.h"
+#include "Op/Gather.h"
+#include "Op/Greater.h"
+#include "Op/Gemm.h"
+#include "Op/GlobalAveragePool.h"
+#include "Op/Identity.h"
+#include "Op/Less.h"
+#include "Op/MatMul.h"
+#include "Op/Max.h"
+#include "Op/MaxPool.h"
+#include "Op/Mul.h"
+#include "Op/Pad.h"
+#include "Op/Reciprocal.h"
+#include "Op/ReduceMean.h"
+#include "Op/Relu.h"
+#include "Op/Reshape.h"
+#include "Op/Shape.h"
+#include "Op/Sigmoid.h"
+#include "Op/Softmax.h"
+#include "Op/Sqrt.h"
+#include "Op/Sub.h"
+#include "Op/Sum.h"
+#include "Op/Tanh.h"
+#include "Op/Transpose.h"
+#include "Op/Unsqueeze.h"
+#include "Op/Upsample.h"
+
+namespace mir_onnx
+{
+
+// Registers every supported ONNX operator converter with the global registry.
+// For each op the entries are listed in ascending opset order; lookup() picks
+// the entry with the largest version <= the model's opset. UNSUPPORTED
+// registers an explicit nullptr so lookup() reports the op as unsupported
+// from that version onward (every op ends with an UNSUPPORTED at
+// firstUnknownOpset to fence off future opsets).
+inline void registerSupportedOps()
+{
+ auto &registry = NodeConverterRegistry::getInstance();
+
+#define REG_CONVERTER(name, version, function) registry.registerConverter(name, version, function)
+#define REG(name, version) REG_CONVERTER(#name, version, convert##name##V##version)
+#define UNSUPPORTED(name, version) REG_CONVERTER(#name, version, nullptr)
+
+ REG(Abs, 1);
+ REG(Abs, 6);
+ UNSUPPORTED(Abs, firstUnknownOpset);
+
+ REG(Add, 1);
+ REG(Add, 6);
+ REG(Add, 7);
+ UNSUPPORTED(Add, firstUnknownOpset);
+
+ REG(AveragePool, 1);
+ REG(AveragePool, 7);
+ REG(AveragePool, 10);
+ UNSUPPORTED(AveragePool, 11);
+ UNSUPPORTED(AveragePool, firstUnknownOpset);
+
+ REG(BatchNormalization, 1);
+ REG(BatchNormalization, 6);
+ REG(BatchNormalization, 7);
+ REG(BatchNormalization, 9);
+ UNSUPPORTED(BatchNormalization, firstUnknownOpset);
+
+ REG(Concat, 1);
+ REG(Concat, 4);
+ UNSUPPORTED(Concat, 11);
+ UNSUPPORTED(Concat, firstUnknownOpset);
+
+ REG(Constant, 1);
+ REG(Constant, 9);
+ REG(Constant, 11);
+ UNSUPPORTED(Constant, 12);
+ UNSUPPORTED(Constant, firstUnknownOpset);
+
+ REG(Conv, 1);
+ UNSUPPORTED(Conv, 11);
+ UNSUPPORTED(Conv, firstUnknownOpset);
+
+ REG(ConvTranspose, 1);
+ UNSUPPORTED(ConvTranspose, 11);
+ UNSUPPORTED(ConvTranspose, firstUnknownOpset);
+
+ UNSUPPORTED(Div, 1);
+ UNSUPPORTED(Div, 6);
+ REG(Div, 7);
+ UNSUPPORTED(Div, firstUnknownOpset);
+
+ REG(Dropout, 1);
+ REG(Dropout, 6);
+ REG(Dropout, 7);
+ REG(Dropout, 10);
+ UNSUPPORTED(Dropout, 12);
+ UNSUPPORTED(Dropout, firstUnknownOpset);
+
+ UNSUPPORTED(Equal, 1);
+ REG(Equal, 7);
+ REG(Equal, 11);
+ UNSUPPORTED(Equal, firstUnknownOpset);
+
+ REG(Expand, 8);
+ UNSUPPORTED(Expand, firstUnknownOpset);
+
+ REG(Flatten, 1);
+ REG(Flatten, 9);
+ UNSUPPORTED(Flatten, 11);
+ UNSUPPORTED(Flatten, firstUnknownOpset);
+
+ REG(Gather, 1);
+ UNSUPPORTED(Gather, 11);
+ UNSUPPORTED(Gather, firstUnknownOpset);
+
+ REG(Gemm, 1);
+ REG(Gemm, 6);
+ REG(Gemm, 7);
+ REG(Gemm, 9);
+ REG(Gemm, 11);
+ UNSUPPORTED(Gemm, firstUnknownOpset);
+
+ UNSUPPORTED(GlobalAveragePool, 1);
+ REG(GlobalAveragePool, 2);
+ UNSUPPORTED(GlobalAveragePool, firstUnknownOpset);
+
+ UNSUPPORTED(Greater, 1);
+ REG(Greater, 7);
+ REG(Greater, 9);
+ UNSUPPORTED(Greater, firstUnknownOpset);
+
+ REG(Identity, 1);
+ UNSUPPORTED(Identity, firstUnknownOpset);
+
+ UNSUPPORTED(Less, 1);
+ REG(Less, 7);
+ REG(Less, 9);
+ UNSUPPORTED(Less, firstUnknownOpset);
+
+ REG(MatMul, 1);
+ REG(MatMul, 9);
+ UNSUPPORTED(MatMul, firstUnknownOpset);
+
+ REG(Max, 1);
+ REG(Max, 6);
+ REG(Max, 8);
+ UNSUPPORTED(Max, firstUnknownOpset);
+
+ REG(MaxPool, 1);
+ REG(MaxPool, 8);
+ REG(MaxPool, 10);
+ UNSUPPORTED(MaxPool, 11);
+ UNSUPPORTED(MaxPool, 12);
+ UNSUPPORTED(MaxPool, firstUnknownOpset);
+
+ UNSUPPORTED(Mul, 1);
+ UNSUPPORTED(Mul, 6);
+ REG(Mul, 7);
+ UNSUPPORTED(Mul, firstUnknownOpset);
+
+ REG(Pad, 1);
+ REG(Pad, 2);
+ UNSUPPORTED(Pad, 11);
+ UNSUPPORTED(Pad, firstUnknownOpset);
+
+ REG(Reciprocal, 1);
+ REG(Reciprocal, 6);
+ UNSUPPORTED(Reciprocal, firstUnknownOpset);
+
+ REG(ReduceMean, 1);
+ UNSUPPORTED(ReduceMean, 11);
+ UNSUPPORTED(ReduceMean, firstUnknownOpset);
+
+ REG(Relu, 1);
+ REG(Relu, 6);
+ UNSUPPORTED(Relu, firstUnknownOpset);
+
+ REG(Reshape, 1);
+ REG(Reshape, 5);
+ UNSUPPORTED(Reshape, firstUnknownOpset);
+
+ REG(Shape, 1);
+ UNSUPPORTED(Shape, firstUnknownOpset);
+
+ REG(Sigmoid, 1);
+ REG(Sigmoid, 6);
+ UNSUPPORTED(Sigmoid, firstUnknownOpset);
+
+ REG(Softmax, 1);
+ // TODO SoftmaxV11 is mostly the same, needs a check though
+ UNSUPPORTED(Softmax, firstUnknownOpset);
+
+ REG(Sqrt, 1);
+ REG(Sqrt, 6);
+ UNSUPPORTED(Sqrt, firstUnknownOpset);
+
+ REG(Sub, 1);
+ REG(Sub, 6);
+ REG(Sub, 7);
+ UNSUPPORTED(Sub, firstUnknownOpset);
+
+ UNSUPPORTED(Sum, 1);
+ UNSUPPORTED(Sum, 6);
+ REG(Sum, 8);
+ UNSUPPORTED(Sum, firstUnknownOpset);
+
+ REG(Tanh, 1);
+ REG(Tanh, 6);
+ UNSUPPORTED(Tanh, firstUnknownOpset);
+
+ REG(Transpose, 1);
+ UNSUPPORTED(Transpose, firstUnknownOpset);
+
+ REG(Unsqueeze, 1);
+ UNSUPPORTED(Unsqueeze, 11);
+ UNSUPPORTED(Unsqueeze, firstUnknownOpset);
+
+ // Upsample-1 is not mentioned in onnx master and was considered experimental at the time
+ REG(Upsample, 1);
+ REG(Upsample, 7);
+ REG(Upsample, 9);
+ UNSUPPORTED(Upsample, firstUnknownOpset);
+
+#undef REG
+#undef REG_CONVERTER
+#undef UNSUPPORTED
+}
+
+} // namespace mir_onnx
+
+#endif // __ONNX_OP_REGISTRATION_H__
diff --git a/compiler/mir-onnx-importer/Op/Abs.cpp b/compiler/mir-onnx-importer/Op/Abs.cpp
new file mode 100644
index 000000000..350270cfd
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Abs.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Abs.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/AbsOp.h"
+
+namespace mir_onnx
+{
+
+// Shared implementation for all Abs opsets: elementwise absolute value has
+// the same semantics in every supported version.
+static void convertAbsGeneric(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ auto result = createOp<mir::ops::AbsOp>(graph, inputs[0])->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+// Abs-1 (the legacy `consumed_inputs` attribute is ignored).
+void convertAbsV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertAbsGeneric(onnx_node, context);
+}
+
+// Abs-6.
+void convertAbsV6(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertAbsGeneric(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Abs.h b/compiler/mir-onnx-importer/Op/Abs.h
new file mode 100644
index 000000000..06fcd5f3c
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Abs.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_ABS_H
+#define MIR_ONNX_OP_ABS_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertAbsV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertAbsV6(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_ABS_H
diff --git a/compiler/mir-onnx-importer/Op/Add.cpp b/compiler/mir-onnx-importer/Op/Add.cpp
new file mode 100644
index 000000000..8944b4e66
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Add.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Add.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/AddOp.h"
+
+namespace mir_onnx
+{
+
+// Add-1: identical to Add-6 for our purposes.
+void convertAddV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ // consumed_inputs attribute not used
+ convertAddV6(onnx_node, context);
+}
+
+// Add-6: pre-opset-7 explicit-broadcast form; only the no-axis variant is
+// supported, then delegates to the opset-7 conversion.
+void convertAddV6(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ // broadcast attribute not used
+ const auto *axis = findAttribute(onnx_node, "axis");
+ if (axis != nullptr)
+ throw std::runtime_error("Not supported axis attribute in Add operation!");
+
+ convertAddV7(onnx_node, context);
+}
+
+// Add-7: numpy-style implicit broadcasting; maps directly onto mir AddOp.
+void convertAddV7(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ auto result = createOp<mir::ops::AddOp>(graph, inputs[0], inputs[1])->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Add.h b/compiler/mir-onnx-importer/Op/Add.h
new file mode 100644
index 000000000..a11aa6bb7
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Add.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_ADD_H
+#define MIR_ONNX_OP_ADD_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertAddV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertAddV6(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertAddV7(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_ADD_H
diff --git a/compiler/mir-onnx-importer/Op/AveragePool.cpp b/compiler/mir-onnx-importer/Op/AveragePool.cpp
new file mode 100644
index 000000000..503feffc8
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/AveragePool.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AveragePool.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+#include "ConvPoolHelpers.h"
+
+#include "mir/ops/AvgPool2DOp.h"
+
+namespace mir_onnx
+{
+
+// Convert ONNX AveragePool (opset 1): 2-D average pooling over an NCHW input.
+// Reads 'strides', 'kernel_shape' and either explicit 'pads' or 'auto_pad',
+// then lowers the node to a single mir::ops::AvgPool2DOp.
+void convertAveragePoolV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+  mir::Graph *graph = context->getGraph();
+
+  assert(inputs.size() == 1);
+  auto input = inputs[0];
+
+  // Only 4-D (NCHW) inputs, i.e. 2-D pooling, are supported.
+  const auto &input_shape = input->getShape();
+  if (input_shape.rank() != 4)
+    throw std::runtime_error("AveragePool: only 2-D input is supported.");
+
+  constexpr int num_spatial_dims = 2;
+
+  // Stride defaults to 1 in every spatial dimension.
+  const auto strides =
+      getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
+  if (strides.size() != num_spatial_dims)
+    throw std::runtime_error("AveragePool: attribute 'strides' has incorrect size.");
+
+  // 'kernel_shape' is a required attribute: no default is supplied here.
+  const auto kernel_shape = getAttributeValue<std::vector<std::int32_t>>(onnx_node, "kernel_shape");
+  if (kernel_shape.size() != num_spatial_dims)
+    throw std::runtime_error("AveragePool: attribute 'kernel_shape' has incorrect size.");
+
+  // Explicit 'pads' wins over 'auto_pad'. ONNX stores pads as
+  // [before_h, before_w, after_h, after_w].
+  std::vector<std::int32_t> padding_before(num_spatial_dims, 0);
+  std::vector<std::int32_t> padding_after(num_spatial_dims, 0);
+  if (const auto *pads_attr = findAttribute(onnx_node, "pads"))
+  {
+    const auto pads = getAttributeValue<std::vector<std::int32_t>>(*pads_attr);
+    if (pads.size() != num_spatial_dims * 2)
+      throw std::runtime_error("AveragePool: attribute 'pads' has incorrect size.");
+    padding_before.assign(pads.cbegin(), std::next(pads.cbegin(), num_spatial_dims));
+    padding_after.assign(std::next(pads.cbegin(), num_spatial_dims), pads.cend());
+  }
+  else
+  {
+    // No explicit pads: derive them from 'auto_pad' (defaults to NOTSET).
+    const auto auto_pad = getAttributeValue<std::string>(onnx_node, "auto_pad", "NOTSET");
+    const std::vector<std::int32_t> dilations(num_spatial_dims, 1);
+    inferAutoPadding(auto_pad, input_shape, dilations, strides, kernel_shape, padding_before,
+                     padding_after);
+  }
+
+  mir::AvgPool2DOpAttributes attributes;
+  attributes.window = kernel_shape;
+  attributes.strides = strides;
+  attributes.padding_before = padding_before;
+  attributes.padding_after = padding_after;
+  // Padded elements are excluded from the average (count_include_pad == 0
+  // semantics; V7 rejects any other value before delegating here).
+  attributes.include_pad = false;
+  attributes.data_format = mir::DataFormat::NCHW;
+  auto result = createOp<mir::ops::AvgPool2DOp>(graph, input, attributes)->getOutput(0);
+
+  context->setNodeOutputs(onnx_node, {result});
+}
+
+// Convert ONNX AveragePool (opset 7): identical to V1 once the new
+// 'count_include_pad' attribute is confirmed to keep its default of 0.
+void convertAveragePoolV7(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  if (getAttributeValue<int64_t>(onnx_node, "count_include_pad", 0) != 0)
+    throw std::runtime_error("Not supported count_include_pad attribute!");
+
+  convertAveragePoolV1(onnx_node, context);
+}
+
+// Convert ONNX AveragePool (opset 10): identical to V7 once the new
+// 'ceil_mode' attribute is confirmed to keep its default of 0 (floor).
+void convertAveragePoolV10(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  if (getAttributeValue<int64_t>(onnx_node, "ceil_mode", 0) != 0)
+    throw std::runtime_error("Not supported ceil_mode attribute!");
+
+  convertAveragePoolV7(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/AveragePool.h b/compiler/mir-onnx-importer/Op/AveragePool.h
new file mode 100644
index 000000000..54e406daf
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/AveragePool.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_AVERAGE_POOL_H
+#define MIR_ONNX_OP_AVERAGE_POOL_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+// Converters for the ONNX AveragePool operation, one per supported opset.
+// Later versions validate their newly introduced attributes and delegate to
+// the earlier converter.
+void convertAveragePoolV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertAveragePoolV7(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertAveragePoolV10(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_AVERAGE_POOL_H
diff --git a/compiler/mir-onnx-importer/Op/BatchNormalization.cpp b/compiler/mir-onnx-importer/Op/BatchNormalization.cpp
new file mode 100644
index 000000000..8a6d8cc51
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/BatchNormalization.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchNormalization.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+#include "mir/ops/AddOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/MulOp.h"
+#include "mir/ops/ReshapeOp.h"
+
+#include <cmath>
+
+namespace mir_onnx
+{
+
+// Convert ONNX BatchNormalization (opset 1).
+// The legacy 'consumed_inputs' attribute is ignored; otherwise same as V6.
+void convertBatchNormalizationV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  // consumed_inputs attribute not used
+  convertBatchNormalizationV6(onnx_node, context);
+}
+
+// Convert ONNX BatchNormalization (opset 6): only inference mode is
+// supported, so a node explicitly marked as training (is_test == 0) is
+// rejected before delegating to the V7 converter.
+void convertBatchNormalizationV6(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  if (getAttributeValue<std::int64_t>(onnx_node, "is_test", 0) == 0)
+    throw std::runtime_error("Not supported is_test attribute!");
+
+  convertBatchNormalizationV7(onnx_node, context);
+}
+
+// Convert ONNX BatchNormalization (opset 7).
+// The 'spatial' attribute only matters for training, so it is ignored here.
+void convertBatchNormalizationV7(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  // spatial attribute used only for learning
+
+  convertBatchNormalizationV9(onnx_node, context);
+}
+
+// Convert ONNX BatchNormalization (opset 9), inference mode only.
+// The normalization
+//   Y = (X - mean) * scale / sqrt(var + epsilon) + bias
+// is folded at import time into Add/Mul ops, which requires 'scale',
+// 'mean' and 'var' to be compile-time constants.
+void convertBatchNormalizationV9(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  // momentum attribute used only for learning
+
+  std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+  mir::Graph *graph = context->getGraph();
+
+  // Inputs are: X, scale, bias, mean, var (in this order).
+  assert(inputs.size() == 5);
+  auto input = inputs[0];
+  auto scale = inputs[1];
+  auto bias = inputs[2];
+  auto mean = inputs[3];
+  auto var = inputs[4];
+
+  // 1e-05f is the default epsilon.
+  const auto epsilon = getAttributeValue<float>(onnx_node, "epsilon", 1e-05f);
+
+  // Y = (X - mean) * scale / sqrt(var + epsilon) + bias =
+  // = (X + C1) * C2 + bias
+  // We need these to be constants since we are going to change them.
+  // TODO Implement the formula using ops and let the optimizer constant-fold them.
+  auto scale_op = dynamic_cast<mir::ops::ConstantOp *>(scale->getNode());
+  auto mean_op = dynamic_cast<mir::ops::ConstantOp *>(mean->getNode());
+  auto var_op = dynamic_cast<mir::ops::ConstantOp *>(var->getNode());
+
+  if (scale_op == nullptr || mean_op == nullptr || var_op == nullptr)
+    throw std::runtime_error(
+        "BatchNormalization: only constant 'scale', 'mean' and 'variance' inputs are supported.");
+
+  // NOTE(review): the loops below rewrite the constant tensors through these
+  // accessors in place; this assumes the tensors are not shared with any
+  // other consumer of the same initializers - verify.
+  mir::Tensor<float> scale_accessor(scale_op->getValue());
+  mir::Tensor<float> mean_accessor(mean_op->getValue());
+  mir::Tensor<float> var_accessor(var_op->getValue());
+
+  // C1 = -mean
+  for (const auto &idx : mir::ShapeRange(mean_accessor.getShape()))
+    mean_accessor.at(idx) *= -1;
+
+  // C2 = scale / sqrt(var + epsilon)
+  for (const auto &idx : mir::ShapeRange(scale_accessor.getShape()))
+    scale_accessor.at(idx) /= std::sqrt(var_accessor.at(idx) + epsilon);
+
+  // Reshape the per-channel 1-D parameters to (1, C, 1, ..., 1) so they
+  // broadcast along the channel dimension of the NCHW input.
+  assert(mean_accessor.getShape().rank() == 1);
+  auto input_rank = input->getShape().rank();
+  if (input_rank < 2)
+    throw std::runtime_error("Inputs with shape rank < 2 are not supported for batchnorm");
+
+  mir::Shape new_shape(std::vector<std::int32_t>(input_rank, 1));
+
+  new_shape.dim(1) = mean_accessor.getShape().dim(0); // set channel dim
+
+  auto reshaped_mean = createOp<mir::ops::ReshapeOp>(graph, mean, new_shape)->getOutput(0);
+  auto reshaped_scale = createOp<mir::ops::ReshapeOp>(graph, scale, new_shape)->getOutput(0);
+  auto reshaped_bias = createOp<mir::ops::ReshapeOp>(graph, bias, new_shape)->getOutput(0);
+
+  // Y = (X + C1) * C2 + bias
+  auto result = createOp<mir::ops::AddOp>(graph, input, reshaped_mean)->getOutput(0);
+  result = createOp<mir::ops::MulOp>(graph, result, reshaped_scale)->getOutput(0);
+  result = createOp<mir::ops::AddOp>(graph, result, reshaped_bias)->getOutput(0);
+
+  context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/BatchNormalization.h b/compiler/mir-onnx-importer/Op/BatchNormalization.h
new file mode 100644
index 000000000..7c2e37a9c
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/BatchNormalization.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_BATCH_NORMALIZATION_H
+#define MIR_ONNX_OP_BATCH_NORMALIZATION_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+// Converters for the ONNX BatchNormalization operation, one per supported
+// opset version. V1/V6/V7 validate or ignore version-specific attributes
+// and delegate down to the V9 implementation.
+void convertBatchNormalizationV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertBatchNormalizationV6(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertBatchNormalizationV7(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertBatchNormalizationV9(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_BATCH_NORMALIZATION_H
diff --git a/compiler/mir-onnx-importer/Op/Concat.cpp b/compiler/mir-onnx-importer/Op/Concat.cpp
new file mode 100644
index 000000000..dbe752647
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Concat.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Concat.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/ConcatOp.h"
+
+namespace mir_onnx
+{
+
+// Convert ONNX Concat (opset 1): concatenate all inputs along 'axis',
+// which defaults to 1 in this opset version.
+void convertConcatV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  const auto operands = context->getNodeInputs(onnx_node);
+  auto *mir_graph = context->getGraph();
+
+  const auto concat_axis = getAttributeValue<int64_t>(onnx_node, "axis", 1);
+  auto *concat = createOp<mir::ops::ConcatOp>(mir_graph, operands, concat_axis);
+
+  context->setNodeOutputs(onnx_node, {concat->getOutput(0)});
+}
+
+// Convert ONNX Concat (opset 4+): concatenate all inputs along 'axis'.
+// Unlike opset 1, 'axis' has no default and must be present on the node.
+void convertConcatV4(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+  mir::Graph *graph = context->getGraph();
+  // From version 4 axis attribute is required
+  auto attr = findAttribute(onnx_node, "axis");
+  if (!attr)
+    throw std::runtime_error("Attribute axis is required!");
+  // Keep the proto's full int64 value instead of the previous silent
+  // int64 -> int32 truncation at this assignment.
+  const std::int64_t axis = attr->i();
+
+  auto result = createOp<mir::ops::ConcatOp>(graph, inputs, axis)->getOutput(0);
+
+  context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Concat.h b/compiler/mir-onnx-importer/Op/Concat.h
new file mode 100644
index 000000000..430a2d9e4
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Concat.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_CONCAT_H
+#define MIR_ONNX_OP_CONCAT_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+// Converters for the ONNX Concat operation. V1 allows 'axis' to default
+// to 1, while V4 requires the attribute to be present.
+void convertConcatV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertConcatV4(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_CONCAT_H
diff --git a/compiler/mir-onnx-importer/Op/Constant.cpp b/compiler/mir-onnx-importer/Op/Constant.cpp
new file mode 100644
index 000000000..710760ed3
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Constant.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Constant.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/TensorVariant.h"
+#include "mir/ops/ConstantOp.h"
+
+namespace mir_onnx
+{
+
+// Convert ONNX Constant (opset 1): materialize the required 'value' tensor
+// attribute as a MIR ConstantOp and register it as the node's output.
+void convertConstantV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  // A Constant node has no inputs, so the previous getNodeInputs() call was
+  // an unused local and has been removed.
+  mir::Graph *graph = context->getGraph();
+
+  const auto onnx_tensor = getAttributeValue<onnx::TensorProto>(onnx_node, "value");
+  auto mir_tensor = createTensor(&onnx_tensor);
+
+  auto result = graph->create<mir::ops::ConstantOp>(mir_tensor)->getOutput(0);
+
+  context->setNodeOutputs(onnx_node, {result});
+}
+
+// Convert ONNX Constant (opset 9).
+// Same handling as V1; the opset change only widened the set of element
+// types the 'value' tensor may contain.
+void convertConstantV9(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  // Since version 9 Constant operation support other types contained in tensor
+  convertConstantV1(onnx_node, context);
+}
+
+// Convert ONNX Constant (opset 11): the node must carry either a dense
+// 'value' or a 'sparse_value' attribute; only the dense form is supported.
+void convertConstantV11(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  // Dense tensor: handled exactly as in opset 9.
+  if (findAttribute(onnx_node, "value") != nullptr)
+  {
+    convertConstantV9(onnx_node, context);
+    return;
+  }
+
+  // Sparse tensors are recognized but not supported.
+  if (findAttribute(onnx_node, "sparse_value") != nullptr)
+    throw std::runtime_error("Not supported sparse_tensor in Constant operation!");
+
+  // Neither attribute present: the node is malformed.
+  throw std::runtime_error("Not enough attributes in Constant operation!");
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Constant.h b/compiler/mir-onnx-importer/Op/Constant.h
new file mode 100644
index 000000000..2a4db0fb7
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Constant.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_CONSTANT_H
+#define MIR_ONNX_OP_CONSTANT_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+// Converters for the ONNX Constant operation. V9 widens the supported
+// element types; V11 additionally recognizes (but rejects) 'sparse_value'.
+void convertConstantV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertConstantV9(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertConstantV11(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_CONSTANT_H
diff --git a/compiler/mir-onnx-importer/Op/Conv.cpp b/compiler/mir-onnx-importer/Op/Conv.cpp
new file mode 100644
index 000000000..7dc6ce818
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Conv.cpp
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conv.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+#include "ConvPoolHelpers.h"
+
+#include "mir/ops/AddOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/ReshapeOp.h"
+
+namespace mir_onnx
+{
+
+// Convert ONNX Conv (opset 1) to mir::ops::Conv2DOp.
+// Supports 2-D convolution over NCHW (4-D) inputs, plus 1-D convolution
+// emulated by appending a trailing unit spatial dimension to the input,
+// kernel, strides, pads and dilations, then squeezing it off the result.
+// NOTE(review): mir::ops::TransposeOp is used below without an explicit
+// #include "mir/ops/TransposeOp.h" in this file - presumably pulled in
+// transitively (e.g. via ONNXHelpers.h); verify.
+void convertConvV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+  mir::Graph *graph = context->getGraph();
+
+  // Inputs: X, W, and an optional bias B.
+  assert(inputs.size() >= 2);
+  auto input = inputs[0];
+  auto kernel = inputs[1];
+
+  auto input_shape = input->getShape();
+  bool conv1d = false;
+  if (input_shape.rank() == 3)
+  {
+    // Conv1D: add a unit spatial dimension and treat it as Conv2D.
+    input_shape = {input_shape.dim(0), input_shape.dim(1), input_shape.dim(2), 1};
+    auto reshaped_input = createOp<mir::ops::ReshapeOp>(graph, input, input_shape);
+    input = reshaped_input->getOutput(0);
+    conv1d = true;
+  }
+  else
+  {
+    if (input_shape.rank() != 4)
+      // Fixed message: the old text ("more than 4 dimentions") was misspelled
+      // and wrong for inputs of rank < 3, which also take this branch.
+      throw std::runtime_error{"Conv: only 3-D or 4-D input tensors are supported"};
+  }
+
+  constexpr int num_spatial_dims = 2;
+
+  // Dilations other than 1 are not supported by this lowering.
+  std::vector<int32_t> dilations(num_spatial_dims, 1);
+  if (const auto *dilations_attr = findAttribute(onnx_node, "dilations"))
+  {
+    dilations = getAttributeValue<std::vector<int32_t>>(*dilations_attr);
+    if (conv1d)
+      dilations.emplace_back(1);
+  }
+
+  if (dilations.size() != num_spatial_dims)
+    throw std::runtime_error("Conv: attribute 'dilations' has incorrect size.");
+  if (!std::all_of(dilations.cbegin(), dilations.cend(), [](std::int32_t x) { return x == 1; }))
+    throw std::runtime_error("Conv: attribute 'dilations' has unsupported value.");
+
+  std::vector<int32_t> strides(num_spatial_dims, 1);
+  if (const auto *strides_attr = findAttribute(onnx_node, "strides"))
+  {
+    strides = getAttributeValue<std::vector<int32_t>>(*strides_attr);
+    if (conv1d)
+      strides.emplace_back(1);
+  }
+
+  if (strides.size() != num_spatial_dims)
+    throw std::runtime_error("Conv: attribute 'strides' has incorrect size.");
+
+  // Assuming kernel has OIHW format.
+  if (conv1d)
+  {
+    auto kernel_shape = kernel->getShape();
+    assert(kernel_shape.rank() == 3);
+    kernel_shape = {kernel_shape.dim(0), kernel_shape.dim(1), kernel_shape.dim(2), 1};
+    auto reshaped_kernel = createOp<mir::ops::ReshapeOp>(graph, kernel, kernel_shape);
+    kernel = reshaped_kernel->getOutput(0);
+  }
+
+  // Spatial kernel size defaults to the kernel tensor's own H and W dims.
+  std::vector<std::int32_t> kernel_shape{kernel->getShape().dim(2), kernel->getShape().dim(3)};
+  if (const auto *k_shape_attr = findAttribute(onnx_node, "kernel_shape"))
+  {
+    kernel_shape = getAttributeValue<std::vector<std::int32_t>>(*k_shape_attr);
+    if (conv1d)
+      kernel_shape.emplace_back(1);
+  }
+
+  if (kernel_shape.size() != num_spatial_dims)
+    throw std::runtime_error("Conv: attribute 'kernel_shape' has incorrect size.");
+
+  // Explicit 'pads' wins over 'auto_pad'. ONNX stores pads as
+  // [before_h, before_w, after_h, after_w].
+  std::vector<std::int32_t> padding_before(num_spatial_dims, 0);
+  std::vector<std::int32_t> padding_after(num_spatial_dims, 0);
+  if (const auto *pads_attr = findAttribute(onnx_node, "pads"))
+  {
+    auto pads = getAttributeValue<std::vector<std::int32_t>>(*pads_attr);
+    if (conv1d)
+    {
+      pads.emplace_back(0);
+      pads.emplace_back(0);
+    }
+
+    if (pads.size() != num_spatial_dims * 2)
+      throw std::runtime_error("Conv: attribute 'pads' has incorrect size.");
+    const auto fixed_pads = fixPads(input_shape, pads, strides, dilations, kernel_shape);
+    padding_before.assign(fixed_pads.cbegin(), std::next(fixed_pads.cbegin(), num_spatial_dims));
+    padding_after.assign(std::next(fixed_pads.cbegin(), num_spatial_dims), fixed_pads.cend());
+  }
+  else
+  {
+    const auto auto_pad = getAttributeValue<std::string>(onnx_node, "auto_pad", "NOTSET");
+    inferAutoPadding(auto_pad, input_shape, dilations, strides, kernel_shape, padding_before,
+                     padding_after);
+  }
+
+  const auto group = getAttributeValue<std::int64_t>(onnx_node, "group", 1);
+
+  mir::Conv2DOpAttributes attributes;
+  attributes.strides = strides;
+  attributes.padding_before = padding_before;
+  attributes.padding_after = padding_after;
+  attributes.num_groups = group;
+  attributes.data_format = mir::DataFormat::NCHW;
+
+  // MIR's Conv2DOp expects the kernel in OHWI layout.
+  std::vector<std::size_t> perm{0, 2, 3, 1}; // OIHW -> OHWI
+  kernel = createOp<mir::ops::TransposeOp>(graph, kernel, perm)->getOutput(0);
+  auto result = createOp<mir::ops::Conv2DOp>(graph, input, kernel, attributes)->getOutput(0);
+
+  if (inputs.size() > 2)
+  {
+    // Bias is per output channel; reshape to (1, C, 1, 1) for broadcasting.
+    auto bias = inputs[2];
+    bias = createOp<mir::ops::ReshapeOp>(graph, bias, mir::Shape{1, bias->getShape().dim(0), 1, 1})
+               ->getOutput(0);
+    result = createOp<mir::ops::AddOp>(graph, result, bias)->getOutput(0);
+  }
+
+  if (conv1d)
+  {
+    // Drop the auxiliary unit spatial dimension added for Conv1D emulation.
+    auto output_shape = result->getShape();
+    output_shape.resize(output_shape.rank() - 1);
+    result = createOp<mir::ops::ReshapeOp>(graph, result, output_shape)->getOutput(0);
+  }
+
+  context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Conv.h b/compiler/mir-onnx-importer/Op/Conv.h
new file mode 100644
index 000000000..2af2b8959
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Conv.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_CONV_H
+#define MIR_ONNX_OP_CONV_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+// Converter for the ONNX Conv operation (opset 1 semantics).
+void convertConvV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_CONV_H
diff --git a/compiler/mir-onnx-importer/Op/ConvTranspose.cpp b/compiler/mir-onnx-importer/Op/ConvTranspose.cpp
new file mode 100644
index 000000000..3078a1959
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/ConvTranspose.cpp
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvTranspose.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+#include "ConvPoolHelpers.h"
+
+#include "mir/TensorUtil.h"
+#include "mir/ops/AddOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/Deconv2DOp.h"
+#include "mir/ops/ReshapeOp.h"
+
+namespace mir_onnx
+{
+
+// Convert ONNX ConvTranspose (opset 1) to mir::ops::DeConv2DOp.
+// Only 4-D NCHW inputs, group == 1, unit dilations and zero output_padding
+// are supported, and the 'output_shape' attribute must be present.
+void convertConvTransposeV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+  std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+  mir::Graph *graph = context->getGraph();
+
+  // Inputs: X, W, and an optional bias B.
+  assert(inputs.size() >= 2);
+  auto input = inputs[0];
+  auto kernel = inputs[1];
+
+  const auto group = getAttributeValue<std::int64_t>(onnx_node, "group", 1);
+  if (group != 1)
+    throw std::runtime_error("ConvTranspose: attribute 'group' has unsupported value.");
+
+  const auto &input_shape = input->getShape();
+  if (input_shape.rank() != 4)
+    throw std::runtime_error("ConvTranspose: only 2-D input is supported.");
+
+  constexpr int num_spatial_dims = 2;
+
+  // Dilations other than 1 are not supported by this lowering.
+  const auto dilations =
+      getAttributeValue(onnx_node, "dilations", std::vector<std::int32_t>(num_spatial_dims, 1));
+  if (dilations.size() != num_spatial_dims)
+    throw std::runtime_error("ConvTranspose: attribute 'dilations' has incorrect size.");
+  if (!std::all_of(dilations.cbegin(), dilations.cend(), [](std::int32_t x) { return x == 1; }))
+    throw std::runtime_error("ConvTranspose: attribute 'dilations' has unsupported value.");
+
+  const auto strides =
+      getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
+  if (strides.size() != num_spatial_dims)
+    throw std::runtime_error("ConvTranspose: attribute 'strides' has incorrect size.");
+
+  // Non-zero output_padding is not supported.
+  const auto output_padding = getAttributeValue(onnx_node, "output_padding",
+                                                std::vector<std::int32_t>(num_spatial_dims, 0));
+  if (output_padding.size() != num_spatial_dims)
+    throw std::runtime_error("ConvTranspose: attribute 'output_padding' has incorrect size.");
+  if (!std::all_of(output_padding.cbegin(), output_padding.cend(),
+                   [](std::int32_t x) { return x == 0; }))
+    throw std::runtime_error("ConvTranspose: attribute 'output_padding' has unsupported value.");
+
+  // Assuming kernel has IOHW format.
+  assert(kernel->getShape().rank() == 4);
+  // Spatial kernel size defaults to the kernel tensor's own H and W dims.
+  const auto kernel_size = getAttributeValue(
+      onnx_node, "kernel_shape",
+      std::vector<std::int32_t>{kernel->getShape().dim(2), kernel->getShape().dim(3)});
+  if (kernel_size.size() != num_spatial_dims)
+    throw std::runtime_error("ConvTranspose: attribute 'kernel_shape' has incorrect size.");
+
+  // ONNX IOHW -> MIR HWOI
+  std::vector<std::size_t> perm{2, 3, 1, 0}; // IOHW -> HWOI
+  kernel = createOp<mir::ops::TransposeOp>(graph, kernel, perm)->getOutput(0);
+
+  mir::Operation::Output *result;
+  if (const auto *output_shape_attr = findAttribute(onnx_node, "output_shape"))
+  {
+    // Explicit output shape: build (N, C_out, H_out, W_out). After the
+    // transpose above, kernel dim(2) is the output-channel count.
+    const auto output_size = getAttributeValue<std::vector<std::int32_t>>(*output_shape_attr);
+    if (output_size.size() != num_spatial_dims)
+      throw std::runtime_error("ConvTranspose: attribute 'output_shape' has incorrect size.");
+    const mir::Shape output_shape{input_shape.dim(0), kernel->getShape().dim(2), output_size[0],
+                                  output_size[1]};
+    mir::Deconv2DOpAttributes attributes;
+    attributes.strides = strides;
+    attributes.data_format = mir::DataFormat::NCHW;
+    attributes.padding_type = mir::ops::PaddingType::SameUpper;
+    result = createOp<mir::ops::DeConv2DOp>(graph, input, kernel, attributes, output_shape)
+                 ->getOutput(0);
+  }
+  else
+  {
+    // TODO This code was not tested.
+    // NOTE: everything after this throw is intentionally unreachable; it is
+    // kept as a sketch of pads/auto_pad handling for when support is added.
+    throw std::runtime_error(
+        "ConvTranspose: absence of attribute 'output_shape' is not supported.");
+    std::vector<std::int32_t> padding_before(num_spatial_dims, 0);
+    std::vector<std::int32_t> padding_after(num_spatial_dims, 0);
+    if (const auto *pads_attr = findAttribute(onnx_node, "pads"))
+    {
+      const auto pads = getAttributeValue<std::vector<std::int32_t>>(*pads_attr);
+      if (pads.size() != num_spatial_dims * 2)
+        throw std::runtime_error("ConvTranspose: attribute 'pads' has incorrect size.");
+      padding_before.assign(pads.cbegin(), std::next(pads.cbegin(), num_spatial_dims));
+      padding_after.assign(std::next(pads.cbegin(), num_spatial_dims), pads.cend());
+    }
+    else
+    {
+      const auto auto_pad = getAttributeValue<std::string>(onnx_node, "auto_pad", "NOTSET");
+      inferAutoPadding(auto_pad, input_shape, dilations, strides, kernel_size, padding_before,
+                       padding_after);
+    }
+    mir::Deconv2DOpAttributes attributes;
+    attributes.strides = strides;
+    attributes.padding_before = padding_before;
+    attributes.padding_after = padding_after;
+    attributes.data_format = mir::DataFormat::NCHW;
+    result = createOp<mir::ops::DeConv2DOp>(graph, input, kernel, attributes)->getOutput(0);
+  }
+
+  if (inputs.size() > 2)
+  {
+    // Bias is per output channel; reshape to (1, C, 1, 1) for broadcasting.
+    auto bias = inputs[2];
+    bias = createOp<mir::ops::ReshapeOp>(graph, bias, mir::Shape{1, bias->getShape().dim(0), 1, 1})
+               ->getOutput(0);
+    result = createOp<mir::ops::AddOp>(graph, result, bias)->getOutput(0);
+  }
+
+  context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/ConvTranspose.h b/compiler/mir-onnx-importer/Op/ConvTranspose.h
new file mode 100644
index 000000000..d203dc6c1
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/ConvTranspose.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_CONV_TRANSPOSE_H
+#define MIR_ONNX_OP_CONV_TRANSPOSE_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+// Converter for the ONNX ConvTranspose operation (opset 1 semantics).
+void convertConvTransposeV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_CONV_TRANSPOSE_H
diff --git a/compiler/mir-onnx-importer/Op/Div.cpp b/compiler/mir-onnx-importer/Op/Div.cpp
new file mode 100644
index 000000000..40620169a
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Div.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Div.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/DivOp.h"
+
+namespace mir_onnx
+{
+
+void convertDivV7(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+
+ mir::Graph *graph = context->getGraph();
+
+ auto result = createOp<mir::ops::DivOp>(graph, inputs[0], inputs[1])->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Div.h b/compiler/mir-onnx-importer/Op/Div.h
new file mode 100644
index 000000000..cdc254fb8
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Div.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_DIV_H
+#define MIR_ONNX_OP_DIV_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertDivV7(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_DIV_H
diff --git a/compiler/mir-onnx-importer/Op/Dropout.cpp b/compiler/mir-onnx-importer/Op/Dropout.cpp
new file mode 100644
index 000000000..ef6972784
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Dropout.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dropout.h"
+
+#include "AttributeHelpers.h"
+
+namespace mir_onnx
+{
+
+void convertDropoutV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ // consumed_inputs attribute not used
+ convertDropoutV6(onnx_node, context);
+}
+
+void convertDropoutV6(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ const auto is_test = getAttributeValue<std::int64_t>(onnx_node, "is_test", 0);
+ if (is_test == 0)
+ throw std::runtime_error("Not supported is_test attribute!");
+
+ convertDropoutV10(onnx_node, context);
+}
+
+void convertDropoutV7(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertDropoutV10(onnx_node, context);
+}
+
+void convertDropoutV10(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+
+ // ratio attribute not used
+
+ // This is a no-op in inference mode.
+ context->setNodeOutputs(onnx_node, {inputs[0]});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Dropout.h b/compiler/mir-onnx-importer/Op/Dropout.h
new file mode 100644
index 000000000..9a90ac79b
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Dropout.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_DROPOUT_H
+#define MIR_ONNX_OP_DROPOUT_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertDropoutV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertDropoutV6(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertDropoutV7(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertDropoutV10(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_DROPOUT_H
diff --git a/compiler/mir-onnx-importer/Op/Equal.cpp b/compiler/mir-onnx-importer/Op/Equal.cpp
new file mode 100644
index 000000000..242389eb5
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Equal.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Equal.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/EqualOp.h"
+
+namespace mir_onnx
+{
+
+void convertEqualV11(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ auto result = createOp<mir::ops::EqualOp>(graph, inputs[0], inputs[1])->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertEqualV7(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ // Other type constraints
+ convertEqualV11(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Equal.h b/compiler/mir-onnx-importer/Op/Equal.h
new file mode 100644
index 000000000..0672cd661
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Equal.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_EQUAL_H
+#define MIR_ONNX_OP_EQUAL_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertEqualV11(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertEqualV7(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_EQUAL_H
diff --git a/compiler/mir-onnx-importer/Op/Expand.cpp b/compiler/mir-onnx-importer/Op/Expand.cpp
new file mode 100644
index 000000000..40002dfa9
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Expand.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Expand.h"
+
+#include "ONNXHelpers.h"
+
+#include "mir/ops/BroadcastOp.h"
+
+namespace mir_onnx
+{
+
+void convertExpandV8(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ if (inputs[1]->getNode()->getType() != mir::Operation::Type::constant)
+ {
+ throw std::runtime_error{"Expand with non-constant input shape is not supported"};
+ }
+
+ auto target_shape = constantToShape(static_cast<mir::ops::ConstantOp *>(inputs[1]->getNode()));
+
+ auto *result = createOp<mir::ops::BroadcastOp>(graph, inputs[0], target_shape)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Expand.h b/compiler/mir-onnx-importer/Op/Expand.h
new file mode 100644
index 000000000..35f7af407
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Expand.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_EXPAND_H
+#define MIR_ONNX_OP_EXPAND_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertExpandV8(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_EXPAND_H
diff --git a/compiler/mir-onnx-importer/Op/Flatten.cpp b/compiler/mir-onnx-importer/Op/Flatten.cpp
new file mode 100644
index 000000000..dfad6ddbf
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Flatten.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Flatten.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/ReshapeOp.h"
+
+namespace mir_onnx
+{
+
+void convertFlattenV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ const auto axis = getAttributeValue<int64_t>(onnx_node, "axis", 1);
+ assert(inputs.size() == 1);
+ const auto &in_shape = inputs[0]->getShape();
+ assert(axis <= in_shape.rank()); // A tensor of rank >= axis
+ int32_t first_dim = 1, second_dim = 1;
+ int32_t dim = 0;
+
+ for (; dim < axis; dim++)
+ first_dim *= in_shape.dim(dim);
+
+ for (; dim < in_shape.rank(); dim++)
+ second_dim *= in_shape.dim(dim);
+
+ mir::Shape out_shape({first_dim, second_dim}); // Output 2D tensor
+
+ auto result = createOp<mir::ops::ReshapeOp>(graph, inputs[0], out_shape)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertFlattenV9(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ // Other type constraints
+ convertFlattenV1(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Flatten.h b/compiler/mir-onnx-importer/Op/Flatten.h
new file mode 100644
index 000000000..174a8d906
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Flatten.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_FLATTEN_H
+#define MIR_ONNX_OP_FLATTEN_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertFlattenV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertFlattenV9(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_FLATTEN_H
diff --git a/compiler/mir-onnx-importer/Op/Gather.cpp b/compiler/mir-onnx-importer/Op/Gather.cpp
new file mode 100644
index 000000000..fa3746c67
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Gather.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Gather.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/GatherOp.h"
+
+namespace mir_onnx
+{
+
+void convertGatherV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ // 0 is the default axis number.
+ const auto axis = getAttributeValue<std::int64_t>(onnx_node, "axis", 0);
+
+ auto result = createOp<mir::ops::GatherOp>(graph, inputs[0], inputs[1], axis)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Gather.h b/compiler/mir-onnx-importer/Op/Gather.h
new file mode 100644
index 000000000..c4308d2be
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Gather.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_GATHER_H
+#define MIR_ONNX_OP_GATHER_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertGatherV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_GATHER_H
diff --git a/compiler/mir-onnx-importer/Op/Gemm.cpp b/compiler/mir-onnx-importer/Op/Gemm.cpp
new file mode 100644
index 000000000..1e0759dda
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Gemm.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Gemm.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/TensorUtil.h"
+
+#include "mir/ops/AddOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/FullyConnectedOp.h"
+#include "mir/ops/MulOp.h"
+#include "mir/ops/ReshapeOp.h"
+#include "mir/ops/TransposeOp.h"
+
+namespace mir_onnx
+{
+
+static void convertGemm(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ assert(inputs.size() == 2 || inputs.size() == 3);
+
+ auto a = inputs[0];
+ auto b = inputs[1];
+ auto c = inputs.size() > 2 ? inputs[2] : nullptr;
+
+ // 1.0f is the default factor.
+ const auto alpha_val = getAttributeValue<float>(onnx_node, "alpha", 1.0f);
+ const auto beta_val = getAttributeValue<float>(onnx_node, "beta", 1.0f);
+
+ // 0 means that no transpose is needed. It is the default value.
+ const auto trans_a = getAttributeValue<std::int64_t>(onnx_node, "transA", 0);
+ const auto trans_b = getAttributeValue<std::int64_t>(onnx_node, "transB", 0);
+
+ // Transpose the A and B matrices as needed.
+ if (trans_a)
+ a = createOp<mir::ops::TransposeOp>(graph, a, std::vector<std::size_t>{1, 0})->getOutput(0);
+ if (trans_b)
+ b = createOp<mir::ops::TransposeOp>(graph, b, std::vector<std::size_t>{1, 0})->getOutput(0);
+
+ // Calculate A * B.
+ auto ab = createOp<mir::ops::FullyConnectedOp>(graph, a, b)->getOutput(0);
+
+ // Multiply A * B by the constant factor.
+ if (alpha_val != 1.0f)
+ {
+ mir::TensorVariant alpha_tensor({mir::DataType::FLOAT32, {}}, &alpha_val);
+ auto alpha = createOp<mir::ops::ConstantOp>(graph, alpha_tensor)->getOutput(0);
+ ab = createOp<mir::ops::MulOp>(graph, alpha, ab)->getOutput(0);
+ }
+
+ // If there are no third input, node is simple A*B multiplication
+ if (!c)
+ {
+ context->setNodeOutputs(onnx_node, {ab});
+ return;
+ }
+
+ // Multiply C by the constant factor.
+ if (beta_val != 1.0f)
+ {
+ mir::TensorVariant beta_tensor({mir::DataType::FLOAT32, {}}, &beta_val);
+ auto beta = createOp<mir::ops::ConstantOp>(graph, beta_tensor)->getOutput(0);
+ c = createOp<mir::ops::MulOp>(graph, beta, c)->getOutput(0);
+ }
+
+ // Calculate the result: alpha * A * B + beta * C.
+ auto result = createOp<mir::ops::AddOp>(graph, ab, c)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertGemmV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ return convertGemm(onnx_node, context);
+}
+
+void convertGemmV6(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ // This version differs from V1: in description of C input (redundant text "can be inplace.")
+ return convertGemm(onnx_node, context);
+}
+
+void convertGemmV7(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ // This version differs from V6: removed "broadcast" atribute
+ return convertGemm(onnx_node, context);
+}
+
+void convertGemmV9(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ // This version differs from V7: added more supported types
+ return convertGemm(onnx_node, context);
+}
+
+void convertGemmV11(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ // This operation differs from V11: input C is optional
+ return convertGemm(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Gemm.h b/compiler/mir-onnx-importer/Op/Gemm.h
new file mode 100644
index 000000000..d87a36e7b
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Gemm.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_GEMM_H
+#define MIR_ONNX_OP_GEMM_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertGemmV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertGemmV6(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertGemmV7(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertGemmV9(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertGemmV11(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_GEMM_H
diff --git a/compiler/mir-onnx-importer/Op/GlobalAveragePool.cpp b/compiler/mir-onnx-importer/Op/GlobalAveragePool.cpp
new file mode 100644
index 000000000..379c8b596
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/GlobalAveragePool.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GlobalAveragePool.h"
+
+#include "ONNXHelpers.h"
+
+#include "mir/ops/AvgPool2DOp.h"
+
+namespace mir_onnx
+{
+
+void convertGlobalAveragePoolV2(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ assert(inputs.size() == 1);
+ auto input = inputs[0];
+
+ const auto &input_shape = input->getShape();
+ if (input_shape.rank() != 4)
+ throw std::runtime_error("GlobalAveragePool: only 2-D input is supported.");
+
+ // GlobalAveragePool is equivalent to AveragePool with kernel size equal
+ // to the spatial dimension of input tensor.
+ const std::vector<std::int32_t> window_size{input->getShape().dim(2), input->getShape().dim(3)};
+ mir::AvgPool2DOpAttributes attributes;
+ attributes.window = window_size;
+ attributes.data_format = mir::DataFormat::NCHW;
+
+ auto result = createOp<mir::ops::AvgPool2DOp>(graph, input, attributes)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/GlobalAveragePool.h b/compiler/mir-onnx-importer/Op/GlobalAveragePool.h
new file mode 100644
index 000000000..b2fb9b8c9
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/GlobalAveragePool.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_GLOBAL_AVERAGE_POOL_H
+#define MIR_ONNX_OP_GLOBAL_AVERAGE_POOL_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertGlobalAveragePoolV2(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_GLOBAL_AVERAGE_POOL_H
diff --git a/compiler/mir-onnx-importer/Op/Greater.cpp b/compiler/mir-onnx-importer/Op/Greater.cpp
new file mode 100644
index 000000000..deaf96d4b
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Greater.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Greater.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/GreaterOp.h"
+
+namespace mir_onnx
+{
+
+static void convertGreaterVGeneric(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ auto result = createOp<mir::ops::GreaterOp>(graph, inputs[0], inputs[1])->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertGreaterV7(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertGreaterVGeneric(onnx_node, context);
+}
+
+void convertGreaterV9(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertGreaterVGeneric(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Greater.h b/compiler/mir-onnx-importer/Op/Greater.h
new file mode 100644
index 000000000..3b6a44f33
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Greater.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_GREATER_H
+#define MIR_ONNX_OP_GREATER_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertGreaterV7(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertGreaterV9(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_GREATER_H
diff --git a/compiler/mir-onnx-importer/Op/Identity.cpp b/compiler/mir-onnx-importer/Op/Identity.cpp
new file mode 100644
index 000000000..6db70ffcd
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Identity.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Identity.h"
+
+namespace mir_onnx
+{
+
+void convertIdentityV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ const auto inputs = context->getNodeInputs(onnx_node);
+ assert(inputs.size() == 1);
+
+ context->setNodeOutputs(onnx_node, {inputs[0]});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Identity.h b/compiler/mir-onnx-importer/Op/Identity.h
new file mode 100644
index 000000000..ea63bab4a
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Identity.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_IDENTITY_H
+#define MIR_ONNX_OP_IDENTITY_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertIdentityV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_IDENTITY_H
diff --git a/compiler/mir-onnx-importer/Op/Less.cpp b/compiler/mir-onnx-importer/Op/Less.cpp
new file mode 100644
index 000000000..44f5d8cf4
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Less.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Less.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/LessOp.h"
+
+namespace mir_onnx
+{
+
+static void convertLessGeneric(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ auto result = createOp<mir::ops::LessOp>(graph, inputs[0], inputs[1])->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertLessV7(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertLessGeneric(onnx_node, context);
+}
+
+void convertLessV9(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertLessGeneric(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Less.h b/compiler/mir-onnx-importer/Op/Less.h
new file mode 100644
index 000000000..682c08725
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Less.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_LESS_H
+#define MIR_ONNX_OP_LESS_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertLessV7(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertLessV9(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_LESS_H
diff --git a/compiler/mir-onnx-importer/Op/MatMul.cpp b/compiler/mir-onnx-importer/Op/MatMul.cpp
new file mode 100644
index 000000000..6d8ea6b83
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/MatMul.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MatMul.h"
+
+#include "ONNXHelpers.h"
+
+#include "mir/ops/FullyConnectedOp.h"
+
+namespace mir_onnx
+{
+
+void convertMatMulV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ assert(inputs.size() == 2);
+ auto A = inputs[0];
+ auto B = inputs[1];
+  // MatMul multiplies N-dimensional matrices
+  // FullyConnected layer multiplies only 2-dimensional matrices
+ if (A->getShape().rank() != 2 || B->getShape().rank() != 2)
+ throw std::runtime_error("Supported only 2D matrix multiplying!");
+ // Calculate A * B.
+ auto result = createOp<mir::ops::FullyConnectedOp>(graph, A, B)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertMatMulV9(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ // Other type constraints
+ convertMatMulV1(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/MatMul.h b/compiler/mir-onnx-importer/Op/MatMul.h
new file mode 100644
index 000000000..97e641ebb
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/MatMul.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_MATMUL_H
+#define MIR_ONNX_OP_MATMUL_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertMatMulV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertMatMulV9(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_MATMUL_H
diff --git a/compiler/mir-onnx-importer/Op/Max.cpp b/compiler/mir-onnx-importer/Op/Max.cpp
new file mode 100644
index 000000000..d4c7d1775
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Max.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Max.h"
+
+#include "ONNXHelpers.h"
+
+#include "mir/ops/MaxOp.h"
+
+namespace mir_onnx
+{
+
+static void convertMaxGeneric(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ if (inputs.size() != 2)
+ {
+ throw std::runtime_error{"Unsupported number of inputs for Max operator"};
+ }
+ mir::Graph *graph = context->getGraph();
+ auto result = createOp<mir::ops::MaxOp>(graph, inputs[0], inputs[1])->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertMaxV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertMaxGeneric(onnx_node, context);
+}
+
+void convertMaxV6(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertMaxGeneric(onnx_node, context);
+}
+
+void convertMaxV8(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertMaxGeneric(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Max.h b/compiler/mir-onnx-importer/Op/Max.h
new file mode 100644
index 000000000..1f2754b62
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Max.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_MAX_H
+#define MIR_ONNX_OP_MAX_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertMaxV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertMaxV6(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertMaxV8(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_MAX_H
diff --git a/compiler/mir-onnx-importer/Op/MaxPool.cpp b/compiler/mir-onnx-importer/Op/MaxPool.cpp
new file mode 100644
index 000000000..53e6e1556
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/MaxPool.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPool.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+#include "ConvPoolHelpers.h"
+
+#include "mir/ops/MaxPool2DOp.h"
+
+namespace mir_onnx
+{
+
+void convertMaxPoolV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ assert(inputs.size() == 1);
+ auto input = inputs[0];
+
+ const auto &input_shape = input->getShape();
+ if (input_shape.rank() != 4)
+ throw std::runtime_error("MaxPool: only 2-D input is supported.");
+
+ constexpr int num_spatial_dims = 2;
+
+ const auto strides =
+ getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
+ if (strides.size() != num_spatial_dims)
+ throw std::runtime_error("MaxPool: attribute 'strides' has incorrect size.");
+
+ const auto kernel_shape = getAttributeValue<std::vector<std::int32_t>>(onnx_node, "kernel_shape");
+ if (kernel_shape.size() != num_spatial_dims)
+ throw std::runtime_error("MaxPool: attribute 'kernel_shape' has incorrect size.");
+
+ std::vector<std::int32_t> padding_before;
+ std::vector<std::int32_t> padding_after;
+ if (const auto *pads_attr = findAttribute(onnx_node, "pads"))
+ {
+ const auto pads = getAttributeValue<std::vector<std::int32_t>>(*pads_attr);
+ if (pads.size() != num_spatial_dims * 2)
+ throw std::runtime_error("MaxPool: attribute 'pads' has incorrect size.");
+ padding_before.assign(pads.cbegin(), std::next(pads.cbegin(), num_spatial_dims));
+ padding_after.assign(std::next(pads.cbegin(), num_spatial_dims), pads.cend());
+ }
+ else
+ {
+ const auto auto_pad = getAttributeValue<std::string>(onnx_node, "auto_pad", "NOTSET");
+ const std::vector<std::int32_t> dilations(num_spatial_dims, 1);
+ inferAutoPadding(auto_pad, input_shape, dilations, strides, kernel_shape, padding_before,
+ padding_after);
+ }
+
+ mir::MaxPool2DOpAttributes attributes;
+ attributes.window = kernel_shape;
+ attributes.strides = strides;
+ attributes.padding_before = padding_before;
+ attributes.padding_after = padding_after;
+ attributes.data_format = mir::DataFormat::NCHW;
+ auto result = createOp<mir::ops::MaxPool2DOp>(graph, input, attributes)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertMaxPoolV8(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ const auto storage_order = getAttributeValue<int64_t>(onnx_node, "storage_order", 0);
+ if (storage_order != 0)
+ throw std::runtime_error("Not supported storage order attribute!");
+
+ convertMaxPoolV1(onnx_node, context);
+}
+
+void convertMaxPoolV10(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ const auto ceil_mode = getAttributeValue<int64_t>(onnx_node, "ceil_mode", 0);
+ if (ceil_mode != 0)
+ throw std::runtime_error("Not supported ceil_mode attribute!");
+
+ const auto *dilations = findAttribute(onnx_node, "dilations");
+ if (dilations != nullptr)
+ {
+ // check default (=1) dilations on each spatial axis
+ for (auto index = 0; index < dilations->ints_size(); index++)
+ if (dilations->ints(index) != 1)
+ throw std::runtime_error("Not supported dilations in MaxPool operation!");
+ }
+
+ convertMaxPoolV8(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/MaxPool.h b/compiler/mir-onnx-importer/Op/MaxPool.h
new file mode 100644
index 000000000..85bd9cf1a
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/MaxPool.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_MAX_POOL_H
+#define MIR_ONNX_OP_MAX_POOL_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertMaxPoolV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertMaxPoolV8(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertMaxPoolV10(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_MAX_POOL_H
diff --git a/compiler/mir-onnx-importer/Op/Mul.cpp b/compiler/mir-onnx-importer/Op/Mul.cpp
new file mode 100644
index 000000000..dbfdd4950
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Mul.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Mul.h"
+
+#include "ONNXHelpers.h"
+
+#include "mir/ops/MulOp.h"
+
+namespace mir_onnx
+{
+
+void convertMulV7(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+ auto result = createOp<mir::ops::MulOp>(graph, inputs[0], inputs[1])->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Mul.h b/compiler/mir-onnx-importer/Op/Mul.h
new file mode 100644
index 000000000..58738c81d
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Mul.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_MUL_H
+#define MIR_ONNX_OP_MUL_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertMulV7(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_MUL_H
diff --git a/compiler/mir-onnx-importer/Op/Pad.cpp b/compiler/mir-onnx-importer/Op/Pad.cpp
new file mode 100644
index 000000000..504a32bb8
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Pad.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pad.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/PadOp.h"
+
+namespace mir_onnx
+{
+
+void convertPadAttrName(const std::string &pad_attr_name, const onnx::NodeProto &onnx_node,
+ ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ assert(inputs.size() == 1);
+ auto input = inputs[0];
+
+ // 0.0f is the default value to be filled into padded cells.
+ const auto value = getAttributeValue<float>(onnx_node, "value", 0.0f);
+ const auto pads = getAttributeValue<std::vector<std::int64_t>>(onnx_node, pad_attr_name);
+ // "constant" is the default mode.
+ const auto mode = getAttributeValue<std::string>(onnx_node, "mode", "constant");
+ if (mode != "constant")
+ throw std::runtime_error("Not supported Pad mode attribute!");
+
+ const int num_dims = input->getShape().rank();
+ assert(pads.size() == num_dims * 2);
+ mir::PadOpAttributes attributes(num_dims);
+ for (int i = 0; i < num_dims; i++)
+ {
+ attributes.padding_before[i] = pads[i];
+ attributes.padding_after[i] = pads[num_dims + i];
+ }
+
+ attributes.padding_value = value;
+
+ auto result = createOp<mir::ops::PadOp>(graph, input, attributes)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertPadV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertPadAttrName("paddings", onnx_node, context);
+}
+
+void convertPadV2(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertPadAttrName("pads", onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Pad.h b/compiler/mir-onnx-importer/Op/Pad.h
new file mode 100644
index 000000000..a0731ae4c
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Pad.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_PAD_H
+#define MIR_ONNX_OP_PAD_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertPadV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertPadV2(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_PAD_H
diff --git a/compiler/mir-onnx-importer/Op/Reciprocal.cpp b/compiler/mir-onnx-importer/Op/Reciprocal.cpp
new file mode 100644
index 000000000..b063d4b8c
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Reciprocal.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reciprocal.h"
+
+#include "ONNXHelpers.h"
+
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/DivOp.h"
+
+namespace mir_onnx
+{
+
+static void convertReciprocal(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ assert(inputs.size() == 1);
+ auto input = inputs[0];
+
+ const float one_value = 1.0f;
+ mir::TensorVariant one_tensor({mir::DataType::FLOAT32, {}}, &one_value);
+ auto one = createOp<mir::ops::ConstantOp>(graph, one_tensor)->getOutput(0);
+ auto result = createOp<mir::ops::DivOp>(graph, input, one)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertReciprocalV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertReciprocal(onnx_node, context);
+}
+
+void convertReciprocalV6(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertReciprocal(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Reciprocal.h b/compiler/mir-onnx-importer/Op/Reciprocal.h
new file mode 100644
index 000000000..747623ab5
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Reciprocal.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_RECIPROCAL_H
+#define MIR_ONNX_OP_RECIPROCAL_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertReciprocalV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertReciprocalV6(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_RECIPROCAL_H
diff --git a/compiler/mir-onnx-importer/Op/ReduceMean.cpp b/compiler/mir-onnx-importer/Op/ReduceMean.cpp
new file mode 100644
index 000000000..ec43bffb4
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/ReduceMean.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReduceMean.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/ReduceMeanOp.h"
+
+#include <numeric>
+
+namespace mir_onnx
+{
+
+void convertReduceMeanV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ const auto inputs = context->getNodeInputs(onnx_node);
+ assert(inputs.size() == 1);
+
+ const auto axes = getAttributeValue<std::vector<std::int64_t>>(onnx_node, "axes");
+ const auto keepdims = getAttributeValue<int64_t>(onnx_node, "keepdims", 1);
+
+ std::vector<int32_t> reduce_dims;
+ if (axes.empty())
+ { // reduce over all dimensions
+ reduce_dims.resize(inputs[0]->getShape().rank());
+ std::iota(reduce_dims.begin(), reduce_dims.end(), 0);
+ }
+ else
+ {
+ auto rank = inputs[0]->getShape().rank();
+
+ std::transform(axes.begin(), axes.end(), std::back_inserter(reduce_dims),
+ [rank](int64_t axis) { return axis < 0 ? axis + rank : axis; });
+ }
+  // Keep the reduced dimension or not; the default value 1 means keep the reduced dimension.
+ bool keep_dims = static_cast<bool>(keepdims);
+
+ mir::Graph *graph = context->getGraph();
+ auto result =
+ createOp<mir::ops::ReduceMeanOp>(graph, inputs[0], reduce_dims, keep_dims)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/ReduceMean.h b/compiler/mir-onnx-importer/Op/ReduceMean.h
new file mode 100644
index 000000000..3553c96b5
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/ReduceMean.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_REDUCEMEAN_H
+#define MIR_ONNX_OP_REDUCEMEAN_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertReduceMeanV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_REDUCEMEAN_H
diff --git a/compiler/mir-onnx-importer/Op/Relu.cpp b/compiler/mir-onnx-importer/Op/Relu.cpp
new file mode 100644
index 000000000..72424e847
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Relu.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Relu.h"
+
+#include "ONNXHelpers.h"
+
+#include "mir/ops/ReluOp.h"
+
+namespace mir_onnx
+{
+
+static void convertRelu(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+ assert(inputs.size() == 1);
+ auto result = createOp<mir::ops::ReluOp>(graph, inputs[0])->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertReluV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertRelu(onnx_node, context);
+}
+
+void convertReluV6(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertRelu(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Relu.h b/compiler/mir-onnx-importer/Op/Relu.h
new file mode 100644
index 000000000..7159f0add
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Relu.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_RELU_H
+#define MIR_ONNX_OP_RELU_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertReluV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertReluV6(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_RELU_H
diff --git a/compiler/mir-onnx-importer/Op/Reshape.cpp b/compiler/mir-onnx-importer/Op/Reshape.cpp
new file mode 100644
index 000000000..5cd4985e2
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Reshape.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reshape.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/Tensor.h"
+#include "mir/ShapeRange.h"
+
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/ReshapeOp.h"
+
+namespace mir_onnx
+{
+
+void convertReshapeV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+ // consumed_inputs attribute not used
+ const auto *shape_attr = findAttribute(onnx_node, "shape");
+ if (shape_attr && shape_attr->ints_size() > 0)
+ {
+ mir::Shape in_shape = inputs[0]->getShape();
+ mir::Shape out_shape(shape_attr->ints_size());
+ for (int32_t index = 0; index < out_shape.rank(); index++)
+ {
+ const auto dim_value = shape_attr->ints(index);
+ if (dim_value == 0)
+ out_shape.dim(index) = in_shape.dim(index);
+ else
+ out_shape.dim(index) = dim_value;
+ }
+
+ auto result = createOp<mir::ops::ReshapeOp>(graph, inputs[0], out_shape)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+ }
+ else // dimension value is unchanged
+ {
+ context->setNodeOutputs(onnx_node, {inputs[0]});
+ }
+}
+
+void convertReshapeV5(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+ // The original shape
+ const auto &in_shape = inputs[0]->getShape();
+
+ // Input tensor describing the new shape
+ auto *op = dynamic_cast<mir::ops::ConstantOp *>(inputs[1]->getNode());
+ assert(op && "We support only constant shape input");
+ auto shape_tensor = op->getValue();
+ mir::Shape shape_tensor_shape = (shape_tensor).getShape();
+ assert(shape_tensor_shape.rank() == 1);
+ // The rank of the new shape
+ auto cnt = shape_tensor_shape.numElements();
+ // The vector to build the new shape from
+ std::vector<int32_t> shape_vector(cnt);
+ mir::ShapeRange out_range(shape_tensor_shape);
+ mir::Tensor<int64_t> tensor_accessor(shape_tensor);
+
+ int i = 0;
+ for (auto idx : out_range)
+ {
+ if (tensor_accessor.at(idx) == 0)
+ shape_vector[i] = in_shape.dim(i);
+ else if (tensor_accessor.at(idx) == -1)
+ shape_vector[i] = mir::Shape::autoDim;
+ else
+ shape_vector[i] = tensor_accessor.at(idx);
+ i++;
+ }
+ auto out_shape = mir::Shape(shape_vector);
+ auto result = createOp<mir::ops::ReshapeOp>(graph, inputs[0], out_shape)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Reshape.h b/compiler/mir-onnx-importer/Op/Reshape.h
new file mode 100644
index 000000000..4ebbcb7a7
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Reshape.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_RESHAPE_H
+#define MIR_ONNX_OP_RESHAPE_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertReshapeV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertReshapeV5(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_RESHAPE_H
diff --git a/compiler/mir-onnx-importer/Op/Shape.cpp b/compiler/mir-onnx-importer/Op/Shape.cpp
new file mode 100644
index 000000000..8cc250b6e
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Shape.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Shape.h"
+
+#include "ONNXHelpers.h"
+
+#include "mir/TensorVariant.h"
+
+#include "mir/ops/ConstantOp.h"
+
+namespace mir_onnx
+{
+
+void convertShapeV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+ const auto &input_shape = inputs[0]->getShape();
+ int size = input_shape.rank();
+ mir::Shape output_shape{size};
+ std::vector<int64_t> data(static_cast<std::size_t>(size));
+ for (int i = 0; i < size; i++)
+ {
+ data[i] = input_shape.dim(i);
+ }
+ mir::TensorVariant tensor({mir::DataType::INT64, output_shape}, data.data());
+ auto result = createOp<mir::ops::ConstantOp>(graph, tensor)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Shape.h b/compiler/mir-onnx-importer/Op/Shape.h
new file mode 100644
index 000000000..e427d0330
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Shape.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_SHAPE_H
+#define MIR_ONNX_OP_SHAPE_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertShapeV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_SHAPE_H
diff --git a/compiler/mir-onnx-importer/Op/Sigmoid.cpp b/compiler/mir-onnx-importer/Op/Sigmoid.cpp
new file mode 100644
index 000000000..3db547186
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Sigmoid.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sigmoid.h"
+
+#include "ONNXHelpers.h"
+
+#include "mir/ops/SigmoidOp.h"
+
+namespace mir_onnx
+{
+
+static void convertSigmoid(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+ assert(inputs.size() == 1);
+ auto result = createOp<mir::ops::SigmoidOp>(graph, inputs[0])->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertSigmoidV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertSigmoid(onnx_node, context);
+}
+
+void convertSigmoidV6(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertSigmoid(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Sigmoid.h b/compiler/mir-onnx-importer/Op/Sigmoid.h
new file mode 100644
index 000000000..e2d85298f
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Sigmoid.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_SIGMOID_H
+#define MIR_ONNX_OP_SIGMOID_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertSigmoidV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertSigmoidV6(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_SIGMOID_H
diff --git a/compiler/mir-onnx-importer/Op/Softmax.cpp b/compiler/mir-onnx-importer/Op/Softmax.cpp
new file mode 100644
index 000000000..1a2ca04ae
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Softmax.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Softmax.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/SoftmaxOp.h"
+
+namespace mir_onnx
+{
+
+void convertSoftmaxV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ // 1 is the default axis number.
+ const auto axis = getAttributeValue<std::int64_t>(onnx_node, "axis", 1);
+
+ auto result = createOp<mir::ops::SoftmaxOp>(graph, inputs[0], axis)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Softmax.h b/compiler/mir-onnx-importer/Op/Softmax.h
new file mode 100644
index 000000000..23d14c123
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Softmax.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_SOFTMAX_H
+#define MIR_ONNX_OP_SOFTMAX_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertSoftmaxV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_SOFTMAX_H
diff --git a/compiler/mir-onnx-importer/Op/Sqrt.cpp b/compiler/mir-onnx-importer/Op/Sqrt.cpp
new file mode 100644
index 000000000..70ef252fe
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Sqrt.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sqrt.h"
+
+#include "ONNXHelpers.h"
+
+#include "mir/ops/SqrtOp.h"
+
+namespace mir_onnx
+{
+
+static void convertSqrt(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+ assert(inputs.size() == 1);
+ auto result = createOp<mir::ops::SqrtOp>(graph, inputs[0])->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertSqrtV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertSqrt(onnx_node, context);
+}
+
+void convertSqrtV6(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertSqrt(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Sqrt.h b/compiler/mir-onnx-importer/Op/Sqrt.h
new file mode 100644
index 000000000..51815c93c
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Sqrt.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_SQRT_H
+#define MIR_ONNX_OP_SQRT_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertSqrtV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertSqrtV6(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_SQRT_H
diff --git a/compiler/mir-onnx-importer/Op/Sub.cpp b/compiler/mir-onnx-importer/Op/Sub.cpp
new file mode 100644
index 000000000..0c3251909
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Sub.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sub.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/SubOp.h"
+
+namespace mir_onnx
+{
+
+void convertSubV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ // consumed_inputs attribute not used
+ convertSubV6(onnx_node, context);
+}
+
+void convertSubV6(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ // broadcast attribute not used
+ const auto *axis = findAttribute(onnx_node, "axis");
+ if (axis != nullptr)
+ throw std::runtime_error("Not supported axis attribute in Sub operation!");
+
+ convertSubV7(onnx_node, context);
+}
+
+void convertSubV7(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ auto result = createOp<mir::ops::SubOp>(graph, inputs[0], inputs[1])->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Sub.h b/compiler/mir-onnx-importer/Op/Sub.h
new file mode 100644
index 000000000..b521e71ae
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Sub.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_SUB_H
+#define MIR_ONNX_OP_SUB_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertSubV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertSubV6(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertSubV7(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_SUB_H
diff --git a/compiler/mir-onnx-importer/Op/Sum.cpp b/compiler/mir-onnx-importer/Op/Sum.cpp
new file mode 100644
index 000000000..c3a8dacca
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Sum.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sum.h"
+
+#include "ONNXHelpers.h"
+
+#include "mir/ops/AddOp.h"
+
+namespace mir_onnx
+{
+
+void convertSumV8(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+ assert(inputs.size() >= 1);
+
+ auto result = inputs[0];
+ for (int i = 1; i < static_cast<int>(inputs.size()); ++i)
+ {
+ result = createOp<mir::ops::AddOp>(graph, result, inputs[i])->getOutput(0);
+ }
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Sum.h b/compiler/mir-onnx-importer/Op/Sum.h
new file mode 100644
index 000000000..74ceb6dd7
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Sum.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_SUM_H
+#define MIR_ONNX_OP_SUM_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertSumV8(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_SUM_H
diff --git a/compiler/mir-onnx-importer/Op/Tanh.cpp b/compiler/mir-onnx-importer/Op/Tanh.cpp
new file mode 100644
index 000000000..c7faf157c
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Tanh.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tanh.h"
+
+#include "ONNXHelpers.h"
+
+#include "mir/ops/TanhOp.h"
+
+namespace mir_onnx
+{
+
+static void convertTanh(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+ assert(inputs.size() == 1);
+ auto result = createOp<mir::ops::TanhOp>(graph, inputs[0])->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertTanhV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertTanh(onnx_node, context);
+}
+
+void convertTanhV6(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ convertTanh(onnx_node, context);
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Tanh.h b/compiler/mir-onnx-importer/Op/Tanh.h
new file mode 100644
index 000000000..5d3199541
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Tanh.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_TANH_H
+#define MIR_ONNX_OP_TANH_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertTanhV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertTanhV6(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_TANH_H
diff --git a/compiler/mir-onnx-importer/Op/Transpose.cpp b/compiler/mir-onnx-importer/Op/Transpose.cpp
new file mode 100644
index 000000000..82bb2f122
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Transpose.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Transpose.h"
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/TransposeOp.h"
+
+#include <numeric>
+
+namespace mir_onnx
+{
+
+void convertTransposeV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ const auto inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ assert(inputs.size() == 1);
+ auto input = inputs[0];
+
+ const auto num_axes = input->getShape().rank();
+ std::vector<std::size_t> axis_order(num_axes);
+ const auto *perm_attr = findAttribute(onnx_node, "perm");
+
+ if (perm_attr == nullptr)
+ {
+ // Reverse the dimensions.
+ std::iota(axis_order.rbegin(), axis_order.rend(), 0);
+ }
+ else
+ {
+ const auto perm = getAttributeValue<std::vector<std::int64_t>>(*perm_attr);
+ assert(perm.size() == num_axes);
+ std::copy(perm.cbegin(), perm.cend(), axis_order.begin());
+ }
+
+ auto result = createOp<mir::ops::TransposeOp>(graph, input, axis_order)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Transpose.h b/compiler/mir-onnx-importer/Op/Transpose.h
new file mode 100644
index 000000000..1f8c4369a
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Transpose.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_TRANSPOSE_H
+#define MIR_ONNX_OP_TRANSPOSE_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertTransposeV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_TRANSPOSE_H
diff --git a/compiler/mir-onnx-importer/Op/Unsqueeze.cpp b/compiler/mir-onnx-importer/Op/Unsqueeze.cpp
new file mode 100644
index 000000000..1b5995532
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Unsqueeze.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Unsqueeze.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/ops/ReshapeOp.h"
+
+namespace mir_onnx
+{
+
+void convertUnsqueezeV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+ const auto axes = getAttributeValue<std::vector<std::int64_t>>(onnx_node, "axes");
+ assert(!axes.empty());
+ const mir::Shape &input_shape = inputs[0]->getShape();
+ const int out_rank = input_shape.rank() + static_cast<int>(axes.size());
+ mir::Shape out_shape(out_rank);
+ auto ints_iterator = axes.cbegin();
+ int j = 0;
+ for (int i = 0; i < out_rank; i++)
+ {
+ if (ints_iterator < axes.cend() && i == *ints_iterator)
+ {
+ out_shape.dim(i) = 1;
+ ints_iterator++;
+ }
+ else
+ {
+ out_shape.dim(i) = input_shape.dim(j);
+ j++;
+ }
+ }
+ auto result = createOp<mir::ops::ReshapeOp>(graph, inputs[0], out_shape)->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Unsqueeze.h b/compiler/mir-onnx-importer/Op/Unsqueeze.h
new file mode 100644
index 000000000..46fea97ee
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Unsqueeze.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_UNSQUEEZE_H
+#define MIR_ONNX_OP_UNSQUEEZE_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertUnsqueezeV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_UNSQUEEZE_H
diff --git a/compiler/mir-onnx-importer/Op/Upsample.cpp b/compiler/mir-onnx-importer/Op/Upsample.cpp
new file mode 100644
index 000000000..49a555647
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Upsample.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Upsample.h"
+
+#include "ONNXHelpers.h"
+#include "AttributeHelpers.h"
+
+#include "mir/Tensor.h"
+
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/ResizeOp.h"
+
+namespace mir_onnx
+{
+
+void convertUpsampleV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ // "nearest" is the default mode.
+ std::string mode = getAttributeValue<std::string>(onnx_node, "mode", "nearest");
+ assert(mode == "nearest" && "Unsupported upscale mode!");
+
+ const float h_scale = getAttributeValue<float>(onnx_node, "height_scale", 0.0f); // required
+ const float w_scale = getAttributeValue<float>(onnx_node, "width_scale", 0.0f); // required
+ if (h_scale < 1.0f || w_scale < 1.0f)
+ throw std::runtime_error("Wrong scale attributes!");
+
+ assert(inputs[0]->getShape().rank() == 4 && "Only rank 4 is supported");
+ std::vector<float> scales_vector(4);
+ // NCHW
+ scales_vector.at(0) = 1.0f;
+ scales_vector.at(1) = 1.0f;
+ scales_vector.at(2) = h_scale;
+ scales_vector.at(3) = w_scale;
+
+ auto result =
+ createOp<mir::ops::ResizeOp>(graph, inputs[0],
+ mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
+ ->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertUpsampleV7(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ // "nearest" is the default mode.
+ std::string mode = getAttributeValue<std::string>(onnx_node, "mode", "nearest");
+ assert(mode == "nearest" && "Unsupported upscale mode!");
+
+ const auto *scales_attr = findAttribute(onnx_node, "scales");
+ if (!scales_attr)
+ throw std::runtime_error("Not enough required scales attribute!");
+
+ if (scales_attr->floats_size() != inputs[0]->getShape().rank())
+ throw std::runtime_error(
+ "Number of elements of scales should be the same as the rank of input");
+
+ assert(inputs[0]->getShape().rank() == 4 && "Only rank 4 is supported");
+ std::vector<float> scales_vector(4);
+ // NCHW
+ scales_vector.at(0) = scales_attr->floats(0);
+ scales_vector.at(1) = scales_attr->floats(1);
+ scales_vector.at(2) = scales_attr->floats(2);
+ scales_vector.at(3) = scales_attr->floats(3);
+
+ auto result =
+ createOp<mir::ops::ResizeOp>(graph, inputs[0],
+ mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
+ ->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+void convertUpsampleV9(const onnx::NodeProto &onnx_node, ConverterContext *context)
+{
+ std::vector<mir::Operation::Output *> inputs = context->getNodeInputs(onnx_node);
+ mir::Graph *graph = context->getGraph();
+
+ // "nearest" is the default mode.
+ const auto mode = getAttributeValue<std::string>(onnx_node, "mode", "nearest");
+ assert(mode == "nearest" && "Unsupported upscale mode!");
+
+ // relies on attributes being lifted to constants (ONNX optimization pass)
+ assert(inputs.size() > 1);
+ auto *scales = dynamic_cast<mir::ops::ConstantOp *>(inputs[1]->getNode());
+ assert(scales && "Weights could be a constant tensor only");
+ auto scales_tensor = mir::Tensor<float>(scales->getValue());
+ int rank = inputs[0]->getShape().rank();
+ assert(scales_tensor.getShape().numElements() == rank &&
+ "The number of elements of 'scales' should be the same as the rank of input 'X'");
+ assert(rank == 4 && "Only rank 4 is supported");
+ std::vector<float> scales_vector(4);
+ assert(scales_tensor.getShape().rank() == 1 && "Scales are a 1d tensor");
+ for (int i = 0; i < scales_tensor.getShape().numElements(); i++)
+ scales_vector[i] = scales_tensor.atOffset(i);
+
+ auto result =
+ createOp<mir::ops::ResizeOp>(graph, inputs[0],
+ mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
+ ->getOutput(0);
+
+ context->setNodeOutputs(onnx_node, {result});
+}
+
+} // namespace mir_onnx
diff --git a/compiler/mir-onnx-importer/Op/Upsample.h b/compiler/mir-onnx-importer/Op/Upsample.h
new file mode 100644
index 000000000..99600eede
--- /dev/null
+++ b/compiler/mir-onnx-importer/Op/Upsample.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_ONNX_OP_UPSAMPLE_H
+#define MIR_ONNX_OP_UPSAMPLE_H
+
+#include "ONNXNodeConverterRegistry.h"
+
+namespace mir_onnx
+{
+
+void convertUpsampleV1(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertUpsampleV7(const onnx::NodeProto &onnx_node, ConverterContext *context);
+void convertUpsampleV9(const onnx::NodeProto &onnx_node, ConverterContext *context);
+
+} // namespace mir_onnx
+
+#endif // MIR_ONNX_OP_UPSAMPLE_H
diff --git a/compiler/mir-onnx-importer/requires.cmake b/compiler/mir-onnx-importer/requires.cmake
new file mode 100644
index 000000000..52a7837df
--- /dev/null
+++ b/compiler/mir-onnx-importer/requires.cmake
@@ -0,0 +1,2 @@
+require("mir")
+require("mir-interpreter")
diff --git a/compiler/mir-tflite-importer/CMakeLists.txt b/compiler/mir-tflite-importer/CMakeLists.txt
new file mode 100644
index 000000000..4a06d51b8
--- /dev/null
+++ b/compiler/mir-tflite-importer/CMakeLists.txt
@@ -0,0 +1,22 @@
+nnas_find_package(FlatBuffers REQUIRED)
+
+if (NOT FlatBuffers_FOUND)
+ return()
+endif ()
+
+FlatBuffers_Target(mir_tflite_schema
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated/schema"
+ SCHEMA_DIR "${CMAKE_CURRENT_SOURCE_DIR}/schema"
+ SCHEMA_FILES schema.fbs)
+
+
+set(MIR_TFLITE_IMPORTER_SOURCES
+ tflite_importer.cpp
+ tflite_importer.h
+ tflite_op_creator.cpp
+ tflite_op_creator.h)
+
+add_library(mir_tflite_importer STATIC ${MIR_TFLITE_IMPORTER_SOURCES})
+set_target_properties(mir_tflite_importer PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(mir_tflite_importer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_link_libraries(mir_tflite_importer PUBLIC mir mir_tflite_schema PRIVATE stdex)
diff --git a/compiler/mir-tflite-importer/requires.cmake b/compiler/mir-tflite-importer/requires.cmake
new file mode 100644
index 000000000..1059c50d3
--- /dev/null
+++ b/compiler/mir-tflite-importer/requires.cmake
@@ -0,0 +1 @@
+require("mir")
diff --git a/compiler/mir-tflite-importer/schema/schema.fbs b/compiler/mir-tflite-importer/schema/schema.fbs
new file mode 100644
index 000000000..dc7aab128
--- /dev/null
+++ b/compiler/mir-tflite-importer/schema/schema.fbs
@@ -0,0 +1,937 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3
+ quantized_dimension:int;
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+enum BuiltinOperator : byte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow bidrectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimension is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+
+ // The version of the operator. The version need to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operations is configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicate map lookups.
+ opcode_index:uint;
+
+ // Optional input and output tensors are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator.(e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existings buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+}
+
+root_type Model;
diff --git a/compiler/mir-tflite-importer/schema/schema.meta b/compiler/mir-tflite-importer/schema/schema.meta
new file mode 100644
index 000000000..c86134c5a
--- /dev/null
+++ b/compiler/mir-tflite-importer/schema/schema.meta
@@ -0,0 +1,2 @@
+REPO=https://github.com/tensorflow/tensorflow.git
+COMMIT=998eadd
diff --git a/compiler/mir-tflite-importer/schema/schema_v0.fbs b/compiler/mir-tflite-importer/schema/schema_v0.fbs
new file mode 100644
index 000000000..852ea988f
--- /dev/null
+++ b/compiler/mir-tflite-importer/schema/schema_v0.fbs
@@ -0,0 +1,247 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace tflite;
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+}
+
+// Parameters for converting a quantized tensor back to float. Given a
+// quantized value q, the corresponding float value f should be:
+// f = scale * (q - zero_point)
+table QuantizationParameters {
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float];
+ zero_point:[long];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, number of channels, height, width] (That's
+ // Tensorflow's NCHW).
+ shape:[int];
+ type:TensorType;
+ // The data_buffer is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*4*3 + j*3 + k].
+ data_buffer:[ubyte];
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+}
+
+// A list of builtin operators. Builtin operators a slighlty faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+enum BuiltinOperator : byte {
+ CUSTOM = 0,
+ CONVOLUTION = 1,
+ DEPTHWISE_CONVOLUTION = 2,
+ CONCAT_EMBEDDINGS = 3,
+ LSH_PROJECTION = 4,
+ TANH = 5,
+ RELU = 6,
+ AVERAGE_POOL = 7,
+ MAX_POOL = 8,
+ L2_POOL = 9,
+ SIGMOID = 10,
+ SVDF = 11,
+ BasicRNN = 12,
+ RELU6 = 13,
+ EMBEDDING_LOOKUP = 14,
+ FULLY_CONNECTED = 15,
+ HASHTABLE_LOOKUP = 16,
+ SOFTMAX = 17,
+ CONCATENATION = 18,
+ LSTM = 19,
+ ADD = 20,
+ L2NORM = 21,
+ LOCAL_RESPONSE_NORM = 22,
+ RESIZE_BILINEAR = 23,
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ ConvolutionOptions,
+ DepthwiseConvolutionOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ PoolOptions,
+ SVDFOptions,
+ BasicRNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table ConvolutionOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table PoolOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConvolutionOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow BasicRNNCell.
+table BasicRNNOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+}
+
+table ResizeBilinearOptions {
+ new_height:int;
+ new_width:int;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operations is configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicate map lookups.
+ opcode_index:int;
+
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+}
+
+// The root type, defining a model.
+table Model {
+ // A list of all tensors used in this model.
+ tensors:[Tensor];
+
+ // Indices of the input tensors.
+ inputs:[int];
+
+ // Indices of the output tensors.
+ outputs:[int];
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All operators, in execution order.
+ operators:[Operator];
+}
+
+root_type Model;
diff --git a/compiler/mir-tflite-importer/schema/schema_v0.meta b/compiler/mir-tflite-importer/schema/schema_v0.meta
new file mode 100644
index 000000000..74668ab7a
--- /dev/null
+++ b/compiler/mir-tflite-importer/schema/schema_v0.meta
@@ -0,0 +1,2 @@
+REPO=https://github.com/tensorflow/tensorflow.git
+COMMIT=c7a04561fb8
diff --git a/compiler/mir-tflite-importer/schema/schema_v1.fbs b/compiler/mir-tflite-importer/schema/schema_v1.fbs
new file mode 100644
index 000000000..06cd9408e
--- /dev/null
+++ b/compiler/mir-tflite-importer/schema/schema_v1.fbs
@@ -0,0 +1,295 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+
+namespace tflite;
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+}
+
+// Parameters for converting a quantized tensor back to float. Given a
+// quantized value q, the corresponding float value f should be:
+// f = scale * (q - zero_point)
+table QuantizationParameters {
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float];
+ zero_point:[long];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, number of channels, height, width] (That's
+ // Tensorflow's NCHW).
+ shape:[int];
+ type:TensorType;
+ // The data_buffer is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*3 + k].
+ data_buffer:[ubyte];
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+enum BuiltinOperator : byte {
+ CUSTOM = 0,
+ CONVOLUTION = 1,
+ DEPTHWISE_CONVOLUTION = 2,
+ CONCAT_EMBEDDINGS = 3,
+ LSH_PROJECTION = 4,
+ TANH = 5,
+ RELU = 6,
+ AVERAGE_POOL = 7,
+ MAX_POOL = 8,
+ L2_POOL = 9,
+ SIGMOID = 10,
+ SVDF = 11,
+ BasicRNN = 12,
+ RELU6 = 13,
+ EMBEDDING_LOOKUP = 14,
+ FULLY_CONNECTED = 15,
+ HASHTABLE_LOOKUP = 16,
+ SOFTMAX = 17,
+ CONCATENATION = 18,
+ LSTM = 19,
+ ADD = 20,
+ L2NORM = 21,
+ LOCAL_RESPONSE_NORM = 22,
+ RESIZE_BILINEAR = 23,
+ CALL = 24,
+ RESHAPE = 25,
+ SKIP_GRAM = 26,
+ SPACE_TO_DEPTH = 27,
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ ConvolutionOptions,
+ DepthwiseConvolutionOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ PoolOptions,
+ SVDFOptions,
+ BasicRNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table ConvolutionOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table PoolOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConvolutionOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow BasicRNNCell.
+table BasicRNNOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+}
+
+table ResizeBilinearOptions {
+ new_height:int;
+ new_width:int;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:int;
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation is configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:int;
+
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+}
+
+// The root type, defining a model.
+table SubGraph {
+ // A list of all tensors used in this model.
+ tensors:[Tensor];
+
+ // Indices of the input tensors.
+ inputs:[int];
+
+ // Indices of the output tensors.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of subgraph (used for debugging).
+ name:string;
+}
+
+table Model {
+ // Version of the schema.
+ version:int;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+}
+
+root_type Model;
diff --git a/compiler/mir-tflite-importer/schema/schema_v1.meta b/compiler/mir-tflite-importer/schema/schema_v1.meta
new file mode 100644
index 000000000..74668ab7a
--- /dev/null
+++ b/compiler/mir-tflite-importer/schema/schema_v1.meta
@@ -0,0 +1,2 @@
+REPO=https://github.com/tensorflow/tensorflow.git
+COMMIT=c7a04561fb8
diff --git a/compiler/mir-tflite-importer/schema/schema_v2.fbs b/compiler/mir-tflite-importer/schema/schema_v2.fbs
new file mode 100644
index 000000000..96731c8aa
--- /dev/null
+++ b/compiler/mir-tflite-importer/schema/schema_v2.fbs
@@ -0,0 +1,303 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+
+namespace tflite;
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+}
+
+// Parameters for converting a quantized tensor back to float. Given a
+// quantized value q, the corresponding float value f should be:
+// f = scale * (q - zero_point)
+table QuantizationParameters {
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float];
+ zero_point:[long];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, number of channels, height, width] (That's
+ // Tensorflow's NCHW).
+ shape:[int];
+ type:TensorType;
+ // The data_buffer is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*3 + k].
+ data_buffer:[ubyte];
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+enum BuiltinOperator : byte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ // DEPTH_TO_SPACE = 5,
+ // DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ // FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ // MUL = 18,
+ RELU = 19,
+ // RELU1=20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+}
+
+table ResizeBilinearOptions {
+ new_height:int;
+ new_width:int;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:int;
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation is configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:int;
+
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+}
+
+// The root type, defining a model.
+table SubGraph {
+ // A list of all tensors used in this model.
+ tensors:[Tensor];
+
+ // Indices of the input tensors.
+ inputs:[int];
+
+ // Indices of the output tensors.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of subgraph (used for debugging).
+ name:string;
+}
+
+table Model {
+ // Version of the schema.
+ version:int;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+}
+
+root_type Model;
diff --git a/compiler/mir-tflite-importer/schema/schema_v2.meta b/compiler/mir-tflite-importer/schema/schema_v2.meta
new file mode 100644
index 000000000..74668ab7a
--- /dev/null
+++ b/compiler/mir-tflite-importer/schema/schema_v2.meta
@@ -0,0 +1,2 @@
+REPO=https://github.com/tensorflow/tensorflow.git
+COMMIT=c7a04561fb8
diff --git a/compiler/mir-tflite-importer/schema/schema_v3.fbs b/compiler/mir-tflite-importer/schema/schema_v3.fbs
new file mode 100644
index 000000000..cedefe08f
--- /dev/null
+++ b/compiler/mir-tflite-importer/schema/schema_v3.fbs
@@ -0,0 +1,326 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+
+namespace tflite;
+
+// This corresponds to the version (4).
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+}
+
+// Parameters for converting a quantized tensor back to float. Given a
+// quantized value q, the corresponding float value f should be:
+// f = scale * (q - zero_point)
+table QuantizationParameters {
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float];
+ zero_point:[long];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, number of channels, height, width] (That's
+ // Tensorflow's NCHW).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*3 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+enum BuiltinOperator : byte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ // DEPTH_TO_SPACE = 5,
+ // DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ // FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ // MUL = 18,
+ RELU = 19,
+ // RELU1=20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+}
+
+table ResizeBilinearOptions {
+ new_height:int;
+ new_width:int;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation is configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+}
+
+// The root type, defining a model.
+table SubGraph {
+ // A list of all tensors used in this model.
+ tensors:[Tensor];
+
+ // Indices of the input tensors.
+ inputs:[int];
+
+ // Indices of the output tensors.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index.
+table Buffer {
+ data:[ubyte];
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // NOTE: It is required that the first entry in here is always an empty
+ // buffer. This is so that the default buffer index of zero in Tensor
+ // will always refer to a valid empty buffer.
+ buffers:[Buffer];
+
+}
+
+root_type Model;
diff --git a/compiler/mir-tflite-importer/schema/schema_v3.meta b/compiler/mir-tflite-importer/schema/schema_v3.meta
new file mode 100644
index 000000000..74668ab7a
--- /dev/null
+++ b/compiler/mir-tflite-importer/schema/schema_v3.meta
@@ -0,0 +1,2 @@
+REPO=https://github.com/tensorflow/tensorflow.git
+COMMIT=c7a04561fb8
diff --git a/compiler/mir-tflite-importer/tflite_importer.cpp b/compiler/mir-tflite-importer/tflite_importer.cpp
new file mode 100644
index 000000000..e3001d33d
--- /dev/null
+++ b/compiler/mir-tflite-importer/tflite_importer.cpp
@@ -0,0 +1,428 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite_importer.h"
+#include "tflite_op_creator.h"
+#include "schema_generated.h"
+
+#include "mir/TensorVariant.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/OutputOp.h"
+
+#include <fstream>
+#include <stdex/Memory.h>
+#include <utility>
+#include <vector>
+#include <set>
+
+namespace mir_tflite
+{
+
+namespace
+{
+
+class TfliteImporter
+{
+public:
+ explicit TfliteImporter(std::string filename);
+
+ /// @brief Load the model and convert it into a MIR Graph.
+ std::unique_ptr<mir::Graph> importModel();
+
+ ~TfliteImporter();
+
+private:
+ std::string _filename;
+ std::unique_ptr<tflite::ModelT> _model;
+
+ std::unique_ptr<mir::Graph> _graph;
+ std::unique_ptr<TFLiteOpCreator> _opCreator;
+
+ // Maps TFLite tensors indices to corresponding MIR operation outputs.
+ std::vector<mir::Operation::Output *> _tensorMap;
+
+ void import();
+
+ void walkModel(const tflite::ModelT *model);
+
+ void walkSubgraph(const tflite::SubGraphT *subgraph);
+
+ void walkOperator(const tflite::SubGraphT *subgraph, const tflite::OperatorT *op);
+
+ /**
+ * @brief Pass through tflite graph and collect operators unsupported by NNC
+ * @throw PassException with message, containing detected problems
+ */
+ void collectUnsupportedOps();
+
+ /**
+ * @brief Returns MIR operation outputs corresponding to the inputs of the given operator.
+ */
+ std::vector<mir::Operation::Output *> getMIRInputsForOperator(const tflite::SubGraphT *subgraph,
+ const tflite::OperatorT *op);
+};
+
+TfliteImporter::TfliteImporter(std::string filename) : _filename(std::move(filename))
+{
+ _graph = stdex::make_unique<mir::Graph>();
+ _opCreator = stdex::make_unique<TFLiteOpCreator>(_graph.get());
+}
+
+TfliteImporter::~TfliteImporter() = default;
+
+void TfliteImporter::import()
+{
+ std::ifstream stream(_filename, std::ios::in | std::ios::binary);
+ if (stream.fail())
+ throw std::runtime_error("Couldn't open file \"" + _filename + "\".");
+
+ std::vector<char> model_buffer((std::istreambuf_iterator<char>(stream)),
+ std::istreambuf_iterator<char>());
+
+ if (stream.fail())
+ throw std::runtime_error("Couldn't read file \"" + _filename + "\".");
+
+ flatbuffers::Verifier verifier(reinterpret_cast<const std::uint8_t *>(model_buffer.data()),
+ model_buffer.size());
+
+ if (!tflite::VerifyModelBuffer(verifier))
+ throw std::runtime_error("Could not load model: " + _filename + "\n");
+
+ _model = tflite::UnPackModel(model_buffer.data());
+}
+
+static const std::set<tflite::BuiltinOperator> supportedOperators = {
+ tflite::BuiltinOperator_ADD,
+ tflite::BuiltinOperator_AVERAGE_POOL_2D,
+ tflite::BuiltinOperator_CONCATENATION,
+ tflite::BuiltinOperator_CONV_2D,
+ tflite::BuiltinOperator_DEPTHWISE_CONV_2D,
+ tflite::BuiltinOperator_DIV,
+ tflite::BuiltinOperator_FULLY_CONNECTED,
+ tflite::BuiltinOperator_HARD_SWISH,
+ tflite::BuiltinOperator_LEAKY_RELU,
+ tflite::BuiltinOperator_LOGISTIC,
+ tflite::BuiltinOperator_MAX_POOL_2D,
+ tflite::BuiltinOperator_MAXIMUM,
+ tflite::BuiltinOperator_MEAN,
+ tflite::BuiltinOperator_MUL,
+ tflite::BuiltinOperator_PAD,
+ tflite::BuiltinOperator_RELU,
+ tflite::BuiltinOperator_RELU6,
+ tflite::BuiltinOperator_RESHAPE,
+ tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
+ tflite::BuiltinOperator_RSQRT,
+ tflite::BuiltinOperator_SHAPE,
+ tflite::BuiltinOperator_SLICE,
+ tflite::BuiltinOperator_SOFTMAX,
+ tflite::BuiltinOperator_SQRT,
+ tflite::BuiltinOperator_SQUARED_DIFFERENCE,
+ tflite::BuiltinOperator_SQUEEZE,
+ tflite::BuiltinOperator_STRIDED_SLICE,
+ tflite::BuiltinOperator_SUB,
+ tflite::BuiltinOperator_TANH,
+ tflite::BuiltinOperator_TRANSPOSE,
+ tflite::BuiltinOperator_TRANSPOSE_CONV,
+};
+
+void TfliteImporter::collectUnsupportedOps()
+{
+ std::set<std::string> errors;
+ for (const auto &subgraph : _model->subgraphs)
+ for (const auto &op : subgraph->operators)
+ {
+ tflite::BuiltinOperator opcode = _model->operator_codes[op->opcode_index]->builtin_code;
+ if (supportedOperators.find(opcode) == supportedOperators.end())
+ {
+ if (opcode <= tflite::BuiltinOperator_MAX)
+ errors.insert(std::string(EnumNameBuiltinOperator(opcode)) + ": unsupported operator");
+ else
+ errors.insert(std::to_string(opcode) + ": unsuppored in tflite custom opcode");
+ }
+ }
+
+ if (!errors.empty())
+ {
+ std::string msg("NNC can't load model. Detected problems:");
+ for (const auto &e : errors)
+ msg.append("\n * " + e);
+ throw std::runtime_error(msg);
+ }
+}
+
+std::unique_ptr<mir::Graph> TfliteImporter::importModel()
+{
+ import();
+ collectUnsupportedOps();
+ walkModel(_model.get());
+ return std::move(_graph);
+}
+
+void TfliteImporter::walkModel(const tflite::ModelT *model)
+{
+ for (const auto &subgraph : model->subgraphs)
+ walkSubgraph(subgraph.get());
+}
+
+mir::DataType convertElementType(tflite::TensorType type)
+{
+ switch (type)
+ {
+ case tflite::TensorType_INT32:
+ return mir::DataType::INT32;
+ case tflite::TensorType_FLOAT32:
+ return mir::DataType::FLOAT32;
+ case tflite::TensorType_INT64:
+ return mir::DataType::INT64;
+ case tflite::TensorType_UINT8:
+ return mir::DataType::UINT8;
+ default:
+ throw std::runtime_error(std::string("Unsupported tensor type: ") + EnumNameTensorType(type));
+ }
+}
+
+mir::TensorType getMirTensorType(const tflite::TensorT &tensor)
+{
+ mir::DataType element_type = convertElementType(tensor.type);
+
+ mir::Shape shape(tensor.shape.size());
+ for (std::size_t i = 0; i < tensor.shape.size(); ++i)
+ {
+ shape.dim(i) = tensor.shape[i];
+ }
+
+ if (tensor.quantization != nullptr)
+ {
+ const tflite::QuantizationParametersT &params = *tensor.quantization;
+
+ if (params.details.type != tflite::QuantizationDetails_NONE)
+ throw std::runtime_error("Custom quantization is not supported.");
+
+ // Empty parameters mean no quantization at all.
+ if (params.scale.empty() && params.zero_point.empty())
+ return mir::TensorType{element_type, shape};
+
+ if (params.scale.size() != 1 || params.zero_point.size() != 1)
+ throw std::runtime_error("Non-scalar quantization is not supported.");
+
+ mir::AffineQuantization quantization{params.scale[0], static_cast<int>(params.zero_point[0])};
+
+ return mir::TensorType{element_type, shape, quantization};
+ }
+ else
+ {
+ return mir::TensorType{element_type, shape};
+ }
+}
+
+// Translates one TFLite subgraph into the MIR graph: creates InputOps for the
+// subgraph inputs, converts each operator in file order, then attaches
+// OutputOps to the subgraph outputs. _tensorMap maps a TFLite tensor index to
+// the MIR output that produces it (nullptr until produced).
+void TfliteImporter::walkSubgraph(const tflite::SubGraphT *subgraph)
+{
+  _tensorMap.assign(subgraph->tensors.size(), nullptr);
+
+  for (const auto input_tensor_index : subgraph->inputs)
+  {
+    const tflite::TensorT &tensor = *subgraph->tensors[input_tensor_index];
+
+    mir::TensorType input_type = getMirTensorType(tensor);
+    auto input = _graph->create<mir::ops::InputOp>(input_type)->getOutput(0);
+    input->setName(tensor.name);
+
+    // Each tensor index must be produced exactly once.
+    assert(_tensorMap[input_tensor_index] == nullptr);
+    _tensorMap[input_tensor_index] = input;
+  }
+
+  for (const auto &op : subgraph->operators)
+  {
+    walkOperator(subgraph, op.get());
+  }
+
+  for (const auto output_tensor_index : subgraph->outputs)
+  {
+    auto output = _tensorMap[output_tensor_index];
+    _graph->create<mir::ops::OutputOp>(output);
+  }
+}
+
+// Converts a single TFLite operator into MIR operations by dispatching on its
+// builtin opcode to the matching TFLiteOpCreator method, then records the
+// produced MIR outputs in _tensorMap under the operator's output tensor
+// indices (fixing up name and quantized type on each output).
+void TfliteImporter::walkOperator(const tflite::SubGraphT *subgraph, const tflite::OperatorT *op)
+{
+  std::vector<mir::Operation::Output *> inputs = getMIRInputsForOperator(subgraph, op);
+  std::vector<mir::Operation::Output *> outputs;
+
+  tflite::BuiltinOperator opcode = _model->operator_codes[op->opcode_index]->builtin_code;
+  switch (opcode)
+  {
+    case tflite::BuiltinOperator_CONV_2D:
+      outputs = _opCreator->convertConv2D(op->builtin_options.AsConv2DOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_DEPTHWISE_CONV_2D:
+      outputs = _opCreator->convertDepthwiseConv2D(op->builtin_options.AsDepthwiseConv2DOptions(),
+                                                   inputs);
+      break;
+    case tflite::BuiltinOperator_MAX_POOL_2D:
+      outputs = _opCreator->convertMaxPool2D(op->builtin_options.AsPool2DOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_AVERAGE_POOL_2D:
+      outputs = _opCreator->convertAveragePool2D(op->builtin_options.AsPool2DOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_CONCATENATION:
+      outputs =
+          _opCreator->convertConcatenation(op->builtin_options.AsConcatenationOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_RESHAPE:
+      outputs = _opCreator->convertReshape(op->builtin_options.AsReshapeOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
+      outputs = _opCreator->convertResizeNearestNeighbor(
+          op->builtin_options.AsResizeNearestNeighborOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_MEAN:
+      outputs = _opCreator->convertMean(op->builtin_options.AsReducerOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_FULLY_CONNECTED:
+      outputs =
+          _opCreator->convertFullyConnected(op->builtin_options.AsFullyConnectedOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_SOFTMAX:
+      outputs = _opCreator->convertSoftmax(op->builtin_options.AsSoftmaxOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_SLICE:
+      outputs = _opCreator->convertSlice(op->builtin_options.AsSliceOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_SQUEEZE:
+      outputs = _opCreator->convertSqueeze(op->builtin_options.AsSqueezeOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_LOGISTIC:
+      outputs = _opCreator->convertLogistic(inputs);
+      break;
+    case tflite::BuiltinOperator_RSQRT:
+      outputs = _opCreator->convertRsqrt(inputs);
+      break;
+    case tflite::BuiltinOperator_SQRT:
+      outputs = _opCreator->convertSqrt(inputs);
+      break;
+    case tflite::BuiltinOperator_ADD:
+      outputs = _opCreator->convertAdd(op->builtin_options.AsAddOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_SUB:
+      outputs = _opCreator->convertSub(op->builtin_options.AsSubOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_MUL:
+      outputs = _opCreator->convertMul(op->builtin_options.AsMulOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_DIV:
+      outputs = _opCreator->convertDiv(op->builtin_options.AsDivOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_MAXIMUM:
+      outputs = _opCreator->convertMax(inputs);
+      break;
+    case tflite::BuiltinOperator_SQUARED_DIFFERENCE:
+      outputs = _opCreator->convertSquaredDifference(inputs);
+      break;
+    case tflite::BuiltinOperator_TRANSPOSE_CONV:
+      outputs =
+          _opCreator->convertTransposeConv(op->builtin_options.AsTransposeConvOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_PAD:
+      outputs = _opCreator->convertPad(op->builtin_options.AsPadOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_TANH:
+      outputs = _opCreator->convertTanh(inputs);
+      break;
+    case tflite::BuiltinOperator_RELU:
+      outputs = _opCreator->convertReLU(inputs);
+      break;
+    case tflite::BuiltinOperator_RELU6:
+      outputs = _opCreator->convertReLU6(inputs);
+      break;
+    case tflite::BuiltinOperator_TRANSPOSE:
+      outputs = _opCreator->convertTranspose(op->builtin_options.AsTransposeOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_STRIDED_SLICE:
+      outputs =
+          _opCreator->convertStridedSlice(op->builtin_options.AsStridedSliceOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_LEAKY_RELU:
+      outputs = _opCreator->convertLeakyReLU(op->builtin_options.AsLeakyReluOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_SHAPE:
+      outputs = _opCreator->convertShape(op->builtin_options.AsShapeOptions(), inputs);
+      break;
+    case tflite::BuiltinOperator_HARD_SWISH:
+      outputs = _opCreator->convertHardSwish(op->builtin_options.AsHardSwishOptions(), inputs);
+      break;
+    default:
+      // NOTE(review): relies on an earlier validation pass to have rejected
+      // unsupported opcodes; this assert vanishes in NDEBUG builds.
+      assert(false && "All unsupported types should have been found before this pass.");
+  }
+
+  assert(outputs.size() == op->outputs.size());
+  for (std::size_t i = 0; i < op->outputs.size(); ++i)
+  {
+    const auto tensor_index = op->outputs[i];
+    const tflite::TensorT &tensor = *subgraph->tensors[tensor_index];
+
+    mir::TensorType output_type = getMirTensorType(tensor);
+
+    // The type should have been inferred correctly, except for quantization information.
+    assert(outputs[i]->getType().getElementType() == output_type.getElementType() &&
+           outputs[i]->getType().getShape() == output_type.getShape());
+
+    // Overwrite the inferred type with the one from the model so that
+    // quantization parameters are carried over.
+    outputs[i]->setName(tensor.name);
+    outputs[i]->setType(output_type);
+
+    assert(_tensorMap[tensor_index] == nullptr);
+    _tensorMap[tensor_index] = outputs[i];
+  }
+}
+
+// Resolves the MIR outputs feeding an operator. Tensors backed by non-empty
+// model buffers are materialized as ConstantOps (weights/params); all other
+// tensors must already be present in _tensorMap, because TFLite lists
+// operators in execution order.
+std::vector<mir::Operation::Output *>
+TfliteImporter::getMIRInputsForOperator(const tflite::SubGraphT *subgraph,
+                                        const tflite::OperatorT *op)
+{
+  std::vector<mir::Operation::Output *> inputs;
+
+  for (const auto tensor_index : op->inputs)
+  {
+    const tflite::TensorT &tensor = *subgraph->tensors[tensor_index];
+    const tflite::BufferT &buffer = *_model->buffers[tensor.buffer];
+    if (!buffer.data.empty())
+    {
+      assert(_tensorMap[tensor_index] == nullptr);
+      mir::TensorType type = getMirTensorType(tensor);
+      mir::TensorVariant mir_tensor{type, buffer.data.data()};
+      inputs.emplace_back(_graph->create<mir::ops::ConstantOp>(mir_tensor)->getOutput(0));
+    }
+    else
+    {
+      assert(_tensorMap[tensor_index] != nullptr);
+      // By this point every input for the operation "op" should have corresponding
+      // Model IR operations that output its inputs. This assumption is provided by the fact
+      // that TFLite format specifies all operations in the execution order.
+      inputs.emplace_back(_tensorMap[tensor_index]);
+    }
+  }
+
+  return inputs;
+}
+
+} // namespace
+
+// Public entry point: loads the TFLite model at `filename` and converts it
+// into a MIR graph owned by the caller.
+std::unique_ptr<mir::Graph> loadModel(std::string filename)
+{
+  TfliteImporter importer(std::move(filename));
+  return importer.importModel();
+}
+
+} // namespace mir_tflite
diff --git a/compiler/mir-tflite-importer/tflite_importer.h b/compiler/mir-tflite-importer/tflite_importer.h
new file mode 100644
index 000000000..85cd01ee9
--- /dev/null
+++ b/compiler/mir-tflite-importer/tflite_importer.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_TFLITE_IMPORTER_H
+#define MIR_TFLITE_IMPORTER_H
+
+#include "mir/Graph.h"
+
+#include <memory>
+#include <string>
+
+namespace mir_tflite
+{
+
+std::unique_ptr<mir::Graph> loadModel(std::string filename);
+
+} // namespace mir_tflite
+
+#endif // MIR_TFLITE_IMPORTER_H
diff --git a/compiler/mir-tflite-importer/tflite_op_creator.cpp b/compiler/mir-tflite-importer/tflite_op_creator.cpp
new file mode 100644
index 000000000..5f4279f55
--- /dev/null
+++ b/compiler/mir-tflite-importer/tflite_op_creator.cpp
@@ -0,0 +1,649 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite_op_creator.h"
+#include "schema_generated.h"
+
+#include "mir/ops/AddOp.h"
+#include "mir/ops/AvgPool2DOp.h"
+#include "mir/ops/CappedReluOp.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/Deconv2DOp.h"
+#include "mir/ops/DepthwiseConv2DOp.h"
+#include "mir/ops/DivOp.h"
+#include "mir/ops/FullyConnectedOp.h"
+#include "mir/ops/HardSwishOp.h"
+#include "mir/ops/LeakyReluOp.h"
+#include "mir/ops/MaxOp.h"
+#include "mir/ops/MaxPool2DOp.h"
+#include "mir/ops/MulOp.h"
+#include "mir/ops/PadOp.h"
+#include "mir/ops/ReduceMeanOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/ops/ReshapeOp.h"
+#include "mir/ops/ResizeOp.h"
+#include "mir/ops/SigmoidOp.h"
+#include "mir/ops/SliceOp.h"
+#include "mir/ops/SoftmaxOp.h"
+#include "mir/ops/SqrtOp.h"
+#include "mir/ops/SqueezeOp.h"
+#include "mir/ops/SubOp.h"
+#include "mir/ops/TanhOp.h"
+#include "mir/ops/TransposeOp.h"
+
+#include "mir/Shape.h"
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+namespace mir_tflite
+{
+
+namespace ops = mir::ops;
+using mir::Shape;
+
+// Maps a TFLite padding enum onto the corresponding MIR padding type.
+// Throws for values with no MIR equivalent: the previous `assert(false)`
+// compiles away in NDEBUG builds, letting control fall off the end of a
+// non-void function (undefined behavior). Throwing matches the error style
+// used elsewhere in this importer (e.g. convertElementType).
+static mir::ops::PaddingType convertPadding(tflite::Padding padding)
+{
+  switch (padding)
+  {
+    case tflite::Padding_VALID:
+      return mir::ops::PaddingType::Valid;
+    case tflite::Padding_SAME:
+      return mir::ops::PaddingType::SameUpper;
+    default:
+      throw std::runtime_error(std::string("Unsupported padding type: ") +
+                               tflite::EnumNamePadding(padding));
+  }
+}
+
+// TODO Move this to MIR?
+static void calculatePadding(mir::ops::PaddingType padding_type, const mir::Shape &input_shape,
+ const std::vector<std::int32_t> &window_size,
+ const std::vector<std::int32_t> &strides,
+ std::vector<std::int32_t> &padding_before,
+ std::vector<std::int32_t> &padding_after)
+{
+ constexpr int num_spatial_dims = 2;
+ assert(window_size.size() == num_spatial_dims);
+ assert(strides.size() == num_spatial_dims);
+ assert(padding_before.size() == num_spatial_dims);
+ assert(padding_after.size() == num_spatial_dims);
+
+ switch (padding_type)
+ {
+ case mir::ops::PaddingType::SameUpper:
+ for (int i = 0; i < num_spatial_dims; ++i)
+ {
+ // Assuming NHWC format.
+ const std::int32_t total_padding =
+ (input_shape.dim(1 + i) % strides[i] == 0)
+ ? std::max(0, window_size[i] - strides[i])
+ : std::max(0, window_size[i] - input_shape.dim(1 + i) % strides[i]);
+ padding_before[i] = total_padding / 2;
+ padding_after[i] = total_padding - padding_before[i];
+ }
+ break;
+ case mir::ops::PaddingType::Valid:
+ for (int i = 0; i < num_spatial_dims; ++i)
+ {
+ padding_before[i] = 0;
+ padding_after[i] = 0;
+ }
+ break;
+ default:
+ assert(false);
+ }
+}
+
+// Flattens an int32 MIR tensor into a std::vector<VectorT>, visiting elements
+// in ShapeRange (row-major index) order and casting each value to VectorT.
+template <typename VectorT>
+static std::vector<VectorT> convertIntTensorToVector(const mir::Tensor<int32_t> &tensor)
+{
+  std::vector<VectorT> v;
+  for (const auto &i : mir::ShapeRange(tensor.getShape()))
+    v.emplace_back(static_cast<VectorT>(tensor.at(i)));
+  return v;
+}
+
+// Returns the constant value behind an operation output; throws if the
+// producing node is not a ConstantOp (i.e. the value is only known at
+// runtime, which these converters cannot handle).
+static const mir::TensorVariant &extractTensor(const mir::Operation::Output *output)
+{
+  auto constant_op = dynamic_cast<const ops::ConstantOp *>(output->getNode());
+  if (constant_op == nullptr)
+    throw std::runtime_error("Non-constant input is not supported.");
+  return constant_op->getValue();
+}
+
+// Converts TFLite CONV_2D: inputs are (input, kernel, bias). Computes SAME/
+// VALID padding from the kernel's spatial dims (dims 1 and 2 of its shape),
+// then emits Conv2DOp (+ separate AddOp for bias on non-quantized inputs)
+// followed by the fused activation, if any.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertConv2D(const tflite::Conv2DOptionsT *opts,
+                               const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+  auto kernel = inputs.at(1);
+  auto bias = inputs.at(2);
+
+  mir::Conv2DOpAttributes attributes;
+  attributes.strides = {opts->stride_h, opts->stride_w};
+
+  const auto padding_type = convertPadding(opts->padding);
+  const auto &input_shape = input->getShape();
+  const auto &kernel_shape = kernel->getShape();
+  const auto &strides = attributes.strides;
+  auto &pad_before = attributes.padding_before;
+  auto &pad_after = attributes.padding_after;
+  std::vector<std::int32_t> kernel_size{kernel_shape.dim(1), kernel_shape.dim(2)};
+  calculatePadding(padding_type, input_shape, kernel_size, strides, pad_before, pad_after);
+
+  mir::Operation::Output *result;
+  if (input->getType().isQuantized())
+  {
+    // Quantized path: bias is fused into the convolution op itself.
+    result = createOp<ops::Conv2DOp>(input, kernel, bias, attributes)->getOutput(0);
+  }
+  else // TODO Fuse bias to other backends
+  {
+    result = createOp<ops::Conv2DOp>(input, kernel, attributes)->getOutput(0);
+    result = createOp<ops::AddOp>(result, bias)->getOutput(0);
+  }
+  return {addFusedActivation(result, opts->fused_activation_function)};
+}
+
+// Converts TFLite DEPTHWISE_CONV_2D. The TFLite kernel layout is first
+// transposed OHWI -> HWIO to match MIR's expectation; spatial kernel dims are
+// then dims 0 and 1 of the transposed shape. Bias and activation handling
+// mirrors convertConv2D.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertDepthwiseConv2D(const tflite::DepthwiseConv2DOptionsT *opts,
+                                        const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+  auto kernel = inputs.at(1);
+  auto bias = inputs.at(2);
+
+  // OHWI -> HWIO
+  const std::vector<std::size_t> axis_order{1, 2, 3, 0};
+  kernel = createOp<ops::TransposeOp>(kernel, axis_order)->getOutput(0);
+
+  mir::Conv2DOpAttributes attributes;
+  attributes.strides = {opts->stride_h, opts->stride_w};
+
+  const auto padding_type = convertPadding(opts->padding);
+  const auto &input_shape = input->getShape();
+  const auto &kernel_shape = kernel->getShape();
+  std::vector<std::int32_t> kernel_size{kernel_shape.dim(0), kernel_shape.dim(1)};
+  const auto &strides = attributes.strides;
+  auto &pad_before = attributes.padding_before;
+  auto &pad_after = attributes.padding_after;
+  calculatePadding(padding_type, input_shape, kernel_size, strides, pad_before, pad_after);
+
+  mir::Operation::Output *result;
+  if (input->getType().isQuantized())
+  {
+    // Quantized path: bias is fused into the op itself.
+    result = createOp<ops::DepthwiseConv2DOp>(input, kernel, bias, attributes)->getOutput(0);
+  }
+  else // TODO Fuse bias to other backends
+  {
+    result = createOp<ops::DepthwiseConv2DOp>(input, kernel, attributes)->getOutput(0);
+    result = createOp<ops::AddOp>(result, bias)->getOutput(0);
+  }
+  return {addFusedActivation(result, opts->fused_activation_function)};
+}
+
+// Converts TFLite CONCATENATION: all inputs are concatenated along opts->axis,
+// then the fused activation (if any) is applied.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertConcatenation(const tflite::ConcatenationOptionsT *opts,
+                                      const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto result = createOp<ops::ConcatOp>(inputs, opts->axis);
+  return {addFusedActivation(result->getOutput(0), opts->fused_activation_function)};
+}
+
+// Converts TFLite MAX_POOL_2D: builds window/stride attributes, derives
+// SAME/VALID padding from the input's spatial dims, then applies the fused
+// activation.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertMaxPool2D(const tflite::Pool2DOptionsT *opts,
+                                  const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+
+  const auto &input_shape = input->getShape();
+
+  mir::MaxPool2DOpAttributes attributes;
+  attributes.window = {opts->filter_height, opts->filter_width};
+  attributes.strides = {opts->stride_h, opts->stride_w};
+
+  const auto padding_type = convertPadding(opts->padding);
+  const auto &window_size = attributes.window;
+  const auto &strides = attributes.strides;
+  auto &pad_before = attributes.padding_before;
+  auto &pad_after = attributes.padding_after;
+  calculatePadding(padding_type, input_shape, window_size, strides, pad_before, pad_after);
+
+  auto result = createOp<ops::MaxPool2DOp>(input, attributes);
+  return {addFusedActivation(result->getOutput(0), opts->fused_activation_function)};
+}
+
+// Converts TFLite AVERAGE_POOL_2D. include_pad=false matches TFLite
+// semantics: padded elements are excluded from the average's divisor.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertAveragePool2D(const tflite::Pool2DOptionsT *opts,
+                                      const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+
+  const auto &input_shape = input->getShape();
+
+  mir::AvgPool2DOpAttributes attributes;
+  attributes.window = {opts->filter_height, opts->filter_width};
+  attributes.strides = {opts->stride_h, opts->stride_w};
+  attributes.include_pad = false;
+
+  const auto padding_type = convertPadding(opts->padding);
+  const auto &window_size = attributes.window;
+  const auto &strides = attributes.strides;
+  auto &pad_before = attributes.padding_before;
+  auto &pad_after = attributes.padding_after;
+  calculatePadding(padding_type, input_shape, window_size, strides, pad_before, pad_after);
+
+  auto result = createOp<ops::AvgPool2DOp>(input, attributes);
+  return {addFusedActivation(result->getOutput(0), opts->fused_activation_function)};
+}
+
+// Converts TFLite SOFTMAX over the last axis of a 2-D input (opts->beta is
+// not consulted; the options struct is unused here).
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertSoftmax(const tflite::SoftmaxOptionsT * /*opts*/,
+                                const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+
+  // Softmax in TFLite is always 2-D.
+  assert(input->getShape().rank() == 2);
+  const int32_t axis = 1;
+  auto result = createOp<ops::SoftmaxOp>(input, axis);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite SLICE: inputs 1 and 2 are constant begin/size tensors,
+// which are flattened into Shapes for SliceOp.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertSlice(const tflite::SliceOptionsT * /*opts*/,
+                              const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+  mir::Tensor<int32_t> begin_tensor(extractTensor(inputs.at(1)));
+  mir::Tensor<int32_t> size_tensor(extractTensor(inputs.at(2)));
+
+  Shape starts(convertIntTensorToVector<int32_t>(begin_tensor));
+  Shape sizes(convertIntTensorToVector<int32_t>(size_tensor));
+  auto result = createOp<ops::SliceOp>(input, starts, sizes);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite RESHAPE using the new_shape stored in the options.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertReshape(const tflite::ReshapeOptionsT *opts,
+                                const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+
+  // TODO: we should also support "-1" values in new_shape, which means that correct
+  // shape values must be calculated. Better do it in the shape inference module.
+  // NOTE(review): `int i` vs size_t new_shape.size() is a signed/unsigned
+  // comparison; harmless for realistic ranks but triggers -Wsign-compare.
+  Shape new_shape(opts->new_shape.size());
+  for (int i = 0; i < opts->new_shape.size(); ++i)
+  {
+    new_shape.dim(i) = opts->new_shape[i];
+  }
+  auto result = createOp<ops::ReshapeOp>(input, new_shape);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite TRANSPOSE_CONV. Inputs are (output_shape, kernel, input);
+// output_shape must be a constant tensor. The kernel is transposed
+// OHWI -> HWOI before being handed to DeConv2DOp.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertTransposeConv(const tflite::TransposeConvOptionsT *opts,
+                                      const std::vector<mir::Operation::Output *> &inputs)
+{
+  mir::Tensor<int32_t> output_shape_tensor(extractTensor(inputs.at(0)));
+  auto kernel = inputs.at(1);
+  auto input = inputs.at(2);
+
+  mir::Deconv2DOpAttributes attributes;
+  attributes.strides = {opts->stride_h, opts->stride_w};
+  Shape output_shape(convertIntTensorToVector<int32_t>(output_shape_tensor));
+
+  // OHWI -> HWOI
+  const std::vector<std::size_t> axis_order{1, 2, 0, 3};
+  kernel = createOp<ops::TransposeOp>(kernel, axis_order)->getOutput(0);
+
+  attributes.padding_type = convertPadding(opts->padding);
+  auto result = createOp<ops::DeConv2DOp>(input, kernel, attributes, output_shape)->getOutput(0);
+  return {result};
+}
+
+// Converts TFLite RESIZE_NEAREST_NEIGHBOR. The target H/W come from a
+// constant 2-element size tensor; batch and channel dims are preserved
+// (NHWC assumed). align_corners is rejected as unsupported.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertResizeNearestNeighbor(const tflite::ResizeNearestNeighborOptionsT *opts,
+                                              const std::vector<mir::Operation::Output *> &inputs)
+{
+  if (opts->align_corners)
+    throw std::runtime_error("'align_corners' is not currently supported");
+
+  auto input = inputs.at(0);
+  mir::Tensor<int32_t> size_tensor(extractTensor(inputs.at(1)));
+
+  const auto &input_shape = input->getShape();
+  Shape res_shape{input_shape.dim(0), size_tensor.at(mir::Index{0}), size_tensor.at(mir::Index{1}),
+                  input_shape.dim(3)};
+  auto result =
+      createOp<ops::ResizeOp>(input, ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite ADD (binary), then applies the fused activation.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertAdd(const tflite::AddOptionsT *opts,
+                            const std::vector<mir::Operation::Output *> &inputs)
+{
+  assert(inputs.size() == 2);
+  auto result = createOp<ops::AddOp>(inputs[0], inputs[1])->getOutput(0);
+  return {addFusedActivation(result, opts->fused_activation_function)};
+}
+
+// Converts TFLite SUB (binary), then applies the fused activation.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertSub(const tflite::SubOptionsT *opts,
+                            const std::vector<mir::Operation::Output *> &inputs)
+{
+  assert(inputs.size() == 2);
+  auto result = createOp<ops::SubOp>(inputs[0], inputs[1])->getOutput(0);
+  return {addFusedActivation(result, opts->fused_activation_function)};
+}
+
+// Converts TFLite MUL (binary), then applies the fused activation.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertMul(const tflite::MulOptionsT *opts,
+                            const std::vector<mir::Operation::Output *> &inputs)
+{
+  assert(inputs.size() == 2);
+  auto result = createOp<ops::MulOp>(inputs[0], inputs[1])->getOutput(0);
+  return {addFusedActivation(result, opts->fused_activation_function)};
+}
+
+// Converts TFLite DIV (binary), then applies the fused activation.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertDiv(const tflite::DivOptionsT *opts,
+                            const std::vector<mir::Operation::Output *> &inputs)
+{
+  assert(inputs.size() == 2);
+  auto result = createOp<ops::DivOp>(inputs[0], inputs[1])->getOutput(0);
+  return {addFusedActivation(result, opts->fused_activation_function)};
+}
+
+// Converts TFLite MAXIMUM (element-wise binary max).
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertMax(const std::vector<mir::Operation::Output *> &inputs)
+{
+  assert(inputs.size() == 2);
+  auto result = createOp<ops::MaxOp>(inputs[0], inputs[1])->getOutput(0);
+  return {result};
+}
+
+// Converts TFLite SQUARED_DIFFERENCE as (a - b) * (a - b), since MIR has
+// no dedicated op for it.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertSquaredDifference(const std::vector<mir::Operation::Output *> &inputs)
+{
+  assert(inputs.size() == 2);
+  auto result = createOp<ops::SubOp>(inputs[0], inputs[1])->getOutput(0);
+  result = createOp<ops::MulOp>(result, result)->getOutput(0);
+  return {result};
+}
+
+// Converts TFLite MEAN: input 1 is a constant tensor of reduction axes;
+// keep_dims is forwarded from the options.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertMean(const tflite::ReducerOptionsT *opts,
+                             const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+  mir::Tensor<int32_t> axes_tensor(extractTensor(inputs.at(1)));
+
+  std::vector<int32_t> axes = convertIntTensorToVector<int32_t>(axes_tensor);
+  auto result = createOp<ops::ReduceMeanOp>(input, axes, opts->keep_dims);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite FULLY_CONNECTED: inputs are (input, weights, bias). The
+// input is flattened to [batch, features]; the weights are transposed from
+// TFLite's [out, in] to MIR's [in, out]. Bias is fused for quantized inputs
+// or added as a separate AddOp otherwise, then the fused activation applies.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertFullyConnected(const tflite::FullyConnectedOptionsT *opts,
+                                       const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+  auto weights = inputs.at(1);
+  auto bias = inputs.at(2);
+
+  // Flatten input to 2-D shape.
+  const auto &input_shape = input->getShape();
+  int32_t outer_size = input_shape.dim(0);
+  int32_t inner_size = input_shape.numElements() / outer_size;
+  auto flatten = createOp<ops::ReshapeOp>(input, Shape{outer_size, inner_size})->getOutput(0);
+
+  // Transpose the weights.
+  const std::vector<std::size_t> axis_order{1, 0};
+  weights = createOp<ops::TransposeOp>(weights, axis_order)->getOutput(0);
+
+  mir::Operation::Output *result;
+  if (input->getType().isQuantized())
+  {
+    result = createOp<ops::FullyConnectedOp>(flatten, weights, bias)->getOutput(0);
+  }
+  else // TODO Fuse bias to other backends
+  {
+    result = createOp<ops::FullyConnectedOp>(flatten, weights)->getOutput(0);
+    result = createOp<ops::AddOp>(result, bias)->getOutput(0);
+  }
+  return {addFusedActivation(result, opts->fused_activation_function)};
+}
+
+// Appends the MIR op corresponding to a TFLite fused activation to `input`
+// and returns its output; NONE passes the input through unchanged. Throws
+// for activation types with no MIR counterpart.
+mir::Operation::Output *
+TFLiteOpCreator::addFusedActivation(mir::Operation::Output *input,
+                                    tflite::ActivationFunctionType activation_type)
+{
+  switch (activation_type)
+  {
+    case tflite::ActivationFunctionType_NONE:
+      return input;
+    case tflite::ActivationFunctionType_RELU:
+      return createOp<ops::ReluOp>(input)->getOutput(0);
+    case tflite::ActivationFunctionType_RELU6:
+      // RELU6 == min(max(x, 0), 6), i.e. a capped ReLU.
+      return createOp<ops::CappedReluOp>(input, 6)->getOutput(0);
+    case tflite::ActivationFunctionType_TANH:
+      return createOp<ops::TanhOp>(input)->getOutput(0);
+    default:
+      throw std::runtime_error(std::string("Unsupported activation type: ") +
+                               tflite::EnumNameActivationFunctionType(activation_type));
+  }
+}
+
+// Converts TFLite SQUEEZE using the squeeze_dims list from the options.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertSqueeze(const tflite::SqueezeOptionsT *opts,
+                                const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+
+  std::vector<int32_t> squeeze_dims(opts->squeeze_dims.begin(), opts->squeeze_dims.end());
+  auto result = createOp<ops::SqueezeOp>(input, squeeze_dims);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite PAD: input 1 is a constant [rank, 2] paddings tensor whose
+// columns are the before/after pad amounts for each dimension.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertPad(const tflite::PadOptionsT * /*opts*/,
+                            const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+  mir::Tensor<int32_t> paddings_tensor(extractTensor(inputs.at(1)));
+
+  const auto &input_shape = input->getShape();
+  const int num_dims = input_shape.rank();
+
+  mir::PadOpAttributes attributes(num_dims);
+  for (int i = 0; i < num_dims; i++)
+  {
+    attributes.padding_before[i] = paddings_tensor.at(mir::Index({i, 0}));
+    attributes.padding_after[i] = paddings_tensor.at(mir::Index({i, 1}));
+  }
+
+  auto result = createOp<ops::PadOp>(input, attributes)->getOutput(0);
+  return {result};
+}
+
+// Converts TFLite TANH (element-wise).
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertTanh(const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+
+  auto result = createOp<ops::TanhOp>(input);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite RELU (element-wise).
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertReLU(const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+
+  auto result = createOp<ops::ReluOp>(input);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite RELU6 as a ReLU capped at 6.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertReLU6(const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+
+  auto result = createOp<ops::CappedReluOp>(input, 6);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite RSQRT as 1 / sqrt(x), since MIR has no dedicated rsqrt op.
+// The scalar 1.0 constant is FLOAT32 with an empty (rank-0) shape.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertRsqrt(const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+
+  const float one_value = 1.0f;
+  mir::TensorVariant one_tensor({mir::DataType::FLOAT32, {}}, &one_value);
+  auto one = createOp<ops::ConstantOp>(one_tensor)->getOutput(0);
+  auto sqrt = createOp<ops::SqrtOp>(input)->getOutput(0);
+  auto result = createOp<ops::DivOp>(one, sqrt)->getOutput(0);
+  return {result};
+}
+
+// Converts TFLite SQRT (element-wise).
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertSqrt(const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+
+  auto result = createOp<ops::SqrtOp>(input)->getOutput(0);
+  return {result};
+}
+
+// Converts TFLite LOGISTIC (sigmoid) element-wise.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertLogistic(const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+
+  auto result = createOp<ops::SigmoidOp>(input);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite TRANSPOSE: input 1 is a constant permutation tensor that
+// becomes TransposeOp's axis order.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertTranspose(const tflite::TransposeOptionsT * /*opts*/,
+                                  const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+  mir::Tensor<int32_t> perm_tensor(extractTensor(inputs.at(1)));
+
+  std::vector<std::size_t> axis_order = convertIntTensorToVector<std::size_t>(perm_tensor);
+  auto result = createOp<ops::TransposeOp>(input, axis_order);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite STRIDED_SLICE as a SliceOp followed by a SqueezeOp.
+// Supported subset: all strides must be 1; ellipsis_mask and new_axis_mask
+// must be 0. begin_mask/end_mask bits select the full-range default for an
+// axis; shrink_axis_mask bits mark axes to squeeze out of the result.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertStridedSlice(const tflite::StridedSliceOptionsT *opts,
+                                     const std::vector<mir::Operation::Output *> &inputs)
+{
+  if (opts->ellipsis_mask != 0)
+    throw std::runtime_error("StridedSlice: parameter 'ellipsis_mask' is not supported.");
+
+  if (opts->new_axis_mask != 0)
+    throw std::runtime_error("StridedSlice: parameter 'new_axis_mask' is not supported.");
+
+  auto input = inputs.at(0);
+  // begin/end/strides must be constant tensors.
+  mir::Tensor<int32_t> begin_tensor(extractTensor(inputs.at(1)));
+  mir::Tensor<int32_t> end_tensor(extractTensor(inputs.at(2)));
+  mir::Tensor<int32_t> strides_tensor(extractTensor(inputs.at(3)));
+
+  std::vector<int32_t> begin = convertIntTensorToVector<int32_t>(begin_tensor);
+  std::vector<int32_t> end = convertIntTensorToVector<int32_t>(end_tensor);
+  std::vector<int32_t> strides = convertIntTensorToVector<int32_t>(strides_tensor);
+
+  int32_t begin_mask = opts->begin_mask;
+  int32_t end_mask = opts->end_mask;
+  int32_t shrink_axis_mask = opts->shrink_axis_mask;
+
+  const auto &input_shape = input->getShape();
+  int32_t num_dims = input_shape.rank();
+
+  for (int32_t stride : strides)
+  {
+    if (stride != 1)
+      throw std::runtime_error("StridedSlice: parameter 'strides' is not supported");
+  }
+
+  // Translate (begin, end, masks) into SliceOp's (start, size) form.
+  Shape start(num_dims);
+  Shape size(num_dims);
+  std::vector<int32_t> squeeze_dims;
+  for (int axis = 0; axis < num_dims; axis++)
+  {
+    if (static_cast<uint32_t>(begin_mask) & (1u << static_cast<uint32_t>(axis)))
+      start.dim(axis) = 0;
+    else
+      start.dim(axis) = begin.at(static_cast<uint64_t>(axis));
+
+    if (static_cast<uint32_t>(end_mask) & (1u << static_cast<uint32_t>(axis)))
+      size.dim(axis) = input_shape.dim(axis) - start.dim(axis);
+    else
+      size.dim(axis) = end.at(static_cast<uint64_t>(axis)) - start.dim(axis);
+
+    if (static_cast<uint32_t>(shrink_axis_mask) & (1u << static_cast<uint32_t>(axis)))
+      squeeze_dims.push_back(axis);
+  }
+
+  auto result = createOp<ops::SliceOp>(input, start, size);
+  result = createOp<ops::SqueezeOp>(result->getOutput(0), squeeze_dims);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite LEAKY_RELU with the alpha slope from the options.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertLeakyReLU(const tflite::LeakyReluOptionsT *opts,
+                                  const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto input = inputs.at(0);
+
+  auto result = createOp<ops::LeakyReluOp>(input, opts->alpha);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite SHAPE by folding it at import time: the (statically known)
+// input shape is materialized as an INT32 ConstantOp of rank-1. Only
+// out_type == INT32 is accepted.
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertShape(const tflite::ShapeOptionsT *opts,
+                              const std::vector<mir::Operation::Output *> &inputs)
+{
+  if (opts->out_type != tflite::TensorType_INT32)
+  {
+    throw std::runtime_error(std::string("SHAPE: Unsupported tensor type: ") +
+                             EnumNameTensorType(opts->out_type));
+  }
+
+  const auto &input_shape = inputs[0]->getShape();
+  int32_t rank = input_shape.rank();
+  std::vector<int32_t> data;
+  data.reserve(static_cast<uint64_t>(rank));
+  for (int32_t i = 0; i < rank; i++)
+    data.emplace_back(input_shape.dim(i));
+  mir::TensorVariant tensor({mir::DataType::INT32, {rank}}, data.data());
+  auto result = createOp<ops::ConstantOp>(tensor);
+  return {result->getOutput(0)};
+}
+
+// Converts TFLite HARD_SWISH. The options struct carries no fields we use,
+// so its parameter name is commented out — this suppresses
+// -Wunused-parameter and matches the convention used by the other
+// option-less converters in this file (convertSoftmax, convertPad, ...).
+std::vector<mir::Operation::Output *>
+TFLiteOpCreator::convertHardSwish(const tflite::HardSwishOptionsT * /*opts*/,
+                                  const std::vector<mir::Operation::Output *> &inputs)
+{
+  auto result = createOp<ops::HardSwishOp>(inputs[0])->getOutput(0);
+  return {result};
+}
+
+} // namespace mir_tflite
diff --git a/compiler/mir-tflite-importer/tflite_op_creator.h b/compiler/mir-tflite-importer/tflite_op_creator.h
new file mode 100644
index 000000000..820436f33
--- /dev/null
+++ b/compiler/mir-tflite-importer/tflite_op_creator.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MIR_TFLITE_OP_CREATOR_H
+#define MIR_TFLITE_OP_CREATOR_H
+
+#include "schema_generated.h"
+
+#include "mir/Graph.h"
+
+#include <utility>
+#include <vector>
+
+namespace mir_tflite
+{
+
+class TFLiteOpCreator
+{
+public:
+ explicit TFLiteOpCreator(mir::Graph *g) : _graph(g) {}
+
+ std::vector<mir::Operation::Output *>
+ convertConv2D(const tflite::Conv2DOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertDepthwiseConv2D(const tflite::DepthwiseConv2DOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertConcatenation(const tflite::ConcatenationOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertMaxPool2D(const tflite::Pool2DOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertAveragePool2D(const tflite::Pool2DOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertMean(const tflite::ReducerOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertSoftmax(const tflite::SoftmaxOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertSlice(const tflite::SliceOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertReshape(const tflite::ReshapeOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertFullyConnected(const tflite::FullyConnectedOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertResizeNearestNeighbor(const tflite::ResizeNearestNeighborOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertLogistic(const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertRsqrt(const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertSqrt(const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertSqueeze(const tflite::SqueezeOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertAdd(const tflite::AddOptionsT *opts, const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertSub(const tflite::SubOptionsT *opts, const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertMul(const tflite::MulOptionsT *opts, const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertDiv(const tflite::DivOptionsT *opts, const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertMax(const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertSquaredDifference(const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertTanh(const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertReLU(const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertReLU6(const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertTransposeConv(const tflite::TransposeConvOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertPad(const tflite::PadOptionsT *opts, const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertTranspose(const tflite::TransposeOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertStridedSlice(const tflite::StridedSliceOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertLeakyReLU(const tflite::LeakyReluOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertShape(const tflite::ShapeOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+ std::vector<mir::Operation::Output *>
+ convertHardSwish(const tflite::HardSwishOptionsT *opts,
+ const std::vector<mir::Operation::Output *> &inputs);
+
+private:
+ mir::Graph *_graph;
+
+ mir::Operation::Output *addFusedActivation(mir::Operation::Output *input,
+ tflite::ActivationFunctionType activation_type);
+
+ template <typename OpType, typename... Types> mir::Operation *createOp(Types &&... args);
+};
+
+template <typename OpType, typename... Types>
+mir::Operation *TFLiteOpCreator::createOp(Types &&... args)
+{
+ return _graph->create<OpType>(std::forward<Types>(args)...);
+}
+
+} // namespace mir_tflite
+
+#endif // MIR_TFLITE_OP_CREATOR_H
diff --git a/compiler/mir/CMakeLists.txt b/compiler/mir/CMakeLists.txt
new file mode 100644
index 000000000..ecb1de2db
--- /dev/null
+++ b/compiler/mir/CMakeLists.txt
@@ -0,0 +1,38 @@
+set(MIR_SOURCES
+ src/ops/AvgPool2DOp.cpp
+ src/ops/BinaryElementwiseOp.cpp
+ src/ops/BroadcastOp.cpp
+ src/ops/ConcatOp.cpp
+ src/ops/Conv2DOp.cpp
+ src/ops/DeConv2DOp.cpp
+ src/ops/DepthwiseConv2DOp.cpp
+ src/ops/FullyConnectedOp.cpp
+ src/ops/GatherOp.cpp
+ src/ops/MaxPool2DOp.cpp
+ src/ops/PadOp.cpp
+ src/ops/ReduceOp.cpp
+ src/ops/SqueezeOp.cpp
+ src/ops/SliceOp.cpp
+ src/ops/TransposeOp.cpp
+ src/DotGraph.cpp
+ src/DotNodeBuilder.cpp
+ src/Graph.cpp
+ src/GraphPatternMatcher.cpp
+ src/Index.cpp
+ src/IrDotDumper.cpp
+ src/Operation.cpp
+ src/Shape.cpp
+ src/Tensor.cpp
+ src/TensorVariant.cpp
+ src/Visitor.cpp)
+
+add_library(mir STATIC ${MIR_SOURCES})
+target_include_directories(mir PUBLIC include)
+target_link_libraries(mir PUBLIC adtidas)
+target_link_libraries(mir PRIVATE nncc_common)
+target_link_libraries(mir PUBLIC nncc_coverage)
+
+set_target_properties(mir PROPERTIES POSITION_INDEPENDENT_CODE ON)
+set_target_properties(mir PROPERTIES LINKER_LANGUAGE CXX)
+
+add_subdirectory(unittests)
diff --git a/compiler/mir/Readme.md b/compiler/mir/Readme.md
new file mode 100644
index 000000000..9fb1348a5
--- /dev/null
+++ b/compiler/mir/Readme.md
@@ -0,0 +1,36 @@
+## Model IR (MIR)
+
+### Purpose
+This library exposes **NNC**'s model IR to the outer tools (currently `Mirunner`).
+
+### Design philosophy
+
+**MIR** was designed to support a multiple-frontend NN compiler/optimizer.
+
+### Function
+
+The high level overview of **MIR** is:
+* operations are a composition of their `inputs`, `outputs` and
+special attributes specific to different operation types.
+* operations can have multiple inputs and multiple outputs,
+ each output can be an input to more than one operation
+ (can be used in more than one operation).
+* the kernel tensors are represented by `ConstantOp` and
+ are linked to operations via `Input` objects.
+
+Mir has a protobuf serializer/deserializer for shapes and tensors (see `mir.proto` schema).
+
+For list of currently supported operations, see `mir/ops/operations.lst.h`.
+
+### How to use
+Can be included as a `CMake` target.
+
+### TODO
+
+* Expand serialization
+* Add More to readme
+
+### Dependencies
+
+Mir depends on the `adtidas` library, which provides the `small_vector` data type.
+
diff --git a/compiler/mir/include/mir/Attributes.h b/compiler/mir/include/mir/Attributes.h
new file mode 100644
index 000000000..64a4e0f46
--- /dev/null
+++ b/compiler/mir/include/mir/Attributes.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OP_ATTRIBUTES_H
+#define OP_ATTRIBUTES_H
+
+#include <vector>
+#include "mir/DataFormat.h"
+#include "mir/ops/PaddingType.h"
+
+namespace mir
+{
+
+struct Conv2DOpAttributes
+{
+ Conv2DOpAttributes() = default;
+
+ std::vector<std::int32_t> strides{1, 1};
+ std::vector<std::int32_t> padding_before{0, 0};
+ std::vector<std::int32_t> padding_after{0, 0};
+ std::int32_t num_groups{1};
+ DataFormat data_format{DataFormat::NHWC};
+};
+
+struct AvgPool2DOpAttributes
+{
+ AvgPool2DOpAttributes() = default;
+
+ std::vector<std::int32_t> window{1, 1};
+ std::vector<std::int32_t> strides{1, 1};
+ std::vector<std::int32_t> padding_before{0, 0};
+ std::vector<std::int32_t> padding_after{0, 0};
+ DataFormat data_format{DataFormat::NHWC};
+ bool include_pad{true};
+};
+
+struct MaxPool2DOpAttributes
+{
+ MaxPool2DOpAttributes() = default;
+
+ std::vector<std::int32_t> window{1, 1};
+ std::vector<std::int32_t> strides{1, 1};
+ std::vector<std::int32_t> padding_before{0, 0};
+ std::vector<std::int32_t> padding_after{0, 0};
+ DataFormat data_format{DataFormat::NHWC};
+};
+
+struct Deconv2DOpAttributes
+{
+ Deconv2DOpAttributes() = default;
+
+ std::vector<std::int32_t> strides{1, 1};
+ std::vector<std::int32_t> padding_before{0, 0};
+ std::vector<std::int32_t> padding_after{0, 0};
+ DataFormat data_format{DataFormat::NHWC};
+ ops::PaddingType padding_type{ops::PaddingType::Explicit};
+};
+
+struct PadOpAttributes
+{
+ PadOpAttributes() : padding_value(0.0) {}
+ PadOpAttributes(unsigned dims) : padding_before(dims), padding_after(dims), padding_value(0.0) {}
+
+ std::vector<std::int32_t> padding_before;
+ std::vector<std::int32_t> padding_after;
+ float padding_value;
+};
+} // namespace mir
+
+#endif
diff --git a/compiler/mir/include/mir/Common.h b/compiler/mir/include/mir/Common.h
new file mode 100644
index 000000000..340622e99
--- /dev/null
+++ b/compiler/mir/include/mir/Common.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_COMMON_H_
+#define _MIR_COMMON_H_
+
+#include <cstddef>
+#include <cstdint>
+
+namespace mir
+{
+/**
+ * @brief maximum number of dimensions what an Index, Shape or Tensor can have
+ */
+constexpr std::size_t MAX_DIMENSION_COUNT = 8;
+
+inline constexpr std::size_t wrap_index(std::int32_t index, std::size_t limit) noexcept
+{
+ return static_cast<std::size_t>(index >= 0 ? index : limit + index);
+}
+} // namespace mir
+
+#endif //_MIR_COMMON_H_
diff --git a/compiler/mir/include/mir/DataFormat.h b/compiler/mir/include/mir/DataFormat.h
new file mode 100644
index 000000000..44edfa828
--- /dev/null
+++ b/compiler/mir/include/mir/DataFormat.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_DATA_FORMAT_H_
+#define _MIR_DATA_FORMAT_H_
+
+#include <cassert>
+#include <string>
+
+namespace mir
+{
+
+enum class DataFormat
+{
+ NCHW,
+ NHWC
+};
+
+inline int getDataBatchDimIndex(DataFormat data_format)
+{
+ switch (data_format)
+ {
+ case DataFormat::NCHW:
+ case DataFormat::NHWC:
+ return 0;
+ default:
+ assert(false);
+ return -1; // Dummy value to silence compiler warning.
+ }
+}
+
+inline int getDataChannelDimIndex(DataFormat data_format)
+{
+ switch (data_format)
+ {
+ case DataFormat::NCHW:
+ return 1;
+ case DataFormat::NHWC:
+ return 3;
+ default:
+ assert(false);
+    return -1; // Dummy value to silence compiler warning.
+ }
+}
+
+inline int getDataSpatialDimIndex(DataFormat data_format, int dim)
+{
+ assert(dim >= 0 && dim <= 1);
+ switch (data_format)
+ {
+ case DataFormat::NCHW:
+ return 2 + dim;
+ case DataFormat::NHWC:
+ return 1 + dim;
+ default:
+ assert(false);
+ return -1; // Dummy value to silence compiler warning.
+ }
+}
+
+inline std::string toString(DataFormat data_format)
+{
+ switch (data_format)
+ {
+ case DataFormat::NCHW:
+ return "NCHW";
+ case DataFormat::NHWC:
+ return "NHWC";
+ default:
+ assert(false);
+ return ""; // Dummy value to silence compiler warning.
+ }
+}
+
+} // namespace mir
+
+#endif //_MIR_DATA_FORMAT_H_
diff --git a/compiler/mir/include/mir/DataType.h b/compiler/mir/include/mir/DataType.h
new file mode 100644
index 000000000..4d99be3c1
--- /dev/null
+++ b/compiler/mir/include/mir/DataType.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_DATA_TYPE_H_
+#define _MIR_DATA_TYPE_H_
+
+#include <cassert>
+#include <cstdint>
+
+namespace mir
+{
+
+enum class DataType
+{
+ UNKNOWN,
+ FLOAT32,
+ FLOAT64,
+ INT32,
+ INT64,
+ UINT8
+};
+
+inline std::size_t getDataTypeSize(DataType type)
+{
+ switch (type)
+ {
+ case DataType::FLOAT32:
+ return sizeof(float);
+ case DataType::FLOAT64:
+ return sizeof(double);
+ case DataType::INT32:
+ return sizeof(int32_t);
+ case DataType::INT64:
+ return sizeof(int64_t);
+ case DataType::UINT8:
+ return sizeof(uint8_t);
+ default:
+ assert(false);
+ return 0;
+ }
+}
+
+} // namespace mir
+
+#endif //_MIR_DATA_TYPE_H_
diff --git a/compiler/mir/include/mir/ExternalRegion.h b/compiler/mir/include/mir/ExternalRegion.h
new file mode 100644
index 000000000..d9d6bfb56
--- /dev/null
+++ b/compiler/mir/include/mir/ExternalRegion.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_EXTERNAL_REGION_H_
+#define _MIR_EXTERNAL_REGION_H_
+
+#include "mir/Region.h"
+
+namespace mir
+{
+
+template <typename T> class ExternalRegion final : public Region<T>
+{
+public:
+ ExternalRegion(T *base, int32_t size) : _base{base}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ T *base() override { return _base; }
+ int32_t size() const override { return _size; }
+
+private:
+ T *const _base;
+ int32_t const _size;
+};
+
+} // namespace mir
+
+#endif //_MIR_EXTERNAL_REGION_H_
diff --git a/compiler/mir/include/mir/Graph.h b/compiler/mir/include/mir/Graph.h
new file mode 100644
index 000000000..bf94cfb14
--- /dev/null
+++ b/compiler/mir/include/mir/Graph.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_GRAPH_H_
+#define _MIR_GRAPH_H_
+
+#include <string>
+#include <vector>
+#include <type_traits>
+#include <unordered_set>
+#include <unordered_map>
+#include <set>
+
+#include "mir/Operation.h"
+#include "mir/ops/InputOp.h"
+#include "mir/ops/OutputOp.h"
+
+namespace mir
+{
+
+class Graph
+{
+public:
+ explicit Graph() = default;
+
+ virtual ~Graph();
+
+ template <typename T, typename... Args> Operation *create(Args &&... args)
+ {
+ auto op = new T(std::forward<Args>(args)...);
+ op->setId(_last_node_id++);
+ registerOp(op);
+ return op;
+ }
+
+ /**
+ * @brief Copies `old_op` with new inputs and registers it into graph.
+ */
+ Operation *copyOpWithInputs(Operation *old_op, const std::vector<Operation::Output *> &inputs)
+ {
+ assert(inputs.size() == old_op->getNumInputs());
+ auto op = old_op->copyWithInputs(inputs);
+ op->setId(_last_node_id++);
+ registerOp(op);
+ return op;
+ }
+
+ void accept(IVisitor *visitor);
+
+ /**
+ * @brief Returns all graph nodes
+ * @return vector containing all graph nodes
+ */
+ std::unordered_set<Operation *> getNodes() const { return _ops; }
+
+ /**
+ * @brief Returns all graph input nodes
+ * @returns vector containing all graph input nodes
+ */
+ std::vector<ops::InputOp *> getInputs() const { return _inputs; }
+
+ /**
+ * @brief Returns all graph output nodes
+ * @returns vector containing all graph output nodes
+ */
+ std::vector<ops::OutputOp *> getOutputs() const { return _outputs; }
+
+ /**
+ * @brief remove node from graph, along with its links in other nodes
+ * @param op node to be removed
+ */
+ void removeNode(Operation *op);
+
+ /**
+   * @brief Substitute node in graph with another keeping all edges
+   * @param op Node to substitute
+   * @param with Node to place instead
+ */
+ void replaceNode(Operation *op, Operation *with);
+
+private:
+ void registerOp(Operation *op);
+
+ std::unordered_set<Operation *> _ops;
+ size_t _last_node_id = 0;
+ // TODO Change these to unordered_sets.
+ std::vector<ops::InputOp *> _inputs;
+ std::vector<ops::OutputOp *> _outputs;
+};
+
+/**
+ * @brief Returns nodes of the graph sorted topologically.
+ */
+std::vector<Operation *> getSortedNodes(Graph *graph);
+
+} // namespace mir
+
+#endif //_MIR_GRAPH_H_
diff --git a/compiler/mir/include/mir/GraphPatternMatcher.h b/compiler/mir/include/mir/GraphPatternMatcher.h
new file mode 100644
index 000000000..9db5958d0
--- /dev/null
+++ b/compiler/mir/include/mir/GraphPatternMatcher.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_GRAPH_PATTERN_MATCHER_H_
+#define _MIR_GRAPH_PATTERN_MATCHER_H_
+
+#include "mir/Graph.h"
+
+namespace mir
+{
+
+class Operation;
+
+class GraphPatternMatcher
+{
+public:
+ using Predicate = bool(const Operation *);
+ explicit GraphPatternMatcher(Graph *g) : _g(g){};
+
+ /**
+ * @brief Match an edge with 2 predicates for ends of the edge
+ * @param pattern
+   * @return Vector of topmost ops of all matches; empty if no matches are found
+ */
+ std::vector<std::pair<Operation *, Operation *>> matchEdge(Predicate p1, Predicate p2);
+
+ /**
+ * @brief Match a two level tree where the bottommost node has multiple previous nodes
+ * @param p1 Predicate for top node
+ * @param p2 Predicate for bottom node
+ * @return Vector of pairs : all matches; empty if no matches are found
+ */
+ std::vector<std::pair<std::vector<Operation *>, Operation *>> matchUpBush(Predicate p1,
+ Predicate p2);
+
+private:
+ Graph *_g;
+};
+
+} // namespace mir
+
+#endif //_MIR_GRAPH_PATTERN_MATCHER_H_
diff --git a/compiler/mir/include/mir/Index.h b/compiler/mir/include/mir/Index.h
new file mode 100644
index 000000000..83dfe34af
--- /dev/null
+++ b/compiler/mir/include/mir/Index.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_INDEX_H_
+#define _MIR_INDEX_H_
+
+#include <initializer_list>
+#include <cstdint>
+#include <ostream>
+
+#include "mir/Common.h"
+
+#include "adtidas/SmallVector.h"
+
+namespace mir
+{
+
+class Index
+{
+public:
+ Index() = default;
+
+ Index(std::initializer_list<int32_t> &&l) noexcept : _indices(std::move(l))
+ {
+ // DO NOTHING
+ }
+
+ explicit Index(size_t rank) : _indices(rank) {}
+
+ int32_t rank() const { return static_cast<int32_t>(_indices.size()); }
+
+ /**
+ * @brief resize index to given dimension number
+ * @param size new number of dimensions
+ * @return *this
+ * @warning if new size is greater than old, new dimensions are undefined
+ */
+ Index &resize(int32_t size);
+
+ /**
+ * @brief fill all axis with `index`
+ * @return `*this`
+ */
+ Index &fill(int32_t index);
+
+ /**
+ * @brief return position on given axis
+ * @param axis index of axis to get index on. If axis is negative returns axis from the last
+ * @return
+ */
+ int32_t &at(int32_t axis) { return _indices[wrap_index(axis, _indices.size())]; }
+
+ /**
+ * @brief return position on given axis
+ * @param axis index of axis to get index on. If axis is negative returns axis from the last
+ * @return
+ */
+ int32_t at(int32_t axis) const { return _indices[wrap_index(axis, _indices.size())]; }
+
+private:
+ adt::small_vector<int32_t, MAX_DIMENSION_COUNT> _indices;
+};
+
+std::ostream &operator<<(std::ostream &s, const Index &idx);
+
+} // namespace mir
+
+#endif //_MIR_INDEX_H_
diff --git a/compiler/mir/include/mir/IrDotDumper.h b/compiler/mir/include/mir/IrDotDumper.h
new file mode 100644
index 000000000..e6c295cb7
--- /dev/null
+++ b/compiler/mir/include/mir/IrDotDumper.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_IR_DOT_DUMPER_H_
+#define _MIR_IR_DOT_DUMPER_H_
+
+#include <ostream>
+
+namespace mir
+{
+
+class Graph;
+
+void dumpGraph(const Graph *graph, std::ostream &stream);
+
+} // namespace mir
+
+#endif //_MIR_IR_DOT_DUMPER_H_
diff --git a/compiler/mir/include/mir/OpDefs.h b/compiler/mir/include/mir/OpDefs.h
new file mode 100644
index 000000000..f7351ab26
--- /dev/null
+++ b/compiler/mir/include/mir/OpDefs.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPDEFS_H_
+#define _MIR_OPDEFS_H_
+
+#include "mir/ops/AbsOp.h"
+#include "mir/ops/AddOp.h"
+#include "mir/ops/AvgPool2DOp.h"
+#include "mir/ops/BroadcastOp.h"
+#include "mir/ops/CappedReluOp.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/Deconv2DOp.h"
+#include "mir/ops/DepthwiseConv2DOp.h"
+#include "mir/ops/DequantizeOp.h"
+#include "mir/ops/DivOp.h"
+#include "mir/ops/EluOp.h"
+#include "mir/ops/EqualOp.h"
+#include "mir/ops/FullyConnectedOp.h"
+#include "mir/ops/GatherOp.h"
+#include "mir/ops/GreaterOp.h"
+#include "mir/ops/HardSwishOp.h"
+#include "mir/ops/InputOp.h"
+#include "mir/ops/LeakyReluOp.h"
+#include "mir/ops/LessOp.h"
+#include "mir/ops/MaxOp.h"
+#include "mir/ops/MaxPool2DOp.h"
+#include "mir/ops/MulOp.h"
+#include "mir/ops/OutputOp.h"
+#include "mir/ops/PadOp.h"
+#include "mir/ops/QuantizeOp.h"
+#include "mir/ops/ReduceMeanOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/ops/ReshapeOp.h"
+#include "mir/ops/ResizeOp.h"
+#include "mir/ops/SigmoidOp.h"
+#include "mir/ops/SliceOp.h"
+#include "mir/ops/SoftmaxOp.h"
+#include "mir/ops/SqrtOp.h"
+#include "mir/ops/SqueezeOp.h"
+#include "mir/ops/SubOp.h"
+#include "mir/ops/TanhOp.h"
+#include "mir/ops/TransposeOp.h"
+
+#endif // _MIR_OPDEFS_H_
diff --git a/compiler/mir/include/mir/Operation.h b/compiler/mir/include/mir/Operation.h
new file mode 100644
index 000000000..37af6d2a8
--- /dev/null
+++ b/compiler/mir/include/mir/Operation.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPERATION_H_
+#define _MIR_OPERATION_H_
+
+#include "mir/TensorType.h"
+
+#include <deque>
+#include <string>
+#include <limits>
+#include <vector>
+
+namespace mir
+{
+
+class IVisitor;
+
+class Operation
+{
+public:
+ enum class Type
+ {
+#define HANDLE_OP(OpType, OpClass) OpType,
+#include "mir/Operations.inc"
+#undef HANDLE_OP
+ };
+
+ /// @brief Represents a use of an operation output.
+ struct Use
+ {
+ Use(Operation *node, std::size_t index) : _node(node), _index(index) {}
+
+ Operation *getNode() const { return _node; }
+
+ std::size_t getIndex() const { return _index; }
+
+ bool operator==(const Use &other) { return _node == other._node && _index == other._index; }
+
+ private:
+ Operation *_node;
+ std::size_t _index;
+ };
+
+ /// @brief Represents an output of a node.
+ class Output
+ {
+ public:
+ Output(Operation *node, std::size_t index) : _node(node), _index(index) {}
+
+ ~Output() = default;
+
+ Output(const Output &) = delete;
+ Output(Output &&) = delete;
+ Output &operator=(const Output &) = delete;
+ Output &operator=(Output &&) = delete;
+
+ /// @brief Returns the node this is an output of.
+ Operation *getNode() { return _node; }
+ const Operation *getNode() const { return _node; }
+
+ /// @brief Returns the index of this output among all the outputs of the node.
+ std::size_t getIndex() const { return _index; }
+
+ /// @brief Returns the inputs that consume this output.
+ const std::vector<Use> &getUses() const { return _uses; }
+
+ /// @brief Adds the specified use to the uses of this output.
+ void addUse(Use use) { _uses.push_back(use); }
+
+ /// @brief Removes the specified use from the uses of this output.
+ void removeUse(Use use);
+
+ /// @brief Replace the defs of all uses of this output with the specified def.
+ void replaceAllUsesWith(Output *new_def);
+
+ /// @brief Gets the type of this output.
+ const TensorType &getType() const { return _type; }
+
+ /// @brief Sets the type of this output.
+ /// @warning Use with caution, because it can make the IR inconsistent.
+ void setType(const TensorType &type) { _type = type; }
+
+ // Convenient accessors.
+ DataType getElementType() const { return getType().getElementType(); }
+ const Shape &getShape() const { return getType().getShape(); }
+
+ // TODO Remove in favor of `setType`.
+ void setShape(const Shape &shape) { setType(TensorType(_type.getElementType(), shape)); }
+
+ const std::string &getName() const { return _name; }
+ void setName(const std::string &name) { _name = name; }
+
+    /// @brief Set AffineQuantization to Output
+ void setQuantization(const mir::AffineQuantization &quant)
+ {
+ setType(TensorType(getElementType(), getShape(), quant));
+ }
+
+ private:
+ Operation *_node;
+ std::size_t _index;
+ std::vector<Use> _uses;
+ TensorType _type;
+ std::string _name;
+ };
+
+ virtual ~Operation() = default;
+
+ Type getType() const { return _type; }
+
+ std::size_t getId() const { return _id; }
+ void setId(std::size_t id) { _id = id; }
+
+ std::size_t getNumInputs() const { return _inputs.size(); }
+ std::size_t getNumOutputs() const { return _outputs.size(); }
+
+ std::deque<Output *> &getInputs() { return _inputs; }
+ const std::deque<Output *> &getInputs() const { return _inputs; }
+
+ std::deque<Output> &getOutputs() { return _outputs; }
+ const std::deque<Output> &getOutputs() const { return _outputs; }
+
+ Output *getInput(std::size_t index)
+ {
+ assert(index < _inputs.size());
+ return _inputs[index];
+ }
+
+ const Output *getInput(std::size_t index) const
+ {
+ assert(index < _inputs.size());
+ return _inputs[index];
+ }
+
+ Output *getOutput(std::size_t index)
+ {
+ assert(index < _outputs.size());
+ return &_outputs[index];
+ }
+
+ const Output *getOutput(std::size_t index) const
+ {
+ assert(index < _outputs.size());
+ return &_outputs[index];
+ }
+
+ const Shape &getInputShape(std::size_t index) const { return getInput(index)->getShape(); }
+
+ const Shape &getOutputShape(std::size_t index) const { return getOutput(index)->getShape(); }
+
+ void accept(IVisitor *v);
+
+ virtual Operation *copyWithInputs(const std::vector<Output *> &inputs) = 0;
+
+protected:
+ Operation(Type type, const std::vector<Output *> &inputs, std::size_t num_outputs = 1);
+
+ void setOutputType(std::size_t index, const TensorType &type) { getOutput(index)->setType(type); }
+
+private:
+ Type _type;
+ std::size_t _id = std::numeric_limits<std::size_t>::max();
+ std::deque<Output *> _inputs;
+ std::deque<Output> _outputs;
+};
+
+/**
+ * @return the opcode of operation in string format, like "Add", "Conv2d", etc.
+ */
+const std::string &getTypeName(Operation::Type type);
+
+} // namespace mir
+
+#endif //_MIR_OPERATION_H_
diff --git a/compiler/mir/include/mir/Operations.inc b/compiler/mir/include/mir/Operations.inc
new file mode 100644
index 000000000..d5736cb9b
--- /dev/null
+++ b/compiler/mir/include/mir/Operations.inc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HANDLE_OP
+#error "You should define HANDLE_OP before including this file"
+#endif // HANDLE_OP
+
+// X-macro table of every MIR operation: HANDLE_OP(type_enum_entry, OpClassName).
+// An including file defines HANDLE_OP to generate per-operation code (e.g.
+// forward declarations or visitor methods, as in mir/Visitor.h) and then
+// #undef's it afterwards.
+HANDLE_OP(abs, AbsOp)
+HANDLE_OP(add, AddOp)
+HANDLE_OP(avgPool2D, AvgPool2DOp)
+HANDLE_OP(broadcast, BroadcastOp)
+HANDLE_OP(cappedReLU, CappedReluOp)
+HANDLE_OP(concat, ConcatOp)
+HANDLE_OP(constant, ConstantOp)
+HANDLE_OP(conv2D, Conv2DOp)
+HANDLE_OP(deConv2D, DeConv2DOp)
+HANDLE_OP(depthwiseConv, DepthwiseConv2DOp)
+HANDLE_OP(dequantize, DequantizeOp)
+HANDLE_OP(div, DivOp)
+HANDLE_OP(ELU, EluOp)
+HANDLE_OP(equal, EqualOp)
+HANDLE_OP(fullyConnected, FullyConnectedOp)
+HANDLE_OP(gather, GatherOp)
+HANDLE_OP(greater, GreaterOp)
+HANDLE_OP(hardswish, HardSwishOp)
+HANDLE_OP(input, InputOp)
+HANDLE_OP(leakyReLU, LeakyReluOp)
+HANDLE_OP(less, LessOp)
+HANDLE_OP(max, MaxOp)
+HANDLE_OP(maxPool2D, MaxPool2DOp)
+HANDLE_OP(mul, MulOp)
+HANDLE_OP(output, OutputOp)
+HANDLE_OP(pad, PadOp)
+HANDLE_OP(quantize, QuantizeOp)
+HANDLE_OP(reduceMean, ReduceMeanOp)
+HANDLE_OP(ReLU, ReluOp)
+HANDLE_OP(reshape, ReshapeOp)
+HANDLE_OP(resizeIm, ResizeOp)
+HANDLE_OP(sigmoid, SigmoidOp)
+HANDLE_OP(slice, SliceOp)
+HANDLE_OP(softmax, SoftmaxOp)
+HANDLE_OP(sqrt, SqrtOp)
+HANDLE_OP(squeeze, SqueezeOp)
+HANDLE_OP(sub, SubOp)
+HANDLE_OP(tanh, TanhOp)
+HANDLE_OP(transpose, TransposeOp)
diff --git a/compiler/mir/include/mir/Quantization.h b/compiler/mir/include/mir/Quantization.h
new file mode 100644
index 000000000..d266ee00d
--- /dev/null
+++ b/compiler/mir/include/mir/Quantization.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_QUANTIZATION_H_
+#define _MIR_QUANTIZATION_H_
+
+namespace mir
+{
+
+/**
+ * @brief Affine (scale / zero-point) quantization parameters for a tensor type.
+ *
+ * A default-constructed instance is "empty" (no quantization information);
+ * the (scale, zero_point) constructor marks the instance as non-empty.
+ */
+class AffineQuantization
+{
+public:
+ // Creates an empty descriptor (_empty stays true).
+ AffineQuantization() = default;
+
+ AffineQuantization(float scale, int zero_point)
+ : _scale(scale), _zero_point(zero_point), _empty(false)
+ {
+ }
+
+ float getScale() const { return _scale; }
+
+ int getZeroPoint() const { return _zero_point; }
+
+ // True when no quantization parameters have been set.
+ bool empty() const { return _empty; }
+
+private:
+ float _scale = 0.f;
+ int _zero_point = 0;
+ bool _empty = true;
+};
+
+} // namespace mir
+
+#endif //_MIR_QUANTIZATION_H_
diff --git a/compiler/mir/include/mir/Region.h b/compiler/mir/include/mir/Region.h
new file mode 100644
index 000000000..f56b4cae1
--- /dev/null
+++ b/compiler/mir/include/mir/Region.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_REGION_H_
+#define _MIR_REGION_H_
+
+#include <cstdint>
+#include "mir/Shape.h"
+
+namespace mir
+{
+
+/**
+ * @brief Abstract view of a contiguous run of T elements: a base pointer
+ * plus an element count.
+ */
+template <typename T> struct Region
+{
+ virtual ~Region() = default;
+
+ // Pointer to the first element of the region.
+ virtual T *base() = 0;
+
+ // Number of elements in the region.
+ virtual int32_t size() const = 0;
+};
+
+} // namespace mir
+
+#endif //_MIR_REGION_H_
diff --git a/compiler/mir/include/mir/Shape.h b/compiler/mir/include/mir/Shape.h
new file mode 100644
index 000000000..cb33e6784
--- /dev/null
+++ b/compiler/mir/include/mir/Shape.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_SHAPE_H_
+#define _MIR_SHAPE_H_
+
+#include <initializer_list>
+#include <vector>
+#include <cstdint>
+
+#include "adtidas/SmallVector.h"
+#include "mir/Common.h"
+
+namespace mir
+{
+
+/**
+ * @brief A tensor shape: an ordered list of dimension sizes.
+ *
+ * dim() accepts negative axes, which are passed through wrap_index()
+ * (mir/Common.h) -- presumably mapping a negative axis to axis + rank,
+ * Python style; confirm in Common.h.
+ */
+class Shape
+{
+public:
+ // Sentinel dimension value meaning "this dimension is determined automatically".
+ static constexpr int32_t autoDim = -1;
+
+ Shape() = default;
+
+ // Creates a shape of the given rank with default-initialized dimensions.
+ explicit Shape(int32_t rank) : _dims(rank) {}
+
+ Shape(std::initializer_list<int32_t> &&dims) : _dims(std::move(dims)) {}
+
+ explicit Shape(const std::vector<int32_t> &dims) : _dims(std::begin(dims), std::end(dims)) {}
+
+ int32_t rank() const { return static_cast<int32_t>(_dims.size()); }
+
+ void resize(int32_t size);
+
+ // Mutable access to a dimension; negative `axis` indexes from the end.
+ // NOTE(review): stray ';' after the closing brace below -- harmless empty declaration.
+ int32_t &dim(int32_t axis) noexcept
+ {
+ auto dim = wrap_index(axis, _dims.size());
+ return _dims[dim];
+ };
+
+ // Read-only access to a dimension; negative `axis` indexes from the end.
+ int32_t dim(int32_t axis) const noexcept
+ {
+ auto dim = wrap_index(axis, _dims.size());
+ return _dims[dim];
+ }
+
+ // Total number of elements described by the shape (defined out of line).
+ int32_t numElements() const;
+
+ bool operator==(const Shape &rhs) const { return _dims == rhs._dims; }
+
+ bool operator!=(const Shape &rhs) const { return !(*this == rhs); }
+
+private:
+ adt::small_vector<int32_t, MAX_DIMENSION_COUNT> _dims;
+};
+
+Shape broadcastShapes(const Shape &lhs_shape, const Shape &rhs_shape);
+
+std::string toString(const Shape &shape);
+
+} // namespace mir
+
+#endif //_MIR_SHAPE_H_
diff --git a/compiler/mir/include/mir/ShapeRange.h b/compiler/mir/include/mir/ShapeRange.h
new file mode 100644
index 000000000..a450bf090
--- /dev/null
+++ b/compiler/mir/include/mir/ShapeRange.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_SHAPE_RANGE_H_
+#define _MIR_SHAPE_RANGE_H_
+
+#include <cassert>
+
+#include "mir/Shape.h"
+#include "mir/Index.h"
+
+namespace mir
+{
+
+/**
+ * @brief Forward iterator over every index of a Shape; the last axis varies
+ * fastest (see operator++).
+ *
+ * NOTE(review): std::iterator is deprecated since C++17; consider declaring
+ * the five iterator member typedefs directly instead.
+ */
+class ShapeIter
+ : public std::iterator<std::forward_iterator_tag, Index, std::size_t, Index *, Index &>
+{
+public:
+ // Advances odometer-style: increments the last axis and carries overflow
+ // into the preceding axes; _pos tracks the linear position.
+ ShapeIter &operator++()
+ {
+ if (_shape.rank() > 0)
+ {
+ auto *pidx = &_index.at(0);
+ auto *pshape = &_shape.dim(0);
+ int32_t rank = _shape.rank();
+ int32_t c = rank - 1;
+ pidx[c]++;
+ while (pidx[c] >= pshape[c] && c > 0)
+ {
+ pidx[c] = 0;
+ pidx[--c]++;
+ }
+ }
+ _pos++;
+ return *this;
+ }
+
+ // Post-increment: returns the pre-increment copy.
+ const ShapeIter operator++(int)
+ {
+ ShapeIter it = *this;
+ ++*this;
+ return it;
+ }
+
+ const Index &operator*() const { return _index; }
+
+ // Compared by linear position only; the asserts check both iterators
+ // belong to the same range.
+ bool operator!=(const ShapeIter &iter) const
+ {
+ assert(iter._index.rank() == _index.rank());
+ assert(iter._shape == _shape);
+ return _pos != iter._pos;
+ }
+
+private:
+ // Constructible only by ShapeRange (friend): starts at linear position
+ // `pos` with a zero-filled index of matching rank.
+ explicit ShapeIter(Shape &shape, int32_t pos) : _pos(pos), _shape(shape)
+ {
+ _index.resize(shape.rank());
+ _index.fill(0);
+ }
+
+ int32_t _pos;
+ Index _index;
+ Shape &_shape;
+
+ friend class ShapeRange;
+};
+
+/**
+ * @brief Iterable range over all valid indices of a shape, for use in
+ * range-based for loops (see begin()/end()).
+ */
+class ShapeRange
+{
+public:
+ explicit ShapeRange(const Shape &shape) : _shape(shape) {}
+
+ explicit ShapeRange(Shape &&shape) : _shape(std::move(shape)) {}
+
+ ShapeIter begin() { return ShapeIter(_shape, 0); }
+
+ // End iterator sits at linear position numElements().
+ ShapeIter end() { return ShapeIter(_shape, _shape.numElements()); }
+
+ // True iff every component of idx lies within [0, dim) for its axis.
+ bool contains(const Index &idx)
+ {
+ assert(idx.rank() == _shape.rank());
+ for (int32_t d = 0; d < idx.rank(); ++d)
+ {
+ if ((idx.at(d) >= _shape.dim(d)) || (idx.at(d) < 0))
+ return false;
+ }
+ return true;
+ }
+
+private:
+ Shape _shape;
+};
+
+} // namespace mir
+
+#endif //_MIR_SHAPE_RANGE_H_
diff --git a/compiler/mir/include/mir/Tensor.h b/compiler/mir/include/mir/Tensor.h
new file mode 100644
index 000000000..5e1f3f236
--- /dev/null
+++ b/compiler/mir/include/mir/Tensor.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_TENSOR_H_
+#define _MIR_TENSOR_H_
+
+#include "mir/ExternalRegion.h"
+#include "mir/TensorVariant.h"
+
+namespace mir
+{
+
+/**
+ * @brief Typed accessor over an untyped TensorVariant.
+ *
+ * Holds only a reference to the variant, which must outlive this object.
+ * Accessors reinterpret_cast the variant's raw storage to T*, so T must
+ * match the variant's element type -- a mismatch is undefined behavior.
+ */
+template <typename T> class Tensor final
+{
+public:
+ explicit Tensor(const TensorVariant &t) : _proxy(t) {}
+
+ T at(const Index &id) const { return *reinterpret_cast<T *>(this->_proxy.at(id)); }
+
+ T &at(const Index &id) { return *reinterpret_cast<T *>(this->_proxy.at(id)); }
+
+ T atOffset(int32_t offset) const { return *reinterpret_cast<T *>(this->_proxy.atOffset(offset)); }
+
+ T &atOffset(int32_t offset) { return *reinterpret_cast<T *>(this->_proxy.atOffset(offset)); }
+
+ // Returns a region starting at idx and running to the end of the last axis.
+ ExternalRegion<T> getRegion(const Index &idx)
+ {
+ // Only last dimension is safe to process contiguously
+ auto last_dim = getShape().rank() - 1;
+ auto base = reinterpret_cast<T *>(_proxy.at(idx));
+ auto length = getShape().dim(last_dim) - idx.at(last_dim);
+ return ExternalRegion<T>(base, length);
+ }
+
+ const Shape &getShape() const { return _proxy.getShape(); };
+
+private:
+ const TensorVariant &_proxy;
+};
+
+extern template class Tensor<float>;
+
+extern template class Tensor<double>;
+
+extern template class Tensor<int>;
+
+} // namespace mir
+
+#endif //_MIR_TENSOR_H_
diff --git a/compiler/mir/include/mir/TensorType.h b/compiler/mir/include/mir/TensorType.h
new file mode 100644
index 000000000..98797d687
--- /dev/null
+++ b/compiler/mir/include/mir/TensorType.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_TENSOR_TYPE_H_
+#define _MIR_TENSOR_TYPE_H_
+
+#include "mir/DataType.h"
+#include "mir/Quantization.h"
+#include "mir/Shape.h"
+
+namespace mir
+{
+
+/**
+ * @brief Static type of a tensor: element data type, shape, and optional
+ * affine quantization parameters.
+ */
+class TensorType final
+{
+public:
+ TensorType() = default;
+
+ TensorType(DataType element_type, const Shape &shape) : _element_type(element_type), _shape(shape)
+ {
+ }
+
+ TensorType(DataType element_type, const Shape &shape, const AffineQuantization &quant)
+ : _element_type(element_type), _shape(shape), _quantization(quant)
+ {
+ }
+
+ DataType getElementType() const { return _element_type; }
+
+ const Shape &getShape() const { return _shape; }
+
+ const AffineQuantization &getQuantization() const { return _quantization; }
+ void setQuantization(const AffineQuantization &quant) { _quantization = quant; }
+ // A type is quantized iff its quantization descriptor is non-empty.
+ bool isQuantized() const { return !_quantization.empty(); }
+
+private:
+ DataType _element_type = DataType::UNKNOWN;
+ Shape _shape;
+ AffineQuantization _quantization;
+};
+
+} // namespace mir
+#endif // _MIR_TENSOR_TYPE_H_
diff --git a/compiler/mir/include/mir/TensorUtil.h b/compiler/mir/include/mir/TensorUtil.h
new file mode 100644
index 000000000..757f99f0b
--- /dev/null
+++ b/compiler/mir/include/mir/TensorUtil.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_TENSOR_UTIL_H_
+#define _MIR_TENSOR_UTIL_H_
+
+#include "mir/Index.h"
+#include "mir/ShapeRange.h"
+#include "mir/TensorVariant.h"
+
+#include <cstring>
+
+namespace mir
+{
+
+/**
+ * @brief Returns the shape permuted by the compile-time axis list, i.e.
+ * result.dim(i) == shape.dim(Ints[i]).
+ *
+ * NOTE(review): the assert compares std::size_t (sizeof...) against the
+ * int32_t rank -- may trigger a signed/unsigned comparison warning.
+ */
+template <int32_t... Ints> Shape transposeShape(const Shape &shape)
+{
+ assert(sizeof...(Ints) == shape.rank());
+ return {shape.dim(Ints)...};
+}
+
+/**
+ * @brief Returns a copy of the tensor with its axes permuted by the
+ * compile-time axis list, preserving element type and quantization.
+ */
+template <unsigned int... Ints> TensorVariant transposeTensor(const TensorVariant &tensor)
+{
+ const auto &shape = tensor.getShape();
+ Shape transposed_shape{shape.dim(Ints)...};
+
+ auto elem_type = tensor.getElementType();
+ auto elem_size = tensor.getElementSize();
+ TensorType transposed_type(elem_type, transposed_shape);
+ // Carry quantization parameters over to the transposed type, if any.
+ if (tensor.getType().isQuantized())
+ transposed_type.setQuantization(tensor.getType().getQuantization());
+
+ TensorVariant transposed_tensor(transposed_type);
+
+ // Element-by-element copy: source index -> permuted destination index.
+ for (const auto &index : ShapeRange(shape))
+ {
+ Index transposed_index{index.at(Ints)...};
+ std::memcpy(transposed_tensor.at(transposed_index), tensor.at(index), elem_size);
+ }
+
+ return transposed_tensor;
+}
+
+} // namespace mir
+
+#endif //_MIR_TENSOR_UTIL_H_
diff --git a/compiler/mir/include/mir/TensorVariant.h b/compiler/mir/include/mir/TensorVariant.h
new file mode 100644
index 000000000..921fd4468
--- /dev/null
+++ b/compiler/mir/include/mir/TensorVariant.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_TENSOR_VARIANT_H_
+#define _MIR_TENSOR_VARIANT_H_
+
+#include "mir/Common.h"
+#include "mir/Index.h"
+#include "mir/TensorType.h"
+
+#include <adtidas/SmallVector.h>
+
+#include <cassert>
+#include <memory>
+
+namespace mir
+{
+
+/**
+ * @brief Reference-counted tensor storage: a TensorType plus a raw byte
+ * buffer addressed through per-axis strides.
+ *
+ * NOTE(review): _data is a std::shared_ptr<char> over a buffer; the
+ * constructors (defined out of line) must install an array-aware deleter
+ * for the allocation -- confirm in the implementation.
+ */
+class TensorVariant
+{
+public:
+ explicit TensorVariant(const TensorType &type);
+
+ TensorVariant(const TensorType &type, const void *data);
+
+ // TODO Remove as deprecated.
+ TensorVariant(DataType element_type, const Shape &shape);
+
+ // TODO Remove as deprecated.
+ TensorVariant(DataType element_type, const Shape &shape, const void *data);
+
+ // Presumably reinterprets t_old's storage with a new shape -- constructor
+ // body is out of line; confirm sharing semantics there.
+ TensorVariant(const TensorVariant &t_old, const Shape &shape);
+
+ virtual ~TensorVariant() = default;
+
+ // Raw pointer to the element at a multi-dimensional index.
+ char *at(const Index &idx) const { return _data.get() + getOffset(idx) * _element_size; }
+
+ // Raw pointer to the element at a linear offset (checked by assert only).
+ char *atOffset(int32_t offset) const
+ {
+ assert(offset >= 0 && offset < getShape().numElements());
+ return _data.get() + offset * _element_size;
+ }
+
+ // Linear element offset of idx: dot product of index components and strides.
+ size_t getOffset(const Index &idx) const
+ {
+ assert(idx.rank() == getShape().rank());
+ std::size_t offset = 0;
+ for (int i = 0; i < getShape().rank(); ++i)
+ offset += idx.at(i) * _strides[i];
+ return offset;
+ }
+
+ const TensorType &getType() const { return _type; }
+
+ DataType getElementType() const { return _type.getElementType(); }
+ const Shape &getShape() const { return _type.getShape(); }
+
+ // TODO Replace uses with `getElementType` and remove.
+ DataType getDataType() const { return _type.getElementType(); }
+ // FIXME This should not be a member of this class.
+ size_t getElementSize() const { return _element_size; }
+
+private:
+ TensorType _type;
+ std::shared_ptr<char> _data;
+ // Strides are measured in elements, not bytes: at() multiplies the
+ // accumulated offset by _element_size.
+ adt::small_vector<int_fast32_t, MAX_DIMENSION_COUNT> _strides;
+
+ size_t _element_size;
+};
+
+} // namespace mir
+
+#endif //_MIR_TENSOR_VARIANT_H_
diff --git a/compiler/mir/include/mir/Visitor.h b/compiler/mir/include/mir/Visitor.h
new file mode 100644
index 000000000..fed4c981f
--- /dev/null
+++ b/compiler/mir/include/mir/Visitor.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_VISITOR_H_
+#define _MIR_VISITOR_H_
+
+namespace mir
+{
+
+// Forward declare operations as we don't need anything but references
+namespace ops
+{
+#define HANDLE_OP(OpType, OpClass) class OpClass;
+#include "mir/Operations.inc"
+#undef HANDLE_OP
+} // namespace ops
+
+class Operation;
+
+/**
+ * @brief Interface for visitors
+ * Use in MIR component if you want to enforce to implement visits for all operations
+ *
+ * One pure virtual visit() overload is generated per operation class via the
+ * Operations.inc X-macro list.
+ */
+class IVisitor
+{
+public:
+// NOTE(review): included as "Operations.inc" here but as "mir/Operations.inc"
+// in the forward-declaration stanza above -- presumably both resolve to the
+// same header; consider unifying the paths.
+#define HANDLE_OP(OpType, OpClass) virtual void visit(ops::OpClass &) = 0;
+#include "Operations.inc"
+#undef HANDLE_OP
+
+ virtual ~IVisitor() = default;
+};
+
+/**
+ * @brief Base visitor with empty fallback function
+ *
+ * Overrides every visit() so subclasses need only override the operations
+ * they care about; the generated overrides are defined out of line and
+ * presumably delegate to visit_fallback() -- confirm in the implementation.
+ */
+class Visitor : public IVisitor
+{
+public:
+#define HANDLE_OP(OpType, OpClass) void visit(ops::OpClass &) override;
+#include "Operations.inc"
+#undef HANDLE_OP
+
+protected:
+ // Default no-op hook invoked for operations a subclass does not handle.
+ virtual void visit_fallback(Operation &) {}
+};
+
+} // namespace mir
+
+#endif //_MIR_VISITOR_H_
diff --git a/compiler/mir/include/mir/ops/AbsOp.h b/compiler/mir/include/mir/ops/AbsOp.h
new file mode 100644
index 000000000..d7dbd1622
--- /dev/null
+++ b/compiler/mir/include/mir/ops/AbsOp.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_ABS_OP_H_
+#define _MIR_OPS_ABS_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+/**
+ * @brief Element-wise absolute-value operation: one input, one output.
+ */
+class AbsOp : public Operation
+{
+public:
+ AbsOp(Output *arg) : Operation(Type::abs, {arg})
+ {
+ // Infer output shape.
+ // The output has the same element type and shape as the input.
+ setOutputType(0, {arg->getElementType(), arg->getShape()});
+ }
+
+ // Clones the operation onto new inputs (expects exactly one input).
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new AbsOp(inputs[0]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_ABS_OP_H_
diff --git a/compiler/mir/include/mir/ops/AddOp.h b/compiler/mir/include/mir/ops/AddOp.h
new file mode 100644
index 000000000..962cd48c5
--- /dev/null
+++ b/compiler/mir/include/mir/ops/AddOp.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_ADD_OP_H_
+#define _MIR_OPS_ADD_OP_H_
+
+#include "mir/ops/BinaryElementwiseOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+/**
+ * @brief Element-wise addition; output type inference is handled by the
+ * BinaryElementwiseOp base.
+ */
+class AddOp : public BinaryElementwiseOp
+{
+public:
+ AddOp(Output *arg1, Output *arg2) : BinaryElementwiseOp(Type::add, arg1, arg2) {}
+
+ // Clones the operation onto new inputs (expects exactly two inputs).
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new AddOp(inputs[0], inputs[1]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_ADD_OP_H_
diff --git a/compiler/mir/include/mir/ops/AvgPool2DOp.h b/compiler/mir/include/mir/ops/AvgPool2DOp.h
new file mode 100644
index 000000000..47fe058ee
--- /dev/null
+++ b/compiler/mir/include/mir/ops/AvgPool2DOp.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_AVG_POOL_OP_H_
+#define _MIR_OPS_AVG_POOL_OP_H_
+
+#include "mir/Operation.h"
+#include "mir/Attributes.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace mir
+{
+namespace ops
+{
+
+/**
+ * @brief 2D average-pooling operation; window, strides, padding, data format
+ * and the include-pad flag are carried in AvgPool2DOpAttributes.
+ */
+class AvgPool2DOp : public Operation
+{
+public:
+ AvgPool2DOp(Output *arg, const AvgPool2DOpAttributes &attributes)
+ : Operation(Type::avgPool2D, {arg}), _attributes(attributes)
+ {
+ inferOutputTypes();
+ }
+
+ // Clones the operation onto a new input, reusing the same attributes.
+ // NOTE(review): stray ';' after this method body -- harmless empty declaration.
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new AvgPool2DOp(inputs[0], _attributes);
+ };
+
+ const std::vector<std::int32_t> &getWindowSize() const { return _attributes.window; }
+
+ const std::vector<std::int32_t> &getStrides() const { return _attributes.strides; }
+
+ const std::vector<std::int32_t> &getPaddingBefore() const { return _attributes.padding_before; }
+
+ const std::vector<std::int32_t> &getPaddingAfter() const { return _attributes.padding_after; }
+
+ bool getIncludePad() const { return _attributes.include_pad; }
+
+ DataFormat getDataFormat() const { return _attributes.data_format; }
+
+ const AvgPool2DOpAttributes &getAttributes() const { return _attributes; }
+
+private:
+ // Computes the output type from input type and attributes (out of line).
+ void inferOutputTypes();
+
+ AvgPool2DOpAttributes _attributes;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_AVG_POOL_OP_H_
diff --git a/compiler/mir/include/mir/ops/BinaryElementwiseOp.h b/compiler/mir/include/mir/ops/BinaryElementwiseOp.h
new file mode 100644
index 000000000..8f344185a
--- /dev/null
+++ b/compiler/mir/include/mir/ops/BinaryElementwiseOp.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_BINARY_ELEMENTWISE_OP_H_
+#define _MIR_OPS_BINARY_ELEMENTWISE_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+/**
+ * @brief Common base for two-input element-wise operations (Add, Sub, Mul, ...).
+ * Constructible only by derived classes; output type inference is out of line.
+ */
+class BinaryElementwiseOp : public Operation
+{
+protected:
+ BinaryElementwiseOp(Type type, Output *lhs, Output *rhs) : Operation(type, {lhs, rhs})
+ {
+ inferOutputTypes();
+ }
+
+private:
+ void inferOutputTypes();
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_BINARY_ELEMENTWISE_OP_H_
diff --git a/compiler/mir/include/mir/ops/BroadcastOp.h b/compiler/mir/include/mir/ops/BroadcastOp.h
new file mode 100644
index 000000000..9d1cc221b
--- /dev/null
+++ b/compiler/mir/include/mir/ops/BroadcastOp.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_BROADCAST_OP_H_
+#define _MIR_OPS_BROADCAST_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+/**
+ * @brief Broadcasts the input tensor to a target shape (inference out of line).
+ */
+class BroadcastOp : public Operation
+{
+public:
+ BroadcastOp(Output *input, const Shape &target_shape) : Operation(Type::broadcast, {input})
+ {
+ inferOutputTypes(target_shape);
+ }
+
+ // Clones onto a new input, reusing the already-inferred output shape as target.
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new BroadcastOp(inputs[0], getOutputShape(0));
+ }
+
+private:
+ void inferOutputTypes(const Shape &target_shape);
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_BROADCAST_OP_H_
diff --git a/compiler/mir/include/mir/ops/CappedReluOp.h b/compiler/mir/include/mir/ops/CappedReluOp.h
new file mode 100644
index 000000000..efd06c99c
--- /dev/null
+++ b/compiler/mir/include/mir/ops/CappedReluOp.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_CAPPED_RELU_H_
+#define _MIR_OPS_CAPPED_RELU_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+/**
+ * @brief Capped ReLU activation with upper bound `cap` (e.g. ReLU6 for cap=6).
+ */
+class CappedReluOp : public Operation
+{
+public:
+ CappedReluOp(Output *arg, float cap) : Operation(Type::cappedReLU, {arg}), _cap(cap)
+ {
+ // Infer output shape.
+ // The output has the same element type and shape as the input.
+ setOutputType(0, {arg->getElementType(), arg->getShape()});
+ }
+
+ // Clones the operation onto a new input with the same cap.
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new CappedReluOp(inputs[0], _cap);
+ }
+
+ float getCap() const { return _cap; }
+
+private:
+ float _cap;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_CAPPED_RELU_H_
diff --git a/compiler/mir/include/mir/ops/ConcatOp.h b/compiler/mir/include/mir/ops/ConcatOp.h
new file mode 100644
index 000000000..4f46d4449
--- /dev/null
+++ b/compiler/mir/include/mir/ops/ConcatOp.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_CONCAT_OP_H_
+#define _MIR_OPS_CONCAT_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+/**
+ * @brief Description of tensor concatenation operation.
+ *
+ * Takes any number of inputs and joins them along a single axis; output
+ * type inference is defined out of line.
+ */
+class ConcatOp : public Operation
+{
+public:
+ ConcatOp(const std::vector<Output *> &args, int32_t axis)
+ : Operation(Type::concat, args), _axis(axis)
+ {
+ inferOutputTypes();
+ }
+
+ // Clones the operation onto new inputs along the same axis.
+ // NOTE(review): stray ';' after this method body -- harmless empty declaration.
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new ConcatOp(inputs, _axis);
+ };
+
+ // Returns the normalized (non-negative) concatenation axis.
+ int32_t getAxis() const
+ {
+ if (_axis < 0)
+ {
+ // Negative axis is used to index starting from the last element of the shape
+ // -1 means last element, -2 means second from end, like in python
+ int32_t res = _axis + getInputShape(0).rank();
+ assert(res >= 0);
+ return res;
+ }
+ return _axis;
+ }
+
+private:
+ void inferOutputTypes();
+
+ /// @brief The axis along which to concatenate, may be negative to index from the end
+ int32_t _axis;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_CONCAT_OP_H_
diff --git a/compiler/mir/include/mir/ops/ConstantOp.h b/compiler/mir/include/mir/ops/ConstantOp.h
new file mode 100644
index 000000000..ab2a592d9
--- /dev/null
+++ b/compiler/mir/include/mir/ops/ConstantOp.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_CONSTANT_OP_H_
+#define _MIR_OPS_CONSTANT_OP_H_
+
+#include "mir/Operation.h"
+#include "mir/TensorVariant.h"
+
+namespace mir
+{
+namespace ops
+{
+
+/**
+ * @brief Operation holding a constant tensor value; it has no inputs and its
+ * single output type is taken from the stored value.
+ */
+class ConstantOp : public Operation
+{
+public:
+ explicit ConstantOp(const TensorVariant &value) : Operation(Type::constant, {}), _value(value)
+ {
+ setOutputType(0, _value.getType());
+ }
+
+ const TensorVariant &getValue() const { return _value; }
+
+ // Deliberately unsupported: constants must not be copied through this API.
+ // NOTE(review): assert is compiled out under NDEBUG, in which case this
+ // silently returns nullptr.
+ Operation *copyWithInputs(const std::vector<mir::Operation::Output *> &input) override
+ {
+ assert(false && "Copying constants is not allowed!");
+ (void)input;
+ return nullptr;
+ }
+
+private:
+ TensorVariant _value;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_CONSTANT_OP_H_
diff --git a/compiler/mir/include/mir/ops/Conv2DOp.h b/compiler/mir/include/mir/ops/Conv2DOp.h
new file mode 100644
index 000000000..ec818dae5
--- /dev/null
+++ b/compiler/mir/include/mir/ops/Conv2DOp.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_CONV_2D_OP_H_
+#define _MIR_OPS_CONV_2D_OP_H_
+
+#include "mir/Operation.h"
+#include "mir/Attributes.h"
+#include <vector>
+
+namespace mir
+{
+namespace ops
+{
+
+class Conv2DOp : public Operation
+{
+public:
+ Conv2DOp(Output *input, Output *kernel, const Conv2DOpAttributes &attributes)
+ : Operation(Type::conv2D, {input, kernel}), _attributes(attributes)
+ {
+ inferOutputTypes();
+ }
+
+ Conv2DOp(Output *input, Output *kernel, Output *bias, const Conv2DOpAttributes &attributes)
+ : Operation(Type::conv2D, {input, kernel, bias}), _attributes(attributes)
+ {
+ inferOutputTypes();
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ if (inputs.size() == 2)
+ return new Conv2DOp(inputs[0], inputs[1], _attributes);
+ else
+ return new Conv2DOp(inputs[0], inputs[1], inputs[2], _attributes);
+ };
+
+ const std::vector<std::int32_t> &getStrides() const { return _attributes.strides; }
+
+ const std::vector<std::int32_t> &getPaddingBefore() const { return _attributes.padding_before; }
+
+ const std::vector<std::int32_t> &getPaddingAfter() const { return _attributes.padding_after; }
+
+ std::int32_t getNumGroups() const { return _attributes.num_groups; }
+
+ DataFormat getDataFormat() const { return _attributes.data_format; }
+
+ const Conv2DOpAttributes &getAttributes() const { return _attributes; }
+
+private:
+ void inferOutputTypes();
+
+ Conv2DOpAttributes _attributes;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_CONV_2D_OP_H_
diff --git a/compiler/mir/include/mir/ops/Deconv2DOp.h b/compiler/mir/include/mir/ops/Deconv2DOp.h
new file mode 100644
index 000000000..a7b548028
--- /dev/null
+++ b/compiler/mir/include/mir/ops/Deconv2DOp.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_DECONV_2D_OP_H_
+#define _MIR_OPS_DECONV_2D_OP_H_
+
+#include "mir/Operation.h"
+#include "mir/Attributes.h"
+#include "mir/ops/PaddingType.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace mir
+{
+namespace ops
+{
+
+class DeConv2DOp : public Operation
+{
+public:
+ DeConv2DOp(Output *input, Output *kernel, const Deconv2DOpAttributes &attributes)
+ : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
+ {
+ inferOutputTypes();
+ }
+
+ DeConv2DOp(Output *input, Output *kernel, const Deconv2DOpAttributes &attributes,
+ const Shape &output_shape)
+ : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
+ {
+ assert(input->getElementType() == kernel->getElementType());
+ setOutputType(0, {input->getElementType(), output_shape});
+ inferPaddings();
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ if (getPaddingType() == PaddingType::Explicit)
+ return new DeConv2DOp(inputs[0], inputs[1], _attributes);
+ else
+ return new DeConv2DOp(inputs[0], inputs[1], _attributes, getOutputShape(0));
+ }
+
+ const std::vector<std::int32_t> &getStrides() const { return _attributes.strides; }
+
+ PaddingType getPaddingType() const { return _attributes.padding_type; }
+
+ const std::vector<std::int32_t> &getPaddingBefore() const { return _attributes.padding_before; }
+
+ const std::vector<std::int32_t> &getPaddingAfter() const { return _attributes.padding_after; }
+
+ DataFormat getDataFormat() const { return _attributes.data_format; }
+
+ const Deconv2DOpAttributes &getAttributes() const { return _attributes; }
+
+private:
+ void inferOutputTypes();
+
+ /**
+ * @brief Compute paddings based on input shape, kernel shape and strides
+ */
+ void inferPaddings();
+
+ Deconv2DOpAttributes _attributes;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_DECONV_2D_OP_H_
diff --git a/compiler/mir/include/mir/ops/DepthwiseConv2DOp.h b/compiler/mir/include/mir/ops/DepthwiseConv2DOp.h
new file mode 100644
index 000000000..347b8e94f
--- /dev/null
+++ b/compiler/mir/include/mir/ops/DepthwiseConv2DOp.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_DEPTHWISE_CONV_2D_OP_H_
+#define _MIR_OPS_DEPTHWISE_CONV_2D_OP_H_
+
+#include "mir/Operation.h"
+#include "mir/Attributes.h"
+#include <vector>
+
+namespace mir
+{
+namespace ops
+{
+
+class DepthwiseConv2DOp : public Operation
+{
+public:
+ DepthwiseConv2DOp(Output *input, Output *kernel, const Conv2DOpAttributes &attributes)
+ : Operation(Type::depthwiseConv, {input, kernel}), _attributes(attributes)
+ {
+ inferOutputTypes();
+ }
+
+ DepthwiseConv2DOp(Output *input, Output *kernel, Output *bias,
+ const Conv2DOpAttributes &attributes)
+ : Operation(Type::depthwiseConv, {input, kernel, bias}), _attributes(attributes)
+ {
+ inferOutputTypes();
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ if (inputs.size() == 2)
+ return new DepthwiseConv2DOp(inputs[0], inputs[1], _attributes);
+ else
+ return new DepthwiseConv2DOp(inputs[0], inputs[1], inputs[2], _attributes);
+ }
+
+ const std::vector<std::int32_t> &getStrides() const { return _attributes.strides; }
+
+ const std::vector<std::int32_t> &getPaddingBefore() const { return _attributes.padding_before; }
+
+ const std::vector<std::int32_t> &getPaddingAfter() const { return _attributes.padding_after; }
+
+ DataFormat getDataFormat() const { return _attributes.data_format; }
+
+ const Conv2DOpAttributes &getAttributes() const { return _attributes; }
+
+private:
+ void inferOutputTypes();
+
+ mir::Conv2DOpAttributes _attributes;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_DEPTHWISE_CONV_2D_OP_H_
diff --git a/compiler/mir/include/mir/ops/DequantizeOp.h b/compiler/mir/include/mir/ops/DequantizeOp.h
new file mode 100644
index 000000000..0b412235e
--- /dev/null
+++ b/compiler/mir/include/mir/ops/DequantizeOp.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_DEQUANTIZE_OP_H_
+#define _MIR_OPS_DEQUANTIZE_OP_H_
+
+#include "mir/Operation.h"
+#include "mir/DataFormat.h"
+#include <vector>
+
+namespace mir
+{
+namespace ops
+{
+
+class DequantizeOp : public Operation
+{
+public:
+ explicit DequantizeOp(Output *input) : Operation(Type::dequantize, {input})
+ {
+ setOutputType(0, {input->getElementType(), input->getShape()});
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new DequantizeOp(inputs[0]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_DEQUANTIZE_OP_H_
diff --git a/compiler/mir/include/mir/ops/DivOp.h b/compiler/mir/include/mir/ops/DivOp.h
new file mode 100644
index 000000000..349e75b9a
--- /dev/null
+++ b/compiler/mir/include/mir/ops/DivOp.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_DIV_OP_H_
+#define _MIR_OPS_DIV_OP_H_
+
+#include "mir/ops/BinaryElementwiseOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class DivOp : public BinaryElementwiseOp
+{
+public:
+ DivOp(Output *arg1, Output *arg2) : BinaryElementwiseOp(Type::div, arg1, arg2) {}
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new DivOp(inputs[0], inputs[1]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_DIV_OP_H_
diff --git a/compiler/mir/include/mir/ops/EluOp.h b/compiler/mir/include/mir/ops/EluOp.h
new file mode 100644
index 000000000..c13b40251
--- /dev/null
+++ b/compiler/mir/include/mir/ops/EluOp.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_ELU_OP_H_
+#define _MIR_OPS_ELU_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class EluOp : public Operation
+{
+public:
+ EluOp(Output *arg, float alpha) : Operation(Type::ELU, {arg}), _alpha(alpha)
+ {
+ setOutputType(0, {arg->getElementType(), arg->getShape()});
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new EluOp(inputs[0], _alpha);
+ }
+
+ float getAlpha() const { return _alpha; }
+
+private:
+ float _alpha = 1.0;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_ELU_OP_H_
diff --git a/compiler/mir/include/mir/ops/EqualOp.h b/compiler/mir/include/mir/ops/EqualOp.h
new file mode 100644
index 000000000..964c2e809
--- /dev/null
+++ b/compiler/mir/include/mir/ops/EqualOp.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_EQUAL_OP_H_
+#define _MIR_OPS_EQUAL_OP_H_
+
+#include "mir/ops/BinaryElementwiseOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class EqualOp : public BinaryElementwiseOp
+{
+public:
+ EqualOp(Output *arg1, Output *arg2) : BinaryElementwiseOp(Type::equal, arg1, arg2)
+ {
+ setOutputType(0, {DataType::UINT8, getInputShape(0)});
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new EqualOp(inputs[0], inputs[1]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_EQUAL_OP_H_
diff --git a/compiler/mir/include/mir/ops/FullyConnectedOp.h b/compiler/mir/include/mir/ops/FullyConnectedOp.h
new file mode 100644
index 000000000..589c42df9
--- /dev/null
+++ b/compiler/mir/include/mir/ops/FullyConnectedOp.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_FULLY_CONNECTED_OP_H_
+#define _MIR_OPS_FULLY_CONNECTED_OP_H_
+
+#include "mir/Operation.h"
+#include "mir/TensorVariant.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class FullyConnectedOp : public Operation
+{
+public:
+ FullyConnectedOp(Output *input, Output *weights)
+ : Operation(Type::fullyConnected, {input, weights})
+ {
+ inferOutputTypes();
+ }
+
+ FullyConnectedOp(Output *input, Output *weights, Output *bias)
+ : Operation(Type::fullyConnected, {input, weights, bias})
+ {
+ inferOutputTypes();
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ if (inputs.size() == 2)
+ return new FullyConnectedOp(inputs[0], inputs[1]);
+ else
+ return new FullyConnectedOp(inputs[0], inputs[1], inputs[2]);
+ }
+
+private:
+ void inferOutputTypes();
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_FULLY_CONNECTED_OP_H_
diff --git a/compiler/mir/include/mir/ops/GatherOp.h b/compiler/mir/include/mir/ops/GatherOp.h
new file mode 100644
index 000000000..899c9f169
--- /dev/null
+++ b/compiler/mir/include/mir/ops/GatherOp.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_GATHER_OP_H_
+#define _MIR_OPS_GATHER_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+/**
+ * @brief Gather operation as defined by ONNX spec.
+ * https://github.com/onnx/onnx/blob/master/docs/Operators.md#Gather
+ * https://www.tensorflow.org/api_docs/python/tf/gather
+ */
+class GatherOp : public Operation
+{
+public:
+ GatherOp(Output *data, Output *indices, int32_t axis)
+ : Operation(Type::gather, {data, indices}), _axis(axis)
+ {
+ inferOutputTypes();
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new GatherOp(inputs[0], inputs[1], _axis);
+ }
+
+ int32_t getAxis() const { return _axis; }
+
+private:
+ void inferOutputTypes();
+
+ int32_t _axis;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_GATHER_OP_H_
diff --git a/compiler/mir/include/mir/ops/GreaterOp.h b/compiler/mir/include/mir/ops/GreaterOp.h
new file mode 100644
index 000000000..35ede7757
--- /dev/null
+++ b/compiler/mir/include/mir/ops/GreaterOp.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_GREATER_OP_H_
+#define _MIR_OPS_GREATER_OP_H_
+
+#include "mir/ops/BinaryElementwiseOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class GreaterOp : public BinaryElementwiseOp
+{
+public:
+ GreaterOp(Output *arg1, Output *arg2) : BinaryElementwiseOp(Type::greater, arg1, arg2)
+ {
+ setOutputType(0, {DataType::UINT8, getInputShape(0)});
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new GreaterOp(inputs[0], inputs[1]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_GREATER_OP_H_
diff --git a/compiler/mir/include/mir/ops/HardSwishOp.h b/compiler/mir/include/mir/ops/HardSwishOp.h
new file mode 100644
index 000000000..48a18bf58
--- /dev/null
+++ b/compiler/mir/include/mir/ops/HardSwishOp.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_HARD_SWISH_H_
+#define _MIR_OPS_HARD_SWISH_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class HardSwishOp : public Operation
+{
+public:
+ HardSwishOp(Output *arg) : Operation(Type::hardswish, {arg})
+ {
+ setOutputType(0, {arg->getElementType(), arg->getShape()});
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new HardSwishOp(inputs[0]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_HARD_SWISH_H_
diff --git a/compiler/mir/include/mir/ops/InputOp.h b/compiler/mir/include/mir/ops/InputOp.h
new file mode 100644
index 000000000..ed576aeb8
--- /dev/null
+++ b/compiler/mir/include/mir/ops/InputOp.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_INPUT_OP_H_
+#define _MIR_OPS_INPUT_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class InputOp : public Operation
+{
+public:
+ // @brief Deprecated [use TensorType for creation of Input]
+ explicit InputOp(const Shape &shape) : Operation(Type::input, {})
+ {
+ setOutputType(0, {mir::DataType::UNKNOWN, shape});
+ }
+
+ explicit InputOp(const TensorType &type) : Operation(Type::input, {}) { setOutputType(0, type); }
+
+ Operation *copyWithInputs(const std::vector<Output *> &input) override
+ {
+ assert(false && "copying graph input is not allowed");
+ (void)input;
+ return nullptr;
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_INPUT_OP_H_
diff --git a/compiler/mir/include/mir/ops/LeakyReluOp.h b/compiler/mir/include/mir/ops/LeakyReluOp.h
new file mode 100644
index 000000000..5294778ac
--- /dev/null
+++ b/compiler/mir/include/mir/ops/LeakyReluOp.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_LEAKY_RELU_OP_H_
+#define _MIR_OPS_LEAKY_RELU_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class LeakyReluOp : public Operation
+{
+public:
+ explicit LeakyReluOp(Output *arg, float alpha) : Operation(Type::leakyReLU, {arg}), _alpha(alpha)
+ {
+ // Infer output shape.
+ setOutputType(0, {arg->getElementType(), arg->getShape()});
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new LeakyReluOp(inputs[0], _alpha);
+ }
+
+ float getAlpha() const { return _alpha; }
+
+private:
+ float _alpha;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_LEAKY_RELU_OP_H_
diff --git a/compiler/mir/include/mir/ops/LessOp.h b/compiler/mir/include/mir/ops/LessOp.h
new file mode 100644
index 000000000..7e5fb2666
--- /dev/null
+++ b/compiler/mir/include/mir/ops/LessOp.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_LESS_OP_H_
+#define _MIR_OPS_LESS_OP_H_
+
+#include "mir/ops/BinaryElementwiseOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class LessOp : public BinaryElementwiseOp
+{
+public:
+ LessOp(Output *arg1, Output *arg2) : BinaryElementwiseOp(Type::less, arg1, arg2)
+ {
+ setOutputType(0, TensorType(DataType::UINT8, getInputShape(0)));
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new LessOp(inputs[0], inputs[1]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_LESS_OP_H_
diff --git a/compiler/mir/include/mir/ops/MaxOp.h b/compiler/mir/include/mir/ops/MaxOp.h
new file mode 100644
index 000000000..ca2d91abb
--- /dev/null
+++ b/compiler/mir/include/mir/ops/MaxOp.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_MAX_OP_H_
+#define _MIR_OPS_MAX_OP_H_
+
+#include "mir/ops/BinaryElementwiseOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class MaxOp : public BinaryElementwiseOp
+{
+public:
+ MaxOp(Output *arg1, Output *arg2) : BinaryElementwiseOp(Type::max, arg1, arg2) {}
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new MaxOp(inputs[0], inputs[1]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_MAX_OP_H_
diff --git a/compiler/mir/include/mir/ops/MaxPool2DOp.h b/compiler/mir/include/mir/ops/MaxPool2DOp.h
new file mode 100644
index 000000000..7c5df4a53
--- /dev/null
+++ b/compiler/mir/include/mir/ops/MaxPool2DOp.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_MAX_POOL_OP_H_
+#define _MIR_OPS_MAX_POOL_OP_H_
+
+#include "mir/Operation.h"
+#include "mir/Attributes.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace mir
+{
+namespace ops
+{
+
+class MaxPool2DOp : public Operation
+{
+public:
+ MaxPool2DOp(Output *arg, const MaxPool2DOpAttributes &attributes)
+ : Operation(Type::maxPool2D, {arg}), _attributes(attributes)
+ {
+ inferOutputTypes();
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new MaxPool2DOp(inputs[0], _attributes);
+ };
+
+ const std::vector<std::int32_t> &getWindowSize() const { return _attributes.window; }
+
+ const std::vector<std::int32_t> &getStrides() const { return _attributes.strides; }
+
+ const std::vector<std::int32_t> &getPaddingBefore() const { return _attributes.padding_before; }
+
+ const std::vector<std::int32_t> &getPaddingAfter() const { return _attributes.padding_after; }
+
+ DataFormat getDataFormat() const { return _attributes.data_format; }
+
+ const MaxPool2DOpAttributes &getAttributes() const { return _attributes; }
+
+private:
+ void inferOutputTypes();
+
+ MaxPool2DOpAttributes _attributes;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_MAX_POOL_OP_H_
diff --git a/compiler/mir/include/mir/ops/MulOp.h b/compiler/mir/include/mir/ops/MulOp.h
new file mode 100644
index 000000000..c76e307de
--- /dev/null
+++ b/compiler/mir/include/mir/ops/MulOp.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_MUL_OP_H_
+#define _MIR_OPS_MUL_OP_H_
+
+#include "mir/ops/BinaryElementwiseOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class MulOp : public BinaryElementwiseOp
+{
+public:
+ MulOp(Output *arg1, Output *arg2) : BinaryElementwiseOp(Type::mul, arg1, arg2) {}
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new MulOp(inputs[0], inputs[1]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_MUL_OP_H_
diff --git a/compiler/mir/include/mir/ops/OutputOp.h b/compiler/mir/include/mir/ops/OutputOp.h
new file mode 100644
index 000000000..4c44af8a5
--- /dev/null
+++ b/compiler/mir/include/mir/ops/OutputOp.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_OUTPUT_OP_H_
+#define _MIR_OPS_OUTPUT_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class OutputOp : public Operation
+{
+public:
+ explicit OutputOp(Output *input) : Operation(Type::output, {input}, 0) {}
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new OutputOp(inputs[0]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_OUTPUT_OP_H_
diff --git a/compiler/mir/include/mir/ops/PadOp.h b/compiler/mir/include/mir/ops/PadOp.h
new file mode 100644
index 000000000..76453acec
--- /dev/null
+++ b/compiler/mir/include/mir/ops/PadOp.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_PAD_OP_H_
+#define _MIR_OPS_PAD_OP_H_
+
+#include "mir/Operation.h"
+#include "mir/Attributes.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class PadOp : public Operation
+{
+public:
+ PadOp(Output *arg, const PadOpAttributes &attributes)
+ : Operation(Type::pad, {arg}), _attributes(attributes)
+ {
+ assert(_attributes.padding_before.size() == _attributes.padding_after.size());
+ inferOutputTypes();
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new PadOp(inputs[0], _attributes);
+ }
+
+ const std::vector<std::int32_t> &getPaddingBefore() const { return _attributes.padding_before; }
+
+ const std::vector<std::int32_t> &getPaddingAfter() const { return _attributes.padding_after; }
+
+ float getPaddingValue() const { return _attributes.padding_value; }
+
+private:
+ void inferOutputTypes();
+
+ PadOpAttributes _attributes;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_PAD_OP_H_
diff --git a/compiler/mir/include/mir/ops/PaddingType.h b/compiler/mir/include/mir/ops/PaddingType.h
new file mode 100644
index 000000000..836c8dcc8
--- /dev/null
+++ b/compiler/mir/include/mir/ops/PaddingType.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_CORE_IR_MODEL_COMMON_PROPS_H_
+#define _NNC_CORE_IR_MODEL_COMMON_PROPS_H_
+
+namespace mir
+{
+namespace ops
+{
+
+// Follows ONNX convention.
+enum class PaddingType
+{
+ Explicit,
+ Valid,
+ SameLower,
+ SameUpper
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_NNC_CORE_IR_MODEL_COMMON_PROPS_H_
diff --git a/compiler/mir/include/mir/ops/QuantizeOp.h b/compiler/mir/include/mir/ops/QuantizeOp.h
new file mode 100644
index 000000000..7e9216e3e
--- /dev/null
+++ b/compiler/mir/include/mir/ops/QuantizeOp.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_QUANTIZE_OP_H_
+#define _MIR_OPS_QUANTIZE_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class QuantizeOp : public Operation
+{
+public:
+ explicit QuantizeOp(Output *input) : Operation(Type::quantize, {input})
+ {
+ setOutputType(0, {input->getElementType(), input->getShape()});
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new QuantizeOp(inputs[0]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_QUANTIZE_OP_H_
diff --git a/compiler/mir/include/mir/ops/ReduceMeanOp.h b/compiler/mir/include/mir/ops/ReduceMeanOp.h
new file mode 100644
index 000000000..add47ac75
--- /dev/null
+++ b/compiler/mir/include/mir/ops/ReduceMeanOp.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_REDUCE_MEAN_OP_H_
+#define _MIR_OPS_REDUCE_MEAN_OP_H_
+
+#include "mir/ops/ReduceOp.h"
+#include <vector>
+
+namespace mir
+{
+namespace ops
+{
+
+class ReduceMeanOp : public ReduceOp
+{
+public:
+ ReduceMeanOp(Output *arg, const std::vector<int> &reduction_dims, bool keep_dims)
+ : ReduceOp(Type::reduceMean, arg, reduction_dims, keep_dims)
+ {
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new ReduceMeanOp(inputs[0], getReductionDims(), getKeepDims());
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_REDUCE_MEAN_OP_H_
diff --git a/compiler/mir/include/mir/ops/ReduceOp.h b/compiler/mir/include/mir/ops/ReduceOp.h
new file mode 100644
index 000000000..0f46a4596
--- /dev/null
+++ b/compiler/mir/include/mir/ops/ReduceOp.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_REDUCE_OP_H_
+#define _MIR_OPS_REDUCE_OP_H_
+
+#include "mir/Operation.h"
+#include <vector>
+
+namespace mir
+{
+namespace ops
+{
+
+class ReduceOp : public Operation
+{
+protected:
+ ReduceOp(Type type, Output *arg, const std::vector<int> &reduction_dims, bool keep_dims)
+ : Operation(type, {arg}), _reduction_dims(reduction_dims), _keep_dims(keep_dims)
+ {
+ inferOutputTypes();
+ }
+
+public:
+ const std::vector<int> &getReductionDims() const { return _reduction_dims; };
+
+ bool getKeepDims() const { return _keep_dims; };
+
+private:
+ void inferOutputTypes();
+
+ std::vector<int> _reduction_dims;
+ bool _keep_dims;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_REDUCE_OP_H_
diff --git a/compiler/mir/include/mir/ops/ReluOp.h b/compiler/mir/include/mir/ops/ReluOp.h
new file mode 100644
index 000000000..fd1cc3c85
--- /dev/null
+++ b/compiler/mir/include/mir/ops/ReluOp.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_RELU_OP_H_
+#define _MIR_OPS_RELU_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class ReluOp : public Operation
+{
+public:
+ explicit ReluOp(Output *arg) : Operation(Type::ReLU, {arg})
+ {
+ // Infer output shape.
+ setOutputType(0, {arg->getElementType(), arg->getShape()});
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &arg) override
+ {
+ return new ReluOp(arg[0]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_RELU_OP_H_
diff --git a/compiler/mir/include/mir/ops/ReshapeOp.h b/compiler/mir/include/mir/ops/ReshapeOp.h
new file mode 100644
index 000000000..7271cff65
--- /dev/null
+++ b/compiler/mir/include/mir/ops/ReshapeOp.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_RESHAPE_OP_H_
+#define _MIR_OPS_RESHAPE_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class ReshapeOp : public Operation
+{
+public:
+ ReshapeOp(Output *arg, const Shape &shape) : Operation(Type::reshape, {arg})
+ {
+ const Shape &input_shape = getInputShape(0);
+ auto output_shape = shape;
+
+ auto in_elements_num = input_shape.numElements();
+ int32_t out_elements_num = 1;
+ // Can't use num_elements due to -1 in input shape and Shape using unsigned ints for dimensions.
+ for (int32_t d = 0; d < output_shape.rank(); ++d)
+ {
+ auto dim = output_shape.dim(d);
+ if (dim != Shape::autoDim)
+ out_elements_num *= dim;
+ }
+
+ for (int32_t d = 0; d < output_shape.rank(); ++d)
+ {
+ auto &dim = output_shape.dim(d);
+ if (dim == Shape::autoDim)
+ dim = static_cast<int32_t>(in_elements_num / out_elements_num);
+ }
+
+ setOutputType(0, {arg->getElementType(), output_shape});
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new ReshapeOp(inputs[0], getOutputShape(0));
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_RESHAPE_OP_H_
diff --git a/compiler/mir/include/mir/ops/ResizeOp.h b/compiler/mir/include/mir/ops/ResizeOp.h
new file mode 100644
index 000000000..51e1b0b76
--- /dev/null
+++ b/compiler/mir/include/mir/ops/ResizeOp.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_RESIZE_OP_H_
+#define _MIR_OPS_RESIZE_OP_H_
+
+#include "mir/Operation.h"
+#include "mir/Shape.h"
+#include <vector>
+#include <cmath>
+
+namespace mir
+{
+namespace ops
+{
+
+/**@brief Resize operation
+ * scales are such that output = input * scale for each dimension
+ * and the number of dimensions matches
+ */
+class ResizeOp : public Operation
+{
+public:
+ enum class ResizeMethod
+ {
+ nearestNeighbor, // TODO: BICUBIC and BILINEAR
+ };
+
+ ResizeOp(Output *arg, ResizeMethod mode, const std::vector<float> &scales)
+ : Operation(Type::resizeIm, {arg}), _mode(mode), _scales(scales)
+ {
+ // Infer output shape based on given scales.
+ auto &input_shape = getInputShape(0);
+ assert(input_shape.rank() == 4 && _scales.size() == 4);
+ Shape output_shape(input_shape.rank());
+
+ for (int32_t i = 0; i < input_shape.rank(); ++i)
+ {
+ output_shape.dim(i) = static_cast<int32_t>(lroundf(_scales.at(i) * input_shape.dim(i)));
+ }
+
+ setOutputType(0, {getInput(0)->getElementType(), output_shape});
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new ResizeOp(inputs[0], _mode, getOutputShape(0));
+ }
+
+ ResizeOp(Output *arg, ResizeMethod mode, const Shape &output_shape)
+ : Operation(Type::resizeIm, {arg}), _mode(mode)
+ {
+ // Calculate scales based on given shape.
+ auto &input_shape = getInputShape(0);
+ assert(input_shape.rank() == 4 && output_shape.rank() == 4);
+ setOutputType(0, {getInput(0)->getElementType(), output_shape});
+ _scales = {1.0f, static_cast<float>(output_shape.dim(1)) / input_shape.dim(1),
+ static_cast<float>(output_shape.dim(2)) / input_shape.dim(2), 1.0f};
+ }
+
+ /** @return The resize mode */
+ ResizeMethod getMode() const { return _mode; }
+
+ const std::vector<float> &getScales() const { return _scales; }
+
+private:
+ std::vector<float> _scales;
+ ResizeMethod _mode;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_RESIZE_OP_H_
diff --git a/compiler/mir/include/mir/ops/SigmoidOp.h b/compiler/mir/include/mir/ops/SigmoidOp.h
new file mode 100644
index 000000000..8655baca0
--- /dev/null
+++ b/compiler/mir/include/mir/ops/SigmoidOp.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_SIGMOID_OP_H_
+#define _MIR_OPS_SIGMOID_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class SigmoidOp : public Operation
+{
+public:
+ explicit SigmoidOp(Output *arg) : Operation(Type::sigmoid, {arg})
+ {
+ // Infer output shape.
+ setOutputType(0, {arg->getElementType(), arg->getShape()});
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new SigmoidOp(inputs[0]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_SIGMOID_OP_H_
diff --git a/compiler/mir/include/mir/ops/SliceOp.h b/compiler/mir/include/mir/ops/SliceOp.h
new file mode 100644
index 000000000..6370de4fa
--- /dev/null
+++ b/compiler/mir/include/mir/ops/SliceOp.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_SLICE_OP_H_
+#define _MIR_OPS_SLICE_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class SliceOp : public Operation
+{
+public:
+ SliceOp(Output *arg, const Shape &starts, const Shape &sizes)
+ : Operation(Type::slice, {arg}), _starts(starts), _sizes(sizes)
+ {
+ inferOutputTypes();
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new SliceOp(inputs[0], _starts, _sizes);
+ }
+
+ const Shape &getStarts() { return _starts; }
+
+ const Shape &getSizes() { return _sizes; }
+
+private:
+ void inferOutputTypes();
+
+ Shape _starts;
+ Shape _sizes;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_SLICE_OP_H_
diff --git a/compiler/mir/include/mir/ops/SoftmaxOp.h b/compiler/mir/include/mir/ops/SoftmaxOp.h
new file mode 100644
index 000000000..ca05f593c
--- /dev/null
+++ b/compiler/mir/include/mir/ops/SoftmaxOp.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_SOFTMAX_OP_H_
+#define _MIR_OPS_SOFTMAX_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+/**
+ * @brief description of softmax operation.
+ */
+class SoftmaxOp : public Operation
+{
+public:
+ SoftmaxOp(Output *arg, int32_t axis) : Operation(Type::softmax, {arg}), _axis(axis)
+ {
+ setOutputType(0, {arg->getElementType(), arg->getShape()});
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new SoftmaxOp(inputs[0], _axis);
+ }
+
+ int32_t getAxis() const
+ {
+ if (_axis < 0)
+ {
+ // Negative axis is used to index starting from the last element of the shape
+ // -1 means last element, -2 means second from end, like in python
+ int32_t res = _axis + getInputShape(0).rank();
+ assert(res >= 0);
+ return res;
+ }
+ return _axis;
+ }
+
+private:
+  /// @brief The axis along which the softmax is computed, may be negative to index from the end
+ int32_t _axis;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_SOFTMAX_OP_H_
diff --git a/compiler/mir/include/mir/ops/SqrtOp.h b/compiler/mir/include/mir/ops/SqrtOp.h
new file mode 100644
index 000000000..a029f7634
--- /dev/null
+++ b/compiler/mir/include/mir/ops/SqrtOp.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_SQRT_OP_H_
+#define _MIR_OPS_SQRT_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class SqrtOp : public Operation
+{
+public:
+ explicit SqrtOp(Output *arg) : Operation(Type::sqrt, {arg})
+ {
+ setOutputType(0, {arg->getElementType(), arg->getShape()});
+ };
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new SqrtOp(inputs[0]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_SQRT_OP_H_
diff --git a/compiler/mir/include/mir/ops/SqueezeOp.h b/compiler/mir/include/mir/ops/SqueezeOp.h
new file mode 100644
index 000000000..8ef2a78bb
--- /dev/null
+++ b/compiler/mir/include/mir/ops/SqueezeOp.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_SQUEEZE_OP_H_
+#define _MIR_OPS_SQUEEZE_OP_H_
+
+#include "mir/Operation.h"
+#include <algorithm>
+
+namespace mir
+{
+namespace ops
+{
+
+class SqueezeOp : public Operation
+{
+public:
+ SqueezeOp(Output *arg, const std::vector<std::int32_t> &dims_to_squeeze)
+ : Operation(Type::squeeze, {arg}), _dims_to_squeeze(dims_to_squeeze)
+ {
+ // Infer output shape.
+ inferOutputTypes();
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new SqueezeOp(inputs[0], _dims_to_squeeze);
+ }
+
+ void inferOutputTypes();
+
+ int32_t getNumSqueezeDims() const { return static_cast<int32_t>(_dims_to_squeeze.size()); }
+
+ const std::vector<int32_t> &getDimsToSqueeze() const { return _dims_to_squeeze; }
+
+private:
+ std::vector<int32_t> _dims_to_squeeze;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_SQUEEZE_OP_H_
diff --git a/compiler/mir/include/mir/ops/SubOp.h b/compiler/mir/include/mir/ops/SubOp.h
new file mode 100644
index 000000000..519b238d0
--- /dev/null
+++ b/compiler/mir/include/mir/ops/SubOp.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_SUB_OP_H_
+#define _MIR_OPS_SUB_OP_H_
+
+#include "mir/ops/BinaryElementwiseOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class SubOp : public BinaryElementwiseOp
+{
+public:
+ SubOp(Output *arg1, Output *arg2) : BinaryElementwiseOp(Type::sub, arg1, arg2) {}
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new SubOp(inputs[0], inputs[1]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_SUB_OP_H_
diff --git a/compiler/mir/include/mir/ops/TanhOp.h b/compiler/mir/include/mir/ops/TanhOp.h
new file mode 100644
index 000000000..d49261310
--- /dev/null
+++ b/compiler/mir/include/mir/ops/TanhOp.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_TANH_OP_H_
+#define _MIR_OPS_TANH_OP_H_
+
+#include "mir/Operation.h"
+
+namespace mir
+{
+namespace ops
+{
+
+class TanhOp : public Operation
+{
+public:
+ explicit TanhOp(Output *arg) : Operation(Type::tanh, {arg})
+ {
+ // Infer output shape.
+ setOutputType(0, {arg->getElementType(), arg->getShape()});
+ }
+
+ Operation *copyWithInputs(const std::vector<Output *> &inputs) override
+ {
+ return new TanhOp(inputs[0]);
+ }
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_TANH_OP_H_
diff --git a/compiler/mir/include/mir/ops/TransposeOp.h b/compiler/mir/include/mir/ops/TransposeOp.h
new file mode 100644
index 000000000..235a901f1
--- /dev/null
+++ b/compiler/mir/include/mir/ops/TransposeOp.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_OPS_TRANSPOSE_OP_H_
+#define _MIR_OPS_TRANSPOSE_OP_H_
+
+#include "mir/Operation.h"
+#include <vector>
+
+namespace mir
+{
+namespace ops
+{
+
+/**
+ * @brief Tensor transpose operation.
+ *
+ * Rearranges axes of input tensor.
+ */
+class TransposeOp : public Operation
+{
+public:
+ TransposeOp(Output *arg, const std::vector<std::size_t> &axis_order);
+
+ const std::vector<std::size_t> &getAxisOrder() const { return _axis_order; }
+
+ Operation *copyWithInputs(const std::vector<Output *> &arg) override
+ {
+ return new TransposeOp(arg[0], _axis_order);
+ }
+
+private:
+ void inferOutputTypes();
+
+ std::vector<std::size_t> _axis_order;
+};
+
+} // namespace ops
+} // namespace mir
+
+#endif //_MIR_OPS_TRANSPOSE_OP_H_
diff --git a/compiler/mir/src/DotGraph.cpp b/compiler/mir/src/DotGraph.cpp
new file mode 100644
index 000000000..b0e92e19d
--- /dev/null
+++ b/compiler/mir/src/DotGraph.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DotGraph.h"
+
+namespace mir
+{
+
+void DotGraph::addNode(DotNode node) { _nodes.emplace_back(std::move(node)); }
+
+void DotGraph::addEdge(DotEdge edge) { _edges.emplace_back(edge); }
+
+std::ostream &operator<<(std::ostream &stream, const DotGraph &graph)
+{
+ stream << "digraph D {" << std::endl;
+ for (const auto &node : graph._nodes)
+ {
+ stream << node.id << " [shape=record label=\"" << node.label << "\"];" << std::endl;
+ }
+ for (const auto &edge : graph._edges)
+ {
+ stream << edge.src_id << " -> " << edge.dst_id << ";" << std::endl;
+ }
+ stream << "}" << std::endl;
+ return stream;
+}
+
+} // namespace mir
diff --git a/compiler/mir/src/DotGraph.h b/compiler/mir/src/DotGraph.h
new file mode 100644
index 000000000..29698bb57
--- /dev/null
+++ b/compiler/mir/src/DotGraph.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_DOT_GRAPH_
+#define _MIR_DOT_GRAPH_
+
+#include <cstddef>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace mir
+{
+
+struct DotNode
+{
+ std::size_t id;
+ std::string label;
+};
+
+struct DotEdge
+{
+ std::size_t src_id;
+ std::size_t dst_id;
+};
+
+class DotGraph
+{
+public:
+ void addNode(DotNode node);
+ void addEdge(DotEdge edge);
+
+ friend std::ostream &operator<<(std::ostream &stream, const DotGraph &graph);
+
+private:
+ std::vector<DotNode> _nodes;
+ std::vector<DotEdge> _edges;
+};
+
+} // namespace mir
+
+#endif //_MIR_DOT_GRAPH_
diff --git a/compiler/mir/src/DotNodeBuilder.cpp b/compiler/mir/src/DotNodeBuilder.cpp
new file mode 100644
index 000000000..abe7af63a
--- /dev/null
+++ b/compiler/mir/src/DotNodeBuilder.cpp
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DotNodeBuilder.h"
+#include "mir/OpDefs.h"
+
+#include <sstream>
+
+namespace mir
+{
+
+template <typename T> static std::string toString(const std::vector<T> &v)
+{
+ std::stringstream ss;
+ ss << "[";
+ for (std::size_t i = 0; i < v.size(); ++i)
+ {
+ if (i != 0)
+ ss << ", ";
+ ss << v[i];
+ }
+ return ss.str();
+}
+
+DotNodeBuilder::DotNodeBuilder(const Operation &op)
+{
+ _type_name = getTypeName(op.getType());
+ _id = op.getId();
+
+ for (std::size_t i = 0; i < op.getNumInputs(); ++i)
+ {
+ _in_shapes.push_back(toString(op.getInputShape(i)));
+ }
+
+ for (std::size_t i = 0; i < op.getNumOutputs(); ++i)
+ {
+ _out_shapes.push_back(toString(op.getOutputShape(i)));
+ }
+
+ // Get attributes.
+ const_cast<Operation &>(op).accept(this);
+}
+
+void DotNodeBuilder::visit(ops::AvgPool2DOp &op)
+{
+ addAttribute("Window size", toString(op.getWindowSize()));
+ addAttribute("Strides", toString(op.getStrides()));
+ addAttribute("Padding before", toString(op.getPaddingBefore()));
+ addAttribute("Padding after", toString(op.getPaddingAfter()));
+ addAttribute("Include pad", std::to_string(op.getIncludePad()));
+}
+
+void DotNodeBuilder::visit(ops::CappedReluOp &op)
+{
+ addAttribute("Cap", std::to_string(op.getCap()));
+}
+
+void DotNodeBuilder::visit(ops::ConcatOp &op)
+{
+ addAttribute("Axis", std::to_string(op.getAxis()));
+}
+
+void DotNodeBuilder::visit(ops::Conv2DOp &op)
+{
+ addAttribute("Strides", toString(op.getStrides()));
+ addAttribute("Padding before", toString(op.getPaddingBefore()));
+ addAttribute("Padding after", toString(op.getPaddingAfter()));
+ addAttribute("Num groups", std::to_string(op.getNumGroups()));
+ addAttribute("Data format", toString(op.getDataFormat()));
+}
+
+void DotNodeBuilder::visit(ops::DepthwiseConv2DOp &op)
+{
+ addAttribute("Strides", toString(op.getStrides()));
+ addAttribute("Padding before", toString(op.getPaddingBefore()));
+ addAttribute("Padding after", toString(op.getPaddingAfter()));
+ addAttribute("Data format", toString(op.getDataFormat()));
+}
+
+void DotNodeBuilder::visit(ops::MaxPool2DOp &op)
+{
+ addAttribute("Window size", toString(op.getWindowSize()));
+ addAttribute("Strides", toString(op.getStrides()));
+ addAttribute("Padding before", toString(op.getPaddingBefore()));
+ addAttribute("Padding after", toString(op.getPaddingAfter()));
+ addAttribute("Data format", toString(op.getDataFormat()));
+}
+
+void DotNodeBuilder::visit(ops::SoftmaxOp &op)
+{
+ addAttribute("Axis", std::to_string(op.getAxis()));
+}
+
+void DotNodeBuilder::visit(ops::SliceOp &op)
+{
+ addAttribute("Starts", toString(op.getStarts()));
+ addAttribute("Sizes", toString(op.getSizes()));
+}
+
+void DotNodeBuilder::visit(ops::DeConv2DOp &op)
+{
+ addAttribute("Padding before", toString(op.getPaddingBefore()));
+ addAttribute("Padding after", toString(op.getPaddingAfter()));
+ addAttribute("Strides", toString(op.getStrides()));
+ addAttribute("Data format", toString(op.getDataFormat()));
+}
+
+void DotNodeBuilder::visit(ops::EluOp &op) { addAttribute("Alpha", std::to_string(op.getAlpha())); }
+
+void DotNodeBuilder::visit(ops::SqueezeOp &op)
+{
+ addAttribute("Dims to squeeze", toString(op.getDimsToSqueeze()));
+}
+
+void mir::DotNodeBuilder::visit(ops::PadOp &op)
+{
+ addAttribute("Padding before", toString(op.getPaddingBefore()));
+ addAttribute("Padding after", toString(op.getPaddingAfter()));
+ addAttribute("Padding value", std::to_string(op.getPaddingValue()));
+}
+
+void DotNodeBuilder::visit(ops::ReduceMeanOp &op)
+{
+ addAttribute("Reduction dims", toString(op.getReductionDims()));
+ addAttribute("Keep dims", std::to_string(op.getKeepDims()));
+}
+
+void DotNodeBuilder::visit(ops::ResizeOp &op)
+{
+ assert(op.getMode() == ops::ResizeOp::ResizeMethod::nearestNeighbor);
+ (void)op;
+
+ addAttribute("Interpolation mode", "nearestNeighbor");
+}
+
+void DotNodeBuilder::visit(ops::TransposeOp &op)
+{
+ addAttribute("Axis order", toString(op.getAxisOrder()));
+}
+
+void DotNodeBuilder::visit(ops::GatherOp &op)
+{
+ addAttribute("Axis", std::to_string(op.getAxis()));
+}
+
+void DotNodeBuilder::visit(mir::ops::LeakyReluOp &op)
+{
+ addAttribute("Alpha", std::to_string(op.getAlpha()));
+}
+
+void DotNodeBuilder::addAttribute(std::string name, std::string val)
+{
+ this->_attributes.emplace_back(std::move(name), std::move(val));
+}
+
+std::string DotNodeBuilder::getLabel() const
+{
+ std::stringstream ss;
+
+ ss << "{" << _type_name << " | {{";
+
+ for (std::size_t i = 0; i < _in_shapes.size(); ++i)
+ {
+ if (i != 0)
+ ss << " | ";
+ ss << "in" << i << ": " << _in_shapes[i];
+ }
+
+ ss << " | ";
+
+ for (std::size_t i = 0; i < _out_shapes.size(); ++i)
+ {
+ if (i != 0)
+ ss << " | ";
+ ss << "out" << i << ": " << _out_shapes[i];
+ }
+
+ ss << "} | {";
+
+ for (std::size_t i = 0; i < _attributes.size(); ++i)
+ {
+ if (i != 0)
+ ss << " | ";
+ ss << _attributes[i].first << ": " << _attributes[i].second;
+ }
+
+ ss << "}}}";
+
+ return ss.str();
+}
+
+} // namespace mir
diff --git a/compiler/mir/src/DotNodeBuilder.h b/compiler/mir/src/DotNodeBuilder.h
new file mode 100644
index 000000000..09eba0c20
--- /dev/null
+++ b/compiler/mir/src/DotNodeBuilder.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MIR_DOT_NODE_BUILDER_H_
+#define _MIR_DOT_NODE_BUILDER_H_
+
+#include "DotGraph.h"
+
+#include "mir/Visitor.h"
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+namespace mir
+{
+
+// Visitor that inspects a single Operation and assembles a DotNode
+// (id + record label with input/output shapes and op-specific attributes)
+// for DOT graph dumping. The visit() overloads each add the attributes
+// relevant to one operation kind.
+class DotNodeBuilder : public Visitor
+{
+public:
+ explicit DotNodeBuilder(const Operation &op);
+
+ void visit(ops::AvgPool2DOp &op) override;
+ void visit(ops::CappedReluOp &op) override;
+ void visit(ops::ConcatOp &op) override;
+ void visit(ops::Conv2DOp &op) override;
+ void visit(ops::DeConv2DOp &op) override;
+ void visit(ops::DepthwiseConv2DOp &op) override;
+ void visit(ops::EluOp &op) override;
+ void visit(ops::GatherOp &op) override;
+ void visit(ops::LeakyReluOp &op) override;
+ void visit(ops::MaxPool2DOp &op) override;
+ void visit(ops::PadOp &op) override;
+ void visit(ops::ReduceMeanOp &op) override;
+ void visit(ops::ResizeOp &op) override;
+ void visit(ops::SliceOp &op) override;
+ void visit(ops::SoftmaxOp &op) override;
+ void visit(ops::SqueezeOp &op) override;
+ void visit(ops::TransposeOp &op) override;
+
+ // Appends a name/value pair to the attribute section of the label.
+ void addAttribute(std::string name, std::string val);
+
+ // Produces the final DOT node for the inspected operation.
+ DotNode getDotNode() const { return {_id, getLabel()}; }
+
+private:
+ // Formats the record label from the collected shapes and attributes.
+ std::string getLabel() const;
+
+ std::size_t _id;
+ std::string _type_name;
+ std::vector<std::string> _in_shapes;
+ std::vector<std::string> _out_shapes;
+ std::vector<std::pair<std::string, std::string>> _attributes;
+};
+
+} // namespace mir
+
+#endif //_MIR_DOT_NODE_BUILDER_H_
diff --git a/compiler/mir/src/Graph.cpp b/compiler/mir/src/Graph.cpp
new file mode 100644
index 000000000..0eccdac2b
--- /dev/null
+++ b/compiler/mir/src/Graph.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/Graph.h"
+
+#include <algorithm>
+#include <deque>
+#include <unordered_map>
+
+namespace mir
+{
+
+/**
+ * @brief replace all usages of operation `op` with node `with`
+ * (i.e. all references in previous/next nodes)
+ * @param op the operation to replace
+ * @param with the operation to use as a replacement
+ * @note both operations must have the same number of outputs (asserted)
+ */
+static void replaceUsages(Operation *op, Operation *with)
+{
+ assert(op->getNumOutputs() == with->getNumOutputs());
+ for (std::size_t i = 0; i < op->getNumOutputs(); ++i)
+ {
+ Operation::Output *output = op->getOutput(i);
+ output->replaceAllUsesWith(with->getOutput(i));
+ }
+}
+
+// Topologically sorts the graph's operations (Kahn's algorithm): seed the
+// ready queue with nodes that have no inputs, then release each consumer
+// once all of its input edges have been visited. Producers therefore always
+// precede consumers in the returned vector.
+// NOTE(review): nodes not reachable from the zero-input set (e.g. in a
+// cycle) would be silently omitted from the result.
+std::vector<Operation *> getSortedNodes(Graph *graph)
+{
+ std::deque<Operation *> ready_nodes;
+ // Per-node count of input edges whose producer has already been emitted.
+ std::unordered_map<Operation *, std::size_t> num_visited_input_edges;
+
+ for (Operation *op : graph->getNodes())
+ {
+ if (op->getNumInputs() == 0)
+ {
+ ready_nodes.push_back(op);
+ }
+ }
+
+ std::vector<Operation *> sorted_nodes;
+ while (!ready_nodes.empty())
+ {
+ Operation *src_node = ready_nodes.front();
+ ready_nodes.pop_front();
+ sorted_nodes.push_back(src_node);
+ for (Operation::Output &output : src_node->getOutputs())
+ {
+ for (const auto use : output.getUses())
+ {
+ Operation *dst_node = use.getNode();
+ if (++num_visited_input_edges[dst_node] == dst_node->getNumInputs())
+ {
+ ready_nodes.push_back(dst_node);
+ }
+ }
+ }
+ }
+
+ return sorted_nodes;
+}
+
+// Visits every operation in topological order (producers before consumers).
+void Graph::accept(IVisitor *visitor)
+{
+ for (Operation *node : getSortedNodes(this))
+ {
+ node->accept(visitor);
+ }
+}
+
+// The graph owns its operations; destroy them all on teardown.
+Graph::~Graph()
+{
+ for (auto &node : _ops)
+ {
+ delete node;
+ }
+}
+
+// Takes ownership of `op` and records it in the node set; input and output
+// operations are additionally tracked in dedicated lists for fast access.
+void Graph::registerOp(Operation *op)
+{
+ _ops.emplace(op);
+
+ if (auto *input_op = dynamic_cast<ops::InputOp *>(op))
+ _inputs.emplace_back(input_op);
+
+ if (auto *output_op = dynamic_cast<ops::OutputOp *>(op))
+ _outputs.emplace_back(output_op);
+}
+
+// Redirects every use of `op`'s outputs to the corresponding outputs of
+// `with`, then removes and destroys `op`.
+void Graph::replaceNode(Operation *op, Operation *with)
+{
+ replaceUsages(op, with);
+ removeNode(op);
+}
+
+// Detaches `op` from the graph and destroys it.
+// Precondition (asserted in debug builds): none of `op`'s outputs is still
+// used by another node.
+void Graph::removeNode(Operation *op)
+{
+#ifndef NDEBUG
+ for (const auto &output : op->getOutputs())
+ {
+ assert(output.getUses().empty() && "Trying to remove a node that has uses.");
+ }
+#endif
+
+ // Unregister this node from the use-lists of its producers.
+ for (std::size_t i = 0; i < op->getNumInputs(); ++i)
+ {
+ op->getInput(i)->removeUse(Operation::Use(op, i));
+ }
+
+ // Use the complete erase-remove idiom. The previous single-iterator
+ // erase(remove(...)) (flagged NOLINT bugprone-inaccurate-erase) erased
+ // only one element and had undefined behavior if `op` was absent from
+ // the list (erase(end())).
+ if (op->getType() == Operation::Type::input)
+ _inputs.erase(std::remove(_inputs.begin(), _inputs.end(), op), _inputs.end());
+
+ if (op->getType() == Operation::Type::output)
+ _outputs.erase(std::remove(_outputs.begin(), _outputs.end(), op), _outputs.end());
+
+ _ops.erase(op);
+ delete op;
+}
+
+} // namespace mir
diff --git a/compiler/mir/src/GraphPatternMatcher.cpp b/compiler/mir/src/GraphPatternMatcher.cpp
new file mode 100644
index 000000000..78ea1fa02
--- /dev/null
+++ b/compiler/mir/src/GraphPatternMatcher.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/GraphPatternMatcher.h"
+
+#include <algorithm>
+#include <vector>
+
+namespace mir
+{
+
+// Finds directed edges (start, end) where p1(start) and p2(end) hold.
+// For each output of a matching start node, at most one consumer is
+// reported (the inner loop breaks on the first match per output).
+std::vector<std::pair<Operation *, Operation *>>
+GraphPatternMatcher::matchEdge(GraphPatternMatcher::Predicate p1, GraphPatternMatcher::Predicate p2)
+{
+
+ std::vector<std::pair<Operation *, Operation *>> matches;
+ for (auto *start : _g->getNodes())
+ {
+ if (p1(start))
+ {
+ for (auto &out : start->getOutputs())
+ {
+ for (auto use : out.getUses())
+ {
+ Operation *end = use.getNode();
+ if (p2(end))
+ {
+ matches.emplace_back(std::make_pair(start, end));
+ break;
+ }
+ }
+ }
+ }
+ }
+ return matches;
+}
+
+// Finds "bush" patterns: a root node matching p2 whose input producers ALL
+// match p1. Returns (producers, root) pairs; the producer list preserves
+// the root's input order.
+std::vector<std::pair<std::vector<Operation *>, Operation *>>
+GraphPatternMatcher::matchUpBush(mir::GraphPatternMatcher::Predicate p1,
+ mir::GraphPatternMatcher::Predicate p2)
+{
+ std::vector<std::pair<std::vector<Operation *>, Operation *>> matches;
+ for (auto *root : _g->getNodes())
+ {
+ if (p2(root))
+ {
+ const auto &inputs = root->getInputs();
+ if (std::all_of(inputs.begin(), inputs.end(),
+ [p1](const Operation::Output *input) { return p1(input->getNode()); }))
+ {
+ std::vector<Operation *> tops;
+ tops.reserve(inputs.size());
+ for (Operation::Output *pr : inputs)
+ {
+ tops.emplace_back(pr->getNode());
+ }
+ matches.emplace_back(std::make_pair(tops, root));
+ }
+ }
+ }
+ return matches;
+}
+} // namespace mir
diff --git a/compiler/mir/src/Index.cpp b/compiler/mir/src/Index.cpp
new file mode 100644
index 000000000..501ea1552
--- /dev/null
+++ b/compiler/mir/src/Index.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/Index.h"
+
+#include <algorithm>
+
+namespace mir
+{
+
+// Changes the rank of the index; returns *this for call chaining.
+Index &Index::resize(int32_t size)
+{
+ _indices.resize(size);
+ return *this;
+}
+
+// Sets every component to `index`; returns *this for call chaining.
+Index &Index::fill(int32_t index)
+{
+ std::fill(std::begin(_indices), std::end(_indices), index);
+ return (*this);
+}
+
+// Prints an index as "[ i0, i1, ...]" (note: space after '[' but not
+// before ']', matching the existing output format).
+std::ostream &operator<<(std::ostream &s, const Index &idx)
+{
+ s << "[ ";
+ for (int32_t i = 0; i < idx.rank(); ++i)
+ {
+ if (i != 0)
+ s << ", ";
+ s << idx.at(i);
+ }
+ s << "]";
+
+ return s;
+}
+
+} // namespace mir
diff --git a/compiler/mir/src/IrDotDumper.cpp b/compiler/mir/src/IrDotDumper.cpp
new file mode 100644
index 000000000..0c3f4dfb0
--- /dev/null
+++ b/compiler/mir/src/IrDotDumper.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/IrDotDumper.h"
+#include "mir/Graph.h"
+#include "DotGraph.h"
+#include "DotNodeBuilder.h"
+
+namespace mir
+{
+
+// Dumps `graph` to `stream` in Graphviz DOT format: one record node per
+// operation (built by DotNodeBuilder) and one edge per producer->consumer
+// input connection.
+void dumpGraph(const Graph *graph, std::ostream &stream)
+{
+ DotGraph dot_graph;
+
+ for (const auto *node : graph->getNodes())
+ {
+ dot_graph.addNode(DotNodeBuilder(*node).getDotNode());
+ for (const Operation::Output *input : node->getInputs())
+ {
+ dot_graph.addEdge({input->getNode()->getId(), node->getId()});
+ }
+ }
+
+ stream << dot_graph;
+}
+
+} // namespace mir
diff --git a/compiler/mir/src/Operation.cpp b/compiler/mir/src/Operation.cpp
new file mode 100644
index 000000000..6f72acbf6
--- /dev/null
+++ b/compiler/mir/src/Operation.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/Operation.h"
+#include "mir/Visitor.h"
+#include "mir/OpDefs.h"
+
+#include <algorithm>
+
+namespace mir
+{
+
+// Removes a registered use record from this output's use-list.
+// Completed the erase-remove idiom: the previous single-iterator erase(it)
+// removed only one element, so duplicate Use entries were left behind and a
+// not-found `use` caused erase(end()) — undefined behavior.
+void Operation::Output::removeUse(Operation::Use use)
+{
+ auto it = std::remove(_uses.begin(), _uses.end(), use);
+ _uses.erase(it, _uses.end());
+}
+
+// Redirects every consumer of this output to read from `new_def` instead,
+// transferring each Use record to `new_def` and emptying this use-list.
+void Operation::Output::replaceAllUsesWith(mir::Operation::Output *new_def)
+{
+ for (auto use : _uses)
+ {
+ use.getNode()->_inputs[use.getIndex()] = new_def;
+ new_def->addUse(use);
+ }
+ _uses.clear();
+}
+
+// Wires the new operation into the def-use graph: registers this node as a
+// consumer of each input output, and creates `num_outputs` Output slots.
+Operation::Operation(Type type, const std::vector<Output *> &inputs, std::size_t num_outputs)
+ : _type(type)
+{
+ for (std::size_t i = 0; i < inputs.size(); ++i)
+ {
+ inputs[i]->addUse(Use(this, i));
+ _inputs.push_back(inputs[i]);
+ }
+ for (std::size_t i = 0; i < num_outputs; ++i)
+ {
+ _outputs.emplace_back(this, i);
+ }
+}
+
+// Double-dispatches to the visitor: the switch over the Type enum is
+// generated from Operations.inc, casting *this to the concrete op class.
+void Operation::accept(IVisitor *v)
+{
+ switch (getType())
+ {
+#define HANDLE_OP(OpType, OpClass) \
+ case Type::OpType: \
+ v->visit(dynamic_cast<ops::OpClass &>(*this)); \
+ break;
+#include "mir/Operations.inc"
+#undef HANDLE_OP
+ default:
+ assert(false && "OP not defined!");
+ }
+}
+
+// Maps an opcode to its human-readable name. Each case returns a reference
+// to a function-local static string, so the reference stays valid for the
+// program's lifetime. Throws for values outside the generated enum.
+const std::string &getTypeName(Operation::Type type)
+{
+ switch (type)
+ {
+#define HANDLE_OP(OpType, OpClass) \
+ case Operation::Type::OpType: \
+ { \
+ static const std::string name(#OpType); \
+ return name; \
+ }
+#include "mir/Operations.inc"
+#undef HANDLE_OP
+ }
+ throw std::runtime_error("unexpected opcode");
+}
+
+} // namespace mir
diff --git a/compiler/mir/src/Shape.cpp b/compiler/mir/src/Shape.cpp
new file mode 100644
index 000000000..825420cd6
--- /dev/null
+++ b/compiler/mir/src/Shape.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/Shape.h"
+
+#include <algorithm>
+#include <cassert>
+#include <sstream>
+
+namespace mir
+{
+
+constexpr int32_t mir::Shape::autoDim;
+
+// Changes the rank of the shape; new trailing dimensions are value-initialized.
+void Shape::resize(int32_t size) { _dims.resize(size); }
+
+// Returns the product of all dimensions (1 for rank-0 shapes). Asserts that
+// no dimension is the autoDim placeholder, i.e. the shape is fully known.
+int32_t Shape::numElements() const
+{
+ int32_t res = 1;
+
+ for (int32_t axis = 0; axis < rank(); ++axis)
+ {
+ assert(dim(axis) != Shape::autoDim);
+ res *= dim(axis);
+ }
+
+ return res;
+}
+
+// Computes the broadcast of two shapes with right-aligned dimensions: the
+// shorter shape is implicitly padded with leading 1s, and for each axis a
+// dimension of 1 stretches to the other operand's size. Asserts that
+// non-1 dimensions agree.
+Shape broadcastShapes(const Shape &lhs_shape, const Shape &rhs_shape)
+{
+ const int num_dims = std::max(lhs_shape.rank(), rhs_shape.rank());
+ Shape result_shape(num_dims);
+
+ for (int i = 0; i < num_dims; ++i)
+ {
+ // Right-align: axis i of the result maps to axis i - (rank difference)
+ // of each operand, or an implicit 1 when the operand is shorter.
+ const std::int32_t lhs_dim =
+ (i >= num_dims - lhs_shape.rank()) ? lhs_shape.dim(i - (num_dims - lhs_shape.rank())) : 1;
+ const std::int32_t rhs_dim =
+ (i >= num_dims - rhs_shape.rank()) ? rhs_shape.dim(i - (num_dims - rhs_shape.rank())) : 1;
+ if (lhs_dim == 1)
+ {
+ result_shape.dim(i) = rhs_dim;
+ }
+ else
+ {
+ assert(rhs_dim == 1 || rhs_dim == lhs_dim);
+ result_shape.dim(i) = lhs_dim;
+ }
+ }
+
+ return result_shape;
+}
+
+// Renders a shape as "[d0, d1, ...]", printing "AUTO" for the autoDim
+// placeholder (dimension to be inferred).
+std::string toString(const Shape &shape)
+{
+ std::stringstream ss;
+
+ ss << "[";
+ for (int32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ if (axis != 0)
+ ss << ", ";
+ if (shape.dim(axis) == Shape::autoDim)
+ ss << "AUTO";
+ else
+ ss << shape.dim(axis);
+ }
+ ss << "]";
+
+ return ss.str();
+}
+
+} // namespace mir
diff --git a/compiler/mir/src/Tensor.cpp b/compiler/mir/src/Tensor.cpp
new file mode 100644
index 000000000..d0e860b5d
--- /dev/null
+++ b/compiler/mir/src/Tensor.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/Tensor.h"
+
+namespace mir
+{
+
+// Explicit instantiations of the Tensor template for the element types the
+// library compiles out-of-line.
+template class Tensor<float>;
+template class Tensor<double>;
+template class Tensor<int>;
+
+} // namespace mir
diff --git a/compiler/mir/src/TensorVariant.cpp b/compiler/mir/src/TensorVariant.cpp
new file mode 100644
index 000000000..9e57dbaf0
--- /dev/null
+++ b/compiler/mir/src/TensorVariant.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/TensorVariant.h"
+#include <cstring>
+
+namespace mir
+{
+
+// Allocates an uninitialized, contiguous buffer for `type` and precomputes
+// row-major (C-order) element strides: innermost dimension has stride 1.
+TensorVariant::TensorVariant(const TensorType &type) : _type(type), _strides(type.getShape().rank())
+{
+ _element_size = getDataTypeSize(getElementType());
+ std::size_t data_size = getShape().numElements() * _element_size;
+ _data.reset(new char[data_size], std::default_delete<char[]>());
+
+ int stride = 1;
+ for (int d = getShape().rank() - 1; d >= 0; --d)
+ {
+ _strides[d] = stride;
+ stride *= getShape().dim(d);
+ }
+}
+
+// Convenience: builds the TensorType from element type + shape, then
+// delegates to the allocating constructor above.
+TensorVariant::TensorVariant(DataType element_type, const Shape &shape)
+ : TensorVariant(TensorType(element_type, shape))
+{
+}
+
+// Allocates as above, then copies `data` into the buffer. `data` must hold
+// at least numElements * element_size bytes in contiguous row-major order.
+TensorVariant::TensorVariant(const TensorType &type, const void *data) : TensorVariant(type)
+{
+ std::size_t data_size = getShape().numElements() * _element_size;
+ std::memcpy(_data.get(), data, data_size);
+}
+
+// Convenience overload of the copying constructor taking dtype + shape.
+TensorVariant::TensorVariant(DataType element_type, const Shape &shape, const void *data)
+ : TensorVariant(TensorType(element_type, shape), data)
+{
+}
+
+/**
+ * @brief Construct a TensorVariant from t_old that has strides with 0 where dim = 1
+ * Used for broadcasting
+ * @param t_old TensorVariant to use as base
+ * @param shape shape to broadcast to
+ *
+ * The data buffer is shared (no copy); strides are right-aligned against
+ * the old shape, and axes where the old dimension is 1 keep the
+ * value-initialized stride 0 so every index reads the same element.
+ */
+TensorVariant::TensorVariant(const TensorVariant &t_old, const Shape &shape)
+ : _type(t_old.getType().getElementType(), shape), _data(t_old._data),
+ _strides(static_cast<size_t>(shape.rank())), _element_size(t_old._element_size)
+{
+ int axis_old = t_old.getShape().rank() - 1;
+ // Walk both shapes from the innermost axis outward.
+ for (int d = shape.rank() - 1; d >= 0; d--)
+ {
+ if (axis_old == -1)
+ break;
+ if (t_old.getShape().dim(axis_old) != 1)
+ _strides[d] = t_old._strides[axis_old];
+ axis_old--;
+ }
+}
+
+} // namespace mir
diff --git a/compiler/mir/src/Visitor.cpp b/compiler/mir/src/Visitor.cpp
new file mode 100644
index 000000000..882871d4a
--- /dev/null
+++ b/compiler/mir/src/Visitor.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/Visitor.h"
+
+#include "mir/OpDefs.h"
+
+namespace mir
+{
+
+// Default Visitor behavior: every visit(ops::X&) overload generated from
+// Operations.inc forwards to visit_fallback(), so subclasses only need to
+// override the operations they actually handle.
+#define HANDLE_OP(OpType, OpClass) \
+ void Visitor::visit(ops::OpClass &op) { visit_fallback(op); }
+#include "mir/Operations.inc"
+#undef HANDLE_OP
+
+} // namespace mir
diff --git a/compiler/mir/src/ops/AvgPool2DOp.cpp b/compiler/mir/src/ops/AvgPool2DOp.cpp
new file mode 100644
index 000000000..52b67303f
--- /dev/null
+++ b/compiler/mir/src/ops/AvgPool2DOp.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/AvgPool2DOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+// Infers the output type of 2D average pooling. Batch and channel dims are
+// copied from the input; each spatial dim follows the "valid"-style formula
+// out = (in + pad_before + pad_after - window) / stride + 1.
+void AvgPool2DOp::inferOutputTypes()
+{
+ const auto &input_shape = getInputShape(0);
+ const int batch_dim_index = getDataBatchDimIndex(_attributes.data_format);
+ const int channel_dim_index = getDataChannelDimIndex(_attributes.data_format);
+
+ constexpr int num_spatial_dims = 2;
+
+ assert(input_shape.rank() == 4);
+ assert(_attributes.window.size() == num_spatial_dims);
+ assert(_attributes.strides.size() == num_spatial_dims);
+ assert(_attributes.padding_before.size() == num_spatial_dims);
+ assert(_attributes.padding_after.size() == num_spatial_dims);
+
+ Shape output_shape(4);
+
+ output_shape.dim(batch_dim_index) = input_shape.dim(batch_dim_index);
+ output_shape.dim(channel_dim_index) = input_shape.dim(channel_dim_index);
+
+ for (int i = 0; i < num_spatial_dims; i++)
+ {
+ const int spatial_dim_index = getDataSpatialDimIndex(_attributes.data_format, i);
+ const std::int32_t padded_input = input_shape.dim(spatial_dim_index) +
+ _attributes.padding_before.at(i) +
+ _attributes.padding_after.at(i);
+ // out_size = ceil((in_size - window_size + 1) / stride) =
+ // (in_size - window_size + 1 + stride - 1) / stride =
+ // (in_size - window_size) / stride + 1
+ output_shape.dim(spatial_dim_index) =
+ (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
+ }
+
+ // Element type is preserved from the input.
+ setOutputType(0, {getInput(0)->getElementType(), output_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/BinaryElementwiseOp.cpp b/compiler/mir/src/ops/BinaryElementwiseOp.cpp
new file mode 100644
index 000000000..982df3193
--- /dev/null
+++ b/compiler/mir/src/ops/BinaryElementwiseOp.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/BinaryElementwiseOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+// Infers the output type of a binary elementwise op: the shape is the
+// broadcast of both input shapes; both inputs must share an element type.
+void BinaryElementwiseOp::inferOutputTypes()
+{
+ const auto &lhs_shape = getInputShape(0);
+ const auto &rhs_shape = getInputShape(1);
+
+ auto dt = getInput(0)->getElementType();
+
+ // lhs and rhs have equal element types
+ assert(dt == getInput(1)->getElementType());
+ auto out_shape = broadcastShapes(lhs_shape, rhs_shape);
+
+ setOutputType(0, {dt, out_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/BroadcastOp.cpp b/compiler/mir/src/ops/BroadcastOp.cpp
new file mode 100644
index 000000000..b9f915a83
--- /dev/null
+++ b/compiler/mir/src/ops/BroadcastOp.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/BroadcastOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/Tensor.h"
+
+namespace mir
+{
+namespace ops
+{
+
+// Infers the output type: the input shape broadcast against `target_shape`,
+// keeping the input's element type.
+void BroadcastOp::inferOutputTypes(const Shape &target_shape)
+{
+ const Shape &input_shape = getInputShape(0);
+ Shape output_shape = broadcastShapes(input_shape, target_shape);
+
+ setOutputType(0, {getInput(0)->getElementType(), output_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/ConcatOp.cpp b/compiler/mir/src/ops/ConcatOp.cpp
new file mode 100644
index 000000000..c8c4764ad
--- /dev/null
+++ b/compiler/mir/src/ops/ConcatOp.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/ConcatOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+// Infers the output type of concatenation: the output shape equals the
+// first input's shape except along _axis, which is the sum of all inputs'
+// sizes on that axis. All inputs must share an element type (asserted).
+void ConcatOp::inferOutputTypes()
+{
+ Shape output_shape(getInputShape(0));
+ output_shape.dim(_axis) = 0;
+ auto element_type = getInput(0)->getElementType();
+
+ for (std::size_t i = 0; i < getNumInputs(); ++i)
+ {
+ output_shape.dim(_axis) += getInputShape(i).dim(_axis);
+ assert(getInput(i)->getElementType() == element_type);
+ }
+
+ setOutputType(0, {element_type, output_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/Conv2DOp.cpp b/compiler/mir/src/ops/Conv2DOp.cpp
new file mode 100644
index 000000000..1addc5734
--- /dev/null
+++ b/compiler/mir/src/ops/Conv2DOp.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/Conv2DOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+// Infers the output type of (possibly grouped) 2D convolution with kernel
+// layout [O, H, W, I / num_groups]: batch is copied from the input, the
+// channel dim becomes the kernel's O dim, and each spatial dim uses
+// out = (padded_in - kernel) / stride + 1.
+void Conv2DOp::inferOutputTypes()
+{
+ // Kernel shape: [O, H, W, I / M].
+ const auto &input_shape = getInputShape(0);
+ const auto &kernel_shape = getInputShape(1);
+ const int batch_dim_index = getDataBatchDimIndex(_attributes.data_format);
+ const int channel_dim_index = getDataChannelDimIndex(_attributes.data_format);
+
+ constexpr int num_spatial_dims = 2;
+
+ assert(input_shape.rank() == 2 + num_spatial_dims);
+ assert(kernel_shape.rank() == 2 + num_spatial_dims);
+ // Grouped-convolution consistency checks.
+ assert(kernel_shape.dim(3) * _attributes.num_groups == input_shape.dim(channel_dim_index));
+ assert(kernel_shape.dim(0) % _attributes.num_groups == 0);
+
+ assert(_attributes.strides.size() == num_spatial_dims);
+ assert(_attributes.padding_before.size() == num_spatial_dims);
+ assert(_attributes.padding_after.size() == num_spatial_dims);
+
+ Shape output_shape(2 + num_spatial_dims);
+
+ output_shape.dim(batch_dim_index) = input_shape.dim(batch_dim_index);
+ output_shape.dim(channel_dim_index) = kernel_shape.dim(0);
+
+ for (int i = 0; i < num_spatial_dims; i++)
+ {
+ const int spatial_dim_index = getDataSpatialDimIndex(_attributes.data_format, i);
+ const std::int32_t padded_input = input_shape.dim(spatial_dim_index) +
+ _attributes.padding_before[i] + _attributes.padding_after[i];
+ // out_size = ceil((in_size - kernel_size + 1) / stride) =
+ // (in_size - kernel_size + 1 + stride - 1) / stride =
+ // (in_size - kernel_size) / stride + 1
+ output_shape.dim(spatial_dim_index) =
+ (padded_input - kernel_shape.dim(1 + i)) / _attributes.strides[i] + 1;
+ }
+
+ auto dt = getInput(0)->getElementType();
+ assert(dt == getInput(1)->getElementType() && "kernel should have same data type as input");
+
+ setOutputType(0, {dt, output_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/DeConv2DOp.cpp b/compiler/mir/src/ops/DeConv2DOp.cpp
new file mode 100644
index 000000000..35b111bc0
--- /dev/null
+++ b/compiler/mir/src/ops/DeConv2DOp.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/Deconv2DOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+// See the formulas at https://github.com/onnx/onnx/blob/master/docs/Operators.md#convtranspose.
+void DeConv2DOp::inferPaddings()
+{
+ assert(_attributes.padding_type != PaddingType::Explicit);
+
+ const auto &input_shape = getInputShape(0);
+ const auto &kernel_shape = getInputShape(1);
+ const auto &output_shape = getOutputShape(0);
+
+ constexpr int num_spatial_dims = 2;
+
+ for (int i = 0; i < num_spatial_dims; ++i)
+ {
+ const int spatial_dim_index = getDataSpatialDimIndex(_attributes.data_format, i);
+ const std::int32_t total_padding =
+ (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
+ output_shape.dim(spatial_dim_index);
+
+ switch (_attributes.padding_type)
+ {
+ case PaddingType::Valid:
+ // TODO Figure out what to do.
+ assert(false);
+ break;
+ case PaddingType::SameLower:
+ _attributes.padding_after[i] = total_padding / 2;
+ _attributes.padding_before[i] = total_padding - _attributes.padding_after[i];
+ break;
+ case PaddingType::SameUpper:
+ _attributes.padding_before[i] = total_padding / 2;
+ _attributes.padding_after[i] = total_padding - _attributes.padding_before[i];
+ break;
+ default:
+ assert(false);
+ }
+ }
+}
+
+// See the formulas at https://github.com/onnx/onnx/blob/master/docs/Operators.md#convtranspose.
+void DeConv2DOp::inferOutputTypes()
+{
+ assert(_attributes.padding_type == PaddingType::Explicit);
+
+ // Kernel shape: [Hk, Wk, Co, Ci]
+ const auto &input_shape = getInputShape(0);
+ const auto &kernel_shape = getInputShape(1);
+ const int batch_dim_index = getDataBatchDimIndex(_attributes.data_format);
+ const int channel_dim_index = getDataChannelDimIndex(_attributes.data_format);
+
+ assert(input_shape.rank() == 4);
+ assert(kernel_shape.rank() == 4);
+ assert(kernel_shape.dim(3) == input_shape.dim(channel_dim_index));
+
+ Shape output_shape(4);
+
+ output_shape.dim(batch_dim_index) = input_shape.dim(batch_dim_index);
+ output_shape.dim(channel_dim_index) = kernel_shape.dim(2);
+
+ constexpr int num_spatial_dims = 2;
+
+ for (int i = 0; i < num_spatial_dims; i++)
+ {
+ const int spatial_dim_index = getDataSpatialDimIndex(_attributes.data_format, i);
+ output_shape.dim(spatial_dim_index) =
+ (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
+ (_attributes.padding_before.at(i) + _attributes.padding_after.at(i));
+ }
+
+ setOutputType(0, {getInput(0)->getElementType(), output_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/DepthwiseConv2DOp.cpp b/compiler/mir/src/ops/DepthwiseConv2DOp.cpp
new file mode 100644
index 000000000..0154bcd09
--- /dev/null
+++ b/compiler/mir/src/ops/DepthwiseConv2DOp.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/DepthwiseConv2DOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+void DepthwiseConv2DOp::inferOutputTypes()
+{
+ // Kernel shape: [Hk, Wk, Ci, M].
+ const auto &input_shape = getInputShape(0);
+ const auto &kernel_shape = getInputShape(1);
+ const int batch_dim_index = getDataBatchDimIndex(_attributes.data_format);
+ const int channel_dim_index = getDataChannelDimIndex(_attributes.data_format);
+
+ assert(input_shape.rank() == 4);
+ assert(kernel_shape.rank() == 4);
+ assert(input_shape.dim(channel_dim_index) == kernel_shape.dim(2));
+ assert(_attributes.strides.size() == 2);
+ assert(_attributes.padding_before.size() == 2);
+ assert(_attributes.padding_after.size() == 2);
+
+ Shape output_shape(4);
+
+ output_shape.dim(batch_dim_index) = input_shape.dim(batch_dim_index);
+ output_shape.dim(channel_dim_index) = input_shape.dim(channel_dim_index) * kernel_shape.dim(3);
+
+ for (int i = 0; i < 2; i++)
+ {
+ const int spatial_dim_index = getDataSpatialDimIndex(_attributes.data_format, i);
+ const std::int32_t padded_input = input_shape.dim(spatial_dim_index) +
+ _attributes.padding_before[i] + _attributes.padding_after[i];
+ // out_size = ceil((in_size - kernel_size + 1) / stride) =
+ // (in_size - kernel_size + 1 + stride - 1) / stride =
+ // (in_size - kernel_size) / stride + 1
+ output_shape.dim(spatial_dim_index) =
+ (padded_input - kernel_shape.dim(i)) / _attributes.strides[i] + 1;
+ }
+
+ setOutputType(0, {getInput(0)->getElementType(), output_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/FullyConnectedOp.cpp b/compiler/mir/src/ops/FullyConnectedOp.cpp
new file mode 100644
index 000000000..2865b6c87
--- /dev/null
+++ b/compiler/mir/src/ops/FullyConnectedOp.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/FullyConnectedOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+void FullyConnectedOp::inferOutputTypes()
+{
+ auto &input_shape = getInputShape(0);
+ auto &weights_shape = getInputShape(1);
+ auto input_rank = input_shape.rank();
+ auto weights_rank = weights_shape.rank();
+
+ assert(weights_rank >= 2);
+ assert(input_rank == weights_rank);
+ assert(input_shape.dim(input_rank - 1) == weights_shape.dim(weights_rank - 2));
+ (void)input_rank;
+ for (int32_t i = 0; i < weights_rank - 2; ++i)
+ assert(weights_shape.dim(i) == input_shape.dim(i));
+
+ Shape output_shape = weights_shape;
+ output_shape.dim(weights_rank - 1) = weights_shape.dim(weights_rank - 1);
+ output_shape.dim(weights_rank - 2) = input_shape.dim(weights_rank - 2);
+
+ setOutputType(0, {getInput(0)->getElementType(), output_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/GatherOp.cpp b/compiler/mir/src/ops/GatherOp.cpp
new file mode 100644
index 000000000..264ac2618
--- /dev/null
+++ b/compiler/mir/src/ops/GatherOp.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/GatherOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+void GatherOp::inferOutputTypes()
+{
+ const auto &data_shape = getInputShape(0);
+ const auto &indices_shape = getInputShape(1);
+
+ auto data_rank = data_shape.rank();
+ auto indices_rank = indices_shape.rank();
+ auto output_rank = data_rank + indices_rank - 1;
+
+ assert(_axis >= -data_rank && _axis < data_rank);
+ int32_t axis = _axis < 0 ? _axis + data_rank : _axis;
+
+ Shape output_shape;
+ output_shape.resize(output_rank);
+
+ // Output shape is data.shape[:axis] + indices.shape + data.shape[axis + 1:].
+ int32_t output_index = 0;
+ for (int32_t i = 0; i < axis; ++i)
+ output_shape.dim(output_index++) = data_shape.dim(i);
+ for (int32_t i = 0; i < indices_rank; ++i)
+ output_shape.dim(output_index++) = indices_shape.dim(i);
+ for (int32_t i = axis + 1; i < data_rank; ++i)
+ output_shape.dim(output_index++) = data_shape.dim(i);
+
+ setOutputType(0, {getInput(0)->getElementType(), output_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/MaxPool2DOp.cpp b/compiler/mir/src/ops/MaxPool2DOp.cpp
new file mode 100644
index 000000000..38e72424e
--- /dev/null
+++ b/compiler/mir/src/ops/MaxPool2DOp.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/MaxPool2DOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+void MaxPool2DOp::inferOutputTypes()
+{
+ const auto &input_shape = getInputShape(0);
+ const int batch_dim_index = getDataBatchDimIndex(_attributes.data_format);
+ const int channel_dim_index = getDataChannelDimIndex(_attributes.data_format);
+
+ constexpr int num_spatial_dims = 2;
+
+ assert(input_shape.rank() == 4);
+ assert(_attributes.window.size() == num_spatial_dims);
+ assert(_attributes.strides.size() == num_spatial_dims);
+ assert(_attributes.padding_before.size() == num_spatial_dims);
+ assert(_attributes.padding_after.size() == num_spatial_dims);
+
+ Shape output_shape(4);
+
+ output_shape.dim(batch_dim_index) = input_shape.dim(batch_dim_index);
+ output_shape.dim(channel_dim_index) = input_shape.dim(channel_dim_index);
+
+ for (int i = 0; i < num_spatial_dims; i++)
+ {
+ const int spatial_dim_index = getDataSpatialDimIndex(_attributes.data_format, i);
+ const std::int32_t padded_input = input_shape.dim(spatial_dim_index) +
+ _attributes.padding_before.at(i) +
+ _attributes.padding_after.at(i);
+ // out_size = ceil((in_size - window_size + 1) / stride) =
+ // (in_size - window_size + 1 + stride - 1) / stride =
+ // (in_size - window_size) / stride + 1
+ output_shape.dim(spatial_dim_index) =
+ (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
+ }
+
+ setOutputType(0, {getInput(0)->getElementType(), output_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/PadOp.cpp b/compiler/mir/src/ops/PadOp.cpp
new file mode 100644
index 000000000..465856d92
--- /dev/null
+++ b/compiler/mir/src/ops/PadOp.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/PadOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+void PadOp::inferOutputTypes()
+{
+ const Shape &input_shape = getInputShape(0);
+ int32_t num_dims = input_shape.rank();
+
+ Shape out_shape(num_dims);
+ for (int32_t dim = 0; dim < num_dims; ++dim)
+ {
+ out_shape.dim(dim) =
+ _attributes.padding_before[dim] + input_shape.dim(dim) + _attributes.padding_after[dim];
+ }
+
+ setOutputType(0, {getInput(0)->getElementType(), out_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/ReduceOp.cpp b/compiler/mir/src/ops/ReduceOp.cpp
new file mode 100644
index 000000000..ac4c322e1
--- /dev/null
+++ b/compiler/mir/src/ops/ReduceOp.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/ReduceMeanOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+void ReduceOp::inferOutputTypes()
+{
+ const auto &input_shape = getInputShape(0);
+ const auto &reduction_dims = getReductionDims();
+ Shape output_shape;
+
+ if (getKeepDims())
+ {
+ output_shape = input_shape;
+ for (const int dim : reduction_dims)
+ {
+ output_shape.dim(dim) = 1;
+ }
+ }
+ else
+ {
+ // This mask contains 'true' for dimension indices that should be reduced.
+ // for example, if we want to reduce 1 and 3 dimensions with total number of dimensions 4,
+ // the mask will contain: [false, true, false, true].
+ std::vector<bool> reduction_dims_mask(input_shape.rank(), false);
+ for (auto axis : reduction_dims)
+ reduction_dims_mask[axis] = true;
+
+ std::vector<std::int32_t> out_dims;
+ out_dims.reserve(input_shape.rank() - reduction_dims.size());
+ for (int axis_id = 0; axis_id < input_shape.rank(); axis_id++)
+ {
+ if (!reduction_dims_mask[axis_id])
+ out_dims.emplace_back(input_shape.dim(axis_id));
+ }
+ output_shape = Shape(out_dims);
+ }
+
+ setOutputType(0, {getInput(0)->getElementType(), output_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/SliceOp.cpp b/compiler/mir/src/ops/SliceOp.cpp
new file mode 100644
index 000000000..6e65be70a
--- /dev/null
+++ b/compiler/mir/src/ops/SliceOp.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/SliceOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
+// Only supports 4d inputs
+void SliceOp::inferOutputTypes()
+{
+ const Shape &input_shape = getInputShape(0);
+ assert(input_shape.rank() <= 4 && "Support only 4D tensors or smaller");
+ Shape output_shape(input_shape.rank());
+ for (int i = 0; i < input_shape.rank(); i++)
+ {
+ if (_sizes.dim(i) == -1)
+ {
+ output_shape.dim(i) = input_shape.dim(i) - _starts.dim(i);
+ }
+ else
+ {
+ output_shape.dim(i) = _sizes.dim(i);
+ }
+ }
+ setOutputType(0, {getInput(0)->getElementType(), output_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/SqueezeOp.cpp b/compiler/mir/src/ops/SqueezeOp.cpp
new file mode 100644
index 000000000..63f7d43f3
--- /dev/null
+++ b/compiler/mir/src/ops/SqueezeOp.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/SqueezeOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
// Removes size-1 dimensions from the input shape. With no explicit squeeze
// dims, every size-1 dimension is removed; otherwise only the listed ones are,
// and each listed dimension must actually have size 1.
void SqueezeOp::inferOutputTypes()
{
  assert(getNumInputs() == 1);

  const auto &input_shape = getInputShape(0);
  auto dt = getInput(0)->getElementType();
  int32_t input_rank = input_shape.rank();

  // Dimension indices to remove, in ascending order without duplicates.
  std::vector<int32_t> dims_to_squeeze;

  if (getNumSqueezeDims() == 0)
  {
    // No explicit dims: squeeze every dimension of size 1.
    for (int32_t i = 0; i < input_rank; ++i)
    {
      if (input_shape.dim(i) == 1)
      {
        dims_to_squeeze.push_back(i);
      }
    }
  }
  else
  {
    // Normalize the user-provided list: sort and drop duplicates so the
    // single-pass merge below works.
    dims_to_squeeze = getDimsToSqueeze();
    sort(dims_to_squeeze.begin(), dims_to_squeeze.end());
    dims_to_squeeze.erase(unique(dims_to_squeeze.begin(), dims_to_squeeze.end()),
                          dims_to_squeeze.end());
  }

  if (dims_to_squeeze.size() == static_cast<size_t>(input_rank))
  {
    // The input has 1s in all dimensions; the output shape is (1,) rather
    // than a rank-0 shape.
    setOutputType(0, {dt, Shape{1}});
    return;
  }

  // Walk the input dimensions and the sorted squeeze list in lockstep,
  // copying every dimension that is not being squeezed.
  int32_t output_rank = 0;
  size_t squeezing_idx = 0;
  Shape output_shape(input_rank - dims_to_squeeze.size());
  for (int32_t i = 0; i < input_rank; ++i)
  {
    if (squeezing_idx < dims_to_squeeze.size() && i == dims_to_squeeze[squeezing_idx])
    {
      if (input_shape.dim(i) != 1)
        throw std::invalid_argument("All squeezed dimensions should have size 1");

      squeezing_idx++;
    }
    else
    {
      output_shape.dim(output_rank++) = input_shape.dim(i);
    }
  }

  setOutputType(0, {dt, output_shape});
}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/src/ops/TransposeOp.cpp b/compiler/mir/src/ops/TransposeOp.cpp
new file mode 100644
index 000000000..92282e17d
--- /dev/null
+++ b/compiler/mir/src/ops/TransposeOp.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/ops/TransposeOp.h"
+
+namespace mir
+{
+namespace ops
+{
+
// Constructs a transpose operation that permutes the dimensions of 'arg'
// according to 'axis_order'; the permutation must name every input dimension
// exactly once (its length must equal the input rank).
TransposeOp::TransposeOp(Output *arg, const std::vector<std::size_t> &axis_order)
    : Operation(Type::transpose, {arg}), _axis_order(axis_order)
{
  assert(_axis_order.size() == static_cast<std::size_t>(getInputShape(0).rank()));
  inferOutputTypes();
}
+
+void TransposeOp::inferOutputTypes()
+{
+ auto &input_shape = getInputShape(0);
+ Shape output_shape(input_shape.rank());
+ for (std::size_t i = 0; i < _axis_order.size(); ++i)
+ output_shape.dim(static_cast<std::int64_t>(i)) =
+ input_shape.dim(static_cast<int32_t>(_axis_order.at(i)));
+
+ setOutputType(0, {getInput(0)->getElementType(), output_shape});
+}
+
+} // namespace ops
+} // namespace mir
diff --git a/compiler/mir/unittests/CMakeLists.txt b/compiler/mir/unittests/CMakeLists.txt
new file mode 100644
index 000000000..4844eba3a
--- /dev/null
+++ b/compiler/mir/unittests/CMakeLists.txt
@@ -0,0 +1,16 @@
# Source files of the MIR unit-test binary.
set(MIR_TEST_SOURCES
    Operation.cpp
    Index.cpp
    ShapeInference.cpp
    ShapeRange.cpp
    TensorVariant.cpp
    NodeReplacer.cpp)

# Tests are only built when the project-wide ENABLE_TEST switch is on.
if(NOT ENABLE_TEST)
  return()
endif(NOT ENABLE_TEST)

nnas_find_package(GTest REQUIRED)

# Registers the test executable with CTest and links it against GoogleTest.
GTest_AddTest(mir_test ${MIR_TEST_SOURCES})
target_link_libraries(mir_test mir)
diff --git a/compiler/mir/unittests/Index.cpp b/compiler/mir/unittests/Index.cpp
new file mode 100644
index 000000000..e2edf40ef
--- /dev/null
+++ b/compiler/mir/unittests/Index.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "mir/Shape.h"
+#include "mir/Index.h"
+
+using namespace mir;
+
// Exercises Shape construction, negative-index access (counting from the
// end), in-place dimension mutation, copying, and resizing.
TEST(Shape, Base)
{
  Shape s1{3, 2};
  ASSERT_EQ(s1.rank(), 2);
  ASSERT_EQ(s1.dim(0), 3);
  ASSERT_EQ(s1.dim(1), 2);
  // Negative indices address dimensions from the end.
  ASSERT_EQ(s1.dim(-1), 2);
  ASSERT_EQ(s1.dim(-2), 3);
  ASSERT_EQ(s1.numElements(), 6);

  // dim() returns a mutable reference; numElements must track the change.
  s1.dim(1) = 4;
  ASSERT_EQ(s1.dim(1), 4);
  ASSERT_EQ(s1.numElements(), 12);

  Shape s2 = s1;
  ASSERT_EQ(s1, s2);

  // Shrinking the rank makes shapes unequal ...
  s2.resize(1);
  ASSERT_NE(s1, s2);

  // ... and growing it back plus restoring the dimension restores equality.
  s2.resize(2);
  s2.dim(1) = s1.dim(1);
  ASSERT_EQ(s1, s2);
}
+
// Exercises Index construction, negative-index access, element mutation,
// and resizing.
TEST(Index, Base)
{
  Index idx{3, 2};
  ASSERT_EQ(idx.rank(), 2);
  ASSERT_EQ(idx.at(0), 3);
  ASSERT_EQ(idx.at(1), 2);
  // Negative positions address elements from the end.
  ASSERT_EQ(idx.at(-1), 2);
  ASSERT_EQ(idx.at(-2), 3);

  // at() returns a mutable reference.
  idx.at(1) = 4;
  ASSERT_EQ(idx.at(1), 4);

  idx.resize(1);
  ASSERT_EQ(idx.rank(), 1);
}
diff --git a/compiler/mir/unittests/NodeReplacer.cpp b/compiler/mir/unittests/NodeReplacer.cpp
new file mode 100644
index 000000000..2aa0481c6
--- /dev/null
+++ b/compiler/mir/unittests/NodeReplacer.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "mir/Graph.h"
+#include "mir/Visitor.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/InputOp.h"
+#include "mir/ops/ReluOp.h"
+
+namespace
+{
+
+using namespace mir;
+
// Visitor that serializes visited nodes into a compact string such as
// "i0r4r2r3": one letter per op kind (i = input, r = relu, c = concat)
// followed by the node id. Used to assert graph traversal order.
class DumpVisitor : public Visitor
{
public:
  DumpVisitor(std::ostream &s) : _s(s) {}

  void visit(ops::InputOp &op) override { _s << "i" << std::to_string(op.getId()); };

  void visit(ops::ReluOp &op) override { _s << "r" << std::to_string(op.getId()); }

  void visit(ops::ConcatOp &op) override { _s << "c" << std::to_string(op.getId()); }

  // Stream the dump is written to; owned by the caller.
  std::ostream &_s;
};
+
// Builds a small graph (n1 feeds n2, n5; n2 feeds n3, n4), replaces n2 with
// n5, and checks via DumpVisitor that n3/n4 are re-attached to n5. Both
// accepted strings cover the two valid traversal orders of n2/n3.
TEST(NodeMutatorTest, SimpleChainTest)
{
  auto g = new Graph;
  mir::TensorType input_type{mir::DataType::FLOAT32, Shape{}};
  auto n1 = g->create<ops::InputOp>(input_type);
  auto n2 = g->create<ops::ReluOp>(n1->getOutput(0));
  auto n3 = g->create<ops::ReluOp>(n2->getOutput(0));
  auto n4 = g->create<ops::ReluOp>(n2->getOutput(0));
  auto n5 = g->create<ops::ReluOp>(n1->getOutput(0));

  g->replaceNode(n2, n5);

  // NOTE(review): std::stringstream is used but <sstream> is not included in
  // this file — presumably pulled in transitively; confirm and include it.
  std::stringstream ss;
  DumpVisitor d(ss);
  g->accept(&d);

  auto str = ss.str();
  ASSERT_TRUE(str == "i0r4r2r3" || str == "i0r4r3r2") << "str = " << str;
  delete g;
}
+
+} // namespace
diff --git a/compiler/mir/unittests/Operation.cpp b/compiler/mir/unittests/Operation.cpp
new file mode 100644
index 000000000..132f84696
--- /dev/null
+++ b/compiler/mir/unittests/Operation.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/Operation.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/InputOp.h"
+#include "mir/ops/ReshapeOp.h"
+#include "mir/ops/SoftmaxOp.h"
+
+#include <gtest/gtest.h>
+
+using namespace mir;
+
// Connecting an op's output to another op's input must make the producer
// reachable through the consumer's input.
TEST(Operation, ConnectionTest)
{

  mir::TensorType input_type{mir::DataType::FLOAT32, Shape{}};
  auto op1 = new ops::InputOp(input_type);
  op1->setId(0);
  auto op2 = new ops::ReshapeOp(op1->getOutput(0), Shape{});
  op2->setId(1);

  ASSERT_EQ(op1, op2->getInput(0)->getNode());

  delete op1;
  delete op2;
}
+
// The shape of an op's input must equal the output shape of its producer.
TEST(Operation, InputOutputShapeTest)
{
  Shape input_shape{1, 2, 3};

  mir::TensorType input_type{mir::DataType::FLOAT32, input_shape};
  ops::InputOp input(input_type);
  ops::SoftmaxOp op(input.getOutput(0), 0);

  ASSERT_EQ(input_shape, input.getOutputShape(0));
  ASSERT_EQ(input_shape, op.getInputShape(0));
}
+
// Negative softmax axes must be normalized against the input rank (3 here):
// -1 -> 2, -3 -> 0; a non-negative axis is kept as-is.
TEST(Operation, SoftmaxAxisTest)
{
  Shape input_shape{1, 2, 3};

  mir::TensorType input_type{mir::DataType::FLOAT32, input_shape};
  ops::InputOp input(input_type);

  ops::SoftmaxOp op_1(input.getOutput(0), 1);
  ASSERT_EQ(op_1.getAxis(), 1);

  ops::SoftmaxOp op_n1(input.getOutput(0), -1);
  ASSERT_EQ(op_n1.getAxis(), 2);

  ops::SoftmaxOp op_n3(input.getOutput(0), -3);
  ASSERT_EQ(op_n3.getAxis(), 0);
}
+
// Negative concat axes must be normalized against the input rank (3 here):
// -1 -> 2, -3 -> 0; a non-negative axis is kept as-is.
TEST(Operation, ConcatAxisTest)
{
  Shape in_shape{1, 2, 3};

  mir::TensorType in_type{mir::DataType::FLOAT32, in_shape};
  ops::InputOp input1(in_type), input2(in_type);

  ops::ConcatOp op_1({input1.getOutput(0), input2.getOutput(0)}, 1);
  ASSERT_EQ(op_1.getAxis(), 1);

  ops::ConcatOp op_n1({input1.getOutput(0), input2.getOutput(0)}, -1);
  ASSERT_EQ(op_n1.getAxis(), 2);

  ops::ConcatOp op_n3({input1.getOutput(0), input2.getOutput(0)}, -3);
  ASSERT_EQ(op_n3.getAxis(), 0);
}
+
// For every op kind listed in Operations.inc, getTypeName must return the
// literal enumerator spelling (the X-macro expands one assert per op).
TEST(Operation, OpNameTest)
{
#define HANDLE_OP(OpType, OpClass) ASSERT_EQ(getTypeName(Operation::Type::OpType), #OpType);
#include "mir/Operations.inc"
#undef HANDLE_OP
}
diff --git a/compiler/mir/unittests/ShapeInference.cpp b/compiler/mir/unittests/ShapeInference.cpp
new file mode 100644
index 000000000..bae4ec5e2
--- /dev/null
+++ b/compiler/mir/unittests/ShapeInference.cpp
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/Graph.h"
+#include "mir/ops/AddOp.h"
+#include "mir/ops/ReshapeOp.h"
+#include "mir/ops/ResizeOp.h"
+#include "mir/ops/SqueezeOp.h"
+#include "mir/ops/ReduceMeanOp.h"
+#include "mir/Shape.h"
+
+#include <vector>
+
+#include "gtest/gtest.h"
+
+using namespace mir;
+
// broadcastShapes must be symmetric: {2,1,2} x {3,1} -> {2,3,2} regardless
// of argument order.
TEST(ShapeInferenceTest, BidirectionalBroadcast)
{
  const Shape shape1{2, 1, 2};
  const Shape shape2{3, 1};
  const Shape reference{2, 3, 2};

  const Shape result1 = broadcastShapes(shape1, shape2);
  const Shape result2 = broadcastShapes(shape2, shape1);

  ASSERT_EQ(result1, reference);
  ASSERT_EQ(result2, reference);
}
+
// Reshape must infer the Shape::autoDim placeholder so the element count is
// preserved: 10*2*5 = 100 -> {10, 1, 10}.
TEST(ShapeInferenceTest, ReshapeAutoDimension)
{
  Graph g;

  Shape input_shape{10, 2, 5};
  Shape expected_shape{10, 1, 10};

  mir::TensorType input_type{mir::DataType::FLOAT32, input_shape};
  auto input = g.create<ops::InputOp>(input_type);
  auto op = g.create<ops::ReshapeOp>(input->getOutput(0), Shape{10, 1, Shape::autoDim});

  ASSERT_EQ(expected_shape, op->getOutputShape(0));
}
+
// When Resize is given an explicit target shape, the output shape must be
// exactly that shape.
TEST(ShapeInferenceTest, ResizeWithShape)
{
  Graph g;

  Shape result_shape{2, 10, 10, 3};

  mir::TensorType input_type{mir::DataType::FLOAT32, Shape{1, 5, 5, 3}};
  auto input = g.create<ops::InputOp>(input_type);

  auto op = g.create<ops::ResizeOp>(input->getOutput(0),
                                    ops::ResizeOp::ResizeMethod::nearestNeighbor, result_shape);

  ASSERT_EQ(result_shape, op->getOutputShape(0));
}
+
// When Resize is given per-dimension scales, each output dimension must be
// the input dimension multiplied by its scale: {1,5,5,3} x {1,6,2,1} ->
// {1,30,10,3}.
TEST(ShapeInferenceTest, ResizeWithScale)
{
  Graph g;

  Shape result_shape{1, 30, 10, 3};

  mir::TensorType input_type{mir::DataType::FLOAT32, Shape{1, 5, 5, 3}};
  auto input = g.create<ops::InputOp>(input_type);

  auto op =
      g.create<ops::ResizeOp>(input->getOutput(0), ops::ResizeOp::ResizeMethod::nearestNeighbor,
                              std::vector<float>{1, 6, 2, 1});

  ASSERT_EQ(result_shape, op->getOutputShape(0));
}
+
// ReduceMean with keep_dims = false must drop the reduced axes: reducing
// axes {1, 3} of {10,2,10,9} yields {10,10}.
TEST(ShapeInferenceTest, ReduceChangeRank)
{
  Graph g;

  Shape resultShape{10, 10};

  mir::TensorType input_type{mir::DataType::FLOAT32, Shape{10, 2, 10, 9}};
  auto input = g.create<ops::InputOp>(input_type);

  auto n = g.create<ops::ReduceMeanOp>(input->getOutput(0), std::vector<int32_t>{1, 3}, false);

  ASSERT_EQ(resultShape, n->getOutputShape(0));
}
+
// autoDim inference when the reshape lowers the rank: {10,2,10} (200
// elements) -> {10, autoDim} must give {10, 20}.
TEST(ShapeInferenceTest, ReshapeAutoDimensionShrink)
{
  Graph g;

  Shape input_shape{10, 2, 10};
  Shape result_shape_shrink{10, 20};

  mir::TensorType input_type{mir::DataType::FLOAT32, input_shape};
  auto input = g.create<ops::InputOp>(input_type);
  auto op = g.create<ops::ReshapeOp>(input->getOutput(0), Shape{10, Shape::autoDim});

  ASSERT_EQ(result_shape_shrink, op->getOutputShape(0));
}
+
// autoDim inference when the reshape raises the rank: {10,2,10} (200
// elements) -> {5, autoDim, 2, 2} must give {5, 10, 2, 2}.
TEST(ShapeInferenceTest, ReshapeAutoDimensionExpand)
{
  Graph g;

  Shape input_shape{10, 2, 10};
  Shape result_shape_expand{5, 10, 2, 2};

  mir::TensorType input_type{mir::DataType::FLOAT32, input_shape};
  auto input = g.create<ops::InputOp>(input_type);
  auto op = g.create<ops::ReshapeOp>(input->getOutput(0), Shape{5, Shape::autoDim, 2, 2});

  ASSERT_EQ(result_shape_expand, op->getOutputShape(0));
}
+
// autoDim inference amid several size-1 dimensions: {10,2,10} ->
// {1, autoDim, 2, 1, 10, 1} must give {1, 10, 2, 1, 10, 1}.
TEST(ShapeInferenceTest, ReshapeAutoDimensionUnsqueeze)
{
  Graph g;

  Shape input_shape{10, 2, 10};
  Shape result_shape_expand{1, 10, 2, 1, 10, 1};

  mir::TensorType input_type{mir::DataType::FLOAT32, input_shape};
  auto input = g.create<ops::InputOp>(input_type);
  auto op = g.create<ops::ReshapeOp>(input->getOutput(0), Shape{1, Shape::autoDim, 2, 1, 10, 1});

  ASSERT_EQ(result_shape_expand, op->getOutputShape(0));
}
+
// Squeeze with an empty dims list must remove every size-1 dimension:
// {1,2,1,4} -> {2,4}.
TEST(ShapeInferenceTest, SqueezeTestAllDims)
{
  Graph g;

  Shape input_shape{1, 2, 1, 4};
  Shape expected_shape{2, 4};

  mir::TensorType input_type{mir::DataType::FLOAT32, input_shape};
  auto input = g.create<ops::InputOp>(input_type);
  auto sq1 = g.create<ops::SqueezeOp>(input->getOutput(0), std::vector<int32_t>{});

  ASSERT_EQ(sq1->getOutputShape(0), expected_shape);
}
+
// Elementwise Add must broadcast both operands: {1,10,10,1} + {1,1,10,10}
// -> {1,10,10,10}.
TEST(ShapeInferenceTest, ElementwiseBC)
{
  Graph g;

  Shape input_shape{1, 10, 10, 1};
  Shape input2_shape{1, 1, 10, 10};

  mir::TensorType input_type{mir::DataType::FLOAT32, input_shape};
  mir::TensorType input2_type{mir::DataType::FLOAT32, input2_shape};

  auto input = g.create<ops::InputOp>(input_type);
  auto input2 = g.create<ops::InputOp>(input2_type);

  auto add = g.create<ops::AddOp>(input->getOutput(0), input2->getOutput(0));

  ASSERT_EQ(add->getOutputShape(0), Shape({1, 10, 10, 10}));
}
+
// Squeeze with an explicit dims list must remove only the listed dimension:
// squeezing axis 2 of {1,2,1,4} gives {1,2,4} (axis 0 keeps its 1).
TEST(ShapeInferenceTest, SqueezeTestSpecificDims)
{
  Graph g;

  Shape input_shape{1, 2, 1, 4};
  Shape expected_shape{1, 2, 4};

  mir::TensorType input_type{mir::DataType::FLOAT32, input_shape};
  auto input = g.create<ops::InputOp>(input_type);
  auto sq1 = g.create<ops::SqueezeOp>(input->getOutput(0), std::vector<int32_t>{2});

  ASSERT_EQ(sq1->getOutputShape(0), expected_shape);
}
+
// Squeezing an all-ones shape must yield {1}, not a rank-0 shape.
TEST(ShapeInferenceTest, SqueezeTestScalarResult)
{
  Graph g;

  Shape input_shape{1, 1, 1, 1};
  Shape expected_shape{1};

  mir::TensorType input_type{mir::DataType::FLOAT32, input_shape};
  auto input = g.create<ops::InputOp>(input_type);
  auto sq1 = g.create<ops::SqueezeOp>(input->getOutput(0), std::vector<int32_t>{});

  ASSERT_EQ(sq1->getOutputShape(0), expected_shape);
}
diff --git a/compiler/mir/unittests/ShapeRange.cpp b/compiler/mir/unittests/ShapeRange.cpp
new file mode 100644
index 000000000..3b32d0c61
--- /dev/null
+++ b/compiler/mir/unittests/ShapeRange.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+#include "mir/ShapeRange.h"
+
+using namespace mir;
+
+namespace
+{
+
// Test parameter: a shape plus the element count the iterator is expected
// to visit for that shape.
struct ParamType
{
  int32_t actual_length; // expected number of indices enumerated
  Shape shape;           // shape whose range is iterated

  // Variadic convenience constructor: ParamType{len, d0, d1, ...} builds the
  // shape {d0, d1, ...} with expected length 'len'.
  template <typename... Args>
  explicit ParamType(int32_t actual_len, Args &&... args)
      : actual_length(actual_len), shape({static_cast<int32_t>(args)...})
  {
  }
};
+
// Value-parameterized fixture; each instantiation supplies one ParamType.
class ShapeIteratorTest : public ::testing::TestWithParam<ParamType>
{
};
+
// Iterating a ShapeRange must visit exactly numElements indices.
TEST_P(ShapeIteratorTest, ElementCount)
{
  Shape sh(GetParam().shape);
  ShapeRange r(sh);

  int32_t cnt = 0;
  for (auto &idx : r)
  {
    (void)idx; // only the visit count matters here
    cnt++;
  }

  ASSERT_EQ(cnt, GetParam().actual_length);
}
+
// Shapes of various ranks, including degenerate all-ones shapes; the first
// number of each entry is the expected element count.
std::vector<ParamType> test_data{ParamType{6, 1, 2, 3}, ParamType{16, 2, 2, 4},
                                 ParamType{1, 1, 1, 1, 1, 1}, ParamType{5, 5, 1, 1, 1, 1, 1}};

INSTANTIATE_TEST_CASE_P(SimpleInput, ShapeIteratorTest, ::testing::ValuesIn(test_data));
+
+TEST(ShapeRange, Contains)
+{
+ const int h = 2;
+ const int w = 3;
+ Shape shape{static_cast<int32_t>(h), static_cast<int32_t>(w)};
+ ShapeRange range(shape);
+ Index index{0, 0, 0, 0};
+ for (int32_t row = -2; row < h + 1; ++row)
+ for (int32_t col = -2; col < w + 1; ++col)
+ {
+ Index idx{row, col};
+ if (row < 0 || row >= h || col < 0 || col >= w)
+ ASSERT_FALSE(range.contains(idx));
+ else
+ ASSERT_TRUE(range.contains(idx));
+ }
+}
+} // namespace
diff --git a/compiler/mir/unittests/TensorVariant.cpp b/compiler/mir/unittests/TensorVariant.cpp
new file mode 100644
index 000000000..745885127
--- /dev/null
+++ b/compiler/mir/unittests/TensorVariant.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/TensorVariant.h"
+
+#include <gtest/gtest.h>
+
+using namespace mir;
+
+// A freshly created tensor reports the shape it was built with, and the
+// first element lives at offset 0.
+TEST(TensorVariant, BasicTest)
+{
+  Shape shape{2, 2};
+  TensorVariant t(DataType::FLOAT32, shape);
+
+  ASSERT_EQ(t.getShape(), shape);
+  ASSERT_EQ(t.getOffset({0, 0}), 0u);
+}
+
+// The element size must be deduced from the data type, and elements must be
+// stored densely: index {1,1,1} of a 2x2x2 tensor is the 8th (last) element.
+TEST(TensorVariant, ElementSizeDeductionTest)
+{
+  Shape shape{2, 2, 2};
+  TensorVariant t(DataType::FLOAT32, shape);
+
+  ASSERT_EQ(t.getElementSize(), sizeof(float));
+  // Relies on contiguous storage: last index == base pointer + 7 floats.
+  ASSERT_EQ((float *)t.at({1, 1, 1}), (float *)t.at({0, 0, 0}) + 7);
+}
diff --git a/compiler/mir2loco/CMakeLists.txt b/compiler/mir2loco/CMakeLists.txt
new file mode 100644
index 000000000..49bf3dbde
--- /dev/null
+++ b/compiler/mir2loco/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Build the mir->loco conversion library from all non-test sources.
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(mir2loco STATIC ${SOURCES})
+target_include_directories(mir2loco PRIVATE src)
+target_include_directories(mir2loco PUBLIC include)
+target_link_libraries(mir2loco PUBLIC mir)
+target_link_libraries(mir2loco PUBLIC loco)
+target_link_libraries(mir2loco PRIVATE stdex)
+
+# Unit tests are optional: skip them silently when GTest is unavailable.
+nnas_find_package(GTest QUIET)
+
+if(NOT GTest_FOUND)
+  return()
+endif(NOT GTest_FOUND)
+
+GTest_AddTest(mir2loco_test ${TESTS})
+target_link_libraries(mir2loco_test mir2loco)
diff --git a/compiler/mir2loco/include/mir2loco.h b/compiler/mir2loco/include/mir2loco.h
new file mode 100644
index 000000000..54dcbf8a6
--- /dev/null
+++ b/compiler/mir2loco/include/mir2loco.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir/Graph.h"
+#include "mir/Visitor.h"
+#include "loco.h"
+
+namespace mir2loco
+{
+
+// Converts a mir::Graph into an equivalent loco::Graph by visiting every mir
+// operation and emitting the corresponding loco node(s).
+// NOTE(review): this header uses std::unique_ptr / std::unordered_map but the
+// visible includes do not pull in <memory> or <unordered_map>; presumably they
+// arrive transitively via the mir/loco headers - confirm.
+class Transformer final : public mir::Visitor
+{
+public:
+  Transformer() = default;
+  ~Transformer() = default;
+
+  // One visitor per supported mir operation. Each one looks up the loco nodes
+  // of the operation's inputs in _mir2loco_map and registers its own output(s)
+  // there for downstream operations.
+  void visit(mir::ops::AddOp &op) override;
+  void visit(mir::ops::AvgPool2DOp &op) override;
+  void visit(mir::ops::ConcatOp &op) override;
+  void visit(mir::ops::ConstantOp &op) override;
+  void visit(mir::ops::Conv2DOp &op) override;
+  void visit(mir::ops::DeConv2DOp &op) override;
+  void visit(mir::ops::DepthwiseConv2DOp &op) override;
+  void visit(mir::ops::DivOp &op) override;
+  void visit(mir::ops::FullyConnectedOp &op) override;
+  void visit(mir::ops::InputOp &op) override;
+  void visit(mir::ops::MaxPool2DOp &op) override;
+  void visit(mir::ops::MulOp &op) override;
+  void visit(mir::ops::OutputOp &op) override;
+  void visit(mir::ops::ReluOp &op) override;
+  void visit(mir::ops::ReshapeOp &op) override;
+  void visit(mir::ops::SoftmaxOp &op) override;
+  void visit(mir::ops::SubOp &op) override;
+  void visit(mir::ops::TransposeOp &op) override;
+
+  // Called for any operation without a dedicated visitor above (throws).
+  void visit_fallback(mir::Operation &op) override;
+
+  // Builds and returns a fresh loco graph equivalent to `mir_graph`.
+  std::unique_ptr<loco::Graph> transform(mir::Graph *mir_graph);
+
+private:
+  std::unique_ptr<loco::Graph> _loco_graph; // graph under construction
+  std::unordered_map<mir::Operation::Output *, loco::Node *> _mir2loco_map; // mir output -> loco node
+};
+
+} // namespace mir2loco
diff --git a/compiler/mir2loco/requires.cmake b/compiler/mir2loco/requires.cmake
new file mode 100644
index 000000000..36482214d
--- /dev/null
+++ b/compiler/mir2loco/requires.cmake
@@ -0,0 +1,2 @@
+require("loco")
+require("mir")
diff --git a/compiler/mir2loco/src/mir2loco.cpp b/compiler/mir2loco/src/mir2loco.cpp
new file mode 100644
index 000000000..fc1f6933b
--- /dev/null
+++ b/compiler/mir2loco/src/mir2loco.cpp
@@ -0,0 +1,725 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir2loco.h"
+
+#include "mir/ops/AddOp.h"
+#include "mir/ops/AvgPool2DOp.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/Deconv2DOp.h"
+#include "mir/ops/DepthwiseConv2DOp.h"
+#include "mir/ops/DivOp.h"
+#include "mir/ops/FullyConnectedOp.h"
+#include "mir/ops/MaxPool2DOp.h"
+#include "mir/ops/MulOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/ops/ReshapeOp.h"
+#include "mir/ops/SoftmaxOp.h"
+#include "mir/ops/SubOp.h"
+#include "mir/ops/TransposeOp.h"
+
+#include "mir/ShapeRange.h"
+
+#include <cassert>
+#include <cstring>
+#include <stdex/Memory.h>
+
+namespace mir2loco
+{
+namespace
+{
+// Copies a mir shape into any loco node that exposes rank()/dim() setters.
+template <class NodeType> void setupShape(const mir::Shape &shape, NodeType *node)
+{
+  node->rank(shape.rank());
+  for (int32_t i = 0; i < shape.rank(); i++)
+  {
+    node->dim(i) = static_cast<uint32_t>(shape.dim(i));
+  }
+}
+
+// Converts a mir shape into a heap-allocated loco::TensorShape.
+std::unique_ptr<loco::TensorShape> make_tensor_shape(const mir::Shape &shape)
+{
+  auto res = stdex::make_unique<loco::TensorShape>();
+  setupShape(shape, res.get());
+  // Plain return: the local is moved implicitly; wrapping it in std::move
+  // would only inhibit copy elision (-Wpessimizing-move).
+  return res;
+}
+
+// mir stores 2D paddings as {vertical, horizontal} pairs.
+void setupPad(const std::vector<std::int32_t> &padding_before,
+              const std::vector<std::int32_t> &padding_after, loco::Padding2D *pad)
+{
+  assert(padding_before.size() == 2 && padding_after.size() == 2);
+  pad->top(padding_before[0]);
+  pad->left(padding_before[1]);
+  pad->bottom(padding_after[0]);
+  pad->right(padding_after[1]);
+}
+
+// window_size is {vertical, horizontal}.
+void setupWindow(const std::vector<std::int32_t> &window_size, loco::Window<2> *window)
+{
+  assert(window_size.size() == 2);
+  window->vertical(window_size[0]);
+  window->horizontal(window_size[1]);
+}
+
+// strides is {vertical, horizontal}.
+void setupStride(const std::vector<std::int32_t> &strides, loco::Stride<2> *stride)
+{
+  assert(strides.size() == 2);
+  stride->vertical(strides[0]);
+  stride->horizontal(strides[1]);
+}
+
+// Maps a mir data format (NHWC or NCHW) onto loco feature-map axes.
+loco::Permutation<loco::Domain::Feature> createFeaturePermutation(mir::DataFormat format)
+{
+  loco::Permutation<loco::Domain::Feature> perm;
+  if (format == mir::DataFormat::NHWC)
+  {
+    perm.axis(loco::FeatureAxis::Count) = 0;
+    perm.axis(loco::FeatureAxis::Height) = 1;
+    perm.axis(loco::FeatureAxis::Width) = 2;
+    perm.axis(loco::FeatureAxis::Depth) = 3;
+  }
+  else
+  {
+    assert(format == mir::DataFormat::NCHW);
+    perm.axis(loco::FeatureAxis::Count) = 0;
+    perm.axis(loco::FeatureAxis::Depth) = 1;
+    perm.axis(loco::FeatureAxis::Height) = 2;
+    perm.axis(loco::FeatureAxis::Width) = 3;
+  }
+  return perm;
+}
+
+std::unique_ptr<loco::FeatureEncoder> createFeatureEncoder(mir::DataFormat data_format)
+{
+  auto perm = createFeaturePermutation(data_format);
+  return stdex::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>(perm);
+}
+
+std::unique_ptr<loco::FeatureDecoder> createFeatureDecoder(mir::DataFormat data_format)
+{
+  auto perm = createFeaturePermutation(data_format);
+  return stdex::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>(perm);
+}
+
+// Filter layout used by mir's Conv2D kernels: [Out, H, W, In].
+std::unique_ptr<loco::FilterEncoder> createOHWIFilterEncoder()
+{
+  loco::Permutation<loco::Domain::Filter> perm;
+  perm.axis(loco::FilterAxis::Count) = 0;
+  perm.axis(loco::FilterAxis::Height) = 1;
+  perm.axis(loco::FilterAxis::Width) = 2;
+  perm.axis(loco::FilterAxis::Depth) = 3;
+  return stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>(perm);
+}
+
+// Filter layout used by mir's DeConv2D kernels: [H, W, Out, In].
+std::unique_ptr<loco::FilterEncoder> createHWOIFilterEncoder()
+{
+  loco::Permutation<loco::Domain::Filter> perm;
+  perm.axis(loco::FilterAxis::Height) = 0;
+  perm.axis(loco::FilterAxis::Width) = 1;
+  perm.axis(loco::FilterAxis::Count) = 2;
+  perm.axis(loco::FilterAxis::Depth) = 3;
+  return stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>(perm);
+}
+
+// Depthwise filter layout [H, W, In, Multiplier].
+std::unique_ptr<loco::DepthwiseFilterEncoder> createHWIMDepthwiseFilterEncoder()
+{
+  loco::Permutation<loco::Domain::DepthwiseFilter> perm;
+  perm.axis(loco::DepthwiseFilterAxis::Height) = 0;
+  perm.axis(loco::DepthwiseFilterAxis::Width) = 1;
+  perm.axis(loco::DepthwiseFilterAxis::Depth) = 2;
+  perm.axis(loco::DepthwiseFilterAxis::Multiplier) = 3;
+  return stdex::make_unique<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>(perm);
+}
+
+// Depthwise filter layout [In, H, W, Multiplier].
+std::unique_ptr<loco::DepthwiseFilterEncoder> createIHWMDepthwiseFilterEncoder()
+{
+  loco::Permutation<loco::Domain::DepthwiseFilter> perm;
+  perm.axis(loco::DepthwiseFilterAxis::Depth) = 0;
+  perm.axis(loco::DepthwiseFilterAxis::Height) = 1;
+  perm.axis(loco::DepthwiseFilterAxis::Width) = 2;
+  perm.axis(loco::DepthwiseFilterAxis::Multiplier) = 3;
+  return stdex::make_unique<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>(perm);
+}
+
+std::unique_ptr<loco::MatrixEncoder> createHWMatrixEncoder()
+{
+  loco::Permutation<loco::Domain::Matrix> perm;
+  perm.axis(loco::MatrixAxis::Height) = 0;
+  perm.axis(loco::MatrixAxis::Width) = 1;
+  return stdex::make_unique<loco::PermutingEncoder<loco::Domain::Matrix>>(perm);
+}
+
+std::unique_ptr<loco::MatrixDecoder> createHWMatrixDecoder()
+{
+  loco::Permutation<loco::Domain::Matrix> perm;
+  perm.axis(loco::MatrixAxis::Height) = 0;
+  perm.axis(loco::MatrixAxis::Width) = 1;
+  return stdex::make_unique<loco::PermutingDecoder<loco::Domain::Matrix>>(perm);
+}
+
+// Maps mir data types onto loco's; unsupported types raise.
+loco::DataType convertDataType(mir::DataType data_type)
+{
+  switch (data_type)
+  {
+    case mir::DataType::UNKNOWN:
+      return loco::DataType::Unknown;
+    case mir::DataType::FLOAT32:
+      return loco::DataType::FLOAT32;
+    case mir::DataType::FLOAT64:
+      return loco::DataType::FLOAT64;
+    case mir::DataType::INT32:
+      return loco::DataType::S32;
+    case mir::DataType::INT64:
+      return loco::DataType::S64;
+    default:
+      break;
+  }
+  throw std::runtime_error("Unsupported data type");
+}
+
+// Wraps `node` in a TensorBroadcast (preceded by a rank-equalizing
+// FixedReshape when necessary) so that `shape` can broadcast to `out_shape`.
+// Returns `node` unchanged when the shapes already match.
+loco::Node *createBroadcastIfNeeded(loco::Node *node, const mir::Shape &shape,
+                                    const mir::Shape &out_shape)
+{
+  auto graph = node->graph();
+
+  if (shape == out_shape)
+    return node; // not needed
+
+  int32_t out_rank = out_shape.rank();
+  int32_t rank_diff = out_rank - shape.rank();
+  // Create Broadcast
+  auto *broadcast = graph->nodes()->create<loco::TensorBroadcast>();
+  // Create Reshape for equal ranks
+  if (shape.rank() != out_rank)
+  {
+    auto *reshape = graph->nodes()->create<loco::FixedReshape>();
+    reshape->input(node);
+    reshape->rank(out_rank);
+    broadcast->input(reshape);
+    // Prepend 1-sized dims, then copy the original dims.
+    for (int32_t dim = 0; dim < out_rank; dim++)
+    {
+      if (dim < rank_diff)
+        reshape->dim(dim) = 1;
+      else
+        reshape->dim(dim) = shape.dim(dim - rank_diff);
+    }
+  }
+  else
+  {
+    broadcast->input(node);
+  }
+  // Tracks whether the two shapes are broadcast-compatible.
+  bool compatible_shapes = true;
+  for (int32_t dim = 0; dim < out_rank; dim++)
+  {
+    // Set broadcast mapping
+    if (dim < rank_diff || (shape.dim(dim - rank_diff) == 1 && out_shape.dim(dim) != 1))
+      broadcast->mapping()->dim(dim) = out_shape.dim(dim);
+    // A dim may only differ from the output dim if it is 1 (broadcastable).
+    if (dim >= rank_diff && shape.dim(dim - rank_diff) != 1 &&
+        shape.dim(dim - rank_diff) != out_shape.dim(dim))
+      compatible_shapes = false;
+  }
+  // Check compatibility
+  if (!compatible_shapes)
+    throw std::runtime_error("Not compatible shapes for broadcasting!");
+
+  return broadcast;
+}
+
+// Shared lowering for binary element-wise ops: broadcasts both operands to the
+// output shape and wires them into a new NodeType (lhs/rhs interface).
+template <typename NodeType>
+NodeType *createEltwiseBinary(const mir::ops::BinaryElementwiseOp &op, loco::Node *lhs,
+                              loco::Node *rhs)
+{
+  auto graph = lhs->graph();
+
+  const auto &lhs_shape = op.getInput(0)->getShape();
+  const auto &rhs_shape = op.getInput(1)->getShape();
+  const auto &out_shape = op.getOutputShape(0);
+  // Create Broadcast if it's needed
+  auto lhs_node = createBroadcastIfNeeded(lhs, lhs_shape, out_shape);
+  auto rhs_node = createBroadcastIfNeeded(rhs, rhs_shape, out_shape);
+  // Create Node
+  auto result = graph->nodes()->create<NodeType>();
+  result->lhs(lhs_node);
+  result->rhs(rhs_node);
+  return result;
+}
+} // namespace
+
+// Element-wise addition, lowered through the shared binary-eltwise helper
+// (which inserts broadcasts when the operand shapes differ).
+void Transformer::visit(mir::ops::AddOp &op)
+{
+  loco::Node *lhs_node = _mir2loco_map.at(op.getInput(0));
+  loco::Node *rhs_node = _mir2loco_map.at(op.getInput(1));
+  // Shape inference is left to loco; only the node mapping is recorded.
+  _mir2loco_map.emplace(op.getOutput(0),
+                        createEltwiseBinary<loco::EltwiseAdd>(op, lhs_node, rhs_node));
+}
+
+// AvgPool2D: wrap the input in FeatureEncode/FeatureDecode so loco sees a
+// canonical feature map, then configure window, stride and padding.
+void Transformer::visit(mir::ops::AvgPool2DOp &op)
+{
+  loco::Node *input = _mir2loco_map.at(op.getInput(0));
+
+  auto *encoded_input = _loco_graph->nodes()->create<loco::FeatureEncode>();
+  encoded_input->input(input);
+  encoded_input->encoder(createFeatureEncoder(op.getDataFormat()));
+
+  auto *avg_pool_node = _loco_graph->nodes()->create<loco::AvgPool2D>();
+  avg_pool_node->ifm(encoded_input);
+  // mir's include-pad flag maps onto loco's Full/Valid averaging convention.
+  avg_pool_node->convention(op.getIncludePad() ? loco::AvgPool2D::Convention::Full
+                                               : loco::AvgPool2D::Convention::Valid);
+  setupWindow(op.getWindowSize(), avg_pool_node->window());
+  setupStride(op.getStrides(), avg_pool_node->stride());
+  setupPad(op.getPaddingBefore(), op.getPaddingAfter(), avg_pool_node->pad());
+
+  auto *output = _loco_graph->nodes()->create<loco::FeatureDecode>();
+  output->input(avg_pool_node);
+  output->decoder(createFeatureDecoder(op.getDataFormat()));
+
+  _mir2loco_map.emplace(op.getOutput(0), output);
+}
+
+// Concat: loco::TensorConcat is binary, so mir's N-ary concat is lowered to a
+// left-leaning chain of binary concat nodes along the same axis.
+void Transformer::visit(mir::ops::ConcatOp &op)
+{
+  if (op.getNumInputs() < 2)
+    throw std::runtime_error("Not enough tensors for concatenation!");
+
+  loco::Node *last_concat = nullptr;
+
+  for (std::size_t i = 1; i < op.getNumInputs(); i++)
+  {
+    // lhs is the chain built so far; on the first iteration it is input 0.
+    loco::Node *lhs = last_concat;
+    if (lhs == nullptr)
+    {
+      mir::Operation::Output *mir_lhs = op.getInput(i - 1);
+      lhs = _mir2loco_map.at(mir_lhs);
+    }
+    mir::Operation::Output *mir_rhs = op.getInput(i);
+    loco::Node *rhs = _mir2loco_map.at(mir_rhs);
+    // Create TensorConcat
+    auto concat_node = _loco_graph->nodes()->create<loco::TensorConcat>();
+    // Set inputs
+    concat_node->lhs(lhs);
+    concat_node->rhs(rhs);
+    // Set axis
+    concat_node->axis(op.getAxis());
+    // Set last concat
+    last_concat = concat_node;
+  }
+  // Only the final node of the chain represents the mir output.
+  _mir2loco_map.emplace(op.getOutput(0), last_concat);
+}
+
+// ConstantOp: materialize the mir tensor as a loco::ConstGen. FLOAT64/INT64
+// payloads are narrowed to FLOAT32/S32 element by element because loco's
+// DataTypeImpl does not cover them yet.
+void Transformer::visit(mir::ops::ConstantOp &op)
+{
+  auto const_node = _loco_graph->nodes()->create<loco::ConstGen>();
+  // ConstGen has no inputs; only shape, dtype and payload are set.
+  const auto &out_shape = op.getOutputShape(0);
+  setupShape(out_shape, const_node);
+  // Copy value
+  const auto &value = op.getValue();
+  const_node->dtype(convertDataType(value.getElementType()));
+  // TODO Support other data types
+  switch (const_node->dtype())
+  {
+    case loco::DataType::FLOAT32:
+    {
+      const_node->size<loco::DataType::FLOAT32>(out_shape.numElements());
+      float &const_float = const_node->at<loco::DataType::FLOAT32>(0);
+      char *loco_ptr = reinterpret_cast<char *>(&const_float);
+      // Index(rank) is the all-zero index, i.e. the start of the buffer.
+      char *mir_ptr = value.at(mir::Index(out_shape.rank()));
+      std::memcpy(loco_ptr, mir_ptr, out_shape.numElements() * sizeof(float));
+      break;
+    }
+    case loco::DataType::FLOAT64:
+    {
+      // TODO Change that when loco support other DataTypeImpl
+      const_node->dtype(loco::DataType::FLOAT32);
+      const_node->size<loco::DataType::FLOAT32>(out_shape.numElements());
+      float &const_float = const_node->at<loco::DataType::FLOAT32>(0);
+      char *mir_ptr = value.at(mir::Index(out_shape.rank()));
+      double *mir_double = reinterpret_cast<double *>(mir_ptr);
+      float *loco_float = &const_float;
+      // ShapeRange is used purely to iterate numElements() times.
+      for (const mir::Index &idx : mir::ShapeRange(out_shape))
+      {
+        (void)idx;
+        *loco_float = static_cast<float>(*mir_double);
+        loco_float++;
+        mir_double++;
+      }
+      break;
+    }
+    case loco::DataType::S32:
+    {
+      const_node->size<loco::DataType::S32>(out_shape.numElements());
+      int32_t &const_int32 = const_node->at<loco::DataType::S32>(0);
+      char *loco_ptr = reinterpret_cast<char *>(&const_int32);
+      char *mir_ptr = value.at(mir::Index(out_shape.rank()));
+      std::memcpy(loco_ptr, mir_ptr, out_shape.numElements() * sizeof(int32_t));
+      break;
+    }
+    case loco::DataType::S64:
+    {
+      // TODO Change that when loco support other DataTypeImpl
+      const_node->dtype(loco::DataType::S32);
+      const_node->size<loco::DataType::S32>(out_shape.numElements());
+      int32_t &const_int32 = const_node->at<loco::DataType::S32>(0);
+      char *mir_ptr = value.at(mir::Index(out_shape.rank()));
+      int64_t *mir_int64 = reinterpret_cast<int64_t *>(mir_ptr);
+      int32_t *loco_int32 = &const_int32;
+      for (const mir::Index &idx : mir::ShapeRange(out_shape))
+      {
+        (void)idx;
+        // Fix: the original narrowed through `float`, corrupting int64 values
+        // above 2^24; narrow directly to int32_t instead.
+        *loco_int32 = static_cast<int32_t>(*mir_int64);
+        loco_int32++;
+        mir_int64++;
+      }
+      break;
+    }
+    default:
+      // Fix: the original constructed the exception but never threw it,
+      // silently accepting unsupported dtypes.
+      throw std::runtime_error("Unsupported data type");
+  }
+  // Add to map
+  _mir2loco_map.emplace(op.getOutput(0), const_node);
+}
+
+// Conv2D: ordinary convolutions map directly onto loco::Conv2D. loco has no
+// grouped convolution, so the depthwise special case (group size 1, output
+// channels == input channels) is rewritten as DepthwiseConv2D; any other
+// grouped form is rejected.
+void Transformer::visit(mir::ops::Conv2DOp &op)
+{
+  mir::Operation::Output *mir_input = op.getInput(0);
+  mir::Operation::Output *mir_filter = op.getInput(1);
+
+  loco::Node *input = _mir2loco_map.at(mir_input);
+  loco::Node *filter = _mir2loco_map.at(mir_filter);
+
+  // loco does not have grouped Conv2D operation. Try to translate into something else.
+  if (op.getNumGroups() != 1)
+  {
+    const std::int32_t group_size = mir_filter->getShape().dim(3);
+    const std::int32_t num_in_channels = group_size * op.getNumGroups();
+    const std::int32_t num_out_channels = mir_filter->getShape().dim(0);
+
+    // If the size of the group is 1, translate the operation into DepthwiseConv2D. Limit ourselves
+    // with the case of 'multiplier' == 1 for now.
+    if (group_size == 1 && (num_out_channels == num_in_channels))
+    {
+      // [O, H, W, I / group] == [I, H, W, M].
+      auto *encoded_input = _loco_graph->nodes()->create<loco::FeatureEncode>();
+      encoded_input->input(input);
+      encoded_input->encoder(createFeatureEncoder(op.getDataFormat()));
+
+      auto *encoded_filter = _loco_graph->nodes()->create<loco::DepthwiseFilterEncode>();
+      encoded_filter->input(filter);
+      encoded_filter->encoder(createIHWMDepthwiseFilterEncoder());
+
+      auto *dw_conv2d_node = _loco_graph->nodes()->create<loco::DepthwiseConv2D>();
+      dw_conv2d_node->ifm(encoded_input);
+      dw_conv2d_node->ker(encoded_filter);
+      setupStride(op.getStrides(), dw_conv2d_node->stride());
+      setupPad(op.getPaddingBefore(), op.getPaddingAfter(), dw_conv2d_node->pad());
+
+      auto *output = _loco_graph->nodes()->create<loco::FeatureDecode>();
+      output->input(dw_conv2d_node);
+      output->decoder(createFeatureDecoder(op.getDataFormat()));
+
+      _mir2loco_map.emplace(op.getOutput(0), output);
+    }
+    else
+    {
+      // There are few things we can do here:
+      // 1) If group_size == 1, reshape the kernel [O, H, W, I / group] == [I * M, H, W, 1] ->
+      //    [I, M, H, W] and use DepthwiseConv2D.
+      // 2) Split the operation into smaller Conv2Ds.
+      // 3) Replicate the filter along 'O' axis 'num_groups' times, zero out some elements, and use
+      //    ordinary Conv2D.
+      throw std::runtime_error("Grouped Conv2D operation is not fully supported.");
+    }
+  }
+  else
+  {
+    // Plain convolution: FeatureEncode -> Conv2D -> FeatureDecode.
+    auto *encoded_input = _loco_graph->nodes()->create<loco::FeatureEncode>();
+    encoded_input->input(input);
+    encoded_input->encoder(createFeatureEncoder(op.getDataFormat()));
+
+    auto *encoded_filter = _loco_graph->nodes()->create<loco::FilterEncode>();
+    encoded_filter->input(filter);
+    encoded_filter->encoder(createOHWIFilterEncoder());
+
+    auto *conv2d_node = _loco_graph->nodes()->create<loco::Conv2D>();
+    conv2d_node->ifm(encoded_input);
+    conv2d_node->ker(encoded_filter);
+    setupStride(op.getStrides(), conv2d_node->stride());
+    setupPad(op.getPaddingBefore(), op.getPaddingAfter(), conv2d_node->pad());
+
+    auto *output = _loco_graph->nodes()->create<loco::FeatureDecode>();
+    output->input(conv2d_node);
+    output->decoder(createFeatureDecoder(op.getDataFormat()));
+
+    _mir2loco_map.emplace(op.getOutput(0), output);
+  }
+}
+
+// DeConv2D -> loco::TransposedConv2D with an HWOI-encoded filter. Only
+// explicit paddings can be represented in loco.
+void Transformer::visit(mir::ops::DeConv2DOp &op)
+{
+  mir::Operation::Output *mir_input = op.getInput(0);
+  mir::Operation::Output *mir_filter = op.getInput(1);
+
+  loco::Node *input = _mir2loco_map.at(mir_input);
+  loco::Node *filter = _mir2loco_map.at(mir_filter);
+
+  auto *encoded_input = _loco_graph->nodes()->create<loco::FeatureEncode>();
+  encoded_input->input(input);
+  encoded_input->encoder(createFeatureEncoder(op.getDataFormat()));
+
+  auto *encoded_filter = _loco_graph->nodes()->create<loco::FilterEncode>();
+  encoded_filter->input(filter);
+  encoded_filter->encoder(createHWOIFilterEncoder());
+
+  auto *tr_conv2d_node = _loco_graph->nodes()->create<loco::TransposedConv2D>();
+  tr_conv2d_node->ifm(encoded_input);
+  tr_conv2d_node->ker(encoded_filter);
+  setupStride(op.getStrides(), tr_conv2d_node->stride());
+  if (op.getPaddingType() == mir::ops::PaddingType::Explicit)
+    setupPad(op.getPaddingBefore(), op.getPaddingAfter(), tr_conv2d_node->pad());
+  else
+    throw std::runtime_error("Not supported non explicit paddings on loco!");
+
+  auto *output = _loco_graph->nodes()->create<loco::FeatureDecode>();
+  output->input(tr_conv2d_node);
+  output->decoder(createFeatureDecoder(op.getDataFormat()));
+
+  _mir2loco_map.emplace(op.getOutput(0), output);
+}
+
+// DepthwiseConv2D -> loco::DepthwiseConv2D; mir's kernels use HWIM layout.
+void Transformer::visit(mir::ops::DepthwiseConv2DOp &op)
+{
+  mir::Operation::Output *mir_input = op.getInput(0);
+  mir::Operation::Output *mir_filter = op.getInput(1);
+
+  loco::Node *input = _mir2loco_map.at(mir_input);
+  loco::Node *filter = _mir2loco_map.at(mir_filter);
+
+  auto *encoded_input = _loco_graph->nodes()->create<loco::FeatureEncode>();
+  encoded_input->input(input);
+  encoded_input->encoder(createFeatureEncoder(op.getDataFormat()));
+
+  auto *encoded_filter = _loco_graph->nodes()->create<loco::DepthwiseFilterEncode>();
+  encoded_filter->input(filter);
+  encoded_filter->encoder(createHWIMDepthwiseFilterEncoder());
+
+  auto *dw_conv2d_node = _loco_graph->nodes()->create<loco::DepthwiseConv2D>();
+  dw_conv2d_node->ifm(encoded_input);
+  dw_conv2d_node->ker(encoded_filter);
+  setupStride(op.getStrides(), dw_conv2d_node->stride());
+  setupPad(op.getPaddingBefore(), op.getPaddingAfter(), dw_conv2d_node->pad());
+
+  auto *output = _loco_graph->nodes()->create<loco::FeatureDecode>();
+  output->input(dw_conv2d_node);
+  output->decoder(createFeatureDecoder(op.getDataFormat()));
+
+  _mir2loco_map.emplace(op.getOutput(0), output);
+}
+
+// Element-wise division, lowered through the shared binary-eltwise helper
+// (which handles broadcasting of mismatched operand shapes).
+void Transformer::visit(mir::ops::DivOp &op)
+{
+  loco::Node *dividend = _mir2loco_map.at(op.getInput(0));
+  loco::Node *divisor = _mir2loco_map.at(op.getInput(1));
+  auto *div_node = createEltwiseBinary<loco::EltwiseDiv>(op, dividend, divisor);
+  _mir2loco_map.emplace(op.getOutput(0), div_node);
+}
+
+// FullyConnected -> MatrixEncode x2 -> MatMul -> MatrixDecode. Only the 2D
+// (matrix-times-matrix) form is supported.
+void Transformer::visit(mir::ops::FullyConnectedOp &op)
+{
+  mir::Operation::Output *mir_lhs = op.getInput(0);
+  mir::Operation::Output *mir_rhs = op.getInput(1);
+  // Check 2D shape
+  assert(op.getInput(0)->getShape().rank() == 2);
+  assert(op.getInput(1)->getShape().rank() == 2);
+
+  loco::Node *lhs = _mir2loco_map.at(mir_lhs);
+  loco::Node *rhs = _mir2loco_map.at(mir_rhs);
+
+  auto *encoded_lhs = _loco_graph->nodes()->create<loco::MatrixEncode>();
+  encoded_lhs->input(lhs);
+  encoded_lhs->encoder(createHWMatrixEncoder());
+
+  auto *encoded_rhs = _loco_graph->nodes()->create<loco::MatrixEncode>();
+  encoded_rhs->input(rhs);
+  encoded_rhs->encoder(createHWMatrixEncoder());
+
+  auto *mat_mul = _loco_graph->nodes()->create<loco::MatMul>();
+  mat_mul->lhs(encoded_lhs);
+  mat_mul->rhs(encoded_rhs);
+
+  auto *output = _loco_graph->nodes()->create<loco::MatrixDecode>();
+  output->input(mat_mul);
+  output->decoder(createHWMatrixDecoder());
+
+  _mir2loco_map.emplace(op.getOutput(0), output);
+}
+
+// InputOp: create a graph input plus its Pull node and link them; the Pull
+// node stands in for the mir input's tensor in the rest of the graph.
+void Transformer::visit(mir::ops::InputOp &op)
+{
+  mir::Operation::Output *mir_output = op.getOutput(0);
+
+  loco::GraphInput *graph_input = _loco_graph->inputs()->create();
+  graph_input->name(mir_output->getName());
+  graph_input->dtype(convertDataType(mir_output->getElementType()));
+
+  auto *pull_node = _loco_graph->nodes()->create<loco::Pull>();
+  setupShape(mir_output->getShape(), pull_node);
+
+  loco::link(graph_input, pull_node);
+
+  _mir2loco_map.emplace(mir_output, pull_node);
+}
+
+// MaxPool2D: same encode/pool/decode sandwich as AvgPool2D, without the
+// averaging convention.
+void Transformer::visit(mir::ops::MaxPool2DOp &op)
+{
+  loco::Node *input = _mir2loco_map.at(op.getInput(0));
+
+  auto *encoded_input = _loco_graph->nodes()->create<loco::FeatureEncode>();
+  encoded_input->input(input);
+  encoded_input->encoder(createFeatureEncoder(op.getDataFormat()));
+
+  auto max_pool_node = _loco_graph->nodes()->create<loco::MaxPool2D>();
+  max_pool_node->ifm(encoded_input);
+  setupWindow(op.getWindowSize(), max_pool_node->window());
+  setupStride(op.getStrides(), max_pool_node->stride());
+  setupPad(op.getPaddingBefore(), op.getPaddingAfter(), max_pool_node->pad());
+
+  auto *output = _loco_graph->nodes()->create<loco::FeatureDecode>();
+  output->input(max_pool_node);
+  output->decoder(createFeatureDecoder(op.getDataFormat()));
+
+  _mir2loco_map.emplace(op.getOutput(0), output);
+}
+
+// Element-wise multiplication via the shared binary-eltwise helper.
+void Transformer::visit(mir::ops::MulOp &op)
+{
+  // Get Input
+  loco::Node *lhs = _mir2loco_map.at(op.getInput(0));
+  loco::Node *rhs = _mir2loco_map.at(op.getInput(1));
+  auto result = createEltwiseBinary<loco::EltwiseMul>(op, lhs, rhs);
+  // Shape inference is left to loco; only the node mapping is recorded.
+  _mir2loco_map.emplace(op.getOutput(0), result);
+}
+
+// OutputOp: create a graph output plus a Push node fed by the operation's
+// input; nothing is added to the node map since OutputOp has no outputs.
+void Transformer::visit(mir::ops::OutputOp &op)
+{
+  mir::Operation::Output *mir_input = op.getInput(0);
+  loco::Node *input = _mir2loco_map.at(mir_input);
+
+  loco::GraphOutput *graph_output = _loco_graph->outputs()->create();
+  graph_output->name(mir_input->getName());
+  graph_output->dtype(convertDataType(mir_input->getElementType()));
+  graph_output->shape(make_tensor_shape(mir_input->getShape()));
+
+  auto *push_node = _loco_graph->nodes()->create<loco::Push>();
+  push_node->from(input);
+
+  loco::link(graph_output, push_node);
+}
+
+// ReLU maps one-to-one onto loco::ReLU; no encode/decode step is inserted
+// because the operation is layout-agnostic.
+void Transformer::visit(mir::ops::ReluOp &op)
+{
+  auto *activation = _loco_graph->nodes()->create<loco::ReLU>();
+  activation->input(_mir2loco_map.at(op.getInput(0)));
+  // Shape inference is left to loco; only the node mapping is recorded.
+  _mir2loco_map.emplace(op.getOutput(0), activation);
+}
+
+// Reshape -> loco fixed reshape with the mir output shape baked in.
+void Transformer::visit(mir::ops::ReshapeOp &op)
+{
+  loco::Node *input = _mir2loco_map.at(op.getInput(0));
+
+  auto reshape_node = _loco_graph->nodes()->create<loco::Reshape<loco::ReshapeType::Fixed>>();
+  reshape_node->input(input);
+  // Set Shape
+  auto &out_shape = op.getOutputShape(0);
+  setupShape(out_shape, reshape_node);
+  // Add to map
+  _mir2loco_map.emplace(op.getOutput(0), reshape_node);
+}
+
+// Softmax -> loco::TensorSoftmax along the same axis.
+void Transformer::visit(mir::ops::SoftmaxOp &op)
+{
+  loco::Node *input = _mir2loco_map.at(op.getInput(0));
+
+  auto softmax_node = _loco_graph->nodes()->create<loco::TensorSoftmax>();
+  softmax_node->input(input);
+  // Set Axis
+  softmax_node->axis(op.getAxis());
+  // Add to map
+  _mir2loco_map.emplace(op.getOutput(0), softmax_node);
+}
+
+// Element-wise subtraction via the shared binary-eltwise helper.
+void Transformer::visit(mir::ops::SubOp &op)
+{
+  // Get Input
+  loco::Node *lhs = _mir2loco_map.at(op.getInput(0));
+  loco::Node *rhs = _mir2loco_map.at(op.getInput(1));
+  auto result = createEltwiseBinary<loco::EltwiseSub>(op, lhs, rhs);
+  // Shape inference is left to loco; only the node mapping is recorded.
+  _mir2loco_map.emplace(op.getOutput(0), result);
+}
+
+// Transpose -> loco::TensorTranspose with mir's axis order copied verbatim.
+void Transformer::visit(mir::ops::TransposeOp &op)
+{
+  loco::Node *input = _mir2loco_map.at(op.getInput(0));
+  const auto &axis_order = op.getAxisOrder();
+
+  auto transpose_node = _loco_graph->nodes()->create<loco::TensorTranspose>();
+  transpose_node->input(input);
+  // Set axis order
+  transpose_node->perm()->size(axis_order.size());
+  for (size_t i = 0; i < axis_order.size(); i++)
+    transpose_node->perm()->axis(i) = axis_order[i];
+  // Shape inference is left to loco; only the node mapping is recorded.
+  _mir2loco_map.emplace(op.getOutput(0), transpose_node);
+}
+
+// Reached for any mir operation without a dedicated visitor above.
+void Transformer::visit_fallback(mir::Operation &op) { throw std::runtime_error("NYI operation"); }
+
+// Runs the whole conversion: resets internal state (so the transformer is
+// reusable), visits every mir node, validates and hands over the loco graph.
+std::unique_ptr<loco::Graph> Transformer::transform(mir::Graph *mir_graph)
+{
+  _mir2loco_map.clear();
+  _loco_graph.reset();
+  _loco_graph = loco::make_graph();
+
+  // Transform Nodes
+  mir_graph->accept(this);
+
+  // validate graph
+  assert(loco::valid(_loco_graph.get()));
+
+  // std::move is required here: _loco_graph is a data member, not a local.
+  return std::move(_loco_graph);
+}
+
+} // namespace mir2loco
diff --git a/compiler/mir2loco/src/mir2loco.test.cpp b/compiler/mir2loco/src/mir2loco.test.cpp
new file mode 100644
index 000000000..3870caeb5
--- /dev/null
+++ b/compiler/mir2loco/src/mir2loco.test.cpp
@@ -0,0 +1,736 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir2loco.h"
+
+#include "mir/ops/AddOp.h"
+#include "mir/ops/AvgPool2DOp.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/Deconv2DOp.h"
+#include "mir/ops/DepthwiseConv2DOp.h"
+#include "mir/ops/FullyConnectedOp.h"
+#include "mir/ops/MaxPool2DOp.h"
+#include "mir/ops/MulOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/ops/ReshapeOp.h"
+#include "mir/ops/SoftmaxOp.h"
+#include "mir/ops/TransposeOp.h"
+
+#include <gtest/gtest.h>
+
+// Empty gtest fixture shared by all mir2loco transformation tests below.
+class TestTransformer_mir2loco : public ::testing::Test
+{
+};
+
+// Input->Output only: expect Pull->Push with the tensor shape carried through
+// to both the Pull node and the graph-level output.
+TEST_F(TestTransformer_mir2loco, Input_Output_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input_type{mir::DataType::FLOAT32, {5, 6, 7, 8}};
+  auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+  mir_graph.create<mir::ops::OutputOp>(input);
+  input->setName("x");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
+  loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(1));
+
+  ASSERT_NE(pull_node, nullptr);
+  ASSERT_NE(push_node, nullptr);
+  ASSERT_EQ(push_node->from(), pull_node);
+  // Shape check
+  ASSERT_EQ(pull_node->rank(), 4);
+  ASSERT_EQ(pull_node->dim(0), 5);
+  ASSERT_EQ(pull_node->dim(1), 6);
+  ASSERT_EQ(pull_node->dim(2), 7);
+  ASSERT_EQ(pull_node->dim(3), 8);
+
+  ASSERT_TRUE(push_node->indexed());
+  ASSERT_EQ(push_node->index(), 0);
+
+  // Check Graph-level properties
+  ASSERT_EQ(loco_graph->outputs()->size(), 1);
+  ASSERT_NE(loco_graph->outputs()->at(0)->shape(), nullptr);
+  ASSERT_EQ(loco_graph->outputs()->at(0)->shape()->rank(), 4);
+  ASSERT_EQ(loco_graph->outputs()->at(0)->shape()->dim(0), 5);
+  ASSERT_EQ(loco_graph->outputs()->at(0)->shape()->dim(1), 6);
+  ASSERT_EQ(loco_graph->outputs()->at(0)->shape()->dim(2), 7);
+  ASSERT_EQ(loco_graph->outputs()->at(0)->shape()->dim(3), 8);
+}
+
+// ReluOp maps to a single loco::ReLU between Pull and Push; shape is preserved.
+TEST_F(TestTransformer_mir2loco, Relu_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input_type{mir::DataType::FLOAT32, {7, 7, 9, 9}};
+  auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+  auto *relu = mir_graph.create<mir::ops::ReluOp>(input)->getOutput(0);
+  mir_graph.create<mir::ops::OutputOp>(relu);
+  input->setName("x");
+  relu->setName("y");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
+  loco::ReLU *relu_node = dynamic_cast<loco::ReLU *>(loco_graph->nodes()->at(1));
+  loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(2));
+
+  ASSERT_NE(pull_node, nullptr);
+  ASSERT_NE(relu_node, nullptr);
+  ASSERT_NE(push_node, nullptr);
+  ASSERT_EQ(relu_node->input(), pull_node);
+  ASSERT_EQ(push_node->from(), relu_node);
+  // Shape check
+  ASSERT_EQ(pull_node->rank(), 4);
+  ASSERT_EQ(pull_node->dim(0), 7);
+  ASSERT_EQ(pull_node->dim(1), 7);
+  ASSERT_EQ(pull_node->dim(2), 9);
+  ASSERT_EQ(pull_node->dim(3), 9);
+
+  ASSERT_TRUE(push_node->indexed());
+  ASSERT_EQ(push_node->index(), 0);
+
+  // Check Graph-level properties
+  ASSERT_EQ(loco_graph->outputs()->size(), 1);
+  ASSERT_NE(loco_graph->outputs()->at(0)->shape(), nullptr);
+  ASSERT_EQ(loco_graph->outputs()->at(0)->shape()->rank(), 4);
+  ASSERT_EQ(loco_graph->outputs()->at(0)->shape()->dim(0), 7);
+  ASSERT_EQ(loco_graph->outputs()->at(0)->shape()->dim(1), 7);
+  ASSERT_EQ(loco_graph->outputs()->at(0)->shape()->dim(2), 9);
+  ASSERT_EQ(loco_graph->outputs()->at(0)->shape()->dim(3), 9);
+}
+
+// AvgPool2DOp maps to FeatureEncode -> AvgPool2D -> FeatureDecode; padding,
+// window and stride attributes must be copied through.
+TEST_F(TestTransformer_mir2loco, Avg_Pool_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input_type{mir::DataType::FLOAT32, {7, 7, 9, 9}};
+  auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+
+  mir::AvgPool2DOpAttributes attributes;
+  attributes.window = {2, 3};
+  attributes.strides = {4, 5};
+  attributes.padding_before = {5, 9};
+  attributes.padding_after = {7, 4};
+  auto *pool = mir_graph.create<mir::ops::AvgPool2DOp>(input, attributes)->getOutput(0);
+  mir_graph.create<mir::ops::OutputOp>(pool);
+  input->setName("x");
+  pool->setName("y");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
+  loco::FeatureEncode *encode_node =
+      dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
+  loco::AvgPool2D *pool_node = dynamic_cast<loco::AvgPool2D *>(loco_graph->nodes()->at(2));
+  loco::FeatureDecode *decode_node =
+      dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
+  loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(4));
+
+  ASSERT_NE(pull_node, nullptr);
+  ASSERT_NE(encode_node, nullptr);
+  ASSERT_NE(pool_node, nullptr);
+  ASSERT_NE(decode_node, nullptr);
+  ASSERT_NE(push_node, nullptr);
+  ASSERT_EQ(encode_node->input(), pull_node);
+  ASSERT_EQ(pool_node->ifm(), encode_node);
+  ASSERT_EQ(decode_node->input(), pool_node);
+  ASSERT_EQ(push_node->from(), decode_node);
+  // Check params
+  ASSERT_EQ(pool_node->convention(), loco::AvgPool2D::Convention::Full);
+  ASSERT_EQ(pool_node->pad()->top(), 5);
+  ASSERT_EQ(pool_node->pad()->left(), 9);
+  ASSERT_EQ(pool_node->pad()->bottom(), 7);
+  ASSERT_EQ(pool_node->pad()->right(), 4);
+  ASSERT_EQ(pool_node->window()->vertical(), 2);
+  ASSERT_EQ(pool_node->window()->horizontal(), 3);
+  ASSERT_EQ(pool_node->stride()->vertical(), 4);
+  ASSERT_EQ(pool_node->stride()->horizontal(), 5);
+}
+
+// MaxPool2DOp maps to FeatureEncode -> MaxPool2D -> FeatureDecode; padding,
+// window and stride attributes must be copied through.
+TEST_F(TestTransformer_mir2loco, Max_Pool_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input_type{mir::DataType::FLOAT32, {7, 7, 9, 9}};
+  auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+  mir::MaxPool2DOpAttributes attributes;
+  attributes.window = {2, 3};
+  attributes.strides = {4, 5};
+  attributes.padding_before = {5, 9};
+  attributes.padding_after = {7, 4};
+  auto *pool = mir_graph.create<mir::ops::MaxPool2DOp>(input, attributes)->getOutput(0);
+  mir_graph.create<mir::ops::OutputOp>(pool);
+  input->setName("x");
+  pool->setName("y");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
+  loco::FeatureEncode *encode_node =
+      dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
+  loco::MaxPool2D *pool_node = dynamic_cast<loco::MaxPool2D *>(loco_graph->nodes()->at(2));
+  loco::FeatureDecode *decode_node =
+      dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
+  loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(4));
+
+  ASSERT_NE(pull_node, nullptr);
+  ASSERT_NE(encode_node, nullptr);
+  ASSERT_NE(pool_node, nullptr);
+  ASSERT_NE(decode_node, nullptr);
+  ASSERT_NE(push_node, nullptr);
+  ASSERT_EQ(encode_node->input(), pull_node);
+  ASSERT_EQ(pool_node->ifm(), encode_node);
+  ASSERT_EQ(decode_node->input(), pool_node);
+  ASSERT_EQ(push_node->from(), decode_node);
+  // Check params
+  ASSERT_EQ(pool_node->pad()->top(), 5);
+  ASSERT_EQ(pool_node->pad()->left(), 9);
+  ASSERT_EQ(pool_node->pad()->bottom(), 7);
+  ASSERT_EQ(pool_node->pad()->right(), 4);
+  ASSERT_EQ(pool_node->window()->vertical(), 2);
+  ASSERT_EQ(pool_node->window()->horizontal(), 3);
+  ASSERT_EQ(pool_node->stride()->vertical(), 4);
+  ASSERT_EQ(pool_node->stride()->horizontal(), 5);
+}
+
+// A 3-input ConcatOp lowers to a chain of two binary loco::TensorConcat nodes,
+// both on the same axis.
+TEST_F(TestTransformer_mir2loco, Concat_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input_type{mir::DataType::FLOAT32, {5, 6, 7, 3}};
+  auto *input1 = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+  auto *input2 = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+  auto *input3 = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+  std::vector<mir::Operation::Output *> inputs{input1, input2, input3};
+  auto *concat = mir_graph.create<mir::ops::ConcatOp>(inputs, 2)->getOutput(0);
+  mir_graph.create<mir::ops::OutputOp>(concat);
+  input1->setName("x1");
+  input2->setName("x2");
+  input3->setName("x3");
+  concat->setName("y");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  loco::Pull *pull1_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
+  loco::Pull *pull2_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(1));
+  loco::Pull *pull3_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(2));
+  loco::TensorConcat *concat1_node = dynamic_cast<loco::TensorConcat *>(loco_graph->nodes()->at(3));
+  loco::TensorConcat *concat2_node = dynamic_cast<loco::TensorConcat *>(loco_graph->nodes()->at(4));
+  loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(5));
+
+  ASSERT_NE(pull1_node, nullptr);
+  ASSERT_NE(pull2_node, nullptr);
+  ASSERT_NE(pull3_node, nullptr);
+  ASSERT_NE(concat1_node, nullptr);
+  ASSERT_NE(concat2_node, nullptr);
+  ASSERT_NE(push_node, nullptr);
+
+  ASSERT_NE(dynamic_cast<loco::Pull *>(concat1_node->lhs()), nullptr);
+  ASSERT_NE(dynamic_cast<loco::Pull *>(concat1_node->rhs()), nullptr);
+  ASSERT_EQ(concat2_node->lhs(), concat1_node);
+  ASSERT_NE(dynamic_cast<loco::Pull *>(concat2_node->rhs()), nullptr);
+  ASSERT_EQ(push_node->from(), concat2_node);
+  // Check axis
+  ASSERT_EQ(concat1_node->axis(), 2);
+  ASSERT_EQ(concat2_node->axis(), 2);
+}
+
+// ReshapeOp maps to loco::FixedReshape carrying the target shape {7, 8, 81}.
+TEST_F(TestTransformer_mir2loco, Reshape_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input_type{mir::DataType::FLOAT32, {7, 8, 9, 9}};
+  auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+  auto *reshape = mir_graph.create<mir::ops::ReshapeOp>(input, mir::Shape{7, 8, 81})->getOutput(0);
+  mir_graph.create<mir::ops::OutputOp>(reshape);
+  input->setName("x");
+  reshape->setName("y");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
+  loco::Reshape<loco::ReshapeType::Fixed> *reshape_node =
+      dynamic_cast<loco::Reshape<loco::ReshapeType::Fixed> *>(loco_graph->nodes()->at(1));
+  loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(2));
+
+  ASSERT_NE(pull_node, nullptr);
+  ASSERT_NE(reshape_node, nullptr);
+  ASSERT_NE(push_node, nullptr);
+  ASSERT_EQ(reshape_node->input(), pull_node);
+  ASSERT_EQ(push_node->from(), reshape_node);
+  // Check params
+  ASSERT_EQ(reshape_node->rank(), 3);
+  ASSERT_EQ(reshape_node->dim(0), 7);
+  ASSERT_EQ(reshape_node->dim(1), 8);
+  ASSERT_EQ(reshape_node->dim(2), 81);
+}
+
+// Float ConstantOp maps to loco::ConstGen with identical shape and element data.
+TEST_F(TestTransformer_mir2loco, Const_Float_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType type{mir::DataType::FLOAT32, {2, 3}};
+  const float data[] = {5.9, 6.7, 5.32, 54.11231, 43.2444, 3.409};
+  mir::TensorVariant mir_tensor{type, data};
+  auto *constant = mir_graph.create<mir::ops::ConstantOp>(mir_tensor)->getOutput(0);
+  mir_graph.create<mir::ops::OutputOp>(constant);
+  constant->setName("x");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(loco_graph->nodes()->at(0));
+  loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(1));
+
+  ASSERT_NE(const_node, nullptr);
+  ASSERT_NE(push_node, nullptr);
+  ASSERT_EQ(push_node->from(), const_node);
+  // Shape check
+  ASSERT_EQ(const_node->rank(), 2);
+  ASSERT_EQ(const_node->dim(0), 2);
+  ASSERT_EQ(const_node->dim(1), 3);
+
+  // Element-wise payload check (6 = 2 * 3 elements).
+  for (int i = 0; i < 6; i++)
+    ASSERT_FLOAT_EQ(const_node->at<loco::DataType::FLOAT32>(i), data[i]);
+}
+
+// AddOp with shape mismatch on dim 1 ({5,6,7,3} + {5,1,7,3}): the rhs is routed
+// through a TensorBroadcast that expands dim 1 to 6 before EltwiseAdd.
+TEST_F(TestTransformer_mir2loco, Add_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input1_type{mir::DataType::FLOAT32, {5, 6, 7, 3}};
+  mir::TensorType input2_type{mir::DataType::FLOAT32, {5, 1, 7, 3}};
+  auto *input1 = mir_graph.create<mir::ops::InputOp>(input1_type)->getOutput(0);
+  auto *input2 = mir_graph.create<mir::ops::InputOp>(input2_type)->getOutput(0);
+  auto *add = mir_graph.create<mir::ops::AddOp>(input1, input2)->getOutput(0);
+  mir_graph.create<mir::ops::OutputOp>(add);
+  input1->setName("x1");
+  input2->setName("x2");
+  add->setName("y");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  // Pull
+  auto inputs = loco_graph->inputs();
+  ASSERT_EQ(inputs->size(), 2);
+  loco::Pull *pull_node0 = loco::pull_node(loco_graph.get(), 0);
+  ASSERT_NE(pull_node0, nullptr);
+  loco::Pull *pull_node1 = loco::pull_node(loco_graph.get(), 1);
+  ASSERT_NE(pull_node1, nullptr);
+  // Add
+  auto pull_uses = loco::succs(pull_node0);
+  ASSERT_EQ(pull_uses.size(), 1);
+  loco::EltwiseAdd *add_node = dynamic_cast<loco::EltwiseAdd *>(*pull_uses.begin());
+  ASSERT_NE(add_node, nullptr);
+  ASSERT_EQ(add_node->lhs(), pull_node0);
+  // TensorBroadcast
+  loco::TensorBroadcast *broadcast_node = dynamic_cast<loco::TensorBroadcast *>(add_node->rhs());
+  ASSERT_NE(broadcast_node, nullptr);
+  ASSERT_EQ(broadcast_node->input(), pull_node1);
+  // Check params
+  ASSERT_TRUE(broadcast_node->mapping()->defined(1));
+  ASSERT_EQ(broadcast_node->mapping()->dim(1), 6);
+}
+
+// Conv2DOp maps to FeatureEncode/FilterEncode feeding loco::Conv2D, followed by
+// FeatureDecode; pad and stride attributes must be copied through.
+TEST_F(TestTransformer_mir2loco, Conv2D_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input_type{mir::DataType::FLOAT32, {7, 7, 9, 1}};
+  auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+
+  mir::TensorType filter_type{mir::DataType::FLOAT32, {2, 3, 1, 1}};
+  const float data[] = {5.9, 6.7, 5.32, 54.11231, 43.2444, 3.409};
+  mir::TensorVariant filter_tensor{filter_type, data};
+  auto *filter = mir_graph.create<mir::ops::ConstantOp>(filter_tensor)->getOutput(0);
+
+  mir::Conv2DOpAttributes attributes;
+  attributes.strides = {2, 3};
+  attributes.padding_before = {5, 9};
+  attributes.padding_after = {7, 4};
+
+  auto *conv = mir_graph.create<mir::ops::Conv2DOp>(input, filter, attributes)->getOutput(0);
+
+  mir_graph.create<mir::ops::OutputOp>(conv);
+  input->setName("x");
+  conv->setName("y");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
+  loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(loco_graph->nodes()->at(1));
+  loco::FeatureEncode *encode_node =
+      dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(2));
+  loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(loco_graph->nodes()->at(3));
+  loco::Conv2D *conv_node = dynamic_cast<loco::Conv2D *>(loco_graph->nodes()->at(4));
+  loco::FeatureDecode *decode_node =
+      dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(5));
+  loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(6));
+
+  ASSERT_NE(pull_node, nullptr);
+  ASSERT_NE(const_node, nullptr);
+  ASSERT_NE(filter_node, nullptr);
+  ASSERT_NE(encode_node, nullptr);
+  ASSERT_NE(conv_node, nullptr);
+  ASSERT_NE(decode_node, nullptr);
+  ASSERT_NE(push_node, nullptr);
+  ASSERT_EQ(encode_node->input(), pull_node);
+  ASSERT_EQ(filter_node->input(), const_node);
+  ASSERT_EQ(conv_node->ifm(), encode_node);
+  ASSERT_EQ(conv_node->ker(), filter_node);
+  ASSERT_EQ(decode_node->input(), conv_node);
+  ASSERT_EQ(push_node->from(), decode_node);
+  // Check params
+  ASSERT_EQ(conv_node->pad()->top(), 5);
+  ASSERT_EQ(conv_node->pad()->left(), 9);
+  ASSERT_EQ(conv_node->pad()->bottom(), 7);
+  ASSERT_EQ(conv_node->pad()->right(), 4);
+  ASSERT_EQ(conv_node->stride()->vertical(), 2);
+  ASSERT_EQ(conv_node->stride()->horizontal(), 3);
+}
+
+// SoftmaxOp maps to loco::TensorSoftmax with the axis preserved.
+TEST_F(TestTransformer_mir2loco, Softmax_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input_type{mir::DataType::FLOAT32, {7, 7, 1, 9}};
+  auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+  auto *softmax = mir_graph.create<mir::ops::SoftmaxOp>(input, 2)->getOutput(0);
+  mir_graph.create<mir::ops::OutputOp>(softmax);
+  input->setName("x");
+  softmax->setName("y");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
+  loco::TensorSoftmax *softmax_node =
+      dynamic_cast<loco::TensorSoftmax *>(loco_graph->nodes()->at(1));
+  loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(2));
+
+  ASSERT_NE(pull_node, nullptr);
+  ASSERT_NE(softmax_node, nullptr);
+  ASSERT_NE(push_node, nullptr);
+  ASSERT_EQ(softmax_node->input(), pull_node);
+  ASSERT_EQ(push_node->from(), softmax_node);
+  // Check axis
+  ASSERT_EQ(softmax_node->axis(), 2);
+}
+
+// MulOp with a rank-1 rhs ({13}) against a rank-4 lhs ({5,6,7,13}): the rhs is
+// first FixedReshape-d to {1,1,1,13} and then TensorBroadcast to the lhs shape
+// before EltwiseMul.
+TEST_F(TestTransformer_mir2loco, Mul_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input1_type{mir::DataType::FLOAT32, {5, 6, 7, 13}};
+  mir::TensorType input2_type{mir::DataType::FLOAT32, {13}};
+  auto *input1 = mir_graph.create<mir::ops::InputOp>(input1_type)->getOutput(0);
+  auto *input2 = mir_graph.create<mir::ops::InputOp>(input2_type)->getOutput(0);
+  // Renamed from 'add': this is the MulOp result.
+  auto *mul = mir_graph.create<mir::ops::MulOp>(input1, input2)->getOutput(0);
+  mir_graph.create<mir::ops::OutputOp>(mul);
+  input1->setName("x1");
+  input2->setName("x2");
+  mul->setName("y");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  // Pulls
+  auto inputs = loco_graph->inputs();
+  ASSERT_EQ(inputs->size(), 2);
+  loco::Pull *pull_node0 = loco::pull_node(loco_graph.get(), 0);
+  ASSERT_NE(pull_node0, nullptr);
+  loco::Pull *pull_node1 = loco::pull_node(loco_graph.get(), 1);
+  ASSERT_NE(pull_node1, nullptr);
+  // Mul
+  auto pull0_uses = loco::succs(pull_node0);
+  ASSERT_EQ(pull0_uses.size(), 1);
+  loco::EltwiseMul *mul_node = dynamic_cast<loco::EltwiseMul *>(*pull0_uses.begin());
+  ASSERT_NE(mul_node, nullptr);
+  // Broadcast
+  loco::TensorBroadcast *broadcast_node = dynamic_cast<loco::TensorBroadcast *>(mul_node->rhs());
+  ASSERT_NE(broadcast_node, nullptr);
+  ASSERT_EQ(mul_node->lhs(), pull_node0);
+  ASSERT_EQ(mul_node->rhs(), broadcast_node);
+  loco::FixedReshape *reshape_node = dynamic_cast<loco::FixedReshape *>(broadcast_node->input());
+  ASSERT_NE(reshape_node, nullptr);
+  ASSERT_EQ(reshape_node->input(), pull_node1);
+  ASSERT_EQ(reshape_node->rank(), 4);
+  ASSERT_EQ(reshape_node->dim(0), 1);
+  ASSERT_EQ(reshape_node->dim(1), 1);
+  ASSERT_EQ(reshape_node->dim(2), 1);
+  ASSERT_EQ(reshape_node->dim(3), 13);
+  // Params checks
+  ASSERT_EQ(pull_node0->rank(), 4);
+  ASSERT_EQ(pull_node0->dim(0), 5);
+  ASSERT_EQ(pull_node0->dim(1), 6);
+  ASSERT_EQ(pull_node0->dim(2), 7);
+  ASSERT_EQ(pull_node0->dim(3), 13);
+
+  ASSERT_EQ(pull_node1->rank(), 1);
+  ASSERT_EQ(pull_node1->dim(0), 13);
+
+  ASSERT_TRUE(broadcast_node->mapping()->defined(0));
+  ASSERT_EQ(broadcast_node->mapping()->dim(0), 5);
+  ASSERT_TRUE(broadcast_node->mapping()->defined(1));
+  ASSERT_EQ(broadcast_node->mapping()->dim(1), 6);
+  ASSERT_TRUE(broadcast_node->mapping()->defined(2));
+  ASSERT_EQ(broadcast_node->mapping()->dim(2), 7);
+}
+
+// DepthwiseConv2DOp maps to FeatureEncode + DepthwiseFilterEncode feeding
+// loco::DepthwiseConv2D, then FeatureDecode -> Push; pad/stride are preserved.
+TEST_F(TestTransformer_mir2loco, DepthwiseConv2D_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input_type{mir::DataType::FLOAT32, {7, 7, 9, 1}};
+  auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+
+  mir::TensorType filter_type{mir::DataType::FLOAT32, {2, 3, 1, 1}};
+  const float data[] = {5.9, 6.7, 5.32, 54.11231, 43.2444, 3.409};
+  mir::TensorVariant filter_tensor{filter_type, data};
+  auto *filter = mir_graph.create<mir::ops::ConstantOp>(filter_tensor)->getOutput(0);
+
+  mir::Conv2DOpAttributes attributes;
+  attributes.strides = {2, 3};
+  attributes.padding_before = {5, 9};
+  attributes.padding_after = {7, 4};
+
+  auto *conv =
+      mir_graph.create<mir::ops::DepthwiseConv2DOp>(input, filter, attributes)->getOutput(0);
+
+  mir_graph.create<mir::ops::OutputOp>(conv);
+  input->setName("x");
+  conv->setName("y");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  // Pull (unused 'inputs' local removed)
+  loco::Pull *pull_node = loco::pull_node(loco_graph.get(), 0);
+  ASSERT_NE(pull_node, nullptr);
+  // FeatureEncode
+  auto pull_uses = loco::succs(pull_node);
+  ASSERT_EQ(pull_uses.size(), 1);
+  loco::FeatureEncode *encode_node = dynamic_cast<loco::FeatureEncode *>(*pull_uses.begin());
+  ASSERT_NE(encode_node, nullptr);
+  ASSERT_EQ(encode_node->input(), pull_node);
+  // DepthwiseConv2D
+  auto encode_uses = loco::succs(encode_node);
+  ASSERT_EQ(encode_uses.size(), 1);
+  loco::DepthwiseConv2D *dw_conv_node = dynamic_cast<loco::DepthwiseConv2D *>(*encode_uses.begin());
+  ASSERT_NE(dw_conv_node, nullptr);
+  loco::DepthwiseFilterEncode *filter_node =
+      dynamic_cast<loco::DepthwiseFilterEncode *>(dw_conv_node->ker());
+  ASSERT_NE(filter_node, nullptr);
+  ASSERT_EQ(dw_conv_node->ifm(), encode_node);
+  // Check params
+  ASSERT_EQ(dw_conv_node->pad()->top(), 5);
+  ASSERT_EQ(dw_conv_node->pad()->left(), 9);
+  ASSERT_EQ(dw_conv_node->pad()->bottom(), 7);
+  ASSERT_EQ(dw_conv_node->pad()->right(), 4);
+  ASSERT_EQ(dw_conv_node->stride()->vertical(), 2);
+  ASSERT_EQ(dw_conv_node->stride()->horizontal(), 3);
+  // ConstGen
+  loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(filter_node->input());
+  ASSERT_NE(const_node, nullptr);
+  // FeatureDecode
+  auto dw_conv_uses = loco::succs(dw_conv_node);
+  ASSERT_EQ(dw_conv_uses.size(), 1);
+  loco::FeatureDecode *decode_node = dynamic_cast<loco::FeatureDecode *>(*dw_conv_uses.begin());
+  ASSERT_NE(decode_node, nullptr);
+  ASSERT_EQ(decode_node->input(), dw_conv_node);
+  // Push
+  auto decode_uses = loco::succs(decode_node);
+  ASSERT_EQ(decode_uses.size(), 1);
+  loco::Push *push_node = dynamic_cast<loco::Push *>(*decode_uses.begin());
+  ASSERT_NE(push_node, nullptr);
+  ASSERT_EQ(push_node->from(), decode_node);
+}
+
+// DeConv2DOp maps to FeatureEncode + FilterEncode feeding loco::TransposedConv2D,
+// then FeatureDecode -> Push; pad/stride are preserved.
+TEST_F(TestTransformer_mir2loco, DeConv2D_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input_type{mir::DataType::FLOAT32, {7, 7, 9, 1}};
+  auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+
+  mir::TensorType filter_type{mir::DataType::FLOAT32, {2, 3, 1, 1}};
+  const float data[] = {5.9, 6.7, 5.32, 54.11231, 43.2444, 3.409};
+  mir::TensorVariant filter_tensor{filter_type, data};
+  auto *filter = mir_graph.create<mir::ops::ConstantOp>(filter_tensor)->getOutput(0);
+
+  mir::Deconv2DOpAttributes attributes;
+  attributes.strides = {1, 2};
+  attributes.padding_before = {3, 4};
+  attributes.padding_after = {5, 6};
+
+  auto *conv = mir_graph.create<mir::ops::DeConv2DOp>(input, filter, attributes)->getOutput(0);
+
+  mir_graph.create<mir::ops::OutputOp>(conv);
+  input->setName("x");
+  conv->setName("y");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  // Pull
+  loco::Pull *pull_node = loco::pull_node(loco_graph.get(), 0);
+  ASSERT_NE(pull_node, nullptr);
+  // FeatureEncode
+  auto pull_uses = loco::succs(pull_node);
+  ASSERT_EQ(pull_uses.size(), 1);
+  loco::FeatureEncode *encode_node = dynamic_cast<loco::FeatureEncode *>(*pull_uses.begin());
+  ASSERT_NE(encode_node, nullptr);
+  ASSERT_EQ(encode_node->input(), pull_node);
+  // TransposedConv2D
+  auto encode_uses = loco::succs(encode_node);
+  ASSERT_EQ(encode_uses.size(), 1);
+  loco::TransposedConv2D *tr_conv_node =
+      dynamic_cast<loco::TransposedConv2D *>(*encode_uses.begin());
+  ASSERT_NE(tr_conv_node, nullptr);
+  loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(tr_conv_node->ker());
+  ASSERT_NE(filter_node, nullptr);
+  ASSERT_EQ(tr_conv_node->ifm(), encode_node);
+  // Check params
+  ASSERT_EQ(tr_conv_node->pad()->top(), 3);
+  ASSERT_EQ(tr_conv_node->pad()->left(), 4);
+  ASSERT_EQ(tr_conv_node->pad()->bottom(), 5);
+  ASSERT_EQ(tr_conv_node->pad()->right(), 6);
+  ASSERT_EQ(tr_conv_node->stride()->vertical(), 1);
+  ASSERT_EQ(tr_conv_node->stride()->horizontal(), 2);
+  // ConstGen
+  loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(filter_node->input());
+  ASSERT_NE(const_node, nullptr);
+  // FeatureDecode
+  auto tr_conv_uses = loco::succs(tr_conv_node);
+  ASSERT_EQ(tr_conv_uses.size(), 1);
+  loco::FeatureDecode *decode_node = dynamic_cast<loco::FeatureDecode *>(*tr_conv_uses.begin());
+  ASSERT_NE(decode_node, nullptr);
+  ASSERT_EQ(decode_node->input(), tr_conv_node);
+  // Push
+  auto decode_uses = loco::succs(decode_node);
+  ASSERT_EQ(decode_uses.size(), 1);
+  loco::Push *push_node = dynamic_cast<loco::Push *>(*decode_uses.begin());
+  ASSERT_NE(push_node, nullptr);
+  ASSERT_EQ(push_node->from(), decode_node);
+}
+
+// FullyConnectedOp maps to MatrixEncode (both operands) -> MatMul ->
+// MatrixDecode -> Push, with the weights coming from a ConstGen.
+TEST_F(TestTransformer_mir2loco, FullyConnected_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input_type{mir::DataType::FLOAT32, {10, 2}};
+  auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+
+  mir::TensorType weights_type{mir::DataType::FLOAT32, mir::Shape{2, 2}};
+  const float data[] = {5.9, 5.32, 54.11231, 3.409};
+  mir::TensorVariant weights_tensor{weights_type, data};
+  auto *weights = mir_graph.create<mir::ops::ConstantOp>(weights_tensor)->getOutput(0);
+
+  auto *fc = mir_graph.create<mir::ops::FullyConnectedOp>(input, weights)->getOutput(0);
+
+  mir_graph.create<mir::ops::OutputOp>(fc);
+  input->setName("x");
+  fc->setName("y");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+
+  // Pull (unused 'inputs' local removed)
+  loco::Pull *pull_node = loco::pull_node(loco_graph.get(), 0);
+  ASSERT_NE(pull_node, nullptr);
+  // MatrixEncode
+  auto pull_uses = loco::succs(pull_node);
+  ASSERT_EQ(pull_uses.size(), 1);
+  loco::MatrixEncode *encode_node = dynamic_cast<loco::MatrixEncode *>(*pull_uses.begin());
+  ASSERT_NE(encode_node, nullptr);
+  ASSERT_EQ(encode_node->input(), pull_node);
+  // MatMul
+  auto encode_uses = loco::succs(encode_node);
+  ASSERT_EQ(encode_uses.size(), 1);
+  loco::MatMul *fc_node = dynamic_cast<loco::MatMul *>(*encode_uses.begin());
+  ASSERT_NE(fc_node, nullptr);
+  loco::MatrixEncode *kernel_encode_node = dynamic_cast<loco::MatrixEncode *>(fc_node->rhs());
+  ASSERT_NE(kernel_encode_node, nullptr);
+  ASSERT_EQ(fc_node->lhs(), encode_node);
+  // ConstGen
+  loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(kernel_encode_node->input());
+  ASSERT_NE(const_node, nullptr);
+  // MatrixDecode
+  auto fc_uses = loco::succs(fc_node);
+  ASSERT_EQ(fc_uses.size(), 1);
+  loco::MatrixDecode *decode_node = dynamic_cast<loco::MatrixDecode *>(*fc_uses.begin());
+  ASSERT_NE(decode_node, nullptr);
+  ASSERT_EQ(decode_node->input(), fc_node);
+  // Push
+  auto decode_uses = loco::succs(decode_node);
+  ASSERT_EQ(decode_uses.size(), 1);
+  loco::Push *push_node = dynamic_cast<loco::Push *>(*decode_uses.begin());
+  ASSERT_NE(push_node, nullptr);
+  ASSERT_EQ(push_node->from(), decode_node);
+}
+
+// TransposeOp maps to loco::TensorTranspose with the permutation {3,0,1,2}
+// copied into perm().
+TEST_F(TestTransformer_mir2loco, Transpose_Test)
+{
+  mir::Graph mir_graph;
+
+  mir::TensorType input_type{mir::DataType::FLOAT32, {2, 7, 9, 5}};
+  auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
+  auto *transpose =
+      mir_graph.create<mir::ops::TransposeOp>(input, std::vector<std::size_t>{3, 0, 1, 2})
+          ->getOutput(0);
+  mir_graph.create<mir::ops::OutputOp>(transpose);
+  input->setName("x");
+  transpose->setName("y");
+
+  mir2loco::Transformer transformer;
+  auto loco_graph = transformer.transform(&mir_graph);
+  // Pull (unused 'inputs' local removed)
+  loco::Pull *pull_node = loco::pull_node(loco_graph.get(), 0);
+  ASSERT_NE(pull_node, nullptr);
+  // Transpose
+  auto pull_uses = loco::succs(pull_node);
+  ASSERT_EQ(pull_uses.size(), 1);
+  loco::TensorTranspose *transpose_node = dynamic_cast<loco::TensorTranspose *>(*pull_uses.begin());
+  ASSERT_NE(transpose_node, nullptr);
+  ASSERT_EQ(transpose_node->input(), pull_node);
+  // Push
+  auto transpose_uses = loco::succs(transpose_node);
+  ASSERT_EQ(transpose_uses.size(), 1);
+  loco::Push *push_node = dynamic_cast<loco::Push *>(*transpose_uses.begin());
+  ASSERT_NE(push_node, nullptr);
+  ASSERT_EQ(push_node->from(), transpose_node);
+  // Axis check
+  ASSERT_EQ(transpose_node->perm()->size(), 4);
+  ASSERT_EQ(transpose_node->perm()->axis(0), 3);
+  ASSERT_EQ(transpose_node->perm()->axis(1), 0);
+  ASSERT_EQ(transpose_node->perm()->axis(2), 1);
+  ASSERT_EQ(transpose_node->perm()->axis(3), 2);
+}
diff --git a/compiler/moco-log/CMakeLists.txt b/compiler/moco-log/CMakeLists.txt
new file mode 100644
index 000000000..036b4e74b
--- /dev/null
+++ b/compiler/moco-log/CMakeLists.txt
@@ -0,0 +1,9 @@
+# TODO Find how to test logging framework
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(moco_log SHARED ${SOURCES})
+target_include_directories(moco_log PUBLIC include)
+# hermes is PUBLIC: the installed headers (moco/Log.h) include <hermes.h>
+target_link_libraries(moco_log PUBLIC hermes)
+target_link_libraries(moco_log PRIVATE hermes_std)
+target_link_libraries(moco_log PRIVATE stdex)
+install(TARGETS moco_log DESTINATION lib)
diff --git a/compiler/moco-log/README.md b/compiler/moco-log/README.md
new file mode 100644
index 000000000..d8289ab9b
--- /dev/null
+++ b/compiler/moco-log/README.md
@@ -0,0 +1,3 @@
+# moco-log
+
+_moco-log_ is a logging framework for the _moco_ compiler framework.
diff --git a/compiler/moco-log/include/moco/Log.h b/compiler/moco-log/include/moco/Log.h
new file mode 100644
index 000000000..ec246cd59
--- /dev/null
+++ b/compiler/moco-log/include/moco/Log.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_LOG_H__
+#define __MOCO_LOG_H__
+
+#include <hermes.h>
+
+namespace moco
+{
+
+/**
+ * @brief Logger Implementation
+ */
+class Logger final : public hermes::Source
+{
+public:
+  // Constructed against a hermes context; typically the global one obtained
+  // via LoggingContext::get() (see the LOGGER macro below).
+  Logger(hermes::Context *ctx);
+  ~Logger();
+};
+
+/**
+ * @brief Logger Configuration
+ *
+ * Users are able to turn logging on/off via MOCO_LOG environment variable.
+ */
+class LoggerConfig final : public hermes::Config
+{
+public:
+  LoggerConfig();
+
+public:
+  // hermes::Config entry point (NOTE(review): presumably dispatches to the
+  // Logger-specific overload below -- confirm in Log.cpp).
+  void configure(const hermes::Source *, hermes::Source::Setting &) const final;
+  void configure(const Logger *, hermes::Source::Setting &) const;
+
+private:
+  bool _enabled; // on/off state, driven by MOCO_LOG per the class comment above
+};
+
+} // namespace moco
+
+#include "moco/LoggingContext.h"
+
+/**
+ * HOW TO USE:
+ *
+ * LOGGER(l);
+ *
+ * INFO(l) << "Hello, World" << std::endl;
+ *
+ */
+// Declares a named Logger bound to the global moco logging context.
+#define LOGGER(name) ::moco::Logger name{::moco::LoggingContext::get()};
+
+// TODO Support FATAL, ERROR, WARN, and VERBOSE
+#define INFO(name) HERMES_INFO(name)
+
+// WARNING!
+//
+// THE CURRENT IMPLEMENTATION IS NOT THREAD SAFE.
+//
+
+#endif // __MOCO_LOG_H__
diff --git a/compiler/moco-log/include/moco/LoggingContext.h b/compiler/moco-log/include/moco/LoggingContext.h
new file mode 100644
index 000000000..18f15b134
--- /dev/null
+++ b/compiler/moco-log/include/moco/LoggingContext.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_LOGGING_CONTEXT_H__
+#define __MOCO_LOGGING_CONTEXT_H__
+
+#include <hermes.h>
+
+namespace moco
+{
+
+/**
+ * @brief Global logging context
+ */
+struct LoggingContext
+{
+ static hermes::Context *get(void);
+};
+
+} // namespace moco
+
+#endif // __MOCO_LOGGING_CONTEXT_H__
diff --git a/compiler/moco-log/requires.cmake b/compiler/moco-log/requires.cmake
new file mode 100644
index 000000000..657aa04a1
--- /dev/null
+++ b/compiler/moco-log/requires.cmake
@@ -0,0 +1,2 @@
+require("hermes")
+require("hermes-std")
diff --git a/compiler/moco-log/src/Log.cpp b/compiler/moco-log/src/Log.cpp
new file mode 100644
index 000000000..4d204ee52
--- /dev/null
+++ b/compiler/moco-log/src/Log.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Log.h"
+
+#include <cassert>
+#include <cstdlib>
+#include <iostream>
+
+// TODO Extract these lexical conversion routines as a library
+namespace
+{
+
+/**
+ * @brief Convert C-string as a value of type T
+ *
+ * safecast(s, v) returns v if s is nullptr.
+ */
+template <typename T> T safecast(const char *, const T &);
+
+template <> bool safecast<bool>(const char *s, const bool &value)
+{
+ return (s == nullptr) ? value : (std::stoi(s) != 0);
+}
+
+} // namespace
+
+//
+// Logger
+//
+namespace moco
+{
+
+Logger::Logger(hermes::Context *ctx) { activate(ctx->sources(), ctx->bus()); }
+Logger::~Logger() { deactivate(); }
+
+} // namespace moco
+
+//
+// LoggerConfig
+//
+namespace moco
+{
+
+LoggerConfig::LoggerConfig()
+{
+ // Turn on logging if MOCO_LOG is set as non-zero value
+ _enabled = safecast<bool>(std::getenv("MOCO_LOG"), false);
+}
+
+void LoggerConfig::configure(const hermes::Source *source, hermes::Source::Setting &setting) const
+{
+ // Let's ignore hermes::Sources if that is not a moco logger
+ if (auto logger = dynamic_cast<const Logger *>(source))
+ {
+ configure(logger, setting);
+ }
+}
+
+void LoggerConfig::configure(const Logger *, hermes::Source::Setting &setting) const
+{
+ if (_enabled)
+ {
+    // Enable all categories
+ setting.accept_all();
+ }
+ else
+ {
+    // Disable all categories
+ setting.reject_all();
+ }
+}
+
+} // namespace moco
diff --git a/compiler/moco-log/src/LoggingContext.cpp b/compiler/moco-log/src/LoggingContext.cpp
new file mode 100644
index 000000000..a004e1d3d
--- /dev/null
+++ b/compiler/moco-log/src/LoggingContext.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/LoggingContext.h"
+#include "moco/Log.h"
+
+#include <hermes/ConsoleReporter.h>
+#include <stdex/Memory.h>
+
+namespace moco
+{
+
+hermes::Context *LoggingContext::get(void)
+{
+ static hermes::Context *ctx = nullptr;
+
+ if (ctx == nullptr)
+ {
+ ctx = new hermes::Context;
+ ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
+ ctx->config(stdex::make_unique<LoggerConfig>());
+ }
+
+ return ctx;
+}
+
+} // namespace moco
diff --git a/compiler/moco-tf/CMakeLists.txt b/compiler/moco-tf/CMakeLists.txt
new file mode 100644
index 000000000..5516388a4
--- /dev/null
+++ b/compiler/moco-tf/CMakeLists.txt
@@ -0,0 +1,51 @@
+if(NOT TARGET mio_tf)
+ return()
+endif(NOT TARGET mio_tf)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(moco_tf_frontend SHARED ${SOURCES})
+target_include_directories(moco_tf_frontend PRIVATE src)
+target_include_directories(moco_tf_frontend PUBLIC include)
+target_link_libraries(moco_tf_frontend PUBLIC loco)
+target_link_libraries(moco_tf_frontend PUBLIC moco_lang)
+target_link_libraries(moco_tf_frontend PUBLIC moco_import)
+target_link_libraries(moco_tf_frontend PUBLIC moco_pass)
+target_link_libraries(moco_tf_frontend PUBLIC mio_tf)
+target_link_libraries(moco_tf_frontend PRIVATE moco_service)
+target_link_libraries(moco_tf_frontend PRIVATE moco_support)
+target_link_libraries(moco_tf_frontend PRIVATE bino)
+target_link_libraries(moco_tf_frontend PRIVATE fipe)
+target_link_libraries(moco_tf_frontend PRIVATE locop)
+target_link_libraries(moco_tf_frontend PRIVATE stdex)
+target_link_libraries(moco_tf_frontend PRIVATE moco_log)
+target_link_libraries(moco_tf_frontend PRIVATE pepper_str)
+target_link_libraries(moco_tf_frontend PRIVATE pepper_strcast)
+target_link_libraries(moco_tf_frontend PRIVATE locomotiv)
+target_link_libraries(moco_tf_frontend PRIVATE plier_tf)
+target_link_libraries(moco_tf_frontend PRIVATE locoex_customop)
+target_link_libraries(moco_tf_frontend PRIVATE logo)
+target_link_libraries(moco_tf_frontend PRIVATE oops)
+install(TARGETS moco_tf_frontend DESTINATION lib)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+add_executable(moco_tf_frontend_test ${TESTS})
+target_include_directories(moco_tf_frontend_test PRIVATE src)
+target_link_libraries(moco_tf_frontend_test gtest_main)
+target_link_libraries(moco_tf_frontend_test bino)
+target_link_libraries(moco_tf_frontend_test fipe)
+target_link_libraries(moco_tf_frontend_test locop)
+target_link_libraries(moco_tf_frontend_test moco_log)
+target_link_libraries(moco_tf_frontend_test moco_tf_frontend)
+target_link_libraries(moco_tf_frontend_test stdex)
+target_link_libraries(moco_tf_frontend_test plier_tf)
+target_link_libraries(moco_tf_frontend_test locoex_customop)
+target_link_libraries(moco_tf_frontend_test logo)
+add_test(moco_tf_frontend_test moco_tf_frontend_test)
diff --git a/compiler/moco-tf/README.md b/compiler/moco-tf/README.md
new file mode 100644
index 000000000..add1159e1
--- /dev/null
+++ b/compiler/moco-tf/README.md
@@ -0,0 +1,57 @@
+# moco-tf
+
+_moco-tf_ translates a TensorFlow model into _loco_
+
+## Purpose
+
+_moco-tf_ converts a TensorFlow-generated model file into an in-memory _loco_ IR graph.
+
+## How to use
+
+```cxx
+#include <moco/tf/Frontend.h>
+
+...
+
+ ::moco::tf::Frontend moco;
+
+ std::string pb_path = "path_to_pb_file_to_load";
+
+ auto loco_graph = moco.load(sig, pb_path, ::moco::tf::Frontend::FileType::Binary);
+```
+
+## Dependency
+
+Please refer to [requires.cmake](./requires.cmake) for dependent modules.
+
+## Naming rules
+
+### TensorFlow node names
+
+Use `REGISTER_OP` argument used in TensorFlow source `core` folder.
+
+```
+cd tensorflow/core
+grep -Rn "REGISTER_OP"
+```
+
+To see single Op, `Conv2D` for example
+```
+cd tensorflow/core
+grep -Rn "REGISTER_OP" | grep "Conv2D"
+```
+
+### Names related with TensorFlow nodes
+
+Like `GraphBuilder` and `Canonicalization`, TensorFlow node names can be used as
+prefix or suffix.
+
+- `Conv2DGraphBuilder`
+- `Conv2DCanonicalizer`
+
+### TensorFlow Dialect IR
+
+Use `TF` prefix with TensorFlow Dialect node names
+
+- `TFAvgPool`
+- `TFConv2D`
diff --git a/compiler/moco-tf/doc/Conversion.md b/compiler/moco-tf/doc/Conversion.md
new file mode 100644
index 000000000..08551cc3c
--- /dev/null
+++ b/compiler/moco-tf/doc/Conversion.md
@@ -0,0 +1,140 @@
+This document outlines how to express each TensorFlow operation on top of _loco_
+
+**CAUTION** All the python examples below are written in Python 3 with TensorFlow v1.13.
+
+**DISCLAIMER** _loco_ does not support named values, but all the below _loco_ examples assign "name" to each value to make it easy to read.
+
+### Placeholder
+
+**Placeholder** in _TensorFlow_ corresponds to **Pull** in _loco_.
+
+_Python_:
+```python
+import tensorflow as tf
+input = tf.placeholder(dtype=tf.float32, shape=[3, 4], name='input')
+print(tf.get_default_graph().as_graph_def())
+```
+
+API reference: [tf.placeholder](https://www.tensorflow.org/versions/r1.13/api_docs/python/tf)
+
+_TensorFlow_
+```prototext
+node {
+ name: "input"
+ op: "Placeholder"
+ attr {
+ key: "dtype"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim { size: 3 }
+ dim { size: 4 }
+ }
+ }
+ }
+}
+```
+
+_loco_:
+```
+%input = Pull(dtype: FLOAT32, shape: [3, 4])
+Push(%input)
+```
+
+### Identity
+
+**Identity** in _TensorFlow_ corresponds to **Forward** in _loco_.
+
+_Python_:
+```python
+import tensorflow as tf
+input = tf.placeholder(dtype=tf.float32, shape=[3, 4])
+ident = tf.identity(input)
+print(tf.get_default_graph().as_graph_def())
+```
+
+API reference: [tf.identity](https://www.tensorflow.org/api_docs/python/tf/identity)
+
+_TensorFlow_:
+```
+node {
+ name: "Placeholder"
+ op: "Placeholder"
+ attr {
+ key: "dtype"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim { size: 3 }
+ dim { size: 4 }
+ }
+ }
+ }
+}
+node {
+ name: "Identity"
+ op: "Identity"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+}
+```
+
+_loco_:
+```
+%input = Pull(dtype: FLOAT32, shape: [3, 4])
+%ident = Forward(%input)
+Push(%ident)
+```
+
+### Const
+
+**Const** in _TensorFlow_ corresponds to **ConstGen** in _loco_.
+
+_Python_:
+```python
+import tensorflow as tf
+constant = tf.constant(value=[1.0], dtype=tf.float32, shape=[3, 4])
+tf.get_default_graph().as_graph_def()
+```
+
+API reference: [tf.constant](https://www.tensorflow.org/versions/r1.13/api_docs/python/tf/constant)
+
+_TensorFlow_:
+```
+node {
+ name: "Const"
+ op: "Const"
+ attr {
+ key: "dtype"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim { size: 3 }
+ dim { size: 4 }
+ }
+ float_val: 1.0
+ }
+ }
+ }
+}
+```
+
+_loco_:
+```
+%constant = ConstGen(dtype: FLOAT32, shape: [3, 4], data: ...);
+Push(%constant)
+```
diff --git a/compiler/moco-tf/include/moco/tf/Frontend.h b/compiler/moco-tf/include/moco/tf/Frontend.h
new file mode 100644
index 000000000..6914fdd38
--- /dev/null
+++ b/compiler/moco-tf/include/moco/tf/Frontend.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TENSORFLOW_FRONTEND_H__
+#define __MOCO_TENSORFLOW_FRONTEND_H__
+
+#include <moco/Import/ModelSignature.h>
+
+#include <loco.h>
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+namespace moco
+{
+namespace tf
+{
+
+class Frontend
+{
+public:
+ enum class FileType
+ {
+ Text,
+ Binary,
+ };
+
+public:
+ Frontend();
+
+public:
+ std::unique_ptr<loco::Graph> load(const ModelSignature &, const char *, FileType) const;
+ std::unique_ptr<loco::Graph> load(const ModelSignature &, std::istream *, FileType) const;
+
+private:
+ std::unique_ptr<loco::Graph> import(const ModelSignature &, tensorflow::GraphDef &) const;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TENSORFLOW_FRONTEND_H__
diff --git a/compiler/moco-tf/requires.cmake b/compiler/moco-tf/requires.cmake
new file mode 100644
index 000000000..751192fff
--- /dev/null
+++ b/compiler/moco-tf/requires.cmake
@@ -0,0 +1,13 @@
+require("fipe")
+require("loco")
+require("moco")
+require("locop")
+require("stdex")
+require("moco-log")
+require("pepper-strcast")
+require("locomotiv")
+require("mio-tf")
+require("plier-tf")
+require("locoex-customop")
+require("logo")
+require("oops")
diff --git a/compiler/moco-tf/src/BroadcastHelper.cpp b/compiler/moco-tf/src/BroadcastHelper.cpp
new file mode 100644
index 000000000..fc058c141
--- /dev/null
+++ b/compiler/moco-tf/src/BroadcastHelper.cpp
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BroadcastHelper.h"
+
+#include <loco/IR/Nodes.h>
+#include <loco/Service/ShapeInference.h>
+
+#include <cassert>
+
+namespace
+{
+
+class NodeWithTensorShape
+{
+public:
+ NodeWithTensorShape() = default;
+
+public:
+ NodeWithTensorShape(loco::Node *node, const loco::TensorShape &shape) : _node{node}, _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ loco::Node *node(void) const { return _node; }
+ const loco::TensorShape &shape(void) const { return _shape; }
+
+private:
+ loco::Node *_node = nullptr;
+ loco::TensorShape _shape;
+};
+
+NodeWithTensorShape glue(loco::Node *node, const loco::TensorShape &shape)
+{
+ return NodeWithTensorShape(node, shape);
+}
+
+/**
+ * @brief Create a higher-rank TensorShape following NumPy broadcasting semantics
+ *
+ * HOW TO USE:
+ *
+ * auto expanded_tensor_shape = expand(tensor_shape).to(N);
+ */
+class TensorShapeExpander
+{
+public:
+ TensorShapeExpander(const loco::TensorShape &shape) : _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ loco::TensorShape to(uint32_t output_rank)
+ {
+ auto const &input_shape = _shape;
+ uint32_t const input_rank = input_shape.rank();
+
+ assert(input_rank <= output_rank && "Cannot shrink rank");
+ uint32_t const axis_shift = output_rank - input_rank;
+
+ loco::TensorShape output_shape;
+
+ output_shape.rank(output_rank);
+ for (uint32_t axis = 0; axis < output_rank; ++axis)
+ {
+ output_shape.dim(axis) = (axis < axis_shift) ? 1 : input_shape.dim(axis - axis_shift);
+ }
+
+ return output_shape;
+ }
+
+private:
+ const loco::TensorShape _shape;
+};
+
+TensorShapeExpander expand(const loco::TensorShape &shape) { return TensorShapeExpander{shape}; }
+
+/**
+ * @brief Create a rank-expanded node (if required)
+ */
+class ExpandRankFunctor final
+{
+public:
+ ExpandRankFunctor(uint32_t rank) : _rank{rank}
+ {
+ // DO NOTHING
+ }
+
+public:
+ NodeWithTensorShape operator()(const NodeWithTensorShape &in) const
+ {
+ auto const input_node = in.node();
+ auto const input_shape = in.shape();
+ auto const input_rank = input_shape.rank();
+
+ uint32_t const expected_rank = _rank;
+
+ assert(input_rank <= expected_rank);
+ if (input_rank == expected_rank)
+ {
+ // Nothing to expand
+ return in;
+ }
+
+ auto g = input_node->graph();
+ assert(g != nullptr);
+
+ auto output_shape = expand(input_shape).to(expected_rank);
+ auto output_node = g->nodes()->create<loco::FixedReshape>();
+
+ output_node->input(input_node);
+ output_node->rank(expected_rank);
+ for (uint32_t axis = 0; axis < expected_rank; ++axis)
+ {
+ output_node->dim(axis) = output_shape.dim(axis);
+ }
+
+ return glue(output_node, output_shape);
+ }
+
+private:
+ uint32_t _rank;
+};
+
+ExpandRankFunctor expand_rank_to(uint32_t rank) { return ExpandRankFunctor{rank}; }
+
+/**
+ * @brief Create a dimension-expanded node (if required)
+ */
+class ExpandDimsFunctor final
+{
+public:
+ ExpandDimsFunctor(const loco::TensorShape &shape) : _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ NodeWithTensorShape operator()(const NodeWithTensorShape &in) const
+ {
+ auto const input_node = in.node();
+ auto const input_shape = in.shape();
+ const auto &output_shape = _shape;
+
+ assert(input_shape.rank() == output_shape.rank());
+
+ if (input_shape == output_shape)
+ {
+ // Nothing to expand
+ return in;
+ }
+
+ uint32_t const rank = output_shape.rank();
+
+ auto g = input_node->graph();
+ assert(g != nullptr);
+
+ auto output_node = g->nodes()->create<loco::TensorBroadcast>();
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ auto input_dim = input_shape.dim(axis);
+ auto output_dim = output_shape.dim(axis);
+
+ assert(input_dim.known() and output_dim.known());
+
+ if (!(input_dim == output_dim))
+ {
+ assert(input_dim == 1);
+ output_node->mapping()->dim(axis) = output_dim;
+ }
+ }
+
+ output_node->input(input_node);
+
+ return glue(output_node, output_shape);
+ }
+
+private:
+ loco::TensorShape _shape;
+};
+
+ExpandDimsFunctor expand_dims_as(const loco::TensorShape &shape)
+{
+ return ExpandDimsFunctor{shape};
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+loco::Node *BroadcastFunctor::build(loco::Node *node, const loco::TensorShape &shape) const
+{
+ // clang-format off
+ return glue(node, shape)
+ | expand_rank_to(_shape.rank())
+ | expand_dims_as(_shape)
+ | [] (const NodeWithTensorShape &in) { return in.node(); };
+ // clang-format on
+}
+
+loco::Node *BroadcastFunctor::build(loco::Node *node) const
+{
+ return build(node, loco::shape_get(node).as<loco::TensorShape>());
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/BroadcastHelper.h b/compiler/moco-tf/src/BroadcastHelper.h
new file mode 100644
index 000000000..6238ad269
--- /dev/null
+++ b/compiler/moco-tf/src/BroadcastHelper.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BROADCAST_HELPER_H__
+#define __BROADCAST_HELPER_H__
+
+#include <loco/IR/Node.h>
+#include <loco/IR/Dimension.h>
+#include <loco/IR/TensorShape.h>
+
+#include <bino.h>
+#include <fipe.h> // include "fipe.h" for clients
+
+namespace moco
+{
+namespace tf
+{
+
+class BroadcastFunctor final
+{
+public:
+ BroadcastFunctor(const loco::TensorShape &shape) : _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ loco::Node *build(loco::Node *in_node, const loco::TensorShape &in_shape) const;
+
+ loco::Node *operator()(loco::Node *in_node, const loco::TensorShape &in_shape) const
+ {
+ return build(in_node, in_shape);
+ }
+
+ // This method assumes the followings:
+ // - loco::shape_known(node) returns true, and
+ // - loco::shape_get(node).domain() is loco::Domain::Tensor
+ loco::Node *build(loco::Node *node) const;
+
+ loco::Node *operator()(loco::Node *node) const { return build(node); }
+
+private:
+ loco::TensorShape _shape;
+};
+
+/**
+ * @brief Create a broadcasted node
+ *
+ * First, append canonical.FixedReshape if rank expansion is required.
+ * Then, append canonical.TensorBroadcast if dimension expansion is required
+ *
+ * This mimics "tf.broadcast_to" API in TensorFlow.
+ */
+static inline auto broadcast_to(const loco::TensorShape &shape)
+ -> decltype(bino::transform_both(std::declval<BroadcastFunctor>()))
+{
+ return bino::transform_both(BroadcastFunctor{shape});
+}
+
+} // namespace tf
+} // namespace moco
+
+#endif // __BROADCAST_HELPER_H__
diff --git a/compiler/moco-tf/src/BroadcastHelper.test.cpp b/compiler/moco-tf/src/BroadcastHelper.test.cpp
new file mode 100644
index 000000000..a6cbd719a
--- /dev/null
+++ b/compiler/moco-tf/src/BroadcastHelper.test.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BroadcastHelper.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+TEST(BroadcastFunctorTest, expand_rank)
+{
+ // Broadcast Tensor<3> as Tensor<1 x 3>
+ auto g = loco::make_graph();
+
+ auto input = g->inputs()->create();
+
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->index(0);
+
+ loco::TensorShape current_shape;
+ {
+ current_shape.rank(1);
+ current_shape.dim(0) = 3;
+ }
+
+ loco::TensorShape expected_shape;
+ {
+ expected_shape.rank(2);
+ expected_shape.dim(0) = 1;
+ expected_shape.dim(1) = 3;
+ }
+
+ moco::tf::BroadcastFunctor functor{expected_shape};
+
+ auto node = functor.build(pull, current_shape);
+
+ ASSERT_EQ(node->opnum(), static_cast<uint32_t>(loco::CanonicalOpcode::FixedReshape));
+ ASSERT_EQ(node->arg(0), pull);
+}
+
+TEST(BroadcastFunctorTest, expand_dims)
+{
+ // Broadcast Tensor<1> as Tensor<3>
+ auto g = loco::make_graph();
+
+ auto input = g->inputs()->create();
+
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->index(0);
+
+ loco::TensorShape current_shape;
+ {
+ current_shape.rank(1);
+ current_shape.dim(0) = 1;
+ }
+
+ loco::TensorShape expected_shape;
+ {
+ expected_shape.rank(1);
+ expected_shape.dim(0) = 3;
+ }
+
+ moco::tf::BroadcastFunctor functor{expected_shape};
+
+ auto node = functor.build(pull, current_shape);
+
+ ASSERT_EQ(node->opnum(), static_cast<uint32_t>(loco::CanonicalOpcode::TensorBroadcast));
+ ASSERT_EQ(node->arg(0), pull);
+
+ auto tensor_broadcast = dynamic_cast<loco::TensorBroadcast *>(node);
+
+ ASSERT_NE(tensor_broadcast, nullptr);
+ ASSERT_TRUE(tensor_broadcast->mapping()->defined(0));
+ ASSERT_EQ(tensor_broadcast->mapping()->dim(0), 3);
+}
diff --git a/compiler/moco-tf/src/CanonicalEltwiseInputConnector.cpp b/compiler/moco-tf/src/CanonicalEltwiseInputConnector.cpp
new file mode 100644
index 000000000..adeae39de
--- /dev/null
+++ b/compiler/moco-tf/src/CanonicalEltwiseInputConnector.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CanonicalEltwiseInputConnector.h"
+
+#include <loco/IR/Nodes.h>
+
+namespace moco
+{
+namespace tf
+{
+namespace eltwise
+{
+namespace binary
+{
+
+template <typename NodeTy> void InputConnector<NodeTy>::operator()(const NodePair &p) const
+{
+ _node->lhs(p.first);
+ _node->rhs(p.second);
+}
+
+#define INSTANTIATE(OP) template void InputConnector<loco::OP>::operator()(const NodePair &) const;
+
+INSTANTIATE(EltwiseAdd);
+INSTANTIATE(EltwiseSub);
+INSTANTIATE(EltwiseMax);
+INSTANTIATE(EltwiseMul);
+INSTANTIATE(EltwiseDiv);
+
+#undef INSTANTIATE
+
+} // namespace binary
+} // namespace eltwise
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/CanonicalEltwiseInputConnector.h b/compiler/moco-tf/src/CanonicalEltwiseInputConnector.h
new file mode 100644
index 000000000..a50a5011c
--- /dev/null
+++ b/compiler/moco-tf/src/CanonicalEltwiseInputConnector.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CANONICAL_ELTWISE_INPUT_CONNECTOR_H__
+#define __CANONICAL_ELTWISE_INPUT_CONNECTOR_H__
+
+#include <loco/IR/Node.h>
+
+#include <utility>
+
+namespace moco
+{
+namespace tf
+{
+namespace eltwise
+{
+namespace binary
+{
+
+using NodePair = std::pair<loco::Node *, loco::Node *>;
+
+template <typename NodeTy> class InputConnector
+{
+public:
+ InputConnector(NodeTy *node) : _node{node}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void operator()(const NodePair &p) const;
+
+private:
+ NodeTy *_node;
+};
+
+template <typename NodeTy> InputConnector<NodeTy> connect_to(NodeTy *node)
+{
+ return InputConnector<NodeTy>{node};
+}
+
+} // namespace binary
+} // namespace eltwise
+} // namespace tf
+} // namespace moco
+
+#endif // __CANONICAL_ELTWISE_INPUT_CONNECTOR_H__
diff --git a/compiler/moco-tf/src/Canonicalization/AddCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/AddCanonicalizer.cpp
new file mode 100644
index 000000000..8028a870c
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/AddCanonicalizer.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AddCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+#include <moco/IR/TFNodes.h>
+
+#include "TFEltwiseBinaryCanonicalzeHelper.h"
+
+namespace moco
+{
+namespace tf
+{
+
+bool AddCanonicalizer::transform(TFAdd *node) const
+{
+ return canonicalize_eltwise_binary_node(node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/AddCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/AddCanonicalizer.h
new file mode 100644
index 000000000..53ba9ed58
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/AddCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_ADD_CANONICALIZER_H__
+#define __MOCO_TF_ADD_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFAdd to Canonical EltwiseAdd
+ */
+class AddCanonicalizer : public SimpleNodeTransform<TFAdd>
+{
+public:
+ const char *name(void) const final { return "AddCanonicalizer"; }
+
+public:
+ bool transform(TFAdd *node) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_ADD_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/AvgPoolCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/AvgPoolCanonicalizer.cpp
new file mode 100644
index 000000000..e07a4f64f
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/AvgPoolCanonicalizer.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AvgPoolCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+#include <moco/Support/TFShapeInferenceHelper.h>
+
+#include "CodecHelper.h"
+
+#include <loco/IR/NodeShape.h>
+
+#include <moco/Log.h>
+
+namespace
+{
+
+bool canonicalize_avgpool2d(loco::Graph *graph, moco::TFAvgPool *node)
+{
+ LOGGER(l);
+
+ /**
+ * @note This will replace TFAvgPool node with Canonical FeatureEncode +
+ * AvgPool2D + FeatureDecode
+ *
+ * Before
+ * A -- TFAvgPool -- C
+ *
+ * After
+ * +- TFAvgPool --
+ * |
+ * A -+- FeatureEncode -- AvgPool2D -- FeatureDecode -- C
+ *
+ * Where
+ * A : value of TFAvgPool
+ * C : a node that uses TFAvgPool as an input
+ * TFAvgPool is disconnected from other nodes
+ */
+
+ auto data_layout = plier::tf::as_data_layout(node->data_layout());
+
+ auto feature_enc = graph->nodes()->create<loco::FeatureEncode>();
+ auto avgPool2d_node = graph->nodes()->create<loco::AvgPool2D>();
+ auto feature_dec = graph->nodes()->create<loco::FeatureDecode>();
+
+ set_feature_enc(feature_enc, data_layout);
+ set_feature_dec(feature_dec, data_layout);
+
+ avgPool2d_node->convention(loco::AvgPool2D::Convention::Valid);
+
+ auto value_shape = moco::node_shape(node->value());
+ assert(value_shape.domain() != loco::Domain::Unknown);
+
+ auto node_stride = moco::stride_of(node->strides(), node->data_layout());
+ auto node_window = moco::window_of(node->ksize(), node->data_layout());
+
+ moco::Padding2DInference infer_padding2d;
+
+ infer_padding2d.padding(node->padding());
+ infer_padding2d.stride(node_stride);
+ infer_padding2d.window(node_window);
+
+ auto input_feature_shape = moco::as_feature_shape(value_shape, node->data_layout());
+ auto input_plane_shape = moco::make_plane_shape(input_feature_shape);
+
+ *avgPool2d_node->pad() = infer_padding2d(input_plane_shape);
+ *avgPool2d_node->stride() = node_stride;
+ *avgPool2d_node->window() = node_window;
+
+ INFO(l) << "Canonicalize TFAvgPool pad = T " << avgPool2d_node->pad()->top() << ", L "
+ << avgPool2d_node->pad()->left() << ", B " << avgPool2d_node->pad()->bottom() << ", R "
+ << avgPool2d_node->pad()->right() << std::endl;
+
+ // update graph
+ auto node_A = node->value();
+
+ // update connections
+ feature_enc->input(node_A);
+ avgPool2d_node->ifm(feature_enc);
+ feature_dec->input(avgPool2d_node);
+
+ // replace node
+ replace(node).with(feature_dec);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool AvgPoolCanonicalizer::transform(TFAvgPool *node) const
+{
+ return canonicalize_avgpool2d(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/AvgPoolCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/AvgPoolCanonicalizer.h
new file mode 100644
index 000000000..e9c56c868
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/AvgPoolCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_AVGPOOL_CANONICALIZER_H__
+#define __MOCO_TF_AVGPOOL_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFAvgPool to Canonical AvgPool2D
+ */
+class AvgPoolCanonicalizer : public SimpleNodeTransform<moco::TFAvgPool>
+{
+public:
+ const char *name(void) const final { return "AvgPoolCanonicalizer"; }
+
+public:
+ bool transform(TFAvgPool *node) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_AVGPOOL_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/BiasAddCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/BiasAddCanonicalizer.cpp
new file mode 100644
index 000000000..a5568ce1a
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/BiasAddCanonicalizer.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BiasAddCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include <moco/Names.h>
+#include <moco/Log.h>
+#include <plier/tf/Convert.h>
+
+namespace
+{
+using plier::tf::DataLayout;
+
+bool canonicalize_biasadd(loco::Graph *graph, moco::TFBiasAdd *node)
+{
+ LOGGER(l);
+
+ /**
+ * @note This will replace TFBiasAdd node with Canonical BiasEncode + TensorBiasAdd
+ *
+ * Before
+ * A -- TFBiasAdd - C
+ * B -/
+ *
+ * After
+ * A -- TFBiasAdd -
+ * B -/
+ * A --------------- TensorBiasAdd - C
+ * B - BiasEncode -/
+ *
+ * Where
+ * A : value of TFBiasAdd
+ * B : bias of TFBiasAdd
+ * C : a node that uses TFBiasAdd as an input
+ * TFBiasAdd is disconnected from node C
+ * A and B are drawn twice to simplify the diagram
+ */
+
+ INFO(l) << "TFNodeCanonicalize TFBiasAdd begin";
+
+ // tensorflow data_format: one of NHWC or NCHW.
+ auto data_layout = plier::tf::as_data_layout(node->data_layout());
+
+ // creating loco nodes
+ auto bias_enc = graph->nodes()->create<loco::BiasEncode>();
+
+ auto bias_add = graph->nodes()->create<loco::TensorBiasAdd>();
+ {
+ if (data_layout == DataLayout::NHWC)
+ {
+ INFO(l) << "TFNodeCanonicalize TFBiasAdd axis 3";
+ bias_add->axis(3);
+ }
+ else if (data_layout == DataLayout::NCHW)
+ {
+ INFO(l) << "TFNodeCanonicalize TFBiasAdd axis 1";
+ bias_add->axis(1); // Channel
+      // Note: the following description of TF 1.13 at
+ // https://www.tensorflow.org/api_docs/python/tf/nn/bias_add seems wrong:
+ // "bias: A 1-D Tensor with size matching the last dimension of value."
+ // because providing the size of W (last dimension) to bias throws an error with TensorFlow
+ }
+ }
+
+ auto node_A = node->value();
+ auto node_B = node->bias();
+
+ // update connections
+ bias_add->value(node_A);
+ bias_add->bias(bias_enc);
+ bias_enc->input(node_B);
+
+ // replace old with new : about C in above note
+ replace(node).with(bias_add);
+
+ INFO(l) << "TFNodeCanonicalize TFBiasAdd done";
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool BiasAddCanonicalizer::transform(TFBiasAdd *node) const
+{
+ return canonicalize_biasadd(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/BiasAddCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/BiasAddCanonicalizer.h
new file mode 100644
index 000000000..ff4032ca9
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/BiasAddCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_BIASADD_CANONICALIZER_H__
+#define __MOCO_TF_BIASADD_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFBiasAdd to Canonical BiasAdd
+ */
+class BiasAddCanonicalizer final : public SimpleNodeTransform<moco::TFBiasAdd>
+{
+public:
+ const char *name(void) const final { return "BiasAddCanonicalizer"; }
+
+public:
+ bool transform(TFBiasAdd *node) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_BIASADD_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/ConcatV2Canonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/ConcatV2Canonicalizer.cpp
new file mode 100644
index 000000000..b59a3f3d7
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/ConcatV2Canonicalizer.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConcatV2Canonicalizer.h"
+#include "LogHelper.h"
+
+#include <moco/IR/TFDialect.h>
+#include <moco/Support/TFShapeInferenceHelper.h>
+
+#include <moco/Log.h>
+
+#include <loco/Service/ShapeInference.h>
+
+#include <stdex/Memory.h>
+#include <oops/UserExn.h>
+
+namespace
+{
+
+using namespace moco::tf;
+
+bool scalar_value(moco::TFConst *node, int32_t &ret)
+{
+ auto nodeshape = node_shape(node);
+ if (!(node->dtype() == loco::DataType::S32))
+ return false;
+
+ auto tensor_shape = nodeshape.as<loco::TensorShape>();
+ if (!(tensor_shape.rank() == 0 || tensor_shape.rank() == 1))
+ return false;
+
+ ret = node->at<loco::DataType::S32>(0);
+
+ return true;
+}
+
+bool canonicalize_concat(loco::Graph *graph, moco::TFConcatV2 *node)
+{
+ LOGGER(l);
+
+ /**
+ * @note This will replace TFConcatV2 node with (series of) Canonical
+ * TensorConcat. Below diagram is an example of three inputs
+ *
+ * Before
+ * A --- TFConcatV2 -- C
+ * B --/
+ * N --/
+ * X --/
+ * After
+ * A --- TFConcatV2
+ * B --/
+ * N --/
+ * X --/
+ * A --- TensorConcat -- TensorConcat -- C
+ * B --/ /
+ * N -----------------/
+ *
+ * Where
+ * A : first value of TFConcatV2
+ * B : second value of TFConcatV2
+ * N : third or N'th value of TFConcatV2
+ * X : axis node of TFConcatV2
+ * C : a node that uses TFConcatV2 as an input
+ * TFConcatV2 is disconnected from C
+ * To simplify the diagram in 'After', A, B, N are drawn
+   *       multiple times but they are the same nodes.
+ */
+
+ const int num_values = node->num_values();
+ assert(num_values >= 2);
+
+ // get axis absolute value
+ auto value_a = node->values(0);
+ if (!loco::shape_known(value_a))
+ return false;
+
+ uint32_t node_rank = 0;
+ {
+ auto value_a_shape = moco::node_shape(value_a);
+ assert(value_a_shape.domain() == loco::Domain::Tensor);
+
+ auto value_a_tensor_shape = value_a_shape.as<loco::TensorShape>();
+ node_rank = value_a_tensor_shape.rank();
+ }
+
+ int32_t axis_value = 0;
+ {
+ // axis should be TFConst
+ auto axis_node = node->axis();
+ auto tfconst = dynamic_cast<moco::TFConst *>(axis_node);
+ if (tfconst == nullptr)
+ {
+      // TODO Check this: this error can be from TFOptimizer.
+ throw oops::UserExn("ConcatV2 node has invalid input for axis", node->name());
+ }
+ auto result = scalar_value(tfconst, axis_value);
+ if (!result)
+ {
+      // TODO Check this: this error can be from TFOptimizer.
+ throw oops::UserExn("ConcatV2 node has invalid input for axis", node->name());
+ }
+ }
+ uint32_t axis_absolute = (axis_value >= 0) ? axis_value : (int32_t)node_rank + axis_value;
+
+ INFO(l) << "canonicalize_concat axis(" << axis_absolute << "), value(" << axis_value << "), rank("
+ << node_rank << ")";
+
+ // Convert series of TensorConcat if num_values > 2
+ auto concat_node = graph->nodes()->create<loco::TensorConcat>();
+ concat_node->lhs(node->values(0));
+ concat_node->rhs(node->values(1));
+ concat_node->axis(axis_absolute);
+
+ loco::TensorConcat *last_concat = concat_node;
+ for (int ni = 2; ni < num_values; ++ni)
+ {
+ auto concat_node_next = graph->nodes()->create<loco::TensorConcat>();
+
+ concat_node_next->lhs(last_concat);
+ concat_node_next->rhs(node->values(ni));
+ concat_node_next->axis(axis_absolute);
+
+ // update last concat node
+ last_concat = concat_node_next;
+ }
+
+ // replace node
+ replace(node).with(last_concat);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool ConcatV2Canonicalizer::transform(TFConcatV2 *node) const
+{
+ return canonicalize_concat(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/ConcatV2Canonicalizer.h b/compiler/moco-tf/src/Canonicalization/ConcatV2Canonicalizer.h
new file mode 100644
index 000000000..e6b471b89
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/ConcatV2Canonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_CONCATV2_CANONICALIZER_H__
+#define __MOCO_TF_CONCATV2_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFConcatV2 to Canonical TensorConcat
+ */
+class ConcatV2Canonicalizer : public SimpleNodeTransform<moco::TFConcatV2>
+{
+public:
+ const char *name(void) const final { return "ConcatV2Canonicalizer"; }
+
+public:
+ bool transform(moco::TFConcatV2 *node) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_CONCATV2_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/ConstCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/ConstCanonicalizer.cpp
new file mode 100644
index 000000000..60629cd5a
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/ConstCanonicalizer.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include <moco/Names.h>
+#include <moco/Log.h>
+
+#include <oops/UserExn.h>
+
+namespace
+{
+
+bool canonicalize_const(loco::Graph *graph, moco::TFConst *node)
+{
+ LOGGER(l);
+
+ /**
+ * @note This will replace TFConst node with Canonical Const
+ *
+ * Before
+ * TFConst -- C
+ *
+ * After
+ * TFConst -
+ * ConstGen -- C
+ *
+ * Where
+ * C : a node that uses TFConst as an input
+ * TFConst is disconnected from other nodes
+ */
+
+ INFO(l) << "TFNodeCanonicalize TFConst begin";
+
+ auto const_node = graph->nodes()->create<loco::ConstGen>();
+
+ // copy properties
+ auto dtype = node->dtype();
+ const_node->dtype(dtype);
+
+ auto rank = node->rank();
+
+ if (rank == 0)
+ {
+ // This routine implements a workaround that converts a scalar constant (rank-0 tensor)
+ // into a rank-1 tensor of shape [1].
+ //
+ // TODO Revise this implementation later
+ const_node->rank(1);
+ const_node->dim(0) = 1;
+ }
+ else
+ {
+ const_node->rank(rank);
+
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ if (node->dim(r).known())
+ const_node->dim(r) = node->dim(r);
+ else
+ const_node->dim(r).unset();
+ }
+ }
+
+ switch (dtype)
+ {
+ case loco::DataType::S32:
+ {
+ uint32_t input_elements = node->size<loco::DataType::S32>();
+ const_node->size<loco::DataType::S32>(input_elements);
+ for (uint32_t i = 0; i < input_elements; ++i)
+ {
+ const_node->at<loco::DataType::S32>(i) = node->at<loco::DataType::S32>(i);
+ }
+ break;
+ }
+ case loco::DataType::FLOAT32:
+ {
+ uint32_t input_elements = node->size<loco::DataType::FLOAT32>();
+ const_node->size<loco::DataType::FLOAT32>(input_elements);
+ for (uint32_t i = 0; i < input_elements; ++i)
+ {
+ const_node->at<loco::DataType::FLOAT32>(i) = node->at<loco::DataType::FLOAT32>(i);
+ }
+ break;
+ }
+ default:
+ throw oops::UserExn("Const has unsupported data type", node->name());
+ }
+
+ // update graph
+ replace(node).with(const_node);
+
+ INFO(l) << "TFNodeCanonicalize TFConst done";
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool ConstCanonicalizer::transform(TFConst *node) const
+{
+ return canonicalize_const(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/ConstCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/ConstCanonicalizer.h
new file mode 100644
index 000000000..1b0b2b867
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/ConstCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_CONST_CANONICALIZER_H__
+#define __MOCO_TF_CONST_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFConst to Canonical ConstGen
+ */
+class ConstCanonicalizer : public SimpleNodeTransform<moco::TFConst>
+{
+public:
+ const char *name(void) const final { return "ConstCanonicalizer"; }
+
+public:
+ bool transform(moco::TFConst *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_CONST_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/Conv2DBackpropInputCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/Conv2DBackpropInputCanonicalizer.cpp
new file mode 100644
index 000000000..d3cbd4ab3
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/Conv2DBackpropInputCanonicalizer.cpp
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conv2DBackpropInputCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include "CodecHelper.h"
+
+#include <loco/IR/Stride.h>
+#include <loco/IR/Padding2D.h>
+#include <loco/Service/ShapeInference.h>
+
+#include <oops/UserExn.h>
+
+namespace
+{
+using plier::tf::DataLayout;
+
+void set_filter_enc(loco::FilterEncode *filter_enc)
+{
+ auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+
+ // In TensorFlow, Conv2dBackpropInput's filter is a 4-D tensor of following shape:
+ // [filter_height, filter_width, out_channels, in_channels] or HWOI or HWNC (in/out in loco sense)
+ enc->perm()->axis(loco::FilterAxis::Height) = 0;
+ enc->perm()->axis(loco::FilterAxis::Width) = 1;
+ enc->perm()->axis(loco::FilterAxis::Count) = 2;
+ enc->perm()->axis(loco::FilterAxis::Depth) = 3;
+
+ filter_enc->encoder(std::move(enc));
+}
+
+} // namespace
+
+namespace
+{
+
+bool stride_2d_from_4d(loco::Stride<2> &ret, const std::vector<int64_t> &strides_4d,
+ const DataLayout data_layout)
+{
+ if (!(strides_4d.size() == 4))
+ return false;
+
+ switch (data_layout)
+ {
+ case DataLayout::NHWC:
+ ret.vertical(strides_4d.at(1));
+ ret.horizontal(strides_4d.at(2));
+ break;
+ case DataLayout::NCHW:
+ ret.vertical(strides_4d.at(2));
+ ret.horizontal(strides_4d.at(3));
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+
+struct PlaneShape
+{
+ loco::Dimension vertical;
+ loco::Dimension horizontal;
+};
+
+class Padding2DInference final
+{
+public:
+ Padding2DInference(const moco::TFNode *node) { _node = node; }
+
+public:
+ loco::Padding2D operator()(void);
+
+public:
+ PlaneShape &input() { return _input; }
+ PlaneShape &output() { return _output; }
+ loco::Stride<2> &stride() { return _stride; }
+ loco::Window<2> &window() { return _window; }
+ moco::TFPadding &padding() { return _padding; }
+
+private:
+  /// @brief Check whether all ingredients are set with non-default values
+ bool ready()
+ {
+ if (not input().vertical.known())
+ return false;
+ if (not input().horizontal.known())
+ return false;
+ if (not output().vertical.known())
+ return false;
+ if (not output().horizontal.known())
+ return false;
+ if (stride().vertical() == 0)
+ return false;
+ if (stride().horizontal() == 0)
+ return false;
+ if (window().vertical() == 0)
+ return false;
+ if (window().horizontal() == 0)
+ return false;
+ if (padding().empty())
+ return false;
+
+ return true;
+ }
+
+ inline uint32_t tight_output_for_valid_padding(uint32_t input, uint32_t stride, uint32_t filter)
+ {
+ return stride * (input - 1) + filter;
+ }
+
+ /**
+ * @note For Conv2DBackpropInput SAME padding, TensorFlow requires this condition to hold
+ *
+ * Reference: `::tensorflow::GetWindowedOutputSizeVerboseV2()` from TensorFlow project
+ */
+ inline bool same_padding_applicable(uint32_t input, uint32_t output, uint32_t stride)
+ {
+ // Here 'input' and 'output' means Conv2DBackpropInput's actual node input and output.
+ // Then these three conditions are equivalent:
+ //
+ // input == floor((output + stride - 1) / stride)
+ // input == ceil(output / stride)
+ // (stride * (input - 1) < output) and (output <= stride * input)
+ return (stride * (input - 1) < output) and (output <= stride * input);
+ }
+
+ inline uint32_t padding_needed(uint32_t input, uint32_t output, uint32_t stride, uint32_t filter)
+ {
+ return stride * (input - 1) + filter - output;
+ }
+
+private:
+ const moco::TFNode *_node;
+ PlaneShape _input;
+ PlaneShape _output;
+ loco::Stride<2> _stride;
+ loco::Window<2> _window;
+ moco::TFPadding _padding;
+};
+
+loco::Padding2D Padding2DInference::operator()(void)
+{
+ assert(ready());
+
+ if (padding() == "VALID")
+ {
+ // In case of VALID padding, TensorFlow accepts any size same or larger than
+ // 'tight fit' output. When output size (set by 'input sizes' node input) is
+    // larger than the tight fit, the extra space is filled with zeros.
+ auto tight_output_vertical = tight_output_for_valid_padding(
+ input().vertical.value(), stride().vertical(), window().vertical());
+ auto tight_output_horizontal = tight_output_for_valid_padding(
+ input().horizontal.value(), stride().horizontal(), window().horizontal());
+
+ if (output().vertical.value() < tight_output_vertical or
+ output().horizontal.value() < tight_output_horizontal)
+ throw oops::UserExn("input_sizes is too small", _node->name());
+
+ // Currently, only accept tight fit.
+ // TODO Support non-tight case by adding zero padding operation
+ assert(output().vertical.value() == tight_output_vertical);
+ assert(output().horizontal.value() == tight_output_horizontal);
+
+ return loco::Padding2D(0, 0, 0, 0);
+ }
+
+ if (padding() == "SAME")
+ {
+ // This condition is required by TensorFlow
+ if (not same_padding_applicable(input().vertical.value(), output().vertical.value(),
+ stride().vertical()) or
+ not same_padding_applicable(input().horizontal.value(), output().horizontal.value(),
+ stride().horizontal()))
+ throw oops::UserExn("Size mismatch for SAME padding", _node->name());
+
+ auto whole_pad_vertical = padding_needed(input().vertical.value(), output().vertical.value(),
+ stride().vertical(), window().vertical());
+ auto whole_pad_horizontal =
+ padding_needed(input().horizontal.value(), output().horizontal.value(),
+ stride().horizontal(), window().horizontal());
+
+ loco::Padding2D res;
+
+ res.top(whole_pad_vertical / 2);
+ res.bottom(whole_pad_vertical - res.top());
+ res.left(whole_pad_horizontal / 2);
+ res.right(whole_pad_horizontal - res.left());
+
+ return res;
+ }
+
+ throw oops::UserExn("Usupported padding " + padding(), _node->name());
+}
+
+/**
+ * @param[out] ret PlaneShape extracted from 'node' with given 'data_layout'
+ * @param[in] node
+ * @param[in] data_layout
+ *
+ * @return true on success
+ */
+bool set_plane_shape(PlaneShape &ret, const loco::Node *node, const DataLayout data_layout)
+{
+ auto tensor_shape = loco::shape_get(node).as<loco::TensorShape>();
+ if (!(tensor_shape.rank() == 4))
+ return false;
+
+ switch (data_layout)
+ {
+ case DataLayout::NHWC:
+ ret.vertical = tensor_shape.dim(1).value();
+ ret.horizontal = tensor_shape.dim(2).value();
+ break;
+ case DataLayout::NCHW:
+ ret.vertical = tensor_shape.dim(2).value();
+ ret.horizontal = tensor_shape.dim(3).value();
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * @param[out] ret 2D Window extracted from HW** filter node
+ * @param[in] filter_node
+ *
+ * @return true on success
+ */
+bool set_window(loco::Window<2> &ret, const loco::Node *filter_node)
+{
+ auto tensor_shape = loco::shape_get(filter_node).as<loco::TensorShape>();
+ assert(tensor_shape.rank() == 4);
+
+ ret.vertical(tensor_shape.dim(0).value());
+ ret.horizontal(tensor_shape.dim(1).value());
+
+ return true;
+}
+
+} // namespace
+
+namespace
+{
+
+bool canonicalize_conv2d_backprop_input(loco::Graph *graph,
+ moco::TFConv2DBackpropInput *conv2d_backprop)
+{
+ /**
+ * @note This will replace TFConv2DBackpropInput node with canonical
+ * FeatureEncode + FilterEncode + TransposedConv2D + FeatureDecode
+ *
+ * Before
+ * input_sizes ----
+ * \
+ * filter -------- TFConv2DBackpropInput --- output(s)
+ * /
+ * out_backprop ---
+ *
+ * After
+ * input_sizes ----
+ * \
+ * filter -------- TFConv2DBackpropInput ---
+ * /
+ * out_backprop ---
+ *
+ * filter ------ FilterEncode ------ TransposedConv2D --- FeatureDecode --- output(s)
+ * (as ker) /
+ * out_backprop --- FeatureEncode ---
+ * (as ifm)
+ */
+
+ if (!loco::shape_known(conv2d_backprop->out_backprop()))
+ return false;
+ if (!loco::shape_known(conv2d_backprop))
+ return false;
+ if (!loco::shape_known(conv2d_backprop->filter()))
+ return false;
+
+ auto data_layout = plier::tf::as_data_layout(conv2d_backprop->data_layout());
+
+ // Nodes to replace
+ auto feature_enc = graph->nodes()->create<loco::FeatureEncode>();
+ auto filter_enc = graph->nodes()->create<loco::FilterEncode>();
+ auto tr_conv2d = graph->nodes()->create<loco::TransposedConv2D>();
+ auto feature_dec = graph->nodes()->create<loco::FeatureDecode>();
+
+ set_feature_enc(feature_enc, data_layout);
+ set_filter_enc(filter_enc);
+ set_feature_dec(feature_dec, data_layout);
+
+ // Attributes for new TransposedConv2D
+ loco::Stride<2> stride;
+ loco::Padding2D pad;
+
+ // Get attributes
+ {
+ if (!stride_2d_from_4d(stride, conv2d_backprop->strides(), data_layout))
+ throw oops::UserExn("Unsupported strides", conv2d_backprop->name());
+
+ Padding2DInference infer_pad(conv2d_backprop);
+
+ if (!set_plane_shape(infer_pad.input(), conv2d_backprop->out_backprop(), data_layout))
+ throw oops::UserExn("Unsupported out_backprop data_format", conv2d_backprop->name());
+ if (!set_plane_shape(infer_pad.output(), conv2d_backprop, data_layout))
+ throw oops::UserExn("Unsupported data_format", conv2d_backprop->name());
+ if (!set_window(infer_pad.window(), conv2d_backprop->filter()))
+ throw oops::UserExn("Unsupported filter shape", conv2d_backprop->name());
+ infer_pad.stride() = stride;
+ infer_pad.padding() = conv2d_backprop->padding();
+
+ // Run padding infer_pad
+ pad = infer_pad();
+ }
+
+ // Set attributes
+ tr_conv2d->pad()->top(pad.top());
+ tr_conv2d->pad()->bottom(pad.bottom());
+ tr_conv2d->pad()->left(pad.left());
+ tr_conv2d->pad()->right(pad.right());
+
+ tr_conv2d->stride()->vertical(stride.vertical());
+ tr_conv2d->stride()->horizontal(stride.horizontal());
+
+ // Update graph
+ auto input_node = conv2d_backprop->out_backprop();
+ auto filter_node = conv2d_backprop->filter();
+
+ // Update connections
+ feature_enc->input(input_node);
+ filter_enc->input(filter_node);
+ tr_conv2d->ifm(feature_enc);
+ tr_conv2d->ker(filter_enc);
+ feature_dec->input(tr_conv2d);
+
+ // Replace old conv2d_backprop
+ replace(conv2d_backprop).with(feature_dec);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool Conv2DBackpropInputCanonicalizer::transform(TFConv2DBackpropInput *node) const
+{
+ return canonicalize_conv2d_backprop_input(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/Conv2DBackpropInputCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/Conv2DBackpropInputCanonicalizer.h
new file mode 100644
index 000000000..bc37bb9cb
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/Conv2DBackpropInputCanonicalizer.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_CONV2DBACKPROPINPUT_CANONICALIZER_H__
+#define __MOCO_TF_CONV2DBACKPROPINPUT_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/// @brief Convert TFConv2DBackpropInput to Canonical TransposedConv2D
+class Conv2DBackpropInputCanonicalizer : public SimpleNodeTransform<moco::TFConv2DBackpropInput>
+{
+public:
+ const char *name(void) const final { return "Conv2DBackpropInputCanonicalizer"; }
+
+public:
+ bool transform(moco::TFConv2DBackpropInput *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_CONV2DBACKPROPINPUT_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/Conv2DCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/Conv2DCanonicalizer.cpp
new file mode 100644
index 000000000..a955793a8
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/Conv2DCanonicalizer.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conv2DCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+#include <moco/Support/TFShapeInferenceHelper.h>
+
+#include "CodecHelper.h"
+
+#include <moco/Log.h>
+
+namespace
+{
+using plier::tf::DataLayout;
+
+void set_filter_enc(loco::FilterEncode *filter_enc)
+{
+ auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+
+ // In TensorFlow, conv2d filter is a 4-D tensor of following shape:
+ // [filter_height, filter_width, in_channels, out_channels] -> HWIO (HWCN)
+ enc->perm()->axis(loco::FilterAxis::Height) = 0;
+ enc->perm()->axis(loco::FilterAxis::Width) = 1;
+ enc->perm()->axis(loco::FilterAxis::Depth) = 2;
+ enc->perm()->axis(loco::FilterAxis::Count) = 3;
+
+ filter_enc->encoder(std::move(enc));
+}
+
+bool canonicalize_conv2d(loco::Graph *graph, moco::TFConv2D *node)
+{
+ LOGGER(l);
+
+ /**
+   * @note This will replace TFConv2D node with Canonical FeatureEncode +
+ * FilterEncode + Conv2D + FeatureDecode
+ *
+ * Before
+ * A -- TFConv2D - C
+ * B -/
+ *
+ * After
+ * A -- TFConv2D -
+ * B -/
+ * A -- FeatureEncode - Conv2D - FeatureDecode - C
+ * B -- FilterEncode -/
+ *
+ * Where
+ * A : ifm of TFConv2D
+ * B : ker of TFConv2D
+ * C : a node that uses TFConv2D as an input
+ * TFConv2D is disconnected from other nodes
+ * A and B are drawn twice to simplify the diagram
+ */
+
+ auto data_layout = plier::tf::as_data_layout(node->data_layout());
+
+ auto feature_enc = graph->nodes()->create<loco::FeatureEncode>();
+ auto filter_enc = graph->nodes()->create<loco::FilterEncode>();
+ auto conv2d = graph->nodes()->create<loco::Conv2D>();
+ auto feature_dec = graph->nodes()->create<loco::FeatureDecode>();
+
+ set_feature_enc(feature_enc, data_layout);
+ set_filter_enc(filter_enc);
+ set_feature_dec(feature_dec, data_layout);
+
+ auto input_shape = moco::node_shape(node->input());
+ assert(input_shape.domain() != loco::Domain::Unknown);
+
+ auto ker_shape = moco::node_shape(node->filter());
+ auto ker_tensor_shape = ker_shape.as<loco::TensorShape>(); // in HWIO
+
+ auto node_stride = moco::stride_of(node->strides(), node->data_layout());
+ auto node_window = moco::window_of(ker_tensor_shape, "HWIO");
+
+ moco::Padding2DInference infer_padding2d;
+
+ infer_padding2d.padding(node->padding());
+ infer_padding2d.stride(node_stride);
+ infer_padding2d.window(node_window);
+
+ auto input_feature_shape = moco::as_feature_shape(input_shape, node->data_layout());
+ auto input_plane_shape = moco::make_plane_shape(input_feature_shape);
+
+ *conv2d->pad() = infer_padding2d(input_plane_shape);
+ *conv2d->stride() = node_stride;
+
+ // update graph
+ auto node_A = node->input();
+ auto node_B = node->filter();
+
+ // update connections
+ feature_enc->input(node_A);
+ filter_enc->input(node_B);
+ conv2d->ifm(feature_enc);
+ conv2d->ker(filter_enc);
+ feature_dec->input(conv2d);
+
+ // replace old node
+ replace(node).with(feature_dec);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool Conv2DCanonicalizer::transform(TFConv2D *node) const
+{
+ return canonicalize_conv2d(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/Conv2DCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/Conv2DCanonicalizer.h
new file mode 100644
index 000000000..ea39667f3
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/Conv2DCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_CONV2D_CANONICALIZER_H__
+#define __MOCO_TF_CONV2D_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFConv2D to Canonical Conv2D
+ */
+class Conv2DCanonicalizer : public SimpleNodeTransform<TFConv2D>
+{
+public:
+ const char *name(void) const final { return "Conv2DCanonicalizer"; }
+
+public:
+ bool transform(TFConv2D *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_CONV2D_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/DepthwiseConv2dNativeCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/DepthwiseConv2dNativeCanonicalizer.cpp
new file mode 100644
index 000000000..50dddf637
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/DepthwiseConv2dNativeCanonicalizer.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConv2dNativeCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+#include <moco/Support/TFShapeInferenceHelper.h>
+
+#include "CodecHelper.h"
+
+#include <moco/Log.h>
+
+namespace
+{
+
+using plier::tf::DataLayout;
+
+void set_filter_enc(loco::DepthwiseFilterEncode *filter_enc)
+{
+ auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>();
+
+ // In TensorFlow, depthwiseconv2dnative filter is a 4-D tensor of following shape:
+ // [filter_height, filter_width, in_channels, channel_multiplier] -> HWCM
+ enc->perm()->axis(loco::DepthwiseFilterAxis::Height) = 0;
+ enc->perm()->axis(loco::DepthwiseFilterAxis::Width) = 1;
+ enc->perm()->axis(loco::DepthwiseFilterAxis::Depth) = 2;
+ enc->perm()->axis(loco::DepthwiseFilterAxis::Multiplier) = 3;
+
+ filter_enc->encoder(std::move(enc));
+}
+
+bool canonicalize_depthwiseconv2dnative(loco::Graph *graph, moco::TFDepthwiseConv2dNative *node)
+{
+ LOGGER(l);
+
+ /**
+ * @note This will replace TFDepthwiseConv2dNative node with Canonical FeatureEncode +
+ * DepthwiseFilterEncode + DepthwiseConv2D + FeatureDecode
+ *
+ * Before
+ * A -+- TFDepthwiseConv2dNative - C
+ * |
+ * B -+
+ *
+ * After
+ *
+ * A -+ FeatureEncode ----------------+- DepthwiseConv2D - FeatureDecode - C
+ * | |
+ * +-(TFDepthwiseConv2dNative) |
+ * | |
+ * B -+ DepthwiseFilterEncode --------+
+ *
+ * Where
+ * A : ifm of TFDepthwiseConv2dNative
+ * B : ker of TFDepthwiseConv2dNative
+ * C : a node that uses TFDepthwiseConv2dNative as an input
+ * TFDepthwiseConv2dNative is disconnected from other nodes
+ */
+
+ INFO(l) << "TFNodeCanonicalize TFDepthwiseConv2dNative begin";
+
+ auto data_layout = plier::tf::as_data_layout(node->data_layout());
+
+ auto feature_enc = graph->nodes()->create<loco::FeatureEncode>();
+ auto filter_enc = graph->nodes()->create<loco::DepthwiseFilterEncode>();
+ auto depthwiseconv2d = graph->nodes()->create<loco::DepthwiseConv2D>();
+ auto feature_dec = graph->nodes()->create<loco::FeatureDecode>();
+
+ set_feature_enc(feature_enc, data_layout);
+ set_filter_enc(filter_enc);
+ set_feature_dec(feature_dec, data_layout);
+
+ // Calculate Pad and Stride from inference
+ auto input_shape = moco::node_shape(node->input());
+ auto ker_shape = moco::node_shape(node->filter());
+ auto ker_tensor_shape = ker_shape.as<loco::TensorShape>();
+ auto node_stride = moco::stride_of(node->strides(), node->data_layout());
+ auto node_window = moco::window_of(ker_tensor_shape, "HWCM");
+
+ moco::Padding2DInference infer_padding2d;
+
+ infer_padding2d.padding(node->padding());
+ infer_padding2d.stride(node_stride);
+ infer_padding2d.window(node_window);
+
+ auto input_feature_shape = moco::as_feature_shape(input_shape, node->data_layout());
+ auto input_plane_shape = moco::make_plane_shape(input_feature_shape);
+
+ *depthwiseconv2d->pad() = infer_padding2d(input_plane_shape);
+ *depthwiseconv2d->stride() = node_stride;
+
+ // update graph
+ auto node_A = node->input();
+ auto node_B = node->filter();
+
+ // update connections
+ feature_enc->input(node_A);
+ filter_enc->input(node_B);
+ depthwiseconv2d->ifm(feature_enc);
+ depthwiseconv2d->ker(filter_enc);
+ feature_dec->input(depthwiseconv2d);
+
+ // replace and disconnect old node
+ replace(node).with(feature_dec);
+
+ INFO(l) << "TFNodeCanonicalize TFDepthwiseConv2dNative done";
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool DepthwiseConv2dNativeCanonicalizer::transform(TFDepthwiseConv2dNative *node) const
+{
+ return canonicalize_depthwiseconv2dnative(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/DepthwiseConv2dNativeCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/DepthwiseConv2dNativeCanonicalizer.h
new file mode 100644
index 000000000..704e1ade9
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/DepthwiseConv2dNativeCanonicalizer.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_DEPTHWISE_CONV2D_NATIVE_CANONICALIZER_H__
+#define __MOCO_TF_DEPTHWISE_CONV2D_NATIVE_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFDepthwiseConv2dNative to Canonical DepthwiseConv2D
+ */
+class DepthwiseConv2dNativeCanonicalizer : public SimpleNodeTransform<moco::TFDepthwiseConv2dNative>
+{
+public:
+ const char *name(void) const final { return "DepthwiseConv2dNativeCanonicalizer"; }
+
+public:
+ bool transform(moco::TFDepthwiseConv2dNative *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_DEPTHWISE_CONV2D_NATIVE_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/IdentityCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/IdentityCanonicalizer.cpp
new file mode 100644
index 000000000..3b680cf04
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/IdentityCanonicalizer.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IdentityCanonicalizer.h"
+
+#include "Convert.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include <moco/Names.h>
+#include <moco/Log.h>
+
+namespace
+{
+
+bool canonicalize_identity(loco::Graph *graph, moco::TFIdentity *node)
+{
+ LOGGER(l);
+
+ /**
+ * @note This will replace TFIdentity node with Canonical Forward
+ *
+ * Before
+ * A -- TFIdentity -- C
+ *
+ * After
+ * /- TFIdentity --
+ * A -- Forward -- C
+ *
+ * Where
+ * A : input of TFIdentity
+ * C : a node that uses TFIdentity as an input
+ * TFIdentity is disconnected from the output
+ */
+
+ INFO(l) << "TFNodeCanonicalize TFIdentity begin";
+
+ auto forward_node = graph->nodes()->create<loco::Forward>();
+
+ auto node_A = node->input();
+
+ forward_node->input(node_A);
+
+ // update graph
+ replace(node).with(forward_node);
+
+ INFO(l) << "TFNodeCanonicalize TFIdentity done";
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool IdentityCanonicalizer::transform(TFIdentity *node) const
+{
+ return canonicalize_identity(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/IdentityCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/IdentityCanonicalizer.h
new file mode 100644
index 000000000..59b2894c5
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/IdentityCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_IDENTITY_CANONICALIZER_H__
+#define __MOCO_TF_IDENTITY_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFIdentity to Canonical Forward
+ */
+class IdentityCanonicalizer : public SimpleNodeTransform<moco::TFIdentity>
+{
+public:
+ const char *name(void) const final { return "IdentityCanonicalizer"; }
+
+public:
+ bool transform(moco::TFIdentity *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_IDENTITY_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/MaxPoolCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/MaxPoolCanonicalizer.cpp
new file mode 100644
index 000000000..06a605717
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/MaxPoolCanonicalizer.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPoolCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+#include <moco/Support/TFShapeInferenceHelper.h>
+
+#include "CodecHelper.h"
+
+#include <moco/Log.h>
+
+namespace
+{
+
+bool canonicalize_maxpool2d(loco::Graph *graph, moco::TFMaxPool *node)
+{
+ LOGGER(l);
+
+ /**
+ * @note This will replace TFMaxPool node with Canonical FeatureEncode +
+ * MaxPool2D + FeatureDecode
+ *
+ * Before
+ * A -- TFMaxPool -- C
+ *
+ * After
+ * +- TFMaxPool --
+ * |
+ * A -+- FeatureEncode -- MaxPool2D -- FeatureDecode -- C
+ *
+ * Where
+ * A : value of TFMaxPool
+ * C : a node that uses TFMaxPool as an input
+ * TFMaxPool is disconnected from other nodes
+ */
+
+ auto data_layout = plier::tf::as_data_layout(node->data_layout());
+
+ auto feature_enc = graph->nodes()->create<loco::FeatureEncode>();
+ auto maxPool2d_node = graph->nodes()->create<loco::MaxPool2D>();
+ auto feature_dec = graph->nodes()->create<loco::FeatureDecode>();
+
+ set_feature_enc(feature_enc, data_layout);
+ set_feature_dec(feature_dec, data_layout);
+
+ // infer pad values from the input shape
+ auto input_shape = moco::node_shape(node->input());
+ assert(input_shape.domain() != loco::Domain::Unknown);
+
+ auto node_stride = moco::stride_of(node->strides(), node->data_layout());
+ auto node_window = moco::window_of(node->ksize(), node->data_layout());
+
+ moco::Padding2DInference infer_padding2d;
+
+ infer_padding2d.padding(node->padding());
+ infer_padding2d.stride(node_stride);
+ infer_padding2d.window(node_window);
+
+ auto input_feature_shape = moco::as_feature_shape(input_shape, node->data_layout());
+ auto input_plane_shape = moco::make_plane_shape(input_feature_shape);
+
+ *maxPool2d_node->pad() = infer_padding2d(input_plane_shape);
+ *maxPool2d_node->stride() = node_stride;
+ *maxPool2d_node->window() = node_window;
+
+ INFO(l) << "Canonicalize TFMaxPool pad = T " << maxPool2d_node->pad()->top() << ", L "
+ << maxPool2d_node->pad()->left() << ", B " << maxPool2d_node->pad()->bottom() << ", R "
+ << maxPool2d_node->pad()->right() << std::endl;
+
+ // update graph
+ auto node_A = node->input();
+
+ // update connections
+ feature_enc->input(node_A);
+ maxPool2d_node->ifm(feature_enc);
+ feature_dec->input(maxPool2d_node);
+
+ // replace node
+ replace(node).with(feature_dec);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool MaxPoolCanonicalizer::transform(TFMaxPool *node) const
+{
+ return canonicalize_maxpool2d(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/MaxPoolCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/MaxPoolCanonicalizer.h
new file mode 100644
index 000000000..c58ade528
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/MaxPoolCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_MAXPOOL_CANONICALIZER_H__
+#define __MOCO_TF_MAXPOOL_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFMaxPool to Canonical MaxPool2D
+ */
+class MaxPoolCanonicalizer : public SimpleNodeTransform<moco::TFMaxPool>
+{
+public:
+ const char *name(void) const final { return "MaxPoolCanonicalizer"; }
+
+public:
+ bool transform(moco::TFMaxPool *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_MAXPOOL_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/MaximumCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/MaximumCanonicalizer.cpp
new file mode 100644
index 000000000..92634d01f
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/MaximumCanonicalizer.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaximumCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include "TFEltwiseBinaryCanonicalzeHelper.h"
+
+namespace moco
+{
+namespace tf
+{
+
+bool MaximumCanonicalizer::transform(moco::TFMaximum *node) const
+{
+ return canonicalize_eltwise_binary_node(node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/MaximumCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/MaximumCanonicalizer.h
new file mode 100644
index 000000000..baff4d7ad
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/MaximumCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_MAXIMUM_CANONICALIZER_H__
+#define __MOCO_TF_MAXIMUM_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFMaximum to Canonical EltwiseMax
+ */
+class MaximumCanonicalizer : public SimpleNodeTransform<moco::TFMaximum>
+{
+public:
+ const char *name(void) const final { return "MaximumCanonicalizer"; }
+
+public:
+ bool transform(moco::TFMaximum *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_MAXIMUM_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/MeanCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/MeanCanonicalizer.cpp
new file mode 100644
index 000000000..69eaf7900
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/MeanCanonicalizer.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MeanCanonicalizer.h"
+#include "TFReduceCanonicalzeHelper.h"
+
+namespace moco
+{
+namespace tf
+{
+
+bool MeanCanonicalizer::transform(moco::TFMean *node) const
+{
+ return canonicalize_reduce_node(node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/MeanCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/MeanCanonicalizer.h
new file mode 100644
index 000000000..469d7e3cd
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/MeanCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_MEAN_CANONICALIZER_H__
+#define __MOCO_TF_MEAN_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Canonicalize TF-dialect TFMean into canonical TensorReduce(Mean) node
+ */
+class MeanCanonicalizer : public SimpleNodeTransform<moco::TFMean>
+{
+public:
+ const char *name(void) const final { return "MeanCanonicalizer"; }
+
+public:
+ bool transform(moco::TFMean *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_MEAN_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/MulCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/MulCanonicalizer.cpp
new file mode 100644
index 000000000..d02f71361
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/MulCanonicalizer.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MulCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include "TFEltwiseBinaryCanonicalzeHelper.h"
+
+namespace moco
+{
+namespace tf
+{
+
+bool MulCanonicalizer::transform(moco::TFMul *node) const
+{
+ return canonicalize_eltwise_binary_node(node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/MulCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/MulCanonicalizer.h
new file mode 100644
index 000000000..480eec700
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/MulCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_MUL_CANONICALIZER_H__
+#define __MOCO_TF_MUL_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFMul to Canonical EltwiseMul
+ */
+class MulCanonicalizer : public SimpleNodeTransform<moco::TFMul>
+{
+public:
+ const char *name(void) const final { return "MulCanonicalizer"; }
+
+public:
+ bool transform(moco::TFMul *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_MUL_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/PadCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/PadCanonicalizer.cpp
new file mode 100644
index 000000000..10816f47c
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/PadCanonicalizer.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PadCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include "loco/Service/TypeInference.h"
+
+#include <stdex/Memory.h>
+
+namespace
+{
+
+bool canonicalize_pad(loco::Graph *graph, moco::TFPad *node)
+{
+ /**
+ * @note This will replace TFPad node with Canonical TensorConstantPad
+ *
+ * Before
+ * input --- TFPad -- C
+ * paddings --/
+ * After
+ * paddings ------- TFPad --
+ * /
+ * input ----------- TensorConstantPad -- C
+ * ConstGen --------/
+ * Where
+ * input : input of TFPad
+ * paddings : paddings of TFPad. it becomes TensorConstantPad's attribute.
+ * C : a node that uses TFPad as an input. TFPad is disconnected from C.
+ * ConstGen : constant value of Pad. TFPad has zero value by default.
+ */
+
+ auto pad_node = graph->nodes()->create<loco::TensorConstantPad>();
+
+ auto constant_node = graph->nodes()->create<loco::ConstGen>();
+
+ auto input_node = node->input();
+ // TODO: support other dtypes.
+ assert(loco::dtype_get(input_node) == loco::DataType::FLOAT32);
+ constant_node->dtype(loco::DataType::FLOAT32);
+ // TODO: change the constant node to a scalar once scalar ConstGen is supported.
+ constant_node->shape({1});
+ constant_node->size<loco::DataType::FLOAT32>(1);
+ constant_node->at<loco::DataType::FLOAT32>(0) = 0.0f;
+
+ auto const_paddings_node = dynamic_cast<loco::ConstGen *>(node->paddings());
+ // TODO: support S64 type.
+ assert(const_paddings_node->dtype() == loco::DataType::S32);
+ assert(const_paddings_node->rank() == 2);
+ assert(const_paddings_node->dim(1).value() == 2);
+
+ auto padding = pad_node->padding();
+ uint32_t padding_rank = const_paddings_node->dim(0).value();
+ padding->rank(padding_rank);
+
+ for (uint32_t i = 0; i < padding_rank; i++)
+ {
+ padding->front(i) = const_paddings_node->at<loco::DataType::S32>(i << 1);
+ padding->back(i) = const_paddings_node->at<loco::DataType::S32>((i << 1) + 1);
+ }
+
+ // update connections
+ pad_node->input(input_node);
+ pad_node->constant(constant_node);
+
+ // replace node
+ replace(node).with(pad_node);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool PadCanonicalizer::transform(TFPad *node) const
+{
+ return canonicalize_pad(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/PadCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/PadCanonicalizer.h
new file mode 100644
index 000000000..64bb6041a
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/PadCanonicalizer.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_PAD_CANONICALIZER_H__
+#define __MOCO_TF_PAD_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFPad to Canonical TensorConstantPad
+ */
+class PadCanonicalizer final : public SimpleNodeTransform<moco::TFPad>
+{
+public:
+ const char *name(void) const final { return "PadCanonicalizer"; }
+
+public:
+ bool transform(moco::TFPad *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_PAD_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/PlaceholderCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/PlaceholderCanonicalizer.cpp
new file mode 100644
index 000000000..f568e909f
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/PlaceholderCanonicalizer.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PlaceholderCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include <moco/Names.h>
+#include <moco/Log.h>
+
+namespace
+{
+
+bool canonicalize_placeholder(loco::Graph *graph, moco::TFPlaceholder *node)
+{
+ LOGGER(l);
+
+ /**
+ * @note This will replace TFPlaceholder node with Canonical Pull
+ *
+ * Before
+ * TFPlaceholder -- C
+ *
+ * After
+ * TFPlaceholder -
+ * Pull -- C
+ *
+ * Where
+ * C : a node that uses TFPlaceholder as an input
+ * TFPlaceholder is disconnected from other nodes
+ */
+
+ INFO(l) << "PlaceholderCanonicalizer begin";
+
+ auto pull_node = graph->nodes()->create<loco::Pull>();
+
+ // copy properties
+ auto dtype = node->dtype();
+ pull_node->dtype(dtype);
+
+ auto rank = node->rank();
+
+ if (rank == 0)
+ {
+ // This routine implements a workaround that converts a scalar placeholder (rank-0 tensor)
+ // into a rank-1 tensor of shape [1].
+ //
+ // TODO Revise this implementation later
+ pull_node->rank(1);
+ pull_node->dim(0) = 1;
+ }
+ else
+ {
+ pull_node->rank(rank);
+
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ if (node->dim(r).known())
+ pull_node->dim(r) = node->dim(r);
+ else
+ pull_node->dim(r).unset();
+ }
+ }
+
+ // set loco::Pull GraphInputIndex
+ pull_node->index(moco::index(node));
+
+ // update graph
+ replace(node).with(pull_node);
+
+ INFO(l) << "PlaceholderCanonicalizer done";
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool PlaceholderCanonicalizer::transform(TFPlaceholder *node) const
+{
+ return canonicalize_placeholder(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/PlaceholderCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/PlaceholderCanonicalizer.h
new file mode 100644
index 000000000..66eafe6af
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/PlaceholderCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_PLACEHOLDER_CANONICALIZER_H__
+#define __MOCO_TF_PLACEHOLDER_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/Nodes/TFPlaceholder.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFPlaceholder to Canonical Pull
+ *
+ * @note GraphInputIndex is copied to Pull
+ */
+class PlaceholderCanonicalizer : public SimpleNodeTransform<::moco::TFPlaceholder>
+{
+public:
+ const char *name(void) const final { return "PlaceholderCanonicalizer"; }
+
+public:
+ bool transform(moco::TFPlaceholder *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_PLACEHOLDER_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/RealDivCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/RealDivCanonicalizer.cpp
new file mode 100644
index 000000000..a448d85fa
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/RealDivCanonicalizer.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RealDivCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include "TFEltwiseBinaryCanonicalzeHelper.h"
+
+namespace moco
+{
+namespace tf
+{
+
+bool RealDivCanonicalizer::transform(moco::TFRealDiv *node) const
+{
+ return canonicalize_eltwise_binary_node(node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/RealDivCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/RealDivCanonicalizer.h
new file mode 100644
index 000000000..76e1bd377
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/RealDivCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_REALDIV_CANONICALIZER_H__
+#define __MOCO_TF_REALDIV_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFRealDiv to Canonical EltwiseDiv
+ */
+class RealDivCanonicalizer : public SimpleNodeTransform<moco::TFRealDiv>
+{
+public:
+ const char *name(void) const final { return "RealDivCanonicalizer"; }
+
+public:
+ bool transform(moco::TFRealDiv *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_REALDIV_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/Relu6Canonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/Relu6Canonicalizer.cpp
new file mode 100644
index 000000000..c53a880a8
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/Relu6Canonicalizer.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Relu6Canonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include <stdex/Memory.h>
+
+namespace
+{
+
+bool canonicalize_relu6(loco::Graph *graph, moco::TFRelu6 *node)
+{
+ /**
+ * @note This will replace TFRelu6 node with Canonical ReLU6
+ *
+ * Before
+ * A --- TFRelu6 -- C
+ * After
+ * +- TFRelu6 --
+ * |
+ * A -+- ReLU6 -- C
+ *
+ * Where
+ * A : features of TFRelu6
+ * C : a node that uses TFRelu6 as an input
+ * TFRelu6 is disconnected from C
+ */
+
+ auto relu6_node = graph->nodes()->create<loco::ReLU6>();
+
+ auto node_A = node->features();
+
+ // update connections
+ relu6_node->input(node_A);
+
+ // replace node
+ replace(node).with(relu6_node);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool Relu6Canonicalizer::transform(TFRelu6 *node) const
+{
+ return canonicalize_relu6(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/Relu6Canonicalizer.h b/compiler/moco-tf/src/Canonicalization/Relu6Canonicalizer.h
new file mode 100644
index 000000000..d8ad5db8e
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/Relu6Canonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_RELU6_CANONICALIZER_H__
+#define __MOCO_TF_RELU6_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFRelu6 to Canonical ReLU6
+ */
+class Relu6Canonicalizer : public SimpleNodeTransform<moco::TFRelu6>
+{
+public:
+ const char *name(void) const final { return "Relu6Canonicalizer"; }
+
+public:
+ bool transform(moco::TFRelu6 *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_RELU6_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/ReluCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/ReluCanonicalizer.cpp
new file mode 100644
index 000000000..7965dc931
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/ReluCanonicalizer.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReluCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include <stdex/Memory.h>
+
+namespace
+{
+
+bool canonicalize_relu(loco::Graph *graph, moco::TFRelu *node)
+{
+ /**
+ * @note This will replace TFRelu node with Canonical ReLU
+ *
+ * Before
+ * A --- TFRelu -- C
+ * After
+ * +- TFRelu --
+ * |
+ * A -+- ReLU -- C
+ *
+ * Where
+ * A : features of TFRelu
+ * C : a node that uses TFRelu as an input
+ * TFRelu is disconnected from C
+ */
+
+ auto relu_node = graph->nodes()->create<loco::ReLU>();
+
+ auto node_A = node->features();
+
+ // update connections
+ relu_node->input(node_A);
+
+ // replace node
+ replace(node).with(relu_node);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool ReluCanonicalizer::transform(TFRelu *node) const
+{
+ return canonicalize_relu(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/ReluCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/ReluCanonicalizer.h
new file mode 100644
index 000000000..e27abe158
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/ReluCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_RELU_CANONICALIZER_H__
+#define __MOCO_TF_RELU_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFRelu to Canonical ReLU
+ */
+class ReluCanonicalizer : public SimpleNodeTransform<moco::TFRelu>
+{
+public:
+ const char *name(void) const final { return "ReluCanonicalizer"; }
+
+public:
+ bool transform(moco::TFRelu *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_RELU_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/ReshapeCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/ReshapeCanonicalizer.cpp
new file mode 100644
index 000000000..b944568e0
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/ReshapeCanonicalizer.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReshapeCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include <moco/Log.h>
+#include <plier/tf/Convert.h>
+#include <oops/UserExn.h>
+
+#include <cassert>
+
+namespace
+{
+using plier::tf::DataLayout;
+
+/**
+ * @brief Check whether given 'new shape' arg is a fixed shape input for Reshape
+ *
+ * ConstNode can be moco::TFConst or loco::ConstGen
+ */
+template <typename ConstNode> bool is_fixed_shape_input(ConstNode *const_shape_input)
+{
+ if (const_shape_input == nullptr)
+ return false;
+
+ // Shape input should be integer tensor of rank 1, e.g. [2, 3, 4] or [3, -1]
+ // TODO Support other possible data types, e.g. S64
+ assert(const_shape_input->dtype() == loco::DataType::S32);
+ assert(const_shape_input->rank() == 1);
+
+ auto shape_rank = const_shape_input->dim(0).value();
+ assert(shape_rank > 0);
+
+ for (uint32_t axis = 0; axis < shape_rank; ++axis)
+ {
+ auto shape_dim = const_shape_input->template at<loco::DataType::S32>(axis);
+ if (shape_dim == -1)
+ {
+ // has wildcard dimension, i.e. dynamic reshape
+ return false;
+ }
+ if (!(shape_dim >= 1))
+ {
+ throw oops::UserExn("New shape of Reshape has invalid dimension");
+ }
+ }
+ return true;
+}
+
+/// @note Currently only supports to canonicalize Fixed Reshape
+bool canonicalize_reshape(loco::Graph *graph, moco::TFReshape *node)
+{
+ LOGGER(l);
+ INFO(l) << "TFNodeCanonicalize TFReshape begin";
+
+ /**
+ * This rule canonicalizes TFReshape only when its output shape is known at
+ * compile time, i.e. fixed reshape case.
+ * TODO Support other cases like dynamic reshape
+ *
+ * This will replace TFReshape + TFConst or Canonical ConstGen(as shape input)
+ * node pair into Canonical Reshape<ReshapeType::Fixed>, or 'FixedReshape'.
+ * Shape input (TFConst or Canonical ConstGen) should not have wildcard
+ * dimension to be converted to FixedReshape.
+ *
+ * Before
+ * TFConst (shape)
+ * or ---
+ * ConstGen \
+ * \
+ * In --------- TFReshape ------- Out(s)
+ * (tensor)
+ *
+ * After
+ * TFConst
+ * or ---
+ * ConstGen \
+ * \
+ * ---------- TFReshape
+ * /
+ * In -------- FixedReshape ----- Out(s)
+ */
+
+ // create loco node to replace
+ auto fixed_reshape = graph->nodes()->create<loco::FixedReshape>();
+
+ // Supports 2 cases for Reshape's shape input:
+ // TF-dialect TFConst or Canonical ConstGen
+ loco::Node *shape_input = node->shape();
+ auto tfconst_shape_input = dynamic_cast<moco::TFConst *>(shape_input);
+ auto constgen_shape_input = dynamic_cast<loco::ConstGen *>(shape_input);
+
+ if (tfconst_shape_input)
+ {
+ // Only support fixed reshape
+ // TODO support dynamic reshape
+ if (!(is_fixed_shape_input(tfconst_shape_input)))
+ {
+ throw oops::UserExn("Supports only fixed reshape", node->name());
+ }
+
+ auto rank = tfconst_shape_input->dim(0).value();
+ fixed_reshape->rank(rank);
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ fixed_reshape->dim(axis) = tfconst_shape_input->at<loco::DataType::S32>(axis);
+ }
+ }
+ else if (constgen_shape_input)
+ {
+ // ditto
+ if (!(is_fixed_shape_input(constgen_shape_input)))
+ {
+ throw oops::UserExn("Supports only fixed reshape", node->name());
+ }
+
+ auto rank = constgen_shape_input->dim(0).value();
+ fixed_reshape->rank(rank);
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ fixed_reshape->dim(axis) = constgen_shape_input->at<loco::DataType::S32>(axis);
+ }
+ }
+ else
+ {
+ // TODO support dynamic reshape from not const node
+ throw oops::UserExn("Supports only const node as input shape", node->name());
+ }
+
+ // replace
+ auto in = node->tensor();
+ fixed_reshape->input(in);
+
+ replace(node).with(fixed_reshape);
+
+ INFO(l) << "TFNodeCanonicalize TFReshape done";
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool ReshapeCanonicalizer::transform(TFReshape *node) const
+{
+ return canonicalize_reshape(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/ReshapeCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/ReshapeCanonicalizer.h
new file mode 100644
index 000000000..1a792024e
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/ReshapeCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_RESHAPE_CANONICALIZER_H__
+#define __MOCO_TF_RESHAPE_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFReshape to Canonical Reshape
+ */
+class ReshapeCanonicalizer : public SimpleNodeTransform<moco::TFReshape>
+{
+public:
+ const char *name(void) const final { return "ReshapeCanonicalizer"; }
+
+public:
+ bool transform(moco::TFReshape *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_RESHAPE_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/RsqrtCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/RsqrtCanonicalizer.cpp
new file mode 100644
index 000000000..c31dbf6d6
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/RsqrtCanonicalizer.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RsqrtCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+#include <moco/Support/TFShapeInferenceHelper.h>
+
+#include <moco/Log.h>
+
+#include <loco/Service/TypeInference.h>
+
+#include <stdex/Memory.h>
+#include <oops/UserExn.h>
+
+namespace
+{
+
+template <typename T>
+bool prepare_const_gen(loco::ConstGen *const_node, const loco::TensorShape &tensorshape, T value);
+
+template <>
+bool prepare_const_gen<float>(loco::ConstGen *const_node, const loco::TensorShape &tensorshape,
+ float value)
+{
+ LOGGER(l);
+
+ uint32_t const_num_elements = 1;
+
+ auto dtype = loco::DataType::FLOAT32;
+ const_node->dtype(dtype);
+
+ auto rank = tensorshape.rank();
+ const_node->rank(rank);
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ if (tensorshape.dim(r).known())
+ const_node->dim(r) = tensorshape.dim(r);
+ else
+ return false;
+
+ assert(tensorshape.dim(r).value() > 0);
+
+ const_num_elements *= tensorshape.dim(r).value();
+ }
+
+ INFO(l) << "prepare_const_gen : Elements = " << const_num_elements;
+
+ const_node->size<loco::DataType::FLOAT32>(const_num_elements);
+ for (uint32_t i = 0; i < const_num_elements; ++i)
+ {
+ const_node->at<loco::DataType::FLOAT32>(i) = value;
+ }
+
+ return true;
+}
+
+bool canonicalize_rsqrt(loco::Graph *graph, moco::TFRsqrt *node)
+{
+ /**
+ * @note This will replace TFRsqrt node with Canonical EltwiseSqrt + EltwiseRealDiv
+ *
+ * Before
+ * A --- TFRsqrt -- C
+ * After
+ * +- TFRsqrt --
+ * |
+ * | ConstGen --+
+ * | \
+ * A -+- EltwiseSqrt -- EltwiseDiv -- C
+ *
+ * Where
+ * A : features of TFRsqrt
+ * C : a node that uses TFRsqrt as an input
+ * TFRsqrt is disconnected from C
+ * TFRsqrt is converted to 1 / EltwiseSqrt
+ */
+
+ auto nodeshape = moco::node_shape(node);
+ if (nodeshape.domain() == loco::Domain::Unknown)
+ {
+ // We need this shape information
+ assert(false); // this shouldn't happen, let's add an alarm
+ return false;
+ }
+ auto tensorshape = nodeshape.as<loco::TensorShape>();
+
+ if (!loco::dtype_known(node))
+ {
+ // We need type of this node
+ return false;
+ }
+
+ auto sqrt_node = graph->nodes()->create<loco::EltwiseSqrt>();
+ auto eltdiv_node = graph->nodes()->create<loco::EltwiseDiv>();
+ auto const_node = graph->nodes()->create<loco::ConstGen>();
+
+ auto dtype = loco::dtype_get(node);
+
+ switch (dtype)
+ {
+ case loco::DataType::FLOAT32:
+ if (!prepare_const_gen<float>(const_node, tensorshape, 1.0f))
+ throw oops::UserExn("Cannot handle unknown shape", node->name());
+ break;
+
+ default:
+ throw oops::UserExn("Unsupported data type", node->name());
+ }
+
+ auto node_A = node->x();
+
+ // update connections
+ sqrt_node->input(node_A);
+ eltdiv_node->lhs(const_node);
+ eltdiv_node->rhs(sqrt_node);
+
+ // replace node
+ replace(node).with(eltdiv_node);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool RsqrtCanonicalizer::transform(TFRsqrt *node) const
+{
+ return canonicalize_rsqrt(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/RsqrtCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/RsqrtCanonicalizer.h
new file mode 100644
index 000000000..7fd4ff697
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/RsqrtCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_RSQRT_CANONICALIZER_H__
+#define __MOCO_TF_RSQRT_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFRsqrt to Canonical EltwiseDiv + EltwiseSqrt
+ */
+class RsqrtCanonicalizer : public SimpleNodeTransform<moco::TFRsqrt>
+{
+public:
+ const char *name(void) const final { return "RsqrtCanonicalizer"; }
+
+public:
+ bool transform(moco::TFRsqrt *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_RSQRT_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.cpp
new file mode 100644
index 000000000..98af7b693
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SoftmaxCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+#include <moco/Support/TFShapeInferenceHelper.h>
+
+#include <moco/Log.h>
+
+namespace
+{
+
+bool canonicalize_softmax(loco::Graph *graph, moco::TFSoftmax *node)
+{
+ LOGGER(l);
+
+ INFO(l) << "TFNodeCanonicalize TFSoftmax begin";
+
+ /**
+ * This will replace shape inferred TFSoftmax node into canonical TensorSoftmax
+ *
+ * Before
+ * In ---- TFSoftmax ---- Out(s)
+ *
+ * After
+ * ------ TFSoftmax
+ * /
+ * In ---- TensorSoftmax ----- Out(s)
+ */
+
+ auto nodeshape = moco::node_shape(node);
+ // Canonicalization into TensorSoftmax is valid when softmax has shape info
+ assert(nodeshape.domain() != loco::Domain::Unknown);
+
+ auto softmax_tensor_shape = nodeshape.as<loco::TensorShape>();
+
+ // Create loco node to replace
+ auto softmax = graph->nodes()->create<loco::TensorSoftmax>();
+
+ // replace
+ auto in = node->logits();
+ softmax->input(in);
+ softmax->axis(softmax_tensor_shape.rank() - 1);
+ replace(node).with(softmax);
+
+ INFO(l) << "TFNodeCanonicalize TFSoftmax done";
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool SoftmaxCanonicalizer::transform(TFSoftmax *node) const
+{
+ return canonicalize_softmax(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.h
new file mode 100644
index 000000000..ebaf04cfe
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_SOFTMAX_CANONICALIZER_H__
+#define __MOCO_TF_SOFTMAX_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+* @brief Canonicalize TF-dialect TFSoftmax into canonical Softmax node
+*/
+class SoftmaxCanonicalizer : public SimpleNodeTransform<moco::TFSoftmax>
+{
+public:
+ const char *name(void) const final { return "SoftmaxCanonicalizer"; }
+
+public:
+ bool transform(moco::TFSoftmax *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_SOFTMAX_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/SqrtCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/SqrtCanonicalizer.cpp
new file mode 100644
index 000000000..89b9b8a44
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/SqrtCanonicalizer.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SqrtCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+namespace
+{
+
+bool canonicalize_sqrt(loco::Graph *graph, moco::TFSqrt *node)
+{
+ /**
+ * @note This will replace TFSqrt node with Canonical EltwiseSqrt
+ *
+ * Before
+ * A --- TFSqrt -- C
+ * After
+ * +- TFSqrt --
+ * |
+ * A -+- EltwiseSqrt -- C
+ *
+ * Where
+ * A : features of TFSqrt
+ * C : a node that uses TFSqrt as an input
+ * TFSqrt is disconnected from C
+ */
+
+ auto sqrt_node = graph->nodes()->create<loco::EltwiseSqrt>();
+
+ auto node_A = node->x();
+
+ // update connections
+ sqrt_node->input(node_A);
+
+ // replace node
+ replace(node).with(sqrt_node);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool SqrtCanonicalizer::transform(TFSqrt *node) const
+{
+ return canonicalize_sqrt(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/SqrtCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/SqrtCanonicalizer.h
new file mode 100644
index 000000000..3f7ffead8
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/SqrtCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_SQRT_CANONICALIZER_H__
+#define __MOCO_TF_SQRT_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFSqrt to Canonical EltwiseSqrt
+ */
+class SqrtCanonicalizer : public SimpleNodeTransform<moco::TFSqrt>
+{
+public:
+ const char *name(void) const final { return "SqrtCanonicalizer"; }
+
+public:
+ bool transform(moco::TFSqrt *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_SQRT_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/SqueezeCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/SqueezeCanonicalizer.cpp
new file mode 100644
index 000000000..f5b991206
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/SqueezeCanonicalizer.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SqueezeCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+#include <moco/Support/TFShapeInferenceHelper.h>
+
+#include <moco/Log.h>
+
+namespace
+{
+
+bool canonicalize_squeeze_to_reshape(loco::Graph *graph, moco::TFSqueeze *node)
+{
+ LOGGER(l);
+
+ INFO(l) << "TFNodeCanonicalize TFSqueeze begin";
+
+ /**
+ * This will replace shape inferred TFSqueeze node into canonical FixedReshape
+ *
+ * Before
+ * In ---- TFSqueeze ---- Out(s)
+ *
+ * After
+ * ------ TFSqueeze
+ * /
+ * In ---- FixedReshape ----- Out(s)
+ */
+
+ auto nodeshape = moco::node_shape(node);
+ // canonicalize into FixedReshape is valid when squeeze has shape info
+ // TODO Support general Squeeze case
+ assert(nodeshape.domain() != loco::Domain::Unknown);
+
+ auto squeeze_tensor_shape = nodeshape.as<loco::TensorShape>();
+
+ // Create loco node to replace
+ auto reshape = graph->nodes()->create<loco::FixedReshape>();
+
+ // Copy shape
+ reshape->rank(squeeze_tensor_shape.rank());
+ for (uint32_t axis = 0; axis < squeeze_tensor_shape.rank(); ++axis)
+ {
+ assert(squeeze_tensor_shape.dim(axis).known());
+ reshape->dim(axis) = squeeze_tensor_shape.dim(axis);
+ }
+
+ // replace
+ auto in = node->input();
+ reshape->input(in);
+ replace(node).with(reshape);
+
+ INFO(l) << "TFNodeCanonicalize TFSqueeze done";
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool SqueezeCanonicalizer::transform(TFSqueeze *node) const
+{
+ return canonicalize_squeeze_to_reshape(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/SqueezeCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/SqueezeCanonicalizer.h
new file mode 100644
index 000000000..28a1442bd
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/SqueezeCanonicalizer.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_SQUEEZE_CANONICALIZER_H__
+#define __MOCO_TF_SQUEEZE_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Canonicalize TF-dialect TFSqueeze into canonical FixedReshape node
+ *
+ * @note There is no canonical Squeeze node
+ */
+class SqueezeCanonicalizer : public SimpleNodeTransform<moco::TFSqueeze>
+{
+public:
+ // Human-readable pass name used by phase/progress reporting
+ const char *name(void) const final { return "SqueezeCanonicalizer"; }
+
+public:
+ // Returns true when the node was replaced by a canonical FixedReshape
+ bool transform(moco::TFSqueeze *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_SQUEEZE_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.cpp
new file mode 100644
index 000000000..574fa3993
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StopGradientCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include <moco/Log.h>
+
+namespace
+{
+
+// Replace TFStopGradient with a canonical Forward node (a pass-through),
+// as the gradient-stopping semantics is irrelevant for inference graphs.
+// Always returns true.
+bool canonicalize_stopgradient(loco::Graph *graph, moco::TFStopGradient *node)
+{
+ LOGGER(l);
+
+ INFO(l) << "TFNodeCanonicalize TFStopGradient begin";
+
+ /**
+ * This will replace shape inferred TFStopGradient node into canonical Forward
+ *
+ * Before
+ * In --- TFStopGradient --- Out(s)
+ *
+ * After
+ * -- TFStopGradient
+ * /
+ * In --- Forward --- Out(s)
+ */
+
+ // Create loco node to replace
+ auto forward_node = graph->nodes()->create<loco::Forward>();
+
+ // update connection
+ forward_node->input(node->input());
+
+ // replace node: all users of TFStopGradient now read from Forward
+ replace(node).with(forward_node);
+
+ INFO(l) << "TFNodeCanonicalize TFStopGradient done";
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+// Delegate to the file-local helper above; returns true when replaced.
+bool StopGradientCanonicalizer::transform(TFStopGradient *node) const
+{
+ return canonicalize_stopgradient(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.h
new file mode 100644
index 000000000..6a17728a6
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_STOPGRADIENT_CANONICALIZER_H__
+#define __MOCO_TF_STOPGRADIENT_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Canonicalize TF-dialect TFStopGradient into canonical Forward node
+ */
+class StopGradientCanonicalizer : public SimpleNodeTransform<moco::TFStopGradient>
+{
+public:
+ // Human-readable pass name used by phase/progress reporting
+ const char *name(void) const final { return "StopGradientCanonicalizer"; }
+
+public:
+ // Returns true when the node was replaced by a canonical Forward
+ bool transform(moco::TFStopGradient *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_STOPGRADIENT_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/SubCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/SubCanonicalizer.cpp
new file mode 100644
index 000000000..c518b7d64
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/SubCanonicalizer.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SubCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include "TFEltwiseBinaryCanonicalzeHelper.h"
+
+namespace moco
+{
+namespace tf
+{
+
+// Delegate to the shared element-wise binary canonicalization helper
+// (TFEltwiseBinaryCanonicalzeHelper.h), which handles broadcasting setup.
+bool SubCanonicalizer::transform(moco::TFSub *node) const
+{
+ return canonicalize_eltwise_binary_node(node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/SubCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/SubCanonicalizer.h
new file mode 100644
index 000000000..f715cc86c
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/SubCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_SUB_CANONICALIZER_H__
+#define __MOCO_TF_SUB_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFSub to Canonical EltwiseSub
+ */
+class SubCanonicalizer : public SimpleNodeTransform<moco::TFSub>
+{
+public:
+ // Human-readable pass name used by phase/progress reporting
+ const char *name(void) const final { return "SubCanonicalizer"; }
+
+public:
+ // Returns true when the TFSub node was replaced by its canonical form
+ bool transform(moco::TFSub *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_SUB_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/TFPushCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/TFPushCanonicalizer.cpp
new file mode 100644
index 000000000..081e0e5f9
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/TFPushCanonicalizer.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFPushCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include <stdex/Memory.h>
+
+namespace
+{
+
+// Replace a TFPush (virtual output node) with a canonical Push, moving the
+// GraphOutputIndex from the former to the latter. Always returns true.
+bool canonicalize_push(loco::Graph *graph, moco::TFPush *node)
+{
+ /**
+ * @note This will replace TFPush node with Canonical Push
+ *
+ * Before
+ * A --- TFPush
+ * After
+ * +- TFPush
+ * |
+ * A -+- Push
+ *
+ * Where
+ * A : from of TFPush
+ * TFPush will have no GraphOutputIndex
+ * Push will have GraphOutputIndex that from TFPush
+ */
+
+ auto push_node = graph->nodes()->create<loco::Push>();
+
+ auto node_A = node->from();
+
+ // update connections
+ push_node->from(node_A);
+
+ // update output index: transfer ownership of the graph output slot
+ push_node->index(node->index());
+ node->index_reset();
+
+ // replace node
+ replace(node).with(push_node);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+// Delegate to the file-local helper above; returns true when replaced.
+bool TFPushCanonicalizer::transform(TFPush *node) const
+{
+ return canonicalize_push(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/TFPushCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/TFPushCanonicalizer.h
new file mode 100644
index 000000000..569a71f82
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/TFPushCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_PUSH_CANONICALIZER_H__
+#define __MOCO_TF_PUSH_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFPush to Canonical Push
+ */
+class TFPushCanonicalizer : public SimpleNodeTransform<moco::TFPush>
+{
+public:
+ // Human-readable pass name used by phase/progress reporting
+ const char *name(void) const final { return "TFPushCanonicalizer"; }
+
+public:
+ // Returns true when the TFPush node was replaced by a canonical Push
+ bool transform(moco::TFPush *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_PUSH_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalization/TanhCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/TanhCanonicalizer.cpp
new file mode 100644
index 000000000..3f48a50fc
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/TanhCanonicalizer.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TanhCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include <stdex/Memory.h>
+
+namespace
+{
+
+// Replace TFTanh with a canonical Tanh reading the same input.
+// Always returns true.
+bool canonicalize_tanh(loco::Graph *graph, moco::TFTanh *node)
+{
+ /**
+ * @note This will replace TFTanh node with Canonical Tanh
+ *
+ * Before
+ * A --- TFTanh -- C
+ * After
+ * +- TFTanh --
+ * |
+ * A -+-- Tanh --- C
+ *
+ * Where
+ * A : x of TFTanh
+ * C : a node that uses TFTanh as an input
+ * TFTanh is disconnected from C
+ */
+
+ auto tanh_node = graph->nodes()->create<loco::Tanh>();
+
+ auto node_A = node->x();
+
+ // update connections
+ tanh_node->input(node_A);
+
+ // replace node: all users of TFTanh now read from the canonical Tanh
+ replace(node).with(tanh_node);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+// Delegate to the file-local helper above; returns true when replaced.
+bool TanhCanonicalizer::transform(TFTanh *node) const
+{
+ return canonicalize_tanh(node->graph(), node);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalization/TanhCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/TanhCanonicalizer.h
new file mode 100644
index 000000000..af5e79fb5
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalization/TanhCanonicalizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_TANH_CANONICALIZER_H__
+#define __MOCO_TF_TANH_CANONICALIZER_H__
+
+#include "Transform.h"
+#include "SimpleNodeTransform.h"
+
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Convert TFTanh to Canonical Tanh
+ */
+class TanhCanonicalizer : public SimpleNodeTransform<moco::TFTanh>
+{
+public:
+ // Human-readable pass name used by phase/progress reporting
+ const char *name(void) const final { return "TanhCanonicalizer"; }
+
+public:
+ // 'final' (was 'override') for consistency with every sibling canonicalizer
+ bool transform(moco::TFTanh *) const final;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_TANH_CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalizer.cpp b/compiler/moco-tf/src/Canonicalizer.cpp
new file mode 100644
index 000000000..04bc7c57a
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalizer.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Canonicalizer.h"
+
+#include "Knob.h"
+#include "ProgressReporter.h"
+
+#include "Transforms/ShapeInferencePass.h"
+#include "Transforms/TypeInferencePass.h"
+
+#include "Canonicalization/AddCanonicalizer.h"
+#include "Canonicalization/AvgPoolCanonicalizer.h"
+#include "Canonicalization/BiasAddCanonicalizer.h"
+#include "Canonicalization/ConcatV2Canonicalizer.h"
+#include "Canonicalization/ConstCanonicalizer.h"
+#include "Canonicalization/Conv2DBackpropInputCanonicalizer.h"
+#include "Canonicalization/Conv2DCanonicalizer.h"
+#include "Canonicalization/DepthwiseConv2dNativeCanonicalizer.h"
+#include "Canonicalization/IdentityCanonicalizer.h"
+#include "Canonicalization/MaximumCanonicalizer.h"
+#include "Canonicalization/MaxPoolCanonicalizer.h"
+#include "Canonicalization/MeanCanonicalizer.h"
+#include "Canonicalization/MulCanonicalizer.h"
+#include "Canonicalization/PadCanonicalizer.h"
+#include "Canonicalization/PlaceholderCanonicalizer.h"
+#include "Canonicalization/RealDivCanonicalizer.h"
+#include "Canonicalization/ReluCanonicalizer.h"
+#include "Canonicalization/Relu6Canonicalizer.h"
+#include "Canonicalization/ReshapeCanonicalizer.h"
+#include "Canonicalization/RsqrtCanonicalizer.h"
+#include "Canonicalization/SoftmaxCanonicalizer.h"
+#include "Canonicalization/SqrtCanonicalizer.h"
+#include "Canonicalization/SqueezeCanonicalizer.h"
+#include "Canonicalization/StopGradientCanonicalizer.h"
+#include "Canonicalization/SubCanonicalizer.h"
+#include "Canonicalization/TanhCanonicalizer.h"
+// For virtual nodes
+#include "Canonicalization/TFPushCanonicalizer.h"
+
+#include <moco/IR/TFDialect.h>
+#include <moco/IR/TFNodes.h>
+
+#include <logo/Phase.h>
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+namespace
+{
+
+/**
+ * @brief Return true if graph has TFDialect nodes
+ */
+/**
+ * @brief Return true if graph has TFDialect nodes
+ *
+ * Only nodes reachable from the graph outputs ("active" nodes) are scanned;
+ * dead TF-dialect nodes do not count.
+ */
+bool has_tf_nodes(loco::Graph *g)
+{
+ auto active_nodes = loco::active_nodes(loco::output_nodes(g));
+ for (auto node : active_nodes)
+ {
+ if (node->dialect() == moco::TFDialect::get())
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+// Lower every TF-dialect node in 'g' to the canonical loco dialect by running
+// the full list of canonicalization transforms as one logo phase.
+// Postcondition (debug builds): no active TF-dialect node remains.
+void Canonicalizer::canonicalize(loco::Graph *g) const
+{
+ logo::Phase phase;
+
+ /* TRANSFORM DECLARATION BEGIN */
+ // Run shape and type inference at the top
+ phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
+ phase.emplace_back(stdex::make_unique<TypeInferencePass>());
+
+ // Some canonicalizers are gated behind Knobs (build/run-time switches)
+ phase.emplace_back(stdex::make_unique<AddCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<AvgPoolCanonicalizer>());
+ if (moco::tf::get<moco::tf::Knob::CanonicalizeBiasAdd>())
+ phase.emplace_back(stdex::make_unique<BiasAddCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<ConcatV2Canonicalizer>());
+ if (moco::tf::get<moco::tf::Knob::CanonicalizeConst>())
+ phase.emplace_back(stdex::make_unique<ConstCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<Conv2DBackpropInputCanonicalizer>());
+ if (moco::tf::get<moco::tf::Knob::CanonicalizeConv2D>())
+ phase.emplace_back(stdex::make_unique<Conv2DCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<DepthwiseConv2dNativeCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<IdentityCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<MaximumCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<MaxPoolCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<MeanCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<MulCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<PadCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<PlaceholderCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<RealDivCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<ReluCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<Relu6Canonicalizer>());
+ phase.emplace_back(stdex::make_unique<ReshapeCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<RsqrtCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<SoftmaxCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<SqrtCanonicalizer>());
+ // NOTE SquaredDifference is handled in ResolveSquaredDifference
+ phase.emplace_back(stdex::make_unique<SqueezeCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<StopGradientCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<SubCanonicalizer>());
+ phase.emplace_back(stdex::make_unique<TanhCanonicalizer>());
+ // For virtual nodes
+ phase.emplace_back(stdex::make_unique<TFPushCanonicalizer>());
+ /* TRANSFORM DECLARATION END */
+
+ // NOTE(review): Restart strategy presumably re-runs the phase from the top
+ // whenever a transform reports a change -- confirm in logo::PhaseRunner
+ ProgressReporter prog(g, logo::PhaseStrategy::Restart);
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+
+ // Assert if graph has TF dialect nodes
+ assert(!has_tf_nodes(g));
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Canonicalizer.h b/compiler/moco-tf/src/Canonicalizer.h
new file mode 100644
index 000000000..098a6719c
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalizer.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CANONICALIZER_H__
+#define __CANONICALIZER_H__
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+// Lowers TF-dialect nodes of a graph into the canonical loco dialect
+// (transform list lives in Canonicalizer.cpp)
+class Canonicalizer final
+{
+public:
+ // Runs all canonicalization transforms; asserts no TF-dialect node remains
+ void canonicalize(loco::Graph *) const;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __CANONICALIZER_H__
diff --git a/compiler/moco-tf/src/Canonicalizer.test.cpp b/compiler/moco-tf/src/Canonicalizer.test.cpp
new file mode 100644
index 000000000..8eaf86f2f
--- /dev/null
+++ b/compiler/moco-tf/src/Canonicalizer.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Canonicalizer.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+// Canonicalizer SHOULD NOT crash even though a given graph is empty
+TEST(Canonicalizer, empty_graph)
+{
+ moco::tf::Canonicalizer cano;
+
+ loco::Graph g;
+
+ // An empty graph has no outputs, hence no active nodes to canonicalize
+ cano.canonicalize(&g);
+
+ SUCCEED();
+}
diff --git a/compiler/moco-tf/src/CodecHelper.h b/compiler/moco-tf/src/CodecHelper.h
new file mode 100644
index 000000000..85e4e2164
--- /dev/null
+++ b/compiler/moco-tf/src/CodecHelper.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CODEC_HELPER_H__
+#define __CODEC_HELPER_H__
+
+#include <plier/tf/Convert.h>
+#include <stdex/Memory.h>
+
+namespace
+{
+
+using plier::tf::DataLayout;
+
+// Install a PermutingEncoder on 'feature_enc' mapping the given tensor data
+// layout (NHWC or NCHW) onto loco Feature axes (Count/Height/Width/Depth).
+// NOTE(review): for any other DataLayout value the permutation is left at its
+// defaults -- confirm callers only ever pass NHWC or NCHW.
+void set_feature_enc(loco::FeatureEncode *feature_enc, DataLayout data_layout)
+{
+ auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+
+ if (data_layout == DataLayout::NHWC)
+ {
+ enc->perm()->axis(loco::FeatureAxis::Count) = 0;
+ enc->perm()->axis(loco::FeatureAxis::Height) = 1;
+ enc->perm()->axis(loco::FeatureAxis::Width) = 2;
+ enc->perm()->axis(loco::FeatureAxis::Depth) = 3;
+ }
+ else if (data_layout == DataLayout::NCHW)
+ {
+ enc->perm()->axis(loco::FeatureAxis::Count) = 0;
+ enc->perm()->axis(loco::FeatureAxis::Depth) = 1;
+ enc->perm()->axis(loco::FeatureAxis::Height) = 2;
+ enc->perm()->axis(loco::FeatureAxis::Width) = 3;
+ }
+
+ feature_enc->encoder(std::move(enc));
+}
+
+// Install a PermutingDecoder on 'feature_dec' -- the inverse direction of
+// set_feature_enc, mapping loco Feature axes back to NHWC or NCHW tensor axes.
+// NOTE(review): for any other DataLayout value the permutation is left at its
+// defaults -- confirm callers only ever pass NHWC or NCHW.
+void set_feature_dec(loco::FeatureDecode *feature_dec, DataLayout data_layout)
+{
+ auto dec = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+
+ if (data_layout == DataLayout::NHWC)
+ {
+ dec->perm()->axis(loco::FeatureAxis::Count) = 0;
+ dec->perm()->axis(loco::FeatureAxis::Height) = 1;
+ dec->perm()->axis(loco::FeatureAxis::Width) = 2;
+ dec->perm()->axis(loco::FeatureAxis::Depth) = 3;
+ }
+ else if (data_layout == DataLayout::NCHW)
+ {
+ dec->perm()->axis(loco::FeatureAxis::Count) = 0;
+ dec->perm()->axis(loco::FeatureAxis::Depth) = 1;
+ dec->perm()->axis(loco::FeatureAxis::Height) = 2;
+ dec->perm()->axis(loco::FeatureAxis::Width) = 3;
+ }
+
+ feature_dec->decoder(std::move(dec));
+}
+
+} // namespace
+
+#endif // __CODEC_HELPER_H__
diff --git a/compiler/moco-tf/src/Convert.cpp b/compiler/moco-tf/src/Convert.cpp
new file mode 100644
index 000000000..6285f5eab
--- /dev/null
+++ b/compiler/moco-tf/src/Convert.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+#include <algorithm>
+#include <cctype>
+
+// TODO move to some common file
+namespace moco
+{
+
+// Return an upper-cased copy of 's', applying std::toupper per character.
+// The 'unsigned char' parameter avoids undefined behavior when plain char
+// is signed and holds a negative value.
+std::string str_toupper(std::string s)
+{
+ // from https://en.cppreference.com/w/cpp/string/byte/toupper
+ std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::toupper(c); });
+ return s;
+}
+
+} // namespace moco
diff --git a/compiler/moco-tf/src/Convert.h b/compiler/moco-tf/src/Convert.h
new file mode 100644
index 000000000..77dab3700
--- /dev/null
+++ b/compiler/moco-tf/src/Convert.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERT_H__
+#define __CONVERT_H__
+
+#include <string>
+
+// TODO move to some common file
+namespace moco
+{
+
+std::string str_toupper(std::string s);
+
+} // namespace moco
+
+#endif // __CONVERT_H__
diff --git a/compiler/moco-tf/src/Convert.test.cpp b/compiler/moco-tf/src/Convert.test.cpp
new file mode 100644
index 000000000..b02a597cb
--- /dev/null
+++ b/compiler/moco-tf/src/Convert.test.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <Convert.h>
+
+#include <gtest/gtest.h>
+
+#include <string>
+
+TEST(moco_Convert, string_toupper)
+{
+ std::string source = "Hello World!!!";
+ std::string convert = moco::str_toupper(source);
+
+ // non-alphabetic characters must pass through unchanged
+ ASSERT_EQ(convert, "HELLO WORLD!!!");
+}
diff --git a/compiler/moco-tf/src/Frontend.cpp b/compiler/moco-tf/src/Frontend.cpp
new file mode 100644
index 000000000..a17d5dd0e
--- /dev/null
+++ b/compiler/moco-tf/src/Frontend.cpp
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <moco/tf/Frontend.h>
+#include <moco/Importer.h>
+#include <moco/IR/TFNode.h>
+#include <moco/Log.h>
+
+#include <moco/Import/GraphBuilderRegistry.h>
+
+#include "Canonicalizer.h"
+#include "Optimizer.h"
+#include "TFOptimizer.h"
+
+#include "Transforms.h"
+
+#include "Op/COpCall.h"
+
+#include <loco/Service/ShapeInference.h>
+
+#include <stdex/Memory.h>
+#include <oops/UserExn.h>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <iostream>
+#include <sstream>
+#include <fstream>
+#include <stdexcept>
+
+#include <fcntl.h>
+#include <unistd.h>
+
+namespace
+{
+
+bool load_text(std::istream *stream, tensorflow::GraphDef &graph_def)
+{
+ google::protobuf::io::IstreamInputStream iis(stream);
+
+ return google::protobuf::TextFormat::Parse(&iis, &graph_def);
+}
+
+bool load_binary(std::istream *stream, tensorflow::GraphDef &graph_def)
+{
+ google::protobuf::io::IstreamInputStream iis(stream);
+ google::protobuf::io::CodedInputStream cis(&iis);
+
+ return graph_def.ParseFromCodedStream(&cis);
+}
+
+void load_tf(std::istream *stream, moco::tf::Frontend::FileType type,
+ tensorflow::GraphDef &graph_def)
+{
+ bool result = (type == moco::tf::Frontend::FileType::Text) ? load_text(stream, graph_def)
+ : load_binary(stream, graph_def);
+ if (!result)
+ {
+ throw oops::UserExn("Failed to parse prototxt from stream");
+ }
+}
+
+// If Placeholder has no shape attribute, set unknown_rank property to true.
+void set_unknown_rank(tensorflow::GraphDef &tf_graph_def)
+{
+ for (auto &n : *tf_graph_def.mutable_node())
+ {
+ if (n.op().compare("Placeholder"))
+ continue;
+
+ auto iter = n.attr().find("shape");
+ if (iter == n.attr().end())
+ {
+ tensorflow::AttrValue attr;
+ attr.mutable_shape()->set_unknown_rank(true);
+ n.mutable_attr()->insert({"shape", attr});
+ }
+ }
+}
+
+/**
+ * @brief Set input shape according to signature if node has unknown shape in GraphDef.
+ *
+ * @note If shape you provided is wrong or not enough, it returns false.
+ */
+bool set_input_shape(const moco::ModelSignature &signature, tensorflow::GraphDef &tf_graph_def)
+{
+ for (auto &n : *tf_graph_def.mutable_node())
+ {
+ if (n.op().compare("Placeholder"))
+ continue;
+
+ auto node_shape = n.mutable_attr()->at("shape").mutable_shape();
+ auto sig_shape = signature.shape(n.name() + ":0");
+
+ if (node_shape->unknown_rank() || !node_shape->dim_size())
+ {
+ // If shape in GraphDef is unknown, user must provide the shape info.
+ if (sig_shape == nullptr)
+ return false;
+ node_shape->clear_unknown_rank();
+ for (uint32_t i = 0; i < sig_shape->rank(); i++)
+ node_shape->add_dim()->set_size(-1);
+ }
+
+ for (uint32_t d = 0; d < node_shape->dim_size(); d++)
+ {
+ if (node_shape->mutable_dim(d)->size() == -1)
+ {
+ if (sig_shape == nullptr)
+ return false;
+ node_shape->mutable_dim(d)->set_size(sig_shape->dim(d));
+ }
+ else
+ {
+ // If User provide shape info though it already exists in GraphDef, make sure it matches
+ // the shape of GraphDef.
+ if (sig_shape && node_shape->dim(d).size() != sig_shape->dim(d))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+void transform_tf(const moco::ModelSignature &signature, tensorflow::GraphDef &tf_graph_def)
+{
+ set_unknown_rank(tf_graph_def);
+ if (!set_input_shape(signature, tf_graph_def))
+ oops::UserExn("Info you provided may be wrong or not enough. Please check the info file.");
+}
+
+/**
+ * @brief Returns GraphBuilderRegistry that looks up default registry and additions
+ * such as custom op
+ */
+moco::GraphBuilderRegistry make_graph_builder_registry(const moco::ModelSignature &sig)
+{
+ moco::GraphBuilderRegistry registry{&moco::GraphBuilderRegistry::get()};
+
+ // build a COpCallGraphBuilder per custom op type
+ for (const auto &custom_op : sig.customops())
+ {
+ std::unique_ptr<moco::tf::COpCallGraphBuilder> builder =
+ stdex::make_unique<moco::tf::COpCallGraphBuilder>(&sig);
+ registry.add(custom_op, std::move(builder));
+ }
+
+ return registry;
+}
+
+} // namespace
+
+// TODO Find a proper place for this function
+
+namespace
+{
+
+loco::TensorShape tensor_shape(loco::Node *node)
+{
+ assert(loco::shape_known(node));
+ auto node_shape = loco::shape_get(node);
+ return node_shape.as<loco::TensorShape>();
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+Frontend::Frontend()
+{
+ // DO NOTHING
+}
+
+std::unique_ptr<loco::Graph> Frontend::load(const ModelSignature &signature, const char *modelfile,
+ FileType type) const
+{
+ // Using c++ standard library, rather than file descriptor, makes these lines portable
+ std::ifstream ifs{modelfile, std::ios::in | std::ios::binary};
+ return load(signature, &ifs, type);
+}
+
+std::unique_ptr<loco::Graph> Frontend::load(const ModelSignature &signature, std::istream *stream,
+ FileType type) const
+{
+ tensorflow::GraphDef tf_graph_def;
+
+ load_tf(stream, type, tf_graph_def);
+
+ transform_tf(signature, tf_graph_def);
+
+ auto graph = import(signature, tf_graph_def);
+
+ return std::move(graph);
+}
+
+std::unique_ptr<loco::Graph> Frontend::import(const ModelSignature &signature,
+ tensorflow::GraphDef &tf_graph_def) const
+{
+ LOGGER(frontend);
+
+ // Let's use GraphBuilderRegistry with COpCallGraphBuilder
+ GraphBuilderRegistry registry = make_graph_builder_registry(signature);
+
+ Importer importer{&registry};
+
+ INFO(frontend) << ">>";
+ INFO(frontend) << ">> Import stage started";
+ INFO(frontend) << ">>";
+ auto graph = importer.import(signature, tf_graph_def);
+
+ TFOptimizer tfoptimizier;
+
+ // Transform TFNodes
+ INFO(frontend) << ">>";
+ INFO(frontend) << ">> TF optimize stage started";
+ INFO(frontend) << ">>";
+ tfoptimizier.optimize(graph.get());
+
+ // Fill graph-level input/output shape
+ //
+ // ASSUMPTION! All the shapes are known at this point
+ for (uint32_t n = 0; n < graph->inputs()->size(); ++n)
+ {
+ auto input = graph->inputs()->at(n);
+ auto input_node = moco::placeholder_node(graph.get(), n);
+ assert(input_node != nullptr);
+ input->shape(stdex::make_unique<loco::TensorShape>(tensor_shape(input_node)));
+ }
+
+ for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
+ {
+ auto output = graph->outputs()->at(n);
+ auto output_node = moco::push_node(graph.get(), n);
+ assert(output_node != nullptr);
+ output->shape(stdex::make_unique<loco::TensorShape>(::tensor_shape(output_node)));
+ }
+
+ // Convert graph to hold only Canonical dialect
+ Canonicalizer canonicalizer;
+
+ INFO(frontend) << ">>";
+ INFO(frontend) << ">> Canonicalize stage started";
+ INFO(frontend) << ">>";
+ canonicalizer.canonicalize(graph.get());
+
+ // Optimize imported loco::Graph
+ Optimizer optimizer;
+
+ INFO(frontend) << ">>";
+ INFO(frontend) << ">> Canonical optimize stage started";
+ INFO(frontend) << ">>";
+ optimizer.optimize(graph.get());
+
+ return std::move(graph);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Frontend.test.cpp b/compiler/moco-tf/src/Frontend.test.cpp
new file mode 100644
index 000000000..c665bd9e3
--- /dev/null
+++ b/compiler/moco-tf/src/Frontend.test.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/tf/Frontend.h"
+
+#include "TestHelper.h"
+
+#include <sstream>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+// clang-format off
+const char *pbtxt_000 = STRING_CONTENT(
+node {
+ name: "Placeholder"
+ op: "Placeholder"
+ attr {
+ key: "dtype"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim { size: 4 }
+ }
+ }
+ }
+}
+node {
+ name: "Identity"
+ op: "Identity"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+}
+);
+// clang-format on
+
+} // namespace
+
+TEST(FrontendTests, testcase_000)
+{
+ moco::tf::Frontend frontend;
+ moco::ModelSignature signature;
+
+ signature.add_input(moco::TensorName("Placeholder", 0));
+ signature.shape("Placeholder:0", angkor::TensorShape{4});
+ signature.add_output(moco::TensorName("Identity", 0));
+
+ std::stringstream ss{pbtxt_000};
+
+ auto graph = frontend.load(signature, &ss, moco::tf::Frontend::FileType::Text);
+
+ ASSERT_EQ(graph->inputs()->size(), 1);
+ ASSERT_EQ(graph->inputs()->at(0)->name(), "Placeholder");
+ ASSERT_NE(graph->inputs()->at(0)->shape(), nullptr);
+ ASSERT_EQ(graph->inputs()->at(0)->shape()->rank(), 1);
+ ASSERT_EQ(graph->inputs()->at(0)->shape()->dim(0), 4);
+
+ ASSERT_EQ(graph->outputs()->size(), 1);
+ ASSERT_EQ(graph->outputs()->at(0)->name(), "Identity");
+ ASSERT_NE(graph->outputs()->at(0)->shape(), nullptr);
+ ASSERT_EQ(graph->outputs()->at(0)->shape()->rank(), 1);
+ ASSERT_EQ(graph->outputs()->at(0)->shape()->dim(0), 4);
+}
diff --git a/compiler/moco-tf/src/Knob.cpp b/compiler/moco-tf/src/Knob.cpp
new file mode 100644
index 000000000..0e1c7e0ea
--- /dev/null
+++ b/compiler/moco-tf/src/Knob.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Knob.h"
+
+#include <pepper/strcast.h>
+
+#include <iostream>
+#include <string>
+
+// Basic Infrastructure to declare and access Knob values
+//
+// TODO Reuse this infrastructure as a library
+namespace
+{
+
+using KnobName = std::string;
+
+/**
+ * @brief Load configuration (from somewhere)
+ */
+struct KnobLoader
+{
+ virtual ~KnobLoader() = default;
+
+ virtual bool load(const KnobName &name, bool default_value) const = 0;
+};
+
+// Template-programming helpers
+template <typename T> T knob_load(const KnobLoader &, const KnobName &, const T &);
+
+template <>
+bool knob_load(const KnobLoader &l, const KnobName &knob_name, const bool &default_value)
+{
+ return l.load(knob_name, default_value);
+}
+
+/**
+ * @brief Load configuration from environment variables
+ *
+ * Given a prefix P, EnvKnobLoader reads a configuration K from concat(P, K).
+ *
+ * For example, let us assume that P is "MY_" and K is "CONFIG".
+ *
+ * Then, EnvKnobLoader reads configuration CONFIG from environment variable MY_CONFIG.
+ */
+class EnvKnobLoader final : public KnobLoader
+{
+public:
+ EnvKnobLoader(const std::string &prefix) : _prefix{prefix}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool load(const KnobName &knob_name, bool default_value) const override
+ {
+ auto envvar = _prefix + knob_name;
+ auto s = std::getenv(envvar.c_str());
+
+ return pepper::safe_strcast<int>(s, default_value ? 1 : 0) != 0;
+ }
+
+private:
+ /// @brief Environment variable prefix
+ std::string _prefix;
+};
+
+} // namespace
+
+namespace
+{
+
+/**
+ * TODO Support Knob Loader Injection
+ *
+ * Let us assume that there is a compiler "A" based on moco, and it wants to reuse this
+ * infrastructure.
+ *
+ * Under the current design, users have to set "MOCO_XXX" even though they uses "A", which is
+ * counter-intuitive.
+ *
+ * "Knob Loader Injection" aims to address this issue. "Knob Loader Injection" allows "A" to
+ * inject its own knob loader that reads "A_XXX" environment variables.
+ */
+const KnobLoader &knob_loader(void)
+{
+ static EnvKnobLoader loader{"MOCO_"};
+ return loader;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+#define KNOB_BOOL(NAME, DEFAULT, DESC) \
+ template <> typename KnobTrait<Knob::NAME>::ValueType get<Knob::NAME>(void) \
+ { \
+ static typename KnobTrait<Knob::NAME>::ValueType value = \
+ ::knob_load<typename KnobTrait<Knob::NAME>::ValueType>(::knob_loader(), #NAME, DEFAULT); \
+ return value; \
+ }
+#include "Knob.lst"
+#undef KNOB_BOOL
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Knob.h b/compiler/moco-tf/src/Knob.h
new file mode 100644
index 000000000..145a81dc3
--- /dev/null
+++ b/compiler/moco-tf/src/Knob.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KNOB_H__
+#define __KNOB_H__
+
+namespace moco
+{
+namespace tf
+{
+
+enum class Knob
+{
+#define KNOB_BOOL(NAME, DEFAULT, DESC) NAME,
+#include "Knob.lst"
+#undef KNOB_BOOL
+};
+
+template <Knob K> struct KnobTrait;
+
+#define KNOB_BOOL(NAME, DEFAULT, DESC) \
+ template <> struct KnobTrait<Knob::NAME> \
+ { \
+ using ValueType = bool; \
+ };
+#include "Knob.lst"
+#undef KNOB_BOOL
+
+template <Knob K> typename KnobTrait<K>::ValueType get(void);
+
+} // namespace tf
+} // namespace moco
+
+#endif // __KNOB_H__
diff --git a/compiler/moco-tf/src/Knob.lst b/compiler/moco-tf/src/Knob.lst
new file mode 100644
index 000000000..b88e064c7
--- /dev/null
+++ b/compiler/moco-tf/src/Knob.lst
@@ -0,0 +1,39 @@
+#ifndef KNOB_BOOL
+#error "KNOB_BOOL is not defined"
+#endif // KNOB_BOOL
+
+// KNOB_BOOL(NAME, DEFAULT_VALUE, DESCRIPTION)
+
+// TensorFlow dialect transforms
+KNOB_BOOL(FuseBinaryIntoPreceding, true, Fuse Binary node to preceding node)
+KNOB_BOOL(ResolveFusedBatchNorm, true, Enable ResolveFusedBatchNorm transform)
+KNOB_BOOL(ResolveConstantShape, true, Replace determined TFShape to TFConst)
+KNOB_BOOL(ResolveReshapeWildcardDim, true, Resolve wildcard dimension in TFReshape node)
+KNOB_BOOL(ResolveSquaredDifference, true, Resolve SquaredDifference node)
+KNOB_BOOL(RemoveTFIdentityNode, true, Enable RemoveTFIdentityNode optimization)
+KNOB_BOOL(SqueezeReduceNode, true, Insert TFSqueeze if ReduceNode do not keep dimensions)
+// Constant folding
+KNOB_BOOL(ConstantFoldAdd, false, Constant fold for Add node)
+KNOB_BOOL(ConstantFoldMul, false, Constant fold for Mul node)
+KNOB_BOOL(ConstantFoldPack, false, Constant fold for Pack node)
+KNOB_BOOL(ConstantFoldStridedSlice, false, Constant fold for StridedSlice node)
+
+// Canonicalization
+KNOB_BOOL(CanonicalizeBiasAdd, true, Enable Canonicalize for BiasAdd node)
+KNOB_BOOL(CanonicalizeConst, true, Enable Canonicalize for Const node)
+KNOB_BOOL(CanonicalizeConv2D, true, Enable Canonicalize for Conv2D node)
+
+// Canonical transforms
+KNOB_BOOL(ConstantFolding, true, Enable constant-folding optimization)
+KNOB_BOOL(RemoveForwardNode, true, Enable RemoveForwardNode optimization)
+KNOB_BOOL(ReorderDecode, true, Enable ReorderDecode optimization)
+// BEG: These knobs are valid only when ReorderDecode is enabled
+KNOB_BOOL(ReorderDecodeReLU, true, Reorder FeatureDecode-ReLU)
+KNOB_BOOL(ReorderDecodeTensorBiasAdd, true, Reorder FeatureDecode-TensorBiasAdd)
+// END
+KNOB_BOOL(SimplifyDomainConversion, true, Enable SimplifyDomainConversion optimization)
+KNOB_BOOL(ResolveDuplicateReshape, true, Resolve duplicated Reshape nodes)
+KNOB_BOOL(ResolveRedundantReshape, true, Resolve redundant Reshape node)
+
+// Graph transformations
+KNOB_BOOL(RemoveDeadNode, true, Enable RemoveDeadNode optimization)
diff --git a/compiler/moco-tf/src/LogHelper.cpp b/compiler/moco-tf/src/LogHelper.cpp
new file mode 100644
index 000000000..92ff75569
--- /dev/null
+++ b/compiler/moco-tf/src/LogHelper.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LogHelper.h"
+
+namespace loco
+{
+
+std::ostream &operator<<(std::ostream &os, const loco::FeatureShape &feature_shape)
+{
+ os << "[" << feature_shape.count().value() << "," << feature_shape.height().value() << ","
+ << feature_shape.width().value() << "," << feature_shape.depth().value() << "]";
+ return os;
+}
+
+std::ostream &operator<<(std::ostream &os, const loco::FilterShape &filter_shape)
+{
+ os << "[" << filter_shape.height().value() << "," << filter_shape.width().value() << ","
+ << filter_shape.depth().value() << "," << filter_shape.count().value() << "]";
+ return os;
+}
+
+std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape)
+{
+ os << "[";
+ for (uint32_t r = 0; r < tensor_shape.rank(); ++r)
+ {
+ if (r)
+ os << ",";
+ os << tensor_shape.dim(r).value();
+ }
+ os << "]";
+ return os;
+}
+
+std::ostream &operator<<(std::ostream &os, const loco::Padding2D &pad)
+{
+ os << "[TLBR " << pad.top() << "," << pad.left() << "," << pad.bottom() << "," << pad.right()
+ << "]";
+
+ return os;
+}
+
+} // namespace loco
+
+std::ostream &operator<<(std::ostream &os, const std::vector<int64_t> &vi64)
+{
+ for (auto vi : vi64)
+ {
+ os << vi << " ";
+ }
+ return os;
+}
+
+#include "TFFormattedGraph.h"
+
+namespace moco
+{
+namespace tf
+{
+
+FormattedGraph fmt(loco::Graph *g)
+{
+ auto node_summary_builder = stdex::make_unique<TFNodeSummaryBuilderFactory>();
+ return std::move(locop::fmt<locop::LinearV1>(g).with(std::move(node_summary_builder)));
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/LogHelper.h b/compiler/moco-tf/src/LogHelper.h
new file mode 100644
index 000000000..4e3cb5dac
--- /dev/null
+++ b/compiler/moco-tf/src/LogHelper.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOG_HELPER_H__
+#define __LOG_HELPER_H__
+
+#include <locop/FormattedGraph.h>
+
+#include <loco/IR/FeatureShape.h>
+#include <loco/IR/FilterShape.h>
+#include <loco/IR/TensorShape.h>
+
+#include <sstream>
+#include <vector>
+
+namespace loco
+{
+
+/**
+ * @brief dump FeatureShape values to stream
+ */
+std::ostream &operator<<(std::ostream &os, const loco::FeatureShape &feature_shape);
+
+/**
+ * @brief dump FilterShape values to stream
+ */
+std::ostream &operator<<(std::ostream &os, const loco::FilterShape &filter_shape);
+
+/**
+ * @brief dump TensorShape values to stream
+ */
+std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape);
+
+/**
+ * @brief dump Padding2D values to stream
+ */
+std::ostream &operator<<(std::ostream &os, const loco::Padding2D &pad);
+
+} // namespace loco
+
+/**
+ * @brief dump std::vector<int64_t> values to stream
+ */
+std::ostream &operator<<(std::ostream &os, const std::vector<int64_t> &vi64);
+
+namespace moco
+{
+namespace tf
+{
+
+using FormattedGraph = locop::FormattedGraphImpl<locop::Formatter::LinearV1>;
+
+FormattedGraph fmt(loco::Graph *g);
+
+static inline FormattedGraph fmt(const std::unique_ptr<loco::Graph> &g) { return fmt(g.get()); }
+
+} // namespace tf
+} // namespace moco
+
+#endif // __LOG_HELPER_H__
diff --git a/compiler/moco-tf/src/Op/COpCall.cpp b/compiler/moco-tf/src/Op/COpCall.cpp
new file mode 100644
index 000000000..801196f0f
--- /dev/null
+++ b/compiler/moco-tf/src/Op/COpCall.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "COpCall.h"
+
+#include "Convert.h"
+
+#include <locoex/COpCall.h>
+#include <locoex/COpAttrTypes.h>
+#include <moco/Names.h>
+#include <moco/tf/Frontend.h>
+#include <loco.h>
+#include <stdex/Memory.h>
+#include <oops/UserExn.h>
+
+#include <vector>
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+class COpCallGraphUpdate final : public moco::GraphUpdate
+{
+public:
+ COpCallGraphUpdate(locoex::COpCall *node, const std::vector<moco::TensorName> &input_names)
+ : _node(node), _input_names(input_names)
+ {
+ }
+
+ void input(const moco::SymbolTable *) const override;
+
+private:
+ locoex::COpCall *_node;
+ const std::vector<moco::TensorName> _input_names;
+};
+
+void COpCallGraphUpdate::input(const moco::SymbolTable *tensor_names) const
+{
+ for (int n = 0; n < _input_names.size(); n++)
+ {
+ loco::Node *target = tensor_names->node(_input_names.at(n));
+ _node->input(n, target);
+ }
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool COpCallGraphBuilder::validate(const tensorflow::NodeDef &tf_node) const { return true; }
+
+void COpCallGraphBuilder::build(const tensorflow::NodeDef &tf_node,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // Create a "COpCall" node for CustomOp and set attributes
+ auto call_node = graph->nodes()->create<locoex::COpCall>(tf_node.input_size());
+ {
+ call_node->op(tf_node.op());
+ call_node->name(tf_node.name());
+ call_node->dtype(_signature->dtype(tf_node.name()));
+
+ auto shape = _signature->shape(tf_node.name());
+ call_node->rank(shape->rank());
+ for (int d = 0; d < shape->rank(); d++)
+ call_node->dim(d) = shape->dim(d);
+
+ for (auto iter = tf_node.attr().begin(); iter != tf_node.attr().end(); iter++)
+ {
+ auto name = iter->first;
+ auto val = iter->second;
+
+ if (val.value_case() == tensorflow::AttrValue::kF)
+ {
+ call_node->attr(name, stdex::make_unique<locoex::COpAttrFloat>(val.f()));
+ }
+ else if (val.value_case() == tensorflow::AttrValue::kI)
+ {
+ call_node->attr(name, stdex::make_unique<locoex::COpAttrInt>(val.i()));
+ }
+ // TODO define more types
+ else
+ {
+ throw oops::UserExn("Unsupported attribute type", tf_node.name());
+ }
+ }
+ }
+
+ // register this node with its name
+ TensorName output_name(tf_node.name(), 0);
+ tensor_names->enroll(output_name, call_node);
+
+ // Queue node input update
+ std::vector<TensorName> input_names;
+ for (int i = 0; i < tf_node.input_size(); ++i)
+ {
+ input_names.emplace_back(TensorName(tf_node.input(i)));
+ }
+ auto update = stdex::make_unique<COpCallGraphUpdate>(call_node, input_names);
+ updates->enroll(std::move(update));
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Op/COpCall.h b/compiler/moco-tf/src/Op/COpCall.h
new file mode 100644
index 000000000..0bb8a93c9
--- /dev/null
+++ b/compiler/moco-tf/src/Op/COpCall.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_COP_CALL_H__
+#define __OP_COP_CALL_H__
+
+#include <moco/tf/Frontend.h>
+
+#include <moco/Import/GraphBuilder.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief GraphBuilder for COpCall node
+ */
+class COpCallGraphBuilder final : public GraphBuilder
+{
+public:
+ COpCallGraphBuilder(const ModelSignature *signature) : _signature(signature) { /* empty */}
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+
+private:
+ const ModelSignature *_signature;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __OP_COP_CALL_H__
diff --git a/compiler/moco-tf/src/Op/COpCall.test.cpp b/compiler/moco-tf/src/Op/COpCall.test.cpp
new file mode 100644
index 000000000..f13118292
--- /dev/null
+++ b/compiler/moco-tf/src/Op/COpCall.test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "COpCall.h"
+
+#include "TestHelper.h"
+
+#include "Canonicalizer.h"
+
+#include <moco/Importer.h>
+
+#include <locoex/COpCall.h>
+#include <locoex/COpAttrTypes.h>
+
+#include <loco.h>
+#include <plier/tf/TestHelper.h>
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+using namespace moco::tf::test;
+
+namespace
+{
+// clang-format off
+const char *customop_01_pbtxtdata = STRING_CONTENT(
+node {
+ name: "input1"
+ op: "Placeholder"
+ attr {
+ key: "dtype" value { type: DT_FLOAT } }
+ attr {
+ key: "shape"
+ value { shape { dim { size: 1 } dim { size: 2 } } }
+ }
+}
+node {
+ name: "input2"
+ op: "Const"
+ attr { key: "dtype" value { type: DT_FLOAT } }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape { dim { size: 1 } dim { size: 2 } }
+ float_val: 1.1 float_val: 2.2
+ }
+ }
+ }
+}
+node {
+ name: "my/customOp/000"
+ op: "new_custom_op"
+ input: "input1"
+ input: "input2"
+ attr { key: "my_float" value { f: 0.001 } }
+ attr { key: "my_int" value { i: 111 } }
+}
+);
+
+// clang-format on
+} // namespace
+
+TEST(Call_Test, Call_01)
+{
+ moco::ModelSignature signature;
+ {
+ signature.add_input(moco::TensorName("input1", 0));
+ signature.add_output(moco::TensorName("my/customOp/000", 0));
+ signature.add_customop("new_custom_op");
+ signature.dtype("my/customOp/000", loco::DataType::FLOAT32);
+ signature.shape("my/customOp/000", {1, 2});
+ }
+
+ tensorflow::GraphDef graph_def;
+ EXPECT_TRUE(plier::tf::parse_graphdef(customop_01_pbtxtdata, graph_def));
+
+ // import
+ moco::GraphBuilderRegistry registry{&moco::GraphBuilderRegistry::get()};
+ registry.add("new_custom_op", stdex::make_unique<moco::tf::COpCallGraphBuilder>(&signature));
+
+ moco::Importer importer(&registry);
+ std::unique_ptr<loco::Graph> graph = importer.import(signature, graph_def);
+
+ // what to test:
+ // - there should exist COpCall
+ // - two input nodes should exist and not be nullptr
+ // - attributes should match
+
+ auto *customop = moco::tf::test::find_first_node_bytype<locoex::COpCall>(graph.get());
+ ASSERT_NE(customop, nullptr);
+
+ ASSERT_EQ(customop->arity(), 2);
+
+ loco::Node *input_0 = customop->arg(0);
+ loco::Node *input_1 = customop->arg(1);
+ ASSERT_NE(input_0, nullptr);
+ ASSERT_NE(input_1, nullptr);
+
+ auto f_attr = customop->attr<locoex::COpAttrType::Float>("my_float");
+ ASSERT_FLOAT_EQ(f_attr->val(), 0.001);
+ ASSERT_TRUE(f_attr->type() == locoex::COpAttrType::Float);
+
+ auto i_attr = customop->attr<locoex::COpAttrType::Int>("my_int");
+ ASSERT_FLOAT_EQ(i_attr->val(), 111);
+ ASSERT_TRUE(i_attr->type() == locoex::COpAttrType::Int);
+}
diff --git a/compiler/moco-tf/src/Optimizer.cpp b/compiler/moco-tf/src/Optimizer.cpp
new file mode 100644
index 000000000..f33b4109b
--- /dev/null
+++ b/compiler/moco-tf/src/Optimizer.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Optimizer.h"
+
+#include "Knob.h"
+#include "ProgressReporter.h"
+#include "Transforms.h"
+
+#include <logo/Phase.h>
+
+#include <stdex/Memory.h>
+
+namespace moco
+{
+namespace tf
+{
+
+void Optimizer::optimize(loco::Graph *g) const
+{
+ logo::Phase phase;
+
+ /* TRANSFORM DECLARATION BEGIN */
+ // Shape inference is required for ResolveRedundantReshape
+ phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
+
+ if (moco::tf::get<moco::tf::Knob::ConstantFolding>())
+ {
+ phase.emplace_back(stdex::make_unique<logo::ConstantFoldingPass>());
+ }
+
+ if (moco::tf::get<moco::tf::Knob::RemoveDeadNode>())
+ {
+ phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ }
+
+ if (moco::tf::get<moco::tf::Knob::ReorderDecode>() &&
+ moco::tf::get<moco::tf::Knob::ReorderDecodeTensorBiasAdd>())
+ {
+ phase.emplace_back(stdex::make_unique<logo::ReorderDecodePass<loco::TensorBiasAdd>>());
+ }
+
+ if (moco::tf::get<moco::tf::Knob::ReorderDecode>() &&
+ moco::tf::get<moco::tf::Knob::ReorderDecodeReLU>())
+ {
+ phase.emplace_back(stdex::make_unique<logo::ReorderDecodePass<loco::ReLU>>());
+ }
+
+ if (moco::tf::get<moco::tf::Knob::SimplifyDomainConversion>())
+ {
+ phase.emplace_back(stdex::make_unique<logo::SimplifyDomainConversionPass>());
+ }
+
+ if (moco::tf::get<moco::tf::Knob::RemoveForwardNode>())
+ {
+ phase.emplace_back(stdex::make_unique<logo::RemoveForwardNodePass>());
+ }
+
+ if (moco::tf::get<moco::tf::Knob::ResolveDuplicateReshape>())
+ {
+ phase.emplace_back(stdex::make_unique<logo::ResolveDuplicateReshapePass>());
+ }
+
+ if (moco::tf::get<moco::tf::Knob::ResolveRedundantReshape>())
+ {
+ phase.emplace_back(stdex::make_unique<logo::ResolveRedundantReshapePass>());
+ }
+ /* TRANSFORM DECLARATION END */
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Optimizer.h b/compiler/moco-tf/src/Optimizer.h
new file mode 100644
index 000000000..8584df89b
--- /dev/null
+++ b/compiler/moco-tf/src/Optimizer.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OPTIMIZER_H__
+#define __OPTIMIZER_H__
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
/**
 * @brief Optimizes a loco graph in place by running knob-selected transform
 *        passes (implementation in Optimizer.cpp).
 */
class Optimizer final
{
public:
  // Runs the enabled transform passes over the given graph
  void optimize(loco::Graph *) const;
};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __OPTIMIZER_H__
diff --git a/compiler/moco-tf/src/Optimizer.test.cpp b/compiler/moco-tf/src/Optimizer.test.cpp
new file mode 100644
index 000000000..5ffed58e3
--- /dev/null
+++ b/compiler/moco-tf/src/Optimizer.test.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Optimizer.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+// Optimizer SHOULD NOT crash even though a given graph is empty
+TEST(Optimizer, empty_graph)
+{
+ moco::tf::Optimizer o;
+
+ loco::Graph g;
+
+ o.optimize(&g);
+
+ SUCCEED();
+}
+
+TEST(Optimizer, simple_forward_graph)
+{
+ moco::tf::Optimizer o;
+
+ /**
+ * Create a simple graph that forwards a constant as output
+ */
+ loco::Graph g;
+ {
+ auto constgen = g.nodes()->create<loco::ConstGen>();
+ constgen->shape({2, 3});
+
+ auto forward = g.nodes()->create<loco::Forward>();
+ forward->input(constgen);
+
+ auto pull = g.nodes()->create<loco::Push>();
+ pull->from(forward);
+ }
+
+ o.optimize(&g);
+
+ SUCCEED();
+}
+
+TEST(Optimizer, simple_forward_graph_with_one_valid_output)
+{
+ moco::tf::Optimizer o;
+
+ /**
+ * Create a simple graph that forwards a constant as graph-level output
+ */
+ loco::Graph g;
+ {
+ auto output = g.outputs()->create();
+
+ auto constgen = g.nodes()->create<loco::ConstGen>();
+ constgen->shape({2, 3});
+ constgen->dtype(loco::DataType::FLOAT32);
+ constgen->size<loco::DataType::FLOAT32>(6);
+
+ auto forward = g.nodes()->create<loco::Forward>();
+ forward->input(constgen);
+
+ auto pull = g.nodes()->create<loco::Push>();
+ pull->from(forward);
+
+ loco::link(output, pull);
+ }
+
+ o.optimize(&g);
+
+ SUCCEED();
+}
diff --git a/compiler/moco-tf/src/ProgressReporter.cpp b/compiler/moco-tf/src/ProgressReporter.cpp
new file mode 100644
index 000000000..41338ffec
--- /dev/null
+++ b/compiler/moco-tf/src/ProgressReporter.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ProgressReporter.h"
+
+#include "LogHelper.h"
+
+#include <logo/Phase.h>
+#include <logo/Pass.h>
+
+#include <moco/Log.h>
+
+#include <cassert>
+
+namespace
+{
+
// Render a boolean as a single-character flag: 'Y' for true, 'N' for false.
char to_char(bool b)
{
  if (b)
    return 'Y';
  return 'N';
}
+
+const char *to_str(logo::PhaseStrategy s)
+{
+ switch (s)
+ {
+ case logo::PhaseStrategy::Saturate:
+ return "Saturate";
+ case logo::PhaseStrategy::Restart:
+ return "Restart";
+ }
+ assert(false);
+ return "";
+}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *info)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "==============================================================";
+ INFO(prime) << "PhaseRunner<" << to_str(strategy()) << ">";
+ INFO(prime) << "Initial graph";
+ INFO(prime) << moco::tf::fmt(graph());
+}
+
+void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *info)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "PhaseRunner<" << to_str(strategy()) << "> - done";
+}
+
// Log a separator and the name of the pass about to run.
void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *info)
{
  LOGGER(prime);

  INFO(prime) << "--------------------------------------------------------------";
  INFO(prime) << "Before " << logo::pass_name(info->pass());
}
+
// Log the finished pass, whether it changed the graph ('Y'/'N'), and the
// resulting graph state.
void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *info)
{
  LOGGER(prime);

  INFO(prime) << "After " << logo::pass_name(info->pass())
              << " (changed: " << to_char(info->changed()) << ")";
  INFO(prime) << moco::tf::fmt(graph());
}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/ProgressReporter.h b/compiler/moco-tf/src/ProgressReporter.h
new file mode 100644
index 000000000..190d972c5
--- /dev/null
+++ b/compiler/moco-tf/src/ProgressReporter.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_PROGRESSREPORTER_H__
+#define __MOCO_TF_PROGRESSREPORTER_H__
+
+#include <logo/Phase.h>
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
/**
 * @brief Phase-event listener that logs optimization progress for a graph.
 *
 * Intended to be attached to a logo::PhaseRunner; each callback writes to the
 * logger (see ProgressReporter.cpp).
 */
class ProgressReporter : public logo::PhaseEventListener
{
public:
  ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
      : _graph{graph}, _strategy{strategy}
  {
    // DO NOTHING
  }

public:
  void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *) override;
  void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *) override;
  void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *) override;
  void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *) override;

public:
  // The graph under optimization (not owned)
  loco::Graph *graph(void) const { return _graph; }
  // Strategy of the runner this reporter is attached to
  logo::PhaseStrategy strategy(void) const { return _strategy; }

private:
  loco::Graph *_graph;
  logo::PhaseStrategy _strategy;
};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_PROGRESSREPORTER_H__
diff --git a/compiler/moco-tf/src/SimpleNodeTransform.h b/compiler/moco-tf/src/SimpleNodeTransform.h
new file mode 100644
index 000000000..b69cbad6b
--- /dev/null
+++ b/compiler/moco-tf/src/SimpleNodeTransform.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_SIMPLE_NODE_TRANSFORM_H__
+#define __MOCO_TF_SIMPLE_NODE_TRANSFORM_H__
+
+#include "Transform.h"
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Per-Node Transform
+ */
+template <typename ConcreteNode> struct SimpleNodeTransform : public Transform
+{
+ SimpleNodeTransform() = default;
+
+ virtual ~SimpleNodeTransform() = default;
+
+ // NOTE Users SHOULD implement this method
+ virtual bool transform(ConcreteNode *node) const = 0;
+
+ bool run(loco::Graph *graph) final
+ {
+ using loco::active_nodes;
+ using loco::output_nodes;
+
+ bool changed = false;
+
+ for (auto node : active_nodes(output_nodes(graph)))
+ {
+ if (auto casted = dynamic_cast<ConcreteNode *>(node))
+ {
+ if (transform(casted))
+ {
+ changed = true;
+ }
+ }
+ }
+
+ return changed;
+ }
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_SIMPLE_NODE_TRANSFORM_H__
diff --git a/compiler/moco-tf/src/SimpleNodeTransform.test.cpp b/compiler/moco-tf/src/SimpleNodeTransform.test.cpp
new file mode 100644
index 000000000..781a48781
--- /dev/null
+++ b/compiler/moco-tf/src/SimpleNodeTransform.test.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SimpleNodeTransform.h"
+
+#include <set>
+
+#include <gtest/gtest.h>
+
+TEST(SimpleNodeTransformTests, run)
+{
+ class Transform final : public moco::tf::SimpleNodeTransform<loco::Push>
+ {
+ public:
+ Transform(std::multiset<loco::Node *> *out) : _out{out}
+ {
+ // DO NOTHING
+ }
+
+ public:
+ bool transform(loco::Push *node) const final
+ {
+ _out->insert(node);
+ return false;
+ }
+
+ private:
+ std::multiset<loco::Node *> *_out;
+ };
+
+ auto g = loco::make_graph();
+ auto output_0 = g->outputs()->create();
+ auto push = g->nodes()->create<loco::Push>();
+ loco::link(output_0, push);
+
+ std::multiset<loco::Node *> nodes;
+ Transform transform{&nodes};
+
+ transform.run(g.get());
+
+ ASSERT_EQ(nodes.size(), 1);
+ ASSERT_EQ(nodes.count(push), 1);
+}
diff --git a/compiler/moco-tf/src/TFEltwiseBinaryCanonicalzeHelper.h b/compiler/moco-tf/src/TFEltwiseBinaryCanonicalzeHelper.h
new file mode 100644
index 000000000..df9aec144
--- /dev/null
+++ b/compiler/moco-tf/src/TFEltwiseBinaryCanonicalzeHelper.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TF_ELTWISE_BINARY_CANONICALIZE_HELPER_H__
+#define __TF_ELTWISE_BINARY_CANONICALIZE_HELPER_H__
+
+#include <moco/IR/TFDialect.h>
+#include <moco/IR/TFNodes.h>
+
+#include "CanonicalEltwiseInputConnector.h"
+#include "BroadcastHelper.h"
+
+#include <loco/IR/Nodes.h>
+#include <loco/IR/NodeShape.h>
+#include <loco/Service/ShapeInference.h>
+
+#include <fipe.h>
+
+namespace
+{
+
// Type-level mapping from a TF-dialect eltwise binary node to the canonical
// loco node that replaces it. Consumed by canonicalize_eltwise_binary_node.
template <typename TFNodeT> struct EltwiseBinaryCanonicalizationRule;

template <> struct EltwiseBinaryCanonicalizationRule<moco::TFAdd>
{
  using CanonicalNode = loco::EltwiseAdd;
};

template <> struct EltwiseBinaryCanonicalizationRule<moco::TFSub>
{
  using CanonicalNode = loco::EltwiseSub;
};

template <> struct EltwiseBinaryCanonicalizationRule<moco::TFMaximum>
{
  using CanonicalNode = loco::EltwiseMax;
};

template <> struct EltwiseBinaryCanonicalizationRule<moco::TFMul>
{
  using CanonicalNode = loco::EltwiseMul;
};

template <> struct EltwiseBinaryCanonicalizationRule<moco::TFRealDiv>
{
  using CanonicalNode = loco::EltwiseDiv;
};
+
// Replace a TF-dialect eltwise binary node with its canonical counterpart.
// Returns true when the graph was changed, false when the rewrite must be
// deferred (shapes not yet inferred).
template <typename TFNode> bool canonicalize_eltwise_binary_node(TFNode *node)
{
  auto graph = node->graph();

  /**
   * This will replace T/F Eltwise Binary node with a corresponding Canonical Eltwise node
   *
   * BEFORE
   *   A --- T/F Node --- C
   *        /
   *   B ----
   *
   * AFTER
   *   A --- T/F Node ---
   *        /
   *   B ----
   *
   *   A --- [FixedReshape] --- [TensorBroadcast] --- Canonical Node -- C
   *                                                 /
   *   B --- [FixedReshape] --- [TensorBroadcast] ----
   *
   * NOTE
   *   - [...] means optional node. They may or may not be created during this procedure.
   *   - T/F Node is disconnected from C after transformation.
   */

  using CanonicalNodeT = typename EltwiseBinaryCanonicalizationRule<TFNode>::CanonicalNode;

  auto node_A = node->x();
  auto node_B = node->y();

  // Defer the rewrite until shape inference has determined the shapes of
  // both inputs and the node itself
  if (!loco::shape_known(node_A) || !loco::shape_known(node_B))
    return false;
  if (!loco::shape_known(node))
    return false;

  auto out_shape = loco::shape_get(node).template as<loco::TensorShape>();

  // Create a node
  auto canonical_node = graph->nodes()->template create<CanonicalNodeT>();

  using moco::tf::eltwise::binary::connect_to;
  using moco::tf::broadcast_to;

  // update connections
  // fipe-style pipeline: broadcast both inputs to out_shape (inserting
  // reshape/broadcast nodes as needed), then wire them into canonical_node
  std::make_pair(node_A, node_B) | broadcast_to(out_shape) | connect_to(canonical_node);

  // replace node
  replace(node).with(canonical_node);

  return true;
}
+
+} // namespace
+
+#endif // __TF_ELTWISE_BINARY_CANONICALIZE_HELPER_H__
diff --git a/compiler/moco-tf/src/TFFormattedGraph.cpp b/compiler/moco-tf/src/TFFormattedGraph.cpp
new file mode 100644
index 000000000..2ea514a2b
--- /dev/null
+++ b/compiler/moco-tf/src/TFFormattedGraph.cpp
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFFormattedGraph.h"
+
+#include <moco/IR/TFDialect.h>
+#include <moco/IR/TFNodes.h>
+
+#include "LogHelper.h"
+
+#include <pepper/str.h>
+#include <locoex/Service/COpFormattedGraph.h>
+#include <oops/InternalExn.h>
+
+#include <sstream>
+
+namespace
+{
+
/**
 * @brief Return the display name ("tf.<OPCODE>") for a TF opcode number.
 *
 * Opcodes not listed in TFNodes.lst fall back to "tf.Invalid".
 */
std::string opname(uint32_t opnum)
{
  static std::string prefix{"tf."};

  switch (static_cast<moco::TFOpcode>(opnum))
  {
// Expands to one case per node registered in TFNodes.lst
#define TENSORFLOW_NODE(OPCODE, CLASS) \
  case moco::TFOpcode::OPCODE: \
    return prefix + #OPCODE;
#include <moco/IR/TFNodes.lst>
#undef TENSORFLOW_NODE
    default:
      break;
  };

  return prefix + "Invalid";
}
+
+using namespace moco;
+using namespace moco::tf;
+
/// TFNodeSummaryBuilder with default implementation
class TFNodeSummaryBuilderBase : public locop::NodeSummaryBuilder
{
public:
  TFNodeSummaryBuilderBase(const locop::SymbolTable *tbl) : _tbl{tbl}
  {
    // DO NOTHING
  }

public:
  bool build(const loco::Node *, locop::NodeSummary &s) const final;

protected:
// One overridable "summary" method per TF node class; the default merely
// marks the summary as partially known.
#define TENSORFLOW_NODE(OPCODE, CLASS) \
  virtual bool summary(const CLASS *node, locop::NodeSummary &s) const \
  { \
    s.comments().append("Emitted by Default NodeSummaryBuilder"); \
    s.state(locop::NodeSummary::State::PartiallyKnown); \
    return true; \
  }
#include <moco/IR/TFNodes.lst>
#undef TENSORFLOW_NODE

protected:
  const locop::SymbolTable *tbl(void) const { return _tbl; }

  // Please do not use _tbl directly and use tbl().
  // This will be changed to private in near future.
protected:
  const locop::SymbolTable *_tbl;
};
+
// Overrides the default summaries for the node classes listed below; any TF
// node class not listed keeps the base-class default summary.
class TFNodeSummaryBuilder final : public TFNodeSummaryBuilderBase
{
public:
  TFNodeSummaryBuilder(const locop::SymbolTable *tbl) : TFNodeSummaryBuilderBase(tbl)
  {
    // DO NOTHING
  }

private:
#define IMPLEMENT(CLASS) bool summary(const CLASS *, locop::NodeSummary &) const final
  IMPLEMENT(TFAdd);
  IMPLEMENT(TFAvgPool);
  IMPLEMENT(TFBiasAdd);
  IMPLEMENT(TFConcatV2);
  IMPLEMENT(TFConst);
  IMPLEMENT(TFConv2D);
  IMPLEMENT(TFConv2DBackpropInput);
  IMPLEMENT(TFDepthwiseConv2dNative);
  IMPLEMENT(TFFusedBatchNorm);
  IMPLEMENT(TFMaximum);
  IMPLEMENT(TFMaxPool);
  IMPLEMENT(TFMean);
  IMPLEMENT(TFMul);
  IMPLEMENT(TFPack);
  IMPLEMENT(TFReshape);
  IMPLEMENT(TFRsqrt);
  IMPLEMENT(TFShape);
  IMPLEMENT(TFSoftmax);
  IMPLEMENT(TFSqueeze);
  IMPLEMENT(TFStopGradient);
  IMPLEMENT(TFStridedSlice);
  IMPLEMENT(TFTanh);
  // For virtual nodes
  IMPLEMENT(TFPush);
#undef IMPLEMENT
};
+
+bool TFNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const
+{
+ if (node->dialect() != TFDialect::get())
+ return false;
+
+#define TENSORFLOW_NODE(OPCODE, CLASS) \
+ if (dynamic_cast<const CLASS *>(node)) \
+ { \
+ s.opname(opname(node->opnum())); \
+ return summary(dynamic_cast<const CLASS *>(node), s); \
+ }
+#include <moco/IR/TFNodes.lst>
+#undef TENSORFLOW_NODE
+
+ return false;
+}
+
// ----- Per-node argument summaries (part 1) -----
// Each override records the node's inputs (resolved through the symbol table)
// and its attributes into the summary, then marks the summary state.

bool TFNodeSummaryBuilder::summary(const TFAdd *node, locop::NodeSummary &s) const
{
  s.args().append("x", tbl()->lookup(node->x()));
  s.args().append("y", tbl()->lookup(node->y()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFAvgPool *node, locop::NodeSummary &s) const
{
  s.args().append("value", tbl()->lookup(node->value()));
  s.args().append("ksize", pepper::str(node->ksize()));
  s.args().append("strides", pepper::str(node->strides()));
  s.args().append("padding", node->padding());
  s.args().append("data_layout", node->data_layout());

  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFBiasAdd *node, locop::NodeSummary &s) const
{
  s.args().append("value", tbl()->lookup(node->value()));
  s.args().append("bias", tbl()->lookup(node->bias()));
  s.args().append("data_layout", node->data_layout());
  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFConcatV2 *node, locop::NodeSummary &s) const
{
  // One "values(i)" argument per concatenated input
  for (uint32_t n = 0; n < node->num_values(); ++n)
  {
    std::ostringstream ss;
    ss << "values(" << n << ")";
    s.args().append(ss.str(), tbl()->lookup(node->values(n)));
  }
  s.args().append("axis", tbl()->lookup(node->axis()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFConst *node, locop::NodeSummary &s) const
{
  std::ostringstream ss;

  // Only S32 and FLOAT32 constants are handled; any other dtype raises an
  // internal error.
  auto dtype = node->dtype();
  switch (dtype)
  {
  case loco::DataType::S32:
    ss << node->size<loco::DataType::S32>();
    break;
  case loco::DataType::FLOAT32:
    ss << node->size<loco::DataType::FLOAT32>();
    break;
  default:
    INTERNAL_EXN_V("Unsupported data type", node->name());
  }
  s.args().append("size", ss.str());
  s.state(locop::NodeSummary::State::PartiallyKnown);
  return true;
}
+
// ----- Per-node argument summaries (part 2): convolutions and batch norm -----

bool TFNodeSummaryBuilder::summary(const TFConv2D *node, locop::NodeSummary &s) const
{
  s.args().append("input", tbl()->lookup(node->input()));
  s.args().append("filter", tbl()->lookup(node->filter()));
  s.args().append("padding", node->padding());
  s.args().append("data_layout", node->data_layout());
  s.args().append("strides", pepper::str(node->strides()));
  s.state(locop::NodeSummary::State::PartiallyKnown);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFConv2DBackpropInput *node, locop::NodeSummary &s) const
{
  s.args().append("input_sizes", tbl()->lookup(node->input_sizes()));
  s.args().append("filter", tbl()->lookup(node->filter()));
  s.args().append("out_backprop", tbl()->lookup(node->out_backprop()));
  s.args().append("padding", node->padding());
  s.args().append("data_layout", node->data_layout());
  s.args().append("strides", pepper::str(node->strides()));
  s.state(locop::NodeSummary::State::PartiallyKnown);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFDepthwiseConv2dNative *node, locop::NodeSummary &s) const
{
  s.args().append("input", tbl()->lookup(node->input()));
  s.args().append("filter", tbl()->lookup(node->filter()));
  s.args().append("padding", node->padding());
  s.args().append("data_layout", node->data_layout());
  s.args().append("strides", pepper::str(node->strides()));
  s.state(locop::NodeSummary::State::PartiallyKnown);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFFusedBatchNorm *node, locop::NodeSummary &s) const
{
  s.args().append("x", tbl()->lookup(node->x()));
  s.args().append("scale", tbl()->lookup(node->scale()));
  s.args().append("offset", tbl()->lookup(node->offset()));
  s.args().append("mean", tbl()->lookup(node->mean()));
  s.args().append("variance", tbl()->lookup(node->variance()));
  s.args().append("epsilon", pepper::str(node->epsilon()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}
+
// ----- Per-node argument summaries (part 3) -----

bool TFNodeSummaryBuilder::summary(const TFMaximum *node, locop::NodeSummary &s) const
{
  s.args().append("x", tbl()->lookup(node->x()));
  s.args().append("y", tbl()->lookup(node->y()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFMaxPool *node, locop::NodeSummary &s) const
{
  s.args().append("input", tbl()->lookup(node->input()));
  s.args().append("ksize", pepper::str(node->ksize()));
  s.args().append("strides", pepper::str(node->strides()));
  s.args().append("padding", node->padding());
  s.args().append("data_layout", node->data_layout());

  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFMean *node, locop::NodeSummary &s) const
{
  s.args().append("input", tbl()->lookup(node->input()));
  s.args().append("reduction_indices", tbl()->lookup(node->reduction_indices()));
  s.args().append("keep_dims", pepper::str(node->keep_dims()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFMul *node, locop::NodeSummary &s) const
{
  s.args().append("x", tbl()->lookup(node->x()));
  s.args().append("y", tbl()->lookup(node->y()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFPack *node, locop::NodeSummary &s) const
{
  s.args().append("N", pepper::str(node->N()));
  s.args().append("axis", pepper::str(node->axis()));
  // One "values" argument per packed input
  for (uint32_t n = 0; n < node->N(); ++n)
    s.args().append("values", tbl()->lookup(node->values(n)));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFReshape *node, locop::NodeSummary &s) const
{
  s.args().append("tensor", tbl()->lookup(node->tensor()));
  s.args().append("shape", tbl()->lookup(node->shape()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}
+
// ----- Per-node argument summaries (part 4) -----

bool TFNodeSummaryBuilder::summary(const TFRsqrt *node, locop::NodeSummary &s) const
{
  s.args().append("x", tbl()->lookup(node->x()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFShape *node, locop::NodeSummary &s) const
{
  s.args().append("input", tbl()->lookup(node->input()));
  s.state(locop::NodeSummary::State::PartiallyKnown);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFSoftmax *node, locop::NodeSummary &s) const
{
  s.args().append("logits", tbl()->lookup(node->logits()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFSqueeze *node, locop::NodeSummary &s) const
{
  s.args().append("input", tbl()->lookup(node->input()));
  s.args().append("squeeze_dims", pepper::str(node->squeeze_dims()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFStopGradient *node, locop::NodeSummary &s) const
{
  s.args().append("input", tbl()->lookup(node->input()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFStridedSlice *node, locop::NodeSummary &s) const
{
  s.args().append("input", tbl()->lookup(node->input()));
  s.args().append("begin", tbl()->lookup(node->begin()));
  s.args().append("end", tbl()->lookup(node->end()));
  // "strides" is an optional input; skip it when absent
  if (node->strides() != nullptr)
    s.args().append("strides", tbl()->lookup(node->strides()));
  s.args().append("begin_mask", pepper::str(node->begin_mask()));
  s.args().append("end_mask", pepper::str(node->end_mask()));
  s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask()));
  s.args().append("new_axis_mask", pepper::str(node->new_axis_mask()));
  s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}

bool TFNodeSummaryBuilder::summary(const TFTanh *node, locop::NodeSummary &s) const
{
  s.args().append("x", tbl()->lookup(node->x()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}

// For virtual nodes
bool TFNodeSummaryBuilder::summary(const TFPush *node, locop::NodeSummary &s) const
{
  // "?" when the push is not yet associated with a graph-output index
  s.args().append("index", node->indexed() ? pepper::str(node->index()) : "?");
  s.args().append("from", tbl()->lookup(node->from()));
  s.state(locop::NodeSummary::State::Complete);
  return true;
}
+
+} // namespace
+
+namespace moco
+{
+namespace tf
+{
+
+bool MocoNodeSummaryBuilder::build(const loco::Node *node, locop::NodeSummary &s) const
+{
+ if (locop::CanonicalNodeSummaryBuilder(_tbl).build(node, s))
+ {
+ return true;
+ }
+
+ if (TFNodeSummaryBuilder(_tbl).build(node, s))
+ {
+ return true;
+ }
+
+ if (locoex::COpNodeSummaryBuilder(_tbl).build(node, s))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/TFFormattedGraph.h b/compiler/moco-tf/src/TFFormattedGraph.h
new file mode 100644
index 000000000..f79208536
--- /dev/null
+++ b/compiler/moco-tf/src/TFFormattedGraph.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TF_FORMATTED_GRAPH_H__
+#define __TF_FORMATTED_GRAPH_H__
+
+#include <locop/FormattedGraph.h>
+
+#include <stdex/Memory.h>
+
+namespace moco
+{
+namespace tf
+{
+
/**
 * @brief NodeSummaryBuilder that covers the Canonical, TF, and COp dialects
 *        (see TFFormattedGraph.cpp for the dispatch order).
 */
class MocoNodeSummaryBuilder final : public locop::NodeSummaryBuilder
{
public:
  MocoNodeSummaryBuilder(const locop::SymbolTable *tbl) : _tbl{tbl}
  {
    // DO NOTHING
  }

public:
  bool build(const loco::Node *node, locop::NodeSummary &s) const final;

private:
  // Symbol table used to resolve node names (not owned)
  const locop::SymbolTable *_tbl;
};
+
+class TFNodeSummaryBuilderFactory final : public locop::NodeSummaryBuilderFactory
+{
+public:
+ TFNodeSummaryBuilderFactory() = default;
+
+public:
+ std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tlb) const final
+ {
+ return stdex::make_unique<MocoNodeSummaryBuilder>(tlb);
+ }
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __TF_FORMATTED_GRAPH_H__
diff --git a/compiler/moco-tf/src/TFOptimizer.cpp b/compiler/moco-tf/src/TFOptimizer.cpp
new file mode 100644
index 000000000..2256b99b8
--- /dev/null
+++ b/compiler/moco-tf/src/TFOptimizer.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFOptimizer.h"
+
+#include "Knob.h"
+#include "ProgressReporter.h"
+#include "Transforms.h"
+
+#include <logo/Phase.h>
+
+#include <stdex/Memory.h>
+
+namespace moco
+{
+namespace tf
+{
+
/**
 * @brief Run knob-selected TF-dialect transforms over the given graph.
 *
 * Passes are registered in the order declared below and the phase is run with
 * the Saturate strategy. Shape/type inference is appended last so that nodes
 * introduced by the preceding transforms get annotated.
 */
void TFOptimizer::optimize(loco::Graph *g) const
{
  logo::Phase phase;

  /* TRANSFORM DECLARATION BEGIN */
  if (moco::tf::get<moco::tf::Knob::ResolveFusedBatchNorm>())
  {
    phase.emplace_back(stdex::make_unique<moco::ResolveFusedBatchNorm>());
  }
  if (moco::tf::get<moco::tf::Knob::FuseBinaryIntoPreceding>())
  {
    phase.emplace_back(stdex::make_unique<moco::FuseBinaryIntoPreceding>());
  }
  if (moco::tf::get<moco::tf::Knob::ResolveConstantShape>())
  {
    phase.emplace_back(stdex::make_unique<moco::ResolveConstantShape>());
  }
  if (moco::tf::get<moco::tf::Knob::ResolveReshapeWildcardDim>())
  {
    phase.emplace_back(stdex::make_unique<moco::ResolveReshapeWildcardDim>());
  }
  if (moco::tf::get<moco::tf::Knob::ResolveSquaredDifference>())
  {
    phase.emplace_back(stdex::make_unique<moco::ResolveSquaredDifference>());
  }
  if (moco::tf::get<moco::tf::Knob::RemoveTFIdentityNode>())
  {
    phase.emplace_back(stdex::make_unique<moco::RemoveTFIdentityNode>());
  }
  if (moco::tf::get<moco::tf::Knob::RemoveDeadNode>())
  {
    phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
  }
  if (moco::tf::get<moco::tf::Knob::SqueezeReduceNode>())
  {
    phase.emplace_back(stdex::make_unique<moco::SqueezeReduceNode>());
  }
  // Shape inference is needed for added nodes doing above transformations
  phase.emplace_back(stdex::make_unique<moco::tf::ShapeInferencePass>());
  phase.emplace_back(stdex::make_unique<moco::tf::TypeInferencePass>());
  /* TRANSFORM DECLARATION END */

  // Attach a progress reporter so each pass logs before/after state
  ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
  logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
  phase_runner.attach(&prog);
  phase_runner.run(phase);
}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/TFOptimizer.h b/compiler/moco-tf/src/TFOptimizer.h
new file mode 100644
index 000000000..69ab74d3e
--- /dev/null
+++ b/compiler/moco-tf/src/TFOptimizer.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TF_OPTIMIZER_H__
+#define __TF_OPTIMIZER_H__
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+class TFOptimizer final
+{
+public:
+ void optimize(loco::Graph *) const;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __TF_OPTIMIZER_H__
diff --git a/compiler/moco-tf/src/TFOptimizer.test.cpp b/compiler/moco-tf/src/TFOptimizer.test.cpp
new file mode 100644
index 000000000..26348f6c8
--- /dev/null
+++ b/compiler/moco-tf/src/TFOptimizer.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFOptimizer.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+// TFOptimizer SHOULD NOT crash even though a given graph is empty
+TEST(TFOptimizer, empty_graph)
+{
+ moco::tf::TFOptimizer tfo;
+
+ loco::Graph g;
+
+ tfo.optimize(&g);
+
+ SUCCEED();
+}
diff --git a/compiler/moco-tf/src/TFReduceCanonicalzeHelper.h b/compiler/moco-tf/src/TFReduceCanonicalzeHelper.h
new file mode 100644
index 000000000..abd24cec8
--- /dev/null
+++ b/compiler/moco-tf/src/TFReduceCanonicalzeHelper.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TF_REDUCE_CANONICALIZE_HELPER_H__
+#define __TF_REDUCE_CANONICALIZE_HELPER_H__
+
+#include <moco/IR/TFDialect.h>
+#include <moco/IR/TFNodes.h>
+
+#include <loco/Service/ShapeInference.h>
+
+#include <moco/Log.h>
+
+namespace
+{
+
+template <typename TFNodeT> loco::ReduceFunc reduceFunc(void);
+
+template <> loco::ReduceFunc reduceFunc<moco::TFMean>(void) { return loco::ReduceFunc::Mean; }
+
+template <typename TFNode> bool canonicalize_reduce_node(TFNode *node)
+{
+ LOGGER(l);
+
+ INFO(l) << "TFNodeCanonicalize ReduceNode begin";
+
+ auto graph = node->graph();
+
+ /**
+ * This will replace T/F Reduce node with a corresponding Canonical Reduce node
+ *
+ * BEFORE
+ * reduction_indices -------- T/F Node -- C
+ * input -------/
+ *
+ * AFTER
+ * +------ T/F Node --
+ * | /
+ * reduction_indices -------
+ * | \
+ * input -+------ Canonical Node -- C
+ *
+ * NOTE
+ * - T/F Node is disconnected from C after transformation
+ */
+
+ // TFSqueeze had to be inserted if keep_dims() was false
+ assert(node->keep_dims());
+
+ auto axes_node = node->reduction_indices();
+ assert(axes_node != nullptr);
+
+ auto node_tensor_shape = loco::shape_get(node).template as<loco::TensorShape>();
+
+ // Canonicalization into TensorReduce is valid when reduction indices is constant
+ // TODO Support general TensorReduce case
+ std::vector<int32_t> axes_values;
+ if (auto const_axes = dynamic_cast<moco::TFConst *>(axes_node))
+ {
+ // TODO Support S64 type
+ assert(const_axes->dtype() == loco::DataType::S32);
+
+ for (uint32_t i = 0; i < const_axes->size<loco::DataType::S32>(); ++i)
+ {
+ int32_t axis = const_axes->at<loco::DataType::S32>(i);
+ if (axis < 0)
+ axis += node_tensor_shape.rank();
+ axes_values.push_back(axis);
+ }
+ }
+ else if (auto const_axes = dynamic_cast<loco::ConstGen *>(axes_node))
+ {
+ // TODO Support S64 type
+ assert(const_axes->dtype() == loco::DataType::S32);
+
+ for (uint32_t i = 0; i < const_axes->size<loco::DataType::S32>(); ++i)
+ {
+ int32_t axis = const_axes->at<loco::DataType::S32>(i);
+ if (axis < 0)
+ axis += node_tensor_shape.rank();
+ axes_values.push_back(axis);
+ }
+ }
+ else
+ return false;
+
+ // Create loco node to replace
+ auto reduce = graph->nodes()->template create<loco::TensorReduce>();
+
+ // replace
+ reduce->func(reduceFunc<TFNode>());
+ reduce->input(node->input());
+ for (uint32_t i = 0; i < axes_values.size(); ++i)
+ reduce->axes()->insert(axes_values.at(i));
+
+ replace(node).with(reduce);
+
+ INFO(l) << "TFNodeCanonicalize ReduceNode done";
+
+ return true;
+}
+
+} // namespace
+
+#endif // __TF_REDUCE_CANONICALIZE_HELPER_H__
diff --git a/compiler/moco-tf/src/TestHelper.h b/compiler/moco-tf/src/TestHelper.h
new file mode 100644
index 000000000..dd32d4433
--- /dev/null
+++ b/compiler/moco-tf/src/TestHelper.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TEST_HELPER_H__
+#define __TEST_HELPER_H__
+
+#include <loco.h>
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+#define STRING_CONTENT(content) #content
+
+namespace moco
+{
+namespace tf
+{
+namespace test
+{
+
+template <typename T> T *find_first_node_bytype(loco::Graph *g)
+{
+ T *first_node = nullptr;
+ loco::Graph::NodeContext *nodes = g->nodes();
+ uint32_t count = nodes->size();
+
+ for (uint32_t i = 0; i < count; ++i)
+ {
+ first_node = dynamic_cast<T *>(nodes->at(i));
+ if (first_node != nullptr)
+ break;
+ }
+
+ return first_node;
+}
+
+template <typename T> std::vector<T *> find_nodes_bytype(loco::Graph *g)
+{
+ std::vector<T *> find_nodes;
+ loco::Graph::NodeContext *nodes = g->nodes();
+ uint32_t count = nodes->size();
+
+ for (uint32_t i = 0; i < count; ++i)
+ {
+ auto node = dynamic_cast<T *>(nodes->at(i));
+ if (node != nullptr)
+ find_nodes.push_back(node);
+ }
+
+ return find_nodes;
+}
+
+/**
+ * @brief Append setup output of graph by adding loco::Push node
+ *
+ * @note This is subject to change when loco changes I/O treatment
+ */
+void setup_output_node(loco::Graph *graph, loco::Node *last_node);
+
+} // namespace test
+} // namespace tf
+} // namespace moco
+
+#include <moco/IR/TFNode.h>
+
+#include <moco/Import/GraphBuilder.h>
+
+#include <plier/tf/TestHelper.h>
+
+namespace moco
+{
+namespace tf
+{
+namespace test
+{
+
+class TFNodeBuildTester
+{
+public:
+ TFNodeBuildTester();
+
+public:
+ void inputs(const std::vector<std::string> &names);
+ void output(const char *name);
+ moco::TFNode *output(void);
+
+ void run(tensorflow::NodeDef &node_def, moco::GraphBuilder &graph_builder);
+
+private:
+ std::unique_ptr<moco::SymbolTable> _tensor_names;
+ std::unique_ptr<loco::Graph> _graph;
+
+ std::vector<moco::TFNode *> _inputs;
+ const char *_output{nullptr};
+};
+
+} // namespace test
+} // namespace tf
+} // namespace moco
+
+#endif // __TEST_HELPER_H__
diff --git a/compiler/moco-tf/src/TestHelper.test.cpp b/compiler/moco-tf/src/TestHelper.test.cpp
new file mode 100644
index 000000000..1e8c38e36
--- /dev/null
+++ b/compiler/moco-tf/src/TestHelper.test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestHelper.h"
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <cstring>
+
+namespace moco
+{
+namespace tf
+{
+namespace test
+{
+
+void setup_output_node(loco::Graph *graph, loco::Node *last_node)
+{
+ // add push as output
+ auto push_node = graph->nodes()->create<loco::Push>();
+ push_node->from(last_node);
+
+ // set the graph output name and node object
+ auto graph_output = graph->outputs()->create();
+ graph_output->name("output");
+ graph_output->dtype(loco::DataType::FLOAT32);
+ loco::link(graph_output, push_node);
+}
+
+} // namespace test
+} // namespace tf
+} // namespace moco
+
+#include <moco/IR/Nodes/TFConst.h>
+
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+namespace moco
+{
+namespace tf
+{
+namespace test
+{
+
+TFNodeBuildTester::TFNodeBuildTester()
+{
+ _graph = loco::make_graph();
+ _tensor_names = stdex::make_unique<moco::SymbolTable>();
+}
+
+void TFNodeBuildTester::inputs(const std::vector<std::string> &names)
+{
+ for (auto name : names)
+ {
+ auto input = _graph->nodes()->create<moco::TFConst>();
+ moco::TensorName name_01(name, 0);
+ _tensor_names->enroll(name_01, input);
+
+ _inputs.push_back(input);
+ }
+}
+
+void TFNodeBuildTester::output(const char *name) { _output = name; }
+
+moco::TFNode *TFNodeBuildTester::output(void)
+{
+ assert(_output != nullptr);
+
+ moco::TensorName tname(_output, 0);
+ return static_cast<moco::TFNode *>(_tensor_names->node(tname));
+}
+
+void TFNodeBuildTester::run(tensorflow::NodeDef &nodedef, moco::GraphBuilder &graphbuilder)
+{
+ assert(_output != nullptr);
+
+ auto node_defs = stdex::make_unique<moco::NodeDefTable>();
+ auto updates = stdex::make_unique<moco::UpdateQueue>();
+
+ moco::GraphBuilderContext gb_context(_graph.get(), node_defs.get(), _tensor_names.get(),
+ updates.get());
+
+ EXPECT_TRUE(graphbuilder.validate(nodedef));
+ graphbuilder.build(nodedef, &gb_context);
+
+ for (auto &update : updates->queue())
+ {
+ update->input(_tensor_names.get());
+ }
+
+ auto tfnode = output();
+ ASSERT_NE(tfnode, nullptr);
+
+ int idx = 0;
+ ASSERT_EQ(tfnode->arity(), _inputs.size());
+ for (auto input : _inputs)
+ {
+ ASSERT_EQ(tfnode->arg(idx++), input);
+ }
+}
+
+} // namespace test
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Transform.cpp b/compiler/moco-tf/src/Transform.cpp
new file mode 100644
index 000000000..f19ce21c4
--- /dev/null
+++ b/compiler/moco-tf/src/Transform.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Transform.h"
+
+namespace moco
+{
+namespace tf
+{
+
+std::string transform_name(const Transform *t)
+{
+ if (t->name() == nullptr)
+ {
+ return "(unknown)";
+ }
+
+ return t->name();
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Transform.h b/compiler/moco-tf/src/Transform.h
new file mode 100644
index 000000000..80cb9f97f
--- /dev/null
+++ b/compiler/moco-tf/src/Transform.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_TRANSFORM_H__
+#define __MOCO_TF_TRANSFORM_H__
+
+#include <loco.h>
+
+#include <logo/Pass.h>
+
+#include <string>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @note Transform will be replaced by logo::Pass
+ */
+
+using Transform = logo::Pass;
+
+std::string transform_name(const Transform *);
+
+template <typename DERIVED> DERIVED *as(loco::Node *node) { return dynamic_cast<DERIVED *>(node); }
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_TRANSFORM_H__
diff --git a/compiler/moco-tf/src/Transform.test.cpp b/compiler/moco-tf/src/Transform.test.cpp
new file mode 100644
index 000000000..e029b54c4
--- /dev/null
+++ b/compiler/moco-tf/src/Transform.test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Transform.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+TEST(Transform, transform_name_over_unnamed_transform)
+{
+ struct SampleTransform final : public moco::tf::Transform
+ {
+ bool run(loco::Graph *) final { return false; }
+ };
+
+ SampleTransform sample_transform;
+
+ ASSERT_EQ(moco::tf::transform_name(&sample_transform), "(unknown)");
+}
+
+TEST(Transform, transform_name_over_named_transform)
+{
+ struct SampleTransform final : public moco::tf::Transform
+ {
+ const char *name(void) const final { return "sample"; }
+ bool run(loco::Graph *) final { return false; }
+ };
+
+ SampleTransform sample_transform;
+
+ ASSERT_EQ(moco::tf::transform_name(&sample_transform), "sample");
+}
diff --git a/compiler/moco-tf/src/Transforms.h b/compiler/moco-tf/src/Transforms.h
new file mode 100644
index 000000000..f14b81675
--- /dev/null
+++ b/compiler/moco-tf/src/Transforms.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_TRANSFORMS_H__
+#define __MOCO_TF_TRANSFORMS_H__
+
+#include "Transforms/ShapeInferencePass.h"
+#include "Transforms/TypeInferencePass.h"
+
+#include <logo/Passes.h>
+#include <moco/Pass/Passes.h>
+
+#endif // __MOCO_TF_TRANSFORMS_H__
diff --git a/compiler/moco-tf/src/Transforms/ShapeInferencePass.cpp b/compiler/moco-tf/src/Transforms/ShapeInferencePass.cpp
new file mode 100644
index 000000000..64ba9dfb1
--- /dev/null
+++ b/compiler/moco-tf/src/Transforms/ShapeInferencePass.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ShapeInferencePass.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include <moco/Service/TFShapeInferenceRule.h>
+
+#include <loco.h>
+
+#include <loco/IR/CanonicalDialect.h>
+
+#include <loco/Service/ShapeInference.h>
+#include <loco/Service/ShapeInferenceRule.h>
+#include <loco/Service/CanonicalShapeInferenceRule.h>
+#include <loco/Service/MultiDialectShapeInferenceRule.h>
+
+#include <locoex/COpDialect.h>
+#include <locoex/Service/COpShapeInferenceRule.h>
+
+namespace moco
+{
+namespace tf
+{
+
+bool ShapeInferencePass::run(loco::Graph *graph)
+{
+ loco::CanonicalShapeInferenceRule canonical_rule;
+ moco::TFShapeInferenceRule tf_rule;
+ locoex::COpShapeInferenceRule cop_rule; // rule for custom op
+
+ loco::MultiDialectShapeInferenceRule rules;
+
+ rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
+ .bind(TFDialect::get(), &tf_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
+
+ return loco::apply(&rules).to(graph);
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Transforms/ShapeInferencePass.h b/compiler/moco-tf/src/Transforms/ShapeInferencePass.h
new file mode 100644
index 000000000..f8be5f146
--- /dev/null
+++ b/compiler/moco-tf/src/Transforms/ShapeInferencePass.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_SHAPE_INFERENCE_PASS_H__
+#define __MOCO_TF_SHAPE_INFERENCE_PASS_H__
+
+#include "Transform.h"
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Run shape inference to the graph
+ */
+class ShapeInferencePass : public Transform
+{
+public:
+ const char *name(void) const final { return "ShapeInferencePass"; }
+
+public:
+ bool run(loco::Graph *graph) override;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_SHAPE_INFERENCE_PASS_H__
diff --git a/compiler/moco-tf/src/Transforms/TypeInferencePass.cpp b/compiler/moco-tf/src/Transforms/TypeInferencePass.cpp
new file mode 100644
index 000000000..db6cf7521
--- /dev/null
+++ b/compiler/moco-tf/src/Transforms/TypeInferencePass.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TypeInferencePass.h"
+
+#include <moco/IR/TFDialect.h>
+
+#include <moco/Service/TFTypeInferenceRule.h>
+
+#include <loco.h>
+
+#include <loco/IR/CanonicalDialect.h>
+#include <loco/Service/TypeInference.h>
+
+#include <locoex/COpDialect.h>
+#include <locoex/Service/COpTypeInference.h>
+
+namespace moco
+{
+namespace tf
+{
+
+bool TypeInferencePass::run(loco::Graph *graph)
+{
+ loco::CanonicalTypeInferenceRule canonical_rule;
+ moco::TFTypeInferenceRule tf_rule; // rule for TF dialect
+ locoex::COpTypeInferenceRule cop_rule; // rule for custom op
+
+ loco::MultiDialectTypeInferenceRule rules;
+
+ rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
+ .bind(TFDialect::get(), &tf_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
+
+ loco::apply(&rules).to(graph);
+
+ return false;
+}
+
+} // namespace tf
+} // namespace moco
diff --git a/compiler/moco-tf/src/Transforms/TypeInferencePass.h b/compiler/moco-tf/src/Transforms/TypeInferencePass.h
new file mode 100644
index 000000000..88a2f86f1
--- /dev/null
+++ b/compiler/moco-tf/src/Transforms/TypeInferencePass.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TF_TYPE_INFERENCE_PASS_H__
+#define __MOCO_TF_TYPE_INFERENCE_PASS_H__
+
+#include "Transform.h"
+
+#include <loco.h>
+
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Run type inference to the graph
+ */
+class TypeInferencePass : public Transform
+{
+public:
+ const char *name(void) const final { return "TypeInferencePass"; }
+
+public:
+ bool run(loco::Graph *graph) override;
+};
+
+} // namespace tf
+} // namespace moco
+
+#endif // __MOCO_TF_TYPE_INFERENCE_PASS_H__
diff --git a/compiler/moco-value-pbtxt-test/.gitignore b/compiler/moco-value-pbtxt-test/.gitignore
new file mode 100644
index 000000000..8dbfa9012
--- /dev/null
+++ b/compiler/moco-value-pbtxt-test/.gitignore
@@ -0,0 +1 @@
+/test.local.lst
diff --git a/compiler/moco-value-pbtxt-test/CMakeLists.txt b/compiler/moco-value-pbtxt-test/CMakeLists.txt
new file mode 100644
index 000000000..a469c20dc
--- /dev/null
+++ b/compiler/moco-value-pbtxt-test/CMakeLists.txt
@@ -0,0 +1,136 @@
+option(MOCO_VALUE_PBTXT_TEST "Enable moco value test for pbtxt input model" ON)
+
+if(NOT MOCO_VALUE_PBTXT_TEST)
+ return()
+endif(NOT MOCO_VALUE_PBTXT_TEST)
+
+if(NOT TARGET tfkit)
+ message(STATUS "moco: Skip test material preparation as tfkit is not defined")
+ return()
+endif(NOT TARGET tfkit)
+
+nncc_find_resource(TensorFlowTests)
+
+#
+# Copy [Testcase]/test.pbtxt to Testcase.pbtxt in binary folder
+# Copy [Testcase]/test.info to Testcase.info in binary folder
+# Encode Testcase.pbtxt to Testcase.pb
+#
+set(TEST_PBTXT_FILE "test.pbtxt")
+set(TEST_INFO_FILE "test.info")
+set(TEST_REPO "${TensorFlowTests_DIR}") # Where to find tests
+set(TEST_SPACE "${CMAKE_CURRENT_BINARY_DIR}") # Where to run tests
+
+unset(TESTCASES)
+
+macro(add NAME)
+ list(APPEND TESTCASES ${NAME})
+endmacro(add)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+unset(MOCO_VALUE_PBTXT_DEPS)
+
+foreach(PREFIX IN ITEMS ${TESTCASES})
+ set(PBTXTFILE "${PREFIX}/${TEST_PBTXT_FILE}")
+ get_filename_component(DIR_NAME ${PBTXTFILE} DIRECTORY)
+
+ set(PBTXT_SOURCE_FILE "${DIR_NAME}.pbtxt")
+ set(PBTXT_SOURCE_PATH "${TEST_SPACE}/${DIR_NAME}.pbtxt")
+
+ set(PBTXT_INFO_FILE "${DIR_NAME}.info")
+ set(PBTXT_INFO_PATH "${TEST_SPACE}/${PBTXT_INFO_FILE}")
+
+ set(PB_OUTPUT_FILE "${DIR_NAME}.pb")
+ set(PB_PATH "${TEST_SPACE}/${PB_OUTPUT_FILE}")
+
+ # Copy files
+ add_custom_command(
+ OUTPUT ${PBTXT_SOURCE_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TEST_REPO}/${DIR_NAME}/${TEST_PBTXT_FILE}"
+ "${PBTXT_SOURCE_PATH}"
+ DEPENDS "${TEST_REPO}/${DIR_NAME}/${TEST_PBTXT_FILE}"
+ COMMENT "Copy ${PBTXT_SOURCE_FILE}"
+ )
+
+ add_custom_command(
+ OUTPUT ${PBTXT_INFO_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TEST_REPO}/${DIR_NAME}/${TEST_INFO_FILE}"
+ "${PBTXT_INFO_PATH}"
+ DEPENDS "${TEST_REPO}/${DIR_NAME}/${TEST_INFO_FILE}"
+ COMMENT "Copy ${PBTXT_INFO_FILE}"
+ )
+
+ # Use tfkit to encode
+ add_custom_command(
+ OUTPUT ${PB_OUTPUT_FILE}
+ COMMAND $<TARGET_FILE:tfkit> encode ${PBTXT_SOURCE_PATH} ${PB_OUTPUT_FILE}
+ DEPENDS tfkit ${PBTXT_SOURCE_PATH}
+ COMMENT "Generate ${PB_OUTPUT_FILE}"
+ )
+
+ list(APPEND MOCO_VALUE_PBTXT_TESTS ${DIR_NAME})
+ list(APPEND MOCO_VALUE_PBTXT_DEPS ${PBTXT_INFO_FILE} ${PB_OUTPUT_FILE})
+
+endforeach(PREFIX)
+
+nnas_find_package(TensorFlow QUIET)
+if(NOT TensorFlow_FOUND)
+ message(STATUS "moco: Skip adding test as TensorFlow is not found")
+ return()
+endif(NOT TensorFlow_FOUND)
+
+##
+## Copy runall.sh
+##
+set(TEST_RUNNER_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/runall.sh")
+set(TEST_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/run-tests")
+
+add_custom_command(
+ OUTPUT ${TEST_RUNNER}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TEST_RUNNER_SOURCE}" "${TEST_RUNNER}"
+ DEPENDS ${TEST_RUNNER_SOURCE}
+ COMMENT "Generate test runner"
+)
+
+list(APPEND MOCO_VALUE_PBTXT_DEPS "${TEST_RUNNER}")
+
+###
+### Generate toolchain.config
+###
+set(TOOLCHIAN_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/toolchain.config")
+
+add_custom_command(
+ OUTPUT ${TOOLCHIAN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TOOLCHIAN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'NNKIT_RUN_PATH=\"$<TARGET_FILE:nnkit-run>\"' >> ${TOOLCHIAN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF_BACKEND_PATH=\"$<TARGET_FILE:nnkit_tf_backend>\"' >> ${TOOLCHIAN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MOCO_TF_BACKEND_PATH=\"$<TARGET_FILE:nnkit_moco_tf_backend>\"' >> ${TOOLCHIAN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF2TFLITE_PATH=\"$<TARGET_FILE:tf2tflite>\"' >> ${TOOLCHIAN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'RANDOMIZE_ACTION_PATH=\"$<TARGET_FILE:nnkit_randomize_action>\"' >> ${TOOLCHIAN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_EXPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_export_action>\"' >> ${TOOLCHIAN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_IMPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_import_action>\"' >> ${TOOLCHIAN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'I5DIFF_PATH=\"$<TARGET_FILE:i5diff>\"' >> ${TOOLCHIAN_CONFIG}
+ DEPENDS
+ nnkit-run
+ nnkit_tf_backend
+ nnkit_moco_tf_backend
+ tf2tflite
+ nnkit_randomize_action
+ nnkit_HDF5_export_action
+ nnkit_HDF5_import_action
+ i5diff
+ COMMENT "Generate test configuration"
+)
+
+list(APPEND MOCO_VALUE_PBTXT_DEPS "${TOOLCHIAN_CONFIG}")
+
+# This target enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(moco_value_pbtxt_test_deps ALL DEPENDS ${MOCO_VALUE_PBTXT_DEPS})
+
+# Run tests
+add_test(NAME moco_value_pbtxt_test
+ COMMAND "${TEST_RUNNER}" "${TOOLCHIAN_CONFIG}" "${TEST_SPACE}" ${MOCO_VALUE_PBTXT_TESTS})
diff --git a/compiler/moco-value-pbtxt-test/README.md b/compiler/moco-value-pbtxt-test/README.md
new file mode 100644
index 000000000..f5d1ac225
--- /dev/null
+++ b/compiler/moco-value-pbtxt-test/README.md
@@ -0,0 +1 @@
+# moco-value-pbtxt-test
diff --git a/compiler/moco-value-pbtxt-test/requires.cmake b/compiler/moco-value-pbtxt-test/requires.cmake
new file mode 100644
index 000000000..771418fed
--- /dev/null
+++ b/compiler/moco-value-pbtxt-test/requires.cmake
@@ -0,0 +1,2 @@
+require("tfkit")
+require("nnkit")
diff --git a/compiler/moco-value-pbtxt-test/runall.sh b/compiler/moco-value-pbtxt-test/runall.sh
new file mode 100755
index 000000000..ee43f1ad6
--- /dev/null
+++ b/compiler/moco-value-pbtxt-test/runall.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+# Need at least 2 arguments
+if [[ $# -lt 2 ]]; then
+ echo "USAGE: $0 ..."
+ echo
+ echo "ARGUMENTS:"
+ echo " [test.config path]"
+ echo " [WORKDIR]"
+ echo " [Prefix1]"
+ echo " [Prefix2]"
+ echo " ..."
+ exit 255
+fi
+
+CONFIG_PATH="$1"; shift
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- starting moco test tf"
+echo "-- Found nnkit-run: ${NNKIT_RUN_PATH}"
+echo "-- Found TF backend: ${TF_BACKEND_PATH}"
+echo "-- Found moco TF backend: ${MOCO_TF_BACKEND_PATH}"
+echo "-- Found randomize action: ${RANDOMIZE_ACTION_PATH}"
+echo "-- Found HDF5 export action: ${HDF5_EXPORT_ACTION_PATH}"
+echo "-- Found HDF5 import action: ${HDF5_IMPORT_ACTION_PATH}"
+echo "-- Found i5diff: ${I5DIFF_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ echo "-- Found pb: ${PREFIX}.pb"
+
+ # Show commands
+ set -x
+ "${NNKIT_RUN_PATH}" \
+ --backend "${TF_BACKEND_PATH}" \
+ --backend-arg "${WORKDIR}/${PREFIX}.pb" \
+ --backend-arg "${WORKDIR}/${PREFIX}.info" \
+ --pre "${RANDOMIZE_ACTION_PATH}" \
+ --pre "${HDF5_EXPORT_ACTION_PATH}" \
+ --pre-arg "${WORKDIR}/${PREFIX}.input.h5" \
+ --post "${HDF5_EXPORT_ACTION_PATH}" \
+ --post-arg "${WORKDIR}/${PREFIX}.expected.h5"
+
+ "${NNKIT_RUN_PATH}" \
+ --backend "${MOCO_TF_BACKEND_PATH}" \
+ --backend-arg "${WORKDIR}/${PREFIX}.pb" \
+ --backend-arg "${WORKDIR}/${PREFIX}.info" \
+ --pre "${HDF5_IMPORT_ACTION_PATH}" \
+ --pre-arg "${WORKDIR}/${PREFIX}.input.h5" \
+ --post "${HDF5_EXPORT_ACTION_PATH}" \
+ --post-arg "${WORKDIR}/${PREFIX}.obtained.h5"
+
+ "${I5DIFF_PATH}" -d 0.001 "${PREFIX}.expected.h5" "${PREFIX}.obtained.h5"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$PREFIX")
+ else
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/moco-value-pbtxt-test/test.lst b/compiler/moco-value-pbtxt-test/test.lst
new file mode 100644
index 000000000..c2d8e42ff
--- /dev/null
+++ b/compiler/moco-value-pbtxt-test/test.lst
@@ -0,0 +1,103 @@
+add(NET_0000)
+add(NET_0001)
+add(NET_0002)
+add(NET_0003)
+add(NET_0004)
+add(NET_0005)
+add(NET_0006)
+add(NET_0007)
+add(NET_0008)
+add(NET_0009)
+add(NET_0010)
+add(NET_0011)
+add(NET_0012)
+add(NET_0013)
+add(NET_0014)
+add(NET_0015)
+add(NET_0016)
+add(NET_0017)
+add(NET_0018)
+add(NET_0019)
+add(NET_0020)
+add(NET_0021)
+add(NET_0022)
+add(NET_0023)
+add(NET_0024)
+add(NET_0025)
+add(NET_0026)
+add(NET_0028)
+add(NET_0029)
+add(NET_0030)
+add(NET_0031)
+add(NET_0032)
+add(NET_0033)
+add(NET_0034)
+add(NET_0035)
+# add(NET_0036)
+add(NET_0037)
+add(NET_0038)
+add(NET_0039)
+add(NET_0040)
+add(NET_0041)
+add(REGRESSION_0000)
+add(REGRESSION_0001)
+add(REGRESSION_0002)
+add(UNIT_Add_000)
+add(UNIT_Add_001)
+add(UNIT_Add_002)
+add(UNIT_Add_004)
+add(UNIT_Add_005)
+add(UNIT_AvgPool_000)
+add(UNIT_AvgPool_001)
+add(UNIT_BiasAdd_000)
+add(UNIT_BiasAdd_001)
+add(UNIT_BiasAdd_002)
+add(UNIT_ConcatV2_000)
+add(UNIT_ConcatV2_001)
+add(UNIT_ConcatV2_002)
+add(UNIT_Const_000)
+add(UNIT_Conv2D_000)
+add(UNIT_Conv2DBackpropInput_000)
+add(UNIT_Conv2DBackpropInput_001)
+add(UNIT_DepthwiseConv2dNative_000)
+add(UNIT_DepthwiseConv2dNative_001)
+add(UNIT_FusedBatchNorm_000)
+add(UNIT_FusedBatchNorm_001)
+add(UNIT_Maximum_000)
+add(UNIT_Maximum_001)
+add(UNIT_Maximum_002)
+add(UNIT_MaxPool_000)
+add(UNIT_MaxPool_001)
+add(UNIT_Mean_000)
+add(UNIT_Mean_001)
+add(UNIT_Mean_002)
+add(UNIT_Mean_003)
+add(UNIT_Mul_000)
+add(UNIT_Mul_001)
+add(UNIT_Mul_002)
+add(UNIT_Pad_000)
+add(UNIT_Placeholder_000)
+add(UNIT_Placeholder_001)
+add(UNIT_Placeholder_002)
+add(UNIT_Placeholder_003)
+add(UNIT_RealDiv_000)
+add(UNIT_Relu_000)
+add(UNIT_Relu6_000)
+add(UNIT_Reshape_000)
+add(UNIT_Rsqrt_000)
+add(UNIT_Softmax_000)
+add(UNIT_Softmax_001)
+add(UNIT_Softmax_002)
+add(UNIT_Softmax_003)
+add(UNIT_Sqrt_000)
+add(UNIT_SquaredDifference_000)
+add(UNIT_SquaredDifference_001)
+add(UNIT_Squeeze_000)
+add(UNIT_Squeeze_001)
+add(UNIT_Squeeze_002)
+add(UNIT_Squeeze_003)
+add(UNIT_StopGradient_000)
+add(UNIT_StopGradient_001)
+add(UNIT_Sub_000)
+add(UNIT_Sub_001)
+add(UNIT_Tanh_000)
diff --git a/compiler/moco/CMakeLists.txt b/compiler/moco/CMakeLists.txt
new file mode 100644
index 000000000..9fdd4398e
--- /dev/null
+++ b/compiler/moco/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_subdirectory(lang)
+add_subdirectory(support)
+add_subdirectory(service)
+add_subdirectory(import)
+add_subdirectory(pass)
diff --git a/compiler/moco/README.md b/compiler/moco/README.md
new file mode 100644
index 000000000..13c7aaae3
--- /dev/null
+++ b/compiler/moco/README.md
@@ -0,0 +1,3 @@
+# moco
+
+_moco_ provides building blocks to load and process TensorFlow models and to produce a graph of loco canonical IR
diff --git a/compiler/moco/import/CMakeLists.txt b/compiler/moco/import/CMakeLists.txt
new file mode 100644
index 000000000..43107776e
--- /dev/null
+++ b/compiler/moco/import/CMakeLists.txt
@@ -0,0 +1,26 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(moco_import SHARED ${SOURCES})
+target_include_directories(moco_import PRIVATE src)
+target_include_directories(moco_import PUBLIC include)
+target_link_libraries(moco_import PUBLIC moco_lang)
+target_link_libraries(moco_import PUBLIC mio_tf)
+target_link_libraries(moco_import PUBLIC stdex)
+target_link_libraries(moco_import PRIVATE nncc_common)
+target_link_libraries(moco_import PRIVATE plier_tf)
+target_link_libraries(moco_import PRIVATE oops)
+install(TARGETS moco_import DESTINATION lib) # moco_tf_frontend requires moco_import
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(moco_import_test ${TESTS})
+target_include_directories(moco_import_test PRIVATE src)
+target_link_libraries(moco_import_test moco_import)
+target_link_libraries(moco_import_test plier_tf)
+target_link_libraries(moco_import_test oops)
diff --git a/compiler/moco/import/README.md b/compiler/moco/import/README.md
new file mode 100644
index 000000000..2704d35d6
--- /dev/null
+++ b/compiler/moco/import/README.md
@@ -0,0 +1,3 @@
+# moco-import
+
+_moco-import_ provides a way to import a TensorFlow model file into the _moco_ TensorFlow Dialect IR
diff --git a/compiler/moco/import/include/moco/GraphHelper.h b/compiler/moco/import/include/moco/GraphHelper.h
new file mode 100644
index 000000000..fad62af4e
--- /dev/null
+++ b/compiler/moco/import/include/moco/GraphHelper.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_GRAPH_HELPER_H__
+#define __MOCO_GRAPH_HELPER_H__
+
+#include <moco/IR/TFNode.h>
+
+#include <loco.h>
+
+namespace moco
+{
+
+/**
+ * @brief find_node_byname() will return a node with type T with given name
+ * in graph g
+ *
+ * @note this uses simple linear search, but can speed up with better
+ * algorithms when needed.
+ */
+template <typename T> T *find_node_byname(loco::Graph *g, const char *name)
+{
+ T *first_node = nullptr;
+ loco::Graph::NodeContext *nodes = g->nodes();
+ uint32_t count = nodes->size();
+
+ for (uint32_t i = 0; i < count; ++i)
+ {
+ auto tfnode = dynamic_cast<TFNode *>(nodes->at(i));
+ if (tfnode != nullptr)
+ {
+ if (tfnode->name() == name)
+ {
+      // if tfnode is NOT of type T then the return value will be nullptr
+      // this is OK because the caller wanted type T but the node isn't
+ return dynamic_cast<T *>(tfnode);
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+} // namespace moco
+
+#endif // __MOCO_GRAPH_HELPER_H__
diff --git a/compiler/moco/import/include/moco/Import/GraphBuilder.h b/compiler/moco/import/include/moco/Import/GraphBuilder.h
new file mode 100644
index 000000000..c19918def
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/GraphBuilder.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_GRAPH_BUILDER_H__
+#define __MOCO_IMPORT_GRAPH_BUILDER_H__
+
+#include "GraphBuilderContext.h"
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+namespace moco
+{
+
+/**
+ * @brief Interface to convert a TF NodeDef to a loco::Node (e.g., Conv2DGraphBuilder)
+ */
+class GraphBuilder
+{
+public:
+ virtual bool validate(const tensorflow::NodeDef &) const = 0;
+ virtual void build(const tensorflow::NodeDef &, GraphBuilderContext *) const = 0;
+ virtual ~GraphBuilder() {}
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_GRAPH_BUILDER_H__
diff --git a/compiler/moco/import/include/moco/Import/GraphBuilderContext.h b/compiler/moco/import/include/moco/Import/GraphBuilderContext.h
new file mode 100644
index 000000000..ae4f02c2a
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/GraphBuilderContext.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_GRAPH_BUILDER_CONTEXT_H__
+#define __MOCO_IMPORT_GRAPH_BUILDER_CONTEXT_H__
+
+#include <moco/Names.h>
+
+#include <loco.h>
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace moco
+{
+
+/**
+ * @brief Class to store and query tensorflow::NodeDef* with string name key
+ */
+class NodeDefTable
+{
+public:
+ /**
+ * @brief Registers a name with corresponding tensorflow::NodeDef*
+ */
+ void enroll(const std::string &node_name, const tensorflow::NodeDef *node);
+ /**
+ * @brief Queries enrolled(registered) with name and return node if found
+ * Will throw runtime_error if not found
+ */
+ const tensorflow::NodeDef *node(const std::string &node_name) const;
+
+private:
+ using MapNameNode_t = std::map<std::string, const tensorflow::NodeDef *>;
+
+ MapNameNode_t _table;
+};
+
+/**
+ * @brief Class to store and query loco::Node* with string name key
+ */
+class SymbolTable
+{
+public:
+ /**
+ * @brief Registers a name with corresponding loco::Node *
+ */
+ void enroll(const TensorName &tensor_name, loco::Node *node);
+ /**
+ * @brief Queries enrolled(registered) with name and return node if found
+ * Will throw runtime_error if not found
+ */
+ loco::Node *node(const TensorName &tensor_name) const;
+
+private:
+ using MapNameNode_t = std::map<TensorName, loco::Node *, TensorNameCompare>;
+
+ MapNameNode_t _table;
+};
+
+/**
+ * @brief Interface to connect the graph
+ */
+class GraphUpdate
+{
+public:
+ virtual ~GraphUpdate() = default;
+
+public:
+ /**
+ * @brief Do the graph input connections using the SymbolTable
+ */
+ virtual void input(const SymbolTable *) const = 0;
+};
+
+/**
+ * @brief Class to store GraphUpdate objects
+ */
+class UpdateQueue final
+{
+public:
+ /**
+ * @brief Registers GraphUpdate objects
+ */
+ void enroll(std::unique_ptr<GraphUpdate> &&update);
+
+public:
+ using Queue = std::vector<std::unique_ptr<GraphUpdate>>;
+
+ const Queue &queue() const { return _queue; }
+
+private:
+ Queue _queue;
+};
+
+/**
+ * @brief Class to store context to build loco graph IR from TensorFlow
+ */
+class GraphBuilderContext
+{
+public:
+ GraphBuilderContext(loco::Graph *g, NodeDefTable *nodedef, SymbolTable *tensor_names,
+ UpdateQueue *updates)
+ : _g(g), _nodedef(nodedef), _tensor_names(tensor_names), _updates(updates)
+ {
+ // DO NOTHING
+ }
+
+ GraphBuilderContext(const GraphBuilderContext &) = delete;
+ GraphBuilderContext(GraphBuilderContext &&) = delete;
+
+public:
+ loco::Graph *graph() { return _g; }
+ NodeDefTable *nodedef() { return _nodedef; }
+ SymbolTable *tensor_names() { return _tensor_names; }
+ UpdateQueue *updates() { return _updates; }
+
+private:
+ loco::Graph *_g;
+ NodeDefTable *_nodedef;
+ SymbolTable *_tensor_names;
+ UpdateQueue *_updates;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_GRAPH_BUILDER_CONTEXT_H__
diff --git a/compiler/moco/import/include/moco/Import/GraphBuilderRegistry.h b/compiler/moco/import/include/moco/Import/GraphBuilderRegistry.h
new file mode 100644
index 000000000..da65cffb8
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/GraphBuilderRegistry.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_GRAPH_BUILDER_REGISTRY_H__
+#define __MOCO_IMPORT_GRAPH_BUILDER_REGISTRY_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+#include <map>
+#include <memory>
+#include <string>
+
+namespace moco
+{
+
+struct GraphBuilderSource
+{
+ virtual ~GraphBuilderSource() = default;
+
+ /**
+ * @brief Returns registered GraphBuilder pointer for operator (nullptr if not present)
+ */
+ virtual const GraphBuilder *lookup(const std::string &op) const = 0;
+};
+
+/**
+ * @brief Class to return graph builder for TF nodes
+ */
+class GraphBuilderRegistry final : public GraphBuilderSource
+{
+public:
+ GraphBuilderRegistry();
+
+public:
+ GraphBuilderRegistry(const GraphBuilderSource *parent) : _parent{parent}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Returns registered GraphBuilder pointer for operator or
+ * nullptr if not registered
+ */
+ const GraphBuilder *lookup(const std::string &op) const final
+ {
+ if (_builder_map.find(op) == _builder_map.end())
+ return (_parent == nullptr) ? nullptr : _parent->lookup(op);
+
+ return _builder_map.at(op).get();
+ }
+
+ static GraphBuilderRegistry &get()
+ {
+ static GraphBuilderRegistry me;
+ return me;
+ }
+
+public:
+ void add(const std::string op, std::unique_ptr<GraphBuilder> &&builder)
+ {
+ _builder_map[op] = std::move(builder);
+ }
+
+private:
+ const GraphBuilderSource *_parent = nullptr;
+
+private:
+ std::map<const std::string, std::unique_ptr<GraphBuilder>> _builder_map;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_GRAPH_BUILDER_REGISTRY_H__
diff --git a/compiler/moco/import/include/moco/Import/ModelSignature.h b/compiler/moco/import/include/moco/Import/ModelSignature.h
new file mode 100644
index 000000000..0db7c2795
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/ModelSignature.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_MODELSIGNATURE_H__
+#define __MOCO_IMPORT_MODELSIGNATURE_H__
+
+#include <moco/Names.h>
+
+#include <loco.h>
+#include <angkor/TensorShape.h>
+
+#include <string>
+#include <vector>
+
+namespace moco
+{
+
+/**
+ * @brief Class to store information to run a model. Normally this info comes from users
+ * via CLI params or configuration file.
+ */
+struct ModelSignature
+{
+public:
+ void add_input(const TensorName &input) { _inputs.push_back(input); }
+ void add_input(const TensorName &&input) { _inputs.push_back(input); }
+ void add_output(const TensorName &output) { _outputs.push_back(output); }
+ void add_output(const TensorName &&output) { _outputs.push_back(output); }
+
+ const std::vector<TensorName> &inputs() const { return _inputs; }
+ const std::vector<TensorName> &outputs() const { return _outputs; }
+
+ /**
+ * @brief Adds customop op type (not name of node) provided from user
+ */
+ void add_customop(const std::string &op);
+ const std::vector<std::string> &customops() const { return _customops; }
+
+ /**
+ * @brief Adds node name and its shape provided from user
+ */
+ void shape(const std::string &node_name, const angkor::TensorShape &shape);
+ const angkor::TensorShape *shape(const std::string &node_name) const;
+
+ /**
+ * @brief Adds node name and its dtype provided from user
+ */
+ void dtype(const std::string &node_name, loco::DataType dtype);
+ loco::DataType dtype(const std::string &node_name) const;
+
+private:
+ std::vector<TensorName> _inputs; // graph inputs
+ std::vector<TensorName> _outputs; // graph outputs
+
+ // For custom op types passed from user (e.g., via CLI)
+ std::vector<std::string> _customops;
+
+  // For node names and shapes passed from user (e.g., via CLI)
+ std::map<std::string, angkor::TensorShape> _shapes;
+
+  // For node names and dtypes passed from user (e.g., via CLI)
+ std::map<std::string, loco::DataType> _dtypes;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_MODELSIGNATURE_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes.h b/compiler/moco/import/include/moco/Import/Nodes.h
new file mode 100644
index 000000000..8c940a28c
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_NODES_H__
+#define __MOCO_IMPORT_NODES_H__
+
+#include "Nodes/Add.h"
+#include "Nodes/AvgPool.h"
+#include "Nodes/BiasAdd.h"
+#include "Nodes/Concat.h"
+#include "Nodes/Const.h"
+#include "Nodes/Conv2DBackpropInput.h"
+#include "Nodes/Conv2D.h"
+#include "Nodes/DepthwiseConv2dNative.h"
+#include "Nodes/FakeQuantWithMinMaxVars.h"
+#include "Nodes/FusedBatchNorm.h"
+#include "Nodes/Identity.h"
+#include "Nodes/Maximum.h"
+#include "Nodes/MaxPool.h"
+#include "Nodes/Mean.h"
+#include "Nodes/Mul.h"
+#include "Nodes/Pack.h"
+#include "Nodes/Pad.h"
+#include "Nodes/Placeholder.h"
+#include "Nodes/RealDiv.h"
+#include "Nodes/Relu6.h"
+#include "Nodes/Relu.h"
+#include "Nodes/Reshape.h"
+#include "Nodes/Rsqrt.h"
+#include "Nodes/Shape.h"
+#include "Nodes/Softmax.h"
+#include "Nodes/Sqrt.h"
+#include "Nodes/SquaredDifference.h"
+#include "Nodes/Squeeze.h"
+#include "Nodes/StopGradient.h"
+#include "Nodes/StridedSlice.h"
+#include "Nodes/Sub.h"
+#include "Nodes/Tanh.h"
+
+#endif // __MOCO_IMPORT_NODES_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Add.h b/compiler/moco/import/include/moco/Import/Nodes/Add.h
new file mode 100644
index 000000000..3d0d0f30f
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Add.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_ADD_H__
+#define __MOCO_IMPORT_OP_ADD_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Add node
+ */
+class AddGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_ADD_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/AvgPool.h b/compiler/moco/import/include/moco/Import/Nodes/AvgPool.h
new file mode 100644
index 000000000..4c8087afe
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/AvgPool.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_AVG_POOL_H__
+#define __MOCO_IMPORT_OP_AVG_POOL_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+class AvgPoolGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const final;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const final;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_AVG_POOL_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/BiasAdd.h b/compiler/moco/import/include/moco/Import/Nodes/BiasAdd.h
new file mode 100644
index 000000000..214df03de
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/BiasAdd.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_BIAS_ADD_H__
+#define __MOCO_IMPORT_OP_BIAS_ADD_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+class BiasAddGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const final;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const final;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_BIAS_ADD_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Concat.h b/compiler/moco/import/include/moco/Import/Nodes/Concat.h
new file mode 100644
index 000000000..2341fb00c
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Concat.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_CONCAT_H__
+#define __MOCO_IMPORT_OP_CONCAT_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+class ConcatV2GraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const final;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const final;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_CONCAT_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Const.h b/compiler/moco/import/include/moco/Import/Nodes/Const.h
new file mode 100644
index 000000000..1ce378219
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Const.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_CONST_H__
+#define __MOCO_IMPORT_OP_CONST_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+class ConstGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const final;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const final;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_CONST_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Conv2D.h b/compiler/moco/import/include/moco/Import/Nodes/Conv2D.h
new file mode 100644
index 000000000..3bd3dc74a
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Conv2D.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_CONV_2D_H__
+#define __MOCO_IMPORT_OP_CONV_2D_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+class Conv2DGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const final;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const final;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_CONV_2D_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Conv2DBackpropInput.h b/compiler/moco/import/include/moco/Import/Nodes/Conv2DBackpropInput.h
new file mode 100644
index 000000000..262a443fe
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Conv2DBackpropInput.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_CONV_2D_BACKPROP_INPUT_H__
+#define __MOCO_IMPORT_OP_CONV_2D_BACKPROP_INPUT_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Conv2DBackpropInput node
+ */
+class Conv2DBackpropInputGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_CONV_2D_BACKPROP_INPUT_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/DepthwiseConv2dNative.h b/compiler/moco/import/include/moco/Import/Nodes/DepthwiseConv2dNative.h
new file mode 100644
index 000000000..1dcbba1eb
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/DepthwiseConv2dNative.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_DEPTHWISE_CONV_2D_NATIVE_H__
+#define __MOCO_IMPORT_OP_DEPTHWISE_CONV_2D_NATIVE_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for DepthwiseConv2dNative node
+ */
+class DepthwiseConv2dNativeGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_DEPTHWISE_CONV_2D_NATIVE_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/FakeQuantWithMinMaxVars.h b/compiler/moco/import/include/moco/Import/Nodes/FakeQuantWithMinMaxVars.h
new file mode 100644
index 000000000..9e223c18e
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/FakeQuantWithMinMaxVars.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_FAKE_QUANT_WITH_MINMAX_VARS_H__
+#define __MOCO_IMPORT_OP_FAKE_QUANT_WITH_MINMAX_VARS_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for FakeQuantWithMinMaxVars node
+ */
+class FakeQuantWithMinMaxVarsGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_FAKE_QUANT_WITH_MINMAX_VARS_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/FusedBatchNorm.h b/compiler/moco/import/include/moco/Import/Nodes/FusedBatchNorm.h
new file mode 100644
index 000000000..38d1d5682
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/FusedBatchNorm.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_FUSED_BATCH_NORM_H__
+#define __MOCO_IMPORT_OP_FUSED_BATCH_NORM_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for FusedBatchNorm node
+ */
+class FusedBatchNormGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_FUSED_BATCH_NORM_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Identity.h b/compiler/moco/import/include/moco/Import/Nodes/Identity.h
new file mode 100644
index 000000000..29e04800f
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Identity.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_IDENTITY_H__
+#define __MOCO_IMPORT_OP_IDENTITY_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+class IdentityGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const final;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const final;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_IDENTITY_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/MaxPool.h b/compiler/moco/import/include/moco/Import/Nodes/MaxPool.h
new file mode 100644
index 000000000..696fa71e6
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/MaxPool.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_MAX_POOL_H__
+#define __MOCO_IMPORT_OP_MAX_POOL_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+class MaxPoolGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const final;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const final;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_MAX_POOL_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Maximum.h b/compiler/moco/import/include/moco/Import/Nodes/Maximum.h
new file mode 100644
index 000000000..69d897742
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Maximum.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_MAXIMUM_H__
+#define __MOCO_IMPORT_OP_MAXIMUM_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Maximum node
+ */
+class MaximumGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_MAXIMUM_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Mean.h b/compiler/moco/import/include/moco/Import/Nodes/Mean.h
new file mode 100644
index 000000000..7bae1bb39
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Mean.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_MEAN_H__
+#define __MOCO_IMPORT_OP_MEAN_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Mean node
+ */
+class MeanGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_MEAN_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Mul.h b/compiler/moco/import/include/moco/Import/Nodes/Mul.h
new file mode 100644
index 000000000..667c81954
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Mul.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_MUL_H__
+#define __MOCO_IMPORT_OP_MUL_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Mul node
+ */
+class MulGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_MUL_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Pack.h b/compiler/moco/import/include/moco/Import/Nodes/Pack.h
new file mode 100644
index 000000000..94666ad51
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Pack.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_PACK_H__
+#define __MOCO_IMPORT_OP_PACK_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+class PackGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const final;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const final;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_PACK_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Pad.h b/compiler/moco/import/include/moco/Import/Nodes/Pad.h
new file mode 100644
index 000000000..22eab32ac
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Pad.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_PAD_H__
+#define __MOCO_IMPORT_OP_PAD_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Pad node
+ */
+class PadGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_PAD_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Placeholder.h b/compiler/moco/import/include/moco/Import/Nodes/Placeholder.h
new file mode 100644
index 000000000..458600915
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Placeholder.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_PLACEHOLDER_H__
+#define __MOCO_IMPORT_OP_PLACEHOLDER_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Placeholder node
+ */
+class PlaceholderGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_PLACEHOLDER_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/RealDiv.h b/compiler/moco/import/include/moco/Import/Nodes/RealDiv.h
new file mode 100644
index 000000000..142e8b5f8
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/RealDiv.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_REALDIV_H__
+#define __MOCO_IMPORT_OP_REALDIV_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for RealDiv node
+ */
+class RealDivGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_REALDIV_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Relu.h b/compiler/moco/import/include/moco/Import/Nodes/Relu.h
new file mode 100644
index 000000000..0bd9cff04
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Relu.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_RELU_H__
+#define __MOCO_IMPORT_OP_RELU_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Relu node
+ */
+class ReluGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const final;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const final;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_RELU_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Relu6.h b/compiler/moco/import/include/moco/Import/Nodes/Relu6.h
new file mode 100644
index 000000000..d211b0543
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Relu6.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_RELU6_H__
+#define __MOCO_IMPORT_OP_RELU6_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Relu6 node
+ */
+class Relu6GraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const final;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const final;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_RELU6_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Reshape.h b/compiler/moco/import/include/moco/Import/Nodes/Reshape.h
new file mode 100644
index 000000000..e8bfeee23
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Reshape.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_RESHAPE_H__
+#define __MOCO_IMPORT_OP_RESHAPE_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Reshape node
+ */
+class ReshapeGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_RESHAPE_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Rsqrt.h b/compiler/moco/import/include/moco/Import/Nodes/Rsqrt.h
new file mode 100644
index 000000000..dedc52323
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Rsqrt.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_RSQRT_H__
+#define __MOCO_IMPORT_OP_RSQRT_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Rsqrt node
+ */
+class RsqrtGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_RSQRT_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Shape.h b/compiler/moco/import/include/moco/Import/Nodes/Shape.h
new file mode 100644
index 000000000..e36e1d546
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Shape.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_SHAPE_H__
+#define __MOCO_IMPORT_OP_SHAPE_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Shape node
+ */
+class ShapeGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_SHAPE_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Softmax.h b/compiler/moco/import/include/moco/Import/Nodes/Softmax.h
new file mode 100644
index 000000000..43fbb8852
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Softmax.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_SOFTMAX_H__
+#define __MOCO_IMPORT_OP_SOFTMAX_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+* @brief GraphBuilder for Softmax node
+*/
+class SoftmaxGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_SOFTMAX_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Sqrt.h b/compiler/moco/import/include/moco/Import/Nodes/Sqrt.h
new file mode 100644
index 000000000..d17dc3494
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Sqrt.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_SQRT_H__
+#define __MOCO_IMPORT_OP_SQRT_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Sqrt node
+ */
+class SqrtGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_SQRT_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/SquaredDifference.h b/compiler/moco/import/include/moco/Import/Nodes/SquaredDifference.h
new file mode 100644
index 000000000..501464d65
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/SquaredDifference.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_SQUARED_DIFFERENCE_H__
+#define __MOCO_IMPORT_OP_SQUARED_DIFFERENCE_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for SquaredDifference node
+ */
+class SquaredDifferenceGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_SQUARED_DIFFERENCE_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Squeeze.h b/compiler/moco/import/include/moco/Import/Nodes/Squeeze.h
new file mode 100644
index 000000000..64ead074b
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Squeeze.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_SQUEEZE_H__
+#define __MOCO_IMPORT_OP_SQUEEZE_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Squeeze node
+ */
+class SqueezeGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_SQUEEZE_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/StopGradient.h b/compiler/moco/import/include/moco/Import/Nodes/StopGradient.h
new file mode 100644
index 000000000..e547a8a8b
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/StopGradient.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_STOP_GRADIENT_H__
+#define __MOCO_IMPORT_OP_STOP_GRADIENT_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for StopGradient node
+ */
+class StopGradientGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_STOP_GRADIENT_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/StridedSlice.h b/compiler/moco/import/include/moco/Import/Nodes/StridedSlice.h
new file mode 100644
index 000000000..61170ebbf
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/StridedSlice.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_STRIDEDSLICE_H__
+#define __MOCO_IMPORT_OP_STRIDEDSLICE_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+class StridedSliceGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const final;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const final;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_STRIDEDSLICE_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Sub.h b/compiler/moco/import/include/moco/Import/Nodes/Sub.h
new file mode 100644
index 000000000..d6351e34a
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Sub.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_SUB_H__
+#define __MOCO_IMPORT_OP_SUB_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Sub node
+ */
+class SubGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_SUB_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Tanh.h b/compiler/moco/import/include/moco/Import/Nodes/Tanh.h
new file mode 100644
index 000000000..183e117ef
--- /dev/null
+++ b/compiler/moco/import/include/moco/Import/Nodes/Tanh.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IMPORT_OP_TANH_H__
+#define __MOCO_IMPORT_OP_TANH_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+namespace moco
+{
+
+/**
+ * @brief GraphBuilder for Tanh node
+ */
+class TanhGraphBuilder final : public GraphBuilder
+{
+public:
+ bool validate(const tensorflow::NodeDef &) const override;
+ void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IMPORT_OP_TANH_H__
diff --git a/compiler/moco/import/include/moco/Importer.h b/compiler/moco/import/include/moco/Importer.h
new file mode 100644
index 000000000..ee0660c52
--- /dev/null
+++ b/compiler/moco/import/include/moco/Importer.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#ifndef __MOCO_IMPORTER_H__
#define __MOCO_IMPORTER_H__

#include "moco/Import/ModelSignature.h"
#include "moco/Import/GraphBuilderRegistry.h"

#include <moco/Names.h>

#include <loco.h>

#include <tensorflow/core/framework/graph.pb.h>

#include <memory>

namespace moco
{

/**
 * @brief Converts a tensorflow::GraphDef into a loco::Graph
 *
 * With the default constructor the importer falls back to the built-in
 * GraphBuilderRegistry; a custom GraphBuilderSource may be injected via
 * the explicit constructor.
 */
class Importer final
{
public:
  Importer();

public:
  // NOTE 'source' is borrowed, not owned: the caller must keep it alive
  //      for the lifetime of this Importer
  explicit Importer(const GraphBuilderSource *source) : _source{source}
  {
    // DO NOTHING
  }

public:
  /// @brief Imports the GraphDef guided by the ModelSignature's inputs/outputs
  std::unique_ptr<loco::Graph> import(const ModelSignature &, tensorflow::GraphDef &) const;

private:
  // Null means "use the default GraphBuilderRegistry" (see Importer::import)
  const GraphBuilderSource *_source = nullptr;
};

} // namespace moco

#endif // __MOCO_IMPORTER_H__
diff --git a/compiler/moco/import/src/Convert.cpp b/compiler/moco/import/src/Convert.cpp
new file mode 100644
index 000000000..6285f5eab
--- /dev/null
+++ b/compiler/moco/import/src/Convert.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+#include <algorithm>
+#include <cctype>
+
+// TODO move to some common file
+namespace moco
+{
+
std::string str_toupper(std::string s)
{
  // Upper-case every byte in place. The cast through unsigned char keeps
  // std::toupper well-defined for bytes with the high bit set.
  for (auto &ch : s)
  {
    ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
  }
  return s;
}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Convert.h b/compiler/moco/import/src/Convert.h
new file mode 100644
index 000000000..77dab3700
--- /dev/null
+++ b/compiler/moco/import/src/Convert.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#ifndef __CONVERT_H__
#define __CONVERT_H__

#include <string>

// TODO move to some common file
namespace moco
{

/// @brief Returns a copy of 's' with every character converted to upper case
std::string str_toupper(std::string s);

} // namespace moco

#endif // __CONVERT_H__
diff --git a/compiler/moco/import/src/GraphBuilderContext.cpp b/compiler/moco/import/src/GraphBuilderContext.cpp
new file mode 100644
index 000000000..bbc1d8bd0
--- /dev/null
+++ b/compiler/moco/import/src/GraphBuilderContext.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/GraphBuilderContext.h"
+
+#include <oops/UserExn.h>
+
+#include <stdexcept>
+#include <string>
+
+namespace moco
+{
+
+void NodeDefTable::enroll(const std::string &node_name, const tensorflow::NodeDef *node)
+{
+ MapNameNode_t::iterator iter = _table.find(node_name);
+
+ if (iter != _table.end())
+ {
+ throw oops::UserExn("Duplicate node name in GraphDef", node_name);
+ }
+
+ _table[node_name] = node;
+}
+
+const tensorflow::NodeDef *NodeDefTable::node(const std::string &node_name) const
+{
+ MapNameNode_t::const_iterator iter = _table.find(node_name);
+
+ if (iter == _table.end())
+ {
+ throw oops::UserExn("Cannot find node with name in GraphDef", node_name);
+ }
+
+ return iter->second;
+}
+
+void SymbolTable::enroll(const TensorName &tensor_name, loco::Node *node)
+{
+ MapNameNode_t::iterator iter = _table.find(tensor_name);
+
+ if (iter != _table.end())
+ {
+ throw oops::UserExn("Duplicate node name in GraphDef", tensor_name.name());
+ }
+
+ _table[tensor_name] = node;
+}
+
+loco::Node *SymbolTable::node(const TensorName &tensor_name) const
+{
+ MapNameNode_t::const_iterator iter = _table.find(tensor_name);
+
+ if (iter == _table.end())
+ {
+ throw oops::UserExn("Cannot find node with name in GraphDef", tensor_name.name());
+ }
+
+ return iter->second;
+}
+
+void UpdateQueue::enroll(std::unique_ptr<GraphUpdate> &&update)
+{
+ _queue.push_back(std::move(update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/GraphBuilderContext.test.cpp b/compiler/moco/import/src/GraphBuilderContext.test.cpp
new file mode 100644
index 000000000..51f6db245
--- /dev/null
+++ b/compiler/moco/import/src/GraphBuilderContext.test.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/GraphBuilderContext.h"
+#include <moco/Names.h>
+
+#include <loco.h>
+
+#include <oops/UserExn.h>
+
+#include <gtest/gtest.h>
+
// GraphBuilderContext only aggregates pointers; each accessor must return
// exactly the object passed to the constructor.
TEST(GraphBuilderContext, ctor)
{
  auto graph = loco::make_graph();
  moco::NodeDefTable nodedef;
  moco::SymbolTable nodes;
  moco::UpdateQueue updates;

  moco::GraphBuilderContext context(graph.get(), &nodedef, &nodes, &updates);

  ASSERT_EQ(context.graph(), graph.get());
  ASSERT_EQ(context.nodedef(), &nodedef);
  ASSERT_EQ(context.tensor_names(), &nodes);
  ASSERT_EQ(context.updates(), &updates);
}
+
// SymbolTable round-trip (enroll then node) plus both failure modes:
// duplicate enrollment and lookup of an unregistered name.
TEST(SymbolTable, node_name)
{
  moco::SymbolTable table;
  loco::Pull pull_node;
  moco::TensorName name("input", 0);
  moco::TensorName invalid("invalid", 0);

  table.enroll(name, &pull_node);
  ASSERT_EQ(table.node(name), &pull_node);
  // duplicate name should throw
  EXPECT_THROW(table.enroll(name, &pull_node), oops::UserExn);
  // unregistered name should throw
  EXPECT_THROW(table.node(invalid), oops::UserExn);
}
+
namespace
{

// Minimal GraphUpdate stub whose input() does nothing; used below only to
// exercise UpdateQueue ownership transfer.
class TestGraphUpdate final : public moco::GraphUpdate
{
public:
  void input(const moco::SymbolTable *) const override;
};

void TestGraphUpdate::input(const moco::SymbolTable *) const {}

} // namespace
+
// UpdateQueue must take ownership: after enroll() the queue holds exactly
// the one update that was moved in.
TEST(GraphUpdateQueue, queue)
{
  std::unique_ptr<TestGraphUpdate> update(new TestGraphUpdate());
  moco::UpdateQueue updates;

  updates.enroll(std::move(update));
  auto &queue = updates.queue();
  ASSERT_EQ(queue.size(), 1);
}
diff --git a/compiler/moco/import/src/GraphBuilderRegistry.cpp b/compiler/moco/import/src/GraphBuilderRegistry.cpp
new file mode 100644
index 000000000..3a028513f
--- /dev/null
+++ b/compiler/moco/import/src/GraphBuilderRegistry.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/GraphBuilderRegistry.h"
+#include "moco/Import/Nodes.h"
+
+#include <stdex/Memory.h>
+
+namespace moco
+{
+
// Populates the registry with one GraphBuilder per supported TensorFlow
// operator, keyed by the operator name as it appears in NodeDef::op().
GraphBuilderRegistry::GraphBuilderRegistry()
{
  add("Add", stdex::make_unique<AddGraphBuilder>());
  add("AvgPool", stdex::make_unique<AvgPoolGraphBuilder>());
  add("BiasAdd", stdex::make_unique<BiasAddGraphBuilder>());
  add("ConcatV2", stdex::make_unique<ConcatV2GraphBuilder>());
  add("Const", stdex::make_unique<ConstGraphBuilder>());
  add("Conv2D", stdex::make_unique<Conv2DGraphBuilder>());
  add("Conv2DBackpropInput", stdex::make_unique<Conv2DBackpropInputGraphBuilder>());
  add("DepthwiseConv2dNative", stdex::make_unique<DepthwiseConv2dNativeGraphBuilder>());
  add("FakeQuantWithMinMaxVars", stdex::make_unique<FakeQuantWithMinMaxVarsGraphBuilder>());
  add("FusedBatchNorm", stdex::make_unique<FusedBatchNormGraphBuilder>());
  add("Identity", stdex::make_unique<IdentityGraphBuilder>());
  add("Maximum", stdex::make_unique<MaximumGraphBuilder>());
  add("MaxPool", stdex::make_unique<MaxPoolGraphBuilder>());
  add("Mean", stdex::make_unique<MeanGraphBuilder>());
  add("Mul", stdex::make_unique<MulGraphBuilder>());
  add("Pack", stdex::make_unique<PackGraphBuilder>());
  add("Pad", stdex::make_unique<PadGraphBuilder>());
  add("Placeholder", stdex::make_unique<PlaceholderGraphBuilder>());
  add("RealDiv", stdex::make_unique<RealDivGraphBuilder>());
  add("Relu", stdex::make_unique<ReluGraphBuilder>());
  add("Relu6", stdex::make_unique<Relu6GraphBuilder>());
  add("Reshape", stdex::make_unique<ReshapeGraphBuilder>());
  add("Rsqrt", stdex::make_unique<RsqrtGraphBuilder>());
  add("Shape", stdex::make_unique<ShapeGraphBuilder>());
  add("Softmax", stdex::make_unique<SoftmaxGraphBuilder>());
  add("Sqrt", stdex::make_unique<SqrtGraphBuilder>());
  add("SquaredDifference", stdex::make_unique<SquaredDifferenceGraphBuilder>());
  add("Squeeze", stdex::make_unique<SqueezeGraphBuilder>());
  add("StopGradient", stdex::make_unique<StopGradientGraphBuilder>());
  add("StridedSlice", stdex::make_unique<StridedSliceGraphBuilder>());
  add("Sub", stdex::make_unique<SubGraphBuilder>());
  add("Tanh", stdex::make_unique<TanhGraphBuilder>());

  // Virtual node like `TFPush` need not to be added here
}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Importer.cpp b/compiler/moco/import/src/Importer.cpp
new file mode 100644
index 000000000..8d3ca6cfc
--- /dev/null
+++ b/compiler/moco/import/src/Importer.cpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Importer.h"
+#include "moco/Import/GraphBuilder.h"
+#include "moco/Import/GraphBuilderContext.h"
+
+#include "moco/Import/GraphBuilderRegistry.h"
+
+#include <moco/IR/Nodes/TFPlaceholder.h>
+#include <moco/IR/TFNode.h>
+
+#include <stdex/Memory.h>
+#include <oops/UserExn.h>
+
+#include <cassert>
+#include <sstream>
+#include <stdexcept>
+
+namespace
+{
+
/**
 * @brief Converts a tensorflow::GraphDef into the given loco::Graph
 *
 * @param source       lookup table of per-operator GraphBuilders
 * @param signature    declared graph inputs and outputs
 * @param tf_graph_def protobuf graph to convert
 * @param graph        destination loco graph, filled in place
 *
 * @throw oops::UserExn when an operator is unsupported or fails validation
 */
void convert_graph(const moco::GraphBuilderSource &source, const moco::ModelSignature &signature,
                   tensorflow::GraphDef &tf_graph_def, loco::Graph *graph)
{
  auto nodedef = stdex::make_unique<moco::NodeDefTable>();
  auto tensor_names = stdex::make_unique<moco::SymbolTable>();
  auto updates = stdex::make_unique<moco::UpdateQueue>();

  moco::GraphBuilderContext gb_context(graph, nodedef.get(), tensor_names.get(), updates.get());

  // Building a loco graph
  // 1. Convert all the nodes to moco::TFNode
  // 2. Connect inputs: set all node input(from a string) to actual node object
  // 3. Set graph input
  // 4. Create moco::TFPush node and set graph output

  /**
   * @brief Prepare tensorflow::NodeDef search table from name
   */
  for (const auto &n : tf_graph_def.node())
  {
    nodedef->enroll(n.name(), &n);
  }

  /**
   * @brief 1. Convert all the nodes to moco::TFNode
   *
   * @note In each build for a TF node, four things happen
   *       1) create corresponding moco::TFNode(s)
   *       2) read and set the attributes to created moco::TFNode(s)
   *       3) register name-moco::TFNode(last one of Nodes) that will be used as the output
   *       4) queue a task to set the input of the moco::TFNode(first one of the Nodes)
   *          this is done only for required nodes depending on the operator
   *
   * @example Placeholder("in") - Identity("out")
   *          %1 = Placeholder --> 0x1001 (moco::TFNode* object address)
   *          (symboltable: register %1, after the registeration table will contain as below;
   *           "in" : 0x1001
   *          )
   *          (queue: this will be empty as Pull does not queue a task to set input;
   *          )
   *
   *          %2 = Forward --> 0x1002
   *          (symboltable: register %2 and table will look like below;
   *           "in" : 0x1001
   *           "out" : 0x1002
   *          )
   *          (queue: Forward will queue a task with input "in";
   *           0x1002: {"in"}
   *          )
   */
  for (const auto &n : tf_graph_def.node())
  {
    if (const auto *graph_builder = source.lookup(n.op()))
    {
      if (!graph_builder->validate(n))
      {
        throw oops::UserExn("Invalid operator", n.op());
      }

      graph_builder->build(n, &gb_context);
    }
    else
    {
      throw oops::UserExn("Not supported", n.op());
    }
  }

  /**
   * @brief 2. Connect inputs: Iterate updates and call each update input method
   *
   * @note Continue from above example graph, connecting inputs is done in following steps
   *       a) iterate queue
   *       b) call the input method for each update
   *       c) each update has the moco::TFNode *node and names of the input to connect
   *          node = 0x1002 and names = {"in"}
   *       d) from symbol table, "in" will return 0x1001
   *       e) set input of 0x1002 with 0x1001
   */
  for (auto &update : updates->queue())
  {
    update->input(tensor_names.get());
  }

  /**
   * @brief 3. Set graph input
   */
  for (auto input : signature.inputs())
  {
    auto node = tensor_names->node(input);
    assert(node != nullptr);

    auto graph_input = graph->inputs()->create();

    auto placeholder_node = dynamic_cast<moco::TFPlaceholder *>(node);
    assert(placeholder_node != nullptr);

    graph_input->name(input.nodeName());

    // annotate index that should be passed to loco::Pull
    moco::index(placeholder_node, graph_input->index());

    // This implementation works as "PlaceholderGraphBuilder in Nodes/Placeholder.cpp"
    // accepts only TF_FLOAT32 as of now.
    //
    // TODO Support other types
    graph_input->dtype(loco::DataType::FLOAT32);
  }

  /**
   * @brief 4. Create moco::TFPush node and set graph output
   */
  for (auto output : signature.outputs())
  {
    auto output_node = tensor_names->node(output);
    assert(output_node);

    // create moco::TFPush for output of graph
    auto push_node = graph->nodes()->create<moco::TFPush>();
    push_node->from(output_node); // set input of TFPush to output node

    // set the graph output name and node object
    auto graph_output = graph->outputs()->create();
    graph_output->name(output.nodeName());
    push_node->index(graph_output->index());

    // TODO Support other types
    graph_output->dtype(loco::DataType::FLOAT32);
  }

  // validate graph
  assert(loco::valid(graph));
}
+
+} // namespace
+
+namespace moco
+{
+
+Importer::Importer()
+{
+ // DO NOTHING
+}
+
+std::unique_ptr<loco::Graph> Importer::import(const ModelSignature &signature,
+ tensorflow::GraphDef &tf_graph_def) const
+{
+ auto graph = loco::make_graph();
+
+ const GraphBuilderSource *source_ptr = &moco::GraphBuilderRegistry::get();
+
+ if (_source != nullptr)
+ {
+ // Use user-defined GraphBuilderSource
+ source_ptr = _source;
+ }
+
+ convert_graph(*source_ptr, signature, tf_graph_def, graph.get());
+
+ return std::move(graph);
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Importer.test.cpp b/compiler/moco/import/src/Importer.test.cpp
new file mode 100644
index 000000000..23873390c
--- /dev/null
+++ b/compiler/moco/import/src/Importer.test.cpp
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Importer.h"
+#include "moco/GraphHelper.h"
+
+#include <moco/IR/Nodes/TFIdentity.h>
+
+#include "TestHelper.h"
+#include <loco.h>
+#include <plier/tf/TestHelper.h>
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
// Smoke test: a default-constructed Importer must be creatable
TEST(TensorFlowImport, Dummy) { moco::Importer import; }
+
namespace
{

// Fixture: two-node GraphDef — Placeholder (1x2x1x2, DT_FLOAT) feeding an
// Identity named "output/identity".
// clang-format off
const char *basic_pbtxtdata = STRING_CONTENT(
node {
  name: "Placeholder"
  op: "Placeholder"
  attr {
    key: "dtype"
    value {
      type: DT_FLOAT
    }
  }
  attr {
    key: "shape"
    value {
      shape {
        dim {
          size: 1
        }
        dim {
          size: 2
        }
        dim {
          size: 1
        }
        dim {
          size: 2
        }
      }
    }
  }
}
node {
  name: "output/identity"
  op: "Identity"
  input: "Placeholder"
  attr {
    key: "T"
    value {
      type: DT_FLOAT
    }
  }
}
);
// clang-format on

} // namespace
+
// End-to-end import of the two-node fixture above: the resulting graph must
// contain a TFIdentity whose input has been wired (non-null).
TEST(TensorFlowImport, load_model_withio_tf)
{
  moco::ModelSignature signature;

  signature.add_input(moco::TensorName("Placeholder", 0));
  signature.add_output(moco::TensorName("output/identity", 0));

  tensorflow::GraphDef graph_def;
  EXPECT_TRUE(plier::tf::parse_graphdef(basic_pbtxtdata, graph_def));

  moco::Importer importer;

  std::unique_ptr<loco::Graph> graph = importer.import(signature, graph_def);

  // what to test:
  // - import reads Placeholder
  // - import reads Identity
  // - attribute values should match

  auto tfidentity = find_first_node_bytype<moco::TFIdentity>(graph.get());
  ASSERT_NE(tfidentity, nullptr);
  ASSERT_NE(tfidentity->input(), nullptr);
}
+
namespace
{

// Fixture: four-node GraphDef — Placeholder -> Identity plus two scalar
// DT_FLOAT Const nodes ("Foo/w_min", "Foo/w_max") used to exercise
// find_node_byname in the test below.
// clang-format off
const char *query_pbtxtdata = STRING_CONTENT(
node {
  name: "Placeholder"
  op: "Placeholder"
  attr {
    key: "dtype"
    value {
      type: DT_FLOAT
    }
  }
  attr {
    key: "shape"
    value {
      shape {
        dim {
          size: 1
        }
        dim {
          size: 2
        }
        dim {
          size: 1
        }
        dim {
          size: 2
        }
      }
    }
  }
}
node {
  name: "Foo/w_min"
  op: "Const"
  attr {
    key: "dtype"
    value { type: DT_FLOAT }
  }
  attr {
    key: "value"
    value {
      tensor {
        dtype: DT_FLOAT
        tensor_shape { }
        float_val: -1.0
      }
    }
  }
}
node {
  name: "output/identity"
  op: "Identity"
  input: "Placeholder"
  attr {
    key: "T"
    value {
      type: DT_FLOAT
    }
  }
}
node {
  name: "Foo/w_max"
  op: "Const"
  attr {
    key: "dtype"
    value { type: DT_FLOAT }
  }
  attr {
    key: "value"
    value {
      tensor {
        dtype: DT_FLOAT
        tensor_shape { }
        float_val: -1.0
      }
    }
  }
}
);
// clang-format on

} // namespace
+
// After import, nodes must be retrievable by their original TF names via
// find_node_byname, regardless of their order in the GraphDef.
TEST(TensorFlowImport, find_node_by_name)
{
  moco::ModelSignature signature;

  signature.add_input(moco::TensorName("Placeholder", 0));
  signature.add_output(moco::TensorName("output/identity", 0));

  tensorflow::GraphDef graph_def;
  EXPECT_TRUE(plier::tf::parse_graphdef(query_pbtxtdata, graph_def));

  moco::Importer importer;

  std::unique_ptr<loco::Graph> graph = importer.import(signature, graph_def);

  // what to test:
  // - get name of first Identity node
  // - find node by name `Foo/w_min`
  // - find node by name `Foo/w_max`

  auto tfidentity = find_first_node_bytype<moco::TFIdentity>(graph.get());
  ASSERT_NE(tfidentity, nullptr);
  ASSERT_NE(tfidentity->input(), nullptr);
  ASSERT_STREQ(tfidentity->name().c_str(), "output/identity");

  auto query_node = moco::find_node_byname<moco::TFConst>(graph.get(), "Foo/w_min");
  ASSERT_NE(query_node, nullptr);
  ASSERT_STREQ(query_node->name().c_str(), "Foo/w_min");

  auto query_node2 = moco::find_node_byname<moco::TFConst>(graph.get(), "Foo/w_max");
  ASSERT_NE(query_node2, nullptr);
  ASSERT_STREQ(query_node2->name().c_str(), "Foo/w_max");
}
diff --git a/compiler/moco/import/src/ModelSignature.cpp b/compiler/moco/import/src/ModelSignature.cpp
new file mode 100644
index 000000000..d4c7e5085
--- /dev/null
+++ b/compiler/moco/import/src/ModelSignature.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include "moco/Import/ModelSignature.h"

#include <oops/UserExn.h>

#include <algorithm>
+
+namespace moco
+{
+
+void ModelSignature::add_customop(const std::string &op)
+{
+ if (std::find(_customops.begin(), _customops.end(), op) == _customops.end())
+ _customops.emplace_back(op);
+ else
+ throw oops::UserExn("Duplicate custom operator", op);
+}
+
+void ModelSignature::shape(const std::string &node_name, const angkor::TensorShape &shape)
+{
+ if (_shapes.find(node_name) != _shapes.end())
+ throw oops::UserExn("Duplicate node name", node_name);
+
+ _shapes[node_name] = shape;
+}
+
+const angkor::TensorShape *ModelSignature::shape(const std::string &node_name) const
+{
+ auto res = _shapes.find(node_name);
+ if (res == _shapes.end())
+ return nullptr;
+ else
+ return &res->second;
+}
+
+void ModelSignature::dtype(const std::string &node_name, loco::DataType dtype)
+{
+ if (_dtypes.find(node_name) != _dtypes.end())
+ throw oops::UserExn("Duplicate node name", node_name);
+
+ _dtypes[node_name] = dtype;
+}
+
+loco::DataType ModelSignature::dtype(const std::string &node_name) const
+{
+ auto res = _dtypes.find(node_name);
+ if (res == _dtypes.end())
+ return loco::DataType::Unknown;
+ else
+ return res->second;
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Add.cpp b/compiler/moco/import/src/Nodes/Add.cpp
new file mode 100644
index 000000000..6981a55e1
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Add.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Add.h"
+
+#include <moco/IR/Nodes/TFAdd.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for TF Add node
+ */
+class TFAddGraphUpdate final : public GraphUpdate
+{
+public:
+ TFAddGraphUpdate(TFAdd *node, std::vector<TensorName> names) : _node(node), _names(names) {}
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFAdd *_node;
+ std::vector<TensorName> _names;
+};
+
+void TFAddGraphUpdate::input(const SymbolTable *tensor_names) const
+{
+ assert(_names.size() == 2);
+
+ _node->x(tensor_names->node(_names[0]));
+ _node->y(tensor_names->node(_names[1]));
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool AddGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ return node.input_size() == 2;
+}
+
+void AddGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // creating TF dialect Add node
+ auto tf_add = graph->nodes()->create<TFAdd>();
+ tf_add->name(node.name());
+
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, tf_add);
+
+ std::vector<TensorName> add_input_names;
+ add_input_names.push_back(TensorName(node.input(0))); // x
+ add_input_names.push_back(TensorName(node.input(1))); // y
+
+ auto tf_add_update = stdex::make_unique<TFAddGraphUpdate>(tf_add, add_input_names);
+ updates->enroll(std::move(tf_add_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Add.test.cpp b/compiler/moco/import/src/Nodes/Add.test.cpp
new file mode 100644
index 000000000..ace2b0801
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Add.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Add.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
namespace
{
// Fixture: a single binary Add NodeDef with two named inputs.
// clang-format off
const char *add_basic_pbtxt = STRING_CONTENT(
  name: "ADD_01"
  op: "Add"
  input: "input_01"
  input: "input_02"
  attr {
    key: "T"
    value {
      type: DT_FLOAT
    }
  }
);
// clang-format on

} // namespace
+
// AddGraphBuilder must create a TFAdd named "ADD_01" whose x/y inputs get
// wired to the two declared input tensors.
TEST(TensorFlowImport, tf_add_basic)
{
  TFNodeBuildTester tester;
  moco::AddGraphBuilder graphbuilder;
  tensorflow::NodeDef nodedef;

  EXPECT_TRUE(plier::tf::parse_nodedef(add_basic_pbtxt, nodedef));

  // what to test:
  // - TFAdd node should exist
  // - both inputs x() and y() should not be null

  tester.inputs({"input_01", "input_02"});
  tester.output("ADD_01");
  tester.run(nodedef, graphbuilder);
}
diff --git a/compiler/moco/import/src/Nodes/AvgPool.cpp b/compiler/moco/import/src/Nodes/AvgPool.cpp
new file mode 100644
index 000000000..6d7fd36bb
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/AvgPool.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/AvgPool.h"
+
+#include <moco/IR/Nodes/TFAvgPool.h>
+
+#include <moco/Names.h>
+
+#include "Convert.h"
+#include <loco/IR/PermutingCodec.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+#include <oops/UserExn.h>
+
+#include <cassert>
+#include <stdexcept>
+
+using namespace plier::tf;
+
+namespace
+{
+
+using namespace moco;
+
+class TFAvgPoolGraphUpdate final : public GraphUpdate
+{
+public:
+ TFAvgPoolGraphUpdate(TFAvgPool *node, const TensorName &name)
+ : _avgpool_node(node), _value_name(name)
+ {
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFAvgPool *_avgpool_node;
+ const TensorName _value_name;
+};
+
+void TFAvgPoolGraphUpdate::input(const SymbolTable *node_table) const
+{
+ loco::Node *value_node = node_table->node(_value_name);
+ _avgpool_node->value(value_node);
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool AvgPoolGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ if (node.input_size() != 1)
+ return false;
+
+ // note: even though "data_format" is not entered when a model is written,
+ // TF seems to generate "data_format" field into a pb file
+ if (!plier::tf::has_attrs(node, {"T", "data_format", "ksize", "padding", "strides"}))
+ return false;
+
+ auto tf_ksize = get_list_attr(node, "ksize");
+ auto ksize = as_int64_list(tf_ksize);
+ if (ksize.size() != 4)
+ {
+ // TODO support ksize length for 1 and 2
+ throw oops::UserExn("AvgPool only supports ksize length 4", node.name());
+ }
+
+ auto tf_strides = get_list_attr(node, "strides");
+ auto strides = as_int64_list(tf_strides);
+ if (strides.size() != 4)
+ {
+ // TODO support strides length for 1 and 2
+ throw oops::UserExn("AvgPool only supports strides length 4", node.name());
+ }
+
+ return true;
+}
+
+void AvgPoolGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // name of loco nodes
+ ::std::string avgPool2d_name = node.name();
+
+ // tensorflow data_format: one of NHWC or NCHW.
+ auto data_layout = get_string_attr(node, "data_format");
+ auto avgPool_node = graph->nodes()->create<TFAvgPool>();
+ avgPool_node->name(node.name());
+ avgPool_node->data_layout(data_layout);
+
+ // padding
+ auto padding = moco::str_toupper(get_string_attr(node, "padding"));
+ avgPool_node->padding(padding);
+
+ // ksize
+ auto tf_ksize = get_list_attr(node, "ksize");
+ auto ksize = as_int64_list(tf_ksize);
+ avgPool_node->ksize(ksize);
+
+ // strides
+ auto tf_strides = get_list_attr(node, "strides");
+ auto strides = as_int64_list(tf_strides);
+ avgPool_node->strides(strides);
+
+ // To set the input node of encode_node with avgPool2d_name
+ TensorName output_name(avgPool2d_name, 0);
+ tensor_names->enroll(output_name, avgPool_node);
+
+ // Record ifm inputs to featureEncode_node
+ auto update = stdex::make_unique<TFAvgPoolGraphUpdate>(avgPool_node, TensorName(node.input(0)));
+
+ updates->enroll(std::move(update));
+}
+
+} // namespace moco
+
+// TODO Consider a case when TF AvgPool is for 3D.
+// AvgPool works for 2D and other Dimensions, such as 3D
+// So, in future, some other GraphBuilder decide if AvgPoolGraphBuilder is used or
+// other GraphBuilder is used for TF AvgPool
diff --git a/compiler/moco/import/src/Nodes/AvgPool.test.cpp b/compiler/moco/import/src/Nodes/AvgPool.test.cpp
new file mode 100644
index 000000000..7d62f0eaa
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/AvgPool.test.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/AvgPool.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *avgpool_01_pbtxtdata = STRING_CONTENT(
+ name: "avgpool"
+ op: "AvgPool"
+ input: "const/float"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+ attr {
+ key: "ksize"
+ value {
+ list {
+ i: 1
+ i: 2
+ i: 3
+ i: 1
+ }
+ }
+ }
+ attr {
+ key: "padding"
+ value {
+ s: "VALID"
+ }
+ }
+ attr {
+ key: "strides"
+ value {
+ list {
+ i: 1
+ i: 3
+ i: 2
+ i: 1
+ }
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+// Import a 4-D NHWC AvgPool NodeDef and check every copied attribute.
+TEST(TensorFlowImport, AvgPool_01)
+{
+  TFNodeBuildTester tester;
+  moco::AvgPoolGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(avgpool_01_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFAvgPool
+  // - input should exist
+  // - attributes value should match
+
+  tester.inputs({"const/float"});
+  tester.output("avgpool");
+  tester.run(nodedef, graphbuilder);
+
+  auto test_node = dynamic_cast<moco::TFAvgPool *>(tester.output());
+  ASSERT_NE(test_node, nullptr);
+  ASSERT_EQ(test_node->data_layout(), "NHWC");
+  ASSERT_EQ(test_node->padding(), "VALID");
+  ASSERT_EQ(test_node->ksize(), std::vector<int64_t>({1, 2, 3, 1}));
+  ASSERT_EQ(test_node->strides(), std::vector<int64_t>({1, 3, 2, 1}));
+}
diff --git a/compiler/moco/import/src/Nodes/BiasAdd.cpp b/compiler/moco/import/src/Nodes/BiasAdd.cpp
new file mode 100644
index 000000000..a3eb91116
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/BiasAdd.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/BiasAdd.h"
+
+#include <moco/IR/Nodes/TFBiasAdd.h>
+
+#include <moco/Names.h>
+
+#include <loco.h>
+#include <loco/IR/PermutingCodec.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+#include <oops/UserExn.h>
+
+#include <cassert>
+#include <vector>
+
+namespace
+{
+using namespace moco;
+
+// Deferred wiring of a TFBiasAdd node's two inputs (value, bias).
+// The input nodes may not exist yet at build() time, so their tensor
+// names are kept and resolved later against the SymbolTable in input().
+class TFBiasAddGraphUpdate final : public GraphUpdate
+{
+public:
+  // NOTE(review): `names` is taken by non-const reference but stored by copy;
+  // sibling updates (e.g. TFConcatV2GraphUpdate) take it by value — confirm intent.
+  TFBiasAddGraphUpdate(TFBiasAdd *biasadd, std::vector<TensorName> &names)
+      : _biasadd(biasadd), _names(names)
+  {
+  }
+
+  void input(const SymbolTable *) const override;
+
+private:
+  TFBiasAdd *_biasadd;            // node whose inputs will be connected
+  std::vector<TensorName> _names; // expected: {value, bias}
+};
+
+// Resolve the two recorded tensor names and connect them as the
+// TFBiasAdd node's value (first) and bias (second) inputs.
+void TFBiasAddGraphUpdate::input(const SymbolTable *node_table) const
+{
+  assert(_names.size() == 2);
+
+  auto value_node = node_table->node(_names[0]);
+  auto bias_node = node_table->node(_names[1]);
+  assert(value_node != nullptr);
+  assert(bias_node != nullptr);
+
+  _biasadd->value(value_node);
+  _biasadd->bias(bias_node);
+}
+
+} // namespace
+
+namespace moco
+{
+
+// Check that a TF "BiasAdd" NodeDef is importable: exactly two inputs,
+// required attributes present, and data_format one of NHWC/NCHW.
+// Returns false for silently-skippable problems; throws UserExn for an
+// unsupported data_format, which is a user-visible model error.
+bool BiasAddGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+  if (node.input_size() != 2)
+    return false;
+
+  // note: even though "data_format" is not entered when a model is written,
+  // TF seems to generate "data_format" field into a pb file
+  if (!plier::tf::has_attrs(node, {"T", "data_format"}))
+    return false;
+
+  // TODO add type check
+  // type of input and bias should be same (except using quantization)
+
+  // Note In case of TF.nn.bias_add,
+  // "value may have any number of dimensions." ...
+  // but "data_format: A string. 'NHWC' and 'NCHW' are supported."
+  // Not sure if value should be 4-D tensor. Let's skip this check for now.
+
+  auto data_layout = plier::tf::get_string_attr(node, "data_format");
+  if (!(data_layout == "NHWC" || data_layout == "NCHW"))
+  {
+    throw oops::UserExn("BiasAdd Unsupported data_format", node.name());
+  }
+
+  return true;
+}
+
+// Build a TFBiasAdd IR node from a TF "BiasAdd" NodeDef: copy the
+// data_format attribute, enroll the output tensor name, and queue a
+// deferred update that will wire the value/bias inputs later.
+void BiasAddGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  loco::Graph *graph = context->graph();
+  SymbolTable *tensor_names = context->tensor_names();
+  UpdateQueue *updates = context->updates();
+
+  // tensorflow data_format: one of NHWC or NCHW.
+  auto data_layout = plier::tf::get_string_attr(node, "data_format");
+  auto tf_bias_add = graph->nodes()->create<TFBiasAdd>();
+  tf_bias_add->name(node.name());
+  tf_bias_add->data_layout(data_layout);
+
+  // To set the input node of encode_node with biasAdd_name
+  TensorName output_name(node.name(), 0);
+  tensor_names->enroll(output_name, tf_bias_add);
+
+  // input(0) is the value tensor, input(1) is the bias tensor
+  std::vector<TensorName> input_names;
+  input_names.push_back(TensorName(node.input(0)));
+  input_names.push_back(TensorName(node.input(1)));
+
+  auto update = stdex::make_unique<TFBiasAddGraphUpdate>(tf_bias_add, input_names);
+  updates->enroll(std::move(update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/BiasAdd.test.cpp b/compiler/moco/import/src/Nodes/BiasAdd.test.cpp
new file mode 100644
index 000000000..626456d30
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/BiasAdd.test.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/BiasAdd.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// clang-format off
+const char *bias_add_01_pbtxtdata = STRING_CONTENT(
+ name: "out"
+ op: "BiasAdd"
+ input: "val"
+ input: "bias"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "data_format"
+ value { s: "NHWC" }
+ }
+);
+// clang-format on
+
+} // namespace
+
+// Import a BiasAdd NodeDef with NHWC layout and check the created node.
+TEST(TensorFlowImport, bias_add_01)
+{
+  TFNodeBuildTester tester;
+  moco::BiasAddGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(bias_add_01_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFBiasAdd
+  // - value() should not be nullptr
+  // - bias() should not be nullptr
+  // - data_layout should match
+
+  tester.inputs({"val", "bias"});
+  tester.output("out");
+  tester.run(nodedef, graphbuilder);
+
+  auto test_node = dynamic_cast<moco::TFBiasAdd *>(tester.output());
+  ASSERT_NE(test_node, nullptr);
+  ASSERT_TRUE(test_node->data_layout() == "NHWC");
+}
+
+namespace
+{
+
+// clang-format off
+const char *bias_add_NCHW_pbtxtdata = STRING_CONTENT(
+ name: "out"
+ op: "BiasAdd"
+ input: "val"
+ input: "bias"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "data_format"
+ value { s: "NCHW" }
+ }
+);
+// clang-format on
+
+} // namespace
+
+// Same as bias_add_01 but with NCHW layout.
+TEST(TensorFlowImport, bias_add_NCHW_axis)
+{
+  TFNodeBuildTester tester;
+  moco::BiasAddGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(bias_add_NCHW_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFBiasAdd
+  // - value() should not be nullptr
+  // - bias() should not be nullptr
+  // - data_layout should match
+
+  tester.inputs({"val", "bias"});
+  tester.output("out");
+  tester.run(nodedef, graphbuilder);
+
+  auto test_node = dynamic_cast<moco::TFBiasAdd *>(tester.output());
+  ASSERT_NE(test_node, nullptr);
+  ASSERT_TRUE(test_node->data_layout() == "NCHW");
+}
diff --git a/compiler/moco/import/src/Nodes/Concat.cpp b/compiler/moco/import/src/Nodes/Concat.cpp
new file mode 100644
index 000000000..8bf8a84b5
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Concat.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Concat.h"
+
+#include <moco/IR/Nodes/TFConcatV2.h>
+
+#include <moco/Names.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+
+#include <cassert>
+
+namespace
+{
+
+using namespace moco;
+
+// Deferred wiring of a TFConcatV2 node's inputs.
+// `names` holds the N value tensors followed by the axis tensor as the
+// last element; they are resolved against the SymbolTable in input().
+class TFConcatV2GraphUpdate final : public GraphUpdate
+{
+public:
+  TFConcatV2GraphUpdate(TFConcatV2 *node, std::vector<TensorName> names)
+      : _node(node), _names(names)
+  {
+  }
+
+  void input(const SymbolTable *) const override;
+
+private:
+  TFConcatV2 *_node;              // node whose inputs will be connected
+  std::vector<TensorName> _names; // {value_0, ..., value_{N-1}, axis}
+};
+
+// Resolve and connect the value inputs (all names except the last) and
+// the axis input (last name) of the TFConcatV2 node.
+void TFConcatV2GraphUpdate::input(const SymbolTable *tensor_names) const
+{
+  uint32_t num_values = _names.size() - 1; // exclude axis
+  assert(num_values >= 1);
+
+  for (uint32_t i = 0; i < num_values; ++i)
+  {
+    auto input_node = tensor_names->node(_names[i]);
+    assert(input_node != nullptr);
+    _node->values(i, input_node);
+  }
+  // the last recorded name is the axis tensor
+  auto axis_node = tensor_names->node(_names[num_values]);
+  assert(axis_node != nullptr);
+  _node->axis(axis_node);
+}
+
+} // namespace
+
+namespace moco
+{
+
+// Check that a TF "ConcatV2" NodeDef is importable: required attributes
+// present, at least two value inputs (plus the trailing axis input), and
+// the value-input count consistent with the "N" attribute.
+bool ConcatV2GraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+  if (!plier::tf::has_attrs(node, {"T", "N", "Tidx"}))
+    return false;
+
+  // Concat node SHOULD have 3 or more inputs, that is 2 + axis
+  const int num_inputs = node.input_size() - 1; // value inputs, excluding axis
+  return (num_inputs >= 2) && (num_inputs == plier::tf::get_int_attr(node, "N"));
+}
+
+// Build a TFConcatV2 IR node sized for the NodeDef's value inputs,
+// enroll its output name, and queue a deferred update that wires the
+// value inputs and the trailing axis input once all nodes exist.
+void ConcatV2GraphBuilder::build(const tensorflow::NodeDef &node,
+                                 GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  auto graph = context->graph();
+  auto tensor_names = context->tensor_names();
+  auto updates = context->updates();
+
+  // number of value inputs; the final NodeDef input is the axis
+  const int num_inputs = node.input_size() - 1;
+  std::vector<TensorName> input_names;
+  auto concat_node = graph->nodes()->create<TFConcatV2>(num_inputs);
+  concat_node->name(node.name());
+
+  for (int ni = 0; ni < num_inputs; ++ni)
+  {
+    input_names.push_back(TensorName(node.input(ni)));
+  }
+  // last one is the axis
+  input_names.push_back(TensorName(node.input(num_inputs)));
+
+  // register string-name to the last node as output of concat(s)
+  TensorName output_name(node.name(), 0);
+  tensor_names->enroll(output_name, concat_node);
+
+  auto update = stdex::make_unique<TFConcatV2GraphUpdate>(concat_node, input_names);
+  updates->enroll(std::move(update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Concat.test.cpp b/compiler/moco/import/src/Nodes/Concat.test.cpp
new file mode 100644
index 000000000..c0986578b
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Concat.test.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Concat.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// clang-format off
+const char *concat_01_pbtxtdata = STRING_CONTENT(
+ name: "Concat"
+ op: "ConcatV2"
+ input: "Input01"
+ input: "Input02"
+ input: "Axis"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+// Import a ConcatV2 NodeDef with two value inputs plus axis.
+TEST(TensorFlowImport, concat_01)
+{
+  TFNodeBuildTester tester;
+  moco::ConcatV2GraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(concat_01_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFConcatV2
+  // - there should be two values
+  // - values(idx) should not be nullptr
+  // - axis() should not be nullptr
+
+  tester.inputs({"Input01", "Input02", "Axis"});
+  tester.output("Concat");
+  tester.run(nodedef, graphbuilder);
+
+  auto test_node = dynamic_cast<moco::TFConcatV2 *>(tester.output());
+  ASSERT_NE(test_node, nullptr);
+  ASSERT_EQ(test_node->num_values(), 2);
+}
+
+namespace
+{
+
+// clang-format off
+const char *concat_02_pbtxtdata = STRING_CONTENT(
+ name: "Concat"
+ op: "ConcatV2"
+ input: "Input01"
+ input: "Input02"
+ input: "Input03"
+ input: "Axis"
+ attr {
+ key: "N"
+ value {
+ i: 3
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+// Import a ConcatV2 NodeDef with three value inputs plus axis.
+TEST(TensorFlowImport, concat_02)
+{
+  TFNodeBuildTester tester;
+  moco::ConcatV2GraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(concat_02_pbtxtdata, nodedef));
+
+  // what to test: TFConcatV2 has 3 inputs
+  // - there should exist TFConcatV2
+  // - values(idx) should not be nullptr
+  // - axis() should not be nullptr
+
+  tester.inputs({"Input01", "Input02", "Input03", "Axis"});
+  tester.output("Concat");
+  tester.run(nodedef, graphbuilder);
+
+  auto test_node = dynamic_cast<moco::TFConcatV2 *>(tester.output());
+  ASSERT_NE(test_node, nullptr);
+  ASSERT_EQ(test_node->num_values(), 3);
+}
diff --git a/compiler/moco/import/src/Nodes/Const.cpp b/compiler/moco/import/src/Nodes/Const.cpp
new file mode 100644
index 000000000..15ea717db
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Const.cpp
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Const.h"
+
+#include <moco/Names.h>
+#include <moco/IR/TFNodes.h>
+
+#include <loco.h>
+#include <plier/tf/Convert.h>
+#include <oops/UserExn.h>
+
+#include <cassert>
+#include <stdexcept>
+#include <string>
+
+namespace
+{
+
+using namespace moco;
+
+// Fill an S8 TFConst node with `num_elements` values taken from the proto.
+// Three sources, in priority order:
+//   1. raw tensor_content whose byte size matches exactly;
+//   2. 1..num_elements explicit int_val entries — the last entry is
+//      repeated to pad up to num_elements (TF's broadcast-last behavior);
+//   3. otherwise the tensor is malformed -> UserExn.
+// NOTE(review): `tensor_content().size()` (size_t) is compared with a
+// signed product — confirm num_elements is always non-negative here.
+void read_value_int8(TFConst *const_node, int num_elements,
+                     const tensorflow::TensorProto &input_tensor)
+{
+  const_node->size<loco::DataType::S8>(num_elements);
+
+  int32_t input_elements = input_tensor.int_val_size();
+
+  if (input_tensor.tensor_content().size() == num_elements * sizeof(int8_t))
+  {
+    // raw little-endian byte blob: reinterpret and copy element-wise
+    const std::string &str_content = input_tensor.tensor_content();
+    const int8_t *s8_ptr = reinterpret_cast<const int8_t *>(str_content.c_str());
+    for (int32_t i = 0; i < num_elements; i++)
+    {
+      const_node->at<loco::DataType::S8>(i) = *(s8_ptr + i);
+    }
+  }
+  else if (0 < input_elements && input_elements <= num_elements)
+  {
+    for (int32_t i = 0; i < input_elements; i++)
+    {
+      const_node->at<loco::DataType::S8>(i) = input_tensor.int_val(i);
+    }
+
+    // pad the remainder with the last provided value
+    for (int32_t i = input_elements; i < num_elements; i++)
+    {
+      const_node->at<loco::DataType::S8>(i) = input_tensor.int_val(input_elements - 1);
+    }
+  }
+  else
+  {
+    throw oops::UserExn("Invalid Const values", const_node->name());
+  }
+}
+
+// Fill an S32 TFConst node with `num_elements` values taken from the proto.
+// Same three-way logic as read_value_int8: exact-size raw tensor_content,
+// else 1..num_elements int_val entries padded with the last value,
+// else throw UserExn for a malformed tensor.
+void read_value_int32(TFConst *const_node, int num_elements,
+                      const tensorflow::TensorProto &input_tensor)
+{
+  const_node->size<loco::DataType::S32>(num_elements);
+
+  int32_t input_elements = input_tensor.int_val_size();
+
+  if (input_tensor.tensor_content().size() == num_elements * sizeof(int32_t))
+  {
+    // raw byte blob: reinterpret as int32 and copy element-wise
+    const std::string &str_content = input_tensor.tensor_content();
+    const int32_t *s32_ptr = reinterpret_cast<const int32_t *>(str_content.c_str());
+    for (int32_t i = 0; i < num_elements; i++)
+    {
+      const_node->at<loco::DataType::S32>(i) = *(s32_ptr + i);
+    }
+  }
+  else if (0 < input_elements && input_elements <= num_elements)
+  {
+    for (int32_t i = 0; i < input_elements; i++)
+    {
+      const_node->at<loco::DataType::S32>(i) = input_tensor.int_val(i);
+    }
+
+    // pad the remainder with the last provided value
+    for (int32_t i = input_elements; i < num_elements; i++)
+    {
+      const_node->at<loco::DataType::S32>(i) = input_tensor.int_val(input_elements - 1);
+    }
+  }
+  else
+  {
+    throw oops::UserExn("Invalid Const values", const_node->name());
+  }
+}
+
+// Fill a FLOAT32 TFConst node with `num_elements` values taken from the
+// proto. Same three-way logic as the integer readers, but sourced from
+// float_val / a float-sized tensor_content blob.
+void read_value_float32(TFConst *const_node, int num_elements,
+                        const tensorflow::TensorProto &input_tensor)
+{
+  const_node->size<loco::DataType::FLOAT32>(num_elements);
+
+  int32_t input_elements = input_tensor.float_val_size();
+
+  if (input_tensor.tensor_content().size() == num_elements * sizeof(float))
+  {
+    // raw byte blob: reinterpret as float and copy element-wise
+    const std::string &str_content = input_tensor.tensor_content();
+    const float *float_ptr = reinterpret_cast<const float *>(str_content.c_str());
+    for (int32_t i = 0; i < num_elements; i++)
+    {
+      const_node->at<loco::DataType::FLOAT32>(i) = *(float_ptr + i);
+    }
+  }
+  else if (0 < input_elements && input_elements <= num_elements)
+  {
+    for (int32_t i = 0; i < input_elements; i++)
+    {
+      const_node->at<loco::DataType::FLOAT32>(i) = input_tensor.float_val(i);
+    }
+
+    // pad the remainder with the last provided value
+    for (int32_t i = input_elements; i < num_elements; i++)
+    {
+      const_node->at<loco::DataType::FLOAT32>(i) = input_tensor.float_val(input_elements - 1);
+    }
+  }
+  else
+  {
+    throw oops::UserExn("Invalid Const values", const_node->name());
+  }
+}
+
+} // namespace
+
+namespace moco
+{
+
+// Check that a TF "Const" NodeDef is importable: dtype/value attributes
+// present, rank at most 6, every dimension fitting in an int and
+// non-negative, and dtype one of S8/S32/FLOAT32.
+// Returns false for unsupported-but-valid nodes; throws UserExn for
+// dimension values that are outright invalid.
+bool ConstGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+  if (!plier::tf::has_attrs(node, {"dtype", "value"}))
+    return false;
+
+  const auto &input_tensor = plier::tf::get_tensor_attr(node, "value");
+  const auto &input_shape = input_tensor.tensor_shape();
+  const auto &input_dims = input_shape.dim();
+
+  if (!(input_shape.dim_size() <= 6))
+    return false;
+
+  for (auto &d : input_dims)
+  {
+    if (d.size() > std::numeric_limits<int>::max())
+      throw oops::UserExn("Const Shape element overflows", node.name());
+
+    // negative size means an unknown dimension, which a Const cannot have
+    if (d.size() < 0)
+      throw oops::UserExn("Unknown dim size", node.name());
+  }
+
+  auto dtype = plier::tf::as_loco_datatype(plier::tf::get_datatype_attr(node, "dtype"));
+  if (!(dtype == loco::DataType::S32 || dtype == loco::DataType::FLOAT32 ||
+        dtype == loco::DataType::S8))
+    return false;
+  // TODO support other dtype
+
+  return true;
+}
+
+// Build a TFConst IR node from a TF "Const" NodeDef: copy dtype and
+// shape, read the element values via the dtype-specific reader, and
+// enroll the node under its output name. A Const has no inputs, so no
+// deferred GraphUpdate is needed.
+void ConstGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+  loco::Graph *graph = context->graph();
+  SymbolTable *tensor_names = context->tensor_names();
+
+  // Create a "TFConstant" node for Const
+  auto const_node = graph->nodes()->create<TFConst>();
+  const_node->name(node.name());
+
+  // set dtype
+  auto dtype = plier::tf::as_loco_datatype(plier::tf::get_datatype_attr(node, "dtype"));
+  const_node->dtype(dtype);
+
+  // import shape and value
+  const auto &input_tensor = plier::tf::get_tensor_attr(node, "value");
+  const auto &input_shape = input_tensor.tensor_shape();
+  const auto &input_dims = input_shape.dim();
+  assert(input_shape.dim_size() <= 6); // guaranteed by validate()
+  const_node->rank(input_shape.dim_size());
+  int index = 0;
+  bool zero_sized_shape = false;
+  for (auto &d : input_dims)
+  {
+    assert(d.size() <= std::numeric_limits<int>::max());
+    // any zero-sized dimension makes the whole tensor empty
+    if (d.size() == 0)
+      zero_sized_shape = true;
+
+    assert(d.size() >= 0);
+    const_node->dim(index++) = d.size();
+  }
+
+  // element count: 0 for an empty tensor (rank is reset to 0 in that
+  // case), otherwise the product of all dimensions; a rank-0 scalar
+  // yields the initial value 1
+  int num_elements = 1;
+  if (zero_sized_shape)
+  {
+    const_node->rank(0);
+    num_elements = 0;
+  }
+  else
+  {
+    for (uint32_t d = 0; d < const_node->rank(); d++)
+    {
+      num_elements *= const_node->dim(d).value();
+    }
+  }
+
+  // dispatch to the dtype-specific reader (dtype already restricted by validate())
+  switch (dtype)
+  {
+    case loco::DataType::S8:
+      read_value_int8(const_node, num_elements, input_tensor);
+      break;
+
+    case loco::DataType::S32:
+      read_value_int32(const_node, num_elements, input_tensor);
+      break;
+
+    case loco::DataType::FLOAT32:
+      read_value_float32(const_node, num_elements, input_tensor);
+      break;
+
+    // TODO support other types
+
+    default:
+      assert(false);
+  }
+
+  // register string-name to node
+  TensorName output_name(node.name(), 0);
+  tensor_names->enroll(output_name, const_node);
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Const.test.cpp b/compiler/moco/import/src/Nodes/Const.test.cpp
new file mode 100644
index 000000000..854499fe6
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Const.test.cpp
@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Const.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// Test case for "input_tensor.float_val_size() == num_elements"
+
+// clang-format off
+const char *const_float_01_pbtxtdata = STRING_CONTENT(
+ name: "const/float"
+ op: "Const"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 3
+ }
+ }
+ float_val: 1.1
+ float_val: 2.2
+ float_val: 3.3
+ float_val: 4.4
+ float_val: 5.5
+ float_val: 6.6
+ }
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+// Const import: float_val count equals num_elements -> values copied 1:1.
+TEST(TensorFlowImport, const_float_01)
+{
+  TFNodeBuildTester tester;
+  moco::ConstGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(const_float_01_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFConst
+  // - values should match
+
+  tester.inputs({});
+  tester.output("const/float");
+  tester.run(nodedef, graphbuilder);
+
+  auto test_node = dynamic_cast<moco::TFConst *>(tester.output());
+  ASSERT_NE(test_node, nullptr);
+  ASSERT_EQ(test_node->size<loco::DataType::FLOAT32>(), 6);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(0), 1.1f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(1), 2.2f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(2), 3.3f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(3), 4.4f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(4), 5.5f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(5), 6.6f);
+}
+
+namespace
+{
+// Test case for "input_tensor.float_val_size() == 1"
+
+// clang-format off
+const char *const_float_02_pbtxtdata = STRING_CONTENT(
+ name: "const/float"
+ op: "Const"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 3
+ }
+ }
+ float_val: 1.1
+ }
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+// Const import: a single float_val is broadcast to all 6 elements.
+TEST(TensorFlowImport, const_float_02)
+{
+  TFNodeBuildTester tester;
+  moco::ConstGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(const_float_02_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFConst
+  // - values should match
+
+  tester.inputs({});
+  tester.output("const/float");
+  tester.run(nodedef, graphbuilder);
+
+  auto test_node = dynamic_cast<moco::TFConst *>(tester.output());
+  ASSERT_NE(test_node, nullptr);
+  ASSERT_EQ(test_node->size<loco::DataType::FLOAT32>(), 6);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(0), 1.1f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(1), 1.1f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(2), 1.1f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(3), 1.1f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(4), 1.1f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(5), 1.1f);
+}
+
+namespace
+{
+// Test case for "input_tensor.tensor_content().size() == num_elements * sizeof(float)"
+// Generated with tfkit tool: "cat ./test.pbtxt | ./tfkit pack"
+
+// clang-format off
+const char *const_float_03_pbtxtdata = STRING_CONTENT(
+ name: "const/float"
+ op: "Const"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 3
+ }
+ }
+ tensor_content: "\315\314\214?\315\314\014@33S@\315\314\214@\000\000\260@33\323@"
+ }
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+// Const import: values packed in raw tensor_content (24 bytes = 6 floats).
+TEST(TensorFlowImport, const_float_03)
+{
+  TFNodeBuildTester tester;
+  moco::ConstGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(const_float_03_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFConst
+  // - values should match
+
+  tester.inputs({});
+  tester.output("const/float");
+  tester.run(nodedef, graphbuilder);
+
+  auto test_node = dynamic_cast<moco::TFConst *>(tester.output());
+  ASSERT_NE(test_node, nullptr);
+  ASSERT_EQ(test_node->size<loco::DataType::FLOAT32>(), 6);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(0), 1.1f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(1), 2.2f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(2), 3.3f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(3), 4.4f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(4), 5.5f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(5), 6.6f);
+}
+
+namespace
+{
+// Test case for "input_tensor.float_val_size() < num_elements"
+
+// clang-format off
+const char *const_float_04_pbtxtdata = STRING_CONTENT(
+ name: "const/float"
+ op: "Const"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 3
+ }
+ }
+ float_val: 1.1
+ float_val: 2.2
+ }
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+// Const import: fewer float_val entries than elements -> last value pads.
+TEST(TensorFlowImport, const_float_04)
+{
+  TFNodeBuildTester tester;
+  moco::ConstGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(const_float_04_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFConst
+  // - values should match
+
+  tester.inputs({});
+  tester.output("const/float");
+  tester.run(nodedef, graphbuilder);
+
+  auto test_node = dynamic_cast<moco::TFConst *>(tester.output());
+  ASSERT_NE(test_node, nullptr);
+  ASSERT_EQ(test_node->size<loco::DataType::FLOAT32>(), 6);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(0), 1.1f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(1), 2.2f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(2), 2.2f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(3), 2.2f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(4), 2.2f);
+  ASSERT_EQ(test_node->at<loco::DataType::FLOAT32>(5), 2.2f);
+}
+
+namespace
+{
+// Test case for "input_tensor.int_val_size() < num_elements"
+
+// clang-format off
+const char *const_int32_04_pbtxtdata = STRING_CONTENT(
+ name: "const/int"
+ op: "Const"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 3
+ }
+ }
+ int_val: 1
+ int_val: 2
+ }
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+// Const import: fewer int_val entries than elements -> last value pads.
+TEST(TensorFlowImport, const_int32_04)
+{
+  TFNodeBuildTester tester;
+  moco::ConstGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(const_int32_04_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFConst
+  // - values should match
+
+  tester.inputs({});
+  tester.output("const/int");
+  tester.run(nodedef, graphbuilder);
+
+  auto test_node = dynamic_cast<moco::TFConst *>(tester.output());
+  ASSERT_NE(test_node, nullptr);
+  ASSERT_EQ(test_node->size<loco::DataType::S32>(), 6);
+  ASSERT_EQ(test_node->at<loco::DataType::S32>(0), 1);
+  ASSERT_EQ(test_node->at<loco::DataType::S32>(1), 2);
+  ASSERT_EQ(test_node->at<loco::DataType::S32>(2), 2);
+  ASSERT_EQ(test_node->at<loco::DataType::S32>(3), 2);
+  ASSERT_EQ(test_node->at<loco::DataType::S32>(4), 2);
+  ASSERT_EQ(test_node->at<loco::DataType::S32>(5), 2);
+}
+
+namespace
+{
+// Test case for "scalar"
+
+// clang-format off
+const char *const_int32_scalar_pbtxtdata = STRING_CONTENT(
+ name: "const/int"
+ op: "Const"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ }
+ int_val: 3
+ }
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+// Const import: rank-0 scalar tensor_shape yields a single element.
+TEST(TensorFlowImport, const_int32_scalar)
+{
+  TFNodeBuildTester tester;
+  moco::ConstGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(const_int32_scalar_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFConst
+  // - there should be one element and value should be 3
+
+  tester.inputs({});
+  tester.output("const/int");
+  tester.run(nodedef, graphbuilder);
+
+  auto test_node = dynamic_cast<moco::TFConst *>(tester.output());
+  ASSERT_NE(test_node, nullptr);
+  ASSERT_EQ(test_node->size<loco::DataType::S32>(), 1);
+  ASSERT_EQ(test_node->at<loco::DataType::S32>(0), 3);
+}
+
+namespace
+{
+
+// clang-format off
+const char *const_int8_01_pbtxtdata = STRING_CONTENT(
+ name: "const/int8"
+ op: "Const"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT8
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT8
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 3
+ }
+ }
+ int_val: 0
+ int_val: -1
+ int_val: 1
+ int_val: 2
+ int_val: 3
+ int_val: 4
+ }
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+// Const import: DT_INT8 tensor with explicit int_val entries (incl. negative).
+TEST(TensorFlowImport, const_int8_01)
+{
+  TFNodeBuildTester tester;
+  moco::ConstGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(const_int8_01_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFConst
+  // - values should match
+
+  tester.inputs({});
+  tester.output("const/int8");
+  tester.run(nodedef, graphbuilder);
+
+  auto test_node = dynamic_cast<moco::TFConst *>(tester.output());
+  ASSERT_NE(test_node, nullptr);
+  ASSERT_EQ(test_node->size<loco::DataType::S8>(), 6);
+  ASSERT_EQ(test_node->at<loco::DataType::S8>(0), 0);
+  ASSERT_EQ(test_node->at<loco::DataType::S8>(1), -1);
+  ASSERT_EQ(test_node->at<loco::DataType::S8>(2), 1);
+  ASSERT_EQ(test_node->at<loco::DataType::S8>(3), 2);
+  ASSERT_EQ(test_node->at<loco::DataType::S8>(4), 3);
+  ASSERT_EQ(test_node->at<loco::DataType::S8>(5), 4);
+}
diff --git a/compiler/moco/import/src/Nodes/Conv2D.cpp b/compiler/moco/import/src/Nodes/Conv2D.cpp
new file mode 100644
index 000000000..e6b98dcd1
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Conv2D.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Conv2D.h"
+
+#include <moco/IR/Nodes/TFConv2D.h>
+
+#include <moco/Names.h>
+
+#include "Convert.h"
+
+#include <loco.h>
+#include <loco/IR/PermutingCodec.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+#include <oops/UserExn.h>
+
+#include <cassert>
+#include <stdexcept>
+#include <algorithm>
+
+namespace
+{
+using namespace moco;
+
+class TFConv2DGraphUpdate final : public GraphUpdate
+{
+public:
+ TFConv2DGraphUpdate(TFConv2D *node, std::vector<TensorName> names) : _node(node), _names(names) {}
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFConv2D *_node;
+ std::vector<TensorName> _names;
+};
+
+void TFConv2DGraphUpdate::input(const SymbolTable *node_table) const
+{
+ assert(_names.size() == 2);
+
+ auto input_node = node_table->node(_names[0]);
+ auto filter_node = node_table->node(_names[1]);
+ assert(input_node != nullptr);
+ assert(filter_node != nullptr);
+
+ _node->input(input_node);
+ _node->filter(filter_node);
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool Conv2DGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ if (node.input_size() != 2)
+ return false;
+
+ // note: even though "data_format" is not entered when a model is written,
+ // TF seems to generate a "data_format" field into a pb file
+ if (!plier::tf::has_attrs(node, {"T", "data_format", "padding", "strides"}))
+ return false;
+
+ auto data_layout = plier::tf::get_string_attr(node, "data_format");
+ if (!(data_layout == "NHWC" || data_layout == "NCHW"))
+ {
+ throw oops::UserExn("Conv2D Unsupported data_format", node.name());
+ }
+
+ // dilation attribute is not fully supported
+ if (plier::tf::has_attr(node, "dilations"))
+ {
+ // TODO Support non-default dilations
+ auto dilation = plier::tf::get_list_attr(node, "dilations").i();
+ if (!std::all_of(dilation.begin(), dilation.end(), [](std::int64_t dil) { return dil == 1; }))
+ return false;
+ }
+ // Else, dilations are automatically set to default [1,1,1,1] which we assume now
+
+ return true;
+}
+
+void Conv2DGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // name of loco nodes
+ std::string conv2d_name = node.name();
+
+ auto conv2d = graph->nodes()->create<TFConv2D>();
+ conv2d->name(node.name());
+
+ // read attributes
+ auto data_layout = plier::tf::get_string_attr(node, "data_format");
+ assert(data_layout == "NHWC" || data_layout == "NCHW");
+ conv2d->data_layout(data_layout);
+
+ auto tf_strides = plier::tf::get_list_attr(node, "strides");
+ auto strides = plier::tf::as_int64_list(tf_strides);
+ conv2d->strides(strides);
+
+ auto padding = moco::str_toupper(plier::tf::get_string_attr(node, "padding"));
+ assert(padding == "VALID" || padding == "SAME");
+ conv2d->padding(padding);
+
+ // save the name for graph link updates
+ TensorName output_name(conv2d_name, 0);
+ tensor_names->enroll(output_name, conv2d);
+
+ std::vector<TensorName> input_names;
+ input_names.push_back(TensorName(node.input(0))); // input
+ input_names.push_back(TensorName(node.input(1))); // kernel
+
+ // Record input names for the deferred graph-link update of this TFConv2D node
+ auto tfconv2d_update = stdex::make_unique<TFConv2DGraphUpdate>(conv2d, input_names);
+
+ updates->enroll(std::move(tfconv2d_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Conv2D.test.cpp b/compiler/moco/import/src/Nodes/Conv2D.test.cpp
new file mode 100644
index 000000000..ba006f489
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Conv2D.test.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Conv2D.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *conv2d_01_pbtxtdata = STRING_CONTENT(
+ name: "conv2d"
+ op: "Conv2D"
+ input: "ifm"
+ input: "ker"
+ attr { key: "T" value { type: DT_FLOAT } }
+ attr { key: "data_format" value { s: "NHWC" } }
+ attr { key: "dilations" value { list { i: 1 i: 1 i: 1 i: 1 } } }
+ attr { key: "padding" value { s: "VALID" } }
+ attr { key: "strides" value { list { i: 1 i: 2 i: 3 i: 1 } } }
+);
+// clang-format on
+} // namespace
+
+TEST(TensorFlowImport, Conv2D_01)
+{
+ TFNodeBuildTester tester;
+ moco::Conv2DGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(conv2d_01_pbtxtdata, nodedef));
+
+ // what to test:
+ // - Conv2D node should exist
+ // - ifm() should not be nullptr
+ // - ker() should not be nullptr
+ // - attribute values should match
+
+ tester.inputs({"ifm", "ker"});
+ tester.output("conv2d");
+ tester.run(nodedef, graphbuilder);
+
+ auto test_node = dynamic_cast<moco::TFConv2D *>(tester.output());
+ ASSERT_NE(test_node, nullptr);
+ ASSERT_EQ(test_node->padding(), "VALID");
+ ASSERT_EQ(test_node->data_layout(), "NHWC");
+ auto strides = test_node->strides();
+ ASSERT_EQ(strides.size(), 4);
+ // TODO add verify dilation
+}
+
+namespace
+{
+// clang-format off
+const char *conv2d_inception_pbtxtdata = STRING_CONTENT(
+ name: "InceptionV3/InceptionV3/Conv2d_1a_3x3/Conv2D"
+ op: "Conv2D"
+ input: "input:0"
+ input: "InceptionV3/Conv2d_1a_3x3/weights/read/_3__cf__3"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "data_format"
+ value { s: "NHWC" }
+ }
+ attr {
+ key: "dilations"
+ value {
+ list { i: 1 i: 1 i: 1 i: 1 }
+ }
+ }
+ attr {
+ key: "padding"
+ value { s: "VALID" }
+ }
+ attr {
+ key: "strides"
+ value {
+ list { i: 1 i: 2 i: 2 i: 1 }
+ }
+ }
+);
+} // namespace
+
+TEST(TensorFlowImport, Conv2D_inception_indexed_tensor_name)
+{
+ TFNodeBuildTester tester;
+ moco::Conv2DGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(conv2d_inception_pbtxtdata, nodedef));
+
+ // what to test: name with ':0' should be treated correctly
+ // - Conv2D node should exist
+ // - ifm() should not be nullptr
+ // - ker() should not be nullptr
+
+ tester.inputs({"input", "InceptionV3/Conv2d_1a_3x3/weights/read/_3__cf__3"});
+ tester.output("InceptionV3/InceptionV3/Conv2d_1a_3x3/Conv2D");
+ tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/Conv2DBackpropInput.cpp b/compiler/moco/import/src/Nodes/Conv2DBackpropInput.cpp
new file mode 100644
index 000000000..74c6605ab
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Conv2DBackpropInput.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Conv2DBackpropInput.h"
+
+#include <moco/IR/Nodes/TFConv2DBackpropInput.h>
+
+#include "Convert.h"
+
+#include <loco.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+#include <oops/UserExn.h>
+
+namespace
+{
+using namespace moco;
+
+/// @brief GraphUpdate for Conv2DBackpropInput node
+class Conv2DBackpropInputGraphUpdate final : public GraphUpdate
+{
+public:
+ Conv2DBackpropInputGraphUpdate(TFConv2DBackpropInput *node, std::vector<TensorName> names)
+ : _node(node), _input_names(names)
+ {
+ // DO NOTHING
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFConv2DBackpropInput *_node;
+ std::vector<TensorName> _input_names;
+};
+
+void Conv2DBackpropInputGraphUpdate::input(const SymbolTable *table) const
+{
+ assert(_input_names.size() == 3);
+
+ auto input_sizes_node = table->node(_input_names[0]);
+ auto filter_node = table->node(_input_names[1]);
+ auto out_backprop_node = table->node(_input_names[2]);
+
+ assert(input_sizes_node != nullptr);
+ assert(filter_node != nullptr);
+ assert(out_backprop_node != nullptr);
+
+ _node->input_sizes(input_sizes_node);
+ _node->filter(filter_node);
+ _node->out_backprop(out_backprop_node);
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool Conv2DBackpropInputGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ if (node.input_size() != 3)
+ return false;
+
+ if (!plier::tf::has_attrs(node, {"T", "data_format", "padding", "strides"}))
+ return false;
+
+ auto data_layout = plier::tf::get_string_attr(node, "data_format");
+ if (!(data_layout == "NHWC" || data_layout == "NCHW"))
+ {
+ throw oops::UserExn("Conv2DBackprop Unsupported data_format", node.name());
+ }
+
+ // dilation attribute is not fully supported
+ if (plier::tf::has_attr(node, "dilations"))
+ {
+ // TODO Support non-default dilations
+ auto dilation = plier::tf::get_list_attr(node, "dilations").i();
+ if (!std::all_of(dilation.begin(), dilation.end(), [](std::int64_t dil) { return dil == 1; }))
+ return false;
+ }
+ // Else, dilations are automatically set to default [1,1,1,1] which we assume now
+
+ return true;
+}
+
+void Conv2DBackpropInputGraphBuilder::build(const tensorflow::NodeDef &node,
+ GraphBuilderContext *context) const
+{
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // name of loco nodes
+ std::string conv2d_backprop_name = node.name();
+
+ auto conv2d_backprop = graph->nodes()->create<TFConv2DBackpropInput>();
+ conv2d_backprop->name(node.name());
+
+ // read attributes
+ auto data_layout = plier::tf::get_string_attr(node, "data_format");
+ assert(data_layout == "NHWC" || data_layout == "NCHW");
+ conv2d_backprop->data_layout(data_layout);
+
+ auto tf_strides = plier::tf::get_list_attr(node, "strides");
+ auto strides = plier::tf::as_int64_list(tf_strides);
+ conv2d_backprop->strides(strides);
+
+ auto padding = moco::str_toupper(plier::tf::get_string_attr(node, "padding"));
+ assert(padding == "VALID" || padding == "SAME");
+ conv2d_backprop->padding(padding);
+
+ // save the name for graph link updates
+ TensorName output_name(conv2d_backprop_name, 0);
+ tensor_names->enroll(output_name, conv2d_backprop);
+
+ std::vector<TensorName> input_names;
+ input_names.push_back(TensorName(node.input(0))); // input_sizes
+ input_names.push_back(TensorName(node.input(1))); // filter
+ input_names.push_back(TensorName(node.input(2))); // out_backprop
+
+ // update
+ auto conv2d_backprop_update =
+ stdex::make_unique<Conv2DBackpropInputGraphUpdate>(conv2d_backprop, input_names);
+
+ updates->enroll(std::move(conv2d_backprop_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Conv2DBackpropInput.test.cpp b/compiler/moco/import/src/Nodes/Conv2DBackpropInput.test.cpp
new file mode 100644
index 000000000..8c462bc3b
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Conv2DBackpropInput.test.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Conv2DBackpropInput.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *conv2d_backprop_input_01_pbtxtdata = STRING_CONTENT(
+ name: "ofm"
+ op: "Conv2DBackpropInput"
+ input: "outshape"
+ input: "weights"
+ input: "ifm"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+ attr {
+ key: "dilations"
+ value {
+ list {
+ i: 1
+ i: 1
+ i: 1
+ i: 1
+ }
+ }
+ }
+ attr {
+ key: "padding"
+ value {
+ s: "SAME"
+ }
+ }
+ attr {
+ key: "strides"
+ value {
+ list {
+ i: 1
+ i: 2
+ i: 2
+ i: 1
+ }
+ }
+ }
+);
+// clang-format on
+} // namespace
+
+TEST(TensorFlowImport, conv2d_backprop_input_01)
+{
+ TFNodeBuildTester tester;
+ moco::Conv2DBackpropInputGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(conv2d_backprop_input_01_pbtxtdata, nodedef));
+
+ // what to test:
+ // - All node inputs are valid
+ // - All attributes are as expected
+
+ tester.inputs({"outshape", "weights", "ifm"});
+ tester.output("ofm");
+ tester.run(nodedef, graphbuilder);
+
+ auto test_node = dynamic_cast<moco::TFConv2DBackpropInput *>(tester.output());
+ ASSERT_NE(test_node, nullptr);
+ ASSERT_EQ(test_node->padding(), "SAME");
+ ASSERT_EQ(test_node->data_layout(), "NHWC");
+ ASSERT_EQ(test_node->strides().size(), 4);
+}
diff --git a/compiler/moco/import/src/Nodes/DepthwiseConv2dNative.cpp b/compiler/moco/import/src/Nodes/DepthwiseConv2dNative.cpp
new file mode 100644
index 000000000..3991a4d51
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/DepthwiseConv2dNative.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/DepthwiseConv2dNative.h"
+
+#include <moco/IR/Nodes/TFDepthwiseConv2dNative.h>
+
+#include <moco/Names.h>
+
+#include "Convert.h"
+
+#include <plier/tf/Convert.h>
+#include <loco/IR/PermutingCodec.h>
+#include <stdex/Memory.h>
+#include <oops/UserExn.h>
+
+#include <cassert>
+
+using namespace plier::tf;
+
+namespace
+{
+using namespace moco;
+
+class TFDepthwiseConv2dNativeGraphUpdate final : public GraphUpdate
+{
+public:
+ TFDepthwiseConv2dNativeGraphUpdate(TFDepthwiseConv2dNative *node, std::vector<TensorName> names)
+ : _node(node), _names(names)
+ {
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFDepthwiseConv2dNative *_node;
+ std::vector<TensorName> _names;
+};
+
+void TFDepthwiseConv2dNativeGraphUpdate::input(const SymbolTable *node_table) const
+{
+ assert(_names.size() == 2);
+
+ auto input_node = node_table->node(_names[0]);
+ auto filter_node = node_table->node(_names[1]);
+ assert(input_node != nullptr);
+ assert(filter_node != nullptr);
+
+ _node->input(input_node);
+ _node->filter(filter_node);
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool DepthwiseConv2dNativeGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ if (node.input_size() != 2)
+ return false;
+
+ // note: even though "data_format" and "dilations" are not entered when a model is written,
+ // TF seems to generate those fields into a pb file.
+ if (!has_attrs(node, {"T", "data_format", "dilations", "padding", "strides"}))
+ return false;
+
+ auto data_layout = plier::tf::get_string_attr(node, "data_format");
+ if (!(data_layout == "NHWC" || data_layout == "NCHW"))
+ {
+ throw oops::UserExn("DepthwiseConv2dNative Unsupported data_format", node.name());
+ }
+
+ auto padding = moco::str_toupper(get_string_attr(node, "padding"));
+ if (!(padding == "VALID" || padding == "SAME"))
+ return false;
+
+ auto tf_strides = get_list_attr(node, "strides");
+ auto strides = as_int64_list(tf_strides);
+ if (!(strides.size() == 4))
+ {
+ throw oops::UserExn("DepthwiseConv2dNative strides requires rank 4", node.name());
+ }
+ auto stride_n = strides.at(0);
+ auto stride_h = strides.at(1);
+ auto stride_w = strides.at(2);
+ auto stride_c = strides.at(3);
+ if (!(stride_n == 1 && stride_c == 1) || !(stride_h == stride_w))
+ {
+ // TODO this message may need to be refined
+ throw oops::UserExn("DepthwiseConv2dNative strides requires N=C=1, H=W", node.name());
+ }
+
+ return true;
+}
+
+void DepthwiseConv2dNativeGraphBuilder::build(const tensorflow::NodeDef &node,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ auto depthwiseconv2d_native_node = graph->nodes()->create<TFDepthwiseConv2dNative>();
+ depthwiseconv2d_native_node->name(node.name());
+
+ // read attributes
+ auto data_layout = get_string_attr(node, "data_format");
+ depthwiseconv2d_native_node->data_layout(data_layout);
+
+ auto tf_strides = get_list_attr(node, "strides");
+ auto strides = as_int64_list(tf_strides);
+ depthwiseconv2d_native_node->strides(strides);
+
+ auto padding = moco::str_toupper(get_string_attr(node, "padding"));
+ depthwiseconv2d_native_node->padding(padding);
+
+ // save the name for graph link updates
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, depthwiseconv2d_native_node);
+
+ std::vector<TensorName> input_names;
+ input_names.push_back(TensorName(node.input(0))); // input
+ input_names.push_back(TensorName(node.input(1))); // kernel
+
+ // Record input names for the deferred graph-link update of this TFDepthwiseConv2dNative node
+ auto tfdepthwiseconv2dnative_update = stdex::make_unique<TFDepthwiseConv2dNativeGraphUpdate>(
+ depthwiseconv2d_native_node, input_names);
+
+ updates->enroll(std::move(tfdepthwiseconv2dnative_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/DepthwiseConv2dNative.test.cpp b/compiler/moco/import/src/Nodes/DepthwiseConv2dNative.test.cpp
new file mode 100644
index 000000000..c65283c1b
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/DepthwiseConv2dNative.test.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/DepthwiseConv2dNative.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *depthwise_conv2d_native_01_pbtxtdata = STRING_CONTENT(
+ name: "depthwise"
+ op: "DepthwiseConv2dNative"
+ input: "input"
+ input: "filter"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+ attr {
+ key: "dilations"
+ value {
+ list {
+ i: 1
+ i: 1
+ i: 1
+ i: 1
+ }
+ }
+ }
+ attr {
+ key: "padding"
+ value {
+ s: "VALID"
+ }
+ }
+ attr {
+ key: "strides"
+ value {
+ list {
+ i: 1
+ i: 1
+ i: 1
+ i: 1
+ }
+ }
+ }
+);
+// clang-format on
+} // namespace
+
+TEST(TensorFlowImport, Depthwise_conv2d_native)
+{
+ TFNodeBuildTester tester;
+ moco::DepthwiseConv2dNativeGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(depthwise_conv2d_native_01_pbtxtdata, nodedef));
+
+ // what to test:
+ // - All node inputs are valid
+ // - All attributes are as expected
+
+ tester.inputs({"input", "filter"});
+ tester.output("depthwise");
+ tester.run(nodedef, graphbuilder);
+
+ auto test_node = dynamic_cast<moco::TFDepthwiseConv2dNative *>(tester.output());
+ ASSERT_NE(test_node, nullptr);
+ ASSERT_EQ(test_node->padding(), "VALID");
+ ASSERT_EQ(test_node->data_layout(), "NHWC");
+ ASSERT_EQ(test_node->strides().size(), 4);
+}
diff --git a/compiler/moco/import/src/Nodes/FakeQuantWithMinMaxVars.cpp b/compiler/moco/import/src/Nodes/FakeQuantWithMinMaxVars.cpp
new file mode 100644
index 000000000..d2fa3d1eb
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/FakeQuantWithMinMaxVars.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/FakeQuantWithMinMaxVars.h"
+
+#include <moco/IR/Nodes/TFFakeQuantWithMinMaxVars.h>
+
+#include <moco/Names.h>
+
+#include "Convert.h"
+
+#include <plier/tf/Convert.h>
+#include <loco/IR/PermutingCodec.h>
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+using namespace plier::tf;
+
+namespace
+{
+using namespace moco;
+
+class TFFakeQuantWithMinMaxVarsGraphUpdate final : public GraphUpdate
+{
+public:
+ TFFakeQuantWithMinMaxVarsGraphUpdate(TFFakeQuantWithMinMaxVars *node,
+ std::vector<TensorName> names)
+ : _node(node), _names(names)
+ {
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFFakeQuantWithMinMaxVars *_node;
+ std::vector<TensorName> _names;
+};
+
+void TFFakeQuantWithMinMaxVarsGraphUpdate::input(const SymbolTable *node_table) const
+{
+ assert(_names.size() == 3);
+
+ auto inputs_node = node_table->node(_names[0]);
+ auto min_node = node_table->node(_names[1]);
+ auto max_node = node_table->node(_names[2]);
+ assert(inputs_node != nullptr);
+ assert(min_node != nullptr);
+ assert(max_node != nullptr);
+
+ _node->inputs(inputs_node);
+ _node->min(min_node);
+ _node->max(max_node);
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool FakeQuantWithMinMaxVarsGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ if (node.input_size() != 3)
+ return false;
+
+ // attrs "narrow_range", "num_bits" are optional
+ return true;
+}
+
+void FakeQuantWithMinMaxVarsGraphBuilder::build(const tensorflow::NodeDef &node,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ auto fakequant_node = graph->nodes()->create<TFFakeQuantWithMinMaxVars>();
+ fakequant_node->name(node.name());
+
+ // read optional attributes
+ if (has_attr(node, "num_bits"))
+ {
+ auto num_bits = get_int_attr(node, "num_bits");
+ fakequant_node->num_bits(num_bits);
+ }
+ if (has_attr(node, "narrow_range"))
+ {
+ auto narrow_range = get_bool_attr(node, "narrow_range");
+ fakequant_node->narrow_range(narrow_range);
+ }
+
+ // save the name for graph link updates
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, fakequant_node);
+
+ std::vector<TensorName> input_names;
+ input_names.push_back(TensorName(node.input(0))); // inputs
+ input_names.push_back(TensorName(node.input(1))); // min
+ input_names.push_back(TensorName(node.input(2))); // max
+
+ // Record input names for the deferred graph-link update of this TFFakeQuantWithMinMaxVars node
+ auto tffakequant_update =
+ stdex::make_unique<TFFakeQuantWithMinMaxVarsGraphUpdate>(fakequant_node, input_names);
+
+ updates->enroll(std::move(tffakequant_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/FakeQuantWithMinMaxVars.test.cpp b/compiler/moco/import/src/Nodes/FakeQuantWithMinMaxVars.test.cpp
new file mode 100644
index 000000000..40c494bb0
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/FakeQuantWithMinMaxVars.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/FakeQuantWithMinMaxVars.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *fakequant_01_pbtxtdata = STRING_CONTENT(
+ name: "FakeQuant"
+ op: "FakeQuantWithMinMaxVars"
+ input: "Input"
+ input: "FakeMin"
+ input: "FakeMax"
+ attr {
+ key: "narrow_range"
+ value { b: true }
+ }
+ attr {
+ key: "num_bits"
+ value { i: 16 }
+ }
+);
+// clang-format on
+} // namespace
+
+TEST(TensorFlowImport, FakeQuantWithMinMaxVars)
+{
+ TFNodeBuildTester tester;
+ moco::FakeQuantWithMinMaxVarsGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(fakequant_01_pbtxtdata, nodedef));
+
+ // what to test:
+ // - All node inputs are valid
+ // - All attributes are as expected
+
+ tester.inputs({"Input", "FakeMin", "FakeMax"});
+ tester.output("FakeQuant");
+ tester.run(nodedef, graphbuilder);
+
+ auto test_node = dynamic_cast<moco::TFFakeQuantWithMinMaxVars *>(tester.output());
+ ASSERT_NE(test_node, nullptr);
+ ASSERT_EQ(test_node->narrow_range(), true);
+ ASSERT_EQ(test_node->num_bits(), 16);
+}
diff --git a/compiler/moco/import/src/Nodes/FusedBatchNorm.cpp b/compiler/moco/import/src/Nodes/FusedBatchNorm.cpp
new file mode 100644
index 000000000..59f98017c
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/FusedBatchNorm.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/FusedBatchNorm.h"
+
+#include <moco/IR/Nodes/TFFusedBatchNorm.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for FusedBatchNorm node
+ */
+class FusedBatchNormGraphUpdate final : public GraphUpdate
+{
+public:
+ FusedBatchNormGraphUpdate(TFFusedBatchNorm *node, std::vector<TensorName> names)
+ : _node(node), _names(names)
+ {
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFFusedBatchNorm *_node;
+ std::vector<TensorName> _names;
+};
+
+void FusedBatchNormGraphUpdate::input(const SymbolTable *tensor_names) const
+{
+ assert(_names.size() == 5);
+
+ _node->x(tensor_names->node(_names[0]));
+ _node->scale(tensor_names->node(_names[1]));
+ _node->offset(tensor_names->node(_names[2]));
+ _node->mean(tensor_names->node(_names[3]));
+ _node->variance(tensor_names->node(_names[4]));
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool FusedBatchNormGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ if (node.input_size() != 5)
+ return false;
+
+ return plier::tf::has_attrs(node, {"epsilon"});
+}
+
+void FusedBatchNormGraphBuilder::build(const tensorflow::NodeDef &node,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ float epsilon = plier::tf::get_float_attr(node, "epsilon");
+
+ // creating TF dialect FusedBatchNorm node
+ auto tf_fbn = graph->nodes()->create<TFFusedBatchNorm>();
+ tf_fbn->name(node.name());
+ tf_fbn->epsilon(epsilon);
+
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, tf_fbn);
+
+ std::vector<TensorName> fbn_input_names;
+ fbn_input_names.push_back(TensorName(node.input(0))); // input
+ fbn_input_names.push_back(TensorName(node.input(1))); // scale
+ fbn_input_names.push_back(TensorName(node.input(2))); // offset
+ fbn_input_names.push_back(TensorName(node.input(3))); // mean
+ fbn_input_names.push_back(TensorName(node.input(4))); // variance
+
+ auto tf_fbn_update = stdex::make_unique<FusedBatchNormGraphUpdate>(tf_fbn, fbn_input_names);
+ updates->enroll(std::move(tf_fbn_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/FusedBatchNorm.test.cpp b/compiler/moco/import/src/Nodes/FusedBatchNorm.test.cpp
new file mode 100644
index 000000000..0f2e037b8
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/FusedBatchNorm.test.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/FusedBatchNorm.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *fbn_basic_pbtxt = STRING_CONTENT(
+ name: "FBN_01"
+ op: "FusedBatchNorm"
+ input: "input"
+ input: "gamma"
+ input: "beta"
+ input: "FBN_01/mean"
+ input: "FBN_01/variance"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+ attr {
+ key: "epsilon"
+ value {
+ f: 0.001
+ }
+ }
+ attr {
+ key: "is_training"
+ value {
+ b: false
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, tf_fbn_basic)
+{
+ TFNodeBuildTester tester;
+ moco::FusedBatchNormGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(fbn_basic_pbtxt, nodedef));
+
+ // what to test:
+ // - there should exist a TFFusedBatchNorm
+ // - input() should not be nullptr
+ // - gamma() should not be nullptr
+ // - beta() should not be nullptr
+ // - mean() should not be nullptr
+ // - variance() should not be nullptr
+ // - epsilon() value should match
+
+ tester.inputs({"input", "gamma", "beta", "FBN_01/mean", "FBN_01/variance"});
+ tester.output("FBN_01");
+ tester.run(nodedef, graphbuilder);
+
+ auto test_node = dynamic_cast<moco::TFFusedBatchNorm *>(tester.output());
+ ASSERT_NE(test_node, nullptr);
+ ASSERT_EQ(test_node->epsilon(), 0.001f);
+}
diff --git a/compiler/moco/import/src/Nodes/Identity.cpp b/compiler/moco/import/src/Nodes/Identity.cpp
new file mode 100644
index 000000000..8ca0e2d01
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Identity.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Identity.h"
+
+#include <moco/IR/Nodes/TFIdentity.h>
+
+#include <moco/Names.h>
+#include <loco.h>
+#include <stdex/Memory.h>
+
+#include <vector>
+
+namespace
+{
+
+using namespace moco;
+
+class TFIdentityGraphUpdate final : public GraphUpdate
+{
+public:
+ TFIdentityGraphUpdate(TFIdentity *node, const std::vector<TensorName> &names)
+ : _node(node), _names(names)
+ {
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFIdentity *_node;
+ const std::vector<TensorName> _names;
+};
+
+void TFIdentityGraphUpdate::input(const SymbolTable *tensor_names) const
+{
+ for (auto &name : _names)
+ {
+ loco::Node *target = tensor_names->node(name);
+ _node->input(target);
+ }
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool IdentityGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ if (node.input_size() < 1) // from TensorFlow lite toco
+ return false;
+
+ return true;
+}
+
+void IdentityGraphBuilder::build(const tensorflow::NodeDef &node,
+ GraphBuilderContext *context) const
+{
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // Create an Identity node
+ auto identity_node = graph->nodes()->create<TFIdentity>();
+ identity_node->name(node.name());
+
+ // register string-name to node
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, identity_node);
+
+ // Queue node input update
+ // TODO: Check if we really need multiple input handlings
+ std::vector<TensorName> names;
+ for (int i = 0; i < node.input_size(); ++i)
+ {
+ names.emplace_back(TensorName(node.input(i)));
+ }
+ auto update = stdex::make_unique<TFIdentityGraphUpdate>(identity_node, names);
+ updates->enroll(std::move(update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/MaxPool.cpp b/compiler/moco/import/src/Nodes/MaxPool.cpp
new file mode 100644
index 000000000..63275a3b8
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/MaxPool.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/MaxPool.h"
+
+#include <moco/IR/Nodes/TFMaxPool.h>
+
+#include <moco/Names.h>
+
+#include "Convert.h"
+
+#include <loco.h>
+#include <loco/IR/PermutingCodec.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+#include <oops/UserExn.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+using namespace moco;
+
+class TFMaxPoolGraphUpdate final : public GraphUpdate
+{
+public:
+ TFMaxPoolGraphUpdate(TFMaxPool *node, const TensorName &name)
+ : _maxpool_node(node), _input_name(name)
+ {
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFMaxPool *_maxpool_node;
+ const TensorName _input_name;
+};
+
+void TFMaxPoolGraphUpdate::input(const SymbolTable *node_table) const
+{
+ loco::Node *input_node = node_table->node(_input_name);
+ _maxpool_node->input(input_node);
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool MaxPoolGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ // note: even though "data_format" is not entered when a model is written,
+ // TF seems to generate a "data_format" field into the pb file
+ if (!plier::tf::has_attrs(node, {"T", "data_format", "ksize", "padding", "strides"}))
+ return false;
+
+ auto data_layout = plier::tf::get_string_attr(node, "data_format");
+ if (!(data_layout == "NHWC" || data_layout == "NCHW"))
+ {
+ throw oops::UserExn("MaxPool Unsupported data_format", node.name());
+ }
+
+ auto tf_ksize = plier::tf::get_list_attr(node, "ksize");
+ auto ksize = plier::tf::as_int64_list(tf_ksize);
+ if (ksize.size() != 4)
+ {
+ // TODO support ksize length for 1 and 2
+ throw oops::UserExn("MaxPool ksize requires rank 4", node.name());
+ }
+
+ auto tf_strides = plier::tf::get_list_attr(node, "strides");
+ auto strides = plier::tf::as_int64_list(tf_strides);
+ if (strides.size() != 4)
+ {
+ // TODO support strides length for 1 and 2
+ throw oops::UserExn("MaxPool strides requires rank 4", node.name());
+ }
+
+ return true;
+}
+
+void MaxPoolGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // name of loco nodes
+ ::std::string node_name = node.name();
+
+ // tensorflow data_format: one of NHWC or NCHW.
+ auto data_layout = plier::tf::get_string_attr(node, "data_format");
+ auto maxPool_node = graph->nodes()->create<TFMaxPool>();
+ maxPool_node->name(node.name());
+ maxPool_node->data_layout(data_layout);
+
+ // padding
+ auto padding = moco::str_toupper(plier::tf::get_string_attr(node, "padding"));
+ maxPool_node->padding(padding);
+
+ // ksize
+ auto tf_ksize = plier::tf::get_list_attr(node, "ksize");
+ auto ksize = plier::tf::as_int64_list(tf_ksize);
+ assert(ksize.size() == 4);
+ maxPool_node->ksize(ksize);
+
+ // strides
+ auto tf_strides = plier::tf::get_list_attr(node, "strides");
+ auto strides = plier::tf::as_int64_list(tf_strides);
+ assert(strides.size() == 4);
+ maxPool_node->strides(strides);
+
+ // To set the input node of maxPool_node with node_name
+ TensorName output_name(node_name, 0);
+ tensor_names->enroll(output_name, maxPool_node);
+
+ // Record ifm input to maxPool_node
+ auto update = stdex::make_unique<TFMaxPoolGraphUpdate>(maxPool_node, TensorName(node.input(0)));
+
+ updates->enroll(std::move(update));
+}
+
+} // namespace moco
+
+// TODO Consider a case when TF MaxPool is for 3D.
+// MaxPool works for 2D and other dimensions, such as 3D
+// So, in the future, some other GraphBuilder may decide whether MaxPoolGraphBuilder
+// or another GraphBuilder is used for TF MaxPool
diff --git a/compiler/moco/import/src/Nodes/MaxPool.test.cpp b/compiler/moco/import/src/Nodes/MaxPool.test.cpp
new file mode 100644
index 000000000..a85e2027b
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/MaxPool.test.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/MaxPool.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *maxpool_01_pbtxtdata = STRING_CONTENT(
+ name: "maxpool"
+ op: "MaxPool"
+ input: "const/float"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+ attr {
+ key: "ksize"
+ value {
+ list {
+ i: 1
+ i: 2
+ i: 3
+ i: 1
+ }
+ }
+ }
+ attr {
+ key: "padding"
+ value {
+ s: "VALID"
+ }
+ }
+ attr {
+ key: "strides"
+ value {
+ list {
+ i: 1
+ i: 3
+ i: 2
+ i: 1
+ }
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, MaxPool_01)
+{
+ TFNodeBuildTester tester;
+ moco::MaxPoolGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(maxpool_01_pbtxtdata, nodedef));
+
+ // what to test:
+ // - there should exist TFMaxPool
+ // - attributes value should match
+
+ tester.inputs({"const/float"});
+ tester.output("maxpool");
+ tester.run(nodedef, graphbuilder);
+
+ auto test_node = dynamic_cast<moco::TFMaxPool *>(tester.output());
+ ASSERT_NE(test_node, nullptr);
+ ASSERT_EQ(test_node->data_layout(), "NHWC");
+ ASSERT_EQ(test_node->padding(), "VALID");
+ ASSERT_EQ(test_node->ksize(), std::vector<int64_t>({1, 2, 3, 1}));
+ ASSERT_EQ(test_node->strides(), std::vector<int64_t>({1, 3, 2, 1}));
+}
diff --git a/compiler/moco/import/src/Nodes/Maximum.cpp b/compiler/moco/import/src/Nodes/Maximum.cpp
new file mode 100644
index 000000000..43bbbabe6
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Maximum.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Maximum.h"
+
+#include <moco/IR/Nodes/TFMaximum.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for TF Maximum node
+ */
+class TFMaximumGraphUpdate final : public GraphUpdate
+{
+public:
+ TFMaximumGraphUpdate(TFMaximum *node, std::vector<TensorName> names) : _node(node), _names(names)
+ {
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFMaximum *_node;
+ std::vector<TensorName> _names;
+};
+
+void TFMaximumGraphUpdate::input(const SymbolTable *tensor_names) const
+{
+ assert(_names.size() == 2);
+
+ _node->x(tensor_names->node(_names[0]));
+ _node->y(tensor_names->node(_names[1]));
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool MaximumGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ return node.input_size() == 2;
+}
+
+void MaximumGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // creating TF dialect Maximum node
+ auto tf_maximum = graph->nodes()->create<TFMaximum>();
+ tf_maximum->name(node.name());
+
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, tf_maximum);
+
+ std::vector<TensorName> add_input_names;
+ add_input_names.push_back(TensorName(node.input(0))); // x
+ add_input_names.push_back(TensorName(node.input(1))); // y
+
+ auto tf_maximum_update = stdex::make_unique<TFMaximumGraphUpdate>(tf_maximum, add_input_names);
+ updates->enroll(std::move(tf_maximum_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Maximum.test.cpp b/compiler/moco/import/src/Nodes/Maximum.test.cpp
new file mode 100644
index 000000000..2a8b63622
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Maximum.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Maximum.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *maximum_basic_pbtxt = STRING_CONTENT(
+ name: "MAXIMUM_01"
+ op: "Maximum"
+ input: "input_01"
+ input: "input_02"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, tf_maximum_basic)
+{
+ TFNodeBuildTester tester;
+ moco::MaximumGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(maximum_basic_pbtxt, nodedef));
+
+ // what to test:
+ // - TFMaximum node should exist
+ // - both inputs x() and y() should not be null
+
+ tester.inputs({"input_01", "input_02"});
+ tester.output("MAXIMUM_01");
+ tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/Mean.cpp b/compiler/moco/import/src/Nodes/Mean.cpp
new file mode 100644
index 000000000..30fb0f1f7
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Mean.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Mean.h"
+
+#include <moco/IR/Nodes/TFMean.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+
+namespace
+{
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for Mean node
+ */
+class MeanGraphUpdate final : public GraphUpdate
+{
+public:
+ MeanGraphUpdate(TFMean *node, const TensorName &&input_name,
+ const TensorName &&reduction_indices_name)
+ : _node(node), _input_name(input_name), _reduction_indices_name(reduction_indices_name)
+ {
+ // DO NOTHING
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFMean *_node;
+ const TensorName _input_name;
+ const TensorName _reduction_indices_name;
+};
+
+void MeanGraphUpdate::input(const SymbolTable *table) const
+{
+ loco::Node *input_node = table->node(_input_name);
+ loco::Node *reduction_indices_node = table->node(_reduction_indices_name);
+ _node->input(input_node);
+ _node->reduction_indices(reduction_indices_node);
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool MeanGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ if (node.input_size() != 2)
+ return false;
+
+ if (!plier::tf::has_attrs(node, {"T", "Tidx", "keep_dims"}))
+ return false;
+
+ auto dtype = plier::tf::get_datatype_attr(node, "Tidx");
+ if (dtype != tensorflow::DataType::DT_INT32 && dtype != tensorflow::DataType::DT_INT64)
+ return false;
+
+ return true;
+}
+
+void MeanGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // creating TF dialect Mean node
+ auto tf_mean = graph->nodes()->create<TFMean>();
+ tf_mean->name(node.name());
+ tf_mean->keep_dims(plier::tf::get_bool_attr(node, "keep_dims"));
+
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, tf_mean);
+
+ auto update = stdex::make_unique<MeanGraphUpdate>(tf_mean, TensorName(node.input(0)),
+ TensorName(node.input(1)));
+ updates->enroll(std::move(update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Mean.test.cpp b/compiler/moco/import/src/Nodes/Mean.test.cpp
new file mode 100644
index 000000000..6321fad16
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Mean.test.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Mean.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// clang-format off
+const char *mean_true_pbtxtdata = STRING_CONTENT(
+ name: "Mean"
+ op: "Mean"
+ input: "Placeholder"
+ input: "Const"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "Tidx"
+ value { type: DT_INT32 }
+ }
+ attr {
+ key: "keep_dims"
+ value { b: true }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, mean_true)
+{
+ TFNodeBuildTester tester;
+ moco::MeanGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(mean_true_pbtxtdata, nodedef));
+
+ // what to test:
+ // - there should exist TFMean
+ // - input node should not be nullptr
+ // - reduction_indices node should not be nullptr
+ // - keep_dims attribute is set same as pbtxt
+
+ tester.inputs({"Placeholder", "Const"});
+ tester.output("Mean");
+ tester.run(nodedef, graphbuilder);
+
+ auto test_node = dynamic_cast<moco::TFMean *>(tester.output());
+ ASSERT_NE(test_node, nullptr);
+ ASSERT_EQ(test_node->keep_dims(), true);
+}
+
+namespace
+{
+
+// clang-format off
+const char *mean_false_pbtxtdata = STRING_CONTENT(
+ name: "Mean"
+ op: "Mean"
+ input: "Placeholder"
+ input: "Const"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "Tidx"
+ value { type: DT_INT32 }
+ }
+ attr {
+ key: "keep_dims"
+ value { b: false }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, mean_false)
+{
+ TFNodeBuildTester tester;
+ moco::MeanGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(mean_false_pbtxtdata, nodedef));
+
+ // what to test:
+ // - there should exist TFMean
+ // - input node should not be nullptr
+ // - reduction_indices node should not be nullptr
+ // - keep_dims attribute is set same as pbtxt
+
+ tester.inputs({"Placeholder", "Const"});
+ tester.output("Mean");
+ tester.run(nodedef, graphbuilder);
+
+ auto test_node = dynamic_cast<moco::TFMean *>(tester.output());
+ ASSERT_NE(test_node, nullptr);
+ ASSERT_EQ(test_node->keep_dims(), false);
+}
diff --git a/compiler/moco/import/src/Nodes/Mul.cpp b/compiler/moco/import/src/Nodes/Mul.cpp
new file mode 100644
index 000000000..ab926b59e
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Mul.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Mul.h"
+
+#include <moco/IR/Nodes/TFMul.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for TF Mul node
+ */
+class TFMulGraphUpdate final : public GraphUpdate
+{
+public:
+ TFMulGraphUpdate(TFMul *node, std::vector<TensorName> names) : _node(node), _names(names) {}
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFMul *_node;
+ std::vector<TensorName> _names;
+};
+
+void TFMulGraphUpdate::input(const SymbolTable *tensor_names) const
+{
+ assert(_names.size() == 2);
+
+ _node->x(tensor_names->node(_names[0]));
+ _node->y(tensor_names->node(_names[1]));
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool MulGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ return node.input_size() == 2;
+}
+
+void MulGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // creating TF dialect Mul node
+ auto tf_mul = graph->nodes()->create<TFMul>();
+ tf_mul->name(node.name());
+
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, tf_mul);
+
+ std::vector<TensorName> add_input_names;
+ add_input_names.push_back(TensorName(node.input(0))); // x
+ add_input_names.push_back(TensorName(node.input(1))); // y
+
+ auto tf_mul_update = stdex::make_unique<TFMulGraphUpdate>(tf_mul, add_input_names);
+ updates->enroll(std::move(tf_mul_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Mul.test.cpp b/compiler/moco/import/src/Nodes/Mul.test.cpp
new file mode 100644
index 000000000..92730b377
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Mul.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Mul.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *mul_basic_pbtxt = STRING_CONTENT(
+ name: "MUL_01"
+ op: "Mul"
+ input: "input_01"
+ input: "input_02"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, tf_mul_basic)
+{
+ TFNodeBuildTester tester;
+ moco::MulGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(mul_basic_pbtxt, nodedef));
+
+ // what to test:
+ // - TFMul node should exist
+ // - both inputs x() and y() should not be null
+
+ tester.inputs({"input_01", "input_02"});
+ tester.output("MUL_01");
+ tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/Pack.cpp b/compiler/moco/import/src/Nodes/Pack.cpp
new file mode 100644
index 000000000..45815a30e
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Pack.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Pack.h"
+
+#include <moco/IR/Nodes/TFPack.h>
+#include <moco/IR/Nodes/TFConst.h>
+
+#include <moco/Names.h>
+
+#include <loco.h>
+#include <loco/IR/NodeShape.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+
+#include <cassert>
+
+namespace
+{
+
+using namespace moco;
+
+class TFPackGraphUpdate final : public GraphUpdate
+{
+public:
+ TFPackGraphUpdate(TFPack *node, std::vector<TensorName> names) : _node(node), _names(names) {}
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFPack *_node;
+ std::vector<TensorName> _names;
+};
+
+void TFPackGraphUpdate::input(const SymbolTable *tensor_names) const
+{
+ uint32_t num_values = _names.size();
+ assert(num_values >= 1);
+
+ for (uint32_t i = 0; i < num_values; ++i)
+ {
+ auto input_node = tensor_names->node(_names[i]);
+ assert(input_node != nullptr);
+ _node->values(i, input_node);
+ }
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool PackGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ if (!plier::tf::has_attrs(node, {"T", "N", "axis"}))
+ return false;
+
+ const int num_inputs = node.input_size();
+ return (num_inputs >= 1) && (num_inputs == plier::tf::get_int_attr(node, "N"));
+}
+
+void PackGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ auto graph = context->graph();
+ auto tensor_names = context->tensor_names();
+ auto updates = context->updates();
+
+ const int num_inputs = node.input_size();
+ std::vector<TensorName> input_names;
+ auto pack_node = graph->nodes()->create<TFPack>(num_inputs);
+ pack_node->name(node.name());
+
+ for (int ni = 0; ni < num_inputs; ++ni)
+ {
+ input_names.push_back(TensorName(node.input(ni)));
+ }
+
+ pack_node->axis(plier::tf::get_int_attr(node, "axis"));
+
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, pack_node);
+
+ auto update = stdex::make_unique<TFPackGraphUpdate>(pack_node, input_names);
+ updates->enroll(std::move(update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Pack.test.cpp b/compiler/moco/import/src/Nodes/Pack.test.cpp
new file mode 100644
index 000000000..01774a906
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Pack.test.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Pack.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// clang-format off
+const char *pack_01_pbtxtdata = STRING_CONTENT(
+ name: "Pack"
+ op: "Pack"
+ input: "input_1"
+ input: "input_2"
+ input: "input_3"
+ input: "input_4"
+ attr {
+ key: "N"
+ value {
+ i: 4
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "axis"
+ value {
+ i: 0
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, tf_pack_basic)
+{
+ TFNodeBuildTester tester;
+ moco::PackGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(pack_01_pbtxtdata, nodedef));
+
+ // what to test:
+ // - there should exist TFPack
+ // - there should be four values
+ // - values(idx) should not be nullptr
+ // - axis() should be 0
+
+ tester.inputs({"input_1", "input_2", "input_3", "input_4"});
+ tester.output("Pack");
+ tester.run(nodedef, graphbuilder);
+
+ auto test_node = dynamic_cast<moco::TFPack *>(tester.output());
+ ASSERT_NE(test_node, nullptr);
+ ASSERT_EQ(test_node->N(), 4);
+ ASSERT_NE(test_node->values(0), nullptr);
+ ASSERT_NE(test_node->values(1), nullptr);
+ ASSERT_NE(test_node->values(2), nullptr);
+ ASSERT_NE(test_node->values(3), nullptr);
+ ASSERT_EQ(test_node->axis(), 0);
+}
diff --git a/compiler/moco/import/src/Nodes/Pad.cpp b/compiler/moco/import/src/Nodes/Pad.cpp
new file mode 100644
index 000000000..262a68fa0
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Pad.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Pad.h"
+
+#include <moco/IR/Nodes/TFPad.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for TF Pad node
+ *
+ * Records the TFPad node and its two input tensor names; input() wires the
+ * operands once every node has been created and enrolled in the SymbolTable.
+ */
+class TFPadGraphUpdate final : public GraphUpdate
+{
+public:
+  TFPadGraphUpdate(TFPad *node, std::vector<TensorName> names) : _node(node), _names(names) {}
+
+  void input(const SymbolTable *) const override;
+
+private:
+  TFPad *_node;
+  std::vector<TensorName> _names;
+};
+
+void TFPadGraphUpdate::input(const SymbolTable *table) const
+{
+  // _names order is fixed by PadGraphBuilder::build: [0] = input, [1] = paddings
+  assert(_names.size() == 2);
+
+  _node->input(table->node(_names[0]));
+  _node->paddings(table->node(_names[1]));
+}
+
+} // namespace
+
+namespace moco
+{
+
+// Pad is valid only with exactly two inputs (tensor, paddings) and the
+// mandatory "T" / "Tpaddings" type attributes.
+bool PadGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+  return node.input_size() == 2 && plier::tf::has_attrs(node, {"T", "Tpaddings"});
+}
+
+// Creates a TF-dialect TFPad node for the given NodeDef, enrolls its output
+// name, and queues a GraphUpdate that will connect inputs in a later pass.
+void PadGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  loco::Graph *graph = context->graph();
+  SymbolTable *tensor_names = context->tensor_names();
+  UpdateQueue *updates = context->updates();
+
+  // creating TF dialect Pad node
+  auto tf_pad = graph->nodes()->create<TFPad>();
+  tf_pad->name(node.name());
+
+  // register string-name to node
+  TensorName output_name(node.name(), 0);
+  tensor_names->enroll(output_name, tf_pad);
+
+  std::vector<TensorName> add_input_names;
+  add_input_names.push_back(TensorName(node.input(0))); // input
+  add_input_names.push_back(TensorName(node.input(1))); // paddings
+
+  // Queue node input update; actual wiring happens after all nodes exist
+  auto tf_pad_update = stdex::make_unique<TFPadGraphUpdate>(tf_pad, add_input_names);
+  updates->enroll(std::move(tf_pad_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Pad.test.cpp b/compiler/moco/import/src/Nodes/Pad.test.cpp
new file mode 100644
index 000000000..19769cf6b
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Pad.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Pad.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+// NodeDef text fixture: a float Pad op with int32 paddings.
+const char *pad_basic_pbtxt = STRING_CONTENT(
+  name: "Pad"
+  op: "Pad"
+  input: "input"
+  input: "paddings"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+);
+// clang-format on
+
+} // namespace
+
+// Imports the Pad fixture and checks that a TFPad node is produced with
+// both operands connected (checked by TFNodeBuildTester::run).
+TEST(TensorFlowImport, tf_pad_basic)
+{
+  TFNodeBuildTester tester;
+  moco::PadGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(pad_basic_pbtxt, nodedef));
+
+  // what to test:
+  // - TFPad node should exist
+  // - input input() should not be null
+  // - input paddings() should not be null
+
+  tester.inputs({"input", "paddings"});
+  tester.output("Pad");
+  tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/Placeholder.cpp b/compiler/moco/import/src/Nodes/Placeholder.cpp
new file mode 100644
index 000000000..0033f664b
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Placeholder.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Placeholder.h"
+
+#include <moco/IR/Nodes/TFPlaceholder.h>
+
+#include <moco/Names.h>
+#include <plier/tf/Convert.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace moco
+{
+
+// A Placeholder is importable only when it carries "dtype" and "shape"
+// attributes and its dtype is FLOAT32 (the only type supported so far).
+bool PlaceholderGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+  if (!plier::tf::has_attrs(node, {"dtype", "shape"}))
+    return false;
+
+  loco::DataType dtype = plier::tf::as_loco_datatype(plier::tf::get_datatype_attr(node, "dtype"));
+  if (dtype != loco::DataType::FLOAT32)
+    return false;
+  // TODO support other types
+
+  return true;
+}
+
+// Creates a TFPlaceholder carrying the NodeDef's dtype and (fully known)
+// shape, then enrolls it under the node's output name. Placeholder has no
+// inputs, so no GraphUpdate is queued.
+void PlaceholderGraphBuilder::build(const tensorflow::NodeDef &node,
+                                    GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  loco::Graph *graph = context->graph();
+  SymbolTable *tensor_names = context->tensor_names();
+
+  loco::DataType dtype = plier::tf::as_loco_datatype(plier::tf::get_datatype_attr(node, "dtype"));
+  const auto &shape = plier::tf::get_shape_attr(node, "shape");
+  // TODO handle for unknown rank
+  assert(!shape.unknown_rank());
+  int64_t num_dims = shape.dim_size();
+
+  // TODO support other types
+  assert(dtype == loco::DataType::FLOAT32);
+
+  // Create a "Placeholder" node as an input
+  auto placeholder_node = graph->nodes()->create<moco::TFPlaceholder>();
+  placeholder_node->name(node.name());
+  placeholder_node->dtype(dtype);
+
+  // Setting shape info.
+  placeholder_node->rank(num_dims);
+  for (int64_t d = 0; d < num_dims; d++)
+  {
+    // NOTE(review): std::numeric_limits/uint32_t are used without a visible
+    // #include <limits>/<cstdint> here — presumably pulled in transitively;
+    // confirm and add explicit includes.
+    assert(shape.dim(d).size() < std::numeric_limits<uint32_t>::max());
+    int64_t dim_value = shape.dim(d).size();
+    if (dim_value >= 0)
+    {
+      uint32_t dim_value32 = static_cast<uint32_t>(dim_value);
+      placeholder_node->dim(d) = dim_value32;
+    }
+    else
+    {
+      // negative size means an unknown dimension in TensorFlow's shape proto
+      placeholder_node->dim(d).unset();
+      // TODO Remove assert() and do implement
+      // NOTE Current implementation assumes all dims are known
+      assert(false);
+    }
+  }
+
+  // register string-name to node
+  TensorName output_name(node.name(), 0);
+  tensor_names->enroll(output_name, placeholder_node);
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Placeholder.test.cpp b/compiler/moco/import/src/Nodes/Placeholder.test.cpp
new file mode 100644
index 000000000..80488ce39
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Placeholder.test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Placeholder.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *known_batch_pbtxt = STRING_CONTENT(
+ name: "placeholder"
+ op: "Placeholder"
+ attr {
+ key: "dtype" value { type: DT_FLOAT }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim { size: 1024 }
+ dim { size: 2 }
+ dim { size: 3 }
+ dim { size: 4 }
+ }
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+// Imports a float32 Placeholder with a fully-known 4-D shape and verifies
+// every dimension is propagated to the created TFPlaceholder node.
+TEST(TensorFlowImport, placeholder_known_batch)
+{
+  TFNodeBuildTester tester;
+  moco::PlaceholderGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(known_batch_pbtxt, nodedef));
+
+  // what to test:
+  // - TFPlaceholder node should exist
+  // - shape attribute should match
+
+  tester.inputs({});
+  tester.output("placeholder");
+  tester.run(nodedef, graphbuilder);
+
+  auto test_node = dynamic_cast<moco::TFPlaceholder *>(tester.output());
+  // Use a gtest assertion, not C assert(): assert() is compiled out under
+  // NDEBUG, which would turn a failed cast into a nullptr dereference below.
+  ASSERT_NE(test_node, nullptr);
+  ASSERT_TRUE(test_node->dim(0).known() && test_node->dim(0).value() == 1024);
+  ASSERT_TRUE(test_node->dim(1).known() && test_node->dim(1).value() == 2);
+  ASSERT_TRUE(test_node->dim(2).known() && test_node->dim(2).value() == 3);
+  ASSERT_TRUE(test_node->dim(3).known() && test_node->dim(3).value() == 4);
+}
diff --git a/compiler/moco/import/src/Nodes/RealDiv.cpp b/compiler/moco/import/src/Nodes/RealDiv.cpp
new file mode 100644
index 000000000..de3d57673
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/RealDiv.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/RealDiv.h"
+
+#include <moco/IR/Nodes/TFRealDiv.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for TF RealDiv node
+ *
+ * Holds the TFRealDiv node and its two operand names; input() resolves the
+ * names and connects x and y after all nodes have been created.
+ */
+class TFRealDivGraphUpdate final : public GraphUpdate
+{
+public:
+  TFRealDivGraphUpdate(TFRealDiv *node, std::vector<TensorName> names) : _node(node), _names(names)
+  {
+  }
+
+  void input(const SymbolTable *) const override;
+
+private:
+  TFRealDiv *_node;
+  std::vector<TensorName> _names;
+};
+
+void TFRealDivGraphUpdate::input(const SymbolTable *tensor_names) const
+{
+  // _names order is fixed by RealDivGraphBuilder::build: [0] = x, [1] = y
+  assert(_names.size() == 2);
+
+  _node->x(tensor_names->node(_names[0]));
+  _node->y(tensor_names->node(_names[1]));
+}
+
+} // namespace
+
+namespace moco
+{
+// RealDiv is a binary op: valid only with exactly two inputs (x, y)
+bool RealDivGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+  return node.input_size() == 2;
+}
+
+// Creates a TF-dialect TFRealDiv node, enrolls its output name, and queues
+// a GraphUpdate that will connect x and y in a later pass.
+void RealDivGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  loco::Graph *graph = context->graph();
+  SymbolTable *tensor_names = context->tensor_names();
+  UpdateQueue *updates = context->updates();
+
+  // creating TF dialect RealDiv node
+  auto tf_div = graph->nodes()->create<TFRealDiv>();
+  tf_div->name(node.name());
+
+  // register string-name to node
+  TensorName output_name(node.name(), 0);
+  tensor_names->enroll(output_name, tf_div);
+
+  std::vector<TensorName> div_input_names;
+  div_input_names.push_back(TensorName(node.input(0))); // x
+  div_input_names.push_back(TensorName(node.input(1))); // y
+
+  // Queue node input update
+  auto tf_div_update = stdex::make_unique<TFRealDivGraphUpdate>(tf_div, div_input_names);
+  updates->enroll(std::move(tf_div_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/RealDiv.test.cpp b/compiler/moco/import/src/Nodes/RealDiv.test.cpp
new file mode 100644
index 000000000..cda2d3738
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/RealDiv.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/RealDiv.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+// NodeDef text fixture: a float RealDiv op with two inputs.
+const char *div_basic_pbtxt = STRING_CONTENT(
+  name: "DIV_01"
+  op: "RealDiv"
+  input: "input_01"
+  input: "input_02"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+);
+// clang-format on
+
+} // namespace
+
+// Imports the RealDiv fixture and checks that a TFRealDiv node is produced
+// with both operands connected (checked by TFNodeBuildTester::run).
+TEST(TensorFlowImport, tf_div_basic)
+{
+  TFNodeBuildTester tester;
+  moco::RealDivGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(div_basic_pbtxt, nodedef));
+
+  // what to test:
+  // - TFRealDiv node should exist
+  // - both inputs x() and y() should not be null
+
+  tester.inputs({"input_01", "input_02"});
+  tester.output("DIV_01");
+  tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/Relu.cpp b/compiler/moco/import/src/Nodes/Relu.cpp
new file mode 100644
index 000000000..eedc8155d
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Relu.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Relu.h"
+
+#include <moco/IR/Nodes/TFRelu.h>
+
+#include <moco/Names.h>
+#include <loco.h>
+#include <stdex/Memory.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate that connects the single Relu operand once all nodes exist
+ */
+class TFReluGraphUpdate final : public GraphUpdate
+{
+public:
+  // NOTE was 'const TensorName &&': a const rvalue reference cannot be moved
+  // from, so it only ever produced a copy. A const lvalue reference has the
+  // same copy cost and still binds to the temporaries callers pass.
+  TFReluGraphUpdate(TFRelu *node, const TensorName &name) : _node(node), _name(name) {}
+
+  void input(const SymbolTable *) const override;
+
+private:
+  TFRelu *_node;
+  const TensorName _name;
+};
+
+void TFReluGraphUpdate::input(const SymbolTable *table) const
+{
+  // resolve the recorded input name and wire it as the features operand
+  loco::Node *target = table->node(_name);
+  _node->features(target);
+}
+
+} // namespace
+
+namespace moco
+{
+
+// ReLU is a unary op: valid only with exactly one input
+bool ReluGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+  return node.input_size() == 1;
+}
+
+// Creates a TF-dialect TFRelu node, enrolls its output name, and queues a
+// GraphUpdate that will connect the features operand in a later pass.
+void ReluGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  loco::Graph *graph = context->graph();
+  SymbolTable *tensor_names = context->tensor_names();
+  UpdateQueue *updates = context->updates();
+
+  // Create a "TFRelu" node for Relu
+  auto relu_node = graph->nodes()->create<TFRelu>();
+  relu_node->name(node.name());
+
+  // register string-name to node
+  TensorName output_name(node.name(), 0);
+  tensor_names->enroll(output_name, relu_node);
+
+  // Queue node input update
+  auto update = stdex::make_unique<TFReluGraphUpdate>(relu_node, TensorName(node.input(0)));
+  updates->enroll(std::move(update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Relu.test.cpp b/compiler/moco/import/src/Nodes/Relu.test.cpp
new file mode 100644
index 000000000..a20ee081d
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Relu.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Relu.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// clang-format off
+// NodeDef text fixture: a float Relu op fed by a Placeholder.
+const char *relu_01_pbtxtdata = STRING_CONTENT(
+  name: "ReLU"
+  op: "Relu"
+  input: "Placeholder"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+);
+// clang-format on
+
+} // namespace
+
+// Imports the Relu fixture and checks that a TFRelu node is produced with
+// its features operand connected (checked by TFNodeBuildTester::run).
+TEST(TensorFlowImport, relu_01)
+{
+  TFNodeBuildTester tester;
+  moco::ReluGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(relu_01_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFRelu
+  // - features node should not be nullptr
+
+  tester.inputs({"Placeholder"});
+  tester.output("ReLU");
+  tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/Relu6.cpp b/compiler/moco/import/src/Nodes/Relu6.cpp
new file mode 100644
index 000000000..4700ba408
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Relu6.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Relu6.h"
+
+#include <moco/IR/Nodes/TFRelu6.h>
+
+#include <stdex/Memory.h>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate that connects the single Relu6 operand once all nodes exist
+ */
+class TFRelu6GraphUpdate final : public GraphUpdate
+{
+public:
+  // NOTE was 'const TensorName &&': a const rvalue reference cannot be moved
+  // from, so it only ever produced a copy. A const lvalue reference has the
+  // same copy cost and still binds to the temporaries callers pass.
+  TFRelu6GraphUpdate(TFRelu6 *node, const TensorName &name) : _node(node), _name(name) {}
+
+  void input(const SymbolTable *) const override;
+
+private:
+  TFRelu6 *_node;
+  const TensorName _name;
+};
+
+void TFRelu6GraphUpdate::input(const SymbolTable *table) const
+{
+  // resolve the recorded input name and wire it as the features operand
+  loco::Node *target = table->node(_name);
+  _node->features(target);
+}
+
+} // namespace
+
+namespace moco
+{
+
+// ReLU6 is a unary op: valid only with exactly one input
+bool Relu6GraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+  return node.input_size() == 1;
+}
+
+// Creates a TF-dialect TFRelu6 node, enrolls its output name, and queues a
+// GraphUpdate that will connect the features operand in a later pass.
+void Relu6GraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  loco::Graph *graph = context->graph();
+  SymbolTable *tensor_names = context->tensor_names();
+  UpdateQueue *updates = context->updates();
+
+  // Create a "TFRelu6" node for Relu6
+  auto relu_node = graph->nodes()->create<TFRelu6>();
+  relu_node->name(node.name());
+
+  // register string-name to node
+  TensorName output_name(node.name(), 0);
+  tensor_names->enroll(output_name, relu_node);
+
+  // Queue node input update
+  auto update = stdex::make_unique<TFRelu6GraphUpdate>(relu_node, TensorName(node.input(0)));
+  updates->enroll(std::move(update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Relu6.test.cpp b/compiler/moco/import/src/Nodes/Relu6.test.cpp
new file mode 100644
index 000000000..26beb6c17
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Relu6.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Relu6.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// clang-format off
+// NodeDef text fixture: a float Relu6 op fed by a Placeholder.
+const char *relu6_01_pbtxtdata = STRING_CONTENT(
+  name: "ReLU6"
+  op: "Relu6"
+  input: "Placeholder"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+);
+// clang-format on
+
+} // namespace
+
+// Imports the Relu6 fixture and checks that a TFRelu6 node is produced with
+// its features operand connected (checked by TFNodeBuildTester::run).
+TEST(TensorFlowImport, relu6_01)
+{
+  TFNodeBuildTester tester;
+  moco::Relu6GraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(relu6_01_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFRelu6
+  // - features node should not be null
+
+  tester.inputs({"Placeholder"});
+  tester.output("ReLU6");
+  tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/Reshape.cpp b/compiler/moco/import/src/Nodes/Reshape.cpp
new file mode 100644
index 000000000..26e22513f
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Reshape.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Reshape.h"
+
+#include <moco/IR/Nodes/TFReshape.h>
+
+#include <moco/Names.h>
+#include <plier/tf/Convert.h>
+#include <loco.h>
+#include <stdex/Memory.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for TF Reshape node
+ *
+ * Holds the TFReshape node and its two operand names; input() resolves the
+ * names and connects tensor and shape after all nodes have been created.
+ */
+class ReshapeGraphUpdate final : public GraphUpdate
+{
+public:
+  ReshapeGraphUpdate(TFReshape *node, std::vector<TensorName> names) : _node(node), _names(names) {}
+
+  void input(const SymbolTable *) const override;
+
+private:
+  TFReshape *_node;
+  std::vector<TensorName> _names;
+};
+
+void ReshapeGraphUpdate::input(const SymbolTable *node_table) const
+{
+  // _names order is fixed by ReshapeGraphBuilder::build: [0] = tensor, [1] = shape
+  assert(_names.size() == 2);
+
+  auto tensor_node = node_table->node(_names[0]);
+  auto shape_node = node_table->node(_names[1]);
+
+  assert(tensor_node != nullptr);
+  assert(shape_node != nullptr);
+
+  _node->tensor(tensor_node);
+  _node->shape(shape_node);
+}
+
+} // namespace
+
+namespace moco
+{
+
+// Reshape is valid only with exactly two inputs (tensor, shape) and the
+// mandatory "T" / "Tshape" type attributes.
+bool ReshapeGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+  // Tensorflow Reshape has 2 inputs: tensor & shape
+  if (node.input_size() != 2)
+    return false;
+
+  // TODO Assert Tshape value is DT_INT32?
+  return plier::tf::has_attrs(node, {"T", "Tshape"});
+}
+
+// Creates a TF-dialect TFReshape node, enrolls its output name, and queues
+// a GraphUpdate that will connect tensor and shape in a later pass.
+void ReshapeGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  loco::Graph *graph = context->graph();
+  SymbolTable *tensor_names = context->tensor_names();
+  UpdateQueue *updates = context->updates();
+
+  // name of loco nodes
+  std::string reshape_name = node.name();
+
+  auto reshape = graph->nodes()->create<TFReshape>();
+  reshape->name(node.name());
+
+  // save the name for graph link updates
+  TensorName output_name(reshape_name, 0);
+  tensor_names->enroll(output_name, reshape);
+
+  std::vector<TensorName> input_names;
+  input_names.push_back(TensorName(node.input(0))); // tensor
+  input_names.push_back(TensorName(node.input(1))); // shape
+
+  // Queue node input update
+  auto update = stdex::make_unique<ReshapeGraphUpdate>(reshape, input_names);
+
+  updates->enroll(std::move(update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Reshape.test.cpp b/compiler/moco/import/src/Nodes/Reshape.test.cpp
new file mode 100644
index 000000000..c406bf47b
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Reshape.test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Reshape.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// clang-format off
+// NodeDef text fixture: a float Reshape op with an int32 shape input.
+const char *reshape_01_pbtxtdata = STRING_CONTENT(
+  name: "reshape"
+  op: "Reshape"
+  input: "placeholder"
+  input: "shape"
+  attr {
+    key: "T"
+    value { type: DT_FLOAT }
+  }
+  attr {
+    key: "Tshape"
+    value { type: DT_INT32 }
+  }
+);
+// clang-format on
+
+} // namespace
+
+// Imports the Reshape fixture and checks that a TFReshape node is produced
+// with both operands connected (checked by TFNodeBuildTester::run).
+TEST(TensorFlowImport, reshape_01)
+{
+  TFNodeBuildTester tester;
+  moco::ReshapeGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(reshape_01_pbtxtdata, nodedef));
+
+  // what to test:
+  // - there should exist TFReshape
+  // - input nodes should not be null
+
+  tester.inputs({"placeholder", "shape"});
+  tester.output("reshape");
+  tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/Rsqrt.cpp b/compiler/moco/import/src/Nodes/Rsqrt.cpp
new file mode 100644
index 000000000..979ac90c9
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Rsqrt.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Rsqrt.h"
+
+#include <moco/IR/Nodes/TFRsqrt.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for TF Rsqrt node
+ *
+ * Holds the TFRsqrt node and its operand name; input() resolves the name
+ * and connects x after all nodes have been created.
+ */
+class TFRsqrtGraphUpdate final : public GraphUpdate
+{
+public:
+  // NOTE was 'TensorName &&name' initialized with '_name(name)': the rvalue
+  // reference was never moved from, so it only ever produced a copy. A const
+  // lvalue reference is honest about that and still binds to temporaries.
+  TFRsqrtGraphUpdate(TFRsqrt *node, const TensorName &name) : _node(node), _name(name) {}
+
+  void input(const SymbolTable *) const override;
+
+private:
+  TFRsqrt *_node;
+  TensorName _name;
+};
+
+void TFRsqrtGraphUpdate::input(const SymbolTable *table) const
+{
+  // resolve the recorded input name and wire it as the x operand
+  loco::Node *target = table->node(_name);
+  _node->x(target);
+}
+
+} // namespace
+
+namespace moco
+{
+
+// Rsqrt is a unary op: valid only with exactly one input
+bool RsqrtGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+  return node.input_size() == 1;
+}
+
+// Creates a TF-dialect TFRsqrt node, enrolls its output name, and queues a
+// GraphUpdate that will connect the x operand in a later pass.
+void RsqrtGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  loco::Graph *graph = context->graph();
+  SymbolTable *tensor_names = context->tensor_names();
+  UpdateQueue *updates = context->updates();
+
+  // creating TF dialect Rsqrt node
+  auto tf_rsqrt = graph->nodes()->create<TFRsqrt>();
+  tf_rsqrt->name(node.name());
+
+  // register string-name to node
+  TensorName output_name(node.name(), 0);
+  tensor_names->enroll(output_name, tf_rsqrt);
+
+  // Queue node input update
+  auto tf_rsqrt_update =
+      stdex::make_unique<TFRsqrtGraphUpdate>(tf_rsqrt, TensorName(node.input(0)));
+  updates->enroll(std::move(tf_rsqrt_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Rsqrt.test.cpp b/compiler/moco/import/src/Nodes/Rsqrt.test.cpp
new file mode 100644
index 000000000..2750725bc
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Rsqrt.test.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Rsqrt.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+// NodeDef text fixture: a float Rsqrt op fed by a Placeholder.
+const char *rsqrt_basic_pbtxt = STRING_CONTENT(
+  name: "RSQRT_01"
+  op: "Rsqrt"
+  input: "Placeholder"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+);
+// clang-format on
+
+} // namespace
+
+// Imports the Rsqrt fixture and checks that a TFRsqrt node is produced with
+// its x operand connected (checked by TFNodeBuildTester::run).
+TEST(TensorFlowImport, tf_rsqrt_basic)
+{
+  TFNodeBuildTester tester;
+  moco::RsqrtGraphBuilder graphbuilder;
+  tensorflow::NodeDef nodedef;
+
+  EXPECT_TRUE(plier::tf::parse_nodedef(rsqrt_basic_pbtxt, nodedef));
+
+  // what to test:
+  // - TFRsqrt node should exist
+  // - input x() should not be null
+
+  tester.inputs({"Placeholder"});
+  tester.output("RSQRT_01");
+  tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/Shape.cpp b/compiler/moco/import/src/Nodes/Shape.cpp
new file mode 100644
index 000000000..1e112ebb0
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Shape.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Shape.h"
+
+#include <moco/IR/Nodes/TFShape.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+
+namespace
+{
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for Shape node
+ */
+class ShapeGraphUpdate final : public GraphUpdate
+{
+public:
+ ShapeGraphUpdate(TFShape *node, const TensorName &&input_name)
+ : _node(node), _input_name(input_name)
+ {
+ // DO NOTHING
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFShape *_node;
+ const TensorName _input_name;
+};
+
+void ShapeGraphUpdate::input(const SymbolTable *table) const
+{
+ loco::Node *input_node = table->node(_input_name);
+ _node->input(input_node);
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool ShapeGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ if (node.input_size() != 1)
+ return false;
+
+ return plier::tf::has_attrs(node, {"T"});
+}
+
+void ShapeGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // create TF dialect Shape node
+ auto tf_shape = graph->nodes()->create<TFShape>();
+ tf_shape->name(node.name());
+
+ if (plier::tf::has_attrs(node, {"out_type"}))
+ {
+ auto dtype = plier::tf::as_loco_datatype(plier::tf::get_datatype_attr(node, "out_type"));
+ // TODO Support other dtype like S64
+ assert(dtype == loco::DataType::S32);
+
+ tf_shape->dtype(dtype);
+ }
+ else
+ {
+ // Set to S32, TF-documented default value for 'out_type'
+ tf_shape->dtype(loco::DataType::S32);
+ }
+
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, tf_shape);
+
+ auto update = stdex::make_unique<ShapeGraphUpdate>(tf_shape, TensorName(node.input(0)));
+ updates->enroll(std::move(update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Shape.test.cpp b/compiler/moco/import/src/Nodes/Shape.test.cpp
new file mode 100644
index 000000000..4aaf66c6f
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Shape.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Shape.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// clang-format off
+const char *shape_000_pbtxtdata = STRING_CONTENT(
+ name: "Shape"
+ op: "Shape"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "out_type"
+ value { type: DT_INT32 }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, shape_000)
+{
+ TFNodeBuildTester tester;
+ moco::ShapeGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(shape_000_pbtxtdata, nodedef));
+
+ // what to test:
+ // - there should exist TFShape
+ // - input node should not be null
+ // - dtype attribute is set same as out_type attribute of pbtxt
+
+ tester.inputs({"Placeholder"});
+ tester.output("Shape");
+ tester.run(nodedef, graphbuilder);
+
+ auto test_node = dynamic_cast<moco::TFShape *>(tester.output());
+ ASSERT_NE(test_node, nullptr);
+ ASSERT_EQ(test_node->dtype(), loco::DataType::S32);
+}
diff --git a/compiler/moco/import/src/Nodes/Softmax.cpp b/compiler/moco/import/src/Nodes/Softmax.cpp
new file mode 100644
index 000000000..6f2c609ff
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Softmax.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Softmax.h"
+
+#include <moco/IR/Nodes/TFSoftmax.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+
+namespace
+{
+using namespace moco;
+
+/**
+* @brief GraphUpdate for Softmax node
+*/
+class SoftmaxGraphUpdate final : public GraphUpdate
+{
+public:
+ SoftmaxGraphUpdate(TFSoftmax *node, const TensorName &&input_name)
+ : _node(node), _input_name(input_name)
+ {
+ // DO NOTHING
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFSoftmax *_node;
+ const TensorName _input_name;
+};
+
+void SoftmaxGraphUpdate::input(const SymbolTable *table) const
+{
+ loco::Node *input_node = table->node(_input_name);
+ _node->logits(input_node);
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool SoftmaxGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ if (node.input_size() != 1)
+ return false;
+
+ return plier::tf::has_attrs(node, {"T"});
+}
+
+void SoftmaxGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // creating TF dialect Softmax node
+ auto tf_softmax = graph->nodes()->create<TFSoftmax>();
+ tf_softmax->name(node.name());
+
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, tf_softmax);
+
+ auto update = stdex::make_unique<SoftmaxGraphUpdate>(tf_softmax, TensorName(node.input(0)));
+ updates->enroll(std::move(update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Softmax.test.cpp b/compiler/moco/import/src/Nodes/Softmax.test.cpp
new file mode 100644
index 000000000..b7c0797bb
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Softmax.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Softmax.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// clang-format off
+const char *softmax_2d_pbtxtdata = STRING_CONTENT(
+ name: "Softmax"
+ op: "Softmax"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, softmax_2d)
+{
+ TFNodeBuildTester tester;
+ moco::SoftmaxGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(softmax_2d_pbtxtdata, nodedef));
+
+ // what to test:
+ // - there should exist TFSoftmax
+ // - logits node should not be null
+
+ tester.inputs({"Placeholder"});
+ tester.output("Softmax");
+ tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/Sqrt.cpp b/compiler/moco/import/src/Nodes/Sqrt.cpp
new file mode 100644
index 000000000..f891e48f6
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Sqrt.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Sqrt.h"
+
+#include <moco/IR/Nodes/TFSqrt.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for TF Sqrt node
+ */
+class TFSqrtGraphUpdate final : public GraphUpdate
+{
+public:
+ TFSqrtGraphUpdate(TFSqrt *node, TensorName &&name) : _node(node), _name(name) {}
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFSqrt *_node;
+ TensorName _name;
+};
+
+void TFSqrtGraphUpdate::input(const SymbolTable *table) const
+{
+ loco::Node *target = table->node(_name);
+ _node->x(target);
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool SqrtGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ return node.input_size() == 1;
+}
+
+void SqrtGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // creating TF dialect Sqrt node
+ auto tf_sqrt = graph->nodes()->create<TFSqrt>();
+ tf_sqrt->name(node.name());
+
+ // register string-name to node
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, tf_sqrt);
+
+ // Queue node input update
+ auto tf_sqrt_update = stdex::make_unique<TFSqrtGraphUpdate>(tf_sqrt, TensorName(node.input(0)));
+ updates->enroll(std::move(tf_sqrt_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Sqrt.test.cpp b/compiler/moco/import/src/Nodes/Sqrt.test.cpp
new file mode 100644
index 000000000..427d4df0f
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Sqrt.test.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Sqrt.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *sqrt_basic_pbtxt = STRING_CONTENT(
+ name: "SQRT_01"
+ op: "Sqrt"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, tf_sqrt_basic)
+{
+ TFNodeBuildTester tester;
+ moco::SqrtGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(sqrt_basic_pbtxt, nodedef));
+
+ // what to test:
+ // - TFSqrt node should exist
+ // - input x() should not be null
+
+ tester.inputs({"Placeholder"});
+ tester.output("SQRT_01");
+ tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/SquaredDifference.cpp b/compiler/moco/import/src/Nodes/SquaredDifference.cpp
new file mode 100644
index 000000000..17a1fe93d
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/SquaredDifference.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/SquaredDifference.h"
+
+#include <moco/IR/Nodes/TFSquaredDifference.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for TF SquaredDifference node
+ */
+class TFSquaredDifferenceGraphUpdate final : public GraphUpdate
+{
+public:
+ TFSquaredDifferenceGraphUpdate(TFSquaredDifference *node, std::vector<TensorName> names)
+ : _node(node), _names(names)
+ {
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFSquaredDifference *_node;
+ std::vector<TensorName> _names;
+};
+
+void TFSquaredDifferenceGraphUpdate::input(const SymbolTable *table) const
+{
+ assert(_names.size() == 2);
+
+ _node->x(table->node(_names[0]));
+ _node->y(table->node(_names[1]));
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool SquaredDifferenceGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ return node.input_size() == 2;
+}
+
+void SquaredDifferenceGraphBuilder::build(const tensorflow::NodeDef &node,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // creating TF dialect SquaredDifference node
+ auto tf_sqdiff = graph->nodes()->create<TFSquaredDifference>();
+ tf_sqdiff->name(node.name());
+
+ // register string-name to node
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, tf_sqdiff);
+
+ std::vector<TensorName> add_input_names;
+ add_input_names.push_back(TensorName(node.input(0))); // x
+ add_input_names.push_back(TensorName(node.input(1))); // y
+
+ // Queue node input update
+ auto tf_sqrt_update =
+ stdex::make_unique<TFSquaredDifferenceGraphUpdate>(tf_sqdiff, add_input_names);
+ updates->enroll(std::move(tf_sqrt_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/SquaredDifference.test.cpp b/compiler/moco/import/src/Nodes/SquaredDifference.test.cpp
new file mode 100644
index 000000000..336ab1358
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/SquaredDifference.test.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/SquaredDifference.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *sqdiff_basic_pbtxt = STRING_CONTENT(
+ name: "squared_difference"
+ op: "SquaredDifference"
+ input: "input_01"
+ input: "input_02"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, tf_squdiff_basic)
+{
+ TFNodeBuildTester tester;
+ moco::SquaredDifferenceGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(sqdiff_basic_pbtxt, nodedef));
+
+ // what to test:
+ // - TFSquaredDifference node should exist
+ // - input x() should not be null
+ // - input y() should not be null
+
+ tester.inputs({"input_01", "input_02"});
+ tester.output("squared_difference");
+ tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/Squeeze.cpp b/compiler/moco/import/src/Nodes/Squeeze.cpp
new file mode 100644
index 000000000..1b4ebae6f
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Squeeze.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Squeeze.h"
+
+#include <moco/IR/Nodes/TFSqueeze.h>
+
+#include <moco/Names.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+#include <oops/UserExn.h>
+
+namespace
+{
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for Squeeze node
+ */
+class SqueezeGraphUpdate final : public GraphUpdate
+{
+public:
+ SqueezeGraphUpdate(TFSqueeze *node, const TensorName &&input_name)
+ : _node(node), _input_name(input_name)
+ {
+ // DO NOTHING
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFSqueeze *_node;
+ const TensorName _input_name;
+};
+
+void SqueezeGraphUpdate::input(const SymbolTable *table) const
+{
+ loco::Node *input_node = table->node(_input_name);
+ _node->input(input_node);
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool SqueezeGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ if (node.input_size() != 1)
+ return false;
+
+ if (!plier::tf::has_attrs(node, {"T"}))
+ return false;
+
+ if (plier::tf::has_attrs(node, {"axis"}))
+ {
+ // TODO support 'axis' attribute
+ oops::UserExn("Squeeze: Unsupported 'axis' attribute", node.name());
+ }
+
+ return true;
+}
+
+void SqueezeGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // TODO support 'axis' attribute
+ assert(!plier::tf::has_attrs(node, {"axis"}));
+
+ std::vector<int64_t> squeeze_dims;
+ if (plier::tf::has_attrs(node, {"squeeze_dims"}))
+ {
+ auto squeeze_dim_list = plier::tf::get_list_attr(node, {"squeeze_dims"});
+ // TODO assert squeeze_dims are mutually different?
+ squeeze_dims = plier::tf::as_int64_list(squeeze_dim_list);
+ }
+ // Note that it is possible that NodeDef does not have squeeze_dims attribute.
+ // In that case, TFSqueeze also has empty squeeze_dims,
+
+ // creating TF dialect Squeeze node
+ auto tf_squeeze = graph->nodes()->create<TFSqueeze>();
+ tf_squeeze->name(node.name());
+ tf_squeeze->squeeze_dims(squeeze_dims);
+
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, tf_squeeze);
+
+ auto update = stdex::make_unique<SqueezeGraphUpdate>(tf_squeeze, TensorName(node.input(0)));
+ updates->enroll(std::move(update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Squeeze.test.cpp b/compiler/moco/import/src/Nodes/Squeeze.test.cpp
new file mode 100644
index 000000000..e8188f98b
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Squeeze.test.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Squeeze.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// clang-format off
+const char *squeeze_all_pbtxtdata = STRING_CONTENT(
+ name: "Squeeze"
+ op: "Squeeze"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, squeeze_all)
+{
+ TFNodeBuildTester tester;
+ moco::SqueezeGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(squeeze_all_pbtxtdata, nodedef));
+
+ // what to test:
+ // - there should exist TFSqueeze
+ // - input node should not be null
+ // - squeeze_dims attribute is set same as pbtxt
+
+ tester.inputs({"Placeholder"});
+ tester.output("Squeeze");
+ tester.run(nodedef, graphbuilder);
+
+ auto test_node = dynamic_cast<moco::TFSqueeze *>(tester.output());
+ ASSERT_NE(test_node, nullptr);
+ ASSERT_EQ(test_node->squeeze_dims().size(), 0);
+}
+
+namespace
+{
+
+// clang-format off
+const char *squeeze_some_pbtxtdata = STRING_CONTENT(
+ name: "Squeeze"
+ op: "Squeeze"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "squeeze_dims"
+ value {
+ list { i: 1 }
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, squeeze_some)
+{
+ TFNodeBuildTester tester;
+ moco::SqueezeGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(squeeze_some_pbtxtdata, nodedef));
+
+ // what to test:
+ // - there should exist TFSqueeze
+ // - input node should not be null
+ // - squeeze_dims attribute is set same as pbtxt
+
+ tester.inputs({"Placeholder"});
+ tester.output("Squeeze");
+ tester.run(nodedef, graphbuilder);
+
+ auto test_node = dynamic_cast<moco::TFSqueeze *>(tester.output());
+ ASSERT_NE(test_node, nullptr);
+ ASSERT_EQ(test_node->squeeze_dims().size(), 1);
+ ASSERT_EQ(test_node->squeeze_dims().at(0), 1);
+}
+
+// TODO Add test case for negative squeeze dim
diff --git a/compiler/moco/import/src/Nodes/StopGradient.cpp b/compiler/moco/import/src/Nodes/StopGradient.cpp
new file mode 100644
index 000000000..9caec6943
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/StopGradient.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/StopGradient.h"
+
+#include <moco/IR/Nodes/TFStopGradient.h>
+
+#include <loco.h>
+#include <plier/tf/Convert.h>
+#include <stdex/Memory.h>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for TF StopGradient node
+ */
+class TFStopGradientGraphUpdate final : public GraphUpdate
+{
+public:
+ TFStopGradientGraphUpdate(TFStopGradient *node, TensorName &&name) : _node(node), _name(name) {}
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFStopGradient *_node;
+ TensorName _name;
+};
+
+void TFStopGradientGraphUpdate::input(const SymbolTable *table) const
+{
+ loco::Node *target = table->node(_name);
+ _node->input(target);
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool StopGradientGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ if (node.input_size() != 1)
+ return false;
+
+ return plier::tf::has_attrs(node, {"T"});
+}
+
+void StopGradientGraphBuilder::build(const tensorflow::NodeDef &node,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // creating TF dialect StopGradient node
+ auto tf_stopgradient = graph->nodes()->create<TFStopGradient>();
+ tf_stopgradient->name(node.name());
+
+ // register string-name to node
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, tf_stopgradient);
+
+ // Queue node input update
+ auto tf_stopgradient_update =
+ stdex::make_unique<TFStopGradientGraphUpdate>(tf_stopgradient, TensorName(node.input(0)));
+ updates->enroll(std::move(tf_stopgradient_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/StopGradient.test.cpp b/compiler/moco/import/src/Nodes/StopGradient.test.cpp
new file mode 100644
index 000000000..0bf70ebcc
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/StopGradient.test.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/StopGradient.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *stopgradient_basic_pbtxt = STRING_CONTENT(
+ name: "StopGradient_01"
+ op: "StopGradient"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, tf_stopgradient_basic)
+{
+ TFNodeBuildTester tester;
+ moco::StopGradientGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(stopgradient_basic_pbtxt, nodedef));
+
+ // what to test:
+ // - TFStopGradient node should exist
+ // - input() should not be null
+
+ tester.inputs({"Placeholder"});
+ tester.output("StopGradient_01");
+ tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/StridedSlice.cpp b/compiler/moco/import/src/Nodes/StridedSlice.cpp
new file mode 100644
index 000000000..06d388be0
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/StridedSlice.cpp
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/StridedSlice.h"
+
+#include <moco/IR/Nodes/TFStridedSlice.h>
+#include <moco/IR/Nodes/TFConst.h>
+
+#include <moco/Names.h>
+
+#include "Convert.h"
+
+#include <loco.h>
+#include <stdex/Memory.h>
+#include <plier/tf/Convert.h>
+#include <oops/UserExn.h>
+
+namespace
+{
+using namespace moco;
+
+class TFStridedSliceGraphUpdate final : public GraphUpdate
+{
+public:
+ TFStridedSliceGraphUpdate(TFStridedSlice *node, std::vector<TensorName> names)
+ : _node(node), _names(names)
+ {
+ }
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFStridedSlice *_node;
+ std::vector<TensorName> _names;
+};
+
+void TFStridedSliceGraphUpdate::input(const SymbolTable *node_table) const
+{
+ // TODO support size 3 where strides is None
+ assert(_names.size() == 4);
+
+ auto input_node = node_table->node(_names[0]);
+ auto begin_node = node_table->node(_names[1]);
+ auto end_node = node_table->node(_names[2]);
+ auto strides_node = node_table->node(_names[3]);
+ assert(input_node != nullptr);
+ assert(begin_node != nullptr);
+ assert(end_node != nullptr);
+ assert(strides_node != nullptr);
+
+ _node->input(input_node);
+ _node->begin(begin_node);
+ _node->end(end_node);
+ _node->strides(strides_node);
+
+ // TODO move validation codes to some suitable place
+ // Run basic validation
+
+ // TODO support full mask features
+ if (_node->begin_mask() != 0 || _node->end_mask() != 0 || _node->ellipsis_mask() != 0 ||
+ _node->new_axis_mask() != 0 || _node->shrink_axis_mask() != 1)
+ {
+ throw oops::UserExn("Mask attributes are not supported for now: ", _node->name());
+ }
+
+ // Only Const are supported for now
+ auto const_input = dynamic_cast<moco::TFConst *>(_node->input());
+ auto const_begin = dynamic_cast<moco::TFConst *>(_node->begin());
+ auto const_end = dynamic_cast<moco::TFConst *>(_node->end());
+ auto const_strides = dynamic_cast<moco::TFConst *>(_node->strides());
+ if (const_input == nullptr || const_begin == nullptr || const_end == nullptr ||
+ const_strides == nullptr)
+ {
+ throw oops::UserExn("Only Const inputs are supported for now: ", _node->name());
+ }
+
+ // TODO support S64
+ if (const_begin->dtype() != loco::DataType::S32 || const_end->dtype() != loco::DataType::S32 ||
+ const_strides->dtype() != loco::DataType::S32)
+ {
+ throw oops::UserExn("Only Const types of INT32 are supported for begin/end/strides for now: ",
+ _node->name());
+ }
+
+ // Input Rank should match number of elements of the begin/end/strides
+ auto rin = const_input->rank();
+ if (rin != const_begin->size<loco::DataType::S32>() ||
+ rin != const_end->size<loco::DataType::S32>() ||
+ rin != const_strides->size<loco::DataType::S32>())
+ {
+ throw oops::UserExn("Ranks for inputs should be same: ", _node->name());
+ }
+
+ // TODO support strides type of S64
+ // TODO support other strides value
+ // Only support stride 1 for now
+ uint32_t elements = const_strides->size<loco::DataType::S32>();
+ for (uint32_t e = 0; e < elements; ++e)
+ {
+ if (const_strides->at<loco::DataType::S32>(e) != 1)
+ {
+ throw oops::UserExn("Only stride 1 is supported for now: ", _node->name());
+ }
+ }
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool StridedSliceGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ // TODO support node.input_size() == 3 where strides is None
+ if (node.input_size() != 4)
+ return false;
+
+ if (!plier::tf::has_attrs(node, {"T", "Index", "begin_mask", "end_mask", "ellipsis_mask",
+ "new_axis_mask", "shrink_axis_mask"}))
+ return false;
+
+ return true;
+}
+
+void StridedSliceGraphBuilder::build(const tensorflow::NodeDef &node,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ std::string node_name = node.name();
+
+ auto stridedslice = graph->nodes()->create<TFStridedSlice>();
+ stridedslice->name(node_name);
+
+ // read attributes
+ auto begin_mask = plier::tf::get_int_attr(node, "begin_mask");
+ auto end_mask = plier::tf::get_int_attr(node, "end_mask");
+ auto ellipsis_mask = plier::tf::get_int_attr(node, "ellipsis_mask");
+ auto new_axis_mask = plier::tf::get_int_attr(node, "new_axis_mask");
+ auto shrink_axis_mask = plier::tf::get_int_attr(node, "shrink_axis_mask");
+
+ stridedslice->begin_mask(begin_mask);
+ stridedslice->end_mask(end_mask);
+ stridedslice->ellipsis_mask(ellipsis_mask);
+ stridedslice->new_axis_mask(new_axis_mask);
+ stridedslice->shrink_axis_mask(shrink_axis_mask);
+
+ // TODO support general mask values: we support only this limited case for now
+ assert(begin_mask == 0);
+ assert(end_mask == 0);
+ assert(ellipsis_mask == 0);
+ assert(new_axis_mask == 0);
+ assert(shrink_axis_mask == 1);
+
+ // save the name for graph link updates
+ TensorName output_name(node_name, 0);
+ tensor_names->enroll(output_name, stridedslice);
+
+ std::vector<TensorName> input_names;
+ input_names.push_back(TensorName(node.input(0))); // input
+ input_names.push_back(TensorName(node.input(1))); // begin
+ input_names.push_back(TensorName(node.input(2))); // end
+ input_names.push_back(TensorName(node.input(3))); // strides
+
+ auto tfconv2d_update = stdex::make_unique<TFStridedSliceGraphUpdate>(stridedslice, input_names);
+
+ updates->enroll(std::move(tfconv2d_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/StridedSlice.test.cpp b/compiler/moco/import/src/Nodes/StridedSlice.test.cpp
new file mode 100644
index 000000000..b6959d7ab
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/StridedSlice.test.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/StridedSlice.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *stridedslice_basic_pbtxt = STRING_CONTENT(
+ name: "StridedSlice"
+ op: "StridedSlice"
+ input: "input"
+ input: "begin"
+ input: "end"
+ input: "strides"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 1
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, tf_stridedslice_basic)
+{
+ TFNodeBuildTester tester;
+ moco::StridedSliceGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(stridedslice_basic_pbtxt, nodedef));
+
+ // what to test:
+ // - TFStridedSlice node should exist
+ // - inputs should not be nullptr
+ // - attributes should match the values
+
+ tester.inputs({"input", "begin", "end", "strides"}, loco::DataType::S32);
+ tester.output("StridedSlice");
+ tester.run(nodedef, graphbuilder);
+
+ auto test_node = dynamic_cast<moco::TFStridedSlice *>(tester.output());
+ ASSERT_NE(test_node, nullptr);
+ ASSERT_EQ(test_node->begin_mask(), 0);
+ ASSERT_EQ(test_node->end_mask(), 0);
+ ASSERT_EQ(test_node->ellipsis_mask(), 0);
+ ASSERT_EQ(test_node->new_axis_mask(), 0);
+ ASSERT_EQ(test_node->shrink_axis_mask(), 1);
+}
+
+// TODO add a test where the strides input is omitted
diff --git a/compiler/moco/import/src/Nodes/Sub.cpp b/compiler/moco/import/src/Nodes/Sub.cpp
new file mode 100644
index 000000000..bdad81d67
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Sub.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Sub.h"
+
+#include <moco/IR/Nodes/TFSub.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for TF Sub node
+ */
+class TFSubGraphUpdate final : public GraphUpdate
+{
+public:
+ TFSubGraphUpdate(TFSub *node, std::vector<TensorName> names) : _node(node), _names(names) {}
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFSub *_node;
+ std::vector<TensorName> _names;
+};
+
+void TFSubGraphUpdate::input(const SymbolTable *tensor_names) const
+{
+ assert(_names.size() == 2);
+
+ _node->x(tensor_names->node(_names[0]));
+ _node->y(tensor_names->node(_names[1]));
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool SubGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ return node.input_size() == 2;
+}
+
+void SubGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // creating TF dialect Sub node
+ auto tf_sub = graph->nodes()->create<TFSub>();
+ tf_sub->name(node.name());
+
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, tf_sub);
+
+ std::vector<TensorName> sub_input_names;
+ sub_input_names.push_back(TensorName(node.input(0))); // x
+ sub_input_names.push_back(TensorName(node.input(1))); // y
+
+ auto tf_sub_update = stdex::make_unique<TFSubGraphUpdate>(tf_sub, sub_input_names);
+ updates->enroll(std::move(tf_sub_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Sub.test.cpp b/compiler/moco/import/src/Nodes/Sub.test.cpp
new file mode 100644
index 000000000..05f1fb0d6
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Sub.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Sub.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *sub_basic_pbtxt = STRING_CONTENT(
+ name: "SUB_01"
+ op: "Sub"
+ input: "input_01"
+ input: "input_02"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, tf_sub_basic)
+{
+ TFNodeBuildTester tester;
+ moco::SubGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(sub_basic_pbtxt, nodedef));
+
+ // what to test:
+ // - TFSub node should exist
+ // - both inputs x() and y() should not be null
+
+ tester.inputs({"input_01", "input_02"});
+ tester.output("SUB_01");
+ tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/Nodes/Tanh.cpp b/compiler/moco/import/src/Nodes/Tanh.cpp
new file mode 100644
index 000000000..c89fa862a
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Tanh.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Tanh.h"
+
+#include <moco/IR/Nodes/TFTanh.h>
+
+#include <loco.h>
+#include <stdex/Memory.h>
+
+namespace
+{
+
+using namespace moco;
+
+/**
+ * @brief GraphUpdate for TF Tanh node
+ */
+class TFTanhGraphUpdate final : public GraphUpdate
+{
+public:
+ TFTanhGraphUpdate(TFTanh *node, TensorName &&name) : _node(node), _name(name) {}
+
+ void input(const SymbolTable *) const override;
+
+private:
+ TFTanh *_node;
+ TensorName _name;
+};
+
+void TFTanhGraphUpdate::input(const SymbolTable *table) const
+{
+ loco::Node *target = table->node(_name);
+ _node->x(target);
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool TanhGraphBuilder::validate(const tensorflow::NodeDef &node) const
+{
+ return node.input_size() == 1;
+}
+
+void TanhGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ loco::Graph *graph = context->graph();
+ SymbolTable *tensor_names = context->tensor_names();
+ UpdateQueue *updates = context->updates();
+
+ // creating TF dialect Tanh node
+ auto tf_tanh = graph->nodes()->create<TFTanh>();
+ tf_tanh->name(node.name());
+
+ // register string-name to node
+ TensorName output_name(node.name(), 0);
+ tensor_names->enroll(output_name, tf_tanh);
+
+ // Queue node input update
+ auto tf_tanh_update = stdex::make_unique<TFTanhGraphUpdate>(tf_tanh, TensorName(node.input(0)));
+ updates->enroll(std::move(tf_tanh_update));
+}
+
+} // namespace moco
diff --git a/compiler/moco/import/src/Nodes/Tanh.test.cpp b/compiler/moco/import/src/Nodes/Tanh.test.cpp
new file mode 100644
index 000000000..20ebd15b2
--- /dev/null
+++ b/compiler/moco/import/src/Nodes/Tanh.test.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Import/Nodes/Tanh.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+// clang-format off
+const char *tanh_basic_pbtxt = STRING_CONTENT(
+ name: "output/tanh"
+ op: "Tanh"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+);
+// clang-format on
+
+} // namespace
+
+TEST(TensorFlowImport, tf_tanh_basic)
+{
+ TFNodeBuildTester tester;
+ moco::TanhGraphBuilder graphbuilder;
+ tensorflow::NodeDef nodedef;
+
+ EXPECT_TRUE(plier::tf::parse_nodedef(tanh_basic_pbtxt, nodedef));
+
+ // what to test:
+ // - TFTanh node should exist
+ // - input x() should not be null
+
+ tester.inputs({"Placeholder"});
+ tester.output("output/tanh");
+ tester.run(nodedef, graphbuilder);
+}
diff --git a/compiler/moco/import/src/TestHelper.h b/compiler/moco/import/src/TestHelper.h
new file mode 100644
index 000000000..54ca45b4a
--- /dev/null
+++ b/compiler/moco/import/src/TestHelper.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TEST_HELPER_H__
+#define __TEST_HELPER_H__
+
+#include "moco/Import/GraphBuilder.h"
+
+#include <moco/IR/TFNode.h>
+#include <loco.h>
+#include <plier/tf/TestHelper.h>
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+#define STRING_CONTENT(content) #content
+
+namespace moco
+{
+namespace test
+{
+
+template <typename T> T *find_first_node_bytype(loco::Graph *g)
+{
+ T *first_node = nullptr;
+ loco::Graph::NodeContext *nodes = g->nodes();
+ uint32_t count = nodes->size();
+
+ for (uint32_t i = 0; i < count; ++i)
+ {
+ first_node = dynamic_cast<T *>(nodes->at(i));
+ if (first_node != nullptr)
+ break;
+ }
+
+ return first_node;
+}
+
+} // namespace test
+} // namespace moco
+
+namespace moco
+{
+namespace test
+{
+
+class TFNodeBuildTester
+{
+public:
+ TFNodeBuildTester();
+
+public:
+ void inputs(const std::vector<std::string> &names);
+ void inputs(const std::vector<std::string> &names, const loco::DataType dtype);
+ void output(const char *name);
+ moco::TFNode *output(void);
+
+ void run(tensorflow::NodeDef &node_def, moco::GraphBuilder &graph_builder);
+
+private:
+ std::unique_ptr<moco::SymbolTable> _tensor_names;
+ std::unique_ptr<loco::Graph> _graph;
+
+ std::vector<moco::TFNode *> _inputs;
+ const char *_output{nullptr};
+};
+
+} // namespace test
+} // namespace moco
+
+#endif // __TEST_HELPER_H__
diff --git a/compiler/moco/import/src/TestHelper.test.cpp b/compiler/moco/import/src/TestHelper.test.cpp
new file mode 100644
index 000000000..06c3dd372
--- /dev/null
+++ b/compiler/moco/import/src/TestHelper.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestHelper.h"
+
+#include <moco/IR/Nodes/TFConst.h>
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+namespace moco
+{
+namespace test
+{
+
+TFNodeBuildTester::TFNodeBuildTester()
+{
+ _graph = loco::make_graph();
+ _tensor_names = stdex::make_unique<moco::SymbolTable>();
+}
+
+void TFNodeBuildTester::inputs(const std::vector<std::string> &names)
+{
+ for (auto name : names)
+ {
+ auto input = _graph->nodes()->create<moco::TFConst>();
+ moco::TensorName name_01(name, 0);
+ _tensor_names->enroll(name_01, input);
+
+ _inputs.push_back(input);
+ }
+}
+
+void TFNodeBuildTester::inputs(const std::vector<std::string> &names, const loco::DataType dtype)
+{
+ for (auto name : names)
+ {
+ auto input = _graph->nodes()->create<moco::TFConst>();
+ input->dtype(dtype);
+ moco::TensorName name_01(name, 0);
+ _tensor_names->enroll(name_01, input);
+
+ _inputs.push_back(input);
+ }
+}
+
+void TFNodeBuildTester::output(const char *name) { _output = name; }
+
+moco::TFNode *TFNodeBuildTester::output(void)
+{
+ assert(_output != nullptr);
+
+ moco::TensorName tname(_output, 0);
+ return static_cast<moco::TFNode *>(_tensor_names->node(tname));
+}
+
+void TFNodeBuildTester::run(tensorflow::NodeDef &nodedef, moco::GraphBuilder &graphbuilder)
+{
+ assert(_output != nullptr);
+
+ auto node_defs = stdex::make_unique<moco::NodeDefTable>();
+ auto updates = stdex::make_unique<moco::UpdateQueue>();
+
+ moco::GraphBuilderContext gb_context(_graph.get(), node_defs.get(), _tensor_names.get(),
+ updates.get());
+
+ EXPECT_TRUE(graphbuilder.validate(nodedef));
+ graphbuilder.build(nodedef, &gb_context);
+
+ for (auto &update : updates->queue())
+ {
+ update->input(_tensor_names.get());
+ }
+
+ auto tfnode = output();
+ ASSERT_NE(tfnode, nullptr);
+ ASSERT_STREQ(tfnode->name().c_str(), _output);
+
+ int idx = 0;
+ ASSERT_EQ(tfnode->arity(), _inputs.size());
+ for (auto input : _inputs)
+ {
+ ASSERT_EQ(tfnode->arg(idx++), input);
+ }
+}
+
+} // namespace test
+} // namespace moco
diff --git a/compiler/moco/lang/CMakeLists.txt b/compiler/moco/lang/CMakeLists.txt
new file mode 100644
index 000000000..a64fdf92a
--- /dev/null
+++ b/compiler/moco/lang/CMakeLists.txt
@@ -0,0 +1,21 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(moco_lang SHARED ${SOURCES})
+target_include_directories(moco_lang PRIVATE src)
+target_include_directories(moco_lang PUBLIC include)
+target_link_libraries(moco_lang PUBLIC loco)
+target_link_libraries(moco_lang PRIVATE nncc_common)
+target_link_libraries(moco_lang PRIVATE stdex)
+install(TARGETS moco_lang DESTINATION lib) # moco_tf_frontend requires moco_lang
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(moco_lang_test ${TESTS})
+target_include_directories(moco_lang_test PRIVATE src)
+target_link_libraries(moco_lang_test moco_lang)
diff --git a/compiler/moco/lang/README.md b/compiler/moco/lang/README.md
new file mode 100644
index 000000000..6ee3fc660
--- /dev/null
+++ b/compiler/moco/lang/README.md
@@ -0,0 +1,3 @@
+# lang
+
+`lang` provides TensorFlow Dialect IR
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFAdd.h b/compiler/moco/lang/include/moco/IR/Nodes/TFAdd.h
new file mode 100644
index 000000000..13b064fba
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFAdd.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFADD_H__
+#define __MOCO_IR_TFADD_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFAdd corresponds to the following GraphDef
+/*
+node {
+ name: "add"
+ op: "Add"
+ input: "x"
+ input: "y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+*/
+
+class TFAdd final : public FixedArityNode<2, TFNodeImpl<TFOpcode::Add>>
+{
+public:
+ TFAdd() = default;
+
+public:
+ Node *x(void) const { return at(0)->node(); }
+ void x(Node *node) { at(0)->node(node); }
+
+ Node *y(void) const { return at(1)->node(); }
+ void y(Node *node) { at(1)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFADD_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFAvgPool.h b/compiler/moco/lang/include/moco/IR/Nodes/TFAvgPool.h
new file mode 100644
index 000000000..74c91b5fb
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFAvgPool.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFAVGPOOL_H__
+#define __MOCO_IR_TFAVGPOOL_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+#include <vector>
+
+namespace moco
+{
+
+/// @note TFAvgPool corresponds to the following GraphDef
+/*
+node {
+ name: "avgpool"
+ op: "AvgPool"
+ input: "placeholder"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+ attr {
+ key: "ksize"
+ value {
+ list {
+ i: 1 i: 3 i: 3 i: 1
+ }
+ }
+ }
+ attr {
+ key: "padding"
+ value {
+ s: "SAME"
+ }
+ }
+ attr {
+ key: "strides"
+ value {
+ list {
+ i: 1 i: 1 i: 1 i: 1
+ }
+ }
+ }
+}
+*/
+
+class TFAvgPool final : public FixedArityNode<1, TFNodeImpl<TFOpcode::AvgPool>>
+{
+public:
+ TFAvgPool() = default;
+
+public:
+ Node *value(void) const { return at(0)->node(); }
+ void value(Node *node) { return at(0)->node(node); }
+
+public:
+ const TFDataLayout &data_layout(void) const { return _data_layout; }
+ void data_layout(const TFDataLayout &data_layout) { _data_layout = data_layout; }
+
+ const TFPadding &padding(void) const { return _padding; }
+ void padding(const TFPadding &padding) { _padding = padding; }
+
+ const std::vector<int64_t> &ksize(void) const { return _ksize; }
+ void ksize(const std::vector<int64_t> &ksize) { _ksize = ksize; }
+
+ const std::vector<int64_t> &strides(void) const { return _strides; }
+ void strides(const std::vector<int64_t> &strides) { _strides = strides; }
+
+private:
+ TFDataLayout _data_layout;
+ TFPadding _padding;
+ std::vector<int64_t> _ksize;
+ std::vector<int64_t> _strides;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFAVGPOOL_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFBiasAdd.h b/compiler/moco/lang/include/moco/IR/Nodes/TFBiasAdd.h
new file mode 100644
index 000000000..11e309caa
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFBiasAdd.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFBIASADD_H__
+#define __MOCO_IR_TFBIASADD_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFBiasAdd corresponds to the following GraphDef
+/*
+node {
+ name: "bias_add_01"
+ op: "BiasAdd"
+ input: "input_01"
+ input: "bias_add_01/bias"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+}
+*/
+
+class TFBiasAdd final : public FixedArityNode<2, TFNodeImpl<TFOpcode::BiasAdd>>
+{
+public:
+ TFBiasAdd() = default;
+
+public:
+ Node *value(void) const { return at(0)->node(); }
+ void value(Node *node) { return at(0)->node(node); }
+
+ Node *bias(void) const { return at(1)->node(); }
+ void bias(Node *node) { return at(1)->node(node); }
+
+ const TFDataLayout data_layout(void) const { return _data_layout; }
+ void data_layout(const TFDataLayout &data_layout) { _data_layout = data_layout; }
+
+private:
+ TFDataLayout _data_layout;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFBIASADD_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFConcatV2.h b/compiler/moco/lang/include/moco/IR/Nodes/TFConcatV2.h
new file mode 100644
index 000000000..7f0d32697
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFConcatV2.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFCONCATV2_H__
+#define __MOCO_IR_TFCONCATV2_H__
+
+#include "moco/IR/TFNodeDecl.h"
+#include "moco/IR/VariadicArityNode.h"
+
+namespace moco
+{
+
+/// @note TFConcatV2 corresponds to the following GraphDef
+/*
+node {
+ name: "Concat"
+ op: "ConcatV2"
+ input: "Input01"
+ input: "Input02"
+ input: "Axis"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "Tidx"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+*/
+
+class TFConcatV2 final : public VariadicArityNode<TFNodeImpl<TFOpcode::ConcatV2>>
+{
+public:
+ TFConcatV2(uint32_t arity) : VariadicArityNode<TFNodeImpl<TFOpcode::ConcatV2>>(arity + 1)
+ {
+ // the +1 passed to the VariadicArityNode ctor reserves one extra slot for the axis input
+ // at least one value input is required
+ assert(arity >= 1);
+ }
+
+public:
+ uint32_t num_values(void) const
+ {
+ // last one is for axis
+ return arity() - 1;
+ }
+
+public:
+ Node *values(uint32_t index) const
+ {
+ assert(index < num_values());
+ return at(index)->node();
+ }
+ void values(uint32_t index, Node *node)
+ {
+ assert(index < num_values());
+ at(index)->node(node);
+ }
+
+ Node *axis(void) const { return at(num_values())->node(); }
+ void axis(Node *node) { at(num_values())->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFCONCATV2_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFConst.h b/compiler/moco/lang/include/moco/IR/Nodes/TFConst.h
new file mode 100644
index 000000000..7c2595fcb
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFConst.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFCONSTANT_H__
+#define __MOCO_IR_TFCONSTANT_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+#include <loco/IR/DataTypeTraits.h>
+#include <loco/IR/NodeMixins.h>
+#include <loco/IR/TensorShape.h>
+
+#include <vector>
+
+namespace moco
+{
+
+/// @note TFConst corresponds to the following GraphDef
+/*
+node {
+ name: "val"
+ op: "Const"
+ attr {
+ key: "dtype"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim { size: 1 }
+ dim { size: 3 }
+ dim { size: 4 }
+ dim { size: 4 }
+ }
+ float_val: 2.1
+ }
+ }
+ }
+}
+*/
+
+/**
+ * @brief IR for tf.constant
+ *
+ * @note Implementation for this class came from Canonical ConstGen
+ * Read comments in loco::ConstGen for details
+ */
+class TFConst final : public FixedArityNode<0, TFNodeImpl<TFOpcode::Const>>,
+ public loco::NodeMixin<loco::NodeTrait::DataType>,
+ public loco::NodeMixin<loco::NodeTrait::TensorShape>
+{
+public:
+ TFConst() = default;
+
+public:
+ template <loco::DataType DT> uint32_t size(void) const;
+ template <loco::DataType DT> void size(uint32_t size);
+
+ template <loco::DataType DT> const typename loco::DataTypeImpl<DT>::Type &at(uint32_t n) const;
+ template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &at(uint32_t n);
+
+private:
+ std::vector<uint8_t> _data;
+};
+
+} // namespace moco
+
+namespace moco
+{
+
+loco::TensorShape tensor_shape(const TFConst *node);
+
+uint32_t num_elements(const TFConst *tfconst);
+bool same_shape(const TFConst *lhs, const TFConst *rhs);
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFCONSTANT_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFConv2D.h b/compiler/moco/lang/include/moco/IR/Nodes/TFConv2D.h
new file mode 100644
index 000000000..0d5a17879
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFConv2D.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFCONV2D_H__
+#define __MOCO_IR_TFCONV2D_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+#include <vector>
+
+namespace moco
+{
+
+class TFConv2D final : public FixedArityNode<2, TFNodeImpl<TFOpcode::Conv2D>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+
+ loco::Node *filter(void) const { return at(1)->node(); }
+ void filter(Node *node) { at(1)->node(node); }
+
+public:
+ const TFPadding &padding(void) const { return _padding; }
+ void padding(const TFPadding &padding) { _padding = padding; }
+
+ const TFDataLayout &data_layout(void) const { return _data_layout; }
+ void data_layout(const TFDataLayout &data_layout) { _data_layout = data_layout; }
+
+ const std::vector<int64_t> &strides(void) const { return _strides; }
+ void strides(const std::vector<int64_t> &strides) { _strides = strides; }
+
+private:
+ TFPadding _padding;
+ TFDataLayout _data_layout;
+ std::vector<int64_t> _strides;
+ // TODO Support "Dilation"
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFCONV2D_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFConv2DBackpropInput.h b/compiler/moco/lang/include/moco/IR/Nodes/TFConv2DBackpropInput.h
new file mode 100644
index 000000000..43e620d24
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFConv2DBackpropInput.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFCONV2DBACKPROPINPUT_H__
+#define __MOCO_IR_TFCONV2DBACKPROPINPUT_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+#include <vector>
+
+namespace moco
+{
+
+/// @note TFConv2DBackpropInput corresponds to the following GraphDef
+/*
+node {
+ name: "conv2d_backprop_input"
+ op: "Conv2DBackpropInput"
+ input: "input_sizes"
+ input: "filter"
+ input: "out_backprop"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "data_format"
+ value { s: "NHWC" }
+ }
+ attr {
+ key: "dilations"
+ value {
+ list { i: 1 i: 1 i: 1 i: 1 }
+ }
+ }
+ attr {
+ key: "padding"
+ value { s: "SAME" }
+ }
+ attr {
+ key: "strides"
+ value {
+ list { i: 1 i: 2 i: 2 i: 1 }
+ }
+ }
+}
+*/
+
+/**
+ * @note For TensorFlow Conv2DBackpropInput, 'input' refers to the actual output
+ * of the node, and 'out_backprop' refers to the actual input. The reason for
+ * this is, as the name suggests, that it is inspired by backpropagation of convolution.
+ * For example, 'out_backprop' of Conv2DBackpropInput is its actual input
+ * feature map, and 'input_sizes' means the desired output node's size.
+ * Note that this convention is against loco canonical's convention.
+ */
+class TFConv2DBackpropInput final
+ : public FixedArityNode<3, TFNodeImpl<TFOpcode::Conv2DBackpropInput>>
+{
+public:
+ loco::Node *input_sizes(void) const { return at(0)->node(); }
+ void input_sizes(Node *node) { at(0)->node(node); }
+
+ loco::Node *filter(void) const { return at(1)->node(); }
+ void filter(Node *node) { at(1)->node(node); }
+
+ loco::Node *out_backprop(void) const { return at(2)->node(); }
+ void out_backprop(Node *node) { at(2)->node(node); }
+
+public:
+ const TFPadding &padding(void) const { return _padding; }
+ void padding(const TFPadding &padding) { _padding = padding; }
+
+ const TFDataLayout &data_layout(void) const { return _data_layout; }
+ void data_layout(const TFDataLayout &data_layout) { _data_layout = data_layout; }
+
+ const std::vector<int64_t> &strides(void) const { return _strides; }
+ void strides(const std::vector<int64_t> &strides) { _strides = strides; }
+
+private:
+ TFPadding _padding;
+ TFDataLayout _data_layout;
+ std::vector<int64_t> _strides;
+ // TODO Support "Dilation"
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFCONV2DBACKPROPINPUT_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFDepthwiseConv2dNative.h b/compiler/moco/lang/include/moco/IR/Nodes/TFDepthwiseConv2dNative.h
new file mode 100644
index 000000000..aefc0b5d9
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFDepthwiseConv2dNative.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFDEPTHWISECONV2DNATIVE_H__
+#define __MOCO_IR_TFDEPTHWISECONV2DNATIVE_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+#include <vector>
+
+namespace moco
+{
+
+// IR node for TF DepthwiseConv2dNative; inputs: feature map and filter.
+class TFDepthwiseConv2dNative final
+ : public FixedArityNode<2, TFNodeImpl<TFOpcode::DepthwiseConv2dNative>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+
+ loco::Node *filter(void) const { return at(1)->node(); }
+ void filter(Node *node) { at(1)->node(node); }
+
+public:
+ const TFPadding &padding(void) const { return _padding; }
+ void padding(const TFPadding &padding) { _padding = padding; }
+
+ const TFDataLayout &data_layout(void) const { return _data_layout; }
+ void data_layout(const TFDataLayout &data_layout) { _data_layout = data_layout; }
+
+ const std::vector<int64_t> &strides(void) const { return _strides; }
+ void strides(const std::vector<int64_t> &strides) { _strides = strides; }
+
+private:
+ TFPadding _padding;
+ TFDataLayout _data_layout;
+ std::vector<int64_t> _strides;
+ // TODO Support "Dilation"
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFDEPTHWISECONV2DNATIVE_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFFakeQuantWithMinMaxVars.h b/compiler/moco/lang/include/moco/IR/Nodes/TFFakeQuantWithMinMaxVars.h
new file mode 100644
index 000000000..ec54da596
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFFakeQuantWithMinMaxVars.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFFAKEQUANTWITHMINMAXVARS_H__
+#define __MOCO_IR_TFFAKEQUANTWITHMINMAXVARS_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+#include <vector>
+
+namespace moco
+{
+
+// IR node for TF FakeQuantWithMinMaxVars; inputs: tensor plus min/max bounds.
+class TFFakeQuantWithMinMaxVars final
+ : public FixedArityNode<3, TFNodeImpl<TFOpcode::FakeQuantWithMinMaxVars>>
+{
+public:
+ loco::Node *inputs(void) const { return at(0)->node(); }
+ void inputs(Node *node) { at(0)->node(node); }
+
+ loco::Node *min(void) const { return at(1)->node(); }
+ void min(Node *node) { at(1)->node(node); }
+
+ loco::Node *max(void) const { return at(2)->node(); }
+ void max(Node *node) { at(2)->node(node); }
+
+public:
+ const int64_t &num_bits(void) const { return _num_bits; }
+ void num_bits(const int64_t &num_bits) { _num_bits = num_bits; }
+
+ const bool &narrow_range(void) const { return _narrow_range; }
+ void narrow_range(const bool &narrow_range) { _narrow_range = narrow_range; }
+
+private:
+ int64_t _num_bits{8};        // quantization bit width; defaults to 8
+ bool _narrow_range{false};   // defaults to full range
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFFAKEQUANTWITHMINMAXVARS_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFFusedBatchNorm.h b/compiler/moco/lang/include/moco/IR/Nodes/TFFusedBatchNorm.h
new file mode 100644
index 000000000..5b980e3b2
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFFusedBatchNorm.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFFUSEDBATCHNORM_H__
+#define __MOCO_IR_TFFUSEDBATCHNORM_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+// IR node for TF FusedBatchNorm; 5 inputs: x, scale, offset, mean, variance.
+class TFFusedBatchNorm final : public FixedArityNode<5, TFNodeImpl<TFOpcode::FusedBatchNorm>>
+{
+public:
+ TFFusedBatchNorm() = default;
+
+public:
+ Node *x(void) const { return at(0)->node(); }
+ void x(Node *node) { at(0)->node(node); }
+
+ Node *scale(void) const { return at(1)->node(); } // gamma
+ void scale(Node *node) { at(1)->node(node); }
+
+ Node *offset(void) const { return at(2)->node(); } // beta
+ void offset(Node *node) { at(2)->node(node); }
+
+ Node *mean(void) const { return at(3)->node(); }
+ void mean(Node *node) { at(3)->node(node); }
+
+ Node *variance(void) const { return at(4)->node(); }
+ void variance(Node *node) { at(4)->node(node); }
+
+ float epsilon(void) const { return _epsilon; }
+ void epsilon(float epsilon) { _epsilon = epsilon; }
+
+private:
+ float _epsilon = 0.001f; // matches TensorFlow's default epsilon
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFFUSEDBATCHNORM_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFIdentity.h b/compiler/moco/lang/include/moco/IR/Nodes/TFIdentity.h
new file mode 100644
index 000000000..26a1a36bf
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFIdentity.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFIDENTITY_H__
+#define __MOCO_IR_TFIDENTITY_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFIdentity corresponds to the following GraphDef
+/*
+node {
+ name: "identity"
+ op: "Identity"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+*/
+
+// IR node for TF Identity; passes its single input through unchanged.
+class TFIdentity final : public FixedArityNode<1, TFNodeImpl<TFOpcode::Identity>>
+{
+public:
+ TFIdentity() = default;
+
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFIDENTITY_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFMaxPool.h b/compiler/moco/lang/include/moco/IR/Nodes/TFMaxPool.h
new file mode 100644
index 000000000..a66b4044e
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFMaxPool.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFMAXPOOL_H__
+#define __MOCO_IR_TFMAXPOOL_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+#include <vector>
+
+namespace moco
+{
+
+/// @note TFMaxPool corresponds to the following GraphDef
+/*
+node {
+ name: "maxpool2d"
+ op: "MaxPool"
+ input: "placeholder"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+ attr {
+ key: "ksize"
+ value {
+ list {
+ i: 1 i: 2 i: 2 i: 1
+ }
+ }
+ }
+ attr {
+ key: "padding"
+ value {
+ s: "VALID"
+ }
+ }
+ attr {
+ key: "strides"
+ value {
+ list {
+ i: 1 i: 1 i: 1 i: 1
+ }
+ }
+ }
+}
+*/
+
+// IR node for TF MaxPool; single input feature map plus pooling attributes.
+class TFMaxPool final : public FixedArityNode<1, TFNodeImpl<TFOpcode::MaxPool>>
+{
+public:
+ TFMaxPool() = default;
+
+public:
+ Node *input(void) const { return at(0)->node(); }
+ // NOTE no 'return' here: setter is void, matching every sibling node's setter
+ void input(Node *node) { at(0)->node(node); }
+
+public:
+ const TFDataLayout &data_layout(void) const { return _data_layout; }
+ void data_layout(const TFDataLayout &data_layout) { _data_layout = data_layout; }
+
+ const TFPadding &padding(void) const { return _padding; }
+ void padding(const TFPadding &padding) { _padding = padding; }
+
+ // pooling window size per dimension ("ksize" attribute)
+ const std::vector<int64_t> &ksize(void) const { return _ksize; }
+ void ksize(const std::vector<int64_t> &ksize) { _ksize = ksize; }
+
+ const std::vector<int64_t> &strides(void) const { return _strides; }
+ void strides(const std::vector<int64_t> &strides) { _strides = strides; }
+
+private:
+ TFDataLayout _data_layout;
+ TFPadding _padding;
+ std::vector<int64_t> _ksize;
+ std::vector<int64_t> _strides;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFMAXPOOL_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFMaximum.h b/compiler/moco/lang/include/moco/IR/Nodes/TFMaximum.h
new file mode 100644
index 000000000..346dbebe8
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFMaximum.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFMAXIMUM_H__
+#define __MOCO_IR_TFMAXIMUM_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFMaximum corresponds to the following GraphDef
+/*
+node {
+ name: "maximum"
+ op: "Maximum"
+ input: "x"
+ input: "y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+*/
+
+// IR node for TF Maximum; element-wise max of two inputs x and y.
+class TFMaximum final : public FixedArityNode<2, TFNodeImpl<TFOpcode::Maximum>>
+{
+public:
+ TFMaximum() = default;
+
+public:
+ Node *x(void) const { return at(0)->node(); }
+ void x(Node *node) { at(0)->node(node); }
+
+ Node *y(void) const { return at(1)->node(); }
+ void y(Node *node) { at(1)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFMAXIMUM_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFMean.h b/compiler/moco/lang/include/moco/IR/Nodes/TFMean.h
new file mode 100644
index 000000000..abcd21c49
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFMean.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFMEAN_H__
+#define __MOCO_IR_TFMEAN_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+#include <vector>
+
+namespace moco
+{
+
+/// @note TFMean corresponds to the following GraphDef
+/*
+node {
+ name: "Mean"
+ op: "Mean"
+ input: "Placeholder"
+ input: "Mean/reduction_indices"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "Tidx"
+ value { type: DT_INT32 }
+ }
+ attr {
+ key: "keep_dims"
+ value { b: true }
+ }
+}
+*/
+
+// IR node for TF Mean; reduces 'input' over 'reduction_indices'.
+class TFMean final : public FixedArityNode<2, TFNodeImpl<TFOpcode::Mean>>
+{
+public:
+ TFMean() = default;
+
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+
+ Node *reduction_indices(void) const { return at(1)->node(); }
+ void reduction_indices(Node *node) { at(1)->node(node); }
+
+public:
+ // when true, reduced dimensions are kept with size 1 ("keep_dims" attribute)
+ bool keep_dims(void) const { return _keep_dims; }
+ void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
+
+private:
+ bool _keep_dims = false;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFMEAN_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFMul.h b/compiler/moco/lang/include/moco/IR/Nodes/TFMul.h
new file mode 100644
index 000000000..4692838cb
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFMul.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFMUL_H__
+#define __MOCO_IR_TFMUL_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFMul corresponds to the following GraphDef
+/*
+node {
+ name: "mul"
+ op: "Mul"
+ input: "x"
+ input: "y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+*/
+
+// IR node for TF Mul; element-wise product of two inputs x and y.
+class TFMul final : public FixedArityNode<2, TFNodeImpl<TFOpcode::Mul>>
+{
+public:
+ TFMul() = default;
+
+public:
+ Node *x(void) const { return at(0)->node(); }
+ void x(Node *node) { at(0)->node(node); }
+
+ Node *y(void) const { return at(1)->node(); }
+ void y(Node *node) { at(1)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFMUL_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFPack.h b/compiler/moco/lang/include/moco/IR/Nodes/TFPack.h
new file mode 100644
index 000000000..1046a18ed
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFPack.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFPACK_H__
+#define __MOCO_IR_TFPACK_H__
+
+#include "moco/IR/TFNodeDecl.h"
+#include "moco/IR/VariadicArityNode.h"
+
+namespace moco
+{
+/// @note TFPack corresponds to the following GraphDef
+/*
+node {
+ name: "Pack"
+ op: "Pack"
+ input: "input_1"
+ input: "input_2"
+ attr {
+ key: "N"
+ value {
+ i: 2
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "axis"
+ value {
+ i: 0
+ }
+ }
+}
+*/
+
+// IR node for TF Pack; variadic-arity: stacks N input tensors along 'axis'.
+// NOTE(review): uses assert() but this header does not include <cassert>;
+// presumably provided transitively — consider adding the include.
+class TFPack final : public VariadicArityNode<TFNodeImpl<TFOpcode::Pack>>
+{
+public:
+ TFPack(uint32_t arity) : VariadicArityNode<TFNodeImpl<TFOpcode::Pack>>(arity)
+ {
+ // at least one item should exist
+ assert(arity >= 1);
+ }
+
+public:
+ // accessor/mutator for the index-th stacked value; index must be < arity()
+ Node *values(uint32_t index) const
+ {
+ assert(index < arity());
+ return at(index)->node();
+ }
+ void values(uint32_t index, Node *node)
+ {
+ assert(index < arity());
+ at(index)->node(node);
+ }
+
+public:
+ // "N" attribute equals the number of inputs
+ uint32_t N(void) const { return arity(); }
+
+ int32_t axis(void) const { return _axis; }
+ void axis(int32_t axis) { _axis = axis; }
+
+private:
+ int32_t _axis{0};
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFPACK_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFPad.h b/compiler/moco/lang/include/moco/IR/Nodes/TFPad.h
new file mode 100644
index 000000000..dae4741d6
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFPad.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFPAD_H__
+#define __MOCO_IR_TFPAD_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+/// @note TFPad corresponds to the following GraphDef
+/*
+node {
+ name: "Pad"
+ op: "Pad"
+ input: "Const_tensor"
+ input: "Const_paddings"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "Tpaddings"
+ value {
+ type: DT_INT32
+ }
+ }
+}
+*/
+
+// IR node for TF Pad; inputs: tensor to pad and a paddings tensor.
+class TFPad final : public FixedArityNode<2, TFNodeImpl<TFOpcode::Pad>>
+{
+public:
+ TFPad() = default;
+
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+
+ Node *paddings(void) const { return at(1)->node(); }
+ void paddings(Node *node) { at(1)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFPAD_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFPlaceholder.h b/compiler/moco/lang/include/moco/IR/Nodes/TFPlaceholder.h
new file mode 100644
index 000000000..65a78e665
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFPlaceholder.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFPLACEHOLDER_H__
+#define __MOCO_IR_TFPLACEHOLDER_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+#include <loco/IR/DataTypeTraits.h>
+#include <loco/IR/NodeMixins.h>
+#include <loco/IR/GraphInputIndex.h>
+#include <loco/IR/TensorShape.h>
+
+namespace moco
+{
+
+/// @note TFPlaceholder corresponds to the following GraphDef
+/*
+node {
+ name: "placeholder"
+ op: "Placeholder"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 1
+ }
+ dim {
+ size: 3
+ }
+ dim {
+ size: 3
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+}
+*/
+
+/**
+ * @brief IR for tf.placeholder
+ */
+// Zero-input graph entry node; carries dtype and tensor shape via loco mixins.
+class TFPlaceholder final : public FixedArityNode<0, TFNodeImpl<TFOpcode::Placeholder>>,
+ public loco::NodeMixin<loco::NodeTrait::DataType>,
+ public loco::NodeMixin<loco::NodeTrait::TensorShape>
+{
+public:
+ TFPlaceholder() = default;
+
+ // TODO Update unkown shape information. tensorflow::NodeDef may not have "shape" attr.
+};
+
+} // namespace moco
+
+namespace moco
+{
+
+bool indexed(const TFPlaceholder *node);
+loco::GraphInputIndex index(const TFPlaceholder *node);
+void index(TFPlaceholder *node, const loco::GraphInputIndex index);
+loco::TensorShape tensor_shape(const TFPlaceholder *node);
+
+TFPlaceholder *placeholder_node(loco::Graph *g, const loco::GraphInputIndex &idx);
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFPLACEHOLDER_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFPush.h b/compiler/moco/lang/include/moco/IR/Nodes/TFPush.h
new file mode 100644
index 000000000..e45804252
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFPush.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFPUSH_H__
+#define __MOCO_IR_TFPUSH_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+#include <loco.h>
+
+namespace moco
+{
+
+/**
+ * @brief Make a value visible to user
+ *
+ * @note TFPush is a virtual node that does not corresponds to real TensorFlow node
+ * Why this node is introduced:
+ * - Any TensorFlow Nodes can be an output.
+ * - So let any TFNode type can provide OutputIndex using Annotation.
+ * - Problem comes when in transformation, output node can be replaced.
+ * - This causes that OutputIndex Annotation should be copied to new node.
+ * - This makes every transformation in any Dialect code change.
+ * - And even worse, this makes every new transformation follow this rule.
+ * - Which is not good.
+ * - Thus, like loco Canonical does, follow loco::Push.
+ */
+class TFPush /* to user */ final : public FixedArityNode<1, TFNodeImpl<TFOpcode::TFPush>>
+{
+public:
+ TFPush() = default;
+
+public:
+ // the node whose value is exposed as a graph output
+ loco::Node *from(void) const { return at(0)->node(); }
+ void from(loco::Node *node) { at(0)->node(node); }
+
+public:
+ void index(const loco::GraphOutputIndex &index);
+
+ /**
+ * @brief Get associated output index
+ *
+ * The behavior of this method is undefined when "index" is not set before.
+ *
+ * NOTE This method intentionally returns "GraphOutputIndex" instead of "const GraphOutputIndex &"
+ * not to expose the internal implementation details.
+ */
+ loco::GraphOutputIndex index(void) const;
+
+ /**
+ * @brief Check whether index is initialized
+ *
+ * NOTE "indexed" method does not validate whether index is in a valid range
+ */
+ bool indexed(void) const { return _index != -1; }
+
+ /**
+ * @brief Reset output index
+ */
+ void index_reset(void) { _index = -1; }
+
+private:
+ // sentinel -1 means "no output index assigned yet"
+ int64_t _index = -1; // Uninitialized
+};
+
+/// @brief Find a TFPush node with a given output index
+TFPush *push_node(loco::Graph *g, const loco::GraphOutputIndex &index);
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFPUSH_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFRealDiv.h b/compiler/moco/lang/include/moco/IR/Nodes/TFRealDiv.h
new file mode 100644
index 000000000..8d61b3d13
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFRealDiv.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFREALDIV_H__
+#define __MOCO_IR_TFREALDIV_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFRealDiv corresponds to the following GraphDef
+/*
+node {
+ name: "div"
+ op: "RealDiv"
+ input: "x"
+ input: "y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+*/
+
+// IR node for TF RealDiv; element-wise division x / y.
+class TFRealDiv final : public FixedArityNode<2, TFNodeImpl<TFOpcode::RealDiv>>
+{
+public:
+ TFRealDiv() = default;
+
+public:
+ Node *x(void) const { return at(0)->node(); }
+ void x(Node *node) { at(0)->node(node); }
+
+ Node *y(void) const { return at(1)->node(); }
+ void y(Node *node) { at(1)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFREALDIV_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFRelu.h b/compiler/moco/lang/include/moco/IR/Nodes/TFRelu.h
new file mode 100644
index 000000000..90e121e5e
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFRelu.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFRELU_H__
+#define __MOCO_IR_TFRELU_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFRelu corresponds to the following GraphDef
+/*
+node {
+ name: "output/relu"
+ op: "Relu"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+*/
+
+// IR node for TF Relu; single input named 'features' per TF convention.
+class TFRelu final : public FixedArityNode<1, TFNodeImpl<TFOpcode::Relu>>
+{
+public:
+ TFRelu() = default;
+
+public:
+ Node *features(void) const { return at(0)->node(); }
+ void features(Node *node) { at(0)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFRELU_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFRelu6.h b/compiler/moco/lang/include/moco/IR/Nodes/TFRelu6.h
new file mode 100644
index 000000000..bb705b782
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFRelu6.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFRELU6_H__
+#define __MOCO_IR_TFRELU6_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFRelu6 corresponds to the following GraphDef
+/*
+node {
+ name: "Relu6"
+ op: "Relu6"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+}
+*/
+
+// IR node for TF Relu6; single input named 'features' per TF convention.
+class TFRelu6 final : public FixedArityNode<1, TFNodeImpl<TFOpcode::Relu6>>
+{
+public:
+ TFRelu6() = default;
+
+public:
+ Node *features(void) const { return at(0)->node(); }
+ void features(Node *node) { at(0)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFRELU6_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFReshape.h b/compiler/moco/lang/include/moco/IR/Nodes/TFReshape.h
new file mode 100644
index 000000000..1f743565d
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFReshape.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFRESHAPE_H__
+#define __MOCO_IR_TFRESHAPE_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFReshape corresponds to the following GraphDef
+/*
+node {
+ name: "reshape"
+ op: "Reshape"
+ input: "tensor"
+ input: "shape"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+}
+*/
+
+// IR node for TF Reshape; inputs: tensor to reshape and target shape tensor.
+class TFReshape final : public FixedArityNode<2, TFNodeImpl<TFOpcode::Reshape>>
+{
+public:
+ TFReshape() = default;
+
+public:
+ Node *tensor(void) const { return at(0)->node(); }
+ void tensor(Node *node) { at(0)->node(node); }
+
+ Node *shape(void) const { return at(1)->node(); }
+ void shape(Node *node) { at(1)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFRESHAPE_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFRsqrt.h b/compiler/moco/lang/include/moco/IR/Nodes/TFRsqrt.h
new file mode 100644
index 000000000..c71a5b98c
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFRsqrt.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFRSQRT_H__
+#define __MOCO_IR_TFRSQRT_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFRsqrt corresponds to the following GraphDef
+/*
+node {
+ name: "Rsqrt"
+ op: "Rsqrt"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+*/
+
+// IR node for TF Rsqrt; element-wise reciprocal square root of x.
+class TFRsqrt final : public FixedArityNode<1, TFNodeImpl<TFOpcode::Rsqrt>>
+{
+public:
+ TFRsqrt() = default;
+
+public:
+ Node *x(void) const { return at(0)->node(); }
+ void x(Node *node) { at(0)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFRSQRT_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFShape.h b/compiler/moco/lang/include/moco/IR/Nodes/TFShape.h
new file mode 100644
index 000000000..36f0f1e69
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFShape.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFSHAPE_H__
+#define __MOCO_IR_TFSHAPE_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+#include <loco/IR/NodeMixins.h>
+
+#include <vector>
+
+namespace moco
+{
+
+/// @note TFShape corresponds to the following GraphDef
+/*
+node {
+ name: "Shape"
+ op: "Shape"
+ input: "some_input"
+ attr {
+ key: "T"
+ value { type: DT_FLOAT }
+ }
+ attr {
+ key: "out_type"
+ value { type: DT_INT32 }
+ }
+}
+*/
+
+/// @note Mixed in dtype() is for 'out_type' attribute
+class TFShape final : public FixedArityNode<1, TFNodeImpl<TFOpcode::Shape>>,
+ public loco::NodeMixin<loco::NodeTrait::DataType>
+{
+public:
+ TFShape() = default;
+
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFSHAPE_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFSoftmax.h b/compiler/moco/lang/include/moco/IR/Nodes/TFSoftmax.h
new file mode 100644
index 000000000..c98df1d82
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFSoftmax.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFSOFTMAX_H__
+#define __MOCO_IR_TFSOFTMAX_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+class TFSoftmax final : public FixedArityNode<1, TFNodeImpl<TFOpcode::Softmax>>
+{
+public:
+ TFSoftmax() = default;
+
+public:
+ Node *logits(void) const { return at(0)->node(); }
+ void logits(Node *node) { at(0)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFSOFTMAX_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFSqrt.h b/compiler/moco/lang/include/moco/IR/Nodes/TFSqrt.h
new file mode 100644
index 000000000..273b5d49b
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFSqrt.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFSQRT_H__
+#define __MOCO_IR_TFSQRT_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFSqrt corresponds to the following GraphDef
+/*
+node {
+ name: "Sqrt"
+ op: "Sqrt"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+*/
+
+class TFSqrt final : public FixedArityNode<1, TFNodeImpl<TFOpcode::Sqrt>>
+{
+public:
+ TFSqrt() = default;
+
+public:
+ Node *x(void) const { return at(0)->node(); }
+ void x(Node *node) { at(0)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFSQRT_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFSquaredDifference.h b/compiler/moco/lang/include/moco/IR/Nodes/TFSquaredDifference.h
new file mode 100644
index 000000000..4e0a929d3
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFSquaredDifference.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFSQUAREDDIFFERENCE_H__
+#define __MOCO_IR_TFSQUAREDDIFFERENCE_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFSquaredDifference corresponds to the following GraphDef
+/*
+node {
+ name: "SquaredDifference"
+ op: "SquaredDifference"
+ input: "input_x"
+ input: "input_y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+*/
+
+class TFSquaredDifference final : public FixedArityNode<2, TFNodeImpl<TFOpcode::SquaredDifference>>
+{
+public:
+ TFSquaredDifference() = default;
+
+public:
+ Node *x(void) const { return at(0)->node(); }
+ void x(Node *node) { at(0)->node(node); }
+
+ Node *y(void) const { return at(1)->node(); }
+ void y(Node *node) { at(1)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFSQUAREDDIFFERENCE_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFSqueeze.h b/compiler/moco/lang/include/moco/IR/Nodes/TFSqueeze.h
new file mode 100644
index 000000000..612497ee7
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFSqueeze.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFSQUEEZE_H__
+#define __MOCO_IR_TFSQUEEZE_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+#include <vector>
+
+namespace moco
+{
+
+/// @note TFSqueeze corresponds to the following GraphDef
+/*
+node {
+ name: "squeeze"
+ op: "Squeeze"
+ input: "x"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "squeeze_dims"
+ value {
+ list {
+ i: a
+ i: b
+ ..
+ }
+ }
+ }
+}
+*/
+
+class TFSqueeze final : public FixedArityNode<1, TFNodeImpl<TFOpcode::Squeeze>>
+{
+public:
+ TFSqueeze() = default;
+
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+
+public:
+ const std::vector<int64_t> &squeeze_dims(void) const { return _squeeze_dims; }
+ void squeeze_dims(const std::vector<int64_t> &squeeze_dims) { _squeeze_dims = squeeze_dims; }
+
+private:
+ std::vector<int64_t> _squeeze_dims;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFSQUEEZE_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFStopGradient.h b/compiler/moco/lang/include/moco/IR/Nodes/TFStopGradient.h
new file mode 100644
index 000000000..cfebd92a9
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFStopGradient.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFSTOPGRADIENT_H__
+#define __MOCO_IR_TFSTOPGRADIENT_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFStopGradient corresponds to the following GraphDef
+/*
+node {
+ name: "StopGradient"
+ op: "StopGradient"
+ input: "Placeholder"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+*/
+
+class TFStopGradient final : public FixedArityNode<1, TFNodeImpl<TFOpcode::StopGradient>>
+{
+public:
+ TFStopGradient() = default;
+
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFSTOPGRADIENT_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFStridedSlice.h b/compiler/moco/lang/include/moco/IR/Nodes/TFStridedSlice.h
new file mode 100644
index 000000000..75012b219
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFStridedSlice.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFSTRIDEDSLICE_H__
+#define __MOCO_IR_TFSTRIDEDSLICE_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFStridedSlice corresponds to the following GraphDef
+/*
+node {
+ name: "StridedSlice"
+ op: "StridedSlice"
+ input: "input"
+ input: "begin"
+ input: "end"
+ input: "stride"
+ attr {
+ key: "Index"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "begin_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "ellipsis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "end_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "new_axis_mask"
+ value {
+ i: 0
+ }
+ }
+ attr {
+ key: "shrink_axis_mask"
+ value {
+ i: 0
+ }
+ }
+}
+*/
+
+class TFStridedSlice final : public FixedArityNode<4, TFNodeImpl<TFOpcode::StridedSlice>>
+{
+public:
+ TFStridedSlice() = default;
+
+public:
+ Node *input(void) const { return at(0)->node(); }
+ void input(Node *node) { at(0)->node(node); }
+
+ Node *begin(void) const { return at(1)->node(); }
+ void begin(Node *node) { at(1)->node(node); }
+
+ Node *end(void) const { return at(2)->node(); }
+ void end(Node *node) { at(2)->node(node); }
+
+ Node *strides(void) const { return at(3)->node(); }
+ void strides(Node *node) { at(3)->node(node); }
+
+public:
+ int32_t begin_mask(void) const { return _begin_mask; }
+ void begin_mask(int32_t begin_mask) { _begin_mask = begin_mask; }
+
+ int32_t end_mask(void) const { return _end_mask; }
+ void end_mask(int32_t end_mask) { _end_mask = end_mask; }
+
+ int32_t ellipsis_mask(void) const { return _ellipsis_mask; }
+ void ellipsis_mask(int32_t ellipsis_mask) { _ellipsis_mask = ellipsis_mask; }
+
+ int32_t new_axis_mask(void) const { return _new_axis_mask; }
+ void new_axis_mask(int32_t new_axis_mask) { _new_axis_mask = new_axis_mask; }
+
+ int32_t shrink_axis_mask(void) const { return _shrink_axis_mask; }
+ void shrink_axis_mask(int32_t shrink_axis_mask) { _shrink_axis_mask = shrink_axis_mask; }
+
+private:
+ int32_t _begin_mask{0};
+ int32_t _end_mask{0};
+ int32_t _ellipsis_mask{0};
+ int32_t _new_axis_mask{0};
+ int32_t _shrink_axis_mask{0};
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFSTRIDEDSLICE_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFSub.h b/compiler/moco/lang/include/moco/IR/Nodes/TFSub.h
new file mode 100644
index 000000000..27905cbdb
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFSub.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFSUB_H__
+#define __MOCO_IR_TFSUB_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+/// @note TFSub corresponds to the following GraphDef
+/*
+node {
+ name: "sub"
+ op: "Sub"
+ input: "x"
+ input: "y"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
+*/
+
+class TFSub final : public FixedArityNode<2, TFNodeImpl<TFOpcode::Sub>>
+{
+public:
+ TFSub() = default;
+
+public:
+ Node *x(void) const { return at(0)->node(); }
+ void x(Node *node) { at(0)->node(node); }
+
+ Node *y(void) const { return at(1)->node(); }
+ void y(Node *node) { at(1)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFSUB_H__
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFTanh.h b/compiler/moco/lang/include/moco/IR/Nodes/TFTanh.h
new file mode 100644
index 000000000..4543c62f3
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFTanh.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFTANH_H__
+#define __MOCO_IR_TFTANH_H__
+
+#include "moco/IR/TFNodeDecl.h"
+
+namespace moco
+{
+
+class TFTanh final : public FixedArityNode<1, TFNodeImpl<TFOpcode::Tanh>>
+{
+public:
+ TFTanh() = default;
+
+public:
+ Node *x(void) const { return at(0)->node(); }
+ void x(Node *node) { at(0)->node(node); }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFTANH_H__
diff --git a/compiler/moco/lang/include/moco/IR/TFDataLayout.h b/compiler/moco/lang/include/moco/IR/TFDataLayout.h
new file mode 100644
index 000000000..f0edfacd5
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/TFDataLayout.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFDATALAYOUT_H__
+#define __MOCO_IR_TFDATALAYOUT_H__
+
+#include <string>
+
+namespace moco
+{
+
+using TFDataLayout = std::string;
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFDATALAYOUT_H__
diff --git a/compiler/moco/lang/include/moco/IR/TFDialect.h b/compiler/moco/lang/include/moco/IR/TFDialect.h
new file mode 100644
index 000000000..847bc527f
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/TFDialect.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFDIALECT_H__
+#define __MOCO_IR_TFDIALECT_H__
+
+#include <loco/IR/Dialect.h>
+
+namespace moco
+{
+
+/**
+ * @brief A singleton for TensorFlow Dialect
+ */
+class TFDialect final : public loco::Dialect
+{
+private:
+ TFDialect();
+
+public:
+ TFDialect(const TFDialect &) = delete;
+ TFDialect(TFDialect &&) = delete;
+
+public:
+ static loco::Dialect *get(void);
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFDIALECT_H__
diff --git a/compiler/moco/lang/include/moco/IR/TFNode.h b/compiler/moco/lang/include/moco/IR/TFNode.h
new file mode 100644
index 000000000..e3d900ba3
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/TFNode.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFNODE_H__
+#define __MOCO_IR_TFNODE_H__
+
+#include "moco/IR/TFNodeDecl.h"
+#include "moco/IR/TFNodeImpl.h"
+
+#endif // __MOCO_IR_TFNODE_H__
diff --git a/compiler/moco/lang/include/moco/IR/TFNodeDecl.h b/compiler/moco/lang/include/moco/IR/TFNodeDecl.h
new file mode 100644
index 000000000..68d7161b6
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/TFNodeDecl.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFNODE_DECL_H__
+#define __MOCO_IR_TFNODE_DECL_H__
+
+#include <loco/IR/Node.h>
+#include <loco/IR/Dialect.h>
+
+#include "moco/IR/TFOpcode.h"
+#include "moco/IR/TFNodeVisitor.forward.h"
+
+#include "moco/IR/TFDataLayout.h"
+#include "moco/IR/TFPadding.h"
+
+#include <array>
+#include <string>
+
+namespace moco
+{
+
+/**
+ * @note NodeName is string name of the Node without ':#' prefix like ':0' or ':1'
+ */
+using NodeName = std::string;
+
+struct TFNode : public loco::Node
+{
+ virtual ~TFNode() = default;
+
+ const loco::Dialect *dialect(void) const final;
+ virtual TFOpcode opcode(void) const = 0;
+
+ template <typename T> T accept(TFNodeVisitorBase<T> *) const;
+ template <typename T> T accept(TFNodeMutableVisitorBase<T> *);
+
+ NodeName name(void) const { return _name; }
+ void name(const NodeName &name) { _name = name; }
+
+private:
+ NodeName _name;
+};
+
+template <TFOpcode Code> struct TFNodeImpl : public TFNode
+{
+ virtual ~TFNodeImpl() = default;
+
+ uint32_t opnum(void) const final { return static_cast<uint32_t>(Code); }
+ TFOpcode opcode(void) const final { return Code; }
+};
+
+/**
+ * @brief Nodes with the fixed number of inputs
+ */
+template <unsigned N, typename Base> class FixedArityNode : public Base
+{
+public:
+ FixedArityNode()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args[n] = std::unique_ptr<loco::Use>{new loco::Use{this}};
+ }
+ }
+
+ virtual ~FixedArityNode() = default;
+
+public:
+ unsigned arity(void) const final { return N; }
+
+ loco::Node *arg(uint32_t n) const final { return _args.at(n)->node(); }
+
+ void drop(void) final
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args.at(n)->node(nullptr);
+ }
+ }
+
+protected:
+ // This API allows inherited classes to access "_args" field.
+ loco::Use *at(unsigned n) const { return _args.at(n).get(); }
+
+private:
+ std::array<std::unique_ptr<loco::Use>, N> _args{};
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFNODE_DECL_H__
diff --git a/compiler/moco/lang/include/moco/IR/TFNodeImpl.h b/compiler/moco/lang/include/moco/IR/TFNodeImpl.h
new file mode 100644
index 000000000..afc306031
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/TFNodeImpl.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFNODE_IMPL_H__
+#define __MOCO_IR_TFNODE_IMPL_H__
+
+#include "moco/IR/TFNodes.h"
+#include "moco/IR/TFNodeVisitor.h"
+
+#include <stdexcept>
+
+namespace moco
+{
+
+template <typename T> T TFNode::accept(TFNodeVisitorBase<T> *v) const
+{
+ switch (this->opcode())
+ {
+#define TENSORFLOW_NODE(OPCODE, CLASS) \
+ case TFOpcode::OPCODE: \
+ return v->visit(dynamic_cast<const CLASS *>(this));
+
+#include "TFNodes.lst"
+#undef TENSORFLOW_NODE
+ default:
+ break;
+ }
+
+ // TODO Including oops here would make every module that includes this header
+ // depend on oops; the decision whether to use it is postponed.
+ throw std::runtime_error{"Unsupported Node"};
+}
+
+template <typename T> T TFNode::accept(TFNodeMutableVisitorBase<T> *v)
+{
+ switch (this->opcode())
+ {
+#define TENSORFLOW_NODE(OPCODE, CLASS) \
+ case TFOpcode::OPCODE: \
+ return v->visit(dynamic_cast<CLASS *>(this));
+
+#include "TFNodes.lst"
+#undef TENSORFLOW_NODE
+ default:
+ break;
+ }
+
+ // TODO Including oops here would make every module that includes this header
+ // depend on oops; the decision whether to use it is postponed.
+ throw std::runtime_error{"Unsupported Node"};
+}
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFNODE_IMPL_H__
diff --git a/compiler/moco/lang/include/moco/IR/TFNodeVisitor.forward.h b/compiler/moco/lang/include/moco/IR/TFNodeVisitor.forward.h
new file mode 100644
index 000000000..1eb86871c
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/TFNodeVisitor.forward.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFNODE_VISITOR_FORWARD_H__
+#define __MOCO_IR_TFNODE_VISITOR_FORWARD_H__
+
+namespace moco
+{
+
+// NOTE These forward declarations SHOULD BE aligned with Node declarations in
+// "TFNodeVisitor.h"
+template <typename T> struct TFNodeVisitorBase;
+template <typename T> struct TFNodeMutableVisitorBase;
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFNODE_VISITOR_FORWARD_H__
diff --git a/compiler/moco/lang/include/moco/IR/TFNodeVisitor.h b/compiler/moco/lang/include/moco/IR/TFNodeVisitor.h
new file mode 100644
index 000000000..8d23e447d
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/TFNodeVisitor.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFNODE_VISITOR_H__
+#define __MOCO_IR_TFNODE_VISITOR_H__
+
+#include "moco/IR/TFNodes.h"
+
+#include <stdexcept>
+
+namespace moco
+{
+
+/**
+ * DO NOT use this class. Use TFNodeVisitor instead.
+ */
+template <typename T> struct TFNodeVisitorBase
+{
+ virtual ~TFNodeVisitorBase() = default;
+
+#define TENSORFLOW_NODE(OPCODE, CLASS) virtual T visit(const CLASS *) = 0;
+#include "TFNodes.lst"
+#undef TENSORFLOW_NODE
+};
+
+template <typename T> struct TFNodeVisitor : public TFNodeVisitorBase<T>
+{
+ virtual ~TFNodeVisitor() = default;
+
+#define TENSORFLOW_NODE(OPCODE, CLASS) \
+ virtual T visit(const CLASS *node) { return visit(static_cast<const TFNode *>(node)); }
+#include "TFNodes.lst"
+#undef TENSORFLOW_NODE
+
+ // TODO Including oops here would make every module that includes this header
+ // depend on oops; the decision whether to use it is postponed.
+ /// @brief Default fallback
+ virtual T visit(const TFNode *) { throw std::runtime_error{"Unsupported Node"}; }
+};
+
+/**
+ * DO NOT use this class. Use TFNodeMutableVisitor instead.
+ */
+template <typename T> struct TFNodeMutableVisitorBase
+{
+ virtual ~TFNodeMutableVisitorBase() = default;
+
+#define TENSORFLOW_NODE(OPCODE, CLASS) virtual T visit(CLASS *) = 0;
+#include "TFNodes.lst"
+#undef TENSORFLOW_NODE
+};
+
+template <typename T> struct TFNodeMutableVisitor : public TFNodeMutableVisitorBase<T>
+{
+ virtual ~TFNodeMutableVisitor() = default;
+
+#define TENSORFLOW_NODE(OPCODE, CLASS) \
+ virtual T visit(CLASS *node) { return visit(static_cast<TFNode *>(node)); }
+#include "TFNodes.lst"
+#undef TENSORFLOW_NODE
+
+ // TODO Including oops here would make every module that includes this header
+ // depend on oops; the decision whether to use it is postponed.
+ /// @brief Default fallback
+ virtual T visit(TFNode *) { throw std::runtime_error{"Unsupported Node"}; }
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFNODE_VISITOR_H__
diff --git a/compiler/moco/lang/include/moco/IR/TFNodes.h b/compiler/moco/lang/include/moco/IR/TFNodes.h
new file mode 100644
index 000000000..ad54dfdf3
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/TFNodes.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFNODES_H__
+#define __MOCO_IR_TFNODES_H__
+
+#include "moco/IR/Nodes/TFAdd.h"
+#include "moco/IR/Nodes/TFAvgPool.h"
+#include "moco/IR/Nodes/TFBiasAdd.h"
+#include "moco/IR/Nodes/TFConcatV2.h"
+#include "moco/IR/Nodes/TFConst.h"
+#include "moco/IR/Nodes/TFConv2D.h"
+#include "moco/IR/Nodes/TFConv2DBackpropInput.h"
+#include "moco/IR/Nodes/TFDepthwiseConv2dNative.h"
+#include "moco/IR/Nodes/TFFakeQuantWithMinMaxVars.h"
+#include "moco/IR/Nodes/TFFusedBatchNorm.h"
+#include "moco/IR/Nodes/TFIdentity.h"
+#include "moco/IR/Nodes/TFMaximum.h"
+#include "moco/IR/Nodes/TFMaxPool.h"
+#include "moco/IR/Nodes/TFMean.h"
+#include "moco/IR/Nodes/TFMul.h"
+#include "moco/IR/Nodes/TFPack.h"
+#include "moco/IR/Nodes/TFPad.h"
+#include "moco/IR/Nodes/TFPlaceholder.h"
+#include "moco/IR/Nodes/TFRealDiv.h"
+#include "moco/IR/Nodes/TFRelu.h"
+#include "moco/IR/Nodes/TFRelu6.h"
+#include "moco/IR/Nodes/TFReshape.h"
+#include "moco/IR/Nodes/TFRsqrt.h"
+#include "moco/IR/Nodes/TFShape.h"
+#include "moco/IR/Nodes/TFSoftmax.h"
+#include "moco/IR/Nodes/TFSqrt.h"
+#include "moco/IR/Nodes/TFSquaredDifference.h"
+#include "moco/IR/Nodes/TFSqueeze.h"
+#include "moco/IR/Nodes/TFStopGradient.h"
+#include "moco/IR/Nodes/TFStridedSlice.h"
+#include "moco/IR/Nodes/TFSub.h"
+#include "moco/IR/Nodes/TFTanh.h"
+// For virtual node(s)
+#include "moco/IR/Nodes/TFPush.h"
+
+#endif // __MOCO_IR_TFNODES_H__
diff --git a/compiler/moco/lang/include/moco/IR/TFNodes.lst b/compiler/moco/lang/include/moco/IR/TFNodes.lst
new file mode 100644
index 000000000..8373d2b8d
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/TFNodes.lst
@@ -0,0 +1,48 @@
+#ifndef TENSORFLOW_NODE
+#error "Define TENSORFLOW_NODE"
+#endif // TENSORFLOW_NODE
+
+//
+// PLEASE SORT NODE DECLS IN ALPHABETICAL ORDER
+//
+// Naming rule: Follow names in TensorFlow C++ source
+// ex) for AvgPool, tensorflow/core/ops/nn_ops.cc
+// REGISTER_OP("AvgPool") <-- OPCODE: AvgPool. Prefix `TF` for CLASS name
+// .Input("value: T") <-- Input name is 'value'
+//
+
+// TENSORFLOW_NODE(OPCODE, CLASS)
+TENSORFLOW_NODE(Add, TFAdd)
+TENSORFLOW_NODE(AvgPool, TFAvgPool)
+TENSORFLOW_NODE(BiasAdd, TFBiasAdd)
+TENSORFLOW_NODE(ConcatV2, TFConcatV2)
+TENSORFLOW_NODE(Const, TFConst)
+TENSORFLOW_NODE(Conv2D, TFConv2D)
+TENSORFLOW_NODE(Conv2DBackpropInput, TFConv2DBackpropInput)
+TENSORFLOW_NODE(DepthwiseConv2dNative, TFDepthwiseConv2dNative)
+TENSORFLOW_NODE(FakeQuantWithMinMaxVars, TFFakeQuantWithMinMaxVars)
+TENSORFLOW_NODE(FusedBatchNorm, TFFusedBatchNorm)
+TENSORFLOW_NODE(Identity, TFIdentity)
+TENSORFLOW_NODE(Maximum, TFMaximum)
+TENSORFLOW_NODE(MaxPool, TFMaxPool)
+TENSORFLOW_NODE(Mean, TFMean)
+TENSORFLOW_NODE(Mul, TFMul)
+TENSORFLOW_NODE(Pack, TFPack)
+TENSORFLOW_NODE(Pad, TFPad)
+TENSORFLOW_NODE(Placeholder, TFPlaceholder)
+TENSORFLOW_NODE(RealDiv, TFRealDiv)
+TENSORFLOW_NODE(Relu, TFRelu)
+TENSORFLOW_NODE(Relu6, TFRelu6)
+TENSORFLOW_NODE(Reshape, TFReshape)
+TENSORFLOW_NODE(Rsqrt, TFRsqrt)
+TENSORFLOW_NODE(Shape, TFShape)
+TENSORFLOW_NODE(Softmax, TFSoftmax)
+TENSORFLOW_NODE(Sqrt, TFSqrt)
+TENSORFLOW_NODE(SquaredDifference, TFSquaredDifference)
+TENSORFLOW_NODE(Squeeze, TFSqueeze)
+TENSORFLOW_NODE(StopGradient, TFStopGradient)
+TENSORFLOW_NODE(StridedSlice, TFStridedSlice)
+TENSORFLOW_NODE(Sub, TFSub)
+TENSORFLOW_NODE(Tanh, TFTanh)
+// For virtual node(s)
+TENSORFLOW_NODE(TFPush, TFPush)
diff --git a/compiler/moco/lang/include/moco/IR/TFOpcode.h b/compiler/moco/lang/include/moco/IR/TFOpcode.h
new file mode 100644
index 000000000..7524dcce4
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/TFOpcode.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFOPCODE_H__
+#define __MOCO_IR_TFOPCODE_H__
+
+namespace moco
+{
+
+/**
+ * @brief TensorFlow Node Opcode
+ */
+enum class TFOpcode
+{
+#define TENSORFLOW_NODE(OPCODE, CLASS) OPCODE,
+#include "TFNodes.lst"
+#undef TENSORFLOW_NODE
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFOPCODE_H__
diff --git a/compiler/moco/lang/include/moco/IR/TFPadding.h b/compiler/moco/lang/include/moco/IR/TFPadding.h
new file mode 100644
index 000000000..c75b3f2ce
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/TFPadding.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_TFPADDING_H__
+#define __MOCO_IR_TFPADDING_H__
+
+#include <string>
+
+namespace moco
+{
+
+using TFPadding = std::string;
+
+} // namespace moco
+
+#endif // __MOCO_IR_TFPADDING_H__
diff --git a/compiler/moco/lang/include/moco/IR/VariadicArityNode.h b/compiler/moco/lang/include/moco/IR/VariadicArityNode.h
new file mode 100644
index 000000000..7df0f7dec
--- /dev/null
+++ b/compiler/moco/lang/include/moco/IR/VariadicArityNode.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_IR_VARIADIC_ARITY_NODE_H__
+#define __MOCO_IR_VARIADIC_ARITY_NODE_H__
+
+#include <loco/IR/Node.h>
+#include <loco/IR/Use.h>
+
+#include <vector>
+#include <memory>
+#include <cassert>
+
+namespace moco
+{
+
+/**
+ * @brief Nodes with the variadic inputs
+ */
+template <typename Base> class VariadicArityNode : public Base
+{
+public:
+ VariadicArityNode(uint32_t arity)
+ {
+ for (uint32_t n = 0; n < arity; ++n)
+ {
+      _args.emplace_back(std::unique_ptr<loco::Use>{new loco::Use{this}});
+ }
+  }
+
+ virtual ~VariadicArityNode() = default;
+
+public:
+ uint32_t arity(void) const final { return _args.size(); }
+
+ loco::Node *arg(uint32_t n) const final
+ {
+ assert(n < _args.size());
+ return _args.at(n)->node();
+ }
+
+ void drop(void) final
+ {
+ for (uint32_t n = 0; n < _args.size(); ++n)
+ {
+ _args.at(n)->node(nullptr);
+ }
+ }
+
+protected:
+ // This API allows inherited classes to access "_args" field.
+ loco::Use *at(uint32_t n) const
+ {
+ assert(n < _args.size());
+ return _args.at(n).get();
+ }
+
+private:
+ std::vector<std::unique_ptr<loco::Use>> _args;
+};
+
+} // namespace moco
+
+#endif // __MOCO_IR_VARIADIC_ARITY_NODE_H__
diff --git a/compiler/moco/lang/include/moco/Names.h b/compiler/moco/lang/include/moco/Names.h
new file mode 100644
index 000000000..1addc812b
--- /dev/null
+++ b/compiler/moco/lang/include/moco/Names.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_NAMES_H__
+#define __MOCO_NAMES_H__
+
+#include <string>
+#include <stdexcept>
+
+namespace moco
+{
+
+struct TensorName final
+{
+public:
+ /**
+ * @brief Constructor
+ *
+ * @note If tensor_name does not have ":index", this constructor adds ":0" by default
+ */
+ explicit TensorName(const std::string &tensor_name)
+ {
+ if (tensor_name.find(":") != std::string::npos) // tensor_name is a form of letter:0
+ {
+ _name.assign(tensor_name);
+ }
+ else
+ {
+ _name.assign(tensor_name + ":0"); // if it does not have ":index", adds ":0" by default
+ }
+ }
+
+ explicit TensorName(const std::string &node_name, const int tensor_index)
+ {
+ if (node_name.find(":") != std::string::npos) // tensor_name is already a form of name:0
+ {
+ // TODO including oops will make oops dependent to modules that include this
+ // postpone decision to this or not
+ throw std::runtime_error("Error: Node name has already tensor index:" + node_name);
+ }
+ else
+ {
+ _name.assign(node_name + ":" + std::to_string(tensor_index));
+ }
+ }
+
+ const std::string &name() const { return _name; }
+
+ /**
+ * @brief Returns node name from tensor name by removing, e.g., ":0"
+ */
+ const std::string nodeName() const
+ {
+ auto index = _name.find(":");
+
+ if (index != std::string::npos)
+ return _name.substr(0, index);
+ else
+ {
+ // TODO including oops will make oops dependent to modules that include this
+ // postpone decision to this or not
+ throw std::runtime_error{"Error: Tensor name should be a 'name:number' format: " + _name};
+ }
+ };
+
+private:
+ std::string _name;
+};
+
+/**
+ * @brief To use TensorName as a key in std::map, this struct defines how to compare two TensorNames
+ */
+struct TensorNameCompare
+{
+ bool operator()(const TensorName &lhs, const TensorName &rhs) const
+ {
+ return lhs.name() < rhs.name();
+ }
+};
+
+} // namespace moco
+
+#endif // __MOCO_NAMES_H__
diff --git a/compiler/moco/lang/src/IR/Nodes/TFAdd.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFAdd.test.cpp
new file mode 100644
index 000000000..d2cfb6ac4
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFAdd.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFAdd.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFAddTest, constructor)
+{
+ moco::TFAdd add_node;
+
+ ASSERT_EQ(add_node.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(add_node.opcode(), moco::TFOpcode::Add);
+
+ ASSERT_EQ(add_node.x(), nullptr);
+ ASSERT_EQ(add_node.y(), nullptr);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFAvgPool.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFAvgPool.test.cpp
new file mode 100644
index 000000000..32a27ffa0
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFAvgPool.test.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFAvgPool.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFAvgPoolTest, constructor)
+{
+ moco::TFAvgPool avgpool;
+
+ ASSERT_EQ(avgpool.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(avgpool.opcode(), moco::TFOpcode::AvgPool);
+
+ ASSERT_EQ(avgpool.value(), nullptr);
+ ASSERT_EQ(avgpool.data_layout(), "");
+ ASSERT_EQ(avgpool.padding(), "");
+ ASSERT_EQ(avgpool.ksize(), std::vector<int64_t>({}));
+ ASSERT_EQ(avgpool.strides(), std::vector<int64_t>({}));
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFBiasAdd.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFBiasAdd.test.cpp
new file mode 100644
index 000000000..4a15a4981
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFBiasAdd.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFBiasAdd.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFBiasAddTest, constructor)
+{
+ moco::TFBiasAdd bias_add;
+
+ ASSERT_EQ(bias_add.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(bias_add.opcode(), moco::TFOpcode::BiasAdd);
+
+ ASSERT_EQ(bias_add.value(), nullptr);
+ ASSERT_EQ(bias_add.bias(), nullptr);
+ ASSERT_EQ(bias_add.data_layout(), "");
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFConcatV2.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFConcatV2.test.cpp
new file mode 100644
index 000000000..8f7df92d0
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFConcatV2.test.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFConcatV2.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFConcatV2Test, constructor)
+{
+ moco::TFConcatV2 concatv2_node(3); // num of values
+
+ ASSERT_EQ(concatv2_node.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(concatv2_node.opcode(), moco::TFOpcode::ConcatV2);
+
+ ASSERT_EQ(concatv2_node.num_values(), 3);
+ ASSERT_EQ(concatv2_node.values(0), nullptr);
+ ASSERT_EQ(concatv2_node.values(1), nullptr);
+ ASSERT_EQ(concatv2_node.values(2), nullptr);
+ ASSERT_EQ(concatv2_node.axis(), nullptr);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFConst.cpp b/compiler/moco/lang/src/IR/Nodes/TFConst.cpp
new file mode 100644
index 000000000..5c8c08ec0
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFConst.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFConst.h"
+
+#include <cassert>
+
+namespace moco
+{
+
+template <loco::DataType DT> uint32_t TFConst::size(void) const
+{
+ assert(dtype() == DT);
+ assert(_data.size() % sizeof(typename loco::DataTypeImpl<DT>::Type) == 0);
+ return _data.size() / sizeof(typename loco::DataTypeImpl<DT>::Type);
+}
+
+template <loco::DataType DT> void TFConst::size(uint32_t l)
+{
+ assert(dtype() == DT);
+ _data.resize(l * sizeof(typename loco::DataTypeImpl<DT>::Type));
+}
+
+template <loco::DataType DT>
+const typename loco::DataTypeImpl<DT>::Type &TFConst::at(uint32_t n) const
+{
+ assert(dtype() == DT);
+ assert(n < size<DT>());
+ return *(reinterpret_cast<const typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &TFConst::at(uint32_t n)
+{
+ assert(dtype() == DT);
+ assert(n < size<DT>());
+ return *(reinterpret_cast<typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+#define INSTANTIATE(DT) \
+ template uint32_t TFConst::size<DT>(void) const; \
+ template void TFConst::size<DT>(uint32_t); \
+ template const typename loco::DataTypeImpl<DT>::Type &TFConst::at<DT>(uint32_t) const; \
+ template typename loco::DataTypeImpl<DT>::Type &TFConst::at<DT>(uint32_t);
+
+INSTANTIATE(loco::DataType::S8);
+INSTANTIATE(loco::DataType::S32);
+INSTANTIATE(loco::DataType::FLOAT32);
+
+#undef INSTANTIATE
+
+loco::TensorShape tensor_shape(const TFConst *node)
+{
+ assert(node != nullptr);
+
+ loco::TensorShape shape;
+
+ uint32_t rank = node->rank();
+ shape.rank(rank);
+ for (uint32_t index = 0; index < rank; ++index)
+ {
+ assert(node->dim(index).known());
+ shape.dim(index) = node->dim(index).value();
+ }
+
+ return shape;
+}
+
+uint32_t num_elements(const TFConst *tfconst)
+{
+ assert(tfconst != nullptr);
+
+ uint32_t num_elements = 1;
+ for (uint32_t index = 0; index < tfconst->rank(); ++index)
+ {
+ assert(tfconst->dim(index).known());
+ uint32_t dim = tfconst->dim(index).value();
+ num_elements = num_elements * dim;
+ }
+ return num_elements;
+}
+
+bool same_shape(const TFConst *lhs, const TFConst *rhs)
+{
+ assert(lhs != nullptr);
+ assert(rhs != nullptr);
+
+ if (lhs->rank() != rhs->rank())
+ return false;
+
+ for (uint32_t index = 0; index < lhs->rank(); ++index)
+ {
+ assert(lhs->dim(index).known());
+ assert(rhs->dim(index).known());
+ if (lhs->dim(index).value() != rhs->dim(index).value())
+ return false;
+ }
+ return true;
+}
+
+} // namespace moco
diff --git a/compiler/moco/lang/src/IR/Nodes/TFConst.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFConst.test.cpp
new file mode 100644
index 000000000..259966e33
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFConst.test.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFConst.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFConstantTest, constructor)
+{
+ moco::TFConst constant;
+
+ ASSERT_EQ(constant.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(constant.opcode(), moco::TFOpcode::Const);
+
+ ASSERT_EQ(constant.dtype(), loco::DataType::Unknown);
+ ASSERT_EQ(constant.rank(), 0);
+
+ constant.dtype(loco::DataType::FLOAT32);
+ ASSERT_EQ(constant.dtype(), loco::DataType::FLOAT32);
+
+ constant.rank(2);
+ ASSERT_EQ(constant.rank(), 2);
+
+ constant.dim(0) = 2;
+ constant.dim(1) = 3;
+
+ ASSERT_TRUE(constant.dim(0).known());
+ ASSERT_TRUE(constant.dim(1).known());
+
+ ASSERT_EQ(constant.dim(0), 2);
+ ASSERT_EQ(constant.dim(1), 3);
+
+ constant.size<loco::DataType::FLOAT32>(6);
+
+ ASSERT_EQ(constant.size<loco::DataType::FLOAT32>(), 6);
+
+ constant.at<loco::DataType::FLOAT32>(0) = 0.0f; // Set 0,0
+ constant.at<loco::DataType::FLOAT32>(1) = 1.0f; // Set 0,1
+ constant.at<loco::DataType::FLOAT32>(2) = 2.0f; // Set 0,2
+ constant.at<loco::DataType::FLOAT32>(3) = 3.0f; // Set 1,0
+ constant.at<loco::DataType::FLOAT32>(4) = 4.0f; // Set 1,1
+ constant.at<loco::DataType::FLOAT32>(5) = 5.0f; // Set 1,2
+
+ ASSERT_EQ(constant.at<loco::DataType::FLOAT32>(0), 0.0f);
+ ASSERT_EQ(constant.at<loco::DataType::FLOAT32>(1), 1.0f);
+ ASSERT_EQ(constant.at<loco::DataType::FLOAT32>(2), 2.0f);
+ ASSERT_EQ(constant.at<loco::DataType::FLOAT32>(3), 3.0f);
+ ASSERT_EQ(constant.at<loco::DataType::FLOAT32>(4), 4.0f);
+ ASSERT_EQ(constant.at<loco::DataType::FLOAT32>(5), 5.0f);
+}
+
+TEST(TFConstantTest, datatype_s8)
+{
+ moco::TFConst constant;
+
+ ASSERT_EQ(constant.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(constant.opcode(), moco::TFOpcode::Const);
+
+ ASSERT_EQ(constant.dtype(), loco::DataType::Unknown);
+ ASSERT_EQ(constant.rank(), 0);
+
+ constant.dtype(loco::DataType::S8);
+ ASSERT_EQ(constant.dtype(), loco::DataType::S8);
+
+ constant.rank(1);
+ ASSERT_EQ(constant.rank(), 1);
+
+ constant.dim(0) = 3;
+ ASSERT_TRUE(constant.dim(0).known());
+ ASSERT_EQ(constant.dim(0), 3);
+ constant.size<loco::DataType::S8>(3);
+ ASSERT_EQ(constant.size<loco::DataType::S8>(), 3);
+
+ constant.at<loco::DataType::S8>(0) = -1;
+ constant.at<loco::DataType::S8>(1) = 1;
+ constant.at<loco::DataType::S8>(2) = 0;
+
+ ASSERT_EQ(constant.at<loco::DataType::S8>(0), -1);
+ ASSERT_EQ(constant.at<loco::DataType::S8>(1), 1);
+ ASSERT_EQ(constant.at<loco::DataType::S8>(2), 0);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFConv2D.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFConv2D.test.cpp
new file mode 100644
index 000000000..3e3453db0
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFConv2D.test.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFConv2D.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFConv2DTest, constructor)
+{
+ moco::TFConv2D conv2d_node;
+
+ ASSERT_EQ(conv2d_node.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(conv2d_node.opcode(), moco::TFOpcode::Conv2D);
+
+ ASSERT_EQ(conv2d_node.input(), nullptr);
+ ASSERT_EQ(conv2d_node.filter(), nullptr);
+ ASSERT_EQ(conv2d_node.padding(), "");
+ ASSERT_EQ(conv2d_node.data_layout(), "");
+ ASSERT_EQ(conv2d_node.strides().size(), 0);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFConv2DBackpropInput.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFConv2DBackpropInput.test.cpp
new file mode 100644
index 000000000..f7ad4ce67
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFConv2DBackpropInput.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFConv2DBackpropInput.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFConv2DBackpropInputTest, constructor)
+{
+ moco::TFConv2DBackpropInput conv2dbi_node;
+
+ ASSERT_EQ(conv2dbi_node.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(conv2dbi_node.opcode(), moco::TFOpcode::Conv2DBackpropInput);
+
+ ASSERT_EQ(conv2dbi_node.input_sizes(), nullptr);
+ ASSERT_EQ(conv2dbi_node.filter(), nullptr);
+ ASSERT_EQ(conv2dbi_node.out_backprop(), nullptr);
+ ASSERT_EQ(conv2dbi_node.padding(), "");
+ ASSERT_EQ(conv2dbi_node.data_layout(), "");
+ ASSERT_EQ(conv2dbi_node.strides().size(), 0);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFDepthwiseConv2dNative.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFDepthwiseConv2dNative.test.cpp
new file mode 100644
index 000000000..2562997c2
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFDepthwiseConv2dNative.test.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFDepthwiseConv2dNative.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFDepthwiseConv2dNativeTest, constructor)
+{
+ moco::TFDepthwiseConv2dNative depthwiseConv2dnative_node;
+
+ ASSERT_EQ(depthwiseConv2dnative_node.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(depthwiseConv2dnative_node.opcode(), moco::TFOpcode::DepthwiseConv2dNative);
+
+ ASSERT_EQ(depthwiseConv2dnative_node.input(), nullptr);
+ ASSERT_EQ(depthwiseConv2dnative_node.filter(), nullptr);
+ ASSERT_EQ(depthwiseConv2dnative_node.padding(), "");
+ ASSERT_EQ(depthwiseConv2dnative_node.data_layout(), "");
+ ASSERT_EQ(depthwiseConv2dnative_node.strides().size(), 0);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFFakeQuantWithMinMaxVars.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFFakeQuantWithMinMaxVars.test.cpp
new file mode 100644
index 000000000..be8fc3a70
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFFakeQuantWithMinMaxVars.test.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFFakeQuantWithMinMaxVars.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFFakeQuantWithMinMaxVarsTest, constructor)
+{
+ moco::TFFakeQuantWithMinMaxVars fakequant_node;
+
+ ASSERT_EQ(fakequant_node.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(fakequant_node.opcode(), moco::TFOpcode::FakeQuantWithMinMaxVars);
+
+ ASSERT_EQ(fakequant_node.inputs(), nullptr);
+ ASSERT_EQ(fakequant_node.min(), nullptr);
+ ASSERT_EQ(fakequant_node.max(), nullptr);
+ ASSERT_EQ(fakequant_node.num_bits(), 8);
+ ASSERT_EQ(fakequant_node.narrow_range(), false);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFFusedBatchNorm.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFFusedBatchNorm.test.cpp
new file mode 100644
index 000000000..265f8f9a4
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFFusedBatchNorm.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFFusedBatchNorm.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFFusedBatchNormTest, constructor)
+{
+ moco::TFFusedBatchNorm fbn_node;
+
+ ASSERT_EQ(fbn_node.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(fbn_node.opcode(), moco::TFOpcode::FusedBatchNorm);
+
+ ASSERT_EQ(fbn_node.x(), nullptr);
+ ASSERT_EQ(fbn_node.scale(), nullptr);
+ ASSERT_EQ(fbn_node.offset(), nullptr);
+ ASSERT_EQ(fbn_node.mean(), nullptr);
+ ASSERT_EQ(fbn_node.variance(), nullptr);
+ ASSERT_NE(fbn_node.epsilon(), 0.0f);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFIdentity.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFIdentity.test.cpp
new file mode 100644
index 000000000..deb17d502
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFIdentity.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFIdentity.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFIdentityTest, constructor)
+{
+ moco::TFIdentity identity_node;
+
+ ASSERT_EQ(identity_node.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(identity_node.opcode(), moco::TFOpcode::Identity);
+
+ ASSERT_EQ(identity_node.input(), nullptr);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFMaxPool.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFMaxPool.test.cpp
new file mode 100644
index 000000000..482ad889d
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFMaxPool.test.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFMaxPool.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFMaxPoolTest, constructor)
+{
+ moco::TFMaxPool maxpool;
+
+ ASSERT_EQ(maxpool.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(maxpool.opcode(), moco::TFOpcode::MaxPool);
+
+ ASSERT_EQ(maxpool.input(), nullptr);
+ ASSERT_EQ(maxpool.data_layout(), "");
+ ASSERT_EQ(maxpool.padding(), "");
+ ASSERT_EQ(maxpool.ksize(), std::vector<int64_t>({}));
+ ASSERT_EQ(maxpool.strides(), std::vector<int64_t>({}));
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFMaximum.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFMaximum.test.cpp
new file mode 100644
index 000000000..568bd7038
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFMaximum.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFMaximum.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFMaximumTest, constructor)
+{
+ moco::TFMaximum max_node;
+
+ ASSERT_EQ(max_node.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(max_node.opcode(), moco::TFOpcode::Maximum);
+
+ ASSERT_EQ(max_node.x(), nullptr);
+ ASSERT_EQ(max_node.y(), nullptr);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFMean.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFMean.test.cpp
new file mode 100644
index 000000000..126b31783
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFMean.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFMean.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFMeanTest, constructor)
+{
+ moco::TFMean mean_node;
+
+ ASSERT_EQ(mean_node.dialect(), moco::TFDialect::get());
+ ASSERT_EQ(mean_node.opcode(), moco::TFOpcode::Mean);
+
+ ASSERT_EQ(mean_node.input(), nullptr);
+ ASSERT_EQ(mean_node.reduction_indices(), nullptr);
+ ASSERT_EQ(mean_node.keep_dims(), false);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFMul.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFMul.test.cpp
new file mode 100644
index 000000000..a4a1ecfd7
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFMul.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFMul.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFMulTest, constructor) // default state of a freshly constructed TFMul node
+{
+ moco::TFMul mul_node;
+
+ ASSERT_EQ(mul_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(mul_node.opcode(), moco::TFOpcode::Mul);
+
+ ASSERT_EQ(mul_node.x(), nullptr); // both inputs start unconnected
+ ASSERT_EQ(mul_node.y(), nullptr);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFPack.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFPack.test.cpp
new file mode 100644
index 000000000..a62b39f3d
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFPack.test.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFPack.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFPackTest, constructor) // default state of a TFPack constructed with a fixed arity
+{
+ moco::TFPack pack_node(3); // num of values
+
+ ASSERT_EQ(pack_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(pack_node.opcode(), moco::TFOpcode::Pack);
+
+ ASSERT_EQ(pack_node.N(), 3); // arity matches the constructor argument
+ ASSERT_EQ(pack_node.values(0), nullptr); // every value slot starts unconnected
+ ASSERT_EQ(pack_node.values(1), nullptr);
+ ASSERT_EQ(pack_node.values(2), nullptr);
+ ASSERT_EQ(pack_node.axis(), 0); // axis attribute defaults to 0
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFPad.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFPad.test.cpp
new file mode 100644
index 000000000..f3f3dcc8c
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFPad.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFPad.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFPadTest, constructor) // default state of a freshly constructed TFPad node
+{
+ moco::TFPad pad;
+
+ ASSERT_EQ(pad.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(pad.opcode(), moco::TFOpcode::Pad);
+
+ ASSERT_EQ(pad.input(), nullptr); // inputs start unconnected
+ ASSERT_EQ(pad.paddings(), nullptr);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFPlaceholder.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFPlaceholder.test.cpp
new file mode 100644
index 000000000..e082f0c3e
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFPlaceholder.test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFPlaceholder.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFPlaceholderTest, constructor) // defaults plus dtype/rank/dim mutators of TFPlaceholder
+{
+ moco::TFPlaceholder placeholder;
+
+ ASSERT_EQ(placeholder.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(placeholder.opcode(), moco::TFOpcode::Placeholder);
+
+ ASSERT_EQ(placeholder.dtype(), loco::DataType::Unknown); // dtype starts Unknown
+ ASSERT_EQ(placeholder.rank(), 0); // shape starts rank-0
+
+ placeholder.dtype(loco::DataType::FLOAT32); // setter round-trips
+ ASSERT_EQ(placeholder.dtype(), loco::DataType::FLOAT32);
+
+ placeholder.rank(2);
+ ASSERT_EQ(placeholder.rank(), 2);
+
+ placeholder.dim(0) = 2; // assigning a dimension makes it "known"
+ placeholder.dim(1) = 3;
+
+ ASSERT_TRUE(placeholder.dim(0).known());
+ ASSERT_TRUE(placeholder.dim(1).known());
+
+ ASSERT_EQ(placeholder.dim(0), 2);
+ ASSERT_EQ(placeholder.dim(1), 3);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFRealDiv.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFRealDiv.test.cpp
new file mode 100644
index 000000000..bfb8154a6
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFRealDiv.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFRealDiv.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFRealDivTest, constructor) // default state of a freshly constructed TFRealDiv node
+{
+ moco::TFRealDiv div_node;
+
+ ASSERT_EQ(div_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(div_node.opcode(), moco::TFOpcode::RealDiv);
+
+ ASSERT_EQ(div_node.x(), nullptr); // both inputs start unconnected
+ ASSERT_EQ(div_node.y(), nullptr);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFRelu.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFRelu.test.cpp
new file mode 100644
index 000000000..650e2550d
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFRelu.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFRelu.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFReluTest, constructor) // default state of a freshly constructed TFRelu node
+{
+ moco::TFRelu relu_node;
+
+ ASSERT_EQ(relu_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(relu_node.opcode(), moco::TFOpcode::Relu);
+
+ ASSERT_EQ(relu_node.features(), nullptr); // input starts unconnected
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFRelu6.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFRelu6.test.cpp
new file mode 100644
index 000000000..9cce83df3
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFRelu6.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFRelu6.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFRelu6Test, constructor) // default state of a freshly constructed TFRelu6 node
+{
+ moco::TFRelu6 relu6_node;
+
+ ASSERT_EQ(relu6_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(relu6_node.opcode(), moco::TFOpcode::Relu6);
+
+ ASSERT_EQ(relu6_node.features(), nullptr); // input starts unconnected
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFReshape.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFReshape.test.cpp
new file mode 100644
index 000000000..514c691e9
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFReshape.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFReshape.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFReshapeTest, constructor) // default state of a freshly constructed TFReshape node
+{
+ moco::TFReshape reshape_node;
+
+ ASSERT_EQ(reshape_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(reshape_node.opcode(), moco::TFOpcode::Reshape);
+
+ ASSERT_EQ(reshape_node.tensor(), nullptr); // both inputs start unconnected
+ ASSERT_EQ(reshape_node.shape(), nullptr);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFRsqrt.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFRsqrt.test.cpp
new file mode 100644
index 000000000..e94336dfe
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFRsqrt.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFRsqrt.h" // fixed stray '//' to match sibling test includes
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFRsqrtTest, constructor) // default state of a freshly constructed TFRsqrt node
+{
+ moco::TFRsqrt rsqrt_node;
+
+ ASSERT_EQ(rsqrt_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(rsqrt_node.opcode(), moco::TFOpcode::Rsqrt);
+
+ ASSERT_EQ(rsqrt_node.x(), nullptr); // input starts unconnected
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFShape.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFShape.test.cpp
new file mode 100644
index 000000000..28110d790
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFShape.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFShape.h" // fixed stray '//' to match sibling test includes
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFShapeTest, constructor) // default state of a freshly constructed TFShape node
+{
+ moco::TFShape shape_node;
+
+ ASSERT_EQ(shape_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(shape_node.opcode(), moco::TFOpcode::Shape);
+
+ ASSERT_EQ(shape_node.input(), nullptr); // input starts unconnected
+ ASSERT_EQ(shape_node.dtype(), loco::DataType::Unknown); // output dtype attribute defaults to Unknown
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFSoftmax.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFSoftmax.test.cpp
new file mode 100644
index 000000000..67449feac
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFSoftmax.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFSoftmax.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFSoftmaxTest, constructor) // default state of a freshly constructed TFSoftmax node
+{
+ moco::TFSoftmax softmax_node;
+
+ ASSERT_EQ(softmax_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(softmax_node.opcode(), moco::TFOpcode::Softmax);
+
+ ASSERT_EQ(softmax_node.logits(), nullptr); // input starts unconnected
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFSqrt.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFSqrt.test.cpp
new file mode 100644
index 000000000..942769f6c
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFSqrt.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFSqrt.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFSqrtTest, constructor) // default state of a freshly constructed TFSqrt node
+{
+ moco::TFSqrt sqrt_node;
+
+ ASSERT_EQ(sqrt_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(sqrt_node.opcode(), moco::TFOpcode::Sqrt);
+
+ ASSERT_EQ(sqrt_node.x(), nullptr); // input starts unconnected
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFSquaredDifference.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFSquaredDifference.test.cpp
new file mode 100644
index 000000000..c3ece9b70
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFSquaredDifference.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFSquaredDifference.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFSquaredDifferenceTest, constructor) // default state of a freshly constructed TFSquaredDifference node
+{
+ moco::TFSquaredDifference sd_node;
+
+ ASSERT_EQ(sd_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(sd_node.opcode(), moco::TFOpcode::SquaredDifference);
+
+ ASSERT_EQ(sd_node.x(), nullptr); // both inputs start unconnected
+ ASSERT_EQ(sd_node.y(), nullptr);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFSqueeze.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFSqueeze.test.cpp
new file mode 100644
index 000000000..034ca70b2
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFSqueeze.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFSqueeze.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFSqueezeTest, constructor) // default state of a freshly constructed TFSqueeze node
+{
+ moco::TFSqueeze squeeze_node;
+
+ ASSERT_EQ(squeeze_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(squeeze_node.opcode(), moco::TFOpcode::Squeeze);
+
+ ASSERT_EQ(squeeze_node.input(), nullptr); // input starts unconnected
+ ASSERT_EQ(squeeze_node.squeeze_dims().size(), 0); // squeeze_dims attribute starts empty
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFStopGradient.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFStopGradient.test.cpp
new file mode 100644
index 000000000..054ccda41
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFStopGradient.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFStopGradient.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFStopGradientTest, constructor) // default state of a freshly constructed TFStopGradient node
+{
+ moco::TFStopGradient node;
+
+ ASSERT_EQ(node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(node.opcode(), moco::TFOpcode::StopGradient);
+
+ ASSERT_EQ(node.input(), nullptr); // input starts unconnected
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFStridedSlice.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFStridedSlice.test.cpp
new file mode 100644
index 000000000..9e7e45543
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFStridedSlice.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFStridedSlice.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFStridedSliceTest, constructor) // default inputs and mask attributes of TFStridedSlice
+{
+ moco::TFStridedSlice node;
+
+ ASSERT_EQ(node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(node.opcode(), moco::TFOpcode::StridedSlice);
+
+ ASSERT_EQ(node.input(), nullptr); // all four inputs start unconnected
+ ASSERT_EQ(node.begin(), nullptr);
+ ASSERT_EQ(node.end(), nullptr);
+ ASSERT_EQ(node.strides(), nullptr);
+ ASSERT_EQ(node.begin_mask(), 0); // every mask attribute defaults to 0
+ ASSERT_EQ(node.end_mask(), 0);
+ ASSERT_EQ(node.ellipsis_mask(), 0);
+ ASSERT_EQ(node.new_axis_mask(), 0);
+ ASSERT_EQ(node.shrink_axis_mask(), 0);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFSub.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFSub.test.cpp
new file mode 100644
index 000000000..4b80713bd
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFSub.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFSub.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFSubTest, constructor) // default state of a freshly constructed TFSub node
+{
+ moco::TFSub sub_node;
+
+ ASSERT_EQ(sub_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(sub_node.opcode(), moco::TFOpcode::Sub);
+
+ ASSERT_EQ(sub_node.x(), nullptr); // both inputs start unconnected
+ ASSERT_EQ(sub_node.y(), nullptr);
+}
diff --git a/compiler/moco/lang/src/IR/Nodes/TFTanh.test.cpp b/compiler/moco/lang/src/IR/Nodes/TFTanh.test.cpp
new file mode 100644
index 000000000..38458a694
--- /dev/null
+++ b/compiler/moco/lang/src/IR/Nodes/TFTanh.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFTanh.h"
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFTanhTest, constructor) // default state of a freshly constructed TFTanh node
+{
+ moco::TFTanh tanh_node;
+
+ ASSERT_EQ(tanh_node.dialect(), moco::TFDialect::get()); // node reports the TF dialect singleton
+ ASSERT_EQ(tanh_node.opcode(), moco::TFOpcode::Tanh);
+
+ ASSERT_EQ(tanh_node.x(), nullptr); // input starts unconnected
+}
diff --git a/compiler/moco/lang/src/IR/TFDialect.cpp b/compiler/moco/lang/src/IR/TFDialect.cpp
new file mode 100644
index 000000000..35bbcc2c9
--- /dev/null
+++ b/compiler/moco/lang/src/IR/TFDialect.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/TFDialect.h"
+#include "moco/IR/TFNode.h"
+
+#include <loco/IR/Graph.h>
+#include <loco/IR/GraphInputIndex.h>
+#include <loco/IR/GraphOutputIndex.h>
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+struct GiiQueryServiceImpl final : public loco::GraphInputIndexQueryService // maps TFPlaceholder -> graph input index
+{
+ bool associated(const loco::Node *node) const final // true iff node is a TFPlaceholder with an index annotation
+ {
+ if (auto tfplaceholder = dynamic_cast<const moco::TFPlaceholder *>(node))
+ {
+ return moco::indexed(tfplaceholder);
+ }
+ return false;
+ }
+
+ loco::GraphInputIndex index(const loco::Node *node) const final // was GraphOutputIndex: copy-paste from the output-side twin; moco::index(TFPlaceholder*) yields a GraphInputIndex
+ {
+ assert(associated(node));
+ auto tfplaceholder = dynamic_cast<const moco::TFPlaceholder *>(node);
+ assert(tfplaceholder != nullptr);
+ return moco::index(tfplaceholder);
+ }
+};
+
+struct GoiQueryServiceImpl final : public loco::GraphOutputIndexQueryService // maps TFPush -> graph output index
+{
+ bool associated(const loco::Node *node) const final // true iff node is a TFPush with an index assigned
+ {
+ if (auto tfpush = dynamic_cast<const moco::TFPush *>(node))
+ {
+ return tfpush->indexed();
+ }
+ return false;
+ }
+
+ loco::GraphOutputIndex index(const loco::Node *node) const final // precondition: associated(node)
+ {
+ assert(associated(node));
+ if (auto tfpush = dynamic_cast<const moco::TFPush *>(node))
+ {
+ return tfpush->index();
+ }
+ throw std::invalid_argument("node"); // unreachable when the precondition holds; guards release builds where assert is a no-op
+ }
+};
+
+} // namespace
+
+namespace moco
+{
+
+TFDialect::TFDialect() // register graph input/output index query services for this dialect
+{
+ service<loco::GraphInputIndexQueryService>(stdex::make_unique<GiiQueryServiceImpl>());
+ service<loco::GraphOutputIndexQueryService>(stdex::make_unique<GoiQueryServiceImpl>());
+}
+
+loco::Dialect *TFDialect::get(void) // singleton accessor; returns the same pointer on every call
+{
+ static TFDialect d;
+ return &d;
+}
+
+} // namespace moco
diff --git a/compiler/moco/lang/src/IR/TFDialect.test.cpp b/compiler/moco/lang/src/IR/TFDialect.test.cpp
new file mode 100644
index 000000000..3c8b1a16b
--- /dev/null
+++ b/compiler/moco/lang/src/IR/TFDialect.test.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/TFDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(TFDialectTest, get)
+{
+ auto d = moco::TFDialect::get();
+
+ // get() SHOULD return a valid(non-null) pointer
+ ASSERT_NE(d, nullptr);
+ // The return value SHOULD be stable across multiple invocations
+ ASSERT_EQ(d, moco::TFDialect::get());
+}
diff --git a/compiler/moco/lang/src/IR/TFNode.cpp b/compiler/moco/lang/src/IR/TFNode.cpp
new file mode 100644
index 000000000..ab9356196
--- /dev/null
+++ b/compiler/moco/lang/src/IR/TFNode.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/TFNode.h"
+#include "moco/IR/TFDialect.h"
+
+#include <cassert>
+
+namespace moco
+{
+
+const loco::Dialect *TFNode::dialect(void) const { return TFDialect::get(); } // every TF node belongs to the TFDialect singleton
+
+} // namespace moco
+
+// TODO move this to appropriate place
+#include <stdex/Memory.h>
+
+namespace moco
+{
+
+struct GraphInputIndexAnnotation : public loco::NodeAnnotation // attaches a graph input index to a node
+{
+public:
+ GraphInputIndexAnnotation(const loco::GraphInputIndex &index) : _index{index}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const loco::GraphInputIndex &index(void) const { return _index; } // read-only accessor
+
+private:
+ loco::GraphInputIndex _index;
+};
+
+bool indexed(const TFPlaceholder *node) // true iff a graph input index has been assigned via index(node, idx)
+{
+ return (node->annot<GraphInputIndexAnnotation>() != nullptr);
+}
+
+loco::GraphInputIndex index(const TFPlaceholder *node) // read the assigned index; precondition: indexed(node)
+{
+ assert(indexed(node));
+ return node->annot<GraphInputIndexAnnotation>()->index();
+}
+
+void index(TFPlaceholder *node, const loco::GraphInputIndex index) // assign (or replace) the graph input index annotation
+{
+ node->annot(stdex::make_unique<GraphInputIndexAnnotation>(index));
+}
+
+loco::TensorShape tensor_shape(const TFPlaceholder *node) // copy the placeholder's dimensions into a loco::TensorShape
+{
+ assert(node != nullptr);
+
+ loco::TensorShape shape;
+
+ uint32_t rank = node->rank();
+ shape.rank(rank);
+ for (uint32_t index = 0; index < rank; ++index)
+ {
+ if (node->dim(index).known())
+ shape.dim(index) = node->dim(index).value();
+ else
+ shape.dim(index).unset(); // propagate unknown dimensions as-is
+ }
+
+ return shape;
+}
+
+TFPlaceholder *placeholder_node(loco::Graph *g, const loco::GraphInputIndex &idx) // linear scan; nullptr when no placeholder carries idx
+{
+ for (uint32_t n = 0; n < g->nodes()->size(); ++n)
+ {
+ if (auto tfplaceholder = dynamic_cast<TFPlaceholder *>(g->nodes()->at(n)))
+ {
+ if (indexed(tfplaceholder) && index(tfplaceholder) == idx)
+ {
+ return tfplaceholder;
+ }
+ }
+ }
+ return nullptr;
+}
+
+} // namespace moco
+
+namespace moco
+{
+
+/**
+ * TFPush
+ */
+
+void TFPush::index(const loco::GraphOutputIndex &index) // assign the graph output index this push feeds
+{
+ // Push internally stores "GraphOutputIndex" as int64_t
+ _index = static_cast<int64_t>(index);
+}
+
+loco::GraphOutputIndex TFPush::index(void) const // read back the index; asserts the stored value fits GraphOutputIndex
+{
+ assert(_index >= std::numeric_limits<loco::GraphOutputIndex>::min());
+ assert(_index <= std::numeric_limits<loco::GraphOutputIndex>::max());
+ return static_cast<loco::GraphOutputIndex>(_index);
+}
+
+TFPush *push_node(loco::Graph *g, const loco::GraphOutputIndex &index) // linear scan; nullptr when no push carries index
+{
+ for (uint32_t n = 0; n < g->nodes()->size(); ++n)
+ {
+ if (auto tfpush = dynamic_cast<TFPush *>(g->nodes()->at(n)))
+ {
+ if (tfpush->indexed() && tfpush->index() == index)
+ {
+ return tfpush;
+ }
+ }
+ }
+ return nullptr;
+}
+
+} // namespace moco
diff --git a/compiler/moco/lang/src/IR/TFNode.test.cpp b/compiler/moco/lang/src/IR/TFNode.test.cpp
new file mode 100644
index 000000000..4df1211db
--- /dev/null
+++ b/compiler/moco/lang/src/IR/TFNode.test.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/Nodes/TFPlaceholder.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+TEST(TFNodeTest_Placeholder, index) // moco::index setter/getter round-trip on a graph-owned placeholder
+{
+ loco::Graph graph;
+
+ auto test_node = graph.nodes()->create<moco::TFPlaceholder>();
+
+ loco::GraphInputIndex index_set{100};
+ moco::index(test_node, index_set); // annotate with a graph input index
+
+ auto index_get = moco::index(test_node);
+ ASSERT_EQ(index_get, index_set); // getter returns the value just set
+}
+
+TEST(TFNodeTest_Placeholder, name) // name setter/getter round-trip
+{
+ loco::Graph graph;
+
+ auto test_node = graph.nodes()->create<moco::TFPlaceholder>();
+
+ test_node->name("PlaceholderName");
+ ASSERT_EQ(test_node->name(), "PlaceholderName"); // getter returns the value just set
+}
diff --git a/compiler/moco/lang/src/IR/VariadicArityNode.test.cpp b/compiler/moco/lang/src/IR/VariadicArityNode.test.cpp
new file mode 100644
index 000000000..57361af98
--- /dev/null
+++ b/compiler/moco/lang/src/IR/VariadicArityNode.test.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/IR/VariadicArityNode.h"
+
+#include <loco/IR/Nodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace moco;
+
+class ArbitraryInputNode : public VariadicArityNode<loco::Node>
+{
+public:
+ ArbitraryInputNode(uint32_t arity) : VariadicArityNode<loco::Node>(arity) {}
+
+ void input(uint32_t idx, loco::Node *node) { at(idx)->node(node); }
+ loco::Node *input(uint32_t idx) const { return at(idx)->node(); }
+
+ const loco::Dialect *dialect(void) const { return nullptr; } // this won't be called for testing
+ uint32_t opnum(void) const { return -1; } // this won't be called for testing
+};
+
+} // namespace
+
+TEST(CustomOpTest, VariadicArityNode_arity_n)
+{
+ loco::ConstGen cg0, cg1, cg2;
+
+ ArbitraryInputNode a_node(3);
+ a_node.input(0, &cg0);
+ a_node.input(1, &cg1);
+ a_node.input(2, &cg2);
+
+ ASSERT_EQ(a_node.arity(), 3);
+ ASSERT_EQ(a_node.input(0), &cg0);
+ ASSERT_EQ(a_node.input(1), &cg1);
+ ASSERT_EQ(a_node.input(2), &cg2);
+}
diff --git a/compiler/moco/pass/CMakeLists.txt b/compiler/moco/pass/CMakeLists.txt
new file mode 100644
index 000000000..1eba86283
--- /dev/null
+++ b/compiler/moco/pass/CMakeLists.txt
@@ -0,0 +1,26 @@
# Library sources: everything under src/ except unit tests (*.test.cpp)
file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})

add_library(moco_pass SHARED ${SOURCES})
# src is PRIVATE (internal helpers), include is the PUBLIC API surface
target_include_directories(moco_pass PRIVATE src)
target_include_directories(moco_pass PUBLIC include)
# PUBLIC deps appear in the public headers; PRIVATE deps are implementation-only
target_link_libraries(moco_pass PUBLIC loco)
target_link_libraries(moco_pass PUBLIC logo_core)
target_link_libraries(moco_pass PUBLIC moco_lang)
target_link_libraries(moco_pass PRIVATE moco_support)
target_link_libraries(moco_pass PRIVATE stdex)
target_link_libraries(moco_pass PRIVATE oops)
install(TARGETS moco_pass DESTINATION lib)

# Unit tests are built only when the project is configured with ENABLE_TEST
if(NOT ENABLE_TEST)
  return()
endif(NOT ENABLE_TEST)

nnas_find_package(GTest REQUIRED)

GTest_AddTest(moco_pass_test ${TESTS})
target_include_directories(moco_pass_test PRIVATE src)
target_link_libraries(moco_pass_test moco_pass)
target_link_libraries(moco_pass_test moco_support)
target_link_libraries(moco_pass_test stdex)
diff --git a/compiler/moco/pass/README.md b/compiler/moco/pass/README.md
new file mode 100644
index 000000000..51921b8db
--- /dev/null
+++ b/compiler/moco/pass/README.md
@@ -0,0 +1,3 @@
+# pass
+
+_pass_ provides general _moco_ graph passes for transformation and optimization.
diff --git a/compiler/moco/pass/include/moco/Pass/Passes.h b/compiler/moco/pass/include/moco/Pass/Passes.h
new file mode 100644
index 000000000..210f0acfc
--- /dev/null
+++ b/compiler/moco/pass/include/moco/Pass/Passes.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_PASS_PASSES_H__
+#define __MOCO_PASS_PASSES_H__
+
+#include "Passes/ConstantFoldAdd.h"
+#include "Passes/ConstantFoldMul.h"
+#include "Passes/ConstantFoldPack.h"
+#include "Passes/ConstantFoldStridedSlice.h"
+#include "Passes/FuseBinaryIntoPreceding.h"
+#include "Passes/RemoveTFIdentityNode.h"
+#include "Passes/ResolveConstantShape.h"
+#include "Passes/ResolveFusedBatchNorm.h"
+#include "Passes/ResolveReshapeWildcardDim.h"
+#include "Passes/ResolveSquaredDifference.h"
+#include "Passes/SqueezeReduceNode.h"
+
+#endif // __MOCO_PASS_PASSES_H__
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldAdd.h b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldAdd.h
new file mode 100644
index 000000000..ed58d5ee3
--- /dev/null
+++ b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldAdd.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_PASS_CONSTANTFOLD_ADD_H__
+#define __MOCO_PASS_CONSTANTFOLD_ADD_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace moco
+{
+
/**
 * @brief Constant folder for Const + Add -> Const
 *
 * Replaces a TFAdd whose inputs are both constant with a single TFConst,
 * so the addition no longer runs at inference time.
 */
class ConstantFoldAdd : public logo::Pass
{
public:
  // Pass name reported to the logo pass framework (used for logging)
  const char *name(void) const final { return "ConstantFoldAdd"; }

public:
  // Returns true when the graph was modified; callers run it to a fixed point
  bool run(loco::Graph *graph) override;
};
+
+} // namespace moco
+
+#endif // __MOCO_PASS_CONSTANTFOLD_ADD_H__
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldMul.h b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldMul.h
new file mode 100644
index 000000000..5528b8612
--- /dev/null
+++ b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldMul.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_PASS_CONSTANTFOLD_MUL_H__
+#define __MOCO_PASS_CONSTANTFOLD_MUL_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace moco
+{
+
/**
 * @brief Constant folder for Const + Mul -> Const
 *
 * Replaces a TFMul whose inputs are both constant with a single TFConst.
 */
class ConstantFoldMul : public logo::Pass
{
public:
  // Pass name reported to the logo pass framework
  const char *name(void) const final { return "ConstantFoldMul"; }

public:
  // Returns true when the graph was modified; run to a fixed point
  bool run(loco::Graph *graph) override;
};
+
+} // namespace moco
+
+#endif // __MOCO_PASS_CONSTANTFOLD_MUL_H__
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldPack.h b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldPack.h
new file mode 100644
index 000000000..fc6bc0ace
--- /dev/null
+++ b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldPack.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_PASS_CONSTANTFOLD_PACK_H__
+#define __MOCO_PASS_CONSTANTFOLD_PACK_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace moco
+{
+
// TODO Provide like ConstantFoldPass<TFPack> for ConstantFold extension

/**
 * @brief Constant folder for Const + Pack -> Const
 *
 * Replaces a TFPack whose inputs are all constant with a single TFConst.
 */
class ConstantFoldPack : public logo::Pass
{
public:
  // Pass name reported to the logo pass framework
  const char *name(void) const final { return "ConstantFoldPack"; }

public:
  // Returns true when the graph was modified; run to a fixed point
  bool run(loco::Graph *graph) override;
};
+
+} // namespace moco
+
+#endif // __MOCO_PASS_CONSTANTFOLD_PACK_H__
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldStridedSlice.h b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldStridedSlice.h
new file mode 100644
index 000000000..1e3492c2c
--- /dev/null
+++ b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldStridedSlice.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_PASS_CONSTANTFOLD_STRIDEDSLICE_H__
+#define __MOCO_PASS_CONSTANTFOLD_STRIDEDSLICE_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace moco
+{
+
/**
 * @brief Constant folder for Const + StridedSlice -> Const
 *
 * Replaces a TFStridedSlice over a constant input with a single TFConst.
 */
class ConstantFoldStridedSlice : public logo::Pass
{
public:
  // Pass name reported to the logo pass framework
  const char *name(void) const final { return "ConstantFoldStridedSlice"; }

public:
  // Returns true when the graph was modified; run to a fixed point
  bool run(loco::Graph *graph) override;
};
+
+} // namespace moco
+
+#endif // __MOCO_PASS_CONSTANTFOLD_STRIDEDSLICE_H__
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/FuseBinaryIntoPreceding.h b/compiler/moco/pass/include/moco/Pass/Passes/FuseBinaryIntoPreceding.h
new file mode 100644
index 000000000..24e3567c0
--- /dev/null
+++ b/compiler/moco/pass/include/moco/Pass/Passes/FuseBinaryIntoPreceding.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_PASS_FUSE_BINARY_INTO_PRECEDING_H__
+#define __MOCO_PASS_FUSE_BINARY_INTO_PRECEDING_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace moco
+{
+
/**
 * @brief Fuse TFAdd, TFMul to preceding TFConv2D or TFDepthWiseConv2D
 *
 * Folds a following elementwise binary op (bias add / channel scale) into the
 * preceding convolution so the binary op disappears from the graph.
 */
class FuseBinaryIntoPreceding : public logo::Pass
{
public:
  // Pass name reported to the logo pass framework
  const char *name(void) const final { return "FuseBinaryIntoPreceding"; }

public:
  // Returns true when the graph was modified; run to a fixed point
  bool run(loco::Graph *graph) override;
};
+
+} // namespace moco
+
+#endif // __MOCO_PASS_FUSE_BINARY_INTO_PRECEDING_H__
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/RemoveTFIdentityNode.h b/compiler/moco/pass/include/moco/Pass/Passes/RemoveTFIdentityNode.h
new file mode 100644
index 000000000..388249b63
--- /dev/null
+++ b/compiler/moco/pass/include/moco/Pass/Passes/RemoveTFIdentityNode.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_PASS_REMOVE_TFIDENTITY_NODE_H__
+#define __MOCO_PASS_REMOVE_TFIDENTITY_NODE_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace moco
+{
+
/**
 * @brief Use the input of "TFIdentity" node instead
 *
 * BEFORE:
 *   [X] -> [TFIdentity] -> [Y]
 *
 * AFTER:
 *   [X] -> [Y]
 *   [TFIdentity]
 *
 * NOTE This transform does not remove the "TFIdentity" node itself; it only
 *      rewires consumers, leaving the identity node dangling for a later
 *      dead-node cleanup. This transform is identical to RemoveForwardNode.
 */
struct RemoveTFIdentityNode final : public logo::Pass
{
  const char *name(void) const final { return "RemoveTFIdentityNode"; }

  bool run(loco::Graph *g) final;
};
+
+} // namespace moco
+
+#endif // __MOCO_PASS_REMOVE_TFIDENTITY_NODE_H__
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/ResolveConstantShape.h b/compiler/moco/pass/include/moco/Pass/Passes/ResolveConstantShape.h
new file mode 100644
index 000000000..16046a052
--- /dev/null
+++ b/compiler/moco/pass/include/moco/Pass/Passes/ResolveConstantShape.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_PASS_RESOLVE_CONSTANT_SHAPE_H__
+#define __MOCO_PASS_RESOLVE_CONSTANT_SHAPE_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace moco
+{
+
/**
 * @brief Replace fully determined TFShape node into TFConst
 *
 * When the input shape of a TFShape node is fully known, the node's output
 * is a compile-time constant and can be materialized as TFConst.
 */
class ResolveConstantShape : public logo::Pass
{
public:
  // Pass name reported to the logo pass framework
  const char *name(void) const final { return "ResolveConstantShape"; }

public:
  // Returns true when the graph was modified; run to a fixed point
  bool run(loco::Graph *graph) override;
};
+
+} // namespace moco
+
+#endif // __MOCO_PASS_RESOLVE_CONSTANT_SHAPE_H__
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/ResolveFusedBatchNorm.h b/compiler/moco/pass/include/moco/Pass/Passes/ResolveFusedBatchNorm.h
new file mode 100644
index 000000000..ce5ea0bb0
--- /dev/null
+++ b/compiler/moco/pass/include/moco/Pass/Passes/ResolveFusedBatchNorm.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_PASS_RESOLVE_FUSEDBATCHNORM_H__
+#define __MOCO_PASS_RESOLVE_FUSEDBATCHNORM_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace moco
+{
+
/**
 * @brief Transform TFFusedBatchNorm into TFAdd + TFRsqrt + TFMul + TFBatchNorm
 *
 * Decomposes the fused op into primitive ops that downstream passes and
 * exporters already understand.
 */
class ResolveFusedBatchNorm : public logo::Pass
{
public:
  // Pass name reported to the logo pass framework
  const char *name(void) const final { return "ResolveFusedBatchNorm"; }

public:
  // Returns true when the graph was modified; run to a fixed point
  bool run(loco::Graph *graph) override;
};
+
+} // namespace moco
+
+#endif // __MOCO_PASS_RESOLVE_FUSEDBATCHNORM_H__
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/ResolveReshapeWildcardDim.h b/compiler/moco/pass/include/moco/Pass/Passes/ResolveReshapeWildcardDim.h
new file mode 100644
index 000000000..137c97379
--- /dev/null
+++ b/compiler/moco/pass/include/moco/Pass/Passes/ResolveReshapeWildcardDim.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_PASS_RESOLVE_RESHAPE_WILDCARD_DIM_H__
+#define __MOCO_PASS_RESOLVE_RESHAPE_WILDCARD_DIM_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace moco
+{
+
/**
 * @brief Determine wildcard dimension (denoted as -1) of Reshape's shape input
 *        if possible
 *
 * The -1 entry is computed from the total element count divided by the
 * product of the known dimensions, when both are available.
 */
class ResolveReshapeWildcardDim : public logo::Pass
{
public:
  // Pass name reported to the logo pass framework
  const char *name(void) const final { return "ResolveReshapeWildcardDim"; }

public:
  // Returns true when the graph was modified; run to a fixed point
  bool run(loco::Graph *graph) override;
};
+
+} // namespace moco
+
+#endif // __MOCO_PASS_RESOLVE_RESHAPE_WILDCARD_DIM_H__
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/ResolveSquaredDifference.h b/compiler/moco/pass/include/moco/Pass/Passes/ResolveSquaredDifference.h
new file mode 100644
index 000000000..1aa78655e
--- /dev/null
+++ b/compiler/moco/pass/include/moco/Pass/Passes/ResolveSquaredDifference.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_PASS_RESOLVE_SQUARED_DIFFERENCE_H__
+#define __MOCO_PASS_RESOLVE_SQUARED_DIFFERENCE_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace moco
+{
+
/**
 * @brief Decompose TFSquaredDifference to TFSub, TFMul
 *
 * (x - y)^2 is rewritten as d = x - y; d * d, using primitive ops.
 */
class ResolveSquaredDifference : public logo::Pass
{
public:
  // Pass name reported to the logo pass framework
  const char *name(void) const final { return "ResolveSquaredDifference"; }

public:
  // Returns true when the graph was modified; run to a fixed point
  bool run(loco::Graph *graph) override;
};
+
+} // namespace moco
+
+#endif // __MOCO_PASS_RESOLVE_SQUARED_DIFFERENCE_H__
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/SqueezeReduceNode.h b/compiler/moco/pass/include/moco/Pass/Passes/SqueezeReduceNode.h
new file mode 100644
index 000000000..d4a3e65c6
--- /dev/null
+++ b/compiler/moco/pass/include/moco/Pass/Passes/SqueezeReduceNode.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_PASS_SQUEEZE_REDUCE_NODE_H__
+#define __MOCO_PASS_SQUEEZE_REDUCE_NODE_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace moco
+{
+
/**
 * @brief If a Reduce-type op does not keep its dimensions, replace it with an
 *        equivalent op that keeps dimensions and insert a TFSqueeze after it
 *
 * This normalizes reduce ops so that shape inference can treat them uniformly.
 */
class SqueezeReduceNode : public logo::Pass
{
public:
  // Pass name reported to the logo pass framework
  const char *name(void) const final { return "SqueezeReduceNode"; }

public:
  // Returns true when the graph was modified; run to a fixed point
  bool run(loco::Graph *graph) override;
};
+
+} // namespace moco
+
+#endif // __MOCO_PASS_SQUEEZE_REDUCE_NODE_H__
diff --git a/compiler/moco/pass/src/ConstantFoldAdd.test.cpp b/compiler/moco/pass/src/ConstantFoldAdd.test.cpp
new file mode 100644
index 000000000..bc9489fbd
--- /dev/null
+++ b/compiler/moco/pass/src/ConstantFoldAdd.test.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/ConstantFoldAdd.h"
+#include "TestHelper.h"
+
+#include <moco/IR/TFNodes.h>
+#include <loco.h>
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+moco::TFConst *const_vector_init(loco::Graph *graph, std::vector<int32_t> values)
+{
+ auto const_node = graph->nodes()->create<moco::TFConst>();
+ auto dim = values.size();
+
+ const_node->dtype(loco::DataType::S32);
+ const_node->rank(1);
+ const_node->dim(0).set(dim);
+
+ const_node->size<loco::DataType::S32>(dim);
+ for (int32_t i = 0; i < dim; ++i)
+ const_node->at<loco::DataType::S32>(i) = values[i];
+
+ return const_node;
+}
+
+} // namespace
+
+TEST(ConstantFoldAdd, basic_vector)
+{
+ loco::Graph graph;
+
+ auto add_node = graph.nodes()->create<moco::TFAdd>();
+ {
+ auto const_from_ss = const_vector_init(&graph, {1, 3, 5});
+ add_node->x(const_from_ss);
+
+ auto const_y = const_vector_init(&graph, {2});
+ add_node->y(const_y);
+ }
+ setup_output_node(&graph, add_node);
+
+ auto pass = stdex::make_unique<moco::ConstantFoldAdd>();
+ bool cont = true;
+ while (cont)
+ {
+ cont = pass->run(&graph);
+ }
+
+ auto ssnode = find_first_node_bytype<moco::TFAdd>(&graph);
+ ASSERT_EQ(ssnode, nullptr);
+
+ auto ssconst = find_first_node_bytype<moco::TFConst>(&graph);
+ ASSERT_NE(ssconst, nullptr);
+ ASSERT_EQ(ssconst->size<loco::DataType::S32>(), 3);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(0), 3);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(1), 5);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(2), 7);
+}
+
+TEST(ConstantFoldAdd, basic_refinedet_1)
+{
+ loco::Graph graph;
+
+ auto add_node = graph.nodes()->create<moco::TFAdd>();
+ {
+ auto const_from_ss = const_vector_init(&graph, {10});
+ add_node->x(const_from_ss);
+
+ auto const_y = const_vector_init(&graph, {0});
+ add_node->y(const_y);
+ }
+ setup_output_node(&graph, add_node);
+
+ auto pass = stdex::make_unique<moco::ConstantFoldAdd>();
+ bool cont = true;
+ while (cont)
+ {
+ cont = pass->run(&graph);
+ }
+
+ auto ssnode = find_first_node_bytype<moco::TFAdd>(&graph);
+ ASSERT_EQ(ssnode, nullptr);
+
+ auto ssconst = find_first_node_bytype<moco::TFConst>(&graph);
+ ASSERT_NE(ssconst, nullptr);
+ ASSERT_EQ(ssconst->size<loco::DataType::S32>(), 1);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(0), 10);
+}
diff --git a/compiler/moco/pass/src/ConstantFoldHelper.cpp b/compiler/moco/pass/src/ConstantFoldHelper.cpp
new file mode 100644
index 000000000..79b04863c
--- /dev/null
+++ b/compiler/moco/pass/src/ConstantFoldHelper.cpp
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstantFoldHelper.h"
+
+#include <cassert>
+#include <sstream>
+#include <string>
+
+namespace
+{
+
+// TODO this may need to be moved to loco
+bool same_shape(const loco::TensorShape *lhs, const loco::TensorShape *rhs)
+{
+ if (lhs->rank() != rhs->rank())
+ return false;
+
+ for (uint32_t r = 0; r < lhs->rank(); r++)
+ {
+ if (lhs->dim(r).value() != rhs->dim(r).value())
+ return false;
+ }
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+
+TFConst *new_const(loco::Graph *graph, loco::TensorShape &tensor_shape, const loco::DataType &dtype)
+{
+ assert(dtype == loco::DataType::S32 || dtype == loco::DataType::FLOAT32);
+
+ auto const_node = graph->nodes()->create<TFConst>();
+ const_node->dtype(dtype);
+ const_node->rank(tensor_shape.rank());
+
+ // Calc number of elements for target node and set shape
+ uint32_t num_elements = 1;
+ for (uint32_t r = 0; r < tensor_shape.rank(); r++)
+ {
+ const_node->dim(r) = tensor_shape.dim(r);
+ assert(const_node->dim(r).known());
+ num_elements = num_elements * const_node->dim(r).value();
+ }
+ if (dtype == loco::DataType::S32)
+ const_node->size<loco::DataType::S32>(num_elements);
+ else if (dtype == loco::DataType::FLOAT32)
+ const_node->size<loco::DataType::FLOAT32>(num_elements);
+
+ // give name for this node from address to be unique
+ std::ostringstream oss;
+ oss << "Const_" << (void *)const_node;
+ const_node->name(oss.str());
+
+ return const_node;
+}
+
+} // namespace moco
+
+namespace moco
+{
+
// Extract element 0 of a scalar-like (rank-0 or rank-1) S32 constant.
// NOTE(review): a rank-1 const with more than one element also passes the
// assert and silently yields only element 0 — confirm callers gate shapes.
template <> int32_t scalar_from_const<int32_t>(const TFConst *tfconst)
{
  assert(tfconst->rank() == 0 || tfconst->rank() == 1);
  assert(tfconst->dtype() == loco::DataType::S32);
  return tfconst->at<loco::DataType::S32>(0);
}

// Extract element 0 of a scalar-like (rank-0 or rank-1) FLOAT32 constant.
template <> float scalar_from_const<float>(const TFConst *tfconst)
{
  assert(tfconst->rank() == 0 || tfconst->rank() == 1);
  assert(tfconst->dtype() == loco::DataType::FLOAT32);
  return tfconst->at<loco::DataType::FLOAT32>(0);
}
+
+bool valid_shape_for_constfold_binary_op(const loco::TensorShape &lhs, const loco::TensorShape &rhs)
+{
+ // scalar
+ if (lhs.rank() == 0 || rhs.rank() == 0)
+ return true;
+
+ // same as scalar
+ if (lhs.rank() == 1 && lhs.dim(0).value() == 1)
+ return true;
+ if (rhs.rank() == 1 && rhs.dim(0).value() == 1)
+ return true;
+
+ // for elementwise binary operation
+ return ::same_shape(&lhs, &rhs);
+}
+
+} // namespace moco
+
+namespace moco
+{
+
// Default BinaryFunc behaviors: a concrete folder overrides only the dtype
// it supports, so an unsupported overload must fail loudly if ever reached.
float BinaryFunc::apply(float, float) const
{
  throw std::runtime_error{"F32 is not supported yet"};
}

int32_t BinaryFunc::apply(int32_t, int32_t) const
{
  throw std::runtime_error{"S32 is not supported yet"};
}
+
+} // namespace moco
+
+namespace
+{
+
+void apply_binary_s32(const moco::TFConst *lhs, int32_t rhs, moco::TFConst *output,
+ const moco::BinaryFunc &f)
+{
+ assert(lhs->dtype() == loco::DataType::S32);
+ assert(same_shape(lhs, output));
+
+ uint32_t nume = num_elements(lhs);
+ for (uint32_t e = 0; e < nume; e++)
+ {
+ output->at<loco::DataType::S32>(e) = f.apply(lhs->at<loco::DataType::S32>(e), rhs);
+ }
+}
+
+void apply_binary_f32(const moco::TFConst *lhs, float rhs, moco::TFConst *output,
+ const moco::BinaryFunc &f)
+{
+ assert(lhs->dtype() == loco::DataType::FLOAT32);
+ assert(same_shape(lhs, output));
+
+ uint32_t nume = num_elements(lhs);
+ for (uint32_t e = 0; e < nume; e++)
+ {
+ output->at<loco::DataType::FLOAT32>(e) = f.apply(lhs->at<loco::DataType::FLOAT32>(e), rhs);
+ }
+}
+
+void apply_binary_s32(const moco::TFConst *lhs, const moco::TFConst *rhs, moco::TFConst *output,
+ const moco::BinaryFunc &f)
+{
+ assert(same_shape(output, lhs));
+ assert(same_shape(output, rhs));
+ assert(output->dtype() == lhs->dtype());
+ assert(output->dtype() == rhs->dtype());
+
+ uint32_t nume = num_elements(lhs);
+ for (uint32_t e = 0; e < nume; e++)
+ {
+ output->at<loco::DataType::S32>(e) =
+ f.apply(lhs->at<loco::DataType::S32>(e), rhs->at<loco::DataType::S32>(e));
+ }
+}
+
+void apply_binary_f32(const moco::TFConst *lhs, const moco::TFConst *rhs, moco::TFConst *output,
+ const moco::BinaryFunc &f)
+{
+ assert(same_shape(output, lhs));
+ assert(same_shape(output, rhs));
+ assert(output->dtype() == lhs->dtype());
+ assert(output->dtype() == rhs->dtype());
+
+ uint32_t nume = num_elements(lhs);
+ for (uint32_t e = 0; e < nume; e++)
+ {
+ output->at<loco::DataType::FLOAT32>(e) =
+ f.apply(lhs->at<loco::DataType::FLOAT32>(e), rhs->at<loco::DataType::FLOAT32>(e));
+ }
+}
+
+} // namespace
+
+namespace moco
+{
+
+template <>
+void apply_binary<int32_t>(const moco::TFConst *x_const, const moco::TFConst *y_const,
+ moco::TFConst *output_const, const moco::BinaryFunc &f)
+{
+ auto x_shape = moco::tensor_shape(x_const);
+ auto y_shape = moco::tensor_shape(y_const);
+
+ if (y_shape.rank() == 0 || y_shape.rank() == 1)
+ {
+ auto rhs = scalar_from_const<int32_t>(y_const);
+ apply_binary_s32(x_const, rhs, output_const, f);
+ }
+ else if (x_shape.rank() == 0 || x_shape.rank() == 1)
+ {
+ auto rhs = scalar_from_const<int32_t>(x_const);
+ apply_binary_s32(y_const, rhs, output_const, f);
+ }
+ else
+ {
+ apply_binary_f32(x_const, y_const, output_const, f);
+ }
+}
+
// FLOAT32 entry point: dispatch to the scalar or elementwise FLOAT32 folder.
// NOTE(review): a rank-1 input is routed to the scalar path even when it has
// more than one element (only element 0 is used) — confirm that callers gate
// shapes via valid_shape_for_constfold_binary_op before reaching here.
template <>
void apply_binary<float>(const moco::TFConst *x_const, const moco::TFConst *y_const,
                         moco::TFConst *output_const, const moco::BinaryFunc &f)
{
  auto x_shape = moco::tensor_shape(x_const);
  auto y_shape = moco::tensor_shape(y_const);

  if (y_shape.rank() == 0 || y_shape.rank() == 1)
  {
    auto rhs = scalar_from_const<float>(y_const);
    apply_binary_f32(x_const, rhs, output_const, f);
  }
  else if (x_shape.rank() == 0 || x_shape.rank() == 1)
  {
    auto rhs = scalar_from_const<float>(x_const);
    apply_binary_f32(y_const, rhs, output_const, f);
  }
  else
  {
    apply_binary_f32(x_const, y_const, output_const, f);
  }
}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/ConstantFoldHelper.h b/compiler/moco/pass/src/ConstantFoldHelper.h
new file mode 100644
index 000000000..393b083f2
--- /dev/null
+++ b/compiler/moco/pass/src/ConstantFoldHelper.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_CONSTANT_FOLD_HELPER_H__
+#define __MOCO_CONSTANT_FOLD_HELPER_H__
+
+#include <moco/IR/Nodes/TFConst.h>
+
+#include <loco.h>
+#include <loco/IR/TensorShape.h>
+
+namespace moco
+{
+
+TFConst *new_const(loco::Graph *graph, loco::TensorShape &tensor_shape,
+ const loco::DataType &dtype);
+
+template <typename T> T scalar_from_const(const TFConst *tfconst);
+template <> int32_t scalar_from_const<int32_t>(const TFConst *tfconst);
+template <> float scalar_from_const<float>(const TFConst *tfconst);
+
+/**
+ * @note Check if it is valid to run constant folding for binary operations
+ *       as of the current implementation. Currently we support only
+ *       element-wise operations, or the case where one input is a scalar.
+ * TODO Support other shapes of binary operation
+ */
+bool valid_shape_for_constfold_binary_op(const loco::TensorShape &lhs,
+ const loco::TensorShape &rhs);
+
+/**
+ * @brief Scalar binary operation used by the constant-folding passes.
+ *
+ * Concrete folds (e.g. Add, Mul) override both apply() overloads. The base
+ * implementations are defined out-of-line (not visible in this header);
+ * presumably they reject unsupported dtypes -- TODO confirm.
+ */
+struct BinaryFunc
+{
+ virtual ~BinaryFunc() = default;
+
+ virtual float apply(float, float) const;
+ virtual int32_t apply(int32_t, int32_t) const;
+};
+
+template <typename T>
+void apply_binary(const moco::TFConst *x_const, const moco::TFConst *y_const,
+ moco::TFConst *output_const, const moco::BinaryFunc &f);
+template <>
+void apply_binary<int32_t>(const moco::TFConst *x_const, const moco::TFConst *y_const,
+ moco::TFConst *output_const, const moco::BinaryFunc &f);
+template <>
+void apply_binary<float>(const moco::TFConst *x_const, const moco::TFConst *y_const,
+ moco::TFConst *output_const, const moco::BinaryFunc &f);
+
+} // namespace moco
+
+#endif // __MOCO_CONSTANT_FOLD_HELPER_H__
diff --git a/compiler/moco/pass/src/ConstantFoldMul.test.cpp b/compiler/moco/pass/src/ConstantFoldMul.test.cpp
new file mode 100644
index 000000000..4e9b78fd4
--- /dev/null
+++ b/compiler/moco/pass/src/ConstantFoldMul.test.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/ConstantFoldMul.h"
+#include "TestHelper.h"
+
+#include <moco/IR/TFNodes.h>
+#include <loco.h>
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// Create a rank-1 S32 TFConst in *graph holding the given values.
+// NOTE(review): this helper is duplicated across the ConstantFold*.test.cpp
+// files; a shared TestHelper version would avoid the copies.
+moco::TFConst *const_vector_init(loco::Graph *graph, std::vector<int32_t> values)
+{
+ auto const_node = graph->nodes()->create<moco::TFConst>();
+ auto dim = values.size();
+
+ const_node->dtype(loco::DataType::S32);
+ const_node->rank(1);
+ const_node->dim(0).set(dim);
+
+ const_node->size<loco::DataType::S32>(dim);
+ // NOTE(review): int32_t index vs size_t `dim` mixes signedness; harmless for
+ // these small test vectors.
+ for (int32_t i = 0; i < dim; ++i)
+ const_node->at<loco::DataType::S32>(i) = values[i];
+
+ return const_node;
+}
+
+} // namespace
+
+// {1,3,5} * {2}: the single-element rank-1 y is treated as a scalar, so the
+// pass must fold TFMul away and leave a TFConst holding {2,6,10}.
+TEST(ConstantFoldMul, basic_vector)
+{
+ loco::Graph graph;
+
+ auto mul_node = graph.nodes()->create<moco::TFMul>();
+ {
+ auto const_from_ss = const_vector_init(&graph, {1, 3, 5});
+ mul_node->x(const_from_ss);
+
+ auto const_y = const_vector_init(&graph, {2});
+ mul_node->y(const_y);
+ }
+ setup_output_node(&graph, mul_node);
+
+ auto pass = stdex::make_unique<moco::ConstantFoldMul>();
+ bool cont = true;
+ // run until the pass reports no further change (fixed point)
+ while (cont)
+ {
+ cont = pass->run(&graph);
+ }
+
+ auto ssnode = find_first_node_bytype<moco::TFMul>(&graph);
+ ASSERT_EQ(ssnode, nullptr);
+
+ auto ssconst = find_first_node_bytype<moco::TFConst>(&graph);
+ ASSERT_NE(ssconst, nullptr);
+ ASSERT_EQ(ssconst->size<loco::DataType::S32>(), 3);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(0), 2);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(1), 6);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(2), 10);
+}
+
+// {5} * {2}: both inputs are single-element vectors; the fold must produce a
+// TFConst holding {10} and remove the TFMul node.
+TEST(ConstantFoldMul, basic_refinedet_1)
+{
+ loco::Graph graph;
+
+ auto mul_node = graph.nodes()->create<moco::TFMul>();
+ {
+ auto const_from_ss = const_vector_init(&graph, {5});
+ mul_node->x(const_from_ss);
+
+ auto const_y = const_vector_init(&graph, {2});
+ mul_node->y(const_y);
+ }
+ setup_output_node(&graph, mul_node);
+
+ auto pass = stdex::make_unique<moco::ConstantFoldMul>();
+ bool cont = true;
+ while (cont)
+ {
+ cont = pass->run(&graph);
+ }
+
+ auto ssnode = find_first_node_bytype<moco::TFMul>(&graph);
+ ASSERT_EQ(ssnode, nullptr);
+
+ auto ssconst = find_first_node_bytype<moco::TFConst>(&graph);
+ ASSERT_NE(ssconst, nullptr);
+ ASSERT_EQ(ssconst->size<loco::DataType::S32>(), 1);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(0), 10);
+}
diff --git a/compiler/moco/pass/src/ConstantFoldPack.test.cpp b/compiler/moco/pass/src/ConstantFoldPack.test.cpp
new file mode 100644
index 000000000..cb6eff0c8
--- /dev/null
+++ b/compiler/moco/pass/src/ConstantFoldPack.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/ConstantFoldPack.h"
+#include "TestHelper.h"
+
+#include <moco/IR/TFNodes.h>
+#include <loco.h>
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// Create a rank-1 S32 TFConst in *graph holding the given values.
+// NOTE(review): duplicated across the ConstantFold*.test.cpp files; a shared
+// TestHelper version would avoid the copies.
+moco::TFConst *const_vector_init(loco::Graph *graph, std::vector<int32_t> values)
+{
+ auto const_node = graph->nodes()->create<moco::TFConst>();
+ auto dim = values.size();
+
+ const_node->dtype(loco::DataType::S32);
+ const_node->rank(1);
+ const_node->dim(0).set(dim);
+
+ const_node->size<loco::DataType::S32>(dim);
+ for (int32_t i = 0; i < dim; ++i)
+ const_node->at<loco::DataType::S32>(i) = values[i];
+
+ return const_node;
+}
+
+} // namespace
+
+// Pack four shape-[1] constants along the default axis: expected result is a
+// rank-2 TFConst of shape [4,1] holding {1,10,10,64}, and TFPack disappears.
+TEST(ConstantFoldPack, basic_scalar4_vector)
+{
+ loco::Graph graph;
+
+ auto pack_node = graph.nodes()->create<moco::TFPack>(4);
+ {
+ auto input_0 = const_vector_init(&graph, {1});
+ pack_node->values(0, input_0);
+
+ auto input_1 = const_vector_init(&graph, {10});
+ pack_node->values(1, input_1);
+
+ auto input_2 = const_vector_init(&graph, {10});
+ pack_node->values(2, input_2);
+
+ auto input_3 = const_vector_init(&graph, {64});
+ pack_node->values(3, input_3);
+ }
+ // add Identity node as the output Pack will be replaced
+ auto identity = graph.nodes()->create<moco::TFIdentity>();
+ identity->input(pack_node);
+ setup_output_node(&graph, identity);
+
+ auto pass = stdex::make_unique<moco::ConstantFoldPack>();
+ bool cont = true;
+ while (cont)
+ {
+ cont = pass->run(&graph);
+ }
+
+ auto pnode = find_first_node_bytype<moco::TFPack>(&graph);
+ ASSERT_EQ(pnode, nullptr);
+
+ auto pconst = find_first_node_bytype<moco::TFConst>(&graph);
+ ASSERT_NE(pconst, nullptr);
+ ASSERT_EQ(pconst->rank(), 2);
+ ASSERT_EQ(pconst->size<loco::DataType::S32>(), 4);
+ ASSERT_EQ(pconst->at<loco::DataType::S32>(0), 1);
+ ASSERT_EQ(pconst->at<loco::DataType::S32>(1), 10);
+ ASSERT_EQ(pconst->at<loco::DataType::S32>(2), 10);
+ ASSERT_EQ(pconst->at<loco::DataType::S32>(3), 64);
+}
diff --git a/compiler/moco/pass/src/ConstantFoldStridedSlice.test.cpp b/compiler/moco/pass/src/ConstantFoldStridedSlice.test.cpp
new file mode 100644
index 000000000..b5bada221
--- /dev/null
+++ b/compiler/moco/pass/src/ConstantFoldStridedSlice.test.cpp
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/ConstantFoldStridedSlice.h"
+#include "TestHelper.h"
+
+#include <moco/IR/TFNodes.h>
+#include <loco.h>
+#include <stdex/Memory.h>
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+// Create a rank-1 S32 TFConst in *graph holding the given values.
+// NOTE(review): duplicated across the ConstantFold*.test.cpp files; a shared
+// TestHelper version would avoid the copies.
+moco::TFConst *const_vector_init(loco::Graph *graph, std::vector<int32_t> values)
+{
+ auto const_node = graph->nodes()->create<moco::TFConst>();
+ auto dim = values.size();
+
+ const_node->dtype(loco::DataType::S32);
+ const_node->rank(1);
+ const_node->dim(0).set(dim);
+
+ const_node->size<loco::DataType::S32>(dim);
+ for (int32_t i = 0; i < dim; ++i)
+ const_node->at<loco::DataType::S32>(i) = values[i];
+
+ return const_node;
+}
+
+// Create a dimh x dimw S32 TFConst whose elements are 0 .. dimh*dimw-1
+// in row-major order (element value == flat index), which makes slicing
+// results easy to predict in the tests below.
+moco::TFConst *const_matrix(loco::Graph *graph, int32_t dimh, int32_t dimw)
+{
+ auto const_node = graph->nodes()->create<moco::TFConst>();
+
+ const_node->dtype(loco::DataType::S32);
+ const_node->rank(2);
+ const_node->dim(0).set(dimh);
+ const_node->dim(1).set(dimw);
+
+ auto elements = dimh * dimw;
+ const_node->size<loco::DataType::S32>(elements);
+ for (int32_t i = 0; i < elements; ++i)
+ const_node->at<loco::DataType::S32>(i) = i;
+
+ return const_node;
+}
+
+} // namespace
+
+// Slice a 5x5 row-major matrix (values 0..24) with begin {1,1}, end {2,4},
+// strides {1,1} and shrink_axis_mask 1: row 1, columns 1..3 -> {6,7,8},
+// with axis 0 shrunk away.
+TEST(ConstantFoldStridedSlice, basic_matrix55_11)
+{
+ loco::Graph graph;
+
+ auto sslice_node = graph.nodes()->create<moco::TFStridedSlice>();
+ {
+ auto const_input = const_matrix(&graph, 5, 5);
+ sslice_node->input(const_input);
+
+ auto const_begin = const_vector_init(&graph, {1, 1});
+ sslice_node->begin(const_begin);
+ auto const_end = const_vector_init(&graph, {2, 4});
+ sslice_node->end(const_end);
+ auto const_strides = const_vector_init(&graph, {1, 1});
+ sslice_node->strides(const_strides);
+
+ sslice_node->shrink_axis_mask(1);
+ }
+ setup_output_node(&graph, sslice_node);
+
+ auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ bool cont = true;
+ while (cont)
+ {
+ cont = pass->run(&graph);
+ }
+
+ auto ssnode = find_first_node_bytype<moco::TFStridedSlice>(&graph);
+ ASSERT_EQ(ssnode, nullptr);
+
+ auto ssconst = find_first_node_bytype<moco::TFConst>(&graph);
+ ASSERT_NE(ssconst, nullptr);
+ ASSERT_EQ(ssconst->size<loco::DataType::S32>(), 3);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(0), 6);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(1), 7);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(2), 8);
+}
+
+// Slice element 0 out of the vector {1,5,5,64} with shrink_axis_mask 1:
+// expected fold result is a single-element TFConst holding {1}.
+TEST(ConstantFoldStridedSlice, basic_vector4_0)
+{
+ loco::Graph graph;
+
+ auto sslice_node = graph.nodes()->create<moco::TFStridedSlice>();
+ {
+ auto const_input = const_vector_init(&graph, {1, 5, 5, 64});
+ sslice_node->input(const_input);
+
+ auto const_begin = const_vector_init(&graph, {0});
+ sslice_node->begin(const_begin);
+ auto const_end = const_vector_init(&graph, {1});
+ sslice_node->end(const_end);
+ auto const_strides = const_vector_init(&graph, {1});
+ sslice_node->strides(const_strides);
+
+ sslice_node->shrink_axis_mask(1);
+ }
+ setup_output_node(&graph, sslice_node);
+
+ auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ bool cont = true;
+ while (cont)
+ {
+ cont = pass->run(&graph);
+ }
+
+ auto ssnode = find_first_node_bytype<moco::TFStridedSlice>(&graph);
+ ASSERT_EQ(ssnode, nullptr);
+
+ auto ssconst = find_first_node_bytype<moco::TFConst>(&graph);
+ ASSERT_NE(ssconst, nullptr);
+ ASSERT_EQ(ssconst->size<loco::DataType::S32>(), 1);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(0), 1);
+}
+
+// Slice element 1 out of the vector {1,5,5,64} with shrink_axis_mask 1:
+// expected fold result is a single-element TFConst holding {5}.
+TEST(ConstantFoldStridedSlice, basic_vector4_1)
+{
+ loco::Graph graph;
+
+ auto sslice_node = graph.nodes()->create<moco::TFStridedSlice>();
+ {
+ auto const_input = const_vector_init(&graph, {1, 5, 5, 64});
+ sslice_node->input(const_input);
+
+ auto const_begin = const_vector_init(&graph, {1});
+ sslice_node->begin(const_begin);
+ auto const_end = const_vector_init(&graph, {2});
+ sslice_node->end(const_end);
+ auto const_strides = const_vector_init(&graph, {1});
+ sslice_node->strides(const_strides);
+
+ sslice_node->shrink_axis_mask(1);
+ }
+ setup_output_node(&graph, sslice_node);
+
+ auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ bool cont = true;
+ while (cont)
+ {
+ cont = pass->run(&graph);
+ }
+
+ auto ssnode = find_first_node_bytype<moco::TFStridedSlice>(&graph);
+ ASSERT_EQ(ssnode, nullptr);
+
+ auto ssconst = find_first_node_bytype<moco::TFConst>(&graph);
+ ASSERT_NE(ssconst, nullptr);
+ ASSERT_EQ(ssconst->size<loco::DataType::S32>(), 1);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(0), 5);
+}
+
+// Slice element 2 out of the vector {1,5,5,64} with shrink_axis_mask 1:
+// expected fold result is a single-element TFConst holding {5}.
+TEST(ConstantFoldStridedSlice, basic_vector4_2)
+{
+ loco::Graph graph;
+
+ auto sslice_node = graph.nodes()->create<moco::TFStridedSlice>();
+ {
+ auto const_input = const_vector_init(&graph, {1, 5, 5, 64});
+ sslice_node->input(const_input);
+
+ auto const_begin = const_vector_init(&graph, {2});
+ sslice_node->begin(const_begin);
+ auto const_end = const_vector_init(&graph, {3});
+ sslice_node->end(const_end);
+ auto const_strides = const_vector_init(&graph, {1});
+ sslice_node->strides(const_strides);
+
+ sslice_node->shrink_axis_mask(1);
+ }
+ setup_output_node(&graph, sslice_node);
+
+ auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ bool cont = true;
+ while (cont)
+ {
+ cont = pass->run(&graph);
+ }
+
+ auto ssnode = find_first_node_bytype<moco::TFStridedSlice>(&graph);
+ ASSERT_EQ(ssnode, nullptr);
+
+ auto ssconst = find_first_node_bytype<moco::TFConst>(&graph);
+ ASSERT_NE(ssconst, nullptr);
+ ASSERT_EQ(ssconst->size<loco::DataType::S32>(), 1);
+ ASSERT_EQ(ssconst->at<loco::DataType::S32>(0), 5);
+}
+
+namespace
+{
+
+/**
+ * @note tfconst_at() implementation should be same as that of inside
+ * ConstantFoldStridedSlice.cpp for valid testing
+ */
+// Read the S32 element of `tfconst` at multi-dimensional position `pos`
+// by row-major flattening: element = ((pos[0]*d1 + pos[1])*d2 + pos[2])...
+// `pos` must have exactly one entry per dimension (asserted).
+int32_t tfconst_at(const moco::TFConst *tfconst, const std::vector<uint32_t> &pos)
+{
+ uint32_t rank = tfconst->rank();
+ assert(rank == pos.size());
+
+ uint32_t element = 0;
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ uint32_t dim = tfconst->dim(r).value();
+ element = element * dim + pos.at(r);
+ }
+ return tfconst->at<loco::DataType::S32>(element);
+}
+
+} // namespace
+
+// Sanity-check the row-major indexing of the local tfconst_at() helper against
+// a 2x3x4 constant filled with 0..23 (element value == flat index).
+TEST(ConstantFoldStridedSlice, tfconst_at)
+{
+ loco::Graph graph;
+
+ auto const_node = graph.nodes()->create<moco::TFConst>();
+
+ const_node->dtype(loco::DataType::S32);
+ const_node->rank(3);
+ const_node->dim(0).set(2);
+ const_node->dim(1).set(3);
+ const_node->dim(2).set(4);
+
+ auto elements = 2 * 3 * 4;
+ const_node->size<loco::DataType::S32>(elements);
+ for (int32_t i = 0; i < elements; ++i)
+ const_node->at<loco::DataType::S32>(i) = i;
+ /*
+ [
+ [ 0, 1, 2, 3] <- [0,0,0]
+ [ 4, 5, 6, 7] <- [0,1,0] [0,1,1] [0,1,2]
+ [ 8, 9,10,11]
+ ]
+ [
+ [12,13,14,15]
+ [16,17,18,19] <- [1,1,0] [1,1,1]
+ [20,21,22,23] <- [1,2,0] [1,2,1] [1,2,2] [1,2,3]
+ ]
+ */
+
+ ASSERT_EQ(tfconst_at(const_node, {0, 0, 0}), 0);
+ ASSERT_EQ(tfconst_at(const_node, {1, 1, 1}), 17);
+ ASSERT_EQ(tfconst_at(const_node, {0, 1, 2}), 6);
+ ASSERT_EQ(tfconst_at(const_node, {1, 2, 3}), 23);
+}
diff --git a/compiler/moco/pass/src/Passes/ConstantFoldAdd.cpp b/compiler/moco/pass/src/Passes/ConstantFoldAdd.cpp
new file mode 100644
index 000000000..018749b78
--- /dev/null
+++ b/compiler/moco/pass/src/Passes/ConstantFoldAdd.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/ConstantFoldAdd.h"
+
+#include "ConstantFoldHelper.h"
+
+#include <moco/IR/Nodes/TFAdd.h>
+#include <moco/IR/Nodes/TFConst.h>
+
+#include <moco/Support/NodeAs.h>
+
+namespace
+{
+
+// BinaryFunc implementing addition for both supported dtypes.
+// NOTE(review): consider marking both apply() methods `override`.
+struct Func final : public moco::BinaryFunc
+{
+ float apply(float lhs, float rhs) const { return lhs + rhs; }
+ int32_t apply(int32_t lhs, int32_t rhs) const { return lhs + rhs; }
+};
+
+// Fold one TFAdd whose inputs are both TFConst of the same supported dtype
+// (S32 or FLOAT32) into a new TFConst, and redirect users of the TFAdd to it.
+// Returns true when a fold happened; the original nodes stay in the graph
+// but become inactive (disconnected from the outputs).
+bool constantfold_add(moco::TFAdd *node)
+{
+ auto x_const = moco::as<moco::TFConst>(node->x());
+ auto y_const = moco::as<moco::TFConst>(node->y());
+ if (x_const == nullptr || y_const == nullptr)
+ return false;
+
+ if (x_const->dtype() != y_const->dtype())
+ return false;
+ // TODO support other types
+ if (x_const->dtype() != loco::DataType::S32 && x_const->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ // NOTE we support limited shape of elementwise add or add with a scalar.
+ // valid_shape_for_constfold_binary_op() explains limited shape.
+ auto x_shape = moco::tensor_shape(x_const);
+ auto y_shape = moco::tensor_shape(y_const);
+ if (!moco::valid_shape_for_constfold_binary_op(x_shape, y_shape))
+ return false;
+
+ // output takes the non-scalar operand's shape (y of rank 0/1 acts as scalar)
+ loco::TensorShape output_shape;
+ if (y_shape.rank() == 0 || y_shape.rank() == 1)
+ output_shape = x_shape;
+ else
+ output_shape = y_shape;
+
+ auto graph = node->graph();
+ auto output_const = moco::new_const(graph, output_shape, x_const->dtype());
+ Func f;
+
+ if (x_const->dtype() == loco::DataType::S32)
+ {
+ moco::apply_binary<int32_t>(x_const, y_const, output_const, f);
+ }
+ else if (x_const->dtype() == loco::DataType::FLOAT32)
+ {
+ moco::apply_binary<float>(x_const, y_const, output_const, f);
+ }
+
+ // replace
+ loco::replace(node).with(output_const);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+
+/**
+ * @note This will Replace TFAdd with TFConst when inputs are TFConst
+ *
+ * Before
+ * A --- TFAdd --- C
+ * B --/
+ * After
+ * A --- TFAdd
+ * B --/
+ * TFConst ---------- C
+ * Where
+ * A,B : inputs of TFAdd
+ * C : a node that uses TFAdd as an input
+ * TFAdd is disconnected from C
+ * Nodes are drawn multiple times to simplify the diagram
+ */
+// Visit every active node once and fold each TFAdd with constant inputs.
+// Returns true when anything changed; the pass manager (and the tests)
+// re-run until a fixed point is reached.
+bool ConstantFoldAdd::run(loco::Graph *graph)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph)))
+ {
+ if (auto add_node = as<moco::TFAdd>(node))
+ {
+ if (constantfold_add(add_node))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/Passes/ConstantFoldMul.cpp b/compiler/moco/pass/src/Passes/ConstantFoldMul.cpp
new file mode 100644
index 000000000..c1870ffee
--- /dev/null
+++ b/compiler/moco/pass/src/Passes/ConstantFoldMul.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/ConstantFoldMul.h"
+
+#include "ConstantFoldHelper.h"
+
+#include <moco/IR/Nodes/TFMul.h>
+#include <moco/IR/Nodes/TFConst.h>
+
+#include <moco/Support/NodeAs.h>
+
+namespace
+{
+
+// BinaryFunc implementing multiplication for both supported dtypes.
+// NOTE(review): consider marking both apply() methods `override`.
+struct Func final : public moco::BinaryFunc
+{
+ float apply(float lhs, float rhs) const { return lhs * rhs; }
+ int32_t apply(int32_t lhs, int32_t rhs) const { return lhs * rhs; }
+};
+
+// Fold one TFMul whose inputs are both TFConst of the same supported dtype
+// (S32 or FLOAT32) into a new TFConst, and redirect users of the TFMul to it.
+// Mirrors constantfold_add() in ConstantFoldAdd.cpp; returns true on a fold.
+bool constantfold_mul(moco::TFMul *node)
+{
+ auto x_const = moco::as<moco::TFConst>(node->x());
+ auto y_const = moco::as<moco::TFConst>(node->y());
+ if (x_const == nullptr || y_const == nullptr)
+ return false;
+
+ if (x_const->dtype() != y_const->dtype())
+ return false;
+ // TODO support other types
+ if (x_const->dtype() != loco::DataType::S32 && x_const->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ // NOTE we support limited shape of elementwise mul or multiply with a scalar.
+ // valid_shape_for_constfold_binary_op() explains limited shape.
+ auto x_shape = moco::tensor_shape(x_const);
+ auto y_shape = moco::tensor_shape(y_const);
+ if (!moco::valid_shape_for_constfold_binary_op(x_shape, y_shape))
+ return false;
+
+ // output takes the non-scalar operand's shape (y of rank 0/1 acts as scalar)
+ loco::TensorShape output_shape;
+ if (y_shape.rank() == 0 || y_shape.rank() == 1)
+ output_shape = x_shape;
+ else
+ output_shape = y_shape;
+
+ auto graph = node->graph();
+ auto output_const = moco::new_const(graph, output_shape, x_const->dtype());
+ Func f;
+
+ if (x_const->dtype() == loco::DataType::S32)
+ {
+ moco::apply_binary<int32_t>(x_const, y_const, output_const, f);
+ }
+ else if (x_const->dtype() == loco::DataType::FLOAT32)
+ {
+ moco::apply_binary<float>(x_const, y_const, output_const, f);
+ }
+
+ // replace
+ loco::replace(node).with(output_const);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+
+/**
+ * @note This will Replace TFMul with TFConst when inputs are TFConst
+ *
+ * Before
+ * A --- TFMul --- C
+ * B --/
+ * After
+ * A --- TFMul
+ * B --/
+ * TFConst ---------- C
+ * Where
+ * A,B : inputs of TFMul
+ * C : a node that uses TFMul as an input
+ * TFMul is disconnected from C
+ * Nodes are drawn multiple times to simplify the diagram
+ */
+// Visit every active node once and fold each TFMul with constant inputs.
+// Returns true when anything changed so the driver can iterate to fixpoint.
+bool ConstantFoldMul::run(loco::Graph *graph)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph)))
+ {
+ if (auto mul_node = as<moco::TFMul>(node))
+ {
+ if (constantfold_mul(mul_node))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/Passes/ConstantFoldPack.cpp b/compiler/moco/pass/src/Passes/ConstantFoldPack.cpp
new file mode 100644
index 000000000..cc8a23d18
--- /dev/null
+++ b/compiler/moco/pass/src/Passes/ConstantFoldPack.cpp
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/ConstantFoldPack.h"
+
+#include "ConstantFoldHelper.h"
+#include "TensorPackEnumerator.h"
+
+#include <moco/IR/Nodes/TFPack.h>
+#include <moco/IR/Nodes/TFConst.h>
+
+#include <moco/Support/NodeAs.h>
+
+#include <oops/UserExn.h>
+
+#include <cassert>
+#include <vector>
+
+namespace
+{
+
+// TODO move to loco
+// TODO move to loco
+// Shape equality: same rank and every dimension equal. Written with
+// !(lhs == rhs) because loco::Dimension exposes operator== only.
+bool operator==(const loco::TensorShape &lhs, const loco::TensorShape &rhs)
+{
+ if (lhs.rank() != rhs.rank())
+ return false;
+ for (uint32_t axis = 0; axis < lhs.rank(); ++axis)
+ {
+ if (!(lhs.dim(axis) == rhs.dim(axis)))
+ return false;
+ }
+ return true;
+}
+
+// Validate the Pack axis. `output_rank` is the packed result's rank
+// (input rank + 1), so the accepted range [-output_rank, output_rank)
+// equals the [-r-1, r+1) range of the input rank r mentioned below.
+bool valid_axis_range(int32_t output_rank, int32_t pack_axis)
+{
+ // check axis range in [-r-1, r+1)
+ assert(output_rank > 0);
+ return (-output_rank <= pack_axis) && (pack_axis < output_rank);
+}
+
+// Fold one TFPack whose inputs are all TFConst of identical shape and dtype
+// (S32 or FLOAT32) into a single TFConst, and redirect users of the TFPack.
+// Returns true on a fold, false when the node cannot (yet) be folded.
+// Throws oops::UserExn when the pack axis is out of range.
+bool constantfold_pack(moco::TFPack *node)
+{
+ // check if all the inputs are Const
+ std::vector<moco::TFConst *> input_nodes;
+ uint32_t num = node->N();
+
+ for (uint32_t index = 0; index < num; ++index)
+ {
+ auto in = dynamic_cast<moco::TFConst *>(node->values(index));
+ if (in == nullptr)
+ return false;
+
+ input_nodes.push_back(in);
+ }
+ assert(input_nodes.size() == num);
+
+ // check if all inputs have same shape and dtype
+ auto input_0 = input_nodes.at(0);
+ auto shape_0 = moco::tensor_shape(input_0);
+ auto dtype_0 = input_0->dtype();
+ if (dtype_0 != loco::DataType::S32 && dtype_0 != loco::DataType::FLOAT32)
+ {
+ // TODO support other types
+ // NOTE(review): assert(false) aborts debug builds here, making the
+ // `return false` fallback effectively release-only; confirm that is
+ // the intended behavior for unsupported dtypes.
+ assert(false);
+ return false;
+ }
+ for (uint32_t index = 1; index < num; ++index)
+ {
+ auto input_i = input_nodes.at(index);
+ auto shape_i = moco::tensor_shape(input_i);
+ auto dtype_i = input_i->dtype();
+ if (!(shape_0 == shape_i))
+ return false;
+ if (dtype_0 != dtype_i)
+ return false;
+ }
+
+ int32_t output_rank = static_cast<int32_t>(shape_0.rank() + 1);
+ int32_t pack_axis = node->axis();
+ if (!valid_axis_range(output_rank, pack_axis))
+ {
+ throw oops::UserExn("axis is out of range: ", node->name());
+ }
+
+ // normalize a negative axis to its non-negative equivalent
+ if (pack_axis < 0)
+ {
+ pack_axis = output_rank + pack_axis;
+ }
+
+ // define output shape: input shape with `num` inserted at pack_axis
+ loco::TensorShape output_shape;
+ output_shape.rank(output_rank);
+
+ for (int32_t r = 0, s = 0; r < output_rank; ++r)
+ {
+ if (r == pack_axis)
+ {
+ output_shape.dim(r).set(num);
+ }
+ else
+ {
+ output_shape.dim(r).set(shape_0.dim(s++).value());
+ }
+ }
+
+ auto graph = node->graph();
+
+ // create new constant
+ auto output_const = moco::new_const(graph, output_shape, input_0->dtype());
+
+ // TensorPackEnumerator maps each output element to (input index, input element)
+ moco::TensorPackEnumerator etor;
+
+ etor.shape(shape_0, output_shape);
+ etor.axis(pack_axis);
+ for (etor.start(); etor.valid(); etor.advance())
+ {
+ uint32_t inp_num = etor.inp_num();
+ uint32_t inp_element = etor.inp_element();
+ uint32_t out_element = etor.out_element();
+
+ auto inp_const = input_nodes[inp_num];
+
+ if (input_0->dtype() == loco::DataType::S32)
+ {
+ int32_t val = inp_const->at<loco::DataType::S32>(inp_element);
+ output_const->at<loco::DataType::S32>(out_element) = val;
+ }
+ else if (input_0->dtype() == loco::DataType::FLOAT32)
+ {
+ float val = inp_const->at<loco::DataType::FLOAT32>(inp_element);
+ output_const->at<loco::DataType::FLOAT32>(out_element) = val;
+ }
+ }
+
+ // replace
+ loco::replace(node).with(output_const);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+
+/**
+ * @note This will Replace TFPack with TFConst when inputs are TFConst
+ *
+ * Before
+ * A --- TFPack --- C
+ * B --/
+ * After
+ * A --- TFPack
+ * B --/
+ * TFConst ---------- C
+ * Where
+ * A, B : inputs of TFPack
+ * C : a node that uses TFPack as an input
+ * TFPack is disconnected from C
+ * Nodes are drawn multiple times to simplify the diagram
+ */
+// Visit every active node once and fold each TFPack with constant inputs.
+// Returns true when anything changed so the driver can iterate to fixpoint.
+bool ConstantFoldPack::run(loco::Graph *graph)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph)))
+ {
+ if (auto pack_node = as<moco::TFPack>(node))
+ {
+ if (constantfold_pack(pack_node))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/Passes/ConstantFoldStridedSlice.cpp b/compiler/moco/pass/src/Passes/ConstantFoldStridedSlice.cpp
new file mode 100644
index 000000000..8be47648d
--- /dev/null
+++ b/compiler/moco/pass/src/Passes/ConstantFoldStridedSlice.cpp
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/ConstantFoldStridedSlice.h"
+
+#include "ConstantFoldHelper.h"
+#include "TensorSliceEnumerator.h"
+
+#include <moco/IR/Nodes/TFStridedSlice.h>
+#include <moco/IR/Nodes/TFConst.h>
+
+#include <moco/Support/NodeAs.h>
+#include <moco/Support/TFShapeInferenceHelper.h>
+
+#include <oops/UserExn.h>
+
+#include <cassert>
+#include <vector>
+
+namespace
+{
+
+// Compute the folded output shape of a TFStridedSlice from its constant
+// begin/end inputs (size = end - begin per axis), then drop the axes named
+// by shrink_axis_mask.
+// NOTE(review): the dynamic_cast results are dereferenced without null checks;
+// callers must have validated that input/begin/end are TFConst (the only
+// caller, constantfold_stridedslice, does). `const_strides` is unused until
+// the strides TODO below is implemented.
+loco::TensorShape calc_output_shape(moco::TFStridedSlice *node)
+{
+ auto const_input = dynamic_cast<moco::TFConst *>(node->input());
+ auto const_begin = dynamic_cast<moco::TFConst *>(node->begin());
+ auto const_end = dynamic_cast<moco::TFConst *>(node->end());
+ auto const_strides = dynamic_cast<moco::TFConst *>(node->strides());
+ auto input_rank = const_input->rank();
+ auto output_rank = input_rank;
+ loco::TensorShape output_shape_range;
+
+ output_shape_range.rank(input_rank);
+ for (uint32_t r = 0; r < input_rank; ++r)
+ {
+ // TODO apply begin/end mask
+ // TODO apply ellipsis mask
+ // TODO apply strides
+ auto end = const_end->at<loco::DataType::S32>(r);
+ auto begin = const_begin->at<loco::DataType::S32>(r);
+ auto size = end - begin;
+ output_shape_range.dim(r).set(size);
+ }
+
+ loco::TensorShape output_tensor_shape;
+ if (node->shrink_axis_mask() != 0)
+ {
+ // first pass: count how many axes the mask removes
+ for (uint32_t rs = 0; rs < input_rank; ++rs)
+ {
+ int32_t bit = 1 << rs;
+ int32_t mask = node->shrink_axis_mask();
+ if (bit & mask)
+ {
+ // shrink one dimension
+ assert(output_rank > 0);
+ output_rank = output_rank - 1;
+ }
+ }
+ // second pass: copy the surviving dimensions in order
+ output_tensor_shape.rank(output_rank);
+ for (uint32_t rs = 0, rd = 0; rs < input_rank; ++rs)
+ {
+ int32_t bit = 1 << rs;
+ int32_t mask = node->shrink_axis_mask();
+ if ((bit & mask) == 0)
+ {
+ // use this dimension
+ output_tensor_shape.dim(rd).set(output_shape_range.dim(rs).value());
+ rd++;
+ }
+ // else this dimension is shrink-ed
+ }
+ }
+ else
+ {
+ output_tensor_shape = output_shape_range;
+ }
+
+ return output_tensor_shape;
+}
+
+// Copy a rank-1 S32 TFConst into a u32v_t (used for begin/end/strides).
+// NOTE(review): S32 values are stored into unsigned slots; negative entries
+// would wrap -- presumably begin/end/strides are non-negative here. Confirm.
+moco::u32v_t vector_from_const(moco::TFConst *tfconst)
+{
+ moco::u32v_t result;
+
+ auto rank = tfconst->rank();
+ assert(rank == 1);
+ auto dim = tfconst->dim(0).value();
+
+ result.resize(dim);
+ for (uint32_t r = 0; r < dim; ++r)
+ {
+ auto val = tfconst->at<loco::DataType::S32>(r);
+ result.at(r) = val;
+ }
+
+ return result;
+}
+
+// Element-wise difference of two equal-length u32 vectors (used to compute
+// per-axis slice sizes as end - begin). Unsigned: assumes lhs >= rhs per
+// element, otherwise the subtraction wraps.
+moco::u32v_t operator-(const moco::u32v_t &lhs, const moco::u32v_t &rhs)
+{
+ assert(lhs.size() == rhs.size());
+
+ moco::u32v_t res;
+ res.resize(lhs.size());
+ for (uint32_t r = 0; r < lhs.size(); r++)
+ {
+ res.at(r) = lhs.at(r) - rhs.at(r);
+ }
+ return res;
+}
+
+template <typename T> T tfconst_at(const moco::TFConst *tfconst, const moco::u32v_t &pos);
+
+// Read the S32 element at multi-dimensional position `pos` via row-major
+// flattening; `pos` must have one entry per dimension (asserted).
+template <> int32_t tfconst_at<int32_t>(const moco::TFConst *tfconst, const moco::u32v_t &pos)
+{
+ uint32_t rank = tfconst->rank();
+ assert(rank == pos.size());
+ uint32_t element = 0;
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ uint32_t dim = tfconst->dim(r).value();
+ element = element * dim + pos.at(r);
+ }
+ return tfconst->at<loco::DataType::S32>(element);
+}
+
+// FLOAT32 counterpart of tfconst_at<int32_t>: row-major read at `pos`.
+template <> float tfconst_at<float>(const moco::TFConst *tfconst, const moco::u32v_t &pos)
+{
+ uint32_t rank = tfconst->rank();
+ assert(rank == pos.size());
+ uint32_t element = 0;
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ uint32_t dim = tfconst->dim(r).value();
+ element = element * dim + pos.at(r);
+ }
+ return tfconst->at<loco::DataType::FLOAT32>(element);
+}
+
+// Write an S32 value into `tfconst` at position `pos`, where pos may have
+// MORE dimensions than tfconst (the output shape is conceptually expanded
+// with leading 1s, e.g. [3] treated as [1,3]).
+void tfconst_at(moco::TFConst *tfconst, const moco::u32v_t &pos, int32_t value)
+{
+ // tfconst->rank() can be smaller than pos.size()
+ // i.e., tfconst: shape[3] and pos[0,1]
+ // where shape[3] is output result shape
+ // [0,1] is position of input const
+ uint32_t rank = pos.size();
+ uint32_t element = 0;
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ // this is like expand the shape from [3] to [1,3] to use same formula as in reading
+ // NOTE(review): `tfconst->rank() < r` looks inverted -- when it is true,
+ // dim(r) would index past the last dimension. As exercised here
+ // (rank <= pos.size()) the condition is always false so `dim` is always 1
+ // and `element` is just the sum of pos entries; that is only a correct
+ // row-major index when the leading pos entries are 0. Confirm intent
+ // before relying on multi-dimensional writes.
+ uint32_t dim = tfconst->rank() < r ? tfconst->dim(r).value() : 1;
+ element = element * dim + pos.at(r);
+ }
+
+ tfconst->at<loco::DataType::S32>(element) = value;
+}
+
+/**
+ * @brief Write FLOAT32 'value' into 'tfconst' at position 'pos'
+ *
+ * 'pos' may have more axes than tfconst's shape; the shape is conceptually
+ * expanded with leading 1's so the row-major formula matches reading.
+ *
+ * FIX: the previous dimension selection (rank() < r ? dim(r) : 1) was
+ * inverted and could index dim(r) out of range; see the S32 overload note.
+ */
+void tfconst_at(moco::TFConst *tfconst, const moco::u32v_t &pos, float value)
+{
+  uint32_t rank = pos.size();
+  assert(tfconst->rank() <= rank);
+  // number of leading axes that exist only in the expanded view
+  uint32_t expand = rank - tfconst->rank();
+  uint32_t element = 0;
+  for (uint32_t r = 0; r < rank; ++r)
+  {
+    // expanded leading axes have dimension 1; the rest map to tfconst's axes
+    uint32_t dim = (r < expand) ? 1 : tfconst->dim(r - expand).value();
+    element = element * dim + pos.at(r);
+  }
+
+  tfconst->at<loco::DataType::FLOAT32>(element) = value;
+}
+
+/**
+ * @brief Fold TFStridedSlice into a new TFConst when input, begin, end and
+ *        strides are all TFConst nodes
+ * @return true when the node was folded (graph changed), false otherwise
+ *
+ * @note Only a limited mask configuration is supported (asserted below):
+ *       begin/end/ellipsis masks must be 0 and shrink_axis_mask must be 1,
+ *       and the input dtype must be S32 or FLOAT32.
+ */
+bool constantfold_stridedslice(moco::TFStridedSlice *node)
+{
+  auto const_input = dynamic_cast<moco::TFConst *>(node->input());
+  if (const_input == nullptr)
+  {
+    // input is not TFConst, there's nothing to do
+    return false;
+  }
+
+  // TODO support full mask features: see import codes also
+  assert(node->begin_mask() == 0);
+  assert(node->end_mask() == 0);
+  assert(node->ellipsis_mask() == 0);
+  assert(node->shrink_axis_mask() == 1);
+
+  // TODO support other dtypes
+  assert(const_input->dtype() == loco::DataType::S32 ||
+         const_input->dtype() == loco::DataType::FLOAT32);
+
+  // begin/end/strides must also be constants to evaluate the slice here
+  auto const_begin = dynamic_cast<moco::TFConst *>(node->begin());
+  auto const_end = dynamic_cast<moco::TFConst *>(node->end());
+  auto const_strides = dynamic_cast<moco::TFConst *>(node->strides());
+  if (const_begin == nullptr || const_end == nullptr || const_strides == nullptr)
+  {
+    return false;
+  }
+
+  // NOTE need shape but cannot depend on shape inference service module
+  auto tensor_shape = calc_output_shape(node);
+  auto input_shape = moco::tensor_shape(const_input);
+
+  auto graph = node->graph();
+
+  // Create our target TFConst node with shape from begin~end/strides
+  auto const_sliced = moco::new_const(graph, tensor_shape, const_input->dtype());
+
+  // Copy sliced elements using TensorSliceEnumerator
+  moco::TensorSliceEnumerator etor;
+  auto v_begin = vector_from_const(const_begin);
+  auto v_end = vector_from_const(const_end);
+  moco::u32v_t v_cursor;
+  moco::u32v_t v_offset;
+
+  etor.shape(input_shape);
+  etor.begin(v_begin);
+  etor.end(v_end);
+
+  // Walk every input position in [begin, end) and copy its value into the
+  // corresponding begin-relative position of the new constant
+  for (etor.start(); etor.valid(); etor.advance())
+  {
+    v_cursor = etor.cursor();
+    v_offset = v_cursor - v_begin;
+
+    if (const_input->dtype() == loco::DataType::S32)
+    {
+      int32_t value = tfconst_at<int32_t>(const_input, v_cursor);
+      tfconst_at(const_sliced, v_offset, value);
+    }
+    else if (const_input->dtype() == loco::DataType::FLOAT32)
+    {
+      float value = tfconst_at<float>(const_input, v_cursor);
+      tfconst_at(const_sliced, v_offset, value);
+    }
+  }
+
+  // replace: users of the strided-slice now consume the folded constant
+  loco::replace(node).with(const_sliced);
+
+  return true;
+}
+
+} // namespace
+
+namespace moco
+{
+
+/**
+ * @note This will Replace TFStridedSlice with TFConst when 'input' is TFConst
+ *
+ * Before
+ * A --- TFStridedSlice --- C
+ * B --/
+ * After
+ * A --- TFStridedSlice
+ * B --/
+ * TFConst ---------- C
+ * Where
+ * A,B : inputs of TFStridedSlice
+ * C : a node that uses TFStridedSlice as an input
+ * TFStridedSlice is disconnected from C
+ * Nodes are drawn multiple times to simplify the diagram
+ * Limits
+ * Only limit set of inputs are supported for now
+ */
+bool ConstantFoldStridedSlice::run(loco::Graph *graph)
+{
+  bool folded_any = false;
+
+  for (auto node : loco::active_nodes(loco::output_nodes(graph)))
+  {
+    auto sslice = as<moco::TFStridedSlice>(node);
+    if (sslice == nullptr)
+      continue;
+
+    // attempt the fold first so it runs for every candidate node
+    folded_any = constantfold_stridedslice(sslice) || folded_any;
+  }
+
+  return folded_any;
+}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/Passes/FuseBinaryIntoPreceding.cpp b/compiler/moco/pass/src/Passes/FuseBinaryIntoPreceding.cpp
new file mode 100644
index 000000000..4a9631ea9
--- /dev/null
+++ b/compiler/moco/pass/src/Passes/FuseBinaryIntoPreceding.cpp
@@ -0,0 +1,539 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/FuseBinaryIntoPreceding.h"
+
+#include <moco/Support/TFShapeInferenceHelper.h>
+
+#include <moco/IR/TFDialect.h>
+#include <moco/IR/Nodes/TFAdd.h>
+#include <moco/IR/Nodes/TFBiasAdd.h>
+#include <moco/IR/Nodes/TFConst.h>
+#include <moco/IR/Nodes/TFConv2D.h>
+#include <moco/IR/Nodes/TFDepthwiseConv2dNative.h>
+#include <moco/IR/Nodes/TFMul.h>
+
+#include <cassert>
+#include <memory>
+
+namespace
+{
+
+/**
+ * @brief Fusable operation type
+ */
+enum class FuseType
+{
+  Conv2D,          // fuse into a TFConv2D kernel (HWIO layout)
+  DepthwiseConv2D, // fuse into a TFDepthwiseConv2dNative kernel (HWIM layout)
+  // TODO Support FullyConnected
+};
+
+// TODO rename this method when there is a better name
+// Return true when exactly one of the two pointers is non-null
+bool is_only_one_valid(moco::TFConst *xc, moco::TFConst *yc)
+{
+  // "exactly one" is logical XOR over null-ness
+  return (xc != nullptr) != (yc != nullptr);
+}
+
+// TODO Put this in some common place
+/**
+ * @brief Copy rank and per-axis dimensions from 'src' to 'dst'
+ *
+ * Unknown dimensions of 'src' are kept unknown in 'dst'.
+ */
+void copy_shape(const moco::TFConst *src, moco::TFConst *dst)
+{
+  assert(src != nullptr);
+  assert(dst != nullptr);
+
+  const uint32_t rank = src->rank();
+  dst->rank(rank);
+
+  for (uint32_t axis = 0; axis < rank; ++axis)
+  {
+    if (!src->dim(axis).known())
+      dst->dim(axis).unset();
+    else
+      dst->dim(axis) = src->dim(axis);
+  }
+}
+
+/**
+ * @brief return true if shape is identical
+ *
+ * Shapes match only when ranks agree and every dimension of both constants
+ * is known and equal.
+ */
+bool shape_match(const moco::TFConst *c1, const moco::TFConst *c2)
+{
+  assert(c1 != nullptr);
+  assert(c2 != nullptr);
+
+  const uint32_t rank = c1->rank();
+  if (c2->rank() != rank)
+    return false;
+
+  for (uint32_t axis = 0; axis < rank; ++axis)
+  {
+    auto const &d1 = c1->dim(axis);
+    auto const &d2 = c2->dim(axis);
+
+    if (!d1.known() || !d2.known())
+      return false;
+    if (d1.value() != d2.value())
+      return false;
+  }
+
+  return true;
+}
+
+/**
+ * @brief Create a new kernel TFConst whose values are ker * mulparam,
+ *        with 'mulparam' broadcast along the kernel's last (output) axis
+ * @return Newly created TFConst holding the fused kernel values
+ */
+template <FuseType FT>
+moco::TFConst *create_kernel_from_fuse_mulparam(loco::Graph *graph, moco::TFConst *ker,
+                                                moco::TFConst *mulparam);
+
+template <>
+moco::TFConst *create_kernel_from_fuse_mulparam<FuseType::Conv2D>(loco::Graph *graph,
+                                                                  moco::TFConst *ker,
+                                                                  moco::TFConst *mulparam)
+{
+  // both shapes must already be inferred
+  auto ker_shape_inf = moco::node_shape(ker);
+  assert(ker_shape_inf.domain() != loco::Domain::Unknown);
+  auto ker_shape = ker_shape_inf.as<loco::TensorShape>();
+
+  auto mulparam_shape_inf = moco::node_shape(mulparam);
+  assert(mulparam_shape_inf.domain() != loco::Domain::Unknown);
+  auto mulparam_shape = mulparam_shape_inf.as<loco::TensorShape>();
+
+  // create new ker_fused with same size of ker
+  auto ker_fused = graph->nodes()->create<moco::TFConst>();
+
+  // mulparam must be a vector whose length equals the kernel's output depth
+  assert(ker_shape.rank() == 4);
+  assert(mulparam_shape.rank() == 1);
+  assert(ker_shape.dim(3).value() == mulparam_shape.dim(0).value());
+
+  ker_fused->dtype(loco::DataType::FLOAT32);
+  copy_shape(ker, ker_fused);
+  auto ker_num_elements = ker->size<loco::DataType::FLOAT32>();
+  ker_fused->size<loco::DataType::FLOAT32>(ker_num_elements);
+
+  // TensorFlow Conv2D Kernel has HWIO format
+  // Broadcast Mul vector to Kernel tensor by the Output
+  const uint32_t ker_height = ker_shape.dim(0).value();
+  const uint32_t ker_width = ker_shape.dim(1).value();
+  const uint32_t ker_input = ker_shape.dim(2).value();
+  const uint32_t ker_output = ker_shape.dim(3).value();
+
+  for (uint32_t ker_y = 0; ker_y < ker_height; ++ker_y)
+  {
+    for (uint32_t ker_x = 0; ker_x < ker_width; ++ker_x)
+    {
+      for (uint32_t in_ch = 0; in_ch < ker_input; ++in_ch)
+      {
+        // num_items: row-major flat offset of (ker_y, ker_x, in_ch, out_ch=0)
+        uint32_t num_items = ((ker_y * ker_width + ker_x) * ker_input + in_ch) * ker_output;
+        for (uint32_t out_ch = 0; out_ch < ker_output; ++out_ch)
+        {
+          auto mulparam_v = mulparam->at<loco::DataType::FLOAT32>(out_ch);
+          auto ker_v = ker->at<loco::DataType::FLOAT32>(num_items + out_ch);
+          ker_fused->at<loco::DataType::FLOAT32>(num_items + out_ch) = ker_v * mulparam_v;
+        }
+      }
+    }
+  }
+
+  return ker_fused;
+}
+
+/**
+ * @brief Create a kernel from fuse mulparam<FuseType::DepthwiseConv2D> object
+ * @return Kernel of fused mulparam
+ */
+template <>
+moco::TFConst *create_kernel_from_fuse_mulparam<FuseType::DepthwiseConv2D>(loco::Graph *graph,
+                                                                           moco::TFConst *ker,
+                                                                           moco::TFConst *mulparam)
+{
+  // both shapes must already be inferred
+  auto ker_shape_inf = moco::node_shape(ker);
+  assert(ker_shape_inf.domain() != loco::Domain::Unknown);
+  auto ker_shape = ker_shape_inf.as<loco::TensorShape>();
+
+  auto mulparam_shape_inf = moco::node_shape(mulparam);
+  assert(mulparam_shape_inf.domain() != loco::Domain::Unknown);
+  auto mulparam_shape = mulparam_shape_inf.as<loco::TensorShape>();
+
+  // create new ker_fused with same size of ker
+  auto ker_fused = graph->nodes()->create<moco::TFConst>();
+
+  // mulparam must be a vector covering input-channels * channel-multiplier
+  assert(ker_shape.rank() == 4);
+  assert(mulparam_shape.rank() == 1);
+  assert(ker_shape.dim(2).value() * ker_shape.dim(3).value() == mulparam_shape.dim(0).value());
+
+  ker_fused->dtype(loco::DataType::FLOAT32);
+  copy_shape(ker, ker_fused);
+  auto ker_num_elements = ker->size<loco::DataType::FLOAT32>();
+  ker_fused->size<loco::DataType::FLOAT32>(ker_num_elements);
+
+  // TensorFlow DepthwiseConv2DNative Kernel has HWIM format
+  // Broadcast Mul vector to Kernel tensor by the Output
+  const uint32_t ker_height = ker_shape.dim(0).value();
+  const uint32_t ker_width = ker_shape.dim(1).value();
+  const uint32_t ker_input = ker_shape.dim(2).value();
+  const uint32_t ker_multiplier = ker_shape.dim(3).value();
+
+  for (uint32_t ker_y = 0; ker_y < ker_height; ++ker_y)
+  {
+    for (uint32_t ker_x = 0; ker_x < ker_width; ++ker_x)
+    {
+      for (uint32_t in_ch = 0; in_ch < ker_input; ++in_ch)
+      {
+        // num_items: row-major flat offset of (ker_y, ker_x, in_ch, ker_ch=0)
+        uint32_t num_items = ((ker_y * ker_width + ker_x) * ker_input + in_ch) * ker_multiplier;
+        for (uint32_t ker_ch = 0; ker_ch < ker_multiplier; ++ker_ch)
+        {
+          // mulparam is indexed as in_ch + ker_ch * ker_input, i.e. the
+          // per-output-channel scale for (in_ch, ker_ch)
+          auto mulparam_v = mulparam->at<loco::DataType::FLOAT32>(in_ch + ker_ch * ker_input);
+          auto ker_v = ker->at<loco::DataType::FLOAT32>(num_items + ker_ch);
+          ker_fused->at<loco::DataType::FLOAT32>(num_items + ker_ch) = ker_v * mulparam_v;
+        }
+      }
+    }
+  }
+
+  return ker_fused;
+}
+
+/**
+ * @brief Create a fused convolution operation from kernel of fused mulparam
+ *
+ * @tparam FT  FuseType selecting the kernel-fusion specialization
+ * @tparam T   Convolution node type (TFConv2D or TFDepthwiseConv2dNative)
+ * @param graph     Graph that owns the nodes
+ * @param mulparam  Constant multiplier to fold into the kernel
+ * @param conv_node Preceding convolution whose kernel is to be fused
+ * @return Fused convolution operation, or nullptr when preconditions are
+ *         not met yet (non-const kernel, missing shapes, non-rank-1 param)
+ */
+template <FuseType FT, class T>
+T *fused_conv_node(loco::Graph *graph, moco::TFConst *mulparam, T *conv_node)
+{
+  // LOGGER(l);
+
+  // ker should be constant
+  auto ker = dynamic_cast<moco::TFConst *>(conv_node->filter());
+  if (ker == nullptr)
+  {
+    // Wait until ker becomes TFConst: there are cases when it's Identity.
+    // INFO(l) << "Mul fuse_to_preceding: precedingOp ker is not TFConst";
+    return nullptr;
+  }
+  auto ifm = conv_node->input();
+  assert(ifm != nullptr);
+
+  // we need shape information, if not wait till it's ready
+  auto ker_shape_inf = moco::node_shape(ker);
+  if (ker_shape_inf.domain() == loco::Domain::Unknown)
+  {
+    // INFO(l) << "Mul fuse_to_preceding: precedingOp ker has no shape";
+    return nullptr;
+  }
+  auto mulparam_shape_inf = moco::node_shape(mulparam);
+  if (mulparam_shape_inf.domain() == loco::Domain::Unknown)
+  {
+    // INFO(l) << "Mul fuse_to_preceding: precedingOp mulparam has no shape";
+    return nullptr;
+  }
+  // if MulParam rank is not 1 we cannot fuse, just skip
+  auto mulparam_shape = mulparam_shape_inf.as<loco::TensorShape>();
+  if (mulparam_shape.rank() != 1)
+  {
+    // INFO(l) << "Mul fuse_to_preceding: Mul rank is not 1";
+    return nullptr;
+  }
+
+  // build the fused kernel, then a new conv that mirrors conv_node's attrs
+  auto ker_fused = create_kernel_from_fuse_mulparam<FT>(graph, ker, mulparam);
+  auto conv_fused = graph->nodes()->create<T>();
+
+  conv_fused->input(ifm);
+  conv_fused->filter(ker_fused);
+  conv_fused->padding(conv_node->padding());
+  conv_fused->data_layout(conv_node->data_layout());
+  conv_fused->strides(conv_node->strides());
+
+  return conv_fused;
+}
+
+/**
+ * @note This creates fused ker:2 from ker:1, 'mulparam' and
+ * new precedingOp:2 that uses ker:2 as the kernel.
+ * Then make C to use precedingOp:2 as new input.
+ *
+ * <Before>
+ * mulparam-\
+ * ker:1 --\ \
+ * ifm ----- precedingOp:1 ----------- Mul --- C
+ *
+ *
+ * <After>
+ * mulparam-\
+ * ker:1 --\ \
+ * - precedingOp:1 ----------- Mul ---
+ * /
+ * ifm ----- precedingOp:2 ------------------- C
+ * ker:2 ---/
+ *
+ *
+ * [Where]
+ * - precedingOp:1 can be one of TFConv2D, TFDepthwiseConv2dNative, FullyConnected
+ * - 'mulparam' and Mul will be disconnected from the Output.
+ * - ker:2 is added with fused values of ker:1 and mulparam
+ * - precedingOp:2 is added using ifm and ker:2 and other parameters
+ * same as precedingOp:1.
+ * - ker:1, precedingOp:1, 'mulparam' and Mul should be removed in
+ * RemoveDeadNodeTransform if not used.
+ */
+/**
+ * @brief Fuse the constant operand of TFMul into the preceding convolution's
+ *        kernel, replacing TFMul's users with the new convolution
+ * @return true when the graph was changed
+ */
+bool fuse_to_preceding(loco::Graph *graph, moco::TFMul *node)
+{
+  auto xc = dynamic_cast<moco::TFConst *>(node->x());
+  auto yc = dynamic_cast<moco::TFConst *>(node->y());
+
+  // Note: if both are constants, it should be done by constant-folding
+  if (!(is_only_one_valid(xc, yc)))
+    return false;
+
+  // the const side is the multiplier; the other side is the preceding op
+  moco::TFConst *mulparam = nullptr;
+  moco::TFNode *precedingOp = nullptr;
+
+  if (xc != nullptr)
+  {
+    mulparam = xc;
+    precedingOp = dynamic_cast<moco::TFNode *>(node->y());
+  }
+  else // yc != nullptr
+  {
+    mulparam = yc;
+    precedingOp = dynamic_cast<moco::TFNode *>(node->x());
+  }
+
+  assert(mulparam->dtype() == loco::DataType::FLOAT32);
+
+  // TODO support FullyConnected
+  moco::TFNode *fused_node = nullptr;
+  if (auto conv2d = dynamic_cast<moco::TFConv2D *>(precedingOp))
+    fused_node = fused_conv_node<FuseType::Conv2D, moco::TFConv2D>(graph, mulparam, conv2d);
+  else if (auto dw_conv2d = dynamic_cast<moco::TFDepthwiseConv2dNative *>(precedingOp))
+    fused_node = fused_conv_node<FuseType::DepthwiseConv2D, moco::TFDepthwiseConv2dNative>(
+        graph, mulparam, dw_conv2d);
+
+  // Not ready yet
+  if (fused_node == nullptr)
+    return false;
+
+  // Replace TFMul node with new precedingOp with fused kernel
+  // This will leave existing precedingOp as-is but can be removed if not used
+  // from other transformations
+  replace(node).with(fused_node);
+  // TODO check if need to disconnect
+  // node->x(nullptr);
+  // node->y(nullptr);
+  // fused_node->ifm(nullptr);
+  // fused_node->ker(nullptr);
+
+  return true;
+}
+
+/**
+ * @brief Create zero-filled BiasAdd operation and insert after precedingOp
+ *        The plan is to fuse 'addparam' to TFBiasAdd bias
+ * @param graph       Graph that owns the nodes
+ * @param addparam    FLOAT32 constant whose shape the zero bias copies
+ * @param precedingOp Node the new TFBiasAdd is inserted after
+ * @return Zero-filled BiasAdd operation
+ */
+template <class T>
+moco::TFBiasAdd *create_biasadd_node(loco::Graph *graph, moco::TFConst *addparam, T *precedingOp)
+{
+  auto dtype = addparam->dtype();
+  assert(dtype == loco::DataType::FLOAT32);
+
+  // Create TFConst(bias of TFBiasAdd) with same shape and dtype of 'addparam' but
+  // with values 0.0
+  auto biasadd_param = graph->nodes()->create<moco::TFConst>();
+  biasadd_param->dtype(dtype);
+  copy_shape(addparam, biasadd_param);
+  auto biasadd_num_elements = addparam->size<loco::DataType::FLOAT32>();
+  biasadd_param->size<loco::DataType::FLOAT32>(biasadd_num_elements);
+  // unsigned index: size<>() returns an unsigned count, so a signed index
+  // would mix signed/unsigned in the loop condition
+  for (uint32_t i = 0; i < biasadd_num_elements; i++)
+  {
+    biasadd_param->at<loco::DataType::FLOAT32>(i) = 0.0f;
+  }
+
+  // Create TFBiasAdd with same shape as TFAdd
+  auto data_layout = precedingOp->data_layout();
+  auto tf_biasadd = graph->nodes()->create<moco::TFBiasAdd>();
+  tf_biasadd->data_layout(data_layout);
+
+  // rewire: users of precedingOp now consume tf_biasadd, which in turn
+  // consumes precedingOp and the zero bias
+  loco::replace(precedingOp).with(tf_biasadd);
+  tf_biasadd->value(precedingOp);
+  tf_biasadd->bias(biasadd_param);
+
+  return tf_biasadd;
+}
+
+/**
+ * @note TFAdd will be fused to TFBiasAdd
+ *
+ * <Before>
+ * If precedingOp is not TFBiasAdd, then insert TFConst:1 + TFBiasAdd that
+ * TFConst:1 has zero values.
+ *
+ * addparam --\
+ * \
+ * precedingOp ---------------------------- TFAdd ----- C
+ *
+ *
+ * <Intermediate>
+ * If it's TFBiasAdd and one of the input is TFConst type,
+ * then we can fuse 'addparam' to the input TFConst:2 value of TFBiasAdd, where
+ * TFConst:2 has added values from 'addparam'
+ *
+ * addparam --\
+ * TFConst:1 --------\ \
+ * precedingOp ------- TFBiasAdd ---------- TFAdd ----- C
+ *
+ *
+ * <After>
+ * addparam --\
+ * TFConst:2 --------\ \
+ * precedingOp ------- TFBiasAdd ---------- TFAdd -----
+ * \--------------------- C
+ *
+ *
+ * [Where]
+ * - precedingOp can be TFConv2D, TFDepthwiseConv2dNative, FullyConnected,
+ * TFBiasAdd.
+ * - Intermediate is to insert TFBiasAdd + TFConst:1
+ * - After is to fuse 'addparam' of TFAdd into TFConst:1 + TFBiasAdd
+ * that becomes TFConst:2 + TFBiasAdd
+ */
+/**
+ * @brief Fuse the constant operand of TFAdd into the bias of a TFBiasAdd
+ *        following (or created after) the preceding op; see the diagram above
+ * @return true when the graph was changed
+ */
+bool fuse_to_preceding(loco::Graph *graph, moco::TFAdd *node)
+{
+  // LOGGER(l);
+
+  auto xc = dynamic_cast<moco::TFConst *>(node->x());
+  auto yc = dynamic_cast<moco::TFConst *>(node->y());
+
+  // Note: if both are constants, it should be done by constant-folding
+  if (!(is_only_one_valid(xc, yc)))
+    return false;
+
+  // the const side is the addend; the other side is the preceding op
+  moco::TFConst *addparam = nullptr;
+  moco::TFNode *precedingOp = nullptr;
+
+  if (xc != nullptr)
+  {
+    addparam = xc;
+    precedingOp = dynamic_cast<moco::TFNode *>(node->y());
+  }
+  else // yc != nullptr
+  {
+    addparam = yc;
+    precedingOp = dynamic_cast<moco::TFNode *>(node->x());
+  }
+
+  auto addparam_shape_inf = moco::node_shape(addparam);
+  if (addparam_shape_inf.domain() == loco::Domain::Unknown)
+  {
+    // INFO(l) << "Add fuse_to_preceding: addparam has no shape";
+    return false;
+  }
+  // if AddParam rank is not 0 or 1 we cannot fuse, just skip
+  auto addparam_shape = addparam_shape_inf.as<loco::TensorShape>();
+  if (addparam_shape.rank() > 1)
+  {
+    // INFO(l) << "Add fuse_to_preceding: Add rank is not 0 or 1";
+    return false;
+  }
+
+  // TODO do something when rank() is 0
+  if (addparam_shape.rank() == 0)
+  {
+    // Not supported yet
+    return false;
+  }
+  assert(addparam_shape.rank() != 0);
+
+  // Get (or create) the TFBiasAdd to absorb 'addparam'
+  // TODO support FullyConnected
+  moco::TFBiasAdd *biasadd = nullptr;
+  if (auto conv2d = dynamic_cast<moco::TFConv2D *>(precedingOp))
+    biasadd = create_biasadd_node<moco::TFConv2D>(graph, addparam, conv2d);
+  else if (auto dw_conv2d = dynamic_cast<moco::TFDepthwiseConv2dNative *>(precedingOp))
+    biasadd = create_biasadd_node<moco::TFDepthwiseConv2dNative>(graph, addparam, dw_conv2d);
+  else if (auto old_bias_add = dynamic_cast<moco::TFBiasAdd *>(precedingOp))
+    biasadd = old_bias_add;
+
+  if (biasadd == nullptr)
+  {
+    // try next turn
+    return false;
+  }
+
+  // Let's fuse addparam into biasadd bias
+  auto biasadd_bias = dynamic_cast<moco::TFConst *>(biasadd->bias());
+  assert(biasadd_bias != nullptr);
+  if (!shape_match(biasadd_bias, addparam))
+  {
+    // INFO(l) << "TFBiasAdd bias and TFAdd input shape mismatch";
+    return false;
+  }
+  auto add_num_elements = addparam->size<loco::DataType::FLOAT32>();
+  assert(add_num_elements == biasadd_bias->size<loco::DataType::FLOAT32>());
+  // NOTE(review): signed 'i' is compared against the unsigned element count
+  // here (size<>() presumably returns uint32_t) - consider uint32_t
+  for (int32_t i = 0; i < add_num_elements; i++)
+  {
+    biasadd_bias->at<loco::DataType::FLOAT32>(i) += addparam->at<loco::DataType::FLOAT32>(i);
+  }
+
+  replace(node).with(biasadd);
+  // TODO check if need to disconnect
+  // node->x(nullptr);
+  // node->y(nullptr);
+
+  return true;
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool FuseBinaryIntoPreceding::run(loco::Graph *graph)
+{
+  bool changed = false;
+
+  for (auto node : loco::active_nodes(loco::output_nodes(graph)))
+  {
+    // only TF dialect nodes are candidates
+    if (node->dialect() != moco::TFDialect::get())
+      continue;
+
+    if (auto mul_node = dynamic_cast<moco::TFMul *>(node))
+    {
+      if (fuse_to_preceding(graph, mul_node))
+        changed = true;
+    }
+    else if (auto add_node = dynamic_cast<moco::TFAdd *>(node))
+    {
+      if (fuse_to_preceding(graph, add_node))
+        changed = true;
+    }
+    // TODO support Div
+    // TODO support Sub
+  }
+
+  return changed;
+}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/Passes/RemoveTFIdentityNode.cpp b/compiler/moco/pass/src/Passes/RemoveTFIdentityNode.cpp
new file mode 100644
index 000000000..d3d22c90e
--- /dev/null
+++ b/compiler/moco/pass/src/Passes/RemoveTFIdentityNode.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/RemoveTFIdentityNode.h"
+
+#include <moco/IR/TFDialect.h>
+#include <moco/IR/TFNode.h>
+
+#include <set>
+
+namespace moco
+{
+
+bool RemoveTFIdentityNode::run(loco::Graph *g)
+{
+  // Gather every TFIdentity that still has a valid input link
+  struct IdentityCollector final : public moco::TFNodeMutableVisitor<void>
+  {
+    void visit(moco::TFIdentity *node) final
+    {
+      if (node->input() == nullptr)
+        return;
+      candidates.insert(node);
+    }
+
+    void visit(moco::TFNode *) final { return; }
+
+    std::set<moco::TFIdentity *> candidates;
+  };
+
+  IdentityCollector collector;
+
+  for (auto node : loco::all_nodes(g))
+  {
+    if (node->dialect() != moco::TFDialect::get())
+      continue;
+
+    // NOTE our analysis tool reports an error for tf_node may be nullptr
+    if (auto tf_node = dynamic_cast<moco::TFNode *>(node))
+      tf_node->accept(&collector);
+  }
+
+  // Bypass each identity: forward its input to all users, then unlink
+  for (auto identity : collector.candidates)
+  {
+    replace(identity).with(identity->input());
+    identity->input(nullptr);
+  }
+
+  return collector.candidates.size() > 0;
+}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/Passes/ResolveConstantShape.cpp b/compiler/moco/pass/src/Passes/ResolveConstantShape.cpp
new file mode 100644
index 000000000..2a1323fbc
--- /dev/null
+++ b/compiler/moco/pass/src/Passes/ResolveConstantShape.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/ResolveConstantShape.h"
+
+#include <moco/Support/TFShapeInferenceHelper.h>
+#include <moco/Support/NodeAs.h>
+
+#include <moco/IR/Nodes/TFShape.h>
+#include <moco/IR/Nodes/TFConst.h>
+
+#include <loco.h>
+
+#include <oops/UserExn.h>
+
+#include <cassert>
+
+namespace
+{
+
+/**
+ * WHEN:
+ * - TFShape's input shape is determined
+ * DO:
+ * - Replace TFShape into TFConst
+ *
+ *
+ * <Before>
+ * in ---- TFShape ---- out(s)
+ *
+ * <After>
+ * in ---- TFShape
+ *
+ * TFConst ---- out(s)
+ */
+bool resolve_constant_shape(loco::Graph *graph, moco::TFShape *shape_node)
+{
+  auto input_shape = moco::node_shape(shape_node->input());
+
+  // Without a known input shape there is nothing to resolve yet
+  if (input_shape.domain() == loco::Domain::Unknown)
+    return false;
+
+  auto input_tensor_shape = input_shape.as<loco::TensorShape>();
+  const auto shape_rank = input_tensor_shape.rank();
+
+  // Every dimension must be known to materialize the shape as a constant
+  for (uint32_t axis = 0; axis < shape_rank; ++axis)
+  {
+    if (!input_tensor_shape.dim(axis).known())
+      return false;
+  }
+
+  // Make TFConst to replace TFShape
+  auto const_node = graph->nodes()->create<moco::TFConst>();
+
+  auto dtype = shape_node->dtype();
+  const_node->dtype(dtype);
+
+  // output: a rank-1 tensor with one entry per input axis
+  const_node->rank(1);
+  const_node->dim(0) = shape_rank;
+
+  if (dtype != loco::DataType::S32)
+  {
+    throw oops::UserExn("Unsupported data type", shape_node->name());
+  }
+
+  // TODO Better to make template for this when support new dtype
+  const_node->size<loco::DataType::S32>(shape_rank);
+  for (uint32_t axis = 0; axis < shape_rank; ++axis)
+  {
+    int32_t dim = (int32_t)input_tensor_shape.dim(axis).value();
+    if (!(dim > 0))
+    {
+      throw oops::UserExn("Invalid input shape", shape_node->name());
+    }
+    const_node->at<loco::DataType::S32>(axis) = dim;
+  }
+
+  // route all users of TFShape to the new constant
+  loco::replace(shape_node).with(const_node);
+
+  return true;
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool ResolveConstantShape::run(loco::Graph *graph)
+{
+  bool changed = false;
+
+  for (auto node : loco::active_nodes(loco::output_nodes(graph)))
+  {
+    auto shape_node = as<moco::TFShape>(node);
+    if (shape_node == nullptr)
+      continue;
+
+    // attempt the resolve first so it runs for every candidate node
+    changed = resolve_constant_shape(graph, shape_node) || changed;
+  }
+
+  return changed;
+}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/Passes/ResolveFusedBatchNorm.cpp b/compiler/moco/pass/src/Passes/ResolveFusedBatchNorm.cpp
new file mode 100644
index 000000000..6fd1474af
--- /dev/null
+++ b/compiler/moco/pass/src/Passes/ResolveFusedBatchNorm.cpp
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/ResolveFusedBatchNorm.h"
+
+#include <moco/Support/NodeAs.h>
+
+#include <moco/IR/Nodes/TFAdd.h>
+#include <moco/IR/Nodes/TFConst.h>
+#include <moco/IR/Nodes/TFMul.h>
+#include <moco/IR/Nodes/TFFusedBatchNorm.h>
+
+#include <cassert>
+#include <cmath>
+#include <memory>
+
+namespace
+{
+
+/**
+ * @brief Return true when both constants have identical rank and dimensions
+ */
+bool is_same_shape(moco::TFConst *lc, moco::TFConst *rc)
+{
+  if (lc->rank() != rc->rank())
+    return false;
+
+  // uint32_t index matches rank()'s unsigned type; the previous 'auto'
+  // deduced int and mixed signed/unsigned in the comparison
+  for (uint32_t r = 0; r < lc->rank(); ++r)
+  {
+    if (lc->dim(r).value() != rc->dim(r).value())
+      return false;
+  }
+  return true;
+}
+
+// TODO Put this in some common place
+/**
+ * @brief Copy rank and per-axis dimensions from 'src' to 'dst'
+ *
+ * Unknown dimensions of 'src' are kept unknown in 'dst'.
+ */
+void copy_shape(const moco::TFConst *src, moco::TFConst *dst)
+{
+  assert(src != nullptr);
+  assert(dst != nullptr);
+
+  uint32_t rank = src->rank();
+  dst->rank(rank);
+  for (uint32_t index = 0; index < rank; ++index)
+  {
+    if (src->dim(index).known())
+      // assign the Dimension itself for consistency with the identical
+      // helper in FuseBinaryIntoPreceding (was: = src->dim(index).value())
+      dst->dim(index) = src->dim(index);
+    else
+      dst->dim(index).unset();
+  }
+}
+
+/**
+ * @note resolve_to_muladd() will transform TFFusedBatchNorm to TFMul, TFAdd and two ConstGen
+ *
+ * <arguments>
+ * %0:input
+ * %1:gamma : const
+ * %2:beta : const
+ * %3:mean : const
+ * %4:variance : const
+ * %5:epsilon : const
+ *
+ * <constant operations>
+ * fbn_epsilon_array = make_array(%5:epsilon)
+ * fbn_epsilon = %4:variance + fbn_epsilon_array
+ * fbn_rsqrt = 1.0 / math::sqrt(fbn_epsilon)
+ *
+ * fbn_mean = %3:mean
+ * fbn_mul = fbn_rsqrt * %1:gamma
+ * fbn_offset = %2:beta
+ *
+ * fbn_mul_0_param = fbn_mul
+ * fbn_add_param = fbn_offset - fbn_mean * fbn_mul
+ *
+ * <new replace nodes>
+ * %11:fbn_mul_0_param = ConstGen(fbn_mul_0_param)
+ * %12:fbn_mul_0 = TFMul(%0:input, %11:fbn_mul_0_param)
+ * %21:fbn_add_param = ConstGen(fbn_add_param)
+ * %22:fbn = TFAdd(%12:fbn_mul_0,%21:fbn_add_param)
+ */
+/**
+ * @brief Replace TFFusedBatchNorm with TFMul + TFAdd over precomputed
+ *        constants (see the formula comment above)
+ * @return true when the node was rewritten
+ */
+bool resolve_to_muladd(loco::Graph *graph, moco::TFFusedBatchNorm *node)
+{
+  // LOGGER(lfbn);
+
+  auto tffbn_x = node->x();
+  if (tffbn_x == nullptr)
+  {
+    // This node is already converted
+    return false;
+  }
+
+  auto tffbn_scale = dynamic_cast<moco::TFConst *>(node->scale());
+  auto tffbn_offset = dynamic_cast<moco::TFConst *>(node->offset());
+  auto tffbn_mean = dynamic_cast<moco::TFConst *>(node->mean());
+  auto tffbn_variance = dynamic_cast<moco::TFConst *>(node->variance());
+
+  // all should be const
+  if (tffbn_scale == nullptr || tffbn_offset == nullptr || tffbn_mean == nullptr ||
+      tffbn_variance == nullptr)
+  {
+    // INFO(lfbn) << "TFFBN resolve_to_muladd: One of constant input node is not a constant"
+    //            << std::endl;
+    return false;
+  }
+  assert(tffbn_scale->dtype() == loco::DataType::FLOAT32);
+  assert(tffbn_offset->dtype() == loco::DataType::FLOAT32);
+  assert(tffbn_mean->dtype() == loco::DataType::FLOAT32);
+  assert(tffbn_variance->dtype() == loco::DataType::FLOAT32);
+
+  // check all const shape are the same
+  if (!is_same_shape(tffbn_scale, tffbn_offset) || !is_same_shape(tffbn_scale, tffbn_mean) ||
+      !is_same_shape(tffbn_scale, tffbn_variance))
+  {
+    // INFO(lfbn) << "TFFBN resolve_to_muladd: Shape of constant are not same" << std::endl;
+    return false;
+  }
+
+  auto tffbn_epsilon = node->epsilon();
+  // INFO(lfbn) << "TFFBN tffbn_epsilon = " << tffbn_epsilon << std::endl;
+  auto const_num_elements = tffbn_scale->size<loco::DataType::FLOAT32>();
+  // INFO(lfbn) << "TFFBN const_num_elements = " << const_num_elements << std::endl;
+
+  // fbn_epsilon = %4:variance + fbn_epsilon_array
+  std::unique_ptr<float[]> fbn_epsilon{new float[const_num_elements]};
+  for (int32_t i = 0; i < const_num_elements; i++)
+  {
+    auto variance = tffbn_variance->at<loco::DataType::FLOAT32>(i);
+    fbn_epsilon.get()[i] = variance + tffbn_epsilon;
+  }
+
+  // fbn_rsqrt = 1.0 / math::sqrt(fbn_epsilon)
+  std::unique_ptr<float[]> fbn_rsqrt{new float[const_num_elements]};
+  for (int32_t i = 0; i < const_num_elements; i++)
+  {
+    // NOTE(review): 1.0 / sqrt(...) is evaluated in double and narrowed
+    // to float on assignment
+    fbn_rsqrt.get()[i] = 1.0 / sqrt(fbn_epsilon.get()[i]);
+  }
+
+  // fbn_mean = %3:mean : TODO remove this block and use %3:mean
+  std::unique_ptr<float[]> fbn_mean{new float[const_num_elements]};
+  for (int32_t i = 0; i < const_num_elements; i++)
+  {
+    fbn_mean.get()[i] = tffbn_mean->at<loco::DataType::FLOAT32>(i);
+  }
+
+  // fbn_mul = fbn_rsqrt * %1:gamma
+  std::unique_ptr<float[]> fbn_mul{new float[const_num_elements]};
+  for (int32_t i = 0; i < const_num_elements; i++)
+  {
+    fbn_mul.get()[i] = fbn_rsqrt.get()[i] * tffbn_scale->at<loco::DataType::FLOAT32>(i);
+  }
+
+  // fbn_offset = %2:beta : TODO remove this block and use %2:beta
+  std::unique_ptr<float[]> fbn_offset{new float[const_num_elements]};
+  for (int32_t i = 0; i < const_num_elements; i++)
+  {
+    fbn_offset.get()[i] = tffbn_offset->at<loco::DataType::FLOAT32>(i);
+  }
+
+  // fbn_mul_0_param = fbn_mul : remove this and use fbn_mul
+  std::unique_ptr<float[]> fbn_mul_0_param{new float[const_num_elements]};
+  for (int32_t i = 0; i < const_num_elements; i++)
+  {
+    fbn_mul_0_param.get()[i] = fbn_mul.get()[i];
+  }
+
+  // fbn_add_param = fbn_offset - fbn_mean * fbn_mul
+  std::unique_ptr<float[]> fbn_add_param{new float[const_num_elements]};
+  for (int32_t i = 0; i < const_num_elements; i++)
+  {
+    fbn_add_param.get()[i] = fbn_offset.get()[i] - fbn_mean.get()[i] * fbn_mul.get()[i];
+  }
+
+  // INFO(lfbn) << "TFFBN create ConstGen" << std::endl;
+
+  /*
+   * %11:fbn_mul_0_param = ConstGen(fbn_mul_0_param)
+   * %21:fbn_add_param = ConstGen(fbn_add_param)
+   */
+  auto const_fbn_mul_0_param = graph->nodes()->create<moco::TFConst>();
+  const_fbn_mul_0_param->dtype(loco::DataType::FLOAT32);
+  copy_shape(tffbn_scale, const_fbn_mul_0_param);
+  const_fbn_mul_0_param->size<loco::DataType::FLOAT32>(const_num_elements);
+  for (int32_t i = 0; i < const_num_elements; i++)
+  {
+    const_fbn_mul_0_param->at<loco::DataType::FLOAT32>(i) = fbn_mul_0_param.get()[i];
+  }
+  auto const_fbn_add_param = graph->nodes()->create<moco::TFConst>();
+  const_fbn_add_param->dtype(loco::DataType::FLOAT32);
+  copy_shape(tffbn_scale, const_fbn_add_param);
+  const_fbn_add_param->size<loco::DataType::FLOAT32>(const_num_elements);
+  for (int32_t i = 0; i < const_num_elements; i++)
+  {
+    const_fbn_add_param->at<loco::DataType::FLOAT32>(i) = fbn_add_param.get()[i];
+  }
+
+  // INFO(lfbn) << "TFFBN create TFMul, TFAdd" << std::endl;
+  /*
+   * %12:fbn_mul_0 = TFMul(%0:input, %11:fbn_mul_0_param)
+   * %22:fbn = TFAdd(%12:fbn_mul_0,%21:fbn_add_param)
+   */
+  auto fbn_mul_0 = graph->nodes()->create<moco::TFMul>();
+  fbn_mul_0->x(tffbn_x);
+  fbn_mul_0->y(const_fbn_mul_0_param);
+
+  auto fbn = graph->nodes()->create<moco::TFAdd>();
+  fbn->x(fbn_mul_0);
+  fbn->y(const_fbn_add_param);
+
+  // replace old node with new fbn
+  replace(node).with(fbn);
+  // unlink from graph
+  node->x(nullptr);
+  node->scale(nullptr);
+  node->offset(nullptr);
+  node->mean(nullptr);
+  node->variance(nullptr);
+
+  return true;
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool ResolveFusedBatchNorm::run(loco::Graph *graph)
+{
+  for (auto node : loco::active_nodes(loco::output_nodes(graph)))
+  {
+    auto fbn_node = as<moco::TFFusedBatchNorm>(node);
+    if (fbn_node == nullptr)
+      continue;
+
+    if (resolve_to_muladd(graph, fbn_node))
+    {
+      // tree has been changed. let's return so that we don't need to
+      // considier about following node is correct or not.
+      return true;
+    }
+  }
+
+  return false;
+}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/Passes/ResolveReshapeWildcardDim.cpp b/compiler/moco/pass/src/Passes/ResolveReshapeWildcardDim.cpp
new file mode 100644
index 000000000..3446716cb
--- /dev/null
+++ b/compiler/moco/pass/src/Passes/ResolveReshapeWildcardDim.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/ResolveReshapeWildcardDim.h"
+
+#include <moco/Support/TFShapeInferenceHelper.h>
+#include <moco/Support/NodeAs.h>
+
+#include <moco/IR/Nodes/TFReshape.h>
+#include <moco/IR/Nodes/TFConst.h>
+
+#include <cassert>
+#include <limits>
+
+namespace
+{
+
+/**
+ * @return true when 'node' has one and only one wildcard dimension
+ * @return false when 'node' has no wildcard dimension, i.e. fixed reshape case
+ *
+ * @note Assertions in this function are sanity check for 'node', Reshape's
+ * Const shape input
+ */
+bool has_one_wildcard_dim(const moco::TFConst *node)
+{
+ assert(node->dtype() == loco::DataType::S32);
+ assert(node->rank() == 1);
+
+ auto len = node->dim(0).value();
+ assert(len > 0);
+
+ // Must have one and only wildcard dimension(-1)
+ uint32_t count_wildcard_dim = 0;
+ for (uint32_t i = 0; i < len; ++i)
+ {
+ auto dim = node->at<loco::DataType::S32>(i);
+ if (dim == -1)
+ count_wildcard_dim++;
+ else
+ assert(dim >= 1);
+ }
+
+ assert(count_wildcard_dim <= 1 &&
+ "Invalid Reshape: there should be none or only one wildcard dimension");
+ return count_wildcard_dim;
+}
+
+uint32_t volume(const loco::TensorShape &shape)
+{
+ uint32_t ret = 1;
+ auto rank = shape.rank();
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ ret *= shape.dim(axis).value();
+ }
+ return ret;
+}
+
+void deduce_and_fix_wildcard_dim(moco::TFConst *node, const loco::NodeShape &tensor_input_shape)
+{
+ assert(has_one_wildcard_dim(node));
+
+ assert(tensor_input_shape.domain() == loco::Domain::Tensor);
+ auto shape = tensor_input_shape.as<loco::TensorShape>();
+
+ auto len = node->dim(0).value();
+ uint32_t wildcard_index = std::numeric_limits<uint32_t>::max();
+ uint32_t product_of_non_wildcard_dims = 1;
+
+ // Deduce
+ for (uint32_t i = 0; i < len; ++i)
+ {
+ auto dim = node->at<loco::DataType::S32>(i);
+ if (dim == -1)
+ {
+ wildcard_index = i;
+ }
+ else
+ {
+ product_of_non_wildcard_dims *= dim;
+ }
+ }
+ assert(wildcard_index != std::numeric_limits<uint32_t>::max());
+
+ // Fix
+ assert(volume(shape) % product_of_non_wildcard_dims == 0);
+ node->at<loco::DataType::S32>(wildcard_index) = volume(shape) / product_of_non_wildcard_dims;
+}
+
+/**
+ * WHEN:
+ * - TFReshape's shape input is TFConst
+ * - The TFConst is valid shape input for dynamic reshape, i.e. it has one and
+ * only wildcard dimension(-1)
+ * - TFReshape's tensor input has complete shape inference data
+ * DO:
+ * - Deduce what the wildcard dimension is and fix it
+ */
+bool resolve_wildcard_dim(moco::TFReshape *reshape)
+{
+ // Check conditions (WHEN)
+ auto const_shape_input = dynamic_cast<moco::TFConst *>(reshape->shape());
+ if (!const_shape_input)
+ return false;
+
+ if (!has_one_wildcard_dim(const_shape_input))
+ return false;
+
+ auto tensor_input_shape = moco::node_shape(reshape->tensor());
+ if (tensor_input_shape.domain() == loco::Domain::Unknown)
+ return false;
+
+ // Deduce (DO)
+ deduce_and_fix_wildcard_dim(const_shape_input, tensor_input_shape);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool ResolveReshapeWildcardDim::run(loco::Graph *graph)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph)))
+ {
+ if (auto reshape = as<moco::TFReshape>(node))
+ {
+ if (resolve_wildcard_dim(reshape))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/Passes/ResolveSquaredDifference.cpp b/compiler/moco/pass/src/Passes/ResolveSquaredDifference.cpp
new file mode 100644
index 000000000..b66add1ae
--- /dev/null
+++ b/compiler/moco/pass/src/Passes/ResolveSquaredDifference.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/ResolveSquaredDifference.h"
+
+#include <moco/IR/TFDialect.h>
+#include <moco/IR/TFNodes.h>
+#include <moco/IR/TFNodeVisitor.h>
+#include <moco/IR/TFNodeImpl.h>
+
+#include <loco/IR/NodeShape.h>
+#include <loco/Service/ShapeInference.h>
+
+#include <stdex/Memory.h>
+
+namespace
+{
+
+bool decompose_sqdiff(moco::TFSquaredDifference *node)
+{
+ /**
+ * @note This will decompose TFSquaredDifference node into TFSub and TFMul
+ *
+ * Before
+ * A --- TFSquaredDifference -- C
+ * B --/
+ * After
+ * A --- TFSquaredDifference --
+ * B --/
+ * A --- TFSub == TFMul -- C
+ * B --/
+ * Where
+ * A : x of TFSquaredDifference
+ * B : y of TFSquaredDifference
+ * C : a node that uses TFSquaredDifference as an input
+ * TFSquaredDifference is disconnected from C
+ * A and B are drawn multiple times to simplify the diagram
+ */
+
+ auto node_A = node->x();
+ auto node_B = node->y();
+
+ auto sub_node = node->graph()->nodes()->create<moco::TFSub>();
+ auto mul_node = node->graph()->nodes()->create<moco::TFMul>();
+
+ // update connections
+ sub_node->x(node_A);
+ sub_node->y(node_B);
+ mul_node->x(sub_node);
+ mul_node->y(sub_node);
+
+ // replace node
+ replace(node).with(mul_node);
+
+ return true;
+}
+
+} // namespace
+
+namespace moco
+{
+
+bool ResolveSquaredDifference::run(loco::Graph *graph)
+{
+ auto active_nodes = loco::active_nodes(loco::output_nodes(graph));
+ bool changed = false;
+
+ for (auto node : active_nodes)
+ {
+ if (node->dialect() == TFDialect::get())
+ {
+ auto tf_node = dynamic_cast<moco::TFSquaredDifference *>(node);
+ if (tf_node != nullptr)
+ {
+ if (decompose_sqdiff(tf_node))
+ changed = true;
+ }
+ }
+ }
+
+ return changed;
+}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/Passes/SqueezeReduceNode.cpp b/compiler/moco/pass/src/Passes/SqueezeReduceNode.cpp
new file mode 100644
index 000000000..0d9686328
--- /dev/null
+++ b/compiler/moco/pass/src/Passes/SqueezeReduceNode.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Pass/Passes/SqueezeReduceNode.h"
+
+#include <moco/Support/NodeAs.h>
+
+#include <moco/IR/Nodes/TFConst.h>
+#include <moco/IR/Nodes/TFSqueeze.h>
+#include <moco/IR/Nodes/TFMean.h>
+
+#include <cassert>
+
+namespace
+{
+
+/**
+ * WHEN:
+ * - Reduce operations do not keep dimensions
+ * DO:
+ * - Replace original ReduceTypeOp to new ReduceTypeOp, which 'keep_dims' attribute is true
+ * - Insert TFSqueeze after new ReduceTypeOp
+ *
+ *
+ * <Before>
+ * in ---- ReduceTypeOp:0 (keep_dims = false) --- out(s)
+ *
+ * <After>
+ * --- ReduceTypeOp:0 (keep_dims = false)
+ * /
+ * in ---- ReduceTypeOp:1 (keep_dims = true) ---- TFSqueeze --- out(s)
+ *
+ * <Where>
+ * - 'keep_dims' attribute of ReduceTypeOp:0 is false
+ *
+ */
// Replace 'reduce_node' (keep_dims=false) with an equivalent keep_dims=true
// reduce followed by a TFSqueeze that removes the kept size-1 axes.
// Returns true when the graph was changed.
template <class TFNode> bool squeeze_reduce_node(loco::Graph *graph, TFNode *reduce_node)
{
  // Don't need to squeeze reduce node
  if (reduce_node->keep_dims())
    return false;

  // Reduction indices are not yet constant
  auto const_reduction_indices = dynamic_cast<moco::TFConst *>(reduce_node->reduction_indices());
  if (const_reduction_indices == nullptr)
    return false;

  // Build the replacement pair: a keep_dims=true clone of the reduce node,
  // then a squeeze over the reduced axes.
  auto squeeze_node = graph->nodes()->create<moco::TFSqueeze>();
  auto new_reduce_node = graph->nodes()->create<TFNode>();

  new_reduce_node->input(reduce_node->input());
  new_reduce_node->reduction_indices(reduce_node->reduction_indices());
  new_reduce_node->keep_dims(true);

  // Insert squeeze dims
  // TODO Support S64 type
  assert(const_reduction_indices->dtype() == loco::DataType::S32);

  // The squeezed axes are exactly the reduced axes
  std::vector<int64_t> reduction_values;
  for (uint32_t i = 0; i < const_reduction_indices->size<loco::DataType::S32>(); ++i)
    reduction_values.push_back(const_reduction_indices->at<loco::DataType::S32>(i));
  squeeze_node->squeeze_dims(reduction_values);

  // replace
  // NOTE replace() runs before squeeze_node's input is wired, so only the
  // users of the old reduce node are redirected to squeeze_node.
  loco::replace(reduce_node).with(squeeze_node);
  squeeze_node->input(new_reduce_node);

  return true;
}
+
+} // namespace
+
+namespace moco
+{
+
+bool SqueezeReduceNode::run(loco::Graph *graph)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph)))
+ {
+ if (auto shape_node = as<moco::TFMean>(node))
+ {
+ if (squeeze_reduce_node(graph, shape_node))
+ changed = true;
+ }
+ // TODO Add more reduce type operations
+ }
+
+ return changed;
+}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/TensorPackEnumerator.cpp b/compiler/moco/pass/src/TensorPackEnumerator.cpp
new file mode 100644
index 000000000..61a160cfb
--- /dev/null
+++ b/compiler/moco/pass/src/TensorPackEnumerator.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorPackEnumerator.h"
+
+#include <cassert>
+
+namespace moco
+{
+
+void TensorPackEnumerator::shape(const loco::TensorShape &si, const loco::TensorShape &so)
+{
+ _shape_inp = si;
+ _shape_out = so;
+
+ assert(_shape_inp.rank() + 1 == _shape_out.rank());
+
+ _rank_out = _shape_out.rank();
+}
+
// Odometer-style increment of the output cursor at rank 'r': step by one,
// and on reaching the dimension boundary wrap to 0 and carry into the next
// outer rank.
void TensorPackEnumerator::increment(uint32_t r)
{
  _cursor_out.at(r) = _cursor_out.at(r) + 1;

  if (_cursor_out.at(r) >= _boundary_out.at(r))
  {
    if (r > 0)
    {
      _cursor_out.at(r) = 0;
      increment(r - 1);
    }
    else
    {
      // reached to the end
      // NOTE rank 0 intentionally stays at/past its boundary so that valid()
      // reports the enumeration as finished.
    }
  }
}
+
+void TensorPackEnumerator::start(void)
+{
+ uint32_t rank = _rank_out;
+
+ _cursor_out.resize(rank);
+ _boundary_out.resize(rank);
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ _cursor_out.at(r) = 0;
+ _boundary_out.at(r) = _shape_out.dim(r).value();
+ }
+
+ rank = _rank_out - 1;
+ _cursor_inp.resize(rank);
+ _boundary_inp.resize(rank);
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ _cursor_inp.at(r) = 0;
+ _boundary_inp.at(r) = _shape_inp.dim(r).value();
+ }
+ _num_inp = 0;
+}
+
+bool TensorPackEnumerator::valid(void)
+{
+ uint32_t rank = _rank_out;
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ if (_cursor_out.at(r) >= _boundary_out.at(r))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+void TensorPackEnumerator::advance(void)
+{
+ uint32_t r = _rank_out - 1;
+ increment(r);
+
+ // from _cursor_out, set _cursor_inp and _num
+ for (int32_t r = 0, s = 0; r < _rank_out; ++r)
+ {
+ if (r == _axis)
+ {
+ _num_inp = _cursor_out.at(r);
+ }
+ else
+ {
+ _cursor_inp.at(s) = _cursor_out.at(r);
+ s++;
+ }
+ }
+}
+
/// @brief Index along the pack axis of the input tensor the cursor is on
uint32_t TensorPackEnumerator::inp_num(void) const { return _num_inp; }
+
+uint32_t TensorPackEnumerator::inp_element(void) const
+{
+ uint32_t rank = _rank_out - 1;
+ uint32_t element = 0;
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ uint32_t dim = _boundary_inp.at(r);
+ element = element * dim + _cursor_inp.at(r);
+ }
+ return element;
+}
+
+uint32_t TensorPackEnumerator::out_element(void) const
+{
+ uint32_t rank = _rank_out;
+ uint32_t element = 0;
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ uint32_t dim = _boundary_out.at(r);
+ element = element * dim + _cursor_out.at(r);
+ }
+ return element;
+}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/TensorPackEnumerator.h b/compiler/moco/pass/src/TensorPackEnumerator.h
new file mode 100644
index 000000000..efdec3eb6
--- /dev/null
+++ b/compiler/moco/pass/src/TensorPackEnumerator.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TENSOR_PACK_ENUMERATOR_H__
+#define __MOCO_TENSOR_PACK_ENUMERATOR_H__
+
+#include <loco/IR/TensorShape.h>
+
+#include <vector>
+
+namespace moco
+{
+
+using u32v_t = std::vector<uint32_t>;
+
/**
 * @brief Enumerates corresponding (input, output) element offsets for a
 *        Pack operation along a given axis.
 *
 * Usage: configure shape() and axis(), then iterate with
 *   for (e.start(); e.valid(); e.advance()) { ... }
 */
class TensorPackEnumerator
{
public:
  TensorPackEnumerator() = default;

public:
  // si: shape of one input tensor; so: shape of the packed output tensor
  void shape(const loco::TensorShape &si, const loco::TensorShape &so);
  // Axis along which the inputs are packed
  void axis(uint32_t axis) { _axis = axis; }

public:
  void start(void);   // reset cursors to the first element
  bool valid(void);   // true while elements remain
  void advance(void); // step to the next output element

public:
  uint32_t inp_num(void) const;     // which input tensor the cursor is on
  uint32_t inp_element(void) const; // linear offset within that input tensor
  uint32_t out_element(void) const; // linear offset within the output tensor

private:
  void increment(uint32_t);

private:
  loco::TensorShape _shape_inp;
  loco::TensorShape _shape_out;

  uint32_t _axis = 0;     // pack axis
  uint32_t _rank_out = 0; // output rank (= input rank + 1)
  uint32_t _num_inp = 0;  // current input tensor index along the pack axis
  u32v_t _cursor_inp;
  u32v_t _cursor_out;
  u32v_t _boundary_inp;
  u32v_t _boundary_out;
};
+
+} // namespace moco
+
+#endif // __MOCO_TENSOR_PACK_ENUMERATOR_H__
diff --git a/compiler/moco/pass/src/TensorSliceEnumerator.cpp b/compiler/moco/pass/src/TensorSliceEnumerator.cpp
new file mode 100644
index 000000000..58bd0554c
--- /dev/null
+++ b/compiler/moco/pass/src/TensorSliceEnumerator.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorSliceEnumerator.h"
+
+#include <cassert>
+
+namespace moco
+{
+
+void TensorSliceEnumerator::shape(loco::TensorShape &s)
+{
+ _shape_in = s;
+ _rank_in = _shape_in.rank();
+}
+
+void TensorSliceEnumerator::increment(uint32_t r)
+{
+ if (_cursor.at(r) < _boundary.at(r))
+ _cursor.at(r) = _cursor.at(r) + 1;
+ else
+ {
+ if (r > 0)
+ {
+ _cursor.at(r) = _begin[r];
+ increment(r - 1);
+ }
+ else
+ {
+ // reached to the end
+ }
+ }
+}
+
+void TensorSliceEnumerator::start(void)
+{
+ auto rank = _rank_in;
+
+ _cursor.resize(rank);
+ _boundary.resize(rank);
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ _cursor.at(r) = _begin[r];
+ _boundary.at(r) = _end[r];
+ }
+}
+
+bool TensorSliceEnumerator::valid(void)
+{
+ auto rank = _rank_in;
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ if (_cursor.at(r) >= _boundary.at(r))
+ return false;
+ }
+ return true;
+}
+
+void TensorSliceEnumerator::advance(void)
+{
+ uint32_t r = _rank_in - 1;
+ increment(r);
+}
+
/// @brief Current cursor position along axis 'rank' (asserts rank is in range)
uint32_t TensorSliceEnumerator::cursor(uint32_t rank) const
{
  assert(rank < _rank_in);
  return _cursor.at(rank);
}
+
+} // namespace moco
diff --git a/compiler/moco/pass/src/TensorSliceEnumerator.h b/compiler/moco/pass/src/TensorSliceEnumerator.h
new file mode 100644
index 000000000..c8206fe9d
--- /dev/null
+++ b/compiler/moco/pass/src/TensorSliceEnumerator.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_TENSOR_SLICE_ENUMERATOR_H__
+#define __MOCO_TENSOR_SLICE_ENUMERATOR_H__
+
+#include <loco/IR/TensorShape.h>
+
+#include <vector>
+
+namespace moco
+{
+
+using u32v_t = std::vector<uint32_t>;
+
/**
 * @brief Enumerates the element coordinates of a tensor slice given by
 *        per-axis [begin, end) ranges.
 *
 * Usage: configure shape(), begin() and end(), then iterate with
 *   for (e.start(); e.valid(); e.advance()) { ... }
 */
class TensorSliceEnumerator
{
public:
  TensorSliceEnumerator() = default;

public:
  void shape(loco::TensorShape &s);
  // Per-axis slice start (inclusive) and end (exclusive)
  void begin(u32v_t &b) { _begin = b; }
  void end(u32v_t &e) { _end = e; }

public:
  void start(void);   // reset the cursor to the slice begin
  bool valid(void);   // true while elements remain
  void advance(void); // step to the next element

  uint32_t cursor(uint32_t rank) const; // cursor position along one axis
  // NOTE(review): returns a const value (copy) rather than a const reference;
  // presumably intentional to keep callers from aliasing internals — confirm
  const u32v_t cursor(void) const { return _cursor; }

private:
  void increment(uint32_t);

private:
  loco::TensorShape _shape_in; // shape of the sliced tensor

  uint32_t _rank_in = 0; // cached rank of _shape_in
  u32v_t _cursor;        // current per-axis position
  u32v_t _boundary;      // per-axis end boundaries (copied from _end)
  u32v_t _begin;
  u32v_t _end;
};
+
+} // namespace moco
+
+#endif // __MOCO_TENSOR_SLICE_ENUMERATOR_H__
diff --git a/compiler/moco/pass/src/TensorSliceEnumerator.test.cpp b/compiler/moco/pass/src/TensorSliceEnumerator.test.cpp
new file mode 100644
index 000000000..078fe423f
--- /dev/null
+++ b/compiler/moco/pass/src/TensorSliceEnumerator.test.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorSliceEnumerator.h"
+
+#include <gtest/gtest.h>
+
+TEST(TensorSliceEnumeratorTest, basic_vector)
+{
+ moco::TensorSliceEnumerator iter;
+ loco::TensorShape shape;
+ uint32_t rank = 1;
+
+ shape.rank(rank);
+ shape.dim(0) = loco::Dimension(4);
+
+ std::vector<uint32_t> begin = {1};
+ std::vector<uint32_t> end = {3};
+
+ iter.shape(shape);
+ iter.begin(begin);
+ iter.end(end);
+
+ for (iter.start(); iter.valid(); iter.advance())
+ {
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ printf("%d ", iter.cursor(r));
+ }
+ printf("\n");
+ }
+
+ GTEST_SUCCEED();
+}
+
+TEST(TensorSliceEnumeratorTest, basic_matrix)
+{
+ moco::TensorSliceEnumerator etor;
+ loco::TensorShape shape;
+ uint32_t rank = 2;
+
+ shape.rank(rank);
+ shape.dim(0) = loco::Dimension(5);
+ shape.dim(1) = loco::Dimension(5);
+
+ std::vector<uint32_t> begin = {1, 1};
+ std::vector<uint32_t> end = {2, 4};
+ std::vector<uint32_t> offset;
+ std::vector<uint32_t> cursor;
+
+ etor.shape(shape);
+ etor.begin(begin);
+ etor.end(end);
+
+ for (etor.start(); etor.valid(); etor.advance())
+ {
+ cursor = etor.cursor();
+ assert(cursor.size() == begin.size());
+
+ offset.resize(cursor.size());
+ for (uint32_t r = 0; r < cursor.size(); r++)
+ {
+ offset.at(r) = cursor.at(r) - begin.at(r);
+ std::cout << offset.at(r) << " ";
+ }
+ std::cout << std::endl;
+ }
+
+ GTEST_SUCCEED();
+}
diff --git a/compiler/moco/pass/src/TestHelper.h b/compiler/moco/pass/src/TestHelper.h
new file mode 100644
index 000000000..b97491dba
--- /dev/null
+++ b/compiler/moco/pass/src/TestHelper.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TEST_HELPER_H__
+#define __TEST_HELPER_H__
+
+#include <loco.h>
+
+#include <moco/Support/NodeAs.h>
+
+namespace moco
+{
+namespace test
+{
+
+template <typename T> T *find_first_node_bytype(loco::Graph *g)
+{
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto T_node = as<T>(node))
+ {
+ return T_node;
+ }
+ }
+
+ return nullptr;
+}
+
+template <typename T> std::vector<T *> find_nodes_bytype(loco::Graph *g)
+{
+ std::vector<T *> find_nodes;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto T_node = as<T>(node))
+ {
+ find_nodes.push_back(T_node);
+ }
+ }
+
+ return find_nodes;
+}
+
+/**
+ * @brief Append setup output of graph by adding loco::Push node
+ *
+ * @note This is subject to change when loco changes I/O treatment
+ */
+void setup_output_node(loco::Graph *graph, loco::Node *last_node);
+
+} // namespace test
+} // namespace moco
+
+#endif // __TEST_HELPER_H__
diff --git a/compiler/moco/pass/src/TestHelper.test.cpp b/compiler/moco/pass/src/TestHelper.test.cpp
new file mode 100644
index 000000000..59915d60f
--- /dev/null
+++ b/compiler/moco/pass/src/TestHelper.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestHelper.h"
+
+namespace moco
+{
+namespace test
+{
+
+void setup_output_node(loco::Graph *graph, loco::Node *last_node)
+{
+ // add push as output
+ auto push_node = graph->nodes()->create<loco::Push>();
+ push_node->from(last_node);
+
+ // set the graph output name and node object
+ auto graph_output = graph->outputs()->create();
+ graph_output->name("output");
+ graph_output->dtype(loco::DataType::FLOAT32);
+ loco::link(graph_output, push_node);
+}
+
+} // namespace test
+} // namespace moco
diff --git a/compiler/moco/requires.cmake b/compiler/moco/requires.cmake
new file mode 100644
index 000000000..1a7d36454
--- /dev/null
+++ b/compiler/moco/requires.cmake
@@ -0,0 +1,8 @@
# Build-time dependencies of the moco module
require("loco")
require("locop")
require("stdex")
require("moco-log")
require("plier-tf")
require("mio-tf")
require("logo")
require("oops")
diff --git a/compiler/moco/service/CMakeLists.txt b/compiler/moco/service/CMakeLists.txt
new file mode 100644
index 000000000..dff0233b1
--- /dev/null
+++ b/compiler/moco/service/CMakeLists.txt
@@ -0,0 +1,24 @@
# Collect sources, excluding *.test.cpp which belong to the test binary
file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})

add_library(moco_service SHARED ${SOURCES})
target_include_directories(moco_service PRIVATE src)
target_include_directories(moco_service PUBLIC include)
target_link_libraries(moco_service PUBLIC loco)
target_link_libraries(moco_service PUBLIC moco_lang)
target_link_libraries(moco_service PRIVATE moco_support)
target_link_libraries(moco_service PRIVATE nncc_common)
target_link_libraries(moco_service PRIVATE stdex)
target_link_libraries(moco_service PRIVATE oops)
install(TARGETS moco_service DESTINATION lib)

# Tests are built only when ENABLE_TEST is set
if(NOT ENABLE_TEST)
  return()
endif(NOT ENABLE_TEST)

nnas_find_package(GTest REQUIRED)

GTest_AddTest(moco_service_test ${TESTS})
target_include_directories(moco_service_test PRIVATE src)
target_link_libraries(moco_service_test moco_service)
diff --git a/compiler/moco/service/README.md b/compiler/moco/service/README.md
new file mode 100644
index 000000000..78906dbfe
--- /dev/null
+++ b/compiler/moco/service/README.md
@@ -0,0 +1,3 @@
+# service
+
+`service` provides TensorFlow Dialect Services
diff --git a/compiler/moco/service/include/moco/Service/TFShapeInferenceRule.h b/compiler/moco/service/include/moco/Service/TFShapeInferenceRule.h
new file mode 100644
index 000000000..98d716c2a
--- /dev/null
+++ b/compiler/moco/service/include/moco/Service/TFShapeInferenceRule.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_SERVICE_SHAPE_INFERENCE_RULE_H__
+#define __MOCO_SERVICE_SHAPE_INFERENCE_RULE_H__
+
+#include <loco/Service/ShapeInferenceRule.h>
+
+namespace moco
+{
+
+/**
+ * @brief Shape inference rule for TensorFlow dialect
+ */
struct TFShapeInferenceRule final : public loco::ShapeInferenceRule
{
  // Whether this rule supports the requested inference-rule API version
  bool support(const API &ver) const final;
  // True when 'dialect' is one this rule can infer shapes for
  bool recognize(const loco::Dialect *) const final;
  // Infer the shape of a node; returns false when not inferable
  bool infer(const loco::Node *, loco::NodeShape &) const final;
  // Context-based inference; results are reported through 'Sink'
  // NOTE(review): presumably the two infer overloads map to the API versions
  // checked via support() — confirm against loco's ShapeInferenceRule docs
  void infer(const Context *, const loco::Node *, Sink *) const final;
};
+
+} // namespace moco
+
+#endif // __MOCO_SERVICE_SHAPE_INFERENCE_RULE_H__
diff --git a/compiler/moco/service/include/moco/Service/TFTypeInferenceRule.h b/compiler/moco/service/include/moco/Service/TFTypeInferenceRule.h
new file mode 100644
index 000000000..f712fdb01
--- /dev/null
+++ b/compiler/moco/service/include/moco/Service/TFTypeInferenceRule.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_SERVICE_TYPE_INFERENCE_RULE_H__
+#define __MOCO_SERVICE_TYPE_INFERENCE_RULE_H__
+
+#include <loco/Service/TypeInference.h>
+
+namespace moco
+{
+
+/**
+ * @brief Type Inference Rule for TFDialect
+ */
struct TFTypeInferenceRule final : public loco::TypeInferenceRule
{
  // True when 'dialect' is one this rule can infer data types for
  bool recognize(const loco::Dialect *) const final;
  // Infer the data type of a node; returns false when not inferable
  bool infer(const loco::Node *, loco::DataType &) const final;
};
+
+} // namespace moco
+
+#endif // __MOCO_SERVICE_TYPE_INFERENCE_RULE_H__
diff --git a/compiler/moco/service/src/Service/TFShapeInferenceRule.cpp b/compiler/moco/service/src/Service/TFShapeInferenceRule.cpp
new file mode 100644
index 000000000..6d122c863
--- /dev/null
+++ b/compiler/moco/service/src/Service/TFShapeInferenceRule.cpp
@@ -0,0 +1,891 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Service/TFShapeInferenceRule.h"
+
+#include <moco/Support/TFShapeInferenceHelper.h>
+
+#include "moco/IR/TFDialect.h"
+#include "moco/IR/TFNode.h"
+
+#include <loco/IR/NodeShape.h>
+#include <loco/Service/ShapeInference.h>
+
+#include <oops/UserExn.h>
+
+#include <cassert>
+#include <cmath>
+
+namespace
+{
+
+class ShapeInferenceAlgorithm final : public moco::TFNodeVisitor<loco::NodeShape>
+{
+public:
+ ShapeInferenceAlgorithm(const loco::ShapeInferenceRule::Context *ctx) : _ctx{ctx}
+ {
+ // DO NOTHING
+ }
+
+private:
+ const loco::ShapeInferenceRule::Context *_ctx;
+
+private:
+ bool shape_known(const loco::Node *node) const { return _ctx->known(node); }
+ loco::NodeShape node_shape(const loco::Node *node) const { return _ctx->get(node); }
+
+private:
+ loco::NodeShape binary_node_shape(const moco::TFNode::Node *node)
+ {
+ // This helper works only for binary node.
+ assert(node->arity() == 2);
+
+ auto lhs_shape = node_shape(node->arg(0));
+ auto rhs_shape = node_shape(node->arg(1));
+
+ loco::TensorShape lhs_tensorshape = lhs_shape.as<loco::TensorShape>();
+ loco::TensorShape rhs_tensorshape = rhs_shape.as<loco::TensorShape>();
+ loco::TensorShape sum_tensorshape = moco::broadcast_shape(lhs_tensorshape, rhs_tensorshape);
+
+ loco::NodeShape sum_shape({sum_tensorshape});
+
+ return sum_shape;
+ }
+
+ loco::NodeShape node_shape_with_check(const moco::TFNode::Node *node)
+ {
+ auto nodeshape = node_shape(node);
+ assert(nodeshape.domain() == loco::Domain::Tensor);
+
+ return nodeshape;
+ }
+
+ bool valid_scalar_value(moco::TFConst *node)
+ {
+ auto nodeshape = node_shape(node);
+ if (nodeshape.domain() != loco::Domain::Tensor)
+ {
+ return false;
+ }
+ if (node->dtype() != loco::DataType::S32)
+ {
+ return false;
+ }
+
+ auto tensor_shape = nodeshape.as<loco::TensorShape>();
+ if (!(tensor_shape.rank() == 0 || tensor_shape.rank() == 1))
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ int32_t scalar_value(moco::TFConst *node)
+ {
+ auto nodeshape = node_shape(node);
+ assert(node->dtype() == loco::DataType::S32);
+
+ auto tensor_shape = nodeshape.as<loco::TensorShape>();
+ assert(tensor_shape.rank() == 0 || tensor_shape.rank() == 1);
+
+ return node->at<loco::DataType::S32>(0);
+ }
+
+public:
+ loco::NodeShape visit(const moco::TFAdd *node) final { return binary_node_shape(node); }
+
+ loco::NodeShape visit(const moco::TFAvgPool *node) final
+ {
+ auto value_shape = node_shape(node->value());
+ assert(value_shape.domain() != loco::Domain::Unknown);
+
+ moco::PlaneInference infer_plane_shape;
+
+ infer_plane_shape.padding(node->padding());
+ infer_plane_shape.stride(moco::stride_of(node->strides(), node->data_layout()));
+ infer_plane_shape.window(moco::window_of(node->ksize(), node->data_layout()));
+
+ auto input_feature_shape = moco::as_feature_shape(value_shape, node->data_layout());
+ auto input_plane_shape = moco::make_plane_shape(input_feature_shape);
+ auto output_feature_shape = input_feature_shape;
+ auto output_plane_shape = infer_plane_shape(input_plane_shape);
+
+ moco::update(output_feature_shape).with(output_plane_shape);
+
+ return moco::as_tensor_shape(output_feature_shape, node->data_layout());
+ }
+
+ loco::NodeShape visit(const moco::TFBiasAdd *node) final
+ {
+ return node_shape_with_check(node->value());
+ }
+
+ loco::NodeShape visit(const moco::TFConcatV2 *node) final
+ {
+ // axis shape should be available
+ auto axis_node = node->axis();
+ auto axis_shape = node_shape(axis_node);
+ assert(axis_shape.domain() != loco::Domain::Unknown);
+
+ // check all input shapes and all ranks should be same
+ auto value_a = node->values(0);
+ auto value_a_shape = node_shape(value_a);
+ assert(value_a_shape.domain() == loco::Domain::Tensor);
+ auto value_a_tensor_shape = value_a_shape.as<loco::TensorShape>();
+ uint32_t a_rank = value_a_tensor_shape.rank();
+
+ uint32_t num_values = node->num_values();
+ for (uint32_t ni = 1; ni < num_values; ++ni)
+ {
+ auto value_b = node->values(ni);
+ auto value_b_shape = node_shape(value_b);
+ assert(value_b_shape.domain() == loco::Domain::Tensor);
+ auto value_b_tensor_shape = value_b_shape.as<loco::TensorShape>();
+ assert(a_rank == value_b_tensor_shape.rank());
+ }
+
+ int32_t axis_value = 0;
+ bool axis_available = false;
+ {
+ // check for axis is TFConst
+ auto tfconst = dynamic_cast<moco::TFConst *>(axis_node);
+ if (tfconst != nullptr)
+ {
+ if (valid_scalar_value(tfconst))
+ {
+ axis_value = scalar_value(tfconst);
+ axis_available = true;
+ }
+ }
+ }
+ if (!axis_available)
+ {
+ // TODO may need to refine error message
+ throw oops::UserExn("ConcatV2 node does not have axis input", node->name());
+ }
+
+ uint32_t axis_absolute = (axis_value >= 0) ? axis_value : (int32_t)a_rank + axis_value;
+ loco::TensorShape output_tensor_shape = value_a_tensor_shape;
+
+ for (uint32_t index = 0; index < a_rank; ++index)
+ {
+ if (value_a_tensor_shape.dim(index).known())
+ {
+ uint32_t dim = value_a_tensor_shape.dim(index).value();
+ uint32_t dim_acc = dim;
+
+ for (uint32_t ni = 1; ni < num_values; ++ni)
+ {
+ auto value_b = node->values(ni);
+ auto value_b_shape = node_shape(value_b);
+ assert(value_b_shape.domain() == loco::Domain::Tensor);
+ auto value_b_tensor_shape = value_b_shape.as<loco::TensorShape>();
+ assert(value_b_tensor_shape.dim(index).known());
+ if (index == axis_absolute)
+ dim_acc += value_b_tensor_shape.dim(index).value();
+ else
+ assert(dim == value_b_tensor_shape.dim(index).value());
+ }
+ output_tensor_shape.dim(index) = dim_acc;
+ }
+ else
+ output_tensor_shape.dim(index).unset();
+ }
+ return loco::NodeShape(output_tensor_shape);
+ }
+
+ loco::NodeShape visit(const moco::TFConst *node) final
+ {
+ loco::TensorShape output_tensor_shape;
+
+ uint32_t rank = node->rank();
+ output_tensor_shape.rank(rank);
+ for (uint32_t index = 0; index < rank; ++index)
+ {
+ if (node->dim(index).known())
+ output_tensor_shape.dim(index) = node->dim(index).value();
+ else
+ output_tensor_shape.dim(index).unset();
+ }
+
+ return loco::NodeShape(output_tensor_shape);
+ }
+
+ loco::NodeShape visit(const moco::TFConv2D *node) final
+ {
+ auto input_shape = moco::node_shape(node->input());
+ auto ker_shape = moco::node_shape(node->filter());
+ auto ker_tensor_shape = ker_shape.as<loco::TensorShape>(); // in HWIO
+ auto node_stride = moco::stride_of(node->strides(), node->data_layout());
+ auto node_window = moco::window_of(ker_tensor_shape, "HWIO");
+
+ moco::PlaneInference infer_plane_shape;
+
+ infer_plane_shape.padding(node->padding());
+ infer_plane_shape.stride(node_stride);
+ infer_plane_shape.window(node_window);
+
+ auto input_feature_shape = moco::as_feature_shape(input_shape, node->data_layout());
+ auto input_plane_shape = moco::make_plane_shape(input_feature_shape);
+ // output count is from input count, depth is from kernel 'O' which is dim(3)
+ auto output_feature_shape = input_feature_shape;
+ output_feature_shape.depth() = ker_tensor_shape.dim(3).value();
+
+ auto output_plane_shape = infer_plane_shape(input_plane_shape);
+
+ moco::update(output_feature_shape).with(output_plane_shape);
+
+ return moco::as_tensor_shape(output_feature_shape, node->data_layout());
+ }
+
+ loco::NodeShape visit(const moco::TFConv2DBackpropInput *node) final
+ {
+ // TFConv2DBackpropInput's first input, named 'input_sizes', actually contains shape of node
+ // output's feature map. We can get shape of TFConv2DBackpropInput by just copying this.
+ // TODO Support when 'input_sizes' is not TFConst, or support constant folding
+ auto input_sizes_node = dynamic_cast<moco::TFConst *>(node->input_sizes());
+ if (input_sizes_node == nullptr)
+ {
+    // constant folding support for this node is in progress; return unknown until it is finished
+ loco::NodeShape unknown;
+ return unknown;
+ }
+
+ // Let's support S32 for time being
+ // TODO Support other integer types
+ assert(input_sizes_node->dtype() == loco::DataType::S32);
+ assert(input_sizes_node->size<loco::DataType::S32>() == 4);
+
+ // copy!
+ loco::TensorShape ofm_tensor_shape;
+ ofm_tensor_shape.rank(4);
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ int32_t dim = input_sizes_node->at<loco::DataType::S32>(i);
+ assert(dim > 0);
+ ofm_tensor_shape.dim(i) = (uint32_t)dim;
+ }
+
+ return loco::NodeShape(ofm_tensor_shape);
+ }
+
+ loco::NodeShape visit(const moco::TFDepthwiseConv2dNative *node) final
+ {
+ auto input_shape = moco::node_shape(node->input()); // NHWC
+ auto ker_shape = moco::node_shape(node->filter());
+ auto ker_tensor_shape = ker_shape.as<loco::TensorShape>(); // in HWCM
+ auto node_stride = moco::stride_of(node->strides(), node->data_layout());
+ auto node_window = moco::window_of(ker_tensor_shape, "HWCM");
+
+ moco::PlaneInference infer_plane_shape;
+
+ infer_plane_shape.padding(node->padding());
+ infer_plane_shape.stride(node_stride);
+ infer_plane_shape.window(node_window);
+
+ auto input_feature_shape = moco::as_feature_shape(input_shape, node->data_layout());
+ auto input_plane_shape = moco::make_plane_shape(input_feature_shape);
+ // output count is from input count, depth is from kernel 'CM' which is dim(2) * dim(3)
+ auto output_feature_shape = input_feature_shape;
+ output_feature_shape.depth() =
+ loco::Dimension(ker_tensor_shape.dim(2).value() * ker_tensor_shape.dim(3).value());
+
+ auto output_plane_shape = infer_plane_shape(input_plane_shape);
+
+ moco::update(output_feature_shape).with(output_plane_shape);
+
+ return moco::as_tensor_shape(output_feature_shape, node->data_layout());
+ }
+
+ loco::NodeShape visit(const moco::TFFakeQuantWithMinMaxVars *node) final
+ {
+ return node_shape_with_check(node->inputs());
+ }
+
+ loco::NodeShape visit(const moco::TFFusedBatchNorm *node) final
+ {
+ return node_shape_with_check(node->x());
+ }
+
+ loco::NodeShape visit(const moco::TFIdentity *node) final
+ {
+ return node_shape_with_check(node->input());
+ }
+
+ loco::NodeShape visit(const moco::TFMaximum *node) final { return binary_node_shape(node); }
+
+ loco::NodeShape visit(const moco::TFMaxPool *node) final
+ {
+ auto input_shape = node_shape(node->input());
+ assert(input_shape.domain() != loco::Domain::Unknown);
+
+ moco::PlaneInference infer_plane_shape;
+
+ infer_plane_shape.padding(node->padding());
+ infer_plane_shape.stride(moco::stride_of(node->strides(), node->data_layout()));
+ infer_plane_shape.window(moco::window_of(node->ksize(), node->data_layout()));
+
+ auto input_feature_shape = moco::as_feature_shape(input_shape, node->data_layout());
+ auto input_plane_shape = moco::make_plane_shape(input_feature_shape);
+ auto output_feature_shape = input_feature_shape;
+ auto output_plane_shape = infer_plane_shape(input_plane_shape);
+
+ moco::update(output_feature_shape).with(output_plane_shape);
+
+ return moco::as_tensor_shape(output_feature_shape, node->data_layout());
+ }
+
+  loco::NodeShape visit(const moco::TFMean *node) final
+  {
+    auto input_shape = node_shape(node->input());
+    auto reduction_indices = node->reduction_indices();
+
+    // Get constant values if reduction_indices is const
+    std::vector<int32_t> reduction_values;
+    if (auto tfconst = dynamic_cast<moco::TFConst *>(reduction_indices))
+    {
+      assert(tfconst->dtype() == loco::DataType::S32);
+      auto const_size = tfconst->size<loco::DataType::S32>();
+      for (uint32_t i = 0; i < const_size; ++i)
+      {
+        int32_t axis = tfconst->at<loco::DataType::S32>(i);
+        if (axis < 0)
+          axis += input_shape.as<loco::TensorShape>().rank();
+        reduction_values.push_back(axis);
+      }
+    }
+    else
+    {
+      // we cannot find a valid reduction indices value
+      loco::NodeShape unknown;
+      return unknown;
+    }
+
+    loco::TensorShape output_shape;
+    auto input_tensor_shape = input_shape.as<loco::TensorShape>();
+
+    if (node->keep_dims())
+    {
+      output_shape.rank(input_tensor_shape.rank());
+      for (uint32_t i = 0; i < input_tensor_shape.rank(); ++i)
+        output_shape.dim(i) = input_tensor_shape.dim(i);
+      for (uint32_t i = 0; i < reduction_values.size(); ++i)
+        output_shape.dim(reduction_values.at(i)) = 1;
+    }
+    else
+    {
+      std::vector<bool> check_reduce(input_tensor_shape.rank(), false);
+      for (uint32_t i = 0; i < reduction_values.size(); ++i)
+        check_reduce.at(reduction_values.at(i)) = true;
+
+      uint32_t reduce_cnt = 0;
+      for (uint32_t i = 0; i < check_reduce.size(); ++i)
+        if (check_reduce.at(i))
+          ++reduce_cnt;
+
+      output_shape.rank(input_tensor_shape.rank() - reduce_cnt);
+      for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i)
+        if (check_reduce.at(i) == false)
+          output_shape.dim(j++) = input_tensor_shape.dim(i); // copy surviving dim, not loop index
+    }
+
+    return loco::NodeShape(output_shape);
+  }
+
+ loco::NodeShape visit(const moco::TFMul *node) final { return binary_node_shape(node); }
+
+ loco::NodeShape visit(const moco::TFPack *node) final
+ {
+ loco::NodeShape unknown;
+
+ auto input_shape_0 = node_shape(node->values(0));
+ if (input_shape_0.domain() != loco::Domain::Tensor)
+ {
+ // TODO fix this for other cases
+ // We support only valid tensor shape for now
+ return unknown;
+ }
+ loco::TensorShape tensor_shape_0 = input_shape_0.as<loco::TensorShape>();
+
+ // all input shapes should be same
+ auto num_values = node->N();
+ for (uint32_t i = 1; i < num_values; ++i)
+ {
+ auto input_shape = node_shape(node->values(i));
+ if (input_shape.domain() != loco::Domain::Tensor)
+ {
+ // TODO ditto
+ return unknown;
+ }
+
+ loco::TensorShape tensor_shape = input_shape.as<loco::TensorShape>();
+ if (!(input_shape_0 == input_shape))
+ {
+ throw oops::UserExn("All input values shape should be same", node->name());
+ }
+ }
+
+ // output rank will be +1 of rank of the input
+ // axis should be in range of [-r, r), where r is rank of the output
+ auto axis = node->axis();
+ int32_t rank = static_cast<int32_t>(tensor_shape_0.rank());
+ assert(rank >= 0);
+ int32_t rank_output = rank + 1;
+ if (axis < -rank_output || rank_output <= axis)
+ {
+ throw oops::UserExn("axis is out of range", node->name());
+ }
+
+ auto axis_stack = (axis >= 0) ? axis : rank_output + axis;
+
+ loco::TensorShape output_tensor_shape;
+
+ output_tensor_shape.rank(rank_output);
+ for (int32_t r = 0; r < axis_stack; ++r)
+ {
+ output_tensor_shape.dim(r).set(tensor_shape_0.dim(r).value());
+ }
+ output_tensor_shape.dim(axis_stack).set(num_values);
+ for (int32_t r = axis_stack; r < rank; ++r)
+ {
+ output_tensor_shape.dim(r + 1).set(tensor_shape_0.dim(r).value());
+ }
+
+ return loco::NodeShape(output_tensor_shape);
+ }
+
+ loco::NodeShape visit(const moco::TFPad *node) final
+ {
+ auto input_shape = node_shape(node->input());
+ assert(input_shape.domain() == loco::Domain::Tensor);
+
+ auto const_paddings = dynamic_cast<moco::TFConst *>(node->paddings());
+ assert(const_paddings);
+ assert(const_paddings->dtype() == loco::DataType::S32);
+ assert(const_paddings->rank() == 2);
+
+ loco::TensorShape input_tensor_shape = input_shape.as<loco::TensorShape>();
+ loco::TensorShape output_tensor_shape;
+
+ output_tensor_shape.rank(input_tensor_shape.rank());
+ for (uint32_t axis = 0; axis < input_tensor_shape.rank(); ++axis)
+ {
+ output_tensor_shape.dim(axis) = input_tensor_shape.dim(axis).value() +
+ const_paddings->at<loco::DataType::S32>(axis * 2) +
+ const_paddings->at<loco::DataType::S32>(axis * 2 + 1);
+ }
+
+ return loco::NodeShape{output_tensor_shape};
+ }
+
+ loco::NodeShape visit(const moco::TFPlaceholder *node) final
+ {
+ loco::TensorShape output_tensor_shape;
+
+ uint32_t rank = node->rank();
+ output_tensor_shape.rank(rank);
+ for (uint32_t index = 0; index < rank; ++index)
+ {
+ if (node->dim(index).known())
+ output_tensor_shape.dim(index) = node->dim(index).value();
+ else
+ output_tensor_shape.dim(index).unset();
+ }
+
+ return loco::NodeShape(output_tensor_shape);
+ }
+
+ loco::NodeShape visit(const moco::TFRealDiv *node) final { return binary_node_shape(node); }
+
+ loco::NodeShape visit(const moco::TFRelu *node) final
+ {
+ return node_shape_with_check(node->features());
+ }
+
+ loco::NodeShape visit(const moco::TFRelu6 *node) final
+ {
+ return node_shape_with_check(node->features());
+ }
+
+ loco::NodeShape visit(const moco::TFReshape *node) final
+ {
+ loco::NodeShape unknown;
+
+ // For now, we only consider Fixed Reshape, i.e. Reshape with determined
+ // 'shape' input. So here we only support case when 'shape' input of
+ // TFReshape is TFConst. If 'shape' input is not TFConst, another
+ // transform (e.g. constant folding) should be done beforehand to make
+ // it TFConst.
+ // TODO Support dynamic Reshape
+ // Note that 'shape()' here is 'shape' input, not node's shape information
+ auto const_shape_input = dynamic_cast<moco::TFConst *>(node->shape());
+ if (!const_shape_input)
+ {
+ // 'shape' input of TFReshape is not TFConst, we can not do shape inference
+ return unknown;
+ }
+
+ // 'Shape' input should be integer tensor of rank 1, e.g. [2, 3, 4] or [3, -1]
+ assert(const_shape_input->dtype() == loco::DataType::S32);
+ assert(const_shape_input->rank() == 1);
+
+ auto shape_rank = const_shape_input->dim(0).value();
+ assert(shape_rank > 0);
+
+ loco::TensorShape output_shape;
+ output_shape.rank(shape_rank);
+ for (uint32_t axis = 0; axis < shape_rank; ++axis)
+ {
+ auto shape_dim = const_shape_input->at<loco::DataType::S32>(axis);
+ if (shape_dim == -1)
+ {
+ // Reshape's new shape has wildcard dimension, i.e. dynamic reshape
+ return unknown;
+ }
+ assert(shape_dim >= 1);
+ output_shape.dim(axis) = shape_dim;
+ }
+
+ // TODO Compare 'tensor' input and validate coherency?
+ // Not sure this is appropriate stage for this task.
+
+ return loco::NodeShape(output_shape);
+ }
+
+ loco::NodeShape visit(const moco::TFRsqrt *node) final
+ {
+ return node_shape_with_check(node->x());
+ }
+
+ loco::NodeShape visit(const moco::TFShape *node) final
+ {
+ auto input_shape = node_shape(node->input());
+ auto input_tensor_shape = input_shape.as<loco::TensorShape>();
+
+ loco::TensorShape output_shape;
+
+ // Note that input shape becomes node(TFShape)'s value
+ output_shape.rank(1);
+ output_shape.dim(0) = input_tensor_shape.rank();
+
+ return loco::NodeShape(output_shape);
+ }
+
+ loco::NodeShape visit(const moco::TFSoftmax *node) final
+ {
+ return node_shape_with_check(node->logits());
+ }
+
+ loco::NodeShape visit(const moco::TFSqrt *node) final { return node_shape_with_check(node->x()); }
+
+ loco::NodeShape visit(const moco::TFSquaredDifference *node) final
+ {
+ return binary_node_shape(node);
+ }
+
+ loco::NodeShape visit(const moco::TFSqueeze *node) final
+ {
+ auto input_shape = node_shape(node->input());
+
+    // TODO Not sure Squeeze only gets input as Tensor
+ // Note that tensor_shape() has assertion in it
+ auto input_tensor_shape = input_shape.as<loco::TensorShape>();
+
+ auto squeeze_dims_vec = node->squeeze_dims();
+ std::set<int64_t> squeeze_dims(squeeze_dims_vec.cbegin(), squeeze_dims_vec.cend());
+
+ loco::TensorShape output_shape;
+ uint32_t output_rank = 0;
+
+ if (squeeze_dims.empty())
+ {
+ // Remove all dimensions whose value is 1
+ for (uint32_t axis = 0; axis < input_tensor_shape.rank(); ++axis)
+ {
+ assert(input_tensor_shape.dim(axis).known());
+ auto dim = input_tensor_shape.dim(axis).value();
+ if (dim != 1)
+ {
+ assert(dim > 1);
+ output_shape.rank(++output_rank);
+ output_shape.dim(output_rank - 1) = dim;
+ }
+ }
+ }
+ else
+ {
+ uint32_t input_rank = input_tensor_shape.rank();
+
+ // Sanity check for 'squeeze_dims'
+ auto is_valid_squeeze_dims = [&squeeze_dims, &input_rank]() {
+ if (!(squeeze_dims.size() < input_rank))
+ return false;
+ for (auto squeeze_dim : squeeze_dims)
+ {
+ if (!(squeeze_dim >= -(int64_t)input_rank))
+ return false;
+ if (!(squeeze_dim < (int64_t)input_rank))
+ return false;
+ }
+ return true;
+ };
+
+ if (!is_valid_squeeze_dims())
+ {
+ throw oops::UserExn("Invalid squeeze dimension", node->name());
+ }
+
+ // Resolve negative squeeze dimension
+ std::set<int64_t> resolved_squeeze_dims;
+ for (auto squeeze_dim : squeeze_dims)
+ {
+ if (squeeze_dim < 0)
+ resolved_squeeze_dims.insert(squeeze_dim + (int64_t)input_rank);
+ else
+ resolved_squeeze_dims.insert(squeeze_dim);
+ }
+
+ // Remove squeeze dimensions only
+ for (uint32_t axis = 0; axis < input_rank; ++axis)
+ {
+ assert(input_tensor_shape.dim(axis).known());
+ auto dim = input_tensor_shape.dim(axis).value();
+ if (resolved_squeeze_dims.find((int64_t)axis) == resolved_squeeze_dims.cend())
+ {
+ // Not squeeze dim
+ output_shape.rank(++output_rank);
+ output_shape.dim(output_rank - 1) = dim;
+ }
+ else
+ {
+ // Is squeeze dim
+ assert(dim == 1);
+ // DO NOTHING
+ }
+ }
+ }
+
+ assert(output_shape.rank() > 0);
+
+ return loco::NodeShape(output_shape);
+ }
+
+ loco::NodeShape visit(const moco::TFStopGradient *node) final
+ {
+ return node_shape_with_check(node->input());
+ }
+
+ loco::NodeShape visit(const moco::TFStridedSlice *node) final
+ {
+ loco::NodeShape unknown;
+ auto input_shape = node_shape(node->input());
+ if (input_shape.domain() != loco::Domain::Tensor)
+ {
+ // TODO fix this for other cases
+ // We support only tensor shape for now
+ return unknown;
+ }
+
+ // TODO support full mask features: see import codes also
+ // Limited attributes for now
+ assert(node->begin_mask() == 0);
+ assert(node->end_mask() == 0);
+ assert(node->ellipsis_mask() == 0);
+ assert(node->shrink_axis_mask() == 1);
+
+ auto const_begin = dynamic_cast<moco::TFConst *>(node->begin());
+ auto const_end = dynamic_cast<moco::TFConst *>(node->end());
+ auto const_strides = dynamic_cast<moco::TFConst *>(node->strides());
+
+ assert(dynamic_cast<moco::TFConst *>(node->input()) != nullptr);
+ assert(const_begin != nullptr);
+ assert(const_end != nullptr);
+ assert(const_strides != nullptr);
+
+ auto input_tensor_shape = input_shape.as<loco::TensorShape>();
+ auto input_rank = input_tensor_shape.rank();
+ auto output_rank = input_rank;
+
+ // TODO support strides with > 1
+ uint32_t elements = const_strides->size<loco::DataType::S32>();
+ for (uint32_t e = 0; e < elements; ++e)
+ assert(const_strides->at<loco::DataType::S32>(e) == 1);
+
+ // lets apply begin ~ end range from input shape
+ loco::TensorShape output_shape_range;
+
+ output_shape_range.rank(input_rank);
+ for (uint32_t r = 0; r < input_rank; ++r)
+ {
+ // TODO apply begin/end mask
+ // TODO apply ellipsis mask
+ // TODO apply strides
+ auto end = const_end->at<loco::DataType::S32>(r);
+ auto begin = const_begin->at<loco::DataType::S32>(r);
+ auto size = end - begin;
+ output_shape_range.dim(r).set(size);
+ }
+
+ // get final tensor shape from applying shrink mask to output_shape_range
+ loco::TensorShape output_tensor_shape;
+
+ if (node->shrink_axis_mask() != 0)
+ {
+ for (uint32_t rs = 0; rs < input_rank; ++rs)
+ {
+ int32_t bit = 1 << rs;
+ int32_t mask = node->shrink_axis_mask();
+ if (bit & mask)
+ {
+ // shrink one dimension
+ assert(output_rank > 0);
+ output_rank = output_rank - 1;
+ }
+ }
+ output_tensor_shape.rank(output_rank);
+ for (uint32_t rs = 0, rd = 0; rs < input_rank; ++rs)
+ {
+ int32_t bit = 1 << rs;
+ int32_t mask = node->shrink_axis_mask();
+ if ((bit & mask) == 0)
+ {
+ // use this dimension
+ output_tensor_shape.dim(rd).set(output_shape_range.dim(rs).value());
+ rd++;
+ }
+ // else this dimension is shrink-ed
+ }
+ }
+ else
+ {
+ output_tensor_shape = output_shape_range;
+ }
+
+ return loco::NodeShape(output_tensor_shape);
+ }
+
+ loco::NodeShape visit(const moco::TFSub *node) final { return binary_node_shape(node); }
+
+ loco::NodeShape visit(const moco::TFTanh *node) final { return node_shape_with_check(node->x()); }
+
+ // For virtual nodes
+ loco::NodeShape visit(const moco::TFPush *node) { return node_shape_with_check(node->from()); }
+
+public:
+ loco::NodeShape visit(const moco::TFNode *) final
+ {
+ loco::NodeShape unknown;
+ return unknown;
+ }
+};
+
+} // namespace
+
+namespace
+{
+namespace compat
+{
+
+struct Context final : public loco::ShapeInferenceRule::Context
+{
+ bool known(const loco::Node *node) const final { return loco::shape_known(node); }
+ loco::NodeShape get(const loco::Node *node) const final { return loco::shape_get(node); }
+};
+
+class Sink final : public loco::ShapeInferenceRule::Sink
+{
+public:
+ enum Status
+ {
+ Unknown,
+ Okay,
+ Fail,
+ };
+
+public:
+ const Status &status(void) const { return _status; }
+ const loco::NodeShape &shape(void) const { return _shape; }
+
+public:
+ void okay(const loco::NodeShape &shape) final
+ {
+ _status = Okay;
+ _shape = shape;
+ }
+
+ void fail(void) final
+ {
+    // Notify failure
+ _status = Fail;
+ }
+
+private:
+ Status _status = Unknown;
+ loco::NodeShape _shape;
+};
+
+} // namespace compat
+} // namespace
+
+namespace moco
+{
+
+bool TFShapeInferenceRule::support(const API &api) const
+{
+ return api == API::V1 or api == API::V2;
+}
+
+bool TFShapeInferenceRule::recognize(const loco::Dialect *d) const
+{
+ // handle only TensorFlow dialect
+ return TFDialect::get() == d;
+}
+
+bool TFShapeInferenceRule::infer(const loco::Node *node, loco::NodeShape &shape) const
+{
+ ::compat::Context ctx;
+ ::compat::Sink sink;
+
+ infer(&ctx, node, &sink);
+
+ assert(sink.status() == ::compat::Sink::Okay or sink.status() == ::compat::Sink::Fail);
+
+ if (sink.status() == ::compat::Sink::Fail)
+ {
+ return false;
+ }
+
+ shape = sink.shape();
+
+ return true;
+}
+
+void TFShapeInferenceRule::infer(const Context *ctx, const loco::Node *node, Sink *sink) const
+{
+ assert(node->dialect() == TFDialect::get());
+ assert(dynamic_cast<const TFNode *>(node) != nullptr);
+
+ ShapeInferenceAlgorithm alg{ctx};
+ auto shape = dynamic_cast<const TFNode *>(node)->accept(&alg);
+
+ if (shape.domain() == loco::Domain::Unknown)
+ sink->fail();
+ else
+ sink->okay(shape);
+}
+
+} // namespace moco
diff --git a/compiler/moco/service/src/Service/TFShapeInferenceRule.test.cpp b/compiler/moco/service/src/Service/TFShapeInferenceRule.test.cpp
new file mode 100644
index 000000000..1e1b48ca7
--- /dev/null
+++ b/compiler/moco/service/src/Service/TFShapeInferenceRule.test.cpp
@@ -0,0 +1,500 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Service/TFShapeInferenceRule.h"
+
+#include "TestHelper.h"
+
+#include "moco/IR/TFNodes.h"
+
+#include <loco.h>
+#include <loco/Service/ShapeInference.h>
+
+#include <gtest/gtest.h>
+
+using namespace moco::test;
+
+namespace
+{
+
+moco::TFAvgPool *avgpool_network_simple1331(loco::Graph *graph)
+{
+ auto avgpool_node = graph->nodes()->create<moco::TFAvgPool>();
+
+ avgpool_node->data_layout("NHWC");
+ avgpool_node->ksize({1, 3, 3, 1});
+ avgpool_node->strides({1, 1, 1, 1});
+
+ // Dummy const node as ifm, just to fake TFShapeInferenceRule for TFAvgPool.
+ auto const_node = graph->nodes()->create<moco::TFConst>();
+ {
+ const_node->rank(4);
+ const_node->dim(0).set(1);
+ const_node->dim(1).set(3);
+ const_node->dim(2).set(3);
+ const_node->dim(3).set(1);
+ }
+ avgpool_node->value(const_node);
+
+ setup_output_node(graph, avgpool_node);
+
+ return avgpool_node;
+}
+
+} // namespace
+
+TEST(TFShapeInferenceRule, avgpool_same)
+{
+ moco::TFShapeInferenceRule shape_infer;
+ loco::Graph graph;
+
+ auto avgpool_node = avgpool_network_simple1331(&graph);
+ avgpool_node->padding("SAME");
+
+ bool cont = true;
+ while (cont)
+ {
+ cont = loco::apply(&shape_infer).to(&graph);
+ };
+
+ auto nodeshape = loco::shape_get(avgpool_node);
+ auto tshape = nodeshape.as<loco::TensorShape>();
+ ASSERT_EQ(tshape.rank(), 4);
+ ASSERT_EQ(tshape.dim(0).value(), 1);
+ ASSERT_EQ(tshape.dim(1).value(), 3);
+ ASSERT_EQ(tshape.dim(2).value(), 3);
+ ASSERT_EQ(tshape.dim(3).value(), 1);
+}
+
+TEST(TFShapeInferenceRule, avgpool_valid)
+{
+ moco::TFShapeInferenceRule shape_infer;
+ loco::Graph graph;
+
+ auto avgpool_node = avgpool_network_simple1331(&graph);
+ avgpool_node->padding("VALID");
+
+ bool cont = true;
+ while (cont)
+ {
+ cont = loco::apply(&shape_infer).to(&graph);
+ };
+
+ auto nodeshape = loco::shape_get(avgpool_node);
+ auto tshape = nodeshape.as<loco::TensorShape>();
+ ASSERT_EQ(tshape.rank(), 4);
+ ASSERT_EQ(tshape.dim(0).value(), 1);
+ ASSERT_EQ(tshape.dim(1).value(), 1);
+ ASSERT_EQ(tshape.dim(2).value(), 1);
+ ASSERT_EQ(tshape.dim(3).value(), 1);
+}
+
+namespace
+{
+
+void conv2d_test(const std::array<uint32_t, 4> ifm_shape, const std::array<uint32_t, 4> ker_shape,
+ const std::array<uint32_t, 2> stride_h_w, std::string padding,
+ const std::array<uint32_t, 4> expected_shape)
+{
+ moco::TFShapeInferenceRule shape_infer;
+ loco::Graph graph;
+
+ auto conv2d_node = graph.nodes()->create<moco::TFConv2D>();
+ conv2d_node->data_layout("NHWC");
+ conv2d_node->strides({1, stride_h_w[0], stride_h_w[1], 1});
+ conv2d_node->padding(padding);
+
+ auto ifm_node = graph.nodes()->create<moco::TFConst>();
+ {
+ ifm_node->rank(4);
+ ifm_node->dim(0).set(ifm_shape[0]);
+ ifm_node->dim(1).set(ifm_shape[1]);
+ ifm_node->dim(2).set(ifm_shape[2]);
+ ifm_node->dim(3).set(ifm_shape[3]);
+ }
+
+ auto ker_node = graph.nodes()->create<moco::TFConst>();
+ {
+ ker_node->rank(4);
+ ker_node->dim(0).set(ker_shape[0]);
+ ker_node->dim(1).set(ker_shape[1]);
+ ker_node->dim(2).set(ker_shape[2]);
+ ker_node->dim(3).set(ker_shape[3]);
+ }
+
+ conv2d_node->input(ifm_node);
+ conv2d_node->filter(ker_node);
+
+ setup_output_node(&graph, conv2d_node);
+
+ bool cont = true;
+ while (cont)
+ {
+ cont = loco::apply(&shape_infer).to(&graph);
+ };
+
+ auto nodeshape = loco::shape_get(conv2d_node);
+ auto tshape = nodeshape.as<loco::TensorShape>();
+ ASSERT_EQ(tshape.rank(), 4);
+ ASSERT_EQ(tshape.dim(0).value(), expected_shape[0]);
+ ASSERT_EQ(tshape.dim(1).value(), expected_shape[1]);
+ ASSERT_EQ(tshape.dim(2).value(), expected_shape[2]);
+ ASSERT_EQ(tshape.dim(3).value(), expected_shape[3]);
+}
+
+} // namespace
+
+/*
+ Testing "InceptionV3/InceptionV3/Conv2d_1a_3x3/Conv2D" Conv2D node in Inception_v3:
+ The result shape of this test is generated with the code below:
+
+ ifm = tf.constant(value=1.1, shape=[1, 299, 299, 3])
+ ker = tf.constant(value=1.1, shape=[3, 3, 3, 32])
+
+ out = tf.nn.conv2d(ifm, ker, strides = [1, 2, 2, 1], padding= 'VALID')
+
+ with tf.Session() as sess:
+ res = sess.run(out)
+ print(res.shape)
+ */
+TEST(TFShapeInferenceRule, conv2d_VALID)
+{
+ conv2d_test({1, 299, 299, 3}, // ifm
+ {3, 3, 3, 32}, // ker
+ {2, 2}, // strides
+ "VALID", // padding
+ {1, 149, 149, 32}); // expected shape after FixShape
+}
+
+/*
+ Testing "InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D" Conv2D node in Inception_v3:
+ The result shape of this test is generated with the code below:
+
+ ifm = tf.constant(value=1.1, shape=[1, 147, 147, 32])
+ ker = tf.constant(value=1.1, shape=[3, 3, 32, 64])
+
+ out = tf.nn.conv2d(ifm, ker, strides = [1, 1, 1, 1], padding= 'SAME')
+
+ with tf.Session() as sess:
+ res = sess.run(out)
+ print(res.shape)
+ */
+TEST(TFShapeInferenceRule, conv2d_SAME)
+{
+ conv2d_test({1, 147, 147, 32}, // ifm
+ {3, 3, 32, 64}, // ker
+ {1, 1}, // strides
+ "SAME", // padding
+ {1, 147, 147, 64}); // expected shape after FixShape
+}
+
+/*
+ Testing Pack
+*/
+namespace
+{
+
+moco::TFConst *const_scalar(loco::Graph *graph, int32_t val)
+{
+ auto const_node = graph->nodes()->create<moco::TFConst>();
+
+ const_node->dtype(loco::DataType::S32);
+ const_node->rank(0);
+ const_node->size<loco::DataType::S32>(1);
+ const_node->at<loco::DataType::S32>(0) = val;
+
+ return const_node;
+}
+
+moco::TFConst *const_vector(loco::Graph *graph, int32_t dim)
+{
+ auto const_node = graph->nodes()->create<moco::TFConst>();
+
+ const_node->dtype(loco::DataType::S32);
+ const_node->rank(1);
+ const_node->dim(0).set(dim);
+
+ const_node->size<loco::DataType::S32>(dim);
+ for (int32_t i = 0; i < dim; ++i)
+ const_node->at<loco::DataType::S32>(i) = i;
+
+ return const_node;
+}
+
+moco::TFConst *const_vector_init(loco::Graph *graph, std::vector<int32_t> values)
+{
+ auto const_node = graph->nodes()->create<moco::TFConst>();
+ auto dim = values.size();
+
+ const_node->dtype(loco::DataType::S32);
+ const_node->rank(1);
+ const_node->dim(0).set(dim);
+
+ const_node->size<loco::DataType::S32>(dim);
+ for (int32_t i = 0; i < dim; ++i)
+ const_node->at<loco::DataType::S32>(i) = values[i];
+
+ return const_node;
+}
+
+moco::TFConst *const_matrix(loco::Graph *graph, int32_t dimh, int32_t dimw)
+{
+ auto const_node = graph->nodes()->create<moco::TFConst>();
+
+ const_node->dtype(loco::DataType::S32);
+ const_node->rank(2);
+ const_node->dim(0).set(dimh);
+ const_node->dim(1).set(dimw);
+
+ auto elements = dimh * dimw;
+ const_node->size<loco::DataType::S32>(elements);
+ for (int32_t i = 0; i < elements; ++i)
+ const_node->at<loco::DataType::S32>(i) = i;
+
+ return const_node;
+}
+
+} // namespace
+
+TEST(TFShapeInferenceRule, pack_scalar_2)
+{
+ moco::TFShapeInferenceRule shape_infer;
+ loco::Graph graph;
+
+ auto pack_node = graph.nodes()->create<moco::TFPack>(2);
+ pack_node->axis(0);
+ {
+ auto const_node_0 = const_scalar(&graph, 1);
+ pack_node->values(0, const_node_0);
+ auto const_node_1 = const_scalar(&graph, 1);
+ pack_node->values(1, const_node_1);
+ }
+ setup_output_node(&graph, pack_node);
+
+ bool cont = true;
+ while (cont)
+ {
+ cont = loco::apply(&shape_infer).to(&graph);
+ };
+
+ auto nodeshape = loco::shape_get(pack_node);
+ auto tshape = nodeshape.as<loco::TensorShape>();
+ ASSERT_EQ(tshape.rank(), 1);
+ ASSERT_EQ(tshape.dim(0).value(), 2);
+}
+
+TEST(TFShapeInferenceRule, pack_vector3_2)
+{
+ moco::TFShapeInferenceRule shape_infer;
+ loco::Graph graph;
+
+ auto pack_node = graph.nodes()->create<moco::TFPack>(2);
+ pack_node->axis(0);
+ {
+ auto const_node_0 = const_vector(&graph, 3);
+ pack_node->values(0, const_node_0);
+ auto const_node_1 = const_vector(&graph, 3);
+ pack_node->values(1, const_node_1);
+ }
+ setup_output_node(&graph, pack_node);
+
+ bool cont = true;
+ while (cont)
+ {
+ cont = loco::apply(&shape_infer).to(&graph);
+ };
+
+ auto nodeshape = loco::shape_get(pack_node);
+ auto tshape = nodeshape.as<loco::TensorShape>();
+
+ ASSERT_EQ(tshape.rank(), 2);
+ ASSERT_EQ(tshape.dim(0).value(), 2);
+ ASSERT_EQ(tshape.dim(1).value(), 3);
+}
+
+TEST(TFShapeInferenceRule, pack_vector3_2_axis_1)
+{
+ moco::TFShapeInferenceRule shape_infer;
+ loco::Graph graph;
+
+ auto pack_node = graph.nodes()->create<moco::TFPack>(2);
+ pack_node->axis(1);
+ {
+ auto const_node_0 = const_vector(&graph, 3);
+ pack_node->values(0, const_node_0);
+ auto const_node_1 = const_vector(&graph, 3);
+ pack_node->values(1, const_node_1);
+ }
+ setup_output_node(&graph, pack_node);
+
+ bool cont = true;
+ while (cont)
+ {
+ cont = loco::apply(&shape_infer).to(&graph);
+ };
+
+ auto nodeshape = loco::shape_get(pack_node);
+ auto tshape = nodeshape.as<loco::TensorShape>();
+
+ ASSERT_EQ(tshape.rank(), 2);
+ ASSERT_EQ(tshape.dim(0).value(), 3);
+ ASSERT_EQ(tshape.dim(1).value(), 2);
+}
+
+TEST(TFShapeInferenceRule, pack_vector3_2_axis_m2)
+{
+ moco::TFShapeInferenceRule shape_infer;
+ loco::Graph graph;
+
+ auto pack_node = graph.nodes()->create<moco::TFPack>(2);
+ pack_node->axis(-2);
+ {
+ auto const_node_0 = const_vector(&graph, 3);
+ pack_node->values(0, const_node_0);
+ auto const_node_1 = const_vector(&graph, 3);
+ pack_node->values(1, const_node_1);
+ }
+ setup_output_node(&graph, pack_node);
+
+ bool cont = true;
+ while (cont)
+ {
+ cont = loco::apply(&shape_infer).to(&graph);
+ };
+
+ auto nodeshape = loco::shape_get(pack_node);
+ auto tshape = nodeshape.as<loco::TensorShape>();
+
+ ASSERT_EQ(tshape.rank(), 2);
+ ASSERT_EQ(tshape.dim(0).value(), 2);
+ ASSERT_EQ(tshape.dim(1).value(), 3);
+}
+
+TEST(TFShapeInferenceRule, pack_vector3_2_axis_m3)
+{
+ moco::TFShapeInferenceRule shape_infer;
+ loco::Graph graph;
+
+ auto pack_node = graph.nodes()->create<moco::TFPack>(2);
+ pack_node->axis(-3);
+ {
+ auto const_node_0 = const_vector(&graph, 3);
+ pack_node->values(0, const_node_0);
+ auto const_node_1 = const_vector(&graph, 3);
+ pack_node->values(1, const_node_1);
+ }
+ setup_output_node(&graph, pack_node);
+
+ // -3 is out of range and should throw
+ EXPECT_ANY_THROW(loco::apply(&shape_infer).to(&graph));
+}
+
+TEST(TFShapeInferenceRule, pack_matrix3x4_2_axis_1)
+{
+ moco::TFShapeInferenceRule shape_infer;
+ loco::Graph graph;
+
+ auto pack_node = graph.nodes()->create<moco::TFPack>(2);
+ pack_node->axis(1);
+ {
+ auto const_node_0 = const_matrix(&graph, 3, 4);
+ pack_node->values(0, const_node_0);
+ auto const_node_1 = const_matrix(&graph, 3, 4);
+ pack_node->values(1, const_node_1);
+ }
+ setup_output_node(&graph, pack_node);
+
+ bool cont = true;
+ while (cont)
+ {
+ cont = loco::apply(&shape_infer).to(&graph);
+ };
+
+ auto nodeshape = loco::shape_get(pack_node);
+ auto tshape = nodeshape.as<loco::TensorShape>();
+
+ ASSERT_EQ(tshape.rank(), 3);
+ ASSERT_EQ(tshape.dim(0).value(), 3);
+ ASSERT_EQ(tshape.dim(1).value(), 2);
+ ASSERT_EQ(tshape.dim(2).value(), 4);
+}
+
+TEST(TFShapeInferenceRule, stridedslice_matrix5x5_shrink)
+{
+ moco::TFShapeInferenceRule shape_infer;
+ loco::Graph graph;
+
+ auto sslice_node = graph.nodes()->create<moco::TFStridedSlice>();
+ {
+ auto const_input = const_matrix(&graph, 5, 5);
+ sslice_node->input(const_input);
+
+ auto const_begin = const_vector_init(&graph, {1, 1});
+ sslice_node->begin(const_begin);
+ auto const_end = const_vector_init(&graph, {2, 4});
+ sslice_node->end(const_end);
+ auto const_strides = const_vector_init(&graph, {1, 1});
+ sslice_node->strides(const_strides);
+
+ sslice_node->shrink_axis_mask(1);
+ }
+ setup_output_node(&graph, sslice_node);
+
+ bool cont = true;
+ while (cont)
+ {
+ cont = loco::apply(&shape_infer).to(&graph);
+ };
+
+ auto nodeshape = loco::shape_get(sslice_node);
+ auto tshape = nodeshape.as<loco::TensorShape>();
+
+ ASSERT_EQ(tshape.rank(), 1);
+ ASSERT_EQ(tshape.dim(0).value(), 3);
+}
+
+TEST(TFShapeInferenceRule, stridedslice_4_shrink)
+{
+ moco::TFShapeInferenceRule shape_infer;
+ loco::Graph graph;
+
+ auto sslice_node = graph.nodes()->create<moco::TFStridedSlice>();
+ {
+ auto const_input = const_vector(&graph, 4);
+ sslice_node->input(const_input);
+
+ auto const_begin = const_vector_init(&graph, {0});
+ sslice_node->begin(const_begin);
+ auto const_end = const_vector_init(&graph, {1});
+ sslice_node->end(const_end);
+ auto const_strides = const_vector_init(&graph, {1});
+ sslice_node->strides(const_strides);
+
+ sslice_node->shrink_axis_mask(1);
+ }
+ setup_output_node(&graph, sslice_node);
+
+ bool cont = true;
+ while (cont)
+ {
+ cont = loco::apply(&shape_infer).to(&graph);
+ };
+
+ auto nodeshape = loco::shape_get(sslice_node);
+ auto tshape = nodeshape.as<loco::TensorShape>();
+
+ ASSERT_EQ(tshape.rank(), 0);
+}
diff --git a/compiler/moco/service/src/Service/TFTypeInferenceRule.cpp b/compiler/moco/service/src/Service/TFTypeInferenceRule.cpp
new file mode 100644
index 000000000..112ab955d
--- /dev/null
+++ b/compiler/moco/service/src/Service/TFTypeInferenceRule.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Service/TFTypeInferenceRule.h"
+
+#include "moco/IR/TFDialect.h"
+#include "moco/IR/TFNodeVisitor.h"
+#include "moco/IR/TFNodes.h"
+
+#include "moco/IR/TFNodeImpl.h"
+
+#include <cassert>
+
+namespace
+{
+
+using namespace moco;
+
+struct TypeForwardAlgorithm final : public moco::TFNodeVisitor<loco::DataType>
+{
+ loco::DataType visit(const TFAdd *node) { return dtype_get(node->x()); }
+ loco::DataType visit(const TFAvgPool *node) { return dtype_get(node->value()); }
+ loco::DataType visit(const TFBiasAdd *node) { return dtype_get(node->value()); }
+ loco::DataType visit(const TFConcatV2 *node) { return dtype_get(node->values(0)); }
+
+ loco::DataType visit(const TFConst *node) { return node->dtype(); }
+
+ loco::DataType visit(const TFConv2D *node) { return dtype_get(node->input()); }
+ loco::DataType visit(const TFConv2DBackpropInput *node)
+ {
+ return dtype_get(node->out_backprop());
+ }
+ loco::DataType visit(const TFDepthwiseConv2dNative *node) { return dtype_get(node->input()); }
+ loco::DataType visit(const TFFakeQuantWithMinMaxVars *node) { return dtype_get(node->inputs()); }
+ loco::DataType visit(const TFFusedBatchNorm *node) { return dtype_get(node->x()); }
+ loco::DataType visit(const TFIdentity *node) { return dtype_get(node->input()); }
+ loco::DataType visit(const TFMaximum *node) { return dtype_get(node->x()); }
+ loco::DataType visit(const TFMaxPool *node) { return dtype_get(node->input()); }
+ loco::DataType visit(const TFMean *node) { return dtype_get(node->input()); }
+ loco::DataType visit(const TFMul *node) { return dtype_get(node->x()); }
+ loco::DataType visit(const TFPack *node) { return dtype_get(node->values(0)); }
+ loco::DataType visit(const TFPad *node) { return dtype_get(node->input()); }
+
+ loco::DataType visit(const TFPlaceholder *node) { return node->dtype(); }
+
+ loco::DataType visit(const TFRealDiv *node) { return dtype_get(node->x()); }
+ loco::DataType visit(const TFRelu *node) { return dtype_get(node->features()); }
+ loco::DataType visit(const TFRelu6 *node) { return dtype_get(node->features()); }
+ loco::DataType visit(const TFReshape *node) { return dtype_get(node->tensor()); }
+ loco::DataType visit(const TFRsqrt *node) { return dtype_get(node->x()); }
+
+ loco::DataType visit(const TFShape *node) { return node->dtype(); }
+
+ loco::DataType visit(const TFSoftmax *node) { return dtype_get(node->logits()); }
+ loco::DataType visit(const TFSqrt *node) { return dtype_get(node->x()); }
+ loco::DataType visit(const TFSquaredDifference *node) { return dtype_get(node->x()); }
+ loco::DataType visit(const TFSqueeze *node) { return dtype_get(node->input()); }
+ loco::DataType visit(const TFStopGradient *node) { return dtype_get(node->input()); }
+ loco::DataType visit(const TFStridedSlice *node) { return dtype_get(node->input()); }
+ loco::DataType visit(const TFSub *node) { return dtype_get(node->x()); }
+ loco::DataType visit(const TFTanh *node) { return dtype_get(node->x()); }
+
+ // For virtual nodes
+ loco::DataType visit(const TFPush *node) { return dtype_get(node->from()); }
+};
+
+} // namespace
+
+namespace moco
+{
+
+bool TFTypeInferenceRule::recognize(const loco::Dialect *d) const
+{
+ // This rule recognizes only "TFDialect" dialect!
+ return TFDialect::get() == d;
+}
+
+bool TFTypeInferenceRule::infer(const loco::Node *node, loco::DataType &dtype) const
+{
+ assert(node->dialect() == TFDialect::get());
+
+ TypeForwardAlgorithm alg;
+
+// clang-format off
+#define TENSORFLOW_NODE(OPCODE,CLASS) \
+ if (dynamic_cast<const moco::CLASS *>(node)) \
+ { \
+ auto tfnode = dynamic_cast<const moco::CLASS *>(node); \
+ dtype = tfnode->accept(&alg); \
+ assert(dtype != loco::DataType::Unknown); \
+ return true; \
+ }
+#include "moco/IR/TFNodes.lst"
+#undef TENSORFLOW_NODE
+ // clang-format on
+
+ return false;
+}
+
+} // namespace moco
diff --git a/compiler/moco/service/src/TestHelper.h b/compiler/moco/service/src/TestHelper.h
new file mode 100644
index 000000000..8f3ff764e
--- /dev/null
+++ b/compiler/moco/service/src/TestHelper.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TEST_HELPER_H__
+#define __TEST_HELPER_H__
+
+#include <loco.h>
+
+namespace moco
+{
+namespace test
+{
+
+template <typename T> T *find_first_node_bytype(loco::Graph *g)
+{
+ T *first_node = nullptr;
+ loco::Graph::NodeContext *nodes = g->nodes();
+ uint32_t count = nodes->size();
+
+ for (uint32_t i = 0; i < count; ++i)
+ {
+ first_node = dynamic_cast<T *>(nodes->at(i));
+ if (first_node != nullptr)
+ break;
+ }
+
+ return first_node;
+}
+
+template <typename T> std::vector<T *> find_nodes_bytype(loco::Graph *g)
+{
+ std::vector<T *> find_nodes;
+ loco::Graph::NodeContext *nodes = g->nodes();
+ uint32_t count = nodes->size();
+
+ for (uint32_t i = 0; i < count; ++i)
+ {
+ auto node = dynamic_cast<T *>(nodes->at(i));
+ if (node != nullptr)
+ find_nodes.push_back(node);
+ }
+
+ return find_nodes;
+}
+
+/**
+ * @brief Append setup output of graph by adding loco::Push node
+ *
+ * @note This is subject to change when loco changes I/O treatment
+ */
+void setup_output_node(loco::Graph *graph, loco::Node *last_node);
+
+} // namespace test
+} // namespace moco
+
+#endif // __TEST_HELPER_H__
diff --git a/compiler/moco/service/src/TestHelper.test.cpp b/compiler/moco/service/src/TestHelper.test.cpp
new file mode 100644
index 000000000..59915d60f
--- /dev/null
+++ b/compiler/moco/service/src/TestHelper.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestHelper.h"
+
+namespace moco
+{
+namespace test
+{
+
+void setup_output_node(loco::Graph *graph, loco::Node *last_node)
+{
+ // add push as output
+ auto push_node = graph->nodes()->create<loco::Push>();
+ push_node->from(last_node);
+
+ // set the graph output name and node object
+ auto graph_output = graph->outputs()->create();
+ graph_output->name("output");
+ graph_output->dtype(loco::DataType::FLOAT32);
+ loco::link(graph_output, push_node);
+}
+
+} // namespace test
+} // namespace moco
diff --git a/compiler/moco/support/CMakeLists.txt b/compiler/moco/support/CMakeLists.txt
new file mode 100644
index 000000000..2a896d495
--- /dev/null
+++ b/compiler/moco/support/CMakeLists.txt
@@ -0,0 +1,9 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(moco_support SHARED ${SOURCES})
+target_include_directories(moco_support PRIVATE src)
+target_include_directories(moco_support PUBLIC include)
+target_link_libraries(moco_support PUBLIC loco)
+target_link_libraries(moco_support PUBLIC moco_lang)
+target_link_libraries(moco_support PRIVATE oops)
+install(TARGETS moco_support DESTINATION lib)
diff --git a/compiler/moco/support/README.md b/compiler/moco/support/README.md
new file mode 100644
index 000000000..081f65d39
--- /dev/null
+++ b/compiler/moco/support/README.md
@@ -0,0 +1,3 @@
+# support
+
+_support_ provides _moco_ support libraries
diff --git a/compiler/moco/support/include/moco/Support/NodeAs.h b/compiler/moco/support/include/moco/Support/NodeAs.h
new file mode 100644
index 000000000..dc78ff94a
--- /dev/null
+++ b/compiler/moco/support/include/moco/Support/NodeAs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_SUPPORT_NODE_AS_H__
+#define __MOCO_SUPPORT_NODE_AS_H__
+
+#include <loco.h>
+
+namespace moco
+{
+
+template <typename DERIVED> DERIVED *as(loco::Node *node) { return dynamic_cast<DERIVED *>(node); }
+
+} // namespace moco
+
+#endif // __MOCO_SUPPORT_NODE_AS_H__
diff --git a/compiler/moco/support/include/moco/Support/TFShapeInferenceHelper.h b/compiler/moco/support/include/moco/Support/TFShapeInferenceHelper.h
new file mode 100644
index 000000000..52324700a
--- /dev/null
+++ b/compiler/moco/support/include/moco/Support/TFShapeInferenceHelper.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_SUPPORT_SHAPE_INFERENCE_HELPER_H__
+#define __MOCO_SUPPORT_SHAPE_INFERENCE_HELPER_H__
+
+#include <moco/IR/TFDataLayout.h>
+#include <moco/IR/TFPadding.h>
+
+#include <loco.h>
+#include <loco/IR/NodeShape.h>
+#include <loco/IR/Padding2D.h>
+#include <loco/IR/Stride.h>
+#include <loco/IR/Window.h>
+
+#include <cassert>
+
+namespace moco
+{
+
+/**
+ * @note Helper for return broadcasted shape for binary operators having
+ * different shape for input x and y
+ */
+loco::TensorShape broadcast_shape(const loco::TensorShape &x, const loco::TensorShape &y);
+
+} // namespace moco
+
+namespace moco
+{
+
+/**
+ * @brief Return true if node has shape inference data for checking shape
+ * inference is done or not
+ *
+ * @note Will be deprecated in near future
+ */
+bool shape_inference_done(const loco::Node *node);
+
+/**
+ * @note While in shape inference, Node may be Canonical, TF dialect or other dialects
+ * This will provide common loco::NodeShape as shape information
+ */
+loco::NodeShape node_shape(const loco::Node *node);
+bool node_shape(const loco::Node *node, loco::NodeShape &nodeshape);
+
+loco::TensorShape as_tensor_shape(const loco::FeatureShape &feature_shape,
+ const TFDataLayout &data_layout);
+
+loco::FeatureShape as_feature_shape(const loco::NodeShape &nodeshape,
+ const TFDataLayout &data_layout);
+
+} // namespace moco
+
+namespace moco
+{
+
+struct PlaneShape
+{
+ loco::Dimension height;
+ loco::Dimension width;
+};
+
+class FeatureShapeUpdater final
+{
+public:
+ FeatureShapeUpdater(loco::FeatureShape *ptr) : _feature_shape_ptr{ptr}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void with(const PlaneShape &plane_shape) const
+ {
+ _feature_shape_ptr->height() = plane_shape.height;
+ _feature_shape_ptr->width() = plane_shape.width;
+ }
+
+private:
+ loco::FeatureShape *_feature_shape_ptr;
+};
+
+PlaneShape make_plane_shape(const loco::FeatureShape &feature_shape);
+
+FeatureShapeUpdater update(loco::FeatureShape &feature_shape);
+
+class PlaneInference
+{
+protected:
+ struct Parameters
+ {
+ PlaneShape input;
+ PlaneShape stride;
+ PlaneShape window;
+ PlaneShape dilation;
+ PlaneShape effective_window;
+ PlaneShape output;
+ };
+
+ void fill(Parameters &p, const PlaneShape &in)
+ {
+ p.input.height = in.height;
+ p.input.width = in.width;
+
+ p.stride.height = _stride.vertical();
+ p.stride.width = _stride.horizontal();
+
+ p.window.height = _window.vertical();
+ p.window.width = _window.horizontal();
+
+ // TODO support dilation
+ p.dilation.height = 1;
+ p.dilation.width = 1;
+
+ p.effective_window.height = p.dilation.height.value() * (p.window.height.value() - 1) + 1;
+ p.effective_window.width = p.dilation.width.value() * (p.window.width.value() - 1) + 1;
+ }
+
+ PlaneShape infer(const Parameters &p, const PlaneShape &)
+ {
+ PlaneShape res;
+
+ if (_padding == "VALID")
+ {
+ res.height =
+ (p.input.height.value() + p.stride.height.value() - p.effective_window.height.value()) /
+ p.stride.height.value();
+ res.width =
+ (p.input.width.value() + p.stride.width.value() - p.effective_window.width.value()) /
+ p.stride.width.value();
+ }
+ else if (_padding == "SAME")
+ {
+ res.height = (p.input.height.value() + p.stride.height.value() - 1) / p.stride.height.value();
+ res.width = (p.input.width.value() + p.stride.width.value() - 1) / p.stride.width.value();
+ }
+ else
+ assert(false);
+
+ return res;
+ }
+
+public:
+ PlaneShape operator()(const PlaneShape &in)
+ {
+ Parameters p;
+
+ fill(p, in);
+
+ return infer(p, in);
+ }
+
+public:
+ void padding(const TFPadding &value) { _padding = value; }
+ void window(const loco::Window<2> value) { _window = value; }
+ void stride(const loco::Stride<2> value) { _stride = value; }
+
+private:
+ TFPadding _padding;
+ loco::Window<2> _window;
+ loco::Stride<2> _stride;
+};
+
+class Padding2DInference final : public PlaneInference
+{
+public:
+ loco::Padding2D operator()(const PlaneShape &in)
+ {
+ Parameters p;
+
+ fill(p, in);
+
+ auto output = infer(p, in);
+
+ int64_t i_height = (int64_t)(output.height.value() - 1) * (int64_t)p.stride.height.value() +
+ (int64_t)p.effective_window.height.value() - (int64_t)p.input.height.value();
+ int64_t i_width = (int64_t)(output.width.value() - 1) * (int64_t)p.stride.width.value() +
+ (int64_t)p.effective_window.width.value() - (int64_t)p.input.width.value();
+
+ uint32_t pad_height = i_height >= 0 ? (uint32_t)i_height : 0U;
+ uint32_t pad_width = i_width >= 0 ? (uint32_t)i_width : 0U;
+
+ loco::Padding2D padding2d;
+
+ padding2d.top(pad_height / 2);
+ padding2d.bottom(pad_height - padding2d.top());
+ padding2d.left(pad_width / 2);
+ padding2d.right(pad_width - padding2d.left());
+
+ return padding2d;
+ }
+};
+
+} // namespace moco
+
+namespace moco
+{
+
+using TFStrides = std::vector<int64_t>;
+using TFKSize = std::vector<int64_t>;
+
+loco::Stride<2> stride_of(const TFStrides &strides, const TFDataLayout &datalayout);
+loco::Window<2> window_of(const TFKSize &ksize, const TFDataLayout &datalayout);
+loco::Window<2> window_of(const loco::TensorShape &shape, const TFDataLayout &datalayout);
+
+} // namespace moco
+
+#endif // __MOCO_SUPPORT_SHAPE_INFERENCE_HELPER_H__
diff --git a/compiler/moco/support/src/TFShapeInferenceHelper.cpp b/compiler/moco/support/src/TFShapeInferenceHelper.cpp
new file mode 100644
index 000000000..13e514a78
--- /dev/null
+++ b/compiler/moco/support/src/TFShapeInferenceHelper.cpp
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moco/Support/TFShapeInferenceHelper.h"
+
+#include <loco/Service/ShapeInference.h>
+
+#include <oops/InternalExn.h>
+
+#include <cassert>
+
+namespace
+{
+
+// TODO Use codes in loco and remove duplicate broadcast_shape() and related
+/**
+ * @brief Create a higher-rank TensorShape following NumPy broadcasting semantics
+ *
+ * HOW TO USE:
+ *
+ * auto expanded_tensor_shape = expand(tensor_shape).to(N);
+ */
+class TensorShapeExpander
+{
+public:
+ TensorShapeExpander(const loco::TensorShape &shape) : _shape{shape}
+ {
+ // DO NOTHING
+ }
+
+public:
+ loco::TensorShape to(uint32_t output_rank)
+ {
+ auto const &input_shape = _shape;
+ uint32_t const input_rank = input_shape.rank();
+
+ assert(input_rank <= output_rank && "Cannot shrink rank");
+ uint32_t const axis_shift = output_rank - input_rank;
+
+ loco::TensorShape output_shape;
+
+ output_shape.rank(output_rank);
+ for (uint32_t axis = 0; axis < output_rank; ++axis)
+ {
+ output_shape.dim(axis) = (axis < axis_shift) ? 1 : input_shape.dim(axis - axis_shift);
+ }
+
+ return output_shape;
+ }
+
+private:
+ const loco::TensorShape _shape;
+};
+
+/**
+ * @brief Expand shape x and y to the same rank by aligning right and filling with 1
+ */
+void expand_rank(loco::TensorShape &x, loco::TensorShape &y)
+{
+ auto x_rank = x.rank();
+ auto y_rank = y.rank();
+
+ if (x_rank == y_rank)
+ return;
+
+ TensorShapeExpander x_exp(x);
+ TensorShapeExpander y_exp(y);
+
+ auto xy_rank = std::max(x_rank, y_rank);
+
+ x = x_rank > y_rank ? x : x_exp.to(xy_rank);
+ y = y_rank > x_rank ? y : y_exp.to(xy_rank);
+}
+
+/**
+ * @brief Returns shape with expanded dimensions of inputs x and y having the same rank
+ */
+loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y)
+{
+ assert(x.rank() == y.rank());
+
+ auto rank = x.rank();
+
+ loco::TensorShape output_shape;
+
+ output_shape.rank(rank);
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ assert(x.dim(axis).known() && y.dim(axis).known());
+
+ auto x_dim = x.dim(axis).value();
+ auto y_dim = y.dim(axis).value();
+
+ // each dimension of x and y should be same or one must be 1 if different
+ if (!((x_dim == y_dim) || (x_dim == 1 || y_dim == 1)))
+ {
+ // TODO may need to refine message
+ INTERNAL_EXN("ShapeInference: Input shapes don't match");
+ }
+
+ output_shape.dim(axis) = std::max(x_dim, y_dim);
+ }
+
+ return output_shape;
+}
+
+} // namespace
+
+namespace moco
+{
+
+loco::TensorShape broadcast_shape(const loco::TensorShape &x, const loco::TensorShape &y)
+{
+ auto x_match = x;
+ auto y_match = y;
+
+ expand_rank(x_match, y_match);
+
+ auto output_shape = expand_dimension(x_match, y_match);
+
+ return output_shape;
+}
+
+} // namespace moco
+
+namespace moco
+{
+
+loco::NodeShape node_shape(const loco::Node *node)
+{
+ loco::NodeShape nodeshape; // default domain is Unknown
+
+ if (loco::shape_known(node))
+ {
+ nodeshape = loco::shape_get(node);
+ }
+
+ return nodeshape;
+}
+
+bool node_shape(const loco::Node *node, loco::NodeShape &nodeshape)
+{
+ nodeshape = node_shape(node);
+ return (nodeshape.domain() != loco::Domain::Unknown);
+}
+
+loco::TensorShape as_tensor_shape(const loco::FeatureShape &feature_shape,
+ const TFDataLayout &data_layout)
+{
+ loco::TensorShape tensor_shape;
+
+ tensor_shape.rank(4);
+ if (data_layout == "NHWC")
+ {
+ tensor_shape.dim(0) = feature_shape.count();
+ tensor_shape.dim(1) = feature_shape.height();
+ tensor_shape.dim(2) = feature_shape.width();
+ tensor_shape.dim(3) = feature_shape.depth();
+ }
+ else if (data_layout == "NCHW")
+ {
+ tensor_shape.dim(0) = feature_shape.count();
+ tensor_shape.dim(1) = feature_shape.depth();
+ tensor_shape.dim(2) = feature_shape.height();
+ tensor_shape.dim(3) = feature_shape.width();
+ }
+ else
+ {
+ // TODO support for other data_layout if needed
+ INTERNAL_EXN_V("ShapeInference: Unknown data_format", data_layout);
+ }
+
+ return tensor_shape;
+}
+
+loco::FeatureShape as_feature_shape(const loco::NodeShape &nodeshape,
+ const TFDataLayout &data_layout)
+{
+ if (nodeshape.domain() == loco::Domain::Feature)
+ return nodeshape.as<loco::FeatureShape>();
+
+ loco::FeatureShape feature_shape;
+
+ // only convert from tensor to feature
+ if (nodeshape.domain() != loco::Domain::Tensor)
+ {
+ INTERNAL_EXN("ShapeInference: Invalid shape information");
+ }
+
+ loco::TensorShape tensor_shape = nodeshape.as<loco::TensorShape>();
+
+ if (tensor_shape.rank() != 4)
+ {
+ INTERNAL_EXN("ShapeInference: Rank is not 4");
+ }
+
+ if (data_layout == "NHWC")
+ {
+ feature_shape.count() = tensor_shape.dim(0);
+ feature_shape.height() = tensor_shape.dim(1);
+ feature_shape.width() = tensor_shape.dim(2);
+ feature_shape.depth() = tensor_shape.dim(3);
+ }
+ else if (data_layout == "NCHW")
+ {
+ feature_shape.count() = tensor_shape.dim(0);
+ feature_shape.depth() = tensor_shape.dim(1);
+ feature_shape.height() = tensor_shape.dim(2);
+ feature_shape.width() = tensor_shape.dim(3);
+ }
+ else
+ {
+ // TODO support for other data_layout if needed
+ INTERNAL_EXN_V("ShapeInference: Unknown data_format", data_layout);
+ }
+
+ return feature_shape;
+}
+
+} // namespace moco
+
+namespace moco
+{
+
+PlaneShape make_plane_shape(const loco::FeatureShape &feature_shape)
+{
+ PlaneShape plane_shape;
+
+ plane_shape.height = feature_shape.height();
+ plane_shape.width = feature_shape.width();
+
+ return plane_shape;
+}
+
+FeatureShapeUpdater update(loco::FeatureShape &feature_shape)
+{
+ return FeatureShapeUpdater{&feature_shape};
+}
+
+} // namespace moco
+
+namespace
+{
+
+/**
+ * @brief Class to represent TensorFlow "data_format" attr.
+ */
+enum class DataLayout
+{
+ NHWC,
+ NCHW,
+};
+
+DataLayout as_data_layout(const std::string &tf_layout_str)
+{
+ if (tf_layout_str == "NHWC")
+ return DataLayout::NHWC;
+ else if (tf_layout_str == "NCHW")
+ return DataLayout::NCHW;
+ else
+ /// @note data layout tag in TensorFlow is 'data_format'
+ INTERNAL_EXN_V("ShapeInference: Unknown data_format", tf_layout_str);
+}
+
+} // namespace
+
+namespace moco
+{
+
+loco::Stride<2> stride_of(const TFStrides &strides, const TFDataLayout &datalayout)
+{
+ loco::Stride<2> stride;
+
+ auto data_layout = as_data_layout(datalayout);
+ if (data_layout == DataLayout::NHWC)
+ {
+ stride.vertical(strides[1]);
+ stride.horizontal(strides[2]);
+ }
+ else if (data_layout == DataLayout::NCHW)
+ {
+ stride.vertical(strides[2]);
+ stride.horizontal(strides[3]);
+ }
+ else
+ {
+ // TODO add more datalayout supports if needed
+ INTERNAL_EXN("ShapeInference: Unknown data_format");
+ }
+
+ return stride;
+}
+
+loco::Window<2> window_of(const TFKSize &ksize, const TFDataLayout &datalayout)
+{
+ loco::Window<2> window;
+
+ auto data_layout = as_data_layout(datalayout);
+ if (data_layout == DataLayout::NHWC)
+ {
+ window.vertical(ksize[1]);
+ window.horizontal(ksize[2]);
+ }
+ else if (data_layout == DataLayout::NCHW)
+ {
+ window.vertical(ksize[2]);
+ window.horizontal(ksize[3]);
+ }
+ else
+ {
+ // TODO add more datalayout supports if needed
+ INTERNAL_EXN("ShapeInference: Unknown data_format");
+ }
+
+ return window;
+}
+
+loco::Window<2> window_of(const loco::TensorShape &shape, const TFDataLayout &datalayout)
+{
+ loco::Window<2> window;
+
+ if (datalayout == "HWIO")
+ {
+ window.vertical(shape.dim(0).value());
+ window.horizontal(shape.dim(1).value());
+ }
+ else if (datalayout == "HWCM")
+ {
+ window.vertical(shape.dim(0).value());
+ window.horizontal(shape.dim(1).value());
+ }
+ else
+ {
+ // TODO add more datalayout supports if needed
+ INTERNAL_EXN_V("ShapeInference: Unknown data_format", datalayout);
+ }
+
+ return window;
+}
+
+} // namespace moco
diff --git a/compiler/morph/CMakeLists.txt b/compiler/morph/CMakeLists.txt
new file mode 100644
index 000000000..ec7da8d30
--- /dev/null
+++ b/compiler/morph/CMakeLists.txt
@@ -0,0 +1,20 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(morph STATIC ${SOURCES})
+set_target_properties(morph PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(morph PUBLIC include)
+target_link_libraries(morph PRIVATE nncc_common)
+target_link_libraries(morph PUBLIC angkor)
+
+nnas_find_package(GTest QUIET)
+
+if(NOT GTest_FOUND)
+ return()
+endif(NOT GTest_FOUND)
+
+add_executable(morph_test ${TESTS})
+target_link_libraries(morph_test morph)
+target_link_libraries(morph_test gtest_main)
+add_test(morph_test morph_test)
diff --git a/compiler/morph/README.md b/compiler/morph/README.md
new file mode 100644
index 000000000..45ca36078
--- /dev/null
+++ b/compiler/morph/README.md
@@ -0,0 +1,3 @@
+# morph
+
+``morph`` is a collection of shape conversion routines for various NN frameworks, such as Caffe.
diff --git a/compiler/morph/include/morph/caffe.h b/compiler/morph/include/morph/caffe.h
new file mode 100644
index 000000000..d0a00dc2b
--- /dev/null
+++ b/compiler/morph/include/morph/caffe.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MORPH_CAFFE_H__
+#define __MORPH_CAFFE_H__
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/feature/Shape.h>
+#include <nncc/core/ADT/kernel/Shape.h>
+
+namespace morph
+{
+namespace caffe
+{
+
+nncc::core::ADT::tensor::Shape as_tensor_shape(const nncc::core::ADT::feature::Shape &);
+nncc::core::ADT::tensor::Shape as_tensor_shape(const nncc::core::ADT::kernel::Shape &);
+
+nncc::core::ADT::feature::Shape as_feature_shape(const nncc::core::ADT::tensor::Shape &);
+nncc::core::ADT::kernel::Shape as_kernel_shape(const nncc::core::ADT::tensor::Shape &);
+
+} // namespace caffe
+} // namespace morph
+
+#endif // __MORPH_CAFFE_H__
diff --git a/compiler/morph/include/morph/dims.h b/compiler/morph/include/morph/dims.h
new file mode 100644
index 000000000..5f9c8a8f4
--- /dev/null
+++ b/compiler/morph/include/morph/dims.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MORPH_DIMS_H__
+#define __MORPH_DIMS_H__
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <vector>
+
+namespace morph
+{
+
+template <typename T> using Dims = std::vector<T>;
+
+Dims<uint32_t> as_dims(const nncc::core::ADT::tensor::Shape &);
+
+} // namespace morph
+
+#endif // __MORPH_DIMS_H__
diff --git a/compiler/morph/include/morph/nnapi.h b/compiler/morph/include/morph/nnapi.h
new file mode 100644
index 000000000..be742b027
--- /dev/null
+++ b/compiler/morph/include/morph/nnapi.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MORPH_NNAPI_H__
+#define __MORPH_NNAPI_H__
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/feature/Shape.h>
+#include <nncc/core/ADT/kernel/Shape.h>
+
+namespace morph
+{
+namespace nnapi
+{
+
+nncc::core::ADT::tensor::Shape as_tensor_shape(const nncc::core::ADT::feature::Shape &);
+nncc::core::ADT::tensor::Shape as_tensor_shape(const nncc::core::ADT::kernel::Shape &);
+
+nncc::core::ADT::feature::Shape as_feature_shape(const nncc::core::ADT::tensor::Shape &);
+nncc::core::ADT::kernel::Shape as_kernel_shape(const nncc::core::ADT::tensor::Shape &);
+
+} // namespace nnapi
+} // namespace morph
+
+#endif // __MORPH_NNAPI_H__
diff --git a/compiler/morph/include/morph/tflite.h b/compiler/morph/include/morph/tflite.h
new file mode 100644
index 000000000..dc33b4ac9
--- /dev/null
+++ b/compiler/morph/include/morph/tflite.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MORPH_TFLITE_H__
+#define __MORPH_TFLITE_H__
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/feature/Shape.h>
+#include <nncc/core/ADT/kernel/Shape.h>
+
+namespace morph
+{
+namespace tflite
+{
+
+nncc::core::ADT::tensor::Shape as_tensor_shape(const nncc::core::ADT::feature::Shape &);
+nncc::core::ADT::tensor::Shape as_tensor_shape(const nncc::core::ADT::kernel::Shape &);
+
+nncc::core::ADT::feature::Shape as_feature_shape(const nncc::core::ADT::tensor::Shape &);
+nncc::core::ADT::kernel::Shape as_kernel_shape(const nncc::core::ADT::tensor::Shape &);
+
+} // namespace tflite
+} // namespace morph
+
+#endif // __MORPH_TFLITE_H__
diff --git a/compiler/morph/requires.cmake b/compiler/morph/requires.cmake
new file mode 100644
index 000000000..654db88c3
--- /dev/null
+++ b/compiler/morph/requires.cmake
@@ -0,0 +1 @@
+require("angkor")
diff --git a/compiler/morph/src/caffe.cpp b/compiler/morph/src/caffe.cpp
new file mode 100644
index 000000000..29a20ff98
--- /dev/null
+++ b/compiler/morph/src/caffe.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "morph/caffe.h"
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+
+namespace morph
+{
+namespace caffe
+{
+
+tensor::Shape as_tensor_shape(const feature::Shape &shape)
+{
+ tensor::Shape res;
+
+ res.resize(4);
+ res.dim(0) = 1;
+ res.dim(1) = shape.depth();
+ res.dim(2) = shape.height();
+ res.dim(3) = shape.width();
+
+ return res;
+}
+
+tensor::Shape as_tensor_shape(const kernel::Shape &shape)
+{
+ tensor::Shape res;
+
+ res.resize(4);
+ res.dim(0) = shape.count();
+ res.dim(1) = shape.depth();
+ res.dim(2) = shape.height();
+ res.dim(3) = shape.width();
+
+ return res;
+}
+
+feature::Shape as_feature_shape(const tensor::Shape &shape)
+{
+ assert(shape.rank() == 4);
+ assert(shape.dim(0) == 1);
+ return feature::Shape{shape.dim(1), shape.dim(2), shape.dim(3)};
+}
+
+kernel::Shape as_kernel_shape(const tensor::Shape &shape)
+{
+ assert(shape.rank() == 4);
+ return kernel::Shape{shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3)};
+}
+
+} // namespace caffe
+} // namespace morph
diff --git a/compiler/morph/src/caffe.test.cpp b/compiler/morph/src/caffe.test.cpp
new file mode 100644
index 000000000..030903b21
--- /dev/null
+++ b/compiler/morph/src/caffe.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "morph/caffe.h"
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT;
+
+TEST(MORPH_CAFFE, as_feature_shape)
+{
+ auto shape = morph::caffe::as_feature_shape(tensor::Shape{1, 3, 4, 5});
+
+ ASSERT_EQ(shape.depth(), 3);
+ ASSERT_EQ(shape.height(), 4);
+ ASSERT_EQ(shape.width(), 5);
+}
+
+TEST(MORPH_CAFFE, as_kernel_shape)
+{
+ auto shape = morph::caffe::as_kernel_shape(tensor::Shape{2, 3, 4, 5});
+
+ ASSERT_EQ(shape.count(), 2);
+ ASSERT_EQ(shape.depth(), 3);
+ ASSERT_EQ(shape.height(), 4);
+ ASSERT_EQ(shape.width(), 5);
+}
+
+TEST(MORPH_CAFFE, as_tensor_shape)
+{
+ // From feature::Shape
+ {
+ auto shape = morph::caffe::as_tensor_shape(feature::Shape{2, 3, 4});
+
+ ASSERT_EQ(shape.rank(), 4);
+ ASSERT_EQ(shape.dim(0), 1);
+ ASSERT_EQ(shape.dim(1), 2);
+ ASSERT_EQ(shape.dim(2), 3);
+ ASSERT_EQ(shape.dim(3), 4);
+ }
+
+ // From kernel::Shape
+ {
+ auto shape = morph::caffe::as_tensor_shape(kernel::Shape{2, 3, 4, 5});
+
+ ASSERT_EQ(shape.rank(), 4);
+ ASSERT_EQ(shape.dim(0), 2);
+ ASSERT_EQ(shape.dim(1), 3);
+ ASSERT_EQ(shape.dim(2), 4);
+ ASSERT_EQ(shape.dim(3), 5);
+ }
+}
diff --git a/compiler/morph/src/dims.cpp b/compiler/morph/src/dims.cpp
new file mode 100644
index 000000000..44d407d20
--- /dev/null
+++ b/compiler/morph/src/dims.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "morph/dims.h"
+
+using namespace nncc::core::ADT;
+
+namespace morph
+{
+
+Dims<uint32_t> as_dims(const tensor::Shape &shape)
+{
+ Dims<uint32_t> res;
+
+ for (uint32_t n = 0; n < shape.rank(); ++n)
+ {
+ res.emplace_back(shape.dim(n));
+ }
+
+ return res;
+}
+
+} // namespace morph
diff --git a/compiler/morph/src/dims.test.cpp b/compiler/morph/src/dims.test.cpp
new file mode 100644
index 000000000..4a79e7bdb
--- /dev/null
+++ b/compiler/morph/src/dims.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "morph/dims.h"
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT;
+
+TEST(DimsTest, as_dims_from_tensor)
+{
+ auto dims = morph::as_dims(tensor::Shape{1, 3, 4, 5});
+
+ ASSERT_EQ(dims.size(), 4);
+ ASSERT_EQ(dims.at(0), 1);
+ ASSERT_EQ(dims.at(1), 3);
+ ASSERT_EQ(dims.at(2), 4);
+ ASSERT_EQ(dims.at(3), 5);
+}
diff --git a/compiler/morph/src/nnapi.cpp b/compiler/morph/src/nnapi.cpp
new file mode 100644
index 000000000..e00be3d23
--- /dev/null
+++ b/compiler/morph/src/nnapi.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "morph/nnapi.h"
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+
+namespace morph
+{
+namespace nnapi
+{
+
+tensor::Shape as_tensor_shape(const feature::Shape &shape)
+{
+ tensor::Shape res;
+
+ res.resize(4);
+ res.dim(0) = 1;
+ res.dim(1) = shape.height();
+ res.dim(2) = shape.width();
+ res.dim(3) = shape.depth();
+
+ return res;
+}
+
+tensor::Shape as_tensor_shape(const kernel::Shape &shape)
+{
+ tensor::Shape res;
+
+ res.resize(4);
+ res.dim(0) = shape.count();
+ res.dim(1) = shape.height();
+ res.dim(2) = shape.width();
+ res.dim(3) = shape.depth();
+
+ return res;
+}
+
+feature::Shape as_feature_shape(const tensor::Shape &shape)
+{
+ assert(shape.rank() == 4);
+ assert(shape.dim(0) == 1);
+ return feature::Shape{shape.dim(3), shape.dim(1), shape.dim(2)};
+}
+
+kernel::Shape as_kernel_shape(const tensor::Shape &shape)
+{
+ assert(shape.rank() == 4);
+ return kernel::Shape{shape.dim(0), shape.dim(3), shape.dim(1), shape.dim(2)};
+}
+
+} // namespace nnapi
+} // namespace morph
diff --git a/compiler/morph/src/nnapi.test.cpp b/compiler/morph/src/nnapi.test.cpp
new file mode 100644
index 000000000..b31a69cbb
--- /dev/null
+++ b/compiler/morph/src/nnapi.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "morph/nnapi.h"
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT;
+
+TEST(MORPH_NNAPI, as_feature_shape)
+{
+ auto shape = morph::nnapi::as_feature_shape(tensor::Shape{1, 3, 4, 5});
+
+ ASSERT_EQ(shape.depth(), 5);
+ ASSERT_EQ(shape.height(), 3);
+ ASSERT_EQ(shape.width(), 4);
+}
+
+TEST(MORPH_NNAPI, as_kernel_shape)
+{
+ auto shape = morph::nnapi::as_kernel_shape(tensor::Shape{2, 3, 4, 5});
+
+ ASSERT_EQ(shape.count(), 2);
+ ASSERT_EQ(shape.depth(), 5);
+ ASSERT_EQ(shape.height(), 3);
+ ASSERT_EQ(shape.width(), 4);
+}
+
+TEST(MORPH_NNAPI, as_tensor_shape)
+{
+ // From feature::Shape
+ {
+ auto shape = morph::nnapi::as_tensor_shape(feature::Shape{2, 3, 4});
+
+ ASSERT_EQ(shape.rank(), 4);
+ ASSERT_EQ(shape.dim(0), 1);
+ ASSERT_EQ(shape.dim(1), 3);
+ ASSERT_EQ(shape.dim(2), 4);
+ ASSERT_EQ(shape.dim(3), 2);
+ }
+
+ // From kernel::Shape
+ {
+ auto shape = morph::nnapi::as_tensor_shape(kernel::Shape{2, 3, 4, 5});
+
+ ASSERT_EQ(shape.rank(), 4);
+ ASSERT_EQ(shape.dim(0), 2);
+ ASSERT_EQ(shape.dim(1), 4);
+ ASSERT_EQ(shape.dim(2), 5);
+ ASSERT_EQ(shape.dim(3), 3);
+ }
+}
diff --git a/compiler/morph/src/tflite.cpp b/compiler/morph/src/tflite.cpp
new file mode 100644
index 000000000..a87063ed2
--- /dev/null
+++ b/compiler/morph/src/tflite.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "morph/tflite.h"
+
+#include <cassert>
+
+using namespace nncc::core::ADT;
+
+namespace morph
+{
+namespace tflite
+{
+
+tensor::Shape as_tensor_shape(const feature::Shape &shape)
+{
+ tensor::Shape res;
+
+ res.resize(4);
+ res.dim(0) = 1;
+ res.dim(1) = shape.height();
+ res.dim(2) = shape.width();
+ res.dim(3) = shape.depth();
+
+ return res;
+}
+
+tensor::Shape as_tensor_shape(const kernel::Shape &shape)
+{
+ tensor::Shape res;
+
+ res.resize(4);
+ res.dim(0) = shape.count();
+ res.dim(1) = shape.height();
+ res.dim(2) = shape.width();
+ res.dim(3) = shape.depth();
+
+ return res;
+}
+
+feature::Shape as_feature_shape(const tensor::Shape &shape)
+{
+ assert(shape.rank() == 4);
+ assert(shape.dim(0) == 1);
+ return feature::Shape{shape.dim(3), shape.dim(1), shape.dim(2)};
+}
+
+kernel::Shape as_kernel_shape(const tensor::Shape &shape)
+{
+ assert(shape.rank() == 4);
+ return kernel::Shape{shape.dim(0), shape.dim(3), shape.dim(1), shape.dim(2)};
+}
+
+} // namespace tflite
+} // namespace morph
diff --git a/compiler/morph/src/tflite.test.cpp b/compiler/morph/src/tflite.test.cpp
new file mode 100644
index 000000000..e78b61b8a
--- /dev/null
+++ b/compiler/morph/src/tflite.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "morph/tflite.h"
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT;
+
+TEST(MORPH_TFLITE, as_feature_shape)
+{
+ auto shape = morph::tflite::as_feature_shape(tensor::Shape{1, 3, 4, 5});
+
+ ASSERT_EQ(shape.depth(), 5);
+ ASSERT_EQ(shape.height(), 3);
+ ASSERT_EQ(shape.width(), 4);
+}
+
+TEST(MORPH_TFLITE, as_kernel_shape)
+{
+ auto shape = morph::tflite::as_kernel_shape(tensor::Shape{2, 3, 4, 5});
+
+ ASSERT_EQ(shape.count(), 2);
+ ASSERT_EQ(shape.depth(), 5);
+ ASSERT_EQ(shape.height(), 3);
+ ASSERT_EQ(shape.width(), 4);
+}
+
+TEST(MORPH_TFLITE, as_tensor_shape)
+{
+ // From feature::Shape
+ {
+ auto shape = morph::tflite::as_tensor_shape(feature::Shape{2, 3, 4});
+
+ ASSERT_EQ(shape.rank(), 4);
+ ASSERT_EQ(shape.dim(0), 1);
+ ASSERT_EQ(shape.dim(1), 3);
+ ASSERT_EQ(shape.dim(2), 4);
+ ASSERT_EQ(shape.dim(3), 2);
+ }
+
+ // From kernel::Shape
+ {
+ auto shape = morph::tflite::as_tensor_shape(kernel::Shape{2, 3, 4, 5});
+
+ ASSERT_EQ(shape.rank(), 4);
+ ASSERT_EQ(shape.dim(0), 2);
+ ASSERT_EQ(shape.dim(1), 4);
+ ASSERT_EQ(shape.dim(2), 5);
+ ASSERT_EQ(shape.dim(3), 3);
+ }
+}
diff --git a/compiler/nest/CMakeLists.txt b/compiler/nest/CMakeLists.txt
new file mode 100644
index 000000000..ad6d4787c
--- /dev/null
+++ b/compiler/nest/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(core)
diff --git a/compiler/nest/README.md b/compiler/nest/README.md
new file mode 100644
index 000000000..64cfb75ed
--- /dev/null
+++ b/compiler/nest/README.md
@@ -0,0 +1,8 @@
+# nest
+
+_nest_ is a lightweight nested loop generation library, which makes it easy to generate complex,
+optimized nested loops (such as loops in conv2d).
+
+## References
+- [Halide](https://github.com/halide/Halide)
+- [Tensor Comprehension](https://github.com/facebookresearch/TensorComprehensions)
diff --git a/compiler/nest/core/CMakeLists.txt b/compiler/nest/core/CMakeLists.txt
new file mode 100644
index 000000000..b603f9ae9
--- /dev/null
+++ b/compiler/nest/core/CMakeLists.txt
@@ -0,0 +1,27 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(nest_core STATIC ${SOURCES})
+set_target_properties(nest_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(nest_core PUBLIC include)
+
+file(GLOB EXAMPLE_FILES "examples/*.cpp")
+
+foreach(EXAMPLE_FILE IN ITEMS ${EXAMPLE_FILES})
+ get_filename_component(EXAMPLE_NAME ${EXAMPLE_FILE} NAME_WE)
+ set(TARGET_NAME nest_IR_example_${EXAMPLE_NAME})
+ add_executable(${TARGET_NAME} ${EXAMPLE_FILE})
+ target_link_libraries(${TARGET_NAME} nest_core)
+endforeach(EXAMPLE_FILE)
+
+nnas_find_package(GTest QUIET)
+
+if(NOT GTest_FOUND)
+ return()
+endif(NOT GTest_FOUND)
+
+add_executable(nest_core_test ${TESTS})
+target_link_libraries(nest_core_test gtest_main)
+target_link_libraries(nest_core_test nest_core)
+add_test(nest_core_test nest_core_test)
diff --git a/compiler/nest/core/examples/conv2d.cpp b/compiler/nest/core/examples/conv2d.cpp
new file mode 100644
index 000000000..e405af9c0
--- /dev/null
+++ b/compiler/nest/core/examples/conv2d.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <nest/Module.h>
+
+int main(int, char **)
+{
+ // This example shows how to specify convolution with IFM(1x3x3) and Kernel(1x1x3x3) with nest
+ // - STRIDE is 1, and there is no padding
+ //
+ // The below code corresponds to the following nest DSL code:
+ // ----------------------------------------------------------------------------------------------
+ // Domain ofm(1, 1, 1)
+ // Domain ifm(1, 3, 3)
+ // Domain ker(1, 1, 3, 3)
+ //
+ // Var ofm_ch : { min = 0, max = 1 }
+ // Var ofm_row : { min = 0, max = 1 }
+ // Var ofm_col : { min = 0, max = 1 }
+ // Var ker_ch : { min = 0, max = 1 }
+ // Var ker_row : { min = 0, max = 3 }
+ // Var ker_col : { min = 0, max = 3 }
+ //
+  // PUSH ifm(ker_ch, ofm_row + ker_row, ofm_col + ker_col) * ker(ofm_ch, ker_ch, ker_row, ker_col)
+ // RET ofm(ofm_ch, ofm_row, ofm_col)
+ // ----------------------------------------------------------------------------------------------
+ //
+ // The first part declares Domain(s) which corresponds to a multi-dimensional array in C-style
+  // (without type). For example, 'Domain ifm(1, 3, 3)' corresponds to the
+  // following C array declaration.
+  //    float ifm[1][3][3];
+ // (Here we assume that domain type is 'float')
+ //
+ // The second part declares Var(s) which serves as a loop iteration variable. Basically, each
+ // variable emits one for loop and these loops are nested. As there are 6 variables in the above
+ // example, there will be 6 nested-loops.
+ //
+ // Each variable has a corresponding bound, and the bound of each variable states the starting /
+ // termination condition. For example, 'Var ofm_ch : { min = 0, max = 1 }' will introduce the
+ // following for loop:
+ // ----------------------------------------------------------------------------------------------
+ // for (int ofm_ch = 0; ofm_ch < 1; ++ofm_ch) { ... }
+ // ----------------------------------------------------------------------------------------------
+ //
+ // The last part declares statement(s) which state the computation performed inside these nested
+ // loops. Nest is stack-based. There is a virtual stack inside nested loop, and the evaluation of
+ // each statement will update this stack.
+ //
+ // Each nest code has one return statement (RET). This return statement specifies where to write
+ // the computed result.
+ //
+ // PUSH 'expr' statement evaluates an arithmetic expression (specified by 'expr') and pushes the
+ // numeric result to the stack. When PUSH statement evaluates an arithmetic expression, variables
+ // that do not appear in RET statement are treated as reduction variables. For example,
+ // ker_ch, ker_row, and ker_col do not appear in RET statement. So, PUSH '...' statement in the
+ // above example corresponds to the following nested loops:
+ // ----------------------------------------------------------------------------------------------
+ // float value = 0.0f;
+ //
+ // for (int ker_ch = 0; ker_ch < 1; ++ker_ch) {
+ // for (int ker_row = 0; ker_row < 3; ++ker_row) {
+ // for (int ker_col = 0; ker_col < 3; ++ker_col) {
+ // float ifm_value = ifm[ker_ch][ofm_row + ker_row][ofm_col + ker_col];
+ // float ker_value = ker[ofm_ch][ker_ch][ker_row][ker_col];
+ // value += ifm_value * ker_value;
+ // }
+ // }
+ // }
+ // ----------------------------------------------------------------------------------------------
+ //
+ // In summary, the above nest example corresponds to the following 2D convolution:
+ // ----------------------------------------------------------------------------------------------
+ // float ofm[1][1][1];
+ // float ifm[1][3][3];
+ // float ker[1][1][3][3];
+ //
+ // for (int ofm_ch = 0; ofm_ch < 1; ++ofm_ch) {
+ // for (int ofm_row = 0; ofm_row < 1; ++ofm_row) {
+ // for (int ofm_col = 0; ofm_col < 1; ++ofm_col) {
+ // float value = 0.0f;
+ //
+ // for (int ker_ch = 0; ker_ch < 1; ++ker_ch) {
+ // for (int ker_row = 0; ker_row < 3; ++ker_row) {
+ // for (int ker_col = 0; ker_col < 3; ++ker_col) {
+ // float ifm_value = ifm[ker_ch][ofm_row + ker_row][ofm_col + ker_col];
+ // float ker_value = ker[ofm_ch][ker_ch][ker_row][ker_col];
+ // value += ifm_value * ker_value;
+ // }
+ // }
+ // }
+ //
+  //       ofm[ofm_ch][ofm_row][ofm_col] = value;
+ // }
+ // }
+ // }
+ // ----------------------------------------------------------------------------------------------
+ //
+ nest::Module m;
+
+ //
+ // Domains
+ //
+ auto ofm = m.domain().make({1 /*C*/, 1 /*H*/, 1 /*W*/});
+ auto ifm = m.domain().make({1 /*C*/, 3 /*H*/, 3 /*W*/});
+ auto ker = m.domain().make({1 /*N*/, 1 /*C*/, 3 /*H*/, 3 /*W*/});
+
+ //
+ // Variables
+ //
+ auto ofm_ch = m.var().make();
+ auto ofm_row = m.var().make();
+ auto ofm_col = m.var().make();
+
+ auto ker_ch = m.var().make();
+ auto ker_row = m.var().make();
+ auto ker_col = m.var().make();
+
+ // Declare the bound of each variables
+ using nest::Bound;
+
+ m.var().bound(ofm_ch) = Bound{0, 1};
+ m.var().bound(ofm_row) = Bound{0, 1};
+ m.var().bound(ofm_col) = Bound{0, 1};
+
+ m.var().bound(ker_ch) = Bound{0, 1};
+ m.var().bound(ker_row) = Bound{0, 3};
+ m.var().bound(ker_col) = Bound{0, 3};
+
+ //
+ // Statement
+ //
+ auto ifm_value = ifm(ker_ch, ofm_row + ker_row, ofm_col + ker_col);
+ auto ker_value = ker(ofm_ch, ker_ch, ker_row, ker_col);
+
+ m.push(ifm_value * ker_value);
+ m.ret(ofm(ofm_ch, ofm_row, ofm_col));
+
+ return 0;
+}
diff --git a/compiler/nest/core/include/nest/Block.h b/compiler/nest/core/include/nest/Block.h
new file mode 100644
index 000000000..e9b646f4e
--- /dev/null
+++ b/compiler/nest/core/include/nest/Block.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_BLOCK_H__
+#define __NEST_BLOCK_H__
+
+#include "nest/Stmt.h"
+
+#include <vector>
+
+#include <cstdint>
+
+namespace nest
+{
+
+struct Block
+{
+public:
+  uint32_t size(void) const { return _stmts.size(); } // number of appended statements
+
+public:
+  const Stmt &at(uint32_t n) const { return _stmts.at(n); } // n-th statement; throws std::out_of_range if n >= size()
+
+public:
+  void append(const Stmt &stmt) { _stmts.emplace_back(stmt); } // statements are kept in append order
+
+private:
+  std::vector<Stmt> _stmts;
+};
+
+} // namespace nest
+
+#endif // __NEST_BLOCK_H__
diff --git a/compiler/nest/core/include/nest/Bound.h b/compiler/nest/core/include/nest/Bound.h
new file mode 100644
index 000000000..252a69e85
--- /dev/null
+++ b/compiler/nest/core/include/nest/Bound.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_BOUND_H__
+#define __NEST_BOUND_H__
+
+#include <cstdint>
+
+namespace nest
+{
+
+class Bound
+{
+public:
+  Bound() = default; // NOTE(review): leaves _min/_max indeterminate; assign before reading min()/max()
+
+public:
+  Bound(int64_t min, int64_t max) : _min{min}, _max{max}
+  {
+    // DO NOTHING
+  }
+
+public:
+  int64_t min(void) const { return _min; } // inclusive lower bound of the loop variable
+  int64_t max(void) const { return _max; } // exclusive upper bound (loop runs while var < max)
+
+private:
+  int64_t _min;
+  int64_t _max;
+};
+
+} // namespace nest
+
+#endif // __NEST_BOUND_H__
diff --git a/compiler/nest/core/include/nest/Closure.h b/compiler/nest/core/include/nest/Closure.h
new file mode 100644
index 000000000..7e09afa1a
--- /dev/null
+++ b/compiler/nest/core/include/nest/Closure.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_CLOSURE_H__
+#define __NEST_CLOSURE_H__
+
+#include "nest/DomainID.h"
+#include "nest/Expr.h"
+
+namespace nest
+{
+
+class Closure
+{
+public:
+  // Captures one subscripted access to a domain, e.g. ifm(ch, row, col)
+  template <typename... Args>
+  Closure(const DomainID &id, Args &&... indices) : _id{id}, _sub{std::forward<Args>(indices)...}
+  {
+    // DO NOTHING
+  }
+
+public:
+  operator Expr() const; // implicit conversion to Expr; defined elsewhere (presumably builds a DerefNode)
+
+public:
+  const DomainID &id(void) const { return _id; }
+  const expr::Subscript &sub(void) const { return _sub; }
+
+private:
+  DomainID const _id;
+  expr::Subscript const _sub;
+};
+
+} // namespace nest
+
+#endif // __NEST_CLOSURE_H__
diff --git a/compiler/nest/core/include/nest/Domain.h b/compiler/nest/core/include/nest/Domain.h
new file mode 100644
index 000000000..8d809e2ad
--- /dev/null
+++ b/compiler/nest/core/include/nest/Domain.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_DOMAIN_H__
+#define __NEST_DOMAIN_H__
+
+#include "nest/Closure.h"
+
+namespace nest
+{
+
+class Domain
+{
+public:
+  Domain() = default;
+
+public:
+  Domain(const DomainID &id) : _id{id}
+  {
+    // DO NOTHING
+  }
+
+public:
+  Domain(const Domain &) = default;
+
+public:
+  // Element access, e.g. ifm(ch, row, col) yields a Closure over this domain
+  template <typename... Args> Closure operator()(Args &&... indices)
+  {
+    return Closure{_id, std::forward<Args>(indices)...};
+  }
+
+public:
+  const DomainID &id(void) const { return _id; }
+
+private:
+  DomainID const _id;
+};
+
+} // namespace nest
+
+#endif // __NEST_DOMAIN_H__
diff --git a/compiler/nest/core/include/nest/DomainContext.h b/compiler/nest/core/include/nest/DomainContext.h
new file mode 100644
index 000000000..425accb93
--- /dev/null
+++ b/compiler/nest/core/include/nest/DomainContext.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_DOMAIN_CONTEXT_H__
+#define __NEST_DOMAIN_CONTEXT_H__
+
+#include "nest/DomainInfo.h"
+#include "nest/Domain.h"
+
+#include <vector>
+
+namespace nest
+{
+
+class DomainContext
+{
+public:
+  uint32_t count(void) const; // number of domains created so far
+
+public:
+  Domain make(std::initializer_list<uint32_t> dims); // create a new domain with the given dimensions
+
+public:
+  const DomainInfo &info(const Domain &) const; // dimension info recorded for the given domain
+
+private:
+  std::vector<DomainInfo> _info; // presumably indexed by DomainID value — TODO confirm in DomainContext.cpp
+};
+
+} // namespace nest
+
+#endif // __NEST_DOMAIN_CONTEXT_H__
diff --git a/compiler/nest/core/include/nest/DomainID.h b/compiler/nest/core/include/nest/DomainID.h
new file mode 100644
index 000000000..935680f58
--- /dev/null
+++ b/compiler/nest/core/include/nest/DomainID.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_DOMAIN_ID_H__
+#define __NEST_DOMAIN_ID_H__
+
+#include <cstdint>
+
+namespace nest
+{
+
+class DomainID
+{
+public:
+  DomainID(); // default value defined in DomainID.cpp (not visible here)
+
+public:
+  explicit DomainID(uint32_t value) : _value{value}
+  {
+    // DO NOTHING
+  }
+
+public:
+  DomainID(const DomainID &vid) : _value{vid._value}
+  {
+    // DO NOTHING
+  }
+
+public:
+  uint32_t value(void) const { return _value; }
+
+private:
+  uint32_t _value;
+};
+
+bool operator==(const DomainID &lhs, const DomainID &rhs);
+bool operator<(const DomainID &lhs, const DomainID &rhs); // enables use as an ordered-container key
+
+} // namespace nest
+
+#endif // __NEST_DOMAIN_ID_H__
diff --git a/compiler/nest/core/include/nest/DomainInfo.h b/compiler/nest/core/include/nest/DomainInfo.h
new file mode 100644
index 000000000..ac8f04e8f
--- /dev/null
+++ b/compiler/nest/core/include/nest/DomainInfo.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_DOMAIN_INFO_H__
+#define __NEST_DOMAIN_INFO_H__
+
+#include <initializer_list>
+#include <vector>
+
+#include <cstdint>
+
+namespace nest
+{
+
+class DomainInfo
+{
+public:
+  DomainInfo(std::initializer_list<uint32_t> dims) : _dims{dims}
+  {
+    // DO NOTHING
+  }
+
+public:
+  uint32_t rank(void) const { return _dims.size(); } // number of dimensions
+
+public:
+  uint32_t dim(uint32_t axis) const { return _dims.at(axis); } // throws std::out_of_range if axis >= rank()
+
+private:
+  std::vector<uint32_t> _dims;
+};
+
+} // namespace nest
+
+#endif // __NEST_DOMAIN_INFO_H__
diff --git a/compiler/nest/core/include/nest/Expr.h b/compiler/nest/core/include/nest/Expr.h
new file mode 100644
index 000000000..520edcd30
--- /dev/null
+++ b/compiler/nest/core/include/nest/Expr.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_EXPR_H__
+#define __NEST_EXPR_H__
+
+#include "nest/expr/Node.h"
+#include "nest/expr/Visitor.h"
+
+#include "nest/expr/VarNode.h"
+#include "nest/expr/DerefNode.h"
+#include "nest/expr/AddNode.h"
+#include "nest/expr/MulNode.h"
+
+#include <memory>
+
+namespace nest
+{
+
+// WARNING All of the descendant of expr::Node SHOULD BE immutable
+//
+// The copy/move constructor of Block and Module class simply copies their shared pointer under
+// the assumption that these classes are immutable.
+using Expr = std::shared_ptr<nest::expr::Node>;
+
+} // namespace nest
+
+nest::Expr operator+(const nest::Expr &, const nest::Expr &); // defined elsewhere; presumably builds an AddNode
+nest::Expr operator*(const nest::Expr &, const nest::Expr &); // presumably builds a MulNode (see expr/MulNode.h)
+
+#endif // __NEST_EXPR_H__
diff --git a/compiler/nest/core/include/nest/FV.h b/compiler/nest/core/include/nest/FV.h
new file mode 100644
index 000000000..089f1596b
--- /dev/null
+++ b/compiler/nest/core/include/nest/FV.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_FV_H__
+#define __NEST_FV_H__
+
+#include "nest/VarID.h"
+
+#include "nest/Expr.h"
+
+#include <set>
+
+namespace nest
+{
+
+/**
+ * @brief Collect free variables from nest expr/stmt/block/...
+ */
+struct FV
+{
+  static std::set<VarID> in(const Expr &); // IDs of all variables occurring in the expression
+};
+
+} // namespace nest
+
+#endif // __NEST_FV_H__
diff --git a/compiler/nest/core/include/nest/Level.h b/compiler/nest/core/include/nest/Level.h
new file mode 100644
index 000000000..ac1788636
--- /dev/null
+++ b/compiler/nest/core/include/nest/Level.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_LEVEL_H__
+#define __NEST_LEVEL_H__
+
+#include <cstdint>
+
+namespace nest
+{
+class Level final
+{
+public:
+  Level(); // defined in Level.cpp; presumably constructs an invalid level — see valid()
+  explicit Level(uint32_t value);
+
+public:
+  bool valid(void) const;
+
+public:
+  uint32_t value(void) const { return _value; } // meaningful only when valid()
+
+private:
+  uint32_t _value;
+};
+
+bool operator==(const Level &lhs, const Level &rhs);
+bool operator<(const Level &lhs, const Level &rhs); // enables use as an ordered-container key
+
+} // namespace nest
+
+#endif // __NEST_LEVEL_H__
diff --git a/compiler/nest/core/include/nest/Module.h b/compiler/nest/core/include/nest/Module.h
new file mode 100644
index 000000000..1294e3089
--- /dev/null
+++ b/compiler/nest/core/include/nest/Module.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_MODULE_H__
+#define __NEST_MODULE_H__
+
+#include "nest/VarContext.h"
+#include "nest/DomainContext.h"
+#include "nest/Ret.h"
+#include "nest/Block.h"
+
+namespace nest
+{
+
+class Module
+{
+public:
+  Module() = default;
+
+private:
+  VarContext _var_ctx; // loop variables and their iteration bounds
+
+public:
+  VarContext &var(void) { return _var_ctx; }
+  const VarContext &var(void) const { return _var_ctx; }
+
+private:
+  DomainContext _domain_ctx; // multi-dimensional arrays referenced by this nest
+
+public:
+  DomainContext &domain(void) { return _domain_ctx; }
+  const DomainContext &domain(void) const { return _domain_ctx; }
+
+private:
+  Block _block; // statements accumulated via push()
+
+public:
+  const Block &block(void) const { return _block; }
+
+public:
+  void push(const Expr &expr); // append a PUSH statement evaluating 'expr'
+
+private:
+  std::shared_ptr<Ret> _ret; // null until ret(closure) has been called
+
+public:
+  // NOTE Do NOT invoke ret() before ret(expr) call
+  const Ret &ret(void) const;
+
+public:
+  // NOTE Only one ret(expr) call is allowed for each module
+  void ret(const Closure &closure);
+};
+
+} // namespace nest
+
+#endif // __NEST_MODULE_H__
diff --git a/compiler/nest/core/include/nest/Ret.h b/compiler/nest/core/include/nest/Ret.h
new file mode 100644
index 000000000..8f8af15e2
--- /dev/null
+++ b/compiler/nest/core/include/nest/Ret.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_RET_H__
+#define __NEST_RET_H__
+
+#include "nest/DomainID.h"
+#include "nest/Expr.h"
+
+namespace nest
+{
+
+// WARNING Ret SHOULD BE immutable
+//
+// The copy/move constructor of Module class simply copies the shared pointer under the assumption
+// that Ret is immutable.
+class Ret
+{
+public:
+  Ret(const DomainID &id, const expr::Subscript &sub) : _id{id}, _sub{sub}
+  {
+    // DO NOTHING
+  }
+
+public:
+  const DomainID &id(void) const { return _id; } // domain that receives the computed value
+  const expr::Subscript &sub(void) const { return _sub; } // index expressions of the written element
+
+private:
+  DomainID _id;
+  expr::Subscript _sub;
+};
+
+} // namespace nest
+
+#endif // __NEST_RET_H__
diff --git a/compiler/nest/core/include/nest/Schedule.h b/compiler/nest/core/include/nest/Schedule.h
new file mode 100644
index 000000000..a92c245c3
--- /dev/null
+++ b/compiler/nest/core/include/nest/Schedule.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_SCHEDULE_H__
+#define __NEST_SCHEDULE_H__
+
+#include "nest/Module.h"
+#include "nest/Level.h"
+
+#include <vector>
+
+namespace nest
+{
+
+class Schedule final
+{
+public:
+  explicit Schedule(const Module &); // stores a copy of the module (see _module below)
+
+public:
+  const VarContext &var(void) const { return _module.var(); }
+  const DomainContext &domain(void) const { return _module.domain(); }
+  const Block &block(void) const { return _module.block(); }
+  const Ret &ret(void) const { return _module.ret(); }
+
+public:
+  Var at(const Level &) const;    // variable scheduled at the given loop level
+  Level level(const Var &) const; // loop level assigned to the given variable
+
+private:
+  Module _module;
+
+private:
+  std::vector<VarID> _level; // index = loop level — ordering convention TODO confirm in Schedule.cpp
+};
+
+} // namespace nest
+
+#endif // __NEST_SCHEDULE_H__
diff --git a/compiler/nest/core/include/nest/Stmt.h b/compiler/nest/core/include/nest/Stmt.h
new file mode 100644
index 000000000..4fff26ffa
--- /dev/null
+++ b/compiler/nest/core/include/nest/Stmt.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_STMT_H__
+#define __NEST_STMT_H__
+
+#include "nest/stmt/Node.h"
+#include "nest/stmt/Visitor.h"
+
+#include "nest/stmt/PushNode.h"
+
+#include <memory>
+
+namespace nest
+{
+
+using Stmt = std::shared_ptr<stmt::Node>;
+
+} // namespace nest
+
+#endif // __NEST_STMT_H__
diff --git a/compiler/nest/core/include/nest/Var.h b/compiler/nest/core/include/nest/Var.h
new file mode 100644
index 000000000..bb2c6c786
--- /dev/null
+++ b/compiler/nest/core/include/nest/Var.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_VAR_H__
+#define __NEST_VAR_H__
+
+#include "nest/VarID.h"
+#include "nest/Expr.h"
+
+namespace nest
+{
+
+class Var
+{
+public:
+  Var() = default;
+
+public:
+  Var(const VarID &id) : _id{id}
+  {
+    // DO NOTHING
+  }
+
+public:
+  const VarID &id(void) const { return _id; }
+
+public:
+  operator Expr(void) const; // implicit conversion to Expr; defined elsewhere (presumably builds a VarNode)
+
+private:
+  VarID const _id;
+};
+
+} // namespace nest
+
+#endif // __NEST_VAR_H__
diff --git a/compiler/nest/core/include/nest/VarContext.h b/compiler/nest/core/include/nest/VarContext.h
new file mode 100644
index 000000000..2438abfce
--- /dev/null
+++ b/compiler/nest/core/include/nest/VarContext.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_VAR_CONTEXT_H__
+#define __NEST_VAR_CONTEXT_H__
+
+#include "nest/Bound.h"
+#include "nest/Var.h"
+
+#include <vector>
+
+namespace nest
+{
+
+class VarContext
+{
+public:
+  uint32_t count(void) const; // number of variables created so far
+
+public:
+  Var make(void); // create a fresh loop variable
+
+public:
+  Bound &bound(const Var &); // iteration bound associated with the variable (mutable form used for assignment)
+  const Bound &bound(const Var &) const;
+
+private:
+  std::vector<Bound> _bound; // presumably indexed by VarID value — TODO confirm in VarContext.cpp
+};
+
+} // namespace nest
+
+#endif // __NEST_VAR_CONTEXT_H__
diff --git a/compiler/nest/core/include/nest/VarID.h b/compiler/nest/core/include/nest/VarID.h
new file mode 100644
index 000000000..daf3e9ee4
--- /dev/null
+++ b/compiler/nest/core/include/nest/VarID.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_VAR_ID_H__
+#define __NEST_VAR_ID_H__
+
+#include <cstdint>
+
+namespace nest
+{
+
+class VarID
+{
+public:
+  VarID(); // default value defined in VarID.cpp (not visible here)
+
+public:
+  explicit VarID(uint32_t value) : _value{value}
+  {
+    // DO NOTHING
+  }
+
+public:
+  VarID(const VarID &vid) : _value{vid._value}
+  {
+    // DO NOTHING
+  }
+
+public:
+  uint32_t value(void) const { return _value; }
+
+private:
+  uint32_t _value;
+};
+
+bool operator==(const VarID &lhs, const VarID &rhs);
+bool operator<(const VarID &lhs, const VarID &rhs); // enables use in std::set (see nest/FV.h)
+
+} // namespace nest
+
+#endif // __NEST_VAR_ID_H__
diff --git a/compiler/nest/core/include/nest/expr/AddNode.h b/compiler/nest/core/include/nest/expr/AddNode.h
new file mode 100644
index 000000000..b9b5afb22
--- /dev/null
+++ b/compiler/nest/core/include/nest/expr/AddNode.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_EXPR_ADD_NODE_H__
+#define __NEST_EXPR_ADD_NODE_H__
+
+#include "nest/expr/Node.h"
+
+#include <memory>
+
+namespace nest
+{
+namespace expr
+{
+
+class AddNode final : public Node
+{
+public:
+  AddNode(const std::shared_ptr<expr::Node> &lhs, const std::shared_ptr<expr::Node> &rhs)
+      : _lhs{lhs}, _rhs{rhs}
+  {
+    // DO NOTHING
+  }
+
+public:
+  const AddNode *asAdd(void) const override { return this; } // type-test hook declared in expr/Node.h
+
+public:
+  const std::shared_ptr<expr::Node> &lhs(void) const { return _lhs; }
+  const std::shared_ptr<expr::Node> &rhs(void) const { return _rhs; }
+
+private:
+  std::shared_ptr<expr::Node> const _lhs; // operands are immutable (see WARNING in nest/Expr.h)
+  std::shared_ptr<expr::Node> const _rhs;
+};
+
+} // namespace expr
+} // namespace nest
+
+#endif // __NEST_EXPR_ADD_NODE_H__
diff --git a/compiler/nest/core/include/nest/expr/DerefNode.h b/compiler/nest/core/include/nest/expr/DerefNode.h
new file mode 100644
index 000000000..19adfe3b3
--- /dev/null
+++ b/compiler/nest/core/include/nest/expr/DerefNode.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_EXPR_DEREF_NODE_H__
+#define __NEST_EXPR_DEREF_NODE_H__
+
+#include "nest/DomainID.h"
+
+#include "nest/expr/Subscript.h"
+
+namespace nest
+{
+namespace expr
+{
+
+class DerefNode final : public Node
+{
+public:
+ template <typename... Args>
+ DerefNode(const DomainID &id, Args &&... indices)
+ : _id{id}, _sub{std::forward<Args>(indices)...}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const DerefNode *asDeref(void) const override { return this; }
+
+public:
+ const DomainID &id(void) const { return _id; }
+ const Subscript &sub(void) const { return _sub; }
+
+private:
+ DomainID const _id;
+ Subscript const _sub;
+};
+
+} // namespace expr
+} // namespace nest
+
+#endif // __NEST_EXPR_DEREF_NODE_H__
diff --git a/compiler/nest/core/include/nest/expr/Forward.h b/compiler/nest/core/include/nest/expr/Forward.h
new file mode 100644
index 000000000..ae15b13b4
--- /dev/null
+++ b/compiler/nest/core/include/nest/expr/Forward.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_EXPR_FORWARD_H__
+#define __NEST_EXPR_FORWARD_H__
+
+#include "nest/expr/Macro.h"
+
+namespace nest
+{
+namespace expr
+{
+
+#define EXPR(Tag) class NEST_EXPR_CLASS_NAME(Tag);
+#include "nest/expr/Node.def"
+#undef EXPR
+
+} // namespace expr
+} // namespace nest
+
+#endif // __NEST_EXPR_FORWARD_H__
diff --git a/compiler/nest/core/include/nest/expr/Macro.h b/compiler/nest/core/include/nest/expr/Macro.h
new file mode 100644
index 000000000..dca8bd344
--- /dev/null
+++ b/compiler/nest/core/include/nest/expr/Macro.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_EXPR_MACRO_H__
+#define __NEST_EXPR_MACRO_H__
+
+#ifndef NEST_EXPR_CLASS_NAME
+#define NEST_EXPR_CLASS_NAME(Tag) Tag##Node
+#endif // NEST_EXPR_CLASS_NAME
+
+#ifndef NEST_EXPR_CAST_METHOD_NAME
+#define NEST_EXPR_CAST_METHOD_NAME(Tag) as##Tag
+#endif // NEST_EXPR_CAST_METHOD_NAME
+
+#endif // __NEST_EXPR_MACRO_H__
diff --git a/compiler/nest/core/include/nest/expr/MulNode.h b/compiler/nest/core/include/nest/expr/MulNode.h
new file mode 100644
index 000000000..f388b33a3
--- /dev/null
+++ b/compiler/nest/core/include/nest/expr/MulNode.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_EXPR_MUL_NODE_H__
+#define __NEST_EXPR_MUL_NODE_H__
+
+#include "nest/expr/Node.h"
+
+#include <memory>
+
+namespace nest
+{
+namespace expr
+{
+
+class MulNode final : public Node
+{
+public:
+ MulNode(const std::shared_ptr<expr::Node> &lhs, const std::shared_ptr<expr::Node> &rhs)
+ : _lhs{lhs}, _rhs{rhs}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const MulNode *asMul(void) const override { return this; }
+
+public:
+ const std::shared_ptr<expr::Node> &lhs(void) const { return _lhs; }
+ const std::shared_ptr<expr::Node> &rhs(void) const { return _rhs; }
+
+private:
+ std::shared_ptr<expr::Node> const _lhs;
+ std::shared_ptr<expr::Node> const _rhs;
+};
+
+} // namespace expr
+} // namespace nest
+
+#endif // __NEST_EXPR_MUL_NODE_H__
diff --git a/compiler/nest/core/include/nest/expr/Node.def b/compiler/nest/core/include/nest/expr/Node.def
new file mode 100644
index 000000000..0429517a0
--- /dev/null
+++ b/compiler/nest/core/include/nest/expr/Node.def
@@ -0,0 +1,9 @@
+#ifndef EXPR
+#error EXPR should be defined before including this file
+#endif
+
+// EXPR(Tag)
+EXPR(Var)
+EXPR(Deref)
+EXPR(Add)
+EXPR(Mul)
diff --git a/compiler/nest/core/include/nest/expr/Node.h b/compiler/nest/core/include/nest/expr/Node.h
new file mode 100644
index 000000000..d187c25fa
--- /dev/null
+++ b/compiler/nest/core/include/nest/expr/Node.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_EXPR_NODE_H__
+#define __NEST_EXPR_NODE_H__
+
+#include "nest/expr/Macro.h"
+#include "nest/expr/Forward.h"
+#include "nest/expr/Visitor.h"
+
+#include <stdexcept>
+
+namespace nest
+{
+namespace expr
+{
+
+struct Node
+{
+ virtual ~Node() = default;
+
+#define EXPR(Tag) \
+ virtual const NEST_EXPR_CLASS_NAME(Tag) * NEST_EXPR_CAST_METHOD_NAME(Tag)(void) const \
+ { \
+ return nullptr; \
+ }
+#include "nest/expr/Node.def"
+#undef EXPR
+
+ template <typename T> T accept(Visitor<T> *v)
+ {
+#define EXPR(Tag) \
+ if (auto e = NEST_EXPR_CAST_METHOD_NAME(Tag)()) \
+ { \
+ return v->visit(e); \
+ }
+#include "nest/expr/Node.def"
+#undef EXPR
+
+ throw std::runtime_error{"unreachable"};
+ }
+
+ template <typename T> T accept(Visitor<T> &v) { return accept(&v); }
+};
+
+} // namespace expr
+} // namespace nest
+
+#endif // __NEST_EXPR_NODE_H__
diff --git a/compiler/nest/core/include/nest/expr/Subscript.h b/compiler/nest/core/include/nest/expr/Subscript.h
new file mode 100644
index 000000000..1670b8094
--- /dev/null
+++ b/compiler/nest/core/include/nest/expr/Subscript.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_SUBSCRIPT_H__
+#define __NEST_SUBSCRIPT_H__
+
+#include "nest/expr/Node.h"
+
+#include <vector>
+#include <initializer_list>
+
+#include <memory>
+
+namespace nest
+{
+namespace expr
+{
+
+class Subscript
+{
+public:
+ Subscript(std::initializer_list<std::shared_ptr<Node>> indices) : _indices{indices}
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t rank(void) const { return _indices.size(); }
+
+public:
+ const std::shared_ptr<expr::Node> &at(uint32_t n) const { return _indices.at(n); }
+
+private:
+ std::vector<std::shared_ptr<expr::Node>> _indices;
+};
+
+} // namespace expr
+} // namespace nest
+
+#endif // __NEST_SUBSCRIPT_H__
diff --git a/compiler/nest/core/include/nest/expr/VarNode.h b/compiler/nest/core/include/nest/expr/VarNode.h
new file mode 100644
index 000000000..0767fed41
--- /dev/null
+++ b/compiler/nest/core/include/nest/expr/VarNode.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_EXPR_VAR_NODE_H__
+#define __NEST_EXPR_VAR_NODE_H__
+
+#include "nest/VarID.h"
+
+#include "nest/expr/Forward.h"
+#include "nest/expr/Visitor.h"
+#include "nest/expr/Node.h"
+
+namespace nest
+{
+namespace expr
+{
+
+class VarNode final : public Node
+{
+public:
+ VarNode(const VarID &id) : _id{id}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const VarNode *asVar(void) const override { return this; }
+
+public:
+ const VarID &id(void) const { return _id; }
+
+private:
+ VarID const _id;
+};
+
+} // namespace expr
+} // namespace nest
+
+#endif // __NEST_EXPR_VAR_NODE_H__
diff --git a/compiler/nest/core/include/nest/expr/Visitor.h b/compiler/nest/core/include/nest/expr/Visitor.h
new file mode 100644
index 000000000..84f70e9f4
--- /dev/null
+++ b/compiler/nest/core/include/nest/expr/Visitor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_EXPR_VISITOR_H__
+#define __NEST_EXPR_VISITOR_H__
+
+#include "nest/expr/Macro.h"
+#include "nest/expr/Forward.h"
+
+namespace nest
+{
+namespace expr
+{
+
+template <typename T> struct Visitor
+{
+ virtual ~Visitor() = default;
+
+#define EXPR(Tag) virtual T visit(const NEST_EXPR_CLASS_NAME(Tag) *) = 0;
+#include "nest/expr/Node.def"
+#undef EXPR
+};
+
+} // namespace expr
+} // namespace nest
+
+#endif // __NEST_EXPR_VISITOR_H__
diff --git a/compiler/nest/core/include/nest/stmt/Forward.h b/compiler/nest/core/include/nest/stmt/Forward.h
new file mode 100644
index 000000000..349898e42
--- /dev/null
+++ b/compiler/nest/core/include/nest/stmt/Forward.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_STMT_FORWARD_H__
+#define __NEST_STMT_FORWARD_H__
+
+#include "nest/stmt/Macro.h"
+
+namespace nest
+{
+namespace stmt
+{
+
+#define STMT(Tag) class NEST_STMT_CLASS_NAME(Tag);
+#include "nest/stmt/Node.def"
+#undef STMT
+
+} // namespace stmt
+} // namespace nest
+
+#endif // __NEST_STMT_FORWARD_H__
diff --git a/compiler/nest/core/include/nest/stmt/Macro.h b/compiler/nest/core/include/nest/stmt/Macro.h
new file mode 100644
index 000000000..f8cd8941e
--- /dev/null
+++ b/compiler/nest/core/include/nest/stmt/Macro.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_STMT_MACRO_H__
+#define __NEST_STMT_MACRO_H__
+
+#ifndef NEST_STMT_CLASS_NAME
+#define NEST_STMT_CLASS_NAME(Tag) Tag##Node
+#endif // NEST_STMT_CLASS_NAME
+
+#ifndef NEST_STMT_CAST_METHOD_NAME
+#define NEST_STMT_CAST_METHOD_NAME(Tag) as##Tag
+#endif // NEST_STMT_CAST_METHOD_NAME
+
+#endif // __NEST_STMT_MACRO_H__
diff --git a/compiler/nest/core/include/nest/stmt/Node.def b/compiler/nest/core/include/nest/stmt/Node.def
new file mode 100644
index 000000000..dcec9147f
--- /dev/null
+++ b/compiler/nest/core/include/nest/stmt/Node.def
@@ -0,0 +1,6 @@
+#ifndef STMT
+#error STMT should be defined before including this file
+#endif
+
+// STMT(Tag)
+STMT(Push)
diff --git a/compiler/nest/core/include/nest/stmt/Node.h b/compiler/nest/core/include/nest/stmt/Node.h
new file mode 100644
index 000000000..593eb68a1
--- /dev/null
+++ b/compiler/nest/core/include/nest/stmt/Node.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_STMT_NODE_H__
+#define __NEST_STMT_NODE_H__
+
+#include "nest/stmt/Macro.h"
+#include "nest/stmt/Forward.h"
+#include "nest/stmt/Visitor.h"
+
+#include <stdexcept>
+
+namespace nest
+{
+namespace stmt
+{
+
+struct Node
+{
+ virtual ~Node() = default;
+
+#define STMT(Tag) \
+ virtual const NEST_STMT_CLASS_NAME(Tag) * NEST_STMT_CAST_METHOD_NAME(Tag)(void) const \
+ { \
+ return nullptr; \
+ }
+#include "nest/stmt/Node.def"
+#undef STMT
+
+ template <typename T> T accept(Visitor<T> *v)
+ {
+#define STMT(Tag) \
+ if (auto s = NEST_STMT_CAST_METHOD_NAME(Tag)()) \
+ { \
+ return v->visit(s); \
+ }
+#include "nest/stmt/Node.def"
+#undef STMT
+
+ throw std::runtime_error{"unreachable"};
+ }
+
+ template <typename T> T accept(Visitor<T> &v) { return accept(&v); }
+};
+
+} // namespace stmt
+} // namespace nest
+
+#endif // __NEST_STMT_NODE_H__
diff --git a/compiler/nest/core/include/nest/stmt/PushNode.h b/compiler/nest/core/include/nest/stmt/PushNode.h
new file mode 100644
index 000000000..8a6b3cd91
--- /dev/null
+++ b/compiler/nest/core/include/nest/stmt/PushNode.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_STMT_PUSH_NODE_H__
+#define __NEST_STMT_PUSH_NODE_H__
+
+#include "nest/stmt/Node.h"
+#include "nest/Expr.h"
+
+namespace nest
+{
+namespace stmt
+{
+
+class PushNode final : public Node
+{
+public:
+ PushNode(const Expr &expr) : _expr{expr}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const PushNode *asPush(void) const override { return this; }
+
+public:
+ const Expr &expr(void) const { return _expr; }
+
+private:
+ Expr const _expr;
+};
+
+} // namespace stmt
+} // namespace nest
+
+#endif // __NEST_STMT_PUSH_NODE_H__
diff --git a/compiler/nest/core/include/nest/stmt/Visitor.h b/compiler/nest/core/include/nest/stmt/Visitor.h
new file mode 100644
index 000000000..459f0c9d6
--- /dev/null
+++ b/compiler/nest/core/include/nest/stmt/Visitor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_STMT_VISITOR_H__
+#define __NEST_STMT_VISITOR_H__
+
+#include "nest/stmt/Macro.h"
+#include "nest/stmt/Forward.h"
+
+namespace nest
+{
+namespace stmt
+{
+
+template <typename T> struct Visitor
+{
+ virtual ~Visitor() = default;
+
+#define STMT(Tag) virtual T visit(const NEST_STMT_CLASS_NAME(Tag) *) = 0;
+#include "nest/stmt/Node.def"
+#undef STMT
+};
+
+} // namespace stmt
+} // namespace nest
+
+#endif // __NEST_STMT_VISITOR_H__
diff --git a/compiler/nest/core/src/Block.test.cpp b/compiler/nest/core/src/Block.test.cpp
new file mode 100644
index 000000000..b40fbeaac
--- /dev/null
+++ b/compiler/nest/core/src/Block.test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Block.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct DummyNode final : public nest::stmt::Node
+{
+ // Dummy Node for testing
+};
+}
+
+TEST(BLOCK, use_case_1)
+{
+ nest::Block block;
+
+ ASSERT_EQ(block.size(), 0);
+
+ auto stmt = std::make_shared<DummyNode>();
+
+ block.append(stmt);
+
+ ASSERT_EQ(block.size(), 1);
+ ASSERT_EQ(block.at(0), stmt);
+}
diff --git a/compiler/nest/core/src/Bound.test.cpp b/compiler/nest/core/src/Bound.test.cpp
new file mode 100644
index 000000000..7b2f0b62e
--- /dev/null
+++ b/compiler/nest/core/src/Bound.test.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Bound.h"
+
+#include <gtest/gtest.h>
+
+TEST(BOUND, ctor)
+{
+ const nest::Bound b{-10, 20};
+
+ ASSERT_EQ(b.min(), -10);
+ ASSERT_EQ(b.max(), 20);
+}
diff --git a/compiler/nest/core/src/Closure.cpp b/compiler/nest/core/src/Closure.cpp
new file mode 100644
index 000000000..253a612f8
--- /dev/null
+++ b/compiler/nest/core/src/Closure.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Closure.h"
+
+namespace nest
+{
+
+Closure::operator Expr(void) const { return std::make_shared<expr::DerefNode>(_id, _sub); }
+
+} // namespace nest
diff --git a/compiler/nest/core/src/Closure.test.cpp b/compiler/nest/core/src/Closure.test.cpp
new file mode 100644
index 000000000..1dae849a3
--- /dev/null
+++ b/compiler/nest/core/src/Closure.test.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Closure.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct DummyNode final : public nest::expr::Node
+{
+};
+}
+
+TEST(Closure, ctor)
+{
+ nest::DomainID dom_id{0};
+ nest::Closure closure{dom_id, std::make_shared<DummyNode>()};
+
+ ASSERT_EQ(closure.id().value(), 0);
+ ASSERT_EQ(closure.sub().rank(), 1);
+}
+
+TEST(Closure, cast)
+{
+ nest::DomainID dom_id{0};
+ nest::Closure closure{dom_id, std::make_shared<DummyNode>()};
+
+ auto check = [](const nest::Expr &e) { ASSERT_NE(e.get(), nullptr); };
+
+ check(closure);
+}
diff --git a/compiler/nest/core/src/Domain.test.cpp b/compiler/nest/core/src/Domain.test.cpp
new file mode 100644
index 000000000..5f973ecf7
--- /dev/null
+++ b/compiler/nest/core/src/Domain.test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Domain.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+namespace expr
+{
+struct DummyNode final : public nest::expr::Node
+{
+};
+} // namespace expr
+} // namespace
+
+// NOTE Build failed when DOMAIN is used instead of _DOMAIN
+TEST(_DOMAIN, base_usecase)
+{
+ nest::DomainID dom_id{0};
+ nest::Domain dom{dom_id};
+
+ nest::Closure clo = dom(std::make_shared<::expr::DummyNode>());
+
+ ASSERT_EQ(clo.id(), dom_id);
+ ASSERT_EQ(clo.sub().rank(), 1);
+}
diff --git a/compiler/nest/core/src/DomainContext.cpp b/compiler/nest/core/src/DomainContext.cpp
new file mode 100644
index 000000000..4a7f223d4
--- /dev/null
+++ b/compiler/nest/core/src/DomainContext.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/DomainContext.h"
+
+namespace nest
+{
+
+uint32_t DomainContext::count(void) const { return _info.size(); }
+
+Domain DomainContext::make(std::initializer_list<uint32_t> dims)
+{
+ const DomainID domain_id{count()};
+
+ _info.emplace_back(dims);
+
+ return Domain{domain_id};
+}
+
+const DomainInfo &DomainContext::info(const Domain &dom) const
+{
+ return _info.at(dom.id().value());
+}
+
+} // namespace nest
diff --git a/compiler/nest/core/src/DomainContext.test.cpp b/compiler/nest/core/src/DomainContext.test.cpp
new file mode 100644
index 000000000..10882df70
--- /dev/null
+++ b/compiler/nest/core/src/DomainContext.test.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/DomainContext.h"
+
+#include <gtest/gtest.h>
+
+TEST(DOMAIN_CONTEXT, usecase)
+{
+ nest::DomainContext ctx;
+
+ auto dom_0 = ctx.make({1, 3, 4});
+
+ ASSERT_EQ(ctx.count(), 1);
+
+ auto check_dom_0 = [&](void) {
+ ASSERT_EQ(ctx.info(dom_0).rank(), 3);
+ ASSERT_EQ(ctx.info(dom_0).dim(0), 1);
+ ASSERT_EQ(ctx.info(dom_0).dim(1), 3);
+ ASSERT_EQ(ctx.info(dom_0).dim(2), 4);
+ };
+
+ check_dom_0();
+
+ auto dom_1 = ctx.make({7, 6, 2, 1});
+
+ ASSERT_EQ(ctx.count(), 2);
+
+ // Domain ID should be unique for each domain
+ ASSERT_FALSE(dom_0.id() == dom_1.id());
+
+ auto check_dom_1 = [&](void) {
+ ASSERT_EQ(ctx.info(dom_1).rank(), 4);
+ ASSERT_EQ(ctx.info(dom_1).dim(0), 7);
+ ASSERT_EQ(ctx.info(dom_1).dim(1), 6);
+ ASSERT_EQ(ctx.info(dom_1).dim(2), 2);
+ ASSERT_EQ(ctx.info(dom_1).dim(3), 1);
+ };
+
+ // make() SHOULD NOT affect the existing domain information
+ check_dom_0();
+ check_dom_1();
+}
diff --git a/compiler/nest/core/src/DomainID.cpp b/compiler/nest/core/src/DomainID.cpp
new file mode 100644
index 000000000..714f07b67
--- /dev/null
+++ b/compiler/nest/core/src/DomainID.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/DomainID.h"
+
+namespace nest
+{
+
+DomainID::DomainID() : _value{0xffffffff}
+{
+ // DO NOTHING
+}
+
+bool operator==(const DomainID &lhs, const DomainID &rhs) { return lhs.value() == rhs.value(); }
+bool operator<(const DomainID &lhs, const DomainID &rhs) { return lhs.value() < rhs.value(); }
+
+} // namespace nest
diff --git a/compiler/nest/core/src/DomainID.test.cpp b/compiler/nest/core/src/DomainID.test.cpp
new file mode 100644
index 000000000..6b1ce8360
--- /dev/null
+++ b/compiler/nest/core/src/DomainID.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/DomainID.h"
+
+#include <gtest/gtest.h>
+
+TEST(DOMAIN_ID, ctor)
+{
+ nest::DomainID id{0};
+
+ ASSERT_EQ(id.value(), 0);
+}
+
+TEST(DOMAIN_ID, operator_eq)
+{
+ ASSERT_TRUE(nest::DomainID(0) == nest::DomainID(0));
+ ASSERT_FALSE(nest::DomainID(0) == nest::DomainID(1));
+}
+
+TEST(DOMAIN_ID, operator_lt)
+{
+ ASSERT_TRUE(nest::DomainID(0) < nest::DomainID(1));
+ ASSERT_FALSE(nest::DomainID(1) < nest::DomainID(0));
+}
diff --git a/compiler/nest/core/src/DomainInfo.test.cpp b/compiler/nest/core/src/DomainInfo.test.cpp
new file mode 100644
index 000000000..7a5d81144
--- /dev/null
+++ b/compiler/nest/core/src/DomainInfo.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/DomainInfo.h"
+
+#include <gtest/gtest.h>
+
+TEST(DOMAIN_INFO, ctor)
+{
+ nest::DomainInfo info{1, 2, 3, 4};
+
+ ASSERT_EQ(info.rank(), 4);
+ ASSERT_EQ(info.dim(0), 1);
+ ASSERT_EQ(info.dim(1), 2);
+ ASSERT_EQ(info.dim(2), 3);
+ ASSERT_EQ(info.dim(3), 4);
+}
diff --git a/compiler/nest/core/src/Expr.cpp b/compiler/nest/core/src/Expr.cpp
new file mode 100644
index 000000000..6d35f4c5a
--- /dev/null
+++ b/compiler/nest/core/src/Expr.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Expr.h"
+
+nest::Expr operator+(const nest::Expr &lhs, const nest::Expr &rhs)
+{
+ return std::make_shared<nest::expr::AddNode>(lhs, rhs);
+}
+
+nest::Expr operator*(const nest::Expr &lhs, const nest::Expr &rhs)
+{
+ return std::make_shared<nest::expr::MulNode>(lhs, rhs);
+}
diff --git a/compiler/nest/core/src/Expr.test.cpp b/compiler/nest/core/src/Expr.test.cpp
new file mode 100644
index 000000000..0c96f7714
--- /dev/null
+++ b/compiler/nest/core/src/Expr.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Expr.h"
+
+#include <memory>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct DummyNode final : public nest::expr::Node
+{
+};
+}
+
+TEST(EXPR, operator_sum)
+{
+ auto left = std::make_shared<DummyNode>();
+ auto right = std::make_shared<DummyNode>();
+
+ auto expr = left + right;
+
+ ASSERT_NE(expr->asAdd(), nullptr);
+
+ auto add = expr->asAdd();
+
+ ASSERT_EQ(add->lhs().get(), left.get());
+ ASSERT_EQ(add->rhs().get(), right.get());
+}
+
+TEST(EXPR, operator_mul)
+{
+ auto left = std::make_shared<DummyNode>();
+ auto right = std::make_shared<DummyNode>();
+
+ auto expr = left * right;
+
+ // operator* should produce a MulNode wrapping both operands
+ ASSERT_NE(expr->asMul(), nullptr);
+ auto mul = expr->asMul();
+
+ ASSERT_EQ(mul->lhs().get(), left.get());
+ ASSERT_EQ(mul->rhs().get(), right.get());
+}
diff --git a/compiler/nest/core/src/FV.cpp b/compiler/nest/core/src/FV.cpp
new file mode 100644
index 000000000..2b51d178c
--- /dev/null
+++ b/compiler/nest/core/src/FV.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/FV.h"
+
+namespace
+{
+
+using namespace nest;
+using namespace nest::expr;
+
+class Collector final : public Visitor<void>
+{
+public:
+ Collector(std::set<VarID> &out) : _out(out)
+ {
+ // DO NOTHING
+ }
+
+public:
+ void visit(const VarNode *e) override { _out.insert(e->id()); }
+
+ void visit(const DerefNode *e) override
+ {
+ for (uint32_t n = 0; n < e->sub().rank(); ++n)
+ {
+ e->sub().at(n)->accept(this);
+ }
+ }
+
+ void visit(const AddNode *e) override
+ {
+ e->lhs()->accept(this);
+ e->rhs()->accept(this);
+ }
+
+ void visit(const MulNode *e) override
+ {
+ e->lhs()->accept(this);
+ e->rhs()->accept(this);
+ }
+
+private:
+ std::set<nest::VarID> &_out;
+};
+
+} // namespace
+
+namespace nest
+{
+
+std::set<VarID> FV::in(const Expr &expr)
+{
+ std::set<VarID> res;
+
+ Collector collector{res};
+ expr->accept(collector);
+
+ return res;
+}
+
+} // namespace nest
diff --git a/compiler/nest/core/src/FV.test.cpp b/compiler/nest/core/src/FV.test.cpp
new file mode 100644
index 000000000..55f5f5877
--- /dev/null
+++ b/compiler/nest/core/src/FV.test.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/FV.h"
+#include "nest/Module.h"
+
+#include <gtest/gtest.h>
+
+TEST(FV, var_expr)
+{
+ nest::Module m;
+
+ auto var = m.var().make();
+
+ auto fvs = nest::FV::in(var);
+
+ ASSERT_EQ(fvs.size(), 1);
+ ASSERT_NE(fvs.find(var.id()), fvs.end());
+}
+
+TEST(FV, deref_expr)
+{
+ nest::Module m;
+
+ auto dom = m.domain().make({16});
+ auto var = m.var().make();
+
+ auto fvs = nest::FV::in(dom(var));
+
+ ASSERT_EQ(fvs.size(), 1);
+ ASSERT_NE(fvs.find(var.id()), fvs.end());
+}
+
+TEST(FV, add_expr)
+{
+ nest::Module m;
+
+ auto v_0 = m.var().make();
+ auto v_1 = m.var().make();
+
+ auto fvs = nest::FV::in(v_0 + v_1);
+
+ ASSERT_EQ(fvs.size(), 2);
+ ASSERT_NE(fvs.find(v_0.id()), fvs.end());
+ ASSERT_NE(fvs.find(v_1.id()), fvs.end());
+}
+
+TEST(FV, mul_expr)
+{
+ nest::Module m;
+
+ auto v_0 = m.var().make();
+ auto v_1 = m.var().make();
+
+ // NOTE FV::in is a static helper - no FV instance is required
+
+ auto fvs = nest::FV::in(v_0 * v_1);
+
+ ASSERT_EQ(fvs.size(), 2);
+ ASSERT_NE(fvs.find(v_0.id()), fvs.end());
+ ASSERT_NE(fvs.find(v_1.id()), fvs.end());
+}
diff --git a/compiler/nest/core/src/Level.cpp b/compiler/nest/core/src/Level.cpp
new file mode 100644
index 000000000..1adf4351d
--- /dev/null
+++ b/compiler/nest/core/src/Level.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Level.h"
+
+#include <cassert>
+
+namespace
+{
+const uint32_t invalid_tag = 0xffffffff;
+} // namespace
+
+namespace nest
+{
+Level::Level() : _value{invalid_tag}
+{
+ // DO NOTHING
+}
+
+Level::Level(uint32_t value) : _value{value} { assert(value != invalid_tag); }
+
+bool Level::valid(void) const { return _value != invalid_tag; }
+
+bool operator==(const Level &lhs, const Level &rhs)
+{
+ assert(lhs.valid() && rhs.valid());
+ return lhs.value() == rhs.value();
+}
+
+bool operator<(const Level &lhs, const Level &rhs)
+{
+ assert(lhs.valid() && rhs.valid());
+ return lhs.value() < rhs.value();
+}
+} // namespace nest
diff --git a/compiler/nest/core/src/Level.test.cpp b/compiler/nest/core/src/Level.test.cpp
new file mode 100644
index 000000000..b9e203d9d
--- /dev/null
+++ b/compiler/nest/core/src/Level.test.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Level.h"
+
+#include <gtest/gtest.h>
+
+TEST(LEVEL, constructor)
+{
+ nest::Level lv{3};
+
+ ASSERT_EQ(lv.value(), 3);
+}
+
+TEST(LEVEL, operator_eq)
+{
+ ASSERT_TRUE(nest::Level(3) == nest::Level(3));
+ ASSERT_FALSE(nest::Level(3) == nest::Level(4));
+}
+
+TEST(LEVEL, operator_lt)
+{
+ ASSERT_FALSE(nest::Level(3) < nest::Level(3));
+ ASSERT_TRUE(nest::Level(3) < nest::Level(4));
+ ASSERT_FALSE(nest::Level(4) < nest::Level(3));
+}
diff --git a/compiler/nest/core/src/Module.cpp b/compiler/nest/core/src/Module.cpp
new file mode 100644
index 000000000..060f94e3e
--- /dev/null
+++ b/compiler/nest/core/src/Module.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Module.h"
+
+#include <cassert>
+
+namespace nest
+{
+
+void Module::push(const Expr &expr)
+{
+ auto stmt = std::make_shared<stmt::PushNode>(expr);
+ _block.append(stmt);
+}
+
+void Module::ret(const Closure &clo)
+{
+ // Only one RET is allowed for each module
+ assert(_ret == nullptr);
+ _ret = std::make_shared<Ret>(clo.id(), clo.sub());
+}
+
+const Ret &Module::ret(void) const
+{
+ // Caller should NOT invoke this method before setting ret
+ assert(_ret != nullptr);
+ return *_ret;
+}
+
+} // namespace nest
diff --git a/compiler/nest/core/src/Module.test.cpp b/compiler/nest/core/src/Module.test.cpp
new file mode 100644
index 000000000..01e414d25
--- /dev/null
+++ b/compiler/nest/core/src/Module.test.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Module.h"
+
+#include <gtest/gtest.h>
+
+TEST(MODULE, create_var)
+{
+ nest::Module m;
+
+ auto create = [](nest::Module &m) {
+ // This code will invoke 'VarContext &var(void)' method
+ return m.var().make();
+ };
+
+ auto check = [](const nest::Module &m) {
+ // This code will invoke 'const VarContext &var(void) const' method
+ ASSERT_EQ(m.var().count(), 1);
+ };
+
+ create(m);
+ check(m);
+}
+
+TEST(MODULE, create_domain)
+{
+ nest::Module m;
+
+ auto create = [](nest::Module &m, std::initializer_list<uint32_t> dims) {
+ // This code will invoke 'DomainContext &domain(void)' method
+ return m.domain().make(dims);
+ };
+
+ auto check = [](const nest::Module &m) {
+ // This code will invoke 'const DomainContext &domain(void) const' method
+ ASSERT_EQ(m.domain().count(), 1);
+ };
+
+ create(m, {1, 3, 3});
+ check(m);
+}
+
+TEST(MODULE, push)
+{
+ nest::Module m;
+
+ auto ifm = m.domain().make({1, 3, 3});
+
+ auto var_ch = m.var().make();
+ auto var_row = m.var().make();
+ auto var_col = m.var().make();
+
+ m.push(ifm(var_ch, var_row, var_col));
+
+ ASSERT_EQ(m.block().size(), 1);
+ ASSERT_NE(m.block().at(0)->asPush(), nullptr);
+}
+
+TEST(MODULE, ret)
+{
+ nest::Module m;
+
+ auto ifm = m.domain().make({1});
+ auto ofm = m.domain().make({1});
+
+ auto ind = m.var().make();
+
+ m.push(ifm(ind));
+ m.ret(ofm(ind));
+
+ ASSERT_EQ(m.ret().id(), ofm.id());
+ ASSERT_EQ(m.ret().sub().rank(), 1);
+}
+
+TEST(MODULE, copy)
+{
+ nest::Module orig;
+ nest::Module copy;
+
+ orig = copy;
+
+ orig.var().make();
+
+ ASSERT_EQ(copy.var().count(), 0);
+}
diff --git a/compiler/nest/core/src/Ret.test.cpp b/compiler/nest/core/src/Ret.test.cpp
new file mode 100644
index 000000000..703f04901
--- /dev/null
+++ b/compiler/nest/core/src/Ret.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Ret.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct DummyNode final : public nest::expr::Node
+{
+};
+}
+
+TEST(RET, ctor)
+{
+ nest::DomainID dom_id{0};
+ nest::expr::Subscript sub{std::make_shared<DummyNode>()};
+
+ nest::Ret ret{dom_id, sub};
+
+ ASSERT_EQ(ret.id().value(), 0);
+ ASSERT_EQ(ret.sub().rank(), 1);
+}
+
+TEST(RET, copy)
+{
+ nest::DomainID src_id{0};
+ nest::expr::Subscript src_sub{std::make_shared<DummyNode>()};
+
+ const nest::Ret src{src_id, src_sub};
+
+ nest::DomainID dst_id{1};
+ nest::expr::Subscript dst_sub{std::make_shared<DummyNode>(), std::make_shared<DummyNode>()};
+
+ nest::Ret dst{dst_id, dst_sub};
+
+ ASSERT_EQ(dst.id().value(), 1);
+ ASSERT_EQ(dst.sub().rank(), 2);
+
+ dst = src;
+
+ ASSERT_EQ(dst.id().value(), 0);
+ ASSERT_EQ(dst.sub().rank(), 1);
+}
diff --git a/compiler/nest/core/src/Schedule.cpp b/compiler/nest/core/src/Schedule.cpp
new file mode 100644
index 000000000..81f9cd26a
--- /dev/null
+++ b/compiler/nest/core/src/Schedule.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Schedule.h"
+
+#include <cassert>
+#include <stdexcept>
+
+namespace nest
+{
+
+Schedule::Schedule(const Module &module) : _module{module}
+{
+ // NOTE This implementation assumes that VarContext sequentially assigns VarID
+ for (uint32_t n = 0; n < _module.var().count(); ++n)
+ {
+ _level.emplace_back(VarID{n});
+ }
+
+ assert(_level.size() == _module.var().count());
+}
+
+Var Schedule::at(const Level &lv) const { return Var{_level.at(lv.value())}; }
+
+Level Schedule::level(const Var &var) const
+{
+ for (uint32_t lv = 0; lv < _level.size(); ++lv)
+ {
+ if (_level.at(lv) == var.id())
+ {
+ return Level{lv};
+ }
+ }
+
+ throw std::invalid_argument{"var"};
+}
+
+} // namespace nest
diff --git a/compiler/nest/core/src/Schedule.test.cpp b/compiler/nest/core/src/Schedule.test.cpp
new file mode 100644
index 000000000..8f0ddb23c
--- /dev/null
+++ b/compiler/nest/core/src/Schedule.test.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Schedule.h"
+
+#include <gtest/gtest.h>
+
+TEST(SCHEDULE, module)
+{
+ nest::Module m;
+
+ auto var_1 = m.var().make();
+
+ nest::Schedule sch{m};
+
+ ASSERT_EQ(sch.level(var_1).value(), 0);
+}
+
+TEST(SCHEDULE, module_copy)
+{
+ nest::Module m;
+
+ m.var().make();
+
+ nest::Schedule sch{m};
+
+ // Update on 'm' does not affect the schedule
+ m.var().make();
+
+ ASSERT_EQ(sch.var().count(), 1);
+}
diff --git a/compiler/nest/core/src/Var.cpp b/compiler/nest/core/src/Var.cpp
new file mode 100644
index 000000000..93ce6e43b
--- /dev/null
+++ b/compiler/nest/core/src/Var.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Var.h"
+
+namespace nest
+{
+
+Var::operator Expr(void) const { return std::make_shared<expr::VarNode>(_id); }
+
+} // namespace nest
diff --git a/compiler/nest/core/src/Var.test.cpp b/compiler/nest/core/src/Var.test.cpp
new file mode 100644
index 000000000..29f879558
--- /dev/null
+++ b/compiler/nest/core/src/Var.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/Var.h"
+
+#include <gtest/gtest.h>
+
+TEST(VAR, ctor)
+{
+ nest::VarID id{0};
+ nest::Var var{id};
+
+ ASSERT_EQ(var.id(), id);
+}
+
+TEST(VAR, cast)
+{
+ nest::VarID id{0};
+ nest::Var var{id};
+
+ nest::Expr expr = var;
+
+ ASSERT_NE(expr->asVar(), nullptr);
+ ASSERT_EQ(expr->asVar()->id(), id);
+}
diff --git a/compiler/nest/core/src/VarContext.cpp b/compiler/nest/core/src/VarContext.cpp
new file mode 100644
index 000000000..24b1336a7
--- /dev/null
+++ b/compiler/nest/core/src/VarContext.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/VarContext.h"
+
+namespace nest
+{
+
+uint32_t VarContext::count(void) const { return _bound.size(); }
+
+Var VarContext::make(void)
+{
+ const VarID vid{count()};
+
+ _bound.emplace_back(0, 0);
+
+ return Var{vid};
+}
+
+Bound &VarContext::bound(const Var &var) { return _bound.at(var.id().value()); }
+const Bound &VarContext::bound(const Var &var) const { return _bound.at(var.id().value()); }
+
+} // namespace nest
diff --git a/compiler/nest/core/src/VarContext.test.cpp b/compiler/nest/core/src/VarContext.test.cpp
new file mode 100644
index 000000000..169bd6126
--- /dev/null
+++ b/compiler/nest/core/src/VarContext.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/VarContext.h"
+
+#include <gtest/gtest.h>
+
+TEST(VAR_CONTEXT, make)
+{
+ nest::VarContext ctx;
+
+ auto var_0 = ctx.make();
+ auto var_1 = ctx.make();
+
+ ASSERT_FALSE(var_0.id() == var_1.id());
+}
+
+TEST(VAR_CONTEXT, count)
+{
+ nest::VarContext ctx;
+
+ ASSERT_EQ(ctx.count(), 0);
+ // Results are intentionally discarded; only 'count' is under test here.
+ ctx.make();
+
+ ASSERT_EQ(ctx.count(), 1);
+
+ ctx.make();
+
+ ASSERT_EQ(ctx.count(), 2);
+}
+
+TEST(VAR_CONTEXT, bound_one)
+{
+ nest::VarContext ctx;
+
+ auto var_0 = ctx.make();
+
+ ASSERT_EQ(ctx.bound(var_0).min(), 0);
+ ASSERT_EQ(ctx.bound(var_0).max(), 0);
+
+ ctx.bound(var_0) = nest::Bound{-3, 5};
+
+ ASSERT_EQ(ctx.bound(var_0).min(), -3);
+ ASSERT_EQ(ctx.bound(var_0).max(), 5);
+}
+
+TEST(VAR_CONTEXT, bound_independent)
+{
+ nest::VarContext ctx;
+
+ auto var_0 = ctx.make();
+
+ ASSERT_EQ(ctx.bound(var_0).min(), 0);
+ ASSERT_EQ(ctx.bound(var_0).max(), 0);
+
+ auto var_1 = ctx.make();
+
+ ASSERT_EQ(ctx.bound(var_1).min(), 0);
+ ASSERT_EQ(ctx.bound(var_1).max(), 0);
+
+ ctx.bound(var_0) = nest::Bound{-3, 5};
+
+ ASSERT_EQ(ctx.bound(var_0).min(), -3);
+ ASSERT_EQ(ctx.bound(var_0).max(), 5);
+
+ ASSERT_EQ(ctx.bound(var_1).min(), 0);
+ ASSERT_EQ(ctx.bound(var_1).max(), 0);
+}
diff --git a/compiler/nest/core/src/VarID.cpp b/compiler/nest/core/src/VarID.cpp
new file mode 100644
index 000000000..23906764d
--- /dev/null
+++ b/compiler/nest/core/src/VarID.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/VarID.h"
+
+namespace nest
+{
+
+VarID::VarID() : _value{0xffffffff}
+{
+ // DO NOTHING
+}
+
+bool operator==(const VarID &lhs, const VarID &rhs) { return lhs.value() == rhs.value(); }
+bool operator<(const VarID &lhs, const VarID &rhs) { return lhs.value() < rhs.value(); }
+
+} // namespace nest
diff --git a/compiler/nest/core/src/VarID.test.cpp b/compiler/nest/core/src/VarID.test.cpp
new file mode 100644
index 000000000..e4a17a5c1
--- /dev/null
+++ b/compiler/nest/core/src/VarID.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/VarID.h"
+
+#include <gtest/gtest.h>
+
+TEST(VAR_ID, ctor)
+{
+ nest::VarID id{0};
+
+ ASSERT_EQ(id.value(), 0);
+}
+
+TEST(VAR_ID, operator_eq)
+{
+ ASSERT_TRUE(nest::VarID(0) == nest::VarID(0));
+ ASSERT_FALSE(nest::VarID(0) == nest::VarID(1));
+}
+
+TEST(VAR_ID, operator_lt)
+{
+ ASSERT_TRUE(nest::VarID(0) < nest::VarID(1));
+ ASSERT_FALSE(nest::VarID(1) < nest::VarID(0));
+}
diff --git a/compiler/nest/core/src/expr/AddNode.test.cpp b/compiler/nest/core/src/expr/AddNode.test.cpp
new file mode 100644
index 000000000..5c44c4743
--- /dev/null
+++ b/compiler/nest/core/src/expr/AddNode.test.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/expr/AddNode.h"
+
+#include <memory>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct DummyNode final : public nest::expr::Node
+{
+};
+}
+
+TEST(ADD_NODE, cast)
+{
+ auto left = std::make_shared<DummyNode>();
+ auto right = std::make_shared<DummyNode>();
+
+ auto derived = std::make_shared<nest::expr::AddNode>(left, right);
+ std::shared_ptr<nest::expr::Node> base = derived;
+
+ ASSERT_NE(derived.get(), nullptr);
+ ASSERT_EQ(base->asAdd(), derived.get());
+
+ ASSERT_EQ(derived->lhs().get(), left.get());
+ ASSERT_EQ(derived->rhs().get(), right.get());
+}
diff --git a/compiler/nest/core/src/expr/DerefNode.test.cpp b/compiler/nest/core/src/expr/DerefNode.test.cpp
new file mode 100644
index 000000000..e02a7de0b
--- /dev/null
+++ b/compiler/nest/core/src/expr/DerefNode.test.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/expr/DerefNode.h"
+
+#include <memory>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct DummyNode final : public nest::expr::Node
+{
+};
+}
+
+TEST(DEREF_NODE, cast)
+{
+ const nest::DomainID dom_id{0};
+
+ auto derived = std::make_shared<nest::expr::DerefNode>(dom_id, std::make_shared<DummyNode>());
+ std::shared_ptr<nest::expr::Node> base = derived;
+
+ ASSERT_NE(derived.get(), nullptr);
+ ASSERT_EQ(base->asDeref(), derived.get());
+}
diff --git a/compiler/nest/core/src/expr/Macro.cpp b/compiler/nest/core/src/expr/Macro.cpp
new file mode 100644
index 000000000..4e7a13e08
--- /dev/null
+++ b/compiler/nest/core/src/expr/Macro.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/expr/Macro.h"
+
+// This file checks the self-completeness of 'nest/expr/Macro.h'.
+//
+// NOTE Please do NOT remove this file.
diff --git a/compiler/nest/core/src/expr/MulNode.test.cpp b/compiler/nest/core/src/expr/MulNode.test.cpp
new file mode 100644
index 000000000..b2d29471c
--- /dev/null
+++ b/compiler/nest/core/src/expr/MulNode.test.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/expr/MulNode.h"
+
+#include <memory>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct DummyNode final : public nest::expr::Node
+{
+};
+}
+
+TEST(MUL_NODE, cast)
+{
+ auto left = std::make_shared<DummyNode>();
+ auto right = std::make_shared<DummyNode>();
+
+ auto derived = std::make_shared<nest::expr::MulNode>(left, right);
+ std::shared_ptr<nest::expr::Node> base = derived;
+
+ ASSERT_NE(derived.get(), nullptr);
+ ASSERT_EQ(base->asMul(), derived.get());
+
+ ASSERT_EQ(derived->lhs().get(), left.get());
+ ASSERT_EQ(derived->rhs().get(), right.get());
+}
diff --git a/compiler/nest/core/src/expr/Node.cpp b/compiler/nest/core/src/expr/Node.cpp
new file mode 100644
index 000000000..0c162428c
--- /dev/null
+++ b/compiler/nest/core/src/expr/Node.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/expr/Node.h"
+
+// This file checks the self-completeness of 'nest/expr/Node.h'.
+//
+// NOTE Please do NOT remove this file.
diff --git a/compiler/nest/core/src/expr/Subscript.test.cpp b/compiler/nest/core/src/expr/Subscript.test.cpp
new file mode 100644
index 000000000..2f187b86c
--- /dev/null
+++ b/compiler/nest/core/src/expr/Subscript.test.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/expr/Subscript.h"
+#include "nest/expr/VarNode.h"
+
+#include <memory>
+
+#include <gtest/gtest.h>
+
+TEST(SUBSCRIPT, ctor)
+{
+ nest::VarID id_0{0};
+ nest::VarID id_1{1};
+
+ auto expr_0 = std::make_shared<nest::expr::VarNode>(id_0);
+ auto expr_1 = std::make_shared<nest::expr::VarNode>(id_1);
+
+ nest::expr::Subscript sub{expr_0, expr_1};
+
+ ASSERT_EQ(sub.rank(), 2);
+ ASSERT_EQ(sub.at(0), expr_0);
+ ASSERT_EQ(sub.at(1), expr_1);
+}
diff --git a/compiler/nest/core/src/expr/VarNode.test.cpp b/compiler/nest/core/src/expr/VarNode.test.cpp
new file mode 100644
index 000000000..e8b2764e4
--- /dev/null
+++ b/compiler/nest/core/src/expr/VarNode.test.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/expr/VarNode.h"
+
+#include <memory>
+
+#include <gtest/gtest.h>
+
+TEST(VAR_NODE, ctor)
+{
+ auto make = [](uint32_t n) {
+ const nest::VarID id{n};
+
+ return std::make_shared<nest::expr::VarNode>(id);
+ };
+
+ auto node = make(4);
+
+ // NOTE 'id' should be copied
+ ASSERT_EQ(node->id().value(), 4);
+}
+
+TEST(VAR_NODE, cast)
+{
+ const nest::VarID id{0};
+
+ auto derived = std::make_shared<nest::expr::VarNode>(id);
+ std::shared_ptr<nest::expr::Node> base = derived;
+
+ // NOTE Cast method should be overridden
+ ASSERT_NE(derived.get(), nullptr);
+ ASSERT_EQ(base->asVar(), derived.get());
+}
diff --git a/compiler/nest/core/src/expr/Visitor.cpp b/compiler/nest/core/src/expr/Visitor.cpp
new file mode 100644
index 000000000..531e68951
--- /dev/null
+++ b/compiler/nest/core/src/expr/Visitor.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/expr/Visitor.h"
+
+// This file checks the self-completeness of 'nest/expr/Visitor.h'.
+//
+// NOTE Please do NOT remove this file.
diff --git a/compiler/nest/core/src/stmt/Macro.cpp b/compiler/nest/core/src/stmt/Macro.cpp
new file mode 100644
index 000000000..99adb436c
--- /dev/null
+++ b/compiler/nest/core/src/stmt/Macro.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/stmt/Macro.h"
+
+// This file checks the self-completeness of 'nest/stmt/Macro.h'.
+//
+// NOTE Please do NOT remove this file.
diff --git a/compiler/nest/core/src/stmt/Node.cpp b/compiler/nest/core/src/stmt/Node.cpp
new file mode 100644
index 000000000..6265778d9
--- /dev/null
+++ b/compiler/nest/core/src/stmt/Node.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/stmt/Node.h"
+
+// This file checks the self-completeness of 'nest/stmt/Node.h'.
+//
+// NOTE Please do NOT remove this file.
diff --git a/compiler/nest/core/src/stmt/PushNode.test.cpp b/compiler/nest/core/src/stmt/PushNode.test.cpp
new file mode 100644
index 000000000..a54efbb54
--- /dev/null
+++ b/compiler/nest/core/src/stmt/PushNode.test.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/stmt/PushNode.h"
+
+#include <memory>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct DummyExprNode final : public nest::expr::Node
+{
+};
+}
+
+TEST(STMT_PUSH_NODE, cast)
+{
+ auto derived = std::make_shared<nest::stmt::PushNode>(std::make_shared<DummyExprNode>());
+ std::shared_ptr<nest::stmt::Node> base = derived;
+
+ ASSERT_NE(derived.get(), nullptr);
+ ASSERT_EQ(base->asPush(), derived.get());
+}
diff --git a/compiler/nest/core/src/stmt/Visitor.cpp b/compiler/nest/core/src/stmt/Visitor.cpp
new file mode 100644
index 000000000..621379bca
--- /dev/null
+++ b/compiler/nest/core/src/stmt/Visitor.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nest/stmt/Visitor.h"
+
+// This file checks the self-completeness of 'nest/stmt/Visitor.h'.
+//
+// NOTE Please do NOT remove this file.
diff --git a/compiler/nike/CMakeLists.txt b/compiler/nike/CMakeLists.txt
new file mode 100644
index 000000000..737c73b8f
--- /dev/null
+++ b/compiler/nike/CMakeLists.txt
@@ -0,0 +1,15 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(nike STATIC ${SOURCES})
+target_include_directories(nike PUBLIC include)
+
+nnas_find_package(GTest QUIET)
+
+if(NOT GTest_FOUND)
+ return()
+endif(NOT GTest_FOUND)
+
+GTest_AddTest(nike_test ${TESTS})
+target_link_libraries(nike_test nike)
diff --git a/compiler/nike/README.md b/compiler/nike/README.md
new file mode 100644
index 000000000..b61fbe614
--- /dev/null
+++ b/compiler/nike/README.md
@@ -0,0 +1,4 @@
+# nike
+
+_nike_ is a collection of **numeric** value comparison routines.
+- _nike_ is a combination of two words: _numeric_ and _dike_. FYI, _dike_ is the goddess of justice in ancient Greek culture.
diff --git a/compiler/nike/include/nike/AbsoluteEpsilonEqual.h b/compiler/nike/include/nike/AbsoluteEpsilonEqual.h
new file mode 100644
index 000000000..7125b5764
--- /dev/null
+++ b/compiler/nike/include/nike/AbsoluteEpsilonEqual.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NIKE_ABSOLUTE_EPSILON_EQUAL_H__
+#define __NIKE_ABSOLUTE_EPSILON_EQUAL_H__
+
+namespace nike
+{
+
+class AbsoluteEpsilonEqualFunctor
+{
+public:
+ friend AbsoluteEpsilonEqualFunctor absolute_epsilon_equal(float);
+
+private:
+ AbsoluteEpsilonEqualFunctor(float tolerance) : _tolerance{tolerance}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool operator()(float lhs, float rhs) const;
+
+private:
+ float _tolerance;
+};
+
+/**
+ * @note AbsoluteEpsilonEqualFunctor uses its own rule for NaN values.
+ *
+ * For example, "NAN == NAN" is false but "absolute_epsilon_equal(0.001f)(NAN, NAN)" is true.
+ */
+AbsoluteEpsilonEqualFunctor absolute_epsilon_equal(float tolerance);
+
+} // namespace nike
+
+#endif // __NIKE_ABSOLUTE_EPSILON_EQUAL_H__
diff --git a/compiler/nike/include/nike/RelativeEpsilonEqual.h b/compiler/nike/include/nike/RelativeEpsilonEqual.h
new file mode 100644
index 000000000..1b4c04a12
--- /dev/null
+++ b/compiler/nike/include/nike/RelativeEpsilonEqual.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NIKE_RELATIVE_EPSILON_EQUAL_H__
+#define __NIKE_RELATIVE_EPSILON_EQUAL_H__
+
+namespace nike
+{
+
+class RelativeEpsilonEqualFunctor
+{
+public:
+ friend RelativeEpsilonEqualFunctor relative_epsilon_equal(unsigned);
+
+private:
+ RelativeEpsilonEqualFunctor(unsigned tolerance) : _tolerance{tolerance}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool operator()(float lhs, float rhs) const;
+
+private:
+ unsigned _tolerance;
+};
+
+/**
+ * @note RelativeEpsilonEqualFunctor uses its own rule for NaN values.
+ *
+ * For example, "NAN == NAN" is false but "relative_epsilon_equal(1)(NAN, NAN)" is true.
+ */
+RelativeEpsilonEqualFunctor relative_epsilon_equal(unsigned tolerance);
+
+} // namespace nike
+
+#endif // __NIKE_RELATIVE_EPSILON_EQUAL_H__
diff --git a/compiler/nike/src/AbsoluteEpsilonEqual.cpp b/compiler/nike/src/AbsoluteEpsilonEqual.cpp
new file mode 100644
index 000000000..5877ee84f
--- /dev/null
+++ b/compiler/nike/src/AbsoluteEpsilonEqual.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nike/AbsoluteEpsilonEqual.h"
+
+#include <cmath>
+#include <cfloat>
+
+namespace nike
+{
+
+bool AbsoluteEpsilonEqualFunctor::operator()(float lhs, float rhs) const
+{
+ if (std::isnan(lhs) && std::isnan(rhs))
+ {
+ return true;
+ }
+
+ const auto diff = std::fabs(lhs - rhs);
+
+ return diff <= _tolerance;
+}
+
+AbsoluteEpsilonEqualFunctor absolute_epsilon_equal(float tolerance)
+{
+ return AbsoluteEpsilonEqualFunctor{tolerance};
+}
+
+} // namespace nike
diff --git a/compiler/nike/src/AbsoluteEpsilonEqual.test.cpp b/compiler/nike/src/AbsoluteEpsilonEqual.test.cpp
new file mode 100644
index 000000000..8475375a4
--- /dev/null
+++ b/compiler/nike/src/AbsoluteEpsilonEqual.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nike/AbsoluteEpsilonEqual.h"
+
+#include <cmath> // For "NAN"
+
+#include <gtest/gtest.h>
+
+TEST(AbsoluteEpsilonEqualTest, tolerance)
+{
+ auto equal = nike::absolute_epsilon_equal(0.001f);
+
+ ASSERT_TRUE(equal(NAN, NAN));
+ ASSERT_TRUE(equal(1.0f, 1.0f));
+ ASSERT_FALSE(equal(1.0f, 2.0f));
+ ASSERT_TRUE(equal(1.0f, 1.0f + 0.0001f));
+}
diff --git a/compiler/nike/src/RelativeEpsilonEqual.cpp b/compiler/nike/src/RelativeEpsilonEqual.cpp
new file mode 100644
index 000000000..ac8d2ad82
--- /dev/null
+++ b/compiler/nike/src/RelativeEpsilonEqual.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nike/RelativeEpsilonEqual.h"
+
+#include <cmath>
+#include <cfloat>
+
+#include <algorithm>
+
+namespace nike
+{
+
+bool RelativeEpsilonEqualFunctor::operator()(float lhs, float rhs) const
+{
+ if (std::isnan(lhs) && std::isnan(rhs))
+ {
+ return true;
+ }
+
+ // TODO How to handle sign difference?
+ auto const delta = std::fabs(lhs - rhs);
+ auto const max = std::max(std::fabs(lhs), std::fabs(rhs));
+
+ return delta <= (max * FLT_EPSILON * _tolerance);
+}
+
+RelativeEpsilonEqualFunctor relative_epsilon_equal(unsigned tolerance)
+{
+ return RelativeEpsilonEqualFunctor{tolerance};
+}
+
+} // namespace nike
diff --git a/compiler/nike/src/RelativeEpsilonEqual.test.cpp b/compiler/nike/src/RelativeEpsilonEqual.test.cpp
new file mode 100644
index 000000000..ec0805ed5
--- /dev/null
+++ b/compiler/nike/src/RelativeEpsilonEqual.test.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nike/RelativeEpsilonEqual.h"
+
+#include <cmath> // For "NAN"
+#include <cfloat> // For "FLT_EPSILON"
+
+#include <gtest/gtest.h>
+
+TEST(RelativeEpsilonEqualTest, tolerance_1)
+{
+ auto equal = nike::relative_epsilon_equal(1);
+
+ ASSERT_TRUE(equal(NAN, NAN));
+ ASSERT_TRUE(equal(1.0f, 1.0f));
+ ASSERT_FALSE(equal(1.0f, 2.0f));
+}
+
+TEST(RelativeEpsilonEqualTest, tolerance_2)
+{
+ auto equal = nike::relative_epsilon_equal(2);
+
+ ASSERT_TRUE(equal(1.0f, 1.0f + FLT_EPSILON));
+ ASSERT_FALSE(equal(1.0f, 1.0f + 3 * FLT_EPSILON));
+}
diff --git a/compiler/nnc/CMakeLists.txt b/compiler/nnc/CMakeLists.txt
new file mode 100644
index 000000000..f899ffb95
--- /dev/null
+++ b/compiler/nnc/CMakeLists.txt
@@ -0,0 +1,42 @@
+set(NNC_ROOT_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+set(NNC_ROOT_BIN_DIR ${CMAKE_CURRENT_BINARY_DIR})
+
+list(APPEND CMAKE_MODULE_PATH ${NNC_ROOT_SRC_DIR}/cmake)
+
+include(config)
+include(utils)
+
+# configure file that contains extern definitions
+configure_file(${NNC_ROOT_SRC_DIR}/include/Definitions.h.in
+ ${NNC_ROOT_BIN_DIR}/include/Definitions.h)
+
+# add interface header files
+
+# target for compiler executable
+add_executable(${NNC_TARGET_EXECUTABLE} ${NNC_DRIVER_DIR}/main.cpp ${NNC_DRIVER_DIR}/Driver.cpp ${NNC_DRIVER_DIR}/Options.cpp)
+target_link_libraries(${NNC_TARGET_EXECUTABLE} PRIVATE stdex)
+
+# install compiler
+nnc_install_executable(${NNC_TARGET_EXECUTABLE})
+if(NNC_FRONTEND_CAFFE2_ENABLED)
+ target_link_libraries(${NNC_TARGET_EXECUTABLE} PRIVATE mir_caffe2_importer)
+endif()
+if(NNC_FRONTEND_CAFFE_ENABLED)
+ target_link_libraries(${NNC_TARGET_EXECUTABLE} PRIVATE mir_caffe_importer)
+endif()
+if(NNC_FRONTEND_TFLITE_ENABLED)
+ target_link_libraries(${NNC_TARGET_EXECUTABLE} PRIVATE mir_tflite_importer)
+endif()
+if(NNC_FRONTEND_ONNX_ENABLED)
+ target_link_libraries(${NNC_TARGET_EXECUTABLE} PRIVATE mir_onnx_importer)
+endif()
+
+add_subdirectory(support)
+add_subdirectory(pass)
+add_subdirectory(passes)
+add_subdirectory(backends)
+if(NNC_ENABLE_UNITTEST)
+ add_subdirectory(unittests)
+endif()
+add_subdirectory(tests)
+add_subdirectory(utils)
diff --git a/compiler/nnc/README.md b/compiler/nnc/README.md
new file mode 100644
index 000000000..538811f2d
--- /dev/null
+++ b/compiler/nnc/README.md
@@ -0,0 +1,58 @@
+# nnc
+Neural Network Compiler
+
+### DESCRIPTION
+
+nnc is a neural network compiler that transforms neural networks of various formats into source or machine code.
+> At this moment only two NNs are supported (MobileNet and InceptionV3) in TensorFlow Lite or Caffe format.
+
+### SYNOPSIS
+
+nnc OPTIONS
+
+### OPTIONS
+
+ --help, -h - print usage and exit
+ --caffe - treat input file as Caffe model
+ --tflite - treat input file as TensorFlow Lite model
+ --target - select target language to emit for given architecture.
+ Valid values are 'x86-c++', 'interpreter'
+ --nnmodel, -m - specify input file with NN model
+ --output, -o - specify name for output files
+ --output-dir, -d - specify directory for output files
+ --input-model-data - interpreter option: specify file with neural network input data.
+ This file contains array of floats in binary form
+ --input-node - interpreter option: set input node in Computational Graph
+ --output-node - interpreter option: set output node in Computational Graph
+
+
+
+### USAGE
+
+Assuming that user has already installed nnc as follows:
+```
+$ cmake <path_to_nnc_sources> -DCMAKE_INSTALL_PREFIX=<path_to_install>
+$ make all && make install
+```
+
+Also assuming that we have tflite model (for example inceptionv3.tflite)
+
+**1. Running nnc in interpreter mode:**
+```
+<path_to_install>/bin/nnc \
+--nnmodel inceptionv3.tflite \
+--target interpreter \
+--input-model-data data.file \
+--input-node input --output-node output
+```
+
+**2. Running to generate C/C++ source code:**
+
+```
+<path_to_install>/bin/nnc \
+--nnmodel inceptionv3.tflite \
+--target x86-c++ \
+--output inception \
+--output-dir output_dir
+```
+
diff --git a/compiler/nnc/backends/CMakeLists.txt b/compiler/nnc/backends/CMakeLists.txt
new file mode 100644
index 000000000..9c210c755
--- /dev/null
+++ b/compiler/nnc/backends/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_subdirectory(acl_soft_backend)
+add_subdirectory(interpreter)
+add_subdirectory(soft_backend)
diff --git a/compiler/nnc/backends/acl_soft_backend/AclArtifactUtilities.in b/compiler/nnc/backends/acl_soft_backend/AclArtifactUtilities.in
new file mode 100644
index 000000000..b6ce15059
--- /dev/null
+++ b/compiler/nnc/backends/acl_soft_backend/AclArtifactUtilities.in
@@ -0,0 +1,29 @@
+static void initializeTensor(arm_compute::CLTensor& tensor, const arm_compute::TensorShape& ts) {
+ arm_compute::TensorInfo ti(ts, arm_compute::Format::F32);
+ tensor.allocator()->init(ti);
+}
+
+static void fillTensor(arm_compute::CLTensor& tensor, float scalar) {
+ tensor.map();
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+ arm_compute::Iterator iter(&tensor, window);
+ arm_compute::execute_window_loop(window, [&scalar, &iter](const arm_compute::Coordinates&) {
+ memcpy(iter.ptr(), &scalar, sizeof(float));
+ }, iter);
+
+ tensor.unmap();
+}
+
+static void deserializeTensor(std::istream& par_in, arm_compute::CLTensor& tensor) {
+ tensor.map();
+
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+ arm_compute::Iterator iter(&tensor, window);
+ arm_compute::execute_window_loop(window, [&par_in, &iter](const arm_compute::Coordinates&) {
+ par_in.read(reinterpret_cast<char*>(iter.ptr()), sizeof(float));
+ }, iter);
+
+ tensor.unmap();
+}
diff --git a/compiler/nnc/backends/acl_soft_backend/AclCppGenerator.cpp b/compiler/nnc/backends/acl_soft_backend/AclCppGenerator.cpp
new file mode 100644
index 000000000..3a5b9ecaf
--- /dev/null
+++ b/compiler/nnc/backends/acl_soft_backend/AclCppGenerator.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backends/acl_soft_backend/AclCppGenerator.h"
+#include "AclCppOpGenerator.h"
+#include "backends/acl_soft_backend/AclCppException.h"
+
+#include <boost/filesystem.hpp>
+
+#include <fstream>
+#include <utility>
+
+namespace nnc
+{
+
+using namespace std;
+namespace fs = boost::filesystem;
+
+AclCppCodeGenerator::AclCppCodeGenerator(string output_dir, string artifact_name)
+ : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
+{
+}
+
+void AclCppCodeGenerator::run(mir::Graph *data)
+{
+ mir::Graph *g = data;
+ assert(g);
+
+ // Create a directory for generated artifact files.
+ fs::create_directory(_output_dir);
+
+ string base_path = _output_dir + "/" + _artifact_name;
+ string code_path = base_path + ".cpp";
+ string decl_path = base_path + ".h";
+ string par_path = base_path + ".par";
+
+ // Create the source and header files output streams.
+ ofstream code_out(code_path);
+
+ if (code_out.fail())
+ throw AclCppException("Can not open code output file: " + code_path);
+
+ ofstream decl_out(decl_path);
+
+ if (decl_out.fail())
+ throw AclCppException("Can not open declaration output file: " + decl_path);
+
+ ofstream par_out(par_path, ios_base::out | ios_base::binary);
+
+ if (par_out.fail())
+ throw AclCppException("Can not open parameter output file: " + par_path);
+
+ ArtifactGeneratorCppCode code_gen(code_out);
+ ArtifactGeneratorCppDecl decl_gen(decl_out);
+
+ // Generate the artifact.
+ AclCppOpGenerator op_generator(_artifact_name, par_out);
+ auto dom = op_generator.generate(g);
+ dom.accept(&code_gen);
+ dom.accept(&decl_gen);
+}
+
+} // namespace nnc
diff --git a/compiler/nnc/backends/acl_soft_backend/AclCppOpGenerator.cpp b/compiler/nnc/backends/acl_soft_backend/AclCppOpGenerator.cpp
new file mode 100644
index 000000000..b5e3734ae
--- /dev/null
+++ b/compiler/nnc/backends/acl_soft_backend/AclCppOpGenerator.cpp
@@ -0,0 +1,1000 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AclCppOpGenerator.h"
+#include "backends/acl_soft_backend/AclCppException.h"
+#include "mir/ShapeRange.h"
+#include "mir/TensorUtil.h"
+#include "mir/Tensor.h"
+
+#include "mir/Operation.h"
+#include "mir/OpDefs.h"
+
+#include <algorithm>
+#include <map>
+
+namespace nnc
+{
+
+using namespace std;
+using namespace mir;
+
+// Initializes the DOM module with the artifact name and binds the output
+// stream used for serialized tensor parameters. The constructor/inference
+// block pointers are filled in later by generate().
+AclCppOpGenerator::AclCppOpGenerator(const string &name, ostream &par_out)
+ : _parOut(par_out), _module(name), _constrBlock(nullptr), _infBlock(nullptr),
+ _clScheduler(AF::id("arm_compute::CLScheduler"))
+{
+}
+
+// Converts the MIR graph into the artifact DOM. Sets up headers, the artifact
+// class, the parameter input stream, then visits every graph operation; the
+// deferred entities (named accessors, allocations, deserializations, fills)
+// are emitted afterwards. Returns a reference to the completed module.
+const ArtifactModule &AclCppOpGenerator::generate(mir::Graph *g)
+{
+ // Including headers.
+ _module.addHeaderSysInclude("fstream");
+ _module.addHeaderInclude("arm_compute/core/Types.h");
+ _module.addHeaderInclude("arm_compute/runtime/CL/CLFunctions.h");
+ _module.addHeaderInclude("arm_compute/runtime/CL/CLScheduler.h");
+ _module.addHeaderInclude("arm_compute/runtime/CL/CLBufferAllocator.h");
+ _module.addHeaderInclude("arm_compute/runtime/BlobLifetimeManager.h");
+ _module.addHeaderInclude("arm_compute/runtime/PoolManager.h");
+ _module.addHeaderInclude("arm_compute/runtime/MemoryManagerOnDemand.h");
+
+ // The general structure creation.
+ _artifactClass = _module.createClass(_module.name());
+ _constrBlock = _artifactClass->getConstrBlock();
+ _inferenceFunction = _artifactClass->func(true, "void", "Inference");
+ _infBlock = _inferenceFunction->getBlock();
+
+ // Input parameter stream preparation: the generated constructor opens the
+ // <artifact>.par file and throws (a std::string) if it is missing.
+ _parInVar = _artifactClass->var(false, "std::ifstream", "_parIn");
+ _parIn = _parInVar->use();
+ string par_file_name = _module.name() + ".par";
+ _constrBlock->call("open", {AF::lit("\"" + par_file_name + "\""),
+ AF::lit("std::ios_base::in | std::ios_base::binary")},
+ _parIn);
+ auto file_fail = _constrBlock->ifCond(AF::call("fail", {}, _parIn));
+ auto file_fail_block = file_fail->getBlock();
+ file_fail_block->addStatement(
+ AF::lit("throw std::string(\"Failed to open file: " + par_file_name + " for reading\")"));
+
+ // Traverse the computational graph.
+ g->accept(this);
+
+ // Generate all the deferred entities.
+ genNamed(g);
+ genPersistentTensorAllocations();
+ genDeserializations();
+ genFillings();
+
+ // Make sure all the OpenCL jobs are done executing:
+ _infBlock->call("sync", {}, AF::call("get", {}, _clScheduler, ArtifactCallType::scope));
+
+ return _module;
+}
+
+// Lowers ConcatOp to arm_compute::CLConcatenateLayer: collects the input
+// tensor pointers into a vector and maps the MIR axis index onto an ACL
+// DataLayoutDimension name.
+void AclCppOpGenerator::visit(ops::ConcatOp &op)
+{
+ const auto &ir_inputs = op.getInputs();
+ const auto *ir_output = op.getOutput(0);
+
+ static const char *axis_names[] = {
+ "arm_compute::DataLayoutDimension::BATCHES", "arm_compute::DataLayoutDimension::CHANNEL",
+ "arm_compute::DataLayoutDimension::HEIGHT", "arm_compute::DataLayoutDimension::WIDTH"};
+
+ int axis = op.getAxis();
+ assert(axis >= 0 && axis < static_cast<int>(sizeof(axis_names) / sizeof(axis_names[0])) &&
+ "axis outside this range is not supported in ACL");
+ const char *axis_name = axis_names[axis];
+
+ auto out = genTensor(ir_output);
+ auto prefix = out->name() + "_concatenate_layer";
+ auto inputs_var = _constrBlock->var("std::vector<arm_compute::ICLTensor*>", prefix + "_inputs");
+ auto inputs = inputs_var->use();
+
+ for (const Operation::Output *ir_input : ir_inputs)
+ _constrBlock->call("push_back", {AF::ref(AF::id(tensorName(ir_input)))}, inputs);
+
+ auto layer = genLayer("arm_compute::CLConcatenateLayer", prefix,
+ {inputs, AF::ref(out), AF::lit(axis_name)});
+
+ addToPersistentTensors(out);
+ genLayerExecution(layer);
+}
+
+// Lowers Conv2DOp to arm_compute::CLConvolutionLayer. Grouped convolutions
+// are not supported by this path (asserted).
+void AclCppOpGenerator::visit(ops::Conv2DOp &op)
+{
+ assert(op.getNumGroups() == 1);
+ genConvolution(op, "arm_compute::CLConvolutionLayer", "_convolution_layer");
+}
+
+// Lowers DepthwiseConv2DOp to arm_compute::CLDepthwiseConvolutionLayer.
+void AclCppOpGenerator::visit(ops::DepthwiseConv2DOp &op)
+{
+ genConvolution(op, "arm_compute::CLDepthwiseConvolutionLayer", "_depthwise_convolution_layer");
+}
+
+// Lowers SoftmaxOp to arm_compute::CLSoftmaxLayer. Only the last axis is
+// supported (asserted), and at most one dimension of the output may be > 1;
+// when the axis is non-zero the data is reshaped before and after the softmax
+// through two temporary tensors.
+void AclCppOpGenerator::visit(ops::SoftmaxOp &op)
+{
+ assert(op.getNumInputs() == 1);
+ const auto *ir_input = op.getInput(0);
+ const auto *ir_output = op.getOutput(0);
+
+ auto in = AF::id(tensorName(ir_input));
+
+ int rank = ir_output->getShape().rank();
+ // CLPermute does not support all kinds of permutations now.
+ // rank can be more than 2 in our models, so we can not use CLTranspose.
+ // This means we can support tensors with no more then one axis > 1.
+ int axis = op.getAxis();
+ assert(axis == rank - 1);
+ int nof_long_axes = 0;
+
+ for (int i = 0; i < rank; ++i)
+ {
+ if (ir_output->getShape().dim(i) > 1)
+ ++nof_long_axes;
+ }
+
+ // TODO: Consider how to support Softmax on more general inputs.
+ if (nof_long_axes > 1)
+ throw AclCppException("Unsupported Softmax operation with several dimensions greater than 1");
+
+ // Create the output tensor.
+ shared_ptr<ArtifactId> output = genTensor(ir_output);
+ auto layer_name_prefix = output->name();
+
+ if (axis == 0)
+ {
+ // Simple version: do not need pre and post reshapes.
+ // Apply the softmax operation.
+ auto sm = genLayer("arm_compute::CLSoftmaxLayer", layer_name_prefix + "_softmax_layer",
+ {AF::ref(in), AF::ref(output)});
+ addToPersistentTensors(output);
+ genLayerExecution(sm);
+ }
+ else
+ {
+ // TODO refactor this code, it works only with 1 batch
+
+ // Need to reshape before the Softmax application and after it.
+ // Then we need two tensors for intermediate results. This is because we do a couple of
+ // auxiliary
+ // reshapes: one to transform the input tensor to a unidimensional tensor and the second to
+ // transorm the result of the softmax operation back to the original form.
+ Shape sm_shape(ir_output->getShape());
+
+ std::swap(sm_shape.dim(axis), sm_shape.dim(-1));
+
+ auto tmp = genTensor(layer_name_prefix + "_tmp", sm_shape);
+ auto tmp2 = genTensor(layer_name_prefix + "_tmp2", sm_shape);
+
+ // Do the input permutation.
+ auto transp1 = genLayer("arm_compute::CLReshapeLayer", layer_name_prefix + "_transp_layer1",
+ {AF::ref(in), AF::ref(tmp)});
+ addToPersistentTensors(tmp);
+ genLayerExecution(transp1);
+
+ // Apply the softmax operaion.
+ auto sm = genLayer("arm_compute::CLSoftmaxLayer", layer_name_prefix + "_softmax_layer",
+ {AF::ref(tmp), AF::ref(tmp2)});
+ addToPersistentTensors(tmp2);
+ genLayerExecution(sm);
+
+ // Reshape the output to the original form.
+ auto transp2 = genLayer("arm_compute::CLReshapeLayer", layer_name_prefix + "_transp_layer2",
+ {AF::ref(tmp2), AF::ref(output)});
+ addToPersistentTensors(output);
+ genLayerExecution(transp2);
+ }
+}
+
+// Emits a local arm_compute::PadStrideInfo variable into `block` for a
+// convolution/pooling operation `op`. Note the argument order: ACL expects
+// (stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom), i.e. the
+// width dimension (index 1 in MIR) comes first. Rounding is fixed to FLOOR.
+template <typename Op>
+shared_ptr<ArtifactVariable> AclCppOpGenerator::genPadStrideInfo(const Op &op, const string &prefix,
+ ArtifactBlock *block)
+{
+ using AF = ArtifactFactory;
+
+ const Shape strides(op.getStrides());
+ assert(strides.rank() == 2);
+ auto &padding_before = op.getPaddingBefore();
+ auto &padding_after = op.getPaddingAfter();
+
+ string type_name = "arm_compute::PadStrideInfo";
+
+ string var_name = prefix + "_pad_stride_info";
+
+ list<std::shared_ptr<ArtifactExpr>> var_init_params = {
+ AF::lit(to_string(strides.dim(1))),
+ AF::lit(to_string(strides.dim(0))),
+ AF::lit(to_string(padding_before.at(1))),
+ AF::lit(to_string(padding_after.at(1))),
+ AF::lit(to_string(padding_before.at(0))),
+ AF::lit(to_string(padding_after.at(0))),
+ AF::lit("arm_compute::DimensionRoundingType::FLOOR")};
+
+ auto pad_stride_info_var = block->var(type_name, var_name, {}, var_init_params);
+
+ return pad_stride_info_var;
+}
+
+// Creates a tensor holding `input` permuted from MIR layout (NHWC) to the
+// ACL-friendly NCHW layout using the (0, 3, 1, 2) permutation. The transposed
+// tensor is allocated at inference time rather than persistently.
+shared_ptr<ArtifactId> AclCppOpGenerator::genTransposeMIRtoACL(const string &name,
+ const Shape &input_shape,
+ const shared_ptr<ArtifactId> &input)
+{
+ Shape transposed_shape = transposeShape<0, 3, 1, 2>(input_shape);
+ shared_ptr<ArtifactId> transposed_id = genTensor(name, transposed_shape, false);
+ const bool allocate_at_inference = true;
+ genTranspose(input, transposed_id, {0, 3, 1, 2}, allocate_at_inference);
+ return transposed_id;
+}
+
+// Inverse of genTransposeMIRtoACL: permutes `input` back from NCHW to the MIR
+// NHWC layout via (0, 2, 3, 1). The result is made a persistent tensor
+// (allocated in the generated constructor).
+shared_ptr<ArtifactId> AclCppOpGenerator::genTransposeACLtoMIR(const string &name,
+ const Shape &input_shape,
+ const shared_ptr<ArtifactId> &input)
+{
+ Shape transposed_shape = transposeShape<0, 2, 3, 1>(input_shape);
+ shared_ptr<ArtifactId> transposed_id = genTensor(name, transposed_shape, false);
+ const bool allocate_at_inference = false;
+ genTranspose(input, transposed_id, {0, 2, 3, 1}, allocate_at_inference);
+ return transposed_id;
+}
+
+// Lowers AvgPool2DOp to a CLPoolingLayer with AVG pooling; padding is
+// excluded from the average unless the op requests include-pad semantics.
+void AclCppOpGenerator::visit(ops::AvgPool2DOp &op)
+{
+ genPooling(op, "arm_compute::PoolingType::AVG", !op.getIncludePad());
+}
+
+// Lowers MaxPool2DOp to a CLPoolingLayer with MAX pooling.
+void AclCppOpGenerator::visit(ops::MaxPool2DOp &op)
+{
+ // The value of 'exclude_padding' does not really matter for MAX pooling.
+ genPooling(op, "arm_compute::PoolingType::MAX", false);
+}
+
+// Lowers FullyConnectedOp to arm_compute::CLFullyConnectedLayer. Weights must
+// come from a ConstantOp (they are transposed [1,0] and serialized into the
+// .par file); only rank-2 outputs are supported. No bias is passed (nullptr).
+void AclCppOpGenerator::visit(ops::FullyConnectedOp &op)
+{
+ assert(op.getNumInputs() == 2);
+ const auto *ir_input = op.getInput(0);
+ const auto *ir_weights = op.getInput(1);
+ const auto *ir_output = op.getOutput(0);
+
+ auto ir_weights_op = dynamic_cast<const mir::ops::ConstantOp *>(ir_weights->getNode());
+ if (ir_weights_op == nullptr)
+ throw AclCppException("Unsupported operation type");
+
+ const TensorVariant ir_weights_tensor = transposeTensor<1, 0>(ir_weights_op->getValue());
+ const Shape &ir_weights_shape = ir_weights_tensor.getShape();
+
+ // Get the input node tensor id in the DOM.
+ auto in = AF::id(tensorName(ir_input));
+
+ // Create the output tensor in the DOM.
+ if (ir_output->getShape().rank() != 2)
+ throw AclCppException("Unsupported number of dimensions in fc layer");
+ auto out = genTensor(ir_output);
+ string operation_name = out->name() + "_fully_connected_layer";
+
+ // Create the weights tensor in the DOM and use its id.
+ auto weights = genTensor(operation_name + "_weights", ir_weights_shape);
+
+ // Instantiate the CLFullyConnectedLayer object.
+ auto layer = genLayer("arm_compute::CLFullyConnectedLayer", operation_name,
+ {AF::ref(in), AF::ref(weights), AF::lit("nullptr"), AF::ref(out)});
+
+ addToPersistentTensors(weights);
+ // Serialize the weights tensor and generate the function to deserialize it in the artifact.
+ serializeTensor(weights, ir_weights_tensor);
+ addToPersistentTensors(out);
+ genLayerExecution(layer);
+}
+
+// Lowers CappedReluOp to an ACL LU_BOUNDED_RELU activation with the cap as
+// the upper bound parameter.
+void AclCppOpGenerator::visit(ops::CappedReluOp &op)
+{
+ genActivation(op, "LU_BOUNDED_RELU", op.getCap());
+}
+
+// Lowers InputOp: only declares a persistent tensor for the graph input —
+// no layer is generated, the caller fills the tensor via getInput().
+void AclCppOpGenerator::visit(ops::InputOp &op)
+{
+ shared_ptr<ArtifactId> tensor;
+ tensor = genTensor(op.getOutput(0));
+ addToPersistentTensors(tensor);
+}
+
+// FIXME: temporary solution
+// Decides whether a ConstantOp's tensor must be serialized by visit(ConstantOp).
+// Returns false when every use of the constant is a "self-serializing" op
+// (conv2D / fullyConnected) consuming it at the expected weights input index,
+// since those visitors serialize the tensor themselves; dangling constants are
+// also skipped (loop body never runs, so false is returned).
+static bool shouldSerializeConstant(const ops::ConstantOp &op)
+{
+ // Operations from 'self_serializing_ops_to_inputs' serializing tensors with appropriate index
+ // themselves,
+ // so we don't serialize them here, also we don't serialize tensors from dangling ConstantOp
+ static std::map<Operation::Type, std::size_t> self_serializing_ops_to_inputs{
+ {Operation::Type::conv2D, 1}, {Operation::Type::fullyConnected, 1}};
+
+ for (Operation::Use use : op.getOutput(0)->getUses())
+ {
+ auto self_serializing_op_it = self_serializing_ops_to_inputs.find(use.getNode()->getType());
+ // Serialize if next_node type not from 'self_serializing_ops_to_inputs'
+ if (self_serializing_op_it == self_serializing_ops_to_inputs.end())
+ return true;
+
+ // If next_node has current ConstantOp as it's previous node, but not with appropriate index -
+ // serialize current ConstantOp
+ if (self_serializing_op_it->second != use.getIndex())
+ return true;
+ }
+
+ return false;
+}
+
+// Lowers ConstantOp: declares a persistent tensor and serializes its data
+// into the .par file, unless a consuming op serializes it itself
+// (see shouldSerializeConstant).
+void AclCppOpGenerator::visit(ops::ConstantOp &op)
+{
+ if (shouldSerializeConstant(op))
+ {
+ TensorVariant data = op.getValue();
+ shared_ptr<ArtifactId> out = genTensor(op.getOutput(0));
+ addToPersistentTensors(out);
+ serializeTensor(out, data);
+ }
+}
+
+// Lowers ReluOp to an ACL RELU activation layer.
+void AclCppOpGenerator::visit(ops::ReluOp &op) { genActivation(op, "RELU"); }
+
+// Lowers ReshapeOp to arm_compute::CLReshapeLayer. Only output shapes of the
+// form (1,...,1, N, 1,...,1) — at most one non-unit dimension — are accepted,
+// since the layout reinterpretation is only safe in that case.
+void AclCppOpGenerator::visit(ops::ReshapeOp &op)
+{
+ assert(op.getNumInputs() == 1);
+ const auto *ir_input = op.getInput(0);
+ const auto *ir_output = op.getOutput(0);
+
+ // Get the id of the input tensor in the generated artifact.
+ auto in = AF::id(tensorName(ir_input));
+
+ // Create the output tensor in the DOM and return its id.
+ const Shape &out_shape = ir_output->getShape();
+
+ // This check confirms that we can "safely" reshape data
+ // The only safe configuration of output shape is (1...1, N, 1 ... 1)
+ bool found_non_one = false;
+ for (int32_t i = 0; i < out_shape.rank(); ++i)
+ {
+ if (out_shape.dim(i) != 1)
+ {
+ if (found_non_one)
+ throw AclCppException("Unsupported result of reshape");
+ found_non_one = true;
+ }
+ }
+
+ shared_ptr<ArtifactId> out = genTensor(ir_output);
+
+ // Create an instance of the CLReshapeLayer class as a member of the artifact class.
+ auto layer = genLayer("arm_compute::CLReshapeLayer", out->name() + "_reshape_layer",
+ {AF::ref(in), AF::ref(out)});
+ addToPersistentTensors(out);
+ genLayerExecution(layer);
+}
+
+// SliceOp is not implemented by this backend.
+void AclCppOpGenerator::visit(mir::ops::SliceOp & /*op*/)
+{
+ throw AclCppException("Unimplemented operation: SliceOp");
+}
+
+// Lowers TanhOp to an ACL TANH activation layer.
+void AclCppOpGenerator::visit(ops::TanhOp &op) { genActivation(op, "TANH"); }
+
+// Lowers DeConv2DOp to arm_compute::CLDeconvolutionLayer.
+void AclCppOpGenerator::visit(ops::DeConv2DOp &op)
+{
+ genConvolution(op, "arm_compute::CLDeconvolutionLayer", "_deconvolution_layer");
+}
+
+// ELU has no ACL counterpart at the targeted library version.
+void AclCppOpGenerator::visit(ops::EluOp & /*op*/)
+{
+ throw AclCppException("EluOp not supported by the ACL library yet.");
+}
+
+// Lowers PadOp to arm_compute::CLPadLayer: builds a PaddingList with one
+// (before, after) pair per input dimension. Only zero padding values are
+// supported (asserted; see FIXME about constant_value below).
+void AclCppOpGenerator::visit(ops::PadOp &op)
+{
+ assert(op.getNumInputs() == 1);
+ const auto *ir_input = op.getInput(0);
+ const auto *ir_output = op.getOutput(0);
+
+ // Get the id of the input tensor.
+ auto input = AF::id(tensorName(ir_input));
+
+ // Create the output tensor in the DOM
+ auto out = genTensor(ir_output);
+ addToPersistentTensors(out);
+
+ // Generate PadLayer params
+ auto prefix = out->name() + "_pad_layer";
+ auto pad_list_decl = _constrBlock->var("arm_compute::PaddingList", prefix + "_pads");
+ auto pad_list = pad_list_decl->use();
+ const auto &padding_before = op.getPaddingBefore();
+ const auto &padding_after = op.getPaddingAfter();
+ for (int i = 0; i < ir_input->getShape().rank(); ++i)
+ {
+ auto pad_var = _constrBlock->var(
+ "arm_compute::PaddingInfo", prefix + "_pad_" + to_string(i), {},
+ {AF::lit(to_string(padding_before[i])), AF::lit(to_string(padding_after[i]))});
+ auto pad = pad_var->use();
+ _constrBlock->call("push_back", {pad}, pad_list);
+ }
+
+ // Generate PadLayer
+ // FIXME Set up the `constant_value` parameter.
+ assert(op.getPaddingValue() == 0.0f);
+ auto layer =
+ genLayer("arm_compute::CLPadLayer", prefix, {AF::ref(input), AF::ref(out), pad_list});
+ genLayerExecution(layer);
+}
+
+// Shared lowering for AvgPool2DOp/MaxPool2DOp: transposes the input to NCHW,
+// configures a CLPoolingLayer (window size, pad/stride, pooling type,
+// exclude-padding flag), then transposes the result back to MIR layout. The
+// intermediate NCHW tensors are allocated/freed around the layer execution.
+template <typename Op>
+void AclCppOpGenerator::genPooling(Op &op, const std::string &pooling_type, bool exclude_padding)
+{
+ assert(op.getNumInputs() == 1);
+ const auto *ir_input = op.getInput(0);
+ const auto *ir_output = op.getOutput(0);
+
+ string in_name = tensorName(ir_input);
+ auto in_id = AF::id(in_name);
+
+ const string output_tensor_name = tensorName(ir_output);
+
+ // Transpose data from MIR format to format compatible with ACL
+ // NOTE(review): no '_' separator here, unlike "_transposed_input" in
+ // genConvolution — looks like an inconsistency; the generated name is still
+ // unique, so behavior is unaffected.
+ const string transposed_input_name = output_tensor_name + "transposed_input";
+ shared_ptr<ArtifactId> transposed_input =
+ genTransposeMIRtoACL(transposed_input_name, ir_input->getShape(), in_id);
+
+ const string layer_name = output_tensor_name + "_pooling_layer";
+
+ shared_ptr<ArtifactVariable> pad_stride_info_var = genPadStrideInfo(op, layer_name, _constrBlock);
+
+ shared_ptr<ArtifactId> pad_stride_info = pad_stride_info_var->use();
+
+ // Create kernel window info
+ shared_ptr<ArtifactVariable> kernel_window_var = _constrBlock->var(
+ "arm_compute::Size2D", layer_name + "_kernel_window", {},
+ {AF::lit(to_string(op.getWindowSize()[1])), AF::lit(to_string(op.getWindowSize()[0]))});
+ shared_ptr<ArtifactId> kernel_window = kernel_window_var->use();
+
+ // Create pooling info: pooling type, kernel info, strides, etc
+ shared_ptr<ArtifactVariable> pooling_info_var =
+ _constrBlock->var("arm_compute::PoolingLayerInfo", layer_name + "_pooling_info", {},
+ {AF::lit(pooling_type), kernel_window, pad_stride_info,
+ AF::lit(exclude_padding ? "true" : "false")});
+ shared_ptr<ArtifactId> pooling_info = pooling_info_var->use();
+
+ // Generate auxiliary tensor to hold transposed output of pool in NCHW format
+ Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output->getShape());
+ shared_ptr<ArtifactId> transposed_output =
+ genTensor(layer_name + "_out_transpose", transposed_output_shape);
+
+ // Actual layer creation
+ shared_ptr<ArtifactId> layer =
+ genLayer("arm_compute::CLPoolingLayer", layer_name,
+ {AF::ref(transposed_input), AF::ref(transposed_output), pooling_info});
+ genTensorAllocation(_infBlock, transposed_output);
+ genLayerExecution(layer);
+
+ // The returned id is unused; the call emits the ACL->MIR transpose layer.
+ shared_ptr<ArtifactId> output =
+ genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
+
+ genTensorDeallocation(_infBlock, transposed_input);
+ genTensorDeallocation(_infBlock, transposed_output);
+}
+
+// Shared lowering for Conv2D / DepthwiseConv2D / DeConv2D. Requires constant
+// weights (ConstantOp); transposes them to the layout ACL expects, transposes
+// the input to NCHW, configures the ACL layer named by `acl_func_name`
+// (deconvolution gets two extra inner_border zeros), serializes the weights,
+// and transposes the result back to MIR layout.
+template <typename Op>
+void AclCppOpGenerator::genConvolution(Op &op, const string &acl_func_name, const string &suffix)
+{
+ const auto *ir_input = op.getInput(0);
+ const auto *ir_weights = op.getInput(1);
+ const auto *ir_output = op.getOutput(0);
+
+ auto ir_weights_op = dynamic_cast<const ops::ConstantOp *>(ir_weights->getNode());
+ if (ir_weights_op == nullptr)
+ throw AclCppException("Unsupported operation type");
+
+ auto ir_weights_tensor = ir_weights_op->getValue();
+ if (op.getType() == Operation::Type::conv2D)
+ {
+ // [Co, Hk, Wk, Ci] -> [Co, Ci, Hk, Wk].
+ ir_weights_tensor = transposeTensor<0, 3, 1, 2>(ir_weights_tensor);
+ }
+ else
+ {
+ ir_weights_tensor = transposeTensor<3, 2, 0, 1>(ir_weights_tensor);
+ }
+
+ const Shape &ir_weights_shape = ir_weights_tensor.getShape();
+
+ // get output tensor name that is used as base for other names
+ const string output_tensor_name = tensorName(ir_output);
+
+ // Get the identifier of the input tensor in the DOM.
+ auto input = AF::id(tensorName(ir_input));
+
+ // Generate auxiliary tensor to hold transposed input of convolution in NCHW format
+ shared_ptr<ArtifactId> transposed_input =
+ genTransposeMIRtoACL(output_tensor_name + "_transposed_input", ir_input->getShape(), input);
+
+ // Create the transposed output tensor in the DOM.
+ const string transposed_output_name = output_tensor_name + "_transposed_output";
+ Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output->getShape());
+ shared_ptr<ArtifactId> transposed_output =
+ genTensor(transposed_output_name, transposed_output_shape);
+
+ string operation_name = output_tensor_name + suffix;
+
+ // Generate a tensor for weights (kernel) in the DOM.
+ auto weights = genTensor(operation_name + "_weights", ir_weights_shape);
+
+ // Create a local variable of type PadStrideInfo in the artifact constructor:
+ // PadStrideInfo pad_stride_info(stride_x, stride_y, pad_x, pad_y);
+ auto pad_stride_info_var = genPadStrideInfo(op, operation_name, _constrBlock);
+
+ auto pad_stride_info = pad_stride_info_var->use();
+
+ // The parameter for the conv_layer.config(&in, &weights, nullptr, &out, pad_stride_info)
+ // function call.
+ list<shared_ptr<ArtifactExpr>> config_params{AF::ref(transposed_input), AF::ref(weights),
+ AF::lit("nullptr"), AF::ref(transposed_output),
+ pad_stride_info};
+
+ // Add to additional parameters for deconvolution.
+ if (op.getType() == Operation::Type::deConv2D)
+ {
+ config_params.push_back(AF::lit("0"));
+ config_params.push_back(AF::lit("0"));
+ }
+
+ // Create the convolution (/depthwise convolution/deconvolution) layer class instance.
+ shared_ptr<ArtifactId> layer = genLayer(acl_func_name, operation_name, config_params);
+
+ addToPersistentTensors(weights);
+ // Save the IR weights tensor to later read this in the artifact.
+ serializeTensor(weights, ir_weights_tensor);
+ genTensorAllocation(_infBlock, transposed_output);
+ genLayerExecution(layer);
+
+ // Generate auxiliar tensor to hold transposed output of convolution in NHWC format
+ shared_ptr<ArtifactId> output =
+ genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
+
+ genTensorDeallocation(_infBlock, transposed_input);
+ genTensorDeallocation(_infBlock, transposed_output);
+}
+
+// Shared lowering for activation-style ops (ReLU, Tanh, CappedReLU, etc.):
+// emits an ActivationLayerInfo with the ACL activation function name and the
+// optional alpha/beta parameters, then a CLActivationLayer instance.
+void AclCppOpGenerator::genActivation(const Operation &op, const std::string &activation_name,
+ float a, float b)
+{
+ assert(op.getNumInputs() == 1);
+ const auto *ir_input = op.getInput(0);
+ const auto *ir_output = op.getOutput(0);
+
+ // Get the id of the input tensor.
+ auto in = AF::id(tensorName(ir_input));
+
+ // Create the output tensor in the DOM and return its id.
+ shared_ptr<ArtifactId> output = genTensor(ir_output);
+
+ auto prefix = output->name() + "_activation_layer";
+
+ // Create an instance of the ActivationLayerInfo class as a local variable in the artifact
+ // constructor. This instance profide information about the concrete activation function,
+ // like: ReLU, Tanh etc and two optional parameter (alpha and betha) needed by some activations.
+ auto activation_info_var = _constrBlock->var(
+ "arm_compute::ActivationLayerInfo", prefix + "_activation_info", {},
+ {AF::lit("arm_compute::ActivationLayerInfo::ActivationFunction::" + activation_name),
+ AF::lit(to_string(a)), AF::lit(to_string(b))});
+ auto activation_info = activation_info_var->use();
+
+ // Create an instance of the CLActivationLayer class as a member of the artifact class.
+ auto layer = genLayer("arm_compute::CLActivationLayer", prefix,
+ {AF::ref(in), AF::ref(output), activation_info});
+ addToPersistentTensors(output);
+ genLayerExecution(layer);
+}
+
+// Emits a CLArithmeticAddition (WRAP convert policy) computing in1 + in2.
+// If `out` is null, a fresh tensor named "<prefix>_<index>" with shape
+// `ir_shape` is created; otherwise the supplied tensor is reused (the case
+// for the last element of a handled sequence). Returns the output id.
+shared_ptr<ArtifactId> AclCppOpGenerator::genAddition(const string &prefix, size_t index,
+ const Shape &ir_shape,
+ const std::shared_ptr<ArtifactId> &in1,
+ const std::shared_ptr<ArtifactId> &in2,
+ std::shared_ptr<ArtifactId> out)
+{
+ string operation_name = prefix + "_" + to_string(index);
+ // Create the output tensor in the DOM or reuse the out, if it is not nullptr - that is for the
+ // last element in the handled sequence.
+ if (!out)
+ out = genTensor(operation_name, ir_shape);
+
+ // Create an instance of the CLActivationLayer class as a member of the artifact class.
+ auto arithmetic_add_layer_var = _artifactClass->var(false, "arm_compute::CLArithmeticAddition",
+ operation_name + "_arithmetic_add_layer");
+ auto arithmetic_add_layer = arithmetic_add_layer_var->use();
+
+ // Generate the call: arithmetic_add_layer.configure(&in1, &in2, &out);
+ _constrBlock->call("configure", {AF::ref(in1), AF::ref(in2), AF::ref(out),
+ AF::lit("arm_compute::ConvertPolicy::WRAP")},
+ arithmetic_add_layer);
+
+ // Generate the call: arithmetic_add_layer.run();
+ _infBlock->call("run", {}, arithmetic_add_layer);
+ return out;
+}
+
+// Emits in1 * in2 using two CLArithmeticDivision layers, since a direct
+// multiplication layer is not used here: tmp = unit / in2 (unit is an
+// all-ones broadcastable tensor), then out = in1 / tmp == in1 * in2.
+// If `out` is null a fresh output tensor is created. Returns the output id.
+shared_ptr<ArtifactId> AclCppOpGenerator::genMultiplication(const string &prefix, size_t index,
+ const Shape &ir_shape,
+ const shared_ptr<ArtifactId> &in1,
+ const shared_ptr<ArtifactId> &in2,
+ shared_ptr<ArtifactId> out)
+{
+ string operation_name = prefix + "_" + to_string(index);
+
+ // Create the output tensor in the DOM or reuse the out, if it is not nullptr - that is for the
+ // last element in the handled sequence.
+ if (!out)
+ out = genTensor(operation_name, ir_shape);
+
+ // Create a unit tensor with the rank = ir.shape.rank() and having all dimensions = 1. It is
+ // possible to use such a tensor in the operation because of the broadcasting support for the
+ // input tensors in the CLArithmeticDivision operation.
+ Shape ir_unit_shape(ir_shape.rank());
+
+ for (int i = 0; i < ir_unit_shape.rank(); ++i)
+ ir_unit_shape.dim(i) = 1;
+
+ // Create a unit tensor in the DOM.
+ auto unit = genTensor(operation_name + "_unit", ir_unit_shape);
+ addToPersistentTensors(unit);
+
+ // Fill the unit tensor with the 1 value.
+ fillTensor(unit, "1");
+
+ // Create a tmp tensor in the DOM to store the result of 1 / in2.
+ auto tmp = genTensor(operation_name + "_tmp", ir_shape);
+ genTensorAllocation(_infBlock, tmp);
+
+ // Create an instance of the CLArithmeticDivision class as a member of the artifact class.
+ auto arithmetic_div_layer_var1 = _artifactClass->var(false, "arm_compute::CLArithmeticDivision",
+ operation_name + "_arithmetic_div_layer_1");
+ auto arithmetic_div_layer1 = arithmetic_div_layer_var1->use();
+
+ // Generate the call: arithmetic_div_layer1.configure(&unit, &in2, &tmp);
+ _constrBlock->call("configure", {AF::ref(unit), AF::ref(in2), AF::ref(tmp)},
+ arithmetic_div_layer1);
+
+ // Generate the call: arithmetic_div_layer1.run();
+ _infBlock->call("run", {}, arithmetic_div_layer1);
+
+ // Create an instance of the CLArithmeticDivision class as a member of the artifact class.
+ auto arithmetic_div_layer_var2 = _artifactClass->var(false, "arm_compute::CLArithmeticDivision",
+ operation_name + "_arithmetic_div_layer_2");
+ auto arithmetic_div_layer2 = arithmetic_div_layer_var2->use();
+
+ // Generate the call: arithmetic_div_layer2.configure(&in1, &tmp, &out);
+ _constrBlock->call("configure", {AF::ref(in1), AF::ref(tmp), AF::ref(out)},
+ arithmetic_div_layer2);
+
+ // Generate the call: arithmetic_div_layer2.run();
+ _infBlock->call("run", {}, arithmetic_div_layer2);
+
+ genTensorDeallocation(_infBlock, tmp);
+
+ return out;
+}
+
+// Derives a valid C++ identifier for an IR tensor: a named tensor becomes
+// "_<name>" with every non-alphanumeric character replaced by '_'; an unnamed
+// tensor (only legal for single-output nodes, asserted) becomes
+// "tensor_<node-id>".
+string AclCppOpGenerator::tensorName(const Operation::Output *ir_tensor) const
+{
+ string tensor_name = ir_tensor->getName();
+
+ if (!tensor_name.empty())
+ {
+ tensor_name = "_" + tensor_name;
+ replace_if(tensor_name.begin(), tensor_name.end(), [](char c) { return std::isalnum(c) == 0; },
+ '_');
+ }
+ else
+ {
+ assert(ir_tensor->getNode()->getNumOutputs() == 1);
+ tensor_name = "tensor_" + to_string(ir_tensor->getNode()->getId());
+ }
+
+ return tensor_name;
+}
+
+// Declares a variable of `type` named `name` in `block`, constructor-
+// initialized with the elements of `init` converted to literals (e.g. a
+// TensorShape or PermutationVector). Returns the id for later references.
+template <typename T>
+std::shared_ptr<ArtifactId>
+AclCppOpGenerator::genVectorInitializedVar(ArtifactBlock *block, const string &type,
+ const string &name, const vector<T> &init)
+{
+ list<shared_ptr<ArtifactExpr>> dims;
+
+ for (const auto &v : init)
+ dims.push_back(AF::lit(to_string(v)));
+
+ auto shape_var = block->var(type, name, {}, dims);
+ auto shape_id = shape_var->use();
+ return shape_id;
+}
+
+// Declares (once per unique name, tracked in _tensorNames) a CLTensor member
+// plus an initializeTensor call with the shape in ACL order (MIR dims
+// reversed). Optionally emits a "get<name>" accessor returning the tensor by
+// reference. Always returns the tensor id, even for an already-known name.
+shared_ptr<ArtifactId> AclCppOpGenerator::genTensor(const string &name, const Shape &ir_shape,
+ bool gen_accessor)
+{
+ auto id = AF::id(name);
+
+ if (_tensorNames.insert(name).second)
+ {
+ _artifactClass->var(false, "arm_compute::CLTensor", name);
+ vector<int32_t> shape_vectorized;
+
+ // create vector of initializers from Shape
+ // (dimensions are reversed: ACL TensorShape order is opposite to MIR's)
+ shape_vectorized.reserve(ir_shape.rank());
+ for (int i = 0; i < ir_shape.rank(); ++i)
+ shape_vectorized.push_back(ir_shape.dim(-i - 1));
+
+ const char *type_name = "arm_compute::TensorShape";
+ shared_ptr<ArtifactId> shape =
+ genVectorInitializedVar(_constrBlock, type_name, name + "_shape", shape_vectorized);
+ _constrBlock->call("initializeTensor", {id, shape});
+
+ if (gen_accessor)
+ {
+ auto f = _artifactClass->func(true, "arm_compute::CLTensor&", "get" + name);
+ auto b = f->getBlock();
+ b->ret(id);
+ }
+ }
+
+ return id;
+}
+
+// Convenience overload: derives name and shape from the IR tensor; an
+// accessor is generated only for tensors that carry an explicit name.
+shared_ptr<ArtifactId> AclCppOpGenerator::genTensor(const Operation::Output *ir_tensor)
+{
+ return genTensor(tensorName(ir_tensor), ir_tensor->getShape(), !ir_tensor->getName().empty());
+}
+
+// Emits getInput()/getOutput() accessors on the artifact class, but only when
+// the graph has exactly one input / one output respectively; multi-input or
+// multi-output graphs get no convenience accessors.
+void AclCppOpGenerator::genNamed(Graph *graph)
+{
+ const auto &inputs = graph->getInputs();
+ if (inputs.size() == 1)
+ {
+ const auto *input_op = inputs[0];
+ auto f = _artifactClass->func(true, "arm_compute::CLTensor&", "getInput");
+ auto b = f->getBlock();
+ auto id = AF::id(tensorName(input_op->getOutput(0)));
+ b->ret(id);
+ }
+
+ const auto &outputs = graph->getOutputs();
+ if (outputs.size() == 1)
+ {
+ const auto *output_op = outputs[0];
+ auto f = _artifactClass->func(true, "arm_compute::CLTensor&", "getOutput");
+ auto b = f->getBlock();
+ auto id = AF::id(tensorName(output_op->getInput(0)));
+ b->ret(id);
+ }
+}
+
+// Writes the IR tensor's raw data to the .par stream and records the DOM
+// tensor id so genDeserializations() later emits the matching read call.
+// The order of _serializations entries must match the write order in the
+// .par file.
+void AclCppOpGenerator::serializeTensor(const shared_ptr<ArtifactId> &tensor_id,
+ const TensorVariant &ir_tensor)
+{
+ serializeIRTensor(ir_tensor);
+ _serializations.push_back(tensor_id);
+}
+
+// Writes the tensor's entire contiguous buffer (element size * element count
+// bytes, starting at offset 0) to the parameter output stream.
+// NOTE(review): `coords` and `dimensions` are built but never used below —
+// presumably leftovers from an earlier element-wise implementation; candidate
+// for removal upstream.
+void AclCppOpGenerator::serializeIRTensor(const TensorVariant &tensor)
+{
+ const Shape &shape = tensor.getShape();
+ Index coords;
+ coords.resize(shape.rank());
+ Index dimensions;
+ dimensions.resize(shape.rank());
+
+ for (int i = 0; i < shape.rank(); ++i)
+ {
+ coords.at(i) = 0;
+ dimensions.at(i) = shape.dim(i);
+ }
+
+ size_t data_size = tensor.getElementSize() * tensor.getShape().numElements();
+ _parOut.write(tensor.atOffset(0), data_size);
+}
+
+// Emits one deserializeTensor(_parIn, tensor) call per recorded
+// serialization, in the same order the tensors were written to the .par file.
+void AclCppOpGenerator::genDeserializations()
+{
+ for (auto &tensor : _serializations)
+ _constrBlock->call("deserializeTensor", {_parIn, tensor});
+}
+
+// Emits one fillTensor(tensor, value-literal) call per pending fill request
+// recorded via fillTensor().
+void AclCppOpGenerator::genFillings()
+{
+ for (auto f : _fillings)
+ _constrBlock->call("fillTensor", {f.first, AF::lit(f.second)});
+}
+
+// Defers filling `tensor_id` with the literal `val`; the actual call is
+// emitted later by genFillings().
+void AclCppOpGenerator::fillTensor(const shared_ptr<ArtifactId> &tensor_id, const string &val)
+{
+ _fillings.emplace_back(make_pair(tensor_id, val));
+}
+
+// SqueezeOp is not implemented by this backend.
+void AclCppOpGenerator::visit(ops::SqueezeOp & /*op*/)
+{
+ throw AclCppException("Unimplemented operation: Squeeze");
+}
+
+// SqrtOp is not implemented by this backend.
+void AclCppOpGenerator::visit(ops::SqrtOp & /*op*/)
+{
+ throw AclCppException("Unimplemented operation: Sqrt");
+}
+
+// Registers a tensor to be allocated in the generated constructor
+// (see genPersistentTensorAllocations).
+void AclCppOpGenerator::addToPersistentTensors(const std::shared_ptr<ArtifactId> &tensor_id)
+{
+ _persistent_tensors.push_back(tensor_id);
+}
+
+// Emits "tensor.allocator()->allocate();" into `block`.
+shared_ptr<ArtifactFunctionCall>
+AclCppOpGenerator::genTensorAllocation(ArtifactBlock *block, const shared_ptr<ArtifactId> &tensor)
+{
+ return block->call("allocate", {}, AF::call("allocator", {}, tensor), ArtifactCallType::ref);
+}
+
+// Emits "tensor.allocator()->free();" into `block`.
+shared_ptr<ArtifactFunctionCall>
+AclCppOpGenerator::genTensorDeallocation(ArtifactBlock *block, const shared_ptr<ArtifactId> &tensor)
+{
+ return block->call("free", {}, AF::call("allocator", {}, tensor), ArtifactCallType::ref);
+}
+
+// Emits constructor-time allocation calls for every tensor registered via
+// addToPersistentTensors().
+void AclCppOpGenerator::genPersistentTensorAllocations()
+{
+ for (auto &tensor : _persistent_tensors)
+ genTensorAllocation(_constrBlock, tensor);
+}
+
+// Declares a layer of `layer_type` as a member of the artifact class and
+// emits its configure(config_params...) call in the generated constructor.
+// Returns the layer id (pass it to genLayerExecution to run it).
+shared_ptr<ArtifactId>
+AclCppOpGenerator::genLayer(const string &layer_type, const string &layer_name,
+ const list<shared_ptr<ArtifactExpr>> &config_params)
+{
+ auto layer_var = _artifactClass->var(false, layer_type, layer_name);
+ auto layer = layer_var->use();
+ _constrBlock->call("configure", config_params, layer);
+ return layer;
+}
+
+// Emits "layer.run();" in the generated Inference() function body.
+void AclCppOpGenerator::genLayerExecution(const shared_ptr<ArtifactId> &layer_id)
+{
+ _infBlock->call("run", {}, layer_id);
+}
+
+// ResizeOp is not implemented by this backend.
+void AclCppOpGenerator::visit(mir::ops::ResizeOp & /*op*/)
+{
+ throw AclCppException("Unimplemented operation: Resize");
+}
+
+// Emits a CLPermute layer moving `input` into `output` according to
+// `mir_perm`. Only the three permutations listed below are supported by the
+// targeted ACL OpenCL implementation; anything else throws. The output is
+// either allocated per-inference or registered as a persistent tensor.
+void AclCppOpGenerator::genTranspose(const std::shared_ptr<nnc::ArtifactId> &input,
+ const std::shared_ptr<nnc::ArtifactId> &output,
+ const std::vector<size_t> &mir_perm,
+ bool allocate_at_inference)
+{
+
+ // acl 18.8 opencl implementation supports only 3 types of permutation:
+ // in mir (0, 3, 1, 2), in acl(axes are in reverse order) (1, 2, 0)
+ // in mir (0, 2, 3, 1), in acl (2, 0, 1)
+ // in mir (2, 3, 1, 0), in acl (3, 2, 0, 1)
+ // so here we try to transform mir transpose into one acl supports
+
+ const string &out_name = output->name();
+ vector<size_t> acl_perm;
+
+ if (mir_perm == vector<size_t>{0, 3, 1, 2})
+ acl_perm = {1, 2, 0};
+ else if (mir_perm == vector<size_t>{0, 2, 3, 1})
+ acl_perm = {2, 0, 1};
+ else if (mir_perm == vector<size_t>{2, 3, 1, 0})
+ acl_perm = {3, 2, 0, 1};
+ else
+ throw AclCppException("Unsupported transpose sequence in operation " + out_name);
+
+ // Create operation parameter containing permutation vector
+ shared_ptr<ArtifactId> perm_vector = genVectorInitializedVar(
+ _constrBlock, "arm_compute::PermutationVector", out_name + "_perm_param", acl_perm);
+
+ // Instantiate the CLPermute object.
+ string layer_name = out_name + "_transpose_layer";
+ list<shared_ptr<ArtifactExpr>> arguments = {AF::ref(input), AF::ref(output), perm_vector};
+ auto layer = genLayer("arm_compute::CLPermute", layer_name, arguments);
+ if (allocate_at_inference)
+ genTensorAllocation(_infBlock, output);
+ else
+ addToPersistentTensors(output);
+ genLayerExecution(layer);
+}
+
+// Lowers TransposeOp via genTranspose. Only rank-4 outputs are supported,
+// and the axis order must be one of the permutations genTranspose accepts.
+void AclCppOpGenerator::visit(mir::ops::TransposeOp &op)
+{
+ assert(op.getNumInputs() == 1);
+ const auto *ir_input = op.getInput(0);
+ const auto *ir_output = op.getOutput(0);
+
+ // Get the input node tensor id in the DOM.
+ shared_ptr<ArtifactId> input = AF::id(tensorName(ir_input));
+ const vector<size_t> &mir_axis_order = op.getAxisOrder();
+
+ // Create the output tensor in the DOM.
+ if (ir_output->getShape().rank() != 4)
+ throw AclCppException("Unsupported number of dimensions in transpose operation");
+ // TODO replace transpose shape
+ shared_ptr<ArtifactId> output = genTensor(ir_output);
+
+ // Actual generation of operation and related stuff
+ genTranspose(input, output, mir_axis_order, false);
+}
+
+// GatherOp is not implemented by this backend.
+void AclCppOpGenerator::visit(mir::ops::GatherOp & /*op*/)
+{
+ throw AclCppException("Unimplemented operation: GatherOp");
+}
+
+// Lowers SigmoidOp to an ACL LOGISTIC activation layer.
+void AclCppOpGenerator::visit(ops::SigmoidOp &op) { genActivation(op, "LOGISTIC"); }
+
+// Lowers LeakyReluOp to an ACL LEAKY_RELU activation with alpha as the slope.
+void AclCppOpGenerator::visit(mir::ops::LeakyReluOp &op)
+{
+ genActivation(op, "LEAKY_RELU", op.getAlpha());
+}
+
+// OutputOp needs no code: the producing op's tensor is already exposed via
+// genNamed()'s getOutput() accessor.
+void AclCppOpGenerator::visit(mir::ops::OutputOp & /*op*/)
+{
+ // No-op.
+}
+
+// Lowers binary AddOp through the genAddition helper, reusing the op's own
+// output tensor as the result.
+void AclCppOpGenerator::visit(mir::ops::AddOp &op)
+{
+ assert(op.getNumInputs() == 2);
+ const auto *ir_lhs = op.getInput(0);
+ const auto *ir_rhs = op.getInput(1);
+ const auto *ir_output = op.getOutput(0);
+
+ // Create the output tensor in the DOM and obtain its identifier.
+ auto out = genTensor(ir_output);
+ addToPersistentTensors(out);
+
+ // Get the identifiers of the input tensors in the DOM.
+ auto lhs = AF::id(tensorName(ir_lhs));
+ auto rhs = AF::id(tensorName(ir_rhs));
+
+ genAddition(out->name() + "_" + "addition", 0, ir_rhs->getShape(), lhs, rhs, out);
+}
+
+// DivOp is not implemented yet.
+void AclCppOpGenerator::visit(mir::ops::DivOp &) { throw AclCppException("NYI"); }
+
+// MaxOp is not implemented yet.
+void AclCppOpGenerator::visit(mir::ops::MaxOp &) { throw AclCppException("NYI"); }
+
+// Lowers binary MulOp through the genMultiplication helper (implemented as a
+// double division, see genMultiplication), reusing the op's output tensor.
+void AclCppOpGenerator::visit(mir::ops::MulOp &op)
+{
+ assert(op.getNumInputs() == 2);
+ const auto *ir_lhs = op.getInput(0);
+ const auto *ir_rhs = op.getInput(1);
+ const auto *ir_output = op.getOutput(0);
+
+ // Create the output tensor in the DOM and obtain its identifier.
+ auto out = genTensor(ir_output);
+ addToPersistentTensors(out);
+
+ // Get the identifiers of the input tensors in the DOM.
+ auto lhs = AF::id(tensorName(ir_lhs));
+ auto rhs = AF::id(tensorName(ir_rhs));
+
+ genMultiplication(out->name() + "_" + "multiplication", 0, ir_rhs->getShape(), lhs, rhs, out);
+}
+
+// SubOp is not implemented yet.
+void AclCppOpGenerator::visit(mir::ops::SubOp &) { throw AclCppException("NYI"); }
+
+// Catch-all for operations without a dedicated visit overload.
+void AclCppOpGenerator::visit_fallback(mir::Operation &) { throw AclCppException("NYI"); }
+
+} // namespace nnc
diff --git a/compiler/nnc/backends/acl_soft_backend/AclCppOpGenerator.h b/compiler/nnc/backends/acl_soft_backend/AclCppOpGenerator.h
new file mode 100644
index 000000000..79a7a6f3f
--- /dev/null
+++ b/compiler/nnc/backends/acl_soft_backend/AclCppOpGenerator.h
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_ACL_CPP_OP_GENERATOR_H_
+#define _NNC_ACL_CPP_OP_GENERATOR_H_
+
+#include "mir/Visitor.h"
+#include "mir/TensorVariant.h"
+#include "mir/Operation.h"
+#include "mir/Graph.h"
+#include "ArtifactModel.h"
+#include "ArtifactGeneratorCppCode.h"
+#include "ArtifactGeneratorCppDecl.h"
+
+#include <set>
+
+namespace nnc
+{
+
/**
 * @brief Implements the visitor for the model IR which generates the DOM description
 * translated to C++ source/header files by the ACL soft backend code generators.
 */
class AclCppOpGenerator : public mir::Visitor
{
public:
  AclCppOpGenerator(const std::string &name, std::ostream &par_out);
  /**
   * @brief The main interface function to the class. Converts the model IR to the DOM.
   * @param g - pointer to the model IR graph.
   * @return - reference to the top-level DOM entity.
   */
  const ArtifactModule &generate(mir::Graph *g);

  /**
   * @brief Implementations of the MIR visitors.
   * @param op
   */
  void visit(mir::ops::AddOp &op) override;
  void visit(mir::ops::AvgPool2DOp &op) override;
  void visit(mir::ops::CappedReluOp &op) override;
  void visit(mir::ops::ConcatOp &op) override;
  void visit(mir::ops::ConstantOp &op) override;
  void visit(mir::ops::Conv2DOp &op) override;
  void visit(mir::ops::DeConv2DOp &op) override;
  void visit(mir::ops::DepthwiseConv2DOp &op) override;
  void visit(mir::ops::DivOp &op) override;
  void visit(mir::ops::EluOp &op) override;
  void visit(mir::ops::FullyConnectedOp &op) override;
  void visit(mir::ops::GatherOp &op) override;
  void visit(mir::ops::InputOp &op) override;
  void visit(mir::ops::LeakyReluOp &op) override;
  void visit(mir::ops::MaxOp &op) override;
  void visit(mir::ops::MaxPool2DOp &op) override;
  void visit(mir::ops::MulOp &op) override;
  void visit(mir::ops::OutputOp &op) override;
  void visit(mir::ops::PadOp &op) override;
  void visit(mir::ops::ReluOp &op) override;
  void visit(mir::ops::ReshapeOp &op) override;
  void visit(mir::ops::ResizeOp &op) override;
  void visit(mir::ops::SigmoidOp &op) override;
  void visit(mir::ops::SliceOp &op) override;
  void visit(mir::ops::SoftmaxOp &op) override;
  void visit(mir::ops::SqrtOp &op) override;
  void visit(mir::ops::SqueezeOp &op) override;
  void visit(mir::ops::SubOp &op) override;
  void visit(mir::ops::TanhOp &op) override;
  void visit(mir::ops::TransposeOp &op) override;

protected:
  void visit_fallback(mir::Operation &op) override;

private:
  using AF = ArtifactFactory;

  /**
   * @brief generate transpose of input tensor NHWC -> NCHW
   * @param name name of tensor containing transposed data
   * @param input_shape shape of @p input
   * @param input id of input tensor
   * @return Id of result tensor
   */
  std::shared_ptr<ArtifactId> genTransposeMIRtoACL(const std::string &name,
                                                   const mir::Shape &input_shape,
                                                   const std::shared_ptr<ArtifactId> &input);

  /**
   * @brief generate transpose NCHW -> NHWC
   * @param name name of tensor containing transposed data
   * @param input_shape shape of @p input
   * @param input id of input tensor
   * @return Id of result tensor
   */
  std::shared_ptr<ArtifactId> genTransposeACLtoMIR(const std::string &name,
                                                   const mir::Shape &input_shape,
                                                   const std::shared_ptr<ArtifactId> &input);

  /**
   * @brief Generate DOM for PadStrideInfo object
   * @tparam Op Class of operation with pad and stride properties
   * @param op Operation entity to generate variable for
   * @param prefix First part of generated variable name
   * @param block Code block where insert variable declaration
   * @return generated variable
   */
  template <typename Op>
  std::shared_ptr<ArtifactVariable> genPadStrideInfo(const Op &op, const std::string &prefix,
                                                     ArtifactBlock *block);

  /**
   * @brief The common part of the pooling operations.
   * @param pooling_type - name of the pooling type as used in ACL.
   * @param exclude_padding - whether padding cells are excluded from the computation.
   */
  template <typename Op>
  void genPooling(Op &op, const std::string &pooling_type, bool exclude_padding);

  /**
   * @brief The common part of the convolution and the depthwise convolution.
   */
  template <typename Op>
  void genConvolution(Op &op, const std::string &acl_func_name, const std::string &suffix);

  /**
   * @brief Generates different types of activation functions: ReLU, Tanh etc.
   * @param op - the operation an activation layer is generated for.
   * @param activation_name - names of activation functions used in ACL: RELU, TANH etc.
   * @param a - alpha parameter used by some activation functions: BOUNDED_RELU, LU_BOUNDED_RELU,
   * LINEAR, TANH.
   * @param b - beta parameter used by some activation functions: LINEAR, LU_BOUNDED_RELU, TANH.
   */
  void genActivation(const mir::Operation &op, const std::string &activation_name, float a = 0,
                     float b = 0);

  /**
   * @brief Used to generate a binary addition operation in handling of the elementwise.
   *
   * @param prefix - the name (in the DOM) of operation called this method.
   * @param index - the index of the call in the elementwise loop.
   * @param ir_shape - the shape of the operands in the IR.
   * @param in1 - the descriptor of the first operand in the DOM. Can be either original tensor
   *              identifier in the input sequence or a variable storing the partial result of
   *              applying the operation to the previous terms in the sequence.
   * @param in2 - the descriptor of the second operand in the DOM.
   * @param out - the descriptor for storing the operation result. If it is not nullptr, it is
   *              used to return the result. If it is nullptr (the default), a new tensor is
   *              allocated in the DOM to return the result.
   * @return - the DOM ID of the temporary variable storing the partial sum of the elements
   *           to the left of and including the in2 term, or the operation out if in2 was
   *           the last term in the sequence.
   */
  std::shared_ptr<ArtifactId> genAddition(const std::string &prefix, size_t index,
                                          const mir::Shape &ir_shape,
                                          const std::shared_ptr<ArtifactId> &in1,
                                          const std::shared_ptr<ArtifactId> &in2,
                                          std::shared_ptr<ArtifactId> out = nullptr);

  /**
   * @brief Used to generate a binary multiplication operation in handling of the
   *        elementwise. As there is currently no the CLArithmeticMultiplication in the
   *        ACL library, in1 * in2 is emulated as: in1 / (1 / in2) with
   *        CLArithmeticDivision.
   *
   * @param prefix - the name (in the DOM) of operation called this method.
   * @param index - the index of the call in the elementwise loop.
   * @param ir_shape - the shape of the operands in the IR.
   * @param in1 - the descriptor of the first operand in the DOM. Can be either original tensor
   *              identifier in the input sequence or a variable storing the partial result of
   *              applying the operation to the previous terms in the sequence.
   * @param in2 - the descriptor of the second operand in the DOM.
   * @param out - the descriptor for storing the operation result. If it is not nullptr, it is
   *              used to return the result. If it is nullptr (the default), a new tensor is
   *              allocated in the DOM to return the result.
   * @return - the DOM ID of the temporary variable storing the partial product of the
   *           elements to the left of and including the in2 term, or the operation out if
   *           in2 was the last term in the sequence.
   */
  std::shared_ptr<ArtifactId> genMultiplication(const std::string &prefix, size_t index,
                                                const mir::Shape &ir_shape,
                                                const std::shared_ptr<ArtifactId> &in1,
                                                const std::shared_ptr<ArtifactId> &in2,
                                                std::shared_ptr<ArtifactId> out = nullptr);

  /**
   * @brief Generates a unique name for the tensor.
   */
  std::string tensorName(const mir::Operation::Output *ir_tensor) const;

  /**
   * @brief Generates a DOM variable initialized with a vector of values (e.g. a tensor shape).
   * @param block - DOM block where to create this variable: artifact constructor, inference
   *                function.
   * @param type - the DOM type name of the variable.
   * @param name - prefix used for generating the unique name for this variable.
   * @param init - the values the variable is initialized with.
   * @return - a DOM identifier for the created variable.
   */
  template <typename T>
  std::shared_ptr<ArtifactId> genVectorInitializedVar(ArtifactBlock *block, const std::string &type,
                                                      const std::string &name,
                                                      const std::vector<T> &init);

  /**
   * @brief Generates a DOM tensor.
   * @param name - its name.
   * @param ir_shape - IR shape used to construct the tensor.
   * @param gen_accessor - whether to generate an accessor function for this tensor
   *                       in the artifact class.
   * @return - a DOM identifier for the created tensor.
   */
  std::shared_ptr<ArtifactId> genTensor(const std::string &name, const mir::Shape &ir_shape,
                                        bool gen_accessor = false);

  /**
   * @brief Generates a DOM tensor.
   * @param ir_tensor - the ModelIR tensor.
   * @return - a DOM identifier for the created tensor.
   */
  std::shared_ptr<ArtifactId> genTensor(const mir::Operation::Output *ir_tensor);

  /**
   * @brief generate transposing operation, @p mir_perm contains dimensions in MIR order (batch has
   *        index 0)
   * @param input id of input tensor
   * @param output id of output tensor
   * @param mir_perm new order of dimensions
   */
  void genTranspose(const std::shared_ptr<nnc::ArtifactId> &input,
                    const std::shared_ptr<nnc::ArtifactId> &output,
                    const std::vector<size_t> &mir_perm, bool allocate_at_inference);

  /**
   * @brief Generates accessors for the input/output tensors.
   * @param graph - the ModelIR graph.
   */
  void genNamed(mir::Graph *graph);

  /**
   * @brief Schedule a tensor serialization.
   * @param tensor_id - an artifact ID of the tensor.
   * @param ir_tensor - the IR source of the tensor.
   */
  void serializeTensor(const std::shared_ptr<ArtifactId> &tensor_id,
                       const mir::TensorVariant &ir_tensor);

  /**
   * @brief Serialize an IR tensor in a file.
   * @param tensor - tensor to serialize.
   */
  void serializeIRTensor(const mir::TensorVariant &tensor);

  /**
   * @brief Generate the deserialization calls in right places in the artifact.
   */
  void genDeserializations();

  /**
   * @brief Generate procedure calls for filling tensors with constant scalar values.
   */
  void genFillings();

  /**
   * @brief Store the tensor ID and its value for the successive generation (for uniform tensors).
   * @param tensor_id - ID of the tensor.
   * @param val - its value.
   */
  void fillTensor(const std::shared_ptr<ArtifactId> &tensor_id, const std::string &val);

  /**
   * @brief Schedule the tensor allocation in the artifact constructor.
   * @param tensor_id - ID of the scheduled tensor.
   */
  void addToPersistentTensors(const std::shared_ptr<ArtifactId> &tensor_id);

  /**
   * @brief Generate allocation of tensor
   * @param block Block to insert allocation in
   * @param tensor Id of tensor to allocate
   */
  std::shared_ptr<ArtifactFunctionCall>
  genTensorAllocation(ArtifactBlock *block, const std::shared_ptr<ArtifactId> &tensor);

  /**
   * @brief Generate deallocation of tensor
   * @param block Block to insert deallocation in
   * @param tensor Id of tensor to deallocate
   */
  std::shared_ptr<ArtifactFunctionCall>
  genTensorDeallocation(ArtifactBlock *block, const std::shared_ptr<ArtifactId> &tensor);

  /**
   * @brief Generate all the scheduled tensor allocations.
   */
  void genPersistentTensorAllocations();

  /**
   * @brief Generate the layer declaration and the configure() call.
   * @param layer_type - ACL layer type.
   * @param layer_name - name of the layer variable in the artifact.
   * @param config_params - input/output tensor names and the other configuration information.
   * @return - generated tensor ID.
   */
  std::shared_ptr<ArtifactId>
  genLayer(const std::string &layer_type, const std::string &layer_name,
           const std::list<std::shared_ptr<ArtifactExpr>> &config_params);

  /**
   * @brief Generate the layer run() call.
   * @param layer_id - layer ID.
   */
  void genLayerExecution(const std::shared_ptr<ArtifactId> &layer_id);

  /**
   * @brief All named tensors names.
   */
  std::set<std::string> _tensorNames;

  /**
   * @brief The stream for tensors serialization.
   */
  std::ostream &_parOut;

  /**
   * @brief The whole artifact module in the DOM.
   */
  ArtifactModule _module;

  /**
   * @brief The artifact class.
   */
  std::shared_ptr<ArtifactClass> _artifactClass;

  /**
   * @brief The artifact inference function.
   */
  std::shared_ptr<ArtifactClassFunction> _inferenceFunction;

  /**
   * @brief The constructor block of DOM instructions.
   */
  ArtifactBlock *_constrBlock;

  /**
   * @brief The inference function block of DOM instructions.
   */
  ArtifactBlock *_infBlock;

  /**
   * @brief The variable describing the input stream for tensors deserialization.
   */
  std::shared_ptr<ArtifactVariable> _parInVar;

  /**
   * @brief The identifier to reference the previous variable.
   */
  std::shared_ptr<ArtifactId> _parIn;

  /**
   * @brief The CLScheduler class representation in the DOM.
   */
  std::shared_ptr<ArtifactId> _clScheduler;

  /**
   * @brief Tensors which need to be allocated at the artifact construction time.
   */
  std::list<std::shared_ptr<ArtifactId>> _persistent_tensors;

  /**
   * @brief Tensors which are serialized from the Model IR and need to be deserialized in the
   *        artifact.
   */
  std::list<std::shared_ptr<ArtifactId>> _serializations;

  /**
   * @brief Tensors which must be filled with constant values and the corresponding values.
   */
  std::list<std::pair<std::shared_ptr<ArtifactId>, std::string>> _fillings;
};
+
+} // namespace nnc
+
+#endif //_NNC_ACL_CPP_OP_GENERATOR_H_
diff --git a/compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppCode.cpp b/compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppCode.cpp
new file mode 100644
index 000000000..d9605f137
--- /dev/null
+++ b/compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppCode.cpp
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ArtifactGeneratorCppCode.h"
+#include "ArtifactModel.h"
+
+#include "AclArtifactUtilities.generated.h"
+
+#include <cassert>
+
+using namespace std;
+
+namespace nnc
+{
+
// Remembers the stream which receives the generated source text.
ArtifactGeneratorCppCode::ArtifactGeneratorCppCode(ostream &out) : _out(out) {}

// A literal is printed as its raw value, an identifier as its name.
void ArtifactGeneratorCppCode::visit(const ArtifactLiteral *node) { _out << node->getValue(); }

void ArtifactGeneratorCppCode::visit(const ArtifactId *node) { _out << node->name(); }
+
// Address-of expression: "&<obj>".
void ArtifactGeneratorCppCode::visit(const ArtifactRef *node)
{
  _out << "&";
  node->obj()->accept(this);
}

// Dereference expression: "*<obj>".
void ArtifactGeneratorCppCode::visit(const ArtifactDeref *node)
{
  _out << "*";
  node->obj()->accept(this);
}
+
+void ArtifactGeneratorCppCode::visit(const ArtifactVariable *node)
+{
+ _out << node->typeName() << " " << node->name();
+
+ for (const auto &d : node->getDimensions())
+ {
+ _out << "[";
+ d->accept(this);
+ _out << "]";
+ }
+
+ if (!node->getInitializers().empty())
+ {
+ _out << "(";
+ bool add_comma = false;
+
+ for (const auto &i : node->getInitializers())
+ {
+ if (add_comma)
+ _out << ", ";
+
+ i->accept(this);
+ add_comma = true;
+ }
+
+ _out << ")";
+ }
+}
+
+void ArtifactGeneratorCppCode::visit(const ArtifactFunctionCall *node)
+{
+ static const char *call_type_str[] = {".", "->", "::"};
+
+ if (node->on())
+ {
+ node->on()->accept(this);
+ _out << call_type_str[static_cast<int>(node->callType())];
+ }
+
+ _out << node->funcName();
+ _out << "(";
+ bool add_comma = false;
+
+ for (const auto &par : node->paramList())
+ {
+ if (add_comma)
+ _out << ", ";
+
+ par->accept(this);
+ add_comma = true;
+ }
+
+ _out << ")";
+}
+
+void ArtifactGeneratorCppCode::visit(const ArtifactUnaryExpr *node)
+{
+ // The trailing space is intended in new and delete!
+ static const char *un_op_str[] = {"++", "--", "new ", "delete ", "++", "--"};
+
+ if (node->getOp() < ArtifactUnOp::postIncr)
+ {
+ _out << un_op_str[static_cast<int>(node->getOp())];
+ node->getExpr()->accept(this);
+ }
+ else
+ {
+ node->getExpr()->accept(this);
+ _out << un_op_str[static_cast<int>(node->getOp())];
+ }
+}
+
// Emits an infix binary expression: "<left> <op> <right>".
void ArtifactGeneratorCppCode::visit(const ArtifactBinaryExpr *node)
{
  // Operator spellings; the order must match the binary-op enumeration
  // returned by getOp().
  static const char *bin_op_str[] = {"==", "!=", "<", "<=", ">", ">=", "=", "+",
                                     "-", "*", "/", "+=", "-=", "*=", "/="};
  node->getLeft()->accept(this);
  _out << " " << bin_op_str[static_cast<int>(node->getOp())] << " ";
  node->getRight()->accept(this);
}
+
// Emits a subscript expression: "<expr>[<index>]".
void ArtifactGeneratorCppCode::visit(const ArtifactIndex *node)
{
  node->getExpr()->accept(this);
  _out << "[";
  node->getInd()->accept(this);
  _out << "]";
}

// Emits a return statement: "return <expr>" (terminator added by the block).
void ArtifactGeneratorCppCode::visit(const ArtifactRet *node)
{
  _out << "return ";
  node->expr()->accept(this);
}
+
// Loop control statements; the enclosing block emits the ';' terminator.
void ArtifactGeneratorCppCode::visit(const ArtifactBreak * /*node*/) { _out << "break"; }

void ArtifactGeneratorCppCode::visit(const ArtifactCont * /*node*/) { _out << "continue"; }
+
// Emits a braced statement block, indenting its statements one level deeper.
void ArtifactGeneratorCppCode::visit(const ArtifactBlock *node)
{
  _out << " {" << endl;
  ++_ind;

  for (const auto &st : node->getStatements())
  {
    _out << _ind;
    st->accept(this);

    // Nested blocks (loop/if bodies) terminate themselves; plain statements
    // need the trailing semicolon and newline.
    if (!st->isBlock())
      _out << ";" << endl;
  }

  --_ind;
  _out << _ind << "}" << endl;
}
+
// Emits "for(<init>; <cond>; <iter>)" followed by the loop body.
// Each of the three header parts is optional and omitted when null.
void ArtifactGeneratorCppCode::visit(const ArtifactForLoop *node)
{
  _out << "for(";

  if (node->getInit())
    node->getInit()->accept(this);

  _out << "; ";

  if (node->getCond())
    node->getCond()->accept(this);

  _out << "; ";

  if (node->getIter())
    node->getIter()->accept(this);

  _out << ")";
  node->getBlock()->accept(this);
}
+
// Emits "if(<cond>)" with its body, plus an "else" branch when the else
// block contains statements.
// NOTE(review): assumes getElseBlock() never returns null — confirm that
// ArtifactIf always allocates an (possibly empty) else block.
void ArtifactGeneratorCppCode::visit(const ArtifactIf *node)
{
  _out << "if(";
  node->getCond()->accept(this);
  _out << ")";
  node->getBlock()->accept(this);

  if (!node->getElseBlock()->getStatements().empty())
  {
    _out << _ind << "else";
    node->getElseBlock()->accept(this);
  }
}
+
// Free-function definitions are not generated yet; only class members are
// emitted by this generator.
void ArtifactGeneratorCppCode::visit(const ArtifactFunction * /*node*/)
{
  // TODO implement this function
}
+
// Emits the out-of-line definitions of a class: the default constructor with
// its member-initializer list, then all public and private member functions.
void ArtifactGeneratorCppCode::visit(const ArtifactClass *node)
{
  // Generate a public default constructor here.
  _out << node->name() << "::" << node->name() << "()";

  // Member-initializer list, one entry per private variable.
  if (!node->privateVariables().empty())
  {
    _out << " : ";
    bool add_delim = false;

    for (const auto &v : node->privateVariables())
    {
      if (add_delim)
        _out << ",\n";

      v->accept(this);
      add_delim = true;
    }
  }

  // TODO add constructors of public variables

  // Constructor body.
  node->getConstrBlock()->accept(this);
  _out << endl;

  // Then generate the other stuff.

  for (const auto &e : node->publicFunctions())
    e->accept(this);

  for (const auto &e : node->privateFunctions())
    e->accept(this);
}
+
+void ArtifactGeneratorCppCode::visit(const ArtifactClassVariable *node)
+{
+ _out << node->name() << "(";
+ bool add_comma = false;
+
+ for (const auto &i : node->getInitializers())
+ {
+ if (add_comma)
+ _out << ", ";
+
+ i->accept(this);
+ add_comma = true;
+ }
+
+ _out << ")";
+}
+
+void ArtifactGeneratorCppCode::visit(const ArtifactClassFunction *node)
+{
+ _out << node->getRetTypeName();
+
+ if (!node->getRetTypeName().empty())
+ _out << " ";
+
+ _out << node->owner()->name() << "::" << node->name() << "(";
+ bool add_comma = false;
+
+ for (const auto &p : node->getParameters())
+ {
+ if (add_comma)
+ _out << ", ";
+
+ p->accept(this);
+ add_comma = true;
+ }
+
+ _out << ")";
+ node->getBlock()->accept(this);
+ _out << endl;
+}
+
// Emits the whole .cpp file: the module's own header include, system and
// local includes, the embedded ACL utility support code, then every entity.
void ArtifactGeneratorCppCode::visit(const ArtifactModule *node)
{
  _out << "#include \"" << node->name() << ".h\"" << endl << endl;

  for (const auto &i : node->sourceSysIncludes())
    _out << "#include <" << i << ">" << endl;

  if (!node->sourceSysIncludes().empty())
    _out << endl;

  for (const auto &i : node->sourceIncludes())
    _out << "#include \"" << i << "\"" << endl;

  if (!node->sourceIncludes().empty())
    _out << endl;

  // AclArtifactUtilities is a generated character array holding the support
  // code shared by all artifacts (see AclArtifactUtilities.generated.h).
  _out.write(AclArtifactUtilities, sizeof(AclArtifactUtilities));
  _out << endl;

  for (const auto &e : node->entities())
    e->accept(this);
}
+
+} // namespace nnc
diff --git a/compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppCode.h b/compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppCode.h
new file mode 100644
index 000000000..9394750a5
--- /dev/null
+++ b/compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppCode.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_ARTIFACT_GENERATOR_CPP_CODE_H_
+#define _NNC_ARTIFACT_GENERATOR_CPP_CODE_H_
+
+#include "IArtifactGenerator.h"
+#include "ArtifactIndent.h"
+
+#include <ostream>
+
+namespace nnc
+{
+
/**
 * @brief The ACL C++ artifact source code producer.
 *        Visits a DOM tree and writes the corresponding C++ definitions
 *        to the output stream.
 */
class ArtifactGeneratorCppCode : public IArtifactGenerator
{
public:
  explicit ArtifactGeneratorCppCode(std::ostream &out);

  void visit(const ArtifactLiteral *node) override;
  void visit(const ArtifactId *node) override;
  void visit(const ArtifactRef *node) override;
  void visit(const ArtifactDeref *node) override;
  void visit(const ArtifactVariable *node) override;
  void visit(const ArtifactFunctionCall *node) override;
  void visit(const ArtifactUnaryExpr *node) override;
  void visit(const ArtifactBinaryExpr *node) override;
  void visit(const ArtifactIndex *node) override;
  void visit(const ArtifactRet *node) override;
  void visit(const ArtifactBreak *node) override;
  void visit(const ArtifactCont *node) override;
  void visit(const ArtifactBlock *node) override;
  void visit(const ArtifactForLoop *node) override;
  void visit(const ArtifactIf *node) override;
  void visit(const ArtifactFunction *node) override;
  void visit(const ArtifactClass *node) override;
  void visit(const ArtifactClassVariable *node) override;
  void visit(const ArtifactClassFunction *node) override;
  void visit(const ArtifactModule *node) override;

private:
  // Destination stream for the generated source text.
  std::ostream &_out;
  // Current indentation level, adjusted while entering/leaving blocks.
  ArtifactIndent _ind;
};
+
+} // namespace nnc
+
+#endif //_NNC_ARTIFACT_GENERATOR_CPP_CODE_H_
diff --git a/compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppDecl.cpp b/compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppDecl.cpp
new file mode 100644
index 000000000..95f370332
--- /dev/null
+++ b/compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppDecl.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ArtifactGeneratorCppDecl.h"
+#include "ArtifactModel.h"
+
+#include <cassert>
+
+using namespace std;
+
+namespace nnc
+{
+
// Remembers the stream which receives the generated header text.
ArtifactGeneratorCppDecl::ArtifactGeneratorCppDecl(ostream &out) : _out(out) {}

// A literal is printed as its raw value, an identifier as its name.
void ArtifactGeneratorCppDecl::visit(const ArtifactLiteral *node) { _out << node->getValue(); }

void ArtifactGeneratorCppDecl::visit(const ArtifactId *node) { _out << node->name(); }
+
// Address-of expression: "&<obj>".
void ArtifactGeneratorCppDecl::visit(const ArtifactRef *node)
{
  _out << "&";
  node->obj()->accept(this);
}

// Dereference expression: "*<obj>".
void ArtifactGeneratorCppDecl::visit(const ArtifactDeref *node)
{
  _out << "*";
  node->obj()->accept(this);
}

// In a declaration context a variable is printed as "type name" only;
// dimensions and initializers belong to the definition (see the CppCode
// generator).
void ArtifactGeneratorCppDecl::visit(const ArtifactVariable *node)
{
  _out << node->typeName() << " " << node->name();
}
+
// The following node kinds are statements and expressions that never appear
// in a header file, so the declaration generator emits nothing for them.
void ArtifactGeneratorCppDecl::visit(const ArtifactFunctionCall * /*node*/) {}

void ArtifactGeneratorCppDecl::visit(const ArtifactUnaryExpr * /*node*/) {}

void ArtifactGeneratorCppDecl::visit(const ArtifactBinaryExpr * /*node*/) {}

void ArtifactGeneratorCppDecl::visit(const ArtifactIndex * /*node*/) {}

void ArtifactGeneratorCppDecl::visit(const ArtifactRet * /*node*/) {}

void ArtifactGeneratorCppDecl::visit(const ArtifactBreak * /*node*/) {}

void ArtifactGeneratorCppDecl::visit(const ArtifactCont * /*node*/) {}

void ArtifactGeneratorCppDecl::visit(const ArtifactBlock * /*node*/) {}

void ArtifactGeneratorCppDecl::visit(const ArtifactForLoop * /*node*/) {}

void ArtifactGeneratorCppDecl::visit(const ArtifactIf * /*node*/) {}
+
+void ArtifactGeneratorCppDecl::visit(const ArtifactFunction *node)
+{
+ _out << node->getRetTypeName() << " " << node->name() << "(";
+
+ bool add_comma = false;
+
+ for (const auto &par : node->getParameters())
+ {
+ if (add_comma)
+ _out << ", ";
+
+ par->accept(this);
+ add_comma = true;
+ }
+
+ _out << ");";
+}
+
// Emits the class declaration: a public default constructor and the public
// member functions, then the private functions and private variables.
void ArtifactGeneratorCppDecl::visit(const ArtifactClass *node)
{
  _out << "class " << node->name() << " {" << endl;
  _out << "public:" << endl;
  ++_ind;

  // Generate a public default constructor here.
  _out << _ind << node->name() << "();" << endl;

  // Then generate the other stuff.

  for (const auto &e : node->publicFunctions())
  {
    _out << _ind;
    e->accept(this);
  }

  _out << endl << "private:" << endl;

  for (const auto &e : node->privateFunctions())
  {
    _out << _ind;
    e->accept(this);
  }

  if (!node->privateFunctions().empty())
    _out << endl;

  // TODO add public variables

  for (const auto &e : node->privateVariables())
  {
    _out << _ind;
    e->accept(this);
  }

  --_ind;
  _out << "};" << endl;
}
+
// Emits a member variable declaration: "type name;".
void ArtifactGeneratorCppDecl::visit(const ArtifactClassVariable *node)
{
  _out << node->typeName() << " " << node->name() << ";" << endl;
}
+
+void ArtifactGeneratorCppDecl::visit(const ArtifactClassFunction *node)
+{
+ _out << node->getRetTypeName();
+
+ if (!node->getRetTypeName().empty())
+ _out << " ";
+
+ _out << node->name() << "(";
+ bool add_comma = false;
+
+ for (const auto &p : node->getParameters())
+ {
+ if (add_comma)
+ _out << ", ";
+
+ p->accept(this);
+ add_comma = true;
+ }
+
+ _out << ");" << endl;
+}
+
// Emits the whole header file: system and local includes, then the
// declaration of every DOM entity.
void ArtifactGeneratorCppDecl::visit(const ArtifactModule *node)
{
  for (const auto &i : node->headerSysIncludes())
    _out << "#include <" << i << ">" << endl;

  if (!node->headerSysIncludes().empty())
    _out << endl;

  for (const auto &i : node->headerIncludes())
    _out << "#include \"" << i << "\"" << endl;

  if (!node->headerIncludes().empty())
    _out << endl;

  for (const auto &e : node->entities())
    e->accept(this);
}
+
+} // namespace nnc
diff --git a/compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppDecl.h b/compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppDecl.h
new file mode 100644
index 000000000..aef8df913
--- /dev/null
+++ b/compiler/nnc/backends/acl_soft_backend/ArtifactGeneratorCppDecl.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_ARTIFACT_GENERATOR_CPP_DECL_H_
+#define _NNC_ARTIFACT_GENERATOR_CPP_DECL_H_
+
+#include "IArtifactGenerator.h"
+#include "ArtifactIndent.h"
+
+#include <ostream>
+
+namespace nnc
+{
+
/**
 * @brief The ACL C++ artifact header file producer.
 *        Visits a DOM tree and writes the corresponding C++ declarations
 *        to the output stream.
 */
class ArtifactGeneratorCppDecl : public IArtifactGenerator
{
public:
  explicit ArtifactGeneratorCppDecl(std::ostream &out);

  void visit(const ArtifactLiteral *node) override;
  void visit(const ArtifactId *node) override;
  void visit(const ArtifactRef *node) override;
  void visit(const ArtifactDeref *node) override;
  void visit(const ArtifactVariable *node) override;
  void visit(const ArtifactFunctionCall *node) override;
  void visit(const ArtifactUnaryExpr *node) override;
  void visit(const ArtifactBinaryExpr *node) override;
  void visit(const ArtifactIndex *node) override;
  void visit(const ArtifactRet *node) override;
  void visit(const ArtifactBreak *node) override;
  void visit(const ArtifactCont *node) override;
  void visit(const ArtifactBlock *node) override;
  void visit(const ArtifactForLoop *node) override;
  void visit(const ArtifactIf *node) override;
  void visit(const ArtifactFunction *node) override;
  void visit(const ArtifactClass *node) override;
  void visit(const ArtifactClassVariable *node) override;
  void visit(const ArtifactClassFunction *node) override;
  void visit(const ArtifactModule *node) override;

private:
  // Destination stream for the generated header text.
  std::ostream &_out;
  // Current indentation level, adjusted while entering/leaving blocks.
  ArtifactIndent _ind;
};
+
+} // namespace nnc
+
+#endif //_NNC_ARTIFACT_GENERATOR_CPP_DECL_H_
diff --git a/compiler/nnc/backends/acl_soft_backend/ArtifactIndent.h b/compiler/nnc/backends/acl_soft_backend/ArtifactIndent.h
new file mode 100644
index 000000000..b241443cf
--- /dev/null
+++ b/compiler/nnc/backends/acl_soft_backend/ArtifactIndent.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNCC_ARTIFACT_INDENT_H_
+#define _NNCC_ARTIFACT_INDENT_H_
+
+#include <string>
+#include <ostream>
+
+namespace nnc
+{
+
/**
 * @brief Used by code and declaration generators to indent generated text.
 *
 * `operator++`/`operator--` move the level one step (two spaces) deeper or
 * shallower; streaming the object prints the corresponding run of spaces.
 */
class ArtifactIndent
{
public:
  ArtifactIndent() : _level(0), _step(2) {}

  // Increase indentation by one step.
  ArtifactIndent &operator++()
  {
    _level += _step;
    return *this;
  }

  // Decrease indentation by one step. Clamped at zero so that an unbalanced
  // extra decrement cannot drive the level negative and silently swallow
  // subsequent increments.
  ArtifactIndent &operator--()
  {
    if (_level >= _step)
      _level -= _step;
    else
      _level = 0;
    return *this;
  }

  // Current indentation in spaces; never negative.
  int level() const { return _level; }

private:
  int _level; // current indentation width in spaces
  int _step;  // spaces added/removed per ++/-- (2)
};
+
+inline std::ostream &operator<<(std::ostream &out, const ArtifactIndent &ind)
+{
+ if (ind.level() > 0)
+ out << std::string(ind.level(), ' ');
+
+ return out;
+}
+
+} // namespace nnc
+
+#endif //_NNCC_ARTIFACT_INDENT_H_
diff --git a/compiler/nnc/backends/acl_soft_backend/ArtifactModel.cpp b/compiler/nnc/backends/acl_soft_backend/ArtifactModel.cpp
new file mode 100644
index 000000000..8888697e7
--- /dev/null
+++ b/compiler/nnc/backends/acl_soft_backend/ArtifactModel.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ArtifactModel.h"
+
+namespace nnc
+{
+
+using namespace std;
+
+// ArtifactFunctionCall implementation
+// Builds a function-call node; all arguments are moved into the stored members.
+ArtifactFunctionCall::ArtifactFunctionCall(string func_name,
+                                           list<shared_ptr<ArtifactExpr>> param_list,
+                                           shared_ptr<ArtifactExpr> on, ArtifactCallType call_type)
+    : _funcName(std::move(func_name)), _callType(call_type), _on(std::move(on)),
+      _paramList(std::move(param_list))
+{
+}
+
+// ArtifactBlock implementation.
+// Creates a variable declaration node and appends it as a statement of this block.
+shared_ptr<ArtifactVariable>
+ArtifactBlock::var(const string &type_name, const string &var_name,
+                   const list<shared_ptr<ArtifactExpr>> &dimensions,
+                   const list<std::shared_ptr<ArtifactExpr>> &initializers)
+{
+  auto var = make_shared<ArtifactVariable>(type_name, var_name, dimensions, initializers);
+  _statements.push_back(var);
+  return var;
+}
+
+// Creates a function-call node and appends it as a statement of this block.
+shared_ptr<ArtifactFunctionCall>
+ArtifactBlock::call(const string &func_name, const list<shared_ptr<ArtifactExpr>> &param_list,
+                    shared_ptr<ArtifactExpr> call_on, ArtifactCallType call_type)
+{
+  auto func_call = make_shared<ArtifactFunctionCall>(func_name, param_list, call_on, call_type);
+  _statements.push_back(func_call);
+  return func_call;
+}
+
+// Appends a 'return expr;' statement to this block.
+shared_ptr<ArtifactRet> ArtifactBlock::ret(shared_ptr<ArtifactExpr> expr)
+{
+  auto ret = make_shared<ArtifactRet>(expr);
+  _statements.push_back(ret);
+  return ret;
+}
+
+// Appends a 'break;' statement to this block.
+shared_ptr<ArtifactBreak> ArtifactBlock::brk()
+{
+  auto brk = make_shared<ArtifactBreak>();
+  _statements.push_back(brk);
+  return brk;
+}
+
+// Appends a 'continue;' statement to this block.
+shared_ptr<ArtifactCont> ArtifactBlock::cont()
+{
+  auto cont = make_shared<ArtifactCont>();
+  _statements.push_back(cont);
+  return cont;
+}
+
+// Appends a for-loop statement; the loop body is populated later via getBlock().
+shared_ptr<ArtifactForLoop> ArtifactBlock::forLoop(shared_ptr<ArtifactVariable> init,
+                                                   shared_ptr<ArtifactExpr> cond,
+                                                   shared_ptr<ArtifactExpr> iter)
+{
+  auto loop = make_shared<ArtifactForLoop>(init, cond, iter);
+  _statements.push_back(loop);
+  return loop;
+}
+
+// Appends an if statement; then/else bodies are populated later via getBlock()/getElseBlock().
+shared_ptr<ArtifactIf> ArtifactBlock::ifCond(shared_ptr<ArtifactExpr> cond)
+{
+  auto ifb = make_shared<ArtifactIf>(cond);
+  _statements.push_back(ifb);
+  return ifb;
+}
+
+// Appends a unary expression used as an expression-statement (e.g. '++i;').
+shared_ptr<ArtifactUnaryExpr> ArtifactBlock::un(ArtifactUnOp op, shared_ptr<ArtifactExpr> expr)
+{
+  auto un = make_shared<ArtifactUnaryExpr>(op, expr);
+  _statements.push_back(un);
+  return un;
+}
+
+// Appends a binary expression used as an expression-statement (e.g. 'a = b;').
+shared_ptr<ArtifactBinaryExpr> ArtifactBlock::bin(ArtifactBinOp op, shared_ptr<ArtifactExpr> left,
+                                                  shared_ptr<ArtifactExpr> right)
+{
+  auto bin = make_shared<ArtifactBinaryExpr>(op, left, right);
+  _statements.push_back(bin);
+  return bin;
+}
+
+// Appends a 'new expr' expression-statement (ArtifactUnOp::heapNew).
+shared_ptr<ArtifactUnaryExpr> ArtifactBlock::heapNew(shared_ptr<ArtifactExpr> expr)
+{
+  auto heap_new = make_shared<ArtifactUnaryExpr>(ArtifactUnOp::heapNew, expr);
+  _statements.push_back(heap_new);
+  return heap_new;
+}
+
+// Appends a 'delete expr' expression-statement (ArtifactUnOp::heapFree).
+shared_ptr<ArtifactUnaryExpr> ArtifactBlock::heapFree(shared_ptr<ArtifactExpr> expr)
+{
+  auto heap_del = make_shared<ArtifactUnaryExpr>(ArtifactUnOp::heapFree, expr);
+  _statements.push_back(heap_del);
+  return heap_del;
+}
+
+} // namespace nnc
diff --git a/compiler/nnc/backends/acl_soft_backend/ArtifactModel.h b/compiler/nnc/backends/acl_soft_backend/ArtifactModel.h
new file mode 100644
index 000000000..106c9bec3
--- /dev/null
+++ b/compiler/nnc/backends/acl_soft_backend/ArtifactModel.h
@@ -0,0 +1,859 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_ARTIFACT_MODEL_H_
+#define _NNC_ARTIFACT_MODEL_H_
+
+#include <string>
+#include <list>
+#include <ostream>
+#include <utility>
+#include <memory>
+
+#include "IArtifactGenerator.h"
+
+namespace nnc
+{
+
+/**
+ * @todo FIXME: remove the identical accept() function implementations!
+ *
+ * @todo Get rid of the 'Artifact' prefix in the class names
+ * in this hierarchy, after anticipated namespace refactoring
+ * in the nnc project.
+ */
+
+/**
+ * @brief Indicates how object on which call is made is treated.
+ * In C++ syntax: '.', '->', '::'
+ */
+enum class ArtifactCallType
+{
+ obj, // '.'
+ ref, // '->'
+ scope // '::'
+};
+
+/**
+ * @brief The base class of the whole artifact entities hierarchy.
+ */
+class ArtifactEntity
+{
+public:
+ virtual ~ArtifactEntity() = default;
+
+ /**
+ * @brief If this entity represents something containing a block of instructions
+ */
+ virtual bool isBlock() const { return false; }
+ /**
+ * @brief This is the core function of each artifact entity and
+ * is implemented by all concrete classes in the hierarchy.
+ */
+ virtual void accept(IArtifactGenerator *g) const = 0;
+
+protected:
+ ArtifactEntity() = default;
+};
+
+/**
+ * @brief Represents any named entity in the code.
+ */
+class ArtifactNamed : public ArtifactEntity
+{
+public:
+ explicit ArtifactNamed(std::string name) : _name(std::move(name)) {}
+
+ /**
+ * Returns the identifier name.
+ * @return the identifier name.
+ */
+ const std::string &name() const { return _name; }
+
+private:
+ std::string _name;
+};
+
+/**
+ * @brief Basic class for all expressions: identifiers, function calls, references etc.
+ */
+class ArtifactExpr : public ArtifactEntity
+{
+};
+
+/**
+ * @brief Represents literals which should go to the artifact source code as is.
+ */
+class ArtifactLiteral : public ArtifactExpr
+{
+public:
+ explicit ArtifactLiteral(std::string value) : _value(std::move(value)) {}
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+ /**
+ * Returns the literal value.
+ * @return the literal value.
+ */
+ const std::string &getValue() const { return _value; }
+
+private:
+ std::string _value;
+};
+
+/**
+ * @brief Type of objects which can be used to reference named entities by their names.
+ */
+class ArtifactId : public ArtifactExpr
+{
+public:
+ explicit ArtifactId(std::string id) : _id(std::move(id)) {}
+ explicit ArtifactId(const ArtifactNamed *named) : _id(named->name()) {}
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+ const std::string &name() const { return _id; }
+
+private:
+ std::string _id;
+};
+
+/**
+ * @brief Represents an entity with semantics like C/C++ address of (&) operator.
+ */
+class ArtifactRef : public ArtifactExpr
+{
+public:
+ explicit ArtifactRef(std::shared_ptr<ArtifactExpr> ref) : _ref(std::move(ref)) {}
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+ std::shared_ptr<ArtifactExpr> obj() const { return _ref; }
+
+private:
+ std::shared_ptr<ArtifactExpr> _ref;
+};
+
+/**
+ * @brief Represents an entity with semantics of C/C++ dereference (*) operator.
+ */
+class ArtifactDeref : public ArtifactExpr
+{
+public:
+ explicit ArtifactDeref(std::shared_ptr<ArtifactExpr> ref) : _ref(std::move(ref)) {}
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+ std::shared_ptr<ArtifactExpr> obj() const { return _ref; }
+
+private:
+ std::shared_ptr<ArtifactExpr> _ref;
+};
+
+/**
+ * @brief Represents a function call.
+ */
+class ArtifactFunctionCall : public ArtifactExpr
+{
+public:
+ ArtifactFunctionCall(std::string func_name, std::list<std::shared_ptr<ArtifactExpr>> param_list,
+ std::shared_ptr<ArtifactExpr> on = nullptr,
+ ArtifactCallType call_type = ArtifactCallType::obj);
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+ const std::string &funcName() const { return _funcName; }
+ const std::list<std::shared_ptr<ArtifactExpr>> &paramList() const { return _paramList; }
+ std::shared_ptr<ArtifactExpr> on() const { return _on; }
+ ArtifactCallType callType() const { return _callType; }
+
+private:
+ std::string _funcName;
+ ArtifactCallType _callType;
+ std::shared_ptr<ArtifactExpr> _on;
+ std::list<std::shared_ptr<ArtifactExpr>> _paramList;
+};
+
+/**
+ * @brief Used for the ArtifactUnaryExpr.
+ */
+enum class ArtifactUnOp
+{
+ preIncr,
+ preDecr,
+ heapNew,
+ heapFree,
+ postIncr,
+ postDecr
+};
+
+/**
+ * @brief Represents a unary operation expression (see ArtifactUnOp for the operators).
+ */
+class ArtifactUnaryExpr : public ArtifactExpr
+{
+public:
+  ArtifactUnaryExpr(ArtifactUnOp op, std::shared_ptr<ArtifactExpr> expr)
+      : _op(op), _expr(std::move(expr))
+  {
+  }
+
+  void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+  ArtifactUnOp getOp() const { return _op; }
+  std::shared_ptr<ArtifactExpr> getExpr() const { return _expr; }
+
+private:
+  ArtifactUnOp _op;
+  std::shared_ptr<ArtifactExpr> _expr;
+};
+
+/**
+ * @brief Used for the ArtifactBinaryExpr.
+ */
+enum class ArtifactBinOp
+{
+ eq,
+ notEq,
+ less,
+ lessOrEq,
+ great,
+ greatOrEq,
+ assign,
+ plus,
+ minus,
+ mult,
+ div,
+ plusAssign,
+ minusAssign,
+ multAssign,
+ divAssign
+};
+
+/**
+ * @brief Represents different types of binary expressions.
+ */
+class ArtifactBinaryExpr : public ArtifactExpr
+{
+public:
+ ArtifactBinaryExpr(ArtifactBinOp op, std::shared_ptr<ArtifactExpr> left,
+ std::shared_ptr<ArtifactExpr> right)
+ : _op(op), _left(std::move(left)), _right(std::move(right))
+ {
+ }
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+ ArtifactBinOp getOp() const { return _op; }
+ std::shared_ptr<ArtifactExpr> getLeft() const { return _left; }
+ std::shared_ptr<ArtifactExpr> getRight() const { return _right; }
+
+private:
+ ArtifactBinOp _op;
+ std::shared_ptr<ArtifactExpr> _left;
+ std::shared_ptr<ArtifactExpr> _right;
+};
+
+/**
+ * @brief Array index access
+ */
+class ArtifactIndex : public ArtifactExpr
+{
+public:
+ ArtifactIndex(std::shared_ptr<ArtifactExpr> expr, std::shared_ptr<ArtifactExpr> ind)
+ : _expr(std::move(expr)), _ind(std::move(ind))
+ {
+ }
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+ std::shared_ptr<ArtifactExpr> getExpr() const { return _expr; }
+ std::shared_ptr<ArtifactExpr> getInd() const { return _ind; }
+
+private:
+ std::shared_ptr<ArtifactExpr> _expr;
+ std::shared_ptr<ArtifactExpr> _ind;
+};
+
+/**
+ * @brief Just represents return from function statement.
+ */
+class ArtifactRet : public ArtifactEntity
+{
+public:
+ explicit ArtifactRet(std::shared_ptr<ArtifactExpr> expr) : _expr(std::move(expr)) {}
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+ std::shared_ptr<ArtifactExpr> expr() const { return _expr; }
+
+private:
+ std::shared_ptr<ArtifactExpr> _expr;
+};
+
+/**
+ * @brief Just represents the break statement.
+ */
+class ArtifactBreak : public ArtifactEntity
+{
+public:
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+};
+
+/**
+ * @brief Just represents the continue statement.
+ */
+class ArtifactCont : public ArtifactEntity
+{
+public:
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+};
+
+/**
+ * @brief Represents a variable declaration.
+ *
+ * @param type_name - the variable type name.
+ * @param var_name - the variable name.
+ * @param dimensions - optional dimensions; a non-empty list makes this an array.
+ * @param initializers - optional arguments of the object constructor.
+ */
+class ArtifactVariable : public ArtifactNamed
+{
+public:
+  ArtifactVariable(std::string type_name, std::string var_name,
+                   std::list<std::shared_ptr<ArtifactExpr>> dimensions = {},
+                   std::list<std::shared_ptr<ArtifactExpr>> initializers = {})
+      // Base class listed first: the base is initialized before the members
+      // regardless of listing order, and this ordering avoids -Wreorder warnings.
+      : ArtifactNamed(std::move(var_name)), _typeName(std::move(type_name)),
+        _dimensions(std::move(dimensions)), _initializers(std::move(initializers))
+  {
+  }
+
+  void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+  const std::string &typeName() const { return _typeName; }
+  const std::list<std::shared_ptr<ArtifactExpr>> &getDimensions() const { return _dimensions; }
+  const std::list<std::shared_ptr<ArtifactExpr>> &getInitializers() const { return _initializers; }
+  // Creates an identifier expression referring to this variable.
+  std::shared_ptr<ArtifactId> use() { return std::make_shared<ArtifactId>(this); }
+
+private:
+  std::string _typeName;
+  std::list<std::shared_ptr<ArtifactExpr>> _dimensions; // If not empty, this is an array
+  std::list<std::shared_ptr<ArtifactExpr>> _initializers;
+};
+
+/**
+ * @brief Represents a block of instructions.
+ */
+class ArtifactBlock : public ArtifactEntity
+{
+public:
+ bool isBlock() const override { return true; }
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+ void addStatement(const std::shared_ptr<ArtifactEntity> &statement)
+ {
+ _statements.push_back(statement);
+ }
+
+ const std::list<std::shared_ptr<ArtifactEntity>> &getStatements() const { return _statements; }
+
+ /**
+ * @brief Creates a new variable and place it to the block.
+ * @param type_name - the variable type name.
+   * @param var_name - the variable name.
+ * @param dimensions - optional dimensions, if the declared variable is an array.
+ * @param initializers - optional arguments of the object constructor.
+ * @return - the newly created variable.
+ */
+ std::shared_ptr<ArtifactVariable>
+ var(const std::string &type_name, const std::string &var_name,
+ const std::list<std::shared_ptr<ArtifactExpr>> &dimensions = {},
+ const std::list<std::shared_ptr<ArtifactExpr>> &initializers = {});
+ /**
+ * @brief Creates a function call.
+ * @param func_name - the function name.
+ * @param param_list - the parameters which are used for the call.
+ * @param call_on - optional object on which the function is called (if it is a member function).
+ * @param call_type - (for member functions only) call through: '.', '->', or '::'.
+ * @return
+ */
+ std::shared_ptr<ArtifactFunctionCall>
+ call(const std::string &func_name, const std::list<std::shared_ptr<ArtifactExpr>> &param_list,
+ std::shared_ptr<ArtifactExpr> call_on = nullptr,
+ ArtifactCallType call_type = ArtifactCallType::obj);
+ /**
+ * @brief Creates a return from function statement.
+ * @param expr - value to return in generated code.
+ * @return
+ */
+ std::shared_ptr<ArtifactRet> ret(std::shared_ptr<ArtifactExpr> expr);
+
+ /**
+ * @brief Creates a break from a loop instruction.
+ * @return
+ */
+ std::shared_ptr<ArtifactBreak> brk();
+
+ /**
+ * @brief Creates a continue in a loop instruction.
+ * @return
+ */
+ std::shared_ptr<ArtifactCont> cont();
+
+ /**
+ * @brief Creates a for loop instruction.
+ * @param init - initialize for loop.
+ * @param cond - condition to check for stay looping.
+ * @param iter - change when transiting to the next iteration.
+ * @return
+ */
+ std::shared_ptr<ArtifactForLoop> forLoop(std::shared_ptr<ArtifactVariable> init = nullptr,
+ std::shared_ptr<ArtifactExpr> cond = nullptr,
+ std::shared_ptr<ArtifactExpr> iter = nullptr);
+
+ /**
+ * @brief Creates an 'if' blocks.
+ * @param cond - condition expression
+ * @return
+ */
+ std::shared_ptr<ArtifactIf> ifCond(std::shared_ptr<ArtifactExpr> cond);
+
+ /**
+ * @brief Creates an unary operation expression.
+ * @param op
+ * @param expr
+ * @return
+ */
+ std::shared_ptr<ArtifactUnaryExpr> un(ArtifactUnOp op, std::shared_ptr<ArtifactExpr> expr);
+
+ /**
+ * @brief Creates a binary operation expression.
+ * @param op
+ * @param left
+ * @param right
+ * @return
+ */
+ std::shared_ptr<ArtifactBinaryExpr> bin(ArtifactBinOp op, std::shared_ptr<ArtifactExpr> left,
+ std::shared_ptr<ArtifactExpr> right);
+
+ /**
+ * @brief Creates a heap new operation expression.
+ * @param expr
+ * @return
+ */
+ std::shared_ptr<ArtifactUnaryExpr> heapNew(std::shared_ptr<ArtifactExpr> expr);
+
+ /**
+ * @brief Creates a heap free operation expression.
+ * @param expr
+ * @return
+ */
+ std::shared_ptr<ArtifactUnaryExpr> heapFree(std::shared_ptr<ArtifactExpr> expr);
+
+private:
+ std::list<std::shared_ptr<ArtifactEntity>> _statements;
+};
+
+/**
+ * @brief Represents for loops.
+ */
+class ArtifactForLoop : public ArtifactEntity
+{
+public:
+ explicit ArtifactForLoop(std::shared_ptr<ArtifactVariable> init = nullptr,
+ std::shared_ptr<ArtifactExpr> cond = nullptr,
+ std::shared_ptr<ArtifactExpr> iter = nullptr)
+ : _init(std::move(init)), _cond(std::move(cond)), _iter(std::move(iter))
+ {
+ }
+
+ bool isBlock() const override { return true; }
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+ std::shared_ptr<ArtifactVariable> getInit() const { return _init; }
+ std::shared_ptr<ArtifactExpr> getCond() const { return _cond; }
+ std::shared_ptr<ArtifactExpr> getIter() const { return _iter; }
+ const ArtifactBlock *getBlock() const { return &_body; }
+ ArtifactBlock *getBlock() { return &_body; }
+
+private:
+ std::shared_ptr<ArtifactVariable> _init;
+ std::shared_ptr<ArtifactExpr> _cond;
+ std::shared_ptr<ArtifactExpr> _iter;
+ ArtifactBlock _body;
+};
+
+/**
+ * @brief Represents if block.
+ */
+class ArtifactIf : public ArtifactEntity
+{
+public:
+ explicit ArtifactIf(std::shared_ptr<ArtifactExpr> cond) : _cond(std::move(cond)) {}
+ bool isBlock() const override { return true; }
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+ std::shared_ptr<ArtifactExpr> getCond() const { return _cond; }
+ const ArtifactBlock *getBlock() const { return &_body; }
+ ArtifactBlock *getBlock() { return &_body; }
+ const ArtifactBlock *getElseBlock() const { return &_elseBody; }
+ ArtifactBlock *getElseBlock() { return &_elseBody; }
+
+private:
+ std::shared_ptr<ArtifactExpr> _cond;
+ ArtifactBlock _body;
+ ArtifactBlock _elseBody;
+};
+
+/**
+ * @brief Represents a function.
+ */
+class ArtifactFunction : public ArtifactNamed
+{
+public:
+ /**
+ * @brief Constructs a function object.
+ * @param ret_type_name - the name of the returned type
+ * @param func_name - the function name.
+ * @param params - the parameter list.
+ */
+ ArtifactFunction(std::string ret_type_name, const std::string &func_name,
+ std::list<std::shared_ptr<ArtifactVariable>> params = {})
+ : ArtifactNamed(func_name), _params(std::move(params)), _retTypeName(std::move(ret_type_name))
+ {
+ }
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+ const std::list<std::shared_ptr<ArtifactVariable>> &getParameters() const { return _params; }
+ const std::string &getRetTypeName() const { return _retTypeName; }
+ const ArtifactBlock *getBlock() const { return &_body; }
+ ArtifactBlock *getBlock() { return &_body; }
+
+private:
+ std::list<std::shared_ptr<ArtifactVariable>> _params;
+ std::string _retTypeName;
+ ArtifactBlock _body;
+};
+
+/**
+ * @brief Basic class for both class member variables and member functions.
+ */
+class ArtifactClassMember
+{
+public:
+  // 'ArtifactClass' is forward-declared in IArtifactGenerator.h, which this header includes.
+  explicit ArtifactClassMember(const ArtifactClass *owner) : _owner(owner) {}
+
+  // The class this member belongs to (raw observer pointer, not owned).
+  const ArtifactClass *owner() const { return _owner; }
+
+protected:
+  const ArtifactClass *_owner;
+};
+
+/**
+ * @brief A class member variable.
+ */
+class ArtifactClassVariable : public ArtifactClassMember, public ArtifactVariable
+{
+public:
+ ArtifactClassVariable(const ArtifactClass *owner, const std::string &type_name,
+ const std::string &var_name,
+ const std::list<std::shared_ptr<ArtifactExpr>> &dimensions = {},
+ const std::list<std::shared_ptr<ArtifactExpr>> &initializers = {})
+ : ArtifactClassMember(owner), ArtifactVariable(type_name, var_name, dimensions, initializers)
+ {
+ }
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+};
+
+/**
+ * @brief A class for member functions.
+ */
+class ArtifactClassFunction : public ArtifactClassMember, public ArtifactFunction
+{
+public:
+ ArtifactClassFunction(const ArtifactClass *owner, const std::string &ret_type_name,
+ const std::string &func_name,
+ const std::list<std::shared_ptr<ArtifactVariable>> &params = {})
+ : ArtifactClassMember(owner), ArtifactFunction(ret_type_name, func_name, params)
+ {
+ }
+
+ void accept(IArtifactGenerator *g) const override { g->visit(this); }
+};
+
+/**
+ * @brief Represents a class of the generated code.
+ */
+class ArtifactClass : public ArtifactNamed
+{
+public:
+  explicit ArtifactClass(const std::string &class_name) : ArtifactNamed(class_name) {}
+
+  void accept(IArtifactGenerator *g) const override { g->visit(this); }
+
+  /**
+   * @brief Creates a class member variable.
+   * @param is_public - whether the created variable is public.
+   * @param type_name - the variable type name.
+   * @param var_name - the variable name.
+   * @param dimensions - optional dimensions, if the declared variable is an array.
+   * @param initializers - optional arguments of the object constructor.
+   * @return the newly created member variable.
+   */
+  std::shared_ptr<ArtifactClassVariable>
+  var(bool is_public, const std::string &type_name, const std::string &var_name,
+      const std::list<std::shared_ptr<ArtifactExpr>> &dimensions = {},
+      const std::list<std::shared_ptr<ArtifactExpr>> &initializers = {})
+  {
+    auto var = std::make_shared<ArtifactClassVariable>(this, type_name, var_name, dimensions,
+                                                       initializers);
+    // Both branches previously built an identical object; only the target list differs.
+    (is_public ? _publicVariables : _privateVariables).push_back(var);
+    return var;
+  }
+
+  /**
+   * @brief Creates a class member function.
+   * @param is_public - whether the created function is public.
+   * @param ret_type_name - the name of the returned type.
+   * @param func_name - the function name.
+   * @param params - the parameter list.
+   * @return the newly created member function.
+   */
+  std::shared_ptr<ArtifactClassFunction>
+  func(bool is_public, const std::string &ret_type_name, const std::string &func_name,
+       const std::list<std::shared_ptr<ArtifactVariable>> &params = {})
+  {
+    auto func = std::make_shared<ArtifactClassFunction>(this, ret_type_name, func_name, params);
+    (is_public ? _publicFunctions : _privateFunctions).push_back(func);
+    return func;
+  }
+
+  const std::list<std::shared_ptr<ArtifactClassVariable>> &publicVariables() const
+  {
+    return _publicVariables;
+  }
+
+  const std::list<std::shared_ptr<ArtifactClassVariable>> &privateVariables() const
+  {
+    return _privateVariables;
+  }
+
+  const std::list<std::shared_ptr<ArtifactClassFunction>> &publicFunctions() const
+  {
+    return _publicFunctions;
+  }
+
+  const std::list<std::shared_ptr<ArtifactClassFunction>> &privateFunctions() const
+  {
+    return _privateFunctions;
+  }
+
+  // Statements to be emitted into the generated class constructor body.
+  ArtifactBlock *getConstrBlock() { return &_constrBlock; }
+
+  const ArtifactBlock *getConstrBlock() const { return &_constrBlock; }
+
+private:
+  std::list<std::shared_ptr<ArtifactClassVariable>> _publicVariables;
+  std::list<std::shared_ptr<ArtifactClassVariable>> _privateVariables;
+  std::list<std::shared_ptr<ArtifactClassFunction>> _publicFunctions;
+  std::list<std::shared_ptr<ArtifactClassFunction>> _privateFunctions;
+  ArtifactBlock _constrBlock;
+};
+
+/**
+ * @brief Class representing a module in the ACL C++ soft backend.
+ *
+ * A module owns the top-level generated entities (classes) along with the
+ * include lists for the generated header and source files.
+ */
+class ArtifactModule
+{
+public:
+  explicit ArtifactModule(std::string name) : _name(std::move(name)) {}
+
+  void accept(IArtifactGenerator *g) const { g->visit(this); }
+
+  // Creates a class owned by this module and registers it as a top-level entity.
+  std::shared_ptr<ArtifactClass> createClass(const std::string &name)
+  {
+    auto a_class = std::make_shared<ArtifactClass>(name);
+    _entities.emplace_back(a_class);
+    return a_class;
+  }
+
+  void addHeaderInclude(const std::string &name) { _headerIncludes.push_back(name); }
+  void addSourceInclude(const std::string &name) { _sourceIncludes.push_back(name); }
+  void addHeaderSysInclude(const std::string &name) { _headerSysIncludes.push_back(name); }
+  void addSourceSysInclude(const std::string &name) { _sourceSysIncludes.push_back(name); }
+
+  const std::string &name() const { return _name; }
+  // Returned by const reference: the previous by-value signature copied the
+  // whole entity list on every call.
+  const std::list<std::shared_ptr<ArtifactEntity>> &entities() const { return _entities; }
+  const std::list<std::string> &headerIncludes() const { return _headerIncludes; }
+  const std::list<std::string> &sourceIncludes() const { return _sourceIncludes; }
+  const std::list<std::string> &headerSysIncludes() const { return _headerSysIncludes; }
+  const std::list<std::string> &sourceSysIncludes() const { return _sourceSysIncludes; }
+
+private:
+  std::string _name;
+  std::list<std::shared_ptr<ArtifactEntity>> _entities;
+  std::list<std::string> _headerIncludes;
+  std::list<std::string> _sourceIncludes;
+  std::list<std::string> _headerSysIncludes;
+  std::list<std::string> _sourceSysIncludes;
+};
+
+/**
+ * @brief Factory for some kinds of frequently used artifact DOM objects.
+ */
+class ArtifactFactory
+{
+public:
+ static std::shared_ptr<ArtifactId> id(const std::string &name)
+ {
+ return std::make_shared<ArtifactId>(name);
+ }
+
+ static std::shared_ptr<ArtifactLiteral> lit(const std::string &name)
+ {
+ return std::make_shared<ArtifactLiteral>(name);
+ }
+
+ /**
+ * @brief Creates a new variable and place it to the block.
+ * @param type_name - the variable type name.
+   * @param var_name - the variable name.
+ * @param dimensions - optional dimensions, if the declared variable is an array.
+ * @param initializer - optional variable initializer.
+ * @param constructors - optional arguments of the object constructor, if instantiating a class.
+ * @return - the newly created variable.
+ */
+ static std::shared_ptr<ArtifactVariable>
+ var(const std::string &type_name, const std::string &var_name,
+ const std::list<std::shared_ptr<ArtifactExpr>> &dimensions = {},
+ const std::list<std::shared_ptr<ArtifactExpr>> &initializers = {})
+ {
+ return std::make_shared<ArtifactVariable>(type_name, var_name, dimensions, initializers);
+ }
+
+ /**
+ * @brief Creates a 'reference' (like C/C++ '&' address operator do) to the expression.
+ * @param ref
+ * @return
+ */
+ static std::shared_ptr<ArtifactRef> ref(std::shared_ptr<ArtifactExpr> ref)
+ {
+ return std::make_shared<ArtifactRef>(ref);
+ }
+
+ /**
+ * @brief Creates a 'dereference' (like C/C++ '*' dereference operator do) of the expression.
+ * @param ref
+ * @return
+ */
+ static std::shared_ptr<ArtifactDeref> deref(std::shared_ptr<ArtifactExpr> ref)
+ {
+ return std::make_shared<ArtifactDeref>(ref);
+ }
+
+ /**
+ * @brief Creates a function call.
+ * @param func_name - the function name.
+ * @param param_list - the parameters which are used for the call.
+ * @param call_on - optional object on which the function is called (if it is a member function).
+ * @param call_type - (for member functions only) call through: '.', '->', or '::'.
+ * @return
+ */
+ static std::shared_ptr<ArtifactFunctionCall>
+ call(const std::string &func_name, const std::list<std::shared_ptr<ArtifactExpr>> &param_list,
+ std::shared_ptr<ArtifactExpr> on = nullptr,
+ ArtifactCallType call_type = ArtifactCallType::obj)
+ {
+ return std::make_shared<ArtifactFunctionCall>(func_name, param_list, on, call_type);
+ }
+
+ /**
+ * @brief Creates an unary operation expression.
+ * @param op
+ * @param expr
+ * @return
+ */
+ static std::shared_ptr<ArtifactUnaryExpr> un(ArtifactUnOp op, std::shared_ptr<ArtifactExpr> expr)
+ {
+ return std::make_shared<ArtifactUnaryExpr>(op, expr);
+ }
+
+ /**
+ * @brief Creates a binary operation expression.
+ * @param op
+ * @param left
+ * @param right
+ * @return
+ */
+ static std::shared_ptr<ArtifactBinaryExpr>
+ bin(ArtifactBinOp op, std::shared_ptr<ArtifactExpr> left, std::shared_ptr<ArtifactExpr> right)
+ {
+ return std::make_shared<ArtifactBinaryExpr>(op, left, right);
+ }
+
+ /**
+ * @brief Creates an array element accessor expression (like C/C++ array[i]).
+ * @param expr
+ * @param ind
+ * @return
+ */
+ static std::shared_ptr<ArtifactIndex> ind(std::shared_ptr<ArtifactExpr> expr,
+ std::shared_ptr<ArtifactExpr> ind)
+ {
+ return std::make_shared<ArtifactIndex>(expr, ind);
+ }
+
+ /**
+ * @brief Creates a heap new operation expression.
+ * @param expr
+ * @return
+ */
+ static std::shared_ptr<ArtifactUnaryExpr> heapNew(std::shared_ptr<ArtifactExpr> expr)
+ {
+ return std::make_shared<ArtifactUnaryExpr>(ArtifactUnOp::heapNew, expr);
+ }
+
+ /**
+ * @brief Creates a heap free operation expression.
+ * @param expr
+ * @return
+ */
+ static std::shared_ptr<ArtifactUnaryExpr> heapFree(std::shared_ptr<ArtifactExpr> expr)
+ {
+ return std::make_shared<ArtifactUnaryExpr>(ArtifactUnOp::heapFree, expr);
+ }
+};
+
+} // namespace nnc
+
+#endif //_NNC_ARTIFACT_MODEL_H_
diff --git a/compiler/nnc/backends/acl_soft_backend/CMakeLists.txt b/compiler/nnc/backends/acl_soft_backend/CMakeLists.txt
new file mode 100644
index 000000000..8f55303b1
--- /dev/null
+++ b/compiler/nnc/backends/acl_soft_backend/CMakeLists.txt
@@ -0,0 +1,14 @@
+nnas_find_package(Boost REQUIRED COMPONENTS filesystem)
+
+set(ACL_SOFT_BACKEND_CPP_SOURCES AclCppGenerator.cpp AclCppOpGenerator.cpp
+ ArtifactGeneratorCppCode.cpp ArtifactGeneratorCppDecl.cpp ArtifactModel.cpp)
+
+file(GLOB_RECURSE ACL_IN_SOURCES "*.in")
+nnc_make_generated_sources("${ACL_IN_SOURCES}" ${CMAKE_CURRENT_BINARY_DIR} ACL_GENERATED_SOURCES)
+
+nnc_add_library(acl_soft_backend_cpp SHARED ${ACL_SOFT_BACKEND_CPP_SOURCES} ${ACL_GENERATED_SOURCES})
+target_include_directories(acl_soft_backend_cpp PRIVATE ${Boost_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR})
+target_link_libraries(acl_soft_backend_cpp PRIVATE mir ${Boost_LIBRARIES})
+
+# install soft backend c++ library
+nnc_install_library(acl_soft_backend_cpp)
diff --git a/compiler/nnc/backends/acl_soft_backend/IArtifactGenerator.h b/compiler/nnc/backends/acl_soft_backend/IArtifactGenerator.h
new file mode 100644
index 000000000..02ccbb615
--- /dev/null
+++ b/compiler/nnc/backends/acl_soft_backend/IArtifactGenerator.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_ARTIFACT_GENERATOR_INTERFACE_H_
+#define _NNC_ARTIFACT_GENERATOR_INTERFACE_H_
+
+namespace nnc
+{
+
+class ArtifactEntity;
+class ArtifactLiteral;
+class ArtifactNamed;
+class ArtifactVariable;
+class ArtifactExpr;
+class ArtifactId;
+class ArtifactRef;
+class ArtifactDeref;
+class ArtifactFunctionCall;
+class ArtifactUnaryExpr;
+class ArtifactBinaryExpr;
+class ArtifactIndex;
+class ArtifactRet;
+class ArtifactBreak;
+class ArtifactCont;
+class ArtifactBlock;
+class ArtifactForLoop;
+class ArtifactIf;
+class ArtifactFunction;
+class ArtifactModule;
+class ArtifactClass;
+class ArtifactClassMember;
+class ArtifactClassVariable;
+class ArtifactClassFunction;
+
+/**
+ * @brief The interface of the artifact source code producer.
+ */
+class IArtifactGenerator
+{
+public:
+  virtual ~IArtifactGenerator() = default;
+
+  // One visit() overload per concrete artifact DOM node type; nodes dispatch
+  // here from their accept() implementations (see ArtifactModel.h).
+  virtual void visit(const ArtifactLiteral *node) = 0;
+  virtual void visit(const ArtifactId *node) = 0;
+  virtual void visit(const ArtifactRef *node) = 0;
+  virtual void visit(const ArtifactDeref *node) = 0;
+  virtual void visit(const ArtifactVariable *node) = 0;
+  virtual void visit(const ArtifactFunctionCall *node) = 0;
+  virtual void visit(const ArtifactUnaryExpr *node) = 0;
+  virtual void visit(const ArtifactBinaryExpr *node) = 0;
+  virtual void visit(const ArtifactIndex *node) = 0;
+  virtual void visit(const ArtifactRet *node) = 0;
+  virtual void visit(const ArtifactBreak *node) = 0;
+  virtual void visit(const ArtifactCont *node) = 0;
+  virtual void visit(const ArtifactBlock *node) = 0;
+  virtual void visit(const ArtifactForLoop *node) = 0;
+  virtual void visit(const ArtifactIf *node) = 0;
+  virtual void visit(const ArtifactFunction *node) = 0;
+  virtual void visit(const ArtifactClass *node) = 0;
+  virtual void visit(const ArtifactClassVariable *node) = 0;
+  virtual void visit(const ArtifactClassFunction *node) = 0;
+  virtual void visit(const ArtifactModule *node) = 0;
+};
+
+} // namespace nnc
+
+#endif //_NNC_ARTIFACT_GENERATOR_INTERFACE_H_
diff --git a/compiler/nnc/backends/interpreter/CMakeLists.txt b/compiler/nnc/backends/interpreter/CMakeLists.txt
new file mode 100644
index 000000000..9c016b494
--- /dev/null
+++ b/compiler/nnc/backends/interpreter/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Interpreter backend: thin nnc plugin around the mir_interpreter library.
+set(interp_src InterpreterBackend.cpp)
+nnc_add_library(nnc_interpreter SHARED ${interp_src})
+target_link_libraries(nnc_interpreter PRIVATE mir_interpreter)
+
+# HDF5 is optional; when present the backend dumps result tensors to .hdf5
+# (see NNC_HDF5_SUPPORTED guards in InterpreterBackend.cpp).
+if(NNC_HDF5_SUPPORTED)
+  target_include_directories(nnc_interpreter PRIVATE ${HDF5_INCLUDE_DIRS})
+  target_link_libraries(nnc_interpreter PRIVATE ${HDF5_CXX_LIBRARIES})
+endif(NNC_HDF5_SUPPORTED)
+
+# install interpreter library
+nnc_install_library(nnc_interpreter)
diff --git a/compiler/nnc/backends/interpreter/InterpreterBackend.cpp b/compiler/nnc/backends/interpreter/InterpreterBackend.cpp
new file mode 100644
index 000000000..923a7cfc7
--- /dev/null
+++ b/compiler/nnc/backends/interpreter/InterpreterBackend.cpp
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstring>
+#include <utility>
+#include <vector>
+#include <fstream>
+#include <sstream>
+#include <algorithm>
+
+#include "Definitions.h"
+
+#ifdef NNC_HDF5_SUPPORTED
+#include <H5Cpp.h>
+#else
+#include <iostream>
+#endif // NNC_HDF5_SUPPORTED
+
+#include "mir/Shape.h"
+
+#include "MirInterpreter.h"
+#include "backends/interpreter/InterpreterBackend.h"
+
+#include "mir/Graph.h"
+
+#include "mir/ShapeRange.h"
+#include "mir/Tensor.h"
+
+#include <stdexcept>
+
+namespace nnc
+{
+
+using namespace mir;
+
+#ifdef NNC_HDF5_SUPPORTED
+
+/**
+ * @brief save tensor in file in '.hdf5' format
+ * @param tensor - tensor to save
+ * @param tensor_name - name, by which tensor will be saved (taken by value:
+ *        it is modified locally to replace '/' characters)
+ * @param destination - path to the directory in which the file
+ *        "<tensor_name>.hdf5" will be created
+ * @throws std::runtime_error for data types other than FLOAT32/UINT8
+ */
+static void writeTensorToHDF5File(const TensorVariant &tensor, std::string tensor_name,
+                                  const std::string &destination)
+{
+
+  // Prepare shape, rank, dims, numElems
+  auto &shape = tensor.getShape();
+  const int32_t rank = shape.rank();
+  // NOTE(review): variable-length array is a compiler extension, not standard C++.
+  hsize_t dims[rank];
+  for (int32_t axis = 0; axis < rank; ++axis)
+  {
+    dims[axis] = static_cast<hsize_t>(shape.dim(axis));
+  }
+
+  // Flatten the tensor into a contiguous byte buffer, element by element,
+  // iterating the whole index space in ShapeRange order.
+  std::vector<char> values;
+  const auto elem_size = tensor.getElementSize();
+  values.resize(elem_size * shape.numElements());
+  char *values_ptr = values.data();
+  ShapeRange out_range(shape);
+  for (auto &out_idx : out_range)
+  {
+    std::memcpy(values_ptr, tensor.at(out_idx), elem_size);
+    values_ptr += elem_size;
+  }
+
+  // Slashes ('/') are not allowed in tensor/file names; replace with '_'
+  std::replace(tensor_name.begin(), tensor_name.end(), '/', '_');
+  std::string filename = destination + "/" + tensor_name + ".hdf5";
+
+  // Write to .hdf5 file; H5F_ACC_TRUNC overwrites any existing file
+  H5::H5File h5File(filename, H5F_ACC_TRUNC);
+  H5::DataSpace dataspace(rank, dims);
+  H5::DataType h5_data_type;
+
+  if (tensor.getDataType() == DataType::FLOAT32)
+    h5_data_type = H5::PredType::NATIVE_FLOAT;
+  else if (tensor.getDataType() == DataType::UINT8)
+    h5_data_type = H5::PredType::NATIVE_UINT8;
+  else
+    throw std::runtime_error("NYI writing that DataType!");
+
+  auto dataset = h5File.createDataSet(tensor_name, h5_data_type, dataspace);
+  dataset.write(values.data(), h5_data_type);
+}
+
+#endif // NNC_HDF5_SUPPORTED
+
+/**
+ * @brief Read a raw binary tensor dump from a file.
+ * @param filename - path to the file with raw tensor data
+ * @param type - expected tensor type; the file size must be exactly
+ *        numElements * element size of this type
+ * @return TensorVariant holding a copy of the file contents
+ * @throws std::runtime_error if the file cannot be opened/read or its
+ *         size does not match the expected tensor size
+ */
+static TensorVariant readTensorFromFile(const std::string &filename, const TensorType &type)
+{
+  const std::size_t input_data_size =
+      type.getShape().numElements() * getDataTypeSize(type.getElementType());
+
+  std::ifstream stream(filename, std::ios::in | std::ios::binary);
+  if (stream.fail())
+    throw std::runtime_error("Couldn't open file \"" + filename + "\".");
+
+  // Determine file size by seeking to the end and back.
+  stream.seekg(0, std::ios::end);
+  std::streampos end = stream.tellg();
+  stream.seekg(0, std::ios::beg);
+  std::streampos begin = stream.tellg();
+  int64_t file_size = end - begin;
+
+  if (static_cast<std::size_t>(file_size) != input_data_size)
+    throw std::runtime_error("File \"" + filename + "\" has incorrect size: " +
+                             std::to_string(file_size) + "(expected: " +
+                             std::to_string(input_data_size) + ").");
+
+  std::unique_ptr<char[]> data(new char[input_data_size]);
+  stream.read(data.get(), input_data_size);
+  if (stream.fail())
+    throw std::runtime_error("Couldn't read file \"" + filename + "\".");
+
+  // TensorVariant copies the buffer, so the local allocation may be freed.
+  return TensorVariant(type, data.get());
+}
+
+// Stores the directory with input tensor dumps and the directory to
+// write results to; parameters are taken by value and moved.
+InterpreterBackend::InterpreterBackend(std::string input_dir, std::string output_dir)
+    : _input_dir(std::move(input_dir)), _output_dir(std::move(output_dir))
+{
+}
+
+/**
+ * @brief Run the MIR interpreter over the graph.
+ *
+ * For every graph input, loads "<input_dir>/<name>.dat" (with '/' in the
+ * tensor name replaced by '_') as a raw tensor and feeds it to the
+ * interpreter; then executes the graph and, when HDF5 support is compiled
+ * in, dumps every output tensor to the output directory.
+ */
+void InterpreterBackend::run(mir::Graph *graph)
+{
+  assert(graph);
+
+  mir_interpreter::MIRInterpreter interpreter;
+
+  for (const auto *input_op : graph->getInputs())
+  {
+    const Operation::Output *input = input_op->getOutput(0);
+
+    std::string tensor_name = input->getName();
+    assert(!tensor_name.empty());
+    // Same '/'-to-'_' mangling as used when tensors are written out.
+    std::replace(tensor_name.begin(), tensor_name.end(), '/', '_');
+    std::string filename = _input_dir + "/" + tensor_name + ".dat";
+
+    TensorVariant tensor = readTensorFromFile(filename, input->getType());
+    interpreter.setTensor(input, std::move(tensor));
+  }
+
+  graph->accept(&interpreter);
+
+  for (const auto *output_op : graph->getOutputs())
+  {
+    const auto &output_name = output_op->getInput(0)->getName();
+
+#ifdef NNC_HDF5_SUPPORTED
+    const auto &tensor = interpreter.getTensor(output_op->getInput(0));
+    writeTensorToHDF5File(tensor, output_name, _output_dir);
+#else
+    std::cout << "Result <" << output_name << "> wasn't saved, due to lack of HDF5" << std::endl;
+#endif // NNC_HDF5_SUPPORTED
+  }
+}
+
+} // namespace nnc
diff --git a/compiler/nnc/backends/soft_backend/CMakeLists.txt b/compiler/nnc/backends/soft_backend/CMakeLists.txt
new file mode 100644
index 000000000..ea0cd1ac6
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Soft (C++ source code) backend. Boost.Filesystem is used by CPPGenerator
+# to create the output directory.
+nnas_find_package(Boost REQUIRED COMPONENTS filesystem)
+
+set(SOFT_BACKEND_CPP_SOURCES CPPGenerator.cpp ModelAnalyzer.cpp SBSerializer.cpp SequencedIR.cpp)
+
+# *.def snippet files are converted into *.generated.h headers that the
+# generator embeds into the emitted artifact.
+file(GLOB_RECURSE SOFT_DEF_SOURCES "*.def")
+
+nnc_make_generated_sources("${SOFT_DEF_SOURCES}" ${CMAKE_CURRENT_BINARY_DIR} SOFT_GENERATED_SOURCES)
+
+nnc_add_library(soft_backend_cpp SHARED ${SOFT_BACKEND_CPP_SOURCES} ${SOFT_GENERATED_SOURCES})
+target_include_directories(soft_backend_cpp PRIVATE ${Boost_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR})
+target_link_libraries(soft_backend_cpp PRIVATE mir ${Boost_LIBRARIES})
+
+# install soft backend c++ library
+nnc_install_library(soft_backend_cpp)
diff --git a/compiler/nnc/backends/soft_backend/CPPGenerator.cpp b/compiler/nnc/backends/soft_backend/CPPGenerator.cpp
new file mode 100644
index 000000000..236881b80
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/CPPGenerator.cpp
@@ -0,0 +1,489 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backends/soft_backend/CPPGenerator.h"
+
+#include "mir/Operation.h"
+#include "ModelAnalyzer.h"
+#include "SBSerializer.h"
+
+#include "CommonData.def"
+
+#include "cpp_header_types.generated.h"
+#include "cpp_operations.generated.h"
+#include "CommonData.generated.h"
+#include "eigen.generated.h"
+#include "cpp_common_funcs.generated.h"
+#include "cpp_capped_relu.generated.h"
+#include "cpp_concat.generated.h"
+#include "cpp_conv.generated.h"
+#include "cpp_conv_transpose.generated.h"
+#include "cpp_depthwise_conv.generated.h"
+#include "cpp_fully_connected.generated.h"
+#include "cpp_pool.generated.h"
+#include "cpp_sigmoid.generated.h"
+#include "cpp_sqrt.generated.h"
+#include "cpp_relu.generated.h"
+#include "cpp_leaky_relu.generated.h"
+#include "cpp_reduce.generated.h"
+#include "cpp_resize.generated.h"
+#include "cpp_softmax.generated.h"
+#include "cpp_slice.generated.h"
+#include "cpp_elu.generated.h"
+#include "cpp_tanh.generated.h"
+#include "cpp_elementwise.generated.h"
+#include "cpp_pad.generated.h"
+#include "cpp_transpose.generated.h"
+#include "cpp_gather.generated.h"
+#include "cpp_broadcast.generated.h"
+
+#include <boost/filesystem.hpp>
+
+#include <cstring>
+#include <fstream>
+#include <stdexcept>
+#include <utility>
+
+namespace nnc
+{
+
+using namespace sir;
+using namespace std;
+namespace fs = boost::filesystem;
+
+/**
+ * @brief Open an output file stream, owned by the returned pointer
+ * @param path Path to the file to create/open
+ * @return Owning pointer to the opened output stream
+ * @throws runtime_error if the file could not be opened
+ */
+static unique_ptr<ofstream> getStream(const string &path)
+{
+  unique_ptr<ofstream> ofs(new ofstream(path));
+  if (ofs->fail())
+    throw runtime_error("Can not open code output file: " + path);
+  return ofs;
+}
+
+// Stores the output directory and the base name used for the generated
+// .h/.cpp/.params files; parameters are taken by value and moved.
+CPPCodeGenerator::CPPCodeGenerator(std::string output_dir, std::string artifact_name)
+    : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
+{
+}
+
+/**
+ * @brief Write the serialized model parameters file.
+ *
+ * File layout: a fixed-size header of MAGIC ("NNMP") | format version |
+ * model hash, followed by the raw serialized parameter buffer.
+ *
+ * @param out stream to write the .params file to
+ * @param s serializer holding version, hash and the parameter buffer
+ * @throws runtime_error on any write failure
+ */
+void CPPCodeGenerator::materializeModelParams(ostream &out, const Serializer &s)
+{
+  using namespace params;
+
+  // First form a dump header
+  char header[HEADER_LEN];
+  uint32_t version = s.getFormatVersion();
+  uint32_t hash = s.getModelHash();
+  // Header field widths must match the constants from CommonData.def.
+  static_assert(VERSION_LEN == sizeof(version), "version length mismatch");
+  static_assert(HASH_LEN == sizeof(hash), "hash length mismatch");
+  memcpy(header, MAGIC, MAGIC_LEN);
+  memcpy(header + MAGIC_LEN, &version, VERSION_LEN);
+  memcpy(header + MAGIC_LEN + VERSION_LEN, &hash, HASH_LEN);
+
+  out.write(header, HEADER_LEN);
+  if (out.fail())
+    throw runtime_error("Failed to write model parameters header");
+  auto &params = s.getBuffer();
+  out.write(params.data(), params.size());
+  if (out.fail())
+    throw runtime_error("Failed to write model Parameters");
+}
+
+/**
+ * @brief Generate the C++ artifact for a graph.
+ *
+ * Pipeline: analyze the graph (ModelAnalyzer), serialize parameters
+ * (Serializer), sanitize tensor names, then emit three files into
+ * _output_dir: <artifact>.h, <artifact>.cpp and <artifact>.params.
+ */
+void CPPCodeGenerator::run(mir::Graph *graph)
+{
+  assert(graph);
+
+  // visit and analyze graph
+  ModelAnalyzer ma;
+  ma.analyze(graph);
+  // serialize parameters
+  Serializer serializer;
+  serializer.serialize(ma.getInferenceSequence());
+  // rename tensors for specific backend language
+  formatTensorNames(ma);
+
+  fs::create_directory(_output_dir);
+
+  const string base_path = _output_dir + "/" + _artifact_name;
+  const string header_path = base_path + ".h";
+  const string code_path = base_path + ".cpp";
+  const string params_path = base_path + ".params";
+
+  // Print header; streams are reset explicitly to flush/close each file
+  // before the next one is produced.
+  auto header_stream = getStream(header_path);
+  materializeHeader(*header_stream, ma);
+  header_stream.reset();
+
+  // Print code
+  auto code_stream = getStream(code_path);
+  materializeCode(*code_stream, ma, serializer);
+  code_stream.reset();
+
+  // Print model parameters
+  auto model_stream = getStream(params_path);
+  materializeModelParams(*model_stream, serializer);
+  model_stream.reset();
+}
+
+/**
+ * @brief Renames tensors with respect to C++ naming conventions
+ * @param ma Intermediate artifact information
+ *
+ * Fills _formattedTensors, indexed by tensor id: unnamed temporaries get
+ * synthetic names "Tensor_<n>"; named tensors have all non-alphanumeric
+ * characters replaced with '_'. Non-temporary tensors additionally get a
+ * leading '_' (presumably to avoid clashes with C++ keywords/identifiers
+ * in the artifact — TODO confirm).
+ */
+void CPPCodeGenerator::formatTensorNames(const ModelAnalyzer &ma)
+{
+  using TensorType = TensorDescriptor::Type;
+
+  int tmp_tensors = 0;
+  for (const TensorDescriptor &td : ma.getTensors())
+  {
+    string formatted_name;
+    if (td.name.empty())
+    {
+      assert(td.type == TensorType::temporary);
+      formatted_name = "Tensor_" + to_string(tmp_tensors++);
+    }
+    else
+    {
+      if (td.type != TensorType::temporary)
+        formatted_name.append("_");
+      formatted_name.append(td.name);
+      for (char &c : formatted_name)
+      {
+        if (!isalnum(c))
+          c = '_';
+      }
+    }
+    _formattedTensors.push_back(move(formatted_name));
+  }
+}
+
+/**
+ * @brief Emit the artifact header file.
+ *
+ * + Writes to out support data types and methods: Shape, Tensor
+ *   (from the embedded cpp_header_types snippet).
+ *   This is part of the user interface used to feed data to the artifact.
+ * + Writes the actual model class declaration that contains:
+ *   network constructor, setters to feed data to network, getters to get
+ *   results, and doInference method that performs actual inference.
+ *   Copy construction and assignment are deleted.
+ */
+void CPPCodeGenerator::materializeHeader(ostream &out, const ModelAnalyzer &ma)
+{
+  string class_name = ma.getModelName() + "Model";
+
+  out.write(cpp_header_types, sizeof(cpp_header_types));
+  out << "class " << class_name << "\n"
+                                   "{\n"
+                                   "public:\n"
+                                   "  "
+      << class_name << "(const std::string& parametersPath);\n"
+                       "  ~"
+      << class_name << "();\n";
+  // generate input setters
+  // convenience setter "setInput" is only emitted for single-input models
+  if (ma.getInputs().size() == 1)
+    out << "  bool setInput(const Tensor& inputs);\n";
+  for (const size_t inId : ma.getInputs())
+  {
+    const string &tName = _formattedTensors[inId];
+    out << "  bool set" << tName << "(const Tensor& t);\n";
+  }
+  // generate output getters
+  // convenience getter "getOutput" is only emitted for single-output models
+  if (ma.getOutputs().size() == 1)
+  {
+    out << "  std::shared_ptr<Tensor> getOutput();\n";
+  }
+  for (const size_t out_id : ma.getPersistentTensors())
+  {
+    const string &tensor_name = _formattedTensors[out_id];
+    out << "  std::shared_ptr<Tensor> get" << tensor_name << "();\n";
+  }
+  out << "  void doInference();\n\n"
+         "private:\n"
+         "  "
+      << class_name << "() = delete;\n"
+                       "  "
+      << class_name << "(const " << class_name << "& orig) = delete;\n"
+                       "  "
+      << class_name << "& operator=(const " << class_name << "& orig) = delete;\n";
+  // generate input/output tensors: inputs by value, persistent outputs as
+  // shared_ptr (allocated in doInference, returned by the getters)
+  for (const size_t in_tensor_id : ma.getInputs())
+  {
+    const string &tName = _formattedTensors[in_tensor_id];
+    out << "  Tensor " << tName << ";\n";
+  }
+  for (const size_t out_tensor_id : ma.getPersistentTensors())
+  {
+    const string &tName = _formattedTensors[out_tensor_id];
+    out << "  std::shared_ptr<Tensor> " << tName << ";\n";
+  }
+  // pointer to NN parameters
+  out << "  char* _parameters;\n";
+  out << "  size_t _paramSize;\n";
+  out << "};\n";
+}
+
+/**
+ * @brief Prints list of function arguments, separated by commas
+ * @param out Stream to write program text
+ * @param args arguments to print
+ */
+static void printOperationArgs(ostream &out, const vector<string> &args)
+{
+  // No separator before the first argument, ", " before every later one.
+  bool insert_comma = false;
+  for (const string &arg : args)
+  {
+    if (insert_comma)
+      out << ", ";
+    insert_comma = true;
+    out << arg;
+  }
+}
+
+/**
+ * @brief Append formatted tensor names for the given tensor ids to args.
+ *
+ * Persistent tensors are declared as shared_ptr<Tensor> in the artifact
+ * (see materializeHeader), so they are passed dereferenced ("*name");
+ * all other tensors are passed by name.
+ */
+void CPPCodeGenerator::gatherOperationArguments(const ModelAnalyzer &ma,
+                                                const vector<size_t> &arg_ids, vector<string> &args)
+{
+
+  for (size_t id : arg_ids)
+  {
+    const string &tensor_name = _formattedTensors[id];
+    if (ma.getTensors()[id].type == TensorDescriptor::Type::persistent)
+      args.push_back("*" + tensor_name);
+    else
+      args.push_back(tensor_name);
+  }
+}
+
+/**
+ * @brief Emit a "bool setX(const Tensor&)" method for an input tensor.
+ *
+ * The generated setter validates the rank and every dimension of the
+ * incoming tensor against the expected shape and returns false on any
+ * mismatch; scalar (rank 0) tensors are accepted without checks.
+ */
+void CPPCodeGenerator::printSetter(ostream &out, const string &class_name,
+                                   const string &setter_name, const TensorDescriptor &td)
+{
+
+  const string &var_name = _formattedTensors[td.id];
+  out << "bool " << class_name << "::set" << setter_name << "(const Tensor& t)\n"
+                                                            "{\n";
+  // need to insert input correctness check
+  // NOTE(review): getDims() is compared against the rank here — presumably
+  // it returns the number of dimensions; confirm against the Shape snippet.
+  const mir::Shape expected = td.shape;
+  int rank = expected.rank();
+  if (rank != 0)
+  {
+    out << "  "
+        << "if (t.getShape().getDims() != " << td.shape.rank() << ") return false;\n";
+    for (int i = 0; i < rank; ++i)
+      out << "  "
+          << "if (t.getShape()[" << i << "] != " << expected.dim(i) << ") return false;\n";
+  }
+  out << "  " << var_name << " = t;\n"
+                             "  return true;\n"
+                             "}\n\n";
+}
+
+/**
+ * @brief Emit a "shared_ptr<Tensor> getX()" method that returns the
+ * stored persistent (output) tensor.
+ */
+void CPPCodeGenerator::printGetter(ostream &out, const string &class_name,
+                                   const string &getter_name, const TensorDescriptor &td)
+{
+
+  const string &var_name = _formattedTensors[td.id];
+  out << "shared_ptr<Tensor> " << class_name << "::get" << getter_name << "()\n"
+                                                                          "{\n"
+                                                                          "  return "
+      << var_name << ";\n"
+                     "}\n\n";
+}
+
+/**
+ * @brief Emit one operation call inside doInference().
+ *
+ * Argument order of the generated call: output tensors first, then the
+ * pointer into the loaded parameter blob (offset past the file header),
+ * then input tensors. Input (graph-input) operations produce no code.
+ */
+void CPPCodeGenerator::materializeCall(ostream &out, const ModelAnalyzer &ma,
+                                       const sir::CallFunction *call)
+{
+  assert(call != nullptr);
+  if (call->mirOp->getType() == mir::Operation::Type::input)
+    return;
+  // materialize call
+  out << "  " << call->funcName << "(";
+  const auto &prev_nodes = call->mirOp->getInputs();
+  const auto &out_tensors = call->outputs;
+  vector<string> args;
+  args.reserve(prev_nodes.size() + out_tensors.size() + 1);
+  // gather output arguments
+  gatherOperationArguments(ma, call->outputs, args);
+  // parameters offset
+  args.push_back("_parameters + " + to_string(params::HEADER_LEN + call->paramStartOffset));
+  // gather input arguments
+  gatherOperationArguments(ma, call->inputs, args);
+  // put arguments into stream
+  printOperationArgs(out, args);
+  out << ");\n";
+}
+
+// Placeholder: transpose actions are not supported by this generator yet;
+// reaching this function is a hard assertion failure in debug builds.
+void CPPCodeGenerator::materializeTranspose(ostream &out, const ModelAnalyzer &ma,
+                                            const sir::TransposeTensor *transpose)
+{
+  assert(transpose != nullptr);
+  (void)out;
+  (void)ma;
+  (void)transpose;
+  assert(false && "not implemented");
+}
+
+// Emit a local "Tensor <name>;" declaration for a temporary tensor at the
+// point where it is first needed (CreateTmp action).
+void CPPCodeGenerator::materializeConstructor(ostream &out, const ModelAnalyzer &ma,
+                                              const sir::CreateTmp *constructor)
+{
+  assert(constructor != nullptr);
+  const TensorDescriptor &td = ma.getTensors()[constructor->tensorId];
+  assert(td.type == sir::TensorDescriptor::Type::temporary);
+  (void)td; // only used by the assert above
+  const string &t_name = _formattedTensors[constructor->tensorId];
+  out << "  Tensor " << t_name << ";\n";
+}
+
+// Emit "<name>.clean();" to release a temporary tensor's storage after its
+// last use (DestroyTmp action).
+void CPPCodeGenerator::materializeDestructor(ostream &out, const ModelAnalyzer &ma,
+                                             const sir::DestroyTmp *destructor)
+{
+  assert(destructor != nullptr);
+  const TensorDescriptor &td = ma.getTensors()[destructor->tensorId];
+  assert(td.type == sir::TensorDescriptor::Type::temporary);
+  (void)td; // only used by the assert above
+  const string &t_name = _formattedTensors[destructor->tensorId];
+  out << "  " << t_name << ".clean();\n";
+}
+
+/**
+ * @brief Emit the body of doInference(): the scratch (im2col) tensor
+ * followed by every action of the analyzed inference sequence, dispatched
+ * on the action type.
+ */
+void CPPCodeGenerator::materializeInferenceSequence(ostream &out, const ModelAnalyzer &ma)
+{
+
+  // Allocate temporary(im2col) tensor
+  out << "  Tensor " << _formattedTensors[ma.getTempTID()] << "(Shape{" << ma.getMaxTemporarySize()
+      << "});\n";
+
+  for (const unique_ptr<Action> &action : ma.getInferenceSequence())
+  {
+    Action *ptr = action.get();
+    switch (action->type)
+    {
+      case Action::Type::callFunction:
+        materializeCall(out, ma, dynamic_cast<const sir::CallFunction *>(ptr));
+        break;
+      case Action::Type::transposeTensor:
+        materializeTranspose(out, ma, dynamic_cast<const sir::TransposeTensor *>(ptr));
+        break;
+      case Action::Type::createTmp:
+        materializeConstructor(out, ma, dynamic_cast<const sir::CreateTmp *>(ptr));
+        break;
+      case Action::Type::destroyTmp:
+        materializeDestructor(out, ma, dynamic_cast<const sir::DestroyTmp *>(ptr));
+        break;
+      default:
+        assert(false && "unexpected action type");
+    }
+  }
+}
+
+/**
+ * Function writes to output stream needed code snippets, and implementations of artifact class
+ * functions.
+ *
+ * Emitted in order: the include of the artifact's own header, the embedded
+ * operation-kernel snippets (*.generated.h), then the model class members:
+ * constructor/destructor, per-input setters, per-output getters and
+ * doInference().
+ */
+void CPPCodeGenerator::materializeCode(ostream &out, const ModelAnalyzer &ma, const Serializer &s)
+{
+  string class_name = ma.getModelName() + "Model";
+
+  out << "#include \"" << _artifact_name << ".h\"\n";
+
+  // put operations from tflite
+  out.write(eigen, sizeof(eigen));
+
+  out.write(CommonData, sizeof(CommonData));
+
+  out.write(cpp_common_funcs, sizeof(cpp_common_funcs));
+  out.write(cpp_capped_relu, sizeof(cpp_capped_relu));
+  out.write(cpp_concat, sizeof(cpp_concat));
+  out.write(cpp_conv, sizeof(cpp_conv));
+  out.write(cpp_depthwise_conv, sizeof(cpp_depthwise_conv));
+  out.write(cpp_fully_connected, sizeof(cpp_fully_connected));
+  out.write(cpp_resize, sizeof(cpp_resize));
+  out.write(cpp_sigmoid, sizeof(cpp_sigmoid));
+  out.write(cpp_pool, sizeof(cpp_pool));
+  out.write(cpp_relu, sizeof(cpp_relu));
+  out.write(cpp_reduce, sizeof(cpp_reduce));
+  out.write(cpp_softmax, sizeof(cpp_softmax));
+  out.write(cpp_slice, sizeof(cpp_slice));
+  out.write(cpp_elementwise, sizeof(cpp_elementwise));
+  out.write(cpp_elu, sizeof(cpp_elu));
+  out.write(cpp_tanh, sizeof(cpp_tanh));
+  out.write(cpp_pad, sizeof(cpp_pad));
+  out.write(cpp_sqrt, sizeof(cpp_sqrt));
+  out.write(cpp_conv_transpose, sizeof(cpp_conv_transpose));
+  out.write(cpp_transpose, sizeof(cpp_transpose));
+  out.write(cpp_gather, sizeof(cpp_gather));
+  out.write(cpp_broadcast, sizeof(cpp_broadcast));
+  // Operations calls into all of the above
+  out.write(cpp_operations, sizeof(cpp_operations));
+  // Below call into operations
+  out.write(cpp_leaky_relu, sizeof(cpp_leaky_relu));
+
+  // gen NN constructor: loads the .params blob, checking version and hash
+  out << class_name << "::" << class_name
+      << "(const string& parametersPath)\n"
+         "{\n"
+         "  readParameters(_parameters, _paramSize, parametersPath, "
+      << s.getFormatVersion() << ", " << s.getModelHash() << ");\n"
+                                                             "}\n\n";
+  // gen NN destructor
+  out << class_name << "::~" << class_name << "()\n"
+                                               "{\n"
+                                               "  releaseParameters(_parameters, _paramSize);\n"
+                                               "}\n\n";
+  // generate input setters
+  // generate main setter if network has only one
+  const auto &inputs = ma.getInputs();
+  const auto &tensors = ma.getTensors();
+  if (inputs.size() == 1)
+  {
+    const TensorDescriptor &td = tensors[inputs[0]];
+    printSetter(out, class_name, "Input", td);
+  }
+  // generate setters by names
+  for (size_t input_tensor_id : inputs)
+  {
+    const string &input_tensor_name = _formattedTensors[input_tensor_id];
+    const TensorDescriptor &td = tensors[input_tensor_id];
+    printSetter(out, class_name, input_tensor_name, td);
+  }
+
+  // gen output getters
+  // generate main getter if network has only one
+  const auto &outputs = ma.getOutputs();
+  if (outputs.size() == 1)
+  {
+    const TensorDescriptor &td = tensors[outputs[0]];
+    printGetter(out, class_name, "Output", td);
+  }
+  for (size_t output_tensor_id : ma.getPersistentTensors())
+  {
+    const string &output_tensor_name = _formattedTensors[output_tensor_id];
+    const TensorDescriptor &td = tensors[output_tensor_id];
+    printGetter(out, class_name, output_tensor_name, td);
+  }
+  out << "void " << class_name << "::doInference()\n"
+                                  "{\n";
+  // persistent output tensors are (re)allocated at the start of every run
+  for (size_t output_tensor_id : ma.getPersistentTensors())
+  {
+    const string &output_tensor_name = _formattedTensors[output_tensor_id];
+    out << "  " << output_tensor_name << ".reset(new Tensor());\n";
+  }
+
+  // gen inference sequence
+  materializeInferenceSequence(out, ma);
+  out << "}";
+}
+
+} // namespace nnc
diff --git a/compiler/nnc/backends/soft_backend/CommonData.def b/compiler/nnc/backends/soft_backend/CommonData.def
new file mode 100644
index 000000000..a89d99db8
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/CommonData.def
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_SOFT_BACKEND_PARAM_CONSTANTS_H_
+#define _NNC_SOFT_BACKEND_PARAM_CONSTANTS_H_
+
+/**
+ * This file contains common constants and classes for code generator and artifact
+ */
+
+// Parameter-file header layout: MAGIC | version | hash (lengths in bytes).
+// Must stay in sync with materializeModelParams in CPPGenerator.cpp.
+namespace params {
+const int MAGIC_LEN = 4;
+const int VERSION_LEN = 4;
+const int HASH_LEN = 4;
+const int HEADER_LEN = MAGIC_LEN + VERSION_LEN + HASH_LEN;
+
+// +1 for the terminating NUL of the string literal
+const char MAGIC[MAGIC_LEN + 1] = "NNMP"; // Neural Network Model Parameters
+}
+
+/**
+ * Type of Edge Handling for pooling operation
+ */
+enum class PoolBorderType {
+  ZEROFILLED, // elements outside of input considered zero
+  EMPTY       // Consider that there are no elements outside of input shape
+};
+
+#endif // _NNC_SOFT_BACKEND_PARAM_CONSTANTS_H_
diff --git a/compiler/nnc/backends/soft_backend/ModelAnalyzer.cpp b/compiler/nnc/backends/soft_backend/ModelAnalyzer.cpp
new file mode 100644
index 000000000..82e62b531
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/ModelAnalyzer.cpp
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModelAnalyzer.h"
+
+#include "mir/Shape.h"
+#include "mir/Graph.h"
+#include "mir/OpDefs.h"
+
+#include <stack>
+#include <map>
+
+using namespace std;
+
+namespace nnc
+{
+
+using namespace mir;
+using namespace sir;
+
+/**
+ * @brief Register an operation in the inference sequence as a CallFunction.
+ *
+ * Declares tensors for the operation's outputs (input/constant/regular
+ * operations are handled differently), resolves its input tensor ids from
+ * already-processed predecessors, appends optional aux argument ids, and
+ * records the resulting action in _inferenceSequence and _opToDescr.
+ *
+ * @param op operation to register (its producers must be registered already)
+ * @param function_name artifact function name to call for this operation
+ * @param aux_args extra tensor ids appended after the regular inputs
+ */
+void ModelAnalyzer::appendOperationToInference(Operation *op, const string &function_name,
+                                               std::vector<size_t> aux_args)
+{
+
+  vector<size_t> node_output_tensors;
+
+  // process operation outputs
+  if (op->getType() == Operation::Type::input)
+  {
+    // register input tensor
+    const string &tensor_name = op->getOutput(0)->getName();
+    const auto tensor_id = declareInputTensor(tensor_name, op->getOutputShape(0));
+    node_output_tensors.push_back(tensor_id);
+  }
+  else if (op->getType() == Operation::Type::constant)
+  {
+    // register constant tensor
+    // its data is deserialized to the described tensor by O(1) at runtime
+    const auto tensor_id = declareTemporaryTensor();
+    node_output_tensors.push_back(tensor_id);
+  }
+  else if (op->getType() == Operation::Type::output)
+  {
+    // outputs declare no tensors of their own; they must reference a named one
+    assert(!op->getInput(0)->getName().empty());
+  }
+  else
+  {
+    for (const auto &output : op->getOutputs())
+    {
+      const auto &tensor_name = output.getName();
+      const auto tensor_id =
+          tensor_name.empty() ? declareTemporaryTensor() : declarePersistentTensor(tensor_name);
+      node_output_tensors.push_back(tensor_id);
+    }
+  }
+
+  // process operation inputs: look up the tensor id each producer assigned
+  // to the corresponding output index
+  vector<size_t> node_input_tensors;
+  for (const Operation::Output *input : op->getInputs())
+  {
+    size_t idx = input->getIndex();
+    const Operation *prev_op = input->getNode();
+    assert(_opToDescr.find(prev_op) != _opToDescr.end());
+    auto call = dynamic_cast<const CallFunction *>(_opToDescr[prev_op]);
+    assert(call);
+    const size_t &in_tensor_id = call->outputs[idx];
+    node_input_tensors.push_back(in_tensor_id);
+  }
+
+  std::copy(aux_args.begin(), aux_args.end(), std::back_inserter(node_input_tensors));
+  unique_ptr<Action> operation_call(new CallFunction(
+      op, function_name, std::move(node_input_tensors), std::move(node_output_tensors)));
+  _inferenceSequence.push_back(std::move(operation_call));
+  _opToDescr[op] = _inferenceSequence.back().get();
+}
+
+// Grow the recorded size of the shared scratch (im2col) buffer so that it
+// can hold the largest temporary any operation requests.
+void ModelAnalyzer::updateMaxTemporarySize(const size_t size)
+{
+  _max_temp_size = std::max(_max_temp_size, size);
+}
+
+// Allocate a new tensor id for a named graph input with a known shape and
+// record it in both _tensors and _inputs. Returns the new id.
+size_t ModelAnalyzer::declareInputTensor(const std::string &name, const mir::Shape &shape)
+{
+  assert(!name.empty() && "Input tensor must have name");
+  size_t id = _allocatedTensors++;
+  _tensors.push_back({id, TensorDescriptor::Type::input, name, shape});
+  _inputs.push_back(id);
+  return id;
+}
+
+// Allocate a new tensor id for a named persistent (externally visible)
+// tensor; its shape is left empty here. Returns the new id.
+size_t ModelAnalyzer::declarePersistentTensor(const std::string &name)
+{
+  assert(!name.empty());
+  size_t id = _allocatedTensors++;
+  _tensors.push_back({id, TensorDescriptor::Type::persistent, name, {}});
+  _persistent_tensors.push_back(id);
+  return id;
+}
+
+// Allocate a new tensor id for an unnamed temporary tensor. Returns the id.
+size_t ModelAnalyzer::declareTemporaryTensor()
+{
+  size_t id = _allocatedTensors++;
+  _tensors.push_back({id, TensorDescriptor::Type::temporary, "", {}});
+  return id;
+}
+
+/**
+ * @brief Collect def/use positions of temporary tensors in a sequence.
+ *
+ * For every temporary tensor appearing in the (CallFunction-only) sequence,
+ * records the index of the first action that defines it (first_def) and the
+ * last action that reads it (last_use). Non-temporary tensors are ignored.
+ */
+void ModelAnalyzer::gatherDefUseInfo(const vector<unique_ptr<Action>> &post_order,
+                                     map<size_t, size_t> &first_def, map<size_t, size_t> &last_use)
+{
+
+  for (size_t pos = 0; pos < post_order.size(); ++pos)
+  {
+    const unique_ptr<Action> &action = post_order[pos];
+    const CallFunction *call = dynamic_cast<CallFunction *>(action.get());
+    assert(call);
+
+    // update def info: only the earliest defining position is kept
+    for (size_t output_tensor_id : call->outputs)
+    {
+      const TensorDescriptor &td = _tensors[output_tensor_id];
+      if (td.type != TensorDescriptor::Type::temporary)
+        continue;
+
+      if (!first_def.count(output_tensor_id))
+        first_def[output_tensor_id] = pos;
+    }
+
+    // update usage info: overwritten on every use, so the last one wins
+    for (size_t input_tensor_id : call->inputs)
+    {
+      const TensorDescriptor &td = _tensors[input_tensor_id];
+      if (td.type != TensorDescriptor::Type::temporary)
+        continue;
+
+      last_use[input_tensor_id] = pos;
+    }
+  }
+}
+
+/**
+ * @brief Build the final inference sequence from a DFS post-order.
+ *
+ * Visits operations in reverse post-order (topological order) to emit
+ * CallFunction actions, then rewrites the sequence to wrap each call with
+ * CreateTmp actions for temporaries first defined there and DestroyTmp
+ * actions for temporaries last used there.
+ */
+void ModelAnalyzer::constructInferenceSequence(const vector<Operation *> &post_order)
+{
+  // Run inference sequence construction over constructed list of operations
+  for (auto it = post_order.rbegin(); it != post_order.rend(); ++it)
+  {
+    Operation *node = *it;
+    node->accept(this);
+  }
+
+  // Insert temporary tensor constructors
+  // map temporary tensor id to index in original sequence where it was defined/used first/last time
+  map<size_t, size_t> first_def;
+  map<size_t, size_t> last_use;
+
+  // prepare use-def info
+  gatherDefUseInfo(_inferenceSequence, first_def, last_use);
+
+  // insert memory operations
+  // Every iteration of loop contains three steps:
+  // 1) insert constructors of temporary tensors used in current operations
+  //    and not used in inference sequence before
+  // 2) insert operation call
+  // 3) insert destructors of temporary tensors unused after current operation
+  std::vector<unique_ptr<Action>> old_inference_seq;
+  old_inference_seq.swap(_inferenceSequence);
+  _inferenceSequence.reserve(old_inference_seq.size());
+
+  for (size_t pos = 0; pos < old_inference_seq.size(); ++pos)
+  {
+    unique_ptr<Action> &action = old_inference_seq[pos];
+    const CallFunction *call = dynamic_cast<CallFunction *>(action.get());
+    assert(call);
+
+    // construct required temporary tensors
+    for (size_t output_tensor_id : call->outputs)
+    {
+      const TensorDescriptor &td = _tensors[output_tensor_id];
+      assert(td.id == output_tensor_id);
+      if (td.type != TensorDescriptor::Type::temporary)
+        continue;
+
+      if (first_def[output_tensor_id] == pos)
+      {
+        unique_ptr<Action> tmp_constructor(new CreateTmp(output_tensor_id));
+        _inferenceSequence.push_back(std::move(tmp_constructor));
+      }
+    }
+
+    // Insert operation call
+    _inferenceSequence.push_back(std::move(action));
+
+    // destroy unused temporary tensors
+    for (size_t input_tensor_id : call->inputs)
+    {
+      const TensorDescriptor &td = _tensors[input_tensor_id];
+      assert(td.id == input_tensor_id);
+      if (td.type != TensorDescriptor::Type::temporary)
+        continue;
+
+      if (last_use[input_tensor_id] == pos)
+      {
+        unique_ptr<Action> tmp_destructor(new DestroyTmp(input_tensor_id));
+        _inferenceSequence.push_back(std::move(tmp_destructor));
+      }
+    }
+  }
+}
+
+// Record, for every OutputOp of the graph, the id of the single tensor it
+// consumes; these ids become the model's outputs (_outputs).
+void ModelAnalyzer::collectOutputs(const mir::Graph *g)
+{
+  for (ops::OutputOp *out_op : g->getOutputs())
+  {
+    auto op_call = dynamic_cast<const CallFunction *>(_opToDescr[out_op]);
+    assert(op_call->inputs.size() == 1);
+    _outputs.push_back(op_call->inputs[0]);
+  }
+}
+
+/**
+ * @brief Analyze a graph and build the inference sequence.
+ *
+ * Runs an iterative DFS from every zero-input operation to produce a
+ * post-order of the graph, declares the shared scratch tensor, builds the
+ * action sequence from the post-order and collects output tensor ids.
+ */
+void ModelAnalyzer::analyze(const mir::Graph *g)
+{
+  // Current path through graph
+  stack<pair<Operation *, size_t>> s;
+  // Nodes in Reverse Post Order stored by DFS
+  vector<Operation *> post_order;
+  // Set contains pointer to node if it is visited by DFS
+  set<Operation *> visited;
+
+  // DFS roots: operations with no inputs (graph inputs and constants)
+  vector<Operation *> init_ops;
+  for (Operation *op : g->getNodes())
+  {
+    if (op->getNumInputs() == 0)
+    {
+      init_ops.emplace_back(op);
+    }
+  }
+
+  // Register temporary tensor for im2col buffer
+  _temp_tensor_id = declareTemporaryTensor();
+
+  // Walk all network inputs
+  for (Operation *in : init_ops)
+  {
+    if (!visited.count(in))
+    {
+      visited.insert(in);
+      s.push({in, 0});
+    }
+
+    // main DFS loop
+    while (!s.empty())
+    {
+      // top stores current node and current outgoing edge from it
+      auto &top = s.top();
+      Operation *node = top.first;
+      auto edge = top.second++;
+      // FIXME Refactor me.
+      // Flatten all consumers of all outputs into one successor list; the
+      // saved edge counter indexes into this list across iterations.
+      std::vector<Operation *> next_nodes;
+      for (const auto &out : node->getOutputs())
+      {
+        const auto &uses = out.getUses();
+        std::transform(uses.begin(), uses.end(), std::back_inserter(next_nodes),
+                       [](Operation::Use use) { return use.getNode(); });
+      }
+      if (edge == next_nodes.size())
+      {
+        // this node is fully analyzed, push it into RPO and pop from stack
+        post_order.push_back(node);
+        s.pop();
+      }
+      else
+      {
+        // Search current outgoing edge
+        Operation *successor = next_nodes[edge];
+        if (!visited.count(successor))
+        {
+          visited.insert(successor);
+          s.push({next_nodes[edge], 0});
+        }
+      }
+    }
+  }
+
+  constructInferenceSequence(post_order);
+
+  collectOutputs(g);
+}
+
+void ModelAnalyzer::visit(ops::ConcatOp &op) { appendOperationToInference(&op, "concat"); }
+
+void ModelAnalyzer::visit(ops::Conv2DOp &op)
+{
+  // Grouped convolutions are not supported by this backend.
+  assert(op.getNumGroups() == 1);
+  const auto &kernel_shape = op.getInputShape(1);
+  const auto &out_shape = op.getOutputShape(0);
+  // im2col scratch buffer: one kernel-sized patch per output element.
+  // NOTE(review): the product may overflow int32_t for very large tensors — confirm limits.
+  const int32_t tmp_size = kernel_shape.dim(1) * kernel_shape.dim(2) * kernel_shape.dim(3) *
+                           out_shape.dim(0) * out_shape.dim(1) * out_shape.dim(2);
+  updateMaxTemporarySize(static_cast<size_t>(tmp_size));
+  appendOperationToInference(&op, "conv2d", {_temp_tensor_id});
+}
+
+void ModelAnalyzer::visit(ops::DepthwiseConv2DOp &op)
+{
+  appendOperationToInference(&op, "depthwiseConv2d");
+}
+
+void ModelAnalyzer::visit(ops::SoftmaxOp &op) { appendOperationToInference(&op, "softmax"); }
+
+void ModelAnalyzer::visit(ops::AvgPool2DOp &op) { appendOperationToInference(&op, "avgPool"); }
+
+void ModelAnalyzer::visit(ops::MaxPool2DOp &op) { appendOperationToInference(&op, "maxPool"); }
+
+void ModelAnalyzer::visit(ops::FullyConnectedOp &op)
+{
+  appendOperationToInference(&op, "fullConnect");
+}
+
+void ModelAnalyzer::visit(ops::BroadcastOp &op) { appendOperationToInference(&op, "broadcast"); }
+
+void ModelAnalyzer::visit(ops::CappedReluOp &op) { appendOperationToInference(&op, "cappedRelu"); }
+
+void ModelAnalyzer::visit(ops::InputOp &op)
+{
+  // Graph inputs have no producers by construction.
+  assert(op.getNumInputs() == 0);
+  appendOperationToInference(&op, "in");
+}
+
+void ModelAnalyzer::visit(ops::ConstantOp &op)
+{
+  assert(op.getNumInputs() == 0);
+
+  // Skip constants that nobody consumes: deserializeTensors can currently only
+  // handle float32 data, so emitting an unused non-float constant would break
+  // deserialization (see the original FIXME).
+  if (op.getOutput(0)->getUses().empty())
+    return;
+
+  appendOperationToInference(&op, "constant");
+}
+
+void ModelAnalyzer::visit(ops::ReluOp &op) { appendOperationToInference(&op, "relu"); }
+
+void ModelAnalyzer::visit(ops::ReshapeOp &op) { appendOperationToInference(&op, "reshape"); }
+
+// Only 4D resize along dims 1 and 2 (height/width, assuming NHWC — TODO confirm layout)
+// with nearest-neighbor mode is supported.
+void ModelAnalyzer::visit(mir::ops::ResizeOp &op)
+{
+  const auto &in_shape = op.getInputShape(0);
+  const auto &out_shape = op.getOutputShape(0);
+
+  assert(in_shape.rank() == 4);
+  assert(in_shape.rank() == out_shape.rank());
+
+  if (in_shape.dim(0) != out_shape.dim(0) || in_shape.dim(3) != out_shape.dim(3))
+    throw std::runtime_error("Not supported Resize on other dims besides height and width!");
+
+  switch (op.getMode())
+  {
+    case mir::ops::ResizeOp::ResizeMethod::nearestNeighbor:
+      appendOperationToInference(&op, "resize");
+      break;
+    default:
+      assert(false && "Not Implemented!");
+  }
+}
+
+void ModelAnalyzer::visit(mir::ops::SliceOp &op) { appendOperationToInference(&op, "slice"); }
+
+void ModelAnalyzer::visit(mir::ops::TanhOp &op)
+{
+  appendOperationToInference(&op, "tanhActivation");
+}
+
+void ModelAnalyzer::visit(mir::ops::EluOp &op) { appendOperationToInference(&op, "elu"); }
+
+void ModelAnalyzer::visit(mir::ops::DeConv2DOp &op)
+{
+  const auto &kernel_shape = op.getInputShape(1);
+  const auto &out_shape = op.getOutputShape(0);
+  // im2col-style scratch buffer as in the Conv2DOp visit; note the kernel dims used
+  // here are (0, 1, 3) rather than (1, 2, 3) — transposed-conv kernels are laid out
+  // differently (presumably; confirm against the kernel layout convention).
+  // NOTE(review): the product may overflow int32_t for very large tensors.
+  const int32_t tmp_size = kernel_shape.dim(0) * kernel_shape.dim(1) * kernel_shape.dim(3) *
+                           out_shape.dim(0) * out_shape.dim(1) * out_shape.dim(2);
+  updateMaxTemporarySize(static_cast<size_t>(tmp_size));
+  appendOperationToInference(&op, "convTransposed2d", {_temp_tensor_id});
+}
+
+// Squeeze is implemented in the artifact by the generic reshape kernel.
+void ModelAnalyzer::visit(ops::SqueezeOp &op) { appendOperationToInference(&op, "reshape"); }
+
+void ModelAnalyzer::visit(ops::SqrtOp &op) { appendOperationToInference(&op, "sqrtFN"); }
+
+void ModelAnalyzer::visit(mir::ops::PadOp &op) { appendOperationToInference(&op, "pad"); }
+
+void ModelAnalyzer::visit(mir::ops::ReduceMeanOp &op)
+{
+  appendOperationToInference(&op, "reduceMean");
+}
+
+void ModelAnalyzer::visit(mir::ops::TransposeOp &op)
+{
+  appendOperationToInference(&op, "transpose");
+}
+
+void ModelAnalyzer::visit(mir::ops::GatherOp &op) { appendOperationToInference(&op, "gather"); }
+
+void ModelAnalyzer::visit(mir::ops::SigmoidOp &op) { appendOperationToInference(&op, "sigmoid"); }
+
+void ModelAnalyzer::visit(mir::ops::LeakyReluOp &op)
+{
+  appendOperationToInference(&op, "leakyRelu");
+}
+
+void ModelAnalyzer::visit(mir::ops::OutputOp &op) { appendOperationToInference(&op, "out"); }
+
+void ModelAnalyzer::visit(mir::ops::AbsOp &op) { appendOperationToInference(&op, "absFN"); }
+
+// Binary elementwise operations all map onto the templated ElementWise artifact kernel.
+void ModelAnalyzer::visit(mir::ops::AddOp &op)
+{
+  appendOperationToInference(&op, "ElementWise<Add>");
+}
+
+void ModelAnalyzer::visit(mir::ops::DivOp &op)
+{
+  appendOperationToInference(&op, "ElementWise<Div>");
+}
+
+void ModelAnalyzer::visit(mir::ops::MaxOp &op)
+{
+  appendOperationToInference(&op, "ElementWise<Max>");
+}
+
+void ModelAnalyzer::visit(mir::ops::MulOp &op)
+{
+  appendOperationToInference(&op, "ElementWise<Mul>");
+}
+
+void ModelAnalyzer::visit(mir::ops::SubOp &op)
+{
+  appendOperationToInference(&op, "ElementWise<Sub>");
+}
+
+// Any operation without a dedicated visit() overload is not supported by this backend.
+void ModelAnalyzer::visit_fallback(mir::Operation &) { throw std::runtime_error("NYI operation"); }
+
+} // namespace nnc
diff --git a/compiler/nnc/backends/soft_backend/ModelAnalyzer.h b/compiler/nnc/backends/soft_backend/ModelAnalyzer.h
new file mode 100644
index 000000000..471c31011
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/ModelAnalyzer.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_SOFT_BACKEND_MODEL_ANALYZER_H_
+#define _NNC_SOFT_BACKEND_MODEL_ANALYZER_H_
+
+#include "SequencedIR.h"
+
+#include "mir/Graph.h"
+#include "mir/Visitor.h"
+#include "mir/Shape.h"
+#include "mir/TensorVariant.h"
+#include "mir/Operation.h"
+
+#include <vector>
+#include <map>
+#include <string>
+#include <cassert>
+#include <iterator>
+
+namespace nnc
+{
+
+/**
+ * @brief Constructs inference sequence for given computational graph,
+ * gathers list of variables used in artifact.
+ */
+class ModelAnalyzer : public mir::Visitor
+{
+public:
+  /**
+   * @brief constructs inference sequence
+   * @param g pointer to graph to linearize
+   */
+  void analyze(const mir::Graph *g);
+
+  void visit(mir::ops::AbsOp &) override;
+  void visit(mir::ops::AddOp &op) override;
+  void visit(mir::ops::AvgPool2DOp &op) override;
+  void visit(mir::ops::BroadcastOp &op) override;
+  void visit(mir::ops::CappedReluOp &op) override;
+  void visit(mir::ops::ConcatOp &op) override;
+  void visit(mir::ops::ConstantOp &op) override;
+  void visit(mir::ops::Conv2DOp &op) override;
+  void visit(mir::ops::DeConv2DOp &op) override;
+  void visit(mir::ops::DepthwiseConv2DOp &op) override;
+  void visit(mir::ops::DivOp &op) override;
+  void visit(mir::ops::EluOp &op) override;
+  void visit(mir::ops::FullyConnectedOp &op) override;
+  void visit(mir::ops::GatherOp &op) override;
+  void visit(mir::ops::InputOp &op) override;
+  void visit(mir::ops::LeakyReluOp &op) override;
+  void visit(mir::ops::MaxOp &op) override;
+  void visit(mir::ops::MaxPool2DOp &op) override;
+  void visit(mir::ops::MulOp &op) override;
+  void visit(mir::ops::OutputOp &op) override;
+  void visit(mir::ops::PadOp &op) override;
+  void visit(mir::ops::ReduceMeanOp &op) override;
+  void visit(mir::ops::ReluOp &op) override;
+  void visit(mir::ops::ReshapeOp &op) override;
+  void visit(mir::ops::ResizeOp &op) override;
+  void visit(mir::ops::SigmoidOp &op) override;
+  void visit(mir::ops::SliceOp &op) override;
+  void visit(mir::ops::SoftmaxOp &op) override;
+  void visit(mir::ops::SqrtOp &op) override;
+  void visit(mir::ops::SqueezeOp &op) override;
+  void visit(mir::ops::SubOp &op) override;
+  void visit(mir::ops::TanhOp &op) override;
+  void visit(mir::ops::TransposeOp &op) override;
+
+  /**
+   * @return vector of id's of network input tensors
+   */
+  const std::vector<size_t> &getInputs() const { return _inputs; }
+
+  /**
+   * @return vector of id's of tensors with unique names taken from Model IR
+   */
+  const std::vector<size_t> &getPersistentTensors() const { return _persistent_tensors; }
+
+  /**
+   * @return vector of id's of network output tensors
+   */
+  const std::vector<size_t> &getOutputs() const { return _outputs; }
+
+  /**
+   * @return vector of all network tensors
+   */
+  const std::vector<sir::TensorDescriptor> &getTensors() const { return _tensors; }
+
+  /**
+   * @return Inference sequence
+   */
+  const std::vector<std::unique_ptr<sir::Action>> &getInferenceSequence() const
+  {
+    return _inferenceSequence;
+  }
+
+  /**
+   * @return Inference sequence (mutable)
+   */
+  std::vector<std::unique_ptr<sir::Action>> &getInferenceSequence() { return _inferenceSequence; }
+
+  /**
+   * @return Model name, taken from Model IR
+   */
+  const std::string &getModelName() const { return _modelName; }
+
+  /// @return Largest temporary buffer size requested by any operation
+  size_t getMaxTemporarySize() const { return _max_temp_size; }
+
+  /// @return Id of the shared temporary tensor
+  size_t getTempTID() const { return _temp_tensor_id; }
+
+protected:
+  void visit_fallback(mir::Operation &op) override;
+
+private:
+  /**
+   * @brief Common function to add function call in inference sequence
+   * @param op Node representing added call
+   * @param function_name Function name
+   * @param aux_args Auxiliary argument ids
+   *
+   * Inserts information about CG operation into inference sequence: name of operation,
+   * creates tensors for operation outputs, binds operation inputs with tensors from previous
+   * operations
+   */
+  void appendOperationToInference(mir::Operation *op, const std::string &function_name,
+                                  std::vector<size_t> aux_args = {});
+
+  /**
+   * @brief Records that some operation needs a temporary buffer of size *size*;
+   *        keeps track of the maximum requested size.
+   * @param size Size of buffer
+   */
+  void updateMaxTemporarySize(size_t size);
+
+  /**
+   * @brief Declares input tensor in artifact
+   * @param name Name of tensor
+   * @param shape expected shape of input
+   * @return Id of created tensor
+   */
+  size_t declareInputTensor(const std::string &name, const mir::Shape &shape);
+
+  /**
+   * @brief Declares persistent tensor in artifact
+   * @param name Name of variable, if empty - assigned automatically
+   * @return Id of created tensor
+   */
+  size_t declarePersistentTensor(const std::string &name);
+
+  /**
+   * @brief Declares temporary tensor in artifact
+   * @return Id of created tensor
+   */
+  size_t declareTemporaryTensor();
+
+  /**
+   * @brief Gathers info where tensors were defined and used in inference sequence
+   * @param post_order Sequence of operations in inference
+   * @param first_def Maps tensor id to position in inf sequence where it was defined first time.
+   * @param last_use Maps tensor id to position in inf sequence where it was used last time.
+   */
+  void gatherDefUseInfo(const std::vector<std::unique_ptr<sir::Action>> &post_order,
+                        std::map<size_t, size_t> &first_def, std::map<size_t, size_t> &last_use);
+
+  /**
+   * @brief constructs inference sequence from vector of mir::Operations, constructed
+   * @param post_order vector representing layout of operations in inference
+   */
+  void constructInferenceSequence(const std::vector<mir::Operation *> &post_order);
+
+  /**
+   * @brief Fill list of outputs in ModelAnalyzer
+   * @param g Graph where to get list of outputs
+   */
+  void collectOutputs(const mir::Graph *g);
+
+  /// @brief Name of the generated artifact
+  std::string _modelName = "NN";
+  /// @brief Linearized sequence of tensor-lifetime and function-call actions
+  std::vector<std::unique_ptr<sir::Action>> _inferenceSequence;
+  /// @brief Number of tensor ids handed out so far (presumably the next free id — TODO confirm)
+  size_t _allocatedTensors = 0;
+  /// @brief list of network input tensor ids
+  std::vector<size_t> _inputs;
+  /// @brief list of persistent tensors
+  std::vector<size_t> _persistent_tensors;
+  /// @brief list of tensor ids corresponding to NN outputs
+  std::vector<size_t> _outputs;
+  /// @brief Largest temporary buffer size requested by any operation
+  size_t _max_temp_size = 0;
+  /// @brief Id of the shared temporary tensor (used e.g. as the conv im2col buffer)
+  size_t _temp_tensor_id = 0;
+  /// @brief Descriptors of all declared tensors, indexed by tensor id
+  std::vector<sir::TensorDescriptor> _tensors;
+  /// @brief Maps each IR operation to its action in the inference sequence
+  std::map<const mir::Operation *, const sir::Action *> _opToDescr;
+};
+
+} // namespace nnc
+
+#endif //_NNC_SOFT_BACKEND_MODEL_ANALYZER_H_
diff --git a/compiler/nnc/backends/soft_backend/SBSerializer.cpp b/compiler/nnc/backends/soft_backend/SBSerializer.cpp
new file mode 100644
index 000000000..96fa51580
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/SBSerializer.cpp
@@ -0,0 +1,414 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SBSerializer.h"
+#include "mir/ShapeRange.h"
+#include "mir/TensorUtil.h"
+
+#include "mir/OpDefs.h"
+
+#include <algorithm>
+
+#define UNUSED(x) ((void)(x))
+
+namespace nnc
+{
+
+static_assert(std::numeric_limits<float>::is_iec559, "Unsupported float type");
+
+using namespace std;
+
+using mir::Index;
+using mir::Shape;
+using mir::ShapeRange;
+using mir::TensorVariant;
+
+namespace ops = mir::ops;
+
+namespace
+{
+// Currently there are no operations with more than 4 dimensions in kernels/weights etc supported
+const auto MAX_DIMS = 4;
+// Upper bound for any serialized dimension size (must fit into int32_t)
+const auto MAX_DIM_SIZE = numeric_limits<int32_t>::max();
+// Assuming there are no large enums
+const auto MAX_ENUM_VAL = numeric_limits<char>::max();
+} // unnamed namespace
+
+/**
+ * @brief Append `size` raw bytes from `data` to the serialization buffer.
+ * @param data Buffer containing data to serialize
+ * @param size Size of data to serialize
+ */
+void Serializer::packData(const void *data, size_t size)
+{
+  const char *bytes = static_cast<const char *>(data);
+  _buffer.insert(_buffer.end(), bytes, bytes + size);
+}
+
+/**
+ * @brief Serialize a trivially copyable object by appending its raw bytes.
+ * @tparam T Type of object to serialize
+ * @param obj Reference to object to serialize
+ */
+template <typename T> void Serializer::serializeT(const T &obj) { packData(&obj, sizeof(T)); }
+
+/**
+ * @brief Convert an enum value to its underlying integer type
+ * @tparam E Enum type
+ * @param enum_value Value of enum
+ * @return Integer value that corresponds to enum_value
+ */
+template <typename E> typename underlying_type<E>::type etoi(E enum_value)
+{
+  return static_cast<typename underlying_type<E>::type>(enum_value);
+}
+
+/**
+ * @brief Serialize a shape as an int32_t rank followed by one int32_t per dimension.
+ * @param s Shape to serialize
+ */
+void Serializer::serializeShape(const Shape &s)
+{
+  const int32_t rank = s.rank();
+  assert(rank <= MAX_DIMS);
+  serializeT<int32_t>(rank);
+  for (int32_t axis = 0; axis < rank; ++axis)
+    serializeT<int32_t>(s.dim(axis));
+}
+
+/**
+ * @brief Serialize a tensor: data-type tag, element size, shape, then raw data
+ *        in row-major (C) order.
+ * @param t Tensor to serialize
+ */
+void Serializer::serializeTensor(const TensorVariant &t)
+{
+  // serialize type
+  assert(etoi(t.getDataType()) < MAX_ENUM_VAL);
+  serializeT<int32_t>(etoi(t.getDataType()));
+  // serialize element size (in bytes)
+  size_t element_size = t.getElementSize();
+  // NOTE(review): comparing a byte size against MAX_DIMS (= 4) looks accidental;
+  // it only holds because no element type wider than 4 bytes is serialized today.
+  assert(element_size <= MAX_DIMS);
+  serializeT<int32_t>(element_size);
+  // serialize shape
+  const Shape &shape = t.getShape();
+  serializeShape(shape);
+  // serialize actual data
+  size_t data_size = element_size * shape.numElements();
+
+  // Reserve up front so the per-element packData calls below do not reallocate.
+  size_t old_serialized_data_size = _buffer.size();
+  _buffer.reserve(old_serialized_data_size + data_size);
+  for (const Index &idx : ShapeRange(shape))
+  {
+    packData(t.at(idx), element_size);
+  }
+}
+
+/**
+ * @brief Serialize a stride vector as an int32_t count followed by one int32_t per stride.
+ * @param strides The strides to serialize.
+ */
+void Serializer::serializeStrides(const vector<int32_t> &strides)
+{
+  // Use a fixed-width count for consistency with the rest of the format:
+  // every other length/rank field is written as int32_t, while plain `int`
+  // is platform-dependent in principle.
+  serializeT<int32_t>(static_cast<int32_t>(strides.size()));
+  for (const int32_t stride : strides)
+  {
+    serializeT<int32_t>(stride);
+  }
+}
+
+/**
+ * @brief Serialize the first `number_of_pads` "before" paddings of an operation.
+ * @tparam Op Operation type providing getPaddingBefore()
+ * @param op Reference to operation where pads are stored
+ * @param number_of_pads Number of leading padding entries to emit
+ */
+template <typename Op> void Serializer::serializePads(const Op &op, int32_t number_of_pads)
+{
+  assert(number_of_pads <= MAX_DIMS);
+  serializeT<int32_t>(number_of_pads);
+  for (int i = 0; i < static_cast<int>(number_of_pads); ++i)
+  {
+    auto pad = op.getPaddingBefore().at(i);
+    assert(pad <= MAX_DIM_SIZE);
+    assert(pad >= 0);
+    UNUSED(pad); // `pad` exists only for the asserts; silence release-build warnings
+    serializeT<int32_t>(op.getPaddingBefore().at(i));
+  }
+}
+
+// Each visit() below records where this op's parameters start in the buffer
+// (paramStartOffset), then appends the parameters the generated kernel needs.
+void Serializer::visit(ops::ConcatOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // axis number should fit into one byte
+  assert(op.getAxis() <= MAX_DIMS);
+  serializeT<int32_t>(op.getAxis());
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(ops::Conv2DOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // serialize strides
+  serializeStrides(op.getStrides());
+  // serialize pads
+  int32_t padsRank = 2; // op.getInputShape(0).rank();
+  serializePads(op, padsRank);
+  // serialize output shape
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(ops::DepthwiseConv2DOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // serialize strides
+  serializeStrides(op.getStrides());
+  // serialize pads
+  int32_t padsRank = 2; // kernel.getShape().rank();
+  serializePads(op, padsRank);
+  // serialize output shape
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(ops::SoftmaxOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // axis number should fit into one byte
+  assert(op.getAxis() <= MAX_DIMS);
+  serializeT<int32_t>(op.getAxis());
+}
+
+void Serializer::visit(ops::AvgPool2DOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // serialize window shape
+  serializeShape(Shape(op.getWindowSize()));
+  // serialize strides
+  serializeStrides(op.getStrides());
+  // serialize pads
+  int32_t number_of_pads = 2; // windowShape.rank();
+  serializePads(op, number_of_pads);
+  serializeT<int32_t>(op.getIncludePad());
+  // serialize output shape
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(ops::MaxPool2DOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // serialize window shape
+  serializeShape(Shape(op.getWindowSize()));
+  // serialize strides
+  serializeStrides(op.getStrides());
+  // serialize pads
+  int32_t number_of_pads = 2; // windowShape.rank();
+  serializePads(op, number_of_pads);
+  // serialize output shape
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(ops::FullyConnectedOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(ops::BroadcastOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(ops::CappedReluOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  serializeT<float>(op.getCap());
+}
+
+void Serializer::visit(ops::InputOp & /*op*/)
+{
+  // no parameters to dump
+}
+
+void Serializer::visit(ops::ConstantOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  serializeTensor(op.getValue());
+}
+
+void Serializer::visit(ops::ReluOp & /*op*/)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // no parameters to dump
+}
+
+void Serializer::visit(ops::ReshapeOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(mir::ops::SliceOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  serializeShape(op.getStarts());
+  serializeShape(op.getSizes());
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(mir::ops::TanhOp & /*op*/)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // no parameters to dump
+}
+
+void Serializer::visit(mir::ops::EluOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  serializeT<float>(op.getAlpha());
+}
+
+void Serializer::visit(mir::ops::DeConv2DOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // serialize strides
+  serializeStrides(op.getStrides());
+  // serialize pads
+  int32_t number_of_pads = 2; // op.getInputShape(0).rank();
+  serializePads(op, number_of_pads);
+  // serialize output shape
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(ops::SqueezeOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(mir::ops::PadOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+
+  // serialize paddings
+  const int num_dims = op.getInputShape(0).rank();
+
+  // serialize output shape
+  serializeShape(op.getOutputShape(0));
+
+  // serialize num dimensions
+  serializeT<int32_t>(num_dims);
+
+  // Paddings are emitted innermost-dimension first (reverse order).
+  const auto &padding_before = op.getPaddingBefore();
+  const auto &padding_after = op.getPaddingAfter();
+  for (int i = 0; i < num_dims; i++)
+  {
+    serializeT<int32_t>(padding_before[num_dims - 1 - i]);
+    serializeT<int32_t>(padding_after[num_dims - 1 - i]);
+  }
+
+  // FIXME Make use of padding value.
+  assert(op.getPaddingValue() == 0.0f);
+}
+
+void Serializer::visit(mir::ops::SqrtOp & /*op*/)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // no parameters to dump
+}
+
+void Serializer::visit(mir::ops::ResizeOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // Result shape is the same as Output shape
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(mir::ops::ReduceMeanOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  serializeShape(Shape(op.getReductionDims())); // reuse shape serialization
+  serializeT<int32_t>(op.getKeepDims());
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(mir::ops::TransposeOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // serialize parameters
+  auto &axis_order = op.getAxisOrder();
+  serializeT(static_cast<int32_t>(axis_order.size()));
+  for (auto &axis : axis_order)
+    serializeT(static_cast<int32_t>(axis));
+
+  // serialize output shape
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(mir::ops::GatherOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // serialize parameters
+  serializeT<int32_t>(op.getAxis());
+  // serialize output shape
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(mir::ops::SigmoidOp & /*op*/) { _curOp->paramStartOffset = _buffer.size(); }
+
+void Serializer::visit(mir::ops::LeakyReluOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  serializeT<float>(op.getAlpha());
+  serializeShape(op.getOutputShape(0));
+}
+
+/**
+ * @brief Walk the inference sequence and serialize the parameters of every
+ *        function-call action, binding each call to its offset in the buffer.
+ * @param inference_sequence Sequence produced by ModelAnalyzer
+ */
+void Serializer::serialize(vector<unique_ptr<sir::Action>> &inference_sequence)
+{
+  for (unique_ptr<sir::Action> &action : inference_sequence)
+  {
+    // Only function calls carry parameters; tensor create/destroy actions do not.
+    if (action->type != sir::Action::Type::callFunction)
+      continue;
+    _curOp = dynamic_cast<sir::CallFunction *>(action.get());
+    // The type tag guarantees the cast succeeds; catch a mismatched tag early
+    // instead of dereferencing a null pointer in the visit() callbacks.
+    assert(_curOp != nullptr);
+    _curOp->mirOp->accept(this);
+  }
+}
+
+void Serializer::visit(mir::ops::OutputOp & /*op*/)
+{
+  // no parameters to dump
+  // NOTE(review): unlike most visits, paramStartOffset is left unset here —
+  // presumably the code generator never reads parameters for "out" calls; confirm.
+}
+
+void Serializer::visit(mir::ops::AbsOp &)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // no parameters to dump
+}
+
+void Serializer::visit(mir::ops::AddOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // Op type is known at codegen time
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(mir::ops::DivOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // Op type is known at codegen time
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(mir::ops::MaxOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // Op type is known at codegen time
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(mir::ops::MulOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // Op type is known at codegen time
+  serializeShape(op.getOutputShape(0));
+}
+
+void Serializer::visit(mir::ops::SubOp &op)
+{
+  _curOp->paramStartOffset = _buffer.size();
+  // Op type is known at codegen time
+  serializeShape(op.getOutputShape(0));
+}
+
+// Any operation without a dedicated visit() overload is not supported by this backend.
+void Serializer::visit_fallback(mir::Operation &) { throw std::runtime_error("NYI operation"); }
+
+} // namespace nnc
diff --git a/compiler/nnc/backends/soft_backend/SBSerializer.h b/compiler/nnc/backends/soft_backend/SBSerializer.h
new file mode 100644
index 000000000..98b9ce605
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/SBSerializer.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_SOFT_BACKEND_SERIALIZER_H_
+#define _NNC_SOFT_BACKEND_SERIALIZER_H_
+
+#include "mir/Visitor.h"
+#include "mir/Shape.h"
+#include "mir/TensorVariant.h"
+#include "ModelAnalyzer.h"
+
+#include <vector>
+#include <cstdint>
+
+namespace nnc
+{
+
+/**
+ * @brief Serializer of network parameters for soft backend
+ *
+ * Serializer class responsible for serialization of given computational graph parameters and
+ * binding of inference operations to this data.
+ * It owns buffer that contains serialized data.
+ * To serialize data `serialize` method should be called with sequence from ModelAnalyzer object
+ * To gather this vector use `getBuffer` method.
+ * Objects of this class are one-off and not designed to serialize more than one IR
+ */
+class Serializer : public mir::Visitor
+{
+public:
+  void visit(mir::ops::AbsOp &op) override;
+  void visit(mir::ops::AddOp &op) override;
+  void visit(mir::ops::AvgPool2DOp &op) override;
+  void visit(mir::ops::BroadcastOp &op) override;
+  void visit(mir::ops::CappedReluOp &op) override;
+  void visit(mir::ops::ConcatOp &op) override;
+  void visit(mir::ops::ConstantOp &op) override;
+  void visit(mir::ops::Conv2DOp &op) override;
+  void visit(mir::ops::DeConv2DOp &op) override;
+  void visit(mir::ops::DepthwiseConv2DOp &op) override;
+  void visit(mir::ops::DivOp &op) override;
+  void visit(mir::ops::EluOp &op) override;
+  void visit(mir::ops::FullyConnectedOp &op) override;
+  void visit(mir::ops::GatherOp &op) override;
+  void visit(mir::ops::InputOp &op) override;
+  void visit(mir::ops::LeakyReluOp &op) override;
+  void visit(mir::ops::MaxOp &op) override;
+  void visit(mir::ops::MaxPool2DOp &op) override;
+  void visit(mir::ops::MulOp &op) override;
+  void visit(mir::ops::OutputOp &op) override;
+  void visit(mir::ops::PadOp &op) override;
+  void visit(mir::ops::ReduceMeanOp &op) override;
+  void visit(mir::ops::ReluOp &op) override;
+  void visit(mir::ops::ReshapeOp &op) override;
+  void visit(mir::ops::ResizeOp &op) override;
+  void visit(mir::ops::SigmoidOp &op) override;
+  void visit(mir::ops::SliceOp &op) override;
+  void visit(mir::ops::SoftmaxOp &op) override;
+  void visit(mir::ops::SqrtOp &op) override;
+  void visit(mir::ops::SqueezeOp &op) override;
+  void visit(mir::ops::SubOp &op) override;
+  void visit(mir::ops::TanhOp &op) override;
+  void visit(mir::ops::TransposeOp &op) override;
+
+  /// @brief Serialize parameters of every function call in the inference sequence.
+  void serialize(std::vector<std::unique_ptr<sir::Action>> &inference_sequence);
+
+  /// @return Buffer with all serialized parameter data
+  const std::vector<char> &getBuffer() const { return _buffer; }
+
+  uint32_t getFormatVersion() const { return _formatVersion; }
+
+  uint32_t getModelHash() const { return _modelHash; }
+
+protected:
+  void visit_fallback(mir::Operation &op) override;
+
+private:
+  /**
+   * @brief Low level function to serialize untyped data buffer
+   * @param data Buffer containing data to serialize
+   * @param size Size of data to serialize
+   */
+  void packData(const void *data, size_t size);
+  /**
+   * @brief Serialize trivially copyable objects
+   * @tparam T Type of object to serialize
+   * @param obj Reference to object to serialize
+   */
+  template <typename T> void serializeT(const T &obj);
+  /**
+   * @brief Serialize Tensor shape object
+   * @param s shape to serialize
+   */
+  void serializeShape(const mir::Shape &s);
+  /**
+   * @brief Function serializes type of given tensor base data,
+   * it's shape and raw data in 'c' format(i.e. layout of multidimensional C array)
+   * @param t Tensor to serialize
+   */
+  void serializeTensor(const mir::TensorVariant &t);
+  /**
+   * @brief Serialize strides.
+   * @param strides The strides to serialize.
+   */
+  void serializeStrides(const std::vector<std::int32_t> &strides);
+  /**
+   * @brief Serialize pads for operations like Conv2D
+   * @tparam Op Operation type
+   * @param op Reference to operation where pads are stored
+   * @param number_of_pads Number of pads to serialize
+   */
+  template <class Op> void serializePads(const Op &op, int32_t number_of_pads);
+
+  /// @brief Call action whose parameters are currently being serialized
+  sir::CallFunction *_curOp = nullptr;
+  /// @brief Version tag of the serialized parameter format
+  const uint32_t _formatVersion = 1;
+  /// @brief Hash of the model (not computed in this class — TODO confirm producer)
+  uint32_t _modelHash = 0;
+  /// @brief Serialized parameter data
+  std::vector<char> _buffer;
+};
+
+} // namespace nnc
+
+#endif //_NNC_SOFT_BACKEND_SERIALIZER_H_
diff --git a/compiler/nnc/backends/soft_backend/SequencedIR.cpp b/compiler/nnc/backends/soft_backend/SequencedIR.cpp
new file mode 100644
index 000000000..267fe577d
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/SequencedIR.cpp
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SequencedIR.h"
diff --git a/compiler/nnc/backends/soft_backend/SequencedIR.h b/compiler/nnc/backends/soft_backend/SequencedIR.h
new file mode 100644
index 000000000..9a761243e
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/SequencedIR.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_SOFT_BACKEND_SEQUENCED_IR_H_
+#define _NNC_SOFT_BACKEND_SEQUENCED_IR_H_
+
+#include "mir/Shape.h"
+#include "mir/Operation.h"
+
+#include <string>
+#include <vector>
+#include <cstdint>
+#include <limits>
+#include <list>
+
+namespace nnc
+{
+
+namespace sir
+{
+
// Sentinel tensor id meaning "no tensor assigned" (largest representable size_t).
const size_t INVALID_TENSOR_ID = std::numeric_limits<size_t>::max();

/**
 * @brief Represents variable used in artifact.
 * This variable can store inputs, outputs of network and temporary data.
 */
struct TensorDescriptor
{
  /**
   * input tensors of this type supposed to be set outside of artifact
   * persistent tensors store data after inference process is over, this include NN outputs
   * temporary tensors are not accessible outside artifact in any way,
   * they are created and destructed on demand
   */
  enum class Type
  {
    input,
    persistent,
    temporary
  };

  size_t id;        // identifier of this tensor within the inference sequence
  Type type;        // lifetime/visibility category, see Type above
  std::string name; // variable name of this tensor in the generated artifact
  // if _shape.rank() == 0 - assume shape is not known for this tensor on compilation
  mir::Shape shape;
};
+
/**
 * @brief Action represents a single operation in the inference sequence:
 * temporary-tensor lifetime management, tensor transposition or a call to a
 * generated backend function.
 */
struct Action
{

  /**
   * Defines which type of action to perform
   * createTmp responsible for creation of temporary tensor in inference sequence
   * destroyTmp responsible for deletion of temporary tensor
   * callFunction invokes a generated backend function for one operation
   * transposeTensor reorders the dimensions of a tensor
   */
  enum class Type
  {
    createTmp,
    destroyTmp,
    callFunction,
    transposeTensor
  };

  explicit Action(Type t) : type(t) {}

  // Actions are stored and destroyed polymorphically, hence the virtual dtor.
  virtual ~Action() = default;

  Type type;
};
+
/**
 * @brief Permutes the dimensions of tensor `input` into tensor `output`
 * according to the permutation `perm`.
 */
struct TransposeTensor : public Action
{

  TransposeTensor(size_t input, size_t output, std::vector<int32_t> &&perm)
      : Action(Type::transposeTensor), perm(std::move(perm)), input(input), output(output)
  {
  }

  std::vector<int32_t> perm; // permutation of dimension indices
  size_t input;              // id of the source tensor
  size_t output;             // id of the destination tensor
};
+
/**
 * @brief Creates the temporary tensor `tensorId` at this point of the
 * inference sequence.
 */
struct CreateTmp : public Action
{

  explicit CreateTmp(size_t tid) : Action(Type::createTmp), tensorId(tid) {}

  size_t tensorId; // id of the temporary tensor to create
};
+
/**
 * @brief Destroys the temporary tensor `tensorId` at this point of the
 * inference sequence.
 */
struct DestroyTmp : public Action
{

  explicit DestroyTmp(size_t tid) : Action(Type::destroyTmp), tensorId(tid) {}

  size_t tensorId; // id of the temporary tensor to destroy
};
+
+struct CallFunction : public Action
+{
+
+ CallFunction(mir::Operation *op, std::string func_name, std::vector<size_t> &&inputs,
+ std::vector<size_t> &&outputs)
+ : Action(Type::callFunction), mirOp(op), funcName(std::move(func_name)), inputs(inputs),
+ outputs(outputs), paramStartOffset(0)
+ {
+ }
+
+ CallFunction() : Action(Type::callFunction), mirOp(nullptr), paramStartOffset(0) {}
+
+ mir::Operation *mirOp;
+ std::string funcName;
+ // list of input tensors
+ std::vector<size_t> inputs;
+ // list of output tensors
+ std::vector<size_t> outputs;
+ size_t paramStartOffset;
+};
+
+} // namespace sir
+
+} // namespace nnc
+
+#endif // _NNC_SOFT_BACKEND_SEQUENCED_IR_H_
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_broadcast.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_broadcast.def
new file mode 100644
index 000000000..1d170eb48
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_broadcast.def
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+template <int N>
+inline void NdArrayDescForBroadcast(const RuntimeShape &input_shape,
+ const RuntimeShape &output_shape, NdArrayDesc<N> *desc_out)
+{
+ TFLITE_DCHECK(desc_out != nullptr);
+
+ auto extended_input_shape = RuntimeShape::ExtendedShape(N, input_shape);
+ auto extended_output_shape = RuntimeShape::ExtendedShape(N, output_shape);
+
+ int desc_stride = 1;
+ for (int i = N - 1; i >= 0; --i)
+ {
+ desc_out->extents[i] = extended_input_shape.Dims(i);
+ desc_out->strides[i] = desc_stride;
+ desc_stride *= extended_input_shape.Dims(i);
+ }
+
+ for (int i = 0; i < N; ++i)
+ {
+ const int extent0 = extended_input_shape.Dims(i);
+ const int extent1 = extended_output_shape.Dims(i);
+ if (extent0 != extent1)
+ {
+ TFLITE_DCHECK_EQ(extent0, 1);
+ desc_out->strides[i] = 0;
+ desc_out->extents[i] = extent1;
+ }
+ }
+}
+
+void Broadcast4DSlow(const RuntimeShape &input_shape, const float *input_data,
+ const RuntimeShape &output_shape, float *output_data)
+{
+ TFLITE_DCHECK_LE(input_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
+ const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
+
+ NdArrayDesc<4> desc;
+ NdArrayDescForBroadcast(input_shape, output_shape, &desc);
+
+ for (int b = 0; b < extended_output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < extended_output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < extended_output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < extended_output_shape.Dims(3); ++c)
+ {
+ auto out_idx = Offset(extended_output_shape, b, y, x, c);
+ auto in_idx = SubscriptToIndex(desc, b, y, x, c);
+ output_data[out_idx] = input_data[in_idx];
+ }
+ }
+ }
+ }
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_capped_relu.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_capped_relu.def
new file mode 100644
index 000000000..52e657f8d
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_capped_relu.def
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+inline void CappedRelu(const float* input_data, const Dims<4>& input_dims,
+ float cap, float* output_data, const Dims<4>& output_dims) {
+
+ const auto input = MapAsVector(input_data, input_dims);
+ auto output = MapAsVector(output_data, output_dims);
+
+ output = input.cwiseMax(0.0f).cwiseMin(cap);
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_common_funcs.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_common_funcs.def
new file mode 100644
index 000000000..ff42e58f4
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_common_funcs.def
@@ -0,0 +1,750 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
// *****************************************************************************
// From internal/compatibility.h

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <vector>
// The headers below are used by code later in this file but were missing:
#include <algorithm>        // std::max (RuntimeShape::maxShape)
#include <cstring>          // std::memcpy / std::memcmp (RuntimeShape)
#include <initializer_list> // RuntimeShape initializer_list constructor
#include <iterator>         // std::distance (RuntimeShape::BuildFrom)
#include <type_traits>      // std::conditional / std::is_const (VectorMap)

// Debug-only checks: compile to assert(), i.e. no-ops when NDEBUG is defined.
#ifndef TFLITE_DCHECK
#define TFLITE_DCHECK(condition) (condition) ? (void)0 : assert(false)
#endif

#ifndef TFLITE_DCHECK_EQ
#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : assert(false)
#endif

#ifndef TFLITE_DCHECK_NE
#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : assert(false)
#endif

#ifndef TFLITE_DCHECK_GE
#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : assert(false)
#endif

#ifndef TFLITE_DCHECK_GT
#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : assert(false)
#endif

#ifndef TFLITE_DCHECK_LE
#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : assert(false)
#endif

#ifndef TFLITE_DCHECK_LT
#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : assert(false)
#endif

// Hard checks: always active (release builds too), terminate via abort().
// TODO(ahentz): Clean up: We should stick to the DCHECK versions.
#ifndef TFLITE_CHECK
#define TFLITE_CHECK(condition) (condition) ? (void)0 : abort()
#endif

#ifndef TFLITE_CHECK_EQ
#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : abort()
#endif

#ifndef TFLITE_CHECK_NE
#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : abort()
#endif

#ifndef TFLITE_CHECK_GE
#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : abort()
#endif

#ifndef TFLITE_CHECK_GT
#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : abort()
#endif

#ifndef TFLITE_CHECK_LE
#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : abort()
#endif

#ifndef TFLITE_CHECK_LT
#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : abort()
#endif

// Fixed-width integer aliases used throughout the kernels.
// TODO(ahentz): Clean up.
using int8 = std::int8_t;
using uint8 = std::uint8_t;
using int16 = std::int16_t;
using uint16 = std::uint16_t;
using int32 = std::int32_t;
using uint32 = std::uint32_t;
+
// *****************************************************************************
// From internal/types.h

// Legacy shape descriptor: per-dimension sizes plus explicit element strides
// (counted in elements, not bytes). When densely packed, dimension 0 is the
// fastest-varying one (see IsPackedWithoutStrides).
template <int N>
struct Dims {
  int sizes[N];
  int strides[N];
};
+
// Dynamically-ranked tensor shape. Small ranks (<= kMaxSmallSize) are stored
// inline; larger ranks spill to a heap allocation. A union holds either the
// inline array or the heap pointer, discriminated by size_.
class RuntimeShape {
public:
  // Shapes with dimensions up to 4 are stored directly in the structure, while
  // larger shapes are separately allocated.
  static constexpr int kMaxSmallSize = 4;

  RuntimeShape& operator=(RuntimeShape const&) = delete;

  RuntimeShape() : size_(0) {}

  // Creates a shape of the given rank with uninitialized dimensions.
  explicit RuntimeShape(int dimensions_count) : size_(dimensions_count) {
    if (dimensions_count > kMaxSmallSize) {
      dims_pointer_ = new int32[dimensions_count];
    }
  }

  // Creates a shape of rank `shape_size` with every dimension set to `value`.
  RuntimeShape(int shape_size, int32 value) : size_(0) {
    Resize(shape_size);
    for (int i = 0; i < shape_size; ++i) {
      SetDim(i, value);
    }
  }

  RuntimeShape(int dimensions_count, const int32* dims_data) : size_(0) {
    ReplaceWith(dimensions_count, dims_data);
  }

  RuntimeShape(const std::initializer_list<int> init_list) : size_(0) {
    BuildFrom(init_list);
  }

  // get bigger shape for elementwise Ops
  // NOTE: mutates this shape in place; both shapes must already be rank 4.
  void maxShape(RuntimeShape const& other) {
    TFLITE_CHECK(other.DimensionsCount() == size_ && size_ == 4 && "Elementwise shapes must be 4d");
    for (size_t i = 0; i < 4; i++) {
      dims_[i] = std::max(dims_[i], other.dims_[i]);
    }
  }

  // Avoid using this constructor. We should be able to delete it when C++17
  // rolls out.
  RuntimeShape(RuntimeShape const& other) : size_(other.DimensionsCount()) {
    if (size_ > kMaxSmallSize) {
      dims_pointer_ = new int32[size_];
    }
    std::memcpy(DimsData(), other.DimsData(), sizeof(int32) * size_);
  }

  bool operator==(const RuntimeShape& comp) const {
    return this->size_ == comp.size_ &&
           std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32)) == 0;
  }

  ~RuntimeShape() {
    if (size_ > kMaxSmallSize) {

      delete[] dims_pointer_;
    }
  }

  inline int32 DimensionsCount() const { return size_; }
  inline int32 Dims(int i) const {
    TFLITE_DCHECK_GE(i, 0);
    TFLITE_DCHECK_LT(i, size_);
    return size_ > kMaxSmallSize ? dims_pointer_[i] : dims_[i];
  }
  inline void SetDim(int i, int32 val) {
    TFLITE_DCHECK_GE(i, 0);
    TFLITE_DCHECK_LT(i, size_);
    if (size_ > kMaxSmallSize) {
      dims_pointer_[i] = val;
    } else {
      dims_[i] = val;
    }
  }

  inline int32* DimsData() {
    return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
  }
  inline const int32* DimsData() const {
    return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
  }
  // The caller must ensure that the shape is no bigger than 4-D.
  inline const int32* DimsDataUpTo4D() const { return dims_; }

  // Changes the rank; previous dimension values are NOT preserved.
  inline void Resize(int dimensions_count) {
    if (size_ > kMaxSmallSize) {
      delete[] dims_pointer_;
    }
    size_ = dimensions_count;
    if (dimensions_count > kMaxSmallSize) {
      dims_pointer_ = new int32[dimensions_count];
    }
  }

  inline void ReplaceWith(int dimensions_count, const int32* dims_data) {
    Resize(dimensions_count);
    int32* dst_dims = DimsData();
    std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32));
  }

  // Rebuilds the shape from any iterable of ints.
  template <typename T>
  inline void BuildFrom(const T& src_iterable) {
    const int dimensions_count =
        std::distance(src_iterable.begin(), src_iterable.end());
    Resize(dimensions_count);
    int32* data = DimsData();
    for (auto it : src_iterable) {
      *data = it;
      ++data;
    }
  }

  // This will probably be factored out. Old code made substantial use of 4-D
  // shapes, and so this function is used to extend smaller shapes. Note that
  // (a) as Dims<4>-dependent code is eliminated, the reliance on this should be
  // reduced, and (b) some kernels are strictly 4-D, but then the shapes of their
  // inputs should already be 4-D, so this function should not be needed.
  inline static RuntimeShape ExtendedShape(int new_shape_size,
                                           const RuntimeShape& shape) {
    return RuntimeShape(new_shape_size, shape, 1);
  }

  inline void BuildFrom(const std::initializer_list<int> init_list) {
    BuildFrom<const std::initializer_list<int>>(init_list);
  }

  // Returns the total count of elements, that is the size when flattened into a
  // vector.
  inline int FlatSize() const {
    int buffer_size = 1;
    const int* dims_data = DimsData();
    for (int i = 0; i < size_; i++) {
      const int dim = dims_data[i];
      TFLITE_DCHECK_GE(dim, 1);
      buffer_size *= dim;
    }
    return buffer_size;
  }

  bool operator!=(const RuntimeShape& comp) const { return !((*this) == comp); }

private:
  // For use only by ExtendedShape(), written to guarantee (return-value) copy
  // elision in C++17.
  // This creates a shape padded to the desired size with the specified value.
  // New leading dimensions get `pad_value`; existing dims are right-aligned.
  RuntimeShape(int new_shape_size, const RuntimeShape& shape, int pad_value)
      : size_(0) {
    // If the following check fails, it is likely because a 4D-only kernel is
    // being used with an array of larger dimension count.
    TFLITE_CHECK_GE(new_shape_size, shape.DimensionsCount());
    Resize(new_shape_size);
    const int size_increase = new_shape_size - shape.DimensionsCount();
    for (int i = 0; i < size_increase; ++i) {
      SetDim(i, pad_value);
    }
    std::memcpy(DimsData() + size_increase, shape.DimsData(),
                sizeof(int32) * shape.DimensionsCount());
  }

  int32 size_;
  // size_ decides which member is active: the inline array for small ranks,
  // the heap pointer otherwise.
  union {
    int32 dims_[kMaxSmallSize];
    int32* dims_pointer_;
  };
};
+
+inline int Offset(const Dims<4>& dims, int i0, int i1, int i2, int i3) {
+ TFLITE_DCHECK(i0 >= 0 && i0 < dims.sizes[0]);
+ TFLITE_DCHECK(i1 >= 0 && i1 < dims.sizes[1]);
+ TFLITE_DCHECK(i2 >= 0 && i2 < dims.sizes[2]);
+ TFLITE_DCHECK(i3 >= 0 && i3 < dims.sizes[3]);
+ return i0 * dims.strides[0] + i1 * dims.strides[1] +
+ i2 * dims.strides[2] + i3 * dims.strides[3];
+}
+
+// Gets next index to iterate through a multidimensional array.
+inline bool NextIndex(const int num_dims, const int* dims, int* current) {
+ if (num_dims == 0) {
+ return false;
+ }
+ TFLITE_DCHECK(dims != nullptr);
+ TFLITE_DCHECK(current != nullptr);
+ int carry = 1;
+ for (int idx = num_dims - 1; idx >= 0; --idx) {
+ int current_val = current[idx] + carry;
+ TFLITE_DCHECK_GE(dims[idx], current_val);
+ if (dims[idx] == current_val) {
+ current[idx] = 0;
+ } else {
+ current[idx] = current_val;
+ carry = 0;
+ break;
+ }
+ }
+ return (carry == 0);
+}
+
+inline size_t ReducedOutputOffset(const int num_dims, const int* dims,
+ const int* index, const int num_axis,
+ const int* axis) {
+ if (num_dims == 0) {
+ return 0;
+ }
+ TFLITE_DCHECK(dims != nullptr);
+ TFLITE_DCHECK(index != nullptr);
+ size_t offset = 0;
+ for (int idx = 0; idx < num_dims; ++idx) {
+ // if we need to skip this axis
+ bool is_axis = false;
+ if (axis != nullptr) {
+ for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) {
+ if (idx == axis[axis_idx]) {
+ is_axis = true;
+ break;
+ }
+ }
+ }
+ if (!is_axis) {
+ offset = offset * static_cast<size_t>(dims[idx]) +
+ static_cast<size_t>(index[idx]);
+ }
+ }
+ return offset;
+}
+
+template <int N>
+bool IsPackedWithoutStrides(const Dims<N>& dims) {
+ int expected_stride = 1;
+ for (int d = 0; d < N; d++) {
+ if (dims.strides[d] != expected_stride) return false;
+ expected_stride *= dims.sizes[d];
+ }
+ return true;
+}
+
// Get array size, DCHECKing that the dim index is in range.
//
// Note that this will be phased out with Dims<4>, since RuntimeShape::Dims()
// already performs this check.
template <int N>
int ArraySize(const Dims<N>& array, int index) {
  TFLITE_DCHECK(index >= 0 && index < N);
  return array.sizes[index];
}

// Get common array size, DCHECKing that they all agree.
// Returns the size of dimension `index1` of `array1` (checked equal to
// dimension `index2` of `array2`).
template <typename ArrayType1, typename ArrayType2>
int MatchingArraySize(const ArrayType1& array1, int index1,
                      const ArrayType2& array2, int index2) {
  TFLITE_DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2));
  return ArraySize(array1, index1);
}
+
// Flat size calculation, checking that dimensions match with one or more other
// arrays.
inline int MatchingFlatSize(const RuntimeShape& shape,
                            const RuntimeShape& check_shape_0) {
  TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount());
  const int dims_count = shape.DimensionsCount();
  for (int i = 0; i < dims_count; ++i) {
    TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
  }
  return shape.FlatSize();
}

// Two-check-shape variant: verifies check_shape_0 here, recurses for the rest.
inline int MatchingFlatSize(const RuntimeShape& shape,
                            const RuntimeShape& check_shape_0,
                            const RuntimeShape& check_shape_1) {
  TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount());
  const int dims_count = shape.DimensionsCount();
  for (int i = 0; i < dims_count; ++i) {
    TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
  }
  return MatchingFlatSize(shape, check_shape_1);
}

// Three-check-shape variant.
inline int MatchingFlatSize(const RuntimeShape& shape,
                            const RuntimeShape& check_shape_0,
                            const RuntimeShape& check_shape_1,
                            const RuntimeShape& check_shape_2) {
  TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount());
  const int dims_count = shape.DimensionsCount();
  for (int i = 0; i < dims_count; ++i) {
    TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
  }
  return MatchingFlatSize(shape, check_shape_1, check_shape_2);
}
+
// Dims<N> counterpart of MatchingFlatSize above.
template <int N>
inline int MatchingFlatSize(const Dims<N>& dims, const Dims<N>& check_dims_0) {
  for (int i = 0; i < N; ++i) {
    TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i));
  }
  return FlatSize(dims);
}

// Total number of elements described by `dims` (product of all sizes).
template <int N>
inline int FlatSize(const Dims<N>& dims) {
  int flat_size = 1;
  for (int i = 0; i < N; ++i) {
    flat_size *= dims.sizes[i];
  }
  return flat_size;
}

// Product of all dimension sizes except `skip_dim`.
template <int N>
inline int FlatSizeSkipDim(const Dims<N>& dims, int skip_dim) {
  TFLITE_DCHECK(skip_dim >= 0 && skip_dim < N);
  int flat_size = 1;
  for (int i = 0; i < N; ++i) {
    flat_size *= (i == skip_dim) ? 1 : dims.sizes[i];
  }
  return flat_size;
}
+
+// A combination of MatchingFlatSize() and FlatSizeSkipDim().
+template <int N>
+inline int MatchingFlatSizeSkipDim(const Dims<N>& dims, int skip_dim,
+ const Dims<N>& check_dims_0) {
+ for (int i = 0; i < N; ++i) {
+ if (i != skip_dim) {
+ TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i));
+ }
+ }
+ return FlatSizeSkipDim(dims, skip_dim);
+}
+
+template <int N>
+inline int MatchingFlatSizeSkipDim(const Dims<N>& dims, int skip_dim,
+ const Dims<N>& check_dims_0,
+ const Dims<N>& check_dims_1,
+ const Dims<N>& check_dims_2) {
+ for (int i = 0; i < N; ++i) {
+ if (i != skip_dim) {
+ TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i));
+ }
+ }
+ return MatchingFlatSizeSkipDim(dims, skip_dim, check_dims_1, check_dims_2);
+}
+
+template <int N>
+inline int MatchingFlatSizeSkipDim(const Dims<N>& dims, int skip_dim,
+ const Dims<N>& check_dims_0,
+ const Dims<N>& check_dims_1,
+ const Dims<N>& check_dims_2,
+ const Dims<N>& check_dims_3) {
+ for (int i = 0; i < N; ++i) {
+ if (i != skip_dim) {
+ TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i));
+ }
+ }
+ return MatchingFlatSizeSkipDim(dims, skip_dim, check_dims_1, check_dims_2,
+ check_dims_3);
+}
+
+// Data is required to be contiguous, and so many operators can use either the
+// full array flat size or the flat size with one dimension skipped (commonly
+// the depth).
+inline int FlatSizeSkipDim(const RuntimeShape& shape, int skip_dim) {
+ const int dims_count = shape.DimensionsCount();
+ TFLITE_DCHECK(skip_dim >= 0 && skip_dim < dims_count);
+ const auto* dims_data = shape.DimsData();
+ int flat_size = 1;
+ for (int i = 0; i < dims_count; ++i) {
+ flat_size *= (i == skip_dim) ? 1 : dims_data[i];
+ }
+ return flat_size;
+}
+
// *****************************************************************************
// From optimized_ops.h

// Maps a raw buffer as a 1-D Eigen column vector; std::conditional picks the
// const or mutable Map depending on the constness of Scalar.
template <typename Scalar>
using VectorMap = typename std::conditional<
    std::is_const<Scalar>::value,
    Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type,
                                   Eigen::Dynamic, 1>>,
    Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, 1>>>::type;

// Flattens all dimensions of `dims` into one vector of FlatSize elements.
template <typename Scalar, int N>
VectorMap<Scalar> MapAsVector(Scalar* data, const Dims<N>& dims) {
  const int size = FlatSize(dims);
  return VectorMap<Scalar>(data, size, 1);
}

// Maps `size` consecutive elements starting at `data`.
template <typename Scalar>
VectorMap<Scalar> MapAsVector(Scalar* data, const size_t size) {
  return VectorMap<Scalar>(data, size, 1);
}

// RuntimeShape overload: flattens all dimensions of `shape`.
template <typename Scalar>
VectorMap<Scalar> MapAsVector(Scalar* data, const RuntimeShape& shape) {
  const int size = shape.FlatSize();
  return VectorMap<Scalar>(data, size, 1);
}
+
// Maps a raw buffer as a 2-D Eigen matrix; std::conditional picks the const
// or mutable Map depending on the constness of Scalar.
template <typename Scalar>
using MatrixMap = typename std::conditional<
    std::is_const<Scalar>::value,
    Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type,
                                   Eigen::Dynamic, Eigen::Dynamic>>,
    Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>>>::type;

// Rows = size of dimension 0; all remaining dimensions flattened into cols.
template <typename Scalar, int N>
MatrixMap<Scalar> MapAsMatrixWithFirstDimAsRows(Scalar* data,
                                                const Dims<N>& dims) {
  const int rows = dims.sizes[0];
  int cols = 1;
  for (int d = 1; d < N; d++) {
    cols *= dims.sizes[d];
  }
  return MatrixMap<Scalar>(data, rows, cols);
}

// Cols = size of the last dimension; all leading dimensions flattened into rows.
template <typename Scalar, int N>
MatrixMap<Scalar> MapAsMatrixWithLastDimAsCols(Scalar* data,
                                               const Dims<N>& dims) {
  const int cols = dims.sizes[N - 1];
  int rows = 1;
  for (int d = 0; d < N - 1; d++) {
    rows *= dims.sizes[d];
  }
  return MatrixMap<Scalar>(data, rows, cols);
}

// RuntimeShape overload: rows = last dimension, cols = everything else.
template <typename Scalar>
MatrixMap<Scalar> MapAsMatrixWithLastDimAsRows(Scalar* data,
                                               const RuntimeShape& shape) {
  const int dims_count = shape.DimensionsCount();
  const int rows = shape.Dims(dims_count - 1);
  const int cols = FlatSizeSkipDim(shape, dims_count - 1);
  return MatrixMap<Scalar>(data, rows, cols);
}

// RuntimeShape overload: cols = first dimension, rows = everything else.
template <typename Scalar>
MatrixMap<Scalar> MapAsMatrixWithFirstDimAsCols(Scalar* data,
                                                const RuntimeShape& shape) {
  const int cols = shape.Dims(0);
  const int rows = FlatSizeSkipDim(shape, 0);
  return MatrixMap<Scalar>(data, rows, cols);
}

// Splits the flat size into (rows, cols), requiring that `rows` equals the
// product of some prefix of the dimension sizes; DCHECKs if no prefix matches.
template <typename Scalar, int N>
MatrixMap<Scalar> MapAsMatrixWithGivenNumberOfRows(Scalar* data,
                                                   const Dims<N>& dims,
                                                   int rows) {
  int cols = 1;
  bool matched_rows = false;
  for (int d = 0; d < N; d++) {
    cols *= dims.sizes[d];
    if (cols == rows) {
      matched_rows = true;
      cols = 1;
    }
  }
  TFLITE_DCHECK(matched_rows);
  return MatrixMap<Scalar>(data, rows, cols);
}
+
// Matrix multiply: *result = lhs * rhs (noalias: result must not overlap the
// operands). The single-column rhs case is dispatched separately so the
// product is evaluated as a matrix*vector expression.
template <typename Lhs, typename Rhs, typename Result>
void Gemm(const Eigen::MatrixBase<Lhs>& lhs, const Eigen::MatrixBase<Rhs>& rhs,
          Eigen::MatrixBase<Result>* result) {
  if (rhs.cols() == 1) {
    // gemv-style path
    result->col(0).noalias() = lhs * rhs.col(0);
  } else {
    // general gemm path
    result->noalias() = lhs * rhs;
  }
}
+
// Parameters for the Slice kernel: per-dimension begin indices and sizes
// for up to 4 dimensions; the *_count fields give how many entries are valid.
struct SliceParams {
  int8 begin_count;
  int32 begin[4];
  int8 size_count;
  int32 size[4];
};
+
// Get common shape dim, DCHECKing that they all agree.
inline int MatchingDim(const RuntimeShape& shape1, int index1,
                       const RuntimeShape& shape2, int index2) {
  TFLITE_DCHECK_EQ(shape1.Dims(index1), shape2.Dims(index2));
  return shape1.Dims(index1);
}

// Variadic form: checks each successive (shape, index) pair against the first
// and returns the common dimension size.
template <typename... Args>
int MatchingDim(const RuntimeShape& shape1, int index1,
                const RuntimeShape& shape2, int index2, Args... args) {
  TFLITE_DCHECK_EQ(shape1.Dims(index1), shape2.Dims(index2));
  return MatchingDim(shape1, index1, args...);
}
+
// Padding scheme selector. NOTE(review): kSame/kValid presumably follow the
// usual TF convolution conventions — not exercised by visible code; confirm.
enum class PaddingType : uint8 { kNone, kSame, kValid };

// Explicit padding amounts in elements.
struct PaddingValues {
  int16 width;
  int16 height;
};

// Runtime parameters for the Conv2D kernel.
struct ConvParams {
  // PaddingType padding_type;
  PaddingValues padding_values;
  // TODO(starka): This was just "stride", so check that width+height is OK.
  int16 stride_width;
  int16 stride_height;
  /* not used currently
  int16 dilation_width_factor;
  int16 dilation_height_factor;
  // uint8 inference params.
  // TODO(b/65838351): Use smaller types if appropriate.
  int32 input_offset;
  int32 weights_offset;
  int32 output_offset;
  int32 output_multiplier;
  int output_shift;
  // uint8, etc, activation params.
  int32 quantized_activation_min;
  int32 quantized_activation_max;
  // float activation params.
  float float_activation_min;
  float float_activation_max;
  */
};
+
+
// Runtime parameters for the DepthwiseConv2D kernel.
struct DepthwiseParams {
  //PaddingType padding_type;
  PaddingValues padding_values;
  int16 stride_width;
  int16 stride_height;
  int16 dilation_width_factor;
  int16 dilation_height_factor;
  // NOTE(review): presumably output channels produced per input channel,
  // per the standard depthwise-conv meaning — confirm against the kernel.
  int16 depth_multiplier;
  /*
  // uint8 inference params.
  // TODO(b/65838351): Use smaller types if appropriate.
  int32 input_offset;
  int32 weights_offset;
  int32 output_offset;
  int32 output_multiplier;
  int output_shift;
  // uint8, etc, activation params.
  int32 quantized_activation_min;
  int32 quantized_activation_max;
  // float activation params.
  float float_activation_min;
  float float_activation_max;
  */
};
+
// Row-major offset for a rank-4 RuntimeShape: i0 is the slowest-varying
// index, i3 the fastest (note this is the opposite convention to Dims<4>,
// which carries explicit strides).
inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3) {
  TFLITE_DCHECK_EQ(shape.DimensionsCount(), 4);
  const int* dims_data = shape.DimsDataUpTo4D();
  TFLITE_DCHECK(i0 >= 0 && i0 < dims_data[0]);
  TFLITE_DCHECK(i1 >= 0 && i1 < dims_data[1]);
  TFLITE_DCHECK(i2 >= 0 && i2 < dims_data[2]);
  TFLITE_DCHECK(i3 >= 0 && i3 < dims_data[3]);
  return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3;
}

// Convenience overloads taking the four indices as an array.
inline int Offset(const Dims<4>& dims, int* index) {
  return Offset(dims, index[0], index[1], index[2], index[3]);
}

inline int Offset(const RuntimeShape& shape, int* index) {
  return Offset(shape, index[0], index[1], index[2], index[3]);
}
+
// Parameters for the Gather kernel: the axis to gather along.
struct GatherParams {
  int16 axis;
};

// Parameters for the Transpose kernel: dimension permutation; the first
// perm_count entries of perm are valid.
struct TransposeParams {
  int8 perm_count;
  int32 perm[4];
};
+
// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
// BROADCASTING.
//
// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
// rectangular array of numbers.
//
// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
// However, as Dims<N> is to be deprecated, this class exists as an adaptor
// to enable simple unoptimized implementations of element-wise broadcasting
// operations.
// A stride of 0 along a dimension is used to broadcast: every index along
// that dimension maps to the same element (see NdArrayDescForBroadcast).
template <int N>
struct NdArrayDesc {
  // The "extent" of each dimension. Indices along dimension d must be in the
  // half-open interval [0, extents[d]).
  int extents[N];

  // The number of *elements* (not bytes) between consecutive indices of each
  // dimension.
  int strides[N];
};
+
// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
// BROADCASTING.
//
// Same as Offset(), except takes as NdArrayDesc<N> instead of Dims<N>.
// Stride-0 (broadcast) dimensions collapse all subscripts to the same element.
inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2,
                            int i3) {
  TFLITE_DCHECK(i0 >= 0 && i0 < desc.extents[0]);
  TFLITE_DCHECK(i1 >= 0 && i1 < desc.extents[1]);
  TFLITE_DCHECK(i2 >= 0 && i2 < desc.extents[2]);
  TFLITE_DCHECK(i3 >= 0 && i3 < desc.extents[3]);
  return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] +
         i3 * desc.strides[3];
}
+
// Prepares descriptors so two shapes can be iterated with common (broadcast)
// extents: both are rank-extended to N, then on any dimension where the
// extents differ, the size-1 side gets stride 0 and the other side's extent.
template <int N>
inline void NdArrayDescsForElementwiseBroadcast(
    const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
    NdArrayDesc<N>* desc0_out, NdArrayDesc<N>* desc1_out) {
  TFLITE_DCHECK(desc0_out != nullptr);
  TFLITE_DCHECK(desc1_out != nullptr);

  auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
  auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);

  // Copy dims to desc, calculating strides.
  int desc0_stride = 1;
  int desc1_stride = 1;
  for (int i = N - 1; i >= 0; --i) {
    desc0_out->extents[i] = extended_input0_shape.Dims(i);
    desc0_out->strides[i] = desc0_stride;
    desc0_stride *= extended_input0_shape.Dims(i);
    desc1_out->extents[i] = extended_input1_shape.Dims(i);
    desc1_out->strides[i] = desc1_stride;
    desc1_stride *= extended_input1_shape.Dims(i);
  }

  // Walk over each dimension. If the extents are equal do nothing.
  // Otherwise, set the desc with extent 1 to have extent equal to the other and
  // stride 0.
  for (int i = 0; i < N; ++i) {
    const int extent0 = extended_input0_shape.Dims(i);
    const int extent1 = extended_input1_shape.Dims(i);
    if (extent0 != extent1) {
      if (extent0 == 1) {
        desc0_out->strides[i] = 0;
        desc0_out->extents[i] = extent1;
      } else {
        TFLITE_DCHECK_EQ(extent1, 1);
        desc1_out->strides[i] = 0;
        desc1_out->extents[i] = extent0;
      }
    }
  }
}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_concat.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_concat.def
new file mode 100644
index 000000000..76ca59647
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_concat.def
@@ -0,0 +1,45 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
// Concatenates `inputs_count` tensors along dimension `concat_dim` into
// `output_data`; all non-concat dimensions must match the output (DCHECKed).
// The output must be densely packed.
template <typename Scalar>
void Concatenation(int concat_dim, const Scalar* const* input_data,
                   const Dims<4>* input_dims, int inputs_count,
                   Scalar* output_data, const Dims<4>& output_dims) {
  int concat_size = 0;
  for (int i = 0; i < inputs_count; i++) {
    for (int j = 0; j < 4; j++) {
      if (j != concat_dim) {
        MatchingArraySize(input_dims[i], j, output_dims, j);
      }
    }
    concat_size += ArraySize(input_dims[i], concat_dim);
  }
  TFLITE_DCHECK_EQ(concat_size, ArraySize(output_dims, concat_dim));
  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
  // Number of slices along the dimensions slower-varying than concat_dim
  // (Dims<4> has dimension 0 fastest when packed).
  int outer_size = 1;
  for (int i = concat_dim + 1; i < 4; i++) {
    outer_size *= output_dims.sizes[i];
  }
  Scalar* output_ptr = output_data;
  for (int k = 0; k < outer_size; k++) {
    for (int i = 0; i < inputs_count; ++i) {
      // Elements contributed by input i per outer slice. NOTE(review):
      // size*stride at concat_dim assumes input i is densely packed below
      // and including concat_dim — confirm against callers.
      const int copy_size =
          input_dims[i].sizes[concat_dim] * input_dims[i].strides[concat_dim];
      memcpy(output_ptr, input_data[i] + k * copy_size,
             copy_size * sizeof(Scalar));
      output_ptr += copy_size;
    }
  }
}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_conv.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_conv.def
new file mode 100644
index 000000000..4fef4dcfd
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_conv.def
@@ -0,0 +1,237 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Copies the input patch feeding output pixel (b, h, w) into column
+// `buffer_id` of the im2col buffer `conv_buffer_data`, padding with
+// `zero_byte` wherever the kheight x kwidth window falls outside the image.
+// NOTE(review): padding is written with memset, so `zero_byte` must be the
+// repeated-byte representation of the padding value (0x00 -> 0.0f for float).
+template <typename T>
+inline void ExtractPatchIntoBufferColumn(const RuntimeShape& input_shape, int w,
+                                         int h, int b, int kheight, int kwidth,
+                                         int stride_width, int stride_height,
+                                         int pad_width, int pad_height,
+                                         int in_width, int in_height,
+                                         int in_depth, int single_buffer_length,
+                                         int buffer_id, const T* in_data,
+                                         T* conv_buffer_data, uint8 zero_byte) {
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  // This chunk of code reshapes all the inputs corresponding to
+  // output (b, h, w) to a column vector in conv_buffer(:, buffer_id).
+  const int kwidth_times_indepth = kwidth * in_depth;
+  const int inwidth_times_indepth = in_width * in_depth;
+  // "Ungated" bounds are the raw window edges before clamping to the image.
+  const int ih_ungated_start = h * stride_height - pad_height;
+  const int ih_ungated_end = (ih_ungated_start + kheight);
+  const int ih_end = std::min(ih_ungated_end, in_height);
+  const int iw_ungated_start = w * stride_width - pad_width;
+  const int iw_ungated_end = (iw_ungated_start + kwidth);
+  const int iw_end = std::min(iw_ungated_end, in_width);
+  // If the patch is off the edge of the input image, skip writing those rows
+  // and columns from the patch into the output array.
+  const int h_offset = std::max(0, -ih_ungated_start);
+  const int w_offset = std::max(0, -iw_ungated_start);
+  const int ih_start = std::max(0, ih_ungated_start);
+  const int iw_start = std::max(0, iw_ungated_start);
+  const int single_row_num =
+      std::min(kwidth - w_offset, in_width - iw_start) * in_depth;
+  const int output_row_offset = (buffer_id * single_buffer_length);
+  int out_offset =
+      output_row_offset + (h_offset * kwidth + w_offset) * in_depth;
+  int in_offset = Offset(input_shape, b, ih_start, iw_start, 0);
+
+  // Express all of the calculations as padding around the input patch.
+  const int top_padding = h_offset;
+  const int bottom_padding = (ih_ungated_end - ih_end);
+  const int left_padding = w_offset;
+  const int right_padding = (iw_ungated_end - iw_end);
+  assert(single_row_num ==
+         ((kwidth - (left_padding + right_padding)) * in_depth));
+
+  // Write out zeroes to the elements representing the top rows of the input
+  // patch that are off the edge of the input image.
+  if (top_padding > 0) {
+    const int top_row_elements = (top_padding * kwidth * in_depth);
+    memset(conv_buffer_data + output_row_offset, zero_byte,
+           (top_row_elements * sizeof(T)));
+  }
+
+  // If the patch is on the interior of the input image horizontally, just copy
+  // over the rows sequentially, otherwise add zero padding at the start or end.
+  if ((left_padding == 0) && (right_padding == 0)) {
+    for (int ih = ih_start; ih < ih_end; ++ih) {
+      memcpy(conv_buffer_data + out_offset, in_data + in_offset,
+             single_row_num * sizeof(T));
+      out_offset += kwidth_times_indepth;
+      in_offset += inwidth_times_indepth;
+    }
+  } else {
+    for (int ih = ih_start; ih < ih_end; ++ih) {
+      if (left_padding > 0) {
+        const int left_start = (out_offset - (left_padding * in_depth));
+        memset(conv_buffer_data + left_start, zero_byte,
+               (left_padding * in_depth * sizeof(T)));
+      }
+      memcpy(conv_buffer_data + out_offset, in_data + in_offset,
+             single_row_num * sizeof(T));
+      if (right_padding > 0) {
+        const int right_start = (out_offset + single_row_num);
+        memset(conv_buffer_data + right_start, zero_byte,
+               (right_padding * in_depth * sizeof(T)));
+      }
+      out_offset += kwidth_times_indepth;
+      in_offset += inwidth_times_indepth;
+    }
+  }
+
+  // If the bottom of the patch falls off the input image, pad the values
+  // representing those input rows with zeroes.
+  if (bottom_padding > 0) {
+    const int bottom_row_elements = (bottom_padding * kwidth * in_depth);
+    const int bottom_start =
+        output_row_offset +
+        ((top_padding + (ih_end - ih_start)) * kwidth * in_depth);
+    memset(conv_buffer_data + bottom_start, zero_byte,
+           (bottom_row_elements * sizeof(T)));
+  }
+}
+
+/* Place Dilated Im2Col should be here when it is required */
+
+// Builds the im2col buffer for a strided/padded (non-dilated) convolution:
+// for every output pixel (b, h, w), the kheight x kwidth x input_depth input
+// patch is extracted into its own buffer column via
+// ExtractPatchIntoBufferColumn.
+// NOTE(review): `output_shape` here is the im2col shape, so Dims(3) is the
+// flattened patch length passed as single_buffer_length — confirm at callers.
+template <typename T>
+void Im2col(const ConvParams& params, int kheight, int kwidth, uint8 zero_byte,
+            const RuntimeShape& input_shape, const T* input_data,
+            const RuntimeShape& output_shape, T* output_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = input_shape.Dims(3);
+  const int input_width = input_shape.Dims(2);
+  const int input_height = input_shape.Dims(1);
+  const int output_depth = output_shape.Dims(3);
+  const int output_width = output_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+
+  // One buffer column per output pixel, in B x H x W order.
+  int buffer_id = 0;
+  // Loop over the output nodes.
+  for (int b = 0; b < batches; ++b) {
+    for (int h = 0; h < output_height; ++h) {
+      for (int w = 0; w < output_width; ++w) {
+        ExtractPatchIntoBufferColumn(
+            input_shape, w, h, b, kheight, kwidth, stride_width, stride_height,
+            pad_width, pad_height, input_width, input_height, input_depth,
+            output_depth, buffer_id, input_data, output_data, zero_byte);
+        ++buffer_id;
+      }
+    }
+  }
+}
+
+// Float 2-D convolution implemented as im2col followed by one large GEMM.
+// Dilation is not supported here (the dilated path is commented out below),
+// and no activation clamping is applied to the output.
+// `im2col_data` must be non-null exactly when im2col is needed (any stride
+// != 1 or a filter larger than 1x1); it must be null otherwise.
+inline void Conv(const ConvParams& params,
+                 const RuntimeShape& input_shape, const float* input_data,
+                 const RuntimeShape& filter_shape, const float* filter_data,
+                 const RuntimeShape& output_shape, float* output_data,
+                 const RuntimeShape& im2col_shape, float* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  /* Dilation
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  */
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  // Suppress unused-parameter warnings on the direct (no-im2col) path.
+  (void)im2col_data;
+  (void)im2col_shape;
+
+  // NB: static_cast<float>(0x00000000h) == 0.0f
+  const uint8 float_zero_byte = 0x00;
+  const float* gemm_input_data = nullptr;
+  const RuntimeShape* gemm_input_shape = nullptr;
+  const int filter_width = filter_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const bool need_im2col = stride_width != 1 || stride_height != 1 ||
+                           filter_width != 1 || filter_height != 1;
+  // Dilated im2col
+  /* const bool need_dilated_im2col =
+      dilation_width_factor != 1 || dilation_height_factor != 1;
+  if (need_dilated_im2col) {
+    DilatedIm2col(params, float_zero_byte, input_shape, input_data,
+                  filter_shape, output_shape, im2col_data);
+    gemm_input_data = im2col_data;
+    gemm_input_shape = &im2col_shape;
+  } else */if (need_im2col) {
+    TFLITE_DCHECK(im2col_data);
+    Im2col(params, filter_height, filter_width, float_zero_byte, input_shape,
+           input_data, im2col_shape, im2col_data);
+    gemm_input_data = im2col_data;
+    gemm_input_shape = &im2col_shape;
+  } else {
+    // TODO(aselle): We need to make sure to not send im2col if it is not
+    // needed.
+    TFLITE_DCHECK(!im2col_data);
+    gemm_input_data = input_data;
+    gemm_input_shape = &input_shape;
+  }
+
+  // The following code computes matrix multiplication c = a * transpose(b)
+  // with CBLAS, where:
+  // * `a` is a matrix with dimensions (m, k).
+  // * `b` is a matrix with dimensions (n, k), so transpose(b) is (k, n).
+  // * `c` is a matrix with dimensions (m, n).
+  // The naming of variables are aligned with CBLAS specification here.
+  const float* a = gemm_input_data;
+  const float* b = filter_data;
+  float* c = output_data;
+  const int gemm_input_dims = gemm_input_shape->DimensionsCount();
+  int m = FlatSizeSkipDim(*gemm_input_shape, gemm_input_dims - 1);
+  int n = output_shape.Dims(3);
+  int k = gemm_input_shape->Dims(gemm_input_dims - 1);
+
+#if defined(TF_LITE_USE_CBLAS) && defined(__APPLE__)
+  // The stride of matrix a, b and c respectively.
+  int stride_a = k;
+  int stride_b = k;
+  int stride_c = n;
+
+  cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, n, k, 1.0f, a,
+              stride_a, b, stride_b, 0.0f, c, stride_c);
+#else
+  // When an optimized CBLAS implementation is not available, fall back
+  // to using Eigen.
+  typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
+      Matrix;
+  typedef Eigen::Map<Matrix> MatrixRef;
+  typedef Eigen::Map<const Matrix> ConstMatrixRef;
+
+  MatrixRef matrix_c(c, m, n);
+  ConstMatrixRef matrix_a(a, m, k);
+  ConstMatrixRef matrix_b(b, n, k);
+
+  // The following special casing for when a or b is a vector is required
+  // as Eigen seem to fail to make this optimization on its own.
+  if (n == 1) {
+    matrix_c.col(0).noalias() = matrix_a * matrix_b.row(0).transpose();
+  } else if (m == 1) {
+    matrix_c.row(0).noalias() = matrix_a.row(0) * matrix_b.transpose();
+  } else {
+    matrix_c.noalias() = matrix_a * matrix_b.transpose();
+  }
+
+#endif  // defined(TF_LITE_USE_CBLAS) && defined(__APPLE__)
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_conv_transpose.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_conv_transpose.def
new file mode 100644
index 000000000..016ff15e1
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_conv_transpose.def
@@ -0,0 +1,111 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <iostream>
+
+// Scatter-based im2col for transposed convolution: iterates over INPUT
+// pixels and copies each one into every im2col slot of the output pixels it
+// influences.  Because not every slot is written, the whole buffer is
+// zero-filled first.
+// NOTE(review): `zero_byte` fills via memset, so it must be the repeated
+// byte of the padding value (0x00 -> 0.0f for float).
+template <typename T>
+void TransposeIm2col(const ConvParams& params, uint8 zero_byte,
+                     const RuntimeShape& input_shape, const T* input_data,
+                     const RuntimeShape& filter_shape,
+                     const RuntimeShape& output_shape, T* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK(im2col_data);
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  // Checked only for consistency; the value itself is unused here.
+  MatchingDim(output_shape, 3, filter_shape, 0);  // output_depth
+
+  // Construct the MxN sized im2col matrix.
+  // The rows M, are sub-ordered B x H x W
+  const RuntimeShape row_shape({1, batches, output_height, output_width});
+  // The columns, N, are sub-ordered Kh x Kw x Din
+  const RuntimeShape col_shape({1, filter_height, filter_width, input_depth});
+  // Use dimensions M and N to construct dims for indexing directly into im2col
+  const RuntimeShape im2col_shape(
+      {1, 1, row_shape.FlatSize(), col_shape.FlatSize()});
+
+  // Build the im2col matrix by looping through all the input pixels,
+  // computing their influence on the output, rather than looping through all
+  // the output pixels. We therefore must initialize the im2col array to zero.
+  // This is potentially inefficient because we subsequently overwrite bytes
+  // set here. However, in practice memset is very fast and costs negligible.
+  memset(im2col_data, zero_byte, im2col_shape.FlatSize() * sizeof(T));
+
+  // Loop through the output batches
+  for (int batch = 0; batch < batches; ++batch) {
+    // Loop through input pixels one at a time.
+    for (int in_y = 0; in_y < input_height; ++in_y) {
+      for (int in_x = 0; in_x < input_width; ++in_x) {
+        // Loop through the output pixels it will influence
+        const int out_x_origin = (in_x * stride_width) - pad_width;
+        const int out_y_origin = (in_y * stride_height) - pad_height;
+        for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+          const int out_y = out_y_origin + filter_y;
+          // Is output pixel within height bounds?
+          if ((out_y >= 0) && (out_y < output_height)) {
+            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+              const int out_x = out_x_origin + filter_x;
+              // Is output pixel within width bounds?
+              if ((out_x >= 0) && (out_x < output_width)) {
+                // Copy the input elements of this pixel
+                T const* src =
+                    input_data + Offset(input_shape, batch, in_y, in_x, 0);
+                int row_offset = Offset(row_shape, 0, batch, out_y, out_x);
+                int col_offset = Offset(col_shape, 0, filter_y, filter_x, 0);
+                T* dst = im2col_data +
+                         Offset(im2col_shape, 0, 0, row_offset, col_offset);
+                memcpy(dst, src, input_depth * sizeof(T));
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+// Transposed ("deconvolution") float convolution: builds the scatter-style
+// im2col matrix, then computes output = transpose(filter) * im2col with one
+// Gemm call.  `im2col_data` is mandatory for this implementation.
+inline void TransposeConv(
+    const ConvParams& params, const RuntimeShape& input_shape,
+    const float* input_data, const RuntimeShape& filter_shape,
+    const float* filter_data, const RuntimeShape& output_shape,
+    float* output_data, const RuntimeShape& im2col_shape, float* im2col_data) {
+
+  // Note we could use transposed weights with forward conv for unstrided
+  // cases. But we are already getting good performance with this code as-is.
+  TFLITE_DCHECK(im2col_data);
+  // Padding byte 0 yields 0.0f for float data.
+  TransposeIm2col(params, 0, input_shape, input_data, filter_shape,
+                  output_shape, im2col_data);
+
+  const auto im2col_matrix_map =
+      MapAsMatrixWithLastDimAsRows(im2col_data, im2col_shape);
+  const auto filter_matrix_map =
+      MapAsMatrixWithFirstDimAsCols(filter_data, filter_shape);
+  auto output_matrix_map =
+      MapAsMatrixWithLastDimAsRows(output_data, output_shape);
+
+  Gemm(filter_matrix_map.transpose(), im2col_matrix_map, &output_matrix_map);
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_depthwise_conv.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_depthwise_conv.def
new file mode 100644
index 000000000..e48fd1f76
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_depthwise_conv.def
@@ -0,0 +1,1029 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// Implementation of float DepthwiseConv
+
+// Primary template for the per-pixel depthwise-conv accumulation kernels.
+// kAllowStrided: whether the input pointer advances by a runtime increment
+// between output pixels (true) or by a compile-time-fixed amount (false).
+// kFixedInputDepth / kFixedDepthMultiplier: compile-time channel counts
+// (0 means "any runtime value").  Specializations provide Run(); this
+// primary template is intentionally empty.
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+struct FloatDepthwiseConvKernel {};
+
+#ifdef USE_NEON
+
+// NEON kernel: unstrided input, input_depth fixed at 8, depth_multiplier 1.
+// Multiply-accumulates the 8-channel filter into acc_buffer over
+// num_output_pixels pixels, 2 pixels per iteration when possible.
+// input_depth / depth_multiplier / input_ptr_increment are unused: the
+// specialization fixes them at compile time.
+template <>
+struct FloatDepthwiseConvKernel<false, 8, 1> {
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const float* input_ptr, int input_ptr_increment,
+                  const float* filter_ptr, float* acc_buffer_ptr) {
+    // Load the filters
+    float32x4_t filter[2];
+    for (int i = 0; i < 2; i++) {
+      filter[i] = vld1q_f32(filter_ptr + 4 * i);
+    }
+    int outp = 0;
+    // Handle 2 output pixels at a time.
+    for (; outp <= num_output_pixels - 2; outp += 2) {
+      // Load the inputs
+      float32x4_t input[4];
+      for (int i = 0; i < 4; i++) {
+        input[i] = vld1q_f32(input_ptr + 4 * i);
+      }
+      input_ptr += 16;
+      // Load the accumulators from acc_buffer
+      float32x4_t acc[4];
+      for (int i = 0; i < 4; i++) {
+        acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+      }
+      // Multiply-accumulate
+      acc[0] = vmlaq_f32(acc[0], input[0], filter[0]);
+      acc[1] = vmlaq_f32(acc[1], input[1], filter[1]);
+      acc[2] = vmlaq_f32(acc[2], input[2], filter[0]);
+      acc[3] = vmlaq_f32(acc[3], input[3], filter[1]);
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 4; i++) {
+        vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 16;
+    }
+    // Handle one output pixel at a time.
+    for (; outp < num_output_pixels; outp++) {
+      // Load the inputs
+      float32x4_t input[2];
+      for (int i = 0; i < 2; i++) {
+        input[i] = vld1q_f32(input_ptr + 4 * i);
+      }
+      input_ptr += 8;
+      // Load the accumulators from acc_buffer
+      float32x4_t acc[2];
+      for (int i = 0; i < 2; i++) {
+        acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+      }
+      // Multiply-accumulate
+      for (int i = 0; i < 2; i++) {
+        acc[i] = vmlaq_f32(acc[i], input[i], filter[i]);
+      }
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 2; i++) {
+        vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 8;
+    }
+  }
+};
+
+// NEON kernel: unstrided input, input_depth fixed at 2, depth_multiplier 1.
+// Processes 8, 4, 2, then 1 output pixel(s) per iteration, reusing the
+// 2-element filter duplicated across a 4-lane register.  input_depth /
+// depth_multiplier / input_ptr_increment are unused (fixed by the
+// specialization).
+template <>
+struct FloatDepthwiseConvKernel<false, 2, 1> {
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const float* input_ptr, int input_ptr_increment,
+                  const float* filter_ptr, float* acc_buffer_ptr) {
+    const float32x2_t filters = vld1_f32(filter_ptr);
+    const float32x4_t filters_dup2 = vcombine_f32(filters, filters);
+    int outp = 0;
+    // Handle 8 output pixels at a time.
+    for (; outp <= num_output_pixels - 8; outp += 8) {
+      // Load the inputs
+      float32x4_t input[4];
+      for (int i = 0; i < 4; i++) {
+        input[i] = vld1q_f32(input_ptr + 4 * i);
+      }
+      input_ptr += 16;
+      // Load the accumulators from acc_buffer
+      float32x4_t acc[4];
+      for (int i = 0; i < 4; i++) {
+        acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+      }
+      // Multiply-accumulate
+      for (int i = 0; i < 4; i++) {
+        acc[i] = vmlaq_f32(acc[i], input[i], filters_dup2);
+      }
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 4; i++) {
+        vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 16;
+    }
+    // Handle 4 output pixels at a time.
+    for (; outp <= num_output_pixels - 4; outp += 4) {
+      // Load the inputs
+      float32x4_t input[2];
+      for (int i = 0; i < 2; i++) {
+        input[i] = vld1q_f32(input_ptr + 4 * i);
+      }
+      input_ptr += 8;
+      // Load the accumulators from acc_buffer
+      float32x4_t acc[2];
+      for (int i = 0; i < 2; i++) {
+        acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+      }
+      // Multiply-accumulate
+      for (int i = 0; i < 2; i++) {
+        acc[i] = vmlaq_f32(acc[i], input[i], filters_dup2);
+      }
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 2; i++) {
+        vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 8;
+    }
+    // Handle 2 output pixels at a time.
+    for (; outp <= num_output_pixels - 2; outp += 2) {
+      // Load the inputs
+      const float32x4_t input = vld1q_f32(input_ptr);
+      input_ptr += 4;
+      // Load the accumulators from acc_buffer
+      float32x4_t acc = vld1q_f32(acc_buffer_ptr);
+      // Multiply-accumulate
+      acc = vmlaq_f32(acc, input, filters_dup2);
+      // Store the accumulators back to acc_buffer
+      vst1q_f32(acc_buffer_ptr, acc);
+      acc_buffer_ptr += 4;
+    }
+    // Handle 1 output pixel at a time
+    for (; outp < num_output_pixels; outp++) {
+      // Load the inputs
+      const float32x2_t input = vld1_f32(input_ptr);
+      input_ptr += 2;
+      // Load the accumulators from acc_buffer
+      float32x2_t acc = vld1_f32(acc_buffer_ptr);
+      // Multiply-accumulate
+      acc = vmla_f32(acc, input, filters);
+      // Store the accumulators back to acc_buffer
+      vst1_f32(acc_buffer_ptr, acc);
+      acc_buffer_ptr += 2;
+    }
+  }
+};
+
+// NEON kernel: strided input, runtime input_depth, depth_multiplier 1.
+// For each output pixel, walks the channels 16, 4, then 1 at a time and
+// advances the input pointer by the runtime `input_ptr_increment`.
+// `depth_multiplier` is unused (fixed to 1 by the specialization).
+template <>
+struct FloatDepthwiseConvKernel<true, 0, 1> {
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const float* input_ptr, int input_ptr_increment,
+                  const float* filter_ptr, float* acc_buffer_ptr) {
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++) {
+      const float* local_filter_ptr = filter_ptr;
+      const float* local_input_ptr = input_ptr;
+      int ic = 0;
+      // Handle 16 input channels at a time.
+      for (; ic <= input_depth - 16; ic += 16) {
+        // Load the filters
+        float32x4_t filter_0 = vld1q_f32(local_filter_ptr + 4 * 0);
+        float32x4_t filter_1 = vld1q_f32(local_filter_ptr + 4 * 1);
+        float32x4_t filter_2 = vld1q_f32(local_filter_ptr + 4 * 2);
+        float32x4_t filter_3 = vld1q_f32(local_filter_ptr + 4 * 3);
+        local_filter_ptr += 16;
+        // Load the inputs
+        float32x4_t input_0 = vld1q_f32(local_input_ptr + 4 * 0);
+        float32x4_t input_1 = vld1q_f32(local_input_ptr + 4 * 1);
+        float32x4_t input_2 = vld1q_f32(local_input_ptr + 4 * 2);
+        float32x4_t input_3 = vld1q_f32(local_input_ptr + 4 * 3);
+        local_input_ptr += 16;
+        // Load the accumulators from acc_buffer
+        float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0);
+        float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1);
+        float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2);
+        float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3);
+        // Multiply-accumulate
+        acc_0 = vmlaq_f32(acc_0, input_0, filter_0);
+        acc_1 = vmlaq_f32(acc_1, input_1, filter_1);
+        acc_2 = vmlaq_f32(acc_2, input_2, filter_2);
+        acc_3 = vmlaq_f32(acc_3, input_3, filter_3);
+        // Store the accumulators back to acc_buffer
+        vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0);
+        vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1);
+        vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2);
+        vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3);
+        acc_buffer_ptr += 16;
+      }
+      // Handle 4 input channels at a time.
+      for (; ic <= input_depth - 4; ic += 4) {
+        // Load the filters
+        float32x4_t filter;
+        filter = vld1q_f32(local_filter_ptr);
+        local_filter_ptr += 4;
+        // Load the inputs
+        float32x4_t input;
+        input = vld1q_f32(local_input_ptr);
+        local_input_ptr += 4;
+        // Load the accumulators from acc_buffer
+        float32x4_t acc;
+        acc = vld1q_f32(acc_buffer_ptr);
+        // Multiply-accumulate
+        acc = vmlaq_f32(acc, input, filter);
+        // Store the accumulators back to acc_buffer
+        vst1q_f32(acc_buffer_ptr, acc);
+        acc_buffer_ptr += 4;
+      }
+      // Handle one input channel at a time (scalar tail).
+      for (; ic < input_depth; ic++) {
+        const float input_val = *local_input_ptr++;
+        const float filter_val = *local_filter_ptr++;
+        *acc_buffer_ptr++ += filter_val * input_val;
+      }
+      input_ptr += input_ptr_increment;
+    }
+  }
+};
+
+// NEON kernel: strided input, runtime input_depth, depth_multiplier 8.
+// Each input channel feeds 8 consecutive accumulator entries; channels are
+// processed 2, then 1 at a time.  `depth_multiplier` is unused (fixed to 8).
+template <>
+struct FloatDepthwiseConvKernel<true, 0, 8> {
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const float* input_ptr, int input_ptr_increment,
+                  const float* filter_ptr, float* acc_buffer_ptr) {
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++) {
+      const float* local_filter_ptr = filter_ptr;
+      const float* local_input_ptr = input_ptr;
+      int ic = 0;
+      // Handle 2 input channels at a time.
+      for (; ic <= input_depth - 2; ic += 2) {
+        // Load the filters
+        float32x4_t filter[4];
+        for (int i = 0; i < 4; i++) {
+          filter[i] = vld1q_f32(local_filter_ptr + 4 * i);
+        }
+        local_filter_ptr += 16;
+        // Load the inputs
+        const float32x2_t input = vld1_f32(local_input_ptr);
+        local_input_ptr += 2;
+        // Load the accumulators from acc_buffer
+        float32x4_t acc[4];
+        for (int i = 0; i < 4; i++) {
+          acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+        }
+        // Multiply-accumulate: lane 0 drives the first 8 outputs,
+        // lane 1 the next 8.
+        acc[0] = vmlaq_lane_f32(acc[0], filter[0], input, 0);
+        acc[1] = vmlaq_lane_f32(acc[1], filter[1], input, 0);
+        acc[2] = vmlaq_lane_f32(acc[2], filter[2], input, 1);
+        acc[3] = vmlaq_lane_f32(acc[3], filter[3], input, 1);
+        // Store the accumulators back to acc_buffer
+        for (int i = 0; i < 4; i++) {
+          vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+        }
+        acc_buffer_ptr += 16;
+      }
+      // Handle one input channel at a time.
+      for (; ic < input_depth; ic++) {
+        // Load the filters
+        float32x4_t filter[2];
+        for (int i = 0; i < 2; i++) {
+          filter[i] = vld1q_f32(local_filter_ptr + 4 * i);
+        }
+        local_filter_ptr += 8;
+        // Load the inputs
+        const float input_val = *local_input_ptr++;
+        // Load the accumulators from acc_buffer
+        float32x4_t acc[2];
+        for (int i = 0; i < 2; i++) {
+          acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+        }
+        // Multiply-accumulate
+        for (int i = 0; i < 2; i++) {
+          acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val);
+        }
+        // Store the accumulators back to acc_buffer
+        for (int i = 0; i < 2; i++) {
+          vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+        }
+        acc_buffer_ptr += 8;
+      }
+      input_ptr += input_ptr_increment;
+    }
+  }
+};
+
+// Note this implementation is very slow for input_depths < 8
+// (e.g. comparable to reference implementation) see, specializations for
+// input_depth=3 below.
+//
+// NEON kernel: strided input, runtime input_depth, depth_multiplier 2.
+// Each input channel feeds 2 consecutive accumulator entries; channels are
+// processed 8, 4, 2, then 1 at a time.  `depth_multiplier` is unused.
+template <>
+struct FloatDepthwiseConvKernel<true, 0, 2> {
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const float* input_ptr, int input_ptr_increment,
+                  const float* filter_ptr, float* acc_buffer_ptr) {
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++) {
+      const float* local_filter_ptr = filter_ptr;
+      const float* local_input_ptr = input_ptr;
+      int ic = 0;
+      // Handle 8 input channels at a time.
+      for (; ic <= input_depth - 8; ic += 8) {
+        // Load the filters
+        float32x4_t filter[4];
+        for (int i = 0; i < 4; i++) {
+          filter[i] = vld1q_f32(local_filter_ptr + 4 * i);
+        }
+        local_filter_ptr += 16;
+        // Load the inputs, duplicating each channel value into adjacent
+        // lanes so it lines up with its 2 filter outputs.
+        float32x4x2_t input_dup2[2];
+        for (int i = 0; i < 2; i++) {
+          const float32x4_t input = vld1q_f32(local_input_ptr + 4 * i);
+          input_dup2[i] = vzipq_f32(input, input);
+        }
+        local_input_ptr += 8;
+        // Load the accumulators from acc_buffer
+        float32x4_t acc[4];
+        for (int i = 0; i < 4; i++) {
+          acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+        }
+        // Multiply-accumulate
+        acc[0] = vmlaq_f32(acc[0], filter[0], input_dup2[0].val[0]);
+        acc[1] = vmlaq_f32(acc[1], filter[1], input_dup2[0].val[1]);
+        acc[2] = vmlaq_f32(acc[2], filter[2], input_dup2[1].val[0]);
+        acc[3] = vmlaq_f32(acc[3], filter[3], input_dup2[1].val[1]);
+        // Store the accumulators back to acc_buffer
+        for (int i = 0; i < 4; i++) {
+          vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+        }
+        acc_buffer_ptr += 16;
+      }
+      // Handle 4 input channels at a time.
+      for (; ic <= input_depth - 4; ic += 4) {
+        // Load the filters
+        float32x2_t filter[4];
+        for (int i = 0; i < 4; i++) {
+          filter[i] = vld1_f32(local_filter_ptr + 2 * i);
+        }
+        local_filter_ptr += 8;
+        // Load the inputs
+        const float32x4_t input = vld1q_f32(local_input_ptr);
+        local_input_ptr += 4;
+        // Load the accumulators from acc_buffer
+        float32x2_t acc[4];
+        for (int i = 0; i < 4; i++) {
+          acc[i] = vld1_f32(acc_buffer_ptr + 2 * i);
+        }
+        // Multiply-accumulate
+        acc[0] = vmla_lane_f32(acc[0], filter[0], vget_low_f32(input), 0);
+        acc[1] = vmla_lane_f32(acc[1], filter[1], vget_low_f32(input), 1);
+        acc[2] = vmla_lane_f32(acc[2], filter[2], vget_high_f32(input), 0);
+        acc[3] = vmla_lane_f32(acc[3], filter[3], vget_high_f32(input), 1);
+        // Store the accumulators back to acc_buffer
+        for (int i = 0; i < 4; i++) {
+          vst1_f32(acc_buffer_ptr + 2 * i, acc[i]);
+        }
+        acc_buffer_ptr += 8;
+      }
+      // Handle 2 input channels at a time.
+      for (; ic <= input_depth - 2; ic += 2) {
+        // Load the filters
+        const float32x4_t filter = vld1q_f32(local_filter_ptr);
+        local_filter_ptr += 4;
+        // Load the inputs
+        const float32x2_t input = vld1_f32(local_input_ptr);
+        local_input_ptr += 2;
+        // Load the accumulators from acc_buffer
+        float32x2_t acc[2];
+        for (int i = 0; i < 2; i++) {
+          acc[i] = vld1_f32(acc_buffer_ptr + 2 * i);
+        }
+        // Multiply-accumulate
+        acc[0] = vmla_lane_f32(acc[0], vget_low_f32(filter), input, 0);
+        acc[1] = vmla_lane_f32(acc[1], vget_high_f32(filter), input, 1);
+        // Store the accumulators back to acc_buffer
+        for (int i = 0; i < 2; i++) {
+          vst1_f32(acc_buffer_ptr + 2 * i, acc[i]);
+        }
+        acc_buffer_ptr += 4;
+      }
+      // Handle one input channel at a time (scalar tail).
+      for (; ic < input_depth; ic++) {
+        // Load the inputs
+        const float input_val = *local_input_ptr++;
+        // Multiply-accumulate
+        for (int i = 0; i < 2; i++) {
+          acc_buffer_ptr[i] += local_filter_ptr[i] * input_val;
+        }
+        local_filter_ptr += 2;
+        acc_buffer_ptr += 2;
+      }
+      input_ptr += input_ptr_increment;
+    }
+  }
+};
+
+// NEON kernel: strided input, input_depth fixed at 3, depth_multiplier 2.
+// Filters are loaded once outside the pixel loop; each of the 3 input
+// channels drives 2 outputs (6 accumulator entries per pixel).
+// `input_depth` and `depth_multiplier` are unused (fixed by specialization).
+template <>
+struct FloatDepthwiseConvKernel<true, 3, 2> {
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const float* input_ptr, int input_ptr_increment,
+                  const float* filter_ptr, float* acc_buffer_ptr) {
+    // Load the filters
+    float32x2_t filter[3];
+    for (int i = 0; i < 3; i++) {
+      filter[i] = vld1_f32(filter_ptr + 2 * i);
+    }
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++) {
+      // Read channels 0-1 as a pair, channel 2 duplicated into both lanes.
+      const float32x2_t input01 = vld1_f32(input_ptr);
+      const float32x2_t input2 = vld1_dup_f32(input_ptr + 2);
+      // Load the accumulators from acc_buffer
+      float32x2_t acc[3];
+      for (int i = 0; i < 3; i++) {
+        acc[i] = vld1_f32(acc_buffer_ptr + 2 * i);
+      }
+      // Multiply-accumulate: each input channel produces 2 outputs.
+      acc[0] = vmla_lane_f32(acc[0], filter[0], input01, 0);
+      acc[1] = vmla_lane_f32(acc[1], filter[1], input01, 1);
+      acc[2] = vmla_lane_f32(acc[2], filter[2], input2, 0);
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 3; i++) {
+        vst1_f32(acc_buffer_ptr + 2 * i, acc[i]);
+      }
+      acc_buffer_ptr += 6;
+      input_ptr += input_ptr_increment;
+    }
+  }
+};
+
+// NEON kernel: strided input, input_depth fixed at 3, depth_multiplier 4.
+// Each of the 3 input channels drives 4 outputs (12 accumulator entries per
+// pixel).  `input_depth` / `depth_multiplier` are unused (fixed).
+template <>
+struct FloatDepthwiseConvKernel<true, 3, 4> {
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const float* input_ptr, int input_ptr_increment,
+                  const float* filter_ptr, float* acc_buffer_ptr) {
+    // Load the filters
+    float32x4_t filter[3];
+    for (int i = 0; i < 3; i++) {
+      filter[i] = vld1q_f32(filter_ptr + 4 * i);
+    }
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++) {
+      // NOTE: we only want 3 values, so we read it as two ops where
+      // the second op just duplicates the lane
+      const float32x2_t input01 = vld1_f32(input_ptr);
+      const float32x2_t input2 = vld1_dup_f32(input_ptr + 2);
+      // Load the accumulators from acc_buffer
+      float32x4_t acc[3];
+      for (int i = 0; i < 3; i++) {
+        acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+      }
+      // Multiply-accumulate all outputs.
+      acc[0] = vmlaq_lane_f32(acc[0], filter[0], input01, 0);
+      acc[1] = vmlaq_lane_f32(acc[1], filter[1], input01, 1);
+      acc[2] = vmlaq_lane_f32(acc[2], filter[2], input2, 0);
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 3; i++) {
+        vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 12;
+      input_ptr += input_ptr_increment;
+    }
+  }
+};
+
+// NEON kernel: strided input, input_depth fixed at 1, depth_multiplier 8.
+// The single input channel is broadcast against 8 filter taps per pixel.
+// `input_depth` / `depth_multiplier` are unused (fixed by specialization).
+template <>
+struct FloatDepthwiseConvKernel<true, 1, 8> {
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const float* input_ptr, int input_ptr_increment,
+                  const float* filter_ptr, float* acc_buffer_ptr) {
+    // Load the filters
+    float32x4_t filter[2];
+    for (int i = 0; i < 2; i++) {
+      filter[i] = vld1q_f32(filter_ptr + 4 * i);
+    }
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++) {
+      // Load the inputs
+      const float input_val = *input_ptr;
+      input_ptr += input_ptr_increment;
+      // Load the accumulators from acc_buffer
+      float32x4_t acc[2];
+      for (int i = 0; i < 2; i++) {
+        acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+      }
+      // Multiply-accumulate
+      for (int i = 0; i < 2; i++) {
+        acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val);
+      }
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 2; i++) {
+        vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 8;
+    }
+  }
+};
+
+// Specialization: strided input allowed, fixed input depth 1,
+// depth multiplier 32 — fully unrolled across eight q-registers so the
+// compiler can keep all filters resident for the whole pixel loop.
+template <>
+struct FloatDepthwiseConvKernel<true, 1, 32> {
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float* input_ptr, int input_ptr_increment,
+ const float* filter_ptr, float* acc_buffer_ptr) {
+ // Load the filters
+ float32x4_t filter_0 = vld1q_f32(filter_ptr + 4 * 0);
+ float32x4_t filter_1 = vld1q_f32(filter_ptr + 4 * 1);
+ float32x4_t filter_2 = vld1q_f32(filter_ptr + 4 * 2);
+ float32x4_t filter_3 = vld1q_f32(filter_ptr + 4 * 3);
+ float32x4_t filter_4 = vld1q_f32(filter_ptr + 4 * 4);
+ float32x4_t filter_5 = vld1q_f32(filter_ptr + 4 * 5);
+ float32x4_t filter_6 = vld1q_f32(filter_ptr + 4 * 6);
+ float32x4_t filter_7 = vld1q_f32(filter_ptr + 4 * 7);
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++) {
+ // Load the inputs
+ const float input_val = *input_ptr;
+ input_ptr += input_ptr_increment;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0);
+ float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1);
+ float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2);
+ float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3);
+ float32x4_t acc_4 = vld1q_f32(acc_buffer_ptr + 4 * 4);
+ float32x4_t acc_5 = vld1q_f32(acc_buffer_ptr + 4 * 5);
+ float32x4_t acc_6 = vld1q_f32(acc_buffer_ptr + 4 * 6);
+ float32x4_t acc_7 = vld1q_f32(acc_buffer_ptr + 4 * 7);
+ // Multiply-accumulate
+ acc_0 = vmlaq_n_f32(acc_0, filter_0, input_val);
+ acc_1 = vmlaq_n_f32(acc_1, filter_1, input_val);
+ acc_2 = vmlaq_n_f32(acc_2, filter_2, input_val);
+ acc_3 = vmlaq_n_f32(acc_3, filter_3, input_val);
+ acc_4 = vmlaq_n_f32(acc_4, filter_4, input_val);
+ acc_5 = vmlaq_n_f32(acc_5, filter_5, input_val);
+ acc_6 = vmlaq_n_f32(acc_6, filter_6, input_val);
+ acc_7 = vmlaq_n_f32(acc_7, filter_7, input_val);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3);
+ vst1q_f32(acc_buffer_ptr + 4 * 4, acc_4);
+ vst1q_f32(acc_buffer_ptr + 4 * 5, acc_5);
+ vst1q_f32(acc_buffer_ptr + 4 * 6, acc_6);
+ vst1q_f32(acc_buffer_ptr + 4 * 7, acc_7);
+ acc_buffer_ptr += 32;
+ }
+ }
+};
+
+// Specialization: strided input allowed, fixed input depth 1,
+// depth multiplier 20 — unrolled across five q-registers.
+template <>
+struct FloatDepthwiseConvKernel<true, 1, 20> {
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float* input_ptr, int input_ptr_increment,
+ const float* filter_ptr, float* acc_buffer_ptr) {
+ // Load the filters
+ float32x4_t filter_0 = vld1q_f32(filter_ptr + 4 * 0);
+ float32x4_t filter_1 = vld1q_f32(filter_ptr + 4 * 1);
+ float32x4_t filter_2 = vld1q_f32(filter_ptr + 4 * 2);
+ float32x4_t filter_3 = vld1q_f32(filter_ptr + 4 * 3);
+ float32x4_t filter_4 = vld1q_f32(filter_ptr + 4 * 4);
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++) {
+ // Load the inputs
+ const float input_val = *input_ptr;
+ input_ptr += input_ptr_increment;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0);
+ float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1);
+ float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2);
+ float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3);
+ float32x4_t acc_4 = vld1q_f32(acc_buffer_ptr + 4 * 4);
+ // Multiply-accumulate
+ acc_0 = vmlaq_n_f32(acc_0, filter_0, input_val);
+ acc_1 = vmlaq_n_f32(acc_1, filter_1, input_val);
+ acc_2 = vmlaq_n_f32(acc_2, filter_2, input_val);
+ acc_3 = vmlaq_n_f32(acc_3, filter_3, input_val);
+ acc_4 = vmlaq_n_f32(acc_4, filter_4, input_val);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3);
+ vst1q_f32(acc_buffer_ptr + 4 * 4, acc_4);
+ acc_buffer_ptr += 20;
+ }
+ }
+};
+
+// Specialization: strided input allowed, variable input depth (the 0
+// template argument means input_depth is a runtime value), depth
+// multiplier 16 — filters are re-loaded per input channel.
+template <>
+struct FloatDepthwiseConvKernel<true, 0, 16> {
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float* input_ptr, int input_ptr_increment,
+ const float* filter_ptr, float* acc_buffer_ptr) {
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++) {
+ const float* local_filter_ptr = filter_ptr;
+ const float* local_input_ptr = input_ptr;
+ for (int ic = 0; ic < input_depth; ic++) {
+ // Load the filters
+ float32x4_t filter[4];
+ for (int i = 0; i < 4; i++) {
+ filter[i] = vld1q_f32(local_filter_ptr + 4 * i);
+ }
+ local_filter_ptr += 16;
+ // Load the inputs
+ const float input_val = *local_input_ptr++;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[4];
+ for (int i = 0; i < 4; i++) {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 4; i++) {
+ acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val);
+ }
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+// Specialization: strided input allowed, fixed input depth 8,
+// depth multiplier 1 — a pure 8-wide elementwise multiply-accumulate.
+template <>
+struct FloatDepthwiseConvKernel<true, 8, 1> {
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float* input_ptr, int input_ptr_increment,
+ const float* filter_ptr, float* acc_buffer_ptr) {
+ // Load the filters
+ float32x4_t filter[2];
+ for (int i = 0; i < 2; i++) {
+ filter[i] = vld1q_f32(filter_ptr + 4 * i);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++) {
+ // Load the inputs
+ float32x4_t input[2];
+ for (int i = 0; i < 2; i++) {
+ input[i] = vld1q_f32(input_ptr + 4 * i);
+ }
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[2];
+ for (int i = 0; i < 2; i++) {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++) {
+ acc[i] = vmlaq_f32(acc[i], input[i], filter[i]);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++) {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+// Specialization: strided input allowed, fixed input depth 2,
+// depth multiplier 1. Main loop packs two output pixels into one
+// q-register (filter duplicated into both halves); a d-register tail
+// loop handles the odd remaining pixel, if any.
+template <>
+struct FloatDepthwiseConvKernel<true, 2, 1> {
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float* input_ptr, int input_ptr_increment,
+ const float* filter_ptr, float* acc_buffer_ptr) {
+ float32x2_t filter = vld1_f32(filter_ptr);
+ float32x4_t filter_x4 = vcombine_f32(filter, filter);
+ int outp = 0;
+
+ // Handle two output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2) {
+ // Load the inputs
+ float32x2_t input_1 = vld1_f32(input_ptr);
+ input_ptr += input_ptr_increment;
+ float32x2_t input_2 = vld1_f32(input_ptr);
+ input_ptr += input_ptr_increment;
+ float32x4_t input = vcombine_f32(input_1, input_2);
+
+ // Load the accumulators from acc_buffer
+ float32x4_t acc = vld1q_f32(acc_buffer_ptr);
+
+ // Multiply-accumulate
+ acc = vmlaq_f32(acc, input, filter_x4);
+
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++) {
+ // Load the inputs
+ float32x2_t input = vld1_f32(input_ptr);
+ input_ptr += input_ptr_increment;
+
+ // Load the accumulators from acc_buffer
+ float32x2_t acc = vld1_f32(acc_buffer_ptr);
+
+ // Multiply-accumulate
+ acc = vmla_f32(acc, input, filter);
+
+ // Store the accumulators back to acc_buffer
+ vst1_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
+
+// Specialization: strided input allowed, fixed input depth 4,
+// depth multiplier 1 — one q-register multiply-accumulate per pixel.
+template <>
+struct FloatDepthwiseConvKernel<true, 4, 1> {
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float* input_ptr, int input_ptr_increment,
+ const float* filter_ptr, float* acc_buffer_ptr) {
+ float32x4_t filter = vld1q_f32(filter_ptr);
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++) {
+ // Load the inputs
+ float32x4_t input = vld1q_f32(input_ptr);
+ // Load the accumulators from acc_buffer
+ float32x4_t acc = vld1q_f32(acc_buffer_ptr);
+ // Multiply-accumulate
+ acc = vmlaq_f32(acc, input, filter);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+#endif
+
+// Accumulates the effect of one row of the filter, on a segment of one row
+// of the output, accessing the corresponding one row of the input.
+// A template argument of 0 for kFixedInputDepth/kFixedDepthMultiplier means
+// "not fixed" (the runtime value is used); non-zero values are DCHECKed
+// against the corresponding runtime argument.
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+void FloatDepthwiseConvAccumRow(int stride, int dilation_factor,
+ int input_depth, int input_width,
+ const float* input_data, int pad_width,
+ int depth_multiplier, int filter_width,
+ const float* filter_data,
+ int out_x_buffer_start, int out_x_buffer_end,
+ int output_depth, float* acc_buffer) {
+ // Sanity check parameters. This is important in particular to ensure
+ // that we keep the number of template instantiations minimal, so we don't
+ // increase binary size unnecessarily.
+ static_assert(kFixedDepthMultiplier || !kFixedInputDepth, "");
+ static_assert(kFixedInputDepth || kAllowStrided, "");
+ TFLITE_DCHECK(stride == 1 || kAllowStrided);
+ if (kFixedInputDepth) {
+ TFLITE_DCHECK_EQ(input_depth, kFixedInputDepth);
+ }
+ if (kFixedDepthMultiplier) {
+ TFLITE_DCHECK_EQ(depth_multiplier, kFixedDepthMultiplier);
+ }
+ TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+ const int input_ptr_increment = stride * input_depth;
+ const float* filter_base_ptr = filter_data;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+ // For the current (filter_x, filter_y) point in the filter,
+ // compute the boundaries of the corresponding output row segment.
+ // (NOTE(review): "unclampled" is a pre-existing local-name typo for
+ // "unclamped"; kept unchanged here.)
+ int out_x_loop_start_unclampled = 0;
+ int out_x_loop_end_unclampled = 0;
+ if (kAllowStrided) {
+ // Strides 2 and 4 get constant-divisor versions of the general
+ // formula in the else branch below.
+ if (stride == 2) {
+ out_x_loop_start_unclampled =
+ (pad_width - dilation_factor * filter_x + 1) / 2;
+ out_x_loop_end_unclampled =
+ (pad_width + input_width - dilation_factor * filter_x + 1) / 2;
+ } else if (stride == 4) {
+ out_x_loop_start_unclampled =
+ (pad_width - dilation_factor * filter_x + 3) / 4;
+ out_x_loop_end_unclampled =
+ (pad_width + input_width - dilation_factor * filter_x + 3) / 4;
+ } else {
+ out_x_loop_start_unclampled =
+ (pad_width - dilation_factor * filter_x + stride - 1) / stride;
+ out_x_loop_end_unclampled = (pad_width + input_width -
+ dilation_factor * filter_x + stride - 1) /
+ stride;
+ }
+ } else {
+ out_x_loop_start_unclampled = pad_width - dilation_factor * filter_x;
+ out_x_loop_end_unclampled =
+ pad_width + input_width - dilation_factor * filter_x;
+ }
+ // The kernel will have to iterate on the segment of the
+ // output row that starts at out_x_loop_start and out_x_loop_end.
+ const int out_x_loop_start =
+ std::max(out_x_buffer_start, out_x_loop_start_unclampled);
+ const int out_x_loop_end =
+ std::min(out_x_buffer_end, out_x_loop_end_unclampled);
+
+ float* acc_buffer_ptr =
+ acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+ const int in_x_origin =
+ (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+ const float* input_ptr = input_data + in_x_origin * input_depth;
+ const int num_output_pixels = out_x_loop_end - out_x_loop_start;
+ FloatDepthwiseConvKernel<kAllowStrided, kFixedInputDepth,
+ kFixedDepthMultiplier>::Run(num_output_pixels,
+ input_depth,
+ depth_multiplier,
+ input_ptr,
+ input_ptr_increment,
+ filter_base_ptr,
+ acc_buffer_ptr);
+ filter_base_ptr += output_depth;
+ }
+}
+
+// Generic fallback of FloatDepthwiseConvAccumRow, portable, non-templatized.
+// Scalar triple loop: output pixel -> input channel -> depth multiplier.
+inline void FloatDepthwiseConvAccumRowGeneric(
+ int stride, int dilation_factor, int input_depth, int input_width,
+ const float* input_data, int pad_width, int depth_multiplier,
+ int filter_width, const float* filter_data, int out_x_buffer_start,
+ int out_x_buffer_end, int output_depth, float* acc_buffer) {
+ const float* filter_base_ptr = filter_data;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+ // Clamp the output segment for this filter tap to [out_x_buffer_start,
+ // out_x_buffer_end) intersected with the valid (non-padding) range.
+ const int out_x_loop_start = std::max(
+ out_x_buffer_start,
+ (pad_width - dilation_factor * filter_x + stride - 1) / stride);
+ const int out_x_loop_end = std::min(
+ out_x_buffer_end,
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) /
+ stride);
+
+ float* acc_buffer_ptr =
+ acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+ const int in_x_origin =
+ (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+ const float* input_ptr = input_data + in_x_origin * input_depth;
+ // (stride - 1) * input_depth, because the inner channel loop below has
+ // already advanced input_ptr by input_depth when it finishes.
+ const int input_ptr_increment = (stride - 1) * input_depth;
+ for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++) {
+ const float* filter_ptr = filter_base_ptr;
+ for (int ic = 0; ic < input_depth; ++ic) {
+ const float input_val = *input_ptr++;
+ for (int m = 0; m < depth_multiplier; m++) {
+ const float filter_val = *filter_ptr++;
+ *acc_buffer_ptr++ += filter_val * input_val;
+ }
+ }
+ input_ptr += input_ptr_increment;
+ }
+ filter_base_ptr += output_depth;
+ }
+}
+
+// Zero-initializes the accumulator buffer
+// (num_output_pixels * output_depth floats).
+inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth, float* acc_buffer) {
+ memset(acc_buffer, 0, sizeof(acc_buffer[0]) * output_depth * num_output_pixels);
+
+}
+
+// Float depthwise convolution (NHWC). Selects the best available row
+// accumulation kernel for the given stride/input_depth/depth_multiplier,
+// then accumulates the output in fixed-size stack-buffer chunks.
+// Note: this variant takes no bias input; the accumulator is zeroed.
+inline void DepthwiseConv(
+ const DepthwiseParams& params, const RuntimeShape& input_shape,
+ const float* input_data, const RuntimeShape& filter_shape,
+ const float* filter_data, const RuntimeShape& output_shape,
+ float* output_data) {
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int depth_multiplier = params.depth_multiplier;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = input_shape.Dims(3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+
+ // Fixed-size stack buffer; the DCHECK below guarantees at least one
+ // output pixel (output_depth floats) fits.
+ static const int kAccBufferMaxSize = 4832;
+ float acc_buffer[kAccBufferMaxSize];
+ TFLITE_DCHECK_GE(kAccBufferMaxSize, output_depth);
+ const int kOutputPixelsInAccBuffer = kAccBufferMaxSize / output_depth;
+ const int kAccBufferActualSize = kOutputPixelsInAccBuffer * output_depth;
+ TFLITE_DCHECK_LE(kOutputPixelsInAccBuffer * output_depth,
+ kAccBufferActualSize);
+ TFLITE_DCHECK_LE(kAccBufferActualSize, kAccBufferMaxSize);
+ TFLITE_DCHECK_GE(kOutputPixelsInAccBuffer, 1);
+
+ // row_accum_func will point to the core accumulation function to be used
+ // for this DepthwiseConv op.
+ using row_accum_func_t = decltype(&FloatDepthwiseConvAccumRowGeneric);
+ row_accum_func_t row_accum_func = nullptr;
+
+#define TFMINI_USE_DEPTHWISECONV_KERNEL(ALLOW_STRIDED, FIXED_INPUT_DEPTH, \
+ FIXED_DEPTH_MULTIPLIER) \
+ if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \
+ (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \
+ depth_multiplier == FIXED_DEPTH_MULTIPLIER) { \
+ row_accum_func = \
+ FloatDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH, \
+ FIXED_DEPTH_MULTIPLIER>; \
+ }
+
+#ifdef USE_NEON
+ // We go over our list of kernels by decreasing order of preference
+ // for the cases where multiple kernels could apply.
+
+ // Start with the fastest kernels: AllowStrided=false, fixed input depth.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 1)
+
+ // Next come the strided kernels: AllowStrided=true, fixed input depth.
+ // They are a bit less efficient, but allow stride!=1.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 20)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 2, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 3, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 3, 4)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 4, 1)
+
+ // Finally, the kernels allowing a variable input depth,
+ // these are the least efficient but most general kernels.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 16)
+
+#endif // USE_NEON
+
+#undef TFMINI_USE_DEPTHWISECONV_KERNEL
+
+ // No matching fast kernel found, use slow fallback.
+ if (!row_accum_func) {
+ row_accum_func = FloatDepthwiseConvAccumRowGeneric;
+ }
+
+ const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2);
+ const int input_batch_stride = input_height_stride * input_shape.Dims(1);
+ const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2);
+
+ // Now that we have determined row_accum_func, we can start work.
+ float* output_ptr = output_data;
+ for (int b = 0; b < batches; ++b) {
+ for (int out_y = 0; out_y < output_height; ++out_y) {
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ const int filter_y_start =
+ std::max(0, (-in_y_origin + dilation_height_factor - 1) /
+ dilation_height_factor);
+ const int filter_y_end =
+ std::min(filter_height,
+ (input_height - in_y_origin + dilation_height_factor - 1) /
+ dilation_height_factor);
+ for (int out_x_buffer_start = 0; out_x_buffer_start < output_width;
+ out_x_buffer_start += kOutputPixelsInAccBuffer) {
+ const int out_x_buffer_end = std::min(
+ output_width, out_x_buffer_start + kOutputPixelsInAccBuffer);
+ // We call a 'pixel' a group of activation that share all but the
+ // 'depth'/'channel' coordinate. num_output_pixels is the number of
+ // output pixels that we will accumulate in this loop iteration.
+ const int num_output_pixels = out_x_buffer_end - out_x_buffer_start;
+ // Zero-initialize our local accumulator for this chunk of output
+ // pixels (this kernel variant has no bias input).
+ DepthwiseConvInitAccBuffer(num_output_pixels, output_depth, acc_buffer);
+ // Accumulation loop. Most of the time should be spent in here.
+ for (int filter_y = filter_y_start; filter_y < filter_y_end;
+ ++filter_y) {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ row_accum_func(
+ stride_width, dilation_width_factor, input_depth, input_width,
+ input_data + in_y * input_height_stride + b * input_batch_stride,
+ pad_width, depth_multiplier, filter_width,
+ filter_data + filter_y * filter_height_stride, out_x_buffer_start,
+ out_x_buffer_end, output_depth, acc_buffer);
+ }
+ // Finished accumulating. Now store to destination.
+ const int num_output_values = output_depth * num_output_pixels;
+ int i = 0;
+// TODO(benoitjacob) optimized code goes here
+#ifdef USE_NEON
+ // Handle 16 values at a time
+ for (; i <= num_output_values - 16; i += 16) {
+ float32x4_t acc[4];
+ for (int k = 0; k < 4; k++) {
+ acc[k] = vld1q_f32(acc_buffer + i + 4 * k);
+ }
+ for (int k = 0; k < 4; k++) {
+ vst1q_f32(output_ptr + 4 * k, acc[k]);
+ }
+ output_ptr += 16;
+ }
+ // Handle 4 values at a time
+ for (; i <= num_output_values - 4; i += 4) {
+ float32x4_t acc = vld1q_f32(acc_buffer + i);
+
+ vst1q_f32(output_ptr, acc);
+ output_ptr += 4;
+ }
+#endif
+ // Handle leftover values, one by one. This is very slow.
+ for (; i < num_output_values; i++) {
+ float acc = acc_buffer[i];
+ *output_ptr++ = acc;
+ }
+ }
+ }
+ }
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_elementwise.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_elementwise.def
new file mode 100644
index 000000000..422b34242
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_elementwise.def
@@ -0,0 +1,273 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// TODO(ycling): Refactoring. Remove BroadcastLogical and use the more
+// generalized and efficient BroadcastBinaryFunction.
+//
+// Also appears to duplicate MinimumMaximum.
+//
+// Applies func element-wise over up-to-4D inputs with numpy-style
+// broadcasting, one scalar call per output element (hence "Slow").
+// R: Result type. T1: Input 1 type. T2: Input 2 type.
+template <typename R, typename T1, typename T2>
+inline void BroadcastBinaryFunction4DSlow(
+ const RuntimeShape& unextended_input1_shape, const T1* input1_data,
+ const RuntimeShape& unextended_input2_shape, const T2* input2_data,
+ const RuntimeShape& unextended_output_shape, R* output_data,
+ R (* func)(T1, T2)) {
+ TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
+ unextended_input2_shape, &desc1, &desc2);
+
+ for (int b = 0; b < output_shape.Dims(0); ++b) {
+ for (int y = 0; y < output_shape.Dims(1); ++y) {
+ for (int x = 0; x < output_shape.Dims(2); ++x) {
+ for (int c = 0; c < output_shape.Dims(3); ++c) {
+ auto out_idx = Offset(output_shape, b, y, x, c);
+ auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+ auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+ auto in1_val = input1_data[in1_idx];
+ auto in2_val = input2_data[in2_idx];
+ output_data[out_idx] = func(in1_val, in2_val);
+ }
+ }
+ }
+ }
+}
+
+// Applies func element-wise over identically-shaped flat inputs.
+// R: Result type. T1: Input 1 type. T2: Input 2 type.
+// TODO(renjieliu): Refactor other binary functions to use this one.
+template <typename R, typename T1, typename T2>
+inline void BinaryFunction(const RuntimeShape& input1_shape,
+ const T1* input1_data,
+ const RuntimeShape& input2_shape,
+ const T2* input2_data,
+ const RuntimeShape& output_shape, R* output_data,
+ R (* func)(T1, T2)) {
+ const int element_count =
+ MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ for (int idx = 0; idx < element_count; ++idx) {
+ output_data[idx] = func(input1_data[idx], input2_data[idx]);
+ }
+}
+
+// Element-wise float subtraction. Sub_ runs a flat loop with a NEON fast
+// path (16-wide, then 4-wide, then scalar tail); Call dispatches to the
+// slow 4-D broadcasting path when the input shapes differ.
+struct Sub {
+ static inline void Sub_(const float* input1_data, const float* input2_data,
+ float* output_data, const int size) {
+ int i = 0;
+#ifdef USE_NEON
+ for (; i <= size - 16; i += 16) {
+ auto a10 = vld1q_f32(input1_data + i);
+ auto a11 = vld1q_f32(input1_data + i + 4);
+ auto a12 = vld1q_f32(input1_data + i + 8);
+ auto a13 = vld1q_f32(input1_data + i + 12);
+ auto a20 = vld1q_f32(input2_data + i);
+ auto a21 = vld1q_f32(input2_data + i + 4);
+ auto a22 = vld1q_f32(input2_data + i + 8);
+ auto a23 = vld1q_f32(input2_data + i + 12);
+ auto x0 = vsubq_f32(a10, a20);
+ auto x1 = vsubq_f32(a11, a21);
+ auto x2 = vsubq_f32(a12, a22);
+ auto x3 = vsubq_f32(a13, a23);
+ vst1q_f32(output_data + i, x0);
+ vst1q_f32(output_data + i + 4, x1);
+ vst1q_f32(output_data + i + 8, x2);
+ vst1q_f32(output_data + i + 12, x3);
+ }
+ for (; i <= size - 4; i += 4) {
+ auto a1 = vld1q_f32(input1_data + i);
+ auto a2 = vld1q_f32(input2_data + i);
+ auto x = vsubq_f32(a1, a2);
+ vst1q_f32(output_data + i, x);
+ }
+#endif // USE_NEON
+
+ // Scalar tail (entire loop when NEON is unavailable).
+ for (; i < size; i++) {
+ output_data[i] = input1_data[i] - input2_data[i];
+ }
+ }
+
+ static inline void Call(
+ const float* input1_data, const RuntimeShape& in1_shape,
+ const float* input2_data, const RuntimeShape& in2_shape,
+ float* output_data, const RuntimeShape& out_shape) {
+ if (in1_shape != in2_shape) {
+ BroadcastBinaryFunction4DSlow<float, float, float>(
+ in1_shape, input1_data,
+ in2_shape, input2_data,
+ out_shape, output_data,
+ [](float a, float b) { return a - b; }
+ );
+ } else {
+ Sub_(input1_data, input2_data, output_data, out_shape.FlatSize());
+ }
+ }
+};
+
+// Element-wise float addition. Same structure as Sub: flat NEON fast path
+// in Add_, slow 4-D broadcasting in Call when the shapes differ.
+struct Add {
+ static inline void Add_(const float* input1_data, const float* input2_data,
+ float* output_data, const int size) {
+ int i = 0;
+#ifdef USE_NEON
+ for (; i <= size - 16; i += 16) {
+ auto a10 = vld1q_f32(input1_data + i);
+ auto a11 = vld1q_f32(input1_data + i + 4);
+ auto a12 = vld1q_f32(input1_data + i + 8);
+ auto a13 = vld1q_f32(input1_data + i + 12);
+ auto a20 = vld1q_f32(input2_data + i);
+ auto a21 = vld1q_f32(input2_data + i + 4);
+ auto a22 = vld1q_f32(input2_data + i + 8);
+ auto a23 = vld1q_f32(input2_data + i + 12);
+ auto x0 = vaddq_f32(a10, a20);
+ auto x1 = vaddq_f32(a11, a21);
+ auto x2 = vaddq_f32(a12, a22);
+ auto x3 = vaddq_f32(a13, a23);
+ vst1q_f32(output_data + i, x0);
+ vst1q_f32(output_data + i + 4, x1);
+ vst1q_f32(output_data + i + 8, x2);
+ vst1q_f32(output_data + i + 12, x3);
+ }
+ for (; i <= size - 4; i += 4) {
+ auto a1 = vld1q_f32(input1_data + i);
+ auto a2 = vld1q_f32(input2_data + i);
+ auto x = vaddq_f32(a1, a2);
+ vst1q_f32(output_data + i, x);
+ }
+#endif // USE_NEON
+
+ // Scalar tail (entire loop when NEON is unavailable).
+ for (; i < size; i++) {
+ output_data[i] = input1_data[i] + input2_data[i];
+ }
+ }
+
+ static inline void Call(
+ const float* input1_data, const RuntimeShape& in1_shape,
+ const float* input2_data, const RuntimeShape& in2_shape,
+ float* output_data, const RuntimeShape& out_shape) {
+ if (in1_shape != in2_shape) {
+ BroadcastBinaryFunction4DSlow<float, float, float>(
+ in1_shape, input1_data,
+ in2_shape, input2_data,
+ out_shape, output_data,
+ [](float a, float b) { return a + b; }
+ );
+ } else {
+ Add_(input1_data, input2_data, output_data, out_shape.FlatSize());
+ }
+ }
+};
+
+// Element-wise float maximum: Eigen cwiseMax when shapes match, otherwise
+// the slow 4-D broadcasting path.
+struct Max {
+ static inline void Call(
+ const float* input1_data, const RuntimeShape& in1_shape,
+ const float* input2_data, const RuntimeShape& in2_shape,
+ float* output_data, const RuntimeShape& out_shape) {
+ if (in1_shape == in2_shape) {
+ // Identical shapes: coefficient-wise max over flat Eigen views.
+ auto lhs = MapAsVector(input1_data, in1_shape.FlatSize());
+ auto rhs = MapAsVector(input2_data, in2_shape.FlatSize());
+ auto dst = MapAsVector(output_data, out_shape.FlatSize());
+ dst = lhs.cwiseMax(rhs);
+ } else {
+ BroadcastBinaryFunction4DSlow<float, float, float>(
+ in1_shape, input1_data, in2_shape, input2_data, out_shape,
+ output_data, [](float x, float y) { return std::max(x, y); });
+ }
+ }
+};
+
+// Element-wise float multiplication. Same structure as Sub/Add: flat NEON
+// fast path in Mul_, slow 4-D broadcasting in Call when the shapes differ.
+struct Mul {
+ static inline void Call(const float* input1_data, const RuntimeShape& in1_shape,
+ const float* input2_data, const RuntimeShape& in2_shape,
+ float* output_data, const RuntimeShape& out_shape) {
+ if (in1_shape != in2_shape) {
+ BroadcastBinaryFunction4DSlow<float, float, float>(
+ in1_shape, input1_data,
+ in2_shape, input2_data,
+ out_shape, output_data,
+ [](float a, float b) { return a * b; });
+ } else {
+ Mul_(input1_data, input2_data, output_data, out_shape.FlatSize());
+ }
+ }
+
+ static inline void Mul_(const float* input1_data, const float* input2_data,
+ float* output_data, const int size) {
+
+ int i = 0;
+#ifdef USE_NEON
+ for (; i <= size - 16; i += 16) {
+ auto a10 = vld1q_f32(input1_data + i);
+ auto a11 = vld1q_f32(input1_data + i + 4);
+ auto a12 = vld1q_f32(input1_data + i + 8);
+ auto a13 = vld1q_f32(input1_data + i + 12);
+ auto a20 = vld1q_f32(input2_data + i);
+ auto a21 = vld1q_f32(input2_data + i + 4);
+ auto a22 = vld1q_f32(input2_data + i + 8);
+ auto a23 = vld1q_f32(input2_data + i + 12);
+ auto x0 = vmulq_f32(a10, a20);
+ auto x1 = vmulq_f32(a11, a21);
+ auto x2 = vmulq_f32(a12, a22);
+ auto x3 = vmulq_f32(a13, a23);
+
+ vst1q_f32(output_data + i, x0);
+ vst1q_f32(output_data + i + 4, x1);
+ vst1q_f32(output_data + i + 8, x2);
+ vst1q_f32(output_data + i + 12, x3);
+ }
+ for (; i <= size - 4; i += 4) {
+ auto a1 = vld1q_f32(input1_data + i);
+ auto a2 = vld1q_f32(input2_data + i);
+ auto x = vmulq_f32(a1, a2);
+
+ vst1q_f32(output_data + i, x);
+ }
+#endif // USE_NEON
+
+ // Scalar tail (entire loop when NEON is unavailable).
+ for (; i < size; i++) {
+ output_data[i] = input1_data[i] * input2_data[i];
+ }
+ }
+};
+
+// TODO: consider moving Div to a separate file — unlike everything else
+// here it is not extracted from TF Lite (added 23.11.2018).
+// Element-wise float division: Eigen cwiseQuotient when shapes match,
+// otherwise the slow 4-D broadcasting path.
+struct Div {
+ static inline void Call(
+ const float* input1_data, const RuntimeShape& in1_shape,
+ const float* input2_data, const RuntimeShape& in2_shape,
+ float* output_data, const RuntimeShape& out_shape) {
+ if (in1_shape == in2_shape) {
+ // Identical shapes: coefficient-wise quotient over flat Eigen views.
+ auto lhs = MapAsVector(input1_data, in1_shape.FlatSize());
+ auto rhs = MapAsVector(input2_data, in2_shape.FlatSize());
+ auto dst = MapAsVector(output_data, out_shape.FlatSize());
+ dst = lhs.cwiseQuotient(rhs);
+ } else {
+ BroadcastBinaryFunction4DSlow<float, float, float>(
+ in1_shape, input1_data, in2_shape, input2_data, out_shape,
+ output_data, [](float x, float y) { return x / y; });
+ }
+ }
+};
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_elu.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_elu.def
new file mode 100644
index 000000000..8e4122278
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_elu.def
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// ELU activation computed in stages over Eigen views:
+// output = alpha * (exp(min(x, 0)) - 1) + max(x, 0).
+inline void ELU(const float* input_data, const Dims<4>& input_dims,
+ float alpha, float* output_data,
+ const Dims<4>& output_dims) {
+
+ const auto input = MapAsVector(input_data, input_dims);
+ auto output = MapAsVector(output_data, output_dims);
+
+ // Negative part: clamp above at 0 first so exp() never overflows.
+ output = input.array().min(0.0f);
+ // Separate op for EXP vectorization
+ output = output.array().exp();
+ output = (output.array() - 1.0f) * alpha;
+ // Positive part passes through unchanged.
+ output += input.cwiseMax(0.0f);
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_fully_connected.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_fully_connected.def
new file mode 100644
index 000000000..22b1c3b35
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_fully_connected.def
@@ -0,0 +1,27 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
+ const float* weights_data, const Dims<4>& weights_dims,
+ float* output_data, const Dims<4>& output_dims) {
+ const auto input_matrix_map =
+ MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
+ const auto filter_matrix_map =
+ MapAsMatrixWithFirstDimAsRows(weights_data, weights_dims);
+ auto output_matrix_map =
+ MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+
+ Gemm(filter_matrix_map, input_matrix_map, &output_matrix_map);
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_gather.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_gather.def
new file mode 100644
index 000000000..bb4a55f0d
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_gather.def
@@ -0,0 +1,48 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+template <typename T, typename CoordsT = int32>
+inline void Gather(const GatherParams& op_params,
+ const RuntimeShape& input_shape, const T* input_data,
+ const RuntimeShape& coords_shape, const CoordsT* coords_data,
+ const RuntimeShape& output_shape, T* output_data) {
+ int axis = op_params.axis;
+ if (axis < 0) {
+ axis += input_shape.DimensionsCount();
+ }
+ TFLITE_DCHECK_GE(axis, 0);
+ TFLITE_DCHECK_LT(axis, input_shape.DimensionsCount());
+ const int axis_size = input_shape.Dims(axis);
+ const int coords_count = coords_shape.FlatSize();
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; ++i) {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ int inner_size = 1;
+ for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i) {
+ inner_size *= input_shape.Dims(i);
+ }
+
+ for (int outer = 0; outer < outer_size; ++outer) {
+ for (int i = 0; i < coords_count; ++i) {
+ int coord = static_cast<int>(coords_data[i]);
+ TFLITE_DCHECK_GE(coord, 0);
+ TFLITE_DCHECK_LT(coord, axis_size);
+ std::memcpy(
+ output_data + (outer * coords_count + i) * inner_size,
+ input_data + (outer * axis_size + coord) * inner_size,
+ sizeof(T) * inner_size);
+ }
+ }
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_header_types.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_header_types.def
new file mode 100644
index 000000000..771329cdd
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_header_types.def
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstring>
+#include <initializer_list>
+#include <memory>
+#include <cassert>
+#include <algorithm>
+
+using index_t = long;
+
+const index_t MAX_DIMS = 8;
+
+/** @brief Shape of Tensor object
+ *
+ * This class represents size of multidimensional table
+ */
+class Shape
+{
+public:
+
+ Shape()
+ {
+ _dims = 0;
+ }
+
+ template <class T>
+ Shape(std::initializer_list<T> data): _dims(data.size())
+ {
+ assert(_dims <= MAX_DIMS);
+ index_t *dataPtr = _data;
+ for (T value: data)
+ {
+ *dataPtr++ = static_cast<index_t>(value);
+ }
+ }
+
+ Shape(const Shape &orig): _dims(orig._dims)
+ {
+ for (index_t i = 0; i < _dims; ++i)
+ _data[i] = orig._data[i];
+ }
+
+ Shape &operator=(const Shape &orig)
+ {
+ _dims = orig._dims;
+ for (index_t i = 0; i < _dims; ++i)
+ _data[i] = orig._data[i];
+ return *this;
+ }
+
+ /** Returns number of table dimensions*/
+ index_t getDims() const
+ {
+ return _dims;
+ }
+
+ /** Sets number of table dimensions*/
+ void setDims(index_t dims)
+ {
+ assert(dims < MAX_DIMS);
+ _dims = dims;
+ }
+
+ /** Returns size of selected dimension*/
+ index_t &operator[](index_t dim)
+ {
+ assert(dim < _dims);
+ return _data[dim];
+ }
+
+ /** Returns size of selected dimension, constant version*/
+ index_t operator[](index_t dim) const
+ {
+ assert(dim < _dims);
+ return _data[dim];
+ }
+
+ /** Returns number of elements in table*/
+ index_t getNumElems() const
+ {
+ index_t volume = 1;
+ for (index_t i = 0; i < _dims; ++i)
+ {
+ volume *= _data[i];
+ }
+ return volume;
+ }
+
+private:
+ index_t _data[MAX_DIMS];
+ index_t _dims;
+};
+
+/** This class points to one cell in table*/
+using Index = Shape;
+
+/** @brief Multidimensional table
+ *
+ * This class represents multidimensional table.
+ * It is used to provide NN model interface and intermediate objects in inference sequence.
+ */
+class Tensor
+{
+public:
+ Tensor(): Tensor(Shape{}){}
+
+ Tensor(Tensor &&orig): _shape(orig._shape), _data(orig._data), _managed(orig._managed)
+ {
+ orig._managed = false;
+ }
+
+ /** Constructs table, that references external data as its content*/
+ Tensor(const Shape& shape, float *data): _shape(shape), _data(data){}
+
+ Tensor(const Shape& shape): _shape(shape), _data(new float[shape.getNumElems()]), _managed(true) {}
+
+ ~Tensor()
+ {
+ if (_managed)
+ delete [] _data;
+ }
+
+ /** Copies data from external source into table*/
+ void fillData(const float *data, const index_t num_elements)
+ {
+ assert(_managed);
+ std::memcpy(_data, data, num_elements * sizeof(float));
+ }
+
+ Tensor& operator=(const Tensor& t) {
+ if (this == &t)
+ return *this;
+
+ if (!t._managed) {
+ if (_managed)
+ delete _data;
+
+ _managed = false;
+ _data = t._data;
+ _shape = t._shape;
+ } else {
+ // this tensor is not constant so we can write data into it
+ assert(_managed);
+ reshape(t._shape);
+ fillData(t._data, _shape.getNumElems());
+ }
+
+ return *this;
+ }
+
+ /** Access element in table by index*/
+ float &at(const Index &idx)
+ {
+ return *(_data + getOffset(idx));
+ }
+
+ /** Access element in table by index, constant version*/
+ float at(const Index &idx) const
+ {
+ return *(_data + getOffset(idx));
+ }
+
+ void reshape(const Shape &shape)
+ {
+ index_t oldVolume = _shape.getNumElems();
+ _shape = shape;
+ if (_managed && oldVolume != shape.getNumElems())
+ {
+ float* new_data = new float[shape.getNumElems()];
+ delete [] _data;
+ std::swap(new_data, _data);
+ }
+ }
+
+ /** Free memory, set empty shape */
+ void clean()
+ {
+ _shape.setDims(0);
+ if (_managed)
+ delete [] _data;
+ _managed = false;
+ }
+
+ /** Returns pointer to raw data*/
+ float *getData()
+ {
+ return _data;
+ }
+
+ /** Returns pointer to raw data, constant version*/
+ const float *getData() const
+ {
+ return _data;
+ }
+
+ /** Returns size object of this table*/
+ const Shape &getShape() const
+ {
+ return _shape;
+ }
+
+private:
+ index_t getOffset(const Index &idx) const
+ {
+ assert(idx.getDims() == _shape.getDims());
+ index_t offset = 0;
+ index_t stride = 1;
+ for (index_t i = _shape.getDims() - 1; i >= 0; --i)
+ {
+ assert(idx[i] < _shape[i]);
+ offset += stride * idx[i];
+ stride *= _shape[i];
+ }
+ return offset;
+ }
+
+ Shape _shape;
+ float *_data;
+ bool _managed = false;
+};
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_leaky_relu.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_leaky_relu.def
new file mode 100644
index 000000000..7c07b3539
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_leaky_relu.def
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+void leakyRelu(Tensor& out, const char* params, const Tensor& in) {
+ const float alpha = deserializeT<float>(params);
+ out.reshape(in.getShape());
+
+ const auto input = MapAsVector(in.getData(), static_cast<size_t>(in.getShape().getNumElems()));
+ auto output = MapAsVector(out.getData(), static_cast<size_t>(in.getShape().getNumElems()));
+
+ output = (alpha * input).cwiseMax(input);
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_operations.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_operations.def
new file mode 100644
index 000000000..f78274e5c
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_operations.def
@@ -0,0 +1,656 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <string>
+#include <cstdint>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <cstring>
+
+using namespace std;
+
+#define UNUSED(x) ((void)(x))
+
+static_assert(numeric_limits<float>::is_iec559, "Unsupported float type");
+
+void readParameters(char *&data, size_t &len, const string &path,
+ uint32_t expectedVersion, uint32_t expectedHash)
+{
+ static_assert(sizeof(expectedVersion) == params::VERSION_LEN, "version length mismatch");
+ static_assert(sizeof(expectedHash) == params::HASH_LEN, "hash length mismatch");
+ int fd;
+ struct stat st;
+ fd = open(path.c_str(), O_RDONLY);
+ assert(fd != -1);
+
+ // gather file info
+ int statRes = fstat(fd, &st);
+ assert(statRes != -1);
+ UNUSED(statRes);
+ len = static_cast<size_t>(st.st_size);
+ assert(len >= params::HEADER_LEN);
+
+ // check magic correctness
+ char magic[params::MAGIC_LEN + 1] = {};
+ ssize_t magic_len = read(fd, magic, params::MAGIC_LEN);
+ assert(magic_len == params::MAGIC_LEN);
+ UNUSED(magic_len);
+ assert(strncmp(magic, params::MAGIC, params::MAGIC_LEN) == 0);
+ UNUSED(magic);
+
+  // check version correctness
+ decltype(expectedVersion) version;
+ ssize_t version_len = read(fd, &version, sizeof(version));
+ assert(version_len == sizeof(version));
+ UNUSED(version_len);
+ assert(version == expectedVersion);
+ UNUSED(version);
+
+ // check hash correctness
+ decltype(expectedHash) hash;
+ ssize_t hash_len = read(fd, &hash, sizeof(hash));
+ assert(hash_len == sizeof(hash));
+ UNUSED(hash_len);
+ assert(hash == expectedHash);
+ UNUSED(hash);
+
+ data = static_cast<char *>(mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0));
+ int is_error = close(fd);
+ assert(!is_error && "Can not close file!");
+ UNUSED(is_error);
+}
+
+void releaseParameters(char *data, size_t len)
+{
+ int res = munmap(data, len);
+ assert(res == 0);
+ UNUSED(res);
+}
+
+template <int rank>
+size_t volume(Dims<rank> d)
+{
+ size_t v = 1;
+ for (int i = 0; i < rank; ++i)
+ {
+ v *= d.sizes[i];
+ }
+ return v;
+}
+
+RuntimeShape shapeToRuntimeShape(const Shape& s) {
+ const int rank = static_cast<int>(s.getDims());
+ RuntimeShape sh(rank);
+ for (int i = 0; i < rank; i++) {
+ sh.SetDim(i, static_cast<int32_t>(s[i]));
+ }
+ return sh;
+}
+
+Dims<4> shapeToDims(const Shape &s)
+{
+ Dims<4> dims;
+ const int rank = static_cast<int>(s.getDims());
+ assert(rank >= 1 && rank <= 4);
+ int stride = 1;
+ for (int i = 0; i < rank; ++i)
+ {
+ dims.sizes[i] = static_cast<int>(s[rank - 1 - i]);
+ dims.strides[i] = stride;
+ stride *= s[rank - 1 - i];
+ }
+ for (int i = rank; i < 4; ++i)
+ {
+ dims.sizes[i] = 1;
+ dims.strides[i] = stride;
+ }
+ return dims;
+}
+
+template <class T>
+static inline T deserializeT(const char *&buf)
+{
+ T v;
+ const char *end = buf + sizeof(T);
+ copy(buf, end, reinterpret_cast<char *>(&v));
+ buf = end;
+ return v;
+}
+
+static inline Shape deserializeShape(const char *&buf)
+{
+ Shape s;
+ int32_t rank = deserializeT<int32_t>(buf);
+ s.setDims(rank);
+ for (int i = 0; i < rank; ++i)
+ {
+ s[i] = deserializeT<int32_t>(buf);
+ }
+ return s;
+}
+
+static inline vector<int32_t> deserializeStrides(const char *&buf)
+{
+ vector<int32_t> strides;
+ const int num_strides = deserializeT<int>(buf);
+ for (int i = 0; i < num_strides; ++i) {
+ strides.emplace_back(deserializeT<int32_t>(buf));
+ }
+ return strides;
+}
+
+__attribute__((unused))
+static bool isAddrAligned(const void *data, int alignment)
+{
+ return (reinterpret_cast<uintptr_t>(data) % alignment) == 0;
+}
+
+static inline Tensor deserializeTensor(const char*& buf)
+{
+ int32_t d_type = deserializeT<int32_t>(buf);
+ assert(d_type == 1 && "Unknown data type");
+ int32_t element_size = deserializeT<int32_t>(buf);
+ assert(element_size == 4 && "Unsupported element size");
+ Shape shape = deserializeShape(buf);
+ const float* data = reinterpret_cast<const float*>(buf);
+ assert(isAddrAligned(data, 4));
+ Tensor tensor(shape, const_cast<float*>(data));
+ buf += element_size * shape.getNumElems();
+ return tensor;
+}
+
+// This operation takes as input multiple tensors, at least 2, likely fewer than 7
+// parameter pack provides generalization for all possible number of inputs
+template <class ...Args>
+void concat(Tensor &out, const char *params, const Args &...inputs)
+{
+ const float *input[] = {inputs.getData()...};
+ Dims<4> input_d[] = {shapeToDims(inputs.getShape())...};
+ int axis = deserializeT<int32_t>(params);
+ Shape out_s = deserializeShape(params);
+  // because inner functions accept axis in reverse order
+ axis = static_cast<int>(out_s.getDims()) - 1 - axis;
+ int inputs_count = sizeof(input)/sizeof(input[0]);
+
+ out.reshape(out_s);
+
+ Concatenation(axis,
+ input, input_d, inputs_count,
+ out.getData(), shapeToDims(out.getShape()));
+}
+
+void conv2d(Tensor& out, const char* params, const Tensor& input, const Tensor& kernel,
+ Tensor& temporary) {
+ const vector<int32_t> strides = deserializeStrides(params);
+ const Shape pads = deserializeShape(params);
+ const Shape out_shape = deserializeShape(params);
+ out.reshape(out_shape);
+
+ assert(strides.size() == 2);
+ const auto stride_h = static_cast<int16>(strides[0]);
+ const auto stride_w = static_cast<int16>(strides[1]);
+
+ assert(pads.getDims() == 2);
+ const auto pad_h = static_cast<int16>(pads[0]);
+ const auto pad_w = static_cast<int16>(pads[1]);
+
+ const Shape& kernel_shape = kernel.getShape();
+ const Shape im2col_shape{out_shape[0], out_shape[1], out_shape[2],
+ kernel_shape[1] * kernel_shape[2] * kernel_shape[3]};
+
+ float* im2col_data = nullptr;
+ if (stride_w != 1 || stride_h != 1 || kernel_shape[1] != 1 || kernel_shape[2] != 1) {
+ im2col_data = temporary.getData();
+ }
+
+ const ConvParams conv_params{{pad_w, pad_h}, stride_w, stride_h};
+ Conv(conv_params,
+ shapeToRuntimeShape(input.getShape()), input.getData(),
+ shapeToRuntimeShape(kernel_shape), kernel.getData(),
+ shapeToRuntimeShape(out_shape), out.getData(),
+ shapeToRuntimeShape(im2col_shape), im2col_data);
+}
+
+void convTransposed2d(Tensor& out, const char* params, const Tensor& input, const Tensor& kernel,
+ Tensor& temporary) {
+ const vector<int32_t> strides = deserializeStrides(params);
+ const Shape pads = deserializeShape(params);
+ const Shape out_shape = deserializeShape(params);
+ out.reshape(out_shape);
+
+ assert(strides.size() == 2);
+ const auto stride_h = static_cast<int16>(strides[0]);
+ const auto stride_w = static_cast<int16>(strides[1]);
+
+ assert(pads.getDims() == 2);
+ const auto pad_h = static_cast<int16>(pads[0]);
+ const auto pad_w = static_cast<int16>(pads[1]);
+
+ const RuntimeShape input_rt_shape = shapeToRuntimeShape(input.getShape());
+ const RuntimeShape out_rt_shape = shapeToRuntimeShape(out_shape);
+
+ // Transpose the kernel from HWOI to OHWI format.
+ const Shape& kernel_shape = kernel.getShape();
+ const RuntimeShape kernel_rt_shape = {static_cast<int>(kernel_shape[2]),
+ static_cast<int>(kernel_shape[0]),
+ static_cast<int>(kernel_shape[1]),
+ static_cast<int>(kernel_shape[3])};
+ unique_ptr<float[]> kernel_data(new float[kernel_rt_shape.FlatSize()]);
+ TransposeParams transpose_params{4, {2, 0, 1, 3}};
+ Transpose(transpose_params,
+ shapeToRuntimeShape(kernel_shape), kernel.getData(),
+ kernel_rt_shape, kernel_data.get());
+
+ const int32 kernel_height = kernel_rt_shape.Dims(1);
+ const int32 kernel_width = kernel_rt_shape.Dims(2);
+
+ const RuntimeShape im2col_shape{out_rt_shape.Dims(0),
+ out_rt_shape.Dims(1),
+ out_rt_shape.Dims(2),
+ input_rt_shape.Dims(3) * kernel_width * kernel_height};
+
+ ConvParams conv_params{{pad_w, pad_h}, stride_w, stride_h};
+
+ TransposeConv(conv_params,
+ input_rt_shape, input.getData(),
+ kernel_rt_shape, kernel_data.get(),
+ out_rt_shape, out.getData(),
+ im2col_shape, temporary.getData());
+}
+
+void depthwiseConv2d(Tensor& out, const char* params, const Tensor& input, const Tensor& kernel) {
+ const vector<int32_t> strides = deserializeStrides(params);
+ const Shape pads = deserializeShape(params);
+ const Shape out_shape = deserializeShape(params);
+ out.reshape(out_shape);
+
+ assert(strides.size() == 2);
+ const auto stride_h = static_cast<int16>(strides[0]);
+ const auto stride_w = static_cast<int16>(strides[1]);
+
+ assert(pads.getDims() == 2);
+ const auto pad_h = static_cast<int16>(pads[0]);
+ const auto pad_w = static_cast<int16>(pads[1]);
+
+ const RuntimeShape input_dims = shapeToRuntimeShape(input.getShape());
+ const RuntimeShape kernel_dims = shapeToRuntimeShape(kernel.getShape());
+ const RuntimeShape out_dims = shapeToRuntimeShape(out_shape);
+
+ const auto depth_multiplier = static_cast<int16>(out_dims.Dims(3) / input_dims.Dims(3));
+ assert(out_dims.Dims(3) % input_dims.Dims(3) == 0);
+
+ // Reshape kernel -- squash zeroth and first dimensions.
+ const int output_channels = kernel_dims.Dims(3) * kernel_dims.Dims(2);
+ assert(output_channels == out_dims.Dims(3));
+ const int kernel_w = kernel_dims.Dims(1);
+ const int kernel_h = kernel_dims.Dims(0);
+ const RuntimeShape kernel_rt_shape = {1, kernel_h, kernel_w, output_channels};
+
+ const DepthwiseParams depthwise_conv_params = {{pad_w, pad_h}, stride_w,
+ stride_h, 1, 1,
+ depth_multiplier};
+
+ // TODO Fusing bias into depthwise conv is close to a no-op due to the nature of the operation
+ // consider doing that
+ DepthwiseConv(depthwise_conv_params,
+ input_dims, input.getData(),
+ kernel_rt_shape, kernel.getData(),
+ out_dims, out.getData());
+}
+
+void softmax(Tensor &out, const char *params, const Tensor &in)
+{
+ const float *input = in.getData();
+ Dims<4> input_d = shapeToDims(in.getShape());
+ float beta = 1;
+ int32_t axis = deserializeT<int32_t>(params);
+ assert(axis == in.getShape().getDims() - 1);
+ UNUSED(axis);
+
+ out.reshape(in.getShape());
+
+ Softmax(input, input_d, beta, out.getData(), input_d);
+}
+
+void avgPool(Tensor &out, const char *params, const Tensor &in)
+{
+ const float *input = in.getData();
+ Dims<4> input_d = shapeToDims(in.getShape());
+ Shape window = deserializeShape(params);
+ vector<int32_t> strides = deserializeStrides(params);
+ Shape pads = deserializeShape(params);
+ bool include_pad = deserializeT<int32_t>(params);
+ Shape out_s = deserializeShape(params);
+
+ assert(window.getDims() == 2);
+ const int window_w = static_cast<int>(window[1]);
+ const int window_h = static_cast<int>(window[0]);
+ assert(strides.size() == 2);
+ const int stride_w = static_cast<int>(strides[1]);
+ const int stride_h = static_cast<int>(strides[0]);
+ assert(pads.getDims() == 2);
+ const int pad_w = static_cast<int>(pads[1]);
+ const int pad_h = static_cast<int>(pads[0]);
+
+ out.reshape(out_s);
+
+ Dims<4> out_d = shapeToDims(out_s);
+
+ AveragePool(input, input_d,
+ stride_w, stride_h,
+ pad_w, pad_h,
+ window_w, window_h,
+ out.getData(), out_d,
+ include_pad);
+}
+
+void maxPool(Tensor &out, const char *params, const Tensor &in)
+{
+ const float *input = in.getData();
+ Dims<4> input_d = shapeToDims(in.getShape());
+ Shape window = deserializeShape(params);
+ vector<int32_t> strides = deserializeStrides(params);
+ Shape pads = deserializeShape(params);
+ Shape out_s = deserializeShape(params);
+
+ assert(window.getDims() == 2);
+ const int window_w = static_cast<int>(window[1]);
+ const int window_h = static_cast<int>(window[0]);
+ assert(strides.size() == 2);
+ const int stride_w = static_cast<int>(strides[1]);
+ const int stride_h = static_cast<int>(strides[0]);
+ assert(pads.getDims() == 2);
+ const int pad_w = static_cast<int>(pads[1]);
+ const int pad_h = static_cast<int>(pads[0]);
+
+ out.reshape(out_s);
+
+ Dims<4> out_d = shapeToDims(out_s);
+
+ MaxPool(input, input_d,
+ stride_w, stride_h,
+ pad_w, pad_h,
+ window_w, window_h,
+ out.getData(), out_d);
+}
+
+void fullConnect(Tensor& out, const char* params, const Tensor& in, const Tensor& w) {
+ Shape out_s = deserializeShape(params);
+ out.reshape(out_s);
+
+ FullyConnected(in.getData(), shapeToDims(in.getShape()),
+ w.getData(), shapeToDims(w.getShape()),
+ out.getData(), shapeToDims(out_s));
+}
+
+/**
+ * @brief Resize assuming tflite axis order (NHWC)
+ */
+void resize(Tensor& out, const char* params, const Tensor& in) {
+ // The Tensorflow version of this op allows resize on the width and height
+ // axis only.
+ const float* input = in.getData();
+ assert(in.getShape().getDims() == 4 && "Should be a 4d tensor");
+ RuntimeShape in_shape = shapeToRuntimeShape(in.getShape());
+ Shape out_shape = deserializeShape(params);
+ out.reshape(out_shape);
+
+ assert(out_shape.getDims() == 4 && "Should be a 4d tensor");
+ RuntimeShape out_runtime = shapeToRuntimeShape(out_shape);
+ assert(out_shape[0] == in_shape.Dims(0) && out_shape[3] == in_shape.Dims(3) &&
+ "Resize is unly supported over hight and width");
+
+ ResizeNearestNeighbor<float>(
+ in_shape, input,
+ static_cast<int>(out_shape[1]), static_cast<int>(out_shape[2]),
+ out_runtime, out.getData());
+}
+
+void cappedRelu(Tensor &out, const char *params, const Tensor &in)
+{
+ const float *input = in.getData();
+ Dims<4> input_d = shapeToDims(in.getShape());
+ float cap = deserializeT<float>(params);
+
+ out.reshape(in.getShape());
+
+ CappedRelu(input, input_d, cap, out.getData(), input_d);
+}
+
+void slice(Tensor& out, const char* params, const Tensor& in) {
+ Shape starts = deserializeShape(params);
+ Shape sizes = deserializeShape(params);
+ Shape out_s = deserializeShape(params);
+
+ out.reshape(out_s);
+ SliceParams slice_params;
+ slice_params.begin_count = static_cast<uint8>(starts.getDims());
+ slice_params.size_count = static_cast<uint8>(sizes.getDims());
+
+ assert(slice_params.begin_count <= 4);
+ assert(slice_params.size_count <= 4);
+ assert(starts.getDims() == sizes.getDims());
+
+ for (int i = 0; i < slice_params.begin_count; i++) {
+ slice_params.begin[i] = static_cast<int32>(starts[i]);
+ slice_params.size[i] = static_cast<int32>(sizes[i]);
+ }
+ Slice(
+ slice_params,
+ shapeToRuntimeShape(in.getShape()), in.getData(),
+ shapeToRuntimeShape(out_s), out.getData()
+ );
+}
+
+void relu(Tensor &out, const char *params, const Tensor &in)
+{
+ const float *input = in.getData();
+ Dims<4> input_d = shapeToDims(in.getShape());
+
+ out.reshape(in.getShape());
+
+ Relu(input, input_d, out.getData(), input_d);
+}
+
+void sigmoid(Tensor& out, const char* params, const Tensor& in) {
+ out.reshape(in.getShape());
+ Logistic(shapeToRuntimeShape(in.getShape()), in.getData(),
+ shapeToRuntimeShape(out.getShape()), out.getData());
+}
+
+void elu(Tensor &out, const char* params, const Tensor& in) {
+ const float* input = in.getData();
+ const Dims<4> inp_d = shapeToDims(in.getShape());
+
+ const float alpha = deserializeT<float>(params);
+ out.reshape(in.getShape());
+
+ ELU(input, inp_d, alpha, out.getData(), inp_d);
+}
+
+void tanhActivation(Tensor &out, const char* params, const Tensor& in) {
+ UNUSED(params);
+ const float* input = in.getData();
+ const Dims<4> inp_d = shapeToDims(in.getShape());
+
+ out.reshape(in.getShape());
+
+ float* output = out.getData();
+ const Dims<4> out_d = shapeToDims(in.getShape());
+ Tanh(input, inp_d, output, out_d);
+}
+
+template <typename F>
+void ElementWise(Tensor &out, const char *params, const Tensor &lhs, const Tensor &rhs)
+{
+ const float *lhs_data = lhs.getData();
+ const float *rhs_data = rhs.getData();
+
+ const Shape out_shape = deserializeShape(params);
+ out.reshape(out_shape);
+
+ F::Call(lhs_data, shapeToRuntimeShape(lhs.getShape()),
+ rhs_data, shapeToRuntimeShape(rhs.getShape()),
+ out.getData(), shapeToRuntimeShape(out_shape));
+}
+
+// TODO refactor tflite's code for this op
+void reshape(Tensor& out, const char* params, const Tensor& in) {
+ Shape out_s = deserializeShape(params);
+ assert(out_s.getNumElems() == in.getShape().getNumElems());
+
+ out.reshape(out_s);
+ out.fillData(in.getData(), in.getShape().getNumElems());
+}
+
+void reduceMean(Tensor& out, const char* params, const Tensor& in) {
+ Shape tmp_reduction_dims = deserializeShape(params);
+ bool keep_dims = static_cast<bool>(deserializeT<int32_t>(params));
+ Shape out_s = deserializeShape(params);
+ out.reshape(out_s);
+
+ const int32_t rank_inp = static_cast<int32_t>(in.getShape().getDims());
+ const int32_t rank_out = static_cast<int32_t>(out_s.getDims());
+ const int32_t rank_axis = static_cast<int32_t>(tmp_reduction_dims.getDims());
+
+
+ int32_t in_dim[8];
+ int32_t tmp_index[8]; // input iterator storage
+ assert(rank_inp < 8);
+ for (int i = 0; i < rank_inp; i++) {
+ in_dim[i] = static_cast<int32_t>(in.getShape()[i]);
+ }
+ int32_t out_dim[8];
+ assert(rank_out <= 8);
+ for (int i = 0; i < rank_out; i++) {
+ out_dim[i] = static_cast<int32_t>(out.getShape()[i]);
+ }
+ int32_t axis[8];
+ int32_t resolved_axis[8]; // in case there are negative or duplicate indexes
+ assert(rank_axis <= 8);
+ for (int i = 0; i < rank_axis; i++) {
+ axis[i] = static_cast<int32_t>(tmp_reduction_dims[i]);
+ }
+
+ float* temp_sum = new float[out_s.getNumElems()];
+
+ bool succ = Mean(
+ in.getData(), in_dim, rank_inp,
+ out.getData(), out_dim, rank_out,
+ axis, rank_axis, keep_dims,
+ tmp_index, resolved_axis, temp_sum
+ );
+ assert(succ && "Mean failed!");
+ delete[] temp_sum;
+}
+
+void pad(Tensor& out, const char* params, const Tensor& in) {
+ const float* input = in.getData();
+ const Dims<4> input_dims = shapeToDims(in.getShape());
+
+ // deserialize output shape
+ Shape output_shape = deserializeShape(params);
+
+ // deserialize number of dimensions
+ const int32_t num_dim = deserializeT<int32_t>(params);
+
+ // deserialize paddings
+ std::vector<int> left_paddings, right_paddings;
+ for(int i = 0; i < num_dim; i++) {
+ left_paddings.push_back(deserializeT<int32_t>(params));
+ right_paddings.push_back(deserializeT<int32_t>(params));
+ }
+ for(int i = num_dim; i < 4; i++) {
+ left_paddings.push_back(0);
+ right_paddings.push_back(0);
+ }
+
+ out.reshape(output_shape);
+
+ float* output = out.getData();
+ const Dims<4> output_dims = shapeToDims(out.getShape());
+
+ Pad(input, input_dims, left_paddings, right_paddings, output, output_dims);
+}
+
+void sqrtFN(Tensor& out, const char* params, const Tensor& in) {
+ const float* input = in.getData();
+ const Dims<4> inp_d = shapeToDims(in.getShape());
+ // no params to deserialize
+
+ out.reshape(in.getShape());
+ Sqrt(input, inp_d, out.getData());
+}
+
+void absFN(Tensor &out, const char *params, const Tensor& in) {
+ out.reshape(in.getShape());
+
+ const float* in_data = in.getData();
+ float* out_data = out.getData();
+ const index_t num_elements = in.getShape().getNumElems();
+
+ for (index_t i = 0; i < num_elements; ++i) {
+ out_data[i] = abs(in_data[i]);
+ }
+}
+
+void transpose(Tensor &out, const char *params, const Tensor &in) {
+ TransposeParams transpose_params;
+ transpose_params.perm_count = static_cast<int8>(deserializeT<int32_t>(params));
+ for (int i = 0; i < transpose_params.perm_count; ++i)
+ transpose_params.perm[i] = deserializeT<int32_t>(params);
+
+ Shape out_s = deserializeShape(params);
+ assert(out_s.getNumElems() == in.getShape().getNumElems());
+ out.reshape(out_s);
+
+ Transpose(transpose_params,
+ shapeToRuntimeShape(in.getShape()), in.getData(),
+ shapeToRuntimeShape(out.getShape()), out.getData());
+}
+
+void gather(Tensor &out, const char *params, const Tensor &data, const Tensor &indices) {
+ GatherParams gather_params;
+ gather_params.axis = static_cast<int16>(deserializeT<int32_t>(params));
+
+ Shape out_s = deserializeShape(params);
+ out.reshape(out_s);
+
+ // reinterpret_cast is used here because indices in ModelIR are integral, but getData returns
+ // pointer to float.
+ Gather(gather_params,
+ shapeToRuntimeShape(data.getShape()), data.getData(),
+ shapeToRuntimeShape(indices.getShape()), indices.getData(),
+ shapeToRuntimeShape(out.getShape()), out.getData());
+}
+
+void broadcast(Tensor &out, const char *params, const Tensor &in)
+{
+ Shape out_shape = deserializeShape(params);
+ out.reshape(out_shape);
+
+ Broadcast4DSlow(shapeToRuntimeShape(in.getShape()), in.getData(),
+ shapeToRuntimeShape(out_shape), out.getData());
+}
+
+void constant(Tensor& out, const char* params) {
+ out = deserializeTensor(params);
+}
+
+void out(const char* params, const Tensor& in) {
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_pad.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_pad.def
new file mode 100644
index 000000000..39dd60dbb
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_pad.def
@@ -0,0 +1,96 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Zero-pads a 4-D tensor. `left_paddings`/`right_paddings` are indexed in
+// Dims<4> order (0 = depth/innermost ... 3 = batch/outermost). The body walks
+// the interior region of the output, memcpy-ing one depth-row of input at a
+// time, and memsets the padding margins of each dimension to zero as it goes.
+// Only constant-zero padding is supported.
+inline void Pad(const float* input_data, const Dims<4>& input_dims,
+                const std::vector<int>& left_paddings,
+                const std::vector<int>& right_paddings, float* output_data,
+                const Dims<4>& output_dims) {
+
+  const int output_batch = ArraySize(output_dims, 3);
+  const int output_height = ArraySize(output_dims, 2);
+  const int output_width = ArraySize(output_dims, 1);
+  const int output_depth = ArraySize(output_dims, 0);
+
+  const int left_b_padding = left_paddings[3];
+  const int left_h_padding = left_paddings[2];
+  const int left_w_padding = left_paddings[1];
+  const int left_d_padding = left_paddings[0];
+
+  const int right_b_padding = right_paddings[3];
+  const int right_h_padding = right_paddings[2];
+  const int right_w_padding = right_paddings[1];
+  const int right_d_padding = right_paddings[0];
+
+  const int input_depth = ArraySize(input_dims, 0);
+
+  // Leading batch padding: a contiguous zero prefix of whole batch planes.
+  if (left_b_padding != 0) {
+    memset(output_data, 0,
+           left_b_padding * output_height * output_width * output_depth *
+               sizeof(float));
+  }
+  for (int out_b = left_b_padding; out_b < output_batch - right_b_padding;
+       ++out_b) {
+    // Leading height padding within this batch plane.
+    if (left_h_padding != 0) {
+      memset(output_data + Offset(output_dims, 0, 0, 0, out_b), 0,
+             left_h_padding * output_width * output_depth * sizeof(float));
+    }
+    for (int out_h = left_h_padding; out_h < output_height - right_h_padding;
+         ++out_h) {
+      // Leading width padding within this row.
+      if (left_w_padding != 0) {
+        memset(output_data + Offset(output_dims, 0, 0, out_h, out_b), 0,
+               left_w_padding * output_depth * sizeof(float));
+      }
+      for (int out_w = left_w_padding; out_w < output_width - right_w_padding;
+           ++out_w) {
+        // Leading depth padding, then one full input depth-row, then trailing
+        // depth padding.
+        if (left_d_padding != 0) {
+          memset(output_data + Offset(output_dims, 0, out_w, out_h, out_b), 0,
+                 left_d_padding * sizeof(float));
+        }
+
+        float* out = output_data +
+                     Offset(output_dims, left_d_padding, out_w, out_h, out_b);
+        const float* in =
+            input_data + Offset(input_dims, 0, out_w - left_w_padding,
+                                out_h - left_h_padding, out_b - left_b_padding);
+        memcpy(out, in, input_depth * sizeof(float));
+
+        if (right_d_padding != 0) {
+          memset(
+              output_data + Offset(output_dims, output_depth - right_d_padding,
+                                   out_w, out_h, out_b),
+              0, right_d_padding * sizeof(float));
+        }
+      }
+      // Trailing width padding for this row.
+      if (right_w_padding != 0) {
+        memset(
+            output_data + Offset(output_dims, 0, output_width - right_w_padding,
+                                 out_h, out_b),
+            0, right_w_padding * output_depth * sizeof(float));
+      }
+    }
+    // Trailing height padding for this batch plane.
+    if (right_h_padding != 0) {
+      memset(output_data + Offset(output_dims, 0, 0,
+                                  output_height - right_h_padding, out_b),
+             0, right_h_padding * output_width * output_depth * sizeof(float));
+    }
+  }
+  // Trailing batch padding: a contiguous zero suffix of whole batch planes.
+  if (right_b_padding != 0) {
+    memset(output_data +
+               Offset(output_dims, 0, 0, 0, output_batch - right_b_padding),
+           0,
+           right_b_padding * output_height * output_width * output_depth *
+               sizeof(float));
+  }
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_pool.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_pool.def
new file mode 100644
index 000000000..bf970c01c
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_pool.def
@@ -0,0 +1,116 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Flattens (batch, row, col) into a column index of the pooling matrix view,
+// where each column holds one depth vector.
+inline int NodeOffset(int b, int h, int w, int height, int width) {
+  return (b * height + h) * width + w;
+}
+
+// Average pooling over a 4-D tensor, scatter-style: instead of iterating
+// output windows, it iterates every input position and accumulates it into
+// all output cells whose window covers it, counting contributions per cell.
+// `include_pad` selects the divisor: the full kernel area (kheight * kwidth,
+// i.e. padding counts as zeros) vs. the actual number of in-bounds elements.
+inline void AveragePool(const float* input_data, const Dims<4>& input_dims,
+                        int stride_width, int stride_height, int pad_width,
+                        int pad_height, int kwidth, int kheight,
+                        float* output_data,
+                        const Dims<4>& output_dims,
+                        bool include_pad) {
+
+  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
+  const int input_height = ArraySize(input_dims, 2);
+  const int input_width = ArraySize(input_dims, 1);
+  const int output_height = ArraySize(output_dims, 2);
+  const int output_width = ArraySize(output_dims, 1);
+  MatchingArraySize(input_dims, 0, output_dims, 0);
+
+  // TODO(benoitjacob) make this a proper reference impl without Eigen!
+  const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
+  auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+  // TODO(benoitjacob) get rid of the dynamic memory allocation here!
+  // Per-output-cell count of input elements accumulated into it.
+  Eigen::VectorXf out_count(out_mat.cols());
+  out_count.setZero();
+  // Prefill the output to 0.
+  out_mat.setZero();
+  for (int b = 0; b < batches; ++b) {
+    for (int h = 0; h < input_height; ++h) {
+      for (int w = 0; w < input_width; ++w) {
+        // (h_start, h_end) * (w_start, w_end) is the range that the input
+        // vector projects to.
+        int hpad = h + pad_height;
+        int wpad = w + pad_width;
+        int h_start =
+            (hpad < kheight) ? 0 : (hpad - kheight) / stride_height + 1;
+        int h_end = std::min(hpad / stride_height + 1, output_height);
+        int w_start = (wpad < kwidth) ? 0 : (wpad - kwidth) / stride_width + 1;
+        int w_end = std::min(wpad / stride_width + 1, output_width);
+        // compute elementwise sum
+        for (int ph = h_start; ph < h_end; ++ph) {
+          for (int pw = w_start; pw < w_end; ++pw) {
+            int out_offset = NodeOffset(b, ph, pw, output_height, output_width);
+            out_mat.col(out_offset) +=
+                in_mat.col(NodeOffset(b, h, w, input_height, input_width));
+            out_count(out_offset)++;
+          }
+        }
+      }
+    }
+  }
+  // Divide the output by the actual number of elements being averaged over
+  TFLITE_DCHECK_GT(out_count.minCoeff(), 0);
+  if (include_pad) {
+    out_mat.array() /= kheight * kwidth;
+  } else {
+    out_mat.array().rowwise() /= out_count.transpose().array();
+  }
+}
+
+// Max pooling over a 4-D tensor, same scatter-style traversal as AveragePool:
+// each input position is cwiseMax-ed into every output cell whose pooling
+// window covers it. The output is pre-filled with the lowest float so the
+// first contribution always wins.
+inline void MaxPool(const float* input_data, const Dims<4>& input_dims,
+                    int stride_width, int stride_height, int pad_width,
+                    int pad_height, int kwidth, int kheight,
+                    float* output_data, const Dims<4>& output_dims) {
+
+  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
+  const int input_height = ArraySize(input_dims, 2);
+  const int input_width = ArraySize(input_dims, 1);
+  const int output_height = ArraySize(output_dims, 2);
+  const int output_width = ArraySize(output_dims, 1);
+  MatchingArraySize(input_dims, 0, output_dims, 0);
+
+  const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
+  auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+  // Prefill the output to minimum representable float value
+  out_mat.setConstant(std::numeric_limits<float>::lowest());
+  for (int b = 0; b < batches; ++b) {
+    for (int h = 0; h < input_height; ++h) {
+      for (int w = 0; w < input_width; ++w) {
+        // (h_start, h_end) * (w_start, w_end) is the range that the input
+        // vector projects to.
+        int hpad = h + pad_height;
+        int wpad = w + pad_width;
+        int h_start =
+            (hpad < kheight) ? 0 : (hpad - kheight) / stride_height + 1;
+        int h_end = std::min(hpad / stride_height + 1, output_height);
+        int w_start = (wpad < kwidth) ? 0 : (wpad - kwidth) / stride_width + 1;
+        int w_end = std::min(wpad / stride_width + 1, output_width);
+        // compute elementwise sum
+        for (int ph = h_start; ph < h_end; ++ph) {
+          for (int pw = w_start; pw < w_end; ++pw) {
+            int out_offset = NodeOffset(b, ph, pw, output_height, output_width);
+            out_mat.col(out_offset) =
+                out_mat.col(out_offset)
+                    .cwiseMax(in_mat.col(
+                        NodeOffset(b, h, w, input_height, input_width)));
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_reduce.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_reduce.def
new file mode 100644
index 000000000..e38277802
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_reduce.def
@@ -0,0 +1,185 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+
+// A generic reduce method that can be used for reduce_sum, reduce_mean, etc.
+// This method iterates through input data and reduce elements along the
+// dimensions given in axis.
+// `output_data` must already hold the reduction's identity value (see
+// InitTensorDataForReduce); `input_iter` is caller-provided scratch of
+// input_num_dims ints. Always returns true.
+template <typename In, typename Out>
+inline bool Reduce(const In* input_data, const int* input_dims,
+                   const int* output_dims, const int input_num_dims,
+                   const int output_num_dims, const int* axis,
+                   const int num_axis, int* input_iter,
+                   Out reducer(const Out current, const In in),
+                   Out* output_data) {
+  // Reset input iterator.
+  for (int idx = 0; idx < input_num_dims; ++idx) {
+    input_iter[idx] = 0;
+  }
+  // Iterate through input_data.
+  do {
+    size_t input_offset =
+        ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
+    // Output offset collapses the reduced axes to zero.
+    size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
+                                               input_iter, num_axis, axis);
+    output_data[output_offset] =
+        reducer(output_data[output_offset], input_data[input_offset]);
+  } while (NextIndex(input_num_dims, input_dims, input_iter));
+  return true;
+}
+
+// Normalizes a user-supplied axis list into out_axis/out_num_axis:
+// negative indices are wrapped modulo num_dims and duplicates are dropped.
+// Always returns true (out-of-range axes are caught only by the DCHECK).
+inline bool ResolveAxis(const int num_dims, const int* axis,
+                        const int64_t num_axis, int* out_axis,
+                        int* out_num_axis) {
+  *out_num_axis = 0;  // Just in case.
+  // Short-circuit axis resolution for scalars; the axis will go unused.
+  if (num_dims == 0) {
+    return true;
+  }
+  // o(n^2) is fine since out_num_axis should be really small, mostly <= 4
+  for (int64_t idx = 0; idx < num_axis; ++idx) {
+    // Handle negative index.
+    int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
+    TFLITE_DCHECK(current >= 0 && current < num_dims);
+    bool is_dup = false;
+    for (int j = 0; j < *out_num_axis; ++j) {
+      if (out_axis[j] == current) {
+        is_dup = true;
+        break;
+      }
+    }
+    if (!is_dup) {
+      out_axis[*out_num_axis] = current;
+      *out_num_axis += 1;
+    }
+  }
+  return true;
+}
+
+// This method expects that output_data has been initialized.
+// Sum-reduction specialization of Reduce: accumulates inputs (cast to Out)
+// into output_data along the given axes.
+template <typename In, typename Out>
+inline bool ReduceSumImpl(const In* input_data, const int* input_dims,
+                          const int* output_dims, const int input_num_dims,
+                          const int output_num_dims, const int* axis,
+                          const int num_axis, int* input_iter,
+                          Out* output_data) {
+  // Widening happens per element, before the addition.
+  auto reducer = [ ](const Out current, const In in) -> Out {
+    const Out actual_in = static_cast<Out>(in);
+    return current + actual_in;
+  };
+  return Reduce<In, Out>(input_data, input_dims, output_dims, input_num_dims,
+                         output_num_dims, axis, num_axis, input_iter, reducer,
+                         output_data);
+}
+
+// Fills `data` (num_elements derived from dims) with the reduction's identity
+// value. Returns false if the element count would overflow size_t.
+template <typename T>
+inline bool InitTensorDataForReduce(const int* dims, const int num_dims,
+                                    const T init_value, T* data) {
+  size_t num_elements = 1;
+  for (int idx = 0; idx < num_dims; ++idx) {
+    size_t current = static_cast<size_t>(dims[idx]);
+    // Overflow prevention.
+    if (num_elements > std::numeric_limits<size_t>::max() / current) {
+      return false;
+    }
+    num_elements *= current;
+  }
+  for (size_t idx = 0; idx < num_elements; ++idx) {
+    data[idx] = init_value;
+  }
+  return true;
+}
+
+// Computes the generic value (i.e., sum/max/min/prod) of elements across
+// dimensions given in axis. It needs to pass in init_value and reducer.
+// `temp_index` and `resolved_axis` are caller-provided scratch buffers;
+// `keep_dims` is accepted but unused here (the shape handling is external).
+// Returns false only if output initialization overflows.
+template <typename T>
+inline bool ReduceGeneric(const T* input_data, const int* input_dims,
+                          const int input_num_dims, T* output_data,
+                          const int* output_dims, const int output_num_dims,
+                          const int* axis, const int64_t num_axis_dimensions,
+                          bool keep_dims, int* temp_index, int* resolved_axis,
+                          T init_value,
+                          T reducer(const T current, const T in)) {
+  // Reset output data.
+  if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value,
+                               output_data)) {
+    return false;
+  }
+
+  // Resolve axis.
+  int num_resolved_axis = 0;
+  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
+                   &num_resolved_axis)) {
+    return false;
+  }
+
+  return Reduce<T, T>(input_data, input_dims, output_dims, input_num_dims,
+                      output_num_dims, resolved_axis, num_resolved_axis,
+                      temp_index, reducer, output_data);
+}
+
+// Computes the mean of elements across dimensions given in axis.
+// It does so in two stages, first calculates the sum of elements along the axis
+// then divides it by the number of element in axis.
+// U is the (wider) accumulator type; `temp_sum` holds one U per output
+// element. Returns false on size overflow in either the output count or the
+// per-axis element count.
+template <typename T, typename U>
+inline bool Mean(const T* input_data, const int* input_dims,
+                 const int input_num_dims, T* output_data,
+                 const int* output_dims, const int output_num_dims,
+                 const int* axis, const int num_axis_dimensions, bool keep_dims,
+                 int* temp_index, int* resolved_axis, U* temp_sum) {
+  // Reset output data.
+  size_t num_outputs = 1;
+  for (int idx = 0; idx < output_num_dims; ++idx) {
+    size_t current = static_cast<size_t>(output_dims[idx]);
+    // Overflow prevention.
+    if (num_outputs > std::numeric_limits<size_t>::max() / current) {
+      return false;
+    }
+    num_outputs *= current;
+  }
+  // Zero both the outputs and the accumulators (value-initialized).
+  for (size_t idx = 0; idx < num_outputs; ++idx) {
+    output_data[idx] = T();
+    temp_sum[idx] = U();
+  }
+
+  // Resolve axis.
+  int num_resolved_axis = 0;
+  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
+                   &num_resolved_axis)) {
+    return false;
+  }
+
+  // Stage 1: accumulate sums in U precision.
+  if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
+                           output_num_dims, resolved_axis, num_resolved_axis,
+                           temp_index, temp_sum)) {
+    return false;
+  }
+
+  // Calculate mean by dividing output_data by num of aggregated element.
+  U num_elements_in_axis = 1;
+  for (int idx = 0; idx < num_resolved_axis; ++idx) {
+    size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
+    // Overflow prevention.
+    if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
+      return false;
+    }
+    num_elements_in_axis *= current;
+  }
+
+  // Stage 2: divide; skipped entirely when no elements were aggregated.
+  if (num_elements_in_axis > 0) {
+    for (size_t idx = 0; idx < num_outputs; ++idx) {
+      output_data[idx] =
+          static_cast<T>(temp_sum[idx] / static_cast<U>(num_elements_in_axis));
+    }
+  }
+  return true;
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_relu.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_relu.def
new file mode 100644
index 000000000..0082cf7c3
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_relu.def
@@ -0,0 +1,23 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Elementwise ReLU: output = max(input, 0), vectorized via Eigen maps.
+inline void Relu(const float* input_data, const Dims<4>& input_dims,
+                 float* output_data, const Dims<4>& output_dims) {
+
+  const auto input = MapAsVector(input_data, input_dims);
+  auto output = MapAsVector(output_data, output_dims);
+
+  output = input.cwiseMax(0.0f);
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_resize.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_resize.def
new file mode 100644
index 000000000..68dde5670
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_resize.def
@@ -0,0 +1,61 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Nearest-neighbor resize of the spatial (H, W) dimensions of an NHWC
+// tensor. Source coordinates use floor(dst * scale), matching TensorFlow's
+// align_corners=false behavior; whole depth vectors are copied with memcpy.
+template <typename T>
+inline void ResizeNearestNeighbor(
+    const RuntimeShape& unextended_input_shape, const T* input_data,
+    const int32 output_height, const int32 output_width,
+    const RuntimeShape& unextended_output_shape, T* output_data) {
+  // Align corners = true is not supported.
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
+  int32 input_height = input_shape.Dims(1);
+  int32 input_width = input_shape.Dims(2);
+  int32 depth = MatchingDim(input_shape, 3, output_shape, 3);
+
+
+  // We use float to ensure agreement with the Tensorflow implementation.
+  const float height_scale = static_cast<float>(input_height) / output_height;
+  const float width_scale = static_cast<float>(input_width) / output_width;
+
+  // Element strides of the (contiguous, NHWC) input.
+  const int col_offset = input_shape.Dims(3);
+  const int row_offset = input_shape.Dims(2) * col_offset;
+  const int batch_offset = input_shape.Dims(1) * row_offset;
+
+  const T* input_ptr = input_data;
+  T* output_ptr = output_data;
+  for (int b = 0; b < batches; ++b) {
+    for (int y = 0; y < output_height; ++y) {
+      // Clamp to the last row so upscaling never reads out of bounds.
+      int32 in_y = std::min(static_cast<int32>(std::floor(y * height_scale)),
+                            input_height - 1);
+      const T* y_input_ptr = input_ptr + in_y * row_offset;
+      for (int x = 0; x < output_width; ++x) {
+        int32 in_x = std::min(static_cast<int32>(std::floor(x * width_scale)),
+                              input_width - 1);
+        const T* x_input_ptr = y_input_ptr + in_x * col_offset;
+        memcpy(output_ptr, x_input_ptr, depth * sizeof(T));
+        output_ptr += depth;
+      }
+    }
+    input_ptr += batch_offset;
+  }
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_sigmoid.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_sigmoid.def
new file mode 100644
index 000000000..a67d6fdab
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_sigmoid.def
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Elementwise logistic sigmoid: output = 1 / (1 + exp(-input)), using
+// Eigen's vectorized scalar_logistic_op.
+inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
+                     const RuntimeShape& output_shape, float* output_data) {
+
+  const auto input = MapAsVector(input_data, input_shape);
+  auto output = MapAsVector(output_data, output_shape);
+
+  output.array() = input.array().unaryExpr(Eigen::internal::scalar_logistic_op<float>());
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_slice.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_slice.def
new file mode 100644
index 000000000..3b0705340
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_slice.def
@@ -0,0 +1,56 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Extracts a contiguous slice of a (up to 4-D) tensor. begin/size vectors
+// shorter than 4 are implicitly front-padded; a size of -1 means "to the end
+// of that dimension". Copies one innermost (depth) run per memcpy.
+template <typename T>
+inline void Slice(const SliceParams& op_params,
+                  const RuntimeShape& input_shape, const T* input_data,
+                  const RuntimeShape& output_shape, T* output_data) {
+  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(4, input_shape);
+  // TODO(dkalenichenko): This op only supports 4D tensors or smaller.
+  TFLITE_DCHECK_LE(op_params.begin_count, 4);
+  TFLITE_DCHECK_LE(op_params.size_count, 4);
+  const int begin_count = op_params.begin_count;
+  const int size_count = op_params.size_count;
+  // We front-pad the begin and size vectors.
+  const int start_b = 4 - begin_count > 0 ? 0 : op_params.begin[0];
+  const int stop_b = (4 - size_count > 0 || op_params.size[0] == -1)
+                         ? ext_shape.Dims(0)
+                         : start_b + op_params.size[0];
+  const int start_h = begin_count < 3 ? 0 : op_params.begin[begin_count - 3];
+  const int stop_h = (size_count < 3 || op_params.size[size_count - 3] == -1)
+                         ? ext_shape.Dims(1)
+                         : start_h + op_params.size[size_count - 3];
+  const int start_w = begin_count < 2 ? 0 : op_params.begin[begin_count - 2];
+  const int stop_w = (size_count < 2 || op_params.size[size_count - 2] == -1)
+                         ? ext_shape.Dims(2)
+                         : start_w + op_params.size[size_count - 2];
+  const int start_d = begin_count < 1 ? 0 : op_params.begin[begin_count - 1];
+  const int stop_d = (size_count < 1 || op_params.size[size_count - 1] == -1)
+                         ? ext_shape.Dims(3)
+                         : start_d + op_params.size[size_count - 1];
+
+  // Output is written sequentially; only the input is strided.
+  T* out_ptr = output_data;
+  for (int in_b = start_b; in_b < stop_b; ++in_b) {
+    for (int in_h = start_h; in_h < stop_h; ++in_h) {
+      for (int in_w = start_w; in_w < stop_w; ++in_w) {
+        const int len = stop_d - start_d;
+        memcpy(out_ptr,
+               input_data + Offset(ext_shape, in_b, in_h, in_w, start_d),
+               len * sizeof(T));
+        out_ptr += len;
+      }
+    }
+  }
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_softmax.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_softmax.def
new file mode 100644
index 000000000..ebf9c5975
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_softmax.def
@@ -0,0 +1,33 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Softmax with temperature `beta` over the innermost dimension (each column
+// of the matrix view is one softmax group): exp(beta * (x - max)) / sum.
+inline void Softmax(const float* input_data, const Dims<4>& input_dims,
+                    float beta, float* output_data,
+                    const Dims<4>& output_dims) {
+
+  MatchingFlatSize(input_dims, output_dims);
+
+  const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
+  auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+  // Compute the exponential first, removing the max coefficient for numerical
+  // stability.
+  out_mat = (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * beta;
+  // We are separating out the exp function so that exp can be vectorized.
+  out_mat = out_mat.array().exp();
+  // Normalize to get the activations.
+  Eigen::Array<float, 1, Eigen::Dynamic> scale =
+      out_mat.array().colwise().sum().inverse();
+  out_mat.array().rowwise() *= scale;
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_sqrt.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_sqrt.def
new file mode 100644
index 000000000..29c4185fa
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_sqrt.def
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Elementwise square root. Note the output is mapped with input_dims: input
+// and output are assumed to have identical shapes (no separate output dims
+// parameter exists).
+inline void Sqrt(const float* input_data, const Dims<4>& input_dims, float* output_data) {
+
+  const auto input = MapAsVector(input_data, input_dims);
+  auto output = MapAsVector(output_data, input_dims);
+
+  output = input.array().sqrt();
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_tanh.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_tanh.def
new file mode 100644
index 000000000..a85a6c52b
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_tanh.def
@@ -0,0 +1,21 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Elementwise hyperbolic tangent, vectorized via Eigen maps.
+inline void Tanh(const float* input_data, const Dims<4>& input_dims,
+                 float* output_data, const Dims<4>& output_dims) {
+  auto input_map = MapAsVector(input_data, input_dims);
+  auto output_map = MapAsVector(output_data, output_dims);
+  output_map.array() = input_map.array().tanh();
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_transpose.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_transpose.def
new file mode 100644
index 000000000..30bd4dfa4
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_transpose.def
@@ -0,0 +1,65 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Generic permutation of up to 4 dimensions. Both shapes are front-extended
+// to rank 4 and the perm array is shifted accordingly, then a naive 4-deep
+// loop walks the output index space and reads the permuted input index.
+template <typename T>
+void Transpose(const TransposeParams& params,
+               const RuntimeShape& unextended_input_shape, const T* input_data,
+               const RuntimeShape& unextended_output_shape, T* output_data) {
+  const int unextended_output_size = unextended_output_shape.DimensionsCount();
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_size, 4);
+  TFLITE_DCHECK_EQ(unextended_output_size, params.perm_count);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+  const int input_ext_size = 4 - unextended_input_shape.DimensionsCount();
+  const int output_ext_size = 4 - unextended_output_size;
+
+  // The perm data is extended to match the output, each index incremented by
+  // the amount of front padding of the input shape.
+  int extended_perm[4];
+  for (int i = 0; i < output_ext_size; ++i) {
+    extended_perm[i] = i;
+  }
+  for (int i = 0; i < unextended_output_size; ++i) {
+    extended_perm[i + output_ext_size] = params.perm[i] + input_ext_size;
+  }
+
+  int out_sizes[4];
+  // Compute the inverse permutation array so we can do an output centered
+  // transpose. Also, check to make sure output_dims is matching input_dims.
+  for (int k = 0; k < 4; k++) {
+    out_sizes[k] = MatchingDim(input_shape, extended_perm[k], output_shape, k);
+  }
+
+  // Naive transpose loop (iterate on output index and compute input index).
+  int o[4];  // loop index (on output).
+  int i[4];
+  // NOTE: o[3]/i[3] are the innermost (fastest-varying) indices here.
+  for (o[3] = 0; o[3] < out_sizes[3]; o[3]++) {
+    i[extended_perm[3]] = o[3];
+    for (o[2] = 0; o[2] < out_sizes[2]; o[2]++) {
+      i[extended_perm[2]] = o[2];
+      for (o[1] = 0; o[1] < out_sizes[1]; o[1]++) {
+        i[extended_perm[1]] = o[1];
+        for (o[0] = 0; o[0] < out_sizes[0]; o[0]++) {
+          i[extended_perm[0]] = o[0];
+          output_data[Offset(output_shape, o)] =
+              input_data[Offset(input_shape, i)];
+        }
+      }
+    }
+  }
+}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/eigen.def b/compiler/nnc/backends/soft_backend/code_snippets/eigen.def
new file mode 100644
index 000000000..b02f84bed
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/eigen.def
@@ -0,0 +1,29033 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2007-2011 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// #include "src/Core/util/DisableStupidWarnings.h"
+#ifndef EIGEN_WARNINGS_DISABLED
+#define EIGEN_WARNINGS_DISABLED
+#ifdef _MSC_VER
+ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+ #pragma warning( push )
+ #endif
+ #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)
+#elif defined __INTEL_COMPILER
+ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+ #pragma warning push
+ #endif
+ #pragma warning disable 2196 279 1684 2259
+#elif defined __clang__
+ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+ #pragma clang diagnostic push
+ #endif
+ #pragma clang diagnostic ignored "-Wconstant-logical-operand"
+#elif defined __GNUC__ && __GNUC__>=6
+ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+ #pragma GCC diagnostic push
+ #endif
+ #pragma GCC diagnostic ignored "-Wignored-attributes"
+#endif
+#if defined __NVCC__
+ #pragma diag_suppress code_is_unreachable
+ #pragma diag_suppress initialization_not_reachable
+ #pragma diag_suppress 1222
+ #pragma diag_suppress 2527
+ #pragma diag_suppress 2529
+ #pragma diag_suppress 2651
+ #pragma diag_suppress 2653
+ #pragma diag_suppress 2668
+ #pragma diag_suppress 2669
+ #pragma diag_suppress 2670
+ #pragma diag_suppress 2671
+ #pragma diag_suppress 2735
+ #pragma diag_suppress 2737
+#endif
+#endif
+// end #include "src/Core/util/DisableStupidWarnings.h"
+#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
+ #ifndef EIGEN_NO_DEBUG
+ #define EIGEN_NO_DEBUG
+ #endif
+ #ifdef EIGEN_INTERNAL_DEBUGGING
+ #undef EIGEN_INTERNAL_DEBUGGING
+ #endif
+ #ifdef EIGEN_EXCEPTIONS
+ #undef EIGEN_EXCEPTIONS
+ #endif
+ #ifdef __CUDACC__
+ #ifndef EIGEN_DONT_VECTORIZE
+ #define EIGEN_DONT_VECTORIZE
+ #endif
+ #define EIGEN_DEVICE_FUNC __host__ __device__
+ #include <math_functions.hpp>
+ #else
+ #define EIGEN_DEVICE_FUNC
+ #endif
+#else
+ #define EIGEN_DEVICE_FUNC
+#endif
+#if defined(__CUDA_ARCH__) && defined(__NVCC__)
+ #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
+#else
+ #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
+#endif
+#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
+ #define EIGEN_EXCEPTIONS
+#endif
+#ifdef EIGEN_EXCEPTIONS
+ #include <new>
+#endif
+// #include "src/Core/util/Macros.h"
+#ifndef EIGEN_MACROS_H
+#define EIGEN_MACROS_H
+#define EIGEN_WORLD_VERSION 3
+#define EIGEN_MAJOR_VERSION 3
+#define EIGEN_MINOR_VERSION 4
+#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
+ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
+ EIGEN_MINOR_VERSION>=z))))
+#ifdef __GNUC__
+ #define EIGEN_COMP_GNUC 1
+#else
+ #define EIGEN_COMP_GNUC 0
+#endif
+#if defined(__clang__)
+ #define EIGEN_COMP_CLANG (__clang_major__*100+__clang_minor__)
+#else
+ #define EIGEN_COMP_CLANG 0
+#endif
+#if defined(__llvm__)
+ #define EIGEN_COMP_LLVM 1
+#else
+ #define EIGEN_COMP_LLVM 0
+#endif
+#if defined(__INTEL_COMPILER)
+ #define EIGEN_COMP_ICC __INTEL_COMPILER
+#else
+ #define EIGEN_COMP_ICC 0
+#endif
+#if defined(__MINGW32__)
+ #define EIGEN_COMP_MINGW 1
+#else
+ #define EIGEN_COMP_MINGW 0
+#endif
+#if defined(__SUNPRO_CC)
+ #define EIGEN_COMP_SUNCC 1
+#else
+ #define EIGEN_COMP_SUNCC 0
+#endif
+#if defined(_MSC_VER)
+ #define EIGEN_COMP_MSVC _MSC_VER
+#else
+ #define EIGEN_COMP_MSVC 0
+#endif
+#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG)
+ #define EIGEN_COMP_MSVC_STRICT _MSC_VER
+#else
+ #define EIGEN_COMP_MSVC_STRICT 0
+#endif
+#if defined(__IBMCPP__) || defined(__xlc__)
+ #define EIGEN_COMP_IBM 1
+#else
+ #define EIGEN_COMP_IBM 0
+#endif
+#if defined(__PGI)
+ #define EIGEN_COMP_PGI 1
+#else
+ #define EIGEN_COMP_PGI 0
+#endif
+#if defined(__CC_ARM) || defined(__ARMCC_VERSION)
+ #define EIGEN_COMP_ARM 1
+#else
+ #define EIGEN_COMP_ARM 0
+#endif
+#if defined(__EMSCRIPTEN__)
+ #define EIGEN_COMP_EMSCRIPTEN 1
+#else
+ #define EIGEN_COMP_EMSCRIPTEN 0
+#endif
+#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN)
+ #define EIGEN_COMP_GNUC_STRICT 1
+#else
+ #define EIGEN_COMP_GNUC_STRICT 0
+#endif
+#if EIGEN_COMP_GNUC
+ #define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__==x && __GNUC_MINOR__>=y) || __GNUC__>x)
+ #define EIGEN_GNUC_AT_MOST(x,y) ((__GNUC__==x && __GNUC_MINOR__<=y) || __GNUC__<x)
+ #define EIGEN_GNUC_AT(x,y) ( __GNUC__==x && __GNUC_MINOR__==y )
+#else
+ #define EIGEN_GNUC_AT_LEAST(x,y) 0
+ #define EIGEN_GNUC_AT_MOST(x,y) 0
+ #define EIGEN_GNUC_AT(x,y) 0
+#endif
+#if EIGEN_COMP_GNUC && (__GNUC__ <= 3)
+#define EIGEN_GCC3_OR_OLDER 1
+#else
+#define EIGEN_GCC3_OR_OLDER 0
+#endif
+#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
+ #define EIGEN_ARCH_x86_64 1
+#else
+ #define EIGEN_ARCH_x86_64 0
+#endif
+#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
+ #define EIGEN_ARCH_i386 1
+#else
+ #define EIGEN_ARCH_i386 0
+#endif
+#if EIGEN_ARCH_x86_64 || EIGEN_ARCH_i386
+ #define EIGEN_ARCH_i386_OR_x86_64 1
+#else
+ #define EIGEN_ARCH_i386_OR_x86_64 0
+#endif
+#if defined(__arm__)
+ #define EIGEN_ARCH_ARM 1
+#else
+ #define EIGEN_ARCH_ARM 0
+#endif
+#if defined(__aarch64__)
+ #define EIGEN_ARCH_ARM64 1
+#else
+ #define EIGEN_ARCH_ARM64 0
+#endif
+#if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64
+ #define EIGEN_ARCH_ARM_OR_ARM64 1
+#else
+ #define EIGEN_ARCH_ARM_OR_ARM64 0
+#endif
+#if defined(__mips__) || defined(__mips)
+ #define EIGEN_ARCH_MIPS 1
+#else
+ #define EIGEN_ARCH_MIPS 0
+#endif
+#if defined(__sparc__) || defined(__sparc)
+ #define EIGEN_ARCH_SPARC 1
+#else
+ #define EIGEN_ARCH_SPARC 0
+#endif
+#if defined(__ia64__)
+ #define EIGEN_ARCH_IA64 1
+#else
+ #define EIGEN_ARCH_IA64 0
+#endif
+#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC)
+ #define EIGEN_ARCH_PPC 1
+#else
+ #define EIGEN_ARCH_PPC 0
+#endif
+#if defined(__unix__) || defined(__unix)
+ #define EIGEN_OS_UNIX 1
+#else
+ #define EIGEN_OS_UNIX 0
+#endif
+#if defined(__linux__)
+ #define EIGEN_OS_LINUX 1
+#else
+ #define EIGEN_OS_LINUX 0
+#endif
+#if defined(__ANDROID__) || defined(ANDROID)
+ #define EIGEN_OS_ANDROID 1
+#else
+ #define EIGEN_OS_ANDROID 0
+#endif
+#if defined(__gnu_linux__) && !(EIGEN_OS_ANDROID)
+ #define EIGEN_OS_GNULINUX 1
+#else
+ #define EIGEN_OS_GNULINUX 0
+#endif
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__)
+ #define EIGEN_OS_BSD 1
+#else
+ #define EIGEN_OS_BSD 0
+#endif
+#if defined(__APPLE__)
+ #define EIGEN_OS_MAC 1
+#else
+ #define EIGEN_OS_MAC 0
+#endif
+#if defined(__QNX__)
+ #define EIGEN_OS_QNX 1
+#else
+ #define EIGEN_OS_QNX 0
+#endif
+#if defined(_WIN32)
+ #define EIGEN_OS_WIN 1
+#else
+ #define EIGEN_OS_WIN 0
+#endif
+#if defined(_WIN64)
+ #define EIGEN_OS_WIN64 1
+#else
+ #define EIGEN_OS_WIN64 0
+#endif
+#if defined(_WIN32_WCE)
+ #define EIGEN_OS_WINCE 1
+#else
+ #define EIGEN_OS_WINCE 0
+#endif
+#if defined(__CYGWIN__)
+ #define EIGEN_OS_CYGWIN 1
+#else
+ #define EIGEN_OS_CYGWIN 0
+#endif
+#if EIGEN_OS_WIN && !( EIGEN_OS_WINCE || EIGEN_OS_CYGWIN )
+ #define EIGEN_OS_WIN_STRICT 1
+#else
+ #define EIGEN_OS_WIN_STRICT 0
+#endif
+#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__))
+ #define EIGEN_OS_SUN 1
+#else
+ #define EIGEN_OS_SUN 0
+#endif
+#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))
+ #define EIGEN_OS_SOLARIS 1
+#else
+ #define EIGEN_OS_SOLARIS 0
+#endif
+#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG
+ #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
+#else
+ #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
+#endif
+#define EIGEN_NOT_A_MACRO
+#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
+#else
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor
+#endif
+#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
+#endif
+#ifdef __has_builtin
+# define EIGEN_HAS_BUILTIN(x) __has_builtin(x)
+#else
+# define EIGEN_HAS_BUILTIN(x) 0
+#endif
+#ifndef __has_feature
+# define __has_feature(x) 0
+#endif
+#ifndef EIGEN_MAX_CPP_VER
+#define EIGEN_MAX_CPP_VER 99
+#endif
+#if EIGEN_MAX_CPP_VER>=11 && (defined(__cplusplus) && (__cplusplus >= 201103L) || EIGEN_COMP_MSVC >= 1900)
+#define EIGEN_HAS_CXX11 1
+#else
+#define EIGEN_HAS_CXX11 0
+#endif
+#ifndef EIGEN_HAS_RVALUE_REFERENCES
+#if EIGEN_MAX_CPP_VER>=11 && \
+ (__has_feature(cxx_rvalue_references) || \
+ (defined(__cplusplus) && __cplusplus >= 201103L) || \
+ (EIGEN_COMP_MSVC >= 1600))
+ #define EIGEN_HAS_RVALUE_REFERENCES 1
+#else
+ #define EIGEN_HAS_RVALUE_REFERENCES 0
+#endif
+#endif
+#ifndef EIGEN_HAS_C99_MATH
+#if EIGEN_MAX_CPP_VER>=11 && \
+ ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \
+ || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \
+ || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)))
+ #define EIGEN_HAS_C99_MATH 1
+#else
+ #define EIGEN_HAS_C99_MATH 0
+#endif
+#endif
+#ifndef EIGEN_HAS_STD_RESULT_OF
+#if EIGEN_MAX_CPP_VER>=11 && ((__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)))
+#define EIGEN_HAS_STD_RESULT_OF 1
+#else
+#define EIGEN_HAS_STD_RESULT_OF 0
+#endif
+#endif
+#ifndef EIGEN_HAS_VARIADIC_TEMPLATES
+#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
+ && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) )
+#define EIGEN_HAS_VARIADIC_TEMPLATES 1
+#else
+#define EIGEN_HAS_VARIADIC_TEMPLATES 0
+#endif
+#endif
+#ifndef EIGEN_HAS_CONSTEXPR
+#ifdef __CUDACC__
+#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500))
+ #define EIGEN_HAS_CONSTEXPR 1
+#endif
+#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
+ (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)))
+#define EIGEN_HAS_CONSTEXPR 1
+#endif
+#ifndef EIGEN_HAS_CONSTEXPR
+#define EIGEN_HAS_CONSTEXPR 0
+#endif
+#endif
+#ifndef EIGEN_HAS_CXX11_MATH
+ #if EIGEN_MAX_CPP_VER>=11 && ((__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \
+ && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC))
+ #define EIGEN_HAS_CXX11_MATH 1
+ #else
+ #define EIGEN_HAS_CXX11_MATH 0
+ #endif
+#endif
+#ifndef EIGEN_HAS_CXX11_CONTAINERS
+ #if EIGEN_MAX_CPP_VER>=11 && \
+ ((__cplusplus > 201103L) \
+ || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
+ || EIGEN_COMP_MSVC >= 1900)
+ #define EIGEN_HAS_CXX11_CONTAINERS 1
+ #else
+ #define EIGEN_HAS_CXX11_CONTAINERS 0
+ #endif
+#endif
+#ifndef EIGEN_HAS_CXX11_NOEXCEPT
+ #if EIGEN_MAX_CPP_VER>=11 && \
+ (__has_feature(cxx_noexcept) \
+ || (__cplusplus > 201103L) \
+ || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
+ || EIGEN_COMP_MSVC >= 1900)
+ #define EIGEN_HAS_CXX11_NOEXCEPT 1
+ #else
+ #define EIGEN_HAS_CXX11_NOEXCEPT 0
+ #endif
+#endif
+#ifndef EIGEN_FAST_MATH
+#define EIGEN_FAST_MATH 1
+#endif
+#define EIGEN_DEBUG_VAR(x) std::cerr << #x << " = " << x << std::endl;
+#define EIGEN_CAT2(a,b) a ## b
+#define EIGEN_CAT(a,b) EIGEN_CAT2(a,b)
+#define EIGEN_COMMA ,
+#define EIGEN_MAKESTRING2(a) #a
+#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
+#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
+#define EIGEN_STRONG_INLINE __forceinline
+#else
+#define EIGEN_STRONG_INLINE inline
+#endif
+#if EIGEN_GNUC_AT_LEAST(4,2)
+#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
+#else
+#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
+#endif
+#if EIGEN_COMP_GNUC
+#define EIGEN_DONT_INLINE __attribute__((noinline))
+#elif EIGEN_COMP_MSVC
+#define EIGEN_DONT_INLINE __declspec(noinline)
+#else
+#define EIGEN_DONT_INLINE
+#endif
+#if EIGEN_COMP_GNUC
+#define EIGEN_PERMISSIVE_EXPR __extension__
+#else
+#define EIGEN_PERMISSIVE_EXPR
+#endif
+#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline
+#ifdef NDEBUG
+# ifndef EIGEN_NO_DEBUG
+# define EIGEN_NO_DEBUG
+# endif
+#endif
+#ifdef EIGEN_NO_DEBUG
+ #define eigen_plain_assert(x)
+#else
+ #if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO
+ namespace Eigen {
+ namespace internal {
+ inline bool copy_bool(bool b) { return b; }
+ }
+ }
+ #define eigen_plain_assert(x) assert(x)
+ #else
+ #include <cstdlib>
+ #include <iostream>
+ namespace Eigen {
+ namespace internal {
+ namespace {
+ EIGEN_DONT_INLINE bool copy_bool(bool b) { return b; }
+ }
+ inline void assert_fail(const char *condition, const char *function, const char *file, int line)
+ {
+ std::cerr << "assertion failed: " << condition << " in function " << function << " at " << file << ":" << line << std::endl;
+ abort();
+ }
+ }
+ }
+ #define eigen_plain_assert(x) \
+ do { \
+ if(!Eigen::internal::copy_bool(x)) \
+ Eigen::internal::assert_fail(EIGEN_MAKESTRING(x), __PRETTY_FUNCTION__, __FILE__, __LINE__); \
+ } while(false)
+ #endif
+#endif
+#ifndef eigen_assert
+#define eigen_assert(x) eigen_plain_assert(x)
+#endif
+#ifdef EIGEN_INTERNAL_DEBUGGING
+#define eigen_internal_assert(x) eigen_assert(x)
+#else
+#define eigen_internal_assert(x)
+#endif
+#ifdef EIGEN_NO_DEBUG
+#define EIGEN_ONLY_USED_FOR_DEBUG(x) EIGEN_UNUSED_VARIABLE(x)
+#else
+#define EIGEN_ONLY_USED_FOR_DEBUG(x)
+#endif
+#ifndef EIGEN_NO_DEPRECATED_WARNING
+ #if EIGEN_COMP_GNUC
+ #define EIGEN_DEPRECATED __attribute__((deprecated))
+ #elif EIGEN_COMP_MSVC
+ #define EIGEN_DEPRECATED __declspec(deprecated)
+ #else
+ #define EIGEN_DEPRECATED
+ #endif
+#else
+ #define EIGEN_DEPRECATED
+#endif
+#if EIGEN_COMP_GNUC
+#define EIGEN_UNUSED __attribute__((unused))
+#else
+#define EIGEN_UNUSED
+#endif
+namespace Eigen {
+ namespace internal {
+ template<typename T> EIGEN_DEVICE_FUNC void ignore_unused_variable(const T&) {}
+ }
+}
+#define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var);
+#if !defined(EIGEN_ASM_COMMENT)
+ #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64)
+ #define EIGEN_ASM_COMMENT(X) __asm__("#" X)
+ #else
+ #define EIGEN_ASM_COMMENT(X)
+ #endif
+#endif
+#if (defined __CUDACC__)
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
+#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
+#elif EIGEN_COMP_MSVC
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
+#elif EIGEN_COMP_SUNCC
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
+#else
+ #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
+#endif
+#if defined(EIGEN_DONT_VECTORIZE)
+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
+#elif defined(EIGEN_VECTORIZE_AVX512)
+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
+#elif defined(__AVX__)
+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
+#else
+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
+#endif
+#define EIGEN_MIN_ALIGN_BYTES 16
+#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
+#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
+#endif
+#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
+ #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
+ #undef EIGEN_MAX_STATIC_ALIGN_BYTES
+ #endif
+ #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
+#endif
+#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
+ #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
+ #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
+ #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
+ #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
+ #else
+ #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
+ #endif
+ #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
+ && !EIGEN_GCC3_OR_OLDER \
+ && !EIGEN_COMP_SUNCC \
+ && !EIGEN_OS_QNX
+ #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
+ #else
+ #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
+ #endif
+ #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
+ #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+ #else
+ #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
+ #endif
+#endif
+#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
+#undef EIGEN_MAX_STATIC_ALIGN_BYTES
+#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
+#endif
+#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
+ #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+#endif
+#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
+#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
+#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
+#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
+#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
+#else
+#define EIGEN_ALIGN_MAX
+#endif
+#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
+#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
+#endif
+#ifdef EIGEN_DONT_ALIGN
+ #ifdef EIGEN_MAX_ALIGN_BYTES
+ #undef EIGEN_MAX_ALIGN_BYTES
+ #endif
+ #define EIGEN_MAX_ALIGN_BYTES 0
+#elif !defined(EIGEN_MAX_ALIGN_BYTES)
+ #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+#endif
+#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
+#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+#else
+#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
+#endif
+#ifndef EIGEN_UNALIGNED_VECTORIZE
+#define EIGEN_UNALIGNED_VECTORIZE 1
+#endif
+#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
+ #define EIGEN_RESTRICT
+#endif
+#ifndef EIGEN_RESTRICT
+ #define EIGEN_RESTRICT __restrict
+#endif
+#ifndef EIGEN_STACK_ALLOCATION_LIMIT
+#define EIGEN_STACK_ALLOCATION_LIMIT 131072
+#endif
+#ifndef EIGEN_DEFAULT_IO_FORMAT
+#ifdef EIGEN_MAKING_DOCS
+#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat(3, 0, " ", "\n", "", "")
+#else
+#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat()
+#endif
+#endif
+#define EIGEN_EMPTY
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__))
+ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ using Base::operator =;
+#elif EIGEN_COMP_CLANG
+ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ using Base::operator =; \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \
+ template <typename OtherDerived> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; }
+#else
+ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ using Base::operator =; \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \
+ { \
+ Base::operator=(other); \
+ return *this; \
+ }
+#endif
+#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)
+#define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
+ typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; \
+ typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; \
+ typedef typename Base::CoeffReturnType CoeffReturnType; \
+ typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \
+ typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \
+ typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \
+ enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
+ ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \
+ Flags = Eigen::internal::traits<Derived>::Flags, \
+ SizeAtCompileTime = Base::SizeAtCompileTime, \
+ MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
+ IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \
+ using Base::derived; \
+ using Base::const_cast_derived;
+#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
+ EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
+ typedef typename Base::PacketScalar PacketScalar;
+#define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
+#define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)
+#define EIGEN_SIZE_MIN_PREFER_DYNAMIC(a,b) (((int)a == 0 || (int)b == 0) ? 0 \
+ : ((int)a == 1 || (int)b == 1) ? 1 \
+ : ((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
+ : ((int)a <= (int)b) ? (int)a : (int)b)
+#define EIGEN_SIZE_MIN_PREFER_FIXED(a,b) (((int)a == 0 || (int)b == 0) ? 0 \
+ : ((int)a == 1 || (int)b == 1) ? 1 \
+ : ((int)a == Dynamic && (int)b == Dynamic) ? Dynamic \
+ : ((int)a == Dynamic) ? (int)b \
+ : ((int)b == Dynamic) ? (int)a \
+ : ((int)a <= (int)b) ? (int)a : (int)b)
+#define EIGEN_SIZE_MAX(a,b) (((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
+ : ((int)a >= (int)b) ? (int)a : (int)b)
+#define EIGEN_LOGICAL_XOR(a,b) (((a) || (b)) && !((a) && (b)))
+#define EIGEN_IMPLIES(a,b) (!(a) || (b))
+#define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \
+ CwiseBinaryOp< \
+ EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \
+ typename internal::traits<LHS>::Scalar, \
+ typename internal::traits<RHS>::Scalar \
+ >, \
+ const LHS, \
+ const RHS \
+ >
+#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \
+ template<typename OtherDerived> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \
+ (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
+ { \
+ return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \
+ }
+#define EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,TYPEA,TYPEB) \
+ (Eigen::internal::has_ReturnType<Eigen::ScalarBinaryOpTraits<TYPEA,TYPEB,EIGEN_CAT(EIGEN_CAT(Eigen::internal::scalar_,OPNAME),_op)<TYPEA,TYPEB> > >::value)
+#define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR,SCALAR,OPNAME) \
+ CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<typename internal::traits<EXPR>::Scalar,SCALAR>, const EXPR, \
+ const typename internal::plain_constant_type<EXPR,SCALAR>::type>
+#define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR,EXPR,OPNAME) \
+ CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<SCALAR,typename internal::traits<EXPR>::Scalar>, \
+ const typename internal::plain_constant_type<EXPR,SCALAR>::type, const EXPR>
+#if EIGEN_COMP_MSVC_STRICT<=1600
+#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) typename internal::enable_if<true,X>::type
+#else
+#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) X
+#endif
+#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \
+ template <typename T> EIGEN_DEVICE_FUNC inline \
+ EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type,OPNAME))\
+ (METHOD)(const T& scalar) const { \
+ typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type PromotedT; \
+ return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \
+ typename internal::plain_constant_type<Derived,PromotedT>::type(derived().rows(), derived().cols(), internal::scalar_constant_op<PromotedT>(scalar))); \
+ }
+#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
+ template <typename T> EIGEN_DEVICE_FUNC inline friend \
+ EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type,Derived,OPNAME)) \
+ (METHOD)(const T& scalar, const StorageBaseType& matrix) { \
+ typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type PromotedT; \
+ return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \
+ typename internal::plain_constant_type<Derived,PromotedT>::type(matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op<PromotedT>(scalar)), matrix.derived()); \
+ }
+#define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \
+ EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
+ EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME)
+#ifdef EIGEN_EXCEPTIONS
+# define EIGEN_THROW_X(X) throw X
+# define EIGEN_THROW throw
+# define EIGEN_TRY try
+# define EIGEN_CATCH(X) catch (X)
+#else
+# ifdef __CUDA_ARCH__
+# define EIGEN_THROW_X(X) asm("trap;")
+# define EIGEN_THROW asm("trap;")
+# else
+# define EIGEN_THROW_X(X) std::abort()
+# define EIGEN_THROW std::abort()
+# endif
+# define EIGEN_TRY if (true)
+# define EIGEN_CATCH(X) else
+#endif
+#if EIGEN_HAS_CXX11_NOEXCEPT
+# define EIGEN_INCLUDE_TYPE_TRAITS
+# define EIGEN_NOEXCEPT noexcept
+# define EIGEN_NOEXCEPT_IF(x) noexcept(x)
+# define EIGEN_NO_THROW noexcept(true)
+# define EIGEN_EXCEPTION_SPEC(X) noexcept(false)
+#else
+# define EIGEN_NOEXCEPT
+# define EIGEN_NOEXCEPT_IF(x)
+# define EIGEN_NO_THROW throw()
+# define EIGEN_EXCEPTION_SPEC(X) throw(X)
+#endif
+#endif
+// end #include "src/Core/util/Macros.h"
+#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6)
+ #pragma GCC optimize ("-fno-ipa-cp-clone")
+#endif
+#include <complex>
+#if EIGEN_MAX_ALIGN_BYTES==0
+ #ifndef EIGEN_DONT_VECTORIZE
+ #define EIGEN_DONT_VECTORIZE
+ #endif
+#endif
+#if EIGEN_COMP_MSVC
+ #include <malloc.h>
+ #if (EIGEN_COMP_MSVC >= 1500)
+ #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
+ #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
+ #endif
+ #endif
+#else
+ #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
+ #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
+ #endif
+#endif
+#ifndef EIGEN_DONT_VECTORIZE
+ #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_SSE
+ #define EIGEN_VECTORIZE_SSE2
+ #ifdef __SSE3__
+ #define EIGEN_VECTORIZE_SSE3
+ #endif
+ #ifdef __SSSE3__
+ #define EIGEN_VECTORIZE_SSSE3
+ #endif
+ #ifdef __SSE4_1__
+ #define EIGEN_VECTORIZE_SSE4_1
+ #endif
+ #ifdef __SSE4_2__
+ #define EIGEN_VECTORIZE_SSE4_2
+ #endif
+ #ifdef __AVX__
+ #define EIGEN_VECTORIZE_AVX
+ #define EIGEN_VECTORIZE_SSE3
+ #define EIGEN_VECTORIZE_SSSE3
+ #define EIGEN_VECTORIZE_SSE4_1
+ #define EIGEN_VECTORIZE_SSE4_2
+ #endif
+ #ifdef __AVX2__
+ #define EIGEN_VECTORIZE_AVX2
+ #endif
+ #ifdef __FMA__
+ #define EIGEN_VECTORIZE_FMA
+ #endif
+ #if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512)
+ #define EIGEN_VECTORIZE_AVX512
+ #define EIGEN_VECTORIZE_AVX2
+ #define EIGEN_VECTORIZE_AVX
+ #define EIGEN_VECTORIZE_FMA
+ #ifdef __AVX512DQ__
+ #define EIGEN_VECTORIZE_AVX512DQ
+ #endif
+ #endif
+ extern "C" {
+ #if EIGEN_COMP_ICC >= 1110
+ #include <immintrin.h>
+ #else
+ #include <mmintrin.h>
+ #include <emmintrin.h>
+ #include <xmmintrin.h>
+ #ifdef EIGEN_VECTORIZE_SSE3
+ #include <pmmintrin.h>
+ #endif
+ #ifdef EIGEN_VECTORIZE_SSSE3
+ #include <tmmintrin.h>
+ #endif
+ #ifdef EIGEN_VECTORIZE_SSE4_1
+ #include <smmintrin.h>
+ #endif
+ #ifdef EIGEN_VECTORIZE_SSE4_2
+ #include <nmmintrin.h>
+ #endif
+ #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
+ #include <immintrin.h>
+ #endif
+ #endif
+ }
+ #elif defined __VSX__
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_VSX
+ #include <altivec.h>
+ #undef bool
+ #undef vector
+ #undef pixel
+ #elif defined __ALTIVEC__
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_ALTIVEC
+ #include <altivec.h>
+ #undef bool
+ #undef vector
+ #undef pixel
+ #elif (defined __ARM_NEON) || (defined __ARM_NEON__)
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_NEON
+ #include <arm_neon.h>
+ #elif (defined __s390x__ && defined __VEC__)
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_ZVECTOR
+ #include <vecintrin.h>
+ #endif
+#endif
+#if defined(__F16C__) && !defined(EIGEN_COMP_CLANG)
+ #define EIGEN_HAS_FP16_C
+#endif
+#if defined __CUDACC__
+ #define EIGEN_VECTORIZE_CUDA
+ #include <vector_types.h>
+ #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+ #define EIGEN_HAS_CUDA_FP16
+ #endif
+#endif
+#if defined EIGEN_HAS_CUDA_FP16
+ #include <host_defines.h>
+ #include <cuda_fp16.h>
+#endif
+#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
+ #define EIGEN_HAS_OPENMP
+#endif
+#ifdef EIGEN_HAS_OPENMP
+#include <omp.h>
+#endif
+#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM
+#define EIGEN_HAS_ERRNO
+#endif
+#ifdef EIGEN_HAS_ERRNO
+#include <cerrno>
+#endif
+#include <cstddef>
+#include <cstdlib>
+#include <cmath>
+#include <cassert>
+#include <functional>
+#include <iosfwd>
+#include <cstring>
+#include <string>
+#include <limits>
+#include <climits>
+#include <algorithm>
+#ifdef EIGEN_INCLUDE_TYPE_TRAITS
+#include <type_traits>
+#endif
+#ifdef EIGEN_DEBUG_ASSIGN
+#include <iostream>
+#endif
+#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE
+ #include <intrin.h>
+#endif
+namespace Eigen {
+inline static const char *SimdInstructionSetsInUse(void) {
+#if defined(EIGEN_VECTORIZE_AVX512)
+ return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_AVX)
+ return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_SSE4_2)
+ return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_SSE4_1)
+ return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
+#elif defined(EIGEN_VECTORIZE_SSSE3)
+ return "SSE, SSE2, SSE3, SSSE3";
+#elif defined(EIGEN_VECTORIZE_SSE3)
+ return "SSE, SSE2, SSE3";
+#elif defined(EIGEN_VECTORIZE_SSE2)
+ return "SSE, SSE2";
+#elif defined(EIGEN_VECTORIZE_ALTIVEC)
+ return "AltiVec";
+#elif defined(EIGEN_VECTORIZE_VSX)
+ return "VSX";
+#elif defined(EIGEN_VECTORIZE_NEON)
+ return "ARM NEON";
+#elif defined(EIGEN_VECTORIZE_ZVECTOR)
+ return "S390X ZVECTOR";
+#else
+ return "None";
+#endif
+}
+}
+#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT
+#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
+#endif
+namespace Eigen {
+using std::size_t;
+using std::ptrdiff_t;
+}
+// #include "src/Core/util/Constants.h"
+// Inlined copy of Eigen's src/Core/util/Constants.h: the compile-time
+// sentinels, expression-flag bits, option enums, and empty tag structs used
+// throughout the library. Vendored upstream code -- do not modify logic.
+#ifndef EIGEN_CONSTANTS_H
+#define EIGEN_CONSTANTS_H
+namespace Eigen {
+// Sentinel values: Dynamic marks a size/stride unknown at compile time;
+// DynamicIndex / Infinity are analogous sentinels for indices and bandwidths.
+const int Dynamic = -1;
+const int DynamicIndex = 0xffffff;
+const int Infinity = -1;
+const int HugeCost = 10000;
+// Expression Flags bit masks (combined into the Flags enum of expression
+// traits). EvalBeforeAssigningBit and AlignedBit are kept only for backward
+// compatibility, hence EIGEN_DEPRECATED.
+const unsigned int RowMajorBit = 0x1;
+const unsigned int EvalBeforeNestingBit = 0x2;
+EIGEN_DEPRECATED
+const unsigned int EvalBeforeAssigningBit = 0x4;
+const unsigned int PacketAccessBit = 0x8;
+#ifdef EIGEN_VECTORIZE
+const unsigned int ActualPacketAccessBit = PacketAccessBit;
+#else
+const unsigned int ActualPacketAccessBit = 0x0;
+#endif
+const unsigned int LinearAccessBit = 0x10;
+const unsigned int LvalueBit = 0x20;
+const unsigned int DirectAccessBit = 0x40;
+EIGEN_DEPRECATED const unsigned int AlignedBit = 0x80;
+const unsigned int NestByRefBit = 0x100;
+const unsigned int NoPreferredStorageOrderBit = 0x200;
+const unsigned int CompressedAccessBit = 0x400;
+// Flags propagated from operands to compound expressions.
+const unsigned int HereditaryBits = RowMajorBit
+ | EvalBeforeNestingBit;
+// Triangular/self-adjoint view selectors (bit-combinable).
+enum UpLoType {
+ Lower=0x1,
+ Upper=0x2,
+ UnitDiag=0x4,
+ ZeroDiag=0x8,
+ UnitLower=UnitDiag|Lower,
+ UnitUpper=UnitDiag|Upper,
+ StrictlyLower=ZeroDiag|Lower,
+ StrictlyUpper=ZeroDiag|Upper,
+ SelfAdjoint=0x10,
+ Symmetric=0x20
+};
+// Enumerator values double as byte counts; AlignedMax is chosen at
+// preprocessing time from EIGEN_MAX_ALIGN_BYTES.
+enum AlignmentType {
+ Unaligned=0,
+ Aligned8=8,
+ Aligned16=16,
+ Aligned32=32,
+ Aligned64=64,
+ Aligned128=128,
+ AlignedMask=255,
+ Aligned=16,
+#if EIGEN_MAX_ALIGN_BYTES==128
+ AlignedMax = Aligned128
+#elif EIGEN_MAX_ALIGN_BYTES==64
+ AlignedMax = Aligned64
+#elif EIGEN_MAX_ALIGN_BYTES==32
+ AlignedMax = Aligned32
+#elif EIGEN_MAX_ALIGN_BYTES==16
+ AlignedMax = Aligned16
+#elif EIGEN_MAX_ALIGN_BYTES==8
+ AlignedMax = Aligned8
+#elif EIGEN_MAX_ALIGN_BYTES==0
+ AlignedMax = Unaligned
+#else
+#error Invalid value for EIGEN_MAX_ALIGN_BYTES
+#endif
+};
+enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
+enum DirectionType {
+ Vertical,
+ Horizontal,
+ BothDirections
+};
+// Internal strategy selectors for the assignment/evaluation kernels.
+enum TraversalType {
+ DefaultTraversal,
+ LinearTraversal,
+ InnerVectorizedTraversal,
+ LinearVectorizedTraversal,
+ SliceVectorizedTraversal,
+ InvalidTraversal,
+ AllAtOnceTraversal
+};
+enum UnrollingType {
+ NoUnrolling,
+ InnerUnrolling,
+ CompleteUnrolling
+};
+enum SpecializedType {
+ Specialized,
+ BuiltIn
+};
+// Matrix/Array template _Options bits: storage order and alignment policy.
+enum StorageOptions {
+ ColMajor = 0,
+ RowMajor = 0x1,
+ AutoAlign = 0,
+ DontAlign = 0x2
+};
+enum SideType {
+ OnTheLeft = 1,
+ OnTheRight = 2
+};
+// Single-value tag enums used for overload disambiguation.
+enum NoChange_t { NoChange };
+enum Sequential_t { Sequential };
+enum Default_t { Default };
+enum AmbiVectorMode {
+ IsDense = 0,
+ IsSparse
+};
+enum AccessorLevels {
+ ReadOnlyAccessors,
+ WriteAccessors,
+ DirectAccessors,
+ DirectWriteAccessors
+};
+// Options for the decomposition classes (LU/QR/SVD/eigensolvers);
+// bit-combinable, with masks for the SVD and generalized-eigenproblem groups.
+enum DecompositionOptions {
+ Pivoting = 0x01,
+ NoPivoting = 0x02,
+ ComputeFullU = 0x04,
+ ComputeThinU = 0x08,
+ ComputeFullV = 0x10,
+ ComputeThinV = 0x20,
+ EigenvaluesOnly = 0x40,
+ ComputeEigenvectors = 0x80,
+ EigVecMask = EigenvaluesOnly | ComputeEigenvectors,
+ Ax_lBx = 0x100,
+ ABx_lx = 0x200,
+ BAx_lx = 0x400,
+ GenEigMask = Ax_lBx | ABx_lx | BAx_lx
+};
+enum QRPreconditioners {
+ NoQRPreconditioner,
+ HouseholderQRPreconditioner,
+ ColPivHouseholderQRPreconditioner,
+ FullPivHouseholderQRPreconditioner
+};
+// X11's X.h #defines Success, which would collide with the enumerator below.
+#ifdef Success
+#error The preprocessor symbol 'Success' is defined, possibly by the X11 header file X.h
+#endif
+enum ComputationInfo {
+ Success = 0,
+ NumericalIssue = 1,
+ NoConvergence = 2,
+ InvalidInput = 3
+};
+enum TransformTraits {
+ Isometry = 0x1,
+ Affine = 0x2,
+ AffineCompact = 0x10 | Affine,
+ Projective = 0x20
+};
+// Architecture::Target is resolved from the EIGEN_VECTORIZE_* macro that is
+// active for this build.
+namespace Architecture
+{
+ enum Type {
+ Generic = 0x0,
+ SSE = 0x1,
+ AltiVec = 0x2,
+ VSX = 0x3,
+ NEON = 0x4,
+#if defined EIGEN_VECTORIZE_SSE
+ Target = SSE
+#elif defined EIGEN_VECTORIZE_ALTIVEC
+ Target = AltiVec
+#elif defined EIGEN_VECTORIZE_VSX
+ Target = VSX
+#elif defined EIGEN_VECTORIZE_NEON
+ Target = NEON
+#else
+ Target = Generic
+#endif
+ };
+}
+enum ProductImplType
+{ DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
+enum Action {GetAction, SetAction};
+// Empty tag structs: storage kinds, expression kinds (Matrix vs Array), and
+// the "shape" tags used by evaluator dispatch; debugName() is for diagnostics.
+struct Dense {};
+struct Sparse {};
+struct SolverStorage {};
+struct PermutationStorage {};
+struct TranspositionsStorage {};
+struct MatrixXpr {};
+struct ArrayXpr {};
+struct DenseShape { static std::string debugName() { return "DenseShape"; } };
+struct SolverShape { static std::string debugName() { return "SolverShape"; } };
+struct HomogeneousShape { static std::string debugName() { return "HomogeneousShape"; } };
+struct DiagonalShape { static std::string debugName() { return "DiagonalShape"; } };
+struct BandShape { static std::string debugName() { return "BandShape"; } };
+struct TriangularShape { static std::string debugName() { return "TriangularShape"; } };
+struct SelfAdjointShape { static std::string debugName() { return "SelfAdjointShape"; } };
+struct PermutationShape { static std::string debugName() { return "PermutationShape"; } };
+struct TranspositionsShape { static std::string debugName() { return "TranspositionsShape"; } };
+struct SparseShape { static std::string debugName() { return "SparseShape"; } };
+namespace internal {
+struct IndexBased {};
+struct IteratorBased {};
+enum ComparisonName {
+ cmp_EQ = 0,
+ cmp_LT = 1,
+ cmp_LE = 2,
+ cmp_UNORD = 3,
+ cmp_NEQ = 4,
+ cmp_GT = 5,
+ cmp_GE = 6
+};
+}
+}
+#endif
+// end #include "src/Core/util/Constants.h"
+// #include "src/Core/util/Meta.h"
+// Inlined copy of Eigen's src/Core/util/Meta.h: hand-rolled C++03-compatible
+// type traits (Eigen supports pre-C++11 compilers, so <type_traits> is not
+// assumed), SFINAE-based feature detectors, and small compile-time math
+// helpers. Vendored upstream code -- do not modify logic.
+#ifndef EIGEN_META_H
+#define EIGEN_META_H
+#if defined(__CUDA_ARCH__)
+#include <cfloat>
+#include <math_constants.h>
+#endif
+#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L
+#include <cstdint>
+#endif
+namespace Eigen {
+typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex;
+typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE Index;
+namespace internal {
+// Exact-width pointer integer types when <cstdint> is available (recent ICC
+// in C++11 mode); otherwise fall back to ptrdiff_t/size_t.
+#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L
+typedef std::intptr_t IntPtr;
+typedef std::uintptr_t UIntPtr;
+#else
+typedef std::ptrdiff_t IntPtr;
+typedef std::size_t UIntPtr;
+#endif
+// Minimal re-implementations of the <type_traits> facilities Eigen needs.
+struct true_type { enum { value = 1 }; };
+struct false_type { enum { value = 0 }; };
+template<bool Condition, typename Then, typename Else>
+struct conditional { typedef Then type; };
+template<typename Then, typename Else>
+struct conditional <false, Then, Else> { typedef Else type; };
+template<typename T, typename U> struct is_same { enum { value = 0 }; };
+template<typename T> struct is_same<T,T> { enum { value = 1 }; };
+template<typename T> struct remove_reference { typedef T type; };
+template<typename T> struct remove_reference<T&> { typedef T type; };
+template<typename T> struct remove_pointer { typedef T type; };
+template<typename T> struct remove_pointer<T*> { typedef T type; };
+template<typename T> struct remove_pointer<T*const> { typedef T type; };
+template <class T> struct remove_const { typedef T type; };
+template <class T> struct remove_const<const T> { typedef T type; };
+template <class T> struct remove_const<const T[]> { typedef T type[]; };
+template <class T, unsigned int Size> struct remove_const<const T[Size]> { typedef T type[Size]; };
+// remove_all strips const, references and pointers recursively.
+template<typename T> struct remove_all { typedef T type; };
+template<typename T> struct remove_all<const T> { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T const&> { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T&> { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T const*> { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T*> { typedef typename remove_all<T>::type type; };
+template<typename T> struct is_arithmetic { enum { value = false }; };
+template<> struct is_arithmetic<float> { enum { value = true }; };
+template<> struct is_arithmetic<double> { enum { value = true }; };
+template<> struct is_arithmetic<long double> { enum { value = true }; };
+template<> struct is_arithmetic<bool> { enum { value = true }; };
+template<> struct is_arithmetic<char> { enum { value = true }; };
+template<> struct is_arithmetic<signed char> { enum { value = true }; };
+template<> struct is_arithmetic<unsigned char> { enum { value = true }; };
+template<> struct is_arithmetic<signed short> { enum { value = true }; };
+template<> struct is_arithmetic<unsigned short>{ enum { value = true }; };
+template<> struct is_arithmetic<signed int> { enum { value = true }; };
+template<> struct is_arithmetic<unsigned int> { enum { value = true }; };
+template<> struct is_arithmetic<signed long> { enum { value = true }; };
+template<> struct is_arithmetic<unsigned long> { enum { value = true }; };
+template<typename T> struct is_integral { enum { value = false }; };
+template<> struct is_integral<bool> { enum { value = true }; };
+template<> struct is_integral<char> { enum { value = true }; };
+template<> struct is_integral<signed char> { enum { value = true }; };
+template<> struct is_integral<unsigned char> { enum { value = true }; };
+template<> struct is_integral<signed short> { enum { value = true }; };
+template<> struct is_integral<unsigned short> { enum { value = true }; };
+template<> struct is_integral<signed int> { enum { value = true }; };
+template<> struct is_integral<unsigned int> { enum { value = true }; };
+template<> struct is_integral<signed long> { enum { value = true }; };
+template<> struct is_integral<unsigned long> { enum { value = true }; };
+template <typename T> struct add_const { typedef const T type; };
+template <typename T> struct add_const<T&> { typedef T& type; };
+template <typename T> struct is_const { enum { value = 0 }; };
+template <typename T> struct is_const<T const> { enum { value = 1 }; };
+template<typename T> struct add_const_on_value_type { typedef const T type; };
+template<typename T> struct add_const_on_value_type<T&> { typedef T const& type; };
+template<typename T> struct add_const_on_value_type<T*> { typedef T const* type; };
+template<typename T> struct add_const_on_value_type<T* const> { typedef T const* const type; };
+template<typename T> struct add_const_on_value_type<T const* const> { typedef T const* const type; };
+// Classic sizeof-based convertibility probe: overload resolution picks the
+// exact-match test(const To&, int) when From converts to To, the catch-all
+// otherwise, and the two return types have distinct sizes.
+template<typename From, typename To>
+struct is_convertible_impl
+{
+private:
+ struct any_conversion
+ {
+ template <typename T> any_conversion(const volatile T&);
+ template <typename T> any_conversion(T&);
+ };
+ struct yes {int a[1];};
+ struct no {int a[2];};
+ static yes test(const To&, int);
+ static no test(any_conversion, ...);
+public:
+ static From ms_from;
+#ifdef __INTEL_COMPILER
+ #pragma warning push
+ #pragma warning ( disable : 2259 )
+#endif
+ enum { value = sizeof(test(ms_from, 0))==sizeof(yes) };
+#ifdef __INTEL_COMPILER
+ #pragma warning pop
+#endif
+};
+template<typename From, typename To>
+struct is_convertible
+{
+ enum { value = is_convertible_impl<typename remove_all<From>::type,
+ typename remove_all<To >::type>::value };
+};
+template<bool Condition, typename T=void> struct enable_if;
+template<typename T> struct enable_if<true,T>
+{ typedef T type; };
+// On CUDA device code std::numeric_limits is unavailable; provide a minimal
+// replacement built on CUDA/C limit macros. Selected via numext below.
+#if defined(__CUDA_ARCH__)
+#if !defined(__FLT_EPSILON__)
+#define __FLT_EPSILON__ FLT_EPSILON
+#define __DBL_EPSILON__ DBL_EPSILON
+#endif
+namespace device {
+template<typename T> struct numeric_limits
+{
+ EIGEN_DEVICE_FUNC
+ static T epsilon() { return 0; }
+ static T (max)() { assert(false && "Highest not supported for this type"); }
+ static T (min)() { assert(false && "Lowest not supported for this type"); }
+ static T infinity() { assert(false && "Infinity not supported for this type"); }
+ static T quiet_NaN() { assert(false && "quiet_NaN not supported for this type"); }
+};
+template<> struct numeric_limits<float>
+{
+ EIGEN_DEVICE_FUNC
+ static float epsilon() { return __FLT_EPSILON__; }
+ EIGEN_DEVICE_FUNC
+ static float (max)() { return CUDART_MAX_NORMAL_F; }
+ EIGEN_DEVICE_FUNC
+ static float (min)() { return FLT_MIN; }
+ EIGEN_DEVICE_FUNC
+ static float infinity() { return CUDART_INF_F; }
+ EIGEN_DEVICE_FUNC
+ static float quiet_NaN() { return CUDART_NAN_F; }
+};
+template<> struct numeric_limits<double>
+{
+ EIGEN_DEVICE_FUNC
+ static double epsilon() { return __DBL_EPSILON__; }
+ EIGEN_DEVICE_FUNC
+ static double (max)() { return DBL_MAX; }
+ EIGEN_DEVICE_FUNC
+ static double (min)() { return DBL_MIN; }
+ EIGEN_DEVICE_FUNC
+ static double infinity() { return CUDART_INF; }
+ EIGEN_DEVICE_FUNC
+ static double quiet_NaN() { return CUDART_NAN; }
+};
+template<> struct numeric_limits<int>
+{
+ EIGEN_DEVICE_FUNC
+ static int epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static int (max)() { return INT_MAX; }
+ EIGEN_DEVICE_FUNC
+ static int (min)() { return INT_MIN; }
+};
+template<> struct numeric_limits<unsigned int>
+{
+ EIGEN_DEVICE_FUNC
+ static unsigned int epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static unsigned int (max)() { return UINT_MAX; }
+ EIGEN_DEVICE_FUNC
+ static unsigned int (min)() { return 0; }
+};
+template<> struct numeric_limits<long>
+{
+ EIGEN_DEVICE_FUNC
+ static long epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static long (max)() { return LONG_MAX; }
+ EIGEN_DEVICE_FUNC
+ static long (min)() { return LONG_MIN; }
+};
+template<> struct numeric_limits<unsigned long>
+{
+ EIGEN_DEVICE_FUNC
+ static unsigned long epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static unsigned long (max)() { return ULONG_MAX; }
+ EIGEN_DEVICE_FUNC
+ static unsigned long (min)() { return 0; }
+};
+template<> struct numeric_limits<long long>
+{
+ EIGEN_DEVICE_FUNC
+ static long long epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static long long (max)() { return LLONG_MAX; }
+ EIGEN_DEVICE_FUNC
+ static long long (min)() { return LLONG_MIN; }
+};
+template<> struct numeric_limits<unsigned long long>
+{
+ EIGEN_DEVICE_FUNC
+ static unsigned long long epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static unsigned long long (max)() { return ULLONG_MAX; }
+ EIGEN_DEVICE_FUNC
+ static unsigned long long (min)() { return 0; }
+};
+}
+#endif
+// C++03-style noncopyable base: copy operations declared private, undefined.
+class noncopyable
+{
+ EIGEN_DEVICE_FUNC noncopyable(const noncopyable&);
+ EIGEN_DEVICE_FUNC const noncopyable& operator=(const noncopyable&);
+protected:
+ EIGEN_DEVICE_FUNC noncopyable() {}
+ EIGEN_DEVICE_FUNC ~noncopyable() {}
+};
+// result_of: delegate to std::result_of when available, otherwise emulate it
+// by probing (in this order) a nested ::result_type, a TR1-style nested
+// result<> template, or falling back to the (decayed) first argument type.
+#if EIGEN_HAS_STD_RESULT_OF
+template<typename T> struct result_of {
+ typedef typename std::result_of<T>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+#else
+template<typename T> struct result_of { };
+// Distinct-size marker types so the probe result can be read via sizeof.
+struct has_none {int a[1];};
+struct has_std_result_type {int a[2];};
+struct has_tr1_result {int a[3];};
+template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)>
+struct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;};
+template<typename Func, typename ArgType>
+struct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typedef typename Func::result_type type;};
+template<typename Func, typename ArgType>
+struct unary_result_of_select<Func, ArgType, sizeof(has_tr1_result)> {typedef typename Func::template result<Func(ArgType)>::type type;};
+template<typename Func, typename ArgType>
+struct result_of<Func(ArgType)> {
+ template<typename T>
+ static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
+ template<typename T>
+ static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0);
+ static has_none testFunctor(...);
+ enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+ typedef typename unary_result_of_select<Func, ArgType, FunctorType>::type type;
+};
+template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(has_none)>
+struct binary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};
+template<typename Func, typename ArgType0, typename ArgType1>
+struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_std_result_type)>
+{typedef typename Func::result_type type;};
+template<typename Func, typename ArgType0, typename ArgType1>
+struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_tr1_result)>
+{typedef typename Func::template result<Func(ArgType0,ArgType1)>::type type;};
+template<typename Func, typename ArgType0, typename ArgType1>
+struct result_of<Func(ArgType0,ArgType1)> {
+ template<typename T>
+ static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
+ template<typename T>
+ static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0);
+ static has_none testFunctor(...);
+ enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+ typedef typename binary_result_of_select<Func, ArgType0, ArgType1, FunctorType>::type type;
+};
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2, int SizeOf=sizeof(has_none)>
+struct ternary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
+struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_std_result_type)>
+{typedef typename Func::result_type type;};
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
+struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_tr1_result)>
+{typedef typename Func::template result<Func(ArgType0,ArgType1,ArgType2)>::type type;};
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
+struct result_of<Func(ArgType0,ArgType1,ArgType2)> {
+ template<typename T>
+ static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
+ template<typename T>
+ static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1,ArgType2)>::type const * = 0);
+ static has_none testFunctor(...);
+ enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+ typedef typename ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, FunctorType>::type type;
+};
+#endif
+// SFINAE detectors: has_ReturnType checks for a nested ::ReturnType;
+// has_{nullary,unary,binary}_operator check for operator() taking 0/1/2
+// IndexType arguments, using sizeof on the probed call's return type.
+struct meta_yes { char a[1]; };
+struct meta_no { char a[2]; };
+template <typename T>
+struct has_ReturnType
+{
+ template <typename C> static meta_yes testFunctor(typename C::ReturnType const *);
+ template <typename C> static meta_no testFunctor(...);
+ enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) };
+};
+template<typename T> const T* return_ptr();
+template <typename T, typename IndexType=Index>
+struct has_nullary_operator
+{
+ template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()())>0)>::type * = 0);
+ static meta_no testFunctor(...);
+ enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
+};
+template <typename T, typename IndexType=Index>
+struct has_unary_operator
+{
+ template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0)))>0)>::type * = 0);
+ static meta_no testFunctor(...);
+ enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
+};
+template <typename T, typename IndexType=Index>
+struct has_binary_operator
+{
+ template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
+ static meta_no testFunctor(...);
+ enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
+};
+// Compile-time integer square root by template-recursive bisection on
+// [InfX, SupX]; the Done specialization terminates the recursion.
+template<int Y,
+ int InfX = 0,
+ int SupX = ((Y==1) ? 1 : Y/2),
+ bool Done = ((SupX-InfX)<=1 ? true : ((SupX*SupX <= Y) && ((SupX+1)*(SupX+1) > Y))) >
+class meta_sqrt
+{
+ enum {
+ MidX = (InfX+SupX)/2,
+ TakeInf = MidX*MidX > Y ? 1 : 0,
+ NewInf = int(TakeInf) ? InfX : int(MidX),
+ NewSup = int(TakeInf) ? int(MidX) : SupX
+ };
+ public:
+ enum { ret = meta_sqrt<Y,NewInf,NewSup>::ret };
+};
+template<int Y, int InfX, int SupX>
+class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };
+// Compile-time LCM: increments K until A*K is divisible by B.
+template<int A, int B, int K=1, bool Done = ((A*K)%B)==0>
+struct meta_least_common_multiple
+{
+ enum { ret = meta_least_common_multiple<A,B,K+1>::ret };
+};
+template<int A, int B, int K>
+struct meta_least_common_multiple<A,B,K,true>
+{
+ enum { ret = A*K };
+};
+template<typename T, typename U> struct scalar_product_traits
+{
+ enum { Defined = 0 };
+};
+}
+// numext: device-safe wrappers that pick CUDA-compatible implementations of
+// swap and numeric_limits when compiling device code, std:: otherwise.
+namespace numext {
+#if defined(__CUDA_ARCH__)
+template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; }
+#else
+template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
+#endif
+#if defined(__CUDA_ARCH__)
+using internal::device::numeric_limits;
+#else
+using std::numeric_limits;
+#endif
+// Integer ceiling division: ceil(a/b) for positive a, b.
+template<typename T>
+T div_ceil(const T &a, const T &b)
+{
+ return (a+b-1) / b;
+}
+}
+}
+#endif
+// end #include "src/Core/util/Meta.h"
+// #include "src/Core/util/ForwardDeclarations.h"
+// Inlined copy of Eigen's src/Core/util/ForwardDeclarations.h: forward
+// declarations of every public class/struct template (with their default
+// template arguments) so the headers that follow can reference each other.
+// Vendored upstream code -- do not modify logic.
+#ifndef EIGEN_FORWARDDECLARATIONS_H
+#define EIGEN_FORWARDDECLARATIONS_H
+namespace Eigen {
+namespace internal {
+// traits<T>: the central metadata hook; const T shares T's traits.
+template<typename T> struct traits;
+template<typename T> struct traits<const T> : traits<T> {};
+template<typename Derived> struct has_direct_access
+{
+ enum { ret = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0 };
+};
+// Maps an expression's Flags bits to one of the AccessorLevels enumerators.
+template<typename Derived> struct accessors_level
+{
+ enum { has_direct_access = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0,
+ has_write_access = (traits<Derived>::Flags & LvalueBit) ? 1 : 0,
+ value = has_direct_access ? (has_write_access ? DirectWriteAccessors : DirectAccessors)
+ : (has_write_access ? WriteAccessors : ReadOnlyAccessors)
+ };
+};
+template<typename T> struct evaluator_traits;
+template< typename T> struct evaluator;
+}
+template<typename T> struct NumTraits;
+template<typename Derived> struct EigenBase;
+template<typename Derived> class DenseBase;
+template<typename Derived> class PlainObjectBase;
+template<typename Derived,
+ int Level = internal::accessors_level<Derived>::value >
+class DenseCoeffsBase;
+// Matrix: default storage order depends on vector-ness; the EIGEN_GNUC_AT(3,4)
+// branch works around a GCC 3.4 parsing quirk with an equivalent expression.
+template<typename _Scalar, int _Rows, int _Cols,
+ int _Options = AutoAlign |
+#if EIGEN_GNUC_AT(3,4)
+ ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+ : !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
+ : Eigen::ColMajor ),
+#else
+ ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+ : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
+ : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
+#endif
+ int _MaxRows = _Rows,
+ int _MaxCols = _Cols
+> class Matrix;
+template<typename Derived> class MatrixBase;
+template<typename Derived> class ArrayBase;
+// Core expression templates.
+template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged;
+template<typename ExpressionType, template <typename> class StorageBase > class NoAlias;
+template<typename ExpressionType> class NestByValue;
+template<typename ExpressionType> class ForceAlignedAccess;
+template<typename ExpressionType> class SwapWrapper;
+template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block;
+template<typename MatrixType, int Size=Dynamic> class VectorBlock;
+template<typename MatrixType> class Transpose;
+template<typename MatrixType> class Conjugate;
+template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
+template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
+template<typename ViewOp, typename MatrixType> class CwiseUnaryView;
+template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
+template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3> class CwiseTernaryOp;
+template<typename Decomposition, typename Rhstype> class Solve;
+template<typename XprType> class Inverse;
+template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
+// Diagonal, permutation and transposition types.
+template<typename Derived> class DiagonalBase;
+template<typename _DiagonalVectorType> class DiagonalWrapper;
+template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix;
+template<typename MatrixType, typename DiagonalType, int ProductOrder> class DiagonalProduct;
+template<typename MatrixType, int Index = 0> class Diagonal;
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class PermutationMatrix;
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class Transpositions;
+template<typename Derived> class PermutationBase;
+template<typename Derived> class TranspositionsBase;
+template<typename _IndicesType> class PermutationWrapper;
+template<typename _IndicesType> class TranspositionsWrapper;
+// Map/Ref types for viewing external memory.
+template<typename Derived,
+ int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors
+> class MapBase;
+template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride;
+template<int Value = Dynamic> class InnerStride;
+template<int Value = Dynamic> class OuterStride;
+template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map;
+template<typename Derived> class RefBase;
+template<typename PlainObjectType, int Options = 0,
+ typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref;
+template<typename Derived> class TriangularBase;
+template<typename MatrixType, unsigned int Mode> class TriangularView;
+template<typename MatrixType, unsigned int Mode> class SelfAdjointView;
+template<typename MatrixType> class SparseView;
+template<typename ExpressionType> class WithFormat;
+template<typename MatrixType> struct CommaInitializer;
+template<typename Derived> class ReturnByValue;
+template<typename ExpressionType> class ArrayWrapper;
+template<typename Derived> class SolverBase;
+template<typename XprType> class InnerIterator;
+namespace internal {
+template<typename DecompositionType> struct kernel_retval_base;
+template<typename DecompositionType> struct kernel_retval;
+template<typename DecompositionType> struct image_retval_base;
+template<typename DecompositionType> struct image_retval;
+}
+namespace internal {
+template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynamic, int Subs=Dynamic, int Options=0> class BandMatrix;
+}
+// Product-evaluation machinery dispatched on operand shapes/scalars.
+namespace internal {
+template<typename Lhs, typename Rhs> struct product_type;
+template<bool> struct EnableIf;
+template< typename T,
+ int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret,
+ typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
+ typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
+ typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
+ typename RhsScalar = typename traits<typename T::Rhs>::Scalar
+ > struct product_evaluator;
+}
+template<typename Lhs, typename Rhs,
+ int ProductType = internal::product_type<Lhs,Rhs>::value>
+struct ProductReturnType;
+template<typename Lhs, typename Rhs> struct LazyProductReturnType;
+// Forward declarations of the coefficient-wise functor structs.
+namespace internal {
+template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct conj_helper;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_sum_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_difference_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_conj_product_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_min_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_max_op;
+template<typename Scalar> struct scalar_opposite_op;
+template<typename Scalar> struct scalar_conjugate_op;
+template<typename Scalar> struct scalar_real_op;
+template<typename Scalar> struct scalar_imag_op;
+template<typename Scalar> struct scalar_abs_op;
+template<typename Scalar> struct scalar_abs2_op;
+template<typename Scalar> struct scalar_sqrt_op;
+template<typename Scalar> struct scalar_rsqrt_op;
+template<typename Scalar> struct scalar_exp_op;
+template<typename Scalar> struct scalar_log_op;
+template<typename Scalar> struct scalar_cos_op;
+template<typename Scalar> struct scalar_sin_op;
+template<typename Scalar> struct scalar_acos_op;
+template<typename Scalar> struct scalar_asin_op;
+template<typename Scalar> struct scalar_tan_op;
+template<typename Scalar> struct scalar_inverse_op;
+template<typename Scalar> struct scalar_square_op;
+template<typename Scalar> struct scalar_cube_op;
+template<typename Scalar, typename NewType> struct scalar_cast_op;
+template<typename Scalar> struct scalar_random_op;
+template<typename Scalar> struct scalar_constant_op;
+template<typename Scalar> struct scalar_identity_op;
+template<typename Scalar,bool iscpx> struct scalar_sign_op;
+template<typename Scalar,typename ScalarExponent> struct scalar_pow_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_hypot_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
+template<typename Scalar> struct scalar_lgamma_op;
+template<typename Scalar> struct scalar_digamma_op;
+template<typename Scalar> struct scalar_erf_op;
+template<typename Scalar> struct scalar_erfc_op;
+template<typename Scalar> struct scalar_igamma_op;
+template<typename Scalar> struct scalar_igammac_op;
+template<typename Scalar> struct scalar_zeta_op;
+template<typename Scalar> struct scalar_betainc_op;
+}
+struct IOFormat;
+// Array: same default-storage-order logic as Matrix above.
+template<typename _Scalar, int _Rows, int _Cols,
+ int _Options = AutoAlign |
+#if EIGEN_GNUC_AT(3,4)
+ ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+ : !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
+ : Eigen::ColMajor ),
+#else
+ ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+ : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
+ : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
+#endif
+ int _MaxRows = _Rows, int _MaxCols = _Cols> class Array;
+template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> class Select;
+template<typename MatrixType, typename BinaryOp, int Direction> class PartialReduxExpr;
+template<typename ExpressionType, int Direction> class VectorwiseOp;
+template<typename MatrixType,int RowFactor,int ColFactor> class Replicate;
+template<typename MatrixType, int Direction = BothDirections> class Reverse;
+// Decompositions.
+template<typename MatrixType> class FullPivLU;
+template<typename MatrixType> class PartialPivLU;
+namespace internal {
+template<typename MatrixType> struct inverse_impl;
+}
+template<typename MatrixType> class HouseholderQR;
+template<typename MatrixType> class ColPivHouseholderQR;
+template<typename MatrixType> class FullPivHouseholderQR;
+template<typename MatrixType> class CompleteOrthogonalDecomposition;
+template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;
+template<typename MatrixType> class BDCSVD;
+template<typename MatrixType, int UpLo = Lower> class LLT;
+template<typename MatrixType, int UpLo = Lower> class LDLT;
+template<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence;
+template<typename Scalar> class JacobiRotation;
+// Geometry module types.
+template<typename Derived, int _Dim> class RotationBase;
+template<typename Lhs, typename Rhs> class Cross;
+template<typename Derived> class QuaternionBase;
+template<typename Scalar> class Rotation2D;
+template<typename Scalar> class AngleAxis;
+template<typename Scalar,int Dim> class Translation;
+template<typename Scalar,int Dim> class AlignedBox;
+template<typename Scalar, int Options = AutoAlign> class Quaternion;
+template<typename Scalar,int Dim,int Mode,int _Options=AutoAlign> class Transform;
+template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class ParametrizedLine;
+template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class Hyperplane;
+template<typename Scalar> class UniformScaling;
+template<typename MatrixType,int Direction> class Homogeneous;
+template<typename Derived> class SparseMatrixBase;
+// Matrix-function return-value wrappers (unsupported/MatrixFunctions module).
+template<typename Derived> struct MatrixExponentialReturnValue;
+template<typename Derived> class MatrixFunctionReturnValue;
+template<typename Derived> class MatrixSquareRootReturnValue;
+template<typename Derived> class MatrixLogarithmReturnValue;
+template<typename Derived> class MatrixPowerReturnValue;
+template<typename Derived> class MatrixComplexPowerReturnValue;
+namespace internal {
+// stem_function: signature of the scalar function f(x, derivative_order)
+// applied by the matrix-function algorithms, over the complexified scalar.
+template <typename Scalar>
+struct stem_function
+{
+ typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+ typedef ComplexScalar type(ComplexScalar, int);
+};
+}
+}
+#endif
+// end #include "src/Core/util/ForwardDeclarations.h"
+// #include "src/Core/util/StaticAssert.h"
+#ifndef EIGEN_STATIC_ASSERT_H
+#define EIGEN_STATIC_ASSERT_H
+// EIGEN_STATIC_ASSERT(X,MSG): compile-time assertion.  On C++11-capable
+// compilers (or MSVC >= 2010) it maps onto native static_assert.  Otherwise
+// it is emulated: MSG must be the name of an enumerator that only exists in
+// the static_assertion<true> specialization below, so a false condition
+// selects the empty primary template and produces a compile error whose
+// text contains MSG.
+#ifndef EIGEN_NO_STATIC_ASSERT
+ #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600))
+ #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);
+ #else
+ namespace Eigen {
+ namespace internal {
+ // Primary template: intentionally empty, so referencing any enumerator
+ // fails to compile when the condition is false.
+ template<bool condition>
+ struct static_assertion {};
+ template<>
+ struct static_assertion<true>
+ {
+ // Each enumerator name doubles as a human-readable error message; add
+ // new messages here when introducing new static assertions.
+ enum {
+ YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX,
+ YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES,
+ YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES,
+ THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
+ THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
+ THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
+ OUT_OF_RANGE_ACCESS,
+ YOU_MADE_A_PROGRAMMING_MISTAKE,
+ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
+ EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
+ YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR,
+ YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR,
+ UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC,
+ THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES,
+ FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED,
+ NUMERIC_TYPE_MUST_BE_REAL,
+ COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED,
+ WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED,
+ THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE,
+ INVALID_MATRIX_PRODUCT,
+ INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS,
+ INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION,
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY,
+ THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES,
+ THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES,
+ INVALID_MATRIX_TEMPLATE_PARAMETERS,
+ INVALID_MATRIXBASE_TEMPLATE_PARAMETERS,
+ BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
+ THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX,
+ THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE,
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES,
+ YOU_ALREADY_SPECIFIED_THIS_STRIDE,
+ INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION,
+ THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD,
+ PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1,
+ THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS,
+ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES,
+ YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION,
+ THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY,
+ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT,
+ THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS,
+ THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS,
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL,
+ THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES,
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED,
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
+ THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
+ THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
+ OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
+ IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
+ STORAGE_LAYOUT_DOES_NOT_MATCH,
+ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE,
+ THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS,
+ MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
+ THIS_TYPE_IS_NOT_SUPPORTED,
+ STORAGE_KIND_MUST_MATCH,
+ STORAGE_INDEX_MUST_MATCH,
+ CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY
+ };
+ };
+ } // namespace internal
+ } // namespace Eigen
+ // Two expansions of the emulated assert: MSVC gets a braced-statement
+ // form, other compilers an if-based form.  Both merely reference the MSG
+ // enumerator, which only compiles when CONDITION is true.
+ #if EIGEN_COMP_MSVC
+ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
+ {Eigen::internal::static_assertion<bool(CONDITION)>::MSG;}
+ #else
+ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
+ if (Eigen::internal::static_assertion<static_cast<bool>(CONDITION)>::MSG) {}
+ #endif
+ #endif
+// EIGEN_NO_STATIC_ASSERT defined: degrade to a runtime eigen_assert.
+#else
+ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);
+#endif
+// Convenience wrappers around EIGEN_STATIC_ASSERT for common checks.  Each
+// MSG must be (or become) an enumerator of static_assertion<true> above so
+// the pre-C++11 emulation keeps working.
+#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \
+ EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime, \
+ YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX)
+#define EIGEN_STATIC_ASSERT_FIXED_SIZE(TYPE) \
+ EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime!=Eigen::Dynamic, \
+ YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR)
+#define EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(TYPE) \
+ EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime==Eigen::Dynamic, \
+ YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR)
+#define EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(TYPE, SIZE) \
+ EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime && TYPE::SizeAtCompileTime==SIZE, \
+ THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE)
+#define EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(TYPE, ROWS, COLS) \
+ EIGEN_STATIC_ASSERT(TYPE::RowsAtCompileTime==ROWS && TYPE::ColsAtCompileTime==COLS, \
+ THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE)
+// Two vector sizes are compatible when either is Dynamic or they are equal.
+#define EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(TYPE0,TYPE1) \
+ EIGEN_STATIC_ASSERT( \
+ (int(TYPE0::SizeAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE1::SizeAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE0::SizeAtCompileTime)==int(TYPE1::SizeAtCompileTime)),\
+ YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES)
+// Boolean predicate (NOT an assert): true when the two expressions' static
+// sizes are compatible.  Zero-sized expressions are compatible with any.
+#define EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
+ ( \
+ (int(Eigen::internal::size_of_xpr_at_compile_time<TYPE0>::ret)==0 && int(Eigen::internal::size_of_xpr_at_compile_time<TYPE1>::ret)==0) \
+ || (\
+ (int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE1::RowsAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE0::RowsAtCompileTime)==int(TYPE1::RowsAtCompileTime)) \
+ && (int(TYPE0::ColsAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE1::ColsAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE0::ColsAtCompileTime)==int(TYPE1::ColsAtCompileTime))\
+ ) \
+ )
+#define EIGEN_STATIC_ASSERT_NON_INTEGER(TYPE) \
+ EIGEN_STATIC_ASSERT(!NumTraits<TYPE>::IsInteger, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
+#define EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
+ EIGEN_STATIC_ASSERT( \
+ EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1),\
+ YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES)
+#define EIGEN_STATIC_ASSERT_SIZE_1x1(TYPE) \
+ EIGEN_STATIC_ASSERT((TYPE::RowsAtCompileTime == 1 || TYPE::RowsAtCompileTime == Dynamic) && \
+ (TYPE::ColsAtCompileTime == 1 || TYPE::ColsAtCompileTime == Dynamic), \
+ THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS)
+#define EIGEN_STATIC_ASSERT_LVALUE(Derived) \
+ EIGEN_STATIC_ASSERT(Eigen::internal::is_lvalue<Derived>::value, \
+ THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY)
+#define EIGEN_STATIC_ASSERT_ARRAYXPR(Derived) \
+ EIGEN_STATIC_ASSERT((Eigen::internal::is_same<typename Eigen::internal::traits<Derived>::XprKind, ArrayXpr>::value), \
+ THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES)
+#define EIGEN_STATIC_ASSERT_SAME_XPR_KIND(Derived1, Derived2) \
+ EIGEN_STATIC_ASSERT((Eigen::internal::is_same<typename Eigen::internal::traits<Derived1>::XprKind, \
+ typename Eigen::internal::traits<Derived2>::XprKind \
+ >::value), \
+ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES)
+// Sanity check for cost-model constants used by the evaluators.
+#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \
+ EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE);
+#endif // EIGEN_STATIC_ASSERT_H
+// end #include "src/Core/util/StaticAssert.h"
+// #include "src/Core/util/XprHelper.h"
+#ifndef EIGEN_XPRHELPER_H
+#define EIGEN_XPRHELPER_H
+// EIGEN_EMPTY_STRUCT_CTOR(X): on the GCC versions selected by the condition
+// below, give empty structs explicit trivial constructors (a compiler
+// workaround -- rationale not visible here; presumably a codegen issue with
+// empty structs on those GCC releases).  Elsewhere it expands to nothing.
+#if EIGEN_COMP_GNUC && !EIGEN_GNUC_AT(4,3)
+ #define EIGEN_EMPTY_STRUCT_CTOR(X) \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {}
+#else
+ #define EIGEN_EMPTY_STRUCT_CTOR(X)
+#endif
+namespace Eigen {
+namespace internal {
+// Convert an index value between the index types used by different
+// expressions.  In debug builds, asserts that the value fits in the
+// destination type before narrowing.
+template<typename IndexDest, typename IndexSrc>
+EIGEN_DEVICE_FUNC
+inline IndexDest convert_index(const IndexSrc& idx) {
+ // Typo fixed in the assertion message: "to big" -> "too big".
+ eigen_internal_assert(idx <= NumTraits<IndexDest>::highest() && "Index value too big for target type");
+ return IndexDest(idx);
+}
+// promote_scalar_arg<ExprScalar,T,IsSupported>: given a scalar argument of
+// type T mixed into an expression with scalar type ExprScalar, compute the
+// type the argument should be converted to.  If T is directly supported,
+// keep it; otherwise try NumTraits<ExprScalar>::Literal, then ExprScalar
+// itself.  An empty (undefined-member) instantiation -- hence a compile
+// error -- results when T is not convertible, or when the conversion would
+// be lossy (T not integer but the promoted type is).
+template<typename ExprScalar,typename T, bool IsSupported>
+struct promote_scalar_arg;
+template<typename S,typename T>
+struct promote_scalar_arg<S,T,true>
+{
+ typedef T type;
+};
+// Fallback chain, parameterized by convertibility and "safety" of the
+// candidate PromotedType.
+template<typename ExprScalar,typename T,typename PromotedType,
+ bool ConvertibleToLiteral = internal::is_convertible<T,PromotedType>::value,
+ bool IsSafe = NumTraits<T>::IsInteger || !NumTraits<PromotedType>::IsInteger>
+struct promote_scalar_arg_unsupported;
+template<typename S,typename T>
+struct promote_scalar_arg<S,T,false> : promote_scalar_arg_unsupported<S,T,typename NumTraits<S>::Literal> {};
+// Convertible and safe: use the candidate.
+template<typename S,typename T, typename PromotedType>
+struct promote_scalar_arg_unsupported<S,T,PromotedType,true,true>
+{
+ typedef PromotedType type;
+};
+// Not convertible to the literal type: retry with ExprScalar itself.
+template<typename ExprScalar,typename T, typename PromotedType>
+struct promote_scalar_arg_unsupported<ExprScalar,T,PromotedType,false,true>
+ : promote_scalar_arg_unsupported<ExprScalar,T,ExprScalar>
+{};
+// Unsafe conversion, or nothing left to try: leave ::type undefined.
+template<typename S,typename T, typename PromotedType, bool ConvertibleToLiteral>
+struct promote_scalar_arg_unsupported<S,T,PromotedType,ConvertibleToLiteral,false> {};
+template<typename S,typename T>
+struct promote_scalar_arg_unsupported<S,T,S,false,true> {};
+// Mixin that disables copy assignment (pre-C++11 idiom: the operator is
+// declared private and never defined).
+class no_assignment_operator
+{
+ private:
+ no_assignment_operator& operator=(const no_assignment_operator&);
+};
+// Of two index types, pick the wider (by sizeof).
+template<typename I1, typename I2>
+struct promote_index_type
+{
+ typedef typename conditional<(sizeof(I1)<sizeof(I2)), I2, I1>::type type;
+};
+// variable_if_dynamic<T,Value>: stores nothing when Value is a compile-time
+// constant (value() returns it), and stores a runtime T only when
+// Value==Dynamic.  Used to make fixed-size expression objects empty.
+template<typename T, int Value> class variable_if_dynamic
+{
+ public:
+ EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic)
+ // Debug-only check that the runtime value agrees with the static one.
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {}
+};
+// Dynamic case: actually carry the value.
+template<typename T> class variable_if_dynamic<T, Dynamic>
+{
+ T m_value;
+ // Default construction is forbidden at runtime.
+ EIGEN_DEVICE_FUNC variable_if_dynamic() { eigen_assert(false); }
+ public:
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }
+};
+// Same idea as variable_if_dynamic above, but keyed on DynamicIndex
+// instead of Dynamic.
+template<typename T, int Value> class variable_if_dynamicindex
+{
+ public:
+ EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {}
+};
+template<typename T> class variable_if_dynamicindex<T, DynamicIndex>
+{
+ T m_value;
+ EIGEN_DEVICE_FUNC variable_if_dynamicindex() { eigen_assert(false); }
+ public:
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T value) : m_value(value) {}
+ // Specifier order normalized (was "T EIGEN_STRONG_INLINE value()") for
+ // consistency with variable_if_dynamic; both orders are legal C++ with
+ // identical semantics, so no behavior change.
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }
+};
+// Default cost/vectorization metadata for a functor; specialized by the
+// concrete functors elsewhere.
+template<typename T> struct functor_traits
+{
+ enum
+ {
+ Cost = 10,
+ PacketAccess = false,
+ IsRepeatable = false
+ };
+};
+// Per-scalar SIMD traits; specialized by each architecture backend.
+template<typename T> struct packet_traits;
+// Traits of a packet type itself.  Default: a plain scalar behaves as a
+// packet of size 1 with no alignment requirement.
+template<typename T> struct unpacket_traits
+{
+ typedef T type;
+ typedef T half;
+ enum
+ {
+ size = 1,
+ alignment = 1
+ };
+};
+// find_best_packet<T,Size>: starting from the full packet for T, keep
+// halving the packet until its size divides Size (or Size is Dynamic, or no
+// smaller packet exists, i.e. half==self).
+template<int Size, typename PacketType,
+ bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value>
+struct find_best_packet_helper;
+template< int Size, typename PacketType>
+struct find_best_packet_helper<Size,PacketType,true>
+{
+ typedef PacketType type;
+};
+// Recurse with the half-size packet.
+template<int Size, typename PacketType>
+struct find_best_packet_helper<Size,PacketType,false>
+{
+ typedef typename find_best_packet_helper<Size,typename unpacket_traits<PacketType>::half>::type type;
+};
+template<typename T, int Size>
+struct find_best_packet
+{
+ typedef typename find_best_packet_helper<Size,typename packet_traits<T>::type>::type type;
+};
+// compute_default_alignment<T,Size>: alignment (in bytes) for a statically
+// sized array of Size elements of T.  Picks the largest alignment (starting
+// at EIGEN_MAX_STATIC_ALIGN_BYTES) that evenly divides the array size,
+// halving down to EIGEN_MIN_ALIGN_BYTES; 0 when nothing fits or static
+// alignment is disabled.  Dynamic-size arrays get EIGEN_MAX_ALIGN_BYTES.
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
+template<int ArrayBytes, int AlignmentBytes,
+ bool Match = bool((ArrayBytes%AlignmentBytes)==0),
+ bool TryHalf = bool(EIGEN_MIN_ALIGN_BYTES<AlignmentBytes) >
+struct compute_default_alignment_helper
+{
+ enum { value = 0 };
+};
+// Current alignment divides the array size: take it.
+template<int ArrayBytes, int AlignmentBytes, bool TryHalf>
+struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, true, TryHalf>
+{
+ enum { value = AlignmentBytes };
+};
+// No match but a smaller alignment is allowed: retry with half.
+template<int ArrayBytes, int AlignmentBytes>
+struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, false, true>
+{
+ enum { value = compute_default_alignment_helper<ArrayBytes, AlignmentBytes/2>::value };
+};
+#else
+// Static alignment disabled altogether.
+template<int ArrayBytes, int AlignmentBytes>
+struct compute_default_alignment_helper
+{
+ enum { value = 0 };
+};
+#endif
+template<typename T, int Size> struct compute_default_alignment {
+ enum { value = compute_default_alignment_helper<Size*sizeof(T),EIGEN_MAX_STATIC_ALIGN_BYTES>::value };
+};
+template<typename T> struct compute_default_alignment<T,Dynamic> {
+ enum { value = EIGEN_MAX_ALIGN_BYTES };
+};
+// make_proper_matrix_type: build a Matrix type whose storage order is
+// forced to match its vector-ness (row vectors row-major, column vectors
+// column-major), regardless of the requested _Options.
+template<typename _Scalar, int _Rows, int _Cols,
+ int _Options = AutoAlign |
+ ( (_Rows==1 && _Cols!=1) ? RowMajor
+ : (_Cols==1 && _Rows!=1) ? ColMajor
+ : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
+ int _MaxRows = _Rows,
+ int _MaxCols = _Cols
+> class make_proper_matrix_type
+{
+ enum {
+ IsColVector = _Cols==1 && _Rows!=1,
+ IsRowVector = _Rows==1 && _Cols!=1,
+ Options = IsColVector ? (_Options | ColMajor) & ~RowMajor
+ : IsRowVector ? (_Options | RowMajor) & ~ColMajor
+ : _Options
+ };
+ public:
+ typedef Matrix<_Scalar, _Rows, _Cols, Options, _MaxRows, _MaxCols> type;
+};
+// Flags for a plain Matrix/Array: always direct-access, lvalue,
+// nest-by-reference, plus the row-major bit from Options.
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+class compute_matrix_flags
+{
+ enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 };
+ public:
+ enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
+};
+// Compile-time element count; Dynamic as soon as either extent is Dynamic.
+template<int _Rows, int _Cols> struct size_at_compile_time
+{
+ enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
+};
+template<typename XprType> struct size_of_xpr_at_compile_time
+{
+ enum { ret = size_at_compile_time<traits<XprType>::RowsAtCompileTime,traits<XprType>::ColsAtCompileTime>::ret };
+};
+// plain_matrix_type<T>: the concrete Matrix/Array type an expression T
+// evaluates into, dispatched on storage kind (Dense, DiagonalShape, ...)
+// and expression kind (MatrixXpr vs ArrayXpr).
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_matrix_type;
+template<typename T, typename BaseClassType, int Flags> struct plain_matrix_type_dense;
+template<typename T> struct plain_matrix_type<T,Dense>
+{
+ typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, traits<T>::Flags>::type type;
+};
+template<typename T> struct plain_matrix_type<T,DiagonalShape>
+{
+ typedef typename T::PlainObject type;
+};
+// Matrix expressions evaluate to Matrix; storage order follows the
+// expression's RowMajorBit.
+template<typename T, int Flags> struct plain_matrix_type_dense<T,MatrixXpr,Flags>
+{
+ typedef Matrix<typename traits<T>::Scalar,
+ traits<T>::RowsAtCompileTime,
+ traits<T>::ColsAtCompileTime,
+ AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),
+ traits<T>::MaxRowsAtCompileTime,
+ traits<T>::MaxColsAtCompileTime
+ > type;
+};
+// Array expressions evaluate to Array.
+template<typename T, int Flags> struct plain_matrix_type_dense<T,ArrayXpr,Flags>
+{
+ typedef Array<typename traits<T>::Scalar,
+ traits<T>::RowsAtCompileTime,
+ traits<T>::ColsAtCompileTime,
+ AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),
+ traits<T>::MaxRowsAtCompileTime,
+ traits<T>::MaxColsAtCompileTime
+ > type;
+};
+// eval<T>: type produced by evaluating T.  For expressions this is the
+// plain type; for an already-plain Matrix/Array it is a const reference,
+// avoiding a copy.
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct eval;
+template<typename T> struct eval<T,Dense>
+{
+ typedef typename plain_matrix_type<T>::type type;
+};
+template<typename T> struct eval<T,DiagonalShape>
+{
+ typedef typename plain_matrix_type<T>::type type;
+};
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct eval<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
+{
+ typedef const Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;
+};
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct eval<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
+{
+ typedef const Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;
+};
+// Like plain_matrix_type, but takes the flags from the evaluator rather
+// than from traits.
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_object_eval;
+template<typename T>
+struct plain_object_eval<T,Dense>
+{
+ typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, evaluator<T>::Flags>::type type;
+};
+// Column-major plain type for T.  The storage-order ternary special-cases
+// compile-time row vectors (MaxRows==1), which must be row-major.
+template<typename T> struct plain_matrix_type_column_major
+{
+ enum { Rows = traits<T>::RowsAtCompileTime,
+ Cols = traits<T>::ColsAtCompileTime,
+ MaxRows = traits<T>::MaxRowsAtCompileTime,
+ MaxCols = traits<T>::MaxColsAtCompileTime
+ };
+ typedef Matrix<typename traits<T>::Scalar,
+ Rows,
+ Cols,
+ (MaxRows==1&&MaxCols!=1) ? RowMajor : ColMajor,
+ MaxRows,
+ MaxCols
+ > type;
+};
+// Row-major counterpart; the ternary special-cases column vectors
+// (MaxCols==1), which must be column-major.
+template<typename T> struct plain_matrix_type_row_major
+{
+ enum { Rows = traits<T>::RowsAtCompileTime,
+ Cols = traits<T>::ColsAtCompileTime,
+ MaxRows = traits<T>::MaxRowsAtCompileTime,
+ MaxCols = traits<T>::MaxColsAtCompileTime
+ };
+ typedef Matrix<typename traits<T>::Scalar,
+ Rows,
+ Cols,
+ (MaxCols==1&&MaxRows!=1) ? RowMajor : ColMajor,
+ MaxRows,
+ MaxCols
+ > type;
+};
+// How to store a nested sub-expression: by const reference when the type is
+// flagged NestByRefBit (plain objects), by value otherwise (lightweight
+// expression wrappers).
+template <typename T>
+struct ref_selector
+{
+ typedef typename conditional<
+ bool(traits<T>::Flags & NestByRefBit),
+ T const&,
+ const T
+ >::type type;
+ typedef typename conditional<
+ bool(traits<T>::Flags & NestByRefBit),
+ T &,
+ T
+ >::type non_const_type;
+};
+// Copy T1's const-ness onto T2.
+template<typename T1, typename T2>
+struct transfer_constness
+{
+ typedef typename conditional<
+ bool(internal::is_const<T1>::value),
+ typename internal::add_const_on_value_type<T2>::type,
+ T2
+ >::type type;
+};
+// nested_eval<T,n>: the type to use when the expression T is read n times
+// by an enclosing expression.  Evaluates into a temporary (PlainObject)
+// when either the expression demands it (EvalBeforeNestingBit) or the cost
+// model says materializing once is cheaper than re-evaluating n times;
+// otherwise nests by reference/value via ref_selector.
+template<typename T, int n, typename PlainObject = typename plain_object_eval<T>::type> struct nested_eval
+{
+ enum {
+ ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost,
+ CoeffReadCost = evaluator<T>::CoeffReadCost,
+ // A Dynamic repeat count is modeled as "very many" reads.
+ NAsInteger = n == Dynamic ? HugeCost : n,
+ CostEval = (NAsInteger+1) * ScalarReadCost + CoeffReadCost,
+ CostNoEval = NAsInteger * CoeffReadCost,
+ Evaluate = (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval))
+ };
+ typedef typename conditional<Evaluate, PlainObject, typename ref_selector<T>::type>::type type;
+};
+// Strip const from a pointer (centralized so the const_cast is auditable).
+template<typename T>
+EIGEN_DEVICE_FUNC
+inline T* const_cast_ptr(const T* ptr)
+{
+ return const_cast<T*>(ptr);
+}
+// Base class selector for dense expressions: MatrixBase for matrix
+// expressions, ArrayBase for array expressions.
+template<typename Derived, typename XprKind = typename traits<Derived>::XprKind>
+struct dense_xpr_base
+{
+};
+template<typename Derived>
+struct dense_xpr_base<Derived, MatrixXpr>
+{
+ typedef MatrixBase<Derived> type;
+};
+template<typename Derived>
+struct dense_xpr_base<Derived, ArrayXpr>
+{
+ typedef ArrayBase<Derived> type;
+};
+// Generic version, additionally dispatched on storage kind; only the Dense
+// case is defined here.
+template<typename Derived, typename XprKind = typename traits<Derived>::XprKind, typename StorageKind = typename traits<Derived>::StorageKind>
+struct generic_xpr_base;
+template<typename Derived, typename XprKind>
+struct generic_xpr_base<Derived, XprKind, Dense>
+{
+ typedef typename dense_xpr_base<Derived,XprKind>::type type;
+};
+// cast_return_type: return type of .cast<NewScalar>().  When the target
+// scalar equals the current one, return a reference to the expression
+// itself so the cast is a no-op.
+template<typename XprType, typename CastType> struct cast_return_type
+{
+ typedef typename XprType::Scalar CurrentScalarType;
+ typedef typename remove_all<CastType>::type _CastType;
+ typedef typename _CastType::Scalar NewScalarType;
+ typedef typename conditional<is_same<CurrentScalarType,NewScalarType>::value,
+ const XprType&,CastType>::type type;
+};
+// promote_storage_type<A,B>: combine two storage kinds; defined only for
+// identical kinds (modulo const).
+template <typename A, typename B> struct promote_storage_type;
+template <typename A> struct promote_storage_type<A,A>
+{
+ typedef A ret;
+};
+template <typename A> struct promote_storage_type<A, const A>
+{
+ typedef A ret;
+};
+template <typename A> struct promote_storage_type<const A, A>
+{
+ typedef A ret;
+};
+// Storage kind of a coefficient-wise binary op.  Note the asymmetry:
+// mixing Sparse with Dense yields Sparse here (the sparse evaluators
+// handle the dense operand).
+template <typename A, typename B, typename Functor> struct cwise_promote_storage_type;
+template <typename A, typename Functor> struct cwise_promote_storage_type<A,A,Functor> { typedef A ret; };
+template <typename Functor> struct cwise_promote_storage_type<Dense,Dense,Functor> { typedef Dense ret; };
+template <typename A, typename Functor> struct cwise_promote_storage_type<A,Dense,Functor> { typedef Dense ret; };
+template <typename B, typename Functor> struct cwise_promote_storage_type<Dense,B,Functor> { typedef Dense ret; };
+template <typename Functor> struct cwise_promote_storage_type<Sparse,Dense,Functor> { typedef Sparse ret; };
+template <typename Functor> struct cwise_promote_storage_type<Dense,Sparse,Functor> { typedef Sparse ret; };
+// Storage order of a coefficient-wise binary op: follow the non-sparse
+// side when exactly one side is sparse.
+template <typename LhsKind, typename RhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order {
+ enum { value = LhsOrder };
+};
+template <typename LhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order<LhsKind,Sparse,LhsOrder,RhsOrder> { enum { value = RhsOrder }; };
+template <typename RhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order<Sparse,RhsKind,LhsOrder,RhsOrder> { enum { value = LhsOrder }; };
+template <int Order> struct cwise_promote_storage_order<Sparse,Sparse,Order,Order> { enum { value = Order }; };
+// Storage kind of a product: Dense absorbs DiagonalShape and
+// PermutationStorage; otherwise the structured side wins.
+template <typename A, typename B, int ProductTag> struct product_promote_storage_type;
+template <typename A, int ProductTag> struct product_promote_storage_type<A, A, ProductTag> { typedef A ret;};
+template <int ProductTag> struct product_promote_storage_type<Dense, Dense, ProductTag> { typedef Dense ret;};
+template <typename A, int ProductTag> struct product_promote_storage_type<A, Dense, ProductTag> { typedef Dense ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<Dense, B, ProductTag> { typedef Dense ret; };
+template <typename A, int ProductTag> struct product_promote_storage_type<A, DiagonalShape, ProductTag> { typedef A ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<DiagonalShape, B, ProductTag> { typedef B ret; };
+template <int ProductTag> struct product_promote_storage_type<Dense, DiagonalShape, ProductTag> { typedef Dense ret; };
+template <int ProductTag> struct product_promote_storage_type<DiagonalShape, Dense, ProductTag> { typedef Dense ret; };
+template <typename A, int ProductTag> struct product_promote_storage_type<A, PermutationStorage, ProductTag> { typedef A ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<PermutationStorage, B, ProductTag> { typedef B ret; };
+template <int ProductTag> struct product_promote_storage_type<Dense, PermutationStorage, ProductTag> { typedef Dense ret; };
+template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Dense, ProductTag> { typedef Dense ret; };
+// plain_row_type<Xpr>: the concrete (row-major) row-vector type matching
+// Xpr's scalar and expression kind (Matrix vs Array).
+template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
+struct plain_row_type
+{
+ typedef Matrix<Scalar, 1, ExpressionType::ColsAtCompileTime,
+ ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> MatrixRowType;
+ typedef Array<Scalar, 1, ExpressionType::ColsAtCompileTime,
+ ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> ArrayRowType;
+ typedef typename conditional<
+ is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
+ MatrixRowType,
+ ArrayRowType
+ >::type type;
+};
+// Column-vector counterpart (column-major: RowMajor bit cleared).
+template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
+struct plain_col_type
+{
+ typedef Matrix<Scalar, ExpressionType::RowsAtCompileTime, 1,
+ ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> MatrixColType;
+ typedef Array<Scalar, ExpressionType::RowsAtCompileTime, 1,
+ ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> ArrayColType;
+ typedef typename conditional<
+ is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
+ MatrixColType,
+ ArrayColType
+ >::type type;
+};
+// Vector type suitable for holding the diagonal: length is the smaller of
+// the two extents (preferring Dynamic for the size, fixed for the max).
+template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
+struct plain_diag_type
+{
+ enum { diag_size = EIGEN_SIZE_MIN_PREFER_DYNAMIC(ExpressionType::RowsAtCompileTime, ExpressionType::ColsAtCompileTime),
+ max_diag_size = EIGEN_SIZE_MIN_PREFER_FIXED(ExpressionType::MaxRowsAtCompileTime, ExpressionType::MaxColsAtCompileTime)
+ };
+ typedef Matrix<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> MatrixDiagType;
+ typedef Array<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> ArrayDiagType;
+ typedef typename conditional<
+ is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
+ MatrixDiagType,
+ ArrayDiagType
+ >::type type;
+};
+// Constant-expression type matching Expr's shape and kind: a
+// CwiseNullaryOp wrapping scalar_constant_op.
+template<typename Expr,typename Scalar = typename Expr::Scalar>
+struct plain_constant_type
+{
+ enum { Options = (traits<Expr>::Flags&RowMajorBit)?RowMajor:0 };
+ typedef Array<Scalar, traits<Expr>::RowsAtCompileTime, traits<Expr>::ColsAtCompileTime,
+ Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> array_type;
+ typedef Matrix<Scalar, traits<Expr>::RowsAtCompileTime, traits<Expr>::ColsAtCompileTime,
+ Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> matrix_type;
+ typedef CwiseNullaryOp<scalar_constant_op<Scalar>, const typename conditional<is_same< typename traits<Expr>::XprKind, MatrixXpr >::value, matrix_type, array_type>::type > type;
+};
+// Writable expression: non-const and carries LvalueBit.
+template<typename ExpressionType>
+struct is_lvalue
+{
+ enum { value = (!bool(is_const<ExpressionType>::value)) &&
+ bool(traits<ExpressionType>::Flags & LvalueBit) };
+};
+// Compile-time recognition of diagonal-matrix wrapper types.
+template<typename T> struct is_diagonal
+{ enum { ret = false }; };
+template<typename T> struct is_diagonal<DiagonalBase<T> >
+{ enum { ret = true }; };
+template<typename T> struct is_diagonal<DiagonalWrapper<T> >
+{ enum { ret = true }; };
+template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> >
+{ enum { ret = true }; };
+// Combine two shape tags; only the defined specializations are valid.
+template<typename S1, typename S2> struct glue_shapes;
+template<> struct glue_shapes<DenseShape,TriangularShape> { typedef TriangularShape type; };
+// Runtime aliasing check: two expressions are "the same dense storage" when
+// both allow direct memory access and share data pointer and strides.
+template<typename T1, typename T2>
+bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if<has_direct_access<T1>::ret&&has_direct_access<T2>::ret, T1>::type * = 0)
+{
+ return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride());
+}
+// Without direct access on both sides we cannot compare storage:
+// conservatively report "not the same".
+template<typename T1, typename T2>
+bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_access<T1>::ret&&has_direct_access<T2>::ret), T1>::type * = 0)
+{
+ return false;
+}
+// Approximate cost of one scalar division, for the expression cost model.
+// Default: 8 multiply-costs.  The trailing EnableIf parameter exists for
+// SFINAE-based specializations (typo "EnaleIf" fixed; the parameter is
+// never referenced by name, so this is purely cosmetic).
+template<typename T,bool Vectorized=false,typename EnableIf = void>
+struct scalar_div_cost {
+ enum { value = 8*NumTraits<T>::MulCost };
+};
+// Complex division expressed in real operations (2 real divisions, 6
+// multiplies, 3 additions).
+template<typename T,bool Vectorized>
+struct scalar_div_cost<std::complex<T>, Vectorized> {
+ enum { value = 2*scalar_div_cost<T>::value
+ + 6*NumTraits<T>::MulCost
+ + 3*NumTraits<T>::AddCost
+ };
+};
+// Hand-tuned costs for 64-bit integer division; the conditional restricts
+// these specializations to platforms where long is 8 bytes.
+template<bool Vectorized>
+struct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; };
+template<bool Vectorized>
+struct scalar_div_cost<unsigned long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 21 }; };
+// Debug-build helpers (EIGEN_DEBUG_ASSIGN only): turn the assignment
+// evaluators' enum values into readable strings.
+#ifdef EIGEN_DEBUG_ASSIGN
+std::string demangle_traversal(int t)
+{
+ if(t==DefaultTraversal) return "DefaultTraversal";
+ if(t==LinearTraversal) return "LinearTraversal";
+ if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal";
+ if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal";
+ if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal";
+ return "?";
+}
+std::string demangle_unrolling(int t)
+{
+ if(t==NoUnrolling) return "NoUnrolling";
+ if(t==InnerUnrolling) return "InnerUnrolling";
+ if(t==CompleteUnrolling) return "CompleteUnrolling";
+ return "?";
+}
+// Decode a flags bitfield into a " | "-separated list.
+std::string demangle_flags(int f)
+{
+ std::string res;
+ if(f&RowMajorBit) res += " | RowMajor";
+ if(f&PacketAccessBit) res += " | Packet";
+ if(f&LinearAccessBit) res += " | Linear";
+ if(f&LvalueBit) res += " | Lvalue";
+ if(f&DirectAccessBit) res += " | Direct";
+ if(f&NestByRefBit) res += " | NestByRef";
+ if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit";
+ return res;
+}
+#endif
+} // namespace internal
+// ScalarBinaryOpTraits<A,B,Op>: declares the ReturnType of applying a
+// binary operator to two (possibly different) scalar types.  Mixing types
+// without a specialization (and without the legacy scalar_product_traits
+// fallback) leaves ReturnType undefined, which the
+// EIGEN_CHECK_BINARY_COMPATIBILIY assert below turns into a readable error.
+template<typename ScalarA, typename ScalarB, typename BinaryOp=internal::scalar_product_op<ScalarA,ScalarB> >
+struct ScalarBinaryOpTraits
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ : internal::scalar_product_traits<ScalarA,ScalarB>
+#endif
+{};
+// Identical scalars: trivially that scalar.
+template<typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T,T,BinaryOp>
+{
+ typedef T ReturnType;
+};
+// Mixing a complex type with its real type (either order) yields the
+// complex type; enable_if restricts this to genuinely complex T.
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T, typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, BinaryOp>
+{
+ typedef T ReturnType;
+};
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, T, BinaryOp>
+{
+ typedef T ReturnType;
+};
+// void acts as a wildcard operand.
+template<typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T,void,BinaryOp>
+{
+ typedef T ReturnType;
+};
+template<typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<void,T,BinaryOp>
+{
+ typedef T ReturnType;
+};
+template<typename BinaryOp>
+struct ScalarBinaryOpTraits<void,void,BinaryOp>
+{
+ typedef void ReturnType;
+};
+// NOTE: the macro name is misspelled ("COMPATIBILIY") but is part of the
+// public-facing API surface, so it cannot be renamed without breaking
+// existing users.
+#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
+ EIGEN_STATIC_ASSERT((Eigen::internal::has_ReturnType<ScalarBinaryOpTraits<LHS, RHS,BINOP> >::value), \
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+} // namespace Eigen
+#endif // EIGEN_XPRHELPER_H
+// end #include "src/Core/util/XprHelper.h"
+// #include "src/Core/util/Memory.h"
+#ifndef EIGEN_MEMORY_H
+#define EIGEN_MEMORY_H
+// Decide whether the system malloc already returns pointers aligned to
+// EIGEN_DEFAULT_ALIGN_BYTES, in which case the handmade aligned allocator
+// below can be bypassed.  Only 16-byte default alignment is covered by
+// these platform checks.
+#ifndef EIGEN_MALLOC_ALREADY_ALIGNED
+// glibc >= 2.8 on 64-bit (and not under ASan, whose allocator differs).
+#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
+ && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
+ #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
+#else
+ #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
+#endif
+// FreeBSD, except on ARM/MIPS where its malloc alignment is not trusted.
+#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
+ #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
+#else
+ #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
+#endif
+#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
+ || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
+ || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
+ || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
+ #define EIGEN_MALLOC_ALREADY_ALIGNED 1
+#else
+ #define EIGEN_MALLOC_ALREADY_ALIGNED 0
+#endif
+#endif
+namespace Eigen {
+namespace internal {
+// Signal allocation failure.  Without exception support, provoke a failing
+// huge allocation instead so the program still aborts deterministically.
+EIGEN_DEVICE_FUNC
+inline void throw_std_bad_alloc()
+{
+ #ifdef EIGEN_EXCEPTIONS
+ throw std::bad_alloc();
+ #else
+ std::size_t huge = static_cast<std::size_t>(-1);
+ new int[huge];
+ #endif
+}
+// Portable aligned malloc: over-allocate by EIGEN_DEFAULT_ALIGN_BYTES,
+// round up to the next aligned boundary, and stash the original pointer in
+// the word just before the returned block (so free/realloc can recover it).
+inline void* handmade_aligned_malloc(std::size_t size)
+{
+ void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES);
+ if (original == 0) return 0;
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
+ *(reinterpret_cast<void**>(aligned) - 1) = original;
+ return aligned;
+}
+// Free a block obtained from handmade_aligned_malloc (null-safe).
+inline void handmade_aligned_free(void *ptr)
+{
+ if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
+}
+// Aligned realloc counterpart.  Since realloc may move the block and change
+// its alignment offset, the payload is memmove'd back to the new aligned
+// position when the offsets differ.  Third parameter (old size) is unused.
+inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
+{
+ if (ptr == 0) return handmade_aligned_malloc(size);
+ void *original = *(reinterpret_cast<void**>(ptr) - 1);
+ std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
+ original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
+ if (original == 0) return 0;
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
+ void *previous_aligned = static_cast<char *>(original)+previous_offset;
+ if(aligned!=previous_aligned)
+ std::memmove(aligned, previous_aligned, size);
+ *(reinterpret_cast<void**>(aligned) - 1) = original;
+ return aligned;
+}
+// Heap-allocation policy hook, selected at compile time:
+//  - EIGEN_NO_MALLOC: any heap allocation asserts immediately.
+//  - EIGEN_RUNTIME_NO_MALLOC: allocation can be toggled at runtime via
+//    set_is_malloc_allowed(); the flag lives in a function-local static.
+//  - default: no-op.
+#ifdef EIGEN_NO_MALLOC
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{
+  eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
+}
+#elif defined EIGEN_RUNTIME_NO_MALLOC
+// Single accessor/mutator for the flag: `update`==true stores `new_value`,
+// the current value is always returned.
+EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
+{
+  static bool value = true;
+  if (update == 1)
+    value = new_value;
+  return value;
+}
+EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
+EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{
+  eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
+}
+#else
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{}
+#endif
+// Allocate `size` bytes aligned to EIGEN_DEFAULT_ALIGN_BYTES. Uses plain
+// std::malloc when the platform already guarantees sufficient alignment (or
+// no alignment is requested), otherwise falls back to the handmade scheme.
+// Calls throw_std_bad_alloc() on failure for a non-zero size.
+EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
+{
+  check_that_malloc_is_allowed();
+  void *result;
+  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+  result = std::malloc(size);
+  #if EIGEN_DEFAULT_ALIGN_BYTES==16
+  // Sanity-check the platform's 16-byte alignment promise in debug builds.
+  eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator.");
+  #endif
+  #else
+  result = handmade_aligned_malloc(size);
+  #endif
+  if(!result && size)
+    throw_std_bad_alloc();
+  return result;
+}
+// Free memory obtained from aligned_malloc, using whichever backend
+// (std::free or handmade) aligned_malloc was compiled to use.
+EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
+{
+  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+  std::free(ptr);
+  #else
+  handmade_aligned_free(ptr);
+  #endif
+}
+// Reallocate an aligned_malloc'd buffer to `new_size` bytes, preserving
+// alignment. `old_size` is only needed by the handmade fallback; the
+// std::realloc path ignores it. Calls throw_std_bad_alloc() on failure.
+inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
+{
+  EIGEN_UNUSED_VARIABLE(old_size);
+  void *result;
+#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+  result = std::realloc(ptr,new_size);
+#else
+  result = handmade_aligned_realloc(ptr,new_size,old_size);
+#endif
+  if (!result && new_size)
+    throw_std_bad_alloc();
+  return result;
+}
+// conditional_aligned_{malloc,free,realloc}<Align>: compile-time dispatch
+// between the aligned allocator (Align==true, the primary templates) and the
+// plain std:: allocator (Align==false, the full specializations). Mixing the
+// two on the same pointer is invalid.
+template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size)
+{
+  return aligned_malloc(size);
+}
+template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size)
+{
+  check_that_malloc_is_allowed();
+  void *result = std::malloc(size);
+  if(!result && size)
+    throw_std_bad_alloc();
+  return result;
+}
+template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)
+{
+  aligned_free(ptr);
+}
+template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
+{
+  std::free(ptr);
+}
+template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
+{
+  return aligned_realloc(ptr, new_size, old_size);
+}
+template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
+{
+  return std::realloc(ptr, new_size);
+}
+// Destroy `size` elements of an array in reverse order (last constructed,
+// first destroyed). Null pointer is a no-op.
+template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size)
+{
+  if(ptr)
+    while(size) ptr[--size].~T();
+}
+// Default-construct `size` elements in raw storage via placement new.
+// If a constructor throws, the elements built so far are destroyed before
+// the exception is re-thrown, so no partially-constructed array leaks.
+template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size)
+{
+  // `i` is declared outside the try so the catch clause knows how many
+  // elements were successfully constructed.
+  std::size_t i;
+  EIGEN_TRY
+  {
+    for (i = 0; i < size; ++i) ::new (ptr + i) T;
+    return ptr;
+  }
+  EIGEN_CATCH(...)
+  {
+    destruct_elements_of_array(ptr, i);
+    EIGEN_THROW;
+  }
+  // Unreachable; keeps non-throwing EIGEN_THROW configurations well-formed.
+  return NULL;
+}
+// Reject element counts whose byte size (size * sizeof(T)) would overflow
+// std::size_t, reporting the condition as an allocation failure.
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)
+{
+  if(size > std::size_t(-1) / sizeof(T))
+    throw_std_bad_alloc();
+}
+// Allocate aligned storage for `size` T's and default-construct them.
+// If construction throws, the storage is released before re-throwing.
+template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size)
+{
+  check_size_for_overflow<T>(size);
+  T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
+  EIGEN_TRY
+  {
+    return construct_elements_of_array(result, size);
+  }
+  EIGEN_CATCH(...)
+  {
+    aligned_free(result);
+    EIGEN_THROW;
+  }
+  return result;
+}
+// Same as aligned_new, but the alignment of the underlying allocation is
+// chosen by the Align template flag (see conditional_aligned_malloc).
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size)
+{
+  check_size_for_overflow<T>(size);
+  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
+  EIGEN_TRY
+  {
+    return construct_elements_of_array(result, size);
+  }
+  EIGEN_CATCH(...)
+  {
+    conditional_aligned_free<Align>(result);
+    EIGEN_THROW;
+  }
+  return result;
+}
+// Destroy `size` elements then release storage allocated with aligned_new.
+template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
+{
+  destruct_elements_of_array<T>(ptr, size);
+  aligned_free(ptr);
+}
+// Destroy `size` elements then release storage allocated with
+// conditional_aligned_new<T,Align>; Align must match the allocation.
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size)
+{
+  destruct_elements_of_array<T>(ptr, size);
+  conditional_aligned_free<Align>(ptr);
+}
+// Resize an array created by conditional_aligned_new<T,Align> from
+// old_size to new_size elements. When shrinking, trailing elements are
+// destroyed before the realloc; when growing, new trailing elements are
+// default-constructed afterwards (the buffer is freed if that throws).
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size)
+{
+  check_size_for_overflow<T>(new_size);
+  check_size_for_overflow<T>(old_size);
+  if(new_size < old_size)
+    destruct_elements_of_array(pts+new_size, old_size-new_size);
+  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
+  if(new_size > old_size)
+  {
+    EIGEN_TRY
+    {
+      construct_elements_of_array(result+old_size, new_size-old_size);
+    }
+    EIGEN_CATCH(...)
+    {
+      conditional_aligned_free<Align>(result);
+      EIGEN_THROW;
+    }
+  }
+  return result;
+}
+// Like conditional_aligned_new, but skips element construction entirely for
+// trivially-initializable types (NumTraits<T>::RequireInitialization == 0),
+// leaving the storage uninitialized. Returns 0 for a zero-size request.
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size)
+{
+  if(size==0)
+    return 0;
+  check_size_for_overflow<T>(size);
+  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
+  if(NumTraits<T>::RequireInitialization)
+  {
+    EIGEN_TRY
+    {
+      construct_elements_of_array(result, size);
+    }
+    EIGEN_CATCH(...)
+    {
+      conditional_aligned_free<Align>(result);
+      EIGEN_THROW;
+    }
+  }
+  return result;
+}
+// Resize counterpart of conditional_aligned_new_auto: destruction and
+// construction of the tail elements are performed only when the element
+// type requires initialization.
+template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
+{
+  check_size_for_overflow<T>(new_size);
+  check_size_for_overflow<T>(old_size);
+  if(NumTraits<T>::RequireInitialization && (new_size < old_size))
+    destruct_elements_of_array(pts+new_size, old_size-new_size);
+  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
+  if(NumTraits<T>::RequireInitialization && (new_size > old_size))
+  {
+    EIGEN_TRY
+    {
+      construct_elements_of_array(result+old_size, new_size-old_size);
+    }
+    EIGEN_CATCH(...)
+    {
+      conditional_aligned_free<Align>(result);
+      EIGEN_THROW;
+    }
+  }
+  return result;
+}
+// Deletion counterpart of conditional_aligned_new_auto: destructors run only
+// when NumTraits says the type requires initialization.
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size)
+{
+  if(NumTraits<T>::RequireInitialization)
+    destruct_elements_of_array<T>(ptr, size);
+  conditional_aligned_free<Align>(ptr);
+}
+// Return the index of the first element of `array` that is aligned to
+// `Alignment` bytes, clamped to `size`. Returns 0 when Alignment fits in a
+// single Scalar (everything qualifies), and `size` when the array can never
+// become aligned (misaligned base pointer, or Alignment not a multiple of
+// sizeof(Scalar)).
+template<int Alignment, typename Scalar, typename Index>
+EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
+{
+  const Index ScalarSize = sizeof(Scalar);
+  const Index AlignmentSize = Alignment / ScalarSize;
+  const Index AlignmentMask = AlignmentSize-1;
+  if(AlignmentSize<=1)
+  {
+    return 0;
+  }
+  else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
+  {
+    return size;
+  }
+  else
+  {
+    // Elements to skip so that (array + first) is a multiple of AlignmentSize
+    // scalars; the final mask maps a full AlignmentSize back to 0.
+    Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
+    return (first < size) ? first : size;
+  }
+}
+// first_aligned specialized to the alignment of Scalar's default SIMD
+// packet type.
+template<typename Scalar, typename Index>
+EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
+{
+  typedef typename packet_traits<Scalar>::type DefaultPacketType;
+  return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
+}
+// Round `size` up to the smallest multiple of `base` that is >= size.
+template<typename Index>
+inline Index first_multiple(Index size, Index base)
+{
+  const Index num_blocks = (size+base-1)/base;
+  return num_blocks*base;
+}
+template<typename T, bool UseMemcpy> struct smart_copy_helper;
+// Copy [start, end) into target, using raw memcpy for types that do not
+// require initialization and element-wise std::copy otherwise.
+template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
+{
+  smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
+}
+// Trivial-type path: a single memcpy of the byte range.
+template<typename T> struct smart_copy_helper<T,true> {
+  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
+  {
+    IntPtr size = IntPtr(end)-IntPtr(start);
+    if(size==0) return;
+    eigen_internal_assert(start!=0 && end!=0 && target!=0);
+    memcpy(target, start, size);
+  }
+};
+// Non-trivial-type path: invoke the copy-assignment operator per element.
+template<typename T> struct smart_copy_helper<T,false> {
+  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
+  { std::copy(start, end, target); }
+};
+template<typename T, bool UseMemmove> struct smart_memmove_helper;
+// Overlap-safe move of [start, end) into target: raw memmove for trivial
+// types, direction-aware element-wise copy otherwise.
+template<typename T> void smart_memmove(const T* start, const T* end, T* target)
+{
+  smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
+}
+template<typename T> struct smart_memmove_helper<T,true> {
+  static inline void run(const T* start, const T* end, T* target)
+  {
+    IntPtr size = IntPtr(end)-IntPtr(start);
+    if(size==0) return;
+    eigen_internal_assert(start!=0 && end!=0 && target!=0);
+    std::memmove(target, start, size);
+  }
+};
+template<typename T> struct smart_memmove_helper<T,false> {
+  static inline void run(const T* start, const T* end, T* target)
+  {
+    // Choose the copy direction so overlapping source elements are read
+    // before they are overwritten (same contract as memmove).
+    if (UIntPtr(target) < UIntPtr(start))
+    {
+      std::copy(start, end, target);
+    }
+    else
+    {
+      std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
+      std::copy_backward(start, end, target + count);
+    }
+  }
+};
+#ifndef EIGEN_ALLOCA
+ #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
+ #define EIGEN_ALLOCA alloca
+ #elif EIGEN_COMP_MSVC
+ #define EIGEN_ALLOCA _alloca
+ #endif
+#endif
+// Scope guard used by ei_declare_aligned_stack_constructed_variable: it
+// constructs the elements of a (possibly alloca'd) buffer on entry and, on
+// destruction, destroys them and frees the buffer with aligned_free when
+// `dealloc` is true (i.e. the buffer came from the heap, not the stack).
+template<typename T> class aligned_stack_memory_handler : noncopyable
+{
+  public:
+    aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
+      : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
+    {
+      // Only run constructors for types that need them.
+      if(NumTraits<T>::RequireInitialization && m_ptr)
+        Eigen::internal::construct_elements_of_array(m_ptr, size);
+    }
+    ~aligned_stack_memory_handler()
+    {
+      if(NumTraits<T>::RequireInitialization && m_ptr)
+        Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
+      if(m_deallocate)
+        Eigen::internal::aligned_free(m_ptr);
+    }
+  protected:
+    T* m_ptr;           // managed buffer (may be null)
+    std::size_t m_size; // number of elements, for destruction
+    bool m_deallocate;  // true when m_ptr was heap-allocated
+};
+// Minimal RAII owner of a new[]'d array (pre-C++11 substitute for
+// std::unique_ptr<T[]>). Non-copyable; ptr() exposes the raw pointer by
+// reference so two instances can be swapped.
+template<typename T> class scoped_array : noncopyable
+{
+  T* m_ptr;
+public:
+  explicit scoped_array(std::ptrdiff_t size)
+  {
+    m_ptr = new T[size];
+  }
+  ~scoped_array()
+  {
+    delete[] m_ptr;
+  }
+  T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
+  const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
+  T* &ptr() { return m_ptr; }
+  const T* ptr() const { return m_ptr; }
+  operator const T*() const { return m_ptr; }
+};
+// Exchange ownership between two scoped_arrays by swapping raw pointers.
+template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
+{
+  std::swap(a.ptr(),b.ptr());
+}
+}
+#ifdef EIGEN_ALLOCA
+ #if EIGEN_DEFAULT_ALIGN_BYTES>0
+ #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
+ #else
+ #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
+ #endif
+ #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
+ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
+ TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
+ : reinterpret_cast<TYPE*>( \
+ (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
+ : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
+ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
+#else
+ #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
+ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
+ TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
+ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
+#endif
+#if EIGEN_MAX_ALIGN_BYTES!=0
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+ void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
+ EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
+ EIGEN_CATCH (...) { return 0; } \
+ }
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
+ void *operator new(std::size_t size) { \
+ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
+ } \
+ void *operator new[](std::size_t size) { \
+ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
+ } \
+ void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ void operator delete(void * ptr, std::size_t ) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ void operator delete[](void * ptr, std::size_t ) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ \
+ \
+ \
+ static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
+ static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
+ void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
+ void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
+ \
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+ void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
+ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
+ } \
+ typedef void eigen_aligned_operator_new_marker_type;
+#else
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
+#endif
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
+// STL-compatible allocator that routes allocation through Eigen's
+// aligned_malloc/aligned_free so std containers of Eigen types get properly
+// aligned storage. Everything else is inherited from std::allocator<T>.
+template<class T>
+class aligned_allocator : public std::allocator<T>
+{
+public:
+  typedef std::size_t size_type;
+  typedef std::ptrdiff_t difference_type;
+  typedef T* pointer;
+  typedef const T* const_pointer;
+  typedef T& reference;
+  typedef const T& const_reference;
+  typedef T value_type;
+  template<class U>
+  struct rebind
+  {
+    typedef aligned_allocator<U> other;
+  };
+  aligned_allocator() : std::allocator<T>() {}
+  aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}
+  template<class U>
+  aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}
+  ~aligned_allocator() {}
+  // The hint pointer of the classic allocator interface is ignored.
+  pointer allocate(size_type num, const void* = 0)
+  {
+    internal::check_size_for_overflow<T>(num);
+    return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
+  }
+  void deallocate(pointer p, size_type )
+  {
+    internal::aligned_free(p);
+  }
+};
+#if !defined(EIGEN_NO_CPUID)
+# if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
+# if defined(__PIC__) && EIGEN_ARCH_i386
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+# elif defined(__PIC__) && EIGEN_ARCH_x86_64
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
+# else
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
+# endif
+# elif EIGEN_COMP_MSVC
+# if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
+# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
+# endif
+# endif
+#endif
+namespace internal {
+#ifdef EIGEN_CPUID
+// Compare the 12-character vendor id returned by cpuid leaf 0 against a
+// reference id. The id words come back in EBX, EDX, ECX order, i.e.
+// abcd[1], abcd[3], abcd[2].
+inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
+{
+  if (abcd[1] != vendor[0]) return false;
+  if (abcd[3] != vendor[1]) return false;
+  return abcd[2] == vendor[2];
+}
+// Query L1/L2/L3 cache sizes (in bytes) via cpuid leaf 0x4, iterating over
+// cache levels until an invalid cache type is reported. Only data (type 1)
+// and unified (type 3) caches are considered.
+inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
+{
+  int abcd[4];
+  l1 = l2 = l3 = 0;
+  int cache_id = 0;
+  int cache_type = 0;
+  do {
+    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+    EIGEN_CPUID(abcd,0x4,cache_id);
+    cache_type = (abcd[0] & 0x0F) >> 0;
+    if(cache_type==1||cache_type==3)
+    {
+      int cache_level = (abcd[0] & 0xE0) >> 5;
+      // The leaf reports each field minus one, hence the +1's below.
+      int ways = (abcd[1] & 0xFFC00000) >> 22;
+      int partitions = (abcd[1] & 0x003FF000) >> 12;
+      int line_size = (abcd[1] & 0x00000FFF) >> 0;
+      int sets = (abcd[2]);
+      int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
+      switch(cache_level)
+      {
+        case 1: l1 = cache_size; break;
+        case 2: l2 = cache_size; break;
+        case 3: l3 = cache_size; break;
+        default: break;
+      }
+    }
+    cache_id++;
+  } while(cache_type>0 && cache_id<16);
+}
+// Fallback for CPUs without cpuid leaf 0x4: decode the legacy one-byte cache
+// descriptors of cpuid leaf 0x2 into L1/L2/L3 data-cache sizes. Sizes are
+// accumulated in KB and converted to bytes at the end.
+inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
+{
+  int abcd[4];
+  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+  l1 = l2 = l3 = 0;
+  EIGEN_CPUID(abcd,0x00000002,0);
+  // Skip the first two bytes (presumably AL, the descriptor-count byte, and
+  // its neighbor) and scan the remaining 14 descriptor bytes -- TODO confirm
+  // against the Intel SDM leaf-2 layout.
+  unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
+  bool check_for_p2_core2 = false;
+  for(int i=0; i<14; ++i)
+  {
+    switch(bytes[i])
+    {
+      case 0x0A: l1 = 8; break;
+      case 0x0C: l1 = 16; break;
+      case 0x0E: l1 = 24; break;
+      case 0x10: l1 = 16; break;
+      case 0x15: l1 = 16; break;
+      case 0x2C: l1 = 32; break;
+      case 0x30: l1 = 32; break;
+      case 0x60: l1 = 16; break;
+      case 0x66: l1 = 8; break;
+      case 0x67: l1 = 16; break;
+      case 0x68: l1 = 32; break;
+      case 0x1A: l2 = 96; break;
+      case 0x22: l3 = 512; break;
+      case 0x23: l3 = 1024; break;
+      case 0x25: l3 = 2048; break;
+      case 0x29: l3 = 4096; break;
+      case 0x39: l2 = 128; break;
+      case 0x3A: l2 = 192; break;
+      case 0x3B: l2 = 128; break;
+      case 0x3C: l2 = 256; break;
+      case 0x3D: l2 = 384; break;
+      case 0x3E: l2 = 512; break;
+      case 0x40: l2 = 0; break;
+      case 0x41: l2 = 128; break;
+      case 0x42: l2 = 256; break;
+      case 0x43: l2 = 512; break;
+      case 0x44: l2 = 1024; break;
+      case 0x45: l2 = 2048; break;
+      case 0x46: l3 = 4096; break;
+      case 0x47: l3 = 8192; break;
+      case 0x48: l2 = 3072; break;
+      // Descriptor 0x49 is ambiguous: it means L3 on most parts but L2 on
+      // some P2/Core2 models; record both and disambiguate below.
+      case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;
+      case 0x4A: l3 = 6144; break;
+      case 0x4B: l3 = 8192; break;
+      case 0x4C: l3 = 12288; break;
+      case 0x4D: l3 = 16384; break;
+      case 0x4E: l2 = 6144; break;
+      case 0x78: l2 = 1024; break;
+      case 0x79: l2 = 128; break;
+      case 0x7A: l2 = 256; break;
+      case 0x7B: l2 = 512; break;
+      case 0x7C: l2 = 1024; break;
+      case 0x7D: l2 = 2048; break;
+      case 0x7E: l2 = 256; break;
+      case 0x7F: l2 = 512; break;
+      case 0x80: l2 = 512; break;
+      case 0x81: l2 = 128; break;
+      case 0x82: l2 = 256; break;
+      case 0x83: l2 = 512; break;
+      case 0x84: l2 = 1024; break;
+      case 0x85: l2 = 2048; break;
+      case 0x86: l2 = 512; break;
+      case 0x87: l2 = 1024; break;
+      case 0x88: l3 = 2048; break;
+      case 0x89: l3 = 4096; break;
+      case 0x8A: l3 = 8192; break;
+      case 0x8D: l3 = 3072; break;
+      default: break;
+    }
+  }
+  if(check_for_p2_core2 && l2 == l3)
+    l3 = 0;
+  l1 *= 1024;
+  l2 *= 1024;
+  l3 *= 1024;
+}
+// Intel dispatch: use the modern leaf-0x4 query when the CPU supports at
+// least 4 standard cpuid functions, otherwise fall back to the legacy
+// leaf-0x2 descriptor decoding.
+inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
+{
+  if(max_std_funcs>=4)
+    queryCacheSizes_intel_direct(l1,l2,l3);
+  else
+    queryCacheSizes_intel_codes(l1,l2,l3);
+}
+// AMD path: L1 data-cache size from extended leaf 0x80000005 (ECX[31:24],
+// in KB) and L2/L3 from leaf 0x80000006 (L3 reported in 512KB units).
+inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
+{
+  int abcd[4];
+  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+  EIGEN_CPUID(abcd,0x80000005,0);
+  l1 = (abcd[2] >> 24) * 1024;
+  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+  EIGEN_CPUID(abcd,0x80000006,0);
+  l2 = (abcd[2] >> 16) * 1024;
+  l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024;
+}
+#endif
+// Detect the CPU vendor via cpuid leaf 0 and dispatch to the matching
+// vendor-specific cache query; unknown vendors use the Intel path. When
+// EIGEN_CPUID is unavailable all sizes are reported as -1 (unknown).
+inline void queryCacheSizes(int& l1, int& l2, int& l3)
+{
+  #ifdef EIGEN_CPUID
+  int abcd[4];
+  // Vendor strings packed as little-endian 4-char words ("GenuineIntel",
+  // "AuthenticAMD", and early AMD's "AMDisbetter!").
+  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
+  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
+  const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574};
+  EIGEN_CPUID(abcd,0x0,0);
+  int max_std_funcs = abcd[1];
+  if(cpuid_is_vendor(abcd,GenuineIntel))
+    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
+  else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
+    queryCacheSizes_amd(l1,l2,l3);
+  else
+    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
+  #else
+  l1 = l2 = l3 = -1;
+  #endif
+}
+// Convenience wrapper: L1 data-cache size in bytes, or -1 if unknown.
+inline int queryL1CacheSize()
+{
+  int l1(-1), l2, l3;
+  queryCacheSizes(l1,l2,l3);
+  return l1;
+}
+// Convenience wrapper: size of the largest (last-level) cache in bytes,
+// i.e. max(L2, L3); -1 if unknown.
+inline int queryTopLevelCacheSize()
+{
+  int l1, l2(-1), l3(-1);
+  queryCacheSizes(l1,l2,l3);
+  return (std::max)(l2,l3);
+}
+}
+}
+#endif
+// end #include "src/Core/util/Memory.h"
+// #include "src/Core/NumTraits.h"
+#ifndef EIGEN_NUMTRAITS_H
+#define EIGEN_NUMTRAITS_H
+namespace Eigen {
+namespace internal {
+// Default implementation of NumTraits<T>::digits10(). Primary template
+// delegates to std::numeric_limits when it is specialized for T.
+template< typename T,
+          bool use_numeric_limits = std::numeric_limits<T>::is_specialized,
+          bool is_integer = NumTraits<T>::IsInteger>
+struct default_digits10_impl
+{
+  static int run() { return std::numeric_limits<T>::digits10; }
+};
+// Non-specialized floating-point types: derive digits10 from the type's
+// epsilon as ceil(-log10(eps)).
+template<typename T>
+struct default_digits10_impl<T,false,false>
+{
+  static int run() {
+    using std::log10;
+    using std::ceil;
+    typedef typename NumTraits<T>::Real Real;
+    return int(ceil(-log10(NumTraits<Real>::epsilon())));
+  }
+};
+// Non-specialized integer types: digits10 is not meaningful, report 0.
+template<typename T>
+struct default_digits10_impl<T,false,true>
+{
+  static int run() { return 0; }
+};
+}
+// Base class providing the default NumTraits interface for a scalar type T:
+// classification flags, cost constants, associated types, and the numeric
+// bounds/precision queries (mostly forwarded to numext::numeric_limits).
+template<typename T> struct GenericNumTraits
+{
+  enum {
+    IsInteger = std::numeric_limits<T>::is_integer,
+    IsSigned = std::numeric_limits<T>::is_signed,
+    IsComplex = 0,
+    // Non-arithmetic (class) types need their constructors run.
+    RequireInitialization = internal::is_arithmetic<T>::value ? 0 : 1,
+    ReadCost = 1,
+    AddCost = 1,
+    MulCost = 1
+  };
+  typedef T Real;
+  // For integer T, NonInteger is the smallest adequate floating type.
+  typedef typename internal::conditional<
+                     IsInteger,
+                     typename internal::conditional<sizeof(T)<=2, float, double>::type,
+                     T
+                   >::type NonInteger;
+  typedef T Nested;
+  typedef T Literal;
+  EIGEN_DEVICE_FUNC
+  static inline Real epsilon()
+  {
+    return numext::numeric_limits<T>::epsilon();
+  }
+  EIGEN_DEVICE_FUNC
+  static inline int digits10()
+  {
+    return internal::default_digits10_impl<T>::run();
+  }
+  EIGEN_DEVICE_FUNC
+  static inline Real dummy_precision()
+  {
+    return Real(0);
+  }
+  EIGEN_DEVICE_FUNC
+  static inline T highest() {
+    return (numext::numeric_limits<T>::max)();
+  }
+  EIGEN_DEVICE_FUNC
+  static inline T lowest()  {
+    // For floats, numeric_limits::min() is the smallest positive value,
+    // so the true lowest is -max().
+    return IsInteger ? (numext::numeric_limits<T>::min)() : (-(numext::numeric_limits<T>::max)());
+  }
+  EIGEN_DEVICE_FUNC
+  static inline T infinity() {
+    return numext::numeric_limits<T>::infinity();
+  }
+  EIGEN_DEVICE_FUNC
+  static inline T quiet_NaN() {
+    return numext::numeric_limits<T>::quiet_NaN();
+  }
+};
+// Primary NumTraits template: inherit all defaults from GenericNumTraits.
+template<typename T> struct NumTraits : GenericNumTraits<T>
+{};
+// Built-in floating-point specializations only override dummy_precision(),
+// the tolerance used by Eigen's fuzzy comparisons.
+template<> struct NumTraits<float>
+  : GenericNumTraits<float>
+{
+  EIGEN_DEVICE_FUNC
+  static inline float dummy_precision() { return 1e-5f; }
+};
+template<> struct NumTraits<double> : GenericNumTraits<double>
+{
+  EIGEN_DEVICE_FUNC
+  static inline double dummy_precision() { return 1e-12; }
+};
+template<> struct NumTraits<long double>
+  : GenericNumTraits<long double>
+{
+  static inline long double dummy_precision() { return 1e-15l; }
+};
+// NumTraits for std::complex<_Real>: Real/precision queries delegate to the
+// underlying real type; costs are scaled for the two components
+// (a complex multiply is 4 real multiplies + 2 real adds).
+template<typename _Real> struct NumTraits<std::complex<_Real> >
+  : GenericNumTraits<std::complex<_Real> >
+{
+  typedef _Real Real;
+  typedef typename NumTraits<_Real>::Literal Literal;
+  enum {
+    IsComplex = 1,
+    RequireInitialization = NumTraits<_Real>::RequireInitialization,
+    ReadCost = 2 * NumTraits<_Real>::ReadCost,
+    AddCost = 2 * NumTraits<Real>::AddCost,
+    MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
+  };
+  EIGEN_DEVICE_FUNC
+  static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
+  EIGEN_DEVICE_FUNC
+  static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
+  EIGEN_DEVICE_FUNC
+  static inline int digits10() { return NumTraits<Real>::digits10(); }
+};
+// NumTraits for Array-valued scalars (arrays used as coefficients, e.g. in
+// an Array-of-Array expression). Associated types are Arrays of the scalar's
+// corresponding types; costs scale with the compile-time size, saturating to
+// HugeCost when the size is Dynamic.
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+{
+  typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> ArrayType;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef Array<RealScalar, Rows, Cols, Options, MaxRows, MaxCols> Real;
+  typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
+  typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
+  typedef ArrayType & Nested;
+  typedef typename NumTraits<Scalar>::Literal Literal;
+  enum {
+    IsComplex = NumTraits<Scalar>::IsComplex,
+    IsInteger = NumTraits<Scalar>::IsInteger,
+    IsSigned  = NumTraits<Scalar>::IsSigned,
+    RequireInitialization = 1,
+    ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
+    AddCost  = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
+    MulCost  = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
+  };
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
+  static inline int digits10() { return NumTraits<Scalar>::digits10(); }
+};
+// NumTraits for std::string "scalars" (e.g. Matrix<std::string,...> used for
+// I/O). All numeric queries are declared private and left undefined so any
+// use fails at link/compile time; only digits10() is meaningful.
+template<> struct NumTraits<std::string>
+  : GenericNumTraits<std::string>
+{
+  enum {
+    RequireInitialization = 1,
+    ReadCost = HugeCost,
+    AddCost  = HugeCost,
+    MulCost  = HugeCost
+  };
+  static inline int digits10() { return 0; }
+private:
+  static inline std::string epsilon();
+  static inline std::string dummy_precision();
+  static inline std::string lowest();
+  static inline std::string highest();
+  static inline std::string infinity();
+  static inline std::string quiet_NaN();
+};
+// Empty traits for void, used by expression machinery as a placeholder.
+template<> struct NumTraits<void> {};
+}
+#endif
+// end #include "src/Core/NumTraits.h"
+// #include "src/Core/MathFunctions.h"
+#ifndef EIGEN_MATHFUNCTIONS_H
+#define EIGEN_MATHFUNCTIONS_H
+#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L
+namespace Eigen {
+// Old WinCE MSVC toolchains lack the global abs() overloads for these types;
+// provide them in namespace Eigen in terms of the C library functions.
+#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500
+long        abs(long        x) { return (labs(x));  }
+double      abs(double      x) { return (fabs(x));  }
+float       abs(float       x) { return (fabsf(x)); }
+long double abs(long double x) { return (fabsl(x)); }
+#endif
+namespace internal {
+// SFINAE hook used by the math-function dispatch macros below: by default a
+// type maps to itself, but a type that declares the member typedef
+// Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl maps to that base, so
+// one *_impl specialization can serve a whole family of derived types.
+template<typename T, typename dummy = void>
+struct global_math_functions_filtering_base
+{
+  typedef T type;
+};
+// Maps any type to void; used below to detect presence of the member typedef.
+template<typename T> struct always_void { typedef void type; };
+template<typename T>
+struct global_math_functions_filtering_base
+  <T,
+   typename always_void<typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl>::type
+  >
+{
+  typedef typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl type;
+};
+#define EIGEN_MATHFUNC_IMPL(func, scalar) Eigen::internal::func##_impl<typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>
+#define EIGEN_MATHFUNC_RETVAL(func, scalar) typename Eigen::internal::func##_retval<typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>::type
+// real(): real part of a scalar. For non-complex scalars this is the
+// identity; for complex scalars it defers to std::real via ADL.
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct real_default_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    return x;
+  }
+};
+template<typename Scalar>
+struct real_default_impl<Scalar,true>
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    using std::real;
+    return real(x);
+  }
+};
+template<typename Scalar> struct real_impl : real_default_impl<Scalar> {};
+#ifdef __CUDA_ARCH__
+// On CUDA devices std::real is unavailable; use the member accessor.
+template<typename T>
+struct real_impl<std::complex<T> >
+{
+  typedef T RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline T run(const std::complex<T>& x)
+  {
+    return x.real();
+  }
+};
+#endif
+// Return type of real(): always the associated real scalar type.
+template<typename Scalar>
+struct real_retval
+{
+  typedef typename NumTraits<Scalar>::Real type;
+};
+// imag(): imaginary part of a scalar. Zero for non-complex scalars; defers
+// to std::imag via ADL for complex ones.
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct imag_default_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar&)
+  {
+    return RealScalar(0);
+  }
+};
+template<typename Scalar>
+struct imag_default_impl<Scalar,true>
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    using std::imag;
+    return imag(x);
+  }
+};
+template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};
+#ifdef __CUDA_ARCH__
+// On CUDA devices std::imag is unavailable; use the member accessor.
+template<typename T>
+struct imag_impl<std::complex<T> >
+{
+  typedef T RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline T run(const std::complex<T>& x)
+  {
+    return x.imag();
+  }
+};
+#endif
+// Return type of imag(): the associated real scalar type.
+template<typename Scalar>
+struct imag_retval
+{
+  typedef typename NumTraits<Scalar>::Real type;
+};
+// real_ref()/imag_ref(): writable references to the real and imaginary
+// components, obtained by viewing the scalar as an array of two RealScalars
+// (index 0 = real part, index 1 = imaginary part).
+template<typename Scalar>
+struct real_ref_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar& run(Scalar& x)
+  {
+    return reinterpret_cast<RealScalar*>(&x)[0];
+  }
+  EIGEN_DEVICE_FUNC
+  static inline const RealScalar& run(const Scalar& x)
+  {
+    return reinterpret_cast<const RealScalar*>(&x)[0];
+  }
+};
+template<typename Scalar>
+struct real_ref_retval
+{
+  typedef typename NumTraits<Scalar>::Real & type;
+};
+template<typename Scalar, bool IsComplex>
+struct imag_ref_default_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar& run(Scalar& x)
+  {
+    return reinterpret_cast<RealScalar*>(&x)[1];
+  }
+  EIGEN_DEVICE_FUNC
+  static inline const RealScalar& run(const Scalar& x)
+  {
+    return reinterpret_cast<RealScalar*>(&x)[1];
+  }
+};
+// Non-complex scalars have no imaginary component: return zero by value.
+template<typename Scalar>
+struct imag_ref_default_impl<Scalar, false>
+{
+  EIGEN_DEVICE_FUNC
+  static inline Scalar run(Scalar&)
+  {
+    return Scalar(0);
+  }
+  EIGEN_DEVICE_FUNC
+  static inline const Scalar run(const Scalar&)
+  {
+    return Scalar(0);
+  }
+};
+template<typename Scalar>
+struct imag_ref_impl : imag_ref_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
+template<typename Scalar>
+struct imag_ref_retval
+{
+  typedef typename NumTraits<Scalar>::Real & type;
+};
+// conj(): complex conjugate. Identity for non-complex scalars; defers to
+// std::conj via ADL for complex ones.
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct conj_impl
+{
+  EIGEN_DEVICE_FUNC
+  static inline Scalar run(const Scalar& x)
+  {
+    return x;
+  }
+};
+template<typename Scalar>
+struct conj_impl<Scalar,true>
+{
+  EIGEN_DEVICE_FUNC
+  static inline Scalar run(const Scalar& x)
+  {
+    using std::conj;
+    return conj(x);
+  }
+};
+// conj() preserves the scalar type.
+template<typename Scalar>
+struct conj_retval
+{
+  typedef Scalar type;
+};
+// abs2(): squared magnitude. x*x for real scalars, re^2 + im^2 for complex.
+template<typename Scalar,bool IsComplex>
+struct abs2_impl_default
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    return x*x;
+  }
+};
+template<typename Scalar>
+struct abs2_impl_default<Scalar, true>
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    return real(x)*real(x) + imag(x)*imag(x);
+  }
+};
+template<typename Scalar>
+struct abs2_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    return abs2_impl_default<Scalar,NumTraits<Scalar>::IsComplex>::run(x);
+  }
+};
+// abs2() always yields the associated real scalar type.
+template<typename Scalar>
+struct abs2_retval
+{
+  typedef typename NumTraits<Scalar>::Real type;
+};
+// norm1(): the "taxicab" magnitude of a scalar -- |re| + |im| for complex
+// scalars, plain |x| for real ones.
+template<typename Scalar, bool IsComplex>
+struct norm1_default_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    EIGEN_USING_STD_MATH(abs);
+    return abs(real(x)) + abs(imag(x));
+  }
+};
+template<typename Scalar>
+struct norm1_default_impl<Scalar, false>
+{
+  EIGEN_DEVICE_FUNC
+  static inline Scalar run(const Scalar& x)
+  {
+    EIGEN_USING_STD_MATH(abs);
+    return abs(x);
+  }
+};
+template<typename Scalar>
+struct norm1_impl : norm1_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
+template<typename Scalar>
+struct norm1_retval
+{
+  typedef typename NumTraits<Scalar>::Real type;
+};
+// sqrt(|x|^2 + |y|^2) computed without intermediate overflow/underflow:
+// factor out p = max(|x|,|y|) and evaluate p * sqrt(1 + (min/max)^2).
+template<typename Scalar>
+struct hypot_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ static inline RealScalar run(const Scalar& x, const Scalar& y)
+ {
+ EIGEN_USING_STD_MATH(abs);
+ EIGEN_USING_STD_MATH(sqrt);
+ RealScalar _x = abs(x);
+ RealScalar _y = abs(y);
+ // NOTE(review): p and qp hold real magnitudes but are declared Scalar,
+ // which for complex Scalar promotes them to complex — presumably these
+ // should be RealScalar; TODO confirm against upstream.
+ Scalar p, qp;
+ if(_x>_y)
+ {
+ p = _x;
+ qp = _y / p;
+ }
+ else
+ {
+ p = _y;
+ qp = _x / p;
+ }
+ // The zero guard runs *after* the division above, so for x==y==0 the
+ // division transiently yields 0/0; that NaN is discarded by this return.
+ if(p==RealScalar(0)) return RealScalar(0);
+ return p * sqrt(RealScalar(1) + qp*qp);
+ }
+};
+// hypot of any scalar (real or complex) is a real magnitude.
+template<typename Scalar>
+struct hypot_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+// Customization point for scalar conversions; the default is a plain
+// static_cast. Specializations elsewhere can override per type pair.
+template<typename OldType, typename NewType>
+struct cast_impl
+{
+ EIGEN_DEVICE_FUNC
+ static inline NewType run(const OldType& x)
+ {
+ return static_cast<NewType>(x);
+ }
+};
+// Convenience wrapper forwarding to cast_impl<OldType,NewType>::run.
+template<typename OldType, typename NewType>
+EIGEN_DEVICE_FUNC
+inline NewType cast(const OldType& x)
+{
+ return cast_impl<OldType, NewType>::run(x);
+}
+// Round to nearest integer value. With C++11 math, defer to std::round;
+// otherwise emulate "round half away from zero" using floor/ceil.
+// Complex scalars are rejected at compile time in both branches.
+#if EIGEN_HAS_CXX11_MATH
+  template<typename Scalar>
+  struct round_impl {
+    static inline Scalar run(const Scalar& x)
+    {
+      EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
+      using std::round;
+      return round(x);
+    }
+  };
+#else
+  template<typename Scalar>
+  struct round_impl
+  {
+    static inline Scalar run(const Scalar& x)
+    {
+      EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
+      EIGEN_USING_STD_MATH(floor);
+      EIGEN_USING_STD_MATH(ceil);
+      // floor(x+0.5) for positive x, ceil(x-0.5) otherwise: halves round
+      // away from zero, matching std::round's semantics.
+      return (x > Scalar(0)) ? floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5));
+    }
+  };
+#endif
+// round preserves the scalar type.
+template<typename Scalar>
+struct round_retval
+{
+ typedef Scalar type;
+};
+#if EIGEN_HAS_CXX11_MATH
+ template<typename Scalar>
+ struct arg_impl {
+ static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_USING_STD_MATH(arg);
+ return arg(x);
+ }
+ };
+#else
+ template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+ struct arg_default_impl
+ {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0); }
+ };
+ template<typename Scalar>
+ struct arg_default_impl<Scalar,true>
+ {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ EIGEN_USING_STD_MATH(arg);
+ return arg(x);
+ }
+ };
+ template<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {};
+#endif
+template<typename Scalar>
+struct arg_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+// Fallback log(1+x) for toolchains without std::log1p. Uses the classic
+// compensation trick: if 1+x rounds to 1, x itself is the best answer;
+// otherwise x * log(x1p)/(x1p - 1) corrects for the rounding error in x1p.
+namespace std_fallback {
+ template<typename Scalar>
+ EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_USING_STD_MATH(log);
+ Scalar x1p = RealScalar(1) + x;
+ return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
+ }
+}
+// Brings both std::log1p (when available) and the fallback into scope;
+// unqualified call lets overload resolution prefer the exact std match
+// for the types it covers, falling back to the generic version otherwise.
+template<typename Scalar>
+struct log1p_impl {
+ static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+ #if EIGEN_HAS_CXX11_MATH
+ using std::log1p;
+ #endif
+ using std_fallback::log1p;
+ return log1p(x);
+ }
+};
+// log1p preserves the scalar type.
+template<typename Scalar>
+struct log1p_retval
+{
+ typedef Scalar type;
+};
+// Generic power. Primary template (at least one non-integer operand):
+// forwards to std::pow (or an ADL match) with the mixed-type return type
+// deduced via ScalarBinaryOpTraits.
+template<typename ScalarX,typename ScalarY, bool IsInteger = NumTraits<ScalarX>::IsInteger&&NumTraits<ScalarY>::IsInteger>
+struct pow_impl
+{
+ typedef typename ScalarBinaryOpTraits<ScalarX,ScalarY,internal::scalar_pow_op<ScalarX,ScalarY> >::ReturnType result_type;
+ static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y)
+ {
+ EIGEN_USING_STD_MATH(pow);
+ return pow(x, y);
+ }
+};
+// Integer/integer specialization: exact exponentiation by squaring,
+// O(log y) multiplies. Negative exponents are rejected by eigen_assert
+// (only meaningful for signed ScalarY).
+template<typename ScalarX,typename ScalarY>
+struct pow_impl<ScalarX,ScalarY, true>
+{
+ typedef ScalarX result_type;
+ static EIGEN_DEVICE_FUNC inline ScalarX run(ScalarX x, ScalarY y)
+ {
+ ScalarX res(1);
+ eigen_assert(!NumTraits<ScalarY>::IsSigned || y >= 0);
+ // Consume the exponent bit by bit, squaring x for each position.
+ if(y & 1) res *= x;
+ y >>= 1;
+ while(y)
+ {
+ x *= x;
+ if(y&1) res *= x;
+ y >>= 1;
+ }
+ return res;
+ }
+};
+template<typename Scalar,
+ bool IsComplex,
+ bool IsInteger>
+struct random_default_impl {};
+template<typename Scalar>
+struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
+template<typename Scalar>
+struct random_retval
+{
+ typedef Scalar type;
+};
+template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y);
+template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random();
+template<typename Scalar>
+struct random_default_impl<Scalar, false, false>
+{
+ static inline Scalar run(const Scalar& x, const Scalar& y)
+ {
+ return x + (y-x) * Scalar(std::rand()) / Scalar(RAND_MAX);
+ }
+ static inline Scalar run()
+ {
+ return run(Scalar(NumTraits<Scalar>::IsSigned ? -1 : 0), Scalar(1));
+ }
+};
+// Compile-time floor(log2(n)) via a binary search over the bit width.
+// The selector decides whether to recurse into the lower half, the upper
+// half, terminate, or fail (n == 0 has no logarithm).
+enum {
+ meta_floor_log2_terminate,
+ meta_floor_log2_move_up,
+ meta_floor_log2_move_down,
+ meta_floor_log2_bogus
+};
+template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
+{
+ enum { middle = (lower + upper) / 2,
+ value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
+ : (n < (1 << middle)) ? int(meta_floor_log2_move_down)
+ : (n==0) ? int(meta_floor_log2_bogus)
+ : int(meta_floor_log2_move_up)
+ };
+};
+// Entry point: searches the full range [0, bits(unsigned int) - 1].
+template<unsigned int n,
+ int lower = 0,
+ int upper = sizeof(unsigned int) * CHAR_BIT - 1,
+ int selector = meta_floor_log2_selector<n, lower, upper>::value>
+struct meta_floor_log2 {};
+// Recurse into the lower half [lower, middle].
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
+{
+ enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
+};
+// Recurse into the upper half [middle, upper].
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
+{
+ enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
+};
+// Range narrowed to two candidates: pick lower+1 iff n reaches 2^(lower+1).
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
+{
+ enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
+};
+// n == 0: intentionally defines no 'value' so any use is a compile error.
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
+{
+};
+// Uniform random integers. run(x,y) draws from the inclusive range [x,y]
+// using rejection sampling (the do/while) rather than a plain modulo,
+// avoiding modulo bias when the range does not divide RAND_MAX+1 evenly.
+template<typename Scalar>
+struct random_default_impl<Scalar, false, true>
+{
+ static inline Scalar run(const Scalar& x, const Scalar& y)
+ {
+ // Widen to ptrdiff_t/size_t so y - x cannot overflow the scalar type.
+ typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
+ // Degenerate/inverted range: just return the lower bound.
+ if(y<x)
+ return x;
+ std::size_t range = ScalarX(y)-ScalarX(x);
+ std::size_t offset = 0;
+ std::size_t divisor = 1;
+ std::size_t multiplier = 1;
+ // Small range: shrink rand() by an integer divisor; large range:
+ // stretch it by an integer multiplier. Either way, out-of-range draws
+ // are rejected and retried below.
+ if(range<RAND_MAX) divisor = (std::size_t(RAND_MAX)+1)/(range+1);
+ else multiplier = 1 + range/(std::size_t(RAND_MAX)+1);
+ do {
+ offset = (std::size_t(std::rand()) * multiplier) / divisor;
+ } while (offset > range);
+ return Scalar(ScalarX(x) + offset);
+ }
+ static inline Scalar run()
+ {
+#ifdef EIGEN_MAKING_DOCS
+ return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
+#else
+ // Map rand()'s bit budget onto the scalar's width: drop excess high
+ // bits via 'shift', then re-center signed types around zero via 'offset'.
+ enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
+ scalar_bits = sizeof(Scalar) * CHAR_BIT,
+ shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
+ offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
+ };
+ return Scalar((std::rand() >> shift) - offset);
+#endif
+ }
+};
+template<typename Scalar>
+struct random_default_impl<Scalar, true, false>
+{
+ static inline Scalar run(const Scalar& x, const Scalar& y)
+ {
+ return Scalar(random(real(x), real(y)),
+ random(imag(x), imag(y)));
+ }
+ static inline Scalar run()
+ {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ return Scalar(random<RealScalar>(), random<RealScalar>());
+ }
+};
+template<typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y)
+{
+ return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y);
+}
+template<typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
+{
+ return EIGEN_MATHFUNC_IMPL(random, Scalar)::run();
+}
+#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG)
+#define EIGEN_USE_STD_FPCLASSIFY 1
+#else
+#define EIGEN_USE_STD_FPCLASSIFY 0
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isnan_impl(const T&) { return false; }
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isinf_impl(const T&) { return false; }
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isfinite_impl(const T&) { return true; }
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isfinite_impl(const T& x)
+{
+ #ifdef __CUDA_ARCH__
+ return (::isfinite)(x);
+ #elif EIGEN_USE_STD_FPCLASSIFY
+ using std::isfinite;
+ return isfinite EIGEN_NOT_A_MACRO (x);
+ #else
+ return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest();
+ #endif
+}
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isinf_impl(const T& x)
+{
+ #ifdef __CUDA_ARCH__
+ return (::isinf)(x);
+ #elif EIGEN_USE_STD_FPCLASSIFY
+ using std::isinf;
+ return isinf EIGEN_NOT_A_MACRO (x);
+ #else
+ return x>NumTraits<T>::highest() || x<NumTraits<T>::lowest();
+ #endif
+}
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isnan_impl(const T& x)
+{
+ #ifdef __CUDA_ARCH__
+ return (::isnan)(x);
+ #elif EIGEN_USE_STD_FPCLASSIFY
+ using std::isnan;
+ return isnan EIGEN_NOT_A_MACRO (x);
+ #else
+ return x != x;
+ #endif
+}
+#if (!EIGEN_USE_STD_FPCLASSIFY)
+#if EIGEN_COMP_MSVC
+template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x)
+{
+ return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF;
+}
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; }
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); }
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); }
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); }
+#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC)
+#if EIGEN_GNUC_AT_LEAST(5,0)
+ #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only")))
+#else
+ #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only")))
+#endif
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x) { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x) { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x) { return __builtin_isinf(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x) { return __builtin_isinf(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); }
+#undef EIGEN_TMP_NOOPT_ATTRIB
+#endif
+#endif
+template<typename T> EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x);
+template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x);
+template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
+template<typename T> T generic_fast_tanh_float(const T& a_x);
+}
+namespace numext {
+#ifndef __CUDA_ARCH__
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
+{
+ EIGEN_USING_STD_MATH(min);
+ return min EIGEN_NOT_A_MACRO (x,y);
+}
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
+{
+ EIGEN_USING_STD_MATH(max);
+ return max EIGEN_NOT_A_MACRO (x,y);
+}
+#else
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
+{
+ return y < x ? y : x;
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)
+{
+ return fminf(x, y);
+}
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
+{
+ return x < y ? y : x;
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y)
+{
+ return fmaxf(x, y);
+}
+#endif
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x)
+{
+ return internal::real_ref_impl<Scalar>::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)
+{
+ return internal::imag_ref_impl<Scalar>::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y)
+{
+ return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float log1p(const float &x) { return ::log1pf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double log1p(const double &x) { return ::log1p(x); }
+#endif
+template<typename ScalarX,typename ScalarY>
+EIGEN_DEVICE_FUNC
+inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const ScalarX& x, const ScalarY& y)
+{
+ return internal::pow_impl<ScalarX,ScalarY>::run(x, y);
+}
+template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); }
+template<typename T> EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); }
+template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); }
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x);
+}
+template<typename T>
+EIGEN_DEVICE_FUNC
+T (floor)(const T& x)
+{
+ EIGEN_USING_STD_MATH(floor);
+ return floor(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float floor(const float &x) { return ::floorf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double floor(const double &x) { return ::floor(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC
+T (ceil)(const T& x)
+{
+ EIGEN_USING_STD_MATH(ceil);
+ return ceil(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float ceil(const float &x) { return ::ceilf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double ceil(const double &x) { return ::ceil(x); }
+#endif
+// Integer floor(log2(x)) without loops or intrinsics: smear the highest set
+// bit downward so v becomes 2^k - 1, then a de Bruijn-style multiply+shift
+// (constant 0x07C4ACDD) indexes a 32-entry lookup table. Requires x >= 0.
+inline int log2(int x)
+{
+ eigen_assert(x>=0);
+ unsigned int v(x);
+ static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+ // Propagate the most significant set bit into every lower position.
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ return table[(v * 0x07C4ACDDU) >> 27];
+}
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sqrt(const T &x)
+{
+ EIGEN_USING_STD_MATH(sqrt);
+ return sqrt(x);
+}
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T log(const T &x) {
+ EIGEN_USING_STD_MATH(log);
+ return log(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float log(const float &x) { return ::logf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double log(const double &x) { return ::log(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
+abs(const T &x) {
+ EIGEN_USING_STD_MATH(abs);
+ return abs(x);
+}
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type
+abs(const T &x) {
+ return x;
+}
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); }
+EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); }
+#endif
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float abs(const float &x) { return ::fabsf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double abs(const double &x) { return ::fabs(x); }
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float abs(const std::complex<float>& x) {
+ return ::hypotf(x.real(), x.imag());
+}
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double abs(const std::complex<double>& x) {
+ return ::hypot(x.real(), x.imag());
+}
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T exp(const T &x) {
+ EIGEN_USING_STD_MATH(exp);
+ return exp(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float exp(const float &x) { return ::expf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double exp(const double &x) { return ::exp(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T cos(const T &x) {
+ EIGEN_USING_STD_MATH(cos);
+ return cos(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float cos(const float &x) { return ::cosf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double cos(const double &x) { return ::cos(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sin(const T &x) {
+ EIGEN_USING_STD_MATH(sin);
+ return sin(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sin(const float &x) { return ::sinf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sin(const double &x) { return ::sin(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T tan(const T &x) {
+ EIGEN_USING_STD_MATH(tan);
+ return tan(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float tan(const float &x) { return ::tanf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double tan(const double &x) { return ::tan(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T acos(const T &x) {
+ EIGEN_USING_STD_MATH(acos);
+ return acos(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float acos(const float &x) { return ::acosf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double acos(const double &x) { return ::acos(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T asin(const T &x) {
+ EIGEN_USING_STD_MATH(asin);
+ return asin(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float asin(const float &x) { return ::asinf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double asin(const double &x) { return ::asin(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T atan(const T &x) {
+ EIGEN_USING_STD_MATH(atan);
+ return atan(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float atan(const float &x) { return ::atanf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double atan(const double &x) { return ::atan(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T cosh(const T &x) {
+ EIGEN_USING_STD_MATH(cosh);
+ return cosh(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float cosh(const float &x) { return ::coshf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double cosh(const double &x) { return ::cosh(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sinh(const T &x) {
+ EIGEN_USING_STD_MATH(sinh);
+ return sinh(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sinh(const float &x) { return ::sinhf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sinh(const double &x) { return ::sinh(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T tanh(const T &x) {
+ EIGEN_USING_STD_MATH(tanh);
+ return tanh(x);
+}
+#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float tanh(float x) { return internal::generic_fast_tanh_float(x); }
+#endif
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float tanh(const float &x) { return ::tanhf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double tanh(const double &x) { return ::tanh(x); }
+#endif
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T fmod(const T& a, const T& b) {
+ EIGEN_USING_STD_MATH(fmod);
+ return fmod(a, b);
+}
+#ifdef __CUDACC__
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float fmod(const float& a, const float& b) {
+ return ::fmodf(a, b);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double fmod(const double& a, const double& b) {
+ return ::fmod(a, b);
+}
+#endif
+}
+namespace internal {
+template<typename T>
+EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x)
+{
+ return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x));
+}
+template<typename T>
+EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x)
+{
+ return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x));
+}
+template<typename T>
+EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x)
+{
+ return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x));
+}
+template<typename Scalar,
+ bool IsComplex,
+ bool IsInteger>
+struct scalar_fuzzy_default_impl {};
+template<typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, false, false>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ template<typename OtherScalar> EIGEN_DEVICE_FUNC
+ static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
+ {
+ return numext::abs(x) <= numext::abs(y) * prec;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
+ {
+ return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
+ {
+ return x <= y || isApprox(x, y, prec);
+ }
+};
+template<typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, false, true>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ template<typename OtherScalar> EIGEN_DEVICE_FUNC
+ static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&)
+ {
+ return x == Scalar(0);
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&)
+ {
+ return x == y;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&)
+ {
+ return x <= y;
+ }
+};
+template<typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, true, false>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ template<typename OtherScalar> EIGEN_DEVICE_FUNC
+ static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
+ {
+ return numext::abs2(x) <= numext::abs2(y) * prec * prec;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
+ {
+ return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec;
+ }
+};
+template<typename Scalar>
+struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
+template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
+inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
+ const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
+{
+ return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
+}
+template<typename Scalar> EIGEN_DEVICE_FUNC
+inline bool isApprox(const Scalar& x, const Scalar& y,
+ const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
+{
+ return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
+}
+template<typename Scalar> EIGEN_DEVICE_FUNC
+inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
+ const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
+{
+ return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision);
+}
+template<> struct random_impl<bool>
+{
+ static inline bool run()
+ {
+ return random<int>(0,1)==0 ? false : true;
+ }
+};
+template<> struct scalar_fuzzy_impl<bool>
+{
+ typedef bool RealScalar;
+ template<typename OtherScalar> EIGEN_DEVICE_FUNC
+ static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&)
+ {
+ return !x;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApprox(bool x, bool y, bool)
+ {
+ return x == y;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&)
+ {
+ return (!x) || y;
+ }
+};
+}
+}
+#endif
+// end #include "src/Core/MathFunctions.h"
+// #include "src/Core/GenericPacketMath.h"
+#ifndef EIGEN_GENERIC_PACKET_MATH_H
+#define EIGEN_GENERIC_PACKET_MATH_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_DEBUG_ALIGNED_LOAD
+#define EIGEN_DEBUG_ALIGNED_LOAD
+#endif
+#ifndef EIGEN_DEBUG_UNALIGNED_LOAD
+#define EIGEN_DEBUG_UNALIGNED_LOAD
+#endif
+#ifndef EIGEN_DEBUG_ALIGNED_STORE
+#define EIGEN_DEBUG_ALIGNED_STORE
+#endif
+#ifndef EIGEN_DEBUG_UNALIGNED_STORE
+#define EIGEN_DEBUG_UNALIGNED_STORE
+#endif
+// Capability flags shared by all packet_traits specializations. Basic
+// arithmetic, abs/min/max/conj and linear initialization default to
+// available (1); every transcendental / special function defaults to
+// unavailable (0) and must be explicitly opted into per packet type.
+struct default_packet_traits
+{
+ enum {
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasNegate = 1,
+ HasAbs = 1,
+ HasArg = 0,
+ HasAbs2 = 1,
+ HasMin = 1,
+ HasMax = 1,
+ HasConj = 1,
+ HasSetLinear = 1,
+ HasBlend = 0,
+ HasDiv = 0,
+ HasSqrt = 0,
+ HasRsqrt = 0,
+ HasExp = 0,
+ HasLog = 0,
+ HasLog1p = 0,
+ HasLog10 = 0,
+ HasPow = 0,
+ HasSin = 0,
+ HasCos = 0,
+ HasTan = 0,
+ HasASin = 0,
+ HasACos = 0,
+ HasATan = 0,
+ HasSinh = 0,
+ HasCosh = 0,
+ HasTanh = 0,
+ HasLGamma = 0,
+ HasDiGamma = 0,
+ HasZeta = 0,
+ HasPolygamma = 0,
+ HasErf = 0,
+ HasErfc = 0,
+ HasIGamma = 0,
+ HasIGammac = 0,
+ HasBetaInc = 0,
+ HasRound = 0,
+ HasFloor = 0,
+ HasCeil = 0,
+ HasSign = 0
+ };
+};
+// Generic scalar fallback: a type with no SIMD specialization is treated as
+// a "packet" of size 1 that is not vectorizable, with every operation flag
+// cleared. Architecture backends specialize this template per packet type.
+template<typename T> struct packet_traits : default_packet_traits
+{
+ typedef T type;
+ typedef T half;
+ enum {
+ Vectorizable = 0,
+ size = 1,
+ AlignedOnScalar = 0,
+ HasHalfPacket = 0
+ };
+ enum {
+ HasAdd = 0,
+ HasSub = 0,
+ HasMul = 0,
+ HasNegate = 0,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasConj = 0,
+ HasSetLinear = 0
+ };
+};
+// const T shares the traits of T.
+template<typename T> struct packet_traits<const T> : packet_traits<T> { };
+template <typename Src, typename Tgt> struct type_casting_traits {
+ enum {
+ VectorizedCast = 0,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a) {
+ return static_cast<TgtPacket>(a);
+}
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& ) {
+ return static_cast<TgtPacket>(a);
+}
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& , const SrcPacket& , const SrcPacket& ) {
+ return static_cast<TgtPacket>(a);
+}
+// Scalar fallbacks for the packet primitives: each simply applies the
+// corresponding scalar operator, so unvectorized code paths work for any
+// Packet == scalar type. SIMD backends overload these per packet type.
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+padd(const Packet& a,
+ const Packet& b) { return a+b; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+psub(const Packet& a,
+ const Packet& b) { return a-b; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pnegate(const Packet& a) { return -a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pconj(const Packet& a) { return numext::conj(a); }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmul(const Packet& a,
+ const Packet& b) { return a*b; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pdiv(const Packet& a,
+ const Packet& b) { return a/b; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmin(const Packet& a,
+ const Packet& b) { return numext::mini(a, b); }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmax(const Packet& a,
+ const Packet& b) { return numext::maxi(a, b); }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pabs(const Packet& a) { using std::abs; return abs(a); }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+parg(const Packet& a) { using numext::arg; return arg(a); }
+// Bitwise ops (meaningful only for types defining &, |, ^).
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pand(const Packet& a, const Packet& b) { return a & b; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+por(const Packet& a, const Packet& b) { return a | b; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pxor(const Packet& a, const Packet& b) { return a ^ b; }
+// NOTE(review): uses logical !b rather than bitwise ~b — presumably only
+// exercised for boolean-like scalars in this generic fallback, since SIMD
+// backends provide their own pandnot; confirm against callers.
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pandnot(const Packet& a, const Packet& b) { return a & (!b); }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
+ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+ploadquad(const typename unpacket_traits<Packet>::type* from)
+{ return pload1<Packet>(from); }
+template<typename Packet> EIGEN_DEVICE_FUNC
+inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
+ Packet& a0, Packet& a1, Packet& a2, Packet& a3)
+{
+ a0 = pload1<Packet>(a+0);
+ a1 = pload1<Packet>(a+1);
+ a2 = pload1<Packet>(a+2);
+ a3 = pload1<Packet>(a+3);
+}
+template<typename Packet> EIGEN_DEVICE_FUNC
+inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
+ Packet& a0, Packet& a1)
+{
+ a0 = pload1<Packet>(a+0);
+ a1 = pload1<Packet>(a+1);
+}
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
+plset(const typename unpacket_traits<Packet>::type& a) { return a; }
+// Scalar fallbacks for store / gather / scatter. The stride argument is
+// ignored here because a scalar "packet" touches exactly one element.
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
+{ (*to) = from; }
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
+{ (*to) = from; }
+ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index )
+ { return ploadu<Packet>(from); }
+ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index )
+ { pstore(to, from); }
+// Hint-prefetch 'addr' toward L1: PTX inline asm when compiling CUDA device
+// code (register constraint depends on pointer width), __builtin_prefetch on
+// GCC/Clang/ICC hosts, and a no-op everywhere else (e.g. MSVC).
+template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
+{
+#ifdef __CUDA_ARCH__
+#if defined(__LP64__)
+ asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
+#else
+ asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
+#endif
+#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
+ __builtin_prefetch(addr);
+#endif
+}
+// Extraction / reduction fallbacks: with one element per packet, pfirst and
+// every horizontal reduction are the identity, and preduxp keeps vecs[0].
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
+{ return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+preduxp(const Packet* vecs) { return vecs[0]; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
+{ return a; }
+// Return type: the half-size packet when size is a multiple of 8, otherwise
+// the packet itself (selected at compile time via 'conditional').
+template<typename Packet> EIGEN_DEVICE_FUNC inline
+typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
+predux_downto4(const Packet& a)
+{ return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
+{ return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
+{ return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
+{ return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
+{ return a; }
+// Swap real/imaginary parts of a complex scalar packet.
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
+{
+ return Packet(imag(a),real(a));
+}
+// Elementwise math fallbacks: each forwards to the std:: / numext:: scalar
+// function. Vectorized overloads are provided by arch MathFunctions headers
+// (hence the ALLOWING_MULTIPLE_DEFINITIONS linkage macro).
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet psin(const Packet& a) { using std::sin; return sin(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pcos(const Packet& a) { using std::cos; return cos(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet ptan(const Packet& a) { using std::tan; return tan(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pasin(const Packet& a) { using std::asin; return asin(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pacos(const Packet& a) { using std::acos; return acos(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet patan(const Packet& a) { using std::atan; return atan(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pexp(const Packet& a) { using std::exp; return exp(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plog(const Packet& a) { using std::log; return log(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plog1p(const Packet& a) { return numext::log1p(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plog10(const Packet& a) { using std::log10; return log10(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
+// Reciprocal square root composed from the primitives: 1 / sqrt(a).
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet prsqrt(const Packet& a) {
+ return pdiv(pset1<Packet>(1), psqrt(a));
+}
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pround(const Packet& a) { using numext::round; return round(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
+// Broadcast scalar 'a' into a full packet and store it at 'to'.
+template<typename Packet>
+inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
+{
+ pstore(to, pset1<Packet>(a));
+}
+// Fused multiply-add fallback: computed as two separate ops (a*b then +c).
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmadd(const Packet& a,
+ const Packet& b,
+ const Packet& c)
+{ return padd(pmul(a, b),c); }
+// Compile-time alignment dispatch: the Alignment template argument selects the
+// aligned or unaligned load/store (the branch resolves at compile time).
+template<typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
+{
+ if(Alignment >= unpacket_traits<Packet>::alignment)
+ return pload<Packet>(from);
+ else
+ return ploadu<Packet>(from);
+}
+template<typename Scalar, typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
+{
+ if(Alignment >= unpacket_traits<Packet>::alignment)
+ pstore(to, from);
+ else
+ pstoreu(to, from);
+}
+// Read-only variant of ploadt; in this generic version it simply forwards.
+template<typename Packet, int LoadMode>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
+{
+ return ploadt<Packet, LoadMode>(from);
+}
+// palign: combine 'first' and 'second' with a compile-time element offset.
+// The unspecialized impl is a no-op; arch headers specialize palign_impl
+// (see the SSSE3 specializations later in this file).
+template<int Offset,typename PacketType>
+struct palign_impl
+{
+ static inline void run(PacketType&, const PacketType&) {}
+};
+template<int Offset,typename PacketType>
+inline void palign(PacketType& first, const PacketType& second)
+{
+ palign_impl<Offset,PacketType>::run(first,second);
+}
+// Host-only std::complex products, expanded explicitly as
+// (ar*br - ai*bi, ai*br + ar*bi) instead of going through operator*.
+#ifndef __CUDACC__
+template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
+{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
+template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
+{ return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
+#endif
+// Fixed-size group of N packets; N defaults to the packet's lane count.
+template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
+ Packet packet[N];
+};
+// Transposing a 1-packet block is a no-op.
+template<typename Packet> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet,1>& ) {
+}
+// Per-lane boolean mask consumed by pblend.
+template <size_t N> struct Selector {
+ bool select[N];
+};
+// Scalar fallback blend: only lane 0 exists, so pick on select[0].
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
+ return ifPacket.select[0] ? thenPacket : elsePacket;
+}
+// Replace the FIRST lane of 'a' with scalar 'b', via a one-hot blend mask.
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
+{
+ Selector<unpacket_traits<Packet>::size> mask;
+ mask.select[0] = true;
+ for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
+ mask.select[i] = false;
+ return pblend(mask, pset1<Packet>(b), a);
+}
+// Replace the LAST lane of 'a' with scalar 'b'.
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
+{
+ Selector<unpacket_traits<Packet>::size> mask;
+ for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
+ mask.select[i] = false;
+ mask.select[unpacket_traits<Packet>::size-1] = true;
+ return pblend(mask, pset1<Packet>(b), a);
+}
+}
+}
+#endif
+// end #include "src/Core/GenericPacketMath.h"
+// --- SSE packet implementation (inlined src/Core/arch/SSE/PacketMath.h) ----
+// Only compiled when AVX512 vectorization is selected (per the guard below).
+#if defined EIGEN_VECTORIZE_AVX512
+// #include "src/Core/arch/SSE/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_SSE_H
+#define EIGEN_PACKET_MATH_SSE_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
+#endif
+#endif
+// On old GCC ABIs (< 1004) with AVX, raw __m128* types are wrapped in a thin
+// implicitly-converting struct; otherwise the packet typedefs are the raw
+// intrinsic vector types.
+#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
+template<typename T>
+struct eigen_packet_wrapper
+{
+  EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
+  EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
+  EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
+  EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
+  EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
+    m_val = v;
+    return *this;
+  }
+  T m_val;
+};
+typedef eigen_packet_wrapper<__m128>  Packet4f;
+typedef eigen_packet_wrapper<__m128i> Packet4i;
+typedef eigen_packet_wrapper<__m128d> Packet2d;
+#else
+typedef __m128  Packet4f;
+typedef __m128i Packet4i;
+typedef __m128d Packet2d;
+#endif
+template<> struct is_arithmetic<__m128>  { enum { value = true }; };
+template<> struct is_arithmetic<__m128i> { enum { value = true }; };
+template<> struct is_arithmetic<__m128d> { enum { value = true }; };
+// Shuffle helpers: (p,q,r,s)/(p,q) are source-lane indices packed into the
+// immediate expected by _mm_shuffle_*; float/double variants cast through the
+// integer domain where needed.
+#define vec4f_swizzle1(v,p,q,r,s) \
+  (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
+#define vec4i_swizzle1(v,p,q,r,s) \
+  (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
+#define vec2d_swizzle1(v,p,q) \
+  (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
+#define vec4f_swizzle2(a,b,p,q,r,s) \
+  (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
+#define vec4i_swizzle2(a,b,p,q,r,s) \
+  (_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
+// Declare a named packet constant broadcast from X.
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+  const Packet4f p4f_##NAME = pset1<Packet4f>(X)
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+  const Packet2d p2d_##NAME = pset1<Packet2d>(X)
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+  const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+  const Packet4i p4i_##NAME = pset1<Packet4i>(X)
+// Capability tables: packet_traits<Scalar> declares which vectorized ops are
+// available for each scalar type; unpacket_traits maps a packet type back to
+// its scalar type, lane count and required alignment.
+// float/double SSE traits are only used when AVX is not enabled (the guard
+// below); presumably the AVX header provides wider replacements — the int
+// traits are unconditional.
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<float>  : default_packet_traits
+{
+  typedef Packet4f type;
+  typedef Packet4f half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=4,
+    HasHalfPacket = 0,
+    HasDiv  = 1,
+    HasSin  = EIGEN_FAST_MATH,
+    HasCos  = EIGEN_FAST_MATH,
+    HasLog  = 1,
+    HasExp  = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasTanh  = EIGEN_FAST_MATH,
+    HasBlend = 1
+#ifdef EIGEN_VECTORIZE_SSE4_1
+    ,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
+#endif
+  };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+  typedef Packet2d type;
+  typedef Packet2d half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=2,
+    HasHalfPacket = 0,
+    HasDiv  = 1,
+    HasExp  = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasBlend = 1
+#ifdef EIGEN_VECTORIZE_SSE4_1
+    ,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
+#endif
+  };
+};
+#endif
+template<> struct packet_traits<int>    : default_packet_traits
+{
+  typedef Packet4i type;
+  typedef Packet4i half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=4,
+    HasBlend = 1
+  };
+};
+template<> struct unpacket_traits<Packet4f> { typedef float  type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet4i> { typedef int    type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+// Division cost hints for the vectorized case (non-AVX builds only).
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
+#endif
+// pset1: broadcast one scalar to all lanes. MSVC 2008 (==1500) gets explicit
+// per-lane set intrinsics (per the guard); everyone else uses set1.
+#if EIGEN_COMP_MSVC==1500
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps(from,from,from,from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set_epi32(from,from,from,from); }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps1(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set1_epi32(from); }
+#endif
+// pload1 via load_ss + broadcast shuffle for strict GCC without AVX.
+#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
+template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
+  return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
+}
+#endif
+// plset: {a, a+1, a+2, a+3} (resp. {a, a+1}).
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
+// Float/double negation by XOR-ing the sign bit; int negation as 0 - a.
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
+{
+  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+  return _mm_xor_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
+{
+  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
+  return _mm_xor_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
+{
+  return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
+}
+// Real packets: conjugation is the identity.
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
+// 32-bit int multiply: native mullo on SSE4.1, otherwise emulated with two
+// 32x32->64 unsigned multiplies on even/odd lanes plus re-interleaving.
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_mullo_epi32(a,b);
+#else
+  return vec4i_swizzle1(
+            vec4i_swizzle2(
+              _mm_mul_epu32(a,b),
+              _mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
+                            vec4i_swizzle1(b,1,0,3,2)),
+              0,2,0,2),
+            0,2,1,3);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
+// Int madd is always two ops; float/double use FMA intrinsics when available.
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
+// Int min/max: native on SSE4.1, otherwise compare + mask-select.
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_min_epi32(a,b);
+#else
+  Packet4i mask = _mm_cmplt_epi32(a,b);
+  return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_max_epi32(a,b);
+#else
+  Packet4i mask = _mm_cmpgt_epi32(a,b);
+  return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+// Rounding ops require SSE4.1 (matches the Has{Round,Floor,Ceil} trait flags).
+#ifdef EIGEN_VECTORIZE_SSE4_1
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
+#endif
+// Bitwise logic, one intrinsic per lane type.
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
+// NOTE: _mm_andnot_*(a,b) computes (~a) & b — argument order differs from the
+// generic pandnot(a,b) = a & !b above; this matches upstream Eigen as-is.
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
+// Aligned loads.
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float*   from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double*  from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int*     from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
+// Unaligned float load: MSVC 2010 (==1600) uses a two-half loadl/loadh
+// sequence (per the guard); otherwise plain _mm_loadu_ps.
+#if EIGEN_COMP_MSVC
+  template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float*  from) {
+    EIGEN_DEBUG_UNALIGNED_LOAD
+    #if (EIGEN_COMP_MSVC==1600)
+    __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
+    res = _mm_loadh_pi(res, (const __m64*)(from+2));
+    return res;
+    #else
+    return _mm_loadu_ps(from);
+    #endif
+  }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+  return _mm_loadu_ps(from);
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+  return _mm_loadu_pd(from);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
+}
+// ploaddup: load 2 (resp. 1) values and duplicate each into adjacent lanes.
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float*     from)
+{
+  return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double*  from)
+{ return pset1<Packet2d>(from[0]); }
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int*     from)
+{
+  Packet4i tmp;
+  tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
+  return vec4i_swizzle1(tmp, 0, 0, 1, 1);
+}
+// Aligned / unaligned stores.
+template<> EIGEN_STRONG_INLINE void pstore<float>(float*   to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int*       to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float*   to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int*       to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
+// Strided gather: assemble a packet from from[0], from[stride], ... (set
+// intrinsics take arguments highest-lane first, hence the reversed order).
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+ return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ return _mm_set_pd(from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+ return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+ }
+// Strided scatter: extract each lane (shuffle to lane 0, then cvt) and write
+// it to to[stride*i].
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+  to[stride*0] = _mm_cvtss_f32(from);
+  to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
+  to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
+  to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+  to[stride*0] = _mm_cvtsd_f64(from);
+  to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+  to[stride*0] = _mm_cvtsi128_si32(from);
+  to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
+  to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
+  to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
+}
+// pstore1: load the scalar into lane 0, broadcast by swizzle, store aligned.
+template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
+{
+  Packet4f pa = _mm_set_ss(a);
+  pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
+{
+  Packet2d pa = _mm_set_sd(a);
+  pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
+}
+// T0-hint prefetches (non-AVX builds only, per the guard).
+#ifndef EIGEN_VECTORIZE_AVX
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+#endif
+// pfirst (extract lane 0): three variants keyed on compiler — strict MSVC on
+// Win64 reads the union members directly, other strict MSVC goes through a
+// local to work around the cvt intrinsics, everyone else calls cvt directly.
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
+template<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
+template<> EIGEN_STRONG_INLINE int    pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#elif EIGEN_COMP_MSVC_STRICT
+template<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
+template<> EIGEN_STRONG_INLINE int    pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#else
+template<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
+template<> EIGEN_STRONG_INLINE int    pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
+#endif
+// Lane reversal via shuffle (0x1B = lanes 3,2,1,0).
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{ return _mm_shuffle_ps(a,a,0x1B); }
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{ return _mm_shuffle_pd(a,a,0x1); }
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{ return _mm_shuffle_epi32(a,0x1B); }
+// Absolute value: float/double clear the sign bit with a mask; int uses
+// SSSE3 abs, else the shift/xor/sub two's-complement trick.
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
+{
+  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+  return _mm_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
+{
+  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+  return _mm_and_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
+{
+  #ifdef EIGEN_VECTORIZE_SSSE3
+  return _mm_abs_epi32(a);
+  #else
+  Packet4i aux = _mm_srai_epi32(a,31);
+  return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
+  #endif
+}
+// pbroadcast4 (non-AVX): one aligned load, then swizzle-broadcast each lane.
+#ifndef __AVX__
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+                      Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+  a3 = pload<Packet4f>(a);
+  a0 = vec4f_swizzle1(a3, 0,0,0,0);
+  a1 = vec4f_swizzle1(a3, 1,1,1,1);
+  a2 = vec4f_swizzle1(a3, 2,2,2,2);
+  a3 = vec4f_swizzle1(a3, 3,3,3,3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+                      Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+#ifdef EIGEN_VECTORIZE_SSE3
+  a0 = _mm_loaddup_pd(a+0);
+  a1 = _mm_loaddup_pd(a+1);
+  a2 = _mm_loaddup_pd(a+2);
+  a3 = _mm_loaddup_pd(a+3);
+#else
+  a1 = pload<Packet2d>(a);
+  a0 = vec2d_swizzle1(a1, 0,0);
+  a1 = vec2d_swizzle1(a1, 1,1);
+  a3 = pload<Packet2d>(a+2);
+  a2 = vec2d_swizzle1(a3, 0,0);
+  a3 = vec2d_swizzle1(a3, 1,1);
+#endif
+}
+#endif
+// Broadcast each lane of vecs[0] into vecs[0..3] (vecs[0] last so the source
+// is not clobbered before the other lanes are extracted).
+EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
+{
+  vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
+  vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
+  vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
+  vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
+}
+#ifdef EIGEN_VECTORIZE_SSE3
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ return _mm_hadd_pd(vecs[0], vecs[1]);
+}
+#else
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ Packet4f tmp0, tmp1, tmp2;
+ tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
+ tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
+ tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
+ tmp0 = _mm_add_ps(tmp0, tmp1);
+ tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]);
+ tmp1 = _mm_add_ps(tmp1, tmp2);
+ tmp2 = _mm_movehl_ps(tmp1, tmp0);
+ tmp0 = _mm_movelh_ps(tmp0, tmp1);
+ return _mm_add_ps(tmp0, tmp2);
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
+}
+#endif
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
+}
+#ifdef EIGEN_VECTORIZE_SSSE3
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i tmp0 = _mm_hadd_epi32(a,a);
+ return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));
+}
+#else
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
+ return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
+}
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ Packet4i tmp0, tmp1, tmp2;
+ tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+ tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+ tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+ tmp0 = _mm_add_epi32(tmp0, tmp1);
+ tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+ tmp1 = _mm_add_epi32(tmp1, tmp2);
+ tmp2 = _mm_unpacklo_epi64(tmp0, tmp1);
+ tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
+ return _mm_add_epi32(tmp0, tmp2);
+}
+#endif
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ return (aux[0] * aux[1]) * (aux[2] * aux[3]);;
+}
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
+}
// Minimum of the four int32 lanes. SSE4.1 provides a packed signed min; the
// pre-SSE4.1 fallback spills to an aligned array and compares in scalar code.
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
  Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
  return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
#else
  EIGEN_ALIGN16 int aux[4];
  pstore(aux, a);
  int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
  int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
  return aux0<aux2 ? aux0 : aux2;
#endif
}
// Maximum of the four float lanes (mirror of predux_min above).
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
{
  Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
  return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
}
// Maximum of the two double lanes.
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
{
  return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
}
// Maximum of the four int32 lanes. SSE4.1 path uses packed signed max; the
// fallback spills to an aligned array and compares in scalar code.
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
  Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
  return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
#else
  EIGEN_ALIGN16 int aux[4];
  pstore(aux, a);
  int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
  int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
  return aux0>aux2 ? aux0 : aux2;
#endif
}
+#if EIGEN_COMP_GNUC
+#endif
+#ifdef EIGEN_VECTORIZE_SSSE3
// Shift `first` left by Offset floats, filling the vacated lanes from
// `second` (SSSE3 path: single palignr on the reinterpreted integer packets).
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
  {
    if (Offset!=0)
      first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4)); // Offset*4 bytes = Offset floats
  }
};
// Same alignment shift for int32 packets (palignr works on bytes directly).
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
  {
    if (Offset!=0)
      first = _mm_alignr_epi8(second,first, Offset*4);
  }
};
// Same alignment shift for double packets; only Offset==1 is meaningful for
// a 2-lane packet (8-byte shift).
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
  {
    if (Offset==1)
      first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
  }
};
+#else
// SSE2 fallback for the float alignment shift: emulates palignr with
// move/shuffle sequences, one branch per compile-time Offset.
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
  {
    if (Offset==1)
    {
      first = _mm_move_ss(first,second);
      first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39)); // rotate lanes right by one
    }
    else if (Offset==2)
    {
      first = _mm_movehl_ps(first,first);
      first = _mm_movelh_ps(first,second);
    }
    else if (Offset==3)
    {
      first = _mm_move_ss(first,second);
      first = _mm_shuffle_ps(first,second,0x93);
    }
  }
};
// SSE2 fallback for the int32 alignment shift: the same move/shuffle
// sequences as the float version, with casts to/from the float domain.
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
  {
    if (Offset==1)
    {
      first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
      first = _mm_shuffle_epi32(first,0x39);
    }
    else if (Offset==2)
    {
      first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
      first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
    }
    else if (Offset==3)
    {
      first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
      first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
    }
  }
};
// SSE2 fallback for the double alignment shift (only Offset==1 is
// meaningful): combine the high lane of `first` with the low lane of `second`.
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
  {
    if (Offset==1)
    {
      first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
      first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
    }
  }
};
+#endif
// In-place 4x4 float transpose via the standard SSE transpose macro.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4f,4>& kernel) {
  _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
}
// In-place 2x2 double transpose: unpack low/high lanes of the two rows.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2d,2>& kernel) {
  __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
  kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
  kernel.packet[1] = tmp;
}
// In-place 4x4 int32 transpose: two rounds of 32-bit then 64-bit unpacks.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4i,4>& kernel) {
  __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
  __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
  __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
  __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
  kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
  kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
  kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
  kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
}
// Per-lane select: lane i comes from thenPacket where ifPacket.select[i] is
// nonzero, else from elsePacket. The mask marks the *false* lanes, which is
// why elsePacket is the blendv "true" operand.
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  __m128i false_mask = _mm_cmpeq_epi32(select, zero);
#ifdef EIGEN_VECTORIZE_SSE4_1
  return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
#else
  return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
#endif
}
// Float variant of the per-lane select above (same false-mask convention).
template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
  const __m128 zero = _mm_setzero_ps();
  const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  __m128 false_mask = _mm_cmpeq_ps(select, zero);
#ifdef EIGEN_VECTORIZE_SSE4_1
  return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
#else
  return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
#endif
}
// Double variant of the per-lane select above (same false-mask convention).
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
  const __m128d zero = _mm_setzero_pd();
  const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
  __m128d false_mask = _mm_cmpeq_pd(select, zero);
#ifdef EIGEN_VECTORIZE_SSE4_1
  return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
#else
  return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
#endif
}
// Return a copy of `a` with lane 0 replaced by `b`.
template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
  return _mm_blend_ps(a,pset1<Packet4f>(b),1);
#else
  return _mm_move_ss(a, _mm_load_ss(&b));
#endif
}
// Return a copy of `a` with lane 0 replaced by `b`.
template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
  return _mm_blend_pd(a,pset1<Packet2d>(b),1);
#else
  return _mm_move_sd(a, _mm_load_sd(&b));
#endif
}
// Return a copy of `a` with the last lane (index 3) replaced by `b`.
// The SSE2 fallback builds an all-ones mask on the last lane and blends
// manually with and/andnot/or.
template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
  return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
#else
  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
  return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
#endif
}
// Return a copy of `a` with the last lane (index 1) replaced by `b`.
template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
  return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
#else
  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
  return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
#endif
}
// Scalar fused multiply-add specializations: map pmadd onto the C99 fma
// routines when the compiler advertises FMA support.
#ifdef __FMA__
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
  return ::fmaf(a,b,c);
}
template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {
  return ::fma(a,b,c);
}
#endif
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/PacketMath.h"
+// #include "src/Core/arch/AVX/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_AVX_H
+#define EIGEN_PACKET_MATH_AVX_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#endif
// AVX packet typedefs, arithmetic-trait opt-ins for the raw vector types,
// and helper macros declaring named packet constants.
typedef __m256 Packet8f;
typedef __m256i Packet8i;
typedef __m256d Packet4d;
template<> struct is_arithmetic<__m256> { enum { value = true }; };
template<> struct is_arithmetic<__m256i> { enum { value = true }; };
template<> struct is_arithmetic<__m256d> { enum { value = true }; };
#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
  const Packet8f p8f_##NAME = pset1<Packet8f>(X)
#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
  const Packet4d p4d_##NAME = pset1<Packet4d>(X)
#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
  const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))
#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
  const Packet8i p8i_##NAME = pset1<Packet8i>(X)
+#ifndef EIGEN_VECTORIZE_AVX512
// AVX capability table for float: 8-lane packets with an SSE half-packet;
// the Has* flags advertise which packet ops the vectorized paths may use.
template<> struct packet_traits<float> : default_packet_traits
{
  typedef Packet8f type;
  typedef Packet4f half;
  enum {
    Vectorizable = 1,
    AlignedOnScalar = 1,
    size=8,
    HasHalfPacket = 1,
    HasDiv = 1,
    HasSin = EIGEN_FAST_MATH, // sin only in fast-math mode
    HasCos = 0,
    HasLog = 1,
    HasExp = 1,
    HasSqrt = 1,
    HasRsqrt = 1,
    HasTanh = EIGEN_FAST_MATH,
    HasBlend = 1,
    HasRound = 1,
    HasFloor = 1,
    HasCeil = 1
  };
};
// AVX capability table for double: 4-lane packets with an SSE half-packet.
template<> struct packet_traits<double> : default_packet_traits
{
  typedef Packet4d type;
  typedef Packet2d half;
  enum {
    Vectorizable = 1,
    AlignedOnScalar = 1,
    size=4,
    HasHalfPacket = 1,
    HasDiv = 1,
    HasExp = 1,
    HasSqrt = 1,
    HasRsqrt = 1,
    HasBlend = 1,
    HasRound = 1,
    HasFloor = 1,
    HasCeil = 1
  };
};
+#endif
// Cost-model and unpacking traits for the AVX packet types (lane count and
// required alignment per packet).
template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8, alignment=Aligned32}; };
template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4, alignment=Aligned32}; };
template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32}; };
// Broadcast constructors (pset1/pload1), linear ramp (plset: a, a+1, ...),
// and lane-wise add/sub for the AVX packet types.
template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { return _mm256_set1_epi32(from); }
template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
// Lane-wise negation (implemented as 0 - a) and conjugation (identity for
// real-valued packets).
template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
{
  return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
}
template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
{
  return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
}
template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; }
// Lane-wise multiply/divide. Integer division has no AVX instruction, so the
// Packet8i overload asserts at runtime and returns a zero packet.
template<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& , const Packet8i& )
{ eigen_assert(false && "packet integer division are not supported by AVX");
  return pset1<Packet8i>(0);
}
// Fused multiply-add (a*b+c) for AVX packets. For GCC (strict) and old clang
// an inline-asm vfmadd231 is emitted instead of the intrinsic — a workaround
// for poor register allocation of _mm256_fmadd_* on those compilers
// (presumably; verify against upstream Eigen history).
#ifdef __FMA__
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
  Packet8f res = c;
  __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
  return res;
#else
  return _mm256_fmadd_ps(a,b,c);
#endif
}
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
  Packet4d res = c;
  __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
  return res;
#else
  return _mm256_fmadd_pd(a,b,c);
#endif
}
#endif
// Lane-wise min/max, rounding (current rounding mode / ceil / floor), and
// bitwise and/or/xor/andnot for the AVX float and double packets.
template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); }
template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); }
template<> EIGEN_STRONG_INLINE Packet8f pceil<Packet8f>(const Packet8f& a) { return _mm256_ceil_ps(a); }
template<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a) { return _mm256_ceil_pd(a); }
template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); }
template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_or_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_or_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_xor_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_xor_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_andnot_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_andnot_pd(a,b); }
// Aligned (pload) and unaligned (ploadu) 256-bit loads for each packet type.
template<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_ps(from); }
template<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_pd(from); }
template<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); }
template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_ps(from); }
template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }
template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
// Load 4 floats and duplicate each one: result is
// {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}.
template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
{
  Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
  tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);
  return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));
}
// Load 2 doubles and duplicate each: {from[0],from[0],from[1],from[1]}.
template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
{
  Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from);
  return _mm256_permute_pd(tmp, 3<<2);
}
// Load 2 floats and replicate each across a 128-bit half:
// {from[0] x4, from[1] x4}.
template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
{
  Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
  return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);
}
// Aligned (pstore) and unaligned (pstoreu) 256-bit stores.
// NOTE(review): the aligned pstore<int> uses the *unaligned* storeu
// intrinsic — looks deliberate upstream but worth confirming against Eigen.
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
// Strided gather: assemble a packet from elements spaced `stride` apart
// (scalar loads via _mm256_set_*, not the AVX2 gather instructions).
template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, Index stride)
{
  return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride],
                       from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
}
template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, Index stride)
{
  return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
}
// Strided scatter: write each float lane to to[stride*i], extracting the two
// 128-bit halves and shuffling lanes down one at a time.
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, Index stride)
{
  __m128 low = _mm256_extractf128_ps(from, 0);
  to[stride*0] = _mm_cvtss_f32(low);
  to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));
  to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));
  to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));
  __m128 high = _mm256_extractf128_ps(from, 1);
  to[stride*4] = _mm_cvtss_f32(high);
  to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));
  to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
  to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
}
// Strided scatter for doubles: write each lane to to[stride*i].
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, Index stride)
{
  __m128d low = _mm256_extractf128_pd(from, 0);
  to[stride*0] = _mm_cvtsd_f64(low);
  to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));
  __m128d high = _mm256_extractf128_pd(from, 1);
  to[stride*2] = _mm_cvtsd_f64(high);
  to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
}
// Broadcast a scalar into a packet and store it (aligned) to memory.
template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
{
  Packet8f pa = pset1<Packet8f>(a);
  pstore(to, pa);
}
template<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)
{
  Packet4d pa = pset1<Packet4d>(a);
  pstore(to, pa);
}
template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
{
  Packet8i pa = pset1<Packet8i>(a);
  pstore(to, pa);
}
// Software prefetch hints into the L1 cache (skipped under AVX512, which
// provides its own specializations).
#ifndef EIGEN_VECTORIZE_AVX512
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
#endif
// Extract lane 0 of a packet as a scalar.
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
  return _mm_cvtss_f32(_mm256_castps256_ps128(a));
}
template<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a) {
  return _mm_cvtsd_f64(_mm256_castpd256_pd128(a));
}
template<> EIGEN_STRONG_INLINE int pfirst<Packet8i>(const Packet8i& a) {
  return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
}
// Reverse the eight float lanes: reverse within each 128-bit half, then swap
// the two halves.
template<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)
{
  __m256 tmp = _mm256_shuffle_ps(a,a,0x1b);
  return _mm256_permute2f128_ps(tmp, tmp, 1);
}
+template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
+{
+ __m256d tmp = _mm256_shuffle_pd(a,a,5);
+ return _mm256_permute2f128_pd(tmp, tmp, 1);
+ __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
+ return _mm256_permute_pd(swap_halves,5);
+}
// Absolute value by masking off the sign bit of every lane.
template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
{
  const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
  return _mm256_and_ps(a,mask);
}
template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
{
  // Each double spans two epi32 slots; 0x7FFFFFFF lands on the half holding
  // the sign bit, 0xFFFFFFFF on the low half.
  const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
  return _mm256_and_pd(a,mask);
}
// Transposed reduction of 8 float packets: lane i of the result is the sum of
// the eight lanes of vecs[i]. Horizontal adds collapse pairs, the cross-lane
// permute folds the two 128-bit halves, and blends merge the partial sums.
template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
{
  __m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
  __m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
  __m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
  __m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
  __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
  __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
  __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
  __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
  __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
  __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
  __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
  __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
  __m256 sum1 = _mm256_add_ps(perm1, hsum5);
  __m256 sum2 = _mm256_add_ps(perm2, hsum6);
  __m256 sum3 = _mm256_add_ps(perm3, hsum7);
  __m256 sum4 = _mm256_add_ps(perm4, hsum8);
  __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
  __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
  __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
  return final;
}
// Transposed reduction of 4 double packets: lane i of the result is the sum
// of the four lanes of vecs[i].
template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
{
  Packet4d tmp0, tmp1;
  tmp0 = _mm256_hadd_pd(vecs[0], vecs[1]);
  tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
  tmp1 = _mm256_hadd_pd(vecs[2], vecs[3]);
  tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
  return _mm256_blend_pd(tmp0, tmp1, 0xC);
}
// Full lane sums: add the two 128-bit halves together and delegate to the SSE
// predux. predux_downto4 returns that half-width partial sum as a packet.
template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
{
  return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
}
template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
{
  return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
}
template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
{
  return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
}
// Product of all lanes: fold the two 128-bit halves into each other, then
// reduce within the half with shuffles.
template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
{
  Packet8f tmp;
  tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
  tmp = _mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
  return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
}
template<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)
{
  Packet4d tmp;
  tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
  return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp,tmp,1)));
}
// Min/max of all lanes: same fold pattern as predux_mul above, using the
// packed min/max instructions.
template<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)
{
  Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
  tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
  return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
}
template<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)
{
  Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
  return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
}
template<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)
{
  Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
  tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
  return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
}
template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
{
  Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
  return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
}
// Shift `first` left by Offset floats across the full 256-bit packet, filling
// from `second`. AVX has no cross-lane byte-align instruction, so each
// compile-time Offset gets its own blend/permute sequence; Offsets >= 4 first
// swap the 128-bit halves.
template<int Offset>
struct palign_impl<Offset,Packet8f>
{
  static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
  {
    if (Offset==1)
    {
      first = _mm256_blend_ps(first, second, 1);
      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
      first = _mm256_blend_ps(tmp1, tmp2, 0x88);
    }
    else if (Offset==2)
    {
      first = _mm256_blend_ps(first, second, 3);
      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
      first = _mm256_blend_ps(tmp1, tmp2, 0xcc);
    }
    else if (Offset==3)
    {
      first = _mm256_blend_ps(first, second, 7);
      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
      first = _mm256_blend_ps(tmp1, tmp2, 0xee);
    }
    else if (Offset==4)
    {
      first = _mm256_blend_ps(first, second, 15);
      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
      first = _mm256_permute_ps(tmp2, _MM_SHUFFLE(3,2,1,0));
    }
    else if (Offset==5)
    {
      first = _mm256_blend_ps(first, second, 31);
      first = _mm256_permute2f128_ps(first, first, 1);
      Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
      first = _mm256_permute2f128_ps(tmp, tmp, 1);
      first = _mm256_blend_ps(tmp, first, 0x88);
    }
    else if (Offset==6)
    {
      first = _mm256_blend_ps(first, second, 63);
      first = _mm256_permute2f128_ps(first, first, 1);
      Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
      first = _mm256_permute2f128_ps(tmp, tmp, 1);
      first = _mm256_blend_ps(tmp, first, 0xcc);
    }
    else if (Offset==7)
    {
      first = _mm256_blend_ps(first, second, 127);
      first = _mm256_permute2f128_ps(first, first, 1);
      Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
      first = _mm256_permute2f128_ps(tmp, tmp, 1);
      first = _mm256_blend_ps(tmp, first, 0xee);
    }
  }
};
// Same cross-lane alignment shift for double packets (Offsets 1..3).
template<int Offset>
struct palign_impl<Offset,Packet4d>
{
  static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
  {
    if (Offset==1)
    {
      first = _mm256_blend_pd(first, second, 1);
      __m256d tmp = _mm256_permute_pd(first, 5);
      first = _mm256_permute2f128_pd(tmp, tmp, 1);
      first = _mm256_blend_pd(tmp, first, 0xA);
    }
    else if (Offset==2)
    {
      first = _mm256_blend_pd(first, second, 3);
      first = _mm256_permute2f128_pd(first, first, 1); // half-packet shift == lane swap
    }
    else if (Offset==3)
    {
      first = _mm256_blend_pd(first, second, 7);
      __m256d tmp = _mm256_permute_pd(first, 5);
      first = _mm256_permute2f128_pd(tmp, tmp, 1);
      first = _mm256_blend_pd(tmp, first, 5);
    }
  }
};
// In-place 8x8 float transpose: 32-bit unpacks, 64-bit shuffles, then
// cross-lane permutes recombine the 128-bit quarters.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8f,8>& kernel) {
  __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
  __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
  __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
  __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
  __m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
  __m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
  __m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
  __m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
  __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
  __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
  __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
  __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
  __m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));
  __m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));
  __m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));
  __m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));
  kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);
  kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);
  kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
  kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
  kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);
  kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);
  kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
  kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
}
// Transpose a 4x8 block held in four Packet8f rows (partial-block variant
// used by the gemm kernels).
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8f,4>& kernel) {
  __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
  __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
  __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
  __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
  __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
  __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
  __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
  __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
  kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);
  kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
  kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);
  kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
}
// In-place 4x4 double transpose: within-lane shuffles followed by cross-lane
// 128-bit permutes.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4d,4>& kernel) {
  __m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);
  __m256d T1 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
  __m256d T2 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 15);
  __m256d T3 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
  kernel.packet[1] = _mm256_permute2f128_pd(T0, T2, 32);
  kernel.packet[3] = _mm256_permute2f128_pd(T0, T2, 49);
  kernel.packet[0] = _mm256_permute2f128_pd(T1, T3, 32);
  kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
}
// Per-lane select for AVX packets: lane i comes from thenPacket where
// ifPacket.select[i] is nonzero, else elsePacket (mask marks the false lanes).
template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
  const __m256 zero = _mm256_setzero_ps();
  const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
  return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
}
template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
  const __m256d zero = _mm256_setzero_pd();
  const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
  return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
}
+template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
+{
+ return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
+{
+ return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
+}
+template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
+{
+ return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
+}
+template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
+{
+ return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
+}
+}
+}
+#endif
+// end #include "src/Core/arch/AVX/PacketMath.h"
+// #include "src/Core/arch/AVX512/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_AVX512_H
+#define EIGEN_PACKET_MATH_AVX512_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#endif
+// AVX-512 packet types: one 512-bit register holds 16 floats, 16 int32s, or 8 doubles.
+typedef __m512 Packet16f;
+typedef __m512i Packet16i;
+typedef __m512d Packet8d;
+// Mark the raw intrinsic vector types as arithmetic so Eigen's internal
+// type traits treat them as value types.
+template <>
+struct is_arithmetic<__m512> {
+ enum { value = true };
+};
+template <>
+struct is_arithmetic<__m512i> {
+ enum { value = true };
+};
+template <>
+struct is_arithmetic<__m512d> {
+ enum { value = true };
+};
+// Advertise which vectorized ops are available for float under AVX-512.
+// Log/exp/sqrt/rsqrt are only enabled on GCC >= 5.3 (older compilers
+// miscompile or lack the needed intrinsics); HasLog additionally needs AVX512DQ.
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet16f type;
+ typedef Packet8f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 16,
+ HasHalfPacket = 1,
+#if EIGEN_GNUC_AT_LEAST(5, 3)
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ HasLog = 1,
+#endif
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+#endif
+ HasDiv = 1
+ };
+ };
+// Same for double; rsqrt is only worthwhile under fast-math.
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet8d type;
+ typedef Packet4d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 8,
+ HasHalfPacket = 1,
+#if EIGEN_GNUC_AT_LEAST(5, 3)
+ HasSqrt = 1,
+ HasRsqrt = EIGEN_FAST_MATH,
+#endif
+ HasDiv = 1
+ };
+};
+// Reverse mapping packet -> scalar, plus element count and required alignment
+// (AVX-512 packets must be 64-byte aligned for the aligned load/store paths).
+template <>
+struct unpacket_traits<Packet16f> {
+ typedef float type;
+ typedef Packet8f half;
+ enum { size = 16, alignment=Aligned64 };
+};
+template <>
+struct unpacket_traits<Packet8d> {
+ typedef double type;
+ typedef Packet4d half;
+ enum { size = 8, alignment=Aligned64 };
+};
+template <>
+struct unpacket_traits<Packet16i> {
+ typedef int type;
+ typedef Packet8i half;
+ enum { size = 16, alignment=Aligned64 };
+};
+// Broadcast a single scalar to all lanes of the packet.
+template <>
+EIGEN_STRONG_INLINE Packet16f pset1<Packet16f>(const float& from) {
+ return _mm512_set1_ps(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pset1<Packet8d>(const double& from) {
+ return _mm512_set1_pd(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i pset1<Packet16i>(const int& from) {
+ return _mm512_set1_epi32(from);
+}
+// Load one scalar from memory and broadcast it: a 128-bit broadcast load
+// followed by a 512-bit lane broadcast.
+template <>
+EIGEN_STRONG_INLINE Packet16f pload1<Packet16f>(const float* from) {
+ return _mm512_broadcastss_ps(_mm_load_ps1(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pload1<Packet8d>(const double* from) {
+ return _mm512_broadcastsd_pd(_mm_load_pd1(from));
+}
+// Return {a, a+1, ..., a+15}: broadcast `a` and add the 0..15 ramp.
+template <>
+EIGEN_STRONG_INLINE Packet16f plset<Packet16f>(const float& a) {
+ return _mm512_add_ps(
+ _mm512_set1_ps(a),
+ _mm512_set_ps(15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f,
+ 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d plset<Packet8d>(const double& a) {
+ return _mm512_add_pd(_mm512_set1_pd(a),
+ _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0));
+}
+// Element-wise arithmetic: direct wrappers over the AVX-512 intrinsics.
+template <>
+EIGEN_STRONG_INLINE Packet16f padd<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+ return _mm512_add_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d padd<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+ return _mm512_add_pd(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f psub<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+ return _mm512_sub_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d psub<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+ return _mm512_sub_pd(a, b);
+}
+// Negation implemented as 0 - a.
+template <>
+EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) {
+ return _mm512_sub_ps(_mm512_set1_ps(0.0), a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pnegate(const Packet8d& a) {
+ return _mm512_sub_pd(_mm512_set1_pd(0.0), a);
+}
+// Conjugate is the identity for real packet types.
+template <>
+EIGEN_STRONG_INLINE Packet16f pconj(const Packet16f& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pconj(const Packet8d& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i pconj(const Packet16i& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pmul<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+ return _mm512_mul_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pmul<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+ return _mm512_mul_pd(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pdiv<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+ return _mm512_div_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pdiv<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+ return _mm512_div_pd(a, b);
+}
+// Fused multiply-add a*b+c, only when the target supports FMA.
+#ifdef __FMA__
+template <>
+EIGEN_STRONG_INLINE Packet16f pmadd(const Packet16f& a, const Packet16f& b,
+ const Packet16f& c) {
+ return _mm512_fmadd_ps(a, b, c);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pmadd(const Packet8d& a, const Packet8d& b,
+ const Packet8d& c) {
+ return _mm512_fmadd_pd(a, b, c);
+}
+#endif
+// Element-wise min/max (hardware min/max intrinsic semantics; NaN handling
+// follows the intrinsics, not IEEE minNum/maxNum — TODO confirm callers rely
+// on the "second operand wins on NaN" x86 behavior).
+template <>
+EIGEN_STRONG_INLINE Packet16f pmin<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+ return _mm512_min_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pmin<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+ return _mm512_min_pd(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pmax<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+ return _mm512_max_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pmax<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+ return _mm512_max_pd(a, b);
+}
+// Bitwise AND.  512-bit float bitwise ops require AVX512DQ; without it the
+// packet is processed one 128-bit (float) or 256-bit (double) lane at a time
+// using extract / SSE-AVX op / insert.
+template <>
+EIGEN_STRONG_INLINE Packet16f pand<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_and_ps(a, b);
+#else
+ Packet16f res = _mm512_undefined_ps();
+ Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
+ res = _mm512_insertf32x4(res, _mm_and_ps(lane0_a, lane0_b), 0);
+ Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
+ res = _mm512_insertf32x4(res, _mm_and_ps(lane1_a, lane1_b), 1);
+ Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
+ res = _mm512_insertf32x4(res, _mm_and_ps(lane2_a, lane2_b), 2);
+ Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
+ Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
+ res = _mm512_insertf32x4(res, _mm_and_ps(lane3_a, lane3_b), 3);
+ return res;
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pand<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_and_pd(a, b);
+#else
+ Packet8d res = _mm512_undefined_pd();
+ Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
+ res = _mm512_insertf64x4(res, _mm256_and_pd(lane0_a, lane0_b), 0);
+ Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
+ Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
+ res = _mm512_insertf64x4(res, _mm256_and_pd(lane1_a, lane1_b), 1);
+ return res;
+#endif
+}
+// Bitwise OR, same lane-wise fallback structure as pand.
+template <>
+EIGEN_STRONG_INLINE Packet16f por<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_or_ps(a, b);
+#else
+ Packet16f res = _mm512_undefined_ps();
+ Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
+ res = _mm512_insertf32x4(res, _mm_or_ps(lane0_a, lane0_b), 0);
+ Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
+ res = _mm512_insertf32x4(res, _mm_or_ps(lane1_a, lane1_b), 1);
+ Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
+ res = _mm512_insertf32x4(res, _mm_or_ps(lane2_a, lane2_b), 2);
+ Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
+ Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
+ res = _mm512_insertf32x4(res, _mm_or_ps(lane3_a, lane3_b), 3);
+ return res;
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d por<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_or_pd(a, b);
+#else
+ Packet8d res = _mm512_undefined_pd();
+ Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
+ res = _mm512_insertf64x4(res, _mm256_or_pd(lane0_a, lane0_b), 0);
+ Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
+ Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
+ res = _mm512_insertf64x4(res, _mm256_or_pd(lane1_a, lane1_b), 1);
+ return res;
+#endif
+}
+// Bitwise XOR, same lane-wise fallback structure as pand.
+template <>
+EIGEN_STRONG_INLINE Packet16f pxor<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_xor_ps(a, b);
+#else
+ Packet16f res = _mm512_undefined_ps();
+ Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
+ res = _mm512_insertf32x4(res, _mm_xor_ps(lane0_a, lane0_b), 0);
+ Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
+ res = _mm512_insertf32x4(res, _mm_xor_ps(lane1_a, lane1_b), 1);
+ Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
+ res = _mm512_insertf32x4(res, _mm_xor_ps(lane2_a, lane2_b), 2);
+ Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
+ Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
+ res = _mm512_insertf32x4(res, _mm_xor_ps(lane3_a, lane3_b), 3);
+ return res;
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pxor<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_xor_pd(a, b);
+#else
+ Packet8d res = _mm512_undefined_pd();
+ Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
+ res = _mm512_insertf64x4(res, _mm256_xor_pd(lane0_a, lane0_b), 0);
+ Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
+ Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
+ res = _mm512_insertf64x4(res, _mm256_xor_pd(lane1_a, lane1_b), 1);
+ return res;
+#endif
+}
+// Bitwise AND-NOT ((~a) & b, per the x86 andnot convention), same fallback.
+template <>
+EIGEN_STRONG_INLINE Packet16f pandnot<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_andnot_ps(a, b);
+#else
+ Packet16f res = _mm512_undefined_ps();
+ Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
+ res = _mm512_insertf32x4(res, _mm_andnot_ps(lane0_a, lane0_b), 0);
+ Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
+ res = _mm512_insertf32x4(res, _mm_andnot_ps(lane1_a, lane1_b), 1);
+ Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
+ res = _mm512_insertf32x4(res, _mm_andnot_ps(lane2_a, lane2_b), 2);
+ Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
+ Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
+ res = _mm512_insertf32x4(res, _mm_andnot_ps(lane3_a, lane3_b), 3);
+ return res;
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pandnot<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_andnot_pd(a, b);
+#else
+ Packet8d res = _mm512_undefined_pd();
+ Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
+ res = _mm512_insertf64x4(res, _mm256_andnot_pd(lane0_a, lane0_b), 0);
+ Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
+ Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
+ res = _mm512_insertf64x4(res, _mm256_andnot_pd(lane1_a, lane1_b), 1);
+ return res;
+#endif
+}
+// Aligned loads: `from` must be 64-byte aligned (see unpacket_traits alignment).
+template <>
+EIGEN_STRONG_INLINE Packet16f pload<Packet16f>(const float* from) {
+ EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ps(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pload<Packet8d>(const double* from) {
+ EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_pd(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i pload<Packet16i>(const int* from) {
+ EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(
+ reinterpret_cast<const __m512i*>(from));
+}
+// Unaligned loads: no alignment requirement on `from`.
+template <>
+EIGEN_STRONG_INLINE Packet16f ploadu<Packet16f>(const float* from) {
+ EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ps(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d ploadu<Packet8d>(const double* from) {
+ EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_pd(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) {
+ EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
+ reinterpret_cast<const __m512i*>(from));
+}
+// Load 8 floats and duplicate each one:
+// {a0, a0, a1, a1, a2, a2, a3, a3, a4, a4, ..., a7, a7}.
+// Each 256-bit half duplicates four consecutive source floats via a
+// broadcast + blend + permute sequence, then the halves are packed into a
+// single 512-bit register.
+template <>
+EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) {
+ Packet8f lane0 = _mm256_broadcast_ps((const __m128*)(const void*)from);
+ lane0 = _mm256_blend_ps(
+ lane0, _mm256_castps128_ps256(_mm_permute_ps(
+ _mm256_castps256_ps128(lane0), _MM_SHUFFLE(1, 0, 1, 0))),
+ 15);
+ lane0 = _mm256_permute_ps(lane0, _MM_SHUFFLE(3, 3, 2, 2));
+ Packet8f lane1 = _mm256_broadcast_ps((const __m128*)(const void*)(from + 4));
+ lane1 = _mm256_blend_ps(
+ lane1, _mm256_castps128_ps256(_mm_permute_ps(
+ _mm256_castps256_ps128(lane1), _MM_SHUFFLE(1, 0, 1, 0))),
+ 15);
+ lane1 = _mm256_permute_ps(lane1, _MM_SHUFFLE(3, 3, 2, 2));
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ // BUGFIX: this path previously returned after the first insert, so the
+ // upper 256 bits (lane1) were never written.  Both inserts must execute
+ // before returning.
+ Packet16f res = _mm512_undefined_ps();
+ res = _mm512_insertf32x8(res, lane0, 0);
+ res = _mm512_insertf32x8(res, lane1, 1);
+ return res;
+#else
+ Packet16f res = _mm512_undefined_ps();
+ res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 0), 0);
+ res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 1), 1);
+ res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 0), 2);
+ res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 1), 3);
+ return res;
+#endif
+}
+// Load 4 doubles and duplicate each: {a0, a0, a1, a1, a2, a2, a3, a3}.
+template <>
+EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) {
+ Packet4d lane0 = _mm256_broadcast_pd((const __m128d*)(const void*)from);
+ lane0 = _mm256_permute_pd(lane0, 3 << 2);
+ Packet4d lane1 = _mm256_broadcast_pd((const __m128d*)(const void*)(from + 2));
+ lane1 = _mm256_permute_pd(lane1, 3 << 2);
+ Packet8d res = _mm512_undefined_pd();
+ res = _mm512_insertf64x4(res, lane0, 0);
+ return _mm512_insertf64x4(res, lane1, 1);
+}
+// Load 4 floats and replicate each one 4 times:
+// {a0, a0, a0, a0, a1, a1, a1, a1, ...}.  Each 128-bit lane is a broadcast
+// of one source element.
+template <>
+EIGEN_STRONG_INLINE Packet16f ploadquad<Packet16f>(const float* from) {
+ Packet16f tmp = _mm512_undefined_ps();
+ tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from), 0);
+ tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from + 1), 1);
+ tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from + 2), 2);
+ tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from + 3), 3);
+ return tmp;
+}
+// Load 2 doubles and replicate each one 4 times:
+// {a0, a0, a0, a0, a1, a1, a1, a1}.
+template <>
+EIGEN_STRONG_INLINE Packet8d ploadquad<Packet8d>(const double* from) {
+ Packet8d tmp = _mm512_undefined_pd();
+ Packet2d tmp0 = _mm_load_pd1(from);
+ Packet2d tmp1 = _mm_load_pd1(from + 1);
+ Packet4d lane0 = _mm256_broadcastsd_pd(tmp0);
+ Packet4d lane1 = _mm256_broadcastsd_pd(tmp1);
+ tmp = _mm512_insertf64x4(tmp, lane0, 0);
+ return _mm512_insertf64x4(tmp, lane1, 1);
+}
+// Aligned stores: `to` must be 64-byte aligned.
+template <>
+EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet16f& from) {
+ EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ps(to, from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet8d& from) {
+ EIGEN_DEBUG_ALIGNED_STORE _mm512_store_pd(to, from);
+}
+// NOTE(review): this is the *aligned* store path but uses the unaligned
+// storeu intrinsic, unlike pload<Packet16i> which uses the aligned load —
+// presumably intentional (safe either way), but confirm against upstream.
+template <>
+EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet16i& from) {
+ EIGEN_DEBUG_ALIGNED_STORE _mm512_storeu_si512(reinterpret_cast<__m512i*>(to),
+ from);
+}
+// Unaligned stores.
+template <>
+EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet16f& from) {
+ EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ps(to, from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet8d& from) {
+ EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_pd(to, from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet16i& from) {
+ EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(
+ reinterpret_cast<__m512i*>(to), from);
+}
+// Strided gather: element i is loaded from from[i * stride].  Indices are
+// built as 32-bit ints (stride * {0..15}); assumes stride fits in 32 bits —
+// TODO confirm for very large Index values.
+template <>
+EIGEN_DEVICE_FUNC inline Packet16f pgather<float, Packet16f>(const float* from,
+ Index stride) {
+ Packet16i stride_vector = _mm512_set1_epi32(stride);
+ Packet16i stride_multiplier =
+ _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier);
+ return _mm512_i32gather_ps(indices, from, 4);
+}
+template <>
+EIGEN_DEVICE_FUNC inline Packet8d pgather<double, Packet8d>(const double* from,
+ Index stride) {
+ Packet8i stride_vector = _mm256_set1_epi32(stride);
+ Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+ Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);
+ return _mm512_i32gather_pd(indices, from, 8);
+}
+// Strided scatter: element i is stored to to[i * stride].
+template <>
+EIGEN_DEVICE_FUNC inline void pscatter<float, Packet16f>(float* to,
+ const Packet16f& from,
+ Index stride) {
+ Packet16i stride_vector = _mm512_set1_epi32(stride);
+ Packet16i stride_multiplier =
+ _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier);
+ _mm512_i32scatter_ps(to, indices, from, 4);
+}
+template <>
+EIGEN_DEVICE_FUNC inline void pscatter<double, Packet8d>(double* to,
+ const Packet8d& from,
+ Index stride) {
+ Packet8i stride_vector = _mm256_set1_epi32(stride);
+ Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+ Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);
+ _mm512_i32scatter_pd(to, indices, from, 8);
+}
+// Broadcast a scalar to all elements and store the packet (aligned).
+template <>
+EIGEN_STRONG_INLINE void pstore1<Packet16f>(float* to, const float& a) {
+ Packet16f pa = pset1<Packet16f>(a);
+ pstore(to, pa);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore1<Packet8d>(double* to, const double& a) {
+ Packet8d pa = pset1<Packet8d>(a);
+ pstore(to, pa);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {
+ Packet16i pa = pset1<Packet16i>(a);
+ pstore(to, pa);
+}
+// Prefetch hints into the L1 cache.
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+// Extract element 0 of the packet.
+template <>
+EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {
+ return _mm_cvtss_f32(_mm512_extractf32x4_ps(a, 0));
+}
+template <>
+EIGEN_STRONG_INLINE double pfirst<Packet8d>(const Packet8d& a) {
+ return _mm_cvtsd_f64(_mm256_extractf128_pd(_mm512_extractf64x4_pd(a, 0), 0));
+}
+template <>
+EIGEN_STRONG_INLINE int pfirst<Packet16i>(const Packet16i& a) {
+ return _mm_extract_epi32(_mm512_extracti32x4_epi32(a, 0), 0);
+}
+// Reverse element order via a full-register permute with descending indices.
+template<> EIGEN_STRONG_INLINE Packet16f preverse(const Packet16f& a)
+{
+ return _mm512_permutexvar_ps(_mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), a);
+}
+// Double variant: each 64-bit permute index is built from a {0, k} epi32 pair.
+template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a)
+{
+ return _mm512_permutexvar_pd(_mm512_set_epi32(0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7), a);
+}
+// Absolute value by masking off the IEEE sign bit with an integer AND.
+template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a)
+{
+ return (__m512)_mm512_and_si512((__m512i)a, _mm512_set1_epi32(0x7fffffff));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
+ return (__m512d)_mm512_and_si512((__m512i)a,
+ _mm512_set1_epi64(0x7fffffffffffffff));
+}
+// Split a 512-bit float register INPUT into two __m256 halves named
+// OUTPUT_0 (low) and OUTPUT_1 (high).  With AVX512DQ a direct 256-bit
+// extract is available; otherwise each half is assembled from two 128-bit
+// extracts.
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+// BUGFIX: the two declarations were previously fused on one statement with
+// no separating semicolon, which failed to compile whenever this branch was
+// actually taken (i.e. with AVX512DQ enabled).
+#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \
+ __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0); \
+ __m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1)
+#else
+#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \
+ __m256 OUTPUT##_0 = _mm256_insertf128_ps( \
+ _mm256_castps128_ps256(_mm512_extractf32x4_ps(INPUT, 0)), \
+ _mm512_extractf32x4_ps(INPUT, 1), 1); \
+ __m256 OUTPUT##_1 = _mm256_insertf128_ps( \
+ _mm256_castps128_ps256(_mm512_extractf32x4_ps(INPUT, 2)), \
+ _mm512_extractf32x4_ps(INPUT, 3), 1);
+#endif
+// Inverse of EIGEN_EXTRACT_8f_FROM_16f: write two __m256 halves (INPUTA low,
+// INPUTB high) into the 512-bit register OUTPUT.
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+#define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB) \
+ OUTPUT = _mm512_insertf32x8(OUTPUT, INPUTA, 0); \
+ OUTPUT = _mm512_insertf32x8(OUTPUT, INPUTB, 1);
+#else
+#define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB) \
+ OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTA, 0), 0); \
+ OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTA, 1), 1); \
+ OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 0), 2); \
+ OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 1), 3);
+#endif
+// Transposing horizontal sum: result element i is the sum of all 16 elements
+// of vecs[i].  Each 512-bit input is split into two __m256 halves; groups of
+// eight half-vectors are reduced with hadd / cross-lane permute / blend
+// (same scheme as the AVX preduxp), and the two 256-bit partial results are
+// reassembled into one 512-bit packet at the end.  The statement order of
+// the shuffle pipeline is load-bearing; kept byte-identical.
+template<> EIGEN_STRONG_INLINE Packet16f preduxp<Packet16f>(const Packet16f*
+vecs)
+{
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[0], vecs0);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[1], vecs1);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[2], vecs2);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[3], vecs3);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[4], vecs4);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[5], vecs5);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[6], vecs6);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[7], vecs7);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[8], vecs8);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[9], vecs9);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[10], vecs10);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[11], vecs11);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[12], vecs12);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[13], vecs13);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[14], vecs14);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[15], vecs15);
+ // Reduce the low halves of vecs[0..7] into `final` (result elements 0..7).
+ __m256 hsum1 = _mm256_hadd_ps(vecs0_0, vecs1_0);
+ __m256 hsum2 = _mm256_hadd_ps(vecs2_0, vecs3_0);
+ __m256 hsum3 = _mm256_hadd_ps(vecs4_0, vecs5_0);
+ __m256 hsum4 = _mm256_hadd_ps(vecs6_0, vecs7_0);
+ __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+ __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+ __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+ __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+ __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+ __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+ __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+ __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+ __m256 sum1 = _mm256_add_ps(perm1, hsum5);
+ __m256 sum2 = _mm256_add_ps(perm2, hsum6);
+ __m256 sum3 = _mm256_add_ps(perm3, hsum7);
+ __m256 sum4 = _mm256_add_ps(perm4, hsum8);
+ __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+ __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+ __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
+ // Add the contribution of the high halves of vecs[0..7].
+ hsum1 = _mm256_hadd_ps(vecs0_1, vecs1_1);
+ hsum2 = _mm256_hadd_ps(vecs2_1, vecs3_1);
+ hsum3 = _mm256_hadd_ps(vecs4_1, vecs5_1);
+ hsum4 = _mm256_hadd_ps(vecs6_1, vecs7_1);
+ hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+ hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+ hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+ hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+ perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+ perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+ perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+ perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+ sum1 = _mm256_add_ps(perm1, hsum5);
+ sum2 = _mm256_add_ps(perm2, hsum6);
+ sum3 = _mm256_add_ps(perm3, hsum7);
+ sum4 = _mm256_add_ps(perm4, hsum8);
+ blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+ blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+ final = padd(final, _mm256_blend_ps(blend1, blend2, 0xf0));
+ // Same two passes for vecs[8..15] into `final_1` (result elements 8..15).
+ hsum1 = _mm256_hadd_ps(vecs8_0, vecs9_0);
+ hsum2 = _mm256_hadd_ps(vecs10_0, vecs11_0);
+ hsum3 = _mm256_hadd_ps(vecs12_0, vecs13_0);
+ hsum4 = _mm256_hadd_ps(vecs14_0, vecs15_0);
+ hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+ hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+ hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+ hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+ perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+ perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+ perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+ perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+ sum1 = _mm256_add_ps(perm1, hsum5);
+ sum2 = _mm256_add_ps(perm2, hsum6);
+ sum3 = _mm256_add_ps(perm3, hsum7);
+ sum4 = _mm256_add_ps(perm4, hsum8);
+ blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+ blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+ __m256 final_1 = _mm256_blend_ps(blend1, blend2, 0xf0);
+ hsum1 = _mm256_hadd_ps(vecs8_1, vecs9_1);
+ hsum2 = _mm256_hadd_ps(vecs10_1, vecs11_1);
+ hsum3 = _mm256_hadd_ps(vecs12_1, vecs13_1);
+ hsum4 = _mm256_hadd_ps(vecs14_1, vecs15_1);
+ hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+ hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+ hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+ hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+ perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+ perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+ perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+ perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+ sum1 = _mm256_add_ps(perm1, hsum5);
+ sum2 = _mm256_add_ps(perm2, hsum6);
+ sum3 = _mm256_add_ps(perm3, hsum7);
+ sum4 = _mm256_add_ps(perm4, hsum8);
+ blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+ blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+ final_1 = padd(final_1, _mm256_blend_ps(blend1, blend2, 0xf0));
+ __m512 final_output;
+ EIGEN_INSERT_8f_INTO_16f(final_output, final, final_1);
+ return final_output;
+}
+// Transposing horizontal sum for doubles: result element i is the sum of all
+// 8 elements of vecs[i].  Each input is split into two 256-bit halves; groups
+// of four half-vectors are reduced with hadd + cross-lane add + blend.
+template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs)
+{
+ Packet4d vecs0_0 = _mm512_extractf64x4_pd(vecs[0], 0);
+ Packet4d vecs0_1 = _mm512_extractf64x4_pd(vecs[0], 1);
+ Packet4d vecs1_0 = _mm512_extractf64x4_pd(vecs[1], 0);
+ Packet4d vecs1_1 = _mm512_extractf64x4_pd(vecs[1], 1);
+ Packet4d vecs2_0 = _mm512_extractf64x4_pd(vecs[2], 0);
+ Packet4d vecs2_1 = _mm512_extractf64x4_pd(vecs[2], 1);
+ Packet4d vecs3_0 = _mm512_extractf64x4_pd(vecs[3], 0);
+ Packet4d vecs3_1 = _mm512_extractf64x4_pd(vecs[3], 1);
+ Packet4d vecs4_0 = _mm512_extractf64x4_pd(vecs[4], 0);
+ Packet4d vecs4_1 = _mm512_extractf64x4_pd(vecs[4], 1);
+ Packet4d vecs5_0 = _mm512_extractf64x4_pd(vecs[5], 0);
+ Packet4d vecs5_1 = _mm512_extractf64x4_pd(vecs[5], 1);
+ Packet4d vecs6_0 = _mm512_extractf64x4_pd(vecs[6], 0);
+ Packet4d vecs6_1 = _mm512_extractf64x4_pd(vecs[6], 1);
+ Packet4d vecs7_0 = _mm512_extractf64x4_pd(vecs[7], 0);
+ Packet4d vecs7_1 = _mm512_extractf64x4_pd(vecs[7], 1);
+ Packet4d tmp0, tmp1;
+ // Sums of vecs[0..3] -> final_0 (result elements 0..3).
+ tmp0 = _mm256_hadd_pd(vecs0_0, vecs1_0);
+ tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+ tmp1 = _mm256_hadd_pd(vecs2_0, vecs3_0);
+ tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+ __m256d final_0 = _mm256_blend_pd(tmp0, tmp1, 0xC);
+ tmp0 = _mm256_hadd_pd(vecs0_1, vecs1_1);
+ tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+ tmp1 = _mm256_hadd_pd(vecs2_1, vecs3_1);
+ tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+ final_0 = padd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC));
+ // Sums of vecs[4..7] -> final_1 (result elements 4..7).
+ tmp0 = _mm256_hadd_pd(vecs4_0, vecs5_0);
+ tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+ tmp1 = _mm256_hadd_pd(vecs6_0, vecs7_0);
+ tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+ __m256d final_1 = _mm256_blend_pd(tmp0, tmp1, 0xC);
+ tmp0 = _mm256_hadd_pd(vecs4_1, vecs5_1);
+ tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+ tmp1 = _mm256_hadd_pd(vecs6_1, vecs7_1);
+ tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+ final_1 = padd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC));
+ // BUGFIX: `final_output` was previously read in its own initializer
+ // (undefined behavior); seed it explicitly before the inserts.
+ __m512d final_output = _mm512_undefined_pd();
+ final_output = _mm512_insertf64x4(final_output, final_0, 0);
+ return _mm512_insertf64x4(final_output, final_1, 1);
+}
+// Horizontal sum of all 16 floats.
+template <>
+EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {
+// Disabled alternative kept for reference.  NOTE(review): it appears broken —
+// it passes the 512-bit `a` to the 256-bit permute2f128 — do not enable as-is.
+#if 0
+ Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
+ Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
+ Packet8f sum = padd(lane0, lane1);
+ Packet8f tmp0 = _mm256_hadd_ps(sum, _mm256_permute2f128_ps(a, a, 1));
+ tmp0 = _mm256_hadd_ps(tmp0, tmp0);
+ return pfirst(_mm256_hadd_ps(tmp0, tmp0));
+#else
+ // Sum the four 128-bit lanes pairwise, then finish with two hadds.
+ Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
+ Packet4f sum = padd(padd(lane0, lane1), padd(lane2, lane3));
+ sum = _mm_hadd_ps(sum, sum);
+ sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1));
+ return pfirst(sum);
+#endif
+}
+// Horizontal sum of all 8 doubles.
+template <>
+EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) {
+ Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
+ Packet4d sum = padd(lane0, lane1);
+ Packet4d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1));
+ return pfirst(_mm256_hadd_pd(tmp0, tmp0));
+}
+// Pairwise reduction to the half packet: result[i] = a[i] + a[i+8].
+template <>
+EIGEN_STRONG_INLINE Packet8f predux_downto4<Packet16f>(const Packet16f& a) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
+ Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
+ return padd(lane0, lane1);
+#else
+ Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
+ Packet4f sum0 = padd(lane0, lane2);
+ Packet4f sum1 = padd(lane1, lane3);
+ return _mm256_insertf128_ps(_mm256_castps128_ps256(sum0), sum1, 1);
+#endif
+}
+// Pairwise reduction for doubles: result[i] = a[i] + a[i+4].
+template <>
+EIGEN_STRONG_INLINE Packet4d predux_downto4<Packet8d>(const Packet8d& a) {
+ Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
+ Packet4d res = padd(lane0, lane1);
+ return res;
+}
+// Horizontal product of all 16 floats.
+template <>
+EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) {
+// Disabled alternative kept for reference.  NOTE(review): it mixes __m256
+// values with 128-bit _mm_permute_ps — do not enable as-is.
+#if 0
+ Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
+ Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
+ Packet8f res = pmul(lane0, lane1);
+ res = pmul(res, _mm256_permute2f128_ps(res, res, 1));
+ res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
+ return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
+#else
+ Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
+ Packet4f res = pmul(pmul(lane0, lane1), pmul(lane2, lane3));
+ res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
+ return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
+#endif
+}
+// Horizontal product of all 8 doubles.
+template <>
+EIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) {
+ Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
+ Packet4d res = pmul(lane0, lane1);
+ res = pmul(res, _mm256_permute2f128_pd(res, res, 1));
+ return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1)));
+}
+template <>
+EIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) {
+ Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
+ Packet4f res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3));
+ res = _mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
+ return pfirst(_mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
+}
+template <>
+EIGEN_STRONG_INLINE double predux_min<Packet8d>(const Packet8d& a) {
+ Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
+ Packet4d res = _mm256_min_pd(lane0, lane1);
+ res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1));
+ return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1)));
+}
// Horizontal maximum of all 16 floats in `a`.
template <>
EIGEN_STRONG_INLINE float predux_max<Packet16f>(const Packet16f& a) {
  // Reduce the four 128-bit lanes pairwise into a single Packet4f.
  Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
  Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
  Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
  Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
  Packet4f res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3));
  // Fold the upper pair onto the lower pair, then the last two elements.
  res = _mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
  return pfirst(_mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
}
// Horizontal maximum of all 8 doubles in `a`.
template <>
EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) {
  // Reduce the two 256-bit halves, then the 128-bit lanes, then the pair.
  Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
  Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
  Packet4d res = _mm256_max_pd(lane0, lane1);
  res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1));
  return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1)));
}
// palign<Offset>: shift the concatenation [first, second] left by Offset
// floats, i.e. result[i] = first[Offset + i] for i < 16 - Offset and
// second[Offset + i - 16] above that.  Implemented with two cross-lane
// permutes plus a masked blend of the two permuted vectors.
template <int Offset>
struct palign_impl<Offset, Packet16f> {
  static EIGEN_STRONG_INLINE void run(Packet16f& first,
                                      const Packet16f& second) {
    if (Offset != 0) {
      // Indices Offset..Offset+15; permutexvar only uses the low 4 bits of
      // each index, and out-of-range slots are replaced by the blend below.
      __m512i first_idx = _mm512_set_epi32(
          Offset + 15, Offset + 14, Offset + 13, Offset + 12, Offset + 11,
          Offset + 10, Offset + 9, Offset + 8, Offset + 7, Offset + 6,
          Offset + 5, Offset + 4, Offset + 3, Offset + 2, Offset + 1, Offset);
      // Indices Offset-16..Offset-1: position i takes second[Offset+i-16];
      // negative entries wrap but are masked out by `mask`.
      __m512i second_idx =
          _mm512_set_epi32(Offset - 1, Offset - 2, Offset - 3, Offset - 4,
                           Offset - 5, Offset - 6, Offset - 7, Offset - 8,
                           Offset - 9, Offset - 10, Offset - 11, Offset - 12,
                           Offset - 13, Offset - 14, Offset - 15, Offset - 16);
      // Bits [16-Offset, 15] set: those result positions come from `second`.
      unsigned short mask = 0xFFFF;
      mask <<= (16 - Offset);
      first = _mm512_permutexvar_ps(first_idx, first);
      Packet16f tmp = _mm512_permutexvar_ps(second_idx, second);
      first = _mm512_mask_blend_ps(mask, first, tmp);
    }
  }
};
// palign<Offset> for 8 doubles: same scheme as the Packet16f version.  The
// permute indices are built with _mm512_set_epi32 as (hi=0, lo=index) pairs,
// which form the 64-bit indices _mm512_permutexvar_pd expects.
template <int Offset>
struct palign_impl<Offset, Packet8d> {
  static EIGEN_STRONG_INLINE void run(Packet8d& first, const Packet8d& second) {
    if (Offset != 0) {
      __m512i first_idx = _mm512_set_epi32(
          0, Offset + 7, 0, Offset + 6, 0, Offset + 5, 0, Offset + 4, 0,
          Offset + 3, 0, Offset + 2, 0, Offset + 1, 0, Offset);
      __m512i second_idx = _mm512_set_epi32(
          0, Offset - 1, 0, Offset - 2, 0, Offset - 3, 0, Offset - 4, 0,
          Offset - 5, 0, Offset - 6, 0, Offset - 7, 0, Offset - 8);
      // Bits [8-Offset, 7] set: those result positions come from `second`.
      unsigned char mask = 0xFF;
      mask <<= (8 - Offset);
      first = _mm512_permutexvar_pd(first_idx, first);
      Packet8d tmp = _mm512_permutexvar_pd(second_idx, second);
      first = _mm512_mask_blend_pd(mask, first, tmp);
    }
  }
};
// Reassemble one 512-bit output row from two 256-bit halves taken STRIDE
// apart in the temporary Packet8f array.
#define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \
  EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[INDEX], INPUT[INDEX + STRIDE]);
// In-place 16x16 float transpose, done as a classical shuffle network:
// unpack pairs of rows (T*), merge pairs of pairs with 4-wide shuffles (S*),
// split each 512-bit S into 256-bit halves (S*_0/S*_1 via
// EIGEN_EXTRACT_8f_FROM_16f), combine halves across rows with 128-bit lane
// permutes (tmp), and finally pack the halves back into 512-bit rows.
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16f, 16>& kernel) {
  // Stage 1: interleave even/odd elements of adjacent row pairs.
  __m512 T0 = _mm512_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
  __m512 T1 = _mm512_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
  __m512 T2 = _mm512_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
  __m512 T3 = _mm512_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
  __m512 T4 = _mm512_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
  __m512 T5 = _mm512_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
  __m512 T6 = _mm512_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
  __m512 T7 = _mm512_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
  __m512 T8 = _mm512_unpacklo_ps(kernel.packet[8], kernel.packet[9]);
  __m512 T9 = _mm512_unpackhi_ps(kernel.packet[8], kernel.packet[9]);
  __m512 T10 = _mm512_unpacklo_ps(kernel.packet[10], kernel.packet[11]);
  __m512 T11 = _mm512_unpackhi_ps(kernel.packet[10], kernel.packet[11]);
  __m512 T12 = _mm512_unpacklo_ps(kernel.packet[12], kernel.packet[13]);
  __m512 T13 = _mm512_unpackhi_ps(kernel.packet[12], kernel.packet[13]);
  __m512 T14 = _mm512_unpacklo_ps(kernel.packet[14], kernel.packet[15]);
  __m512 T15 = _mm512_unpackhi_ps(kernel.packet[14], kernel.packet[15]);
  // Stage 2: merge 2-element groups into 4-element transposed groups.
  __m512 S0 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(1, 0, 1, 0));
  __m512 S1 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(3, 2, 3, 2));
  __m512 S2 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(1, 0, 1, 0));
  __m512 S3 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(3, 2, 3, 2));
  __m512 S4 = _mm512_shuffle_ps(T4, T6, _MM_SHUFFLE(1, 0, 1, 0));
  __m512 S5 = _mm512_shuffle_ps(T4, T6, _MM_SHUFFLE(3, 2, 3, 2));
  __m512 S6 = _mm512_shuffle_ps(T5, T7, _MM_SHUFFLE(1, 0, 1, 0));
  __m512 S7 = _mm512_shuffle_ps(T5, T7, _MM_SHUFFLE(3, 2, 3, 2));
  __m512 S8 = _mm512_shuffle_ps(T8, T10, _MM_SHUFFLE(1, 0, 1, 0));
  __m512 S9 = _mm512_shuffle_ps(T8, T10, _MM_SHUFFLE(3, 2, 3, 2));
  __m512 S10 = _mm512_shuffle_ps(T9, T11, _MM_SHUFFLE(1, 0, 1, 0));
  __m512 S11 = _mm512_shuffle_ps(T9, T11, _MM_SHUFFLE(3, 2, 3, 2));
  __m512 S12 = _mm512_shuffle_ps(T12, T14, _MM_SHUFFLE(1, 0, 1, 0));
  __m512 S13 = _mm512_shuffle_ps(T12, T14, _MM_SHUFFLE(3, 2, 3, 2));
  __m512 S14 = _mm512_shuffle_ps(T13, T15, _MM_SHUFFLE(1, 0, 1, 0));
  __m512 S15 = _mm512_shuffle_ps(T13, T15, _MM_SHUFFLE(3, 2, 3, 2));
  // Stage 3: split each 512-bit value into two Packet8f halves S<i>_0/S<i>_1.
  EIGEN_EXTRACT_8f_FROM_16f(S0, S0);
  EIGEN_EXTRACT_8f_FROM_16f(S1, S1);
  EIGEN_EXTRACT_8f_FROM_16f(S2, S2);
  EIGEN_EXTRACT_8f_FROM_16f(S3, S3);
  EIGEN_EXTRACT_8f_FROM_16f(S4, S4);
  EIGEN_EXTRACT_8f_FROM_16f(S5, S5);
  EIGEN_EXTRACT_8f_FROM_16f(S6, S6);
  EIGEN_EXTRACT_8f_FROM_16f(S7, S7);
  EIGEN_EXTRACT_8f_FROM_16f(S8, S8);
  EIGEN_EXTRACT_8f_FROM_16f(S9, S9);
  EIGEN_EXTRACT_8f_FROM_16f(S10, S10);
  EIGEN_EXTRACT_8f_FROM_16f(S11, S11);
  EIGEN_EXTRACT_8f_FROM_16f(S12, S12);
  EIGEN_EXTRACT_8f_FROM_16f(S13, S13);
  EIGEN_EXTRACT_8f_FROM_16f(S14, S14);
  EIGEN_EXTRACT_8f_FROM_16f(S15, S15);
  // Stage 4: exchange 128-bit lanes across rows to finish the transpose at
  // 256-bit granularity.
  PacketBlock<Packet8f, 32> tmp;
  tmp.packet[0] = _mm256_permute2f128_ps(S0_0, S4_0, 0x20);
  tmp.packet[1] = _mm256_permute2f128_ps(S1_0, S5_0, 0x20);
  tmp.packet[2] = _mm256_permute2f128_ps(S2_0, S6_0, 0x20);
  tmp.packet[3] = _mm256_permute2f128_ps(S3_0, S7_0, 0x20);
  tmp.packet[4] = _mm256_permute2f128_ps(S0_0, S4_0, 0x31);
  tmp.packet[5] = _mm256_permute2f128_ps(S1_0, S5_0, 0x31);
  tmp.packet[6] = _mm256_permute2f128_ps(S2_0, S6_0, 0x31);
  tmp.packet[7] = _mm256_permute2f128_ps(S3_0, S7_0, 0x31);
  tmp.packet[8] = _mm256_permute2f128_ps(S0_1, S4_1, 0x20);
  tmp.packet[9] = _mm256_permute2f128_ps(S1_1, S5_1, 0x20);
  tmp.packet[10] = _mm256_permute2f128_ps(S2_1, S6_1, 0x20);
  tmp.packet[11] = _mm256_permute2f128_ps(S3_1, S7_1, 0x20);
  tmp.packet[12] = _mm256_permute2f128_ps(S0_1, S4_1, 0x31);
  tmp.packet[13] = _mm256_permute2f128_ps(S1_1, S5_1, 0x31);
  tmp.packet[14] = _mm256_permute2f128_ps(S2_1, S6_1, 0x31);
  tmp.packet[15] = _mm256_permute2f128_ps(S3_1, S7_1, 0x31);
  tmp.packet[16] = _mm256_permute2f128_ps(S8_0, S12_0, 0x20);
  tmp.packet[17] = _mm256_permute2f128_ps(S9_0, S13_0, 0x20);
  tmp.packet[18] = _mm256_permute2f128_ps(S10_0, S14_0, 0x20);
  tmp.packet[19] = _mm256_permute2f128_ps(S11_0, S15_0, 0x20);
  tmp.packet[20] = _mm256_permute2f128_ps(S8_0, S12_0, 0x31);
  tmp.packet[21] = _mm256_permute2f128_ps(S9_0, S13_0, 0x31);
  tmp.packet[22] = _mm256_permute2f128_ps(S10_0, S14_0, 0x31);
  tmp.packet[23] = _mm256_permute2f128_ps(S11_0, S15_0, 0x31);
  tmp.packet[24] = _mm256_permute2f128_ps(S8_1, S12_1, 0x20);
  tmp.packet[25] = _mm256_permute2f128_ps(S9_1, S13_1, 0x20);
  tmp.packet[26] = _mm256_permute2f128_ps(S10_1, S14_1, 0x20);
  tmp.packet[27] = _mm256_permute2f128_ps(S11_1, S15_1, 0x20);
  tmp.packet[28] = _mm256_permute2f128_ps(S8_1, S12_1, 0x31);
  tmp.packet[29] = _mm256_permute2f128_ps(S9_1, S13_1, 0x31);
  tmp.packet[30] = _mm256_permute2f128_ps(S10_1, S14_1, 0x31);
  tmp.packet[31] = _mm256_permute2f128_ps(S11_1, S15_1, 0x31);
  // Stage 5: pair halves i and i+16 back into 512-bit output rows.
  PACK_OUTPUT(kernel.packet, tmp.packet, 0, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 1, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 2, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 3, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 4, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 5, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 6, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 7, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 8, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 9, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 10, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 11, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 12, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 13, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 14, 16);
  PACK_OUTPUT(kernel.packet, tmp.packet, 15, 16);
}
// Variant of PACK_OUTPUT for the 4-row case: output row INDEX is built from
// temporary halves 2*INDEX and 2*INDEX + STRIDE.
#define PACK_OUTPUT_2(OUTPUT, INPUT, INDEX, STRIDE) \
  EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[2 * INDEX], \
                           INPUT[2 * INDEX + STRIDE]);
// In-place transpose of a 4x16 block of floats (4 rows of Packet16f),
// using the same unpack / shuffle / lane-permute network as the 16x16 case.
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16f, 4>& kernel) {
  __m512 T0 = _mm512_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
  __m512 T1 = _mm512_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
  __m512 T2 = _mm512_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
  __m512 T3 = _mm512_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
  __m512 S0 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(1, 0, 1, 0));
  __m512 S1 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(3, 2, 3, 2));
  __m512 S2 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(1, 0, 1, 0));
  __m512 S3 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(3, 2, 3, 2));
  // Split into Packet8f halves S<i>_0/S<i>_1, permute 128-bit lanes, repack.
  EIGEN_EXTRACT_8f_FROM_16f(S0, S0);
  EIGEN_EXTRACT_8f_FROM_16f(S1, S1);
  EIGEN_EXTRACT_8f_FROM_16f(S2, S2);
  EIGEN_EXTRACT_8f_FROM_16f(S3, S3);
  PacketBlock<Packet8f, 8> tmp;
  tmp.packet[0] = _mm256_permute2f128_ps(S0_0, S1_0, 0x20);
  tmp.packet[1] = _mm256_permute2f128_ps(S2_0, S3_0, 0x20);
  tmp.packet[2] = _mm256_permute2f128_ps(S0_0, S1_0, 0x31);
  tmp.packet[3] = _mm256_permute2f128_ps(S2_0, S3_0, 0x31);
  tmp.packet[4] = _mm256_permute2f128_ps(S0_1, S1_1, 0x20);
  tmp.packet[5] = _mm256_permute2f128_ps(S2_1, S3_1, 0x20);
  tmp.packet[6] = _mm256_permute2f128_ps(S0_1, S1_1, 0x31);
  tmp.packet[7] = _mm256_permute2f128_ps(S2_1, S3_1, 0x31);
  PACK_OUTPUT_2(kernel.packet, tmp.packet, 0, 1);
  PACK_OUTPUT_2(kernel.packet, tmp.packet, 1, 1);
  PACK_OUTPUT_2(kernel.packet, tmp.packet, 2, 1);
  PACK_OUTPUT_2(kernel.packet, tmp.packet, 3, 1);
}
// Rebuild a Packet8d output row from two Packet4d halves INDEX and
// INDEX + STRIDE of the temporary array (square 8x8 case).
#define PACK_OUTPUT_SQ_D(OUTPUT, INPUT, INDEX, STRIDE) \
  OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[INDEX], 0); \
  OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[INDEX + STRIDE], 1);
// Rebuild a Packet8d output row from halves 2*INDEX and 2*INDEX + STRIDE
// (rectangular 4-row case).
#define PACK_OUTPUT_D(OUTPUT, INPUT, INDEX, STRIDE) \
  OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX)], 0); \
  OUTPUT[INDEX] = \
      _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX) + STRIDE], 1);
// In-place transpose of a 4x8 block of doubles (4 rows of Packet8d):
// pair rows with shuffles, then exchange 128-bit lanes per 256-bit half.
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8d, 4>& kernel) {
  __m512d T0 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
  __m512d T1 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0xff);
  __m512d T2 = _mm512_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
  __m512d T3 = _mm512_shuffle_pd(kernel.packet[2], kernel.packet[3], 0xff);
  PacketBlock<Packet4d, 8> tmp;
  tmp.packet[0] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
                                         _mm512_extractf64x4_pd(T2, 0), 0x20);
  tmp.packet[1] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
                                         _mm512_extractf64x4_pd(T3, 0), 0x20);
  tmp.packet[2] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
                                         _mm512_extractf64x4_pd(T2, 0), 0x31);
  tmp.packet[3] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
                                         _mm512_extractf64x4_pd(T3, 0), 0x31);
  tmp.packet[4] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
                                         _mm512_extractf64x4_pd(T2, 1), 0x20);
  tmp.packet[5] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
                                         _mm512_extractf64x4_pd(T3, 1), 0x20);
  tmp.packet[6] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
                                         _mm512_extractf64x4_pd(T2, 1), 0x31);
  tmp.packet[7] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
                                         _mm512_extractf64x4_pd(T3, 1), 0x31);
  PACK_OUTPUT_D(kernel.packet, tmp.packet, 0, 1);
  PACK_OUTPUT_D(kernel.packet, tmp.packet, 1, 1);
  PACK_OUTPUT_D(kernel.packet, tmp.packet, 2, 1);
  PACK_OUTPUT_D(kernel.packet, tmp.packet, 3, 1);
}
// In-place 8x8 double transpose: unpack adjacent row pairs, exchange 128-bit
// lanes between 256-bit halves, then repack halves i and i+8 into rows.
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8d, 8>& kernel) {
  __m512d T0 = _mm512_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
  __m512d T1 = _mm512_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
  __m512d T2 = _mm512_unpacklo_pd(kernel.packet[2], kernel.packet[3]);
  __m512d T3 = _mm512_unpackhi_pd(kernel.packet[2], kernel.packet[3]);
  __m512d T4 = _mm512_unpacklo_pd(kernel.packet[4], kernel.packet[5]);
  __m512d T5 = _mm512_unpackhi_pd(kernel.packet[4], kernel.packet[5]);
  __m512d T6 = _mm512_unpacklo_pd(kernel.packet[6], kernel.packet[7]);
  __m512d T7 = _mm512_unpackhi_pd(kernel.packet[6], kernel.packet[7]);
  PacketBlock<Packet4d, 16> tmp;
  tmp.packet[0] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
                                         _mm512_extractf64x4_pd(T2, 0), 0x20);
  tmp.packet[1] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
                                         _mm512_extractf64x4_pd(T3, 0), 0x20);
  tmp.packet[2] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
                                         _mm512_extractf64x4_pd(T2, 0), 0x31);
  tmp.packet[3] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
                                         _mm512_extractf64x4_pd(T3, 0), 0x31);
  tmp.packet[4] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
                                         _mm512_extractf64x4_pd(T2, 1), 0x20);
  tmp.packet[5] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
                                         _mm512_extractf64x4_pd(T3, 1), 0x20);
  tmp.packet[6] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
                                         _mm512_extractf64x4_pd(T2, 1), 0x31);
  tmp.packet[7] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
                                         _mm512_extractf64x4_pd(T3, 1), 0x31);
  tmp.packet[8] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 0),
                                         _mm512_extractf64x4_pd(T6, 0), 0x20);
  tmp.packet[9] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 0),
                                         _mm512_extractf64x4_pd(T7, 0), 0x20);
  tmp.packet[10] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 0),
                                          _mm512_extractf64x4_pd(T6, 0), 0x31);
  tmp.packet[11] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 0),
                                          _mm512_extractf64x4_pd(T7, 0), 0x31);
  tmp.packet[12] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 1),
                                          _mm512_extractf64x4_pd(T6, 1), 0x20);
  tmp.packet[13] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 1),
                                          _mm512_extractf64x4_pd(T7, 1), 0x20);
  tmp.packet[14] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 1),
                                          _mm512_extractf64x4_pd(T6, 1), 0x31);
  tmp.packet[15] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 1),
                                          _mm512_extractf64x4_pd(T7, 1), 0x31);
  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 0, 8);
  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 1, 8);
  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 2, 8);
  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 3, 8);
  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 4, 8);
  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 5, 8);
  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 6, 8);
  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 7, 8);
}
// Selector-based blend for Packet16f: intentionally unimplemented in this
// AVX512 port; aborts at runtime if ever instantiated.
template <>
EIGEN_STRONG_INLINE Packet16f pblend(const Selector<16>& ,
                                     const Packet16f& ,
                                     const Packet16f& ) {
  assert(false && "To be implemented");
  return Packet16f();
}
// Selector-based blend for Packet8d: intentionally unimplemented in this
// AVX512 port; aborts at runtime if ever instantiated.
template <>
EIGEN_STRONG_INLINE Packet8d pblend(const Selector<8>& ,
                                    const Packet8d& ,
                                    const Packet8d& ) {
  assert(false && "To be implemented");
  return Packet8d();
}
+}
+}
+#endif
+// end #include "src/Core/arch/AVX512/PacketMath.h"
+// #include "src/Core/arch/AVX512/MathFunctions.h"
+#ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
+#define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
+namespace Eigen {
+namespace internal {
+#if EIGEN_GNUC_AT_LEAST(5, 3)
// Declare a local packet constant p16f_<NAME>/p8d_<NAME> broadcast from a
// scalar value, or (the _FROM_INT variants) from a raw IEEE bit pattern.
#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
  const Packet16f p16f_##NAME = pset1<Packet16f>(X)
#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
  const Packet16f p16f_##NAME = (__m512)pset1<Packet16i>(X)
#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
  const Packet8d p8d_##NAME = pset1<Packet8d>(X)
#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
  const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
#if defined(EIGEN_VECTORIZE_AVX512DQ)
// Natural logarithm of 16 floats using the cephes scheme: decompose
// x = 2^e * m with m in [sqrt(1/2), sqrt(2)), evaluate a degree-8 polynomial
// for log(m), then recombine with e * log(2).
// Special cases: log(0) = -inf; log(x < 0) and log(NaN) = NaN.
//
// BUGFIX: _mm512_mask_blend_ps(k, a, b) returns b where the mask bit is set
// and a otherwise.  The original code had the last two operands swapped at
// every call site, which inverted every selection (e.g. all normal inputs
// mapped to -inf).  All blend operands below are in the correct order.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
plog<Packet16f>(const Packet16f& _x) {
  Packet16f x = _x;
  _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
  _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
  _EIGEN_DECLARE_CONST_Packet16f(126f, 126.0f);
  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inv_mant_mask, ~0x7f800000);
  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(min_norm_pos, 0x00800000);
  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(minus_inf, 0xff800000);
  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
  // Polynomial coefficients for log(1+x) on [sqrt(1/2)-1, sqrt(2)-1].
  _EIGEN_DECLARE_CONST_Packet16f(cephes_SQRTHF, 0.707106781186547524f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p0, 7.0376836292E-2f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p1, -1.1514610310E-1f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p2, 1.1676998740E-1f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p3, -1.2420140846E-1f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p4, +1.4249322787E-1f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p5, -1.6668057665E-1f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p6, +2.0000714765E-1f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p7, -2.4999993993E-1f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p8, +3.3333331174E-1f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_q1, -2.12194440e-4f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_q2, 0.693359375f);
  // invalid: NaN or negative input; iszero: +/-0 input.
  __mmask16 invalid_mask =
      _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_NGE_UQ);
  __mmask16 iszero_mask =
      _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_EQ_UQ);
  // Flush denormals to the smallest normal so exponent extraction is valid.
  x = pmax(x, p16f_min_norm_pos);
  // e = biased exponent - 126: the mantissa is renormalized into [0.5, 1).
  Packet16f emm0 = _mm512_cvtepi32_ps(_mm512_srli_epi32((__m512i)x, 23));
  Packet16f e = _mm512_sub_ps(emm0, p16f_126f);
  x = _mm512_and_ps(x, p16f_inv_mant_mask);
  x = _mm512_or_ps(x, p16f_half);
  // If the mantissa is below sqrt(1/2), double it (add x to itself via tmp)
  // and decrement the exponent so m stays in [sqrt(1/2), sqrt(2)).
  __mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
  Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x);
  x = psub(x, p16f_1);
  e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1));
  x = padd(x, tmp);
  Packet16f x2 = pmul(x, x);
  Packet16f x3 = pmul(x2, x);
  // Evaluate the degree-8 polynomial as three interleaved degree-2 pieces
  // to expose instruction-level parallelism.
  Packet16f y, y1, y2;
  y = pmadd(p16f_cephes_log_p0, x, p16f_cephes_log_p1);
  y1 = pmadd(p16f_cephes_log_p3, x, p16f_cephes_log_p4);
  y2 = pmadd(p16f_cephes_log_p6, x, p16f_cephes_log_p7);
  y = pmadd(y, x, p16f_cephes_log_p2);
  y1 = pmadd(y1, x, p16f_cephes_log_p5);
  y2 = pmadd(y2, x, p16f_cephes_log_p8);
  y = pmadd(y, x3, y1);
  y = pmadd(y, x3, y2);
  y = pmul(y, x3);
  // Recombine with e*log(2), split into two terms for accuracy.
  y1 = pmul(e, p16f_cephes_log_q1);
  tmp = pmul(x2, p16f_half);
  y = padd(y, y1);
  x = psub(x, tmp);
  y2 = pmul(e, p16f_cephes_log_q2);
  x = padd(x, y);
  x = padd(x, y2);
  // Specials: -inf for zero inputs, NaN for negative/NaN inputs, x otherwise.
  return _mm512_mask_blend_ps(
      iszero_mask, _mm512_mask_blend_ps(invalid_mask, x, p16f_nan),
      p16f_minus_inf);
}
#endif
// Exponential of 16 floats: range-reduce x = m*ln2 + r with |r| <= ln2/2,
// evaluate a degree-5 polynomial for e^r, then scale by 2^m via the exponent
// bits.  Inputs are clamped to the representable range first.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
pexp<Packet16f>(const Packet16f& _x) {
  _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
  _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
  _EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
  // Clamp bounds: beyond these exp overflows/underflows float range.
  _EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
  _EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
  Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
  // m = round(x / ln2) via floor(x*log2(e) + 0.5).
  Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));
  // r = x - m*ln2, computed with a fused multiply-add for accuracy.
  _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
  Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
  Packet16f r2 = pmul(r, r);
  // Degree-5 polynomial approximation of e^r - 1 - r, Horner form.
  Packet16f y = p16f_cephes_exp_p0;
  y = pmadd(y, r, p16f_cephes_exp_p1);
  y = pmadd(y, r, p16f_cephes_exp_p2);
  y = pmadd(y, r, p16f_cephes_exp_p3);
  y = pmadd(y, r, p16f_cephes_exp_p4);
  y = pmadd(y, r, p16f_cephes_exp_p5);
  y = pmadd(y, r2, r);
  y = padd(y, p16f_1);
  // Build 2^m by placing m+127 into the float exponent field.
  Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
  emm0 = _mm512_slli_epi32(emm0, 23);
  // The outer pmax with _x propagates NaN inputs to the result.
  return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
}
+#if EIGEN_FAST_MATH
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+psqrt<Packet16f>(const Packet16f& _x) {
+ _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
+ _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
+ _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
+ Packet16f neg_half = pmul(_x, p16f_minus_half);
+ __mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
+ Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x),
+ _mm512_setzero_ps());
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
+ return pmul(_x, x);
+}
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
+psqrt<Packet8d>(const Packet8d& _x) {
+ _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
+ _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
+ _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
+ Packet8d neg_half = pmul(_x, p8d_minus_half);
+ __mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
+ Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x),
+ _mm512_setzero_pd());
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
+ return pmul(_x, x);
+}
+#else
+template <>
+EIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(const Packet16f& x) {
+ return _mm512_sqrt_ps(x);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
+ return _mm512_sqrt_pd(x);
+}
+#endif
+#ifdef EIGEN_FAST_MATH
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+prsqrt<Packet16f>(const Packet16f& _x) {
+ _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
+ _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
+ _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
+ _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
+ Packet16f neg_half = pmul(_x, p16f_minus_half);
+ __mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
+ Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(),
+ _mm512_rsqrt14_ps(_x));
+ __mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
+ Packet16f infs_and_nans = _mm512_mask_blend_ps(
+ neg_mask, p16f_nan,
+ _mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps()));
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
+ return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x);
+}
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
+prsqrt<Packet8d>(const Packet8d& _x) {
+ _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
+ _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(nan, 0x7ff1000000000000LL);
+ _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
+ _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
+ _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
+ Packet8d neg_half = pmul(_x, p8d_minus_half);
+ __mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
+ Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(),
+ _mm512_rsqrt14_pd(_x));
+ __mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
+ Packet8d infs_and_nans = _mm512_mask_blend_pd(
+ neg_mask, p8d_nan,
+ _mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd()));
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
+ return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x);
+}
+#else
+template <>
+EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
+ return _mm512_rsqrt28_ps(x);
+}
+#endif
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/AVX512/MathFunctions.h"
+#elif defined EIGEN_VECTORIZE_AVX
+// #include "src/Core/arch/SSE/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_SSE_H
+#define EIGEN_PACKET_MATH_SSE_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
+#endif
+#endif
// Workaround for old GCC/MinGW ABIs (__GXX_ABI_VERSION < 1004) where raw
// __m128 types cannot be used as template arguments: wrap them in a trivial
// implicitly-convertible struct.  Otherwise use the intrinsic types directly.
#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
template<typename T>
struct eigen_packet_wrapper
{
  EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
  EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
  EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
  EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
  EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
    m_val = v;
    return *this;
  }
  T m_val;
};
typedef eigen_packet_wrapper<__m128>  Packet4f;
typedef eigen_packet_wrapper<__m128i> Packet4i;
typedef eigen_packet_wrapper<__m128d> Packet2d;
#else
typedef __m128  Packet4f;
typedef __m128i Packet4i;
typedef __m128d Packet2d;
#endif
// Let Eigen treat the raw SSE vector types as arithmetic (copyable by value).
template<> struct is_arithmetic<__m128>  { enum { value = true }; };
template<> struct is_arithmetic<__m128i> { enum { value = true }; };
template<> struct is_arithmetic<__m128d> { enum { value = true }; };
// Element-permutation helpers: *_swizzle1 permutes one vector (result lane i
// gets source lane p/q/r/s), *_swizzle2 picks the low half from `a` and the
// high half from `b`.  Float variants route through the integer shuffle to
// get a full 4-way permute.
#define vec4f_swizzle1(v,p,q,r,s) \
  (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
#define vec4i_swizzle1(v,p,q,r,s) \
  (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
#define vec2d_swizzle1(v,p,q) \
  (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
#define vec4f_swizzle2(a,b,p,q,r,s) \
  (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
#define vec4i_swizzle2(a,b,p,q,r,s) \
  (_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
// Declare a local packet constant p4f_/p2d_/p4i_<NAME> broadcast from a
// scalar, or (the _FROM_INT variant) from a raw IEEE bit pattern.
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
  const Packet4f p4f_##NAME = pset1<Packet4f>(X)
#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
  const Packet2d p2d_##NAME = pset1<Packet2d>(X)
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
  const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
  const Packet4i p4i_##NAME = pset1<Packet4i>(X)
// SSE capability tables for float/double; skipped when AVX is on, because
// the AVX header provides wider-packet versions of these traits.
#ifndef EIGEN_VECTORIZE_AVX
template<> struct packet_traits<float> : default_packet_traits
{
  typedef Packet4f type;
  typedef Packet4f half;
  enum {
    Vectorizable = 1,
    AlignedOnScalar = 1,
    size=4,
    HasHalfPacket = 0,
    HasDiv = 1,
    HasSin = EIGEN_FAST_MATH,
    HasCos = EIGEN_FAST_MATH,
    HasLog = 1,
    HasExp = 1,
    HasSqrt = 1,
    HasRsqrt = 1,
    HasTanh = EIGEN_FAST_MATH,
    HasBlend = 1
    // Rounding instructions only exist from SSE4.1 on.
#ifdef EIGEN_VECTORIZE_SSE4_1
    ,
    HasRound = 1,
    HasFloor = 1,
    HasCeil = 1
#endif
  };
};
template<> struct packet_traits<double> : default_packet_traits
{
  typedef Packet2d type;
  typedef Packet2d half;
  enum {
    Vectorizable = 1,
    AlignedOnScalar = 1,
    size=2,
    HasHalfPacket = 0,
    HasDiv = 1,
    HasExp = 1,
    HasSqrt = 1,
    HasRsqrt = 1,
    HasBlend = 1
    // Rounding instructions only exist from SSE4.1 on.
#ifdef EIGEN_VECTORIZE_SSE4_1
    ,
    HasRound = 1,
    HasFloor = 1,
    HasCeil = 1
#endif
  };
};
#endif
// SSE capability table for int: 4-wide with blend support only.
template<> struct packet_traits<int>    : default_packet_traits
{
  typedef Packet4i type;
  typedef Packet4i half;
  enum {
    Vectorizable = 1,
    AlignedOnScalar = 1,
    size=4,
    HasBlend = 1
  };
};
// Reverse mapping packet -> scalar type / size / required alignment.
template<> struct unpacket_traits<Packet4f> { typedef float  type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
template<> struct unpacket_traits<Packet4i> { typedef int    type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
// Cost-model hints for vectorized division (cycles-ish); the AVX header
// provides its own values when AVX is enabled.
#ifndef EIGEN_VECTORIZE_AVX
template<> struct scalar_div_cost<float,true>  { enum { value = 7 }; };
template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
#endif
// Broadcast a scalar to all lanes.  MSVC 2008 miscompiles the _set1
// intrinsics (bug 489), so it gets the explicit _set_* forms instead.
#if EIGEN_COMP_MSVC==1500
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps(from,from,from,from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set_epi32(from,from,from,from); }
#else
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps1(from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set1_epi32(from); }
#endif
// On strict GCC without AVX, load one float and splat it explicitly; the
// generic pload1 generates worse code there.
#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
  return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
}
#endif
// Linear sequence a, a+1, a+2, ...: broadcast `a` and add the lane indices.
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
// Lane-wise addition and subtraction, one intrinsic each.
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
// Negation: flip the IEEE sign bit with XOR for floats/doubles, 0 - a for
// ints (which have no sign-bit trick under two's complement overflow rules).
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
{
  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
  return _mm_xor_ps(a,mask);
}
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
{
  // Sign bit of each double is the top bit of its high 32-bit word.
  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
  return _mm_xor_pd(a,mask);
}
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
{
  return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
}
// Complex conjugate is the identity for real-valued packets.
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
// Lane-wise multiplication.  32-bit integer multiply is native only from
// SSE4.1; before that it is emulated with two 32x32->64 unsigned multiplies
// on the even/odd lanes, then reassembled with swizzles.
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
  return _mm_mullo_epi32(a,b);
#else
  // Low 32 bits of products of lanes (0,2) and of lanes (1,3), interleaved
  // back into the original lane order.
  return vec4i_swizzle1(
            vec4i_swizzle2(
              _mm_mul_epu32(a,b),
              _mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
                            vec4i_swizzle1(b,1,0,3,2)),
              0,2,0,2),
            0,2,1,3);
#endif
}
// Lane-wise division; integer multiply-add falls back to mul + add since
// there is no integer FMA instruction.
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
// Fused multiply-add for float/double when the FMA instruction set is on.
#ifdef __FMA__
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
#endif
+// --- Lane-wise min/max and rounding ---
+// NOTE(review): _mm_min_ps/_mm_max_ps return the SECOND operand when the
+// comparison is unordered (NaN); callers relying on NaN propagation should
+// be aware the behavior follows the raw intrinsic here.
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_min_epi32(a,b);
+#else
+  // SSE2 fallback: build a lane mask of (a<b) and blend a/b through it.
+  Packet4i mask = _mm_cmplt_epi32(a,b);
+  return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_max_epi32(a,b);
+#else
+  // Same compare-and-blend trick as pmin, with the comparison reversed.
+  Packet4i mask = _mm_cmpgt_epi32(a,b);
+  return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+#ifdef EIGEN_VECTORIZE_SSE4_1
+// Rounding ops need SSE4.1's ROUNDPS/ROUNDPD; immediate 0 selects
+// round-to-nearest-even. No pre-SSE4.1 fallback is provided here.
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
+#endif
+// --- Bitwise logic, one intrinsic per type/op ---
+// The float/double variants operate on the raw bit patterns; they are used
+// elsewhere for sign manipulation and mask-based blending.
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
+// pandnot(a,b) computes (~a) & b, matching the ANDN instruction's operand order.
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
+// --- Loads and stores ---
+// pload/pstore require 16-byte-aligned pointers; ploadu/pstoreu accept
+// unaligned ones. The EIGEN_DEBUG_*_LOAD/STORE tokens are bookkeeping
+// macros (presumably counters in instrumented builds; no-ops otherwise).
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
+#if EIGEN_COMP_MSVC
+  template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
+    EIGEN_DEBUG_UNALIGNED_LOAD
+    #if (EIGEN_COMP_MSVC==1600)
+    // NOTE(review): upstream workaround for an MSVC 2010 (_MSC_VER 1600)
+    // issue with _mm_loadu_ps: emulate the unaligned load with two 64-bit
+    // half-loads instead. Kept as-is to match upstream Eigen.
+    __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
+    res = _mm_loadh_pi(res, (const __m64*)(from+2));
+    return res;
+    #else
+    return _mm_loadu_ps(from);
+    #endif
+  }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+  return _mm_loadu_ps(from);
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+  return _mm_loadu_pd(from);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
+}
+// ploaddup loads size/2 scalars and duplicates each in adjacent lanes:
+// for Packet4f, {from[0],from[1]} -> {f0,f0,f1,f1}.
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+  // Load the two floats with a single 64-bit scalar-double load, then swizzle.
+  return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{ return pset1<Packet2d>(from[0]); }
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+  Packet4i tmp;
+  tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
+  return vec4i_swizzle1(tmp, 0, 0, 1, 1);
+}
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
+// --- Strided gather/scatter ---
+// pgather reads `size` scalars spaced `stride` elements apart into a packet;
+// pscatter writes a packet back out with the same spacing. No SSE gather
+// instruction exists, so both are done lane-by-lane through scalar moves.
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+ // _mm_set_ps takes arguments high-lane first, hence the reversed order.
+ return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ return _mm_set_pd(from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+ return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+ }
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+  // Extract lane k by shuffling it into position 0, then cvtss to scalar.
+  to[stride*0] = _mm_cvtss_f32(from);
+  to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
+  to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
+  to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+  to[stride*0] = _mm_cvtsd_f64(from);
+  to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+  to[stride*0] = _mm_cvtsi128_si32(from);
+  to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
+  to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
+  to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
+}
+// pstore1: broadcast scalar `a` to all lanes and store (aligned) to `to`.
+template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
+{
+  Packet4f pa = _mm_set_ss(a);
+  pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
+{
+  Packet2d pa = _mm_set_sd(a);
+  pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
+}
+#ifndef EIGEN_VECTORIZE_AVX
+// Prefetch hints into L1 (T0). Skipped under AVX — presumably the AVX
+// header provides these specializations instead (TODO confirm).
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+#endif
+// pfirst: extract lane 0 as a scalar. Three variants work around MSVC
+// codegen quirks in strict mode; the generic #else branch is the plain one.
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
+// On Win64/MSVC, read the union members of the __m128 types directly.
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#elif EIGEN_COMP_MSVC_STRICT
+// 32-bit MSVC: go through a named local instead of returning the intrinsic
+// result directly (upstream workaround; kept as-is).
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#else
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
+#endif
+// preverse: reverse lane order. Shuffle immediate 0x1B == _MM_SHUFFLE(0,1,2,3).
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{ return _mm_shuffle_ps(a,a,0x1B); }
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{ return _mm_shuffle_pd(a,a,0x1); }
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{ return _mm_shuffle_epi32(a,0x1B); }
+// pabs: clear the IEEE sign bit for float/double; integer uses SSSE3 PABSD
+// or, on plain SSE2, the classic (a ^ (a>>31)) - (a>>31) trick.
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
+{
+  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+  return _mm_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
+{
+  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+  return _mm_and_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
+{
+  #ifdef EIGEN_VECTORIZE_SSSE3
+  return _mm_abs_epi32(a);
+  #else
+  // aux = all-ones per lane if negative (arithmetic shift of the sign bit).
+  Packet4i aux = _mm_srai_epi32(a,31);
+  return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
+  #endif
+}
+#ifndef __AVX__
+// pbroadcast4: load a[0..3] and broadcast each scalar into its own packet.
+// a3 is deliberately loaded first and used as scratch, then overwritten last.
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+                      Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+  a3 = pload<Packet4f>(a);
+  a0 = vec4f_swizzle1(a3, 0,0,0,0);
+  a1 = vec4f_swizzle1(a3, 1,1,1,1);
+  a2 = vec4f_swizzle1(a3, 2,2,2,2);
+  a3 = vec4f_swizzle1(a3, 3,3,3,3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+                      Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+#ifdef EIGEN_VECTORIZE_SSE3
+  // MOVDDUP broadcasts a single double in one instruction.
+  a0 = _mm_loaddup_pd(a+0);
+  a1 = _mm_loaddup_pd(a+1);
+  a2 = _mm_loaddup_pd(a+2);
+  a3 = _mm_loaddup_pd(a+3);
+#else
+  // SSE2: two aligned pair-loads, then duplicate each half via swizzles.
+  a1 = pload<Packet2d>(a);
+  a0 = vec2d_swizzle1(a1, 0,0);
+  a1 = vec2d_swizzle1(a1, 1,1);
+  a3 = pload<Packet2d>(a+2);
+  a2 = vec2d_swizzle1(a3, 0,0);
+  a3 = vec2d_swizzle1(a3, 1,1);
+#endif
+}
+#endif
+// punpackp: broadcast each lane of vecs[0] into vecs[0..3] respectively.
+// Only vecs[0] is read; vecs[1..3] are overwritten. vecs[0] is done last
+// since the other three still need its original value.
+EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
+{
+  vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
+  vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
+  vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
+  vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
+}
+// --- Horizontal reductions (sum and product) ---
+// preduxp(vecs): given 4 packets, returns a packet whose lane k is the
+// horizontal sum of vecs[k]. predux(a): scalar sum of all lanes of a.
+#ifdef EIGEN_VECTORIZE_SSE3
+// SSE3 path: HADDPS/HADDPD do pairwise adds directly.
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+  return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+  return _mm_hadd_pd(vecs[0], vecs[1]);
+}
+#else
+// SSE2 path: transpose-like unpack sequence, then vertical adds.
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+  Packet4f tmp0, tmp1, tmp2;
+  tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
+  tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
+  tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
+  tmp0 = _mm_add_ps(tmp0, tmp1);
+  tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]);
+  tmp1 = _mm_add_ps(tmp1, tmp2);
+  tmp2 = _mm_movehl_ps(tmp1, tmp0);
+  tmp0 = _mm_movelh_ps(tmp0, tmp1);
+  return _mm_add_ps(tmp0, tmp2);
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+  return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
+}
+#endif
+// Scalar sum: fold the upper half onto the lower half, then the last pair.
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+  Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
+  return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+  return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
+}
+#ifdef EIGEN_VECTORIZE_SSSE3
+// Integer horizontal adds need SSSE3's PHADDD.
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+  return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+  Packet4i tmp0 = _mm_hadd_epi32(a,a);
+  return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));
+}
+#else
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+  Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
+  return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
+}
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+  Packet4i tmp0, tmp1, tmp2;
+  tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+  tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+  tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+  tmp0 = _mm_add_epi32(tmp0, tmp1);
+  tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+  tmp1 = _mm_add_epi32(tmp1, tmp2);
+  tmp2 = _mm_unpacklo_epi64(tmp0, tmp1);
+  tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
+  return _mm_add_epi32(tmp0, tmp2);
+}
+#endif
+// Horizontal product of all lanes.
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+  Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
+  return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+  return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+  // Integer product: spill to an aligned scalar buffer and multiply.
+  // NOTE(review): stray double semicolon on the return line below — harmless
+  // (empty statement) but worth cleaning up upstream.
+  EIGEN_ALIGN16 int aux[4];
+  pstore(aux, a);
+  return  (aux[0] * aux[1]) * (aux[2] * aux[3]);;
+}
+// --- Horizontal min/max reductions ---
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+  Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
+  return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+  return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  // log2 reduction with PMINSD.
+  Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+  return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+  // SSE2: spill to memory and reduce with scalar compares.
+  EIGEN_ALIGN16 int aux[4];
+  pstore(aux, a);
+  int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
+  int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
+  return aux0<aux2 ? aux0 : aux2;
+#endif
+}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+  Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
+  return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+  return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+  return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+  EIGEN_ALIGN16 int aux[4];
+  pstore(aux, a);
+  int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
+  int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
+  return aux0>aux2 ? aux0 : aux2;
+#endif
+}
+#if EIGEN_COMP_GNUC
+#endif
+#ifdef EIGEN_VECTORIZE_SSSE3
+// palign_impl::run(first, second): shift the concatenation [first|second]
+// left by Offset lanes, writing the result into `first`. SSSE3 maps this
+// directly to PALIGNR (byte offset = Offset * lane size).
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+  {
+    if (Offset!=0)
+      first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
+  }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+  {
+    if (Offset!=0)
+      first = _mm_alignr_epi8(second,first, Offset*4);
+  }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+  {
+    if (Offset==1)
+      first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
+  }
+};
+#else
+// SSE2 fallback: emulate PALIGNR with move/shuffle sequences, one branch
+// per possible Offset (Offset is a compile-time constant, so the unused
+// branches are eliminated).
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+  {
+    if (Offset==1)
+    {
+      first = _mm_move_ss(first,second);
+      first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
+    }
+    else if (Offset==2)
+    {
+      first = _mm_movehl_ps(first,first);
+      first = _mm_movelh_ps(first,second);
+    }
+    else if (Offset==3)
+    {
+      first = _mm_move_ss(first,second);
+      first = _mm_shuffle_ps(first,second,0x93);
+    }
+  }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+  {
+    // Same shapes as the Packet4f version, routed through float casts since
+    // SSE2 lacks the equivalent integer moves.
+    if (Offset==1)
+    {
+      first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+      first = _mm_shuffle_epi32(first,0x39);
+    }
+    else if (Offset==2)
+    {
+      first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
+      first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+    }
+    else if (Offset==3)
+    {
+      first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+      first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
+    }
+  }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+  {
+    if (Offset==1)
+    {
+      first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
+      first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
+    }
+  }
+};
+#endif
+// In-register transpose of a PacketBlock (NxN lanes).
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+  _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+  __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
+  kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
+  kernel.packet[1] = tmp;
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+  // Classic 4x4 transpose: interleave 32-bit lanes, then 64-bit halves.
+  __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
+  __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
+  __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
+  __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
+  kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
+  kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
+  kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
+  kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
+}
+// pblend: per-lane select — lane k gets thenPacket[k] where
+// ifPacket.select[k] is non-zero, elsePacket[k] otherwise. The selector is
+// turned into a "false" mask (select == 0), so BLENDV — which picks its
+// second argument where the mask is set — receives elsePacket there.
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+  __m128i false_mask = _mm_cmpeq_epi32(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
+#else
+  return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+  const __m128 zero = _mm_setzero_ps();
+  const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+  __m128 false_mask = _mm_cmpeq_ps(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
+#else
+  return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+  const __m128d zero = _mm_setzero_pd();
+  const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
+  __m128d false_mask = _mm_cmpeq_pd(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
+#else
+  return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
+#endif
+}
+// pinsertfirst/pinsertlast: replace the first/last lane of `a` with scalar
+// `b`, leaving the other lanes untouched. SSE4.1 uses a one-lane BLEND;
+// the fallbacks use a scalar move (first lane) or a mask blend (last lane).
+template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blend_ps(a,pset1<Packet4f>(b),1);
+#else
+  return _mm_move_ss(a, _mm_load_ss(&b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blend_pd(a,pset1<Packet2d>(b),1);
+#else
+  return _mm_move_sd(a, _mm_load_sd(&b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  // Blend immediate (1<<3) selects only lane 3 from the broadcast of b.
+  return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
+#else
+  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
+  return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
+#else
+  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
+  return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
+#endif
+}
+#ifdef __FMA__
+// Scalar fused multiply-add overloads, routed to the C99 fma functions so
+// scalar and packet paths round identically on FMA hardware.
+template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
+  return ::fmaf(a,b,c);
+}
+template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {
+  return ::fma(a,b,c);
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/PacketMath.h"
+// #include "src/Core/arch/SSE/Complex.h"
+#ifndef EIGEN_COMPLEX_SSE_H
+#define EIGEN_COMPLEX_SSE_H
+namespace Eigen {
+namespace internal {
+// Packet2cf: two std::complex<float> packed in one __m128 with interleaved
+// layout [re0, im0, re1, im1] (see pgather/pscatter below for evidence).
+struct Packet2cf
+{
+  EIGEN_STRONG_INLINE Packet2cf() {}
+  EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
+  __m128 v;
+};
+#ifndef EIGEN_VECTORIZE_AVX
+// Capability flags advertised to Eigen's vectorization engine for
+// std::complex<float>. Guarded out under AVX, where a wider packet type
+// presumably takes over (defined in the AVX headers).
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+  typedef Packet2cf type;
+  typedef Packet2cf half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 2,
+    HasHalfPacket = 0,
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasNegate = 1,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasSetLinear = 0,
+    HasBlend = 1
+  };
+};
+#endif
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
+// Negate both real and imaginary parts: flip all four sign bits.
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
+{
+  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+  return Packet2cf(_mm_xor_ps(a.v,mask));
+}
+// Conjugate: flip the sign bits of lanes 1 and 3 only (the imaginary parts).
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
+{
+  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+  return Packet2cf(_mm_xor_ps(a.v,mask));
+}
+// Complex multiply: (ar+i*ai)(br+i*bi) = (ar*br - ai*bi) + i(ar*bi + ai*br).
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  #ifdef EIGEN_VECTORIZE_SSE3
+  // ADDSUBPS subtracts in even lanes and adds in odd lanes, giving the
+  // real/imag combination in one instruction.
+  return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
+                                 _mm_mul_ps(_mm_movehdup_ps(a.v),
+                                            vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+  #else
+  // SSE2: emulate addsub by sign-flipping the even lanes before adding.
+  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
+  return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+                              _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+                                                    vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+  #endif
+}
+// Bitwise ops on complex packets operate on the raw float lanes.
+template<> EIGEN_STRONG_INLINE Packet2cf pand  <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf por   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pxor  <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
+// Loads reuse the real-float paths; real_ref gives the address of the
+// leading real component of the (re,im) pair.
+template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }
+// Broadcast a single complex into both halves of the packet. The GCC
+// version branches work around compiler diagnostics/bugs for the
+// deliberately-uninitialized res.v (see the -Wuninitialized pragma).
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)
+{
+  Packet2cf res;
+#if EIGEN_GNUC_AT_MOST(4,2)
+  res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
+#elif EIGEN_GNUC_AT_LEAST(4,6)
+  #pragma GCC diagnostic push
+  #pragma GCC diagnostic ignored "-Wuninitialized"
+  res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+  #pragma GCC diagnostic pop
+#else
+  res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+#endif
+  return Packet2cf(_mm_movelh_ps(res.v,res.v));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
+// Strided gather/scatter over complex values; the lane order here fixes the
+// in-register layout as [re0, im0, re1, im1].
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+  return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
+                              std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+  to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
+                                     _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
+  to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
+                                     _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
+}
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *   addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+// pfirst: extract the first complex (lanes 0 and 1). The GCC<=4.3 branch
+// spills the whole packet to memory instead of using storel_pi (upstream
+// workaround; kept as-is).
+template<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)
+{
+  #if EIGEN_GNUC_AT_MOST(4,3)
+  EIGEN_ALIGN16 std::complex<float> res[2];
+  _mm_store_ps((float*)res, a.v);
+  return res[0];
+  #else
+  std::complex<float> res;
+  _mm_storel_pi((__m64*)&res, a.v);
+  return res;
+  #endif
+}
+// Reverse the two complex numbers: treat the packet as two 64-bit doubles
+// and reuse the real Packet2d preverse.
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
+// Horizontal sum of the two complex values (adds upper half onto lower).
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+  return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
+}
+// preduxp: lane k of the result is the horizontal sum of vecs[k].
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+  return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
+}
+// Horizontal product of the two complex values (full complex multiply).
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+  return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
+}
+// palign for complex: a lane is one whole complex number, so only
+// Offset==1 does anything (shift by one complex = 8 bytes).
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+  {
+    if (Offset==1)
+    {
+      first.v = _mm_movehl_ps(first.v, first.v);
+      first.v = _mm_movelh_ps(first.v, second.v);
+    }
+  }
+};
+// conj_helper<Lhs,Rhs,ConjLhs,ConjRhs>: product kernels that fold operand
+// conjugation into the multiply. The two bool parameters say which side is
+// conjugated. SSE3 builds delegate to pmul/pconj; SSE2 builds inline the
+// sign-mask arithmetic directly.
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+  // a * conj(b)
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    #ifdef EIGEN_VECTORIZE_SSE3
+    return internal::pmul(a, pconj(b));
+    #else
+    const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+    return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+                                _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+                                           vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+    #endif
+  }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+  // conj(a) * b
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    #ifdef EIGEN_VECTORIZE_SSE3
+    return internal::pmul(pconj(a), b);
+    #else
+    const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+    return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+                                _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+                                                      vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+    #endif
+  }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+  // conj(a * b) == conj(a) * conj(b)
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    #ifdef EIGEN_VECTORIZE_SSE3
+    return pconj(internal::pmul(a, b));
+    #else
+    const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+    return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+                                _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+                                           vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+    #endif
+  }
+};
+// Mixed real * complex products: the real packet already holds the scalar
+// duplicated per complex lane, so a plain lane-wise float multiply suffices.
+template<> struct conj_helper<Packet4f, Packet2cf, false,false>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(c, pmul(x,y)); }
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
+  { return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
+};
+template<> struct conj_helper<Packet2cf, Packet4f, false,false>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
+  { return padd(c, pmul(x,y)); }
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
+  { return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
+};
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
+ __m128 s = _mm_mul_ps(b.v,b.v);
+ return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
+}
+EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x)
+{
+ return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
+}
+// SSE packet holding a single std::complex<double> in one __m128d (low = real, high = imag).
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
+ __m128d v;
+};
+// Not defined when AVX is enabled: the AVX header provides the complex<double> traits instead.
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+#endif
+// Elementwise and memory primitives for Packet1cd (one complex<double> per packet).
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+// Conjugate: flip the sign bit of the high (imaginary) double only.
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
+{
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_xor_pd(a.v,mask));
+}
+// Complex multiply: (ar*br - ai*bi, ar*bi + ai*br).
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ #ifdef EIGEN_VECTORIZE_SSE3
+ // addsub does the -/+ combination in one instruction.
+ return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #else
+ // mask negates only the low (real) lane of the second product to emulate addsub.
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
+ return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+ _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0)), mask)));
+ #endif
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
+// Loads/stores reinterpret the complex<double> as two contiguous doubles.
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ return ploadu<Packet1cd>(&from); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+// Extract the single complex value via an aligned spill to the stack.
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ EIGEN_ALIGN16 double res[2];
+ _mm_store_pd(res, a.v);
+ return std::complex<double>(res[0],res[1]);
+}
+// With a single element per packet, reverse/reductions are trivially the identity/first element.
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
+{
+ return vecs[0];
+}
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+// Aligning a size-1 packet is a no-op for any offset.
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
+ {
+ }
+};
+// a*conj(b) for a single complex<double>.
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(a, pconj(b));
+ #else
+ // mask flips the sign of the high (imaginary) lane.
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #endif
+ }
+};
+// conj(a)*b for a single complex<double>.
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(pconj(a), b);
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+ _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0)), mask)));
+ #endif
+ }
+};
+// conj(a)*conj(b) == conj(a*b) for a single complex<double>.
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return pconj(internal::pmul(a, b));
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #endif
+ }
+};
+// Mixed real*complex and complex*real products for double precision.
+template<> struct conj_helper<Packet2d, Packet1cd, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
+ { return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
+};
+template<> struct conj_helper<Packet1cd, Packet2d, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
+ { return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
+};
+// Complex division: a/b == (a*conj(b)) / |b|^2; the shuffle sums re^2 and im^2 into both lanes.
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ __m128d s = _mm_mul_pd(b.v,b.v);
+ return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
+}
+// Swap real and imaginary parts (reversing the two doubles).
+EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+// 2x2 transpose of complex<float> packets; each complex value is moved as one 64-bit unit
+// by reinterpreting the float registers as double lanes.
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cf,2>& kernel) {
+ __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
+ __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
+ __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
+ kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
+ kernel.packet[1].v = tmp;
+}
+// Per-complex select: delegates to the double-lane blend so each complex stays intact.
+template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
+ __m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
+ return Packet2cf(_mm_castpd_ps(result));
+}
+// Replace the first complex of the packet with b (low 64 bits); the second is kept from a.
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
+{
+ return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+// A Packet1cd holds a single complex, so inserting at either end is just a broadcast of b.
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
+{
+ return pset1<Packet1cd>(b);
+}
+// Replace the last complex of the packet with b (high 64 bits); the first is kept from a.
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
+{
+ return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
+{
+ return pset1<Packet1cd>(b);
+}
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/Complex.h"
+// #include "src/Core/arch/SSE/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
+#define EIGEN_MATH_FUNCTIONS_SSE_H
+namespace Eigen {
+namespace internal {
+// Vectorized natural logarithm for 4 floats, based on the cephes library polynomial
+// approximation: split x into mantissa*2^e, evaluate log(mantissa) as a degree-8
+// polynomial, then add e*log(2). Negative inputs yield NaN and zeros yield -inf.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f plog<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
+ Packet4i emm0;
+ // Record lanes that are negative or NaN (-> NaN) and lanes that are zero (-> -inf).
+ Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps());
+ Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
+ // Clamp to the smallest normalized positive float to avoid denormals in the bit tricks below.
+ x = pmax(x, p4f_min_norm_pos);
+ // Extract the biased exponent bits and force the mantissa into [0.5, 1).
+ emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
+ x = _mm_and_ps(x, p4f_inv_mant_mask);
+ x = _mm_or_ps(x, p4f_half);
+ emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
+ Packet4f e = padd(Packet4f(_mm_cvtepi32_ps(emm0)), p4f_1);
+ // If mantissa < sqrt(1/2), adjust: double the mantissa and decrement the exponent,
+ // keeping the argument of the polynomial near zero for accuracy.
+ Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
+ Packet4f tmp = pand(x, mask);
+ x = psub(x, p4f_1);
+ e = psub(e, pand(p4f_1, mask));
+ x = padd(x, tmp);
+ Packet4f x2 = pmul(x,x);
+ Packet4f x3 = pmul(x2,x);
+ // Degree-8 polynomial evaluated as three interleaved cubic sub-polynomials
+ // to expose instruction-level parallelism.
+ Packet4f y, y1, y2;
+ y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
+ y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
+ y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
+ y = pmadd(y , x, p4f_cephes_log_p2);
+ y1 = pmadd(y1, x, p4f_cephes_log_p5);
+ y2 = pmadd(y2, x, p4f_cephes_log_p8);
+ y = pmadd(y, x3, y1);
+ y = pmadd(y, x3, y2);
+ y = pmul(y, x3);
+ // Add e*log(2), split as q1+q2 for extra precision.
+ y1 = pmul(e, p4f_cephes_log_q1);
+ tmp = pmul(x2, p4f_half);
+ y = padd(y, y1);
+ x = psub(x, tmp);
+ y2 = pmul(e, p4f_cephes_log_q2);
+ x = padd(x, y);
+ x = padd(x, y2);
+ // Merge special cases: invalid lanes become NaN, zero lanes become -inf.
+ return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
+ _mm_and_ps(iszero_mask, p4f_minus_inf));
+}
+// Vectorized exp for 4 floats (cephes-style): write x = m*log(2) + r with |r| <= log(2)/2,
+// approximate e^r with a degree-5 polynomial, then scale by 2^m via exponent-bit injection.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+ Packet4f tmp, fx;
+ Packet4i emm0;
+ // Clamp the input to the range where float exp neither overflows nor underflows.
+ x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
+ // fx = round(x / log(2)), computed as floor(x*log2(e) + 0.5).
+ fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ fx = _mm_floor_ps(fx);
+#else
+ // Emulate floor: truncate, then subtract 1 where truncation rounded up.
+ emm0 = _mm_cvttps_epi32(fx);
+ tmp = _mm_cvtepi32_ps(emm0);
+ Packet4f mask = _mm_cmpgt_ps(tmp, fx);
+ mask = _mm_and_ps(mask, p4f_1);
+ fx = psub(tmp, mask);
+#endif
+ // Subtract fx*log(2) in two parts (C1 + C2 == log(2)) for extra precision.
+ tmp = pmul(fx, p4f_cephes_exp_C1);
+ Packet4f z = pmul(fx, p4f_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+ z = pmul(x,x);
+ // Degree-5 polynomial approximation of e^r - 1 - r on the reduced range.
+ Packet4f y = p4f_cephes_exp_p0;
+ y = pmadd(y, x, p4f_cephes_exp_p1);
+ y = pmadd(y, x, p4f_cephes_exp_p2);
+ y = pmadd(y, x, p4f_cephes_exp_p3);
+ y = pmadd(y, x, p4f_cephes_exp_p4);
+ y = pmadd(y, x, p4f_cephes_exp_p5);
+ y = pmadd(y, z, x);
+ y = padd(y, p4f_1);
+ // Build 2^fx by placing the biased exponent into the float's exponent bits.
+ emm0 = _mm_cvttps_epi32(fx);
+ emm0 = _mm_add_epi32(emm0, p4i_0x7f);
+ emm0 = _mm_slli_epi32(emm0, 23);
+ // The max with _x propagates NaN inputs to the result.
+ return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
+}
+// Vectorized exp for 2 doubles: same range reduction as the float version, but e^r is
+// approximated by a Pade-style rational P(x)/Q(x) (cephes) for double-precision accuracy.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+ Packet2d x = _x;
+ _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+ _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+ _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+ _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
+ _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+ // Exponent bias (1023) placed in the two lanes that feed the double exponent bits.
+ static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
+ Packet2d tmp, fx;
+ Packet4i emm0;
+ // Clamp to the non-overflow/non-underflow range of double exp.
+ x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+ // fx = round(x / log(2)).
+ fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ fx = _mm_floor_pd(fx);
+#else
+ // Emulate floor: truncate, then subtract 1 where truncation rounded up.
+ emm0 = _mm_cvttpd_epi32(fx);
+ tmp = _mm_cvtepi32_pd(emm0);
+ Packet2d mask = _mm_cmpgt_pd(tmp, fx);
+ mask = _mm_and_pd(mask, p2d_1);
+ fx = psub(tmp, mask);
+#endif
+ // Subtract fx*log(2) in two parts for extra precision.
+ tmp = pmul(fx, p2d_cephes_exp_C1);
+ Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+ Packet2d x2 = pmul(x,x);
+ // Rational approximation e^r ~= 1 + 2*P(r)/(Q(r) - P(r)).
+ Packet2d px = p2d_cephes_exp_p0;
+ px = pmadd(px, x2, p2d_cephes_exp_p1);
+ px = pmadd(px, x2, p2d_cephes_exp_p2);
+ px = pmul (px, x);
+ Packet2d qx = p2d_cephes_exp_q0;
+ qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+ x = pdiv(px,psub(qx,px));
+ x = pmadd(p2d_2,x,p2d_1);
+ // Build 2^fx: bias the exponent, shift into position, and shuffle the 32-bit
+ // halves into the high words of the two double lanes.
+ emm0 = _mm_cvttpd_epi32(fx);
+ emm0 = _mm_add_epi32(emm0, p4i_1023_0);
+ emm0 = _mm_slli_epi32(emm0, 20);
+ emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
+ // The max with _x propagates NaN inputs to the result.
+ return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
+}
+// Vectorized sine for 4 floats (cephes): reduce the argument to an octant of the circle
+// using y = round(x * 4/pi), pick sine or cosine polynomial per lane depending on the
+// octant, and restore the sign at the end. Accuracy degrades for very large |x|
+// because the reduction uses only three-part DP arithmetic.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psin<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+ _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+ _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+ _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);
+ Packet4f xmm1, xmm2, xmm3, sign_bit, y;
+ Packet4i emm0, emm2;
+ // Save the input sign and work on |x|; sin(-x) = -sin(x).
+ sign_bit = x;
+ x = pabs(x);
+ sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask);
+ // Octant index j = (round-to-even of x*4/pi), forced to a multiple of 2.
+ y = pmul(x, p4f_cephes_FOPI);
+ emm2 = _mm_cvttps_epi32(y);
+ emm2 = _mm_add_epi32(emm2, p4i_1);
+ emm2 = _mm_and_si128(emm2, p4i_not1);
+ y = _mm_cvtepi32_ps(emm2);
+ // Bit 2 of j flips the sign; bit 1 selects the polynomial (sin vs cos).
+ emm0 = _mm_and_si128(emm2, p4i_4);
+ emm0 = _mm_slli_epi32(emm0, 29);
+ emm2 = _mm_and_si128(emm2, p4i_2);
+ emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+ Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
+ Packet4f poly_mask = _mm_castsi128_ps(emm2);
+ sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
+ // Extended-precision argument reduction: x -= y*pi/4 using three partial constants.
+ xmm1 = pmul(y, p4f_minus_cephes_DP1);
+ xmm2 = pmul(y, p4f_minus_cephes_DP2);
+ xmm3 = pmul(y, p4f_minus_cephes_DP3);
+ x = padd(x, xmm1);
+ x = padd(x, xmm2);
+ x = padd(x, xmm3);
+ // Cosine polynomial: 1 - z/2 + z^2*(c0*z^2 + c1*z + c2)-style evaluation.
+ y = p4f_coscof_p0;
+ Packet4f z = _mm_mul_ps(x,x);
+ y = pmadd(y, z, p4f_coscof_p1);
+ y = pmadd(y, z, p4f_coscof_p2);
+ y = pmul(y, z);
+ y = pmul(y, z);
+ Packet4f tmp = pmul(z, p4f_half);
+ y = psub(y, tmp);
+ y = padd(y, p4f_1);
+ // Sine polynomial: x + x*z*(s0*z^2 + s1*z + s2).
+ Packet4f y2 = p4f_sincof_p0;
+ y2 = pmadd(y2, z, p4f_sincof_p1);
+ y2 = pmadd(y2, z, p4f_sincof_p2);
+ y2 = pmul(y2, z);
+ y2 = pmul(y2, x);
+ y2 = padd(y2, x);
+ // Select sine or cosine result per lane, then apply the accumulated sign.
+ y2 = _mm_and_ps(poly_mask, y2);
+ y = _mm_andnot_ps(poly_mask, y);
+ y = _mm_or_ps(y,y2);
+ return _mm_xor_ps(y, sign_bit);
+}
+// Vectorized cosine for 4 floats: same octant-based cephes scheme as psin above, but
+// with the octant index shifted by 2 (cos(x) = sin(x + pi/2)) and no input sign to
+// track since cosine is even.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pcos<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+ _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+ _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+ _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);
+ Packet4f xmm1, xmm2, xmm3, y;
+ Packet4i emm0, emm2;
+ // cos is even: drop the input sign entirely.
+ x = pabs(x);
+ // Octant index as in psin, then shifted by 2 to turn the cosine into a sine problem.
+ y = pmul(x, p4f_cephes_FOPI);
+ emm2 = _mm_cvttps_epi32(y);
+ emm2 = _mm_add_epi32(emm2, p4i_1);
+ emm2 = _mm_and_si128(emm2, p4i_not1);
+ y = _mm_cvtepi32_ps(emm2);
+ emm2 = _mm_sub_epi32(emm2, p4i_2);
+ // Bit tricks derive the output sign and the sin/cos polynomial selector per lane.
+ emm0 = _mm_andnot_si128(emm2, p4i_4);
+ emm0 = _mm_slli_epi32(emm0, 29);
+ emm2 = _mm_and_si128(emm2, p4i_2);
+ emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+ Packet4f sign_bit = _mm_castsi128_ps(emm0);
+ Packet4f poly_mask = _mm_castsi128_ps(emm2);
+ // Extended-precision argument reduction: x -= y*pi/4 in three parts.
+ xmm1 = pmul(y, p4f_minus_cephes_DP1);
+ xmm2 = pmul(y, p4f_minus_cephes_DP2);
+ xmm3 = pmul(y, p4f_minus_cephes_DP3);
+ x = padd(x, xmm1);
+ x = padd(x, xmm2);
+ x = padd(x, xmm3);
+ // Cosine polynomial branch.
+ y = p4f_coscof_p0;
+ Packet4f z = pmul(x,x);
+ y = pmadd(y,z,p4f_coscof_p1);
+ y = pmadd(y,z,p4f_coscof_p2);
+ y = pmul(y, z);
+ y = pmul(y, z);
+ Packet4f tmp = _mm_mul_ps(z, p4f_half);
+ y = psub(y, tmp);
+ y = padd(y, p4f_1);
+ // Sine polynomial branch.
+ Packet4f y2 = p4f_sincof_p0;
+ y2 = pmadd(y2, z, p4f_sincof_p1);
+ y2 = pmadd(y2, z, p4f_sincof_p2);
+ y2 = pmul(y2, z);
+ y2 = pmadd(y2, x, x);
+ // Select the right branch per lane and apply the sign.
+ y2 = _mm_and_ps(poly_mask, y2);
+ y = _mm_andnot_ps(poly_mask, y);
+ y = _mm_or_ps(y,y2);
+ return _mm_xor_ps(y, sign_bit);
+}
+#if EIGEN_FAST_MATH
+// Fast sqrt: rsqrtps estimate refined by one Newton-Raphson step, then multiplied by x.
+// The denormal mask forces the result to zero for inputs in [0, FLT_MIN) where the
+// rsqrt estimate would blow up; negative inputs and NaN still produce NaN via rsqrtps.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& _x)
+{
+ Packet4f half = pmul(_x, pset1<Packet4f>(.5f));
+ Packet4f denormal_mask = _mm_and_ps(
+ _mm_cmpge_ps(_x, _mm_setzero_ps()),
+ _mm_cmplt_ps(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())));
+ Packet4f x = _mm_rsqrt_ps(_x);
+ // One Newton-Raphson iteration: x *= 1.5 - 0.5*_x*x^2.
+ x = pmul(x, psub(pset1<Packet4f>(1.5f), pmul(half, pmul(x,x))));
+ return _mm_andnot_ps(denormal_mask, pmul(_x,x));
+}
+#else
+// Exact path: use the hardware sqrt instruction.
+template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
+#endif
+// Double precision always uses the hardware sqrt (no fast-math shortcut).
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
+#if EIGEN_FAST_MATH
+// Fast reciprocal sqrt: rsqrtps estimate plus one Newton-Raphson step, with explicit
+// handling of the edge cases the estimate gets wrong: +inf for inputs in [0, FLT_MIN],
+// NaN for negative inputs.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(nan, 0x7fc00000);
+ _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000);
+ Packet4f neg_half = pmul(_x, p4f_minus_half);
+ // Zero out the estimate for tiny/zero inputs so the NR step cannot produce garbage.
+ Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
+ Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
+ Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
+ Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
+ Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
+ _mm_and_ps(zero_mask, p4f_inf));
+ // Newton-Raphson: x *= 1.5 - 0.5*_x*x^2.
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));
+ return _mm_or_ps(x, infs_and_nans);
+}
+#else
+// Exact path: full-precision divide by sqrt.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& x) {
+ return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
+}
+#endif
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x) {
+ return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
+}
+// tanh delegates to the architecture-independent fast rational approximation.
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
+ptanh<Packet4f>(const Packet4f& x) {
+ return internal::generic_fast_tanh_float(x);
+}
+}
+namespace numext {
+// Scalar sqrt specializations routed through the SSE scalar sqrt instructions,
+// avoiding the libm call.
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sqrt(const float &x)
+{
+ return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sqrt(const double &x)
+{
+#if EIGEN_COMP_GNUC_STRICT
+ // Strict GCC: use the builtin directly; _mm_sqrt_pd would compute both lanes.
+ return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
+#else
+ return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
+#endif
+}
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/MathFunctions.h"
+// #include "src/Core/arch/AVX/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_AVX_H
+#define EIGEN_PACKET_MATH_AVX_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#endif
+// AVX 256-bit packet types: 8 floats, 8 ints, 4 doubles.
+typedef __m256 Packet8f;
+typedef __m256i Packet8i;
+typedef __m256d Packet4d;
+template<> struct is_arithmetic<__m256> { enum { value = true }; };
+template<> struct is_arithmetic<__m256i> { enum { value = true }; };
+template<> struct is_arithmetic<__m256d> { enum { value = true }; };
+// Helpers to declare broadcast constants inside packet math functions.
+#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
+ const Packet8f p8f_##NAME = pset1<Packet8f>(X)
+#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
+ const Packet4d p4d_##NAME = pset1<Packet4d>(X)
+#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
+ const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))
+#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
+ const Packet8i p8i_##NAME = pset1<Packet8i>(X)
+// Skipped when AVX-512 is enabled: the AVX-512 header provides wider traits instead.
+#ifndef EIGEN_VECTORIZE_AVX512
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet8f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=8,
+ HasHalfPacket = 1,
+ HasDiv = 1,
+ HasSin = EIGEN_FAST_MATH,
+ HasCos = 0,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasTanh = EIGEN_FAST_MATH,
+ HasBlend = 1,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+ };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet4d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket = 1,
+ HasDiv = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasBlend = 1,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+ };
+};
+#endif
+// Cost model hints for vectorized division.
+template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
+template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8, alignment=Aligned32}; };
+template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4, alignment=Aligned32}; };
+template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32}; };
+// Broadcasts and linearly-spaced packet constructors.
+template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { return _mm256_set1_epi32(from); }
+template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
+template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
+template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
+// Elementwise arithmetic wrappers over the AVX intrinsics.
+template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
+// Negation as 0 - a (AVX has no dedicated float negate).
+template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
+{
+ return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
+{
+ return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
+}
+// Conjugation is the identity for real packets.
+template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
+// Integer division has no AVX instruction; assert in debug builds and return zeros.
+template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& , const Packet8i& )
+{ eigen_assert(false && "packet integer division are not supported by AVX");
+ return pset1<Packet8i>(0);
+}
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
+#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
+ // Inline asm works around compilers that split the fused multiply-add into
+ // separate mul/add instructions (costing registers and fusion).
+ Packet8f res = c;
+ __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
+ return res;
+#else
+ return _mm256_fmadd_ps(a,b,c);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
+#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
+ Packet4d res = c;
+ __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
+ return res;
+#else
+ return _mm256_fmadd_pd(a,b,c);
+#endif
+}
+#endif
+// Min/max, rounding, and bitwise wrappers over the AVX intrinsics.
+template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
+// pround follows the current MXCSR rounding mode rather than round-half-away.
+template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); }
+template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); }
+template<> EIGEN_STRONG_INLINE Packet8f pceil<Packet8f>(const Packet8f& a) { return _mm256_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a) { return _mm256_ceil_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_or_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_xor_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_andnot_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_andnot_pd(a,b); }
+// Aligned and unaligned 256-bit loads.
+template<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); }
+template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
+// Loads a,b,c,d and returns a,a,b,b,c,c,d,d via broadcast + in-lane permutes.
+template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
+{
+ Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
+ tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);
+ return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));
+}
+// Loads a,b and returns a,a,b,b.
+template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
+{
+ Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from);
+ return _mm256_permute_pd(tmp, 3<<2);
+}
+// Loads a,b and returns a,a,a,a,b,b,b,b (one value per 128-bit half).
+template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
+{
+ Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
+ return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);
+}
+// Aligned and unaligned 256-bit stores.
+// NOTE(review): pstore<int> uses the *unaligned* storeu intrinsic despite the aligned
+// debug marker — looks intentional upstream, but confirm against the Eigen history.
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
+// Gather eight floats at a constant element stride into one Packet8f.
+template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, Index stride)
+{
+  // Read the strided elements into named scalars first; _mm256_set_ps takes
+  // its arguments from the highest lane down to the lowest.
+  const float f0 = from[0*stride], f1 = from[1*stride], f2 = from[2*stride], f3 = from[3*stride];
+  const float f4 = from[4*stride], f5 = from[5*stride], f6 = from[6*stride], f7 = from[7*stride];
+  return _mm256_set_ps(f7, f6, f5, f4, f3, f2, f1, f0);
+}
+// Gather four doubles at a constant element stride into one Packet4d.
+template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, Index stride)
+{
+  // _mm256_set_pd takes its arguments from the highest lane down to the lowest.
+  const double d0 = from[0*stride], d1 = from[1*stride];
+  const double d2 = from[2*stride], d3 = from[3*stride];
+  return _mm256_set_pd(d3, d2, d1, d0);
+}
+// Scatter a Packet8f to eight strided float slots: extract each 128-bit lane,
+// then peel off the four scalars of each lane with shuffles.
+template<> EIGEN_DEVIC E_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, Index stride)
+{
+  __m128 low = _mm256_extractf128_ps(from, 0);
+  to[stride*0] = _mm_cvtss_f32(low);
+  to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));
+  to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));
+  to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));
+  __m128 high = _mm256_extractf128_ps(from, 1);
+  to[stride*4] = _mm_cvtss_f32(high);
+  to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));
+  to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
+  to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
+}
+// Scatter a Packet4d to four strided double slots, lane by lane.
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, Index stride)
+{
+  __m128d low = _mm256_extractf128_pd(from, 0);
+  to[stride*0] = _mm_cvtsd_f64(low);
+  to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));
+  __m128d high = _mm256_extractf128_pd(from, 1);
+  to[stride*2] = _mm_cvtsd_f64(high);
+  to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
+}
+// pstore1: broadcast a scalar to a full packet and write it with an aligned store.
+template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
+{
+  pstore(to, pset1<Packet8f>(a));
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)
+{
+  pstore(to, pset1<Packet4d>(a));
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
+{
+  pstore(to, pset1<Packet8i>(a));
+}
+// Prefetch hints into L1; the AVX512 header provides its own specializations,
+// hence the guard.
+#ifndef EIGEN_VECTORIZE_AVX512
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+#endif
+// pfirst: extract element 0 of a packet as a scalar (lane-0 cast, no shuffle).
+template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
+  return _mm_cvtss_f32(_mm256_castps256_ps128(a));
+}
+template<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a) {
+  return _mm_cvtsd_f64(_mm256_castpd256_pd128(a));
+}
+template<> EIGEN_STRONG_INLINE int pfirst<Packet8i>(const Packet8i& a) {
+  return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
+}
+// Reverse the eight floats: reverse within each 128-bit lane (0x1b = 3,2,1,0),
+// then swap the two lanes.
+template<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)
+{
+  __m256 tmp = _mm256_shuffle_ps(a,a,0x1b);
+  return _mm256_permute2f128_ps(tmp, tmp, 1);
+}
+// Reverse the four doubles: swap within each 128-bit lane (mask 5 = 0b0101),
+// then swap the two lanes.
+// Fix: removed two unreachable statements that followed the return — an
+// alternative implementation that upstream Eigen keeps disabled under `#if 0`;
+// the guard was lost in this import, leaving dead code.
+template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
+{
+  __m256d tmp = _mm256_shuffle_pd(a,a,5);
+  return _mm256_permute2f128_pd(tmp, tmp, 1);
+}
+// Absolute value: AND with a mask that clears only the sign bit of each element.
+template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
+{
+  const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+  return _mm256_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
+{
+  // Per double: low 32 bits all ones, high 32 bits 0x7FFFFFFF (clears bit 63).
+  const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+  return _mm256_and_pd(a,mask);
+}
+// preduxp: given 8 packets, produce one packet whose element i is the sum of
+// all elements of vecs[i]. Built from pairwise horizontal adds, cross-lane
+// permutes (0x23 swaps the 128-bit lanes) and blends.
+template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
+{
+  __m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
+  __m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
+  __m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
+  __m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
+  __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+  __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+  __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+  __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+  __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+  __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+  __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+  __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+  __m256 sum1 = _mm256_add_ps(perm1, hsum5);
+  __m256 sum2 = _mm256_add_ps(perm2, hsum6);
+  __m256 sum3 = _mm256_add_ps(perm3, hsum7);
+  __m256 sum4 = _mm256_add_ps(perm4, hsum8);
+  __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+  __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+  __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
+  return final;
+}
+// Same contract for 4 double packets.
+template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
+{
+  Packet4d tmp0, tmp1;
+  tmp0 = _mm256_hadd_pd(vecs[0], vecs[1]);
+  tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+  tmp1 = _mm256_hadd_pd(vecs[2], vecs[3]);
+  tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+  return _mm256_blend_pd(tmp0, tmp1, 0xC);
+}
+// Horizontal reductions. The common pattern: fold the high 128-bit lane onto
+// the low one, then keep folding within the lane until one scalar remains.
+// predux: sum of all elements; delegates the 128-bit tail to the SSE predux.
+template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
+{
+  return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
+{
+  return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
+}
+// Reduce 8 floats to 4 by adding the two 128-bit lanes element-wise.
+template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
+{
+  return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
+}
+// Product of all elements.
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
+{
+  Packet8f tmp;
+  tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
+  tmp = _mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+  return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)
+{
+  Packet4d tmp;
+  tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
+  return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp,tmp,1)));
+}
+// Minimum of all elements.
+template<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)
+{
+  Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
+  tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+  return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)
+{
+  Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
+  return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
+}
+// Maximum of all elements.
+template<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)
+{
+  Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
+  tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+  return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
+{
+  Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
+  return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
+}
+// palign_impl::run(first, second): shift the concatenation [first|second] left
+// by Offset elements and store the result in `first`. AVX has no cross-lane
+// element shift, so each Offset case hand-builds the result from a blend (to
+// pull in the leading elements of `second`), an in-lane permute, and a
+// cross-lane permute. The masks are specific to each Offset; do not reorder.
+template<int Offset>
+struct palign_impl<Offset,Packet8f>
+{
+  static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
+  {
+    if (Offset==1)
+    {
+      first = _mm256_blend_ps(first, second, 1);
+      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
+      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+      first = _mm256_blend_ps(tmp1, tmp2, 0x88);
+    }
+    else if (Offset==2)
+    {
+      first = _mm256_blend_ps(first, second, 3);
+      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
+      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+      first = _mm256_blend_ps(tmp1, tmp2, 0xcc);
+    }
+    else if (Offset==3)
+    {
+      first = _mm256_blend_ps(first, second, 7);
+      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
+      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+      first = _mm256_blend_ps(tmp1, tmp2, 0xee);
+    }
+    else if (Offset==4)
+    {
+      // Offset 4 is exactly a lane swap.
+      first = _mm256_blend_ps(first, second, 15);
+      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
+      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+      first = _mm256_permute_ps(tmp2, _MM_SHUFFLE(3,2,1,0));
+    }
+    else if (Offset==5)
+    {
+      first = _mm256_blend_ps(first, second, 31);
+      first = _mm256_permute2f128_ps(first, first, 1);
+      Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
+      first = _mm256_permute2f128_ps(tmp, tmp, 1);
+      first = _mm256_blend_ps(tmp, first, 0x88);
+    }
+    else if (Offset==6)
+    {
+      first = _mm256_blend_ps(first, second, 63);
+      first = _mm256_permute2f128_ps(first, first, 1);
+      Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
+      first = _mm256_permute2f128_ps(tmp, tmp, 1);
+      first = _mm256_blend_ps(tmp, first, 0xcc);
+    }
+    else if (Offset==7)
+    {
+      first = _mm256_blend_ps(first, second, 127);
+      first = _mm256_permute2f128_ps(first, first, 1);
+      Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
+      first = _mm256_permute2f128_ps(tmp, tmp, 1);
+      first = _mm256_blend_ps(tmp, first, 0xee);
+    }
+  }
+};
+// Same contract for 4-double packets (Offset 1..3).
+template<int Offset>
+struct palign_impl<Offset,Packet4d>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
+  {
+    if (Offset==1)
+    {
+      first = _mm256_blend_pd(first, second, 1);
+      __m256d tmp = _mm256_permute_pd(first, 5);
+      first = _mm256_permute2f128_pd(tmp, tmp, 1);
+      first = _mm256_blend_pd(tmp, first, 0xA);
+    }
+    else if (Offset==2)
+    {
+      // Offset 2 is exactly a lane swap.
+      first = _mm256_blend_pd(first, second, 3);
+      first = _mm256_permute2f128_pd(first, first, 1);
+    }
+    else if (Offset==3)
+    {
+      first = _mm256_blend_pd(first, second, 7);
+      __m256d tmp = _mm256_permute_pd(first, 5);
+      first = _mm256_permute2f128_pd(tmp, tmp, 1);
+      first = _mm256_blend_pd(tmp, first, 5);
+    }
+  }
+};
+// In-register 8x8 float transpose: unpack pairs of rows (T*), shuffle them
+// into 4-element runs (S*), then recombine the 128-bit lanes (0x20 = low
+// halves, 0x31 = high halves).
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet8f,8>& kernel) {
+  __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+  __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+  __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+  __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+  __m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
+  __m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
+  __m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
+  __m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
+  __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
+  __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
+  __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
+  __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
+  __m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));
+  __m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));
+  __m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));
+  __m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));
+  kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);
+  kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);
+  kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
+  kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
+  kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);
+  kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);
+  kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
+  kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
+}
+// 4x8 variant: transpose four 8-float rows (result is the 8x4 transpose
+// packed into four packets).
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet8f,4>& kernel) {
+  __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+  __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+  __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+  __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+  __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
+  __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
+  __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
+  __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
+  kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);
+  kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
+  kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);
+  kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
+}
+// 4x4 double transpose.
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4d,4>& kernel) {
+  __m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);
+  __m256d T1 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
+  __m256d T2 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 15);
+  __m256d T3 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
+  kernel.packet[1] = _mm256_permute2f128_pd(T0, T2, 32);
+  kernel.packet[3] = _mm256_permute2f128_pd(T0, T2, 49);
+  kernel.packet[0] = _mm256_permute2f128_pd(T1, T3, 32);
+  kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
+}
+// pblend: per-element select. The integer selector is materialized as a float
+// packet and compared against zero to form a mask; zero selector entries pick
+// elsePacket, non-zero pick thenPacket.
+template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
+  const __m256 zero = _mm256_setzero_ps();
+  const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+  __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
+  return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
+  const __m256d zero = _mm256_setzero_pd();
+  const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+  __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
+  return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
+}
+// Replace the first (lowest) element of a packet with scalar b; the blend mask
+// selects only lane 0 from the broadcast packet.
+template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
+{
+  return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
+{
+  return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
+}
+// Replace the last (highest) element of a packet with scalar b.
+template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
+{
+  return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
+}
+template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
+{
+  return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
+}
+}
+}
+#endif
+// end #include "src/Core/arch/AVX/PacketMath.h"
+// #include "src/Core/arch/AVX/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_AVX_H
+#define EIGEN_MATH_FUNCTIONS_AVX_H
+namespace Eigen {
+namespace internal {
+// Logical left shift of each 32-bit lane by n. AVX1 has no 256-bit integer
+// shift, so without AVX2 the packet is split into two SSE halves.
+inline Packet8i pshiftleft(Packet8i v, int n)
+{
+#ifdef EIGEN_VECTORIZE_AVX2
+  return _mm256_slli_epi32(v, n);
+#else
+  __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n);
+  __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n);
+  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
+#endif
+}
+// Logical right shift of the float bit patterns by n, returning the shifted
+// integers converted back to float (used to extract exponent fields in plog).
+inline Packet8f pshiftright(Packet8f v, int n)
+{
+#ifdef EIGEN_VECTORIZE_AVX2
+  return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n));
+#else
+  __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n);
+  __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n);
+  return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1));
+#endif
+}
+// Vectorized single-precision sine. Range-reduces x by multiples of pi
+// (pi split into three parts for extra precision), then evaluates one of two
+// minimax polynomials depending on where the reduced argument falls, and
+// finally flips the sign for odd multiples of pi.
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+psin<Packet8f>(const Packet8f& _x) {
+  Packet8f x = _x;
+  _EIGEN_DECLARE_CONST_Packet8i(one, 1);
+  _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
+  _EIGEN_DECLARE_CONST_Packet8f(two, 2.0f);
+  _EIGEN_DECLARE_CONST_Packet8f(one_over_four, 0.25f);
+  _EIGEN_DECLARE_CONST_Packet8f(one_over_pi, 3.183098861837907e-01f);
+  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00f);
+  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04f);
+  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07f);
+  _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00f);
+  // shift = round(x/pi); subtract shift*pi in three steps for accuracy.
+  Packet8f z = pmul(x, p8f_one_over_pi);
+  Packet8f shift = _mm256_floor_ps(padd(z, p8f_one_over_four));
+  x = pmadd(shift, p8f_neg_pi_first, x);
+  x = pmadd(shift, p8f_neg_pi_second, x);
+  x = pmadd(shift, p8f_neg_pi_third, x);
+  z = pmul(x, p8f_four_over_pi);
+  // Odd multiples of pi flip the sign of the result.
+  Packet8i shift_ints = _mm256_cvtps_epi32(shift);
+  Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
+  Packet8i sign_flip_mask = pshiftleft(shift_isodd, 31);
+  // ival_mask selects which of the two polynomial branches applies per lane.
+  Packet8f ival_mask = _mm256_cmp_ps(z, p8f_one, _CMP_GT_OQ);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_0, 9.999999724233232e-01f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04f);
+  // "Right" polynomial in (z-2), evaluated with Horner's scheme.
+  Packet8f z_minus_two = psub(z, p8f_two);
+  Packet8f z_minus_two2 = pmul(z_minus_two, z_minus_two);
+  Packet8f right = pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4);
+  right = pmadd(right, z_minus_two2, p8f_coeff_right_2);
+  right = pmadd(right, z_minus_two2, p8f_coeff_right_0);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05f);
+  // "Left" odd polynomial in z.
+  Packet8f z2 = pmul(z, z);
+  Packet8f left = pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5);
+  left = pmadd(left, z2, p8f_coeff_left_3);
+  left = pmadd(left, z2, p8f_coeff_left_1);
+  left = pmul(left, z);
+  // Merge the two branches per lane, then apply the sign flip.
+  left = _mm256_andnot_ps(ival_mask, left);
+  right = _mm256_and_ps(ival_mask, right);
+  Packet8f res = _mm256_or_ps(left, right);
+  res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask));
+  return res;
+}
+// Vectorized single-precision natural log (cephes-style): split x into
+// mantissa and exponent, evaluate a polynomial on the mantissa, and add
+// e*ln(2) back in. x <= 0 yields NaN, x == 0 yields -inf.
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+plog<Packet8f>(const Packet8f& _x) {
+  Packet8f x = _x;
+  _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
+  _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet8f(126f, 126.0f);
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inv_mant_mask, ~0x7f800000);
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(min_norm_pos, 0x00800000);
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(minus_inf, 0xff800000);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_SQRTHF, 0.707106781186547524f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p0, 7.0376836292E-2f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p1, -1.1514610310E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p2, 1.1676998740E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p3, -1.2420140846E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p4, +1.4249322787E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p5, -1.6668057665E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p6, +2.0000714765E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p7, -2.4999993993E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p8, +3.3333331174E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q1, -2.12194440e-4f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q2, 0.693359375f);
+  // NGE catches both negatives and NaN inputs.
+  Packet8f invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_NGE_UQ);
+  Packet8f iszero_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_EQ_OQ);
+  // Flush denormals to the smallest normalized value before bit tricks.
+  x = pmax(x, p8f_min_norm_pos);
+  // Extract the biased exponent; 126 (not 127) because the mantissa is
+  // normalized into [0.5, 1) below.
+  Packet8f emm0 = pshiftright(x,23);
+  Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
+  x = _mm256_and_ps(x, p8f_inv_mant_mask);
+  x = _mm256_or_ps(x, p8f_half);
+  // If the mantissa is below sqrt(1/2), adjust so x is centered around 1.
+  Packet8f mask = _mm256_cmp_ps(x, p8f_cephes_SQRTHF, _CMP_LT_OQ);
+  Packet8f tmp = _mm256_and_ps(x, mask);
+  x = psub(x, p8f_1);
+  e = psub(e, _mm256_and_ps(p8f_1, mask));
+  x = padd(x, tmp);
+  // Degree-8 polynomial evaluated as three interleaved Horner chains.
+  Packet8f x2 = pmul(x, x);
+  Packet8f x3 = pmul(x2, x);
+  Packet8f y, y1, y2;
+  y  = pmadd(p8f_cephes_log_p0, x, p8f_cephes_log_p1);
+  y1 = pmadd(p8f_cephes_log_p3, x, p8f_cephes_log_p4);
+  y2 = pmadd(p8f_cephes_log_p6, x, p8f_cephes_log_p7);
+  y  = pmadd(y,  x, p8f_cephes_log_p2);
+  y1 = pmadd(y1, x, p8f_cephes_log_p5);
+  y2 = pmadd(y2, x, p8f_cephes_log_p8);
+  y = pmadd(y, x3, y1);
+  y = pmadd(y, x3, y2);
+  y = pmul(y, x3);
+  // Add e*ln(2), with ln(2) split in two for precision.
+  y1 = pmul(e, p8f_cephes_log_q1);
+  tmp = pmul(x2, p8f_half);
+  y = padd(y, y1);
+  x = psub(x, tmp);
+  y2 = pmul(e, p8f_cephes_log_q2);
+  x = padd(x, y);
+  x = padd(x, y2);
+  // Fix up specials: negative/NaN -> NaN, zero -> -inf.
+  return _mm256_or_ps(
+      _mm256_andnot_ps(iszero_mask, _mm256_or_ps(x, invalid_mask)),
+      _mm256_and_ps(iszero_mask, p8f_minus_inf));
+}
+// Vectorized single-precision exp (cephes-style): clamp x, split off
+// m = round(x/ln2), approximate exp of the remainder with a polynomial, and
+// rebuild 2^m via exponent-field bit manipulation.
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+pexp<Packet8f>(const Packet8f& _x) {
+  _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
+  _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet8f(127, 127.0f);
+  _EIGEN_DECLARE_CONST_Packet8f(exp_hi, 88.3762626647950f);
+  _EIGEN_DECLARE_CONST_Packet8f(exp_lo, -88.3762626647949f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_LOG2EF, 1.44269504088896341f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p0, 1.9875691500E-4f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p1, 1.3981999507E-3f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p2, 8.3334519073E-3f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p3, 4.1665795894E-2f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p4, 1.6666665459E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p5, 5.0000001201E-1f);
+  // Clamp to the representable range of float exp.
+  Packet8f x = pmax(pmin(_x, p8f_exp_hi), p8f_exp_lo);
+  // m = round(x * log2(e)) via floor(x*log2(e) + 0.5).
+  Packet8f m = _mm256_floor_ps(pmadd(x, p8f_cephes_LOG2EF, p8f_half));
+#ifdef EIGEN_VECTORIZE_FMA
+  // With FMA, r = x - m*ln2 is exact enough in one fused step.
+  _EIGEN_DECLARE_CONST_Packet8f(nln2, -0.6931471805599453f);
+  Packet8f r = _mm256_fmadd_ps(m, p8f_nln2, x);
+#else
+  // Without FMA, subtract ln2 in two parts to limit rounding error.
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C1, 0.693359375f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C2, -2.12194440e-4f);
+  Packet8f r = psub(x, pmul(m, p8f_cephes_exp_C1));
+  r = psub(r, pmul(m, p8f_cephes_exp_C2));
+#endif
+  // Horner evaluation of the exp(r) polynomial, r in [-ln2/2, ln2/2].
+  Packet8f r2 = pmul(r, r);
+  Packet8f y = p8f_cephes_exp_p0;
+  y = pmadd(y, r, p8f_cephes_exp_p1);
+  y = pmadd(y, r, p8f_cephes_exp_p2);
+  y = pmadd(y, r, p8f_cephes_exp_p3);
+  y = pmadd(y, r, p8f_cephes_exp_p4);
+  y = pmadd(y, r, p8f_cephes_exp_p5);
+  y = pmadd(y, r2, r);
+  y = padd(y, p8f_1);
+  // Build 2^m by placing (m+127) into the float exponent field.
+  Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127));
+  emm0 = pshiftleft(emm0, 23);
+  // Final pmax with _x propagates NaN inputs.
+  return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
+}
+// Vectorized tanh: delegates to the shared generic fast-tanh implementation.
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+ptanh<Packet8f>(const Packet8f& x) {
+  return internal::generic_fast_tanh_float(x);
+}
+// Vectorized double-precision exp (cephes-style): same structure as the float
+// version but uses a rational approximation px/(qx-px) and assembles 2^fx by
+// widening 32-bit exponents to the two 64-bit lanes by hand (no AVX2 assumed).
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
+pexp<Packet4d>(const Packet4d& _x) {
+  Packet4d x = _x;
+  _EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
+  _EIGEN_DECLARE_CONST_Packet4d(2, 2.0);
+  _EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
+  _EIGEN_DECLARE_CONST_Packet4d(exp_hi, 709.437);
+  _EIGEN_DECLARE_CONST_Packet4d(exp_lo, -709.436139303);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_LOG2EF, 1.4426950408889634073599);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p0, 1.26177193074810590878e-4);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p1, 3.02994407707441961300e-2);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p2, 9.99999999999999999910e-1);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q0, 3.00198505138664455042e-6);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q1, 2.52448340349684104192e-3);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q2, 2.27265548208155028766e-1);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q3, 2.00000000000000000009e0);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C1, 0.693145751953125);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C2, 1.42860682030941723212e-6);
+  _EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
+  Packet4d tmp, fx;
+  // Clamp to the representable range of double exp.
+  x = pmax(pmin(x, p4d_exp_hi), p4d_exp_lo);
+  // fx = round(x * log2(e)).
+  fx = pmadd(p4d_cephes_LOG2EF, x, p4d_half);
+  fx = _mm256_floor_pd(fx);
+  // Subtract fx*ln2 in two parts for accuracy.
+  tmp = pmul(fx, p4d_cephes_exp_C1);
+  Packet4d z = pmul(fx, p4d_cephes_exp_C2);
+  x = psub(x, tmp);
+  x = psub(x, z);
+  // Rational approximation: exp(x) ~ 1 + 2*px/(qx - px).
+  Packet4d x2 = pmul(x, x);
+  Packet4d px = p4d_cephes_exp_p0;
+  px = pmadd(px, x2, p4d_cephes_exp_p1);
+  px = pmadd(px, x2, p4d_cephes_exp_p2);
+  px = pmul(px, x);
+  Packet4d qx = p4d_cephes_exp_q0;
+  qx = pmadd(qx, x2, p4d_cephes_exp_q1);
+  qx = pmadd(qx, x2, p4d_cephes_exp_q2);
+  qx = pmadd(qx, x2, p4d_cephes_exp_q3);
+  x = _mm256_div_pd(px, psub(qx, px));
+  x = pmadd(p4d_2, x, p4d_1);
+  // Build 2^fx: bias the 32-bit exponents, then shift each into the exponent
+  // field of its 64-bit lane (low pair via slli 52, high pair via srli+slli).
+  __m128i emm0 = _mm256_cvtpd_epi32(fx);
+  emm0 = _mm_add_epi32(emm0, p4i_1023);
+  emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
+  __m128i lo = _mm_slli_epi64(emm0, 52);
+  __m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
+  __m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
+  e = _mm256_insertf128_si256(e, hi, 1);
+  // Final pmax with _x propagates NaN inputs.
+  return pmax(pmul(x, _mm256_castsi256_pd(e)), _x);
+}
+#if EIGEN_FAST_MATH
+// Fast sqrt: rsqrt estimate refined by one Newton-Raphson step, then
+// multiplied by x. Denormal or negative inputs are masked to zero to avoid
+// the estimate blowing up on them.
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+psqrt<Packet8f>(const Packet8f& _x) {
+  Packet8f half = pmul(_x, pset1<Packet8f>(.5f));
+  Packet8f denormal_mask = _mm256_and_ps(
+      _mm256_cmp_ps(_x, pset1<Packet8f>((std::numeric_limits<float>::min)()),
+                    _CMP_LT_OQ),
+      _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_GE_OQ));
+  // Newton-Raphson: x_est * (1.5 - 0.5*x*x_est^2).
+  Packet8f x = _mm256_rsqrt_ps(_x);
+  x = pmul(x, psub(pset1<Packet8f>(1.5f), pmul(half, pmul(x,x))));
+  return _mm256_andnot_ps(denormal_mask, pmul(_x,x));
+}
+#else
+// Exact sqrt when fast-math is disabled.
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet8f psqrt<Packet8f>(const Packet8f& x) {
+  return _mm256_sqrt_ps(x);
+}
+#endif
+// Double precision always uses the exact hardware sqrt.
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4d psqrt<Packet4d>(const Packet4d& x) {
+  return _mm256_sqrt_pd(x);
+}
+#if EIGEN_FAST_MATH
+// Fast reciprocal sqrt: rsqrt estimate plus one Newton-Raphson step, with
+// explicit fix-ups — negative inputs yield NaN, zeros/denormals yield +inf.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000);
+  _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
+  _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
+  Packet8f neg_half = pmul(_x, p8f_minus_half);
+  // Zero out the estimate wherever x is below the smallest normal float.
+  Packet8f le_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
+  Packet8f x = _mm256_andnot_ps(le_zero_mask, _mm256_rsqrt_ps(_x));
+  // Split the "too small" lanes into strictly-negative (NaN) and the rest (inf).
+  Packet8f neg_mask = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ);
+  Packet8f zero_mask = _mm256_andnot_ps(neg_mask, le_zero_mask);
+  Packet8f infs_and_nans = _mm256_or_ps(_mm256_and_ps(neg_mask, p8f_nan),
+                                        _mm256_and_ps(zero_mask, p8f_inf));
+  // Newton-Raphson: x_est * (1.5 - 0.5*x*x_est^2).
+  x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
+  return _mm256_or_ps(x, infs_and_nans);
+}
+#else
+// Exact 1/sqrt when fast-math is disabled.
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet8f prsqrt<Packet8f>(const Packet8f& x) {
+  _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
+  return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(x));
+}
+#endif
+// Double precision always uses the exact division form.
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4d prsqrt<Packet4d>(const Packet4d& x) {
+  _EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
+  return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x));
+}
+}
+}
+#endif
+// end #include "src/Core/arch/AVX/MathFunctions.h"
+// #include "src/Core/arch/AVX/Complex.h"
+#ifndef EIGEN_COMPLEX_AVX_H
+#define EIGEN_COMPLEX_AVX_H
+namespace Eigen {
+namespace internal {
+// Packet of four std::complex<float>, stored interleaved (re0,im0,...,re3,im3)
+// in one 256-bit register.
+struct Packet4cf
+{
+  EIGEN_STRONG_INLINE Packet4cf() {}
+  EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}
+  __m256 v;
+};
+// Vectorization traits for std::complex<float> on AVX; abs/min/max are
+// disabled since they are not meaningful element-wise on complex numbers.
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+  typedef Packet4cf type;
+  typedef Packet2cf half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 4,
+    HasHalfPacket = 1,
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasNegate = 1,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasSetLinear = 0
+  };
+};
+template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32}; typedef Packet2cf half; };
+// Complex add/sub operate component-wise on the interleaved float packet.
+template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a)
+{
+  return Packet4cf(pnegate(a.v));
+}
+// Conjugate: flip the sign bit of every imaginary (odd) component.
+template<> EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a)
+{
+  const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));
+  return Packet4cf(_mm256_xor_ps(a.v,mask));
+}
+// Complex multiply via the moveldup/movehdup + addsub trick:
+// tmp1 = re(a)*b, tmp2 = im(a)*swapped(b); addsub combines them into
+// (re*re - im*im, re*im + im*re) per element.
+template<> EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
+{
+  __m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);
+  __m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2,3,0,1)));
+  __m256 result = _mm256_addsub_ps(tmp1, tmp2);
+  return Packet4cf(result);
+}
+// Bitwise ops and loads forward to the underlying float packet.
+template<> EIGEN_STRONG_INLINE Packet4cf pand   <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf por    <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pxor   <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_andnot_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pload <Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload<Packet8f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
+{
+ return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from)));
+}
+// Load 2 complexes and duplicate each: [c0,c0] in the low 128-bit half,
+// [c1,c1] in the high half, via the Packet2cf ploaddup.
+template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
+{
+ Packet2cf a = ploaddup<Packet2cf>(from);
+ Packet2cf b = ploaddup<Packet2cf>(from+1);
+ return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
+}
+// Stores forward to the float-packet stores on the first real component.
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
+// Strided gather of 4 complexes; _mm256_set_ps lists lanes high-to-low, so
+// element 3's (im,re) comes first and element 0's last.
+template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, Index stride)
+{
+ return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]),
+ std::imag(from[2*stride]), std::real(from[2*stride]),
+ std::imag(from[1*stride]), std::real(from[1*stride]),
+ std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+// Strided scatter: extract each 128-bit half, then pull out the four float
+// lanes one by one with shuffle+cvtss and rebuild std::complex values.
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, Index stride)
+{
+ __m128 low = _mm256_extractf128_ps(from.v, 0);
+ to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)),
+ _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
+ to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)),
+ _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
+ __m128 high = _mm256_extractf128_ps(from.v, 1);
+ to[stride*2] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)),
+ _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
+ to[stride*3] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)),
+ _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
+}
+// First complex element: delegate to the SSE Packet2cf pfirst on the low half.
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Packet4cf& a)
+{
+ return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));
+}
+// Reverse the order of the 4 complexes: swap the two complexes inside each
+// 128-bit half (done as a 64-bit double shuffle so (re,im) pairs stay intact),
+// then swap the halves when reinserting.
+template<> EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
+ __m128 low = _mm256_extractf128_ps(a.v, 0);
+ __m128 high = _mm256_extractf128_ps(a.v, 1);
+ __m128d lowd = _mm_castps_pd(low);
+ __m128d highd = _mm_castps_pd(high);
+ low = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));
+ high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
+ __m256 result = _mm256_setzero_ps();
+ result = _mm256_insertf128_ps(result, low, 1);
+ result = _mm256_insertf128_ps(result, high, 0);
+ return Packet4cf(result);
+}
+// Horizontal sum of the 4 complexes: fold the two 128-bit halves together,
+// then reduce the resulting Packet2cf.
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a)
+{
+ return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v,0)),
+ Packet2cf(_mm256_extractf128_ps(a.v,1))));
+}
+// Transpose-and-sum of 4 packets: shuffle groups (re,im) pairs, hadd sums
+// pairs across packets, and the permute2f128 calls realign the lanes so
+// lane i of the result is the sum of vecs[i].
+template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
+{
+ Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
+ Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
+ t0 = _mm256_hadd_ps(t0,t1);
+ Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
+ Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
+ t2 = _mm256_hadd_ps(t2,t3);
+ t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4));
+ t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4));
+ return Packet4cf(_mm256_add_ps(t1,t3));
+}
+// Horizontal product: multiply the two halves element-wise, then reduce the
+// Packet2cf product.
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
+{
+ return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
+ Packet2cf(_mm256_extractf128_ps(a.v, 1))));
+}
+// Shift-by-Offset alignment helper: one complex element equals two floats,
+// hence the delegation with Offset*2 to the Packet8f implementation.
+template<int Offset>
+struct palign_impl<Offset,Packet4cf>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
+ {
+ if (Offset==0) return;
+ palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
+ }
+};
+// conj_helper<LHS,RHS,ConjLhs,ConjRhs>: multiply with optional conjugation
+// of either operand.  <false,true>  computes a * conj(b);
+//                     <true,false>  computes conj(a) * b;
+//                     <true,true>   computes conj(a*b) == conj(a)*conj(b).
+template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+// Mixed real*complex products: the real Packet8f factor multiplies both the
+// real and imaginary lanes of the complex packet, so a plain pmul suffices.
+template<> struct conj_helper<Packet8f, Packet4cf, false,false>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
+ { return Packet4cf(Eigen::internal::pmul(x, y.v)); }
+};
+template<> struct conj_helper<Packet4cf, Packet8f, false,false>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
+ { return Packet4cf(Eigen::internal::pmul(x.v, y)); }
+};
+// Complex division a/b = (a*conj(b)) / |b|^2.  The denominator is built by
+// squaring b lane-wise, then adding each lane to its (re,im) partner via the
+// 0xB1 within-pair swap, yielding re^2+im^2 duplicated into both lanes.
+template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
+{
+ Packet4cf num = pmul(a, pconj(b));
+ __m256 tmp = _mm256_mul_ps(b.v, b.v);
+ __m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1);
+ __m256 denom = _mm256_add_ps(tmp, tmp2);
+ return Packet4cf(_mm256_div_ps(num.v, denom));
+}
+// Swap real and imaginary lanes of every complex element.
+template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
+{
+ return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0 ,1)));
+}
+// Two std::complex<double> values packed as interleaved (re,im) doubles in
+// one 256-bit AVX register.
+struct Packet2cd
+{
+ EIGEN_STRONG_INLINE Packet2cd() {}
+ EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}
+ __m256d v;
+};
+// Vectorization capabilities for std::complex<double> under AVX: full packet
+// is Packet2cd (2 elements), half packet is the SSE Packet1cd.  Note
+// AlignedOnScalar = 0: a 16-byte scalar does not imply 32-byte alignment.
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet2cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 2,
+ HasHalfPacket = 1,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32}; typedef Packet1cd half; };
+// Element-wise add/sub/negate on the raw double register.
+template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) { return Packet2cd(pnegate(a.v)); }
+// Conjugate: flip the sign bit of the imaginary (odd) double lanes.
+template<> EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a)
+{
+ const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0));
+ return Packet2cd(_mm256_xor_pd(a.v,mask));
+}
+// Complex multiply: even = (a.re,a.re)*b; odd = (a.im,a.im)*(b.im,b.re);
+// addsub combines them into (re*re - im*im, re*im + im*re).
+template<> EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
+{
+ __m256d tmp1 = _mm256_shuffle_pd(a.v,a.v,0x0);
+ __m256d even = _mm256_mul_pd(tmp1, b.v);
+ __m256d tmp2 = _mm256_shuffle_pd(a.v,a.v,0xF);
+ __m256d tmp3 = _mm256_shuffle_pd(b.v,b.v,0x5);
+ __m256d odd = _mm256_mul_pd(tmp2, tmp3);
+ return Packet2cd(_mm256_addsub_pd(even, odd));
+}
+template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_andnot_pd(a.v,b.v)); }
+// Loads/stores reinterpret the complex<double> buffer as raw doubles and
+// reuse the Packet4d primitives.
+template<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload<Packet4d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from)); }
+// Broadcast: the 128-bit (re,im) pair is duplicated into both halves.
+template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
+{
+ return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));
+}
+// With only 2 elements per packet, load-and-duplicate is just a broadcast.
+template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) { return pset1<Packet2cd>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+// Strided gather; _mm256_set_pd lists lanes high-to-low.
+template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, Index stride)
+{
+ return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]),
+ std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+// Strided scatter: each 128-bit half holds one complex; cvtsd gives the real
+// part, shuffle_pd(...,1) exposes the imaginary part.
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, Index stride)
+{
+ __m128d low = _mm256_extractf128_pd(from.v, 0);
+ to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
+ __m128d high = _mm256_extractf128_pd(from.v, 1);
+ to[stride*1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
+}
+// First element: spill the low half to an aligned buffer and rebuild it.
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a)
+{
+ __m128d low = _mm256_extractf128_pd(a.v, 0);
+ EIGEN_ALIGN16 double res[2];
+ _mm_store_pd(res, low);
+ return std::complex<double>(res[0],res[1]);
+}
+// Reverse = swap the two 128-bit halves (each half is one complex).
+template<> EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
+ __m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
+ return Packet2cd(result);
+}
+// Horizontal sum: add the two halves as Packet1cd, then reduce.
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a)
+{
+ return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v,0)),
+ Packet1cd(_mm256_extractf128_pd(a.v,1))));
+}
+// Sum each of two packets: t0 = [vecs0.lo | vecs1.lo], t1 = [vecs0.hi | vecs1.hi];
+// their sum puts predux(vecs[i]) in lane i.
+template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
+{
+ Packet4d t0 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
+ Packet4d t1 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
+ return Packet2cd(_mm256_add_pd(t0,t1));
+}
+// Horizontal product: complex-multiply the two halves, then reduce.
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
+{
+ return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
+ Packet1cd(_mm256_extractf128_pd(a.v,1))))
+}
+// Alignment helper: one complex<double> equals two doubles, hence Offset*2.
+template<int Offset>
+struct palign_impl<Offset,Packet2cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
+ {
+ if (Offset==0) return;
+ palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
+ }
+};
+// conj_helper specializations for Packet2cd, mirroring the Packet4cf set:
+// <false,true> = a*conj(b); <true,false> = conj(a)*b; <true,true> = conj(a*b).
+template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+// Mixed real*complex products: the real Packet4d factor scales both lanes of
+// each complex, so a plain lane-wise pmul is correct.
+template<> struct conj_helper<Packet4d, Packet2cd, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
+ { return Packet2cd(Eigen::internal::pmul(x, y.v)); }
+};
+template<> struct conj_helper<Packet2cd, Packet4d, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
+ { return Packet2cd(Eigen::internal::pmul(x.v, y)); }
+};
+// Complex division a/b = (a*conj(b)) / |b|^2.  hadd_pd(tmp,tmp) sums the
+// squared re/im lanes within each half, duplicating re^2+im^2 into both.
+template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
+{
+ Packet2cd num = pmul(a, pconj(b));
+ __m256d tmp = _mm256_mul_pd(b.v, b.v);
+ __m256d denom = _mm256_hadd_pd(tmp, tmp);
+ return Packet2cd(_mm256_div_pd(num.v, denom));
+}
+// Swap real and imaginary lanes within each complex element.
+template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
+{
+ return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
+}
+// 4x4 transpose of complex<float> packets.  Each (re,im) float pair is
+// treated as one 64-bit double so shuffle_pd/permute2f128_pd move whole
+// complex elements at once.
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4cf,4>& kernel) {
+ __m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
+ __m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
+ __m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
+ __m256d P3 = _mm256_castps_pd(kernel.packet[3].v);
+ __m256d T0 = _mm256_shuffle_pd(P0, P1, 15);
+ __m256d T1 = _mm256_shuffle_pd(P0, P1, 0);
+ __m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
+ __m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
+ kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
+ kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
+ kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
+ kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
+}
+// 2x2 transpose of complex<double> packets: exchange the off-diagonal
+// 128-bit halves between the two registers.
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cd,2>& kernel) {
+ __m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4));
+ kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4));
+ kernel.packet[0].v = tmp;
+}
+// Replace the first complex element: broadcast b and blend in its two lowest
+// lanes (float lanes 0-1 / double lanes 0-1).
+template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
+{
+ return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
+}
+template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
+{
+ return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
+}
+// Replace the last complex element: blend in the two highest lanes.
+template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
+{
+ return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
+}
+template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
+{
+ return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
+}
+}
+}
+#endif
+// end #include "src/Core/arch/AVX/Complex.h"
+// #include "src/Core/arch/AVX/TypeCasting.h"
+#ifndef EIGEN_TYPE_CASTING_AVX_H
+#define EIGEN_TYPE_CASTING_AVX_H
+namespace Eigen {
+namespace internal {
+// float<->int casts under AVX: VectorizedCast = 0 disables Eigen's vectorized
+// cast path in both directions, but the pcast kernels below are still
+// provided (cvtps_epi32 / cvtepi32_ps on full 8-lane packets).
+template <>
+struct type_casting_traits<float, int> {
+ enum {
+ VectorizedCast = 0,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+template <>
+struct type_casting_traits<int, float> {
+ enum {
+ VectorizedCast = 0,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
+ return _mm256_cvtps_epi32(a);
+}
+template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
+ return _mm256_cvtepi32_ps(a);
+}
+}
+}
+#endif
+// end #include "src/Core/arch/AVX/TypeCasting.h"
+#elif defined EIGEN_VECTORIZE_SSE
+// #include "src/Core/arch/SSE/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_SSE_H
+#define EIGEN_PACKET_MATH_SSE_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
+#endif
+#endif
+// On old GCC/MinGW ABIs (__GXX_ABI_VERSION < 1004) mixed with AVX, raw
+// __m128/__m128i/__m128d cannot be used directly as template arguments, so
+// they are wrapped in a trivially-convertible struct; otherwise the packet
+// typedefs are the raw intrinsic types.
+#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
+template<typename T>
+struct eigen_packet_wrapper
+{
+ EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
+ EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
+ m_val = v;
+ return *this;
+ }
+ T m_val;
+};
+typedef eigen_packet_wrapper<__m128> Packet4f;
+typedef eigen_packet_wrapper<__m128i> Packet4i;
+typedef eigen_packet_wrapper<__m128d> Packet2d;
+#else
+typedef __m128 Packet4f;
+typedef __m128i Packet4i;
+typedef __m128d Packet2d;
+#endif
+template<> struct is_arithmetic<__m128> { enum { value = true }; };
+template<> struct is_arithmetic<__m128i> { enum { value = true }; };
+template<> struct is_arithmetic<__m128d> { enum { value = true }; };
+// Lane-permutation helpers: swizzle1 permutes one register's lanes,
+// swizzle2 picks lanes from two registers (p..s select source lane indices).
+#define vec4f_swizzle1(v,p,q,r,s) \
+ (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
+#define vec4i_swizzle1(v,p,q,r,s) \
+ (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
+#define vec2d_swizzle1(v,p,q) \
+ (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
+#define vec4f_swizzle2(a,b,p,q,r,s) \
+ (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
+#define vec4i_swizzle2(a,b,p,q,r,s) \
+ (_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
+// Convenience macros declaring broadcast constants p4f_/p2d_/p4i_<NAME>.
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+ const Packet4f p4f_##NAME = pset1<Packet4f>(X)
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+ const Packet2d p2d_##NAME = pset1<Packet2d>(X)
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+ const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ const Packet4i p4i_##NAME = pset1<Packet4i>(X)
+// SSE capability tables for float/double; only compiled when AVX is not
+// available (AVX provides its own wider packet_traits).  The SSE4.1 guard
+// additionally advertises round/floor/ceil support.
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet4f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket = 0,
+ HasDiv = 1,
+ HasSin = EIGEN_FAST_MATH,
+ HasCos = EIGEN_FAST_MATH,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasTanh = EIGEN_FAST_MATH,
+ HasBlend = 1
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ ,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+#endif
+ };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=2,
+ HasHalfPacket = 0,
+ HasDiv = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasBlend = 1
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ ,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+#endif
+ };
+};
+#endif
+// int packets stay 128-bit even under AVX (no #ifndef guard here).
+template<> struct packet_traits<int> : default_packet_traits
+{
+ typedef Packet4i type;
+ typedef Packet4i half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasBlend = 1
+ };
+};
+// Reverse mapping packet -> scalar type, element count and alignment.
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+// Cost-model overrides for vectorized division when only SSE is available.
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
+#endif
+// MSVC 2008 (1500) workaround: its _mm_set_ps1/_mm_set1_* codegen is avoided
+// by spelling out the full set intrinsics.
+#if EIGEN_COMP_MSVC==1500
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
+#endif
+// GCC-without-AVX: load one float then splat it, as an explicit pload1.
+#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
+template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
+ return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
+}
+#endif
+// plset(a) = {a, a+1, a+2, ...}: broadcast a and add the 0..N-1 ramp
+// (set intrinsics list lanes high-to-low, hence 3,2,1,0).
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
+// Element-wise add/sub map directly onto single SSE instructions.
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
+// Float negation: XOR the IEEE sign bit of every lane.
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
+{
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+ return _mm_xor_ps(a,mask);
+}
+// Double negation: the sign bit sits in the high 32 bits of each 64-bit lane.
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
+{
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
+ return _mm_xor_pd(a,mask);
+}
+// Integer negation: 0 - a (no integer sign-bit trick in two's complement).
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
+{
+ return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
+}
+// Conjugation of real packets is the identity.
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
+// 32-bit integer multiply: native mullo on SSE4.1; otherwise emulate with
+// two 32x32->64 unsigned multiplies (even lanes, then odd lanes moved into
+// even position) and re-interleave the low halves.
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_mullo_epi32(a,b);
+#else
+ return vec4i_swizzle1(
+ vec4i_swizzle2(
+ _mm_mul_epu32(a,b),
+ _mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
+ vec4i_swizzle1(b,1,0,3,2)),
+ 0,2,0,2),
+ 0,2,1,3);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
+// Integer madd has no fused instruction: multiply then add.
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
+// With FMA available, float/double madd use the fused a*b+c instructions.
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
+// Integer min: native on SSE4.1; otherwise select via a compare mask.
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_min_epi32(a,b);
+#else
+ Packet4i mask = _mm_cmplt_epi32(a,b);
+ return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
+// Integer max: same select-by-mask fallback with the comparison reversed.
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_max_epi32(a,b);
+#else
+ Packet4i mask = _mm_cmpgt_epi32(a,b);
+ return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+// Rounding kernels require SSE4.1 (_mm_round_*/_mm_ceil_*/_mm_floor_*).
+#ifdef EIGEN_VECTORIZE_SSE4_1
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
+#endif
+// Bitwise and/or/xor/andnot for each packet type, one instruction each.
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
+// Aligned loads.
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
+// Unaligned float load: MSVC 2010 (1600) gets a two-half load workaround
+// (loadl_pi/loadh_pi) instead of _mm_loadu_ps.
+#if EIGEN_COMP_MSVC
+ template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ #if (EIGEN_COMP_MSVC==1600)
+ __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
+ res = _mm_loadh_pi(res, (const __m64*)(from+2));
+ return res;
+ #else
+ return _mm_loadu_ps(from);
+ #endif
+ }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return _mm_loadu_ps(from);
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return _mm_loadu_pd(from);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
+}
+// Load-and-duplicate: read N/2 elements and repeat each one twice.
+// Packet4f reads two floats as one 64-bit chunk, then swizzles to a,a,b,b.
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+ return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{ return pset1<Packet2d>(from[0]); }
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+ Packet4i tmp;
+ tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
+ return vec4i_swizzle1(tmp, 0, 0, 1, 1);
+}
+// Aligned and unaligned stores.
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
+// Strided gathers: SSE has no gather instruction, so elements are read
+// scalar-wise and assembled with the set intrinsics (lanes listed high-to-low).
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+ return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ return _mm_set_pd(from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+ return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+ }
+// Strided scatters: extract each lane via shuffle + cvt and store scalar-wise.
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+ to[stride*0] = _mm_cvtss_f32(from);
+ to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
+ to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
+ to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+ to[stride*0] = _mm_cvtsd_f64(from);
+ to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+ to[stride*0] = _mm_cvtsi128_si32(from);
+ to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
+ to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
+ to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
+}
+// pstore1: broadcast one scalar into a packet and store it aligned.
+template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
+{
+ Packet4f pa = _mm_set_ss(a);
+ pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
+{
+ Packet2d pa = _mm_set_sd(a);
+ pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
+}
+// Prefetch hints to the L1 cache (T0); the AVX header supplies its own.
+#ifndef EIGEN_VECTORIZE_AVX
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+#endif
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#elif EIGEN_COMP_MSVC_STRICT
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#else
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
+#endif
+// preverse: reverse lane order (shuffle imm 0x1B = 3,2,1,0 for 4 lanes).
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{ return _mm_shuffle_ps(a,a,0x1B); }
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{ return _mm_shuffle_pd(a,a,0x1); }
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{ return _mm_shuffle_epi32(a,0x1B); }
+// pabs: absolute value by masking off the IEEE sign bit of each lane.
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
+{
+  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+  return _mm_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
+{
+  // setr_epi32 builds each 64-bit lane little-endian: low word all-ones,
+  // high word 0x7FFFFFFF clears the double's sign bit.
+  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+  return _mm_and_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
+{
+  #ifdef EIGEN_VECTORIZE_SSSE3
+  return _mm_abs_epi32(a);
+  #else
+  // Branchless abs: aux = a>>31 (all-ones if negative); (a^aux)-aux == |a|.
+  Packet4i aux = _mm_srai_epi32(a,31);
+  return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
+  #endif
+}
+// pbroadcast4: load 4 consecutive scalars and broadcast each into its own
+// packet (a0..a3). The AVX backend provides its own version, hence the guard.
+#ifndef __AVX__
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+                      Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+  // One aligned load, then splat each lane; a3 is used as the scratch load.
+  a3 = pload<Packet4f>(a);
+  a0 = vec4f_swizzle1(a3, 0,0,0,0);
+  a1 = vec4f_swizzle1(a3, 1,1,1,1);
+  a2 = vec4f_swizzle1(a3, 2,2,2,2);
+  a3 = vec4f_swizzle1(a3, 3,3,3,3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+                      Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+#ifdef EIGEN_VECTORIZE_SSE3
+  a0 = _mm_loaddup_pd(a+0);
+  a1 = _mm_loaddup_pd(a+1);
+  a2 = _mm_loaddup_pd(a+2);
+  a3 = _mm_loaddup_pd(a+3);
+#else
+  a1 = pload<Packet2d>(a);
+  a0 = vec2d_swizzle1(a1, 0,0);
+  a1 = vec2d_swizzle1(a1, 1,1);
+  a3 = pload<Packet2d>(a+2);
+  a2 = vec2d_swizzle1(a3, 0,0);
+  a3 = vec2d_swizzle1(a3, 1,1);
+#endif
+}
+#endif
+// punpackp: splat each lane of vecs[0] into vecs[1..3]/vecs[0]
+// (vecs[0] is overwritten last so its lanes survive the other splats).
+EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
+{
+  vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
+  vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
+  vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
+  vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
+}
+// preduxp: reduce 4 packets simultaneously; lane i of the result is the
+// horizontal sum of vecs[i]. SSE3 uses hadd; the fallback transposes with
+// unpacks and adds. predux: horizontal sum of one packet to a scalar.
+#ifdef EIGEN_VECTORIZE_SSE3
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+  return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+  return _mm_hadd_pd(vecs[0], vecs[1]);
+}
+#else
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+  // Transpose-and-add: interleave pairs, sum, then combine halves.
+  Packet4f tmp0, tmp1, tmp2;
+  tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
+  tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
+  tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
+  tmp0 = _mm_add_ps(tmp0, tmp1);
+  tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]);
+  tmp1 = _mm_add_ps(tmp1, tmp2);
+  tmp2 = _mm_movehl_ps(tmp1, tmp0);
+  tmp0 = _mm_movelh_ps(tmp0, tmp1);
+  return _mm_add_ps(tmp0, tmp2);
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+  return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
+}
+#endif
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+  // Fold high half onto low half, then fold the remaining pair.
+  Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
+  return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+  return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
+}
+// Integer horizontal sums: SSSE3 path uses phaddd; the fallback mirrors the
+// float transpose-and-add scheme with epi32/epi64 unpacks.
+#ifdef EIGEN_VECTORIZE_SSSE3
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+  return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+  Packet4i tmp0 = _mm_hadd_epi32(a,a);
+  return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));
+}
+#else
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+  // Fold upper 64 bits onto lower, then add the two remaining lanes.
+  Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
+  return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
+}
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+  Packet4i tmp0, tmp1, tmp2;
+  tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+  tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+  tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+  tmp0 = _mm_add_epi32(tmp0, tmp1);
+  tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+  tmp1 = _mm_add_epi32(tmp1, tmp2);
+  tmp2 = _mm_unpacklo_epi64(tmp0, tmp1);
+  tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
+  return _mm_add_epi32(tmp0, tmp2);
+}
+#endif
+// predux_mul: horizontal product of all lanes, same folding scheme as predux
+// but with multiplies.
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+  Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
+  return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+  return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
+}
+// Horizontal product of the four int lanes. SSE2 has no packed 32-bit
+// multiply usable here, so spill to an aligned buffer and multiply as scalars.
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+  EIGEN_ALIGN16 int aux[4];
+  pstore(aux, a);
+  return (aux[0] * aux[1]) * (aux[2] * aux[3]);
+}
+// predux_min / predux_max: horizontal min/max reduction to a scalar.
+// Floats fold halves with min/max ops; ints use SSE4.1 pmin/pmax when
+// available, otherwise spill to memory and compare as scalars.
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+  Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
+  return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+  return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+  return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+  EIGEN_ALIGN16 int aux[4];
+  pstore(aux, a);
+  int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
+  int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
+  return aux0<aux2 ? aux0 : aux2;
+#endif
+}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+  Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
+  return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+  return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+  return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+  EIGEN_ALIGN16 int aux[4];
+  pstore(aux, a);
+  int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
+  int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
+  return aux0>aux2 ? aux0 : aux2;
+#endif
+}
+// NOTE(review): empty conditional — looks like a leftover from an upstream
+// GCC-specific workaround; confirm against Eigen history before removing.
+#if EIGEN_COMP_GNUC
+#endif
+// palign_impl<Offset,Packet>::run(first, second): shift the concatenation
+// [first|second] left by Offset elements into `first` (used for unaligned
+// packet assembly). SSSE3 uses palignr; the fallback builds the result from
+// move/shuffle combinations per Offset. Offset==0 is a no-op in all cases.
+#ifdef EIGEN_VECTORIZE_SSSE3
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+  {
+    if (Offset!=0)
+      first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
+  }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+  {
+    if (Offset!=0)
+      first = _mm_alignr_epi8(second,first, Offset*4);
+  }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+  {
+    if (Offset==1)
+      first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
+  }
+};
+#else
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+  {
+    if (Offset==1)
+    {
+      first = _mm_move_ss(first,second);
+      first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
+    }
+    else if (Offset==2)
+    {
+      first = _mm_movehl_ps(first,first);
+      first = _mm_movelh_ps(first,second);
+    }
+    else if (Offset==3)
+    {
+      first = _mm_move_ss(first,second);
+      first = _mm_shuffle_ps(first,second,0x93);
+    }
+  }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+  {
+    // Same scheme as the Packet4f fallback, via float casts.
+    if (Offset==1)
+    {
+      first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+      first = _mm_shuffle_epi32(first,0x39);
+    }
+    else if (Offset==2)
+    {
+      first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
+      first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+    }
+    else if (Offset==3)
+    {
+      first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+      first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
+    }
+  }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+  {
+    if (Offset==1)
+    {
+      first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
+      first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
+    }
+  }
+};
+#endif
+// ptranspose: in-place transpose of a PacketBlock (treated as a small matrix
+// with one packet per row).
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+  _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+  __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
+  kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
+  kernel.packet[1] = tmp;
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+  // Classic 4x4 transpose: interleave 32-bit pairs, then 64-bit halves.
+  __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
+  __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
+  __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
+  __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
+  kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
+  kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
+  kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
+  kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
+}
+// pblend: per-lane select, lane i = ifPacket.select[i] ? thenPacket : elsePacket.
+// false_mask is all-ones where select==0, so blendv (which picks its second
+// operand where the mask is set) takes elsePacket there; the SSE2 fallback
+// builds the same result with and/andnot/or.
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+  __m128i false_mask = _mm_cmpeq_epi32(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
+#else
+  return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+  const __m128 zero = _mm_setzero_ps();
+  const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+  __m128 false_mask = _mm_cmpeq_ps(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
+#else
+  return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+  const __m128d zero = _mm_setzero_pd();
+  const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
+  __m128d false_mask = _mm_cmpeq_pd(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
+#else
+  return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
+#endif
+}
+// pinsertfirst / pinsertlast: return `a` with its first / last lane replaced
+// by scalar b. SSE4.1 uses a single-lane blend; the fallback uses move_ss/sd
+// (first lane) or a bit mask (last lane).
+template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blend_ps(a,pset1<Packet4f>(b),1);
+#else
+  return _mm_move_ss(a, _mm_load_ss(&b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blend_pd(a,pset1<Packet2d>(b),1);
+#else
+  return _mm_move_sd(a, _mm_load_sd(&b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
+#else
+  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
+  return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
+#else
+  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
+  return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
+#endif
+}
+// Scalar fused multiply-add specializations: route scalar pmadd through the
+// libm fma/fmaf builtins when the compiler targets FMA hardware.
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
+  return ::fmaf(a,b,c);
+}
+template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {
+  return ::fma(a,b,c);
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/PacketMath.h"
+// #include "src/Core/arch/SSE/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
+#define EIGEN_MATH_FUNCTIONS_SSE_H
+namespace Eigen {
+namespace internal {
+// Vectorized natural log, cephes-style: split x into mantissa m in
+// [sqrt(1/2), sqrt(2)) and exponent e, evaluate a degree-8 polynomial in
+// (m-1), then recombine with e*ln(2) (split into q1+q2 for precision).
+// x<0 -> NaN (invalid_mask), x==0 -> -inf (iszero_mask).
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f plog<Packet4f>(const Packet4f& _x)
+{
+  Packet4f x = _x;
+  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
+  Packet4i emm0;
+  // cmpnge also catches NaN inputs (propagated via invalid_mask below).
+  Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps());
+  Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
+  // Clamp to the smallest normalized float so the exponent extraction is valid.
+  x = pmax(x, p4f_min_norm_pos);
+  emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
+  // Keep the mantissa bits and force the exponent field to 0.5's.
+  x = _mm_and_ps(x, p4f_inv_mant_mask);
+  x = _mm_or_ps(x, p4f_half);
+  emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
+  Packet4f e = padd(Packet4f(_mm_cvtepi32_ps(emm0)), p4f_1);
+  // If mantissa < sqrt(1/2): halve the exponent's contribution and double x.
+  Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
+  Packet4f tmp = pand(x, mask);
+  x = psub(x, p4f_1);
+  e = psub(e, pand(p4f_1, mask));
+  x = padd(x, tmp);
+  Packet4f x2 = pmul(x,x);
+  Packet4f x3 = pmul(x2,x);
+  // Polynomial evaluated as three interleaved strands to expose ILP.
+  Packet4f y, y1, y2;
+  y  = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
+  y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
+  y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
+  y  = pmadd(y , x, p4f_cephes_log_p2);
+  y1 = pmadd(y1, x, p4f_cephes_log_p5);
+  y2 = pmadd(y2, x, p4f_cephes_log_p8);
+  y = pmadd(y, x3, y1);
+  y = pmadd(y, x3, y2);
+  y = pmul(y, x3);
+  y1 = pmul(e, p4f_cephes_log_q1);
+  tmp = pmul(x2, p4f_half);
+  y = padd(y, y1);
+  x = psub(x, tmp);
+  y2 = pmul(e, p4f_cephes_log_q2);
+  x = padd(x, y);
+  x = padd(x, y2);
+  // Select: negative/NaN input -> NaN, zero input -> -inf, else the result.
+  return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
+                   _mm_and_ps(iszero_mask, p4f_minus_inf));
+}
+// Vectorized expf, cephes-style: clamp x, split x = m*ln2 + r with
+// m = floor(x*log2(e) + 0.5), evaluate a degree-5 polynomial for e^r, then
+// scale by 2^m built via exponent-bit manipulation.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+  Packet4f x = _x;
+  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+  _EIGEN_DECLARE_CONST_Packet4f(exp_hi,  88.3762626647950f);
+  _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+  Packet4f tmp, fx;
+  Packet4i emm0;
+  // Clamp to avoid overflow/underflow in the 2^m scaling.
+  x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
+  fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  fx = _mm_floor_ps(fx);
+#else
+  // Emulate floor: truncate, then subtract 1 where truncation rounded up.
+  emm0 = _mm_cvttps_epi32(fx);
+  tmp  = _mm_cvtepi32_ps(emm0);
+  Packet4f mask = _mm_cmpgt_ps(tmp, fx);
+  mask = _mm_and_ps(mask, p4f_1);
+  fx = psub(tmp, mask);
+#endif
+  // r = x - m*ln2, with ln2 split into C1+C2 for extra precision.
+  tmp = pmul(fx, p4f_cephes_exp_C1);
+  Packet4f z = pmul(fx, p4f_cephes_exp_C2);
+  x = psub(x, tmp);
+  x = psub(x, z);
+  z = pmul(x,x);
+  Packet4f y = p4f_cephes_exp_p0;
+  y = pmadd(y, x, p4f_cephes_exp_p1);
+  y = pmadd(y, x, p4f_cephes_exp_p2);
+  y = pmadd(y, x, p4f_cephes_exp_p3);
+  y = pmadd(y, x, p4f_cephes_exp_p4);
+  y = pmadd(y, x, p4f_cephes_exp_p5);
+  y = pmadd(y, z, x);
+  y = padd(y, p4f_1);
+  // Build 2^m by writing m+127 into the float exponent field.
+  emm0 = _mm_cvttps_epi32(fx);
+  emm0 = _mm_add_epi32(emm0, p4i_0x7f);
+  emm0 = _mm_slli_epi32(emm0, 23);
+  // max with the original argument also propagates NaN inputs.
+  return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
+}
+// Vectorized exp for doubles, cephes-style: same range reduction as the
+// float version, but e^r is computed as a Pade-like rational px/(qx-px)
+// and the 2^m scale is injected into the double exponent field.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+  Packet2d x = _x;
+  _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+  _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+  _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+  _EIGEN_DECLARE_CONST_Packet2d(exp_hi,  709.437);
+  _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+  // Exponent-bias words for the two doubles (only low 2 ints are used).
+  static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
+  Packet2d tmp, fx;
+  Packet4i emm0;
+  x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+  fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  fx = _mm_floor_pd(fx);
+#else
+  // Emulate floor via truncate-and-correct, as in the float path.
+  emm0 = _mm_cvttpd_epi32(fx);
+  tmp  = _mm_cvtepi32_pd(emm0);
+  Packet2d mask = _mm_cmpgt_pd(tmp, fx);
+  mask = _mm_and_pd(mask, p2d_1);
+  fx = psub(tmp, mask);
+#endif
+  tmp = pmul(fx, p2d_cephes_exp_C1);
+  Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+  x = psub(x, tmp);
+  x = psub(x, z);
+  Packet2d x2 = pmul(x,x);
+  Packet2d px = p2d_cephes_exp_p0;
+  px = pmadd(px, x2, p2d_cephes_exp_p1);
+  px = pmadd(px, x2, p2d_cephes_exp_p2);
+  px = pmul (px, x);
+  Packet2d qx = p2d_cephes_exp_q0;
+  qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+  qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+  qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+  // e^r ~= 1 + 2*px/(qx-px)
+  x = pdiv(px,psub(qx,px));
+  x = pmadd(p2d_2,x,p2d_1);
+  // Shift biased exponents into the high word of each 64-bit lane.
+  emm0 = _mm_cvttpd_epi32(fx);
+  emm0 = _mm_add_epi32(emm0, p4i_1023_0);
+  emm0 = _mm_slli_epi32(emm0, 20);
+  emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
+  return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
+}
+// Vectorized sinf, cephes-style: reduce |x| to [0, pi/4] by octant
+// (y = round(x*4/pi) to even), evaluate sin and cos minimax polynomials,
+// pick per-lane via poly_mask, and restore the sign from the input sign
+// combined with the octant (emm0).
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psin<Packet4f>(const Packet4f& _x)
+{
+  Packet4f x = _x;
+  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+  _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+  _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+  _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p1,  8.3321608736E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p0,  2.443315711809948E-005f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p2,  4.166664568298827E-002f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);
+  Packet4f xmm1, xmm2, xmm3, sign_bit, y;
+  Packet4i emm0, emm2;
+  sign_bit = x;
+  x = pabs(x);
+  sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask);
+  // Octant index: round x*(4/pi) up to the next even integer.
+  y = pmul(x, p4f_cephes_FOPI);
+  emm2 = _mm_cvttps_epi32(y);
+  emm2 = _mm_add_epi32(emm2, p4i_1);
+  emm2 = _mm_and_si128(emm2, p4i_not1);
+  y = _mm_cvtepi32_ps(emm2);
+  // Bit 2 of the octant flips the sign; bit 1 selects sin vs cos polynomial.
+  emm0 = _mm_and_si128(emm2, p4i_4);
+  emm0 = _mm_slli_epi32(emm0, 29);
+  emm2 = _mm_and_si128(emm2, p4i_2);
+  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+  Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
+  Packet4f poly_mask = _mm_castsi128_ps(emm2);
+  sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
+  // Extended-precision argument reduction: x -= y*pi/4 (pi/4 split DP1..DP3).
+  xmm1 = pmul(y, p4f_minus_cephes_DP1);
+  xmm2 = pmul(y, p4f_minus_cephes_DP2);
+  xmm3 = pmul(y, p4f_minus_cephes_DP3);
+  x = padd(x, xmm1);
+  x = padd(x, xmm2);
+  x = padd(x, xmm3);
+  // Cosine branch: 1 - z/2 + z^2*P(z)
+  y = p4f_coscof_p0;
+  Packet4f z = _mm_mul_ps(x,x);
+  y = pmadd(y, z, p4f_coscof_p1);
+  y = pmadd(y, z, p4f_coscof_p2);
+  y = pmul(y, z);
+  y = pmul(y, z);
+  Packet4f tmp = pmul(z, p4f_half);
+  y = psub(y, tmp);
+  y = padd(y, p4f_1);
+  // Sine branch: x + x*z*Q(z)
+  Packet4f y2 = p4f_sincof_p0;
+  y2 = pmadd(y2, z, p4f_sincof_p1);
+  y2 = pmadd(y2, z, p4f_sincof_p2);
+  y2 = pmul(y2, z);
+  y2 = pmul(y2, x);
+  y2 = padd(y2, x);
+  // Per-lane branch selection, then apply the accumulated sign.
+  y2 = _mm_and_ps(poly_mask, y2);
+  y = _mm_andnot_ps(poly_mask, y);
+  y = _mm_or_ps(y,y2);
+  return _mm_xor_ps(y, sign_bit);
+}
+// Vectorized cosf: same octant reduction and sin/cos polynomial pair as
+// psin, but the octant index is shifted by 2 (cos(x) = sin(x + pi/2)) and
+// the input sign is ignored since cosine is even.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pcos<Packet4f>(const Packet4f& _x)
+{
+  Packet4f x = _x;
+  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+  _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+  _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+  _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p1,  8.3321608736E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p0,  2.443315711809948E-005f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p2,  4.166664568298827E-002f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);
+  Packet4f xmm1, xmm2, xmm3, y;
+  Packet4i emm0, emm2;
+  x = pabs(x);
+  y = pmul(x, p4f_cephes_FOPI);
+  emm2 = _mm_cvttps_epi32(y);
+  emm2 = _mm_add_epi32(emm2, p4i_1);
+  emm2 = _mm_and_si128(emm2, p4i_not1);
+  y = _mm_cvtepi32_ps(emm2);
+  // Shift octant by 2 and derive the result sign from (NOT emm2) & 4.
+  emm2 = _mm_sub_epi32(emm2, p4i_2);
+  emm0 = _mm_andnot_si128(emm2, p4i_4);
+  emm0 = _mm_slli_epi32(emm0, 29);
+  emm2 = _mm_and_si128(emm2, p4i_2);
+  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+  Packet4f sign_bit = _mm_castsi128_ps(emm0);
+  Packet4f poly_mask = _mm_castsi128_ps(emm2);
+  // Extended-precision argument reduction (pi/4 split into DP1..DP3).
+  xmm1 = pmul(y, p4f_minus_cephes_DP1);
+  xmm2 = pmul(y, p4f_minus_cephes_DP2);
+  xmm3 = pmul(y, p4f_minus_cephes_DP3);
+  x = padd(x, xmm1);
+  x = padd(x, xmm2);
+  x = padd(x, xmm3);
+  // Cosine branch: 1 - z/2 + z^2*P(z)
+  y = p4f_coscof_p0;
+  Packet4f z = pmul(x,x);
+  y = pmadd(y,z,p4f_coscof_p1);
+  y = pmadd(y,z,p4f_coscof_p2);
+  y = pmul(y, z);
+  y = pmul(y, z);
+  Packet4f tmp = _mm_mul_ps(z, p4f_half);
+  y = psub(y, tmp);
+  y = padd(y, p4f_1);
+  // Sine branch: x + x*z*Q(z)
+  Packet4f y2 = p4f_sincof_p0;
+  y2 = pmadd(y2, z, p4f_sincof_p1);
+  y2 = pmadd(y2, z, p4f_sincof_p2);
+  y2 = pmul(y2, z);
+  y2 = pmadd(y2, x, x);
+  y2 = _mm_and_ps(poly_mask, y2);
+  y  = _mm_andnot_ps(poly_mask, y);
+  y  = _mm_or_ps(y,y2);
+  return _mm_xor_ps(y, sign_bit);
+}
+// psqrt: fast-math float path uses rsqrt plus one Newton-Raphson step
+// (x*rsqrt(x)); denormal/zero inputs are masked to 0 because rsqrt would
+// produce inf there. Non-fast-math and double use the exact sqrt intrinsics.
+#if EIGEN_FAST_MATH
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& _x)
+{
+  Packet4f half = pmul(_x, pset1<Packet4f>(.5f));
+  // True where 0 <= x < smallest normalized float (incl. denormals and +0).
+  Packet4f denormal_mask = _mm_and_ps(
+      _mm_cmpge_ps(_x, _mm_setzero_ps()),
+      _mm_cmplt_ps(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())));
+  Packet4f x = _mm_rsqrt_ps(_x);
+  // One Newton-Raphson iteration: x *= 1.5 - 0.5*_x*x*x
+  x = pmul(x, psub(pset1<Packet4f>(1.5f), pmul(half, pmul(x,x))));
+  return _mm_andnot_ps(denormal_mask, pmul(_x,x));
+}
+#else
+template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
+#endif
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
+// prsqrt (1/sqrt(x)): fast-math float path refines rsqrt with one
+// Newton-Raphson step and patches the special cases by hand
+// (x<0 -> NaN, 0<=x<FLT_MIN -> +inf); otherwise a plain div/sqrt.
+#if EIGEN_FAST_MATH
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000);
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(nan, 0x7fc00000);
+  _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000);
+  Packet4f neg_half = pmul(_x, p4f_minus_half);
+  // Zero out lanes where rsqrt's raw result would be garbage (x <= FLT_MIN).
+  Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
+  Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
+  // Negative inputs -> NaN; zero/denormal non-negative inputs -> +inf.
+  Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
+  Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
+  Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
+                                     _mm_and_ps(zero_mask, p4f_inf));
+  // One Newton-Raphson iteration: x *= 1.5 - 0.5*_x*x*x
+  x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));
+  return _mm_or_ps(x, infs_and_nans);
+}
+#else
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& x) {
+  return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
+}
+#endif
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x) {
+  return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
+}
+// ptanh: delegate to Eigen's shared generic fast tanh approximation.
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
+ptanh<Packet4f>(const Packet4f& x) {
+  return internal::generic_fast_tanh_float(x);
+}
+}
+namespace numext {
+// numext::sqrt scalar specializations: route through the SSE scalar sqrt
+// instruction instead of libm.
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sqrt(const float &x)
+{
+  return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sqrt(const double &x)
+{
+// NOTE(review): strict-GCC path uses the builtin directly — presumably to
+// avoid an older codegen issue with _mm_sqrt_pd here; confirm upstream.
+#if EIGEN_COMP_GNUC_STRICT
+  return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
+#else
+  return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
+#endif
+}
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/MathFunctions.h"
+// #include "src/Core/arch/SSE/Complex.h"
+#ifndef EIGEN_COMPLEX_SSE_H
+#define EIGEN_COMPLEX_SSE_H
+namespace Eigen {
+namespace internal {
+// Packet of two complex<float> values stored interleaved
+// (re0, im0, re1, im1) in a single __m128.
+struct Packet2cf
+{
+  EIGEN_STRONG_INLINE Packet2cf() {}
+  EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
+  __m128 v;  // underlying SSE register
+};
+// Vectorization traits for std::complex<float> on SSE (the AVX backend
+// defines its own, hence the guard): 2 complex values per packet; abs/min/
+// max/setlinear are not meaningful or not provided for complex.
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<std::complex<float> >  : default_packet_traits
+{
+  typedef Packet2cf type;
+  typedef Packet2cf half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 2,
+    HasHalfPacket = 0,
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasNegate = 1,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasSetLinear = 0,
+    HasBlend  = 1
+  };
+};
+#endif
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+// Complex add/sub operate component-wise on the interleaved layout.
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
+// Negate: flip the sign bit of every float component.
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
+{
+  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+  return Packet2cf(_mm_xor_ps(a.v,mask));
+}
+// Conjugate: flip the sign bit of the imaginary components only.
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
+{
+  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+  return Packet2cf(_mm_xor_ps(a.v,mask));
+}
+// Complex multiply: (ar*br - ai*bi, ar*bi + ai*br) per lane. SSE3 uses
+// moveldup/movehdup + addsub; the SSE2 fallback xors a sign mask to emulate
+// the addsub pattern.
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  #ifdef EIGEN_VECTORIZE_SSE3
+  return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
+                                 _mm_mul_ps(_mm_movehdup_ps(a.v),
+                                            vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+  #else
+  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
+  return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+                              _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+                                                    vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+  #endif
+}
+// Bitwise ops forward directly to the float variants.
+template<> EIGEN_STRONG_INLINE Packet2cf pand   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf por    <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pxor   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
+// Load/store reinterpret the complex pair as 4 contiguous floats via
+// real_ref on the first element.
+template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }
+// pset1: load one complex value into the low 64 bits, then duplicate it to
+// both lanes. The GCC-version branches work around loadl_pi uninitialized-
+// read diagnostics/codegen differences.
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)
+{
+  Packet2cf res;
+#if EIGEN_GNUC_AT_MOST(4,2)
+  res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
+#elif EIGEN_GNUC_AT_LEAST(4,6)
+  #pragma GCC diagnostic push
+  #pragma GCC diagnostic ignored "-Wuninitialized"
+  res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+  #pragma GCC diagnostic pop
+#else
+  res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+#endif
+  return Packet2cf(_mm_movelh_ps(res.v,res.v));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+ return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
+ std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+ to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
+ _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
+ to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
+ _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
+}
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
+{
+ #if EIGEN_GNUC_AT_MOST(4,3)
+ EIGEN_ALIGN16 std::complex<float> res[2];
+ _mm_store_ps((float*)res, a.v);
+ return res[0];
+ #else
+ std::complex<float> res;
+ _mm_storel_pi((__m64*)&res, a.v);
+ return res;
+ #endif
+}
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+ return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+ return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+ return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
+}
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+ {
+ if (Offset==1)
+ {
+ first.v = _mm_movehl_ps(first.v, first.v);
+ first.v = _mm_movelh_ps(first.v, second.v);
+ }
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(a, pconj(b));
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+ _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(pconj(a), b);
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+ _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return pconj(internal::pmul(a, b));
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+ _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet4f, Packet2cf, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
+ { return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
+};
+template<> struct conj_helper<Packet2cf, Packet4f, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
+ { return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
+};
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
+ __m128 s = _mm_mul_ps(b.v,b.v);
+ return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
+}
+EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x)
+{
+ return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
+}
+// ===== SSE kernels for std::complex<double> (Packet1cd): one complex double in one __m128d. =====
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
+ __m128d v;
+};
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+#endif
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+// Conjugate: flip the sign bit of the high (imaginary) double only.
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
+{
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_xor_pd(a.v,mask));
+}
+// Complex multiply: SSE3 movedup+addsub, or sign-mask emulation pre-SSE3.
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
+ return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+ _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0)), mask)));
+ #endif
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+// pset1 goes through ploadu because a complex<double> scalar may be unaligned
+// (see AlignedOnScalar = 0 above).
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ return ploadu<Packet1cd>(&from); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ EIGEN_ALIGN16 double res[2];
+ _mm_store_pd(res, a.v);
+ return std::complex<double>(res[0],res[1]);
+}
+// With a single complex per packet, reverse and the reductions are identities.
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
+{
+ return vecs[0];
+}
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+// Alignment shifting is a no-op for a size-1 packet.
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
+ {
+ }
+};
+// Fused conj-and-multiply variants, mirroring the Packet2cf helpers above.
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(a, pconj(b));
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(pconj(a), b);
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+ _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0)), mask)));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return pconj(internal::pmul(a, b));
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #endif
+ }
+};
+// Mixed real (Packet2d) x complex (Packet1cd) products.
+template<> struct conj_helper<Packet2d, Packet1cd, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
+ { return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
+};
+template<> struct conj_helper<Packet1cd, Packet2d, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
+ { return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
+};
+// Division: a * conj(b) / |b|^2; the shuffle duplicates |b|^2 into both lanes.
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ __m128d s = _mm_mul_pd(b.v,b.v);
+ return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
+}
+EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+// 2x2 transpose of complex<float> packets: treat each complex as one 64-bit
+// element and swap via double-domain unpacklo/unpackhi.
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cf,2>& kernel) {
+ __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
+ __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
+ __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
+ kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
+ kernel.packet[1].v = tmp;
+}
+// Per-complex select: delegates to the Packet2d blend (one 64-bit lane per complex).
+template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
+ __m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
+ return Packet2cf(_mm_castpd_ps(result));
+}
+// Replace the first/last complex of a packet; for size-1 Packet1cd both reduce to pset1.
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
+{
+ return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
+{
+ return pset1<Packet1cd>(b);
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
+{
+ return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
+{
+ return pset1<Packet1cd>(b);
+}
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/Complex.h"
+// #include "src/Core/arch/SSE/TypeCasting.h"
+// ===== Inlined src/Core/arch/SSE/TypeCasting.h: vectorized scalar conversions. =====
+#ifndef EIGEN_TYPE_CASTING_SSE_H
+#define EIGEN_TYPE_CASTING_SSE_H
+namespace Eigen {
+namespace internal {
+template <>
+struct type_casting_traits<float, int> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+// float -> int uses cvttps: conversion with truncation toward zero.
+template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
+ return _mm_cvttps_epi32(a);
+}
+template <>
+struct type_casting_traits<int, float> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
+ return _mm_cvtepi32_ps(a);
+}
+template <>
+struct type_casting_traits<double, float> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 2,
+ TgtCoeffRatio = 1
+ };
+};
+// Two double packets narrow into one float packet (SrcCoeffRatio = 2);
+// the shuffle packs the two low float pairs into one register.
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
+ return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
+}
+template <>
+struct type_casting_traits<float, double> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 2
+ };
+};
+// Only the low two floats are widened (TgtCoeffRatio = 2).
+template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
+ return _mm_cvtps_pd(a);
+}
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/TypeCasting.h"
+#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
+// #include "src/Core/arch/AltiVec/PacketMath.h"
+// ===== Inlined src/Core/arch/AltiVec/PacketMath.h: PowerPC AltiVec/VSX backend. =====
+#ifndef EIGEN_PACKET_MATH_ALTIVEC_H
+#define EIGEN_PACKET_MATH_ALTIVEC_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
+#endif
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
+#endif
+// 128-bit AltiVec vector types used by this backend.
+typedef __vector float Packet4f;
+typedef __vector int Packet4i;
+typedef __vector unsigned int Packet4ui;
+typedef __vector __bool int Packet4bi;
+typedef __vector short int Packet8i;
+typedef __vector unsigned char Packet16uc;
+// "FAST" constants use vec_splat_s32 (immediate splat, range -16..15);
+// the generic ones go through pset1.
+#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
+ Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(vec_splat_s32(X))
+#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
+ Packet4i p4i_##NAME = vec_splat_s32(X)
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+ Packet4f p4f_##NAME = pset1<Packet4f>(X)
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ Packet4i p4i_##NAME = pset1<Packet4i>(X)
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+ Packet2d p2d_##NAME = pset1<Packet2d>(X)
+#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
+ Packet2l p2l_##NAME = pset1<Packet2l>(X)
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+ const Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X))
+#define DST_CHAN 1
+#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
+static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1);
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16);
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1);
+// p4f_MZERO: all-ones shifted to leave only sign bits set (vector of -0.0f).
+static Packet4f p4f_MZERO = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1);
+#ifndef __VSX__
+static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0);
+#endif
+static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
+static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
+// Byte-permute masks for vec_perm; values are byte indices into the 32-byte
+// concatenation of the two source vectors. Endianness-dependent below.
+static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
+static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
+#ifdef __PPC64__
+#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
+#else
+#define _EIGEN_MASK_ALIGNMENT 0xfffffff0
+#endif
+// Round a pointer down to its enclosing 16-byte boundary.
+#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
+#ifdef _BIG_ENDIAN
+static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
+#ifdef __VSX__
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+#endif
+static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);
+static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);
+static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);
+#else
+static Packet16uc p16uc_FORWARD = p16uc_REVERSE32;
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);
+static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);
+static Packet16uc p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 0), (Packet16uc)p4i_ZERO, 8);
+#endif
+static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);
+static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);
+static Packet16uc p16uc_TRANSPOSE64_HI = p16uc_PSET64_HI + p16uc_HALF64_0_16;
+static Packet16uc p16uc_TRANSPOSE64_LO = p16uc_PSET64_LO + p16uc_HALF64_0_16;
+static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8);
+#ifdef _BIG_ENDIAN
+static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);
+#else
+static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8);
+#endif
+#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
+ #define EIGEN_PPC_PREFETCH(ADDR) __builtin_prefetch(ADDR);
+#else
+ #define EIGEN_PPC_PREFETCH(ADDR) asm( " dcbt [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
+#endif
+// Capability tables consumed by Eigen's vectorization logic.
+// float: sqrt/rsqrt only with VSX (rsqrt disabled under clang per the #if below).
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet4f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket = 1,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasMin = 1,
+ HasMax = 1,
+ HasAbs = 1,
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 0,
+ HasExp = 1,
+#ifdef __VSX__
+ HasSqrt = 1,
+#if !EIGEN_COMP_CLANG
+ HasRsqrt = 1,
+#else
+ HasRsqrt = 0,
+#endif
+#else
+ HasSqrt = 0,
+ HasRsqrt = 0,
+#endif
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1,
+ HasNegate = 1,
+ HasBlend = 1
+ };
+};
+// int: no vectorized division (see pdiv<Packet4i> assert below).
+template<> struct packet_traits<int> : default_packet_traits
+{
+ typedef Packet4i type;
+ typedef Packet4i half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 4,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 0,
+ HasBlend = 1
+ };
+};
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+// Debug printers: a union punning the vector into a scalar array is used to
+// stream individual lanes.
+inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)
+{
+ union {
+ Packet16uc v;
+ unsigned char n[16];
+ } vt;
+ vt.v = v;
+ for (int i=0; i< 16; i++)
+ s << (int)vt.n[i] << ", ";
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
+{
+ union {
+ Packet4f v;
+ float n[4];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
+{
+ union {
+ Packet4i v;
+ int n[4];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
+{
+ union {
+ Packet4ui v;
+ unsigned int n[4];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
+ return s;
+}
+// Aligned loads/stores: VSX has vec_vsx_ld/st; classic AltiVec uses vec_ld/st
+// (which silently ignore the low 4 address bits, hence aligned-only).
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+#ifdef __VSX__
+ return vec_vsx_ld(0, from);
+#else
+ return vec_ld(0, from);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+#ifdef __VSX__
+ return vec_vsx_ld(0, from);
+#else
+ return vec_ld(0, from);
+#endif
+}
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
+{
+ EIGEN_DEBUG_ALIGNED_STORE
+#ifdef __VSX__
+ vec_vsx_st(from, 0, to);
+#else
+ vec_st(from, 0, to);
+#endif
+}
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
+{
+ EIGEN_DEBUG_ALIGNED_STORE
+#ifdef __VSX__
+ vec_vsx_st(from, 0, to);
+#else
+ vec_st(from, 0, to);
+#endif
+}
+// Broadcast a scalar into all four lanes via a vector initializer.
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
+ Packet4f v = {from, from, from, from};
+ return v;
+}
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
+ Packet4i v = {from, from, from, from};
+ return v;
+}
+// Load four scalars once, then splat each lane into its own packet
+// (a3 is loaded first and splatted last so it can serve as the staging register).
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+ Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+ a3 = pload<Packet4f>(a);
+ a0 = vec_splat(a3, 0);
+ a1 = vec_splat(a3, 1);
+ a2 = vec_splat(a3, 2);
+ a3 = vec_splat(a3, 3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4i>(const int *a,
+ Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
+{
+ a3 = pload<Packet4i>(a);
+ a0 = vec_splat(a3, 0);
+ a1 = vec_splat(a3, 1);
+ a2 = vec_splat(a3, 2);
+ a3 = vec_splat(a3, 3);
+}
+// Strided gather/scatter bounce through an aligned stack buffer.
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+ float EIGEN_ALIGN16 af[4];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ af[2] = from[2*stride];
+ af[3] = from[3*stride];
+ return pload<Packet4f>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+ int EIGEN_ALIGN16 ai[4];
+ ai[0] = from[0*stride];
+ ai[1] = from[1*stride];
+ ai[2] = from[2*stride];
+ ai[3] = from[3*stride];
+ return pload<Packet4i>(ai);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+ float EIGEN_ALIGN16 af[4];
+ pstore<float>(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+ to[2*stride] = af[2];
+ to[3*stride] = af[3];
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+ int EIGEN_ALIGN16 ai[4];
+ pstore<int>((int *)ai, from);
+ to[0*stride] = ai[0];
+ to[1*stride] = ai[1];
+ to[2*stride] = ai[2];
+ to[3*stride] = ai[3];
+}
+// Lane-wise arithmetic. GCC/Clang vector extensions allow +,-,* directly on
+// the vector types; vec_madd with a -0.0 addend implements plain multiply.
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return pset1<Packet4f>(a) + p4f_COUNTDOWN; }
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return pset1<Packet4i>(a) + p4i_COUNTDOWN; }
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return a + b; }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return a + b; }
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return a - b; }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return a - b; }
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return p4f_ZERO - a; }
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ZERO - a; }
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_MZERO); }
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return a * b; }
+// Division: VSX has vec_div; classic AltiVec refines the vec_re reciprocal
+// estimate with one Newton-Raphson step (t = 1 - y0*b; y1 = y0 + y0*t).
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+#ifndef __VSX__
+ Packet4f t, y_0, y_1;
+ y_0 = vec_re(b);
+ t = vec_nmsub(y_0, b, p4f_ONE);
+ y_1 = vec_madd(y_0, t, y_0);
+ return vec_madd(a, y_1, p4f_MZERO);
+#else
+ return vec_div(a, b);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& , const Packet4i& )
+{ eigen_assert(false && "packet integer division are not supported by AltiVec");
+ return pset1<Packet4i>(0);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
+// andnot(a,b) = a & ~b, built from vec_nor(b,b) since there is no direct andc use here.
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return vec_round(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
+#ifdef _BIG_ENDIAN
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+ Packet16uc MSQ, LSQ;
+ Packet16uc mask;
+ MSQ = vec_ld(0, (unsigned char *)from);
+ LSQ = vec_ld(15, (unsigned char *)from);
+ mask = vec_lvsl(0, from);
+ return (Packet4f) vec_perm(MSQ, LSQ, mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+ Packet16uc MSQ, LSQ;
+ Packet16uc mask;
+ MSQ = vec_ld(0, (unsigned char *)from);
+ LSQ = vec_ld(15, (unsigned char *)from);
+ mask = vec_lvsl(0, from);
+ return (Packet4i) vec_perm(MSQ, LSQ, mask);
+}
+#else
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from));
+}
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from));
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+ Packet4f p;
+ if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
+ else p = ploadu<Packet4f>(from);
+ return vec_perm(p, p, p16uc_DUPLICATE32_HI);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+ Packet4i p;
+ if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
+ else p = ploadu<Packet4i>(from);
+ return vec_perm(p, p, p16uc_DUPLICATE32_HI);
+}
+#ifdef _BIG_ENDIAN
+// Big-endian AltiVec unaligned store: the classic read-modify-write idiom.
+// Load the two aligned quadwords covering [to, to+16), recombine their edge
+// bytes with the new data via lvsl/lvsr permute vectors, and store both
+// quadwords back. Not atomic and not thread-safe with respect to the
+// neighbouring bytes, which matches the usual semantics of this idiom.
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
+{
+ EIGEN_DEBUG_UNALIGNED_STORE
+ Packet16uc MSQ, LSQ, edges;
+ Packet16uc edgeAlign, align;
+ MSQ = vec_ld(0, (unsigned char *)to);
+ LSQ = vec_ld(15, (unsigned char *)to);
+ edgeAlign = vec_lvsl(0, to);
+ edges=vec_perm(LSQ,MSQ,edgeAlign);
+ align = vec_lvsr( 0, to );
+ MSQ = vec_perm(edges,(Packet16uc)from,align);
+ LSQ = vec_perm((Packet16uc)from,edges,align);
+ vec_st( LSQ, 15, (unsigned char *)to );
+ vec_st( MSQ, 0, (unsigned char *)to );
+}
+// Same sequence as the float variant, for 4 ints.
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
+{
+ EIGEN_DEBUG_UNALIGNED_STORE
+ Packet16uc MSQ, LSQ, edges;
+ Packet16uc edgeAlign, align;
+ MSQ = vec_ld(0, (unsigned char *)to);
+ LSQ = vec_ld(15, (unsigned char *)to);
+ edgeAlign = vec_lvsl(0, to);
+ edges=vec_perm(LSQ, MSQ, edgeAlign);
+ align = vec_lvsr( 0, to );
+ MSQ = vec_perm(edges, (Packet16uc) from, align);
+ LSQ = vec_perm((Packet16uc) from, edges, align);
+ vec_st( LSQ, 15, (unsigned char *)to );
+ vec_st( MSQ, 0, (unsigned char *)to );
+}
+#else
+// Unaligned store of 4 ints via VSX vec_vsx_st.
+// Fix: count this as an UNALIGNED store in Eigen's debug instrumentation.
+// The original used EIGEN_DEBUG_ALIGNED_STORE, which miscounted unaligned
+// stores as aligned ones; the big-endian pstoreu<int> above uses the
+// unaligned macro, and upstream Eigen made the VSX path consistent.
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
+{
+ EIGEN_DEBUG_UNALIGNED_STORE
+ vec_vsx_st(from, (long)to & 15, (int*) _EIGEN_ALIGNED_PTR(to));
+}
+// Unaligned store of 4 floats via VSX vec_vsx_st.
+// Fix: use EIGEN_DEBUG_UNALIGNED_STORE (was EIGEN_DEBUG_ALIGNED_STORE) so
+// the unaligned-store debug counter is accurate and consistent with the
+// big-endian pstoreu implementation above.
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
+{
+ EIGEN_DEBUG_UNALIGNED_STORE
+ vec_vsx_st(from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
+}
+#endif
+// Prefetch hints (EIGEN_PPC_PREFETCH is defined earlier in this header).
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_PPC_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_PPC_PREFETCH(addr); }
+// Extract lane 0 by storing a single element to an aligned stack slot.
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
+// Reverse the four 32-bit lanes with a byte permute (p16uc_REVERSE32 is
+// defined earlier in this header).
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{
+ return reinterpret_cast<Packet4f>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
+}
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{
+ return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32)); }
+// Lane-wise absolute value.
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
+// Horizontal sum of the four float lanes: shift-by-8 then shift-by-4
+// pairwise additions reduce all lanes into lane 0.
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ Packet4f b, sum;
+ b = vec_sld(a, a, 8);
+ sum = a + b;
+ b = vec_sld(sum, sum, 4);
+ sum += b;
+ return pfirst(sum);
+}
+// Horizontal sums of four packets at once: a 4x4 merge-based transpose of
+// vecs[0..3] followed by lane-wise additions, so lane i of the result is
+// the sum of vecs[i].
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ Packet4f v[4], sum[4];
+ v[0] = vec_mergeh(vecs[0], vecs[2]);
+ v[1] = vec_mergel(vecs[0], vecs[2]);
+ v[2] = vec_mergeh(vecs[1], vecs[3]);
+ v[3] = vec_mergel(vecs[1], vecs[3]);
+ sum[0] = vec_mergeh(v[0], v[2]);
+ sum[1] = vec_mergel(v[0], v[2]);
+ sum[2] = vec_mergeh(v[1], v[3]);
+ sum[3] = vec_mergel(v[1], v[3]);
+ sum[0] = sum[0] + sum[1];
+ sum[1] = sum[2] + sum[3];
+ sum[0] = sum[0] + sum[1];
+ return sum[0];
+}
+// Horizontal sum of the four int lanes via vec_sums (sum-across-saturated);
+// the result lane position differs by endianness, hence the two shifts.
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i sum;
+ sum = vec_sums(a, p4i_ZERO);
+#ifdef _BIG_ENDIAN
+ sum = vec_sld(sum, p4i_ZERO, 12);
+#else
+ sum = vec_sld(p4i_ZERO, sum, 4);
+#endif
+ return pfirst(sum);
+}
+// Horizontal sums of four int packets: same transpose-then-add scheme as
+// the Packet4f preduxp above.
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ Packet4i v[4], sum[4];
+ v[0] = vec_mergeh(vecs[0], vecs[2]);
+ v[1] = vec_mergel(vecs[0], vecs[2]);
+ v[2] = vec_mergeh(vecs[1], vecs[3]);
+ v[3] = vec_mergel(vecs[1], vecs[3]);
+ sum[0] = vec_mergeh(v[0], v[2]);
+ sum[1] = vec_mergel(v[0], v[2]);
+ sum[2] = vec_mergeh(v[1], v[3]);
+ sum[3] = vec_mergel(v[1], v[3]);
+ sum[0] = sum[0] + sum[1];
+ sum[1] = sum[2] + sum[3];
+ sum[0] = sum[0] + sum[1];
+ return sum[0];
+}
+// Horizontal product of the float lanes via log2(4) = 2 shift-and-multiply
+// steps.
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+ Packet4f prod;
+ prod = pmul(a, vec_sld(a, a, 8));
+ return pfirst(pmul(prod, vec_sld(prod, prod, 4)));
+}
+// Horizontal product of the int lanes: no integer vector multiply is used
+// here; spill to an aligned buffer and multiply in scalar code.
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ return aux[0] * aux[1] * aux[2] * aux[3];
+}
+// Horizontal min/max reductions: two shift-and-compare steps fold all four
+// lanes into lane 0, extracted with pfirst.
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+ Packet4f b, res;
+ b = vec_min(a, vec_sld(a, a, 8));
+ res = vec_min(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+ Packet4i b, res;
+ b = vec_min(a, vec_sld(a, a, 8));
+ res = vec_min(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+ Packet4f b, res;
+ b = vec_max(a, vec_sld(a, a, 8));
+ res = vec_max(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+ Packet4i b, res;
+ b = vec_max(a, vec_sld(a, a, 8));
+ res = vec_max(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+// palign: shift `first` left by Offset elements, filling the vacated lanes
+// from `second`. vec_sld's byte count (4 bytes per 32-bit lane) and the
+// operand order both flip with endianness. Offset % 4 == 0 is a no-op.
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+ {
+#ifdef _BIG_ENDIAN
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(first, second, 4); break;
+ case 2:
+ first = vec_sld(first, second, 8); break;
+ case 3:
+ first = vec_sld(first, second, 12); break;
+ }
+#else
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(second, first, 12); break;
+ case 2:
+ first = vec_sld(second, first, 8); break;
+ case 3:
+ first = vec_sld(second, first, 4); break;
+ }
+#endif
+ }
+};
+// Same as the Packet4f specialization, for 4 ints.
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+#ifdef _BIG_ENDIAN
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(first, second, 4); break;
+ case 2:
+ first = vec_sld(first, second, 8); break;
+ case 3:
+ first = vec_sld(first, second, 12); break;
+ }
+#else
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(second, first, 12); break;
+ case 2:
+ first = vec_sld(second, first, 8); break;
+ case 3:
+ first = vec_sld(second, first, 4); break;
+ }
+#endif
+ }
+};
+// In-place 4x4 transpose: two rounds of vec_mergeh/vec_mergel interleaves.
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+ Packet4f t0, t1, t2, t3;
+ t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+ t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+ t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+ t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+ kernel.packet[0] = vec_mergeh(t0, t2);
+ kernel.packet[1] = vec_mergel(t0, t2);
+ kernel.packet[2] = vec_mergeh(t1, t3);
+ kernel.packet[3] = vec_mergel(t1, t3);
+}
+// Same transpose for 4 int packets.
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+ Packet4i t0, t1, t2, t3;
+ t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+ t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+ t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+ t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+ kernel.packet[0] = vec_mergeh(t0, t2);
+ kernel.packet[1] = vec_mergel(t0, t2);
+ kernel.packet[2] = vec_mergeh(t1, t3);
+ kernel.packet[3] = vec_mergel(t1, t3);
+}
+// Lane-wise select: build a mask where ifPacket.select[i] == 1 and pick
+// thenPacket lanes through vec_sel, elsePacket lanes otherwise.
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+ Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
+ Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
+ return vec_sel(elsePacket, thenPacket, mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+ Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
+ Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
+ return vec_sel(elsePacket, thenPacket, mask);
+}
+// --- VSX double-precision support (Packet2d: two doubles per vector) ---
+#ifdef __VSX__
+typedef __vector double Packet2d;
+typedef __vector unsigned long long Packet2ul;
+typedef __vector long long Packet2l;
+// clang rejects "__vector __bool long"; use the unsigned type as the
+// boolean-mask vector there.
+#if EIGEN_COMP_CLANG
+typedef Packet2ul Packet2bl;
+#else
+typedef __vector __bool long Packet2bl;
+#endif
+// Frequently used 2-lane constants.
+static Packet2l p2l_ONE = { 1, 1 };
+static Packet2l p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO);
+static Packet2d p2d_ONE = { 1.0, 1.0 };
+static Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO);
+static Packet2d p2d_MZERO = { -0.0, -0.0 };
+// {0.0, 1.0} arranged per endianness; used by plset to build {a, a+1}.
+#ifdef _BIG_ENDIAN
+static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ZERO), reinterpret_cast<Packet4f>(p2d_ONE), 8));
+#else
+static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ONE), reinterpret_cast<Packet4f>(p2d_ZERO), 8));
+#endif
+// Broadcast lane <index> of a Packet2d to both lanes via byte permute
+// (p16uc_PSET64_HI/LO are defined earlier in this header).
+template<int index> Packet2d vec_splat_dbl(Packet2d& a);
+template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<0>(Packet2d& a)
+{
+ return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_HI));
+}
+template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<1>(Packet2d& a)
+{
+ return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_LO));
+}
+// Capability flags advertised to Eigen's vectorization layer for double on
+// VSX: 2 lanes, full arithmetic, exp/sqrt/rsqrt and rounding supported;
+// sin/cos/log left to the scalar path.
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=2,
+ HasHalfPacket = 1,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasMin = 1,
+ HasMax = 1,
+ HasAbs = 1,
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 0,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1,
+ HasNegate = 1,
+ HasBlend = 1
+ };
+};
+// Reverse mapping: Packet2d holds two 16-byte-aligned doubles.
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+// Debug printers: type-pun the vector through a union and print both lanes.
+inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
+{
+ union {
+ Packet2l v;
+ int64_t n[2];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1];
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
+{
+ union {
+ Packet2d v;
+ double n[2];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1];
+ return s;
+}
+// Aligned load of two doubles (vec_vsx_ld on VSX, vec_ld otherwise).
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+#ifdef __VSX__
+ return vec_vsx_ld(0, from);
+#else
+ return vec_ld(0, from);
+#endif
+}
+// Aligned store of two doubles.
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
+{
+ EIGEN_DEBUG_ALIGNED_STORE
+#ifdef __VSX__
+ vec_vsx_st(from, 0, to);
+#else
+ vec_st(from, 0, to);
+#endif
+}
+// Broadcast a scalar to both lanes.
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
+ Packet2d v = {from, from};
+ return v;
+}
+// Broadcast a[0..3] into a0..a3: two aligned loads, each split into two
+// single-lane splats. a1/a3 double as temporaries before being splatted.
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+ Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+ a1 = pload<Packet2d>(a);
+ a0 = vec_splat_dbl<0>(a1);
+ a1 = vec_splat_dbl<1>(a1);
+ a3 = pload<Packet2d>(a+2);
+ a2 = vec_splat_dbl<0>(a3);
+ a3 = vec_splat_dbl<1>(a3);
+}
+// Strided gather/scatter bounce through an aligned stack buffer (no native
+// gather/scatter instructions are used here).
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet2d>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ pstore<double>(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+// Lane-wise arithmetic/bitwise primitives for Packet2d. pmul uses
+// vec_madd with a -0.0 addend so signed zeros are preserved; pandnot
+// computes a & ~b via vec_nor(b, b).
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return pset1<Packet2d>(a) + p2d_COUNTDOWN; }
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return a + b; }
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return a - b; }
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_MZERO); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
+// Unaligned load of two doubles via VSX vec_vsx_ld.
+// Fix: this is an unaligned load, so count it with
+// EIGEN_DEBUG_UNALIGNED_LOAD (the original used EIGEN_DEBUG_ALIGNED_LOAD,
+// skewing Eigen's debug load counters; upstream Eigen uses the unaligned
+// macro here).
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return (Packet2d) vec_vsx_ld((long)from & 15, (const double*) _EIGEN_ALIGNED_PTR(from));
+}
+// Load one double and broadcast it to both lanes, choosing the aligned or
+// unaligned load by the pointer's 16-byte alignment.
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{
+ Packet2d p;
+ if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from);
+ else p = ploadu<Packet2d>(from);
+ return vec_splat_dbl<0>(p);
+}
+// Unaligned store of two doubles; the bit-preserving cast to Packet4f is
+// only to match the vec_vsx_st overload used here.
+// Fix: count it as an UNALIGNED store (the original used
+// EIGEN_DEBUG_ALIGNED_STORE, miscounting unaligned stores in Eigen's debug
+// instrumentation; upstream Eigen uses the unaligned macro here).
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from)
+{
+ EIGEN_DEBUG_UNALIGNED_STORE
+ vec_vsx_st((Packet4f)from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
+}
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_PPC_PREFETCH(addr); }
+// Extract lane 0 by spilling both lanes to an aligned stack buffer.
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore<double>(x, a); return x[0]; }
+// Swap the two 64-bit lanes (p16uc_REVERSE64 is defined earlier).
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{
+ return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
+}
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
+// Horizontal sum: swap lanes with an 8-byte vec_sld (done through a Packet4f
+// reinterpret since vec_sld works on 4x32-bit vectors) and add.
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+ Packet2d b, sum;
+ b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(a), reinterpret_cast<Packet4f>(a), 8));
+ sum = a + b;
+ return pfirst<Packet2d>(sum);
+}
+// Horizontal sums of two packets: lane i of the result is the sum of
+// vecs[i]; the final vec_sld operand order flips with endianness.
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ Packet2d v[2], sum;
+ v[0] = vecs[0] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[0]), reinterpret_cast<Packet4f>(vecs[0]), 8));
+ v[1] = vecs[1] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[1]), reinterpret_cast<Packet4f>(vecs[1]), 8));
+#ifdef _BIG_ENDIAN
+ sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[0]), reinterpret_cast<Packet4f>(v[1]), 8));
+#else
+ sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[1]), reinterpret_cast<Packet4f>(v[0]), 8));
+#endif
+ return sum;
+}
+// Horizontal product / min / max: combine the packet with its lane-swapped
+// copy and take lane 0.
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmin(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmax(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
+}
+// palign for Packet2d: only Offset == 1 needs work (shift one 64-bit lane,
+// refilling from `second`); operand order flips with endianness.
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset == 1)
+#ifdef _BIG_ENDIAN
+ first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(first), reinterpret_cast<Packet4ui>(second), 8));
+#else
+ first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(second), reinterpret_cast<Packet4ui>(first), 8));
+#endif
+ }
+};
+// 2x2 transpose via 64-bit lane permutes (p16uc_TRANSPOSE64_HI/LO are
+// defined earlier in this header).
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ Packet2d t0, t1;
+ t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
+ t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
+ kernel.packet[0] = t0;
+ kernel.packet[1] = t1;
+}
+// Lane-wise select for doubles, mirroring the Packet4f/Packet4i pblend.
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+ Packet2l select = { ifPacket.select[0], ifPacket.select[1] };
+ Packet2bl mask = vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE));
+ return vec_sel(elsePacket, thenPacket, mask);
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/AltiVec/PacketMath.h"
+// #include "src/Core/arch/AltiVec/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+namespace Eigen {
+namespace internal {
+// Polynomial and range constants for the vectorized log/exp below. The
+// cephes_* coefficients follow the Cephes math library's minimax
+// approximations; 0x7f and 23 are the float exponent bias and mantissa
+// width used to assemble/extract exponents.
+static _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+static _EIGEN_DECLARE_CONST_Packet4i(23, 23);
+static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
+static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
+static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);
+static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
+// exp_hi/exp_lo clamp pexp's input to the range where float exp is finite.
+static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
+static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+// Double-precision counterparts (Padé-style P/Q coefficients) for
+// pexp<Packet2d>.
+#ifdef __VSX__
+static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
+static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+// 1023 and 52 are the double exponent bias and mantissa width, used to
+// assemble 2^n on POWER8.
+#ifdef __POWER8_VECTOR__
+static Packet2l p2l_1023 = { 1023, 1023 };
+static Packet2ul p2ul_52 = { 52, 52 };
+#endif
+#endif
+// Vectorized natural log for 4 floats (Cephes-style): split x into
+// mantissa m in [0.5, 1) and exponent e, evaluate a degree-8 polynomial in
+// (m - 1), then add e * ln(2) in two parts (q1 + q2) for extra precision.
+// x <= 0 yields -inf (for 0) or NaN (for negatives) via the two masks.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f plog<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ Packet4i emm0;
+ Packet4ui isvalid_mask = reinterpret_cast<Packet4ui>(vec_cmpge(x, p4f_ZERO));
+ Packet4ui iszero_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(x, p4f_ZERO));
+ x = pmax(x, p4f_min_norm_pos);
+ // Extract the biased exponent by shifting out the 23 mantissa bits.
+ emm0 = vec_sr(reinterpret_cast<Packet4i>(x),
+ reinterpret_cast<Packet4ui>(p4i_23));
+ // Keep only the mantissa and rebuild a float in [0.5, 1).
+ x = pand(x, p4f_inv_mant_mask);
+ x = por(x, p4f_half);
+ emm0 = psub(emm0, p4i_0x7f);
+ Packet4f e = padd(vec_ctf(emm0, 0), p4f_1);
+ // If x < sqrt(1/2), fold x into [sqrt(1/2), sqrt(2)) and adjust e.
+ Packet4f mask = reinterpret_cast<Packet4f>(vec_cmplt(x, p4f_cephes_SQRTHF));
+ Packet4f tmp = pand(x, mask);
+ x = psub(x, p4f_1);
+ e = psub(e, pand(p4f_1, mask));
+ x = padd(x, tmp);
+ Packet4f x2 = pmul(x,x);
+ Packet4f x3 = pmul(x2,x);
+ // Degree-8 polynomial evaluated as three interleaved degree-2 pieces.
+ Packet4f y, y1, y2;
+ y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
+ y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
+ y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
+ y = pmadd(y , x, p4f_cephes_log_p2);
+ y1 = pmadd(y1, x, p4f_cephes_log_p5);
+ y2 = pmadd(y2, x, p4f_cephes_log_p8);
+ y = pmadd(y, x3, y1);
+ y = pmadd(y, x3, y2);
+ y = pmul(y, x3);
+ // Add e*ln(2) split into high (q2) and low (q1) parts.
+ y1 = pmul(e, p4f_cephes_log_q1);
+ tmp = pmul(x2, p4f_half);
+ y = padd(y, y1);
+ x = psub(x, tmp);
+ y2 = pmul(e, p4f_cephes_log_q2);
+ x = padd(x, y);
+ x = padd(x, y2);
+ // Special cases: log(0) = -inf, log(negative) = NaN.
+ x = vec_sel(x, p4f_minus_inf, iszero_mask);
+ x = vec_sel(p4f_minus_nan, x, isvalid_mask);
+ return x;
+}
+// Vectorized exp for 4 floats (Cephes-style): clamp the input, split
+// x = n*ln(2) + r with n = round(x/ln(2)) (ln(2) applied in two parts
+// C1 + C2 for precision), evaluate a degree-5 polynomial for exp(r), and
+// scale by 2^n assembled directly in the float exponent bits. NaN inputs
+// propagate via the final isnumber_mask select.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ Packet4f tmp, fx;
+ Packet4i emm0;
+ x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
+ // fx = floor(x/ln(2) + 0.5) — nearest integer multiple of ln(2).
+ fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
+ fx = pfloor(fx);
+ tmp = pmul(fx, p4f_cephes_exp_C1);
+ Packet4f z = pmul(fx, p4f_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+ z = pmul(x,x);
+ Packet4f y = p4f_cephes_exp_p0;
+ y = pmadd(y, x, p4f_cephes_exp_p1);
+ y = pmadd(y, x, p4f_cephes_exp_p2);
+ y = pmadd(y, x, p4f_cephes_exp_p3);
+ y = pmadd(y, x, p4f_cephes_exp_p4);
+ y = pmadd(y, x, p4f_cephes_exp_p5);
+ y = pmadd(y, z, x);
+ y = padd(y, p4f_1);
+ // Build 2^n: add the exponent bias (0x7f) and shift into the exponent
+ // field (23 bits).
+ emm0 = vec_cts(fx, 0);
+ emm0 = vec_add(emm0, p4i_0x7f);
+ emm0 = vec_sl(emm0, reinterpret_cast<Packet4ui>(p4i_23));
+ Packet4ui isnumber_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(_x, _x));
+ return vec_sel(_x, pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x),
+ isnumber_mask);
+}
+// Reciprocal square root via the hardware estimate instruction; excluded
+// under clang (guarded by EIGEN_COMP_CLANG).
+#ifndef EIGEN_COMP_CLANG
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& x)
+{
+ return vec_rsqrt(x);
+}
+#endif
+#ifdef __VSX__
+#ifndef EIGEN_COMP_CLANG
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x)
+{
+ return vec_rsqrt(x);
+}
+#endif
+// Full-precision square roots (VSX only).
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& x)
+{
+ return vec_sqrt(x);
+}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d psqrt<Packet2d>(const Packet2d& x)
+{
+ return vec_sqrt(x);
+}
+// Truncating double -> int64 lane conversion. Newer GCC (>= 5.4, or 6.1
+// with the patchlevel fix) has a working vec_cts for this; older compilers
+// fall back to a scalar memcpy-based conversion.
+static inline Packet2l ConvertToPacket2l(const Packet2d& x) {
+#if EIGEN_GNUC_AT_LEAST(5, 4) || \
+ (EIGEN_GNUC_AT(6, 1) && __GNUC_PATCHLEVEL__ >= 1)
+ return vec_cts(x, 0);
+#else
+ double tmp[2];
+ memcpy(tmp, &x, sizeof(tmp));
+ Packet2l l = { static_cast<long long>(tmp[0]),
+ static_cast<long long>(tmp[1]) };
+ return l;
+#endif
+}
+// Vectorized exp for 2 doubles (Cephes-style): argument reduction as in the
+// float version, then a rational (Padé) approximation
+// exp(r) ~= 1 + 2*px/(qx - px), and finally scaling by 2^n built in the
+// double exponent bits — directly on POWER8, or via a 32-bit-lane
+// add/shift/permute on older targets.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+ Packet2d x = _x;
+ Packet2d tmp, fx;
+ Packet2l emm0;
+ x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+ fx = pmadd(x, p2d_cephes_LOG2EF, p2d_half);
+ fx = pfloor(fx);
+ tmp = pmul(fx, p2d_cephes_exp_C1);
+ Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+ Packet2d x2 = pmul(x,x);
+ // Numerator px and denominator qx of the rational approximation.
+ Packet2d px = p2d_cephes_exp_p0;
+ px = pmadd(px, x2, p2d_cephes_exp_p1);
+ px = pmadd(px, x2, p2d_cephes_exp_p2);
+ px = pmul (px, x);
+ Packet2d qx = p2d_cephes_exp_q0;
+ qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+ x = pdiv(px,psub(qx,px));
+ x = pmadd(p2d_2,x,p2d_1);
+ emm0 = ConvertToPacket2l(fx);
+#ifdef __POWER8_VECTOR__
+ // 2^n directly: add the bias (1023) and shift into the 52-bit-mantissa
+ // exponent field.
+ emm0 = vec_add(emm0, p2l_1023);
+ emm0 = vec_sl(emm0, p2ul_52);
+#else
+ // Pre-POWER8: no 64-bit shift; operate on the high 32-bit half of each
+ // lane (bias 1023, shift 20) and zero the low halves with a permute.
+ _EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
+ _EIGEN_DECLARE_CONST_Packet4i(20, 20);
+ Packet4i emm04i = reinterpret_cast<Packet4i>(emm0);
+ emm04i = vec_add(emm04i, p4i_1023);
+ emm04i = vec_sl(emm04i, reinterpret_cast<Packet4ui>(p4i_20));
+ static const Packet16uc perm = {
+ 0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03,
+ 0x1c, 0x1d, 0x1e, 0x1f, 0x08, 0x09, 0x0a, 0x0b };
+#ifdef _BIG_ENDIAN
+ emm0 = reinterpret_cast<Packet2l>(vec_perm(p4i_ZERO, emm04i, perm));
+#else
+ emm0 = reinterpret_cast<Packet2l>(vec_perm(emm04i, p4i_ZERO, perm));
+#endif
+#endif
+ // NaN inputs propagate unchanged.
+ Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
+ return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
+ isnumber_mask);
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/AltiVec/MathFunctions.h"
+// #include "src/Core/arch/AltiVec/Complex.h"
+#ifndef EIGEN_COMPLEX32_ALTIVEC_H
+#define EIGEN_COMPLEX32_ALTIVEC_H
+namespace Eigen {
+namespace internal {
+// Sign-flip masks for complex conjugation: p4ui_CONJ_XOR flips the
+// imaginary parts of two packed complex<float>; the p2ul_CONJ_XOR1/2 pair
+// flips one 64-bit half each for complex<double>, arranged per endianness.
+static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);
+#ifdef __VSX__
+#if defined(_BIG_ENDIAN)
+static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);
+static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);
+#else
+static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);
+static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);
+#endif
+#endif
+// Two complex<float> values packed as four floats (re0, im0, re1, im1).
+struct Packet2cf
+{
+ EIGEN_STRONG_INLINE explicit Packet2cf() : v(p4f_ZERO) {}
+ EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
+ Packet4f v;
+};
+// Capability flags for std::complex<float>: 2 complex lanes, full
+// arithmetic, no abs/min/max (not meaningful for complex), blend only on
+// VSX.
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+ typedef Packet2cf type;
+ typedef Packet2cf half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+#ifdef __VSX__
+ HasBlend = 1,
+#endif
+ HasSetLinear = 0
+ };
+};
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+// Broadcast one complex<float> to both complex lanes: load its (re, im)
+// pair with the alignment-appropriate load, then replicate the 64-bit pair
+// via permute.
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
+{
+ Packet2cf res;
+ if((std::ptrdiff_t(&from) % 16) == 0)
+ res.v = pload<Packet4f>((const float *)&from);
+ else
+ res.v = ploadu<Packet4f>((const float *)&from);
+ res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);
+ return res;
+}
+// Loads/stores delegate to the Packet4f primitives on the raw float view.
+template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { return Packet2cf(pload<Packet4f>((const float *) from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { return Packet2cf(ploadu<Packet4f>((const float*) from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstore((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
+// Strided gather/scatter through an aligned stack buffer, as for Packet2d.
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+ std::complex<float> EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet2cf>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+ std::complex<float> EIGEN_ALIGN16 af[2];
+ pstore<std::complex<float> >((std::complex<float> *) af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+// Complex add/sub/negate are component-wise on the float view; pconj XORs
+// the imaginary lanes' sign bits.
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
+// Complex multiply: v1 = splat of a's real parts, v2 = splat of a's
+// imaginary parts (p16uc_PSET32_WODD/WEVEN are defined earlier); combine
+// v1*b and (v2*b with flipped signs, lanes swapped) to realize
+// (ar+i*ai)*(br+i*bi).
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet4f v1, v2;
+ v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
+ v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
+ v1 = vec_madd(v1, b.v, p4f_ZERO);
+ v2 = vec_madd(v2, b.v, p4f_ZERO);
+ v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));
+ v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
+ return Packet2cf(padd<Packet4f>(v1, v2));
+}
+// Bitwise ops and prefetch delegate to the Packet4f primitives.
+template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_PPC_PREFETCH(addr); }
+// Extract the first complex lane via an aligned stack spill.
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
+{
+ std::complex<float> EIGEN_ALIGN16 res[2];
+ pstore((float *)&res, a.v);
+ return res[0];
+}
+// Swap the two complex lanes (p16uc_COMPLEX32_REV2 permutes 64-bit pairs).
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
+{
+ Packet4f rev_a;
+ rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);
+ return Packet2cf(rev_a);
+}
+// Horizontal complex sum: add the lane-swapped copy, take lane 0.
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+ Packet4f b;
+ b = vec_sld(a.v, a.v, 8);
+ b = padd<Packet4f>(a.v, b);
+ return pfirst<Packet2cf>(Packet2cf(b));
+}
+// Horizontal sums of two complex packets: lane i of the result is the sum
+// of vecs[i]; the vec_sld operand order flips with endianness.
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+ Packet4f b1, b2;
+#ifdef _BIG_ENDIAN
+ b1 = vec_sld(vecs[0].v, vecs[1].v, 8);
+ b2 = vec_sld(vecs[1].v, vecs[0].v, 8);
+#else
+ b1 = vec_sld(vecs[1].v, vecs[0].v, 8);
+ b2 = vec_sld(vecs[0].v, vecs[1].v, 8);
+#endif
+ b2 = vec_sld(b2, b2, 8);
+ b2 = padd<Packet4f>(b1, b2);
+ return Packet2cf(b2);
+}
+// Horizontal complex product: multiply by the lane-swapped copy, take
+// lane 0.
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+ Packet4f b;
+ Packet2cf prod;
+ b = vec_sld(a.v, a.v, 8);
+ prod = pmul<Packet2cf>(a, Packet2cf(b));
+ return pfirst<Packet2cf>(prod);
+}
+// palign for Packet2cf: only Offset == 1 needs work — shift one complex
+// lane, refilling from `second`; operand order flips with endianness.
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+ {
+ if (Offset==1)
+ {
+#ifdef _BIG_ENDIAN
+ first.v = vec_sld(first.v, second.v, 8);
+#else
+ first.v = vec_sld(second.v, first.v, 8);
+#endif
+ }
+ }
+};
+// conj_helper variants: the two bool template parameters select which
+// operand(s) are conjugated before/after the complex multiply.
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+template<> struct conj_helper<Packet4f, Packet2cf, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
+ { return Packet2cf(internal::pmul<Packet4f>(x, y.v)); }
+};
+template<> struct conj_helper<Packet2cf, Packet4f, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
+ { return Packet2cf(internal::pmul<Packet4f>(x.v, y)); }
+};
+// Complex division a/b = (a * conj(b)) / |b|^2. `s` holds (re^2, im^2) pairs;
+// adding `s` to its re/im-swapped permutation puts |b|^2 into both lanes of
+// each element before the element-wise divide.
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a, b);
+ Packet4f s = pmul<Packet4f>(b.v, b.v);
+ return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
+}
+// Swap real and imaginary parts within each complex element.
+template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
+{
+ return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV));
+}
+// In-register 2x2 transpose of two Packet2cf, treating each complex as one
+// 64-bit unit (HI/LO masks pick the high/low halves of both inputs).
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
+{
+ Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+ kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
+ kernel.packet[0].v = tmp;
+}
+#ifdef __VSX__
+// Per-element select (VSX only): reuse the double-precision blend since each
+// complex<float> occupies one 64-bit lane, then reinterpret back to float.
+template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
+ Packet2cf result;
+ result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
+ return result;
+}
+#endif
+#ifdef __VSX__
+// ---- Packet1cd: one complex<double> held in a VSX 2 x double register ----
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+ // v holds (real, imag) as the two double lanes.
+ Packet2d v;
+};
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ // NOTE(review): AlignedOnScalar=0 presumably because a scalar
+ // complex<double> is not guaranteed 16-byte aligned — confirm.
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+// Loads/stores forward to the double-precision primitives on the raw
+// (re,im) pair.
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstoreu((double*)to, from.v); }
+// pset1 uses the unaligned load since `from` may live on the stack.
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ return ploadu<Packet1cd>(&from); }
+// Strided gather/scatter go through a small aligned stack buffer.
+// NOTE(review): both touch index 1 even though the packet holds a single
+// element — appears to mirror the generic two-element template; confirm.
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
+{
+ std::complex<double> EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet1cd>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
+{
+ std::complex<double> EIGEN_ALIGN16 af[2];
+ pstore<std::complex<double> >(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+// Element-wise complex add/sub are plain vector add/sub on the (re,im) lanes.
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+// Conjugate by XOR-flipping the sign bit of the imaginary lane.
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); }
+// Complex multiply: broadcast re(a) and im(a), form re(a)*b and im(a)*b,
+// swap the lanes of the latter and sign-flip one lane so the final add yields
+// (re(a)re(b)-im(a)im(b), re(a)im(b)+im(a)re(b)).
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet2d a_re, a_im, v1, v2;
+ a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
+ a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
+ v1 = vec_madd(a_re, b.v, p2d_ZERO);
+ v2 = vec_madd(a_im, b.v, p2d_ZERO);
+ v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));
+ v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));
+ return Packet1cd(padd<Packet2d>(v1, v2));
+}
+// Bitwise ops operate on the raw register contents.
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pandnot(a.v, b.v)); }
+// With a single element per packet, "duplicate" is just a broadcast load.
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_PPC_PREFETCH(addr); }
+// Extract the (single) element through an aligned stack spill.
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ std::complex<double> EIGEN_ALIGN16 res[2];
+ pstore<std::complex<double> >(res, a);
+ return res[0];
+}
+// All permutations/reductions of a 1-element packet are trivial identities.
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+// Alignment shifting is meaningless for a 1-element packet: no-op.
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
+ {
+ }
+};
+// conj_helper specializations for Packet1cd — same structure as the
+// Packet2cf family: conjugate the flagged operand(s), then delegate to the
+// plain complex pmul (or scalar Packet2d pmul for real*complex).
+// a * conj(b)
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+// conj(a) * b
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+// conj(a) * conj(b) == conj(a * b)
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+// real * complex: element-wise scale of the (re,im) lanes.
+template<> struct conj_helper<Packet2d, Packet1cd, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
+ { return Packet1cd(internal::pmul<Packet2d>(x, y.v)); }
+};
+// complex * real: mirror of the above.
+template<> struct conj_helper<Packet1cd, Packet2d, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
+ { return Packet1cd(internal::pmul<Packet2d>(x.v, y)); }
+};
+// Complex division a/b = (a * conj(b)) / |b|^2; the reversed-lane add places
+// re(b)^2 + im(b)^2 into both lanes before the divide.
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ Packet2d s = pmul<Packet2d>(b.v, b.v);
+ return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));
+}
+// Swap real and imaginary parts (lane reversal of the 2 x double register).
+EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+// 2x2 transpose over the 64-bit complex units of the two packets.
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+ Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+ kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
+ kernel.packet[0].v = tmp;
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/AltiVec/Complex.h"
+#elif defined EIGEN_VECTORIZE_NEON
+// #include "src/Core/arch/NEON/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_NEON_H
+#define EIGEN_PACKET_MATH_NEON_H
+namespace Eigen {
+namespace internal {
+// ---- NEON configuration: tuning knobs, packet typedefs, helper macros ----
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+// NEON provides fused/accumulating multiply-add instructions.
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#endif
+// AArch64 has 32 q-registers, 32-bit ARM only 16.
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#if EIGEN_ARCH_ARM64
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
+#else
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
+#endif
+#endif
+// Packet aliases over the raw NEON vector types.
+typedef float32x2_t Packet2f;
+typedef float32x4_t Packet4f;
+typedef int32x4_t Packet4i;
+typedef int32x2_t Packet2i;
+typedef uint32x4_t Packet4ui;
+// Declare a local packet constant named p4f_/p4i_<NAME> broadcast from X.
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+ const Packet4f p4f_##NAME = pset1<Packet4f>(X)
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+ const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int32_t>(X))
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ const Packet4i p4i_##NAME = pset1<Packet4i>(X)
+// Pick the best available prefetch: PRFM on AArch64, compiler builtin,
+// compiler __pld, raw PLD asm on ARM32, or nothing.
+#if EIGEN_ARCH_ARM64
+ #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : );
+#elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
+ #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
+#elif defined __pld
+ #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
+#elif EIGEN_ARCH_ARM32
+ #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : );
+#else
+ #define EIGEN_ARM_PREFETCH(ADDR)
+#endif
+// Capabilities of the 4 x float NEON packet. pexp is vectorized below;
+// sin/cos/log/sqrt fall back to scalar code.
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet4f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 4,
+ HasHalfPacket=0,
+ HasDiv = 1,
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 0,
+ HasExp = 1,
+ HasSqrt = 0
+ };
+};
+// Capabilities of the 4 x int32 NEON packet.
+template<> struct packet_traits<int32_t> : default_packet_traits
+{
+ typedef Packet4i type;
+ typedef Packet4i half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket=0
+ };
+};
+// Workaround for GCC <= 4.4: its vld1/vst1 intrinsics take float32_t*
+// instead of float*, so wrap them with casting shims.
+#if EIGEN_GNUC_AT_MOST(4,4) && !EIGEN_COMP_LLVM
+EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
+EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
+EIGEN_STRONG_INLINE float32x2_t vld1_dup_f32 (const float* x) { return ::vld1_dup_f32 ((const float32_t*)x); }
+EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }
+EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
+#endif
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet4i> { typedef int32_t type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+// Broadcast a scalar to all four lanes.
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) { return vdupq_n_s32(from); }
+// plset: (a, a+1, a+2, a+3) — broadcast plus a constant 0..3 ramp.
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
+{
+ const float f[] = {0, 1, 2, 3};
+ Packet4f countdown = vld1q_f32(f);
+ return vaddq_f32(pset1<Packet4f>(a), countdown);
+}
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a)
+{
+ const int32_t i[] = {0, 1, 2, 3};
+ Packet4i countdown = vld1q_s32(i);
+ return vaddq_s32(pset1<Packet4i>(a), countdown);
+}
+// Element-wise arithmetic: thin wrappers over the matching NEON intrinsic.
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return vnegq_f32(a); }
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return vnegq_s32(a); }
+// Conjugation is the identity for real packets.
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); }
+// Division: AArch64 has a hardware divide; ARM32 approximates with a
+// reciprocal estimate refined by one Newton-Raphson step.
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+#if EIGEN_ARCH_ARM64
+ return vdivq_f32(a,b);
+#else
+ Packet4f inv, restep, div;
+ inv = vrecpeq_f32(b);
+ restep = vrecpsq_f32(b, inv);
+ inv = vmulq_f32(restep, inv);
+ div = vmulq_f32(a, inv);
+ return div;
+#endif
+}
+// No NEON integer divide: assert in debug builds, return zeros otherwise.
+template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& , const Packet4i& )
+{ eigen_assert(false && "packet integer division are not supported by NEON");
+ return pset1<Packet4i>(0);
+}
+// Fused multiply-add a*b+c. The inline-asm path works around clang/ARM32
+// generating poor code for vmlaq_f32 (see the compiler guards).
+#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM)
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
+#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM
+ Packet4f r = c;
+ asm volatile(
+ "vmla.f32 %q[r], %q[a], %q[b]"
+ : [r] "+w" (r)
+ : [a] "w" (a),
+ [b] "w" (b)
+ : );
+ return r;
+#else
+ return vmlaq_f32(c,a,b);
+#endif
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return vmlaq_s32(c,a,b); }
+// Element-wise min/max.
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); }
+// Bitwise ops on floats reinterpret to u32 lanes, operate, and cast back;
+// integer packets use the native intrinsics directly.
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); }
+// pandnot(a,b) == a & ~b (vbic).
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
+// Loads/stores: NEON vld1/vst1 do not require alignment, so the aligned and
+// unaligned variants differ only in their debug markers.
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
+// ploaddup: load 2 scalars and duplicate each -> (a0,a0,a1,a1).
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+ float32x2_t lo, hi;
+ lo = vld1_dup_f32(from);
+ hi = vld1_dup_f32(from+1);
+ return vcombine_f32(lo, hi);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from)
+{
+ int32x2_t lo, hi;
+ lo = vld1_dup_s32(from);
+ hi = vld1_dup_s32(from+1);
+ return vcombine_s32(lo, hi);
+}
+template<> EIGEN_STRONG_INLINE void pstore<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
+// Strided gather: insert each strided scalar into its lane one at a time
+// (NEON has no gather instruction).
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+ Packet4f res = pset1<Packet4f>(0.f);
+ res = vsetq_lane_f32(from[0*stride], res, 0);
+ res = vsetq_lane_f32(from[1*stride], res, 1);
+ res = vsetq_lane_f32(from[2*stride], res, 2);
+ res = vsetq_lane_f32(from[3*stride], res, 3);
+ return res;
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride)
+{
+ Packet4i res = pset1<Packet4i>(0);
+ res = vsetq_lane_s32(from[0*stride], res, 0);
+ res = vsetq_lane_s32(from[1*stride], res, 1);
+ res = vsetq_lane_s32(from[2*stride], res, 2);
+ res = vsetq_lane_s32(from[3*stride], res, 3);
+ return res;
+}
+// Strided scatter: extract each lane and store it at its strided slot.
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+ to[stride*0] = vgetq_lane_f32(from, 0);
+ to[stride*1] = vgetq_lane_f32(from, 1);
+ to[stride*2] = vgetq_lane_f32(from, 2);
+ to[stride*3] = vgetq_lane_f32(from, 3);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from, Index stride)
+{
+ to[stride*0] = vgetq_lane_s32(from, 0);
+ to[stride*1] = vgetq_lane_s32(from, 1);
+ to[stride*2] = vgetq_lane_s32(from, 2);
+ to[stride*3] = vgetq_lane_s32(from, 3);
+}
+template<> EIGEN_STRONG_INLINE void prefetch<float> (const float* addr) { EIGEN_ARM_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) { EIGEN_ARM_PREFETCH(addr); }
+// pfirst: spill the packet to an aligned stack buffer and read element 0.
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
+template<> EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) { int32_t EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
+// Full reversal: reverse within each 64-bit half (vrev64), then swap halves.
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
+ float32x2_t a_lo, a_hi;
+ Packet4f a_r64;
+ a_r64 = vrev64q_f32(a);
+ a_lo = vget_low_f32(a_r64);
+ a_hi = vget_high_f32(a_r64);
+ return vcombine_f32(a_hi, a_lo);
+}
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
+ int32x2_t a_lo, a_hi;
+ Packet4i a_r64;
+ a_r64 = vrev64q_s32(a);
+ a_lo = vget_low_s32(a_r64);
+ a_hi = vget_high_s32(a_r64);
+ return vcombine_s32(a_hi, a_lo);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }
+// Horizontal sum: pairwise-add the two 64-bit halves, then pairwise-add
+// again to collapse to a single lane.
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, sum;
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ sum = vpadd_f32(a_lo, a_hi);
+ sum = vpadd_f32(sum, sum);
+ return vget_lane_f32(sum, 0);
+}
+// Reduce four packets at once: two zip (transpose) stages turn rows into
+// columns, so output lane i is the horizontal sum of vecs[i].
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ float32x4x2_t vtrn1, vtrn2, res1, res2;
+ Packet4f sum1, sum2, sum;
+ vtrn1 = vzipq_f32(vecs[0], vecs[2]);
+ vtrn2 = vzipq_f32(vecs[1], vecs[3]);
+ res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]);
+ res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]);
+ sum1 = vaddq_f32(res1.val[0], res1.val[1]);
+ sum2 = vaddq_f32(res2.val[0], res2.val[1]);
+ sum = vaddq_f32(sum1, sum2);
+ return sum;
+}
+template<> EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, sum;
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ sum = vpadd_s32(a_lo, a_hi);
+ sum = vpadd_s32(sum, sum);
+ return vget_lane_s32(sum, 0);
+}
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ int32x4x2_t vtrn1, vtrn2, res1, res2;
+ Packet4i sum1, sum2, sum;
+ vtrn1 = vzipq_s32(vecs[0], vecs[2]);
+ vtrn2 = vzipq_s32(vecs[1], vecs[3]);
+ res1 = vzipq_s32(vtrn1.val[0], vtrn2.val[0]);
+ res2 = vzipq_s32(vtrn1.val[1], vtrn2.val[1]);
+ sum1 = vaddq_s32(res1.val[0], res1.val[1]);
+ sum2 = vaddq_s32(res2.val[0], res2.val[1]);
+ sum = vaddq_s32(sum1, sum2);
+ return sum;
+}
+// Horizontal product: multiply halves together, then multiply the 2-lane
+// result by its own reversal to fold the remaining pair.
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, prod;
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ prod = vmul_f32(a_lo, a_hi);
+ prod = vmul_f32(prod, vrev64_f32(prod));
+ return vget_lane_f32(prod, 0);
+}
+template<> EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, prod;
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ prod = vmul_s32(a_lo, a_hi);
+ prod = vmul_s32(prod, vrev64_s32(prod));
+ return vget_lane_s32(prod, 0);
+}
+// Horizontal min/max: pairwise reduce the two halves, then pairwise reduce
+// once more to collapse to lane 0.
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, min;
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ min = vpmin_f32(a_lo, a_hi);
+ min = vpmin_f32(min, min);
+ return vget_lane_f32(min, 0);
+}
+template<> EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, min;
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ min = vpmin_s32(a_lo, a_hi);
+ min = vpmin_s32(min, min);
+ return vget_lane_s32(min, 0);
+}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, max;
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ max = vpmax_f32(a_lo, a_hi);
+ max = vpmax_f32(max, max);
+ return vget_lane_f32(max, 0);
+}
+template<> EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, max;
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ max = vpmax_s32(a_lo, a_hi);
+ max = vpmax_s32(max, max);
+ return vget_lane_s32(max, 0);
+}
+// Generate palign_impl specializations via vext: shift Offset elements out
+// of `first` and pull Offset elements in from `second`. The empty line after
+// the definition terminates the backslash continuation of the macro body.
+#define PALIGN_NEON(Offset,Type,Command) \
+template<>\
+struct palign_impl<Offset,Type>\
+{\
+ EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
+ {\
+ if (Offset!=0)\
+ first = Command(first, second, Offset);\
+ }\
+};\
+
+PALIGN_NEON(0,Packet4f,vextq_f32)
+PALIGN_NEON(1,Packet4f,vextq_f32)
+PALIGN_NEON(2,Packet4f,vextq_f32)
+PALIGN_NEON(3,Packet4f,vextq_f32)
+PALIGN_NEON(0,Packet4i,vextq_s32)
+PALIGN_NEON(1,Packet4i,vextq_s32)
+PALIGN_NEON(2,Packet4i,vextq_s32)
+PALIGN_NEON(3,Packet4i,vextq_s32)
+#undef PALIGN_NEON
+// 4x4 in-register transpose via two zip stages and half recombination.
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+ float32x4x2_t tmp1 = vzipq_f32(kernel.packet[0], kernel.packet[1]);
+ float32x4x2_t tmp2 = vzipq_f32(kernel.packet[2], kernel.packet[3]);
+ kernel.packet[0] = vcombine_f32(vget_low_f32(tmp1.val[0]), vget_low_f32(tmp2.val[0]));
+ kernel.packet[1] = vcombine_f32(vget_high_f32(tmp1.val[0]), vget_high_f32(tmp2.val[0]));
+ kernel.packet[2] = vcombine_f32(vget_low_f32(tmp1.val[1]), vget_low_f32(tmp2.val[1]));
+ kernel.packet[3] = vcombine_f32(vget_high_f32(tmp1.val[1]), vget_high_f32(tmp2.val[1]));
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+ int32x4x2_t tmp1 = vzipq_s32(kernel.packet[0], kernel.packet[1]);
+ int32x4x2_t tmp2 = vzipq_s32(kernel.packet[2], kernel.packet[3]);
+ kernel.packet[0] = vcombine_s32(vget_low_s32(tmp1.val[0]), vget_low_s32(tmp2.val[0]));
+ kernel.packet[1] = vcombine_s32(vget_high_s32(tmp1.val[0]), vget_high_s32(tmp2.val[0]));
+ kernel.packet[2] = vcombine_s32(vget_low_s32(tmp1.val[1]), vget_low_s32(tmp2.val[1]));
+ kernel.packet[3] = vcombine_s32(vget_high_s32(tmp1.val[1]), vget_high_s32(tmp2.val[1]));
+}
+// Older Apple clang (< 6.1) miscompiles float64x2_t — gate the whole
+// double-precision section on that.
+#ifdef __apple_build_version__
+#define EIGEN_APPLE_DOUBLE_NEON_BUG (__apple_build_version__ < 6010000)
+#else
+#define EIGEN_APPLE_DOUBLE_NEON_BUG 0
+#endif
+#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
+// Some toolchains lack the u64<->f64 reinterpret intrinsics; these templated
+// shims provide them via a plain vector cast.
+template <typename T>
+uint64x2_t vreinterpretq_u64_f64(T a)
+{
+ return (uint64x2_t) a;
+}
+template <typename T>
+float64x2_t vreinterpretq_f64_u64(T a)
+{
+ return (float64x2_t) a;
+}
+typedef float64x2_t Packet2d;
+typedef float64x1_t Packet1d;
+// Capabilities of the 2 x double AArch64 packet; no vectorized math
+// functions (exp/log/sin/cos/sqrt all fall back to scalar).
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+ HasHalfPacket=0,
+ HasDiv = 1,
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 0,
+ HasExp = 0,
+ HasSqrt = 0
+ };
+};
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+// Broadcast and linear-ramp construction for double packets.
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); }
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a)
+{
+ const double countdown_raw[] = {0.0,1.0};
+ const Packet2d countdown = vld1q_f64(countdown_raw);
+ return vaddq_f64(pset1<Packet2d>(a), countdown);
+}
+// Element-wise arithmetic: thin wrappers over the AArch64 intrinsics.
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vsubq_f64(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return vnegq_f64(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmulq_f64(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vdivq_f64(a,b); }
+// a*b+c: fused when FMA is available, otherwise a multiply-accumulate.
+#ifdef __ARM_FEATURE_FMA
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vfmaq_f64(c,a,b); }
+#else
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vmlaq_f64(c,a,b); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vminq_f64(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxq_f64(a,b); }
+// Bitwise ops via u64 reinterpretation, mirroring the float versions.
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+// Loads/stores: vld1/vst1 are alignment-agnostic, so aligned and unaligned
+// variants differ only in their debug markers.
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f64(from); }
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f64(from); }
+// Duplicate one scalar into both lanes.
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{
+ return vld1q_dup_f64(from);
+}
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f64(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f64(to, from); }
+// Lane-by-lane strided gather/scatter (no hardware gather on NEON).
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ Packet2d res = pset1<Packet2d>(0.0);
+ res = vsetq_lane_f64(from[0*stride], res, 0);
+ res = vsetq_lane_f64(from[1*stride], res, 1);
+ return res;
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+ to[stride*0] = vgetq_lane_f64(from, 0);
+ to[stride*1] = vgetq_lane_f64(from, 1);
+}
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ARM_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(a, 0); }
+// Reverse the two lanes by swapping halves.
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }
+// Horizontal sum of the two lanes. Apple clang rejects vget_lane_f64 on the
+// sum of two float64x1_t values, so index the 1-lane vector directly there.
+#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) + vget_high_f64(a))[0]; }
+#else
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) + vget_high_f64(a), 0); }
+#endif
+// Reduce two packets at once: zip low/high lanes into columns, then add —
+// output lane i is the horizontal sum of vecs[i].
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ float64x2_t trn1, trn2;
+ trn1 = vzip1q_f64(vecs[0], vecs[1]);
+ trn2 = vzip2q_f64(vecs[0], vecs[1]);
+ return vaddq_f64(trn1, trn2);
+}
+// Horizontal product, with the same Apple clang workaround as predux.
+#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) * vget_high_f64(a))[0]; }
+#else
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) * vget_high_f64(a), 0); }
+#endif
+// Horizontal min/max via pairwise reduce.
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpminq_f64(a, a), 0); }
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpmaxq_f64(a, a), 0); }
+// Generate palign_impl specializations via vext (same macro as the float
+// section above, redefined here for the double packets).
+// FIX: the last macro line previously ended with a trailing backslash
+// (`};\`) and, unlike the float copy, was NOT followed by a blank
+// continuation-terminating line — so `PALIGN_NEON(0,Packet2d,vextq_f64)`
+// was spliced into the macro body: the Offset-0 specialization was never
+// emitted, and the self-referential macro token inside the body does not
+// re-expand, leaving unparseable tokens in the Offset-1 expansion. Dropping
+// the stray backslash terminates the macro at the closing brace.
+#define PALIGN_NEON(Offset,Type,Command) \
+template<>\
+struct palign_impl<Offset,Type>\
+{\
+ EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
+ {\
+ if (Offset!=0)\
+ first = Command(first, second, Offset);\
+ }\
+};
+PALIGN_NEON(0,Packet2d,vextq_f64)
+PALIGN_NEON(1,Packet2d,vextq_f64)
+#undef PALIGN_NEON
+// 2x2 in-register transpose: zip1 gathers the low lanes, zip2 the high lanes.
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ float64x2_t trn1 = vzip1q_f64(kernel.packet[0], kernel.packet[1]);
+ float64x2_t trn2 = vzip2q_f64(kernel.packet[0], kernel.packet[1]);
+ kernel.packet[0] = trn1;
+ kernel.packet[1] = trn2;
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/NEON/PacketMath.h"
+// #include "src/Core/arch/NEON/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_NEON_H
+#define EIGEN_MATH_FUNCTIONS_NEON_H
+namespace Eigen {
+namespace internal {
+// Vectorized exp() for 4 floats, Cephes-style: clamp the input, split
+// x = fx*ln(2) + r with |r| <= ln(2)/2, approximate exp(r) with a degree-5
+// polynomial, then multiply by 2^fx reconstructed through the float
+// exponent-bit field.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ Packet4f tmp, fx;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+ // clamp x to the range where the algorithm does not overflow/underflow
+ x = vminq_f32(x, p4f_exp_hi);
+ x = vmaxq_f32(x, p4f_exp_lo);
+ // fx = round(x * log2(e)): truncate, then subtract 1 where truncation
+ // rounded up past the target (mask carries a float 1.0 per bad lane)
+ fx = vmlaq_f32(p4f_half, x, p4f_cephes_LOG2EF);
+ tmp = vcvtq_f32_s32(vcvtq_s32_f32(fx));
+ Packet4ui mask = vcgtq_f32(tmp, fx);
+ mask = vandq_u32(mask, vreinterpretq_u32_f32(p4f_1));
+ fx = vsubq_f32(tmp, vreinterpretq_f32_u32(mask));
+ // r = x - fx*ln(2), with ln(2) split into C1 + C2 for extra precision
+ tmp = vmulq_f32(fx, p4f_cephes_exp_C1);
+ Packet4f z = vmulq_f32(fx, p4f_cephes_exp_C2);
+ x = vsubq_f32(x, tmp);
+ x = vsubq_f32(x, z);
+ // Horner evaluation of the exp(r) polynomial
+ Packet4f y = vmulq_f32(p4f_cephes_exp_p0, x);
+ z = vmulq_f32(x, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p1);
+ y = vmulq_f32(y, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p2);
+ y = vmulq_f32(y, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p3);
+ y = vmulq_f32(y, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p4);
+ y = vmulq_f32(y, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p5);
+ y = vmulq_f32(y, z);
+ y = vaddq_f32(y, x);
+ y = vaddq_f32(y, p4f_1);
+ // build 2^fx by placing the biased exponent (fx + 127) into bits 30..23
+ int32x4_t mm;
+ mm = vcvtq_s32_f32(fx);
+ mm = vaddq_s32(mm, p4i_0x7f);
+ mm = vshlq_n_s32(mm, 23);
+ Packet4f pow2n = vreinterpretq_f32_s32(mm);
+ y = vmulq_f32(y, pow2n);
+ return y;
+}
+}
+}
+#endif
+// end #include "src/Core/arch/NEON/MathFunctions.h"
+// #include "src/Core/arch/NEON/Complex.h"
+#ifndef EIGEN_COMPLEX_NEON_H
+#define EIGEN_COMPLEX_NEON_H
+namespace Eigen {
+namespace internal {
+// Sign-flip masks used to conjugate packed complex floats: XOR-ing with them
+// flips the sign bit of the imaginary lanes only.  Clang can materialize the
+// vector constant directly; other compilers load it from a static table.
+inline uint32x4_t p4ui_CONJ_XOR() {
+#if EIGEN_COMP_CLANG
+ uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
+ return ret;
+#else
+ static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
+ return vld1q_u32( conj_XOR_DATA );
+#endif
+}
+// 64-bit variant covering a single complex<float> (one re/im pair).
+inline uint32x2_t p2ui_CONJ_XOR() {
+ static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
+ return vld1_u32( conj_XOR_DATA );
+}
+// Packet2cf: two std::complex<float> packed into one float32x4_t laid out as
+// (re0, im0, re1, im1).
+struct Packet2cf
+{
+ EIGEN_STRONG_INLINE Packet2cf() {}
+ EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
+ Packet4f v;
+};
+// Vectorization traits for complex<float> on NEON: size-2 packets with the
+// basic arithmetic ops; abs/min/max/setlinear are not meaningful for complex.
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+ typedef Packet2cf type;
+ typedef Packet2cf half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+// pset1: load one complex value into a 64-bit half and duplicate it into
+// both halves of the packet.
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
+{
+ float32x2_t r64;
+ r64 = vld1_f32((float *)&from);
+ return Packet2cf(vcombine_f32(r64, r64));
+}
+// Lane-wise complex add/sub/negate delegate to the float primitives;
+// pconj flips the sign bit of the imaginary lanes via the XOR mask.
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
+{
+ Packet4ui b = vreinterpretq_u32_f32(a.v);
+ return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
+}
+// Complex multiply: v1 = (re_a dup), v2 = (im_a dup); combine re_a*b with
+// im_a*b after sign-flipping and swapping re/im so the lanes line up as
+// (re_a*re_b - im_a*im_b, re_a*im_b + im_a*re_b).
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet4f v1, v2;
+ v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
+ v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
+ v1 = vmulq_f32(v1, b.v);
+ v2 = vmulq_f32(v2, b.v);
+ v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
+ v2 = vrev64q_f32(v2);
+ return Packet2cf(vaddq_f32(v1, v2));
+}
+// Bitwise ops on the raw float bits (reinterpret to u32, operate, cast back);
+// pandnot computes a & ~b via the bit-clear instruction.
+template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+// Aligned/unaligned loads and stores treat the pair of complexes as 4 floats;
+// ploaddup broadcasts a single complex into both halves.
+template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
+// Strided gather: pick two complexes at from[0] and from[stride] and insert
+// their re/im components lane by lane.
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+ Packet4f res = pset1<Packet4f>(0.f);
+ res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
+ res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
+ res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
+ res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
+ return Packet2cf(res);
+}
+// Strided scatter: inverse of pgather.
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+ to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
+ to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
+}
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
+// pfirst: spill to an aligned scratch buffer and return element 0.
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
+{
+ std::complex<float> EIGEN_ALIGN16 x[2];
+ vst1q_f32((float *)x, a.v);
+ return x[0];
+}
+// preverse: swap the two complex elements (64-bit halves); re/im order
+// within each element is preserved.
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
+{
+ float32x2_t a_lo, a_hi;
+ Packet4f a_r128;
+ a_lo = vget_low_f32(a.v);
+ a_hi = vget_high_f32(a.v);
+ a_r128 = vcombine_f32(a_hi, a_lo);
+ return Packet2cf(a_r128);
+}
+// pcplxflip: swap re and im inside each complex element.
+template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
+{
+ return Packet2cf(vrev64q_f32(a.v));
+}
+// predux: add the two complex elements (low half + high half) and store the
+// resulting re/im pair into a scalar complex.
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+ float32x2_t a1, a2;
+ std::complex<float> s;
+ a1 = vget_low_f32(a.v);
+ a2 = vget_high_f32(a.v);
+ a2 = vadd_f32(a1, a2);
+ vst1_f32((float *)&s, a2);
+ return s;
+}
+// preduxp: { sum(vecs[0]), sum(vecs[1]) } by regrouping low halves together
+// and high halves together, then adding.
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+ Packet4f sum1, sum2, sum;
+ sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
+ sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
+ sum = vaddq_f32(sum1, sum2);
+ return Packet2cf(sum);
+}
+// predux_mul: complex product of the two elements, using the same
+// dup/multiply/sign-flip/swap scheme as pmul but on 64-bit halves.
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+ float32x2_t a1, a2, v1, v2, prod;
+ std::complex<float> s;
+ a1 = vget_low_f32(a.v);
+ a2 = vget_high_f32(a.v);
+ v1 = vdup_lane_f32(a1, 0);
+ v2 = vdup_lane_f32(a1, 1);
+ v1 = vmul_f32(v1, a2);
+ v2 = vmul_f32(v2, a2);
+ v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
+ v2 = vrev64_f32(v2);
+ prod = vadd_f32(v1, v2);
+ vst1_f32((float *)&s, prod);
+ return s;
+}
+// palign for Packet2cf: shifting by one complex element = shifting the
+// underlying float vector by two lanes; Offset 0 is the identity.
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+ EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
+ {
+ if (Offset==1)
+ {
+ first.v = vextq_f32(first.v, second.v, 2);
+ }
+ }
+};
+// conj_helper specializations: multiply with the conjugate applied to the
+// second operand, the first operand, or the product, respectively.
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+// Complex division a/b = (a * conj(b)) / |b|^2; s + rev_s places
+// re_b^2 + im_b^2 into every lane so the final pdiv is lane-wise.
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
+ Packet4f s, rev_s;
+ s = vmulq_f32(b.v, b.v);
+ rev_s = vrev64q_f32(s);
+ return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
+}
+// 2x2 transpose of complex elements: exchange the high half of row 0 with
+// the low half of row 1 (each complex occupies one 64-bit half).
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cf,2>& kernel) {
+ Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
+ kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
+ kernel.packet[1].v = tmp;
+}
+#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
+// Conjugation mask for complex<double>: XOR flips the sign bit of the
+// imaginary (second) lane only.
+#if EIGEN_COMP_CLANG
+ static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
+#else
+ const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
+ static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
+#endif
+// Packet1cd: a single std::complex<double> stored in one float64x2_t as
+// (re, im).
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+ Packet2d v;
+};
+// Vectorization traits for complex<double>: size-1 packets (one complex per
+// register), with basic arithmetic only.
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+// Loads treat the complex as two doubles; pset1 is just an unaligned load of
+// the single element.  Arithmetic delegates to the Packet2d primitives;
+// pconj flips the imaginary sign bit via the XOR mask above.
+template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ return ploadu<Packet1cd>(&from); }
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(padd<Packet2d>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(psub<Packet2d>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate<Packet2d>(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
+// Complex<double> multiply: v1 = dup(re_a), v2 = dup(im_a); combine re_a*b
+// with the sign-flipped, lane-swapped im_a*b to get
+// (re_a*re_b - im_a*im_b, re_a*im_b + im_a*re_b).
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet2d v1, v2;
+ v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
+ v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
+ v1 = vmulq_f64(v1, b.v);
+ v2 = vmulq_f64(v2, b.v);
+ v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
+ v2 = preverse<Packet2d>(v2);
+ return Packet1cd(vaddq_f64(v1, v2));
+}
+// Bitwise ops on the raw double bits (reinterpret to u64, operate, cast back).
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+// Memory ops, gather/scatter and reductions for the single-element packet.
+// With size==1 most reductions degenerate to returning the element itself.
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); }
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
+{
+ Packet2d res = pset1<Packet2d>(0.0);
+ res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
+ res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
+ return Packet1cd(res);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
+{
+ to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
+}
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ std::complex<double> EIGEN_ALIGN16 res;
+ pstore<std::complex<double> >(&res, a);
+ return res;
+}
+// reverse/sum/product of a one-element packet are all the identity.
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+// palign for a one-element packet is a no-op for every Offset.
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
+ {
+ }
+};
+// conj_helper specializations: conjugate the second operand, the first
+// operand, or the product, respectively.
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+// Complex<double> division a/b = (a * conj(b)) / |b|^2; s + rev_s broadcasts
+// re_b^2 + im_b^2 into both lanes for the final lane-wise divide.
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ Packet2d s = pmul<Packet2d>(b.v, b.v);
+ Packet2d rev_s = preverse<Packet2d>(s);
+ return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
+}
+// Swap re and im of the single complex element.
+EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+// 2x2 transpose over two one-complex packets: swap the high half of row 0
+// with the low half of row 1.
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+ Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
+ kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
+ kernel.packet[1].v = tmp;
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/NEON/Complex.h"
+#elif defined EIGEN_VECTORIZE_ZVECTOR
+// #include "src/Core/arch/ZVector/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_ZVECTOR_H
+#define EIGEN_PACKET_MATH_ZVECTOR_H
+#include <stdint.h>
+namespace Eigen {
+namespace internal {
+// --- ZVector (IBM z Systems) backend: tuning knobs, packet types, constants ---
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
+#endif
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
+#endif
+// Native 128-bit vector types...
+typedef __vector int Packet4i;
+typedef __vector unsigned int Packet4ui;
+typedef __vector __bool int Packet4bi;
+typedef __vector short int Packet8i;
+typedef __vector unsigned char Packet16uc;
+typedef __vector double Packet2d;
+typedef __vector unsigned long long Packet2ul;
+typedef __vector long long Packet2l;
+// ...and an emulated float packet: z13 has no single-precision vector unit,
+// so Packet4f is modeled as two double-precision halves.
+typedef struct {
+ Packet2d v4f[2];
+} Packet4f;
+// Scratch union for moving values between vector registers and scalar views.
+typedef union {
+ int32_t i[4];
+ uint32_t ui[4];
+ int64_t l[2];
+ uint64_t ul[2];
+ double d[2];
+ Packet4i v4i;
+ Packet4ui v4ui;
+ Packet2l v2l;
+ Packet2ul v2ul;
+ Packet2d v2d;
+} Packet;
+// Constant-declaration helpers: the FAST variants use vec_splat_* immediates,
+// the plain ones go through pset1.
+#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
+ Packet4i p4i_##NAME = reinterpret_cast<Packet4i>(vec_splat_s32(X))
+#define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \
+ Packet2d p2d_##NAME = reinterpret_cast<Packet2d>(vec_splat_s64(X))
+#define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \
+ Packet2l p2l_##NAME = reinterpret_cast<Packet2l>(vec_splat_s64(X))
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ Packet4i p4i_##NAME = pset1<Packet4i>(X)
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+ Packet2d p2d_##NAME = pset1<Packet2d>(X)
+#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
+ Packet2l p2l_##NAME = pset1<Packet2l>(X)
+// Frequently used constants and byte-shuffle masks (plset ramps, reversal
+// permutes, 64-bit broadcast patterns).
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1);
+static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0);
+static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0);
+static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1);
+static Packet2d p2d_ONE = { 1.0, 1.0 };
+static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
+static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
+static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
+static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));
+static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
+#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
+#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
+static Packet16uc p16uc_FORWARD = { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 };
+static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);
+static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
+// Debug stream output for each packet type: spill through the scratch union
+// and print the lanes as "a, b, c, d" / "a, b".
+inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
+{
+ Packet vt;
+ vt.v4i = v;
+ s << vt.i[0] << ", " << vt.i[1] << ", " << vt.i[2] << ", " << vt.i[3];
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
+{
+ Packet vt;
+ vt.v4ui = v;
+ s << vt.ui[0] << ", " << vt.ui[1] << ", " << vt.ui[2] << ", " << vt.ui[3];
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
+{
+ Packet vt;
+ vt.v2l = v;
+ s << vt.l[0] << ", " << vt.l[1];
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet2ul & v)
+{
+ Packet vt;
+ vt.v2ul = v;
+ s << vt.ul[0] << ", " << vt.ul[1] ;
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
+{
+ Packet vt;
+ vt.v2d = v;
+ s << vt.d[0] << ", " << vt.d[1];
+ return s;
+}
+// Broadcast one of the four logical float lanes of an emulated Packet4f to
+// all lanes.  Lanes 0/1 live in v4f[0], lanes 2/3 in v4f[1]; `element` is a
+// compile-time constant so the switch folds away.
+template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Packet4f& from)
+{
+ Packet4f splat;
+ switch (element) {
+ case 0:
+ splat.v4f[0] = vec_splat(from.v4f[0], 0);
+ splat.v4f[1] = splat.v4f[0];
+ break;
+ case 1:
+ splat.v4f[0] = vec_splat(from.v4f[0], 1);
+ splat.v4f[1] = splat.v4f[0];
+ break;
+ case 2:
+ splat.v4f[0] = vec_splat(from.v4f[1], 0);
+ splat.v4f[1] = splat.v4f[0];
+ break;
+ case 3:
+ splat.v4f[0] = vec_splat(from.v4f[1], 1);
+ splat.v4f[1] = splat.v4f[0];
+ break;
+ }
+ return splat;
+}
+// palign: shift `first` left by Offset elements, refilling the tail from
+// `second`.  For Packet4i this is a byte-wise vec_sld by 4*Offset bytes.
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(first, second, 4); break;
+ case 2:
+ first = vec_sld(first, second, 8); break;
+ case 3:
+ first = vec_sld(first, second, 12); break;
+ }
+ }
+};
+// Emulated Packet4f: each half holds two doubles, so shifting by one float
+// element is an 8-byte shift within/between halves; shifting by two elements
+// just moves whole halves.
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+ {
+ switch (Offset % 4) {
+ case 1:
+ first.v4f[0] = vec_sld(first.v4f[0], first.v4f[1], 8);
+ first.v4f[1] = vec_sld(first.v4f[1], second.v4f[0], 8);
+ break;
+ case 2:
+ first.v4f[0] = first.v4f[1];
+ first.v4f[1] = second.v4f[0];
+ break;
+ case 3:
+ first.v4f[0] = vec_sld(first.v4f[1], second.v4f[0], 8);
+ first.v4f[1] = vec_sld(second.v4f[0], second.v4f[1], 8);
+ break;
+ }
+ }
+};
+// Packet2d: only Offset 1 does work (an 8-byte shift); 0 is the identity.
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset == 1)
+ first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(first), reinterpret_cast<Packet4i>(second), 8));
+ }
+};
+// Aligned loads: int and double packets go through the scratch union; the
+// emulated float packet loads each pair of floats into a double half with
+// vec_ld2f (load-two-floats-widening).
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+ Packet *vfrom;
+ vfrom = (Packet *) from;
+ return vfrom->v4i;
+}
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+ Packet4f vfrom;
+ vfrom.v4f[0] = vec_ld2f(&from[0]);
+ vfrom.v4f[1] = vec_ld2f(&from[2]);
+ return vfrom;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+ Packet *vfrom;
+ vfrom = (Packet *) from;
+ return vfrom->v2d;
+}
+// Aligned stores, mirroring the loads (vec_st2f narrows a double half back
+// to two floats).
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
+{
+ EIGEN_DEBUG_ALIGNED_STORE
+ Packet *vto;
+ vto = (Packet *) to;
+ vto->v4i = from;
+}
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
+{
+ EIGEN_DEBUG_ALIGNED_STORE
+ vec_st2f(from.v4f[0], &to[0]);
+ vec_st2f(from.v4f[1], &to[2]);
+}
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
+{
+ EIGEN_DEBUG_ALIGNED_STORE
+ Packet *vto;
+ vto = (Packet *) to;
+ vto->v2d = from;
+}
+// pset1: broadcast a scalar to all lanes.  For the emulated Packet4f the
+// float is widened to double and splatted into both halves.
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
+{
+ return vec_splats(from);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
+ return vec_splats(from);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)
+{
+ Packet4f to;
+ to.v4f[0] = pset1<Packet2d>(static_cast<const double&>(from));
+ to.v4f[1] = to.v4f[0];
+ return to;
+}
+// pbroadcast4: load four consecutive scalars and splat each into its own
+// packet (a0..a3).
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4i>(const int *a,
+ Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
+{
+ a3 = pload<Packet4i>(a);
+ a0 = vec_splat(a3, 0);
+ a1 = vec_splat(a3, 1);
+ a2 = vec_splat(a3, 2);
+ a3 = vec_splat(a3, 3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+ Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+ a3 = pload<Packet4f>(a);
+ a0 = vec_splat_packet4f<0>(a3);
+ a1 = vec_splat_packet4f<1>(a3);
+ a2 = vec_splat_packet4f<2>(a3);
+ a3 = vec_splat_packet4f<3>(a3);
+}
+// Doubles need two loads since one packet only holds two of the four values.
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+ Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+ a1 = pload<Packet2d>(a);
+ a0 = vec_splat(a1, 0);
+ a1 = vec_splat(a1, 1);
+ a3 = pload<Packet2d>(a+2);
+ a2 = vec_splat(a3, 0);
+ a3 = vec_splat(a3, 1);
+}
+// Strided gather: collect elements into an aligned scratch array, then do
+// one aligned vector load.
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+ int EIGEN_ALIGN16 ai[4];
+ ai[0] = from[0*stride];
+ ai[1] = from[1*stride];
+ ai[2] = from[2*stride];
+ ai[3] = from[3*stride];
+ return pload<Packet4i>(ai);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+ float EIGEN_ALIGN16 ai[4];
+ ai[0] = from[0*stride];
+ ai[1] = from[1*stride];
+ ai[2] = from[2*stride];
+ ai[3] = from[3*stride];
+ return pload<Packet4f>(ai);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet2d>(af);
+}
+// Strided scatter: one aligned vector store into scratch, then element-wise
+// strided writes.
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+ int EIGEN_ALIGN16 ai[4];
+ pstore<int>((int *)ai, from);
+ to[0*stride] = ai[0];
+ to[1*stride] = ai[1];
+ to[2*stride] = ai[2];
+ to[3*stride] = ai[3];
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+ float EIGEN_ALIGN16 ai[4];
+ pstore<float>((float *)ai, from);
+ to[0*stride] = ai[0];
+ to[1*stride] = ai[1];
+ to[2*stride] = ai[2];
+ to[3*stride] = ai[3];
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ pstore<double>(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+// Lane-wise arithmetic.  Native packets use the vector-extension operators
+// directly; the emulated Packet4f applies the op to each double half.
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f c;
+ c.v4f[0] = a.v4f[0] + b.v4f[0];
+ c.v4f[1] = a.v4f[1] + b.v4f[1];
+ return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f c;
+ c.v4f[0] = a.v4f[0] - b.v4f[0];
+ c.v4f[1] = a.v4f[1] - b.v4f[1];
+ return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f c;
+ c.v4f[0] = a.v4f[0] * b.v4f[0];
+ c.v4f[1] = a.v4f[1] * b.v4f[1];
+ return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }
+template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f c;
+ c.v4f[0] = a.v4f[0] / b.v4f[0];
+ c.v4f[1] = a.v4f[1] / b.v4f[1];
+ return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
+{
+ Packet4f c;
+ c.v4f[0] = -a.v4f[0];
+ c.v4f[1] = -a.v4f[1];
+ return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }
+// pconj is the identity for real-valued packets.
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+// Fused multiply-add (vec_madd where the hardware has it; int falls back to
+// mul+add) and plset (base + {0,1,2,3} ramp).
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
+{
+ Packet4f res;
+ res.v4f[0] = vec_madd(a.v4f[0], b.v4f[0], c.v4f[0]);
+ res.v4f[1] = vec_madd(a.v4f[1], b.v4f[1], c.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN); }
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }
+// Lane-wise min/max; the emulated float packet recurses onto its halves.
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f res;
+ res.v4f[0] = pmin(a.v4f[0], b.v4f[0]);
+ res.v4f[1] = pmin(a.v4f[1], b.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f res;
+ res.v4f[0] = pmax(a.v4f[0], b.v4f[0]);
+ res.v4f[1] = pmax(a.v4f[1], b.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f res;
+ res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
+ res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
+// Bitwise OR for the emulated float packet, applied per Packet2d half.
+// BUG FIX: this previously called pand() on both halves, so por<Packet4f>
+// silently computed a bitwise AND instead of OR (the Packet4i/Packet2d
+// overloads above correctly use vec_or).
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f res;
+  res.v4f[0] = por(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = por(a.v4f[1], b.v4f[1]);
+  return res;
+}
+// Bitwise XOR for the native single-vector packets.
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
+// Bitwise XOR for the emulated float packet, applied per Packet2d half.
+// BUG FIX: this previously called pand() on both halves, so pxor<Packet4f>
+// computed a bitwise AND instead of XOR (the Packet4i/Packet2d overloads
+// above correctly use vec_xor).
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f res;
+  res.v4f[0] = pxor(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = pxor(a.v4f[1], b.v4f[1]);
+  return res;
+}
+// pandnot(a,b) = a & ~b; ~b is built with vec_nor(b, b).
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f res;
+  res.v4f[0] = pandnot(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = pandnot(a.v4f[1], b.v4f[1]);
+  return res;
+}
+// Rounding: round / ceil / floor per element; Packet4f again works per
+// Packet2d half.
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
+{
+  Packet4f res;
+  res.v4f[0] = vec_round(a.v4f[0]);
+  res.v4f[1] = vec_round(a.v4f[1]);
+  return res;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
+{
+  Packet4f res;
+  res.v4f[0] = vec_ceil(a.v4f[0]);
+  res.v4f[1] = vec_ceil(a.v4f[1]);
+  return res;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
+{
+  Packet4f res;
+  res.v4f[0] = vec_floor(a.v4f[0]);
+  res.v4f[1] = vec_floor(a.v4f[1]);
+  return res;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
+// Unaligned load/store simply delegate to the aligned versions here
+// (presumably the ZVector load/store used by pload tolerates unaligned
+// addresses on this target — NOTE(review): confirm against pload/pstore
+// definitions, which are outside this chunk).
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { return pload<Packet4i>(from); }
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { return pload<Packet4f>(from); }
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { return pload<Packet2d>(from); }
+// ploaddup: load 2 scalars and duplicate each -> {a,a,b,b}.
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+  Packet4i p = pload<Packet4i>(from);
+  return vec_perm(p, p, p16uc_DUPLICATE32_HI);
+}
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+  Packet4f p = pload<Packet4f>(from);
+  p.v4f[1] = vec_splat(p.v4f[0], 1);
+  p.v4f[0] = vec_splat(p.v4f[0], 0);
+  return p;
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{
+  Packet2d p = pload<Packet2d>(from);
+  return vec_perm(p, p, p16uc_PSET64_HI);
+}
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { pstore<int>(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { pstore<float>(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { pstore<double>(to, from); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+// pfirst: spill to an aligned stack buffer and return element 0.
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
+// preverse: reverse element order via a byte permute with a precomputed mask.
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{
+  return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
+}
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{
+  return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
+}
+// For the two-half float packet, reversing swaps the halves and reverses
+// within each half.
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{
+  Packet4f rev;
+  rev.v4f[0] = preverse<Packet2d>(a.v4f[1]);
+  rev.v4f[1] = preverse<Packet2d>(a.v4f[0]);
+  return rev;
+}
+template<> EIGEN_STRONG_INLINE Packet4i pabs<Packet4i>(const Packet4i& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pabs<Packet2d>(const Packet2d& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a)
+{
+  Packet4f res;
+  res.v4f[0] = pabs(a.v4f[0]);
+  res.v4f[1] = pabs(a.v4f[1]);
+  return res;
+}
+// predux: horizontal sum of all lanes, via log2(n) shift-and-add steps
+// (vec_sld shifts the vector left by N bytes).
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+  Packet4i b, sum;
+  b = vec_sld(a, a, 8);
+  sum = padd<Packet4i>(a, b);
+  b = vec_sld(sum, sum, 4);
+  sum = padd<Packet4i>(sum, b);
+  return pfirst(sum);
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+  Packet2d b, sum;
+  b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8));
+  sum = padd<Packet2d>(a, b);
+  return pfirst(sum);
+}
+// Float reduction is done in double precision (the lanes live in Packet2d
+// halves) and narrowed at the end.
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+  Packet2d sum;
+  sum = padd<Packet2d>(a.v4f[0], a.v4f[1]);
+  double first = predux<Packet2d>(sum);
+  return static_cast<float>(first);
+}
+// preduxp: reduce 4 packets at once; result lane i is the sum of vecs[i].
+// Implemented as a 4x4 transpose (merge high/low twice) followed by adds.
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+  Packet4i v[4], sum[4];
+  v[0] = vec_mergeh(vecs[0], vecs[2]);
+  v[1] = vec_mergel(vecs[0], vecs[2]);
+  v[2] = vec_mergeh(vecs[1], vecs[3]);
+  v[3] = vec_mergel(vecs[1], vecs[3]);
+  sum[0] = vec_mergeh(v[0], v[2]);
+  sum[1] = vec_mergel(v[0], v[2]);
+  sum[2] = vec_mergeh(v[1], v[3]);
+  sum[3] = vec_mergel(v[1], v[3]);
+  sum[0] = padd<Packet4i>(sum[0], sum[1]);
+  sum[1] = padd<Packet4i>(sum[2], sum[3]);
+  sum[0] = padd<Packet4i>(sum[0], sum[1]);
+  return sum[0];
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+  Packet2d v[2], sum;
+  v[0] = padd<Packet2d>(vecs[0], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[0]), reinterpret_cast<Packet4ui>(vecs[0]), 8)));
+  v[1] = padd<Packet2d>(vecs[1], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[1]), reinterpret_cast<Packet4ui>(vecs[1]), 8)));
+  sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v[0]), reinterpret_cast<Packet4ui>(v[1]), 8));
+  return sum;
+}
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+  PacketBlock<Packet4f,4> transpose;
+  transpose.packet[0] = vecs[0];
+  transpose.packet[1] = vecs[1];
+  transpose.packet[2] = vecs[2];
+  transpose.packet[3] = vecs[3];
+  ptranspose(transpose);
+  Packet4f sum = padd(transpose.packet[0], transpose.packet[1]);
+  sum = padd(sum, transpose.packet[2]);
+  sum = padd(sum, transpose.packet[3]);
+  return sum;
+}
+// Horizontal product: spill to memory and multiply scalars (no vector
+// integer-multiply reduction is used here).
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+  EIGEN_ALIGN16 int aux[4];
+  pstore(aux, a);
+  return aux[0] * aux[1] * aux[2] * aux[3];
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+  return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
+}
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+  return static_cast<float>(pfirst(predux_mul(pmul(a.v4f[0], a.v4f[1]))));
+}
+// Horizontal min/max: same shift-and-combine pattern as predux, with
+// pmin/pmax instead of padd.
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+  Packet4i b, res;
+  b = pmin<Packet4i>(a, vec_sld(a, a, 8));
+  res = pmin<Packet4i>(b, vec_sld(b, b, 4));
+  return pfirst(res);
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+  return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
+}
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+  Packet2d b, res;
+  b = pmin<Packet2d>(a.v4f[0], a.v4f[1]);
+  res = pmin<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
+  return static_cast<float>(pfirst(res));
+}
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+  Packet4i b, res;
+  b = pmax<Packet4i>(a, vec_sld(a, a, 8));
+  res = pmax<Packet4i>(b, vec_sld(b, b, 4));
+  return pfirst(res);
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+  return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
+}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+  Packet2d b, res;
+  b = pmax<Packet2d>(a.v4f[0], a.v4f[1]);
+  res = pmax<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
+  return static_cast<float>(pfirst(res));
+}
+// In-register 4x4 int transpose: two rounds of merge-high/merge-low.
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+  Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+  Packet4i t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+  Packet4i t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+  Packet4i t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+  kernel.packet[0] = vec_mergeh(t0, t2);
+  kernel.packet[1] = vec_mergel(t0, t2);
+  kernel.packet[2] = vec_mergeh(t1, t3);
+  kernel.packet[3] = vec_mergel(t1, t3);
+}
+// 2x2 double transpose via the 64-bit hi/lo permute masks.
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+  Packet2d t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
+  Packet2d t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
+  kernel.packet[0] = t0;
+  kernel.packet[1] = t1;
+}
+// 4x4 float transpose built from four 2x2 double transposes over the
+// Packet2d halves, then reassembled into the emulated Packet4f layout.
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+  PacketBlock<Packet2d,2> t0,t1,t2,t3;
+  t0.packet[0] = kernel.packet[0].v4f[0];
+  t0.packet[1] = kernel.packet[1].v4f[0];
+  t1.packet[0] = kernel.packet[0].v4f[1];
+  t1.packet[1] = kernel.packet[1].v4f[1];
+  t2.packet[0] = kernel.packet[2].v4f[0];
+  t2.packet[1] = kernel.packet[3].v4f[0];
+  t3.packet[0] = kernel.packet[2].v4f[1];
+  t3.packet[1] = kernel.packet[3].v4f[1];
+  ptranspose(t0);
+  ptranspose(t1);
+  ptranspose(t2);
+  ptranspose(t3);
+  kernel.packet[0].v4f[0] = t0.packet[0];
+  kernel.packet[0].v4f[1] = t2.packet[0];
+  kernel.packet[1].v4f[0] = t0.packet[1];
+  kernel.packet[1].v4f[1] = t2.packet[1];
+  kernel.packet[2].v4f[0] = t1.packet[0];
+  kernel.packet[2].v4f[1] = t3.packet[0];
+  kernel.packet[3].v4f[0] = t1.packet[1];
+  kernel.packet[3].v4f[1] = t3.packet[1];
+}
+// pblend: per-lane select — lane i takes thenPacket where select[i]==1,
+// else elsePacket. The selector is turned into a full-lane mask with
+// vec_cmpeq against the all-ones constant, then fed to vec_sel.
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+  Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
+  Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
+  return vec_sel(elsePacket, thenPacket, mask);
+}
+// Float blend needs two 64-bit masks, one per Packet2d half.
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+  Packet2ul select_hi = { ifPacket.select[0], ifPacket.select[1] };
+  Packet2ul select_lo = { ifPacket.select[2], ifPacket.select[3] };
+  Packet2ul mask_hi = vec_cmpeq(select_hi, reinterpret_cast<Packet2ul>(p2l_ONE));
+  Packet2ul mask_lo = vec_cmpeq(select_lo, reinterpret_cast<Packet2ul>(p2l_ONE));
+  Packet4f result;
+  result.v4f[0] = vec_sel(elsePacket.v4f[0], thenPacket.v4f[0], mask_hi);
+  result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo);
+  return result;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+  Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
+  Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));
+  return vec_sel(elsePacket, thenPacket, mask);
+}
+}
+}
+#endif
+// end #include "src/Core/arch/ZVector/PacketMath.h"
+// #include "src/Core/arch/ZVector/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+namespace Eigen {
+namespace internal {
+// Packet2d constants for the Cephes-style exp() approximation below:
+// clamping range (exp_hi/exp_lo), log2(e), the Pade numerator (p0..p2) and
+// denominator (q0..q3) coefficients, and the two-part split of ln(2)
+// (C1 + C2) used for accurate range reduction.
+static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
+static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+// Vectorized exp() for Packet2d (Cephes-style):
+//  1. clamp x to the representable range,
+//  2. split x = fx*ln2 + r  with fx = round(x*log2(e)),
+//  3. approximate exp(r) with a rational (Pade) polynomial in r^2,
+//  4. scale by 2^fx via direct exponent-field construction,
+//  5. propagate NaN inputs through the final select.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+  Packet2d x = _x;
+  Packet2d tmp, fx;
+  Packet2l emm0;
+  x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+  // fx = floor(x*log2(e) + 0.5) = nearest integer exponent.
+  fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
+  fx = vec_floor(fx);
+  // r = x - fx*ln2, using the split ln2 = C1 + C2 for extra precision.
+  tmp = pmul(fx, p2d_cephes_exp_C1);
+  Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+  x = psub(x, tmp);
+  x = psub(x, z);
+  Packet2d x2 = pmul(x,x);
+  Packet2d px = p2d_cephes_exp_p0;
+  px = pmadd(px, x2, p2d_cephes_exp_p1);
+  px = pmadd(px, x2, p2d_cephes_exp_p2);
+  px = pmul (px, x);
+  Packet2d qx = p2d_cephes_exp_q0;
+  qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+  qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+  qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+  x = pdiv(px,psub(qx,px));
+  x = pmadd(p2d_2,x,p2d_1);
+  // Build 2^fx by placing (fx + 1023) into the IEEE-754 exponent bits.
+  emm0 = vec_ctsl(fx, 0);
+  static const Packet2l p2l_1023 = { 1023, 1023 };
+  static const Packet2ul p2ul_52 = { 52, 52 };
+  emm0 = emm0 + p2l_1023;
+  emm0 = emm0 << reinterpret_cast<Packet2l>(p2ul_52);
+  // isnumber_mask is all-ones where _x is not NaN (NaN != NaN).
+  Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
+  return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
+                 isnumber_mask);
+}
+// Float exp/sqrt/rsqrt are computed in double precision on the two
+// Packet2d halves of the emulated Packet4f.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& x)
+{
+  Packet4f res;
+  res.v4f[0] = pexp<Packet2d>(x.v4f[0]);
+  res.v4f[1] = pexp<Packet2d>(x.v4f[1]);
+  return res;
+}
+// Hardware double-precision square root via the z/Arch builtin.
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d psqrt<Packet2d>(const Packet2d& x)
+{
+  return __builtin_s390_vfsqdb(x);
+}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& x)
+{
+  Packet4f res;
+  res.v4f[0] = psqrt<Packet2d>(x.v4f[0]);
+  res.v4f[1] = psqrt<Packet2d>(x.v4f[1]);
+  return res;
+}
+// rsqrt = 1/sqrt (full division; no reciprocal-estimate instruction used).
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x) {
+  return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
+}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& x) {
+  Packet4f res;
+  res.v4f[0] = prsqrt<Packet2d>(x.v4f[0]);
+  res.v4f[1] = prsqrt<Packet2d>(x.v4f[1]);
+  return res;
+}
+}
+}
+#endif
+// end #include "src/Core/arch/ZVector/MathFunctions.h"
+// #include "src/Core/arch/ZVector/Complex.h"
+#ifndef EIGEN_COMPLEX32_ALTIVEC_H
+#define EIGEN_COMPLEX32_ALTIVEC_H
+namespace Eigen {
+namespace internal {
+// Sign-flip masks for complex conjugation/multiplication: XOR1 flips the
+// sign of the first 64-bit lane, XOR2 flips the second.
+static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);
+static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);
+// One complex<double> held in a Packet2d as {re, im}.
+struct Packet1cd
+{
+  EIGEN_STRONG_INLINE Packet1cd() {}
+  EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+  Packet2d v;
+};
+// Two complex<float>, stored as a Packet4f and aliased (via the union) as
+// two Packet1cd so complex-float ops can reuse the complex-double kernels.
+struct Packet2cf
+{
+  EIGEN_STRONG_INLINE Packet2cf() {}
+  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
+  union {
+    Packet4f v;
+    Packet1cd cd[2];
+  };
+};
+// Trait tables advertising which complex operations are vectorized on
+// this target (no abs/min/max for complex; div and negate are available).
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+  typedef Packet2cf type;
+  typedef Packet2cf half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 2,
+    HasHalfPacket = 0,
+    HasAdd = 1,
+    HasSub = 1,
+    HasMul = 1,
+    HasDiv = 1,
+    HasNegate = 1,
+    HasAbs = 0,
+    HasAbs2 = 0,
+    HasMin = 0,
+    HasMax = 0,
+    HasBlend = 1,
+    HasSetLinear = 0
+  };
+};
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+  typedef Packet1cd type;
+  typedef Packet1cd half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 1,
+    HasHalfPacket = 0,
+    HasAdd = 1,
+    HasSub = 1,
+    HasMul = 1,
+    HasDiv = 1,
+    HasNegate = 1,
+    HasAbs = 0,
+    HasAbs2 = 0,
+    HasMin = 0,
+    HasMax = 0,
+    HasSetLinear = 0
+  };
+};
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+// Forward declaration; defined near the end of this header.
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
+// Complex load/store reinterpret the complex arrays as raw float/double
+// memory and delegate to the real-valued packet primitives.
+template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from)  { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)  { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *  to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *  to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>&  from)
+{ return ploadu<Packet1cd>(&from); }
+// Broadcast one complex<float> into both Packet1cd slots.
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)
+{
+  Packet2cf res;
+  res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));
+  res.cd[1] = res.cd[0];
+  return res;
+}
+// Strided gather/scatter through an aligned stack staging buffer.
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+  std::complex<float> EIGEN_ALIGN16 af[2];
+  af[0] = from[0*stride];
+  af[1] = from[1*stride];
+  return pload<Packet2cf>(af);
+}
+// Packet1cd holds a single element, so stride is irrelevant.
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride EIGEN_UNUSED)
+{
+  return pload<Packet1cd>(from);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+  std::complex<float> EIGEN_ALIGN16 af[2];
+  pstore<std::complex<float> >((std::complex<float> *) af, from);
+  to[0*stride] = af[0];
+  to[1*stride] = af[1];
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride EIGEN_UNUSED)
+{
+  pstore<std::complex<double> >(to, from);
+}
+// Complex add/sub/negate operate on the underlying real packets lane-wise.
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }
+// Conjugation flips the imaginary lane's sign bit via the XOR2 mask.
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
+{
+  Packet2cf res;
+  res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;
+  res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;
+  return res;
+}
+// Complex multiply: (ar+i*ai)*(br+i*bi); ai*b is swapped and sign-flipped
+// (XOR1) before the final add so real/imag parts combine correctly.
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  Packet2d a_re, a_im, v1, v2;
+  a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
+  a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
+  v1 = vec_madd(a_re, b.v, p2d_ZERO);
+  v2 = vec_madd(a_im, b.v, p2d_ZERO);
+  v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8);
+  v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1);
+  return Packet1cd(v1 + v2);
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  Packet2cf res;
+  res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[0]))).v;
+  res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;
+  return res;
+}
+// Bitwise ops on complex packets act on the raw bits of the payload.
+template<> EIGEN_STRONG_INLINE Packet1cd pand    <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pand    <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por     <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf por     <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor    <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pxor    <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>*     from) {  return pset1<Packet1cd>(*from); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>*      from) {  return pset1<Packet2cf>(*from); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *      addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *    addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+// pfirst: store to an aligned stack slot and return element 0.
+template<> EIGEN_STRONG_INLINE std::complex<double>  pfirst<Packet1cd>(const Packet1cd& a)
+{
+  std::complex<double> EIGEN_ALIGN16 res;
+  pstore<std::complex<double> >(&res, a);
+  return res;
+}
+template<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)
+{
+  std::complex<float> EIGEN_ALIGN16 res[2];
+  pstore<std::complex<float> >(res, a);
+  return res[0];
+}
+// Reversing a single-element packet is the identity; for two elements,
+// swap the Packet1cd slots.
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
+{
+  Packet2cf res;
+  res.cd[0] = a.cd[1];
+  res.cd[1] = a.cd[0];
+  return res;
+}
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
+{
+  return pfirst(a);
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+  std::complex<float> res;
+  Packet1cd b = padd<Packet1cd>(a.cd[0], a.cd[1]);
+  vec_st2f(b.v, (float*)&res);
+  return res;
+}
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
+{
+  return vecs[0];
+}
+// Transpose the 2x2 block of complex floats, then add the rows.
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+  PacketBlock<Packet2cf,2> transpose;
+  transpose.packet[0] = vecs[0];
+  transpose.packet[1] = vecs[1];
+  ptranspose(transpose);
+  return padd<Packet2cf>(transpose.packet[0], transpose.packet[1]);
+}
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
+{
+  return pfirst(a);
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+  std::complex<float> res;
+  Packet1cd b = pmul<Packet1cd>(a.cd[0], a.cd[1]);
+  vec_st2f(b.v, (float*)&res);
+  return res;
+}
+// palign: shift two consecutive packets by Offset elements. Packet1cd has
+// one element, so any alignment is a no-op.
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+  static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
+  {
+  }
+};
+// For Packet2cf, Offset==1 drops first.cd[0] and pulls in second.cd[0].
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+  {
+    if (Offset == 1) {
+      first.cd[0] = first.cd[1];
+      first.cd[1] = second.cd[0];
+    }
+  }
+};
+// conj_helper<LHS,RHS,ConjLhs,ConjRhs>: multiply with optional conjugation
+// of either operand, used by the complex GEMM kernels. The two boolean
+// template flags say which side is conjugated; <true,true> conjugates the
+// product instead (conj(a)*conj(b) == conj(a*b)).
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    return internal::pmul(a, pconj(b));
+  }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    return internal::pmul(pconj(a), b);
+  }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    return pconj(internal::pmul(a, b));
+  }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    return internal::pmul(a, pconj(b));
+  }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    return internal::pmul(pconj(a), b);
+  }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    return pconj(internal::pmul(a, b));
+  }
+};
+// Complex division a/b = (a*conj(b)) / |b|^2, with |b|^2 broadcast to both
+// lanes by adding s to its 64-bit swap.
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+  Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
+  return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  Packet2cf res;
+  res.cd[0] = pdiv<Packet1cd>(a.cd[0], b.cd[0]);
+  res.cd[1] = pdiv<Packet1cd>(a.cd[1], b.cd[1]);
+  return res;
+}
+// pcplxflip: swap real and imaginary parts of each complex element.
+EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
+{
+  return Packet1cd(preverse(Packet2d(x.v)));
+}
+EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x)
+{
+  Packet2cf res;
+  res.cd[0] = pcplxflip(x.cd[0]);
+  res.cd[1] = pcplxflip(x.cd[1]);
+  return res;
+}
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+  Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+  kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
+  kernel.packet[0].v = tmp;
+}
+// 2x2 complex-float transpose: swap the off-diagonal elements.
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
+{
+  Packet1cd tmp = kernel.packet[0].cd[1];
+  kernel.packet[0].cd[1] = kernel.packet[1].cd[0];
+  kernel.packet[1].cd[0] = tmp;
+}
+// Complex blend: widen the 2-entry selector to 4 float lanes (each complex
+// element covers two lanes) and reuse the real-valued pblend.
+template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
+  Packet2cf result;
+  const Selector<4> ifPacket4 = { ifPacket.select[0], ifPacket.select[0], ifPacket.select[1], ifPacket.select[1] };
+  result.v = pblend<Packet4f>(ifPacket4, thenPacket.v, elsePacket.v);
+  return result;
+}
+}
+}
+#endif
+// end #include "src/Core/arch/ZVector/Complex.h"
+#endif
+#if defined EIGEN_VECTORIZE_CUDA
+// #include "src/Core/arch/CUDA/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_CUDA_H
+#define EIGEN_PACKET_MATH_CUDA_H
+namespace Eigen {
+namespace internal {
+#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+// CUDA packet traits: float4/double2 are the device packet types; a wide
+// set of special functions (lgamma, digamma, zeta, erf/erfc, igamma, ...)
+// is vectorized, while blend is not.
+template<> struct is_arithmetic<float4>  { enum { value = true }; };
+template<> struct is_arithmetic<double2> { enum { value = true }; };
+template<> struct packet_traits<float> : default_packet_traits
+{
+  typedef float4 type;
+  typedef float4 half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=4,
+    HasHalfPacket = 0,
+    HasDiv  = 1,
+    HasSin  = 0,
+    HasCos  = 0,
+    HasLog  = 1,
+    HasExp  = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasLGamma = 1,
+    HasDiGamma = 1,
+    HasZeta = 1,
+    HasPolygamma = 1,
+    HasErf = 1,
+    HasErfc = 1,
+    HasIGamma = 1,
+    HasIGammac = 1,
+    HasBetaInc = 1,
+    HasBlend = 0,
+  };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+  typedef double2 type;
+  typedef double2 half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=2,
+    HasHalfPacket = 0,
+    HasDiv  = 1,
+    HasLog  = 1,
+    HasExp  = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasLGamma = 1,
+    HasDiGamma = 1,
+    HasZeta = 1,
+    HasPolygamma = 1,
+    HasErf = 1,
+    HasErfc = 1,
+    HasIGamma = 1,
+    HasIGammac = 1,
+    HasBetaInc = 1,
+    HasBlend = 0,
+  };
+};
+template<> struct unpacket_traits<float4>  { typedef float  type; enum {size=4, alignment=Aligned16}; typedef float4 half; };
+template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16}; typedef double2 half; };
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
+ return make_float4(from, from, from, from);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {
+ return make_double2(from, from);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
+ return make_float4(a, a+1, a+2, a+3);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
+ return make_double2(a, a+1);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x+b.x, a.y+b.y);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x-b.x, a.y-b.y);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {
+ return make_float4(-a.x, -a.y, -a.z, -a.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {
+ return make_double2(-a.x, -a.y);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) { return a; }
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) { return a; }
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x*b.x, a.y*b.y);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x/b.x, a.y/b.y);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {
+ return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {
+ return make_double2(fmin(a.x, b.x), fmin(a.y, b.y));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {
+ return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {
+ return make_double2(fmax(a.x, b.x), fmax(a.y, b.y));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {
+ return *reinterpret_cast<const float4*>(from);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {
+ return *reinterpret_cast<const double2*>(from);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {
+ return make_float4(from[0], from[1], from[2], from[3]);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {
+ return make_double2(from[0], from[1]);
+}
+template<> EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
+ return make_float4(from[0], from[0], from[1], from[1]);
+}
+template<> EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
+ return make_double2(from[0], from[0]);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float* to, const float4& from) {
+ *reinterpret_cast<float4*>(to) = from;
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {
+ *reinterpret_cast<double2*>(to) = from;
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const float4& from) {
+ to[0] = from.x;
+ to[1] = from.y;
+ to[2] = from.z;
+ to[3] = from.w;
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {
+ to[0] = from.x;
+ to[1] = from.y;
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+ return __ldg((const float4*)from);
+#else
+ return make_float4(from[0], from[1], from[2], from[3]);
+#endif
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+ return __ldg((const double2*)from);
+#else
+ return make_double2(from[0], from[1]);
+#endif
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+ return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
+#else
+ return make_float4(from[0], from[1], from[2], from[3]);
+#endif
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+ return make_double2(__ldg(from+0), __ldg(from+1));
+#else
+ return make_double2(from[0], from[1]);
+#endif
+}
+template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
+ return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
+ return make_double2(from[0*stride], from[1*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
+ to[stride*0] = from.x;
+ to[stride*1] = from.y;
+ to[stride*2] = from.z;
+ to[stride*3] = from.w;
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
+ to[stride*0] = from.x;
+ to[stride*1] = from.y;
+}
+template<> EIGEN_DEVICE_FUNC inline float pfirst<float4>(const float4& a) {
+ return a.x;
+}
+template<> EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {
+ return a.x;
+}
+template<> EIGEN_DEVICE_FUNC inline float predux<float4>(const float4& a) {
+ return a.x + a.y + a.z + a.w;
+}
+template<> EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {
+ return a.x + a.y;
+}
+template<> EIGEN_DEVICE_FUNC inline float predux_max<float4>(const float4& a) {
+ return fmaxf(fmaxf(a.x, a.y), fmaxf(a.z, a.w));
+}
+template<> EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {
+ return fmax(a.x, a.y);
+}
+template<> EIGEN_DEVICE_FUNC inline float predux_min<float4>(const float4& a) {
+ return fminf(fminf(a.x, a.y), fminf(a.z, a.w));
+}
+template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {
+ return fmin(a.x, a.y);
+}
+template<> EIGEN_DEVICE_FUNC inline float predux_mul<float4>(const float4& a) {
+ return a.x * a.y * a.z * a.w;
+}
+template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
+ return a.x * a.y;
+}
+template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
+ return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
+}
+template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
+ return make_double2(fabs(a.x), fabs(a.y));
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<float4,4>& kernel) {
+ float tmp = kernel.packet[0].y;
+ kernel.packet[0].y = kernel.packet[1].x;
+ kernel.packet[1].x = tmp;
+ tmp = kernel.packet[0].z;
+ kernel.packet[0].z = kernel.packet[2].x;
+ kernel.packet[2].x = tmp;
+ tmp = kernel.packet[0].w;
+ kernel.packet[0].w = kernel.packet[3].x;
+ kernel.packet[3].x = tmp;
+ tmp = kernel.packet[1].z;
+ kernel.packet[1].z = kernel.packet[2].y;
+ kernel.packet[2].y = tmp;
+ tmp = kernel.packet[1].w;
+ kernel.packet[1].w = kernel.packet[3].y;
+ kernel.packet[3].y = tmp;
+ tmp = kernel.packet[2].w;
+ kernel.packet[2].w = kernel.packet[3].z;
+ kernel.packet[3].z = tmp;
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<double2,2>& kernel) {
+ double tmp = kernel.packet[0].y;
+ kernel.packet[0].y = kernel.packet[1].x;
+ kernel.packet[1].x = tmp;
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/CUDA/PacketMath.h"
+// #include "src/Core/arch/CUDA/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_CUDA_H
+#define EIGEN_MATH_FUNCTIONS_CUDA_H
+namespace Eigen {
+namespace internal {
+#if defined(__CUDACC__) && defined(EIGEN_USE_GPU) // transcendental packet ops: each expands to per-lane CUDA math calls (no real vectorization)
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 plog<float4>(const float4& a)
+{
+ return make_float4(logf(a.x), logf(a.y), logf(a.z), logf(a.w)); // float lanes use the *f math variants
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 plog<double2>(const double2& a)
+{
+ using ::log; // pull the global-namespace C function so the CUDA device overload is found
+ return make_double2(log(a.x), log(a.y));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 plog1p<float4>(const float4& a)
+{
+ return make_float4(log1pf(a.x), log1pf(a.y), log1pf(a.z), log1pf(a.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 plog1p<double2>(const double2& a)
+{
+ return make_double2(log1p(a.x), log1p(a.y));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pexp<float4>(const float4& a)
+{
+ return make_float4(expf(a.x), expf(a.y), expf(a.z), expf(a.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pexp<double2>(const double2& a)
+{
+ using ::exp;
+ return make_double2(exp(a.x), exp(a.y));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 psqrt<float4>(const float4& a)
+{
+ return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 psqrt<double2>(const double2& a)
+{
+ using ::sqrt;
+ return make_double2(sqrt(a.x), sqrt(a.y));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 prsqrt<float4>(const float4& a)
+{
+ return make_float4(rsqrtf(a.x), rsqrtf(a.y), rsqrtf(a.z), rsqrtf(a.w)); // rsqrtf/rsqrt are CUDA reciprocal-square-root intrinsics
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 prsqrt<double2>(const double2& a)
+{
+ return make_double2(rsqrt(a.x), rsqrt(a.y));
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/CUDA/MathFunctions.h"
+#endif
+// #include "src/Core/arch/Default/Settings.h"
+#ifndef EIGEN_DEFAULT_SETTINGS_H
+#define EIGEN_DEFAULT_SETTINGS_H
+#ifndef EIGEN_UNROLLING_LIMIT // all four tunables below are user-overridable: define them before including Eigen to change the defaults
+#define EIGEN_UNROLLING_LIMIT 100
+#endif
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
+#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
+#endif
+#endif
+// end #include "src/Core/arch/Default/Settings.h"
+// #include "src/Core/functors/BinaryFunctors.h"
+#ifndef EIGEN_BINARY_FUNCTORS_H
+#define EIGEN_BINARY_FUNCTORS_H
+namespace Eigen {
+namespace internal {
+template<typename Arg1, typename Arg2>
+struct binary_op_base // common base: exposes the argument types every binary functor below inherits
+{
+ typedef Arg1 first_argument_type;
+ typedef Arg2 second_argument_type;
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_sum_op : binary_op_base<LhsScalar,RhsScalar> // a + b; result type resolved through ScalarBinaryOpTraits (mixed-scalar support)
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_sum_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
+#else
+ scalar_sum_op() { // user plugin hook runs in the ctor when the plugin macro is defined
+ EIGEN_SCALAR_BINARY_OP_PLUGIN
+ }
+#endif
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::padd(a,b); } // vectorized form
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+ { return internal::predux(a); } // horizontal reduction matching the op (sum)
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_sum_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2, // average of the two sides' add costs
+ PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasAdd && packet_traits<RhsScalar>::HasAdd // vectorize only for same-type operands
+ };
+};
+template<> struct scalar_sum_op<bool,bool> : scalar_sum_op<int,int> { // deprecated: bool+bool promotes to int arithmetic (counts true values)
+ EIGEN_DEPRECATED
+ scalar_sum_op() {}
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_product_op : binary_op_base<LhsScalar,RhsScalar> // a * b
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_product_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
+#else
+ scalar_product_op() {
+ EIGEN_SCALAR_BINARY_OP_PLUGIN
+ }
+#endif
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pmul(a,b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+ { return internal::predux_mul(a); } // reduction matching the op (product)
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2,
+ PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
+ };
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_conj_product_op : binary_op_base<LhsScalar,RhsScalar> // conj(a) * b, used by dot products; conjugation applies only when the lhs is complex
+{
+ enum {
+ Conj = NumTraits<LhsScalar>::IsComplex
+ };
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_conj_product_op>::ReturnType result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
+ { return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); } // conj_helper folds the optional conjugation into the multiply
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = NumTraits<LhsScalar>::MulCost,
+ PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul
+ };
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_min_op : binary_op_base<LhsScalar,RhsScalar> // numext::mini(a, b)
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_min_op>::ReturnType result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pmin(a,b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+ { return internal::predux_min(a); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_min_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+ PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMin
+ };
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_max_op : binary_op_base<LhsScalar,RhsScalar> // numext::maxi(a, b)
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_max_op>::ReturnType result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pmax(a,b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+ { return internal::predux_max(a); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_max_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+ PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMax
+ };
+};
+template<typename LhsScalar, typename RhsScalar, ComparisonName cmp> struct scalar_cmp_op; // primary template declared only; specialized per comparison below
+template<typename LhsScalar, typename RhsScalar, ComparisonName cmp>
+struct functor_traits<scalar_cmp_op<LhsScalar,RhsScalar, cmp> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+ PacketAccess = false // comparisons are never vectorized here
+ };
+};
+template<ComparisonName Cmp, typename LhsScalar, typename RhsScalar>
+struct result_of<scalar_cmp_op<LhsScalar, RhsScalar, Cmp>(LhsScalar,RhsScalar)> {
+ typedef bool type;
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_EQ> : binary_op_base<LhsScalar,RhsScalar> // a == b
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LT> : binary_op_base<LhsScalar,RhsScalar> // a < b
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LE> : binary_op_base<LhsScalar,RhsScalar> // a <= b
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GT> : binary_op_base<LhsScalar,RhsScalar> // a > b
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GE> : binary_op_base<LhsScalar,RhsScalar> // a >= b
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_UNORD> : binary_op_base<LhsScalar,RhsScalar> // unordered test: true iff neither a<=b nor b<=a holds (i.e. a NaN is involved)
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,RhsScalar> // a != b
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;}
+};
+template<typename Scalar>
+struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar> // sqrt(x^2 + y^2) computed as p*sqrt(1+(q/p)^2) to avoid overflow/underflow of the squares
+{
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
+ {
+ EIGEN_USING_STD_MATH(sqrt)
+ Scalar p, qp;
+ if(_x>_y) // NOTE(review): branches on raw values, not magnitudes — presumably relies on non-negative inputs; confirm against callers
+ {
+ p = _x;
+ qp = _y / p;
+ }
+ else
+ {
+ p = _y;
+ qp = _x / p;
+ }
+ return p * sqrt(Scalar(1) + qp*qp);
+ }
+};
+template<typename Scalar>
+struct functor_traits<scalar_hypot_op<Scalar,Scalar> > {
+ enum
+ {
+ Cost = 3 * NumTraits<Scalar>::AddCost +
+ 2 * NumTraits<Scalar>::MulCost +
+ 2 * scalar_div_cost<Scalar,false>::value,
+ PacketAccess = false
+ };
+};
+template<typename Scalar, typename Exponent>
+struct scalar_pow_op : binary_op_base<Scalar,Exponent> // numext::pow(a, b); base and exponent may differ in type
+{
+ typedef typename ScalarBinaryOpTraits<Scalar,Exponent,scalar_pow_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_pow_op)
+#else
+ scalar_pow_op() {
+ typedef Scalar LhsScalar; // the plugin macro expects LhsScalar/RhsScalar names
+ typedef Exponent RhsScalar;
+ EIGEN_SCALAR_BINARY_OP_PLUGIN
+ }
+#endif
+ EIGEN_DEVICE_FUNC
+ inline result_type operator() (const Scalar& a, const Exponent& b) const { return numext::pow(a, b); }
+};
+template<typename Scalar, typename Exponent>
+struct functor_traits<scalar_pow_op<Scalar,Exponent> > {
+ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_difference_op : binary_op_base<LhsScalar,RhsScalar> // a - b
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_difference_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
+#else
+ scalar_difference_op() {
+ EIGEN_SCALAR_BINARY_OP_PLUGIN
+ }
+#endif
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::psub(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_difference_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+ PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasSub && packet_traits<RhsScalar>::HasSub
+ };
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_quotient_op : binary_op_base<LhsScalar,RhsScalar> // a / b
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_quotient_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
+#else
+ scalar_quotient_op() {
+ EIGEN_SCALAR_BINARY_OP_PLUGIN
+ }
+#endif
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pdiv(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
+ typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;
+ enum {
+ PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv,
+ Cost = scalar_div_cost<result_type,PacketAccess>::value // division cost depends on type and on whether it vectorizes
+ };
+};
+struct scalar_boolean_and_op { // logical &&; bool-only, never vectorized
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
+};
+template<> struct functor_traits<scalar_boolean_and_op> {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+struct scalar_boolean_or_op { // logical ||
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
+};
+template<> struct functor_traits<scalar_boolean_or_op> {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+struct scalar_boolean_xor_op { // logical xor via bitwise ^ on bool
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_xor_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a ^ b; }
+};
+template<> struct functor_traits<scalar_boolean_xor_op> {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+template<typename BinaryOp> struct bind1st_op : BinaryOp { // unary adapter: fixes the FIRST argument of BinaryOp to a stored value
+ typedef typename BinaryOp::first_argument_type first_argument_type;
+ typedef typename BinaryOp::second_argument_type second_argument_type;
+ typedef typename BinaryOp::result_type result_type;
+ bind1st_op(const first_argument_type &val) : m_value(val) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const second_argument_type& b) const { return BinaryOp::operator()(m_value,b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& b) const
+ { return BinaryOp::packetOp(internal::pset1<Packet>(m_value), b); } // broadcast the bound scalar into a packet
+ first_argument_type m_value; // the bound lhs value
+};
+template<typename BinaryOp> struct functor_traits<bind1st_op<BinaryOp> > : functor_traits<BinaryOp> {};
+template<typename BinaryOp> struct bind2nd_op : BinaryOp { // unary adapter: fixes the SECOND argument of BinaryOp to a stored value
+ typedef typename BinaryOp::first_argument_type first_argument_type;
+ typedef typename BinaryOp::second_argument_type second_argument_type;
+ typedef typename BinaryOp::result_type result_type;
+ bind2nd_op(const second_argument_type &val) : m_value(val) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const first_argument_type& a) const { return BinaryOp::operator()(a,m_value); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return BinaryOp::packetOp(a,internal::pset1<Packet>(m_value)); }
+ second_argument_type m_value; // the bound rhs value
+};
+template<typename BinaryOp> struct functor_traits<bind2nd_op<BinaryOp> > : functor_traits<BinaryOp> {};
+}
+}
+#endif
+// end #include "src/Core/functors/BinaryFunctors.h"
+// #include "src/Core/functors/UnaryFunctors.h"
+#ifndef EIGEN_UNARY_FUNCTORS_H
+#define EIGEN_UNARY_FUNCTORS_H
+namespace Eigen {
+namespace internal {
+template<typename Scalar> struct scalar_opposite_op { // unary negation: -a
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pnegate(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_opposite_op<Scalar> >
+{ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasNegate };
+};
+template<typename Scalar> struct scalar_abs_op { // |a|; result is the real type (complex magnitude for complex Scalar)
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs(a); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pabs(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_abs_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasAbs
+ };
+};
+template<typename Scalar> struct scalar_score_coeff_op : scalar_abs_op<Scalar> // pivoting score for decompositions; tag type marks that the score IS the absolute value
+{
+ typedef void Score_is_abs; // marker consumed by abs_knowing_score below
+};
+template<typename Scalar>
+struct functor_traits<scalar_score_coeff_op<Scalar> > : functor_traits<scalar_abs_op<Scalar> > {};
+template<typename Scalar, typename=void> struct abs_knowing_score // generic case: the score is unrelated to |a|, so recompute the abs
+{
+ EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ template<typename Score>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { return numext::abs(a); }
+};
+template<typename Scalar> struct abs_knowing_score<Scalar, typename scalar_score_coeff_op<Scalar>::Score_is_abs> // specialization selected via the Score_is_abs tag: the score already IS |a|, so just return it
+{
+ EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ template<typename Scal>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scal&, const result_type& a) const { return a; }
+};
+template<typename Scalar> struct scalar_abs2_op { // squared magnitude |a|^2
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a,a); } // packet path: a*a (valid for real packets)
+};
+template<typename Scalar>
+struct functor_traits<scalar_abs2_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 }; };
+template<typename Scalar> struct scalar_conjugate_op { // complex conjugate; identity for real Scalar
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_conjugate_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0, // free for real types
+ PacketAccess = packet_traits<Scalar>::HasConj
+ };
+};
+template<typename Scalar> struct scalar_arg_op { // complex argument (phase angle) of a
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_arg_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using numext::arg; return arg(a); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::parg(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_arg_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::IsComplex ? 5 * NumTraits<Scalar>::MulCost : NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasArg
+ };
+};
+template<typename Scalar, typename NewType>
+struct scalar_cast_op { // explicit conversion Scalar -> NewType via Eigen's cast<>
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
+ typedef NewType result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
+};
+template<typename Scalar, typename NewType>
+struct functor_traits<scalar_cast_op<Scalar,NewType> >
+{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; }; // same-type cast is free
+template<typename Scalar>
+struct scalar_real_op { // real part (by value)
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_real_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+template<typename Scalar>
+struct scalar_imag_op { // imaginary part (by value)
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_imag_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+template<typename Scalar>
+struct scalar_real_ref_op { // real part as a writable reference; const_cast enables write access through a const expression
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast<Scalar*>(&a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_real_ref_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+template<typename Scalar>
+struct scalar_imag_ref_op { // imaginary part as a writable reference
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast<Scalar*>(&a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_imag_ref_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+// exp(a): scalar path via numext::exp, packet path via internal::pexp.
+template<typename Scalar> struct scalar_exp_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::exp(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_exp_op<Scalar> > {
+ enum {
+ PacketAccess = packet_traits<Scalar>::HasExp,
+ // Cost model distinguishes float (sizeof==4) from double, and FMA from non-FMA builds.
+#ifdef EIGEN_VECTORIZE_FMA
+ Cost =
+ (sizeof(Scalar) == 4
+ ? (8 * NumTraits<Scalar>::AddCost + 6 * NumTraits<Scalar>::MulCost)
+ : (14 * NumTraits<Scalar>::AddCost +
+ 6 * NumTraits<Scalar>::MulCost +
+ scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
+#else
+ Cost =
+ (sizeof(Scalar) == 4
+ ? (21 * NumTraits<Scalar>::AddCost + 13 * NumTraits<Scalar>::MulCost)
+ : (23 * NumTraits<Scalar>::AddCost +
+ 12 * NumTraits<Scalar>::MulCost +
+ scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
+#endif
+ };
+};
+// log(a): scalar path via numext::log, packet path via internal::plog.
+template<typename Scalar> struct scalar_log_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_log_op<Scalar> > {
+ enum {
+ PacketAccess = packet_traits<Scalar>::HasLog,
+ // Vectorized cost counts the polynomial evaluation; otherwise a flat estimate
+ // of the libm call (40 for float-sized scalars, 85 for larger ones).
+ Cost =
+ (PacketAccess
+#ifdef EIGEN_VECTORIZE_FMA
+ ? (20 * NumTraits<Scalar>::AddCost + 7 * NumTraits<Scalar>::MulCost)
+#else
+ ? (36 * NumTraits<Scalar>::AddCost + 14 * NumTraits<Scalar>::MulCost)
+#endif
+ : sizeof(Scalar)==4 ? 40 : 85)
+ };
+};
+// log(1+a) via numext::log1p / internal::plog1p; reuses scalar_log_op's cost estimate.
+template<typename Scalar> struct scalar_log1p_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_log1p_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log1p(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog1p(a); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_log1p_op<Scalar> > {
+ enum {
+ PacketAccess = packet_traits<Scalar>::HasLog1p,
+ Cost = functor_traits<scalar_log_op<Scalar> >::Cost
+ };
+};
+// Base-10 logarithm; scalar path pulls std::log10 into scope via EIGEN_USING_STD_MATH.
+template<typename Scalar> struct scalar_log10_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { EIGEN_USING_STD_MATH(log10) return log10(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_log10_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog10 }; };
+// sqrt(a) via numext::sqrt / internal::psqrt.
+template<typename Scalar> struct scalar_sqrt_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sqrt(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_sqrt_op<Scalar> > {
+ enum {
+ // Under EIGEN_FAST_MATH the single-precision path uses a cheaper approximation,
+ // hence the polynomial-style cost; double (sizeof==8) keeps the flat estimate of 28.
+#if EIGEN_FAST_MATH
+ Cost = (sizeof(Scalar) == 8 ? 28
+ : (3 * NumTraits<Scalar>::AddCost +
+ 5 * NumTraits<Scalar>::MulCost)),
+#else
+ Cost = (sizeof(Scalar) == 8 ? 28 : 14),
+#endif
+ PacketAccess = packet_traits<Scalar>::HasSqrt
+ };
+};
+// Reciprocal square root: 1/sqrt(a) scalar-wise, internal::prsqrt for packets.
+template<typename Scalar> struct scalar_rsqrt_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return Scalar(1)/numext::sqrt(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_rsqrt_op<Scalar> >
+{ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasRsqrt
+ };
+};
+// Trigonometric and inverse-trigonometric unary functors (cos, sin, tan, acos, asin,
+// atan). Each forwards the scalar call to the corresponding numext:: routine and the
+// packet call to the matching internal::p* primitive; all share the same heuristic
+// cost of 5 multiplies and vectorize only when packet_traits reports the capability.
+template<typename Scalar> struct scalar_cos_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return numext::cos(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cos_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasCos
+ };
+};
+template<typename Scalar> struct scalar_sin_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sin(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sin_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasSin
+ };
+};
+template<typename Scalar> struct scalar_tan_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::tan(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_tan_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasTan
+ };
+};
+template<typename Scalar> struct scalar_acos_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::acos(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_acos_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasACos
+ };
+};
+template<typename Scalar> struct scalar_asin_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::asin(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_asin_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasASin
+ };
+};
+template<typename Scalar> struct scalar_atan_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::atan(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::patan(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_atan_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasATan
+ };
+};
+// tanh(a) via numext::tanh / ptanh.
+template <typename Scalar>
+struct scalar_tanh_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::tanh(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return ptanh(x); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_tanh_op<Scalar> > {
+ enum {
+ PacketAccess = packet_traits<Scalar>::HasTanh,
+ // Fast-math float gets a cheap rational-approximation cost (FMA-dependent);
+ // every other case is priced as an exp-based evaluation, hence the reuse of
+ // scalar_exp_op's cost in the fallback branch.
+ Cost = ( (EIGEN_FAST_MATH && is_same<Scalar,float>::value)
+#ifdef EIGEN_VECTORIZE_FMA
+ ? (2 * NumTraits<Scalar>::AddCost +
+ 6 * NumTraits<Scalar>::MulCost +
+ scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
+#else
+ ? (11 * NumTraits<Scalar>::AddCost +
+ 11 * NumTraits<Scalar>::MulCost +
+ scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
+#endif
+ : (6 * NumTraits<Scalar>::AddCost +
+ 3 * NumTraits<Scalar>::MulCost +
+ 2 * scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value +
+ functor_traits<scalar_exp_op<Scalar> >::Cost))
+ };
+};
+// sinh(a) via numext::sinh / internal::psinh.
+template<typename Scalar> struct scalar_sinh_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sinh(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psinh(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sinh_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasSinh
+ };
+};
+// cosh(a) via numext::cosh / internal::pcosh.
+template<typename Scalar> struct scalar_cosh_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::cosh(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cosh_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasCosh
+ };
+};
+// Multiplicative inverse 1/a; the packet path divides a broadcast 1 by the packet.
+template<typename Scalar>
+struct scalar_inverse_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
+ { return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_inverse_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
+// a*a, one multiply.
+template<typename Scalar>
+struct scalar_square_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a,a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_square_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
+// a*a*a, two multiplies.
+template<typename Scalar>
+struct scalar_cube_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a,pmul(a,a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cube_op<Scalar> >
+{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
+// Rounding-family functors: round, floor, ceil. Each maps to the matching
+// numext:: scalar routine and internal::p* packet primitive.
+template<typename Scalar> struct scalar_round_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_round_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::round(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pround(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_round_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasRound
+ };
+};
+template<typename Scalar> struct scalar_floor_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_floor_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::floor(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pfloor(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_floor_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasFloor
+ };
+};
+template<typename Scalar> struct scalar_ceil_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_ceil_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasCeil
+ };
+};
+// Classification predicates (isnan / isinf / isfinite): return bool, never
+// vectorized. The extra parentheses around (numext::isnan) etc. prevent
+// function-style macro expansion of the same-named macros.
+template<typename Scalar> struct scalar_isnan_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op)
+ typedef bool result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isnan)(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_isnan_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::MulCost,
+ PacketAccess = false
+ };
+};
+template<typename Scalar> struct scalar_isinf_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op)
+ typedef bool result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isinf)(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_isinf_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::MulCost,
+ PacketAccess = false
+ };
+};
+template<typename Scalar> struct scalar_isfinite_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op)
+ typedef bool result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isfinite)(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_isfinite_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::MulCost,
+ PacketAccess = false
+ };
+};
+// Logical negation on bool coefficients.
+template<typename Scalar> struct scalar_boolean_not_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_not_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a) const { return !a; }
+};
+template<typename Scalar>
+struct functor_traits<scalar_boolean_not_op<Scalar> > {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+// Sign functor, specialized on whether Scalar is complex (second template parameter).
+template<typename Scalar,bool iscpx=(NumTraits<Scalar>::IsComplex!=0) > struct scalar_sign_op;
+// Real case: returns -1, 0 or +1 computed branchlessly as (a>0) - (a<0).
+template<typename Scalar>
+struct scalar_sign_op<Scalar,false> {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
+ {
+ return Scalar( (a>Scalar(0)) - (a<Scalar(0)) );
+ }
+};
+// Complex case: a/|a| (a unit-modulus complex number), with 0 mapped to 0.
+template<typename Scalar>
+struct scalar_sign_op<Scalar,true> {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
+ {
+ typedef typename NumTraits<Scalar>::Real real_type;
+ real_type aa = numext::abs(a);
+ if (aa==real_type(0))
+ return Scalar(0);
+ aa = real_type(1)/aa;
+ return Scalar(real(a)*aa, imag(a)*aa );
+ }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sign_op<Scalar> >
+{ enum {
+ Cost =
+ NumTraits<Scalar>::IsComplex
+ ? ( 8*NumTraits<Scalar>::MulCost )
+ : ( 3*NumTraits<Scalar>::AddCost),
+ PacketAccess = packet_traits<Scalar>::HasSign
+ };
+};
+
+// Logistic (sigmoid) functor: 1 / (1 + exp(-x)), scalar and packet paths.
+template <typename T>
+struct scalar_logistic_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
+ const T one = T(1);
+ return one / (one + numext::exp(-x));
+ }
+
+ template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Packet packetOp(const Packet& x) const {
+ const Packet one = pset1<Packet>(T(1));
+ return pdiv(one, padd(one, pexp(pnegate(x))));
+ }
+};
+template <typename T>
+struct functor_traits<scalar_logistic_op<T> > {
+ enum {
+ Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
+ // Vectorized only when all four required packet primitives exist.
+ PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
+ packet_traits<T>::HasNegate && packet_traits<T>::HasExp
+ };
+};
+
+// Float specialization: the packet path replaces the exp-based formula with a
+// clamped rational approximation (odd numerator / even denominator polynomials
+// in x, evaluated with pmadd in x^2), then shifts and clamps the result to [0,1].
+template <>
+struct scalar_logistic_op<float> {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator()(const float& x) const {
+ const float one = 1.0f;
+ return one / (one + numext::exp(-x));
+ }
+
+ template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Packet packetOp(const Packet& _x) const {
+ // Clamp the inputs to the range [-18, 18] since anything outside
+ // this range is 0.0f or 1.0f in single-precision.
+ const Packet x = pmax(pmin(_x, pset1<Packet>(18.0)), pset1<Packet>(-18.0));
+
+ // The monomial coefficients of the numerator polynomial (odd).
+ const Packet alpha_1 = pset1<Packet>(2.48287947061529e-01);
+ const Packet alpha_3 = pset1<Packet>(8.51377133304701e-03);
+ const Packet alpha_5 = pset1<Packet>(6.08574864600143e-05);
+ const Packet alpha_7 = pset1<Packet>(1.15627324459942e-07);
+ const Packet alpha_9 = pset1<Packet>(4.37031012579801e-11);
+
+ // The monomial coefficients of the denominator polynomial (even).
+ const Packet beta_0 = pset1<Packet>(9.93151921023180e-01);
+ const Packet beta_2 = pset1<Packet>(1.16817656904453e-01);
+ const Packet beta_4 = pset1<Packet>(1.70198817374094e-03);
+ const Packet beta_6 = pset1<Packet>(6.29106785017040e-06);
+ const Packet beta_8 = pset1<Packet>(5.76102136993427e-09);
+ const Packet beta_10 = pset1<Packet>(6.10247389755681e-13);
+
+ // Since the polynomials are odd/even, we need x^2.
+ const Packet x2 = pmul(x, x);
+
+ // Evaluate the numerator polynomial p.
+ Packet p = pmadd(x2, alpha_9, alpha_7);
+ p = pmadd(x2, p, alpha_5);
+ p = pmadd(x2, p, alpha_3);
+ p = pmadd(x2, p, alpha_1);
+ p = pmul(x, p);
+
+ // Evaluate the denominator polynomial p.
+ Packet q = pmadd(x2, beta_10, beta_8);
+ q = pmadd(x2, q, beta_6);
+ q = pmadd(x2, q, beta_4);
+ q = pmadd(x2, q, beta_2);
+ q = pmadd(x2, q, beta_0);
+
+ // Divide the numerator by the denominator and shift it up.
+ return pmax(pmin(padd(pdiv(p, q), pset1<Packet>(0.5)), pset1<Packet>(1.0)),
+ pset1<Packet>(0.0));
+ }
+};
+}
+}
+#endif
+// end #include "src/Core/functors/UnaryFunctors.h"
+// #include "src/Core/functors/NullaryFunctors.h"
+#ifndef EIGEN_NULLARY_FUNCTORS_H
+#define EIGEN_NULLARY_FUNCTORS_H
+namespace Eigen {
+namespace internal {
+// Nullary functor returning a stored constant. Note: only a copy constructor and a
+// value constructor are provided (no default construction); the value is immutable.
+template<typename Scalar>
+struct scalar_constant_op {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() () const { return m_other; }
+ template<typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp() const { return internal::pset1<PacketType>(m_other); }
+ const Scalar m_other;
+};
+// IsRepeatable: calling the functor repeatedly yields the same value.
+template<typename Scalar>
+struct functor_traits<scalar_constant_op<Scalar> >
+{ enum { Cost = 0 ,
+ PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
+// Identity-matrix generator: 1 on the diagonal (row==col), 0 elsewhere.
+template<typename Scalar> struct scalar_identity_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
+ template<typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType row, IndexType col) const { return row==col ? Scalar(1) : Scalar(0); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_identity_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
+// Implementation of linspace generation, specialized on IsInteger.
+template <typename Scalar, typename Packet, bool IsInteger> struct linspaced_op_impl;
+// Non-integer (floating-point) specialization: value at index i is low + i*step,
+// with step = (high-low)/(num_steps-1). When |high| < |low| (m_flip), the sequence
+// is instead evaluated backwards from high, presumably to improve floating-point
+// accuracy near the dominant endpoint; either way the exact low/high endpoints are
+// returned at i==0 and i==m_size1 respectively.
+template <typename Scalar, typename Packet>
+struct linspaced_op_impl<Scalar,Packet,false>
+{
+ linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
+ m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
+ m_flip(numext::abs(high)<numext::abs(low))
+ {}
+ template<typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ if(m_flip)
+ return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step);
+ else
+ return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step);
+ }
+ template<typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const
+ {
+ // Packet path: build [i, i+1, ...] with plset, scale by the step, and patch the
+ // packet containing an exact endpoint via pinsertfirst/pinsertlast.
+ if(m_flip)
+ {
+ Packet pi = plset<Packet>(Scalar(i-m_size1));
+ Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
+ if(i==0)
+ res = pinsertfirst(res, m_low);
+ return res;
+ }
+ else
+ {
+ Packet pi = plset<Packet>(Scalar(i));
+ Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
+ if(i==m_size1-unpacket_traits<Packet>::size+1)
+ res = pinsertlast(res, m_high);
+ return res;
+ }
+ }
+ const Scalar m_low;
+ const Scalar m_high;
+ const Index m_size1;
+ const Scalar m_step;
+ const bool m_flip;
+};
+// Integer specialization: multiplies by an integer step, or — when the number of
+// steps exceeds the value range (m_use_divisor) — divides the index instead so
+// that consecutive values repeat evenly.
+template <typename Scalar, typename Packet>
+struct linspaced_op_impl<Scalar,Packet,true>
+{
+ linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
+ m_low(low),
+ m_multiplier((high-low)/convert_index<Scalar>(num_steps<=1 ? 1 : num_steps-1)),
+ m_divisor(convert_index<Scalar>((high>=low?num_steps:-num_steps)+(high-low))/((numext::abs(high-low)+1)==0?1:(numext::abs(high-low)+1))),
+ m_use_divisor(num_steps>1 && (numext::abs(high-low)+1)<num_steps)
+ {}
+ template<typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Scalar operator() (IndexType i) const
+ {
+ if(m_use_divisor) return m_low + convert_index<Scalar>(i)/m_divisor;
+ else return m_low + convert_index<Scalar>(i)*m_multiplier;
+ }
+ const Scalar m_low;
+ const Scalar m_multiplier;
+ const Scalar m_divisor;
+ const bool m_use_divisor;
+};
+// Public linspace functor: thin wrapper dispatching to the integer or
+// floating-point linspaced_op_impl depending on NumTraits<Scalar>::IsInteger.
+template <typename Scalar, typename PacketType> struct linspaced_op;
+template <typename Scalar, typename PacketType> struct functor_traits< linspaced_op<Scalar,PacketType> >
+{
+ enum
+ {
+ Cost = 1,
+ PacketAccess = (!NumTraits<Scalar>::IsInteger) && packet_traits<Scalar>::HasSetLinear && packet_traits<Scalar>::HasBlend,
+ IsRepeatable = true
+ };
+};
+template <typename Scalar, typename PacketType> struct linspaced_op
+{
+ // With a single step the sequence degenerates to [high], hence low is replaced by high.
+ linspaced_op(const Scalar& low, const Scalar& high, Index num_steps)
+ : impl((num_steps==1 ? high : low),high,num_steps)
+ {}
+ template<typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return impl(i); }
+ template<typename Packet,typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.packetOp(i); }
+ const linspaced_op_impl<Scalar,PacketType,NumTraits<Scalar>::IsInteger> impl;
+};
+// A nullary functor supports linear (single-index) access iff it has no binary (row,col) operator.
+template<typename Functor> struct functor_has_linear_access { enum { ret = !has_binary_operator<Functor>::value }; };
+// Workaround for older compilers (pre-MSVC 2010, GCC < 4.8, ICC < 16) where the
+// automatic detection of nullary/unary/binary operator() fails: explicitly declare
+// which call forms each built-in nullary functor supports.
+#if !( (EIGEN_COMP_MSVC>1600) || (EIGEN_GNUC_AT_LEAST(4,8)) || (EIGEN_COMP_ICC>=1600))
+template<typename Scalar,typename IndexType>
+struct has_nullary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 1}; };
+template<typename Scalar,typename IndexType>
+struct has_unary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_binary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_nullary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_unary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_binary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 1}; };
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_nullary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_unary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 1}; };
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_binary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_nullary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 1}; };
+template<typename Scalar,typename IndexType>
+struct has_unary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_binary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
+#endif
+}
+}
+#endif
+// end #include "src/Core/functors/NullaryFunctors.h"
+// #include "src/Core/functors/AssignmentFunctors.h"
+#ifndef EIGEN_ASSIGNMENT_FUNCTORS_H
+#define EIGEN_ASSIGNMENT_FUNCTORS_H
+namespace Eigen {
+namespace internal {
+// Assignment kernels used by the assignment evaluators. assignCoeff applies the
+// operation per coefficient; assignPacket loads the destination (except for plain
+// assignment), combines it with the source packet, and stores with the requested
+// Alignment via pstoret/ploadt.
+// Plain copy: a = b.
+template<typename DstScalar,typename SrcScalar> struct assign_op {
+ EIGEN_EMPTY_STRUCT_CTOR(assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+ { internal::pstoret<DstScalar,Packet,Alignment>(a,b); }
+};
+// Empty specialization: assigning from a void source is intentionally disabled.
+template<typename DstScalar> struct assign_op<DstScalar,void> {};
+template<typename DstScalar,typename SrcScalar>
+struct functor_traits<assign_op<DstScalar,SrcScalar> > {
+ enum {
+ Cost = NumTraits<DstScalar>::ReadCost,
+ PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::Vectorizable && packet_traits<SrcScalar>::Vectorizable
+ };
+};
+// Compound assignment: a += b.
+template<typename DstScalar,typename SrcScalar> struct add_assign_op {
+ EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; }
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+ { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename DstScalar,typename SrcScalar>
+struct functor_traits<add_assign_op<DstScalar,SrcScalar> > {
+ enum {
+ Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
+ PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasAdd
+ };
+};
+// Compound assignment: a -= b.
+template<typename DstScalar,typename SrcScalar> struct sub_assign_op {
+ EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; }
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+ { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename DstScalar,typename SrcScalar>
+struct functor_traits<sub_assign_op<DstScalar,SrcScalar> > {
+ enum {
+ Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
+ PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasSub
+ };
+};
+// Compound assignment: a *= b (SrcScalar defaults to DstScalar).
+template<typename DstScalar, typename SrcScalar=DstScalar>
+struct mul_assign_op {
+ EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a *= b; }
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+ { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename DstScalar, typename SrcScalar>
+struct functor_traits<mul_assign_op<DstScalar,SrcScalar> > {
+ enum {
+ Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
+ PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasMul
+ };
+};
+// Compound assignment: a /= b (SrcScalar defaults to DstScalar).
+template<typename DstScalar, typename SrcScalar=DstScalar> struct div_assign_op {
+ EIGEN_EMPTY_STRUCT_CTOR(div_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a /= b; }
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+ { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename DstScalar, typename SrcScalar>
+struct functor_traits<div_assign_op<DstScalar,SrcScalar> > {
+ enum {
+ Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
+ PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasDiv
+ };
+};
+// Swap kernel: exchanges a and b (b's constness is cast away by the caller's contract).
+template<typename Scalar> struct swap_assign_op {
+ EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
+ {
+#ifdef __CUDACC__
+ // Manual three-move swap on CUDA, presumably because std::swap is not
+ // usable from device code -- TODO confirm.
+ Scalar t=b; const_cast<Scalar&>(b)=a; a=t;
+#else
+ using std::swap;
+ swap(a,const_cast<Scalar&>(b));
+#endif
+ }
+};
+template<typename Scalar>
+struct functor_traits<swap_assign_op<Scalar> > {
+ enum {
+ // Priced as three memory accesses (read both sides plus the temporary shuffle).
+ Cost = 3 * NumTraits<Scalar>::ReadCost,
+ PacketAccess = packet_traits<Scalar>::Vectorizable
+ };
+};
+}
+}
+#endif
+// end #include "src/Core/functors/AssignmentFunctors.h"
+// #include "src/Core/DenseCoeffsBase.h"
+#ifndef EIGEN_DENSECOEFFSBASE_H
+#define EIGEN_DENSECOEFFSBASE_H
+namespace Eigen {
+namespace internal {
+// Metafunction: leaves arithmetic types untouched (returned by value anyway),
+// otherwise applies add_const_on_value_type to T.
+template<typename T> struct add_const_on_value_type_if_arithmetic
+{
+ typedef typename conditional<is_arithmetic<T>::value, T, typename add_const_on_value_type<T>::type>::type type;
+};
+}
+template<typename Derived>
+class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
+{
+ public:
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
+ const Scalar&,
+ typename internal::conditional<internal::is_arithmetic<Scalar>::value, Scalar, const Scalar>::type
+ >::type CoeffReturnType;
+ typedef typename internal::add_const_on_value_type_if_arithmetic<
+ typename internal::packet_traits<Scalar>::type
+ >::type PacketReturnType;
+ typedef EigenBase<Derived> Base;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::derived;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const
+ {
+ return int(Derived::RowsAtCompileTime) == 1 ? 0
+ : int(Derived::ColsAtCompileTime) == 1 ? inner
+ : int(Derived::Flags)&RowMajorBit ? outer
+ : inner;
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const
+ {
+ return int(Derived::ColsAtCompileTime) == 1 ? 0
+ : int(Derived::RowsAtCompileTime) == 1 ? inner
+ : int(Derived::Flags)&RowMajorBit ? inner
+ : outer;
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
+ {
+ eigen_internal_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ return internal::evaluator<Derived>(derived()).coeff(row,col);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
+ {
+ return coeff(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner));
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
+ {
+ eigen_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ return coeff(row, col);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ coeff(Index index) const
+ {
+ EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+ THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
+ eigen_internal_assert(index >= 0 && index < size());
+ return internal::evaluator<Derived>(derived()).coeff(index);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ operator[](Index index) const
+ {
+ EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
+ THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
+ eigen_assert(index >= 0 && index < size());
+ return coeff(index);
+ }
+ // Checked linear read via parentheses; no vector static assert here,
+ // but coeff(index) still statically requires linear access.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ operator()(Index index) const
+ {
+ eigen_assert(index >= 0 && index < size());
+ return coeff(index);
+ }
+ // Vector component accessors x/y/z/w == (*this)[0..3]. y/z/w statically
+ // assert a sufficient compile-time size; a dynamic size (-1) passes the
+ // static check and is caught only by operator[]'s runtime assert.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ x() const { return (*this)[0]; }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ y() const
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
+ return (*this)[1];
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ z() const
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
+ return (*this)[2];
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ w() const
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
+ return (*this)[3];
+ }
+ // Packet (SIMD) load of the coefficients starting at (row, col), using the
+ // default packet type for Scalar. LoadMode encodes alignment assumptions.
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
+ {
+ typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
+ eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
+ return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(row,col);
+ }
+ // Packet load addressed in (outer, inner) storage order.
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const
+ {
+ return packet<LoadMode>(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner));
+ }
+ // Packet load at a flat index; statically requires LinearAccessBit,
+ // like the scalar coeff(index).
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
+ {
+ EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+ THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
+ typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
+ eigen_internal_assert(index >= 0 && index < size());
+ return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(index);
+ }
+ protected:
+ // Declared but never defined: in this read-only base these names exist only
+ // to shadow the mutating/stride API so that accidental use through a
+ // read-only expression fails at compile/link time instead of compiling.
+ // NOTE(review): rationale inferred from the missing definitions and the
+ // protected access — confirm against upstream Eigen commentary.
+ void coeffRef();
+ void coeffRefByOuterInner();
+ void writePacket();
+ void writePacketByOuterInner();
+ void copyCoeff();
+ void copyCoeffByOuterInner();
+ void copyPacket();
+ void copyPacketByOuterInner();
+ void stride();
+ void innerStride();
+ void outerStride();
+ void rowStride();
+ void colStride();
+};
+// Write-access layer of the coefficient-accessor hierarchy: re-exposes the
+// read-only API from the base and adds mutable (Scalar&) counterparts of
+// coeff/operator()/operator[]/x-y-z-w, all routed through coeffRef().
+template<typename Derived>
+class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
+{
+ public:
+ typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ // Re-export the const overloads so they are not hidden by the
+ // non-const ones declared below.
+ using Base::coeff;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::derived;
+ using Base::rowIndexByOuterInner;
+ using Base::colIndexByOuterInner;
+ using Base::operator[];
+ using Base::operator();
+ using Base::x;
+ using Base::y;
+ using Base::z;
+ using Base::w;
+ // Unchecked mutable access to coefficient (row, col) via the evaluator;
+ // range-checked only in internal debug builds.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
+ {
+ eigen_internal_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ return internal::evaluator<Derived>(derived()).coeffRef(row,col);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ coeffRefByOuterInner(Index outer, Index inner)
+ {
+ return coeffRef(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner));
+ }
+ // Checked mutable 2-D access (eigen_assert active in debug builds).
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ operator()(Index row, Index col)
+ {
+ eigen_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ return coeffRef(row, col);
+ }
+ // Unchecked mutable linear access; statically requires LinearAccessBit.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ coeffRef(Index index)
+ {
+ EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+ THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
+ eigen_internal_assert(index >= 0 && index < size());
+ return internal::evaluator<Derived>(derived()).coeffRef(index);
+ }
+ // Checked mutable bracket access; vectors only (static assert).
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ operator[](Index index)
+ {
+ EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
+ THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
+ eigen_assert(index >= 0 && index < size());
+ return coeffRef(index);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ operator()(Index index)
+ {
+ eigen_assert(index >= 0 && index < size());
+ return coeffRef(index);
+ }
+ // Mutable component accessors mirroring the const x/y/z/w in the base.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ x() { return (*this)[0]; }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ y()
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
+ return (*this)[1];
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ z()
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
+ return (*this)[2];
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ w()
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
+ return (*this)[3];
+ }
+};
+// Direct-access (read-only) layer: adds the stride API for expressions whose
+// coefficients live in memory with regular strides (DirectAccessBit).
+// All strides are expressed in number of coefficients, not bytes.
+template<typename Derived>
+class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
+{
+ public:
+ typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::derived;
+ // Distance between two consecutive coefficients within a slice
+ // (column for column-major, row for row-major); delegated to Derived.
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const
+ {
+ return derived().innerStride();
+ }
+ // Distance between the start of two consecutive slices.
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const
+ {
+ return derived().outerStride();
+ }
+ // For vectors, the increment between coefficients; otherwise the outer
+ // stride. Annotated EIGEN_DEVICE_FUNC for consistency with the other
+ // stride accessors here — it only calls the device-annotated
+ // innerStride()/outerStride() (upstream Eigen applied the same fix).
+ EIGEN_DEVICE_FUNC
+ inline Index stride() const
+ {
+ return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
+ }
+ // Coefficient distance between two consecutive rows / columns.
+ EIGEN_DEVICE_FUNC
+ inline Index rowStride() const
+ {
+ return Derived::IsRowMajor ? outerStride() : innerStride();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index colStride() const
+ {
+ return Derived::IsRowMajor ? innerStride() : outerStride();
+ }
+};
+// Direct-access + write layer: identical stride API to the DirectAccessors
+// specialization, but layered on top of the write-access base so mutable
+// expressions get both coeffRef() and strides.
+template<typename Derived>
+class DenseCoeffsBase<Derived, DirectWriteAccessors>
+ : public DenseCoeffsBase<Derived, WriteAccessors>
+{
+ public:
+ typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::derived;
+ // Distance (in coefficients) between consecutive entries of a slice.
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const
+ {
+ return derived().innerStride();
+ }
+ // Distance between the start of two consecutive slices.
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const
+ {
+ return derived().outerStride();
+ }
+ // Annotated EIGEN_DEVICE_FUNC for consistency with the sibling stride
+ // accessors — it only calls device-annotated functions (upstream Eigen
+ // applied the same fix).
+ EIGEN_DEVICE_FUNC
+ inline Index stride() const
+ {
+ return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index rowStride() const
+ {
+ return Derived::IsRowMajor ? outerStride() : innerStride();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index colStride() const
+ {
+ return Derived::IsRowMajor ? innerStride() : outerStride();
+ }
+};
+namespace internal {
+// Primary template, selected when JustReturnZero is true: the expression is
+// already suitably aligned (or has no direct access), so the first aligned
+// element is trivially at index 0.
+template<int Alignment, typename Derived, bool JustReturnZero>
+struct first_aligned_impl
+{
+ static inline Index run(const Derived&)
+ { return 0; }
+};
+// Specialization for the general case: probe the actual data pointer to find
+// the first coefficient index with the requested alignment.
+template<int Alignment, typename Derived>
+struct first_aligned_impl<Alignment, Derived, false>
+{
+ static inline Index run(const Derived& m)
+ {
+ return internal::first_aligned<Alignment>(m.data(), m.size());
+ }
+};
+// Index of the first element of the expression with the given byte Alignment.
+// Short-circuits to 0 at compile time when the evaluator already guarantees
+// at least that alignment, or when the expression has no direct data access.
+template<int Alignment, typename Derived>
+static inline Index first_aligned(const DenseBase<Derived>& m)
+{
+ enum { ReturnZero = (int(evaluator<Derived>::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) };
+ return first_aligned_impl<Alignment, Derived, ReturnZero>::run(m.derived());
+}
+// Convenience overload: first_aligned with the alignment required by the
+// default SIMD packet type for the expression's Scalar.
+template<typename Derived>
+static inline Index first_default_aligned(const DenseBase<Derived>& m)
+{
+ typedef typename Derived::Scalar Scalar;
+ typedef typename packet_traits<Scalar>::type DefaultPacketType;
+ return internal::first_aligned<int(unpacket_traits<DefaultPacketType>::alignment),Derived>(m);
+}
+// Compile-time inner stride: forwarded from traits when the expression has
+// direct data access.
+template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
+struct inner_stride_at_compile_time
+{
+ enum { ret = traits<Derived>::InnerStrideAtCompileTime };
+};
+// No direct access => strides are meaningless; report 0.
+template<typename Derived>
+struct inner_stride_at_compile_time<Derived, false>
+{
+ enum { ret = 0 };
+};
+// Compile-time outer stride: forwarded from traits when the expression has
+// direct data access.
+template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
+struct outer_stride_at_compile_time
+{
+ enum { ret = traits<Derived>::OuterStrideAtCompileTime };
+};
+// No direct access => strides are meaningless; report 0.
+template<typename Derived>
+struct outer_stride_at_compile_time<Derived, false>
+{
+ enum { ret = 0 };
+};
+}
+}
+#endif
+// end #include "src/Core/DenseCoeffsBase.h"
+// #include "src/Core/DenseBase.h"
+#ifndef EIGEN_DENSEBASE_H
+#define EIGEN_DENSEBASE_H
+namespace Eigen {
+namespace internal {
+// Compile-time guard only: instantiating this function static-asserts that
+// DenseIndex is a signed type (negative indices/strides must be representable).
+static inline void check_DenseIndex_is_signed() {
+ EIGEN_STATIC_ASSERT(NumTraits<DenseIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE);
+}
+}
+template<typename Derived> class DenseBase
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ : public DenseCoeffsBase<Derived>
+#else
+ : public DenseCoeffsBase<Derived,DirectWriteAccessors>
+#endif
+{
+ public:
+ typedef Eigen::InnerIterator<Derived> InnerIterator;
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef Scalar value_type;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef DenseCoeffsBase<Derived> Base;
+ using Base::derived;
+ using Base::const_cast_derived;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::rowIndexByOuterInner;
+ using Base::colIndexByOuterInner;
+ using Base::coeff;
+ using Base::coeffByOuterInner;
+ using Base::operator();
+ using Base::operator[];
+ using Base::x;
+ using Base::y;
+ using Base::z;
+ using Base::w;
+ using Base::stride;
+ using Base::innerStride;
+ using Base::outerStride;
+ using Base::rowStride;
+ using Base::colStride;
+ typedef typename Base::CoeffReturnType CoeffReturnType;
+ enum {
+ RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+ ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+ SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
+ internal::traits<Derived>::ColsAtCompileTime>::ret),
+ MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
+ MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
+ internal::traits<Derived>::MaxColsAtCompileTime>::ret),
+ IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
+ || internal::traits<Derived>::MaxColsAtCompileTime == 1,
+ Flags = internal::traits<Derived>::Flags,
+ IsRowMajor = int(Flags) & RowMajorBit,
+ InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
+ : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+ InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
+ OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
+ };
+ typedef typename internal::find_best_packet<Scalar,SizeAtCompileTime>::type PacketScalar;
+ enum { IsPlainObjectBase = 0 };
+ typedef Matrix<typename internal::traits<Derived>::Scalar,
+ internal::traits<Derived>::RowsAtCompileTime,
+ internal::traits<Derived>::ColsAtCompileTime,
+ AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+ internal::traits<Derived>::MaxRowsAtCompileTime,
+ internal::traits<Derived>::MaxColsAtCompileTime
+ > PlainMatrix;
+ typedef Array<typename internal::traits<Derived>::Scalar,
+ internal::traits<Derived>::RowsAtCompileTime,
+ internal::traits<Derived>::ColsAtCompileTime,
+ AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+ internal::traits<Derived>::MaxRowsAtCompileTime,
+ internal::traits<Derived>::MaxColsAtCompileTime
+ > PlainArray;
+ typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
+ PlainMatrix, PlainArray>::type PlainObject;
+ // Dense expressions treat every coefficient as a potential nonzero,
+ // so this is simply the total size (contrast with sparse counterparts).
+ EIGEN_DEVICE_FUNC
+ inline Index nonZeros() const { return size(); }
+ // Number of outer slices: 1 for vectors, else rows for row-major and
+ // cols for column-major storage.
+ EIGEN_DEVICE_FUNC
+ Index outerSize() const
+ {
+ return IsVectorAtCompileTime ? 1
+ : int(IsRowMajor) ? this->rows() : this->cols();
+ }
+ // Length of one inner slice: the full size for vectors, else cols for
+ // row-major and rows for column-major storage.
+ EIGEN_DEVICE_FUNC
+ Index innerSize() const
+ {
+ return IsVectorAtCompileTime ? this->size()
+ : int(IsRowMajor) ? this->cols() : this->rows();
+ }
+ // Deliberate no-op: generic dense expressions cannot be resized. Exists so
+ // generic code may call resize(); asserts the requested size is unchanged.
+ EIGEN_DEVICE_FUNC
+ void resize(Index newSize)
+ {
+ EIGEN_ONLY_USED_FOR_DEBUG(newSize);
+ eigen_assert(newSize == this->size()
+ && "DenseBase::resize() does not actually allow to resize.");
+ }
+ // Two-argument counterpart of the no-op resize(): only validates that the
+ // requested dimensions match the current ones.
+ EIGEN_DEVICE_FUNC
+ void resize(Index rows, Index cols)
+ {
+ EIGEN_ONLY_USED_FOR_DEBUG(rows);
+ EIGEN_ONLY_USED_FOR_DEBUG(cols);
+ eigen_assert(rows == this->rows() && cols == this->cols()
+ && "DenseBase::resize() does not actually allow to resize.");
+ }
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
+ typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> SequentialLinSpacedReturnType;
+ typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> RandomAccessLinSpacedReturnType;
+ typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
+#endif
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator=(const DenseBase<OtherDerived>& other);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator=(const DenseBase& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const EigenBase<OtherDerived> &other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator+=(const EigenBase<OtherDerived> &other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator-=(const EigenBase<OtherDerived> &other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const ReturnByValue<OtherDerived>& func);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& lazyAssign(const DenseBase<OtherDerived>& other);
+ EIGEN_DEVICE_FUNC
+ CommaInitializer<Derived> operator<< (const Scalar& s);
+ // Deprecated legacy API: flag manipulation is gone, so this now just
+ // returns the expression unchanged.
+ template<unsigned int Added,unsigned int Removed>
+ EIGEN_DEPRECATED
+ const Derived& flagged() const
+ { return derived(); }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
+ typedef Transpose<Derived> TransposeReturnType;
+ EIGEN_DEVICE_FUNC
+ TransposeReturnType transpose();
+ typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+ EIGEN_DEVICE_FUNC
+ ConstTransposeReturnType transpose() const;
+ EIGEN_DEVICE_FUNC
+ void transposeInPlace();
+ EIGEN_DEVICE_FUNC static const ConstantReturnType
+ Constant(Index rows, Index cols, const Scalar& value);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType
+ Constant(Index size, const Scalar& value);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType
+ Constant(const Scalar& value);
+ EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
+ LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
+ LinSpaced(Index size, const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
+ LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
+ LinSpaced(const Scalar& low, const Scalar& high);
+ template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+ static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
+ NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
+ template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+ static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
+ NullaryExpr(Index size, const CustomNullaryOp& func);
+ template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+ static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
+ NullaryExpr(const CustomNullaryOp& func);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
+ EIGEN_DEVICE_FUNC void fill(const Scalar& value);
+ EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
+ EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC Derived& setZero();
+ EIGEN_DEVICE_FUNC Derived& setOnes();
+ EIGEN_DEVICE_FUNC Derived& setRandom();
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC
+ bool isApprox(const DenseBase<OtherDerived>& other,
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC
+ bool isMuchSmallerThan(const RealScalar& other,
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC
+ bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ inline bool hasNaN() const;
+ inline bool allFinite() const;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator*=(const Scalar& other);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator/=(const Scalar& other);
+ typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
+ // Force evaluation of the expression into its natural evaluated type
+ // (internal::eval<Derived>::type), materializing lazy expression trees.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE EvalReturnType eval() const
+ {
+ return typename internal::eval<Derived>::type(derived());
+ }
+ // Coefficient-wise swap with another writable expression. The parameter is
+ // const& only to bind temporaries/expressions; const_cast_derived() removes
+ // the constness, and the static assert rejects plain-object operands
+ // (which must use the PlainObjectBase overload below instead).
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void swap(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
+ eigen_assert(rows()==other.rows() && cols()==other.cols());
+ call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
+ }
+ // Coefficient-wise swap with a concrete matrix/array (taken by non-const
+ // reference, so no const_cast is needed). Requires matching dimensions;
+ // performed element-wise via swap_assign_op, not by pointer exchange.
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void swap(PlainObjectBase<OtherDerived>& other)
+ {
+ eigen_assert(rows()==other.rows() && cols()==other.cols());
+ call_assignment(derived(), other.derived(), internal::swap_assign_op<Scalar>());
+ }
+ EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
+ EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
+ EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
+ template<bool Enable> EIGEN_DEVICE_FUNC
+ inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
+ template<bool Enable> EIGEN_DEVICE_FUNC
+ inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
+ EIGEN_DEVICE_FUNC Scalar sum() const;
+ EIGEN_DEVICE_FUNC Scalar mean() const;
+ EIGEN_DEVICE_FUNC Scalar trace() const;
+ EIGEN_DEVICE_FUNC Scalar prod() const;
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
+ template<typename IndexType> EIGEN_DEVICE_FUNC
+ typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
+ template<typename IndexType> EIGEN_DEVICE_FUNC
+ typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
+ template<typename IndexType> EIGEN_DEVICE_FUNC
+ typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
+ template<typename IndexType> EIGEN_DEVICE_FUNC
+ typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
+ template<typename BinaryOp>
+ EIGEN_DEVICE_FUNC
+ Scalar redux(const BinaryOp& func) const;
+ template<typename Visitor>
+ EIGEN_DEVICE_FUNC
+ void visit(Visitor& func) const;
+ // Wrap the expression with an IOFormat for customized stream output;
+ // the expression itself is not evaluated here.
+ inline const WithFormat<Derived> format(const IOFormat& fmt) const
+ {
+ return WithFormat<Derived>(derived(), fmt);
+ }
+ // The unique coefficient of a 1x1 expression; statically and dynamically
+ // asserted to be exactly 1x1.
+ EIGEN_DEVICE_FUNC
+ CoeffReturnType value() const
+ {
+ EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
+ eigen_assert(this->rows() == 1 && this->cols() == 1);
+ return derived().coeff(0,0);
+ }
+ EIGEN_DEVICE_FUNC bool all() const;
+ EIGEN_DEVICE_FUNC bool any() const;
+ EIGEN_DEVICE_FUNC Index count() const;
+ typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
+ typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;
+ typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;
+ typedef const VectorwiseOp<const Derived, Vertical> ConstColwiseReturnType;
+ // Read-only view for applying reductions/ops to each row individually.
+ EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const {
+ return ConstRowwiseReturnType(derived());
+ }
+ EIGEN_DEVICE_FUNC RowwiseReturnType rowwise();
+ // Read-only view for applying reductions/ops to each column individually.
+ EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const {
+ return ConstColwiseReturnType(derived());
+ }
+ EIGEN_DEVICE_FUNC ColwiseReturnType colwise();
+ typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;
+ static const RandomReturnType Random(Index rows, Index cols);
+ static const RandomReturnType Random(Index size);
+ static const RandomReturnType Random();
+ template<typename ThenDerived,typename ElseDerived>
+ const Select<Derived,ThenDerived,ElseDerived>
+ select(const DenseBase<ThenDerived>& thenMatrix,
+ const DenseBase<ElseDerived>& elseMatrix) const;
+ template<typename ThenDerived>
+ inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
+ select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;
+ template<typename ElseDerived>
+ inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
+ select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
+ template<int p> RealScalar lpNorm() const;
+ template<int RowFactor, int ColFactor>
+ EIGEN_DEVICE_FUNC
+ const Replicate<Derived,RowFactor,ColFactor> replicate() const;
+ // Lazy expression tiling *this rowFactor x colFactor times (runtime-sized
+ // counterpart of the templated replicate<RowFactor,ColFactor>()).
+ EIGEN_DEVICE_FUNC
+ const Replicate<Derived, Dynamic, Dynamic> replicate(Index rowFactor, Index colFactor) const
+ {
+ return Replicate<Derived, Dynamic, Dynamic>(derived(), rowFactor, colFactor);
+ }
+ typedef Reverse<Derived, BothDirections> ReverseReturnType;
+ typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
+ EIGEN_DEVICE_FUNC ReverseReturnType reverse();
+ // Read-only lazy view with both row and column order reversed.
+ EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const
+ {
+ return ConstReverseReturnType(derived());
+ }
+ EIGEN_DEVICE_FUNC void reverseInPlace();
+#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
+#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND)
+// # include "../plugins/BlockMethods.h"
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ColXpr;
+typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ConstColXpr;
+typedef Block<Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowXpr;
+typedef const Block<const Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowXpr;
+typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ColsBlockXpr;
+typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ConstColsBlockXpr;
+typedef Block<Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowsBlockXpr;
+typedef const Block<const Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowsBlockXpr;
+template<int N> struct NColsBlockXpr { typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };
+template<int N> struct ConstNColsBlockXpr { typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };
+template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
+template<int N> struct ConstNRowsBlockXpr { typedef const Block<const Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
+typedef Block<Derived> BlockXpr;
+typedef const Block<const Derived> ConstBlockXpr;
+template<int Rows, int Cols> struct FixedBlockXpr { typedef Block<Derived,Rows,Cols> Type; };
+template<int Rows, int Cols> struct ConstFixedBlockXpr { typedef Block<const Derived,Rows,Cols> Type; };
+typedef VectorBlock<Derived> SegmentReturnType;
+typedef const VectorBlock<const Derived> ConstSegmentReturnType;
+template<int Size> struct FixedSegmentReturnType { typedef VectorBlock<Derived, Size> Type; };
+template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBlock<const Derived, Size> Type; };
+#endif
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+// General dynamic block view: blockRows x blockCols starting at
+// (startRow, startCol). Returns a lazy view; no copy is made.
+EIGEN_DEVICE_FUNC
+inline BlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols)
+{
+ return BlockXpr(derived(), startRow, startCol, blockRows, blockCols);
+}
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) const
+{
+ return ConstBlockXpr(derived(), startRow, startCol, blockRows, blockCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+// Top-right corner views, three flavors (each in mutable + const form):
+// runtime-sized, compile-time-sized, and compile-time type with runtime size.
+// NOTE(review): the last pair lacks EIGEN_DEVICE_FUNC unlike its siblings —
+// possibly an upstream omission; left as-is.
+EIGEN_DEVICE_FUNC
+inline BlockXpr topRightCorner(Index cRows, Index cCols)
+{
+ return BlockXpr(derived(), 0, cols() - cCols, cRows, cCols);
+}
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr topRightCorner(Index cRows, Index cCols) const
+{
+ return ConstBlockXpr(derived(), 0, cols() - cCols, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner()
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);
+}
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner() const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols)
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols);
+}
+template<int CRows, int CCols>
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols) const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+// Top-left corner views anchored at (0, 0); same three flavors as the other
+// corner families (runtime-sized / fixed-size / fixed type with runtime size).
+EIGEN_DEVICE_FUNC
+inline BlockXpr topLeftCorner(Index cRows, Index cCols)
+{
+ return BlockXpr(derived(), 0, 0, cRows, cCols);
+}
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr topLeftCorner(Index cRows, Index cCols) const
+{
+ return ConstBlockXpr(derived(), 0, 0, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner()
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);
+}
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner() const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols)
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols);
+}
+template<int CRows, int CCols>
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols) const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+// Bottom-right corner views anchored at (rows()-cRows, cols()-cCols);
+// same three flavors as the other corner families.
+EIGEN_DEVICE_FUNC
+inline BlockXpr bottomRightCorner(Index cRows, Index cCols)
+{
+ return BlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+}
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr bottomRightCorner(Index cRows, Index cCols) const
+{
+ return ConstBlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner()
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);
+}
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner() const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols)
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+}
+template<int CRows, int CCols>
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols) const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+// Bottom-left corner views anchored at (rows()-cRows, 0);
+// same three flavors as the other corner families.
+EIGEN_DEVICE_FUNC
+inline BlockXpr bottomLeftCorner(Index cRows, Index cCols)
+{
+ return BlockXpr(derived(), rows() - cRows, 0, cRows, cCols);
+}
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr bottomLeftCorner(Index cRows, Index cCols) const
+{
+ return ConstBlockXpr(derived(), rows() - cRows, 0, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner()
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);
+}
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols)
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols);
+}
+template<int CRows, int CCols>
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols) const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+// Views of the first n rows (all columns): runtime-sized pair, then
+// compile-time-N pair whose runtime n defaults to N.
+EIGEN_DEVICE_FUNC
+inline RowsBlockXpr topRows(Index n)
+{
+ return RowsBlockXpr(derived(), 0, 0, n, cols());
+}
+EIGEN_DEVICE_FUNC
+inline ConstRowsBlockXpr topRows(Index n) const
+{
+ return ConstRowsBlockXpr(derived(), 0, 0, n, cols());
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NRowsBlockXpr<N>::Type topRows(Index n = N)
+{
+ return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const
+{
+ return typename ConstNRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+EIGEN_DEVICE_FUNC
+inline RowsBlockXpr bottomRows(Index n)
+{
+ return RowsBlockXpr(derived(), rows() - n, 0, n, cols());
+}
+EIGEN_DEVICE_FUNC
+inline ConstRowsBlockXpr bottomRows(Index n) const
+{
+ return ConstRowsBlockXpr(derived(), rows() - n, 0, n, cols());
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NRowsBlockXpr<N>::Type bottomRows(Index n = N)
+{
+ return typename NRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const
+{
+ return typename ConstNRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+EIGEN_DEVICE_FUNC
+inline RowsBlockXpr middleRows(Index startRow, Index n)
+{
+ return RowsBlockXpr(derived(), startRow, 0, n, cols());
+}
+EIGEN_DEVICE_FUNC
+inline ConstRowsBlockXpr middleRows(Index startRow, Index n) const
+{
+ return ConstRowsBlockXpr(derived(), startRow, 0, n, cols());
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N)
+{
+ return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N) const
+{
+ return typename ConstNRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+EIGEN_DEVICE_FUNC
+inline ColsBlockXpr leftCols(Index n)
+{
+ return ColsBlockXpr(derived(), 0, 0, rows(), n);
+}
+EIGEN_DEVICE_FUNC
+inline ConstColsBlockXpr leftCols(Index n) const
+{
+ return ConstColsBlockXpr(derived(), 0, 0, rows(), n);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NColsBlockXpr<N>::Type leftCols(Index n = N)
+{
+ return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const
+{
+ return typename ConstNColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+EIGEN_DEVICE_FUNC
+inline ColsBlockXpr rightCols(Index n)
+{
+ return ColsBlockXpr(derived(), 0, cols() - n, rows(), n);
+}
+EIGEN_DEVICE_FUNC
+inline ConstColsBlockXpr rightCols(Index n) const
+{
+ return ConstColsBlockXpr(derived(), 0, cols() - n, rows(), n);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NColsBlockXpr<N>::Type rightCols(Index n = N)
+{
+ return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const
+{
+ return typename ConstNColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+EIGEN_DEVICE_FUNC
+inline ColsBlockXpr middleCols(Index startCol, Index numCols)
+{
+ return ColsBlockXpr(derived(), 0, startCol, rows(), numCols);
+}
+EIGEN_DEVICE_FUNC
+inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const
+{
+ return ConstColsBlockXpr(derived(), 0, startCol, rows(), numCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N)
+{
+ return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N) const
+{
+ return typename ConstNColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int NRows, int NCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol)
+{
+ return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
+}
+template<int NRows, int NCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol) const
+{
+ return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int NRows, int NCols>
+inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,
+ Index blockRows, Index blockCols)
+{
+ return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);
+}
+template<int NRows, int NCols>
+inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,
+ Index blockRows, Index blockCols) const
+{
+ return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+EIGEN_DEVICE_FUNC
+inline ColXpr col(Index i)
+{
+ return ColXpr(derived(), i);
+}
+EIGEN_DEVICE_FUNC
+inline ConstColXpr col(Index i) const
+{
+ return ConstColXpr(derived(), i);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+EIGEN_DEVICE_FUNC
+inline RowXpr row(Index i)
+{
+ return RowXpr(derived(), i);
+}
+EIGEN_DEVICE_FUNC
+inline ConstRowXpr row(Index i) const
+{
+ return ConstRowXpr(derived(), i);
+}
+EIGEN_DEVICE_FUNC
+inline SegmentReturnType segment(Index start, Index n)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return SegmentReturnType(derived(), start, n);
+}
+EIGEN_DEVICE_FUNC
+inline ConstSegmentReturnType segment(Index start, Index n) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return ConstSegmentReturnType(derived(), start, n);
+}
+EIGEN_DEVICE_FUNC
+inline SegmentReturnType head(Index n)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return SegmentReturnType(derived(), 0, n);
+}
+EIGEN_DEVICE_FUNC
+inline ConstSegmentReturnType head(Index n) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return ConstSegmentReturnType(derived(), 0, n);
+}
+EIGEN_DEVICE_FUNC
+inline SegmentReturnType tail(Index n)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return SegmentReturnType(derived(), this->size() - n, n);
+}
+EIGEN_DEVICE_FUNC
+inline ConstSegmentReturnType tail(Index n) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return ConstSegmentReturnType(derived(), this->size() - n, n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename FixedSegmentReturnType<N>::Type segment(Index start, Index n = N)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return typename FixedSegmentReturnType<N>::Type(derived(), start, n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index n = N) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return typename ConstFixedSegmentReturnType<N>::Type(derived(), start, n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename FixedSegmentReturnType<N>::Type head(Index n = N)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return typename FixedSegmentReturnType<N>::Type(derived(), 0, n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return typename ConstFixedSegmentReturnType<N>::Type(derived(), 0, n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename FixedSegmentReturnType<N>::Type tail(Index n = N)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return typename FixedSegmentReturnType<N>::Type(derived(), size() - n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstFixedSegmentReturnType<N>::Type tail(Index n = N) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return typename ConstFixedSegmentReturnType<N>::Type(derived(), size() - n);
+}
+// end # include "../plugins/BlockMethods.h"
+# ifdef EIGEN_DENSEBASE_PLUGIN
+# include EIGEN_DENSEBASE_PLUGIN
+# endif
+#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC
+ inline void evalTo(Dest& ) const
+ {
+ EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
+ }
+ protected:
+ EIGEN_DEVICE_FUNC DenseBase()
+ {
+#ifdef EIGEN_INTERNAL_DEBUGGING
+ EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))
+ && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))),
+ INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION)
+#endif
+ }
+ private:
+ EIGEN_DEVICE_FUNC explicit DenseBase(int);
+ EIGEN_DEVICE_FUNC DenseBase(int,int);
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
+};
+}
+#endif
+// end #include "src/Core/DenseBase.h"
+// #include "src/Core/MatrixBase.h"
+#ifndef EIGEN_MATRIXBASE_H
+#define EIGEN_MATRIXBASE_H
+namespace Eigen {
+template<typename Derived> class MatrixBase
+ : public DenseBase<Derived>
+{
+ public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef MatrixBase StorageBaseType;
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef DenseBase<Derived> Base;
+ using Base::RowsAtCompileTime;
+ using Base::ColsAtCompileTime;
+ using Base::SizeAtCompileTime;
+ using Base::MaxRowsAtCompileTime;
+ using Base::MaxColsAtCompileTime;
+ using Base::MaxSizeAtCompileTime;
+ using Base::IsVectorAtCompileTime;
+ using Base::Flags;
+ using Base::derived;
+ using Base::const_cast_derived;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::coeff;
+ using Base::coeffRef;
+ using Base::lazyAssign;
+ using Base::eval;
+ using Base::operator+=;
+ using Base::operator-=;
+ using Base::operator*=;
+ using Base::operator/=;
+ typedef typename Base::CoeffReturnType CoeffReturnType;
+ typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
+ typedef typename Base::RowXpr RowXpr;
+ typedef typename Base::ColXpr ColXpr;
+#endif
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef Matrix<Scalar,EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime),
+ EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime)> SquareMatrixType;
+#endif
+ EIGEN_DEVICE_FUNC
+ inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); }
+ typedef typename Base::PlainObject PlainObject;
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
+ typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
+ ConstTransposeReturnType
+ >::type AdjointReturnType;
+ typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
+ typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,PlainObject> IdentityReturnType;
+ typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
+ internal::traits<Derived>::RowsAtCompileTime,
+ internal::traits<Derived>::ColsAtCompileTime> BasisReturnType;
+#endif
+#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase
+#define EIGEN_DOC_UNARY_ADDONS(X,Y)
+// # include "../plugins/CommonCwiseUnaryOps.h"
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ const CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
+ const Derived&
+ >::type ConjugateReturnType;
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ const CwiseUnaryOp<internal::scalar_real_op<Scalar>, const Derived>,
+ const Derived&
+ >::type RealReturnType;
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ CwiseUnaryView<internal::scalar_real_ref_op<Scalar>, Derived>,
+ Derived&
+ >::type NonConstRealReturnType;
+typedef CwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived> ImagReturnType;
+typedef CwiseUnaryView<internal::scalar_imag_ref_op<Scalar>, Derived> NonConstImagReturnType;
+typedef CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> NegativeReturnType;
+#endif
+EIGEN_DOC_UNARY_ADDONS(operator-,opposite)
+EIGEN_DEVICE_FUNC
+inline const NegativeReturnType
+operator-() const { return NegativeReturnType(derived()); }
+template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; };
+EIGEN_DOC_UNARY_ADDONS(cast,conversion function)
+template<typename NewType>
+EIGEN_DEVICE_FUNC
+typename CastXpr<NewType>::Type
+cast() const
+{
+ return typename CastXpr<NewType>::Type(derived());
+}
+EIGEN_DOC_UNARY_ADDONS(conjugate,complex conjugate)
+EIGEN_DEVICE_FUNC
+inline ConjugateReturnType
+conjugate() const
+{
+ return ConjugateReturnType(derived());
+}
+EIGEN_DOC_UNARY_ADDONS(real,real part function)
+EIGEN_DEVICE_FUNC
+inline RealReturnType
+real() const { return RealReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
+EIGEN_DEVICE_FUNC
+inline const ImagReturnType
+imag() const { return ImagReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(unaryExpr,unary function)
+template<typename CustomUnaryOp>
+EIGEN_DEVICE_FUNC
+inline const CwiseUnaryOp<CustomUnaryOp, const Derived>
+unaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const
+{
+ return CwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func);
+}
+EIGEN_DOC_UNARY_ADDONS(unaryViewExpr,unary function)
+template<typename CustomViewOp>
+EIGEN_DEVICE_FUNC
+inline const CwiseUnaryView<CustomViewOp, const Derived>
+unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const
+{
+ return CwiseUnaryView<CustomViewOp, const Derived>(derived(), func);
+}
+EIGEN_DOC_UNARY_ADDONS(real,real part function)
+EIGEN_DEVICE_FUNC
+inline NonConstRealReturnType
+real() { return NonConstRealReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
+EIGEN_DEVICE_FUNC
+inline NonConstImagReturnType
+imag() { return NonConstImagReturnType(derived()); }
+// end # include "../plugins/CommonCwiseUnaryOps.h"
+// # include "../plugins/CommonCwiseBinaryOps.h"
+EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference)
+EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum)
+template<typename CustomBinaryOp, typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>
+binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other, const CustomBinaryOp& func = CustomBinaryOp()) const
+{
+ return CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>(derived(), other.derived(), func);
+}
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_product_op<Scalar,T>,Derived,Constant<T> > operator*(const T& scalar) const;
+template<typename T> friend
+const CwiseBinaryOp<internal::scalar_product_op<T,Scalar>,Constant<T>,Derived> operator*(const T& scalar, const StorageBaseType& expr);
+#endif
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,T>,Derived,Constant<T> > operator/(const T& scalar) const;
+#endif
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
+operator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+ return CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
+operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+ return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
+// end # include "../plugins/CommonCwiseBinaryOps.h"
+// # include "../plugins/MatrixCwiseUnaryOps.h"
+typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> CwiseAbsReturnType;
+typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> CwiseAbs2ReturnType;
+typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> CwiseSignReturnType;
+typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType;
+EIGEN_DOC_UNARY_ADDONS(cwiseAbs,absolute value)
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseAbsReturnType
+cwiseAbs() const { return CwiseAbsReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseAbs2,squared absolute value)
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseAbs2ReturnType
+cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseSqrt,square-root)
+EIGEN_DEVICE_FUNC
+inline const CwiseSqrtReturnType
+cwiseSqrt() const { return CwiseSqrtReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseSign,sign function)
+EIGEN_DEVICE_FUNC
+inline const CwiseSignReturnType
+cwiseSign() const { return CwiseSignReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseInverse,inverse)
+EIGEN_DEVICE_FUNC
+inline const CwiseInverseReturnType
+cwiseInverse() const { return CwiseInverseReturnType(derived()); }
+// end # include "../plugins/MatrixCwiseUnaryOps.h"
+// # include "../plugins/MatrixCwiseBinaryOps.h"
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
+cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>
+cwiseEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>
+cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>
+cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
+cwiseMin(const Scalar &other) const
+{
+ return cwiseMin(Derived::Constant(rows(), cols(), other));
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>
+cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
+cwiseMax(const Scalar &other) const
+{
+ return cwiseMax(Derived::Constant(rows(), cols(), other));
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>
+cwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType;
+EIGEN_DEVICE_FUNC
+inline const CwiseScalarEqualReturnType
+cwiseEqual(const Scalar& s) const
+{
+ return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>());
+}
+// end # include "../plugins/MatrixCwiseBinaryOps.h"
+# ifdef EIGEN_MATRIXBASE_PLUGIN
+# include EIGEN_MATRIXBASE_PLUGIN
+# endif
+#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+#undef EIGEN_DOC_UNARY_ADDONS
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator=(const MatrixBase& other);
+ template <typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator=(const DenseBase<OtherDerived>& other);
+ template <typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const EigenBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const ReturnByValue<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator+=(const MatrixBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator-=(const MatrixBase<OtherDerived>& other);
+#ifdef __CUDACC__
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ const Product<Derived,OtherDerived,LazyProduct>
+ operator*(const MatrixBase<OtherDerived> &other) const
+ { return this->lazyProduct(other); }
+#else
+ template<typename OtherDerived>
+ const Product<Derived,OtherDerived>
+ operator*(const MatrixBase<OtherDerived> &other) const;
+#endif
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ const Product<Derived,OtherDerived,LazyProduct>
+ lazyProduct(const MatrixBase<OtherDerived> &other) const;
+ template<typename OtherDerived>
+ Derived& operator*=(const EigenBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ void applyOnTheLeft(const EigenBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ void applyOnTheRight(const EigenBase<OtherDerived>& other);
+ template<typename DiagonalDerived>
+ EIGEN_DEVICE_FUNC
+ const Product<Derived, DiagonalDerived, LazyProduct>
+ operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
+ dot(const MatrixBase<OtherDerived>& other) const;
+ EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
+ EIGEN_DEVICE_FUNC RealScalar norm() const;
+ RealScalar stableNorm() const;
+ RealScalar blueNorm() const;
+ RealScalar hypotNorm() const;
+ EIGEN_DEVICE_FUNC const PlainObject normalized() const;
+ EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const;
+ EIGEN_DEVICE_FUNC void normalize();
+ EIGEN_DEVICE_FUNC void stableNormalize();
+ EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
+ EIGEN_DEVICE_FUNC void adjointInPlace();
+ typedef Diagonal<Derived> DiagonalReturnType;
+ EIGEN_DEVICE_FUNC
+ DiagonalReturnType diagonal();
+ typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
+ EIGEN_DEVICE_FUNC
+ ConstDiagonalReturnType diagonal() const;
+ template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
+ template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
+ template<int Index>
+ EIGEN_DEVICE_FUNC
+ typename DiagonalIndexReturnType<Index>::Type diagonal();
+ template<int Index>
+ EIGEN_DEVICE_FUNC
+ typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
+ typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
+ typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
+ EIGEN_DEVICE_FUNC
+ DiagonalDynamicIndexReturnType diagonal(Index index);
+ EIGEN_DEVICE_FUNC
+ ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;
+ template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
+ template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
+ template<unsigned int Mode>
+ EIGEN_DEVICE_FUNC
+ typename TriangularViewReturnType<Mode>::Type triangularView();
+ template<unsigned int Mode>
+ EIGEN_DEVICE_FUNC
+ typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
+ template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
+ template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
+ template<unsigned int UpLo>
+ EIGEN_DEVICE_FUNC
+ typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
+ template<unsigned int UpLo>
+ EIGEN_DEVICE_FUNC
+ typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
+ const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
+ const typename NumTraits<Scalar>::Real& m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();
+ EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);
+ EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);
+ EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);
+ EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();
+ EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();
+ EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
+ EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();
+ EIGEN_DEVICE_FUNC
+ const DiagonalWrapper<const Derived> asDiagonal() const;
+ const PermutationWrapper<const Derived> asPermutation() const;
+ EIGEN_DEVICE_FUNC
+ Derived& setIdentity();
+ EIGEN_DEVICE_FUNC
+ Derived& setIdentity(Index rows, Index cols);
+ bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isDiagonal(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isUpperTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isLowerTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ template<typename OtherDerived>
+ bool isOrthogonal(const MatrixBase<OtherDerived>& other,
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isUnitary(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const
+ { return cwiseEqual(other).all(); }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const
+ { return cwiseNotEqual(other).any(); }
+ NoAlias<Derived,Eigen::MatrixBase > noalias();
+ inline const Derived& forceAlignedAccess() const { return derived(); }
+ inline Derived& forceAlignedAccess() { return derived(); }
+ template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
+ template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }
+ EIGEN_DEVICE_FUNC Scalar trace() const;
+ template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
+ EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
+ EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return ArrayWrapper<const Derived>(derived()); }
+ inline const FullPivLU<PlainObject> fullPivLu() const;
+ inline const PartialPivLU<PlainObject> partialPivLu() const;
+ inline const PartialPivLU<PlainObject> lu() const;
+ inline const Inverse<Derived> inverse() const;
+ template<typename ResultType>
+ inline void computeInverseAndDetWithCheck(
+ ResultType& inverse,
+ typename ResultType::Scalar& determinant,
+ bool& invertible,
+ const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
+ ) const;
+ template<typename ResultType>
+ inline void computeInverseWithCheck(
+ ResultType& inverse,
+ bool& invertible,
+ const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
+ ) const;
+ Scalar determinant() const;
+ inline const LLT<PlainObject> llt() const;
+ inline const LDLT<PlainObject> ldlt() const;
+ inline const HouseholderQR<PlainObject> householderQr() const;
+ inline const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
+ inline const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
+ inline const CompleteOrthogonalDecomposition<PlainObject> completeOrthogonalDecomposition() const;
+ inline EigenvaluesReturnType eigenvalues() const;
+ inline RealScalar operatorNorm() const;
+ inline JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
+ inline BDCSVD<PlainObject> bdcSvd(unsigned int computationOptions = 0) const;
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename OtherDerived> struct cross_product_return_type {
+ typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
+ typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
+ };
+ #endif
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ inline typename cross_product_return_type<OtherDerived>::type
+#else
+ inline PlainObject
+#endif
+ cross(const MatrixBase<OtherDerived>& other) const;
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ inline PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
+ EIGEN_DEVICE_FUNC
+ inline PlainObject unitOrthogonal(void) const;
+ EIGEN_DEVICE_FUNC
+ inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
+ enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
+ : ColsAtCompileTime==1 ? Vertical : Horizontal };
+ typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
+ EIGEN_DEVICE_FUNC
+ inline HomogeneousReturnType homogeneous() const;
+ enum {
+ SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
+ };
+ typedef Block<const Derived,
+ internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
+ internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
+ typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
+ EIGEN_DEVICE_FUNC
+ inline const HNormalizedReturnType hnormalized() const;
+ void makeHouseholderInPlace(Scalar& tau, RealScalar& beta);
+ template<typename EssentialPart>
+ void makeHouseholder(EssentialPart& essential,
+ Scalar& tau, RealScalar& beta) const;
+ template<typename EssentialPart>
+ void applyHouseholderOnTheLeft(const EssentialPart& essential,
+ const Scalar& tau,
+ Scalar* workspace);
+ template<typename EssentialPart>
+ void applyHouseholderOnTheRight(const EssentialPart& essential,
+ const Scalar& tau,
+ Scalar* workspace);
+ template<typename OtherScalar>
+ void applyOnTheLeft(Index p, Index q, const JacobiRotation<OtherScalar>& j);
+ template<typename OtherScalar>
+ void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);
+ template<typename OtherDerived>
+ EIGEN_STRONG_INLINE const typename SparseMatrixBase<OtherDerived>::template CwiseProductDenseReturnType<Derived>::Type
+ cwiseProduct(const SparseMatrixBase<OtherDerived> &other) const
+ {
+ return other.cwiseProduct(derived());
+ }
+ typedef typename internal::stem_function<Scalar>::type StemFunction;
+ const MatrixExponentialReturnValue<Derived> exp() const;
+ const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;
+ const MatrixFunctionReturnValue<Derived> cosh() const;
+ const MatrixFunctionReturnValue<Derived> sinh() const;
+ const MatrixFunctionReturnValue<Derived> cos() const;
+ const MatrixFunctionReturnValue<Derived> sin() const;
+ const MatrixSquareRootReturnValue<Derived> sqrt() const;
+ const MatrixLogarithmReturnValue<Derived> log() const;
+ const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
+ const MatrixComplexPowerReturnValue<Derived> pow(const std::complex<RealScalar>& p) const;
+ protected:
+ EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
+ private:
+ EIGEN_DEVICE_FUNC explicit MatrixBase(int);
+ EIGEN_DEVICE_FUNC MatrixBase(int,int);
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase<OtherDerived>&);
+ protected:
+ template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
+ {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+ template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )
+ {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+};
+// Out-of-class MatrixBase operator definitions (vendored upstream Eigen; diff '+' prefix preserved).
+// All three delegate to the *other* expression: other.derived().applyThisOnTheRight/Left(derived()).
+// NOTE(review): relies on OtherDerived providing applyThisOnTheRight/Left (see EigenBase below,
+// which defaults them to plain products); specialized expression types may override them.
+template<typename Derived>
+template<typename OtherDerived>
+inline Derived&
+MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheRight(derived());
+  return derived();
+}
+// this = this * other, in place; returns nothing (unlike operator*= which returns derived()).
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheRight(derived());
+}
+// this = other * this, in place.
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheLeft(derived());
+}
+}
+#endif
+// end #include "src/Core/MatrixBase.h"
+// #include "src/Core/EigenBase.h"
+#ifndef EIGEN_EIGENBASE_H
+#define EIGEN_EIGENBASE_H
+namespace Eigen {
+// Root CRTP base of every Eigen expression (vendored upstream Eigen; diff '+' prefix preserved).
+// Provides the derived() casts, rows/cols/size queries forwarded to Derived, and default
+// evalTo/addTo/subTo/applyThisOnThe{Left,Right} implementations in terms of evaluation and products.
+template<typename Derived> struct EigenBase
+{
+  typedef Eigen::Index Index;
+  typedef typename internal::traits<Derived>::StorageKind StorageKind;
+  // Static downcasts to the concrete expression type (CRTP).
+  EIGEN_DEVICE_FUNC
+  Derived& derived() { return *static_cast<Derived*>(this); }
+  EIGEN_DEVICE_FUNC
+  const Derived& derived() const { return *static_cast<const Derived*>(this); }
+  // Casts away constness of *this; used internally by write-through expression wrappers.
+  EIGEN_DEVICE_FUNC
+  inline Derived& const_cast_derived() const
+  { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
+  EIGEN_DEVICE_FUNC
+  inline const Derived& const_derived() const
+  { return *static_cast<const Derived*>(this); }
+  EIGEN_DEVICE_FUNC
+  inline Index rows() const { return derived().rows(); }
+  EIGEN_DEVICE_FUNC
+  inline Index cols() const { return derived().cols(); }
+  EIGEN_DEVICE_FUNC
+  inline Index size() const { return rows() * cols(); }
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC
+  inline void evalTo(Dest& dst) const
+  { derived().evalTo(dst); }
+  // Default addTo/subTo: evaluate into a temporary, then accumulate into dst.
+  // Expression types with a cheaper path are expected to override these.
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC
+  inline void addTo(Dest& dst) const
+  {
+    typename Dest::PlainObject res(rows(),cols());
+    evalTo(res);
+    dst += res;
+  }
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC
+  inline void subTo(Dest& dst) const
+  {
+    typename Dest::PlainObject res(rows(),cols());
+    evalTo(res);
+    dst -= res;
+  }
+  // Defaults used by MatrixBase::operator*= / applyOnTheLeft / applyOnTheRight:
+  // plain matrix products on the corresponding side.
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const
+  {
+    dst = dst * this->derived();
+  }
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const
+  {
+    dst = this->derived() * dst;
+  }
+};
+// DenseBase assignment operators taking a generic EigenBase (vendored upstream Eigen).
+// All three route through internal::call_assignment, differing only in the functor:
+// default (plain assign), add_assign_op, sub_assign_op.
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
+{
+  call_assignment(derived(), other.derived());
+  return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
+{
+  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
+  return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
+{
+  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
+  return derived();
+}
+}
+#endif
+// end #include "src/Core/EigenBase.h"
+// #include "src/Core/Product.h"
+#ifndef EIGEN_PRODUCT_H
+#define EIGEN_PRODUCT_H
+namespace Eigen {
+template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
+namespace internal {
+// Compile-time traits of a Product expression (vendored upstream Eigen).
+// Scalar comes from ScalarBinaryOpTraits promotion of the two operand scalars;
+// StorageKind/StorageIndex are promoted from both sides; dimensions are
+// lhs-rows x rhs-cols, with InnerSize = min(lhs-cols, rhs-rows) at compile time.
+template<typename Lhs, typename Rhs, int Option>
+struct traits<Product<Lhs, Rhs, Option> >
+{
+  typedef typename remove_all<Lhs>::type LhsCleaned;
+  typedef typename remove_all<Rhs>::type RhsCleaned;
+  typedef traits<LhsCleaned> LhsTraits;
+  typedef traits<RhsCleaned> RhsTraits;
+  typedef MatrixXpr XprKind;
+  typedef typename ScalarBinaryOpTraits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
+  typedef typename product_promote_storage_type<typename LhsTraits::StorageKind,
+                                                typename RhsTraits::StorageKind,
+                                                internal::product_type<Lhs,Rhs>::ret>::ret StorageKind;
+  typedef typename promote_index_type<typename LhsTraits::StorageIndex,
+                                      typename RhsTraits::StorageIndex>::type StorageIndex;
+  enum {
+    RowsAtCompileTime    = LhsTraits::RowsAtCompileTime,
+    ColsAtCompileTime    = RhsTraits::ColsAtCompileTime,
+    MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime,
+    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),
+    // Storage-order heuristic: a compile-time row vector is flagged row-major,
+    // a column vector column-major; otherwise follow whichever operand has a
+    // preferred order when the other has none.
+    Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit
+          : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
+          : (   ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
+             || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit
+          : NoPreferredStorageOrderBit
+  };
+};
+}
+// Expression type representing lhs * rhs (vendored upstream Eigen).
+// Holds the two operands via ref_selector nesting (by reference or by value as
+// each operand's traits dictate) and asserts the inner dimensions agree.
+template<typename _Lhs, typename _Rhs, int Option>
+class Product : public ProductImpl<_Lhs,_Rhs,Option,
+                                   typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,
+                                                                                   typename internal::traits<_Rhs>::StorageKind,
+                                                                                   internal::product_type<_Lhs,_Rhs>::ret>::ret>
+{
+  public:
+    typedef _Lhs Lhs;
+    typedef _Rhs Rhs;
+    typedef typename ProductImpl<
+        Lhs, Rhs, Option,
+        typename internal::product_promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+                                                        typename internal::traits<Rhs>::StorageKind,
+                                                        internal::product_type<Lhs,Rhs>::ret>::ret>::Base Base;
+    EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
+    typedef typename internal::ref_selector<Lhs>::type LhsNested;
+    typedef typename internal::ref_selector<Rhs>::type RhsNested;
+    typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+    typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+    EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
+    {
+      // Runtime shape check: lhs.cols() must equal rhs.rows().
+      eigen_assert(lhs.cols() == rhs.rows()
+        && "invalid matrix product"
+        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
+    }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
+    EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
+    EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
+  protected:
+    LhsNested m_lhs;
+    RhsNested m_rhs;
+};
+namespace internal {
+// Base class of dense Product expressions (vendored upstream Eigen).
+// The generic form is just the dense xpr base; the InnerProduct specialization
+// additionally allows implicit conversion of a 1x1 product to its Scalar value.
+template<typename Lhs, typename Rhs, int Option, int ProductTag = internal::product_type<Lhs,Rhs>::ret>
+class dense_product_base
+ : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
+{};
+template<typename Lhs, typename Rhs, int Option>
+class dense_product_base<Lhs, Rhs, Option, InnerProduct>
+ : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
+{
+  typedef Product<Lhs,Rhs,Option> ProductXpr;
+  typedef typename internal::dense_xpr_base<ProductXpr>::type Base;
+public:
+  using Base::derived;
+  typedef typename Base::Scalar Scalar;
+  // Inner products evaluate to a single coefficient: expose it as a Scalar.
+  operator const Scalar() const
+  {
+    return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
+  }
+};
+}
+// ProductImpl: storage-kind-dependent implementation layer of Product (vendored upstream Eigen).
+// Generic version just picks the appropriate xpr base; the Dense specialization
+// additionally exposes coeff() access, but only for inner or lazy products.
+template<typename Lhs, typename Rhs, int Option, typename StorageKind>
+class ProductImpl : public internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type
+{
+  public:
+    typedef typename internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type Base;
+};
+template<typename Lhs, typename Rhs, int Option>
+class ProductImpl<Lhs,Rhs,Option,Dense>
+  : public internal::dense_product_base<Lhs,Rhs,Option>
+{
+    typedef Product<Lhs, Rhs, Option> Derived;
+  public:
+    typedef typename internal::dense_product_base<Lhs, Rhs, Option> Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+  protected:
+    enum {
+      // Coefficient access is only meaningful for (possibly) 1x1 results or lazy products;
+      // eager products would otherwise re-evaluate the whole product per coefficient.
+      IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) &&
+                   (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic),
+      EnableCoeff = IsOneByOne || Option==LazyProduct
+    };
+  public:
+    EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const
+    {
+      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
+      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
+      return internal::evaluator<Derived>(derived()).coeff(row,col);
+    }
+    EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
+    {
+      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
+      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
+      return internal::evaluator<Derived>(derived()).coeff(i);
+    }
+};
+}
+#endif
+// end #include "src/Core/Product.h"
+// #include "src/Core/CoreEvaluators.h"
+#ifndef EIGEN_COREEVALUATORS_H
+#define EIGEN_COREEVALUATORS_H
+namespace Eigen {
+namespace internal {
+// Mappings from an expression's StorageKind to its evaluator Kind and Shape,
+// plus forward declarations of the ternary/binary/unary evaluator templates
+// (vendored upstream Eigen CoreEvaluators.h).
+template<typename StorageKind>
+struct storage_kind_to_evaluator_kind {
+  typedef IndexBased Kind;
+};
+template<typename StorageKind> struct storage_kind_to_shape;
+template<> struct storage_kind_to_shape<Dense>                  { typedef DenseShape Shape;           };
+template<> struct storage_kind_to_shape<SolverStorage>          { typedef SolverShape Shape;           };
+template<> struct storage_kind_to_shape<PermutationStorage>     { typedef PermutationShape Shape;     };
+template<> struct storage_kind_to_shape<TranspositionsStorage>  { typedef TranspositionsShape Shape;  };
+// Forward declarations; the extra Kind/Scalar parameters allow partial
+// specialization per operand evaluation kind (e.g. IndexBased vs IteratorBased).
+template< typename T,
+          typename Arg1Kind   = typename evaluator_traits<typename T::Arg1>::Kind,
+          typename Arg2Kind   = typename evaluator_traits<typename T::Arg2>::Kind,
+          typename Arg3Kind   = typename evaluator_traits<typename T::Arg3>::Kind,
+          typename Arg1Scalar = typename traits<typename T::Arg1>::Scalar,
+          typename Arg2Scalar = typename traits<typename T::Arg2>::Scalar,
+          typename Arg3Scalar = typename traits<typename T::Arg3>::Scalar> struct ternary_evaluator;
+template< typename T,
+          typename LhsKind   = typename evaluator_traits<typename T::Lhs>::Kind,
+          typename RhsKind   = typename evaluator_traits<typename T::Rhs>::Kind,
+          typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
+          typename RhsScalar = typename traits<typename T::Rhs>::Scalar> struct binary_evaluator;
+template< typename T,
+          typename Kind   = typename evaluator_traits<typename T::NestedExpression>::Kind,
+          typename Scalar = typename T::Scalar> struct unary_evaluator;
+// Evaluator traits and common bases (vendored upstream Eigen).
+// evaluator_traits derives Kind/Shape from the expression's StorageKind;
+// evaluator<T> defaults to unary_evaluator<T>, with a const-stripping partial
+// specialization; evaluator_base is noncopyable and defaults Alignment to 0.
+template<typename T>
+struct evaluator_traits_base
+{
+  typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;
+  typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;
+};
+template<typename T>
+struct evaluator_traits : public evaluator_traits_base<T>
+{
+};
+// Opt-in flag: expressions that may alias their destination specialize this to true.
+template<typename T, typename Shape = typename evaluator_traits<T>::Shape >
+struct evaluator_assume_aliasing {
+  static const bool value = false;
+};
+template<typename T>
+struct evaluator : public unary_evaluator<T>
+{
+  typedef unary_evaluator<T> Base;
+  EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {}
+};
+template<typename T>
+struct evaluator<const T>
+  : evaluator<T>
+{
+  EIGEN_DEVICE_FUNC
+  explicit evaluator(const T& xpr) : evaluator<T>(xpr) {}
+};
+template<typename ExpressionType>
+struct evaluator_base : public noncopyable
+{
+  typedef traits<ExpressionType> ExpressionTraits;
+  enum {
+    Alignment = 0
+  };
+};
+// Direct-access evaluator over a plain Matrix/Array's storage (vendored upstream Eigen).
+// Holds a raw data pointer plus an outer stride (compile-time for fixed sizes,
+// runtime otherwise); coeff/coeffRef/packet/writePacket index it in either
+// row-major or column-major order depending on IsRowMajor.
+template<typename Derived>
+struct evaluator<PlainObjectBase<Derived> >
+  : evaluator_base<Derived>
+{
+  typedef PlainObjectBase<Derived> PlainObjectType;
+  typedef typename PlainObjectType::Scalar Scalar;
+  typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
+  enum {
+    IsRowMajor = PlainObjectType::IsRowMajor,
+    IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime,
+    RowsAtCompileTime = PlainObjectType::RowsAtCompileTime,
+    ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,
+    CoeffReadCost = NumTraits<Scalar>::ReadCost,
+    Flags = traits<Derived>::EvaluatorFlags,
+    Alignment = traits<Derived>::Alignment
+  };
+  // Default-constructed state: null data pointer; for fixed-size (non-vector)
+  // types the outer stride is known at compile time.
+  EIGEN_DEVICE_FUNC evaluator()
+    : m_data(0),
+      m_outerStride(IsVectorAtCompileTime  ? 0
+                                           : int(IsRowMajor) ? ColsAtCompileTime
+                                           : RowsAtCompileTime)
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+  EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m)
+    : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride())
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    if (IsRowMajor)
+      return m_data[row * m_outerStride.value() + col];
+    else
+      return m_data[row + col * m_outerStride.value()];
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_data[index];
+  }
+  // Write access: m_data is stored const; const_cast is the upstream idiom here.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
+  {
+    if (IsRowMajor)
+      return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col];
+    else
+      return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()];
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
+  {
+    return const_cast<Scalar*>(m_data)[index];
+  }
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const
+  {
+    if (IsRowMajor)
+      return ploadt<PacketType, LoadMode>(m_data + row * m_outerStride.value() + col);
+    else
+      return ploadt<PacketType, LoadMode>(m_data + row + col * m_outerStride.value());
+  }
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const
+  {
+    return ploadt<PacketType, LoadMode>(m_data + index);
+  }
+  template<int StoreMode,typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index row, Index col, const PacketType& x)
+  {
+    if (IsRowMajor)
+      return pstoret<Scalar, PacketType, StoreMode>
+	            (const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x);
+    else
+      return pstoret<Scalar, PacketType, StoreMode>
+                    (const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x);
+  }
+  template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index index, const PacketType& x)
+  {
+    return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
+  }
+protected:
+  const Scalar *m_data;
+  // Stride between consecutive outer slices (rows if row-major, columns otherwise);
+  // collapses to a compile-time constant for fixed-size non-vector types.
+  variable_if_dynamic<Index, IsVectorAtCompileTime  ? 0
+                                                    : int(IsRowMajor) ? ColsAtCompileTime
+                                                    : RowsAtCompileTime> m_outerStride;
+};
+// Matrix and Array evaluators simply forward to the PlainObjectBase evaluator
+// (vendored upstream Eigen).
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+  : evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
+{
+  typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
+  EIGEN_DEVICE_FUNC evaluator() {}
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
+    : evaluator<PlainObjectBase<XprType> >(m)
+  { }
+};
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+  : evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
+{
+  typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
+  EIGEN_DEVICE_FUNC evaluator() {}
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
+    : evaluator<PlainObjectBase<XprType> >(m)
+  { }
+};
+// Evaluator of Transpose expressions (vendored upstream Eigen): every access
+// swaps (row, col) before delegating to the nested expression's evaluator,
+// and the RowMajorBit flag is toggled accordingly.
+template<typename ArgType>
+struct unary_evaluator<Transpose<ArgType>, IndexBased>
+  : evaluator_base<Transpose<ArgType> >
+{
+  typedef Transpose<ArgType> XprType;
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+    Flags = evaluator<ArgType>::Flags ^ RowMajorBit,
+    Alignment = evaluator<ArgType>::Alignment
+  };
+  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    return m_argImpl.coeff(col, row);
+  }
+  // Linear access needs no swap: the flipped RowMajorBit already reinterprets the index.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_argImpl.coeff(index);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
+  {
+    return m_argImpl.coeffRef(col, row);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  typename XprType::Scalar& coeffRef(Index index)
+  {
+    return m_argImpl.coeffRef(index);
+  }
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const
+  {
+    return m_argImpl.template packet<LoadMode,PacketType>(col, row);
+  }
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const
+  {
+    return m_argImpl.template packet<LoadMode,PacketType>(index);
+  }
+  template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index row, Index col, const PacketType& x)
+  {
+    m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x);
+  }
+  template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index index, const PacketType& x)
+  {
+    m_argImpl.template writePacket<StoreMode,PacketType>(index, x);
+  }
+protected:
+  evaluator<ArgType> m_argImpl;
+};
+// nullary_wrapper: adapts a NullaryOp functor to the uniform (i, j) / (i) call
+// interface, dispatching at compile time on which operator() arities the
+// functor actually provides (vendored upstream Eigen).
+template<typename Scalar,typename NullaryOp,
+         bool has_nullary = has_nullary_operator<NullaryOp>::value,
+         bool has_unary   = has_unary_operator<NullaryOp>::value,
+         bool has_binary  = has_binary_operator<NullaryOp>::value>
+struct nullary_wrapper
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); }
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp<T>(i,j); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
+};
+// Functor takes no arguments (e.g. a constant generator): ignore the indices.
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,true,false,false>
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp<T>(); }
+};
+// Functor takes (i, j) only: linear calls default the second index to 0.
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,false,false,true>
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp<T>(i,j); }
+};
+// Functor takes a single linear index only: 2-D calls are valid solely for
+// vector-shaped expressions, hence the i==0 || j==0 assertion and i+j folding.
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,false,true,false>
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
+    eigen_assert(i==0 || j==0);
+    return op(i+j);
+  }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
+    eigen_assert(i==0 || j==0);
+    return op.template packetOp<T>(i+j);
+  }
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
+  template <typename T, typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
+};
+// Functor provides no usable operator(): intentionally empty (hard error if used).
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,false,false,false> {};
+// Disabled upstream MSVC workaround (note the '#if 0'): re-runs the arity
+// detection with a convertible IndexType proxy. Kept verbatim — dead code as shipped.
+#if 0 && EIGEN_COMP_MSVC>0
+template<typename T> struct nullary_wrapper_workaround_msvc {
+  nullary_wrapper_workaround_msvc(const T&);
+  operator T()const;
+};
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,true,true,true>
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i,j);
+  }
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i);
+  }
+  template <typename T, typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i,j);
+  }
+  template <typename T, typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i);
+  }
+};
+#endif
+// Evaluator of CwiseNullaryOp (Constant, Identity, LinSpaced, Random, ...);
+// every coeff/packet call is routed to the functor through nullary_wrapper
+// (vendored upstream Eigen).
+template<typename NullaryOp, typename PlainObjectType>
+struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
+  : evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >
+{
+  typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType;
+  typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned;
+  enum {
+    CoeffReadCost = internal::functor_traits<NullaryOp>::Cost,
+    // Linear/packet access only when the functor supports it; non-repeatable
+    // functors (e.g. Random) force evaluation before nesting.
+    Flags = (evaluator<PlainObjectTypeCleaned>::Flags
+          &  (  HereditaryBits
+              | (functor_has_linear_access<NullaryOp>::ret  ? LinearAccessBit : 0)
+              | (functor_traits<NullaryOp>::PacketAccess    ? PacketAccessBit : 0)))
+          | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
+    Alignment = AlignedMax
+  };
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
+    : m_functor(n.functor()), m_wrapper()
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(IndexType row, IndexType col) const
+  {
+    return m_wrapper(m_functor, row, col);
+  }
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(IndexType index) const
+  {
+    return m_wrapper(m_functor,index);
+  }
+  template<int LoadMode, typename PacketType, typename IndexType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(IndexType row, IndexType col) const
+  {
+    return m_wrapper.template packetOp<PacketType>(m_functor, row, col);
+  }
+  template<int LoadMode, typename PacketType, typename IndexType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(IndexType index) const
+  {
+    return m_wrapper.template packetOp<PacketType>(m_functor, index);
+  }
+protected:
+  const NullaryOp m_functor;
+  const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
+};
+// Evaluator of CwiseUnaryOp: applies the unary functor to each coefficient or
+// packet of the nested expression (vendored upstream Eigen).
+template<typename UnaryOp, typename ArgType>
+struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
+  : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType> >
+{
+  typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+    // Keep linear/packet access only if the argument has it and the functor vectorizes.
+    Flags = evaluator<ArgType>::Flags
+          & (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
+    Alignment = evaluator<ArgType>::Alignment
+  };
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit unary_evaluator(const XprType& op)
+    : m_functor(op.functor()),
+      m_argImpl(op.nestedExpression())
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    return m_functor(m_argImpl.coeff(row, col));
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_functor(m_argImpl.coeff(index));
+  }
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const
+  {
+    return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col));
+  }
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const
+  {
+    return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index));
+  }
+protected:
+  const UnaryOp m_functor;
+  evaluator<ArgType> m_argImpl;
+};
+// CwiseTernaryOp evaluators (vendored upstream Eigen): the evaluator<> shim
+// forwards to ternary_evaluator, which applies the 3-ary functor to the three
+// argument evaluators' coeffs/packets; flags keep linear/packet access only
+// when all three args agree on storage order (and on scalar type for packets).
+template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
+struct evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
+ : public ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
+{
+  typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
+  typedef ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > Base;
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+};
+template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
+struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased, IndexBased>
+  : evaluator_base<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
+{
+  typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
+  enum {
+    CoeffReadCost = evaluator<Arg1>::CoeffReadCost + evaluator<Arg2>::CoeffReadCost + evaluator<Arg3>::CoeffReadCost + functor_traits<TernaryOp>::Cost,
+    Arg1Flags = evaluator<Arg1>::Flags,
+    Arg2Flags = evaluator<Arg2>::Flags,
+    Arg3Flags = evaluator<Arg3>::Flags,
+    SameType = is_same<typename Arg1::Scalar,typename Arg2::Scalar>::value && is_same<typename Arg1::Scalar,typename Arg3::Scalar>::value,
+    StorageOrdersAgree = (int(Arg1Flags)&RowMajorBit)==(int(Arg2Flags)&RowMajorBit) && (int(Arg1Flags)&RowMajorBit)==(int(Arg3Flags)&RowMajorBit),
+    Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & (
+        HereditaryBits
+      | (int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) &
+           ( (StorageOrdersAgree ? LinearAccessBit : 0)
+           | (functor_traits<TernaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
+           )
+        )
+     ),
+    Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit),
+    Alignment = EIGEN_PLAIN_ENUM_MIN(
+        EIGEN_PLAIN_ENUM_MIN(evaluator<Arg1>::Alignment, evaluator<Arg2>::Alignment),
+        evaluator<Arg3>::Alignment)
+  };
+  EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr)
+    : m_functor(xpr.functor()),
+      m_arg1Impl(xpr.arg1()),
+      m_arg2Impl(xpr.arg2()),
+      m_arg3Impl(xpr.arg3())
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<TernaryOp>::Cost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col));
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
+  }
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const
+  {
+    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(row, col),
+                              m_arg2Impl.template packet<LoadMode,PacketType>(row, col),
+                              m_arg3Impl.template packet<LoadMode,PacketType>(row, col));
+  }
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const
+  {
+    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(index),
+                              m_arg2Impl.template packet<LoadMode,PacketType>(index),
+                              m_arg3Impl.template packet<LoadMode,PacketType>(index));
+  }
+protected:
+  const TernaryOp m_functor;
+  evaluator<Arg1> m_arg1Impl;
+  evaluator<Arg2> m_arg2Impl;
+  evaluator<Arg3> m_arg3Impl;
+};
+// evaluator<CwiseBinaryOp> shim: forwards to binary_evaluator, which is
+// specialized per operand evaluation kind (vendored upstream Eigen).
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+ : public binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+  typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base;
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+};
// Dense (index-based) binary-expression evaluator: each coefficient or
// packet is the functor applied to the corresponding coefficients/packets
// of the two operand evaluators.
template<typename BinaryOp, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBased>
  : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
{
  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
  enum {
    // Reading one coefficient reads both operands and applies the functor.
    CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
    LhsFlags = evaluator<Lhs>::Flags,
    RhsFlags = evaluator<Rhs>::Flags,
    // Packet ops are only allowed when both sides use the same scalar type.
    SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value,
    // True when both operands traverse memory in the same major order.
    StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit),
    // Keep LinearAccessBit/PacketAccessBit only when both operands support
    // them AND the storage orders agree; packets additionally need matching
    // scalar types and a vectorizable functor.
    Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
        HereditaryBits
      | (int(LhsFlags) & int(RhsFlags) &
           ( (StorageOrdersAgree ? LinearAccessBit : 0)
           | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
           )
        )
      ),
    // The result's storage order follows the left-hand side.
    Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
    // Joint alignment is limited by the less aligned operand.
    Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment)
  };
  EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr)
    : m_functor(xpr.functor()),
      m_lhsImpl(xpr.lhs()),
      m_rhsImpl(xpr.rhs())
  {
    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index row, Index col) const
  {
    return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
    return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col),
                              m_rhsImpl.template packet<LoadMode,PacketType>(row, col));
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
    return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index),
                              m_rhsImpl.template packet<LoadMode,PacketType>(index));
  }
protected:
  const BinaryOp m_functor;
  evaluator<Lhs> m_lhsImpl;
  evaluator<Rhs> m_rhsImpl;
};
// Evaluator for CwiseUnaryView: a writable view obtained by applying a
// unary functor to each coefficient of the argument.  Unlike CwiseUnaryOp,
// coeffRef is provided, so the functor is expected to yield an lvalue
// (e.g. the real/imag part of a complex) — presumably; confirm against the
// functors used with CwiseUnaryView.
template<typename UnaryOp, typename ArgType>
struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
  : evaluator_base<CwiseUnaryView<UnaryOp, ArgType> >
{
  typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
  enum {
    CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
    // Packet access is dropped: only plain/linear/direct access survives.
    Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
    // The view may shift addresses, so no alignment is guaranteed.
    Alignment = 0
  };
  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
    : m_unaryOp(op.functor()),
      m_argImpl(op.nestedExpression())
  {
    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index row, Index col) const
  {
    return m_unaryOp(m_argImpl.coeff(row, col));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index index) const
  {
    return m_unaryOp(m_argImpl.coeff(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Scalar& coeffRef(Index row, Index col)
  {
    return m_unaryOp(m_argImpl.coeffRef(row, col));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Scalar& coeffRef(Index index)
  {
    return m_unaryOp(m_argImpl.coeffRef(index));
  }
protected:
  const UnaryOp m_unaryOp;
  evaluator<ArgType> m_argImpl;
};
template<typename Derived, typename PlainObjectType>
struct mapbase_evaluator;

// Common evaluator base for Map-like expressions: a raw data pointer plus
// inner/outer strides.  Used by Map, Ref and direct-access Block.
template<typename Derived, typename PlainObjectType>
struct mapbase_evaluator : evaluator_base<Derived>
{
  typedef Derived XprType;
  typedef typename XprType::PointerType PointerType;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  enum {
    // NOTE(review): despite its name this is set to RowsAtCompileTime, not
    // to a majorness flag.  It appears unused in this struct —
    // rowStride()/colStride() below query XprType::IsRowMajor directly.
    IsRowMajor = XprType::RowsAtCompileTime,
    ColsAtCompileTime = XprType::ColsAtCompileTime,
    CoeffReadCost = NumTraits<Scalar>::ReadCost
  };
  EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map)
    : m_data(const_cast<PointerType>(map.data())),
      m_innerStride(map.innerStride()),
      m_outerStride(map.outerStride())
  {
    // Vectorized loads/stores below assume consecutive inner elements.
    EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
                        PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index row, Index col) const
  {
    return m_data[col * colStride() + row * rowStride()];
  }
  // Linear (1D) access uses the inner stride only; callers are expected to
  // use it only when the derived evaluator advertises linear access.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index index) const
  {
    return m_data[index * m_innerStride.value()];
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Scalar& coeffRef(Index row, Index col)
  {
    return m_data[col * colStride() + row * rowStride()];
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Scalar& coeffRef(Index index)
  {
    return m_data[index * m_innerStride.value()];
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
    PointerType ptr = m_data + row * rowStride() + col * colStride();
    return internal::ploadt<PacketType, LoadMode>(ptr);
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
    return internal::ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value());
  }
  template<int StoreMode, typename PacketType>
  EIGEN_STRONG_INLINE
  void writePacket(Index row, Index col, const PacketType& x)
  {
    PointerType ptr = m_data + row * rowStride() + col * colStride();
    return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
  }
  template<int StoreMode, typename PacketType>
  EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketType& x)
  {
    internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
  }
protected:
  // Map the (row, col) pair onto (inner, outer) strides according to the
  // expression's storage order.
  EIGEN_DEVICE_FUNC
  inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); }
  EIGEN_DEVICE_FUNC
  inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); }
  PointerType m_data;
  // Strides are stored only when not known at compile time.
  const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
  const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride;
};
// Evaluator for Map: computes the effective flags/alignment from the
// user-supplied StrideType and forwards actual access to mapbase_evaluator.
template<typename PlainObjectType, int MapOptions, typename StrideType>
struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
  : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType>
{
  typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
  typedef typename XprType::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type PacketScalar;
  enum {
    // A stride of 0 in StrideType means "use the plain object's default".
    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
                             ? int(PlainObjectType::InnerStrideAtCompileTime)
                             : int(StrideType::InnerStrideAtCompileTime),
    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
                             ? int(PlainObjectType::OuterStrideAtCompileTime)
                             : int(StrideType::OuterStrideAtCompileTime),
    HasNoInnerStride = InnerStrideAtCompileTime == 1,
    HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
    HasNoStride = HasNoInnerStride && HasNoOuterStride,
    IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
    // Packet access needs a unit inner stride; linear access needs either
    // no custom stride at all or a vector expression.
    PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit),
    LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit),
    Flags = int( evaluator<PlainObjectType>::Flags) & (LinearAccessMask&PacketAccessMask),
    // Alignment is whatever the user promised through MapOptions.
    Alignment = int(MapOptions)&int(AlignedMask)
  };
  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map)
    : mapbase_evaluator<XprType, PlainObjectType>(map)
  { }
};
// Evaluator for Ref: identical access machinery to Map (via
// mapbase_evaluator); flags and alignment are borrowed from the equivalent
// Map evaluator.
template<typename PlainObjectType, int RefOptions, typename StrideType>
struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
  : public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType>
{
  typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
  enum {
    Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags,
    Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment
  };
  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref)
    : mapbase_evaluator<XprType, PlainObjectType>(ref)
  { }
};
// block_evaluator dispatches on whether the argument offers direct (raw
// pointer) access; the two specializations follow below.
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel,
         bool HasDirectAccess = internal::has_direct_access<ArgType>::ret> struct block_evaluator;

// Evaluator for Block: computes the block's compile-time traits (storage
// order, strides, flags, alignment) and inherits the access implementation
// from the appropriate block_evaluator specialization.
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
  : block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
{
  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
  typedef typename XprType::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type PacketScalar;
  enum {
    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
    RowsAtCompileTime = traits<XprType>::RowsAtCompileTime,
    ColsAtCompileTime = traits<XprType>::ColsAtCompileTime,
    MaxRowsAtCompileTime = traits<XprType>::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = traits<XprType>::MaxColsAtCompileTime,
    ArgTypeIsRowMajor = (int(evaluator<ArgType>::Flags)&RowMajorBit) != 0,
    // A single-row block is treated as row-major, a single-column block as
    // column-major; otherwise inherit the argument's order.
    IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1
               : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
               : ArgTypeIsRowMajor,
    HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor),
    InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
    // When the block's effective order differs from the argument's, its
    // inner and outer strides swap roles.
    InnerStrideAtCompileTime = HasSameStorageOrderAsArgType
                             ? int(inner_stride_at_compile_time<ArgType>::ret)
                             : int(outer_stride_at_compile_time<ArgType>::ret),
    OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
                             ? int(outer_stride_at_compile_time<ArgType>::ret)
                             : int(inner_stride_at_compile_time<ArgType>::ret),
    // Packets need contiguous inner elements.
    MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
    // Linear access for vector-shaped blocks, or for inner panels of a
    // linearly accessible argument.
    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
    FlagsRowMajorBit = XprType::Flags&RowMajorBit,
    Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
                                           DirectAccessBit |
                                           MaskPacketAccessBit),
    Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
    PacketAlignment = unpacket_traits<PacketScalar>::alignment,
    // An inner panel whose outer stride is a whole number of packets keeps
    // the argument's packet alignment across outer steps.
    Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
    Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
  };
  typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block)
  {
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }
};
// block_evaluator for arguments WITHOUT direct access: fall back to the
// coefficient-wise unary_evaluator<Block> below.
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, false>
  : unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
{
  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
  EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
    : unary_evaluator<XprType>(block)
  {}
};
// Coefficient-wise Block evaluator: every access is forwarded to the
// argument evaluator shifted by the block's (startRow, startCol) offset.
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBased>
  : evaluator_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
{
  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
    : m_argImpl(block.nestedExpression()),
      m_startRow(block.startRow()),
      m_startCol(block.startCol())
  { }
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  enum {
    RowsAtCompileTime = XprType::RowsAtCompileTime
  };
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index row, Index col) const
  {
    return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);
  }
  // Linear access maps the 1D index onto the block's vector direction:
  // (0, index) for a single-row block, (index, 0) otherwise.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index index) const
  {
    return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Scalar& coeffRef(Index row, Index col)
  {
    return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Scalar& coeffRef(Index index)
  {
    return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
    return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
    return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
                                       RowsAtCompileTime == 1 ? index : 0);
  }
  template<int StoreMode, typename PacketType>
  EIGEN_STRONG_INLINE
  void writePacket(Index row, Index col, const PacketType& x)
  {
    return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
  }
  template<int StoreMode, typename PacketType>
  EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketType& x)
  {
    return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
                                             RowsAtCompileTime == 1 ? index : 0,
                                             x);
  }
protected:
  evaluator<ArgType> m_argImpl;
  // Offsets are compile-time 0 when the argument is already a vector in
  // that direction and the block has size 1 there.
  const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
  const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
};
// block_evaluator for arguments WITH direct access: the block is itself a
// strided memory view, so mapbase_evaluator handles all access.
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, true>
  : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>,
                      typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
{
  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
  typedef typename XprType::Scalar Scalar;
  EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
    : mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
  {
    // The Alignment computed by evaluator<Block> must match the runtime
    // address of the block's first coefficient.
    eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
  }
};
+template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
+struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+ : evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+{
+ typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;
+ enum {
+ CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost
+ + EIGEN_PLAIN_ENUM_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
+ evaluator<ElseMatrixType>::CoeffReadCost),
+ Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits,
+ Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
+ };
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
+ : m_conditionImpl(select.conditionMatrix()),
+ m_thenImpl(select.thenMatrix()),
+ m_elseImpl(select.elseMatrix())
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ if (m_conditionImpl.coeff(row, col))
+ return m_thenImpl.coeff(row, col);
+ else
+ return m_elseImpl.coeff(row, col);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ if (m_conditionImpl.coeff(index))
+ return m_thenImpl.coeff(index);
+ else
+ return m_elseImpl.coeff(index);
+ }
+protected:
+ evaluator<ConditionMatrixType> m_conditionImpl;
+ evaluator<ThenMatrixType> m_thenImpl;
+ evaluator<ElseMatrixType> m_elseImpl;
+};
// Evaluator for Replicate: tiles the argument RowFactor x ColFactor times.
// Each coordinate is wrapped back into the argument's range with a modulo
// (skipped at compile time when the corresponding factor is 1 or the
// argument has a single row/column).
template<typename ArgType, int RowFactor, int ColFactor>
struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
  : evaluator_base<Replicate<ArgType, RowFactor, ColFactor> >
{
  typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  enum {
    Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
  };
  // The argument may be evaluated into a temporary (nested_eval) since each
  // of its coefficients is read up to Factor times.
  typedef typename internal::nested_eval<ArgType,Factor>::type ArgTypeNested;
  typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
  enum {
    CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
    LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0,
    Flags = (evaluator<ArgTypeNestedCleaned>::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit),
    Alignment = evaluator<ArgTypeNestedCleaned>::Alignment
  };
  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate)
    : m_arg(replicate.nestedExpression()),
      m_argImpl(m_arg),
      m_rows(replicate.nestedExpression().rows()),
      m_cols(replicate.nestedExpression().cols())
  {}
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index row, Index col) const
  {
    const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
                           : RowFactor==1 ? row
                           : row % m_rows.value();
    const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0
                           : ColFactor==1 ? col
                           : col % m_cols.value();
    return m_argImpl.coeff(actual_row, actual_col);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index index) const
  {
    // Vector case: wrap along whichever direction is being replicated.
    const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
                             ? (ColFactor==1 ? index : index%m_cols.value())
                             : (RowFactor==1 ? index : index%m_rows.value());
    return m_argImpl.coeff(actual_index);
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
    const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
                           : RowFactor==1 ? row
                           : row % m_rows.value();
    const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0
                           : ColFactor==1 ? col
                           : col % m_cols.value();
    return m_argImpl.template packet<LoadMode,PacketType>(actual_row, actual_col);
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
    const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
                             ? (ColFactor==1 ? index : index%m_cols.value())
                             : (RowFactor==1 ? index : index%m_rows.value());
    return m_argImpl.template packet<LoadMode,PacketType>(actual_index);
  }
protected:
  // m_arg keeps the (possibly temporary) nested expression alive for the
  // lifetime of m_argImpl.
  const ArgTypeNested m_arg;
  evaluator<ArgTypeNestedCleaned> m_argImpl;
  const variable_if_dynamic<Index, ArgType::RowsAtCompileTime> m_rows;
  const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols;
};
+template< typename ArgType, typename MemberOp, int Direction>
+struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
+ : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >
+{
+ typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
+ typedef typename internal::nested_eval<ArgType,1>::type ArgTypeNested;
+ typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
+ typedef typename ArgType::Scalar InputScalar;
+ typedef typename XprType::Scalar Scalar;
+ enum {
+ TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime)
+ };
+ typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
+ enum {
+ CoeffReadCost = TraversalSize==Dynamic ? HugeCost
+ : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
+ Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit,
+ Alignment = 0
+ };
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr)
+ : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : int(CostOpType::value));
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Scalar coeff(Index i, Index j) const
+ {
+ if (Direction==Vertical)
+ return m_functor(m_arg.col(j));
+ else
+ return m_functor(m_arg.row(i));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Scalar coeff(Index index) const
+ {
+ if (Direction==Vertical)
+ return m_functor(m_arg.col(index));
+ else
+ return m_functor(m_arg.row(index));
+ }
+protected:
+ typename internal::add_const_on_value_type<ArgTypeNested>::type m_arg;
+ const MemberOp m_functor;
+};
// Common base for evaluators of wrapper expressions (e.g. ArrayWrapper /
// MatrixWrapper): every coefficient, reference and packet operation is
// forwarded unchanged to the wrapped expression's evaluator.
template<typename XprType>
struct evaluator_wrapper_base
  : evaluator_base<XprType>
{
  typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
  enum {
    // A wrapper changes only the API flavor, so all traits pass through.
    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
    Flags = evaluator<ArgType>::Flags,
    Alignment = evaluator<ArgType>::Alignment
  };
  EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
  typedef typename ArgType::Scalar Scalar;
  typedef typename ArgType::CoeffReturnType CoeffReturnType;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index row, Index col) const
  {
    return m_argImpl.coeff(row, col);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index index) const
  {
    return m_argImpl.coeff(index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Scalar& coeffRef(Index row, Index col)
  {
    return m_argImpl.coeffRef(row, col);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Scalar& coeffRef(Index index)
  {
    return m_argImpl.coeffRef(index);
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
    return m_argImpl.template packet<LoadMode,PacketType>(row, col);
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
    return m_argImpl.template packet<LoadMode,PacketType>(index);
  }
  template<int StoreMode, typename PacketType>
  EIGEN_STRONG_INLINE
  void writePacket(Index row, Index col, const PacketType& x)
  {
    m_argImpl.template writePacket<StoreMode>(row, col, x);
  }
  template<int StoreMode, typename PacketType>
  EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketType& x)
  {
    m_argImpl.template writePacket<StoreMode>(index, x);
  }
protected:
  evaluator<ArgType> m_argImpl;
};
// Evaluator for ArrayWrapper: pure pass-through to the wrapped expression
// via evaluator_wrapper_base.
template<typename TArgType>
struct unary_evaluator<ArrayWrapper<TArgType> >
  : evaluator_wrapper_base<ArrayWrapper<TArgType> >
{
  typedef ArrayWrapper<TArgType> XprType;
  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper)
    : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression())
  { }
};
// Helper that conditionally reverses a packet (defined elsewhere).
template<typename PacketType, bool ReversePacket> struct reverse_packet_cond;

// Evaluator for Reverse: mirrors the argument along rows, columns, or both.
// Scalar access flips the index; packet access additionally has to load
// shifted packets and reverse the lanes when the reversal runs along the
// packet (inner) direction.
template<typename ArgType, int Direction>
struct unary_evaluator<Reverse<ArgType, Direction> >
  : evaluator_base<Reverse<ArgType, Direction> >
{
  typedef Reverse<ArgType, Direction> XprType;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  enum {
    IsRowMajor = XprType::IsRowMajor,
    IsColMajor = !IsRowMajor,
    ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
    ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
    // Lanes within a packet must be reversed whenever the reversal runs
    // along the storage's inner direction.
    ReversePacket = (Direction == BothDirections)
                 || ((Direction == Vertical)   && IsColMajor)
                 || ((Direction == Horizontal) && IsRowMajor),
    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
    Flags0 = evaluator<ArgType>::Flags,
    // Linear access survives only for full (both-direction) reversals of
    // packet-accessible arguments, or vector-shaped cases.
    LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
                || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1))
                 ? LinearAccessBit : 0,
    Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),
    // Mirroring breaks any alignment guarantee.
    Alignment = 0
  };
  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse)
    : m_argImpl(reverse.nestedExpression()),
      m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1),
      m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1)
  { }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index row, Index col) const
  {
    return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
                           ReverseCol ? m_cols.value() - col - 1 : col);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index index) const
  {
    return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Scalar& coeffRef(Index row, Index col)
  {
    return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
                              ReverseCol ? m_cols.value() - col - 1 : col);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Scalar& coeffRef(Index index)
  {
    return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
    enum {
      PacketSize = unpacket_traits<PacketType>::size,
      // When reversing along the inner direction the packet covers the
      // PacketSize elements ENDING at the mirrored position, hence the
      // extra offset; lanes are then flipped by reverse_packet.
      OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
      OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
    };
    typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
    return reverse_packet::run(m_argImpl.template packet<LoadMode,PacketType>(
                                  ReverseRow ? m_rows.value() - row - OffsetRow : row,
                                  ReverseCol ? m_cols.value() - col - OffsetCol : col));
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
    enum { PacketSize = unpacket_traits<PacketType>::size };
    return preverse(m_argImpl.template packet<LoadMode,PacketType>(m_rows.value() * m_cols.value() - index - PacketSize));
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  void writePacket(Index row, Index col, const PacketType& x)
  {
    enum {
      PacketSize = unpacket_traits<PacketType>::size,
      OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
      OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
    };
    typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
    m_argImpl.template writePacket<LoadMode>(
                                  ReverseRow ? m_rows.value() - row - OffsetRow : row,
                                  ReverseCol ? m_cols.value() - col - OffsetCol : col,
                                  reverse_packet::run(x));
  }
  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketType& x)
  {
    enum { PacketSize = unpacket_traits<PacketType>::size };
    m_argImpl.template writePacket<LoadMode>
      (m_rows.value() * m_cols.value() - index - PacketSize, preverse(x));
  }
protected:
  evaluator<ArgType> m_argImpl;
  // Sizes are only needed (and stored) along the reversed directions;
  // otherwise they collapse to the compile-time constant 1.
  const variable_if_dynamic<Index, ReverseRow ? ArgType::RowsAtCompileTime : 1> m_rows;
  const variable_if_dynamic<Index, ReverseCol ? ArgType::ColsAtCompileTime : 1> m_cols;
};
// Evaluator for Diagonal: coefficient i of the expression is argument
// coefficient (i + rowOffset, i + colOffset), where the offsets encode
// which super-/sub-diagonal (DiagIndex) is viewed.
template<typename ArgType, int DiagIndex>
struct evaluator<Diagonal<ArgType, DiagIndex> >
  : evaluator_base<Diagonal<ArgType, DiagIndex> >
{
  typedef Diagonal<ArgType, DiagIndex> XprType;
  enum {
    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
    // A diagonal is a vector, hence always linearly accessible.
    Flags = (unsigned int)(evaluator<ArgType>::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit,
    Alignment = 0
  };
  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal)
    : m_argImpl(diagonal.nestedExpression()),
      m_index(diagonal.index())
  { }
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index row, Index) const
  {
    return m_argImpl.coeff(row + rowOffset(), row + colOffset());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index index) const
  {
    return m_argImpl.coeff(index + rowOffset(), index + colOffset());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Scalar& coeffRef(Index row, Index)
  {
    return m_argImpl.coeffRef(row + rowOffset(), row + colOffset());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Scalar& coeffRef(Index index)
  {
    return m_argImpl.coeffRef(index + rowOffset(), index + colOffset());
  }
protected:
  evaluator<ArgType> m_argImpl;
  const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;
private:
  // Positive DiagIndex selects a super-diagonal (column offset), negative
  // a sub-diagonal (row offset); exactly one offset is non-zero.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; }
};
template<typename ArgType> class EvalToTemp;

// EvalToTemp shares the traits of its argument expression.
template<typename ArgType>
struct traits<EvalToTemp<ArgType> >
  : public traits<ArgType>
{ };

// Expression wrapper that forces its argument to be evaluated into a
// temporary; the actual evaluation happens in evaluator<EvalToTemp> below.
// Holds the argument by reference, so the wrapped expression must outlive
// this object.
template<typename ArgType>
class EvalToTemp
  : public dense_xpr_base<EvalToTemp<ArgType> >::type
{
 public:
  typedef typename dense_xpr_base<EvalToTemp>::type Base;
  EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp)
  explicit EvalToTemp(const ArgType& arg)
    : m_arg(arg)
  { }
  // The wrapped expression.
  const ArgType& arg() const
  {
    return m_arg;
  }
  Index rows() const
  {
    return m_arg.rows();
  }
  Index cols() const
  {
    return m_arg.cols();
  }
 private:
  const ArgType& m_arg;
};
// Evaluator for EvalToTemp: evaluates the argument into the plain-object
// member m_result and then behaves as a plain-object evaluator over it.
template<typename ArgType>
struct evaluator<EvalToTemp<ArgType> >
  : public evaluator<typename ArgType::PlainObject>
{
  typedef EvalToTemp<ArgType> XprType;
  typedef typename ArgType::PlainObject PlainObject;
  typedef evaluator<PlainObject> Base;
  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
    : m_result(xpr.arg())
  {
    // The Base subobject is constructed before m_result (bases are
    // initialized before members), so re-initialize it in place now that
    // m_result actually exists.
    ::new (static_cast<Base*>(this)) Base(m_result);
  }
  // Shortcut constructor avoiding the EvalToTemp wrapper.
  EIGEN_DEVICE_FUNC evaluator(const ArgType& arg)
    : m_result(arg)
  {
    ::new (static_cast<Base*>(this)) Base(m_result);
  }
protected:
  PlainObject m_result;
};
+}
+}
+#endif
+// end #include "src/Core/CoreEvaluators.h"
+// #include "src/Core/AssignEvaluator.h"
+#ifndef EIGEN_ASSIGN_EVALUATOR_H
+#define EIGEN_ASSIGN_EVALUATOR_H
+namespace Eigen {
+namespace internal {
+// Compile-time policy for dense assignment: given the destination/source
+// evaluators and the assignment functor, decide which Traversal scheme
+// (default, linear, inner/linear/slice-vectorized) and which Unrolling
+// strategy to use, plus the packet type to vectorize with.
+template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
+struct copy_using_evaluator_traits
+{
+ typedef typename DstEvaluator::XprType Dst;
+ typedef typename Dst::Scalar DstScalar;
+ enum {
+ DstFlags = DstEvaluator::Flags,
+ SrcFlags = SrcEvaluator::Flags
+ };
+public:
+ enum {
+ DstAlignment = DstEvaluator::Alignment,
+ SrcAlignment = SrcEvaluator::Alignment,
+ DstHasDirectAccess = DstFlags & DirectAccessBit,
+ // Worst-case alignment shared by both sides.
+ JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
+ };
+private:
+ // Inner dimension sizes of the destination (whole size for vectors,
+ // otherwise the storage-order-dependent inner dimension).
+ enum {
+ InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
+ : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
+ : int(Dst::RowsAtCompileTime),
+ InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
+ : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
+ : int(Dst::MaxRowsAtCompileTime),
+ OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
+ MaxSizeAtCompileTime = Dst::SizeAtCompileTime
+ };
+ // Candidate packet types for linear (whole-expression) and per-inner-
+ // dimension vectorization; they may have different sizes.
+ typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
+ typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
+ enum {
+ LinearPacketSize = unpacket_traits<LinearPacketType>::size,
+ InnerPacketSize = unpacket_traits<InnerPacketType>::size
+ };
+public:
+ enum {
+ LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
+ InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
+ };
+private:
+ // Feasibility of each vectorization scheme, from strongest requirements
+ // (inner-vectorized: static sizes divisible by the packet size) to the
+ // slice-vectorized fallback (runtime-aligned middle segment per inner run).
+ enum {
+ DstIsRowMajor = DstFlags&RowMajorBit,
+ SrcIsRowMajor = SrcFlags&RowMajorBit,
+ StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
+ MightVectorize = bool(StorageOrdersAgree)
+ && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
+ && bool(functor_traits<AssignFunc>::PacketAccess),
+ MayInnerVectorize = MightVectorize
+ && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
+ && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
+ && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
+ MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
+ MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
+ && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
+ MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
+ && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
+ };
+public:
+ // Final traversal choice: first feasible scheme in preference order.
+ enum {
+ Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
+ : int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
+ : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
+ : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
+ : int(MayLinearize) ? int(LinearTraversal)
+ : int(DefaultTraversal),
+ Vectorized = int(Traversal) == InnerVectorizedTraversal
+ || int(Traversal) == LinearVectorizedTraversal
+ || int(Traversal) == SliceVectorizedTraversal
+ };
+ typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
+private:
+ // Unrolling budget: EIGEN_UNROLLING_LIMIT scaled by the packet width,
+ // compared against size * per-coefficient read cost.
+ enum {
+ ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
+ : Vectorized ? InnerPacketSize
+ : 1,
+ UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
+ MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
+ && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
+ MayUnrollInner = int(InnerSize) != Dynamic
+ && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
+ };
+public:
+ enum {
+ Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
+ ? (
+ int(MayUnrollCompletely) ? int(CompleteUnrolling)
+ : int(MayUnrollInner) ? int(InnerUnrolling)
+ : int(NoUnrolling)
+ )
+ : int(Traversal) == int(LinearVectorizedTraversal)
+ ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
+ ? int(CompleteUnrolling)
+ : int(NoUnrolling) )
+ : int(Traversal) == int(LinearTraversal)
+ ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
+ : int(NoUnrolling) )
+#if EIGEN_UNALIGNED_VECTORIZE
+ : int(Traversal) == int(SliceVectorizedTraversal)
+ ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
+ : int(NoUnrolling) )
+#endif
+ : int(NoUnrolling)
+ };
+#ifdef EIGEN_DEBUG_ASSIGN
+ // Dumps every decision input/output to stderr; compiled in only when
+ // EIGEN_DEBUG_ASSIGN is defined.
+ static void debug()
+ {
+ std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
+ std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
+ std::cerr.setf(std::ios::hex, std::ios::basefield);
+ std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
+ std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
+ std::cerr.unsetf(std::ios::hex);
+ EIGEN_DEBUG_VAR(DstAlignment)
+ EIGEN_DEBUG_VAR(SrcAlignment)
+ EIGEN_DEBUG_VAR(LinearRequiredAlignment)
+ EIGEN_DEBUG_VAR(InnerRequiredAlignment)
+ EIGEN_DEBUG_VAR(JointAlignment)
+ EIGEN_DEBUG_VAR(InnerSize)
+ EIGEN_DEBUG_VAR(InnerMaxSize)
+ EIGEN_DEBUG_VAR(LinearPacketSize)
+ EIGEN_DEBUG_VAR(InnerPacketSize)
+ EIGEN_DEBUG_VAR(ActualPacketSize)
+ EIGEN_DEBUG_VAR(StorageOrdersAgree)
+ EIGEN_DEBUG_VAR(MightVectorize)
+ EIGEN_DEBUG_VAR(MayLinearize)
+ EIGEN_DEBUG_VAR(MayInnerVectorize)
+ EIGEN_DEBUG_VAR(MayLinearVectorize)
+ EIGEN_DEBUG_VAR(MaySliceVectorize)
+ std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
+ EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
+ EIGEN_DEBUG_VAR(UnrollingLimit)
+ EIGEN_DEBUG_VAR(MayUnrollCompletely)
+ EIGEN_DEBUG_VAR(MayUnrollInner)
+ std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
+ std::cerr << std::endl;
+ }
+#endif
+};
+// Recursive template that fully unrolls coefficient-wise assignment over the
+// flat index range [Index, Stop); each level maps the flat index to an
+// (outer, inner) pair at compile time. The <Stop, Stop> specialization below
+// terminates the recursion.
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
+{
+ typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
+ typedef typename DstEvaluatorType::XprType DstXprType;
+ enum {
+ outer = Index / DstXprType::InnerSizeAtCompileTime,
+ inner = Index % DstXprType::InnerSizeAtCompileTime
+ };
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ kernel.assignCoeffByOuterInner(outer, inner);
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
+ }
+};
+// Recursion terminator: no coefficients left to assign.
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
+};
+// Fully unrolls one inner run [Index_, Stop) coefficient-wise for a given
+// runtime outer index; the caller loops over outers.
+template<typename Kernel, int Index_, int Stop>
+struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
+ {
+ kernel.assignCoeffByOuterInner(outer, Index_);
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
+ }
+};
+// Recursion terminator.
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
+};
+// Fully unrolls linear (single flat index) coefficient-wise assignment over
+// [Index, Stop).
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
+ {
+ kernel.assignCoeff(Index);
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
+ }
+};
+// Recursion terminator.
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
+};
+// Fully unrolls packet-wise assignment over the flat index range
+// [Index, Stop), advancing by the packet size each step and using the
+// statically-known Src/Dst alignments from the kernel's traits.
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_innervec_CompleteUnrolling
+{
+ typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
+ typedef typename DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::PacketType PacketType;
+ enum {
+ outer = Index / DstXprType::InnerSizeAtCompileTime,
+ inner = Index % DstXprType::InnerSizeAtCompileTime,
+ SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
+ DstAlignment = Kernel::AssignmentTraits::DstAlignment
+ };
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
+ // Step by a whole packet, not by 1.
+ enum { NextIndex = Index + unpacket_traits<PacketType>::size };
+ copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
+ }
+};
+// Recursion terminator.
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
+};
+// Fully unrolls one inner run [Index_, Stop) packet-wise for a given runtime
+// outer index, with alignments supplied as template parameters.
+template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
+struct copy_using_evaluator_innervec_InnerUnrolling
+{
+ typedef typename Kernel::PacketType PacketType;
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
+ {
+ kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
+ enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
+ }
+};
+// Recursion terminator.
+template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
+struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
+};
+// Dispatcher: the (Traversal, Unrolling) decision computed by
+// copy_using_evaluator_traits selects one of the specializations below.
+template<typename Kernel,
+ int Traversal = Kernel::AssignmentTraits::Traversal,
+ int Unrolling = Kernel::AssignmentTraits::Unrolling>
+struct dense_assignment_loop;
+// Plain double loop over (outer, inner), one coefficient at a time.
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
+{
+ EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
+ {
+ for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
+ for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
+ kernel.assignCoeffByOuterInner(outer, inner);
+ }
+ }
+ }
+};
+// Coefficient-wise assignment, fully unrolled over the static total size.
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ }
+};
+// Runtime loop over outers; each inner run is fully unrolled (static inner
+// size, dynamic outer count).
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ const Index outerSize = kernel.outerSize();
+ for(Index outer = 0; outer < outerSize; ++outer)
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
+ }
+};
+// Scalar-wise cleanup loop used around the vectorized middle segment of a
+// linear-vectorized assignment. The primary template (IsAligned == true) is
+// a no-op: when the destination start is already aligned there is no
+// prologue to peel. The <false> specialization below does the real work.
+template <bool IsAligned = false>
+struct unaligned_dense_assignment_loop
+{
+ template <typename Kernel>
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
+};
+template <>
+struct unaligned_dense_assignment_loop<false>
+{
+// NOTE(review): MSVC gets a non-inlined, non-device variant here —
+// presumably a compiler workaround; confirm against upstream Eigen history.
+#if EIGEN_COMP_MSVC
+ template <typename Kernel>
+ static EIGEN_DONT_INLINE void run(Kernel &kernel,
+ Index start,
+ Index end)
+#else
+ template <typename Kernel>
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
+ Index start,
+ Index end)
+#endif
+ {
+ // Assign [start, end) one coefficient at a time.
+ for (Index index = start; index < end; ++index)
+ kernel.assignCoeff(index);
+ }
+};
+// Linear vectorized assignment: peel an unaligned scalar prologue up to the
+// first aligned destination index, stream whole packets through the aligned
+// middle, then finish the remainder scalar-wise.
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ const Index size = kernel.size();
+ typedef typename Kernel::Scalar Scalar;
+ typedef typename Kernel::PacketType PacketType;
+ enum {
+ requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
+ packetSize = unpacket_traits<PacketType>::size,
+ // Statically known to start aligned? Then the prologue loop is a no-op.
+ dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
+ dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
+ : int(Kernel::AssignmentTraits::DstAlignment),
+ srcAlignment = Kernel::AssignmentTraits::JointAlignment
+ };
+ // Runtime search for the first aligned index unless alignment is static.
+ const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
+ const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
+ unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
+ for(Index index = alignedStart; index < alignedEnd; index += packetSize)
+ kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
+ // Scalar epilogue for the < packetSize tail.
+ unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
+ }
+};
+// Linear vectorized assignment, fully unrolled: packet steps for the largest
+// packet-multiple prefix, then unrolled scalar steps for the tail.
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::PacketType PacketType;
+ enum { size = DstXprType::SizeAtCompileTime,
+ packetSize =unpacket_traits<PacketType>::size,
+ alignedSize = (size/packetSize)*packetSize };
+ copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
+ }
+};
+// Inner-vectorized assignment: every inner run is an exact multiple of the
+// packet size (guaranteed by MayInnerVectorize), so packets are streamed
+// with no scalar prologue/epilogue.
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
+{
+ typedef typename Kernel::PacketType PacketType;
+ enum {
+ SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
+ DstAlignment = Kernel::AssignmentTraits::DstAlignment
+ };
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ const Index innerSize = kernel.innerSize();
+ const Index outerSize = kernel.outerSize();
+ const Index packetSize = unpacket_traits<PacketType>::size;
+ for(Index outer = 0; outer < outerSize; ++outer)
+ for(Index inner = 0; inner < innerSize; inner+=packetSize)
+ kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
+ }
+};
+// Inner-vectorized assignment, fully unrolled over the static total size.
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ }
+};
+// Runtime loop over outers; each inner run is packet-assigned with full
+// unrolling, propagating the statically known alignments.
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::AssignmentTraits Traits;
+ const Index outerSize = kernel.outerSize();
+ for(Index outer = 0; outer < outerSize; ++outer)
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
+ Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
+ }
+};
+// Plain linear loop: one scalar assignment per flat index.
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ const Index size = kernel.size();
+ for(Index i = 0; i < size; ++i)
+ kernel.assignCoeff(i);
+ }
+};
+// Linear scalar assignment, fully unrolled over the static total size.
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ }
+};
+// Slice-vectorized assignment: per inner run, do scalar work up to the first
+// aligned index, packets through the aligned middle, scalar for the tail.
+// Alignment of the start of each run shifts with the outer stride, so the
+// aligned start index is advanced modulo the packet size each outer step.
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::Scalar Scalar;
+ typedef typename Kernel::PacketType PacketType;
+ enum {
+ packetSize = unpacket_traits<PacketType>::size,
+ requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
+ // Can per-run alignment ever be reached by scalar peeling?
+ alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
+ dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
+ dstAlignment = alignable ? int(requestedAlignment)
+ : int(Kernel::AssignmentTraits::DstAlignment)
+ };
+ const Scalar *dst_ptr = kernel.dstDataPtr();
+ // Destination not even scalar-aligned: vectorization is impossible,
+ // fall back to the plain coefficient-wise loop.
+ if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
+ {
+ return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
+ }
+ const Index packetAlignedMask = packetSize - 1;
+ const Index innerSize = kernel.innerSize();
+ const Index outerSize = kernel.outerSize();
+ // Per-outer-step shift of the first aligned inner index.
+ const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
+ Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
+ for(Index outer = 0; outer < outerSize; ++outer)
+ {
+ const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
+ // Scalar prologue.
+ for(Index inner = 0; inner<alignedStart ; ++inner)
+ kernel.assignCoeffByOuterInner(outer, inner);
+ // Packet middle; the source side is always read Unaligned here.
+ for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
+ kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
+ // Scalar epilogue.
+ for(Index inner = alignedEnd; inner<innerSize ; ++inner)
+ kernel.assignCoeffByOuterInner(outer, inner);
+ // Advance the aligned start for the next inner run (capped at innerSize).
+ alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
+ }
+ }
+};
+// Only available with unaligned vectorization: unroll each inner run as a
+// packet prefix plus a scalar tail. Alignment template arguments are 0
+// (i.e. unaligned access) since no alignment is guaranteed here.
+#if EIGEN_UNALIGNED_VECTORIZE
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::PacketType PacketType;
+ enum { size = DstXprType::InnerSizeAtCompileTime,
+ packetSize =unpacket_traits<PacketType>::size,
+ vectorizableSize = (size/packetSize)*packetSize };
+ for(Index outer = 0; outer < kernel.outerSize(); ++outer)
+ {
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
+ }
+ }
+};
+#endif
+// The assignment "kernel": bundles the destination evaluator, source
+// evaluator, assignment functor and destination expression, and exposes the
+// coefficient-wise and packet-wise assignment primitives that the
+// dense_assignment_loop specializations above drive. All geometry queries
+// (size, innerSize, strides, ...) are forwarded to the destination
+// expression.
+template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
+class generic_dense_assignment_kernel
+{
+protected:
+ typedef typename DstEvaluatorTypeT::XprType DstXprType;
+ typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
+public:
+ typedef DstEvaluatorTypeT DstEvaluatorType;
+ typedef SrcEvaluatorTypeT SrcEvaluatorType;
+ typedef typename DstEvaluatorType::Scalar Scalar;
+ typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
+ typedef typename AssignmentTraits::PacketType PacketType;
+ // Stores references only; all four arguments must outlive the kernel.
+ EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
+ : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
+ {
+ #ifdef EIGEN_DEBUG_ASSIGN
+ AssignmentTraits::debug();
+ #endif
+ }
+ EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); }
+ EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); }
+ EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); }
+ EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); }
+ EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
+ EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
+ EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
+ EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
+ // Apply the functor to one coefficient, addressed by (row, col).
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
+ {
+ m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
+ }
+ // Apply the functor to one coefficient, addressed by flat index.
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
+ {
+ m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
+ }
+ // (outer, inner) variant: translated to (row, col) per storage order.
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
+ {
+ Index row = rowIndexByOuterInner(outer, inner);
+ Index col = colIndexByOuterInner(outer, inner);
+ assignCoeff(row, col);
+ }
+ // Packet-wise variants; StoreMode/LoadMode carry the alignment hints.
+ template<int StoreMode, int LoadMode, typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
+ {
+ m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
+ }
+ template<int StoreMode, int LoadMode, typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
+ {
+ m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
+ }
+ template<int StoreMode, int LoadMode, typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
+ {
+ Index row = rowIndexByOuterInner(outer, inner);
+ Index col = colIndexByOuterInner(outer, inner);
+ assignPacket<StoreMode,LoadMode,PacketType>(row, col);
+ }
+ // Map (outer, inner) to a row index: vectors collapse to one dimension;
+ // otherwise row-major means outer==row, column-major means inner==row.
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
+ {
+ typedef typename DstEvaluatorType::ExpressionTraits Traits;
+ return int(Traits::RowsAtCompileTime) == 1 ? 0
+ : int(Traits::ColsAtCompileTime) == 1 ? inner
+ : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
+ : inner;
+ }
+ // Mirror of rowIndexByOuterInner for the column index.
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
+ {
+ typedef typename DstEvaluatorType::ExpressionTraits Traits;
+ return int(Traits::ColsAtCompileTime) == 1 ? 0
+ : int(Traits::RowsAtCompileTime) == 1 ? inner
+ : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
+ : outer;
+ }
+ // Raw pointer to the destination's storage (used for alignment probing).
+ EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
+ {
+ return m_dstExpr.data();
+ }
+protected:
+ DstEvaluatorType& m_dst;
+ const SrcEvaluatorType& m_src;
+ const Functor &m_functor;
+ DstXprType& m_dstExpr;
+};
+// Generic functor case: resizing is NOT allowed — only assert that the
+// destination already matches the source's geometry.
+template<typename DstXprType,typename SrcXprType, typename Functor>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &)
+{
+ EIGEN_ONLY_USED_FOR_DEBUG(dst);
+ EIGEN_ONLY_USED_FOR_DEBUG(src);
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+}
+// Plain assignment (assign_op) case: resize the destination to the source's
+// geometry when they differ, then assert success (resize may be a no-op for
+// fixed-size destinations).
+template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &)
+{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
+ dst.resize(dstRows, dstCols);
+ eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
+}
+// Entry point of the dense-assignment machinery: build both evaluators,
+// resize the destination if the functor permits, and run the selected loop.
+// NOTE(review): the source evaluator is deliberately constructed BEFORE
+// resize_if_allowed, i.e. before dst may be resized — presumably so that
+// expressions referencing dst are evaluated against its pre-resize state;
+// confirm against upstream Eigen.
+template<typename DstXprType, typename SrcXprType, typename Functor>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
+{
+ typedef evaluator<DstXprType> DstEvaluatorType;
+ typedef evaluator<SrcXprType> SrcEvaluatorType;
+ SrcEvaluatorType srcEvaluator(src);
+ resize_if_allowed(dst, src, func);
+ DstEvaluatorType dstEvaluator(dst);
+ typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
+ Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
+ dense_assignment_loop<Kernel>::run(kernel);
+}
+// Convenience overload defaulting the functor to plain assignment.
+template<typename DstXprType, typename SrcXprType>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
+{
+ call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
+}
+// Shape-pair -> assignment-kind mapping. Dense-to-dense assignments get the
+// specialized fast path; everything else falls back to the generic
+// EigenBase2EigenBase path (evalTo/addTo/subTo).
+template<typename DstShape, typename SrcShape> struct AssignmentKind;
+struct Dense2Dense {};
+struct EigenBase2EigenBase {};
+template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
+template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
+// Primary Assignment template, dispatched on the AssignmentKind of the two
+// expressions' shapes; specializations provide run().
+template< typename DstXprType, typename SrcXprType, typename Functor,
+ typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
+ typename EnableIf = void>
+struct Assignment;
+// call_assignment: the aliasing-aware front door. Defaults the functor to
+// plain assignment (non-const and const-destination overloads).
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src)
+{
+ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
+}
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(const Dst& dst, const Src& src)
+{
+ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
+}
+// Source assumed to alias dst (per evaluator_assume_aliasing): evaluate it
+// into a temporary first, then assign without aliasing concerns.
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
+{
+ typename plain_matrix_type<Src>::type tmp(src);
+ call_assignment_no_alias(dst, tmp, func);
+}
+// No aliasing assumed: assign directly.
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
+{
+ call_assignment_no_alias(dst, src, func);
+}
+// dst.noalias() = src: the user promised no aliasing, so skip the temporary.
+template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
+{
+ call_assignment_no_alias(dst.expression(), src, func);
+}
+// Assignment without an aliasing temporary. When a row-vector is assigned
+// from a column-vector (or vice versa) the destination is implicitly
+// transposed so the shapes line up; 1x1 expressions are exempt.
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
+{
+ enum {
+ NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
+ || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
+ ) && int(Dst::SizeAtCompileTime) != 1
+ };
+ typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
+ typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
+ ActualDstType actualDst(dst);
+ // Compile-time checks: dst must be writable, sizes compatible, and the
+ // functor applicable to the scalar pair.
+ EIGEN_STATIC_ASSERT_LVALUE(Dst)
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
+ Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
+}
+// Convenience overload defaulting the functor to plain assignment.
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias(Dst& dst, const Src& src)
+{
+ call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
+}
+// Variant that also skips the implicit-transpose logic above.
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
+{
+ EIGEN_STATIC_ASSERT_LVALUE(Dst)
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
+ Assignment<Dst,Src,Func>::run(dst, src, func);
+}
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
+{
+ call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
+}
+// Forward declaration; the definition lives with the aliasing diagnostics.
+template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
+// Dense-to-dense assignment: optional debug-mode aliasing check, then the
+// dense loop machinery above.
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
+struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+ {
+#ifndef EIGEN_NO_DEBUG
+ internal::check_for_aliasing(dst, src);
+#endif
+ call_dense_assignment_loop(dst, src, func);
+ }
+};
+// Generic (non-dense) assignment: resize the destination to the source's
+// geometry, then delegate to the source's evalTo / addTo / subTo depending
+// on whether the functor is =, += or -=.
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
+struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
+{
+ // Plain assignment: src.evalTo(dst).
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
+ {
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+ src.evalTo(dst);
+ }
+ // Compound add-assignment: src.addTo(dst).
+ template<typename SrcScalarType>
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &)
+ {
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+ src.addTo(dst);
+ }
+ // Compound subtract-assignment: src.subTo(dst).
+ template<typename SrcScalarType>
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &)
+ {
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+ src.subTo(dst);
+ }
+};
+}
+}
+#endif
+// end #include "src/Core/AssignEvaluator.h"
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+// #include "src/Core/Assign.h"
+#ifndef EIGEN_ASSIGN_H
+#define EIGEN_ASSIGN_H
+namespace Eigen {
+// Assignment without an aliasing temporary ("lazy"): statically checks
+// writability, size compatibility and identical scalar types, runtime-checks
+// the geometry, then delegates to call_assignment_no_alias.
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
+ ::lazyAssign(const DenseBase<OtherDerived>& other)
+{
+ enum{
+ SameType = internal::is_same<typename Derived::Scalar,typename OtherDerived::Scalar>::value
+ };
+ EIGEN_STATIC_ASSERT_LVALUE(Derived)
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
+ EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+ eigen_assert(rows() == other.rows() && cols() == other.cols());
+ internal::call_assignment_no_alias(derived(),other.derived());
+ return derived();
+}
+// operator= for DenseBase: both the cross-type template and the same-type
+// overload (required to suppress the implicitly generated copy assignment)
+// route through the aliasing-aware internal::call_assignment.
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
+{
+ internal::call_assignment(derived(), other.derived());
+ return derived();
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
+{
+ internal::call_assignment(derived(), other.derived());
+ return derived();
+}
+// operator= overload set for MatrixBase: same-type, DenseBase and EigenBase
+// sources all route through internal::call_assignment; ReturnByValue sources
+// instead evaluate themselves directly into the destination.
+template<typename Derived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
+{
+ internal::call_assignment(derived(), other.derived());
+ return derived();
+}
+template<typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
+{
+ internal::call_assignment(derived(), other.derived());
+ return derived();
+}
+template<typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
+{
+ internal::call_assignment(derived(), other.derived());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
+{
+ other.derived().evalTo(derived());
+ return derived();
+}
+}
+#endif
+// end #include "src/Core/Assign.h"
+#endif
+// #include "src/Core/ArrayBase.h"
+#ifndef EIGEN_ARRAYBASE_H
+#define EIGEN_ARRAYBASE_H
+namespace Eigen {
+template<typename Derived> class ArrayBase
+ : public DenseBase<Derived>
+{
+ public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef ArrayBase StorageBaseType;
+ typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef DenseBase<Derived> Base;
+ using Base::RowsAtCompileTime;
+ using Base::ColsAtCompileTime;
+ using Base::SizeAtCompileTime;
+ using Base::MaxRowsAtCompileTime;
+ using Base::MaxColsAtCompileTime;
+ using Base::MaxSizeAtCompileTime;
+ using Base::IsVectorAtCompileTime;
+ using Base::Flags;
+ using Base::derived;
+ using Base::const_cast_derived;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::coeff;
+ using Base::coeffRef;
+ using Base::lazyAssign;
+ using Base::operator=;
+ using Base::operator+=;
+ using Base::operator-=;
+ using Base::operator*=;
+ using Base::operator/=;
+ typedef typename Base::CoeffReturnType CoeffReturnType;
+#endif
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef typename Base::PlainObject PlainObject;
+ typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
+#endif
+#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
+#define EIGEN_DOC_UNARY_ADDONS(X,Y)
+// # include "../plugins/CommonCwiseUnaryOps.h"
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ const CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
+ const Derived&
+ >::type ConjugateReturnType;
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ const CwiseUnaryOp<internal::scalar_real_op<Scalar>, const Derived>,
+ const Derived&
+ >::type RealReturnType;
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ CwiseUnaryView<internal::scalar_real_ref_op<Scalar>, Derived>,
+ Derived&
+ >::type NonConstRealReturnType;
+typedef CwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived> ImagReturnType;
+typedef CwiseUnaryView<internal::scalar_imag_ref_op<Scalar>, Derived> NonConstImagReturnType;
+typedef CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> NegativeReturnType;
+#endif
+EIGEN_DOC_UNARY_ADDONS(operator-,opposite)
+EIGEN_DEVICE_FUNC
+inline const NegativeReturnType
+operator-() const { return NegativeReturnType(derived()); }
+template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; };
+EIGEN_DOC_UNARY_ADDONS(cast,conversion function)
+template<typename NewType>
+EIGEN_DEVICE_FUNC
+typename CastXpr<NewType>::Type
+cast() const
+{
+ return typename CastXpr<NewType>::Type(derived());
+}
+EIGEN_DOC_UNARY_ADDONS(conjugate,complex conjugate)
+EIGEN_DEVICE_FUNC
+inline ConjugateReturnType
+conjugate() const
+{
+ return ConjugateReturnType(derived());
+}
+EIGEN_DOC_UNARY_ADDONS(real,real part function)
+EIGEN_DEVICE_FUNC
+inline RealReturnType
+real() const { return RealReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
+EIGEN_DEVICE_FUNC
+inline const ImagReturnType
+imag() const { return ImagReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(unaryExpr,unary function)
+template<typename CustomUnaryOp>
+EIGEN_DEVICE_FUNC
+inline const CwiseUnaryOp<CustomUnaryOp, const Derived>
+unaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const
+{
+ return CwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func);
+}
+EIGEN_DOC_UNARY_ADDONS(unaryViewExpr,unary function)
+template<typename CustomViewOp>
+EIGEN_DEVICE_FUNC
+inline const CwiseUnaryView<CustomViewOp, const Derived>
+unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const
+{
+ return CwiseUnaryView<CustomViewOp, const Derived>(derived(), func);
+}
+EIGEN_DOC_UNARY_ADDONS(real,real part function)
+EIGEN_DEVICE_FUNC
+inline NonConstRealReturnType
+real() { return NonConstRealReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
+EIGEN_DEVICE_FUNC
+inline NonConstImagReturnType
+imag() { return NonConstImagReturnType(derived()); }
+// end # include "../plugins/CommonCwiseUnaryOps.h"
+// # include "../plugins/MatrixCwiseUnaryOps.h"
+typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> CwiseAbsReturnType;
+typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> CwiseAbs2ReturnType;
+typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> CwiseSignReturnType;
+typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType;
+EIGEN_DOC_UNARY_ADDONS(cwiseAbs,absolute value)
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseAbsReturnType
+cwiseAbs() const { return CwiseAbsReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseAbs2,squared absolute value)
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseAbs2ReturnType
+cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseSqrt,square-root)
+EIGEN_DEVICE_FUNC
+inline const CwiseSqrtReturnType
+cwiseSqrt() const { return CwiseSqrtReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseSign,sign function)
+EIGEN_DEVICE_FUNC
+inline const CwiseSignReturnType
+cwiseSign() const { return CwiseSignReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseInverse,inverse)
+EIGEN_DEVICE_FUNC
+inline const CwiseInverseReturnType
+cwiseInverse() const { return CwiseInverseReturnType(derived()); }
+// end # include "../plugins/MatrixCwiseUnaryOps.h"
+// # include "../plugins/ArrayCwiseUnaryOps.h"
+typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> AbsReturnType;
+typedef CwiseUnaryOp<internal::scalar_arg_op<Scalar>, const Derived> ArgReturnType;
+typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> Abs2ReturnType;
+typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> SqrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_rsqrt_op<Scalar>, const Derived> RsqrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> SignReturnType;
+typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> InverseReturnType;
+typedef CwiseUnaryOp<internal::scalar_boolean_not_op<Scalar>, const Derived> BooleanNotReturnType;
+typedef CwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived> ExpReturnType;
+typedef CwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived> LogReturnType;
+typedef CwiseUnaryOp<internal::scalar_log1p_op<Scalar>, const Derived> Log1pReturnType;
+typedef CwiseUnaryOp<internal::scalar_log10_op<Scalar>, const Derived> Log10ReturnType;
+typedef CwiseUnaryOp<internal::scalar_cos_op<Scalar>, const Derived> CosReturnType;
+typedef CwiseUnaryOp<internal::scalar_sin_op<Scalar>, const Derived> SinReturnType;
+typedef CwiseUnaryOp<internal::scalar_tan_op<Scalar>, const Derived> TanReturnType;
+typedef CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived> AcosReturnType;
+typedef CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived> AsinReturnType;
+typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturnType;
+typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType;
+typedef CwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived> LogisticReturnType;
+typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;
+typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;
+typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;
+typedef CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> CubeReturnType;
+typedef CwiseUnaryOp<internal::scalar_round_op<Scalar>, const Derived> RoundReturnType;
+typedef CwiseUnaryOp<internal::scalar_floor_op<Scalar>, const Derived> FloorReturnType;
+typedef CwiseUnaryOp<internal::scalar_ceil_op<Scalar>, const Derived> CeilReturnType;
+typedef CwiseUnaryOp<internal::scalar_isnan_op<Scalar>, const Derived> IsNaNReturnType;
+typedef CwiseUnaryOp<internal::scalar_isinf_op<Scalar>, const Derived> IsInfReturnType;
+typedef CwiseUnaryOp<internal::scalar_isfinite_op<Scalar>, const Derived> IsFiniteReturnType;
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const AbsReturnType
+abs() const
+{
+ return AbsReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const ArgReturnType
+arg() const
+{
+ return ArgReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const Abs2ReturnType
+abs2() const
+{
+ return Abs2ReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const ExpReturnType
+exp() const
+{
+ return ExpReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const LogReturnType
+log() const
+{
+ return LogReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const Log1pReturnType
+log1p() const
+{
+ return Log1pReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const Log10ReturnType
+log10() const
+{
+ return Log10ReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const SqrtReturnType
+sqrt() const
+{
+ return SqrtReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const RsqrtReturnType
+rsqrt() const
+{
+ return RsqrtReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const SignReturnType
+sign() const
+{
+ return SignReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const CosReturnType
+cos() const
+{
+ return CosReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const SinReturnType
+sin() const
+{
+ return SinReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const TanReturnType
+tan() const
+{
+ return TanReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const AtanReturnType
+atan() const
+{
+ return AtanReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const AcosReturnType
+acos() const
+{
+ return AcosReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const AsinReturnType
+asin() const
+{
+ return AsinReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const TanhReturnType
+tanh() const
+{
+ return TanhReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const SinhReturnType
+sinh() const
+{
+ return SinhReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const CoshReturnType
+cosh() const
+{
+ return CoshReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const LogisticReturnType
+logistic() const
+{
+ return LogisticReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const InverseReturnType
+inverse() const
+{
+ return InverseReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const SquareReturnType
+square() const
+{
+ return SquareReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const CubeReturnType
+cube() const
+{
+ return CubeReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const RoundReturnType
+round() const
+{
+ return RoundReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const FloorReturnType
+floor() const
+{
+ return FloorReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const CeilReturnType
+ceil() const
+{
+ return CeilReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const IsNaNReturnType
+isNaN() const
+{
+ return IsNaNReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const IsInfReturnType
+isInf() const
+{
+ return IsInfReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const IsFiniteReturnType
+isFinite() const
+{
+ return IsFiniteReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const BooleanNotReturnType
+operator!() const
+{
+ EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value),
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+ return BooleanNotReturnType(derived());
+}
+typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaReturnType;
+typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;
+typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
+typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
+EIGEN_DEVICE_FUNC
+inline const LgammaReturnType
+lgamma() const
+{
+ return LgammaReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const DigammaReturnType
+digamma() const
+{
+ return DigammaReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const ErfReturnType
+erf() const
+{
+ return ErfReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const ErfcReturnType
+erfc() const
+{
+ return ErfcReturnType(derived());
+}
+// end # include "../plugins/ArrayCwiseUnaryOps.h"
+// # include "../plugins/CommonCwiseBinaryOps.h"
+EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference)
+EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum)
+template<typename CustomBinaryOp, typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>
+binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other, const CustomBinaryOp& func = CustomBinaryOp()) const
+{
+ return CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>(derived(), other.derived(), func);
+}
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_product_op<Scalar,T>,Derived,Constant<T> > operator*(const T& scalar) const;
+template<typename T> friend
+const CwiseBinaryOp<internal::scalar_product_op<T,Scalar>,Constant<T>,Derived> operator*(const T& scalar, const StorageBaseType& expr);
+#endif
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,T>,Derived,Constant<T> > operator/(const T& scalar) const;
+#endif
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
+operator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+ return CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
+operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+ return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
+// end # include "../plugins/CommonCwiseBinaryOps.h"
+// # include "../plugins/MatrixCwiseBinaryOps.h"
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
+cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>
+cwiseEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>
+cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>
+cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
+cwiseMin(const Scalar &other) const
+{
+ return cwiseMin(Derived::Constant(rows(), cols(), other));
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>
+cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
+cwiseMax(const Scalar &other) const
+{
+ return cwiseMax(Derived::Constant(rows(), cols(), other));
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>
+cwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType;
+EIGEN_DEVICE_FUNC
+inline const CwiseScalarEqualReturnType
+cwiseEqual(const Scalar& s) const
+{
+ return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>());
+}
+// end # include "../plugins/MatrixCwiseBinaryOps.h"
+// # include "../plugins/ArrayCwiseBinaryOps.h"
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
+operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,typename OtherDerived::Scalar>, const Derived, const OtherDerived>
+operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_quotient_op<Scalar,typename OtherDerived::Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+EIGEN_MAKE_CWISE_BINARY_OP(min,min)
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived,
+ const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+min
+#else
+(min)
+#endif
+(const Scalar &other) const
+{
+ return (min)(Derived::PlainObject::Constant(rows(), cols(), other));
+}
+EIGEN_MAKE_CWISE_BINARY_OP(max,max)
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived,
+ const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+max
+#else
+(max)
+#endif
+(const Scalar &other) const
+{
+ return (max)(Derived::PlainObject::Constant(rows(), cols(), other));
+}
+EIGEN_MAKE_CWISE_BINARY_OP(pow,pow)
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_pow_op<Scalar,T>,Derived,Constant<T> > pow(const T& exponent) const;
+#endif
+#define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \
+template<typename OtherDerived> \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived> \
+OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
+{ \
+ return CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived>(derived(), other.derived()); \
+}\
+typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \
+typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Cmp ## COMPARATOR ## ReturnType \
+OP(const Scalar& s) const { \
+ return this->OP(Derived::PlainObject::Constant(rows(), cols(), s)); \
+} \
+EIGEN_DEVICE_FUNC friend EIGEN_STRONG_INLINE const RCmp ## COMPARATOR ## ReturnType \
+OP(const Scalar& s, const Derived& d) { \
+ return Derived::PlainObject::Constant(d.rows(), d.cols(), s).OP(d); \
+}
+#define EIGEN_MAKE_CWISE_COMP_R_OP(OP, R_OP, RCOMPARATOR) \
+template<typename OtherDerived> \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<typename OtherDerived::Scalar, Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived> \
+OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
+{ \
+ return CwiseBinaryOp<internal::scalar_cmp_op<typename OtherDerived::Scalar, Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived>(other.derived(), derived()); \
+} \
+EIGEN_DEVICE_FUNC \
+inline const RCmp ## RCOMPARATOR ## ReturnType \
+OP(const Scalar& s) const { \
+ return Derived::PlainObject::Constant(rows(), cols(), s).R_OP(*this); \
+} \
+friend inline const Cmp ## RCOMPARATOR ## ReturnType \
+OP(const Scalar& s, const Derived& d) { \
+ return d.R_OP(Derived::PlainObject::Constant(d.rows(), d.cols(), s)); \
+}
+EIGEN_MAKE_CWISE_COMP_OP(operator<, LT)
+EIGEN_MAKE_CWISE_COMP_OP(operator<=, LE)
+EIGEN_MAKE_CWISE_COMP_R_OP(operator>, operator<, LT)
+EIGEN_MAKE_CWISE_COMP_R_OP(operator>=, operator<=, LE)
+EIGEN_MAKE_CWISE_COMP_OP(operator==, EQ)
+EIGEN_MAKE_CWISE_COMP_OP(operator!=, NEQ)
+#undef EIGEN_MAKE_CWISE_COMP_OP
+#undef EIGEN_MAKE_CWISE_COMP_R_OP
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_sum_op<Scalar,T>,Derived,Constant<T> > operator+(const T& scalar) const;
+template<typename T> friend
+const CwiseBinaryOp<internal::scalar_sum_op<T,Scalar>,Constant<T>,Derived> operator+(const T& scalar, const StorageBaseType& expr);
+#endif
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_difference_op<Scalar,T>,Derived,Constant<T> > operator-(const T& scalar) const;
+template<typename T> friend
+const CwiseBinaryOp<internal::scalar_difference_op<T,Scalar>,Constant<T>,Derived> operator-(const T& scalar, const StorageBaseType& expr);
+#endif
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(operator/,quotient)
+#else
+ template<typename T> friend
+ inline const CwiseBinaryOp<internal::scalar_quotient_op<T,Scalar>,Constant<T>,Derived>
+ operator/(const T& s,const StorageBaseType& a);
+#endif
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>
+operator^(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+ return CwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
+#if 0
+template<typename DerivedN>
+inline const CwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const DerivedN, const Derived>
+polygamma(const EIGEN_CURRENT_STORAGE_BASE_CLASS<DerivedN> &n) const
+{
+ return CwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const DerivedN, const Derived>(n.derived(), this->derived());
+}
+#endif
+template<typename DerivedQ>
+inline const CwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const DerivedQ>
+zeta(const EIGEN_CURRENT_STORAGE_BASE_CLASS<DerivedQ> &q) const
+{
+ return CwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const DerivedQ>(this->derived(), q.derived());
+}
+// end # include "../plugins/ArrayCwiseBinaryOps.h"
+# ifdef EIGEN_ARRAYBASE_PLUGIN
+# include EIGEN_ARRAYBASE_PLUGIN
+# endif
+#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+#undef EIGEN_DOC_UNARY_ADDONS
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator=(const ArrayBase& other)
+ {
+ internal::call_assignment(derived(), other.derived());
+ return derived();
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator=(const Scalar &value)
+ { Base::setConstant(value); return derived(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator+=(const Scalar& scalar);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator-=(const Scalar& scalar);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator+=(const ArrayBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator-=(const ArrayBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator*=(const ArrayBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator/=(const ArrayBase<OtherDerived>& other);
+ public:
+ EIGEN_DEVICE_FUNC
+ ArrayBase<Derived>& array() { return *this; }
+ EIGEN_DEVICE_FUNC
+ const ArrayBase<Derived>& array() const { return *this; }
+ protected:
+ EIGEN_DEVICE_FUNC
+ ArrayBase() : Base() {}
+ private:
+ explicit ArrayBase(Index);
+ ArrayBase(Index,Index);
+ template<typename OtherDerived> explicit ArrayBase(const ArrayBase<OtherDerived>&);
+ protected:
+ template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )
+ {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+ template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )
+ {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+};
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
+{
+ call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
+{
+ call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
+{
+ call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
+{
+ call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
+}
+#endif
+// end #include "src/Core/ArrayBase.h"
+// #include "src/Core/util/BlasUtil.h"
+#ifndef EIGEN_BLASUTIL_H
+#define EIGEN_BLASUTIL_H
+namespace Eigen {
+namespace internal {
+template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
+struct gebp_kernel;
+template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
+struct gemm_pack_rhs;
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
+struct gemm_pack_lhs;
+template<
+ typename Index,
+ typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+ typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
+ int ResStorageOrder>
+struct general_matrix_matrix_product;
+template<typename Index,
+ typename LhsScalar, typename LhsMapper, int LhsStorageOrder, bool ConjugateLhs,
+ typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized>
+struct general_matrix_vector_product;
+template<bool Conjugate> struct conj_if;
+template<> struct conj_if<true> {
+ template<typename T>
+ inline T operator()(const T& x) const { return numext::conj(x); }
+ template<typename T>
+ inline T pconj(const T& x) const { return internal::pconj(x); }
+};
+template<> struct conj_if<false> {
+ template<typename T>
+ inline const T& operator()(const T& x) const { return x; }
+ template<typename T>
+ inline const T& pconj(const T& x) const { return x; }
+};
+template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs>
+struct conj_helper
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const
+ { return conj_if<ConjLhs>()(x) * conj_if<ConjRhs>()(y); }
+};
+template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
+{
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
+};
+template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
+ { return c + pmul(x,y); }
+ EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
+ { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); }
+};
+template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
+ { return c + pmul(x,y); }
+ EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
+ { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
+};
+template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
+ { return c + pmul(x,y); }
+ EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
+ { return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y), - numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
+};
+template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const
+ { return conj_if<Conj>()(x)*y; }
+};
+template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::complex<RealScalar>, false,Conj>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const
+ { return x*conj_if<Conj>()(y); }
+};
+// get_factor<From,To>: converts a scalar factor from type From to type To.
+// The generic case is a plain explicit conversion.
+template<typename From,typename To> struct get_factor {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); }
+};
+// When the target is the real part of a (possibly complex) Scalar, extract
+// the real component instead of attempting a narrowing conversion.
+template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); }
+};
+// Lightweight read-only view over a contiguous 1-D array used by the BLAS-like
+// product kernels: scalar access via operator(), packet loads via load<>(),
+// and an alignment query for a given packet type.
+template<typename Scalar, typename Index>
+class BlasVectorMapper {
+ public:
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
+ return m_data[i];
+ }
+ // Load one Packet starting at element i with the given alignment hint.
+ template <typename Packet, int AlignmentType>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {
+ return ploadt<Packet, AlignmentType>(m_data + i);
+ }
+ // True when the address of element i is a multiple of sizeof(Packet).
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC bool aligned(Index i) const {
+ return (UIntPtr(m_data+i)%sizeof(Packet))==0;
+ }
+ protected:
+ Scalar* m_data;
+};
+// 1-D mapper over contiguous storage with a fixed alignment assumption baked
+// into the type; unlike BlasVectorMapper it also supports half-packet loads,
+// packet stores and prefetching, and returns mutable references.
+template<typename Scalar, typename Index, int AlignmentType>
+class BlasLinearMapper {
+ public:
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename packet_traits<Scalar>::half HalfPacket;
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
+ // Hint the hardware prefetcher at element i.
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
+ internal::prefetch(&operator()(i));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
+ return m_data[i];
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
+ return ploadt<Packet, AlignmentType>(m_data + i);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
+ return ploadt<HalfPacket, AlignmentType>(m_data + i);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
+ pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
+ }
+ protected:
+ Scalar *m_data;
+};
+// 2-D strided view over matrix storage used by the product kernels. Indexing
+// follows StorageOrder (row- vs column-major, see operator()); sub-, linear-
+// and vector-mappers give cheap re-based views into the same buffer.
+template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned>
+class blas_data_mapper {
+ public:
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename packet_traits<Scalar>::half HalfPacket;
+ typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
+ typedef BlasVectorMapper<Scalar, Index> VectorMapper;
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
+ // View rebased at (i,j) with the same stride and storage order.
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
+ getSubMapper(Index i, Index j) const {
+ return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
+ return LinearMapper(&operator()(i, j));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
+ return VectorMapper(&operator()(i, j));
+ }
+ // Row-major: i indexes rows of length m_stride; column-major: the reverse.
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
+ return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
+ return ploadt<Packet, AlignmentType>(&operator()(i, j));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
+ return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
+ }
+ // Strided scatter/gather of a sub-packet along the non-contiguous direction.
+ template<typename SubPacket>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
+ pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
+ }
+ template<typename SubPacket>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
+ return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
+ }
+ EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
+ EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
+ // Index of the first element with default alignment, or -1 when the base
+ // pointer is not even Scalar-aligned (so no aligned element exists).
+ EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
+ if (UIntPtr(m_data)%sizeof(Scalar)) {
+ return -1;
+ }
+ return internal::first_default_aligned(m_data, size);
+ }
+ protected:
+ Scalar* EIGEN_RESTRICT m_data;
+ const Index m_stride;
+};
+// Read-only variant of blas_data_mapper (maps const Scalar); only re-exposes
+// getSubMapper so the sub-view keeps the const-qualified type.
+template<typename Scalar, typename Index, int StorageOrder>
+class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {
+ public:
+ EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {}
+ EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const {
+ return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride);
+ }
+};
+// blas_traits<Xpr> decomposes an expression into a directly-accessible operand
+// plus a scalar factor / conjugation / transposition flags, so that products
+// can be routed to BLAS-style kernels. Primary template: the expression is its
+// own operand, factor 1, no conjugation, no transposition.
+template<typename XprType> struct blas_traits
+{
+ typedef typename traits<XprType>::Scalar Scalar;
+ typedef const XprType& ExtractType;
+ typedef XprType _ExtractType;
+ enum {
+ IsComplex = NumTraits<Scalar>::IsComplex,
+ IsTransposed = false,
+ NeedToConjugate = false,
+ // Direct access needs a raw pointer (DirectAccessBit) and unit inner
+ // stride (always true for vectors).
+ HasUsableDirectAccess = ( (int(XprType::Flags)&DirectAccessBit)
+ && ( bool(XprType::IsVectorAtCompileTime)
+ || int(inner_stride_at_compile_time<XprType>::ret) == 1)
+ ) ? 1 : 0
+ };
+ // Fall back to evaluating into a PlainObject when direct access is unusable.
+ typedef typename conditional<bool(HasUsableDirectAccess),
+ ExtractType,
+ typename _ExtractType::PlainObject
+ >::type DirectLinearAccessType;
+ static inline ExtractType extract(const XprType& x) { return x; }
+ static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
+};
+// conj(expr): recurse into the nested expression and toggle NeedToConjugate
+// (two conjugations cancel); the scalar factor is conjugated accordingly.
+template<typename Scalar, typename NestedXpr>
+struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+ typedef blas_traits<NestedXpr> Base;
+ typedef CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> XprType;
+ typedef typename Base::ExtractType ExtractType;
+ enum {
+ IsComplex = NumTraits<Scalar>::IsComplex,
+ NeedToConjugate = Base::NeedToConjugate ? 0 : IsComplex
+ };
+ static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+ static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }
+};
+// constant * expr: fold the constant into the scalar factor, keep the rhs.
+template<typename Scalar, typename NestedXpr, typename Plain>
+struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+ typedef blas_traits<NestedXpr> Base;
+ typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;
+ typedef typename Base::ExtractType ExtractType;
+ static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
+ static inline Scalar extractScalarFactor(const XprType& x)
+ { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }
+};
+// expr * constant: mirror of the above, folding the rhs constant.
+template<typename Scalar, typename NestedXpr, typename Plain>
+struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > >
+ : blas_traits<NestedXpr>
+{
+ typedef blas_traits<NestedXpr> Base;
+ typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType;
+ typedef typename Base::ExtractType ExtractType;
+ static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); }
+ static inline Scalar extractScalarFactor(const XprType& x)
+ { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; }
+};
+// constant * constant: resolve the ambiguity of the two folds above.
+template<typename Scalar, typename Plain1, typename Plain2>
+struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1>,
+ const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain2> > >
+ : blas_traits<CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1> >
+{};
+// -expr: negate the extracted scalar factor, keep the operand.
+template<typename Scalar, typename NestedXpr>
+struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+ typedef blas_traits<NestedXpr> Base;
+ typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
+ typedef typename Base::ExtractType ExtractType;
+ static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+ static inline Scalar extractScalarFactor(const XprType& x)
+ { return - Base::extractScalarFactor(x.nestedExpression()); }
+};
+// expr.transpose(): toggle IsTransposed (double transpose cancels) and wrap
+// the extracted operand in a Transpose view.
+template<typename NestedXpr>
+struct blas_traits<Transpose<NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+ typedef typename NestedXpr::Scalar Scalar;
+ typedef blas_traits<NestedXpr> Base;
+ typedef Transpose<NestedXpr> XprType;
+ typedef Transpose<const typename Base::_ExtractType> ExtractType;
+ typedef Transpose<const typename Base::_ExtractType> _ExtractType;
+ typedef typename conditional<bool(Base::HasUsableDirectAccess),
+ ExtractType,
+ typename ExtractType::PlainObject
+ >::type DirectLinearAccessType;
+ enum {
+ IsTransposed = Base::IsTransposed ? 0 : 1
+ };
+ static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); }
+ static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
+};
+// const-qualification is transparent to the decomposition.
+template<typename T>
+struct blas_traits<const T>
+ : blas_traits<T>
+{};
+// extract_data(m): raw data pointer of the operand underlying expression m,
+// or null (0) when blas_traits reports no usable direct access.
+template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>
+struct extract_data_selector {
+ static const typename T::Scalar* run(const T& m)
+ {
+ return blas_traits<T>::extract(m).data();
+ }
+};
+// No direct access: signal with a null pointer instead of forcing evaluation.
+template<typename T>
+struct extract_data_selector<T,false> {
+ static typename T::Scalar* run(const T&) { return 0; }
+};
+template<typename T> const typename T::Scalar* extract_data(const T& m)
+{
+ return extract_data_selector<T>::run(m);
+}
+}
+}
+#endif
+// end #include "src/Core/util/BlasUtil.h"
+// #include "src/Core/DenseStorage.h"
+#ifndef EIGEN_MATRIXSTORAGE_H
+#define EIGEN_MATRIXSTORAGE_H
+#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) X; EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
+#else
+ #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X)
+#endif
+namespace Eigen {
+namespace internal {
+ // Fast single-precision tanh approximation: a rational function P(x)/Q(x)
+ // with odd numerator and even denominator, evaluated via Horner/pmadd on
+ // scalar or packet type T. Input is clamped to [-9, 9] first.
+ template<typename T>
+ T generic_fast_tanh_float(const T& a_x)
+ {
+ // Clamp the inputs to the range [-9, 9] since anything outside
+ // this range is +/-1.0f in single-precision.
+ const T plus_9 = pset1<T>(9.f);
+ const T minus_9 = pset1<T>(-9.f);
+ const T x = pmax(pmin(a_x, plus_9), minus_9);
+ // The monomial coefficients of the numerator polynomial (odd).
+ const T alpha_1 = pset1<T>(4.89352455891786e-03f);
+ const T alpha_3 = pset1<T>(6.37261928875436e-04f);
+ const T alpha_5 = pset1<T>(1.48572235717979e-05f);
+ const T alpha_7 = pset1<T>(5.12229709037114e-08f);
+ const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
+ const T alpha_11 = pset1<T>(2.00018790482477e-13f);
+ const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
+
+ // The monomial coefficients of the denominator polynomial (even).
+ const T beta_0 = pset1<T>(4.89352518554385e-03f);
+ const T beta_2 = pset1<T>(2.26843463243900e-03f);
+ const T beta_4 = pset1<T>(1.18534705686654e-04f);
+ const T beta_6 = pset1<T>(1.19825839466702e-06f);
+
+ // Since the polynomials are odd/even, we need x^2.
+ const T x2 = pmul(x, x);
+
+ // Evaluate the numerator polynomial p.
+ T p = pmadd(x2, alpha_13, alpha_11);
+ p = pmadd(x2, p, alpha_9);
+ p = pmadd(x2, p, alpha_7);
+ p = pmadd(x2, p, alpha_5);
+ p = pmadd(x2, p, alpha_3);
+ p = pmadd(x2, p, alpha_1);
+ p = pmul(x, p);
+
+ // Evaluate the denominator polynomial p.
+ T q = pmadd(x2, beta_6, beta_4);
+ q = pmadd(x2, q, beta_2);
+ q = pmadd(x2, q, beta_0);
+
+ // Divide the numerator by the denominator.
+ return pdiv(p, q);
+ }
+
+// Tag type: selects plain_array/DenseStorage constructors that skip the
+// unaligned-array assertion (used when alignment was handled by the caller).
+struct constructor_without_unaligned_array_assert {};
+// Compile-time guard: reject fixed-size objects whose static storage would
+// exceed EIGEN_STACK_ALLOCATION_LIMIT (no-op when the limit is 0/undefined).
+template<typename T, int Size>
+EIGEN_DEVICE_FUNC
+void check_static_allocation_size()
+{
+ #if EIGEN_STACK_ALLOCATION_LIMIT
+ EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
+ #endif
+}
+// plain_array<T,Size,Options,Alignment>: the raw static buffer inside
+// fixed-size DenseStorage. Alignment defaults to the computed value unless
+// DontAlign is set. Primary template: no special alignment required.
+template <typename T, int Size, int MatrixOrArrayOptions,
+ int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
+ : compute_default_alignment<T,Size>::value >
+struct plain_array
+{
+ T array[Size];
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ check_static_allocation_size<T,Size>();
+ }
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+// Runtime check that an aligned plain_array really landed on an aligned
+// address (catches the classic "Eigen member in an unaligned object" bug).
+// Variants: disabled entirely; a gcc>=4.7 workaround routing the pointer
+// through a helper to defeat over-eager alias analysis; and the plain form.
+#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
+ #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
+#elif EIGEN_GNUC_AT_LEAST(4,7)
+ template<typename PtrType>
+ EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
+ #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
+ eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
+ && "this assertion is explained here: " \
+ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
+ " **** READ THIS WEB PAGE !!! ****");
+#else
+ #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
+ eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \
+ && "this assertion is explained here: " \
+ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
+ " **** READ THIS WEB PAGE !!! ****");
+#endif
+// plain_array specializations for 8/16/32/64-byte alignment: the buffer is
+// over-aligned and the default constructor asserts the address really is
+// aligned (mask = alignment-1); the tag-constructor skips that assert.
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 8>
+{
+ EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
+ check_static_allocation_size<T,Size>();
+ }
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 16>
+{
+ EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
+ check_static_allocation_size<T,Size>();
+ }
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 32>
+{
+ EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
+ check_static_allocation_size<T,Size>();
+ }
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 64>
+{
+ EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
+ check_static_allocation_size<T,Size>();
+ }
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+// Size==0: keep a one-element array so the type stays non-empty and data()
+// remains a valid pointer; no checks needed.
+template <typename T, int MatrixOrArrayOptions, int Alignment>
+struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
+{
+ T array[1];
+ EIGEN_DEVICE_FUNC plain_array() {}
+ EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
+};
+}
+// DenseStorage<T,Size,Rows,Cols,Options>: the storage layer of Matrix/Array.
+// Primary template: fully fixed size — static plain_array buffer, rows/cols
+// are compile-time constants, resize operations are no-ops.
+template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage;
+template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage
+{
+ internal::plain_array<T,Size,_Options> m_data;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
+ }
+ EIGEN_DEVICE_FUNC
+ explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()) {}
+ EIGEN_DEVICE_FUNC
+ DenseStorage(const DenseStorage& other) : m_data(other.m_data) {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
+ }
+ EIGEN_DEVICE_FUNC
+ DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other) m_data = other.m_data;
+ return *this;
+ }
+ // Sized constructor only validates that the request matches the fixed size.
+ EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
+ EIGEN_UNUSED_VARIABLE(size);
+ EIGEN_UNUSED_VARIABLE(rows);
+ EIGEN_UNUSED_VARIABLE(cols);
+ }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
+ EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
+ EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+// Size==0 specialization: completely empty storage, null data pointer; the
+// dynamic-dimension variants with Size==0 all forward to this one.
+template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
+{
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
+ EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
+ EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
+ EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC const T *data() const { return 0; }
+ EIGEN_DEVICE_FUNC T *data() { return 0; }
+};
+template<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+template<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+// Fixed max size, both dimensions dynamic: static buffer of Size elements
+// plus runtime m_rows/m_cols; "resizing" only updates the bookkeeping.
+template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
+{
+ internal::plain_array<T,Size,_Options> m_data;
+ Index m_rows;
+ Index m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ m_data = other.m_data;
+ m_rows = other.m_rows;
+ m_cols = other.m_cols;
+ }
+ return *this;
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
+ { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}
+ EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
+ EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+// Fixed max size, dynamic rows / fixed columns: static buffer plus runtime
+// m_rows; the column count is the compile-time constant _Cols.
+template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Size, Dynamic, _Cols, _Options>
+{
+ internal::plain_array<T,Size,_Options> m_data;
+ Index m_rows;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ m_data = other.m_data;
+ m_rows = other.m_rows;
+ }
+ return *this;
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+ EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
+ EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
+ EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+// Fixed max size, fixed rows / dynamic columns: mirror of the previous
+// specialization with m_cols as the only runtime dimension.
+template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Size, _Rows, Dynamic, _Options>
+{
+ internal::plain_array<T,Size,_Options> m_data;
+ Index m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ m_data = other.m_data;
+ m_cols = other.m_cols;
+ }
+ return *this;
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;}
+ EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
+ void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
+ void resize(Index, Index, Index cols) { m_cols = cols; }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+// Fully dynamic specialization: heap buffer (aligned unless DontAlign) plus
+// runtime rows and cols. Copy assignment is copy-and-swap; move operations
+// transfer ownership and leave the source empty/swapped.
+template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynamic, _Options>
+{
+ T *m_data;
+ Index m_rows;
+ Index m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(0), m_rows(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
+ : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+ : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
+ , m_rows(other.m_rows)
+ , m_cols(other.m_cols)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*m_cols)
+ internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
+ }
+ // Copy-and-swap: gives the strong exception guarantee.
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ DenseStorage tmp(other);
+ this->swap(tmp);
+ }
+ return *this;
+ }
+#if EIGEN_HAS_RVALUE_REFERENCES
+ EIGEN_DEVICE_FUNC
+ DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
+ : m_data(std::move(other.m_data))
+ , m_rows(std::move(other.m_rows))
+ , m_cols(std::move(other.m_cols))
+ {
+ other.m_data = nullptr;
+ other.m_rows = 0;
+ other.m_cols = 0;
+ }
+ // Move assignment swaps; the moved-from object frees our old buffer.
+ EIGEN_DEVICE_FUNC
+ DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
+ {
+ using std::swap;
+ swap(m_data, other.m_data);
+ swap(m_rows, other.m_rows);
+ swap(m_cols, other.m_cols);
+ return *this;
+ }
+#endif
+ EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
+ { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
+ EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
+ // conservativeResize preserves existing elements via realloc semantics.
+ void conservativeResize(Index size, Index rows, Index cols)
+ {
+ m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
+ m_rows = rows;
+ m_cols = cols;
+ }
+ // resize discards contents; reallocates only when the total size changes.
+ EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols)
+ {
+ if(size != m_rows*m_cols)
+ {
+ internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
+ if (size)
+ m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
+ else
+ m_data = 0;
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ }
+ m_rows = rows;
+ m_cols = cols;
+ }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data; }
+};
+// Heap storage with fixed row count _Rows and dynamic columns; total element
+// count is _Rows*m_cols. Same copy-and-swap / move-swap pattern as the fully
+// dynamic specialization.
+template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Rows, Dynamic, _Options>
+{
+ T *m_data;
+ Index m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
+ explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);
+ EIGEN_UNUSED_VARIABLE(rows);
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+ : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
+ , m_cols(other.m_cols)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows)
+ internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
+ }
+ // Copy-and-swap assignment.
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ DenseStorage tmp(other);
+ this->swap(tmp);
+ }
+ return *this;
+ }
+#if EIGEN_HAS_RVALUE_REFERENCES
+ EIGEN_DEVICE_FUNC
+ DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
+ : m_data(std::move(other.m_data))
+ , m_cols(std::move(other.m_cols))
+ {
+ other.m_data = nullptr;
+ other.m_cols = 0;
+ }
+ EIGEN_DEVICE_FUNC
+ DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
+ {
+ using std::swap;
+ swap(m_data, other.m_data);
+ swap(m_cols, other.m_cols);
+ return *this;
+ }
+#endif
+ EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
+ EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
+ {
+ m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
+ m_cols = cols;
+ }
+ // Destructive resize: reallocates only when the total size changes.
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols)
+ {
+ if(size != _Rows*m_cols)
+ {
+ internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
+ if (size)
+ m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
+ else
+ m_data = 0;
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ }
+ m_cols = cols;
+ }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data; }
+};
+// Heap storage with dynamic rows and fixed column count _Cols; mirror of the
+// fixed-rows specialization above.
+template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dynamic, _Cols, _Options>
+{
+ T *m_data;
+ Index m_rows;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
+ explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
+ EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);
+ EIGEN_UNUSED_VARIABLE(cols);
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+ : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
+ , m_rows(other.m_rows)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols)
+ internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
+ }
+ // Copy-and-swap assignment.
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ DenseStorage tmp(other);
+ this->swap(tmp);
+ }
+ return *this;
+ }
+#if EIGEN_HAS_RVALUE_REFERENCES
+ EIGEN_DEVICE_FUNC
+ DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
+ : m_data(std::move(other.m_data))
+ , m_rows(std::move(other.m_rows))
+ {
+ other.m_data = nullptr;
+ other.m_rows = 0;
+ }
+ EIGEN_DEVICE_FUNC
+ DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
+ {
+ using std::swap;
+ swap(m_data, other.m_data);
+ swap(m_rows, other.m_rows);
+ return *this;
+ }
+#endif
+ EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+ EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
+ EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
+ void conservativeResize(Index size, Index rows, Index)
+ {
+ m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
+ m_rows = rows;
+ }
+ // Destructive resize: reallocates only when the total size changes.
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index)
+ {
+ if(size != m_rows*_Cols)
+ {
+ internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
+ if (size)
+ m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
+ else
+ m_data = 0;
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ }
+ m_rows = rows;
+ }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data; }
+};
+}
+#endif
+// end #include "src/Core/DenseStorage.h"
+// #include "src/Core/NoAlias.h"
+#ifndef EIGEN_NOALIAS_H
+#define EIGEN_NOALIAS_H
+namespace Eigen {
+// NoAlias: thin proxy returned by .noalias() that routes =, += and -= through
+// call_assignment_no_alias, skipping the alias-detection temporary that plain
+// assignment would otherwise create.
+template<typename ExpressionType, template <typename> class StorageBase>
+class NoAlias
+{
+ public:
+ typedef typename ExpressionType::Scalar Scalar;
+ explicit NoAlias(ExpressionType& expression) : m_expression(expression) {}
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
+ {
+ call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
+ return m_expression;
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
+ {
+ call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return m_expression;
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
+ {
+ call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return m_expression;
+ }
+ // Access to the wrapped expression.
+ EIGEN_DEVICE_FUNC
+ ExpressionType& expression() const
+ {
+ return m_expression;
+ }
+ protected:
+ ExpressionType& m_expression;
+};
+// MatrixBase::noalias(): wrap *this in a NoAlias proxy so the subsequent
+// assignment skips the aliasing temporary.
+template<typename Derived>
+NoAlias<Derived,MatrixBase> MatrixBase<Derived>::noalias()
+{
+ return NoAlias<Derived, Eigen::MatrixBase >(derived());
+}
+}
+#endif
+// end #include "src/Core/NoAlias.h"
+// #include "src/Core/PlainObjectBase.h"
+#ifndef EIGEN_DENSESTORAGEBASE_H
+#define EIGEN_DENSESTORAGEBASE_H
+#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
+#elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=std::numeric_limits<Scalar>::quiet_NaN();
+#else
+# undef EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+#endif
+namespace Eigen {
+namespace internal {
+// Overflow check for rows*cols at resize time. The primary template handles
+// the fixed MaxSizeAtCompileTime case and is a deliberate no-op: fixed sizes
+// cannot overflow at run time.
+template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
+  template<typename Index>
+  EIGEN_DEVICE_FUNC
+  static EIGEN_ALWAYS_INLINE void run(Index, Index)
+  {
+  }
+};
+// Dynamic-size specialization: calls throw_std_bad_alloc() when rows*cols
+// would overflow the signed Index type.
+template<> struct check_rows_cols_for_overflow<Dynamic> {
+  template<typename Index>
+  EIGEN_DEVICE_FUNC
+  static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)
+  {
+    // Maximum value of signed Index, computed with an unsigned shift to
+    // avoid signed-overflow UB; division is used instead of rows*cols so the
+    // check itself cannot overflow.
+    Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1;
+    bool error = (rows == 0 || cols == 0) ? false
+               : (rows > max_index / cols);
+    if (error)
+      throw_std_bad_alloc();
+  }
+};
+template <typename Derived,
+ typename OtherDerived = Derived,
+ bool IsVector = bool(Derived::IsVectorAtCompileTime) && bool(OtherDerived::IsVectorAtCompileTime)>
+struct conservative_resize_like_impl;
+template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;
+}
+// PlainObjectBase: the common base class of Matrix and Array. It owns the
+// actual coefficient storage (m_storage) and implements resizing, raw
+// coefficient/packet access, the static Map/MapAligned factories, and the
+// _set/_init* assignment and construction plumbing used by the derived
+// plain-object types. The doxygen branch below only exists so generated docs
+// show a readable base class.
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+namespace doxygen {
+template<typename Derived> struct dense_xpr_base_dispatcher;
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct dense_xpr_base_dispatcher<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+  : public MatrixBase {};
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct dense_xpr_base_dispatcher<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+  : public ArrayBase {};
+}
+template<typename Derived>
+class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher<Derived>
+#else
+template<typename Derived>
+class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
+#endif
+{
+  public:
+    enum { Options = internal::traits<Derived>::Options };
+    typedef typename internal::dense_xpr_base<Derived>::type Base;
+    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    typedef Derived DenseType;
+    using Base::RowsAtCompileTime;
+    using Base::ColsAtCompileTime;
+    using Base::SizeAtCompileTime;
+    using Base::MaxRowsAtCompileTime;
+    using Base::MaxColsAtCompileTime;
+    using Base::MaxSizeAtCompileTime;
+    using Base::IsVectorAtCompileTime;
+    using Base::Flags;
+    // Map needs friend access to reach the raw storage.
+    template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;
+    friend class Eigen::Map<Derived, Unaligned>;
+    typedef Eigen::Map<Derived, Unaligned> MapType;
+    friend class Eigen::Map<const Derived, Unaligned>;
+    typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
+#if EIGEN_MAX_ALIGN_BYTES>0
+    friend class Eigen::Map<Derived, AlignedMax>;
+    friend class Eigen::Map<const Derived, AlignedMax>;
+#endif
+    typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
+    typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
+    template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
+    template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
+    template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, AlignedMax, StrideType> type; };
+    template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, AlignedMax, StrideType> type; };
+  protected:
+    // The single data member: the actual coefficient storage (fixed-size
+    // array or heap buffer, selected by the DenseStorage template).
+    DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
+  public:
+    enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits<Derived>::Alignment>0) };
+    EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
+    EIGEN_DEVICE_FUNC
+    Base& base() { return *static_cast<Base*>(this); }
+    EIGEN_DEVICE_FUNC
+    const Base& base() const { return *static_cast<const Base*>(this); }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }
+    // Raw coefficient access: index arithmetic depends on storage order
+    // (RowMajorBit in Flags selects row-major addressing).
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
+    {
+      if(Flags & RowMajorBit)
+        return m_storage.data()[colId + rowId * m_storage.cols()];
+      else
+        return m_storage.data()[rowId + colId * m_storage.rows()];
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
+    {
+      return m_storage.data()[index];
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
+    {
+      if(Flags & RowMajorBit)
+        return m_storage.data()[colId + rowId * m_storage.cols()];
+      else
+        return m_storage.data()[rowId + colId * m_storage.rows()];
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
+    {
+      return m_storage.data()[index];
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
+    {
+      if(Flags & RowMajorBit)
+        return m_storage.data()[colId + rowId * m_storage.cols()];
+      else
+        return m_storage.data()[rowId + colId * m_storage.rows()];
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
+    {
+      return m_storage.data()[index];
+    }
+    // SIMD packet load/store; LoadMode/StoreMode encode the alignment
+    // assumption the caller can make.
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
+    {
+      return internal::ploadt<PacketScalar, LoadMode>
+               (m_storage.data() + (Flags & RowMajorBit
+                                    ? colId + rowId * m_storage.cols()
+                                    : rowId + colId * m_storage.rows()));
+    }
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
+    {
+      return internal::ploadt<PacketScalar, LoadMode>(m_storage.data() + index);
+    }
+    template<int StoreMode>
+    EIGEN_STRONG_INLINE void writePacket(Index rowId, Index colId, const PacketScalar& val)
+    {
+      internal::pstoret<Scalar, PacketScalar, StoreMode>
+              (m_storage.data() + (Flags & RowMajorBit
+                                   ? colId + rowId * m_storage.cols()
+                                   : rowId + colId * m_storage.rows()), val);
+    }
+    template<int StoreMode>
+    EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& val)
+    {
+      internal::pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, val);
+    }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const
+    { return m_storage.data(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data()
+    { return m_storage.data(); }
+    // Destructive resize to rows x cols; existing coefficients are NOT
+    // preserved (use conservativeResize for that).
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
+    {
+      eigen_assert(   EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime)
+                   && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime)
+                   && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime)
+                   && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime)
+                   && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array.");
+      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
+      #ifdef EIGEN_INITIALIZE_COEFFS
+      Index size = rows*cols;
+      bool size_changed = size != this->size();
+      m_storage.resize(size, rows, cols);
+      if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+      #else
+      m_storage.resize(rows*cols, rows, cols);
+      #endif
+    }
+    // Vector-only resize; picks the row/column shape from the compile-time
+    // vector orientation.
+    EIGEN_DEVICE_FUNC
+    inline void resize(Index size)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
+      eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0);
+      #ifdef EIGEN_INITIALIZE_COEFFS
+      bool size_changed = size != this->size();
+      #endif
+      if(RowsAtCompileTime == 1)
+        m_storage.resize(size, 1, size);
+      else
+        m_storage.resize(size, size, 1);
+      #ifdef EIGEN_INITIALIZE_COEFFS
+      if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+      #endif
+    }
+    EIGEN_DEVICE_FUNC
+    inline void resize(NoChange_t, Index cols)
+    {
+      resize(rows(), cols);
+    }
+    EIGEN_DEVICE_FUNC
+    inline void resize(Index rows, NoChange_t)
+    {
+      resize(rows, cols());
+    }
+    // Resize to match another expression's shape, honoring this type's
+    // compile-time vector orientation when it is a vector.
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
+    {
+      const OtherDerived& other = _other.derived();
+      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.rows(), other.cols());
+      const Index othersize = other.rows()*other.cols();
+      if(RowsAtCompileTime == 1)
+      {
+        eigen_assert(other.rows() == 1 || other.cols() == 1);
+        resize(1, othersize);
+      }
+      else if(ColsAtCompileTime == 1)
+      {
+        eigen_assert(other.rows() == 1 || other.cols() == 1);
+        resize(othersize, 1);
+      }
+      else resize(other.rows(), other.cols());
+    }
+    // conservativeResize variants preserve overlapping coefficients; the
+    // heavy lifting is delegated to internal::conservative_resize_like_impl.
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols)
+    {
+      internal::conservative_resize_like_impl<Derived>::run(*this, rows, cols);
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t)
+    {
+      conservativeResize(rows, cols());
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols)
+    {
+      conservativeResize(rows(), cols);
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void conservativeResize(Index size)
+    {
+      internal::conservative_resize_like_impl<Derived>::run(*this, size);
+    }
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
+    {
+      internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)
+    {
+      return _set(other);
+    }
+    // Assignment without alias analysis beyond resizing; caller guarantees
+    // no aliasing.
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase<OtherDerived>& other)
+    {
+      _resize_to_match(other);
+      return Base::lazyAssign(other.derived());
+    }
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue<OtherDerived>& func)
+    {
+      resize(func.rows(), func.cols());
+      return Base::operator=(func);
+    }
+  protected:
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
+    {
+    }
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    // Tag constructor: skips the unaligned-array debug assertion in
+    // DenseStorage.
+    EIGEN_DEVICE_FUNC
+    explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
+      : m_storage(internal::constructor_without_unaligned_array_assert())
+    {
+    }
+#endif
+#if EIGEN_HAS_RVALUE_REFERENCES
+    EIGEN_DEVICE_FUNC
+    PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
+      : m_storage( std::move(other.m_storage) )
+    {
+    }
+    // Move-assignment is implemented as a storage swap.
+    EIGEN_DEVICE_FUNC
+    PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT
+    {
+      using std::swap;
+      swap(m_storage, other.m_storage);
+      return *this;
+    }
+#endif
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
+      : Base(), m_storage(other.m_storage) { }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
+      : m_storage(size, rows, cols)
+    {
+    }
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
+      : m_storage()
+    {
+      _check_template_params();
+      resizeLike(other);
+      _set_noalias(other);
+    }
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
+      : m_storage()
+    {
+      _check_template_params();
+      resizeLike(other);
+      *this = other.derived();
+    }
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other)
+    {
+      _check_template_params();
+      resize(other.rows(), other.cols());
+      other.evalTo(this->derived());
+    }
+  public:
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
+    {
+      _resize_to_match(other);
+      Base::operator=(other.derived());
+      return this->derived();
+    }
+    // Static Map/MapAligned factories: view external memory as this plain
+    // type, without copying.
+    static inline ConstMapType Map(const Scalar* data)
+    { return ConstMapType(data); }
+    static inline MapType Map(Scalar* data)
+    { return MapType(data); }
+    static inline ConstMapType Map(const Scalar* data, Index size)
+    { return ConstMapType(data, size); }
+    static inline MapType Map(Scalar* data, Index size)
+    { return MapType(data, size); }
+    static inline ConstMapType Map(const Scalar* data, Index rows, Index cols)
+    { return ConstMapType(data, rows, cols); }
+    static inline MapType Map(Scalar* data, Index rows, Index cols)
+    { return MapType(data, rows, cols); }
+    static inline ConstAlignedMapType MapAligned(const Scalar* data)
+    { return ConstAlignedMapType(data); }
+    static inline AlignedMapType MapAligned(Scalar* data)
+    { return AlignedMapType(data); }
+    static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size)
+    { return ConstAlignedMapType(data, size); }
+    static inline AlignedMapType MapAligned(Scalar* data, Index size)
+    { return AlignedMapType(data, size); }
+    static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
+    { return ConstAlignedMapType(data, rows, cols); }
+    static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
+    { return AlignedMapType(data, rows, cols); }
+    // Strided variants of the Map factories.
+    template<int Outer, int Inner>
+    static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
+    { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
+    { return typename StridedMapType<Stride<Outer, Inner> >::type(data, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+    { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+    { return typename StridedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+    { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+    { return typename StridedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
+    { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
+    { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+    { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+    { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+    { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+    { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+    // Resizing overloads of the setters declared in Base; defined elsewhere.
+    using Base::setConstant;
+    EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val);
+    EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val);
+    using Base::setZero;
+    EIGEN_DEVICE_FUNC Derived& setZero(Index size);
+    EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);
+    using Base::setOnes;
+    EIGEN_DEVICE_FUNC Derived& setOnes(Index size);
+    EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);
+    using Base::setRandom;
+    Derived& setRandom(Index size);
+    Derived& setRandom(Index rows, Index cols);
+    #ifdef EIGEN_PLAINOBJECTBASE_PLUGIN
+    #include EIGEN_PLAINOBJECTBASE_PLUGIN
+    #endif
+  protected:
+    // Resize to another expression's shape, unless automatic resizing is
+    // disabled, in which case only assert the shapes already match.
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
+    {
+      #ifdef EIGEN_NO_AUTOMATIC_RESIZING
+      eigen_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size())
+                 : (rows() == other.rows() && cols() == other.cols())))
+        && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
+      EIGEN_ONLY_USED_FOR_DEBUG(other);
+      #else
+      resizeLike(other);
+      #endif
+    }
+    // Full assignment (with aliasing analysis done by call_assignment).
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
+    {
+      internal::call_assignment(this->derived(), other.derived());
+      return this->derived();
+    }
+    // Assignment that skips the aliasing temporary; used internally where
+    // no aliasing is possible (e.g. just-constructed objects).
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other)
+    {
+      internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
+      return this->derived();
+    }
+    // _init2/_init1 overload families disambiguate the 2-arg and 1-arg
+    // constructors via SFINAE: sizes vs. coefficient values vs. raw data vs.
+    // expressions.
+    template<typename T0, typename T1>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
+    {
+      EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
+                          bool(NumTraits<T1>::IsInteger),
+                          FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
+      resize(rows,cols);
+    }
+    template<typename T0, typename T1>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
+      m_storage.data()[0] = Scalar(val0);
+      m_storage.data()[1] = Scalar(val1);
+    }
+    template<typename T0, typename T1>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1,
+                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)
+                                                                  && (internal::is_same<T0,Index>::value)
+                                                                  && (internal::is_same<T1,Index>::value)
+                                                                  && Base::SizeAtCompileTime==2,T1>::type* = 0)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
+      m_storage.data()[0] = Scalar(val0);
+      m_storage.data()[1] = Scalar(val1);
+    }
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<    (Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value)
+                                                                              && ((!internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0)
+    {
+      const bool is_integer = NumTraits<T>::IsInteger;
+      EIGEN_UNUSED_VARIABLE(is_integer);
+      EIGEN_STATIC_ASSERT(is_integer,
+                          FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
+      resize(size);
+    }
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
+      m_storage.data()[0] = val0;
+    }
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Index& val0,
+                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)
+                                                                  && (internal::is_same<Index,T>::value)
+                                                                  && Base::SizeAtCompileTime==1
+                                                                  && internal::is_convertible<T, Scalar>::value,T*>::type* = 0)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
+      m_storage.data()[0] = Scalar(val0);
+    }
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Scalar* data){
+      this->_set_noalias(ConstMapType(data));
+    }
+    template<typename T, typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){
+      this->_set_noalias(other);
+    }
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Derived& other){
+      this->_set_noalias(other);
+    }
+    template<typename T, typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){
+      this->derived() = other;
+    }
+    template<typename T, typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const ReturnByValue<OtherDerived>& other)
+    {
+      resize(other.rows(), other.cols());
+      other.evalTo(this->derived());
+    }
+    template<typename T, typename OtherDerived, int ColsAtCompileTime>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
+    {
+      this->derived() = r;
+    }
+    // Array-only: one scalar argument fills a fixed-size array.
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Scalar& val0,
+                                    typename internal::enable_if<    Base::SizeAtCompileTime!=Dynamic
+                                                                  && Base::SizeAtCompileTime!=1
+                                                                  && internal::is_convertible<T, Scalar>::value
+                                                                  && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T>::type* = 0)
+    {
+      Base::setConstant(val0);
+    }
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Index& val0,
+                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)
+                                                                  && (internal::is_same<Index,T>::value)
+                                                                  && Base::SizeAtCompileTime!=Dynamic
+                                                                  && Base::SizeAtCompileTime!=1
+                                                                  && internal::is_convertible<T, Scalar>::value
+                                                                  && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T*>::type* = 0)
+    {
+      Base::setConstant(val0);
+    }
+    template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
+    friend struct internal::matrix_swap_impl;
+  public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    // Swap with another dense object; same-type dynamic-size objects swap
+    // storage pointers (O(1)), otherwise coefficients are exchanged.
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    void swap(DenseBase<OtherDerived> & other)
+    {
+      enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
+      internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.derived());
+    }
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    void swap(DenseBase<OtherDerived> const & other)
+    { Base::swap(other.derived()); }
+    // Compile-time sanity checks on the template parameters (row-major only
+    // for row vectors, non-negative sizes, Max sizes consistent, valid
+    // Options bits).
+    EIGEN_DEVICE_FUNC
+    static EIGEN_STRONG_INLINE void _check_template_params()
+    {
+      EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
+                        && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0)
+                        && ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0))
+                        && ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0))
+                        && ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0))
+                        && ((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0))
+                        && (MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime==Dynamic)
+                        && (MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic)
+                        && (Options & (DontAlign|RowMajor)) == Options),
+        INVALID_MATRIX_TEMPLATE_PARAMETERS)
+    }
+    enum { IsPlainObjectBase = 1 };
+#endif
+};
+namespace internal {
+// conservative_resize_like_impl: implements conservativeResize for 2-D
+// (non-vector) objects. When only the "outer" dimension changes (cols for
+// row-major, rows for column-major) the storage can be resized in place;
+// otherwise the overlapping block is copied through a temporary and swapped.
+template <typename Derived, typename OtherDerived, bool IsVector>
+struct conservative_resize_like_impl
+{
+  static void run(DenseBase<Derived>& _this, Index rows, Index cols)
+  {
+    if (_this.rows() == rows && _this.cols() == cols) return;
+    EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
+    if ( ( Derived::IsRowMajor && _this.cols() == cols) ||
+         (!Derived::IsRowMajor && _this.rows() == rows) )
+    {
+      // Inner dimension unchanged: element layout is preserved, resize in
+      // place.
+      internal::check_rows_cols_for_overflow<Derived::MaxSizeAtCompileTime>::run(rows, cols);
+      _this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
+    }
+    else
+    {
+      // Layout changes: copy the overlapping top-left block into a fresh
+      // object and swap it in.
+      typename Derived::PlainObject tmp(rows,cols);
+      const Index common_rows = numext::mini(rows, _this.rows());
+      const Index common_cols = numext::mini(cols, _this.cols());
+      tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
+      _this.derived().swap(tmp);
+    }
+  }
+  // Resize to another expression's shape, filling the newly exposed rows or
+  // columns from `other` when the in-place path is taken.
+  static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
+  {
+    if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
+    EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
+    EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived)
+    if ( ( Derived::IsRowMajor && _this.cols() == other.cols()) ||
+         (!Derived::IsRowMajor && _this.rows() == other.rows()) )
+    {
+      const Index new_rows = other.rows() - _this.rows();
+      const Index new_cols = other.cols() - _this.cols();
+      _this.derived().m_storage.conservativeResize(other.size(),other.rows(),other.cols());
+      if (new_rows>0)
+        _this.bottomRightCorner(new_rows, other.cols()) = other.bottomRows(new_rows);
+      else if (new_cols>0)
+        _this.bottomRightCorner(other.rows(), new_cols) = other.rightCols(new_cols);
+    }
+    else
+    {
+      // Start from a copy of `other`, overwrite the overlap with the old
+      // values, then swap.
+      typename Derived::PlainObject tmp(other);
+      const Index common_rows = numext::mini(tmp.rows(), _this.rows());
+      const Index common_cols = numext::mini(tmp.cols(), _this.cols());
+      tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
+      _this.derived().swap(tmp);
+    }
+  }
+};
+// Vector specialization (IsVector==true): element layout never changes when a
+// vector grows or shrinks, so the storage is always resized in place; new
+// trailing elements are copied from `other` when resizing like another vector.
+template <typename Derived, typename OtherDerived>
+struct conservative_resize_like_impl<Derived,OtherDerived,true>
+  : conservative_resize_like_impl<Derived,OtherDerived,false>
+{
+  using conservative_resize_like_impl<Derived,OtherDerived,false>::run;
+  static void run(DenseBase<Derived>& _this, Index size)
+  {
+    // Orientation (row vs. column vector) is fixed at compile time.
+    const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : size;
+    const Index new_cols = Derived::RowsAtCompileTime==1 ? size : 1;
+    _this.derived().m_storage.conservativeResize(size,new_rows,new_cols);
+  }
+  static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
+  {
+    if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
+    const Index num_new_elements = other.size() - _this.size();
+    const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : other.rows();
+    const Index new_cols = Derived::RowsAtCompileTime==1 ? other.cols() : 1;
+    _this.derived().m_storage.conservativeResize(other.size(),new_rows,new_cols);
+    if (num_new_elements > 0)
+      _this.tail(num_new_elements) = other.tail(num_new_elements);
+  }
+};
+// matrix_swap_impl: generic case (SwapPointers==false) swaps coefficient by
+// coefficient via DenseBase::swap.
+template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
+struct matrix_swap_impl
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(MatrixTypeA& a, MatrixTypeB& b)
+  {
+    a.base().swap(b);
+  }
+};
+// SwapPointers==true specialization (same type, dynamic size): O(1) swap of
+// the underlying DenseStorage objects instead of exchanging coefficients.
+template<typename MatrixTypeA, typename MatrixTypeB>
+struct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(MatrixTypeA& a, MatrixTypeB& b)
+  {
+    static_cast<typename MatrixTypeA::Base&>(a).m_storage.swap(static_cast<typename MatrixTypeB::Base&>(b).m_storage);
+  }
+};
+}
+}
+#endif
+// end #include "src/Core/PlainObjectBase.h"
+// #include "src/Core/Matrix.h"
+#ifndef EIGEN_MATRIX_H
+#define EIGEN_MATRIX_H
+namespace Eigen {
+namespace internal {
+// Compile-time traits of Matrix: scalar/storage/index types and the enum of
+// sizes, flags, strides and alignment derived from the template parameters.
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+private:
+  enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
+  typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
+  enum {
+      row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
+      is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
+      max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
+      default_alignment = compute_default_alignment<_Scalar,max_size>::value,
+      // DontAlign in Options disables alignment entirely.
+      actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
+      required_alignment = unpacket_traits<PacketScalar>::alignment,
+      // Vectorized access is advertised only when the scalar is vectorizable
+      // and the storage alignment suffices (or unaligned vectorization is on).
+      packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0
+    };
+public:
+  typedef _Scalar Scalar;
+  typedef Dense StorageKind;
+  typedef Eigen::Index StorageIndex;
+  typedef MatrixXpr XprKind;
+  enum {
+    RowsAtCompileTime = _Rows,
+    ColsAtCompileTime = _Cols,
+    MaxRowsAtCompileTime = _MaxRows,
+    MaxColsAtCompileTime = _MaxCols,
+    Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
+    Options = _Options,
+    InnerStrideAtCompileTime = 1,
+    OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
+    EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
+    Alignment = actual_alignment
+  };
+};
+}
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+class Matrix
+ : public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+ public:
+ typedef PlainObjectBase<Matrix> Base;
+ enum { Options = _Options };
+ EIGEN_DENSE_PUBLIC_INTERFACE(Matrix)
+ typedef typename Base::PlainObject PlainObject;
+ using Base::base;
+ using Base::coeffRef;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
+ {
+ return Base::_set(other);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other)
+ {
+ return Base::_set(other);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
+ {
+ return Base::operator=(other);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)
+ {
+ return Base::operator=(func);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix() : Base()
+ {
+ Base::_check_template_params();
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ }
+ EIGEN_DEVICE_FUNC
+ explicit Matrix(internal::constructor_without_unaligned_array_assert)
+ : Base(internal::constructor_without_unaligned_array_assert())
+ { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
+#if EIGEN_HAS_RVALUE_REFERENCES
+ EIGEN_DEVICE_FUNC
+ Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
+ : Base(std::move(other))
+ {
+ Base::_check_template_params();
+ if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
+ Base::_set_noalias(other);
+ }
+ EIGEN_DEVICE_FUNC
+ Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
+ {
+ other.swap(*this);
+ return *this;
+ }
+#endif
+ // Generic one- and two-argument constructors. The real overload resolution
+ // happens in PlainObjectBase::_init1/_init2 (outside this view), which
+ // dispatch on T/T0/T1 (scalar value, size, pointer-to-data, ...). The #else
+ // branch only declares human-readable signatures for the Doxygen docs.
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE explicit Matrix(const T& x)
+ {
+ Base::_check_template_params();
+ Base::template _init1<T>(x);
+ }
+ template<typename T0, typename T1>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
+ {
+ Base::_check_template_params();
+ Base::template _init2<T0,T1>(x, y);
+ }
+ #else
+ EIGEN_DEVICE_FUNC
+ explicit Matrix(const Scalar *data);
+ EIGEN_STRONG_INLINE explicit Matrix(Index dim);
+ Matrix(const Scalar& x);
+ EIGEN_DEVICE_FUNC
+ Matrix(Index rows, Index cols);
+ Matrix(const Scalar& x, const Scalar& y);
+ #endif
+ // Three-coefficient constructor; statically restricted to size-3 vectors.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
+ {
+ Base::_check_template_params();
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3)
+ m_storage.data()[0] = x;
+ m_storage.data()[1] = y;
+ m_storage.data()[2] = z;
+ }
+ // Four-coefficient constructor; statically restricted to size-4 vectors.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
+ {
+ Base::_check_template_params();
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4)
+ m_storage.data()[0] = x;
+ m_storage.data()[1] = y;
+ m_storage.data()[2] = z;
+ m_storage.data()[3] = w;
+ }
+ // Copy constructor.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other)
+ { }
+ // Converting constructor from any EigenBase expression.
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
+ : Base(other.derived())
+ { }
+ // Plain storage is contiguous: unit inner stride, outer stride = inner size.
+ EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
+ EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
+ // Construction/assignment from rotations; declared here, defined elsewhere
+ // (presumably in the Geometry module — not visible in this view).
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
+ // User extension point: inject extra members via EIGEN_MATRIX_PLUGIN.
+ #ifdef EIGEN_MATRIX_PLUGIN
+ #include EIGEN_MATRIX_PLUGIN
+ #endif
+ protected:
+ template <typename Derived, typename OtherDerived, bool IsVector>
+ friend struct internal::conservative_resize_like_impl;
+ using Base::m_storage;
+};
+// Convenience typedef generators: MatrixNt / VectorNt / RowVectorNt and the
+// mixed fixed/dynamic MatrixNXt / MatrixXNt families, instantiated for
+// N in {2,3,4,X(=Dynamic)} and scalar suffixes i,f,d,cf,cd.
+#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
+ \
+typedef Matrix<Type, Size, Size> Matrix##SizeSuffix##TypeSuffix; \
+ \
+typedef Matrix<Type, Size, 1> Vector##SizeSuffix##TypeSuffix; \
+ \
+typedef Matrix<Type, 1, Size> RowVector##SizeSuffix##TypeSuffix;
+#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \
+ \
+typedef Matrix<Type, Size, Dynamic> Matrix##Size##X##TypeSuffix; \
+ \
+typedef Matrix<Type, Dynamic, Size> Matrix##X##Size##TypeSuffix;
+#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int, i)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float, f)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(double, d)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<float>, cf)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
+// All three helper macros are cleanly undefined here (contrast with the
+// Array section below).
+#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
+#undef EIGEN_MAKE_TYPEDEFS
+#undef EIGEN_MAKE_FIXED_TYPEDEFS
+}
+#endif
+// end #include "src/Core/Matrix.h"
+// #include "src/Core/Array.h"
+#ifndef EIGEN_ARRAY_H
+#define EIGEN_ARRAY_H
+namespace Eigen {
+namespace internal {
+// Array reuses Matrix's traits wholesale and only overrides the expression
+// kind: ArrayXpr selects coefficient-wise semantics instead of linear-algebra
+// (MatrixXpr) semantics.
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+ typedef ArrayXpr XprKind;
+ typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
+};
+}
+// General-purpose dense array with coefficient-wise semantics. Mirrors the
+// Matrix class above almost member-for-member; the differences are the extra
+// operator=(Scalar) (fill with a constant) and the SFINAE-guarded EigenBase
+// conversion constructor at the bottom.
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+class Array
+ : public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+ public:
+ typedef PlainObjectBase<Array> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Array)
+ enum { Options = _Options };
+ typedef typename Base::PlainObject PlainObject;
+ protected:
+ template <typename Derived, typename OtherDerived, bool IsVector>
+ friend struct internal::conservative_resize_like_impl;
+ using Base::m_storage;
+ public:
+ using Base::base;
+ using Base::coeff;
+ using Base::coeffRef;
+ // Assign from any EigenBase expression.
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
+ {
+ return Base::operator=(other);
+ }
+ // Assigning a scalar fills every coefficient with that value.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array& operator=(const Scalar &value)
+ {
+ Base::setConstant(value);
+ return *this;
+ }
+ // Assign from any dense expression via Base::_set (PlainObjectBase).
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array& operator=(const DenseBase<OtherDerived>& other)
+ {
+ return Base::_set(other);
+ }
+ // Copy assignment.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array& operator=(const Array& other)
+ {
+ return Base::_set(other);
+ }
+ // Default constructor; coefficients only initialized if the
+ // EIGEN_INITIALIZE_COEFFS option macro expands to real code.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array() : Base()
+ {
+ Base::_check_template_params();
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ }
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ // Internal tag constructor that skips the unaligned-array assertion.
+ EIGEN_DEVICE_FUNC
+ Array(internal::constructor_without_unaligned_array_assert)
+ : Base(internal::constructor_without_unaligned_array_assert())
+ {
+ Base::_check_template_params();
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ }
+#endif
+#if EIGEN_HAS_RVALUE_REFERENCES
+ // Move constructor; fixed-size storage cannot be stolen, so the
+ // coefficients are copied via _set_noalias in that case.
+ EIGEN_DEVICE_FUNC
+ Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
+ : Base(std::move(other))
+ {
+ Base::_check_template_params();
+ if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
+ Base::_set_noalias(other);
+ }
+ // Move assignment via swap.
+ EIGEN_DEVICE_FUNC
+ Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
+ {
+ other.swap(*this);
+ return *this;
+ }
+#endif
+ // Generic constructors dispatching through PlainObjectBase::_init1/_init2;
+ // the #else branch only declares readable signatures for Doxygen.
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE explicit Array(const T& x)
+ {
+ Base::_check_template_params();
+ Base::template _init1<T>(x);
+ }
+ template<typename T0, typename T1>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
+ {
+ Base::_check_template_params();
+ this->template _init2<T0,T1>(val0, val1);
+ }
+ #else
+ EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE explicit Array(Index dim);
+ Array(const Scalar& value);
+ Array(Index rows, Index cols);
+ Array(const Scalar& val0, const Scalar& val1);
+ #endif
+ // Three-coefficient constructor; size-3 vectors only (static assert).
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
+ {
+ Base::_check_template_params();
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3)
+ m_storage.data()[0] = val0;
+ m_storage.data()[1] = val1;
+ m_storage.data()[2] = val2;
+ }
+ // Four-coefficient constructor; size-4 vectors only (static assert).
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
+ {
+ Base::_check_template_params();
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4)
+ m_storage.data()[0] = val0;
+ m_storage.data()[1] = val1;
+ m_storage.data()[2] = val2;
+ m_storage.data()[3] = val3;
+ }
+ // Copy constructor.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const Array& other)
+ : Base(other)
+ { }
+ private:
+ // Private tag type used below so the enable_if default argument cannot be
+ // supplied by callers.
+ struct PrivateType {};
+ public:
+ // Converting constructor from EigenBase, enabled only when the source
+ // scalar type is convertible to this Array's Scalar.
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,
+ typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
+ PrivateType>::type = PrivateType())
+ : Base(other.derived())
+ { }
+ // Plain storage is contiguous: unit inner stride, outer stride = inner size.
+ EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
+ EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
+ // User extension point: inject extra members via EIGEN_ARRAY_PLUGIN.
+ #ifdef EIGEN_ARRAY_PLUGIN
+ #include EIGEN_ARRAY_PLUGIN
+ #endif
+ private:
+ template<typename MatrixType, typename OtherDerived, bool SwapPointers>
+ friend struct internal::matrix_swap_impl;
+};
+// Convenience typedef generators for arrays. Note the naming scheme differs
+// from the Matrix one: 2D arrays double the size suffix (e.g. Array22f for a
+// 2x2 float array) while 1D column arrays use a single suffix (Array2f).
+#define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
+ \
+typedef Array<Type, Size, Size> Array##SizeSuffix##SizeSuffix##TypeSuffix; \
+ \
+typedef Array<Type, Size, 1> Array##SizeSuffix##TypeSuffix;
+#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \
+ \
+typedef Array<Type, Size, Dynamic> Array##Size##X##TypeSuffix; \
+ \
+typedef Array<Type, Dynamic, Size> Array##X##Size##TypeSuffix;
+#define EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 2, 2) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 3, 3) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 4, 4) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \
+EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \
+EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \
+EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(int, i)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(float, f)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(double, d)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<float>, cf)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
+// Clean up the typedef-generator helper macros so they do not leak into user
+// translation units.
+#undef EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES
+#undef EIGEN_MAKE_ARRAY_TYPEDEFS
+// NOTE(review): EIGEN_MAKE_ARRAY_TYPEDEFS_LARGE is never defined in this
+// file, so the next #undef is a no-op; it is kept as-is for parity with the
+// vendored upstream text (#undef of an undefined name is well-defined).
+#undef EIGEN_MAKE_ARRAY_TYPEDEFS_LARGE
+// Fix: EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS (defined above) was the macro that
+// actually needed undefining — without this it leaked into every including
+// translation unit, unlike the Matrix section which undefs all of its
+// helper macros.
+#undef EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS
+// Helper macros letting user code import the convenience typedefs into its
+// own namespace with one line (EIGEN_USING_ARRAY_TYPEDEFS).
+// NOTE(review): despite the ARRAY name, these pull in the Matrix/Vector/
+// RowVector typedefs — this matches the vendored upstream text; presumably
+// historical, verify against upstream before "fixing".
+#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
+using Eigen::Matrix##SizeSuffix##TypeSuffix; \
+using Eigen::Vector##SizeSuffix##TypeSuffix; \
+using Eigen::RowVector##SizeSuffix##TypeSuffix;
+#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(TypeSuffix) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X)
+#define EIGEN_USING_ARRAY_TYPEDEFS \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(i) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(f) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(d) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cf) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cd)
+}
+#endif
+// end #include "src/Core/Array.h"
+// #include "src/Core/CwiseBinaryOp.h"
+#ifndef EIGEN_CWISE_BINARY_OP_H
+#define EIGEN_CWISE_BINARY_OP_H
+namespace Eigen {
+namespace internal {
+// Compile-time metadata for a coefficient-wise binary expression. Sizes and
+// expression kind are taken from the LHS ("Ancestor"); the result Scalar is
+// whatever the functor returns for (LhsScalar, RhsScalar); storage kind,
+// index type and storage order are promoted from both sides.
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+ typedef typename remove_all<Lhs>::type Ancestor;
+ typedef typename traits<Ancestor>::XprKind XprKind;
+ enum {
+ RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
+ ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
+ MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
+ };
+ // Result scalar = return type of BinaryOp applied to the operand scalars.
+ typedef typename result_of<
+ BinaryOp(
+ const typename Lhs::Scalar&,
+ const typename Rhs::Scalar&
+ )
+ >::type Scalar;
+ typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
+ typename traits<Rhs>::StorageKind,
+ BinaryOp>::ret StorageKind;
+ typedef typename promote_index_type<typename traits<Lhs>::StorageIndex,
+ typename traits<Rhs>::StorageIndex>::type StorageIndex;
+ typedef typename Lhs::Nested LhsNested;
+ typedef typename Rhs::Nested RhsNested;
+ typedef typename remove_reference<LhsNested>::type _LhsNested;
+ typedef typename remove_reference<RhsNested>::type _RhsNested;
+ enum {
+ Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value
+ };
+};
+}
+template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
+class CwiseBinaryOpImpl;
+// Lazy coefficient-wise binary expression: applies BinaryOp to matching
+// coefficients of two expressions of the same dimensions. The expression is
+// read-only (no_assignment_operator) and stores its operands by value or
+// reference as decided by internal::ref_selector.
+template<typename BinaryOp, typename LhsType, typename RhsType>
+class CwiseBinaryOp :
+ public CwiseBinaryOpImpl<
+ BinaryOp, LhsType, RhsType,
+ typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
+ typename internal::traits<RhsType>::StorageKind,
+ BinaryOp>::ret>,
+ internal::no_assignment_operator
+{
+ public:
+ typedef typename internal::remove_all<BinaryOp>::type Functor;
+ typedef typename internal::remove_all<LhsType>::type Lhs;
+ typedef typename internal::remove_all<RhsType>::type Rhs;
+ // NOTE(review): the Base typedef mixes LhsType with Rhs (= remove_all of
+ // RhsType); their traits coincide, and this matches the vendored upstream
+ // text — do not "normalize" locally.
+ typedef typename CwiseBinaryOpImpl<
+ BinaryOp, LhsType, RhsType,
+ typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
+ typename internal::traits<Rhs>::StorageKind,
+ BinaryOp>::ret>::Base Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)
+ typedef typename internal::ref_selector<LhsType>::type LhsNested;
+ typedef typename internal::ref_selector<RhsType>::type RhsNested;
+ typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
+ typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
+ // Checks scalar compatibility and (statically + at runtime) equal sizes.
+ // NOTE(review): EIGEN_CHECK_BINARY_COMPATIBILIY is spelled without the
+ // final 'T' — presumably matching its definition elsewhere in this file;
+ // verify before renaming.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
+ : m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
+ {
+ EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar);
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
+ eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
+ }
+ // Prefer the operand whose dimension is known at compile time; fall back
+ // to the RHS when the LHS is dynamic.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index rows() const {
+ if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
+ return m_rhs.rows();
+ else
+ return m_lhs.rows();
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index cols() const {
+ if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
+ return m_rhs.cols();
+ else
+ return m_lhs.cols();
+ }
+ // Accessors used by the evaluators.
+ EIGEN_DEVICE_FUNC
+ const _LhsNested& lhs() const { return m_lhs; }
+ EIGEN_DEVICE_FUNC
+ const _RhsNested& rhs() const { return m_rhs; }
+ EIGEN_DEVICE_FUNC
+ const BinaryOp& functor() const { return m_functor; }
+ protected:
+ LhsNested m_lhs;
+ RhsNested m_rhs;
+ const BinaryOp m_functor;
+};
+// Generic implementation base: just re-exports the dense/sparse base class
+// chosen by generic_xpr_base for this expression type.
+template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
+class CwiseBinaryOpImpl
+ : public internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
+{
+public:
+ typedef typename internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
+};
+// Out-of-class definitions of MatrixBase's compound assignment operators:
+// both route through the assignment machinery with a sub-/add-assign functor
+// so the subtraction/addition is evaluated directly into the destination.
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived &
+MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
+{
+ call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived &
+MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
+{
+ call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
+}
+#endif
+// end #include "src/Core/CwiseBinaryOp.h"
+// #include "src/Core/CwiseUnaryOp.h"
+#ifndef EIGEN_CWISE_UNARY_OP_H
+#define EIGEN_CWISE_UNARY_OP_H
+namespace Eigen {
+namespace internal {
+// Traits of a coefficient-wise unary expression: everything is inherited
+// from the wrapped expression except the Scalar (the functor's return type)
+// and the Flags, which keep only the storage-order bit.
+template<typename UnaryOp, typename XprType>
+struct traits<CwiseUnaryOp<UnaryOp, XprType> >
+ : traits<XprType>
+{
+ typedef typename result_of<
+ UnaryOp(const typename XprType::Scalar&)
+ >::type Scalar;
+ typedef typename XprType::Nested XprTypeNested;
+ typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
+ enum {
+ Flags = _XprTypeNested::Flags & RowMajorBit
+ };
+};
+}
+template<typename UnaryOp, typename XprType, typename StorageKind>
+class CwiseUnaryOpImpl;
+// Lazy coefficient-wise unary expression: applies UnaryOp to each coefficient
+// of the wrapped expression. Read-only (no_assignment_operator); dimensions
+// are simply forwarded from the wrapped expression.
+template<typename UnaryOp, typename XprType>
+class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
+{
+ public:
+ typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
+ typedef typename internal::ref_selector<XprType>::type XprTypeNested;
+ typedef typename internal::remove_all<XprType>::type NestedExpression;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
+ : m_xpr(xpr), m_functor(func) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Index rows() const { return m_xpr.rows(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Index cols() const { return m_xpr.cols(); }
+ // Functor and nested-expression accessors used by the evaluators.
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const UnaryOp& functor() const { return m_functor; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const typename internal::remove_all<XprTypeNested>::type&
+ nestedExpression() const { return m_xpr; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ typename internal::remove_all<XprTypeNested>::type&
+ nestedExpression() { return m_xpr; }
+ protected:
+ XprTypeNested m_xpr;
+ const UnaryOp m_functor;
+};
+// Generic implementation base: re-exports the base class chosen by
+// generic_xpr_base for this expression type.
+template<typename UnaryOp, typename XprType, typename StorageKind>
+class CwiseUnaryOpImpl
+ : public internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
+{
+public:
+ typedef typename internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
+};
+}
+#endif
+// end #include "src/Core/CwiseUnaryOp.h"
+// #include "src/Core/CwiseNullaryOp.h"
+#ifndef EIGEN_CWISE_NULLARY_OP_H
+#define EIGEN_CWISE_NULLARY_OP_H
+namespace Eigen {
+namespace internal {
+// Traits of an argument-less (generator) expression: inherits everything
+// from the PlainObjectType it models, keeping only the storage-order bit in
+// Flags.
+template<typename NullaryOp, typename PlainObjectType>
+struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
+{
+ enum {
+ Flags = traits<PlainObjectType>::Flags & RowMajorBit
+ };
+};
+}
+// Expression with no operands: each coefficient is produced by the functor
+// (used for Constant, Identity, LinSpaced, Random, ...). Dimensions are
+// stored in variable_if_dynamic so fixed sizes occupy no storage.
+template<typename NullaryOp, typename PlainObjectType>
+class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
+{
+ public:
+ typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
+ // Requires non-negative dimensions that agree with any compile-time sizes.
+ EIGEN_DEVICE_FUNC
+ CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
+ : m_rows(rows), m_cols(cols), m_functor(func)
+ {
+ eigen_assert(rows >= 0
+ && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
+ && cols >= 0
+ && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
+ EIGEN_DEVICE_FUNC
+ const NullaryOp& functor() const { return m_functor; }
+ protected:
+ const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
+ const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
+ const NullaryOp m_functor;
+};
+// Factory for nullary expressions with a user-supplied functor.
+template<typename Derived>
+template<typename CustomNullaryOp>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
+{
+ return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
+}
+// Vector-only variant: a compile-time row vector becomes (1, size),
+// everything else becomes (size, 1).
+// NOTE(review): unlike its siblings this overload lacks EIGEN_DEVICE_FUNC —
+// matches the vendored text; verify against upstream before changing.
+template<typename Derived>
+template<typename CustomNullaryOp>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, PlainObject>(1, size, func);
+ else return CwiseNullaryOp<CustomNullaryOp, PlainObject>(size, 1, func);
+}
+// Fixed-size variant: dimensions come from the compile-time constants.
+template<typename Derived>
+template<typename CustomNullaryOp>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
+{
+ return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
+}
+// Constant expressions: every coefficient equals 'value'. Three overloads:
+// (rows, cols), vector-only (size), and fixed-size only (no arguments).
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
+{
+ return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Constant(Index size, const Scalar& value)
+{
+ return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Constant(const Scalar& value)
+{
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
+}
+// Linearly spaced vector expressions built on internal::linspaced_op.
+// The Sequential_t-tagged overloads appear to be legacy aliases of the
+// untagged ones (identical bodies) — kept for source compatibility.
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
+}
+// True iff every coefficient is approximately equal to 'val' within
+// precision 'prec'. Evaluates the expression once (nested_eval) and scans
+// column-major.
+template<typename Derived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant
+(const Scalar& val, const RealScalar& prec) const
+{
+ typename internal::nested_eval<Derived,1>::type self(derived());
+ for(Index j = 0; j < cols(); ++j)
+ for(Index i = 0; i < rows(); ++i)
+ if(!internal::isApprox(self.coeff(i, j), val, prec))
+ return false;
+ return true;
+}
+// Synonym for isApproxToConstant.
+template<typename Derived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant
+(const Scalar& val, const RealScalar& prec) const
+{
+ return isApproxToConstant(val, prec);
+}
+// Synonym for setConstant.
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
+{
+ setConstant(val);
+}
+// In-place fill with a constant; the PlainObjectBase overloads resize first.
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
+{
+ return derived() = Constant(rows(), cols(), val);
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
+{
+ resize(size);
+ return setConstant(val);
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
+{
+ resize(rows, cols);
+ return setConstant(val);
+}
+// In-place linearly spaced fill; vector-only. The no-size overload keeps the
+// current size().
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return setLinSpaced(size(), low, high);
+}
+// Zero expressions and in-place variants — thin wrappers around
+// Constant(Scalar(0)) / setConstant(Scalar(0)).
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Zero(Index rows, Index cols)
+{
+ return Constant(rows, cols, Scalar(0));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Zero(Index size)
+{
+ return Constant(size, Scalar(0));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Zero()
+{
+ return Constant(Scalar(0));
+}
+// True iff every coefficient is much smaller than 1 (in the isMuchSmallerThan
+// sense) at precision 'prec'.
+template<typename Derived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const
+{
+ typename internal::nested_eval<Derived,1>::type self(derived());
+ for(Index j = 0; j < cols(); ++j)
+ for(Index i = 0; i < rows(); ++i)
+ if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec))
+ return false;
+ return true;
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
+{
+ return setConstant(Scalar(0));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setZero(Index newSize)
+{
+ resize(newSize);
+ return setConstant(Scalar(0));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setZero(Index rows, Index cols)
+{
+ resize(rows, cols);
+ return setConstant(Scalar(0));
+}
+// Ones expressions and in-place variants — wrappers around
+// Constant(Scalar(1)) / setConstant(Scalar(1)), mirroring the Zero family.
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Ones(Index rows, Index cols)
+{
+ return Constant(rows, cols, Scalar(1));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Ones(Index newSize)
+{
+ return Constant(newSize, Scalar(1));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Ones()
+{
+ return Constant(Scalar(1));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes
+(const RealScalar& prec) const
+{
+ return isApproxToConstant(Scalar(1), prec);
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
+{
+ return setConstant(Scalar(1));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setOnes(Index newSize)
+{
+ resize(newSize);
+ return setConstant(Scalar(1));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
+{
+ resize(rows, cols);
+ return setConstant(Scalar(1));
+}
+// Identity expressions built on scalar_identity_op (1 on the diagonal,
+// 0 elsewhere). The no-argument overload requires a fixed-size type.
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
+MatrixBase<Derived>::Identity(Index rows, Index cols)
+{
+ return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
+MatrixBase<Derived>::Identity()
+{
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ return MatrixBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_identity_op<Scalar>());
+}
+// True iff diagonal coefficients are approximately 1 and off-diagonal
+// coefficients are much smaller than 1, at precision 'prec'.
+template<typename Derived>
+bool MatrixBase<Derived>::isIdentity
+(const RealScalar& prec) const
+{
+ typename internal::nested_eval<Derived,1>::type self(derived());
+ for(Index j = 0; j < cols(); ++j)
+ {
+ for(Index i = 0; i < rows(); ++i)
+ {
+ if(i == j)
+ {
+ if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec))
+ return false;
+ }
+ else
+ {
+ if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+namespace internal {
+// setIdentity dispatch on compile-time size. The default path assigns the
+// full Identity() expression; the specialization for SizeAtCompileTime>=16
+// instead zero-fills and writes the diagonal directly — presumably cheaper
+// for larger matrices than evaluating the identity generator everywhere.
+template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
+struct setIdentity_impl
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Derived& run(Derived& m)
+ {
+ return m = Derived::Identity(m.rows(), m.cols());
+ }
+};
+template<typename Derived>
+struct setIdentity_impl<Derived, true>
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Derived& run(Derived& m)
+ {
+ m.setZero();
+ // Diagonal length = min(rows, cols); handles non-square matrices.
+ const Index size = numext::mini(m.rows(), m.cols());
+ for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
+ return m;
+ }
+};
+}
+// In-place identity; the (rows, cols) overload resizes first.
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
+{
+ return internal::setIdentity_impl<Derived>::run(derived());
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
+{
+ derived().resize(rows, cols);
+ return setIdentity();
+}
+// Unit basis vectors: column i of the identity (vector-only).
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return BasisReturnType(SquareMatrixType::Identity(),i);
+}
+// Named shortcuts for the first four basis vectors.
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
+{ return Derived::Unit(0); }
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
+{ return Derived::Unit(1); }
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
+{ return Derived::Unit(2); }
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
+{ return Derived::Unit(3); }
+}
+#endif
+// end #include "src/Core/CwiseNullaryOp.h"
+// #include "src/Core/Stride.h"
+#ifndef EIGEN_STRIDE_H
+#define EIGEN_STRIDE_H
+namespace Eigen {
+// Holds outer/inner strides for Map-like expressions. Compile-time strides
+// occupy no storage (variable_if_dynamic); Dynamic strides are stored.
+template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
+class Stride
+{
+ public:
+ typedef Eigen::Index Index;
+ enum {
+ InnerStrideAtCompileTime = _InnerStrideAtCompileTime,
+ OuterStrideAtCompileTime = _OuterStrideAtCompileTime
+ };
+ // Default construction is only valid when both strides are compile-time
+ // constants (asserted at runtime).
+ EIGEN_DEVICE_FUNC
+ Stride()
+ : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
+ {
+ eigen_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);
+ }
+ // Runtime strides; both must be non-negative.
+ EIGEN_DEVICE_FUNC
+ Stride(Index outerStride, Index innerStride)
+ : m_outer(outerStride), m_inner(innerStride)
+ {
+ eigen_assert(innerStride>=0 && outerStride>=0);
+ }
+ EIGEN_DEVICE_FUNC
+ Stride(const Stride& other)
+ : m_outer(other.outer()), m_inner(other.inner())
+ {}
+ EIGEN_DEVICE_FUNC
+ inline Index outer() const { return m_outer.value(); }
+ EIGEN_DEVICE_FUNC
+ inline Index inner() const { return m_inner.value(); }
+ protected:
+ internal::variable_if_dynamic<Index, OuterStrideAtCompileTime> m_outer;
+ internal::variable_if_dynamic<Index, InnerStrideAtCompileTime> m_inner;
+};
+// Convenience: inner-stride-only specification (outer stride fixed to 0,
+// i.e. unspecified/default).
+template<int Value>
+class InnerStride : public Stride<0, Value>
+{
+ typedef Stride<0, Value> Base;
+ public:
+ EIGEN_DEVICE_FUNC InnerStride() : Base() {}
+ EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {}
+};
+// Convenience: outer-stride-only specification (inner stride fixed to 0).
+template<int Value>
+class OuterStride : public Stride<Value, 0>
+{
+ typedef Stride<Value, 0> Base;
+ public:
+ EIGEN_DEVICE_FUNC OuterStride() : Base() {}
+ EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {}
+};
+}
+#endif
+// end #include "src/Core/Stride.h"
+// #include "src/Core/MapBase.h"
+#ifndef EIGEN_MAPBASE_H
+#define EIGEN_MAPBASE_H
+// Static guard for single-index coefficient access: the expression must
+// either advertise linear access or be a compile-time vector.
+#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \
+ EIGEN_STATIC_ASSERT((int(internal::evaluator<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
+ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)
+namespace Eigen {
+// MapBase<Derived, ReadOnlyAccessors>: const base class of expressions that
+// view an external memory buffer (Map and direct-access Block use it).
+// Coefficient (row,col) lives at m_data[col*colStride() + row*rowStride()];
+// row and column counts are stored in variable_if_dynamic so compile-time
+// fixed sizes occupy no per-object storage.
+template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
+ : public internal::dense_xpr_base<Derived>::type
+{
+ public:
+ typedef typename internal::dense_xpr_base<Derived>::type Base;
+ enum {
+ RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+ ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+ SizeAtCompileTime = Base::SizeAtCompileTime
+ };
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ // Pointer is const-qualified unless Derived is an lvalue expression.
+ typedef typename internal::conditional<
+ bool(internal::is_lvalue<Derived>::value),
+ Scalar *,
+ const Scalar *>::type
+ PointerType;
+ using Base::derived;
+ using Base::MaxRowsAtCompileTime;
+ using Base::MaxColsAtCompileTime;
+ using Base::MaxSizeAtCompileTime;
+ using Base::IsVectorAtCompileTime;
+ using Base::Flags;
+ using Base::IsRowMajor;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::coeff;
+ using Base::coeffRef;
+ using Base::lazyAssign;
+ using Base::eval;
+ using Base::innerStride;
+ using Base::outerStride;
+ using Base::rowStride;
+ using Base::colStride;
+ using Base::operator=;
+ typedef typename Base::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
+ // Raw pointer to the first mapped coefficient.
+ EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; }
+ // Strided 2-D coefficient access.
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeff(Index rowId, Index colId) const
+ {
+ return m_data[colId * colStride() + rowId * rowStride()];
+ }
+ // Linear access; only legal for linear-access or vector expressions.
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeff(Index index) const
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ return m_data[index * innerStride()];
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return this->m_data[colId * colStride() + rowId * rowStride()];
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ return this->m_data[index * innerStride()];
+ }
+ // Vectorized (packet) loads at the same strided addresses.
+ template<int LoadMode>
+ inline PacketScalar packet(Index rowId, Index colId) const
+ {
+ return internal::ploadt<PacketScalar, LoadMode>
+ (m_data + (colId * colStride() + rowId * rowStride()));
+ }
+ template<int LoadMode>
+ inline PacketScalar packet(Index index) const
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
+ }
+ // Constructor for fully fixed-size maps: sizes come from compile time.
+ EIGEN_DEVICE_FUNC
+ explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
+ {
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ checkSanity<Derived>();
+ }
+ // Constructor for vector maps: the one dynamic dimension takes vecSize.
+ EIGEN_DEVICE_FUNC
+ inline MapBase(PointerType dataPtr, Index vecSize)
+ : m_data(dataPtr),
+ m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
+ m_cols(ColsAtCompileTime == Dynamic ? vecSize : Index(ColsAtCompileTime))
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ eigen_assert(vecSize >= 0);
+ eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize);
+ checkSanity<Derived>();
+ }
+ // General matrix constructor; run-time sizes must match any fixed ones.
+ EIGEN_DEVICE_FUNC
+ inline MapBase(PointerType dataPtr, Index rows, Index cols)
+ : m_data(dataPtr), m_rows(rows), m_cols(cols)
+ {
+ eigen_assert( (dataPtr == 0)
+ || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
+ && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
+ checkSanity<Derived>();
+ }
+ #ifdef EIGEN_MAPBASE_PLUGIN
+ #include EIGEN_MAPBASE_PLUGIN
+ #endif
+ protected:
+ // Alignment check, selected by enable_if when traits<T>::Alignment > 0:
+ // asserts the pointer is suitably aligned unless the whole mapped area is
+ // smaller than the required alignment.
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
+ {
+#if EIGEN_MAX_ALIGN_BYTES>0
+ eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
+ || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
+#endif
+ }
+ // No-op overload when no alignment is required (Alignment == 0).
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ void checkSanity(typename internal::enable_if<internal::traits<T>::Alignment==0,void*>::type = 0) const
+ {}
+ PointerType m_data;
+ const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
+ const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
+};
+// MapBase<Derived, WriteAccessors>: adds mutable coefficient access and
+// packet stores on top of the read-only MapBase.  Writable access degrades
+// to const when Derived is not an lvalue expression (ScalarWithConstIfNotLvalue).
+template<typename Derived> class MapBase<Derived, WriteAccessors>
+ : public MapBase<Derived, ReadOnlyAccessors>
+{
+ typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase;
+ public:
+ typedef MapBase<Derived, ReadOnlyAccessors> Base;
+ typedef typename Base::Scalar Scalar;
+ typedef typename Base::PacketScalar PacketScalar;
+ typedef typename Base::StorageIndex StorageIndex;
+ typedef typename Base::PointerType PointerType;
+ using Base::derived;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::coeff;
+ using Base::coeffRef;
+ using Base::innerStride;
+ using Base::outerStride;
+ using Base::rowStride;
+ using Base::colStride;
+ typedef typename internal::conditional<
+ internal::is_lvalue<Derived>::value,
+ Scalar,
+ const Scalar
+ >::type ScalarWithConstIfNotLvalue;
+ EIGEN_DEVICE_FUNC
+ inline const Scalar* data() const { return this->m_data; }
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue* data() { return this->m_data; }
+ // Mutable strided coefficient access (same addressing as the const base).
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
+ {
+ return this->m_data[col * colStride() + row * rowStride()];
+ }
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ return this->m_data[index * innerStride()];
+ }
+ // Vectorized (packet) stores at the strided addresses.
+ template<int StoreMode>
+ inline void writePacket(Index row, Index col, const PacketScalar& val)
+ {
+ internal::pstoret<Scalar, PacketScalar, StoreMode>
+ (this->m_data + (col * colStride() + row * rowStride()), val);
+ }
+ template<int StoreMode>
+ inline void writePacket(Index index, const PacketScalar& val)
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ internal::pstoret<Scalar, PacketScalar, StoreMode>
+ (this->m_data + index * innerStride(), val);
+ }
+ // Constructors just forward to the read-only base.
+ EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
+ EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
+ EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}
+ // Copy-assignment copies coefficients (not the pointer) via the dense base.
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const MapBase& other)
+ {
+ ReadOnlyMapBase::Base::operator=(other);
+ return derived();
+ }
+ using ReadOnlyMapBase::Base::operator=;
+};
+#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
+}
+#endif
+// end #include "src/Core/MapBase.h"
+// #include "src/Core/Map.h"
+#ifndef EIGEN_MAP_H
+#define EIGEN_MAP_H
+namespace Eigen {
+namespace internal {
+// traits for Map<PlainObjectType, MapOptions, StrideType>.  A stride value of
+// 0 in StrideType means "inherit the plain object's natural stride"; the
+// alignment comes from MapOptions, and LvalueBit is dropped when the mapped
+// object is not an lvalue.
+template<typename PlainObjectType, int MapOptions, typename StrideType>
+struct traits<Map<PlainObjectType, MapOptions, StrideType> >
+ : public traits<PlainObjectType>
+{
+ typedef traits<PlainObjectType> TraitsBase;
+ enum {
+ InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+ ? int(PlainObjectType::InnerStrideAtCompileTime)
+ : int(StrideType::InnerStrideAtCompileTime),
+ OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+ ? int(PlainObjectType::OuterStrideAtCompileTime)
+ : int(StrideType::OuterStrideAtCompileTime),
+ Alignment = int(MapOptions)&int(AlignedMask),
+ Flags0 = TraitsBase::Flags & (~NestByRefBit),
+ Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
+ };
+private:
+ // NOTE(review): shadows the inherited Options enumerator in private scope,
+ // presumably to keep callers from reading it on a Map expression — confirm
+ // against upstream Eigen.
+ enum { Options };
+};
+}
+// Map: a matrix/vector expression viewing an existing raw buffer, with
+// optional compile-time or run-time strides.  All coefficient access is
+// inherited from MapBase; this class only stores the stride and resolves
+// the effective inner/outer stride values.
+template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
+ : public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
+{
+ public:
+ typedef MapBase<Map> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Map)
+ typedef typename Base::PointerType PointerType;
+ typedef PointerType PointerArgType;
+ EIGEN_DEVICE_FUNC
+ inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
+ // Inner stride: explicit StrideType value, or 1 (contiguous) when the
+ // compile-time stride is 0.
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const
+ {
+ return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
+ }
+ // Outer stride: explicit StrideType value, else the natural pitch — the
+ // full size for vectors, cols() for row-major, rows() for column-major.
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const
+ {
+ return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
+ : IsVectorAtCompileTime ? this->size()
+ : int(Flags)&RowMajorBit ? this->cols()
+ : this->rows();
+ }
+ // Constructors mirror MapBase: fixed-size, vector-size, and matrix-size.
+ EIGEN_DEVICE_FUNC
+ explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
+ : Base(cast_to_pointer_type(dataPtr)), m_stride(stride)
+ {
+ PlainObjectType::Base::_check_template_params();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())
+ : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride)
+ {
+ PlainObjectType::Base::_check_template_params();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
+ : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride)
+ {
+ PlainObjectType::Base::_check_template_params();
+ }
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
+ protected:
+ StrideType m_stride;
+};
+}
+#endif
+// end #include "src/Core/Map.h"
+// #include "src/Core/Block.h"
+#ifndef EIGEN_BLOCK_H
+#define EIGEN_BLOCK_H
+namespace Eigen {
+namespace internal {
+// traits for Block<XprType, BlockRows, BlockCols, InnerPanel>.  Derives the
+// block's compile-time sizes from the host expression, decides the block's
+// effective storage order (a 1-row block is treated as row-major, a 1-column
+// block as column-major), and swaps inner/outer strides when that order
+// differs from the host's.
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
+{
+ typedef typename traits<XprType>::Scalar Scalar;
+ typedef typename traits<XprType>::StorageKind StorageKind;
+ typedef typename traits<XprType>::XprKind XprKind;
+ typedef typename ref_selector<XprType>::type XprTypeNested;
+ typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
+ enum{
+ MatrixRows = traits<XprType>::RowsAtCompileTime,
+ MatrixCols = traits<XprType>::ColsAtCompileTime,
+ RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows,
+ ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols,
+ MaxRowsAtCompileTime = BlockRows==0 ? 0
+ : RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime)
+ : int(traits<XprType>::MaxRowsAtCompileTime),
+ MaxColsAtCompileTime = BlockCols==0 ? 0
+ : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
+ : int(traits<XprType>::MaxColsAtCompileTime),
+ XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
+ IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
+ : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
+ : XprTypeIsRowMajor,
+ HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
+ InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+ InnerStrideAtCompileTime = HasSameStorageOrderAsXprType
+ ? int(inner_stride_at_compile_time<XprType>::ret)
+ : int(outer_stride_at_compile_time<XprType>::ret),
+ OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
+ ? int(outer_stride_at_compile_time<XprType>::ret)
+ : int(inner_stride_at_compile_time<XprType>::ret),
+ FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
+ FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
+ Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
+ Alignment = 0
+ };
+};
+template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
+ bool HasDirectAccess = internal::has_direct_access<XprType>::ret> class BlockImpl_dense;
+}
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
+// Block: expression of a rectangular sub-part of a dense expression.
+// Implementation is delegated to BlockImpl (selected on StorageKind); this
+// class only defines the user-facing constructors and their bound checks.
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
+ : public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
+{
+ typedef BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind> Impl;
+ public:
+ typedef Impl Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
+ typedef typename internal::remove_all<XprType>::type NestedExpression;
+ // Column or row block: i selects the i-th full row (when BlockRows==1)
+ // or the i-th full column (when BlockCols==1).
+ EIGEN_DEVICE_FUNC
+ inline Block(XprType& xpr, Index i) : Impl(xpr,i)
+ {
+ eigen_assert( (i>=0) && (
+ ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())
+ ||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));
+ }
+ // Fixed-size block starting at (startRow, startCol); sizes come from the
+ // BlockRows/BlockCols template parameters, which must not be Dynamic.
+ EIGEN_DEVICE_FUNC
+ inline Block(XprType& xpr, Index startRow, Index startCol)
+ : Impl(xpr, startRow, startCol)
+ {
+ EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
+ eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows()
+ && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols());
+ }
+ // Dynamic-size block: explicit start position and run-time dimensions,
+ // which must agree with any compile-time fixed sizes.
+ EIGEN_DEVICE_FUNC
+ inline Block(XprType& xpr,
+ Index startRow, Index startCol,
+ Index blockRows, Index blockCols)
+ : Impl(xpr, startRow, startCol, blockRows, blockCols)
+ {
+ eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
+ && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols))
+ eigen_assert(startRow >= 0 && blockRows >= 0 && startRow <= xpr.rows() - blockRows
+ && startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols);
+ }
+};
+// Dense-storage BlockImpl: a thin forwarding layer that selects the right
+// internal::BlockImpl_dense specialization (with or without direct memory
+// access) and forwards the three constructor forms unchanged.
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
+ : public internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel>
+{
+ typedef internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel> Impl;
+ typedef typename XprType::StorageIndex StorageIndex;
+ public:
+ typedef Impl Base;
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
+ EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
+ EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {}
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
+ : Impl(xpr, startRow, startCol, blockRows, blockCols) {}
+};
+namespace internal {
+// Generic BlockImpl_dense (no direct memory access): stores a reference to
+// the host expression plus the block's start offsets and run-time sizes, and
+// translates every coefficient/packet access by (m_startRow, m_startCol).
+// Start offsets collapse to compile-time 0 when the host dimension is 1.
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class BlockImpl_dense
+ : public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type
+{
+ typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+ typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
+ public:
+ typedef typename internal::dense_xpr_base<BlockType>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
+ // i-th row/column constructor: i becomes the row start for a 1-row block,
+ // the column start for a 1-column block.
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, Index i)
+ : m_xpr(xpr),
+ m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
+ m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
+ m_blockRows(BlockRows==1 ? 1 : xpr.rows()),
+ m_blockCols(BlockCols==1 ? 1 : xpr.cols())
+ {}
+ // Fixed-size block constructor.
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
+ : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
+ m_blockRows(BlockRows), m_blockCols(BlockCols)
+ {}
+ // Dynamic-size block constructor.
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr,
+ Index startRow, Index startCol,
+ Index blockRows, Index blockCols)
+ : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
+ m_blockRows(blockRows), m_blockCols(blockCols)
+ {}
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
+ // 2-D access: offset into the host expression.
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index rowId, Index colId)
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(XprType)
+ return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const
+ {
+ return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());
+ }
+ // Linear access: valid only for single-row/column blocks, so the linear
+ // index maps to whichever dimension is non-trivial.
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index index)
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(XprType)
+ return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ }
+ EIGEN_DEVICE_FUNC
+ inline const CoeffReturnType coeff(Index index) const
+ {
+ return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ }
+ // Packet access forwards as Unaligned: the block's offset breaks any
+ // alignment guarantee the host may have.
+ template<int LoadMode>
+ inline PacketScalar packet(Index rowId, Index colId) const
+ {
+ return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
+ }
+ template<int LoadMode>
+ inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
+ {
+ m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
+ }
+ template<int LoadMode>
+ inline PacketScalar packet(Index index) const
+ {
+ return m_xpr.template packet<Unaligned>
+ (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ }
+ template<int LoadMode>
+ inline void writePacket(Index index, const PacketScalar& val)
+ {
+ m_xpr.template writePacket<Unaligned>
+ (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
+ }
+ #ifdef EIGEN_PARSED_BY_DOXYGEN
+ EIGEN_DEVICE_FUNC inline const Scalar* data() const;
+ EIGEN_DEVICE_FUNC inline Index innerStride() const;
+ EIGEN_DEVICE_FUNC inline Index outerStride() const;
+ #endif
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
+ {
+ return m_xpr;
+ }
+ EIGEN_DEVICE_FUNC
+ XprType& nestedExpression() { return m_xpr; }
+ EIGEN_DEVICE_FUNC
+ StorageIndex startRow() const
+ {
+ return m_startRow.value();
+ }
+ EIGEN_DEVICE_FUNC
+ StorageIndex startCol() const
+ {
+ return m_startCol.value();
+ }
+ protected:
+ XprTypeNested m_xpr;
+ const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
+ const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
+ const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;
+ const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols;
+};
+// Direct-access BlockImpl_dense specialization: the host exposes data()
+// with strides, so the block itself becomes a MapBase view whose base
+// pointer is offset to (startRow, startCol).  The outer stride is cached by
+// init() because a block whose storage order differs from its host swaps
+// the host's inner/outer strides.
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
+ : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
+{
+ typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+ typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
+ enum {
+ XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
+ };
+ public:
+ typedef MapBase<BlockType> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
+ // i-th row/column constructor: the pointer advances by i inner or outer
+ // strides depending on whether the selected row/column is contiguous in
+ // the host's storage order.
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, Index i)
+ : Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor))
+ || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
+ BlockRows==1 ? 1 : xpr.rows(),
+ BlockCols==1 ? 1 : xpr.cols()),
+ m_xpr(xpr),
+ m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
+ m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)
+ {
+ init();
+ }
+ // Fixed-size block constructor: pointer offset computed from both strides.
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
+ : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
+ m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
+ {
+ init();
+ }
+ // Dynamic-size block constructor.
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr,
+ Index startRow, Index startCol,
+ Index blockRows, Index blockCols)
+ : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
+ m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
+ {
+ init();
+ }
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
+ {
+ return m_xpr;
+ }
+ EIGEN_DEVICE_FUNC
+ XprType& nestedExpression() { return m_xpr; }
+ // Inner stride: host's inner stride when storage orders agree, otherwise
+ // the host's outer stride (the roles swap).
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const
+ {
+ return internal::traits<BlockType>::HasSameStorageOrderAsXprType
+ ? m_xpr.innerStride()
+ : m_xpr.outerStride();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const
+ {
+ return m_outerStride;
+ }
+ EIGEN_DEVICE_FUNC
+ StorageIndex startRow() const
+ {
+ return m_startRow.value();
+ }
+ EIGEN_DEVICE_FUNC
+ StorageIndex startCol() const
+ {
+ return m_startCol.value();
+ }
+ #ifndef __SUNPRO_CC
+ protected:
+ #endif
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ // Internal constructor used when the caller already computed the data
+ // pointer for the block.
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
+ : Base(data, blockRows, blockCols), m_xpr(xpr)
+ {
+ init();
+ }
+ #endif
+ protected:
+ // Caches the effective outer stride (see class comment).
+ EIGEN_DEVICE_FUNC
+ void init()
+ {
+ m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType
+ ? m_xpr.outerStride()
+ : m_xpr.innerStride();
+ }
+ XprTypeNested m_xpr;
+ const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
+ const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
+ Index m_outerStride;
+};
+}
+}
+#endif
+// end #include "src/Core/Block.h"
+// #include "src/Core/Transpose.h"
+#ifndef EIGEN_TRANSPOSE_H
+#define EIGEN_TRANSPOSE_H
+namespace Eigen {
+namespace internal {
+// traits for Transpose<MatrixType>: rows/cols are swapped relative to the
+// host, and the storage order is flipped by XOR-ing RowMajorBit into Flags.
+template<typename MatrixType>
+struct traits<Transpose<MatrixType> > : public traits<MatrixType>
+{
+ typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+ typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
+ enum {
+ RowsAtCompileTime = MatrixType::ColsAtCompileTime,
+ ColsAtCompileTime = MatrixType::RowsAtCompileTime,
+ MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+ MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+ FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+ Flags0 = traits<MatrixTypeNestedPlain>::Flags & ~(LvalueBit | NestByRefBit),
+ Flags1 = Flags0 | FlagsLvalueBit,
+ Flags = Flags1 ^ RowMajorBit,
+ InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
+ OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
+ };
+};
+}
+template<typename MatrixType, typename StorageKind> class TransposeImpl;
+// Transpose: expression of the transpose of a matrix; holds only a
+// (possibly by-reference) nested copy of the host and swaps rows()/cols().
+// Implementation details live in TransposeImpl, selected on StorageKind.
+template<typename MatrixType> class Transpose
+ : public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
+{
+ public:
+ typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
+ typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
+ typedef typename internal::remove_all<MatrixType>::type NestedExpression;
+ EIGEN_DEVICE_FUNC
+ explicit inline Transpose(MatrixType& matrix) : m_matrix(matrix) {}
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
+ // Dimensions are the host's, swapped.
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.cols(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.rows(); }
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<MatrixTypeNested>::type&
+ nestedExpression() const { return m_matrix; }
+ EIGEN_DEVICE_FUNC
+ typename internal::remove_reference<MatrixTypeNested>::type&
+ nestedExpression() { return m_matrix; }
+ // Resizing the transpose resizes the host with swapped dimensions.
+ void resize(Index nrows, Index ncols) {
+ m_matrix.resize(ncols,nrows);
+ }
+ protected:
+ typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
+};
+namespace internal {
+// Base-class selector for the dense TransposeImpl; dispatched on direct
+// memory access.  NOTE(review): both the primary template and the false
+// specialization currently yield the same type.
+template<typename MatrixType, bool HasDirectAccess = has_direct_access<MatrixType>::ret>
+struct TransposeImpl_base
+{
+ typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
+};
+template<typename MatrixType>
+struct TransposeImpl_base<MatrixType, false>
+{
+ typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
+};
+}
+// Generic TransposeImpl for non-dense storage kinds: just exposes the
+// generic expression base; all behavior comes from evaluators.
+template<typename XprType, typename StorageKind>
+class TransposeImpl
+ : public internal::generic_xpr_base<Transpose<XprType> >::type
+{
+public:
+ typedef typename internal::generic_xpr_base<Transpose<XprType> >::type Base;
+};
+// Dense TransposeImpl: forwards data()/strides to the nested expression and
+// provides coeffRef with row/column indices swapped.  Writable access
+// degrades to const when the host is not an lvalue.
+template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
+ : public internal::TransposeImpl_base<MatrixType>::type
+{
+ public:
+ typedef typename internal::TransposeImpl_base<MatrixType>::type Base;
+ using Base::coeffRef;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
+ EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
+ EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
+ typedef typename internal::conditional<
+ internal::is_lvalue<MatrixType>::value,
+ Scalar,
+ const Scalar
+ >::type ScalarWithConstIfNotLvalue;
+ EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
+ EIGEN_DEVICE_FUNC inline const Scalar* data() const { return derived().nestedExpression().data(); }
+ // (row, col) of the transpose is (col, row) of the host.
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return derived().nestedExpression().coeffRef(colId, rowId);
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ return derived().nestedExpression().coeffRef(index);
+ }
+};
+// Out-of-line definitions: transpose() wraps the expression in a Transpose
+// view (no data is copied), and adjoint() is the conjugate of the transpose.
+template<typename Derived>
+inline Transpose<Derived>
+DenseBase<Derived>::transpose()
+{
+ return TransposeReturnType(derived());
+}
+// Const overload returning a read-only transpose view.
+template<typename Derived>
+inline typename DenseBase<Derived>::ConstTransposeReturnType
+DenseBase<Derived>::transpose() const
+{
+ return ConstTransposeReturnType(derived());
+}
+// adjoint(): conjugate-transpose, built on top of transpose().
+template<typename Derived>
+inline const typename MatrixBase<Derived>::AdjointReturnType
+MatrixBase<Derived>::adjoint() const
+{
+ return AdjointReturnType(this->transpose());
+}
+namespace internal {
+// Strategy selector for transposeInPlace(), dispatched on whether the matrix
+// is compile-time square and whether its fixed size matches the SIMD packet
+// size (enabling a fully packet-based transpose).
+template<typename MatrixType,
+ bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic,
+ bool MatchPacketSize =
+ (int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size))
+ && (internal::evaluator<MatrixType>::Flags&PacketAccessBit) >
+struct inplace_transpose_selector;
+// Square, non-packet path: swap the strict upper triangle with the strict
+// lower triangle (via the transposed view); the diagonal stays in place.
+template<typename MatrixType>
+struct inplace_transpose_selector<MatrixType,true,false> {
+ static void run(MatrixType& m) {
+ m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
+ }
+};
+// Square, packet-sized path: load the whole matrix as a block of packets,
+// transpose it with ptranspose, and store it back.
+template<typename MatrixType>
+struct inplace_transpose_selector<MatrixType,true,true> {
+ static void run(MatrixType& m) {
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
+ const Index PacketSize = internal::packet_traits<Scalar>::size;
+ const Index Alignment = internal::evaluator<MatrixType>::Alignment;
+ PacketBlock<Packet> A;
+ for (Index i=0; i<PacketSize; ++i)
+ A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);
+ internal::ptranspose(A);
+ for (Index i=0; i<PacketSize; ++i)
+ m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]);
+ }
+};
+// Dynamic-size path: triangular swap when square at run time, otherwise
+// evaluate the transpose into a temporary and assign (requires resizing).
+template<typename MatrixType,bool MatchPacketSize>
+struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> {
+ static void run(MatrixType& m) {
+ if (m.rows()==m.cols())
+ m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
+ else
+ m = m.transpose().eval();
+ }
+};
+}
+// In-place transpose; non-square matrices are only allowed when both
+// dimensions are Dynamic (so the object can be resized by assignment).
+template<typename Derived>
+inline void DenseBase<Derived>::transposeInPlace()
+{
+ eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic))
+ && "transposeInPlace() called on a non-square non-resizable matrix");
+ internal::inplace_transpose_selector<Derived>::run(derived());
+}
+// In-place adjoint; evaluates the adjoint into a temporary before assigning
+// (unlike transposeInPlace, no triangular-swap fast path is used here).
+template<typename Derived>
+inline void MatrixBase<Derived>::adjointInPlace()
+{
+ derived() = adjoint().eval();
+}
+#ifndef EIGEN_NO_DEBUG
+namespace internal {
+// Compile-time filter: aliasing through transposition is only *possible*
+// when the source's transposed-ness differs from the destination's; for a
+// binary expression, when either operand's does.
+template<bool DestIsTransposed, typename OtherDerived>
+struct check_transpose_aliasing_compile_time_selector
+{
+ enum { ret = bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed };
+};
+template<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
+struct check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
+{
+ enum { ret = bool(blas_traits<DerivedA>::IsTransposed) != DestIsTransposed
+ || bool(blas_traits<DerivedB>::IsTransposed) != DestIsTransposed
+ };
+};
+// Run-time check: aliasing actually occurs when the transposed-ness differs
+// AND destination and source share the same underlying data pointer.
+template<typename Scalar, bool DestIsTransposed, typename OtherDerived>
+struct check_transpose_aliasing_run_time_selector
+{
+ static bool run(const Scalar* dest, const OtherDerived& src)
+ {
+ return (bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src));
+ }
+};
+template<typename Scalar, bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
+struct check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
+{
+ static bool run(const Scalar* dest, const CwiseBinaryOp<BinOp,DerivedA,DerivedB>& src)
+ {
+ return ((blas_traits<DerivedA>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.lhs())))
+ || ((blas_traits<DerivedB>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.rhs())));
+ }
+};
+// Debug-only aliasing assertion: asserts (via eigen_assert) when the
+// destination of an assignment aliases a transposed view of itself.  The
+// false specialization makes the check a no-op when the compile-time
+// selector proves aliasing impossible.
+template<typename Derived, typename OtherDerived,
+ bool MightHaveTransposeAliasing
+ = check_transpose_aliasing_compile_time_selector
+ <blas_traits<Derived>::IsTransposed,OtherDerived>::ret
+ >
+struct checkTransposeAliasing_impl
+{
+ static void run(const Derived& dst, const OtherDerived& other)
+ {
+ eigen_assert((!check_transpose_aliasing_run_time_selector
+ <typename Derived::Scalar,blas_traits<Derived>::IsTransposed,OtherDerived>
+ ::run(extract_data(dst), other))
+ && "aliasing detected during transposition, use transposeInPlace() "
+ "or evaluate the rhs into a temporary using .eval()");
+ }
+};
+// Compile-time-proven-safe case: nothing to check.
+template<typename Derived, typename OtherDerived>
+struct checkTransposeAliasing_impl<Derived, OtherDerived, false>
+{
+ static void run(const Derived&, const OtherDerived&)
+ {
+ }
+};
+// Entry point used by the assignment machinery (only compiled when
+// EIGEN_NO_DEBUG is not defined).
+template<typename Dst, typename Src>
+void check_for_aliasing(const Dst &dst, const Src &src)
+{
+ internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src);
+}
+}
+#endif
+}
+#endif
+// end #include "src/Core/Transpose.h"
+// #include "src/Core/Redux.h"
+#ifndef EIGEN_REDUX_H
+#define EIGEN_REDUX_H
+namespace Eigen {
+namespace internal {
+// Compile-time strategy selection for reductions (redux): chooses between
+// linear-vectorized, slice-vectorized, and default (scalar) traversal, and
+// decides whether the reduction loop is fully unrolled based on an estimated
+// cost versus EIGEN_UNROLLING_LIMIT.
+template<typename Func, typename Derived>
+struct redux_traits
+{
+public:
+ typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
+ enum {
+ PacketSize = unpacket_traits<PacketType>::size,
+ InnerMaxSize = int(Derived::IsRowMajor)
+ ? Derived::MaxColsAtCompileTime
+ : Derived::MaxRowsAtCompileTime
+ };
+ enum {
+ // Vectorization needs both packet access on the expression and a
+ // packet-capable functor.
+ MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
+ && (functor_traits<Func>::PacketAccess),
+ MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
+ // Slice vectorization only pays off when inner runs are at least
+ // three packets long.
+ MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
+ };
+public:
+ enum {
+ Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
+ : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
+ : int(DefaultTraversal)
+ };
+public:
+ enum {
+ // Estimated total cost; HugeCost for dynamic sizes disables unrolling.
+ Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
+ : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
+ UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
+ };
+public:
+ enum {
+ Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
+ };
+#ifdef EIGEN_DEBUG_ASSIGN
+ // Dumps the selected strategy; only built with EIGEN_DEBUG_ASSIGN.
+ static void debug()
+ {
+ std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
+ std::cerr.setf(std::ios::hex, std::ios::basefield);
+ EIGEN_DEBUG_VAR(Derived::Flags)
+ std::cerr.unsetf(std::ios::hex);
+ EIGEN_DEBUG_VAR(InnerMaxSize)
+ EIGEN_DEBUG_VAR(PacketSize)
+ EIGEN_DEBUG_VAR(MightVectorize)
+ EIGEN_DEBUG_VAR(MayLinearVectorize)
+ EIGEN_DEBUG_VAR(MaySliceVectorize)
+ EIGEN_DEBUG_VAR(Traversal)
+ EIGEN_DEBUG_VAR(UnrollingLimit)
+ EIGEN_DEBUG_VAR(Unrolling)
+ std::cerr << std::endl;
+ }
+#endif
+};
+// Scalar (non-vectorized) compile-time unroller for redux: recursively
+// splits the [Start, Start+Length) coefficient range in half and combines
+// the two sub-reductions with func.
+template<typename Func, typename Derived, int Start, int Length>
+struct redux_novec_unroller
+{
+ enum {
+ HalfLength = Length/2
+ };
+ typedef typename Derived::Scalar Scalar;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
+ {
+ return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
+ redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
+ }
+};
+// Base case, Length == 1: read a single coefficient, converting the linear
+// index Start into (outer, inner) coordinates.
+template<typename Func, typename Derived, int Start>
+struct redux_novec_unroller<Func, Derived, Start, 1>
+{
+ enum {
+ outer = Start / Derived::InnerSizeAtCompileTime,
+ inner = Start % Derived::InnerSizeAtCompileTime
+ };
+ typedef typename Derived::Scalar Scalar;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
+ {
+ return mat.coeffByOuterInner(outer, inner);
+ }
+};
+// Degenerate case, Length == 0: returns a default-constructed Scalar
+// (only instantiated, never meaningfully called, for empty ranges).
+template<typename Func, typename Derived, int Start>
+struct redux_novec_unroller<Func, Derived, Start, 0>
+{
+ typedef typename Derived::Scalar Scalar;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
+};
+// Vectorized compile-time unroller for redux: same divide-and-conquer as
+// redux_novec_unroller, but operating on whole packets; Start and Length
+// are counted in packets, not scalars.
+template<typename Func, typename Derived, int Start, int Length>
+struct redux_vec_unroller
+{
+ enum {
+ PacketSize = redux_traits<Func, Derived>::PacketSize,
+ HalfLength = Length/2
+ };
+ typedef typename Derived::Scalar Scalar;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
+ static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
+ {
+ return func.packetOp(
+ redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
+ redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
+ }
+};
+// Base case, Length == 1: load one packet.  The packet index Start is
+// converted to a scalar index and then to (outer, inner) coordinates.
+template<typename Func, typename Derived, int Start>
+struct redux_vec_unroller<Func, Derived, Start, 1>
+{
+ enum {
+ index = Start * redux_traits<Func, Derived>::PacketSize,
+ outer = index / int(Derived::InnerSizeAtCompileTime),
+ inner = index % int(Derived::InnerSizeAtCompileTime),
+ alignment = Derived::Alignment
+ };
+ typedef typename Derived::Scalar Scalar;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
+ static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
+ {
+ return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
+ }
+};
+// Main redux dispatcher; specialized below on the (Traversal, Unrolling)
+// pair selected by redux_traits.
+template<typename Func, typename Derived,
+ int Traversal = redux_traits<Func, Derived>::Traversal,
+ int Unrolling = redux_traits<Func, Derived>::Unrolling
+>
+struct redux_impl;
+// Plain scalar loop, no unrolling: reduce the first inner row, then the
+// remaining outer slices coefficient by coefficient.
+template<typename Func, typename Derived>
+struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
+{
+ typedef typename Derived::Scalar Scalar;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
+ {
+ eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
+ Scalar res;
+ // Seed with the first coefficient; loops below start at index 1.
+ res = mat.coeffByOuterInner(0, 0);
+ for(Index i = 1; i < mat.innerSize(); ++i)
+ res = func(res, mat.coeffByOuterInner(0, i));
+ for(Index i = 1; i < mat.outerSize(); ++i)
+ for(Index j = 0; j < mat.innerSize(); ++j)
+ res = func(res, mat.coeffByOuterInner(i, j));
+ return res;
+ }
+};
+// Fully unrolled scalar reduction: inherits run() from the unroller.
+template<typename Func, typename Derived>
+struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
+ : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
+{};
+// Linearly vectorized reduction, no unrolling.  Processes the aligned
+// middle part two packets at a time (two accumulators to expose ILP),
+// then handles the unaligned head and the scalar tail separately.
+template<typename Func, typename Derived>
+struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
+{
+ typedef typename Derived::Scalar Scalar;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
+ static Scalar run(const Derived &mat, const Func& func)
+ {
+ const Index size = mat.size();
+ const Index packetSize = redux_traits<Func, Derived>::PacketSize;
+ const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
+ enum {
+ // Aligned loads are only possible with direct, scalar-aligned storage.
+ alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
+ alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
+ };
+ const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
+ // alignedSize2: largest multiple of 2*packetSize; alignedSize: of packetSize.
+ const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
+ const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
+ const Index alignedEnd2 = alignedStart + alignedSize2;
+ const Index alignedEnd = alignedStart + alignedSize;
+ Scalar res;
+ if(alignedSize)
+ {
+ PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
+ if(alignedSize>packetSize)
+ {
+ // Two independent accumulators; combined after the loop.
+ PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
+ for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
+ {
+ packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
+ packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
+ }
+ packet_res0 = func.packetOp(packet_res0,packet_res1);
+ // One odd trailing packet, if any.
+ if(alignedEnd>alignedEnd2)
+ packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
+ }
+ // Horizontal reduction of the packet, then the scalar head and tail.
+ res = func.predux(packet_res0);
+ for(Index index = 0; index < alignedStart; ++index)
+ res = func(res,mat.coeff(index));
+ for(Index index = alignedEnd; index < size; ++index)
+ res = func(res,mat.coeff(index));
+ }
+ else
+ {
+ // Too small for even one packet: plain scalar loop.
+ res = mat.coeff(0);
+ for(Index index = 1; index < size; ++index)
+ res = func(res,mat.coeff(index));
+ }
+ return res;
+ }
+};
+// Slice-vectorized reduction: vectorizes along the inner dimension of each
+// outer slice (no linear access required), with a scalar loop for the
+// leftover inner coefficients of every slice.
+template<typename Func, typename Derived, int Unrolling>
+struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
+{
+ typedef typename Derived::Scalar Scalar;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketType;
+ EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
+ {
+ eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
+ const Index innerSize = mat.innerSize();
+ const Index outerSize = mat.outerSize();
+ enum {
+ packetSize = redux_traits<Func, Derived>::PacketSize
+ };
+ const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
+ Scalar res;
+ if(packetedInnerSize)
+ {
+ // Seed with the first packet; j==0 therefore starts at packetSize.
+ PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
+ for(Index j=0; j<outerSize; ++j)
+ for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
+ packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));
+ res = func.predux(packet_res);
+ // Scalar remainder of each inner row.
+ for(Index j=0; j<outerSize; ++j)
+ for(Index i=packetedInnerSize; i<innerSize; ++i)
+ res = func(res, mat.coeffByOuterInner(j,i));
+ }
+ else
+ {
+ // Inner size smaller than a packet: fall back to the scalar path.
+ res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
+ }
+ return res;
+ }
+};
+// Fully unrolled vectorized reduction for small fixed sizes: unrolls the
+// packet part at compile time, then folds in the scalar remainder.
+template<typename Func, typename Derived>
+struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
+{
+ typedef typename Derived::Scalar Scalar;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
+ enum {
+ PacketSize = redux_traits<Func, Derived>::PacketSize,
+ Size = Derived::SizeAtCompileTime,
+ // Number of coefficients covered by whole packets.
+ VectorizedSize = (Size / PacketSize) * PacketSize
+ };
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
+ {
+ eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
+ if (VectorizedSize > 0) {
+ Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
+ if (VectorizedSize != Size)
+ res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
+ return res;
+ }
+ else {
+ // Size smaller than one packet: purely scalar unrolled reduction.
+ return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
+ }
+ }
+};
+// Adapter that wraps an expression and its evaluator behind the interface
+// expected by the redux_* implementations (coeff/packet access by linear
+// index and by outer/inner coordinates, plus size queries).
+template<typename _XprType>
+class redux_evaluator
+{
+public:
+ typedef _XprType XprType;
+ EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename XprType::PacketScalar PacketScalar;
+ typedef typename XprType::PacketReturnType PacketReturnType;
+ enum {
+ MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
+ // DirectAccessBit is masked out: redux never needs raw pointer access.
+ Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
+ IsRowMajor = XprType::IsRowMajor,
+ SizeAtCompileTime = XprType::SizeAtCompileTime,
+ InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
+ CoeffReadCost = evaluator<XprType>::CoeffReadCost,
+ Alignment = evaluator<XprType>::Alignment
+ };
+ EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
+ EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
+ EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
+ EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
+ EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
+ EIGEN_DEVICE_FUNC
+ CoeffReturnType coeff(Index row, Index col) const
+ { return m_evaluator.coeff(row, col); }
+ EIGEN_DEVICE_FUNC
+ CoeffReturnType coeff(Index index) const
+ { return m_evaluator.coeff(index); }
+ template<int LoadMode, typename PacketType>
+ PacketType packet(Index row, Index col) const
+ { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
+ template<int LoadMode, typename PacketType>
+ PacketType packet(Index index) const
+ { return m_evaluator.template packet<LoadMode,PacketType>(index); }
+ // (outer, inner) accessors: map storage-order-independent coordinates
+ // to (row, col) depending on IsRowMajor.
+ EIGEN_DEVICE_FUNC
+ CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
+ { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
+ template<int LoadMode, typename PacketType>
+ PacketType packetByOuterInner(Index outer, Index inner) const
+ { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
+ const XprType & nestedExpression() const { return m_xpr; }
+protected:
+ internal::evaluator<XprType> m_evaluator;
+ const XprType &m_xpr;
+};
+}
+// Generic reduction: folds all coefficients with the given binary functor.
+// Asserts on empty matrices; the specific reductions below build on this.
+template<typename Derived>
+template<typename Func>
+typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::redux(const Func& func) const
+{
+ eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
+ typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
+ ThisEvaluator thisEval(derived());
+ return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
+}
+// Minimum coefficient (undefined on empty matrices, see redux).
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::minCoeff() const
+{
+ return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
+}
+// Maximum coefficient.
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::maxCoeff() const
+{
+ return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
+}
+// Sum of all coefficients; an empty matrix yields 0 (the additive identity)
+// rather than tripping redux()'s non-empty assertion.
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::sum() const
+{
+ if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
+ return Scalar(0);
+ return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
+}
+// Arithmetic mean: sum divided by size.  The ICC pragma silences a
+// non-pointer-conversion warning (#2259) in the division.
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::mean() const
+{
+#ifdef __INTEL_COMPILER
+ #pragma warning push
+ #pragma warning ( disable : 2259 )
+#endif
+ return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
+#ifdef __INTEL_COMPILER
+ #pragma warning pop
+#endif
+}
+// Product of all coefficients; an empty matrix yields 1 (the multiplicative
+// identity).
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::prod() const
+{
+ if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
+ return Scalar(1);
+ return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
+}
+// Trace: sum of diagonal coefficients.
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+MatrixBase<Derived>::trace() const
+{
+ return derived().diagonal().sum();
+}
+}
+#endif
+// end #include "src/Core/Redux.h"
+// #include "src/Core/GeneralProduct.h"
+#ifndef EIGEN_GENERAL_PRODUCT_H
+#define EIGEN_GENERAL_PRODUCT_H
+namespace Eigen {
+enum {
+ Large = 2,
+ Small = 3
+};
+namespace internal {
+// Maps a (rows, cols, depth) size category triple to a product kind;
+// specializations follow below.
+template<int Rows, int Cols, int Depth> struct product_type_selector;
+// Buckets a dimension into 1 / Small / Large based on its compile-time
+// size, max size, and the cache-friendly product threshold.
+template<int Size, int MaxSize> struct product_size_category
+{
+ enum { is_large = MaxSize == Dynamic ||
+ Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
+ (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
+ value = is_large ? Large
+ : Size == 1 ? 1
+ : Small
+ };
+};
+// Classifies the product of Lhs * Rhs: computes the size categories of
+// rows, columns and depth, and delegates to product_type_selector to pick
+// the implementation (inner/outer product, gemv, gemm, coeff-based, ...).
+template<typename Lhs, typename Rhs> struct product_type
+{
+ typedef typename remove_all<Lhs>::type _Lhs;
+ typedef typename remove_all<Rhs>::type _Rhs;
+ enum {
+ MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
+ Rows = traits<_Lhs>::RowsAtCompileTime,
+ MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
+ Cols = traits<_Rhs>::ColsAtCompileTime,
+ // Depth is the contraction dimension: lhs columns == rhs rows.
+ MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,
+ traits<_Rhs>::MaxRowsAtCompileTime),
+ Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,
+ traits<_Rhs>::RowsAtCompileTime)
+ };
+private:
+ enum {
+ rows_select = product_size_category<Rows,MaxRows>::value,
+ cols_select = product_size_category<Cols,MaxCols>::value,
+ depth_select = product_size_category<Depth,MaxDepth>::value
+ };
+ typedef product_type_selector<rows_select, cols_select, depth_select> selector;
+public:
+ enum {
+ value = selector::ret,
+ ret = selector::ret
+ };
+#ifdef EIGEN_DEBUG_PRODUCT
+ // Prints the classification inputs and result; debugging aid only.
+ static void debug()
+ {
+ EIGEN_DEBUG_VAR(Rows);
+ EIGEN_DEBUG_VAR(Cols);
+ EIGEN_DEBUG_VAR(Depth);
+ EIGEN_DEBUG_VAR(rows_select);
+ EIGEN_DEBUG_VAR(cols_select);
+ EIGEN_DEBUG_VAR(depth_select);
+ EIGEN_DEBUG_VAR(value);
+ }
+#endif
+};
+// Exhaustive mapping from (rows, cols, depth) categories (1 / Small /
+// Large) to the product implementation.  Depth==1 means outer product;
+// rows==cols==1 means inner product; Large depth with a vector side maps
+// to gemv; Large everywhere maps to gemm; the rest are coefficient-based.
+template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
+template<int M> struct product_type_selector<M, 1, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<int N> struct product_type_selector<1, N, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
+template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
+template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
+template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
+template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Large,Small,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Small,Large,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
+}
+namespace internal {
+template<int Side, int StorageOrder, bool BlasCompatible>
+struct gemv_dense_selector;
+}
+namespace internal {
+// Provides optional static (stack) storage for a temporary gemv vector.
+// When Cond is false or the max size is dynamic, data() returns 0 and the
+// caller falls back to dynamic/stack-allocated memory instead.
+template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
+template<typename Scalar,int Size,int MaxSize>
+struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
+{
+ EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
+};
+template<typename Scalar,int Size>
+struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
+{
+ EIGEN_STRONG_INLINE Scalar* data() { return 0; }
+};
+template<typename Scalar,int Size,int MaxSize>
+struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
+{
+ enum {
+ ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
+ PacketSize = internal::packet_traits<Scalar>::size
+ };
+ #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
+ // Static alignment available: let plain_array align the buffer itself.
+ internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
+ EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
+ #else
+ // No static alignment: over-allocate and round the pointer up by hand
+ // to the next EIGEN_MAX_ALIGN_BYTES boundary when alignment is needed.
+ internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
+ EIGEN_STRONG_INLINE Scalar* data() {
+ return ForceAlignment
+ ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
+ : m_data.array;
+ }
+ #endif
+};
+// Left-sided gemv (dest = rhs * lhs style): reduced to the right-sided
+// case by transposing everything and flipping the storage order.
+template<int StorageOrder, bool BlasCompatible>
+struct gemv_dense_selector<OnTheLeft,StorageOrder,BlasCompatible>
+{
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+ {
+ Transpose<Dest> destT(dest);
+ enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
+ gemv_dense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
+ ::run(rhs.transpose(), lhs.transpose(), destT, alpha);
+ }
+};
+// BLAS-compatible column-major gemv: dest += alpha * lhs * rhs.
+// Writes directly into dest when its memory layout and the scalar types
+// allow it; otherwise accumulates into a temporary and copies/axpys back.
+template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
+{
+ template<typename Lhs, typename Rhs, typename Dest>
+ static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+ {
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+ typedef typename Dest::Scalar ResScalar;
+ typedef typename Dest::RealScalar RealScalar;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+ typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
+ // Strip transposes/conjugates/scalar factors from the operands; the
+ // extracted scalar factors are folded into actualAlpha below.
+ ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
+ ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
+ ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+ * RhsBlasTraits::extractScalarFactor(rhs);
+ typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
+ enum {
+ // Direct write requires unit inner stride in dest.
+ EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
+ // complex = real * complex needs special alpha handling.
+ ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
+ MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
+ };
+ typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
+ typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
+ RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
+ if(!MightCannotUseDest)
+ {
+ // Fast path: the kernel accumulates straight into dest.
+ general_matrix_vector_product
+ <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+ actualLhs.rows(), actualLhs.cols(),
+ LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+ RhsMapper(actualRhs.data(), actualRhs.innerStride()),
+ dest.data(), 1,
+ compatibleAlpha);
+ }
+ else
+ {
+ // Slow path: run the kernel on a (possibly static) temporary.
+ gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
+ const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
+ const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
+ ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+ evalToDest ? dest.data() : static_dest.data());
+ if(!evalToDest)
+ {
+ #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ Index size = dest.size();
+ EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ #endif
+ if(!alphaIsCompatible)
+ {
+ // alpha is applied after the kernel; compute with alpha = 1.
+ MappedDest(actualDestPtr, dest.size()).setZero();
+ compatibleAlpha = RhsScalar(1);
+ }
+ else
+ MappedDest(actualDestPtr, dest.size()) = dest;
+ }
+ general_matrix_vector_product
+ <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+ actualLhs.rows(), actualLhs.cols(),
+ LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+ RhsMapper(actualRhs.data(), actualRhs.innerStride()),
+ actualDestPtr, 1,
+ compatibleAlpha);
+ if (!evalToDest)
+ {
+ // Copy the temporary back, applying alpha if it was deferred.
+ if(!alphaIsCompatible)
+ dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
+ else
+ dest = MappedDest(actualDestPtr, dest.size());
+ }
+ }
+ }
+};
+// BLAS-compatible row-major gemv: dest += alpha * lhs * rhs.  Here it is
+// the rhs vector that may need to be copied into a contiguous buffer
+// (the kernel requires unit inner stride on the rhs).
+template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
+{
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+ {
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+ typedef typename Dest::Scalar ResScalar;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+ typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+ // Fold the operands' scalar factors into a single actualAlpha.
+ typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
+ typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
+ ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+ * RhsBlasTraits::extractScalarFactor(rhs);
+ enum {
+ // The rhs can be used in place only when contiguous.
+ DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
+ };
+ gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+ ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
+ DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
+ if(!DirectlyUseRhs)
+ {
+ #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ Index size = actualRhs.size();
+ EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ #endif
+ // Compact the strided rhs into the temporary buffer.
+ Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+ }
+ typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
+ typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
+ general_matrix_vector_product
+ <Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+ actualLhs.rows(), actualLhs.cols(),
+ LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+ RhsMapper(actualRhsPtr, 1),
+ dest.data(), dest.col(0).innerStride(),
+ actualAlpha);
+ }
+};
+// Non-BLAS-compatible fallbacks, implemented coefficient-wise.
+// Column-major: accumulate dest += (alpha * rhs(k)) * lhs.col(k).
+template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
+{
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+ {
+ // The lhs must not require evaluation into a temporary here.
+ EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
+ typename nested_eval<Rhs,1>::type actual_rhs(rhs);
+ const Index size = rhs.rows();
+ for(Index k=0; k<size; ++k)
+ dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);
+ }
+};
+// Row-major: each dest(i) gets alpha * dot(lhs.row(i), rhs).
+template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
+{
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+ {
+ EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
+ // The rhs is reused for every row, so evaluate it once if beneficial.
+ typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
+ const Index rows = dest.rows();
+ for(Index i=0; i<rows; ++i)
+ dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum();
+ }
+};
+}
+#ifndef __CUDACC__
+// Matrix product operator: returns a lazy Product expression.  The static
+// asserts distinguish an invalid product from a likely-intended dot or
+// coefficient-wise product and emit a tailored error message for each.
+template<typename Derived>
+template<typename OtherDerived>
+inline const Product<Derived, OtherDerived>
+MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
+{
+ enum {
+ // Valid when lhs cols match rhs rows (or either is dynamic).
+ ProductIsValid = Derived::ColsAtCompileTime==Dynamic
+ || OtherDerived::RowsAtCompileTime==Dynamic
+ || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+ AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+ SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+ };
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+ INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+ INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+ EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
+#ifdef EIGEN_DEBUG_PRODUCT
+ internal::product_type<Derived,OtherDerived>::debug();
+#endif
+ return Product<Derived, OtherDerived>(derived(), other.derived());
+}
+#endif
+// Like operator* but forces the LazyProduct (coefficient-based) mode,
+// bypassing the product-type heuristics; same validity checks as above.
+template<typename Derived>
+template<typename OtherDerived>
+const Product<Derived,OtherDerived,LazyProduct>
+MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
+{
+ enum {
+ ProductIsValid = Derived::ColsAtCompileTime==Dynamic
+ || OtherDerived::RowsAtCompileTime==Dynamic
+ || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+ AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+ SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+ };
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+ INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+ INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+ EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
+ return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived());
+}
+}
+#endif
+// end #include "src/Core/GeneralProduct.h"
+// #include "src/Core/products/GeneralBlockPanelKernel.h"
+#ifndef EIGEN_GENERAL_BLOCK_PANEL_H
+#define EIGEN_GENERAL_BLOCK_PANEL_H
+namespace Eigen {
+namespace internal {
+template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
+class gebp_traits;
+// Returns a if it is a valid (positive) queried cache size, else the
+// fallback default b.
+inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
+{
+ return a<=0 ? b : a;
+}
+// Conservative per-architecture cache-size defaults, used when runtime
+// cache detection (queryCacheSizes) fails or reports nothing.
+#if EIGEN_ARCH_i386_OR_x86_64
+const std::ptrdiff_t defaultL1CacheSize = 32*1024;
+const std::ptrdiff_t defaultL2CacheSize = 256*1024;
+const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024;
+#else
+const std::ptrdiff_t defaultL1CacheSize = 16*1024;
+const std::ptrdiff_t defaultL2CacheSize = 512*1024;
+const std::ptrdiff_t defaultL3CacheSize = 512*1024;
+#endif
+// Holds the L1/L2/L3 cache sizes in bytes; the constructor queries the
+// CPU and substitutes the defaults above for unavailable levels.
+struct CacheSizes {
+ CacheSizes(): m_l1(-1),m_l2(-1),m_l3(-1) {
+ int l1CacheSize, l2CacheSize, l3CacheSize;
+ queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);
+ m_l1 = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);
+ m_l2 = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);
+ m_l3 = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);
+ }
+ std::ptrdiff_t m_l1;
+ std::ptrdiff_t m_l2;
+ std::ptrdiff_t m_l3;
+};
+// Gets or sets the cache sizes used by the blocking heuristics, stored in
+// a function-local static.  NOTE(review): the static is initialized
+// thread-safely (C++11), but concurrent Set/Get of its fields is not
+// synchronized — callers are expected to set sizes before parallel use.
+inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
+{
+ static CacheSizes m_cacheSizes;
+ if(action==SetAction)
+ {
+ eigen_internal_assert(l1!=0 && l2!=0);
+ m_cacheSizes.m_l1 = *l1;
+ m_cacheSizes.m_l2 = *l2;
+ m_cacheSizes.m_l3 = *l3;
+ }
+ else if(action==GetAction)
+ {
+ eigen_internal_assert(l1!=0 && l2!=0);
+ *l1 = m_cacheSizes.m_l1;
+ *l2 = m_cacheSizes.m_l2;
+ *l3 = m_cacheSizes.m_l3;
+ }
+ else
+ {
+ // Unknown action value: programming error.
+ eigen_internal_assert(false);
+ }
+}
+// Shrinks the GEMM blocking sizes k (depth), m (rows), n (cols) in place so
+// that the per-panel working sets fit the L1/L2/L3 caches, with a separate
+// heuristic for the multi-threaded case.  Values are only ever reduced.
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
+void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
+{
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+ std::ptrdiff_t l1, l2, l3;
+ manage_caching_sizes(GetAction, &l1, &l2, &l3);
+ if (num_threads > 1) {
+ typedef typename Traits::ResScalar ResScalar;
+ enum {
+ // Bytes consumed per unit of k by one (mr x nr) micro-panel pair.
+ kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
+ ksub = Traits::mr * Traits::nr * sizeof(ResScalar),
+ kr = 8,
+ mr = Traits::mr,
+ nr = Traits::nr
+ };
+ // Fit the k-panel into L1 (capped at 320), rounded down to kr.
+ const Index k_cache = (numext::mini<Index>)((l1-ksub)/kdiv, 320);
+ if (k_cache < k) {
+ k = k_cache - (k_cache % kr);
+ eigen_internal_assert(k > 0);
+ }
+ // Fit the rhs panel into L2, but no larger than one thread's share.
+ const Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
+ const Index n_per_thread = numext::div_ceil(n, num_threads);
+ if (n_cache <= n_per_thread) {
+ eigen_internal_assert(n_cache >= static_cast<Index>(nr));
+ n = n_cache - (n_cache % nr);
+ eigen_internal_assert(n > 0);
+ } else {
+ n = (numext::mini<Index>)(n, (n_per_thread + nr - 1) - ((n_per_thread + nr - 1) % nr));
+ }
+ // If an L3 exists, likewise bound the lhs panel by the L3 share.
+ if (l3 > l2) {
+ const Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
+ const Index m_per_thread = numext::div_ceil(m, num_threads);
+ if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) {
+ m = m_cache - (m_cache % mr);
+ eigen_internal_assert(m > 0);
+ } else {
+ m = (numext::mini<Index>)(m, (m_per_thread + mr - 1) - ((m_per_thread + mr - 1) % mr));
+ }
+ }
+ }
+ else {
+#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
+ l1 = 9*1024;
+ l2 = 32*1024;
+ l3 = 512*1024;
+#endif
+ // Tiny products: blocking would not pay off, keep sizes as-is.
+ if((numext::maxi)(k,(numext::maxi)(m,n))<48)
+ return;
+ typedef typename Traits::ResScalar ResScalar;
+ enum {
+ k_peeling = 8,
+ k_div = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
+ k_sub = Traits::mr * Traits::nr * sizeof(ResScalar)
+ };
+ // Largest k-panel fitting L1, rounded down to a multiple of k_peeling.
+ const Index max_kc = numext::maxi<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1);
+ const Index old_k = k;
+ if(k>max_kc)
+ {
+ // Balance the sweeps: keep the same number of k-sweeps as with
+ // max_kc, while making the panels as even as possible.
+ k = (k%max_kc)==0 ? max_kc
+ : max_kc - k_peeling * ((max_kc-1-(k%max_kc))/(k_peeling*(k/max_kc+1)));
+ eigen_internal_assert(((old_k/k) == (old_k/max_kc)) && "the number of sweeps has to remain the same");
+ }
+ #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
+ const Index actual_l2 = l3;
+ #else
+ // Fixed pseudo-L2 budget (1.5 MiB) used instead of the measured L2.
+ const Index actual_l2 = 1572864;
+ #endif
+ Index max_nc;
+ const Index lhs_bytes = m * k * sizeof(LhsScalar);
+ const Index remaining_l1 = l1- k_sub - lhs_bytes;
+ if(remaining_l1 >= Index(Traits::nr*sizeof(RhsScalar))*k)
+ {
+ // The whole lhs fits in L1: bound nc by the remaining L1 space.
+ max_nc = remaining_l1 / (k*sizeof(RhsScalar));
+ }
+ else
+ {
+ max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
+ }
+ Index nc = numext::mini<Index>(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1));
+ if(n>nc)
+ {
+ // Even out the n-panels, same balancing scheme as for k above.
+ n = (n%nc)==0 ? nc
+ : (nc - Traits::nr * ((nc-(n%nc))/(Traits::nr*(n/nc+1))));
+ }
+ else if(old_k==k)
+ {
+ // k was not clipped: possibly shrink m so the lhs panel fits the
+ // cache level appropriate to the problem size.
+ Index problem_size = k*n*sizeof(LhsScalar);
+ Index actual_lm = actual_l2;
+ Index max_mc = m;
+ if(problem_size<=1024)
+ {
+ actual_lm = l1;
+ }
+ else if(l3!=0 && problem_size<=32768)
+ {
+ actual_lm = l2;
+ max_mc = (numext::mini<Index>)(576,max_mc);
+ }
+ Index mc = (numext::mini<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
+ if (mc > Traits::mr) mc -= mc % Traits::mr;
+ else if (mc==0) return;
+ m = (m%mc)==0 ? mc
+ : (mc - Traits::mr * ((mc-(m%mc))/(Traits::mr*(m/mc+1))));
+ }
+ }
+}
// Override the heuristic blocking sizes with user/test-specified values.
//
// When EIGEN_TEST_SPECIFIC_BLOCKING_SIZES is defined (and evaluates to true
// at run time), each of k/m/n is clamped to the corresponding
// EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_{K,M,N} macro and true is returned so
// the caller skips the cache-based heuristic. Otherwise returns false and
// leaves k/m/n untouched.
template <typename Index>
inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
{
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
  if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
    // Clamp rather than assign: never exceed the actual problem dimensions.
    k = numext::mini<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
    m = numext::mini<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
    n = numext::mini<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
    return true;
  }
#else
  // Silence unused-parameter warnings when the test macro is not defined.
  EIGEN_UNUSED_VARIABLE(k)
  EIGEN_UNUSED_VARIABLE(m)
  EIGEN_UNUSED_VARIABLE(n)
#endif
  return false;
}
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
+void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
+{
+ if (!useSpecificBlockingSizes(k, m, n)) {
+ evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor, Index>(k, m, n, num_threads);
+ }
+}
// Convenience overload with KcFactor fixed to 1 (the common case).
template<typename LhsScalar, typename RhsScalar, typename Index>
inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
  computeProductBlockingSizes<LhsScalar,RhsScalar,1,Index>(k, m, n, num_threads);
}
// CJMADD(CJ,A,B,C,T): conjugate-aware multiply-accumulate C += cj(A)*B,
// with T available as a scratch operand on targets without a fused madd.
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
  // The target provides a single-instruction conjugate madd: call it directly.
  #define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
#else
  // Generic fallback dispatched through gebp_madd below.
  template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
    EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& )
    {
      // Mixed operand types: rely on the conj_helper's pmadd.
      c = cj.pmadd(a,b,c);
    }
  };
  // Specialization when all four operand types coincide: compute the product
  // into the scratch t first, then accumulate with a separate add.
  template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
    EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
    {
      t = b; t = cj.pmul(a,t); c = padd(c,t);
    }
  };
  // Thin dispatcher selecting the appropriate gebp_madd_selector variant.
  template<typename CJ, typename A, typename B, typename C, typename T>
  EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
  {
    gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
  }
  #define CJMADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
#endif
/* Traits describing how the GEBP (general block-panel) micro kernel must
 * operate for a given pair of scalar types. This primary template covers
 * products of like real scalars; the specializations below handle the
 * real/complex combinations.
 *
 * It exposes:
 *  - the scalar and packet types for lhs, rhs and result,
 *  - the register-blocking factors mr x nr of the micro kernel,
 *  - the elementary operations (load, broadcast, madd, accumulate) in
 *    terms of which the kernel is written.
 */
template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs, bool _ConjRhs>
class gebp_traits
{
public:
  typedef _LhsScalar LhsScalar;
  typedef _RhsScalar RhsScalar;
  // Result scalar of LhsScalar * RhsScalar.
  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
  enum {
    ConjLhs = _ConjLhs,
    ConjRhs = _ConjRhs,
    // SIMD is used only when both operand types are vectorizable.
    Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
    LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
    RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
    ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
    NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
    // Register blocking: the micro kernel produces an mr x nr result block.
    nr = 4,
    // Default mr: dedicate half of (up to 16) vector registers to accumulators.
    default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
    // With a fused madd (and neither AltiVec nor VSX) a taller
    // 3-packet-high micro kernel is used instead.
    mr = Vectorizable ? 3*LhsPacketSize : default_mr,
#else
    mr = default_mr,
#endif
    LhsProgress = LhsPacketSize,
    RhsProgress = 1
  };
  typedef typename packet_traits<LhsScalar>::type _LhsPacket;
  typedef typename packet_traits<RhsScalar>::type _RhsPacket;
  typedef typename packet_traits<ResScalar>::type _ResPacket;
  // Degrade to plain scalars when vectorization is unavailable.
  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
  // Accumulators use the result packet layout.
  typedef ResPacket AccPacket;
  // Zero-initialize an accumulator register.
  EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
  {
    p = pset1<ResPacket>(ResScalar(0));
  }
  // Broadcast four consecutive rhs coefficients into four packets.
  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
  {
    pbroadcast4(b, b0, b1, b2, b3);
  }
  // Splat a single rhs coefficient across a packet.
  template<typename RhsPacketType>
  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const
  {
    dest = pset1<RhsPacketType>(*b);
  }
  // Quad-load of rhs coefficients via ploadquad (used by the kernel's
  // depth-quad remainder path).
  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
  {
    dest = ploadquad<RhsPacket>(b);
  }
  // Aligned load of a packed lhs packet.
  template<typename LhsPacketType>
  EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacketType& dest) const
  {
    dest = pload<LhsPacketType>(a);
  }
  // Unaligned load of a lhs packet.
  template<typename LhsPacketType>
  EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const
  {
    dest = ploadu<LhsPacketType>(a);
  }
  // c += conj?(a) * conj?(b); tmp is scratch, used only when no fused
  // madd instruction is available.
  template<typename LhsPacketType, typename RhsPacketType, typename AccPacketType>
  EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, AccPacketType& tmp) const
  {
    conj_helper<LhsPacketType,RhsPacketType,ConjLhs,ConjRhs> cj;
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
    EIGEN_UNUSED_VARIABLE(tmp);
    c = cj.pmadd(a,b,c);
#else
    // No fused madd: form the product in the scratch, then accumulate.
    tmp = b; tmp = cj.pmul(a,tmp); c = padd(c,tmp);
#endif
  }
  // r += alpha * c : scale the accumulator and fold it into the result.
  EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
  {
    r = pmadd(c,alpha,r);
  }
  // Same accumulation for half-size result packets.
  template<typename ResPacketHalf>
  EIGEN_STRONG_INLINE void acc(const ResPacketHalf& c, const ResPacketHalf& alpha, ResPacketHalf& r) const
  {
    r = pmadd(c,alpha,r);
  }
};
/* gebp_traits specialization for (complex lhs) * (real rhs).
 * The rhs is broadcast as plain real packets; products are applied to the
 * raw register (.v) of the complex lhs/accumulator packets, and lhs
 * conjugation is deferred to the final accumulation step (acc).
 */
template<typename RealScalar, bool _ConjLhs>
class gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false>
{
public:
  typedef std::complex<RealScalar> LhsScalar;
  typedef RealScalar RhsScalar;
  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
  enum {
    ConjLhs = _ConjLhs,
    ConjRhs = false,
    Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
    LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
    RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
    ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
    NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
    nr = 4,
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
    // Taller micro kernel when a fused madd is available (see primary template).
    mr = 3*LhsPacketSize,
#else
    mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
#endif
    LhsProgress = LhsPacketSize,
    RhsProgress = 1
  };
  typedef typename packet_traits<LhsScalar>::type _LhsPacket;
  typedef typename packet_traits<RhsScalar>::type _RhsPacket;
  typedef typename packet_traits<ResScalar>::type _ResPacket;
  // Degrade to plain scalars when vectorization is unavailable.
  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
  typedef ResPacket AccPacket;
  // Zero-initialize an accumulator register.
  EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
  {
    p = pset1<ResPacket>(ResScalar(0));
  }
  // Splat a single (real) rhs coefficient across a packet.
  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
  {
    dest = pset1<RhsPacket>(*b);
  }
  // Quad variant: here simply a broadcast of the first coefficient.
  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
  {
    dest = pset1<RhsPacket>(*b);
  }
  // Aligned load of a packed (complex) lhs packet.
  EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
  {
    dest = pload<LhsPacket>(a);
  }
  // Unaligned load of a (complex) lhs packet.
  EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
  {
    dest = ploadu<LhsPacket>(a);
  }
  // Broadcast four consecutive rhs coefficients into four packets.
  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
  {
    pbroadcast4(b, b0, b1, b2, b3);
  }
  // c += a * b, dispatched at compile time on Vectorizable.
  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
  {
    madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
  }
  // Vectorized path: multiply the complex packet's raw register (.v) by the
  // real packet b, component-wise.
  EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
  {
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
    EIGEN_UNUSED_VARIABLE(tmp);
    c.v = pmadd(a.v,b,c.v);
#else
    // No fused madd: form the product in the scratch, then accumulate.
    tmp = b; tmp = pmul(a.v,tmp); c.v = padd(c.v,tmp);
#endif
  }
  // Scalar fallback.
  EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& , const false_type&) const
  {
    c += a * b;
  }
  // r += alpha * c, applying the deferred lhs conjugation via cj.
  EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
  {
    r = cj.pmadd(c,alpha,r);
  }
protected:
  // Handles the ConjLhs conjugation during the final accumulation.
  conj_helper<ResPacket,ResPacket,ConjLhs,false> cj;
};
// Pair of real packets used by the complex*complex path to keep a complex
// quantity in "split" form: for broadcast rhs values, `first` holds the
// (replicated) real parts and `second` the imaginary parts (see the
// complex gebp_traits::loadRhs); accumulators use the same layout.
template<typename Packet>
struct DoublePacket
{
  Packet first;
  Packet second;
};
+template<typename Packet>
+DoublePacket<Packet> padd(const DoublePacket<Packet> &a, const DoublePacket<Packet> &b)
+{
+ DoublePacket<Packet> res;
+ res.first = padd(a.first, b.first);
+ res.second = padd(a.second,b.second);
+ return res;
+}
// Reducing a DoublePacket "down to 4" is a no-op at this level: the input
// is returned unchanged, by const reference (no copy is made).
template<typename Packet>
const DoublePacket<Packet>& predux_downto4(const DoublePacket<Packet> &a)
{
  return a;
}
// A DoublePacket is its own half type: no half-size variant exists.
template<typename Packet> struct unpacket_traits<DoublePacket<Packet> > { typedef DoublePacket<Packet> half; };
/* gebp_traits specialization for (complex lhs) * (complex rhs).
 * The rhs and the accumulators are kept in split form (DoublePacket:
 * separate real and imaginary packets); the interleaved complex result is
 * only reconstructed, with the requested conjugations applied, in acc().
 */
template<typename RealScalar, bool _ConjLhs, bool _ConjRhs>
class gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, _ConjLhs, _ConjRhs >
{
public:
  typedef std::complex<RealScalar> Scalar;
  typedef std::complex<RealScalar> LhsScalar;
  typedef std::complex<RealScalar> RhsScalar;
  typedef std::complex<RealScalar> ResScalar;
  enum {
    ConjLhs = _ConjLhs,
    ConjRhs = _ConjRhs,
    // Both the real packet type and the complex packet type must vectorize.
    Vectorizable = packet_traits<RealScalar>::Vectorizable
                && packet_traits<Scalar>::Vectorizable,
    RealPacketSize = Vectorizable ? packet_traits<RealScalar>::size : 1,
    ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
    LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
    RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
    nr = 4,
    mr = ResPacketSize,
    LhsProgress = ResPacketSize,
    RhsProgress = 1
  };
  typedef typename packet_traits<RealScalar>::type RealPacket;
  typedef typename packet_traits<Scalar>::type ScalarPacket;
  typedef DoublePacket<RealPacket> DoublePacketType;
  // Lhs stays interleaved (as a raw real packet); rhs and accumulators are
  // split; everything degrades to plain scalars without vectorization.
  typedef typename conditional<Vectorizable,RealPacket, Scalar>::type LhsPacket;
  typedef typename conditional<Vectorizable,DoublePacketType,Scalar>::type RhsPacket;
  typedef typename conditional<Vectorizable,ScalarPacket,Scalar>::type ResPacket;
  typedef typename conditional<Vectorizable,DoublePacketType,Scalar>::type AccPacket;
  // Scalar accumulator: zero.
  EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); }
  // Split accumulator: zero both the real and the imaginary half.
  EIGEN_STRONG_INLINE void initAcc(DoublePacketType& p)
  {
    p.first  = pset1<RealPacket>(RealScalar(0));
    p.second = pset1<RealPacket>(RealScalar(0));
  }
  // Splat one complex rhs coefficient as an interleaved complex packet.
  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, ResPacket& dest) const
  {
    dest = pset1<ResPacket>(*b);
  }
  // Splat one complex rhs coefficient in split form: real parts into
  // dest.first, imaginary parts into dest.second.
  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, ResPacket& dest) const
  {
    loadRhs(b,dest);
  }
  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacketType& dest) const
  {
    dest.first  = pset1<RealPacket>(real(*b));
    dest.second = pset1<RealPacket>(imag(*b));
  }
  // Quad variants reduce to plain broadcasts for these packet sizes.
  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, DoublePacketType& dest) const
  {
    eigen_internal_assert(unpacket_traits<ScalarPacket>::size<=4);
    loadRhs(b,dest);
  }
  // Broadcast four / two consecutive rhs coefficients by repeated loadRhs.
  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
  {
    loadRhs(b+0, b0);
    loadRhs(b+1, b1);
    loadRhs(b+2, b2);
    loadRhs(b+3, b3);
  }
  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, DoublePacketType& b0, DoublePacketType& b1)
  {
    loadRhs(b+0, b0);
    loadRhs(b+1, b1);
  }
  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsScalar& b0, RhsScalar& b1)
  {
    loadRhs(b+0, b0);
    loadRhs(b+1, b1);
  }
  // Load the complex lhs reinterpreted as a raw real packet (aligned).
  EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
  {
    dest = pload<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
  }
  // Same, unaligned.
  EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
  {
    dest = ploadu<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
  }
  // Split madd: accumulate a*real(b) and a*imag(b) independently; the
  // complex recombination happens later in acc().
  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, DoublePacketType& c, RhsPacket& ) const
  {
    c.first  = padd(pmul(a,b.first), c.first);
    c.second = padd(pmul(a,b.second),c.second);
  }
  // Interleaved madd used on the non-split path.
  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& ) const
  {
    c = cj.pmadd(a,b,c);
  }
  // Scalar accumulation: r += alpha * c.
  EIGEN_STRONG_INLINE void acc(const Scalar& c, const Scalar& alpha, Scalar& r) const { r += alpha * c; }
  // Recombine the split accumulator into an interleaved complex packet,
  // applying the requested conjugations, then r += alpha * tmp.
  // pcplxflip swaps real/imag lanes, so c.second contributes the cross
  // terms; the four branches place pconj/psub according to (ConjLhs,ConjRhs).
  EIGEN_STRONG_INLINE void acc(const DoublePacketType& c, const ResPacket& alpha, ResPacket& r) const
  {
    ResPacket tmp;
    if((!ConjLhs)&&(!ConjRhs))
    {
      tmp = pcplxflip(pconj(ResPacket(c.second)));
      tmp = padd(ResPacket(c.first),tmp);
    }
    else if((!ConjLhs)&&(ConjRhs))
    {
      tmp = pconj(pcplxflip(ResPacket(c.second)));
      tmp = padd(ResPacket(c.first),tmp);
    }
    else if((ConjLhs)&&(!ConjRhs))
    {
      tmp = pcplxflip(ResPacket(c.second));
      tmp = padd(pconj(ResPacket(c.first)),tmp);
    }
    else if((ConjLhs)&&(ConjRhs))
    {
      tmp = pcplxflip(ResPacket(c.second));
      tmp = psub(pconj(ResPacket(c.first)),tmp);
    }
    r = pmadd(tmp,alpha,r);
  }
protected:
  conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
};
/* gebp_traits specialization for (real lhs) * (complex rhs).
 * Each real lhs coefficient is duplicated (ploaddup) so it multiplies both
 * the real and imaginary lanes of the interleaved complex rhs packet; rhs
 * conjugation is deferred to the final accumulation step (acc).
 */
template<typename RealScalar, bool _ConjRhs>
class gebp_traits<RealScalar, std::complex<RealScalar>, false, _ConjRhs >
{
public:
  typedef std::complex<RealScalar> Scalar;
  typedef RealScalar LhsScalar;
  typedef Scalar RhsScalar;
  typedef Scalar ResScalar;
  enum {
    ConjLhs = false,
    ConjRhs = _ConjRhs,
    // Both the real and the complex packet types must vectorize.
    Vectorizable = packet_traits<RealScalar>::Vectorizable
                && packet_traits<Scalar>::Vectorizable,
    LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
    RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
    ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
    NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
    nr = 4,
    // Dedicate half of (up to 16) vector registers to accumulators.
    mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*ResPacketSize,
    LhsProgress = ResPacketSize,
    RhsProgress = 1
  };
  typedef typename packet_traits<LhsScalar>::type _LhsPacket;
  typedef typename packet_traits<RhsScalar>::type _RhsPacket;
  typedef typename packet_traits<ResScalar>::type _ResPacket;
  // Degrade to plain scalars when vectorization is unavailable.
  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
  typedef ResPacket AccPacket;
  // Zero-initialize an accumulator register.
  EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
  {
    p = pset1<ResPacket>(ResScalar(0));
  }
  // Splat a single complex rhs coefficient across a packet.
  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
  {
    dest = pset1<RhsPacket>(*b);
  }
  // Broadcast four consecutive rhs coefficients into four packets.
  void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
  {
    pbroadcast4(b, b0, b1, b2, b3);
  }
  // Duplicate each real lhs coefficient so it pairs with the interleaved
  // real/imag lanes of a complex rhs packet.
  EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
  {
    dest = ploaddup<LhsPacket>(a);
  }
  // Quad variant reduces to a plain broadcast for these packet sizes.
  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
  {
    eigen_internal_assert(unpacket_traits<RhsPacket>::size<=4);
    loadRhs(b,dest);
  }
  // Unaligned variant of loadLhs (ploaddup handles the access here too).
  EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
  {
    dest = ploaddup<LhsPacket>(a);
  }
  // c += a * b, dispatched at compile time on Vectorizable.
  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
  {
    madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
  }
  // Vectorized path: multiply against the complex packet's raw register (.v).
  EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
  {
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
    EIGEN_UNUSED_VARIABLE(tmp);
    c.v = pmadd(a,b.v,c.v);
#else
    // No fused madd: form the product in the scratch, then accumulate.
    tmp = b; tmp.v = pmul(a,tmp.v); c = padd(c,tmp);
#endif
  }
  // Scalar fallback.
  EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& , const false_type&) const
  {
    c += a * b;
  }
  // r += alpha * c, applying the deferred rhs conjugation via cj.
  EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
  {
    r = cj.pmadd(alpha,c,r);
  }
protected:
  // Handles the ConjRhs conjugation during the final accumulation.
  conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
};
/* The GEBP (general block-panel) micro kernel.
 *
 * operator() accumulates alpha * blockA * blockB into the result addressed
 * through `res`, where blockA is a packed rows x depth lhs block and blockB
 * a packed depth x cols rhs panel. All register-level operations and the
 * mr x nr blocking factors come from gebp_traits above.
 *
 * SwappedTraits is the traits type with the lhs/rhs roles (and conjugation
 * flags) exchanged — presumably used by remainder paths of operator();
 * its use lies outside this declaration, verify in the definition.
 */
template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
struct gebp_kernel
{
  typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> Traits;
  typedef typename Traits::ResScalar ResScalar;
  typedef typename Traits::LhsPacket LhsPacket;
  typedef typename Traits::RhsPacket RhsPacket;
  typedef typename Traits::ResPacket ResPacket;
  typedef typename Traits::AccPacket AccPacket;
  typedef gebp_traits<RhsScalar,LhsScalar,ConjugateRhs,ConjugateLhs> SwappedTraits;
  typedef typename SwappedTraits::ResScalar SResScalar;
  typedef typename SwappedTraits::LhsPacket SLhsPacket;
  typedef typename SwappedTraits::RhsPacket SRhsPacket;
  typedef typename SwappedTraits::ResPacket SResPacket;
  typedef typename SwappedTraits::AccPacket SAccPacket;
  typedef typename DataMapper::LinearMapper LinearMapper;
  enum {
    Vectorizable = Traits::Vectorizable,
    LhsProgress = Traits::LhsProgress,
    RhsProgress = Traits::RhsProgress,
    ResPacketSize = Traits::ResPacketSize
  };
  // strideA/strideB default to -1, which the definition replaces by `depth`.
  EIGEN_DONT_INLINE
  void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
                  Index rows, Index depth, Index cols, ResScalar alpha,
                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);
};
+template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+EIGEN_DONT_INLINE
+void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,ConjugateRhs>
+ ::operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
+ Index rows, Index depth, Index cols, ResScalar alpha,
+ Index strideA, Index strideB, Index offsetA, Index offsetB)
+ {
+ Traits traits;
+ SwappedTraits straits;
+ if(strideA==-1) strideA = depth;
+ if(strideB==-1) strideB = depth;
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+ Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+ const Index peeled_mc3 = mr>=3*Traits::LhsProgress ? (rows/(3*LhsProgress))*(3*LhsProgress) : 0;
+ const Index peeled_mc2 = mr>=2*Traits::LhsProgress ? peeled_mc3+((rows-peeled_mc3)/(2*LhsProgress))*(2*LhsProgress) : 0;
+ const Index peeled_mc1 = mr>=1*Traits::LhsProgress ? (rows/(1*LhsProgress))*(1*LhsProgress) : 0;
+ enum { pk = 8 };
+ const Index peeled_kc = depth & ~(pk-1);
+ const Index prefetch_res_offset = 32/sizeof(ResScalar);
+ if(mr>=3*Traits::LhsProgress)
+ {
+ const Index l1 = defaultL1CacheSize;
+ const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));
+ for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
+ {
+ const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);
+ for(Index j2=0; j2<packet_cols4; j2+=nr)
+ {
+ for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*LhsProgress)];
+ prefetch(&blA[0]);
+ AccPacket C0, C1, C2, C3,
+ C4, C5, C6, C7,
+ C8, C9, C10, C11;
+ traits.initAcc(C0); traits.initAcc(C1); traits.initAcc(C2); traits.initAcc(C3);
+ traits.initAcc(C4); traits.initAcc(C5); traits.initAcc(C6); traits.initAcc(C7);
+ traits.initAcc(C8); traits.initAcc(C9); traits.initAcc(C10); traits.initAcc(C11);
+ LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+ LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+ LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+ LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+ r0.prefetch(0);
+ r1.prefetch(0);
+ r2.prefetch(0);
+ r3.prefetch(0);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+ prefetch(&blB[0]);
+ LhsPacket A0, A1;
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX4");
+ RhsPacket B_0, T0;
+ LhsPacket A2;
+#define EIGEN_GEBP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ internal::prefetch(blA+(3*K+16)*LhsProgress); \
+ if (EIGEN_ARCH_ARM) { internal::prefetch(blB+(4*K+16)*RhsProgress); } \
+ traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
+ traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
+ traits.loadRhs(blB + (0+4*K)*Traits::RhsProgress, B_0); \
+ traits.madd(A0, B_0, C0, T0); \
+ traits.madd(A1, B_0, C4, T0); \
+ traits.madd(A2, B_0, C8, B_0); \
+ traits.loadRhs(blB + (1+4*K)*Traits::RhsProgress, B_0); \
+ traits.madd(A0, B_0, C1, T0); \
+ traits.madd(A1, B_0, C5, T0); \
+ traits.madd(A2, B_0, C9, B_0); \
+ traits.loadRhs(blB + (2+4*K)*Traits::RhsProgress, B_0); \
+ traits.madd(A0, B_0, C2, T0); \
+ traits.madd(A1, B_0, C6, T0); \
+ traits.madd(A2, B_0, C10, B_0); \
+ traits.loadRhs(blB + (3+4*K)*Traits::RhsProgress, B_0); \
+ traits.madd(A0, B_0, C3 , T0); \
+ traits.madd(A1, B_0, C7, T0); \
+ traits.madd(A2, B_0, C11, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX4"); \
+ } while(false)
+ internal::prefetch(blB);
+ EIGEN_GEBP_ONESTEP(0);
+ EIGEN_GEBP_ONESTEP(1);
+ EIGEN_GEBP_ONESTEP(2);
+ EIGEN_GEBP_ONESTEP(3);
+ EIGEN_GEBP_ONESTEP(4);
+ EIGEN_GEBP_ONESTEP(5);
+ EIGEN_GEBP_ONESTEP(6);
+ EIGEN_GEBP_ONESTEP(7);
+ blB += pk*4*RhsProgress;
+ blA += pk*3*Traits::LhsProgress;
+ EIGEN_ASM_COMMENT("end gebp micro kernel 3pX4");
+ }
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0, T0;
+ LhsPacket A2;
+ EIGEN_GEBP_ONESTEP(0);
+ blB += 4*RhsProgress;
+ blA += 3*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBP_ONESTEP
+ ResPacket R0, R1, R2;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r0.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C4, alphav, R1);
+ traits.acc(C8, alphav, R2);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r0.storePacket(1 * Traits::ResPacketSize, R1);
+ r0.storePacket(2 * Traits::ResPacketSize, R2);
+ R0 = r1.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r1.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r1.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C1, alphav, R0);
+ traits.acc(C5, alphav, R1);
+ traits.acc(C9, alphav, R2);
+ r1.storePacket(0 * Traits::ResPacketSize, R0);
+ r1.storePacket(1 * Traits::ResPacketSize, R1);
+ r1.storePacket(2 * Traits::ResPacketSize, R2);
+ R0 = r2.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r2.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r2.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C2, alphav, R0);
+ traits.acc(C6, alphav, R1);
+ traits.acc(C10, alphav, R2);
+ r2.storePacket(0 * Traits::ResPacketSize, R0);
+ r2.storePacket(1 * Traits::ResPacketSize, R1);
+ r2.storePacket(2 * Traits::ResPacketSize, R2);
+ R0 = r3.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r3.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r3.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C3, alphav, R0);
+ traits.acc(C7, alphav, R1);
+ traits.acc(C11, alphav, R2);
+ r3.storePacket(0 * Traits::ResPacketSize, R0);
+ r3.storePacket(1 * Traits::ResPacketSize, R1);
+ r3.storePacket(2 * Traits::ResPacketSize, R2);
+ }
+ }
+ for(Index j2=packet_cols4; j2<cols; j2++)
+ {
+ for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ AccPacket C0, C4, C8;
+ traits.initAcc(C0);
+ traits.initAcc(C4);
+ traits.initAcc(C8);
+ LinearMapper r0 = res.getLinearMapper(i, j2);
+ r0.prefetch(0);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+ LhsPacket A0, A1, A2;
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX1");
+ RhsPacket B_0;
+#define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX1"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
+ traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
+ traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C0, B_0); \
+ traits.madd(A1, B_0, C4, B_0); \
+ traits.madd(A2, B_0, C8, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX1"); \
+ } while(false)
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+ blB += pk*RhsProgress;
+ blA += pk*3*Traits::LhsProgress;
+ EIGEN_ASM_COMMENT("end gebp micro kernel 3pX1");
+ }
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += RhsProgress;
+ blA += 3*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+ ResPacket R0, R1, R2;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r0.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C4, alphav, R1);
+ traits.acc(C8, alphav, R2);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r0.storePacket(1 * Traits::ResPacketSize, R1);
+ r0.storePacket(2 * Traits::ResPacketSize, R2);
+ }
+ }
+ }
+ }
+ if(mr>=2*Traits::LhsProgress)
+ {
+ const Index l1 = defaultL1CacheSize;
+ Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ));
+ for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)
+ {
+ Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);
+ for(Index j2=0; j2<packet_cols4; j2+=nr)
+ {
+ for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ AccPacket C0, C1, C2, C3,
+ C4, C5, C6, C7;
+ traits.initAcc(C0); traits.initAcc(C1); traits.initAcc(C2); traits.initAcc(C3);
+ traits.initAcc(C4); traits.initAcc(C5); traits.initAcc(C6); traits.initAcc(C7);
+ LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+ LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+ LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+ LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+ r0.prefetch(prefetch_res_offset);
+ r1.prefetch(prefetch_res_offset);
+ r2.prefetch(prefetch_res_offset);
+ r3.prefetch(prefetch_res_offset);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+ prefetch(&blB[0]);
+ LhsPacket A0, A1;
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX4");
+ RhsPacket B_0, B1, B2, B3, T0;
+ #define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
+ traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
+ traits.madd(A0, B_0, C0, T0); \
+ traits.madd(A1, B_0, C4, B_0); \
+ traits.madd(A0, B1, C1, T0); \
+ traits.madd(A1, B1, C5, B1); \
+ traits.madd(A0, B2, C2, T0); \
+ traits.madd(A1, B2, C6, B2); \
+ traits.madd(A0, B3, C3, T0); \
+ traits.madd(A1, B3, C7, B3); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX4"); \
+ } while(false)
+ internal::prefetch(blB+(48+0));
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ internal::prefetch(blB+(48+16));
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+ blB += pk*4*RhsProgress;
+ blA += pk*(2*Traits::LhsProgress);
+ EIGEN_ASM_COMMENT("end gebp micro kernel 2pX4");
+ }
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0, B1, B2, B3, T0;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += 4*RhsProgress;
+ blA += 2*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+ ResPacket R0, R1, R2, R3;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r1.loadPacket(0 * Traits::ResPacketSize);
+ R3 = r1.loadPacket(1 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C4, alphav, R1);
+ traits.acc(C1, alphav, R2);
+ traits.acc(C5, alphav, R3);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r0.storePacket(1 * Traits::ResPacketSize, R1);
+ r1.storePacket(0 * Traits::ResPacketSize, R2);
+ r1.storePacket(1 * Traits::ResPacketSize, R3);
+ R0 = r2.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r2.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r3.loadPacket(0 * Traits::ResPacketSize);
+ R3 = r3.loadPacket(1 * Traits::ResPacketSize);
+ traits.acc(C2, alphav, R0);
+ traits.acc(C6, alphav, R1);
+ traits.acc(C3, alphav, R2);
+ traits.acc(C7, alphav, R3);
+ r2.storePacket(0 * Traits::ResPacketSize, R0);
+ r2.storePacket(1 * Traits::ResPacketSize, R1);
+ r3.storePacket(0 * Traits::ResPacketSize, R2);
+ r3.storePacket(1 * Traits::ResPacketSize, R3);
+ }
+ }
+ for(Index j2=packet_cols4; j2<cols; j2++)
+ {
+ for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ AccPacket C0, C4;
+ traits.initAcc(C0);
+ traits.initAcc(C4);
+ LinearMapper r0 = res.getLinearMapper(i, j2);
+ r0.prefetch(prefetch_res_offset);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+ LhsPacket A0, A1;
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX1");
+ RhsPacket B_0, B1;
+#define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
+ traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C0, B1); \
+ traits.madd(A1, B_0, C4, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1"); \
+ } while(false)
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+ blB += pk*RhsProgress;
+ blA += pk*2*Traits::LhsProgress;
+ EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1");
+ }
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0, B1;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += RhsProgress;
+ blA += 2*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+ ResPacket R0, R1;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C4, alphav, R1);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r0.storePacket(1 * Traits::ResPacketSize, R1);
+ }
+ }
+ }
+ }
+ if(mr>=1*Traits::LhsProgress)
+ {
+ for(Index i=peeled_mc2; i<peeled_mc1; i+=1*LhsProgress)
+ {
+ for(Index j2=0; j2<packet_cols4; j2+=nr)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(1*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ AccPacket C0, C1, C2, C3;
+ traits.initAcc(C0);
+ traits.initAcc(C1);
+ traits.initAcc(C2);
+ traits.initAcc(C3);
+ LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+ LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+ LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+ LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+ r0.prefetch(prefetch_res_offset);
+ r1.prefetch(prefetch_res_offset);
+ r2.prefetch(prefetch_res_offset);
+ r3.prefetch(prefetch_res_offset);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+ prefetch(&blB[0]);
+ LhsPacket A0;
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX4");
+ RhsPacket B_0, B1, B2, B3;
+#define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX4"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
+ traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
+ traits.madd(A0, B_0, C0, B_0); \
+ traits.madd(A0, B1, C1, B1); \
+ traits.madd(A0, B2, C2, B2); \
+ traits.madd(A0, B3, C3, B3); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX4"); \
+ } while(false)
+ internal::prefetch(blB+(48+0));
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ internal::prefetch(blB+(48+16));
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+ blB += pk*4*RhsProgress;
+ blA += pk*1*LhsProgress;
+ EIGEN_ASM_COMMENT("end gebp micro kernel 1pX4");
+ }
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0, B1, B2, B3;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += 4*RhsProgress;
+ blA += 1*LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+ ResPacket R0, R1;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r1.loadPacket(0 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C1, alphav, R1);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r1.storePacket(0 * Traits::ResPacketSize, R1);
+ R0 = r2.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r3.loadPacket(0 * Traits::ResPacketSize);
+ traits.acc(C2, alphav, R0);
+ traits.acc(C3, alphav, R1);
+ r2.storePacket(0 * Traits::ResPacketSize, R0);
+ r3.storePacket(0 * Traits::ResPacketSize, R1);
+ }
+ for(Index j2=packet_cols4; j2<cols; j2++)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(1*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ AccPacket C0;
+ traits.initAcc(C0);
+ LinearMapper r0 = res.getLinearMapper(i, j2);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+ LhsPacket A0;
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX1");
+ RhsPacket B_0;
+#define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX1"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
+ traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C0, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX1"); \
+ } while(false);
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+ blB += pk*RhsProgress;
+ blA += pk*1*Traits::LhsProgress;
+ EIGEN_ASM_COMMENT("end gebp micro kernel 1pX1");
+ }
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += RhsProgress;
+ blA += 1*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+ ResPacket R0;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ }
+ }
+ }
+ if(peeled_mc1<rows)
+ {
+ for(Index j2=0; j2<packet_cols4; j2+=nr)
+ {
+ for(Index i=peeled_mc1; i<rows; i+=1)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA];
+ prefetch(&blA[0]);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+ typedef typename unpacket_traits<SResPacket>::half SResPacketHalf;
+ if ((SwappedTraits::LhsProgress % 4) == 0 &&
+ (SwappedTraits::LhsProgress <= 8) &&
+ (SwappedTraits::LhsProgress!=8 || unpacket_traits<SResPacketHalf>::size==nr))
+ {
+ SAccPacket C0, C1, C2, C3;
+ straits.initAcc(C0);
+ straits.initAcc(C1);
+ straits.initAcc(C2);
+ straits.initAcc(C3);
+ const Index spk = (std::max)(1,SwappedTraits::LhsProgress/4);
+ const Index endk = (depth/spk)*spk;
+ const Index endk4 = (depth/(spk*4))*(spk*4);
+ Index k=0;
+ for(; k<endk4; k+=4*spk)
+ {
+ SLhsPacket A0,A1;
+ SRhsPacket B_0,B_1;
+ straits.loadLhsUnaligned(blB+0*SwappedTraits::LhsProgress, A0);
+ straits.loadLhsUnaligned(blB+1*SwappedTraits::LhsProgress, A1);
+ straits.loadRhsQuad(blA+0*spk, B_0);
+ straits.loadRhsQuad(blA+1*spk, B_1);
+ straits.madd(A0,B_0,C0,B_0);
+ straits.madd(A1,B_1,C1,B_1);
+ straits.loadLhsUnaligned(blB+2*SwappedTraits::LhsProgress, A0);
+ straits.loadLhsUnaligned(blB+3*SwappedTraits::LhsProgress, A1);
+ straits.loadRhsQuad(blA+2*spk, B_0);
+ straits.loadRhsQuad(blA+3*spk, B_1);
+ straits.madd(A0,B_0,C2,B_0);
+ straits.madd(A1,B_1,C3,B_1);
+ blB += 4*SwappedTraits::LhsProgress;
+ blA += 4*spk;
+ }
+ C0 = padd(padd(C0,C1),padd(C2,C3));
+ for(; k<endk; k+=spk)
+ {
+ SLhsPacket A0;
+ SRhsPacket B_0;
+ straits.loadLhsUnaligned(blB, A0);
+ straits.loadRhsQuad(blA, B_0);
+ straits.madd(A0,B_0,C0,B_0);
+ blB += SwappedTraits::LhsProgress;
+ blA += spk;
+ }
+ if(SwappedTraits::LhsProgress==8)
+ {
+ typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SResPacket>::half,SResPacket>::type SResPacketHalf;
+ typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SLhsPacket>::half,SLhsPacket>::type SLhsPacketHalf;
+ typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SLhsPacket>::half,SRhsPacket>::type SRhsPacketHalf;
+ typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SAccPacket>::half,SAccPacket>::type SAccPacketHalf;
+ SResPacketHalf R = res.template gatherPacket<SResPacketHalf>(i, j2);
+ SResPacketHalf alphav = pset1<SResPacketHalf>(alpha);
+ if(depth-endk>0)
+ {
+ SLhsPacketHalf a0;
+ SRhsPacketHalf b0;
+ straits.loadLhsUnaligned(blB, a0);
+ straits.loadRhs(blA, b0);
+ SAccPacketHalf c0 = predux_downto4(C0);
+ straits.madd(a0,b0,c0,b0);
+ straits.acc(c0, alphav, R);
+ }
+ else
+ {
+ straits.acc(predux_downto4(C0), alphav, R);
+ }
+ res.scatterPacket(i, j2, R);
+ }
+ else
+ {
+ SResPacket R = res.template gatherPacket<SResPacket>(i, j2);
+ SResPacket alphav = pset1<SResPacket>(alpha);
+ straits.acc(C0, alphav, R);
+ res.scatterPacket(i, j2, R);
+ }
+ }
+ else
+ {
+ ResScalar C0(0), C1(0), C2(0), C3(0);
+ for(Index k=0; k<depth; k++)
+ {
+ LhsScalar A0;
+ RhsScalar B_0, B_1;
+ A0 = blA[k];
+ B_0 = blB[0];
+ B_1 = blB[1];
+ CJMADD(cj,A0,B_0,C0, B_0);
+ CJMADD(cj,A0,B_1,C1, B_1);
+ B_0 = blB[2];
+ B_1 = blB[3];
+ CJMADD(cj,A0,B_0,C2, B_0);
+ CJMADD(cj,A0,B_1,C3, B_1);
+ blB += 4;
+ }
+ res(i, j2 + 0) += alpha * C0;
+ res(i, j2 + 1) += alpha * C1;
+ res(i, j2 + 2) += alpha * C2;
+ res(i, j2 + 3) += alpha * C3;
+ }
+ }
+ }
+ for(Index j2=packet_cols4; j2<cols; j2++)
+ {
+ for(Index i=peeled_mc1; i<rows; i+=1)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA];
+ prefetch(&blA[0]);
+ ResScalar C0(0);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+ for(Index k=0; k<depth; k++)
+ {
+ LhsScalar A0 = blA[k];
+ RhsScalar B_0 = blB[k];
+ CJMADD(cj, A0, B_0, C0, B_0);
+ }
+ res(i, j2) += alpha * C0;
+ }
+ }
+ }
+ }
+#undef CJMADD
+// gemm_pack_lhs, ColMajor: copies a rows x depth panel of the left-hand side
+// into the contiguous buffer blockA, in the register-blocked order consumed by
+// the gebp micro kernels: strips of 3*PacketSize, 2*PacketSize, 1*PacketSize
+// rows, then Pack2-wide strips, then one scalar row at a time.
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode>
+{
+ typedef typename DataMapper::LinearMapper LinearMapper;
+ // In PanelMode, 'stride' and 'offset' place each strip inside a larger panel
+ // (stride>=depth, offset<=stride); otherwise both must be 0 (asserted below).
+ EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
+};
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ enum { PacketSize = packet_traits<Scalar>::size };
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(offset);
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ eigen_assert( ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) || (Pack1<=4) );
+ // cj conjugates each stored value iff Scalar is complex and Conjugate is set.
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index count = 0;
+ // End row indices of the 3-, 2- and 1-packet strips processed below; a strip
+ // is only used when Pack1 is large enough to ever emit it.
+ const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
+ const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
+ const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
+ const Index peeled_mc0 = Pack2>=1*PacketSize ? peeled_mc1
+ : Pack2>1 ? (rows/Pack2)*Pack2 : 0;
+ Index i=0;
+ // Pack 3 packets per column: blockA gets A|B|C interleaved per k.
+ if(Pack1>=3*PacketSize)
+ {
+ for(; i<peeled_mc3; i+=3*PacketSize)
+ {
+ if(PanelMode) count += (3*PacketSize) * offset;
+ for(Index k=0; k<depth; k++)
+ {
+ Packet A, B, C;
+ A = lhs.loadPacket(i+0*PacketSize, k);
+ B = lhs.loadPacket(i+1*PacketSize, k);
+ C = lhs.loadPacket(i+2*PacketSize, k);
+ pstore(blockA+count, cj.pconj(A)); count+=PacketSize;
+ pstore(blockA+count, cj.pconj(B)); count+=PacketSize;
+ pstore(blockA+count, cj.pconj(C)); count+=PacketSize;
+ }
+ if(PanelMode) count += (3*PacketSize) * (stride-offset-depth);
+ }
+ }
+ // Pack 2 packets per column.
+ if(Pack1>=2*PacketSize)
+ {
+ for(; i<peeled_mc2; i+=2*PacketSize)
+ {
+ if(PanelMode) count += (2*PacketSize) * offset;
+ for(Index k=0; k<depth; k++)
+ {
+ Packet A, B;
+ A = lhs.loadPacket(i+0*PacketSize, k);
+ B = lhs.loadPacket(i+1*PacketSize, k);
+ pstore(blockA+count, cj.pconj(A)); count+=PacketSize;
+ pstore(blockA+count, cj.pconj(B)); count+=PacketSize;
+ }
+ if(PanelMode) count += (2*PacketSize) * (stride-offset-depth);
+ }
+ }
+ // Pack 1 packet per column.
+ if(Pack1>=1*PacketSize)
+ {
+ for(; i<peeled_mc1; i+=1*PacketSize)
+ {
+ if(PanelMode) count += (1*PacketSize) * offset;
+ for(Index k=0; k<depth; k++)
+ {
+ Packet A;
+ A = lhs.loadPacket(i+0*PacketSize, k);
+ pstore(blockA+count, cj.pconj(A));
+ count+=PacketSize;
+ }
+ if(PanelMode) count += (1*PacketSize) * (stride-offset-depth);
+ }
+ }
+ // Sub-packet strips of Pack2 rows, copied scalar by scalar.
+ if(Pack2<PacketSize && Pack2>1)
+ {
+ for(; i<peeled_mc0; i+=Pack2)
+ {
+ if(PanelMode) count += Pack2 * offset;
+ for(Index k=0; k<depth; k++)
+ for(Index w=0; w<Pack2; w++)
+ blockA[count++] = cj(lhs(i+w, k));
+ if(PanelMode) count += Pack2 * (stride-offset-depth);
+ }
+ }
+ // Remaining rows one at a time.
+ for(; i<rows; i++)
+ {
+ if(PanelMode) count += offset;
+ for(Index k=0; k<depth; k++)
+ blockA[count++] = cj(lhs(i, k));
+ if(PanelMode) count += (stride-offset-depth);
+ }
+}
+// gemm_pack_lhs, RowMajor: same output layout as the ColMajor variant, but the
+// source is row-major, so packet-sized tiles are gathered and transposed
+// (ptranspose) before being stored into blockA.
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode>
+{
+ typedef typename DataMapper::LinearMapper LinearMapper;
+ EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
+};
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ enum { PacketSize = packet_traits<Scalar>::size };
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(offset);
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index count = 0;
+ // Start with the widest strip (Pack1 rows) and shrink by PacketSize each
+ // pass (falling through to Pack2) until all row strips are consumed.
+ int pack = Pack1;
+ Index i = 0;
+ while(pack>0)
+ {
+ Index remaining_rows = rows-i;
+ Index peeled_mc = i+(remaining_rows/pack)*pack;
+ for(; i<peeled_mc; i+=pack)
+ {
+ if(PanelMode) count += pack * offset;
+ const Index peeled_k = (depth/PacketSize)*PacketSize;
+ Index k=0;
+ if(pack>=PacketSize)
+ {
+ // Vectorized path: load PacketSize x PacketSize tiles, transpose them
+ // in registers, and store column-wise into blockA.
+ for(; k<peeled_k; k+=PacketSize)
+ {
+ for (Index m = 0; m < pack; m += PacketSize)
+ {
+ PacketBlock<Packet> kernel;
+ for (int p = 0; p < PacketSize; ++p) kernel.packet[p] = lhs.loadPacket(i+p+m, k);
+ ptranspose(kernel);
+ for (int p = 0; p < PacketSize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel.packet[p]));
+ }
+ count += PacketSize*pack;
+ }
+ }
+ // Scalar tail over k, manually unrolled 4 rows at a time.
+ for(; k<depth; k++)
+ {
+ Index w=0;
+ for(; w<pack-3; w+=4)
+ {
+ Scalar a(cj(lhs(i+w+0, k))),
+ b(cj(lhs(i+w+1, k))),
+ c(cj(lhs(i+w+2, k))),
+ d(cj(lhs(i+w+3, k)));
+ blockA[count++] = a;
+ blockA[count++] = b;
+ blockA[count++] = c;
+ blockA[count++] = d;
+ }
+ if(pack%4)
+ for(;w<pack;++w)
+ blockA[count++] = cj(lhs(i+w, k));
+ }
+ if(PanelMode) count += pack * (stride-offset-depth);
+ }
+ // Shrink the strip width; jump straight to Pack2 when the next multiple of
+ // PacketSize would skip over it.
+ pack -= PacketSize;
+ if(pack<Pack2 && (pack+PacketSize)!=Pack2)
+ pack = Pack2;
+ }
+ // Remaining rows one at a time.
+ for(; i<rows; i++)
+ {
+ if(PanelMode) count += offset;
+ for(Index k=0; k<depth; k++)
+ blockA[count++] = cj(lhs(i, k));
+ if(PanelMode) count += (stride-offset-depth);
+ }
+}
+// gemm_pack_rhs, ColMajor: copies a depth x cols panel of the right-hand side
+// into blockB, interleaving groups of 4 columns (when nr>=4) so the gebp
+// kernel reads B values for 4 columns contiguously, then single columns.
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+struct gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename DataMapper::LinearMapper LinearMapper;
+ enum { PacketSize = packet_traits<Scalar>::size };
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(offset);
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ // packet_cols8 is the starting column of the 4-wide section; the 8-wide
+ // section (nr>=8), if any, is handled elsewhere.
+ Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
+ Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+ Index count = 0;
+ const Index peeled_k = (depth/PacketSize)*PacketSize;
+ if(nr>=4)
+ {
+ for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
+ {
+ if(PanelMode) count += 4 * offset;
+ const LinearMapper dm0 = rhs.getLinearMapper(0, j2 + 0);
+ const LinearMapper dm1 = rhs.getLinearMapper(0, j2 + 1);
+ const LinearMapper dm2 = rhs.getLinearMapper(0, j2 + 2);
+ const LinearMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
+ Index k=0;
+ if((PacketSize%4)==0)
+ {
+ // Vectorized path: transpose a PacketSize x 4 tile so that the 4
+ // column values for each k become contiguous in blockB.
+ for(; k<peeled_k; k+=PacketSize) {
+ PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize> kernel;
+ kernel.packet[0] = dm0.loadPacket(k);
+ kernel.packet[1%PacketSize] = dm1.loadPacket(k);
+ kernel.packet[2%PacketSize] = dm2.loadPacket(k);
+ kernel.packet[3%PacketSize] = dm3.loadPacket(k);
+ ptranspose(kernel);
+ pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel.packet[0]));
+ pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1%PacketSize]));
+ pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel.packet[2%PacketSize]));
+ pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel.packet[3%PacketSize]));
+ count+=4*PacketSize;
+ }
+ }
+ // Scalar tail over k.
+ for(; k<depth; k++)
+ {
+ blockB[count+0] = cj(dm0(k));
+ blockB[count+1] = cj(dm1(k));
+ blockB[count+2] = cj(dm2(k));
+ blockB[count+3] = cj(dm3(k));
+ count += 4;
+ }
+ if(PanelMode) count += 4 * (stride-offset-depth);
+ }
+ }
+ // Remaining columns, copied one at a time.
+ for(Index j2=packet_cols4; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ const LinearMapper dm0 = rhs.getLinearMapper(0, j2);
+ for(Index k=0; k<depth; k++)
+ {
+ blockB[count] = cj(dm0(k));
+ count += 1;
+ }
+ if(PanelMode) count += (stride-offset-depth);
+ }
+}
+// gemm_pack_rhs, RowMajor: the 4 values of a column group are already
+// contiguous in memory for each k, so no transpose is needed — a single
+// packet load/store suffices when PacketSize==4.
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+struct gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename DataMapper::LinearMapper LinearMapper;
+ enum { PacketSize = packet_traits<Scalar>::size };
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(offset);
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
+ Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+ Index count = 0;
+ if(nr>=4)
+ {
+ for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
+ {
+ if(PanelMode) count += 4 * offset;
+ for(Index k=0; k<depth; k++)
+ {
+ if (PacketSize==4) {
+ // One packet covers exactly the 4 columns of this group.
+ Packet A = rhs.loadPacket(k, j2);
+ pstoreu(blockB+count, cj.pconj(A));
+ count += PacketSize;
+ } else {
+ // Scalar fallback for other packet sizes.
+ const LinearMapper dm0 = rhs.getLinearMapper(k, j2);
+ blockB[count+0] = cj(dm0(0));
+ blockB[count+1] = cj(dm0(1));
+ blockB[count+2] = cj(dm0(2));
+ blockB[count+3] = cj(dm0(3));
+ count += 4;
+ }
+ }
+ if(PanelMode) count += 4 * (stride-offset-depth);
+ }
+ }
+ // Remaining columns, copied one at a time.
+ for(Index j2=packet_cols4; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ for(Index k=0; k<depth; k++)
+ {
+ blockB[count] = cj(rhs(k, j2));
+ count += 1;
+ }
+ if(PanelMode) count += stride-offset-depth;
+ }
+}
+}
+// Returns the level-1 CPU cache size (in bytes) currently used to tune the
+// blocking of Eigen's matrix products.
+inline std::ptrdiff_t l1CacheSize()
+{
+ std::ptrdiff_t level1, level2, level3;
+ internal::manage_caching_sizes(GetAction, &level1, &level2, &level3);
+ return level1;
+}
+// Returns the level-2 CPU cache size (in bytes) currently used to tune the
+// blocking of Eigen's matrix products.
+inline std::ptrdiff_t l2CacheSize()
+{
+ std::ptrdiff_t level1, level2, level3;
+ internal::manage_caching_sizes(GetAction, &level1, &level2, &level3);
+ return level2;
+}
+// Returns the level-3 CPU cache size (in bytes) currently used to tune the
+// blocking of Eigen's matrix products.
+inline std::ptrdiff_t l3CacheSize()
+{
+ std::ptrdiff_t level1, level2, level3;
+ internal::manage_caching_sizes(GetAction, &level1, &level2, &level3);
+ return level3;
+}
+// Overrides the L1/L2/L3 cache sizes (in bytes) subsequently reported by
+// l1CacheSize()/l2CacheSize()/l3CacheSize() and used by the product blocking.
+inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2, std::ptrdiff_t l3)
+{
+ internal::manage_caching_sizes(SetAction, &l1, &l2, &l3);
+}
+}
+#endif
+// end #include "src/Core/products/GeneralBlockPanelKernel.h"
+// #include "src/Core/products/Parallelizer.h"
+#ifndef EIGEN_PARALLELIZER_H
+#define EIGEN_PARALLELIZER_H
+#include <atomic>
+namespace Eigen {
+namespace internal {
+// Getter/setter for the global maximum thread count used by parallelized
+// products. SetAction stores *v; GetAction writes the effective count into *v:
+// the user override when positive, otherwise omp_get_max_threads() (or 1 when
+// compiled without OpenMP).
+inline void manage_multi_threading(Action action, int* v)
+{
+ // -1 means "not set by the user"; fall back to OpenMP's default on query.
+ static EIGEN_UNUSED int m_maxThreads = -1;
+ if(action==SetAction)
+ {
+ eigen_internal_assert(v!=0);
+ m_maxThreads = *v;
+ }
+ else if(action==GetAction)
+ {
+ eigen_internal_assert(v!=0);
+ #ifdef EIGEN_HAS_OPENMP
+ if(m_maxThreads>0)
+ *v = m_maxThreads;
+ else
+ *v = omp_get_max_threads();
+ #else
+ *v = 1;
+ #endif
+ }
+ else
+ {
+ // Any other Action value is a programming error.
+ eigen_internal_assert(false);
+ }
+}
+}
+// Primes the internal thread-count and cache-size state by querying both once;
+// the queried values are discarded. This forces the lazy static initialization
+// to happen here rather than inside a parallel region.
+inline void initParallel()
+{
+ int nbt;
+ internal::manage_multi_threading(GetAction, &nbt);
+ std::ptrdiff_t l1, l2, l3;
+ internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+}
+// Returns the number of threads parallelized products will use
+// (1 when compiled without OpenMP).
+inline int nbThreads()
+{
+ int count;
+ internal::manage_multi_threading(GetAction, &count);
+ return count;
+}
+// Sets the maximum number of threads used by parallelized products; a value
+// <= 0 restores the OpenMP default (see manage_multi_threading).
+inline void setNbThreads(int v)
+{
+ internal::manage_multi_threading(SetAction, &v);
+}
+namespace internal {
+// Per-thread bookkeeping shared between threads of a parallel GEMM:
+// which rows of the LHS this thread packs, plus atomics used by the worker
+// functor to synchronize access to each thread's packed LHS panel.
+template<typename Index> struct GemmParallelInfo
+{
+ GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
+ std::atomic<Index> sync;    // synchronization counter (initialized to -1)
+ std::atomic<int> users;     // number of threads currently using this panel
+ Index lhs_start;            // first LHS row owned by this thread
+ Index lhs_length;           // number of LHS rows owned by this thread
+};
+// Runs func over 'threads' OpenMP threads, splitting the output into per-thread
+// row/column slabs. Falls back to one sequential call when OpenMP is disabled,
+// EIGEN_USE_BLAS is set, threading would not pay off, or we are already inside
+// a parallel region.
+template<bool Condition, typename Functor, typename Index>
+void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
+{
+#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
+ EIGEN_UNUSED_VARIABLE(depth);
+ EIGEN_UNUSED_VARIABLE(transpose);
+ func(0,rows, 0,cols);
+#else
+ // Cap the thread count by the number of nr-wide column blocks and by the
+ // total amount of work (rows*cols*depth), so small products stay sequential.
+ Index size = transpose ? rows : cols;
+ Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
+ double work = static_cast<double>(rows) * static_cast<double>(cols) *
+ static_cast<double>(depth);
+ double kMinTaskSize = 50000;
+ pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
+ Index threads = std::min<Index>(nbThreads(), pb_max_threads);
+ // No nested parallelism: run sequentially if already inside a parallel region.
+ if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
+ return func(0,rows, 0,cols);
+ Eigen::initParallel();
+ func.initParallelSession(threads);
+ if(transpose)
+ std::swap(rows,cols);
+ ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
+ #pragma omp parallel num_threads(threads)
+ {
+ Index i = omp_get_thread_num();
+ // OpenMP may grant fewer threads than requested; use the actual count.
+ Index actual_threads = omp_get_num_threads();
+ // Keep column blocks a multiple of 4 and row blocks a multiple of mr so
+ // slabs align with the kernel's register blocking; the last thread takes
+ // whatever remains.
+ Index blockCols = (cols / actual_threads) & ~Index(0x3);
+ Index blockRows = (rows / actual_threads);
+ blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
+ Index r0 = i*blockRows;
+ Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
+ Index c0 = i*blockCols;
+ Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
+ info[i].lhs_start = r0;
+ info[i].lhs_length = actualBlockRows;
+ if(transpose) func(c0, actualBlockCols, 0, rows, info);
+ else func(0, rows, c0, actualBlockCols, info);
+ }
+#endif
+}
+}
+}
+#endif
+// end #include "src/Core/products/Parallelizer.h"
+// #include "src/Core/ProductEvaluators.h"
+#ifndef EIGEN_PRODUCTEVALUATORS_H
+#define EIGEN_PRODUCTEVALUATORS_H
+namespace Eigen {
+namespace internal {
+// Evaluator of a Product expression: simply forwards to product_evaluator,
+// which dispatches on the product kind and operand shapes.
+template<typename Lhs, typename Rhs, int Options>
+struct evaluator<Product<Lhs, Rhs, Options> >
+ : public product_evaluator<Product<Lhs, Rhs, Options> >
+{
+ typedef Product<Lhs, Rhs, Options> XprType;
+ typedef product_evaluator<XprType> Base;
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+};
+// A "scalar * (A*B)" expression must be treated like a product for aliasing
+// purposes, hence assume-aliasing is enabled for it.
+template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
+struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
+ const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
+ const Product<Lhs, Rhs, DefaultProduct> > >
+{
+ static const bool value = true;
+};
+// Evaluator for "scalar * (A*B)": folds the scalar into the product's LHS so
+// the whole expression is evaluated as a single product (scalar*A)*B.
+template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
+struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
+ const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
+ const Product<Lhs, Rhs, DefaultProduct> > >
+ : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> >
+{
+ typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
+ const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
+ const Product<Lhs, Rhs, DefaultProduct> > XprType;
+ typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+ : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
+ {}
+};
+// Taking a diagonal of a product: re-route through the lazy (coefficient-wise)
+// product so only the diagonal coefficients are computed, not the full matrix.
+template<typename Lhs, typename Rhs, int DiagIndex>
+struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
+ : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
+{
+ typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
+ typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+ : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
+ Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
+ xpr.index() ))
+ {}
+};
+// Forward declaration: generic_product_impl dispatches product evaluation on
+// the operand shapes and the statically-determined product kind.
+template< typename Lhs, typename Rhs,
+ typename LhsShape = typename evaluator_traits<Lhs>::Shape,
+ typename RhsShape = typename evaluator_traits<Rhs>::Shape,
+ int ProductType = internal::product_type<Lhs,Rhs>::value>
+struct generic_product_impl;
+// Default products must assume aliasing (dst may appear in the product).
+template<typename Lhs, typename Rhs>
+struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
+ static const bool value = true;
+};
+// Generic product evaluator: eagerly evaluates the product into an owned
+// plain-object temporary (m_result) and then behaves as an evaluator of that
+// temporary. EvalBeforeNestingBit marks the expression as pre-evaluated.
+template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
+struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
+ : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>
+{
+ typedef Product<Lhs, Rhs, Options> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+ typedef evaluator<PlainObject> Base;
+ enum {
+ Flags = Base::Flags | EvalBeforeNestingBit
+ };
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit product_evaluator(const XprType& xpr)
+ : m_result(xpr.rows(), xpr.cols())
+ {
+ // Re-seat the base evaluator on m_result, then fill it in.
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
+ }
+protected:
+ PlainObject m_result;
+};
+// Assignment specializations routing "dst = A*B", "dst += A*B" and
+// "dst -= A*B" straight to generic_product_impl's evalTo/addTo/subTo,
+// bypassing the temporary that product_evaluator would create.
+template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
+ typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
+{
+ typedef Product<Lhs,Rhs,Options> SrcXprType;
+ static EIGEN_STRONG_INLINE
+ void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
+ {
+ // Plain assignment may resize the destination to the product's size.
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+ generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
+ }
+};
+template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
+ typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
+{
+ typedef Product<Lhs,Rhs,Options> SrcXprType;
+ static EIGEN_STRONG_INLINE
+ void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
+ {
+ // Compound assignment requires matching sizes (no resize).
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+ generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
+ }
+};
+template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
+ typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
+{
+ typedef Product<Lhs,Rhs,Options> SrcXprType;
+ static EIGEN_STRONG_INLINE
+ void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
+ {
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+ generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
+ }
+};
+// Assignment of "dst ?= scalar * (A*B)": fold the scalar into the product's
+// LHS and re-dispatch, so a single scaled product is evaluated.
+template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>
+struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
+ const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense>
+{
+ typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
+ const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
+ const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
+ static EIGEN_STRONG_INLINE
+ void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
+ {
+ call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
+ }
+};
+// "expr + (A*B)" and "expr - (A*B)" must also assume aliasing, since the
+// nested product does.
+template<typename OtherXpr, typename Lhs, typename Rhs>
+struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
+ const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
+ static const bool value = true;
+};
+template<typename OtherXpr, typename Lhs, typename Rhs>
+struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
+ const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
+ static const bool value = true;
+};
+// Helper for assignments of the form "dst ?= expr +/- (A*B)": assigns the
+// non-product operand with Func1, then folds in the product with Func2,
+// avoiding a temporary for the product.
+template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
+struct assignment_from_xpr_op_product
+{
+ template<typename SrcXprType, typename InitialFunc>
+ static EIGEN_STRONG_INLINE
+ void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& )
+ {
+ call_assignment_no_alias(dst, src.lhs(), Func1());
+ call_assignment_no_alias(dst, src.rhs(), Func2());
+ }
+};
+// Instantiates the catcher for one (assignment op, binary op) combination,
+// mapping it to the pair of element-wise assignment functors to use.
+#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \
+ template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \
+ struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \
+ const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \
+ : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \
+ {}
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op,add_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op,sub_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op);
+// Inner product (1x1 result): computed as the sum of the coefficient-wise
+// product of lhs^T and rhs, written into the single destination coefficient.
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
+{
+ template<typename Dst>
+ static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
+ }
+ template<typename Dst>
+ static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
+ }
+ template<typename Dst>
+ static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
+};
+// Outer-product evaluation, column-major destination (false_type tag):
+// iterate over columns, applying func(dst.col(j), rhs(j) * lhs).
+template<typename Dst, typename Lhs, typename Rhs, typename Func>
+void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
+{
+ evaluator<Rhs> rhsEval(rhs);
+ // Evaluate lhs into a temporary if needed, since it is reused per column.
+ typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
+ const Index cols = dst.cols();
+ for (Index j=0; j<cols; ++j)
+ func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
+}
+// Outer-product evaluation, row-major destination (true_type tag):
+// iterate over rows, applying func(dst.row(i), lhs(i) * rhs).
+template<typename Dst, typename Lhs, typename Rhs, typename Func>
+void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
+{
+ evaluator<Lhs> lhsEval(lhs);
+ typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
+ const Index rows = dst.rows();
+ for (Index i=0; i<rows; ++i)
+ func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
+}
+// Outer product (column vector * row vector): delegates to
+// outer_product_selector_run with a small functor selecting the per-slice
+// operation (=, +=, -=, or += alpha*), picking the row/col traversal that
+// matches the destination's storage order.
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
+{
+ template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ // Per-slice assignment functors; const_cast_derived is needed because the
+ // slice (col/row proxy) is passed by const reference.
+ struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
+ struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
+ struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
+ struct adds {
+ Scalar m_scale;
+ explicit adds(const Scalar& s) : m_scale(s) {}
+ template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
+ dst.const_cast_derived() += m_scale * src;
+ }
+ };
+ template<typename Dst>
+ static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
+ }
+ template<typename Dst>
+ static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
+ }
+ template<typename Dst>
+ static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
+ }
+ template<typename Dst>
+ static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
+ }
+};
+// CRTP helper: a Derived product implementation only has to provide
+// scaleAndAddTo(dst,lhs,rhs,alpha); evalTo/addTo/subTo are derived from it
+// by calling it with alpha = 1 / 1 / -1 (evalTo first zeroes dst).
+template<typename Lhs, typename Rhs, typename Derived>
+struct generic_product_impl_base
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ // dst = lhs*rhs  (zero-fill then accumulate once)
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
+ // dst += lhs*rhs
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
+ // dst -= lhs*rhs
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
+ // Forward to the derived class's actual kernel.
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
+};
+// Matrix * vector (GEMV). Determines on which side the matrix sits
+// (Side = OnTheLeft when the LHS is the vector) and forwards to the
+// gemv_dense_selector kernel, templated on the matrix's storage order and
+// on whether it exposes usable direct (pointer) access for BLAS-style code.
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
+ : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
+{
+ typedef typename nested_eval<Lhs,1>::type LhsNested;
+ typedef typename nested_eval<Rhs,1>::type RhsNested;
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
+ // MatrixType = whichever operand is the matrix (the non-vector one).
+ typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
+ template<typename Dest>
+ static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ LhsNested actual_lhs(lhs);
+ RhsNested actual_rhs(rhs);
+ internal::gemv_dense_selector<Side,
+ (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
+ bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
+ >::run(actual_lhs, actual_rhs, dst, alpha);
+ }
+};
+// Small products: evaluate coefficient-by-coefficient through lazyProduct(),
+// i.e. without materializing the product into a temporary; the assignment
+// kind (=, +=, -=) is encoded in the functor passed to
+// call_assignment_no_alias.
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
+ }
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
+ }
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
+ }
+};
+// Lazy mode reuses the coefficient-based implementation unchanged.
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>
+ : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {};
+// Forward declarations; the packet-impl specializations appear further below.
+template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
+struct etor_product_coeff_impl;
+template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl;
+// Evaluator for a lazy (coefficient-based) dense*dense product: each
+// coefficient is computed on demand as a dot product of one lhs row with one
+// rhs column; packet access is routed through etor_product_packet_impl.
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
+ : evaluator_base<Product<Lhs, Rhs, LazyProduct> >
+{
+ typedef Product<Lhs, Rhs, LazyProduct> XprType;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit product_evaluator(const XprType& xpr)
+ : m_lhs(xpr.lhs()),
+ m_rhs(xpr.rhs()),
+ m_lhsImpl(m_lhs),
+ m_rhsImpl(m_rhs),
+ // m_innerDim = contraction length (lhs columns == rhs rows).
+ m_innerDim(xpr.lhs().cols())
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
+ EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+// Debug dump of the compile-time decisions below; disabled by default.
+#if 0
+ std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n";
+ std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n";
+ std::cerr << "LhsAlignment= " << LhsAlignment << "\n";
+ std::cerr << "RhsAlignment= " << RhsAlignment << "\n";
+ std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n";
+ std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n";
+ std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n";
+ std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n";
+ std::cerr << "Alignment= " << Alignment << "\n";
+ std::cerr << "Flags= " << Flags << "\n";
+#endif
+ }
+ typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+ typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+ typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+ typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+ typedef evaluator<LhsNestedCleaned> LhsEtorType;
+ typedef evaluator<RhsNestedCleaned> RhsEtorType;
+ enum {
+ RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
+ ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
+ InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
+ MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
+ };
+ typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType;
+ typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
+ enum {
+ LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
+ RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
+ // Cost model for one product coefficient: InnerSize multiplies (each
+ // reading one lhs and one rhs coeff) plus InnerSize-1 additions; a
+ // dynamic inner dimension is treated as prohibitively expensive.
+ CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
+ : InnerSize == Dynamic ? HugeCost
+ : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
+ Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
+ LhsFlags = LhsEtorType::Flags,
+ RhsFlags = RhsEtorType::Flags,
+ LhsRowMajor = LhsFlags & RowMajorBit,
+ RhsRowMajor = RhsFlags & RowMajorBit,
+ LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,
+ RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,
+ LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
+ RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
+ SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
+ // Vertical (lhs-column) vectorization needs a column-major lhs with
+ // packet access; horizontal needs a row-major rhs with packet access.
+ CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
+ CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1),
+ EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
+ : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
+ : (bool(RhsRowMajor) && !CanVectorizeLhs),
+ Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
+ | (EvalToRowMajor ? RowMajorBit : 0)
+ | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
+ | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
+ LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
+ RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
+ // Report an alignment only if the outer stride keeps every column/row
+ // aligned; otherwise 0.
+ Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
+ : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
+ : 0,
+ CanVectorizeInner = SameType
+ && LhsRowMajor
+ && (!RhsRowMajor)
+ && (LhsFlags & RhsFlags & ActualPacketAccessBit)
+ && (InnerSize % packet_traits<Scalar>::size == 0)
+ };
+ // One product coefficient = dot(lhs.row(row), rhs.col(col)).
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
+ {
+ return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
+ }
+ // Linear-access variant: map a flat index onto (row,col) of a vector result.
+ EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
+ {
+ const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
+ const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
+ return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
+ }
+ // Packet access: delegate to the (possibly unrolled) per-storage-order
+ // kernel defined below in this file.
+ template<int LoadMode, typename PacketType>
+ const PacketType packet(Index row, Index col) const
+ {
+ PacketType res;
+ typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,
+ Unroll ? int(InnerSize) : Dynamic,
+ LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
+ PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
+ return res;
+ }
+ template<int LoadMode, typename PacketType>
+ const PacketType packet(Index index) const
+ {
+ const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
+ const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
+ return packet<LoadMode,PacketType>(row,col);
+ }
+protected:
+ typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
+ typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
+ LhsEtorType m_lhsImpl;
+ RhsEtorType m_rhsImpl;
+ Index m_innerDim; // runtime inner (contraction) dimension
+};
+// A DefaultProduct evaluated in lazy coefficient-based mode reuses the
+// LazyProduct evaluator above, but additionally sets EvalBeforeNestingBit so
+// the product is materialized before being nested into a larger expression.
+template<typename Lhs, typename Rhs>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
+ : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>
+{
+ typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+ typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
+ typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
+ enum {
+ Flags = Base::Flags | EvalBeforeNestingBit
+ };
+ // Re-wrap the operands in a LazyProduct expression for the base evaluator.
+ EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+ : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
+ {}
+};
+// Packet kernels for the lazy product, one specialization per (storage
+// order, unrolling depth). The recursive cases peel one term of the inner
+// sum at compile time; <.., 1, ..> starts the accumulator with the first
+// term, <.., 0, ..> yields a zero packet (empty inner dimension), and
+// <.., Dynamic, ..> falls back to a runtime loop over innerDim.
+// RowMajor: one lhs scalar broadcast times one rhs packet per term;
+// ColMajor: one lhs packet times one rhs scalar broadcast per term.
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ {
+ etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
+ res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
+ }
+};
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ {
+ etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
+ res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
+ }
+};
+// Base case: first (index 0) term initializes the accumulator with pmul.
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index , Packet &res)
+ {
+ res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
+ }
+};
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index , Packet &res)
+ {
+ res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
+ }
+};
+// Degenerate case: inner dimension of size 0 -> result packet is all zeros.
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index , Index , const Lhs& , const Rhs& , Index , Packet &res)
+ {
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
+ }
+};
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index , Index , const Lhs& , const Rhs& , Index , Packet &res)
+ {
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
+ }
+};
+// Runtime-length inner dimension: zero-init then accumulate in a loop.
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ {
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
+ for(Index i = 0; i < innerDim; ++i)
+ res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
+ }
+};
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ {
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
+ for(Index i = 0; i < innerDim; ++i)
+ res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
+ }
+};
+// Triangular * dense and dense * triangular: unwrap the triangular view
+// (nestedExpression()) and forward to the triangular_product_impl kernel,
+// which is defined elsewhere. The bools encode which side is triangular and
+// whether the dense side is a vector.
+template<int Mode, bool LhsIsTriangular,
+ typename Lhs, bool LhsIsVector,
+ typename Rhs, bool RhsIsVector>
+struct triangular_product_impl;
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
+ : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>
+ ::run(dst, lhs.nestedExpression(), rhs, alpha);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
+: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);
+ }
+};
+// Self-adjoint * dense and dense * self-adjoint: unwrap the self-adjoint
+// view and forward to the selfadjoint_product_impl kernel (defined
+// elsewhere). A mode of 0 marks the plain dense operand.
+template <typename Lhs, int LhsMode, bool LhsIsVector,
+ typename Rhs, int RhsMode, bool RhsIsVector>
+struct selfadjoint_product_impl;
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
+ : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
+: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);
+ }
+};
+// Shared base for diagonal*dense / dense*diagonal evaluators: holds the two
+// sub-evaluators and provides the scalar/packet multiply of one matrix
+// coefficient by the matching diagonal entry. ProductOrder says on which
+// side the diagonal sits (OnTheLeft scales rows, OnTheRight scales columns).
+template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
+struct diagonal_product_evaluator_base
+ : evaluator_base<Derived>
+{
+ typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
+public:
+ enum {
+ CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
+ MatrixFlags = evaluator<MatrixType>::Flags,
+ DiagFlags = evaluator<DiagonalType>::Flags,
+ _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
+ // True when, along the matrix's inner direction, the diagonal entry is
+ // constant, so it can be broadcast from a single scalar (true_type
+ // packet_impl below); otherwise the diagonal must be packet-loaded.
+ _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
+ ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
+ _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
+ _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
+ _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
+ Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
+ Alignment = evaluator<MatrixType>::Alignment
+ };
+ diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
+ : m_diagImpl(diag), m_matImpl(mat)
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ // Linear access (vector case): coefficient idx is diag(idx) * mat(idx).
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
+ {
+ return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
+ }
+protected:
+ // Diagonal entry constant across the packet: broadcast one scalar.
+ template<int LoadMode,typename PacketType>
+ EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
+ {
+ return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
+ internal::pset1<PacketType>(m_diagImpl.coeff(id)));
+ }
+ // Diagonal entries vary across the packet: packet-load the diagonal too.
+ template<int LoadMode,typename PacketType>
+ EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
+ {
+ enum {
+ InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
+ DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment))
+ };
+ return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
+ m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
+ }
+ evaluator<DiagonalType> m_diagImpl;
+ evaluator<MatrixType> m_matImpl;
+};
+// diagonal * dense: coefficient (row,col) = diag(row) * mat(row,col),
+// i.e. each row of the dense matrix is scaled by the matching diagonal entry.
+template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
+ : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
+{
+ typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
+ using Base::m_diagImpl;
+ using Base::m_matImpl;
+ using Base::coeff;
+ typedef typename Base::Scalar Scalar;
+ typedef Product<Lhs, Rhs, ProductKind> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+ enum {
+ StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
+ };
+ EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+ : Base(xpr.rhs(), xpr.lhs().diagonal())
+ {
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+ {
+ return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
+ }
+// Packet paths are compiled out under CUDA.
+#ifndef __CUDACC__
+ template<int LoadMode,typename PacketType>
+ EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
+ {
+ // Row-major rhs -> the diagonal entry is constant across the packet
+ // (scalar broadcast); column-major -> packet-load the diagonal.
+ return this->template packet_impl<LoadMode,PacketType>(row,col, row,
+ typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
+ }
+ template<int LoadMode,typename PacketType>
+ EIGEN_STRONG_INLINE PacketType packet(Index idx) const
+ {
+ return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
+ }
+#endif
+};
+// dense * diagonal: coefficient (row,col) = mat(row,col) * diag(col),
+// i.e. each column of the dense matrix is scaled by the diagonal entry.
+template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
+ : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
+{
+ typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
+ using Base::m_diagImpl;
+ using Base::m_matImpl;
+ using Base::coeff;
+ typedef typename Base::Scalar Scalar;
+ typedef Product<Lhs, Rhs, ProductKind> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+ enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };
+ EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+ : Base(xpr.lhs(), xpr.rhs().diagonal())
+ {
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+ {
+ return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
+ }
+#ifndef __CUDACC__
+ template<int LoadMode,typename PacketType>
+ EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
+ {
+ // Column-major lhs -> constant diagonal entry across the packet.
+ return this->template packet_impl<LoadMode,PacketType>(row,col, col,
+ typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
+ }
+ template<int LoadMode,typename PacketType>
+ EIGEN_STRONG_INLINE PacketType packet(Index idx) const
+ {
+ return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
+ }
+#endif
+};
+// Applies a permutation matrix to a dense expression. Side selects whether
+// rows (OnTheLeft) or columns (OnTheRight) are permuted; Transposed applies
+// the inverse permutation.
+template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
+struct permutation_matrix_product;
+template<typename ExpressionType, int Side, bool Transposed>
+struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
+{
+ typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
+ typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
+ template<typename Dest, typename PermutationType>
+ static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
+ {
+ MatrixType mat(xpr);
+ const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
+ if(is_same_dense(dst, mat))
+ {
+ // In-place: walk the permutation's cycles, swapping rows/columns
+ // along each cycle; `mask` marks indices already placed.
+ Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
+ mask.fill(false);
+ Index r = 0;
+ while(r < perm.size())
+ {
+ // Find the start of the next unprocessed cycle.
+ while(r<perm.size() && mask[r]) r++;
+ if(r>=perm.size())
+ break;
+ Index k0 = r++;
+ Index kPrev = k0;
+ mask.coeffRef(k0) = true;
+ for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k))
+ {
+ Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
+ .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+ (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
+ mask.coeffRef(k) = true;
+ kPrev = k;
+ }
+ }
+ }
+ else
+ {
+ // Out-of-place: copy each row/column to (or from) its permuted slot.
+ for(Index i = 0; i < n; ++i)
+ {
+ Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+ (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)
+ =
+ Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime>
+ (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
+ }
+ }
+ }
+};
+// Dispatch of the four permutation-product cases onto
+// permutation_matrix_product: the permutation's side becomes OnTheLeft /
+// OnTheRight, and Inverse<...> of a permutation maps to Transposed=true.
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
+ {
+ permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
+ {
+ permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
+ }
+};
+// Applies a sequence of transpositions (pairwise swaps) to an expression:
+// copy the input into dst if needed, then perform each swap of row/column k
+// with tr(k). Transposed reverses the order in which the swaps are applied.
+template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
+struct transposition_matrix_product
+{
+ typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
+ typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
+ template<typename Dest, typename TranspositionType>
+ static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
+ {
+ MatrixType mat(xpr);
+ typedef typename TranspositionType::StorageIndex StorageIndex;
+ const Index size = tr.size();
+ StorageIndex j = 0;
+ // Work in place when dst aliases the input, otherwise copy first.
+ if(!is_same_dense(dst,mat))
+ dst = mat;
+ // Forward order normally; reverse order when Transposed (inverse).
+ for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
+ if(Index(j=tr.coeff(k))!=k)
+ {
+ if(Side==OnTheLeft) dst.row(k).swap(dst.row(j));
+ else if(Side==OnTheRight) dst.col(k).swap(dst.col(j));
+ }
+ }
+};
+// Dispatch of the four transpositions-product cases onto
+// transposition_matrix_product; Transpose<...> of a transpositions object
+// maps to Transposed=true (swaps applied in reverse order).
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
+ {
+ transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
+ {
+ transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
+ }
+};
+}
+}
+#endif
+// end #include "src/Core/ProductEvaluators.h"
+// #include "src/Core/products/GeneralMatrixVector.h"
+#ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
+#define EIGEN_GENERAL_MATRIX_VECTOR_H
+namespace Eigen {
+namespace internal {
+// GEMV kernel declaration for a column-major lhs: computes
+// res += alpha * lhs * rhs (see the out-of-line run() definition below).
+// The packet typedefs degrade to plain scalars when the lhs/rhs packet
+// sizes differ or either scalar type is not vectorizable.
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+enum {
+ // Vectorize only when both scalar types are vectorizable with equal
+ // packet sizes; otherwise all packet sizes fall back to 1.
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
+ && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
+};
+typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+typedef typename packet_traits<ResScalar>::type _ResPacket;
+typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+EIGEN_DONT_INLINE static void run(
+ Index rows, Index cols,
+ const LhsMapper& lhs,
+ const RhsMapper& rhs,
+ ResScalar* res, Index resIncr,
+ RhsScalar alpha);
+};
+// Column-major GEMV kernel: res += alpha * lhs * rhs, with lhs traversed one
+// column at a time. Four columns are processed per outer iteration and their
+// contributions accumulated into res with packet (SIMD) operations, choosing
+// Aligned/Unaligned loads according to the runtime alignment pattern computed
+// below. The output must be contiguous: resIncr==1 is asserted.
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
+  Index rows, Index cols,
+  const LhsMapper& lhs,
+  const RhsMapper& rhs,
+  ResScalar* res, Index resIncr,
+  RhsScalar alpha)
+{
+  EIGEN_UNUSED_VARIABLE(resIncr);
+  eigen_internal_assert(resIncr==1);
+  #ifdef _EIGEN_ACCUMULATE_PACKETS
+  #error _EIGEN_ACCUMULATE_PACKETS has already been defined
+  #endif
+  // Adds the contribution of the four current columns (lhs0..lhs3, each scaled
+  // by its broadcast factor ptmp0..ptmp3) to one result packet at row j. The
+  // Alignment* parameters select Aligned/Unaligned packet loads per column;
+  // columns 1 and 3 share one alignment kind (Alignment13).
+  #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) \
+    pstore(&res[j], \
+      padd(pload<ResPacket>(&res[j]), \
+        padd( \
+          padd(pcj.pmul(lhs0.template load<LhsPacket, Alignment0>(j), ptmp0), \
+            pcj.pmul(lhs1.template load<LhsPacket, Alignment13>(j), ptmp1)), \
+          padd(pcj.pmul(lhs2.template load<LhsPacket, Alignment2>(j), ptmp2), \
+            pcj.pmul(lhs3.template load<LhsPacket, Alignment13>(j), ptmp3)) )))
+  typedef typename LhsMapper::VectorMapper LhsScalars;
+  // cj / pcj apply the requested conjugations at scalar / packet granularity.
+  conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+  conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
+  if(ConjugateRhs)
+    alpha = numext::conj(alpha);
+  enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned };
+  const Index columnsAtOnce = 4;
+  const Index peels = 2;
+  const Index LhsPacketAlignedMask = LhsPacketSize-1;
+  const Index ResPacketAlignedMask = ResPacketSize-1;
+  const Index size = rows;
+  const Index lhsStride = lhs.stride();
+  // [0, alignedStart) and [alignedSize, size) are handled scalar-wise;
+  // [alignedStart, alignedSize) is the packet-friendly middle section of res.
+  Index alignedStart = internal::first_default_aligned(res,size);
+  Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
+  const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
+  // How the alignment of consecutive lhs columns drifts, per column step.
+  const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
+  Index alignmentPattern = alignmentStep==0 ? AllAligned
+                       : alignmentStep==(LhsPacketSize/2) ? EvenAligned
+                       : FirstAligned;
+  const Index lhsAlignmentOffset = lhs.firstAligned(size);
+  // Number of leading columns skipped in the first pass so that the 4-column
+  // groups start on a column with the expected alignment phase; they are
+  // processed afterwards by the scalar epilogue (second pass of the do-loop).
+  Index skipColumns = 0;
+  if( (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == size) || (UIntPtr(res)%sizeof(ResScalar)) )
+  {
+    alignedSize = 0;
+    alignedStart = 0;
+    alignmentPattern = NoneAligned;
+  }
+  else if(LhsPacketSize > 4)
+  {
+    // TODO confirm: packets wider than 4 fall back to unaligned loads here.
+    alignmentPattern = NoneAligned;
+  }
+  else if (LhsPacketSize>1)
+  {
+    while (skipColumns<LhsPacketSize &&
+          alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize))
+      ++skipColumns;
+    if (skipColumns==LhsPacketSize)
+    {
+      // No column phase matches the destination alignment: give up on packets'
+      // aligned path entirely.
+      alignmentPattern = NoneAligned;
+      skipColumns = 0;
+    }
+    else
+    {
+      skipColumns = (std::min)(skipColumns,cols);
+    }
+  }
+  else if(Vectorizable)
+  {
+    alignedStart = 0;
+    alignedSize = size;
+    alignmentPattern = AllAligned;
+  }
+  // NOTE(review): 'FirstAligned' here is the enum constant (value 2, always
+  // truthy), not 'alignmentPattern==FirstAligned', so these reduce to
+  // 'alignmentStep==1 ? 3 : 1' (resp. 1 : 3). This matches the code as
+  // shipped; flagging in case upstream intended the comparison.
+  const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
+  const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
+  Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
+  // Main pass: four columns at a time.
+  for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
+  {
+    // Broadcast alpha*rhs(col) for each of the four columns. offset1/offset3
+    // may swap columns 1 and 3 so that aligned loads land where expected.
+    RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(i, 0)),
+              ptmp1 = pset1<RhsPacket>(alpha*rhs(i+offset1, 0)),
+              ptmp2 = pset1<RhsPacket>(alpha*rhs(i+2, 0)),
+              ptmp3 = pset1<RhsPacket>(alpha*rhs(i+offset3, 0));
+    const LhsScalars lhs0 = lhs.getVectorMapper(0, i+0), lhs1 = lhs.getVectorMapper(0, i+offset1),
+                     lhs2 = lhs.getVectorMapper(0, i+2), lhs3 = lhs.getVectorMapper(0, i+offset3);
+    if (Vectorizable)
+    {
+      // Scalar prologue up to the first aligned result element.
+      for (Index j=0; j<alignedStart; ++j)
+      {
+        res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]);
+        res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]);
+        res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]);
+        res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]);
+      }
+      if (alignedSize>alignedStart)
+      {
+        switch(alignmentPattern)
+        {
+          case AllAligned:
+            for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned);
+            break;
+          case EvenAligned:
+            for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned);
+            break;
+          case FirstAligned:
+          {
+            Index j = alignedStart;
+            if(peels>1)
+            {
+              // Peeled loop: two result packets per iteration. Columns 1..3
+              // are misaligned by a known constant phase, so their packets are
+              // reconstructed from aligned loads via palign<1|2|3>.
+              LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
+              ResPacket T0, T1;
+              A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1);
+              A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2);
+              A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3);
+              for (; j<peeledSize; j+=peels*ResPacketSize)
+              {
+                A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize);  palign<1>(A01,A11);
+                A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize);  palign<2>(A02,A12);
+                A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize);  palign<3>(A03,A13);
+                A00 = lhs0.template load<LhsPacket, Aligned>(j);
+                A10 = lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize);
+                T0  = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j]));
+                T1  = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize]));
+                T0  = pcj.pmadd(A01, ptmp1, T0);
+                A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize);  palign<1>(A11,A01);
+                T0  = pcj.pmadd(A02, ptmp2, T0);
+                A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize);  palign<2>(A12,A02);
+                T0  = pcj.pmadd(A03, ptmp3, T0);
+                pstore(&res[j],T0);
+                A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize);  palign<3>(A13,A03);
+                T1  = pcj.pmadd(A11, ptmp1, T1);
+                T1  = pcj.pmadd(A12, ptmp2, T1);
+                T1  = pcj.pmadd(A13, ptmp3, T1);
+                pstore(&res[j+ResPacketSize],T1);
+              }
+            }
+            // Remainder of the aligned section, one packet per iteration.
+            for (; j<alignedSize; j+=ResPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned);
+            break;
+          }
+          default:
+            for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned);
+            break;
+        }
+      }
+    }
+    // Scalar epilogue: rows past the last full packet.
+    for (Index j=alignedSize; j<size; ++j)
+    {
+      res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]);
+      res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]);
+      res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]);
+      res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]);
+    }
+  }
+  // Leftover columns, one at a time: first pass handles [columnBound, cols),
+  // an optional second pass handles the skipped leading columns
+  // [0, skipColumns). The loop runs at most twice (see skipColumns reset).
+  Index end = cols;
+  Index start = columnBound;
+  do
+  {
+    for (Index k=start; k<end; ++k)
+    {
+      RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(k, 0));
+      const LhsScalars lhs0 = lhs.getVectorMapper(0, k);
+      if (Vectorizable)
+      {
+        for (Index j=0; j<alignedStart; ++j)
+          res[j] += cj.pmul(lhs0(j), pfirst(ptmp0));
+        // Pick aligned vs unaligned loads depending on this column's phase.
+        if (lhs0.template aligned<LhsPacket>(alignedStart))
+          for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
+            pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(i), ptmp0, pload<ResPacket>(&res[i])));
+        else
+          for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
+            pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(i), ptmp0, pload<ResPacket>(&res[i])));
+      }
+      for (Index i=alignedSize; i<size; ++i)
+        res[i] += cj.pmul(lhs0(i), pfirst(ptmp0));
+    }
+    if (skipColumns)
+    {
+      start = 0;
+      end = skipColumns;
+      skipColumns = 0;
+    }
+    else
+      break;
+  } while(Vectorizable);
+  #undef _EIGEN_ACCUMULATE_PACKETS
+}
+// Row-major GEMV specialization: declares packet types and the run() entry
+// point (defined below). Vectorizable requires both scalar types to have the
+// same packet width so lhs and rhs packets can be combined element-wise.
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
+{
+typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+enum {
+  Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
+              && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
+  LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+  RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+  ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
+};
+// Packet types degrade to plain scalars when vectorization is disabled.
+typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+typedef typename packet_traits<ResScalar>::type _ResPacket;
+typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+// Computes res += alpha * lhs * rhs (see definition below); res may be
+// strided here (resIncr is honored, unlike the ColMajor kernel).
+EIGEN_DONT_INLINE static void run(
+  Index rows, Index cols,
+  const LhsMapper& lhs,
+  const RhsMapper& rhs,
+  ResScalar* res, Index resIncr,
+  ResScalar alpha);
+};
+// Row-major GEMV kernel: res += alpha * lhs * rhs, computed as dot products
+// of lhs rows with the rhs vector. Four rows are processed per outer
+// iteration, each accumulating into its own packet register; partial sums are
+// reduced with predux() before being scaled by alpha and added to res.
+// rhs must be contiguous (rhs.stride()==1 is asserted); res may be strided.
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
+  Index rows, Index cols,
+  const LhsMapper& lhs,
+  const RhsMapper& rhs,
+  ResScalar* res, Index resIncr,
+  ResScalar alpha)
+{
+  eigen_internal_assert(rhs.stride()==1);
+  #ifdef _EIGEN_ACCUMULATE_PACKETS
+  #error _EIGEN_ACCUMULATE_PACKETS has already been defined
+  #endif
+  // Folds one rhs packet at depth j into the four row accumulators
+  // ptmp0..ptmp3; the Alignment* parameters select the lhs load kind per row.
+  #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) {\
+    RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0); \
+    ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Alignment0>(j), b, ptmp0); \
+    ptmp1 = pcj.pmadd(lhs1.template load<LhsPacket, Alignment13>(j), b, ptmp1); \
+    ptmp2 = pcj.pmadd(lhs2.template load<LhsPacket, Alignment2>(j), b, ptmp2); \
+    ptmp3 = pcj.pmadd(lhs3.template load<LhsPacket, Alignment13>(j), b, ptmp3); }
+  // cj / pcj apply the requested conjugations at scalar / packet granularity.
+  conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+  conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
+  typedef typename LhsMapper::VectorMapper LhsScalars;
+  enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 };
+  const Index rowsAtOnce = 4;
+  const Index peels = 2;
+  const Index RhsPacketAlignedMask = RhsPacketSize-1;
+  const Index LhsPacketAlignedMask = LhsPacketSize-1;
+  const Index depth = cols;
+  const Index lhsStride = lhs.stride();
+  // [0, alignedStart) and [alignedSize, depth) are handled scalar-wise;
+  // [alignedStart, alignedSize) is the packet-friendly section of each row.
+  Index alignedStart = rhs.firstAligned(depth);
+  Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
+  const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
+  // How the alignment of consecutive lhs rows drifts, per row step.
+  const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
+  Index alignmentPattern = alignmentStep==0 ? AllAligned
+                         : alignmentStep==(LhsPacketSize/2) ? EvenAligned
+                         : FirstAligned;
+  const Index lhsAlignmentOffset = lhs.firstAligned(depth);
+  const Index rhsAlignmentOffset = rhs.firstAligned(rows);
+  // Number of leading rows skipped in the first pass so that the 4-row groups
+  // start on a row with the expected alignment phase; they are processed by
+  // the scalar epilogue (second pass of the do-loop).
+  Index skipRows = 0;
+  if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) ||
+      (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == depth) ||
+      (rhsAlignmentOffset < 0) || (rhsAlignmentOffset == rows) )
+  {
+    alignedSize = 0;
+    alignedStart = 0;
+    alignmentPattern = NoneAligned;
+  }
+  else if(LhsPacketSize > 4)
+  {
+    // TODO confirm: packets wider than 4 fall back to unaligned loads here.
+    alignmentPattern = NoneAligned;
+  }
+  else if (LhsPacketSize>1)
+  {
+    while (skipRows<LhsPacketSize &&
+           alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize))
+      ++skipRows;
+    if (skipRows==LhsPacketSize)
+    {
+      // No row phase matches: give up on the aligned path entirely.
+      alignmentPattern = NoneAligned;
+      skipRows = 0;
+    }
+    else
+    {
+      skipRows = (std::min)(skipRows,Index(rows));
+    }
+  }
+  else if(Vectorizable)
+  {
+    alignedStart = 0;
+    alignedSize = depth;
+    alignmentPattern = AllAligned;
+  }
+  // NOTE(review): 'FirstAligned' here is the enum constant (value 2, always
+  // truthy), not 'alignmentPattern==FirstAligned', so these reduce to
+  // 'alignmentStep==1 ? 3 : 1' (resp. 1 : 3). Matches the shipped code;
+  // flagging in case upstream intended the comparison.
+  const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
+  const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
+  Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
+  // Main pass: four rows at a time, each with a scalar and a packet
+  // accumulator (tmpN / ptmpN).
+  for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
+  {
+    EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
+    ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
+    // offset1/offset3 may swap rows 1 and 3 so aligned loads land as expected.
+    const LhsScalars lhs0 = lhs.getVectorMapper(i+0, 0),    lhs1 = lhs.getVectorMapper(i+offset1, 0),
+                     lhs2 = lhs.getVectorMapper(i+2, 0),    lhs3 = lhs.getVectorMapper(i+offset3, 0);
+    if (Vectorizable)
+    {
+      ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)),
+                ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0));
+      // Scalar prologue up to the first aligned rhs element.
+      for (Index j=0; j<alignedStart; ++j)
+      {
+        RhsScalar b = rhs(j, 0);
+        tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
+        tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
+      }
+      if (alignedSize>alignedStart)
+      {
+        switch(alignmentPattern)
+        {
+          case AllAligned:
+            for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned);
+            break;
+          case EvenAligned:
+            for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned);
+            break;
+          case FirstAligned:
+          {
+            Index j = alignedStart;
+            if (peels>1)
+            {
+              // Peeled loop: two rhs packets per iteration. Rows 1..3 are
+              // misaligned by a known constant phase, so their packets are
+              // reconstructed from aligned loads via palign<1|2|3>.
+              LhsPacket A01, A02, A03, A11, A12, A13;
+              A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1);
+              A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2);
+              A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3);
+              for (; j<peeledSize; j+=peels*RhsPacketSize)
+              {
+                RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0);
+                A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize);  palign<1>(A01,A11);
+                A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize);  palign<2>(A02,A12);
+                A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize);  palign<3>(A03,A13);
+                ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), b, ptmp0);
+                ptmp1 = pcj.pmadd(A01, b, ptmp1);
+                A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize);  palign<1>(A11,A01);
+                ptmp2 = pcj.pmadd(A02, b, ptmp2);
+                A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize);  palign<2>(A12,A02);
+                ptmp3 = pcj.pmadd(A03, b, ptmp3);
+                A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize);  palign<3>(A13,A03);
+                b = rhs.getVectorMapper(j+RhsPacketSize, 0).template load<RhsPacket, Aligned>(0);
+                ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize), b, ptmp0);
+                ptmp1 = pcj.pmadd(A11, b, ptmp1);
+                ptmp2 = pcj.pmadd(A12, b, ptmp2);
+                ptmp3 = pcj.pmadd(A13, b, ptmp3);
+              }
+            }
+            // Remainder of the aligned section, one rhs packet per iteration.
+            for (; j<alignedSize; j+=RhsPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned);
+            break;
+          }
+          default:
+            for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned);
+            break;
+        }
+        // Horizontal reduction of the packet accumulators into the scalars.
+        tmp0 += predux(ptmp0);
+        tmp1 += predux(ptmp1);
+        tmp2 += predux(ptmp2);
+        tmp3 += predux(ptmp3);
+      }
+    }
+    // Scalar epilogue: depth indices past the last full packet.
+    for (Index j=alignedSize; j<depth; ++j)
+    {
+      RhsScalar b = rhs(j, 0);
+      tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
+      tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
+    }
+    res[i*resIncr]            += alpha*tmp0;
+    res[(i+offset1)*resIncr]  += alpha*tmp1;
+    res[(i+2)*resIncr]        += alpha*tmp2;
+    res[(i+offset3)*resIncr]  += alpha*tmp3;
+  }
+  // Leftover rows, one at a time: first pass handles [rowBound, rows), an
+  // optional second pass handles the skipped leading rows [0, skipRows).
+  // The loop runs at most twice (see skipRows reset).
+  Index end = rows;
+  Index start = rowBound;
+  do
+  {
+    for (Index i=start; i<end; ++i)
+    {
+      EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
+      ResPacket ptmp0 = pset1<ResPacket>(tmp0);
+      const LhsScalars lhs0 = lhs.getVectorMapper(i, 0);
+      for (Index j=0; j<alignedStart; ++j)
+        tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
+      if (alignedSize>alignedStart)
+      {
+        // Pick aligned vs unaligned lhs loads depending on this row's phase.
+        if (lhs0.template aligned<LhsPacket>(alignedStart))
+          for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
+            ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0);
+        else
+          for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
+            ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0);
+        tmp0 += predux(ptmp0);
+      }
+      for (Index j=alignedSize; j<depth; ++j)
+        tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
+      res[i*resIncr] += alpha*tmp0;
+    }
+    if (skipRows)
+    {
+      start = 0;
+      end = skipRows;
+      skipRows = 0;
+    }
+    else
+      break;
+  } while(Vectorizable);
+  #undef _EIGEN_ACCUMULATE_PACKETS
+}
+}
+}
+#endif
+// end #include "src/Core/products/GeneralMatrixVector.h"
+// #include "src/Core/products/GeneralMatrixMatrix.h"
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_H
+namespace Eigen {
+namespace internal {
+template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;
+// Row-major-destination GEMM: implemented by computing the transposed product
+// (rhs^T * lhs^T) with the ColMajor specialization below. Note how operands,
+// dimensions, storage orders, and conjugation flags are all swapped in the
+// forwarded call.
+template<
+  typename Index,
+  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
+struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
+{
+  typedef gebp_traits<RhsScalar,LhsScalar> Traits;
+  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+  static EIGEN_STRONG_INLINE void run(
+    Index rows, Index cols, Index depth,
+    const LhsScalar* lhs, Index lhsStride,
+    const RhsScalar* rhs, Index rhsStride,
+    ResScalar* res, Index resStride,
+    ResScalar alpha,
+    level3_blocking<RhsScalar,LhsScalar>& blocking,
+    GemmParallelInfo<Index>* info = 0)
+  {
+    // Transposing both sides flips each operand's storage order.
+    general_matrix_matrix_product<Index,
+      RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
+      LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
+      ColMajor>
+    ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info);
+  }
+};
+// Column-major-destination GEMM driver: res += alpha * lhs * rhs, blocked
+// into mc x kc panels of lhs and kc x nc panels of rhs that are packed into
+// contiguous buffers and multiplied by the gebp micro-kernel. Has a
+// multi-threaded path (OpenMP, driven by the GemmParallelInfo array) and a
+// sequential fallback.
+template<
+  typename Index,
+  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
+struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
+{
+typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+static void run(Index rows, Index cols, Index depth,
+  const LhsScalar* _lhs, Index lhsStride,
+  const RhsScalar* _rhs, Index rhsStride,
+  ResScalar* _res, Index resStride,
+  ResScalar alpha,
+  level3_blocking<LhsScalar,RhsScalar>& blocking,
+  GemmParallelInfo<Index>* info = 0)
+{
+  typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
+  typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
+  typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
+  LhsMapper lhs(_lhs,lhsStride);
+  RhsMapper rhs(_rhs,rhsStride);
+  ResMapper res(_res, resStride);
+  // Panel sizes chosen by the caller-provided blocking object.
+  Index kc = blocking.kc();
+  Index mc = (std::min)(rows,blocking.mc());
+  Index nc = (std::min)(cols,blocking.nc());
+  gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+  gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
+  gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
+#ifdef EIGEN_HAS_OPENMP
+  if(info)
+  {
+    // Parallel path: blockA is shared across threads (each thread packs its
+    // own horizontal slice of lhs); blockB is per-thread.
+    int tid = omp_get_thread_num();
+    int threads = omp_get_num_threads();
+    LhsScalar* blockA = blocking.blockA();
+    eigen_internal_assert(blockA!=0);
+    std::size_t sizeB = kc*nc;
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
+    for(Index k=0; k<depth; k+=kc)
+    {
+      const Index actual_kc = (std::min)(k+kc,depth)-k;
+      pack_rhs(blockB, rhs.getSubMapper(k,0), actual_kc, nc);
+      // Busy-wait until every consumer of the previous blockA slice is done,
+      // then announce 'threads' users of the slice we are about to repack.
+      while(info[tid].users!=0) {}
+      info[tid].users += threads;
+      pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length);
+      // Publish that our slice is packed for depth-step k.
+      info[tid].sync = k;
+      // Consume every thread's lhs slice, starting with our own; spin until
+      // the producer of each other slice has published step k.
+      for(int shift=0; shift<threads; ++shift)
+      {
+        int i = (tid+shift)%threads;
+        if (shift>0) {
+          while(info[i].sync!=k) {
+          }
+        }
+        gebp(res.getSubMapper(info[i].lhs_start, 0), blockA+info[i].lhs_start*actual_kc, blockB, info[i].lhs_length, actual_kc, nc, alpha);
+      }
+      // Remaining rhs panels only need our private blockB, no extra sync.
+      for(Index j=nc; j<cols; j+=nc)
+      {
+        const Index actual_nc = (std::min)(j+nc,cols)-j;
+        pack_rhs(blockB, rhs.getSubMapper(k,j), actual_kc, actual_nc);
+        gebp(res.getSubMapper(0, j), blockA, blockB, rows, actual_kc, actual_nc, alpha);
+      }
+      // Release our claim on every slice of blockA for this depth step.
+      for(Index i=0; i<threads; ++i)
+        info[i].users -= 1;
+    }
+  }
+  else
+#endif
+  {
+    // Sequential path: classic three-level blocking over rows/depth/cols.
+    EIGEN_UNUSED_VARIABLE(info);
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*nc;
+    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
+    // If a single rhs panel covers the whole product, pack it only once.
+    const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
+    for(Index i2=0; i2<rows; i2+=mc)
+    {
+      const Index actual_mc = (std::min)(i2+mc,rows)-i2;
+      for(Index k2=0; k2<depth; k2+=kc)
+      {
+        const Index actual_kc = (std::min)(k2+kc,depth)-k2;
+        pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);
+        for(Index j2=0; j2<cols; j2+=nc)
+        {
+          const Index actual_nc = (std::min)(j2+nc,cols)-j2;
+          if((!pack_rhs_once) || i2==0)
+            pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
+          gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
+        }
+      }
+    }
+  }
+}
+};
+// Callable wrapper around a Gemm kernel, binding the operands, destination,
+// scale factor, and blocking object. operator() computes one horizontal band
+// (rows starting at 'row') of dst += actualAlpha * lhs * rhs, so the caller
+// (e.g. parallelize_gemm) can partition the work by row ranges.
+template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
+struct gemm_functor
+{
+  gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, BlockingType& blocking)
+    : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
+  {}
+  // Prepares the shared blocking state for a multi-threaded run and
+  // pre-allocates the shared lhs pack buffer.
+  void initParallelSession(Index num_threads) const
+  {
+    m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
+    m_blocking.allocateA();
+  }
+  // cols==-1 means "all columns"; info carries per-thread sync state (0 in
+  // the sequential case).
+  void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo<Index>* info=0) const
+  {
+    if(cols==-1)
+      cols = m_rhs.cols();
+    Gemm::run(rows, cols, m_lhs.cols(),
+              &m_lhs.coeffRef(row,0), m_lhs.outerStride(),
+              &m_rhs.coeffRef(0,col), m_rhs.outerStride(),
+              (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
+              m_actualAlpha, m_blocking, info);
+  }
+  typedef typename Gemm::Traits Traits;
+  protected:
+    const Lhs& m_lhs;
+    const Rhs& m_rhs;
+    Dest& m_dest;
+    Scalar m_actualAlpha;
+    BlockingType& m_blocking;
+};
+template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,
+bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;
+// Base class holding the GEMM blocking state: the packed-operand buffers
+// (blockA/blockB) and the panel sizes mc (rows), nc (cols), kc (depth).
+// Derived gemm_blocking_space specializations own allocation; this base only
+// exposes the values to the product kernels.
+template<typename _LhsScalar, typename _RhsScalar>
+class level3_blocking
+{
+    typedef _LhsScalar LhsScalar;
+    typedef _RhsScalar RhsScalar;
+  protected:
+    LhsScalar* m_blockA;
+    RhsScalar* m_blockB;
+    Index m_mc;
+    Index m_nc;
+    Index m_kc;
+  public:
+    level3_blocking()
+      : m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)
+    {}
+    inline Index mc() const { return m_mc; }
+    inline Index nc() const { return m_nc; }
+    inline Index kc() const { return m_kc; }
+    inline LhsScalar* blockA() { return m_blockA; }
+    inline RhsScalar* blockB() { return m_blockB; }
+};
+// Blocking storage for products whose maximal sizes are known at compile
+// time: the pack buffers are fixed-size members (no heap allocation), sized
+// for the whole product. For a row-major destination the lhs/rhs roles are
+// swapped (the product is computed transposed), hence the conditionals in the
+// base-class arguments.
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true >
+  : public level3_blocking<
+      typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
+      typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
+{
+    enum {
+      Transpose = StorageOrder==RowMajor,
+      ActualRows = Transpose ? MaxCols : MaxRows,
+      ActualCols = Transpose ? MaxRows : MaxCols
+    };
+    typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
+    typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
+    typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+    enum {
+      SizeA = ActualRows * MaxDepth,
+      SizeB = ActualCols * MaxDepth
+    };
+#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
+    EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
+    EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
+#else
+    // Static alignment is insufficient: over-allocate raw bytes and align the
+    // pointers manually in the constructor.
+    EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
+    EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
+#endif
+  public:
+    // Runtime sizes are ignored: the compile-time maxima are used directly.
+    gemm_blocking_space(Index , Index , Index , Index , bool )
+    {
+      this->m_mc = ActualRows;
+      this->m_nc = ActualCols;
+      this->m_kc = MaxDepth;
+#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
+      this->m_blockA = m_staticA;
+      this->m_blockB = m_staticB;
+#else
+      this->m_blockA = reinterpret_cast<LhsScalar*>((internal::UIntPtr(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
+      this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
+#endif
+    }
+    // No dynamic allocation needed: all of these are no-ops.
+    void initParallel(Index, Index, Index, Index)
+    {}
+    inline void allocateA() {}
+    inline void allocateB() {}
+    inline void allocateAll() {}
+};
+// Blocking storage for products with runtime (Dynamic) sizes: panel sizes are
+// computed by computeProductBlockingSizes and the pack buffers are heap
+// allocated lazily (allocateA/allocateB) and freed in the destructor. As
+// above, a row-major destination swaps the lhs/rhs roles.
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>
+  : public level3_blocking<
+      typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
+      typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
+{
+    enum {
+      Transpose = StorageOrder==RowMajor
+    };
+    typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
+    typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
+    typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+    Index m_sizeA;
+    Index m_sizeB;
+  public:
+    // l3_blocking selects whether nc may also be reduced (full L3-aware
+    // blocking) or only kc/mc are tuned while nc stays as requested.
+    gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
+    {
+      this->m_mc = Transpose ? cols : rows;
+      this->m_nc = Transpose ? rows : cols;
+      this->m_kc = depth;
+      if(l3_blocking)
+      {
+        computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc, num_threads);
+      }
+      else
+      {
+        // Pass a copy so that m_nc itself is not shrunk by the call.
+        Index n = this->m_nc;
+        computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads);
+      }
+      m_sizeA = this->m_mc * this->m_kc;
+      m_sizeB = this->m_kc * this->m_nc;
+    }
+    // Re-derives the blocking for a parallel run; must happen before the pack
+    // buffers are allocated (asserted below).
+    void initParallel(Index rows, Index cols, Index depth, Index num_threads)
+    {
+      this->m_mc = Transpose ? cols : rows;
+      this->m_nc = Transpose ? rows : cols;
+      this->m_kc = depth;
+      eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
+      // Pass a copy so that m_mc itself is not shrunk by the call.
+      Index m = this->m_mc;
+      computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
+      m_sizeA = this->m_mc * this->m_kc;
+      m_sizeB = this->m_kc * this->m_nc;
+    }
+    void allocateA()
+    {
+      if(this->m_blockA==0)
+        this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
+    }
+    void allocateB()
+    {
+      if(this->m_blockB==0)
+        this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
+    }
+    void allocateAll()
+    {
+      allocateA();
+      allocateB();
+    }
+    ~gemm_blocking_space()
+    {
+      aligned_delete(this->m_blockA, m_sizeA);
+      aligned_delete(this->m_blockB, m_sizeB);
+    }
+};
+}
+namespace internal {
+// Evaluator glue for dense*dense products selected as GemmProduct: routes
+// evalTo/addTo/subTo either to the coefficient-based lazy product (for tiny
+// products) or to the blocked, possibly parallel GEMM via scaleAndAddTo.
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
+  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
+{
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  typedef typename Lhs::Scalar LhsScalar;
+  typedef typename Rhs::Scalar RhsScalar;
+  // blas_traits strip transpose/conjugate/scalar-multiple wrappers so the
+  // kernel sees raw operands plus flags and a scalar factor.
+  typedef internal::blas_traits<Lhs> LhsBlasTraits;
+  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+  typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
+  typedef internal::blas_traits<Rhs> RhsBlasTraits;
+  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+  typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+  enum {
+    MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
+  };
+  typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;
+  template<typename Dst>
+  static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    // Heuristic: very small products (total extent < 20) are cheaper via the
+    // lazy coefficient-based path than via packing + gebp.
+    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+      lazyproduct::evalTo(dst, lhs, rhs);
+    else
+    {
+      dst.setZero();
+      scaleAndAddTo(dst, lhs, rhs, Scalar(1));
+    }
+  }
+  template<typename Dst>
+  static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+      lazyproduct::addTo(dst, lhs, rhs);
+    else
+      scaleAndAddTo(dst,lhs, rhs, Scalar(1));
+  }
+  template<typename Dst>
+  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+      lazyproduct::subTo(dst, lhs, rhs);
+    else
+      scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
+  }
+  // dst += alpha * a_lhs * a_rhs via the blocked GEMM; folds any scalar
+  // factors carried by the expressions into alpha and dispatches through
+  // parallelize_gemm (which may run single-threaded).
+  template<typename Dest>
+  static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
+  {
+    eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
+    // Degenerate (empty) products: dst is already correct.
+    if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
+      return;
+    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
+    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+                               * RhsBlasTraits::extractScalarFactor(a_rhs);
+    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
+            Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
+    typedef internal::gemm_functor<
+      Scalar, Index,
+      internal::general_matrix_matrix_product<
+        Index,
+        LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
+        RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
+        (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
+      ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;
+    BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
+    // Parallelization is only considered when the row extent can exceed 32.
+    internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
+        (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&RowMajorBit);
+  }
+};
+}
+}
+#endif
+// end #include "src/Core/products/GeneralMatrixMatrix.h"
+// #include "src/Core/VectorwiseOp.h"
+#ifndef EIGEN_PARTIAL_REDUX_H
+#define EIGEN_PARTIAL_REDUX_H
+namespace Eigen {
+template< typename MatrixType, typename MemberOp, int Direction>
+class PartialReduxExpr;
+namespace internal {
+// Traits for PartialReduxExpr: reducing Vertical collapses the rows to 1,
+// Horizontal collapses the columns to 1; the result scalar type comes from
+// the member functor. TraversalSize is the length of each reduced vector.
+template<typename MatrixType, typename MemberOp, int Direction>
+struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
+ : traits<MatrixType>
+{
+  typedef typename MemberOp::result_type Scalar;
+  typedef typename traits<MatrixType>::StorageKind StorageKind;
+  typedef typename traits<MatrixType>::XprKind XprKind;
+  typedef typename MatrixType::Scalar InputScalar;
+  enum {
+    RowsAtCompileTime = Direction==Vertical   ? 1 : MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
+    MaxRowsAtCompileTime = Direction==Vertical   ? 1 : MatrixType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
+    // A single-row result is flagged row-major so it iterates naturally.
+    Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0,
+    TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime
+  };
+};
+}
+// Expression of a partial (per-column or per-row) reduction: applies the
+// member functor to each column (Direction==Vertical) or row (Horizontal) of
+// the nested matrix, yielding a vector-shaped expression. Evaluation happens
+// in the corresponding evaluator; this class only stores the operands.
+template< typename MatrixType, typename MemberOp, int Direction>
+class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type,
+                         internal::no_assignment_operator
+{
+  public:
+    typedef typename internal::dense_xpr_base<PartialReduxExpr>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr)
+    EIGEN_DEVICE_FUNC
+    explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
+      : m_matrix(mat), m_functor(func) {}
+    // The reduced dimension collapses to 1; the other keeps the input extent.
+    EIGEN_DEVICE_FUNC
+    Index rows() const { return (Direction==Vertical   ? 1 : m_matrix.rows()); }
+    EIGEN_DEVICE_FUNC
+    Index cols() const { return (Direction==Horizontal ? 1 : m_matrix.cols()); }
+    EIGEN_DEVICE_FUNC
+    typename MatrixType::Nested nestedExpression() const { return m_matrix; }
+    EIGEN_DEVICE_FUNC
+    const MemberOp& functor() const { return m_functor; }
+  protected:
+    typename MatrixType::Nested m_matrix;
+    const MemberOp m_functor;
+};
+// Generates a functor struct member_<MEMBER> whose operator() calls the
+// member function MEMBER() on its argument (e.g. mat.sum()); COST is the
+// per-reduction cost estimate exposed via the nested Cost template, with the
+// vector length available as 'Size' and its scalar type as 'Scalar'.
+#define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \
+  template <typename ResultType> \
+  struct member_##MEMBER { \
+    EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
+    typedef ResultType result_type; \
+    template<typename Scalar, int Size> struct Cost \
+    { enum { value = COST }; }; \
+    template<typename XprType> \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+    ResultType operator()(const XprType& mat) const \
+    { return mat.MEMBER(); } \
+  }
+namespace internal {
+// One functor per reducible member function, each with its cost model in
+// terms of the vector length 'Size' (used by the evaluator's cost estimates).
+EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(stableNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(blueNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(hypotNorm, (Size-1) * functor_traits<scalar_hypot_op<Scalar> >::Cost );
+EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits<Scalar>::AddCost + NumTraits<Scalar>::MulCost);
+EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
+template <int p, typename ResultType>
+struct member_lpnorm {
+ typedef ResultType result_type;
+ template<typename Scalar, int Size> struct Cost
+ { enum { value = (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost }; };
+ EIGEN_DEVICE_FUNC member_lpnorm() {}
+ template<typename XprType>
+ EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const
+ { return mat.template lpNorm<p>(); }
+};
+template <typename BinaryOp, typename Scalar>
+struct member_redux {
+ typedef typename result_of<
+ BinaryOp(const Scalar&,const Scalar&)
+ >::type result_type;
+ template<typename _Scalar, int Size> struct Cost
+ { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
+ EIGEN_DEVICE_FUNC explicit member_redux(const BinaryOp func) : m_functor(func) {}
+ template<typename Derived>
+ EIGEN_DEVICE_FUNC inline result_type operator()(const DenseBase<Derived>& mat) const
+ { return mat.redux(m_functor); }
+ const BinaryOp m_functor;
+};
+}
+template<typename ExpressionType, int Direction> class VectorwiseOp
+{
+ public:
+ typedef typename ExpressionType::Scalar Scalar;
+ typedef typename ExpressionType::RealScalar RealScalar;
+ typedef Eigen::Index Index;
+ typedef typename internal::ref_selector<ExpressionType>::non_const_type ExpressionTypeNested;
+ typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;
+ template<template<typename _Scalar> class Functor,
+ typename Scalar_=Scalar> struct ReturnType
+ {
+ typedef PartialReduxExpr<ExpressionType,
+ Functor<Scalar_>,
+ Direction
+ > Type;
+ };
+ template<typename BinaryOp> struct ReduxReturnType
+ {
+ typedef PartialReduxExpr<ExpressionType,
+ internal::member_redux<BinaryOp,Scalar>,
+ Direction
+ > Type;
+ };
+ enum {
+ isVertical = (Direction==Vertical) ? 1 : 0,
+ isHorizontal = (Direction==Horizontal) ? 1 : 0
+ };
+ protected:
+ typedef typename internal::conditional<isVertical,
+ typename ExpressionType::ColXpr,
+ typename ExpressionType::RowXpr>::type SubVector;
+ EIGEN_DEVICE_FUNC
+ SubVector subVector(Index i)
+ {
+ return SubVector(m_matrix.derived(),i);
+ }
+ EIGEN_DEVICE_FUNC
+ Index subVectors() const
+ { return isVertical?m_matrix.cols():m_matrix.rows(); }
+ template<typename OtherDerived> struct ExtendedType {
+ typedef Replicate<OtherDerived,
+ isVertical ? 1 : ExpressionType::RowsAtCompileTime,
+ isHorizontal ? 1 : ExpressionType::ColsAtCompileTime> Type;
+ };
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ typename ExtendedType<OtherDerived>::Type
+ extendedTo(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxColsAtCompileTime==1),
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1),
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
+ return typename ExtendedType<OtherDerived>::Type
+ (other.derived(),
+ isVertical ? 1 : m_matrix.rows(),
+ isHorizontal ? 1 : m_matrix.cols());
+ }
+ template<typename OtherDerived> struct OppositeExtendedType {
+ typedef Replicate<OtherDerived,
+ isHorizontal ? 1 : ExpressionType::RowsAtCompileTime,
+ isVertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
+ };
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ typename OppositeExtendedType<OtherDerived>::Type
+ extendedToOpposite(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxColsAtCompileTime==1),
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxRowsAtCompileTime==1),
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
+ return typename OppositeExtendedType<OtherDerived>::Type
+ (other.derived(),
+ isHorizontal ? 1 : m_matrix.rows(),
+ isVertical ? 1 : m_matrix.cols());
+ }
+ public:
+ EIGEN_DEVICE_FUNC
+ explicit inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
+ EIGEN_DEVICE_FUNC
+ inline const ExpressionType& _expression() const { return m_matrix; }
+ template<typename BinaryOp>
+ EIGEN_DEVICE_FUNC
+ const typename ReduxReturnType<BinaryOp>::Type
+ redux(const BinaryOp& func = BinaryOp()) const
+ { return typename ReduxReturnType<BinaryOp>::Type(_expression(), internal::member_redux<BinaryOp,Scalar>(func)); }
+ typedef typename ReturnType<internal::member_minCoeff>::Type MinCoeffReturnType;
+ typedef typename ReturnType<internal::member_maxCoeff>::Type MaxCoeffReturnType;
+ typedef typename ReturnType<internal::member_squaredNorm,RealScalar>::Type SquaredNormReturnType;
+ typedef typename ReturnType<internal::member_norm,RealScalar>::Type NormReturnType;
+ typedef typename ReturnType<internal::member_blueNorm,RealScalar>::Type BlueNormReturnType;
+ typedef typename ReturnType<internal::member_stableNorm,RealScalar>::Type StableNormReturnType;
+ typedef typename ReturnType<internal::member_hypotNorm,RealScalar>::Type HypotNormReturnType;
+ typedef typename ReturnType<internal::member_sum>::Type SumReturnType;
+ typedef typename ReturnType<internal::member_mean>::Type MeanReturnType;
+ typedef typename ReturnType<internal::member_all>::Type AllReturnType;
+ typedef typename ReturnType<internal::member_any>::Type AnyReturnType;
+ typedef PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> CountReturnType;
+ typedef typename ReturnType<internal::member_prod>::Type ProdReturnType;
+ typedef Reverse<const ExpressionType, Direction> ConstReverseReturnType;
+ typedef Reverse<ExpressionType, Direction> ReverseReturnType;
+ template<int p> struct LpNormReturnType {
+ typedef PartialReduxExpr<ExpressionType, internal::member_lpnorm<p,RealScalar>,Direction> Type;
+ };
+ EIGEN_DEVICE_FUNC
+ const MinCoeffReturnType minCoeff() const
+ { return MinCoeffReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const MaxCoeffReturnType maxCoeff() const
+ { return MaxCoeffReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const SquaredNormReturnType squaredNorm() const
+ { return SquaredNormReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const NormReturnType norm() const
+ { return NormReturnType(_expression()); }
+ template<int p>
+ EIGEN_DEVICE_FUNC
+ const typename LpNormReturnType<p>::Type lpNorm() const
+ { return typename LpNormReturnType<p>::Type(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const BlueNormReturnType blueNorm() const
+ { return BlueNormReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const StableNormReturnType stableNorm() const
+ { return StableNormReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const HypotNormReturnType hypotNorm() const
+ { return HypotNormReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const SumReturnType sum() const
+ { return SumReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const MeanReturnType mean() const
+ { return MeanReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const AllReturnType all() const
+ { return AllReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const AnyReturnType any() const
+ { return AnyReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const CountReturnType count() const
+ { return CountReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const ProdReturnType prod() const
+ { return ProdReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const ConstReverseReturnType reverse() const
+ { return ConstReverseReturnType( _expression() ); }
+ EIGEN_DEVICE_FUNC
+ ReverseReturnType reverse()
+ { return ReverseReturnType( _expression() ); }
+ typedef Replicate<ExpressionType,(isVertical?Dynamic:1),(isHorizontal?Dynamic:1)> ReplicateReturnType;
+ EIGEN_DEVICE_FUNC
+ const ReplicateReturnType replicate(Index factor) const;
+ template<int Factor> const Replicate<ExpressionType,isVertical*Factor+isHorizontal,isHorizontal*Factor+isVertical>
+ EIGEN_DEVICE_FUNC
+ replicate(Index factor = Factor) const
+ {
+ return Replicate<ExpressionType,(isVertical?Factor:1),(isHorizontal?Factor:1)>
+ (_expression(),isVertical?factor:1,isHorizontal?factor:1);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ ExpressionType& operator=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return const_cast<ExpressionType&>(m_matrix = extendedTo(other.derived()));
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return const_cast<ExpressionType&>(m_matrix += extendedTo(other.derived()));
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return const_cast<ExpressionType&>(m_matrix -= extendedTo(other.derived()));
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ m_matrix *= extendedTo(other.derived());
+ return const_cast<ExpressionType&>(m_matrix);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ m_matrix /= extendedTo(other.derived());
+ return const_cast<ExpressionType&>(m_matrix);
+ }
+ template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+ CwiseBinaryOp<internal::scalar_sum_op<Scalar,typename OtherDerived::Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename ExtendedType<OtherDerived>::Type>
+ operator+(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return m_matrix + extendedTo(other.derived());
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ CwiseBinaryOp<internal::scalar_difference_op<Scalar,typename OtherDerived::Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename ExtendedType<OtherDerived>::Type>
+ operator-(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return m_matrix - extendedTo(other.derived());
+ }
+ template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+ CwiseBinaryOp<internal::scalar_product_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename ExtendedType<OtherDerived>::Type>
+ EIGEN_DEVICE_FUNC
+ operator*(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return m_matrix * extendedTo(other.derived());
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename ExtendedType<OtherDerived>::Type>
+ operator/(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return m_matrix / extendedTo(other.derived());
+ }
+ EIGEN_DEVICE_FUNC
+ CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
+ normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }
+ EIGEN_DEVICE_FUNC void normalize() {
+ m_matrix = this->normalized();
+ }
+ EIGEN_DEVICE_FUNC inline void reverseInPlace();
+ typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
+ EIGEN_DEVICE_FUNC
+ HomogeneousReturnType homogeneous() const;
+ typedef typename ExpressionType::PlainObject CrossReturnType;
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
+ enum {
+ HNormalized_Size = Direction==Vertical ? internal::traits<ExpressionType>::RowsAtCompileTime
+ : internal::traits<ExpressionType>::ColsAtCompileTime,
+ HNormalized_SizeMinusOne = HNormalized_Size==Dynamic ? Dynamic : HNormalized_Size-1
+ };
+ typedef Block<const ExpressionType,
+ Direction==Vertical ? int(HNormalized_SizeMinusOne)
+ : int(internal::traits<ExpressionType>::RowsAtCompileTime),
+ Direction==Horizontal ? int(HNormalized_SizeMinusOne)
+ : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
+ HNormalized_Block;
+ typedef Block<const ExpressionType,
+ Direction==Vertical ? 1 : int(internal::traits<ExpressionType>::RowsAtCompileTime),
+ Direction==Horizontal ? 1 : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
+ HNormalized_Factors;
+ typedef CwiseBinaryOp<internal::scalar_quotient_op<typename internal::traits<ExpressionType>::Scalar>,
+ const HNormalized_Block,
+ const Replicate<HNormalized_Factors,
+ Direction==Vertical ? HNormalized_SizeMinusOne : 1,
+ Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
+ HNormalizedReturnType;
+ EIGEN_DEVICE_FUNC
+ const HNormalizedReturnType hnormalized() const;
+ protected:
+ ExpressionTypeNested m_matrix;
+};
+template<typename Derived>
+inline typename DenseBase<Derived>::ColwiseReturnType
+DenseBase<Derived>::colwise()
+{
+ return ColwiseReturnType(derived());
+}
+template<typename Derived>
+inline typename DenseBase<Derived>::RowwiseReturnType
+DenseBase<Derived>::rowwise()
+{
+ return RowwiseReturnType(derived());
+}
+}
+#endif
+// end #include "src/Core/VectorwiseOp.h"
+// #include "src/Core/Replicate.h"
+#ifndef EIGEN_REPLICATE_H
+#define EIGEN_REPLICATE_H
+namespace Eigen {
+namespace internal {
+template<typename MatrixType,int RowFactor,int ColFactor>
+struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
+ : traits<MatrixType>
+{
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename traits<MatrixType>::StorageKind StorageKind;
+ typedef typename traits<MatrixType>::XprKind XprKind;
+ typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+ typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+ enum {
+ RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
+ ? Dynamic
+ : RowFactor * MatrixType::RowsAtCompileTime,
+ ColsAtCompileTime = ColFactor==Dynamic || int(MatrixType::ColsAtCompileTime)==Dynamic
+ ? Dynamic
+ : ColFactor * MatrixType::ColsAtCompileTime,
+ MaxRowsAtCompileTime = RowsAtCompileTime,
+ MaxColsAtCompileTime = ColsAtCompileTime,
+ IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
+ : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
+ : (MatrixType::Flags & RowMajorBit) ? 1 : 0,
+ Flags = IsRowMajor ? RowMajorBit : 0
+ };
+};
+}
+template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
+ : public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
+{
+ typedef typename internal::traits<Replicate>::MatrixTypeNested MatrixTypeNested;
+ typedef typename internal::traits<Replicate>::_MatrixTypeNested _MatrixTypeNested;
+ public:
+ typedef typename internal::dense_xpr_base<Replicate>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)
+ typedef typename internal::remove_all<MatrixType>::type NestedExpression;
+ template<typename OriginalMatrixType>
+ EIGEN_DEVICE_FUNC
+ inline explicit Replicate(const OriginalMatrixType& matrix)
+ : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
+ {
+ EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
+ THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
+ eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic);
+ }
+ template<typename OriginalMatrixType>
+ EIGEN_DEVICE_FUNC
+ inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
+ : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
+ {
+ EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
+ THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
+ EIGEN_DEVICE_FUNC
+ const _MatrixTypeNested& nestedExpression() const
+ {
+ return m_matrix;
+ }
+ protected:
+ MatrixTypeNested m_matrix;
+ const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;
+ const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;
+};
+template<typename Derived>
+template<int RowFactor, int ColFactor>
+const Replicate<Derived,RowFactor,ColFactor>
+DenseBase<Derived>::replicate() const
+{
+ return Replicate<Derived,RowFactor,ColFactor>(derived());
+}
+template<typename ExpressionType, int Direction>
+const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
+VectorwiseOp<ExpressionType,Direction>::replicate(Index factor) const
+{
+ return typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
+ (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
+}
+}
+#endif
+// end #include "src/Core/Replicate.h"
+// #include "src/Core/ArrayWrapper.h"
+#ifndef EIGEN_ARRAYWRAPPER_H
+#define EIGEN_ARRAYWRAPPER_H
+namespace Eigen {
+namespace internal {
+template<typename ExpressionType>
+struct traits<ArrayWrapper<ExpressionType> >
+ : public traits<typename remove_all<typename ExpressionType::Nested>::type >
+{
+ typedef ArrayXpr XprKind;
+ enum {
+ Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
+ LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
+ Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
+ };
+};
+}
+template<typename ExpressionType>
+class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
+{
+ public:
+ typedef ArrayBase<ArrayWrapper> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
+ typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
+ typedef typename internal::conditional<
+ internal::is_lvalue<ExpressionType>::value,
+ Scalar,
+ const Scalar
+ >::type ScalarWithConstIfNotLvalue;
+ typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
+ using Base::coeffRef;
+ EIGEN_DEVICE_FUNC
+ explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const { return m_expression.rows(); }
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return m_expression.cols(); }
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const { return m_expression.outerStride(); }
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const { return m_expression.innerStride(); }
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar* data() const { return m_expression.data(); }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return m_expression.coeffRef(rowId, colId);
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ return m_expression.coeffRef(index);
+ }
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC
+ inline void evalTo(Dest& dst) const { dst = m_expression; }
+ const typename internal::remove_all<NestedExpressionType>::type&
+ EIGEN_DEVICE_FUNC
+ nestedExpression() const
+ {
+ return m_expression;
+ }
+ EIGEN_DEVICE_FUNC
+ void resize(Index newSize) { m_expression.resize(newSize); }
+ EIGEN_DEVICE_FUNC
+ void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
+ protected:
+ NestedExpressionType m_expression;
+};
+}
+#endif
+// end #include "src/Core/ArrayWrapper.h"
+// #include "src/Core/SelfCwiseBinaryOp.h"
+namespace Eigen {
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
+{
+ internal::call_assignment(this->derived(), PlainObject::Constant(rows(), cols(),other), internal::div_assign_op<Scalar, Scalar>());
+ return derived();
+}
+}
+// end #include "src/Core/SelfCwiseBinaryOp.h"
diff --git a/compiler/nnc/cmake/config.cmake b/compiler/nnc/cmake/config.cmake
new file mode 100644
index 000000000..d9a1288dc
--- /dev/null
+++ b/compiler/nnc/cmake/config.cmake
@@ -0,0 +1,55 @@
+#
+# definition of directories of all nnc component
+#
+set(NNC_DRIVER_DIR ${NNC_ROOT_SRC_DIR}/driver)
+set(NNC_SOFT_BACKEND_DIR ${NNC_ROOT_SRC_DIR}/backends/soft_backend)
+set(NNC_ACL_BACKEND_DIR ${NNC_ROOT_SRC_DIR}/backends/acl_soft_backend)
+set(NNC_INTERPRETER_DIR ${NNC_ROOT_SRC_DIR}/backends/interpreter)
+set(NNC_SUPPORT_DIR ${NNC_ROOT_SRC_DIR}/support)
+set(NNC_PASS_DIR ${NNC_ROOT_SRC_DIR}/pass)
+
+#
+# Other additional useful cmake variables
+#
+set(NNC_ENABLE_UNITTEST ${ENABLE_TEST})
+set(NNC_TARGET_EXECUTABLE nnc) # nnc main target
+
+set(NNC_INSTALL_PATH ${CMAKE_INSTALL_PREFIX}) # root path of installation directory
+set(NNC_INSTALL_BIN_PATH ${NNC_INSTALL_PATH}/bin)
+set(NNC_INSTALL_LIB_PATH ${NNC_INSTALL_PATH}/lib) # directory that contains other directories with shared library
+
+#
+# find necessary packages
+#
+find_package(HDF5 COMPONENTS CXX QUIET)
+# defines if hdf5 package was found
+if(HDF5_FOUND)
+ set(NNC_HDF5_SUPPORTED ON)
+else()
+ message(WARNING "HDF5 not found, functionality of some nnc components will be disabled")
+ set(NNC_HDF5_SUPPORTED OFF)
+endif()
+
+if(TARGET mir_caffe2_importer)
+ set(NNC_FRONTEND_CAFFE2_ENABLED ON)
+else()
+ set(NNC_FRONTEND_CAFFE2_ENABLED OFF)
+endif()
+
+if(TARGET mir_caffe_importer)
+ set(NNC_FRONTEND_CAFFE_ENABLED ON)
+else()
+ set(NNC_FRONTEND_CAFFE_ENABLED OFF)
+endif()
+
+if(TARGET mir_onnx_importer)
+ set(NNC_FRONTEND_ONNX_ENABLED ON)
+else()
+ set(NNC_FRONTEND_ONNX_ENABLED OFF)
+endif()
+
+if(TARGET mir_tflite_importer)
+ set(NNC_FRONTEND_TFLITE_ENABLED ON)
+else()
+ set(NNC_FRONTEND_TFLITE_ENABLED OFF)
+endif()
diff --git a/compiler/nnc/cmake/utils.cmake b/compiler/nnc/cmake/utils.cmake
new file mode 100644
index 000000000..ac5b72aa9
--- /dev/null
+++ b/compiler/nnc/cmake/utils.cmake
@@ -0,0 +1,65 @@
+# generate sources files by *.def files for soft backend
+function(nnc_make_generated_sources DEF_SOURCES OUT_DIR GEN_SOURCES)
+ set(GEN_OUT "")
+ foreach(file IN LISTS DEF_SOURCES)
+ get_filename_component(file_name ${file} NAME_WE)
+ set(out_file "${OUT_DIR}/${file_name}.generated.h")
+ list(APPEND GEN_OUT "${out_file}")
+ add_custom_command(
+ OUTPUT ${out_file}
+ COMMAND def2src ${OUT_DIR} ${file}
+ DEPENDS def2src ${file}
+ )
+ endforeach()
+ set(${GEN_SOURCES} ${GEN_OUT} PARENT_SCOPE)
+endfunction()
+
+function(nnc_set_installation_properties TARG)
+ # TODO when we upgrade our cmake to version 3.8 we'll need to use
+ # BUILD_RPATH variable instead of CMAKE_BUILD_WITH_INSTALL_RPATH here
+
+ # set external RPATHs
+ set_target_properties(${TARG} PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE)
+ # use paths from build directoris
+ set_target_properties(${TARG} PROPERTIES CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
+ # set RPATH to core part of nnc
+ set_target_properties(${TARG} PROPERTIES INSTALL_RPATH ${NNC_INSTALL_LIB_PATH})
+endfunction()
+
+# install nnc libraries
+function(nnc_install_library LIB)
+ install(TARGETS ${LIB} DESTINATION ${NNC_INSTALL_LIB_PATH})
+ nnc_set_installation_properties(${LIB})
+endfunction()
+
+# install nnc executable
+function(nnc_install_executable BIN)
+ install(TARGETS ${BIN} DESTINATION ${NNC_INSTALL_BIN_PATH})
+ nnc_set_installation_properties(${BIN})
+endfunction()
+
+# add nnc library as target
+function(nnc_add_library)
+ add_library(${ARGV})
+ target_link_libraries(${ARGV0} PRIVATE nncc_common)
+ target_link_libraries(${ARGV0} PUBLIC nncc_coverage)
+
+ get_target_property(LIBS ${NNC_TARGET_EXECUTABLE} LINK_LIBRARIES)
+ target_include_directories(${ARGV0} PUBLIC ${NNC_ROOT_SRC_DIR}/include ${NNC_ROOT_BIN_DIR}/include)
+ if(LIBS MATCHES NOTFOUND)
+ set(LIBS "")
+ endif()
+ list(APPEND LIBS ${ARGV0})
+ set_target_properties(${NNC_TARGET_EXECUTABLE} PROPERTIES LINK_LIBRARIES "${LIBS}")
+endfunction()
+
+# function to add nnc unit test
+function(nnc_add_unit_test)
+ if(ENABLE_TEST)
+ add_executable(${ARGV})
+ target_link_libraries(${ARGV0} gtest_main)
+ add_test(${ARGV0} ${ARGV0})
+ endif(ENABLE_TEST)
+ add_dependencies(nnc_unit_tests ${ARGV0})
+ target_include_directories(${ARGV0} PUBLIC ${NNC_ROOT_SRC_DIR}/include ${NNC_ROOT_BIN_DIR}/include)
+endfunction()
diff --git a/compiler/nnc/doxygen.config b/compiler/nnc/doxygen.config
new file mode 100644
index 000000000..497cee340
--- /dev/null
+++ b/compiler/nnc/doxygen.config
@@ -0,0 +1,2427 @@
+# Doxyfile 1.8.11
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a double hash (##) is considered a comment and is placed in
+# front of the TAG it is preceding.
+#
+# All text after a single hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists, items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (\" \").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all text
+# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
+# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
+# for the list of possible encodings.
+# The default value is: UTF-8.
+
+DOXYFILE_ENCODING = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
+# double-quotes, unless you are using Doxywizard) that should identify the
+# project for which the documentation is generated. This name is used in the
+# title of most generated pages and in a few other places.
+# The default value is: My Project.
+
+PROJECT_NAME = "nnc"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
+# could be handy for archiving the generated documentation or if some version
+# control system is used.
+
+PROJECT_NUMBER =
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer a
+# quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF =
+
+# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
+# in the documentation. The maximum height of the logo should not exceed 55
+# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy
+# the logo to the output directory.
+
+PROJECT_LOGO =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
+# into which the generated documentation will be written. If a relative path is
+# entered, it will be relative to the location where doxygen was started. If
+# left blank the current directory will be used.
+
+OUTPUT_DIRECTORY =
+
+# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-
+# directories (in 2 levels) under the output directory of each output format and
+# will distribute the generated files over these directories. Enabling this
+# option can be useful when feeding doxygen a huge amount of source files, where
 +# putting all generated files in the same directory would otherwise cause
+# performance problems for the file system.
+# The default value is: NO.
+
+CREATE_SUBDIRS = NO
+
+# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII
+# characters to appear in the names of generated files. If set to NO, non-ASCII
+# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode
+# U+3044.
+# The default value is: NO.
+
+ALLOW_UNICODE_NAMES = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
+# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
+# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
+# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
+# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
+# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
+# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
+# Ukrainian and Vietnamese.
+# The default value is: English.
+
+OUTPUT_LANGUAGE = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
+# descriptions after the members that are listed in the file and class
+# documentation (similar to Javadoc). Set to NO to disable this.
+# The default value is: YES.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief
+# description of a member or function before the detailed description
+#
+# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+# The default value is: YES.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator that is
+# used to form the text in various listings. Each string in this list, if found
+# as the leading text of the brief description, will be stripped from the text
+# and the result, after processing the whole list, is used as the annotated
+# text. Otherwise, the brief description is used as-is. If left blank, the
+# following values are used ($name is automatically replaced with the name of
+# the entity):The $name class, The $name widget, The $name file, is, provides,
+# specifies, contains, represents, a, an and the.
+
+ABBREVIATE_BRIEF =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# doxygen will generate a detailed section even if there is only a brief
+# description.
+# The default value is: NO.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+# The default value is: NO.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path
 +# before file names in the file list and in the header files. If set to NO the
+# shortest path that makes the file name unique will be used
+# The default value is: YES.
+
+FULL_PATH_NAMES = YES
+
+# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
+# Stripping is only done if one of the specified strings matches the left-hand
+# part of the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the path to
+# strip.
+#
+# Note that you can specify absolute paths here, but also relative paths, which
+# will be relative from the directory where doxygen is started.
+# This tag requires that the tag FULL_PATH_NAMES is set to YES.
+
+STRIP_FROM_PATH =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
+# path mentioned in the documentation of a class, which tells the reader which
+# header file to include in order to use a class. If left blank only the name of
+# the header file containing the class definition is used. Otherwise one should
+# specify the list of include paths that are normally passed to the compiler
+# using the -I flag.
+
+STRIP_FROM_INC_PATH =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
 +# less readable) file names. This can be useful if your file system doesn't
+# support long names like on DOS, Mac, or CD-ROM.
+# The default value is: NO.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
+# first line (until the first dot) of a Javadoc-style comment as the brief
+# description. If set to NO, the Javadoc-style will behave just like regular Qt-
+# style comments (thus requiring an explicit @brief command for a brief
+# description.)
+# The default value is: NO.
+
+JAVADOC_AUTOBRIEF = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
+# line (until the first dot) of a Qt-style comment as the brief description. If
+# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
+# requiring an explicit \brief command for a brief description.)
+# The default value is: NO.
+
+QT_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
+# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
+# a brief description. This used to be the default behavior. The new default is
+# to treat a multi-line C++ comment block as a detailed description. Set this
+# tag to YES if you prefer the old behavior instead.
+#
+# Note that setting this tag to YES also means that rational rose comments are
+# not recognized any more.
+# The default value is: NO.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
+# documentation from any documented member that it re-implements.
+# The default value is: YES.
+
+INHERIT_DOCS = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new
+# page for each member. If set to NO, the documentation of a member will be part
+# of the file/class/namespace that contains it.
+# The default value is: NO.
+
+SEPARATE_MEMBER_PAGES = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
+# uses this value to replace tabs by spaces in code fragments.
+# Minimum value: 1, maximum value: 16, default value: 4.
+
+TAB_SIZE = 4
+
+# This tag can be used to specify a number of aliases that act as commands in
+# the documentation. An alias has the form:
+# name=value
+# For example adding
+# "sideeffect=@par Side Effects:\n"
+# will allow you to put the command \sideeffect (or @sideeffect) in the
+# documentation, which will result in a user-defined paragraph with heading
+# "Side Effects:". You can put \n's in the value part of an alias to insert
+# newlines.
+
+ALIASES =
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding "class=itcl::class"
+# will allow you to use the command class in the itcl::class meaning.
+
+TCL_SUBST =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C. For
+# instance, some of the names that are used will be different. The list of all
+# members will be omitted, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_FOR_C = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
+# Python sources only. Doxygen will then generate output that is more tailored
+# for that language. For instance, namespaces will be presented as packages,
+# qualified scopes will look different, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources. Doxygen will then generate output that is tailored for Fortran.
+# The default value is: NO.
+
+OPTIMIZE_FOR_FORTRAN = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for VHDL.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_VHDL = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension, and
+# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
+# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran:
+# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran:
 +# Fortran. In the latter case the parser tries to guess whether the code is fixed
+# or free formatted code, this is the default for Fortran type files), VHDL. For
+# instance to make doxygen treat .inc files as Fortran files (default is PHP),
+# and .f files as C (default is Fortran), use: inc=Fortran f=C.
+#
+# Note: For files without extension you can use no_extension as a placeholder.
+#
+# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
+# the files are not read by doxygen.
+
+EXTENSION_MAPPING =
+
+# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
+# according to the Markdown format, which allows for more readable
+# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you can
+# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
+# case of backward compatibilities issues.
+# The default value is: YES.
+
+MARKDOWN_SUPPORT = YES
+
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by putting a % sign in front of the word or
+# globally by setting AUTOLINK_SUPPORT to NO.
+# The default value is: YES.
+
+AUTOLINK_SUPPORT = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should set this
+# tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string);
 +# versus func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+# The default value is: NO.
+
+BUILTIN_STL_SUPPORT = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+# The default value is: NO.
+
+CPP_CLI_SUPPORT = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
+# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
+# will parse them like normal C++ but will assume all classes use public instead
+# of private inheritance when no explicit protection keyword is present.
+# The default value is: NO.
+
+SIP_SUPPORT = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES will make
+# doxygen to replace the get and set methods by a property in the documentation.
+# This will only work if the methods are indeed getting or setting a simple
+# type. If this is not the case, or you want to show the methods anyway, you
+# should set this option to NO.
+# The default value is: YES.
+
+IDL_PROPERTY_SUPPORT = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+# The default value is: NO.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# If one adds a struct or class to a group and this option is enabled, then also
+# any nested class or struct is added to the same group. By default this option
+# is disabled and one has to add nested compounds explicitly via \ingroup.
+# The default value is: NO.
+
+GROUP_NESTED_COMPOUNDS = NO
+
+# Set the SUBGROUPING tag to YES to allow class member groups of the same type
+# (for instance a group of public functions) to be put as a subgroup of that
+# type (e.g. under the Public Functions section). Set it to NO to prevent
+# subgrouping. Alternatively, this can be done per class using the
+# \nosubgrouping command.
+# The default value is: YES.
+
+SUBGROUPING = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
+# are shown inside the group in which they are included (e.g. using \ingroup)
+# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
+# and RTF).
+#
+# Note that this feature does not work in combination with
+# SEPARATE_MEMBER_PAGES.
+# The default value is: NO.
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
+# with only public data fields or simple typedef fields will be shown inline in
+# the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO, structs, classes, and unions are shown on a separate page (for HTML and
+# Man pages) or section (for LaTeX and RTF).
+# The default value is: NO.
+
+INLINE_SIMPLE_STRUCTS = NO
+
+# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
+# enum is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically be
+# useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+# The default value is: NO.
+
+TYPEDEF_HIDES_STRUCT = NO
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can be
+# an expensive process and often the same symbol appears multiple times in the
+# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
+# doxygen will become slower. If the cache is too large, memory is wasted. The
+# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
+# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
+# symbols. At the end of a run doxygen will report the cache usage and suggest
+# the optimal cache size from a speed point of view.
+# Minimum value: 0, maximum value: 9, default value: 0.
+
+LOOKUP_CACHE_SIZE = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
+# documentation are documented, even if no documentation was available. Private
+# class members and static file members will be hidden unless the
+# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
+# Note: This will also disable the warnings about undocumented members that are
+# normally produced when WARNINGS is set to YES.
+# The default value is: NO.
+
+EXTRACT_ALL = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
+# be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIVATE = YES
+
+# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
+# scope will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PACKAGE = NO
+
+# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
+# included in the documentation.
+# The default value is: NO.
+
+EXTRACT_STATIC = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
+# locally in source files will be included in the documentation. If set to NO,
+# only classes defined in header files are included. Does not have any effect
+# for Java sources.
+# The default value is: YES.
+
+EXTRACT_LOCAL_CLASSES = YES
+
+# This flag is only useful for Objective-C code. If set to YES, local methods,
+# which are defined in the implementation section but not in the interface are
+# included in the documentation. If set to NO, only methods in the interface are
+# included.
+# The default value is: NO.
+
+EXTRACT_LOCAL_METHODS = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base name of
+# the file that contains the anonymous namespace. By default anonymous namespace
+# are hidden.
+# The default value is: NO.
+
+EXTRACT_ANON_NSPACES = YES
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
+# undocumented members inside documented classes or files. If set to NO these
+# members will be included in the various overviews, but no documentation
+# section is generated. This option has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_MEMBERS = YES
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy. If set
+# to NO, these classes will be included in the various overviews. This option
+# has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_CLASSES = YES
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
+# (class|struct|union) declarations. If set to NO, these declarations will be
+# included in the documentation.
+# The default value is: NO.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
+# documentation blocks found inside the body of a function. If set to NO, these
+# blocks will be appended to the function's detailed documentation block.
+# The default value is: NO.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation that is typed after a
+# \internal command is included. If the tag is set to NO then the documentation
+# will be excluded. Set it to YES to include the internal documentation.
+# The default value is: NO.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
+# names in lower-case letters. If set to YES, upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+# The default value is: system dependent.
+
+CASE_SENSE_NAMES = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
+# their full class and namespace scopes in the documentation. If set to YES, the
+# scope will be hidden.
+# The default value is: NO.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
+# append additional text to a page's title, such as Class Reference. If set to
+# YES the compound reference will be hidden.
+# The default value is: NO.
+
+HIDE_COMPOUND_REFERENCE= NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
+# the files that are included by a file in the documentation of that file.
+# The default value is: YES.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
+# grouped member an include statement to the documentation, telling the reader
+# which file to include in order to use the member.
+# The default value is: NO.
+
+SHOW_GROUPED_MEMB_INC = NO
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
+# files with double quotes in the documentation rather than with sharp brackets.
+# The default value is: NO.
+
+FORCE_LOCAL_INCLUDES = NO
+
+# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
+# documentation for inline members.
+# The default value is: YES.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
+# (detailed) documentation of file and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order.
+# The default value is: YES.
+
+SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
+# descriptions of file, namespace and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order. Note that
+# this will also influence the order of the classes in the class list.
+# The default value is: NO.
+
+SORT_BRIEF_DOCS = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
+# (brief and detailed) documentation of class members so that constructors and
+# destructors are listed first. If set to NO the constructors will appear in the
+# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
+# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
+# member documentation.
+# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
+# detailed member documentation.
+# The default value is: NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
+# of group names into alphabetical order. If set to NO the group names will
+# appear in their defined order.
+# The default value is: NO.
+
+SORT_GROUP_NAMES = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
+# fully-qualified names, including namespaces. If set to NO, the class list will
+# be sorted only by class name, not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the alphabetical
+# list.
+# The default value is: NO.
+
+SORT_BY_SCOPE_NAME = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
+# type resolution of all parameters of a function it will reject a match between
+# the prototype and the implementation of a member function even if there is
+# only one candidate or it is obvious which candidate to choose by doing a
+# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
+# accept a match between prototype and implementation in such cases.
+# The default value is: NO.
+
+STRICT_PROTO_MATCHING = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
+# list. This list is created by putting \todo commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
+# list. This list is created by putting \test commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
+# list. This list is created by putting \bug commands in the documentation.
+# The default value is: YES.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
+# the deprecated list. This list is created by putting \deprecated commands in
+# the documentation.
+# The default value is: YES.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional documentation
+# sections, marked by \if <section_label> ... \endif and \cond <section_label>
+# ... \endcond blocks.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
+# initial value of a variable or macro / define can have for it to appear in the
+# documentation. If the initializer consists of more lines than specified here
+# it will be hidden. Use a value of 0 to hide initializers completely. The
+# appearance of the value of individual variables and macros / defines can be
+# controlled using \showinitializer or \hideinitializer command in the
+# documentation regardless of this setting.
+# Minimum value: 0, maximum value: 10000, default value: 30.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
+# the bottom of the documentation of classes and structs. If set to YES, the
+# list will mention the files that were used to generate the documentation.
+# The default value is: YES.
+
+SHOW_USED_FILES = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
+# will remove the Files entry from the Quick Index and from the Folder Tree View
+# (if specified).
+# The default value is: YES.
+
+SHOW_FILES = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
+# page. This will remove the Namespaces entry from the Quick Index and from the
+# Folder Tree View (if specified).
+# The default value is: YES.
+
+SHOW_NAMESPACES = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command command input-file, where command is the value of the
+# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
+# by doxygen. Whatever the program writes to standard output is used as the file
+# version. For an example see the documentation.
+
+FILE_VERSION_FILTER =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option. You can
+# optionally specify a file name after the option, if omitted DoxygenLayout.xml
+# will be used as the name of the layout file.
+#
+# Note that if you run doxygen from a directory containing a file called
+# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
+# tag is left empty.
+
+LAYOUT_FILE =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
+# the reference definitions. This must be a list of .bib files. The .bib
+# extension is automatically appended if omitted. This requires the bibtex tool
+# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info.
+# For LaTeX the style of the bibliography can be controlled using
+# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
+# search path. See also \cite for info how to create references.
+
+CITE_BIB_FILES =
+
+#---------------------------------------------------------------------------
+# Configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated to
+# standard output by doxygen. If QUIET is set to YES this implies that the
+# messages are off.
+# The default value is: NO.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
+# this implies that the warnings are on.
+#
+# Tip: Turn warnings on while writing the documentation.
+# The default value is: YES.
+
+WARNINGS = YES
+
+# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
+# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: YES.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some parameters
+# in a documented function, or documenting parameters that don't exist or using
+# markup commands wrongly.
+# The default value is: YES.
+
+WARN_IF_DOC_ERROR = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
+# are documented, but have no documentation for their parameters or return
+# value. If set to NO, doxygen will only warn about wrong or incomplete
+# parameter documentation, but not about the absence of documentation.
+# The default value is: NO.
+
+WARN_NO_PARAMDOC = NO
+
+# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
+# a warning is encountered.
+# The default value is: NO.
+
+WARN_AS_ERROR = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that doxygen
+# can produce. The string should contain the $file, $line, and $text tags, which
+# will be replaced by the file and line number from which the warning originated
+# and the warning text. Optionally the format may contain $version, which will
+# be replaced by the version of the file (if it could be obtained via
+# FILE_VERSION_FILTER)
+# The default value is: $file:$line: $text.
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning and error
+# messages should be written. If left blank the output is written to standard
+# error (stderr).
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag is used to specify the files and/or directories that contain
+# documented source files. You may enter file names like myfile.cpp or
+# directories like /usr/src/myproject. Separate the files or directories with
+# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
+# Note: If this tag is empty the current directory is searched.
+
+INPUT =
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
+# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
+# documentation (see: http://www.gnu.org/software/libiconv) for the list of
+# possible encodings.
+# The default value is: UTF-8.
+
+INPUT_ENCODING = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
+# *.h) to filter out the source-files in the directories.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# read by doxygen.
+#
+# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
+# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
+# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
+# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f, *.for, *.tcl,
+# *.vhd, *.vhdl, *.ucf, *.qsf, *.as and *.js.
+
+FILE_PATTERNS = *.cpp *.c *.cc *.cxx *.h *.hpp
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories should
+# be searched for input files as well.
+# The default value is: NO.
+
+RECURSIVE = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+#
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+# The default value is: NO.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories for example use the pattern */test/*
+
+EXCLUDE_PATTERNS =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories use the pattern */test/*
+
+EXCLUDE_SYMBOLS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or directories
+# that contain example code fragments that are included (see the \include
+# command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank all
+# files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+# <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form: pattern=filter
+# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
+# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
+# patterns match the file name, INPUT_FILTER is applied.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will also be used to filter the input files that are used for
+# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
+# The default value is: NO.
+
+FILTER_SOURCE_FILES = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and
+# it is also possible to disable source filtering for a specific pattern using
+# *.ext= (so without naming a filter).
+# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
+
+FILTER_SOURCE_PATTERNS =
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE =
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
+# generated. Documented entities will be cross-referenced with these sources.
+#
+# Note: To get rid of all source code in the generated output, make sure that
+# also VERBATIM_HEADERS is set to NO.
+# The default value is: NO.
+
+SOURCE_BROWSER = YES
+
+# Setting the INLINE_SOURCES tag to YES will include the body of functions,
+# classes and enums directly into the documentation.
+# The default value is: NO.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
+# special comment blocks from generated source code fragments. Normal C, C++ and
+# Fortran comments will always remain visible.
+# The default value is: YES.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
+# function all documented functions referencing it will be listed.
+# The default value is: NO.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES then for each documented function
+# all documented entities called/used by that function will be listed.
+# The default value is: NO.
+
+REFERENCES_RELATION = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
+# to YES then the hyperlinks from functions in REFERENCES_RELATION and
+# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
+# link to the documentation.
+# The default value is: YES.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
+# source code will show a tooltip with additional information such as prototype,
+# brief description and links to the definition and documentation. Since this
+# will make the HTML file larger and loading of large files a bit slower, you
+# can opt to disable this feature.
+# The default value is: YES.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+SOURCE_TOOLTIPS = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code will
+# point to the HTML generated by the htags(1) tool instead of doxygen built-in
+# source browser. The htags tool is part of GNU's global source tagging system
+# (see http://www.gnu.org/software/global/global.html). You will need version
+# 4.8.6 or higher.
+#
+# To use it do the following:
+# - Install the latest version of global
+# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
+# - Make sure the INPUT points to the root of the source tree
+# - Run doxygen as normal
+#
+# Doxygen will invoke htags (and that will in turn invoke gtags), so these
+# tools must be available from the command line (i.e. in the search path).
+#
+# The result: instead of the source browser generated by doxygen, the links to
+# source code will now point to the output of htags.
+# The default value is: NO.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+USE_HTAGS = NO
+
+# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a
+# verbatim copy of the header file for each class for which an include is
+# specified. Set to NO to disable this.
+# See also: Section \class.
+# The default value is: YES.
+
+VERBATIM_HEADERS = YES
+
+# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the
+# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the
+# cost of reduced performance. This can be particularly helpful with template
+# rich C++ code for which doxygen's built-in parser lacks the necessary type
+# information.
+# Note: The availability of this option depends on whether or not doxygen was
+# generated with the -Duse-libclang=ON option for CMake.
+# The default value is: NO.
+
+CLANG_ASSISTED_PARSING = NO
+
+# If clang assisted parsing is enabled you can provide the compiler with command
+# line options that you would normally use when invoking the compiler. Note that
+# the include paths will already be set by doxygen for the files and directories
+# specified with INPUT and INCLUDE_PATH.
+# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.
+
+CLANG_OPTIONS =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
+# compounds will be generated. Enable this if the project contains a lot of
+# classes, structs, unions or interfaces.
+# The default value is: YES.
+
+ALPHABETICAL_INDEX = YES
+
+# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
+# which the alphabetical index list will be split.
+# Minimum value: 1, maximum value: 20, default value: 5.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all classes will
+# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
+# can be used to specify a prefix (or a list of prefixes) that should be ignored
+# while generating the index headers.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output
+# The default value is: YES.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
+# generated HTML page (for example: .htm, .php, .asp).
+# The default value is: .html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
+# each generated HTML page. If the tag is left blank doxygen will generate a
+# standard header.
+#
+# To get valid HTML the header file that includes any scripts and style sheets
+# that doxygen needs, which is dependent on the configuration options used (e.g.
+# the setting GENERATE_TREEVIEW). It is highly recommended to start with a
+# default header using
+# doxygen -w html new_header.html new_footer.html new_stylesheet.css
+# YourConfigFile
+# and then modify the file new_header.html. See also section "Doxygen usage"
+# for information on how to generate the default header that doxygen normally
+# uses.
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. For a description
+# of the possible markers and block names see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
+# generated HTML page. If the tag is left blank doxygen will generate a standard
+# footer. See HTML_HEADER for more information on how to generate a default
+# footer and what special commands can be used inside the footer. See also
+# section "Doxygen usage" for information on how to generate the default footer
+# that doxygen normally uses.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
+# sheet that is used by each HTML page. It can be used to fine-tune the look of
+# the HTML output. If left blank doxygen will generate a default style sheet.
+# See also section "Doxygen usage" for information on how to generate the style
+# sheet that doxygen normally uses.
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
+# it is more robust and this tag (HTML_STYLESHEET) will in the future become
+# obsolete.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_STYLESHEET =
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# cascading style sheets that are included after the standard style sheets
+# created by doxygen. Using this option one can overrule certain style aspects.
+# This is preferred over using HTML_STYLESHEET since it does not replace the
+# standard style sheet and is therefore more robust against future updates.
+# Doxygen will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list). For an example see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_STYLESHEET =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
+# files will be copied as-is; there are no commands or markers available.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_FILES =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
+# will adjust the colors in the style sheet and background images according to
+# this color. Hue is specified as an angle on a colorwheel, see
+# http://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
+# purple, and 360 is red again.
+# Minimum value: 0, maximum value: 359, default value: 220.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_HUE = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
+# in the HTML output. For a value of 0 the output will use grayscales only. A
+# value of 255 will produce the most vivid colors.
+# Minimum value: 0, maximum value: 255, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_SAT = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
+# luminance component of the colors in the HTML output. Values below 100
+# gradually make the output lighter, whereas values above 100 make the output
+# darker. The value divided by 100 is the actual gamma applied, so 80 represents
+# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not
+# change the gamma.
+# Minimum value: 40, maximum value: 240, default value: 80.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_GAMMA = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting this
+# to YES can help to show when doxygen was last run and thus if the
+# documentation is up to date.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_TIMESTAMP = NO
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_SECTIONS = NO
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
+# shown in the various tree structured indices initially; the user can expand
+# and collapse entries dynamically later on. Doxygen will expand the tree to
+# such a level that at most the specified number of entries are visible (unless
+# a fully collapsed tree already exceeds this amount). So setting the number of
+# entries 1 will produce a full collapsed tree by default. 0 is a special value
+# representing an infinite number of entries and will result in a full expanded
+# tree by default.
+# Minimum value: 0, maximum value: 9999, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files will be
+# generated that can be used as input for Apple's Xcode 3 integrated development
+# environment (see: http://developer.apple.com/tools/xcode/), introduced with
+# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
+# Makefile in the HTML output directory. Running make will produce the docset in
+# that directory and running make install will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
+# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_DOCSET = NO
+
+# This tag determines the name of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# The default value is: Doxygen generated docs.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDNAME = "Doxygen generated docs"
+
+# This tag specifies a string that should uniquely identify the documentation
+# set bundle. This should be a reverse domain-name style string, e.g.
+# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_BUNDLE_ID = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+# The default value is: org.doxygen.Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_ID = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+# The default value is: Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_NAME = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
+# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
+# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
+# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
+# Windows.
+#
+# The HTML Help Workshop contains a compiler that can convert all HTML output
+# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
+# files are now used as the Windows 98 help format, and will replace the old
+# Windows help format (.hlp) on all Windows platforms in the future. Compressed
+# HTML files also contain an index, a table of contents, and you can search for
+# words in the documentation. The HTML workshop also contains a viewer for
+# compressed HTML files.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_HTMLHELP = NO
+
+# The CHM_FILE tag can be used to specify the file name of the resulting .chm
+# file. You can add a path in front of the file if the result should not be
+# written to the html output directory.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_FILE =
+
+# The HHC_LOCATION tag can be used to specify the location (absolute path
+# including file name) of the HTML help compiler (hhc.exe). If non-empty,
+# doxygen will try to run the HTML help compiler on the generated index.hhp.
+# The file has to be specified with full path.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+HHC_LOCATION =
+
+# The GENERATE_CHI flag controls if a separate .chi index file is generated
+# (YES) or that it should be included in the master .chm file (NO).
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+GENERATE_CHI = NO
+
+# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
+# and project file content.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_INDEX_ENCODING =
+
+# The BINARY_TOC flag controls whether a binary table of contents is generated
+# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it
+# enables the Previous and Next buttons.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members to
+# the table of contents of the HTML help documentation and to the tree view.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+TOC_EXPAND = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
+# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
+# (.qch) of the generated HTML documentation.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_QHP = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
+# the file name of the resulting .qch file. The path specified is relative to
+# the HTML output folder.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QCH_FILE =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
+# Project output. For more information please see Qt Help Project / Namespace
+# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_NAMESPACE = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
+# Help Project output. For more information please see Qt Help Project / Virtual
+# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-
+# folders).
+# The default value is: doc.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_VIRTUAL_FOLDER = doc
+
+# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
+# filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
+# filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_NAME =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
+# filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_ATTRS =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's filter section matches. Qt Help Project / Filter Attributes (see:
+# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_SECT_FILTER_ATTRS =
+
+# The QHG_LOCATION tag can be used to specify the location of Qt's
+# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
+# generated .qhp file.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHG_LOCATION =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
+# generated, together with the HTML files, they form an Eclipse help plugin. To
+# install this plugin and make it available under the help contents menu in
+# Eclipse, the contents of the directory containing the HTML and XML files needs
+# to be copied into the plugins directory of eclipse. The name of the directory
+# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
+# After copying Eclipse needs to be restarted before the help appears.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_ECLIPSEHELP = NO
+
+# A unique identifier for the Eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have this
+# name. Each documentation set should have its own identifier.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
+
+ECLIPSE_DOC_ID = org.doxygen.Project
+
+# If you want full control over the layout of the generated HTML pages it might
+# be necessary to disable the index and replace it with your own. The
+# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
+# of each HTML page. A value of NO enables the index and the value YES disables
+# it. Since the tabs in the index contain the same information as the navigation
+# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+DISABLE_INDEX = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information. If the tag
+# value is set to YES, a side panel will be generated containing a tree-like
+# index structure (just like the one that is generated for HTML Help). For this
+# to work a browser that supports JavaScript, DHTML, CSS and frames is required
+# (i.e. any modern browser). Windows users are probably better off using the
+# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can
+# further fine-tune the look of the index. As an example, the default style
+# sheet generated by doxygen has an example that shows how to put an image at
+# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
+# the same information as the tab index, you could consider setting
+# DISABLE_INDEX to YES when enabling this option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_TREEVIEW = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
+# doxygen will group on one line in the generated HTML documentation.
+#
+# Note that a value of 0 will completely suppress the enum values from appearing
+# in the overview section.
+# Minimum value: 0, maximum value: 20, default value: 4.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+ENUM_VALUES_PER_LINE = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
+# to set the initial width (in pixels) of the frame in which the tree is shown.
+# Minimum value: 0, maximum value: 1500, default value: 250.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+TREEVIEW_WIDTH = 250
+
+# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to
+# external symbols imported via tag files in a separate window.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+EXT_LINKS_IN_WINDOW = NO
+
+# Use this tag to change the font size of LaTeX formulas included as images in
+# the HTML documentation. When you change the font size after a successful
+# doxygen run you need to manually remove any form_*.png images from the HTML
+# output directory to force them to be regenerated.
+# Minimum value: 8, maximum value: 50, default value: 10.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_FONTSIZE = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are not
+# supported properly for IE 6.0, but are supported on all modern browsers.
+#
+# Note that when changing this option you need to delete any form_*.png files in
+# the HTML output directory before the changes have effect.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_TRANSPARENT = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
+# http://www.mathjax.org) which uses client side Javascript for the rendering
+# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
+# installed or if you want the formulas to look prettier in the HTML output.
+# enabled you may also need to install MathJax separately and configure the path
+# to it using the MATHJAX_RELPATH option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+USE_MATHJAX = NO
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. See the MathJax site (see:
+# http://docs.mathjax.org/en/latest/output.html) for more details.
+# Possible values are: HTML-CSS (which is slower, but has the best
+# compatibility), NativeMML (i.e. MathML) and SVG.
+# The default value is: HTML-CSS.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_FORMAT = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the HTML
+# output directory using the MATHJAX_RELPATH option. The destination directory
+# should contain the MathJax.js script. For instance, if the mathjax directory
+# is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
+# Content Delivery Network so you can quickly see the result without installing
+# MathJax. However, it is strongly recommended to install a local copy of
+# MathJax from http://www.mathjax.org before deployment.
+# The default value is: http://cdn.mathjax.org/mathjax/latest.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
+# extension names that should be enabled during MathJax rendering. For example
+# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_EXTENSIONS =
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
+# of code that will be used on startup of the MathJax code. See the MathJax site
+# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# example see the documentation.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_CODEFILE =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
+# the HTML output. The underlying search engine uses javascript and DHTML and
+# should work on any modern browser. Note that when using HTML help
+# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
+# there is already a search function so this one should typically be disabled.
+# For large projects the javascript based search engine can be slow, then
+# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
+# search using the keyboard; to jump to the search box use <access key> + S
+# (what the <access key> is depends on the OS and browser, but it is typically
+# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
+# key> to jump into the search results window, the results can be navigated
+# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel
+# the search. The filter options can be selected when the cursor is inside the
+# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>
+# to select a filter and <Enter> or <escape> to activate or cancel the filter
+# option.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+SEARCHENGINE = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using Javascript. There
+# are two flavors of web server based searching depending on the EXTERNAL_SEARCH
+# setting. When disabled, doxygen will generate a PHP script for searching and
+# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing
+# and searching needs to be provided by external tools. See the section
+# "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SERVER_BASED_SEARCH = NO
+
+# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
+# search results.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: http://xapian.org/).
+#
+# See the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: http://xapian.org/). See the section "External Indexing and
+# Searching" for details.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHENGINE_URL =
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+# The default file is: searchdata.xml.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHDATA_FILE = searchdata.xml
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH_ID =
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id
+# to a relative location where the documentation can be found. The format is:
+# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTRA_SEARCH_MAPPINGS =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.
+# The default value is: YES.
+
+GENERATE_LATEX = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked.
+#
+# Note that when enabling USE_PDFLATEX this option is only used for generating
+# bitmaps for formulas in the HTML output, but not in the Makefile that is
+# written to the output directory.
+# The default file is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
+# index for LaTeX.
+# The default file is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used by the
+# printer.
+# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
+# 14 inches) and executive (7.25 x 10.5 inches).
+# The default value is: a4.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PAPER_TYPE = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
+# that should be included in the LaTeX output. The package can be specified just
+# by its name or with the correct syntax as to be used with the LaTeX
+# \usepackage command. To get the times font for instance you can specify :
+# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times}
+# To use the option intlimits with the amsmath package you can specify:
+# EXTRA_PACKAGES=[intlimits]{amsmath}
+# If left blank no extra packages will be included.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
+# generated LaTeX document. The header should contain everything until the first
+# chapter. If it is left blank doxygen will generate a standard header. See
+# section "Doxygen usage" for information on how to let doxygen write the
+# default header to a separate file.
+#
+# Note: Only use a user-defined header if you know what you are doing! The
+# following commands have a special meaning inside the header: $title,
+# $datetime, $date, $doxygenversion, $projectname, $projectnumber,
+# $projectbrief, $projectlogo. Doxygen will replace $title with the empty
+# string, for the replacement values of the other commands the user is referred
+# to HTML_HEADER.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HEADER =
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
+# generated LaTeX document. The footer should contain everything after the last
+# chapter. If it is left blank doxygen will generate a standard footer. See
+# LATEX_HEADER for more information on how to generate a default footer and what
+# special commands can be used inside the footer.
+#
+# Note: Only use a user-defined footer if you know what you are doing!
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_FOOTER =
+
+# The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# LaTeX style sheets that are included after the standard style sheets created
+# by doxygen. Using this option one can overrule certain style aspects. Doxygen
+# will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list).
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_STYLESHEET =
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the LATEX_OUTPUT output
+# directory. Note that the files will be copied as-is; there are no commands or
+# markers available.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_FILES =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
+# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
+# contain links (just like the HTML output) instead of page references. This
+# makes the output suitable for online browsing using a PDF viewer.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PDF_HYPERLINKS = YES
+
+# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
+# the PDF file directly from the LaTeX files. Set this option to YES, to get a
+# higher quality PDF documentation.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+USE_PDFLATEX = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep running
+# if errors occur, instead of asking the user for help. This option is also used
+# when generating formulas in HTML.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BATCHMODE = NO
+
+# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
+# index chapters (such as File Index, Compound Index, etc.) in the output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HIDE_INDICES = NO
+
+# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
+# code with syntax highlighting in the LaTeX output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_SOURCE_CODE = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. See
+# http://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# The default value is: plain.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BIB_STYLE = plain
+
+# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated
+# page will contain the date and time when the page was generated. Setting this
+# to NO can help when comparing the output of multiple runs.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_TIMESTAMP = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES, doxygen will generate RTF output. The
+# RTF output is optimized for Word 97 and may not look too pretty with other RTF
+# readers/editors.
+# The default value is: NO.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: rtf.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES, doxygen generates more compact RTF
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
+# contain hyperlink fields. The RTF file will contain links (just like the HTML
+# output) instead of page references. This makes the output suitable for online
+# browsing using Word or some other Word compatible readers that support those
+# fields.
+#
+# Note: WordPad (write) and others do not support links.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's config
+# file, i.e. a series of assignments. You only have to provide replacements,
+# missing definitions are set to their default value.
+#
+# See also section "Doxygen usage" for information on how to generate the
+# default style sheet that doxygen normally uses.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an RTF document. Syntax is
+# similar to doxygen's config file. A template extensions file can be generated
+# using doxygen -e rtf extensionFile.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_EXTENSIONS_FILE =
+
+# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code
+# with syntax highlighting in the RTF output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_SOURCE_CODE = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES, doxygen will generate man pages for
+# classes and files.
+# The default value is: NO.
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it. A directory man3 will be created inside the directory specified by
+# MAN_OUTPUT.
+# The default directory is: man.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to the generated
+# man pages. In case the manual section does not start with a number, the number
+# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
+# optional.
+# The default value is: .3.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_EXTENSION = .3
+
+# The MAN_SUBDIR tag determines the name of the directory created within
+# MAN_OUTPUT in which the man pages are placed. It defaults to man followed by
+# MAN_EXTENSION with the initial . removed.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_SUBDIR =
+
+# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
+# will generate one additional man file for each entity documented in the real
+# man page(s). These additional files only source the real man page, but without
+# them the man command would be unable to find the correct page.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that
+# captures the structure of the code including all documentation.
+# The default value is: NO.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: xml.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_OUTPUT = xml
+
+# If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program
+# listings (including syntax highlighting and cross-referencing information) to
+# the XML output. Note that enabling this will significantly increase the size
+# of the XML output.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES, doxygen will generate Docbook files
+# that can be used to generate PDF.
+# The default value is: NO.
+
+GENERATE_DOCBOOK = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it.
+# The default directory is: docbook.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_OUTPUT = docbook
+
+# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the
+# program listings (including syntax highlighting and cross-referencing
+# information) to the DOCBOOK output. Note that enabling this will significantly
+# increase the size of the DOCBOOK output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_PROGRAMLISTING = NO
+
+#---------------------------------------------------------------------------
+# Configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
+# AutoGen Definitions (see http://autogen.sf.net) file that captures the
+# structure of the code including all documentation. Note that this feature is
+# still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES, doxygen will generate a Perl module
+# file that captures the structure of the code including all documentation.
+#
+# Note that this feature is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES, doxygen will generate the necessary
+# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
+# output from the Perl module output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES, the Perl module output will be nicely
+# formatted so it can be parsed by a human reader. This is useful if you want to
+# understand what is going on. On the other hand, if this tag is set to NO, the
+# size of the Perl module output will be much smaller and Perl will parse it
+# just the same.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file are
+# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
+# so different doxyrules.make files included by the same Makefile don't
+# overwrite each other's variables.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
+# C-preprocessor directives found in the sources and include files.
+# The default value is: YES.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
+# in the source code. If set to NO, only conditional compilation will be
+# performed. Macro expansion can be done in a controlled way by setting
+# EXPAND_ONLY_PREDEF to YES.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
+# the macro expansion is limited to the macros specified with the PREDEFINED and
+# EXPAND_AS_DEFINED tags.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES, the include files in the
+# INCLUDE_PATH will be searched if a #include is found.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by the
+# preprocessor.
+# This tag requires that the tag SEARCH_INCLUDES is set to YES.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will be
+# used.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that are
+# defined before the preprocessor is started (similar to the -D option of e.g.
+# gcc). The argument of the tag is a list of macros of the form: name or
+# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
+# is assumed. To prevent a macro definition from being undefined via #undef or
+# recursively expanded use the := operator instead of the = operator.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
+# tag can be used to specify a list of macro names that should be expanded. The
+# macro definition that is found in the sources will be used. Use the PREDEFINED
+# tag if you want to use a different macro definition that overrules the
+# definition found in the source code.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
+# remove all references to function-like macros that are alone on a line, have
+# an all uppercase name, and do not end with a semicolon. Such function macros
+# are typically used for boiler-plate code, and will confuse the parser if not
+# removed.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tag files. For each tag
+# file the location of the external documentation should be added. The format of
+# a tag file without this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where loc1 and loc2 can be relative or absolute paths or URLs. See the
+# section "Linking to external documentation" for more information about the use
+# of tag files.
+# Note: Each tag file must have a unique name (where the name does NOT include
+# the path). If a tag file is not located in the directory in which doxygen is
+# run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
+# tag file that is based on the input files it reads. See section "Linking to
+# external documentation" for more information about the usage of tag files.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES, all external class will be listed in
+# the class index. If set to NO, only the inherited external classes will be
+# listed.
+# The default value is: NO.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will be
+# listed.
+# The default value is: YES.
+
+EXTERNAL_GROUPS = YES
+
+# If the EXTERNAL_PAGES tag is set to YES, all external pages will be listed in
+# the related pages index. If set to NO, only the current project's pages will
+# be listed.
+# The default value is: YES.
+
+EXTERNAL_PAGES = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of 'which perl').
+# The default file (with absolute path) is: /usr/bin/perl.
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram
+# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
+# NO turns the diagrams off. Note that this option also works with HAVE_DOT
+# disabled, but it is recommended to install and use dot, since it yields more
+# powerful graphs.
+# The default value is: YES.
+
+CLASS_DIAGRAMS = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see:
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH =
+
+# You can include diagrams made with dia in doxygen documentation. Doxygen will
+# then run dia to produce the diagram and insert it in the documentation. The
+# DIA_PATH tag allows you to specify the directory where the dia binary resides.
+# If left empty dia is assumed to be found in the default search path.
+
+DIA_PATH =
+
+# If set to YES the inheritance and collaboration graphs will hide inheritance
+# and usage relations if the target is undocumented or is not a class.
+# The default value is: YES.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz (see:
+# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
+# Bell Labs. The other options in this section have no effect if this option is
+# set to NO
+# The default value is: YES.
+
+HAVE_DOT = YES
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
+# to run in parallel. When set to 0 doxygen will base this on the number of
+# processors available in the system. You can set it explicitly to a value
+# larger than 0 to get control over the balance between CPU load and processing
+# speed.
+# Minimum value: 0, maximum value: 32, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_NUM_THREADS = 0
+
+# When you want a differently looking font in the dot files that doxygen
+# generates you can specify the font name using DOT_FONTNAME. You need to make
+# sure dot is able to find the font, which can be done by putting it in a
+# standard location or by setting the DOTFONTPATH environment variable or by
+# setting DOT_FONTPATH to the directory containing the font.
+# The default value is: Helvetica.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTNAME = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
+# dot graphs.
+# Minimum value: 4, maximum value: 24, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTSIZE = 10
+
+# By default doxygen will tell dot to use the default font as specified with
+# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
+# the path where dot can find it using this tag.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTPATH =
+
+# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
+# each documented class showing the direct and indirect inheritance relations.
+# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
+# graph for each documented class showing the direct and indirect implementation
+# dependencies (inheritance, containment, and class references variables) of the
+# class with other documented classes.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+COLLABORATION_GRAPH = YES
+
+# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
+# groups, showing the direct groups dependencies.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GROUP_GRAPHS = YES
+
+# If the UML_LOOK tag is set to YES, doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LOOK = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
+# class node. If there are many fields or methods and many nodes the graph may
+# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
+# number of items for each type to make the size more manageable. Set this to 0
+# for no limit. Note that the threshold may be exceeded by 50% before the limit
+# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
+# but if the number exceeds 15, the total amount of fields shown is limited to
+# 10.
+# Minimum value: 0, maximum value: 100, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LIMIT_NUM_FIELDS = 10
+
+# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
+# collaboration graphs will show the relations between templates and their
+# instances.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+TEMPLATE_RELATIONS = NO
+
+# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
+# YES then doxygen will generate a graph for each documented file showing the
+# direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDE_GRAPH = YES
+
+# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
+# set to YES then doxygen will generate a graph for each documented file showing
+# the direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command. Disabling a call graph can be
+# accomplished by means of the command \hidecallgraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALL_GRAPH = NO
+
+# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable caller graphs for selected
+# functions only using the \callergraph command. Disabling a caller graph can be
+# accomplished by means of the command \hidecallergraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALLER_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will graphical
+# hierarchy of all classes instead of a textual one.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GRAPHICAL_HIERARCHY = YES
+
+# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
+# dependencies a directory has on other directories in a graphical way. The
+# dependency relations are determined by the #include relations between the
+# files in the directories.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DIRECTORY_GRAPH = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. For an explanation of the image formats see the section
+# output formats in the documentation of the dot tool (Graphviz (see:
+# http://www.graphviz.org/)).
+# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
+# to make the SVG files visible in IE 9+ (other browsers do not have this
+# requirement).
+# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd,
+# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo,
+# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo,
+# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and
+# png:gdiplus:gdiplus.
+# The default value is: png.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_IMAGE_FORMAT = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+#
+# Note that this requires a modern browser other than Internet Explorer. Tested
+# and working are Firefox, Chrome, Safari, and Opera.
+# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
+# the SVG files visible. Older versions of IE do not have SVG support.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INTERACTIVE_SVG = NO
+
+# The DOT_PATH tag can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the \dotfile
+# command).
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOTFILE_DIRS =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the \mscfile
+# command).
+
+MSCFILE_DIRS =
+
+# The DIAFILE_DIRS tag can be used to specify one or more directories that
+# contain dia files that are included in the documentation (see the \diafile
+# command).
+
+DIAFILE_DIRS =
+
+# When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the
+# path where java can find the plantuml.jar file. If left blank, it is assumed
+# PlantUML is not used or called during a preprocessing step. Doxygen will
+# generate a warning when it encounters a \startuml command in this case and
+# will not generate output for the diagram.
+
+PLANTUML_JAR_PATH =
+
+# When using plantuml, the specified paths are searched for files specified by
+# the !include statement in a plantuml block.
+
+PLANTUML_INCLUDE_PATH =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
+# that will be shown in the graph. If the number of nodes in a graph becomes
+# larger than this value, doxygen will truncate the graph, which is visualized
+# by representing a node as a red box. Note that doxygen if the number of direct
+# children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
+# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+# Minimum value: 0, maximum value: 10000, default value: 50.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_GRAPH_MAX_NODES = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
+# generated by dot. A depth value of 3 means that only nodes reachable from the
+# root by following a path via at most 3 edges will be shown. Nodes that lay
+# further from the root node will be omitted. Note that setting this option to 1
+# or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+# Minimum value: 0, maximum value: 1000, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not seem
+# to support this out of the box.
+#
+# Warning: Depending on the platform used, enabling this option may lead to
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
+# read).
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_TRANSPARENT = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10) support
+# this, this feature is disabled by default.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_MULTI_TARGETS = NO
+
+# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
+# explaining the meaning of the various boxes and arrows in the dot generated
+# graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
+# files that are used to generate the various graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_CLEANUP = YES
diff --git a/compiler/nnc/driver/Driver.cpp b/compiler/nnc/driver/Driver.cpp
new file mode 100644
index 000000000..5b369623e
--- /dev/null
+++ b/compiler/nnc/driver/Driver.cpp
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pass/PassData.h"
+
+#include "passes/transformations/DataFormatSwitcher.h"
+#include "passes/transformations/LowerConv2D.h"
+
+#include "backends/interpreter/InterpreterBackend.h"
+#include "backends/soft_backend/CPPGenerator.h"
+#include "passes/dot_dumper/DumperPass.h"
+#include "backends/acl_soft_backend/AclCppGenerator.h"
+
+#include "passes/optimizations/CombineTransposes.h"
+#include "passes/optimizations/ConstantFoldTranspose.h"
+#include "passes/optimizations/DeadCodeElimination.h"
+#include "passes/optimizations/FuseArithmeticOps.h"
+#include "passes/optimizations/SinkRelu.h"
+#include "passes/optimizations/SinkTranspose.h"
+
+#include "support/CommandLine.h"
+#include "Definitions.h"
+#include "Options.h"
+#include "Driver.h"
+
+#ifdef NNC_FRONTEND_CAFFE2_ENABLED
+#include <caffe2_importer.h>
+#endif // NNC_FRONTEND_CAFFE2_ENABLED
+#ifdef NNC_FRONTEND_CAFFE_ENABLED
+#include <caffe_importer.h>
+#endif // NNC_FRONTEND_CAFFE_ENABLED
+#ifdef NNC_FRONTEND_TFLITE_ENABLED
+#include <tflite_importer.h>
+#endif // NNC_FRONTEND_TFLITE_ENABLED
+#ifdef NNC_FRONTEND_ONNX_ENABLED
+#include <ONNXImporterImpl.h>
+#endif // NNC_FRONTEND_ONNX_ENABLED
+
+#include <stdex/Memory.h>
+
+namespace nnc
+{
+
+static std::string getFrontendOptionsString()
+{
+ std::string res;
+
+ if (!cli::caffeFrontend.isDisabled())
+ res += "'" + cli::caffeFrontend.getNames()[0] + "' ";
+
+ if (!cli::caffe2Frontend.isDisabled())
+ res += "'" + cli::caffe2Frontend.getNames()[0] + "' ";
+
+ if (!cli::onnxFrontend.isDisabled())
+ res += "'" + cli::onnxFrontend.getNames()[0] + "' ";
+
+ if (!cli::tflFrontend.isDisabled())
+ res += "'" + cli::tflFrontend.getNames()[0] + "'";
+
+ return res;
+}
+
+static std::unique_ptr<mir::Graph> importModel()
+{
+ // For bool, the value false is converted to zero and the value true is converted to one
+ if (cli::caffeFrontend + cli::caffe2Frontend + cli::tflFrontend + cli::onnxFrontend != 1)
+ throw DriverException("One and only one of the following options are allowed and have to be set"
+ "in the same time: " +
+ getFrontendOptionsString());
+
+ if (cli::caffeFrontend)
+ {
+#ifdef NNC_FRONTEND_CAFFE_ENABLED
+ return mir_caffe::loadModel(cli::inputFile.getRawValue());
+#endif // NNC_FRONTEND_CAFFE_ENABLED
+ }
+ else if (cli::caffe2Frontend)
+ {
+#ifdef NNC_FRONTEND_CAFFE2_ENABLED
+ // FIXME: caffe2 input shapes are not provided by model and must be set from cli
+  // current 'inputShapes' could provide only one shape, while the model could have several inputs
+ return mir_caffe2::loadModel(cli::inputFile.getRawValue(), cli::initNet.getRawValue(),
+ {cli::inputShapes.getRawValue()});
+#endif // NNC_FRONTEND_CAFFE2_ENABLED
+ }
+ else if (cli::onnxFrontend)
+ {
+#ifdef NNC_FRONTEND_ONNX_ENABLED
+ return mir_onnx::loadModel(cli::inputFile.getRawValue());
+#endif // NNC_FRONTEND_ONNX_ENABLED
+ }
+ else if (cli::tflFrontend)
+ {
+#ifdef NNC_FRONTEND_TFLITE_ENABLED
+ return mir_tflite::loadModel(cli::inputFile.getRawValue());
+#endif // NNC_FRONTEND_TFLITE_ENABLED
+ }
+
+ assert(false);
+ return nullptr;
+}
+
+static void backend(mir::Graph *graph)
+{
+ if (cli::target == NNC_TARGET_ARM_CPP || cli::target == NNC_TARGET_X86_CPP)
+ {
+ CPPCodeGenerator(cli::artifactDir, cli::artifactName).run(graph);
+ }
+ else if (cli::target == NNC_TARGET_ARM_GPU_CPP)
+ {
+ AclCppCodeGenerator(cli::artifactDir, cli::artifactName).run(graph);
+ }
+ else if (cli::target == NNC_TARGET_INTERPRETER)
+ {
+ InterpreterBackend(cli::interInputDataDir, cli::artifactDir).run(graph);
+ }
+ else
+ {
+ assert(false && "invalid option value");
+ }
+}
+
+/**
+ * @brief run all registered passes
+ * @throw PassException, if errors occurred
+ */
+void Driver::runPasses()
+{
+ auto graph = importModel();
+ PassData pass_data(graph.get());
+
+ for (const auto &pass : _passManager.getPasses())
+ {
+ pass_data = pass->run(pass_data);
+ if (cli::dumpGraph && static_cast<mir::Graph *>(pass_data))
+ {
+ DumperPass d(pass->getName());
+ d.run(pass_data);
+ }
+ }
+
+ backend(pass_data);
+
+ // NOTE. Now we destroy data of all passes when PassManager is destroyed.
+  // In the future, to reduce memory consumption, we can destroy it when passes are being performed
+
+} // runPasses
+
+/**
+ * @brief Register backend specific passes
+ * @throw DriverException if errors occurred
+ */
+void Driver::registerBackendSpecificPasses()
+{
+ std::unique_ptr<Pass> data_format_pass;
+
+ if (cli::target == NNC_TARGET_ARM_CPP || cli::target == NNC_TARGET_X86_CPP)
+ {
+ _passManager.registerPass(stdex::make_unique<LowerConv2D>());
+ _passManager.registerPass(stdex::make_unique<DataFormatSwitcher>(mir::DataFormat::NHWC));
+ }
+ else if (cli::target == NNC_TARGET_ARM_GPU_CPP)
+ {
+ _passManager.registerPass(stdex::make_unique<LowerConv2D>());
+ _passManager.registerPass(stdex::make_unique<ConstantFoldTranspose>());
+ // TODO Change to DataFormat::NCHW when fix it in ACL
+ _passManager.registerPass(stdex::make_unique<DataFormatSwitcher>(mir::DataFormat::NHWC));
+ }
+ else if (cli::target == NNC_TARGET_INTERPRETER)
+ {
+ _passManager.registerPass(stdex::make_unique<DataFormatSwitcher>(mir::DataFormat::NHWC));
+ }
+ else
+ {
+ assert(false && "invalid option value");
+ }
+}
+
+void Driver::registerOptimizationPass()
+{
+ if (cli::doOptimizationPass)
+ {
+ // TODO: maybe we should start managing the optimizations more intelligently?
+ _passManager.registerPass(std::unique_ptr<Pass>(new CombineTransposes()));
+ _passManager.registerPass(std::unique_ptr<Pass>(new SinkTranspose()));
+ _passManager.registerPass(std::unique_ptr<Pass>(new SinkRelu()));
+#if 0
+ // TODO Support broadcasting.
+ _passManager.registerPass(std::unique_ptr<Pass>(new FuseArithmeticOps()));
+#endif
+ _passManager.registerPass(std::unique_ptr<Pass>(new DeadCodeElimination()));
+ }
+} // registerOptimizationPass
+
+void Driver::runDriver()
+{
+ registerOptimizationPass();
+ registerBackendSpecificPasses();
+
+ runPasses();
+}
+
+} // namespace nnc
diff --git a/compiler/nnc/driver/Driver.h b/compiler/nnc/driver/Driver.h
new file mode 100644
index 000000000..1a61c7902
--- /dev/null
+++ b/compiler/nnc/driver/Driver.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_DRIVER_H
+#define NNCC_DRIVER_H
+
+#include <exception>
+#include <string>
+
+#include "pass/PassManager.h"
+
+namespace nnc
+{
+
+/**
+ * @brief exceptions description class for compiler driver
+ */
+class DriverException : public std::exception
+{
+public:
+ DriverException() = default;
+ explicit DriverException(std::string reason) : _msg(std::move(reason)) {}
+ explicit DriverException(const char *msg) : _msg(msg) {}
+
+ const char *what() const noexcept override { return _msg.c_str(); }
+
+private:
+ std::string _msg;
+};
+
+/**
+ * @brief Compiler Driver manages the whole pipeline compilation process
+ */
+class Driver
+{
+public:
+ /**
+ * @brief main method to run compiler driver
+ * @throw DriverException if errors occurred in driver
+ * PassException if errors occurred in passes
+ */
+ void runDriver();
+
+private:
+ void registerBackendSpecificPasses();
+ void registerOptimizationPass();
+ void runPasses();
+
+ PassManager _passManager;
+};
+
+} // namespace nnc
+
+#endif // NNCC_DRIVER_H
diff --git a/compiler/nnc/driver/Options.cpp b/compiler/nnc/driver/Options.cpp
new file mode 100644
index 000000000..e22d01847
--- /dev/null
+++ b/compiler/nnc/driver/Options.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Options.h"
+#include "Definitions.h"
+
+#include <string>
+
+namespace nnc
+{
+namespace cli
+{
+
+/**
+ * Options for *compiler driver*
+ */
+Option<bool> Help(optname("--help, -h"), overview("print usage and exit"), false, optional(true));
+Option<bool> caffeFrontend(optname("--caffe"), overview("treat input file as Caffe model"), false,
+ optional(true), optvalues(""), nullptr, separators(""),
+#ifdef NNC_FRONTEND_CAFFE_ENABLED
+ showopt(true)
+#else
+ showopt(false)
+#endif // NNC_FRONTEND_CAFFE_ENABLED
+ );
+Option<bool> onnxFrontend(optname("--onnx"), overview("treat input file as ONNX model"), false,
+ optional(true), optvalues(""), nullptr, separators(""),
+#ifdef NNC_FRONTEND_ONNX_ENABLED
+ showopt(true)
+#else
+ showopt(false)
+#endif // NNC_FRONTEND_ONNX_ENABLED
+ );
+
+Option<bool> caffe2Frontend(optname("--caffe2"),
+ overview("treat input file as Caffe2 model (predict_net.pb)"), false,
+ optional(false), optvalues(""), nullptr, separators(""),
+#ifdef NNC_FRONTEND_CAFFE2_ENABLED
+ showopt(true),
+#else
+ showopt(false),
+#endif // NNC_FRONTEND_CAFFE2_ENABLED
+ IOption::Group::caffe2);
+
+Option<std::vector<int>> inputShapes(optname("--input-shape"), overview("Shape of caffe2 input"),
+ std::vector<int>{}, optional(false), optvalues(""), nullptr,
+ separators(""),
+#ifdef NNC_FRONTEND_CAFFE2_ENABLED
+ showopt(true),
+#else
+ showopt(false),
+#endif // NNC_FRONTEND_CAFFE2_ENABLED
+ IOption::Group::caffe2);
+
+Option<std::string> initNet(optname("--init-net"),
+ overview("path to Caffe2 model weights (init_net.pb)"), std::string(),
+ optional(false), optvalues(""), checkInFile, separators(""),
+#ifdef NNC_FRONTEND_CAFFE2_ENABLED
+ showopt(true),
+#else
+ showopt(false),
+#endif // NNC_FRONTEND_CAFFE2_ENABLED
+ IOption::Group::caffe2);
+
+Option<bool> tflFrontend(optname("--tflite"),
+ overview("treat input file as Tensor Flow Lite model"), false,
+ optional(true), optvalues(""), nullptr, separators(""),
+#ifdef NNC_FRONTEND_TFLITE_ENABLED
+ showopt(true)
+#else
+ showopt(false)
+#endif // NNC_FRONTEND_TFLITE_ENABLED
+ );
+Option<std::string>
+ target(optname("--target"),
+ overview("select target language to emit for given architecture."
+ "Valid values are '" NNC_TARGET_ARM_CPP "', '" NNC_TARGET_X86_CPP
+ "', '" NNC_TARGET_ARM_GPU_CPP "', '" NNC_TARGET_INTERPRETER "'"),
+ std::string(), optional(false),
+ optvalues(NNC_TARGET_ARM_CPP "," NNC_TARGET_X86_CPP "," NNC_TARGET_ARM_GPU_CPP
+ "," NNC_TARGET_INTERPRETER),
+ nullptr, separators("="));
+
+/**
+ * Options for *frontend*
+ */
+Option<std::string> inputFile(optname("--nnmodel, -m"),
+ overview("specify input file with serialized NN models"),
+ std::string(), optional(false), optvalues(""), checkInFile);
+
+/**
+ * Options for *optimizer*
+ */
+Option<bool> doOptimizationPass(optname("-O"), overview("whether to optimize model or not"), false,
+ optional(true), optvalues(""), nullptr, separators(""),
+ showopt(true));
+
+Option<bool> dumpGraph(optname("--dump, -D"),
+ overview("dump graph to dot files after optimization passes"), false,
+ optional(true), optvalues(""), nullptr, separators(""), showopt(true));
+
+/**
+ * Options for *backend*
+ */
+// options for soft backend
+Option<std::string> artifactName(optname("--output, -o"), overview("specify name for output files"),
+ "nnmodel", optional(true), optvalues(""), checkOutFile);
+Option<std::string> artifactDir(optname("--output-dir, -d"),
+ overview("specify directory for output files"),
+ ".", // default is current directory
+ optional(true), optvalues(""), checkOutDir, separators("="));
+
+/**
+ * Options for *interpreter*
+ */
+Option<std::string> interInputDataDir(optname("--input-data-dir"),
+ overview("specify directory with binary files "
+ "containing the input data for the model "
+ "(one file for each input with the same name)"),
+ ".", // default is current directory
+ optional(true), optvalues(""), checkInDir);
+
+} // namespace cli
+} // namespace nnc
diff --git a/compiler/nnc/driver/Options.h b/compiler/nnc/driver/Options.h
new file mode 100644
index 000000000..06994a4d5
--- /dev/null
+++ b/compiler/nnc/driver/Options.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_COMMANDLINEARGUMENTS_H
+#define NNCC_COMMANDLINEARGUMENTS_H
+
+#include <string>
+#include "support/CommandLine.h"
+
+namespace nnc
+{
+namespace cli
+{
+
+/**
+ * Options for compiler driver
+ */
+extern Option<bool> caffe2Frontend; // frontend for CAFFE2 AI framework
+extern Option<std::vector<int>> inputShapes;
+extern Option<std::string> initNet;
+
+extern Option<bool> caffeFrontend; // frontend for CAFFE AI framework
+extern Option<bool> tflFrontend; // frontend for TensorFlow Lite AI framework
+extern Option<bool> onnxFrontend; // frontend for ONNX AI framework
+
+extern Option<bool> doOptimizationPass; // enable optimization pass
+extern Option<bool> dumpGraph; // enable Dumping graph to .dot files
+
+// valid values for target option
+#define NNC_TARGET_ARM_CPP "arm-c++"
+#define NNC_TARGET_X86_CPP "x86-c++"
+#define NNC_TARGET_ARM_GPU_CPP "arm-gpu-c++"
+#define NNC_TARGET_INTERPRETER "interpreter"
+extern Option<std::string> target; // kind of target for which compiler generates code
+
+/**
+ * Frontend options
+ */
+extern Option<std::string> inputFile; // files contains model of specific AI framework
+
+/**
+ * Options for backend
+ */
+extern Option<std::string> artifactDir; // output directory for artifact
+extern Option<std::string> artifactName; // name of artifact
+
+/**
+ * Options for interpreter
+ */
+extern Option<std::string> interInputDataDir; // directory with input data files
+
+} // namespace cli
+} // namespace nnc
+
+#endif // NNCC_COMMANDLINEARGUMENTS_H
diff --git a/compiler/nnc/driver/main.cpp b/compiler/nnc/driver/main.cpp
new file mode 100644
index 000000000..10d5edc0e
--- /dev/null
+++ b/compiler/nnc/driver/main.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <vector>
+
+#include "support/CommandLine.h"
+#include "pass/PassException.h"
+#include "Driver.h"
+
+using namespace nnc;
+
+/*
+ * Prints the explanatory string of an exception. If the exception is nested, recurses to print
+ * the explanatory string of the exception it holds.
+ */
+static void printException(const std::exception &e, int indent = 0)
+{
+ std::cerr << std::string(indent, ' ') << e.what() << std::endl;
+ try
+ {
+ std::rethrow_if_nested(e);
+ }
+ catch (const std::exception &e)
+ {
+ printException(e, indent + 2);
+ }
+}
+
+int main(int argc, const char *argv[])
+{
+ int exit_code = EXIT_FAILURE;
+
+ try
+ {
+ // Parse command line
+ cli::CommandLine::getParser()->parseCommandLine(argc, argv);
+
+ //
+ // run compiler pipeline:
+ //
+ // for_each(all_passes):
+ // run pass
+ //
+ Driver driver;
+ driver.runDriver();
+
+ // errors didn't happen
+ exit_code = EXIT_SUCCESS;
+ }
+ catch (const DriverException &e)
+ {
+ printException(e);
+ std::cerr << "use --help for more information" << std::endl;
+ }
+ catch (const PassException &e)
+ {
+ printException(e);
+ }
+
+ return exit_code;
+}
diff --git a/compiler/nnc/include/Definitions.h.in b/compiler/nnc/include/Definitions.h.in
new file mode 100644
index 000000000..070cdd201
--- /dev/null
+++ b/compiler/nnc/include/Definitions.h.in
@@ -0,0 +1,44 @@
+#ifndef NNCC_DEFINITIONS_H
+#define NNCC_DEFINITIONS_H
+
+/**
+ * This file contains external definitions that defined by cmake build system
+ * All definitions of this file must start from `NNC` prefix e.g. `NNC_ROOT_PATH`
+ */
+
+/**
+ * @brief absolute path to installation directory of *nnc* project
+ */
+#define NNC_ROOT_PATH "@NNC_INSTALL_PATH@"
+
+/**
+ * @brief absolute path to directory that contains libraries
+ */
+#define NNC_LIB_PATH "@NNC_INSTALL_LIB_PATH@"
+
+/**
+ * @brief defines if hdf5 package was found
+ */
+#cmakedefine NNC_HDF5_SUPPORTED
+
+/**
+ * @brief define that TFLite frontend is enabled
+ */
+#cmakedefine NNC_FRONTEND_TFLITE_ENABLED
+
+/**
+ * @brief define that CAFFE frontend is enabled
+ */
+#cmakedefine NNC_FRONTEND_CAFFE_ENABLED
+
+/**
+ * @brief define that CAFFE2 frontend is enabled
+ */
+#cmakedefine NNC_FRONTEND_CAFFE2_ENABLED
+
+/**
+ * @brief define that ONNX frontend is enabled
+ */
+#cmakedefine NNC_FRONTEND_ONNX_ENABLED
+
+#endif //NNCC_DEFINITIONS_H
diff --git a/compiler/nnc/include/backends/acl_soft_backend/AclCppException.h b/compiler/nnc/include/backends/acl_soft_backend/AclCppException.h
new file mode 100644
index 000000000..f5ef2260d
--- /dev/null
+++ b/compiler/nnc/include/backends/acl_soft_backend/AclCppException.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_ACLCPPEXCEPTION_H_
+#define _NNC_ACLCPPEXCEPTION_H_
+
+#include <stdexcept>
+
+namespace nnc
+{
+
+/**
+ * @brief objects of this class are to be thrown from the ACL C++ soft backend if errors occur.
+ */
+class AclCppException : public std::runtime_error
+{
+public:
+ explicit AclCppException(const std::string &msg) : runtime_error(_prefix + msg) {}
+
+private:
+ static constexpr const char *_prefix = "ACL C++ soft backend error: ";
+};
+
+} // namespace nnc
+
+#endif //_NNC_ACLCPPEXCEPTION_H_
diff --git a/compiler/nnc/include/backends/acl_soft_backend/AclCppGenerator.h b/compiler/nnc/include/backends/acl_soft_backend/AclCppGenerator.h
new file mode 100644
index 000000000..f8e51ab74
--- /dev/null
+++ b/compiler/nnc/include/backends/acl_soft_backend/AclCppGenerator.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_ACL_CPP_GENERATOR_H_
+#define _NNC_ACL_CPP_GENERATOR_H_
+
+#include "mir/Graph.h"
+
+#include <string>
+
+namespace nnc
+{
+
+/**
+ * @brief Class for the ACL C++ code generator.
+ * It is the entry point to the ACL C++ soft backend.
+ */
+class AclCppCodeGenerator final
+{
+public:
+ AclCppCodeGenerator(std::string output_dir, std::string artifact_name);
+
+ /**
+ * @brief Method represents the generation sequence: analysis, serialization,
+ * header/code generation, etc
+ * @param graph MIR graph
+ */
+ void run(mir::Graph *graph);
+
+private:
+ std::string _output_dir;
+ std::string _artifact_name;
+};
+
+} // namespace nnc
+
+#endif //_NNC_ACL_CPP_GENERATOR_H_
diff --git a/compiler/nnc/include/backends/interpreter/InterpreterBackend.h b/compiler/nnc/include/backends/interpreter/InterpreterBackend.h
new file mode 100644
index 000000000..caa0b34e0
--- /dev/null
+++ b/compiler/nnc/include/backends/interpreter/InterpreterBackend.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_INTERPRETERPASS_H
+#define NNCC_INTERPRETERPASS_H
+
+#include "mir/Graph.h"
+
+#include <string>
+
+namespace nnc
+{
+
+class InterpreterBackend final
+{
+public:
+ InterpreterBackend(std::string input_dir, std::string output_dir);
+
+ void run(mir::Graph *data);
+
+private:
+ std::string _input_dir;
+ std::string _output_dir;
+};
+
+} // namespace nnc
+
+#endif // NNCC_INTERPRETERPASS_H
diff --git a/compiler/nnc/include/backends/soft_backend/CPPGenerator.h b/compiler/nnc/include/backends/soft_backend/CPPGenerator.h
new file mode 100644
index 000000000..d21168aef
--- /dev/null
+++ b/compiler/nnc/include/backends/soft_backend/CPPGenerator.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_SOFT_BACKEND_CPP_GENERATOR_H_
+#define _NNC_SOFT_BACKEND_CPP_GENERATOR_H_
+
+#include "mir/Graph.h"
+
+#include <ostream>
+#include <string>
+#include <vector>
+
+namespace nnc
+{
+
+class ModelAnalyzer;
+class Serializer;
+
+namespace sir
+{
+struct TensorDescriptor;
+struct Action;
+struct CallFunction;
+struct TransposeTensor;
+struct CreateTmp;
+struct DestroyTmp;
+} // namespace sir
+
+/**
+ * @brief CPPCodeGenerator implements interfaces that provides BaseCodeGenerator for C++ language
+ * This includes header file generation, code file generation and variable renaming according to C++
+ * naming requirements
+ */
+class CPPCodeGenerator final
+{
+public:
+ CPPCodeGenerator(std::string output_dir, std::string artifact_name);
+
+ /**
+ * @brief Method represents base generation sequence: analysis, serialization, header/code
+ * generation, etc
+ * @param graph MIR graph
+ */
+ void run(mir::Graph *graph);
+
+private:
+ /**
+ * @brief This function processes tensor names
+   * to transform them into valid identifiers of the target language
+ * @param ma Intermediate artifact information
+ */
+ void formatTensorNames(const ModelAnalyzer &ma);
+ /**
+ * @brief Derivative classes should override this function to generate header of artifact
+ * @param out Stream to write header text
+ * @param ma Intermediate artifact information
+ */
+ void materializeHeader(std::ostream &out, const ModelAnalyzer &ma);
+
+ /**
+ * @brief Form list of function call arguments
+ * @param ma Intermediate model representation
+ * @param argIds List of argument variable ids
+ * @param args Result list of arguments transformed in form of strings
+ */
+ void gatherOperationArguments(const ModelAnalyzer &ma, const std::vector<std::size_t> &arg_ids,
+ std::vector<std::string> &args);
+ /**
+ * @brief Prints setter of artifact
+ * @param out Output stream
+ * @param className Name of artifact
+ * @param setterName Name of setter function
+ * @param varId id of variable that setter fills
+ */
+ void printSetter(std::ostream &out, const std::string &class_name, const std::string &setter_name,
+ const sir::TensorDescriptor &td);
+ /**
+ * @brief Prints getters of artifact
+ * @param out Output stream
+ * @param className Name of artifact
+ * @param setterName Name of setter function
+ * @param varId id of variable that getter returns
+ */
+ void printGetter(std::ostream &out, const std::string &class_name, const std::string &getter_name,
+ const sir::TensorDescriptor &td);
+ /**
+ * @brief Generate code for function call action
+ * @param out Output stream to print
+ * @param ma Intermediate model representation
+ * @param call Action to generate code from
+ */
+ void materializeCall(std::ostream &out, const ModelAnalyzer &ma, const sir::CallFunction *call);
+ /**
+ * @brief Generate code for transpose action
+ * @param out Output stream to print
+ * @param ma Intermediate model representation
+ * @param action Action to generate code from
+ */
+ void materializeTranspose(std::ostream &out, const ModelAnalyzer &ma,
+ const sir::TransposeTensor *transpose);
+ /**
+ * @brief Generate code for constructor action
+ * @param out Output stream to print
+ * @param ma Intermediate model representation
+ * @param action Action to generate code from
+ */
+ void materializeConstructor(std::ostream &out, const ModelAnalyzer &ma,
+ const sir::CreateTmp *constructor);
+ /**
+ * @brief Generate code for destructor action
+ * @param out Output stream to print
+ * @param ma Intermediate model representation
+ * @param action Action to generate code from
+ */
+ void materializeDestructor(std::ostream &out, const ModelAnalyzer &ma,
+ const sir::DestroyTmp *destructor);
+ /**
+ * @brief Prints inference sequence placed in doInference method of artifact
+ * @param out Output stream
+ * @param ma Intermediate model representation
+ */
+ void materializeInferenceSequence(std::ostream &out, const ModelAnalyzer &ma);
+ /**
+ * @brief Derivative classes should override this function to generate implementation of artifact
+ * @param out Stream to write header text
+ * @param ma Intermediate artifact information
+ * @param s Serializer holds parameters of network and various meta-information: serializer
+ * version, hashes, etc
+ */
+ void materializeCode(std::ostream &out, const ModelAnalyzer &ma, const Serializer &s);
+ /**
+ * @brief Writes serialized parameters to out stream
+ * @param out Stream to write serialized parameters
+ * @param s Serializer holds parameters of network
+ *
+ * Contents of generated file:
+ * + header(magic number to identify file type, protocol version, hashes of network and params)
+ * + array of serialized network parameters
+ */
+ void materializeModelParams(std::ostream &out, const Serializer &s);
+
+ std::string _output_dir;
+ std::string _artifact_name;
+ std::vector<std::string> _formattedTensors;
+};
+
+} // namespace nnc
+
+#endif //_NNC_SOFT_BACKEND_CPP_GENERATOR_H_
diff --git a/compiler/nnc/include/pass/Pass.h b/compiler/nnc/include/pass/Pass.h
new file mode 100644
index 000000000..d07d688f1
--- /dev/null
+++ b/compiler/nnc/include/pass/Pass.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_PASS_H
+#define NNCC_PASS_H
+
+#include <string>
+
+#include "pass/PassData.h"
+
+namespace nnc
+{
+
+/**
+ * @brief this class represents an interface for all compiler passes, such as frontend, backend, etc.
+ */
+class Pass
+{
+public:
+ /**
+ * @brief run compiler pass
+   * @param data - data that the pass takes as input
+ * @return data that can be passed to the next pass
+   * @throw PassException object if errors occur
+ */
+ virtual PassData run(PassData data) = 0;
+
+ /**
+ * @brief clean compiler pass data
+ */
+ virtual void cleanup(){};
+
+ virtual ~Pass() = default;
+
+ virtual std::string getName() { return "pass"; }
+};
+
+} // namespace nnc
+
+#endif // NNCC_PASS_H
diff --git a/compiler/nnc/include/pass/PassData.h b/compiler/nnc/include/pass/PassData.h
new file mode 100644
index 000000000..e2c0b8129
--- /dev/null
+++ b/compiler/nnc/include/pass/PassData.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_PASSDATA_H
+#define NNCC_PASSDATA_H
+
+#include "mir/Graph.h"
+#include "mir/TensorVariant.h"
+
+namespace nnc
+{
+
+/**
+ * @brief class that encapsulates the value returned by and passed to a pass
+ */
+class PassData
+{
+public:
+ /* implicit */ PassData(std::nullptr_t data)
+ : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ _dataContainer{.unknown = data},
+ _dataType(PDT::UNKNOWN)
+ {
+ }
+
+ /**
+ * @brief Implicit conversion from Graph* to PassData
+ */
+ /* implicit */ PassData(mir::Graph *graph)
+ : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ _dataContainer{.graph = graph},
+ _dataType(PDT::GRAPH)
+ {
+ }
+
+ /**
+ * @brief Implicit conversion from PassData to Graph*
+ */
+ /* implicit */ operator mir::Graph *() const
+ { // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ if (_dataType != PDT::GRAPH)
+ return nullptr;
+ return _dataContainer.graph;
+ }
+
+ /**
+   * @brief Implicit conversion from TensorVariant* to PassData
+ */
+ /* implicit */ PassData(mir::TensorVariant *tv)
+ : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ _dataContainer{.tensorVariant = tv},
+ _dataType(PDT::TENSOR_VARIANT)
+ {
+ }
+
+ /**
+   * @brief Implicit conversion from PassData to TensorVariant*
+ */
+ /* implicit */ operator mir::TensorVariant *() const
+ { // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ if (_dataType != PDT::TENSOR_VARIANT)
+ return nullptr;
+ return _dataContainer.tensorVariant;
+ }
+
+private:
+ // types that PassData can contain
+ enum class PDT : char
+ {
+ GRAPH,
+ TENSOR_VARIANT,
+ UNKNOWN
+
+ } _dataType;
+
+ // union contains all pointers to objects that can be returned from passes
+ union {
+ mir::Graph *graph;
+ mir::TensorVariant *tensorVariant;
+ void *unknown;
+
+ } _dataContainer;
+};
+
+} // namespace nnc
+
+#endif // NNCC_PASSDATA_H
diff --git a/compiler/nnc/include/pass/PassException.h b/compiler/nnc/include/pass/PassException.h
new file mode 100644
index 000000000..d590fc761
--- /dev/null
+++ b/compiler/nnc/include/pass/PassException.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_PASSEXCEPTION_H
+#define NNCC_PASSEXCEPTION_H
+
+#include <exception>
+#include <string>
+
+namespace nnc
+{
+
+/**
+ * @brief objects of this class are to be thrown from Passes if errors occur
+ */
+class PassException : public std::exception
+{
+public:
+ PassException() = default;
+ explicit PassException(std::string msg) : _msg(std::move(msg)) {}
+ explicit PassException(const char *msg) : _msg(msg) {}
+
+ const char *what() const noexcept override { return _msg.c_str(); }
+
+private:
+ std::string _msg;
+};
+
+} // namespace nnc
+
+#endif // NNCC_PASSEXCEPTION_H
diff --git a/compiler/nnc/include/pass/PassManager.h b/compiler/nnc/include/pass/PassManager.h
new file mode 100644
index 000000000..3ce1c3199
--- /dev/null
+++ b/compiler/nnc/include/pass/PassManager.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PASS_MANAGER_H__
+#define __PASS_MANAGER_H__
+
+#include <queue>
+#include <memory>
+
+namespace nnc
+{
+
+// forward declaration
+class Pass;
+
+/**
+ * @brief pass manager class. This class manages running of passes
+ */
+class PassManager
+{
+public:
+ PassManager();
+ ~PassManager();
+
+ /**
+ * @brief register pass in pass manager
+ * @param pass - registered pass
+ */
+ void registerPass(std::unique_ptr<Pass> pass);
+
+ /**
+ * @brief get all registered passes in order in which they were registered
+ */
+ using Passes = std::vector<std::unique_ptr<Pass>>;
+ const Passes &getPasses() const { return _passes; }
+
+private:
+ // data
+ Passes _passes; // registered passes
+};
+
+} // namespace nnc
+
+#endif // __PASS_MANAGER_H__
diff --git a/compiler/nnc/include/passes/dot_dumper/DumperPass.h b/compiler/nnc/include/passes/dot_dumper/DumperPass.h
new file mode 100644
index 000000000..318037079
--- /dev/null
+++ b/compiler/nnc/include/passes/dot_dumper/DumperPass.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_DUMPERPASS_H
+#define NNCC_DUMPERPASS_H
+
+#include "pass/Pass.h"
+
+namespace nnc
+{
+
+/**
+ * @brief Dumps the graph to a dot file named %number%.dot
+ * where %number% is how many times the graph was dumped.
+ */
+class DumperPass : public Pass
+{
+public:
+ explicit DumperPass(std::string s) : _file_name(std::move(s)) {}
+
+ PassData run(PassData data) override;
+
+private:
+ std::string _file_name;
+ static int _counter;
+};
+
+} // namespace nnc
+#endif // NNCC_DUMPERPASS_H
diff --git a/compiler/nnc/include/passes/optimizations/CombineTransposes.h b/compiler/nnc/include/passes/optimizations/CombineTransposes.h
new file mode 100644
index 000000000..7d227cd5d
--- /dev/null
+++ b/compiler/nnc/include/passes/optimizations/CombineTransposes.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_COMBINE_TRANSPOSES_H
+#define NNCC_COMBINE_TRANSPOSES_H
+
+#include "pass/Pass.h"
+#include "pass/PassData.h"
+
+namespace nnc
+{
+
+/**
+ * @brief This pass combines sequential transposes and removes identity transposes if
+ * the combination results in an identity permutation.
+ */
+class CombineTransposes : public Pass
+{
+public:
+ PassData run(PassData data) override;
+
+ std::string getName() override { return "opt_combine_transposes"; };
+private:
+};
+
+} // namespace nnc
+
+#endif // NNCC_COMBINE_TRANSPOSES_H
diff --git a/compiler/nnc/include/passes/optimizations/ConstantFoldTranspose.h b/compiler/nnc/include/passes/optimizations/ConstantFoldTranspose.h
new file mode 100644
index 000000000..96e2070f4
--- /dev/null
+++ b/compiler/nnc/include/passes/optimizations/ConstantFoldTranspose.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_CONSTANT_FOLD_TRANSPOSE_H
+#define NNCC_CONSTANT_FOLD_TRANSPOSE_H
+
+#include "pass/Pass.h"
+
+namespace nnc
+{
+
+class ConstantFoldTranspose : public Pass
+{
+public:
+ PassData run(PassData data) override;
+
+ std::string getName() override
+ {
+ static const std::string name("opt_constant_fold_transpose");
+ return name;
+ };
+};
+
+} // namespace nnc
+
+#endif // NNCC_CONSTANT_FOLD_TRANSPOSE_H
diff --git a/compiler/nnc/include/passes/optimizations/DeadCodeElimination.h b/compiler/nnc/include/passes/optimizations/DeadCodeElimination.h
new file mode 100644
index 000000000..600c3b5ab
--- /dev/null
+++ b/compiler/nnc/include/passes/optimizations/DeadCodeElimination.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_DEADCODEELIMINATION_H
+#define NNCC_DEADCODEELIMINATION_H
+
+#include "pass/Pass.h"
+#include "pass/PassData.h"
+
+namespace nnc
+{
+
+/**
+ * @brief This pass removes operations without uses.
+ *        Importers currently only generate `ConstantOp`s without uses.
+ */
+class DeadCodeElimination : public Pass
+{
+public:
+ PassData run(PassData data) override;
+
+ std::string getName() override { return "RemoveDeadEnds"; };
+};
+
+} // namespace nnc
+
+#endif // NNCC_DEADCODEELIMINATION_H
diff --git a/compiler/nnc/include/passes/optimizations/FuseArithmeticOps.h b/compiler/nnc/include/passes/optimizations/FuseArithmeticOps.h
new file mode 100644
index 000000000..ae39e4c29
--- /dev/null
+++ b/compiler/nnc/include/passes/optimizations/FuseArithmeticOps.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_FUSE_ARITHMETIC_OPS_H
+#define NNCC_FUSE_ARITHMETIC_OPS_H
+
+#include "pass/Pass.h"
+#include "pass/PassData.h"
+
+namespace nnc
+{
+
+/**
+ * @brief Main purpose of this pass - is to fuse 'Conv->BatchNorm' into 'Conv'
+ * Currently 'BatchNorm' split by NNC frontends into 'Scale->Scale->BiasAdd'
+ *        This optimization is performed in two steps (repeated while the graph keeps changing):
+ * 1. Fuse two successive operations with constant weights into one (ex: 'Scale->Scale' becomes
+ * 'Scale')
+ * 2. Sink 'BiasAdd' through 'Scale' (so 'Conv->BiasAdd->Scale' becomes 'Conv->Scale->BiasAdd')
+ */
+class FuseArithmeticOps : public Pass
+{
+public:
+ PassData run(PassData data) override;
+
+ std::string getName() override { return "FuseArithmeticOps"; }
+};
+
+} // namespace nnc
+
+#endif // NNCC_FUSE_ARITHMETIC_OPS_H
diff --git a/compiler/nnc/include/passes/optimizations/OptimizationUtils.h b/compiler/nnc/include/passes/optimizations/OptimizationUtils.h
new file mode 100644
index 000000000..9a9212c12
--- /dev/null
+++ b/compiler/nnc/include/passes/optimizations/OptimizationUtils.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_OPTIMIZATION_UTILS_H
+#define NNCC_OPTIMIZATION_UTILS_H
+
+#include "mir/Operation.h"
+#include "mir/Graph.h"
+
+namespace nnc
+{
+namespace opt_util
+{
+/**
+* @brief Swap adjacent nodes in Graph. Creates new nodes and replaces the old ones with new.
+* @param g MIR Graph
+* @param top Node
+* @param bottom Node
+*/
+void swapAdjacent(mir::Graph *g, mir::Operation *top, mir::Operation *bottom);
+
+// TODO: this function and its usages should be removed after the DCE optimization is implemented
+void removeNodeIfUnused(mir::Graph *g, mir::Operation *op);
+} // namespace opt_util
+} // namespace nnc
+
+#endif // NNCC_OPTIMIZATION_UTILS_H
diff --git a/compiler/nnc/include/passes/optimizations/SinkRelu.h b/compiler/nnc/include/passes/optimizations/SinkRelu.h
new file mode 100644
index 000000000..edb181432
--- /dev/null
+++ b/compiler/nnc/include/passes/optimizations/SinkRelu.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_SINKRELU_H
+#define NNCC_SINKRELU_H
+
+#include "pass/Pass.h"
+#include "pass/PassData.h"
+
+namespace nnc
+{
+
+/**
+ * @brief This pass sinks relu below MaxPooling and Concat nodes.
+ */
+class SinkRelu : public Pass
+{
+public:
+ PassData run(PassData data) override;
+
+ std::string getName() override { return "SinkRelu"; };
+};
+
+} // namespace nnc
+
+#endif // NNCC_SINKRELU_H
diff --git a/compiler/nnc/include/passes/optimizations/SinkTranspose.h b/compiler/nnc/include/passes/optimizations/SinkTranspose.h
new file mode 100644
index 000000000..04e5c9926
--- /dev/null
+++ b/compiler/nnc/include/passes/optimizations/SinkTranspose.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_SINKTRANSPOSE_H
+#define NNCC_SINKTRANSPOSE_H
+
+#include "pass/Pass.h"
+#include "pass/PassData.h"
+
+namespace nnc
+{
+
+/**
+ * @brief This pass sinks transposes below Relu and Concat nodes (in that order).
+ * `concat(relu(tr(x)), relu(tr(y))) -> tr(concat'(relu(x), relu(y)))`
+ */
+class SinkTranspose : public Pass
+{
+public:
+ PassData run(PassData data) override;
+
+ std::string getName() override { return "SinkTranspose"; };
+};
+
+} // namespace nnc
+
+#endif // NNCC_SINKTRANSPOSE_H
diff --git a/compiler/nnc/include/passes/transformations/DataFormatSwitcher.h b/compiler/nnc/include/passes/transformations/DataFormatSwitcher.h
new file mode 100644
index 000000000..7c60a76a9
--- /dev/null
+++ b/compiler/nnc/include/passes/transformations/DataFormatSwitcher.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DATA_FORMAT_SWITCHER_PASS_H
+#define DATA_FORMAT_SWITCHER_PASS_H
+
+#include "mir/Graph.h"
+#include "mir/DataFormat.h"
+#include "mir/Visitor.h"
+
+#include "pass/Pass.h"
+
+namespace nnc
+{
+
+class DataFormatSwitcher : public Pass
+{
+public:
+ explicit DataFormatSwitcher(mir::DataFormat target_format);
+
+ PassData run(PassData data) override;
+
+ void cleanup() override;
+
+ ~DataFormatSwitcher() override;
+
+ std::string getName() override { return "DataFormatSwitcher"; }
+
+private:
+ // operations with DataFormat dependency
+ void switchAvgPool2D(mir::ops::AvgPool2DOp *op);
+ void switchConv2D(mir::ops::Conv2DOp *op);
+ void switchDeConv2D(mir::ops::DeConv2DOp *op);
+ void switchDepthwiseConv2D(mir::ops::DepthwiseConv2DOp *op);
+ void switchMaxPool2D(mir::ops::MaxPool2DOp *op);
+
+ // helper functions
+ mir::Operation::Output *insertTransposeBefore(mir::Operation::Output *out);
+ mir::Operation::Output *insertTransposeAfter(mir::Operation::Output *out);
+
+private:
+ mir::Graph *_graph;
+ mir::DataFormat _target_format;
+ std::vector<mir::Operation *> _candidates_for_switch;
+};
+
+} // namespace nnc
+
+#endif // DATA_FORMAT_SWITCHER_PASS_H
diff --git a/compiler/nnc/include/passes/transformations/LowerConv2D.h b/compiler/nnc/include/passes/transformations/LowerConv2D.h
new file mode 100644
index 000000000..1177f4b7c
--- /dev/null
+++ b/compiler/nnc/include/passes/transformations/LowerConv2D.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNC_LOWER_CONV2D_H
+#define NNC_LOWER_CONV2D_H
+
+#include "pass/Pass.h"
+
+namespace nnc
+{
+
+// Transforms grouped Conv2D into DepthwiseConv2D when possible. This pass is supposed to be used
+// with backends which do not support grouped Conv2D operation.
+class LowerConv2D : public Pass
+{
+public:
+ LowerConv2D();
+
+ PassData run(PassData data) override;
+
+ void cleanup() override;
+
+ std::string getName() override { return "LowerConv2D"; }
+};
+
+} // namespace nnc
+
+#endif // NNC_LOWER_CONV2D_H
diff --git a/compiler/nnc/include/support/CommandLine.h b/compiler/nnc/include/support/CommandLine.h
new file mode 100644
index 000000000..40777ff46
--- /dev/null
+++ b/compiler/nnc/include/support/CommandLine.h
@@ -0,0 +1,556 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_COMMANDLINE_H
+#define NNCC_COMMANDLINE_H
+
+#include <string>
+#include <vector>
+#include <map>
+#include <set>
+#include <type_traits>
+#include <cassert>
+#include <limits>
+#include <iostream>
+
+namespace nnc
+{
+namespace cli
+{
+
+/**
+ * @brief simple exception class for invalid options
+ */
+class BadOption : public std::logic_error
+{
+public:
+  /// @param msg     diagnostic text forwarded to std::logic_error
+  /// @param optname name of the offending option (may be empty)
+  /// @param value   offending option value (may be empty)
+  explicit BadOption(const std::string &msg, std::string optname = "", std::string value = "")
+      : std::logic_error(msg), _option_name(std::move(optname)), _option_value(std::move(value))
+  {
+  }
+
+  /**
+   * @brief get name for invalid option
+   */
+  const std::string &getName() const { return _option_name; }
+
+  /**
+   * @brief get value for invalid option
+   */
+  const std::string &getValue() const { return _option_value; }
+
+private:
+  std::string _option_name;  // name of the offending option
+  std::string _option_value; // value that failed validation
+};
+
+/**
+ * @brief a class models option type
+ */
+// Primary template: specialized below for class types (isClass == true) and
+// scalar types (isClass == false); this generic form carries no state.
+template <typename T, bool isClass> class OptionType
+{
+public:
+  OptionType() = default;
+};
+
+// for class type (e.g. std::string, std::vector): the option value is stored
+// by inheriting from T itself, so the Option behaves like a T
+template <typename T> class OptionType<T, true> : public T
+{
+public:
+  /**
+   * @brief set value for option
+   * @tparam Tval - type of value what we want to assign to value
+   * @param val - option value
+   */
+  template <typename Tval> void setRawValue(const Tval &val) { this->T::operator=(val); }
+
+  /**
+   * @brief get option value
+   * @return value of option
+   */
+  const T &getRawValue() const { return *this; }
+
+  // non-const overload returns a copy of the stored value
+  T getRawValue() { return *this; }
+};
+
+// for scalar type (bool, char, integers): the value is kept in a member field
+template <typename T> class OptionType<T, false>
+{
+public:
+  /**
+   * @brief convert Option to scalar option type
+   */
+  /*implicit*/ operator T() const
+  {
+    return _value;
+  } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions)
+
+  /**
+   * @brief set value for option
+   * @tparam Tval - type of value what we want to assign to value
+   * @param val - option value
+   */
+  template <typename Tval> void setRawValue(const Tval &val) { _value = val; }
+
+  /**
+   * @brief get option value
+   * @return value of option
+   */
+  const T &getRawValue() const { return _value; }
+
+  T getRawValue() { return _value; }
+
+protected:
+  // string-to-T conversion helpers used when parsing command-line text;
+  // all of them throw BadOption on malformed input (implemented below)
+  bool convToBool(const std::string &val);
+  char convToChar(const std::string &val);
+  template <typename Tnum> Tnum convToNum(const std::string &val);
+
+  // data
+  T _value; // option value
+};
+
+/**
+ * @brief interface for Option class
+ */
+class IOption
+{
+public:
+  /**
+   * @brief set option value
+   * @param val - value of option in string format
+   * @todo add support for vector
+   */
+  virtual void setValue(const std::string &val) = 0;
+
+  /**
+   * @brief get all names of option
+   */
+  virtual const std::vector<std::string> &getNames() const = 0;
+
+  /**
+   * @brief get description of option
+   */
+  virtual const std::string &getOverview() const = 0;
+
+  /**
+   * @brief may option be optional?
+   */
+  virtual bool isOptional() const = 0;
+
+  /**
+   * @brief get valid values for given option
+   */
+  virtual const std::vector<std::string> &getValidVals() const = 0;
+
+  /**
+   * @brief get separators for option
+   */
+  virtual const std::vector<char> &getSeparators() const = 0;
+
+  /**
+   * @brief function for option verification
+   * @throw this function throws exception of BadOption
+   * type if verification is not passed
+   */
+  virtual void runCheckerFunc() = 0;
+
+  /**
+   * @brief is option disabled?
+   */
+  virtual bool isDisabled() const = 0;
+
+  /**
+   * @brief can option have several values?
+   */
+  virtual bool canHaveSeveralVals() const = 0;
+
+  /**
+   * @result true if option is in group
+   */
+  virtual bool isGrouped() const = 0;
+
+  // groups for option. Each option can be put in one of these groups
+  enum class Group
+  {
+    none = 0,
+    caffe2 = 1,
+    onnx = 2 // 'onnx' is currently unused
+  };
+
+  /**
+   * @return group in which option is put
+   */
+  virtual IOption::Group getGroup() const = 0;
+
+  /**
+   * @brief name of option group
+   */
+  virtual std::string getGroupName() const = 0;
+
+protected:
+  // this array contains name of option groups. It must be synchronized with Group enum.
+  // NOTE: index 0 (Group::none) is deliberately nullptr - ungrouped options have no group name
+  constexpr static const char *const _groupNames[] = {nullptr, "caffe2", "onnx"};
+};
+
+/**
+ * @brief this class describes command line option
+ * @tparam T - type of option
+ */
+template <typename T>
+class Option final : public OptionType<T, std::is_class<T>::value>, public IOption
+{
+public:
+  /**
+   * @brief function type for option verification
+   */
+  using option_checker_t = void (*)(const Option<T> &);
+
+  /**
+   * @brief construct an option
+   * @tparam T - type of an option
+   * @param optnames - names of option
+   * @param descr - overview of option
+   * @param default_val - option value accepted by default
+   * @param is_optional - is option optional?
+   * @param vals - valid values for option. Other values are interpreted as invalid
+   * @param checker - function verifies option
+   * @param seps - symbols that separates name option from value (by default is spaces)
+   * @param enabled - if this option is set to false then it won't be shown for users
+   * @param group - all options can be splitted into groups so this param sets group for option
+   */
+  explicit Option(const std::vector<std::string> &optnames, const std::string &descr,
+                  const T &default_val = T(), bool is_optional = false,
+                  const std::vector<std::string> &vals = std::vector<std::string>(),
+                  option_checker_t checker = nullptr,
+                  const std::vector<char> &seps = std::vector<char>(), bool enabled = true,
+                  IOption::Group group = IOption::Group::none);
+
+  // options must not be copyable and assignment
+  Option(const Option &) = delete;
+
+  Option &operator=(const Option &) = delete;
+
+  /**
+   * @brief overload assignment operator for type
+   */
+  template <typename Tval> T &operator=(const Tval &val)
+  { // NOLINT(cppcoreguidelines-c-copy-assignment-signature, misc-unconventional-assign-operator)
+    setRawValue(val);
+    return this->getRawValue(); // If not using `this` it won't work
+  }
+
+  // overridden methods
+  void setValue(const std::string &val) override;
+
+  const std::vector<std::string> &getNames() const override { return _names; }
+
+  const std::string &getOverview() const override { return _descr; }
+
+  bool isOptional() const override { return _is_optional; }
+
+  const std::vector<std::string> &getValidVals() const override { return _valid_vals; }
+
+  void runCheckerFunc() override
+  {
+    if (_checker)
+    {
+      _checker(*this);
+    }
+  }
+
+  const std::vector<char> &getSeparators() const override { return _seps; }
+
+  bool isDisabled() const override { return !_is_enabled; }
+
+  bool canHaveSeveralVals() const override { return _can_have_several_vals; }
+
+  bool isGrouped() const override { return _group != IOption::Group::none; }
+
+  IOption::Group getGroup() const override { return _group; }
+
+  std::string getGroupName() const override
+  {
+    // _groupNames[0] (Group::none) is nullptr; constructing std::string from a
+    // null pointer is undefined behavior, so map it to an empty name instead.
+    const char *name = _groupNames[static_cast<size_t>(_group)];
+    return name != nullptr ? std::string(name) : std::string();
+  }
+  // end overridden methods
+
+private:
+  // data
+  std::vector<std::string> _names;      // names of the option
+  std::string _descr;                   // overview of option
+  bool _is_optional;                    // may be omitted on the command line?
+  std::vector<std::string> _valid_vals; // option can be initialized only by these values
+  option_checker_t _checker;            // function verifies option and its value
+  std::vector<char> _seps;              // these symbols separate option name and its value
+  bool _is_enabled;                     // disabled options are hidden from users
+  bool _can_have_several_vals;          // can option take several values?
+  IOption::Group _group;                // group for option
+};
+
+/**
+ * @brief this class describes a common command line interface
+ */
+class CommandLine
+{ // NOLINT(cppcoreguidelines-special-member-functions, hicpp-special-member-functions)
+public:
+  // prevent copy or assignment
+  CommandLine(const CommandLine &) = delete;
+
+  CommandLine &operator=(const CommandLine &) = delete;
+
+  /**
+   * @brief singleton method
+   * @return pointer to the single process-wide parser instance
+   */
+  static CommandLine *getParser();
+
+  /**
+   * @brief parse command line option
+   * @param argc - number of command line arguments
+   * @param argv - command line arguments
+   * @param check_nonoptional - if true then check that all non optional declared options are
+   * presented
+   */
+  void parseCommandLine(int argc, const char **argv, bool check_nonoptional = true);
+
+  /**
+   * @brief register option for parser
+   * @param opt - option
+   */
+  void registerOption(IOption *opt);
+
+private:
+  /**
+   * @brief print usage and exit
+   * @param msg - additional user message
+   * @param exit_code - the program is terminated with this code
+   */
+  [[noreturn]] void usage(const std::string &msg = "", int exit_code = EXIT_FAILURE);
+
+  /**
+   * @brief check that all non optional registered options are passed from command line
+   * @param cmd_args - arguments from command line
+   */
+  void checkRegisteredOptions(const std::set<std::string> &cmd_args);
+
+  /**
+   * @brief call verification function, if present, for option
+   * @param cmd_args - arguments from command line
+   */
+  void checkOptions(const std::set<std::string> &cmd_args);
+
+  /**
+   * @brief find registered option by name
+   * @param optname - name of option
+   * @return pointer to option
+   * @throw BadOption throw exception if option not found
+   */
+  IOption *findOption(const char *optname);
+
+  /**
+   * @brief figure out option value
+   * @param opt - option for which value is looked for
+   * @param argv - array of command line arguments
+   * @param cur_argv - current position in argv (i.e. cur_argv point to option name)
+   * @return position in argv where option value begins or empty string if option doesn't have value
+   * @throw BadOption throw exception if value for option is incorrect
+   */
+  const char *findOptionValue(const IOption *opt, const char **argv, int cur_argv);
+
+  /**
+   * @brief figure out value for option with multiple values
+   * @param opt - option for which value is looked for
+   * @param opt_name - option name which taken from command line
+   * @param argv - array of command line arguments
+   * @param cur_argv - position in argv for current option value
+   * @return position in argv where option value begins or nullptr if option doesn't have value
+   * anymore
+   * @throw BadOption throw exception if value for option is incorrect
+   */
+  const char *findValueForMultOption(const IOption *opt, const std::string &opt_name,
+                                     const char **argv, int cur_argv);
+
+  // allow object constructor only for methods
+  CommandLine() = default;
+
+  // data
+  std::map<std::string, IOption *> _options_name; // map of name -> option
+  std::vector<IOption *> _options;                // options
+  std::map<IOption::Group, std::vector<IOption *>>
+      _grouped_options;   // map of groups: group -> vector of options
+  std::string _prog_name; // name of program
+  int _args_num = 0;      // number of command line arguments
+};
+
+// the following functions are helpers for users that declare new options
+/**
+ * @brief convert option names for Option constructor
+ * @param names - name of option, if option has several names then
+ * `names` must be represented by a string separated by a comma
+ */
+std::vector<std::string> optname(const char *names);
+
+/** @brief convert option overview for Option constructor */
+inline std::string overview(const char *descr)
+{
+  std::string overview(descr);
+  // empty overviews are rejected only in debug builds (assert is a no-op under NDEBUG)
+  assert(!overview.empty());
+
+  return overview;
+}
+
+/** @brief pass-through helper naming the `is_optional` argument of the Option constructor */
+inline bool optional(bool is_optional) { return is_optional; }
+
+/**
+ * @brief register valid values for option
+ * @param vals - valid values of option, if option has several that values then
+ * `vals` must be represented by a string separated by a comma
+ */
+std::vector<std::string> optvalues(const char *vals);
+
+/**
+ * @brief separators that separate option name and its value
+ * @param seps - chars of separators separated by a comma
+ */
+std::vector<char> separators(const char *seps);
+
+/**
+ * @param is_shown - if set to false, then option won't be shown in help message
+ */
+inline bool showopt(bool is_shown) { return is_shown; }
+// end of helper functions
+
+//
+// Implementation of template functions
+//
+/**
+ * @brief parse a boolean option value
+ * @param val - textual value from the command line; an empty string means the
+ *              flag was given without an argument and counts as true
+ * @throw BadOption if `val` is not a recognized boolean spelling
+ */
+template <typename T> bool OptionType<T, false>::convToBool(const std::string &val)
+{
+  if (val.empty() || val == "TRUE" || val == "True" || val == "true" || val == "1")
+  {
+    return true;
+  }
+
+  if (val == "FALSE" || val == "False" || val == "false" || val == "0")
+  {
+    return false;
+  }
+
+  // BadOption's constructor is (msg, optname, value); report `val` in the
+  // *value* slot - the previous code passed it as the option name by mistake,
+  // so BadOption::getValue() returned an empty string.
+  throw BadOption("", "", val);
+
+} // convToBool
+
+/**
+ * @brief parse a single-character option value
+ * @throw BadOption if `val` is not exactly one character long
+ */
+template <typename T> char OptionType<T, false>::convToChar(const std::string &val)
+{
+  if (val.length() == 1)
+  {
+    return val[0];
+  }
+  else
+  {
+    // BadOption's constructor is (msg, optname, value); report `val` in the
+    // *value* slot - the previous code passed it in the option-name slot.
+    throw BadOption("", "", val);
+  }
+
+} // convToChar
+
+/**
+ * @brief parse a numeric option value and range-check it against T
+ * @tparam Tnum - wide 64-bit intermediate type (uint64_t or int64_t); it must
+ *                strictly contain T's range so the bounds check below cannot
+ *                itself overflow (the asserts enforce this in debug builds)
+ * @throw BadOption if `val` is not a number or does not fit in T
+ */
+template <typename T>
+template <typename Tnum>
+Tnum OptionType<T, false>::convToNum(const std::string &val)
+{
+  Tnum num_val;
+
+  assert((std::is_same<Tnum, uint64_t>::value || std::is_same<Tnum, int64_t>::value));
+  assert((std::numeric_limits<T>::max() < std::numeric_limits<Tnum>::max()));
+  assert(std::numeric_limits<T>::min() >= std::numeric_limits<Tnum>::min());
+
+  try
+  {
+    num_val = std::is_same<Tnum, uint64_t>::value ? stoull(val) : stoll(val);
+  }
+  catch (...)
+  {
+    // BadOption's constructor is (msg, optname, value); report `val` in the
+    // *value* slot - the previous code passed it in the option-name slot.
+    throw BadOption("", "", val);
+  }
+
+  if (num_val > std::numeric_limits<T>::max() || num_val < std::numeric_limits<T>::min())
+  {
+    throw BadOption("", "", val); // parsed fine but out of range for T
+  }
+
+  return num_val;
+
+} // convToNum
+
+// Constructor: validates the declaration, stores all settings, and registers
+// the option with the global CommandLine parser (so declaring an Option at
+// namespace scope is enough to make it parseable).
+template <typename T>
+Option<T>::Option(const std::vector<std::string> &optnames, const std::string &descr,
+                  const T &default_val, bool is_optional, const std::vector<std::string> &vals,
+                  option_checker_t checker, const std::vector<char> &seps, bool enabled,
+                  IOption::Group group)
+{
+  // save all names
+  for (const auto &n : optnames)
+  {
+    _names.push_back(n);
+
+    assert(n[0] == '-' && "option name must start with `-`");
+  }
+
+  _descr = descr;
+  _is_optional = is_optional;
+  _valid_vals = vals;
+  _seps = seps;
+
+  // store the default value through the OptionType base (member field for
+  // scalars, the base class itself for class types)
+  this->setRawValue(default_val);
+
+#ifndef NDEBUG
+  // check that separators are valid symbols
+  for (const auto &s : _seps)
+  {
+    assert((s == '=' || s == ':') && "invalid option separators");
+  }
+#endif // NDEBUG
+
+  // save checker
+  _checker = checker;
+
+  _is_enabled = enabled;
+  assert((_is_enabled || _is_optional || group != IOption::Group::none) &&
+         "disabled non-group option can't be required");
+
+  _group = group;
+
+  // only vector-typed options may take several values on the command line
+  _can_have_several_vals =
+      std::is_same<T, std::vector<std::string>>::value || std::is_same<T, std::vector<int>>::value;
+  assert(!(_can_have_several_vals && !_seps.empty()) &&
+         "option with several values can't have separators");
+
+  // register new option for parser
+  CommandLine::getParser()->registerOption(this);
+
+} // Option
+
+//
+// prototypes of option checker functions
+//
+void checkInFile(const Option<std::string> &in_file);
+
+void checkOutFile(const Option<std::string> &out_file);
+
+void checkInDir(const Option<std::string> &dir);
+
+void checkOutDir(const Option<std::string> &dir);
+
+} // namespace cli
+} // namespace nnc
+
+#endif // NNCC_COMMANDLINE_H
diff --git a/compiler/nnc/pass/CMakeLists.txt b/compiler/nnc/pass/CMakeLists.txt
new file mode 100644
index 000000000..16a6948f7
--- /dev/null
+++ b/compiler/nnc/pass/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(PASS_MANAGER_SRC PassManager.cpp)
+
+# Static library with the pass-manager infrastructure; links against mir IR.
+nnc_add_library(nnc_pass STATIC ${PASS_MANAGER_SRC})
+target_link_libraries(nnc_pass PUBLIC mir)
+# Make the linker language explicit so the target always links as C++.
+set_target_properties(nnc_pass PROPERTIES LINKER_LANGUAGE CXX)
diff --git a/compiler/nnc/pass/PassManager.cpp b/compiler/nnc/pass/PassManager.cpp
new file mode 100644
index 000000000..0b2c06532
--- /dev/null
+++ b/compiler/nnc/pass/PassManager.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pass/PassManager.h"
+#include "pass/Pass.h"
+
+namespace nnc
+{
+
+PassManager::PassManager() = default;
+
+// Give every registered pass a chance to release its resources before the
+// manager (which owns the passes) is destroyed.
+PassManager::~PassManager()
+{
+  for (auto &pass : _passes)
+    pass->cleanup();
+}
+
+// Takes ownership of `pass`; passes are stored in registration order.
+void PassManager::registerPass(std::unique_ptr<Pass> pass)
+{
+  _passes.push_back(std::move(pass));
+} // registerPass
+
+} // namespace nnc
diff --git a/compiler/nnc/passes/CMakeLists.txt b/compiler/nnc/passes/CMakeLists.txt
new file mode 100644
index 000000000..ba158b58d
--- /dev/null
+++ b/compiler/nnc/passes/CMakeLists.txt
@@ -0,0 +1,12 @@
+#
+# COMMON UTILS
+#
+# def2src is a host-side helper tool built from utils/def2src.cpp
+set(DEF_CONV ${NNC_ROOT_SRC_DIR}/utils/def2src.cpp)
+add_executable(def2src ${DEF_CONV})
+add_subdirectory(dot_dumper)
+
+#
+# MIDDLE PASSES
+#
+add_subdirectory(optimizations)
+add_subdirectory(transformations)
diff --git a/compiler/nnc/passes/dot_dumper/CMakeLists.txt b/compiler/nnc/passes/dot_dumper/CMakeLists.txt
new file mode 100644
index 000000000..55117ee4d
--- /dev/null
+++ b/compiler/nnc/passes/dot_dumper/CMakeLists.txt
@@ -0,0 +1,6 @@
+# Collect all dumper sources and headers recursively.
+file(GLOB_RECURSE DUMPER_SRC ./*.cpp ./*.h)
+nnc_add_library(nnc_dumper SHARED ${DUMPER_SRC})
+target_link_libraries(nnc_dumper PRIVATE mir)
+
+# install dumper library
+nnc_install_library(nnc_dumper)
diff --git a/compiler/nnc/passes/dot_dumper/DumperPass.cpp b/compiler/nnc/passes/dot_dumper/DumperPass.cpp
new file mode 100644
index 000000000..5a1f56e36
--- /dev/null
+++ b/compiler/nnc/passes/dot_dumper/DumperPass.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passes/dot_dumper/DumperPass.h"
+#include "mir/Graph.h"
+#include "mir/IrDotDumper.h"
+
+#include <fstream>
+
+namespace nnc
+{
+
+using namespace mir;
+// Monotonically increasing dump index shared by all DumperPass instances;
+// it prefixes each output file name so successive dumps never overwrite each other.
+int DumperPass::_counter = 0;
+
+// Dumps the graph carried by `data` in Graphviz .dot format to a file named
+// "<counter>_<file_name>.dot" and passes the graph through unchanged.
+PassData DumperPass::run(PassData data)
+{
+  auto graph = static_cast<Graph *>(data);
+  assert(graph && "graph object is expected");
+  std::ofstream stream(std::to_string(_counter++) + "_" + _file_name + ".dot");
+  dumpGraph(graph, stream);
+  return graph;
+}
+
+} // namespace nnc
diff --git a/compiler/nnc/passes/optimizations/CMakeLists.txt b/compiler/nnc/passes/optimizations/CMakeLists.txt
new file mode 100644
index 000000000..006c92888
--- /dev/null
+++ b/compiler/nnc/passes/optimizations/CMakeLists.txt
@@ -0,0 +1,13 @@
+# Graph-level optimization passes packaged as a shared library.
+set(OPTIMIZATIONS_SRC
+  CombineTransposes.cpp
+  ConstantFoldTranspose.cpp
+  FuseArithmeticOps.cpp
+  DeadCodeElimination.cpp
+  SinkRelu.cpp
+  SinkTranspose.cpp
+  OptimizationUtils.cpp)
+nnc_add_library(nnc_optimizations SHARED ${OPTIMIZATIONS_SRC})
+target_link_libraries(nnc_optimizations PRIVATE mir)
+
+# install optimizations library
+nnc_install_library(nnc_optimizations)
diff --git a/compiler/nnc/passes/optimizations/CombineTransposes.cpp b/compiler/nnc/passes/optimizations/CombineTransposes.cpp
new file mode 100644
index 000000000..e381a9cae
--- /dev/null
+++ b/compiler/nnc/passes/optimizations/CombineTransposes.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passes/optimizations/CombineTransposes.h"
+#include "mir/ops/TransposeOp.h"
+#include "mir/Graph.h"
+#include "mir/GraphPatternMatcher.h"
+#include <algorithm>
+
+namespace nnc
+{
+
+using namespace mir;
+
+// Combines two successive transpose axis orders into a single equivalent one
+// (`order1` is applied first, then `order2`).
+// NOTE(review): res is built as the inverse of the mapping i -> order2[order1[i]];
+// presumably this matches TransposeOp's axis-order convention - verify against
+// mir::ops::TransposeOp before relying on the direction.
+std::vector<size_t> combineAxisOrders(const std::vector<std::size_t> &order1,
+                                      const std::vector<std::size_t> &order2)
+{
+  assert(order1.size() == order2.size());
+  std::vector<size_t> res(order1.size());
+  for (size_t i = 0; i < order1.size(); i++)
+  {
+    res[order2[order1[i]]] = i;
+  }
+  return res;
+}
+
+// Returns true when `axis_order` maps every axis to itself, i.e. the
+// corresponding transpose is a no-op.
+static bool isIdentityTranspose(const std::vector<size_t> &axis_order)
+{
+  for (size_t i = 0; i < (axis_order.size()); i++)
+  {
+    if (axis_order[i] != i)
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Fuses pairs of directly connected transpose operations into one (or removes
+// both when their combined effect is the identity). `deleted_nodes` tracks
+// operations removed in the current batch of matches; on hitting a stale match
+// the rest of the batch is abandoned (break) and matches are recomputed from
+// the updated graph, so the loop terminates only when no pair is left.
+nnc::PassData nnc::CombineTransposes::run(nnc::PassData data)
+{
+  auto g = static_cast<Graph *>(data);
+  assert(g);
+  GraphPatternMatcher matcher(g);
+  auto is_tr = [](const Operation *op1) { return op1->getType() == Operation::Type::transpose; };
+  std::vector<std::pair<Operation *, Operation *>> matches = matcher.matchEdge(is_tr, is_tr);
+  std::unordered_set<Operation *> deleted_nodes;
+  while (!matches.empty())
+  {
+    for (std::pair<Operation *, Operation *> match : matches)
+    {
+      if (deleted_nodes.find(match.first) != deleted_nodes.end())
+      {
+        break;
+      };
+      auto *top_transpose = dynamic_cast<mir::ops::TransposeOp *>(match.first);
+      if (deleted_nodes.find(match.second) != deleted_nodes.end())
+      {
+        break;
+      };
+      auto *bottom_transpose = dynamic_cast<mir::ops::TransposeOp *>(match.second);
+      // compose both permutations into a single equivalent axis order
+      auto combined_axis_order =
+          combineAxisOrders(top_transpose->getAxisOrder(), bottom_transpose->getAxisOrder());
+
+      if (!isIdentityTranspose(combined_axis_order))
+      {
+        // replace the pair with one transpose applying the combined order
+        auto new_tr_op =
+            g->create<mir::ops::TransposeOp>(top_transpose->getInput(0), combined_axis_order);
+
+        g->replaceNode(bottom_transpose, new_tr_op);
+      }
+      else
+      {
+        // Connect top input to all outputs of bottom
+        Operation *top = top_transpose->getInput(0)->getNode();
+        g->replaceNode(bottom_transpose, top);
+      }
+      deleted_nodes.emplace(bottom_transpose);
+      // the top transpose becomes dead when the bottom one was its only user
+      if (top_transpose->getOutput(0)->getUses().empty())
+      {
+        g->removeNode(top_transpose);
+        deleted_nodes.emplace(top_transpose);
+      }
+    }
+    matches = matcher.matchEdge(is_tr, is_tr);
+  };
+  return g;
+}
+
+} // namespace nnc
diff --git a/compiler/nnc/passes/optimizations/ConstantFoldTranspose.cpp b/compiler/nnc/passes/optimizations/ConstantFoldTranspose.cpp
new file mode 100644
index 000000000..47a3147a5
--- /dev/null
+++ b/compiler/nnc/passes/optimizations/ConstantFoldTranspose.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passes/optimizations/ConstantFoldTranspose.h"
+#include "passes/optimizations/OptimizationUtils.h"
+#include "mir/GraphPatternMatcher.h"
+#include "mir/ShapeRange.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/TransposeOp.h"
+
+#include <cstring>
+
+using namespace nnc;
+using namespace mir;
+
+// Copy & paste from interpreter backend.
+// TODO Extract this to a common place and use in both interpreter and optimizations.
+// Permutes `input` into the pre-allocated `res` according to `axis_order`:
+// for each input element, the output index is built by reading the input index
+// through the axis order, then the raw bytes are copied (element-type agnostic).
+static void transpose(const TensorVariant &input, TensorVariant &res,
+                      const std::vector<std::size_t> &axis_order)
+{
+  const auto &input_shape = input.getShape();
+  const int num_axes = static_cast<int>(axis_order.size());
+  assert(num_axes == input_shape.rank());
+
+  ShapeRange in_range(input_shape);
+  Index out_index(input_shape.rank());
+
+  const size_t elem_size = input.getElementSize();
+
+  for (const auto &in_index : in_range)
+  {
+    for (int i = 0; i < num_axes; ++i)
+      out_index.at(i) = in_index.at(axis_order[i]);
+
+    std::memcpy(res.at(out_index), input.at(in_index), elem_size);
+  }
+}
+
+// Folds every constant->transpose edge at compile time: the transpose is
+// evaluated on the constant's data and the pair is replaced by a new constant.
+// Matches are recomputed after each batch because folding invalidates them.
+PassData ConstantFoldTranspose::run(PassData data)
+{
+  auto graph = static_cast<Graph *>(data);
+
+  GraphPatternMatcher matcher(graph);
+  auto is_constant = [](const Operation *op) { return op->getType() == Operation::Type::constant; };
+  auto is_transpose = [](const Operation *op) {
+    return op->getType() == Operation::Type::transpose;
+  };
+
+  auto matches = matcher.matchEdge(is_constant, is_transpose);
+  while (!matches.empty())
+  {
+    for (const auto match : matches)
+    {
+      auto constant_op = dynamic_cast<ops::ConstantOp *>(match.first);
+      auto transpose_op = dynamic_cast<ops::TransposeOp *>(match.second);
+
+      const auto elem_type = constant_op->getValue().getElementType();
+      const auto &out_shape = transpose_op->getOutputShape(0);
+      TensorType res_type(elem_type, out_shape);
+      // carry the original constant's quantization parameters over to the result
+      if (constant_op->getOutput(0)->getType().isQuantized())
+        res_type.setQuantization(constant_op->getOutput(0)->getType().getQuantization());
+
+      TensorVariant res(res_type);
+      transpose(constant_op->getValue(), res, transpose_op->getAxisOrder());
+
+      auto new_op = graph->create<ops::ConstantOp>(res);
+
+      graph->replaceNode(transpose_op, new_op);
+      // the old constant may now be dead if the transpose was its only user
+      opt_util::removeNodeIfUnused(graph, constant_op);
+    }
+    matches = matcher.matchEdge(is_constant, is_transpose);
+  }
+  return graph;
+}
diff --git a/compiler/nnc/passes/optimizations/DeadCodeElimination.cpp b/compiler/nnc/passes/optimizations/DeadCodeElimination.cpp
new file mode 100644
index 000000000..b89dca1b7
--- /dev/null
+++ b/compiler/nnc/passes/optimizations/DeadCodeElimination.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passes/optimizations/DeadCodeElimination.h"
+#include "mir/Graph.h"
+
+#include <algorithm>
+
+using namespace mir;
+
+// Removes operations none of whose outputs are used. Graph inputs and outputs
+// are kept unconditionally since they form the graph's external interface.
+nnc::PassData nnc::DeadCodeElimination::run(PassData data)
+{
+  auto graph = static_cast<Graph *>(data);
+  assert(graph);
+
+  std::vector<Operation *> sorted_nodes = getSortedNodes(graph);
+
+  auto remove_if_unused = [graph](Operation *op) {
+    if (op->getType() == Operation::Type::input || op->getType() == Operation::Type::output)
+      return;
+
+    bool has_no_uses =
+        std::all_of(op->getOutputs().cbegin(), op->getOutputs().cend(),
+                    [](const Operation::Output &output) { return output.getUses().empty(); });
+
+    if (has_no_uses)
+    {
+      graph->removeNode(op);
+    }
+  };
+
+  // walk in reverse topological order so removing a dead consumer can expose
+  // its producers as dead within the same sweep
+  std::for_each(sorted_nodes.rbegin(), sorted_nodes.rend(), remove_if_unused);
+
+  return graph;
+}
diff --git a/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp b/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp
new file mode 100644
index 000000000..91686ef74
--- /dev/null
+++ b/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passes/optimizations/FuseArithmeticOps.h"
+#include "passes/optimizations/OptimizationUtils.h"
+#include "mir/ops/AddOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/MulOp.h"
+#include "mir/Graph.h"
+#include "mir/Tensor.h"
+#include "mir/Index.h"
+#include "mir/TensorVariant.h"
+#include "mir/ShapeRange.h"
+
+#include <algorithm>
+#include <unordered_set>
+
+namespace nnc
+{
+
+namespace
+{
+
+using namespace mir;
+using namespace std;
+using namespace opt_util;
+
+using OpType = Operation::Type;
+using Edge = pair<Operation *, Operation *>;
+
+/**
+ * This function is used to get the 'ConstantOp' holding the weights of 'AddOp', 'MulOp' or 'Conv2DOp'
+ * For each of these ops the weights are stored in the second input node
+ */
+ops::ConstantOp *getSecondInputAsConst(Operation *op)
+{
+ assert(op->getType() == OpType::add || op->getType() == OpType::mul ||
+ op->getType() == OpType::conv2D);
+ return dynamic_cast<ops::ConstantOp *>(op->getInput(1)->getNode());
+}
+
+// This function finds successive operations of given types, with ConstantOp as second input
+vector<Edge> findSuccessiveOpsWithConstWeights(Graph *g, OpType first_op_type,
+ OpType second_op_type)
+{
+ vector<Edge> matches;
+ unordered_set<Operation *> matched_nodes;
+ for (auto *first_op : g->getNodes())
+ {
+ if (first_op->getType() == first_op_type && getSecondInputAsConst(first_op))
+ {
+ for (auto &out : first_op->getOutputs())
+ {
+ for (Operation::Use use : out.getUses())
+ {
+ Operation *second_op = use.getNode();
+ if (second_op->getType() == second_op_type && getSecondInputAsConst(second_op))
+ {
+ /**
+ * Don't match already matched nodes, so for op1->op2->op3 this function
+                 * will return {{op1, op2}} and not {{op1, op2}, {op2, op3}}
+ */
+ if (matched_nodes.find(first_op) == matched_nodes.end() &&
+ matched_nodes.find(second_op) == matched_nodes.end())
+ {
+ matched_nodes.emplace(first_op);
+ matched_nodes.emplace(second_op);
+ matches.emplace_back(first_op, second_op);
+ }
+ }
+ }
+ }
+ }
+ }
+ return matches;
+}
+
+/**
+ * This function merges two ConstantOp into new one, by elementwise multiplication or addition
+ * If first ConstantOp rank > 1, second one broadcasting to first by axis=0
+ */
+Operation *mergeConstantOps(Graph *g, const ops::ConstantOp *const1_op,
+ const ops::ConstantOp *const2_op, OpType merge_type)
+{
+ const auto &const1_val = const1_op->getValue();
+ const auto &const2_val = const2_op->getValue();
+ assert(const1_val.getShape().rank() >= const2_val.getShape().rank());
+ assert(const2_val.getShape().rank() == 1);
+ assert(const1_val.getShape().dim(0) == const2_val.getShape().dim(0));
+
+ // Create and fill TensorVariant for new ConstantOp
+ TensorVariant new_const_val(DataType::FLOAT32, const1_val.getShape());
+ Tensor<float> const1_accessor(const1_val);
+ Tensor<float> const2_accessor(const2_val);
+ Tensor<float> new_const_accessor(new_const_val);
+ ShapeRange const1_range(const1_val.getShape());
+ for (auto &idx : const1_range)
+ {
+ float operand1 = const1_accessor.at(idx);
+ /**
+ * Broadcast second ConstantOp to first one:
+ * idx of second constant always has rank 1 and equals to first dimension of first constant idx
+ */
+ float operand2 = const2_accessor.at(Index{idx.at(0)});
+ switch (merge_type)
+ {
+ case OpType::mul:
+ new_const_accessor.at(idx) = operand1 * operand2;
+ break;
+ case OpType::add:
+ new_const_accessor.at(idx) = operand1 + operand2;
+ break;
+ default:
+ assert(false && "only 'mul' and 'add' constants merge types supported");
+ }
+ }
+
+ return g->create<ops::ConstantOp>(new_const_val);
+}
+
+// TODO: support 'DepthwiseConv'->'Mul'
+/**
+ * This function fuses some successive operations with constant weights into one:
+ * 'Add'->'Add' into 'Add'; 'Mul'->'Mul' into 'Mul'; 'Conv'->'Mul' into 'Conv';
+ * Before: | After:
+ * -------------------------|---------------------------
+ * [input] [Const1] | [input] [Const1*Const2]
+ * \\ // | \\ //
+ * [Mul] [Const2] | [Mul]
+ * \\ // |
+ * [Mul] |
+ * -------------------------|---------------------------
+ * [input] [Const1] | [input] [Const1+Const2]
+ * \\ // | \\ //
+ * [Add] [Const2] | [Add]
+ * \\ // |
+ * [Add] |
+ * -------------------------|---------------------------
+ * [input] [Const1] | [input] [Const1*Const2]
+ * \\ // | \\ //
+ * [Conv2D] [Const2] | [Conv2D]
+ * \\ // |
+ * [Mul] |
+ */
+bool fuseSuccessiveOps(Graph *g)
+{
+ // Find all successive ops
+ vector<Edge> successive_ops;
+ auto mul_mul_vec = findSuccessiveOpsWithConstWeights(g, OpType::mul, OpType::mul);
+ successive_ops.insert(successive_ops.end(), mul_mul_vec.begin(), mul_mul_vec.end());
+ auto add_add_vec = findSuccessiveOpsWithConstWeights(g, OpType::add, OpType::add);
+ successive_ops.insert(successive_ops.end(), add_add_vec.begin(), add_add_vec.end());
+ auto conv_mul_vec = findSuccessiveOpsWithConstWeights(g, OpType::conv2D, OpType::mul);
+ successive_ops.insert(successive_ops.end(), conv_mul_vec.begin(), conv_mul_vec.end());
+
+ for (auto &edge : successive_ops)
+ {
+ auto const1_op = getSecondInputAsConst(edge.first);
+ auto const2_op = getSecondInputAsConst(edge.second);
+ assert(const1_op && const2_op);
+
+ // Create new constant operation and copy first successive operation
+ auto new_const_op = mergeConstantOps(g, const1_op, const2_op, edge.second->getType());
+ auto first_op_input = edge.first->getInput(0);
+ auto new_op = g->copyOpWithInputs(edge.first, {first_op_input, new_const_op->getOutput(0)});
+
+ // Replace second successive operation with new one and remove old nodes
+ g->replaceNode(edge.second, new_op);
+ removeNodeIfUnused(g, edge.first);
+ removeNodeIfUnused(g, const1_op);
+ removeNodeIfUnused(g, const2_op);
+ }
+
+  // If there are no successive operations to fuse, the graph wasn't changed
+ return !successive_ops.empty();
+}
+
+/**
+ * This function sinks 'Add' through 'Mul'
+ * by multiplying 'Add' weights on 'Mul' weights
+ * Before: | After:
+ *--------------------------|--------------------------
+ * [input] [Const1] | [input] [Const2]
+ * \\ // | \\ //
+ * [Add] [Const2] | [Mul] [Const1*Const2]
+ * \\ // | \\ //
+ * [Mul] | [Add]
+ * |
+ */
+bool sinkAddThroughMul(Graph *g)
+{
+  auto add_mul_edges = findSuccessiveOpsWithConstWeights(g, OpType::add, OpType::mul);
+
+  for (auto &edge : add_mul_edges)
+  {
+    auto old_add_op = edge.first;
+    auto old_mul_op = edge.second;
+    auto old_add_const_op = getSecondInputAsConst(old_add_op);
+    auto old_mul_const_op = getSecondInputAsConst(old_mul_op);
+    assert(old_add_const_op && old_mul_const_op);
+
+    // Create new operations: Mul is applied to the original input first,
+    // then Add with weights pre-multiplied by the Mul constant.
+    auto old_add_input = old_add_op->getInput(0);
+    auto new_mul_op =
+        g->copyOpWithInputs(old_mul_op, {old_add_input, old_mul_const_op->getOutput(0)});
+    auto new_add_const_op = mergeConstantOps(g, old_add_const_op, old_mul_const_op, OpType::mul);
+    auto new_add_op =
+        g->copyOpWithInputs(old_add_op, {new_mul_op->getOutput(0), new_add_const_op->getOutput(0)});
+
+    // Replace old Mul with the new Add and remove the now-unused old nodes
+    g->replaceNode(old_mul_op, new_add_op);
+    removeNodeIfUnused(g, old_add_op);
+    removeNodeIfUnused(g, old_add_const_op);
+  }
+
+  // If there are no add-mul edges - graph wasn't changed
+  return !add_mul_edges.empty();
+}
+
+} // unnamed namespace
+
+nnc::PassData nnc::FuseArithmeticOps::run(nnc::PassData data)
+{
+ auto g = static_cast<Graph *>(data);
+
+ bool graph_changed = true;
+ while (graph_changed)
+ {
+ graph_changed = false;
+ graph_changed |= fuseSuccessiveOps(g);
+ graph_changed |= sinkAddThroughMul(g);
+ }
+
+ return g;
+}
+
+} // namespace nnc
diff --git a/compiler/nnc/passes/optimizations/OptimizationUtils.cpp b/compiler/nnc/passes/optimizations/OptimizationUtils.cpp
new file mode 100644
index 000000000..27e52cdaf
--- /dev/null
+++ b/compiler/nnc/passes/optimizations/OptimizationUtils.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "passes/optimizations/OptimizationUtils.h"
+
+namespace nnc
+{
+namespace opt_util
+{
+
+void swapAdjacent(mir::Graph *g, mir::Operation *top, mir::Operation *bottom)
+{
+ assert(top->getNumInputs() == bottom->getNumInputs() && top->getNumInputs() == 1 &&
+ top->getNumInputs() == top->getNumOutputs() &&
+ top->getNumInputs() == bottom->getNumOutputs() && "incompatible ops");
+ const auto &ins = top->getInputs();
+ std::vector<mir::Operation::Output *> prods;
+ prods.reserve(top->getNumInputs());
+ for (mir::Operation::Output *in : ins)
+ {
+ prods.emplace_back(in);
+ }
+ mir::Operation *new_bottom = g->copyOpWithInputs(bottom, prods);
+ prods.clear();
+ prods.reserve(new_bottom->getNumOutputs());
+ for (mir::Operation::Output &out : new_bottom->getOutputs())
+ {
+ prods.emplace_back(&out);
+ }
+ mir::Operation *new_top = g->copyOpWithInputs(top, prods);
+ g->replaceNode(bottom, new_top);
+ g->replaceNode(top, new_bottom);
+}
+
+// TODO: this function and its usages should be removed once the DCE optimization is implemented
+void removeNodeIfUnused(mir::Graph *g, mir::Operation *op)
+{
+ if (op->getOutput(0)->getUses().empty())
+ g->removeNode(op);
+}
+
+} // namespace opt_util
+} // namespace nnc
diff --git a/compiler/nnc/passes/optimizations/SinkRelu.cpp b/compiler/nnc/passes/optimizations/SinkRelu.cpp
new file mode 100644
index 000000000..1307c6254
--- /dev/null
+++ b/compiler/nnc/passes/optimizations/SinkRelu.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passes/optimizations/SinkRelu.h"
+#include "passes/optimizations/OptimizationUtils.h"
+#include "mir/ops/TransposeOp.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/Graph.h"
+#include "mir/GraphPatternMatcher.h"
+
+#include <algorithm>
+
+namespace nnc
+{
+
+using namespace mir;
+using namespace opt_util;
+
+PassData SinkRelu::run(PassData data)
+{
+ auto g = static_cast<Graph *>(data);
+ assert(g);
+ GraphPatternMatcher matcher(g);
+ auto is_relu = [](const Operation *op) { return op->getType() == Operation::Type::ReLU; };
+ auto is_concat = [](const Operation *op) { return op->getType() == Operation::Type::concat; };
+ auto is_max_pool = [](const Operation *op) {
+ return op->getType() == Operation::Type::maxPool2D;
+ };
+ std::vector<std::pair<Operation *, Operation *>> matches;
+
+ // sink ReLU through MaxPool
+ matches = matcher.matchEdge(is_relu, is_max_pool);
+ for (auto pair : matches)
+ {
+ swapAdjacent(g, pair.first, pair.second);
+ }
+ // sink ReLU through Concat
+ auto matches_v = matcher.matchUpBush(is_relu, is_concat);
+ for (const auto &pair : matches_v)
+ {
+ auto relus = pair.first;
+ auto *concat = dynamic_cast<ops::ConcatOp *>(pair.second);
+ std::vector<Operation::Output *> pre_relu;
+ pre_relu.reserve(relus.size());
+ for (auto *r : relus)
+ {
+ pre_relu.emplace_back(r->getInput(0));
+ }
+ // create replacement nodes
+ auto new_concat = g->create<ops::ConcatOp>(pre_relu, concat->getAxis());
+ auto new_relu = g->create<ops::ReluOp>(new_concat->getOutput(0));
+
+ // concat is deleted here
+ g->replaceNode(concat, new_relu);
+ for (auto r : relus)
+ {
+ removeNodeIfUnused(g, r);
+ }
+ }
+ return g;
+}
+
+} // namespace nnc
diff --git a/compiler/nnc/passes/optimizations/SinkTranspose.cpp b/compiler/nnc/passes/optimizations/SinkTranspose.cpp
new file mode 100644
index 000000000..eb5ea2f49
--- /dev/null
+++ b/compiler/nnc/passes/optimizations/SinkTranspose.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passes/optimizations/SinkTranspose.h"
+#include "passes/optimizations/OptimizationUtils.h"
+#include "mir/ops/TransposeOp.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/Graph.h"
+#include "mir/GraphPatternMatcher.h"
+
+#include <string>
+#include <algorithm>
+
+namespace nnc
+{
+
+using namespace mir;
+using namespace opt_util;
+
+PassData SinkTranspose::run(PassData data)
+{
+ auto g = static_cast<Graph *>(data);
+ assert(g); // NOLINT
+ GraphPatternMatcher matcher(g);
+ auto is_tr = [](const Operation *op1) { return op1->getType() == Operation::Type::transpose; };
+ auto is_relu = [](const Operation *op2) { return op2->getType() == Operation::Type::ReLU; };
+ auto is_concat = [](const Operation *op2) { return op2->getType() == Operation::Type::concat; };
+ std::vector<std::pair<Operation *, Operation *>> matches;
+
+ // sink transpose below ReLU
+ matches = matcher.matchEdge(is_tr, is_relu);
+ for (auto pair : matches)
+ {
+ swapAdjacent(g, pair.first, pair.second);
+ }
+
+ // sink transpose through Concat
+ auto v_matches = matcher.matchUpBush(is_tr, is_concat);
+ for (const auto &pair : v_matches)
+ {
+ std::vector<Operation *> trs = pair.first;
+ auto *concat = dynamic_cast<ops::ConcatOp *>(pair.second);
+ auto axis_order = dynamic_cast<ops::TransposeOp *>(trs[0])->getAxisOrder();
+ if (std::all_of(trs.begin(), trs.end(), [&axis_order](Operation *tr) {
+ return dynamic_cast<ops::TransposeOp *>(tr)->getAxisOrder() == axis_order;
+ }))
+ {
+ std::vector<Operation::Output *> prev_trans;
+ prev_trans.reserve(trs.size());
+ for (auto transpose : trs)
+ {
+ prev_trans.emplace_back(transpose->getInput(0));
+ }
+ auto new_concat = g->create<ops::ConcatOp>(prev_trans, axis_order[concat->getAxis()]);
+ auto new_transpose = g->create<ops::TransposeOp>(new_concat->getOutput(0), axis_order);
+ // removes old concat
+ g->replaceNode(concat, new_transpose);
+ for (auto tr : trs)
+ {
+ removeNodeIfUnused(g, tr);
+ }
+ }
+ }
+
+ return g;
+}
+
+} // namespace nnc
diff --git a/compiler/nnc/passes/transformations/CMakeLists.txt b/compiler/nnc/passes/transformations/CMakeLists.txt
new file mode 100644
index 000000000..08517204d
--- /dev/null
+++ b/compiler/nnc/passes/transformations/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(TRANSFORMATIONS_SRC
+ DataFormatSwitcher.cpp
+ LowerConv2D.cpp)
+
+nnc_add_library(nnc_transformations STATIC ${TRANSFORMATIONS_SRC})
+set_target_properties(nnc_transformations PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(nnc_transformations PRIVATE mir)
diff --git a/compiler/nnc/passes/transformations/DataFormatSwitcher.cpp b/compiler/nnc/passes/transformations/DataFormatSwitcher.cpp
new file mode 100644
index 000000000..8ff842660
--- /dev/null
+++ b/compiler/nnc/passes/transformations/DataFormatSwitcher.cpp
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passes/transformations/DataFormatSwitcher.h"
+
+#include "mir/TensorUtil.h"
+#include "mir/ops/AvgPool2DOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/Deconv2DOp.h"
+#include "mir/ops/DepthwiseConv2DOp.h"
+#include "mir/ops/MaxPool2DOp.h"
+#include "mir/ops/TransposeOp.h"
+
+namespace nnc
+{
+DataFormatSwitcher::DataFormatSwitcher(const mir::DataFormat target_format)
+ : _target_format(target_format)
+{
+}
+
+DataFormatSwitcher::~DataFormatSwitcher() = default;
+
+PassData DataFormatSwitcher::run(PassData data)
+{
+ _graph = static_cast<mir::Graph *>(data);
+ assert(_graph);
+
+ // Collect nodes which use DataFormat
+ for (auto *node : _graph->getNodes())
+ {
+ switch (node->getType())
+ { // nodes using DataFormat
+ case mir::Operation::Type::avgPool2D:
+ case mir::Operation::Type::conv2D:
+ case mir::Operation::Type::deConv2D:
+ case mir::Operation::Type::depthwiseConv:
+ case mir::Operation::Type::maxPool2D:
+ _candidates_for_switch.push_back(node);
+ break;
+ default:
+ break; // not use DataFormat
+ }
+ }
+ // Switch collected ops
+ for (auto *op : _candidates_for_switch)
+ {
+ switch (op->getType())
+ {
+ case mir::Operation::Type::avgPool2D:
+ switchAvgPool2D(dynamic_cast<mir::ops::AvgPool2DOp *>(op));
+ break;
+ case mir::Operation::Type::conv2D:
+ switchConv2D(dynamic_cast<mir::ops::Conv2DOp *>(op));
+ break;
+ case mir::Operation::Type::deConv2D:
+ switchDeConv2D(dynamic_cast<mir::ops::DeConv2DOp *>(op));
+ break;
+ case mir::Operation::Type::depthwiseConv:
+ switchDepthwiseConv2D(dynamic_cast<mir::ops::DepthwiseConv2DOp *>(op));
+ break;
+ case mir::Operation::Type::maxPool2D:
+ switchMaxPool2D(dynamic_cast<mir::ops::MaxPool2DOp *>(op));
+ break;
+ default:
+ assert(false && "Can't switch DataFormat for this operation!");
+ }
+ }
+
+ return _graph;
+}
+
+void DataFormatSwitcher::cleanup() { _candidates_for_switch.clear(); }
+
+mir::Operation::Output *DataFormatSwitcher::insertTransposeBefore(mir::Operation::Output *out)
+{
+ mir::Operation::Output *new_out;
+ if (_target_format == mir::DataFormat::NHWC)
+ new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 2, 3, 1})
+ ->getOutput(0); // NCHW -> NHWC
+ else
+ new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 3, 1, 2})
+ ->getOutput(0); // NHWC -> NCHW
+ if (out->getType().isQuantized())
+ new_out->setQuantization(out->getType().getQuantization());
+ return new_out;
+}
+
+mir::Operation::Output *DataFormatSwitcher::insertTransposeAfter(mir::Operation::Output *out)
+{
+ mir::Operation::Output *new_out;
+ if (_target_format == mir::DataFormat::NHWC)
+ new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 3, 1, 2})
+ ->getOutput(0); // NHWC -> NCHW
+ else
+ new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 2, 3, 1})
+ ->getOutput(0); // NCHW -> NHWC
+ if (out->getType().isQuantized())
+ new_out->setQuantization(out->getType().getQuantization());
+ return new_out;
+}
+
+void DataFormatSwitcher::switchAvgPool2D(mir::ops::AvgPool2DOp *op)
+{
+ if (op->getDataFormat() == _target_format)
+ return;
+
+ auto *input = op->getInput(0);
+
+ mir::AvgPool2DOpAttributes attributes(op->getAttributes());
+ attributes.data_format = _target_format;
+
+ auto *trans_in = insertTransposeBefore(input);
+
+ auto new_pool = _graph->create<mir::ops::AvgPool2DOp>(trans_in, attributes);
+
+ auto *trans_out = insertTransposeAfter(new_pool->getOutput(0));
+
+ _graph->replaceNode(op, trans_out->getNode());
+}
+
+void DataFormatSwitcher::switchConv2D(mir::ops::Conv2DOp *op)
+{
+ if (op->getDataFormat() == _target_format)
+ return;
+
+ assert(op->getNumInputs() >= 2);
+ auto *input = op->getInput(0);
+ auto *kernel = op->getInput(1);
+
+ mir::Conv2DOpAttributes attributes(op->getAttributes());
+ attributes.data_format = _target_format;
+
+ auto *trans_in = insertTransposeBefore(input);
+
+ mir::Operation *new_conv;
+ if (op->getNumInputs() == 2)
+ new_conv = _graph->create<mir::ops::Conv2DOp>(trans_in, kernel, attributes);
+ else
+ {
+ auto bias = op->getInput(2);
+ new_conv = _graph->create<mir::ops::Conv2DOp>(trans_in, kernel, bias, attributes);
+ }
+
+ if (op->getOutput(0)->getType().isQuantized())
+ new_conv->getOutput(0)->setQuantization(op->getOutput(0)->getType().getQuantization());
+
+ auto *trans_out = insertTransposeAfter(new_conv->getOutput(0));
+
+ _graph->replaceNode(op, trans_out->getNode());
+}
+
+void DataFormatSwitcher::switchDeConv2D(mir::ops::DeConv2DOp *op)
+{
+ if (op->getDataFormat() == _target_format)
+ return;
+
+ assert(op->getNumInputs() == 2);
+ auto *input = op->getInput(0);
+ auto *kernel = op->getInput(1);
+
+ auto *trans_in = insertTransposeBefore(input);
+
+ mir::Operation *new_deconv;
+ mir::Deconv2DOpAttributes attributes(op->getAttributes());
+ attributes.data_format = _target_format;
+ if (attributes.padding_type == mir::ops::PaddingType::Explicit)
+ {
+ new_deconv = _graph->create<mir::ops::DeConv2DOp>(trans_in, kernel, attributes);
+ }
+ else
+ {
+ mir::Shape output_shape = op->getOutputShape(0);
+ if (_target_format == mir::DataFormat::NHWC)
+ output_shape = mir::transposeShape<0, 2, 3, 1>(output_shape);
+ else
+ output_shape = mir::transposeShape<0, 3, 1, 2>(output_shape);
+ new_deconv = _graph->create<mir::ops::DeConv2DOp>(trans_in, kernel, attributes, output_shape);
+ }
+
+ auto *trans_out = insertTransposeAfter(new_deconv->getOutput(0));
+
+ _graph->replaceNode(op, trans_out->getNode());
+}
+
+void DataFormatSwitcher::switchDepthwiseConv2D(mir::ops::DepthwiseConv2DOp *op)
+{
+ if (op->getDataFormat() == _target_format)
+ return;
+
+ assert(op->getNumInputs() >= 2);
+ auto *input = op->getInput(0);
+ auto *kernel = op->getInput(1);
+
+ mir::Conv2DOpAttributes attributes(op->getAttributes());
+ attributes.data_format = _target_format;
+
+ auto *trans_in = insertTransposeBefore(input);
+
+ mir::Operation *new_dw_conv;
+ if (op->getNumInputs() == 2)
+ new_dw_conv = _graph->create<mir::ops::DepthwiseConv2DOp>(trans_in, kernel, attributes);
+ else
+ {
+ auto bias = op->getInput(2);
+ new_dw_conv = _graph->create<mir::ops::DepthwiseConv2DOp>(trans_in, kernel, bias, attributes);
+ }
+
+ if (op->getOutput(0)->getType().isQuantized())
+ new_dw_conv->getOutput(0)->setQuantization(op->getOutput(0)->getType().getQuantization());
+
+ auto *trans_out = insertTransposeAfter(new_dw_conv->getOutput(0));
+
+ _graph->replaceNode(op, trans_out->getNode());
+}
+
+void DataFormatSwitcher::switchMaxPool2D(mir::ops::MaxPool2DOp *op)
+{
+ if (op->getDataFormat() == _target_format)
+ return;
+
+ auto *input = op->getInput(0);
+
+ mir::MaxPool2DOpAttributes attributes(op->getAttributes());
+ attributes.data_format = _target_format;
+
+ auto *trans_in = insertTransposeBefore(input);
+
+ auto new_pool = _graph->create<mir::ops::MaxPool2DOp>(trans_in, attributes);
+
+ auto *trans_out = insertTransposeAfter(new_pool->getOutput(0));
+
+ _graph->replaceNode(op, trans_out->getNode());
+}
+
+} // namespace nnc
diff --git a/compiler/nnc/passes/transformations/LowerConv2D.cpp b/compiler/nnc/passes/transformations/LowerConv2D.cpp
new file mode 100644
index 000000000..9e32978bc
--- /dev/null
+++ b/compiler/nnc/passes/transformations/LowerConv2D.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passes/transformations/LowerConv2D.h"
+
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/DepthwiseConv2DOp.h"
+#include "mir/ops/TransposeOp.h"
+
+namespace nnc
+{
+
+static void lowerConv2D(mir::Graph *graph, mir::ops::Conv2DOp *op)
+{
+ mir::Operation::Output *input = op->getInput(0);
+ mir::Operation::Output *kernel = op->getInput(1);
+
+ const std::int32_t in_group_size = kernel->getShape().dim(3);
+ const std::int32_t out_group_size = kernel->getShape().dim(0) / op->getNumGroups();
+
+ if (in_group_size == 1 && out_group_size == 1)
+ {
+ // [O, H, W, I / M] == [M, H, W, 1] -> [H, W, M, 1]
+ std::vector<std::size_t> perm{1, 2, 0, 3};
+ mir::Operation::Output *new_kernel =
+ graph->create<mir::ops::TransposeOp>(kernel, perm)->getOutput(0);
+ mir::Conv2DOpAttributes attributes = op->getAttributes();
+ attributes.num_groups = 1;
+ mir::Operation::Output *new_result =
+ graph->create<mir::ops::DepthwiseConv2DOp>(input, new_kernel, attributes)->getOutput(0);
+ graph->replaceNode(op, new_result->getNode());
+ }
+}
+
+LowerConv2D::LowerConv2D() = default;
+
+PassData LowerConv2D::run(PassData data)
+{
+ auto *graph = static_cast<mir::Graph *>(data);
+
+ // Collect candidate ops before actual transformation because the graph will be changed.
+ std::vector<mir::ops::Conv2DOp *> group_conv_ops;
+ for (mir::Operation *op : graph->getNodes())
+ {
+ auto *conv_op = dynamic_cast<mir::ops::Conv2DOp *>(op);
+ if (conv_op != nullptr && conv_op->getNumGroups() != 1)
+ {
+ group_conv_ops.push_back(conv_op);
+ }
+ }
+
+ for (mir::ops::Conv2DOp *op : group_conv_ops)
+ {
+ lowerConv2D(graph, op);
+ }
+
+ return graph;
+}
+
+void LowerConv2D::cleanup() {}
+
+} // namespace nnc
diff --git a/compiler/nnc/requires.cmake b/compiler/nnc/requires.cmake
new file mode 100644
index 000000000..8b460d962
--- /dev/null
+++ b/compiler/nnc/requires.cmake
@@ -0,0 +1,7 @@
+require("adtidas")
+require("mir-caffe2-importer")
+require("mir-caffe-importer")
+require("mir-onnx-importer")
+require("mir-interpreter")
+require("mir-tflite-importer")
+require("stdex")
diff --git a/compiler/nnc/support/CLOptionChecker.cpp b/compiler/nnc/support/CLOptionChecker.cpp
new file mode 100644
index 000000000..1ec1e876a
--- /dev/null
+++ b/compiler/nnc/support/CLOptionChecker.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "support/CommandLine.h"
+
+#include <dirent.h>
+#include <cerrno>
+#include <cstring>
+
+namespace nnc
+{
+namespace cli
+{
+
+void checkInFile(const Option<std::string> &in_file)
+{
+ if (in_file.empty())
+ throw BadOption("Input file name should not be empty");
+
+ auto f = fopen(in_file.c_str(), "rb");
+ if (!f)
+ throw BadOption("Cannot open file <" + in_file + ">");
+ fclose(f);
+} // checkInFile
+
+void checkOutFile(const Option<std::string> &out_file)
+{
+ if (out_file.empty())
+ throw BadOption("Output file name should not be empty");
+
+ /// @todo: if file already exists need to check accessibility
+
+} // checkOutFile
+
+void checkInDir(const Option<std::string> &dir)
+{
+ auto stream = opendir(dir.c_str());
+
+ if (stream == nullptr)
+ throw BadOption(std::string("Could not open directory: ") + std::strerror(errno) + ".");
+
+ closedir(stream);
+} // checkInDir
+
+void checkOutDir(const Option<std::string> &dir)
+{
+ auto stream = opendir(dir.c_str());
+
+ if (stream == nullptr)
+ {
+ // Do not consider the missing directory an error.
+ if (errno == ENOENT)
+ return;
+
+ throw BadOption(std::string("Could not open directory: ") + std::strerror(errno) + ".");
+ }
+
+ closedir(stream);
+} // checkOutDir
+
+} // namespace cli
+} // namespace nnc
diff --git a/compiler/nnc/support/CMakeLists.txt b/compiler/nnc/support/CMakeLists.txt
new file mode 100644
index 000000000..542468ddb
--- /dev/null
+++ b/compiler/nnc/support/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(SUPPORT_SOURCES
+ CommandLine.cpp
+ CLOptionChecker.cpp)
+
+nnc_add_library(nnc_support STATIC ${SUPPORT_SOURCES})
+set_target_properties(nnc_support PROPERTIES LINKER_LANGUAGE CXX)
+set_target_properties(nnc_support PROPERTIES POSITION_INDEPENDENT_CODE ON)
diff --git a/compiler/nnc/support/CommandLine.cpp b/compiler/nnc/support/CommandLine.cpp
new file mode 100644
index 000000000..3ab28ff37
--- /dev/null
+++ b/compiler/nnc/support/CommandLine.cpp
@@ -0,0 +1,637 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdlib>
+#include <iostream>
+#include <algorithm>
+#include <string>
+#include <vector>
+#include <map>
+#include <set>
+#include <cassert>
+#include <type_traits>
+#include "cstring"
+
+#include "support/CommandLine.h"
+
+namespace nnc
+{
+namespace cli
+{
+
+constexpr const char *const IOption::_groupNames[];
+
/**
 * @brief Split a C string on commas, removing ALL whitespace inside each token.
 * @param str - input string; an empty string yields an empty vector
 * @return tokens in order of appearance; empty tokens are preserved
 *         (e.g. "a," yields {"a", ""}), matching the original contract.
 *
 * BUG FIX: the original passed raw `char` values straight to isspace(),
 * which is undefined behavior for negative chars and an ambiguous overload
 * with <locale> in scope; the lambda below casts through unsigned char.
 */
static std::vector<std::string> splitByComma(const char *str)
{
  std::vector<std::string> ret;

  if (str == nullptr || *str == '\0')
    return ret;

  // strip every whitespace character from a token, in place
  auto strip_spaces = [](std::string &s) {
    s.erase(std::remove_if(s.begin(), s.end(),
                           [](unsigned char c) { return std::isspace(c) != 0; }),
            s.end());
  };

  const std::string input(str);
  std::string::size_type start = 0;

  while (true)
  {
    auto comma = input.find(',', start);
    std::string name =
        input.substr(start, comma == std::string::npos ? std::string::npos : comma - start);
    strip_spaces(name);
    ret.push_back(name);

    if (comma == std::string::npos)
      break;
    start = comma + 1;
  }

  return ret;

} // splitByComma
+
// Parse a comma-separated list of option names/aliases (e.g. "-m, --model").
std::vector<std::string> optname(const char *names) { return splitByComma(names); }

// Parse a comma-separated list of admissible option values.
std::vector<std::string> optvalues(const char *vals) { return splitByComma(vals); }
+
/**
 * @brief Parse a comma-separated list of separator characters
 *        (e.g. "=, :" yields {'=', ':'}).
 * @param seps - input list; empty input yields an empty vector
 * @return one character per comma-separated entry, leading whitespace skipped
 *
 * BUG FIX: with a trailing comma (e.g. "=,") the original pushed the '\0'
 * terminator into the result and then advanced the loop index past the end
 * of the string — an out-of-bounds read. We now stop at the terminator.
 * Also casts to unsigned char before isspace() to avoid UB on negative chars.
 */
std::vector<char> separators(const char *seps)
{
  std::vector<char> ret;

  if (seps == nullptr || *seps == '\0')
    return ret;

  int i = 0;

  // skip leading whitespace before the first separator character
  while (isspace(static_cast<unsigned char>(seps[i])))
    i++;

  if (seps[i] != '\0')
  {
    ret.push_back(seps[i]);
    i++;
  }

  for (; seps[i] != '\0'; i++)
  {
    if (seps[i] == ',')
    {
      // skip whitespace after the comma
      for (i++; isspace(static_cast<unsigned char>(seps[i])); i++)
        ;

      if (seps[i] == '\0')
        break; // trailing comma: nothing follows, do not record '\0'

      ret.push_back(seps[i]);
    }
  }

  return ret;
} // separators
+
+CommandLine *CommandLine::getParser()
+{
+ static CommandLine Parser;
+
+ return &Parser;
+
+} // getParser
+
+/**
+ * @param options - vector of all options
+ * @return maximum name length of size among all options
+ */
+static size_t calcMaxLenOfOptionsNames(std::vector<IOption *> options)
+{
+ size_t max_len = 0, len;
+
+ for (const auto opt : options)
+ if (!opt->isDisabled())
+ {
+ len = 0;
+ for (const auto &n : opt->getNames())
+ len += n.length();
+ max_len = (max_len < len) ? len : max_len;
+ }
+
+ return max_len;
+
+} // calcMaxLenOfOptionsNames
+
+/**
+ * @brief print option in help message
+ * @param opt - option that will be printed
+ * @param max_opt_name_len - maximum name length of size among all options
+ * @param leading_spaces - leading spaces that will be printed before option name
+ */
+static void printOption(IOption *opt, size_t max_opt_name_len, size_t leading_spaces)
+{
+
+ const auto &option_descr = opt->getOverview();
+ const auto &names = opt->getNames();
+
+ std::string option_names(names[0]); // initialize with option name
+
+ // add option aliases to option_names and count them length
+ for (size_t i = 1; i < names.size(); i++)
+ option_names += ", " + names[i];
+
+ std::string spaces(max_opt_name_len - option_names.length() + leading_spaces, ' ');
+ std::cerr << " " << option_names << spaces << "- " << option_descr << std::endl;
+
+} // printOption
+
+[[noreturn]] void CommandLine::usage(const std::string &msg, int exit_code)
+{
+ if (!msg.empty())
+ {
+ std::cerr << msg << "\n";
+ }
+
+ std::cerr << "Usage: " << _prog_name << " OPTIONS\n";
+ std::cerr << "Available OPTIONS" << std::endl;
+
+ // determine max length
+ size_t max_len = calcMaxLenOfOptionsNames(_options);
+
+ for (const auto opt : _options)
+ {
+ if (opt->isDisabled())
+ // options that are disabled not have to be shown
+ continue;
+
+ if (opt->isGrouped())
+ // options, that are grouped, will be printed later
+ continue;
+
+ printOption(opt, max_len, 4);
+ }
+
+ // print grouped options
+ for (const auto &group : _grouped_options)
+ {
+ std::cerr << "Options from '" << group.second[0]->getGroupName() << "' group:" << std::endl;
+
+ for (const auto opt : group.second)
+ {
+ printOption(opt, max_len, 4);
+ }
+ }
+
+ exit(exit_code);
+
+} // usage
+
+void CommandLine::registerOption(IOption *opt)
+{
+ for (const auto &n : opt->getNames())
+ {
+ auto i = _options_name.emplace(n, opt);
+
+ if (!i.second)
+ {
+ std::cerr << "option name must be unique: `" << n << "'" << std::endl;
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ _options.push_back(opt);
+
+ if (opt->isGrouped())
+ {
+ auto it = _grouped_options.find(opt->getGroup());
+
+ if (it == _grouped_options.end())
+ _grouped_options.emplace(opt->getGroup(), std::vector<IOption *>{opt});
+ else
+ it->second.push_back(opt);
+ }
+
+} // registerOption
+
/**
 * @brief Look up a registered option by its command-line spelling.
 * @param optname - raw argv token; may carry an inline value after a
 *        '=' or ':' separator (e.g. "--opt=val")
 * @return the matching registered option
 * @throws BadOption if the name is unknown, if the stripped name matches an
 *         option that declares no separators, or if the option is disabled
 */
IOption *CommandLine::findOption(const char *optname)
{
  auto it = _options_name.find(optname);

  if (it == _options_name.end())
  {
    // optname can contain separators, try
    // to strip these separators and repeat a search
    size_t i = 0;
    for (; optname[i] != '\0' && optname[i] != '=' && optname[i] != ':'; i++)
      ;

    std::string strip_optname(optname, i);
    it = _options_name.find(strip_optname);

    if (it == _options_name.end())
    {
      // couldn't find option
      throw BadOption(optname, "");
    }
    else
    {
      IOption *opt = it->second;

      if (opt->getSeparators().empty())
      {
        // the stripped name matched, but this option declares no
        // separators, so the "name=value" spelling is invalid for it
        throw BadOption(optname, "");
      }
    }
  }

  if (it->second->isDisabled())
  {
    // if option is disabled we don't have to recognize it
    throw BadOption(optname, "");
  }

  return it->second;

} // findOption
+
+// check that option value is correct
+static void checkOptionValue(const IOption *opt, const std::string &opt_name,
+ const std::string &val)
+{
+ auto valid_vals = opt->getValidVals();
+ bool is_valid = valid_vals.empty();
+
+ for (const auto &v : valid_vals)
+ {
+ if (v == val)
+ {
+ // value is valid
+ is_valid = true;
+ break;
+ }
+ }
+
+ if (!is_valid)
+ {
+ throw BadOption(opt_name, val);
+ }
+
+} // checkOptionValue
+
/**
 * @brief Locate the value of a single-value option.
 * @param opt - the option whose value is being sought
 * @param argv - the full argument vector
 * @param cur_argv - index of the option token itself
 * @return pointer to the value text; "" when no value is present
 * @throws BadOption (via checkOptionValue) when the value is not admissible
 *
 * Search order: first an inline value after one of the option's declared
 * separator characters within the same token; otherwise the next argv
 * element, unless that element looks like another option.
 */
const char *CommandLine::findOptionValue(const IOption *opt, const char **argv, int cur_argv)
{
  auto seps = opt->getSeparators();
  const char *opt_name = argv[cur_argv];
  const char *val_pos = nullptr;

  // search one of the separators
  for (auto s : seps)
  {
    for (int i = 0; opt_name[i] != '\0'; i++)
    {
      if (s == opt_name[i])
      {
        // separator is found, set val_pos to symbol after it
        val_pos = &opt_name[i] + 1;
        break;
      }
    }

    if (val_pos)
    {
      break;
    }
  }

  // if option doesn't have additional separators or these separators aren't
  // found then we assume that option value is the next element in argv,
  // but if the next element starts with '-' we suppose that option value is empty
  // because options start with '-'
  if (!val_pos)
  {
    if (_args_num == cur_argv + 1)
    {
      // the option token is the last argv element - no value present
      val_pos = "";
    }
    else
    {
      val_pos = argv[cur_argv + 1];

      if (val_pos[0] == '-')
      {
        // it can be a value for numeric (negative numbers)
        // or symbolic (contains value `-`) option
        if (!isdigit(val_pos[1]) && val_pos[1])
        {
          val_pos = "";
        }
      }
    }
  }

  // check that option value is correct
  checkOptionValue(opt, opt_name, val_pos);

  return val_pos;

} // findOptionValue
+
+const char *CommandLine::findValueForMultOption(const IOption *opt, const std::string &opt_name,
+ const char **argv, int cur_argv)
+{
+ const char *val_pos = nullptr;
+
+ if (cur_argv >= _args_num)
+ {
+ return nullptr;
+ }
+
+ val_pos = argv[cur_argv];
+
+ if (val_pos[0] == '-')
+ {
+ // it can be a value for numeric (negative numbers)
+ // or symbolic (contains value `-`) option
+ if (!isdigit(val_pos[1]) && val_pos[1])
+ {
+ return nullptr;
+ }
+ }
+
+ checkOptionValue(opt, opt_name, val_pos);
+
+ return val_pos;
+
+} // findValueForMultOption
+
+/**
+ * @brief find option by name
+ * @param opt - found option
+ * @param options - all options
+ * @return true if option was found in options
+ */
+static bool isOptionInOptions(IOption *opt, const std::set<std::string> &options)
+{
+
+ for (const auto &name : opt->getNames())
+ {
+ if (options.find(name) != options.end())
+ {
+ return true;
+ }
+ }
+
+ return false;
+
+} // isOptionInOptions
+
+static bool areOptionsIntersected(const std::vector<IOption *> grouped_options,
+ const std::set<std::string> &all_options)
+{
+ for (const auto &opt : grouped_options)
+ if (isOptionInOptions(opt, all_options))
+ return true;
+
+ return false;
+} // areOptionsIntersected
+
/**
 * @brief Ensure every mandatory registered option appeared on the command line.
 * @param cmd_args - canonical names of the options actually parsed
 *
 * For a grouped mandatory option the requirement is waived when no option of
 * the same group was given (an entire group may legitimately be absent).
 * Calls usage() — which exits the process — when a required option is missing.
 */
void CommandLine::checkRegisteredOptions(const std::set<std::string> &cmd_args)
{
  for (const auto &opt : _options)
  {
    if (opt->isOptional() || isOptionInOptions(opt, cmd_args))
      continue;

    if (opt->isGrouped())
    {
      auto it = _grouped_options.find(opt->getGroup());
      assert(it != _grouped_options.end());

      // no member of this group was given at all - the group is absent,
      // so this member is not required either
      if (!areOptionsIntersected(it->second, cmd_args))
        continue;
    }

    // option is not found then print error message
    std::string options;

    for (const auto &n : opt->getNames())
    {
      options += (n + " ");
    }

    usage("one of the following options must be defined: " + options);
  }

} // checkRegisteredOptions
+
+void CommandLine::checkOptions(const std::set<std::string> &cmd_args)
+{
+ for (const auto &o : _options)
+ {
+ // search option from command line
+ for (const auto &n : o->getNames())
+ {
+ if (cmd_args.find(n) == cmd_args.end())
+ {
+ // name isn't found
+ continue;
+ }
+
+ // check option
+ try
+ {
+ o->runCheckerFunc();
+ }
+ catch (BadOption &e)
+ {
+ usage(e.what());
+ }
+
+ } // opt names
+ } // options
+
+} // checkOptions
+
/**
 * @brief Parse the process command line and populate all registered options.
 * @param argc - number of argv elements
 * @param argv - command-line arguments; argv[0] is the program name
 * @param check_nonoptional - when true, verify that every mandatory
 *        registered option was present
 *
 * On any error, on an empty command line, and on -h/--help this calls
 * usage(), which terminates the process.
 */
void CommandLine::parseCommandLine(int argc, const char **argv, bool check_nonoptional)
{
  std::set<std::string> cmd_args;
  IOption *opt;
  const char *arg_val = nullptr;

  _prog_name = argv[0];
  _args_num = argc;

  if (argc == 1)
  {
    // empty command line
    usage();
  }

  // search help option and print help if this option is passed
  for (int i = 1; i < argc; i++)
  {
    if (!strcmp(argv[i], "-h") || !strcmp(argv[i], "--help"))
    {
      usage("", EXIT_SUCCESS);
    }
  }

  // NOTE: the step compares POINTERS, not text - when the previous iteration
  // consumed argv[i + 1] as the option's value (arg_val aliases that element),
  // the loop advances by 2, otherwise by 1.
  for (int i = 1; i < argc; i += (argv[i + 1] == arg_val) ? 2 : 1)
  {
    if (argv[i][0] != '-')
    {
      // every option token must start with '-'
      std::string err_msg(std::string("invalid command line argument: ") + argv[i]);
      usage(err_msg);
    }

    // find registered option
    try
    {
      opt = findOption(argv[i]);
    }
    catch (BadOption &e)
    {
      // usage() does not return, so `opt` is never read uninitialized
      std::string err_msg(std::string("invalid option: ") + e.getName());
      usage(err_msg);
    }

    // figure out value for option
    try
    {
      if (opt->canHaveSeveralVals())
      {
        // consume consecutive argv elements until one no longer looks like
        // a value for this option
        int j = i + 1;
        for (arg_val = findValueForMultOption(opt, argv[i], argv, j); arg_val;
             arg_val = findValueForMultOption(opt, argv[i], argv, j))
        {
          // set value for option
          opt->setValue(arg_val);
          j++;
        }

        i = j - 1;
      }
      else
      {
        arg_val = findOptionValue(opt, argv, i);

        // set value for option
        opt->setValue(arg_val);
      }
    }
    catch (BadOption &e)
    {
      std::string optname = e.getName();
      optname = optname.empty() ? argv[i] : optname;
      std::string err_msg(std::string("invalid value: ") + e.getValue() +
                          std::string(" for option: ") + optname);
      usage(err_msg);
    }

    // we can't just put argv[i] because option can have separators
    cmd_args.insert(opt->getNames()[0]);
  }

  if (check_nonoptional)
  {
    // check that all registered options are present in command line
    checkRegisteredOptions(cmd_args);
  }

  // verify options
  checkOptions(cmd_args);

} // parseCommandLine
+
//
// specializations of setValue method for all supported option type
//
// string: an empty value is ignored, keeping the option's default
template <> void Option<std::string>::setValue(const std::string &val)
{
  if (!val.empty())
    this->setRawValue(val);
}

// vector of strings: each non-empty value is appended (multi-value option)
template <> void Option<std::vector<std::string>>::setValue(const std::string &val)
{
  if (!val.empty())
    this->push_back(val);
}

// vector of ints: parsed with stoi (throws std::invalid_argument /
// std::out_of_range for malformed input)
template <> void Option<std::vector<int>>::setValue(const std::string &val)
{
  if (!val.empty())
    this->push_back(stoi(val));
}

// bool: unlike the other specializations an empty value is still converted -
// presumably so a bare flag (no value) toggles the option; confirm against
// convToBool's handling of ""
template <> void Option<bool>::setValue(const std::string &val)
{
  this->setRawValue(this->convToBool(val));
}

// char: first converting through convToChar; empty values keep the default
template <> void Option<char>::setValue(const std::string &val)
{
  if (!val.empty())
    this->setRawValue(this->convToChar(val));
}
+
// Integral options: all parse through convToNum on a widened 64-bit type
// before narrowing to the option's actual width.
// NOTE(review): presumably convToNum range-checks against the target
// type - confirm, otherwise out-of-range values would silently truncate.

// int8
template <> void Option<int8_t>::setValue(const std::string &val)
{
  if (!val.empty())
    this->setRawValue(this->template convToNum<int64_t>(val));
}

// int16
template <> void Option<int16_t>::setValue(const std::string &val)
{
  if (!val.empty())
    this->setRawValue(this->template convToNum<int64_t>(val));
}

// int32
template <> void Option<int32_t>::setValue(const std::string &val)
{
  if (!val.empty())
    this->setRawValue(this->template convToNum<int64_t>(val));
}

// uint8
template <> void Option<uint8_t>::setValue(const std::string &val)
{
  if (!val.empty())
    this->setRawValue(this->template convToNum<uint64_t>(val));
}

// uint16
template <> void Option<uint16_t>::setValue(const std::string &val)
{
  if (!val.empty())
    this->setRawValue(this->template convToNum<uint64_t>(val));
}

// uint32
template <> void Option<uint32_t>::setValue(const std::string &val)
{
  if (!val.empty())
    this->setRawValue(this->template convToNum<uint64_t>(val));
}
+
+} // namespace cli
+} // namespace nnc
diff --git a/compiler/nnc/tests/CMakeLists.txt b/compiler/nnc/tests/CMakeLists.txt
new file mode 100644
index 000000000..c244ec777
--- /dev/null
+++ b/compiler/nnc/tests/CMakeLists.txt
@@ -0,0 +1,3 @@
# nnc test suites: model import, soft backend, and ACL soft backend system tests.
add_subdirectory(import)
add_subdirectory(soft_backend)
add_subdirectory(acl_soft_backend)
diff --git a/compiler/nnc/tests/acl_soft_backend/AclCppOperations.cpp b/compiler/nnc/tests/acl_soft_backend/AclCppOperations.cpp
new file mode 100644
index 000000000..4ae020355
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/AclCppOperations.cpp
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+#include <sstream>
+#include <thread>
+#include <cmath>
+#include <memory>
+#include <H5Cpp.h>
+#include <stdlib.h>
+#include "BuildInfo.h"
+
+using namespace std;
+
+static string netAddr(getenv("ODROID_NET_ADDR") ? getenv("ODROID_NET_ADDR") : "");
+
+static unique_ptr<char[]> readTensorDataFromHdf5File(const string &file_name, vector<int> &shape)
+{
+ try
+ {
+ H5::H5File h5File(file_name, H5F_ACC_RDONLY);
+ auto tensor_name = h5File.getObjnameByIdx(0);
+ auto dataset = h5File.openDataSet(tensor_name);
+ auto dataspace = dataset.getSpace();
+ auto rank = dataspace.getSimpleExtentNdims();
+
+ if (rank < 2)
+ return nullptr;
+
+ hsize_t dims[rank];
+
+ if (dataspace.getSimpleExtentDims(dims) != rank)
+ return nullptr;
+
+ int size = 1;
+
+ for (int i = 0; i < rank; ++i)
+ {
+ size *= dims[i];
+ shape.push_back(dims[i]);
+ }
+
+ auto result = unique_ptr<char[]>(new char[size * sizeof(float)]);
+ dataset.read(&result[0], H5::PredType::NATIVE_FLOAT);
+ return result;
+ }
+ catch (H5::FileIException &)
+ {
+ return nullptr;
+ }
+}
+
+// TODO: this function was copied from CPPOperations.cpp, move it to a shared place.
+bool areFloatsNear(float a, float b, int32_t ulp, float eps)
+{
+ assert(ulp < (1 << 23) && "this algorithm is not applicable for such large diffs");
+ assert(eps >= 0 && "epsilon should be positive number");
+ if (fabs(a - b) <= eps)
+ return true;
+ // since this point need to dind difference between numbers
+ // in terms of ULP
+ int32_t ai;
+ int32_t bi;
+ memcpy(&ai, &a, sizeof(float));
+ memcpy(&bi, &b, sizeof(float));
+ // compare mantissa of numbers
+ if (ai > bi)
+ return ai - bi <= ulp;
+ return bi - ai <= ulp;
+}
+
// Compare the tensors stored in two HDF5 files element-wise
// (tolerance: 32 ULP or 1e-6 absolute).
static void compareHdf5Files(const string &file_name1, const string &file_name2)
{
  vector<int> shape1;
  auto tensor1 = readTensorDataFromHdf5File(file_name1, shape1);
  // NOTE(review): `&tensor1[0]` dereferences the unique_ptr BEFORE the null
  // check below - undefined behavior when the file could not be read; the
  // ASSERT should precede the cast. Same for tensor2. TODO: reorder.
  float *tensorData1 = reinterpret_cast<float *>(&tensor1[0]);
  ASSERT_NE(tensorData1, nullptr);
  vector<int> shape2;
  auto tensor2 = readTensorDataFromHdf5File(file_name2, shape2);
  float *tensorData2 = reinterpret_cast<float *>(&tensor2[0]);
  ASSERT_NE(tensorData2, nullptr);
  ASSERT_EQ(shape1.size(), shape2.size());
  int size = 1;

  // shapes must match dimension by dimension; accumulate the element count
  for (int i = 0; i < shape1.size(); ++i)
  {
    ASSERT_EQ(shape1[i], shape2[i]);
    size *= shape1[i];
  }

  for (int i = 0; i < size; ++i)
  {
    ASSERT_TRUE(areFloatsNear(tensorData1[i], tensorData2[i], 32, 1e-6));
  }
}
+
// Build a per-thread temporary directory name: /tmp/nnc_test_<thread-id>.
static string genTmpDirName()
{
  ostringstream name;
  name << "/tmp/nnc_test_" << this_thread::get_id();

  return name.str();
}
+
// Run a shell command on the remote Odroid device via ssh; true on success.
static bool runOnOdroid(const string &remote_cmd)
{
  string cmd = "ssh " + netAddr + " \"" + remote_cmd + "\"";
  return system(cmd.c_str()) == 0;
}

// Copy a local file to the remote Odroid device via scp; true on success.
static bool copyToOdroid(const string &src, const string &dst)
{
  string cmd("scp -q " + src + " " + netAddr + ":" + dst);
  return system(cmd.c_str()) == 0;
}

// Copy a file from the remote Odroid device to the host; true on success.
static bool copyFromOdroid(const string &src, const string &dst)
{
  string cmd("scp -q " + netAddr + ":" + src + " " + dst);
  return system(cmd.c_str()) == 0;
}
+
/**
 * @brief Run one ACL backend system test end to end: deploy the generated
 *        artifact to the Odroid device, execute it there, fetch the result
 *        and compare it with the reference HDF5 produced by Caffe.
 * @param name - test-case name; also the subdirectory under binDir holding
 *        the artifact, its parameters, the input and the reference output
 */
static void runAclSystemTest(const string &name)
{
  // Ensure the Odroid device net address was set.
  ASSERT_TRUE(!netAddr.empty());

  // The name of the temporary directory which is generated on the remote device.
  string dir_name = genTmpDirName();

  // Ensure there is no such directory on the remote device.
  ASSERT_TRUE(runOnOdroid("rm -rf " + dir_name));

  // Create the temporary directory on the remote device.
  ASSERT_TRUE(runOnOdroid("mkdir " + dir_name));

  // Copy the executable artifact file to the remote device.
  ASSERT_TRUE(copyToOdroid(binDir + "/" + name + "/nnc_test", dir_name));

  // Copy the artifact parameter file to the remote device.
  ASSERT_TRUE(copyToOdroid(binDir + "/" + name + "/AclArtifact.par", dir_name));

  // Copy the model input HDF5 file to the remote device.
  ASSERT_TRUE(
      copyToOdroid(binDir + "/" + name + "/in_" + name + "_caffe.hdf5", dir_name + "/in.hdf5"));

  // Switch to the artifact directory on the remote device and run the artifact.
  ASSERT_TRUE(runOnOdroid("cd " + dir_name + "; ./nnc_test"));

  // Copy the resulting file from the remote device to the host.
  ASSERT_TRUE(copyFromOdroid(dir_name + "/out.hdf5", binDir + "/" + name));

  // Remove the temporary test case directory from the remote device.
  ASSERT_TRUE(runOnOdroid("rm -rf " + dir_name));

  // Compare the resulting HDF5 file with the reference one.
  compareHdf5Files(binDir + "/" + name + "/ref.hdf5", binDir + "/" + name + "/out.hdf5");
}
+
// Each test cross-builds and runs the generated artifact for one operation on
// the Odroid device and compares its output against the Caffe reference
// (see runAclSystemTest above).
TEST(acl_cpp_operations_test, convolution) { runAclSystemTest("convolution"); }

TEST(acl_cpp_operations_test, depthwise_convolution) { runAclSystemTest("depthwise_convolution"); }

TEST(acl_cpp_operations_test, convolution_with_bias) { runAclSystemTest("convolution_with_bias"); }

TEST(acl_cpp_operations_test, scale) { runAclSystemTest("scale"); }

TEST(acl_cpp_operations_test, relu) { runAclSystemTest("relu"); }

TEST(acl_cpp_operations_test, pooling_max) { runAclSystemTest("pooling_max"); }

TEST(acl_cpp_operations_test, pooling_avg) { runAclSystemTest("pooling_avg"); }

TEST(acl_cpp_operations_test, concatenate) { runAclSystemTest("concatenate"); }

TEST(acl_cpp_operations_test, reshape) { runAclSystemTest("reshape"); }

TEST(acl_cpp_operations_test, fully_connected) { runAclSystemTest("fully_connected"); }
diff --git a/compiler/nnc/tests/acl_soft_backend/BuildInfo.h.in b/compiler/nnc/tests/acl_soft_backend/BuildInfo.h.in
new file mode 100644
index 000000000..b390e8e4f
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/BuildInfo.h.in
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NNC_BUILD_INFO_H_IN_H_
+#define _NNC_BUILD_INFO_H_IN_H_
+
+static std::string binDir = "${CMAKE_CURRENT_BINARY_DIR}";
+
+#endif //_NNC_BUILD_INFO_H_IN_H_
diff --git a/compiler/nnc/tests/acl_soft_backend/CMakeLists.txt b/compiler/nnc/tests/acl_soft_backend/CMakeLists.txt
new file mode 100644
index 000000000..b33c1e66f
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/CMakeLists.txt
@@ -0,0 +1,125 @@
# Emit a uniform warning explaining why the ACL backend system tests are skipped.
function(acl_warn MESSAGE)
  message(WARNING "The ACL backend system tests will not be built: ${MESSAGE}")
endfunction(acl_warn)
+
+# Path to the folder where the Odroid root folder is either mounted or copied.
+if(NOT DEFINED ENV{ODROID_MIRROR_DIR})
+ acl_warn("the ODROID_MIRROR_DIR environment variable was not defined.\n\
+ As a cross-build is done, this variable should point to a directory\n\
+ which is either mounted to (with e.g. SSHFS) or contains a copy of the target\n\
+ (e.g. Odroid XU4) device file system.")
+ return()
+endif()
+
+# Path to the ACL library root on the Odroid device.
+if(NOT DEFINED ENV{ODROID_ACL_DIR})
+ acl_warn("the ODROID_ACL_DIR environment variable was not defined.\n\
+ As a cross-build is done, this variable should contain the path to the root directory\n\
+ of the Arm Compute Library on the target (e.g. Odroid XU4) device.")
+ return()
+endif()
+
+# Path to the HDF5 library on the Odroid.
+# It must most likely be: /usr/lib/arm-linux-gnueabihf/hdf5/serial
+if(NOT DEFINED ENV{ODROID_H5_DIR})
+ acl_warn("the ODROID_H5_DIR environment variable was not defined.\n\
+ As a cross-build is done, this variable should contain the path to the root directory\n\
+ of the HDF5 library on the target (e.g. Odroid XU4) device.\n\
+ Often it would be the: /usr/lib/arm-linux-gnueabihf/hdf5/serial directory.")
+ return()
+endif()
+
+find_package(HDF5 COMPONENTS CXX REQUIRED)
+nnas_find_package(GTest REQUIRED)
+
+# Provide the test suite with the information where to locate executables to run etc.
+configure_file(BuildInfo.h.in BuildInfo.h)
+
+GTest_AddTest(nnc_acl_soft_backend_system_test AclCppOperations.cpp)
+add_dependencies(nnc_acl_soft_backend_system_test tensor_gen caffegen nnkit-run)
+target_include_directories(nnc_acl_soft_backend_system_test PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
+ ${HDF5_INCLUDE_DIRS})
+target_link_libraries(nnc_acl_soft_backend_system_test ${HDF5_CXX_LIBRARIES})
+
+file(GLOB MODELS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "models/*.prototxt")
+
+# Loop over all the existing system test models.
+foreach(MODEL IN ITEMS ${MODELS})
+ # Set the model-related variables used inside the loop.
+ set(MODEL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${MODEL})
+ get_filename_component(MODEL_NAME ${MODEL} NAME_WE)
+ set(MODEL_DIR ${CMAKE_CURRENT_BINARY_DIR}/${MODEL_NAME})
+ set(COPIED_MODEL_FILE ${MODEL_DIR}/${MODEL_NAME}.prototxt)
+ set(INITIALIZED_MODEL_FILE ${MODEL_DIR}/${MODEL_NAME}.prototxt.weights)
+ set(ENCODED_MODEL_FILE ${MODEL_DIR}/${MODEL_NAME}.caffemodel)
+ set(INPUT_FILE ${MODEL_DIR}/in_${MODEL_NAME}_caffe.hdf5)
+ set(REFERENCE_OUTPUT ${MODEL_DIR}/ref.hdf5)
+ set(MODEL_DIR_TARGET nnc_acl_soft_backend_${MODEL_NAME}_dir)
+ set(COPIED_MODEL_TARGET nnc_acl_soft_backend_${MODEL_NAME}_prototxt)
+ set(INITIALIZED_MODEL_TARGET nnc_acl_soft_backend_${MODEL_NAME}_weights)
+ set(ENCODED_MODEL_TARGET nnc_acl_soft_backend_${MODEL_NAME}_caffemodel)
+ set(INPUT_TARGET nnc_acl_soft_backend_${MODEL_NAME}_input)
+ set(REFERENCE_OUTPUT_TARGET nnc_acl_soft_backend_${MODEL_NAME}_reference_output)
+ set(MAIN_FILE_TARGET nnc_acl_soft_backend_${MODEL_NAME}_main)
+ set(ARTIFACT_TARGET_CMAKE nnc_acl_soft_backend_${MODEL_NAME}_cmake)
+ set(ARTIFACT_TARGET_TOOLCHAIN nnc_acl_soft_backend_${MODEL_NAME}_toolchain)
+ set(ARTIFACT_TARGET nnc_acl_soft_backend_${MODEL_NAME}_artifact)
+ set(ARTIFACT_TARGET_MAKE nnc_acl_soft_backend_${MODEL_NAME}_artifact_make)
+ set(ARTIFACT_TARGET_BINARY nnc_acl_soft_backend_${MODEL_NAME}_artifact_binary)
+
+ # Create a directrory for handling the model in the binary directory.
+ add_custom_target(${MODEL_DIR_TARGET} ALL ${CMAKE_COMMAND} -E make_directory ${MODEL_DIR})
+
+ # Copy the model prototxt to the model build directory.
+ add_custom_target(${COPIED_MODEL_TARGET} ALL ${CMAKE_COMMAND} -E copy ${MODEL_FILE} ${COPIED_MODEL_FILE})
+
+ # Copy the artifact main and project files into the model subfolder inside the binary directory.
+ add_custom_target(${MAIN_FILE_TARGET} ALL ${CMAKE_COMMAND}
+ -E copy ${CMAKE_CURRENT_SOURCE_DIR}/artifact_cmake/main.cpp ${MODEL_DIR}/main.cpp)
+
+ # Copy the artifact CMakeLists.txt to the artifact build directory.
+ add_custom_target(${ARTIFACT_TARGET_CMAKE} ALL ${CMAKE_COMMAND}
+ -E copy ${CMAKE_CURRENT_SOURCE_DIR}/artifact_cmake/CMakeLists.txt ${MODEL_DIR}/CMakeLists.txt)
+
+ # Copy the artifact toolchain file to the artifact build directory.
+ add_custom_target(${ARTIFACT_TARGET_TOOLCHAIN} ALL ${CMAKE_COMMAND}
+ -E copy ${CMAKE_CURRENT_SOURCE_DIR}/artifact_cmake/odroid.cmake ${MODEL_DIR}/odroid.cmake)
+
+ # Initialize the model with weights.
+ add_custom_target(${INITIALIZED_MODEL_TARGET} ALL cat ${COPIED_MODEL_FILE} | GLOG_minloglevel=2
+ $<TARGET_FILE:caffegen> init > ${INITIALIZED_MODEL_FILE}
+ DEPENDS ${COPIED_MODEL_TARGET})
+
+ # Encode the model.
+ add_custom_target(${ENCODED_MODEL_TARGET} ALL cat ${INITIALIZED_MODEL_FILE} | GLOG_minloglevel=2
+ $<TARGET_FILE:caffegen> encode > ${ENCODED_MODEL_FILE}
+ DEPENDS ${INITIALIZED_MODEL_TARGET})
+
+ add_custom_target(${INPUT_TARGET} ALL $<TARGET_FILE:tensor_gen> data ${MODEL_NAME} 1 3 4 8
+ WORKING_DIRECTORY ${MODEL_DIR})
+
+ message("INPUT_FILE = ${INPUT_FILE}")
+
+ # Generate the 'reference' output with NNKIT.
+ add_custom_target(${REFERENCE_OUTPUT_TARGET} ALL $<TARGET_FILE:nnkit-run>
+ --backend $<TARGET_FILE:nnkit_caffe_backend> --backend-arg ${INITIALIZED_MODEL_FILE}
+ --pre $<TARGET_FILE:nnkit_HDF5_import_action> --pre-arg ${INPUT_FILE}
+ --post $<TARGET_FILE:nnkit_HDF5_export_action> --post-arg ${REFERENCE_OUTPUT}
+ DEPENDS $<TARGET_FILE:nnkit-run> ${INPUT_TARGET} ${INITIALIZED_MODEL_TARGET})
+
+ # Generate an artifact from the model.
+ add_custom_target(${ARTIFACT_TARGET} ALL $<TARGET_FILE:nnc>
+ --caffe -m ${ENCODED_MODEL_FILE} -o AclArtifact -d ${MODEL_DIR} --target=arm-gpu-c++
+ DEPENDS $<TARGET_FILE:nnc> ${ENCODED_MODEL_FILE})
+
+ # Generate a Makefile for the artifact cross-building.
+ add_custom_target(${ARTIFACT_TARGET_MAKE} ALL cmake .
+ -DCMAKE_TOOLCHAIN_FILE=odroid.cmake
+ DEPENDS ${ARTIFACT_TARGET_CMAKE} ${ARTIFACT_TARGET_TOOLCHAIN} ${ARTIFACT_TARGET}
+ WORKING_DIRECTORY ${MODEL_DIR})
+
+ # Cross-build the artifact with the generated Makefile.
+ add_custom_target(${ARTIFACT_TARGET_BINARY} ALL make
+ DEPENDS ${ARTIFACT_TARGET_MAKE}
+ WORKING_DIRECTORY ${MODEL_DIR})
+endforeach(MODEL)
diff --git a/compiler/nnc/tests/acl_soft_backend/artifact_cmake/CMakeLists.txt b/compiler/nnc/tests/acl_soft_backend/artifact_cmake/CMakeLists.txt
new file mode 100644
index 000000000..ceecded9e
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/artifact_cmake/CMakeLists.txt
@@ -0,0 +1,21 @@
# Build project for the generated ACL artifact, cross-compiled for the Odroid
# target (see odroid.cmake toolchain file). AclArtifact.cpp is produced by nnc.
cmake_minimum_required(VERSION 3.5)
project(nnc_test)

set(CMAKE_CXX_STANDARD 11)

# Paths to the mirrored Odroid file system and the ACL installation on it,
# taken from the environment (validated by the parent CMakeLists).
set(ODROID_MIRROR_DIR $ENV{ODROID_MIRROR_DIR})
set(ODROID_ACL_DIR $ENV{ODROID_ACL_DIR})
set(ODROID_ACL_INC_DIR ${ODROID_ACL_DIR}/include)
set(ODROID_ACL_BUILD_DIR ${ODROID_ACL_DIR}/build)

find_library(OPEN_CL OpenCL /usr/lib/arm-linux-gnueabihf)
find_library(ARM_COMPUTE arm_compute PATHS ${ODROID_ACL_BUILD_DIR})
find_library(ARM_COMPUTE_CORE arm_compute_core PATHS ${ODROID_ACL_BUILD_DIR})
find_package(HDF5 COMPONENTS CXX REQUIRED)

add_executable(nnc_test main.cpp AclArtifact.cpp)

# Headers are resolved inside the mirrored target file system.
target_include_directories(nnc_test PRIVATE . ${ODROID_MIRROR_DIR}${ODROID_ACL_INC_DIR}
                                    ${ODROID_MIRROR_DIR}${ODROID_ACL_DIR} ${ODROID_MIRROR_DIR}${HDF5_INCLUDE_DIRS})
target_link_libraries(nnc_test ${ARM_COMPUTE} ${ARM_COMPUTE_CORE} ${OPEN_CL} ${HDF5_CXX_LIBRARIES})
target_compile_definitions(nnc_test PRIVATE ARM_COMPUTE_CL)
diff --git a/compiler/nnc/tests/acl_soft_backend/artifact_cmake/main.cpp b/compiler/nnc/tests/acl_soft_backend/artifact_cmake/main.cpp
new file mode 100644
index 000000000..c326b390b
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/artifact_cmake/main.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AclArtifact.h"
+#include <iostream>
+#include <memory>
+#include <H5Cpp.h>
+
+using namespace std;
+using namespace arm_compute;
+
// Copy the contents of a CL tensor into a freshly allocated host byte buffer.
// The tensor is mapped for CPU access, every element is visited with a window
// loop (one float per coordinate), and the tensor is unmapped before return.
static unique_ptr<char[]> getTensorData(CLTensor &tensor)
{
  auto buf = unique_ptr<char[]>(new char[tensor.info()->total_size()]);
  tensor.map();
  Window window;
  window.use_tensor_dimensions(tensor.info()->tensor_shape());
  Iterator i(&tensor, window);
  char *ptr = &buf[0];

  // device -> host: copy each element into the linear buffer
  execute_window_loop(window,
                      [&i, &ptr](const Coordinates &) {
                        memcpy(ptr, i.ptr(), sizeof(float));
                        ptr += sizeof(float);
                      },
                      i);

  tensor.unmap();
  return buf;
}
+
// Fill a CL tensor from an HDF5 dataset: the dataset is read into a host
// buffer as native floats, then copied element-by-element into the mapped
// tensor with a window loop (mirror image of getTensorData above).
static void readTensor(CLTensor &tensor, H5::DataSet &dataset)
{
  auto buf = unique_ptr<char[]>(new char[tensor.info()->total_size()]);
  dataset.read(&buf[0], H5::PredType::NATIVE_FLOAT);
  tensor.map();
  Window window;
  window.use_tensor_dimensions(tensor.info()->tensor_shape());
  Iterator i(&tensor, window);
  char *ptr = &buf[0];

  // host -> device: copy each element from the linear buffer
  execute_window_loop(window,
                      [&i, &ptr](const Coordinates &) {
                        memcpy(i.ptr(), ptr, sizeof(float));
                        ptr += sizeof(float);
                      },
                      i);

  tensor.unmap();
}
+
+static bool readTensorFromHDF5File(CLTensor &tensor, const string &file_name)
+{
+ // Read from the .hdf5 file
+ try
+ {
+ H5::H5File h5File(file_name, H5F_ACC_RDONLY);
+ auto tensor_name = h5File.getObjnameByIdx(0);
+ auto dataset = h5File.openDataSet(tensor_name);
+ auto dataspace = dataset.getSpace();
+ auto rank = dataspace.getSimpleExtentNdims();
+
+ if (rank < 2)
+ return false;
+
+ hsize_t dims[rank];
+
+ if (dataspace.getSimpleExtentDims(dims) != rank)
+ return false;
+
+ TensorShape shape;
+ shape.set_num_dimensions(rank - 1);
+
+ for (int i = 1; i < rank; ++i)
+ shape[rank - i - 1] = dims[i];
+
+ readTensor(tensor, dataset);
+ }
+ catch (H5::FileIException &)
+ {
+ return false;
+ }
+
+ return true;
+}
+
+static void writeTensorToHDF5File(CLTensor &tensor, const string &tensor_name,
+ const string &file_name)
+{
+ const TensorShape &orig_shape = tensor.info()->tensor_shape();
+ const TensorShape &transposed_shape = orig_shape;
+ int rank = transposed_shape.num_dimensions();
+ hsize_t dims[rank + 1];
+ dims[0] = 1;
+
+ for (int i = 0; i < rank; ++i)
+ dims[rank - i] = transposed_shape[i];
+
+ // Write to the .hdf5 file
+ H5::H5File h5File(file_name, H5F_ACC_TRUNC);
+ H5::DataSpace dataspace(rank + 1, dims);
+ auto dataset = h5File.createDataSet(tensor_name, H5::PredType::IEEE_F32BE, dataspace);
+ dataset.write(&getTensorData(tensor)[0], H5::PredType::NATIVE_FLOAT);
+}
+
/**
 * @brief Artifact driver: reads "in.hdf5" from the working directory, runs
 *        the generated artifact's inference, and writes "out.hdf5".
 * @return 0 on success, 1 if the OpenCL scheduler failed to initialize
 */
int main(int argc, char *argv[])
{
  CLScheduler::get().default_init();

  if (!CLScheduler::get().is_initialised())
  {
    cout << "Failed to initialise the ACL scheduler" << endl;
    return 1;
  }

  AclArtifact artifact;
  CLTensor &artifact_in = artifact.getInput();
  // NOTE(review): the return value of readTensorFromHDF5File is ignored -
  // inference proceeds on an unfilled tensor if the input file is missing.
  readTensorFromHDF5File(artifact_in, "in.hdf5");

  artifact.Inference();

  CLTensor &artifact_out = artifact.getOutput();
  writeTensorToHDF5File(artifact_out, "out", "out.hdf5");

  return 0;
}
diff --git a/compiler/nnc/tests/acl_soft_backend/artifact_cmake/odroid.cmake b/compiler/nnc/tests/acl_soft_backend/artifact_cmake/odroid.cmake
new file mode 100644
index 000000000..92faea0cd
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/artifact_cmake/odroid.cmake
@@ -0,0 +1,17 @@
# Toolchain file for cross-compiling the ACL soft-backend tests for an
# Odroid (ARM hard-float Linux) target.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)

set(CMAKE_C_COMPILER /usr/bin/arm-linux-gnueabihf-gcc)
set(CMAKE_CXX_COMPILER /usr/bin/arm-linux-gnueabihf-g++)

# Locations of the mirrored Odroid root filesystem and of the HDF5
# installation, taken from the environment.
set(ODROID_MIRROR_DIR $ENV{ODROID_MIRROR_DIR})
set(ODROID_H5_DIR $ENV{ODROID_H5_DIR})

# NOTE(review): the original wrote ${ODROID_MIRROR_DIR}${ODROID_H5_DIR} with no
# separator, concatenating both directories into a single nonsense path.
# CMAKE_FIND_ROOT_PATH is a list of root directories, so list them separately.
set(CMAKE_FIND_ROOT_PATH ${ODROID_MIRROR_DIR} ${ODROID_H5_DIR})
set(CMAKE_SYSROOT ${ODROID_MIRROR_DIR})
set(CMAKE_PREFIX_PATH ${ODROID_MIRROR_DIR})

# Search programs on the host, but libraries/headers/packages only in the
# target root(s) above.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
diff --git a/compiler/nnc/tests/acl_soft_backend/models/concatenate.prototxt b/compiler/nnc/tests/acl_soft_backend/models/concatenate.prototxt
new file mode 100644
index 000000000..78d0f8f05
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/models/concatenate.prototxt
@@ -0,0 +1,23 @@
+name: "CONCANET"
+layer {
+ name: "input"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape {
+ dim: 1
+ dim: 3
+ dim: 4
+ dim: 8
+ }
+ }
+}
+layer {
+ name: "inception_a1_output"
+ type: "Concat"
+ bottom: "data"
+ bottom: "data"
+ bottom: "data"
+ top: "inception_a1_output"
+ phase: TEST
+}
diff --git a/compiler/nnc/tests/acl_soft_backend/models/convolution.prototxt b/compiler/nnc/tests/acl_soft_backend/models/convolution.prototxt
new file mode 100644
index 000000000..176940cfc
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/models/convolution.prototxt
@@ -0,0 +1,34 @@
+name: "CONVONET"
+layer {
+ name: "input"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape {
+ dim: 1
+ dim: 3
+ dim: 4
+ dim: 8
+ }
+ }
+}
+layer {
+ name: "conv1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ pad: 1
+ kernel_size: 3
+ stride: 2
+ weight_filler {
+ type: "msra"
+ }
+ }
+}
diff --git a/compiler/nnc/tests/acl_soft_backend/models/convolution_with_bias.prototxt b/compiler/nnc/tests/acl_soft_backend/models/convolution_with_bias.prototxt
new file mode 100644
index 000000000..804bac1e8
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/models/convolution_with_bias.prototxt
@@ -0,0 +1,38 @@
+name: "CONVONET"
+layer {
+ name: "input"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape {
+ dim: 1
+ dim: 3
+ dim: 4
+ dim: 8
+ }
+ }
+}
+layer {
+ name: "conv1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: true
+ pad: 1
+ kernel_size: 3
+ stride: 2
+ weight_filler {
+ type: "msra"
+ }
+ bias_filler {
+ type: "constant"
+ value: 3.14
+ }
+ }
+}
diff --git a/compiler/nnc/tests/acl_soft_backend/models/depthwise_convolution.prototxt b/compiler/nnc/tests/acl_soft_backend/models/depthwise_convolution.prototxt
new file mode 100644
index 000000000..b2b1ce522
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/models/depthwise_convolution.prototxt
@@ -0,0 +1,35 @@
+name: "DEPTHCONVNET"
+layer {
+ name: "input"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape {
+ dim: 1
+ dim: 3
+ dim: 4
+ dim: 8
+ }
+ }
+}
+layer {
+ name: "conv2_1/dw"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv2_1/dw"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ pad: 1
+ kernel_size: 3
+ group: 3
+ stride: 1
+ weight_filler {
+ type: "msra"
+ }
+ }
+}
diff --git a/compiler/nnc/tests/acl_soft_backend/models/fully_connected.prototxt b/compiler/nnc/tests/acl_soft_backend/models/fully_connected.prototxt
new file mode 100644
index 000000000..beca80187
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/models/fully_connected.prototxt
@@ -0,0 +1,29 @@
+name: "FULLYCONNET"
+layer {
+ name: "input"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape {
+ dim: 1
+ dim: 3
+ dim: 4
+ dim: 8
+ }
+ }
+}
+layer {
+ name: "fc8"
+ type: "InnerProduct"
+ param { lr_mult: 1 decay_mult: 1 }
+ inner_product_param {
+ num_output: 10
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_term: false
+ }
+ bottom: "data"
+ top: "fc8"
+}
diff --git a/compiler/nnc/tests/acl_soft_backend/models/pooling_avg.prototxt b/compiler/nnc/tests/acl_soft_backend/models/pooling_avg.prototxt
new file mode 100644
index 000000000..ad93a6e14
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/models/pooling_avg.prototxt
@@ -0,0 +1,26 @@
+name: "POOLINGAVGNET"
+layer {
+ name: "input"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape {
+ dim: 1
+ dim: 3
+ dim: 4
+ dim: 8
+ }
+ }
+}
+layer {
+ name: "inception_a1_pool"
+ type: "Pooling"
+ bottom: "data"
+ top: "inception_a1_pool"
+ pooling_param {
+ pool: AVE
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
diff --git a/compiler/nnc/tests/acl_soft_backend/models/pooling_max.prototxt b/compiler/nnc/tests/acl_soft_backend/models/pooling_max.prototxt
new file mode 100644
index 000000000..7ac060af8
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/models/pooling_max.prototxt
@@ -0,0 +1,26 @@
+name: "POOLINGMAXNET"
+layer {
+ name: "input"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape {
+ dim: 1
+ dim: 3
+ dim: 4
+ dim: 8
+ }
+ }
+}
+layer {
+ name: "inception_a1_pool"
+ type: "Pooling"
+ bottom: "data"
+ top: "inception_a1_pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
diff --git a/compiler/nnc/tests/acl_soft_backend/models/relu.prototxt b/compiler/nnc/tests/acl_soft_backend/models/relu.prototxt
new file mode 100644
index 000000000..5f4932f0c
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/models/relu.prototxt
@@ -0,0 +1,20 @@
+name: "RELUNET"
+layer {
+ name: "input"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape {
+ dim: 1
+ dim: 3
+ dim: 4
+ dim: 8
+ }
+ }
+}
+layer {
+ name: "relu1"
+ type: "ReLU"
+ bottom: "data"
+ top: "relu1"
+}
diff --git a/compiler/nnc/tests/acl_soft_backend/models/reshape.prototxt b/compiler/nnc/tests/acl_soft_backend/models/reshape.prototxt
new file mode 100644
index 000000000..1f48e3972
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/models/reshape.prototxt
@@ -0,0 +1,26 @@
+name: "RESHAPENET"
+layer {
+ name: "input"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape {
+ dim: 1
+ dim: 3
+ dim: 4
+ dim: 8
+ }
+ }
+}
+layer {
+ name: "reshape"
+ type: "Reshape"
+ bottom: "data"
+ top: "output"
+ reshape_param {
+ shape {
+ dim: 1
+ dim: 96
+ }
+ }
+}
diff --git a/compiler/nnc/tests/acl_soft_backend/models/scale.prototxt b/compiler/nnc/tests/acl_soft_backend/models/scale.prototxt
new file mode 100644
index 000000000..b113056f9
--- /dev/null
+++ b/compiler/nnc/tests/acl_soft_backend/models/scale.prototxt
@@ -0,0 +1,30 @@
+name: "SCALENET"
+layer {
+ name: "input"
+ type: "Input"
+ top: "data"
+ input_param {
+ shape {
+ dim: 1
+ dim: 3
+ dim: 4
+ dim: 8
+ }
+ }
+}
+layer {
+ name: "scale1"
+ type: "Scale"
+ bottom: "data"
+ top: "scale1"
+ param {
+ lr_mult: 1
+ decay_mult: 0
+ }
+ scale_param {
+ filler {
+ value: 2.71
+ }
+ bias_term: false
+ }
+}
diff --git a/compiler/nnc/tests/import/CMakeLists.txt b/compiler/nnc/tests/import/CMakeLists.txt
new file mode 100644
index 000000000..0aa07c5e5
--- /dev/null
+++ b/compiler/nnc/tests/import/CMakeLists.txt
@@ -0,0 +1,20 @@
+# TODO: This CMakeLists.txt declares two targets that build executables
+# that can be used to run following system tests:
+#
+# Caffe Mobilenet model is imported successfully
+# Caffe Inception-v3 model is imported successfully
+# Tensorflow Lite Mobilenet model is imported successfully
+# Tensorflow Lite Inception-v3 model is imported successfully
+#
+# These executables are not executed anywhere because it is not yet decided
+# how to store large files (in this case, files with models), and how to run all system tests.
+# As soon as it is decided, model files should be added, as well as the code that runs the tests.
+if(NNC_FRONTEND_TFLITE_ENABLED)
+ add_executable(system_test_import_tflite tflite.cpp)
+ target_link_libraries(system_test_import_tflite PRIVATE nnc_support mir_tflite_importer)
+endif()
+
+if(NNC_FRONTEND_CAFFE_ENABLED)
+ add_executable(system_test_import_caffe caffe.cpp)
+ target_link_libraries(system_test_import_caffe PRIVATE nnc_support mir_caffe_importer)
+endif()
diff --git a/compiler/nnc/tests/import/caffe.cpp b/compiler/nnc/tests/import/caffe.cpp
new file mode 100644
index 000000000..a3abafccb
--- /dev/null
+++ b/compiler/nnc/tests/import/caffe.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "support/CommandLine.h"
+
+#include <caffe_importer.h>
+
+#include <iostream>
+
+using namespace nnc;
+
+int main(int argc, const char **argv)
+{
+ cli::Option<std::string> model_path(cli::optname("--model"), cli::overview("Path to the model"));
+ cli::CommandLine::getParser()->parseCommandLine(argc, argv);
+
+ try
+ {
+ mir_caffe::loadModel(model_path);
+ }
+ catch (...)
+ {
+ std::cout << "Could not create IR for model \"" << model_path << "\"" << std::endl;
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/compiler/nnc/tests/import/tflite.cpp b/compiler/nnc/tests/import/tflite.cpp
new file mode 100644
index 000000000..1c11e02c1
--- /dev/null
+++ b/compiler/nnc/tests/import/tflite.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "support/CommandLine.h"
+
+#include <tflite_importer.h>
+
+#include <iostream>
+
+using namespace nnc;
+
+int main(int argc, const char **argv)
+{
+ cli::Option<std::string> model_path(cli::optname("--model"), cli::overview("Path to the model"));
+ cli::CommandLine::getParser()->parseCommandLine(argc, argv);
+
+ try
+ {
+ mir_tflite::loadModel(model_path);
+ }
+ catch (...)
+ {
+ std::cout << "Could not create IR for model \"" << model_path << "\"" << std::endl;
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/compiler/nnc/tests/soft_backend/CMakeLists.txt b/compiler/nnc/tests/soft_backend/CMakeLists.txt
new file mode 100644
index 000000000..5526327ba
--- /dev/null
+++ b/compiler/nnc/tests/soft_backend/CMakeLists.txt
@@ -0,0 +1,9 @@
+file(GLOB_RECURSE SOFT_TEST_DEF_SOURCES *.def)
+
+nnc_make_generated_sources("${SOFT_TEST_DEF_SOURCES}" ${CMAKE_CURRENT_BINARY_DIR} SOFT_TEST_GENERATED_SOURCES)
+
+add_executable(nnc_system_soft_backend_cpp_compile CompileCPP.cpp ${SOFT_TEST_GENERATED_SOURCES})
+target_link_libraries(nnc_system_soft_backend_cpp_compile PRIVATE soft_backend_cpp mir)
+target_include_directories(nnc_system_soft_backend_cpp_compile PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${NNC_SOFT_BACKEND_DIR})
+
+add_test(nnc_system_test_soft_backend_cpp_compile nnc_system_soft_backend_cpp_compile)
diff --git a/compiler/nnc/tests/soft_backend/CompileCPP.cpp b/compiler/nnc/tests/soft_backend/CompileCPP.cpp
new file mode 100644
index 000000000..63aeb4a1b
--- /dev/null
+++ b/compiler/nnc/tests/soft_backend/CompileCPP.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This is simple tests to check that generator is running properly and creates compilable artifact
+ * This test is not intended to check correctness of generated artifact
+ */
+
+#include "mir/Graph.h"
+#include "mir/Shape.h"
+#include "mir/ops/InputOp.h"
+#include "mir/ops/OutputOp.h"
+#include "mir/ops/ReluOp.h"
+
+#include "backends/soft_backend/CPPGenerator.h"
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+#include <cstdlib>
+
+// This header generated and contains array with test_main.def contents
+#include "test_main.generated.h"
+
+using namespace std;
+
+using namespace nnc;
+using namespace mir;
+
+// Creates simple graph with input and output
+static void fillGraph(Graph &g)
+{
+ Shape input_shape{1, 2, 3};
+ mir::TensorType input_type{mir::DataType::FLOAT32, input_shape};
+ Operation *input_op = g.create<ops::InputOp>(input_type);
+ Operation *relu_op = g.create<ops::ReluOp>(input_op->getOutput(0));
+ Operation *output_op = g.create<ops::OutputOp>(relu_op->getOutput(0));
+ input_op->getOutput(0)->setName("in");
+ relu_op->getOutput(0)->setName("out");
+}
+
// Terminate the test process with an error if `path` does not name a readable
// file (the generator is expected to have created it).
static void checkFileExists(const string &path)
{
  if (!ifstream(path).good())
  {
    cerr << "file " << path << " not created\n";
    exit(1);
  }
}
+
+static void createMain(const string &path, const string &header_path)
+{
+ ofstream out(path);
+ if (!out.good())
+ {
+ cerr << "Main file " << path << " not created\n";
+ exit(1);
+ }
+ out << "#include \"" << header_path << "\"\n";
+ out.write(test_main, sizeof(test_main));
+}
+
// Test driver: run the soft-backend C++ generator on a tiny graph, verify the
// expected output files appear, then check that the generated code actually
// compiles with the host g++. Returns 0 on success, 2 if the compiler could
// not be invoked, 3 if compilation failed (checkFileExists exits with 1).
int main()
{
  // Directory the generator writes into, and the base name of its output files.
  std::string output_dir = "test_output";
  std::string artifact_name = "nnmodel";

  Graph g;
  fillGraph(g);

  nnc::CPPCodeGenerator cpp_code_generator(output_dir, artifact_name);
  cpp_code_generator.run(&g);

  string base_path = output_dir + "/" + artifact_name;

  string code_path = base_path + ".cpp";
  string header_path = base_path + ".h";
  string main_path = base_path + "_main.cpp";

  // The generator is expected to emit a .cpp, a .h and a .params file.
  checkFileExists(code_path);
  checkFileExists(header_path);
  checkFileExists(base_path + ".params");

  // Wrap the generated code in a small main() so it links into a program.
  createMain(main_path, artifact_name + ".h");

  string target_compiler = "g++ -Wall --std=c++11";

  string compiler_command =
      target_compiler + " -I" + output_dir + " " + main_path + " " + code_path;

  // call compiler
  int res = system(compiler_command.c_str());

  // system() returns -1 when the shell/compiler could not be started at all.
  if (res == -1)
  {
    cerr << "failed to call compiler\n";
    return 2;
  }
  if (res != 0)
  {
    cerr << "compiler did not succeed with error code " << res << ": " << compiler_command << "\n";
    return 3;
  }
  return 0;
}
diff --git a/compiler/nnc/tests/soft_backend/test_main.def b/compiler/nnc/tests/soft_backend/test_main.def
new file mode 100644
index 000000000..6a464f862
--- /dev/null
+++ b/compiler/nnc/tests/soft_backend/test_main.def
@@ -0,0 +1,10 @@
+int main()
+{
+ Shape s{1, 2, 3};
+ Tensor in_t(s);
+ NNModel model("nnmodel.params");
+ model.set_in(in_t);
+ model.doInference();
+ std::shared_ptr<Tensor> out_t = model.get_out();
+ return 0;
+}
diff --git a/compiler/nnc/unittests/CMakeLists.txt b/compiler/nnc/unittests/CMakeLists.txt
new file mode 100644
index 000000000..bbb6c7e67
--- /dev/null
+++ b/compiler/nnc/unittests/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Dummy target to build all nnc unit tests
+add_custom_target(nnc_unit_tests)
+add_custom_target(nnc_check COMMAND ${CMAKE_CTEST_COMMAND} DEPENDS nnc_unit_tests)
+
+add_subdirectory(pass)
+add_subdirectory(soft_backend)
+add_subdirectory(acl_backend)
+add_subdirectory(support)
+add_subdirectory(optimizations)
+add_subdirectory(transformations)
diff --git a/compiler/nnc/unittests/acl_backend/CMakeLists.txt b/compiler/nnc/unittests/acl_backend/CMakeLists.txt
new file mode 100644
index 000000000..b89e75498
--- /dev/null
+++ b/compiler/nnc/unittests/acl_backend/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(ACL_CPP_BACKEND_UTEST_SOURCES DOMToText.cpp MIRToDOM.cpp)
+
+file(GLOB_RECURSE ACL_IN_SOURCES "${NNC_ACL_BACKEND_DIR}/*.in")
+nnc_make_generated_sources("${ACL_IN_SOURCES}" ${CMAKE_CURRENT_BINARY_DIR} ACL_GENERATED_SOURCES)
+
+nnc_add_unit_test(nnc_acl_cpp_backend_test ${ACL_CPP_BACKEND_UTEST_SOURCES} ${ACL_GENERATED_SOURCES})
+
+optional_target_link_libraries(nnc_acl_cpp_backend_test mir acl_soft_backend_cpp)
+target_include_directories(nnc_acl_cpp_backend_test PRIVATE ${NNC_ACL_BACKEND_DIR} ${CMAKE_CURRENT_BINARY_DIR})
diff --git a/compiler/nnc/unittests/acl_backend/DOMToText.cpp b/compiler/nnc/unittests/acl_backend/DOMToText.cpp
new file mode 100644
index 000000000..be0e6713c
--- /dev/null
+++ b/compiler/nnc/unittests/acl_backend/DOMToText.cpp
@@ -0,0 +1,497 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This is test set of text generator from DOM entities
+ * ArtifactEntity, ArtifactNamed, ArtifactExpr and ArtifactClassMember
+ * are not tested since they are abstract classes
+ */
+
+#include <sstream>
+#include <tuple>
+
+#include "ArtifactModel.h"
+#include "ArtifactGeneratorCppCode.h"
+#include "ArtifactGeneratorCppDecl.h"
+
+#include "AclArtifactUtilities.generated.h"
+
+#include "gtest/gtest.h"
+
+using namespace std;
+using namespace nnc;
+
+using AF = ArtifactFactory;
+
+TEST(acl_backend_dom_to_text, ArtifactLiteral)
+{
+ stringstream code_out;
+ stringstream decl_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ ArtifactGeneratorCppDecl decl_gen(decl_out);
+ const char *lit_data = "hello_world";
+ shared_ptr<ArtifactLiteral> lit = AF::lit(lit_data);
+ lit->accept(&code_gen);
+ lit->accept(&decl_gen);
+ ASSERT_EQ(code_out.str(), lit_data);
+ ASSERT_EQ(decl_out.str(), lit_data);
+}
+
+TEST(acl_backend_dom_to_text, ArtifactId)
+{
+ stringstream code_out;
+ stringstream decl_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ ArtifactGeneratorCppDecl decl_gen(decl_out);
+ const char *id_data = "some_id";
+ shared_ptr<ArtifactId> id = AF::id(id_data);
+ id->accept(&code_gen);
+ id->accept(&decl_gen);
+ ASSERT_EQ(code_out.str(), id_data);
+ ASSERT_EQ(decl_out.str(), id_data);
+}
+
+TEST(acl_backend_dom_to_text, ArtifactRef)
+{
+ stringstream code_out;
+ stringstream decl_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ ArtifactGeneratorCppDecl decl_gen(decl_out);
+ const char *id_data = "some_id";
+ shared_ptr<ArtifactId> id = AF::id(id_data);
+ shared_ptr<ArtifactRef> ref = AF::ref(id);
+ ref->accept(&code_gen);
+ ref->accept(&decl_gen);
+ string ref_data = string("&") + id_data;
+ ASSERT_EQ(code_out.str(), ref_data);
+ ASSERT_EQ(decl_out.str(), ref_data);
+}
+
+TEST(acl_backend_dom_to_text, ArtifactDeref)
+{
+ stringstream code_out;
+ stringstream decl_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ ArtifactGeneratorCppDecl decl_gen(decl_out);
+ const char *id_data = "some_id";
+ shared_ptr<ArtifactId> id = AF::id(id_data);
+ shared_ptr<ArtifactDeref> deref = AF::deref(id);
+ deref->accept(&code_gen);
+ deref->accept(&decl_gen);
+ string ref_data = string("*") + id_data;
+ ASSERT_EQ(code_out.str(), ref_data);
+ ASSERT_EQ(decl_out.str(), ref_data);
+}
+
+static void checkCall(ArtifactCallType type, const char *call_name,
+ const list<shared_ptr<ArtifactExpr>> &args, shared_ptr<ArtifactExpr> obj,
+ const char *ref_data)
+{
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ shared_ptr<ArtifactFunctionCall> call = AF::call(call_name, args, obj, type);
+ call->accept(&code_gen);
+ ASSERT_EQ(code_out.str(), ref_data);
+}
+
+TEST(acl_backend_dom_to_text, ArtifactFunctionCall)
+{
+ const char *lit_data = "123";
+ const char *id_data = "some_id";
+ shared_ptr<ArtifactExpr> id = AF::id(id_data);
+ shared_ptr<ArtifactExpr> lit = AF::lit(lit_data);
+ const list<shared_ptr<ArtifactExpr>> args{id, lit};
+
+ shared_ptr<ArtifactId> obj = AF::id("obj");
+
+ using TestCase = tuple<ArtifactCallType, shared_ptr<ArtifactExpr>, const char *>;
+ TestCase test_cases[] = {TestCase{ArtifactCallType::scope, nullptr, "foo(some_id, 123)"},
+ TestCase{ArtifactCallType::obj, obj, "obj.foo(some_id, 123)"},
+ TestCase{ArtifactCallType::ref, obj, "obj->foo(some_id, 123)"},
+ TestCase{ArtifactCallType::scope, obj, "obj::foo(some_id, 123)"}};
+
+ for (const auto &test : test_cases)
+ {
+ ArtifactCallType call_type = get<0>(test);
+ shared_ptr<ArtifactExpr> obj = get<1>(test);
+ const char *ref_output = get<2>(test);
+ checkCall(call_type, "foo", args, obj, ref_output);
+ }
+}
+
+static void checkUnaryExpression(ArtifactUnOp op, shared_ptr<ArtifactExpr> var,
+ const char *ref_data)
+{
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+
+ shared_ptr<ArtifactUnaryExpr> expr = AF::un(op, var);
+ expr->accept(&code_gen);
+ ASSERT_EQ(code_out.str(), ref_data);
+}
+
+TEST(acl_backend_dom_to_text, ArtifactUnaryExpr)
+{
+ const char *var_name = "id";
+ shared_ptr<ArtifactId> var = AF::id(var_name);
+ pair<ArtifactUnOp, const char *> test_cases[] = {
+ {ArtifactUnOp::preIncr, "++id"}, {ArtifactUnOp::preDecr, "--id"},
+ {ArtifactUnOp::heapNew, "new id"}, {ArtifactUnOp::heapFree, "delete id"},
+ {ArtifactUnOp::postIncr, "id++"}, {ArtifactUnOp::postDecr, "id--"}};
+
+ for (auto test : test_cases)
+ {
+ auto op_type = test.first;
+ auto ref_output = test.second;
+ checkUnaryExpression(op_type, var, ref_output);
+ }
+}
+
+static void checkBinaryExpression(ArtifactBinOp op, shared_ptr<ArtifactExpr> op1,
+ shared_ptr<ArtifactExpr> op2, const char *ref_data)
+{
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+
+ shared_ptr<ArtifactBinaryExpr> expr = AF::bin(op, op1, op2);
+ expr->accept(&code_gen);
+ ASSERT_EQ(code_out.str(), ref_data);
+}
+
+TEST(acl_backend_dom_to_text, ArtifactBinaryExpr)
+{
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ const char *op1_name = "a";
+ const char *op2_name = "b";
+ shared_ptr<ArtifactId> op1 = AF::id(op1_name);
+ shared_ptr<ArtifactId> op2 = AF::id(op2_name);
+
+ pair<ArtifactBinOp, const char *> test_cases[] = {
+ {ArtifactBinOp::eq, "a == b"}, {ArtifactBinOp::notEq, "a != b"},
+ {ArtifactBinOp::less, "a < b"}, {ArtifactBinOp::lessOrEq, "a <= b"},
+ {ArtifactBinOp::great, "a > b"}, {ArtifactBinOp::greatOrEq, "a >= b"},
+ {ArtifactBinOp::assign, "a = b"}, {ArtifactBinOp::plus, "a + b"},
+ {ArtifactBinOp::minus, "a - b"}, {ArtifactBinOp::mult, "a * b"},
+ {ArtifactBinOp::div, "a / b"}, {ArtifactBinOp::plusAssign, "a += b"},
+ {ArtifactBinOp::minusAssign, "a -= b"}, {ArtifactBinOp::multAssign, "a *= b"},
+ {ArtifactBinOp::divAssign, "a /= b"}};
+
+ for (auto test : test_cases)
+ {
+ auto op_type = test.first;
+ auto ref_output = test.second;
+ checkBinaryExpression(op_type, op1, op2, ref_output);
+ }
+}
+
+TEST(acl_backend_dom_to_text, ArtifactIndex)
+{
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ const char *arr_name = "a";
+ const char *idx_name = "b";
+ shared_ptr<ArtifactId> arr = AF::id(arr_name);
+ shared_ptr<ArtifactId> idx = AF::id(idx_name);
+ shared_ptr<ArtifactIndex> indexing = AF::ind(arr, idx);
+ indexing->accept(&code_gen);
+ ASSERT_EQ(code_out.str(), "a[b]");
+}
+
+TEST(acl_backend_dom_to_text, ArtifactRet)
+{
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ const char *result_name = "a";
+ shared_ptr<ArtifactId> result = AF::id(result_name);
+ ArtifactRet ret(result);
+ ret.accept(&code_gen);
+ ASSERT_EQ(code_out.str(), "return a");
+}
+
+TEST(acl_backend_dom_to_text, ArtifactBreak)
+{
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ ArtifactBreak brk;
+ brk.accept(&code_gen);
+ ASSERT_EQ(code_out.str(), "break");
+}
+
+TEST(acl_backend_dom_to_text, ArtifactCont)
+{
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ ArtifactCont cont;
+ cont.accept(&code_gen);
+ ASSERT_EQ(code_out.str(), "continue");
+}
+
+TEST(acl_backend_dom_to_text, ArtifactVariable)
+{
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ const char *var_type = "int";
+ const char *var_name = "data";
+ shared_ptr<ArtifactLiteral> dim1 = AF::lit("2");
+ shared_ptr<ArtifactLiteral> dim2 = AF::lit("3");
+ list<shared_ptr<ArtifactExpr>> dims{dim1, dim2};
+ list<shared_ptr<ArtifactExpr>> initializers{AF::lit("123")};
+ shared_ptr<ArtifactVariable> var_decl = AF::var(var_type, var_name, dims, initializers);
+ var_decl->accept(&code_gen);
+ // TODO generate initializers in braces
+ ASSERT_EQ(code_out.str(), "int data[2][3](123)");
+}
+
+TEST(acl_backend_dom_to_text, ArtifactBlock)
+{
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ const char *var_name = "var";
+ const char *lit_val = "123";
+
+ shared_ptr<ArtifactExpr> id = AF::id(var_name);
+ shared_ptr<ArtifactExpr> lit = AF::lit(lit_val);
+ const list<shared_ptr<ArtifactExpr>> args{id, lit};
+
+ shared_ptr<ArtifactFunctionCall> call = AF::call("foo", args);
+
+ ArtifactBlock block;
+
+ block.addStatement(call);
+
+ block.accept(&code_gen);
+ ASSERT_EQ(code_out.str(), " {\n foo(var, 123);\n}\n");
+}
+
+TEST(acl_backend_dom_to_text, ArtifactForLoop)
+{
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ const char *var_name = "i";
+ const char *var_type = "int";
+
+ shared_ptr<ArtifactVariable> iter = AF::var(var_type, var_name, {}, {AF::lit("0")});
+ shared_ptr<ArtifactExpr> step =
+ AF::bin(ArtifactBinOp::plusAssign, AF::id(var_name), AF::lit("1"));
+ shared_ptr<ArtifactExpr> cond =
+ AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
+
+ shared_ptr<ArtifactBinaryExpr> expr =
+ AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
+
+ ArtifactForLoop loop(iter, cond, step);
+
+ loop.getBlock()->addStatement(expr);
+
+ loop.accept(&code_gen);
+ ASSERT_EQ(code_out.str(), "for(int i(0); i <= 123; i += 1) {\n hello += world;\n}\n");
+}
+
+TEST(acl_backend_dom_to_text, ArtifactIf)
+{
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ const char *var_name = "i";
+
+ shared_ptr<ArtifactExpr> cond =
+ AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
+
+ shared_ptr<ArtifactBinaryExpr> expr =
+ AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
+
+ ArtifactIf if_stmt(cond);
+
+ if_stmt.getBlock()->addStatement(expr);
+
+ if_stmt.accept(&code_gen);
+ ASSERT_EQ(code_out.str(), "if(i <= 123) {\n hello += world;\n}\n");
+}
+
+TEST(acl_backend_dom_to_text, ArtifactFunction)
+{
+ stringstream code_out;
+ stringstream decl_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ ArtifactGeneratorCppDecl decl_gen(decl_out);
+ const char *ret_type = "int";
+ const char *func_name = "foo";
+ shared_ptr<ArtifactVariable> arg1 = AF::var("int", "a");
+ shared_ptr<ArtifactVariable> arg2 = AF::var("bool", "b");
+ list<shared_ptr<ArtifactVariable>> args{arg1, arg2};
+
+ // test public class variable
+ ArtifactFunction func_decl(ret_type, func_name, args);
+
+ func_decl.accept(&code_gen);
+ ASSERT_EQ(code_out.str(), "");
+ func_decl.accept(&decl_gen);
+
+ ASSERT_EQ(decl_out.str(), "int foo(int a, bool b);");
+}
+
+TEST(acl_backend_dom_to_text, ArtifactClassVariable)
+{
+ stringstream code_out;
+ stringstream decl_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ ArtifactGeneratorCppDecl decl_gen(decl_out);
+
+ const char *var_type = "int";
+ const char *var_name = "data";
+
+ ArtifactClass cls("Class");
+
+ shared_ptr<ArtifactLiteral> dim1 = AF::lit("2");
+ shared_ptr<ArtifactLiteral> dim2 = AF::lit("3");
+ list<shared_ptr<ArtifactExpr>> dims{dim1, dim2};
+ list<shared_ptr<ArtifactExpr>> list_of_initializer{AF::lit("123")};
+ ArtifactClassVariable var_decl(&cls, var_type, var_name, dims, list_of_initializer);
+
+ var_decl.accept(&code_gen);
+ ASSERT_EQ(code_out.str(), "data(123)");
+ var_decl.accept(&decl_gen);
+ // fixme dimensions are not taken into account, remove ';'
+ ASSERT_EQ(decl_out.str(), "int data;\n");
+}
+
+TEST(acl_backend_dom_to_text, ArtifactClassFunction)
+{
+ stringstream code_out;
+ stringstream decl_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ ArtifactGeneratorCppDecl decl_gen(decl_out);
+ const char *ret_type = "int";
+ const char *func_name = "foo";
+ shared_ptr<ArtifactVariable> arg1 = AF::var("int", "a");
+ shared_ptr<ArtifactVariable> arg2 = AF::var("bool", "b");
+ list<shared_ptr<ArtifactVariable>> args{arg1, arg2};
+
+ ArtifactClass cls("Class");
+
+ // test public class variable
+ shared_ptr<ArtifactClassFunction> cls_func_decl = cls.func(true, ret_type, func_name, args);
+
+ cls_func_decl->accept(&code_gen);
+ // FIXME do not print new line in this visitor
+ ASSERT_EQ(code_out.str(), "int Class::foo(int a, bool b) {\n}\n\n");
+ cls_func_decl->accept(&decl_gen);
+
+ ASSERT_EQ(decl_out.str(), "int foo(int a, bool b);\n");
+
+ decl_out.str("");
+ code_out.str("");
+
+ // test private class variable
+ cls_func_decl = cls.func(false, ret_type, func_name, args);
+
+ cls_func_decl->accept(&code_gen);
+ // FIXME do not print new line in this visitor
+ ASSERT_EQ(code_out.str(), "int Class::foo(int a, bool b) {\n}\n\n");
+ cls_func_decl->accept(&decl_gen);
+
+ ASSERT_EQ(decl_out.str(), "int foo(int a, bool b);\n");
+}
+
+static shared_ptr<ArtifactClassVariable> createClsVariable(ArtifactClass &cls, const char *var_name,
+ bool is_public)
+{
+ const char *var_type = "int";
+ shared_ptr<ArtifactLiteral> dim1 = AF::lit("2");
+ shared_ptr<ArtifactLiteral> dim2 = AF::lit("3");
+ list<shared_ptr<ArtifactExpr>> dims{dim1, dim2};
+ list<shared_ptr<ArtifactExpr>> initializers{AF::lit("123")};
+ shared_ptr<ArtifactClassVariable> var_decl =
+ cls.var(is_public, var_type, var_name, dims, initializers);
+ return var_decl;
+}
+
+static shared_ptr<ArtifactClassFunction> createClsFunction(ArtifactClass &cls,
+ const char *func_name, bool is_public)
+{
+ const char *var_type = "int";
+ const char *func_type = "void";
+ shared_ptr<ArtifactVariable> var1 = AF::var(var_type, "a");
+ shared_ptr<ArtifactVariable> var2 = AF::var(var_type, "b");
+ list<shared_ptr<ArtifactVariable>> args{var1, var2};
+ shared_ptr<ArtifactClassFunction> func_decl = cls.func(is_public, func_type, func_name, args);
+ return func_decl;
+}
+
+TEST(acl_backend_dom_to_text, ArtifactClass)
+{
+ stringstream code_out;
+ stringstream decl_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ ArtifactGeneratorCppDecl decl_gen(decl_out);
+
+ ArtifactClass cls("Class");
+
+ createClsFunction(cls, "public_foo", true);
+ createClsFunction(cls, "private_bar", false);
+
+ createClsVariable(cls, "visible", true);
+ createClsVariable(cls, "invisible", false);
+
+ // Test cpp file generation
+ cls.accept(&code_gen);
+ ASSERT_EQ(code_out.str(), "Class::Class() : invisible(123) {\n}\n\n"
+ "void Class::public_foo(int a, int b) {\n}\n\n"
+ "void Class::private_bar(int a, int b) {\n}\n\n");
+
+ // Test header file generation
+ cls.accept(&decl_gen);
+
+ ASSERT_EQ(decl_out.str(), "class Class {\npublic:\n Class();\n"
+ " void public_foo(int a, int b);"
+ "\n\nprivate:\n void private_bar(int a, int b);\n\n"
+ " int invisible;\n};\n");
+}
+
+TEST(acl_backend_dom_to_text, ArtifactModule)
+{
+ stringstream code_out;
+ stringstream decl_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+ ArtifactGeneratorCppDecl decl_gen(decl_out);
+
+ ArtifactModule m("module");
+
+ m.addHeaderInclude("foo.h");
+ m.addHeaderSysInclude("vector");
+ m.addSourceInclude("bar.h");
+ m.addSourceSysInclude("list");
+
+ shared_ptr<ArtifactClass> cls = m.createClass("Class");
+
+ // test cpp file generation
+ // We use snippet code to encode some common functions
+ // This snippet is wrapped in prefix and postfix code
+ const char *code_prefix = "#include \"module.h\"\n\n#include <list>\n\n#include \"bar.h\"\n\n";
+ const char *code_suffix = "\nClass::Class() {\n}\n\n";
+
+ string ref_data = string(code_prefix) +
+ string(AclArtifactUtilities, sizeof(AclArtifactUtilities)) + code_suffix;
+ m.accept(&code_gen);
+ ASSERT_EQ(code_out.str(), ref_data);
+
+ // test header code generation
+ const char *ref_decl_data = "#include <vector>\n\n#include \"foo.h\"\n\nclass Class {\npublic:\n "
+ " Class();\n\nprivate:\n};\n";
+ m.accept(&decl_gen);
+
+ ASSERT_EQ(decl_out.str(), ref_decl_data);
+}
diff --git a/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp b/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp
new file mode 100644
index 000000000..a9b36a145
--- /dev/null
+++ b/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp
@@ -0,0 +1,539 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This test set checks correctness of MIR to DOM transformation
+ */
+
+// system
+#include <functional>
+#include <vector>
+
+// ACL backend
+#include "ArtifactModel.h"
+#include "AclCppOpGenerator.h"
+
+// MIR
+#include "mir/Graph.h"
+#include "mir/ops/CappedReluOp.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/Deconv2DOp.h"
+#include "mir/ops/DepthwiseConv2DOp.h"
+#include "mir/ops/EluOp.h"
+#include "mir/ops/FullyConnectedOp.h"
+#include "mir/ops/InputOp.h"
+#include "mir/ops/MaxPool2DOp.h"
+#include "mir/ops/OutputOp.h"
+#include "mir/ops/PadOp.h"
+#include "mir/ops/ReduceMeanOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/ops/ReshapeOp.h"
+#include "mir/ops/SigmoidOp.h"
+#include "mir/ops/SoftmaxOp.h"
+#include "mir/ops/TanhOp.h"
+#include "mir/ops/TransposeOp.h"
+
+#include "gtest/gtest.h"
+
+using namespace std;
+using namespace nnc;
+using namespace mir;
+
+namespace
+{
+
+using OpConstructor = function<Operation *(Graph &g, vector<Operation::Output *> &inputs)>;
+
+const char *artifactName = "nnmodel";
+
+/**
+ * @brief Fills the graph with FLOAT32 input ops, one main operation produced by
+ *        op_constr, and output ops attached to every result of that operation
+ * @param g reference to graph which should be filled with operations
+ * @param op_constr functor which creates the main operation of the graph
+ * @param input_shapes vector of network input shapes
+ */
+void fillGraph(Graph &g, const OpConstructor &op_constr, const vector<Shape> &input_shapes)
+{
+ // Create graph inputs, named "x0", "x1", ...
+ vector<mir::Operation::Output *> inputs;
+ for (std::size_t i = 0; i < input_shapes.size(); ++i)
+ {
+ mir::TensorType input_type{mir::DataType::FLOAT32, input_shapes[i]};
+ auto input = g.create<ops::InputOp>(input_type)->getOutput(0);
+ input->setName("x" + to_string(i));
+ inputs.push_back(input);
+ }
+
+ // Create the operation.
+ Operation *op = op_constr(g, inputs);
+
+ // Create graph outputs, named "y0", "y1", ...
+ for (std::size_t i = 0; i < op->getNumOutputs(); ++i)
+ {
+ op->getOutput(i)->setName("y" + to_string(i));
+ g.create<ops::OutputOp>(op->getOutput(i));
+ }
+}
+
+/**
+ * @brief Checks that list of includes contains all and only desired headers
+ *        (order-insensitive: the list is compared as a set)
+ * @param artifact_headers List of headers stored in ArtifactModule
+ * @param expected_headers Reference set of desired headers
+ * @param message Message to print in case of check failure
+ */
+void checkHeadersSetsEqual(const list<string> &artifact_headers,
+ const set<string> &expected_headers, const char *message)
+{
+ set<string> artifact_set(artifact_headers.begin(), artifact_headers.end());
+ ASSERT_EQ(artifact_set, expected_headers) << message;
+}
+
+/**
+ * @brief Check that artifact DOM has all needed includes
+ * @param m Root module of DOM
+ */
+void checkDomIncludes(const ArtifactModule &m)
+{
+ // check system includes, like '#include <vector>'
+ // NOTE(review): the failure-message strings here and below appear swapped
+ // ("header includes" vs "system header includes") — confirm intended wording.
+ checkHeadersSetsEqual(m.headerSysIncludes(), {"fstream"}, "header includes diverged");
+
+ checkHeadersSetsEqual(m.sourceIncludes(), {}, "source includes diverged");
+
+ // check ordinary includes, like '#include "artifact_data.h"'
+ checkHeadersSetsEqual(
+ m.headerIncludes(),
+ {"arm_compute/core/Types.h", "arm_compute/runtime/BlobLifetimeManager.h",
+ "arm_compute/runtime/CL/CLBufferAllocator.h", "arm_compute/runtime/CL/CLFunctions.h",
+ "arm_compute/runtime/CL/CLScheduler.h", "arm_compute/runtime/MemoryManagerOnDemand.h",
+ "arm_compute/runtime/PoolManager.h"},
+ "system header includes diverged");
+
+ checkHeadersSetsEqual(m.sourceSysIncludes(), {}, "system source includes diverged");
+}
+
+/**
+ * @brief Check that artifact DOM contains appropriate getters
+ * @param c Main artifact class
+ * @param tensors List of values accessible via getters
+ */
+void checkDomArtifactGetters(const ArtifactClass &c, const vector<string> &tensors)
+{
+ // TODO: implement getter checks; currently a stub, so getters go unchecked
+}
+
+/**
+ * @brief Check that artifact class constructor initializes all layers
+ * @param c Main artifact class
+ * @param tensors List of NN layers
+ */
+void checkDomArtifactConstructor(const ArtifactClass &c, const vector<string> &tensors)
+{
+ // TODO: implement constructor checks; currently a stub
+}
+
+/**
+ * @brief Check that inference executes layers in appropriate order
+ * @param f Inference function description
+ * @param layers List of layers in inference
+ */
+void checkDomInference(const ArtifactFunction &f, const vector<string> &layers)
+{
+ // TODO: implement inference-order checks; currently a stub
+}
+
+/**
+ * @brief Check that artifact DOM contains appropriate class
+ * @param c Artifact class to check
+ * @param layers Names of NN layers in inference sequence
+ * @param tensors Names of tensors in artifact
+ */
+void checkArtifactClass(const ArtifactClass &c, const vector<string> &layers,
+ const vector<string> &tensors)
+{
+ checkDomArtifactGetters(c, tensors);
+ checkDomArtifactConstructor(c, tensors);
+ // The class must expose a public "Inference" method.
+ const ArtifactFunction *inf_func = nullptr;
+ for (const shared_ptr<ArtifactClassFunction> &method : c.publicFunctions())
+ {
+ if (method->name() == "Inference")
+ {
+ inf_func = method.get();
+ break;
+ }
+ }
+ ASSERT_NE(inf_func, nullptr);
+ checkDomInference(*inf_func, layers);
+}
+
+/**
+ * @brief Root of check functions
+ * @param m Main artifact module
+ * @param layers Names of NN layers in inference sequence
+ * @param tensors Names of tensors in artifact
+ */
+void checkDomStructure(const ArtifactModule &m, const vector<string> &layers,
+ const vector<string> &tensors)
+{
+ ASSERT_EQ(m.name(), artifactName);
+ checkDomIncludes(m);
+ // The module must contain exactly one entity: the generated artifact class.
+ ASSERT_EQ(m.entities().size(), 1);
+ ArtifactClass *cls = dynamic_cast<ArtifactClass *>(m.entities().front().get());
+ ASSERT_NE(cls, nullptr);
+ checkArtifactClass(*cls, layers, tensors);
+}
+
+/**
+ * @brief Creates TensorVariant with specified shape
+ * @param shape Desired shape of TV
+ * @return FLOAT32 TensorVariant of that shape, filled with 0, 1, 2, ...
+ */
+TensorVariant createTensorVariant(const Shape &shape)
+{
+ auto num_elems = shape.numElements();
+
+ unique_ptr<float[]> data(new float[num_elems]);
+ float *data_ptr = data.get();
+ for (int32_t i = 0; i < num_elems; ++i)
+ data_ptr[i] = i;
+ // NOTE(review): assumes TensorVariant copies the buffer — 'data' is freed
+ // when this function returns; confirm the constructor's ownership rules.
+ return TensorVariant(DataType::FLOAT32, shape, data_ptr);
+}
+} // namespace
+
+// Actual tests
+
+// A graph holding a single ConstantOp (no inputs) must translate into a valid DOM.
+TEST(acl_backend_mir_to_dom, constant)
+{
+ Shape shape{3, 4};
+ TensorVariant constant_data = createTensorVariant(shape);
+
+ Graph g;
+ OpConstructor op_generator = [&constant_data](Graph &g,
+ const vector<Operation::Output *> &inputs) {
+ return g.create<mir::ops::ConstantOp>(constant_data);
+ };
+
+ fillGraph(g, op_generator, {});
+
+ stringstream params_out;
+ AclCppOpGenerator dom_gen(artifactName, params_out);
+
+ const ArtifactModule &m = dom_gen.generate(&g);
+
+ checkDomStructure(m, {}, {});
+}
+
+// Two 4D inputs concatenated along axis 3 must translate into a valid DOM.
+TEST(acl_backend_mir_to_dom, concat)
+{
+ Graph g;
+ OpConstructor op_generator = [](Graph &g, const vector<Operation::Output *> &inputs) {
+ return g.create<mir::ops::ConcatOp>(inputs, 3);
+ };
+ vector<Shape> input_shapes{{2, 3, 5, 1}, {2, 3, 5, 3}};
+
+ fillGraph(g, op_generator, input_shapes);
+
+ stringstream params_out;
+ AclCppOpGenerator dom_gen(artifactName, params_out);
+
+ const ArtifactModule &m = dom_gen.generate(&g);
+
+ checkDomStructure(m, {}, {});
+
+ // NOTE(review): code_gen is constructed but its output is never checked.
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+}
+
+// Elementwise-add DOM generation test is not written yet, hence DISABLED.
+TEST(acl_backend_mir_to_dom, DISABLED_add)
+{
+ // TODO
+}
+
+// Elementwise-mul DOM generation test is not written yet, hence DISABLED.
+TEST(acl_backend_mir_to_dom, DISABLED_mul)
+{
+ // TODO
+}
+
+// Elementwise-max DOM generation test is not written yet, hence DISABLED.
+TEST(acl_backend_mir_to_dom, DISABLED_max)
+{
+ // TODO
+}
+
+// Transposed-convolution DOM generation test is not written yet, hence DISABLED.
+TEST(acl_backend_mir_to_dom, DISABLED_conv_transposed2d)
+{
+ // TODO
+}
+
+// Conv2D with a constant 1x3x3x3 kernel must translate into a valid DOM.
+TEST(acl_backend_mir_to_dom, conv2d)
+{
+ const int32_t channels = 3;
+ mir::Shape kernel_shape{1, 3, 3, channels}; // output Channels, Height, Width, input Channels
+ mir::TensorVariant kernel_tensor = createTensorVariant(kernel_shape);
+
+ Graph g;
+ OpConstructor op_generator =
+ [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
+ return g.create<mir::ops::Conv2DOp>(inputs[0], kernel, mir::Conv2DOpAttributes());
+ };
+
+ vector<Shape> input_shapes{{1, 10, 10, channels}};
+
+ fillGraph(g, op_generator, input_shapes);
+
+ stringstream params_out;
+ AclCppOpGenerator dom_gen(artifactName, params_out);
+
+ const ArtifactModule &m = dom_gen.generate(&g);
+
+ checkDomStructure(m, {}, {});
+}
+
+// DepthwiseConv2D with a constant kernel must translate into a valid DOM.
+TEST(acl_backend_mir_to_dom, depthwise_conv)
+{
+ const int32_t channels = 3;
+ mir::Shape kernel_shape{3, 3, channels, 1}; // Height, Width, Channels, Channel multiplier
+ mir::TensorVariant kernel_tensor = createTensorVariant(kernel_shape);
+
+ Graph g;
+ OpConstructor op_generator =
+ [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ Conv2DOpAttributes attributes;
+ auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
+ return g.create<mir::ops::DepthwiseConv2DOp>(inputs[0], kernel, attributes);
+ };
+
+ vector<Shape> input_shapes{{1, 10, 10, channels}};
+
+ fillGraph(g, op_generator, input_shapes);
+
+ stringstream params_out;
+ AclCppOpGenerator dom_gen(artifactName, params_out);
+
+ const ArtifactModule &m = dom_gen.generate(&g);
+
+ checkDomStructure(m, {}, {});
+
+ // NOTE(review): code_gen is constructed but its output is never checked.
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+}
+
+// FullyConnected (1x13 input, 13x7 constant weights) must translate into a valid DOM.
+TEST(acl_backend_mir_to_dom, fully_connected)
+{
+ const int32_t in_size = 13;
+ const int32_t out_size = 7;
+ Shape input_shape_data{1, in_size};
+ Shape weights_shape{in_size, out_size};
+ TensorVariant weights_tensor = createTensorVariant(weights_shape);
+
+ Graph g;
+ OpConstructor opGenerator = [weights_tensor](Graph &g,
+ const vector<Operation::Output *> &inputs) {
+ auto weights = g.create<mir::ops::ConstantOp>(weights_tensor)->getOutput(0);
+ return g.create<mir::ops::FullyConnectedOp>(inputs[0], weights);
+ };
+
+ fillGraph(g, opGenerator, {input_shape_data});
+
+ stringstream params_out;
+ AclCppOpGenerator dom_gen(artifactName, params_out);
+
+ const ArtifactModule &m = dom_gen.generate(&g);
+
+ checkDomStructure(m, {}, {});
+
+ // NOTE(review): code_gen is constructed but its output is never checked.
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+}
+
+// MaxPool2D with a 3x3 window must translate into a valid DOM.
+TEST(acl_backend_mir_to_dom, maxpool)
+{
+ mir::MaxPool2DOpAttributes attributes;
+ attributes.window = {3, 3};
+
+ Graph g;
+ OpConstructor op_generator = [&attributes](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::MaxPool2DOp>(inputs[0], attributes);
+ };
+
+ vector<Shape> input_shapes{{1, 10, 10, 3}};
+
+ fillGraph(g, op_generator, input_shapes);
+
+ stringstream params_out;
+ AclCppOpGenerator dom_gen(artifactName, params_out);
+
+ const ArtifactModule &m = dom_gen.generate(&g);
+
+ checkDomStructure(m, {}, {});
+
+ // NOTE(review): code_gen is constructed but its output is never checked.
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+}
+
+// AvgPool2D DOM generation test is not written yet, hence DISABLED.
+TEST(acl_backend_mir_to_dom, DISABLED_avgpool)
+{
+ // TODO
+}
+
+/**
+ * @brief Function to test simple activation operations: builds a single-op
+ *        graph over a {1, 10, 10, 3} input and checks the generated DOM
+ * @param op_generator functor that generates target operator
+ */
+static void testActivationOp(const OpConstructor &op_generator)
+{
+ Graph g;
+ vector<Shape> input_shapes{{1, 10, 10, 3}};
+
+ fillGraph(g, op_generator, input_shapes);
+
+ stringstream params_out;
+ AclCppOpGenerator dom_gen(artifactName, params_out);
+
+ const ArtifactModule &m = dom_gen.generate(&g);
+
+ checkDomStructure(m, {}, {});
+
+ // NOTE(review): code_gen is constructed but its output is never checked.
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+}
+
+// ReLU activation must translate into a valid DOM.
+TEST(acl_backend_mir_to_dom, relu)
+{
+ OpConstructor op_generator = [](Graph &g, const std::vector<Operation::Output *> &inputs) {
+ return g.create<mir::ops::ReluOp>(inputs[0]);
+ };
+
+ testActivationOp(op_generator);
+}
+
+// CappedReLU (cap = 6) must translate into a valid DOM.
+TEST(acl_backend_mir_to_dom, capped_relu)
+{
+ float cap = 6;
+ OpConstructor op_generator = [cap](Graph &g, const std::vector<Operation::Output *> &inputs) {
+ return g.create<mir::ops::CappedReluOp>(inputs[0], cap);
+ };
+
+ testActivationOp(op_generator);
+}
+
+// Sigmoid activation must translate into a valid DOM.
+TEST(acl_backend_mir_to_dom, sigmoid)
+{
+ OpConstructor op_generator = [](Graph &g, const std::vector<Operation::Output *> &inputs) {
+ return g.create<mir::ops::SigmoidOp>(inputs[0]);
+ };
+
+ testActivationOp(op_generator);
+}
+
+// ELU DOM generation test is not written yet, hence DISABLED.
+TEST(acl_backend_mir_to_dom, DISABLED_elu)
+{
+ // TODO
+}
+
+// Tanh activation must translate into a valid DOM.
+TEST(acl_backend_mir_to_dom, tanh)
+{
+ OpConstructor op_generator = [](Graph &g, const std::vector<Operation::Output *> &inputs) {
+ return g.create<mir::ops::TanhOp>(inputs[0]);
+ };
+
+ testActivationOp(op_generator);
+}
+
+// ReduceMean DOM generation test is not written yet, hence DISABLED.
+TEST(acl_backend_mir_to_dom, DISABLED_reduce_mean)
+{
+ // TODO
+}
+
+// Softmax along axis 3 of a {1, 1, 1, 3} input must translate into a valid DOM.
+TEST(acl_backend_mir_to_dom, softmax)
+{
+ Graph g;
+ OpConstructor op_generator = [](Graph &g, const vector<Operation::Output *> &inputs) {
+ return g.create<mir::ops::SoftmaxOp>(inputs[0], 3);
+ };
+ vector<Shape> input_shapes{{1, 1, 1, 3}};
+
+ fillGraph(g, op_generator, input_shapes);
+
+ stringstream params_out;
+ AclCppOpGenerator dom_gen(artifactName, params_out);
+
+ const ArtifactModule &m = dom_gen.generate(&g);
+
+ checkDomStructure(m, {}, {});
+
+ // NOTE(review): code_gen is constructed but its output is never checked.
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+}
+
+// Reshape (flattening H*W*C into one dimension) must translate into a valid DOM.
+TEST(acl_backend_mir_to_dom, reshape)
+{
+ Graph g;
+
+ const int32_t h = 10;
+ const int32_t w = 10;
+ const int32_t c = 3;
+
+ Shape input_shape{1, h, w, c};
+ Shape output_shape{1, h * w * c};
+
+ OpConstructor op_generator = [output_shape](Graph &g, const vector<Operation::Output *> &inputs) {
+ return g.create<mir::ops::ReshapeOp>(inputs[0], output_shape);
+ };
+
+ fillGraph(g, op_generator, {input_shape});
+
+ stringstream params_out;
+ AclCppOpGenerator dom_gen(artifactName, params_out);
+
+ const ArtifactModule &m = dom_gen.generate(&g);
+
+ checkDomStructure(m, {}, {});
+
+ // NOTE(review): code_gen is constructed but its output is never checked.
+ stringstream code_out;
+ ArtifactGeneratorCppCode code_gen(code_out);
+}
+
+// Pad DOM generation test is not written yet, hence DISABLED.
+TEST(acl_backend_mir_to_dom, DISABLED_pad)
+{
+ // TODO
+}
+
+// Transpose with permutation {0, 3, 1, 2} must translate into a valid DOM.
+TEST(acl_backend_mir_to_dom, transpose)
+{
+ const int32_t channels = 2;
+ // NOTE(review): 'w' is created but never used below — candidate for removal.
+ TensorVariant w = createTensorVariant({channels});
+
+ vector<size_t> perm{0, 3, 1, 2};
+
+ Graph g;
+ OpConstructor op_generator = [&perm](Graph &g, const vector<Operation::Output *> &inputs) {
+ return g.create<mir::ops::TransposeOp>(inputs[0], perm);
+ };
+ vector<Shape> input_shapes{{1, 10, 10, channels}};
+
+ fillGraph(g, op_generator, input_shapes);
+
+ stringstream params_out;
+ AclCppOpGenerator dom_gen(artifactName, params_out);
+
+ const ArtifactModule &m = dom_gen.generate(&g);
+
+ checkDomStructure(m, {}, {});
+}
diff --git a/compiler/nnc/unittests/caffe_frontend/test_data/unsupported.caffemodel b/compiler/nnc/unittests/caffe_frontend/test_data/unsupported.caffemodel
new file mode 100644
index 000000000..77278fe50
--- /dev/null
+++ b/compiler/nnc/unittests/caffe_frontend/test_data/unsupported.caffemodel
Binary files differ
diff --git a/compiler/nnc/unittests/optimizations/CMakeLists.txt b/compiler/nnc/unittests/optimizations/CMakeLists.txt
new file mode 100644
index 000000000..a3ec3e6af
--- /dev/null
+++ b/compiler/nnc/unittests/optimizations/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Unit tests for the nnc graph-optimization passes.
+set(TESTS_OPTIMIZATIONS_SRC
+ CombineTransposes.cpp
+ SinkTest.cpp
+ DeadCodeElimination.cpp
+ FuseArithmeticOps.cpp)
+nnc_add_unit_test(tests_for_optimizations ${TESTS} ${TESTS_OPTIMIZATIONS_SRC})
+optional_target_link_libraries(tests_for_optimizations nnc_optimizations mir)
diff --git a/compiler/nnc/unittests/optimizations/CombineTransposes.cpp b/compiler/nnc/unittests/optimizations/CombineTransposes.cpp
new file mode 100644
index 000000000..8d90bd20a
--- /dev/null
+++ b/compiler/nnc/unittests/optimizations/CombineTransposes.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passes/optimizations/CombineTransposes.h"
+#include "mir/ops/TransposeOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/ops/OutputOp.h"
+#include "Util.h"
+#include <gtest/gtest.h>
+
+using namespace std;
+using namespace nnc;
+using namespace mir;
+
+namespace
+{
+
+// Three chained transposes with the self-inverse axis order {1, 0, 2}:
+// adjacent pairs cancel, so exactly one transpose must survive the pass.
+TEST(OptPass, eliminateTransposesLinear)
+{
+ mir::Graph g;
+ /* Create graph:
+ * [input]
+ * ||
+ * [Transpose 1]
+ * ||
+ * [Transpose 2]
+ * ||
+ * [relu]
+ */
+ mir::TensorType input_type{mir::DataType::FLOAT32, Shape{1, 2, 3}};
+ Operation *input = g.create<ops::InputOp>(input_type);
+ Operation *tr1 = g.create<ops::TransposeOp>(input->getOutput(0), vector<size_t>{1, 0, 2});
+ Operation *tr15 = g.create<ops::TransposeOp>(tr1->getOutput(0), vector<size_t>{1, 0, 2});
+ Operation *tr2 = g.create<ops::TransposeOp>(tr15->getOutput(0), vector<size_t>{1, 0, 2});
+ Operation *relu = g.create<ops::ReluOp>(tr2->getOutput(0));
+
+ // Check that layout is desired
+ std::stringstream ss;
+ DumpVisitor d(ss);
+ CombineTransposes pass;
+ pass.run(&g);
+ g.accept(&d);
+ // Assert only 1 transpose remains
+ ASSERT_EQ("i_0.t_1.r_4.", ss.str());
+}
+
+// Two chained transposes with different axis orders must be merged into a
+// single transpose whose order is the composition of the two.
+TEST(OptPass, combineTransposesLinear)
+{
+ mir::Graph g;
+ /* Create graph:
+ * [input]
+ * ||
+ * [Transpose 1]
+ * ||
+ * [Transpose 2]
+ * ||
+ * [relu]
+ */
+ mir::TensorType input_type{mir::DataType::FLOAT32, Shape{1, 2, 3}};
+ Operation *input = g.create<ops::InputOp>(input_type);
+ Operation *tr1 = g.create<ops::TransposeOp>(input->getOutput(0), vector<size_t>{1, 0, 2});
+ Operation *tr2 = g.create<ops::TransposeOp>(tr1->getOutput(0), vector<size_t>{0, 2, 1});
+ Operation *relu = g.create<ops::ReluOp>(tr2->getOutput(0));
+
+ std::stringstream ss;
+ DumpVisitor d(ss);
+ CombineTransposes pass;
+ pass.run(&g);
+ g.accept(&d);
+
+ // Assert transposes are combined
+ ASSERT_EQ("i_0.t_4.r_3.", ss.str());
+ Operation::Use use = g.getInputs()[0]->getOutput(0)->getUses()[0];
+ auto ax_ord_actual = dynamic_cast<ops::TransposeOp *>(use.getNode())->getAxisOrder();
+ auto ax_ord_true = vector<size_t>{1, 2, 0};
+ ASSERT_TRUE(ax_ord_actual == ax_ord_true);
+}
+
+// A transpose feeding two identical inverse transposes: all three must cancel,
+// leaving the Add connected directly to the input.
+TEST(OptPass, combineTransposesBush)
+{
+ mir::Graph g;
+ /* Create graph:
+ * [input]
+ * ||
+ * [Transpose 1]
+ * // \\
+ *[Transpose 2] [Transpose 3]
+ * \\ //
+ * [Add]
+ */
+ mir::TensorType input_type{mir::DataType::FLOAT32, Shape{1, 2, 3, 2}};
+ Operation *input = g.create<ops::InputOp>(input_type);
+ Operation *tr1 = g.create<ops::TransposeOp>(input->getOutput(0), vector<size_t>{1, 0, 2, 3});
+ Operation *tr2 = g.create<ops::TransposeOp>(tr1->getOutput(0), vector<size_t>{1, 0, 2, 3});
+ Operation *tr3 = g.create<ops::TransposeOp>(tr1->getOutput(0), vector<size_t>{1, 0, 2, 3});
+ Operation *elw = g.create<ops::AddOp>(tr2->getOutput(0), tr3->getOutput(0));
+ std::stringstream ss;
+ DumpVisitor d(ss);
+ CombineTransposes pass;
+ pass.run(&g);
+ g.accept(&d);
+ ASSERT_EQ("i_0.b_4.", ss.str());
+ ASSERT_EQ(elw->getInput(0)->getNode()->getType(), mir::Operation::Type::input);
+ ASSERT_EQ(elw->getInput(1)->getNode()->getType(), mir::Operation::Type::input);
+}
+
+// Two input branches whose transpose pairs cancel: the pass must remove them
+// without swapping the Add operands (input order must be preserved).
+TEST(OptPass, combineTransposesOpOrder)
+{
+ mir::Graph g;
+ /* Create graph:
+ * [input] [input2]
+ * || ||
+ * [Transpose 0] [Transpose1]
+ * || ||
+ * [Transpose 2] [Transpose 3]
+ * \\ //
+ * [Add]
+ */
+ mir::TensorType input_type{mir::DataType::FLOAT32, {1, 2, 3}};
+ Operation *in1 = g.create<ops::InputOp>(input_type);
+ Operation *in2 = g.create<ops::InputOp>(input_type);
+ Operation *tr0 = g.create<ops::TransposeOp>(in1->getOutput(0), vector<size_t>{1, 0, 2});
+ Operation *tr1 = g.create<ops::TransposeOp>(in2->getOutput(0), vector<size_t>{2, 1, 0});
+ Operation *tr2 = g.create<ops::TransposeOp>(tr0->getOutput(0), vector<size_t>{1, 0, 2});
+ Operation *tr3 = g.create<ops::TransposeOp>(tr1->getOutput(0), vector<size_t>{2, 1, 0});
+ Operation *elw = g.create<ops::AddOp>(tr2->getOutput(0), tr3->getOutput(0));
+ g.create<ops::OutputOp>(elw->getOutput(0));
+ int n1 = in1->getId();
+ int n2 = in2->getId();
+ CombineTransposes pass;
+ pass.run(&g);
+ ASSERT_EQ(g.getOutputs()[0]->getInput(0)->getNode()->getType(), mir::Operation::Type::add);
+ // Order is preserved
+ ASSERT_EQ(n1, elw->getInput(0)->getNode()->getId());
+ ASSERT_EQ(n2, elw->getInput(1)->getNode()->getId());
+}
+} // unnamed namespace
diff --git a/compiler/nnc/unittests/optimizations/DeadCodeElimination.cpp b/compiler/nnc/unittests/optimizations/DeadCodeElimination.cpp
new file mode 100644
index 000000000..057dd710b
--- /dev/null
+++ b/compiler/nnc/unittests/optimizations/DeadCodeElimination.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passes/optimizations/DeadCodeElimination.h"
+#include "mir/ops/AddOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/InputOp.h"
+#include "mir/ops/OutputOp.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace nnc;
+using namespace mir;
+
+// Two constants with no users must both be eliminated.
+TEST(DeadCodeEliminationTest, RemovesSingleNodes)
+{
+ Graph graph;
+ graph.create<ops::ConstantOp>(TensorVariant{DataType::FLOAT32, {}});
+ graph.create<ops::ConstantOp>(TensorVariant{DataType::FLOAT32, {}});
+
+ DeadCodeElimination pass;
+ pass.run(&graph);
+ ASSERT_EQ(graph.getNodes().size(), 0);
+}
+
+// A whole dead chain (constants -> add -> add) must be eliminated transitively.
+TEST(DeadCodeEliminationTest, RemovesChainedNodes)
+{
+ Graph graph;
+ auto c1 = graph.create<ops::ConstantOp>(TensorVariant{DataType::FLOAT32, {}})->getOutput(0);
+ auto c2 = graph.create<ops::ConstantOp>(TensorVariant{DataType::FLOAT32, {}})->getOutput(0);
+ auto sum = graph.create<ops::AddOp>(c1, c2)->getOutput(0);
+ graph.create<ops::AddOp>(sum, sum);
+
+ DeadCodeElimination pass;
+ pass.run(&graph);
+ ASSERT_EQ(graph.getNodes().size(), 0);
+}
+
+// An input node must survive even though it has no users.
+TEST(DeadCodeEliminationTest, PreservesInputNode)
+{
+ Graph graph;
+ graph.create<ops::InputOp>(TensorType{DataType::FLOAT32, {}});
+
+ DeadCodeElimination pass;
+ pass.run(&graph);
+ ASSERT_EQ(graph.getNodes().size(), 1);
+}
+
+// An output node and the constant feeding it must both survive.
+TEST(DeadCodeEliminationTest, PreservesOutputNode)
+{
+ Graph graph;
+ auto c = graph.create<ops::ConstantOp>(TensorVariant{DataType::FLOAT32, {}})->getOutput(0);
+ graph.create<ops::OutputOp>(c);
+
+ DeadCodeElimination pass;
+ pass.run(&graph);
+ ASSERT_EQ(graph.getNodes().size(), 2);
+}
+
+// Only the unused AddOp is removed; the two constants and the two outputs
+// that consume them must remain (4 of the original 5 nodes).
+TEST(DeadCodeEliminationTest, PreservesUsedNodes)
+{
+ Graph graph;
+ auto c1 = graph.create<ops::ConstantOp>(TensorVariant{DataType::FLOAT32, {}})->getOutput(0);
+ auto c2 = graph.create<ops::ConstantOp>(TensorVariant{DataType::FLOAT32, {}})->getOutput(0);
+ graph.create<ops::AddOp>(c1, c2);
+ graph.create<ops::OutputOp>(c1);
+ graph.create<ops::OutputOp>(c2);
+
+ DeadCodeElimination pass;
+ pass.run(&graph);
+ ASSERT_EQ(graph.getNodes().size(), 4);
+}
+
+} // unnamed namespace
diff --git a/compiler/nnc/unittests/optimizations/FuseArithmeticOps.cpp b/compiler/nnc/unittests/optimizations/FuseArithmeticOps.cpp
new file mode 100644
index 000000000..85a2fee76
--- /dev/null
+++ b/compiler/nnc/unittests/optimizations/FuseArithmeticOps.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passes/optimizations/FuseArithmeticOps.h"
+#include "Util.h"
+#include "mir/Graph.h"
+#include "mir/ops/AddOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/MulOp.h"
+
+#include <gtest/gtest.h>
+#include <sstream>
+
+using namespace nnc;
+using namespace mir;
+
+namespace
+{
+
+// The bias/scale chain after the convolution must be folded into a single
+// conv + bias pair.
+TEST(OptPass, fuseConvBiasScaleScaleBias)
+{
+ mir::Graph g;
+
+ // Create graph: 'input->conv->bias->scale->scale->bias'
+ mir::TensorType input_type(mir::DataType::FLOAT32, Shape{1, 299, 299, 3});
+ auto input = g.create<ops::InputOp>(input_type);
+ auto conv_const = g.create<ops::ConstantOp>(TensorVariant(DataType::FLOAT32, {10, 3, 3, 3}));
+ auto conv = g.create<ops::Conv2DOp>(input->getOutput(0), conv_const->getOutput(0),
+ mir::Conv2DOpAttributes());
+ auto bias1_const = g.create<ops::ConstantOp>(TensorVariant(DataType::FLOAT32, {10}));
+ auto bias1 = g.create<ops::AddOp>(conv->getOutput(0), bias1_const->getOutput(0));
+ auto scale1_const = g.create<ops::ConstantOp>(TensorVariant(DataType::FLOAT32, {10}));
+ auto scale1 = g.create<ops::MulOp>(bias1->getOutput(0), scale1_const->getOutput(0));
+ auto scale2_const = g.create<ops::ConstantOp>(TensorVariant(DataType::FLOAT32, {10}));
+ auto scale2 = g.create<ops::MulOp>(scale1->getOutput(0), scale2_const->getOutput(0));
+ auto scale3_const = g.create<ops::ConstantOp>(TensorVariant(DataType::FLOAT32, {10}));
+ auto scale3 = g.create<ops::MulOp>(scale2->getOutput(0), scale3_const->getOutput(0));
+ auto bias2_const = g.create<ops::ConstantOp>(TensorVariant(DataType::FLOAT32, {10}));
+ g.create<ops::AddOp>(scale3->getOutput(0), bias2_const->getOutput(0));
+
+ // Check that layout is desired
+ std::stringstream ss;
+ DumpVisitor d(ss);
+ FuseArithmeticOps pass;
+ pass.run(&g);
+ g.accept(&d);
+ // Assert only 'conv->bias' remains; the visitation order of the surviving
+ // constants and the input is unspecified, so all interleavings are accepted.
+ ASSERT_TRUE("i_0.const_25.const_23.conv_26.b_24." == ss.str() ||
+ "i_0.const_23.const_25.conv_26.b_24." == ss.str() ||
+ "const_25.i_0.const_23.conv_26.b_24." == ss.str() ||
+ "const_23.i_0.const_25.conv_26.b_24." == ss.str() ||
+ "const_25.const_23.i_0.conv_26.b_24." == ss.str() ||
+ "const_23.const_25.i_0.conv_26.b_24." == ss.str());
+}
+
+} // unnamed namespace
diff --git a/compiler/nnc/unittests/optimizations/SinkTest.cpp b/compiler/nnc/unittests/optimizations/SinkTest.cpp
new file mode 100644
index 000000000..8c5b2767e
--- /dev/null
+++ b/compiler/nnc/unittests/optimizations/SinkTest.cpp
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passes/optimizations/SinkTranspose.h"
+#include "passes/optimizations/SinkRelu.h"
+#include "Util.h"
+#include "mir/ops/TransposeOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/ops/TanhOp.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/OutputOp.h"
+#include "mir/Graph.h"
+
+#include <gtest/gtest.h>
+#include <sstream>
+#include <vector>
+
+using namespace std;
+using namespace nnc;
+using namespace mir;
+
+namespace
+{
+// Returns the producer of op's single input (op must have exactly one input).
+Operation *getPrev(Operation *op)
+{
+ assert(op->getNumInputs() == 1);
+ return op->getInput(0)->getNode();
+}
+
+// Returns the sole consumer of op's single output (op must have exactly one
+// output with exactly one use).
+Operation *getNext(Operation *op)
+{
+ assert(op->getNumOutputs() == 1 && (op->getOutput(0)->getUses().size() == 1));
+ Operation::Use use = op->getOutput(0)->getUses()[0];
+ return use.getNode();
+}
+
+/* This tests swapping relu and transpose */
+TEST(OptPass, sinkTrReLU)
+{
+ mir::Graph g;
+ /*
+ * Create graph:
+ * [input]
+ * ||
+ * [Transpose]
+ * ||
+ * [relu]
+ * ||
+ * [tanh]
+ */
+ mir::TensorType input_type{mir::DataType::FLOAT32, Shape{1, 2, 3}};
+ Operation *input = g.create<ops::InputOp>(input_type);
+ Operation *tr1 = g.create<ops::TransposeOp>(input->getOutput(0), vector<size_t>{1, 0, 2});
+ Operation *relu = g.create<ops::ReluOp>(tr1->getOutput(0));
+ Operation *tanh = g.create<ops::TanhOp>(relu->getOutput(0));
+ Operation *out = g.create<ops::OutputOp>(tanh->getOutput(0));
+ (void)out;
+
+ // Check that layout is desired
+ SinkTranspose pass;
+ pass.run(&g);
+
+ // Assert the transpose was sunk below the ReLU:
+ // input -> relu -> transpose -> tanh
+ ASSERT_EQ(g.getInputs()[0]->getType(), mir::Operation::Type::input);
+ ASSERT_EQ(getPrev(g.getOutputs()[0])->getType(), mir::Operation::Type::tanh);
+ ASSERT_EQ(getNext(g.getInputs()[0])->getType(), mir::Operation::Type::ReLU);
+ ASSERT_EQ(getPrev(tanh)->getType(), mir::Operation::Type::transpose);
+}
+
+/* This tests swapping concat and transpose */
+TEST(OptPass, sinkTrConcat)
+{
+ mir::Graph g;
+ /*
+ * Create graph:
+ * [input] [input2]
+ * || ||
+ * [Transpose 1] [Transpose 2]
+ * \\ //
+ * [Concat]
+ * ||
+ * [TanH]
+ */
+
+ mir::TensorType in1_type{mir::DataType::FLOAT32, Shape{1, 1, 2, 3}};
+ Operation *in1 = g.create<ops::InputOp>(in1_type);
+
+ mir::TensorType in2_type{mir::DataType::FLOAT32, Shape{1, 1, 2, 3}};
+ Operation *in2 = g.create<ops::InputOp>(in2_type);
+ Operation *tr1 = g.create<ops::TransposeOp>(in1->getOutput(0), vector<size_t>{0, 3, 1, 2});
+ Operation *tr2 = g.create<ops::TransposeOp>(in2->getOutput(0), vector<size_t>{0, 3, 1, 2});
+ Operation *conc =
+ g.create<ops::ConcatOp>(vector<Operation::Output *>{tr1->getOutput(0), tr2->getOutput(0)}, 1);
+ Operation *tanh = g.create<ops::TanhOp>(conc->getOutput(0));
+ Operation *out = g.create<ops::OutputOp>(tanh->getOutput(0));
+ (void)out;
+ // Check that layout is as desired
+ SinkTranspose pass;
+ pass.run(&g);
+
+ // A single transpose with the original axis order must now sit after the concat.
+ ASSERT_EQ(getPrev(getPrev(g.getOutputs()[0]))->getType(), Operation::Type::transpose);
+ ASSERT_TRUE(static_cast<ops::TransposeOp *>(getPrev(tanh))->getAxisOrder() ==
+ vector<size_t>({0, 3, 1, 2}));
+ /* Expected Result:
+ * TanH(Transpose(Concat(inp1,inp2)))
+ */
+}
+
+/* This tests swapping concat and relu */
+TEST(OptPass, sinkReluConcat)
+{
+ mir::Graph g;
+ /*
+ * Create graph:
+ * [ inp1 ] [ inp2 ]
+ * || ||
+ * [ Relu 1] [ Relu 2]
+ * \\ //
+ * [ Concat ]
+ * ||
+ * [TanH]
+ */
+ mir::TensorType in1_type{mir::DataType::FLOAT32, Shape{1, 1, 2, 3}};
+ Operation *in1 = g.create<ops::InputOp>(in1_type);
+
+ mir::TensorType in2_type{mir::DataType::FLOAT32, Shape{1, 1, 2, 3}};
+ Operation *in2 = g.create<ops::InputOp>(in2_type);
+ Operation *relu1 = g.create<ops::ReluOp>(in1->getOutput(0));
+ Operation *relu2 = g.create<ops::ReluOp>(in2->getOutput(0));
+ Operation *conc = g.create<ops::ConcatOp>(
+ vector<Operation::Output *>{relu1->getOutput(0), relu2->getOutput(0)}, 1);
+ Operation *tanh = g.create<ops::TanhOp>(conc->getOutput(0));
+ Operation *out = g.create<ops::OutputOp>(tanh->getOutput(0));
+ (void)out;
+
+ // Check that layout is as desired
+ SinkRelu pass;
+ pass.run(&g);
+
+ // A single ReLU must now sit after the concat, directly before the tanh.
+ ASSERT_EQ(getPrev(getPrev(g.getOutputs()[0]))->getType(), Operation::Type::ReLU);
+ /* Expected Result:
+ * TanH(Relu(Concat(inp1,inp2)))
+ */
+}
+
+/* This tests swapping relu and max_pool */
+TEST(OptPass, sinkPoolReLU)
+{
+ mir::Graph g;
+ /*
+ * Create graph:
+ * [input]
+ * ||
+ * [relu]
+ * ||
+ * [MaxPool]
+ * ||
+ * [tanh]
+ */
+ mir::TensorType input_type{mir::DataType::FLOAT32, Shape{1, 4, 4, 3}};
+ Operation *input = g.create<ops::InputOp>(input_type);
+ Operation *relu = g.create<ops::ReluOp>(input->getOutput(0));
+ mir::MaxPool2DOpAttributes attributes;
+ attributes.window = {2, 2};
+ attributes.strides = {2, 2};
+ Operation *mp = g.create<ops::MaxPool2DOp>(relu->getOutput(0), attributes);
+ Operation *tanh = g.create<ops::TanhOp>(mp->getOutput(0));
+ Operation *out = g.create<ops::OutputOp>(tanh->getOutput(0));
+ (void)out;
+
+ SinkRelu pass;
+ pass.run(&g);
+ stringstream ss;
+ DumpVisitor d{ss};
+ g.accept(&d);
+
+ // tanh(relu(pool(input))) — the ReLU was pushed below the pooling.
+ ASSERT_EQ(getNext(g.getInputs()[0])->getType(), mir::Operation::Type::maxPool2D);
+ ASSERT_EQ(getPrev(g.getOutputs()[0])->getType(), mir::Operation::Type::tanh);
+ ASSERT_EQ("i_0.p_5.r_6.th_3.", ss.str());
+}
+} // unnamed namespace
diff --git a/compiler/nnc/unittests/optimizations/Util.h b/compiler/nnc/unittests/optimizations/Util.h
new file mode 100644
index 000000000..7467a590c
--- /dev/null
+++ b/compiler/nnc/unittests/optimizations/Util.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNCC_UTIL_H
+#define NNCC_UTIL_H
+
+#include "mir/ops/AddOp.h"
+#include "mir/ops/AvgPool2DOp.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/MaxPool2DOp.h"
+#include "mir/ops/MulOp.h"
+#include "mir/ops/OutputOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/ops/TanhOp.h"
+#include "mir/ops/TransposeOp.h"
+#include "mir/Visitor.h"
+
+namespace nnc
+{
+
+class DumpVisitor : public mir::Visitor
+{
+public:
+ explicit DumpVisitor(std::ostream &s) : _s(s) {}
+
+ void visit(mir::ops::InputOp &op) override { _s << "i_" << std::to_string(op.getId()) << "."; };
+
+ void visit(mir::ops::TanhOp &op) override { _s << "th_" << std::to_string(op.getId()) << "."; }
+
+ void visit(mir::ops::MulOp &op) override { _s << "s_" << std::to_string(op.getId()) << "."; }
+
+ void visit(mir::ops::AddOp &op) override { _s << "b_" << std::to_string(op.getId()) << "."; }
+
+ void visit(mir::ops::ReluOp &op) override { _s << "r_" << std::to_string(op.getId()) << "."; }
+
+ void visit(mir::ops::AvgPool2DOp &op) override
+ {
+ _s << "p_" << std::to_string(op.getId()) << ".";
+ }
+
+ void visit(mir::ops::MaxPool2DOp &op) override
+ {
+ _s << "p_" << std::to_string(op.getId()) << ".";
+ }
+
+ void visit(mir::ops::TransposeOp &op) override
+ {
+ _s << "t_" << std::to_string(op.getId()) << ".";
+ }
+
+ void visit(mir::ops::Conv2DOp &op) override
+ {
+ _s << "conv_" << std::to_string(op.getId()) << ".";
+ }
+
+ void visit(mir::ops::ConstantOp &op) override
+ {
+ _s << "const_" << std::to_string(op.getId()) << ".";
+ }
+
+ std::ostream &_s;
+};
+
+} // namespace nnc
+#endif // NNCC_UTIL_H
diff --git a/compiler/nnc/unittests/pass/CMakeLists.txt b/compiler/nnc/unittests/pass/CMakeLists.txt
new file mode 100644
index 000000000..778d500d9
--- /dev/null
+++ b/compiler/nnc/unittests/pass/CMakeLists.txt
@@ -0,0 +1,4 @@
+file(GLOB_RECURSE TEST_SOURCES "*.cpp")
+
+nnc_add_unit_test(nnc_pass_test ${TEST_SOURCES})
+optional_target_link_libraries(nnc_pass_test mir)
diff --git a/compiler/nnc/unittests/pass/PassExceptionTest.cpp b/compiler/nnc/unittests/pass/PassExceptionTest.cpp
new file mode 100644
index 000000000..ea9d7a2d5
--- /dev/null
+++ b/compiler/nnc/unittests/pass/PassExceptionTest.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pass/PassException.h"
+
+#include "gtest/gtest.h"
+
+using namespace nnc;
+
+namespace
+{
+
+std::string ErrorMsg = "error constructor";
+
+void passErr1() { throw PassException(ErrorMsg); }
+
+void passErr2()
+{
+ try
+ {
+ passErr1();
+ }
+ catch (const PassException &e)
+ {
+ throw;
+ }
+}
+
+TEST(CONTRIB_PASS, PassException)
+{
+ try
+ {
+ passErr2();
+ }
+ catch (const PassException &e)
+ {
+ ASSERT_TRUE(ErrorMsg == e.what());
+ return;
+ }
+
+ // should not happen
+ FAIL();
+}
+
+} // unnamed namespace
diff --git a/compiler/nnc/unittests/pass/PassManagerTest.cpp b/compiler/nnc/unittests/pass/PassManagerTest.cpp
new file mode 100644
index 000000000..a0ee3140d
--- /dev/null
+++ b/compiler/nnc/unittests/pass/PassManagerTest.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <dlfcn.h>
+
+#include "mir/Graph.h"
+#include "pass/Pass.h"
+#include "pass/PassData.h"
+#include "pass/PassException.h"
+
+#include "gtest/gtest.h"
+
+using namespace nnc;
+
+class DummyPass1 : public Pass
+{
+public:
+ PassData run(PassData data) override
+ {
+ auto graph = static_cast<mir::Graph *>(data);
+
+ if (!graph)
+ {
+ throw PassException();
+ }
+
+ return graph;
+ }
+};
+
+class DummyPass2 : public Pass
+{
+public:
+ PassData run(PassData data) override
+ {
+ auto tv = static_cast<mir::TensorVariant *>(data);
+
+ if (!tv)
+ {
+ throw PassException();
+ }
+
+ return nullptr;
+ }
+};
+
+TEST(CONTRIB_PASS, PassManager)
+{
+ DummyPass1 pass1;
+ DummyPass2 pass2;
+
+ mir::Graph g;
+ auto res = pass1.run(&g);
+ ASSERT_NE(static_cast<mir::Graph *>(res), nullptr);
+
+ ASSERT_THROW(pass2.run(res), PassException);
+}
diff --git a/compiler/nnc/unittests/soft_backend/CMakeLists.txt b/compiler/nnc/unittests/soft_backend/CMakeLists.txt
new file mode 100644
index 000000000..f1cd30b0f
--- /dev/null
+++ b/compiler/nnc/unittests/soft_backend/CMakeLists.txt
@@ -0,0 +1,7 @@
+nnc_add_unit_test(nnc_cpu_cpp_backend_ops_test CPPOperations.cpp)
+optional_target_link_libraries(nnc_cpu_cpp_backend_ops_test mir_interpreter mir soft_backend_cpp)
+target_include_directories(nnc_cpu_cpp_backend_ops_test PRIVATE ${NNC_SOFT_BACKEND_DIR})
+
+nnc_add_unit_test(nnc_cpu_cpp_backend_general_test Generator.cpp CPPHeaderTypes.cpp ModelAnalyzer.cpp)
+optional_target_link_libraries(nnc_cpu_cpp_backend_general_test mir soft_backend_cpp)
+target_include_directories(nnc_cpu_cpp_backend_general_test PRIVATE ${NNC_SOFT_BACKEND_DIR})
diff --git a/compiler/nnc/unittests/soft_backend/CPPHeaderTypes.cpp b/compiler/nnc/unittests/soft_backend/CPPHeaderTypes.cpp
new file mode 100644
index 000000000..3b071510b
--- /dev/null
+++ b/compiler/nnc/unittests/soft_backend/CPPHeaderTypes.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstddef>
+#include <initializer_list>
+#include <functional>
+#include <numeric>
+
+#include "code_snippets/cpp_header_types.def"
+
+#include "gtest/gtest.h"
+
+template <class List> static inline void checkListShapeEq(List list, Shape shape, index_t volume)
+{
+ ASSERT_EQ(static_cast<size_t>(shape.getDims()), list.size());
+ index_t idx = 0;
+ for (auto el : list)
+ {
+ ASSERT_EQ(shape[idx], el);
+ idx++;
+ }
+ ASSERT_EQ(shape.getNumElems(), volume);
+}
+
+TEST(SOFT_BACKEND, shape_and_index)
+{
+ auto list = {2, 3, 4};
+ index_t volume = std::accumulate(list.begin(), list.end(), 1, std::multiplies<index_t>());
+ Shape s1(list);
+ checkListShapeEq(list, s1, volume);
+// This check must be performed only if assertions are enabled
+#ifndef NDEBUG
+ ASSERT_DEATH(s1[list.size()], "");
+#endif
+
+ Shape s2(s1);
+ checkListShapeEq(list, s2, volume);
+
+ Shape s3{1};
+ ASSERT_EQ(s3.getNumElems(), 1);
+ ASSERT_EQ(s3.getDims(), 1);
+ s3 = s1;
+ checkListShapeEq(list, s3, volume);
+
+ s3.setDims(4);
+ s3[3] = 2;
+ ASSERT_EQ(s3.getNumElems(), volume * 2);
+ s3.setDims(3);
+ ASSERT_EQ(s3.getNumElems(), volume);
+}
+
+TEST(SOFT_BACKEND, tensor)
+{
+ // test reshape
+ Tensor t1;
+ ASSERT_EQ(t1.getShape().getNumElems(), 1);
+ const index_t tensor1_height = 2;
+ const index_t tensor1_width = 4;
+ t1.reshape(Shape{tensor1_height, tensor1_width});
+ ASSERT_EQ(t1.getShape().getNumElems(), tensor1_height * tensor1_width);
+ // test at functions
+ float expected_sum = 0;
+ for (index_t i = 0; i < tensor1_height; ++i)
+ for (index_t j = 0; j < tensor1_width; ++j)
+ {
+ index_t elem = (i + 1) * (j + 1);
+ expected_sum += elem;
+ t1.at({i, j}) = elem;
+ }
+ float sum = 0;
+ for (index_t i = 0; i < tensor1_height; ++i)
+ for (index_t j = 0; j < tensor1_width; ++j)
+ {
+ sum += t1.at({i, j});
+ }
+ ASSERT_EQ(sum, expected_sum);
+
+ // test construction with shape
+ const index_t tensor2_height = 3;
+ const index_t tensor2_width = 4;
+ Tensor t2({tensor2_height, tensor2_width});
+ ASSERT_EQ(t2.getShape().getNumElems(), tensor2_height * tensor2_width);
+
+ // test unmanaged tensor
+ const index_t tensor3_depth = 2;
+ const index_t tensor3_height = 2;
+ const index_t tensor3_width = 3;
+ std::vector<float> data({1.0, 2.0, 4.0});
+ data.resize(tensor3_depth * tensor3_height * tensor3_width);
+ float *data_ptr = data.data();
+ Tensor t3(Shape({tensor3_depth, tensor3_height, tensor3_width}), data_ptr);
+ ASSERT_EQ(t3.getShape().getNumElems(), tensor3_depth * tensor3_height * tensor3_width);
+ sum = 0;
+ for (index_t k = 0; k < tensor3_depth; ++k)
+ for (index_t i = 0; i < tensor3_height; ++i)
+ for (index_t j = 0; j < tensor3_width; ++j)
+ {
+ sum += t3.at({k, i, j});
+ }
+ ASSERT_EQ(sum, std::accumulate(data_ptr, data_ptr + t3.getShape().getNumElems(), 0.0f));
+
+ // test tensor copy
+ const index_t t4Width = 4;
+ Tensor t4({t4Width});
+ t4 = t3;
+ for (index_t k = 0; k < tensor3_depth; ++k)
+ for (index_t i = 0; i < tensor3_height; ++i)
+ for (index_t j = 0; j < tensor3_height; ++j)
+ {
+ ASSERT_EQ(t3.at({k, i, j}), t4.at({k, i, j}));
+ }
+}
diff --git a/compiler/nnc/unittests/soft_backend/CPPOperations.cpp b/compiler/nnc/unittests/soft_backend/CPPOperations.cpp
new file mode 100644
index 000000000..508ee954d
--- /dev/null
+++ b/compiler/nnc/unittests/soft_backend/CPPOperations.cpp
@@ -0,0 +1,1007 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <vector>
+#include <functional>
+
+// artifact part
+#include "CommonData.def"
+
+#include "code_snippets/eigen.def"
+
+#include "code_snippets/cpp_header_types.def"
+#include "code_snippets/cpp_common_funcs.def"
+
+#include "code_snippets/cpp_broadcast.def"
+#include "code_snippets/cpp_capped_relu.def"
+#include "code_snippets/cpp_concat.def"
+#include "code_snippets/cpp_conv.def"
+#include "code_snippets/cpp_conv_transpose.def"
+#include "code_snippets/cpp_depthwise_conv.def"
+#include "code_snippets/cpp_elementwise.def"
+#include "code_snippets/cpp_elu.def"
+#include "code_snippets/cpp_fully_connected.def"
+#include "code_snippets/cpp_gather.def"
+#include "code_snippets/cpp_sigmoid.def"
+#include "code_snippets/cpp_pad.def"
+#include "code_snippets/cpp_pool.def"
+#include "code_snippets/cpp_reduce.def"
+#include "code_snippets/cpp_relu.def"
+#include "code_snippets/cpp_resize.def"
+#include "code_snippets/cpp_softmax.def"
+#include "code_snippets/cpp_sqrt.def"
+#include "code_snippets/cpp_slice.def"
+#include "code_snippets/cpp_tanh.def"
+#include "code_snippets/cpp_transpose.def"
+
+#include "code_snippets/cpp_operations.def"
+#include "code_snippets/cpp_leaky_relu.def"
+
+// soft backend part
+
+#include "ModelAnalyzer.h"
+#include "SBSerializer.h"
+
+// operations part
+#include "mir/ops/AbsOp.h"
+#include "mir/ops/AddOp.h"
+#include "mir/ops/AvgPool2DOp.h"
+#include "mir/ops/BroadcastOp.h"
+#include "mir/ops/CappedReluOp.h"
+#include "mir/ops/ConcatOp.h"
+#include "mir/ops/ConstantOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/Deconv2DOp.h"
+#include "mir/ops/DepthwiseConv2DOp.h"
+#include "mir/ops/DivOp.h"
+#include "mir/ops/EluOp.h"
+#include "mir/ops/FullyConnectedOp.h"
+#include "mir/ops/LeakyReluOp.h"
+#include "mir/ops/MaxOp.h"
+#include "mir/ops/MaxPool2DOp.h"
+#include "mir/ops/MulOp.h"
+#include "mir/ops/PadOp.h"
+#include "mir/ops/ReduceMeanOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/ops/ReshapeOp.h"
+#include "mir/ops/ResizeOp.h"
+#include "mir/ops/SigmoidOp.h"
+#include "mir/ops/SliceOp.h"
+#include "mir/ops/SoftmaxOp.h"
+#include "mir/ops/SqrtOp.h"
+#include "mir/ops/SubOp.h"
+#include "mir/ops/TanhOp.h"
+#include "mir/ops/TransposeOp.h"
+
+// various headers
+#include "mir/TensorVariant.h"
+#include "mir/Tensor.h"
+#include "mir/Graph.h"
+#include "mir/ShapeRange.h"
+
+#include "MirInterpreter.h"
+
+#include "gtest/gtest.h"
+
+using namespace std;
+
+using namespace nnc;
+
+namespace irOps = mir::ops;
+
+/*
+ This test suite operates with both artifact and NNC tensors:
+ nnc data has `n` literal in name
+ artifact data has 'a' literal in name
+ For example: nShape, aShape, nTensor, aTensor.
+
+ Artifact data types are: Tensor, Shape
+ NNC data types are: mir::TensorVariant, tensor::Shape, mir::Tensor<float>
+*/
+
+namespace
+{
+
+/**
+ * @brief Creates graph with one operation generated by opGen function and returns this operation
+ * node
+ */
+mir::Operation *
+fillGraph(mir::Graph &g,
+ const function<mir::Operation *(mir::Graph &g, vector<mir::Operation::Output *> &inputs)>
+ &op_gen,
+ const vector<unique_ptr<mir::TensorVariant>> &input_ntensors)
+{
+ // Create operation inputs.
+ vector<mir::Operation::Output *> inputs;
+ for (const unique_ptr<mir::TensorVariant> &ntensor : input_ntensors)
+ {
+ auto input = g.create<mir::ops::ConstantOp>(*ntensor)->getOutput(0);
+ inputs.push_back(input);
+ }
+
+ return op_gen(g, inputs);
+}
+
+/**
+ * @brief Fills NNC Shape object with data from src container
+ */
+void fillNShape(mir::Shape &nshape, const vector<int> &raw_shape_data)
+{
+ int shape_rank = raw_shape_data.size();
+ nshape.resize(shape_rank);
+ for (int i = 0; i < shape_rank; ++i)
+ nshape.dim(i) = raw_shape_data[i];
+}
+
+/**
+ * @brief Converts NNC Shape to artifact Shape
+ */
+void copyAShapeFromNShape(Shape &ashape, const mir::Shape &src)
+{
+ int shape_rank = src.rank();
+ ashape.setDims(shape_rank);
+ for (int i = 0; i < shape_rank; ++i)
+ ashape[i] = src.dim(i);
+}
+
+/**
+ * @brief Fills NNC and artifact Shape objects with data from rawShapeData
+ */
+void fillShapes(mir::Shape &nshape, Shape &ashape, const vector<int> &raw_shape_data)
+{
+ fillNShape(nshape, raw_shape_data);
+ copyAShapeFromNShape(ashape, nshape);
+}
+
+/**
+ * @brief Fills NNC tensor with some determined data
+ */
+void fillNTensor(mir::TensorVariant &dst, float start)
+{
+ float t = start;
+ mir::Tensor<float> wrapper(dst);
+ for (const mir::Index &idx : mir::ShapeRange(dst.getShape()))
+ {
+ wrapper.at(idx) = sin(t) * 2.0f;
+ t += 1.0f;
+ }
+}
+
+/**
+ * @brief Converts NNC mir::TensorVariant to artifact Tensor object
+ */
+void copyATensorFromNTensor(Tensor &dst, mir::TensorVariant &src)
+{
+ mir::Tensor<float> wrapper(src);
+ Index art_idx;
+ int rank = src.getShape().rank();
+ art_idx.setDims(rank);
+ for (mir::Index idx : mir::ShapeRange(src.getShape()))
+ {
+ for (int i = 0; i < rank; ++i)
+ art_idx[i] = idx.at(i);
+ dst.at(art_idx) = wrapper.at(idx);
+ }
+}
+
+/**
+ * @brief Fills NNC and artifact tensor objects with some determined data
+ */
+void fillTensors(unique_ptr<mir::TensorVariant> &ntensor, Tensor &atensor, const vector<int> &shape,
+ float start)
+{
+ Shape ashape;
+ mir::Shape nshape;
+ fillShapes(nshape, ashape, shape);
+ atensor.reshape(ashape);
+ ntensor.reset(new mir::TensorVariant(mir::DataType::FLOAT32, nshape));
+ fillNTensor(*ntensor, start);
+ copyATensorFromNTensor(atensor, *ntensor);
+}
+
+/**
+ * @brief Run interpreter to get reference output data
+ */
+mir::TensorVariant getReferenceTensor(mir::Graph &g, mir::Operation *op)
+{
+ mir_interpreter::MIRInterpreter interpreter;
+ g.accept(&interpreter);
+ assert(op->getNumOutputs() == 1);
+ return interpreter.getTensor(op->getOutput(0));
+}
+
+/**
+ * @brief Run selected operation, used to make code in tests more compact and fit getReferenceTensor
+ * format
+ */
+template <typename Operation, typename... Args> Tensor run(Operation op, const Args &... args)
+{
+ Tensor output;
+ op(output, args...);
+ return output;
+}
+
+/**
+ * @brief Compare floats using combined Units in the Last Place and epsilon approach
+ * @param a First number to compare
+ * @param b Second number to compare
+ * @param ulp Max tolerated number of units in the last place
+ * @param eps Max tolerated absolute difference
+ * @return true if diff of two numbers is less than 'eps' or ULP between arguments is less than
+ * 'ulp'
+ */
+bool areFloatsNear(float a, float b, int32_t ulp, float eps)
+{
+ assert(ulp < (1 << 23) && "this algorithm is not applicable for such large diffs");
+ assert(eps >= 0 && "epsilon should be positive number");
+ if (fabs(a - b) <= eps)
+ return true;
+  // from this point on we need to find the difference between the numbers
+  // in terms of ULP
+ int32_t ai;
+ int32_t bi;
+ memcpy(&ai, &a, sizeof(float));
+ memcpy(&bi, &b, sizeof(float));
+ // compare mantissa of numbers
+ if (ai > bi)
+ return ai - bi <= ulp;
+ return bi - ai <= ulp;
+}
+
+/**
+ * @brief Compares nnc mir::TensorVariant and artifact Tensor objects
+ * @param ref_nnc_tensor Reference tensor that interpreter produced
+ * @param test_art_tensor Tensor that artifact operation computed
+ */
+void compareResults(const mir::TensorVariant &ref_nnc_tensor, const Tensor &test_art_tensor)
+{
+ assert(ref_nnc_tensor.getElementSize() == 4L &&
+ ref_nnc_tensor.getDataType() == mir::DataType::FLOAT32);
+
+ const mir::Shape &nnc_shape = ref_nnc_tensor.getShape();
+ const Shape &art_shape = test_art_tensor.getShape();
+
+ // check that reference and test shapes are equal
+ ASSERT_EQ(nnc_shape.rank(), art_shape.getDims());
+
+ int rank = nnc_shape.rank();
+ for (int i = 0; i < rank; ++i)
+ ASSERT_EQ(nnc_shape.dim(i), art_shape[i]);
+
+ // check that reference and test tensor contents are equal
+ Index artifact_idx;
+ artifact_idx.setDims(rank);
+ for (mir::Index nnc_idx : mir::ShapeRange(nnc_shape))
+ {
+ for (int i = 0; i < rank; ++i)
+ artifact_idx[i] = nnc_idx.at(i);
+    // Input and output data lie in the range [-10, 10];
+ // chosen epsilon lies near the edge of float type computational precision
+ float ref_data = mir::Tensor<float>(ref_nnc_tensor).at(nnc_idx);
+ float test_data = test_art_tensor.at(artifact_idx);
+ ASSERT_TRUE(areFloatsNear(ref_data, test_data, 32, 1e-5))
+ << "Tensor element " << nnc_idx << " diverged, reference: " << ref_data
+ << " test result: " << test_data;
+ }
+}
+
+/**
+ * @brief This function creates test graph, runs interpreter, specifies artifact operation and
+ * compares results
+ */
+template <typename TestFunc, typename... Args>
+void createAndRunTestGraph(
+ function<mir::Operation *(mir::Graph &, const std::vector<mir::Operation::Output *> &inputs)>
+ op_generator,
+ TestFunc artifactOperation, const vector<unique_ptr<mir::TensorVariant>> &input_ntensors,
+ Args &... input_atensors)
+{
+ mir::Graph g;
+ mir::Operation *actual_operation = fillGraph(g, op_generator, input_ntensors);
+
+ // serialize data for soft backend operation
+ vector<unique_ptr<sir::Action>> inference_sequence;
+ unique_ptr<sir::CallFunction> op_call(new sir::CallFunction);
+ op_call->mirOp = actual_operation;
+ inference_sequence.push_back(std::move(op_call));
+ Serializer serializer;
+ serializer.serialize(inference_sequence);
+ assert(static_cast<sir::CallFunction *>(inference_sequence.front().get())->paramStartOffset == 0);
+
+ mir::TensorVariant reference_output = getReferenceTensor(g, actual_operation);
+
+ Tensor test_output;
+ artifactOperation(test_output, serializer.getBuffer().data(), input_atensors...);
+
+ compareResults(reference_output, test_output);
+}
+} // namespace
+
+TEST(cpp_operations_test, capped_relu)
+{
+ // test prerequisites
+  // cap has this value to cut input numbers (they are in range [-1, 1])
+ float cap = 0.5f;
+ vector<int> shape_data{2, 3, 4, 5};
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ fillTensors(input_ntensors[0], input_atensor, shape_data, 1.0f);
+ auto op_generator = [cap](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::CappedReluOp>(inputs[0], cap);
+ };
+
+ createAndRunTestGraph(op_generator, cappedRelu, input_ntensors, input_atensor);
+}
+
+TEST(cpp_operations_test, concat)
+{
+ for (int num_dims = 1; num_dims <= 4; ++num_dims)
+ for (int axis = 0; axis < num_dims; ++axis)
+ {
+ // test prerequisites
+ vector<int> shape_data1{2, 3, 5, 7};
+ vector<int> shape_data2{2, 3, 5, 7};
+ shape_data1.resize(num_dims);
+ shape_data2.resize(num_dims);
+ // set different size for concatenating axis
+ shape_data2[axis] = 11;
+ vector<Tensor> input_atensors(2);
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
+ fillTensors(input_ntensors[0], input_atensors[0], shape_data1, 1.0f);
+ fillTensors(input_ntensors[1], input_atensors[1], shape_data2, 2.0f);
+ auto op_generator = [axis](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::ConcatOp>(inputs, axis);
+ };
+
+ createAndRunTestGraph(op_generator, concat<Tensor, Tensor>, input_ntensors, input_atensors[0],
+ input_atensors[1]);
+ }
+}
+
+TEST(cpp_operations_test, addbc)
+{
+ for (int num_dims = 2; num_dims <= 4; ++num_dims)
+ {
+ // test prerequisites
+ vector<int> shape_data1{3, 44, 5, 1};
+ vector<int> shape_data2{3, 1, 5, 6};
+ shape_data1.resize(num_dims);
+ shape_data2.resize(num_dims);
+ vector<Tensor> input_atensors(2);
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
+ fillTensors(input_ntensors[0], input_atensors[0], shape_data1, 1.0f);
+ fillTensors(input_ntensors[1], input_atensors[1], shape_data2, 2.0f);
+ auto op_generator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::AddOp>(inputs[0], inputs[1]);
+ };
+
+ createAndRunTestGraph(op_generator, ElementWise<Add>, input_ntensors, input_atensors[0],
+ input_atensors[1]);
+ }
+}
+
+TEST(cpp_operations_test, mulbc)
+{
+ for (int num_dims = 2; num_dims <= 4; ++num_dims)
+ {
+ // test prerequisites
+ vector<int> shape_data1{3, 22, 5, 1};
+ vector<int> shape_data2{3, 1, 5, 6};
+ shape_data1.resize(num_dims);
+ shape_data2.resize(num_dims);
+ vector<Tensor> input_atensors(2);
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
+ fillTensors(input_ntensors[0], input_atensors[0], shape_data1, 1.0f);
+ fillTensors(input_ntensors[1], input_atensors[1], shape_data2, 2.0f);
+ auto opGenerator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::MulOp>(inputs[0], inputs[1]);
+ };
+
+ createAndRunTestGraph(opGenerator, ElementWise<Mul>, input_ntensors, input_atensors[0],
+ input_atensors[1]);
+ }
+}
+
+TEST(cpp_operations_test, divbc)
+{
+ for (int num_dims = 2; num_dims <= 4; ++num_dims)
+ {
+ // test prerequisites
+ vector<int> shape_data1{3, 22, 5, 1};
+ vector<int> shape_data2{3, 1, 5, 6};
+ shape_data1.resize(num_dims);
+ shape_data2.resize(num_dims);
+ vector<Tensor> input_atensors(2);
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
+ fillTensors(input_ntensors[0], input_atensors[0], shape_data1, 5.0f);
+ fillTensors(input_ntensors[1], input_atensors[1], shape_data2, 2.0f);
+ auto opGenerator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::DivOp>(inputs[0], inputs[1]);
+ };
+
+ createAndRunTestGraph(opGenerator, ElementWise<Div>, input_ntensors, input_atensors[0],
+ input_atensors[1]);
+ }
+}
+
+TEST(cpp_operations_test, add)
+{
+ for (int num_dims = 2; num_dims <= 4; ++num_dims)
+ {
+ // test prerequisites
+ vector<int> shape_data{2, 3, 5, 7};
+ shape_data.resize(num_dims);
+ vector<Tensor> input_atensors(2);
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
+ fillTensors(input_ntensors[0], input_atensors[0], shape_data, 1.0f);
+ fillTensors(input_ntensors[1], input_atensors[1], shape_data, 2.0f);
+ auto op_generator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::AddOp>(inputs[0], inputs[1]);
+ };
+
+ createAndRunTestGraph(op_generator, ElementWise<Add>, input_ntensors, input_atensors[0],
+ input_atensors[1]);
+ }
+}
+
+TEST(cpp_operations_test, sub)
+{
+ for (int num_dims = 2; num_dims <= 4; ++num_dims)
+ {
+ // test prerequisites
+ vector<int> shape_data{2, 3, 5, 7};
+ shape_data.resize(num_dims);
+ vector<Tensor> input_atensors(2);
+ vector<unique_ptr<mir::TensorVariant>> input_n_tensors(2);
+ fillTensors(input_n_tensors[0], input_atensors[0], shape_data, 1.0f);
+ fillTensors(input_n_tensors[1], input_atensors[1], shape_data, 2.0f);
+ auto opGenerator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::SubOp>(inputs[0], inputs[1]);
+ };
+
+ createAndRunTestGraph(opGenerator, ElementWise<Sub>, input_n_tensors, input_atensors[0],
+ input_atensors[1]);
+ }
+}
+
+TEST(cpp_operations_test, mul)
+{
+ for (int num_dims = 2; num_dims <= 4; ++num_dims)
+ {
+ // test prerequisites
+ vector<int> shape_data{2, 3, 5, 7};
+ shape_data.resize(num_dims);
+ vector<Tensor> input_atensors(2);
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
+ fillTensors(input_ntensors[0], input_atensors[0], shape_data, 1.0f);
+ fillTensors(input_ntensors[1], input_atensors[1], shape_data, 2.0f);
+ auto op_generator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::MulOp>(inputs[0], inputs[1]);
+ };
+
+ createAndRunTestGraph(op_generator, ElementWise<Mul>, input_ntensors, input_atensors[0],
+ input_atensors[1]);
+ }
+}
+
+TEST(cpp_operations_test, max)
+{
+ for (int num_dims = 2; num_dims <= 4; ++num_dims)
+ {
+ // test prerequisites
+ vector<int> shape_data{2, 3, 5, 7};
+ shape_data.resize(num_dims);
+ vector<Tensor> input_atensors(2);
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
+ fillTensors(input_ntensors[0], input_atensors[0], shape_data, 1.0f);
+ fillTensors(input_ntensors[1], input_atensors[1], shape_data, 2.0f);
+ auto op_generator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::MaxOp>(inputs[0], inputs[1]);
+ };
+
+ createAndRunTestGraph(op_generator, ElementWise<Max>, input_ntensors, input_atensors[0],
+ input_atensors[1]);
+ }
+}
+
+TEST(cpp_operations_test, convTransposed2d)
+{
+ // Iterate over kernel width, kernel height,
+ // input channels(input_c), output channels(output_c),
+ // stride width, stride height
+  // size 3 is chosen to cover all cases, where width is bigger/smaller than height and equal/not equal
+ // to 1
+ using iT = int32_t;
+ Tensor temporary(Shape({1024 * 40}));
+ for (iT kernel_h = 2; kernel_h <= 4; ++kernel_h)
+ for (iT kernel_w = 2; kernel_w <= 4; ++kernel_w)
+ for (iT input_c = 1; input_c <= 3; ++input_c)
+ for (iT output_c = 1; output_c <= 3; ++output_c)
+ for (iT stride_h = 1; stride_h <= 3; ++stride_h)
+ for (iT stride_w = 1; stride_w <= 3; ++stride_w)
+ {
+ vector<int> input_shape_data{3, 9, 3, static_cast<int>(input_c)}; // NHWC
+ vector<int> kernel_shape_data{kernel_h, kernel_w, output_c, input_c};
+ vector<int32_t> strides{stride_h, stride_w};
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
+ Tensor input_atensor0;
+ Tensor input_atensor1;
+ fillTensors(input_ntensors[0], input_atensor0, input_shape_data, 1.0f);
+ fillTensors(input_ntensors[1], input_atensor1, kernel_shape_data, 1.0f);
+ auto op_generator = [&strides](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
+ mir::Deconv2DOpAttributes attributes;
+ attributes.strides = strides;
+ return g.create<mir::ops::DeConv2DOp>(inputs[0], inputs[1], attributes);
+ };
+
+ createAndRunTestGraph(op_generator, convTransposed2d, input_ntensors, input_atensor0,
+ input_atensor1, temporary);
+ }
+}
+
+TEST(cpp_operations_test, conv2d)
+{
+ // Iterate over kernel width, kernel height,
+ // input channels(input_c), output channels(output_c),
+ // stride width, stride height
+  // size 3 is chosen to cover all cases, where width is bigger/smaller than height and equal/not equal
+ // to 1
+ using iT = int32_t;
+ Tensor temporary(Shape({1024 * 20}));
+ for (iT kernel_h = 1; kernel_h <= 3; ++kernel_h)
+ for (iT kernel_w = 1; kernel_w <= 3; ++kernel_w)
+ for (iT input_c = 1; input_c <= 3; ++input_c)
+ for (iT output_c = 1; output_c <= 3; ++output_c)
+ for (iT stride_h = 1; stride_h <= 3; ++stride_h)
+ for (iT stride_w = 1; stride_w <= 3; ++stride_w)
+ {
+ vector<int> input_shape_data{3, 5, 7, static_cast<int>(input_c)}; // NHWC
+ vector<int> kernel_shape_data{output_c, kernel_h, kernel_w, input_c}; // OHWI
+ vector<int32_t> strides{stride_h, stride_w};
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
+ Tensor input_atensor0;
+ Tensor input_atensor1;
+ fillTensors(input_ntensors[0], input_atensor0, input_shape_data, 1.0f);
+ fillTensors(input_ntensors[1], input_atensor1, kernel_shape_data, 1.0f);
+ auto op_generator = [&strides](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
+ mir::Conv2DOpAttributes attributes;
+ attributes.strides = strides;
+ return g.create<mir::ops::Conv2DOp>(inputs[0], inputs[1], attributes);
+ };
+
+ createAndRunTestGraph(op_generator, conv2d, input_ntensors, input_atensor0,
+ input_atensor1, temporary);
+ }
+}
+
+TEST(cpp_operations_test, depthwise_conv)
+{
+ // Iterate over kernel width, kernel height,
+ // channels
+ // stride width, stride height
+ // layers multiplier
+  // size 3 is chosen to cover all cases, where width is bigger/smaller than height and equal/not equal
+ // to 1
+ using iT = int32_t;
+ for (iT kernel_h = 1; kernel_h <= 3; ++kernel_h)
+ for (iT kernel_w = 1; kernel_w <= 3; ++kernel_w)
+ for (iT channels = 1; channels <= 3; ++channels)
+ for (iT stride_w = 1; stride_w <= 3; ++stride_w)
+ for (iT stride_h = 1; stride_h <= 3; ++stride_h)
+ for (iT multiplier = 1; multiplier <= 2; ++multiplier)
+ {
+ vector<int> input_shape_data{3, 7, 6, static_cast<int>(channels)}; // NHWC
+ vector<int> kernel_shape_data{kernel_h, kernel_w, channels, multiplier}; // HWCN
+ vector<int32_t> strides{stride_h, stride_w};
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
+ Tensor input_atensor0;
+ Tensor input_atensor1;
+ fillTensors(input_ntensors[0], input_atensor0, input_shape_data, 1.0f);
+ fillTensors(input_ntensors[1], input_atensor1, kernel_shape_data, 1.0f);
+ auto op_generator = [&strides](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
+ mir::Conv2DOpAttributes attributes;
+ attributes.strides = strides;
+ return g.create<mir::ops::DepthwiseConv2DOp>(inputs[0], inputs[1], attributes);
+ };
+
+ createAndRunTestGraph(op_generator, depthwiseConv2d, input_ntensors, input_atensor0,
+ input_atensor1);
+ }
+}
+
+TEST(cpp_operations_test, fully_connected)
+{
+ vector<int> input_shape_data{3, 13};
+ vector<int> weights_shape_data{13, 7};
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
+ Tensor input_atensor0;
+ Tensor input_atensor1;
+ fillTensors(input_ntensors[0], input_atensor0, input_shape_data, 1.0f);
+ fillTensors(input_ntensors[1], input_atensor1, weights_shape_data, 1.0f);
+ auto op_generator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::FullyConnectedOp>(inputs[0], inputs[1]);
+ };
+
+ createAndRunTestGraph(op_generator, fullConnect, input_ntensors, input_atensor0, input_atensor1);
+}
+
+TEST(cpp_operations_test, resize_NN_test)
+{
+ mir::Shape test_shapes[] = {{1, 8, 8, 1}, {2, 10, 10, 1}, {1, 11, 11, 2}, {2, 8, 12, 2},
+ {1, 48, 12, 1}, {1, 48, 48, 1}, {1, 48, 56, 1}};
+ for (mir::Shape res_shape : test_shapes)
+ {
+ vector<int> input_shape_data{res_shape.dim(0), 4, 4, res_shape.dim(3)};
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ Tensor input_atensor;
+ fillTensors(input_ntensors[0], input_atensor, input_shape_data, 1.0f);
+ auto op_generator = [&res_shape](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::ResizeOp>(
+ inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
+ };
+
+ createAndRunTestGraph(op_generator, resize, input_ntensors, input_atensor);
+ }
+}
+
+TEST(cpp_operations_test, resize_NN_test_scales)
+{
+ cout << "\n";
+ std::vector<float> test_scales[] = {
+ {1, 2, 2, 1}, {1, 2, 3, 1}, {1, 3, 2, 1}, {1, 2.5, 2, 1}, {1, 3, 9, 1}};
+ for (const std::vector<float> &scales : test_scales)
+ {
+ vector<int> input_shape_data{1, 4, 4, 1};
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ Tensor input_atensor;
+ fillTensors(input_ntensors[0], input_atensor, input_shape_data, 1.0f);
+ auto op_generator = [&scales](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::ResizeOp>(
+ inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales);
+ };
+ createAndRunTestGraph(op_generator, resize, input_ntensors, input_atensor);
+ }
+}
+
+TEST(cpp_operations_test, avgpool)
+{
+ // Iterate over window width, window height
+ // channels
+ // stride width, stride height
+ // size 3 is chosen to cover all cases, where width bigger/smaller than height and equal/not equal
+ // to 1
+ using iT = int32_t;
+ for (iT windowH = 1; windowH <= 3; ++windowH)
+ for (iT windowW = 1; windowW <= 3; ++windowW)
+ for (iT channels = 1; channels <= 2; ++channels)
+ for (iT stride_h = 1; stride_h <= 3; ++stride_h)
+ for (iT stride_w = 1; stride_w <= 3; ++stride_w)
+ {
+ vector<int> shape_data{3, 5, 7, static_cast<int>(channels)};
+ vector<int32_t> window_size{windowH, windowW};
+ vector<int32_t> strides{stride_h, stride_w};
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ fillTensors(input_ntensors[0], input_atensor, shape_data, 1.0f);
+
+ mir::AvgPool2DOpAttributes attributes;
+ attributes.window = window_size;
+ attributes.strides = strides;
+ for (const auto include_pad : {false, true})
+ {
+ attributes.include_pad = include_pad;
+ auto op_generator = [&attributes](
+ mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::AvgPool2DOp>(inputs[0], attributes);
+ };
+
+ createAndRunTestGraph(op_generator, avgPool, input_ntensors, input_atensor);
+ }
+ }
+}
+
+TEST(cpp_operations_test, maxpool)
+{
+ // Iterate over window width, window height
+ // channels
+ // stride width, stride height
+ // size 3 is chosen to cover all cases, where width bigger/smaller than height and equal/not equal
+ // to 1
+ using iT = int32_t;
+ for (iT windowH = 1; windowH <= 3; ++windowH)
+ for (iT windowW = 1; windowW <= 3; ++windowW)
+ for (iT channels = 1; channels <= 2; ++channels)
+ for (iT stride_h = 1; stride_h <= 3; ++stride_h)
+ for (iT stride_w = 1; stride_w <= 3; ++stride_w)
+ {
+ vector<int> shape_data{3, 5, 7, static_cast<int>(channels)};
+ vector<int32_t> window_size{windowH, windowW};
+ vector<int32_t> strides{stride_h, stride_w};
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ fillTensors(input_ntensors[0], input_atensor, shape_data, 1.0f);
+
+ auto op_generator = [&window_size, &strides](
+ mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ mir::MaxPool2DOpAttributes attributes;
+ attributes.window = window_size;
+ attributes.strides = strides;
+ return g.create<mir::ops::MaxPool2DOp>(inputs[0], attributes);
+ };
+
+ createAndRunTestGraph(op_generator, maxPool, input_ntensors, input_atensor);
+ }
+}
+
+TEST(cpp_operations_test, relu)
+{
+ // test prerequisites
+ vector<int> shape_data{2, 3, 4, 5};
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ fillTensors(input_ntensors[0], input_atensor, shape_data, 1.0f);
+ auto op_generator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::ReluOp>(inputs[0]);
+ };
+
+ createAndRunTestGraph(op_generator, relu, input_ntensors, input_atensor);
+}
+
+TEST(cpp_operations_test, leaky_relu)
+{
+ // test prerequisites
+ vector<int> shape_data{2, 3, 4, 5};
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ fillTensors(input_ntensors[0], input_atensor, shape_data, 1.0f);
+ auto op_generator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::LeakyReluOp>(inputs[0], 0.1);
+ };
+
+ createAndRunTestGraph(op_generator, leakyRelu, input_ntensors, input_atensor);
+}
+
+TEST(cpp_operations_test, sigmoid)
+{
+ // test prerequisites
+ vector<int> shape_data{2, 3, 4, 5};
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ fillTensors(input_ntensors[0], input_atensor, shape_data, 1.0f);
+ auto opGenerator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::SigmoidOp>(inputs[0]);
+ };
+
+ createAndRunTestGraph(opGenerator, sigmoid, input_ntensors, input_atensor);
+}
+
+TEST(cpp_operations_test, elu)
+{
+ // test prerequisites
+ vector<int> shape_data{2, 3, 4, 5};
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ fillTensors(input_ntensors[0], input_atensor, shape_data, 1.0f);
+ auto op_generator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::EluOp>(inputs[0], 1);
+ };
+
+ createAndRunTestGraph(op_generator, elu, input_ntensors, input_atensor);
+}
+
+TEST(cpp_operations_test, tanh)
+{
+ // test prerequisites
+ vector<int> shape_data{2, 3, 4, 5};
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ fillTensors(input_ntensors[0], input_atensor, shape_data, 1.0f);
+ auto op_generator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::TanhOp>(inputs[0]);
+ };
+
+ createAndRunTestGraph(op_generator, tanhActivation, input_ntensors, input_atensor);
+}
+
+TEST(cpp_operations_test, reduceMeanTst)
+{
+ // test prerequisites
+ // different test cases
+ std::vector<int> test_axis_list[] = {{2, 3}, {1}, {0}, {2}, {3}, {0, 2}, {1, 2, 3}};
+ for (const vector<int> &axis_list : test_axis_list)
+ {
+ for (const bool keep_dims : {true, false})
+ {
+ vector<int> input_shape_data{2, 3, 4, 5};
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ fillTensors(input_ntensors[0], input_atensor, input_shape_data, 1.0f);
+ auto op_generator = [&axis_list, keep_dims](
+ mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ auto op = g.create<mir::ops::ReduceMeanOp>(inputs[0], axis_list, keep_dims);
+ return op;
+ };
+
+ createAndRunTestGraph(op_generator, reduceMean, input_ntensors, input_atensor);
+ }
+ }
+}
+
+TEST(cpp_operations_test, softmax)
+{
+ // iterate over number of dimensions in tensor
+ for (int num_dims = 1; num_dims <= 4; ++num_dims)
+ {
+ // test prerequisites
+ vector<int> shape_data{2, 3, 4, 5};
+ shape_data.resize(num_dims);
+ int axis = num_dims - 1;
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ fillTensors(input_ntensors[0], input_atensor, shape_data, 1.0f);
+ auto op_generator = [axis](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::SoftmaxOp>(inputs[0], axis);
+ };
+
+ createAndRunTestGraph(op_generator, softmax, input_ntensors, input_atensor);
+ }
+}
+
+TEST(cpp_operations_test, slice4d)
+{
+ vector<int> shape_data{5, 30, 40, 12};
+ vector<int> starts[] = {{0, 0, 0, 0}, {1, 1, 1, 1}, {1, 0, 1, 0}, {0, 1, 1, 0}};
+ vector<int> sizes[] = {
+ {-1, -1, -1, -1}, {4, -1, 10, -1},
+ };
+ for (auto st : starts)
+ {
+ for (auto sz : sizes)
+ {
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_n_tensor(1);
+ fillTensors(input_n_tensor[0], input_atensor, shape_data, 1.0f);
+ auto op_gen = [st, sz](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::SliceOp>(inputs[0], mir::Shape(st), mir::Shape(sz));
+ };
+ createAndRunTestGraph(op_gen, slice, input_n_tensor, input_atensor);
+ }
+ }
+}
+
+TEST(cpp_operations_test, reshape)
+{
+ // test prerequisites
+ vector<int> input_shape_data{2, 3, 4, 5};
+ vector<int> output_shape_data{1, 120};
+ mir::Shape output_nshape;
+ fillNShape(output_nshape, output_shape_data);
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ fillTensors(input_ntensors[0], input_atensor, input_shape_data, 1.0f);
+ auto op_generator = [&output_nshape](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::ReshapeOp>(inputs[0], output_nshape);
+ };
+
+ createAndRunTestGraph(op_generator, reshape, input_ntensors, input_atensor);
+}
+
+TEST(cpp_operations_test, abs)
+{
+ // test prerequisites
+ vector<int> shape_data{2, 3, 4, 5};
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensor(1);
+ fillTensors(input_ntensor[0], input_atensor, shape_data, 1.0f);
+ auto op_generator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::AbsOp>(inputs[0]);
+ };
+ createAndRunTestGraph(op_generator, absFN, input_ntensor, input_atensor);
+}
+
+TEST(cpp_operations_test, sqrt)
+{
+ // test prerequisites
+ vector<int> shape_data{2, 3, 4, 5};
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensor(1);
+ fillTensors(input_ntensor[0], input_atensor, shape_data, 1.0f);
+ auto op_generator = [](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::SqrtOp>(inputs[0]);
+ };
+ createAndRunTestGraph(op_generator, sqrtFN, input_ntensor, input_atensor);
+}
+
+TEST(cpp_operations_test, pad)
+{
+ // test on matrix 2x3
+ vector<int> input_shape{2, 3};
+
+ Tensor input_atensor;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensor(1);
+ fillTensors(input_ntensor[0], input_atensor, input_shape, 1.0f);
+ // PadOp params
+ mir::PadOpAttributes attributes;
+ attributes.padding_before = {1, 2};
+ attributes.padding_after = {1, 2};
+
+ auto op_generator = [&attributes](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::PadOp>(inputs[0], attributes);
+ };
+
+ createAndRunTestGraph(op_generator, pad, input_ntensor, input_atensor);
+}
+
+TEST(cpp_operations_test, transpose)
+{
+ // test transpose for 4 dims tensors
+ vector<int> input_shape_4d{2, 3, 4, 5};
+ Tensor input_atensor_4d;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensor_4d(1);
+ fillTensors(input_ntensor_4d[0], input_atensor_4d, input_shape_4d, 1.0f);
+
+ vector<size_t> test_cases_pack_4d[] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {3, 2, 1, 0}};
+ for (const auto &permute : test_cases_pack_4d)
+ {
+ auto op_generator = [&permute](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::TransposeOp>(inputs[0], permute);
+ };
+ createAndRunTestGraph(op_generator, transpose, input_ntensor_4d, input_atensor_4d);
+ }
+
+ // test transpose for 3 dims tensors
+ vector<int> input_shape_3d{3, 4, 5};
+ Tensor input_atensor_3d;
+ vector<unique_ptr<mir::TensorVariant>> input_ntensor_3d(1);
+ fillTensors(input_ntensor_3d[0], input_atensor_3d, input_shape_3d, 1.0f);
+ vector<size_t> test_cases_pack_3d[] = {{0, 1, 2}, {1, 0, 2}, {2, 1, 0}};
+ for (const auto &permute : test_cases_pack_3d)
+ {
+ auto op_generator = [&permute](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::TransposeOp>(inputs[0], permute);
+ };
+ createAndRunTestGraph(op_generator, transpose, input_ntensor_3d, input_atensor_3d);
+ }
+}
+
+TEST(cpp_operation_test, broadcast)
+{
+ const mir::Shape target_shapes[] = {{6}, {2, 3}, {2, 3, 1}, {1, 2, 1, 3}};
+ for (const mir::Shape &target_shape : target_shapes)
+ {
+ vector<int> input_shape_data{};
+ vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
+ Tensor input_atensor;
+ fillTensors(input_ntensors[0], input_atensor, input_shape_data, 1.0f);
+ auto op_generator = [&target_shape](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::BroadcastOp>(inputs[0], target_shape);
+ };
+ createAndRunTestGraph(op_generator, broadcast, input_ntensors, input_atensor);
+ }
+}
diff --git a/compiler/nnc/unittests/soft_backend/Generator.cpp b/compiler/nnc/unittests/soft_backend/Generator.cpp
new file mode 100644
index 000000000..dce700348
--- /dev/null
+++ b/compiler/nnc/unittests/soft_backend/Generator.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backends/soft_backend/CPPGenerator.h"
+#include "mir/ops/ReluOp.h"
+
+#include <gtest/gtest.h>
+
+#include <fstream>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <cstdio>
+#include <ftw.h>
+
+using namespace std;
+
+using namespace nnc;
+using namespace mir;
+
+static bool isFileExists(const string &path)
+{
+ ifstream f(path);
+ return f.good();
+}
+
+static void deleteFile(const string &path)
+{
+ int res = remove(path.c_str());
+ assert(!res && "failed to remove file");
+ (void)res;
+}
+
+int removeRec(const char *fpath, const struct stat * /*sb*/, int /*typeflag*/,
+ struct FTW * /*ftwbuf*/)
+{
+ deleteFile(fpath);
+ return 0;
+}
+
+static void deleteDir(const string &path)
+{
+ int res = nftw(path.c_str(), removeRec, 1, FTW_DEPTH | FTW_PHYS);
+ assert(!res && "failed to remove dir");
+ (void)res;
+}
+
+static void checkOutputExists(const string &common_path)
+{
+ ASSERT_TRUE(isFileExists(common_path + ".h"));
+ ASSERT_TRUE(isFileExists(common_path + ".cpp"));
+ ASSERT_TRUE(isFileExists(common_path + ".params"));
+}
+
+static void emptyFile(const string &path) { ofstream of(path); }
+
+TEST(Generator, check_generator_call)
+{
+// assume here that c++ and c code generators behave identically in terms of parameters check
+// test only c++ generator
+#define TEST_DIR "output_dir"
+#define TEST_NAME "someName"
+#define BASE_NAME TEST_DIR "/" TEST_NAME
+
+ mir::Graph g;
+ mir::TensorType input_type{mir::DataType::FLOAT32, Shape{1, 2, 3, 4}};
+ Operation::Output *input = g.create<ops::InputOp>(input_type)->getOutput(0);
+ input->setName("input");
+ Operation *output = g.create<ops::ReluOp>(input);
+
+ // test that generator creates output dir and files
+ if (isFileExists(TEST_DIR))
+ deleteDir(TEST_DIR);
+ assert(!isFileExists(TEST_DIR) && "remove output dir");
+ CPPCodeGenerator cpp_code_generator(TEST_DIR, TEST_NAME);
+ cpp_code_generator.run(&g);
+ checkOutputExists(BASE_NAME);
+
+ // test that generator creates output files in existing empty dir
+ deleteFile(BASE_NAME ".h");
+ deleteFile(BASE_NAME ".cpp");
+ deleteFile(BASE_NAME ".params");
+ cpp_code_generator.run(&g);
+ checkOutputExists(BASE_NAME);
+
+ // test that generator rewrites existing files
+ emptyFile(BASE_NAME ".h");
+ struct stat sBefore, sAfter;
+ int res = stat(BASE_NAME ".h", &sBefore);
+ assert(res == 0);
+ (void)res;
+ assert(sBefore.st_size == 0);
+ cpp_code_generator.run(&g);
+ res = stat(BASE_NAME ".h", &sAfter);
+ assert(res == 0);
+
+ ASSERT_NE(sBefore.st_size, sAfter.st_size);
+
+ deleteDir(TEST_DIR);
+}
diff --git a/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp b/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp
new file mode 100644
index 000000000..d38385e91
--- /dev/null
+++ b/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModelAnalyzer.h"
+#include "mir/Graph.h"
+#include "mir/ops/InputOp.h"
+#include "mir/ops/ReluOp.h"
+#include "mir/ops/ConcatOp.h"
+
+#include <gtest/gtest.h>
+
+using namespace std;
+using namespace nnc;
+using namespace mir;
+using namespace sir;
+
+static const CallFunction *getCall(const unique_ptr<Action> &ptr)
+{
+ return dynamic_cast<const CallFunction *>(ptr.get());
+}
+
+/*
+ * This test designed to check basic layout properties of Model analyzer
+ */
+TEST(ModelAnalyzer, linearization)
+{
+ mir::Graph g;
+ /*
+ * Create graph:
+ * [input]
+ * / \
+ * | |
+ * V V
+ * [head1] [head2]
+ * | |
+ * V V
+ * [tail1] [tail2]
+ * \ /
+ * \ /
+ * [join]
+ */
+ mir::TensorType input_type{mir::DataType::FLOAT32, Shape{1, 2, 3}};
+ Operation *input = g.create<ops::InputOp>(input_type);
+ Operation *head1 = g.create<ops::ReluOp>(input->getOutput(0));
+ Operation *head2 = g.create<ops::ReluOp>(input->getOutput(0));
+ Operation *tail1 = g.create<ops::ReluOp>(head1->getOutput(0));
+ Operation *tail2 = g.create<ops::ReluOp>(head2->getOutput(0));
+ vector<mir::Operation::Output *> concat_inputs{tail1->getOutput(0), tail2->getOutput(0)};
+ Operation *join = g.create<ops::ConcatOp>(concat_inputs, 0);
+ input->getOutput(0)->setName("input");
+ head1->getOutput(0)->setName("head1");
+ head2->getOutput(0)->setName("head2");
+ tail1->getOutput(0)->setName("tail1");
+ tail2->getOutput(0)->setName("tail2");
+ join->getOutput(0)->setName("join");
+
+ // Check that layout is desired
+ ModelAnalyzer ma;
+ ma.analyze(&g);
+ const auto &seq = ma.getInferenceSequence();
+ ASSERT_EQ(seq.size(), 6u);
+
+ vector<Operation *> op_seq(seq.size());
+ transform(seq.cbegin(), seq.cend(), op_seq.begin(),
+ [](const unique_ptr<sir::Action> &action) { return getCall(action)->mirOp; });
+
+ vector<Operation *> valid_seq1{input, head1, tail1, head2, tail2, join};
+ vector<Operation *> valid_seq2{input, head2, tail2, head1, tail1, join};
+ ASSERT_TRUE(op_seq == valid_seq1 || op_seq == valid_seq2);
+}
diff --git a/compiler/nnc/unittests/support/CMakeLists.txt b/compiler/nnc/unittests/support/CMakeLists.txt
new file mode 100644
index 000000000..d236159ce
--- /dev/null
+++ b/compiler/nnc/unittests/support/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(CL_SOURCES ${NNC_SUPPORT_DIR}/CommandLine.cpp)
+set(CL_TESTS CommandLineTest.cpp)
+
+nnc_add_unit_test(nnc_support_commandline_test ${CL_TESTS} ${CL_SOURCES})
diff --git a/compiler/nnc/unittests/support/CommandLineTest.cpp b/compiler/nnc/unittests/support/CommandLineTest.cpp
new file mode 100644
index 000000000..73f77aa20
--- /dev/null
+++ b/compiler/nnc/unittests/support/CommandLineTest.cpp
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "support/CommandLine.h"
+
+#include "gtest/gtest.h"
+
+using namespace nnc::cli;
+
+void soption_checker1(const Option<std::string> &opt) { ASSERT_EQ(opt, "SOME_VALUE1,SOME_VALUE2"); }
+
+void soption_checker2(const Option<std::string> &opt) { ASSERT_EQ(opt, "AAA_VALUE"); }
+
+void boption_checker(const Option<bool> &opt) { ASSERT_EQ(opt, false); }
+
+/**
+ * declare command line options for testing
+ */
+//
+// declare string option
+//
+// test option with several names
+Option<std::string> SMultiOpt(optname("--multopt, -m, -mul"),
+ overview("description of option with several names"));
+// test option with single name
+Option<std::string> SSingleOpt(optname("--single"),
+ overview("description of option with single name"));
+// test option with several separators
+Option<std::string> SSeveralSepOpt(optname("-several_separators"),
+ overview("description of option with several separators"), "",
+ optional(false), optvalues(""), soption_checker1,
+ separators("=, :"));
+// test option with one separator
+Option<std::string> SOneSepOpt(optname("--one_separarot"),
+ overview("description of option with one separator"), "",
+ optional(false), optvalues(""), soption_checker2, separators("="));
+// test option with default value
+Option<std::string> SDefaultOpt(optname("-default_val_opt"),
+ overview("description of option with default value"),
+ "DEFAULT_VALUE");
+// test optional option
+Option<std::string> SOptionalOpt(optname("--optional_opt"),
+ overview("description of optional option"), "", optional(true));
+// test option with valid values
+Option<std::string> SValidValsOpt(optname("-valid_opt"),
+ overview("description of option with valid values"), "",
+ optional(true), optvalues("value1, value2"));
+
+//
+// declare integer options
+//
+// test option with negative value and valid values
+Option<int32_t> NNegOpt(optname("-neg_val"),
+ overview("description of integer option with negative value"), -1,
+ optional(false), optvalues("-42, -33"));
+
+// test option with default negative value
+Option<int32_t>
+ NDefaultNegOpt(optname("-default_neg_val"),
+ overview("description of integer option with default negative value"), -33);
+// test option with positive values
+Option<uint32_t> NPosOpt(optname("-pos_val"),
+ overview("description of integer option with positive value"), 1,
+ optional(false), optvalues("42, 33"));
+
+//
+// declare char options
+//
+Option<char> CharOpt(optname("-char-opt"), overview("description of char option"), '\0',
+ optional(false), optvalues("a, b"));
+
+Option<char> DashOpt(optname("-dash_opt"), overview("description of char option with dash value"),
+ '\0', optional(false), optvalues("-"));
+
+//
+// declare bool option
+//
+Option<bool> BoolOpt(optname("-bool_opt"), overview("description of bool option"), true,
+ optional(false), optvalues(""), boption_checker, separators("="));
+Option<bool> BoolOpt2(optname("-bool-opt2"), overview("description of bool option with value"));
+
+//
+// declare vector<string> option
+//
+Option<std::vector<std::string>> VecStrOpt1(optname("-vec_opt1"),
+ overview("description of vector option"));
+Option<std::vector<std::string>> VecStrOpt2(optname("-vec_opt2"),
+ overview("description of vector option"));
+Option<std::vector<std::string>> VecStrOptWithVals(optname("--vec_opt_with_vals"),
+ overview("description of vector option"),
+ std::vector<std::string>(), optional(false),
+ optvalues("abc, 123, xxx"));
+//
+// declare options in group
+//
+//
+// declare bool option
+//
+Option<bool> GroupOpt1(optname("-group_opt1"), overview("description of group option"), true,
+ optional(false), optvalues(""), nullptr, separators(""), showopt(true),
+ IOption::Group::caffe2);
+Option<std::string> GroupOpt2(optname("-group_opt2"), overview("description of group option"),
+ std::string(), optional(true), optvalues(""), nullptr, separators(""),
+ showopt(true), IOption::Group::caffe2);
+Option<int32_t> GroupOpt3(optname("-group_opt3"), overview("description of group option"), 42,
+ optional(true), optvalues(""), nullptr, separators(""), showopt(true),
+ IOption::Group::onnx);
+
+// test options
+TEST(SUPPORT_NNC, verify_cl_options)
+{
+ // create command line
+ const char *argv[] = {
+ "CLTest", // program name
+ // string options
+ "-m", "multiopt_value", // second name for option with several names
+ "--single", "single_value", // option with single name
+ "-several_separators:SOME_VALUE1,SOME_VALUE2", // test option with several separators
+ "--one_separarot=AAA_VALUE", // test option with one separator
+ "-default_val_opt", // test option with default value
+ "--optional_opt", "/home/guest/tmp", // test optional option
+ "-valid_opt", "value2", // test options with defined values
+ // integer options
+ "-neg_val", "-42", // test negative value for integer option
+ "-default_neg_val", // test integer option with default value
+ "-pos_val", "33", // test positive value for integer option
+ // char options
+ "-char-opt", "b", "-dash_opt", "-",
+ // bool options
+ "-bool_opt=false", "-bool-opt2",
+ // vector of strings options
+ "-vec_opt1", "1", "c", "222", "ABC", "857", "-vec_opt2", "--vec_opt_with_vals", "abc", "123",
+ "xxx", "abc", "xxx",
+ // grouped options
+ "-group_opt1", "-group_opt2", "abc", "-group_opt3", "11", nullptr};
+ int argc = (sizeof(argv) / sizeof(argv[0])) - 1;
+
+ // It must be failed if option is not passed and other options are in the same group
+ argv[argc - 5] = "-m"; // disable -group_opt1
+ ASSERT_DEATH(CommandLine::getParser()->parseCommandLine(argc, argv), "");
+ argv[argc - 5] = "-group_opt1"; // enable -group_opt1
+
+ // test when mandatory grouped option is not passed. It must be OK if options being from the same
+ // group are missed
+ argv[argc - 4] = "-m"; // disable -group_opt2
+ argv[argc - 2] = "-m"; // disable -group_opt3
+ CommandLine::getParser()->parseCommandLine(argc, argv);
+ argv[argc - 4] = "-group_opt2"; // enable -group_opt2
+ argv[argc - 2] = "-group_opt3"; // enable -group_opt3
+
+ // parse command line
+ CommandLine::getParser()->parseCommandLine(argc, argv);
+
+ // here we put value from options
+ std::string tmp_string = SMultiOpt;
+ int32_t tmp_sint = NNegOpt;
+ char tmp_char = CharOpt;
+ bool tmp_bool = BoolOpt;
+ std::vector<std::string> tmp_vec = VecStrOpt1;
+
+ //
+ // string options
+ //
+ // check option with several names
+ ASSERT_EQ(SMultiOpt, "multiopt_value");
+ ASSERT_EQ(tmp_string, "multiopt_value");
+ ASSERT_EQ(tmp_string, SMultiOpt);
+
+ // check option with single name
+ tmp_string = SSingleOpt;
+ ASSERT_EQ(SSingleOpt, "single_value");
+ ASSERT_EQ(tmp_string, "single_value");
+ ASSERT_EQ(SSingleOpt, tmp_string);
+
+ // check option with separators
+ ASSERT_EQ(SSeveralSepOpt, "SOME_VALUE1,SOME_VALUE2");
+
+ // check option with one separator
+ ASSERT_EQ(SOneSepOpt, "AAA_VALUE");
+
+ // check option with default value
+ ASSERT_EQ(SDefaultOpt, "DEFAULT_VALUE");
+
+ // check optional option
+ ASSERT_EQ(SOptionalOpt, "/home/guest/tmp");
+
+ //
+ // integer options
+ //
+ // check option with valid values
+ ASSERT_EQ(SValidValsOpt, "value2");
+
+ // check option with negative value
+ ASSERT_EQ(NNegOpt, -42);
+ ASSERT_EQ(tmp_sint, -42);
+ ASSERT_EQ(tmp_sint, NNegOpt);
+
+ // check integer option with default value
+ tmp_sint = NDefaultNegOpt;
+ ASSERT_EQ(NDefaultNegOpt, -33);
+ ASSERT_EQ(tmp_sint, -33);
+ ASSERT_EQ(NDefaultNegOpt, tmp_sint);
+
+ // check integer option with positive value
+ ASSERT_EQ(NPosOpt, 33u);
+
+ //
+ // char options
+ //
+ ASSERT_EQ(CharOpt, 'b');
+ ASSERT_EQ(tmp_char, 'b');
+ ASSERT_EQ(tmp_char, CharOpt);
+
+ tmp_char = DashOpt;
+ ASSERT_EQ(DashOpt, '-');
+ ASSERT_EQ(tmp_char, '-');
+ ASSERT_EQ(DashOpt, tmp_char);
+
+ //
+ // bool options
+ //
+ ASSERT_EQ(BoolOpt, false);
+ ASSERT_EQ(tmp_bool, false);
+ ASSERT_EQ(tmp_bool, BoolOpt);
+
+ tmp_bool = BoolOpt2;
+ ASSERT_EQ(BoolOpt2, true);
+ ASSERT_EQ(tmp_bool, true);
+ ASSERT_EQ(BoolOpt2, tmp_bool);
+
+ //
+ // vector of strings options
+ //
+ ASSERT_EQ(tmp_vec, VecStrOpt1);
+ ASSERT_EQ(VecStrOpt1[0], "1");
+ ASSERT_EQ(tmp_vec[1], "c");
+ ASSERT_EQ(VecStrOpt1[2], "222");
+ ASSERT_EQ(tmp_vec[3], "ABC");
+ ASSERT_EQ(VecStrOpt1[4], "857");
+
+ tmp_vec = VecStrOpt2;
+ ASSERT_EQ(VecStrOpt2, tmp_vec);
+ ASSERT_TRUE(VecStrOpt2.empty());
+
+ ASSERT_EQ(VecStrOptWithVals[0], "abc");
+ ASSERT_EQ(VecStrOptWithVals[1], "123");
+ ASSERT_EQ(VecStrOptWithVals[2], "xxx");
+ ASSERT_EQ(VecStrOptWithVals[3], "abc");
+ ASSERT_EQ(VecStrOptWithVals[4], "xxx");
+
+ //
+ // grouped options
+ //
+ ASSERT_TRUE(GroupOpt1.isGrouped() && GroupOpt2.isGrouped() && GroupOpt3.isGrouped());
+ ASSERT_EQ(GroupOpt1.getGroup(), GroupOpt2.getGroup());
+ ASSERT_NE(GroupOpt2.getGroup(), GroupOpt3.getGroup());
+ ASSERT_EQ(GroupOpt3.getGroupName(), "onnx");
+}
diff --git a/compiler/nnc/unittests/transformations/CMakeLists.txt b/compiler/nnc/unittests/transformations/CMakeLists.txt
new file mode 100644
index 000000000..eb3303a98
--- /dev/null
+++ b/compiler/nnc/unittests/transformations/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(TESTS_TRANSFORMATIONS_SRC Switcher.cpp)
+
+nnc_add_unit_test(nnc_transformations_test ${TESTS} ${TESTS_TRANSFORMATIONS_SRC})
+optional_target_link_libraries(nnc_transformations_test nnc_transformations mir)
diff --git a/compiler/nnc/unittests/transformations/Switcher.cpp b/compiler/nnc/unittests/transformations/Switcher.cpp
new file mode 100644
index 000000000..049ac44cd
--- /dev/null
+++ b/compiler/nnc/unittests/transformations/Switcher.cpp
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "passes/transformations/DataFormatSwitcher.h"
+
+#include "mir/ops/AvgPool2DOp.h"
+#include "mir/ops/Conv2DOp.h"
+#include "mir/ops/Deconv2DOp.h"
+#include "mir/ops/DepthwiseConv2DOp.h"
+#include "mir/ops/MaxPool2DOp.h"
+#include "mir/ops/TransposeOp.h"
+
+TEST(TRANSFORMATIONS, Switcher_Conv2D_NCHW2NHWC)
+{
+ mir::Graph g;
+ mir::TensorType input_type{mir::DataType::FLOAT32, {1, 3, 299, 299}};
+ auto *input = g.create<mir::ops::InputOp>(input_type);
+
+ mir::TensorType kernel_type{mir::DataType::FLOAT32, {3, 32, 3, 3}};
+ auto *kernel = g.create<mir::ops::InputOp>(kernel_type);
+ // Conv2DOp
+ mir::Conv2DOpAttributes attributes;
+ attributes.strides = {2, 5};
+ attributes.padding_before = {8, 1};
+ attributes.padding_after = {7, 9};
+ attributes.data_format = mir::DataFormat::NCHW;
+ auto *conv = g.create<mir::ops::Conv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
+
+ auto *output = g.create<mir::ops::OutputOp>(conv->getOutput(0));
+
+ nnc::DataFormatSwitcher switcher(mir::DataFormat::NHWC);
+
+ switcher.run(&g);
+
+ auto *trans_out = output->getInput(0)->getNode();
+ auto *conv_ = trans_out->getInput(0)->getNode();
+ auto *trans_in = conv_->getInput(0)->getNode();
+ auto *input_ = trans_in->getInput(0)->getNode();
+
+ ASSERT_EQ(trans_out->getType(), mir::Operation::Type::transpose);
+ ASSERT_NE(conv_, conv);
+ ASSERT_EQ(trans_in->getType(), mir::Operation::Type::transpose);
+ ASSERT_EQ(input_, input);
+
+ auto &in_axis_order = dynamic_cast<mir::ops::TransposeOp *>(trans_in)->getAxisOrder();
+ auto &out_axis_order = dynamic_cast<mir::ops::TransposeOp *>(trans_out)->getAxisOrder();
+
+ ASSERT_EQ(in_axis_order.size(), 4);
+ ASSERT_EQ(in_axis_order, std::vector<size_t>({0, 2, 3, 1}));
+
+ ASSERT_EQ(out_axis_order.size(), 4);
+ ASSERT_EQ(out_axis_order, std::vector<size_t>({0, 3, 1, 2}));
+ // Check Conv2D params
+ auto *nhwc_conv = dynamic_cast<mir::ops::Conv2DOp *>(conv_);
+ ASSERT_EQ(nhwc_conv->getDataFormat(), mir::DataFormat::NHWC);
+ ASSERT_EQ(nhwc_conv->getStrides(), std::vector<int32_t>({2, 5}));
+ ASSERT_EQ(nhwc_conv->getPaddingBefore(), std::vector<int32_t>({8, 1}));
+ ASSERT_EQ(nhwc_conv->getPaddingAfter(), std::vector<int32_t>({7, 9}));
+}
+
+TEST(TRANSFORMATIONS, Switcher_DWConv2D_NHWC2NCHW)
+{
+ mir::Graph g;
+
+ mir::TensorType input_type{mir::DataType::FLOAT32, {1, 112, 112, 32}};
+ auto *input = g.create<mir::ops::InputOp>(input_type);
+
+ mir::TensorType kernel_type{mir::DataType::FLOAT32, {3, 3, 32, 3}};
+ auto *kernel = g.create<mir::ops::InputOp>(kernel_type);
+ // DepthwiseConv2DOp
+ mir::Conv2DOpAttributes attributes;
+ attributes.strides = {3, 25};
+ attributes.padding_before = {67, 123};
+ attributes.padding_after = {32, 356};
+ auto *dw_conv =
+ g.create<mir::ops::DepthwiseConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
+
+ auto *output = g.create<mir::ops::OutputOp>(dw_conv->getOutput(0));
+
+ nnc::DataFormatSwitcher switcher(mir::DataFormat::NCHW);
+
+ switcher.run(&g);
+
+ auto *trans_out = output->getInput(0)->getNode();
+ auto *dw_conv_ = trans_out->getInput(0)->getNode();
+ auto *trans_in = dw_conv_->getInput(0)->getNode();
+ auto *input_ = trans_in->getInput(0)->getNode();
+
+ ASSERT_EQ(trans_out->getType(), mir::Operation::Type::transpose);
+ ASSERT_NE(dw_conv_, dw_conv);
+ ASSERT_EQ(trans_in->getType(), mir::Operation::Type::transpose);
+ ASSERT_EQ(input_, input);
+
+ auto &in_axis_order = dynamic_cast<mir::ops::TransposeOp *>(trans_in)->getAxisOrder();
+ auto &out_axis_order = dynamic_cast<mir::ops::TransposeOp *>(trans_out)->getAxisOrder();
+
+ ASSERT_EQ(in_axis_order.size(), 4);
+ ASSERT_EQ(in_axis_order, std::vector<size_t>({0, 3, 1, 2}));
+
+ ASSERT_EQ(out_axis_order.size(), 4);
+ ASSERT_EQ(out_axis_order, std::vector<size_t>({0, 2, 3, 1}));
+ // Check DepthwiseConv2D params
+ auto *nhwc_dw_conv = dynamic_cast<mir::ops::DepthwiseConv2DOp *>(dw_conv_);
+ ASSERT_EQ(nhwc_dw_conv->getDataFormat(), mir::DataFormat::NCHW);
+ ASSERT_EQ(nhwc_dw_conv->getStrides(), std::vector<int32_t>({3, 25}));
+ ASSERT_EQ(nhwc_dw_conv->getPaddingBefore(), std::vector<int32_t>({67, 123}));
+ ASSERT_EQ(nhwc_dw_conv->getPaddingAfter(), std::vector<int32_t>({32, 356}));
+}
+
+TEST(TRANSFORMATIONS, Switcher_DeConv2D_NHWC2NCHW)
+{
+ mir::Graph g;
+
+ mir::TensorType input_type{mir::DataType::FLOAT32, {1, 112, 112, 32}};
+ auto *input = g.create<mir::ops::InputOp>(input_type);
+
+ mir::TensorType kernel_type{mir::DataType::FLOAT32, {3, 3, 3, 32}};
+ auto *kernel = g.create<mir::ops::InputOp>(kernel_type);
+ // DeConv2DOp
+
+ mir::Deconv2DOpAttributes attributes;
+ attributes.strides = {255, 54};
+ attributes.padding_before = {31, 72};
+ attributes.padding_after = {32, 71};
+ auto *deconv =
+ g.create<mir::ops::DeConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
+
+ auto *output = g.create<mir::ops::OutputOp>(deconv->getOutput(0));
+
+ nnc::DataFormatSwitcher switcher(mir::DataFormat::NCHW);
+
+ switcher.run(&g);
+
+ auto *trans_out = output->getInput(0)->getNode();
+ auto *deconv_ = trans_out->getInput(0)->getNode();
+ auto *trans_in = deconv_->getInput(0)->getNode();
+ auto *input_ = trans_in->getInput(0)->getNode();
+
+ ASSERT_EQ(trans_out->getType(), mir::Operation::Type::transpose);
+ ASSERT_NE(deconv_, deconv);
+ ASSERT_EQ(trans_in->getType(), mir::Operation::Type::transpose);
+ ASSERT_EQ(input_, input);
+
+ auto &in_axis_order = dynamic_cast<mir::ops::TransposeOp *>(trans_in)->getAxisOrder();
+ auto &out_axis_order = dynamic_cast<mir::ops::TransposeOp *>(trans_out)->getAxisOrder();
+
+ ASSERT_EQ(in_axis_order.size(), 4);
+ ASSERT_EQ(in_axis_order, std::vector<size_t>({0, 3, 1, 2}));
+
+ ASSERT_EQ(out_axis_order.size(), 4);
+ ASSERT_EQ(out_axis_order, std::vector<size_t>({0, 2, 3, 1}));
+ // Check DeConv2D params
+ auto *nhwc_deconv = dynamic_cast<mir::ops::DeConv2DOp *>(deconv_);
+ ASSERT_EQ(nhwc_deconv->getDataFormat(), mir::DataFormat::NCHW);
+ ASSERT_EQ(nhwc_deconv->getStrides(), std::vector<int32_t>({255, 54}));
+ ASSERT_EQ(nhwc_deconv->getPaddingBefore(), std::vector<int32_t>({31, 72}));
+ ASSERT_EQ(nhwc_deconv->getPaddingAfter(), std::vector<int32_t>({32, 71}));
+}
+
+TEST(TRANSFORMATIONS, Switcher_AvgPool2D_NHWC2NCHW)
+{
+ mir::Graph g;
+
+ mir::TensorType input_type{mir::DataType::FLOAT32, {1, 112, 112, 32}};
+ auto *input = g.create<mir::ops::InputOp>(input_type);
+ // AvgPool2DOp
+ mir::AvgPool2DOpAttributes attributes;
+ attributes.window = {41, 54};
+ attributes.strides = {22, 53};
+ attributes.padding_before = {11, 36};
+ attributes.padding_after = {38, 45};
+ auto *avg_pool = g.create<mir::ops::AvgPool2DOp>(input->getOutput(0), attributes);
+
+ auto *output = g.create<mir::ops::OutputOp>(avg_pool->getOutput(0));
+
+ nnc::DataFormatSwitcher switcher(mir::DataFormat::NCHW);
+
+ switcher.run(&g);
+
+ auto *trans_out = output->getInput(0)->getNode();
+ auto *avg_pool_ = trans_out->getInput(0)->getNode();
+ auto *trans_in = avg_pool_->getInput(0)->getNode();
+ auto *input_ = trans_in->getInput(0)->getNode();
+
+ ASSERT_EQ(trans_out->getType(), mir::Operation::Type::transpose);
+ ASSERT_NE(avg_pool_, avg_pool);
+ ASSERT_EQ(trans_in->getType(), mir::Operation::Type::transpose);
+ ASSERT_EQ(input_, input);
+
+ auto &in_axis_order = dynamic_cast<mir::ops::TransposeOp *>(trans_in)->getAxisOrder();
+ auto &out_axis_order = dynamic_cast<mir::ops::TransposeOp *>(trans_out)->getAxisOrder();
+
+ ASSERT_EQ(in_axis_order.size(), 4);
+ ASSERT_EQ(in_axis_order, std::vector<size_t>({0, 3, 1, 2}));
+
+ ASSERT_EQ(out_axis_order.size(), 4);
+ ASSERT_EQ(out_axis_order, std::vector<size_t>({0, 2, 3, 1}));
+ // Check AvgPool2D params
+ auto *nhwc_avg_pool = dynamic_cast<mir::ops::AvgPool2DOp *>(avg_pool_);
+ ASSERT_EQ(nhwc_avg_pool->getDataFormat(), mir::DataFormat::NCHW);
+ ASSERT_EQ(nhwc_avg_pool->getWindowSize(), std::vector<int32_t>({41, 54}));
+ ASSERT_EQ(nhwc_avg_pool->getStrides(), std::vector<int32_t>({22, 53}));
+ ASSERT_EQ(nhwc_avg_pool->getPaddingBefore(), std::vector<int32_t>({11, 36}));
+ ASSERT_EQ(nhwc_avg_pool->getPaddingAfter(), std::vector<int32_t>({38, 45}));
+ ASSERT_EQ(nhwc_avg_pool->getIncludePad(), true);
+}
+
+TEST(TRANSFORMATIONS, Switcher_MaxPool2D_NCHW2NHWC)
+{
+ mir::Graph g;
+
+ mir::TensorType input_type{mir::DataType::FLOAT32, {1, 3, 299, 299}};
+ auto *input = g.create<mir::ops::InputOp>(input_type);
+
+ mir::TensorType kernel_type{mir::DataType::FLOAT32, {3, 32, 3, 3}};
+ auto *kernel = g.create<mir::ops::InputOp>(kernel_type);
+ // MaxPool2DOp
+ mir::MaxPool2DOpAttributes attributes;
+ attributes.window = {41, 54};
+ attributes.strides = {22, 53};
+ attributes.padding_before = {11, 36};
+ attributes.padding_after = {38, 45};
+ attributes.data_format = mir::DataFormat::NCHW;
+ auto *max_pool = g.create<mir::ops::MaxPool2DOp>(input->getOutput(0), attributes);
+
+ auto *output = g.create<mir::ops::OutputOp>(max_pool->getOutput(0));
+
+ nnc::DataFormatSwitcher switcher(mir::DataFormat::NHWC);
+
+ switcher.run(&g);
+
+ auto *trans_out = output->getInput(0)->getNode();
+ auto *max_pool_ = trans_out->getInput(0)->getNode();
+ auto *trans_in = max_pool_->getInput(0)->getNode();
+ auto *input_ = trans_in->getInput(0)->getNode();
+
+ ASSERT_EQ(trans_out->getType(), mir::Operation::Type::transpose);
+ ASSERT_NE(max_pool_, max_pool);
+ ASSERT_EQ(trans_in->getType(), mir::Operation::Type::transpose);
+ ASSERT_EQ(input_, input);
+
+ auto &in_axis_order = dynamic_cast<mir::ops::TransposeOp *>(trans_in)->getAxisOrder();
+ auto &out_axis_order = dynamic_cast<mir::ops::TransposeOp *>(trans_out)->getAxisOrder();
+
+ ASSERT_EQ(in_axis_order.size(), 4);
+ ASSERT_EQ(in_axis_order, std::vector<size_t>({0, 2, 3, 1}));
+
+ ASSERT_EQ(out_axis_order.size(), 4);
+ ASSERT_EQ(out_axis_order, std::vector<size_t>({0, 3, 1, 2}));
+ // Check MaxPool2D params
+ auto *nhwc_max_pool = dynamic_cast<mir::ops::MaxPool2DOp *>(max_pool_);
+ ASSERT_EQ(nhwc_max_pool->getDataFormat(), mir::DataFormat::NHWC);
+ ASSERT_EQ(nhwc_max_pool->getWindowSize(), std::vector<int32_t>({41, 54}));
+ ASSERT_EQ(nhwc_max_pool->getStrides(), std::vector<int32_t>({22, 53}));
+ ASSERT_EQ(nhwc_max_pool->getPaddingBefore(), std::vector<int32_t>({11, 36}));
+ ASSERT_EQ(nhwc_max_pool->getPaddingAfter(), std::vector<int32_t>({38, 45}));
+}
diff --git a/compiler/nnc/utils/CMakeLists.txt b/compiler/nnc/utils/CMakeLists.txt
new file mode 100644
index 000000000..e5d96d005
--- /dev/null
+++ b/compiler/nnc/utils/CMakeLists.txt
@@ -0,0 +1,7 @@
+# dumpers of NN models
+add_subdirectory(tflite_dot_dumper)
+add_subdirectory(caffe_dot_dumper)
+add_subdirectory(caffe2_dot_dumper)
+
+# input tensors generator
+add_subdirectory(input_gen)
diff --git a/compiler/nnc/utils/caffe2_dot_dumper/CMakeLists.txt b/compiler/nnc/utils/caffe2_dot_dumper/CMakeLists.txt
new file mode 100644
index 000000000..0bde55394
--- /dev/null
+++ b/compiler/nnc/utils/caffe2_dot_dumper/CMakeLists.txt
@@ -0,0 +1,6 @@
+if (NOT TARGET mir_caffe2_importer)
+ return()
+endif()
+
+add_executable(caffe2_model_dumper model_dump.cpp)
+target_link_libraries(caffe2_model_dumper PRIVATE nnc_support mir_caffe2_importer)
diff --git a/compiler/nnc/utils/caffe2_dot_dumper/model_dump.cpp b/compiler/nnc/utils/caffe2_dot_dumper/model_dump.cpp
new file mode 100644
index 000000000..13b5cfb37
--- /dev/null
+++ b/compiler/nnc/utils/caffe2_dot_dumper/model_dump.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "support/CommandLine.h"
+#include "mir/IrDotDumper.h"
+
+#include <caffe2_importer.h>
+
+#include <exception>
+#include <iostream>
+
+using namespace nnc;
+using namespace mir;
+
+int main(int argc, const char **argv)
+{
+ cli::Option<std::string> predict_net(cli::optname("--predict-net"),
+ cli::overview("Path to the model"));
+ cli::Option<std::string> init_net(cli::optname("--init-net"),
+ cli::overview("Path to the weights"));
+ cli::Option<std::vector<int>> input_shape(cli::optname("--input-shape"),
+ cli::overview("Shape of the input"));
+ cli::CommandLine::getParser()->parseCommandLine(argc, argv);
+
+ try
+ {
+ // FIXME: caffe2 input shapes are not provided by model and must be set from cli
+ auto graph = mir_caffe2::loadModel(predict_net, init_net, {input_shape});
+ dumpGraph(graph.get(), std::cout);
+ }
+ catch (std::exception &e)
+ {
+ std::cout << "Error: " << e.what() << std::endl;
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/compiler/nnc/utils/caffe_dot_dumper/CMakeLists.txt b/compiler/nnc/utils/caffe_dot_dumper/CMakeLists.txt
new file mode 100644
index 000000000..780dece20
--- /dev/null
+++ b/compiler/nnc/utils/caffe_dot_dumper/CMakeLists.txt
@@ -0,0 +1,6 @@
+if (NOT TARGET mir_caffe_importer)
+ return ()
+endif()
+
+add_executable(caffe_model_dumper model_dump.cpp)
+target_link_libraries(caffe_model_dumper PRIVATE nnc_support mir_caffe_importer)
diff --git a/compiler/nnc/utils/caffe_dot_dumper/model_dump.cpp b/compiler/nnc/utils/caffe_dot_dumper/model_dump.cpp
new file mode 100644
index 000000000..6dab885f8
--- /dev/null
+++ b/compiler/nnc/utils/caffe_dot_dumper/model_dump.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "support/CommandLine.h"
+#include "mir/IrDotDumper.h"
+
+#include <caffe_importer.h>
+
+#include <exception>
+#include <iostream>
+
+using namespace nnc;
+using namespace mir;
+
+int main(int argc, const char **argv)
+{
+ cli::Option<std::string> model_path(cli::optname("--model"), cli::overview("Path to the model"));
+ cli::CommandLine::getParser()->parseCommandLine(argc, argv);
+
+ try
+ {
+ auto graph = mir_caffe::loadModel(model_path);
+ dumpGraph(graph.get(), std::cout);
+ }
+ catch (std::exception &e)
+ {
+ std::cout << "Error: " << e.what() << std::endl;
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/compiler/nnc/utils/caffe_model_maker/AllFill.sh b/compiler/nnc/utils/caffe_model_maker/AllFill.sh
new file mode 100755
index 000000000..93e38d1d7
--- /dev/null
+++ b/compiler/nnc/utils/caffe_model_maker/AllFill.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+: '
+Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'
+
+
+#Fills all models and writes errors
+usage () {
+ echo "Filler.sh should be in the working directory\nusage:
+ no args - assumes current directory
+ -d=<dir> fills models in <dir>
+ Example:
+ $(basename $0) -d='./foobar/'"
+}
+
+DIR="./"
+for i in "$@"
+do
+ case $i in
+ -h|--help|help)
+ usage
+ exit 1
+ ;;
+ -d=*)
+ DIR=${i#*=}
+ ;;
+ esac
+ shift
+done
+echo $DIR
+if [ $# -eq 0 ]; then
+ echo "Assume working directory"
+fi
+for a in `ls $DIR*.prototxt`; do
+ ./Filler.sh $a
+done 2>error.log
diff --git a/compiler/nnc/utils/caffe_model_maker/Filler.sh b/compiler/nnc/utils/caffe_model_maker/Filler.sh
new file mode 100755
index 000000000..963edbfb3
--- /dev/null
+++ b/compiler/nnc/utils/caffe_model_maker/Filler.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+: '
+Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'
+
+#Fills $1 with random weights
+if [ $# -eq 0 ]
+ then
+ echo "usage:\n $(basename $0) foo.prototxt"
+ exit 1
+fi
+FN=$1
+NOEXT=${FN%%.*} # filename without the extension
+mkdir $NOEXT
+caffegen init < $FN > $NOEXT/filled.prototxt
+caffegen encode < $NOEXT/filled.prototxt > $NOEXT/model.caffemodel
diff --git a/compiler/nnc/utils/caffe_model_maker/GenerateCaffeModels.py b/compiler/nnc/utils/caffe_model_maker/GenerateCaffeModels.py
new file mode 100755
index 000000000..ca8b3776a
--- /dev/null
+++ b/compiler/nnc/utils/caffe_model_maker/GenerateCaffeModels.py
@@ -0,0 +1,722 @@
+#!/usr/bin/python3
+"""
+Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import caffe
+import numpy as np
+import sys
+import h5py
+from itertools import chain
+from caffe import layers as L
+import random
+import lmdb
+from collections import Counter, OrderedDict
+
+if (len(sys.argv) < 2):
+ dest_folder = ''
+ print('Using current directory as destination folder')
+else:
+ dest_folder = sys.argv[1] + '/'
+
+
+class PH:
+ """
+ PlaceHolder value
+ """
+
+ def __init__(self, type, param):
+ self.type = type
+ self.param = param
+
+
+# Bookkeeping
+LS = 224
+# binaryProto file for Infogain
+H = np.eye(3, dtype='f4')
+blob = caffe.io.array_to_blobproto(H.reshape((1, 1, 3, 3)))
+with open(dest_folder + 'infogainH.binaryproto', 'wb+') as f:
+ f.write(blob.SerializeToString())
+
+# List of hdf5 files
+with open(dest_folder + "in", 'w+') as f:
+ f.write('in.hdf5')
+
+#Window File
+with open(dest_folder + "in_winds", 'w+') as f:
+ f.write("""# 1
+in.jpg
+3
+224
+224
+2
+1 0.1 50 50 60 70
+1 0.9 30 30 50 50
+# 2
+in.jpg
+3
+224
+224
+2
+1 0.1 50 50 70 70
+1 0.9 30 30 50 50
+""")
+
+# HDF5 file for HDF5DataSet
+h5f = h5py.File(dest_folder + "in.hdf5", "w")
+h5f.create_dataset("data", data=np.random.rand(1, 3, LS, LS))
+h5f.close()
+
+# LMDB file
+env = lmdb.open(dest_folder + 'test-lmdb')
+with env.begin(write=True) as txn:
+ img_data = np.random.rand(3, LS, LS)
+ datum = caffe.io.array_to_datum(img_data, label=1)
+ txn.put('{:0>10d}'.format(1).encode('ascii'), datum.SerializeToString())
+env.close()
+
+# recurring parameters
+losspara = {'ignore_label': True, 'normalization': 1, 'normalize': True}
+softmaxpara = {'engine': 0, 'axis': 1}
+gdfil = {'type': 'gaussian', 'std': 0.001}
+cofil = {'type': 'constant', 'value': 0}
+rp = {
+ 'num_output': 1,
+ 'weight_filler': gdfil,
+ 'bias_filler': cofil,
+ 'expose_hidden': True
+}
+
+filler_par = {
+ 'type': 'constant',
+ 'value': 0,
+ 'min': 0,
+ 'max': 1,
+ 'mean': 0,
+ 'std': 1,
+ 'sparse': -1, # -1 means no sparsification
+ 'variance_norm': 0
+} # 0 = FAN_IN, 1 = FAN_OUT, 2 = AVERAGE
+
+OPS = [
+ ('Parameter', {
+ 'shape': {
+ 'dim': [1]
+ },
+ "is_data": True
+ }), # ok
+ (
+ 'Data',
+ {
+ 'source': 'test-lmdb', # FIXME: unknown DB backend
+ 'batch_size': 1,
+ 'rand_skip': 0,
+ 'backend': 1, # 0 = LEVELDB, 1 = LMDB
+ 'scale': 1.0, # deprecated in favor of TransformationParameter
+ 'mean_file': 'wtf.is_that',
+ 'crop_size': 0,
+ 'mirror': False,
+ 'force_encoded_color': False,
+ 'prefetch': 4,
+ "is_data": True
+ }),
+ (
+ 'DummyData',
+ {
+ 'data_filler': cofil, # ok
+ #'num' : [1,1,1], # deprecated shape specification
+ #'channels' : [2,2,2],
+ #'height' : [3,3,3],
+ #'width' : [4,4,4]},
+ 'shape': {
+ 'dim': [1, 3, LS, LS]
+ },
+ "is_data": True
+ }),
+ (
+ 'ImageData',
+ {
+ 'source': 'in_imgs', # file with list of imgs
+ 'top': 'op2',
+ 'batch_size': 1,
+ 'rand_skip': 0,
+ 'shuffle': False,
+ 'new_height': 0,
+ 'new_width': 0,
+ 'is_color': True,
+ 'root_folder': '',
+ 'scale': 1.0, # deprecated in favor of TransformationParameter
+ 'mirror': False,
+ "is_data": True
+ }),
+ (
+ 'WindowData',
+ {
+ 'source': 'in_winds',
+ 'top': 'op2',
+ 'batch_size': 1,
+ 'mean_file': 'in.jpg',
+ 'transform_param': {
+ 'scale': 0.8,
+ 'crop_size': 24,
+ 'mirror': False,
+ #'fg_treshold' : 0.5,
+ #'bg_treshold' : 0.5,
+ #'fg_fraction' : 0.25,
+ },
+ 'context_pad': 1,
+ 'crop_mode': 'warp',
+ 'cache_images': True,
+ 'root_folder': './',
+ "is_data": True
+ }),
+ (
+ 'HDF5Data',
+ {
+ 'source': 'in', # This is the name of the file WITH HDF5 FILENAMES 0_0
+ # Top should have the same name as the dataset in the hdf5 file
+ # FIXME Requires Caffegen to be built with Caffe that supports LMDB
+ 'batch_size': 1,
+ 'shuffle': False,
+ "is_data": True
+ }),
+ ('Input', {
+ 'shape': {
+ 'dim': [1, 2, 3, 4]
+ },
+ "is_data": True
+ }), # ok
+ (
+ 'MemoryData',
+ {
+ 'batch_size': 1, # ok
+ 'channels': 2,
+ 'height': 3,
+ 'width': 4,
+ 'top': "foo",
+ "is_data": True
+ }),
+
+ ## Regular OPS
+ (
+ "Convolution",
+ {
+ 'num_output': 64, # ok
+ 'kernel_size': 9,
+ 'stride': 1,
+ 'pad': 0,
+ 'weight_filler': gdfil,
+ 'param': [{
+ 'lr_mult': 1
+ }, {
+ 'lr_mult': 0.1
+ }],
+ 'bias_filler': cofil
+ }),
+
+    # Depthwise conv
+ (
+ "Convolution",
+ {
+ 'num_output': 12, # ok
+ 'kernel_size': 9,
+ 'stride': 1,
+ 'dilation': 2,
+ 'group': 3,
+ 'pad': 0,
+ 'weight_filler': gdfil,
+ 'param': [{
+ 'lr_mult': 1
+ }, {
+ 'lr_mult': 0.1
+ }],
+ 'bias_filler': cofil
+ }),
+ (
+ "Deconvolution",
+ {
+ 'convolution_param': # ok
+ {
+ 'num_output': 4,
+ 'kernel_size': 9,
+ 'stride': 1,
+ 'pad': 0,
+ 'weight_filler': gdfil,
+ 'bias_filler': cofil
+ }
+ }),
+    # Depthwise deconv
+ (
+ "Deconvolution",
+ {
+ 'convolution_param': # ok
+ {
+ 'num_output': 12,
+ 'kernel_size': 9,
+ 'stride': 1,
+ 'dilation': 2,
+ 'group': 3,
+ 'pad': 0,
+ 'weight_filler': gdfil,
+ 'bias_filler': cofil
+ }
+ }),
+ (
+ 'BatchNorm',
+ {
+ 'eps': 1e-5, # ok
+ 'moving_average_fraction': 0.999
+ }),
+ (
+ 'LRN',
+ {
+ 'alpha': 1., # ok
+ 'beta': 0.75,
+ 'norm_region': 1,
+ 'local_size': 5,
+ 'k': 1,
+ 'engine': 0
+ }),
+ # local_size[default 5]: the number of channels to sum over
+    # alpha[default 1]: the scaling parameter
+    # beta[default 5]: the exponent
+    # norm_region[default ACROSS_CHANNELS]: whether to sum over adjacent channels(ACROSS_CHANNELS) or nearby
+    # spatial locations(WITHIN_CHANNELS)
+ # `input / (1 + (\alpha/n) \sum_i x_i^2)^\beta`
+ (
+ "MVN",
+ {
+ 'normalize_variance': True, # ok
+ 'across_channels': False,
+ 'eps': 1e-9
+ }),
+ (
+ 'Im2col',
+ {
+ 'convolution_param': # ok
+ {
+ 'num_output': 64,
+ 'kernel_size': 9,
+ 'stride': 1,
+ 'pad': 0,
+ 'weight_filler': gdfil,
+ # 'param' : [{'lr_mult':1},{'lr_mult':0.1}],
+ 'bias_filler': cofil
+ }
+ }),
+ ('Dropout', {
+ 'dropout_ratio': 0.5
+ }), # ok
+ ('Split', {}), # ok
+ ('Concat', {
+ 'axis': 1
+ }), # ok
+ (
+ 'Tile',
+ {
+ 'axis': 1, # ok
+ 'tiles': 2
+ }),
+ ('Slice', {
+ 'axis': 1,
+ 'top': 'op2',
+ 'slice_point': 1
+ }),
+ (
+ 'Reshape',
+ {
+ 'shape': {
+ 'dim': [1, 0, -1]
+ }, # ok
+ 'axis': 0,
+ 'num_axes': -1
+ }),
+ # reshapes only [axis, axis + num_axes] if those aren't 0 and -1; axis can be negative
+ # 0 in shape means retaining dim size, -1 means auto size
+ (
+ 'Flatten',
+ {
+ 'axis': 1, # ok
+ 'end_axis': -1
+ }),
+ (
+ 'Pooling',
+ {
+ 'pool': 0, # ok # pool: 0 = MAX, 1 = AVE, 2 = STOCHASTIC
+ 'pad': 0, # can be replaced with pad_w, pad_h
+ 'kernel_size': 3, # can be replaced with kernel_w, kernel_h
+ 'stride': 1, # can be replaced with stride_w, stride_h
+ 'engine': 0,
+ 'global_pooling': False
+ }),
+    # 'round_mode' : 0}), # 0 = CEIL, 1 = FLOOR
+ (
+ 'Reduction',
+ {
+ 'operation': 1, # ok # 1 = SUM, 2 = ASUM, 3 = SUMSQ, 4 = MEAN # ok
+ 'axis': 0,
+ 'coeff': 1.0
+ }),
+ (
+ 'SPP',
+ {
+ 'pyramid_height': 1, # ok
+ 'pool': 0,
+ 'engine': 0
+ }),
+ (
+ 'InnerProduct',
+ {
+ 'num_output': 2, # ok
+ 'bias_term': True,
+ 'weight_filler': filler_par,
+ 'bias_filler': filler_par,
+ 'axis': 1,
+ 'transpose': False
+ }),
+ (
+ 'Embed',
+ {
+ 'num_output': 2, # ok
+ 'input_dim': 1,
+ 'bias_term': True,
+ 'weight_filler': filler_par,
+ 'bias_filler': filler_par
+ }),
+ (
+ 'ArgMax',
+ {
+ 'out_max_val': False, # ok # if True, outputs pairs (argmax, maxval) # ok
+ 'top_k': 1,
+ 'axis': -1
+ }),
+ (
+ 'Softmax',
+ {
+ 'engine': 0, # ok
+ 'axis': 1
+ }),
+ (
+ 'ReLU',
+ {
+ 'negative_slope': 0, # ok
+ 'engine': 0
+ }),
+ (
+ 'PReLU',
+ {
+ 'filler': filler_par, # ok
+ 'channel_shared': False
+ }),
+ ('ELU', {
+ 'alpha': 1
+ }), # ok
+ ('Sigmoid', {
+ 'engine': 0
+ }), # ok
+ ('BNLL', {}), # ok
+ ('TanH', {
+ 'engine': 0
+ }), # ok
+ ('Threshold', {
+ 'threshold': 0
+ }), # ok
+ (
+ 'Bias',
+ {
+ 'axis': 0, # ok
+ 'num_axes': -1,
+ 'filler': filler_par
+ }),
+ (
+ 'Scale',
+ {
+ 'axis': 0, # ok
+ 'num_axes': -1,
+ 'filler': filler_par,
+ 'bias_term': False,
+ 'bias_filler': filler_par
+ }),
+ ('AbsVal', {}), # ok
+ (
+ 'Log',
+ {
+ 'base': -1.0, # ok
+ 'scale': 1.0,
+ 'shift': PH(float, (2.0, 10.0)),
+ 'how_many': 10
+ }), # y = ln(shift + scale * x) (log_base() for base > 0)
+ (
+ 'Power',
+ {
+ 'power': -1.0, # ok
+ 'scale': 1.0,
+ 'shift': 0.0
+ }), # y = (shift + scale * x) ^ power
+ (
+ 'Exp',
+ {
+ 'base': -1.0, # ok
+ 'scale': 1.0,
+ 'shift': 0.0
+ }),
+
+ ## TWO INPUTS
+ (
+ 'Crop',
+ {
+ 'axis': 2, # ok
+ 'offset': [0],
+ "inputs": 2
+ }), # if one offset - for all dims, more - specifies
+ (
+ "Eltwise",
+ {
+ 'operation': 1, # ok
+ 'coeff': [3, 3],
+ 'stable_prod_grad': True,
+ "inputs": 2
+ }),
+ ("EuclideanLoss", {
+ "inputs": 2
+ }), # ok
+ ("HingeLoss", {
+ 'norm': 1,
+ "inputs": 2
+ }), # L1 = 1; L2 = 2; # ok
+ ("SigmoidCrossEntropyLoss", {
+ 'loss_param': losspara,
+ "inputs": 2
+ }), # ok
+
+ ## TWO Inputs, special shape
+ (
+ "Accuracy",
+ {
+ 'top_k': 1, # FIXME: different bottom shapes needed
+ 'axis': 0,
+ 'ignore_label': 0,
+ "inputs": 2,
+ "special_shape": [1, 3, 1, 1]
+ }),
+ (
+ "SoftmaxWithLoss",
+ {
+ 'loss_param': losspara, # FIXME: different bottom shapes needed
+ 'softmax_param': softmaxpara,
+ "inputs": 2,
+ "special_shape": [1, 1, 1, 1]
+ }),
+ ("MultinomialLogisticLoss", {
+ 'loss_param': losspara,
+ "inputs": 2,
+ "special_shape": [1, 1, 1, 1]
+ }), # FIXME: different bottom shapes needed
+ ("Filter", {
+ "inputs": 2,
+ "special_shape": [1, 1, 1, 1]
+ }), # FIXME: different bottom shapes needed
+ ('BatchReindex', {
+ "inputs": 2,
+ "special_shape": [2]
+ }), # takes indices as second blob
+ ("InfogainLoss", {
+ 'source': 'infogainH.binaryproto',
+ 'axis': 1,
+ "inputs": 2,
+ "special_shape": [1, 1, 1, 1]
+ }),
+ (
+ 'Python',
+ {
+ 'python_param': # Custom Loss layer
+ {
+ 'module': 'Pyloss', # the module name -- usually the filename -- that needs to be in $PYTHONPATH
+ 'layer': 'EuclideanLossLayer', # the layer name -- the class name in the module
+ 'share_in_parallel': False
+ },
+ # set loss weight so Caffe knows this is a loss layer.
+ # since PythonLayer inherits directly from Layer, this isn't automatically
+ # known to Caffe
+ 'loss_weight': 1,
+ "inputs": 2,
+ "special_shape": [1, 3, 1, 1]
+ },
+ ),
+
+ ## NOTOP OPS
+ ('HDF5Output', {
+ 'file_name': 'out.hdf5',
+ "inputs": 2,
+ "is_notop": True
+ }), # ok
+ ('Silence', {
+ "inputs": 2,
+ "is_notop": True
+ }), # ok, need to remove tops
+
+ ## THREE INPUTS
+ ("RNN", {
+ 'recurrent_param': rp,
+ 'top': "out2",
+ "inputs": 3
+ }), # ok
+ ("Recurrent", {
+ 'recurrent_param': rp,
+ 'top': "out2",
+ "inputs": 3
+ }), # ok
+
+ ## FOUR INPUTS
+ ("LSTM", {
+ 'recurrent_param': rp,
+ 'top': ["out2", "out3"],
+ "inputs": 4
+ }), # ok
+
+ ## Handled explicitly (special case)
+ ("ContrastiveLoss", {
+ 'margin': 1.0,
+ 'legacy_version': False
+ }),
+]
+
+#Helper functions
+
+
+def traverse(obj, callback=None):
+ """
+ walks a nested dict/list recursively
+ :param obj:
+ :param callback:
+ :return:
+ """
+ if isinstance(obj, dict):
+ value = {k: traverse(v, callback) for k, v in obj.items()}
+ elif isinstance(obj, list):
+ value = [traverse(elem, callback) for elem in obj]
+ else:
+ value = obj
+
+ if callback is None:
+ return value
+ else:
+ return callback(value)
+
+
+def mock(inp):
+ if not (isinstance(inp, PH)): return inp
+ if inp.type == int:
+ return random.randint(*inp.param)
+ if inp.type == float:
+ return random.uniform(*inp.param)
+
+
+EXTRA_SHAPES = \
+    [(), # already defined
+ [1, 3],
+ [1, 3, 1],
+ [1, 3, 1]]
+
+
+class Layer:
+ """
+ Represents a caffe layer
+ """
+
+ def __init__(self, name, params):
+ self.name = name
+ self.args = params
+ if self.args == None: self.args = dict()
+ self.num_inp = self.args.pop("inputs", 1)
+ self.num_out = self.args.pop("outputs", 1)
+ self.special_shape = self.args.pop("special_shape",
+ False) # 2nd input has special shape
+ self.is_data = self.args.pop("is_data", False)
+ self.is_notop = self.args.pop("is_notop", False)
+
+ def make_net(self):
+ """
+ Creates a protobuf network
+ :return:
+ """
+ net = caffe.NetSpec()
+
+ if self.is_data:
+ net.data = getattr(L, self.name)(**self.args)
+
+ # Very special,
+ elif self.name == "ContrastiveLoss":
+ net.data = L.Input(shape={'dim': [1, 4]})
+ net.data1 = L.DummyData(data_filler=cofil, shape={'dim': [1, 4]})
+ net.data2 = L.DummyData(data_filler=cofil, shape={'dim': [1, 1]})
+
+ net.op = getattr(L, self.name)(net.data, net.data1, net.data2, **self.args)
+
+ # this covers most cases
+ else:
+ net.data = L.Input(shape={'dim': [1, 3, LS, LS]})
+ if self.num_inp == 2:
+ net.data1 = L.DummyData(data_filler=cofil, shape={'dim': [1, 3, LS, LS]})
+ elif self.num_inp > 2:
+ for i in range(1, self.num_inp):
+ setattr(
+ net, "data" + str(i),
+ L.DummyData(data_filler=cofil, shape={'dim': EXTRA_SHAPES[i]}))
+ if self.special_shape:
+ net.data = L.Input(shape={'dim': [1, 3, 1, 1]})
+ net.data1 = L.DummyData(
+ data_filler=cofil, shape={'dim': self.special_shape})
+
+ net.op = getattr(L, self.name)(
+ net.data,
+ *[getattr(net, "data" + str(i))
+ for i in range(1, self.num_inp)], **self.args)
+
+ if self.is_notop:
+ net.op.fn.tops = OrderedDict()
+ net.op.fn.ntop = 0 # the messing about in question
+
+ return net
+
+
+class LayerMaker:
+ """
+ Factory class for Layer
+ """
+
+ def __init__(self, params):
+ self.name, self.args = params
+ self.how_many = self.args.pop("how_many", 1)
+
+ def make(self):
+ return [Layer(self.name, traverse(self.args, mock)) for i in range(self.how_many)]
+
+
+layer_gen = chain(*map(lambda para: LayerMaker(para).make(), OPS))
+
+filename = dest_folder + '{}_{}.prototxt'
+
+counter = Counter()
+for layer in layer_gen:
+ n = layer.make_net()
+ counter[layer.name] += 1
+
+ with open(filename.format(layer.name, counter[layer.name] - 1), 'w+') as ptxt_file:
+ print(n.to_proto(), file=ptxt_file)
+
+ if layer.name == "Python": # Special case for python layer
+ with open("Python_0.caffemodel", 'wb+') as caffemodelFile:
+ caffemodelFile.write(n.to_proto().SerializeToString())
diff --git a/compiler/nnc/utils/caffe_model_maker/Pyloss.py b/compiler/nnc/utils/caffe_model_maker/Pyloss.py
new file mode 100755
index 000000000..e3f781759
--- /dev/null
+++ b/compiler/nnc/utils/caffe_model_maker/Pyloss.py
@@ -0,0 +1,83 @@
+"""
+COPYRIGHT
+
+All contributions by the University of California:
+Copyright (c) 2014-2017 The Regents of the University of California (Regents)
+All rights reserved.
+
+All other contributions:
+Copyright (c) 2014-2017, the respective contributors
+All rights reserved.
+
+Caffe uses a shared copyright model: each contributor holds copyright over
+their contributions to Caffe. The project versioning records all such
+contribution and copyright details. If a contributor wants to further mark
+their specific copyright on a particular contribution, they should indicate
+their copyright solely in the commit message of the change when it is
+committed.
+
+LICENSE
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+CONTRIBUTION AGREEMENT
+
+By contributing to the BVLC/caffe repository through pull-request, comment,
+or otherwise, the contributor releases their content to the
+license and copyright terms herein.
+"""
+import caffe
+import numpy as np
+
+
+class EuclideanLossLayer(caffe.Layer):
+    """
+    Compute the Euclidean Loss in the same manner as the C++ EuclideanLossLayer
+    to demonstrate the class interface for developing layers in Python.
+    """
+
+    def setup(self, bottom, top):
+        # check input pair
+        if len(bottom) != 2:
+            raise Exception("Need two inputs to compute distance.")
+
+    def reshape(self, bottom, top):
+        # check input dimensions match
+        if bottom[0].count != bottom[1].count:
+            raise Exception("Inputs must have the same dimension.")
+        # difference is shape of inputs
+        self.diff = np.zeros_like(bottom[0].data, dtype=np.float32)
+        # loss output is scalar
+        top[0].reshape(1)
+
+    def forward(self, bottom, top):
+        # elementwise difference; loss = sum(diff^2) / batch size / 2
+        self.diff[...] = bottom[0].data - bottom[1].data
+        top[0].data[...] = np.sum(self.diff**2) / bottom[0].num / 2.
+
+    def backward(self, top, propagate_down, bottom):
+        # gradient w.r.t. each input: +diff/num for bottom[0], -diff/num for bottom[1]
+        for i in range(2):
+            if not propagate_down[i]:
+                continue
+            if i == 0:
+                sign = 1
+            else:
+                sign = -1
+            bottom[i].diff[...] = sign * self.diff / bottom[i].num
diff --git a/compiler/nnc/utils/caffe_model_maker/README.md b/compiler/nnc/utils/caffe_model_maker/README.md
new file mode 100644
index 000000000..e34a769a0
--- /dev/null
+++ b/compiler/nnc/utils/caffe_model_maker/README.md
@@ -0,0 +1,22 @@
+# Utils
+Caffe model generation helpers
+
+REQUIRES:
+
+* caffe
+* h5py
+* lmdb
+* numpy
+* caffegen in `$PATH`
+
+`GenerateCaffeModels.py` creates `*.prototxt` files for 1 and 2 layer caffe models
+The generator can create multiple examples of any layer, assuming you add a
+`how_many` field into the layer's dict. You will also need to replace the constants in said dict with `PH(type, param)` values, where `type` is the type of the placeholder variable
+and `param` is a list (or tuple) of parameters for generating the mock.
+
+For an example of generating multiple instances of a layer see the `Log` layer.
+
+`Filler.sh` fills a single model with random weights by using `caffegen` and creates a dir with a filled `prototxt` and a `caffemodel` binary file. The result directory is located in the same directory as the `prototxt` file
+
+`AllFill.sh` fills all `*.prototxt` files in the current directory or in provided directory
+(-d)
diff --git a/compiler/nnc/utils/def2src.cpp b/compiler/nnc/utils/def2src.cpp
new file mode 100644
index 000000000..32a3e48b5
--- /dev/null
+++ b/compiler/nnc/utils/def2src.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <fstream>
+#include <cassert>
+
+int fileToArray(const std::string &source, const std::string &dest, const std::string &arrName)
+{
+  // Reads 'source' in binary mode and writes a C header to 'dest' defining
+  // 'const char <arrName>[]' that holds the file's raw bytes.
+  // Returns 0 on success, -1 on any I/O failure.
+  FILE *fs = fopen(source.c_str(), "rb");
+  if (!fs)
+  {
+    std::cerr << "source file not found: <" << source << ">" << std::endl;
+    return -1;
+  }
+
+  std::ofstream fo(dest.c_str());
+  if (fo.fail())
+  {
+    std::cerr << "cannot generate file: <" << dest << ">" << std::endl;
+    fclose(fs);
+    return -1;
+  }
+
+  std::cout << "generating <" << dest << ">" << std::endl;
+
+  fo << "#ifndef _" << arrName << "_H_" << std::endl;
+  fo << "#define _" << arrName << "_H_" << std::endl;
+
+  fo << "const char " << arrName << "[] = {" << std::endl;
+
+  int is_error = fseek(fs, 0L, SEEK_SET);
+  assert(!is_error);
+  (void)is_error;
+  size_t bytes;
+  do
+  {
+    char buf[1024];
+    bytes = fread(buf, 1, sizeof(buf), fs);
+    assert(!ferror(fs) && "file read error");
+
+    // convert line
+    for (size_t i = 0; i < bytes; i++)
+    {
+      // Cast through unsigned char first: a plain (signed) char >= 0x80 would
+      // sign-extend to a negative int and print as e.g. 0xffffff80, which is a
+      // narrowing error inside the generated char-array initializer.
+      fo << "0x" << std::hex << static_cast<int>(static_cast<unsigned char>(buf[i])) << ", ";
+    }
+  } while (bytes != 0);
+
+  fo << "};" << std::endl;
+
+  fo << std::endl;
+  fo << "#endif /* _" << arrName << "_H_ */" << std::endl;
+
+  fo.flush();
+  fclose(fs);
+
+  return 0;
+}
+
+std::string extractFileName(std::string path)
+{
+  // Strip the directory part (everything up to the last '/'), then the
+  // extension (everything from the first '.' of the remaining name).
+  const auto slash = path.find_last_of('/');
+  std::string name = (slash == std::string::npos) ? path : path.substr(slash + 1);
+
+  const auto dot = name.find_first_of('.');
+  return (dot == std::string::npos) ? name : name.substr(0, dot);
+}
+
+int main(int argc, char *argv[])
+{
+  // Usage: def2src <output_dir> <input_file>...
+  // For each input file, emits <output_dir>/<basename>.generated.h containing
+  // the file's bytes as a char array named <basename>.
+  if (argc < 3)
+    return -1;
+
+  std::string OutPutDir = argv[1];
+
+  for (int i = 2; i < argc; i++)
+  {
+    std::string sourceFullFileName = argv[i];
+    std::string filename = extractFileName(sourceFullFileName);
+    // NOLINTNEXTLINE (performance-inefficient-string-concatenation)
+    std::string outputFileName = OutPutDir + "/" + filename + ".generated.h";
+
+    if (fileToArray(sourceFullFileName, outputFileName, filename) != 0)
+      return -1;
+  }
+
+  return 0;
+}
diff --git a/compiler/nnc/utils/infer_tests/README.md b/compiler/nnc/utils/infer_tests/README.md
new file mode 100644
index 000000000..aa6dd5f58
--- /dev/null
+++ b/compiler/nnc/utils/infer_tests/README.md
@@ -0,0 +1,9 @@
+These scripts can be useful for developing/testing nnc. Usage and purpose of the scripts can be found in comments in their source code.
+
+Note that these scripts are just development artifacts and are not supposed to go into production in any form.
+
+infer_testcases.py: run inference with `nnkit` on testcases
+res2bin.py: used by infer_testcases.py to convert resulting hdf5 to binary format
+
+'testcases' folder structure:
+At the moment we use the following structure: a folder for a model contains 'models' and 'testcases' subfolders. The 'models' subfolder contains model that we run inference on, 'testcases' subfolder contains a 'testcase*' folder for each different testcase. Each of those folders in turn contain 'input' with a '.JPEG' file (and '.hdf5' and '.dat' files after running `jpeg2hdf5` script), and 'output' folder where inference results are stored.
diff --git a/compiler/nnc/utils/infer_tests/infer_testcases.py b/compiler/nnc/utils/infer_tests/infer_testcases.py
new file mode 100755
index 000000000..fab887c1a
--- /dev/null
+++ b/compiler/nnc/utils/infer_tests/infer_testcases.py
@@ -0,0 +1,154 @@
+from __future__ import print_function
+import sys
+import glob
+import subprocess
+import res2bin
+import datetime
+
+# This script uses nnkit to run inference for given model on a given data
+# Messages are printed to stderr
+# Usage:
+# -b - specifies path to nnkit build folder, inside which tools/run is located
+# -f - specifies framework ('tfl' for tflite or 'caf' for caffe) that the model belogs to
+# -t - specifies path to testcase folder (see its structure in readme)
+# -p - allow some sort of parallelism by processing only a subset of files,
+# you need to specify number of processes and run as much of them
+# manually with diferent numbers
+# -r - infer all testcases regardless of whether the result files are present
+# last argument(s) is the model to infer
+#
+# Example of usage:
+# python3 infer_testcases.py -f tfl -b /mnt/nncc_ci/nncc_new/build/contrib/nnkit -t /mnt/nncc_ci/images/inc_slim/testcases/ -p 10 1 -r /mnt/nncc_ci/images/inc_slim/models/inception_v3_2018.tflite
+#
+
+helpstr = "Expected arguments: -b <path_to_nnkit>" + \
+ "-f (tfl | caf) " + \
+ "-t <testcases_dir> " + \
+ "[-p <nporc> <proc_num>] " + \
+ "[-r] " + \
+ "(<tflite_model_file> | <caffe_prototxt_model> <caffe_caffemodel_file>)"
+
+
+def eprint(*args, **kwargs):
+    # print to stderr so status messages don't pollute stdout
+    print(*args, file=sys.stderr, **kwargs)
+
+
+nproc = 1
+proc_num = 1
+min_argc = 8
+
+args = {}
+args['-p'] = (1, 1)
+args['-r'] = False
+
+argc = len(sys.argv)
+for i in range(argc):
+ arg = sys.argv[i]
+ if arg == '-r':
+ args[arg] = True
+ elif arg == '-b' or arg == '-f' or arg == '-t':
+ if i + 1 >= argc:
+ eprint(arg, " is missing it's value")
+ eprint(helpstr)
+ exit()
+ args[arg] = sys.argv[i + 1]
+ elif arg == '-p':
+ min_argc += 3
+ if i + 2 >= argc:
+ eprint(arg, " is missing some of it's values")
+ eprint(helpstr)
+ exit()
+ args[arg] = (int(sys.argv[i + 1]), int(sys.argv[i + 2]))
+ elif arg[0] == '-':
+ print('Unsupported argument: ', arg)
+ exit()
+
+if not ('-b' in args and '-f' in args and '-t' in args):
+ eprint('Some arguments are not provided')
+ eprint(helpstr)
+ exit()
+
+fw = args['-f']
+build_path = args['-b']
+testcases_dir = args['-t']
+nproc, proc_num = args['-p']
+remove_existing = args['-r']
+
+if fw == 'tfl':
+ model = sys.argv[-1]
+ print('Model: ', model)
+elif fw == 'caf':
+ model_proto = sys.argv[-2]
+ model_caffe = sys.argv[-1]
+ print('Models: ', model_proto, model_caffe)
+else:
+ eprint('Unsupported framework:', fw)
+ exit()
+
+eprint('started at', datetime.datetime.now())
+print('Framework: ', fw)
+print('Path to nnkit: ', build_path)
+print('Testcases folder: ', testcases_dir)
+
+hdf_suffix = '.hdf5'
+bin_suffix = '.dat'
+
+
+def get_command_caf(infilename, outfilename, proto, caffemodel):
+ return [
+ build_path + "/tools/run/nnkit-run", "--pre",
+ build_path + "/actions/HDF5/libnnkit_HDF5_import_action.so", "--pre-arg",
+ infilename, "--backend", build_path + "/backends/caffe/libnnkit_caffe_backend.so",
+ "--backend-arg", proto, "--backend-arg", caffemodel, "--post",
+ build_path + "/actions/HDF5/libnnkit_HDF5_export_action.so", "--post-arg",
+ outfilename
+ ]
+
+
+def get_command_tfl(infilename, outfilename, model_file):
+ return [
+ build_path + "/tools/run/nnkit-run", "--pre",
+ build_path + "/actions/HDF5/libnnkit_HDF5_import_action.so", "--pre-arg",
+ infilename, "--backend",
+ build_path + "/backends/tflite/libnnkit_tflite_backend.so", "--backend-arg",
+ model_file, "--post", build_path + "/actions/builtin/libnnkit_show_action.so",
+ "--post", build_path + "/actions/HDF5/libnnkit_HDF5_export_action.so",
+ "--post-arg", outfilename
+ ]
+
+
+testcase_num = 0
+testcases = glob.glob(testcases_dir + '/testcase*')
+
+#testcases = [t
+# for t in testcases
+# if remove_existing
+# or len(glob.glob(t + '/output/output' + hdf_suffix)) == 0
+# or len(glob.glob(t + '/output/output' + bin_suffix)) == 0]
+testcases = testcases[proc_num - 1::nproc]
+
+testcases.sort()
+for testcase in testcases:
+ testcase_num += 1
+ try:
+ infile = glob.glob(testcase + '/input/*' + hdf_suffix)
+ if len(infile) > 0:
+ infile = infile[0]
+ outfile = testcase + '/output/output' + hdf_suffix
+ outfile_bin = testcase + '/output/output' + bin_suffix
+ if len(glob.glob(outfile)) == 0 or remove_existing:
+ if fw == 'tfl':
+ command = get_command_tfl(infile, outfile, model)
+ elif fw == 'caf':
+ command = get_command_caf(infile, outfile, model_proto, model_caffe)
+ #subprocess.call(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+ subprocess.call(command)
+ if len(glob.glob(outfile_bin)) == 0 or remove_existing:
+ res2bin.res2bin(outfile, outfile_bin)
+ eprint(testcase_num, "/", len(testcases))
+ else:
+ eprint(testcase, ': input not found')
+ except:
+ eprint(testcase, 'failed')
+
+eprint('ended at', datetime.datetime.now())
diff --git a/compiler/nnc/utils/infer_tests/res2bin.py b/compiler/nnc/utils/infer_tests/res2bin.py
new file mode 100755
index 000000000..0c21848d9
--- /dev/null
+++ b/compiler/nnc/utils/infer_tests/res2bin.py
@@ -0,0 +1,33 @@
+import numpy as np
+import h5py
+import struct
+import sys
+
+# This script takes hdf5 file and unfolds it in a vector of float values
+# which is then writen in binary format to a given file
+# This is used by infer_testcases.py
+
+
+def res2bin(infilename, outfilename):
+ # print("Input filename: ", infilename)
+ # print("Output filename: " , outfilename)
+
+ f = h5py.File(infilename)
+ dset = f[list(f.keys())[0]]
+
+ vals = np.zeros(np.shape(dset), dtype='float32')
+ for i in range(np.size(dset, 0)):
+ vals[i, :] = np.asarray(dset[i], dtype='float32')
+ vals = list(np.reshape(vals, (vals.size)))
+
+ with open(outfilename, 'wb') as outfile:
+ outfile.write(struct.pack('f' * len(vals), *vals))
+
+
+if __name__ == '__main__':
+ argc = len(sys.argv)
+ if (argc > 2):
+ res2bin(sys.argv[1], sys.argv[2])
+ else:
+ print("Not enough arguments, expected: hdf5 filename, output filename")
+ exit()
diff --git a/compiler/nnc/utils/input_gen/CMakeLists.txt b/compiler/nnc/utils/input_gen/CMakeLists.txt
new file mode 100644
index 000000000..57734fe6c
--- /dev/null
+++ b/compiler/nnc/utils/input_gen/CMakeLists.txt
@@ -0,0 +1,9 @@
+find_package(HDF5 COMPONENTS CXX QUIET)
+
+if(NOT HDF5_FOUND)
+ return()
+endif(NOT HDF5_FOUND)
+
+add_executable(tensor_gen tensor_gen.cpp)
+target_include_directories(tensor_gen PRIVATE ${HDF5_INCLUDE_DIRS})
+target_link_libraries(tensor_gen ${HDF5_CXX_LIBRARIES})
diff --git a/compiler/nnc/utils/input_gen/tensor_gen.cpp b/compiler/nnc/utils/input_gen/tensor_gen.cpp
new file mode 100644
index 000000000..04798c8fb
--- /dev/null
+++ b/compiler/nnc/utils/input_gen/tensor_gen.cpp
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <stdexcept>
+#include <functional>
+#include <algorithm>
+#include <H5Cpp.h>
+
+using namespace std;
+
+class Tensor;
+
+static void iterate(Tensor &tensor, function<void(vector<int> &)> on_loop);
+
+class Tensor
+{
+public:
+  // Row-major dense float tensor; strides are precomputed so that at() is a
+  // simple dot product of coordinates and strides.
+  explicit Tensor(const vector<hsize_t> &shape) : _shape(shape), _data(0), _num_elems(1)
+  {
+    _strides.resize(shape.size());
+
+    // innermost dimension gets stride 1; accumulate outwards
+    for (int i = _shape.size() - 1; i >= 0; --i)
+    {
+      _strides[i] = _num_elems;
+      _num_elems *= _shape[i];
+    }
+
+    _data = new float[_num_elems];
+  }
+
+  ~Tensor() { delete[] _data; }
+  int rank() const { return _shape.size(); }
+  int dim(int d) const { return _shape[d]; }
+  float *data() { return _data; }
+  // NOTE(review): returns the element count as float although _num_elems is
+  // hsize_t -- precision loss for very large tensors; confirm intent.
+  float numElems() const { return _num_elems; }
+
+  // Element access by multi-dimensional coordinates (no bounds checking).
+  float &at(const vector<int> &coords)
+  {
+    int offset = 0;
+
+    for (auto i = 0; i < coords.size(); ++i)
+      offset += coords[i] * _strides[i];
+
+    return _data[offset];
+  }
+
+  // Returns a new tensor whose axis i is this tensor's axis reshape[i],
+  // i.e. a generalized axis permutation.
+  Tensor transpose(const vector<hsize_t> &reshape)
+  {
+    vector<hsize_t> tr_shape(_shape.size());
+
+    for (auto i = 0; i < _shape.size(); ++i)
+      tr_shape[i] = _shape[reshape[i]];
+
+    Tensor result(tr_shape);
+    auto on_loop = [this, &reshape, &result](vector<int> &coords) {
+      vector<int> tr_coords(_shape.size());
+
+      for (int i = 0; i < rank(); ++i)
+        tr_coords[i] = coords[reshape[i]];
+
+      result.at(tr_coords) = at(coords);
+    };
+    iterate(*this, on_loop);
+    return result;
+  }
+
+private:
+  vector<hsize_t> _shape;
+  vector<hsize_t> _strides;
+  float *_data;
+  hsize_t _num_elems;
+};
+
+static void fillTensor(Tensor &tensor)
+{
+  // Fill with 10, 20, 30, ... so each element's value encodes its flat index.
+  int v = 10;
+
+  // NOTE(review): numElems() returns float, so this compares int to float --
+  // fine for small tensors, confirm for very large element counts.
+  for (int i = 0; i < tensor.numElems(); ++i)
+  {
+    tensor.data()[i] = v;
+    v += 10;
+  }
+}
+
+static void iterate(Tensor &tensor, function<void(vector<int> &)> on_loop)
+{
+  // Visit every coordinate of the tensor in row-major order, calling on_loop
+  // for each; 'coords' is advanced like an odometer.
+  int num_dims = tensor.rank();
+  vector<int> coords(num_dims, 0);
+  vector<int> dims(num_dims);
+
+  for (int i = 0; i < num_dims; ++i)
+    dims[i] = tensor.dim(i);
+
+  for (;;)
+  {
+    on_loop(coords);
+
+    // find the rightmost coordinate that can still be incremented
+    int i;
+    for (i = num_dims - 1; i >= 0; --i)
+    {
+      if (coords[i] < dims[i] - 1)
+      {
+        ++coords[i];
+        break;
+      }
+    }
+
+    // every coordinate is at its maximum -> enumeration complete
+    if (i < 0)
+      break;
+
+    // reset everything to the right of the incremented digit
+    fill(coords.begin() + i + 1, coords.end(), 0);
+  }
+}
+
+static void dumpTensor(Tensor &tensor)
+{
+  // Print every element as "i0, i1, ... = value", one line per element.
+  iterate(tensor, [&tensor](vector<int> &coords) {
+    const char *sep = "";
+    for (int i = 0; i < tensor.rank(); ++i)
+    {
+      cout << sep << coords[i];
+      sep = ", ";
+    }
+    cout << " = " << tensor.at(coords) << endl;
+  });
+}
+
+static void writeTensorToDatFile(const string &file_name, Tensor &tensor)
+{
+  // Dump the tensor's raw float data to "<file_name>.dat".
+  ofstream of(file_name + ".dat", ios_base::binary);
+
+  if (of.fail())
+  {
+    cout << "Could not output tensor to the: " << file_name + ".dat" << endl;
+    return; // the original fell through and kept writing into the failed stream
+  }
+
+  of.write(reinterpret_cast<char *>(tensor.data()), tensor.numElems() * sizeof(float));
+}
+
+static void writeTensorToHDF5File(const vector<hsize_t> &dimensions, const string &tensor_name,
+                                  const string &file_name, Tensor &tensor)
+{
+  // Create (truncate) "<file_name>.hdf5" with a single dataset 'tensor_name'.
+  // Stored as big-endian IEEE_F32BE, which nnkit's hdf5 importer expects.
+  H5::H5File h5File(file_name + ".hdf5", H5F_ACC_TRUNC);
+  H5::DataSpace dataspace(dimensions.size(), &dimensions[0]);
+  auto dataset = h5File.createDataSet(tensor_name, H5::PredType::IEEE_F32BE, dataspace);
+  dataset.write(tensor.data(), H5::PredType::NATIVE_FLOAT);
+}
+
+int main(int argc, char *argv[])
+{
+ if (argc < 4)
+ {
+ cout << "Usage: " << argv[0] << " <tensor name> <output file name> dim0, dim1, dim2, ..."
+ << endl;
+ cout << "Where: dim0, dim1, dim2, ... are the generated tensor shape dimensions" << endl;
+ return 1;
+ }
+
+ vector<hsize_t> dimensions;
+
+ for (int i = 3; i < argc; ++i)
+ {
+ try
+ {
+ int d = stoi(argv[i]);
+
+ if (d <= 0)
+ {
+ cout << "The dimensions must be positive values. This is not a correct dimension value: "
+ << d << endl;
+ return 1;
+ }
+
+ dimensions.push_back(d);
+ }
+ catch (const invalid_argument &)
+ {
+ cout << "The parameter does not look as an integer value: " << argv[i] << endl;
+ return 1;
+ }
+ catch (const out_of_range &)
+ {
+ cout << "The value is out of the C++ \"int\" type range: " << argv[i] << endl;
+ return 1;
+ }
+ }
+
+ Tensor caffe_tensor(dimensions);
+ fillTensor(caffe_tensor);
+ writeTensorToHDF5File(dimensions, argv[1], "in_" + string(argv[2]) + "_caffe", caffe_tensor);
+
+ vector<hsize_t> tf_reshape{0, 2, 3, 1};
+ Tensor tf_tensor = caffe_tensor.transpose(tf_reshape);
+ writeTensorToDatFile(string(argv[2]) + "_tf", tf_tensor);
+
+ return 0;
+}
diff --git a/compiler/nnc/utils/model_runner/common_place.py b/compiler/nnc/utils/model_runner/common_place.py
new file mode 100755
index 000000000..eb8953455
--- /dev/null
+++ b/compiler/nnc/utils/model_runner/common_place.py
@@ -0,0 +1,70 @@
+import h5py
+import argparse
+from argparse import RawTextHelpFormatter
+
+
+def regular_step():
+    """
+    This function is intended to decompose the necessary steps to obtain information from the command line.
+
+    :return: argparse object, which hold paths to nn model and input data
+    """
+    parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter)
+
+    parser.add_argument(
+        '-m',
+        '--model',
+        help=("specify input file with NN model, \n[depends from model, "
+              " two for caffe and caffe2, one for onnx and tflite]"),
+        nargs='+')
+    parser.add_argument(
+        '-i',
+        '--input',
+        help=(" specify file with neural"
+              " network input data, hdf5 for caffe caffe2 tflite "
+              "and pb for onnx"),
+        required=True)
+    parser.add_argument(
+        '-o',
+        '--output_path',
+        help='here you specify which place will hold your output, default here',
+        default='')
+
+    args = parser.parse_args()
+    # added to check is our input file or not. most simple way
+    try:
+        with open(args.input) as f:
+            pass
+    except IOError as e:
+        # NOTE(review): only warns and continues -- the bad path is still
+        # returned to the caller; confirm whether it should exit here.
+        print('input file your enter doesnt exist!')
+
+    # added to check is our model right or not
+    try:
+        for i in args.model:
+            with open(i) as f:
+                pass
+    except IOError as e:
+        print('model you enter doesnt exist, write correct PATH ')
+
+    return args
+
+
+def save_result(output_path, output_data):
+ """
+ This function save result of nn working in .hdf5 file
+ :param output_path: you specify directory to store your result
+ :param output_data: information that you write to .hdf5 file
+ :return:
+ """
+ with open(output_path + 'responce.txt', 'w+') as f:
+ f.write(str(output_data))
+ f = h5py.File(output_path + 'responce.hdf5', 'w')
+ f.create_dataset('out', dtype='float32', data=output_data)
+ f.close()
+
+
+def read_input(input_path):
+ h5f = h5py.File(input_path, 'r')
+ for t in h5f:
+ tensorName = str(t)
+ return h5py.File(input_path, 'r')[tensorName][:]
diff --git a/compiler/nnc/utils/model_runner/model_runner_caffe.py b/compiler/nnc/utils/model_runner/model_runner_caffe.py
new file mode 100755
index 000000000..a2e94272c
--- /dev/null
+++ b/compiler/nnc/utils/model_runner/model_runner_caffe.py
@@ -0,0 +1,22 @@
+from common_place import *
+import caffe
+
+
+def run_caffe(model_topology, model_weight, input_path, output_path=''):
+    """Run inference of a caffe model and save the output of its last layer.
+
+    :param model_topology: first file passed via -m
+    :param model_weight: second file passed via -m
+    :param input_path: .hdf5 file with the input tensor
+    :param output_path: directory/prefix for the saved result
+    """
+    path = model_topology
+    path_w = model_weight
+
+    # NOTE(review): caffe.Net's usual order is (prototxt, caffemodel, phase);
+    # here path_w is passed first -- verify against how callers order args.model.
+    net = caffe.Net(path_w, path, caffe.TEST)
+    # TODO get 'data' parameter more universal, blobs contain other names
+    net.blobs['data'].data[...] = read_input(input_path)
+    out = net.forward()
+    all_names = [n for n in net._layer_names]
+    # take the output blob of the last layer
+    out = out[all_names[-1]]
+    save_result(output_path, out)
+    print(out)
+
+
+if __name__ == '__main__':
+ args = regular_step()
+
+ run_caffe(args.model[0], args.model[1], args.input, args.output_path)
diff --git a/compiler/nnc/utils/model_runner/model_runner_caffe2.py b/compiler/nnc/utils/model_runner/model_runner_caffe2.py
new file mode 100755
index 000000000..0c8feca92
--- /dev/null
+++ b/compiler/nnc/utils/model_runner/model_runner_caffe2.py
@@ -0,0 +1,23 @@
+from common_place import *
+
+from caffe2.python import workspace
+
+
+def run_caffe2(init_net, predict_net, input_path, output_path=''):
+ x = read_input(input_path)
+ with open(init_net, 'rb') as f:
+ init_net = f.read()
+
+ with open(predict_net, 'rb') as f:
+ predict_net = f.read()
+ p = workspace.Predictor(init_net, predict_net)
+ # TODO get 'data' parameter more universal, blobs contain other names
+ results = p.run({'data': x})
+ print(results)
+ save_result(output_path, results)
+
+
+if __name__ == '__main__':
+ args = regular_step()
+
+ run_caffe2(args.model[0], args.model[1], args.input, args.output_path)
diff --git a/compiler/nnc/utils/model_runner/model_runner_onnx.py b/compiler/nnc/utils/model_runner/model_runner_onnx.py
new file mode 100755
index 000000000..6e0e8a657
--- /dev/null
+++ b/compiler/nnc/utils/model_runner/model_runner_onnx.py
@@ -0,0 +1,27 @@
+from common_place import *
+
+import onnx
+import caffe2.python.onnx.backend
+
+
+def run_onnx(model, input_path, output_path=''): #args.model[0] , args.input
+    """Run inference of an onnx model through the caffe2 backend and save
+    the result via save_result."""
+    path = model
+
+    #I'll leave it in case anyone needs to read the .pb file.
+    #proto_arr = onnx.TensorProto()
+    #with open(input_path, 'rb') as f:
+    #    proto_arr.ParseFromString(f.read())
+    #    input_arr = onnx.numpy_helper.to_array(proto_arr)
+
+    # NOTE(review): onnx.load's second parameter is a serialization format,
+    # not a file mode -- the 'rb' here looks accidental; confirm and drop it.
+    modelFile = onnx.load(path, 'rb')
+    input_arr = read_input(input_path)
+    output = caffe2.python.onnx.backend.run_model(modelFile, input_arr)
+
+    print(output)
+    save_result(output_path, output)
+
+
+if __name__ == '__main__':
+ args = regular_step()
+
+ run_onnx(args.model[0], args.input, args.output_path)
diff --git a/compiler/nnc/utils/model_runner/model_runner_tflite.py b/compiler/nnc/utils/model_runner/model_runner_tflite.py
new file mode 100755
index 000000000..80847b7df
--- /dev/null
+++ b/compiler/nnc/utils/model_runner/model_runner_tflite.py
@@ -0,0 +1,25 @@
+from common_place import *
+import tensorflow as tf
+
+
+def run_tflite(model, input_path, output_path=''):
+ input = read_input(input_path)
+
+ interpreter = tf.contrib.lite.Interpreter(model_path=model)
+ interpreter.allocate_tensors()
+
+ input_details = interpreter.get_input_details()
+ output_details = interpreter.get_output_details()
+ input_data = input
+ interpreter.set_tensor(input_details[0]['index'], input_data)
+
+ interpreter.invoke()
+ output_data = interpreter.get_tensor(output_details[0]['index'])
+ print(output_data)
+ save_result(output_path, output_data)
+
+
+if __name__ == '__main__':
+ args = regular_step()
+
+ run_tflite(args.model[0], args.input, args.output_path)
diff --git a/compiler/nnc/utils/model_runner/readme.md b/compiler/nnc/utils/model_runner/readme.md
new file mode 100644
index 000000000..51ff65b6f
--- /dev/null
+++ b/compiler/nnc/utils/model_runner/readme.md
@@ -0,0 +1,39 @@
+# here I write how I run model on my computer
+
+sections:
+a) goal of this script
+b) examples of code running in author's local machine
+c) parameters and short comment
+____
+## goal of this script
+
+Here the author has attempted to implement a program capable of running any of the 4 models (caffe, caffe2, tflite, onnx) in a simple and user-friendly manner. The goal of the program is to get the file containing the output of the computation graph at the program output.
+_______
+
+## examples of code running in author's local machine
+The purpose of the examples below is to demonstrate which arguments and in which order you should use to run this script correctly.
+
+caffe:
+```
+$ python3 model_runner.py -m caffe1_runer/inception-v3_ref.caffemodel caffe1_runer/inception-v3_ref.prototxt -i caffe1_runer/ILSVRC2012_val_00000002.JPEG.tfl.hdf5
+```
+caffe2:
+```
+$ python model_runner.py -m caffe2_runer_and_photo/caffe2_models/init_net.pb caffe2_runer_and_photo/caffe2_models/predict_net.pb -i randomInput.hdf5
+```
+tflite:
+```
+$ python model_runner.py -m tflite_runer_and_photo/TST-1-2\ AVARAGE_POOP_2D.tflite -i tflite_runer_and_photo/in.hdf5
+```
+onnx:
+```
+$ python model_runner.py -m onnx_runer/model.onnx -i RANDOM.hdf5
+```
+
+ ------
+
+ ## parameters and short comment
+
+ -m specifies the pre-trained model to run
+ -i specifies the model's input
+
diff --git a/compiler/nnc/utils/prepare_inputs/README.md b/compiler/nnc/utils/prepare_inputs/README.md
new file mode 100644
index 000000000..c11759ee3
--- /dev/null
+++ b/compiler/nnc/utils/prepare_inputs/README.md
@@ -0,0 +1,8 @@
+These scripts can be useful for developing/testing nnc. Usage and purpose of the scripts can be found in comments in their source code.
+
+Note that these scripts are just development artifacts and are not supposed to go into production in any form.
+
+jpeg2hdf5.py: prepare '.hdf5' files from '.JPEG' to be used by nnkit. Can also convert those '.JPEG's to binary format along the way.
+
+'testcases' folder structure:
+At the moment we use the following structure: a folder for a model contains 'models' and 'testcases' subfolders. The 'models' subfolder contains model that we run inference on, 'testcases' subfolder contains a 'testcase*' folder for each different testcase. Each of those folders in turn contain 'input' with a '.JPEG' file (and '.hdf5' and '.dat' files after running `jpeg2hdf5` script), and 'output' folder where inference results are stored.
diff --git a/compiler/nnc/utils/prepare_inputs/jpeg2hdf5.py b/compiler/nnc/utils/prepare_inputs/jpeg2hdf5.py
new file mode 100755
index 000000000..54f12b062
--- /dev/null
+++ b/compiler/nnc/utils/prepare_inputs/jpeg2hdf5.py
@@ -0,0 +1,170 @@
+from PIL import Image
+import numpy as np
+import h5py
+import sys
+import glob
+import subprocess
+import struct
+import datetime
+
+# Generates hdf5 files (and optionally binary files) from JPEGs
+# -f - specifies framework to generate them for
+# -t - specifies testcases directory (see its structure in readme)
+# -i - specifies input node name of the model that will use them (required by nnkit)
+# -r - if files already exist, rewrites them
+# -b - enable binary file generation
+# -p - allow some sort of parallelism by processing only a subset of files,
+# you need to specify number of processes and run as much of them
+# manually with diferent numbers
+#
+# Example:
+# python3 conv.py -f tfl -t inc_slim/testcases -i input -p 16 1
+#
+
+helpstr = 'Usage: -f (tfl | caf) ' + \
+ '-t <testcases_directory> ' + \
+ '[-i <input_layer_name>] ' + \
+ '[-r] [-b]' + \
+ '[-p <number_of_processes> <process number>]'
+
+supported_frameworks = ['tfl', 'caf']
+args = {}
+# Defaults
+args['-p'] = (1, 1)
+args['-r'] = False
+args['-b'] = False
+
+argc = len(sys.argv)
+for i in range(len(sys.argv)):
+ arg = sys.argv[i]
+ if arg == '-r' or arg == '-b':
+ args[arg] = True
+ elif arg == '-f' or arg == '-t' or arg == '-i':
+ if i + 1 >= argc or sys.argv[i + 1][0] == '-':
+ print(arg, " is missing it's value")
+ print(helpstr)
+ exit()
+ args[arg] = sys.argv[i + 1]
+ elif arg == '-p':
+ if i + 2 >= argc or sys.argv[i + 1][0] == '-' or sys.argv[i + 2][0] == '-':
+ print(arg, " is missing some of it's values")
+ print(helpstr)
+ exit()
+ args[arg] = (int(sys.argv[i + 1]), int(sys.argv[i + 2]))
+ elif arg[0] == '-':
+ print('Unsupported argument: ', arg)
+ exit()
+
+if not ('-f' in args and '-t' in args):
+ print('Some arguments are not provided')
+ print(helpstr)
+ exit()
+
+fw = args['-f']
+if not fw in supported_frameworks:
+ print('Unsupported framework: ', fw)
+ exit()
+
+indirname = args['-t']
+
+if not '-i' in args:
+ if fw == 'caf':
+ inputname = 'data'
+ elif fw == 'tfl':
+ inputname = 'input'
+else:
+ inputname = args['-i']
+
+nproc, proc_num = args['-p']
+remove_existing = args['-r']
+gen_binary = args['-b']
+
+print('started at', datetime.datetime.now())
+testcases = glob.glob(indirname + '/testcase*/')
+testcases.sort()
+testcases = testcases[proc_num - 1::nproc]
+
+number = 0
+for testcase in testcases:
+ try:
+ infilename = glob.glob(testcase + 'input/*.JPEG')
+ if len(infilename) > 0:
+ number += 1
+ infilename = infilename[0]
+ outfilename = testcase + 'input/' + infilename.split('/')[-1] + '.hdf5'
+ binoutfilename = testcase + 'input/' + infilename.split('/')[-1] + '.dat'
+ found_hdf = len(glob.glob(outfilename)) != 0
+ found_bin = len(glob.glob(binoutfilename)) != 0
+ if not found_hdf or (not found_bin and gen_binary) or remove_existing:
+ with Image.open(infilename) as im:
+ #TODO: check if order is correct here and in other places
+ h = im.size[0]
+ w = im.size[1]
+ s = im.split()
+ if len(s) == 3:
+ r, g, b = s
+ else:
+ r = s[0]
+ g = s[0]
+ b = s[0]
+ rf = r.convert('F')
+ gf = g.convert('F')
+ bf = b.convert('F')
+ rfb = rf.tobytes()
+ gfb = gf.tobytes()
+ bfb = bf.tobytes()
+
+ made_hdf = False
+ if not found_hdf or remove_existing:
+ if fw == 'tfl':
+ reds = np.fromstring(rfb, count=(h * w), dtype='float32')
+ greens = np.fromstring(gfb, count=(h * w), dtype='float32')
+ blues = np.fromstring(bfb, count=(h * w), dtype='float32')
+
+ dset_shape = (1, h, w, 3)
+ narr = np.ndarray(shape=(0))
+ mixed_ch = []
+ for i in range(h * w):
+ mixed_ch += [
+ reds[i] / 255.0, greens[i] / 255.0, blues[i] / 255.0
+ ]
+ narr = np.append(narr, mixed_ch)
+ elif fw == 'caf':
+ dset_shape = (1, 3, h, w)
+ narr = np.fromstring(
+ rfb + gfb + bfb, count=(3 * h * w), dtype='float32')
+ for i in range(3 * h * w):
+ narr[i] /= 255.0
+ if remove_existing:
+ subprocess.call(['rm', '-f', outfilename])
+ with h5py.File(outfilename) as f:
+ # nnkit hdf5_import asserts to use IEEE_F32BE, which is >f4 in numpy
+ dset = f.require_dataset(inputname, dset_shape, dtype='>f4')
+ dset[0] = np.reshape(narr, dset_shape)
+ made_hdf = True
+
+ if gen_binary and (not found_bin or remove_existing):
+ if fw == 'tfl' and made_hdf:
+ l = narr.tolist()
+ else:
+ reds = np.fromstring(rfb, count=(h * w), dtype='float32')
+ greens = np.fromstring(gfb, count=(h * w), dtype='float32')
+ blues = np.fromstring(bfb, count=(h * w), dtype='float32')
+ l = np.ndarray(shape=(0))
+ mixed_ch = []
+ for i in range(h * w):
+ mixed_ch += [
+ reds[i] / 255.0, greens[i] / 255.0, blues[i] / 255.0
+ ]
+ l = np.append(l, mixed_ch)
+ l = l.tolist()
+ with open(binoutfilename, 'wb') as out:
+ out.write(struct.pack('f' * len(l), *l))
+ print(number, ': ' + testcase + ' Done')
+ else:
+ print(testcase, ' nothing to do')
+ else:
+ print(testcase, ' JPEG not found')
+ except:
+ print(testcase, " FAILED")
+print('started at', ended.datetime.now())
diff --git a/compiler/nnc/utils/tflite_dot_dumper/CMakeLists.txt b/compiler/nnc/utils/tflite_dot_dumper/CMakeLists.txt
new file mode 100644
index 000000000..f5e894e67
--- /dev/null
+++ b/compiler/nnc/utils/tflite_dot_dumper/CMakeLists.txt
@@ -0,0 +1,7 @@
+if (NOT TARGET mir_tflite_importer)
+ return ()
+endif()
+
+add_executable(tflite_model_dumper model_dump.cpp)
+
+target_link_libraries(tflite_model_dumper PRIVATE nnc_support mir_tflite_importer)
diff --git a/compiler/nnc/utils/tflite_dot_dumper/model_dump.cpp b/compiler/nnc/utils/tflite_dot_dumper/model_dump.cpp
new file mode 100644
index 000000000..ab264c82f
--- /dev/null
+++ b/compiler/nnc/utils/tflite_dot_dumper/model_dump.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "support/CommandLine.h"
+#include "mir/IrDotDumper.h"
+
+#include <tflite_importer.h>
+
+#include <exception>
+#include <iostream>
+
+using namespace nnc;
+using namespace mir;
+
+int main(int argc, const char **argv)
+{
+ cli::Option<std::string> model_path(cli::optname("--model"), cli::overview("Path to the model"));
+ cli::CommandLine::getParser()->parseCommandLine(argc, argv);
+
+ try
+ {
+ auto graph = mir_tflite::loadModel(model_path);
+ dumpGraph(graph.get(), std::cout);
+ }
+ catch (std::exception &e)
+ {
+ std::cout << "Error: " << e.what() << std::endl;
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/compiler/nnkit-caffe/CMakeLists.txt b/compiler/nnkit-caffe/CMakeLists.txt
new file mode 100644
index 000000000..07e2ea4f1
--- /dev/null
+++ b/compiler/nnkit-caffe/CMakeLists.txt
@@ -0,0 +1,8 @@
+nnas_find_package(Caffe QUIET)
+
+if(NOT Caffe_FOUND)
+ return()
+endif(NOT Caffe_FOUND)
+
+add_subdirectory(support)
+add_subdirectory(backend)
diff --git a/compiler/nnkit-caffe/backend/CMakeLists.txt b/compiler/nnkit-caffe/backend/CMakeLists.txt
new file mode 100644
index 000000000..b18aa4f11
--- /dev/null
+++ b/compiler/nnkit-caffe/backend/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_library(nnkit_caffe_backend SHARED Module.cpp)
+target_link_libraries(nnkit_caffe_backend nnkit_support_caffe)
+target_link_libraries(nnkit_caffe_backend stdex)
diff --git a/compiler/nnkit-caffe/backend/Module.cpp b/compiler/nnkit-caffe/backend/Module.cpp
new file mode 100644
index 000000000..cb24a4e60
--- /dev/null
+++ b/compiler/nnkit-caffe/backend/Module.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/caffe/Backend.h"
+
+#include <nnkit/CmdlineArguments.h>
+#include <stdex/Memory.h>
+
+extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
+{
+ using stdex::make_unique;
+
+ auto net = make_unique<::caffe::Net<float>>(args.at(0), caffe::TEST);
+
+ if (args.size() > 1)
+ {
+ net->CopyTrainedLayersFrom(args.at(1));
+ }
+
+ return make_unique<::nnkit::support::caffe::Backend<float>>(std::move(net));
+}
diff --git a/compiler/nnkit-caffe/requires.cmake b/compiler/nnkit-caffe/requires.cmake
new file mode 100644
index 000000000..be53ae74f
--- /dev/null
+++ b/compiler/nnkit-caffe/requires.cmake
@@ -0,0 +1 @@
+require("nnkit-intf")
diff --git a/compiler/nnkit-caffe/support/CMakeLists.txt b/compiler/nnkit-caffe/support/CMakeLists.txt
new file mode 100644
index 000000000..d56622cb3
--- /dev/null
+++ b/compiler/nnkit-caffe/support/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_library(nnkit_support_caffe INTERFACE)
+target_include_directories(nnkit_support_caffe INTERFACE include)
+target_link_libraries(nnkit_support_caffe INTERFACE nnkit_intf_backend)
+target_link_libraries(nnkit_support_caffe INTERFACE caffe)
diff --git a/compiler/nnkit-caffe/support/include/nnkit/support/caffe/Backend.h b/compiler/nnkit-caffe/support/include/nnkit/support/caffe/Backend.h
new file mode 100644
index 000000000..77a9f86b9
--- /dev/null
+++ b/compiler/nnkit-caffe/support/include/nnkit/support/caffe/Backend.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_CAFFE_BACKEND_H__
+#define __NNKIT_SUPPORT_CAFFE_BACKEND_H__
+
+#include "nnkit/support/caffe/InputBlobContext.h"
+#include "nnkit/support/caffe/OutputBlobContext.h"
+#include "nnkit/support/caffe/TensorContext.h"
+
+#include <nnkit/Backend.h>
+
+#include <caffe/net.hpp>
+
+#include <memory>
+#include <functional>
+
+namespace nnkit
+{
+namespace support
+{
+namespace caffe
+{
+
+template <typename DType> class Backend final : public nnkit::Backend
+{
+public:
+ Backend(std::unique_ptr<::caffe::Net<DType>> &&net) : _net{std::move(net)}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void prepare(const std::function<void(nnkit::TensorContext &)> &f) override
+ {
+ InputBlobContext<DType> blobs(*_net);
+ TensorContext<DType> tensors(blobs);
+ f(tensors);
+ }
+
+public:
+ void run(void) override { _net->Forward(); }
+
+public:
+ void teardown(const std::function<void(nnkit::TensorContext &)> &f) override
+ {
+ OutputBlobContext<DType> blobs(*_net);
+ TensorContext<DType> tensors(blobs);
+ f(tensors);
+ }
+
+private:
+ std::unique_ptr<::caffe::Net<DType>> _net;
+};
+
+} // namespace caffe
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_CAFFE_BACKEND_H__
diff --git a/compiler/nnkit-caffe/support/include/nnkit/support/caffe/BlobContext.h b/compiler/nnkit-caffe/support/include/nnkit/support/caffe/BlobContext.h
new file mode 100644
index 000000000..17f4a9a37
--- /dev/null
+++ b/compiler/nnkit-caffe/support/include/nnkit/support/caffe/BlobContext.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_CAFFE_BLOB_CONTEXT_H__
+#define __NNKIT_SUPPORT_CAFFE_BLOB_CONTEXT_H__
+
+#include <caffe/blob.hpp>
+
+namespace nnkit
+{
+namespace support
+{
+namespace caffe
+{
+
+template <typename DType> struct BlobContext
+{
+ virtual ~BlobContext() = default;
+
+ virtual uint32_t size(void) const = 0;
+
+ virtual std::string name(uint32_t n) const = 0;
+ virtual ::caffe::Blob<DType> *blob(uint32_t n) = 0;
+
+ DType *region(uint32_t n) { return blob(n)->mutable_cpu_data(); }
+};
+
+} // namespace caffe
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_CAFFE_BLOB_CONTEXT_H__
diff --git a/compiler/nnkit-caffe/support/include/nnkit/support/caffe/InputBlobContext.h b/compiler/nnkit-caffe/support/include/nnkit/support/caffe/InputBlobContext.h
new file mode 100644
index 000000000..698e73a03
--- /dev/null
+++ b/compiler/nnkit-caffe/support/include/nnkit/support/caffe/InputBlobContext.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_CAFFE_INPUT_BLOB_CONTEXT_H__
+#define __NNKIT_SUPPORT_CAFFE_INPUT_BLOB_CONTEXT_H__
+
+#include "nnkit/support/caffe/BlobContext.h"
+
+#include <caffe/net.hpp>
+
+namespace nnkit
+{
+namespace support
+{
+namespace caffe
+{
+
+template <typename DType> class InputBlobContext final : public BlobContext<DType>
+{
+public:
+ InputBlobContext(::caffe::Net<DType> &net) : _net(net)
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t size(void) const override { return _net.num_inputs(); }
+
+ std::string name(uint32_t n) const override
+ {
+ return _net.blob_names().at(_net.input_blob_indices().at(n));
+ }
+
+ ::caffe::Blob<DType> *blob(uint32_t n) override { return _net.input_blobs().at(n); }
+
+private:
+ ::caffe::Net<DType> &_net;
+};
+
+} // namespace caffe
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_CAFFE_INPUT_BLOB_CONTEXT_H__
diff --git a/compiler/nnkit-caffe/support/include/nnkit/support/caffe/OutputBlobContext.h b/compiler/nnkit-caffe/support/include/nnkit/support/caffe/OutputBlobContext.h
new file mode 100644
index 000000000..81cc72ef6
--- /dev/null
+++ b/compiler/nnkit-caffe/support/include/nnkit/support/caffe/OutputBlobContext.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_CAFFE_OUTPUT_BLOB_CONTEXT_H__
+#define __NNKIT_SUPPORT_CAFFE_OUTPUT_BLOB_CONTEXT_H__
+
+#include "nnkit/support/caffe/BlobContext.h"
+
+#include <caffe/net.hpp>
+
+namespace nnkit
+{
+namespace support
+{
+namespace caffe
+{
+
+template <typename DType> class OutputBlobContext final : public BlobContext<DType>
+{
+public:
+ OutputBlobContext(::caffe::Net<DType> &net) : _net(net)
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t size(void) const override { return _net.num_outputs(); }
+
+ std::string name(uint32_t n) const override
+ {
+ return _net.blob_names().at(_net.output_blob_indices().at(n));
+ }
+
+ ::caffe::Blob<DType> *blob(uint32_t n) override { return _net.output_blobs().at(n); }
+
+private:
+ ::caffe::Net<DType> &_net;
+};
+
+} // namespace caffe
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_CAFFE_OUTPUT_BLOB_CONTEXT_H__
diff --git a/compiler/nnkit-caffe/support/include/nnkit/support/caffe/TensorContext.h b/compiler/nnkit-caffe/support/include/nnkit/support/caffe/TensorContext.h
new file mode 100644
index 000000000..cb0fc02ff
--- /dev/null
+++ b/compiler/nnkit-caffe/support/include/nnkit/support/caffe/TensorContext.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_CAFFE_TENSOR_CONTEXT_H__
+#define __NNKIT_SUPPORT_CAFFE_TENSOR_CONTEXT_H__
+
+#include "nnkit/support/caffe/BlobContext.h"
+
+#include <nnkit/TensorContext.h>
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+
+#include <type_traits>
+#include <stdexcept>
+
+namespace nnkit
+{
+namespace support
+{
+namespace caffe
+{
+
+template <typename DType> class TensorContext final : public nnkit::TensorContext
+{
+public:
+ TensorContext(BlobContext<DType> &blobs) : _blobs(blobs)
+ {
+ // DO NOTHING
+ }
+
+private:
+ static nncc::core::ADT::tensor::Shape shapeOf(const ::caffe::Blob<DType> &blob)
+ {
+ nncc::core::ADT::tensor::Shape shape;
+
+ const uint32_t rank = blob.shape().size();
+
+ shape.resize(rank);
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ shape.dim(axis) = blob.shape(axis);
+ }
+
+ return shape;
+ }
+
+public:
+ uint32_t size(void) const override { return _blobs.size(); }
+
+ std::string name(uint32_t n) const override { return _blobs.name(n); }
+
+ nncc::core::ADT::tensor::Shape shape(uint32_t n) const override
+ {
+ return shapeOf(*_blobs.blob(n));
+ }
+
+ // Float (fp32) tensor support
+ bool isFloatTensor(uint32_t n) const override { return std::is_same<DType, float>::value; }
+
+ void getMutableFloatTensor(uint32_t n, const TensorContext::TypedAccessor<float> &f) override
+ {
+ if (!std::is_same<DType, float>::value)
+ {
+ throw std::runtime_error{"type mismatch"};
+ }
+
+ using nncc::core::ADT::tensor::LexicalLayout;
+ using nncc::core::ADT::tensor::make_overlay;
+
+ auto base = _blobs.region(n);
+ auto view = make_overlay<float, LexicalLayout>(shape(n), base);
+
+ f(*this, n, view);
+ }
+
+ void getConstFloatTensor(uint32_t n, const TensorContext::TypedReader<float> &f) const override
+ {
+ if (!std::is_same<DType, float>::value)
+ {
+ throw std::runtime_error{"type mismatch"};
+ }
+
+ using nncc::core::ADT::tensor::LexicalLayout;
+ using nncc::core::ADT::tensor::make_overlay;
+
+ auto base = _blobs.region(n);
+ auto view = make_overlay<float, LexicalLayout>(shape(n), base);
+
+ f(*this, n, view);
+ }
+
+private:
+ BlobContext<DType> &_blobs;
+};
+
+} // namespace caffe
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_CAFFE_TENSOR_CONTEXT_H__
diff --git a/compiler/nnkit-intf/CMakeLists.txt b/compiler/nnkit-intf/CMakeLists.txt
new file mode 100644
index 000000000..820fd507b
--- /dev/null
+++ b/compiler/nnkit-intf/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_subdirectory(tensor)
+add_subdirectory(cmdline)
+add_subdirectory(action)
+add_subdirectory(backend)
diff --git a/compiler/nnkit-intf/README.md b/compiler/nnkit-intf/README.md
new file mode 100644
index 000000000..e3598dfd6
--- /dev/null
+++ b/compiler/nnkit-intf/README.md
@@ -0,0 +1,3 @@
+# nnkit-intf
+
+_nnkit-intf_ provides **basic** interface classes for _nnkit_ backend/action.
diff --git a/compiler/nnkit-intf/action/CMakeLists.txt b/compiler/nnkit-intf/action/CMakeLists.txt
new file mode 100644
index 000000000..82bb5f821
--- /dev/null
+++ b/compiler/nnkit-intf/action/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_library(nnkit_intf_action INTERFACE)
+target_include_directories(nnkit_intf_action INTERFACE include)
+target_link_libraries(nnkit_intf_action INTERFACE nnkit_intf_tensor)
+target_link_libraries(nnkit_intf_action INTERFACE nnkit_intf_cmdline)
diff --git a/compiler/nnkit-intf/action/include/nnkit/Action.h b/compiler/nnkit-intf/action/include/nnkit/Action.h
new file mode 100644
index 000000000..d2835023f
--- /dev/null
+++ b/compiler/nnkit-intf/action/include/nnkit/Action.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_ACTION_H__
+#define __NNKIT_ACTION_H__
+
+#include <nnkit/TensorContext.h>
+
+namespace nnkit
+{
+
+struct Action
+{
+ virtual ~Action() = default;
+
+ virtual void run(TensorContext &ctx) = 0;
+};
+
+} // namespace nnkit
+
+#endif // __NNKIT_ACTION_H__
diff --git a/compiler/nnkit-intf/backend/CMakeLists.txt b/compiler/nnkit-intf/backend/CMakeLists.txt
new file mode 100644
index 000000000..84161e9da
--- /dev/null
+++ b/compiler/nnkit-intf/backend/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_library(nnkit_intf_backend INTERFACE)
+target_include_directories(nnkit_intf_backend INTERFACE include)
+target_link_libraries(nnkit_intf_backend INTERFACE nnkit_intf_tensor)
+target_link_libraries(nnkit_intf_backend INTERFACE nnkit_intf_cmdline)
diff --git a/compiler/nnkit-intf/backend/include/nnkit/Backend.h b/compiler/nnkit-intf/backend/include/nnkit/Backend.h
new file mode 100644
index 000000000..2db34ccd8
--- /dev/null
+++ b/compiler/nnkit-intf/backend/include/nnkit/Backend.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_BACKEND_H__
+#define __NNKIT_BACKEND_H__
+
+#include <nnkit/TensorContext.h>
+
+#include <functional>
+
+namespace nnkit
+{
+
+struct Backend
+{
+ virtual ~Backend() = default;
+
+ virtual void prepare(const std::function<void(TensorContext &)> &f) = 0;
+ virtual void run(void) = 0;
+ virtual void teardown(const std::function<void(TensorContext &)> &f) = 0;
+};
+
+} // namespace nnkit
+
+#endif // __NNKIT_BACKEND_H__
diff --git a/compiler/nnkit-intf/cmdline/CMakeLists.txt b/compiler/nnkit-intf/cmdline/CMakeLists.txt
new file mode 100644
index 000000000..74f9f3c94
--- /dev/null
+++ b/compiler/nnkit-intf/cmdline/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_library(nnkit_intf_cmdline INTERFACE)
+target_include_directories(nnkit_intf_cmdline INTERFACE include)
diff --git a/compiler/nnkit-intf/cmdline/include/nnkit/CmdlineArguments.h b/compiler/nnkit-intf/cmdline/include/nnkit/CmdlineArguments.h
new file mode 100644
index 000000000..b480b4dc2
--- /dev/null
+++ b/compiler/nnkit-intf/cmdline/include/nnkit/CmdlineArguments.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CMDLINE_ARGUMENTS_H__
+#define __CMDLINE_ARGUMENTS_H__
+
+#include <cstdint>
+
+namespace nnkit
+{
+
+struct CmdlineArguments
+{
+ virtual ~CmdlineArguments() = default;
+
+ virtual uint32_t size(void) const = 0;
+
+ virtual const char *at(uint32_t n) const = 0;
+};
+
+} // namespace nnkit
+
+#endif // __CMDLINE_ARGUMENTS_H__
diff --git a/compiler/nnkit-intf/tensor/CMakeLists.txt b/compiler/nnkit-intf/tensor/CMakeLists.txt
new file mode 100644
index 000000000..86962a594
--- /dev/null
+++ b/compiler/nnkit-intf/tensor/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_library(nnkit_intf_tensor INTERFACE)
+target_include_directories(nnkit_intf_tensor INTERFACE include)
+target_link_libraries(nnkit_intf_tensor INTERFACE angkor)
diff --git a/compiler/nnkit-intf/tensor/include/nnkit/TensorContext.h b/compiler/nnkit-intf/tensor/include/nnkit/TensorContext.h
new file mode 100644
index 000000000..07d8d154c
--- /dev/null
+++ b/compiler/nnkit-intf/tensor/include/nnkit/TensorContext.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_TENSOR_CONTEXT_H__
+#define __NNKIT_TENSOR_CONTEXT_H__
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Reader.h>
+#include <nncc/core/ADT/tensor/Accessor.h>
+
+#include <string>
+#include <functional>
+#include <stdexcept>
+#include <cstdint>
+
+namespace nnkit
+{
+
+// NOTE This interface is subject to change.
+struct TensorContext
+{
+ template <typename T>
+ using TypedReader = std::function<void(const TensorContext &, uint32_t n,
+ const nncc::core::ADT::tensor::Reader<T> &)>;
+
+ template <typename T>
+ using TypedAccessor = std::function<void(const TensorContext &, uint32_t n,
+ nncc::core::ADT::tensor::Accessor<T> &)>;
+
+ virtual ~TensorContext() = default;
+
+ // The number of tensors that this context provides
+ virtual uint32_t size(void) const = 0;
+
+ // Query on properties of each tensor
+ virtual std::string name(uint32_t n) const = 0;
+ virtual nncc::core::ADT::tensor::Shape shape(uint32_t n) const = 0;
+
+ // TODO Support generic byte tensor
+ // TODO Support typed tensor for primitive types such as half(fp16), double(fp64), int8(s8),
+ // uint8(u8), uint(u32)
+
+ // Float (fp32) tensor support
+ virtual bool isFloatTensor(uint32_t n) const
+ {
+ throw std::runtime_error("This method should be overriden");
+ }
+
+ virtual void getMutableFloatTensor(uint32_t n, const TypedAccessor<float> &cb)
+ {
+ throw std::runtime_error("This method should be overriden");
+ }
+
+ virtual void getConstFloatTensor(uint32_t n, const TypedReader<float> &cb) const
+ {
+ throw std::runtime_error("This method should be overriden");
+ }
+
+ // S32
+ virtual bool isS32Tensor(uint32_t n) const
+ {
+ throw std::runtime_error("This method should be overriden");
+ }
+
+ virtual void getMutableS32Tensor(uint32_t n, const TypedAccessor<int32_t> &cb)
+ {
+ throw std::runtime_error("This method should be overriden");
+ }
+
+ virtual void getConstS32Tensor(uint32_t n, const TypedReader<int32_t> &cb) const
+ {
+ throw std::runtime_error("This method should be overriden");
+ }
+};
+
+} // namespace nnkit
+
+#endif // __NNKIT_TENSOR_CONTEXT_H__
diff --git a/compiler/nnkit-misc/CMakeLists.txt b/compiler/nnkit-misc/CMakeLists.txt
new file mode 100644
index 000000000..5ea6cdadd
--- /dev/null
+++ b/compiler/nnkit-misc/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectories()
diff --git a/compiler/nnkit-misc/README.md b/compiler/nnkit-misc/README.md
new file mode 100644
index 000000000..2017cf8d4
--- /dev/null
+++ b/compiler/nnkit-misc/README.md
@@ -0,0 +1,3 @@
+# nnkit-misc
+
+_nnkit-misc_ includes various helpers that make it easy to implement _nnkit_ extensions and tools.
diff --git a/compiler/nnkit-misc/backend/CMakeLists.txt b/compiler/nnkit-misc/backend/CMakeLists.txt
new file mode 100644
index 000000000..d351d5ce5
--- /dev/null
+++ b/compiler/nnkit-misc/backend/CMakeLists.txt
@@ -0,0 +1,14 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(nnkit_support_backend STATIC ${SOURCES})
+target_include_directories(nnkit_support_backend PUBLIC include)
+target_link_libraries(nnkit_support_backend PUBLIC nnkit_intf_backend)
+target_link_libraries(nnkit_support_backend PUBLIC dl)
+target_link_libraries(nnkit_support_backend PUBLIC stdex)
+
+find_package(Threads QUIET)
+
+if(TARGET Threads::Threads)
+ # This is necessary to support multi-threaded backends
+ target_link_libraries(nnkit_support_backend PUBLIC Threads::Threads)
+endif(TARGET Threads::Threads)
diff --git a/compiler/nnkit-misc/backend/include/nnkit/BackendPlugin.h b/compiler/nnkit-misc/backend/include/nnkit/BackendPlugin.h
new file mode 100644
index 000000000..1a324bf71
--- /dev/null
+++ b/compiler/nnkit-misc/backend/include/nnkit/BackendPlugin.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_BACKEND_PLUGIN_H__
+#define __NNKIT_BACKEND_PLUGIN_H__
+
+#include <nnkit/CmdlineArguments.h>
+#include <nnkit/Backend.h>
+
+#include <string>
+#include <memory>
+
+namespace nnkit
+{
+
+class BackendPlugin
+{
+public:
+ typedef std::unique_ptr<Backend> (*Entry)(const CmdlineArguments &);
+
+public:
+ BackendPlugin(void *handle, Entry entry) : _handle{handle}, _entry{entry}
+ {
+ // DO NOTHING
+ }
+
+public:
+ // Copy is not allowed to avoid double close
+ BackendPlugin(const BackendPlugin &) = delete;
+ BackendPlugin(BackendPlugin &&);
+
+public:
+ ~BackendPlugin();
+
+public:
+ std::unique_ptr<Backend> create(const CmdlineArguments &args) const;
+
+private:
+ void *_handle;
+ Entry _entry;
+};
+
+std::unique_ptr<BackendPlugin> make_backend_plugin(const std::string &path);
+
+} // namespace nnkit
+
+#endif // __NNKIT_BACKEND_PLUGIN_H__
diff --git a/compiler/nnkit-misc/backend/src/BackendPlugin.cpp b/compiler/nnkit-misc/backend/src/BackendPlugin.cpp
new file mode 100644
index 000000000..54b1fdc83
--- /dev/null
+++ b/compiler/nnkit-misc/backend/src/BackendPlugin.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/BackendPlugin.h"
+
+#include <cassert>
+#include <stdex/Memory.h>
+#include <iostream>
+
+// NOTE dlfcn.h is not a standard library
+#include <dlfcn.h>
+
+namespace nnkit
+{
+
+BackendPlugin::BackendPlugin(BackendPlugin &&plugin)
+{
+ // Handle is transferd from 'binder' instance into this instance.
+ _handle = plugin._handle;
+ _entry = plugin._entry;
+
+ plugin._handle = nullptr;
+ plugin._entry = nullptr;
+}
+
+BackendPlugin::~BackendPlugin()
+{
+ if (_handle != nullptr)
+ {
+ dlclose(_handle);
+ }
+}
+
+std::unique_ptr<Backend> BackendPlugin::create(const CmdlineArguments &args) const
+{
+ return _entry(args);
+}
+
+std::unique_ptr<BackendPlugin> make_backend_plugin(const std::string &path)
+{
+ if (path.empty())
+ {
+ throw std::runtime_error{"Backend library does not defined"};
+ }
+
+ void *handle;
+ BackendPlugin::Entry entry;
+
+ // NOTE Some backend (such as tflite) needs multithreading support (std::thread).
+ //
+ // std::thread in libstdc++.so includes weak symbols for pthread_XXX functions,
+ // and these weak symbols should be overridden by strong symbols in libpthread.so.
+ // If not, std::thread will not work correctly.
+ //
+ // RTLD_GLOBAL flag is necessary to allow weak symbols to be overridden.
+ handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_GLOBAL);
+ if (handle == nullptr)
+ {
+ std::cerr << dlerror() << std::endl;
+ exit(1);
+ }
+
+ char *error;
+ entry = reinterpret_cast<BackendPlugin::Entry>(dlsym(handle, "make_backend"));
+ if ((error = dlerror()) != nullptr)
+ {
+ dlclose(handle);
+ std::cerr << error << std::endl;
+ exit(1);
+ }
+
+ return stdex::make_unique<BackendPlugin>(handle, entry);
+}
+
+} // namespace nnkit
diff --git a/compiler/nnkit-misc/cmdline/CMakeLists.txt b/compiler/nnkit-misc/cmdline/CMakeLists.txt
new file mode 100644
index 000000000..52419a8ec
--- /dev/null
+++ b/compiler/nnkit-misc/cmdline/CMakeLists.txt
@@ -0,0 +1,5 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(nnkit_support_cmdline STATIC ${SOURCES})
+target_include_directories(nnkit_support_cmdline PUBLIC include)
+target_link_libraries(nnkit_support_cmdline PUBLIC nnkit_intf_cmdline)
diff --git a/compiler/nnkit-misc/cmdline/include/nnkit/VectorArguments.h b/compiler/nnkit-misc/cmdline/include/nnkit/VectorArguments.h
new file mode 100644
index 000000000..37b090816
--- /dev/null
+++ b/compiler/nnkit-misc/cmdline/include/nnkit/VectorArguments.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_VECTOR_ARGUMENTS_H__
+#define __NNKIT_VECTOR_ARGUMENTS_H__
+
+#include <nnkit/CmdlineArguments.h>
+
+#include <vector>
+#include <string>
+
+namespace nnkit
+{
+
+class VectorArguments final : public CmdlineArguments
+{
+public:
+ uint32_t size(void) const override { return _args.size(); }
+ const char *at(uint32_t nth) const override { return _args.at(nth).c_str(); }
+
+public:
+ VectorArguments &append(const std::string &arg);
+
+private:
+ std::vector<std::string> _args;
+};
+
+} // namespace nnkit
+
+#endif // __NNKIT_VECTOR_ARGUMENTS_H__
diff --git a/compiler/nnkit-misc/cmdline/src/VectorArguments.cpp b/compiler/nnkit-misc/cmdline/src/VectorArguments.cpp
new file mode 100644
index 000000000..f7d5478cc
--- /dev/null
+++ b/compiler/nnkit-misc/cmdline/src/VectorArguments.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/VectorArguments.h"
+
+namespace nnkit
+{
+
+VectorArguments &VectorArguments::append(const std::string &arg)
+{
+ _args.emplace_back(arg);
+ return (*this);
+}
+
+} // namespace nnkit
diff --git a/compiler/nnkit-mocotf/CMakeLists.txt b/compiler/nnkit-mocotf/CMakeLists.txt
new file mode 100644
index 000000000..8cd8623e1
--- /dev/null
+++ b/compiler/nnkit-mocotf/CMakeLists.txt
@@ -0,0 +1,6 @@
+if (NOT TARGET moco_tf_frontend)
+ return()
+endif(NOT TARGET moco_tf_frontend)
+
+add_subdirectory(support)
+add_subdirectory(backend)
diff --git a/compiler/nnkit-mocotf/backend/Backend.cpp b/compiler/nnkit-mocotf/backend/Backend.cpp
new file mode 100644
index 000000000..4900684eb
--- /dev/null
+++ b/compiler/nnkit-mocotf/backend/Backend.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/moco/tf/Backend.h"
+
+#include <nnkit/CmdlineArguments.h>
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
+{
+ using stdex::make_unique;
+
+ assert(args.size() == 2); // args.at(0) : *.pb path, args.at(1): *.info path
+
+ return make_unique<::nnkit::support::moco::tf::Backend>(args.at(0), args.at(1));
+}
diff --git a/compiler/nnkit-mocotf/backend/CMakeLists.txt b/compiler/nnkit-mocotf/backend/CMakeLists.txt
new file mode 100644
index 000000000..72e16c75a
--- /dev/null
+++ b/compiler/nnkit-mocotf/backend/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_library(nnkit_moco_tf_backend SHARED Backend.cpp)
+target_link_libraries(nnkit_moco_tf_backend nnkit_support_moco_tf)
+target_link_libraries(nnkit_moco_tf_backend stdex)
diff --git a/compiler/nnkit-mocotf/requires.cmake b/compiler/nnkit-mocotf/requires.cmake
new file mode 100644
index 000000000..6949ec808
--- /dev/null
+++ b/compiler/nnkit-mocotf/requires.cmake
@@ -0,0 +1,7 @@
+require("stdex")
+# To use "nnkit_support_tftestinfo"
+require("tfinfo")
+require("loco")
+require("locomotiv")
+require("moco-tf")
+require("nnkit-intf")
diff --git a/compiler/nnkit-mocotf/support/CMakeLists.txt b/compiler/nnkit-mocotf/support/CMakeLists.txt
new file mode 100644
index 000000000..76c7c04b1
--- /dev/null
+++ b/compiler/nnkit-mocotf/support/CMakeLists.txt
@@ -0,0 +1,13 @@
+file(GLOB_RECURSE TF_SOURCES "src/*.cpp")
+
+add_library(nnkit_support_moco_tf STATIC ${TF_SOURCES})
+
+set_target_properties(nnkit_support_moco_tf PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(nnkit_support_moco_tf PUBLIC include)
+target_link_libraries(nnkit_support_moco_tf angkor)
+target_link_libraries(nnkit_support_moco_tf nnkit_intf_backend)
+target_link_libraries(nnkit_support_moco_tf nnkit_support_tftestinfo)
+target_link_libraries(nnkit_support_moco_tf locomotiv)
+target_link_libraries(nnkit_support_moco_tf moco_tf_frontend)
+target_link_libraries(nnkit_support_moco_tf loco)
+target_link_libraries(nnkit_support_moco_tf stdex)
diff --git a/compiler/nnkit-mocotf/support/include/nnkit/support/moco/tf/Backend.h b/compiler/nnkit-mocotf/support/include/nnkit/support/moco/tf/Backend.h
new file mode 100644
index 000000000..ad5e06d2a
--- /dev/null
+++ b/compiler/nnkit-mocotf/support/include/nnkit/support/moco/tf/Backend.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_MOCO_TF_BACKEND_H__
+#define __NNKIT_SUPPORT_MOCO_TF_BACKEND_H__
+
+#include "nnkit/Backend.h"
+#include "nnkit/TensorContext.h"
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+
+#include "loco/IR/Graph.h"
+#include "locomotiv/Session.h"
+
+#include <vector>
+#include <memory>
+
+namespace nnkit
+{
+namespace support
+{
+namespace moco
+{
+namespace tf
+{
+
+class Backend final : public nnkit::Backend
+{
+ using ParsedTensors = std::vector<std::unique_ptr<nnkit::support::tftestinfo::ParsedTensor>>;
+
+public:
+ Backend(const char *pb_path, const char *info_path);
+
+ void setInputOutputFromGraph(const std::unique_ptr<loco::Graph> &loco_graph,
+ ParsedTensors &parsed_tensors);
+
+ void prepare(const std::function<void(nnkit::TensorContext &)> &f) override;
+
+ void run(void) override;
+
+ void teardown(const std::function<void(nnkit::TensorContext &)> &f);
+
+private:
+ std::unique_ptr<loco::Graph> _loco_graph;
+ std::unique_ptr<locomotiv::Session> _sess;
+
+ ParsedTensors _inputs;
+ ParsedTensors _outputs;
+};
+
+} // namespace tf
+} // namespace moco
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_MOCO_TF_BACKEND_H__
diff --git a/compiler/nnkit-mocotf/support/src/Backend.cpp b/compiler/nnkit-mocotf/support/src/Backend.cpp
new file mode 100644
index 000000000..2d9e21fd7
--- /dev/null
+++ b/compiler/nnkit-mocotf/support/src/Backend.cpp
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/moco/tf/Backend.h"
+
+#include "InputTensorContext.h"
+#include "OutputTensorContext.h"
+
+#include "nnkit/TensorContext.h"
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+#include "nnkit/support/tftestinfo/TensorInfoParser.h"
+
+#include <moco/tf/Frontend.h>
+#include <moco/Names.h>
+#include <stdex/Memory.h>
+
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <utility> // std::move
+#include <stdexcept>
+
+namespace nnkit
+{
+namespace support
+{
+namespace moco
+{
+namespace tf
+{
+
+void Backend::setInputOutputFromGraph(const std::unique_ptr<loco::Graph> &loco_graph,
+ ParsedTensors &parsed_tensors)
+{
+ auto inputs = loco_graph.get()->inputs();
+ auto outputs = loco_graph.get()->outputs();
+ uint32_t input_idx = 0;
+ uint32_t output_idx = 0;
+ for (auto &parsed_tensor : parsed_tensors)
+ {
+ if (parsed_tensor->kind() == ParsedTensor::Kind::Input)
+ {
+ if (!parsed_tensor->hasShape())
+ {
+ auto input_shape = inputs->at(input_idx++)->shape();
+
+ uint32_t size = input_shape->rank();
+ parsed_tensor->mutable_shape().resize(size);
+ for (uint32_t d = 0; d < size; d++)
+ {
+ parsed_tensor->mutable_shape().dim(d) = input_shape->dim(d).value();
+ }
+ }
+ _inputs.emplace_back(std::move(parsed_tensor));
+ }
+ else // Output
+ {
+ if (!parsed_tensor->hasShape())
+ {
+ auto output_shape = outputs->at(output_idx++)->shape();
+
+ uint32_t size = output_shape->rank();
+ parsed_tensor->mutable_shape().resize(size);
+ for (uint32_t d = 0; d < size; d++)
+ {
+ parsed_tensor->mutable_shape().dim(d) = output_shape->dim(d).value();
+ }
+ }
+ _outputs.emplace_back(std::move(parsed_tensor));
+ }
+ }
+}
+
+Backend::Backend(const char *pb_path, const char *info_path)
+{
+ // read test.info
+ ::moco::ModelSignature sig;
+
+ auto parsed_tensors = nnkit::support::tftestinfo::parse(info_path);
+
+ for (auto &parsed_tensor : parsed_tensors)
+ {
+ if (parsed_tensor->kind() == ParsedTensor::Kind::Input)
+ {
+ sig.add_input(::moco::TensorName(parsed_tensor->name()));
+ }
+ else
+ {
+ sig.add_output(::moco::TensorName(parsed_tensor->name()));
+ }
+ if (parsed_tensor->hasShape())
+ sig.shape(parsed_tensor->name(), parsed_tensor->shape());
+ }
+
+ // get loco::Graph
+ ::moco::tf::Frontend moco;
+
+ // After converting, all shapes will be determined.
+ auto loco_graph = moco.load(sig, pb_path, ::moco::tf::Frontend::FileType::Binary);
+
+ // Set input and output from loco graph.
+ setInputOutputFromGraph(loco_graph, parsed_tensors);
+
+ // set member vars
+ _loco_graph = std::move(loco_graph);
+ _sess = stdex::make_unique<locomotiv::Session>(_loco_graph.get());
+}
+
+void Backend::prepare(const std::function<void(nnkit::TensorContext &)> &f)
+{
+ using nncc::core::ADT::tensor::Buffer;
+ using nncc::core::ADT::tensor::make_buffer;
+ using nncc::core::ADT::tensor::LexicalLayout;
+
+ // allocate memory for inputs of loco interpreter
+ std::vector<std::unique_ptr<Buffer<float>>> buf_list; // TODO Support more types other than float
+
+ for (int n = 0; n < _inputs.size(); n++)
+ {
+ auto buf = make_buffer<float, LexicalLayout>(_inputs.at(n)->shape());
+ buf_list.emplace_back(stdex::make_unique<nncc::core::ADT::tensor::Buffer<float>>(buf));
+ }
+
+ // fill test input values
+ InputTensorContext ctx(_inputs, buf_list);
+ f(ctx);
+
+ // set input of locomotiv
+ for (int n = 0; n < buf_list.size(); n++)
+ {
+ auto buf = buf_list.at(n).get();
+ auto node_data = locomotiv::make_data(*buf);
+ _sess->set_input(n, std::move(node_data));
+ }
+}
+
+void Backend::run(void) { _sess->infer(); }
+
+void Backend::teardown(const std::function<void(nnkit::TensorContext &)> &f)
+{
+ // get output
+ OutputTensorContext ctx(_outputs, _sess.get());
+ f(ctx);
+}
+
+} // namespace tf
+} // namespace moco
+} // namespace support
+} // namespace nnkit
diff --git a/compiler/nnkit-mocotf/support/src/InputTensorContext.cpp b/compiler/nnkit-mocotf/support/src/InputTensorContext.cpp
new file mode 100644
index 000000000..98f500730
--- /dev/null
+++ b/compiler/nnkit-mocotf/support/src/InputTensorContext.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InputTensorContext.h"
+#include "nnkit/TensorContext.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+
+namespace nnkit
+{
+namespace support
+{
+namespace moco
+{
+namespace tf
+{
+
+void InputTensorContext::getMutableFloatTensor(uint32_t n,
+ const nnkit::TensorContext::TypedAccessor<float> &f)
+{
+ auto buf = _buffers.at(n).get();
+ f(*this, n, *buf);
+}
+
+void InputTensorContext::getConstFloatTensor(
+ uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
+{
+ auto buf = _buffers.at(n).get();
+ f(*this, n, *buf);
+}
+
+} // namespace tf
+} // namespace moco
+} // namespace support
+} // namespace nnkit
diff --git a/compiler/nnkit-mocotf/support/src/InputTensorContext.h b/compiler/nnkit-mocotf/support/src/InputTensorContext.h
new file mode 100644
index 000000000..bbb25adea
--- /dev/null
+++ b/compiler/nnkit-mocotf/support/src/InputTensorContext.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_MOCO_TF_INPUT_TENSOR_CONTEXT_H__
+#define __NNKIT_SUPPORT_MOCO_TF_INPUT_TENSOR_CONTEXT_H__
+
+#include "TensorContext.h"
+
+#include <nnkit/TensorContext.h>
+#include <nnkit/support/tftestinfo/ParsedTensor.h>
+
+#include <locomotiv/NodeData.h>
+
+#include <vector>
+#include <memory>
+
+namespace nnkit
+{
+namespace support
+{
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Class for the context of input tensors
+ */
+class InputTensorContext final : public TensorContext
+{
+ using Buffers = std::vector<std::unique_ptr<nncc::core::ADT::tensor::Buffer<float>>>;
+
+public:
+ InputTensorContext(const ParsedTensors &parsed_tensors, const Buffers &buffers)
+ : TensorContext(parsed_tensors), _buffers(buffers)
+ { /* empty */
+ }
+
+ InputTensorContext(const InputTensorContext &) = delete;
+ InputTensorContext(InputTensorContext &&) = delete;
+
+public:
+ void getMutableFloatTensor(uint32_t n,
+ const nnkit::TensorContext::TypedAccessor<float> &f) override;
+
+ void getConstFloatTensor(uint32_t n,
+ const nnkit::TensorContext::TypedReader<float> &f) const override;
+
+private:
+ const Buffers &_buffers;
+};
+
+} // namespace tf
+} // namespace moco
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_MOCO_TF_INPUT_TENSOR_CONTEXT_H__
diff --git a/compiler/nnkit-mocotf/support/src/OutputTensorContext.cpp b/compiler/nnkit-mocotf/support/src/OutputTensorContext.cpp
new file mode 100644
index 000000000..2b36fc67a
--- /dev/null
+++ b/compiler/nnkit-mocotf/support/src/OutputTensorContext.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OutputTensorContext.h"
+#include "nnkit/TensorContext.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+
+namespace nnkit
+{
+namespace support
+{
+namespace moco
+{
+namespace tf
+{
+
+void OutputTensorContext::getConstFloatTensor(
+ uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
+{ // for output
+ using nncc::core::ADT::tensor::LexicalLayout;
+ using nncc::core::ADT::tensor::make_overlay;
+
+ auto *buf = const_cast<float *>(_sess->get_output(n)->as_f32_bufptr()->base());
+ auto overlay = make_overlay<float, LexicalLayout>(shape(n), buf);
+
+ f(*this, n, overlay);
+}
+
+} // namespace tf
+} // namespace moco
+} // namespace support
+} // namespace nnkit
diff --git a/compiler/nnkit-mocotf/support/src/OutputTensorContext.h b/compiler/nnkit-mocotf/support/src/OutputTensorContext.h
new file mode 100644
index 000000000..8cb8d8bf0
--- /dev/null
+++ b/compiler/nnkit-mocotf/support/src/OutputTensorContext.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_MOCO_TF_OUTPUT_TENSOR_CONTEXT_H__
+#define __NNKIT_SUPPORT_MOCO_TF_OUTPUT_TENSOR_CONTEXT_H__
+
+#include "TensorContext.h"
+#include "nnkit/TensorContext.h"
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+
+#include <locomotiv/Session.h>
+#include <locomotiv/NodeData.h>
+
+#include <vector>
+#include <memory>
+
+namespace nnkit
+{
+namespace support
+{
+namespace moco
+{
+namespace tf
+{
+
+/**
+ * @brief Class for the context of output tensors
+ */
+class OutputTensorContext final : public TensorContext
+{
+public:
+ OutputTensorContext(const ParsedTensors &parsed_tensors, locomotiv::Session *sess)
+ : TensorContext(parsed_tensors), _sess(sess)
+ { /* empty */
+ }
+
+ OutputTensorContext(const OutputTensorContext &) = delete;
+ OutputTensorContext(OutputTensorContext &&) = delete;
+
+public:
+ void getMutableFloatTensor(uint32_t n,
+ const nnkit::TensorContext::TypedAccessor<float> &f) override
+ {
+ throw std::runtime_error("This method is for input and should not be called");
+ }
+
+ void getConstFloatTensor(uint32_t n,
+ const nnkit::TensorContext::TypedReader<float> &f) const override;
+
+private:
+ locomotiv::Session *_sess;
+};
+
+} // namespace tf
+} // namespace moco
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_MOCO_TF_OUTPUT_TENSOR_CONTEXT_H__
diff --git a/compiler/nnkit-mocotf/support/src/TensorContext.h b/compiler/nnkit-mocotf/support/src/TensorContext.h
new file mode 100644
index 000000000..ebebd670b
--- /dev/null
+++ b/compiler/nnkit-mocotf/support/src/TensorContext.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_MOCO_TF_TENSOR_CONTEXT_H__
+#define __NNKIT_SUPPORT_MOCO_TF_TENSOR_CONTEXT_H__
+
+#include "nnkit/TensorContext.h"
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <vector>
+#include <memory>
+
+namespace nnkit
+{
+namespace support
+{
+namespace moco
+{
+namespace tf
+{
+using nnkit::support::tftestinfo::ParsedTensor;
+using ParsedTensors = std::vector<std::unique_ptr<ParsedTensor>>;
+
+/**
+ * @brief Parent class of InputTensorContext and OutputTensorContext
+ */
+class TensorContext : public nnkit::TensorContext
+{
+public:
+ TensorContext(const ParsedTensors &tensors) : _tensors(tensors)
+ {
+ // empty
+ }
+
+ TensorContext(const TensorContext &) = delete;
+ TensorContext(TensorContext &&) = delete;
+
+public:
+ uint32_t size(void) const override { return _tensors.size(); }
+
+public:
+ std::string name(uint32_t n) const override // name with ":0", ":1", etc
+ {
+ return _tensors.at(n)->name();
+ }
+
+public:
+ nncc::core::ADT::tensor::Shape shape(uint32_t n) const override
+ {
+ return _tensors.at(n)->shape();
+ }
+
+public:
+ // Float (fp32) tensor support
+ bool isFloatTensor(uint32_t n) const override { return _tensors.at(n)->isFloatTensor(); }
+
+ virtual void getMutableFloatTensor(uint32_t n,
+ const nnkit::TensorContext::TypedAccessor<float> &f) = 0;
+ virtual void getConstFloatTensor(uint32_t n,
+ const nnkit::TensorContext::TypedReader<float> &f) const = 0;
+
+private:
+ const ParsedTensors &_tensors;
+};
+
+} // namespace tf
+} // namespace moco
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_MOCO_TF_TENSOR_CONTEXT_H__
diff --git a/compiler/nnkit-onnxrt/CMakeLists.txt b/compiler/nnkit-onnxrt/CMakeLists.txt
new file mode 100644
index 000000000..f72b03a02
--- /dev/null
+++ b/compiler/nnkit-onnxrt/CMakeLists.txt
@@ -0,0 +1,8 @@
+nnas_find_package(ONNXRuntime QUIET)
+
+if(NOT ONNXRuntime_FOUND)
+ return()
+endif(NOT ONNXRuntime_FOUND)
+
+add_subdirectory(support)
+add_subdirectory(backend)
diff --git a/compiler/nnkit-onnxrt/backend/Backend.cpp b/compiler/nnkit-onnxrt/backend/Backend.cpp
new file mode 100644
index 000000000..9247fbf34
--- /dev/null
+++ b/compiler/nnkit-onnxrt/backend/Backend.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/onnx/Backend.h"
+
+#include <nnkit/CmdlineArguments.h>
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
+{
+ assert(args.size() == 1); // args.at(0) : onnx file
+
+ return stdex::make_unique<::nnkit::support::onnx::Backend>(args.at(0));
+}
diff --git a/compiler/nnkit-onnxrt/backend/CMakeLists.txt b/compiler/nnkit-onnxrt/backend/CMakeLists.txt
new file mode 100644
index 000000000..b00e5593d
--- /dev/null
+++ b/compiler/nnkit-onnxrt/backend/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_library(nnkit_onnx_backend SHARED Backend.cpp)
+target_link_libraries(nnkit_onnx_backend nnkit_support_onnx)
+target_link_libraries(nnkit_onnx_backend stdex)
diff --git a/compiler/nnkit-onnxrt/requires.cmake b/compiler/nnkit-onnxrt/requires.cmake
new file mode 100644
index 000000000..d370fc17c
--- /dev/null
+++ b/compiler/nnkit-onnxrt/requires.cmake
@@ -0,0 +1,2 @@
+require("stdex")
+require("nnkit-intf")
diff --git a/compiler/nnkit-onnxrt/support/CMakeLists.txt b/compiler/nnkit-onnxrt/support/CMakeLists.txt
new file mode 100644
index 000000000..1b51d4ed8
--- /dev/null
+++ b/compiler/nnkit-onnxrt/support/CMakeLists.txt
@@ -0,0 +1,10 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(nnkit_support_onnx-1.4 STATIC ${SOURCES})
+set_target_properties(nnkit_support_onnx-1.4 PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(nnkit_support_onnx-1.4 PUBLIC include)
+target_link_libraries(nnkit_support_onnx-1.4 nnkit_intf_backend)
+target_link_libraries(nnkit_support_onnx-1.4 onnxruntime)
+target_link_libraries(nnkit_support_onnx-1.4 stdex)
+
+add_library(nnkit_support_onnx ALIAS nnkit_support_onnx-1.4)
diff --git a/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Allocator.h b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Allocator.h
new file mode 100644
index 000000000..bf4d735ad
--- /dev/null
+++ b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Allocator.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_ONNX_ALLOCATOR_H__
+#define __NNKIT_SUPPORT_ONNX_ALLOCATOR_H__
+
+#include <onnxruntime_c_api.h>
+
+#include <atomic>
+
+namespace nnkit
+{
+namespace support
+{
+namespace onnx
+{
+
+class Allocator final : public OrtAllocator
+{
+public:
+ Allocator(void);
+ ~Allocator(void);
+
+ void *Alloc(size_t size);
+ void Free(void *p);
+ const OrtAllocatorInfo *Info(void) const;
+
+ void LeakCheck(void);
+
+ // Disallow copying
+ Allocator(const Allocator &) = delete;
+ Allocator &operator=(const Allocator &) = delete;
+
+private:
+ std::atomic<size_t> _memory_inuse{0};
+ OrtAllocatorInfo *_cpu_allocator_info;
+};
+
+} // namespace onnx
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_ONNX_ALLOCATOR_H__
diff --git a/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Backend.h b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Backend.h
new file mode 100644
index 000000000..be68007d7
--- /dev/null
+++ b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Backend.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_ONNX_BACKEND_H__
+#define __NNKIT_SUPPORT_ONNX_BACKEND_H__
+
+#include <nnkit/Backend.h>
+
+#include "nnkit/support/onnx/Runner.h"
+
+namespace nnkit
+{
+namespace support
+{
+namespace onnx
+{
+
+class Backend final : public nnkit::Backend
+{
+public:
+ Backend(const std::string &onnx_path) : _runner(onnx_path)
+ {
+ // DO NOTHING
+ }
+
+ void prepare(const std::function<void(nnkit::TensorContext &)> &f) override;
+
+ void run(void) override;
+
+ void teardown(const std::function<void(nnkit::TensorContext &)> &f) override;
+
+private:
+ Runner _runner;
+};
+
+} // namespace onnx
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_ONNX_BACKEND_H__
diff --git a/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Runner.h b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Runner.h
new file mode 100644
index 000000000..f1b7b5424
--- /dev/null
+++ b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Runner.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_ONNX_RUNNER_H__
+#define __NNKIT_SUPPORT_ONNX_RUNNER_H__
+
+#include "nnkit/support/onnx/Allocator.h"
+#include "nnkit/support/onnx/TensorSet.h"
+
+#include <onnxruntime_c_api.h>
+
+#include <memory>
+
+namespace nnkit
+{
+namespace support
+{
+namespace onnx
+{
+
+class Runner
+{
+public:
+ Runner(const std::string &path);
+ ~Runner(void);
+
+ void prepareInputs(void);
+ void prepareOutputs(void);
+
+ TensorSet &inputs(void) { return *_inputs; }
+ TensorSet &outputs(void) { return *_outputs; }
+
+ void run(void);
+
+public:
+ // Disallow copy
+ Runner(const Runner &) = delete;
+ Runner &operator=(const Runner &) = delete;
+
+private:
+ OrtEnv *_env;
+ OrtSession *_session;
+
+ std::unique_ptr<Allocator> _allocator;
+
+ std::unique_ptr<TensorSet> _inputs;
+ std::unique_ptr<TensorSet> _outputs;
+};
+
+} // namespace onnx
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_ONNX_RUNNER_H__
diff --git a/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Status.h b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Status.h
new file mode 100644
index 000000000..085306960
--- /dev/null
+++ b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/Status.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_ONNX_STATUS_H__
+#define __NNKIT_SUPPORT_ONNX_STATUS_H__
+
+#include <onnxruntime_c_api.h>
+
+#include <string>
+#include <stdexcept>
+
+namespace nnkit
+{
+namespace support
+{
+namespace onnx
+{
+
+class Status
+{
+public:
+ Status() : _status(nullptr)
+ {
+ // DO NOTHING
+ }
+
+ Status(OrtStatus *status) : _status(status)
+ {
+ // DO NOTHING
+ }
+
+ ~Status()
+ {
+ if (_status)
+ {
+ OrtReleaseStatus(_status);
+ }
+ }
+
+ Status &operator=(OrtStatus *status)
+ {
+ if (_status)
+ {
+ OrtReleaseStatus(_status);
+ }
+ _status = status;
+ return *this;
+ }
+
+ bool isError(void) { return (_status != nullptr); }
+
+ void throwOnError(void)
+ {
+ if (_status)
+ {
+ const char *msg = OrtGetErrorMessage(_status);
+ std::string err{msg};
+ OrtReleaseStatus(_status);
+ throw std::runtime_error{err};
+ }
+ }
+
+private:
+ // NOTE nullptr for OrtStatus* indicates success
+ OrtStatus *_status;
+};
+
+} // namespace onnx
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_ONNX_STATUS_H__
diff --git a/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/TensorContext.h b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/TensorContext.h
new file mode 100644
index 000000000..d76ed0e21
--- /dev/null
+++ b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/TensorContext.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_ONNX_TENSOR_CONTEXT_H__
+#define __NNKIT_SUPPORT_ONNX_TENSOR_CONTEXT_H__
+
+#include "nnkit/support/onnx/TensorSet.h"
+
+#include <nnkit/TensorContext.h>
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+
+namespace nnkit
+{
+namespace support
+{
+namespace onnx
+{
+
+/**
+ * @brief nnkit::TensorContext implementation backed by an ONNX Runtime
+ * TensorSet. Only float (fp32) tensors are supported by the typed accessors.
+ */
+class TensorContext final : public nnkit::TensorContext
+{
+public:
+ // Keeps a reference to the TensorSet; the set must outlive this context.
+ TensorContext(TensorSet &tensors) : _tensors(tensors)
+ {
+ // DO NOTHING
+ }
+
+ // Number of tensors in the underlying set.
+ uint32_t size(void) const override { return _tensors.size(); }
+
+ // Tensor name at index n (copied out of the set's storage).
+ std::string name(uint32_t n) const override { return std::string{_tensors.name(n)}; }
+
+ // Convert the stored size_t dimension vector into an angkor Shape.
+ nncc::core::ADT::tensor::Shape shape(uint32_t n) const override
+ {
+ const std::vector<size_t> &dims = _tensors.dim(n);
+
+ nncc::core::ADT::tensor::Shape shape;
+ shape.resize(dims.size());
+ for (size_t i = 0; i < dims.size(); ++i)
+ {
+ shape.dim(i) = dims[i];
+ }
+ return shape;
+ }
+
+ // True only for ONNX float32 element type.
+ bool isFloatTensor(uint32_t n) const override
+ {
+ return (_tensors.type(n) == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT);
+ }
+
+ // Expose tensor n's raw float buffer (via a LexicalLayout overlay) to the
+ // accessor f for writing. Throws on non-float tensors or Ort API failure.
+ void getMutableFloatTensor(uint32_t n, const TensorContext::TypedAccessor<float> &f) override
+ {
+ if (_tensors.type(n) != ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT)
+ {
+ throw std::runtime_error{"type mismatch"};
+ }
+
+ using nncc::core::ADT::tensor::LexicalLayout;
+ using nncc::core::ADT::tensor::make_overlay;
+
+ Status status;
+
+ OrtValue *base = _tensors.mutable_tensor(n);
+ float *data;
+
+ status = OrtGetTensorMutableData(base, (void **)&data);
+ status.throwOnError();
+
+ auto overlay = make_overlay<float, LexicalLayout>(shape(n), data);
+
+ f(*this, n, overlay);
+ }
+
+ // Read-only counterpart of getMutableFloatTensor.
+ // NOTE(review): still calls mutable_tensor()/OrtGetTensorMutableData under
+ // the hood — const-ness is only promised at this interface level.
+ void getConstFloatTensor(uint32_t n, const TensorContext::TypedReader<float> &f) const override
+ {
+ if (_tensors.type(n) != ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT)
+ {
+ throw std::runtime_error{"type mismatch"};
+ }
+
+ using nncc::core::ADT::tensor::LexicalLayout;
+ using nncc::core::ADT::tensor::make_overlay;
+
+ Status status;
+
+ OrtValue *base = _tensors.mutable_tensor(n);
+ float *data;
+
+ status = OrtGetTensorMutableData(base, (void **)&data);
+ status.throwOnError();
+
+ auto overlay = make_overlay<float, LexicalLayout>(shape(n), data);
+
+ f(*this, n, overlay);
+ }
+
+private:
+ // Non-owning; lifetime managed by the caller (Runner owns the TensorSet).
+ TensorSet &_tensors;
+};
+
+} // namespace onnx
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_ONNX_TENSOR_CONTEXT_H__
diff --git a/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/TensorSet.h b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/TensorSet.h
new file mode 100644
index 000000000..b38fc9bb0
--- /dev/null
+++ b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/TensorSet.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_ONNX_TENSOR_SET_H__
+#define __NNKIT_SUPPORT_ONNX_TENSOR_SET_H__
+
+#include "nnkit/support/onnx/Allocator.h"
+#include "nnkit/support/onnx/Status.h"
+
+#include <onnxruntime_c_api.h>
+
+#include <string>
+#include <vector>
+#include <cassert>
+
+namespace nnkit
+{
+namespace support
+{
+namespace onnx
+{
+
+/**
+ * @brief Fixed-size collection of ONNX Runtime tensors plus their metadata
+ * (name, element type, dimensions). Owns the OrtValue objects it creates
+ * in set() and releases them in the destructor.
+ */
+class TensorSet final
+{
+public:
+ // allocator: used for tensor memory; must outlive this set.
+ // nums: number of slots; each slot is filled later via set().
+ TensorSet(Allocator *allocator, size_t nums)
+ : _allocator(allocator), _names(nums), _types(nums), _dims(nums), _tensors(nums, nullptr)
+ {
+ // DO NOTHING
+ }
+
+ // Release every created OrtValue (slots never set() remain nullptr).
+ ~TensorSet(void)
+ {
+ for (auto it : _tensors)
+ {
+ OrtReleaseValue(it);
+ }
+ }
+
+ // Record metadata for slot `index` and allocate its tensor through the
+ // custom allocator. Throws (via Status) on allocation failure.
+ void set(size_t index, const std::string &name, ONNXTensorElementDataType type,
+ const std::vector<size_t> &dims)
+ {
+ _names[index] = name;
+ _types[index] = type;
+ _dims[index] = dims;
+
+ Status status;
+
+ status =
+ OrtCreateTensorAsOrtValue(_allocator, dims.data(), dims.size(), type, &_tensors[index]);
+ status.throwOnError();
+
+ // Sanity check only — compiled out under NDEBUG.
+ assert(OrtIsTensor(_tensors[index]));
+ }
+
+ size_t size(void) { return _names.size(); }
+
+ const char *name(size_t index) { return _names[index].c_str(); }
+ const std::vector<std::string> &names(void) { return _names; }
+
+ ONNXTensorElementDataType type(size_t index) { return _types[index]; }
+
+ const std::vector<size_t> &dim(size_t index) { return _dims[index]; }
+
+ const OrtValue *tensor(size_t index) { return _tensors[index]; }
+ const std::vector<OrtValue *> &tensors(void) { return _tensors; }
+
+ OrtValue *mutable_tensor(size_t index) { return _tensors[index]; }
+ // NOTE(review): returns a COPY of the pointer vector, unlike tensors()
+ // which returns a const reference — callers taking .data() on the result
+ // get a pointer into a temporary (see Runner::run). Confirm intended.
+ std::vector<OrtValue *> mutable_tensors(void) { return _tensors; }
+
+private:
+ Allocator *_allocator; // non-owning
+
+ std::vector<std::string> _names;
+ std::vector<ONNXTensorElementDataType> _types;
+ std::vector<std::vector<size_t>> _dims;
+ std::vector<OrtValue *> _tensors; // owned; released in destructor
+};
+
+} // namespace onnx
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_ONNX_TENSOR_SET_H__
diff --git a/compiler/nnkit-onnxrt/support/src/Allocator.cpp b/compiler/nnkit-onnxrt/support/src/Allocator.cpp
new file mode 100644
index 000000000..331f4ec50
--- /dev/null
+++ b/compiler/nnkit-onnxrt/support/src/Allocator.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/onnx/Allocator.h"
+#include "nnkit/support/onnx/Status.h"
+
+#include <stdexcept>
+
+namespace nnkit
+{
+namespace support
+{
+namespace onnx
+{
+
+// Wire the OrtAllocator C-style function-pointer hooks to this object's
+// member functions (the lambdas downcast the OrtAllocator* back to
+// Allocator*), then create the CPU allocator-info descriptor.
+Allocator::Allocator(void)
+{
+ OrtAllocator::version = ORT_API_VERSION;
+ OrtAllocator::Alloc = [](OrtAllocator *this_, size_t size) {
+ return static_cast<Allocator *>(this_)->Alloc(size);
+ };
+ OrtAllocator::Free = [](OrtAllocator *this_, void *p) {
+ static_cast<Allocator *>(this_)->Free(p);
+ };
+ OrtAllocator::Info = [](const OrtAllocator *this_) {
+ return static_cast<const Allocator *>(this_)->Info();
+ };
+
+ Status status;
+ status = OrtCreateCpuAllocatorInfo(OrtDeviceAllocator, OrtMemTypeDefault, &_cpu_allocator_info);
+ status.throwOnError();
+}
+
+// Release the allocator-info descriptor created in the constructor.
+Allocator::~Allocator(void) { OrtReleaseAllocatorInfo(_cpu_allocator_info); }
+
+// Allocate `size` bytes for ONNX Runtime, tracking the total outstanding
+// bytes in _memory_inuse so LeakCheck() can detect unfreed memory.
+// Layout: [size_t total_len][user data...]; the caller receives a pointer
+// just past the stored length header.
+void *Allocator::Alloc(size_t size)
+{
+ // NOTE The extra_len is added to check resource leak.
+ //
+ // This Alloc function will allocate the given size with extra_len.
+ // The first extra_len will save the allocated memory size and
+ // the user will use address from the allocated memory plus extra_len.
+ // The size value that saved in extra_len is used to Free function
+ // to check resource leak. The size value uses in _memory_inuse.
+ constexpr size_t extra_len = sizeof(size_t);
+ // `size += extra_len` first grows size, then the grown total is both
+ // accounted in _memory_inuse and passed to malloc below.
+ _memory_inuse.fetch_add(size += extra_len);
+ // NOTE(review): malloc result is not null-checked; a failed allocation
+ // dereferences nullptr on the next line — confirm acceptable here.
+ void *p = ::malloc(size);
+ *(size_t *)p = size;
+ return (char *)p + extra_len;
+}
+
+// Free a block returned by Alloc(): step back to the hidden length header,
+// subtract the recorded total from _memory_inuse, then free the real block.
+// Null pointers are ignored (mirrors free(NULL) semantics).
+void Allocator::Free(void *p)
+{
+ constexpr size_t extra_len = sizeof(size_t);
+ if (!p)
+ return;
+ p = (char *)p - extra_len;
+ size_t len = *(size_t *)p;
+ _memory_inuse.fetch_sub(len);
+ return ::free(p);
+}
+
+// OrtAllocator::Info hook: expose the CPU allocator-info owned by this object.
+const OrtAllocatorInfo *Allocator::Info(void) const { return _cpu_allocator_info; }
+
+// Throw if any bytes allocated through Alloc() were never returned via
+// Free() (i.e. the outstanding-bytes counter is non-zero).
+void Allocator::LeakCheck(void)
+{
+ if (_memory_inuse.load())
+ {
+ throw std::runtime_error{"memory leak!!!"};
+ }
+}
+
+} // namespace onnx
+} // namespace support
+} // namespace nnkit
diff --git a/compiler/nnkit-onnxrt/support/src/Backend.cpp b/compiler/nnkit-onnxrt/support/src/Backend.cpp
new file mode 100644
index 000000000..af9e54c26
--- /dev/null
+++ b/compiler/nnkit-onnxrt/support/src/Backend.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/onnx/Backend.h"
+#include "nnkit/support/onnx/TensorContext.h"
+
+namespace nnkit
+{
+namespace support
+{
+namespace onnx
+{
+
+// Allocate the session's input/output tensors, then let the caller-supplied
+// function f fill the input tensors through a TensorContext view.
+void Backend::prepare(const std::function<void(nnkit::TensorContext &)> &f)
+{
+ // Prepare input and output tensors
+ _runner.prepareInputs();
+ _runner.prepareOutputs();
+
+ TensorContext ctx(_runner.inputs());
+ f(ctx);
+}
+
+// Execute one inference pass on the underlying ONNX Runtime session.
+void Backend::run(void) { _runner.run(); }
+
+// Hand the output tensors to the caller-supplied function f (e.g. to dump
+// or verify results) after run() has completed.
+void Backend::teardown(const std::function<void(nnkit::TensorContext &)> &f)
+{
+ TensorContext ctx(_runner.outputs());
+ f(ctx);
+}
+
+} // namespace onnx
+} // namespace support
+} // namespace nnkit
diff --git a/compiler/nnkit-onnxrt/support/src/Runner.cpp b/compiler/nnkit-onnxrt/support/src/Runner.cpp
new file mode 100644
index 000000000..bc6a81a5c
--- /dev/null
+++ b/compiler/nnkit-onnxrt/support/src/Runner.cpp
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/onnx/Runner.h"
+#include "nnkit/support/onnx/Status.h"
+
+#include <stdex/Memory.h>
+#include <cassert>
+
+namespace nnkit
+{
+namespace support
+{
+namespace onnx
+{
+
+// Create the ONNX Runtime environment and load a session from the model
+// file at `path`, using the leak-checking Allocator for tensor memory.
+// NOTE(review): errors here are only assert()-checked, which is compiled
+// out under NDEBUG — unlike the throwOnError() style used elsewhere in
+// this file. Confirm whether release builds should throw instead.
+Runner::Runner(const std::string &path) : _allocator(stdex::make_unique<Allocator>())
+{
+ Status status;
+
+ status = OrtCreateEnv(ORT_LOGGING_LEVEL_WARNING, "nnkit", &_env);
+ assert(!status.isError());
+
+ status = OrtCreateSession(_env, path.c_str(), nullptr, &_session);
+ assert(!status.isError());
+}
+
+// Release the session before the environment (reverse construction order).
+Runner::~Runner(void)
+{
+ if (_session)
+ {
+ OrtReleaseSession(_session);
+ }
+
+ if (_env)
+ {
+ OrtReleaseEnv(_env);
+ }
+}
+
+// Query the session for its input count, names, element types, and shapes,
+// then allocate a TensorSet slot per input. Must be called exactly once
+// (asserted by `_inputs == nullptr`).
+void Runner::prepareInputs(void)
+{
+ Status status;
+
+ assert(_inputs == nullptr);
+
+ size_t num_input_nodes;
+ status = OrtSessionGetInputCount(_session, &num_input_nodes);
+ status.throwOnError();
+
+ _inputs = stdex::make_unique<TensorSet>(_allocator.get(), num_input_nodes);
+
+ for (size_t i = 0; i < num_input_nodes; ++i)
+ {
+ char *input_name;
+ status = OrtSessionGetInputName(_session, i, _allocator.get(), &input_name);
+ status.throwOnError();
+
+ assert(input_name != nullptr);
+
+ // Copy the name out, then return the Ort-allocated buffer to our allocator.
+ std::string name{input_name};
+ _allocator->Free(input_name);
+
+ OrtTypeInfo *typeinfo;
+ status = OrtSessionGetInputTypeInfo(_session, i, &typeinfo);
+ status.throwOnError();
+
+ const OrtTensorTypeAndShapeInfo *tensor_info = OrtCastTypeInfoToTensorInfo(typeinfo);
+ ONNXTensorElementDataType type = OrtGetTensorElementType(tensor_info);
+
+ uint32_t num_dims = OrtGetNumOfDimensions(tensor_info);
+ // NOTE(review): dims is vector<size_t> but the API writes int64_t values
+ // through the cast below; the -1 comparison later only matches because
+ // (size_t)-1 wraps to SIZE_MAX on 64-bit targets — confirm portability.
+ std::vector<size_t> dims(num_dims);
+ OrtGetDimensions(tensor_info, (int64_t *)dims.data(), num_dims);
+
+ // NOTE To run OnnxRuntime, the total size of input tensor must be fixed.
+ // In the present code, the unknown shape that is -1 is arbitrarily changed to 1.
+ //
+ // TODO Add user argument related to unknown shape
+ //
+ for (uint32_t j = 0; j < num_dims; ++j)
+ {
+ if (dims[j] == -1)
+ {
+ dims[j] = 1;
+ }
+ }
+ OrtReleaseTypeInfo(typeinfo);
+
+ _inputs->set(i, name, type, dims);
+ }
+}
+
+// Mirror of prepareInputs() for session outputs: query count/name/type/shape
+// per output and allocate a TensorSet slot each. Must be called exactly once.
+// NOTE(review): near-duplicate of prepareInputs() — a shared helper taking
+// the Get{Input,Output}* functions would remove the duplication.
+void Runner::prepareOutputs(void)
+{
+ Status status;
+
+ assert(_outputs == nullptr);
+
+ size_t num_output_nodes;
+ status = OrtSessionGetOutputCount(_session, &num_output_nodes);
+ status.throwOnError();
+
+ _outputs = stdex::make_unique<TensorSet>(_allocator.get(), num_output_nodes);
+
+ for (size_t i = 0; i < num_output_nodes; ++i)
+ {
+ char *output_name;
+ status = OrtSessionGetOutputName(_session, i, _allocator.get(), &output_name);
+ status.throwOnError();
+
+ assert(output_name != nullptr);
+
+ // Copy the name out, then return the Ort-allocated buffer to our allocator.
+ std::string name{output_name};
+ _allocator->Free(output_name);
+
+ OrtTypeInfo *typeinfo;
+ status = OrtSessionGetOutputTypeInfo(_session, i, &typeinfo);
+ status.throwOnError();
+
+ const OrtTensorTypeAndShapeInfo *tensor_info = OrtCastTypeInfoToTensorInfo(typeinfo);
+ ONNXTensorElementDataType type = OrtGetTensorElementType(tensor_info);
+
+ uint32_t num_dims = OrtGetNumOfDimensions(tensor_info);
+ std::vector<size_t> dims(num_dims);
+ OrtGetDimensions(tensor_info, (int64_t *)dims.data(), num_dims);
+
+ // NOTE To run OnnxRuntime, the total size of output tensor must be fixed.
+ // In the present code, the unknown shape that is -1 is arbitrarily changed to 1.
+ //
+ // TODO Add user argument related to unknown shape
+ //
+ for (uint32_t j = 0; j < num_dims; ++j)
+ {
+ if (dims[j] == -1)
+ {
+ dims[j] = 1;
+ }
+ }
+ OrtReleaseTypeInfo(typeinfo);
+
+ _outputs->set(i, name, type, dims);
+ }
+}
+
+// Execute the session: build C-string name arrays for inputs/outputs and
+// invoke OrtRun with the pre-allocated tensors from prepareInputs/Outputs.
+void Runner::run(void)
+{
+ Status status;
+
+ // `auto` here copies the name vectors; the c_str() pointers below stay
+ // valid because the local copies outlive the OrtRun call.
+ auto pinput_names = _inputs->names();
+ std::vector<const char *> input_names(pinput_names.size());
+ for (size_t i = 0; i < pinput_names.size(); ++i)
+ {
+ input_names[i] = pinput_names[i].c_str();
+ }
+
+ auto poutput_names = _outputs->names();
+ std::vector<const char *> output_names(poutput_names.size());
+ for (size_t i = 0; i < poutput_names.size(); ++i)
+ {
+ output_names[i] = poutput_names[i].c_str();
+ }
+
+ // NOTE(review): mutable_tensors() returns a temporary copy of the pointer
+ // vector, so .data() points into a temporary that dies after this call;
+ // any OrtValue* OrtRun writes into that array is discarded. This only
+ // works because the output tensors were pre-allocated in prepareOutputs —
+ // confirm, and prefer a const-reference accessor.
+ status = OrtRun(_session, NULL, input_names.data(), _inputs->tensors().data(), _inputs->size(),
+ output_names.data(), _outputs->size(), _outputs->mutable_tensors().data());
+ status.throwOnError();
+}
+
+} // namespace onnx
+} // namespace support
+} // namespace nnkit
diff --git a/compiler/nnkit-tf/CMakeLists.txt b/compiler/nnkit-tf/CMakeLists.txt
new file mode 100644
index 000000000..ea6131fc2
--- /dev/null
+++ b/compiler/nnkit-tf/CMakeLists.txt
@@ -0,0 +1,8 @@
+nnas_find_package(TensorFlow QUIET)
+
+if(NOT TensorFlow_FOUND)
+ return()
+endif(NOT TensorFlow_FOUND)
+
+add_subdirectory(support)
+add_subdirectory(backend)
diff --git a/compiler/nnkit-tf/backend/Backend.cpp b/compiler/nnkit-tf/backend/Backend.cpp
new file mode 100644
index 000000000..ee0476469
--- /dev/null
+++ b/compiler/nnkit-tf/backend/Backend.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/tf/Backend.h"
+
+#include <nnkit/CmdlineArguments.h>
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+// nnkit plugin entry point: build a TensorFlow backend from the two
+// command-line arguments (frozen GraphDef path and test-info path).
+extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
+{
+ using stdex::make_unique;
+
+ assert(args.size() == 2); // args.at(0): test.pb path, args.at(1): test.info path
+
+ return make_unique<::nnkit::support::tf::Backend>(args.at(0), args.at(1));
+}
diff --git a/compiler/nnkit-tf/backend/CMakeLists.txt b/compiler/nnkit-tf/backend/CMakeLists.txt
new file mode 100644
index 000000000..dd2e469e8
--- /dev/null
+++ b/compiler/nnkit-tf/backend/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_library(nnkit_tf_backend SHARED Backend.cpp)
+target_link_libraries(nnkit_tf_backend nnkit_support_tf)
+target_link_libraries(nnkit_tf_backend stdex)
diff --git a/compiler/nnkit-tf/requires.cmake b/compiler/nnkit-tf/requires.cmake
new file mode 100644
index 000000000..4b9fd68b2
--- /dev/null
+++ b/compiler/nnkit-tf/requires.cmake
@@ -0,0 +1,3 @@
+require("stdex")
+require("tfinfo")
+require("nnkit-intf")
diff --git a/compiler/nnkit-tf/support/CMakeLists.txt b/compiler/nnkit-tf/support/CMakeLists.txt
new file mode 100644
index 000000000..471a1c70f
--- /dev/null
+++ b/compiler/nnkit-tf/support/CMakeLists.txt
@@ -0,0 +1,9 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(nnkit_support_tf-1.12 STATIC ${SOURCES})
+set_target_properties(nnkit_support_tf-1.12 PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(nnkit_support_tf-1.12 PUBLIC include)
+target_link_libraries(nnkit_support_tf-1.12 nnkit_intf_backend stdex nnkit_support_tftestinfo)
+target_link_libraries(nnkit_support_tf-1.12 tensorflow)
+
+add_library(nnkit_support_tf ALIAS nnkit_support_tf-1.12)
diff --git a/compiler/nnkit-tf/support/include/nnkit/support/tf/Backend.h b/compiler/nnkit-tf/support/include/nnkit/support/tf/Backend.h
new file mode 100644
index 000000000..c1022c14e
--- /dev/null
+++ b/compiler/nnkit-tf/support/include/nnkit/support/tf/Backend.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_TF_BACKEND_H__
+#define __NNKIT_SUPPORT_TF_BACKEND_H__
+
+#include "nnkit/support/tf/TensorDataMap.h"
+#include "nnkit/support/tf/TensorContext.h"
+#include "nnkit/support/tf/Runner.h"
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+
+#include <nnkit/Backend.h>
+
+#include <memory>
+#include <vector>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tf
+{
+
+using nnkit::support::tftestinfo::ParsedTensor;
+
+/**
+ * @brief nnkit backend that runs a frozen TensorFlow GraphDef (.pb) using
+ * tensor information parsed from a test.info file.
+ */
+class Backend final : public nnkit::Backend
+{
+public:
+ Backend() = delete;
+ Backend(const Backend &) = delete;
+ Backend(Backend &&) = delete;
+
+ // pb_path: frozen GraphDef file; info_path: tensor-info file describing
+ // which graph tensors are inputs/outputs (and optionally their shapes).
+ Backend(const char *pb_path, const char *info_path);
+
+ void prepare(const std::function<void(nnkit::TensorContext &)> &f) override;
+
+ void run(void) override;
+
+ void teardown(const std::function<void(nnkit::TensorContext &)> &f) override;
+
+private:
+ // Parsed tensor descriptions, split by kind in the constructor.
+ std::vector<std::unique_ptr<ParsedTensor>> _inputs;
+ std::vector<std::unique_ptr<ParsedTensor>> _outputs;
+
+ // Host-side buffers keyed by ParsedTensor, filled in prepare()/run().
+ TensorDataMap _data_map;
+
+ Runner _tf_runner;
+};
+
+} // namespace tf
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_TF_BACKEND_H__
diff --git a/compiler/nnkit-tf/support/include/nnkit/support/tf/Runner.h b/compiler/nnkit-tf/support/include/nnkit/support/tf/Runner.h
new file mode 100644
index 000000000..f782ea879
--- /dev/null
+++ b/compiler/nnkit-tf/support/include/nnkit/support/tf/Runner.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_TF_RUNNER_H__
+#define __NNKIT_SUPPORT_TF_RUNNER_H__
+
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+#include "nnkit/support/tf/TensorDataMap.h"
+#include <angkor/TensorShape.h>
+
+#include <tensorflow/c/c_api.h>
+
+#include <vector>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tf
+{
+
+using nnkit::support::tftestinfo::ParsedTensor;
+
+/**
+ * @brief Thin wrapper over the TensorFlow C API: loads a GraphDef, builds a
+ * session, binds input/output tensors, and runs inference.
+ */
+class Runner final
+{
+public:
+ // Element types recognized when reading dtypes from the GraphDef.
+ enum class DataType
+ {
+ Unknown, // Unknown type (serves as a default value)
+
+ U8, // 8-bit unsigned integer
+ U16, // 16-bit unsigned integer
+ U32, // 32-bit unsigned integer
+ U64, // 64-bit unsigned integer
+
+ S8, // 8-bit signed integer
+ S16, // 16-bit signed integer
+ S32, // 32-bit signed integer
+ S64, // 64-bit signed integer
+
+ FLOAT, // floating-point
+ };
+
+public:
+ // Load the GraphDef at pb_path and create the TF session.
+ Runner(const char *pb_path);
+
+ ~Runner();
+
+ /**
+ * @brief Get tensor shape from GraphDef for input tensor only.
+ *
+ * @note If the node cannot be found or shape you provided is wrong or not enough though shape
+ * must be needed because of unknown shape in GraphDef, it returns false.
+ */
+ bool getTensorShapeFromGraphDef(const std::unique_ptr<ParsedTensor> &tensor,
+ angkor::TensorShape &shape);
+
+ /**
+ * @brief Get tensor data type from GraphDef.
+ *
+ * @note If the node cannot be found or dtype of the node is unknown, it returns false.
+ */
+ bool getTensorDtypeFromGraphDef(const std::unique_ptr<ParsedTensor> &tensor,
+ Runner::DataType &dtype);
+
+ // Bind the parsed input tensors to TF_Tensors, copying values from data_map.
+ void prepareInputs(const std::vector<std::unique_ptr<ParsedTensor>> &inputs,
+ TensorDataMap &data_map);
+
+ // Register the parsed output tensors so run() can fetch them.
+ void prepareOutputs(const std::vector<std::unique_ptr<ParsedTensor>> &outputs);
+
+ void run();
+
+ // Raw TF output tensors produced by the last run().
+ const std::vector<TF_Tensor *> &output() { return _output_tensors; }
+
+private:
+ TF_Graph *_graph;
+ TF_Session *_sess;
+
+ std::vector<TF_Output> _input_ops;
+ std::vector<TF_Tensor *> _input_tensors;
+
+ std::vector<TF_Output> _output_ops;
+ std::vector<TF_Tensor *> _output_tensors;
+
+ // Reused status object for TF C API calls.
+ TF_Status *_status;
+};
+
+} // namespace tf
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_TF_RUNNER_H__
diff --git a/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorContext.h b/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorContext.h
new file mode 100644
index 000000000..f1ecd6c9c
--- /dev/null
+++ b/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorContext.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_TF_TENSOR_CONTEXT_H__
+#define __NNKIT_SUPPORT_TF_TENSOR_CONTEXT_H__
+
+#include "nnkit/TensorContext.h"
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+#include "nnkit/support/tf/TensorDataMap.h"
+
+#include <memory>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tf
+{
+
+using nnkit::support::tftestinfo::ParsedTensor;
+
+/**
+ * @brief nnkit::TensorContext over parsed TF tensors whose element storage
+ * lives in a TensorDataMap. Only float (fp32) tensors are supported.
+ */
+class TensorContext final : public nnkit::TensorContext
+{
+public:
+ // Holds references only; both the tensor list and the data map must
+ // outlive this context (the owning Backend guarantees that).
+ TensorContext(const std::vector<std::unique_ptr<ParsedTensor>> &tensors, TensorDataMap &data_map)
+ : _tensors(tensors), _data_map(data_map)
+ {
+ // empty
+ }
+
+ TensorContext(const TensorContext &) = delete; // prevent accidental use
+ TensorContext(TensorContext &&) = delete;
+
+public:
+ uint32_t size(void) const override { return _tensors.size(); }
+
+public:
+ std::string name(uint32_t n) const override // name with ":0", ":1", etc
+ {
+ return _tensors.at(n)->name();
+ }
+
+public:
+ nncc::core::ADT::tensor::Shape shape(uint32_t n) const override
+ {
+ return _tensors.at(n)->shape();
+ }
+
+public:
+ // Float (fp32) tensor support
+ bool isFloatTensor(uint32_t n) const override { return _tensors.at(n)->isFloatTensor(); }
+
+ // Defined out-of-line; expose tensor n's buffer (from _data_map) to f.
+ void getMutableFloatTensor(uint32_t n,
+ const nnkit::TensorContext::TypedAccessor<float> &f) override;
+ void getConstFloatTensor(uint32_t n,
+ const nnkit::TensorContext::TypedReader<float> &f) const override;
+
+private:
+ const std::vector<std::unique_ptr<ParsedTensor>> &_tensors;
+ TensorDataMap &_data_map;
+};
+
+} // namespace tf
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_TF_TENSOR_CONTEXT_H__
diff --git a/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorDataMap.h b/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorDataMap.h
new file mode 100644
index 000000000..daa1a95b3
--- /dev/null
+++ b/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorDataMap.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_TF_TENSOR_DATA_MAP_H__
+#define __NNKIT_SUPPORT_TF_TENSOR_DATA_MAP_H__
+
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+
+#include <tensorflow/c/c_api.h>
+
+#include <stdexcept>
+#include <memory>
+#include <map>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tf
+{
+
+using nnkit::support::tftestinfo::ParsedTensor;
+
+/**
+ * @brief Class to map parsed tensor and memory for tensor values.
+ * For parsed tensor, this memory is used to fill input or output values of graph.
+ */
+/**
+ * @brief Class to map parsed tensor and memory for tensor values.
+ * For parsed tensor, this memory is used to fill input or output values of graph.
+ *
+ * Owns one heap buffer per ParsedTensor, sized from the tensor's shape and
+ * element type at allocate() time.
+ */
+class TensorDataMap
+{
+public:
+ TensorDataMap() { /* empty */}
+
+ // Allocate (exactly once per tensor) a buffer big enough for all elements.
+ // Throws if already allocated or if the element type is unsupported
+ // (only float tensors are handled today).
+ uint8_t *allocate(const ParsedTensor *parsed_tensor)
+ {
+ auto it = _data_map.find(parsed_tensor);
+ if (it != _data_map.end())
+ throw std::runtime_error("Already allocated");
+
+ int bytes = 0;
+ if (parsed_tensor->isFloatTensor())
+ bytes = sizeof(float);
+ else
+ throw std::runtime_error("Unsupported or wrong data type");
+
+ uint64_t size = num_elements(parsed_tensor->shape()) * bytes;
+ // NOTE(review): the map is looked up again on the next two lines even
+ // though insertion position is known; a single emplace would do.
+ _data_map[parsed_tensor] = std::move(std::unique_ptr<uint8_t[]>(new uint8_t[size]));
+
+ return _data_map[parsed_tensor].get();
+ }
+
+ // Return the previously allocated buffer; throws if allocate() was never
+ // called for this tensor.
+ uint8_t *data(const ParsedTensor *parsed_tensor)
+ {
+ auto it = _data_map.find(parsed_tensor);
+ if (it == _data_map.end())
+ throw std::runtime_error("Cannot find parsed tensor");
+
+ return it->second.get();
+ }
+
+private:
+ // Keyed by raw ParsedTensor pointer — identity, not value, semantics.
+ std::map<const ParsedTensor *, std::unique_ptr<uint8_t[]>> _data_map;
+};
+
+} // namespace tf
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_TF_TENSOR_DATA_MAP_H__
diff --git a/compiler/nnkit-tf/support/src/Backend.cpp b/compiler/nnkit-tf/support/src/Backend.cpp
new file mode 100644
index 000000000..f28e05f74
--- /dev/null
+++ b/compiler/nnkit-tf/support/src/Backend.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/tf/Backend.h"
+
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+#include "nnkit/support/tftestinfo/TensorInfoParser.h"
+#include "nnkit/support/tf/TensorDataMap.h"
+#include "nnkit/support/tf/TensorContext.h"
+#include "nnkit/support/tf/Runner.h"
+
+#include <angkor/TensorShape.h>
+
+#include <nnkit/Backend.h>
+
+#include <cstring> // memcpy
+
+namespace nnkit
+{
+namespace support
+{
+namespace tf
+{
+
+using nnkit::support::tftestinfo::ParsedTensor;
+
+// Parse the test.info file and split tensors into inputs and outputs.
+// For an input with no user-provided shape, recover the shape from the
+// GraphDef (throwing if that also fails).
+// NOTE(review): oops::UserExn is used here but no "oops" include is visible
+// in this file — presumably pulled in transitively; confirm.
+Backend::Backend(const char *pb_path, const char *info_path) : _tf_runner(pb_path)
+{
+ auto parsed_tensors = nnkit::support::tftestinfo::parse(info_path);
+ for (auto &parsed_tensor : parsed_tensors)
+ {
+ if (parsed_tensor->kind() == ParsedTensor::Kind::Input)
+ {
+ // user didn't specify input
+ if (!parsed_tensor->hasShape())
+ {
+ angkor::TensorShape shape;
+ if (!_tf_runner.getTensorShapeFromGraphDef(parsed_tensor, shape))
+ throw oops::UserExn(
+ "Info you provided may be wrong or not enough. Please check the info file.");
+
+ parsed_tensor->mutable_shape().resize(shape.rank());
+ for (int r = 0; r < shape.rank(); r++)
+ {
+ parsed_tensor->mutable_shape().dim(r) = shape.dim(r);
+ }
+ }
+ _inputs.emplace_back(std::move(parsed_tensor));
+ }
+ else
+ _outputs.emplace_back(std::move(parsed_tensor));
+ }
+}
+
+// Allocate host buffers for every input, let f fill them via a
+// TensorContext view, then hand the filled inputs to the TF runner.
+void Backend::prepare(const std::function<void(nnkit::TensorContext &)> &f)
+{
+ for (const auto &input_tensor : _inputs)
+ _data_map.allocate(input_tensor.get());
+
+ TensorContext ctx(_inputs, _data_map);
+ f(ctx); // fill values
+
+ _tf_runner.prepareInputs(_inputs, _data_map);
+ _tf_runner.prepareOutputs(_outputs);
+}
+
+// Run the TF session, then copy each produced output tensor into a newly
+// allocated host buffer in _data_map, updating the ParsedTensor's shape
+// from the actual TF_Tensor dimensions.
+void Backend::run(void)
+{
+ _tf_runner.run();
+
+ // get result
+ auto actual_outputs = _tf_runner.output();
+
+ // NOTE(review): signed `int n` compared against unsigned _outputs.size();
+ // harmless here but triggers -Wsign-compare.
+ for (int n = 0; n < _outputs.size(); n++)
+ {
+ auto actual = actual_outputs[n];
+ const size_t byte_size = TF_TensorByteSize(actual);
+ const uint8_t *tf_data = reinterpret_cast<const uint8_t *>(TF_TensorData(actual));
+
+ // Record the concrete output shape reported by TensorFlow.
+ const uint32_t shape_rank = TF_NumDims(actual);
+ _outputs[n]->mutable_shape().resize(shape_rank);
+ for (uint32_t r = 0; r < shape_rank; r++)
+ {
+ _outputs[n]->mutable_shape().dim(r) = TF_Dim(actual, r);
+ }
+ // allocate() throws if called twice for the same tensor, so run() is
+ // effectively single-shot per Backend instance.
+ uint8_t *dest = _data_map.allocate(_outputs[n].get());
+
+ std::memcpy(dest, tf_data, byte_size);
+ }
+}
+
+// Expose the output buffers (filled by run()) to the caller-supplied f.
+void Backend::teardown(const std::function<void(nnkit::TensorContext &)> &f)
+{
+ TensorContext ctx(_outputs, _data_map);
+ f(ctx);
+}
+
+} // namespace tf
+} // namespace support
+} // namespace nnkit
diff --git a/compiler/nnkit-tf/support/src/Runner.cpp b/compiler/nnkit-tf/support/src/Runner.cpp
new file mode 100644
index 000000000..0d36ee2f4
--- /dev/null
+++ b/compiler/nnkit-tf/support/src/Runner.cpp
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Code here refers https://github.com/Neargye/hello_tf_c_api
+//
+// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018 - 2019 Daniil Goncharov <neargye@gmail.com>.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "nnkit/support/tf/Runner.h"
+
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+#include "nncc/core/ADT/tensor/Shape.h"
+
+#include <tensorflow/c/c_api.h>
+
+#include <vector>
+#include <cassert>
+#include <cstring> // std::memcpy()
+#include <stdexcept>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tf
+{
+
+using nncc::core::ADT::tensor::num_elements;
+using nnkit::support::tftestinfo::ParsedTensor;
+
+namespace
+{
+TF_Tensor *create_tensor(const TF_DataType data_type, const std::int64_t *dims,
+ const std::size_t num_dims, const void *data, const std::size_t len)
+{
+ if (dims == nullptr || data == nullptr)
+ return nullptr;
+
+ TF_Tensor *tensor = TF_AllocateTensor(data_type, dims, static_cast<int>(num_dims), len);
+ if (tensor == nullptr)
+ return nullptr;
+
+ void *tensor_data = TF_TensorData(tensor);
+ if (tensor_data == nullptr)
+ {
+ TF_DeleteTensor(tensor);
+ return nullptr;
+ }
+
+ std::memcpy(tensor_data, data, std::min(len, TF_TensorByteSize(tensor)));
+
+ return tensor;
+}
+
+void deallocate_buffer(void *data, size_t)
+{
+ assert(data);
+ std::free(data);
+}
+
+TF_Buffer *build_TFBuffer(const char *file)
+{
+ const auto f = std::fopen(file, "rb");
+
+ if (f == nullptr)
+ throw std::runtime_error(std::string("cannot open ") + file);
+
+ std::fseek(f, 0, SEEK_END); // to get file size
+ const auto fsize = ftell(f);
+
+ std::fseek(f, 0, SEEK_SET);
+
+ if (fsize < 1)
+ {
+ std::fclose(f);
+ throw std::runtime_error(std::string("file read error: ") + file);
+ }
+
+ const auto data = std::malloc(fsize);
+ std::fread(data, fsize, 1, f);
+ std::fclose(f);
+
+ TF_Buffer *buf = TF_NewBuffer();
+ buf->data = data;
+ buf->length = fsize;
+ buf->data_deallocator = deallocate_buffer;
+
+ return buf;
+}
+
+} // namespace
+
+Runner::Runner(const char *pb_path)
+{
+ // initialize member variables
+ _sess = nullptr;
+ _graph = TF_NewGraph();
+ _status = TF_NewStatus();
+
+ // import graph from file
+ TF_Buffer *buffer = build_TFBuffer(pb_path);
+ if (buffer == nullptr)
+ throw std::runtime_error("Can't read buffer from file");
+
+ TF_ImportGraphDefOptions *opts = TF_NewImportGraphDefOptions();
+
+ TF_GraphImportGraphDef(_graph, buffer, opts, _status);
+
+ TF_DeleteImportGraphDefOptions(opts);
+ TF_DeleteBuffer(buffer);
+
+ if (TF_GetCode(_status) != TF_OK) // TODO Consider wrapper to prevent memory leak
+ throw std::runtime_error("Can't import GraphDef");
+}
+
+Runner::~Runner()
+{
+ if (_graph)
+ TF_DeleteGraph(_graph);
+
+ if (_sess)
+ {
+ TF_CloseSession(_sess, _status);
+ TF_DeleteSession(_sess, _status);
+ }
+
+ for (auto tensor : _input_tensors)
+ TF_DeleteTensor(tensor);
+
+ for (auto tensor : _output_tensors)
+ TF_DeleteTensor(tensor);
+
+ TF_DeleteStatus(_status);
+}
+
+bool Runner::getTensorShapeFromGraphDef(const std::unique_ptr<ParsedTensor> &tensor,
+ angkor::TensorShape &shape)
+{
+ assert(!tensor->hasShape());
+ TF_Output tensor_op = {TF_GraphOperationByName(_graph, tensor->nodeName().c_str()),
+ tensor->tensorIndex()};
+
+ if (tensor_op.oper == nullptr)
+ return false;
+
+ int dim_size = TF_GraphGetTensorNumDims(_graph, tensor_op, _status);
+ if (dim_size == -1)
+ return false;
+ int64_t dims[dim_size];
+
+ TF_GraphGetTensorShape(_graph, tensor_op, dims, dim_size, _status);
+
+ shape.resize(dim_size);
+ for (int d = 0; d < dim_size; d++)
+ {
+ if (dims[d] == -1)
+ return false;
+ shape.dim(d) = dims[d];
+ }
+ return true;
+}
+
+bool Runner::getTensorDtypeFromGraphDef(const std::unique_ptr<ParsedTensor> &tensor,
+ Runner::DataType &dtype)
+{
+ TF_Output tensor_op = {TF_GraphOperationByName(_graph, tensor->nodeName().c_str()),
+ tensor->tensorIndex()};
+
+ if (tensor_op.oper == nullptr)
+ return false;
+
+ TF_DataType tf_dtype = TF_OperationOutputType(tensor_op);
+
+ switch (tf_dtype)
+ {
+ case TF_DataType::TF_FLOAT:
+ dtype = DataType::FLOAT;
+ break;
+ case TF_DataType::TF_UINT8:
+ dtype = DataType::U8;
+ break;
+ case TF_DataType::TF_UINT16:
+ dtype = DataType::U16;
+ break;
+ case TF_DataType::TF_UINT32:
+ dtype = DataType::U32;
+ break;
+ case TF_DataType::TF_UINT64:
+ dtype = DataType::U64;
+ break;
+ case TF_DataType::TF_INT8:
+ dtype = DataType::S8;
+ break;
+ case TF_DataType::TF_INT16:
+ dtype = DataType::S16;
+ break;
+ case TF_DataType::TF_INT32:
+ dtype = DataType::S32;
+ break;
+ case TF_DataType::TF_INT64:
+ dtype = DataType::S64;
+ break;
+ default:
+ dtype = DataType::Unknown;
+ return false;
+ }
+ return true;
+}
+
+void Runner::prepareInputs(const std::vector<std::unique_ptr<ParsedTensor>> &inputs,
+ TensorDataMap &data_map)
+{
+ assert(_graph);
+
+ for (const auto &tensor : inputs)
+ {
+ TF_Output input_op = {TF_GraphOperationByName(_graph, tensor->nodeName().c_str()),
+ tensor->tensorIndex()};
+
+ if (input_op.oper == nullptr)
+ throw std::runtime_error("Can't init input_op : " + tensor->name());
+
+ std::vector<int64_t> shape;
+ for (int r = 0; r < tensor->shape().rank(); r++)
+ shape.emplace_back(tensor->shape().dim(r));
+
+ int size = 0;
+ if (tensor->isFloatTensor())
+ size = sizeof(float);
+ else
+ throw std::runtime_error("Not supported tensor type");
+
+ TF_Tensor *input_tensor =
+ create_tensor(TF_FLOAT, shape.data(), shape.size(), data_map.data(tensor.get()),
+ num_elements(tensor->shape()) * size);
+
+ _input_ops.emplace_back(input_op);
+ _input_tensors.emplace_back(input_tensor);
+ }
+}
+
+void Runner::prepareOutputs(const std::vector<std::unique_ptr<ParsedTensor>> &outputs)
+{
+ assert(_graph);
+
+ for (const auto &tensor : outputs)
+ {
+ TF_Output output_op = {TF_GraphOperationByName(_graph, tensor->nodeName().c_str()),
+ tensor->tensorIndex()};
+
+ if (output_op.oper == nullptr)
+ throw std::runtime_error("Can't init output_op : " + tensor->name());
+
+ _output_ops.emplace_back(output_op);
+ }
+
+ _output_tensors.resize(_output_ops.size());
+}
+
+void Runner::run()
+{
+ assert(_graph);
+ assert(_output_ops.size() > 0);
+
+ TF_SessionOptions *options = TF_NewSessionOptions();
+ _sess = TF_NewSession(_graph, options, _status);
+ TF_DeleteSessionOptions(options);
+
+ if (TF_GetCode(_status) != TF_OK)
+ throw std::runtime_error(TF_Message(_status));
+
+ TF_SessionRun(_sess,
+ nullptr, // Run options.
+ _input_ops.data(), _input_tensors.data(), _input_ops.size(), _output_ops.data(),
+ _output_tensors.data(), _output_ops.size(), nullptr,
+ 0, // Target operations, number of targets.
+ nullptr, // Run metadata.
+ _status // Output status.
+ );
+
+ if (TF_GetCode(_status) != TF_OK)
+ throw std::runtime_error(TF_Message(_status));
+
+ TF_CloseSession(_sess, _status);
+ TF_DeleteSession(_sess, _status);
+ _sess = nullptr;
+}
+
+} // namespace tf
+} // namespace support
+} // namespace nnkit
diff --git a/compiler/nnkit-tf/support/src/TensorContext.cpp b/compiler/nnkit-tf/support/src/TensorContext.cpp
new file mode 100644
index 000000000..528ccba8c
--- /dev/null
+++ b/compiler/nnkit-tf/support/src/TensorContext.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/tf/TensorContext.h"
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tf
+{
+
+using nnkit::support::tftestinfo::ParsedTensor;
+
+void TensorContext::getMutableFloatTensor(uint32_t n,
+ const nnkit::TensorContext::TypedAccessor<float> &f)
+{ // for input
+ using nncc::core::ADT::tensor::LexicalLayout;
+ using nncc::core::ADT::tensor::make_overlay;
+
+ const ParsedTensor *parsed_tensor = _tensors.at(n).get();
+ float *data = reinterpret_cast<float *>(_data_map.data(parsed_tensor));
+ auto overlay = make_overlay<float, LexicalLayout>(shape(n), data);
+
+ f(*this, n, overlay);
+}
+
+void TensorContext::getConstFloatTensor(uint32_t n,
+ const nnkit::TensorContext::TypedReader<float> &f) const
+{ // for output
+ using nncc::core::ADT::tensor::LexicalLayout;
+ using nncc::core::ADT::tensor::make_overlay;
+
+ const ParsedTensor *parsed_tensor = _tensors.at(n).get();
+ float *data = reinterpret_cast<float *>(_data_map.data(parsed_tensor));
+ auto overlay = make_overlay<float, LexicalLayout>(shape(n), data);
+
+ f(*this, n, overlay);
+}
+
+} // namespace tf
+} // namespace support
+} // namespace nnkit
diff --git a/compiler/nnkit-tflite/CMakeLists.txt b/compiler/nnkit-tflite/CMakeLists.txt
new file mode 100644
index 000000000..d1dbbd772
--- /dev/null
+++ b/compiler/nnkit-tflite/CMakeLists.txt
@@ -0,0 +1,8 @@
+nnas_find_package(TensorFlowLite QUIET EXACT 1.12)
+
+if(NOT TensorFlowLite_FOUND)
+ return()
+endif(NOT TensorFlowLite_FOUND)
+
+add_subdirectory(support)
+add_subdirectory(backend)
diff --git a/compiler/nnkit-tflite/backend/Backend.cpp b/compiler/nnkit-tflite/backend/Backend.cpp
new file mode 100644
index 000000000..7d766063e
--- /dev/null
+++ b/compiler/nnkit-tflite/backend/Backend.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/tflite/AbstractBackend.h"
+
+#include <tensorflow/contrib/lite/kernels/register.h>
+#include <tensorflow/contrib/lite/model.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+class GenericBackend final : public nnkit::support::tflite::AbstractBackend
+{
+public:
+ GenericBackend(const std::string &path)
+ {
+ ::tflite::StderrReporter error_reporter;
+
+ _model = ::tflite::FlatBufferModel::BuildFromFile(path.c_str(), &error_reporter);
+
+ ::tflite::ops::builtin::BuiltinOpResolver resolver;
+ ::tflite::InterpreterBuilder builder(*_model, resolver);
+
+ if (kTfLiteOk != builder(&_interp))
+ {
+ throw std::runtime_error{"Failed to build a tflite interpreter"};
+ }
+
+ _interp->SetNumThreads(1);
+ }
+
+public:
+ ::tflite::Interpreter &interpreter(void) override { return *_interp; }
+
+private:
+ std::unique_ptr<::tflite::FlatBufferModel> _model;
+ std::unique_ptr<::tflite::Interpreter> _interp;
+};
+}
+
+#include <nnkit/CmdlineArguments.h>
+#include <stdex/Memory.h>
+
+extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
+{
+ return stdex::make_unique<GenericBackend>(args.at(0));
+}
diff --git a/compiler/nnkit-tflite/backend/CMakeLists.txt b/compiler/nnkit-tflite/backend/CMakeLists.txt
new file mode 100644
index 000000000..3f4a8ca53
--- /dev/null
+++ b/compiler/nnkit-tflite/backend/CMakeLists.txt
@@ -0,0 +1,7 @@
+if(NOT TARGET nnkit_support_tflite)
+ return()
+endif(NOT TARGET nnkit_support_tflite)
+
+add_library(nnkit_tflite_backend SHARED Backend.cpp)
+target_link_libraries(nnkit_tflite_backend nnkit_support_tflite)
+target_link_libraries(nnkit_tflite_backend stdex)
diff --git a/compiler/nnkit-tflite/requires.cmake b/compiler/nnkit-tflite/requires.cmake
new file mode 100644
index 000000000..d370fc17c
--- /dev/null
+++ b/compiler/nnkit-tflite/requires.cmake
@@ -0,0 +1,2 @@
+require("stdex")
+require("nnkit-intf")
diff --git a/compiler/nnkit-tflite/support/CMakeLists.txt b/compiler/nnkit-tflite/support/CMakeLists.txt
new file mode 100644
index 000000000..0a3e2fbe8
--- /dev/null
+++ b/compiler/nnkit-tflite/support/CMakeLists.txt
@@ -0,0 +1,10 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+# TODO Rename nnkit_support_tflite-1.12 as nnkit_tflite_support-1.12
+add_library(nnkit_support_tflite-1.12 STATIC ${SOURCES})
+set_target_properties(nnkit_support_tflite-1.12 PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(nnkit_support_tflite-1.12 PUBLIC include)
+target_link_libraries(nnkit_support_tflite-1.12 nnkit_intf_backend)
+target_link_libraries(nnkit_support_tflite-1.12 tensorflowlite-1.12)
+
+add_library(nnkit_support_tflite ALIAS nnkit_support_tflite-1.12)
diff --git a/compiler/nnkit-tflite/support/include/nnkit/support/tflite/AbstractBackend.h b/compiler/nnkit-tflite/support/include/nnkit/support/tflite/AbstractBackend.h
new file mode 100644
index 000000000..d2f6aa9f3
--- /dev/null
+++ b/compiler/nnkit-tflite/support/include/nnkit/support/tflite/AbstractBackend.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_TFLITE_ABSTRACT_BACKEND_H__
+#define __NNKIT_SUPPORT_TFLITE_ABSTRACT_BACKEND_H__
+
+#include <nnkit/Backend.h>
+#include <tensorflow/contrib/lite/interpreter.h>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tflite
+{
+
+class AbstractBackend : public nnkit::Backend
+{
+public:
+ virtual ~AbstractBackend() = default;
+
+public:
+ virtual ::tflite::Interpreter &interpreter(void) = 0;
+
+public:
+ void prepare(const std::function<void(TensorContext &)> &f) override;
+ void run(void) override;
+ void teardown(const std::function<void(TensorContext &)> &f) override;
+};
+
+} // namespace tflite
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_TFLITE_ABSTRACT_BACKEND_H__
diff --git a/compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorContext.h b/compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorContext.h
new file mode 100644
index 000000000..a2f041398
--- /dev/null
+++ b/compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorContext.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_TFLITE_TENSOR_CONTEXT_H__
+#define __NNKIT_SUPPORT_TFLITE_TENSOR_CONTEXT_H__
+
+#include "nnkit/support/tflite/TensorSet.h"
+
+#include <nnkit/TensorContext.h>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tflite
+{
+
+class TensorContext final : public nnkit::TensorContext
+{
+public:
+ TensorContext(TensorSet &tensors) : _tensors(tensors)
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t size(void) const override { return _tensors.size(); }
+
+public:
+ std::string name(uint32_t n) const override { return _tensors.at(n)->name; }
+
+public:
+ nncc::core::ADT::tensor::Shape shape(uint32_t n) const override;
+
+public:
+ // Float (fp32) tensor support
+ bool isFloatTensor(uint32_t n) const override { return _tensors.at(n)->type == kTfLiteFloat32; }
+
+ void getMutableFloatTensor(uint32_t n, const TensorContext::TypedAccessor<float> &f) override;
+ void getConstFloatTensor(uint32_t n, const TensorContext::TypedReader<float> &f) const override;
+
+private:
+ TensorSet &_tensors;
+};
+
+} // namespace tflite
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_TFLITE_TENSOR_CONTEXT_H__
diff --git a/compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorSet.h b/compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorSet.h
new file mode 100644
index 000000000..d28ab6e77
--- /dev/null
+++ b/compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorSet.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_TFLITE_TENSOR_SET_H__
+#define __NNKIT_SUPPORT_TFLITE_TENSOR_SET_H__
+
+#include <tensorflow/contrib/lite/context.h>
+
+#include <cstdint>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tflite
+{
+
+struct TensorSet
+{
+ virtual ~TensorSet() = default;
+
+ virtual uint32_t size(void) const = 0;
+
+ virtual TfLiteTensor *at(uint32_t n) const = 0;
+};
+
+} // namespace tflite
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_TFLITE_TENSOR_SET_H__
diff --git a/compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorSets.h b/compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorSets.h
new file mode 100644
index 000000000..570803117
--- /dev/null
+++ b/compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorSets.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_TFLITE_TENSOR_SETS_H__
+#define __NNKIT_SUPPORT_TFLITE_TENSOR_SETS_H__
+
+#include "nnkit/support/tflite/TensorSet.h"
+
+#include <tensorflow/contrib/lite/interpreter.h>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tflite
+{
+
+class InputTensorSet final : public TensorSet
+{
+public:
+ explicit InputTensorSet(::tflite::Interpreter &interp) : _interp(interp)
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t size(void) const override { return _interp.inputs().size(); }
+
+public:
+ TfLiteTensor *at(uint32_t n) const override { return _interp.tensor(_interp.inputs().at(n)); }
+
+private:
+ ::tflite::Interpreter &_interp;
+};
+
+class OutputTensorSet final : public TensorSet
+{
+public:
+ OutputTensorSet(::tflite::Interpreter &interp) : _interp(interp)
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t size(void) const override { return _interp.outputs().size(); }
+
+public:
+ TfLiteTensor *at(uint32_t n) const override { return _interp.tensor(_interp.outputs().at(n)); }
+
+private:
+ ::tflite::Interpreter &_interp;
+};
+
+} // namespace tflite
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_TFLITE_TENSOR_SETS_H__
diff --git a/compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorUtils.h b/compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorUtils.h
new file mode 100644
index 000000000..05fb7d58c
--- /dev/null
+++ b/compiler/nnkit-tflite/support/include/nnkit/support/tflite/TensorUtils.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_TENSOR_UTILS_H__
+#define __NNKIT_SUPPORT_TENSOR_UTILS_H__
+
+#include <tensorflow/contrib/lite/context.h>
+#include <nncc/core/ADT/tensor/Shape.h>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tflite
+{
+
+nncc::core::ADT::tensor::Shape tensor_shape(const TfLiteTensor *t);
+
+} // namespace tflite
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_TENSOR_UTILS_H__
diff --git a/compiler/nnkit-tflite/support/src/Backend.cpp b/compiler/nnkit-tflite/support/src/Backend.cpp
new file mode 100644
index 000000000..018c4906e
--- /dev/null
+++ b/compiler/nnkit-tflite/support/src/Backend.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/tflite/AbstractBackend.h"
+#include "nnkit/support/tflite/TensorSets.h"
+#include "nnkit/support/tflite/TensorContext.h"
+
+#include <cassert>
+
+static inline void ensure(TfLiteStatus status) { assert(status == kTfLiteOk); }
+
+namespace nnkit
+{
+namespace support
+{
+namespace tflite
+{
+
+void AbstractBackend::prepare(const std::function<void(nnkit::TensorContext &)> &f)
+{
+ ensure(interpreter().AllocateTensors());
+
+ InputTensorSet inputs(interpreter());
+ TensorContext ctx(inputs);
+ f(ctx);
+}
+
+void AbstractBackend::run(void) { ensure(interpreter().Invoke()); }
+
+void AbstractBackend::teardown(const std::function<void(nnkit::TensorContext &)> &f)
+{
+ OutputTensorSet outputs(interpreter());
+ TensorContext ctx(outputs);
+ f(ctx);
+}
+
+} // namespace tflite
+} // namespace support
+} // namespace nnkit
diff --git a/compiler/nnkit-tflite/support/src/TensorContext.cpp b/compiler/nnkit-tflite/support/src/TensorContext.cpp
new file mode 100644
index 000000000..3ed5a18cb
--- /dev/null
+++ b/compiler/nnkit-tflite/support/src/TensorContext.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/tflite/TensorContext.h"
+#include "nnkit/support/tflite/TensorUtils.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tflite
+{
+
+nncc::core::ADT::tensor::Shape TensorContext::shape(uint32_t n) const
+{
+ return tensor_shape(_tensors.at(n));
+}
+
+void TensorContext::getMutableFloatTensor(uint32_t n,
+ const nnkit::TensorContext::TypedAccessor<float> &f)
+{
+ using nncc::core::ADT::tensor::LexicalLayout;
+ using nncc::core::ADT::tensor::make_overlay;
+
+ auto t = _tensors.at(n);
+
+ float *data = reinterpret_cast<float *>(t->data.f);
+ auto overlay = make_overlay<float, LexicalLayout>(shape(n), data);
+
+ f(*this, n, overlay);
+}
+
+void TensorContext::getConstFloatTensor(uint32_t n,
+ const nnkit::TensorContext::TypedReader<float> &f) const
+{
+ using nncc::core::ADT::tensor::LexicalLayout;
+ using nncc::core::ADT::tensor::make_overlay;
+
+ auto t = _tensors.at(n);
+
+ float *data = reinterpret_cast<float *>(t->data.f);
+ auto overlay = make_overlay<float, LexicalLayout>(shape(n), data);
+
+ f(*this, n, overlay);
+}
+
+} // namespace tflite
+} // namespace support
+} // namespace nnkit
diff --git a/compiler/nnkit-tflite/support/src/TensorUtils.cpp b/compiler/nnkit-tflite/support/src/TensorUtils.cpp
new file mode 100644
index 000000000..96d305b3b
--- /dev/null
+++ b/compiler/nnkit-tflite/support/src/TensorUtils.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/tflite/TensorUtils.h"
+
+namespace nnkit
+{
+namespace support
+{
+namespace tflite
+{
+
+nncc::core::ADT::tensor::Shape tensor_shape(const TfLiteTensor *t)
+{
+ nncc::core::ADT::tensor::Shape shape;
+
+ const int rank = t->dims->size;
+
+ shape.resize(rank);
+ for (int axis = 0; axis < rank; ++axis)
+ {
+ shape.dim(axis) = t->dims->data[axis];
+ }
+
+ return shape;
+}
+
+} // namespace tflite
+} // namespace support
+} // namespace nnkit
diff --git a/compiler/nnkit/CMakeLists.txt b/compiler/nnkit/CMakeLists.txt
new file mode 100644
index 000000000..8a04e704f
--- /dev/null
+++ b/compiler/nnkit/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(actions)
+add_subdirectory(tools)
diff --git a/compiler/nnkit/README.md b/compiler/nnkit/README.md
new file mode 100644
index 000000000..f368b8664
--- /dev/null
+++ b/compiler/nnkit/README.md
@@ -0,0 +1,179 @@
+# nnkit
+
+`nnkit` is a collection of neural network tools for our _nncc_ project.
+This tool is mostly used for testing.
+
+# Purpose
+For testing, we need to have
+- a tool to run existing framework such as Tensorflow for expected tensor result --- (1)
+- a tool to run our implementation for actual tensor result --- (2)
+
+`nnkit` provides a flexible framework to get expected and actual result.
+
+# Design
+
+## Requirements to address:
+
+- Input
+ - Same randomized input is used for both of (1) and (2)
+  - Expected tensor layout (e.g., NHWC) could be different for (1) and (2)
+- Input and output format
+ - Results of (1) and (2) have same file format and data format
+
+For (1), `nnkit` designed to enable the following:
+- Input of `nnkit` is randomized and saved into a file in a specific format
+- Existing framework such as Tensorflow can run with input tensors that are properly translated
+- Result is written into a file in a specific format
+
+For (2), `nnkit` designed to enable the following:
+- Data of `nnkit` in a file by (1) is used as input
+- Our implementation can run with input tensors that are properly translated
+- Result is written into a file in a specific format
+
+## `nnkit-run`
+
+`nnkit-run` is a command line interface to interact with existing inference engines
+or compiled artifacts.
+
+## How `nnkit-run` works
+
+`nnkit-run` first dynamically loads `backend` and multiple pre/post `action`
+specified by command-line. After loading backend and actions, `nnkit-run` requests
+`backend` to prepare itself. When backend is prepared, `backend` exposes its
+internal state to `nnkit-run` (as `nnkit::TensorContext`).
+`nnkit-run` takes this state, and passes it to registered pre `action`(s).
+Each action may read tensor(s) (e.g. dump the content into a file),
+or manipulate their value (e.g. fill random values).
+`nnkit-run` then invokes `backend` through `run()` method.
+After successfully running the `backend`, post `action`(s) are called just like
+pre `action`(s) as a teardown step.
+
+## Backends
+
+In 2019 there will be the following backends as of writing this document
+
+- Backends for the existing framework:
+ - Caffe as `libnnkit_caffe_backend.so`
+ - Tensorflow Lite as `libnnkit_tflite_backend.so`
+ - Tensorflow as `libnnkit_tf_backend.so`
+ - Onnx as `libnnkit_onnx_backend.so`
+
+- Backends for our implementation:
+ - Moco Tensorflow (TBD)
+ - Moco Onnx (TBD)
+
+# How to use
+
+## How to run inference with nnkit-run
+
+To run `nnkit-run`, we need to provide a backend module and argument(s) if required
+and optional `pre-` or `post-` action module(s)
+
+## How to pass arguments
+
+Syntax is `--argument` with `value` form. Existing arguments are as follows.
+- `--backend` [Backend module path]. Only one is needed.
+- `--backend-arg` [Backend argument]. Argument(s) for the backend.
+- `--pre` [Pre-Action module path]. Multiple Pre-Action can be given.
+- `--pre-arg` [Pre-Action argument]. Set argument(s) for the pre-action just before.
+- `--post` [Post-Action module path]. Multiple Post-Action can be given.
+- `--post-arg` [Post-Action argument]. Set argument(s) for the post-action just before.
+
+For example,
+```
+nnkit-run \
+--backend ./path/to/backend --backend-arg arg1 --backend-arg arg2 \
+--pre ./path/to/preA --pre-arg arg1preA --pre-arg arg2preA \
+--pre ./path/to/preB --pre-arg arg1preB --pre-arg arg2preB \
+--post ./path/to/postA --post-arg arg1postA
+```
+
+This will run
+- backend `./path/to/backend` with arguments `arg1 arg2` with
+ - pre-action `./path/to/preA` with arguments `arg1preA arg2preA`,
+ - pre-action `./path/to/preB` with arguments `arg1preB arg2preB` and
+ - post-action `./path/to/postA` with an argument `arg1postA`
+
+## Example : Running with Tensorflow backend
+
+To run Tensorflow backend, you need two parameters: model file in protobuf format (`pb` file) and input/output tensor information such as tensor name, data type, shape. Please refer to `test.info` files under `moco/test/tf`.
+
+```
+cd build
+
+compiler/nnkit/tools/run/nnkit-run \
+--backend ./compiler/nnkit-tf/backend/libnnkit_tf_backend.so \
+--backend-arg inceptionv3_non_slim_2015.pb \
+--backend-arg inceptionv3_non_slim_2015.info
+```
+
+## Example: Running with Onnx backend
+TBD
+
+## Example : Running with tflite backend
+
+```
+cd build
+
+compiler/nnkit/tools/run/nnkit-run \
+--backend ./compiler/nnkit-tflite/backend/libnnkit_tflite_backend.so \
+--backend-arg inceptionv3_non_slim_2015.tflite
+```
+
+## Example: Running with Caffe backend
+
+Running with caffe backend is similar to running with tflite, except that you need to provide `prototxt` file, `caffemodel` is not necessary, unless you want to use specific weights (weights are random if `caffemodel` is not provided and `prototxt` is not filled with specific weights):
+
+```
+cd build
+
+compiler/nnkit/tools/run/nnkit-run \
+--backend ./compiler/nnkit-caffe/backend/libnnkit_caffe_backend.so \
+--backend-arg inception_v3.prototxt
+```
+
+## Running with pre & post actions
+
+The above command for the tflite backend shows nothing except an `nnapi error: unable to open library libneuralnetworks.so` warning even though it runs correctly. The following command displays the inferred values.
+```
+cd build
+
+compiler/nnkit/tools/run/nnkit-run \
+--backend ./compiler/nnkit-tflite/backend/libnnkit_tflite_backend.so \
+--backend-arg inceptionv3_non_slim_2015.tflite \
+--post ./compiler/nnkit/actions/builtin/libnnkit_show_action.so
+```
+
+The following command initializes input tensors with random values generated by `RandomizeAction` pre-action.
+```
+compiler/nnkit/tools/run/nnkit-run \
+--backend ./compiler/nnkit-tflite/backend/libnnkit_tflite_backend.so \
+--backend-arg inceptionv3_non_slim_2015.tflite \
+--pre ./compiler/nnkit/actions/builtin/libnnkit_randomize_action.so \
+--post ./compiler/nnkit/actions/builtin/libnnkit_show_action.so
+```
+
+## Example: Dump HDF5
+
+You can drop a HDF5 file of inputs and outputs with `HDF5_export_action` action module.
+
+```
+cd build
+
+compiler/nnkit/tools/run/nnkit-run \
+--backend ./compiler/nnkit-tflite/backend/libnnkit_tflite_backend.so \
+--backend-arg inceptionv3_non_slim_2015.tflite \
+--pre ./compiler/nnkit/actions/builtin/libnnkit_randomize_action.so \ # randomize first
+--pre ./compiler/nnkit/actions/HDF5/libnnkit_HDF5_export_action.so \ # then drop input in HDF5 format
+--pre-arg ./pre.hdf5 \
+--post ./compiler/nnkit/actions/HDF5/libnnkit_HDF5_export_action.so \ # drop output in HDF5 format
+--post-arg ./post.hdf5
+```
+
+This will drop `pre.hdf5` and `post.hdf5` files containing input and output
+tensor of inceptionv3_non_slim_2015.tflite model.
+
+# To do
+- `nnkit` backend for `moco` Tensorflow frontend
+- `nnkit` backend for `moco` Onnx frontend
+- `nnkit` backend for Onnx frontend
diff --git a/compiler/nnkit/actions/CMakeLists.txt b/compiler/nnkit/actions/CMakeLists.txt
new file mode 100644
index 000000000..5ea6cdadd
--- /dev/null
+++ b/compiler/nnkit/actions/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectories()
diff --git a/compiler/nnkit/actions/HDF5/CMakeLists.txt b/compiler/nnkit/actions/HDF5/CMakeLists.txt
new file mode 100644
index 000000000..b799f6df1
--- /dev/null
+++ b/compiler/nnkit/actions/HDF5/CMakeLists.txt
@@ -0,0 +1,21 @@
+find_package(HDF5 COMPONENTS CXX QUIET)
+
+if(NOT HDF5_FOUND)
+ return()
+endif(NOT HDF5_FOUND)
+
+add_library(nnkit_HDF5_common STATIC Common.cpp)
+set_target_properties(nnkit_HDF5_common PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(nnkit_HDF5_common ${HDF5_CXX_LIBRARIES})
+
+add_library(nnkit_HDF5_export_action SHARED Export.cpp)
+target_include_directories(nnkit_HDF5_export_action PRIVATE ${HDF5_INCLUDE_DIRS})
+target_link_libraries(nnkit_HDF5_export_action nnkit_intf_action)
+target_link_libraries(nnkit_HDF5_export_action nnkit_HDF5_common)
+target_link_libraries(nnkit_HDF5_export_action stdex)
+
+add_library(nnkit_HDF5_import_action SHARED Import.cpp)
+target_include_directories(nnkit_HDF5_import_action PRIVATE ${HDF5_INCLUDE_DIRS})
+target_link_libraries(nnkit_HDF5_import_action nnkit_intf_action)
+target_link_libraries(nnkit_HDF5_import_action nnkit_HDF5_common)
+target_link_libraries(nnkit_HDF5_import_action stdex)
diff --git a/compiler/nnkit/actions/HDF5/Common.cpp b/compiler/nnkit/actions/HDF5/Common.cpp
new file mode 100644
index 000000000..b60070faf
--- /dev/null
+++ b/compiler/nnkit/actions/HDF5/Common.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Common.h"
+
+std::string mangle(const std::string &name)
+{
+ std::string res = name;
+
+ for (uint32_t n = 0; n < res.size(); ++n)
+ {
+ if (res.at(n) == '/')
+ {
+ res.at(n) = '_';
+ }
+ }
+
+ return res;
+}
+
+std::string value_grpname(void) { return "value"; }
+std::string value_filename(uint32_t n) { return std::to_string(n); }
+
+std::string name_grpname(void) { return "name"; }
+std::string name_filename(uint32_t n) { return std::to_string(n); }
diff --git a/compiler/nnkit/actions/HDF5/Common.h b/compiler/nnkit/actions/HDF5/Common.h
new file mode 100644
index 000000000..be7fd3e43
--- /dev/null
+++ b/compiler/nnkit/actions/HDF5/Common.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COMMON_H__
+#define __COMMON_H__
+
+#include <string>
+
+/**
+ * @brief Construct HDF5-compatible dataset name from a given string
+ *
+ * When someone attempts to access 'A/B/C' dataset, HDF5 tries to open
+ * dataset C in group B in top-level group A, which means that dataset
+ * names SHOULD NOT contain '/' in it.
+ *
+ * This mangle function replaces all the occurrences of '/' in a given
+ * string with '_' to construct HDF5-compatible dataset name.
+ */
+std::string mangle(const std::string &);
+
+#if 0
+Let us assume that a tensor context includes N + 1 tensors.
+
+Then, HDF5 export will generate a HDF5 file whose structure is given as follows:
+[value group]/
+ [file 0] <- A dataset that contains the value of 1st (=0) tensor
+ [file 1]
+ ...
+ [file N]
+[name group]/
+ [file 0] <- An attribute that contains the name of 1st (=0) tensor
+ [file 1]
+ ...
+ [file N]
+#endif
+
+/// @brief Return the name of "value group"
+std::string value_grpname(void);
+/// @brief Return the name of n-th tensor dataset
+std::string value_filename(uint32_t n);
+
+/// @brief Return the name of "name group"
+std::string name_grpname(void);
+/// @brief Return the name of n-th tensor attribute
+std::string name_filename(uint32_t n);
+
+#endif // __COMMON_H__
diff --git a/compiler/nnkit/actions/HDF5/Export.cpp b/compiler/nnkit/actions/HDF5/Export.cpp
new file mode 100644
index 000000000..389f5c050
--- /dev/null
+++ b/compiler/nnkit/actions/HDF5/Export.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Common.h"
+
+#include <nnkit/Action.h>
+
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <H5Cpp.h>
+
+using nnkit::TensorContext;
+
+class HD5ExportAction final : public nnkit::Action
+{
+public:
+ HD5ExportAction(const std::string &path) : _file{path, H5F_ACC_TRUNC}
+ {
+ _value_grp = _file.createGroup(value_grpname());
+ _name_grp = _file.createGroup(name_grpname());
+ }
+
+public:
+ void run(TensorContext &ctx) override
+ {
+ for (uint32_t n = 0; n < ctx.size(); ++n)
+ {
+ using nncc::core::ADT::tensor::Reader;
+
+ // TODO Support other data types
+ auto fn = [this](const TensorContext &ctx, uint32_t n, const Reader<float> &t) {
+ const auto name = ctx.name(n);
+ const auto shape = ctx.shape(n);
+
+ const auto rank = shape.rank();
+
+ hsize_t dims[rank];
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ dims[axis] = shape.dim(axis);
+ }
+
+ H5::DataSpace dataspace(rank, dims);
+
+ auto dataset =
+ _value_grp.createDataSet(value_filename(n), H5::PredType::IEEE_F32BE, dataspace);
+
+ float *data = new float[nncc::core::ADT::tensor::num_elements(shape)];
+
+ using nncc::core::ADT::tensor::Index;
+ using nncc::core::ADT::tensor::IndexEnumerator;
+ using nncc::core::ADT::tensor::LexicalLayout;
+
+ LexicalLayout layout{};
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ auto i = e.current();
+ data[layout.offset(shape, i)] = t.at(i);
+ }
+
+ dataset.write(data, H5::PredType::NATIVE_FLOAT);
+
+ delete[] data;
+
+ // Record name
+ {
+ H5::DataSpace name_dataspace(H5S_SCALAR);
+ H5::StrType name_datatype(H5::PredType::C_S1, name.size());
+
+ auto name_attr =
+ _name_grp.createAttribute(value_filename(n), name_datatype, name_dataspace);
+
+ name_attr.write(name_datatype, name);
+ }
+ };
+
+ ctx.getConstFloatTensor(n, fn);
+ }
+ }
+
+private:
+ H5::H5File _file;
+ H5::Group _value_grp;
+ H5::Group _name_grp;
+};
+
+#include <nnkit/CmdlineArguments.h>
+#include <stdex/Memory.h>
+
+extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
+{
+ return stdex::make_unique<HD5ExportAction>(args.at(0));
+}
diff --git a/compiler/nnkit/actions/HDF5/Import.cpp b/compiler/nnkit/actions/HDF5/Import.cpp
new file mode 100644
index 000000000..bba5ab701
--- /dev/null
+++ b/compiler/nnkit/actions/HDF5/Import.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Common.h"
+
+#include <nnkit/Action.h>
+
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <H5Cpp.h>
+
+#include <cassert>
+
+using nnkit::TensorContext;
+
+class HD5ImportAction final : public nnkit::Action
+{
+public:
+ HD5ImportAction(const std::string &path) : _file{path, H5F_ACC_RDONLY}
+ {
+ _value_grp = _file.openGroup(value_grpname());
+ }
+
+public:
+ void run(TensorContext &ctx) override
+ {
+ for (uint32_t n = 0; n < ctx.size(); ++n)
+ {
+ using nncc::core::ADT::tensor::Accessor;
+
+ auto fn = [this](const TensorContext &ctx, uint32_t n, Accessor<float> &t) {
+ const auto name = ctx.name(n);
+
+ auto dataset = _value_grp.openDataSet(value_filename(n));
+
+ // TODO Support non-float tensors
+ assert(dataset.getDataType() == H5::PredType::IEEE_F32BE);
+
+ // TODO Check whether shape is consistent
+ const auto shape = ctx.shape(n);
+
+ std::vector<float> buffer;
+
+ using nncc::core::ADT::tensor::num_elements;
+ buffer.resize(num_elements(shape));
+
+ dataset.read(buffer.data(), H5::PredType::NATIVE_FLOAT);
+
+ using nncc::core::ADT::tensor::Index;
+ using nncc::core::ADT::tensor::IndexEnumerator;
+ using nncc::core::ADT::tensor::LexicalLayout;
+
+ LexicalLayout layout{};
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ auto i = e.current();
+ t.at(i) = buffer[layout.offset(shape, i)];
+ }
+
+ // TODO Check name
+ };
+
+ try
+ {
+ ctx.getMutableFloatTensor(n, fn);
+ }
+ catch (const H5::FileIException &)
+ {
+ // Skip if data is not present in HDF5 file
+ }
+ }
+ }
+
+private:
+ H5::H5File _file;
+ H5::Group _value_grp;
+};
+
+#include <nnkit/CmdlineArguments.h>
+#include <stdex/Memory.h>
+
+extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
+{
+ return stdex::make_unique<HD5ImportAction>(args.at(0));
+}
diff --git a/compiler/nnkit/actions/builtin/CMakeLists.txt b/compiler/nnkit/actions/builtin/CMakeLists.txt
new file mode 100644
index 000000000..910e12ea9
--- /dev/null
+++ b/compiler/nnkit/actions/builtin/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_library(nnkit_show_action SHARED Show.cpp)
+target_link_libraries(nnkit_show_action nnkit_intf_action)
+target_link_libraries(nnkit_show_action stdex)
+
+add_library(nnkit_randomize_action SHARED Randomize.cpp)
+target_link_libraries(nnkit_randomize_action nnkit_intf_action)
+target_link_libraries(nnkit_randomize_action stdex)
diff --git a/compiler/nnkit/actions/builtin/Randomize.cpp b/compiler/nnkit/actions/builtin/Randomize.cpp
new file mode 100644
index 000000000..9b023ef3b
--- /dev/null
+++ b/compiler/nnkit/actions/builtin/Randomize.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <nnkit/Action.h>
+
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <chrono>
+#include <random>
+
+using nnkit::TensorContext;
+
+struct RandomizeAction final : public nnkit::Action
+{
+ void run(TensorContext &ctx) override
+ {
+ int seed = std::chrono::system_clock::now().time_since_epoch().count();
+
+ std::minstd_rand rand(seed);
+ std::normal_distribution<float> dist(0.0f, 2.0f);
+
+ for (uint32_t n = 0; n < ctx.size(); ++n)
+ {
+ using nncc::core::ADT::tensor::Accessor;
+
+ auto fn = [&dist, &rand](const TensorContext &ctx, uint32_t n, Accessor<float> &t) {
+ using nncc::core::ADT::tensor::Index;
+ using nncc::core::ADT::tensor::IndexEnumerator;
+
+ for (IndexEnumerator e{ctx.shape(n)}; e.valid(); e.advance())
+ {
+ t.at(e.current()) = dist(rand);
+ }
+ };
+
+ ctx.getMutableFloatTensor(n, fn);
+ }
+ }
+};
+
+#include <nnkit/CmdlineArguments.h>
+#include <stdex/Memory.h>
+
+extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
+{
+ return stdex::make_unique<RandomizeAction>();
+}
diff --git a/compiler/nnkit/actions/builtin/Show.cpp b/compiler/nnkit/actions/builtin/Show.cpp
new file mode 100644
index 000000000..2630177ef
--- /dev/null
+++ b/compiler/nnkit/actions/builtin/Show.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <nnkit/Action.h>
+
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <iostream>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+std::ostream &operator<<(std::ostream &os, const Index &index)
+{
+ if (index.rank() > 0)
+ {
+ os << index.at(0);
+ for (uint32_t axis = 1; axis < index.rank(); ++axis)
+ {
+ os << "," << index.at(axis);
+ }
+ }
+ return os;
+}
+
+struct ShowAction final : public nnkit::Action
+{
+ void run(nnkit::TensorContext &ctx) override;
+};
+
+void ShowAction::run(nnkit::TensorContext &ctx)
+{
+ std::cout << "count: " << ctx.size() << std::endl;
+ for (uint32_t n = 0; n < ctx.size(); ++n)
+ {
+ std::cout << " tensor(" << n << ") : " << ctx.name(n) << std::endl;
+
+ using nncc::core::ADT::tensor::Reader;
+ using nnkit::TensorContext;
+
+ ctx.getConstFloatTensor(n, [](const TensorContext &ctx, uint32_t n, const Reader<float> &t) {
+ for (IndexEnumerator e{ctx.shape(n)}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+
+ std::cout << " " << index << ": " << t.at(index) << std::endl;
+ }
+ });
+ }
+}
+
+#include <nnkit/CmdlineArguments.h>
+#include <stdex/Memory.h>
+
+extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
+{
+ return stdex::make_unique<ShowAction>();
+}
diff --git a/compiler/nnkit/requires.cmake b/compiler/nnkit/requires.cmake
new file mode 100644
index 000000000..e78f0d400
--- /dev/null
+++ b/compiler/nnkit/requires.cmake
@@ -0,0 +1,8 @@
+require("angkor")
+require("nnkit-intf")
+require("nnkit-misc")
+require("nnkit-caffe")
+require("nnkit-tflite")
+require("nnkit-tf")
+require("nnkit-mocotf")
+require("nnkit-onnxrt")
diff --git a/compiler/nnkit/tools/CMakeLists.txt b/compiler/nnkit/tools/CMakeLists.txt
new file mode 100644
index 000000000..5ea6cdadd
--- /dev/null
+++ b/compiler/nnkit/tools/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectories()
diff --git a/compiler/nnkit/tools/benchmark/CMakeLists.txt b/compiler/nnkit/tools/benchmark/CMakeLists.txt
new file mode 100644
index 000000000..c2cde00f4
--- /dev/null
+++ b/compiler/nnkit/tools/benchmark/CMakeLists.txt
@@ -0,0 +1,14 @@
+if(NOT TARGET nnkit_support_cmdline)
+ return()
+endif(NOT TARGET nnkit_support_cmdline)
+
+if(NOT TARGET nnkit_support_backend)
+ return()
+endif(NOT TARGET nnkit_support_backend)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(nnkit-benchmark ${SOURCES})
+target_link_libraries(nnkit-benchmark nnkit_support_cmdline)
+target_link_libraries(nnkit-benchmark nnkit_support_backend)
+target_link_libraries(nnkit-benchmark stdex)
diff --git a/compiler/nnkit/tools/benchmark/src/Benchmark.cpp b/compiler/nnkit/tools/benchmark/src/Benchmark.cpp
new file mode 100644
index 000000000..6c3ebc90b
--- /dev/null
+++ b/compiler/nnkit/tools/benchmark/src/Benchmark.cpp
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <nnkit/CmdlineArguments.h>
+#include <nnkit/VectorArguments.h>
+#include <nnkit/BackendPlugin.h>
+
+#include <stdex/Memory.h>
+
+#include <map>
+#include <string>
+
+#include <chrono>
+
+#include <iostream>
+#include <iomanip>
+
+using stdex::make_unique;
+
+using std::chrono::milliseconds;
+using std::chrono::microseconds;
+
+using milliseconds_f = std::chrono::duration<float, std::milli>;
+
+using std::chrono::duration_cast;
+
+namespace
+{
+
+template <class Rep, class Period> class Session
+{
+public:
+ Session(std::chrono::duration<Rep, Period> *out) : _out{out}
+ {
+ // DO NOTHING
+ }
+
+public:
+ template <typename Callable> void measure(Callable cb)
+ {
+ using namespace std::chrono;
+
+ auto beg = steady_clock::now();
+ cb();
+ auto end = steady_clock::now();
+
+ (*_out) += duration_cast<duration<Rep, Period>>(end - beg);
+ }
+
+private:
+ std::chrono::duration<Rep, Period> *_out;
+};
+
+template <class Rep, class Period, typename Callable>
+Session<Rep, Period> &operator<<(Session<Rep, Period> &&sess, Callable &&cb)
+{
+ sess.measure(std::forward<Callable>(cb));
+ return sess;
+}
+
+template <class Rep, class Period>
+Session<Rep, Period> measure(std::chrono::duration<Rep, Period> &out)
+{
+ return Session<Rep, Period>{&out};
+}
+
+class Message
+{
+public:
+ Message(const std::string &head) { std::cout << head; }
+
+public:
+ ~Message() { std::cout << std::endl; }
+
+public:
+ std::ostream &os(void) const { return std::cout; }
+};
+
+Message info(void) { return Message{"INFO: "}; }
+
+using OptionHook = std::function<void(const std::string &arg)>;
+
+} // namespace
+
+template <typename T> std::ostream &operator<<(const ::Message &m, T &&value)
+{
+ return m.os() << std::forward<T>(value);
+}
+
+int main(int argc, char **argv)
+{
+ std::unique_ptr<nnkit::BackendPlugin> backend_plugin;
+ nnkit::VectorArguments backend_arguments;
+
+ uint32_t warmup_count = 3;
+ uint32_t benchmark_count = 1;
+
+ // Simple argument parser (based on map)
+ std::map<std::string, OptionHook> argparse;
+
+ argparse["--backend"] = [&backend_plugin](const std::string &tag) {
+ backend_plugin = std::move(nnkit::make_backend_plugin(tag));
+ };
+
+ argparse["--backend-arg"] = [&backend_arguments](const std::string &arg) {
+ backend_arguments.append(arg);
+ };
+
+ argparse["--benchmark-count"] = [&benchmark_count](const std::string &arg) {
+ benchmark_count = std::stoi(arg);
+ };
+
+ for (int n = 1; n < argc; n += 2)
+ {
+ const std::string tag{argv[n]};
+ const std::string arg{argv[n + 1]};
+
+ auto it = argparse.find(tag);
+
+ if (it == argparse.end())
+ {
+ std::cerr << "Option '" << tag << "' is not supported" << std::endl;
+ return 255;
+ }
+
+ it->second(arg);
+ }
+
+ // CHECK: Command-line arguments
+ if (backend_plugin == nullptr)
+ {
+ std::cerr << "ERROR: --backend is mssing" << std::endl;
+ return 255;
+ }
+
+ // Initialize
+ auto backend = backend_plugin->create(backend_arguments);
+
+ // Run warm-up iterations
+ info() << "Start warming-up iterations(" << warmup_count << ")";
+
+ for (uint32_t n = 0; n < warmup_count; ++n)
+ {
+ backend->prepare([](nnkit::TensorContext &ctx) {
+ // DO NOTHING
+ });
+
+ backend->run();
+
+ backend->teardown([](nnkit::TensorContext &ctx) {
+ // DO NOTHING
+ });
+ }
+
+ // Run benchmark iterations
+ info() << "Start benchmark iterations(" << benchmark_count << ")";
+
+ microseconds elapsed_min{0};
+ microseconds elapsed_max{0};
+ microseconds elapsed_total{0};
+
+ for (uint32_t n = 0; n < benchmark_count; ++n)
+ {
+ backend->prepare([](nnkit::TensorContext &ctx) {
+ // DO NOTHING
+ });
+
+ microseconds elapsed{0};
+
+ ::measure(elapsed) << [&](void) { backend->run(); };
+
+ info() << "#" << n + 1 << " takes " << duration_cast<milliseconds_f>(elapsed).count() << "ms";
+
+ elapsed_min = (n == 0) ? elapsed : std::min(elapsed_min, elapsed);
+ elapsed_max = (n == 0) ? elapsed : std::max(elapsed_max, elapsed);
+ elapsed_total += elapsed;
+
+ backend->teardown([](nnkit::TensorContext &ctx) {
+ // DO NOTHING
+ });
+ }
+
+ // Show summary
+ info() << "Show statistics";
+
+ auto min_ms = duration_cast<milliseconds_f>(elapsed_min).count();
+ auto max_ms = duration_cast<milliseconds_f>(elapsed_max).count();
+ auto avg_ms = duration_cast<milliseconds_f>(elapsed_total).count() / benchmark_count;
+
+ info() << "MIN: " << min_ms << "ms"
+ << ", MAX: " << max_ms << "ms, AVG: " << avg_ms << "ms";
+
+ return 0;
+}
diff --git a/compiler/nnkit/tools/run/CMakeLists.txt b/compiler/nnkit/tools/run/CMakeLists.txt
new file mode 100644
index 000000000..5f42ed941
--- /dev/null
+++ b/compiler/nnkit/tools/run/CMakeLists.txt
@@ -0,0 +1,22 @@
+if(NOT TARGET nnkit_intf_action)
+ return()
+endif(NOT TARGET nnkit_intf_action)
+
+if(NOT TARGET nnkit_intf_backend)
+ return()
+endif(NOT TARGET nnkit_intf_backend)
+
+if(NOT TARGET nnkit_support_cmdline)
+ return()
+endif(NOT TARGET nnkit_support_cmdline)
+
+if(NOT TARGET nnkit_support_backend)
+ return()
+endif(NOT TARGET nnkit_support_backend)
+
+add_executable(nnkit-run nnkit-run.cpp)
+target_link_libraries(nnkit-run nnkit_intf_action)
+target_link_libraries(nnkit-run nnkit_intf_backend)
+target_link_libraries(nnkit-run nnkit_support_cmdline)
+target_link_libraries(nnkit-run nnkit_support_backend)
+target_link_libraries(nnkit-run stdex)
diff --git a/compiler/nnkit/tools/run/nnkit-run.cpp b/compiler/nnkit/tools/run/nnkit-run.cpp
new file mode 100644
index 000000000..e60e5797a
--- /dev/null
+++ b/compiler/nnkit/tools/run/nnkit-run.cpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <nnkit/CmdlineArguments.h>
+#include <nnkit/VectorArguments.h>
+#include <nnkit/BackendPlugin.h>
+
+namespace
+{
+
+class Section
+{
+public:
+ Section() = default;
+
+public:
+ const nnkit::CmdlineArguments &args(void) const { return _args; }
+
+public:
+ void append(const std::string &arg) { _args.append(arg); }
+
+private:
+ nnkit::VectorArguments _args;
+};
+}
+
+namespace
+{
+
+class BackendSection : public Section
+{
+public:
+  BackendSection(const std::string &path) : _path{path}
+  {
+    // DO NOTHING
+  }
+
+public:
+  // Load the backend plugin at '_path' and create a backend from the collected arguments.
+  // NOTE The plugin handle ('_plugin') must outlive the returned backend.
+  std::unique_ptr<nnkit::Backend> initialize(void)
+  {
+    _plugin = nnkit::make_backend_plugin(_path); // prvalue: std::move here would inhibit copy elision
+    return _plugin->create(args());
+  }
+
+private:
+  std::string _path;
+  std::unique_ptr<nnkit::BackendPlugin> _plugin;
+};
+}
+
+// TODO Extract Action-related helpers
+#include <nnkit/Action.h>
+
+#include <memory>
+
+#include <dlfcn.h>
+#include <assert.h>
+
+namespace
+{
+
+class ActionBinder
+{
+private:
+ typedef std::unique_ptr<nnkit::Action> (*Entry)(const nnkit::CmdlineArguments &);
+
+public:
+ ActionBinder(const std::string &path)
+ {
+ // Q: Do we need RTLD_GLOBAL here?
+ _handle = dlopen(path.c_str(), RTLD_LAZY);
+ assert(_handle != nullptr);
+
+ _entry = reinterpret_cast<Entry>(dlsym(_handle, "make_action"));
+ assert(_entry != nullptr);
+ }
+
+public:
+ // Copy is not allowed to avoid double close
+ ActionBinder(const ActionBinder &) = delete;
+ ActionBinder(ActionBinder &&binder)
+ {
+    // Handle is transferred from 'binder' instance into this instance.
+ _handle = binder._handle;
+ _entry = binder._entry;
+
+ binder._handle = nullptr;
+ binder._entry = nullptr;
+ }
+
+public:
+ ~ActionBinder()
+ {
+ if (_handle)
+ {
+ dlclose(_handle);
+ }
+ }
+
+public:
+ std::unique_ptr<nnkit::Action> make(const nnkit::CmdlineArguments &args) const
+ {
+ return _entry(args);
+ }
+
+private:
+ void *_handle;
+ Entry _entry;
+};
+}
+
+namespace
+{
+
+class ActionSection : public Section
+{
+public:
+ ActionSection(const std::string &path) : _binder{path}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::unique_ptr<nnkit::Action> initialize(void) const { return _binder.make(args()); }
+
+private:
+ ActionBinder _binder;
+};
+}
+
+#include <stdex/Memory.h>
+
+#include <map>
+#include <iostream>
+
+int main(int argc, char **argv)
+{
+  // Usage:
+  //  [Command] --backend [Backend module path] --backend-arg ... --backend-arg ...
+  //            --pre [Action module path] --pre-arg ... --pre-arg ...
+  //            --post [Action module path] --post-arg ... --post-arg ...
+
+  // Argument sections
+  //
+  // NOTE Command-line arguments should include one backend section, and may include multiple
+  //      pre/post action sections.
+  struct Sections
+  {
+    std::unique_ptr<BackendSection> backend;
+    std::vector<ActionSection> pre;
+    std::vector<ActionSection> post;
+  };
+
+  Sections sections;
+
+  // Simple argument parser (based on map)
+  std::map<std::string, std::function<void(const std::string &arg)>> argparse;
+
+  argparse["--backend"] = [&sections](const std::string &tag) {
+    sections.backend = stdex::make_unique<BackendSection>(tag);
+  };
+
+  argparse["--backend-arg"] = [&sections](const std::string &arg) {
+    sections.backend->append(arg);
+  };
+
+  argparse["--pre"] = [&sections](const std::string &tag) { sections.pre.emplace_back(tag); };
+
+  argparse["--pre-arg"] = [&sections](const std::string &arg) { sections.pre.back().append(arg); };
+
+  argparse["--post"] = [&sections](const std::string &tag) { sections.post.emplace_back(tag); };
+
+  argparse["--post-arg"] = [&sections](const std::string &arg) {
+    sections.post.back().append(arg);
+  };
+
+  if (argc < 2)
+  {
+    std::cerr << "Usage:" << std::endl
+              << "[Command] --backend [Backend module path] "
+              << "--backend-arg [Backend argument] ..." << std::endl
+              << "          --pre [Pre-Action module path] "
+              << "--pre-arg [Pre-Action argument] ..." << std::endl
+              << "          --post [Post-Action module path] "
+              << "--post-arg [Post-Action argument] ..." << std::endl;
+    return 255;
+  }
+
+  for (int n = 1; n + 1 < argc; n += 2) // 'n + 1 < argc': each option needs a value; never read argv[argc] (nullptr)
+  {
+    const std::string tag{argv[n]};
+    const std::string arg{argv[n + 1]};
+
+    auto it = argparse.find(tag);
+
+    if (it == argparse.end())
+    {
+      std::cerr << "Option '" << tag << "' is not supported" << std::endl;
+      return 255;
+    }
+
+    it->second(arg);
+  }
+
+  // we need a backend
+  if (sections.backend == nullptr)
+  {
+    std::cerr << "Error: Backend is required. Provide with [--backend]" << std::endl;
+    return 255;
+  }
+
+  // Initialize a backend
+  auto backend = sections.backend->initialize();
+
+  // Initialize pre actions
+  std::vector<std::unique_ptr<nnkit::Action>> pre_actions;
+
+  for (const auto &section : sections.pre)
+  {
+    pre_actions.emplace_back(section.initialize());
+  }
+
+  // Initialize post actions
+  std::vector<std::unique_ptr<nnkit::Action>> post_actions;
+
+  for (const auto &section : sections.post)
+  {
+    post_actions.emplace_back(section.initialize());
+  }
+
+  //
+  // Run inference
+  //
+  backend->prepare([&pre_actions](nnkit::TensorContext &ctx) {
+    // Run pre-actions on prepared tensor context
+    for (auto &action : pre_actions)
+    {
+      action->run(ctx);
+    }
+  });
+
+  backend->run();
+
+  backend->teardown([&post_actions](nnkit::TensorContext &ctx) {
+    // Run post-actions before teardown
+    for (auto &action : post_actions)
+    {
+      action->run(ctx);
+    }
+  });
+
+  return 0;
+}
diff --git a/compiler/nnop/CMakeLists.txt b/compiler/nnop/CMakeLists.txt
new file mode 100644
index 000000000..82c0e3a86
--- /dev/null
+++ b/compiler/nnop/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_library(nnop INTERFACE)
+target_include_directories(nnop INTERFACE include)
+target_link_libraries(nnop INTERFACE angkor)
+
+nnas_find_package(GTest QUIET)
+
+if(NOT GTest_FOUND)
+ return()
+endif(NOT GTest_FOUND)
+
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+
+add_executable(nnop_test ${TESTS})
+target_link_libraries(nnop_test nnop)
+target_link_libraries(nnop_test gtest_main)
+add_test(nnop_test nnop_test)
diff --git a/compiler/nnop/include/nnop/Conv2D.h b/compiler/nnop/include/nnop/Conv2D.h
new file mode 100644
index 000000000..a39caa3d8
--- /dev/null
+++ b/compiler/nnop/include/nnop/Conv2D.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNOP_CONV2D_H__
+#define __NNOP_CONV2D_H__
+
+#include "nnop/PadInfo.h"
+#include "nnop/StrideInfo.h"
+
+#include <nncc/core/ADT/feature/Shape.h>
+#include <nncc/core/ADT/feature/Reader.h>
+#include <nncc/core/ADT/feature/Accessor.h>
+
+#include <nncc/core/ADT/kernel/Shape.h>
+#include <nncc/core/ADT/kernel/Reader.h>
+
+namespace nnop
+{
+
+template <typename OutputDType, typename InputDType, typename KernelDType>
+void conv(const nncc::core::ADT::feature::Shape &out_shape,
+ nncc::core::ADT::feature::Accessor<OutputDType> &out_data,
+ const nncc::core::ADT::feature::Shape &in_shape,
+ const nncc::core::ADT::feature::Reader<InputDType> &in_data,
+ const nncc::core::ADT::kernel::Shape &ker_shape,
+ const nncc::core::ADT::kernel::Reader<KernelDType> &ker_data, const PadInfo &pad_info,
+ const StrideInfo &stride_info)
+{
+ for (uint32_t out_ch = 0; out_ch < out_shape.depth(); ++out_ch)
+ {
+ for (uint32_t out_row = 0; out_row < out_shape.height(); ++out_row)
+ {
+ for (uint32_t out_col = 0; out_col < out_shape.width(); ++out_col)
+ {
+ OutputDType out_value = 0;
+
+ for (uint32_t ker_ch = 0; ker_ch < ker_shape.depth(); ++ker_ch)
+ {
+ for (uint32_t ker_row = 0; ker_row < ker_shape.height(); ++ker_row)
+ {
+ for (uint32_t ker_col = 0; ker_col < ker_shape.width(); ++ker_col)
+ {
+ const int64_t vertical_stride = static_cast<int64_t>(stride_info.vertical());
+ const int64_t horizontal_stride = static_cast<int64_t>(stride_info.horizontal());
+ const int64_t top_padding = static_cast<int64_t>(pad_info.top());
+ const int64_t left_padding = static_cast<int64_t>(pad_info.left());
+
+ const uint32_t in_ch = ker_ch;
+ const int64_t in_row = vertical_stride * out_row - top_padding + ker_row;
+ const int64_t in_col = horizontal_stride * out_col - left_padding + ker_col;
+
+ const bool is_padding = (in_row < 0) || (in_row >= in_shape.height()) ||
+ (in_col < 0) || (in_col >= in_shape.width());
+
+ const auto in_value = (is_padding) ? 0
+ : in_data.at(in_ch, static_cast<uint32_t>(in_row),
+ static_cast<uint32_t>(in_col));
+
+ const auto ker_value = ker_data.at(out_ch, in_ch, ker_row, ker_col);
+
+ out_value += in_value * ker_value;
+ }
+ }
+ }
+
+ out_data.at(out_ch, out_row, out_col) = out_value;
+ }
+ }
+ }
+}
+
+} // namespace nnop
+
+#endif // __NNOP_CONV2D_H__
diff --git a/compiler/nnop/include/nnop/PadInfo.h b/compiler/nnop/include/nnop/PadInfo.h
new file mode 100644
index 000000000..228f08514
--- /dev/null
+++ b/compiler/nnop/include/nnop/PadInfo.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNOP_PAD_INFO_H__
+#define __NNOP_PAD_INFO_H__
+
+#include <cstdint>
+
+namespace nnop
+{
+
+class PadInfo
+{
+public:
+ PadInfo(uint32_t top, uint32_t bottom, uint32_t left, uint32_t right)
+ : _top{top}, _bottom{bottom}, _left{left}, _right{right}
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t top(void) const { return _top; }
+ uint32_t bottom(void) const { return _bottom; }
+ uint32_t left(void) const { return _left; }
+ uint32_t right(void) const { return _right; }
+
+private:
+ uint32_t _top;
+ uint32_t _bottom;
+ uint32_t _left;
+ uint32_t _right;
+};
+
+} // namespace nnop
+
+#endif // __NNOP_PAD_INFO_H__
diff --git a/compiler/nnop/include/nnop/StrideInfo.h b/compiler/nnop/include/nnop/StrideInfo.h
new file mode 100644
index 000000000..e47489fa7
--- /dev/null
+++ b/compiler/nnop/include/nnop/StrideInfo.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNOP_STRIDE_INFO_H__
+#define __NNOP_STRIDE_INFO_H__
+
+#include <cstdint>
+
+namespace nnop
+{
+
+class StrideInfo
+{
+public:
+  StrideInfo(uint32_t vertical, uint32_t horizontal) : _vertical{vertical}, _horizontal{horizontal}
+  {
+    // DO NOTHING
+  }
+
+public:
+  uint32_t vertical(void) const { return _vertical; }
+  uint32_t horizontal(void) const { return _horizontal; }
+
+private:
+  uint32_t _vertical; // declared before _horizontal to match mem-initializer order (-Wreorder)
+  uint32_t _horizontal;
+};
+
+} // namespace nnop
+
+#endif // __NNOP_STRIDE_INFO_H__
diff --git a/compiler/nnop/requires.cmake b/compiler/nnop/requires.cmake
new file mode 100644
index 000000000..654db88c3
--- /dev/null
+++ b/compiler/nnop/requires.cmake
@@ -0,0 +1 @@
+require("angkor")
diff --git a/compiler/nnop/src/Conv2D.test.cpp b/compiler/nnop/src/Conv2D.test.cpp
new file mode 100644
index 000000000..9cb2ecb82
--- /dev/null
+++ b/compiler/nnop/src/Conv2D.test.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnop/Conv2D.h"
+
+#include <nncc/core/ADT/feature/Overlay.h>
+#include <nncc/core/ADT/feature/CHWLayout.h>
+
+#include <nncc/core/ADT/kernel/Overlay.h>
+#include <nncc/core/ADT/kernel/NCHWLayout.h>
+
+#include <gtest/gtest.h>
+
+using namespace nnop;
+using namespace nncc::core::ADT;
+
+TEST(CONV2D, conv_1)
+{
+ const feature::Shape ofm_shape{1, 1, 1};
+ int ofm_data[] = {0};
+ auto ofm_overlay = feature::make_overlay<int, feature::CHWLayout>(ofm_shape, ofm_data);
+
+ const feature::Shape ifm_shape{1, 3, 3};
+ int ifm_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+ auto ifm_overlay = feature::make_overlay<int, feature::CHWLayout>(ifm_shape, ifm_data);
+
+ const kernel::Shape ker_shape{1, 1, 3, 3};
+ int ker_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+ auto ker_overlay = kernel::make_overlay<int, kernel::NCHWLayout>(ker_shape, ker_data);
+
+ const PadInfo pad{0, 0, 0, 0};
+ const StrideInfo stride{1, 1};
+
+ nnop::conv(ofm_shape, ofm_overlay, ifm_shape, ifm_overlay, ker_shape, ker_overlay, pad, stride);
+
+ EXPECT_EQ(ofm_data[0], 204);
+}
diff --git a/compiler/nnop/src/PadInfo.test.cpp b/compiler/nnop/src/PadInfo.test.cpp
new file mode 100644
index 000000000..592e7f167
--- /dev/null
+++ b/compiler/nnop/src/PadInfo.test.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnop/PadInfo.h"
+
+#include <gtest/gtest.h>
+
+TEST(PAD_INFO, explicit_constructor)
+{
+ const uint32_t top = 3;
+ const uint32_t bottom = 4;
+ const uint32_t left = 5;
+ const uint32_t right = 6;
+
+ nnop::PadInfo pad_info{top, bottom, left, right};
+
+ ASSERT_EQ(pad_info.top(), top);
+ ASSERT_EQ(pad_info.bottom(), bottom);
+ ASSERT_EQ(pad_info.left(), left);
+ ASSERT_EQ(pad_info.right(), right);
+}
diff --git a/compiler/nnop/src/StrideInfo.test.cpp b/compiler/nnop/src/StrideInfo.test.cpp
new file mode 100644
index 000000000..d40f2fafc
--- /dev/null
+++ b/compiler/nnop/src/StrideInfo.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnop/StrideInfo.h"
+
+#include <gtest/gtest.h>
+
+TEST(STRIDE_INFO, explicit_constructor)
+{
+ const uint32_t vertical = 3;
+ const uint32_t horizontal = 4;
+
+ nnop::StrideInfo stride_info{vertical, horizontal};
+
+ ASSERT_EQ(stride_info.vertical(), vertical);
+ ASSERT_EQ(stride_info.horizontal(), horizontal);
+}
diff --git a/compiler/nnsuite/CMakeLists.txt b/compiler/nnsuite/CMakeLists.txt
new file mode 100644
index 000000000..5ea6cdadd
--- /dev/null
+++ b/compiler/nnsuite/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectories()
diff --git a/compiler/nnsuite/conv/CMakeLists.txt b/compiler/nnsuite/conv/CMakeLists.txt
new file mode 100644
index 000000000..a120de2f3
--- /dev/null
+++ b/compiler/nnsuite/conv/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_subdirectory(model)
+add_subdirectory(nnkit-caffe)
+add_subdirectory(nnkit-tflite)
diff --git a/compiler/nnsuite/conv/model/CMakeLists.txt b/compiler/nnsuite/conv/model/CMakeLists.txt
new file mode 100644
index 000000000..5864397ad
--- /dev/null
+++ b/compiler/nnsuite/conv/model/CMakeLists.txt
@@ -0,0 +1,6 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(nnsuite_conv STATIC ${SOURCES})
+set_target_properties(nnsuite_conv PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(nnsuite_conv PUBLIC include)
+target_link_libraries(nnsuite_conv PUBLIC angkor)
diff --git a/compiler/nnsuite/conv/model/include/nnsuite/conv/Model.h b/compiler/nnsuite/conv/model/include/nnsuite/conv/Model.h
new file mode 100644
index 000000000..2aa9d2848
--- /dev/null
+++ b/compiler/nnsuite/conv/model/include/nnsuite/conv/Model.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNSUITE_CONV_MODEL_H__
+#define __NNSUITE_CONV_MODEL_H__
+
+#include <nncc/core/ADT/feature/Shape.h>
+#include <nncc/core/ADT/kernel/Shape.h>
+#include <nncc/core/ADT/kernel/Reader.h>
+
+#include <string>
+
+namespace nnsuite
+{
+namespace conv
+{
+
+struct Model
+{
+ virtual ~Model() = default;
+
+ virtual const nncc::core::ADT::feature::Shape &ifm_shape(void) const = 0;
+ virtual const std::string &ifm_name(void) const = 0;
+
+ virtual const nncc::core::ADT::feature::Shape &ofm_shape(void) const = 0;
+ virtual const std::string &ofm_name(void) const = 0;
+
+ virtual const nncc::core::ADT::kernel::Shape &ker_shape(void) const = 0;
+ virtual const nncc::core::ADT::kernel::Reader<float> &ker_data(void) const = 0;
+
+ // TODO Support padding configuration
+ // TODO Support stride configuration
+};
+
+// TODO Support model validation
+
+} // namespace conv
+} // namespace nnsuite
+
+#endif // __NNSUITE_CONV_MODEL_H__
diff --git a/compiler/nnsuite/conv/model/include/nnsuite/conv/RandomModel.h b/compiler/nnsuite/conv/model/include/nnsuite/conv/RandomModel.h
new file mode 100644
index 000000000..b93de8e2d
--- /dev/null
+++ b/compiler/nnsuite/conv/model/include/nnsuite/conv/RandomModel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNSUITE_CONV2D_RANDOM_MODEL_H__
+#define __NNSUITE_CONV2D_RANDOM_MODEL_H__
+
+#include "nnsuite/conv/Model.h"
+
+#include <nncc/core/ADT/kernel/Buffer.h>
+
+#include <string>
+
+namespace nnsuite
+{
+namespace conv
+{
+
+class RandomModel final : public Model
+{
+public:
+ explicit RandomModel(int32_t seed);
+
+public:
+ const nncc::core::ADT::feature::Shape &ifm_shape(void) const override { return _ifm_shape; }
+ const std::string &ifm_name(void) const override { return _ifm_name; }
+
+public:
+ const nncc::core::ADT::feature::Shape &ofm_shape(void) const override { return _ofm_shape; }
+ const std::string &ofm_name(void) const override { return _ofm_name; }
+
+public:
+ const nncc::core::ADT::kernel::Shape &ker_shape(void) const override
+ {
+ return _ker_buffer.shape();
+ }
+
+ const nncc::core::ADT::kernel::Reader<float> &ker_data(void) const override
+ {
+ return _ker_buffer;
+ }
+
+private:
+ const nncc::core::ADT::feature::Shape _ifm_shape;
+ const std::string _ifm_name;
+
+private:
+ const nncc::core::ADT::feature::Shape _ofm_shape;
+ const std::string _ofm_name;
+
+private:
+ nncc::core::ADT::kernel::Buffer<float> _ker_buffer;
+};
+
+} // namespace conv
+} // namespace nnsuite
+
+#endif // __NNSUITE_CONV2D_RANDOM_MODEL_H__
diff --git a/compiler/nnsuite/conv/model/src/RandomModel.cpp b/compiler/nnsuite/conv/model/src/RandomModel.cpp
new file mode 100644
index 000000000..7b15d4c96
--- /dev/null
+++ b/compiler/nnsuite/conv/model/src/RandomModel.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnsuite/conv/RandomModel.h"
+
+#include <nncc/core/ADT/kernel/NCHWLayout.h>
+
+#include <random>
+
+using namespace nncc::core::ADT;
+
+namespace nnsuite
+{
+namespace conv
+{
+
+RandomModel::RandomModel(int32_t seed)
+    : _ifm_shape{1, 8, 8}, _ifm_name{"ifm"}, _ofm_shape{2, 6, 6}, _ofm_name{"ofm"}, // decl order
+      _ker_buffer{kernel::Shape{2, 1, 3, 3}, kernel::NCHWLayout{}}
+{
+  std::default_random_engine gen{static_cast<uint32_t>(seed)};
+  std::normal_distribution<float> dist{0.0f, 1.0f};
+
+  const uint32_t N = _ker_buffer.shape().count();
+  const uint32_t C = _ker_buffer.shape().depth();
+  const uint32_t H = _ker_buffer.shape().height();
+  const uint32_t W = _ker_buffer.shape().width();
+
+  for (uint32_t n = 0; n < N; ++n)
+  {
+    for (uint32_t ch = 0; ch < C; ++ch)
+    {
+      for (uint32_t row = 0; row < H; ++row)
+      {
+        for (uint32_t col = 0; col < W; ++col)
+        {
+          _ker_buffer.at(n, ch, row, col) = dist(gen);
+        }
+      }
+    }
+  }
+}
+
+} // namespace conv
+} // namespace nnsuite
diff --git a/compiler/nnsuite/conv/nnkit-caffe/CMakeLists.txt b/compiler/nnsuite/conv/nnkit-caffe/CMakeLists.txt
new file mode 100644
index 000000000..6445cc6fb
--- /dev/null
+++ b/compiler/nnsuite/conv/nnkit-caffe/CMakeLists.txt
@@ -0,0 +1,24 @@
+if(NOT TARGET nnkit_support_caffe)
+ return()
+endif(NOT TARGET nnkit_support_caffe)
+
+file(GLOB_RECURSE TESTS "*.test.cpp")
+file(GLOB_RECURSE SOURCES "*.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(nnsuite_conv_caffe SHARED ${SOURCES})
+target_link_libraries(nnsuite_conv_caffe nnsuite_conv)
+target_link_libraries(nnsuite_conv_caffe nnkit_support_caffe)
+target_link_libraries(nnsuite_conv_caffe stdex)
+
+nnas_find_package(GTest QUIET)
+
+if(NOT GTest_FOUND)
+ return()
+endif(NOT GTest_FOUND)
+
+add_executable(nnsuite_conv_caffe_test ${TESTS})
+target_link_libraries(nnsuite_conv_caffe_test nnsuite_conv_caffe)
+target_link_libraries(nnsuite_conv_caffe_test morph)
+target_link_libraries(nnsuite_conv_caffe_test gtest_main)
+add_test(nnsuite_conv_caffe_test nnsuite_conv_caffe_test)
diff --git a/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.cpp b/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.cpp
new file mode 100644
index 000000000..31d2b33fc
--- /dev/null
+++ b/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvBackend.h"
+
+#include <caffe/proto/caffe.pb.h>
+
+#include <nnkit/support/caffe/Backend.h>
+
+#include <nncc/core/ADT/kernel/Overlay.h>
+#include <nncc/core/ADT/kernel/NCHWLayout.h>
+
+#include <stdex/Memory.h>
+
+using stdex::make_unique;
+
+std::unique_ptr<nnkit::Backend> ConvBackend::create(const nnsuite::conv::Model &model)
+{
+ ::caffe::NetParameter param;
+
+ param.set_name("conv");
+
+ // Create 'Input' layer
+ {
+ auto input = param.add_layer();
+ input->set_name("input");
+ input->set_type("Input");
+ input->add_top(model.ifm_name());
+
+ auto input_param = new ::caffe::InputParameter{};
+ auto input_shape = input_param->add_shape();
+ input_shape->add_dim(1);
+ input_shape->add_dim(model.ifm_shape().depth());
+ input_shape->add_dim(model.ifm_shape().height());
+ input_shape->add_dim(model.ifm_shape().width());
+ input->set_allocated_input_param(input_param);
+ }
+
+ // Create 'Convolution' layer
+ {
+ auto conv = param.add_layer();
+ conv->set_name("conv");
+ conv->set_type("Convolution");
+ conv->add_bottom(model.ifm_name());
+ conv->add_top(model.ofm_name());
+
+ const auto &ker_shape = model.ker_shape();
+
+ auto ker_blob_shape = new ::caffe::BlobShape{};
+
+ ker_blob_shape->add_dim(ker_shape.count());
+ ker_blob_shape->add_dim(ker_shape.depth());
+ ker_blob_shape->add_dim(ker_shape.height());
+ ker_blob_shape->add_dim(ker_shape.width());
+
+ auto ker_blob = conv->add_blobs();
+
+ for (uint32_t n = 0; n < ker_shape.count(); ++n)
+ {
+ for (uint32_t ch = 0; ch < ker_shape.depth(); ++ch)
+ {
+ for (uint32_t row = 0; row < ker_shape.height(); ++row)
+ {
+ for (uint32_t col = 0; col < ker_shape.width(); ++col)
+ {
+ ker_blob->add_data(model.ker_data().at(n, ch, row, col));
+ }
+ }
+ }
+ }
+
+ ker_blob->set_allocated_shape(ker_blob_shape);
+
+ auto conv_param = new ::caffe::ConvolutionParameter{};
+ conv_param->set_num_output(model.ker_shape().count());
+ conv_param->set_bias_term(false);
+ conv_param->add_kernel_size(model.ker_shape().height());
+ conv_param->add_kernel_size(model.ker_shape().width());
+ conv->set_allocated_convolution_param(conv_param);
+ }
+
+ auto net = make_unique<::caffe::Net<float>>(param);
+ return make_unique<nnkit::support::caffe::Backend<float>>(std::move(net));
+}
diff --git a/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.h b/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.h
new file mode 100644
index 000000000..3701180f5
--- /dev/null
+++ b/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONV_BACKEND_H__
+#define __CONV_BACKEND_H__
+
+#include <nnsuite/conv/Model.h>
+#include <nnkit/support/caffe/Backend.h>
+
+#include <memory>
+
+struct ConvBackend
+{
+ static std::unique_ptr<nnkit::Backend> create(const nnsuite::conv::Model &);
+};
+
+#endif // __CONV_BACKEND_H__
diff --git a/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.test.cpp b/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.test.cpp
new file mode 100644
index 000000000..776bf186b
--- /dev/null
+++ b/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.test.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvBackend.h"
+
+#include <nncc/core/ADT/kernel/Overlay.h>
+#include <nncc/core/ADT/kernel/NCHWLayout.h>
+
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <morph/caffe.h>
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT;
+
+class TestModel : public nnsuite::conv::Model
+{
+public:
+ TestModel(const std::string &ifm_name, const feature::Shape &ifm_shape,
+ const std::string &ofm_name, const feature::Shape &ofm_shape,
+ const kernel::Shape &ker_shape, const kernel::Layout &ker_layout, float *ker_data)
+ : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name), _ofm_shape(ofm_shape),
+ _ker{ker_shape, ker_layout, ker_data}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const std::string &ifm_name(void) const override { return _ifm_name; }
+ const feature::Shape &ifm_shape(void) const override { return _ifm_shape; }
+
+public:
+ const std::string &ofm_name(void) const override { return _ofm_name; }
+ const feature::Shape &ofm_shape(void) const override { return _ofm_shape; }
+
+public:
+ const kernel::Shape &ker_shape(void) const override { return _ker.shape(); }
+ const kernel::Reader<float> &ker_data(void) const override { return _ker; }
+
+private:
+ const std::string _ifm_name;
+ const feature::Shape _ifm_shape;
+
+private:
+ const std::string _ofm_name;
+ const feature::Shape _ofm_shape;
+
+private:
+ const kernel::Overlay<float, float *> _ker;
+};
+
+TEST(CONV_BACKEND, conv_3x3)
+{
+ const std::string ofm_name{"ofm"};
+ const feature::Shape ofm_shape{1, 1, 1};
+ float ofm_data[1] = {204.0f}; // EXPECTED
+
+ const std::string ifm_name{"ifm"};
+ const feature::Shape ifm_shape{1, 3, 3};
+ float ifm_data[9] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+
+ const kernel::Shape ker_shape{1, 1, 3, 3};
+ float ker_data[9] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+
+ using kernel::NCHWLayout;
+ using tensor::LexicalLayout;
+
+ TestModel model{ifm_name, ifm_shape, ofm_name, ofm_shape, ker_shape, NCHWLayout{}, ker_data};
+
+ auto backend = ConvBackend::create(model);
+
+ backend->prepare([&](nnkit::TensorContext &ctx) {
+ ASSERT_EQ(ctx.size(), 1);
+ ASSERT_EQ(ctx.name(0), ifm_name);
+ // TODO Check IFM shape
+
+ auto fill = [&](const nnkit::TensorContext &, uint32_t, tensor::Accessor<float> &t) {
+ const auto tensor_shape = morph::caffe::as_tensor_shape(ifm_shape);
+ const auto overlay = tensor::make_overlay<float, LexicalLayout>(tensor_shape, ifm_data);
+
+ for (tensor::IndexEnumerator e{tensor_shape}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ t.at(index) = overlay.at(index);
+ }
+ };
+
+ ctx.getMutableFloatTensor(0, fill);
+ });
+
+ backend->run();
+
+ backend->teardown([&](nnkit::TensorContext &ctx) {
+ ASSERT_EQ(ctx.size(), 1);
+ ASSERT_EQ(ctx.name(0), ofm_name);
+
+ auto verify = [&](const nnkit::TensorContext &, uint32_t, const tensor::Reader<float> &t) {
+ const auto tensor_shape = morph::caffe::as_tensor_shape(ofm_shape);
+ const auto overlay = tensor::make_overlay<float, LexicalLayout>(tensor_shape, ofm_data);
+
+ for (tensor::IndexEnumerator e{tensor_shape}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ EXPECT_EQ(t.at(index), overlay.at(index));
+ }
+ };
+
+ ctx.getConstFloatTensor(0, verify);
+ });
+}
diff --git a/compiler/nnsuite/conv/nnkit-caffe/Entry.cpp b/compiler/nnsuite/conv/nnkit-caffe/Entry.cpp
new file mode 100644
index 000000000..1411fb41c
--- /dev/null
+++ b/compiler/nnsuite/conv/nnkit-caffe/Entry.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvBackend.h"
+
+#include <nnsuite/conv/RandomModel.h>
+#include <nnkit/CmdlineArguments.h>
+
+#include <chrono>
+#include <iostream>
+
+extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
+{
+ // Set random seed
+ int32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+
+ if (args.size() > 0)
+ {
+ seed = std::stoi(args.at(0), nullptr, 0);
+ }
+
+ std::cout << "SEED: " << seed << std::endl;
+
+ const nnsuite::conv::RandomModel model{seed};
+
+ return ConvBackend::create(model);
+}
diff --git a/compiler/nnsuite/conv/nnkit-tflite/CMakeLists.txt b/compiler/nnsuite/conv/nnkit-tflite/CMakeLists.txt
new file mode 100644
index 000000000..c1cf88812
--- /dev/null
+++ b/compiler/nnsuite/conv/nnkit-tflite/CMakeLists.txt
@@ -0,0 +1,23 @@
+if(NOT TARGET nnkit_support_tflite-1.7)
+ return()
+endif(NOT TARGET nnkit_support_tflite-1.7)
+
+file(GLOB_RECURSE TESTS "*.test.cpp")
+file(GLOB_RECURSE SOURCES "*.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(nnsuite_conv_tflite SHARED ${SOURCES})
+target_link_libraries(nnsuite_conv_tflite nnsuite_conv)
+target_link_libraries(nnsuite_conv_tflite nnkit_support_tflite-1.7)
+target_link_libraries(nnsuite_conv_tflite stdex)
+
+nnas_find_package(GTest QUIET)
+
+if(NOT GTest_FOUND)
+ return()
+endif(NOT GTest_FOUND)
+
+add_executable(nnsuite_conv_tflite_test ${TESTS})
+target_link_libraries(nnsuite_conv_tflite_test nnsuite_conv_tflite)
+target_link_libraries(nnsuite_conv_tflite_test gtest_main)
+add_test(nnsuite_conv_tflite_test nnsuite_conv_tflite_test)
diff --git a/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.cpp b/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.cpp
new file mode 100644
index 000000000..8ec9ce491
--- /dev/null
+++ b/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvBackend.h"
+
+#include <nncc/core/ADT/kernel/Overlay.h>
+#include <nncc/core/ADT/kernel/NHWCLayout.h>
+
+#include <tensorflow/contrib/lite/kernels/register.h>
+#include <tensorflow/contrib/lite/model.h>
+#include <tensorflow/contrib/lite/builtin_op_data.h>
+
+#include <cstdlib>
+#include <stdexcept>
+
+using namespace ::tflite;
+using namespace ::tflite::ops::builtin;
+
+/**
+ * @brief Allocate memory with malloc and return a typed pointer
+ *
+ * NOTE This function throws std::bad_alloc exception on allocation failure
+ */
+template <typename T> T *typed_malloc(void)
+{
+ if (auto res = reinterpret_cast<T *>(malloc(sizeof(T))))
+ {
+ return res;
+ }
+ throw std::bad_alloc{};
+}
+
+// Comment from 'context.h'
+//
+// Parameters for asymmetric quantization. Quantized values can be converted
+// back to float using:
+// real_value = scale * (quantized_value - zero_point);
+static inline TfLiteQuantizationParams make_default_quantization(void)
+{
+ return TfLiteQuantizationParams{1.0f, 0};
+}
+
+static inline std::vector<int> as_dims(const nncc::core::ADT::feature::Shape &shape)
+{
+ const int N = 1;
+ const int C = static_cast<int>(shape.depth());
+ const int H = static_cast<int>(shape.height());
+ const int W = static_cast<int>(shape.width());
+
+ return std::vector<int>{N, H, W, C};
+}
+
+static inline std::vector<int> as_dims(const nncc::core::ADT::kernel::Shape &shape)
+{
+ const int N = static_cast<int>(shape.count());
+ const int C = static_cast<int>(shape.depth());
+ const int H = static_cast<int>(shape.height());
+ const int W = static_cast<int>(shape.width());
+
+ return std::vector<int>{N, H, W, C};
+}
+
+ConvBackend::ConvBackend(const nnsuite::conv::Model &model)
+ : _ifm_name{model.ifm_name()}, _ofm_name{model.ofm_name()}
+{
+ using nncc::core::ADT::kernel::Overlay;
+ using nncc::core::ADT::kernel::NHWCLayout;
+
+ using nncc::core::ADT::kernel::make_overlay;
+ using nncc::core::ADT::kernel::num_elements;
+
+ // Set kernel data
+ const auto &ker_shape = model.ker_shape();
+
+ _kernel.resize(num_elements(ker_shape));
+
+ auto kernel_overlay = make_overlay<float, NHWCLayout>(ker_shape, _kernel.data());
+
+ for (uint32_t n = 0; n < ker_shape.count(); ++n)
+ {
+ for (uint32_t ch = 0; ch < ker_shape.depth(); ++ch)
+ {
+ for (uint32_t row = 0; row < ker_shape.height(); ++row)
+ {
+ for (uint32_t col = 0; col < ker_shape.width(); ++col)
+ {
+ kernel_overlay.at(n, ch, row, col) = model.ker_data().at(n, ch, row, col);
+ }
+ }
+ }
+ }
+
+ // Set bias data
+ _bias.resize(ker_shape.count(), 0.0f);
+
+ // Initialize interpreter
+ auto quantization = make_default_quantization();
+
+ // Create Tensors
+ // 0 -> OFM
+ // 1 -> IFM
+ // 2 -> Kernel
+ // 3 -> Bias
+ _interp.AddTensors(4);
+
+ _interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, _ofm_name.c_str(),
+ as_dims(model.ofm_shape()), quantization);
+
+ _interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, _ifm_name.c_str(),
+ as_dims(model.ifm_shape()), quantization);
+
+ _interp.SetTensorParametersReadOnly(
+ 2, kTfLiteFloat32 /* type */, "kernel" /* name */, as_dims(model.ker_shape()), quantization,
+ reinterpret_cast<const char *>(_kernel.data()), _kernel.size() * sizeof(float));
+
+ _interp.SetTensorParametersReadOnly(
+ 3, kTfLiteFloat32 /* type */, "bias" /* name */, {static_cast<int>(_bias.size())},
+ quantization, reinterpret_cast<const char *>(_bias.data()), _bias.size() * sizeof(float));
+
+ auto param = typed_malloc<TfLiteConvParams>();
+
+ param->padding = kTfLitePaddingValid;
+ param->stride_width = 1;
+ param->stride_height = 1;
+ param->activation = kTfLiteActNone;
+
+ _interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
+ BuiltinOpResolver().FindOp(BuiltinOperator_CONV_2D));
+
+ _interp.SetInputs({1});
+ _interp.SetOutputs({0});
+}
diff --git a/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.h b/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.h
new file mode 100644
index 000000000..b01f42837
--- /dev/null
+++ b/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONV_BACKEND_H__
+#define __CONV_BACKEND_H__
+
+#include <nnsuite/conv/Model.h>
+#include <nnkit/support/tflite/AbstractBackend.h>
+
+#include <vector>
+
+class ConvBackend final : public nnkit::support::tflite::AbstractBackend
+{
+public:
+ explicit ConvBackend(const nnsuite::conv::Model &model);
+
+public:
+ ::tflite::Interpreter &interpreter(void) override { return _interp; }
+
+private:
+ // NOTE tflite interpreter just stores the pointer of its name
+ const std::string _ifm_name;
+ const std::string _ofm_name;
+
+ // NOTE kernel data should live longer than tflite interpreter itself
+ std::vector<float> _kernel;
+
+  // NOTE bias is missing in conv sample model, but conv op kernel in
+ // tensorflow lite interpreter does not work without bias.
+ //
+ // Let's feed zero-bias as a workaround
+ std::vector<float> _bias;
+
+private:
+ ::tflite::Interpreter _interp;
+};
+
+#endif // __CONV_BACKEND_H__
diff --git a/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.test.cpp b/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.test.cpp
new file mode 100644
index 000000000..db82f0cf9
--- /dev/null
+++ b/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.test.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvBackend.h"
+
+#include <nncc/core/ADT/kernel/Overlay.h>
+#include <nncc/core/ADT/kernel/NHWCLayout.h>
+
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using namespace nncc::core::ADT;
+
+static inline tensor::Shape as_tensor_shape(const feature::Shape &shape)
+{
+ return tensor::Shape{1, shape.height(), shape.width(), shape.depth()};
+}
+
+class TestModel : public nnsuite::conv::Model
+{
+public:
+ TestModel(const std::string &ifm_name, const feature::Shape &ifm_shape,
+ const std::string &ofm_name, const feature::Shape &ofm_shape,
+ const kernel::Shape &ker_shape, const kernel::Layout &ker_layout, float *ker_data)
+ : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name), _ofm_shape(ofm_shape),
+ _ker{ker_shape, ker_layout, ker_data}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const std::string &ifm_name(void) const override { return _ifm_name; }
+ const feature::Shape &ifm_shape(void) const override { return _ifm_shape; }
+
+public:
+ const std::string &ofm_name(void) const override { return _ofm_name; }
+ const feature::Shape &ofm_shape(void) const override { return _ofm_shape; }
+
+public:
+ const kernel::Shape &ker_shape(void) const override { return _ker.shape(); }
+ const kernel::Reader<float> &ker_data(void) const override { return _ker; }
+
+private:
+ const std::string _ifm_name;
+ const feature::Shape _ifm_shape;
+
+private:
+ const std::string _ofm_name;
+ const feature::Shape _ofm_shape;
+
+private:
+ const kernel::Overlay<float, float *> _ker;
+};
+
+TEST(CONV_BACKEND, conv_3x3)
+{
+ const std::string ofm_name{"ofm"};
+ const feature::Shape ofm_shape{1, 1, 1};
+ float ofm_data[1] = {204.0f}; // EXPECTED
+
+ const std::string ifm_name{"ifm"};
+ const feature::Shape ifm_shape{1, 3, 3};
+ float ifm_data[9] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+
+ const kernel::Shape ker_shape{1, 1, 3, 3};
+ float ker_data[9] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+
+ using kernel::NHWCLayout;
+ using tensor::LexicalLayout;
+
+ TestModel model{ifm_name, ifm_shape, ofm_name, ofm_shape, ker_shape, NHWCLayout{}, ker_data};
+
+ ConvBackend backend{model};
+
+ backend.prepare([&](nnkit::TensorContext &ctx) {
+ ASSERT_EQ(ctx.size(), 1);
+ ASSERT_EQ(ctx.name(0), ifm_name);
+ // TODO Check IFM shape
+
+ auto fill = [&](const nnkit::TensorContext &, uint32_t, tensor::Accessor<float> &t) {
+ const auto tensor_shape = as_tensor_shape(ifm_shape);
+ const auto overlay = tensor::make_overlay<float, LexicalLayout>(tensor_shape, ifm_data);
+
+ for (tensor::IndexEnumerator e{tensor_shape}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ t.at(index) = overlay.at(index);
+ }
+ };
+
+ ctx.getMutableFloatTensor(0, fill);
+ });
+
+ backend.run();
+
+ backend.teardown([&](nnkit::TensorContext &ctx) {
+ ASSERT_EQ(ctx.size(), 1);
+ ASSERT_EQ(ctx.name(0), ofm_name);
+
+ auto verify = [&](const nnkit::TensorContext &, uint32_t, const tensor::Reader<float> &t) {
+ const auto tensor_shape = as_tensor_shape(ofm_shape);
+ const auto overlay = tensor::make_overlay<float, LexicalLayout>(tensor_shape, ofm_data);
+
+ for (tensor::IndexEnumerator e{tensor_shape}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ EXPECT_EQ(t.at(index), overlay.at(index));
+ }
+ };
+
+ ctx.getConstFloatTensor(0, verify);
+ });
+}
diff --git a/compiler/nnsuite/conv/nnkit-tflite/Entry.cpp b/compiler/nnsuite/conv/nnkit-tflite/Entry.cpp
new file mode 100644
index 000000000..2c84f72e6
--- /dev/null
+++ b/compiler/nnsuite/conv/nnkit-tflite/Entry.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvBackend.h"
+
+#include <nnsuite/conv/RandomModel.h>
+
+#include <nnkit/Backend.h>
+#include <nnkit/CmdlineArguments.h>
+
+#include <stdex/Memory.h>
+
+#include <chrono>
+#include <iostream>
+
+extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
+{
+ // Set random seed
+ int32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+
+ if (args.size() > 0)
+ {
+ seed = std::stoi(args.at(0), nullptr, 0);
+ }
+
+ std::cout << "SEED: " << seed << std::endl;
+
+ const nnsuite::conv::RandomModel model{seed};
+
+ return stdex::make_unique<ConvBackend>(model);
+}
diff --git a/compiler/nnsuite/requires.cmake b/compiler/nnsuite/requires.cmake
new file mode 100644
index 000000000..654db88c3
--- /dev/null
+++ b/compiler/nnsuite/requires.cmake
@@ -0,0 +1 @@
+require("angkor")
diff --git a/compiler/oneco-value-pbtxt-test/CMakeLists.txt b/compiler/oneco-value-pbtxt-test/CMakeLists.txt
new file mode 100644
index 000000000..6d9563f40
--- /dev/null
+++ b/compiler/oneco-value-pbtxt-test/CMakeLists.txt
@@ -0,0 +1,53 @@
+option(ONECO_VALUE_PBTXT_TEST "Enable oneco value test for pbtxt input model" ON)
+
+if(NOT ONECO_VALUE_PBTXT_TEST)
+ return()
+endif(NOT ONECO_VALUE_PBTXT_TEST)
+
+if(NOT TARGET onnxkit)
+ message(STATUS "oneco: Skip test material preparation as onnxkit is not defined")
+ return()
+endif(NOT TARGET onnxkit)
+
+#
+# Copy [Testcase]/test.pbtxt to Testcase.pbtxt in binary folder
+# Encode Testcase.pbtxt to Testcase.pb
+#
+set(TEST_PBTXT_FILE "test.pbtxt")
+set(TEST_REPO "${CMAKE_CURRENT_SOURCE_DIR}") # Where to find tests
+set(TEST_SPACE "${CMAKE_CURRENT_BINARY_DIR}") # Where to run tests
+
+file(GLOB PBTXTFILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/${TEST_PBTXT_FILE}")
+
+unset(TEST_DEPS)
+
+foreach(PBTXTFILE IN ITEMS ${PBTXTFILES})
+ get_filename_component(DIR_NAME ${PBTXTFILE} DIRECTORY)
+
+ set(PBTXT_SOURCE_FILE "${DIR_NAME}.pbtxt")
+ set(PBTXT_SOURCE_PATH "${TEST_SPACE}/${PBTXT_SOURCE_FILE}")
+
+ set(PB_OUTPUT_FILE "${DIR_NAME}.pb")
+ set(PB_OUTPUT_PATH "${TEST_SPACE}/${PB_OUTPUT_FILE}")
+
+ # Copy files
+ add_custom_command(
+ OUTPUT ${PBTXT_SOURCE_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TEST_REPO}/${PBTXTFILE}" "${PBTXT_SOURCE_PATH}"
+ COMMENT "Copy ${PBTXT_SOURCE_FILE}"
+ DEPENDS "${TEST_REPO}/${PBTXTFILE}"
+ )
+
+ # Use onnxkit to encode
+ add_custom_command(
+ OUTPUT ${PB_OUTPUT_PATH}
+ COMMAND $<TARGET_FILE:onnxkit> encode ${PBTXT_SOURCE_PATH} ${PB_OUTPUT_PATH}
+ DEPENDS ${PBTXT_SOURCE_PATH}
+ COMMENT "Generate ${PB_OUTPUT_FILE}"
+ )
+
+ list(APPEND TEST_DEPS "${PB_OUTPUT_PATH}")
+endforeach(PBTXTFILE)
+
+# This target enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(oneco_value_pbtxt_test_deps ALL DEPENDS ${TEST_DEPS})
diff --git a/compiler/oneco-value-pbtxt-test/Const_000/test.pbtxt b/compiler/oneco-value-pbtxt-test/Const_000/test.pbtxt
new file mode 100644
index 000000000..c5ae298d4
--- /dev/null
+++ b/compiler/oneco-value-pbtxt-test/Const_000/test.pbtxt
@@ -0,0 +1,52 @@
+# Latest IR_VERSION of 1.4.1 version is 4
+# https://github.com/onnx/onnx/blob/rel-1.4.1/onnx/onnx.proto3
+ir_version: 4
+
+# Opset version of IR_VERSION 4 is 9
+# https://github.com/onnx/onnx/blob/rel-1.4.1/onnx/defs/operator_sets.h
+opset_import {
+ version: 9
+}
+
+graph {
+ name: "Const_000"
+
+ node {
+ name: "const_node"
+ output: "output:0"
+ op_type: "Constant"
+ attribute {
+ name: "const/value"
+ t {
+ dims: 2
+ dims: 3
+ data_type: 1 # FLOAT type
+ float_data: 1.1
+ float_data: 2.2
+ float_data: 3.3
+ float_data: 4.4
+ float_data: 5.5
+ float_data: 6.6
+ name: "const_tensor"
+ }
+ type: TENSOR
+ }
+ }
+
+ output {
+ name: "output:0"
+ type {
+ tensor_type {
+ elem_type: 1 # FLOAT type
+ shape {
+ dim {
+ dim_value: 2
+ }
+ dim {
+ dim_value: 3
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/compiler/oneco-value-pbtxt-test/Identity_000/test.pbtxt b/compiler/oneco-value-pbtxt-test/Identity_000/test.pbtxt
new file mode 100644
index 000000000..e8aff1d9d
--- /dev/null
+++ b/compiler/oneco-value-pbtxt-test/Identity_000/test.pbtxt
@@ -0,0 +1,66 @@
+# Latest IR_VERSION of 1.4.1 version is 4
+# https://github.com/onnx/onnx/blob/rel-1.4.1/onnx/onnx.proto3
+ir_version: 4
+
+# Opset version of IR_VERSION 4 is 9
+# https://github.com/onnx/onnx/blob/rel-1.4.1/onnx/defs/operator_sets.h
+opset_import {
+ version: 9
+}
+
+graph {
+ name: "Identity_000"
+
+ node {
+ input: "input:0"
+ output: "output:0"
+ name: "identity_node"
+ op_type: "Identity"
+ }
+
+ input {
+ name: "input:0"
+ type {
+ tensor_type {
+ elem_type: 1 # FLOAT type
+ shape {
+ dim {
+ dim_value: 1
+ }
+ dim {
+ dim_value: 2
+ }
+ dim {
+ dim_value: 1
+ }
+ dim {
+ dim_value: 2
+ }
+ }
+ }
+ }
+ }
+
+ output {
+ name: "output:0"
+ type {
+ tensor_type {
+ elem_type: 1 # FLOAT type
+ shape {
+ dim {
+ dim_value: 1
+ }
+ dim {
+ dim_value: 2
+ }
+ dim {
+ dim_value: 1
+ }
+ dim {
+ dim_value: 2
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/compiler/oneco-value-pbtxt-test/requires.cmake b/compiler/oneco-value-pbtxt-test/requires.cmake
new file mode 100644
index 000000000..0f21fa998
--- /dev/null
+++ b/compiler/oneco-value-pbtxt-test/requires.cmake
@@ -0,0 +1 @@
+require("onnxkit")
diff --git a/compiler/oneco/CMakeLists.txt b/compiler/oneco/CMakeLists.txt
new file mode 100644
index 000000000..73bc57d43
--- /dev/null
+++ b/compiler/oneco/CMakeLists.txt
@@ -0,0 +1,36 @@
+nnas_find_package(Protobuf QUIET)
+nnas_find_package(ONNXSource EXACT 1.4.1 QUIET)
+
+if(NOT Protobuf_FOUND)
+ return()
+endif(NOT Protobuf_FOUND)
+
+if(NOT ONNXSource_FOUND)
+ return()
+endif(NOT ONNXSource_FOUND)
+
+add_subdirectory(proto)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(moco_onnx_frontend SHARED ${SOURCES})
+target_include_directories(moco_onnx_frontend PRIVATE src)
+target_include_directories(moco_onnx_frontend PUBLIC include)
+target_link_libraries(moco_onnx_frontend PUBLIC moco_onnx_proto)
+target_link_libraries(moco_onnx_frontend PUBLIC loco)
+target_link_libraries(moco_onnx_frontend PRIVATE stdex)
+target_link_libraries(moco_onnx_frontend PRIVATE cwrap)
+
+nnas_find_package(GTest QUIET)
+
+if(NOT GTest_FOUND)
+ return()
+endif(NOT GTest_FOUND)
+
+add_executable(moco_onnx_frontend_test ${TESTS})
+target_include_directories(moco_onnx_frontend_test PRIVATE src)
+target_link_libraries(moco_onnx_frontend_test gtest_main)
+target_link_libraries(moco_onnx_frontend_test moco_onnx_frontend)
+add_test(moco_onnx_frontend_test moco_onnx_frontend_test)
diff --git a/compiler/oneco/include/moco/onnx/Frontend.h b/compiler/oneco/include/moco/onnx/Frontend.h
new file mode 100644
index 000000000..5d3527590
--- /dev/null
+++ b/compiler/oneco/include/moco/onnx/Frontend.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_ONNX_FRONTEND_H__
+#define __MOCO_ONNX_FRONTEND_H__
+
+#include <loco.h>
+
+#include <memory>
+
+namespace moco
+{
+namespace onnx
+{
+
+class Frontend
+{
+public:
+ enum class FileType
+ {
+ Text,
+ Binary,
+ };
+
+public:
+ Frontend();
+
+public:
+ std::unique_ptr<loco::Graph> load(const char *, FileType) const;
+};
+
+} // namespace onnx
+} // namespace moco
+
+#endif // __MOCO_ONNX_FRONTEND_H__
diff --git a/compiler/oneco/proto/CMakeLists.txt b/compiler/oneco/proto/CMakeLists.txt
new file mode 100644
index 000000000..e72985671
--- /dev/null
+++ b/compiler/oneco/proto/CMakeLists.txt
@@ -0,0 +1,13 @@
+# Minimal Protocol Buffer specification for ModelProto file (.pb) encoding/decoding
+unset(PROTO_FILES)
+list(APPEND PROTO_FILES onnx/onnx.proto)
+
+Protobuf_Generate(ONNX_MODEL_PROTO
+ "${CMAKE_CURRENT_BINARY_DIR}/generated"
+ "${ONNXSource_DIR}"
+ ${PROTO_FILES})
+
+add_library(moco_onnx_proto STATIC ${ONNX_MODEL_PROTO_SOURCES})
+set_target_properties(moco_onnx_proto PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(moco_onnx_proto PUBLIC ${ONNX_MODEL_PROTO_INCLUDE_DIRS})
+target_link_libraries(moco_onnx_proto PUBLIC libprotobuf)
diff --git a/compiler/oneco/requires.cmake b/compiler/oneco/requires.cmake
new file mode 100644
index 000000000..4e99b0eac
--- /dev/null
+++ b/compiler/oneco/requires.cmake
@@ -0,0 +1,3 @@
+require("stdex")
+require("loco")
+require("cwrap")
diff --git a/compiler/oneco/src/Convert.cpp b/compiler/oneco/src/Convert.cpp
new file mode 100644
index 000000000..b56b03ef7
--- /dev/null
+++ b/compiler/oneco/src/Convert.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+#include <cassert>
+#include <stdexcept>
+
+namespace moco
+{
+namespace onnx
+{
+
+loco::DataType as_loco_datatype(const int32_t tensor_dtype)
+{
+ switch (tensor_dtype)
+ {
+ case 0: // UNDEFINED
+ return loco::DataType::Unknown;
+ case 1: // FLOAT
+ return loco::DataType::FLOAT32;
+ case 2: // UINT8
+ return loco::DataType::U8;
+ case 3: // INT8
+ return loco::DataType::S8;
+ case 4: // UINT16
+ return loco::DataType::U16;
+ case 5: // INT16
+ return loco::DataType::S16;
+ case 6: // INT32
+ return loco::DataType::S32;
+ case 7: // INT64
+ return loco::DataType::S64;
+ case 10: // FLOAT16
+ return loco::DataType::FLOAT16;
+ case 11: // DOUBLE
+ return loco::DataType::FLOAT64;
+ case 12: // UINT32
+ return loco::DataType::U32;
+ case 13: // UINT64
+ return loco::DataType::U64;
+
+ case 8: // STRING
+ case 9: // BOOL
+ case 14: // COMPLEX64
+ case 15: // COMPLEX128
+ case 16: // BFLOAT16
+ default:
+ break;
+ }
+ throw std::runtime_error{"Unsupported onnx dtype"};
+}
+
+std::string tensor_dtype_as_string(const int32_t tensor_dtype)
+{
+ switch (tensor_dtype)
+ {
+ case 0: // UNDEFINED
+ return "UNDEFINED";
+ case 1: // FLOAT
+ return "FLOAT";
+ case 2: // UINT8
+ return "UINT8";
+ case 3: // INT8
+ return "INT8";
+ case 4: // UINT16
+ return "UINT16";
+ case 5: // INT16
+ return "INT16";
+ case 6: // INT32
+ return "INT32";
+ case 7: // INT64
+ return "INT64";
+ case 8: // STRING
+ return "STRING";
+ case 9: // BOOL
+ return "BOOL";
+ case 10: // FLOAT16
+ return "FLOAT16";
+ case 11: // DOUBLE
+ return "DOUBLE";
+ case 12: // UINT32
+ return "UINT32";
+ case 13: // UINT64
+ return "UINT64";
+ case 14: // COMPLEX64
+ return "COMPLEX64";
+ case 15: // COMPLEX128
+ return "COMPLEX128";
+ case 16: // BFLOAT16
+ return "BFLOAT16";
+ default:
+ break;
+ }
+ throw std::runtime_error{"Unsupported onnx dtype"};
+}
+
+} // namespace onnx
+} // namespace moco
diff --git a/compiler/oneco/src/Convert.h b/compiler/oneco/src/Convert.h
new file mode 100644
index 000000000..0935afeea
--- /dev/null
+++ b/compiler/oneco/src/Convert.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_FRONTEND_ONNX_CONVERT_H__
+#define __MOCO_FRONTEND_ONNX_CONVERT_H__
+
+#include <loco.h>
+
+#include <onnx/onnx.pb.h>
+
+#include <string>
+
+namespace moco
+{
+namespace onnx
+{
+
+loco::DataType as_loco_datatype(const int32_t dtype);
+std::string tensor_dtype_as_string(const int32_t tensor_dtype);
+
+} // namespace onnx
+} // namespace moco
+
+#endif // __MOCO_FRONTEND_ONNX_CONVERT_H__
diff --git a/compiler/oneco/src/Frontend.cpp b/compiler/oneco/src/Frontend.cpp
new file mode 100644
index 000000000..d633c1c2e
--- /dev/null
+++ b/compiler/oneco/src/Frontend.cpp
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <moco/onnx/Frontend.h>
+
+#include "Convert.h"
+#include "GraphBuilderContext.h"
+#include "GraphBuilderRegistry.h"
+#include "Onnxutil.h"
+
+#include <cwrap/Fildes.h>
+
+#include <onnx/onnx.pb.h>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <sstream>
+#include <string>
+#include <stdexcept>
+
+#include <fcntl.h>
+#include <unistd.h>
+
+namespace
+{
+
+bool load_text(const cwrap::Fildes &fildes, onnx::ModelProto &model_proto)
+{
+ google::protobuf::io::FileInputStream fis(fildes.get());
+
+ return google::protobuf::TextFormat::Parse(&fis, &model_proto);
+}
+
+bool load_binary(const cwrap::Fildes &fildes, onnx::ModelProto &model_proto)
+{
+ google::protobuf::io::FileInputStream fis(fildes.get());
+ google::protobuf::io::CodedInputStream cis(&fis);
+
+ return model_proto.ParseFromCodedStream(&cis);
+}
+
+void load_onnx(const std::string &path, moco::onnx::Frontend::FileType type,
+ onnx::ModelProto &model_proto)
+{
+ cwrap::Fildes fildes{open(path.c_str(), O_RDONLY)};
+
+ if (fildes.get() < 0)
+ {
+ throw std::runtime_error{"Error: " + path + " not found"};
+ }
+
+ bool result = (type == moco::onnx::Frontend::FileType::Text) ? load_text(fildes, model_proto)
+ : load_binary(fildes, model_proto);
+
+ if (!result)
+ {
+ throw std::runtime_error{"Error: Failed to parse " + path};
+ }
+}
+
+// TODO Make comments clear
+void convert_graph(::onnx::ModelProto &onnx_model_proto, loco::Graph *graph)
+{
+ auto nodes = stdex::make_unique<moco::onnx::SymbolTable>();
+ auto input_names = stdex::make_unique<moco::onnx::SymbolTable>();
+
+ moco::onnx::GraphBuilderContext gb_context(graph, nodes.get(), input_names.get());
+
+ // Building a loco graph
+ // 1. Convert onnx::node to loco::Node
+ // 2. Convert onnx::initializer to loco::ConstGen node
+ // 3. Convert onnx::input to loco::Pull node
+ // 4. Connect inputs: set all node input(from a string) to actual node object
+ // 5. Set graph input
+ // 6. Create loco::Push node (with a proper input), and mark it as a graph output
+
+ assert(onnx_model_proto.has_graph());
+ ::onnx::GraphProto onnx_graph_proto = onnx_model_proto.graph();
+
+ /// All nodes in the ModelProto's graph will bind against the operator
+ /// with the same-domain/same-op_type operator with the HIGHEST version
+ /// in the referenced operator sets.
+ assert(onnx_model_proto.opset_import_size() > 0);
+ int64_t opset_version = 1;
+ for (int i = 0; i < onnx_model_proto.opset_import_size(); ++i)
+ {
+ auto opset = onnx_model_proto.opset_import(i);
+
+ if (!opset.has_domain() || moco::onnx::is_default_domain(opset.domain()))
+ {
+ if (opset.version() > opset_version)
+ {
+ opset_version = opset.version();
+ }
+ }
+ else
+ {
+ throw std::runtime_error("Not supported for custom operation");
+ }
+ }
+
+ // 1. Convert all the nodes to loco::Node
+ for (const auto &n : onnx_graph_proto.node())
+ {
+ if (const auto *graph_builder = moco::onnx::GraphBuilderRegistry::get().lookup(n.op_type()))
+ {
+ if (!graph_builder->validate(opset_version, n))
+ {
+ throw std::runtime_error{"Invalid operator: " + n.op_type()};
+ }
+
+ graph_builder->build(opset_version, n, &gb_context);
+ }
+ else
+ {
+ throw std::runtime_error{"Not supported: " + n.op_type()};
+ }
+ }
+
+ // 2. Convert onnx::initializer to loco::ConstGen node
+ std::set<std::string> initializer_name_set;
+ for (int i = 0; i < onnx_graph_proto.initializer_size(); ++i)
+ {
+ auto initializer = onnx_graph_proto.initializer(i);
+
+ initializer_name_set.insert(initializer.name());
+
+ // TODO Support other data types
+ auto data = moco::onnx::get_float_data(initializer);
+
+ auto const_node = graph->nodes()->create<loco::ConstGen>();
+ const_node->dtype(moco::onnx::as_loco_datatype(initializer.data_type()));
+ const_node->rank(initializer.dims_size());
+ // TODO Support other data types
+ const_node->size<loco::DataType::FLOAT32>(data.size());
+
+ for (uint32_t i = 0; i < const_node->rank(); ++i)
+ {
+ const_node->dim(i) = initializer.dims(i);
+ }
+
+ for (uint32_t i = 0; i < data.size(); ++i)
+ {
+ // TODO Support other data types
+ const_node->at<loco::DataType::FLOAT32>(i) = data.at(i);
+ }
+
+ nodes->enroll(initializer.name(), const_node);
+ }
+
+ // 3. Convert onnx::input to loco::Pull node
+ for (int i = 0; i < onnx_graph_proto.input_size(); i++)
+ {
+ auto input = onnx_graph_proto.input(i);
+
+ // Already substituted by ConstGen node
+ if (initializer_name_set.find(input.name()) != initializer_name_set.end())
+ continue;
+
+ auto pull_node = graph->nodes()->create<loco::Pull>();
+
+ auto tensor = input.type().tensor_type();
+ pull_node->dtype(moco::onnx::as_loco_datatype(tensor.elem_type()));
+ pull_node->rank(tensor.shape().dim_size());
+ for (uint32_t i = 0; i < pull_node->rank(); ++i)
+ {
+ pull_node->dim(i) = (uint32_t)tensor.shape().dim(i).dim_value();
+ }
+
+ nodes->enroll(input.name(), pull_node);
+ }
+
+ // 4. Connect inputs: set all node input(from a string) to actual node object
+ loco::Graph::NodeContext *graph_nodes = graph->nodes();
+ uint32_t nodes_count = graph_nodes->size();
+ for (uint32_t n = 0; n < nodes_count; ++n)
+ {
+ loco::Node *node_to_set = graph_nodes->at(n);
+
+ unsigned int names_size = input_names->size(node_to_set);
+ assert(names_size == node_to_set->arity());
+ for (unsigned int i = 0; i < names_size; ++i)
+ {
+ auto input_name = input_names->name(node_to_set, i);
+ auto node = nodes->node(input_name);
+
+ // TODO use enum instead of dynamic_cast
+ loco::Forward *forward_node = dynamic_cast<loco::Forward *>(node_to_set);
+ if (forward_node != nullptr)
+ forward_node->input(node);
+ }
+ }
+
+ // 5. Set graph input
+ for (int i = 0; i < onnx_graph_proto.input_size(); i++)
+ {
+ auto input = onnx_graph_proto.input(i).name();
+
+ // Already substituted by ConstGen node
+ if (initializer_name_set.find(input) != initializer_name_set.end())
+ continue;
+
+ auto node = nodes->node(input);
+ assert(node != nullptr);
+
+ auto graph_input = graph->inputs()->create();
+
+ loco::Pull *pull_node = dynamic_cast<loco::Pull *>(node);
+ assert(pull_node != nullptr);
+
+ graph_input->name(input);
+ loco::link(graph_input, pull_node);
+ }
+
+ // 6. Create loco::Push node (with a proper input), and mark it as a graph output
+ for (int i = 0; i < onnx_graph_proto.output_size(); i++)
+ {
+ auto output = onnx_graph_proto.output(i).name();
+
+ auto output_node = nodes->node(output);
+ assert(output_node);
+
+ // create loco::Push for output of graph
+ auto push_node = graph->nodes()->create<loco::Push>();
+ push_node->from(output_node); // set input of Push to output node
+
+ // set the graph output name and node object
+ auto graph_output = graph->outputs()->create();
+ graph_output->name(output);
+ loco::link(graph_output, push_node);
+ }
+}
+
+} // namespace
+
+namespace moco
+{
+namespace onnx
+{
+
+Frontend::Frontend()
+{
+ // DO NOTHING
+}
+
+std::unique_ptr<loco::Graph> Frontend::load(const char *modelfile, FileType type) const
+{
+ ::onnx::ModelProto onnx_model_proto;
+
+ load_onnx(modelfile, type, onnx_model_proto);
+
+ auto graph = loco::make_graph();
+
+ convert_graph(onnx_model_proto, graph.get());
+
+ return std::move(graph);
+}
+
+} // namespace onnx
+} // namespace moco
diff --git a/compiler/oneco/src/Frontend.test.cpp b/compiler/oneco/src/Frontend.test.cpp
new file mode 100644
index 000000000..58e1144da
--- /dev/null
+++ b/compiler/oneco/src/Frontend.test.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <moco/onnx/Frontend.h>
+
+#include <gtest/gtest.h>
+
+// Smoke test: Frontend must be default-constructible
+TEST(MocoONNXFrontendTest, Dummy) { moco::onnx::Frontend frontend; }
diff --git a/compiler/oneco/src/GraphBuilder.h b/compiler/oneco/src/GraphBuilder.h
new file mode 100644
index 000000000..7271eb81a
--- /dev/null
+++ b/compiler/oneco/src/GraphBuilder.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_FRONTEND_ONNX_GRAPH_BUILDER_H__
+#define __MOCO_FRONTEND_ONNX_GRAPH_BUILDER_H__
+
+#include "GraphBuilderContext.h"
+
+#include <onnx/onnx.pb.h>
+
+namespace moco
+{
+namespace onnx
+{
+
+/**
+ * @brief Parent class of onnx operation graph builders
+ * @note  Each subclass dispatches to the proper build and validate
+ *        implementation according to the opset version
+ */
+class GraphBuilder
+{
+public:
+  using OpsetVersion = int64_t;
+
+  // By default every node is considered valid; subclasses override to add checks
+  virtual bool validate(OpsetVersion, const ::onnx::NodeProto &) const { return true; }
+  virtual void build(OpsetVersion, const ::onnx::NodeProto &, GraphBuilderContext *) const = 0;
+  // Defaulted virtual destructor (file already uses C++11) instead of an empty body
+  virtual ~GraphBuilder() = default;
+};
+
+} // namespace onnx
+} // namespace moco
+
+#endif // __MOCO_FRONTEND_ONNX_GRAPH_BUILDER_H__
diff --git a/compiler/oneco/src/GraphBuilderContext.cpp b/compiler/oneco/src/GraphBuilderContext.cpp
new file mode 100644
index 000000000..00d3f4b06
--- /dev/null
+++ b/compiler/oneco/src/GraphBuilderContext.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GraphBuilderContext.h"
+
+namespace moco
+{
+namespace onnx
+{
+
+/**
+ * @brief Register 'node' under the unique key 'node_name'
+ *
+ * Throws std::runtime_error when the name is already enrolled.
+ */
+void SymbolTable::enroll(const std::string &node_name, loco::Node *node)
+{
+  // emplace inserts only when the key is absent: one lookup instead of
+  // the previous find() followed by operator[]
+  auto result = _namenode.emplace(node_name, node);
+
+  if (!result.second)
+  {
+    throw std::runtime_error{"Error: Duplicate node name in Graph: " + node_name};
+  }
+}
+
+// Return the node enrolled under 'node_name'; throws when the name is unknown
+loco::Node *SymbolTable::node(const std::string &node_name)
+{
+  const auto iter = _namenode.find(node_name);
+
+  if (iter != _namenode.end())
+    return iter->second;
+
+  throw std::runtime_error{"Error: Cannot find node with name in Graph: " + node_name};
+}
+
+/**
+ * @brief Append 'name' to the list of names recorded for 'node'
+ */
+void SymbolTable::list(loco::Node *node, const std::string &name)
+{
+  // operator[] default-constructs an empty vector on first access, which is
+  // exactly what the previous find()/branch sequence did with two lookups
+  _nodenames[node].push_back(name);
+}
+
+// Number of names recorded for 'node'; a node never passed to list() has zero
+unsigned SymbolTable::size(loco::Node *node)
+{
+  const auto iter = _nodenames.find(node);
+
+  return (iter == _nodenames.end()) ? 0 : iter->second.size();
+}
+
+// Return the index-th name recorded for 'node' (0 <= index < size(node));
+// throws when the node is unknown or the index is out of range
+const std::string &SymbolTable::name(loco::Node *node, unsigned index)
+{
+  const auto iter = _nodenames.find(node);
+
+  if (iter == _nodenames.end())
+  {
+    throw std::runtime_error{"Error: Cannot find names given node"};
+  }
+
+  const auto &names = iter->second;
+
+  if (index >= names.size())
+  {
+    throw std::runtime_error{"Error: Invalid name index for given node"};
+  }
+
+  return names.at(index);
+}
+
+} // namespace onnx
+} // namespace moco
diff --git a/compiler/oneco/src/GraphBuilderContext.h b/compiler/oneco/src/GraphBuilderContext.h
new file mode 100644
index 000000000..f1f394b50
--- /dev/null
+++ b/compiler/oneco/src/GraphBuilderContext.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_FRONTEND_ONNX_GRAPHBUILDERCONTEXT_H__
+#define __MOCO_FRONTEND_ONNX_GRAPHBUILDERCONTEXT_H__
+
+#include <loco.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace moco
+{
+namespace onnx
+{
+
+/**
+ * @brief Class to store relations of Nodes and string names
+ *
+ * Keeps two independent tables: a unique name -> node map filled by enroll(),
+ * and a node -> ordered list of names map filled by list().
+ */
+class SymbolTable
+{
+public:
+  /**
+  * @brief Registers one node for a name
+  *        Throws runtime_error on a duplicate name
+  */
+  void enroll(const std::string &node_name, loco::Node *node);
+  /**
+  * @brief Queries enrolled(registered) with name and return node if found
+  *        Will throw runtime_error if not found
+  *        Table is independent with registering with list()
+  */
+  loco::Node *node(const std::string &node_name);
+
+  /**
+  * @brief Registers multiple (appends) names for a node
+  *        Table is independent with registering with enroll()
+  */
+  void list(loco::Node *node, const std::string &name);
+  /**
+  * @brief Returns number of listed(registered) names for a node
+  */
+  unsigned size(loco::Node *node);
+  /**
+  * @brief Queries listed(registered) with node and index(from 0 to size-1)
+  *        Will throw runtime_error if node is not found or index is out of bounds
+  */
+  const std::string &name(loco::Node *node, unsigned index);
+
+private:
+  using MapNameNode_t = std::map<std::string, loco::Node *>;
+  using MapNodeNames_t = std::map<loco::Node *, std::vector<std::string>>;
+
+  MapNameNode_t _namenode;   // enroll() table: unique name -> node
+  MapNodeNames_t _nodenames; // list() table: node -> ordered names
+};
+
+/**
+* @brief Class to store context to build IR from onnx
+*/
+class GraphBuilderContext
+{
+public:
+  // Non-owning: the caller keeps ownership of the graph and both symbol tables
+  GraphBuilderContext(loco::Graph *g, SymbolTable *nodes, SymbolTable *input_names)
+      : _g(g), _nodes(nodes), _input_names(input_names)
+  {
+    // DO NOTHING
+  }
+
+  // The context is shared by pointer during one conversion; copy/move forbidden
+  GraphBuilderContext(const GraphBuilderContext &) = delete;
+  GraphBuilderContext(GraphBuilderContext &&) = delete;
+
+public:
+  loco::Graph *graph() { return _g; }
+  SymbolTable *nodes() { return _nodes; }             // name -> node (enroll table)
+  SymbolTable *input_names() { return _input_names; } // node -> its input names (list table)
+
+private:
+  loco::Graph *_g;
+  SymbolTable *_nodes;
+  SymbolTable *_input_names;
+};
+
+} // namespace onnx
+} // namespace moco
+
+#endif // __MOCO_FRONTEND_ONNX_GRAPHBUILDERCONTEXT_H__
diff --git a/compiler/oneco/src/GraphBuilderRegistry.h b/compiler/oneco/src/GraphBuilderRegistry.h
new file mode 100644
index 000000000..1bf4d9514
--- /dev/null
+++ b/compiler/oneco/src/GraphBuilderRegistry.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_FRONTEND_ONNX_GRAPH_BUILDER_REGISTRY_H__
+#define __MOCO_FRONTEND_ONNX_GRAPH_BUILDER_REGISTRY_H__
+
+#include "GraphBuilder.h"
+
+#include <map>
+
+namespace moco
+{
+namespace onnx
+{
+
+/**
+* @brief Class to return graph builder for passed onnx Operator
+*/
+class GraphBuilderRegistry
+{
+public:
+  /**
+   * @brief Returns registered GraphBuilder pointer for operator or
+   *        nullptr if not registered
+   */
+  const GraphBuilder *lookup(const std::string &op) const
+  {
+    // Single lookup instead of find() followed by at()
+    auto it = _builder_map.find(op);
+
+    return (it == _builder_map.end()) ? nullptr : it->second.get();
+  }
+
+  // Meyers-singleton accessor
+  static GraphBuilderRegistry &get()
+  {
+    static GraphBuilderRegistry me;
+    return me;
+  }
+
+public:
+  // Key taken by const-reference to avoid copying the string at every call
+  void add(const std::string &op, std::unique_ptr<GraphBuilder> &&builder)
+  {
+    _builder_map[op] = std::move(builder);
+  }
+
+private:
+  // NOTE std::map keys are already immutable; the former 'const std::string'
+  //      key type was redundant
+  std::map<std::string, std::unique_ptr<GraphBuilder>> _builder_map;
+};
+
+} // namespace onnx
+} // namespace moco
+
+#include <stdex/Memory.h>
+
+// Registers BUILDER as the GraphBuilder for onnx operator NAME.
+// The __attribute__((constructor)) function runs before main(), so each
+// translation unit using this macro self-registers into the singleton
+// GraphBuilderRegistry at program start-up.
+#define REGISTER_OP_BUILDER(NAME, BUILDER) \
+  namespace \
+  { \
+  __attribute__((constructor)) void reg_op(void) \
+  { \
+    std::unique_ptr<moco::onnx::BUILDER> builder = stdex::make_unique<moco::onnx::BUILDER>(); \
+    moco::onnx::GraphBuilderRegistry::get().add(#NAME, std::move(builder)); \
+  } \
+  }
+
+#endif // __MOCO_FRONTEND_ONNX_GRAPH_BUILDER_REGISTRY_H__
diff --git a/compiler/oneco/src/Onnxutil.cpp b/compiler/oneco/src/Onnxutil.cpp
new file mode 100644
index 000000000..93f06677f
--- /dev/null
+++ b/compiler/oneco/src/Onnxutil.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+#include <algorithm>
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+/**
+ * @note If the platform is little endian, 0x00112233 would be saved as [0x33, 0x22, 0x11, 0x00]
+ *       If not, it would be saved as [0x00, 0x11, 0x22, 0x33]
+ * @return Whether platform is little endian or not
+ */
+bool is_platform_little_endian()
+{
+  // On a little-endian host the lowest-addressed byte of the probe is 0x33
+  const int32_t probe = 0x00112233;
+
+  return *reinterpret_cast<const char *>(&probe) == 0x33;
+}
+
+} // namespace
+
+namespace moco
+{
+namespace onnx
+{
+
+// A domain counts as the default operator-set domain when it is the empty
+// string or "onnx.ai" (signature kept in sync with the header declaration)
+bool is_default_domain(const std::string domain)
+{
+  return domain.empty() || domain == "onnx.ai";
+}
+
+/**
+ * @brief Extract the float elements of 'tensor' as a std::vector<float>
+ *
+ * The proto stores the elements either in raw_data (packed little-endian
+ * bytes) or in the float_data repeated field, never both.
+ */
+std::vector<float> get_float_data(const ::onnx::TensorProto &tensor)
+{
+  std::vector<float> data;
+
+  // Exactly one of the fields is used to store the elements of the tensor
+  assert(!(tensor.has_raw_data() && (tensor.float_data_size() > 0)));
+  assert(tensor.has_raw_data() || (tensor.float_data_size() > 0));
+
+  if (tensor.has_raw_data())
+  {
+    // Mutable copy so byte order can be fixed up in place; this replaces the
+    // previous std::malloc buffer, which leaked if data.insert() threw
+    std::string raw_data = tensor.raw_data();
+
+    // If platform is big endian, byte-swap each float element in place
+    if (!is_platform_little_endian())
+    {
+      const size_t element_size = sizeof(float);
+      const size_t num_elements = raw_data.size() / element_size;
+
+      for (size_t i = 0; i < num_elements; ++i)
+      {
+        auto element_begin = raw_data.begin() + i * element_size;
+        std::reverse(element_begin, element_begin + element_size);
+      }
+    }
+
+    const char *bytes = raw_data.c_str();
+    data.insert(data.end(), reinterpret_cast<const float *>(bytes),
+                reinterpret_cast<const float *>(bytes + raw_data.size()));
+  }
+  else
+  {
+    for (int i = 0; i < tensor.float_data_size(); ++i)
+      data.push_back(tensor.float_data(i));
+  }
+
+  return data;
+}
+
+} // namespace onnx
+} // namespace moco
diff --git a/compiler/oneco/src/Onnxutil.h b/compiler/oneco/src/Onnxutil.h
new file mode 100644
index 000000000..0c2fcac33
--- /dev/null
+++ b/compiler/oneco/src/Onnxutil.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MOCO_FRONTEND_ONNX_ONNXUTIL_H__
+#define __MOCO_FRONTEND_ONNX_ONNXUTIL_H__
+
+#include <onnx/onnx.pb.h>
+
+#include <string>
+
+namespace moco
+{
+namespace onnx
+{
+
+/**
+ * @brief Check whether a domain is the default operator-set domain
+ *        (the empty string or "onnx.ai")
+ * @param [in] domain The domain name to check
+ * @return Whether it is the default domain or not
+ */
+bool is_default_domain(const std::string domain);
+
+/**
+ * @brief Get float tensor data
+ * @param [in] tensor Tensor to get float data
+ * @return Float vector which stores float tensor data
+ */
+std::vector<float> get_float_data(const ::onnx::TensorProto &tensor);
+
+} // namespace onnx
+} // namespace moco
+
+#endif // __MOCO_FRONTEND_ONNX_ONNXUTIL_H__
diff --git a/compiler/oneco/src/Op/Constant.cpp b/compiler/oneco/src/Op/Constant.cpp
new file mode 100644
index 000000000..c14d2729b
--- /dev/null
+++ b/compiler/oneco/src/Op/Constant.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Constant.h"
+
+#include <cassert>
+
+namespace moco
+{
+namespace onnx
+{
+
+// Validate a Constant node with the implementation matching 'opset_version'
+bool ConstantGraphBuilder::validate(OpsetVersion opset_version, const ::onnx::NodeProto &node) const
+{
+  // Constant is only defined from opset version 1 onwards
+  if (opset_version < 1)
+    throw std::runtime_error("Invalid ONNX IR version");
+
+  // Dispatch to the newest implementation the opset allows
+  if (opset_version < 9)
+    return Constant_V1().validate(node);
+
+  return Constant_V9().validate(node);
+}
+
+// Build a Constant node with the implementation matching 'opset_version'
+void ConstantGraphBuilder::build(OpsetVersion opset_version, const ::onnx::NodeProto &node,
+                                 GraphBuilderContext *context) const
+{
+  if (opset_version < 1)
+    throw std::runtime_error("Invalid ONNX IR version");
+
+  if (opset_version < 9)
+    Constant_V9().build(node, context);
+  else
+    Constant_V9().build(node, context);
+}
+
+} // namespace onnx
+} // namespace moco
+
+#include "GraphBuilderRegistry.h"
+
+REGISTER_OP_BUILDER(Constant, ConstantGraphBuilder)
diff --git a/compiler/oneco/src/Op/Constant.h b/compiler/oneco/src/Op/Constant.h
new file mode 100644
index 000000000..e25441d58
--- /dev/null
+++ b/compiler/oneco/src/Op/Constant.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GraphBuilder.h"
+
+#include <cassert>
+
+namespace moco
+{
+namespace onnx
+{
+
+/**
+ * @brief GraphBuilder for Constant(since version 1) node
+ * @note  Accepts only FLOAT16/FLOAT/DOUBLE tensor attributes
+ */
+class Constant_V1
+{
+public:
+  bool validate(const ::onnx::NodeProto &) const;
+  void build(const ::onnx::NodeProto &, GraphBuilderContext *) const;
+};
+
+/**
+ * @brief GraphBuilder for Constant(since version 9) node
+ * @note Until version 1, only FLOAT16, FLOAT, DOUBLE was supported
+ *       Since version 9, all types are supported
+ */
+class Constant_V9
+{
+public:
+  bool validate(const ::onnx::NodeProto &) const;
+  void build(const ::onnx::NodeProto &, GraphBuilderContext *) const;
+};
+
+/**
+ * @brief GraphBuilder for Constant node
+ * @note  Dispatches to Constant_V1/Constant_V9 according to opset version
+ */
+class ConstantGraphBuilder : public GraphBuilder
+{
+public:
+  // 'override' lets the compiler verify these match GraphBuilder's virtuals
+  bool validate(OpsetVersion, const ::onnx::NodeProto &) const override;
+  void build(OpsetVersion, const ::onnx::NodeProto &, GraphBuilderContext *) const override;
+};
+
+} // namespace onnx
+} // namespace moco
diff --git a/compiler/oneco/src/Op/Constant_V1.cpp b/compiler/oneco/src/Op/Constant_V1.cpp
new file mode 100644
index 000000000..916f5fa3a
--- /dev/null
+++ b/compiler/oneco/src/Op/Constant_V1.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Constant.h"
+#include "Convert.h"
+
+#include <cassert>
+
+namespace moco
+{
+namespace onnx
+{
+
+// A Constant node is valid when it carries a tensor attribute of a
+// floating-point type (opset 1 supports FLOAT16/FLOAT/DOUBLE only)
+bool Constant_V1::validate(const ::onnx::NodeProto &node) const
+{
+  if (node.attribute_size() == 0 || !node.attribute(0).has_t())
+    return false;
+
+  const auto type = moco::onnx::tensor_dtype_as_string(node.attribute(0).t().data_type());
+
+  return type == "FLOAT16" || type == "FLOAT" || type == "DOUBLE";
+}
+
+// Convert an ONNX Constant into a loco ConstGen node and enroll it.
+// NOTE Nearly identical to Constant_V9::build; kept separate per opset version.
+void Constant_V1::build(const ::onnx::NodeProto &node, GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  loco::Graph *graph = context->graph();
+  SymbolTable *nodes = context->nodes();
+
+  // Create a "ConstGen" node for Constant
+  auto const_node = graph->nodes()->create<loco::ConstGen>();
+  auto tensor_attribute = node.attribute().Get(0).t();
+  // NOTE(review): dtype is set before size<>() — presumably size<>() allocates
+  //               dtype-specific storage; keep this order
+  const_node->dtype(as_loco_datatype(tensor_attribute.data_type()));
+  const_node->rank(tensor_attribute.dims_size());
+  // TODO Support other data types
+  assert(const_node->dtype() == loco::DataType::FLOAT32);
+  // NOTE(review): tensors serialized via raw_data have float_data_size() == 0;
+  //               confirm such models are rejected before reaching here
+  const_node->size<loco::DataType::FLOAT32>(tensor_attribute.float_data_size());
+
+  // Copy the tensor shape
+  for (uint32_t i = 0; i < const_node->rank(); ++i)
+  {
+    const_node->dim(i) = tensor_attribute.dims(i);
+  }
+
+  // TODO Support other data types
+  for (int i = 0; i < tensor_attribute.float_data_size(); ++i)
+  {
+    const_node->at<loco::DataType::FLOAT32>(i) = tensor_attribute.float_data(i);
+  }
+
+  // Make the node discoverable by both its node name and its output name
+  nodes->enroll(node.name(), const_node);
+  nodes->enroll(node.output(0), const_node);
+}
+
+} // namespace onnx
+} // namespace moco
diff --git a/compiler/oneco/src/Op/Constant_V9.cpp b/compiler/oneco/src/Op/Constant_V9.cpp
new file mode 100644
index 000000000..56dc6cca0
--- /dev/null
+++ b/compiler/oneco/src/Op/Constant_V9.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Constant.h"
+#include "Convert.h"
+
+#include <cassert>
+
+namespace moco
+{
+namespace onnx
+{
+
+// A Constant node is valid as long as it carries a tensor attribute
+bool Constant_V9::validate(const ::onnx::NodeProto &node) const
+{
+  return node.attribute_size() > 0 && node.attribute(0).has_t();
+}
+
+// Convert an ONNX Constant into a loco ConstGen node and enroll it.
+// NOTE Nearly identical to Constant_V1::build; kept separate per opset version.
+void Constant_V9::build(const ::onnx::NodeProto &node, GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  loco::Graph *graph = context->graph();
+  SymbolTable *nodes = context->nodes();
+
+  // Create a "ConstGen" node for Constant
+  auto const_node = graph->nodes()->create<loco::ConstGen>();
+  auto tensor_attribute = node.attribute().Get(0).t();
+  // NOTE(review): dtype is set before size<>() — presumably size<>() allocates
+  //               dtype-specific storage; keep this order
+  const_node->dtype(as_loco_datatype(tensor_attribute.data_type()));
+  const_node->rank(tensor_attribute.dims_size());
+  // TODO Support other data types
+  assert(const_node->dtype() == loco::DataType::FLOAT32);
+  const_node->size<loco::DataType::FLOAT32>(tensor_attribute.float_data_size());
+
+  // Copy the tensor shape
+  for (uint32_t i = 0; i < const_node->rank(); ++i)
+  {
+    const_node->dim(i) = tensor_attribute.dims(i);
+  }
+
+  // TODO Support other data types
+  for (int i = 0; i < tensor_attribute.float_data_size(); ++i)
+  {
+    const_node->at<loco::DataType::FLOAT32>(i) = tensor_attribute.float_data(i);
+  }
+
+  // Make the node discoverable by both its node name and its output name
+  nodes->enroll(node.name(), const_node);
+  nodes->enroll(node.output(0), const_node);
+}
+
+} // namespace onnx
+} // namespace moco
diff --git a/compiler/oneco/src/Op/Identity.cpp b/compiler/oneco/src/Op/Identity.cpp
new file mode 100644
index 000000000..6314b6f96
--- /dev/null
+++ b/compiler/oneco/src/Op/Identity.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Identity.h"
+
+#include <cassert>
+
+namespace moco
+{
+namespace onnx
+{
+
+// Validate an Identity node; only defined from opset version 1 onwards
+bool IdentityGraphBuilder::validate(OpsetVersion opset_version, const ::onnx::NodeProto &node) const
+{
+  if (opset_version < 1)
+    throw std::runtime_error("Invalid ONNX IR version");
+
+  return Identity_V1().validate(node);
+}
+
+// Build an Identity node; only defined from opset version 1 onwards
+void IdentityGraphBuilder::build(OpsetVersion opset_version, const ::onnx::NodeProto &node,
+                                 GraphBuilderContext *context) const
+{
+  if (opset_version < 1)
+    throw std::runtime_error("Invalid ONNX IR version");
+
+  Identity_V1().build(node, context);
+}
+
+} // namespace onnx
+} // namespace moco
+
+#include "GraphBuilderRegistry.h"
+
+REGISTER_OP_BUILDER(Identity, IdentityGraphBuilder)
diff --git a/compiler/oneco/src/Op/Identity.h b/compiler/oneco/src/Op/Identity.h
new file mode 100644
index 000000000..41367bea0
--- /dev/null
+++ b/compiler/oneco/src/Op/Identity.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GraphBuilder.h"
+
+#include <cassert>
+
+namespace moco
+{
+namespace onnx
+{
+
+/**
+ * @brief GraphBuilder for Identity(since version 1) node
+ * @note  Identity has no attributes, so validate() always succeeds
+ */
+class Identity_V1
+{
+public:
+  bool validate(const ::onnx::NodeProto &) const;
+  void build(const ::onnx::NodeProto &, GraphBuilderContext *) const;
+};
+
+/**
+ * @brief GraphBuilder for Identity node
+ * @note  Dispatches to Identity_V1 according to opset version
+ */
+class IdentityGraphBuilder : public GraphBuilder
+{
+public:
+  // 'override' lets the compiler verify these match GraphBuilder's virtuals
+  bool validate(OpsetVersion, const ::onnx::NodeProto &) const override;
+  void build(OpsetVersion, const ::onnx::NodeProto &, GraphBuilderContext *) const override;
+};
+
+} // namespace onnx
+} // namespace moco
diff --git a/compiler/oneco/src/Op/Identity_V1.cpp b/compiler/oneco/src/Op/Identity_V1.cpp
new file mode 100644
index 000000000..6ae65589e
--- /dev/null
+++ b/compiler/oneco/src/Op/Identity_V1.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Identity.h"
+
+#include <cassert>
+
+namespace moco
+{
+namespace onnx
+{
+
+// Identity carries no attributes to check; every node is acceptable
+bool Identity_V1::validate(const ::onnx::NodeProto &) const
+{
+  return true;
+}
+
+// Convert an ONNX Identity into a loco Forward node and enroll it
+void Identity_V1::build(const ::onnx::NodeProto &node, GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  loco::Graph *graph = context->graph();
+  SymbolTable *nodes = context->nodes();
+  SymbolTable *input_names = context->input_names();
+
+  // Identity maps onto loco's "Forward" node
+  auto forward_node = graph->nodes()->create<loco::Forward>();
+
+  // Make the node discoverable by both its node name and its output name
+  nodes->enroll(node.name(), forward_node);
+  nodes->enroll(node.output(0), forward_node);
+
+  // Record all inputs to forward_node; the actual links are set up later
+  // from this table (see the input-wiring pass in Frontend.cpp)
+  for (const auto &input_name : node.input())
+  {
+    input_names->list(forward_node, input_name);
+  }
+}
+
+} // namespace onnx
+} // namespace moco
diff --git a/compiler/onnx2circle/CMakeLists.txt b/compiler/onnx2circle/CMakeLists.txt
new file mode 100644
index 000000000..a0d393bd9
--- /dev/null
+++ b/compiler/onnx2circle/CMakeLists.txt
@@ -0,0 +1,27 @@
+# TODO Allow users to force onnx2circle build
+# Skip the build silently when any required dependency target is missing
+if(NOT TARGET mir2loco)
+  return()
+endif(NOT TARGET mir2loco)
+
+if(NOT TARGET mir_onnx_importer)
+  return()
+endif(NOT TARGET mir_onnx_importer)
+
+if(NOT TARGET exo)
+  return()
+endif(NOT TARGET exo)
+
+message(STATUS "Build onnx2circle: TRUE")
+
+set(SOURCES "src/onnx2circle.cpp")
+
+add_executable(onnx2circle ${SOURCES})
+target_link_libraries(onnx2circle PRIVATE moco_log)
+target_link_libraries(onnx2circle PRIVATE exo)
+target_link_libraries(onnx2circle PRIVATE locop)
+target_link_libraries(onnx2circle PRIVATE hermes_std)
+target_link_libraries(onnx2circle PRIVATE stdex)
+target_link_libraries(onnx2circle PRIVATE angkor cwrap)
+target_link_libraries(onnx2circle PRIVATE mir2loco)
+target_link_libraries(onnx2circle PRIVATE mir_onnx_importer)
+install(TARGETS onnx2circle DESTINATION bin)
diff --git a/compiler/onnx2circle/README.md b/compiler/onnx2circle/README.md
new file mode 100644
index 000000000..55b73870e
--- /dev/null
+++ b/compiler/onnx2circle/README.md
@@ -0,0 +1,3 @@
+# onnx2circle
+
+_onnx2circle_ is an ONNX-to-Circle model converter.
diff --git a/compiler/onnx2circle/requires.cmake b/compiler/onnx2circle/requires.cmake
new file mode 100644
index 000000000..418148e4f
--- /dev/null
+++ b/compiler/onnx2circle/requires.cmake
@@ -0,0 +1,9 @@
+require("stdex")
+require("hermes-std")
+require("mir2loco")
+require("mir-onnx-importer")
+require("exo")
+require("locop")
+require("loco")
+require("cwrap")
+require("angkor")
diff --git a/compiler/onnx2circle/src/onnx2circle.cpp b/compiler/onnx2circle/src/onnx2circle.cpp
new file mode 100644
index 000000000..c329ed3d5
--- /dev/null
+++ b/compiler/onnx2circle/src/onnx2circle.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exo/LoggingContext.h"
+#include "exo/CircleExporter.h"
+
+#include "mir2loco.h"
+#include "ONNXImporterImpl.h"
+
+#include "locop/FormattedGraph.h"
+
+#include "hermes/ConsoleReporter.h"
+#include "hermes/EnvConfig.h"
+
+#include "stdex/Memory.h"
+
+#include <cassert>
+
+#include <iostream>
+#include <stdexcept>
+#include <string>
+
+//
+// Logging Support
+//
+namespace
+{
+
+// RAII log source: attaches to the hermes context on construction and
+// detaches on destruction
+struct Logger final : public hermes::Source
+{
+  Logger(hermes::Context *ctx) { activate(ctx->sources(), ctx->bus()); }
+  ~Logger() { deactivate(); }
+};
+
+// Lazily-created singleton hermes context for this tool's own log messages
+struct LoggingContext
+{
+  static hermes::Context *get(void)
+  {
+    using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
+
+    static hermes::Context *ctx = nullptr;
+
+    // NOTE(review): lazy init is not thread-safe; presumably only called
+    //               from main — confirm before using from other threads
+    if (ctx == nullptr)
+    {
+      ctx = new hermes::Context;
+      ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
+      // Verbosity is controlled via the ONNX2CIRCLE_Log environment variable
+      ctx->config(stdex::make_unique<EnvConfig>("ONNX2CIRCLE_Log"));
+    }
+
+    return ctx;
+  }
+};
+
+// Print command-line usage to stderr
+void print_help()
+{
+  std::cerr << "Usage: onnx2circle <path/to/onnx> <path/to/circle/model> " << std::endl;
+}
+
+} // namespace
+
+#define LOGGER(name) \
+ ::Logger name { ::LoggingContext::get() }
+
+#define INFO(name) HERMES_INFO(name)
+
+/**
+ * @brief onnx2circle entry point
+ *
+ * Usage: onnx2circle <path/to/onnx> <path/to/circle/model>
+ * Imports the ONNX model through mir, converts it to a loco graph,
+ * and exports the result as a Circle model. Returns 255 on bad arguments.
+ */
+int main(int argc, char **argv)
+{
+  using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
+
+  // This line allows users to control all the exo-circle loggers via ONNX2CIRCLE_Log_Backend
+  exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("ONNX2CIRCLE_Log_Backend"));
+
+  LOGGER(l);
+
+  // TODO We need better args parsing in future
+  if (argc != 3) // was '!(argc == 3)' — same check, idiomatic form
+  {
+    print_help();
+    return 255;
+  }
+
+  std::string onnx_path{argv[1]}; // .pb file
+  std::string circle_path{argv[2]};
+
+  std::cout << "Import from '" << onnx_path << "'" << std::endl;
+  auto mir_g = mir_onnx::loadModel(onnx_path);
+  auto loco_g = mir2loco::Transformer().transform(mir_g.get());
+  std::cout << "Import from '" << onnx_path << "' - Done" << std::endl;
+
+  INFO(l) << "Import Graph" << std::endl;
+  INFO(l) << locop::fmt<locop::Formatter::LinearV1>(loco_g) << std::endl;
+
+  std::cout << "Export into '" << circle_path << "'" << std::endl;
+  exo::CircleExporter(loco_g.get()).dumpToFile(circle_path.c_str());
+  std::cout << "Export into '" << circle_path << "' - Done" << std::endl;
+
+  return 0;
+}
diff --git a/compiler/onnx2tflite-integration-test/CMakeLists.txt b/compiler/onnx2tflite-integration-test/CMakeLists.txt
new file mode 100644
index 000000000..9904be470
--- /dev/null
+++ b/compiler/onnx2tflite-integration-test/CMakeLists.txt
@@ -0,0 +1,120 @@
+nnas_include(TargetRequire)
+nncc_find_resource(ONNXTests)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS onnx2tflite)
+list(APPEND REQUIRED_TARGETS onnxkit)
+list(APPEND REQUIRED_TARGETS nnkit-run)
+list(APPEND REQUIRED_TARGETS nnkit_onnx_backend)
+list(APPEND REQUIRED_TARGETS nnkit_tflite_backend)
+list(APPEND REQUIRED_TARGETS nnkit_randomize_action)
+list(APPEND REQUIRED_TARGETS nnkit_HDF5_export_action)
+list(APPEND REQUIRED_TARGETS nnkit_HDF5_import_action)
+list(APPEND REQUIRED_TARGETS i5diff)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+message(STATUS "Build onnx2tflite-integration-test: ON")
+
+set(TEST_REPO "${ONNXTests_DIR}")
+set(TEST_PBTXT_FILENAME "test.pbtxt")
+
+unset(TESTCASES)
+
+# Registers a testcase name; invoked by the entries in test.lst / test.local.lst.
+macro(add NAME)
+  list(APPEND TESTCASES ${NAME})
+endmacro(add)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+unset(DEPS)
+unset(KEYS)
+
+# For every registered testcase: copy its .pbtxt from the test repo and
+# generate the binary .onnx from it via onnxkit encode.
+foreach(PREFIX IN ITEMS ${TESTCASES})
+  if(NOT IS_DIRECTORY "${TEST_REPO}/${PREFIX}")
+    message(FATAL_ERROR "Missing '${PREFIX}' test")
+  endif()
+
+  set(PBTXT_SOURCE_PATH "${TEST_REPO}/${PREFIX}/${TEST_PBTXT_FILENAME}")
+
+  set(PBTXT_FILE "${PREFIX}.pbtxt")
+  set(PBTXT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PBTXT_FILE}")
+
+  set(ONNX_FILE "${PREFIX}.onnx")
+  set(ONNX_PATH "${CMAKE_CURRENT_BINARY_DIR}/${ONNX_FILE}")
+
+  # Copy .pbtxt
+  add_custom_command(OUTPUT ${PBTXT_PATH}
+    COMMAND ${CMAKE_COMMAND} -E copy "${PBTXT_SOURCE_PATH}" "${PBTXT_PATH}"
+    DEPENDS ${PBTXT_SOURCE_PATH}
+    COMMENT "Generate ${PBTXT_FILE}"
+  )
+
+  # Generate .onnx from .pbtxt
+  add_custom_command(OUTPUT ${ONNX_PATH}
+    COMMAND $<TARGET_FILE:onnxkit> encode ${PBTXT_PATH} ${ONNX_PATH}
+    DEPENDS ${PBTXT_PATH}
+    COMMENT "Generate ${ONNX_FILE}"
+  )
+
+  # NOTE(review): ${INFO_PATH} is never set in this file, so it expands to an
+  # empty string here; confirm whether it is a leftover from a sibling script.
+  list(APPEND DEPS ${INFO_PATH} ${ONNX_PATH})
+  list(APPEND KEYS ${PREFIX})
+endforeach(PREFIX)
+
+##
+## Copy testall
+##
+set(TEST_RUNNER_TEMPLATE "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh")
+set(TEST_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/run")
+
+# Copy the test-runner script into the build tree.
+add_custom_command(
+  OUTPUT ${TEST_RUNNER}
+  COMMAND ${CMAKE_COMMAND} -E copy "${TEST_RUNNER_TEMPLATE}" "${TEST_RUNNER}"
+  DEPENDS ${TEST_RUNNER_TEMPLATE}
+  COMMENT "Generate test runner"
+)
+
+list(APPEND DEPS "${TEST_RUNNER}")
+
+###
+### Generate test.config
+###
+# toolchain.config is sourced by testall.sh and resolves all tool paths at
+# build time via generator expressions.
+set(TOOLCHAIN_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/toolchain.config")
+
+add_custom_command(
+  OUTPUT ${TOOLCHAIN_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E remove -f ${TOOLCHAIN_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'NNKIT_RUN_PATH=\"$<TARGET_FILE:nnkit-run>\"' >> ${TOOLCHAIN_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'ONNX_BACKEND_PATH=\"$<TARGET_FILE:nnkit_onnx_backend>\"' >> ${TOOLCHAIN_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'TFLITE_BACKEND_PATH=\"$<TARGET_FILE:nnkit_tflite_backend>\"' >> ${TOOLCHAIN_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'RANDOMIZE_ACTION_PATH=\"$<TARGET_FILE:nnkit_randomize_action>\"' >> ${TOOLCHAIN_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_EXPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_export_action>\"' >> ${TOOLCHAIN_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_IMPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_import_action>\"' >> ${TOOLCHAIN_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'ONNX2TFLITE_PATH=\"$<TARGET_FILE:onnx2tflite>\"' >> ${TOOLCHAIN_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'I5DIFF_PATH=\"$<TARGET_FILE:i5diff>\"' >> ${TOOLCHAIN_CONFIG}
+  DEPENDS
+    nnkit-run
+    nnkit_onnx_backend
+    nnkit_tflite_backend
+    nnkit_randomize_action
+    nnkit_HDF5_export_action
+    nnkit_HDF5_import_action
+    onnx2tflite
+    i5diff
+  COMMENT "Generate toolchain configuration"
+)
+
+list(APPEND DEPS "${TOOLCHAIN_CONFIG}")
+
+# This "onnx2tflite_integration_test_deps" target enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(onnx2tflite_integration_test_deps ALL DEPENDS ${DEPS})
+
+# Run tests
+add_test(
+  NAME onnx2tflite_integration_test
+  COMMAND "${TEST_RUNNER}"
+          "${TOOLCHAIN_CONFIG}"
+          ${KEYS}
+)
diff --git a/compiler/onnx2tflite-integration-test/requires.cmake b/compiler/onnx2tflite-integration-test/requires.cmake
new file mode 100644
index 000000000..ac96b57f7
--- /dev/null
+++ b/compiler/onnx2tflite-integration-test/requires.cmake
@@ -0,0 +1,6 @@
+require("i5diff")
+require("nnkit-onnxrt")
+require("nnkit-tflite")
+require("nnkit")
+require("onnx2tflite")
+require("onnxkit")
diff --git a/compiler/onnx2tflite-integration-test/test.lst b/compiler/onnx2tflite-integration-test/test.lst
new file mode 100644
index 000000000..58fd26a73
--- /dev/null
+++ b/compiler/onnx2tflite-integration-test/test.lst
@@ -0,0 +1,5 @@
+#add(Const_000)
+add(UNIT_Identity_000)
+add(UNIT_Gemm_000)
+# runtime used in testing does not support 11 op version, skip this test until we update
+#add(UNIT_Gemm_001)
diff --git a/compiler/onnx2tflite-integration-test/testall.sh b/compiler/onnx2tflite-integration-test/testall.sh
new file mode 100755
index 000000000..a1ab44405
--- /dev/null
+++ b/compiler/onnx2tflite-integration-test/testall.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+# Integration-test driver: for each test prefix, convert the ONNX model to
+# TFLITE, run both through nnkit with the same randomized input, and compare
+# the HDF5 outputs with i5diff.
+#
+# Usage: testall.sh <toolchain.config path> [Prefix1] [Prefix2] ...
+# Exits 0 when every test passes, 3 otherwise.
+
+WORKDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+if [[ $# -lt 1 ]]; then
+  echo "USAGE: $0 ..."
+  echo
+  echo "ARGUMENTS:"
+  echo "  [toolchain.config path]"
+  echo "  [Prefix1]"
+  echo "  [Prefix2]"
+  echo "  ..."
+  exit 3
+fi
+
+CONFIG_PATH="$1"; shift
+
+# toolchain.config (generated by CMake) defines all *_PATH variables below.
+source "${CONFIG_PATH}"
+
+echo "-- Use onnx2tflite at '${ONNX2TFLITE_PATH}'"
+echo "-- Use nnkit-run at '${NNKIT_RUN_PATH}'"
+echo "-- Use ONNX backend: ${ONNX_BACKEND_PATH}"
+echo "-- Use TFLITE backend: ${TFLITE_BACKEND_PATH}"
+echo "-- Use randomize action: ${RANDOMIZE_ACTION_PATH}"
+echo "-- Use HDF5 export action: ${HDF5_EXPORT_ACTION_PATH}"
+echo "-- Use HDF5 import action: ${HDF5_IMPORT_ACTION_PATH}"
+echo "-- Use i5diff: ${I5DIFF_PATH}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+
+while [[ $# -ne 0 ]]; do
+  PREFIX="$1"; shift
+
+  TESTED+=("${PREFIX}")
+
+  # A test passes iff this marker file exists at the end of its iteration.
+  PASSED_TAG="${PREFIX}.passed"
+
+  rm -f "${PASSED_TAG}"
+
+  # Merge stderr into stdout for the remainder of the run.
+  exec 2>&1
+
+  echo "-- Use '${PREFIX}.onnx'"
+
+  # Show commands
+  # NOTE(review): set -x is never switched off, so command tracing stays
+  # enabled for all subsequent iterations as well.
+  set -x
+
+  # Generate tflite
+  # NOTE(review): each failing step below skips to the next prefix via
+  # 'continue' WITHOUT appending to FAILED, so the failure list printed at
+  # the end may be incomplete; the exit code is still correct because it
+  # compares TESTED vs PASSED counts.
+  "${ONNX2TFLITE_PATH}" \
+    "-b" \
+    "${WORKDIR}/${PREFIX}.onnx" \
+    "${WORKDIR}/${PREFIX}.tflite"
+
+  if [[ $? -ne 0 ]]; then
+    continue
+  fi
+
+  # Run ONNX: randomize the input, save it, and save the reference output.
+  "${NNKIT_RUN_PATH}" \
+    --backend "${ONNX_BACKEND_PATH}" \
+    --backend-arg "${WORKDIR}/${PREFIX}.onnx" \
+    --pre "${RANDOMIZE_ACTION_PATH}" \
+    --pre "${HDF5_EXPORT_ACTION_PATH}" \
+    --pre-arg "${WORKDIR}/${PREFIX}.input.h5" \
+    --post "${HDF5_EXPORT_ACTION_PATH}" \
+    --post-arg "${WORKDIR}/${PREFIX}.expected.h5"
+
+  if [[ $? -ne 0 ]]; then
+    continue
+  fi
+
+  # Run T/F Lite with the SAME saved input and capture its output.
+  "${NNKIT_RUN_PATH}" \
+    --backend "${TFLITE_BACKEND_PATH}" \
+    --backend-arg "${WORKDIR}/${PREFIX}.tflite" \
+    --pre "${HDF5_IMPORT_ACTION_PATH}" \
+    --pre-arg "${WORKDIR}/${PREFIX}.input.h5" \
+    --post "${HDF5_EXPORT_ACTION_PATH}" \
+    --post-arg "${WORKDIR}/${PREFIX}.obtained.h5"
+
+  if [[ $? -ne 0 ]]; then
+    continue
+  fi
+
+  # Element-wise comparison with absolute tolerance 0.001.
+  "${I5DIFF_PATH}" -d 0.001 "${PREFIX}.expected.h5" "${PREFIX}.obtained.h5"
+
+  if [[ $? -eq 0 ]]; then
+    touch "${PASSED_TAG}"
+  fi
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("$PREFIX")
+  else
+    FAILED+=("$PREFIX")
+  fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+  echo "FAILED"
+  for TEST in "${FAILED[@]}"
+  do
+    echo "- ${TEST}"
+  done
+  exit 3
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/onnx2tflite/CMakeLists.txt b/compiler/onnx2tflite/CMakeLists.txt
new file mode 100644
index 000000000..140fba94b
--- /dev/null
+++ b/compiler/onnx2tflite/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Build the onnx2tflite driver (ONNX -> MIR -> loco -> TFLITE).
+# NOTE(review): TargetRequire is included but TargetRequire_Return is never
+# called here, so missing dependency targets are not guarded against.
+nnas_include(TargetRequire)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(onnx2tflite ${SOURCES})
+target_link_libraries(onnx2tflite mir_onnx_importer)
+target_link_libraries(onnx2tflite mir2loco)
+target_link_libraries(onnx2tflite exo)
diff --git a/compiler/onnx2tflite/requires.cmake b/compiler/onnx2tflite/requires.cmake
new file mode 100644
index 000000000..cc05edd84
--- /dev/null
+++ b/compiler/onnx2tflite/requires.cmake
@@ -0,0 +1,3 @@
+require("mir-onnx-importer")
+require("mir2loco")
+require("exo")
diff --git a/compiler/onnx2tflite/src/Driver.cpp b/compiler/onnx2tflite/src/Driver.cpp
new file mode 100644
index 000000000..2028b5cb0
--- /dev/null
+++ b/compiler/onnx2tflite/src/Driver.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <ONNXImporterImpl.h>
+#include <mir2loco.h>
+#include <exo/TFLExporter.h>
+
+#include <iostream>
+
+namespace
+{
+
+// Wraps a string in single quotes for log messages.
+std::string quote(const std::string &s) { return "'" + s + "'"; }
+
+// Loads an ONNX model file into a MIR graph.
+// NOTE(review): unused -- main() calls importModelFromTextFile /
+// importModelFromBinaryFile directly; candidate for removal.
+std::unique_ptr<mir::Graph> import(const std::string &onnx_path)
+{
+  return mir_onnx::loadModel(onnx_path);
+}
+
+// Converts a MIR graph to a loco graph (mir2loco pass).
+std::unique_ptr<loco::Graph> transform(const std::unique_ptr<mir::Graph> &mir_graph)
+{
+  mir2loco::Transformer transformer;
+  return transformer.transform(mir_graph.get());
+}
+
+// Prints CLI usage to stdout.
+void printHelp()
+{
+  std::cout << "Usage: onnx2tflite <mode> <path/to/onnx> <path/to/output>\n"
+               "Modes: -t (text file); -b (binary file)"
+            << std::endl;
+}
+
+} // namespace
+
+// ONNX-to-MIR (mir-onnx-importer)
+// MIR-to-LOCO (mir2loco)
+// LOCO-to-TFLITE (exo-tflite)
+// Exits 1 on wrong argument count or unknown mode.
+int main(int argc, char **argv)
+{
+  // onnx2tflite <mode> <path/to/onnx> <path/to/output>
+  // modes: -t (text file); -b (binary file)
+  if (argc != 4)
+  {
+    printHelp();
+    exit(1);
+  }
+  std::string mode{argv[1]};
+  std::string onnx_path{argv[2]};
+  std::string tflite_path{argv[3]};
+
+  // Import the ONNX model in the requested serialization format.
+  std::cout << "Import " << quote(onnx_path) << std::endl;
+  std::unique_ptr<mir::Graph> mir_graph;
+  if (mode == "-t")
+    mir_graph = mir_onnx::importModelFromTextFile(onnx_path);
+  else if (mode == "-b")
+    mir_graph = mir_onnx::importModelFromBinaryFile(onnx_path);
+  else
+  {
+    printHelp();
+    exit(1);
+  }
+  std::cout << "Import " << quote(onnx_path) << " - Done" << std::endl;
+
+  // MIR -> loco, then export as a TFLITE flatbuffer.
+  auto loco_graph = transform(mir_graph);
+
+  exo::TFLExporter(loco_graph.get()).dumpToFile(tflite_path.c_str());
+
+  return 0;
+}
diff --git a/compiler/onnxkit/CMakeLists.txt b/compiler/onnxkit/CMakeLists.txt
new file mode 100644
index 000000000..18f1ed423
--- /dev/null
+++ b/compiler/onnxkit/CMakeLists.txt
@@ -0,0 +1,30 @@
+# onnxkit: encode/decode tool for ONNX model files.
+# Silently skipped when Protobuf or the pinned ONNX sources are unavailable.
+nnas_find_package(Protobuf QUIET)
+nnas_find_package(ONNXSource EXACT 1.4.1 QUIET)
+
+if(NOT Protobuf_FOUND)
+  return()
+endif(NOT Protobuf_FOUND)
+
+if(NOT ONNXSource_FOUND)
+  return()
+endif(NOT ONNXSource_FOUND)
+
+message(STATUS "Build onnxkit: TRUE")
+
+# Generate C++ sources from onnx.proto into the build tree.
+Protobuf_Generate(ONNX_PROTO
+  "${CMAKE_CURRENT_BINARY_DIR}/generated"
+  "${ONNXSource_DIR}"
+  "onnx/onnx.proto")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(onnxkitproto STATIC ${ONNX_PROTO_SOURCES})
+set_target_properties(onnxkitproto PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(onnxkitproto PUBLIC ${ONNX_PROTO_INCLUDE_DIRS})
+target_link_libraries(onnxkitproto PUBLIC libprotobuf)
+
+add_executable(onnxkit ${SOURCES})
+target_link_libraries(onnxkit PRIVATE stdex)
+target_link_libraries(onnxkit PRIVATE cli)
+target_link_libraries(onnxkit PRIVATE onnxkitproto)
+target_link_libraries(onnxkit PRIVATE nncc_common)
diff --git a/compiler/onnxkit/README.md b/compiler/onnxkit/README.md
new file mode 100644
index 000000000..d2066cf65
--- /dev/null
+++ b/compiler/onnxkit/README.md
@@ -0,0 +1,61 @@
+# onnxkit
+
+### Purpose
+
+_onnxkit_ allows users to encode/decode ONNX model files.
+
+### How to use
+
+Currently it supports two operations, _decode_ and _encode_.
+
+```
+nncc$ path_to_onnxkit/onnxkit
+ERROR: COMMAND is not provided
+
+USAGE: path_to_onnxkit/onnxkit [COMMAND] ...
+
+SUPPORTED COMMANDS:
+ decode
+ encode
+```
+
+`decode` reads a binary graphproto file and shows its textual form.
+
+`encode` is the reverse of decode, it reads a textual graphproto file and prints
+its binary form.
+
+Each command can read from or print to the console or from/to a file given as
+an argument. The first argument is used as the input file path and the second
+as the output file path. If the second argument is omitted, output goes to the
+console. To read the input from the console, pass `-` as the first argument.
+
+### Examples
+
+Example to `decode`
+```
+nncc$ cat my_awesome_model.pb | path_to_onnxkit/onnxkit decode > decoded.pbtxt
+```
+```
+nncc$ cat my_awesome_model.pb | path_to_onnxkit/onnxkit decode - decoded.pbtxt
+```
+```
+nncc$ path_to_onnxkit/onnxkit decode my_awesome_model.pb > decoded.pbtxt
+```
+```
+nncc$ path_to_onnxkit/onnxkit decode my_awesome_model.pb decoded.pbtxt
+```
+
+The four `decode` examples above give the same result. The same pattern
+applies to the other commands.
+
+Example to `encode`
+```
+nncc$ cat decoded.pbtxt | path_to_onnxkit/onnxkit encode > encoded.pb
+```
+
+### Dependency
+
+- onnx
+- Protobuf
+- cli
+- stdex
diff --git a/compiler/onnxkit/src/DecodeCommand.cpp b/compiler/onnxkit/src/DecodeCommand.cpp
new file mode 100644
index 000000000..b1ab8eb8f
--- /dev/null
+++ b/compiler/onnxkit/src/DecodeCommand.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DecodeCommand.hpp"
+#include "Support.hpp"
+
+#include <onnx/onnx.pb.h>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+// Reads a binary ONNX ModelProto from the configured input stream and prints
+// its protobuf text form to the configured output stream.
+// Returns 0 on success, 255 on parse failure.
+// NOTE(review): std::cerr relies on <iostream> pulled in via Support.hpp.
+int DecodeCommand::run(int argc, const char *const *argv) const
+{
+  onnx::ModelProto model;
+
+  Cmdline cmdline(argc, argv);
+
+  // Resolve input/output: file paths from argv, or console when omitted/"-".
+  auto ui = make_ui(cmdline);
+
+  google::protobuf::io::IstreamInputStream is{ui->in()};
+  google::protobuf::io::CodedInputStream coded_is{&is};
+
+  if (!model.ParseFromCodedStream(&coded_is))
+  {
+    std::cerr << "ERROR: Failed to parse ONNX model" << std::endl;
+    return 255;
+  }
+
+  google::protobuf::io::OstreamOutputStream os{ui->out()};
+  google::protobuf::TextFormat::Print(model, &os);
+
+  return 0;
+}
diff --git a/compiler/onnxkit/src/DecodeCommand.hpp b/compiler/onnxkit/src/DecodeCommand.hpp
new file mode 100644
index 000000000..023791bb2
--- /dev/null
+++ b/compiler/onnxkit/src/DecodeCommand.hpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DECODE_COMMAND_H__
+#define __DECODE_COMMAND_H__
+
+#include <cli/Command.h>
+
+// "decode" sub-command: binary ONNX model -> protobuf text form.
+struct DecodeCommand final : public cli::Command
+{
+  int run(int argc, const char *const *argv) const override;
+};
+
+#endif // __DECODE_COMMAND_H__
diff --git a/compiler/onnxkit/src/EncodeCommand.cpp b/compiler/onnxkit/src/EncodeCommand.cpp
new file mode 100644
index 000000000..99997998e
--- /dev/null
+++ b/compiler/onnxkit/src/EncodeCommand.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EncodeCommand.hpp"
+#include "Support.hpp"
+
+#include <onnx/onnx.pb.h>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+// Reads a textual (protobuf text-format) ONNX ModelProto from the configured
+// input stream and writes its binary serialization to the configured output
+// stream. Returns 0 on success, 255 on parse or serialization failure.
+// NOTE(review): std::cerr relies on <iostream> pulled in via Support.hpp.
+int EncodeCommand::run(int argc, const char *const *argv) const
+{
+  onnx::ModelProto model;
+
+  Cmdline cmdline(argc, argv);
+
+  // Resolve input/output: file paths from argv, or console when omitted/"-".
+  auto ui = make_ui(cmdline);
+
+  google::protobuf::io::IstreamInputStream is{ui->in()};
+
+  if (!google::protobuf::TextFormat::Parse(&is, &model))
+  {
+    std::cerr << "ERROR: Failed to parse text" << std::endl;
+    return 255;
+  }
+
+  google::protobuf::io::OstreamOutputStream os{ui->out()};
+  google::protobuf::io::CodedOutputStream coded_os{&os};
+
+  if (!model.SerializeToCodedStream(&coded_os))
+  {
+    // Fixed grammar in the user-facing message ("a ONNX" -> "an ONNX").
+    std::cerr << "ERROR: Failed to serialize an ONNX model" << std::endl;
+    return 255;
+  }
+
+  return 0;
+}
diff --git a/compiler/onnxkit/src/EncodeCommand.hpp b/compiler/onnxkit/src/EncodeCommand.hpp
new file mode 100644
index 000000000..5676bd66f
--- /dev/null
+++ b/compiler/onnxkit/src/EncodeCommand.hpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCODE_COMMAND_H__
+#define __ENCODE_COMMAND_H__
+
+#include <cli/Command.h>
+
+// "encode" sub-command: protobuf text form -> binary ONNX model.
+struct EncodeCommand final : public cli::Command
+{
+  int run(int argc, const char *const *argv) const override;
+};
+
+#endif // __ENCODE_COMMAND_H__
diff --git a/compiler/onnxkit/src/Main.cpp b/compiler/onnxkit/src/Main.cpp
new file mode 100644
index 000000000..3dfd580ec
--- /dev/null
+++ b/compiler/onnxkit/src/Main.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EncodeCommand.hpp"
+#include "DecodeCommand.hpp"
+
+#include <cli/App.h>
+#include <stdex/Memory.h>
+
+// Entry point: dispatches "encode"/"decode" sub-commands via the cli::App
+// framework; argv[0] is consumed as the program name.
+int main(int argc, char **argv)
+{
+  cli::App app{argv[0]};
+
+  app.insert("encode", stdex::make_unique<EncodeCommand>());
+  app.insert("decode", stdex::make_unique<DecodeCommand>());
+
+  // Forward the remaining arguments (sub-command name first) to the app.
+  return app.run(argc - 1, argv + 1);
+}
diff --git a/compiler/onnxkit/src/Support.cpp b/compiler/onnxkit/src/Support.cpp
new file mode 100644
index 000000000..8c0774175
--- /dev/null
+++ b/compiler/onnxkit/src/Support.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Support.hpp"
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+#include <fstream>
+#include <stdexcept>
+
+namespace
+{
+
+// Opens a file stream of type T (std::ifstream / std::ofstream) at 'path'.
+// Returns nullptr when path is "-" (caller falls back to the console);
+// throws std::runtime_error when the file cannot be opened.
+template <typename T>
+std::unique_ptr<T> open_fstream(const std::string &path, std::ios_base::openmode mode)
+{
+  if (path == "-")
+  {
+    return nullptr;
+  }
+
+  auto stream = stdex::make_unique<T>(path.c_str(), mode);
+  if (!stream->is_open())
+  {
+    throw std::runtime_error{"ERROR: Failed to open " + path};
+  }
+  return stream;
+}
+
+} // namespace
+
+// Returns the argument at 'index'; throws std::runtime_error when out of range.
+std::string Cmdline::get(unsigned int index) const
+{
+  if (index >= _argc)
+    throw std::runtime_error("Argument index out of bound");
+
+  return std::string(_argv[index]);
+}
+
+// Returns the argument at 'index', or the fallback 's' when out of range.
+std::string Cmdline::get_or(unsigned int index, const std::string &s) const
+{
+  if (index >= _argc)
+    return s;
+
+  return std::string(_argv[index]);
+}
+
+// Builds the UI from the command line: argv[0] is the input path, argv[1] the
+// output path; either may be "-" or omitted, which selects the console
+// (open_fstream returns nullptr and UI falls back to std::cin / std::cout).
+// Files are opened in binary mode for both directions.
+std::unique_ptr<UI> make_ui(const Cmdline &cmdargs)
+{
+  auto iocfg = stdex::make_unique<UI>();
+
+  auto in = open_fstream<std::ifstream>(cmdargs.get_or(0, "-"), std::ios::in | std::ios::binary);
+  iocfg->in(std::move(in));
+
+  auto out = open_fstream<std::ofstream>(cmdargs.get_or(1, "-"), std::ios::out | std::ios::binary);
+  iocfg->out(std::move(out));
+
+  return iocfg;
+}
diff --git a/compiler/onnxkit/src/Support.hpp b/compiler/onnxkit/src/Support.hpp
new file mode 100644
index 000000000..a7bf0124d
--- /dev/null
+++ b/compiler/onnxkit/src/Support.hpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SUPPORT_H__
+#define __SUPPORT_H__
+
+#include <string>
+
+#include <iostream>
+#include <memory>
+
+// Thin non-owning view over (argc, argv) with bounds-checked access.
+// The argv array must outlive this object.
+class Cmdline
+{
+public:
+  Cmdline() = delete;
+  Cmdline(int argc, const char *const *argv) : _argc(static_cast<unsigned int>(argc)), _argv{argv}
+  {
+    // DO NOTHING
+  }
+
+  // Argument at 'index'; throws std::runtime_error when out of range.
+  std::string get(unsigned int index) const;
+  // Argument at 'index', or the given fallback when out of range.
+  std::string get_or(unsigned int index, const std::string &) const;
+
+private:
+  unsigned int _argc;
+  const char *const *_argv;
+};
+
+// Holds optional input/output streams for a command; when a stream is not
+// set (nullptr), accessors fall back to the console (std::cin / std::cout).
+class UI
+{
+public:
+  std::istream *in() const { return _in ? _in.get() : &std::cin; }
+  std::ostream *out() const { return _out ? _out.get() : &std::cout; }
+
+public:
+  // Takes ownership of the stream; pass nullptr to select the console.
+  void in(std::unique_ptr<std::istream> &&in) { _in = std::move(in); }
+  void out(std::unique_ptr<std::ostream> &&out) { _out = std::move(out); }
+
+private:
+  std::unique_ptr<std::istream> _in;
+  std::unique_ptr<std::ostream> _out;
+};
+
+std::unique_ptr<UI> make_ui(const Cmdline &cmdargs);
+
+#endif // __SUPPORT_H__
diff --git a/compiler/oops/CMakeLists.txt b/compiler/oops/CMakeLists.txt
new file mode 100644
index 000000000..f12572d54
--- /dev/null
+++ b/compiler/oops/CMakeLists.txt
@@ -0,0 +1,12 @@
+# oops: header-only exception utilities (InternalExn / UserExn).
+add_library(oops INTERFACE)
+target_include_directories(oops INTERFACE include)
+target_link_libraries(oops INTERFACE pepper_str)
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(oops_test test.cpp)
+target_link_libraries(oops_test oops)
diff --git a/compiler/oops/include/oops/InternalExn.h b/compiler/oops/include/oops/InternalExn.h
new file mode 100644
index 000000000..0e11085c0
--- /dev/null
+++ b/compiler/oops/include/oops/InternalExn.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OOPS_INTERNAL_EXN_H__
+#define __OOPS_INTERNAL_EXN_H__
+
+#include <exception>
+#include <string>
+
+/// @brief Throw an internal exception with the given message
+#define INTERNAL_EXN(msg) throw oops::InternalExn(__FILE__, __LINE__, msg)
+
+/// @brief Throw an internal exception with the given message and value
+#define INTERNAL_EXN_V(msg, val) throw oops::InternalExn(__FILE__, __LINE__, msg, val)
+
+namespace oops
+{
+
+template <typename T> uint32_t to_uint32(T a) { return static_cast<uint32_t>(a); }
+
+/**
+ * @brief Exception caused by internal error
+ *
+ * Note: Please use the above MACROs
+ */
+class InternalExn : public std::exception
+{
+public:
+  // Message-only form; what() yields "Internal Exception. <msg> [<file>:<line>]".
+  InternalExn(const char *filename, const int line, const std::string &msg)
+      : _filename(filename), _line(line), _msg(msg)
+  {
+    construct_full_msg();
+  }
+
+  // Message plus numeric value, rendered as "<msg>: <val>".
+  // NOTE(review): 'explicit' has no effect on these multi-argument
+  // constructors (no single-argument conversion to suppress).
+  explicit InternalExn(const char *filename, const int line, const std::string &msg, uint32_t val)
+      : _filename(filename), _line(line), _msg(msg + ": " + std::to_string(val))
+  {
+    construct_full_msg();
+  }
+
+  // Message plus string value, rendered as "<msg>: <val>".
+  explicit InternalExn(const char *filename, const int line, const std::string &msg,
+                       const std::string &val)
+      : _filename(filename), _line(line), _msg(msg + ": " + val)
+  {
+    construct_full_msg();
+  }
+
+  const char *what() const noexcept override { return _full_msg.c_str(); }
+
+private:
+  const std::string _filename;
+  const uint32_t _line;
+  const std::string _msg;
+
+private:
+  // Pre-builds the what() string so what() itself stays noexcept.
+  void construct_full_msg()
+  {
+    _full_msg =
+        "Internal Exception. " + _msg + " [" + _filename + ":" + std::to_string(_line) + "]";
+  }
+
+  std::string _full_msg;
+};
+
+} // namespace oops
+
+#endif // __OOPS_INTERNAL_EXN_H__
diff --git a/compiler/oops/include/oops/UserExn.h b/compiler/oops/include/oops/UserExn.h
new file mode 100644
index 000000000..d0138322d
--- /dev/null
+++ b/compiler/oops/include/oops/UserExn.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OOPS_USER_EXN_H__
+#define __OOPS_USER_EXN_H__
+
+#include <pepper/str.h>
+
+#include <exception>
+#include <string>
+#include <map>
+
+namespace oops
+{
+
+/**
+ * @brief Exception to user
+ *
+ * Pass msg and one additional info, e.g.,
+ * ex) UserExn("Unsupported rank", 4);
+ * ex) UserExn("Unsupported layout", "NHWC");
+ *
+ * Or pass msg with attribute pairs of name & val ,
+ * ex) UserExn("Node has unsupported layout",
+ * "Node", node->name(),
+ * "layout", node->layout());
+ */
+class UserExn : public std::exception
+{
+public:
+  UserExn() = delete;
+
+  // Builds "Error: <msg>: " followed by the rendered info arguments.
+  // NOTE(review): the ": " suffix is appended even when no info args are
+  // given, so a bare UserExn("oops") renders as "Error: oops: ".
+  template <typename... Info> UserExn(const std::string &msg, Info &&... args)
+  {
+    std::stringstream out;
+
+    out << "Error: " << msg + ": ";
+
+    build_info(out, args...);
+
+    _msg = out.str();
+  }
+
+  const char *what() const noexcept override { return _msg.c_str(); };
+
+private:
+  // Recursive case: render one (attr, val) pair as "attr = val, " and recurse
+  // on the remaining arguments.
+  template <typename Attr, typename Val, typename... AttsVals>
+  void build_info(std::stringstream &out, Attr &attr, Val &val, AttsVals &... args)
+  {
+    out << pepper::str(attr, " = ", val);
+    out << ", ";
+
+    build_info(out, args...);
+  }
+
+  // Base case: last (attr, val) pair, without the trailing comma.
+  template <typename Attr, typename Val>
+  void build_info(std::stringstream &out, Attr &attr, Val &val)
+  {
+    out << pepper::str(attr, " = ", val);
+  }
+
+  // Base case: no info arguments at all.
+  void build_info(std::stringstream &) { /* empty */}
+
+  // when only one info of string is provided
+  void build_info(std::stringstream &out, const std::string &val) { out << val; }
+
+  // when only one info of uint32_t is provided
+  void build_info(std::stringstream &out, const uint32_t &val) { out << val; }
+
+private:
+  std::string _msg;
+};
+
+} // namespace oops
+
+#endif // __OOPS_USER_EXN_H__
diff --git a/compiler/oops/test.cpp b/compiler/oops/test.cpp
new file mode 100644
index 000000000..666f62f54
--- /dev/null
+++ b/compiler/oops/test.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "oops/InternalExn.h"
+#include "oops/UserExn.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+// Helper that throws via the message-only macro.
+void batman() { INTERNAL_EXN("Here comes Joker"); }
+
+// Helper that throws via the message-plus-string-value macro.
+void star_wars() { INTERNAL_EXN_V("Something is approaching", "Darth Vader"); }
+
+// Sample enum used to exercise oops::to_uint32 in INTERNAL_EXN_V.
+enum class InfinityStones
+{
+  SpaceStone,
+  RealityStone,
+  OtherStones,
+};
+
+// Verifies that InternalExn::what() embeds both the "msg: value" text and the
+// "file:line" location of the throw site.
+void avengers()
+{
+  std::string where;
+  std::string separator = ":";
+  try
+  {
+    // exception will be raised in next line
+    where = __FILE__ + separator + std::to_string(__LINE__ + 1);
+    INTERNAL_EXN_V("Last stone was gathered", oops::to_uint32(InfinityStones::SpaceStone));
+  }
+  catch (const oops::InternalExn &e)
+  {
+    auto msg = std::string(e.what());
+    // SpaceStone is enumerator 0, hence the ": 0" suffix.
+    ASSERT_TRUE(msg.find("Last stone was gathered: 0") != std::string::npos);
+    ASSERT_TRUE(msg.find(where) != std::string::npos);
+  }
+}
+
+} // namespace
+
+// Both INTERNAL_EXN macros must throw oops::InternalExn; avengers() checks
+// the message format in detail.
+TEST(oopsTest, InternalExn)
+{
+  ASSERT_THROW(batman(), oops::InternalExn);
+  ASSERT_THROW(star_wars(), oops::InternalExn);
+
+  avengers();
+}
+
+// A single string info arg after the message renders as "<msg>: <info>".
+TEST(oopsTest, UserExn_one_info_after_msg)
+{
+  try
+  {
+    throw oops::UserExn("Not a member of Avenger", "Kingsman");
+  }
+  catch (const oops::UserExn &e)
+  {
+    auto msg = std::string(e.what());
+    ASSERT_TRUE(msg.find("Not a member of Avenger: Kingsman") != std::string::npos);
+  }
+}
+
+// Attribute/value pairs render as "Attr = Val" joined with ", ".
+TEST(oopsTest, UserExn_two_pairs_after_msg)
+{
+  try
+  {
+    std::string hero("Spiderman");
+
+    // clang-format off
+    throw oops::UserExn("Hero's age is wrong",
+                        "Hero", hero,
+                        "Age", 97);
+    // clang-format on
+  }
+  catch (const oops::UserExn &e)
+  {
+    auto msg = std::string(e.what());
+    ASSERT_TRUE(msg.find("Hero = Spiderman, Age = 97") != std::string::npos);
+  }
+}
diff --git a/compiler/pepper-assert/CMakeLists.txt b/compiler/pepper-assert/CMakeLists.txt
new file mode 100644
index 000000000..314ba51ad
--- /dev/null
+++ b/compiler/pepper-assert/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_library(pepper_assert INTERFACE)
+target_include_directories(pepper_assert INTERFACE include)
diff --git a/compiler/pepper-assert/include/pepper/assert.h b/compiler/pepper-assert/include/pepper/assert.h
new file mode 100644
index 000000000..fc3b4f40a
--- /dev/null
+++ b/compiler/pepper-assert/include/pepper/assert.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PEPPER_ASSERT_H__
+#define __PEPPER_ASSERT_H__
+
+#include <cassert>
+
+//
+// This example shows how to use DBGARG macro.
+//
+// void f(DBGARG(uint32_t, n))
+// {
+// assert(n < 128);
+// }
+//
+// This will make it easy to remove unused variable warnings in Release build.
+//
+#ifdef NDEBUG
+#define DBGARG(TYP, VAR) TYP
+#else
+#define DBGARG(TYP, VAR) TYP VAR
+#endif // NDEBUG
+
+#endif // __PEPPER_ASSERT_H__
diff --git a/compiler/pepper-env/CMakeLists.txt b/compiler/pepper-env/CMakeLists.txt
new file mode 100644
index 000000000..7371d4caf
--- /dev/null
+++ b/compiler/pepper-env/CMakeLists.txt
@@ -0,0 +1,19 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(pepper_env STATIC ${SOURCES})
+set_target_properties(pepper_env PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(pepper_env PUBLIC include)
+target_link_libraries(pepper_env PRIVATE nncc_common)
+target_link_libraries(pepper_env PUBLIC nncc_coverage)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for test
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(pepper_env_test ${TESTS})
+target_link_libraries(pepper_env_test pepper_env)
diff --git a/compiler/pepper-env/README.md b/compiler/pepper-env/README.md
new file mode 100644
index 000000000..91a1f8be8
--- /dev/null
+++ b/compiler/pepper-env/README.md
@@ -0,0 +1,3 @@
+# pepper-env
+
+_pepper-env_ makes it easy to access "process environment variables".
diff --git a/compiler/pepper-env/include/pepper/env.h b/compiler/pepper-env/include/pepper/env.h
new file mode 100644
index 000000000..233c8b421
--- /dev/null
+++ b/compiler/pepper-env/include/pepper/env.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PEPPER_ENV_H__
+#define __PEPPER_ENV_H__
+
+#include <string>
+
+//
+// KVStore: Key-Value Store Interface
+//
+namespace pepper // TODO Extract this section if necessary
+{
+
+enum class KVStoreTrait
+{
+ Queryable,
+};
+
+template <KVStoreTrait Trait> class KVStoreInterface;
+
+template <> class KVStoreInterface<KVStoreTrait::Queryable>
+{
+public:
+ KVStoreInterface() = default;
+
+public:
+ virtual ~KVStoreInterface() = default;
+
+public: // Core interface (PLEASE PREFER THE HELPERS BELOW)
+ //
+ // "query(k)" SHOULD
+ // - return a valid C-string if the key "k" exists in the store, or
+ // - return nullptr otherwise.
+ //
+ // DESIGN NOTE - Why "query" instead of "get"?
+ //
+ // Let us consider the following class declarations as an example:
+ //
+ // struct Base {
+ // virtual const char *get(const char *) const = 0;
+ // const char *get(const std::string &s) const { return nullptr; }
+ // };
+ //
+ // struct Derived : public Base {
+ // const char *get(const char *) const final { return nullptr; }
+ // };
+ //
+ // It is impossible to write the code of the following form:
+ //
+ // Derived obj;
+ //
+ // std::string s = ...;
+ // obj.get(s);
+ // ^^^^^^^^^^^
+ // error: no viable conversion from 'std::string' (aka 'basic_string<char>') to 'const char *'
+ //
+ // Please refer to the concept of name hiding in C++ for more details.
+ virtual const char *query(const char *k) const = 0;
+
+public: // Derived helper methods
+ const char *get(const std::string &k) const { return query(k.c_str()); }
+
+ /**
+ * NOTE
+ *
+   * get(k, v) is the same as query(k) if the key "k" exists in the store.
+ * get(k, v) returns "v" otherwise
+ */
+ std::string get(const std::string &key, const std::string &default_value) const;
+};
+
+} // namespace pepper
+
+//
+// ProcessEnvironment
+//
+namespace pepper
+{
+
+struct ProcessEnvironment final : public KVStoreInterface<KVStoreTrait::Queryable>
+{
+ const char *query(const char *k) const final;
+};
+
+} // namespace pepper
+
+#endif // __PEPPER_ENV_H__
diff --git a/compiler/pepper-env/src/env.cpp b/compiler/pepper-env/src/env.cpp
new file mode 100644
index 000000000..273db7b4b
--- /dev/null
+++ b/compiler/pepper-env/src/env.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pepper/env.h"
+
+//
+// KVStoreInterface
+//
+namespace pepper
+{
+
+std::string KVStoreInterface<KVStoreTrait::Queryable>::get(const std::string &key,
+ const std::string &default_value) const
+{
+ if (auto p = query(key.c_str()))
+ {
+ return p;
+ }
+ return default_value;
+}
+
+} // namespace pepper
+
+//
+// ProcessEnvironment
+//
+#include <cstdlib>
+
+namespace pepper
+{
+
+const char *ProcessEnvironment::query(const char *k) const { return std::getenv(k); }
+
+} // namespace pepper
diff --git a/compiler/pepper-env/src/env.test.cpp b/compiler/pepper-env/src/env.test.cpp
new file mode 100644
index 000000000..73244da8b
--- /dev/null
+++ b/compiler/pepper-env/src/env.test.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pepper/env.h"
+
+#include <gtest/gtest.h>
+
+using namespace pepper;
+
+namespace
+{
+
+struct EmptyKVStore : public KVStoreInterface<KVStoreTrait::Queryable>
+{
+ const char *query(const char *k) const final { return nullptr; }
+};
+
+} // namespace
+
+TEST(KVStoreQueryableInterfaceTests, get_with_default_value)
+{
+ EmptyKVStore kvs;
+
+ auto obtained = kvs.get("K", "V");
+
+ ASSERT_EQ(obtained, "V");
+}
diff --git a/compiler/pepper-str/CMakeLists.txt b/compiler/pepper-str/CMakeLists.txt
new file mode 100644
index 000000000..cbe01b86a
--- /dev/null
+++ b/compiler/pepper-str/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_library(pepper_str INTERFACE)
+target_include_directories(pepper_str INTERFACE include)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for test
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(pepper_str_test test.cpp)
+target_link_libraries(pepper_str_test pepper_str)
diff --git a/compiler/pepper-str/README.md b/compiler/pepper-str/README.md
new file mode 100644
index 000000000..ee99383d8
--- /dev/null
+++ b/compiler/pepper-str/README.md
@@ -0,0 +1,15 @@
+# pepper-str
+
+Let us simulate string interpolation in C++!
+
+## HOW TO USE
+
+```cxx
+#include <pepper/str.h>
+
+int main(int argc, char **argv)
+{
+ std::cout << pepper::str("There are ", argc, " arguments") << std::endl;
+ return 0;
+}
+```
diff --git a/compiler/pepper-str/include/pepper/str.h b/compiler/pepper-str/include/pepper/str.h
new file mode 100644
index 000000000..efbc3a9c8
--- /dev/null
+++ b/compiler/pepper-str/include/pepper/str.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PEPPER_STR_H__
+#define __PEPPER_STR_H__
+
+#include <ostream>
+#include <sstream>
+
+#include <string>
+
+namespace pepper
+{
+namespace details
+{
+
+template <typename... Arg> void str_impl(std::ostream &os, Arg &&... args);
+
+template <> inline void str_impl(std::ostream &)
+{
+ // DO NOTHING
+ return;
+}
+
+template <typename Arg> inline void str_impl(std::ostream &os, Arg &&arg)
+{
+ os << std::forward<Arg>(arg);
+}
+
+template <typename Arg, typename... Args>
+inline void str_impl(std::ostream &os, Arg &&arg, Args &&... args)
+{
+ str_impl(os, std::forward<Arg>(arg));
+ str_impl(os, std::forward<Args>(args)...);
+}
+
+} // namespace details
+} // namespace pepper
+
+namespace pepper
+{
+
+template <typename... Args> static inline std::string str(Args &&... args)
+{
+ std::stringstream ss;
+ details::str_impl(ss, std::forward<Args>(args)...);
+ return ss.str();
+}
+
+} // namespace pepper
+
+#endif // __PEPPER_STR_H__
diff --git a/compiler/pepper-str/test.cpp b/compiler/pepper-str/test.cpp
new file mode 100644
index 000000000..222c371c8
--- /dev/null
+++ b/compiler/pepper-str/test.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pepper/str.h"
+
+#include <iostream>
+
+#include <gtest/gtest.h>
+
+TEST(StrTests, README)
+{
+ // Let us check whether the example in README.md works!
+ int argc = 4;
+
+ std::cout << pepper::str("There are ", argc, " arguments") << std::endl;
+
+ SUCCEED();
+}
+
+TEST(StrTests, Empty)
+{
+ // pepper::str() returns an empty string
+ ASSERT_EQ(pepper::str(), "");
+}
+
+TEST(StrTests, Single_Int)
+{
+ // Convert a single "int" value as a string
+ ASSERT_EQ(pepper::str(3), "3");
+}
+
+TEST(StrTests, Concat_000)
+{
+ const int n = 3;
+ const int m = 4;
+
+ ASSERT_EQ(pepper::str(n, "+", m, "=", n + m), "3+4=7");
+}
diff --git a/compiler/pepper-strcast/CMakeLists.txt b/compiler/pepper-strcast/CMakeLists.txt
new file mode 100644
index 000000000..5f87e9488
--- /dev/null
+++ b/compiler/pepper-strcast/CMakeLists.txt
@@ -0,0 +1,19 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(pepper_strcast STATIC ${SOURCES})
+set_target_properties(pepper_strcast PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(pepper_strcast PUBLIC include)
+target_link_libraries(pepper_strcast PRIVATE nncc_common)
+target_link_libraries(pepper_strcast PUBLIC nncc_coverage)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for test
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(pepper_strcast_test ${TESTS})
+target_link_libraries(pepper_strcast_test pepper_strcast)
diff --git a/compiler/pepper-strcast/README.md b/compiler/pepper-strcast/README.md
new file mode 100644
index 000000000..e79194617
--- /dev/null
+++ b/compiler/pepper-strcast/README.md
@@ -0,0 +1,3 @@
+# pepper-strcast
+
+_pepper-strcast_ is a collection of string-to-value casting functions.
diff --git a/compiler/pepper-strcast/include/pepper/strcast.h b/compiler/pepper-strcast/include/pepper/strcast.h
new file mode 100644
index 000000000..5ff1b5eb1
--- /dev/null
+++ b/compiler/pepper-strcast/include/pepper/strcast.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PEPPER_STRCAST_H__
+#define __PEPPER_STRCAST_H__
+
+namespace pepper
+{
+
+/**
+ * @brief Cast a C-string as a value of type T
+ *
+ * safe_strcast(s, v) returns v if s is nullptr.
+ */
+template <typename T> T safe_strcast(const char *s, const T &v);
+
+template <> int safe_strcast<int>(const char *s, const int &v);
+
+} // namespace pepper
+
+#endif // __PEPPER_STRCAST_H__
diff --git a/compiler/pepper-strcast/src/strcast.cpp b/compiler/pepper-strcast/src/strcast.cpp
new file mode 100644
index 000000000..7b78305ce
--- /dev/null
+++ b/compiler/pepper-strcast/src/strcast.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pepper/strcast.h"
+
+#include <string>
+
+namespace pepper
+{
+
+template <> int safe_strcast<int>(const char *s, const int &v)
+{
+ return (s == nullptr) ? v : std::stoi(s);
+}
+
+} // namespace pepper
diff --git a/compiler/pepper-strcast/src/strcast.test.cpp b/compiler/pepper-strcast/src/strcast.test.cpp
new file mode 100644
index 000000000..136479dbc
--- /dev/null
+++ b/compiler/pepper-strcast/src/strcast.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pepper/strcast.h"
+
+#include <gtest/gtest.h>
+
+TEST(StrCastTests, safe_strcast_int)
+{
+ ASSERT_EQ(pepper::safe_strcast<int>("0", 128), 0);
+ ASSERT_EQ(pepper::safe_strcast<int>("2", 128), 2);
+ ASSERT_EQ(pepper::safe_strcast<int>(nullptr, 128), 128);
+}
diff --git a/compiler/plier-tf/CMakeLists.txt b/compiler/plier-tf/CMakeLists.txt
new file mode 100644
index 000000000..b817d138b
--- /dev/null
+++ b/compiler/plier-tf/CMakeLists.txt
@@ -0,0 +1,28 @@
+if(NOT TARGET mio_tf)
+ return()
+endif(NOT TARGET mio_tf)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(plier_tf STATIC ${SOURCES})
+set_target_properties(plier_tf PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(plier_tf PUBLIC include)
+
+target_link_libraries(plier_tf PUBLIC angkor)
+target_link_libraries(plier_tf PUBLIC loco)
+target_link_libraries(plier_tf PUBLIC mio_tf)
+
+# Apply global configurations (e.g. warnings as error)
+target_link_libraries(plier_tf PRIVATE nncc_common)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for test
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(plier_tf_test ${TESTS})
+target_link_libraries(plier_tf_test plier_tf)
diff --git a/compiler/plier-tf/README.md b/compiler/plier-tf/README.md
new file mode 100644
index 000000000..b7c1d6116
--- /dev/null
+++ b/compiler/plier-tf/README.md
@@ -0,0 +1,3 @@
+# plier-tf
+
+_plier-tf_ is a collection of small tools to handle TensorFlow model.
diff --git a/compiler/plier-tf/include/plier/tf/Convert.h b/compiler/plier-tf/include/plier/tf/Convert.h
new file mode 100644
index 000000000..13e855d20
--- /dev/null
+++ b/compiler/plier-tf/include/plier/tf/Convert.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PLIER_TF_CONVERT_H__
+#define __PLIER_TF_CONVERT_H__
+
+#include <loco.h>
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+#include <string>
+
+namespace plier
+{
+namespace tf
+{
+
+bool has_attr(const tensorflow::NodeDef &node, const std::string &attr_name);
+bool has_attrs(const tensorflow::NodeDef &node, const std::vector<std::string> &attr_names);
+
+tensorflow::DataType get_datatype_attr(const tensorflow::NodeDef &node,
+ const std::string &attr_name);
+const tensorflow::TensorShapeProto &get_shape_attr(const tensorflow::NodeDef &node,
+ const std::string &attr_name);
+const tensorflow::TensorProto &get_tensor_attr(const tensorflow::NodeDef &node,
+ const std::string &attr_name);
+const tensorflow::AttrValue_ListValue &get_list_attr(const tensorflow::NodeDef &node,
+ const std::string &attr_name);
+const std::string &get_string_attr(const tensorflow::NodeDef &node, const std::string &attr_name);
+int64_t get_int_attr(const tensorflow::NodeDef &node, const std::string &attr_name);
+float get_float_attr(const tensorflow::NodeDef &node, const std::string &attr_name);
+bool get_bool_attr(const tensorflow::NodeDef &node, const std::string &attr_name);
+
+std::vector<int64_t> as_int64_list(const tensorflow::AttrValue_ListValue &lv);
+
+loco::DataType as_loco_datatype(const tensorflow::DataType dtype);
+
+/**
+ * @brief Class to represent TensorFlow "data_format" attr.
+ */
+enum class DataLayout
+{
+ NHWC,
+ NCHW,
+};
+
+/// @brief Convert TF Data Layout string (e.g., "NHWC") to enum class for programming convenience
+DataLayout as_data_layout(const std::string &tf_layout_str);
+
+DataLayout get_data_layout(const tensorflow::NodeDef &node, const std::string &attr_name);
+
+/**
+ * @brief Copy shape defined in TensorShapeProto to angkor shape
+ *
+ * @note Unknown dimension is not supported
+ */
+void copy_shape(const tensorflow::TensorShapeProto &tf_shape,
+ nncc::core::ADT::tensor::Shape &to_shape);
+
+} // namespace tf
+} // namespace plier
+
+#endif // __PLIER_TF_CONVERT_H__
diff --git a/compiler/plier-tf/include/plier/tf/TestHelper.h b/compiler/plier-tf/include/plier/tf/TestHelper.h
new file mode 100644
index 000000000..2062f2a00
--- /dev/null
+++ b/compiler/plier-tf/include/plier/tf/TestHelper.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file TestHelper.h
+ */
+
+#ifndef __PLIER_TF_TEST_HELPER_H__
+#define __PLIER_TF_TEST_HELPER_H__
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+namespace plier
+{
+namespace tf
+{
+
+bool parse_graphdef(char const *pbtxt, tensorflow::GraphDef &graphdef);
+
+bool parse_nodedef(char const *pbtxt, tensorflow::NodeDef &nodedef);
+
+} // namespace tf
+} // namespace plier
+
+#endif // __PLIER_TF_TEST_HELPER_H__
diff --git a/compiler/plier-tf/requires.cmake b/compiler/plier-tf/requires.cmake
new file mode 100644
index 000000000..ebf09bcad
--- /dev/null
+++ b/compiler/plier-tf/requires.cmake
@@ -0,0 +1,3 @@
+require("angkor")
+require("loco")
+require("mio-tf")
diff --git a/compiler/plier-tf/src/Convert.cpp b/compiler/plier-tf/src/Convert.cpp
new file mode 100644
index 000000000..c36df53ed
--- /dev/null
+++ b/compiler/plier-tf/src/Convert.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <plier/tf/Convert.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace plier
+{
+namespace tf
+{
+
+bool has_attr(const tensorflow::NodeDef &node, const std::string &attr_name)
+{
+ return node.attr().count(attr_name) > 0;
+}
+
+bool has_attrs(const tensorflow::NodeDef &node, const std::vector<std::string> &attr_names)
+{
+ for (auto &attr : attr_names)
+ if (!has_attr(node, attr))
+ return false;
+ return true;
+}
+
+tensorflow::DataType get_datatype_attr(const tensorflow::NodeDef &node,
+ const std::string &attr_name)
+{
+ assert(has_attr(node, attr_name));
+ const auto &attr = node.attr().at(attr_name);
+ assert(attr.value_case() == tensorflow::AttrValue::kType);
+ return attr.type();
+}
+
+const tensorflow::TensorShapeProto &get_shape_attr(const tensorflow::NodeDef &node,
+ const std::string &attr_name)
+{
+ assert(has_attr(node, attr_name));
+ const auto &attr = node.attr().at(attr_name);
+ assert(attr.value_case() == tensorflow::AttrValue::kShape);
+ return attr.shape();
+}
+
+const tensorflow::TensorProto &get_tensor_attr(const tensorflow::NodeDef &node,
+ const std::string &attr_name)
+{
+ assert(has_attr(node, attr_name));
+ const auto &attr = node.attr().at(attr_name);
+ assert(attr.value_case() == tensorflow::AttrValue::kTensor);
+ return attr.tensor();
+}
+
+const ::tensorflow::AttrValue_ListValue &get_list_attr(const tensorflow::NodeDef &node,
+ const std::string &attr_name)
+{
+ assert(has_attr(node, attr_name));
+ const auto &attr = node.attr().at(attr_name);
+ assert(attr.value_case() == tensorflow::AttrValue::kList);
+ return attr.list();
+}
+
+const std::string &get_string_attr(const tensorflow::NodeDef &node, const std::string &attr_name)
+{
+ assert(has_attr(node, attr_name));
+ const auto &attr = node.attr().at(attr_name);
+ assert(attr.value_case() == tensorflow::AttrValue::kS);
+ return attr.s();
+}
+
+int64_t get_int_attr(const tensorflow::NodeDef &node, const std::string &attr_name)
+{
+ assert(has_attr(node, attr_name));
+ const auto &attr = node.attr().at(attr_name);
+ assert(attr.value_case() == tensorflow::AttrValue::kI);
+ return attr.i();
+}
+
+float get_float_attr(const tensorflow::NodeDef &node, const std::string &attr_name)
+{
+ assert(has_attr(node, attr_name));
+ const auto &attr = node.attr().at(attr_name);
+ assert(attr.value_case() == tensorflow::AttrValue::kF);
+ return attr.f();
+}
+
+bool get_bool_attr(const tensorflow::NodeDef &node, const std::string &attr_name)
+{
+ assert(has_attr(node, attr_name));
+ const auto &attr = node.attr().at(attr_name);
+ assert(attr.value_case() == tensorflow::AttrValue::kB);
+ return attr.b();
+}
+
+std::vector<int64_t> as_int64_list(const tensorflow::AttrValue_ListValue &lv)
+{
+ std::vector<int64_t> vi;
+ int isize = lv.i_size();
+
+ vi.resize(isize);
+ for (int i = 0; i < isize; ++i)
+ vi[i] = lv.i(i);
+
+ return vi;
+}
+
+loco::DataType as_loco_datatype(const tensorflow::DataType tf_dtype)
+{
+ switch (tf_dtype)
+ {
+ case tensorflow::DT_INT8:
+ return loco::DataType::S8;
+ case tensorflow::DT_UINT8:
+ return loco::DataType::U8;
+ case tensorflow::DT_FLOAT:
+ return loco::DataType::FLOAT32;
+ case tensorflow::DT_INT32:
+ return loco::DataType::S32;
+ case tensorflow::DT_INT64:
+ return loco::DataType::S64;
+ case tensorflow::DT_BOOL:
+ case tensorflow::DT_STRING:
+ case tensorflow::DT_COMPLEX64:
+ default:
+ break;
+ }
+ throw std::runtime_error{"Unsupported tensorflow dtype: " + tensorflow::DataType_Name(tf_dtype)};
+}
+
+DataLayout as_data_layout(const std::string &tf_layout_str)
+{
+ if (tf_layout_str == "NHWC")
+ return DataLayout::NHWC;
+ else if (tf_layout_str == "NCHW")
+ return DataLayout::NCHW;
+ else
+ throw std::runtime_error("unknown data layout");
+}
+
+DataLayout get_data_layout(const tensorflow::NodeDef &node, const std::string &attr_name)
+{
+ auto layout = get_string_attr(node, attr_name);
+
+ if (layout == "NHWC")
+ return DataLayout::NHWC;
+ else if (layout == "NCHW")
+ return DataLayout::NCHW;
+ else
+ throw std::runtime_error("unknown data layout");
+}
+
+void copy_shape(const tensorflow::TensorShapeProto &tf_shape,
+ nncc::core::ADT::tensor::Shape &to_shape)
+{
+ assert(!tf_shape.unknown_rank());
+
+ int64_t tf_rank = tf_shape.dim_size();
+ assert(tf_rank < std::numeric_limits<uint32_t>::max());
+
+ int32_t rank = static_cast<int32_t>(tf_rank);
+ to_shape.resize(rank);
+
+ for (int32_t d = 0; d < rank; d++)
+ {
+ int64_t dim_value = tf_shape.dim(d).size();
+ assert(dim_value < std::numeric_limits<uint32_t>::max());
+
+ if (dim_value >= 0LL)
+ {
+ uint32_t dim_value32 = static_cast<uint32_t>(dim_value);
+ to_shape.dim(d) = dim_value32;
+ }
+ else
+ {
+ throw std::runtime_error("Cannot handle unknown dimension");
+ // TODO support unknown dimension
+ }
+ }
+}
+
+} // namespace tf
+} // namespace plier
diff --git a/compiler/plier-tf/src/Convert.test.cpp b/compiler/plier-tf/src/Convert.test.cpp
new file mode 100644
index 000000000..d6dfed4a0
--- /dev/null
+++ b/compiler/plier-tf/src/Convert.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <plier/tf/Convert.h>
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+#include <gtest/gtest.h>
+
+#include <string>
+
+namespace
+{
+
+void prepare_test_node(tensorflow::NodeDef &node)
+{
+ node.set_op("Placeholder");
+ node.set_name("node");
+
+ tensorflow::AttrValue dtype_attr;
+ dtype_attr.set_type(tensorflow::DT_FLOAT);
+ (*node.mutable_attr())["dtype_1"] = dtype_attr;
+
+ auto *shape = (*node.mutable_attr())["shape_1"].mutable_shape();
+ shape->add_dim()->set_size(1);
+ shape->add_dim()->set_size(2);
+ shape->add_dim()->set_size(4);
+ shape->add_dim()->set_size(8);
+
+ auto *list = (*node.mutable_attr())["list_1"].mutable_list();
+ list->add_i(1);
+ list->add_i(20);
+ list->add_i(1LL << 40);
+ list->add_i(-(1LL << 40));
+}
+
+} // namespace
+
+TEST(plier_Convert, attr)
+{
+ tensorflow::NodeDef node;
+ prepare_test_node(node);
+
+ ASSERT_TRUE(plier::tf::has_attr(node, "dtype_1"));
+ ASSERT_FALSE(plier::tf::has_attr(node, "other"));
+}
+
+TEST(plier_Convert, attr_datatype)
+{
+ tensorflow::NodeDef node;
+ prepare_test_node(node);
+
+ ASSERT_EQ(plier::tf::get_datatype_attr(node, "dtype_1"), tensorflow::DT_FLOAT);
+}
+
+TEST(plier_Convert, attr_shape)
+{
+ tensorflow::NodeDef node;
+ prepare_test_node(node);
+
+ const auto &shape = plier::tf::get_shape_attr(node, "shape_1");
+ ASSERT_EQ(shape.dim_size(), 4);
+ ASSERT_EQ(shape.dim(0).size(), 1);
+ ASSERT_EQ(shape.dim(1).size(), 2);
+ ASSERT_EQ(shape.dim(2).size(), 4);
+ ASSERT_EQ(shape.dim(3).size(), 8);
+}
+
+TEST(plier_Convert, to_loco_datatype)
+{
+ ASSERT_EQ(plier::tf::as_loco_datatype(tensorflow::DT_FLOAT), loco::DataType::FLOAT32);
+}
+
+TEST(plier_Convert, attr_ilist)
+{
+ tensorflow::NodeDef node;
+ prepare_test_node(node);
+
+ const auto &p_list = plier::tf::get_list_attr(node, "list_1");
+ auto i_list = plier::tf::as_int64_list(p_list);
+ ASSERT_EQ(i_list.size(), 4);
+ ASSERT_EQ(i_list.at(0), 1);
+ ASSERT_EQ(i_list.at(1), 20);
+ ASSERT_EQ(i_list.at(2), 1LL << 40);
+ ASSERT_EQ(i_list.at(3), -(1LL << 40));
+}
+
+TEST(plier_Convert, to_data_layout)
+{
+ ASSERT_EQ(plier::tf::as_data_layout("NHWC"), plier::tf::DataLayout::NHWC);
+ ASSERT_EQ(plier::tf::as_data_layout("NCHW"), plier::tf::DataLayout::NCHW);
+}
+
+TEST(plier_Convert, copy_shape_thrown_on_unknown_dim)
+{
+ tensorflow::TensorShapeProto tf_shape;
+ nncc::core::ADT::tensor::Shape angkor_shape;
+
+ tf_shape.add_dim()->set_size(-1);
+
+ ASSERT_ANY_THROW(plier::tf::copy_shape(tf_shape, angkor_shape));
+}
diff --git a/compiler/plier-tf/src/TestHelper.cpp b/compiler/plier-tf/src/TestHelper.cpp
new file mode 100644
index 000000000..a551e89f9
--- /dev/null
+++ b/compiler/plier-tf/src/TestHelper.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This is copied from moco
+
+#include <plier/tf/TestHelper.h>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <cstring>
+#include <istream>
+
+namespace
+{
+
+struct membuf : std::streambuf
+{
+ membuf(char const *base, size_t size)
+ {
+ char *p(const_cast<char *>(base));
+ this->setg(p, p, p + size);
+ }
+};
+
+struct imemstream : virtual membuf, std::istream
+{
+ imemstream(char const *base, size_t size)
+ : membuf(base, size), std::istream(static_cast<std::streambuf *>(this))
+ {
+ }
+};
+
+} // namespace
+
+namespace plier
+{
+namespace tf
+{
+
+bool parse_graphdef(char const *pbtxt, tensorflow::GraphDef &graphdef)
+{
+ imemstream mempb(pbtxt, std::strlen(pbtxt));
+ google::protobuf::io::IstreamInputStream iis(&mempb);
+ return google::protobuf::TextFormat::Parse(&iis, &graphdef);
+}
+
+bool parse_nodedef(char const *pbtxt, tensorflow::NodeDef &nodedef)
+{
+ imemstream mempb(pbtxt, std::strlen(pbtxt));
+ google::protobuf::io::IstreamInputStream iis(&mempb);
+ return google::protobuf::TextFormat::Parse(&iis, &nodedef);
+}
+
+} // namespace tf
+} // namespace plier
diff --git a/compiler/pp/CMakeLists.txt b/compiler/pp/CMakeLists.txt
new file mode 100644
index 000000000..2c25c6406
--- /dev/null
+++ b/compiler/pp/CMakeLists.txt
@@ -0,0 +1,20 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(pp STATIC ${SOURCES})
+set_target_properties(pp PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(pp PUBLIC include)
+target_link_libraries(pp PRIVATE nncc_common)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for internal testing
+nnas_find_package(GTest REQUIRED)
+
+add_executable(pp_test ${TESTS})
+target_link_libraries(pp_test pp)
+target_link_libraries(pp_test gtest_main)
+add_test(pp_test pp_test)
diff --git a/compiler/pp/README.md b/compiler/pp/README.md
new file mode 100644
index 000000000..04811c367
--- /dev/null
+++ b/compiler/pp/README.md
@@ -0,0 +1,35 @@
+# pp
+
+`pp` is a library which provides various helper functions and classes for pretty-printing.
+This originated while writing a C/C++ code generator.
+
+# Function (Feature)
+
+With `pp`, the following can be built:
+- multi-line structure with easy indentation, where each line can be accessed by index
+- indented string
+- concatenating `string`, `int`, etc., without user's explicit type conversion
+- multi-line string
+and so on.
+
+# How to use
+
+- Some of examples are listed below:
+ - `pp::fmt`
+
+ ```cpp
+ std::cout << pp::fmt("Hello ", 2) << "\n"; // "Hello 2"
+  std::cout << pp::fmt("Hello ", "Good ", "World") << "\n"; // "Hello Good World"
+ ```
+ - `pp::IndentedStringBuilder`
+
+ ```cpp
+ pp::IndentedStringBuilder builder{};
+
+ std::cout << builder.build("A") << "\n"; // "A"
+ builder.increase();
+ std::cout << builder.build("B") << "\n"; // " B"
+ builder.decrease();
+ std::cout << builder.build("C") << "\n"; // "C"
+ ```
+ - For more usage and examples, please refer to `*.test.cpp` under `pp/src`.
diff --git a/compiler/pp/include/pp/EnclosedDocument.h b/compiler/pp/include/pp/EnclosedDocument.h
new file mode 100644
index 000000000..92f96e08b
--- /dev/null
+++ b/compiler/pp/include/pp/EnclosedDocument.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PP_ENCLOSED_DOCUMENT_H__
+#define __PP_ENCLOSED_DOCUMENT_H__
+
+#include "pp/LinearDocument.h"
+#include "pp/MultiLineText.h"
+
+namespace pp
+{
+
+class EnclosedDocument final : public MultiLineText
+{
+public:
+ EnclosedDocument() : _front{}, _back{LinearDocument::Direction::Reverse}
+ {
+ // DO NOTHING
+ }
+
+public:
+ LinearDocument &front(void) { return _front; }
+ const LinearDocument &front(void) const { return _front; }
+
+public:
+ LinearDocument &back(void) { return _back; }
+ const LinearDocument &back(void) const { return _back; }
+
+public:
+ uint32_t lines(void) const override;
+ const std::string &line(uint32_t n) const override;
+
+private:
+ LinearDocument _front;
+ LinearDocument _back;
+};
+
+} // namespace pp
+
+#endif // __PP_ENCLOSED_DOCUMENT_H__
diff --git a/compiler/pp/include/pp/Format.h b/compiler/pp/include/pp/Format.h
new file mode 100644
index 000000000..68c1d6c31
--- /dev/null
+++ b/compiler/pp/include/pp/Format.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PP_FORMAT_H__
+#define __PP_FORMAT_H__
+
+#include <ostream>
+#include <sstream>
+
+namespace pp
+{
+
+template <typename Arg> static inline void _fmt(std::ostream &os, const Arg &arg) { os << arg; }
+template <typename Arg, typename... Args>
+static inline void _fmt(std::ostream &os, const Arg &arg, const Args &... args)
+{
+ _fmt(os, arg);
+ _fmt(os, args...);
+}
+
+template <typename... Args> static inline std::string fmt(const Args &... args)
+{
+ std::stringstream ss;
+ _fmt(ss, args...);
+ return ss.str();
+}
+
+} // namespace pp
+
+#endif // __PP_FORMAT_H__
diff --git a/compiler/pp/include/pp/IndentedStringBuilder.h b/compiler/pp/include/pp/IndentedStringBuilder.h
new file mode 100644
index 000000000..2655aff05
--- /dev/null
+++ b/compiler/pp/include/pp/IndentedStringBuilder.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PP_INDENTED_STRING_BUILDER_H__
+#define __PP_INDENTED_STRING_BUILDER_H__
+
+#include "pp/Format.h"
+
+namespace pp
+{
+
+class IndentedStringBuilder
+{
+public:
+ IndentedStringBuilder() : _level{0}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void increase(void);
+ void decrease(void);
+
+public:
+ std::string build(const std::string &content);
+
+public:
+ template <typename... Args> std::string build(const Args &... args)
+ {
+ return build(fmt(args...));
+ }
+
+private:
+ uint32_t _level;
+};
+
+} // namespace pp
+
+#endif // __PP_INDENTED_STRING_BUILDER_H__
diff --git a/compiler/pp/include/pp/LinearDocument.h b/compiler/pp/include/pp/LinearDocument.h
new file mode 100644
index 000000000..f9abd89cd
--- /dev/null
+++ b/compiler/pp/include/pp/LinearDocument.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PP_LINEAR_DOCUMENT_H__
+#define __PP_LINEAR_DOCUMENT_H__
+
+#include "pp/Format.h"
+#include "pp/IndentedStringBuilder.h"
+#include "pp/MultiLineText.h"
+
+#include <vector>
+#include <string>
+
+#include <type_traits>
+
+namespace pp
+{
+
+class LinearDocument final : public MultiLineText
+{
+public:
+ enum class Direction
+ {
+ Forward,
+ Reverse
+ };
+
+public:
+ LinearDocument() : _direction{Direction::Forward}
+ {
+ // DO NOTHING
+ }
+
+public:
+ LinearDocument(const Direction &direction) : _direction{direction}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void indent(void);
+ void unindent(void);
+
+public:
+ void append(void);
+
+public:
+ void append(const std::string &line);
+
+ template <typename Derived>
+ typename std::enable_if<std::is_base_of<MultiLineText, Derived>::value>::type
+ append(const Derived &txt)
+ {
+ for (uint32_t n = 0; n < txt.lines(); ++n)
+ {
+ append(txt.line(n));
+ }
+ }
+
+ template <typename... Args> void append(const Args &... args) { append(fmt(args...)); }
+
+public:
+ void append(const LinearDocument &doc);
+
+public:
+ uint32_t lines(void) const override { return _lines.size(); }
+
+public:
+ const std::string &line(uint32_t n) const override;
+
+private:
+ Direction const _direction;
+ IndentedStringBuilder _indent;
+ std::vector<std::string> _lines;
+};
+
+} // namespace pp
+
+#endif // __PP_LINEAR_DOCUMENT_H__
diff --git a/compiler/pp/include/pp/MultiLineText.h b/compiler/pp/include/pp/MultiLineText.h
new file mode 100644
index 000000000..3ea87e792
--- /dev/null
+++ b/compiler/pp/include/pp/MultiLineText.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PP_MULTI_LINE_TEXT_H__
+#define __PP_MULTI_LINE_TEXT_H__
+
+#include <string>
+
+#include <cstdint>
+
+namespace pp
+{
+
+struct MultiLineText
+{
+ virtual ~MultiLineText() = default;
+
+ virtual uint32_t lines(void) const = 0;
+ virtual const std::string &line(uint32_t n) const = 0;
+};
+
+} // namespace pp
+
+#endif // __PP_MULTI_LINE_TEXT_H__
diff --git a/compiler/pp/include/pp/MultiLineTextUtils.h b/compiler/pp/include/pp/MultiLineTextUtils.h
new file mode 100644
index 000000000..5e715e790
--- /dev/null
+++ b/compiler/pp/include/pp/MultiLineTextUtils.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PP_MULTI_LINE_TEXT_UTILS_H__
+#define __PP_MULTI_LINE_TEXT_UTILS_H__
+
+#include "pp/MultiLineText.h"
+
+#include <ostream>
+
+std::ostream &operator<<(std::ostream &os, const pp::MultiLineText &txt);
+
+#endif // __PP_MULTI_LINE_TEXT_UTILS_H__
diff --git a/compiler/pp/src/EnclosedDocument.cpp b/compiler/pp/src/EnclosedDocument.cpp
new file mode 100644
index 000000000..d0bd4495f
--- /dev/null
+++ b/compiler/pp/src/EnclosedDocument.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pp/EnclosedDocument.h"
+
+namespace pp
+{
+
+uint32_t EnclosedDocument::lines(void) const { return _front.lines() + _back.lines(); }
+
+const std::string &EnclosedDocument::line(uint32_t n) const
+{
+ if (n < _front.lines())
+ {
+ return _front.line(n);
+ }
+
+ return _back.line(n - _front.lines());
+}
+
+} // namespace pp
diff --git a/compiler/pp/src/EnclosedDocument.test.cpp b/compiler/pp/src/EnclosedDocument.test.cpp
new file mode 100644
index 000000000..25ed4e06a
--- /dev/null
+++ b/compiler/pp/src/EnclosedDocument.test.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pp/EnclosedDocument.h"
+
+#include <gtest/gtest.h>
+
+TEST(LINEAR_DOCUMENT, lines)
+{
+ pp::EnclosedDocument doc;
+
+ doc.front().append("A");
+ doc.back().append("C");
+ doc.back().append("B");
+
+ ASSERT_EQ(doc.lines(), 3);
+}
+
+TEST(LINEAR_DOCUMENT, line)
+{
+ pp::EnclosedDocument doc;
+
+ doc.front().append("A");
+ doc.front().indent();
+ doc.front().append("B");
+ doc.back().append("C");
+ doc.back().append("B");
+
+ ASSERT_EQ(doc.lines(), 4);
+ ASSERT_EQ(doc.line(0), "A");
+ ASSERT_EQ(doc.line(1), " B");
+ ASSERT_EQ(doc.line(2), "B");
+ ASSERT_EQ(doc.line(3), "C");
+}
diff --git a/compiler/pp/src/Format.test.cpp b/compiler/pp/src/Format.test.cpp
new file mode 100644
index 000000000..4cef73a3e
--- /dev/null
+++ b/compiler/pp/src/Format.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pp/Format.h"
+
+#include <gtest/gtest.h>
+
+TEST(FORMAT, simple_string)
+{
+ ASSERT_EQ(pp::fmt("Hello"), "Hello");
+ ASSERT_EQ(pp::fmt("Hello ", 2), "Hello 2");
+ ASSERT_EQ(pp::fmt("Hello ", 2 + 2), "Hello 4");
+}
+
+TEST(FORMAT, simple_number) { ASSERT_EQ(pp::fmt(2), "2"); }
+TEST(FORMAT, concat_lvalue) { ASSERT_EQ(pp::fmt("Hello ", 2), "Hello 2"); }
+TEST(FORMAT, concat_rvalue) { ASSERT_EQ(pp::fmt("Hello ", 2 + 2), "Hello 4"); }
diff --git a/compiler/pp/src/IndentedStringBuilder.cpp b/compiler/pp/src/IndentedStringBuilder.cpp
new file mode 100644
index 000000000..65e9b32dd
--- /dev/null
+++ b/compiler/pp/src/IndentedStringBuilder.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pp/IndentedStringBuilder.h"
+
+#include <algorithm>
+#include <cassert>
+
+namespace pp
+{
+
+void IndentedStringBuilder::increase(void)
+{
+ // TODO Check overflow
+ ++_level;
+}
+
+void IndentedStringBuilder::decrease(void)
+{
+ assert(_level > 0);
+ --_level;
+}
+
+std::string IndentedStringBuilder::build(const std::string &content)
+{
+ assert(std::find(content.begin(), content.end(), '\n') == content.end());
+
+ const char c = ' ';
+ const size_t space_per_indent_level = 2;
+ const size_t space_count = space_per_indent_level * _level;
+
+ return std::string(space_count, c) + content;
+}
+
+} // namespace pp
diff --git a/compiler/pp/src/IndentedStringBuilder.test.cpp b/compiler/pp/src/IndentedStringBuilder.test.cpp
new file mode 100644
index 000000000..314243027
--- /dev/null
+++ b/compiler/pp/src/IndentedStringBuilder.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pp/IndentedStringBuilder.h"
+
+#include <gtest/gtest.h>
+
+TEST(INDENTED_STRING_BUILDER, usage)
+{
+ pp::IndentedStringBuilder builder{};
+
+ ASSERT_EQ(builder.build("A"), "A");
+ builder.increase();
+ ASSERT_EQ(builder.build("B"), " B");
+ builder.decrease();
+ ASSERT_EQ(builder.build("C"), "C");
+}
diff --git a/compiler/pp/src/LinearDocument.cpp b/compiler/pp/src/LinearDocument.cpp
new file mode 100644
index 000000000..2bc5f260c
--- /dev/null
+++ b/compiler/pp/src/LinearDocument.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pp/LinearDocument.h"
+
+#include <stdexcept>
+
+namespace pp
+{
+
+void LinearDocument::indent(void) { _indent.increase(); }
+void LinearDocument::unindent(void) { _indent.decrease(); }
+
+void LinearDocument::append(void)
+{
+ // NOTE Do NOT indent empty lines
+ _lines.emplace_back("");
+}
+
+void LinearDocument::append(const std::string &line)
+{
+ if (line.empty())
+ {
+ append();
+ return;
+ }
+
+ // Append indentation space(s), and insert the update string to lines
+ _lines.emplace_back(_indent.build(line));
+}
+
+void LinearDocument::append(const LinearDocument &doc)
+{
+ for (uint32_t n = 0; n < doc.lines(); ++n)
+ {
+ // NOTE Do NOT update _lines here and use append method
+ append(doc.line(n));
+ }
+}
+
+const std::string &LinearDocument::line(uint32_t n) const
+{
+ switch (_direction)
+ {
+ case Direction::Forward:
+ {
+ return _lines.at(n);
+ }
+ case Direction::Reverse:
+ {
+ return _lines.at(lines() - n - 1);
+ }
+ }
+
+ throw std::runtime_error{"unreachable"};
+}
+
+} // namespace pp
diff --git a/compiler/pp/src/LinearDocument.test.cpp b/compiler/pp/src/LinearDocument.test.cpp
new file mode 100644
index 000000000..e5c232be4
--- /dev/null
+++ b/compiler/pp/src/LinearDocument.test.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pp/LinearDocument.h"
+
+#include <gtest/gtest.h>
+
+TEST(LINEAR_DOCUMENT, append_void)
+{
+ pp::LinearDocument doc;
+
+ doc.indent();
+ doc.append();
+
+ ASSERT_EQ(doc.lines(), 1);
+ ASSERT_EQ(doc.line(0), "");
+}
+
+TEST(LINEAR_DOCUMENT, append_empty_string)
+{
+ pp::LinearDocument doc;
+
+ doc.indent();
+ doc.append("");
+
+ ASSERT_EQ(doc.lines(), 1);
+ ASSERT_EQ(doc.line(0), "");
+}
+
+TEST(LINEAR_DOCUMENT, formatted_append)
+{
+ pp::LinearDocument doc;
+
+ doc.append("Hello ", 1);
+ ASSERT_EQ(doc.lines(), 1);
+ ASSERT_EQ(doc.line(0), "Hello 1");
+}
+
+TEST(LINEAR_DOCUMENT, forward_append)
+{
+ pp::LinearDocument doc;
+
+ ASSERT_EQ(doc.lines(), 0);
+
+ doc.append("A");
+ doc.append("B");
+ doc.append("C");
+
+ ASSERT_EQ(doc.lines(), 3);
+ ASSERT_EQ(doc.line(0), "A");
+ ASSERT_EQ(doc.line(1), "B");
+ ASSERT_EQ(doc.line(2), "C");
+}
+
+TEST(LINEAR_DOCUMENT, reverse_append)
+{
+ pp::LinearDocument doc{pp::LinearDocument::Direction::Reverse};
+
+ ASSERT_EQ(doc.lines(), 0);
+
+ doc.append("A");
+ doc.append("B");
+ doc.append("C");
+
+ ASSERT_EQ(doc.lines(), 3);
+ ASSERT_EQ(doc.line(0), "C");
+ ASSERT_EQ(doc.line(1), "B");
+ ASSERT_EQ(doc.line(2), "A");
+}
+
+struct TwoLineDocument final : public pp::MultiLineText
+{
+ uint32_t lines(void) const override { return 2; }
+
+ const std::string &line(uint32_t n) const override { return _lines[n]; }
+
+ std::string _lines[2];
+};
+
+TEST(LINEAR_DOCUMENT, append_multi_line_text)
+{
+ pp::LinearDocument doc;
+ TwoLineDocument sub;
+
+ sub._lines[0] = "B";
+ sub._lines[1] = " C";
+
+ doc.append("A");
+ doc.indent();
+
+ doc.append(sub);
+ doc.unindent();
+ doc.append("D");
+
+ ASSERT_EQ(doc.lines(), 4);
+
+ ASSERT_EQ(doc.line(0), "A");
+ ASSERT_EQ(doc.line(1), " B");
+ ASSERT_EQ(doc.line(2), " C");
+ ASSERT_EQ(doc.line(3), "D");
+}
+
+TEST(LINEAR_DOCUMENT, document_append)
+{
+ pp::LinearDocument doc{pp::LinearDocument::Direction::Forward};
+ pp::LinearDocument sub{pp::LinearDocument::Direction::Reverse};
+
+ doc.append("A");
+ doc.indent();
+
+ sub.append("D");
+ sub.indent();
+ sub.append("C");
+ sub.unindent();
+ sub.append("B");
+
+ doc.append(sub);
+ doc.unindent();
+ doc.append("E");
+
+ ASSERT_EQ(doc.lines(), 5);
+
+ ASSERT_EQ(doc.line(0), "A");
+ ASSERT_EQ(doc.line(1), " B");
+ ASSERT_EQ(doc.line(2), " C");
+ ASSERT_EQ(doc.line(3), " D");
+ ASSERT_EQ(doc.line(4), "E");
+}
+
+TEST(LINEAR_DOCUMENT, indent)
+{
+ pp::LinearDocument doc;
+
+ ASSERT_EQ(doc.lines(), 0);
+
+ doc.append("A");
+ doc.indent();
+ doc.append("B");
+ doc.unindent();
+ doc.append("C");
+
+ ASSERT_EQ(doc.lines(), 3);
+
+ ASSERT_EQ(doc.line(0), "A");
+ ASSERT_EQ(doc.line(1), " B");
+ ASSERT_EQ(doc.line(2), "C");
+}
diff --git a/compiler/pp/src/MultiLineTextUtils.cpp b/compiler/pp/src/MultiLineTextUtils.cpp
new file mode 100644
index 000000000..5a40c72f0
--- /dev/null
+++ b/compiler/pp/src/MultiLineTextUtils.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pp/MultiLineTextUtils.h"
+
+std::ostream &operator<<(std::ostream &os, const pp::MultiLineText &txt)
+{
+ if (txt.lines() > 0)
+ {
+ os << txt.line(0);
+
+ for (uint32_t n = 1; n < txt.lines(); ++n)
+ {
+ os << std::endl << txt.line(n);
+ }
+ }
+
+ return os;
+}
diff --git a/compiler/pp/src/MultiLineTextUtils.test.cpp b/compiler/pp/src/MultiLineTextUtils.test.cpp
new file mode 100644
index 000000000..6268f5c15
--- /dev/null
+++ b/compiler/pp/src/MultiLineTextUtils.test.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pp/MultiLineTextUtils.h"
+
+#include <sstream>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+struct DummyMultiLineText final : public pp::MultiLineText
+{
+ std::vector<std::string> content;
+
+ uint32_t lines(void) const override { return content.size(); }
+ const std::string &line(uint32_t n) const override { return content.at(n); }
+};
+
+TEST(MUILTI_LINE_TEXT_UTILS, operator_shift)
+{
+ DummyMultiLineText txt;
+
+ txt.content.emplace_back("A");
+ txt.content.emplace_back(" B");
+ txt.content.emplace_back(" C");
+
+ const char *expected = "A\n"
+ " B\n"
+ " C\n";
+
+ std::stringstream ss;
+
+ ss << txt << std::endl;
+
+ ASSERT_EQ(ss.str(), expected);
+}
diff --git a/compiler/safemain/CMakeLists.txt b/compiler/safemain/CMakeLists.txt
new file mode 100644
index 000000000..c73306ac2
--- /dev/null
+++ b/compiler/safemain/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_library(safemain STATIC SafeMain.cpp)
+set_target_properties(safemain PROPERTIES POSITION_INDEPENDENT_CODE ON)
diff --git a/compiler/safemain/SafeMain.cpp b/compiler/safemain/SafeMain.cpp
new file mode 100644
index 000000000..fc0fc96e1
--- /dev/null
+++ b/compiler/safemain/SafeMain.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <stdexcept>
+
+int entry(int argc, char **argv);
+
+#ifdef NDEBUG
+int main(int argc, char **argv)
+{
+ try
+ {
+ return entry(argc, argv);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "ERROR: " << e.what() << std::endl;
+ }
+
+ return 255;
+}
+#else // NDEBUG
+int main(int argc, char **argv)
+{
+ // NOTE main does not catch internal exceptions for debug build to make it easy to
+ // check the stacktrace with a debugger
+ return entry(argc, argv);
+}
+#endif // !NDEBUG
diff --git a/compiler/stdex/CMakeLists.txt b/compiler/stdex/CMakeLists.txt
new file mode 100644
index 000000000..91f07e69f
--- /dev/null
+++ b/compiler/stdex/CMakeLists.txt
@@ -0,0 +1,16 @@
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+
+add_library(stdex INTERFACE)
+target_include_directories(stdex INTERFACE include)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for test
+nnas_find_package(GTest REQUIRED)
+
+add_executable(stdex_test ${TESTS})
+target_link_libraries(stdex_test stdex)
+target_link_libraries(stdex_test gtest_main)
+add_test(stdex_test stdex_test)
diff --git a/compiler/stdex/README.md b/compiler/stdex/README.md
new file mode 100644
index 000000000..054d08569
--- /dev/null
+++ b/compiler/stdex/README.md
@@ -0,0 +1,22 @@
+# stdex
+
+`stdex` is an extension over standard C++ libraries.
+
+# How to use
+
+Please read each header files.
+
+One example of `stdex::make_unique(..)` in `compiler/stdex/Memory.h` is as follows:
+
+```cpp
+#include <stdex/Memory.h>
+
+using stdex::make_unique;
+
+class A { ... };
+
+...
+
+std::unique_ptr<A> a = make_unique<A>(); // Note: std::make_unique is not supported in C++ 11
+
+```
diff --git a/compiler/stdex/include/stdex/Memory.h b/compiler/stdex/include/stdex/Memory.h
new file mode 100644
index 000000000..86751f073
--- /dev/null
+++ b/compiler/stdex/include/stdex/Memory.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __STDEX_MEMORY_H__
+#define __STDEX_MEMORY_H__
+
+#include <memory>
+
+namespace stdex
+{
+
+using std::make_unique;
+
+} // namespace stdex
+
+#endif // __STDEX_MEMORY_H__
diff --git a/compiler/stdex/include/stdex/Queue.h b/compiler/stdex/include/stdex/Queue.h
new file mode 100644
index 000000000..c72297bc8
--- /dev/null
+++ b/compiler/stdex/include/stdex/Queue.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __STDEX_QUEUE_H__
+#define __STDEX_QUEUE_H__
+
+#include <queue>
+
+namespace stdex
+{
+
+/**
+ * @brief Take the front (= first) element from the queue
+ * @note The queue SHOULD have at least one element
+ */
+template <typename T> T take(std::queue<T> &q)
+{
+ auto res = q.front();
+ q.pop();
+ return res;
+}
+
+} // namespace stdex
+
+#endif // __STDEX_QUEUE_H__
diff --git a/compiler/stdex/include/stdex/Set.h b/compiler/stdex/include/stdex/Set.h
new file mode 100644
index 000000000..2c61e0d01
--- /dev/null
+++ b/compiler/stdex/include/stdex/Set.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __STDEX_SET_H__
+#define __STDEX_SET_H__
+
+#include <set>
+
+template <typename T> bool operator==(const std::set<T> &lhs, const std::set<T> &rhs)
+{
+ if (rhs.size() != lhs.size())
+ {
+ return false;
+ }
+
+ for (const auto &element : lhs)
+ {
+ if (rhs.find(element) == rhs.end())
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+template <typename T> std::set<T> operator-(const std::set<T> &lhs, const std::set<T> &rhs)
+{
+ std::set<T> res;
+
+ for (const auto &element : lhs)
+ {
+ if (rhs.find(element) == rhs.end())
+ {
+ res.insert(element);
+ }
+ }
+
+ return res;
+}
+
+#endif // __STDEX_SET_H__
diff --git a/compiler/stdex/src/Memory.test.cpp b/compiler/stdex/src/Memory.test.cpp
new file mode 100644
index 000000000..433af4534
--- /dev/null
+++ b/compiler/stdex/src/Memory.test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stdex/Memory.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+struct Stat
+{
+ unsigned allocated = 0;
+ unsigned freed = 0;
+};
+
+struct Counter
+{
+public:
+ Counter(Stat *stat) : _stat{stat} { _stat->allocated += 1; }
+
+public:
+ ~Counter() { _stat->freed += 1; }
+
+private:
+ Stat *_stat;
+};
+
+} // namespace
+
+TEST(MemoryTest, make_unique)
+{
+ Stat stat;
+
+ ASSERT_EQ(stat.allocated, 0);
+ ASSERT_EQ(stat.freed, 0);
+
+ auto o = stdex::make_unique<::Counter>(&stat);
+
+ ASSERT_EQ(stat.allocated, 1);
+ ASSERT_EQ(stat.freed, 0);
+
+ o.reset();
+
+ ASSERT_EQ(stat.allocated, 1);
+ ASSERT_EQ(stat.freed, 1);
+}
diff --git a/compiler/stdex/src/Queue.test.cpp b/compiler/stdex/src/Queue.test.cpp
new file mode 100644
index 000000000..d76cd3ee6
--- /dev/null
+++ b/compiler/stdex/src/Queue.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stdex/Queue.h"
+
+#include <gtest/gtest.h>
+
+TEST(QueueTest, take)
+{
+ std::queue<int> q;
+
+ q.emplace(3);
+ q.emplace(4);
+ q.emplace(5);
+
+ ASSERT_EQ(stdex::take(q), 3);
+ ASSERT_EQ(stdex::take(q), 4);
+ ASSERT_EQ(stdex::take(q), 5);
+}
diff --git a/compiler/stdex/src/Set.test.cpp b/compiler/stdex/src/Set.test.cpp
new file mode 100644
index 000000000..90361936f
--- /dev/null
+++ b/compiler/stdex/src/Set.test.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stdex/Set.h"
+
+#include <gtest/gtest.h>
+
+TEST(SET, operator_eq)
+{
+ ASSERT_TRUE(std::set<int>({1, 2, 3}) == std::set<int>({1, 2, 3}));
+ ASSERT_FALSE(std::set<int>({1, 3}) == std::set<int>({1, 2, 3}));
+}
+
+TEST(SET, operator_diff)
+{
+ const std::set<int> lhs{1, 2, 3};
+ const std::set<int> rhs{2, 4};
+
+ auto res = lhs - rhs;
+
+ ASSERT_EQ(res.size(), 2);
+ ASSERT_NE(res.find(1), res.end());
+ ASSERT_NE(res.find(3), res.end());
+}
diff --git a/compiler/tf2circle-conversion-test/.gitignore b/compiler/tf2circle-conversion-test/.gitignore
new file mode 100644
index 000000000..8dbfa9012
--- /dev/null
+++ b/compiler/tf2circle-conversion-test/.gitignore
@@ -0,0 +1 @@
+/test.local.lst
diff --git a/compiler/tf2circle-conversion-test/CMakeLists.txt b/compiler/tf2circle-conversion-test/CMakeLists.txt
new file mode 100644
index 000000000..27f2463f3
--- /dev/null
+++ b/compiler/tf2circle-conversion-test/CMakeLists.txt
@@ -0,0 +1,138 @@
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS tf2circle)
+list(APPEND REQUIRED_TARGETS tfkit)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+message(STATUS "tf2circle-conversion-test: run tests")
+
+nncc_find_resource(TensorFlowTests)
+
+#
+# Copy [PREFIX]/test.pbtxt to PREFIX.pbtxt in binary folder
+# Copy [PREFIX]/test.info to PREFIX.info in binary folder
+# Copy [PREFIX]/customop.conf to PREFIX_customop.conf in binary folder
+# Encode PREFIX.pbtxt to PREFIX.pb
+#
+set(TEST_REPO "${TensorFlowTests_DIR}")
+set(TEST_PBTXT_FILENAME "test.pbtxt")
+set(TEST_INFO_FILENAME "test.info")
+set(TEST_CUSTOMOP_CONF_FILENAME "customop.conf")
+
+unset(TESTCASES)
+
+macro(add NAME)
+ list(APPEND TESTCASES ${NAME})
+endmacro(add)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+unset(TEST_DEPS)
+unset(TEST_NAMES)
+
+foreach(PREFIX IN ITEMS ${TESTCASES})
+ if(NOT IS_DIRECTORY "${TEST_REPO}/${PREFIX}")
+ message(FATAL_ERROR "Missing '${PREFIX}' test")
+ endif()
+
+ set(PBTXT_SOURCE_PATH "${TEST_REPO}/${PREFIX}/${TEST_PBTXT_FILENAME}")
+ set(INFO_SOURCE_PATH "${TEST_REPO}/${PREFIX}/${TEST_INFO_FILENAME}")
+ set(CUSTOMOP_CONF_SOURCE_PATH "${TEST_REPO}/${PREFIX}/${TEST_CUSTOMOP_CONF_FILENAME}")
+
+ set(PBTXT_FILE "${PREFIX}.pbtxt")
+ set(PBTXT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PBTXT_FILE}")
+
+ set(INFO_FILE "${PREFIX}.info")
+ set(INFO_PATH "${CMAKE_CURRENT_BINARY_DIR}/${INFO_FILE}")
+
+ set(CUSTOMOP_CONF_FILE "${PREFIX}.${TEST_CUSTOMOP_CONF_FILENAME}") # ex) CustomOp_001.customop.conf
+ set(CUSTOMOP_CONF_PATH "${CMAKE_CURRENT_BINARY_DIR}/${CUSTOMOP_CONF_FILE}")
+
+ set(PB_FILE "${PREFIX}.pb")
+ set(PB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PB_FILE}")
+
+ # Copy .pbtxt
+ add_custom_command(OUTPUT ${PBTXT_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${PBTXT_SOURCE_PATH}" "${PBTXT_PATH}"
+ DEPENDS ${PBTXT_SOURCE_PATH}
+ COMMENT "Generate ${PBTXT_FILE}"
+ )
+
+ # Copy .info
+ add_custom_command(OUTPUT ${INFO_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${INFO_SOURCE_PATH}" "${INFO_PATH}"
+ DEPENDS ${INFO_SOURCE_PATH}
+ COMMENT "Generate ${INFO_FILE}"
+ )
+
+ # Generate .pb from .pbtxt
+ add_custom_command(OUTPUT ${PB_PATH}
+ COMMAND $<TARGET_FILE:tfkit> encode ${PBTXT_PATH} ${PB_PATH}
+ DEPENDS ${PBTXT_PATH}
+ COMMENT "Generate ${PB_FILE}"
+ )
+
+ list(APPEND TEST_DEPS ${INFO_PATH} ${PB_PATH})
+
+ if (EXISTS "${CUSTOMOP_CONF_SOURCE_PATH}")
+
+ # Copy customop.conf
+ add_custom_command(OUTPUT ${CUSTOMOP_CONF_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${CUSTOMOP_CONF_SOURCE_PATH}" "${CUSTOMOP_CONF_PATH}"
+ DEPENDS ${CUSTOMOP_CONF_SOURCE_PATH}
+ COMMENT "Generate ${CUSTOMOP_CONF_FILE}"
+ )
+
+ list(APPEND TEST_DEPS ${CUSTOMOP_CONF_PATH})
+
+ endif (EXISTS "${CUSTOMOP_CONF_SOURCE_PATH}")
+
+ list(APPEND TEST_NAMES ${PREFIX})
+endforeach(PREFIX)
+
+##
+## Copy testall
+##
+set(TEST_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/testall.sh")
+set(TEST_RUNNER_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh")
+
+add_custom_command(
+ OUTPUT ${TEST_RUNNER}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TEST_RUNNER_SOURCE}" "${TEST_RUNNER}"
+ DEPENDS ${TEST_RUNNER_SOURCE}
+ COMMENT "Generate test runner"
+)
+
+list(APPEND TEST_DEPS "${TEST_RUNNER}")
+
+###
+### Generate test.config
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+add_custom_command(
+ OUTPUT ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF2CIRCLE_PATH=\"$<TARGET_FILE:tf2circle>\"' >> ${TEST_CONFIG}
+ DEPENDS
+ tf2circle
+ COMMENT "Generate test configuration"
+)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}")
+
+# This "tf2circle_conversion_test_deps" target enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(tf2circle_conversion_test_deps ALL DEPENDS ${TEST_DEPS})
+
+# Run tests
+add_test(
+ NAME tf2circle_conversion_test
+ COMMAND "${TEST_RUNNER}"
+ "${TEST_CONFIG}"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ ${TEST_NAMES}
+)
diff --git a/compiler/tf2circle-conversion-test/README.md b/compiler/tf2circle-conversion-test/README.md
new file mode 100644
index 000000000..bee7aec18
--- /dev/null
+++ b/compiler/tf2circle-conversion-test/README.md
@@ -0,0 +1,3 @@
+# tf2circle-conversion-test
+
+Run `tf2circle` to `test.lst` and check whether given TF model is able to be converted into Circle model. Write `test.local.lst` for local test list.
diff --git a/compiler/tf2circle-conversion-test/requires.cmake b/compiler/tf2circle-conversion-test/requires.cmake
new file mode 100644
index 000000000..b1f872669
--- /dev/null
+++ b/compiler/tf2circle-conversion-test/requires.cmake
@@ -0,0 +1,2 @@
+require("tf2circle")
+require("tfkit")
diff --git a/compiler/tf2circle-conversion-test/test.lst b/compiler/tf2circle-conversion-test/test.lst
new file mode 100644
index 000000000..f24de50c0
--- /dev/null
+++ b/compiler/tf2circle-conversion-test/test.lst
@@ -0,0 +1,103 @@
+# TODO Enable skipped tests
+
+add(NET_0000)
+add(NET_0001)
+add(NET_0002)
+add(NET_0003)
+add(NET_0004)
+add(NET_0005)
+add(NET_0006)
+add(NET_0007)
+add(NET_0008)
+add(NET_0009)
+add(NET_0010)
+add(NET_0011)
+add(NET_0012)
+add(NET_0013)
+add(NET_0014)
+add(NET_0015)
+add(NET_0016)
+add(NET_0017)
+add(NET_0018)
+add(NET_0019)
+add(NET_0020)
+add(NET_0021)
+add(NET_0022)
+add(NET_0023)
+add(NET_0024)
+add(NET_0025)
+add(NET_0028)
+add(NET_0029)
+add(NET_0030)
+add(NET_0031)
+add(NET_0032)
+add(NET_0033)
+add(NET_0034)
+add(NET_0035)
+add(NET_0036)
+add(NET_0037)
+add(NET_0038)
+add(NET_0039)
+add(NET_0040)
+add(NET_0041)
+add(REGRESSION_0000)
+add(REGRESSION_0001)
+add(REGRESSION_0002)
+add(UNIT_Add_000)
+add(UNIT_Add_001)
+add(UNIT_Add_002)
+add(UNIT_Add_004)
+add(UNIT_Add_005)
+add(UNIT_AvgPool_000)
+add(UNIT_AvgPool_001)
+#add(UNIT_BiasAdd_000)
+#add(UNIT_BiasAdd_001)
+add(UNIT_BiasAdd_002)
+#add(UNIT_ConcatV2_000)
+#add(UNIT_ConcatV2_001)
+add(UNIT_ConcatV2_002)
+add(UNIT_Const_000)
+#add(UNIT_Const_001)
+add(UNIT_Conv2D_000)
+add(UNIT_Conv2DBackpropInput_000)
+add(UNIT_Conv2DBackpropInput_001)
+#add(UNIT_CustomOp_000)
+add(UNIT_CustomOp_001)
+add(UNIT_DepthwiseConv2dNative_000)
+add(UNIT_DepthwiseConv2dNative_001)
+add(UNIT_Maximum_000)
+add(UNIT_Maximum_001)
+add(UNIT_Maximum_002)
+add(UNIT_MaxPool_000)
+add(UNIT_MaxPool_001)
+add(UNIT_Mean_000)
+add(UNIT_Mean_001)
+add(UNIT_Mean_002)
+add(UNIT_Mean_003)
+add(UNIT_Mul_000)
+add(UNIT_Mul_001)
+add(UNIT_Mul_002)
+add(UNIT_Pad_000)
+add(UNIT_Placeholder_000)
+add(UNIT_Placeholder_001)
+add(UNIT_Placeholder_002)
+add(UNIT_Placeholder_003)
+add(UNIT_RealDiv_000)
+add(UNIT_RealDiv_001)
+add(UNIT_Relu_000)
+add(UNIT_Relu6_000)
+add(UNIT_Reshape_000)
+add(UNIT_Rsqrt_000)
+add(UNIT_Softmax_001)
+add(UNIT_Sqrt_000)
+add(UNIT_SquaredDifference_000)
+add(UNIT_SquaredDifference_001)
+add(UNIT_Squeeze_000)
+add(UNIT_Squeeze_001)
+add(UNIT_Squeeze_002)
+add(UNIT_Squeeze_003)
+add(UNIT_StopGradient_000)
+add(UNIT_StopGradient_001)
+add(UNIT_Sub_000)
+add(UNIT_Sub_001)
+add(UNIT_Tanh_000)
diff --git a/compiler/tf2circle-conversion-test/testall.sh b/compiler/tf2circle-conversion-test/testall.sh
new file mode 100755
index 000000000..a7e8037d5
--- /dev/null
+++ b/compiler/tf2circle-conversion-test/testall.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+
+# Need at least 2 arguments
+if [[ $# -lt 2 ]]; then
+ echo "USAGE: $0 ..."
+ echo
+ echo "ARGUMENTS:"
+ echo " [test.config path]"
+ echo " [WORKDIR]"
+ echo " [Prefix1]"
+ echo " [Prefix2]"
+ echo " ..."
+ exit 255
+fi
+
+CONFIG_PATH="$1"; shift
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found TF2CIRCLE: ${TF2CIRCLE_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ echo "-- Found pb: ${PREFIX}.pb"
+
+ # Exit immediately if any command fails
+ set -e
+ # Show commands
+ set -x
+
+ # circle is generated both for COMPARE and EXPORT actions
+ if [ -f "${WORKDIR}/${PREFIX}.customop.conf" ]; then
+
+ # Generate circle with custom op configuration
+ "${TF2CIRCLE_PATH}" \
+ "${WORKDIR}/${PREFIX}.info" \
+ "${WORKDIR}/${PREFIX}.pb" \
+ "${WORKDIR}/${PREFIX}.circle" \
+ "--customop" "${WORKDIR}/${PREFIX}.customop.conf"
+ else
+
+ # Generate circle
+ "${TF2CIRCLE_PATH}" \
+ "${WORKDIR}/${PREFIX}.info" \
+ "${WORKDIR}/${PREFIX}.pb" \
+ "${WORKDIR}/${PREFIX}.circle"
+
+ fi
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$PREFIX")
+ else
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/tf2circle-dredd-pb-test/.gitignore b/compiler/tf2circle-dredd-pb-test/.gitignore
new file mode 100644
index 000000000..23c7c1bb3
--- /dev/null
+++ b/compiler/tf2circle-dredd-pb-test/.gitignore
@@ -0,0 +1 @@
+/contrib.lst
diff --git a/compiler/tf2circle-dredd-pb-test/CMakeLists.txt b/compiler/tf2circle-dredd-pb-test/CMakeLists.txt
new file mode 100644
index 000000000..48b098e24
--- /dev/null
+++ b/compiler/tf2circle-dredd-pb-test/CMakeLists.txt
@@ -0,0 +1,141 @@
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS circle-inspect)
+list(APPEND REQUIRED_TARGETS circle-verify)
+list(APPEND REQUIRED_TARGETS tf2circle)
+list(APPEND REQUIRED_TARGETS dredd_rule_lib)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+set(PB_MODEL_REPO "${CMAKE_CURRENT_SOURCE_DIR}/contrib") # Where to find models to test
+
+unset(KEYS)
+unset(DEPS)
+
+function(check_file_exist)
+
+ foreach(FILE_PATH IN LISTS ARGV)
+ if(NOT EXISTS "${FILE_PATH}")
+ message(FATAL_ERROR "${FILE_PATH} does not exist." )
+ endif()
+ endforeach()
+
+endfunction()
+
+#
+# processing models in contrib.lst
+#
+# Example)
+#
+# Add(Inception_v3 RULE circle_1.0_rel_requirement.rule)
+# -> Read compiler/tf2circle-dredd-pb-test/contrib/Inception_v3/model.pb and generate
+# "Inception_v3.circle". Then rule file is tested for the generated circle file.
+#
+macro(Add MODEL_DIR)
+
+ set(ARG_OPTION)
+ set(ARG_ONE_VALUE RULE) # rule file name
+ set(ARG_MULTI_VALUE)
+ cmake_parse_arguments(ARG "${ARG_OPTION}" "${ARG_ONE_VALUE}" "${ARG_MULTI_VALUE}" ${ARGN})
+
+ if(NOT ARG_RULE )
+ message( FATAL_ERROR "RULE is mandatory arg" )
+ endif()
+
+ set(RULE_FILENAME ${ARG_RULE})
+
+ set(MODEL_SOURCE_DIR "${PB_MODEL_REPO}/${MODEL_DIR}")
+
+ set(PB_PATH "${MODEL_SOURCE_DIR}/model.pb")
+ set(INFO_PATH "${MODEL_SOURCE_DIR}/model.info")
+ set(RULE_PATH "${MODEL_SOURCE_DIR}/${RULE_FILENAME}")
+
+ check_file_exist(${PB_PATH} ${INFO_PATH} ${RULE_PATH})
+
+ # Generate .test file which declares path of target pb, info, rule files
+ set(TARGET_TESTNAME "${MODEL_DIR}")
+ set(TEST_CONFIG_FILE "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_TESTNAME}.test")
+
+ add_custom_command(
+ OUTPUT ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_PB_PATH="${PB_PATH}"' >> ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_INFO_PATH="${INFO_PATH}"' >> ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_RULE_PATH="${RULE_PATH}"' >> ${TEST_CONFIG_FILE}
+ DEPENDS
+ ${PB_PATH}
+ COMMENT "Generate ${TARGET_TESTNAME} configuration for BIN"
+ )
+
+ list(APPEND KEYS "${TARGET_TESTNAME}")
+ list(APPEND DEPS "${TEST_CONFIG_FILE}")
+
+endmacro(Add)
+
+include(contrib.lst OPTIONAL)
+
+#
+# Generate toolchain.config
+#
+set(TOOLCHAIN_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/toolchain.config")
+
+add_custom_command(
+ OUTPUT ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_INSPECT_PATH=\"$<TARGET_FILE:circle-inspect>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_VERIFY_PATH=\"$<TARGET_FILE:circle-verify>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF2CIRCLE_PATH=\"$<TARGET_FILE:tf2circle>\"' >> ${TOOLCHAIN_CONFIG}
+ # add more if new executable file is needed in runner.sh and rule-lib.sh
+ DEPENDS
+ circle-inspect
+ circle-verify
+ tf2circle
+ COMMENT "Generate toolchain configuration"
+)
+
+list(APPEND DEPS "${TOOLCHAIN_CONFIG}")
+
+#
+# Generate quality test runner
+#
+set(SOURCE_RUNNER "${CMAKE_CURRENT_SOURCE_DIR}/runner.sh")
+set(TARGET_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/runner.sh")
+
+add_custom_command(
+ OUTPUT ${TARGET_RUNNER}
+ COMMAND ${CMAKE_COMMAND} -E copy "${SOURCE_RUNNER}" "${TARGET_RUNNER}"
+ DEPENDS ${SOURCE_RUNNER}
+ COMMENT "Generate test runner"
+)
+
+list(APPEND DEPS "${TARGET_RUNNER}")
+
+#
+# copy rule-lib.sh (a library of shell script functions)
+#
+
+# getting path for rule-lib.sh in dredd-rule-lib
+get_target_property(DREDD_RULE_LIB_DIR dredd_rule_lib BINARY_DIR)
+
+set(SOURCE_RULE_LIB "${DREDD_RULE_LIB_DIR}/rule-lib.sh")
+set(TARGET_RULE_LIB "${CMAKE_CURRENT_BINARY_DIR}/rule-lib.sh")
+
+add_custom_command(
+ OUTPUT ${TARGET_RULE_LIB}
+ COMMAND ${CMAKE_COMMAND} -E copy "${SOURCE_RULE_LIB}" "${TARGET_RULE_LIB}"
+ DEPENDS ${SOURCE_RULE_LIB}
+ COMMENT "Generate rule lib"
+)
+
+list(APPEND DEPS "${TARGET_RULE_LIB}")
+
+# Generate dependencies
+add_custom_target(tf2circle_dredd_pb_deps ALL DEPENDS ${DEPS})
+
+add_test(
+ NAME tf2circle_dredd_pb_test
+ COMMAND
+ "${TARGET_RUNNER}"
+ "${TOOLCHAIN_CONFIG}"
+ ${KEYS}
+)
diff --git a/compiler/tf2circle-dredd-pb-test/README.md b/compiler/tf2circle-dredd-pb-test/README.md
new file mode 100644
index 000000000..65b4cd1fa
--- /dev/null
+++ b/compiler/tf2circle-dredd-pb-test/README.md
@@ -0,0 +1,3 @@
+# tf2circle-dredd-pb-test
+
+TODO write content
diff --git a/compiler/tf2circle-dredd-pb-test/contrib/.gitignore b/compiler/tf2circle-dredd-pb-test/contrib/.gitignore
new file mode 100644
index 000000000..968c34510
--- /dev/null
+++ b/compiler/tf2circle-dredd-pb-test/contrib/.gitignore
@@ -0,0 +1,3 @@
+/*
+# Exclude all except below
+!.gitignore
diff --git a/compiler/tf2circle-dredd-pb-test/requires.cmake b/compiler/tf2circle-dredd-pb-test/requires.cmake
new file mode 100644
index 000000000..0fb6cde85
--- /dev/null
+++ b/compiler/tf2circle-dredd-pb-test/requires.cmake
@@ -0,0 +1,4 @@
+require("tf2circle")
+require("circle-inspect")
+require("circle-verify")
+require("dredd-rule-lib")
diff --git a/compiler/tf2circle-dredd-pb-test/runner.sh b/compiler/tf2circle-dredd-pb-test/runner.sh
new file mode 100755
index 000000000..7c7b123c4
--- /dev/null
+++ b/compiler/tf2circle-dredd-pb-test/runner.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+# This script checks circle file generated by tf2circle
+
+# exit if unknown var is used
+set -u
+
+WORKDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+# Need at least toolchain.config
+if [[ $# -lt 1 ]]; then
+ echo "USAGE: $0 ..."
+ echo
+ echo "ARGUMENTS:"
+ echo " [toolchain.config path]"
+ echo " [Prefix1]"
+ echo " [Prefix2]"
+ echo " ..."
+ exit 255
+fi
+
+CONFIG_PATH="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found circle-inspect: ${CIRCLE_INSPECT_PATH}"
+echo "-- Found circle-verify: ${CIRCLE_VERIFY_PATH}"
+echo "-- Found tf2circle: ${TF2CIRCLE_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+
+# running each rule file
+
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ echo "[ RUN ] ${PREFIX}"
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ source "${PREFIX}.test"
+
+ echo "-- Use '${MODEL_PB_PATH}', '${MODEL_INFO_PATH}', and '${MODEL_RULE_PATH}'"
+
+ # Exit immediately if any command fails
+ set -e
+ # Show commands
+ set -x
+
+ # Generate circle
+ "${TF2CIRCLE_PATH}" \
+ "${MODEL_INFO_PATH}" \
+ "${MODEL_PB_PATH}" \
+ "${WORKDIR}/${PREFIX}.circle"
+
+ #
+ # Run rule prepared to check circle file
+ #
+
+ # set vars needed by rule file
+ CIRCLE_PATH="${WORKDIR}/${PREFIX}.circle"
+
+ # Note: turn off 'command printing'. Otherwise printing will be so messy
+ set +x
+
+ # set vars required by rule-lib.sh and rule file
+ COMPILED_FILE=${CIRCLE_PATH}
+ INSPECT_PROG_PATH=${CIRCLE_INSPECT_PATH}
+ VERIFY_PROG_PATH=${CIRCLE_VERIFY_PATH}
+ ERROR_LOG="${PREFIX}.error"
+
+ rm -f "${ERROR_LOG}"
+
+ # in case error while running ${MODEL_RULE_PATH}, prints error msg
+ trap 'echo "** ERROR **" ; cat "${ERROR_LOG}"' ERR
+
+ source rule-lib.sh
+ source "${MODEL_RULE_PATH}"
+
+ # unset
+ trap - ERR
+ set -x
+
+ # At this point, the exit code of all commands is 0
+ # If not 0, execution of this script ends because of "set -e"
+ touch "${PASSED_TAG}"
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ echo "[ OK ] ${PREFIX}"
+ PASSED+=("$PREFIX")
+ else
+ echo "[ FAIL] ${PREFIX}"
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/tf2circle-dredd-pbtxt-test/.gitignore b/compiler/tf2circle-dredd-pbtxt-test/.gitignore
new file mode 100644
index 000000000..8dbfa9012
--- /dev/null
+++ b/compiler/tf2circle-dredd-pbtxt-test/.gitignore
@@ -0,0 +1 @@
+/test.local.lst
diff --git a/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt b/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt
new file mode 100644
index 000000000..789e58535
--- /dev/null
+++ b/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt
@@ -0,0 +1,184 @@
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS circle-inspect)
+list(APPEND REQUIRED_TARGETS circle-verify)
+list(APPEND REQUIRED_TARGETS tf2circle)
+list(APPEND REQUIRED_TARGETS tfkit)
+list(APPEND REQUIRED_TARGETS dredd_rule_lib)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+nncc_find_resource(TensorFlowTests)
+
+set(MODEL_REPO "${TensorFlowTests_DIR}") # Where to find text models to test
+
+unset(KEYS)
+unset(DEPS)
+
+#
+# processing models in test.lst and test.local.lst
+#
+# Example)
+#
+# Add(NET_0025 RULE test.rule)
+# -> Read test.pbtxt file under res/TensorFlowTests/NET_0025 and create "NET_0025.circle"
+# Then the circle is tested against rules in test.rule file.
+#
+macro(Add MODEL_DIR)
+
+ set(ARG_OPTION)
+ set(ARG_ONE_VALUE RULE) # rule file name
+ set(ARG_MULTI_VALUE)
+ cmake_parse_arguments(ARG "${ARG_OPTION}" "${ARG_ONE_VALUE}" "${ARG_MULTI_VALUE}" ${ARGN})
+
+ if(NOT ARG_RULE)
+ message( FATAL_ERROR "RULE is mandatory arg" )
+ endif()
+
+ set(RULE_FILENAME ${ARG_RULE})
+
+ set(TARGET_TESTNAME "${MODEL_DIR}")
+
+ set(MODEL_SOURCE_DIR "${MODEL_REPO}/${MODEL_DIR}")
+
+ set(TXT_SOURCE_PBTXT_PATH "${MODEL_SOURCE_DIR}/test.pbtxt")
+ set(TXT_SOURCE_INFO_PATH "${MODEL_SOURCE_DIR}/test.info")
+ set(TXT_SOURCE_RULE_PATH "${MODEL_SOURCE_DIR}/${RULE_FILENAME}")
+
+ set(TXT_TARGET_PB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_TESTNAME}.pb")
+ set(TXT_TARGET_PBTXT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_TESTNAME}.pbtxt")
+ set(TXT_TARGET_INFO_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_TESTNAME}.info")
+ set(TXT_TARGET_RULE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_TESTNAME}.rule")
+
+ if(NOT EXISTS "${TXT_SOURCE_PBTXT_PATH}")
+ message(FATAL_ERROR "${TXT_SOURCE_PBTXT_PATH} - pbtxt file does not exist")
+ endif(NOT EXISTS "${TXT_SOURCE_PBTXT_PATH}")
+
+ if(NOT EXISTS "${TXT_SOURCE_INFO_PATH}")
+ message(FATAL_ERROR "${TXT_SOURCE_INFO_PATH} - info file does not exist")
+ endif(NOT EXISTS "${TXT_SOURCE_INFO_PATH}")
+
+ if(NOT EXISTS "${TXT_SOURCE_RULE_PATH}")
+ message(FATAL_ERROR "${TXT_SOURCE_RULE_PATH} - rule file does not exist")
+ endif(NOT EXISTS "${TXT_SOURCE_RULE_PATH}")
+
+ # Copy .pbtxt
+ add_custom_command(OUTPUT ${TXT_TARGET_PBTXT_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TXT_SOURCE_PBTXT_PATH}" "${TXT_TARGET_PBTXT_PATH}"
+ DEPENDS ${TXT_SOURCE_PBTXT_PATH}
+ COMMENT "Generate ${TXT_TARGET_PBTXT_PATH}"
+ )
+
+ # Copy .info
+ add_custom_command(OUTPUT ${TXT_TARGET_INFO_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TXT_SOURCE_INFO_PATH}" "${TXT_TARGET_INFO_PATH}"
+ DEPENDS ${TXT_SOURCE_INFO_PATH}
+ COMMENT "Generate ${TXT_TARGET_INFO_PATH}"
+ )
+
+ # Copy .rule
+ add_custom_command(OUTPUT ${TXT_TARGET_RULE_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TXT_SOURCE_RULE_PATH}" "${TXT_TARGET_RULE_PATH}"
+ DEPENDS ${TXT_SOURCE_RULE_PATH}
+ COMMENT "Generate ${TXT_TARGET_RULE_PATH}"
+ )
+
+ # Generate .pb from .pbtxt
+ add_custom_command(OUTPUT ${TXT_TARGET_PB_PATH}
+ COMMAND $<TARGET_FILE:tfkit> encode ${TXT_TARGET_PBTXT_PATH} ${TXT_TARGET_PB_PATH}
+ DEPENDS ${TXT_TARGET_PBTXT_PATH}
+ COMMENT "Generate ${TXT_TARGET_PB_PATH}"
+ )
+
+ # Generate .test file which declares path of target pb, info, rule files
+ # this file is used inside runner.sh
+ set(TEST_CONFIG_FILE "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_TESTNAME}.test")
+
+ add_custom_command(
+ OUTPUT ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_PB_PATH="${TXT_TARGET_PB_PATH}"' >> ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_INFO_PATH="${TXT_TARGET_INFO_PATH}"' >> ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_RULE_PATH="${TXT_TARGET_RULE_PATH}"' >> ${TEST_CONFIG_FILE}
+ DEPENDS
+ ${TXT_TARGET_PB_PATH}
+ ${TXT_TARGET_INFO_PATH}
+ ${TXT_TARGET_RULE_PATH}
+ COMMENT "Generate ${TARGET_TESTNAME} configuration for TXT"
+ )
+
+ list(APPEND DEPS "${TEST_CONFIG_FILE}")
+ list(APPEND KEYS "${TARGET_TESTNAME}")
+
+endmacro(Add)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+#
+# Generate toolchain.config
+#
+set(TOOLCHAIN_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/toolchain.config")
+
+add_custom_command(
+ OUTPUT ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_INSPECT_PATH=\"$<TARGET_FILE:circle-inspect>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_VERIFY_PATH=\"$<TARGET_FILE:circle-verify>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF2CIRCLE_PATH=\"$<TARGET_FILE:tf2circle>\"' >> ${TOOLCHAIN_CONFIG}
+ # add more if new executable file is needed in runner.sh and rule-lib.sh
+ DEPENDS
+ circle-inspect
+ circle-verify
+ tf2circle
+ COMMENT "Generate toolchain configuration"
+)
+
+list(APPEND DEPS "${TOOLCHAIN_CONFIG}")
+
+#
+# copy runner.sh
+#
+set(SOURCE_RUNNER "${CMAKE_CURRENT_SOURCE_DIR}/runner.sh")
+set(TARGET_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/runner.sh")
+
+add_custom_command(
+ OUTPUT ${TARGET_RUNNER}
+ COMMAND ${CMAKE_COMMAND} -E copy "${SOURCE_RUNNER}" "${TARGET_RUNNER}"
+ DEPENDS ${SOURCE_RUNNER}
+ COMMENT "Generate test runner"
+)
+
+list(APPEND DEPS "${TARGET_RUNNER}")
+
+#
+# copy rule-lib.sh (a library of shell script functions)
+#
+
+# getting path for rule-lib.sh in dredd-rule-lib
+get_target_property(DREDD_RULE_LIB_DIR dredd_rule_lib BINARY_DIR)
+
+set(SOURCE_RULE_LIB "${DREDD_RULE_LIB_DIR}/rule-lib.sh")
+set(TARGET_RULE_LIB "${CMAKE_CURRENT_BINARY_DIR}/rule-lib.sh")
+
+add_custom_command(
+ OUTPUT ${TARGET_RULE_LIB}
+ COMMAND ${CMAKE_COMMAND} -E copy "${SOURCE_RULE_LIB}" "${TARGET_RULE_LIB}"
+ DEPENDS ${SOURCE_RULE_LIB}
+ COMMENT "Generate rule lib"
+)
+
+list(APPEND DEPS "${TARGET_RULE_LIB}")
+
+# Generate dependencies
+add_custom_target(tf2circle_dredd_pbtxt_deps ALL DEPENDS ${DEPS})
+
+add_test(
+ NAME tf2circle_dredd_pbtxt_test
+ COMMAND
+ "${TARGET_RUNNER}"
+ "${TOOLCHAIN_CONFIG}"
+ ${KEYS}
+)
diff --git a/compiler/tf2circle-dredd-pbtxt-test/README.md b/compiler/tf2circle-dredd-pbtxt-test/README.md
new file mode 100644
index 000000000..8eb906185
--- /dev/null
+++ b/compiler/tf2circle-dredd-pbtxt-test/README.md
@@ -0,0 +1,3 @@
+# tf2circle-dredd-pbtxt-test
+
+TODO write content.
diff --git a/compiler/tf2circle-dredd-pbtxt-test/requires.cmake b/compiler/tf2circle-dredd-pbtxt-test/requires.cmake
new file mode 100644
index 000000000..747d8ab3c
--- /dev/null
+++ b/compiler/tf2circle-dredd-pbtxt-test/requires.cmake
@@ -0,0 +1,5 @@
+require("tfkit")
+require("tf2circle")
+require("circle-inspect")
+require("circle-verify")
+require("dredd-rule-lib")
diff --git a/compiler/tf2circle-dredd-pbtxt-test/runner.sh b/compiler/tf2circle-dredd-pbtxt-test/runner.sh
new file mode 100755
index 000000000..7c7b123c4
--- /dev/null
+++ b/compiler/tf2circle-dredd-pbtxt-test/runner.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+# This script checks the circle file generated by tf2circle
+
+# exit if unknown var is used
+set -u
+
+WORKDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+# Need at least toolchain.config
+if [[ $# -lt 1 ]]; then
+ echo "USAGE: $0 ..."
+ echo
+ echo "ARGUMENTS:"
+ echo " [toolchain.config path]"
+ echo " [Prefix1]"
+ echo " [Prefix2]"
+ echo " ..."
+ exit 255
+fi
+
+CONFIG_PATH="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found circle-inspect: ${CIRCLE_INSPECT_PATH}"
+echo "-- Found circle-verify: ${CIRCLE_VERIFY_PATH}"
+echo "-- Found tf2circle: ${TF2CIRCLE_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+
+# running each rule file
+
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ echo "[ RUN ] ${PREFIX}"
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ source "${PREFIX}.test"
+
+ echo "-- Use '${MODEL_PB_PATH}', '${MODEL_INFO_PATH}', and '${MODEL_RULE_PATH}'"
+
+ # Exit immediately if any command fails
+ set -e
+ # Show commands
+ set -x
+
+ # Generate circle
+ "${TF2CIRCLE_PATH}" \
+ "${MODEL_INFO_PATH}" \
+ "${MODEL_PB_PATH}" \
+ "${WORKDIR}/${PREFIX}.circle"
+
+ #
+ # Run rule prepared to check circle file
+ #
+
+ # set vars needed by rule file
+ CIRCLE_PATH="${WORKDIR}/${PREFIX}.circle"
+
+ # Note: turn off 'command printing'; otherwise the output would be too messy
+ set +x
+
+ # set vars required by rule-lib.sh and rule file
+ COMPILED_FILE=${CIRCLE_PATH}
+ INSPECT_PROG_PATH=${CIRCLE_INSPECT_PATH}
+ VERIFY_PROG_PATH=${CIRCLE_VERIFY_PATH}
+ ERROR_LOG="${PREFIX}.error"
+
+ rm -f "${ERROR_LOG}"
+
+ # in case error while running ${MODEL_RULE_PATH}, prints error msg
+ trap 'echo "** ERROR **" ; cat "${ERROR_LOG}"' ERR
+
+ source rule-lib.sh
+ source "${MODEL_RULE_PATH}"
+
+ # unset
+ trap - ERR
+ set -x
+
+ # At this point, the exit code of all commands is 0
+ # If not 0, execution of this script ends because of "set -e"
+ touch "${PASSED_TAG}"
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ echo "[ OK ] ${PREFIX}"
+ PASSED+=("$PREFIX")
+ else
+ echo "[ FAIL] ${PREFIX}"
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/tf2circle-dredd-pbtxt-test/test.lst b/compiler/tf2circle-dredd-pbtxt-test/test.lst
new file mode 100644
index 000000000..51bc4f21e
--- /dev/null
+++ b/compiler/tf2circle-dredd-pbtxt-test/test.lst
@@ -0,0 +1,4 @@
+# TODO add tests like the following:
+# Add(NET_0030 RULE circle_1.0_rel_requirement.rule) # Concat
+Add(NET_0028 RULE circle_1.0_rel_requirement.rule) # Instance Norm
+Add(UNIT_SquaredDifference_000 RULE circle.rule)
diff --git a/compiler/tf2circle-model-test/.gitignore b/compiler/tf2circle-model-test/.gitignore
new file mode 100644
index 000000000..23c7c1bb3
--- /dev/null
+++ b/compiler/tf2circle-model-test/.gitignore
@@ -0,0 +1 @@
+/contrib.lst
diff --git a/compiler/tf2circle-model-test/CMakeLists.txt b/compiler/tf2circle-model-test/CMakeLists.txt
new file mode 100644
index 000000000..2fb82236a
--- /dev/null
+++ b/compiler/tf2circle-model-test/CMakeLists.txt
@@ -0,0 +1,110 @@
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS tf2circle)
+list(APPEND REQUIRED_TARGETS tfkit)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+unset(KEYS)
+unset(DEPS)
+
+###
+### Add "Contrib" tests
+###
+macro(Add PREFIX)
+ # Let's use CONTRIB prefix to avoid name conflicts with official models
+ set(TEST_KEY "CONTRIB.${PREFIX}")
+
+ set(PACKAGE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/contrib/${PREFIX}")
+
+ set(MODEL_DOWNLOAD_SCRIPT "${PACKAGE_DIR}/model.download")
+ set(MODEL_PB_FILE "${PACKAGE_DIR}/model.pb")
+ set(MODEL_INFO_FILE "${PACKAGE_DIR}/model.info")
+ set(MODEL_MD5SUM_FILE "${PACKAGE_DIR}/model.md5sum")
+
+ # Try to download a model if it is missing
+ if(NOT EXISTS "${MODEL_PB_FILE}")
+ # TODO Extract this routine as a helper function
+ if(NOT EXISTS "${MODEL_DOWNLOAD_SCRIPT}")
+ message(FATAL_ERROR "${TEST_KEY} - Download script is missing")
+ endif(NOT EXISTS "${MODEL_DOWNLOAD_SCRIPT}")
+
+ execute_process(
+ COMMAND ${CMAKE_COMMAND} -D OUTPUT_PATH=${MODEL_PB_FILE} -P "${MODEL_DOWNLOAD_SCRIPT}"
+ RESULT_VARIABLE EXITCODE
+ )
+
+ if(NOT EXITCODE EQUAL 0)
+ message(FATAL_ERROR "${TEST_KEY} - Download fails")
+ endif(NOT EXITCODE EQUAL 0)
+ endif()
+
+ if(EXISTS "${MODEL_MD5SUM_FILE}")
+ # TODO Extract this routine as a helper function
+ file(STRINGS "${MODEL_MD5SUM_FILE}" EXPECTED_MD5SUM)
+ file(MD5 "${MODEL_PB_FILE}" OBTAINED_MD5SUM)
+
+ if(NOT "${EXPECTED_MD5SUM}" STREQUAL "${OBTAINED_MD5SUM}")
+ message(FATAL_ERROR "${TEST_KEY} - Checksum mismatches")
+ endif()
+ endif()
+
+ # Generate .test file which declares MODEL_PB_PATH and MODEL_INFO_PATH
+ set(TEST_CONFIG_FILE "${CMAKE_CURRENT_BINARY_DIR}/${TEST_KEY}.test")
+
+ add_custom_command(
+ OUTPUT ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_PB_PATH="${MODEL_PB_FILE}"' >> ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_INFO_PATH="${MODEL_INFO_FILE}"' >> ${TEST_CONFIG_FILE}
+ COMMENT "Generate ${TEST_KEY} configuration"
+ )
+
+ list(APPEND KEYS "${TEST_KEY}")
+ list(APPEND DEPS "${TEST_CONFIG_FILE}")
+endmacro(Add)
+
+include(contrib.lst OPTIONAL)
+
+###
+### Generate toolchain.config
+###
+set(TOOLCHAIN_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/toolchain.config")
+
+add_custom_command(
+ OUTPUT ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF2CIRCLE_PATH=\"$<TARGET_FILE:tf2circle>\"' >> ${TOOLCHAIN_CONFIG}
+ DEPENDS
+ tf2circle
+ COMMENT "Generate toolchain configuration"
+)
+
+list(APPEND DEPS "${TOOLCHAIN_CONFIG}")
+
+##
+## Generate test runner
+##
+set(TEST_RUNNER_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/runner.sh")
+set(TEST_RUNNER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/run")
+
+add_custom_command(
+ OUTPUT ${TEST_RUNNER_SCRIPT}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TEST_RUNNER_SOURCE}" "${TEST_RUNNER_SCRIPT}"
+ DEPENDS ${TEST_RUNNER_SOURCE}
+ COMMENT "Generate test runner"
+)
+
+list(APPEND DEPS "${TEST_RUNNER_SCRIPT}")
+
+### Generate dependencies
+add_custom_target(tf2circle_model_test_deps ALL DEPENDS ${DEPS})
+
+# NOTE This target is not built by default
+add_test(
+ NAME tf2circle_model_test
+ COMMAND
+ "${TEST_RUNNER_SCRIPT}"
+ "${TOOLCHAIN_CONFIG}"
+ ${KEYS}
+)
diff --git a/compiler/tf2circle-model-test/README.md b/compiler/tf2circle-model-test/README.md
new file mode 100644
index 000000000..fb5343700
--- /dev/null
+++ b/compiler/tf2circle-model-test/README.md
@@ -0,0 +1 @@
+# tf2circle-model-test
diff --git a/compiler/tf2circle-model-test/contrib/.gitignore b/compiler/tf2circle-model-test/contrib/.gitignore
new file mode 100644
index 000000000..968c34510
--- /dev/null
+++ b/compiler/tf2circle-model-test/contrib/.gitignore
@@ -0,0 +1,3 @@
+/*
+# Exclude all except below
+!.gitignore
diff --git a/compiler/tf2circle-model-test/requires.cmake b/compiler/tf2circle-model-test/requires.cmake
new file mode 100644
index 000000000..b1f872669
--- /dev/null
+++ b/compiler/tf2circle-model-test/requires.cmake
@@ -0,0 +1,2 @@
+require("tf2circle")
+require("tfkit")
diff --git a/compiler/tf2circle-model-test/runner.sh b/compiler/tf2circle-model-test/runner.sh
new file mode 100755
index 000000000..9e3b75c26
--- /dev/null
+++ b/compiler/tf2circle-model-test/runner.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+WORKDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+# Need at least toolchain.config
+if [[ $# -lt 1 ]]; then
+ echo "USAGE: $0 ..."
+ echo
+ echo "ARGUMENTS:"
+ echo " [toolchain.config path]"
+ echo " [Prefix1]"
+ echo " [Prefix2]"
+ echo " ..."
+ exit 255
+fi
+
+CONFIG_PATH="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found TF2CIRCLE: ${TF2CIRCLE_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ echo "[ RUN ] ${PREFIX}"
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ source "${PREFIX}.test"
+
+ echo "-- Use '${MODEL_PB_PATH}' and '${MODEL_INFO_PATH}'"
+
+ # Exit immediately if any command fails
+ set -e
+ # Show commands
+ set -x
+
+ # Generate circle
+ "${TF2CIRCLE_PATH}" \
+ "${MODEL_INFO_PATH}" \
+ "${MODEL_PB_PATH}" \
+ "${WORKDIR}/${PREFIX}.circle"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ echo "[ OK ] ${PREFIX}"
+ PASSED+=("$PREFIX")
+ else
+ echo "[ FAIL] ${PREFIX}"
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/tf2circle-ui-check/.gitignore b/compiler/tf2circle-ui-check/.gitignore
new file mode 100644
index 000000000..a178c13cd
--- /dev/null
+++ b/compiler/tf2circle-ui-check/.gitignore
@@ -0,0 +1 @@
+/test.lst
diff --git a/compiler/tf2circle-ui-check/CMakeLists.txt b/compiler/tf2circle-ui-check/CMakeLists.txt
new file mode 100644
index 000000000..863f1cf70
--- /dev/null
+++ b/compiler/tf2circle-ui-check/CMakeLists.txt
@@ -0,0 +1,44 @@
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS tfkit)
+list(APPEND REQUIRED_TARGETS tf2circle)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+nncc_find_resource(TensorFlowTests)
+
+message(STATUS "Build tf2circle-ui-check: TRUE")
+unset(TESTCASES)
+
+macro(Add NAME)
+ list(APPEND TESTCASES "${NAME}")
+endmacro(Add)
+
+include("test.lst" OPTIONAL)
+
+##
+## Generate configuration
+##
+set(CONFIG_PATH "${CMAKE_CURRENT_BINARY_DIR}/check.config")
+
+add_custom_command(
+ OUTPUT ${CONFIG_PATH}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${CONFIG_PATH}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TESTCASE_BASE="${TensorFlowTests_DIR}"' >> ${CONFIG_PATH}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TFKIT_PATH=\"$<TARGET_FILE:tfkit>\"' >> ${CONFIG_PATH}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF2CIRCLE_PATH=\"$<TARGET_FILE:tf2circle>\"' >> ${CONFIG_PATH}
+ DEPENDS
+ tfkit
+ tf2circle
+ COMMENT "Generate configuration"
+)
+
+## NOTE tf2circle_ui_check is not built by default
+add_custom_target(tf2circle_ui_check
+ COMMAND
+ "${CMAKE_CURRENT_SOURCE_DIR}/checkall.sh"
+ "${CONFIG_PATH}"
+ ${TESTCASES}
+ DEPENDS "${CONFIG_PATH}"
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+)
diff --git a/compiler/tf2circle-ui-check/README.md b/compiler/tf2circle-ui-check/README.md
new file mode 100644
index 000000000..aea870e6f
--- /dev/null
+++ b/compiler/tf2circle-ui-check/README.md
@@ -0,0 +1,21 @@
+# tf2circle-ui-check
+
+tf2circle-ui-check makes it easy to check what ``tf2circle`` shows for selected TensorFlow testcases.
+
+## HOW TO USE
+
+First of all, create "test.lst" file and add tests of interest. Here is an example of "test.lst"
+```
+Add(NET_0000)
+Add(NET_0001)
+```
+
+Run "nncc configure". You may find the below messages if ``tf2circle-ui-check`` is configured properly:
+```
+-- Configure TF2CIRCLE-UI-CHECK
+-- Build tf2circle-ui-check: TRUE
+-- Configure TF2CIRCLE-UI-CHECK - Done
+```
+
+Finally, build ``tf2circle_ui_check`` target and see what happens!
+If CMake uses "make" as a generator, you may build ``tf2circle_ui_check`` target via running ``./nncc build tf2circle_ui_check``.
diff --git a/compiler/tf2circle-ui-check/checkall.sh b/compiler/tf2circle-ui-check/checkall.sh
new file mode 100755
index 000000000..447bf041e
--- /dev/null
+++ b/compiler/tf2circle-ui-check/checkall.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# USAGE: checkall.sh [CONFIG] [TEST 1] [TEST 2] ...
+CONFIG_PATH="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "###"
+echo "### tf2circle UI check"
+echo "###"
+echo
+
+echo "Get each test from '${TESTCASE_BASE}/'"
+echo "Use tfkit at '${TFKIT_PATH}'"
+echo "Use tf2circle at '${TF2CIRCLE_PATH}'"
+echo
+
+while [[ $# -ne 0 ]]; do
+ NAME="$1"; shift
+ TESTCASE_DIR="${TESTCASE_BASE}/${NAME}"
+
+ INFO_FILE="${TESTCASE_DIR}/test.info"
+ PBTXT_FILE="${TESTCASE_DIR}/test.pbtxt"
+ MANIFEST_FILE="${TESTCASE_DIR}/test.manifest"
+
+ PB_FILE="${NAME}.pb"
+
+
+ echo "Running '${NAME}'"
+ if [[ -f ${MANIFEST_FILE} ]]; then
+ # TODO Only dump SUMMARY
+ cat ${MANIFEST_FILE}
+ fi
+ echo
+
+ # Create a pb model
+ "${TFKIT_PATH}" encode "${PBTXT_FILE}" "${PB_FILE}"
+
+ echo "OUTPUT:"
+ echo "---------------------------------------------------------"
+ # Generate circle
+ "${TF2CIRCLE_PATH}" "${INFO_FILE}" "${PB_FILE}" "${NAME}.circle"
+ EXITCODE=$?
+ echo "---------------------------------------------------------"
+
+ echo
+ echo "EXITCODE: ${EXITCODE}"
+
+ echo "Running '${NAME}' - Done"
+done
+
+echo
+echo "###"
+echo "### tf2circle UI check (done)"
+echo "###"
+
+exit 0
diff --git a/compiler/tf2circle-ui-check/requires.cmake b/compiler/tf2circle-ui-check/requires.cmake
new file mode 100644
index 000000000..2f87c6a91
--- /dev/null
+++ b/compiler/tf2circle-ui-check/requires.cmake
@@ -0,0 +1,2 @@
+require("tfkit")
+require("tf2circle")
diff --git a/compiler/tf2circle-value-pbtxt-remote-test/.gitignore b/compiler/tf2circle-value-pbtxt-remote-test/.gitignore
new file mode 100644
index 000000000..a178c13cd
--- /dev/null
+++ b/compiler/tf2circle-value-pbtxt-remote-test/.gitignore
@@ -0,0 +1 @@
+/test.lst
diff --git a/compiler/tf2circle-value-pbtxt-remote-test/CMakeLists.txt b/compiler/tf2circle-value-pbtxt-remote-test/CMakeLists.txt
new file mode 100644
index 000000000..f5292a0d1
--- /dev/null
+++ b/compiler/tf2circle-value-pbtxt-remote-test/CMakeLists.txt
@@ -0,0 +1,170 @@
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS tf2circle)
+list(APPEND REQUIRED_TARGETS tfkit)
+list(APPEND REQUIRED_TARGETS nnkit-run)
+list(APPEND REQUIRED_TARGETS nnkit_tf_backend)
+list(APPEND REQUIRED_TARGETS nnkit_randomize_action)
+list(APPEND REQUIRED_TARGETS nnkit_HDF5_export_action)
+list(APPEND REQUIRED_TARGETS nnkit_HDF5_import_action)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+message(STATUS "tf2circle-value-pbtxt-remote-test: run tests")
+
+nncc_find_resource(TensorFlowTests)
+
+#
+# Copy [PREFIX]/test.pbtxt to PREFIX.pbtxt in binary folder
+# Copy [PREFIX]/test.info to PREFIX.info in binary folder
+# Copy [PREFIX]/customop.conf to PREFIX.customop.conf in binary folder
+# Encode PREFIX.pbtxt to PREFIX.pb
+#
+set(TEST_REPO "${TensorFlowTests_DIR}")
+set(TEST_PBTXT_FILENAME "test.pbtxt")
+set(TEST_INFO_FILENAME "test.info")
+set(TEST_CUSTOMOP_CONF_FILENAME "customop.conf")
+
+unset(TESTCASES)
+
+macro(add NAME)
+ list(APPEND TESTCASES ${NAME})
+endmacro(add)
+
+# Read "test.lst" if exists
+include("test.lst" OPTIONAL)
+
+# Do not make test if there are no remote machine information
+if(NOT REMOTE_IP)
+ return()
+endif(NOT REMOTE_IP)
+
+if(NOT REMOTE_USER)
+ return()
+endif(NOT REMOTE_USER)
+
+# Do not make test if there are no testcases
+if(NOT TESTCASES)
+ return()
+endif(NOT TESTCASES)
+
+unset(TEST_DEPS)
+unset(TEST_NAMES)
+
+foreach(PREFIX IN ITEMS ${TESTCASES})
+ if(NOT IS_DIRECTORY "${TEST_REPO}/${PREFIX}")
+ message(FATAL_ERROR "Missing '${PREFIX}' test")
+ endif()
+
+ set(PBTXT_SOURCE_PATH "${TEST_REPO}/${PREFIX}/${TEST_PBTXT_FILENAME}")
+ set(INFO_SOURCE_PATH "${TEST_REPO}/${PREFIX}/${TEST_INFO_FILENAME}")
+ set(CUSTOMOP_CONF_SOURCE_PATH "${TEST_REPO}/${PREFIX}/${TEST_CUSTOMOP_CONF_FILENAME}")
+
+ set(PBTXT_FILE "${PREFIX}.pbtxt")
+ set(PBTXT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PBTXT_FILE}")
+
+ set(INFO_FILE "${PREFIX}.info")
+ set(INFO_PATH "${CMAKE_CURRENT_BINARY_DIR}/${INFO_FILE}")
+
+ set(CUSTOMOP_CONF_FILE "${PREFIX}.${TEST_CUSTOMOP_CONF_FILENAME}") # ex) CustomOp_001.customop.conf
+ set(CUSTOMOP_CONF_PATH "${CMAKE_CURRENT_BINARY_DIR}/${CUSTOMOP_CONF_FILE}")
+
+ set(PB_FILE "${PREFIX}.pb")
+ set(PB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PB_FILE}")
+
+ # Copy .pbtxt
+ add_custom_command(OUTPUT ${PBTXT_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${PBTXT_SOURCE_PATH}" "${PBTXT_PATH}"
+ DEPENDS ${PBTXT_SOURCE_PATH}
+ COMMENT "Generate ${PBTXT_FILE}"
+ )
+
+ # Copy .info
+ add_custom_command(OUTPUT ${INFO_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${INFO_SOURCE_PATH}" "${INFO_PATH}"
+ DEPENDS ${INFO_SOURCE_PATH}
+ COMMENT "Generate ${INFO_FILE}"
+ )
+
+ # Generate .pb from .pbtxt
+ add_custom_command(OUTPUT ${PB_PATH}
+ COMMAND $<TARGET_FILE:tfkit> encode ${PBTXT_PATH} ${PB_PATH}
+ DEPENDS ${PBTXT_PATH}
+ COMMENT "Generate ${PB_FILE}"
+ )
+
+ list(APPEND TEST_DEPS ${INFO_PATH} ${PB_PATH})
+
+ if (EXISTS "${CUSTOMOP_CONF_SOURCE_PATH}")
+
+ # Copy customop.conf
+ add_custom_command(OUTPUT ${CUSTOMOP_CONF_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${CUSTOMOP_CONF_SOURCE_PATH}" "${CUSTOMOP_CONF_PATH}"
+ DEPENDS ${CUSTOMOP_CONF_SOURCE_PATH}
+ COMMENT "Generate ${CUSTOMOP_CONF_FILE}"
+ )
+
+ list(APPEND TEST_DEPS ${CUSTOMOP_CONF_PATH})
+
+ endif (EXISTS "${CUSTOMOP_CONF_SOURCE_PATH}")
+
+ list(APPEND TEST_NAMES ${PREFIX})
+endforeach(PREFIX)
+
+##
+## Copy testall
+##
+set(TEST_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/testall.sh")
+set(TEST_RUNNER_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh")
+
+add_custom_command(
+ OUTPUT ${TEST_RUNNER}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TEST_RUNNER_SOURCE}" "${TEST_RUNNER}"
+ DEPENDS ${TEST_RUNNER_SOURCE}
+ COMMENT "Generate test runner"
+)
+
+list(APPEND TEST_DEPS "${TEST_RUNNER}")
+
+###
+### Generate test.config
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+add_custom_command(
+ OUTPUT ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'NNKIT_RUN_PATH=\"$<TARGET_FILE:nnkit-run>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF_BACKEND_PATH=\"$<TARGET_FILE:nnkit_tf_backend>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF2CIRCLE_PATH=\"$<TARGET_FILE:tf2circle>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'RANDOMIZE_ACTION_PATH=\"$<TARGET_FILE:nnkit_randomize_action>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_EXPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_export_action>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_IMPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_import_action>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL2NNPKG_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'NNPKG_TEST_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/nnpkg_test/nnpkg_test.sh\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'RUNTIME_LIBRARY_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/Product/out/\"' >> ${TEST_CONFIG}
+ DEPENDS
+ nnkit-run
+ nnkit_tf_backend
+ tf2circle
+ nnkit_randomize_action
+ nnkit_HDF5_export_action
+ nnkit_HDF5_import_action
+ COMMENT "Generate test configuration"
+)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}")
+
+# This "tf2circle_value_pbtxt_remote_test_deps" target enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(tf2circle_value_pbtxt_remote_test_deps ALL DEPENDS ${TEST_DEPS})
+
+# Run tests
+add_test(
+ NAME tf2circle_value_pbtxt_remote_test
+ COMMAND "${TEST_RUNNER}"
+ "${TEST_CONFIG}"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${REMOTE_IP}"
+ "${REMOTE_USER}"
+ ${TEST_NAMES}
+)
diff --git a/compiler/tf2circle-value-pbtxt-remote-test/README.md b/compiler/tf2circle-value-pbtxt-remote-test/README.md
new file mode 100644
index 000000000..5546cc879
--- /dev/null
+++ b/compiler/tf2circle-value-pbtxt-remote-test/README.md
@@ -0,0 +1,138 @@
+# tf2circle-value-pbtxt-remote-test
+
+`tf2circle-value-pbtxt-remote-test` does random value test for `.circle` file using remote machine, normally Odroid, which `nnfw` runs on.
+
+### Prerequisites
+
+1. Tensorflow library
+ - Make sure that the Tensorflow library can be found at the `nncc configure` step. If there is no Tensorflow library, this test will not be created.
+ - If CMake reports TensorFlow library is not found in configure step, even when the library exists, set [`TENSORFLOW_PREFIX`](../../infra/cmake/packages/TensorFlowConfig.cmake#1) to include Tensorflow library like below.
+ ```sh
+ $ ./nncc configure -DTENSORFLOW_PREFIX=/path/to/Tensorflow/library
+ ```
+ - `TENSORFLOW_PREFIX` should contain Tensorflow library as shown below.
+ ```
+ TENSORFLOW_PREFIX
+ ├ include
+ | ├ tensorflow
+ | | └ c
+ | | ├ c_api.h
+ | ├ ...
+ |
+ ├ lib
+ | ├ libtensorflow.so
+ | ├ ...
+ ├ ...
+ ```
+1. Runtime Library and Binary files
+ - Detailed information is located in [here](../../docs/nnfw/howto/CrossBuildForArm.md)
+ - If you build runtime, related files will be produced in `Product/out`. Do not rename or move it.
+ - (TBD) Support native build option
+1. Remote machine information and test list
+ - You should create `test.lst` file first as shown below.
+ - Set IP address and username of remote machine using `set` command.
+ - Add Tensorflow models which you want to verify, which are in `/res/TensorflowTests/`
+ ```cmake
+ #--------------- Remote Machine Setting ---------------#
+ set(REMOTE_IP "xxx.xxx.xxx.xxx")
+ set(REMOTE_USER "remote_username")
+
+ #--------------------- Tests list ---------------------#
+ add(UNIT_Add_000)
+ add(UNIT_Add_001)
+ ...
+ ```
+ - If no Tensorflow model is added, or if `REMOTE_IP` or `REMOTE_USER` is not given, `tf2circle-value-pbtxt-remote-test` will not be created.
+1. (Optional) ssh authentication
+ - This test uses the `ssh` and `scp` commands, which require the password of the remote machine whenever they are called. This means that you would have to enter the password every time `ssh` or `scp` runs.
+ - This test resolves the problem by using `ssh-copy-id`, which copies the public key of host machine to `authorized_keys` of remote machine. Because of that, this test will ask the password of remote machine only once, at the first time. This is the only user interaction while running this test.
+ - If you do not want to interact with system, just do `ssh-copy-id ${REMOTE_USER}@${REMOTE_IP}` in advance, before running this test. Once `ssh-copy-id` is done, there will be no user-interaction action while running the test.
+
+### Running
+
+- If you finished prerequisites properly, configuring -> building -> testing steps create cmake test automatically.
+- All the related materials will be sent to `REMOTE_WORKDIR` in the remote machine. The default value of `REMOTE_WORKDIR` is `CVT_YYYYMMDD_hhmmss`, which means a Circle Value Test done on YYYY/MM/DD at hh:mm:ss.
+- `REMOTE_WORKDIR` will not be removed automatically after this test finishes.
+ ```sh
+ $ ./nncc configure && ./nncc build
+
+ # Default REMOTE_WORKDIR is the CVT_YYYYMMDD_hhmmss folder
+ $ ./nncc test -R tf2circle_value_pbtxt_remote_test
+
+ # You can set REMOTE_WORKDIR where you have write privilege
+ $ REMOTE_WORKDIR=/path/you/want/ ./nncc test -R tf2circle_value_pbtxt_remote_test
+ ```
+
+### Generated Files While Running
+
+- All related files(`pb`, `circle`, `h5` ... etc.) are created in `build/compiler/tf2circle-value-pbtxt-remote-test` folder.
+ ```
+ build/compiler/tf2circle-value-pbtxt-remote-test
+ ├ Result_latest -> Result_YYMMDD_hhmmss.csv
+ ├ Result_YYMMDD_hhmmss.csv
+ ├ ...
+ |
+ ├ UNIT_Add_000
+ | ├ metadata
+ | | ├ MANIFEST
+ | | └ tc
+ | | ├ expected.h5
+ | | └ input.h5
+ | └ UNIT_Add_000.circle
+ |
+ ├ UNIT_Add_000.circle
+ ├ UNIT_Add_000.expected.h5
+ ├ UNIT_Add_000.info
+ ├ UNIT_Add_000.input.h5
+ ├ UNIT_Add_000.log
+ ├ UNIT_Add_000.passed
+ ├ UNIT_Add_000.pb
+ ├ UNIT_Add_000.pbtxt
+ |
+ ├ ...
+ ```
+- `nnpkg_test.sh`, runtime products and each nnpackage are sent to `REMOTE_WORKDIR` in remote machine.
+- (TBD) Modify script not to remove obtained h5 file.
+ ```
+ REMOTE_WORKDIR
+ ├ nnpkg_test.sh
+ |
+ ├ Product
+ | └ out
+ | ├ bin
+ | ├ lib
+ | ├ ...
+ |
+ ├ UNIT_Add_000
+ | ├ metadata
+ | | ├ MANIFEST
+ | | └ tc
+ | | ├ expected.h5
+ | | ├ input.h5
+ | | └ UNIT_Add_000.out.h5
+ | | (Only when comparing with expected.h5 fails)
+ | |
+ | └ UNIT_Add_000.circle
+ ├ ...
+ ```
+
+### Check Test Result
+
+- Summary of test result will be created as csv file in host.
+ ```sh
+ # Result_latest is symbolic link to the latest csv result file
+ # Print the latest test result
+ $ cat build/compiler/tf2circle-value-pbtxt-remote-test/Result_latest
+ TEST_NAME, TF2CIRCLE, CIRCLE_VALUE_TEST
+ UNIT_Add_000, TRUE, TRUE
+ ...
+
+ # List all result csv files
+ $ ls build/compiler/tf2circle-value-pbtxt-remote-test/Result_*.csv
+ Result_20191119_212521.csv
+ ...
+ ```
+- Detailed log file for each test cases is also created.
+ ```sh
+ $ cat build/compiler/tf2circle-value-pbtxt-remote-test/*.log
+ ```
diff --git a/compiler/tf2circle-value-pbtxt-remote-test/requires.cmake b/compiler/tf2circle-value-pbtxt-remote-test/requires.cmake
new file mode 100644
index 000000000..8d05cb50b
--- /dev/null
+++ b/compiler/tf2circle-value-pbtxt-remote-test/requires.cmake
@@ -0,0 +1,3 @@
+require("tf2circle")
+require("nnkit")
+require("tfkit")
diff --git a/compiler/tf2circle-value-pbtxt-remote-test/testall.sh b/compiler/tf2circle-value-pbtxt-remote-test/testall.sh
new file mode 100755
index 000000000..ca6fb49c8
--- /dev/null
+++ b/compiler/tf2circle-value-pbtxt-remote-test/testall.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+
+# Need at least 4 arguments
+if [[ $# -lt 4 ]]; then
+ echo "USAGE: $0 ..."
+ echo
+ echo "ARGUMENTS:"
+ echo " [test.config path]"
+ echo " [WORKDIR]"
+ echo " [REMOTE_IP]"
+ echo " [REMOTE_USER]"
+ echo " [Prefix1]"
+ echo " [Prefix2]"
+ echo " ..."
+ exit 255
+fi
+
+CONFIG_PATH="$1"; shift
+WORKDIR="$1"; shift
+REMOTE_IP="$1"; shift
+REMOTE_USER="$1"; shift
+
+CURRENT_DATETIME=$(date +'%Y%m%d_%H%M%S')
+REMOTE_WORKDIR=${REMOTE_WORKDIR:-"CVT_${CURRENT_DATETIME}"}
+RESULT_CSV="${WORKDIR}/Result_${CURRENT_DATETIME}.csv"
+
+source "${CONFIG_PATH}"
+
+echo "-- Found nnkit-run: ${NNKIT_RUN_PATH}"
+echo "-- Found TF backend: ${TF_BACKEND_PATH}"
+echo "-- Found TF2CIRCLE: ${TF2CIRCLE_PATH}"
+echo "-- Found MODEL2NNPKG: ${MODEL2NNPKG_PATH}"
+echo "-- Found nnpkg_test: ${NNPKG_TEST_PATH}"
+echo "-- Found Runtime library: ${RUNTIME_LIBRARY_PATH}"
+echo "-- Found randomize action: ${RANDOMIZE_ACTION_PATH}"
+echo "-- Found HDF5 export action: ${HDF5_EXPORT_ACTION_PATH}"
+echo "-- Found HDF5 import action: ${HDF5_IMPORT_ACTION_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+if [ -z ${MODEL2NNPKG_PATH} ] || [ ! -f ${MODEL2NNPKG_PATH} ]; then
+ echo "MODEL2NNPKG is not found"
+ exit 3
+fi
+
+if [ -z ${NNPKG_TEST_PATH} ] || [ ! -f ${NNPKG_TEST_PATH} ]; then
+ echo "nnpkg_test is not found"
+ exit 4
+fi
+
+# Register remote machine ssh information
+cat /dev/zero | ssh-keygen -q -N ""
+ssh-copy-id -o ConnectTimeout=5 "${REMOTE_USER}@${REMOTE_IP}"
+
+# Odroid IP address validation
+if [[ $? -ne 0 ]]; then
+ echo "Cannot reach to given remote machine. Check IP address or username."
+ exit 5
+fi
+
+# Send runtime library files
+ssh "${REMOTE_USER}@${REMOTE_IP}" "mkdir -p ${REMOTE_WORKDIR}/Product/"
+scp -r "${RUNTIME_LIBRARY_PATH}" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/Product/"
+
+# Send nnpkg_test.sh
+scp "${NNPKG_TEST_PATH}" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/"
+
+TESTED=()
+PASSED=()
+FAILED=()
+echo "TEST_NAME, TF2CIRCLE, CIRCLE_VALUE_TEST" >> ${RESULT_CSV}
+
+pushd "${WORKDIR}"
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}" "${PREFIX}.circle"
+
+ # Information to be recorded
+ TF2CIRCLE_PASSED=FALSE
+ CIRCLE_VALUE_PASSED=FALSE
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ echo "-- Found pb: ${PREFIX}.pb"
+
+ # Exit immediately if any command fails
+ set -e
+ # Show commands
+ set -x
+
+ # Generate circle
+ "${TF2CIRCLE_PATH}" \
+ "${WORKDIR}/${PREFIX}.info" \
+ "${WORKDIR}/${PREFIX}.pb" \
+ "${WORKDIR}/${PREFIX}.circle"
+
+ # Run TensorFlow
+ "${NNKIT_RUN_PATH}" \
+ --backend "${TF_BACKEND_PATH}" \
+ --backend-arg "${WORKDIR}/${PREFIX}.pb" \
+ --backend-arg "${WORKDIR}/${PREFIX}.info" \
+ --pre "${RANDOMIZE_ACTION_PATH}" \
+ --pre "${HDF5_EXPORT_ACTION_PATH}" \
+ --pre-arg "${WORKDIR}/${PREFIX}.input.h5" \
+ --post "${HDF5_EXPORT_ACTION_PATH}" \
+ --post-arg "${WORKDIR}/${PREFIX}.expected.h5"
+
+ # Generate nnpackage model
+ "${MODEL2NNPKG_PATH}" -o "${WORKDIR}" "${WORKDIR}/${PREFIX}.circle"
+
+ # Copy h5 files into nnpackage
+ mkdir -p "${WORKDIR}/${PREFIX}/metadata/tc"
+ cp "${WORKDIR}/${PREFIX}.input.h5" "${WORKDIR}/${PREFIX}/metadata/tc/input.h5"
+ cp "${WORKDIR}/${PREFIX}.expected.h5" "${WORKDIR}/${PREFIX}/metadata/tc/expected.h5"
+
+ # Run test_arm_nnpkg in remote machine
+ scp -r "${WORKDIR}/${PREFIX}/" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/${PREFIX}/"
+ ssh "${REMOTE_USER}@${REMOTE_IP}" "cd ${REMOTE_WORKDIR}; ./nnpkg_test.sh -i . -o ${PREFIX}/metadata/tc ${PREFIX}"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PREFIX}.circle" ]]; then
+ TF2CIRCLE_PASSED=TRUE
+ else
+ TF2CIRCLE_PASSED=FALSE
+ fi
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$PREFIX")
+ CIRCLE_VALUE_PASSED=TRUE
+ else
+ FAILED+=("$PREFIX")
+ CIRCLE_VALUE_PASSED=FALSE
+ fi
+
+ echo "${PREFIX}, ${TF2CIRCLE_PASSED}, ${CIRCLE_VALUE_PASSED}" >> ${RESULT_CSV}
+done
+popd
+
+rm -f Result_latest
+ln -s ${RESULT_CSV} Result_latest
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/tf2circle/CMakeLists.txt b/compiler/tf2circle/CMakeLists.txt
new file mode 100644
index 000000000..549f731a4
--- /dev/null
+++ b/compiler/tf2circle/CMakeLists.txt
@@ -0,0 +1,47 @@
+# TODO Allow users to force tf2circle build
+if(NOT TARGET moco_tf_frontend)
+ return()
+endif(NOT TARGET moco_tf_frontend)
+
+if(NOT TARGET tfinfo)
+ return()
+endif(NOT TARGET tfinfo)
+
+if(NOT TARGET exo)
+ return()
+endif(NOT TARGET exo)
+
+nnas_find_package(Protobuf QUIET)
+
+if(NOT Protobuf_FOUND)
+ return()
+endif(NOT Protobuf_FOUND)
+
+# generating and building schema for customop.conf
+Protobuf_Generate(CIRCLE_CUSTOMOP_INFO_PROTO
+ "${CMAKE_CURRENT_BINARY_DIR}/generated"
+ "./proto"
+ CustomOpInfo.proto)
+
+add_library(tf2circle_customop_info_proto STATIC ${CIRCLE_CUSTOMOP_INFO_PROTO_SOURCES})
+set_target_properties(tf2circle_customop_info_proto PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(tf2circle_customop_info_proto PUBLIC ${CIRCLE_CUSTOMOP_INFO_PROTO_INCLUDE_DIRS})
+target_link_libraries(tf2circle_customop_info_proto PUBLIC libprotobuf)
+install(TARGETS tf2circle_customop_info_proto DESTINATION lib)
+
+message(STATUS "Build tf2circle: TRUE")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(tf2circle ${SOURCES})
+target_link_libraries(tf2circle PRIVATE moco_log)
+target_link_libraries(tf2circle PRIVATE moco_tf_frontend)
+target_link_libraries(tf2circle PRIVATE tfinfo)
+target_link_libraries(tf2circle PRIVATE exo)
+target_link_libraries(tf2circle PRIVATE locop)
+target_link_libraries(tf2circle PRIVATE hermes_std)
+target_link_libraries(tf2circle PRIVATE stdex)
+target_link_libraries(tf2circle PRIVATE angkor cwrap)
+target_link_libraries(tf2circle PRIVATE tf2circle_customop_info_proto)
+
+install(TARGETS tf2circle DESTINATION bin)
diff --git a/compiler/tf2circle/README.md b/compiler/tf2circle/README.md
new file mode 100644
index 000000000..421431294
--- /dev/null
+++ b/compiler/tf2circle/README.md
@@ -0,0 +1,3 @@
+# tf2circle
+
+_tf2circle_ is a TensorFlow-to-Circle model converter.
diff --git a/compiler/tf2circle/proto/CustomOpInfo.proto b/compiler/tf2circle/proto/CustomOpInfo.proto
new file mode 100644
index 000000000..753370aea
--- /dev/null
+++ b/compiler/tf2circle/proto/CustomOpInfo.proto
@@ -0,0 +1,57 @@
+syntax = "proto3";
+
+package tf2circle;
+option cc_enable_arenas = true;
+
+/* example of prototxt file
+ custom_op {
+ name: "my/customOp/000"
+ op: "new_custom_op"
+ attr {
+ key: "output_shape"
+ value {
+ shape {
+ dim { size: 1 }
+ dim { size: 2 }
+ dim { size: 1 }
+ dim { size: 2 }
+ }
+ }
+ }
+ }
+*/
+
+enum DataType {
+ // Not a legal value for DataType. Used to indicate a DataType field
+ // has not been set.
+ DT_INVALID = 0;
+
+ DT_FLOAT = 1;
+ DT_INT32 = 15; // Set to 15, considering possibility for reordering. 10 for INT, 10+N for INT 2^N
+ // TODO Support more types
+}
+
+message ShapeProto {
+ message Dim {
+ int64 size = 1; // tensorflow uses int64
+ };
+
+ repeated Dim dim = 2;
+}
+
+message AttrValue {
+ oneof value {
+ ShapeProto shape = 1;
+ DataType type = 2;
+ }
+}
+
+message CustomOpDef {
+ string name = 1;
+ string op = 2;
+ map<string, AttrValue> attr = 3;
+}
+
+message CustomOpInfoDef {
+ repeated CustomOpDef custom_op = 1;
+}
diff --git a/compiler/tf2circle/requires.cmake b/compiler/tf2circle/requires.cmake
new file mode 100644
index 000000000..68d45bf3a
--- /dev/null
+++ b/compiler/tf2circle/requires.cmake
@@ -0,0 +1,8 @@
+require("stdex")
+require("hermes-std")
+require("moco-tf")
+require("exo")
+require("locop")
+require("loco")
+require("cwrap")
+require("angkor")
diff --git a/compiler/tf2circle/src/CustomopConfLoader.cpp b/compiler/tf2circle/src/CustomopConfLoader.cpp
new file mode 100644
index 000000000..412405893
--- /dev/null
+++ b/compiler/tf2circle/src/CustomopConfLoader.cpp
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CustomopConfLoader.h"
+
+#include <loco.h>
+#include <cwrap/Fildes.h>
+#include <angkor/TensorShape.h>
+
+#include <CustomOpInfo.pb.h>
+
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <fcntl.h>
+
+namespace
+{
+bool load_text(const cwrap::Fildes &fildes, tf2circle::CustomOpInfoDef &def)
+{
+ google::protobuf::io::FileInputStream fis(fildes.get());
+
+ return google::protobuf::TextFormat::Parse(&fis, &def);
+}
+
+angkor::TensorShape convert_shape(const tf2circle::ShapeProto &shape)
+{
+ angkor::TensorShape to_shape;
+
+ int64_t rank64 = shape.dim_size();
+ assert(rank64 < std::numeric_limits<uint32_t>::max());
+
+ int32_t rank = static_cast<int32_t>(rank64);
+ to_shape.resize(rank);
+
+ for (int32_t d = 0; d < rank; d++)
+ {
+ int64_t dim_value = shape.dim(d).size();
+ assert(dim_value >= 0ULL);
+ assert(dim_value < std::numeric_limits<uint32_t>::max());
+
+ uint32_t dim_value32 = static_cast<uint32_t>(dim_value);
+ to_shape.dim(d) = dim_value32;
+ }
+
+ return to_shape;
+}
+
+loco::DataType convert_dtype(const tf2circle::DataType &dtype)
+{
+ if (dtype == tf2circle::DT_FLOAT)
+ return loco::DataType::FLOAT32;
+ else if (dtype == tf2circle::DT_INT32)
+ return loco::DataType::S32;
+ else
+ throw std::runtime_error("Not yet supported datatype. Cannot convert.");
+}
+
+// Note : the following functions look similar to those in plier::tf::Convert.h.
+// However, the schema is different (not "tensorflow::..." but "tf2circle::...").
+// So, plier::tf cannot be used.
+loco::DataType get_dtype_attr(const tf2circle::CustomOpDef &custom_op)
+{
+ std::string type_attr_name("dtype");
+
+ assert(custom_op.attr().count(type_attr_name) > 0);
+ const auto &attr = custom_op.attr().at(type_attr_name);
+ assert(attr.value_case() == tf2circle::AttrValue::kType);
+ auto dtype_def = attr.type();
+
+ return convert_dtype(dtype_def);
+}
+
+angkor::TensorShape get_shape_attr(const tf2circle::CustomOpDef &custom_op)
+{
+ std::string shape_attr_name("output_shape");
+
+ assert(custom_op.attr().count(shape_attr_name) > 0);
+ const auto &attr = custom_op.attr().at(shape_attr_name);
+ assert(attr.value_case() == tf2circle::AttrValue::kShape);
+ auto shape_def = attr.shape();
+
+ return convert_shape(shape_def);
+}
+
+void add_customop(tf2circle::CustomOpInfoDef &def, moco::ModelSignature &sig)
+{
+ for (const auto &custom_op : def.custom_op())
+ {
+ sig.add_customop(custom_op.op());
+
+ auto name = custom_op.name();
+
+ // setting dtype and shape to ModelSignature
+ sig.dtype(name, get_dtype_attr(custom_op));
+ sig.shape(name, get_shape_attr(custom_op));
+ }
+}
+
+} // namespace
+
+namespace tf2circle
+{
+
+void load_customop_conf(const std::string &path, moco::ModelSignature &sig)
+{
+ CustomOpInfoDef def;
+
+ // TODO Consider Windows
+ cwrap::Fildes fildes{open(path.c_str(), O_RDONLY)};
+
+ if (fildes.get() < 0)
+ {
+ throw std::runtime_error{"Error: " + path + " not found"};
+ }
+
+ if (!load_text(fildes, def))
+ {
+ throw std::runtime_error{"Error: Failed to parse prototxt " + path};
+ }
+
+ add_customop(def, sig);
+}
+
+} // namespace tf2circle
diff --git a/compiler/tf2circle/src/CustomopConfLoader.h b/compiler/tf2circle/src/CustomopConfLoader.h
new file mode 100644
index 000000000..500b6acdf
--- /dev/null
+++ b/compiler/tf2circle/src/CustomopConfLoader.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CUSTOMOP_CONF_LOADER_H__
+#define __CUSTOMOP_CONF_LOADER_H__
+
+#include <moco/tf/Frontend.h>
+
+#include <string>
+
+namespace tf2circle
+{
+
+/// @brief Loads customop.conf into ModelSignature
+void load_customop_conf(const std::string &path, moco::ModelSignature &sig);
+
+} // namespace tf2circle
+
+#endif // __CUSTOMOP_CONF_LOADER_H__
diff --git a/compiler/tf2circle/src/tf2circle.cpp b/compiler/tf2circle/src/tf2circle.cpp
new file mode 100644
index 000000000..a1160e968
--- /dev/null
+++ b/compiler/tf2circle/src/tf2circle.cpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CustomopConfLoader.h"
+
+#include <moco/LoggingContext.h>
+#include <moco/tf/Frontend.h>
+#include <exo/LoggingContext.h>
+#include <exo/CircleExporter.h>
+
+#include <nnkit/support/tftestinfo/TensorInfoParser.h>
+
+#include <locop/FormattedGraph.h>
+
+#include <hermes/ConsoleReporter.h>
+#include <hermes/EnvConfig.h>
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+#include <iostream>
+#include <stdexcept>
+#include <string>
+
+namespace
+{
+
+std::unique_ptr<loco::Graph> import(const moco::ModelSignature &sig, const std::string &path)
+{
+ moco::tf::Frontend frontend;
+ return frontend.load(sig, path.c_str(), moco::tf::Frontend::FileType::Binary);
+}
+
+} // namespace
+
+//
+// Logging Support
+//
+namespace
+{
+
+struct Logger final : public hermes::Source
+{
+ Logger(hermes::Context *ctx) { activate(ctx->sources(), ctx->bus()); }
+ ~Logger() { deactivate(); }
+};
+
+struct LoggingContext
+{
+ static hermes::Context *get(void)
+ {
+ using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
+
+ static hermes::Context *ctx = nullptr;
+
+ if (ctx == nullptr)
+ {
+ ctx = new hermes::Context;
+ ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
+ ctx->config(stdex::make_unique<EnvConfig>("TF2CIRCLE_Log"));
+ }
+
+ return ctx;
+ }
+};
+
+void print_help()
+{
+ std::cerr << "Usage: tf2circle <path/to/info> <path/to/pb> <path/to/circle/model> " << std::endl
+ << "Options: --customop <path/to/customop.conf>" << std::endl;
+}
+
+} // namespace
+
+#define LOGGER(name) \
+ ::Logger name { ::LoggingContext::get() }
+
+#define INFO(name) HERMES_INFO(name)
+
+namespace
+{
+
+void internal_error(void)
+{
+ std::cerr << "tf2circle: internal compiler error" << std::endl;
+
+ // TODO Explain how to report a bug
+}
+
+} // namespace
+
+namespace
+{
+
+class EntryFunctor
+{
+public:
+ EntryFunctor();
+
+public:
+ ~EntryFunctor();
+
+public:
+ int operator()(int argc, char **argv) const;
+};
+
+EntryFunctor::EntryFunctor()
+{
+ // NOTE Implement initialization here
+}
+
+EntryFunctor::~EntryFunctor()
+{
+  // NOTE Implement finalization here
+}
+
+int EntryFunctor::operator()(int argc, char **argv) const
+{
+ using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
+
+ // This line allows users to control all the moco-tf loggers via TF2CIRCLE_Log_Frontend
+ moco::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2CIRCLE_Log_Frontend"));
+ // This line allows users to control all the exo-circle loggers via TF2CIRCLE_Log_Backend
+ exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2CIRCLE_Log_Backend"));
+
+ LOGGER(l);
+
+ // TODO We need better args parsing in future
+ if (!(argc == 4 or argc == 6))
+ {
+ print_help();
+ return 255;
+ }
+
+ std::string info_path{argv[1]};
+ std::string tf_path{argv[2]}; // .pb file
+ std::string circle_path{argv[3]};
+
+ std::cout << "Read '" << info_path << "'" << std::endl;
+
+ moco::ModelSignature sig;
+ {
+ for (const auto &info : nnkit::support::tftestinfo::parse(info_path.c_str()))
+ {
+ switch (info->kind())
+ {
+ case nnkit::support::tftestinfo::ParsedTensor::Kind::Input:
+ sig.add_input(moco::TensorName{info->name()});
+ sig.shape(info->name(), info->shape());
+ break;
+
+ case nnkit::support::tftestinfo::ParsedTensor::Kind::Output:
+ sig.add_output(moco::TensorName{info->name()});
+ sig.shape(info->name(), info->shape());
+ break;
+
+ default:
+ throw std::runtime_error{"Unknown kind"};
+ }
+ }
+ }
+
+ if (argc == 6) // optional parameter: path of customop.conf
+ {
+ if (std::string{argv[4]} == "--customop")
+ {
+ tf2circle::load_customop_conf(argv[5], sig);
+ }
+ else
+ {
+ print_help();
+ return 255;
+ }
+ }
+
+ std::cout << "Read '" << info_path << "' - Done" << std::endl;
+
+ std::cout << "Import from '" << tf_path << "'" << std::endl;
+ auto g = import(sig, tf_path);
+ std::cout << "Import from '" << tf_path << "' - Done" << std::endl;
+
+ INFO(l) << "Import Graph" << std::endl;
+ INFO(l) << locop::fmt<locop::Formatter::LinearV1>(g) << std::endl;
+
+ std::cout << "Export into '" << circle_path << "'" << std::endl;
+ exo::CircleExporter(g.get()).dumpToFile(circle_path.c_str());
+ std::cout << "Export into '" << circle_path << "' - Done" << std::endl;
+
+ return 0;
+}
+
+} // namespace
+
+int main(int argc, char **argv)
+{
+ // TODO Add "signal" handler here
+
+ try
+ {
+ EntryFunctor entry;
+ return entry(argc, argv);
+ }
+ catch (...)
+ {
+ // Catch all the exception and print the default error message.
+ internal_error();
+ }
+
+ // EX_SOFTWARE defined in "sysexits.h"
+ return 70;
+}
diff --git a/compiler/tf2nnpkg/CMakeLists.txt b/compiler/tf2nnpkg/CMakeLists.txt
new file mode 100644
index 000000000..8e1edf858
--- /dev/null
+++ b/compiler/tf2nnpkg/CMakeLists.txt
@@ -0,0 +1,35 @@
+if(NOT TARGET moco_tf_frontend)
+ return()
+endif(NOT TARGET moco_tf_frontend)
+
+if(NOT TARGET tfinfo)
+ return()
+endif(NOT TARGET tfinfo)
+
+if(NOT TARGET exo)
+ return()
+endif(NOT TARGET exo)
+
+message(STATUS "Build tf2nnpkg: TRUE")
+
+unset(SOURCES)
+list(APPEND SOURCES "src/tf2nnpkg.cpp")
+list(APPEND SOURCES "src/filesystem_common.cpp")
+
+option(TF2NNPKG_FOR_WINDOWS "Use Windows filesystem" OFF)
+if(TF2NNPKG_FOR_WINDOWS)
+ list(APPEND SOURCES "src/filesystem_windows.cpp")
+else()
+ list(APPEND SOURCES "src/filesystem_linux.cpp") # default
+endif()
+
+add_executable(tf2nnpkg ${SOURCES})
+target_link_libraries(tf2nnpkg PRIVATE moco_log)
+target_link_libraries(tf2nnpkg PRIVATE moco_tf_frontend)
+target_link_libraries(tf2nnpkg PRIVATE tfinfo)
+target_link_libraries(tf2nnpkg PRIVATE exo)
+target_link_libraries(tf2nnpkg PRIVATE locop)
+target_link_libraries(tf2nnpkg PRIVATE hermes_std)
+target_link_libraries(tf2nnpkg PRIVATE stdex)
+target_link_libraries(tf2nnpkg PRIVATE angkor cwrap)
+install(TARGETS tf2nnpkg DESTINATION bin)
diff --git a/compiler/tf2nnpkg/requires.cmake b/compiler/tf2nnpkg/requires.cmake
new file mode 100644
index 000000000..68d45bf3a
--- /dev/null
+++ b/compiler/tf2nnpkg/requires.cmake
@@ -0,0 +1,8 @@
+require("stdex")
+require("hermes-std")
+require("moco-tf")
+require("exo")
+require("locop")
+require("loco")
+require("cwrap")
+require("angkor")
diff --git a/compiler/tf2nnpkg/src/filesystem.h b/compiler/tf2nnpkg/src/filesystem.h
new file mode 100644
index 000000000..0da608a6f
--- /dev/null
+++ b/compiler/tf2nnpkg/src/filesystem.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TF2NNPKG_FILESYSTEM_H__
+#define __TF2NNPKG_FILESYSTEM_H__
+
+/// @file OS-dependent filesystem functionalities
+
+#include <string>
+
+namespace filesystem
+{
+
+const std::string separator();
+
+/// @brief Normalize compatible separator in path to default separator
+std::string normalize_path(const std::string &path);
+
+bool is_dir(const std::string &path);
+
+bool mkdir(const std::string &path);
+
+// TODO use variadic argument
+std::string join(const std::string &path1, const std::string &path2);
+
+std::string basename(const std::string &path);
+
+} // namespace filesystem
+
+#endif // __TF2NNPKG_FILESYSTEM_H__
diff --git a/compiler/tf2nnpkg/src/filesystem_common.cpp b/compiler/tf2nnpkg/src/filesystem_common.cpp
new file mode 100644
index 000000000..e14528afb
--- /dev/null
+++ b/compiler/tf2nnpkg/src/filesystem_common.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "filesystem.h"
+
+namespace filesystem
+{
+
+std::string join(const std::string &path1, const std::string &path2)
+{
+ // TODO check path1 does not end with separator
+ // TODO check path2 does not start with separator
+ return path1 + separator() + path2;
+}
+
+std::string basename(const std::string &path)
+{
+ auto last_index = path.find_last_of(separator());
+
+ // No separator
+ if (last_index == std::string::npos)
+ return path;
+
+ // Trailing separator
+ if (last_index + separator().size() == path.size())
+ return basename(path.substr(0, last_index));
+
+ return path.substr(last_index + separator().size());
+}
+
+} // namespace filesystem
diff --git a/compiler/tf2nnpkg/src/filesystem_linux.cpp b/compiler/tf2nnpkg/src/filesystem_linux.cpp
new file mode 100644
index 000000000..a2fa96732
--- /dev/null
+++ b/compiler/tf2nnpkg/src/filesystem_linux.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "filesystem.h"
+
+#include <sys/stat.h>
+#include <dirent.h>
+
+namespace filesystem
+{
+
+const std::string separator() { return "/"; }
+
+std::string normalize_path(const std::string &path)
+{
+ // DO NOTHING
+ return path;
+}
+
+bool is_dir(const std::string &path)
+{
+ DIR *dir = opendir(path.c_str());
+ if (dir)
+ {
+ closedir(dir);
+ return true;
+ }
+ return false;
+}
+
+bool mkdir(const std::string &path) { return ::mkdir(path.c_str(), 0775) == 0; }
+
+} // namespace filesystem
diff --git a/compiler/tf2nnpkg/src/filesystem_windows.cpp b/compiler/tf2nnpkg/src/filesystem_windows.cpp
new file mode 100644
index 000000000..e7fba3352
--- /dev/null
+++ b/compiler/tf2nnpkg/src/filesystem_windows.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "filesystem.h"
+
+#include <direct.h>
+#include <windows.h>
+
+namespace filesystem
+{
+
+const std::string separator() { return "\\"; }
+
+std::string normalize_path(const std::string &path)
+{
+ std::string ret = path;
+
+ std::string candidate = "/";
+ size_t start_pos = 0;
+ while ((start_pos = ret.find(candidate, start_pos)) != std::string::npos)
+ {
+ ret.replace(start_pos, candidate.length(), separator());
+ start_pos += separator().length();
+ }
+ return ret;
+}
+
+bool is_dir(const std::string &path)
+{
+ DWORD ftyp = GetFileAttributesA(path.c_str());
+ if (ftyp == INVALID_FILE_ATTRIBUTES)
+ return false; // something is wrong with path
+
+ if (ftyp & FILE_ATTRIBUTE_DIRECTORY)
+ return true; // this is a directory
+
+ return false; // this is not a directory
+}
+
+bool mkdir(const std::string &path) { return _mkdir(path.c_str()) == 0; }
+
+} // namespace filesystem
diff --git a/compiler/tf2nnpkg/src/tf2nnpkg.cpp b/compiler/tf2nnpkg/src/tf2nnpkg.cpp
new file mode 100644
index 000000000..d9a0d9d2f
--- /dev/null
+++ b/compiler/tf2nnpkg/src/tf2nnpkg.cpp
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "filesystem.h"
+
+#include <moco/LoggingContext.h>
+#include <moco/tf/Frontend.h>
+#include <exo/LoggingContext.h>
+#include <exo/CircleExporter.h>
+
+#include <nnkit/support/tftestinfo/TensorInfoParser.h>
+
+#include <locop/FormattedGraph.h>
+
+#include <hermes/ConsoleReporter.h>
+#include <hermes/EnvConfig.h>
+
+#include <stdex/Memory.h>
+
+#include <iostream>
+#include <fstream>
+#include <functional>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+std::unique_ptr<loco::Graph> import(const moco::ModelSignature &sig, const std::string &path)
+{
+ moco::tf::Frontend frontend;
+ return frontend.load(sig, path.c_str(), moco::tf::Frontend::FileType::Binary);
+}
+
+} // namespace
+
+//
+// Logging Support
+//
+namespace
+{
+
+struct Logger final : public hermes::Source
+{
+ Logger(hermes::Context *ctx) { activate(ctx->sources(), ctx->bus()); }
+ ~Logger() { deactivate(); }
+};
+
+struct LoggingContext
+{
+ static hermes::Context *get(void)
+ {
+ using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
+
+ static hermes::Context *ctx = nullptr;
+
+ if (ctx == nullptr)
+ {
+ ctx = new hermes::Context;
+ ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
+ ctx->config(stdex::make_unique<EnvConfig>("TF2NNPKG_Log"));
+ }
+
+ return ctx;
+ }
+};
+
+void print_help()
+{
+ std::cerr << "Usage:" << std::endl;
+ std::cerr << " tf2nnpkg --info <path/to/info>" << std::endl;
+ std::cerr << " --graphdef <path/to/pb>" << std::endl;
+ std::cerr << " -o <path/to/package/dir>" << std::endl;
+}
+
+} // namespace
+
+#define LOGGER(name) \
+ ::Logger name { ::LoggingContext::get() }
+
+#define INFO(name) HERMES_INFO(name)
+
+namespace
+{
+
+void internal_error(void)
+{
+ std::cerr << "tf2nnpkg: internal compiler error" << std::endl;
+
+ // TODO Explain how to report a bug
+}
+
+} // namespace
+
+namespace
+{
+
+std::string extract_modelname(std::string tf_path)
+{
+ auto filename = filesystem::basename(tf_path);
+ // TODO Find better way
+ const std::string key = ".pb";
+ auto suffix_index = filename.find(key);
+ assert(suffix_index != std::string::npos);
+ assert(suffix_index + key.size() == filename.size());
+
+ return filename.substr(0, suffix_index);
+}
+
+class EntryFunctor
+{
+public:
+ EntryFunctor();
+
+public:
+ ~EntryFunctor();
+
+public:
+ int operator()(int argc, char **argv) const;
+};
+
+EntryFunctor::EntryFunctor()
+{
+ // NOTE Implement initialization here
+}
+
+EntryFunctor::~EntryFunctor()
+{
+  // NOTE Implement finalization here
+}
+
+int EntryFunctor::operator()(int argc, char **argv) const
+{
+ using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
+
+ // This line allows users to control all the moco-tf loggers via TF2NNPKG_Log_Frontend
+ moco::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2NNPKG_Log_Frontend"));
+ // This line allows users to control all the exo-circle loggers via TF2NNPKG_Log_Backend
+ exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2NNPKG_Log_Backend"));
+
+ LOGGER(l);
+
+ // Simple argument parser (based on map)
+ std::map<std::string, std::function<void(const std::string &arg)>> argparse;
+
+ std::string arg_info;
+ std::string arg_graphdef;
+ std::string arg_output;
+
+ argparse["--info"] = [&](const std::string &arg) { arg_info = arg; };
+ argparse["--graphdef"] = [&](const std::string &arg) { arg_graphdef = arg; };
+ argparse["-o"] = [&](const std::string &arg) { arg_output = arg; };
+
+ // TODO We need better args parsing in future
+
+ for (int n = 1; n < argc; n += 2)
+ {
+ const std::string tag{argv[n]};
+ const std::string arg{argv[n + 1]};
+
+ auto it = argparse.find(tag);
+ if (it == argparse.end())
+ {
+ std::cerr << "Option '" << tag << "' is not supported" << std::endl;
+ print_help();
+ return 255;
+ }
+
+ it->second(arg);
+ }
+ if (arg_info.empty() || arg_graphdef.empty() || arg_output.empty())
+ {
+ print_help();
+ return 255;
+ }
+
+ // Input paths
+ std::string info_path = arg_info;
+ std::string tf_path = arg_graphdef; // .pb file
+
+ // Output paths
+ std::string outdir_path = arg_output;
+ std::string modelname = extract_modelname(filesystem::normalize_path(tf_path));
+ std::string nnpkg_path = filesystem::join(outdir_path, modelname);
+ std::string model_filename = modelname + ".circle";
+ std::string metadata_path = filesystem::join(nnpkg_path, "metadata");
+ std::string circle_path = filesystem::join(nnpkg_path, model_filename);
+ std::string manifest_path = filesystem::join(metadata_path, "MANIFEST");
+
+ std::cout << "Read '" << info_path << "'" << std::endl;
+
+ moco::ModelSignature sig;
+ {
+ for (const auto &info : nnkit::support::tftestinfo::parse(info_path.c_str()))
+ {
+ switch (info->kind())
+ {
+ case nnkit::support::tftestinfo::ParsedTensor::Kind::Input:
+ sig.add_input(moco::TensorName{info->name()});
+ sig.shape(info->name(), info->shape());
+ break;
+
+ case nnkit::support::tftestinfo::ParsedTensor::Kind::Output:
+ sig.add_output(moco::TensorName{info->name()});
+ sig.shape(info->name(), info->shape());
+ break;
+
+ default:
+ throw std::runtime_error{"Unknown kind"};
+ }
+ }
+ }
+
+ std::cout << "Read '" << info_path << "' - Done" << std::endl;
+
+ std::cout << "Import from '" << tf_path << "'" << std::endl;
+ auto g = import(sig, tf_path);
+ std::cout << "Import from '" << tf_path << "' - Done" << std::endl;
+
+ INFO(l) << "Import Graph" << std::endl;
+ INFO(l) << locop::fmt<locop::Formatter::LinearV1>(g) << std::endl;
+
+ if (not filesystem::is_dir(outdir_path))
+ {
+ std::cout << "Make output directory '" << outdir_path << "'" << std::endl;
+ if (not filesystem::mkdir(outdir_path))
+ throw std::runtime_error("Fail to make directory " + outdir_path);
+ std::cout << "Make output directory '" << outdir_path << "' - Done" << std::endl;
+ }
+
+ if (not filesystem::is_dir(nnpkg_path))
+ {
+ std::cout << "Make package directory '" << nnpkg_path << "'" << std::endl;
+ if (not filesystem::mkdir(nnpkg_path))
+ throw std::runtime_error("Fail to make directory " + nnpkg_path);
+ std::cout << "Make package directory '" << nnpkg_path << "' - Done" << std::endl;
+ }
+
+ std::cout << "Export into '" << circle_path << "'" << std::endl;
+ exo::CircleExporter(g.get()).dumpToFile(circle_path.c_str());
+ std::cout << "Export into '" << circle_path << "' - Done" << std::endl;
+
+ if (not filesystem::is_dir(metadata_path))
+ {
+ std::cout << "Make metadata directory '" << metadata_path << "'" << std::endl;
+ if (not filesystem::mkdir(metadata_path))
+ throw std::runtime_error("Fail to make directory " + metadata_path);
+ std::cout << "Make metadata directory '" << metadata_path << "' - Done" << std::endl;
+ }
+
+ std::cout << "Make manifest file '" << manifest_path << "'" << std::endl;
+ std::ofstream manifest_file;
+ manifest_file.open(manifest_path, std::ios::out | std::ios::binary);
+ manifest_file << "{\n";
+ manifest_file << " \"major-version\" : \"1\",\n";
+ manifest_file << " \"minor-version\" : \"0\",\n";
+ manifest_file << " \"patch-version\" : \"0\",\n";
+ manifest_file << " \"models\" : [ \"" + model_filename + "\" ],\n";
+ manifest_file << " \"model-types\" : [ \"circle\" ]\n";
+ manifest_file << "}";
+ manifest_file.close();
+ std::cout << "Make manifest file '" << manifest_path << "' - Done" << std::endl;
+
+ return 0;
+}
+
+} // namespace
+
+int main(int argc, char **argv)
+{
+ // TODO Add "signal" handler here
+
+ try
+ {
+ EntryFunctor entry;
+ return entry(argc, argv);
+ }
+ catch (...)
+ {
+ // Catch all the exception and print the default error message.
+ internal_error();
+ }
+
+ // EX_SOFTWARE defined in "sysexits.h"
+ return 70;
+}
diff --git a/compiler/tf2tflite-dredd-pb-test/.gitignore b/compiler/tf2tflite-dredd-pb-test/.gitignore
new file mode 100644
index 000000000..23c7c1bb3
--- /dev/null
+++ b/compiler/tf2tflite-dredd-pb-test/.gitignore
@@ -0,0 +1 @@
+/contrib.lst
diff --git a/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt b/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt
new file mode 100644
index 000000000..b75c50772
--- /dev/null
+++ b/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt
@@ -0,0 +1,141 @@
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS tfl-inspect)
+list(APPEND REQUIRED_TARGETS tfl-verify)
+list(APPEND REQUIRED_TARGETS tf2tflite)
+list(APPEND REQUIRED_TARGETS dredd_rule_lib)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+set(PB_MODEL_REPO "${CMAKE_CURRENT_SOURCE_DIR}/contrib") # Where to find models to test
+
+unset(KEYS)
+unset(DEPS)
+
+function(check_file_exist)
+
+ foreach(FILE_PATH IN LISTS ARGV)
+ if(NOT EXISTS "${FILE_PATH}")
+ message(FATAL_ERROR "${FILE_PATH} does not exist." )
+ endif()
+ endforeach()
+
+endfunction()
+
+#
+# processing models in contrib.lst
+#
+# Example)
+#
+# Add(Inception_v3 RULE tflite_1.0_rel_requirement.rule)
+# -> Read compiler/tf2tflite-dredd-pb-test/contrib/Inception_v3/model.pb and generate
+# "Inception_v3.tflite". Then rule file is tested for the generated tflite file.
+#
+macro(Add MODEL_DIR)
+
+ set(ARG_OPTION)
+ set(ARG_ONE_VALUE RULE) # rule file name
+ set(ARG_MULTI_VALUE)
+ cmake_parse_arguments(ARG "${ARG_OPTION}" "${ARG_ONE_VALUE}" "${ARG_MULTI_VALUE}" ${ARGN})
+
+ if(NOT ARG_RULE )
+ message( FATAL_ERROR "RULE is mandadatory arg" )
+ endif()
+
+ set(RULE_FILENAME ${ARG_RULE})
+
+ set(MODEL_SOURCE_DIR "${PB_MODEL_REPO}/${MODEL_DIR}")
+
+ set(PB_PATH "${MODEL_SOURCE_DIR}/model.pb")
+ set(INFO_PATH "${MODEL_SOURCE_DIR}/model.info")
+ set(RULE_PATH "${MODEL_SOURCE_DIR}/${RULE_FILENAME}")
+
+ check_file_exist(${PB_PATH} ${INFO_PATH} ${RULE_PATH})
+
+ # Generate .test file which declares path of target pb, info, rule files
+ set(TARGET_TESTNAME "${MODEL_DIR}")
+ set(TEST_CONFIG_FILE "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_TESTNAME}.test")
+
+ add_custom_command(
+ OUTPUT ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_PB_PATH="${PB_PATH}"' >> ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_INFO_PATH="${INFO_PATH}"' >> ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_RULE_PATH="${RULE_PATH}"' >> ${TEST_CONFIG_FILE}
+ DEPENDS
+ ${PB_PATH}
+ COMMENT "Generate ${TARGET_TESTNAME} configuration for BIN"
+ )
+
+ list(APPEND KEYS "${TARGET_TESTNAME}")
+ list(APPEND DEPS "${TEST_CONFIG_FILE}")
+
+endmacro(Add)
+
+include(contrib.lst OPTIONAL)
+
+#
+# Generate toolchain.config
+#
+set(TOOLCHAIN_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/toolchain.config")
+
+add_custom_command(
+ OUTPUT ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TFL_INSPECT_PATH=\"$<TARGET_FILE:tfl-inspect>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TFL_VERIFY_PATH=\"$<TARGET_FILE:tfl-verify>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF2TFLITE_PATH=\"$<TARGET_FILE:tf2tflite>\"' >> ${TOOLCHAIN_CONFIG}
+  # add more if a new executable file is needed in runner.sh and rule-lib.sh
+ DEPENDS
+ tfl-inspect
+ tfl-verify
+ tf2tflite
+ COMMENT "Generate toolchin configuration"
+)
+
+list(APPEND DEPS "${TOOLCHAIN_CONFIG}")
+
+#
+# Generate quality test runner
+#
+set(SOURCE_RUNNER "${CMAKE_CURRENT_SOURCE_DIR}/runner.sh")
+set(TARGET_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/runner.sh")
+
+add_custom_command(
+ OUTPUT ${TARGET_RUNNER}
+ COMMAND ${CMAKE_COMMAND} -E copy "${SOURCE_RUNNER}" "${TARGET_RUNNER}"
+ DEPENDS ${SOURCE_RUNNER}
+ COMMENT "Generate test runner"
+)
+
+list(APPEND DEPS "${TARGET_RUNNER}")
+
+#
+# copy rule-lib.sh (a library of shell script functions)
+#
+
+# getting path for rule-lib.sh in dredd-rule-lib
+get_target_property(DREDD_RULE_LIB_DIR dredd_rule_lib BINARY_DIR)
+
+set(SOURCE_RULE_LIB "${DREDD_RULE_LIB_DIR}/rule-lib.sh")
+set(TARGET_RULE_LIB "${CMAKE_CURRENT_BINARY_DIR}/rule-lib.sh")
+
+add_custom_command(
+ OUTPUT ${TARGET_RULE_LIB}
+ COMMAND ${CMAKE_COMMAND} -E copy "${SOURCE_RULE_LIB}" "${TARGET_RULE_LIB}"
+ DEPENDS ${SOURCE_RULE_LIB}
+ COMMENT "Generate rule lib"
+)
+
+list(APPEND DEPS "${TARGET_RULE_LIB}")
+
+# Generate dependencies
+add_custom_target(tf2tflite_dredd_pb_deps ALL DEPENDS ${DEPS})
+
+add_test(
+ NAME tf2tflite_dredd_pb_test
+ COMMAND
+ "${TARGET_RUNNER}"
+ "${TOOLCHAIN_CONFIG}"
+ ${KEYS}
+)
diff --git a/compiler/tf2tflite-dredd-pb-test/README.md b/compiler/tf2tflite-dredd-pb-test/README.md
new file mode 100644
index 000000000..87bd18dd7
--- /dev/null
+++ b/compiler/tf2tflite-dredd-pb-test/README.md
@@ -0,0 +1,6 @@
+# tf2tflite-dredd-pb-test
+
+*tf2tflite-dredd-pb-test* validates non-functional aspects of `.tflite` files, which are compiled
+from `.pb` files.
+
+For more information, please refer to `README.md` in *dredd-rule-lib*.
diff --git a/compiler/tf2tflite-dredd-pb-test/contrib/.gitignore b/compiler/tf2tflite-dredd-pb-test/contrib/.gitignore
new file mode 100644
index 000000000..968c34510
--- /dev/null
+++ b/compiler/tf2tflite-dredd-pb-test/contrib/.gitignore
@@ -0,0 +1,3 @@
+/*
+# Exclude all except below
+!.gitignore
diff --git a/compiler/tf2tflite-dredd-pb-test/requires.cmake b/compiler/tf2tflite-dredd-pb-test/requires.cmake
new file mode 100644
index 000000000..da019550f
--- /dev/null
+++ b/compiler/tf2tflite-dredd-pb-test/requires.cmake
@@ -0,0 +1,4 @@
+require("tf2tflite")
+require("tfl-inspect")
+require("tfl-verify")
+require("dredd-rule-lib")
diff --git a/compiler/tf2tflite-dredd-pb-test/runner.sh b/compiler/tf2tflite-dredd-pb-test/runner.sh
new file mode 100755
index 000000000..4eef372b0
--- /dev/null
+++ b/compiler/tf2tflite-dredd-pb-test/runner.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+# This script checks tflite file generated by tf2tflite
+
+# exit if unknown var is used
+set -u
+
+WORKDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+# Need at least toolchain.config
+if [[ $# -lt 1 ]]; then
+ echo "USAGE: $0 ..."
+ echo
+ echo "ARGUMENTS:"
+ echo " [toolchain.config path]"
+ echo " [Prefix1]"
+ echo " [Prefix2]"
+ echo " ..."
+ exit 255
+fi
+
+CONFIG_PATH="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found tfl-inspect: ${TFL_INSPECT_PATH}"
+echo "-- Found tfl-verify: ${TFL_VERIFY_PATH}"
+echo "-- Found tf2tflite: ${TF2TFLITE_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+
+# running each rule file
+
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ echo "[ RUN ] ${PREFIX}"
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ source "${PREFIX}.test"
+
+ echo "-- Use '${MODEL_PB_PATH}', '${MODEL_INFO_PATH}', and '${MODEL_RULE_PATH}'"
+
+ # Exit immediately if any command fails
+ set -e
+ # Show commands
+ set -x
+
+ # Generate tflite
+ "${TF2TFLITE_PATH}" \
+ "${MODEL_INFO_PATH}" \
+ "${MODEL_PB_PATH}" \
+ "${WORKDIR}/${PREFIX}.tflite"
+
+ #
+ # Run rule prepared to check tflite file
+ #
+
+ # set vars needed by rule file
+ TFLITE_PATH="${WORKDIR}/${PREFIX}.tflite"
+
+ # Note: turn off 'command printing'. Otherwise printing will be so messy
+ set +x
+
+ # set vars required by rule-lib.sh and rule file
+ COMPILED_FILE=${TFLITE_PATH}
+ INSPECT_PROG_PATH=${TFL_INSPECT_PATH}
+ VERIFY_PROG_PATH=${TFL_VERIFY_PATH}
+ ERROR_LOG="${PREFIX}.error"
+
+ rm -f "${ERROR_LOG}"
+
+ # in case of an error while running ${MODEL_RULE_PATH}, print an error msg
+ trap 'echo "** ERROR **" ; cat "${ERROR_LOG}"' ERR
+
+ source rule-lib.sh
+ source "${MODEL_RULE_PATH}"
+
+ # unset
+ trap - ERR
+ set -x
+
+ # At this point, the exit code of all commands is 0
+ # If not 0, execution of this script ends because of "set -e"
+ touch "${PASSED_TAG}"
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ echo "[ OK ] ${PREFIX}"
+ PASSED+=("$PREFIX")
+ else
+ echo "[ FAIL] ${PREFIX}"
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/tf2tflite-dredd-pbtxt-test/.gitignore b/compiler/tf2tflite-dredd-pbtxt-test/.gitignore
new file mode 100644
index 000000000..8dbfa9012
--- /dev/null
+++ b/compiler/tf2tflite-dredd-pbtxt-test/.gitignore
@@ -0,0 +1 @@
+/test.local.lst
diff --git a/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt b/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt
new file mode 100644
index 000000000..87cf7836f
--- /dev/null
+++ b/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt
@@ -0,0 +1,184 @@
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS tfl-verify)
+list(APPEND REQUIRED_TARGETS tfl-inspect)
+list(APPEND REQUIRED_TARGETS tf2tflite)
+list(APPEND REQUIRED_TARGETS tfkit)
+list(APPEND REQUIRED_TARGETS dredd_rule_lib)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+nncc_find_resource(TensorFlowTests)
+
+set(MODEL_REPO "${TensorFlowTests_DIR}") # Where to find text models to test
+
+unset(KEYS)
+unset(DEPS)
+
+#
+# processing models in test.lst and test.local.lst
+#
+# Example)
+#
+# Add(NET_0025 RULE test.rule)
+# -> Read test.pbtxt file under res/TensorFlowTests/NET_0025 and create "NET_0025.tflite"
+# Then the tflite is tested against rules in test.rule file.
+#
+macro(Add MODEL_DIR)
+
+ set(ARG_OPTION)
+ set(ARG_ONE_VALUE RULE) # rule file name
+ set(ARG_MULTI_VALUE)
+ cmake_parse_arguments(ARG "${ARG_OPTION}" "${ARG_ONE_VALUE}" "${ARG_MULTI_VALUE}" ${ARGN})
+
+ if(NOT ARG_RULE)
+ message( FATAL_ERROR "RULE is a mandatory arg" )
+ endif()
+
+ set(RULE_FILENAME ${ARG_RULE})
+
+ set(TARGET_TESTNAME "${MODEL_DIR}")
+
+ set(MODEL_SOURCE_DIR "${MODEL_REPO}/${MODEL_DIR}")
+
+ set(TXT_SOURCE_PBTXT_PATH "${MODEL_SOURCE_DIR}/test.pbtxt")
+ set(TXT_SOURCE_INFO_PATH "${MODEL_SOURCE_DIR}/test.info")
+ set(TXT_SOURCE_RULE_PATH "${MODEL_SOURCE_DIR}/${RULE_FILENAME}")
+
+ set(TXT_TARGET_PB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_TESTNAME}.pb")
+ set(TXT_TARGET_PBTXT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_TESTNAME}.pbtxt")
+ set(TXT_TARGET_INFO_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_TESTNAME}.info")
+ set(TXT_TARGET_RULE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_TESTNAME}.rule")
+
+ if(NOT EXISTS "${TXT_SOURCE_PBTXT_PATH}")
+ message(FATAL_ERROR "${TXT_SOURCE_PBTXT_PATH} - pbtxt file does not exist")
+ endif(NOT EXISTS "${TXT_SOURCE_PBTXT_PATH}")
+
+ if(NOT EXISTS "${TXT_SOURCE_INFO_PATH}")
+ message(FATAL_ERROR "${TXT_SOURCE_INFO_PATH} - info file does not exist")
+ endif(NOT EXISTS "${TXT_SOURCE_INFO_PATH}")
+
+ if(NOT EXISTS "${TXT_SOURCE_RULE_PATH}")
+ message(FATAL_ERROR "${TXT_SOURCE_RULE_PATH} - rule file does not exist")
+ endif(NOT EXISTS "${TXT_SOURCE_RULE_PATH}")
+
+ # Copy .pbtxt
+ add_custom_command(OUTPUT ${TXT_TARGET_PBTXT_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TXT_SOURCE_PBTXT_PATH}" "${TXT_TARGET_PBTXT_PATH}"
+ DEPENDS ${TXT_SOURCE_PBTXT_PATH}
+ COMMENT "Generate ${TXT_TARGET_PBTXT_PATH}"
+ )
+
+ # Copy .info
+ add_custom_command(OUTPUT ${TXT_TARGET_INFO_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TXT_SOURCE_INFO_PATH}" "${TXT_TARGET_INFO_PATH}"
+ DEPENDS ${TXT_SOURCE_INFO_PATH}
+ COMMENT "Generate ${TXT_TARGET_INFO_PATH}"
+ )
+
+ # Copy .rule
+ add_custom_command(OUTPUT ${TXT_TARGET_RULE_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TXT_SOURCE_RULE_PATH}" "${TXT_TARGET_RULE_PATH}"
+ DEPENDS ${TXT_SOURCE_RULE_PATH}
+ COMMENT "Generate ${TXT_TARGET_RULE_PATH}"
+ )
+
+ # Generate .pb from .pbtxt
+ add_custom_command(OUTPUT ${TXT_TARGET_PB_PATH}
+ COMMAND $<TARGET_FILE:tfkit> encode ${TXT_TARGET_PBTXT_PATH} ${TXT_TARGET_PB_PATH}
+ DEPENDS ${TXT_TARGET_PBTXT_PATH}
+ COMMENT "Generate ${TXT_TARGET_PB_PATH}"
+ )
+
+ # Generate .test file which declares path of target pb, info, rule files
+ # this file is used inside runner.sh
+ set(TEST_CONFIG_FILE "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_TESTNAME}.test")
+
+ add_custom_command(
+ OUTPUT ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_PB_PATH="${TXT_TARGET_PB_PATH}"' >> ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_INFO_PATH="${TXT_TARGET_INFO_PATH}"' >> ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_RULE_PATH="${TXT_TARGET_RULE_PATH}"' >> ${TEST_CONFIG_FILE}
+ DEPENDS
+ ${TXT_TARGET_PB_PATH}
+ ${TXT_TARGET_INFO_PATH}
+ ${TXT_TARGET_RULE_PATH}
+ COMMENT "Generate ${TARGET_TESTNAME} configuration for TXT"
+ )
+
+ list(APPEND DEPS "${TEST_CONFIG_FILE}")
+ list(APPEND KEYS "${TARGET_TESTNAME}")
+
+endmacro(Add)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+#
+# Generate toolchain.config
+#
+set(TOOLCHAIN_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/toolchain.config")
+
+add_custom_command(
+ OUTPUT ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TFL_INSPECT_PATH=\"$<TARGET_FILE:tfl-inspect>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TFL_VERIFY_PATH=\"$<TARGET_FILE:tfl-verify>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF2TFLITE_PATH=\"$<TARGET_FILE:tf2tflite>\"' >> ${TOOLCHAIN_CONFIG}
+ # add more if new executable file is needed in runner.sh and rule-lib.sh
+ DEPENDS
+ tfl-inspect
+ tfl-verify
+ tf2tflite
+ COMMENT "Generate toolchain configuration"
+)
+
+list(APPEND DEPS "${TOOLCHAIN_CONFIG}")
+
+#
+# copy runner.sh
+#
+set(SOURCE_RUNNER "${CMAKE_CURRENT_SOURCE_DIR}/runner.sh")
+set(TARGET_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/runner.sh")
+
+add_custom_command(
+ OUTPUT ${TARGET_RUNNER}
+ COMMAND ${CMAKE_COMMAND} -E copy "${SOURCE_RUNNER}" "${TARGET_RUNNER}"
+ DEPENDS ${SOURCE_RUNNER}
+ COMMENT "Generate test runner"
+)
+
+list(APPEND DEPS "${TARGET_RUNNER}")
+
+#
+# copy rule-lib.sh (a library of shell script functions)
+#
+
+# getting path for rule-lib.sh in dredd-rule-lib
+get_target_property(DREDD_RULE_LIB_DIR dredd_rule_lib BINARY_DIR)
+
+set(SOURCE_RULE_LIB "${DREDD_RULE_LIB_DIR}/rule-lib.sh")
+set(TARGET_RULE_LIB "${CMAKE_CURRENT_BINARY_DIR}/rule-lib.sh")
+
+add_custom_command(
+ OUTPUT ${TARGET_RULE_LIB}
+ COMMAND ${CMAKE_COMMAND} -E copy "${SOURCE_RULE_LIB}" "${TARGET_RULE_LIB}"
+ DEPENDS ${SOURCE_RULE_LIB}
+ COMMENT "Generate rule lib"
+)
+
+list(APPEND DEPS "${TARGET_RULE_LIB}")
+
+# Generate dependencies
+add_custom_target(tf2tflite_dredd_pbtxt_deps ALL DEPENDS ${DEPS})
+
+add_test(
+ NAME tf2tflite_dredd_pbtxt_test
+ COMMAND
+ "${TARGET_RUNNER}"
+ "${TOOLCHAIN_CONFIG}"
+ ${KEYS}
+)
diff --git a/compiler/tf2tflite-dredd-pbtxt-test/requires.cmake b/compiler/tf2tflite-dredd-pbtxt-test/requires.cmake
new file mode 100644
index 000000000..94fb442af
--- /dev/null
+++ b/compiler/tf2tflite-dredd-pbtxt-test/requires.cmake
@@ -0,0 +1,5 @@
+require("tfkit")
+require("tf2tflite")
+require("tfl-verify")
+require("tfl-inspect")
+require("dredd-rule-lib")
diff --git a/compiler/tf2tflite-dredd-pbtxt-test/runner.sh b/compiler/tf2tflite-dredd-pbtxt-test/runner.sh
new file mode 100755
index 000000000..8575bc282
--- /dev/null
+++ b/compiler/tf2tflite-dredd-pbtxt-test/runner.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+# exit if unknown var is used
+set -u
+
+# This script checks tflite file generated by tf2tflite
+
+WORKDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+# Need at least toolchain.config
+if [[ $# -lt 1 ]]; then
+ echo "USAGE: $0 ..."
+ echo
+ echo "ARGUMENTS:"
+ echo " [toolchain.config path]"
+ echo " [Prefix1]"
+ echo " [Prefix2]"
+ echo " ..."
+ exit 255
+fi
+
+CONFIG_PATH="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found tfl-inspect: ${TFL_INSPECT_PATH}"
+echo "-- Found tfl-verify: ${TFL_VERIFY_PATH}"
+echo "-- Found tf2tflite: ${TF2TFLITE_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+
+# running each rule file
+
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ echo "[ RUN ] ${PREFIX}"
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ source "${PREFIX}.test"
+
+ echo "-- Use '${MODEL_PB_PATH}', '${MODEL_INFO_PATH}', and '${MODEL_RULE_PATH}'"
+
+ # Exit immediately if any command fails
+ set -e
+ # Show commands
+ set -x
+
+ # Generate tflite
+ "${TF2TFLITE_PATH}" \
+ "${MODEL_INFO_PATH}" \
+ "${MODEL_PB_PATH}" \
+ "${WORKDIR}/${PREFIX}.tflite"
+
+ #
+ # Run rule prepared to check tflite file
+ #
+
+ # set vars needed by rule file
+ TFLITE_PATH="${WORKDIR}/${PREFIX}.tflite"
+
+ # Note: turn off 'command printing'. Otherwise printing will be so messy
+ set +x
+
+ # set vars required by rule-lib.sh and rule file
+ COMPILED_FILE=${TFLITE_PATH}
+ INSPECT_PROG_PATH=${TFL_INSPECT_PATH}
+ VERIFY_PROG_PATH=${TFL_VERIFY_PATH}
+ ERROR_LOG="${PREFIX}.error"
+
+ rm -f "${ERROR_LOG}"
+
+ # in case of an error while running ${MODEL_RULE_PATH}, print an error msg
+ trap 'echo "** ERROR **" ; cat "${ERROR_LOG}"' ERR
+
+ source rule-lib.sh
+ source "${MODEL_RULE_PATH}"
+
+ # unset
+ trap - ERR
+ set -x
+
+ # At this point, the exit code of all commands is 0
+ # If not 0, execution of this script ends because of "set -e"
+ touch "${PASSED_TAG}"
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ echo "[ OK ] ${PREFIX}"
+ PASSED+=("$PREFIX")
+ else
+ echo "[ FAIL] ${PREFIX}"
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/tf2tflite-dredd-pbtxt-test/test.lst b/compiler/tf2tflite-dredd-pbtxt-test/test.lst
new file mode 100644
index 000000000..2fed541d4
--- /dev/null
+++ b/compiler/tf2tflite-dredd-pbtxt-test/test.lst
@@ -0,0 +1 @@
+Add(NET_0025 RULE tflite_1.0_rel_requirement.rule) # CBR
diff --git a/compiler/tf2tflite-value-pb-test/.gitignore b/compiler/tf2tflite-value-pb-test/.gitignore
new file mode 100644
index 000000000..23c7c1bb3
--- /dev/null
+++ b/compiler/tf2tflite-value-pb-test/.gitignore
@@ -0,0 +1 @@
+/contrib.lst
diff --git a/compiler/tf2tflite-value-pb-test/CMakeLists.txt b/compiler/tf2tflite-value-pb-test/CMakeLists.txt
new file mode 100644
index 000000000..41974f72c
--- /dev/null
+++ b/compiler/tf2tflite-value-pb-test/CMakeLists.txt
@@ -0,0 +1,131 @@
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS tf2tflite)
+list(APPEND REQUIRED_TARGETS tfkit)
+list(APPEND REQUIRED_TARGETS nnkit-run)
+list(APPEND REQUIRED_TARGETS nnkit_tf_backend)
+list(APPEND REQUIRED_TARGETS nnkit_tflite_backend)
+list(APPEND REQUIRED_TARGETS nnkit_randomize_action)
+list(APPEND REQUIRED_TARGETS nnkit_HDF5_export_action)
+list(APPEND REQUIRED_TARGETS nnkit_HDF5_import_action)
+list(APPEND REQUIRED_TARGETS i5diff)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+unset(KEYS)
+unset(DEPS)
+
+###
+### Add "Contrib" tests
+###
+macro(Add PREFIX)
+ # Let's use CONTRIB prefix to avoid name conflicts with official models
+ set(TEST_KEY "CONTRIB.${PREFIX}")
+
+ set(PACKAGE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/contrib/${PREFIX}")
+
+ set(MODEL_DOWNLOAD_SCRIPT "${PACKAGE_DIR}/model.download")
+ set(MODEL_PB_FILE "${PACKAGE_DIR}/model.pb")
+ set(MODEL_INFO_FILE "${PACKAGE_DIR}/model.info")
+ set(MODEL_MD5SUM_FILE "${PACKAGE_DIR}/model.md5sum")
+
+ # Try to download a model if it is missing
+ if(NOT EXISTS "${MODEL_PB_FILE}")
+ # TODO Extract this routine as a helper function
+ if(NOT EXISTS "${MODEL_DOWNLOAD_SCRIPT}")
+ message(FATAL_ERROR "${TEST_KEY} - Download script is missing")
+ endif(NOT EXISTS "${MODEL_DOWNLOAD_SCRIPT}")
+
+ execute_process(
+ COMMAND ${CMAKE_COMMAND} -D OUTPUT_PATH=${MODEL_PB_FILE} -P "${MODEL_DOWNLOAD_SCRIPT}"
+ RESULT_VARIABLE EXITCODE
+ )
+
+ if(NOT EXITCODE EQUAL 0)
+ message(FATAL_ERROR "${TEST_KEY} - Download fails")
+ endif(NOT EXITCODE EQUAL 0)
+ endif()
+
+ if(EXISTS "${MODEL_MD5SUM_FILE}")
+ # TODO Extract this routine as a helper function
+ file(STRINGS "${MODEL_MD5SUM_FILE}" EXPECTED_MD5SUM)
+ file(MD5 "${MODEL_PB_FILE}" OBTAINED_MD5SUM)
+
+ if(NOT "${EXPECTED_MD5SUM}" STREQUAL "${OBTAINED_MD5SUM}")
+ message(FATAL_ERROR "${TEST_KEY} - Checksum mismatches")
+ endif()
+ endif()
+
+ # Generate .test file which declares MODEL_PB_PATH and MODEL_INFO_PATH
+ set(TEST_CONFIG_FILE "${CMAKE_CURRENT_BINARY_DIR}/${TEST_KEY}.test")
+
+ add_custom_command(
+ OUTPUT ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_PB_PATH="${MODEL_PB_FILE}"' >> ${TEST_CONFIG_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL_INFO_PATH="${MODEL_INFO_FILE}"' >> ${TEST_CONFIG_FILE}
+ COMMENT "Generate ${TEST_KEY} configuration"
+ )
+
+ list(APPEND KEYS "${TEST_KEY}")
+ list(APPEND DEPS "${TEST_CONFIG_FILE}")
+endmacro(Add)
+
+include(contrib.lst OPTIONAL)
+
+###
+### Generate toolchain.config
+###
+set(TOOLCHAIN_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/toolchain.config")
+
+add_custom_command(
+ OUTPUT ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'NNKIT_RUN_PATH=\"$<TARGET_FILE:nnkit-run>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF_BACKEND_PATH=\"$<TARGET_FILE:nnkit_tf_backend>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TFLITE_BACKEND_PATH=\"$<TARGET_FILE:nnkit_tflite_backend>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF2TFLITE_PATH=\"$<TARGET_FILE:tf2tflite>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'RANDOMIZE_ACTION_PATH=\"$<TARGET_FILE:nnkit_randomize_action>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_EXPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_export_action>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_IMPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_import_action>\"' >> ${TOOLCHAIN_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'I5DIFF_PATH=\"$<TARGET_FILE:i5diff>\"' >> ${TOOLCHAIN_CONFIG}
+ DEPENDS
+ nnkit-run
+ nnkit_tf_backend
+ nnkit_tflite_backend
+ tf2tflite
+ nnkit_randomize_action
+ nnkit_HDF5_export_action
+ nnkit_HDF5_import_action
+ i5diff
+ COMMENT "Generate toolchain configuration"
+)
+
+list(APPEND DEPS "${TOOLCHAIN_CONFIG}")
+
+##
+## Generate test runner
+##
+set(TEST_RUNNER_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/runner.sh")
+set(TEST_RUNNER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runner.sh")
+
+add_custom_command(
+ OUTPUT ${TEST_RUNNER_SCRIPT}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TEST_RUNNER_SOURCE}" "${TEST_RUNNER_SCRIPT}"
+ DEPENDS ${TEST_RUNNER_SOURCE}
+ COMMENT "Generate test runner"
+)
+
+list(APPEND DEPS "${TEST_RUNNER_SCRIPT}")
+
+### Generate dependencies
+add_custom_target(tf2tflite_value_pb_test_deps ALL DEPENDS ${DEPS})
+
+# NOTE This target is not built by default
+add_test(
+ NAME tf2tflite_value_pb_test
+ COMMAND
+ "${TEST_RUNNER_SCRIPT}"
+ "${TOOLCHAIN_CONFIG}"
+ ${KEYS}
+)
diff --git a/compiler/tf2tflite-value-pb-test/README.md b/compiler/tf2tflite-value-pb-test/README.md
new file mode 100644
index 000000000..6c4a68cc5
--- /dev/null
+++ b/compiler/tf2tflite-value-pb-test/README.md
@@ -0,0 +1 @@
+# tf2tflite-value-pb-test
diff --git a/compiler/tf2tflite-value-pb-test/contrib/.gitignore b/compiler/tf2tflite-value-pb-test/contrib/.gitignore
new file mode 100644
index 000000000..968c34510
--- /dev/null
+++ b/compiler/tf2tflite-value-pb-test/contrib/.gitignore
@@ -0,0 +1,3 @@
+/*
+# Exclude all except below
+!.gitignore
diff --git a/compiler/tf2tflite-value-pb-test/requires.cmake b/compiler/tf2tflite-value-pb-test/requires.cmake
new file mode 100644
index 000000000..471025e5e
--- /dev/null
+++ b/compiler/tf2tflite-value-pb-test/requires.cmake
@@ -0,0 +1,6 @@
+require("i5diff")
+require("nnkit-tf")
+require("nnkit-tflite")
+require("nnkit")
+require("tf2tflite")
+require("tfkit")
diff --git a/compiler/tf2tflite-value-pb-test/runner.sh b/compiler/tf2tflite-value-pb-test/runner.sh
new file mode 100755
index 000000000..faec12521
--- /dev/null
+++ b/compiler/tf2tflite-value-pb-test/runner.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+WORKDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+# Need at least toolchain.config
+if [[ $# -lt 1 ]]; then
+ echo "USAGE: $0 ..."
+ echo
+ echo "ARGUMENTS:"
+ echo " [toolchain.config path]"
+ echo " [Prefix1]"
+ echo " [Prefix2]"
+ echo " ..."
+ exit 255
+fi
+
+CONFIG_PATH="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found nnkit-run: ${NNKIT_RUN_PATH}"
+echo "-- Found TF backend: ${TF_BACKEND_PATH}"
+echo "-- Found TFLITE backend: ${TFLITE_BACKEND_PATH}"
+echo "-- Found TF2TFLITE: ${TF2TFLITE_PATH}"
+echo "-- Found randomize action: ${RANDOMIZE_ACTION_PATH}"
+echo "-- Found HDF5 export action: ${HDF5_EXPORT_ACTION_PATH}"
+echo "-- Found HDF5 import action: ${HDF5_IMPORT_ACTION_PATH}"
+echo "-- Found i5diff: ${I5DIFF_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ echo "[ RUN ] ${PREFIX}"
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ source "${PREFIX}.test"
+
+ echo "-- Use '${MODEL_PB_PATH}' and '${MODEL_INFO_PATH}'"
+
+ # Exit immediately if any command fails
+ set -e
+ # Show commands
+ set -x
+
+ # Generate tflite
+ "${TF2TFLITE_PATH}" \
+ "${MODEL_INFO_PATH}" \
+ "${MODEL_PB_PATH}" \
+ "${WORKDIR}/${PREFIX}.tflite"
+
+ # Run TensorFlow
+ "${NNKIT_RUN_PATH}" \
+ --backend "${TF_BACKEND_PATH}" \
+ --backend-arg "${MODEL_PB_PATH}" \
+ --backend-arg "${MODEL_INFO_PATH}" \
+ --pre "${RANDOMIZE_ACTION_PATH}" \
+ --pre "${HDF5_EXPORT_ACTION_PATH}" \
+ --pre-arg "${WORKDIR}/${PREFIX}.input.h5" \
+ --post "${HDF5_EXPORT_ACTION_PATH}" \
+ --post-arg "${WORKDIR}/${PREFIX}.expected.h5"
+
+ # Run TensorFlow Lite
+ "${NNKIT_RUN_PATH}" \
+ --backend "${TFLITE_BACKEND_PATH}" \
+ --backend-arg "${WORKDIR}/${PREFIX}.tflite" \
+ --pre "${HDF5_IMPORT_ACTION_PATH}" \
+ --pre-arg "${WORKDIR}/${PREFIX}.input.h5" \
+ --post "${HDF5_EXPORT_ACTION_PATH}" \
+ --post-arg "${WORKDIR}/${PREFIX}.obtained.h5"
+
+ "${I5DIFF_PATH}" -d 0.001 "${PREFIX}.expected.h5" "${PREFIX}.obtained.h5"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ echo "[ OK ] ${PREFIX}"
+ PASSED+=("$PREFIX")
+ else
+ echo "[ FAIL] ${PREFIX}"
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/tf2tflite-value-pbtxt-test/.gitignore b/compiler/tf2tflite-value-pbtxt-test/.gitignore
new file mode 100644
index 000000000..8dbfa9012
--- /dev/null
+++ b/compiler/tf2tflite-value-pbtxt-test/.gitignore
@@ -0,0 +1 @@
+/test.local.lst
diff --git a/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt b/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt
new file mode 100644
index 000000000..2e76e21d3
--- /dev/null
+++ b/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt
@@ -0,0 +1,159 @@
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS tf2tflite)
+list(APPEND REQUIRED_TARGETS tfkit)
+list(APPEND REQUIRED_TARGETS nnkit-run)
+list(APPEND REQUIRED_TARGETS nnkit_tf_backend)
+list(APPEND REQUIRED_TARGETS nnkit_tflite_backend)
+list(APPEND REQUIRED_TARGETS nnkit_randomize_action)
+list(APPEND REQUIRED_TARGETS nnkit_HDF5_export_action)
+list(APPEND REQUIRED_TARGETS nnkit_HDF5_import_action)
+list(APPEND REQUIRED_TARGETS i5diff)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+message(STATUS "tf2tflite-value-pbtxt-test: run tests")
+
+nncc_find_resource(TensorFlowTests)
+
+#
+# Copy [PREFIX]/test.pbtxt to PREFIX.pbtxt in binary folder
+# Copy [PREFIX]/test.info to PREFIX.info in binary folder
+# Copy [PREFIX]/customop.conf to PREFIX_customop.conf in binary folder
+# Encode PREFIX.pbtxt to PREFIX.pb
+#
+set(TEST_REPO "${TensorFlowTests_DIR}")
+set(TEST_PBTXT_FILENAME "test.pbtxt")
+set(TEST_INFO_FILENAME "test.info")
+set(TEST_CUSTOMOP_CONF_FILENAME "customop.conf")
+
+unset(TESTCASES)
+
+macro(add NAME)
+ list(APPEND TESTCASES ${NAME})
+endmacro(add)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+unset(TEST_DEPS)
+unset(TEST_NAMES)
+
+foreach(PREFIX IN ITEMS ${TESTCASES})
+ if(NOT IS_DIRECTORY "${TEST_REPO}/${PREFIX}")
+ message(FATAL_ERROR "Missing '${PREFIX}' test")
+ endif()
+
+ set(PBTXT_SOURCE_PATH "${TEST_REPO}/${PREFIX}/${TEST_PBTXT_FILENAME}")
+ set(INFO_SOURCE_PATH "${TEST_REPO}/${PREFIX}/${TEST_INFO_FILENAME}")
+ set(CUSTOMOP_CONF_SOURCE_PATH "${TEST_REPO}/${PREFIX}/${TEST_CUSTOMOP_CONF_FILENAME}")
+
+ set(PBTXT_FILE "${PREFIX}.pbtxt")
+ set(PBTXT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PBTXT_FILE}")
+
+ set(INFO_FILE "${PREFIX}.info")
+ set(INFO_PATH "${CMAKE_CURRENT_BINARY_DIR}/${INFO_FILE}")
+
+ set(CUSTOMOP_CONF_FILE "${PREFIX}.${TEST_CUSTOMOP_CONF_FILENAME}") # ex) CustomOp_001.customop.conf
+ set(CUSTOMOP_CONF_PATH "${CMAKE_CURRENT_BINARY_DIR}/${CUSTOMOP_CONF_FILE}")
+
+ set(PB_FILE "${PREFIX}.pb")
+ set(PB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PB_FILE}")
+
+ # Copy .pbtxt
+ add_custom_command(OUTPUT ${PBTXT_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${PBTXT_SOURCE_PATH}" "${PBTXT_PATH}"
+ DEPENDS ${PBTXT_SOURCE_PATH}
+ COMMENT "Generate ${PBTXT_FILE}"
+ )
+
+ # Copy .info
+ add_custom_command(OUTPUT ${INFO_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${INFO_SOURCE_PATH}" "${INFO_PATH}"
+ DEPENDS ${INFO_SOURCE_PATH}
+ COMMENT "Generate ${INFO_FILE}"
+ )
+
+ # Generate .pb from .pbtxt
+ add_custom_command(OUTPUT ${PB_PATH}
+ COMMAND $<TARGET_FILE:tfkit> encode ${PBTXT_PATH} ${PB_PATH}
+ DEPENDS ${PBTXT_PATH}
+ COMMENT "Generate ${PB_FILE}"
+ )
+
+ list(APPEND TEST_DEPS ${INFO_PATH} ${PB_PATH})
+
+ if (EXISTS "${CUSTOMOP_CONF_SOURCE_PATH}")
+
+ # Copy customop.conf
+ add_custom_command(OUTPUT ${CUSTOMOP_CONF_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${CUSTOMOP_CONF_SOURCE_PATH}" "${CUSTOMOP_CONF_PATH}"
+ DEPENDS ${CUSTOMOP_CONF_SOURCE_PATH}
+ COMMENT "Generate ${CUSTOMOP_CONF_FILE}"
+ )
+
+ list(APPEND TEST_DEPS ${CUSTOMOP_CONF_PATH})
+
+ endif (EXISTS "${CUSTOMOP_CONF_SOURCE_PATH}")
+
+ list(APPEND TEST_NAMES ${PREFIX})
+endforeach(PREFIX)
+
+##
+## Copy testall
+##
+set(TEST_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/testall.sh")
+set(TEST_RUNNER_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh")
+
+add_custom_command(
+ OUTPUT ${TEST_RUNNER}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TEST_RUNNER_SOURCE}" "${TEST_RUNNER}"
+ DEPENDS ${TEST_RUNNER_SOURCE}
+ COMMENT "Generate test runner"
+)
+
+list(APPEND TEST_DEPS "${TEST_RUNNER}")
+
+###
+### Generate test.config
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+add_custom_command(
+ OUTPUT ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'NNKIT_RUN_PATH=\"$<TARGET_FILE:nnkit-run>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF_BACKEND_PATH=\"$<TARGET_FILE:nnkit_tf_backend>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TFLITE_BACKEND_PATH=\"$<TARGET_FILE:nnkit_tflite_backend>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF2TFLITE_PATH=\"$<TARGET_FILE:tf2tflite>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'RANDOMIZE_ACTION_PATH=\"$<TARGET_FILE:nnkit_randomize_action>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_EXPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_export_action>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_IMPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_import_action>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'I5DIFF_PATH=\"$<TARGET_FILE:i5diff>\"' >> ${TEST_CONFIG}
+ DEPENDS
+ nnkit-run
+ nnkit_tf_backend
+ nnkit_tflite_backend
+ tf2tflite
+ nnkit_randomize_action
+ nnkit_HDF5_export_action
+ nnkit_HDF5_import_action
+ i5diff
+ COMMENT "Generate test configuration"
+)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}")
+
+# This "tf2tflite_value_pbtxt_test_deps" target enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(tf2tflite_value_pbtxt_test_deps ALL DEPENDS ${TEST_DEPS})
+
+# Run tests
+add_test(
+ NAME tf2tflite_value_pbtxt_test
+ COMMAND "${TEST_RUNNER}"
+ "${TEST_CONFIG}"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ ${TEST_NAMES}
+)
diff --git a/compiler/tf2tflite-value-pbtxt-test/README.md b/compiler/tf2tflite-value-pbtxt-test/README.md
new file mode 100644
index 000000000..30429549a
--- /dev/null
+++ b/compiler/tf2tflite-value-pbtxt-test/README.md
@@ -0,0 +1,3 @@
+# tf2tflite-value-pbtxt-test
+
+Run `tf2tflite` to `test.lst` and do random value test using `nnkit`. Write `test.local.lst` for local test list.
diff --git a/compiler/tf2tflite-value-pbtxt-test/requires.cmake b/compiler/tf2tflite-value-pbtxt-test/requires.cmake
new file mode 100644
index 000000000..bb783e254
--- /dev/null
+++ b/compiler/tf2tflite-value-pbtxt-test/requires.cmake
@@ -0,0 +1,4 @@
+require("tf2tflite")
+require("nnkit")
+require("tfkit")
+require("i5diff")
diff --git a/compiler/tf2tflite-value-pbtxt-test/test.lst b/compiler/tf2tflite-value-pbtxt-test/test.lst
new file mode 100644
index 000000000..419918b26
--- /dev/null
+++ b/compiler/tf2tflite-value-pbtxt-test/test.lst
@@ -0,0 +1,101 @@
+# TODO Enable skipped tests
+
+add(NET_0000)
+add(NET_0001)
+add(NET_0002)
+add(NET_0003)
+add(NET_0004)
+add(NET_0005)
+add(NET_0006)
+add(NET_0007)
+add(NET_0008)
+add(NET_0009)
+add(NET_0010)
+add(NET_0011)
+add(NET_0012)
+add(NET_0013)
+add(NET_0014)
+add(NET_0015)
+add(NET_0016)
+add(NET_0017)
+add(NET_0018)
+add(NET_0019)
+add(NET_0020)
+add(NET_0021)
+add(NET_0022)
+add(NET_0023)
+add(NET_0024)
+add(NET_0025)
+add(NET_0028)
+add(NET_0029)
+add(NET_0030)
+add(NET_0031)
+add(NET_0032)
+add(NET_0033)
+add(NET_0034)
+add(NET_0035)
+add(NET_0036)
+add(NET_0037)
+add(NET_0038)
+add(NET_0039)
+add(NET_0040)
+add(NET_0041)
+add(REGRESSION_0000)
+add(REGRESSION_0001)
+add(REGRESSION_0002)
+add(UNIT_Add_000)
+add(UNIT_Add_001)
+add(UNIT_Add_002)
+add(UNIT_Add_004)
+add(UNIT_Add_005)
+add(UNIT_AvgPool_000)
+add(UNIT_AvgPool_001)
+#add(UNIT_BiasAdd_000)
+#add(UNIT_BiasAdd_001)
+add(UNIT_BiasAdd_002)
+#add(UNIT_ConcatV2_000)
+#add(UNIT_ConcatV2_001)
+add(UNIT_ConcatV2_002)
+add(UNIT_Const_000)
+#add(UNIT_Const_001)
+add(UNIT_Conv2D_000)
+add(UNIT_Conv2DBackpropInput_000)
+add(UNIT_Conv2DBackpropInput_001)
+add(UNIT_DepthwiseConv2dNative_000)
+add(UNIT_DepthwiseConv2dNative_001)
+add(UNIT_Maximum_000)
+add(UNIT_Maximum_001)
+add(UNIT_Maximum_002)
+add(UNIT_MaxPool_000)
+add(UNIT_MaxPool_001)
+add(UNIT_Mean_000)
+add(UNIT_Mean_001)
+add(UNIT_Mean_002)
+add(UNIT_Mean_003)
+add(UNIT_Mul_000)
+add(UNIT_Mul_001)
+add(UNIT_Mul_002)
+add(UNIT_Pad_000)
+add(UNIT_Placeholder_000)
+add(UNIT_Placeholder_001)
+add(UNIT_Placeholder_002)
+add(UNIT_Placeholder_003)
+add(UNIT_RealDiv_000)
+add(UNIT_RealDiv_001)
+add(UNIT_Relu_000)
+add(UNIT_Relu6_000)
+add(UNIT_Reshape_000)
+add(UNIT_Rsqrt_000)
+add(UNIT_Softmax_001)
+add(UNIT_Sqrt_000)
+add(UNIT_SquaredDifference_000)
+add(UNIT_SquaredDifference_001)
+add(UNIT_Squeeze_000)
+add(UNIT_Squeeze_001)
+add(UNIT_Squeeze_002)
+add(UNIT_Squeeze_003)
+add(UNIT_StopGradient_000)
+add(UNIT_StopGradient_001)
+add(UNIT_Sub_000)
+add(UNIT_Sub_001)
+add(UNIT_Tanh_000)
diff --git a/compiler/tf2tflite-value-pbtxt-test/testall.sh b/compiler/tf2tflite-value-pbtxt-test/testall.sh
new file mode 100755
index 000000000..ff18b6058
--- /dev/null
+++ b/compiler/tf2tflite-value-pbtxt-test/testall.sh
@@ -0,0 +1,106 @@
+#!/bin/bash
+
+# Need at least 2 arguments
+if [[ $# -lt 2 ]]; then
+ echo "USAGE: $0 ..."
+ echo
+ echo "ARGUMENTS:"
+ echo " [test.config path]"
+ echo " [WORKDIR]"
+ echo " [Prefix1]"
+ echo " [Prefix2]"
+ echo " ..."
+ exit 255
+fi
+
+CONFIG_PATH="$1"; shift
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found nnkit-run: ${NNKIT_RUN_PATH}"
+echo "-- Found TF backend: ${TF_BACKEND_PATH}"
+echo "-- Found TFLITE backend: ${TFLITE_BACKEND_PATH}"
+echo "-- Found TF2TFLITE: ${TF2TFLITE_PATH}"
+echo "-- Found randomize action: ${RANDOMIZE_ACTION_PATH}"
+echo "-- Found HDF5 export action: ${HDF5_EXPORT_ACTION_PATH}"
+echo "-- Found HDF5 import action: ${HDF5_IMPORT_ACTION_PATH}"
+echo "-- Found i5diff: ${I5DIFF_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ echo "-- Found pb: ${PREFIX}.pb"
+
+ # Exit immediately if any command fails
+ set -e
+ # Show commands
+ set -x
+
+ # Generate tflite
+ "${TF2TFLITE_PATH}" \
+ "${WORKDIR}/${PREFIX}.info" \
+ "${WORKDIR}/${PREFIX}.pb" \
+ "${WORKDIR}/${PREFIX}.tflite"
+
+ # Run TensorFlow
+ "${NNKIT_RUN_PATH}" \
+ --backend "${TF_BACKEND_PATH}" \
+ --backend-arg "${WORKDIR}/${PREFIX}.pb" \
+ --backend-arg "${WORKDIR}/${PREFIX}.info" \
+ --pre "${RANDOMIZE_ACTION_PATH}" \
+ --pre "${HDF5_EXPORT_ACTION_PATH}" \
+ --pre-arg "${WORKDIR}/${PREFIX}.input.h5" \
+ --post "${HDF5_EXPORT_ACTION_PATH}" \
+ --post-arg "${WORKDIR}/${PREFIX}.expected.h5"
+
+ # Run TensorFlow Lite
+ "${NNKIT_RUN_PATH}" \
+ --backend "${TFLITE_BACKEND_PATH}" \
+ --backend-arg "${WORKDIR}/${PREFIX}.tflite" \
+ --pre "${HDF5_IMPORT_ACTION_PATH}" \
+ --pre-arg "${WORKDIR}/${PREFIX}.input.h5" \
+ --post "${HDF5_EXPORT_ACTION_PATH}" \
+ --post-arg "${WORKDIR}/${PREFIX}.obtained.h5"
+
+ "${I5DIFF_PATH}" -d 0.001 "${PREFIX}.expected.h5" "${PREFIX}.obtained.h5"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$PREFIX")
+ else
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/tf2tflite/.gitignore b/compiler/tf2tflite/.gitignore
new file mode 100644
index 000000000..8dbfa9012
--- /dev/null
+++ b/compiler/tf2tflite/.gitignore
@@ -0,0 +1 @@
+/test.local.lst
diff --git a/compiler/tf2tflite/CMakeLists.txt b/compiler/tf2tflite/CMakeLists.txt
new file mode 100644
index 000000000..663563e00
--- /dev/null
+++ b/compiler/tf2tflite/CMakeLists.txt
@@ -0,0 +1,44 @@
+# TODO Allow users to force tf2tflite build
+if(NOT TARGET moco_tf_frontend)
+ return()
+endif(NOT TARGET moco_tf_frontend)
+
+if(NOT TARGET tfinfo)
+ return()
+endif(NOT TARGET tfinfo)
+
+if(NOT TARGET exo)
+ return()
+endif(NOT TARGET exo)
+
+nnas_find_package(Protobuf QUIET)
+
+if(NOT Protobuf_FOUND)
+ return()
+endif(NOT Protobuf_FOUND)
+
+# generating and building schema for customop.conf
+Protobuf_Generate(CUSTOMOP_INFO_PROTO
+ "${CMAKE_CURRENT_BINARY_DIR}/generated"
+ "./proto"
+ CustomOpInfo.proto)
+
+add_library(tf2tflite_customop_info_proto SHARED ${CUSTOMOP_INFO_PROTO_SOURCES})
+set_target_properties(tf2tflite_customop_info_proto PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(tf2tflite_customop_info_proto PUBLIC ${CUSTOMOP_INFO_PROTO_INCLUDE_DIRS})
+target_link_libraries(tf2tflite_customop_info_proto PUBLIC libprotobuf)
+install(TARGETS tf2tflite_customop_info_proto DESTINATION lib)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(tf2tflite ${SOURCES})
+target_link_libraries(tf2tflite PRIVATE moco_log)
+target_link_libraries(tf2tflite PRIVATE moco_tf_frontend)
+target_link_libraries(tf2tflite PRIVATE tfinfo)
+target_link_libraries(tf2tflite PRIVATE exo)
+target_link_libraries(tf2tflite PRIVATE locop)
+target_link_libraries(tf2tflite PRIVATE hermes_std)
+target_link_libraries(tf2tflite PRIVATE stdex)
+target_link_libraries(tf2tflite PRIVATE angkor cwrap)
+target_link_libraries(tf2tflite PRIVATE tf2tflite_customop_info_proto)
+install(TARGETS tf2tflite DESTINATION bin)
diff --git a/compiler/tf2tflite/README.md b/compiler/tf2tflite/README.md
new file mode 100644
index 000000000..64f4fb7c6
--- /dev/null
+++ b/compiler/tf2tflite/README.md
@@ -0,0 +1,3 @@
+# tf2tflite
+
+_tf2tflite_ is a TensorFlow-to-TensorFlow Lite model converter.
diff --git a/compiler/tf2tflite/proto/CustomOpInfo.proto b/compiler/tf2tflite/proto/CustomOpInfo.proto
new file mode 100644
index 000000000..b8c4d1665
--- /dev/null
+++ b/compiler/tf2tflite/proto/CustomOpInfo.proto
@@ -0,0 +1,57 @@
+syntax = "proto3";
+
+package tf2tflite;
+option cc_enable_arenas = true;
+
+ /* example of prototxt file
+ custom_op {
+ name: "my/customOp/000"
+ op: "new_custom_op"
+ attr {
+ key: "output_shape"
+ value {
+ shape {
+ dim { size: 1 }
+ dim { size: 2 }
+ dim { size: 1 }
+ dim { size: 2 }
+ }
+ }
+ }
+ }
+*/
+
+enum DataType {
+ // Not a legal value for DataType. Used to indicate a DataType field
+ // has not been set.
+ DT_INVALID = 0;
+
+ DT_FLOAT = 1;
+ DT_INT32 = 15; // Set to 15, considering possibility for reordering. 10 for INT, 10+N for INT 2^N
+ // TODO Support more types
+}
+
+message ShapeProto {
+ message Dim {
+ int64 size = 1; // tensorflow uses int64
+ };
+
+ repeated Dim dim = 2;
+}
+
+message AttrValue {
+ oneof value {
+ ShapeProto shape = 1;
+ DataType type = 2;
+ }
+}
+
+message CustomOpDef {
+ string name = 1;
+ string op = 2;
+ map<string, AttrValue> attr = 3;
+}
+
+message CustomOpInfoDef {
+ repeated CustomOpDef custom_op = 1;
+}
diff --git a/compiler/tf2tflite/requires.cmake b/compiler/tf2tflite/requires.cmake
new file mode 100644
index 000000000..68d45bf3a
--- /dev/null
+++ b/compiler/tf2tflite/requires.cmake
@@ -0,0 +1,8 @@
+require("stdex")
+require("hermes-std")
+require("moco-tf")
+require("exo")
+require("locop")
+require("loco")
+require("cwrap")
+require("angkor")
diff --git a/compiler/tf2tflite/src/CustomopConfLoader.cpp b/compiler/tf2tflite/src/CustomopConfLoader.cpp
new file mode 100644
index 000000000..7399a432a
--- /dev/null
+++ b/compiler/tf2tflite/src/CustomopConfLoader.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CustomopConfLoader.h"
+
+#include <loco.h>
+#include <cwrap/Fildes.h>
+#include <angkor/TensorShape.h>
+
+#include <CustomOpInfo.pb.h>
+
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <fcntl.h>
+
+namespace
+{
+bool load_text(const cwrap::Fildes &fildes, tf2tflite::CustomOpInfoDef &def)
+{
+ google::protobuf::io::FileInputStream fis(fildes.get());
+
+ return google::protobuf::TextFormat::Parse(&fis, &def);
+}
+
+angkor::TensorShape convert_shape(const tf2tflite::ShapeProto &shape)
+{
+ angkor::TensorShape to_shape;
+
+ int64_t rank64 = shape.dim_size();
+ assert(rank64 < std::numeric_limits<uint32_t>::max());
+
+ int32_t rank = static_cast<int32_t>(rank64);
+ to_shape.resize(rank);
+
+ for (int32_t d = 0; d < rank; d++)
+ {
+ int64_t dim_value = shape.dim(d).size();
+ assert(dim_value >= 0ULL);
+ assert(dim_value < std::numeric_limits<uint32_t>::max());
+
+ uint32_t dim_value32 = static_cast<uint32_t>(dim_value);
+ to_shape.dim(d) = dim_value32;
+ }
+
+ return to_shape;
+}
+
+loco::DataType convert_dtype(const tf2tflite::DataType &dtype)
+{
+ if (dtype == tf2tflite::DT_FLOAT)
+ return loco::DataType::FLOAT32;
+ else if (dtype == tf2tflite::DT_INT32)
+ return loco::DataType::S32;
+ else
+ throw std::runtime_error("Not yet supported datatype. Cannot convert.");
+}
+
+// Note : the following functions look similar with plier::tf::Convert.h.
+// However, the schema is different.(not "tensorflow::..." but "tf2tflite::...")
+// So, plier::tf cannot be used.
+loco::DataType get_dtype_attr(const tf2tflite::CustomOpDef &custom_op)
+{
+ std::string type_attr_name("dtype");
+
+ assert(custom_op.attr().count(type_attr_name) > 0);
+ const auto &attr = custom_op.attr().at(type_attr_name);
+ assert(attr.value_case() == tf2tflite::AttrValue::kType);
+ auto dtype_def = attr.type();
+
+ return convert_dtype(dtype_def);
+}
+
+angkor::TensorShape get_shape_attr(const tf2tflite::CustomOpDef &custom_op)
+{
+ std::string shape_attr_name("output_shape");
+
+ assert(custom_op.attr().count(shape_attr_name) > 0);
+ const auto &attr = custom_op.attr().at(shape_attr_name);
+ assert(attr.value_case() == tf2tflite::AttrValue::kShape);
+ auto shape_def = attr.shape();
+
+ return convert_shape(shape_def);
+}
+
+void add_customop(tf2tflite::CustomOpInfoDef &def, moco::ModelSignature &sig)
+{
+ for (const auto &custom_op : def.custom_op())
+ {
+ sig.add_customop(custom_op.op());
+
+ auto name = custom_op.name();
+
+ // setting dtype and shape to ModelSignature
+ sig.dtype(name, get_dtype_attr(custom_op));
+ sig.shape(name, get_shape_attr(custom_op));
+ }
+}
+
+} // namespace
+
+namespace tf2tflite
+{
+
+void load_customop_conf(const std::string &path, moco::ModelSignature &sig)
+{
+ CustomOpInfoDef def;
+
+ cwrap::Fildes fildes{open(path.c_str(), O_RDONLY)};
+
+ if (fildes.get() < 0)
+ {
+ throw std::runtime_error{"Error: " + path + " not found"};
+ }
+
+ if (!load_text(fildes, def))
+ {
+ throw std::runtime_error{"Error: Failed to parse prototxt " + path};
+ }
+
+ add_customop(def, sig);
+}
+
+} // namespace tf2tflite
diff --git a/compiler/tf2tflite/src/CustomopConfLoader.h b/compiler/tf2tflite/src/CustomopConfLoader.h
new file mode 100644
index 000000000..4f5f3a6cb
--- /dev/null
+++ b/compiler/tf2tflite/src/CustomopConfLoader.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CUSTOMOP_CONF_LOADER_H__
+#define __CUSTOMOP_CONF_LOADER_H__
+
+#include <moco/tf/Frontend.h>
+
+#include <string>
+
+namespace tf2tflite
+{
+
+/// @brief Loads customop.conf into ModelSignature
+void load_customop_conf(const std::string &path, moco::ModelSignature &sig);
+
+} // namespace tf2tflite
+
+#endif // __CUSTOMOP_CONF_LOADER_H__
diff --git a/compiler/tf2tflite/src/Driver.cpp b/compiler/tf2tflite/src/Driver.cpp
new file mode 100644
index 000000000..e43d30bb2
--- /dev/null
+++ b/compiler/tf2tflite/src/Driver.cpp
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CustomopConfLoader.h"
+
+#include <moco/LoggingContext.h>
+#include <moco/tf/Frontend.h>
+#include <exo/LoggingContext.h>
+#include <exo/TFLExporter.h>
+
+#include <nnkit/support/tftestinfo/TensorInfoParser.h>
+
+#include <locop/FormattedGraph.h>
+
+#include <hermes/ConsoleReporter.h>
+#include <hermes/EnvConfig.h>
+
+#include <stdex/Memory.h>
+
+#include <cassert>
+
+#include <iostream>
+#include <stdexcept>
+#include <string>
+
+namespace
+{
+
+std::unique_ptr<loco::Graph> import(const moco::ModelSignature &sig, const std::string &path)
+{
+ moco::tf::Frontend frontend;
+ return frontend.load(sig, path.c_str(), moco::tf::Frontend::FileType::Binary);
+}
+
+} // namespace
+
+//
+// Logging Support
+//
+namespace
+{
+
+struct Logger final : public hermes::Source
+{
+ Logger(hermes::Context *ctx) { activate(ctx->sources(), ctx->bus()); }
+ ~Logger() { deactivate(); }
+};
+
+struct LoggingContext
+{
+ static hermes::Context *get(void)
+ {
+ using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
+
+ static hermes::Context *ctx = nullptr;
+
+ if (ctx == nullptr)
+ {
+ ctx = new hermes::Context;
+ ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
+ ctx->config(stdex::make_unique<EnvConfig>("TF2TFLITE_Log"));
+ }
+
+ return ctx;
+ }
+};
+
+void print_help()
+{
+ std::cerr << "Usage: tf2tflite <path/to/info> <path/to/pb> <path/to/tflite/model> " << std::endl
+ << "Options: --customop <path/to/customop.conf>" << std::endl;
+}
+
+} // namespace
+
+#define LOGGER(name) \
+ ::Logger name { ::LoggingContext::get() }
+
+#define INFO(name) HERMES_INFO(name)
+
+int main(int argc, char **argv)
+{
+ using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
+
+ // This line allows users to control all the moco-tf loggers via TF2TFLITE_Log_Frontend
+ moco::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2TFLITE_Log_Frontend"));
+ // This line allows users to control all the exo-tflite loggers via TF2TFLITE_Log_Backend
+ exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2TFLITE_Log_Backend"));
+
+ LOGGER(l);
+
+ // TODO We need better args parsing in future
+ if (!(argc == 4 or argc == 6))
+ {
+ print_help();
+ return 255;
+ }
+
+ std::string info_path{argv[1]};
+ std::string tf_path{argv[2]}; // .pb file
+ std::string tflite_path{argv[3]};
+
+ std::cout << "Read '" << info_path << "'" << std::endl;
+
+ moco::ModelSignature sig;
+ {
+ for (const auto &info : nnkit::support::tftestinfo::parse(info_path.c_str()))
+ {
+ switch (info->kind())
+ {
+ case nnkit::support::tftestinfo::ParsedTensor::Kind::Input:
+ sig.add_input(moco::TensorName{info->name()});
+ sig.shape(info->name(), info->shape());
+ break;
+
+ case nnkit::support::tftestinfo::ParsedTensor::Kind::Output:
+ sig.add_output(moco::TensorName{info->name()});
+ sig.shape(info->name(), info->shape());
+ break;
+
+ default:
+ throw std::runtime_error{"Unknown kind"};
+ }
+ }
+ }
+
+ if (argc == 6) // optional parameter: path of customop.conf
+ {
+ if (std::string{argv[4]} == "--customop")
+ {
+ tf2tflite::load_customop_conf(argv[5], sig);
+ }
+ else
+ {
+ print_help();
+ return 255;
+ }
+ }
+
+ std::cout << "Read '" << info_path << "' - Done" << std::endl;
+
+ std::cout << "Import from '" << tf_path << "'" << std::endl;
+ auto g = import(sig, tf_path);
+ std::cout << "Import from '" << tf_path << "' - Done" << std::endl;
+
+ INFO(l) << "Import Graph" << std::endl;
+ INFO(l) << locop::fmt<locop::Formatter::LinearV1>(g) << std::endl;
+
+ std::cout << "Export into '" << tflite_path << "'" << std::endl;
+ exo::TFLExporter(g.get()).dumpToFile(tflite_path.c_str());
+ std::cout << "Export into '" << tflite_path << "' - Done" << std::endl;
+
+ return 0;
+}
diff --git a/compiler/tf2tfliteV2-value-pbtxt-test/CMakeLists.txt b/compiler/tf2tfliteV2-value-pbtxt-test/CMakeLists.txt
new file mode 100644
index 000000000..2526db561
--- /dev/null
+++ b/compiler/tf2tfliteV2-value-pbtxt-test/CMakeLists.txt
@@ -0,0 +1,183 @@
+find_package(PythonInterp 3 QUIET)
+find_package(PythonLibs 3 QUIET)
+
+if(NOT ${PYTHONINTERP_FOUND})
+ message("Build tf2tfliteV2-value-pbtxt-test: FALSE (Python3 is missing)")
+ return()
+endif()
+
+if(${PYTHON_VERSION_MINOR} LESS 3)
+ message("Build tf2tfliteV2-value-pbtxt-test: FALSE (You need to install Python version higher than 3.3)")
+ return()
+endif()
+
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS tfkit)
+list(APPEND REQUIRED_TARGETS tf2tfliteV2)
+list(APPEND REQUIRED_TARGETS nnkit-run)
+list(APPEND REQUIRED_TARGETS nnkit_tf_backend)
+list(APPEND REQUIRED_TARGETS nnkit_tflite_backend)
+list(APPEND REQUIRED_TARGETS nnkit_randomize_action)
+list(APPEND REQUIRED_TARGETS nnkit_HDF5_export_action)
+list(APPEND REQUIRED_TARGETS nnkit_HDF5_import_action)
+list(APPEND REQUIRED_TARGETS i5diff)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+message(STATUS "tf2tfliteV2-value-pbtxt-test: run tests")
+
+# Create python virtual environment
+set(VIRTUALENV "${CMAKE_CURRENT_BINARY_DIR}/venv")
+
+add_custom_command(
+ OUTPUT ${VIRTUALENV}
+ COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV}
+)
+
+# Copy requirements.txt and install required pip packages
+set(REQUIREMENTS_FILE "requirements.txt")
+set(REQUIREMENTS_SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${REQUIREMENTS_FILE}")
+set(REQUIREMENTS_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${REQUIREMENTS_FILE}")
+
+add_custom_command(
+ OUTPUT ${REQUIREMENTS_BIN_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy ${REQUIREMENTS_SRC_PATH} ${REQUIREMENTS_BIN_PATH}
+ COMMAND ${VIRTUALENV}/bin/python -m pip install --upgrade pip setuptools --timeout 100
+ COMMAND ${VIRTUALENV}/bin/python -m pip install -r requirements.txt --upgrade --timeout 100
+ DEPENDS ${VIRTUALENV} ${REQUIREMENTS_SRC_PATH}
+)
+
+add_custom_target(tf2tfliteV2_value_pbtxt_python_deps ALL
+ DEPENDS ${VIRTUALENV} ${REQUIREMENTS_BIN_PATH} #${TF2TFLITEV2_BIN_PATH}
+)
+
+nncc_find_resource(TensorFlowTests)
+
+#
+# Copy [PREFIX]/test.pbtxt to PREFIX.pbtxt in binary folder
+# Copy [PREFIX]/test.info to PREFIX.info in binary folder
+# Encode PREFIX.pbtxt to PREFIX.pb
+#
+set(TEST_REPO "${TensorFlowTests_DIR}")
+set(TEST_PBTXT_FILENAME "test.pbtxt")
+set(TEST_INFO_FILENAME "test.info")
+
+unset(TESTCASES)
+
+macro(add NAME)
+ list(APPEND TESTCASES ${NAME})
+endmacro(add)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+unset(TEST_DEPS)
+unset(TEST_NAMES)
+
+foreach(PREFIX IN ITEMS ${TESTCASES})
+ if(NOT IS_DIRECTORY "${TEST_REPO}/${PREFIX}")
+ message(FATAL_ERROR "Missing '${PREFIX}' test")
+ endif()
+
+ set(PBTXT_SOURCE_PATH "${TEST_REPO}/${PREFIX}/${TEST_PBTXT_FILENAME}")
+ set(INFO_SOURCE_PATH "${TEST_REPO}/${PREFIX}/${TEST_INFO_FILENAME}")
+
+ set(PBTXT_FILE "${PREFIX}.pbtxt")
+ set(PBTXT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PBTXT_FILE}")
+
+ set(INFO_FILE "${PREFIX}.info")
+ set(INFO_PATH "${CMAKE_CURRENT_BINARY_DIR}/${INFO_FILE}")
+
+ set(PB_FILE "${PREFIX}.pb")
+ set(PB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PB_FILE}")
+
+ # Copy .pbtxt
+ add_custom_command(OUTPUT ${PBTXT_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${PBTXT_SOURCE_PATH}" "${PBTXT_PATH}"
+ DEPENDS ${PBTXT_SOURCE_PATH}
+ COMMENT "Generate ${PBTXT_FILE}"
+ )
+
+ # Copy .info
+ add_custom_command(OUTPUT ${INFO_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${INFO_SOURCE_PATH}" "${INFO_PATH}"
+ DEPENDS ${INFO_SOURCE_PATH}
+ COMMENT "Generate ${INFO_FILE}"
+ )
+
+ # Generate .pb from .pbtxt
+ add_custom_command(OUTPUT ${PB_PATH}
+ COMMAND $<TARGET_FILE:tfkit> encode ${PBTXT_PATH} ${PB_PATH}
+ DEPENDS ${PBTXT_PATH}
+ COMMENT "Generate ${PB_FILE}"
+ )
+
+ list(APPEND TEST_DEPS ${INFO_PATH} ${PB_PATH})
+ list(APPEND TEST_NAMES ${PREFIX})
+endforeach(PREFIX)
+
+##
+## Copy testall
+##
+set(TEST_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/testall.sh")
+set(TEST_RUNNER_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh")
+
+add_custom_command(
+ OUTPUT ${TEST_RUNNER}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TEST_RUNNER_SOURCE}" "${TEST_RUNNER}"
+ DEPENDS ${TEST_RUNNER_SOURCE}
+ COMMENT "Generate test runner"
+)
+
+list(APPEND TEST_DEPS "${TEST_RUNNER}")
+
+###
+### Generate test.config
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+# Get tf2tfliteV2 binary path
+get_target_property(TF2TFLITEV2_BIN_DIR tf2tfliteV2 BINARY_DIR)
+set(TF2TFLITEV2_PATH "${TF2TFLITEV2_BIN_DIR}/tf2tfliteV2.py")
+
+add_custom_command(
+ OUTPUT ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'NNKIT_RUN_PATH=\"$<TARGET_FILE:nnkit-run>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF_BACKEND_PATH=\"$<TARGET_FILE:nnkit_tf_backend>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TFLITE_BACKEND_PATH=\"$<TARGET_FILE:nnkit_tflite_backend>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'RANDOMIZE_ACTION_PATH=\"$<TARGET_FILE:nnkit_randomize_action>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_EXPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_export_action>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_IMPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_import_action>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'I5DIFF_PATH=\"$<TARGET_FILE:i5diff>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'TF2TFLITEV2_PATH=\"${TF2TFLITEV2_PATH}\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'VIRTUALENV=\"${VIRTUALENV}\"' >> ${TEST_CONFIG}
+ DEPENDS
+ i5diff
+ nnkit-run
+ nnkit_tf_backend
+ nnkit_tflite_backend
+ nnkit_randomize_action
+ nnkit_HDF5_export_action
+ nnkit_HDF5_import_action
+ tf2tfliteV2
+ tf2tfliteV2_value_pbtxt_python_deps
+ COMMENT "Generate test configuration"
+)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}")
+
+# This "tf2tfliteV2_value_pbtxt_test_deps" target enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(tf2tfliteV2_value_pbtxt_test_deps ALL DEPENDS ${TEST_DEPS})
+
+# Run tests
+add_test(
+ NAME tf2tfliteV2_value_pbtxt_test
+ COMMAND "${TEST_RUNNER}"
+ "${TEST_CONFIG}"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ ${TEST_NAMES}
+)
diff --git a/compiler/tf2tfliteV2-value-pbtxt-test/requirements.txt b/compiler/tf2tfliteV2-value-pbtxt-test/requirements.txt
new file mode 100644
index 000000000..2eef5dbb4
--- /dev/null
+++ b/compiler/tf2tfliteV2-value-pbtxt-test/requirements.txt
@@ -0,0 +1,2 @@
+# TODO : Handling TF v2
+tensorflow==1.13.1
diff --git a/compiler/tf2tfliteV2-value-pbtxt-test/requires.cmake b/compiler/tf2tfliteV2-value-pbtxt-test/requires.cmake
new file mode 100644
index 000000000..a51236119
--- /dev/null
+++ b/compiler/tf2tfliteV2-value-pbtxt-test/requires.cmake
@@ -0,0 +1,4 @@
+require("nnkit")
+require("tfkit")
+require("i5diff")
+require("tf2tfliteV2")
diff --git a/compiler/tf2tfliteV2-value-pbtxt-test/test.lst b/compiler/tf2tfliteV2-value-pbtxt-test/test.lst
new file mode 100644
index 000000000..328366b16
--- /dev/null
+++ b/compiler/tf2tfliteV2-value-pbtxt-test/test.lst
@@ -0,0 +1,101 @@
+# TODO Enable skipped tests
+
+add(NET_0000)
+add(NET_0001)
+add(NET_0002)
+add(NET_0003)
+add(NET_0004)
+add(NET_0005)
+add(NET_0006)
+add(NET_0007)
+add(NET_0008)
+add(NET_0009)
+add(NET_0010)
+add(NET_0011)
+add(NET_0012)
+add(NET_0013)
+add(NET_0014)
+add(NET_0015)
+add(NET_0016)
+add(NET_0017)
+add(NET_0018)
+add(NET_0019)
+add(NET_0020)
+add(NET_0021)
+add(NET_0022)
+#add(NET_0023)
+add(NET_0024)
+add(NET_0025)
+#add(NET_0028)
+add(NET_0029)
+add(NET_0030)
+add(NET_0031)
+add(NET_0032)
+add(NET_0033)
+add(NET_0034)
+add(NET_0035)
+add(NET_0036)
+add(NET_0037)
+add(NET_0038)
+add(NET_0039)
+add(NET_0040)
+add(NET_0041)
+add(REGRESSION_0000)
+add(REGRESSION_0001)
+add(REGRESSION_0002)
+add(UNIT_Add_000)
+add(UNIT_Add_001)
+add(UNIT_Add_002)
+add(UNIT_Add_004)
+add(UNIT_Add_005)
+add(UNIT_AvgPool_000)
+add(UNIT_AvgPool_001)
+#add(UNIT_BiasAdd_000)
+#add(UNIT_BiasAdd_001)
+add(UNIT_BiasAdd_002)
+#add(UNIT_ConcatV2_000)
+#add(UNIT_ConcatV2_001)
+add(UNIT_ConcatV2_002)
+#add(UNIT_Const_000)
+#add(UNIT_Const_001)
+add(UNIT_Conv2D_000)
+add(UNIT_Conv2DBackpropInput_000)
+add(UNIT_Conv2DBackpropInput_001)
+add(UNIT_DepthwiseConv2dNative_000)
+add(UNIT_DepthwiseConv2dNative_001)
+add(UNIT_Maximum_000)
+add(UNIT_Maximum_001)
+add(UNIT_Maximum_002)
+add(UNIT_MaxPool_000)
+add(UNIT_MaxPool_001)
+add(UNIT_Mean_000)
+add(UNIT_Mean_001)
+add(UNIT_Mean_002)
+add(UNIT_Mean_003)
+add(UNIT_Mul_000)
+add(UNIT_Mul_001)
+add(UNIT_Mul_002)
+add(UNIT_Pad_000)
+#add(UNIT_Placeholder_000)
+#add(UNIT_Placeholder_001)
+#add(UNIT_Placeholder_002)
+#add(UNIT_Placeholder_003)
+add(UNIT_RealDiv_000)
+add(UNIT_RealDiv_001)
+add(UNIT_Relu_000)
+add(UNIT_Relu6_000)
+add(UNIT_Reshape_000)
+add(UNIT_Rsqrt_000)
+add(UNIT_Softmax_001)
+add(UNIT_Sqrt_000)
+#add(UNIT_SquaredDifference_000)
+#add(UNIT_SquaredDifference_001)
+add(UNIT_Squeeze_000)
+add(UNIT_Squeeze_001)
+#add(UNIT_Squeeze_002)
+#add(UNIT_Squeeze_003)
+#add(UNIT_StopGradient_000)
+#add(UNIT_StopGradient_001)
+add(UNIT_Sub_000)
+add(UNIT_Sub_001)
+add(UNIT_Tanh_000)
diff --git a/compiler/tf2tfliteV2-value-pbtxt-test/testall.sh b/compiler/tf2tfliteV2-value-pbtxt-test/testall.sh
new file mode 100755
index 000000000..9dde41bfe
--- /dev/null
+++ b/compiler/tf2tfliteV2-value-pbtxt-test/testall.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+
+# Need at least 2 arguments
+if [[ $# -lt 2 ]]; then
+ echo "USAGE: $0 ..."
+ echo
+ echo "ARGUMENTS:"
+ echo " [test.config path]"
+ echo " [WORKDIR]"
+ echo " [Prefix1]"
+ echo " [Prefix2]"
+ echo " ..."
+ exit 255
+fi
+
+CONFIG_PATH="$1"; shift
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found nnkit-run: ${NNKIT_RUN_PATH}"
+echo "-- Found TF backend: ${TF_BACKEND_PATH}"
+echo "-- Found TFLITE backend: ${TFLITE_BACKEND_PATH}"
+echo "-- Found TF2TFLITEV2: ${TF2TFLITEV2_PATH}"
+echo "-- Found randomize action: ${RANDOMIZE_ACTION_PATH}"
+echo "-- Found HDF5 export action: ${HDF5_EXPORT_ACTION_PATH}"
+echo "-- Found HDF5 import action: ${HDF5_IMPORT_ACTION_PATH}"
+echo "-- Found i5diff: ${I5DIFF_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ echo "-- Found pb: ${PREFIX}.pb"
+
+ # Exit immediately if any command fails
+ set -e
+ # Show commands
+ set -x
+
+ # Generate tflite
+ source "${VIRTUALENV}/bin/activate"
+ "${VIRTUALENV}/bin/python" "${TF2TFLITEV2_PATH}" \
+ --v1 \
+ --input_path "${WORKDIR}/${PREFIX}.pb" \
+ --input_arrays "$(awk -F, '/^input/ { print $2 }' ${PREFIX}.info | cut -d: -f1 | tr -d ' ' | paste -d, -s)" \
+ --input_shapes "$(cat ${PREFIX}.info | grep '^input' | cut -d '[' -f2 | cut -d ']' -f1 | tr -d ' ' | xargs | tr ' ' ':')" \
+ --output_path "${WORKDIR}/${PREFIX}.tflite" \
+ --output_arrays "$(awk -F, '/^output/ { print $2 }' ${PREFIX}.info | cut -d: -f1 | tr -d ' ' | paste -d, -s)"
+
+ # Run TensorFlow
+ "${NNKIT_RUN_PATH}" \
+ --backend "${TF_BACKEND_PATH}" \
+ --backend-arg "${WORKDIR}/${PREFIX}.pb" \
+ --backend-arg "${WORKDIR}/${PREFIX}.info" \
+ --pre "${RANDOMIZE_ACTION_PATH}" \
+ --pre "${HDF5_EXPORT_ACTION_PATH}" \
+ --pre-arg "${WORKDIR}/${PREFIX}.input.h5" \
+ --post "${HDF5_EXPORT_ACTION_PATH}" \
+ --post-arg "${WORKDIR}/${PREFIX}.expected.h5"
+
+ # Run TensorFlow Lite
+ "${NNKIT_RUN_PATH}" \
+ --backend "${TFLITE_BACKEND_PATH}" \
+ --backend-arg "${WORKDIR}/${PREFIX}.tflite" \
+ --pre "${HDF5_IMPORT_ACTION_PATH}" \
+ --pre-arg "${WORKDIR}/${PREFIX}.input.h5" \
+ --post "${HDF5_EXPORT_ACTION_PATH}" \
+ --post-arg "${WORKDIR}/${PREFIX}.obtained.h5"
+
+ "${I5DIFF_PATH}" -d 0.001 "${PREFIX}.expected.h5" "${PREFIX}.obtained.h5"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$PREFIX")
+ else
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/tf2tfliteV2/CMakeLists.txt b/compiler/tf2tfliteV2/CMakeLists.txt
new file mode 100644
index 000000000..8a5c2dcd8
--- /dev/null
+++ b/compiler/tf2tfliteV2/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(tf2tfliteV2_FILE "tf2tfliteV2.py")
+set(tf2tfliteV2_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${tf2tfliteV2_FILE}")
+set(tf2tfliteV2_BIN "${CMAKE_CURRENT_BINARY_DIR}/${tf2tfliteV2_FILE}")
+
+add_custom_command(OUTPUT ${tf2tfliteV2_BIN}
+ COMMAND ${CMAKE_COMMAND} -E copy "${tf2tfliteV2_SRC}" "${tf2tfliteV2_BIN}"
+ DEPENDS ${tf2tfliteV2_SRC}
+ COMMENT "Generate ${tf2tfliteV2_BIN}"
+ )
+
+add_custom_target(tf2tfliteV2 ALL DEPENDS ${tf2tfliteV2_BIN})
diff --git a/compiler/tf2tfliteV2/README.md b/compiler/tf2tfliteV2/README.md
new file mode 100644
index 000000000..836740a5c
--- /dev/null
+++ b/compiler/tf2tfliteV2/README.md
@@ -0,0 +1,47 @@
+# tf2tfliteV2
+
+_tf2tfliteV2_ is a TensorFlow to TensorFlow Lite model converter.
+
+## Where does V2 come from?
+Even though we already have _tf2tflite_, we cannot cover all operators in TensorFlow. To expand coverage, we introduce _tf2tfliteV2_ which uses `TensorFlow Lite Converter`(by Google) internally.
+
+## Prerequisite
+- Frozen graph from TensorFlow 1.13.1
+- Desired version of TensorFlow (You can use a Python virtualenv, Docker, etc.)
+
+## Example
+```
+python tf2tfliteV2.py \
+> --v1 \
+> --input_path=frozen_graph.pb \
+> --output_path=converted.tflite \
+> --input_arrays=model_inputs \
+> --output_arrays=model_outputs
+
+```
+```
+python tf2tfliteV2.py \
+> --v2 \
+> --input_path=frozen_graph.pb \
+> --output_path=converted.tflite \
+> --input_arrays=model_inputs \
+> --output_arrays=model_outputs
+```
+
+## optional argument
+```
+ -h, --help show this help message and exit
+ --v1 Use TensorFlow Lite Converter 1.x
+ --v2 Use TensorFlow Lite Converter 2.x
+ --input_path INPUT_PATH
+ Full filepath of the input file.
+ --output_path OUTPUT_PATH
+ Full filepath of the output file.
+ --input_arrays INPUT_ARRAYS
+ Names of the input arrays, comma-separated.
+ --input_shapes INPUT_SHAPES
+ Shapes corresponding to --input_arrays, colon-
+ separated.
+ --output_arrays OUTPUT_ARRAYS
+ Names of the output arrays, comma-separated.
+```
diff --git a/compiler/tf2tfliteV2/tf2tfliteV2.py b/compiler/tf2tfliteV2/tf2tfliteV2.py
new file mode 100755
index 000000000..8b6ba0dc4
--- /dev/null
+++ b/compiler/tf2tfliteV2/tf2tfliteV2.py
@@ -0,0 +1,183 @@
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright (C) 2018 The TensorFlow Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tensorflow as tf
+import argparse
+import sys
+
+from google.protobuf.message import DecodeError
+from google.protobuf import text_format as _text_format
+
+
+def wrap_frozen_graph(graph_def, inputs, outputs):
+ def _imports_graph_def():
+ tf.compat.v1.import_graph_def(graph_def, name="")
+
+ wrapped_import = tf.compat.v1.wrap_function(_imports_graph_def, [])
+ import_graph = wrapped_import.graph
+ return wrapped_import.prune(
+ tf.nest.map_structure(import_graph.as_graph_element, inputs),
+ tf.nest.map_structure(import_graph.as_graph_element, outputs))
+
+
+def _get_parser():
+ """
+ Returns an ArgumentParser for TensorFlow Lite Converter.
+ """
+ parser = argparse.ArgumentParser(
+ description=("Command line tool to run TensorFlow Lite Converter."))
+
+ # Converter version.
+ converter_version = parser.add_mutually_exclusive_group(required=True)
+ converter_version.add_argument(
+ "--v1", action="store_true", help="Use TensorFlow Lite Converter 1.x")
+ converter_version.add_argument(
+ "--v2", action="store_true", help="Use TensorFlow Lite Converter 2.x")
+
+ # Input and output path.
+ parser.add_argument(
+ "--input_path", type=str, help="Full filepath of the input file.", required=True)
+ parser.add_argument(
+ "--output_path",
+ type=str,
+ help="Full filepath of the output file.",
+ required=True)
+
+ # Input and output arrays.
+ parser.add_argument(
+ "--input_arrays",
+ type=str,
+ help="Names of the input arrays, comma-separated.",
+ required=True)
+ parser.add_argument(
+ "--input_shapes",
+ type=str,
+ help="Shapes corresponding to --input_arrays, colon-separated.")
+ parser.add_argument(
+ "--output_arrays",
+ type=str,
+ help="Names of the output arrays, comma-separated.",
+ required=True)
+
+ return parser
+
+
+def _check_flags(flags):
+ """
+ Checks the parsed flags to ensure they are valid.
+ """
+ if flags.v1:
+ invalid = ""
+ # To be filled
+
+ if invalid:
+ raise ValueError(invalid + " options must be used with v2")
+
+ if flags.v2:
+ if tf.__version__.find("2.") != 0:
+ raise ValueError(
+ "Imported TensorFlow should have version >= 2.0 but you have " +
+ tf.__version__)
+
+ invalid = ""
+ # To be filled
+
+ if invalid:
+ raise ValueError(invalid + " options must be used with v1")
+
+ if flags.input_shapes:
+ if not flags.input_arrays:
+ raise ValueError("--input_shapes must be used with --input_arrays")
+ if flags.input_shapes.count(":") != flags.input_arrays.count(","):
+ raise ValueError("--input_shapes and --input_arrays must have the same "
+ "number of items")
+
+
+def _parse_array(arrays, type_fn=str):
+ return list(map(type_fn, arrays.split(",")))
+
+
+def _v1_convert(flags):
+ input_shapes = None
+ if flags.input_shapes:
+ input_arrays = _parse_array(flags.input_arrays)
+ input_shapes_list = [
+ _parse_array(shape, type_fn=int) for shape in flags.input_shapes.split(":")
+ ]
+ input_shapes = dict(list(zip(input_arrays, input_shapes_list)))
+
+ converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
+ flags.input_path, _parse_array(flags.input_arrays),
+ _parse_array(flags.output_arrays), input_shapes)
+
+ converter.allow_custom_ops = True
+
+ tflite_model = converter.convert()
+ open(flags.output_path, "wb").write(tflite_model)
+
+
+def _v2_convert(flags):
+ file_content = open(flags.input_path, 'rb').read()
+ try:
+ graph_def = tf.compat.v1.GraphDef()
+ graph_def.ParseFromString(file_content)
+ except (_text_format.ParseError, DecodeError):
+ try:
+ _text_format.Merge(file_content, graph_def)
+ except (_text_format.ParseError, DecodeError):
+ raise IOError("Unable to parse input file '{}'.".format(flags.input_path))
+
+ wrap_func = wrap_frozen_graph(
+ graph_def,
+ inputs=[_str + ":0" for _str in _parse_array(flags.input_arrays)],
+ # TODO What if multiple outputs come in?
+ outputs=[_str + ":0" for _str in _parse_array(flags.output_arrays)])
+ converter = tf.lite.TFLiteConverter.from_concrete_functions([wrap_func])
+
+ converter.allow_custom_ops = True
+ converter.experimental_new_converter = True
+
+ converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
+
+ tflite_model = converter.convert()
+ open(flags.output_path, "wb").write(tflite_model)
+
+
+def _convert(flags):
+ if (flags.v1):
+ _v1_convert(flags)
+ else:
+ _v2_convert(flags)
+
+
+"""
+Input frozen graph must be from TensorFlow 1.13.1
+"""
+
+
+def main():
+ # Parse argument.
+ parser = _get_parser()
+
+ # Check if the flags are valid.
+ flags = parser.parse_known_args(args=sys.argv[1:])
+ _check_flags(flags[0])
+
+ # Convert
+ _convert(flags[0])
+
+
+if __name__ == "__main__":
+ main()
diff --git a/compiler/tfgraph-xform/CMakeLists.txt b/compiler/tfgraph-xform/CMakeLists.txt
new file mode 100644
index 000000000..d6e0a4cde
--- /dev/null
+++ b/compiler/tfgraph-xform/CMakeLists.txt
@@ -0,0 +1,328 @@
+macro(require_package PKGNAME)
+ nnas_find_package(${PKGNAME} ${ARGN} QUIET)
+ if(NOT ${PKGNAME}_FOUND)
+ message(STATUS "Build tfgraph-xform: FALSE (${PKGNAME} is missing)")
+ return()
+ endif(NOT ${PKGNAME}_FOUND)
+endmacro(require_package)
+
+require_package(Abseil)
+require_package(Protobuf)
+require_package(EigenSource-fd6845384b86)
+require_package(GoogleDoubleConversion)
+require_package(GoogleNSync)
+require_package(TensorFlowSource EXACT 1.12)
+require_package(TensorFlowProtoText EXACT 1.12)
+
+message(STATUS "Build tfgraph-xform: TRUE")
+
+#
+# Set "SOURCE_FILES"
+#
+unset(SOURCE_FILES)
+
+macro(Source_Add RPATH)
+ list(APPEND SOURCE_FILES "${TensorFlowSource_DIR}/tensorflow/${RPATH}")
+endmacro(Source_Add)
+
+# TensorFlow "core"
+Source_Add(core/lib/core/status.cc)
+Source_Add(core/lib/core/coding.cc)
+Source_Add(core/lib/core/arena.cc)
+Source_Add(core/lib/core/threadpool.cc)
+
+Source_Add(core/lib/strings/scanner.cc)
+Source_Add(core/lib/strings/str_util.cc)
+Source_Add(core/lib/strings/numbers.cc)
+Source_Add(core/lib/strings/stringprintf.cc)
+Source_Add(core/lib/strings/strcat.cc)
+Source_Add(core/lib/strings/proto_text_util.cc)
+Source_Add(core/lib/strings/proto_serialization.cc)
+Source_Add(core/lib/strings/ordered_code.cc)
+Source_Add(core/lib/hash/hash.cc)
+Source_Add(core/lib/hash/crc32c.cc)
+Source_Add(core/lib/hash/crc32c_accelerate.cc)
+Source_Add(core/lib/io/iterator.cc)
+Source_Add(core/lib/io/two_level_iterator.cc)
+Source_Add(core/lib/io/format.cc)
+Source_Add(core/lib/io/block.cc)
+Source_Add(core/lib/io/table.cc)
+Source_Add(core/lib/random/random.cc)
+Source_Add(core/lib/io/path.cc)
+
+Source_Add(core/platform/cpu_info.cc)
+Source_Add(core/platform/abi.cc)
+Source_Add(core/platform/env.cc)
+Source_Add(core/platform/env_time.cc)
+Source_Add(core/platform/file_system.cc)
+Source_Add(core/platform/file_system_helper.cc)
+Source_Add(core/platform/tensor_coding.cc)
+Source_Add(core/platform/tracing.cc)
+Source_Add(core/platform/setround.cc)
+Source_Add(core/platform/denormal.cc)
+Source_Add(core/platform/protobuf_util.cc)
+
+Source_Add(core/platform/default/mutex.cc)
+Source_Add(core/platform/default/logging.cc)
+Source_Add(core/platform/default/string_coding.cc)
+
+Source_Add(core/platform/posix/error.cc)
+Source_Add(core/platform/posix/env.cc)
+Source_Add(core/platform/posix/env_time.cc)
+Source_Add(core/platform/posix/port.cc)
+Source_Add(core/platform/posix/load_library.cc)
+Source_Add(core/platform/posix/posix_file_system.cc)
+
+Source_Add(core/util/env_var.cc)
+Source_Add(core/util/padding.cc)
+Source_Add(core/util/mirror_pad_mode.cc)
+Source_Add(core/util/command_line_flags.cc)
+Source_Add(core/util/tensor_format.cc)
+Source_Add(core/util/tensor_slice_set.cc)
+Source_Add(core/util/tensor_slice_reader.cc)
+Source_Add(core/util/tensor_slice_reader_cache.cc)
+Source_Add(core/util/saved_tensor_slice_util.cc)
+Source_Add(core/util/equal_graph_def.cc)
+Source_Add(core/util/device_name_utils.cc)
+Source_Add(core/util/work_sharder.cc)
+Source_Add(core/util/use_cudnn.cc)
+Source_Add(core/util/strided_slice_op.cc)
+Source_Add(core/util/bcast.cc)
+
+Source_Add(core/graph/tensor_id.cc)
+Source_Add(core/graph/algorithm.cc)
+Source_Add(core/graph/node_builder.cc)
+Source_Add(core/graph/subgraph.cc)
+Source_Add(core/graph/graph.cc)
+Source_Add(core/graph/graph_constructor.cc)
+Source_Add(core/graph/edgeset.cc)
+Source_Add(core/graph/while_context.cc)
+Source_Add(core/graph/control_flow.cc)
+Source_Add(core/graph/gradients.cc)
+Source_Add(core/graph/optimizer_cse.cc)
+
+Source_Add(core/framework/versions.cc)
+Source_Add(core/framework/types.cc)
+Source_Add(core/framework/function.cc)
+Source_Add(core/framework/op.cc)
+Source_Add(core/framework/op_def_builder.cc)
+Source_Add(core/framework/op_kernel.cc)
+Source_Add(core/framework/op_segment.cc)
+Source_Add(core/framework/resource_handle.cc)
+Source_Add(core/framework/tensor.cc)
+Source_Add(core/framework/tensor_shape.cc)
+Source_Add(core/framework/tensor_reference.cc)
+Source_Add(core/framework/tensor_slice.cc)
+Source_Add(core/framework/tensor_util.cc)
+Source_Add(core/framework/unique_tensor_references.cc)
+Source_Add(core/framework/allocator.cc)
+Source_Add(core/framework/allocator_registry.cc)
+Source_Add(core/framework/tracking_allocator.cc)
+Source_Add(core/framework/variant.cc)
+Source_Add(core/framework/variant_op_registry.cc)
+Source_Add(core/framework/variant_tensor_data.cc)
+Source_Add(core/framework/memory_types.cc)
+Source_Add(core/framework/log_memory.cc)
+Source_Add(core/framework/node_def_builder.cc)
+
+Source_Add(core/framework/common_shape_fns.cc)
+Source_Add(core/framework/shape_inference.cc)
+
+Source_Add(core/framework/resource_mgr.cc)
+Source_Add(core/framework/device_base.cc)
+Source_Add(core/framework/rendezvous.cc)
+Source_Add(core/framework/cancellation.cc)
+
+Source_Add(core/framework/attr_value_util.cc)
+Source_Add(core/framework/attr_value_util.cc)
+Source_Add(core/framework/op_def_util.cc)
+Source_Add(core/framework/node_def_util.cc)
+Source_Add(core/framework/kernel_def_builder.cc)
+Source_Add(core/framework/kernel_def_util.cc)
+
+Source_Add(core/common_runtime/device.cc)
+Source_Add(core/common_runtime/device_mgr.cc)
+Source_Add(core/common_runtime/function.cc)
+Source_Add(core/common_runtime/memory_types.cc)
+Source_Add(core/common_runtime/copy_tensor.cc)
+Source_Add(core/common_runtime/shape_refiner.cc)
+Source_Add(core/common_runtime/constant_folding.cc)
+Source_Add(core/common_runtime/eval_const_tensor.cc)
+Source_Add(core/common_runtime/graph_optimizer.cc)
+Source_Add(core/common_runtime/graph_runner.cc)
+Source_Add(core/common_runtime/rendezvous_mgr.cc)
+Source_Add(core/common_runtime/rendezvous_util.cc)
+Source_Add(core/common_runtime/process_function_library_runtime.cc)
+Source_Add(core/common_runtime/executor.cc)
+Source_Add(core/common_runtime/executor_factory.cc)
+
+# TensorFlow - Operations
+Source_Add(core/ops/no_op.cc)
+Source_Add(core/ops/sendrecv_ops.cc)
+Source_Add(core/ops/array_ops.cc)
+Source_Add(core/ops/math_ops.cc)
+Source_Add(core/ops/image_ops.cc)
+Source_Add(core/ops/nn_ops.cc)
+
+# TensorFlow - OpKernel Implementations
+Source_Add(core/kernels/ops_util.cc)
+Source_Add(core/kernels/cwise_ops_common.cc)
+Source_Add(core/kernels/cwise_op_add_1.cc)
+Source_Add(core/kernels/cwise_op_sub.cc)
+Source_Add(core/kernels/cwise_op_mul_1.cc)
+Source_Add(core/kernels/strided_slice_op.cc)
+Source_Add(core/kernels/strided_slice_op_inst_0.cc)
+Source_Add(core/kernels/strided_slice_op_inst_1.cc)
+Source_Add(core/kernels/strided_slice_op_inst_2.cc)
+Source_Add(core/kernels/strided_slice_op_inst_3.cc)
+Source_Add(core/kernels/strided_slice_op_inst_4.cc)
+Source_Add(core/kernels/strided_slice_op_inst_5.cc)
+Source_Add(core/kernels/strided_slice_op_inst_6.cc)
+Source_Add(core/kernels/strided_slice_op_inst_7.cc)
+Source_Add(core/kernels/relu_op.cc)
+Source_Add(core/kernels/conv_ops.cc)
+Source_Add(core/kernels/conv_grad_ops.cc)
+Source_Add(core/kernels/conv_grad_input_ops.cc)
+Source_Add(core/kernels/bias_op.cc)
+Source_Add(core/kernels/pad_op.cc)
+Source_Add(core/kernels/cast_op_impl_bool.cc)
+Source_Add(core/kernels/cast_op_impl_int8.cc)
+Source_Add(core/kernels/cast_op_impl_uint8.cc)
+Source_Add(core/kernels/cast_op_impl_int16.cc)
+Source_Add(core/kernels/cast_op_impl_uint16.cc)
+Source_Add(core/kernels/cast_op_impl_int32.cc)
+Source_Add(core/kernels/cast_op_impl_uint32.cc)
+Source_Add(core/kernels/cast_op_impl_int64.cc)
+Source_Add(core/kernels/cast_op_impl_uint64.cc)
+Source_Add(core/kernels/cast_op_impl_half.cc)
+Source_Add(core/kernels/cast_op_impl_bfloat.cc)
+Source_Add(core/kernels/cast_op_impl_float.cc)
+Source_Add(core/kernels/cast_op_impl_double.cc)
+Source_Add(core/kernels/cast_op_impl_complex64.cc)
+Source_Add(core/kernels/cast_op_impl_complex128.cc)
+Source_Add(core/kernels/cast_op.cc)
+Source_Add(core/kernels/split_op.cc)
+Source_Add(core/kernels/concat_lib_cpu.cc)
+Source_Add(core/kernels/concat_op.cc)
+Source_Add(core/kernels/resize_bilinear_op.cc)
+Source_Add(core/kernels/constant_op.cc)
+Source_Add(core/kernels/pack_op.cc)
+Source_Add(core/kernels/reshape_op.cc)
+Source_Add(core/kernels/shape_ops.cc)
+Source_Add(core/kernels/fill_functor.cc)
+Source_Add(core/kernels/fused_batch_norm_op.cc)
+Source_Add(core/kernels/identity_op.cc)
+Source_Add(core/kernels/split_lib_cpu.cc)
+Source_Add(core/kernels/unpack_op.cc)
+Source_Add(core/kernels/pooling_ops_common.cc)
+Source_Add(core/kernels/maxpooling_op.cc)
+Source_Add(core/kernels/deep_conv2d.cc)
+Source_Add(core/kernels/no_op.cc)
+Source_Add(core/kernels/sendrecv_ops.cc)
+
+# TensorFlow "transform_graph" - Basic Infrastructure
+Source_Add(tools/graph_transforms/file_utils.cc)
+Source_Add(tools/graph_transforms/transform_utils.cc)
+Source_Add(tools/graph_transforms/transform_graph.cc)
+Source_Add(tools/graph_transforms/transform_graph_main.cc)
+
+# TensorFlow "transform_graph" - Transform Implementations
+Source_Add(tools/graph_transforms/fold_constants_lib.cc)
+Source_Add(tools/graph_transforms/fold_old_batch_norms.cc)
+Source_Add(tools/graph_transforms/strip_unused_nodes.cc)
+
+#
+# Set "PROTO_FILES"
+#
+unset(PROTO_FILES)
+
+macro(Proto_Add RPATH)
+ list(APPEND PROTO_FILES "${RPATH}")
+endmacro(Proto_Add)
+
+Proto_Add(tensorflow/core/lib/core/error_codes.proto)
+
+# Minimal Protocol Buffer Specification to read GraphDef
+Proto_Add(tensorflow/core/framework/versions.proto)
+Proto_Add(tensorflow/core/framework/resource_handle.proto)
+Proto_Add(tensorflow/core/framework/types.proto)
+Proto_Add(tensorflow/core/framework/tensor.proto)
+Proto_Add(tensorflow/core/framework/tensor_shape.proto)
+Proto_Add(tensorflow/core/framework/tensor_slice.proto)
+Proto_Add(tensorflow/core/framework/attr_value.proto)
+Proto_Add(tensorflow/core/framework/op_def.proto)
+Proto_Add(tensorflow/core/framework/node_def.proto)
+Proto_Add(tensorflow/core/framework/function.proto)
+Proto_Add(tensorflow/core/framework/graph.proto)
+
+Proto_Add(tensorflow/core/framework/api_def.proto)
+# "tensorflow/core/framework/tensor.cc" requires these headers
+Proto_Add(tensorflow/core/framework/allocation_description.proto)
+Proto_Add(tensorflow/core/framework/tensor_description.proto)
+Proto_Add(tensorflow/core/framework/log_memory.proto)
+Proto_Add(tensorflow/core/framework/kernel_def.proto)
+Proto_Add(tensorflow/core/framework/device_attributes.proto)
+Proto_Add(tensorflow/core/framework/cost_graph.proto)
+Proto_Add(tensorflow/core/framework/step_stats.proto)
+
+Proto_Add(tensorflow/core/protobuf/cluster.proto)
+Proto_Add(tensorflow/core/protobuf/config.proto)
+Proto_Add(tensorflow/core/protobuf/debug.proto)
+Proto_Add(tensorflow/core/protobuf/rewriter_config.proto)
+
+Proto_Add(tensorflow/core/util/saved_tensor_slice.proto)
+
+#
+# Set "PROTO_TEXT_FILES"
+#
+unset(PROTO_TEXT_FILES)
+
+macro(ProtoText_Add RPATH)
+ list(APPEND PROTO_TEXT_FILES "${RPATH}")
+endmacro(ProtoText_Add)
+
+ProtoText_Add(tensorflow/core/framework/versions.proto)
+ProtoText_Add(tensorflow/core/framework/attr_value.proto)
+ProtoText_Add(tensorflow/core/framework/resource_handle.proto)
+ProtoText_Add(tensorflow/core/framework/types.proto)
+ProtoText_Add(tensorflow/core/framework/tensor_shape.proto)
+ProtoText_Add(tensorflow/core/framework/tensor_description.proto)
+ProtoText_Add(tensorflow/core/framework/allocation_description.proto)
+ProtoText_Add(tensorflow/core/framework/tensor.proto)
+ProtoText_Add(tensorflow/core/framework/op_def.proto)
+ProtoText_Add(tensorflow/core/framework/node_def.proto)
+ProtoText_Add(tensorflow/core/framework/function.proto)
+ProtoText_Add(tensorflow/core/framework/graph.proto)
+ProtoText_Add(tensorflow/core/framework/kernel_def.proto)
+ProtoText_Add(tensorflow/core/framework/log_memory.proto)
+ProtoText_Add(tensorflow/core/framework/device_attributes.proto)
+
+#
+# Build "tfgraph-xform" executable
+#
+Protobuf_Generate(TF_PROTO
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/tensorflow-proto" # OUTPUT ROOT
+ "${TensorFlowSource_DIR}" # BASE DIRECTORY
+ ${PROTO_FILES} # .proto path (relative to the BASE)
+)
+
+ProtoText_Generate(TF_PROTO_TEXT
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/tensorflow-prototext" # OUTPUT ROOT
+ ${PROTO_FILES}
+)
+
+add_executable(tfgraph-xform
+ ${SOURCE_FILES} # TensorFlow Source Files
+ ${TF_PROTO_SOURCES} # Source Files generated by Protocol Buffer
+ ${TF_PROTO_TEXT_SOURCES} # Source Files generated by Proto Text
+)
+
+target_include_directories(tfgraph-xform PRIVATE ${TensorFlowSource_DIR})
+target_include_directories(tfgraph-xform PRIVATE ${TF_PROTO_INCLUDE_DIRS})
+target_include_directories(tfgraph-xform PRIVATE ${TF_PROTO_TEXT_INCLUDE_DIRS})
+target_include_directories(tfgraph-xform PRIVATE ${EigenSource_DIR})
+target_link_libraries(tfgraph-xform PRIVATE ${TF_PROTO_LIBRARIES})
+target_link_libraries(tfgraph-xform PRIVATE abseil)
+target_link_libraries(tfgraph-xform PRIVATE dl)
+target_link_libraries(tfgraph-xform PRIVATE Google::DoubleConversion)
+target_link_libraries(tfgraph-xform PRIVATE Google::NSync)
diff --git a/compiler/tfgraph-xform/README.md b/compiler/tfgraph-xform/README.md
new file mode 100644
index 000000000..41fb88530
--- /dev/null
+++ b/compiler/tfgraph-xform/README.md
@@ -0,0 +1,5 @@
+# tfgraph-xform
+
+Let's build TensorFlow "transform-graph" tool without Bazel.
+
+**DISCLAIMER** Not every transformation is supported.
diff --git a/compiler/tfinfo-v2/CMakeLists.txt b/compiler/tfinfo-v2/CMakeLists.txt
new file mode 100644
index 000000000..cf438ea29
--- /dev/null
+++ b/compiler/tfinfo-v2/CMakeLists.txt
@@ -0,0 +1,36 @@
+nnas_find_package(Protobuf QUIET)
+
+if(NOT Protobuf_FOUND)
+ return()
+endif(NOT Protobuf_FOUND)
+
+# generating and building schema
+Protobuf_Generate(TFINFO_PROTO
+ "${CMAKE_CURRENT_BINARY_DIR}/generated"
+ "./proto"
+ tfinfo-v2.proto)
+
+add_library(tfinfo_v2_proto STATIC ${TFINFO_PROTO_SOURCES})
+set_target_properties(tfinfo_v2_proto PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(tfinfo_v2_proto PUBLIC ${TFINFO_PROTO_INCLUDE_DIRS})
+target_link_libraries(tfinfo_v2_proto PUBLIC libprotobuf)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(tfinfo_v2 STATIC ${SOURCES})
+set_target_properties(tfinfo_v2 PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(tfinfo_v2 PUBLIC include)
+target_link_libraries(tfinfo_v2 PRIVATE tfinfo_v2_proto)
+target_link_libraries(tfinfo_v2 PRIVATE oops)
+target_link_libraries(tfinfo_v2 PRIVATE stdex)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(tfinfo_v2_test ${TESTS})
+target_link_libraries(tfinfo_v2_test tfinfo_v2)
diff --git a/compiler/tfinfo-v2/include/tfinfo-v2/TensorInfoLoader.h b/compiler/tfinfo-v2/include/tfinfo-v2/TensorInfoLoader.h
new file mode 100644
index 000000000..ee3348e85
--- /dev/null
+++ b/compiler/tfinfo-v2/include/tfinfo-v2/TensorInfoLoader.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFINFO_TENSOR_INFO_LOADER_H__
+#define __TFINFO_TENSOR_INFO_LOADER_H__
+
+#include "TensorSignature.h"
+
+#include <memory>
+#include <vector>
+
+namespace tfinfo
+{
+inline namespace v2
+{
+
+/**
+ * @brief Function to create TensorSignatures defined in info file
+ */
+TensorSignatures load(const char *info_path);
+
+/**
+ * @brief Function to create TensorSignatures from stream
+ */
+TensorSignatures load(std::istream *stream, const char *path_for_error_msg);
+
+} // namespace v2
+} // namespace tfinfo
+
+#endif // __TFINFO_TENSOR_INFO_LOADER_H__
diff --git a/compiler/tfinfo-v2/include/tfinfo-v2/TensorSignature.h b/compiler/tfinfo-v2/include/tfinfo-v2/TensorSignature.h
new file mode 100644
index 000000000..f26d0354a
--- /dev/null
+++ b/compiler/tfinfo-v2/include/tfinfo-v2/TensorSignature.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFINFO_V2_TENSORSIGNATURE_H__
+#define __TFINFO_V2_TENSORSIGNATURE_H__
+
+#include <map>
+#include <vector>
+#include <memory>
+#include <string>
+#include <stdexcept>
+
+namespace tfinfo
+{
+inline namespace v2
+{
+
+/**
+ * @brief Supported Data Types
+ */
+enum class DataType
+{
+ UNKNOWN,
+
+ FLOAT32, // IEEE 32-bit floating point
+ /* To be added */
+};
+
+/**
+ * @brief Class to represent axis and size of dims.
+ * User should enter axis and size of dim(s) when input tensor(s) contain(s) unknown dim(s).
+ * Such axis and size of dim(s) will be stored in ShapeHint.
+ */
+class ShapeHint
+{
+ using AxisHint = uint32_t;
+ using SizeHint = uint64_t;
+
+public:
+ ShapeHint() = default;
+
+ void add(AxisHint axis, SizeHint size)
+ {
+ if (_dims.find(axis) != _dims.end())
+ throw std::runtime_error("dim value already exists");
+
+ _dims[axis] = size;
+ }
+
+ std::map<AxisHint, SizeHint>::const_iterator cbegin() const { return _dims.cbegin(); }
+
+ std::map<AxisHint, SizeHint>::const_iterator cend() const { return _dims.cend(); }
+
+ bool empty() { return _dims.size() == 0; }
+
+ size_t size() { return _dims.size(); }
+
+private:
+ std::map<AxisHint, SizeHint> _dims;
+};
+
+using TensorName = std::string;
+
+/**
+ * @brief Class to store input and output tensor information
+ */
+class TensorSignature final
+{
+public:
+ enum class Kind
+ {
+ Input,
+ Output
+ };
+
+ TensorSignature(const Kind kind, const std::string &name) : _kind(kind), _tensor_name()
+ {
+ // tensor name can be a form of "placeholder:0" or "placeholder".
+ // If tensor index is omitted, ":0" is appended
+ auto pos = name.find(":");
+ if (pos == std::string::npos)
+ _tensor_name.assign(name + ":0");
+ else
+ _tensor_name.assign(name);
+ }
+
+ TensorSignature(const Kind kind, const std::string &name, const ShapeHint &shape_hint)
+ : TensorSignature(kind, name)
+ {
+ _shape_hint = shape_hint;
+ }
+
+public:
+ Kind kind() const { return _kind; }
+
+ const TensorName &name() { return _tensor_name; }
+
+ ShapeHint &shapeHint() { return _shape_hint; }
+
+private:
+ Kind _kind;
+ std::string _tensor_name;
+ ShapeHint _shape_hint;
+};
+
+using TensorSignatures = std::vector<std::unique_ptr<TensorSignature>>;
+
+} // namespace v2
+} // namespace tfinfo
+
+#endif // __TFINFO_V2_TENSORSIGNATURE_H__
diff --git a/compiler/tfinfo-v2/proto/tfinfo-v2.proto b/compiler/tfinfo-v2/proto/tfinfo-v2.proto
new file mode 100644
index 000000000..4f7c47b4a
--- /dev/null
+++ b/compiler/tfinfo-v2/proto/tfinfo-v2.proto
@@ -0,0 +1,46 @@
+syntax = "proto3";
+
+package tfinfo_v2_proto;
+option cc_enable_arenas = true;
+
+/*
+Example of prototxt file is as follows:
+
+input {
+ name : "placeholder:0"
+}
+output {
+ name : "relu:0"
+}
+
+When a model has unknown dims in its input,
+the values of all unknown dims must be provided like the following:
+
+input {
+ name : "placeholder:0"
+ dim { axis: 0 size: 8 }
+ dim { axis: 3 size: 4 }
+}
+output {
+ name : "relu:0"
+}
+*/
+
+message Dim {
+ int32 axis = 1;
+ int64 size = 2; // tensorflow uses int64
+}
+
+message OutputDef {
+ string name = 1;
+}
+
+message InputDef {
+ string name = 1;
+ repeated Dim dim = 2;
+}
+
+message InfoDef {
+ repeated InputDef input = 1;
+ repeated OutputDef output = 2;
+}
diff --git a/compiler/tfinfo-v2/requires.cmake b/compiler/tfinfo-v2/requires.cmake
new file mode 100644
index 000000000..e7efab4fb
--- /dev/null
+++ b/compiler/tfinfo-v2/requires.cmake
@@ -0,0 +1,2 @@
+require("oops")
+require("stdex")
diff --git a/compiler/tfinfo-v2/src/TFInfo_v2.test.cpp b/compiler/tfinfo-v2/src/TFInfo_v2.test.cpp
new file mode 100644
index 000000000..02a2d9199
--- /dev/null
+++ b/compiler/tfinfo-v2/src/TFInfo_v2.test.cpp
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tfinfo-v2/TensorInfoLoader.h"
+
+#include "tfinfo-v2/TensorSignature.h"
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+#include <map>
+
+#define TC_CASE(content) #content
+
+using namespace tfinfo::v2;
+
+namespace
+{
+
+// clang-format off
+const std::vector<std::string> success_cases =
+{
+ TC_CASE(
+ output {
+ name : "relu:0"
+ }
+ ),
+
+ TC_CASE(
+ input {
+ name : "placeholder:0"
+ }
+
+ input {
+ name : "placeholder:1"
+ dim { axis:0 size: 1 }
+ dim { axis:2 size: 4 }
+ }
+
+ output {
+ name : "relu:0"
+ }
+ ),
+ // clang-format on
+};
+
+} // namespace
+
+TEST(TFINFO_V2, success_0)
+{
+ std::stringstream ss{success_cases[0]};
+
+ auto tensors = load(&ss, "tfinfo_v2_test");
+
+ std::map<std::string, tfinfo::v2::TensorSignature *> m;
+
+ for (auto &tensor : tensors)
+ {
+ m[tensor->name()] = tensor.get();
+ }
+
+ ASSERT_EQ(m.size(), 1);
+
+ auto t1 = m["relu:0"];
+ {
+ ASSERT_EQ(t1->kind(), tfinfo::v2::TensorSignature::Kind::Output);
+ ASSERT_TRUE(t1->shapeHint().empty());
+ }
+}
+
+TEST(TFINFO_V2, success_1)
+{
+ std::stringstream ss{success_cases[1]};
+
+ auto tensors = load(&ss, "tfinfo_v2_test");
+
+ std::map<std::string, tfinfo::v2::TensorSignature *> m;
+
+ for (auto &tensor : tensors)
+ {
+ m[tensor->name()] = tensor.get();
+ }
+
+ ASSERT_EQ(m.size(), 3);
+
+ auto t1 = m["placeholder:0"];
+ {
+ ASSERT_EQ(t1->kind(), tfinfo::v2::TensorSignature::Kind::Input);
+ ASSERT_TRUE(t1->shapeHint().empty());
+ }
+
+ auto t2 = m["placeholder:1"];
+ {
+ ASSERT_EQ(t2->kind(), tfinfo::v2::TensorSignature::Kind::Input);
+ ASSERT_FALSE(t2->shapeHint().empty());
+
+ auto iter = t2->shapeHint().cbegin();
+
+ ASSERT_TRUE(iter != t2->shapeHint().cend());
+ ASSERT_EQ(iter->first, 0); // axis
+ ASSERT_EQ(iter->second, 1); // size
+
+ iter++;
+
+ ASSERT_TRUE(iter != t2->shapeHint().cend());
+ ASSERT_EQ(iter->first, 2); // axis
+ ASSERT_EQ(iter->second, 4); // size
+
+ iter++;
+
+ ASSERT_TRUE(iter == t2->shapeHint().cend());
+ }
+
+ auto t3 = m["relu:0"];
+ {
+ ASSERT_EQ(t3->kind(), tfinfo::v2::TensorSignature::Kind::Output);
+ ASSERT_TRUE(t3->shapeHint().empty());
+ }
+}
+
+namespace
+{
+
+// clang-format off
+const std::vector<std::string> fail_cases =
+ {
+ // no output
+ TC_CASE(
+ input {
+ name : "relu:0"
+ }
+ ),
+
+ // no name in input
+ TC_CASE(
+ input {
+ shape {
+ dim { size: 1 }
+ dim { size: 2 }
+ }
+ }
+ output {
+ name : "relu:0"
+ }
+ ),
+
+ // wrong name format - no tensor index
+ TC_CASE(
+ output {
+ name : "name_with_no_index"
+ }
+ ),
+
+ // wrong name format - no name but numbers
+ TC_CASE(
+ output {
+ name : "1"
+ }
+ ),
+
+ // duplicated node def - input, input
+ TC_CASE(
+ input {
+ name : "duplicated_name:0"
+ }
+
+ input {
+ name : "duplicated_name:0"
+ }
+ ),
+
+ // duplicated node def - input, output
+ TC_CASE(
+ input {
+ name : "duplicated_name:0"
+ }
+
+ output {
+ name : "duplicated_name:0"
+ }
+ ),
+
+ // wrong keyword ('in', 'out' instead of 'input', 'output')
+ TC_CASE(
+ in {
+ name : "a:0"
+ }
+
+ out {
+ name : "b:0"
+ }
+ ),
+
+ // wrong keyword ('input_name' instead of 'name')
+ TC_CASE(
+ input {
+ input_name : "a:0"
+ }
+
+ output {
+ name : "b:0"
+ }
+ ),
+
+ // using deprecated format
+ // (note that because of ",", macro TC_CASE cannot be used.)
+ R"(
+ input, a:0, TF_FLOAT, [2, 3 ,4]
+ output, b:0, TF_FLOAT, [2, 3 ,4]
+ )",
+ // clang-format on
+};
+
+} // namespace
+
+TEST(TFINFO_V2, failure)
+{
+ for (int i = 0; i < fail_cases.size(); i++)
+ {
+ std::stringstream ss{fail_cases[i]};
+
+ try
+ {
+ load(&ss, "tfinfo_v2_test");
+
+ FAIL();
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << ss.str() << std::endl << e.what() << '\n';
+ }
+ }
+}
diff --git a/compiler/tfinfo-v2/src/TensorInfoLoader.cpp b/compiler/tfinfo-v2/src/TensorInfoLoader.cpp
new file mode 100644
index 000000000..0bf828773
--- /dev/null
+++ b/compiler/tfinfo-v2/src/TensorInfoLoader.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tfinfo-v2/TensorInfoLoader.h"
+
+#include "tfinfo-v2/TensorSignature.h"
+
+#include <oops/UserExn.h>
+#include <stdex/Memory.h>
+
+#include <tfinfo-v2.pb.h>
+
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <fstream>
+#include <fcntl.h>
+
+namespace
+{
+
+// for testing purpose
+// Parse a TextFormat-encoded InfoDef from 'stream' into 'info_def'.
+// Returns false when the text cannot be parsed as an InfoDef.
+bool load_text(std::istream *stream, tfinfo_v2_proto::InfoDef &info_def)
+{
+  google::protobuf::io::IstreamInputStream iis(stream);
+
+  return google::protobuf::TextFormat::Parse(&iis, &info_def);
+}
+
+// Return true when every character of 'num' is a decimal digit.
+// NOTE An empty string yields true (no character fails the check).
+bool is_num(const std::string &num)
+{
+  for (const char c : num)
+  {
+    if (!isdigit(c))
+      return false;
+  }
+
+  return true;
+}
+
+// Validate that 'tensor_name' follows the TensorFlow tensor naming scheme,
+// i.e., <operation name> ":" <tensor index>, e.g., "in/placeholder:0".
+// Throws oops::UserExn (annotated with 'path') on any violation.
+void validate_tensor_name(const std::string &tensor_name, const char *path)
+{
+  // Note that Tensorflow tensor name format is
+  // operation name ":" index, e.g., "in/placeholder:0"
+  // (size_type avoids the original's int truncation of find()/npos)
+  const auto pos = tensor_name.find(":");
+  if (pos == std::string::npos)
+    throw oops::UserExn("Missing index separator, ':'", "name", tensor_name, "file", path);
+
+  if (tensor_name.length() == pos + 1) // ':' is the last char
+    throw oops::UserExn("Missing tensor index after ':'", "name", tensor_name, "file", path);
+
+  // 1. Validating operation name.
+  // for naming format, refer to https://www.tensorflow.org/api_docs/python/tf/Operation#__init__
+  // First char is in the form of "[A-Za-z0-9.]"
+  // and the rest chars are in the form of "[A-Za-z0-9_.\\-/]*"
+  std::string op_name = tensor_name.substr(0, pos);
+
+  // first character
+  if (!(isalnum(op_name[0]) || op_name[0] == '.'))
+    throw oops::UserExn("Wrong tensor name format", "name", tensor_name, "file", path);
+
+  // and the rest chars
+  for (size_t i = 1; i < op_name.length(); i++)
+    if (!(isalnum(op_name[i]) || std::string("_.\\-/").find(op_name[i]) != std::string::npos))
+      throw oops::UserExn("Wrong tensor name format", "name", tensor_name, "file", path);
+
+  // 2. validating index after ":"
+  // FIX: the original passed 'op_name.length() - pos - 1' as the substr count;
+  // op_name.length() == pos, so the count wrapped around (unsigned -1) and the
+  // code worked only by accident. Take the remainder of the full name instead.
+  std::string index = tensor_name.substr(pos + 1);
+
+  if (!is_num(index))
+    throw oops::UserExn("Wrong tensor name format", "name", tensor_name, "file", path);
+}
+
+// Throw oops::UserExn when two tensors in 'tensors' carry the same name.
+// 'path' is only used to annotate the error message.
+void check_duplicate(tfinfo::v2::TensorSignatures &tensors, const char *path)
+{
+  std::map<std::string, bool> seen;
+  for (const auto &tensor : tensors)
+  {
+    // map::emplace reports via .second whether the key was newly inserted;
+    // a repeat insertion means the name was already defined
+    if (!seen.emplace(tensor->name(), true).second)
+      throw oops::UserExn("Duplicate tensor definition", "name", tensor->name(), "file", path);
+  }
+}
+
+// Build TensorSignature entries from a parsed InfoDef.
+//  - Input nodes are optional; at least one output node is mandatory.
+//  - Every tensor name is validated; duplicate names are rejected.
+// 'path' is only used to annotate error messages.
+void convert(tfinfo_v2_proto::InfoDef &info_def, tfinfo::v2::TensorSignatures &tensors,
+             const char *path)
+{
+  // processing input. Note that there could be no input.
+  // (The original wrapped this loop in a redundant `if (input_size)`.)
+  for (int i = 0; i < info_def.input_size(); i++)
+  {
+    auto input_def = info_def.input().Get(i);
+
+    auto name = input_def.name();
+    validate_tensor_name(name, path);
+
+    auto tensor = stdex::make_unique<tfinfo::v2::TensorSignature>(
+        tfinfo::v2::TensorSignature::Kind::Input, name);
+
+    // when there is dim attribute for unknown shape
+    // (dim_size() used consistently; original mixed dim_size() and dim().size())
+    for (int d = 0; d < input_def.dim_size(); d++)
+    {
+      auto dim = input_def.dim(d);
+      tensor->shapeHint().add(dim.axis(), dim.size());
+    }
+
+    tensors.emplace_back(std::move(tensor));
+  }
+
+  // processing output. At least 1 output node must exist.
+  // (The original queried output_size() twice and re-tested it.)
+  const int output_size = info_def.output_size();
+  if (output_size == 0)
+    throw oops::UserExn("Missing output node. At least 1 output node must exist", "file", path);
+
+  for (int i = 0; i < output_size; i++)
+  {
+    auto name = info_def.output().Get(i).name();
+    validate_tensor_name(name, path);
+
+    auto tensor = stdex::make_unique<tfinfo::v2::TensorSignature>(
+        tfinfo::v2::TensorSignature::Kind::Output, name);
+    tensors.emplace_back(std::move(tensor));
+  }
+
+  check_duplicate(tensors, path);
+}
+
+} // namespace
+
+namespace tfinfo
+{
+inline namespace v2
+{
+
+// Load and parse the tfinfo(v2) file at 'path'.
+// Throws oops::UserExn when the file cannot be opened or parsed.
+TensorSignatures load(const char *path)
+{
+  std::ifstream stream(path, std::ios::in | std::ios::binary);
+
+  // Report a missing/unreadable file explicitly. Previously a failed open
+  // flowed into the parser, which reported a misleading error (an empty
+  // stream parses as an empty InfoDef and fails later validation instead).
+  if (!stream.is_open())
+    throw oops::UserExn("Cannot open the info file", "path", path);
+
+  return load(&stream, path);
+}
+
+/**
+ * @brief Parse a textual tfinfo(v2) description from 'stream'.
+ *
+ * @note 'path_for_error_msg' is never opened; it is only embedded into
+ *       error messages so users can locate the offending file.
+ * @throw oops::UserExn when parsing or semantic validation fails
+ */
+TensorSignatures load(std::istream *stream, const char *path_for_error_msg)
+{
+  tfinfo_v2_proto::InfoDef info_def;
+
+  if (!load_text(stream, info_def))
+  {
+    throw oops::UserExn("Cannot parse the info file", "path", path_for_error_msg);
+  }
+
+  TensorSignatures tensors;
+
+  convert(info_def, tensors, path_for_error_msg);
+
+  return tensors;
+}
+
+} // namespace v2
+} // namespace tfinfo
diff --git a/compiler/tfinfo-v2/src/TensorSignature.cpp b/compiler/tfinfo-v2/src/TensorSignature.cpp
new file mode 100644
index 000000000..3107f33db
--- /dev/null
+++ b/compiler/tfinfo-v2/src/TensorSignature.cpp
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tfinfo-v2/TensorSignature.h"
diff --git a/compiler/tfinfo/CMakeLists.txt b/compiler/tfinfo/CMakeLists.txt
new file mode 100644
index 000000000..678912e6f
--- /dev/null
+++ b/compiler/tfinfo/CMakeLists.txt
@@ -0,0 +1,20 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(tfinfo STATIC ${SOURCES})
+set_target_properties(tfinfo PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(tfinfo PUBLIC include)
+target_link_libraries(tfinfo stdex angkor oops)
+
+# TODO Remove "nnkit_support_tftestinfo" later
+add_library(nnkit_support_tftestinfo ALIAS tfinfo)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(tfinfo_test ${TESTS})
+target_link_libraries(tfinfo_test tfinfo)
diff --git a/compiler/tfinfo/README.md b/compiler/tfinfo/README.md
new file mode 100644
index 000000000..21a4cc47f
--- /dev/null
+++ b/compiler/tfinfo/README.md
@@ -0,0 +1,12 @@
+# tfinfo
+
+This dir contains helper classes to handle `test.info` files under `res/TensorFlowTests`.
+
+## Format of 'test.info' file
+
+Each line should contain the following fields:
+- `input` or `output`
+- node_name:digits
+- type (see enum TF_DataType in tensorflow/c/c_api.h)
+- [ shapes ]
+ - In case of scalar, use '[ ]' as shape
diff --git a/compiler/tfinfo/include/nnkit/support/tftestinfo/ParsedTensor.h b/compiler/tfinfo/include/nnkit/support/tftestinfo/ParsedTensor.h
new file mode 100644
index 000000000..aec8c5e40
--- /dev/null
+++ b/compiler/tfinfo/include/nnkit/support/tftestinfo/ParsedTensor.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNKIT_SUPPORT_TFTESTINFO_PARSED_TENSOR_H__
+#define __NNKIT_SUPPORT_TFTESTINFO_PARSED_TENSOR_H__
+
+#include "nncc/core/ADT/tensor/Shape.h"
+
+#include <oops/UserExn.h>
+
+#include <string>
+#include <cassert>
+#include <stdexcept>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tftestinfo
+{
+
+/**
+ * @brief Supported Data Types
+ */
+enum class DataType
+{
+ FLOAT32, // IEEE 32-bit floating point
+ /* To be added */
+};
+
+/**
+ * @brief Class to store tensor information parsed from test.info file under moco/test/tf
+ */
+class ParsedTensor final
+{
+public:
+  enum class Kind
+  {
+    Input,
+    Output
+  };
+
+  ParsedTensor() = delete;
+
+  // 'shape' is copied dim-by-dim into an angkor Shape of the same rank
+  ParsedTensor(const Kind kind, const std::string &name, const DataType &dtype,
+               const std::vector<int32_t> &shape)
+      : _kind(kind), _dtype(dtype)
+  {
+    _tensor_name.assign(name);
+
+    _shape.resize(shape.size());
+    for (uint32_t rank = 0; rank < shape.size(); rank++)
+      _shape.dim(rank) = shape.at(rank);
+  }
+
+  ~ParsedTensor() { /* empty */}
+
+public:
+  Kind kind() const { return _kind; }
+
+  // NOTE 'const' added; the original accessor was non-const by omission,
+  // which prevented calls through const references
+  const std::string &name() const { return _tensor_name; }
+
+  const nncc::core::ADT::tensor::Shape &shape() const { return _shape; }
+  // TODO This method is a bridge between testinfo and testinfo-v2. When testinfo-v2 is introduced,
+  // this method will be removed.
+  nncc::core::ADT::tensor::Shape &mutable_shape() { return _shape; }
+
+  const DataType &dtype() const { return _dtype; }
+
+  /**
+   * @brief Get the name of node that has this tensor.
+   * E.g., if the name of this tensor is "MyOp:0", this method returns "MyOp".
+   */
+  std::string nodeName() const { return _tensor_name.substr(0, _tensor_name.find(":")); }
+
+  /**
+   * @brief Get the index from the tensor name.
+   * E.g., if the name of this tensor is "MyOp:12", this method returns 12.
+   */
+  int tensorIndex() const
+  {
+    // size_type avoids the original's int truncation of find()/npos
+    const auto separator = _tensor_name.find(":");
+
+    // sanity check
+    if (separator == std::string::npos)
+      throw oops::UserExn("Tensor name in wrong format", "name", _tensor_name);
+
+    // one-argument substr takes everything after the separator
+    return std::stoi(_tensor_name.substr(separator + 1));
+  }
+
+public:
+  bool isFloatTensor() const { return _dtype == DataType::FLOAT32; }
+  bool hasShape() const { return _has_shape; }
+
+private:
+  Kind _kind;
+  std::string _tensor_name;
+  nncc::core::ADT::tensor::Shape _shape;
+  DataType _dtype;
+  // Now, every info file has a shape.
+  bool _has_shape = true;
+};
+
+} // namespace tftestinfo
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_TFTESTINFO_PARSED_TENSOR_H__
diff --git a/compiler/tfinfo/include/nnkit/support/tftestinfo/TensorInfoParser.h b/compiler/tfinfo/include/nnkit/support/tftestinfo/TensorInfoParser.h
new file mode 100644
index 000000000..9421b5e05
--- /dev/null
+++ b/compiler/tfinfo/include/nnkit/support/tftestinfo/TensorInfoParser.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file TensorInfoParser.h
+ * @brief This file contains functions to parse test.info files in moco/test/tf
+ */
+
+#ifndef __NNKIT_SUPPORT_TFTESTINFO_TENSOR_INFO_PARSER_H__
+#define __NNKIT_SUPPORT_TFTESTINFO_TENSOR_INFO_PARSER_H__
+
+#include "ParsedTensor.h"
+
+#include <memory>
+#include <vector>
+
+namespace nnkit
+{
+namespace support
+{
+namespace tftestinfo
+{
+
+/**
+ * @brief Function to parse test.info
+ */
+std::vector<std::unique_ptr<ParsedTensor>> parse(const char *info_path);
+
+} // namespace tftestinfo
+} // namespace support
+} // namespace nnkit
+
+#endif // __NNKIT_SUPPORT_TFTESTINFO_TENSOR_INFO_PARSER_H__
diff --git a/compiler/tfinfo/requires.cmake b/compiler/tfinfo/requires.cmake
new file mode 100644
index 000000000..3b45c6458
--- /dev/null
+++ b/compiler/tfinfo/requires.cmake
@@ -0,0 +1,3 @@
+require("stdex")
+require("angkor")
+require("oops")
diff --git a/compiler/tfinfo/src/Compat.h b/compiler/tfinfo/src/Compat.h
new file mode 100644
index 000000000..6580a97d7
--- /dev/null
+++ b/compiler/tfinfo/src/Compat.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COMPAT_H__
+#define __COMPAT_H__
+
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+
+// TODO Remove these helper declarations after code cleanup
+using TF_DataType = ::nnkit::support::tftestinfo::DataType;
+
+#define TF_FLOAT ::nnkit::support::tftestinfo::DataType::FLOAT32
+
+#endif // __COMPAT_H__
diff --git a/compiler/tfinfo/src/TensorInfoParser.cpp b/compiler/tfinfo/src/TensorInfoParser.cpp
new file mode 100644
index 000000000..9eb3da296
--- /dev/null
+++ b/compiler/tfinfo/src/TensorInfoParser.cpp
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnkit/support/tftestinfo/TensorInfoParser.h"
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+
+// TODO Remove this file after code cleanup
+#include "Compat.h"
+
+#include <oops/UserExn.h>
+#include <stdex/Memory.h>
+#include <nncc/core/ADT/tensor/Shape.h>
+
+#include <cctype>
+#include <memory>
+#include <vector>
+#include <sstream>
+#include <fstream>
+#include <stdexcept>
+
+/**
+ * @file TensorInfoParser.cpp
+ * @brief This file contains functions to parse test.info files in moco/test/tf
+ */
+
+namespace
+{
+
+using nnkit::support::tftestinfo::ParsedTensor;
+
+// Erase everything from the first '#' (inclusive) to the end of 'line', in place.
+void remove_comment(std::string &line)
+{
+  // size_type instead of the original 'int': find_first_of returns
+  // std::string::size_type, and stuffing npos into an int only compared
+  // correctly by accident of two's-complement truncation
+  const auto pos = line.find_first_of("#");
+  if (pos != std::string::npos)
+    line.erase(pos);
+}
+
+// Return a copy of 'str' with leading and trailing spaces/tabs removed.
+// A string that is entirely whitespace becomes "".
+std::string trim(const std::string &str)
+{
+  static const std::string whitespace = " \t";
+  static const std::string empty = "";
+
+  const auto first = str.find_first_not_of(whitespace);
+  if (first == std::string::npos)
+    return empty; // nothing but whitespace
+
+  const auto last = str.find_last_not_of(whitespace);
+  return str.substr(first, last - first + 1);
+}
+
+// Map a line's leading keyword to a tensor kind: "input" -> Kind::Input,
+// "output" -> Kind::Output. Any other token raises oops::UserExn.
+ParsedTensor::Kind get_kind(const std::string &tok)
+{
+  if (tok == "input")
+    return ParsedTensor::Kind::Input;
+  else if (tok == "output")
+    return ParsedTensor::Kind::Output;
+  else
+    throw oops::UserExn("Unrecognizable token", "token", tok);
+}
+
+// Map a textual type token to TF_DataType. Only "TF_FLOAT" is supported so
+// far; any other token raises oops::UserExn.
+TF_DataType get_dtype(const std::string &tok)
+{
+  if (tok == "TF_FLOAT")
+    return TF_FLOAT;
+  else
+    throw oops::UserExn("Unsupported tensor datatype", "data type", tok);
+}
+
+// Return true when 'num' consists solely of decimal digits.
+// NOTE An empty string passes; callers reject empty tokens themselves.
+bool validate_num(const std::string &num)
+{
+  for (const char c : num)
+  {
+    if (!isdigit(c))
+      return false;
+  }
+
+  return true;
+}
+
+// Return true when 'tensor_name' matches TensorFlow's "<op name>:<index>"
+// naming scheme (e.g., "in/placeholder:0"); false otherwise.
+bool validate_name(const std::string &tensor_name)
+{
+  // Note that Tensorflow tensor name format is
+  // operation name ":" index, e.g., "in/placeholder:0"
+  // (size_type avoids the original's int truncation of find()/npos)
+  const auto pos = tensor_name.find(":");
+  if (pos == std::string::npos)
+    return false;
+
+  if (tensor_name.length() == pos + 1) // ':' is the last char
+    return false;
+
+  // 1. Validating operation name.
+  // for naming format, refer to https://www.tensorflow.org/api_docs/python/tf/Operation#__init__
+  // First char is in the form of "[A-Za-z0-9.]"
+  // and the rest chars are in the form of "[A-Za-z0-9_.\\-/]*"
+  std::string op_name = tensor_name.substr(0, pos);
+
+  // first character
+  if (!(isalnum(op_name[0]) || op_name[0] == '.'))
+    return false;
+
+  // and the rest chars
+  for (size_t i = 1; i < op_name.length(); i++)
+    if (!(isalnum(op_name[i]) || std::string("_.\\-/").find(op_name[i]) != std::string::npos))
+      return false;
+
+  // 2. validating index after ":"
+  // FIX: the original passed 'op_name.length() - pos - 1' as the substr count,
+  // which wraps around to a huge unsigned value (op_name.length() == pos) and
+  // worked only by accident. Take the remainder of the name instead.
+  std::string index = tensor_name.substr(pos + 1);
+
+  return validate_num(index);
+}
+
+} // namespace
+
+namespace nnkit
+{
+namespace support
+{
+namespace tftestinfo
+{
+
+// Raise a UserExn when a stream extraction (getline) failed.
+// FIX: the original macro expanded to `oops::UserExn(...)` WITHOUT `throw`,
+// so the exception object was constructed and immediately discarded, and
+// parsing silently continued with a stale token.
+#define CHECK_NOT_NULL(x) \
+  if (!(x))               \
+  throw oops::UserExn("Cannot find required token")
+
+/**
+ * @brief Function to parse a line of test.info file
+ * Examples:
+ *  - "input, in/placeholder_32:0, TF_INT32, [3, 4, 2, 3]"
+ *  - "output, result:0, TF_FLOAT, []"
+ *
+ * @return nullptr for a line that is empty after comment removal; otherwise a
+ *         ParsedTensor built from the four comma-separated fields.
+ *         Throws oops::UserExn on any malformed field.
+ */
+std::unique_ptr<ParsedTensor> parse_line(std::string &line)
+{
+  // parsed data
+  ParsedTensor::Kind kind;
+  std::string name;
+  TF_DataType dtype;
+  std::vector<int32_t> shape;
+
+  remove_comment(line);
+
+  if (line.length() == 0) // empty line or line with comment
+    return nullptr;
+
+  std::string tok, trimmed, dim;
+
+  std::istringstream line_stream(line);
+
+  CHECK_NOT_NULL(std::getline(line_stream, tok, ',')); // kind
+  kind = get_kind(trim(tok));
+
+  CHECK_NOT_NULL(std::getline(line_stream, tok, ',')); // tensor name
+  trimmed = trim(tok);
+  if (!validate_name(trimmed))
+    throw oops::UserExn("Tensor name in wrong format", "name", tok);
+  name.assign(trimmed);
+
+  CHECK_NOT_NULL(std::getline(line_stream, tok, ',')); // data type
+  dtype = get_dtype(trim(tok));
+
+  CHECK_NOT_NULL(std::getline(line_stream, tok, '[')); // start of shape
+  trimmed = trim(tok);
+  // anything between the data-type comma and '[' must be whitespace only
+  if (trimmed.length())
+    throw oops::UserExn("Unknown token between data type and shape", "token", tok);
+
+  CHECK_NOT_NULL(std::getline(line_stream, tok, ']'));
+
+  std::istringstream shape_stream(tok);
+
+  // 'first' lets the single empty token of a scalar shape like "[ ]" pass;
+  // any later empty dim (e.g., "[1,,2]" or a trailing comma) is an error
+  bool first = true;
+  while (std::getline(shape_stream, dim, ',')) // each dim
+  {
+    dim = trim(dim);
+
+    if (first && dim.length() == 0)
+      continue; // scalar
+    first = false;
+
+    if (dim.length() == 0)
+      throw oops::UserExn("Empty dim in shape", "shape", tok);
+
+    if (!validate_num(dim))
+      throw oops::UserExn("Dim in shape must be a number", "dim", dim);
+
+    shape.emplace_back(std::stoi(dim));
+  }
+
+  return stdex::make_unique<ParsedTensor>(kind, name, dtype, shape);
+}
+
+#undef CHECK_NOT_NULL
+
+// Read 'info_path' line by line and collect the parsed tensors.
+// Blank/comment-only lines make parse_line return nullptr and are skipped.
+// Throws oops::UserExn when the file cannot be opened.
+std::vector<std::unique_ptr<ParsedTensor>> parse(const char *info_path)
+{
+  std::ifstream infile(info_path);
+
+  if (infile.fail())
+  {
+    throw oops::UserExn("Fail to open file", "path", info_path);
+  }
+
+  std::vector<std::unique_ptr<ParsedTensor>> tensors;
+
+  std::string line;
+  while (std::getline(infile, line))
+  {
+    if (auto tensor = parse_line(line))
+      tensors.emplace_back(std::move(tensor));
+  }
+
+  return tensors;
+}
+
+} // namespace tftestinfo
+} // namespace support
+} // namespace nnkit
diff --git a/compiler/tfinfo/src/TensorInfoParser.test.cpp b/compiler/tfinfo/src/TensorInfoParser.test.cpp
new file mode 100644
index 000000000..73ec61c31
--- /dev/null
+++ b/compiler/tfinfo/src/TensorInfoParser.test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorInfoParser.cpp"
+#include "nnkit/support/tftestinfo/ParsedTensor.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+using namespace nnkit::support::tftestinfo;
+
+namespace
+{
+
+// One success-case row: the raw test.info line plus every value that
+// parse_line is expected to extract from it.
+struct TensorInfo
+{
+  std::string line;        // raw line fed to parse_line
+  ParsedTensor::Kind kind; // expected kind (Input/Output)
+  std::string name;        // expected tensor name
+  TF_DataType dtype;       // expected data type
+  uint32_t rank;           // expected shape rank
+  uint32_t dim[2];         // expected dims; only the first 'rank' entries are checked
+};
+
+TEST(NNKIT_TF_PARSER, success_case)
+{
+  // Each well-formed line must yield a tensor whose kind/name/dtype/shape
+  // match the expectations recorded alongside it.
+  // clang-format off
+  TensorInfo tc_list[] = {
+    {"input, in/placeholder_1:0, TF_FLOAT, [3, 2] # correct case",
+     ParsedTensor::Kind::Input, "in/placeholder_1:0", TF_FLOAT, 2, {3, 2} },
+
+    {"output, aa/bb.cc:0, TF_FLOAT, []", // empty shape
+     ParsedTensor::Kind::Output, "aa/bb.cc:0", TF_FLOAT, 0, {0, 0} },
+
+    {"output, aa:0, TF_FLOAT, [] # this is a comment", // string with comment
+     ParsedTensor::Kind::Output, "aa:0", TF_FLOAT, 0, {0, 0} },
+
+    {"output, ...:0, TF_FLOAT, [] # this is a comment", // name test. TF works with this name
+     ParsedTensor::Kind::Output, "...:0", TF_FLOAT, 0, {0, 0} },
+  };
+  // clang-format on
+
+  for (auto tc : tc_list)
+  {
+    std::unique_ptr<ParsedTensor> tensor = parse_line(tc.line);
+
+    ASSERT_EQ(tensor->kind(), tc.kind);
+    ASSERT_EQ(tensor->name(), tc.name);
+    ASSERT_EQ(tensor->dtype(), tc.dtype);
+    ASSERT_EQ(tensor->shape().rank(), tc.rank);
+    for (int d = 0; d < tc.rank; d++)
+      ASSERT_EQ(tensor->shape().dim(d), tc.dim[d]);
+  }
+}
+
+TEST(NNKIT_TF_PARSER, failure_case)
+{
+  // Every line below is malformed; parse_line must throw for each one.
+  // clang-format off
+  std::string exception_list[] = {
+    "WRONG_KIND, a:0, TF_FLOAT, [3, 2]",
+    "input, a:0, WRONG_TYPE, [3, 2]",
+    "input, a:0, TF_FLOAT, 3, 2", // missing brackets
+    "input, a:0, TF_FLOAT,, [3, 2]", // wrong commas, wrong brackets
+    "input, a:0, TF_FLOAT,, [3, 2,]", // wrong commas
+    "a:0, TF_FLOAT, [3, 2]", // missing kind
+    "input, TF_FLOAT, [3, 2]", // missing name
+    "input, a:0, [3, 2]", // missing type
+    "input, aa:0, TF_FLOAT", // missing shape
+    "input, aa, TF_FLOAT, [abc]", // wrong name
+    "input, a$a:0, TF_FLOAT, [abc]", // wrong name
+    "input, aa:a, TF_FLOAT, [abc]", // wrong name (wrong value index)
+    "input aa:a, TF_FLOAT, [1]", // missing comma, exception.what() is "A line must be either 'input' or 'output' but : input aa:a"
+    "input, aa:a TF_FLOAT, [1]", // missing comma
+    "input, aa:a, TF_FLOAT [1]", // missing comma,
+  };
+  // clang-format on
+
+  for (auto tc : exception_list)
+  {
+    try
+    {
+      parse_line(tc);
+      FAIL(); // a malformed line was accepted
+    }
+    catch (const std::exception &e)
+    {
+      // expected; print the message for manual inspection
+      std::cout << e.what() << '\n';
+    }
+  }
+}
+
+TEST(NNKIT_TF_PARSER, comment_case)
+{
+  // Lines with no content (empty or comment-only) must be skipped, which
+  // parse_line signals by returning nullptr instead of throwing.
+  // clang-format off
+  std::string tc_list[] = {
+    "", // empty line
+    "# this is a comment",
+  };
+  // clang-format on
+
+  for (auto tc : tc_list)
+  {
+    ASSERT_EQ(parse_line(tc), nullptr);
+  }
+}
+
+} // namespace
diff --git a/compiler/tfkit/CMakeLists.txt b/compiler/tfkit/CMakeLists.txt
new file mode 100644
index 000000000..b809658b1
--- /dev/null
+++ b/compiler/tfkit/CMakeLists.txt
@@ -0,0 +1,13 @@
+if(NOT TARGET mio_tf)
+ return()
+endif(NOT TARGET mio_tf)
+
+message(STATUS "Build tfkit: TRUE")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(tfkit ${SOURCES})
+target_link_libraries(tfkit PRIVATE stdex)
+target_link_libraries(tfkit PRIVATE cli)
+target_link_libraries(tfkit PRIVATE mio_tf)
+target_link_libraries(tfkit PRIVATE nncc_common)
diff --git a/compiler/tfkit/README.md b/compiler/tfkit/README.md
new file mode 100644
index 000000000..5a89971aa
--- /dev/null
+++ b/compiler/tfkit/README.md
@@ -0,0 +1,73 @@
+# tfkit
+
+## What is tfkit?
+
+`tfkit` is a tool for manipulating TensorFlow model files.
+
+## Tutorial: How to use?
+
+Currently it supports four operations: _decode_, _encode_, _unpack_, and _pack_.
+
+```
+nncc$ path_to_tfkit/tfkit
+ERROR: COMMAND is not provided
+
+USAGE: path_to_tfkit/tfkit [COMMAND] ...
+
+SUPPORTED COMMANDS:
+ decode
+ encode
+ unpack
+ pack
+```
+
+`decode` reads a binary graphdef file and shows its textual form.
+
+`encode` is the reverse of decode, it reads a textual graphdef file and prints
+its binary form.
+
+`unpack` decodes tensor value in byte encoded string in `tensor_content` field
+to human readable list of float values. currently only supports textual
+graphdef files.
+
+`pack` is the reverse of unpack. this can be used to change the values for
+debugging. also currently only supports textual graphdef files.
+
+Each command can read from or print to the console, or from/to a file if given
+through the arguments. The first argument is used as an input file path and the
+second as an output file path. If the second argument is omitted, output goes
+to the console. To read the input from the console, pass `-` as the first argument.
+
+### Examples
+
+Example to `decode`
+```
+nncc$ cat my_awesome_model.pb | path_to_tfkit/tfkit decode > decoded.pbtxt
+```
+```
+nncc$ cat my_awesome_model.pb | path_to_tfkit/tfkit decode - decoded.pbtxt
+```
+```
+nncc$ path_to_tfkit/tfkit decode my_awesome_model.pb > decoded.pbtxt
+```
+```
+nncc$ path_to_tfkit/tfkit decode my_awesome_model.pb decoded.pbtxt
+```
+
+Above four examples for `decode` command gives the same result. This applies
+to other commands.
+
+Example to `encode`
+```
+nncc$ cat decoded.pbtxt | path_to_tfkit/tfkit encode > encoded.pb
+```
+
+Example to `unpack`
+```
+nncc$ cat packed.pbtxt | path_to_tfkit/tfkit unpack > unpacked.pbtxt
+```
+
+Example to `pack`
+```
+nncc$ cat unpacked.pbtxt | path_to_tfkit/tfkit pack > packed.pbtxt
+```
diff --git a/compiler/tfkit/src/ConvertCommand.cpp b/compiler/tfkit/src/ConvertCommand.cpp
new file mode 100644
index 000000000..3e417cc78
--- /dev/null
+++ b/compiler/tfkit/src/ConvertCommand.cpp
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvertCommand.hpp"
+#include "Support.hpp"
+
+#include <stdex/Memory.h>
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+#include <google/protobuf/util/json_util.h>
+
+#include <cassert>
+#include <map>
+#include <string>
+
+// TODO Extract this as a library
+namespace
+{
+
+enum class DataFormat
+{
+ PBBIN,
+ PBTXT,
+ JSON,
+};
+
+struct Importer
+{
+ virtual ~Importer() = default;
+
+ virtual bool run(std::istream *, tensorflow::GraphDef &) const = 0;
+};
+
+struct Exporter
+{
+ virtual ~Exporter() = default;
+
+ virtual bool run(const tensorflow::GraphDef &, std::ostream *) const = 0;
+};
+
+template <DataFormat F> class ImporterImpl;
+
+template <> class ImporterImpl<DataFormat::PBTXT> final : public Importer
+{
+public:
+ bool run(std::istream *is, tensorflow::GraphDef &graph_def) const final
+ {
+ google::protobuf::io::IstreamInputStream iis{is};
+ return google::protobuf::TextFormat::Parse(&iis, &graph_def);
+ }
+};
+
+template <> class ImporterImpl<DataFormat::PBBIN> final : public Importer
+{
+public:
+ bool run(std::istream *is, tensorflow::GraphDef &graph_def) const final
+ {
+ google::protobuf::io::IstreamInputStream iis{is};
+ google::protobuf::io::CodedInputStream cis{&iis};
+ return graph_def.ParseFromCodedStream(&cis);
+ }
+};
+
+template <DataFormat F> class ExporterImpl;
+
+// Serialize a GraphDef as a JSON document to 'os'.
+template <> class ExporterImpl<DataFormat::JSON> final : public Exporter
+{
+public:
+  bool run(const tensorflow::GraphDef &graph_def, std::ostream *os) const final
+  {
+    std::string str;
+    // NOTE(review): the status returned by MessageToJsonString is ignored, so
+    // a failed conversion would emit an empty document while this method
+    // still returns true — confirm whether that is acceptable here.
+    google::protobuf::util::MessageToJsonString(graph_def, &str);
+    *os << str << std::endl;
+    return true;
+  }
+};
+
+} // namespace
+
+namespace tfkit
+{
+
+// Convert a TensorFlow GraphDef between serialization formats.
+// Returns 0 on success, 255 on bad usage or an import/export failure.
+int ConvertCommand::run(int argc, const char *const *argv) const
+{
+  tensorflow::GraphDef graph_def;
+
+  // NOTE The current implementation accepts only command-line for the following form:
+  //
+  //   tfkit convert --input-format (pb or pbtxt) --output-format json ...
+  //
+  // TODO Support more options
+  //
+  // Bad usage is now reported instead of assert()-ed: asserts vanish under
+  // NDEBUG, which left release builds reading arbitrary argv entries.
+  if (argc < 4 || std::string(argv[0]) != "--input-format" ||
+      std::string(argv[2]) != "--output-format")
+  {
+    std::cerr << "USAGE: tfkit convert --input-format (pb|pbtxt) --output-format json ..."
+              << std::endl;
+    return 255;
+  }
+  const std::string input_format{argv[1]};
+  const std::string output_format{argv[3]};
+
+  std::map<std::string, std::unique_ptr<Importer>> importers;
+
+  importers["pb"] = stdex::make_unique<ImporterImpl<DataFormat::PBBIN>>();
+  importers["pbtxt"] = stdex::make_unique<ImporterImpl<DataFormat::PBTXT>>();
+
+  std::map<std::string, std::unique_ptr<Exporter>> exporters;
+
+  exporters["json"] = stdex::make_unique<ExporterImpl<DataFormat::JSON>>();
+
+  // Reject unknown formats with a message instead of letting map::at raise
+  // an uncaught std::out_of_range.
+  if (importers.find(input_format) == importers.end() ||
+      exporters.find(output_format) == exporters.end())
+  {
+    std::cerr << "ERROR: Unsupported format" << std::endl;
+    return 255;
+  }
+
+  auto importer = importers.at(input_format).get();
+  auto exporter = exporters.at(output_format).get();
+
+  CmdArguments cmdargs(argc - 4, argv + 4);
+
+  auto ioconfig = make_ioconfig(cmdargs);
+
+  if (!importer->run(ioconfig->in(), graph_def))
+  {
+    std::cerr << "ERROR: Failed to import" << std::endl;
+    return 255;
+  }
+
+  if (!exporter->run(graph_def, ioconfig->out()))
+  {
+    std::cerr << "ERROR: Failed to export" << std::endl;
+    return 255;
+  }
+
+  return 0;
+}
+
+} // namespace tfkit
diff --git a/compiler/tfkit/src/ConvertCommand.hpp b/compiler/tfkit/src/ConvertCommand.hpp
new file mode 100644
index 000000000..78f446e47
--- /dev/null
+++ b/compiler/tfkit/src/ConvertCommand.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERT_COMMAND_H__
+#define __CONVERT_COMMAND_H__
+
+#include <cli/Command.h>
+
+namespace tfkit
+{
+
+struct ConvertCommand final : public cli::Command
+{
+ int run(int argc, const char *const *argv) const override;
+};
+
+} // namespace tfkit
+
+#endif // __CONVERT_COMMAND_H__
diff --git a/compiler/tfkit/src/DecodeCommand.cpp b/compiler/tfkit/src/DecodeCommand.cpp
new file mode 100644
index 000000000..ca7603f65
--- /dev/null
+++ b/compiler/tfkit/src/DecodeCommand.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DecodeCommand.hpp"
+#include "Support.hpp"
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+namespace tfkit
+{
+
+/**
+ * @brief Decode a model: read a binary (wire-format) GraphDef from the
+ *        input stream and print it in protobuf text format.
+ *
+ * Streams come from make_ioconfig(): positional args 0/1 are the
+ * input/output paths, where "-" (or absence) means stdin/stdout.
+ *
+ * @return 0 on success, 255 on parse or print failure
+ */
+int DecodeCommand::run(int argc, const char *const *argv) const
+{
+  tensorflow::GraphDef graph_def;
+
+  CmdArguments cmdargs(argc, argv);
+
+  auto ioconfig = make_ioconfig(cmdargs);
+
+  google::protobuf::io::IstreamInputStream is{ioconfig->in()};
+  google::protobuf::io::CodedInputStream coded_is{&is};
+
+  if (!graph_def.ParseFromCodedStream(&coded_is))
+  {
+    std::cerr << "ERROR: Failed to parse tensorflow model" << std::endl;
+    return 255;
+  }
+
+  google::protobuf::io::OstreamOutputStream os{ioconfig->out()};
+  // Surface output failures instead of ignoring Print()'s result,
+  // matching the error handling of the parse step above.
+  if (!google::protobuf::TextFormat::Print(graph_def, &os))
+  {
+    std::cerr << "ERROR: Failed to print tensorflow model" << std::endl;
+    return 255;
+  }
+
+  return 0;
+}
+
+} // namespace tfkit
diff --git a/compiler/tfkit/src/DecodeCommand.hpp b/compiler/tfkit/src/DecodeCommand.hpp
new file mode 100644
index 000000000..cea4e6a85
--- /dev/null
+++ b/compiler/tfkit/src/DecodeCommand.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DECODE_COMMAND_H__
+#define __DECODE_COMMAND_H__
+
+#include <cli/Command.h>
+
+namespace tfkit
+{
+
+// CLI sub-command that decodes a binary GraphDef into protobuf text
+// format (implementation in DecodeCommand.cpp).
+struct DecodeCommand final : public cli::Command
+{
+  // argv holds this sub-command's own arguments; returns 0 on success.
+  int run(int argc, const char *const *argv) const override;
+};
+
+} // namespace tfkit
+
+#endif // __DECODE_COMMAND_H__
diff --git a/compiler/tfkit/src/EncodeCommand.cpp b/compiler/tfkit/src/EncodeCommand.cpp
new file mode 100644
index 000000000..65c878587
--- /dev/null
+++ b/compiler/tfkit/src/EncodeCommand.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EncodeCommand.hpp"
+#include "Support.hpp"
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+namespace tfkit
+{
+
+/**
+ * @brief Encode a model: read a text-format (pbtxt) GraphDef from the
+ *        input stream and write it in binary wire format to the output.
+ *
+ * Streams come from make_ioconfig(): positional args 0/1 are the
+ * input/output paths, where "-" (or absence) means stdin/stdout.
+ *
+ * @return 0 on success, 255 on parse or serialize failure
+ */
+int EncodeCommand::run(int argc, const char *const *argv) const
+{
+  tensorflow::GraphDef graph_def;
+
+  CmdArguments cmdargs(argc, argv);
+
+  auto ioconfig = make_ioconfig(cmdargs);
+
+  google::protobuf::io::IstreamInputStream is{ioconfig->in()};
+
+  if (!google::protobuf::TextFormat::Parse(&is, &graph_def))
+  {
+    std::cerr << "ERROR: Failed to parse prototxt" << std::endl;
+    return 255;
+  }
+
+  // The coded stream must outlive serialization but flush before os does;
+  // both are destroyed in reverse declaration order at function exit.
+  google::protobuf::io::OstreamOutputStream os{ioconfig->out()};
+  google::protobuf::io::CodedOutputStream coded_os{&os};
+
+  if (!graph_def.SerializeToCodedStream(&coded_os))
+  {
+    std::cerr << "ERROR: Failed to serialize" << std::endl;
+    return 255;
+  }
+
+  return 0;
+}
+
+} // namespace tfkit
diff --git a/compiler/tfkit/src/EncodeCommand.hpp b/compiler/tfkit/src/EncodeCommand.hpp
new file mode 100644
index 000000000..2137413a4
--- /dev/null
+++ b/compiler/tfkit/src/EncodeCommand.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ENCODE_COMMAND_H__
+#define __ENCODE_COMMAND_H__
+
+#include <cli/Command.h>
+
+namespace tfkit
+{
+
+// CLI sub-command that encodes a text-format (pbtxt) GraphDef into
+// binary wire format (implementation in EncodeCommand.cpp).
+struct EncodeCommand final : public cli::Command
+{
+  // argv holds this sub-command's own arguments; returns 0 on success.
+  int run(int argc, const char *const *argv) const override;
+};
+
+} // namespace tfkit
+
+#endif // __ENCODE_COMMAND_H__
diff --git a/compiler/tfkit/src/Main.cpp b/compiler/tfkit/src/Main.cpp
new file mode 100644
index 000000000..60bd6abfa
--- /dev/null
+++ b/compiler/tfkit/src/Main.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EncodeCommand.hpp"
+#include "DecodeCommand.hpp"
+#include "UnpackCommand.hpp"
+#include "PackCommand.hpp"
+#include "ConvertCommand.hpp"
+
+#include <cli/App.h>
+#include <stdex/Memory.h>
+
+// tfkit entry point: registers the available sub-commands
+// (encode/decode/unpack/pack/convert) and dispatches to the one named
+// by the first command-line argument.
+int main(int argc, char **argv)
+{
+  cli::App app{argv[0]};
+
+  app.insert("encode", stdex::make_unique<tfkit::EncodeCommand>());
+  app.insert("decode", stdex::make_unique<tfkit::DecodeCommand>());
+  app.insert("unpack", stdex::make_unique<tfkit::UnpackCommand>());
+  app.insert("pack", stdex::make_unique<tfkit::PackCommand>());
+  app.insert("convert", stdex::make_unique<tfkit::ConvertCommand>());
+
+  // Skip the program name so the App sees only the sub-command and its args.
+  return app.run(argc - 1, argv + 1);
+}
diff --git a/compiler/tfkit/src/PackCommand.cpp b/compiler/tfkit/src/PackCommand.cpp
new file mode 100644
index 000000000..36bf5a71c
--- /dev/null
+++ b/compiler/tfkit/src/PackCommand.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PackCommand.hpp"
+#include "Support.hpp"
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <cassert>
+#include <stdexcept>
+#include <vector>
+
+namespace
+{
+
+template <typename T> void pack(tensorflow::TensorProto *);
+
+// Convert the repeated float_val representation of a tensor into the
+// packed tensor_content byte string, in place. A tensor that already
+// carries only tensor_content is left untouched.
+template <> void pack<float>(tensorflow::TensorProto *input_tensor)
+{
+  const auto &input_shape = input_tensor->tensor_shape();
+  assert(input_shape.dim_size() <= 6);
+  // -1 means the shape proto carries no dimension info (see GetElementCount)
+  int input_flat_size = tfkit::tf::GetElementCount(input_shape);
+
+  // Adjust where shape is not set but actual value exist
+  if (input_tensor->float_val().size() > 0 && input_flat_size == -1)
+  {
+    input_flat_size = input_tensor->float_val().size();
+  }
+
+  if (input_tensor->float_val().size() == 0)
+  {
+    // There may be tensor_content and we don't need to do anything as it is
+    // already packed format
+  }
+  else if (input_tensor->float_val().size() == input_flat_size)
+  {
+    // clang-format off
+    // TODO fix indentation
+    input_tensor->clear_tensor_content();
+
+    std::vector<float> tensor_content;
+    for (int i = 0; i < input_flat_size; ++i)
+    {
+      tensor_content.push_back(input_tensor->float_val(i));
+    }
+
+    // Reinterpret the float vector as raw bytes for tensor_content
+    input_tensor->set_tensor_content(std::string(
+        reinterpret_cast<const char *>(tensor_content.data()), sizeof(float) * input_flat_size));
+
+    input_tensor->clear_float_val();
+    // clang-format on
+  }
+  else
+  {
+    // Partial or mismatched value counts are not supported yet
+    throw std::runtime_error{"Number of elements mismatch in pack<float>."};
+    // TODO: support for these
+  }
+}
+
+// Convert the repeated int_val representation of a tensor into the
+// packed tensor_content byte string, in place (int32_t counterpart of
+// pack<float> above).
+template <> void pack<int32_t>(tensorflow::TensorProto *input_tensor)
+{
+  const auto &input_shape = input_tensor->tensor_shape();
+  assert(input_shape.dim_size() <= 6);
+  // -1 means the shape proto carries no dimension info (see GetElementCount)
+  int input_flat_size = tfkit::tf::GetElementCount(input_shape);
+
+  // Adjust where shape is not set but actual value exist
+  if (input_tensor->int_val().size() > 0 && input_flat_size == -1)
+  {
+    input_flat_size = input_tensor->int_val().size();
+  }
+
+  if (input_tensor->int_val().size() == 0)
+  {
+    // There may be tensor_content and we don't need to do anything as it is
+    // already packed format
+  }
+  else if (input_tensor->int_val().size() == input_flat_size)
+  {
+    input_tensor->clear_tensor_content();
+
+    std::vector<int32_t> tensor_content;
+    for (int i = 0; i < input_flat_size; ++i)
+    {
+      tensor_content.push_back(input_tensor->int_val(i));
+    }
+
+    // Reinterpret the int32_t vector as raw bytes for tensor_content
+    input_tensor->set_tensor_content(std::string(
+        reinterpret_cast<const char *>(tensor_content.data()), sizeof(int32_t) * input_flat_size));
+
+    input_tensor->clear_int_val();
+  }
+  else
+  {
+    // Partial or mismatched value counts are not supported yet
+    throw std::runtime_error{"Number of elements mismatch in pack<int32_t>."};
+    // TODO: support for these
+  }
+}
+
+// Walk every node of the graph and pack the typed repeated value fields
+// of each Const node's "value" tensor into tensor_content, dispatching
+// on the node's "dtype" attribute.
+// NOTE(review): unlike unpack(), DT_INT8 is not handled here -- confirm
+// this asymmetry is intentional.
+void pack(tensorflow::GraphDef &graph_def)
+{
+  auto nodes = graph_def.mutable_node();
+  for (int i = 0; i < nodes->size(); ++i)
+  {
+    tensorflow::NodeDef *n = nodes->Mutable(i);
+    // TODO: handle for other operators
+    if (n->op() == "Const")
+    {
+      const auto dtype = tfkit::tf::GetDataTypeAttr(*n, "dtype");
+      tensorflow::TensorProto *tensor = tfkit::tf::GetTensorAttr(*n, "value");
+
+      switch (dtype)
+      {
+      case tensorflow::DT_FLOAT:
+        pack<float>(tensor);
+        break;
+      case tensorflow::DT_INT32:
+        pack<int32_t>(tensor);
+        break;
+      default:
+        throw std::runtime_error{"Unsupported dtype"};
+      }
+    }
+  }
+}
+
+} // namespace
+
+namespace tfkit
+{
+
+/**
+ * @brief Pack a model: parse a pbtxt GraphDef, convert each Const node's
+ *        typed value fields (e.g. float_val) into packed tensor_content,
+ *        and print the result as pbtxt.
+ *
+ * @return 0 on success, 255 on parse or print failure
+ */
+int PackCommand::run(int argc, const char *const *argv) const
+{
+  tensorflow::GraphDef graph_def;
+
+  CmdArguments cmdargs(argc, argv);
+
+  auto ioconfig = make_ioconfig(cmdargs);
+
+  google::protobuf::io::IstreamInputStream is{ioconfig->in()};
+
+  if (!google::protobuf::TextFormat::Parse(&is, &graph_def))
+  {
+    std::cerr << "ERROR: Failed to parse prototxt" << std::endl;
+    return 255;
+  }
+
+  // convert float_val to tensor_content
+  pack(graph_def);
+
+  google::protobuf::io::OstreamOutputStream os{ioconfig->out()};
+  // Surface output failures instead of ignoring Print()'s result
+  if (!google::protobuf::TextFormat::Print(graph_def, &os))
+  {
+    std::cerr << "ERROR: Failed to print prototxt" << std::endl;
+    return 255;
+  }
+
+  return 0;
+}
+
+} // namespace tfkit
diff --git a/compiler/tfkit/src/PackCommand.hpp b/compiler/tfkit/src/PackCommand.hpp
new file mode 100644
index 000000000..0114220d9
--- /dev/null
+++ b/compiler/tfkit/src/PackCommand.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PACK_COMMAND_H__
+#define __PACK_COMMAND_H__
+
+#include <cli/Command.h>
+
+namespace tfkit
+{
+
+// CLI sub-command that packs a pbtxt GraphDef's Const values into
+// tensor_content form (implementation in PackCommand.cpp).
+struct PackCommand final : public cli::Command
+{
+  // argv holds this sub-command's own arguments; returns 0 on success.
+  int run(int argc, const char *const *argv) const override;
+};
+
+} // namespace tfkit
+
+#endif // __PACK_COMMAND_H__
diff --git a/compiler/tfkit/src/Support.cpp b/compiler/tfkit/src/Support.cpp
new file mode 100644
index 000000000..40d8705a7
--- /dev/null
+++ b/compiler/tfkit/src/Support.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Support.hpp"
+
+#include <stdex/Memory.h>
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+#include <cassert>
+#include <fstream>
+#include <stdexcept>
+
+namespace
+{
+
+// Open a file stream of type T (std::ifstream or std::ofstream) for
+// 'path'. Returns nullptr when path is "-", which callers treat as
+// "use stdin/stdout" (see IOConfiguration); throws on open failure.
+template <typename T>
+std::unique_ptr<T> open_fstream(const std::string &path, std::ios_base::openmode mode)
+{
+  if (path == "-")
+  {
+    return nullptr;
+  }
+
+  auto stream = stdex::make_unique<T>(path.c_str(), mode);
+  if (!stream->is_open())
+  {
+    throw std::runtime_error{"ERROR: Failed to open " + path};
+  }
+  return stream;
+}
+
+} // namespace
+
+namespace tfkit
+{
+namespace tf
+{
+
+// Tell whether 'node' carries an attribute named 'attr_name'.
+bool HasAttr(const tensorflow::NodeDef &node, const std::string &attr_name)
+{
+  const auto &attrs = node.attr();
+  return attrs.find(attr_name) != attrs.end();
+}
+
+// Return the DataType stored in attribute 'attr_name' of 'node'.
+// Precondition (checked only by assert): the attribute exists and holds
+// a 'type' value.
+tensorflow::DataType GetDataTypeAttr(const tensorflow::NodeDef &node, const std::string &attr_name)
+{
+  assert(HasAttr(node, attr_name));
+  const auto &attr = node.attr().at(attr_name);
+  assert(attr.value_case() == tensorflow::AttrValue::kType);
+  return attr.type();
+}
+
+// Return a mutable pointer to the TensorProto stored in attribute
+// 'attr_name' of 'node'; the pointer is owned by 'node'.
+// Precondition (checked only by assert): the attribute exists and holds
+// a 'tensor' value.
+tensorflow::TensorProto *GetTensorAttr(tensorflow::NodeDef &node, const std::string &attr_name)
+{
+  assert(HasAttr(node, attr_name));
+  tensorflow::AttrValue &attr = node.mutable_attr()->at(attr_name);
+  assert(attr.value_case() == tensorflow::AttrValue::kTensor);
+  return attr.mutable_tensor();
+}
+
+// Return the number of elements implied by 'shape':
+//  * -1 when the shape has no dimensions at all (see header note)
+//  * 0 when any dimension has size 0
+//  * the product of dimension sizes otherwise
+// NOTE(review): unknown dimensions (size -1 in TensorShapeProto) are not
+// treated specially and would make the product negative -- confirm that
+// callers only pass fully-defined shapes.
+int GetElementCount(const tensorflow::TensorShapeProto &shape)
+{
+  int count = -1;
+
+  for (auto &d : shape.dim())
+  {
+    if (d.size() == 0)
+    {
+      count = 0;
+      break;
+    }
+    // First dimension switches from "no shape" (-1) to an actual product
+    if (count == -1)
+      count = 1;
+
+    count *= d.size();
+  }
+  return count;
+}
+
+} // namespace tf
+
+// Return the positional argument at 'index'; out-of-range access throws.
+std::string CmdArguments::get(unsigned int index) const
+{
+  if (!(index < _argc))
+  {
+    throw std::runtime_error("Argument index out of bound");
+  }
+
+  return std::string{_argv[index]};
+}
+
+// Return the positional argument at 'index', or the fallback 's' when
+// no such argument was given.
+std::string CmdArguments::get_or(unsigned int index, const std::string &s) const
+{
+  return (index < _argc) ? std::string{_argv[index]} : s;
+}
+
+// Build the I/O configuration from positional arguments: argument 0 is
+// the input path and argument 1 the output path; "-" (or absence)
+// selects stdin/stdout. Both streams are opened in binary mode.
+std::unique_ptr<IOConfiguration> make_ioconfig(const CmdArguments &cmdargs)
+{
+  auto iocfg = stdex::make_unique<IOConfiguration>();
+
+  auto in = open_fstream<std::ifstream>(cmdargs.get_or(0, "-"), std::ios::in | std::ios::binary);
+  iocfg->in(std::move(in));
+
+  auto out = open_fstream<std::ofstream>(cmdargs.get_or(1, "-"), std::ios::out | std::ios::binary);
+  iocfg->out(std::move(out));
+
+  return iocfg;
+}
+
+} // namespace tfkit
diff --git a/compiler/tfkit/src/Support.hpp b/compiler/tfkit/src/Support.hpp
new file mode 100644
index 000000000..a5b954d5e
--- /dev/null
+++ b/compiler/tfkit/src/Support.hpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SUPPORT_H__
+#define __SUPPORT_H__
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+#include <iostream>
+#include <string>
+
+namespace tfkit
+{
+namespace tf
+{
+
+bool HasAttr(const tensorflow::NodeDef &, const std::string &);
+tensorflow::DataType GetDataTypeAttr(const tensorflow::NodeDef &, const std::string &);
+tensorflow::TensorProto *GetTensorAttr(tensorflow::NodeDef &, const std::string &);
+/// GetElementCount returns -1 for rank-0 tensor shape
+int GetElementCount(const tensorflow::TensorShapeProto &);
+
+} // namespace tf
+
+// Read-only, bounds-checked view over an (argc, argv) pair.
+class CmdArguments
+{
+public:
+  CmdArguments() = delete;
+  // argc is assumed non-negative; stored unsigned for the index
+  // comparisons in get()/get_or().
+  CmdArguments(int argc, const char *const *argv)
+      : _argc(static_cast<unsigned int>(argc)), _argv{argv}
+  {
+  }
+
+  // Returns argument 'index'; throws std::runtime_error when out of range.
+  std::string get(unsigned int index) const;
+  // Returns argument 'index', or the given fallback when out of range.
+  std::string get_or(unsigned int index, const std::string &) const;
+
+private:
+  unsigned int _argc;       // number of arguments
+  const char *const *_argv; // not owned
+};
+
+// Holds optional owned input/output streams and falls back to
+// std::cin/std::cout when none were installed (the "-" path case).
+class IOConfiguration
+{
+public:
+  std::istream *in() const { return _in ? _in.get() : &std::cin; }
+  std::ostream *out() const { return _out ? _out.get() : &std::cout; }
+
+public:
+  // Install owned streams; passing nullptr keeps the std fallback.
+  void in(std::unique_ptr<std::istream> &&in) { _in = std::move(in); }
+  void out(std::unique_ptr<std::ostream> &&out) { _out = std::move(out); }
+
+private:
+  std::unique_ptr<std::istream> _in;  // owned; may be null
+  std::unique_ptr<std::ostream> _out; // owned; may be null
+};
+
+std::unique_ptr<IOConfiguration> make_ioconfig(const CmdArguments &cmdargs);
+
+} // namespace tfkit
+
+#endif // __SUPPORT_H__
diff --git a/compiler/tfkit/src/UnpackCommand.cpp b/compiler/tfkit/src/UnpackCommand.cpp
new file mode 100644
index 000000000..77ec1edd8
--- /dev/null
+++ b/compiler/tfkit/src/UnpackCommand.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UnpackCommand.hpp"
+#include "Support.hpp"
+
+#include <tensorflow/core/framework/graph.pb.h>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+template <typename T> void unpack(tensorflow::TensorProto *);
+
+// Expand the packed tensor_content bytes of a tensor into the repeated
+// float_val field, in place. A tensor without tensor_content is left
+// untouched.
+template <> void unpack<float>(tensorflow::TensorProto *input_tensor)
+{
+  const auto &input_shape = input_tensor->tensor_shape();
+  assert(input_shape.dim_size() <= 6);
+  // -1 means the shape proto carries no dimension info (see GetElementCount)
+  int input_flat_size = tfkit::tf::GetElementCount(input_shape);
+
+  // Adjust where shape is not set but actual value exist
+  if (input_tensor->tensor_content().size() > 0 && input_flat_size == -1)
+  {
+    input_flat_size = input_tensor->tensor_content().size() / sizeof(float);
+  }
+
+  if (input_tensor->tensor_content().size() == 0)
+  {
+    // Do nothing as there is no tensor content to unpack
+  }
+  else if (input_tensor->tensor_content().size() == input_flat_size * sizeof(float))
+  {
+    // clang-format off
+    // TODO fix indentation
+    input_tensor->clear_float_val();
+
+    // Reinterpret the raw bytes as a float array and copy element-wise
+    const float *tensor_content =
+        reinterpret_cast<const float *>(input_tensor->tensor_content().data());
+    for (int i = 0; i < input_flat_size; i++)
+    {
+      input_tensor->add_float_val(tensor_content[i]);
+    }
+    input_tensor->clear_tensor_content();
+    // clang-format on
+  }
+  else
+  {
+    // Byte count does not match the declared element count
+    throw std::runtime_error{"Number of elements mismatch in unpack<float>."};
+    // TODO: support for these
+  }
+}
+
+// Expand packed tensor_content into the repeated int_val field, in
+// place (int32_t counterpart of unpack<float> above).
+template <> void unpack<int32_t>(tensorflow::TensorProto *input_tensor)
+{
+  const auto &input_shape = input_tensor->tensor_shape();
+  assert(input_shape.dim_size() <= 6);
+  // -1 means the shape proto carries no dimension info (see GetElementCount)
+  int input_flat_size = tfkit::tf::GetElementCount(input_shape);
+
+  // Adjust where shape is not set but actual value exist
+  if (input_tensor->tensor_content().size() > 0 && input_flat_size == -1)
+  {
+    input_flat_size = input_tensor->tensor_content().size() / sizeof(int32_t);
+  }
+
+  if (input_tensor->tensor_content().size() == 0)
+  {
+    // Do nothing as there is no tensor content to unpack
+  }
+  else if (input_tensor->tensor_content().size() == input_flat_size * sizeof(int32_t))
+  {
+    input_tensor->clear_int_val();
+
+    // Reinterpret the raw bytes as an int32_t array and copy element-wise
+    const int32_t *tensor_content =
+        reinterpret_cast<const int32_t *>(input_tensor->tensor_content().data());
+    for (int i = 0; i < input_flat_size; i++)
+    {
+      input_tensor->add_int_val(tensor_content[i]);
+    }
+    input_tensor->clear_tensor_content();
+  }
+  else
+  {
+    // Byte count does not match the declared element count
+    throw std::runtime_error{"Number of elements mismatch in unpack<int32_t>."};
+    // TODO: support for these
+  }
+}
+
+// Expand packed tensor_content into the repeated int_val field, in
+// place. DT_INT8 values are widened into int_val, the field TensorProto
+// uses for small integer types.
+template <> void unpack<int8_t>(tensorflow::TensorProto *input_tensor)
+{
+  const auto &input_shape = input_tensor->tensor_shape();
+  assert(input_shape.dim_size() <= 6);
+  // -1 means the shape proto carries no dimension info (see GetElementCount)
+  int input_flat_size = tfkit::tf::GetElementCount(input_shape);
+
+  // Adjust where shape is not set but actual value exist
+  if (input_tensor->tensor_content().size() > 0 && input_flat_size == -1)
+  {
+    input_flat_size = input_tensor->tensor_content().size() / sizeof(int8_t);
+  }
+
+  if (input_tensor->tensor_content().size() == 0)
+  {
+    // Do nothing as there is no tensor content to unpack
+  }
+  else if (input_tensor->tensor_content().size() == input_flat_size * sizeof(int8_t))
+  {
+    input_tensor->clear_int_val();
+
+    // Reinterpret the raw bytes as an int8_t array and copy element-wise
+    const int8_t *tensor_content =
+        reinterpret_cast<const int8_t *>(input_tensor->tensor_content().data());
+    for (int i = 0; i < input_flat_size; i++)
+    {
+      input_tensor->add_int_val(tensor_content[i]);
+    }
+    input_tensor->clear_tensor_content();
+  }
+  else
+  {
+    // Byte count does not match the declared element count
+    throw std::runtime_error{"Number of elements mismatch in unpack<int8_t>."};
+    // TODO: support for these
+  }
+}
+
+// Walk every node of the graph and unpack the tensor_content of each
+// Const node's "value" tensor into typed repeated fields, dispatching
+// on the node's "dtype" attribute.
+void unpack(tensorflow::GraphDef &graph_def)
+{
+  auto nodes = graph_def.mutable_node();
+  for (int i = 0; i < nodes->size(); ++i)
+  {
+    tensorflow::NodeDef *n = nodes->Mutable(i);
+    // TODO: handle for other operators
+    if (n->op() == "Const")
+    {
+      const auto dtype = tfkit::tf::GetDataTypeAttr(*n, "dtype");
+      tensorflow::TensorProto *tensor = tfkit::tf::GetTensorAttr(*n, "value");
+
+      switch (dtype)
+      {
+      case tensorflow::DT_FLOAT:
+        unpack<float>(tensor);
+        break;
+      case tensorflow::DT_INT32:
+        unpack<int32_t>(tensor);
+        break;
+      case tensorflow::DT_INT8:
+        unpack<int8_t>(tensor);
+        break;
+      default:
+        throw std::runtime_error{"Unsupported dtype"};
+      }
+    }
+  }
+}
+
+} // namespace
+
+namespace tfkit
+{
+
+/**
+ * @brief Unpack a model: parse a pbtxt GraphDef, expand each Const
+ *        node's packed tensor_content into typed value fields, and
+ *        print the result as pbtxt.
+ *
+ * @return 0 on success, 255 on parse or print failure
+ */
+int UnpackCommand::run(int argc, const char *const *argv) const
+{
+  tensorflow::GraphDef graph_def;
+
+  CmdArguments cmdargs(argc, argv);
+
+  auto ioconfig = make_ioconfig(cmdargs);
+
+  google::protobuf::io::IstreamInputStream is{ioconfig->in()};
+
+  if (!google::protobuf::TextFormat::Parse(&is, &graph_def))
+  {
+    std::cerr << "ERROR: Failed to parse prototxt" << std::endl;
+    return 255;
+  }
+
+  // convert tensor_content to float_val
+  unpack(graph_def);
+
+  google::protobuf::io::OstreamOutputStream os{ioconfig->out()};
+  // Surface output failures instead of ignoring Print()'s result
+  if (!google::protobuf::TextFormat::Print(graph_def, &os))
+  {
+    std::cerr << "ERROR: Failed to print prototxt" << std::endl;
+    return 255;
+  }
+
+  return 0;
+}
+
+} // namespace tfkit
diff --git a/compiler/tfkit/src/UnpackCommand.hpp b/compiler/tfkit/src/UnpackCommand.hpp
new file mode 100644
index 000000000..e6b31142d
--- /dev/null
+++ b/compiler/tfkit/src/UnpackCommand.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UNPACK_COMMAND_H__
+#define __UNPACK_COMMAND_H__
+
+#include <cli/Command.h>
+
+namespace tfkit
+{
+
+// CLI sub-command that unpacks tensor_content of Const nodes in a pbtxt
+// GraphDef into typed value fields (implementation in UnpackCommand.cpp).
+struct UnpackCommand final : public cli::Command
+{
+  // argv holds this sub-command's own arguments; returns 0 on success.
+  int run(int argc, const char *const *argv) const override;
+};
+
+} // namespace tfkit
+
+#endif // __UNPACK_COMMAND_H__
diff --git a/compiler/tfl-inspect/CMakeLists.txt b/compiler/tfl-inspect/CMakeLists.txt
new file mode 100644
index 000000000..c2c7dfe9d
--- /dev/null
+++ b/compiler/tfl-inspect/CMakeLists.txt
@@ -0,0 +1,13 @@
+if(NOT TARGET mio_tflite)
+ return()
+endif(NOT TARGET mio_tflite)
+
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(tfl-inspect ${DRIVER} ${SOURCES})
+target_include_directories(tfl-inspect PRIVATE src)
+target_link_libraries(tfl-inspect mio_tflite)
+target_link_libraries(tfl-inspect safemain)
+target_link_libraries(tfl-inspect stdex)
diff --git a/compiler/tfl-inspect/README.md b/compiler/tfl-inspect/README.md
new file mode 100644
index 000000000..65dacc8a2
--- /dev/null
+++ b/compiler/tfl-inspect/README.md
@@ -0,0 +1,51 @@
+# tfl-inspect
+
+_tfl-inspect_ allows users to retrieve various information from TensorFlow Lite model files
+
+## Information to inspect
+
+#### --operators
+
+Operators with `--operators`
+- show operator codes one line at a time in execution order
+
+Example
+```
+$ tfl-inspect --operators model.tflite
+```
+
+Result
+```
+RESHAPE
+DEPTHWISE_CONV_2D
+ADD
+```
+
+To get the count of specific operator, use other tools like sort, uniq, etc.
+
+Example
+```
+$ tfl-inspect --operators inception_v3.tflite | sort | uniq -c
+```
+Result
+```
+ 10 AVERAGE_POOL_2D
+ 15 CONCATENATION
+ 95 CONV_2D
+ 4 MAX_POOL_2D
+ 1 RESHAPE
+ 1 SOFTMAX
+```
+
+#### --conv2d_weight
+
+Conv2D series weight input node type with `--conv2d_weight`
+- shows Conv2D series node weight input node type
+- Conv2D series: CONV2D, DEPTHWISE_CONV_2D
+
+Example result
+```
+CONV2D,CONST
+DEPTHWISE_CONV_2D,RELU
+CONV2D,CONST
+```
diff --git a/compiler/tfl-inspect/driver/Driver.cpp b/compiler/tfl-inspect/driver/Driver.cpp
new file mode 100644
index 000000000..5cad63c4b
--- /dev/null
+++ b/compiler/tfl-inspect/driver/Driver.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Model.h"
+#include "Dump.h"
+
+#include <stdex/Memory.h>
+
+#include <functional>
+#include <iostream>
+#include <map>
+#include <vector>
+#include <string>
+
+using OptionHook = std::function<std::unique_ptr<tflinspect::DumpInterface>(void)>;
+
+/**
+ * @brief tfl-inspect driver: parse the dump options, load the tflite
+ *        file given as the last argument, and run each requested dump
+ *        against the loaded model in option order.
+ *
+ * @return 0 on success, 255 on usage or load errors
+ */
+int entry(int argc, char **argv)
+{
+  if (argc < 3)
+  {
+    std::cerr << "ERROR: Failed to parse arguments" << std::endl;
+    std::cerr << std::endl;
+    std::cerr << "USAGE: " << argv[0] << " [options] [tflite]" << std::endl;
+    std::cerr << " --operators : dump operators in tflite file" << std::endl;
+    std::cerr << " --conv2d_weight : dump Conv2D series weight operators in tflite file"
+              << std::endl;
+    return 255;
+  }
+
+  // Simple argument parser (based on map)
+  std::map<std::string, OptionHook> argparse;
+
+  argparse["--operators"] = [&](void) {
+    // dump all operators; no std::move -- returning the prvalue directly
+    // avoids a pessimizing move and still converts to the interface type
+    return stdex::make_unique<tflinspect::DumpOperators>();
+  };
+
+  argparse["--conv2d_weight"] = [&](void) {
+    // dump Conv2D, DepthwiseConv2D weight operators
+    return stdex::make_unique<tflinspect::DumpConv2DWeight>();
+  };
+
+  std::vector<std::unique_ptr<tflinspect::DumpInterface>> dumps;
+
+  for (int n = 1; n < argc - 1; ++n)
+  {
+    const std::string tag{argv[n]};
+
+    auto it = argparse.find(tag);
+    if (it == argparse.end())
+    {
+      std::cerr << "Option '" << tag << "' is not supported" << std::endl;
+      return 255;
+    }
+    auto dump = it->second();
+    // NOTE(review): 'assert' is used but <cassert> is not in this file's
+    // include list; appears to rely on a transitive include -- confirm.
+    assert(dump != nullptr);
+    dumps.push_back(std::move(dump));
+  }
+
+  std::string model_file = argv[argc - 1];
+
+  // Load TF lite model from a tflite file
+  auto model = tflinspect::load_tflite(model_file);
+  if (model == nullptr)
+  {
+    std::cerr << "ERROR: Failed to load tflite '" << model_file << "'" << std::endl;
+    return 255;
+  }
+
+  const tflite::Model *tflmodel = model->model();
+  if (tflmodel == nullptr)
+  {
+    std::cerr << "ERROR: Failed to load tflite '" << model_file << "'" << std::endl;
+    return 255;
+  }
+
+  for (auto &dump : dumps)
+  {
+    dump->run(std::cout, tflmodel);
+  }
+
+  return 0;
+}
diff --git a/compiler/tfl-inspect/requires.cmake b/compiler/tfl-inspect/requires.cmake
new file mode 100644
index 000000000..2aa101e02
--- /dev/null
+++ b/compiler/tfl-inspect/requires.cmake
@@ -0,0 +1,3 @@
+require("mio-tflite")
+require("safemain")
+require("stdex")
diff --git a/compiler/tfl-inspect/src/Dump.cpp b/compiler/tfl-inspect/src/Dump.cpp
new file mode 100644
index 000000000..8d879a84e
--- /dev/null
+++ b/compiler/tfl-inspect/src/Dump.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dump.h"
+#include "Reader.h"
+
+#include <string>
+#include <ostream>
+#include <stdexcept>
+
+namespace tflinspect
+{
+
+void DumpOperators::run(std::ostream &os, const tflite::Model *model)
+{
+ tflinspect::Reader reader(model);
+
+ assert(reader.num_subgraph() == 1);
+ reader.select_subgraph(0);
+
+ auto ops = reader.operators();
+
+ // dump operators
+ for (uint32_t i = 0; i < ops->Length(); ++i)
+ {
+ const auto op = ops->Get(i);
+
+ auto op_name = reader.opcode_name(op);
+
+ os << op_name << std::endl;
+ }
+}
+
+} // namespace tflinspect
+
+namespace
+{
+
+const tflite::Operator *operator_match_output(tflinspect::Reader &reader, const int32_t tensor)
+{
+ auto ops = reader.operators();
+
+ for (uint32_t i = 0; i < ops->Length(); ++i)
+ {
+ const auto op = ops->Get(i);
+
+ const std::vector<int32_t> &outputs = tflinspect::as_index_vector(op->outputs());
+
+ for (auto output : outputs)
+ {
+ if (output == tensor)
+ return op;
+ }
+ }
+ return nullptr;
+}
+
+size_t tensor_buffer_size(tflinspect::Reader &reader, const int32_t tensor_id)
+{
+ auto tensors = reader.tensors();
+
+ if (tensor_id < 0 || tensor_id >= tensors->Length())
+ {
+ throw std::runtime_error("Invalid Tensor ID");
+ }
+
+ auto tensor = tensors->Get(tensor_id);
+ auto buffer_id = tensor->buffer();
+
+ size_t size = reader.buffer_info(buffer_id, nullptr);
+
+ return size;
+}
+
+} // namespace
+
+namespace tflinspect
+{
+
+void DumpConv2DWeight::run(std::ostream &os, const tflite::Model *model)
+{
+ tflinspect::Reader reader(model);
+
+ assert(reader.num_subgraph() == 1);
+ reader.select_subgraph(0);
+
+ auto ops = reader.operators();
+
+ // dump Conv2D, DepthwiseConv2D and its weight input operator
+ for (uint32_t i = 0; i < ops->Length(); ++i)
+ {
+ const auto op = ops->Get(i);
+ auto bc = reader.builtin_code(op);
+
+ if (bc == tflite::BuiltinOperator_CONV_2D || bc == tflite::BuiltinOperator_DEPTHWISE_CONV_2D)
+ {
+ const std::vector<int32_t> &inputs = tflinspect::as_index_vector(op->inputs());
+ if (inputs.size() < 2)
+ {
+ throw std::runtime_error("Operator has invalid input");
+ }
+ auto weight_input = inputs[1]; // Tensor ID of weight input
+
+ const auto op_weight = operator_match_output(reader, weight_input);
+ const auto buffer_size = tensor_buffer_size(reader, weight_input);
+
+ std::string weight_op_name = "?";
+
+ if (op_weight == nullptr && buffer_size > 0)
+ {
+ weight_op_name = "CONST";
+ }
+ else if (op_weight != nullptr)
+ {
+ weight_op_name = reader.opcode_name(op_weight);
+ }
+
+ auto op_name = reader.opcode_name(op);
+ os << op_name << "," << weight_op_name << std::endl;
+ }
+ }
+}
+
+} // namespace tflinspect
diff --git a/compiler/tfl-inspect/src/Dump.h b/compiler/tfl-inspect/src/Dump.h
new file mode 100644
index 000000000..798c1db0e
--- /dev/null
+++ b/compiler/tfl-inspect/src/Dump.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DUMP_H__
+#define __DUMP_H__
+
+#include <mio/tflite/schema_generated.h>
+
+#include <ostream>
+
+namespace tflinspect
+{
+
+class DumpInterface
+{
+public:
+ virtual ~DumpInterface() = default;
+
+public:
+ virtual void run(std::ostream &os, const tflite::Model *model) = 0;
+};
+
+class DumpOperators final : public DumpInterface
+{
+public:
+ DumpOperators() = default;
+
+public:
+ void run(std::ostream &os, const tflite::Model *model);
+};
+
+class DumpConv2DWeight final : public DumpInterface
+{
+public:
+ DumpConv2DWeight() = default;
+
+public:
+ void run(std::ostream &os, const tflite::Model *model);
+};
+
+} // namespace tflinspect
+
+#endif // __DUMP_H__
diff --git a/compiler/tfl-inspect/src/Model.cpp b/compiler/tfl-inspect/src/Model.cpp
new file mode 100644
index 000000000..8c3bf379a
--- /dev/null
+++ b/compiler/tfl-inspect/src/Model.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Model.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+namespace
+{
+
+class MemoryMappedModel final : public tflinspect::Model
+{
+public:
+ /**
+ * @require fd and data SHOULD be valid
+ */
+ explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ ~MemoryMappedModel()
+ {
+ munmap(_data, _size);
+ close(_fd);
+ }
+
+public:
+ MemoryMappedModel(const MemoryMappedModel &) = delete;
+ MemoryMappedModel(MemoryMappedModel &&) = delete;
+
+public:
+ const ::tflite::Model *model(void) const override { return ::tflite::GetModel(_data); }
+
+private:
+ int _fd = -1;
+ void *_data = nullptr;
+ size_t _size = 0;
+};
+
+class FileDescriptor final
+{
+public:
+ FileDescriptor(int value) : _value{value}
+ {
+ // DO NOTHING
+ }
+
+public:
+ // NOTE Copy is not allowed
+ FileDescriptor(const FileDescriptor &) = delete;
+
+public:
+ // NOTE Move is allowed
+ FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); }
+
+public:
+ ~FileDescriptor()
+ {
+ if (_value != -1)
+ {
+      // Close on destruction
+ close(_value);
+ }
+ }
+
+public:
+ int value(void) const { return _value; }
+
+public:
+ int release(void)
+ {
+ auto res = _value;
+ _value = -1;
+ return res;
+ }
+
+private:
+ int _value = -1;
+};
+
+} // namespace
+
+namespace tflinspect
+{
+
+std::unique_ptr<Model> load_tflite(const std::string &path)
+{
+ FileDescriptor fd = open(path.c_str(), O_RDONLY);
+
+ if (fd.value() == -1)
+ {
+ // Return nullptr on open failure
+ return nullptr;
+ }
+
+ struct stat st;
+ if (fstat(fd.value(), &st) == -1)
+ {
+ // Return nullptr on fstat failure
+ return nullptr;
+ }
+
+ auto size = st.st_size;
+ auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0);
+
+ if (data == MAP_FAILED)
+ {
+ // Return nullptr on mmap failure
+ return nullptr;
+ }
+
+ // Check if file is a valid Flatbuffer file
+ const uint8_t *u8data = reinterpret_cast<const uint8_t *>(data);
+ flatbuffers::Verifier verifier{u8data, static_cast<size_t>(size)};
+ if (!tflite::VerifyModelBuffer(verifier))
+ {
+ munmap(data, size);
+ close(fd.release());
+ return nullptr;
+ }
+
+ return std::unique_ptr<tflinspect::Model>{new MemoryMappedModel(fd.release(), data, size)};
+}
+
+} // namespace tflinspect
diff --git a/compiler/tfl-inspect/src/Model.h b/compiler/tfl-inspect/src/Model.h
new file mode 100644
index 000000000..a69fb8be9
--- /dev/null
+++ b/compiler/tfl-inspect/src/Model.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MODEL_H__
+#define __MODEL_H__
+
+#include <mio/tflite/schema_generated.h>
+
+#include <memory>
+
+namespace tflinspect
+{
+
+struct Model
+{
+ virtual ~Model() = default;
+
+ virtual const ::tflite::Model *model(void) const = 0;
+};
+
+/**
+ * @brief Load TensorFlow Lite model (as a raw Model) from a given path
+ *
+ * @note May return a nullptr
+ */
+std::unique_ptr<Model> load_tflite(const std::string &path);
+
+} // namespace tflinspect
+
+#endif // __MODEL_H__
diff --git a/compiler/tfl-inspect/src/Reader.cpp b/compiler/tfl-inspect/src/Reader.cpp
new file mode 100644
index 000000000..7bd2fe2c6
--- /dev/null
+++ b/compiler/tfl-inspect/src/Reader.cpp
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reader.h"
+
+#include <sstream>
+#include <string>
+
+namespace tflinspect
+{
+
+bool is_valid(const tflite::OperatorCode *opcode)
+{
+ tflite::BuiltinOperator code = opcode->builtin_code();
+ return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
+}
+
+bool is_custom(const tflite::OperatorCode *opcode)
+{
+ tflite::BuiltinOperator code = opcode->builtin_code();
+ return (code == tflite::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const tflite::OperatorCode *opcode)
+{
+ assert(opcode);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ return opcode->custom_code()->c_str();
+ }
+
+ tflite::BuiltinOperator code = opcode->builtin_code();
+ return tflite::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const tflite::Tensor *tensor)
+{
+ return tflite::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const tflite::Tensor *tensor)
+{
+ static const char *kEmptyTensorName = "(noname)";
+
+ auto name = tensor->name();
+ if (name)
+ return name->c_str();
+
+ return kEmptyTensorName;
+}
+
+Reader::Reader(const tflite::Model *model)
+{
+ _subgraphs = model->subgraphs();
+ _buffers = model->buffers();
+
+ auto opcodes = model->operator_codes();
+ for (const ::tflite::OperatorCode *opcode : *opcodes)
+ {
+ _op_codes.push_back(opcode);
+ }
+}
+
+size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
+{
+ if (buff_data != nullptr)
+ {
+ *buff_data = nullptr;
+ }
+
+ if (buf_idx == 0)
+ return 0;
+
+ if (auto *buffer = (*_buffers)[buf_idx])
+ {
+ if (auto *array = buffer->data())
+ {
+ if (size_t size = array->size())
+ {
+ if (buff_data != nullptr)
+ {
+ *buff_data = reinterpret_cast<const uint8_t *>(array->data());
+ }
+ return size;
+ }
+ }
+ }
+
+ return 0;
+}
+
+tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const tflite::OperatorCode *opcode = _op_codes.at(index);
+
+ return opcode->builtin_code();
+}
+
+std::string Reader::opcode_name(const tflite::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const tflite::OperatorCode *opcode = _op_codes.at(index);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid: " << index << ")";
+ return oss.str();
+ }
+
+ return tflinspect::opcode_name(opcode);
+}
+
+bool Reader::select_subgraph(uint32_t sgindex)
+{
+ _tensors = nullptr;
+ _operators = nullptr;
+
+ _inputs.clear();
+ _outputs.clear();
+
+ if (_subgraphs->Length() <= sgindex)
+ {
+ assert(false);
+ return false;
+ }
+
+ const tflite::SubGraph *subgraph = (*_subgraphs)[sgindex];
+
+ _tensors = subgraph->tensors();
+ _operators = subgraph->operators();
+
+ _inputs = as_index_vector(subgraph->inputs());
+ _outputs = as_index_vector(subgraph->outputs());
+
+ return true;
+}
+
+} // namespace tflinspect
diff --git a/compiler/tfl-inspect/src/Reader.h b/compiler/tfl-inspect/src/Reader.h
new file mode 100644
index 000000000..e9e182a4b
--- /dev/null
+++ b/compiler/tfl-inspect/src/Reader.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __READER_H__
+#define __READER_H__
+
+#include <mio/tflite/schema_generated.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace tflinspect
+{
+
+template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
+{
+ std::vector<T> ret(flat_array->Length());
+ for (uint32_t i = 0; i < flat_array->Length(); i++)
+ {
+ ret[i] = flat_array->Get(i);
+ }
+ return ret;
+}
+
+bool is_valid(const tflite::OperatorCode *opcode);
+bool is_custom(const tflite::OperatorCode *opcode);
+std::string opcode_name(const tflite::OperatorCode *opcode);
+const char *tensor_type(const tflite::Tensor *tensor);
+const char *tensor_name(const tflite::Tensor *tensor);
+
+/**
+ * @brief Loads TF lite file and provides helpers to access attributes
+ */
+class Reader
+{
+private:
+ using TFliteSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<tflite::SubGraph>>;
+ using TFliteBuffers_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>;
+ using TFliteTensors_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>;
+ using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>;
+
+public:
+ Reader(const tflite::Model *model);
+
+ Reader() = delete;
+
+public:
+ const std::vector<const tflite::OperatorCode *> &opcodes() { return _op_codes; }
+ const TFliteBuffers_t *buffers() { return _buffers; }
+ const TFliteTensors_t *tensors() { return _tensors; }
+ const TFliteOperators_t *operators() { return _operators; }
+ const std::vector<int32_t> &inputs() const { return _inputs; }
+ const std::vector<int32_t> &outputs() const { return _outputs; }
+
+ uint32_t num_subgraph() const { return _subgraphs->Length(); }
+
+ size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
+ tflite::BuiltinOperator builtin_code(const tflite::Operator *op) const;
+ std::string opcode_name(const tflite::Operator *op) const;
+
+public:
+ bool select_subgraph(uint32_t subgraph);
+
+private:
+ const TFliteSubGraphs_t *_subgraphs{nullptr};
+ const TFliteBuffers_t *_buffers{nullptr};
+ const TFliteTensors_t *_tensors{nullptr};
+ const TFliteOperators_t *_operators{nullptr};
+
+ std::vector<const tflite::OperatorCode *> _op_codes;
+ std::vector<int32_t> _inputs;
+ std::vector<int32_t> _outputs;
+};
+
+} // namespace tflinspect
+
+#endif // __READER_H__
diff --git a/compiler/tfl-verify/CMakeLists.txt b/compiler/tfl-verify/CMakeLists.txt
new file mode 100644
index 000000000..a368b2930
--- /dev/null
+++ b/compiler/tfl-verify/CMakeLists.txt
@@ -0,0 +1,12 @@
+if(NOT TARGET mio_tflite)
+ return()
+endif(NOT TARGET mio_tflite)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(tfl-verify ${SOURCES})
+target_include_directories(tfl-verify PRIVATE src)
+target_link_libraries(tfl-verify mio_tflite)
+target_link_libraries(tfl-verify safemain)
+target_link_libraries(tfl-verify cwrap)
+target_link_libraries(tfl-verify stdex)
diff --git a/compiler/tfl-verify/README.md b/compiler/tfl-verify/README.md
new file mode 100644
index 000000000..c50016873
--- /dev/null
+++ b/compiler/tfl-verify/README.md
@@ -0,0 +1,23 @@
+# tfl-verify
+
+_tfl-verify_ allows users to verify TF Lite models.
+
+## Usage
+
+Provide _tflite_ file as a parameter to verify validity.
+
+```
+$ tfl-verify tflitefile.tflite
+```
+
+Result for valid file
+```
+[ RUN ] Check tflitefile.tflite
+[ PASS ] Check tflitefile.tflite
+```
+
+Result for invalid file
+```
+[ RUN ] Check tflitefile.tflite
+[ FAIL ] Check tflitefile.tflite
+```
diff --git a/compiler/tfl-verify/requires.cmake b/compiler/tfl-verify/requires.cmake
new file mode 100644
index 000000000..e479a8329
--- /dev/null
+++ b/compiler/tfl-verify/requires.cmake
@@ -0,0 +1,4 @@
+require("mio-tflite")
+require("safemain")
+require("cwrap")
+require("stdex")
diff --git a/compiler/tfl-verify/src/Driver.cpp b/compiler/tfl-verify/src/Driver.cpp
new file mode 100644
index 000000000..367c731a6
--- /dev/null
+++ b/compiler/tfl-verify/src/Driver.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VerifyFlatBuffers.h"
+
+#include <stdex/Memory.h>
+
+#include <iostream>
+#include <string>
+
+int entry(int argc, char **argv)
+{
+ if (argc != 2)
+ {
+ std::cerr << "ERROR: Failed to parse arguments" << std::endl;
+ std::cerr << std::endl;
+ std::cerr << "USAGE: " << argv[0] << " [tflite]" << std::endl;
+ return 255;
+ }
+ auto verifier = stdex::make_unique<VerifyFlatbuffers>();
+
+ std::string model_file = argv[argc - 1];
+
+ std::cout << "[ RUN ] Check " << model_file << std::endl;
+
+ auto result = verifier->run(model_file);
+
+ if (result == 0)
+ {
+ std::cout << "[ PASS ] Check " << model_file << std::endl;
+ }
+ else
+ {
+ std::cout << "[ FAIL ] Check " << model_file << std::endl;
+ }
+
+ return result;
+}
diff --git a/compiler/tfl-verify/src/Model.cpp b/compiler/tfl-verify/src/Model.cpp
new file mode 100644
index 000000000..efac1210d
--- /dev/null
+++ b/compiler/tfl-verify/src/Model.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Model.h"
+
+#include <cwrap/Fildes.h>
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+namespace
+{
+
+class MemoryMappedModel final : public ModelData
+{
+public:
+ /**
+ * @require fd and data SHOULD be valid
+ */
+ explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ ~MemoryMappedModel()
+ {
+ munmap(_data, _size);
+ close(_fd);
+ }
+
+public:
+ MemoryMappedModel(const MemoryMappedModel &) = delete;
+ MemoryMappedModel(MemoryMappedModel &&) = delete;
+
+public:
+ const void *data(void) const override { return _data; };
+ const size_t size(void) const override { return _size; };
+
+private:
+ int _fd = -1;
+ void *_data = nullptr;
+ size_t _size = 0;
+};
+
+} // namespace
+
+std::unique_ptr<ModelData> load_modeldata(const std::string &path)
+{
+ cwrap::Fildes fd(open(path.c_str(), O_RDONLY));
+
+ if (fd.get() == -1)
+ {
+ // Return nullptr on open failure
+ return nullptr;
+ }
+
+ struct stat st;
+ if (fstat(fd.get(), &st) == -1)
+ {
+ // Return nullptr on fstat failure
+ return nullptr;
+ }
+
+ auto size = st.st_size;
+ auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.get(), 0);
+
+ if (data == MAP_FAILED)
+ {
+ // Return nullptr on mmap failure
+ return nullptr;
+ }
+
+ return std::unique_ptr<ModelData>{new MemoryMappedModel(fd.release(), data, size)};
+}
diff --git a/compiler/tfl-verify/src/Model.h b/compiler/tfl-verify/src/Model.h
new file mode 100644
index 000000000..44f40e24c
--- /dev/null
+++ b/compiler/tfl-verify/src/Model.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MODEL_H__
+#define __MODEL_H__
+
+#include <memory>
+#include <string>
+
+struct ModelData
+{
+ virtual ~ModelData() = default;
+
+ virtual const void *data(void) const = 0;
+ virtual const size_t size(void) const = 0;
+};
+
+/**
+ * @brief Load TF Lite model (as a raw data) from a given path
+ *
+ * @note May return a nullptr
+ */
+std::unique_ptr<ModelData> load_modeldata(const std::string &path);
+
+#endif // __MODEL_H__
diff --git a/compiler/tfl-verify/src/VerifyFlatBuffers.cpp b/compiler/tfl-verify/src/VerifyFlatBuffers.cpp
new file mode 100644
index 000000000..253fcb210
--- /dev/null
+++ b/compiler/tfl-verify/src/VerifyFlatBuffers.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VerifyFlatBuffers.h"
+
+#include "Model.h"
+
+#include <mio/tflite/schema_generated.h>
+
+int VerifyFlatbuffers::run(const std::string &model_file)
+{
+ auto modeldata = load_modeldata(model_file);
+
+ const uint8_t *data = reinterpret_cast<const uint8_t *>(modeldata->data());
+ flatbuffers::Verifier verifier{data, modeldata->size()};
+
+ if (!tflite::VerifyModelBuffer(verifier))
+ {
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/compiler/tfl-verify/src/VerifyFlatBuffers.h b/compiler/tfl-verify/src/VerifyFlatBuffers.h
new file mode 100644
index 000000000..c301b5b10
--- /dev/null
+++ b/compiler/tfl-verify/src/VerifyFlatBuffers.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __VERIFY_FLATBUFFERS_H__
+#define __VERIFY_FLATBUFFERS_H__
+
+#include <ostream>
+#include <string>
+
+class VerifyFlatbuffers
+{
+public:
+ VerifyFlatbuffers() = default;
+
+public:
+ int run(const std::string &model_file);
+};
+
+#endif // __VERIFY_FLATBUFFERS_H__
diff --git a/compiler/tflchef/CMakeLists.txt b/compiler/tflchef/CMakeLists.txt
new file mode 100644
index 000000000..71c5e2ab1
--- /dev/null
+++ b/compiler/tflchef/CMakeLists.txt
@@ -0,0 +1,19 @@
+nnas_find_package(Protobuf QUIET)
+
+if(NOT Protobuf_FOUND)
+ return()
+endif(NOT Protobuf_FOUND)
+
+if(NOT TARGET mio_tflite)
+ return()
+endif(NOT TARGET mio_tflite)
+
+# Recipe Parser
+add_subdirectory(proto)
+# Core Library
+add_subdirectory(core)
+# TFlite Library
+add_subdirectory(tflite)
+# Tools
+add_subdirectory(tools)
+add_subdirectory(tests)
diff --git a/compiler/tflchef/README.md b/compiler/tflchef/README.md
new file mode 100644
index 000000000..c940f2203
--- /dev/null
+++ b/compiler/tflchef/README.md
@@ -0,0 +1,76 @@
+# tflchef
+
+## What is tflchef?
+
+Do you need a TensorFlow Lite model for testing? Ask _tflchef_.
+Given a recipe, _tflchef_ will cook a TensorFlow Lite model for you.
+
+**NOTE** A model that _tflchef_ generates is compatible with TensorFlow Lite in TensorFlow v1.12.0 release
+
+## Tutorial: How to use?
+
+This example explains how to generate a tensorflow lite model with a single Conv2D operation
+with a kernel filled with random values generated according to normal (or gaussian) distribution (mean = 0.0f / stddev = 1.0f) and bias with constant values (1.1f) with _tflchef_.
+
+The first step is to write a recipe!
+Type the following command, and then you may get ``sample.recipe``:
+```
+$ cat > sample.recipe <<END
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+input: "ker"
+output: "ofm"
+END
+```
+
+Generate ``sample.tflite`` from ``sample.recipe`` with one of the following commands:
+- With redirection
+```
+$ cat sample.recipe | tflchef > sample.tflite
+```
+- Without redirection
+```
+$ tflchef-file sample.recipe sample.tflite
+```
+
+Done :)
diff --git a/compiler/tflchef/core/CMakeLists.txt b/compiler/tflchef/core/CMakeLists.txt
new file mode 100644
index 000000000..6a6282027
--- /dev/null
+++ b/compiler/tflchef/core/CMakeLists.txt
@@ -0,0 +1,7 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(tflchef_core STATIC ${SOURCES})
+target_include_directories(tflchef_core PUBLIC include)
+target_include_directories(tflchef_core PRIVATE src)
+target_link_libraries(tflchef_core tflchef_proto)
+target_link_libraries(tflchef_core mio_tflite)
diff --git a/compiler/tflchef/core/include/tflchef/ModelChef.h b/compiler/tflchef/core/include/tflchef/ModelChef.h
new file mode 100644
index 000000000..4e22f7555
--- /dev/null
+++ b/compiler/tflchef/core/include/tflchef/ModelChef.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MODEL_CHEF_H__
+#define __MODEL_CHEF_H__
+
+#include <tflchef.pb.h>
+
+#include <memory>
+
+namespace tflchef
+{
+
+class GeneratedModel final
+{
+public:
+ struct Impl
+ {
+ virtual ~Impl() = default;
+
+ virtual const char *base(void) const = 0;
+ virtual size_t size(void) const = 0;
+ };
+
+public:
+ GeneratedModel(std::unique_ptr<Impl> &&impl) : _impl{std::move(impl)}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const char *base(void) const { return _impl->base(); }
+ size_t size(void) const { return _impl->size(); }
+
+private:
+ std::unique_ptr<Impl> _impl;
+};
+
+GeneratedModel cook(const ModelRecipe &model_recipe);
+
+} // namespace tflchef
+
+#endif // __MODEL_CHEF_H__
diff --git a/compiler/tflchef/core/src/Arguments.h b/compiler/tflchef/core/src/Arguments.h
new file mode 100644
index 000000000..341aea6c9
--- /dev/null
+++ b/compiler/tflchef/core/src/Arguments.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ARGUMENTS_H__
+#define __ARGUMENTS_H__
+
+#include <cstdint>
+#include <string>
+
+/**
+ * @brief Read-only string sequence view
+ */
+struct Arguments
+{
+ virtual ~Arguments() = default;
+
+ virtual uint32_t count(void) const = 0;
+ virtual const std::string &value(uint32_t n) const = 0;
+};
+
+#endif // __ARGUMENTS_H__
diff --git a/compiler/tflchef/core/src/Convert.cpp b/compiler/tflchef/core/src/Convert.cpp
new file mode 100644
index 000000000..86a31d9b7
--- /dev/null
+++ b/compiler/tflchef/core/src/Convert.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+#include <stdexcept>
+
+tflite::Padding as_tflite_padding(const tflchef::Padding &value)
+{
+ switch (value)
+ {
+ case tflchef::SAME:
+ return tflite::Padding_SAME;
+ case tflchef::VALID:
+ return tflite::Padding_VALID;
+ default:
+ break;
+ }
+
+ throw std::runtime_error{"Unknown padding value"};
+}
+
+tflite::ActivationFunctionType as_tflite_activation(const tflchef::Activation &value)
+{
+ switch (value)
+ {
+ case tflchef::NONE:
+ return tflite::ActivationFunctionType_NONE;
+ case tflchef::RELU:
+ return tflite::ActivationFunctionType_RELU;
+ case tflchef::RELU6:
+ return tflite::ActivationFunctionType_RELU6;
+ default:
+ break;
+ }
+
+ throw std::runtime_error{"Unknown activation"};
+}
+
+tflite::TensorType as_tflite_tensortype(const tflchef::TensorType &value)
+{
+ switch (value)
+ {
+ case tflchef::FLOAT32:
+ return tflite::TensorType_FLOAT32;
+ case tflchef::INT32:
+ return tflite::TensorType_INT32;
+ case tflchef::UINT8:
+ return tflite::TensorType_UINT8;
+ case tflchef::INT64:
+ return tflite::TensorType_INT64;
+ case tflchef::BOOL:
+ return tflite::TensorType_BOOL;
+ default:
+ break;
+ }
+
+ throw std::runtime_error{"Unknown tensor type"};
+}
diff --git a/compiler/tflchef/core/src/Convert.h b/compiler/tflchef/core/src/Convert.h
new file mode 100644
index 000000000..ed15a5572
--- /dev/null
+++ b/compiler/tflchef/core/src/Convert.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Convert.h
+ * @brief This header declares various as_tflite_TYPE functions
+ */
+#ifndef __CONVERT_H__
+#define __CONVERT_H__
+
+#include <tflchef.pb.h>
+#include <mio/tflite/schema_generated.h>
+
/// @brief Convert a recipe padding enum into the TFLite schema equivalent (throws on unknown values)
tflite::Padding as_tflite_padding(const tflchef::Padding &value);
/// @brief Convert a recipe activation enum into the TFLite schema equivalent (throws on unknown values)
tflite::ActivationFunctionType as_tflite_activation(const tflchef::Activation &value);
/// @brief Convert a recipe tensor-type enum into the TFLite schema equivalent (throws on unknown values)
tflite::TensorType as_tflite_tensortype(const tflchef::TensorType &value);
+
+#endif // __CONVERT_H__
diff --git a/compiler/tflchef/core/src/Data/Constant.h b/compiler/tflchef/core/src/Data/Constant.h
new file mode 100644
index 000000000..ebe1f3d93
--- /dev/null
+++ b/compiler/tflchef/core/src/Data/Constant.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONSTANT_FILLER_H__
+#define __CONSTANT_FILLER_H__
+
+#include "DataChef.h"
+#include "LexicalCast.h"
+
+template <typename T> class ConstantDataChef final : public DataChef
+{
+public:
+ ConstantDataChef(const T &value) : _value{value}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<uint8_t> generate(int32_t count) const override
+ {
+ std::vector<uint8_t> res;
+
+ for (uint32_t n = 0; n < count; ++n)
+ {
+ const uint8_t *arr = reinterpret_cast<const uint8_t *>(&_value);
+
+ for (uint32_t b = 0; b < sizeof(T); ++b)
+ {
+ res.emplace_back(arr[b]);
+ }
+ }
+
+ return res;
+ }
+
+private:
+ T _value;
+};
+
+template <typename T> struct ConstantDataChefFactory : public DataChefFactory
+{
+ std::unique_ptr<DataChef> create(const Arguments &args) const
+ {
+ auto const value = to_number<T>(args.value(0));
+ return std::unique_ptr<DataChef>{new ConstantDataChef<T>{value}};
+ }
+};
+
+#endif // __CONSTANT_FILLER_H__
diff --git a/compiler/tflchef/core/src/Data/Explicit.h b/compiler/tflchef/core/src/Data/Explicit.h
new file mode 100644
index 000000000..088e791b9
--- /dev/null
+++ b/compiler/tflchef/core/src/Data/Explicit.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __EXPLICIT_FILLER_H__
+#define __EXPLICIT_FILLER_H__
+
+#include "DataChef.h"
+#include "LexicalCast.h"
+
+#include <vector>
+
+template <typename T> class ExplicitDataChef final : public DataChef
+{
+public:
+ ExplicitDataChef()
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<uint8_t> generate(int32_t count) const override
+ {
+ std::vector<uint8_t> res;
+
+ for (uint32_t n = 0; n < count; ++n)
+ {
+ T const value = (n < _values.size()) ? _values.at(n) : T{};
+ const uint8_t *arr = reinterpret_cast<const uint8_t *>(&value);
+
+ for (uint32_t b = 0; b < sizeof(T); ++b)
+ {
+ res.emplace_back(arr[b]);
+ }
+ }
+
+ return res;
+ }
+
+public:
+ void insert(const T &value) { _values.emplace_back(value); }
+
+private:
+ std::vector<T> _values;
+};
+
+template <typename T> struct ExplicitDataChefFactory : public DataChefFactory
+{
+ std::unique_ptr<DataChef> create(const Arguments &args) const
+ {
+ std::unique_ptr<ExplicitDataChef<T>> res{new ExplicitDataChef<T>};
+
+ for (uint32_t n = 0; n < args.count(); ++n)
+ {
+ auto const value = to_number<T>(args.value(n));
+ res->insert(value);
+ }
+
+ return std::move(res);
+ }
+};
+
+#endif // __EXPLICIT_FILLER_H__
diff --git a/compiler/tflchef/core/src/Data/Gaussian.cpp b/compiler/tflchef/core/src/Data/Gaussian.cpp
new file mode 100644
index 000000000..c515d1104
--- /dev/null
+++ b/compiler/tflchef/core/src/Data/Gaussian.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include "Gaussian.h"
#include "LexicalCast.h"

#include <chrono>
#include <limits>
#include <random>

#include <cassert>
#include <stdexcept>
+
+std::vector<uint8_t> GaussianFloat32DataChef::generate(int32_t count) const
+{
+ // TODO Support seed value override
+ int seed = std::chrono::system_clock::now().time_since_epoch().count();
+
+ std::minstd_rand rand{seed};
+ std::normal_distribution<float> dist{_mean, _stddev};
+
+ std::vector<uint8_t> res;
+
+ for (uint32_t n = 0; n < count; ++n)
+ {
+ auto const value = dist(rand);
+ auto const arr = reinterpret_cast<const uint8_t *>(&value);
+
+ for (uint32_t b = 0; b < sizeof(float); ++b)
+ {
+ res.emplace_back(arr[b]);
+ }
+ }
+
+ return res;
+}
+
+std::vector<uint8_t> GaussianInt32DataChef::generate(int32_t count) const
+{
+ // TODO Support seed value override
+ int seed = std::chrono::system_clock::now().time_since_epoch().count();
+
+ std::minstd_rand rand{seed};
+ std::normal_distribution<float> dist{_mean, _stddev};
+
+ std::vector<uint8_t> res;
+
+ for (uint32_t n = 0; n < count; ++n)
+ {
+ auto const value = static_cast<int32_t>(dist(rand));
+ auto const arr = reinterpret_cast<const uint8_t *>(&value);
+
+ for (uint32_t b = 0; b < sizeof(int32_t); ++b)
+ {
+ res.emplace_back(arr[b]);
+ }
+ }
+
+ return res;
+}
+
+std::vector<uint8_t> GaussianUint8DataChef::generate(int32_t count) const
+{
+ // TODO Support seed value override
+ int seed = std::chrono::system_clock::now().time_since_epoch().count();
+
+ std::minstd_rand rand{seed};
+ std::normal_distribution<float> dist{_mean, _stddev};
+
+ std::vector<uint8_t> res;
+
+ for (uint32_t n = 0; n < count; ++n)
+ {
+ auto const value = static_cast<uint8_t>(dist(rand)); // uint8_t for data type
+ auto const arr = reinterpret_cast<const uint8_t *>(&value); // uint8_t for byte streaming
+
+ for (uint32_t b = 0; b < sizeof(uint8_t); ++b)
+ {
+ res.emplace_back(arr[b]);
+ }
+ }
+
+ return res;
+}
+
+std::unique_ptr<DataChef> GaussianFloat32DataChefFactory::create(const Arguments &args) const
+{
+ if (args.count() != 2)
+ {
+ throw std::runtime_error{"invalid argument count: two arguments (mean/stddev) are expected"};
+ }
+
+ auto const mean = to_number<float>(args.value(0));
+ auto const stddev = to_number<float>(args.value(1));
+
+ return std::unique_ptr<DataChef>{new GaussianFloat32DataChef{mean, stddev}};
+}
+
+std::unique_ptr<DataChef> GaussianInt32DataChefFactory::create(const Arguments &args) const
+{
+ if (args.count() != 2)
+ {
+ throw std::runtime_error{"invalid argument count: two arguments (mean/stddev) are expected"};
+ }
+
+ auto const mean = to_number<float>(args.value(0));
+ auto const stddev = to_number<float>(args.value(1));
+
+ return std::unique_ptr<DataChef>{new GaussianInt32DataChef{mean, stddev}};
+}
+
+std::unique_ptr<DataChef> GaussianUint8DataChefFactory::create(const Arguments &args) const
+{
+ if (args.count() != 2)
+ {
+ throw std::runtime_error{"invalid argument count: two arguments (mean/stddev) are expected"};
+ }
+
+ auto const mean = to_number<float>(args.value(0));
+ auto const stddev = to_number<float>(args.value(1));
+
+ return std::unique_ptr<DataChef>{new GaussianUint8DataChef{mean, stddev}};
+}
diff --git a/compiler/tflchef/core/src/Data/Gaussian.h b/compiler/tflchef/core/src/Data/Gaussian.h
new file mode 100644
index 000000000..81a28d2d1
--- /dev/null
+++ b/compiler/tflchef/core/src/Data/Gaussian.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GAUSSIAN_FILLER_H__
+#define __GAUSSIAN_FILLER_H__
+
+#include "DataChef.h"
+
/**
 * @brief Generate a sequence of random values according to the gaussian(=normal) distribution
 *
 * Each element is a float drawn from N(mean, stddev); see Gaussian.cpp for
 * how the sequence is serialized into bytes.
 */
class GaussianFloat32DataChef final : public DataChef
{
public:
  /// @param mean   mean of the distribution
  /// @param stddev standard deviation of the distribution
  GaussianFloat32DataChef(float mean, float stddev) : _mean{mean}, _stddev{stddev}
  {
    // DO NOTHING
  }

public:
  std::vector<uint8_t> generate(int32_t count) const override;

private:
  float _mean;
  float _stddev;
};
+
/**
 * @brief Gaussian data chef for int32 tensors
 *
 * Samples are drawn from a float normal distribution and converted to
 * int32_t (see Gaussian.cpp); mean/stddev are therefore given as floats.
 */
class GaussianInt32DataChef final : public DataChef
{
public:
  /// @param mean   mean of the distribution
  /// @param stddev standard deviation of the distribution
  GaussianInt32DataChef(float mean, float stddev) : _mean{mean}, _stddev{stddev}
  {
    // DO NOTHING
  }

public:
  std::vector<uint8_t> generate(int32_t count) const override;

private:
  float _mean;
  float _stddev;
};
+
/**
 * @brief Gaussian data chef for uint8 tensors
 *
 * Samples are drawn from a float normal distribution and converted to
 * uint8_t (see Gaussian.cpp); mean/stddev are therefore given as floats.
 */
class GaussianUint8DataChef final : public DataChef
{
public:
  /// @param mean   mean of the distribution
  /// @param stddev standard deviation of the distribution
  GaussianUint8DataChef(float mean, float stddev) : _mean{mean}, _stddev{stddev}
  {
    // DO NOTHING
  }

public:
  std::vector<uint8_t> generate(int32_t count) const override;

private:
  float _mean;
  float _stddev;
};
+
// Each factory below builds its chef from exactly two string arguments:
// mean and stddev (see Gaussian.cpp for the argument-count check)
struct GaussianFloat32DataChefFactory : public DataChefFactory
{
  std::unique_ptr<DataChef> create(const Arguments &args) const;
};

struct GaussianInt32DataChefFactory : public DataChefFactory
{
  std::unique_ptr<DataChef> create(const Arguments &args) const;
};

struct GaussianUint8DataChefFactory : public DataChefFactory
{
  std::unique_ptr<DataChef> create(const Arguments &args) const;
};
+
+#endif // __GAUSSIAN_FILLER_H__
diff --git a/compiler/tflchef/core/src/DataChef.def b/compiler/tflchef/core/src/DataChef.def
new file mode 100644
index 000000000..89d34a202
--- /dev/null
+++ b/compiler/tflchef/core/src/DataChef.def
@@ -0,0 +1,15 @@
#ifndef DATA_CHEF
#error "Define DATA_CHEF first"
#endif // DATA_CHEF

// DATA_CHEF(TYPE, NAME, FACTORY_CLASS)
// "TYPE" SHOULD BE an enum tag of tflchef::TensorType
// NOTE The same NAME may appear under different TYPEs: each tensor type has
//      its own DataChefRegistry (see data_chef_registry in ModelChef.cpp),
//      so entries are effectively keyed by the (TYPE, NAME) pair.
DATA_CHEF(FLOAT32, constant, ConstantDataChefFactory<float>)
DATA_CHEF(BOOL, constant, ConstantDataChefFactory<bool>)
DATA_CHEF(INT32, explicit, ExplicitDataChefFactory<int>)
DATA_CHEF(UINT8, explicit, ExplicitDataChefFactory<uint8_t>)
DATA_CHEF(BOOL, explicit, ExplicitDataChefFactory<bool>)
DATA_CHEF(FLOAT32, explicit, ExplicitDataChefFactory<float>)
DATA_CHEF(FLOAT32, gaussian, GaussianFloat32DataChefFactory)
DATA_CHEF(INT32, gaussian, GaussianInt32DataChefFactory)
DATA_CHEF(UINT8, gaussian, GaussianUint8DataChefFactory)
diff --git a/compiler/tflchef/core/src/DataChef.h b/compiler/tflchef/core/src/DataChef.h
new file mode 100644
index 000000000..d0571028a
--- /dev/null
+++ b/compiler/tflchef/core/src/DataChef.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DATA_CHEF_H__
+#define __DATA_CHEF_H__
+
+#include "Arguments.h"
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
// Byte sequence produced by a DataChef
using Data = std::vector<uint8_t>;

/**
 * @brief Data Generator
 */
struct DataChef
{
  virtual ~DataChef() = default;

  // TODO Allow users to query the type of elements that this DataChef generates

  /**
   * @brief Generate a sequence of 'count' elements as a byte sequence
   *
   * Let D be the return value of generate(N).
   * Then, D.size() == N * sizeof(T) where T is the element type.
   */
  virtual Data generate(int32_t count) const = 0;
};

/**
 * @brief Data Generator Factory
 *
 * Builds a DataChef from a sequence of string arguments (see Arguments.h);
 * how the arguments are interpreted is up to each concrete factory.
 */
struct DataChefFactory
{
  virtual ~DataChefFactory() = default;

  virtual std::unique_ptr<DataChef> create(const Arguments &args) const = 0;
};
+
+#endif // __DATA_CHEF_H__
diff --git a/compiler/tflchef/core/src/DataChefs.h b/compiler/tflchef/core/src/DataChefs.h
new file mode 100644
index 000000000..2310ae89d
--- /dev/null
+++ b/compiler/tflchef/core/src/DataChefs.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DATA_CHEFS_H__
+#define __DATA_CHEFS_H__
+
+#include "Data/Constant.h"
+#include "Data/Explicit.h"
+#include "Data/Gaussian.h"
+
+#endif // __DATA_CHEFS_H__
diff --git a/compiler/tflchef/core/src/Dataset.h b/compiler/tflchef/core/src/Dataset.h
new file mode 100644
index 000000000..9d5c7a43f
--- /dev/null
+++ b/compiler/tflchef/core/src/Dataset.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DATASET_H__
+#define __DATASET_H__
+
#include <utility>
#include <vector>
+
/**
 * @brief Immutable sequence wrapper with a functional-style map operation
 */
template <typename T> class Dataset
{
public:
  /// @brief Construct by copying the given vector
  Dataset(const std::vector<T> &vec) : _vec{vec}
  {
    // DO NOTHING
  }

public:
  /// @brief Construct by taking ownership of the given vector
  Dataset(std::vector<T> &&vec) : _vec{std::move(vec)}
  {
    // DO NOTHING
  }

public:
  /**
   * @brief Apply 'f' to every element and return the resulting Dataset
   */
  template <typename Func> auto map(Func f) const -> Dataset<decltype(f(std::declval<T>()))>
  {
    using U = decltype(f(std::declval<T>()));
    std::vector<U> res;

    // Pre-allocate: the result has exactly as many elements as the source
    res.reserve(_vec.size());

    for (const auto &elem : _vec)
    {
      res.emplace_back(f(elem));
    }

    return Dataset<U>(std::move(res));
  }

public:
  /// @brief Access the underlying vector
  const std::vector<T> &vectorize(void) const { return _vec; }

private:
  std::vector<T> _vec;
};
+
+#endif // __DATASET_H__
diff --git a/compiler/tflchef/core/src/LexicalCast.cpp b/compiler/tflchef/core/src/LexicalCast.cpp
new file mode 100644
index 000000000..38a5f9290
--- /dev/null
+++ b/compiler/tflchef/core/src/LexicalCast.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LexicalCast.h"
+
+#include <cassert>
+#include <limits>
+
// Primary template is declared in "LexicalCast.h"; restating the declaration
// keeps these specializations self-contained.
template <typename Number> Number to_number(const std::string &s);

template <> float to_number(const std::string &s) { return std::stof(s); }
template <> int to_number(const std::string &s) { return std::stoi(s); }
template <> uint8_t to_number(const std::string &s)
{
  int temp = std::stoi(s);
  // Range check: std::stoi accepts any int; uint8_t only holds [0, 255]
  assert(temp >= 0);
  assert(temp <= std::numeric_limits<uint8_t>::max());
  return static_cast<uint8_t>(temp);
}
template <> bool to_number(const std::string &s)
{
  // Check the textual spellings BEFORE calling std::stoi: the original
  // expression (std::stoi(s) || s == "T" || ...) evaluated stoi first,
  // which throws std::invalid_argument for non-numeric input such as
  // "T" or "true" before the comparisons could run.
  if (s == "T" || s == "t" || s == "TRUE" || s == "true")
    return true;
  if (s == "F" || s == "f" || s == "FALSE" || s == "false")
    return false;

  // Otherwise interpret the string as an integer (non-zero => true)
  return std::stoi(s) != 0;
}
diff --git a/compiler/tflchef/core/src/LexicalCast.h b/compiler/tflchef/core/src/LexicalCast.h
new file mode 100644
index 000000000..4aeccb482
--- /dev/null
+++ b/compiler/tflchef/core/src/LexicalCast.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @brief This file provides string <-> number cast helpers
+ */
+#ifndef __LEXICAL_CAST_H__
+#define __LEXICAL_CAST_H__
+
+#include <string>
+
+/**
+ * @brief Return a numeric value that corresponds to a given string
+ *
+ * @note This function will throw an exception on casting failure
+ */
+template <typename Number> Number to_number(const std::string &s);
+
+#endif // __LEXICAL_CAST_H__
diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp
new file mode 100644
index 000000000..2c69efd4b
--- /dev/null
+++ b/compiler/tflchef/core/src/ModelChef.cpp
@@ -0,0 +1,765 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflchef/ModelChef.h"
+#include "Arguments.h"
+
+#include "Convert.h"
+
+#include "DataChef.h"
+#include "DataChefs.h"
+
+#include "OpChef.h"
+#include "OpChefs.h"
+
+#include "Dataset.h"
+
+#include <iterator>
+#include <map>
+#include <string>
+#include <vector>
+
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <numeric>
+#include <sstream>
+#include <stdexcept>
+
+namespace
+{
+
+template <typename InputIt> class RangedArguments : public Arguments
+{
+public:
+ RangedArguments(InputIt beg, InputIt end) : _beg{beg}, _end{end}
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint32_t count(void) const override { return _end - _beg; }
+
+public:
+ const std::string &value(uint32_t n) const override { return *(_beg + n); }
+
+private:
+ InputIt _beg;
+ InputIt _end;
+};
+
+template <typename InputIt> RangedArguments<InputIt> ranged_arguments(InputIt beg, InputIt end)
+{
+ return RangedArguments<InputIt>{beg, end};
+}
+
+} // namespace
+
+namespace
+{
+
+template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field)
+{
+ std::vector<T> res;
+ for (const auto &elem : field)
+ {
+ res.emplace_back(elem);
+ }
+ return res;
+}
+
+template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field)
+{
+ return Dataset<T>(as_vector<T>(field));
+}
+
+} // namespace
+
+namespace
+{
+
+template <typename T> using Dims = std::vector<T>;
+
+Dims<int32_t> as_dims(const tflchef::TensorShape &shape)
+{
+ std::vector<int32_t> res;
+
+ for (auto &dim : shape.dim())
+ {
+ res.emplace_back(static_cast<int32_t>(dim));
+ }
+
+ return res;
+}
+
+int32_t element_count(const Dims<int32_t> &dims)
+{
+ return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
+}
+
+} // namespace
+
+namespace
+{
+
+class GeneratedModelImpl final : public tflchef::GeneratedModel::Impl
+{
+public:
+ GeneratedModelImpl(std::unique_ptr<flatbuffers::FlatBufferBuilder> &&builder)
+ : _builder{std::move(builder)}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const char *base(void) const override
+ {
+ // Return the base address of generated flatbuffer model
+ return reinterpret_cast<const char *>(_builder->GetBufferPointer());
+ }
+
+public:
+ size_t size(void) const override
+ {
+ // Return the size of generated flatbuffer model
+ return _builder->GetSize();
+ }
+
+private:
+ std::unique_ptr<flatbuffers::FlatBufferBuilder> _builder;
+};
+
+} // namespace
+
+namespace
+{
+
/**
 * @brief Name -> entry map with owning storage
 */
template <typename T> class Registry
{
public:
  // Register 'entry' under 'name' (an existing entry with the same name is replaced)
  void add(const std::string &name, std::unique_ptr<T> &&entry) { _content[name] = std::move(entry); }

  // Return the entry registered under 'name' (throws std::out_of_range when absent)
  const T &lookup(const std::string &name) const { return *_content.at(name); }

private:
  std::map<std::string, std::unique_ptr<T>> _content;
};
+
// Registry of DataChefFactory instances; one registry exists per tensor
// data type (see data_chef_registry below)
struct DataChefRegistry final : public Registry<DataChefFactory>
{
};
+
+DataChefRegistry &data_chef_registry(const tflchef::TensorType &type)
+{
+ static DataChefRegistry s32;
+ static DataChefRegistry fp32;
+ static DataChefRegistry u8;
+ static DataChefRegistry boolean;
+
+ switch (type)
+ {
+ case tflchef::INT32:
+ return s32;
+ case tflchef::FLOAT32:
+ return fp32;
+ case tflchef::UINT8:
+ return u8;
+ case tflchef::BOOL:
+ return boolean;
+ default:
+ break;
+ }
+
+ throw std::runtime_error{"Unknown tensor type"};
+}
+
// Registry of OpChefFactory instances, keyed by operation type name
struct OpChefRegistry final : public Registry<OpChefFactory>
{
};

// Return the process-wide OpChefRegistry singleton
OpChefRegistry &op_chef_registry(void)
{
  static OpChefRegistry registry;
  return registry;
}
+
+// @brief This will prepare a set of unique operator codes in the mode recipe
+std::set<tflite::BuiltinOperator> gather_opcode_set(const ::tflchef::ModelRecipe &model_recipe)
+{
+ std::set<tflite::BuiltinOperator> opcode_set;
+ for (const auto &operation : model_recipe.operation())
+ {
+ auto op_chef = op_chef_registry().lookup(operation.type()).create(&operation);
+ opcode_set.insert(op_chef->code());
+ }
+
+ // Add ops used in Graphs(subgraphs)
+ for (int g = 0; g < model_recipe.graph_size(); ++g)
+ {
+ const auto &graph = model_recipe.graph(g);
+ for (const auto &operation : graph.operation())
+ {
+ auto op_chef = op_chef_registry().lookup(operation.type()).create(&operation);
+ opcode_set.insert(op_chef->code());
+ }
+ }
+
+ return opcode_set;
+}
+
+} // namespace
+
+namespace tflchef
+{
+
+/**
+ * @brief Generate a (in-memory) TensorFlow Lite model from a given model recipe
+ */
+GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
+{
+// Initialize Op Chef Registry
+#define OP_CHEF(NAME, FACTORY_CLASS) \
+ op_chef_registry().add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
+#include "OpChef.def"
+#undef OP_CHEF
+
+// Initialize Data Chef Registry
+#define DATA_CHEF(TYPE, NAME, FACTORY_CLASS) \
+ data_chef_registry(::tflchef::TYPE) \
+ .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
+#include "DataChef.def"
+#undef DATA_CHEF
+
+ //
+ // Create FlatBufferBuilder
+ //
+ auto flatbuffer_builder =
+ std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
+
+ // Operand-related
+ std::vector<flatbuffers::Offset<::tflite::Buffer>> buffer_vec;
+
+ // Operation-related
+ std::vector<flatbuffers::Offset<::tflite::OperatorCode>> code_vec;
+
+ // Graphs-related
+ std::vector<flatbuffers::Offset<::tflite::SubGraph>> subgraph_vec;
+
+ // Create OperatorCode
+ std::set<tflite::BuiltinOperator> opcode_set = gather_opcode_set(model_recipe);
+ for (auto opcode : opcode_set)
+ {
+ tflite::OperatorCodeBuilder code_builder{*flatbuffer_builder};
+ code_builder.add_builtin_code(opcode);
+ auto code = code_builder.Finish();
+ // Update OperatorCode vector
+ code_vec.emplace_back(code);
+ }
+
+ // Create an Empty Buffer
+ //
+ // Buffer 0 SHOULD be an empty buffer in TensorFlow Lite model file
+ // (Please refer to the comment for Tensor.buffer field in schema)
+ {
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_vec.emplace_back(buffer_builder.Finish());
+ }
+
+ //
+ // Create Main graph
+ //
+ {
+ // Operand-related
+ std::vector<flatbuffers::Offset<::tflite::Tensor>> tensor_vec;
+
+ // Operation-related
+ std::vector<flatbuffers::Offset<::tflite::Operator>> operator_vec;
+
+ // Tensor Name -> Tensor ID mapping (per Graph)
+ std::map<std::string, int32_t> symbol_table;
+
+ auto lookup = [&symbol_table](const std::string &name) { return symbol_table.at(name); };
+
+ int32_t buffer_start = buffer_vec.size();
+ int32_t buffer_index = 0;
+
+ // Create buffer(s) 1~n(I) for input(s)
+ const auto size_input = model_recipe.input_size();
+ for (int ci = 0; ci < size_input; ++ci)
+ {
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_vec.emplace_back(buffer_builder.Finish());
+ }
+ // Create buffer(s) n(I)+1~n(I)+n(O) for output(s)
+ const auto size_output = model_recipe.output_size();
+ for (int co = 0; co < size_output; ++co)
+ {
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_vec.emplace_back(buffer_builder.Finish());
+ }
+
+ // default name for main graph
+ std::string graph_name = "main";
+ if (model_recipe.has_name())
+ graph_name = model_recipe.name();
+
+ auto input_names = as_dataset(model_recipe.input()).vectorize();
+ auto output_names = as_dataset(model_recipe.output()).vectorize();
+
+ for (const auto &operand : model_recipe.operand())
+ {
+ assert(operand.has_name());
+
+ assert(operand.has_type());
+ assert(operand.has_shape());
+
+ std::vector<int32_t> dims = as_dims(operand.shape());
+
+ auto shape = flatbuffer_builder->CreateVector(dims);
+ auto name = flatbuffer_builder->CreateString(operand.name());
+
+ buffer_index = 0;
+
+ // Create Buffer if filler is specified
+ if (operand.has_filler())
+ {
+ const auto &filler = operand.filler();
+
+ assert(filler.has_tag());
+
+ auto args = ranged_arguments(filler.arg().begin(), filler.arg().end());
+ auto chef = data_chef_registry(operand.type()).lookup(filler.tag()).create(args);
+
+ assert(chef != nullptr);
+
+ // Create Data
+ auto data_vec = chef->generate(element_count(dims));
+ auto data = flatbuffer_builder->CreateVector(data_vec);
+
+ // Create Buffer
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_builder.add_data(data);
+ auto buffer = buffer_builder.Finish();
+
+ // Update Buffer Index & Vector
+ buffer_index = buffer_vec.size();
+ buffer_vec.emplace_back(buffer);
+ }
+ else
+ {
+ // if this is input or output, assign to that buffer_index
+ int idx = 0;
+ for (auto it = input_names.begin(); it != input_names.end(); ++it, ++idx)
+ {
+ if (*it == operand.name())
+ {
+ buffer_index = buffer_start + idx;
+ break;
+ }
+ }
+ if (buffer_index == 0)
+ {
+ idx = 0;
+ for (auto it = output_names.begin(); it != output_names.end(); ++it, ++idx)
+ {
+ if (*it == operand.name())
+ {
+ buffer_index = buffer_start + size_input + idx;
+ break;
+ }
+ }
+ }
+ }
+
+ flatbuffers::Offset<tflite::QuantizationParameters> quant_index;
+
+ // Create QuantizationParameters if quant is specified
+ if (operand.has_quant())
+ {
+ const auto &quant = operand.quant();
+
+ // Create each parameters
+ // NOTE if some parameters are not given, those will be set to default value
+ std::vector<float> quant_max_vec(quant.max_size());
+ std::vector<float> quant_min_vec(quant.min_size());
+ std::vector<float> quant_scale_vec(quant.scale_size());
+ std::vector<int64_t> quant_zero_point_vec(quant.zero_point_size());
+
+ for (uint32_t i = 0; i < quant.max_size(); ++i)
+ quant_max_vec.at(i) = quant.max(i);
+ for (uint32_t i = 0; i < quant.min_size(); ++i)
+ quant_min_vec.at(i) = quant.min(i);
+ for (uint32_t i = 0; i < quant.scale_size(); ++i)
+ quant_scale_vec.at(i) = quant.scale(i);
+ for (uint32_t i = 0; i < quant.zero_point_size(); ++i)
+ quant_zero_point_vec.at(i) = quant.zero_point(i);
+
+ auto quant_max = flatbuffer_builder->CreateVector(quant_max_vec);
+ auto quant_min = flatbuffer_builder->CreateVector(quant_min_vec);
+ auto quant_scale = flatbuffer_builder->CreateVector(quant_scale_vec);
+ auto quant_zero_point = flatbuffer_builder->CreateVector(quant_zero_point_vec);
+
+ // Create QuantizationParameters
+ tflite::QuantizationParametersBuilder quant_builder{*flatbuffer_builder};
+ quant_builder.add_max(quant_max);
+ quant_builder.add_min(quant_min);
+ quant_builder.add_scale(quant_scale);
+ quant_builder.add_zero_point(quant_zero_point);
+
+ // Update QuantizationParameters Index
+ quant_index = quant_builder.Finish();
+ }
+
+ // Create Tensor
+ tflite::TensorBuilder tensor_builder{*flatbuffer_builder};
+
+ tensor_builder.add_shape(shape);
+ tensor_builder.add_type(as_tflite_tensortype(operand.type()));
+ tensor_builder.add_buffer(buffer_index);
+ tensor_builder.add_name(name);
+ if (operand.has_quant())
+ tensor_builder.add_quantization(quant_index);
+
+ // Append!
+ tensor_vec.emplace_back(tensor_builder.Finish());
+
+ // Update Tensor Name -> Tensor Index Map
+ int32_t tensor_index = symbol_table.size();
+ const auto &tensor_name = operand.name();
+
+ symbol_table[tensor_name] = tensor_index;
+ }
+
+ // Create Operator
+ for (const auto &operation : model_recipe.operation())
+ {
+ assert(operation.has_type());
+
+ auto op_chef = op_chef_registry().lookup(operation.type()).create(&operation);
+
+ // Create 'inputs'
+ std::vector<int32_t> input_vec = as_dataset(operation.input()).map(lookup).vectorize();
+ auto inputs = flatbuffer_builder->CreateVector(input_vec);
+
+ // Create 'outputs'
+ std::vector<int32_t> output_vec = as_dataset(operation.output()).map(lookup).vectorize();
+ auto outputs = flatbuffer_builder->CreateVector(output_vec);
+
+ // Create Option
+ auto options = op_chef->value(*flatbuffer_builder);
+
+ // Create Operator
+ tflite::OperatorBuilder op_builder{*flatbuffer_builder};
+
+ // Get operator code index from opcode_set with assumption, order of
+ // opcode_set is same as that of code_vec
+ auto op_it = opcode_set.find(op_chef->code());
+ assert(op_it != opcode_set.end());
+ uint32_t opcode_index = std::distance(opcode_set.begin(), op_it);
+
+ op_builder.add_opcode_index(opcode_index);
+ op_builder.add_inputs(inputs);
+ op_builder.add_outputs(outputs);
+ op_builder.add_builtin_options_type(op_chef->type());
+ op_builder.add_builtin_options(options);
+
+ // Append Operator
+ operator_vec.emplace_back(op_builder.Finish());
+ }
+
+ // Create network input/output vector
+ std::vector<int32_t> input_vec = as_dataset(model_recipe.input()).map(lookup).vectorize();
+ std::vector<int32_t> output_vec = as_dataset(model_recipe.output()).map(lookup).vectorize();
+
+ // Create "SubGraph" arguments
+ auto tensors = flatbuffer_builder->CreateVector(tensor_vec);
+ auto inputs = flatbuffer_builder->CreateVector(input_vec);
+ auto outputs = flatbuffer_builder->CreateVector(output_vec);
+ auto operators = flatbuffer_builder->CreateVector(operator_vec);
+ auto name = flatbuffer_builder->CreateString(graph_name);
+
+ tflite::SubGraphBuilder subgraph_builder{*flatbuffer_builder};
+
+ subgraph_builder.add_tensors(tensors);
+ subgraph_builder.add_inputs(inputs);
+ subgraph_builder.add_outputs(outputs);
+ subgraph_builder.add_operators(operators);
+ subgraph_builder.add_name(name);
+
+ subgraph_vec.emplace_back(subgraph_builder.Finish());
+ }
+
+ //
+ // Create subgraphs if exist
+ // TODO refactor main graph and subgraphs generation to reduce duplicate codes
+ //
+ for (int g = 0; g < model_recipe.graph_size(); ++g)
+ {
+ // Operand-related
+ std::vector<flatbuffers::Offset<::tflite::Tensor>> tensor_vec;
+
+ // Operation-related
+ std::vector<flatbuffers::Offset<::tflite::Operator>> operator_vec;
+
+ // Tensor Name -> Tensor ID mapping (per Graph)
+ std::map<std::string, int32_t> symbol_table;
+
+ auto lookup = [&symbol_table](const std::string &name) { return symbol_table.at(name); };
+
+ const auto &graph = model_recipe.graph(g);
+
+ int32_t buffer_start = buffer_vec.size();
+ int32_t buffer_index = 0;
+
+ // Create buffer(s) for input(s)
+ const auto size_input = graph.input_size();
+ for (int ci = 0; ci < size_input; ++ci)
+ {
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_vec.emplace_back(buffer_builder.Finish());
+ }
+ // Create buffer(s) for output(s)
+ const auto size_output = graph.output_size();
+ for (int co = 0; co < size_output; ++co)
+ {
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_vec.emplace_back(buffer_builder.Finish());
+ }
+
+ // default name for sub graph
+  // TODO naming rule here may have conflict if recipe file provides it.
+ // fix this when this happens.
+ std::ostringstream stringStream;
+ stringStream << "sub_" << (g + 1);
+ std::string graph_name = stringStream.str();
+ if (graph.has_name())
+ graph_name = graph.name();
+
+ auto input_names = as_dataset(graph.input()).vectorize();
+ auto output_names = as_dataset(graph.output()).vectorize();
+
+ for (const auto &operand : graph.operand())
+ {
+ assert(operand.has_name());
+
+ assert(operand.has_type());
+ assert(operand.has_shape());
+
+ std::vector<int32_t> dims = as_dims(operand.shape());
+
+ auto shape = flatbuffer_builder->CreateVector(dims);
+ auto name = flatbuffer_builder->CreateString(operand.name());
+
+ // Create Buffer if filler is specified
+ if (operand.has_filler())
+ {
+ const auto &filler = operand.filler();
+
+ assert(filler.has_tag());
+
+ auto args = ranged_arguments(filler.arg().begin(), filler.arg().end());
+ auto chef = data_chef_registry(operand.type()).lookup(filler.tag()).create(args);
+
+ assert(chef != nullptr);
+
+ // Create Data
+ auto data_vec = chef->generate(element_count(dims));
+ auto data = flatbuffer_builder->CreateVector(data_vec);
+
+ // Create Buffer
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_builder.add_data(data);
+ auto buffer = buffer_builder.Finish();
+
+ // Update Buffer Index & Vector
+ buffer_index = buffer_vec.size();
+ buffer_vec.emplace_back(buffer);
+ }
+ else
+ {
+ // if this is input or output, assign to that buffer_index
+ int idx = 0;
+ buffer_index = 0;
+ for (auto it = input_names.begin(); it != input_names.end(); ++it, ++idx)
+ {
+ if (*it == operand.name())
+ {
+ buffer_index = buffer_start + idx;
+ break;
+ }
+ }
+ if (buffer_index == 0)
+ {
+ idx = 0;
+ for (auto it = output_names.begin(); it != output_names.end(); ++it, ++idx)
+ {
+ if (*it == operand.name())
+ {
+ buffer_index = buffer_start + size_input + idx;
+ break;
+ }
+ }
+ }
+ }
+ // NOTE buffer_index can be 0 when this operand does not have a filler or not I/O
+
+ flatbuffers::Offset<tflite::QuantizationParameters> quant_index;
+
+ // Create QuantizationParameters if quant is specified
+ if (operand.has_quant())
+ {
+ const auto &quant = operand.quant();
+
+ // Create each parameters
+ // NOTE if some parameters are not given, those will be set to default value
+ std::vector<float> quant_max_vec(quant.max_size());
+ std::vector<float> quant_min_vec(quant.min_size());
+ std::vector<float> quant_scale_vec(quant.scale_size());
+ std::vector<int64_t> quant_zero_point_vec(quant.zero_point_size());
+
+ for (uint32_t i = 0; i < quant.max_size(); ++i)
+ quant_max_vec.at(i) = quant.max(i);
+ for (uint32_t i = 0; i < quant.min_size(); ++i)
+ quant_min_vec.at(i) = quant.min(i);
+ for (uint32_t i = 0; i < quant.scale_size(); ++i)
+ quant_scale_vec.at(i) = quant.scale(i);
+ for (uint32_t i = 0; i < quant.zero_point_size(); ++i)
+ quant_zero_point_vec.at(i) = quant.zero_point(i);
+
+ auto quant_max = flatbuffer_builder->CreateVector(quant_max_vec);
+ auto quant_min = flatbuffer_builder->CreateVector(quant_min_vec);
+ auto quant_scale = flatbuffer_builder->CreateVector(quant_scale_vec);
+ auto quant_zero_point = flatbuffer_builder->CreateVector(quant_zero_point_vec);
+
+ // Create QuantizationParameters
+ tflite::QuantizationParametersBuilder quant_builder{*flatbuffer_builder};
+ quant_builder.add_max(quant_max);
+ quant_builder.add_min(quant_min);
+ quant_builder.add_scale(quant_scale);
+ quant_builder.add_zero_point(quant_zero_point);
+
+ // Update QuantizationParameters Index
+ quant_index = quant_builder.Finish();
+ }
+
+ // Create Tensor
+ tflite::TensorBuilder tensor_builder{*flatbuffer_builder};
+
+ tensor_builder.add_shape(shape);
+ tensor_builder.add_type(as_tflite_tensortype(operand.type()));
+ tensor_builder.add_buffer(buffer_index);
+ tensor_builder.add_name(name);
+ if (operand.has_quant())
+ tensor_builder.add_quantization(quant_index);
+
+ // Append!
+ tensor_vec.emplace_back(tensor_builder.Finish());
+
+ // Update Tensor Name -> Tensor Index Map
+ int32_t tensor_index = symbol_table.size();
+ const auto &tensor_name = operand.name();
+
+ symbol_table[tensor_name] = tensor_index;
+ }
+
+ // Create Operator
+ for (const auto &operation : graph.operation())
+ {
+ assert(operation.has_type());
+
+ auto op_chef = op_chef_registry().lookup(operation.type()).create(&operation);
+
+ // Create 'inputs'
+ std::vector<int32_t> input_vec = as_dataset(operation.input()).map(lookup).vectorize();
+ auto inputs = flatbuffer_builder->CreateVector(input_vec);
+
+ // Create 'outputs'
+ std::vector<int32_t> output_vec = as_dataset(operation.output()).map(lookup).vectorize();
+ auto outputs = flatbuffer_builder->CreateVector(output_vec);
+
+ // Create Option
+ auto options = op_chef->value(*flatbuffer_builder);
+
+ // Create Operator
+ tflite::OperatorBuilder op_builder{*flatbuffer_builder};
+
+ // Get operator code index from opcode_set with assumption, order of
+ // opcode_set is same as that of code_vec
+ auto op_it = opcode_set.find(op_chef->code());
+ assert(op_it != opcode_set.end());
+ uint32_t opcode_index = std::distance(opcode_set.begin(), op_it);
+
+ op_builder.add_opcode_index(opcode_index);
+ op_builder.add_inputs(inputs);
+ op_builder.add_outputs(outputs);
+ op_builder.add_builtin_options_type(op_chef->type());
+ op_builder.add_builtin_options(options);
+
+ // Append Operator
+ operator_vec.emplace_back(op_builder.Finish());
+ }
+
+ // Create network input/output vector
+ std::vector<int32_t> input_vec = as_dataset(graph.input()).map(lookup).vectorize();
+ std::vector<int32_t> output_vec = as_dataset(graph.output()).map(lookup).vectorize();
+
+ // Create "SubGraph" arguments
+ auto tensors = flatbuffer_builder->CreateVector(tensor_vec);
+ auto inputs = flatbuffer_builder->CreateVector(input_vec);
+ auto outputs = flatbuffer_builder->CreateVector(output_vec);
+ auto operators = flatbuffer_builder->CreateVector(operator_vec);
+ auto name = flatbuffer_builder->CreateString(graph_name);
+
+ tflite::SubGraphBuilder subgraph_builder{*flatbuffer_builder};
+
+ subgraph_builder.add_tensors(tensors);
+ subgraph_builder.add_inputs(inputs);
+ subgraph_builder.add_outputs(outputs);
+ subgraph_builder.add_operators(operators);
+ subgraph_builder.add_name(name);
+
+ subgraph_vec.emplace_back(subgraph_builder.Finish());
+ }
+
+ // Create "Model" arguments
+ auto buffers = flatbuffer_builder->CreateVector(buffer_vec);
+ auto operator_codes = flatbuffer_builder->CreateVector(code_vec);
+ auto subgraphs = flatbuffer_builder->CreateVector(subgraph_vec);
+ auto description = flatbuffer_builder->CreateString("Generated by tflchef");
+
+ // Create "Model"
+ tflite::ModelBuilder model_builder{*flatbuffer_builder};
+
+ model_builder.add_version(3);
+ model_builder.add_operator_codes(operator_codes);
+ model_builder.add_subgraphs(subgraphs);
+ model_builder.add_description(description);
+ model_builder.add_buffers(buffers);
+
+ auto model = model_builder.Finish();
+
+ // Finalize
+ ::tflite::FinishModelBuffer(*flatbuffer_builder, model);
+
+ // Return "GenerateModel"
+ return GeneratedModel{
+ std::unique_ptr<GeneratedModelImpl>(new GeneratedModelImpl(std::move(flatbuffer_builder)))};
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/core/src/Op/Abs.cpp b/compiler/tflchef/core/src/Op/Abs.cpp
new file mode 100644
index 000000000..dcb27784c
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Abs.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Abs.h"
+#include "Convert.h"
+
+// Build the TFLite AbsOptions table for this operation.
+//
+// ABS carries no attributes, so an empty options table is serialized; the
+// offset is returned type-erased (Union) for Operator.builtin_options.
+flatbuffers::Offset<void> AbsChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+  tflite::AbsOptionsBuilder abs_options_builder{fbb};
+
+  return abs_options_builder.Finish().Union();
+}
+
+// Create an AbsChef wrapping (not owning) the given recipe operation.
+std::unique_ptr<OpChef> AbsChefFactory::create(const tflchef::Operation *operation) const
+{
+  return std::unique_ptr<OpChef>{new AbsChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Abs.h b/compiler/tflchef/core/src/Op/Abs.h
new file mode 100644
index 000000000..5b694c6b6
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Abs.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_ABS_H__
+#define __OP_ABS_H__
+
+#include "OpChef.h"
+
+// Chef for the TFLite ABS operator.
+//
+// Maps a tflchef recipe Operation onto BuiltinOperator_ABS with (empty)
+// AbsOptions.
+class AbsChef final : public OpChef
+{
+public:
+  // Keeps a non-owning pointer to the recipe operation.
+  explicit AbsChef(const tflchef::Operation *operation) : _operation{operation}
+  {
+    // DO NOTHING
+  }
+
+public:
+  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_ABS; }
+
+  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_AbsOptions; }
+
+  // Serializes this operator's builtin options into the given FlatBuffer.
+  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+  // Not owned; lifetime managed by the caller of the factory.
+  const tflchef::Operation *_operation;
+};
+
+// Factory producing AbsChef instances from recipe operations.
+struct AbsChefFactory final : public OpChefFactory
+{
+  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_ABS_H__
diff --git a/compiler/tflchef/core/src/Op/Add.cpp b/compiler/tflchef/core/src/Op/Add.cpp
new file mode 100644
index 000000000..8679ba35e
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Add.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Add.h"
+#include "Convert.h"
+
+#include <cassert>
+
+// Build the TFLite AddOptions table for this operation.
+//
+// Requires add_options in the recipe (asserted); converts the recipe
+// activation into the TFLite fused-activation enum and returns the table
+// type-erased (Union) for Operator.builtin_options.
+flatbuffers::Offset<void> AddChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+  auto &operation = (*_operation);
+
+  assert(operation.has_add_options());
+
+  auto tflite_activation = as_tflite_activation(operation.add_options().activation());
+
+  tflite::AddOptionsBuilder add_options_builder{fbb};
+  add_options_builder.add_fused_activation_function(tflite_activation);
+
+  return add_options_builder.Finish().Union();
+}
+
+// Create an AddChef wrapping (not owning) the given recipe operation.
+std::unique_ptr<OpChef> AddChefFactory::create(const tflchef::Operation *operation) const
+{
+  return std::unique_ptr<OpChef>{new AddChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Add.h b/compiler/tflchef/core/src/Op/Add.h
new file mode 100644
index 000000000..29ddb9470
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Add.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_ADD_H__
+#define __OP_ADD_H__
+
+#include "OpChef.h"
+
+// Chef for the TFLite ADD operator.
+//
+// Maps a tflchef recipe Operation onto BuiltinOperator_ADD with AddOptions
+// (fused activation function).
+class AddChef final : public OpChef
+{
+public:
+  // Keeps a non-owning pointer to the recipe operation.
+  explicit AddChef(const tflchef::Operation *operation) : _operation{operation}
+  {
+    // DO NOTHING
+  }
+
+public:
+  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_ADD; }
+
+  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_AddOptions; }
+
+  // Serializes this operator's builtin options into the given FlatBuffer.
+  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+  // Not owned; lifetime managed by the caller of the factory.
+  const tflchef::Operation *_operation;
+};
+
+// Factory producing AddChef instances from recipe operations.
+struct AddChefFactory final : public OpChefFactory
+{
+  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_ADD_H__
diff --git a/compiler/tflchef/core/src/Op/ArgMax.cpp b/compiler/tflchef/core/src/Op/ArgMax.cpp
new file mode 100644
index 000000000..2c2995da5
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/ArgMax.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ArgMax.h"
+#include "Convert.h"
+
+#include <cassert>
+
+// Build the TFLite ArgMaxOptions table for this operation.
+//
+// Requires argmax_options in the recipe (asserted); converts the recipe
+// output type into the TFLite tensor-type enum and returns the table
+// type-erased (Union) for Operator.builtin_options.
+flatbuffers::Offset<void> ArgMaxChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+  auto &operation = (*_operation);
+
+  assert(operation.has_argmax_options());
+
+  auto tflite_output_type = as_tflite_tensortype(operation.argmax_options().output_type());
+
+  tflite::ArgMaxOptionsBuilder argmax_options_builder{fbb};
+  argmax_options_builder.add_output_type(tflite_output_type);
+
+  return argmax_options_builder.Finish().Union();
+}
+
+// Create an ArgMaxChef wrapping (not owning) the given recipe operation.
+std::unique_ptr<OpChef> ArgMaxChefFactory::create(const tflchef::Operation *operation) const
+{
+  return std::unique_ptr<OpChef>{new ArgMaxChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/ArgMax.h b/compiler/tflchef/core/src/Op/ArgMax.h
new file mode 100644
index 000000000..4033e0f6c
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/ArgMax.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_ARGMAX_H__
+#define __OP_ARGMAX_H__
+
+#include "OpChef.h"
+
+// Chef for the TFLite ARG_MAX operator.
+//
+// Maps a tflchef recipe Operation onto BuiltinOperator_ARG_MAX with
+// ArgMaxOptions (output tensor type).
+class ArgMaxChef final : public OpChef
+{
+public:
+  // Keeps a non-owning pointer to the recipe operation.
+  explicit ArgMaxChef(const tflchef::Operation *operation) : _operation{operation}
+  {
+    // DO NOTHING
+  }
+
+public:
+  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_ARG_MAX; }
+
+  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_ArgMaxOptions; }
+
+  // Serializes this operator's builtin options into the given FlatBuffer.
+  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+  // Not owned; lifetime managed by the caller of the factory.
+  const tflchef::Operation *_operation;
+};
+
+// Factory producing ArgMaxChef instances from recipe operations.
+struct ArgMaxChefFactory final : public OpChefFactory
+{
+  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_ARGMAX_H__
diff --git a/compiler/tflchef/core/src/Op/AveragePool2D.cpp b/compiler/tflchef/core/src/Op/AveragePool2D.cpp
new file mode 100644
index 000000000..84d6a7571
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/AveragePool2D.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AveragePool2D.h"
+#include "Convert.h"
+
+#include <cassert>
+
+// Build the TFLite Pool2DOptions table for this AVERAGE_POOL_2D operation.
+//
+// Requires averagepool2d_options in the recipe (asserted); converts padding
+// and activation to their TFLite enums, copies strides and filter size, and
+// returns the table type-erased (Union) for Operator.builtin_options.
+flatbuffers::Offset<void> AveragePool2DChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+  auto &operation = (*_operation);
+
+  assert(operation.has_averagepool2d_options());
+
+  auto options = operation.averagepool2d_options();
+
+  auto tflite_padding = as_tflite_padding(options.padding());
+  auto tflite_activation = as_tflite_activation(options.activation());
+
+  tflite::Pool2DOptionsBuilder options_builder{fbb};
+  options_builder.add_padding(tflite_padding);
+  options_builder.add_stride_h(options.stride_h());
+  options_builder.add_stride_w(options.stride_w());
+  options_builder.add_filter_width(options.filter_width());
+  options_builder.add_filter_height(options.filter_height());
+  options_builder.add_fused_activation_function(tflite_activation);
+
+  return options_builder.Finish().Union();
+}
+
+// Create an AveragePool2DChef wrapping (not owning) the given recipe operation.
+std::unique_ptr<OpChef> AveragePool2DChefFactory::create(const tflchef::Operation *operation) const
+{
+  return std::unique_ptr<OpChef>{new AveragePool2DChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/AveragePool2D.h b/compiler/tflchef/core/src/Op/AveragePool2D.h
new file mode 100644
index 000000000..652fae28b
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/AveragePool2D.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_AVERAGE_POOL_2D_H__
+#define __OP_AVERAGE_POOL_2D_H__
+
+#include "OpChef.h"
+
+// Chef for the TFLite AVERAGE_POOL_2D operator.
+//
+// Maps a tflchef recipe Operation onto BuiltinOperator_AVERAGE_POOL_2D with
+// Pool2DOptions (padding, strides, filter size, fused activation).
+class AveragePool2DChef final : public OpChef
+{
+public:
+  // Keeps a non-owning pointer to the recipe operation.
+  explicit AveragePool2DChef(const tflchef::Operation *operation) : _operation{operation}
+  {
+    // DO NOTHING
+  }
+
+public:
+  tflite::BuiltinOperator code(void) const override
+  {
+    return tflite::BuiltinOperator_AVERAGE_POOL_2D;
+  }
+
+  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_Pool2DOptions; }
+
+  // Serializes this operator's builtin options into the given FlatBuffer.
+  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+  // Not owned; lifetime managed by the caller of the factory.
+  const tflchef::Operation *_operation;
+};
+
+// Factory producing AveragePool2DChef instances from recipe operations.
+struct AveragePool2DChefFactory final : public OpChefFactory
+{
+  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_AVERAGE_POOL_2D_H__
diff --git a/compiler/tflchef/core/src/Op/BatchToSpaceND.cpp b/compiler/tflchef/core/src/Op/BatchToSpaceND.cpp
new file mode 100644
index 000000000..972f93256
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/BatchToSpaceND.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchToSpaceND.h"
+
+#include <cassert>
+
+// Build the TFLite BatchToSpaceNDOptions table for this operation.
+//
+// BATCH_TO_SPACE_ND carries no attributes, so an empty options table is
+// serialized and returned type-erased (Union) for Operator.builtin_options.
+flatbuffers::Offset<void> BatchToSpaceNDChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+  tflite::BatchToSpaceNDOptionsBuilder batch_to_space_nd_options_builder{fbb};
+
+  return batch_to_space_nd_options_builder.Finish().Union();
+}
+
+// Create a BatchToSpaceNDChef wrapping (not owning) the given recipe operation.
+std::unique_ptr<OpChef> BatchToSpaceNDChefFactory::create(const tflchef::Operation *operation) const
+{
+  return std::unique_ptr<OpChef>{new BatchToSpaceNDChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/BatchToSpaceND.h b/compiler/tflchef/core/src/Op/BatchToSpaceND.h
new file mode 100644
index 000000000..6ba1352ab
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/BatchToSpaceND.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_BATCHTOSPACEND_H__
+#define __OP_BATCHTOSPACEND_H__
+
+#include "OpChef.h"
+
+// Chef for the TFLite BATCH_TO_SPACE_ND operator.
+//
+// Maps a tflchef recipe Operation onto BuiltinOperator_BATCH_TO_SPACE_ND with
+// (empty) BatchToSpaceNDOptions.
+class BatchToSpaceNDChef final : public OpChef
+{
+public:
+  // Keeps a non-owning pointer to the recipe operation.
+  explicit BatchToSpaceNDChef(const tflchef::Operation *operation) : _operation{operation}
+  {
+    // DO NOTHING
+  }
+
+public:
+  tflite::BuiltinOperator code(void) const override
+  {
+    return tflite::BuiltinOperator_BATCH_TO_SPACE_ND;
+  }
+
+  tflite::BuiltinOptions type(void) const override
+  {
+    return tflite::BuiltinOptions_BatchToSpaceNDOptions;
+  }
+
+  // Serializes this operator's builtin options into the given FlatBuffer.
+  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+  // Not owned; lifetime managed by the caller of the factory.
+  const tflchef::Operation *_operation;
+};
+
+// Factory producing BatchToSpaceNDChef instances from recipe operations.
+struct BatchToSpaceNDChefFactory final : public OpChefFactory
+{
+  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_BATCHTOSPACEND_H__
diff --git a/compiler/tflchef/core/src/Op/Concatenation.cpp b/compiler/tflchef/core/src/Op/Concatenation.cpp
new file mode 100644
index 000000000..89b8a6122
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Concatenation.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Concatenation.h"
+#include "Convert.h"
+
+#include <cassert>
+
+// Build the TFLite ConcatenationOptions table for this operation.
+//
+// Requires concatenation_options in the recipe (asserted); copies the axis
+// and converts the activation to the TFLite fused-activation enum, then
+// returns the table type-erased (Union) for Operator.builtin_options.
+flatbuffers::Offset<void> ConcatenationChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+  auto &operation = (*_operation);
+
+  assert(operation.has_concatenation_options());
+
+  auto options = operation.concatenation_options();
+
+  auto tflite_activation = as_tflite_activation(options.activation());
+
+  tflite::ConcatenationOptionsBuilder options_builder{fbb};
+
+  options_builder.add_axis(options.axis());
+  options_builder.add_fused_activation_function(tflite_activation);
+
+  return options_builder.Finish().Union();
+}
+
+// Create a ConcatenationChef wrapping (not owning) the given recipe operation.
+std::unique_ptr<OpChef> ConcatenationChefFactory::create(const tflchef::Operation *operation) const
+{
+  return std::unique_ptr<OpChef>{new ConcatenationChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Concatenation.h b/compiler/tflchef/core/src/Op/Concatenation.h
new file mode 100644
index 000000000..a59310a1d
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Concatenation.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_CONCATENATION_H__
+#define __OP_CONCATENATION_H__
+
+#include "OpChef.h"
+
+// Chef for the TFLite CONCATENATION operator.
+//
+// Maps a tflchef recipe Operation onto BuiltinOperator_CONCATENATION with
+// ConcatenationOptions (axis, fused activation).
+class ConcatenationChef final : public OpChef
+{
+public:
+  // Keeps a non-owning pointer to the recipe operation.
+  explicit ConcatenationChef(const tflchef::Operation *operation) : _operation{operation}
+  {
+    // DO NOTHING
+  }
+
+public:
+  tflite::BuiltinOperator code(void) const override
+  {
+    return tflite::BuiltinOperator_CONCATENATION;
+  }
+
+  tflite::BuiltinOptions type(void) const override
+  {
+    return tflite::BuiltinOptions_ConcatenationOptions;
+  }
+
+  // Serializes this operator's builtin options into the given FlatBuffer.
+  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+  // Not owned; lifetime managed by the caller of the factory.
+  const tflchef::Operation *_operation;
+};
+
+// Factory producing ConcatenationChef instances from recipe operations.
+struct ConcatenationChefFactory final : public OpChefFactory
+{
+  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_CONCATENATION_H__
diff --git a/compiler/tflchef/core/src/Op/Conv2D.cpp b/compiler/tflchef/core/src/Op/Conv2D.cpp
new file mode 100644
index 000000000..d99c53351
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Conv2D.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conv2D.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> Conv2DChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_conv2d_options());
+
+ auto tflite_padding = as_tflite_padding(operation.conv2d_options().padding());
+ auto tflite_activation = as_tflite_activation(operation.conv2d_options().activation());
+
+ tflite::Conv2DOptionsBuilder conv2d_options_builder{fbb};
+ conv2d_options_builder.add_padding(tflite_padding);
+ conv2d_options_builder.add_stride_h(operation.conv2d_options().stride_h());
+ conv2d_options_builder.add_stride_w(operation.conv2d_options().stride_w());
+ conv2d_options_builder.add_fused_activation_function(tflite_activation);
+
+ return conv2d_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> Conv2DChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new Conv2DChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Conv2D.h b/compiler/tflchef/core/src/Op/Conv2D.h
new file mode 100644
index 000000000..22c45e89a
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Conv2D.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_CONV2D_H__
+#define __OP_CONV2D_H__
+
+#include "OpChef.h"
+
+class Conv2DChef final : public OpChef
+{
+public:
+ explicit Conv2DChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_CONV_2D; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_Conv2DOptions; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct Conv2DChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_CONV2D_H__
diff --git a/compiler/tflchef/core/src/Op/Cos.cpp b/compiler/tflchef/core/src/Op/Cos.cpp
new file mode 100644
index 000000000..547bee1a9
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Cos.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Cos.h"
+
+flatbuffers::Offset<void> CosChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::CosOptionsBuilder options_builder{fbb};
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> CosChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new CosChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Cos.h b/compiler/tflchef/core/src/Op/Cos.h
new file mode 100644
index 000000000..9bf8cbeab
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Cos.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_COS_H__
+#define __OP_COS_H__
+
+#include "OpChef.h"
+
+class CosChef final : public OpChef
+{
+public:
+ explicit CosChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_COS; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_CosOptions; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct CosChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_COS_H__
diff --git a/compiler/tflchef/core/src/Op/DepthwiseConv2D.cpp b/compiler/tflchef/core/src/Op/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..e04cf50ff
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/DepthwiseConv2D.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConv2D.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> DepthwiseConv2DChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_depthwiseconv2d_options());
+
+ auto options = operation.depthwiseconv2d_options();
+
+ auto tflite_padding = as_tflite_padding(options.padding());
+ auto tflite_activation = as_tflite_activation(options.activation());
+
+ tflite::DepthwiseConv2DOptionsBuilder options_builder{fbb};
+ options_builder.add_padding(tflite_padding);
+ options_builder.add_stride_w(options.stride_w());
+ options_builder.add_stride_h(options.stride_h());
+ options_builder.add_depth_multiplier(options.depth_multiplier());
+ options_builder.add_fused_activation_function(tflite_activation);
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef>
+DepthwiseConv2DChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new DepthwiseConv2DChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/DepthwiseConv2D.h b/compiler/tflchef/core/src/Op/DepthwiseConv2D.h
new file mode 100644
index 000000000..718ee7943
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/DepthwiseConv2D.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_DEPTHWISECONV2D_H__
+#define __OP_DEPTHWISECONV2D_H__
+
+#include "OpChef.h"
+
+class DepthwiseConv2DChef final : public OpChef
+{
+public:
+ explicit DepthwiseConv2DChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override
+ {
+ return tflite::BuiltinOperator_DEPTHWISE_CONV_2D;
+ }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_DepthwiseConv2DOptions;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct DepthwiseConv2DChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_DEPTHWISECONV2D_H__
diff --git a/compiler/tflchef/core/src/Op/Div.cpp b/compiler/tflchef/core/src/Op/Div.cpp
new file mode 100644
index 000000000..98eead815
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Div.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Div.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> DivChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_div_options());
+
+ auto tflite_activation = as_tflite_activation(operation.div_options().activation());
+
+ tflite::DivOptionsBuilder div_options_builder{fbb};
+ div_options_builder.add_fused_activation_function(tflite_activation);
+
+ return div_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> DivChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new DivChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Div.h b/compiler/tflchef/core/src/Op/Div.h
new file mode 100644
index 000000000..0d1063cce
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Div.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_DIV_H__
+#define __OP_DIV_H__
+
+#include "OpChef.h"
+
+class DivChef final : public OpChef
+{
+public:
+ explicit DivChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_DIV; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_DivOptions; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct DivChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_DIV_H__
diff --git a/compiler/tflchef/core/src/Op/Equal.cpp b/compiler/tflchef/core/src/Op/Equal.cpp
new file mode 100644
index 000000000..f7a39f03d
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Equal.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Equal.h"
+
+flatbuffers::Offset<void> EqualChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::EqualOptionsBuilder options_builder{fbb};
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> EqualChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new EqualChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Equal.h b/compiler/tflchef/core/src/Op/Equal.h
new file mode 100644
index 000000000..6e097991d
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Equal.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_EQUAL_H__
+#define __OP_EQUAL_H__
+
+#include "OpChef.h"
+
+class EqualChef final : public OpChef
+{
+public:
+ explicit EqualChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_EQUAL; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_EqualOptions; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct EqualChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_EQUAL_H__
diff --git a/compiler/tflchef/core/src/Op/Exp.cpp b/compiler/tflchef/core/src/Op/Exp.cpp
new file mode 100644
index 000000000..b3c8d7e73
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Exp.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Exp.h"
+#include "Convert.h"
+
+flatbuffers::Offset<void> ExpChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::ExpOptionsBuilder exp_options_builder{fbb};
+
+ return exp_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> ExpChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new ExpChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Exp.h b/compiler/tflchef/core/src/Op/Exp.h
new file mode 100644
index 000000000..422a3310f
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Exp.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_EXP_H__
+#define __OP_EXP_H__
+
+#include "OpChef.h"
+
+class ExpChef final : public OpChef
+{
+public:
+ explicit ExpChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_EXP; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_ExpOptions; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct ExpChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_EXP_H__
diff --git a/compiler/tflchef/core/src/Op/FloorDiv.cpp b/compiler/tflchef/core/src/Op/FloorDiv.cpp
new file mode 100644
index 000000000..0d531bede
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/FloorDiv.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FloorDiv.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> FloorDivChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::FloorDivOptionsBuilder floor_div_options_builder{fbb};
+ return floor_div_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> FloorDivChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new FloorDivChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/FloorDiv.h b/compiler/tflchef/core/src/Op/FloorDiv.h
new file mode 100644
index 000000000..151f24314
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/FloorDiv.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_FLOORDIV_H__
+#define __OP_FLOORDIV_H__
+
+#include "OpChef.h"
+
+class FloorDivChef final : public OpChef
+{
+public:
+ explicit FloorDivChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_FLOOR_DIV; }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_FloorDivOptions;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct FloorDivChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_FLOORDIV_H__
diff --git a/compiler/tflchef/core/src/Op/FullyConnected.cpp b/compiler/tflchef/core/src/Op/FullyConnected.cpp
new file mode 100644
index 000000000..45269916c
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/FullyConnected.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnected.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> FullyConnectedChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_fullyconnected_options());
+
+ auto tflite_activation = as_tflite_activation(operation.fullyconnected_options().activation());
+
+ tflite::FullyConnectedOptionsBuilder fc_options_builder{fbb};
+ fc_options_builder.add_fused_activation_function(tflite_activation);
+
+ return fc_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> FullyConnectedChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new FullyConnectedChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/FullyConnected.h b/compiler/tflchef/core/src/Op/FullyConnected.h
new file mode 100644
index 000000000..ea71012e6
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/FullyConnected.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_FULLYCONNECTED_H__
+#define __OP_FULLYCONNECTED_H__
+
+#include "OpChef.h"
+
+class FullyConnectedChef final : public OpChef
+{
+public:
+ explicit FullyConnectedChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override
+ {
+ return tflite::BuiltinOperator_FULLY_CONNECTED;
+ }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_FullyConnectedOptions;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct FullyConnectedChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_FULLYCONNECTED_H__
diff --git a/compiler/tflchef/core/src/Op/LogicalNot.cpp b/compiler/tflchef/core/src/Op/LogicalNot.cpp
new file mode 100644
index 000000000..26cdef308
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/LogicalNot.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LogicalNot.h"
+
+flatbuffers::Offset<void> LogicalNotChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::LogicalNotOptionsBuilder options_builder{fbb};
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> LogicalNotChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new LogicalNotChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/LogicalNot.h b/compiler/tflchef/core/src/Op/LogicalNot.h
new file mode 100644
index 000000000..d2ca21b93
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/LogicalNot.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_LOGICALNOT_H__
+#define __OP_LOGICALNOT_H__
+
+#include "OpChef.h"
+
+class LogicalNotChef final : public OpChef
+{
+public:
+ explicit LogicalNotChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_LOGICAL_NOT; }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_LogicalNotOptions;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct LogicalNotChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_LOGICALNOT_H__
diff --git a/compiler/tflchef/core/src/Op/LogicalOr.cpp b/compiler/tflchef/core/src/Op/LogicalOr.cpp
new file mode 100644
index 000000000..483373a81
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/LogicalOr.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LogicalOr.h"
+
+flatbuffers::Offset<void> LogicalOrChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::LogicalOrOptionsBuilder options_builder{fbb};
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> LogicalOrChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new LogicalOrChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/LogicalOr.h b/compiler/tflchef/core/src/Op/LogicalOr.h
new file mode 100644
index 000000000..b84c9a6ab
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/LogicalOr.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_LOGICALOR_H__
+#define __OP_LOGICALOR_H__
+
+#include "OpChef.h"
+
+class LogicalOrChef final : public OpChef
+{
+public:
+ explicit LogicalOrChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_LOGICAL_OR; }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_LogicalOrOptions;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct LogicalOrChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_LOGICALOR_H__
diff --git a/compiler/tflchef/core/src/Op/MaxPool2D.cpp b/compiler/tflchef/core/src/Op/MaxPool2D.cpp
new file mode 100644
index 000000000..666ac98f0
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/MaxPool2D.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPool2D.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> MaxPool2DChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_maxpool2d_options());
+
+ auto options = operation.maxpool2d_options();
+
+ auto tflite_padding = as_tflite_padding(options.padding());
+ auto tflite_activation = as_tflite_activation(options.activation());
+
+ tflite::Pool2DOptionsBuilder options_builder{fbb};
+ options_builder.add_padding(tflite_padding);
+ options_builder.add_stride_h(options.stride_h());
+ options_builder.add_stride_w(options.stride_w());
+ options_builder.add_filter_width(options.filter_width());
+ options_builder.add_filter_height(options.filter_height());
+ options_builder.add_fused_activation_function(tflite_activation);
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> MaxPool2DChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new MaxPool2DChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/MaxPool2D.h b/compiler/tflchef/core/src/Op/MaxPool2D.h
new file mode 100644
index 000000000..56960a0d2
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/MaxPool2D.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_MAX_POOL_2D_H__
+#define __OP_MAX_POOL_2D_H__
+
+#include "OpChef.h"
+
// OpChef for the MAX_POOL_2D builtin operator: maps a tflchef recipe
// operation onto the corresponding tflite operator code and options table.
class MaxPool2DChef final : public OpChef
{
public:
  // Keeps a non-owning pointer to the recipe operation; the caller must
  // guarantee it outlives this chef.
  explicit MaxPool2DChef(const tflchef::Operation *operation) : _operation{operation}
  {
    // DO NOTHING
  }

public:
  // Builtin operator code emitted into the generated model.
  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_MAX_POOL_2D; }

  // MAX_POOL_2D uses the shared Pool2DOptions table.
  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_Pool2DOptions; }

  // Serializes the recipe's maxpool2d_options into fbb (see MaxPool2D.cpp).
  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;

private:
  const tflchef::Operation *_operation; // non-owning
};

// Factory hook used by the op registry to instantiate MaxPool2DChef.
struct MaxPool2DChefFactory final : public OpChefFactory
{
  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
};
+
+#endif // __OP_MAX_POOL_2D_H__
diff --git a/compiler/tflchef/core/src/Op/Mean.cpp b/compiler/tflchef/core/src/Op/Mean.cpp
new file mode 100644
index 000000000..def8f7b3b
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Mean.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Mean.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> MeanChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_mean_options());
+
+ auto keep_dims = operation.mean_options().keep_dims();
+
+ tflite::ReducerOptionsBuilder mean_options_builder{fbb};
+ mean_options_builder.add_keep_dims(keep_dims);
+
+ return mean_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> MeanChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new MeanChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Mean.h b/compiler/tflchef/core/src/Op/Mean.h
new file mode 100644
index 000000000..9032aef3f
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Mean.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_MEAN_H__
+#define __OP_MEAN_H__
+
+#include "OpChef.h"
+
// OpChef for the MEAN builtin operator: maps a tflchef recipe operation
// onto the corresponding tflite operator code and options table.
class MeanChef final : public OpChef
{
public:
  // Keeps a non-owning pointer to the recipe operation; the caller must
  // guarantee it outlives this chef.
  explicit MeanChef(const tflchef::Operation *operation) : _operation{operation}
  {
    // DO NOTHING
  }

public:
  // Builtin operator code emitted into the generated model.
  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_MEAN; }

  // MEAN uses the ReducerOptions table shared by reduction operators.
  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_ReducerOptions; }

  // Serializes the recipe's mean_options into fbb (see Mean.cpp).
  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;

private:
  const tflchef::Operation *_operation; // non-owning
};

// Factory hook used by the op registry to instantiate MeanChef.
struct MeanChefFactory final : public OpChefFactory
{
  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
};
+
+#endif // __OP_MEAN_H__
diff --git a/compiler/tflchef/core/src/Op/Mul.cpp b/compiler/tflchef/core/src/Op/Mul.cpp
new file mode 100644
index 000000000..10ec918c2
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Mul.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Mul.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> MulChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_mul_options());
+
+ auto tflite_activation = as_tflite_activation(operation.mul_options().activation());
+
+ tflite::MulOptionsBuilder mul_options_builder{fbb};
+ mul_options_builder.add_fused_activation_function(tflite_activation);
+
+ return mul_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> MulChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new MulChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Mul.h b/compiler/tflchef/core/src/Op/Mul.h
new file mode 100644
index 000000000..7f1d07ac9
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Mul.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_MUL_H__
+#define __OP_MUL_H__
+
+#include "OpChef.h"
+
// OpChef for the MUL builtin operator: maps a tflchef recipe operation
// onto the corresponding tflite operator code and options table.
class MulChef final : public OpChef
{
public:
  // Keeps a non-owning pointer to the recipe operation; the caller must
  // guarantee it outlives this chef.
  explicit MulChef(const tflchef::Operation *operation) : _operation{operation}
  {
    // DO NOTHING
  }

public:
  // Builtin operator code emitted into the generated model.
  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_MUL; }

  // MUL carries its own MulOptions table (fused activation).
  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_MulOptions; }

  // Serializes the recipe's mul_options into fbb (see Mul.cpp).
  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;

private:
  const tflchef::Operation *_operation; // non-owning
};

// Factory hook used by the op registry to instantiate MulChef.
struct MulChefFactory final : public OpChefFactory
{
  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
};
+
+#endif // __OP_MUL_H__
diff --git a/compiler/tflchef/core/src/Op/Pack.cpp b/compiler/tflchef/core/src/Op/Pack.cpp
new file mode 100644
index 000000000..2532ac744
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Pack.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pack.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> PackChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_pack_options());
+
+ tflite::PackOptionsBuilder pack_options_builder{fbb};
+ pack_options_builder.add_values_count(operation.pack_options().values_count());
+ pack_options_builder.add_axis(operation.pack_options().axis());
+
+ return pack_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> PackChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new PackChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Pack.h b/compiler/tflchef/core/src/Op/Pack.h
new file mode 100644
index 000000000..54bdc9338
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Pack.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_PACK_H__
+#define __OP_PACK_H__
+
+#include "OpChef.h"
+
// OpChef for the PACK builtin operator: maps a tflchef recipe operation
// onto the corresponding tflite operator code and options table.
class PackChef final : public OpChef
{
public:
  // Keeps a non-owning pointer to the recipe operation; the caller must
  // guarantee it outlives this chef.
  explicit PackChef(const tflchef::Operation *operation) : _operation{operation}
  {
    // DO NOTHING
  }

public:
  // Builtin operator code emitted into the generated model.
  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_PACK; }

  // PACK carries its own PackOptions table (values_count, axis).
  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_PackOptions; }

  // Serializes the recipe's pack_options into fbb (see Pack.cpp).
  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;

private:
  const tflchef::Operation *_operation; // non-owning
};

// Factory hook used by the op registry to instantiate PackChef.
struct PackChefFactory final : public OpChefFactory
{
  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
};
+
+#endif // __OP_PACK_H__
diff --git a/compiler/tflchef/core/src/Op/Pad.cpp b/compiler/tflchef/core/src/Op/Pad.cpp
new file mode 100644
index 000000000..d0c471981
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Pad.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pad.h"
+
+flatbuffers::Offset<void> PadChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::PadOptionsBuilder pad_options_builder{fbb};
+ return pad_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> PadChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new PadChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Pad.h b/compiler/tflchef/core/src/Op/Pad.h
new file mode 100644
index 000000000..9da9c9b8a
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Pad.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_PAD_H__
+#define __OP_PAD_H__
+
+#include "OpChef.h"
+
// OpChef for the PAD builtin operator: maps a tflchef recipe operation
// onto the corresponding tflite operator code and options table.
class PadChef final : public OpChef
{
public:
  // Keeps a non-owning pointer to the recipe operation; the caller must
  // guarantee it outlives this chef.
  explicit PadChef(const tflchef::Operation *operation) : _operation{operation}
  {
    // DO NOTHING
  }

public:
  // Builtin operator code emitted into the generated model.
  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_PAD; }

  // PAD carries a PadOptions table (currently field-less).
  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_PadOptions; }

  // Emits an empty PadOptions table into fbb (see Pad.cpp).
  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;

private:
  const tflchef::Operation *_operation; // non-owning
};

// Factory hook used by the op registry to instantiate PadChef.
struct PadChefFactory final : public OpChefFactory
{
  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
};
+
+#endif // __OP_PAD_H__
diff --git a/compiler/tflchef/core/src/Op/ReLU.cpp b/compiler/tflchef/core/src/Op/ReLU.cpp
new file mode 100644
index 000000000..3fb8d3f46
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/ReLU.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU.h"
+
+flatbuffers::Offset<void> ReLUChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ return flatbuffers::Offset<void>();
+}
+
+std::unique_ptr<OpChef> ReLUChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new ReLUChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/ReLU.h b/compiler/tflchef/core/src/Op/ReLU.h
new file mode 100644
index 000000000..778458cb8
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/ReLU.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_RELU_H__
+#define __OP_RELU_H__
+
+#include "OpChef.h"
+
// OpChef for the RELU builtin operator: maps a tflchef recipe operation
// onto the corresponding tflite operator code. RELU has no options.
class ReLUChef final : public OpChef
{
public:
  // Keeps a non-owning pointer to the recipe operation; the caller must
  // guarantee it outlives this chef.
  explicit ReLUChef(const tflchef::Operation *operation) : _operation{operation}
  {
    // DO NOTHING
  }

public:
  // Builtin operator code emitted into the generated model.
  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_RELU; }

  // RELU carries no builtin options table.
  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_NONE; }

  // Returns a null offset — RELU has no options to serialize (see ReLU.cpp).
  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;

private:
  const tflchef::Operation *_operation; // non-owning
};

// Factory hook used by the op registry to instantiate ReLUChef.
struct ReLUChefFactory final : public OpChefFactory
{
  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
};
+
+#endif // __OP_RELU_H__
diff --git a/compiler/tflchef/core/src/Op/ReLU6.cpp b/compiler/tflchef/core/src/Op/ReLU6.cpp
new file mode 100644
index 000000000..6fe9bcbfd
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/ReLU6.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU6.h"
+
+flatbuffers::Offset<void> ReLU6Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ return flatbuffers::Offset<void>();
+}
+
+std::unique_ptr<OpChef> ReLU6ChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new ReLU6Chef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/ReLU6.h b/compiler/tflchef/core/src/Op/ReLU6.h
new file mode 100644
index 000000000..45a27cac2
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/ReLU6.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_RELU6_H__
+#define __OP_RELU6_H__
+
+#include "OpChef.h"
+
// OpChef for the RELU6 builtin operator: maps a tflchef recipe operation
// onto the corresponding tflite operator code. RELU6 has no options.
class ReLU6Chef final : public OpChef
{
public:
  // Keeps a non-owning pointer to the recipe operation; the caller must
  // guarantee it outlives this chef.
  explicit ReLU6Chef(const tflchef::Operation *operation) : _operation{operation}
  {
    // DO NOTHING
  }

public:
  // Builtin operator code emitted into the generated model.
  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_RELU6; }

  // RELU6 carries no builtin options table.
  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_NONE; }

  // Returns a null offset — RELU6 has no options to serialize (see ReLU6.cpp).
  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;

private:
  const tflchef::Operation *_operation; // non-owning
};

// Factory hook used by the op registry to instantiate ReLU6Chef.
struct ReLU6ChefFactory final : public OpChefFactory
{
  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
};
+
+#endif // __OP_RELU6_H__
diff --git a/compiler/tflchef/core/src/Op/Reshape.cpp b/compiler/tflchef/core/src/Op/Reshape.cpp
new file mode 100644
index 000000000..99555e898
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Reshape.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reshape.h"
+#include "Convert.h"
+
+#include <cassert>
+#include <vector>
+
+namespace
+{
+
+std::vector<int32_t> vector_new_shape(const tflchef::ReshapeOptions &options)
+{
+ std::vector<int32_t> shapes;
+
+ for (int i = 0; i < options.new_shape_size(); ++i)
+ {
+ shapes.push_back(options.new_shape(i));
+ }
+
+ return shapes;
+}
+
+} // namespace
+
+flatbuffers::Offset<void> ReshapeChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_reshape_options());
+
+ auto options = operation.reshape_options();
+ auto shapes = vector_new_shape(options);
+ // Note: 'CreateVector' should be placed before 'options_builder'
+ // Read flatbuffers.h 'void NotNested()' for more information
+ auto fb_new_shape = fbb.CreateVector(shapes);
+
+ tflite::ReshapeOptionsBuilder options_builder{fbb};
+
+ options_builder.add_new_shape(fb_new_shape);
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> ReshapeChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new ReshapeChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Reshape.h b/compiler/tflchef/core/src/Op/Reshape.h
new file mode 100644
index 000000000..78e91dada
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Reshape.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_RESHAPE_H__
+#define __OP_RESHAPE_H__
+
+#include "OpChef.h"
+
// OpChef for the RESHAPE builtin operator: maps a tflchef recipe operation
// onto the corresponding tflite operator code and options table.
class ReshapeChef final : public OpChef
{
public:
  // Keeps a non-owning pointer to the recipe operation; the caller must
  // guarantee it outlives this chef.
  explicit ReshapeChef(const tflchef::Operation *operation) : _operation{operation}
  {
    // DO NOTHING
  }

public:
  // Builtin operator code emitted into the generated model.
  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_RESHAPE; }

  // RESHAPE carries its own ReshapeOptions table (new_shape vector).
  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_ReshapeOptions; }

  // Serializes the recipe's reshape_options into fbb (see Reshape.cpp).
  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;

private:
  const tflchef::Operation *_operation; // non-owning
};

// Factory hook used by the op registry to instantiate ReshapeChef.
struct ReshapeChefFactory final : public OpChefFactory
{
  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
};
+
+#endif // __OP_RESHAPE_H__
diff --git a/compiler/tflchef/core/src/Op/Rsqrt.cpp b/compiler/tflchef/core/src/Op/Rsqrt.cpp
new file mode 100644
index 000000000..fa837f6fa
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Rsqrt.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Rsqrt.h"
+
+flatbuffers::Offset<void> RsqrtChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ // No tflite option for Rsqrt. Use void.
+ return flatbuffers::Offset<void>();
+}
+
+std::unique_ptr<OpChef> RsqrtChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new RsqrtChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Rsqrt.h b/compiler/tflchef/core/src/Op/Rsqrt.h
new file mode 100644
index 000000000..657f51ccb
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Rsqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_RSQRT_H__
+#define __OP_RSQRT_H__
+
+#include "OpChef.h"
+
// OpChef for the RSQRT builtin operator: maps a tflchef recipe operation
// onto the corresponding tflite operator code. RSQRT has no options.
class RsqrtChef final : public OpChef
{
public:
  // Keeps a non-owning pointer to the recipe operation; the caller must
  // guarantee it outlives this chef.
  explicit RsqrtChef(const tflchef::Operation *operation) : _operation{operation}
  {
    // DO NOTHING
  }

public:
  // Builtin operator code emitted into the generated model.
  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_RSQRT; }

  // RSQRT carries no builtin options table.
  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_NONE; }

  // Returns a null offset — RSQRT has no options to serialize (see Rsqrt.cpp).
  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;

private:
  const tflchef::Operation *_operation; // non-owning
};

// Factory hook used by the op registry to instantiate RsqrtChef.
struct RsqrtChefFactory final : public OpChefFactory
{
  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
};
+
+#endif // __OP_RSQRT_H__
diff --git a/compiler/tflchef/core/src/Op/Shape.cpp b/compiler/tflchef/core/src/Op/Shape.cpp
new file mode 100644
index 000000000..74b1894da
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Shape.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Shape.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> ShapeChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_shape_options());
+
+ auto tflite_out_type = as_tflite_tensortype(operation.shape_options().out_type());
+
+ tflite::ShapeOptionsBuilder shape_options_builder{fbb};
+ shape_options_builder.add_out_type(tflite_out_type);
+
+ return shape_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> ShapeChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new ShapeChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Shape.h b/compiler/tflchef/core/src/Op/Shape.h
new file mode 100644
index 000000000..ddaeb1d95
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Shape.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_SHAPE_H__
+#define __OP_SHAPE_H__
+
+#include "OpChef.h"
+
// OpChef for the SHAPE builtin operator: maps a tflchef recipe operation
// onto the corresponding tflite operator code and options table.
class ShapeChef final : public OpChef
{
public:
  // Keeps a non-owning pointer to the recipe operation; the caller must
  // guarantee it outlives this chef.
  explicit ShapeChef(const tflchef::Operation *operation) : _operation{operation}
  {
    // DO NOTHING
  }

public:
  // Builtin operator code emitted into the generated model.
  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_SHAPE; }

  // SHAPE carries its own ShapeOptions table (out_type).
  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_ShapeOptions; }

  // Serializes the recipe's shape_options into fbb (see Shape.cpp).
  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;

private:
  const tflchef::Operation *_operation; // non-owning
};

// Factory hook used by the op registry to instantiate ShapeChef.
struct ShapeChefFactory final : public OpChefFactory
{
  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
};
+
+#endif // __OP_SHAPE_H__
diff --git a/compiler/tflchef/core/src/Op/Softmax.cpp b/compiler/tflchef/core/src/Op/Softmax.cpp
new file mode 100644
index 000000000..a554e0d81
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Softmax.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Softmax.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> SoftmaxChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_softmax_options());
+
+ auto tflite_beta = operation.softmax_options().beta();
+
+ tflite::SoftmaxOptionsBuilder soft_options_builder{fbb};
+ soft_options_builder.add_beta(tflite_beta);
+
+ return soft_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> SoftmaxChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new SoftmaxChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Softmax.h b/compiler/tflchef/core/src/Op/Softmax.h
new file mode 100644
index 000000000..8b3f0ebf6
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Softmax.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_SOFTMAX_H__
+#define __OP_SOFTMAX_H__
+
+#include "OpChef.h"
+
// OpChef for the SOFTMAX builtin operator: maps a tflchef recipe operation
// onto the corresponding tflite operator code and options table.
class SoftmaxChef final : public OpChef
{
public:
  // Keeps a non-owning pointer to the recipe operation; the caller must
  // guarantee it outlives this chef.
  explicit SoftmaxChef(const tflchef::Operation *operation) : _operation{operation}
  {
    // DO NOTHING
  }

public:
  // Builtin operator code emitted into the generated model.
  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_SOFTMAX; }

  // SOFTMAX carries its own SoftmaxOptions table (beta).
  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_SoftmaxOptions; }

  // Serializes the recipe's softmax_options into fbb (see Softmax.cpp).
  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;

private:
  const tflchef::Operation *_operation; // non-owning
};

// Factory hook used by the op registry to instantiate SoftmaxChef.
struct SoftmaxChefFactory final : public OpChefFactory
{
  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
};
+
+#endif // __OP_SOFTMAX_H__
diff --git a/compiler/tflchef/core/src/Op/Sqrt.cpp b/compiler/tflchef/core/src/Op/Sqrt.cpp
new file mode 100644
index 000000000..101a8130b
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Sqrt.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sqrt.h"
+
+flatbuffers::Offset<void> SqrtChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ return flatbuffers::Offset<void>();
+}
+
+std::unique_ptr<OpChef> SqrtChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new SqrtChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Sqrt.h b/compiler/tflchef/core/src/Op/Sqrt.h
new file mode 100644
index 000000000..2f91a99e3
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Sqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_SQRT_H__
+#define __OP_SQRT_H__
+
+#include "OpChef.h"
+
+class SqrtChef final : public OpChef
+{
+public:
+ explicit SqrtChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_SQRT; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_NONE; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct SqrtChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_SQRT_H__
diff --git a/compiler/tflchef/core/src/Op/Sub.cpp b/compiler/tflchef/core/src/Op/Sub.cpp
new file mode 100644
index 000000000..0ebb1d26c
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Sub.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sub.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> SubChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_sub_options());
+
+ auto tflite_activation = as_tflite_activation(operation.sub_options().activation());
+
+ tflite::SubOptionsBuilder sub_options_builder{fbb};
+ sub_options_builder.add_fused_activation_function(tflite_activation);
+
+ return sub_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> SubChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new SubChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Sub.h b/compiler/tflchef/core/src/Op/Sub.h
new file mode 100644
index 000000000..dfebc06c0
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Sub.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_SUB_H__
+#define __OP_SUB_H__
+
+#include "OpChef.h"
+
+class SubChef final : public OpChef
+{
+public:
+ explicit SubChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_SUB; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_SubOptions; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct SubChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_SUB_H__
diff --git a/compiler/tflchef/core/src/Op/Tanh.cpp b/compiler/tflchef/core/src/Op/Tanh.cpp
new file mode 100644
index 000000000..c25cad8f0
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Tanh.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tanh.h"
+
+flatbuffers::Offset<void> TanhChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ // No tflite option for Tanh. Use void.
+ return flatbuffers::Offset<void>();
+}
+
+std::unique_ptr<OpChef> TanhChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new TanhChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Tanh.h b/compiler/tflchef/core/src/Op/Tanh.h
new file mode 100644
index 000000000..f8f707a04
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Tanh.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_TANH_H__
+#define __OP_TANH_H__
+
+#include "OpChef.h"
+
+class TanhChef final : public OpChef
+{
+public:
+ explicit TanhChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_TANH; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_NONE; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct TanhChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_TANH_H__
diff --git a/compiler/tflchef/core/src/Op/Transpose.cpp b/compiler/tflchef/core/src/Op/Transpose.cpp
new file mode 100644
index 000000000..caae6cfa8
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Transpose.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Transpose.h"
+#include "Convert.h"
+
+#include <cassert>
+#include <vector>
+
+flatbuffers::Offset<void> TransposeChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::TransposeOptionsBuilder options_builder{fbb};
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> TransposeChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new TransposeChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Transpose.h b/compiler/tflchef/core/src/Op/Transpose.h
new file mode 100644
index 000000000..bb30f7bc9
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Transpose.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_TRANSPOSE_H__
+#define __OP_TRANSPOSE_H__
+
+#include "OpChef.h"
+
+class TransposeChef final : public OpChef
+{
+public:
+ explicit TransposeChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_TRANSPOSE; }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_TransposeOptions;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct TransposeChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_TRANSPOSE_H__
diff --git a/compiler/tflchef/core/src/OpChef.def b/compiler/tflchef/core/src/OpChef.def
new file mode 100644
index 000000000..a25250c46
--- /dev/null
+++ b/compiler/tflchef/core/src/OpChef.def
@@ -0,0 +1,37 @@
+#ifndef OP_CHEF
+#error "Define OP first"
+#endif // OP_CHEF
+
+// Please keep the list in alphabetical order
+// OP_CHEF(NAME, FACTORY_CLASS)
+OP_CHEF(Abs, AbsChefFactory)
+OP_CHEF(Add, AddChefFactory)
+OP_CHEF(ArgMax, ArgMaxChefFactory)
+OP_CHEF(BatchToSpaceND, BatchToSpaceNDChefFactory)
+OP_CHEF(AveragePool2D, AveragePool2DChefFactory)
+OP_CHEF(Concatenation, ConcatenationChefFactory)
+OP_CHEF(Conv2D, Conv2DChefFactory)
+OP_CHEF(Cos, CosChefFactory)
+OP_CHEF(DepthwiseConv2D, DepthwiseConv2DChefFactory)
+OP_CHEF(Div, DivChefFactory)
+OP_CHEF(Equal, EqualChefFactory)
+OP_CHEF(Exp, ExpChefFactory)
+OP_CHEF(FloorDiv, FloorDivChefFactory)
+OP_CHEF(FullyConnected, FullyConnectedChefFactory)
+OP_CHEF(LogicalNot, LogicalNotChefFactory)
+OP_CHEF(LogicalOr, LogicalOrChefFactory)
+OP_CHEF(MaxPool2D, MaxPool2DChefFactory)
+OP_CHEF(Mean, MeanChefFactory)
+OP_CHEF(Mul, MulChefFactory)
+OP_CHEF(Pack, PackChefFactory)
+OP_CHEF(Pad, PadChefFactory)
+OP_CHEF(ReLU, ReLUChefFactory)
+OP_CHEF(ReLU6, ReLU6ChefFactory)
+OP_CHEF(Reshape, ReshapeChefFactory)
+OP_CHEF(Rsqrt, RsqrtChefFactory)
+OP_CHEF(Shape, ShapeChefFactory)
+OP_CHEF(Softmax, SoftmaxChefFactory)
+OP_CHEF(Sqrt, SqrtChefFactory)
+OP_CHEF(Sub, SubChefFactory)
+OP_CHEF(Tanh, TanhChefFactory)
+OP_CHEF(Transpose, TransposeChefFactory)
diff --git a/compiler/tflchef/core/src/OpChef.h b/compiler/tflchef/core/src/OpChef.h
new file mode 100644
index 000000000..0b7d9cf08
--- /dev/null
+++ b/compiler/tflchef/core/src/OpChef.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_CHEF_H__
+#define __OP_CHEF_H__
+
+#include <tflchef.pb.h>
+#include <mio/tflite/schema_generated.h>
+
+#include <memory>
+
+struct OpChef
+{
+ virtual ~OpChef() = default;
+
+ virtual tflite::BuiltinOperator code(void) const = 0;
+ virtual tflite::BuiltinOptions type(void) const = 0;
+ virtual flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const = 0;
+};
+
+struct OpChefFactory
+{
+ virtual ~OpChefFactory() = default;
+
+ virtual std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const = 0;
+};
+
+#endif // __OP_CHEF_H__
diff --git a/compiler/tflchef/core/src/OpChefs.h b/compiler/tflchef/core/src/OpChefs.h
new file mode 100644
index 000000000..65dbd4b92
--- /dev/null
+++ b/compiler/tflchef/core/src/OpChefs.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_CHEFS_H__
+#define __OP_CHEFS_H__
+
+#include "Op/Abs.h"
+#include "Op/Add.h"
+#include "Op/ArgMax.h"
+#include "Op/AveragePool2D.h"
+#include "Op/BatchToSpaceND.h"
+#include "Op/Concatenation.h"
+#include "Op/Conv2D.h"
+#include "Op/Cos.h"
+#include "Op/DepthwiseConv2D.h"
+#include "Op/Div.h"
+#include "Op/Equal.h"
+#include "Op/Exp.h"
+#include "Op/FloorDiv.h"
+#include "Op/FullyConnected.h"
+#include "Op/LogicalOr.h"
+#include "Op/LogicalNot.h"
+#include "Op/MaxPool2D.h"
+#include "Op/Mean.h"
+#include "Op/Mul.h"
+#include "Op/Pack.h"
+#include "Op/Pad.h"
+#include "Op/ReLU.h"
+#include "Op/ReLU6.h"
+#include "Op/Reshape.h"
+#include "Op/Rsqrt.h"
+#include "Op/Shape.h"
+#include "Op/Softmax.h"
+#include "Op/Sqrt.h"
+#include "Op/Sub.h"
+#include "Op/Tanh.h"
+#include "Op/Transpose.h"
+
+#endif // __OP_CHEFS_H__
diff --git a/compiler/tflchef/proto/CMakeLists.txt b/compiler/tflchef/proto/CMakeLists.txt
new file mode 100644
index 000000000..c504b11ae
--- /dev/null
+++ b/compiler/tflchef/proto/CMakeLists.txt
@@ -0,0 +1,5 @@
+Protobuf_Generate(TFLCHEF_PROTO "${CMAKE_CURRENT_BINARY_DIR}/generated" "${CMAKE_CURRENT_SOURCE_DIR}" "tflchef.proto")
+
+add_library(tflchef_proto STATIC ${TFLCHEF_PROTO_SOURCES})
+target_include_directories(tflchef_proto PUBLIC ${TFLCHEF_PROTO_INCLUDE_DIRS})
+target_link_libraries(tflchef_proto libprotobuf)
diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto
new file mode 100644
index 000000000..486aa8a67
--- /dev/null
+++ b/compiler/tflchef/proto/tflchef.proto
@@ -0,0 +1,232 @@
+syntax = "proto2";
+
+package tflchef;
+
+//
+// Initial version
+// - Our initial version
+//
+// Version 1
+// - Backward compatible with Initial version
+// - Added Graph to represent sub graphs
+// - Added name, version(default as 1), graph in ModelRecipe
+//
+
+// This enum value corresponds to TensorType in TensorFlow Lite schema
+enum TensorType {
+ FLOAT32 = 0;
+ INT32 = 2;
+ UINT8 = 3;
+ INT64 = 4;
+ BOOL = 6;
+}
+
+message TensorShape {
+ repeated uint32 dim = 3;
+}
+
+message TensorFiller {
+ optional string tag = 1;
+ repeated string arg = 2;
+}
+
+message TensorQuantization {
+ repeated float min = 1;
+ repeated float max = 2;
+ repeated float scale = 3;
+ repeated int64 zero_point = 4;
+}
+
+message Operand {
+ optional string name = 1;
+ optional TensorType type = 2;
+ optional TensorShape shape = 3;
+ optional TensorFiller filler = 4;
+ optional TensorQuantization quant = 5;
+}
+
+// This enum value corresponds to Padding in TensorFlow Lite schema
+enum Padding {
+ SAME = 0;
+ VALID = 1;
+}
+
+// This enum value corresponds to ActivationFunctionType in TensorFlow Lite schema
+enum Activation {
+ NONE = 0;
+ RELU = 1;
+ RELU6 = 3;
+}
+
+message Conv2DOptions
+{
+ optional Padding padding = 1 [default = VALID];
+ optional int32 stride_w = 2 [default = 1];
+ optional int32 stride_h = 3 [default = 1];
+ optional Activation activation = 4 [default = NONE];
+}
+
+message Pool2DOptions {
+ optional Padding padding = 1 [default = VALID];
+ optional int32 stride_w = 2 [default = 1];
+ optional int32 stride_h = 3 [default = 1];
+ optional int32 filter_width = 4 [default = 1];
+ optional int32 filter_height = 5 [ default = 1];
+ optional Activation activation = 6 [default = NONE];
+}
+
+message ConcatenationOptions {
+ optional int32 axis = 1 [default = 0];
+ optional Activation activation = 2 [default = NONE];
+}
+
+message ReshapeOptions {
+ repeated int32 new_shape = 1;
+}
+
+message DepthwiseConv2DOptions
+{
+ optional Padding padding = 1 [default = VALID];
+ optional int32 stride_w = 2 [default = 1];
+ optional int32 stride_h = 3 [default = 1];
+ optional int32 depth_multiplier = 4 [default = 1];
+ optional Activation activation = 5 [default = NONE];
+}
+
+message SubOptions {
+ optional Activation activation = 1 [default = NONE];
+}
+
+message DivOptions {
+ optional Activation activation = 1 [default = NONE];
+}
+
+message FloorDivOptions {
+ // None
+}
+
+message FullyConnectedOptions {
+ optional Activation activation = 1 [default = NONE];
+}
+
+message AddOptions {
+ optional Activation activation = 1 [default = NONE];
+}
+
+message ArgMaxOptions {
+ optional TensorType output_type = 1 [default = INT64];
+}
+
+message PackOptions {
+ optional int32 values_count = 1;
+ optional int32 axis = 2 [default = 0];
+}
+
+message PadOptions {
+ // None
+}
+
+message SoftmaxOptions {
+ optional float beta = 1 [default = 0.0];
+}
+
+message MulOptions {
+ optional Activation activation = 1 [default = NONE];
+}
+
+message ReducerOptions {
+ optional bool keep_dims = 1 [ default = false ];
+}
+
+message LogicalOrOptions {
+ // None
+}
+
+message LogicalNotOptions {
+ // None
+}
+
+message LogicalAndOptions {
+ // None
+}
+
+message TransposeOptions {
+ // None
+}
+
+message AbsOptions {
+ // None
+}
+
+message CosOptions {
+ // None
+}
+
+message EqualOptions {
+ // None
+}
+
+message ShapeOptions {
+ optional TensorType out_type = 1 [default = INT32];
+}
+
+message BatchToSpaceNDOptions {
+ // None
+}
+
+message ExpOptions {
+ // None
+}
+
+message Operation {
+ optional string type = 1;
+ repeated string input = 2;
+ repeated string output = 3;
+
+ optional Conv2DOptions conv2d_options = 100;
+ optional Pool2DOptions averagepool2d_options = 101;
+ optional ConcatenationOptions concatenation_options = 102;
+ optional Pool2DOptions maxpool2d_options = 103;
+ optional ReshapeOptions reshape_options = 104;
+ optional DepthwiseConv2DOptions depthwiseconv2d_options = 105;
+ optional SubOptions sub_options = 106;
+ optional DivOptions div_options = 107;
+ optional FullyConnectedOptions fullyconnected_options = 108;
+ optional AddOptions add_options = 109;
+ optional ArgMaxOptions argmax_options = 110;
+ optional PadOptions pad_options = 111;
+ optional SoftmaxOptions softmax_options = 112;
+ optional MulOptions mul_options = 113;
+ optional ReducerOptions mean_options = 114;
+ optional TransposeOptions transpose_options = 115;
+ optional PackOptions pack_options = 116;
+ optional LogicalOrOptions logical_or_options = 117;
+ optional LogicalNotOptions logical_not_options = 118;
+ optional LogicalAndOptions logical_and_options = 119;
+ optional AbsOptions abs_options = 120;
+ optional CosOptions cos_options = 121;
+ optional EqualOptions equal_options = 122;
+ optional ShapeOptions shape_options = 123;
+ optional FloorDivOptions floordiv_options = 124;
+ optional BatchToSpaceNDOptions batch_to_space_options = 125;
+ optional ExpOptions exp_options = 126;
+}
+
+// For additional subgraphs
+message Graph {
+ repeated Operand operand = 1;
+ repeated Operation operation = 2;
+ repeated string input = 3;
+ repeated string output = 4;
+ optional string name = 5;
+}
+
+message ModelRecipe {
+ repeated Operand operand = 1;
+ repeated Operation operation = 2;
+ repeated string input = 3;
+ repeated string output = 4;
+ optional string name = 5;
+ optional uint32 version = 6 [default = 1];
+ repeated Graph graph = 7;
+}
diff --git a/compiler/tflchef/requires.cmake b/compiler/tflchef/requires.cmake
new file mode 100644
index 000000000..3c5bb197f
--- /dev/null
+++ b/compiler/tflchef/requires.cmake
@@ -0,0 +1,4 @@
+require("nnkit")
+require("cwrap")
+require("mio-tflite")
+require("safemain")
diff --git a/compiler/tflchef/tests/CMakeLists.txt b/compiler/tflchef/tests/CMakeLists.txt
new file mode 100644
index 000000000..5c4dff012
--- /dev/null
+++ b/compiler/tflchef/tests/CMakeLists.txt
@@ -0,0 +1,129 @@
+if(NOT TARGET nnkit-run)
+ return()
+endif(NOT TARGET nnkit-run)
+
+if(NOT TARGET nnkit_tflite_backend)
+ return()
+endif(NOT TARGET nnkit_tflite_backend)
+
+nncc_find_resource(TensorFlowLiteRecipes)
+set(TENSORFLOWLITERECIPES_DIR "${TensorFlowLiteRecipes_DIR}")
+
+file(GLOB RECIPES RELATIVE ${TENSORFLOWLITERECIPES_DIR} "${TENSORFLOWLITERECIPES_DIR}/*/test.recipe")
+
+foreach(RECIPE IN ITEMS ${RECIPES})
+ get_filename_component(RECIPE_PREFIX ${RECIPE} DIRECTORY)
+
+ set(RECIPE_SOURCE_FILE "${RECIPE_PREFIX}.recipe")
+ set(RECIPE_OUTPUT_FILE "${RECIPE_PREFIX}.tflite")
+
+ # Copy .recipe
+ add_custom_command(OUTPUT ${RECIPE_SOURCE_FILE}
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ "${TENSORFLOWLITERECIPES_DIR}/${RECIPE}" ${RECIPE_SOURCE_FILE}
+ DEPENDS "${TENSORFLOWLITERECIPES_DIR}/${RECIPE}"
+ COMMENT "Generating ${RECIPE_SOURCE_FILE}")
+
+ # Generate .tflite
+ add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
+ COMMAND tflchef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+ DEPENDS tflchef-file ${RECIPE_SOURCE_FILE}
+ COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
+
+ list(APPEND TESTS ${RECIPE_PREFIX})
+ list(APPEND TESTFILES ${RECIPE_OUTPUT_FILE})
+endforeach(RECIPE)
+
+# Add local files
+file(GLOB RECIPES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.recipe")
+
+foreach(RECIPE IN ITEMS ${RECIPES})
+ get_filename_component(RECIPE_PREFIX ${RECIPE} DIRECTORY)
+
+ set(RECIPE_SOURCE_FILE "${RECIPE_PREFIX}.recipe")
+ set(RECIPE_OUTPUT_FILE "${RECIPE_PREFIX}.tflite")
+
+ # Copy .recipe
+ add_custom_command(OUTPUT ${RECIPE_SOURCE_FILE}
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ "${CMAKE_CURRENT_SOURCE_DIR}/${RECIPE}" ${RECIPE_SOURCE_FILE}
+ DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RECIPE}"
+ COMMENT "Generating ${RECIPE_SOURCE_FILE}")
+
+ # Generate .tflite
+ add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
+ COMMAND tflchef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+ DEPENDS tflchef-file ${RECIPE_SOURCE_FILE}
+ COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
+
+ list(APPEND TESTS ${RECIPE_PREFIX})
+ list(APPEND TESTFILES ${RECIPE_OUTPUT_FILE})
+endforeach(RECIPE)
+
+# Test tflchef-reverse
+file(GLOB GEN_TFLITEFILES RELATIVE ${TENSORFLOWLITERECIPES_DIR} "${TENSORFLOWLITERECIPES_DIR}/*/test.reverse")
+# Note: While in development, tflchef-reverse may not handle the operator.
+# To separate this linkage scan empty test.reverse for test targets for tflchef-reverse.
+
+foreach(TFLITEFILE IN ITEMS ${GEN_TFLITEFILES})
+ get_filename_component(TFLITE_PREFIX ${TFLITEFILE} DIRECTORY)
+
+ # file from above tflchef-file block
+ # use tflite file as input of tflchef-reverse generated from tflchef-file
+ set(RECIPE_OUTPUT_FILE "${TFLITE_PREFIX}.tflite")
+ set(RECIPE_GEN_OUTPUT_FILE "${TFLITE_PREFIX}.gen.recipe")
+ set(RECIPE_GEN_OUTPUT_FILE2 "${TFLITE_PREFIX}.gen.tflite")
+
+ # Generate .gen.recipe from generated .tflite
+ add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
+ COMMAND tflchef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+ DEPENDS tflchef-reverse ${RECIPE_OUTPUT_FILE}
+ COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
+
+ # now we are going to generate .gen.tflite from .gen.recipe
+ # to check generated .gen.recipe file is correct by using it.
+ # as weight values may be different, binary comparision is not acceptable.
+ add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
+ COMMAND tflchef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+ DEPENDS tflchef-file ${RECIPE_GEN_OUTPUT_FILE}
+ COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
+
+ list(APPEND TESTS ${TFLITE_PREFIX}.gen)
+ list(APPEND TESTFILES ${RECIPE_GEN_OUTPUT_FILE2})
+endforeach(TFLITEFILE)
+
+# Test local tflchef-reverse
+file(GLOB GEN_TFLITEFILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.reverse")
+
+foreach(TFLITEFILE IN ITEMS ${GEN_TFLITEFILES})
+ get_filename_component(TFLITE_PREFIX ${TFLITEFILE} DIRECTORY)
+
+ set(RECIPE_OUTPUT_FILE "${TFLITE_PREFIX}.tflite")
+ set(RECIPE_GEN_OUTPUT_FILE "${TFLITE_PREFIX}.gen.recipe")
+ set(RECIPE_GEN_OUTPUT_FILE2 "${TFLITE_PREFIX}.gen.tflite")
+
+ # Generate .gen.recipe from generated .tflite
+ add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
+ COMMAND tflchef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+ DEPENDS tflchef-reverse ${RECIPE_OUTPUT_FILE}
+ COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
+
+ add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
+ COMMAND tflchef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+ DEPENDS tflchef-file ${RECIPE_GEN_OUTPUT_FILE}
+ COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
+
+ list(APPEND TESTS ${TFLITE_PREFIX}.gen)
+ list(APPEND TESTFILES ${RECIPE_GEN_OUTPUT_FILE2})
+endforeach(TFLITEFILE)
+
+# Add a dummy target to create a target-level dependency.
+# TODO Find a way to create a dependency between tflchef_test and generated testfiles.
+add_custom_target(tflchef_testfiles ALL DEPENDS ${TESTFILES})
+
+# Using mio_tflite_validate for temporary as it only calls flatbuffer validate
+# TODO do testing with running the model with runtime/interpreter
+add_test(NAME tflchef_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/runvalidate.sh"
+ $<TARGET_FILE:mio_tflite_validate>
+ ${TESTS})
diff --git a/compiler/tflchef/tests/explicit_datachef/test.recipe b/compiler/tflchef/tests/explicit_datachef/test.recipe
new file mode 100644
index 000000000..bd5213f39
--- /dev/null
+++ b/compiler/tflchef/tests/explicit_datachef/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 10 }
+}
+operand {
+ name: "shape"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "-1" arg: "10" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 10 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: -1
+ new_shape: 10
+ }
+ input: "ifm"
+ input: "shape"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/tflchef/tests/explicit_datachef/test.reverse b/compiler/tflchef/tests/explicit_datachef/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/tflchef/tests/explicit_datachef/test.reverse
diff --git a/compiler/tflchef/tests/multisubgraph/test.recipe b/compiler/tflchef/tests/multisubgraph/test.recipe
new file mode 100644
index 000000000..b55af1337
--- /dev/null
+++ b/compiler/tflchef/tests/multisubgraph/test.recipe
@@ -0,0 +1,72 @@
+version: 1
+
+graph {
+ operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ }
+ operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ }
+ operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm"
+ }
+ input: "ifm"
+ output: "ofm"
+ name: "Sub_01"
+}
+
+graph {
+ operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ }
+ operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ }
+ operation {
+ type: "ReLU6"
+ input: "ifm"
+ output: "ofm"
+ }
+ input: "ifm"
+ output: "ofm"
+ name: "Sub_01"
+}
+
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
+name: "Main"
diff --git a/compiler/tflchef/tests/readme/test.recipe b/compiler/tflchef/tests/readme/test.recipe
new file mode 100644
index 000000000..bc41a3fc0
--- /dev/null
+++ b/compiler/tflchef/tests/readme/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+input: "ker"
+output: "ofm"
diff --git a/compiler/tflchef/tests/readme/test.reverse b/compiler/tflchef/tests/readme/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/tflchef/tests/readme/test.reverse
diff --git a/compiler/tflchef/tests/runall.sh b/compiler/tflchef/tests/runall.sh
new file mode 100755
index 000000000..c697f1bc5
--- /dev/null
+++ b/compiler/tflchef/tests/runall.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+if [[ $# -le 3 ]]; then
+ echo "USAGE: $0 [nnkit-run path] [tflite backend path] [working directory] [prefix 0] [prefix 1] ..."
+ exit 255
+fi
+
+NNKIT_RUN_PATH="$1"; shift
+TFLITE_BACKEND_PATH="$1"; shift
+WORKDIR="$1"; shift
+
+echo "-- Found nnkit-run: ${NNKIT_RUN_PATH}"
+echo "-- Found tflite backend: ${TFLITE_BACKEND_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ echo "'${NNKIT_RUN_PATH}' --backend '${TFLITE_BACKEND_PATH}' --backend-arg '${PREFIX}.tflite'"
+ "${NNKIT_RUN_PATH}" --backend "${TFLITE_BACKEND_PATH}" --backend-arg "${PREFIX}.tflite"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$PREFIX")
+ else
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+echo "SUMMARY: ${#PASSED[@]} PASS AND ${#FAILED[@]} FAIL AMONG ${#TESTED[@]} TESTS"
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+exit 0
diff --git a/compiler/tflchef/tests/runvalidate.sh b/compiler/tflchef/tests/runvalidate.sh
new file mode 100755
index 000000000..a1453b399
--- /dev/null
+++ b/compiler/tflchef/tests/runvalidate.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+if [[ $# -lt 2 ]]; then
+  echo "USAGE: $0 [mio_tflite_validate path] [prefix 0] [prefix 1] ..."
+ exit 255
+fi
+
+MIO_TFLITE_VALIDATE_PATH="$1"; shift
+
+echo "-- Found mio_tflite_validate: ${MIO_TFLITE_VALIDATE_PATH}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR:-.}"
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ echo "'${MIO_TFLITE_VALIDATE_PATH}' '${PREFIX}.tflite'"
+ "${MIO_TFLITE_VALIDATE_PATH}" "${PREFIX}.tflite"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$PREFIX")
+ else
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+echo "SUMMARY: ${#PASSED[@]} PASS AND ${#FAILED[@]} FAIL AMONG ${#TESTED[@]} TESTS"
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+exit 0
diff --git a/compiler/tflchef/tflite/CMakeLists.txt b/compiler/tflchef/tflite/CMakeLists.txt
new file mode 100644
index 000000000..645c16144
--- /dev/null
+++ b/compiler/tflchef/tflite/CMakeLists.txt
@@ -0,0 +1,9 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(tflchef_tflite STATIC ${SOURCES})
+target_include_directories(tflchef_tflite PUBLIC include)
+target_include_directories(tflchef_tflite PRIVATE src)
+target_link_libraries(tflchef_tflite tflchef_proto)
+target_link_libraries(tflchef_tflite mio_tflite)
+target_link_libraries(tflchef_tflite stdex)
+target_link_libraries(tflchef_tflite cwrap)
diff --git a/compiler/tflchef/tflite/include/tflchef/RawModel.h b/compiler/tflchef/tflite/include/tflchef/RawModel.h
new file mode 100644
index 000000000..a8c8fefb7
--- /dev/null
+++ b/compiler/tflchef/tflite/include/tflchef/RawModel.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __RAW_MODEL_H__
+#define __RAW_MODEL_H__
+
+#include <mio/tflite/schema_generated.h>
+
+#include <memory>
+#include <string>
+
+namespace tflchef
+{
+
+struct RawModel
+{
+ virtual ~RawModel() = default;
+
+ virtual const ::tflite::Model *model(void) const = 0;
+};
+
+/**
+ * @brief Load TensorFlow Lite model (as a RawModel) from a given path
+ *
+ * @note May return a nullptr
+ */
+std::unique_ptr<RawModel> load_tflite(const std::string &path);
+
+} // namespace tflchef
+
+#endif // __RAW_MODEL_H__
diff --git a/compiler/tflchef/tflite/include/tflchef/RecipeChef.h b/compiler/tflchef/tflite/include/tflchef/RecipeChef.h
new file mode 100644
index 000000000..2d292c3d5
--- /dev/null
+++ b/compiler/tflchef/tflite/include/tflchef/RecipeChef.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __RECIPE_CHEF_H__
+#define __RECIPE_CHEF_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <tflchef.pb.h>
+
+#include <memory>
+#include <string>
+
+namespace tflchef
+{
+
+/**
+ * @brief Create ModelRecipe from tflite::Model
+ */
+std::unique_ptr<ModelRecipe> generate_recipe(const tflite::Model *model);
+
+/**
+ * @brief Write ModelRecipe to file with given name
+ */
+bool write_recipe(const std::string &filename, std::unique_ptr<ModelRecipe> &recipe);
+
+} // namespace tflchef
+
+#endif // __RECIPE_CHEF_H__
diff --git a/compiler/tflchef/tflite/src/Convert.cpp b/compiler/tflchef/tflite/src/Convert.cpp
new file mode 100644
index 000000000..dc60e0087
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Convert.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+tflchef::TensorType as_tflchef_type(const tflite::TensorType type)
+{
+ switch (type)
+ {
+ case tflite::TensorType_FLOAT32:
+ return tflchef::FLOAT32;
+ case tflite::TensorType_INT32:
+ return tflchef::INT32;
+ case tflite::TensorType_INT64:
+ return tflchef::INT64;
+ case tflite::TensorType_UINT8:
+ return tflchef::UINT8;
+ case tflite::TensorType_BOOL:
+ return tflchef::BOOL;
+ // TODO handle other types
+ // TensorType_FLOAT16
+ // TensorType_STRING
+ // TensorType_INT16
+ // TensorType_COMPLEX64
+ default:
+ throw std::runtime_error{"unsupported tensor type"};
+ }
+}
+
+tflchef::Activation as_tflchef_activation(const tflite::ActivationFunctionType type)
+{
+ switch (type)
+ {
+ case tflite::ActivationFunctionType_NONE:
+ return tflchef::NONE;
+ case tflite::ActivationFunctionType_RELU:
+ return tflchef::RELU;
+ case tflite::ActivationFunctionType_RELU6:
+ return tflchef::RELU6;
+ // TODO handle other types
+ // ActivationFunctionType_RELU_N1_TO_1
+ // ActivationFunctionType_TANH
+ // ActivationFunctionType_SIGN_BIT
+ default:
+ throw std::runtime_error{"unsupported activation type"};
+ }
+}
+
+tflchef::Padding as_tflchef_padding(const tflite::Padding padding)
+{
+ switch (padding)
+ {
+ case tflite::Padding_SAME:
+ return tflchef::SAME;
+ case tflite::Padding_VALID:
+ return tflchef::VALID;
+ default:
+ throw std::runtime_error{"unsupported padding"};
+ }
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Convert.h b/compiler/tflchef/tflite/src/Convert.h
new file mode 100644
index 000000000..8623e7b78
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Convert.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONVERT_H__
+#define __CONVERT_H__
+
+#include <mio/tflite/schema_generated.h>
+
+#include <tflchef.pb.h>
+
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+namespace tflchef
+{
+
+tflchef::TensorType as_tflchef_type(const tflite::TensorType type);
+tflchef::Activation as_tflchef_activation(const tflite::ActivationFunctionType type);
+tflchef::Padding as_tflchef_padding(const tflite::Padding padding);
+
+/**
+ * @brief extract buffer data to std::vector<DT>
+ */
+template <typename DT> std::vector<DT> extract_buffer(const tflite::Buffer *buffer)
+{
+ auto buffer_length = buffer->data()->size();
+ auto num_elements = buffer_length / sizeof(DT);
+ std::vector<DT> result(num_elements);
+ std::memcpy(result.data(), buffer->data()->data(), buffer_length);
+ return result;
+}
+
+template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
+{
+ std::vector<T> ret(flat_array->Length());
+ for (uint32_t i = 0; i < flat_array->Length(); i++)
+ {
+ ret[i] = flat_array->Get(i);
+ }
+ return ret;
+}
+
+} // namespace tflchef
+
+#endif // __CONVERT_H__
diff --git a/compiler/tflchef/tflite/src/Op/Abs.cpp b/compiler/tflchef/tflite/src/Op/Abs.cpp
new file mode 100644
index 000000000..7d769e344
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Abs.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Abs.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpAbs::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpAbs::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Abs");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Abs.h b/compiler/tflchef/tflite/src/Op/Abs.h
new file mode 100644
index 000000000..d99b0d593
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Abs.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_ABS_H__
+#define __TFLITE_OP_ABS_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for abs
+ */
+class TFliteOpAbs : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_ABS_H__
diff --git a/compiler/tflchef/tflite/src/Op/Add.cpp b/compiler/tflchef/tflite/src/Op/Add.cpp
new file mode 100644
index 000000000..7e669ecc9
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Add.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Add.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpAdd::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpAdd::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_AddOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Add");
+
+ auto op_options = operation->mutable_add_options();
+
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Add.h b/compiler/tflchef/tflite/src/Op/Add.h
new file mode 100644
index 000000000..49d945f8b
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Add.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_ADD_H__
+#define __TFLITE_OP_ADD_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for add
+ */
+class TFliteOpAdd : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_ADD_H__
diff --git a/compiler/tflchef/tflite/src/Op/ArgMax.cpp b/compiler/tflchef/tflite/src/Op/ArgMax.cpp
new file mode 100644
index 000000000..f4d1c5e66
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/ArgMax.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ArgMax.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpArgMax::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // filler for second input, argmax/dim
+ const auto &inputs = *op->inputs();
+
+ const tflite::Tensor *dim_tensor = import->tensors()->Get(inputs[1]);
+ assert(dim_tensor->type() == tflite::TensorType::TensorType_INT32);
+ const tflite::Buffer *buffer = import->buffers()->Get(dim_tensor->buffer());
+ auto vec = extract_buffer<int32_t>(buffer);
+ import->set_tensor_filler(inputs[1], vec);
+}
+
+tflchef::Operation *TFliteOpArgMax::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_ArgMaxOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("ArgMax");
+
+ auto op_options = operation->mutable_argmax_options();
+
+ op_options->set_output_type(as_tflchef_type(op_params->output_type()));
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/ArgMax.h b/compiler/tflchef/tflite/src/Op/ArgMax.h
new file mode 100644
index 000000000..30068ecf2
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/ArgMax.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_ARGMAX_H__
+#define __TFLITE_OP_ARGMAX_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for ArgMax
+ */
+class TFliteOpArgMax : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_ARGMAX_H__
diff --git a/compiler/tflchef/tflite/src/Op/AveragePool2D.cpp b/compiler/tflchef/tflite/src/Op/AveragePool2D.cpp
new file mode 100644
index 000000000..1f269e45e
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/AveragePool2D.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AveragePool2D.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpAveragePool2D::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpAveragePool2D::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_Pool2DOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("AveragePool2D");
+
+ auto op_options = operation->mutable_averagepool2d_options();
+
+ op_options->set_padding(as_tflchef_padding(op_params->padding()));
+ op_options->set_stride_h(op_params->stride_h());
+ op_options->set_stride_w(op_params->stride_w());
+ op_options->set_filter_height(op_params->filter_height());
+ op_options->set_filter_width(op_params->filter_width());
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/AveragePool2D.h b/compiler/tflchef/tflite/src/Op/AveragePool2D.h
new file mode 100644
index 000000000..f9e9fb254
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/AveragePool2D.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_AVERAGEPOOL2D_H__
+#define __TFLITE_OP_AVERAGEPOOL2D_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for AveragePool2D
+ */
+class TFliteOpAveragePool2D : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_AVERAGEPOOL2D_H__
diff --git a/compiler/tflchef/tflite/src/Op/BatchToSpaceND.cpp b/compiler/tflchef/tflite/src/Op/BatchToSpaceND.cpp
new file mode 100644
index 000000000..d5d9606d1
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/BatchToSpaceND.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchToSpaceND.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpBatchToSpaceND::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // filler for second, third input
+ const auto &inputs = *op->inputs();
+
+ const tflite::Tensor *tensor = import->tensors()->Get(inputs[1]);
+ assert(tensor->type() == tflite::TensorType::TensorType_INT32);
+ const tflite::Buffer *buffer = import->buffers()->Get(tensor->buffer());
+ auto vec = extract_buffer<int32_t>(buffer);
+ import->set_tensor_filler(inputs[1], vec);
+
+ tensor = import->tensors()->Get(inputs[2]);
+ assert(tensor->type() == tflite::TensorType::TensorType_INT32);
+ buffer = import->buffers()->Get(tensor->buffer());
+ vec = extract_buffer<int32_t>(buffer);
+ import->set_tensor_filler(inputs[2], vec);
+}
+
+tflchef::Operation *TFliteOpBatchToSpaceND::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("BatchToSpaceND");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/BatchToSpaceND.h b/compiler/tflchef/tflite/src/Op/BatchToSpaceND.h
new file mode 100644
index 000000000..ae2114c97
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/BatchToSpaceND.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_BATCHTOSPACEND_H__
+#define __TFLITE_OP_BATCHTOSPACEND_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for BatchToSpaceND
+ */
+class TFliteOpBatchToSpaceND : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_BATCHTOSPACEND_H__
diff --git a/compiler/tflchef/tflite/src/Op/Concatenation.cpp b/compiler/tflchef/tflite/src/Op/Concatenation.cpp
new file mode 100644
index 000000000..126402f14
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Concatenation.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Concatenation.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpConcatenation::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpConcatenation::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_ConcatenationOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Concatenation");
+
+ auto op_options = operation->mutable_concatenation_options();
+
+ op_options->set_axis(op_params->axis());
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Concatenation.h b/compiler/tflchef/tflite/src/Op/Concatenation.h
new file mode 100644
index 000000000..4a7ea5791
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Concatenation.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_CONCATENATION_H__
+#define __TFLITE_OP_CONCATENATION_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Concatenation
+ */
+class TFliteOpConcatenation : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_CONCATENATION_H__
diff --git a/compiler/tflchef/tflite/src/Op/Conv2D.cpp b/compiler/tflchef/tflite/src/Op/Conv2D.cpp
new file mode 100644
index 000000000..5d48ee24f
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Conv2D.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conv2D.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpConv2D::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
+ bool hasBias = (inputs.size() == 3);
+ assert(inputs.size() == 2 || hasBias);
+
+ import->set_tensor_filler(inputs.at(1)); // kernel
+ if (hasBias)
+ import->set_tensor_filler(inputs.at(2)); // bias
+}
+
+tflchef::Operation *TFliteOpConv2D::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_Conv2DOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Conv2D");
+
+ auto op_options = operation->mutable_conv2d_options();
+
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+ op_options->set_stride_h(op_params->stride_h());
+ op_options->set_stride_w(op_params->stride_w());
+ op_options->set_padding(as_tflchef_padding(op_params->padding()));
+ // TODO support dilation
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Conv2D.h b/compiler/tflchef/tflite/src/Op/Conv2D.h
new file mode 100644
index 000000000..0216e9ce9
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Conv2D.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_CONV2D_H__
+#define __TFLITE_OP_CONV2D_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Conv2D
+ */
+class TFliteOpConv2D : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_CONV2D_H__
diff --git a/compiler/tflchef/tflite/src/Op/Cos.cpp b/compiler/tflchef/tflite/src/Op/Cos.cpp
new file mode 100644
index 000000000..9f2c49d49
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Cos.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Cos.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpCos::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpCos::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Cos");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Cos.h b/compiler/tflchef/tflite/src/Op/Cos.h
new file mode 100644
index 000000000..8f3dbe3a6
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Cos.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_COS_H__
+#define __TFLITE_OP_COS_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Cos
+ */
+class TFliteOpCos : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_COS_H__
diff --git a/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.cpp b/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..b19f9330f
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConv2D.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpDepthwiseConv2D::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
+ bool hasBias = (inputs.size() == 3);
+ assert(inputs.size() == 2 || hasBias);
+
+ import->set_tensor_filler(inputs.at(1)); // kernel
+ if (hasBias)
+ import->set_tensor_filler(inputs.at(2)); // bias
+}
+
+tflchef::Operation *TFliteOpDepthwiseConv2D::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_DepthwiseConv2DOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("DepthwiseConv2D");
+
+ auto op_options = operation->mutable_depthwiseconv2d_options();
+
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+ op_options->set_stride_h(op_params->stride_h());
+ op_options->set_stride_w(op_params->stride_w());
+ op_options->set_depth_multiplier(op_params->depth_multiplier());
+ // TODO support dilation
+ // op_params->dilation_w_factor()
+ // op_params->dilation_h_factor()
+ op_options->set_padding(as_tflchef_padding(op_params->padding()));
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h b/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h
new file mode 100644
index 000000000..c172536b4
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_DEPTHWISECONV2D_H__
+#define __TFLITE_OP_DEPTHWISECONV2D_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for DepthwiseConv2D
+ */
+class TFliteOpDepthwiseConv2D : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_DEPTHWISECONV2D_H__
diff --git a/compiler/tflchef/tflite/src/Op/Div.cpp b/compiler/tflchef/tflite/src/Op/Div.cpp
new file mode 100644
index 000000000..0fd87dc05
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Div.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Div.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpDiv::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpDiv::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_DivOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Div");
+
+ auto op_options = operation->mutable_div_options();
+
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Div.h b/compiler/tflchef/tflite/src/Op/Div.h
new file mode 100644
index 000000000..254a4cd99
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Div.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_DIV_H__
+#define __TFLITE_OP_DIV_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for division
+ */
+class TFliteOpDiv : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_DIV_H__
diff --git a/compiler/tflchef/tflite/src/Op/Equal.cpp b/compiler/tflchef/tflite/src/Op/Equal.cpp
new file mode 100644
index 000000000..a51586228
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Equal.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Equal.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpEqual::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpEqual::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Equal");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Equal.h b/compiler/tflchef/tflite/src/Op/Equal.h
new file mode 100644
index 000000000..fd4b40001
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Equal.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_EQUAL_H__
+#define __TFLITE_OP_EQUAL_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Equal
+ */
+class TFliteOpEqual : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_EQUAL_H__
diff --git a/compiler/tflchef/tflite/src/Op/Exp.cpp b/compiler/tflchef/tflite/src/Op/Exp.cpp
new file mode 100644
index 000000000..f715da6ef
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Exp.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Exp.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpExp::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpExp::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Exp");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Exp.h b/compiler/tflchef/tflite/src/Op/Exp.h
new file mode 100644
index 000000000..5ff3ddc8b
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Exp.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_EXP_H__
+#define __TFLITE_OP_EXP_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Exp
+ */
+class TFliteOpExp : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_EXP_H__
diff --git a/compiler/tflchef/tflite/src/Op/FloorDiv.cpp b/compiler/tflchef/tflite/src/Op/FloorDiv.cpp
new file mode 100644
index 000000000..492c6941f
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/FloorDiv.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FloorDiv.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpFloorDiv::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpFloorDiv::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("FloorDiv");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/FloorDiv.h b/compiler/tflchef/tflite/src/Op/FloorDiv.h
new file mode 100644
index 000000000..5d049a668
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/FloorDiv.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_FLOORDIV_H__
+#define __TFLITE_OP_FLOORDIV_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for floor division
+ */
+class TFliteOpFloorDiv : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_FLOORDIV_H__
diff --git a/compiler/tflchef/tflite/src/Op/FullyConnected.cpp b/compiler/tflchef/tflite/src/Op/FullyConnected.cpp
new file mode 100644
index 000000000..4291c844b
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/FullyConnected.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnected.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpFullyConnected::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpFullyConnected::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_FullyConnectedOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("FullyConnected");
+
+ auto op_options = operation->mutable_fullyconnected_options();
+
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/FullyConnected.h b/compiler/tflchef/tflite/src/Op/FullyConnected.h
new file mode 100644
index 000000000..8fbe1f3ed
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/FullyConnected.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_FULLYCONNECTED_H__
+#define __TFLITE_OP_FULLYCONNECTED_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for FullyConnected
+ */
+class TFliteOpFullyConnected : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_FULLYCONNECTED_H__
diff --git a/compiler/tflchef/tflite/src/Op/LogicalNot.cpp b/compiler/tflchef/tflite/src/Op/LogicalNot.cpp
new file mode 100644
index 000000000..ecd5b903c
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/LogicalNot.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LogicalNot.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpLogicalNot::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpLogicalNot::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("LogicalNot");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/LogicalNot.h b/compiler/tflchef/tflite/src/Op/LogicalNot.h
new file mode 100644
index 000000000..b75d33554
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/LogicalNot.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_LOGICALNOT_H__
+#define __TFLITE_OP_LOGICALNOT_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for LogicalNot
+ */
+class TFliteOpLogicalNot : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_LOGICALNOT_H__
diff --git a/compiler/tflchef/tflite/src/Op/LogicalOr.cpp b/compiler/tflchef/tflite/src/Op/LogicalOr.cpp
new file mode 100644
index 000000000..b91f4cfca
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/LogicalOr.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LogicalOr.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpLogicalOr::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpLogicalOr::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("LogicalOr");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/LogicalOr.h b/compiler/tflchef/tflite/src/Op/LogicalOr.h
new file mode 100644
index 000000000..5331a0d65
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/LogicalOr.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_LOGICALOR_H__
+#define __TFLITE_OP_LOGICALOR_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for LogicalOr
+ */
+class TFliteOpLogicalOr : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_LOGICALOR_H__
diff --git a/compiler/tflchef/tflite/src/Op/MaxPool2D.cpp b/compiler/tflchef/tflite/src/Op/MaxPool2D.cpp
new file mode 100644
index 000000000..1366366ac
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/MaxPool2D.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPool2D.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpMaxPool2D::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpMaxPool2D::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_Pool2DOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("MaxPool2D");
+
+ auto op_options = operation->mutable_maxpool2d_options();
+
+ op_options->set_padding(as_tflchef_padding(op_params->padding()));
+ op_options->set_stride_h(op_params->stride_h());
+ op_options->set_stride_w(op_params->stride_w());
+ op_options->set_filter_height(op_params->filter_height());
+ op_options->set_filter_width(op_params->filter_width());
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/MaxPool2D.h b/compiler/tflchef/tflite/src/Op/MaxPool2D.h
new file mode 100644
index 000000000..36533f80c
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/MaxPool2D.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_MAXPOOL2D_H__
+#define __TFLITE_OP_MAXPOOL2D_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for MaxPool2D
+ */
+class TFliteOpMaxPool2D : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_MAXPOOL2D_H__
diff --git a/compiler/tflchef/tflite/src/Op/Mean.cpp b/compiler/tflchef/tflite/src/Op/Mean.cpp
new file mode 100644
index 000000000..1c2975781
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Mean.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Mean.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpMean::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // filler for second input
+ const auto &inputs = *op->inputs();
+
+ const tflite::Tensor *tensor = import->tensors()->Get(inputs[1]);
+ assert(tensor->type() == tflite::TensorType::TensorType_INT32);
+ const tflite::Buffer *buffer = import->buffers()->Get(tensor->buffer());
+ auto vec = extract_buffer<int32_t>(buffer);
+ import->set_tensor_filler(inputs[1], vec);
+}
+
+tflchef::Operation *TFliteOpMean::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_ReducerOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Mean");
+
+ auto op_options = operation->mutable_mean_options();
+
+ op_options->set_keep_dims(op_params->keep_dims());
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Mean.h b/compiler/tflchef/tflite/src/Op/Mean.h
new file mode 100644
index 000000000..532c40c66
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Mean.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_MEAN_H__
+#define __TFLITE_OP_MEAN_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for mean
+ */
+class TFliteOpMean : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_MEAN_H__
diff --git a/compiler/tflchef/tflite/src/Op/Pack.cpp b/compiler/tflchef/tflite/src/Op/Pack.cpp
new file mode 100644
index 000000000..ddf8c7d5d
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Pack.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pack.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpPack::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpPack::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_PackOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Pack");
+
+ auto op_options = operation->mutable_pack_options();
+
+ op_options->set_axis(op_params->axis());
+ op_options->set_values_count(op_params->values_count());
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Pack.h b/compiler/tflchef/tflite/src/Op/Pack.h
new file mode 100644
index 000000000..7779f64ed
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Pack.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_PACK_H__
+#define __TFLITE_OP_PACK_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for pack
+ */
+class TFliteOpPack : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_PACK_H__
diff --git a/compiler/tflchef/tflite/src/Op/Pad.cpp b/compiler/tflchef/tflite/src/Op/Pad.cpp
new file mode 100644
index 000000000..2978e4422
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Pad.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pad.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpPad::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // filler for second input
+ const auto &inputs = *op->inputs();
+
+ const tflite::Tensor *tensor = import->tensors()->Get(inputs[1]);
+ assert(tensor->type() == tflite::TensorType::TensorType_INT32);
+ const tflite::Buffer *buffer = import->buffers()->Get(tensor->buffer());
+ auto vec = extract_buffer<int32_t>(buffer);
+ import->set_tensor_filler(inputs[1], vec);
+}
+
+tflchef::Operation *TFliteOpPad::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Pad");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Pad.h b/compiler/tflchef/tflite/src/Op/Pad.h
new file mode 100644
index 000000000..99998d418
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Pad.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_PAD_H__
+#define __TFLITE_OP_PAD_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for PAD
+ */
+class TFliteOpPad : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_PAD_H__
diff --git a/compiler/tflchef/tflite/src/Op/ReLU.cpp b/compiler/tflchef/tflite/src/Op/ReLU.cpp
new file mode 100644
index 000000000..e4474b6fa
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/ReLU.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpReLU::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpReLU::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("ReLU");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/ReLU.h b/compiler/tflchef/tflite/src/Op/ReLU.h
new file mode 100644
index 000000000..be1090270
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/ReLU.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_RELU_H__
+#define __TFLITE_OP_RELU_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for ReLU
+ */
+class TFliteOpReLU : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_RELU_H__
diff --git a/compiler/tflchef/tflite/src/Op/ReLU6.cpp b/compiler/tflchef/tflite/src/Op/ReLU6.cpp
new file mode 100644
index 000000000..14371884b
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/ReLU6.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU6.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpReLU6::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpReLU6::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("ReLU6");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/ReLU6.h b/compiler/tflchef/tflite/src/Op/ReLU6.h
new file mode 100644
index 000000000..64ddb6a2e
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/ReLU6.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_RELU6_H__
+#define __TFLITE_OP_RELU6_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for ReLU6
+ */
+class TFliteOpReLU6 : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_RELU6_H__
diff --git a/compiler/tflchef/tflite/src/Op/Reshape.cpp b/compiler/tflchef/tflite/src/Op/Reshape.cpp
new file mode 100644
index 000000000..663ab3ec3
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Reshape.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reshape.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpReshape::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
+ bool hasShape = (inputs.size() == 2);
+ assert(inputs.size() == 1 || hasShape);
+
+ if (hasShape)
+ {
+ auto op_params = op->builtin_options_as_ReshapeOptions();
+ std::vector<int32_t> new_shape = as_index_vector(op_params->new_shape());
+ import->set_tensor_filler(inputs.at(1), new_shape);
+ }
+}
+
+tflchef::Operation *TFliteOpReshape::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_ReshapeOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Reshape");
+
+ auto op_options = operation->mutable_reshape_options();
+
+ std::vector<int32_t> new_shape = as_index_vector(op_params->new_shape());
+
+ for (auto shape : new_shape)
+ {
+ op_options->add_new_shape(shape);
+ }
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Reshape.h b/compiler/tflchef/tflite/src/Op/Reshape.h
new file mode 100644
index 000000000..be9fdac08
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Reshape.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_RESHAPE_H__
+#define __TFLITE_OP_RESHAPE_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Reshape
+ */
+class TFliteOpReshape : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_RESHAPE_H__
diff --git a/compiler/tflchef/tflite/src/Op/Rsqrt.cpp b/compiler/tflchef/tflite/src/Op/Rsqrt.cpp
new file mode 100644
index 000000000..1639214e4
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Rsqrt.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Rsqrt.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpRsqrt::filler(const tflite::Operator *, TFliteImport *, tflchef::ModelRecipe *) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpRsqrt::build(const tflite::Operator *, TFliteImport *,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Rsqrt");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Rsqrt.h b/compiler/tflchef/tflite/src/Op/Rsqrt.h
new file mode 100644
index 000000000..5d68344c2
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Rsqrt.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_RSQRT_H__
+#define __TFLITE_OP_RSQRT_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Rsqrt
+ */
+class TFliteOpRsqrt : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_RSQRT_H__
diff --git a/compiler/tflchef/tflite/src/Op/Softmax.cpp b/compiler/tflchef/tflite/src/Op/Softmax.cpp
new file mode 100644
index 000000000..5b5c94f7e
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Softmax.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Softmax.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpSoftmax::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpSoftmax::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_SoftmaxOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Softmax");
+
+ auto op_options = operation->mutable_softmax_options();
+
+ op_options->set_beta(op_params->beta());
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Softmax.h b/compiler/tflchef/tflite/src/Op/Softmax.h
new file mode 100644
index 000000000..cf168bdd9
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Softmax.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_SOFTMAX_H__
+#define __TFLITE_OP_SOFTMAX_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Softmax
+ */
+class TFliteOpSoftmax : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_SOFTMAX_H__
diff --git a/compiler/tflchef/tflite/src/Op/Sqrt.cpp b/compiler/tflchef/tflite/src/Op/Sqrt.cpp
new file mode 100644
index 000000000..dd6bfcab0
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Sqrt.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sqrt.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpSqrt::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+ // But input has filler for constant inputs
+ const auto &inputs = *op->inputs();
+
+ const tflite::Tensor *tensor = import->tensors()->Get(inputs[0]);
+ if (tensor->type() == tflite::TensorType::TensorType_FLOAT32)
+ {
+ const tflite::Buffer *buffer = import->buffers()->Get(tensor->buffer());
+ if (buffer && buffer->data())
+ {
+ auto vec = extract_buffer<float>(buffer);
+ import->set_tensor_filler(inputs[0], vec);
+ }
+ }
+}
+
+tflchef::Operation *TFliteOpSqrt::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Sqrt");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Sqrt.h b/compiler/tflchef/tflite/src/Op/Sqrt.h
new file mode 100644
index 000000000..9f0ad04ae
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Sqrt.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_SQRT_H__
+#define __TFLITE_OP_SQRT_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Sqrt
+ */
+class TFliteOpSqrt : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_SQRT_H__
diff --git a/compiler/tflchef/tflite/src/Op/Sub.cpp b/compiler/tflchef/tflite/src/Op/Sub.cpp
new file mode 100644
index 000000000..db77fddf7
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Sub.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sub.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpSub::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpSub::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_SubOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Sub");
+
+ auto op_options = operation->mutable_sub_options();
+
+ auto tflchef_activation = as_tflchef_activation(op_params->fused_activation_function());
+ op_options->set_activation(tflchef_activation);
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Sub.h b/compiler/tflchef/tflite/src/Op/Sub.h
new file mode 100644
index 000000000..2168e5e0d
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Sub.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_SUB_H__
+#define __TFLITE_OP_SUB_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Sub
+ */
+class TFliteOpSub : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_SUB_H__
diff --git a/compiler/tflchef/tflite/src/Op/Tanh.cpp b/compiler/tflchef/tflite/src/Op/Tanh.cpp
new file mode 100644
index 000000000..cab8ca460
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Tanh.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tanh.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpTanh::filler(const tflite::Operator *, TFliteImport *, tflchef::ModelRecipe *) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpTanh::build(const tflite::Operator *, TFliteImport *,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Tanh");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Tanh.h b/compiler/tflchef/tflite/src/Op/Tanh.h
new file mode 100644
index 000000000..7339e4103
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Tanh.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_TANH_H__
+#define __TFLITE_OP_TANH_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Tanh
+ */
+class TFliteOpTanh : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_TANH_H__
diff --git a/compiler/tflchef/tflite/src/Op/Transpose.cpp b/compiler/tflchef/tflite/src/Op/Transpose.cpp
new file mode 100644
index 000000000..ae97a19e2
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Transpose.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Transpose.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpTranspose::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const auto &inputs = *op->inputs();
+
+ const tflite::Tensor *perm_tensor = import->tensors()->Get(inputs[1]);
+ assert(perm_tensor->type() == tflite::TensorType::TensorType_INT32);
+ const tflite::Buffer *buffer = import->buffers()->Get(perm_tensor->buffer());
+ auto vec = extract_buffer<int32_t>(buffer);
+ import->set_tensor_filler(inputs[1], vec);
+}
+
+tflchef::Operation *TFliteOpTranspose::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as<tflite::TransposeOptions>();
+ assert(op_params != nullptr);
+ (void)op_params;
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Transpose");
+
+ auto op_options = operation->mutable_transpose_options();
+ (void)op_options;
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Transpose.h b/compiler/tflchef/tflite/src/Op/Transpose.h
new file mode 100644
index 000000000..f0d944b6b
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Transpose.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_TRANSPOSE_H__
+#define __TFLITE_OP_TRANSPOSE_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Transpose
+ */
+class TFliteOpTranspose : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_TRANSPOSE_H__
diff --git a/compiler/tflchef/tflite/src/RawModelLoader.cpp b/compiler/tflchef/tflite/src/RawModelLoader.cpp
new file mode 100644
index 000000000..e9ef8ec8b
--- /dev/null
+++ b/compiler/tflchef/tflite/src/RawModelLoader.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tflchef/RawModel.h>
+
+#include <cwrap/Fildes.h>
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+namespace
+{
+
+class MemoryMappedRawModel final : public tflchef::RawModel
+{
+public:
+ /**
+ * @require fd and data SHOULD be valid
+ */
+ explicit MemoryMappedRawModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ ~MemoryMappedRawModel()
+ {
+ munmap(_data, _size);
+ close(_fd);
+ }
+
+public:
+ MemoryMappedRawModel(const MemoryMappedRawModel &) = delete;
+ MemoryMappedRawModel(MemoryMappedRawModel &&) = delete;
+
+public:
+ const ::tflite::Model *model(void) const override { return ::tflite::GetModel(_data); }
+
+private:
+ int _fd = -1;
+ void *_data = nullptr;
+ size_t _size = 0;
+};
+
+} // namespace
+
+namespace tflchef
+{
+
+std::unique_ptr<RawModel> load_tflite(const std::string &path)
+{
+ cwrap::Fildes fildes{open(path.c_str(), O_RDONLY)};
+
+ if (fildes.get() == -1)
+ {
+ // Return nullptr on open failure
+ return nullptr;
+ }
+
+ struct stat st;
+ if (fstat(fildes.get(), &st) == -1)
+ {
+ // Return nullptr on fstat failure
+ return nullptr;
+ }
+
+ auto size = st.st_size;
+ auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fildes.get(), 0);
+
+ if (data == MAP_FAILED)
+ {
+ // Return nullptr on mmap failure
+ return nullptr;
+ }
+
+ return std::unique_ptr<tflchef::RawModel>{new MemoryMappedRawModel(fildes.release(), data, size)};
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/RecipeChef.cpp b/compiler/tflchef/tflite/src/RecipeChef.cpp
new file mode 100644
index 000000000..407006b26
--- /dev/null
+++ b/compiler/tflchef/tflite/src/RecipeChef.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tflchef/RecipeChef.h>
+
+#include "Convert.h"
+#include "TFliteImport.h"
+#include "TFliteOpChef.h"
+#include "TFliteOpChefs.h"
+#include "TFliteOpRegistry.h"
+
+#include <fstream>
+#include <sstream>
+
+namespace tflchef
+{
+
+void set_inputs(TFliteImport *import, tflchef::Operation *operation, const tflite::Operator *op)
+{
+ auto tensors = import->tensors();
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
+ for (auto input : inputs)
+ {
+ auto tensor = tensors->Get(input);
+ std::string name = tensor_name(tensor);
+ operation->add_input(name);
+ }
+}
+
+void set_outputs(TFliteImport *import, tflchef::Operation *operation, const tflite::Operator *op)
+{
+ auto tensors = import->tensors();
+ const std::vector<int32_t> &outputs = as_index_vector(op->outputs());
+
+ for (auto output : outputs)
+ {
+ auto tensor = tensors->Get(output);
+ std::string name = tensor_name(tensor);
+ operation->add_output(name);
+ }
+}
+
+/**
+ * @brief Builds a ModelRecipe from a tflite::Model.
+ *        It first checks operand filler options by scanning all operators,
+ *        then translates all operands and operators.
+ *        Finally, it sets the network inputs and outputs.
+ */
+std::unique_ptr<ModelRecipe> generate_recipe(const tflite::Model *model)
+{
+ std::unique_ptr<ModelRecipe> model_recipe{new ModelRecipe()};
+
+ TFliteImport tflite_import(model);
+
+ assert(tflite_import.num_subgraph() == 1);
+ tflite_import.select_sub_graph(0);
+
+ auto tensors = tflite_import.tensors();
+ auto buffers = tflite_import.buffers();
+ auto operators = tflite_import.operators();
+
+ // operand fillers for adding all operators
+ for (uint32_t i = 0; i < operators->Length(); ++i)
+ {
+ const auto *op = operators->Get(i);
+ tflite::BuiltinOperator builtincode = tflite_import.builtin_code(op);
+
+ if (const auto *graph_builder = TFliteOpRegistry::get().lookup(builtincode))
+ {
+ graph_builder->filler(op, &tflite_import, model_recipe.get());
+ }
+ else
+ {
+ std::string opcodename = tflite_import.opcode_name(op);
+ throw std::runtime_error{"Not supported: " + opcodename};
+ }
+ }
+
+ // add all operands(tensors)
+ for (uint32_t i = 0; i < tensors->Length(); ++i)
+ {
+ auto tensor = tensors->Get(i);
+
+ // check buffer
+ if (tensor->buffer() >= buffers->size())
+ throw std::runtime_error{"file load failed"};
+
+ ::tflchef::Operand *operand = model_recipe->add_operand();
+
+ operand->set_name(tensor_name(tensor));
+ operand->set_type(as_tflchef_type(tensor->type()));
+
+ std::vector<int32_t> dims = as_index_vector(tensor->shape());
+ ::tflchef::TensorShape *shape = operand->mutable_shape();
+ for (auto dim : dims)
+ {
+ shape->add_dim(dim);
+ }
+
+ // filler for weights, bias and so on
+ std::vector<int32_t> expvalues;
+ std::vector<float> expfvalues;
+ if (tflite_import.get_tensor_filler(i))
+ {
+ tflchef::TensorFiller *filler = operand->mutable_filler();
+ // Note: it is OK to use random weights for functionality validation
+ filler->set_tag("gaussian");
+ filler->add_arg("0.0"); // average
+ filler->add_arg("0.1"); // standard deviation
+ }
+ else if (tflite_import.get_tensor_filler(i, expvalues))
+ {
+ tflchef::TensorFiller *filler = operand->mutable_filler();
+ filler->set_tag("explicit");
+ for (auto value : expvalues)
+ {
+ std::ostringstream ss;
+ ss << value;
+ filler->add_arg(ss.str());
+ }
+ }
+ else if (tflite_import.get_tensor_filler(i, expfvalues))
+ {
+ tflchef::TensorFiller *filler = operand->mutable_filler();
+ filler->set_tag("explicit");
+ for (auto value : expfvalues)
+ {
+ std::ostringstream ss;
+ ss << value;
+ filler->add_arg(ss.str());
+ }
+ }
+
+ auto quant = tensor->quantization();
+ if (quant != nullptr)
+ {
+ // Note: Calling 'operand->mutable_quant()' will create empty 'quant' node
+ // in the recipe file. We want this only when valid parameter exist.
+ if (quant->min() != nullptr && quant->min()->size() > 0)
+ {
+ tflchef::TensorQuantization *chef_quant = operand->mutable_quant();
+ for (uint32_t idx = 0; idx < quant->min()->size(); ++idx)
+ chef_quant->add_min(quant->min()->Get(idx));
+ }
+ if (quant->max() != nullptr && quant->max()->size() > 0)
+ {
+ tflchef::TensorQuantization *chef_quant = operand->mutable_quant();
+ for (uint32_t idx = 0; idx < quant->max()->size(); idx++)
+ chef_quant->add_max(quant->max()->Get(idx));
+ }
+ if (quant->scale() != nullptr && quant->scale()->size() > 0)
+ {
+ tflchef::TensorQuantization *chef_quant = operand->mutable_quant();
+ for (uint32_t idx = 0; idx < quant->scale()->size(); ++idx)
+ chef_quant->add_scale(quant->scale()->Get(idx));
+ }
+ if (quant->zero_point() != nullptr && quant->zero_point()->size() > 0)
+ {
+ tflchef::TensorQuantization *chef_quant = operand->mutable_quant();
+ for (uint32_t idx = 0; idx < quant->zero_point()->size(); ++idx)
+ chef_quant->add_zero_point(quant->zero_point()->Get(idx));
+ }
+ }
+ }
+
+ // add all operators
+ for (uint32_t i = 0; i < operators->Length(); ++i)
+ {
+ const auto *op = operators->Get(i);
+ tflite::BuiltinOperator builtincode = tflite_import.builtin_code(op);
+
+ if (const auto *graph_builder = TFliteOpRegistry::get().lookup(builtincode))
+ {
+ auto operation = graph_builder->build(op, &tflite_import, model_recipe.get());
+
+ // common for all operators: inputs, outputs
+ set_inputs(&tflite_import, operation, op);
+ set_outputs(&tflite_import, operation, op);
+ }
+ else
+ {
+ std::string opcodename = tflite_import.opcode_name(op);
+ throw std::runtime_error{"Not supported: " + opcodename};
+ }
+ }
+
+ // network inputs/outputs
+ const std::vector<int32_t> &inputs = tflite_import.inputs();
+ const std::vector<int32_t> &outputs = tflite_import.outputs();
+
+ for (const auto input : inputs)
+ {
+ auto tensor = tensors->Get(input);
+ std::string name = tensor_name(tensor);
+
+ model_recipe->add_input(name);
+ }
+ for (const auto output : outputs)
+ {
+ auto tensor = tensors->Get(output);
+ std::string name = tensor_name(tensor);
+
+ model_recipe->add_output(name);
+ }
+
+ return std::move(model_recipe);
+}
+
+bool write_recipe(const std::string &filename, std::unique_ptr<ModelRecipe> &recipe)
+{
+ std::fstream fo(filename, std::ios::binary | std::ios::out);
+
+ if (!fo.is_open())
+ {
+ throw std::runtime_error{"file store failed"};
+ }
+
+ // Note: SerializeToString() or SerializeToOstream() writes in binary mode
+ // DebugString() and Utf8DebugString() will print as a human readable text
+ fo << recipe->Utf8DebugString();
+
+ fo.close();
+
+ return true;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/TFliteImport.cpp b/compiler/tflchef/tflite/src/TFliteImport.cpp
new file mode 100644
index 000000000..51d9b5ffa
--- /dev/null
+++ b/compiler/tflchef/tflite/src/TFliteImport.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TFliteImport.h"
+
+#include "Convert.h"
+
+#include <sstream>
+
+namespace tflchef
+{
+
+const char *kEmptyTensorName = "(noname)";
+
+const char *tensor_type(const tflite::Tensor *tensor)
+{
+ return tflite::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const tflite::Tensor *tensor)
+{
+ auto name = tensor->name();
+ if (name)
+ return name->c_str();
+ return kEmptyTensorName;
+}
+
+bool is_valid(const tflite::OperatorCode *opcode)
+{
+ tflite::BuiltinOperator code = opcode->builtin_code();
+ return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
+}
+
+bool is_custom(const tflite::OperatorCode *opcode)
+{
+ tflite::BuiltinOperator code = opcode->builtin_code();
+ return (code == tflite::BuiltinOperator_CUSTOM);
+}
+
+TFliteImport::TFliteImport(const tflite::Model *model)
+{
+ _subgraphs = model->subgraphs();
+ _buffers = model->buffers();
+
+ auto opcodes = model->operator_codes();
+ for (const ::tflite::OperatorCode *opcode : *opcodes)
+ {
+ _op_codes.push_back(opcode);
+ }
+}
+
+bool TFliteImport::select_sub_graph(uint32_t sgindex)
+{
+ _tensors = nullptr;
+ _operators = nullptr;
+ _inputs.clear();
+ _outputs.clear();
+
+ if (_subgraphs->Length() <= sgindex)
+ {
+ assert(false);
+ return false;
+ }
+
+ const tflite::SubGraph *subgraph = (*_subgraphs)[sgindex];
+
+ _tensors = subgraph->tensors();
+ _operators = subgraph->operators();
+
+ _inputs = as_index_vector(subgraph->inputs());
+ _outputs = as_index_vector(subgraph->outputs());
+
+ return true;
+}
+
+tflite::BuiltinOperator TFliteImport::builtin_code(const tflite::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const tflite::OperatorCode *opcode = _op_codes.at(index);
+
+ return opcode->builtin_code();
+}
+
+std::string TFliteImport::opcode_name(const tflite::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const tflite::OperatorCode *opcode = _op_codes.at(index);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid: " << index << ")";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ return opcode->custom_code()->c_str();
+ }
+
+ tflite::BuiltinOperator code = opcode->builtin_code();
+ return EnumNameBuiltinOperator(code);
+}
+
+size_t TFliteImport::buffer_info(const tflite::Tensor *tensor, const uint8_t **buff_data)
+{
+ *buff_data = nullptr;
+
+ if (tensor->buffer() == 0)
+ return 0;
+
+ if (auto *buffer = (*_buffers)[tensor->buffer()])
+ {
+ if (auto *array = buffer->data())
+ {
+ if (size_t size = array->size())
+ {
+ *buff_data = reinterpret_cast<const uint8_t *>(array->data());
+ return size;
+ }
+ }
+ }
+
+ return 0;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/TFliteImport.h b/compiler/tflchef/tflite/src/TFliteImport.h
new file mode 100644
index 000000000..fa8196405
--- /dev/null
+++ b/compiler/tflchef/tflite/src/TFliteImport.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_IMPORT_H__
+#define __TFLITE_IMPORT_H__
+
+#include <mio/tflite/schema_generated.h>
+
+#include <tflchef.pb.h>
+
+#include <map>
+#include <vector>
+
+namespace tflchef
+{
+
+using TFliteSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<tflite::SubGraph>>;
+using TFliteTensors_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>;
+using TFliteBuffers_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>;
+using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>;
+
+const char *tensor_type(const tflite::Tensor *tensor);
+const char *tensor_name(const tflite::Tensor *tensor);
+bool is_valid(const tflite::OperatorCode *opcode);
+bool is_custom(const tflite::OperatorCode *opcode);
+
+/**
+ * @brief Loads TF lite file and provides helpers to access attributes
+ */
+class TFliteImport
+{
+public:
+ TFliteImport(const tflite::Model *model);
+
+ TFliteImport() = delete;
+
+public:
+ bool select_sub_graph(uint32_t subgraph);
+
+public:
+ const TFliteBuffers_t *buffers() { return _buffers; }
+ const TFliteTensors_t *tensors() { return _tensors; }
+ const TFliteOperators_t *operators() { return _operators; }
+ const std::vector<int32_t> &inputs() const { return _inputs; }
+ const std::vector<int32_t> &outputs() const { return _outputs; }
+
+ uint32_t num_subgraph() const { return _subgraphs->Length(); }
+
+ tflite::BuiltinOperator builtin_code(const tflite::Operator *op) const;
+ std::string opcode_name(const tflite::Operator *op) const;
+ size_t buffer_info(const tflite::Tensor *tensor, const uint8_t **buff_data);
+
+ /**
+   * @brief Records that the tensor at the given index needs a filler option,
+   *        such as a kernel or bias.
+ */
+ void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; }
+
+ /**
+   * @brief Stores int32 filler values (e.g. reshape information) for the tensor.
+ */
+ void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
+ {
+ _tensor_filler_vint32[tensor_index] = expvalues;
+ }
+
+ void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
+ {
+ _tensor_filler_vfloat[tensor_index] = expvalues;
+ }
+
+ /**
+   * @brief Returns true if the tensor at the given index needs a filler option.
+ */
+ bool get_tensor_filler(uint32_t tensor_index)
+ {
+ auto it = _tensor_filler.find(tensor_index);
+ if (it != _tensor_filler.end())
+ {
+ return it->second;
+ }
+ return false;
+ }
+
+ /**
+   * @brief Returns true if the tensor at the given index needs an int array filler option.
+ */
+ bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
+ {
+ auto it = _tensor_filler_vint32.find(tensor_index);
+ if (it != _tensor_filler_vint32.end())
+ {
+ expvalues = it->second;
+ return true;
+ }
+ return false;
+ }
+
+ bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
+ {
+ auto it = _tensor_filler_vfloat.find(tensor_index);
+ if (it != _tensor_filler_vfloat.end())
+ {
+ expvalues = it->second;
+ return true;
+ }
+ return false;
+ }
+
+private:
+ const TFliteSubGraphs_t *_subgraphs;
+ const TFliteBuffers_t *_buffers;
+ const TFliteTensors_t *_tensors;
+ const TFliteOperators_t *_operators;
+
+ std::vector<const tflite::OperatorCode *> _op_codes;
+ std::vector<int32_t> _inputs;
+ std::vector<int32_t> _outputs;
+
+ std::map<uint32_t, bool> _tensor_filler;
+ std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32;
+ std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_IMPORT_H__
diff --git a/compiler/tflchef/tflite/src/TFliteOpChef.h b/compiler/tflchef/tflite/src/TFliteOpChef.h
new file mode 100644
index 000000000..98564293b
--- /dev/null
+++ b/compiler/tflchef/tflite/src/TFliteOpChef.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_CHEF_H__
+#define __TFLITE_OP_CHEF_H__
+
+#include <mio/tflite/schema_generated.h>
+
+#include <tflchef.pb.h>
+
+#include "TFliteImport.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief Interface for each operator to build a tflchef recipe
+ */
+class TFliteOpChef
+{
+public:
+ virtual void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const = 0;
+ virtual ::tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const = 0;
+ virtual ~TFliteOpChef() {}
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_CHEF_H__
diff --git a/compiler/tflchef/tflite/src/TFliteOpChefs.h b/compiler/tflchef/tflite/src/TFliteOpChefs.h
new file mode 100644
index 000000000..685d6861b
--- /dev/null
+++ b/compiler/tflchef/tflite/src/TFliteOpChefs.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_CHEFS_H__
+#define __TFLITE_OP_CHEFS_H__
+
+// In alphabet order
+#include "Op/Abs.h"
+#include "Op/Add.h"
+#include "Op/ArgMax.h"
+#include "Op/AveragePool2D.h"
+#include "Op/BatchToSpaceND.h"
+#include "Op/Concatenation.h"
+#include "Op/Conv2D.h"
+#include "Op/Cos.h"
+#include "Op/DepthwiseConv2D.h"
+#include "Op/Div.h"
+#include "Op/Equal.h"
+#include "Op/Exp.h"
+#include "Op/FloorDiv.h"
+#include "Op/FullyConnected.h"
+#include "Op/LogicalNot.h"
+#include "Op/LogicalOr.h"
+#include "Op/MaxPool2D.h"
+#include "Op/Mean.h"
+#include "Op/Pack.h"
+#include "Op/Pad.h"
+#include "Op/ReLU.h"
+#include "Op/ReLU6.h"
+#include "Op/Reshape.h"
+#include "Op/Rsqrt.h"
+#include "Op/Softmax.h"
+#include "Op/Sqrt.h"
+#include "Op/Sub.h"
+#include "Op/Tanh.h"
+#include "Op/Transpose.h"
+
+#endif // __TFLITE_OP_CHEFS_H__
diff --git a/compiler/tflchef/tflite/src/TFliteOpRegistry.h b/compiler/tflchef/tflite/src/TFliteOpRegistry.h
new file mode 100644
index 000000000..f0aed2113
--- /dev/null
+++ b/compiler/tflchef/tflite/src/TFliteOpRegistry.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_REGISTRY_H__
+#define __TFLITE_OP_REGISTRY_H__
+
+#include "TFliteOpChef.h"
+#include "TFliteOpChefs.h"
+
+#include <memory>
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator registry
+ */
+class TFliteOpRegistry
+{
+public:
+ /**
+ * @brief Returns registered TFliteOpChef pointer for BuiltinOperator or
+ * nullptr if not registered
+ */
+ const TFliteOpChef *lookup(tflite::BuiltinOperator op) const
+ {
+ if (_tfliteop_map.find(op) == _tfliteop_map.end())
+ return nullptr;
+
+ return _tfliteop_map.at(op).get();
+ }
+
+ static TFliteOpRegistry &get()
+ {
+ static TFliteOpRegistry me;
+ return me;
+ }
+
+private:
+ TFliteOpRegistry()
+ {
+#define REG_TFL_OP(OPCODE, CLASS) \
+ _tfliteop_map[tflite::BuiltinOperator_##OPCODE] = std::make_unique<CLASS>()
+
+ REG_TFL_OP(ABS, TFliteOpAbs);
+ REG_TFL_OP(ADD, TFliteOpAdd);
+ REG_TFL_OP(ARG_MAX, TFliteOpArgMax);
+ REG_TFL_OP(AVERAGE_POOL_2D, TFliteOpAveragePool2D);
+ REG_TFL_OP(BATCH_TO_SPACE_ND, TFliteOpBatchToSpaceND);
+ REG_TFL_OP(CONCATENATION, TFliteOpConcatenation);
+ REG_TFL_OP(CONV_2D, TFliteOpConv2D);
+ REG_TFL_OP(COS, TFliteOpCos);
+ REG_TFL_OP(DEPTHWISE_CONV_2D, TFliteOpDepthwiseConv2D);
+ REG_TFL_OP(DIV, TFliteOpDiv);
+ REG_TFL_OP(EQUAL, TFliteOpEqual);
+ REG_TFL_OP(EXP, TFliteOpExp);
+ REG_TFL_OP(FLOOR_DIV, TFliteOpFloorDiv);
+ REG_TFL_OP(FULLY_CONNECTED, TFliteOpFullyConnected);
+ REG_TFL_OP(LOGICAL_NOT, TFliteOpLogicalNot);
+ REG_TFL_OP(LOGICAL_OR, TFliteOpLogicalOr);
+ REG_TFL_OP(MAX_POOL_2D, TFliteOpMaxPool2D);
+ REG_TFL_OP(MEAN, TFliteOpMean);
+ REG_TFL_OP(PACK, TFliteOpPack);
+ REG_TFL_OP(PAD, TFliteOpPad);
+ REG_TFL_OP(RELU, TFliteOpReLU);
+ REG_TFL_OP(RELU6, TFliteOpReLU6);
+ REG_TFL_OP(RESHAPE, TFliteOpReshape);
+ REG_TFL_OP(RSQRT, TFliteOpRsqrt);
+ REG_TFL_OP(SOFTMAX, TFliteOpSoftmax);
+ REG_TFL_OP(SQRT, TFliteOpSqrt);
+ REG_TFL_OP(SUB, TFliteOpSub);
+ REG_TFL_OP(TANH, TFliteOpTanh);
+ REG_TFL_OP(TRANSPOSE, TFliteOpTranspose);
+
+#undef REG_TFL_OP
+ }
+
+private:
+ std::map<tflite::BuiltinOperator, std::unique_ptr<TFliteOpChef>> _tfliteop_map;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_REGISTRY_H__
diff --git a/compiler/tflchef/tools/CMakeLists.txt b/compiler/tflchef/tools/CMakeLists.txt
new file mode 100644
index 000000000..92e3a6e6e
--- /dev/null
+++ b/compiler/tflchef/tools/CMakeLists.txt
@@ -0,0 +1,6 @@
+# Console-based tool (tflchef)
+add_subdirectory(console)
+# File-based tool (tflchef-file)
+add_subdirectory(file)
+# Reverse tool to generate recipe from tflite (tflchef-reverse)
+add_subdirectory(reverse)
diff --git a/compiler/tflchef/tools/console/CMakeLists.txt b/compiler/tflchef/tools/console/CMakeLists.txt
new file mode 100644
index 000000000..d9160c3a2
--- /dev/null
+++ b/compiler/tflchef/tools/console/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_executable(tflchef Driver.cpp)
+target_link_libraries(tflchef tflchef_core)
+target_link_libraries(tflchef safemain)
diff --git a/compiler/tflchef/tools/console/Driver.cpp b/compiler/tflchef/tools/console/Driver.cpp
new file mode 100644
index 000000000..d6f7ba1ae
--- /dev/null
+++ b/compiler/tflchef/tools/console/Driver.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflchef/ModelChef.h"
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <iostream>
+
+int entry(int argc, char **argv)
+{
+ int32_t model_version = 1;
+
+ ::tflchef::ModelRecipe model_recipe;
+
+ // Read a model recipe from standard input
+ {
+ google::protobuf::io::IstreamInputStream iis{&std::cin};
+ if (!google::protobuf::TextFormat::Parse(&iis, &model_recipe))
+ {
+ std::cerr << "ERROR: Failed to parse recipe" << std::endl;
+ return 255;
+ }
+
+ if (model_recipe.has_version())
+ {
+ model_version = model_recipe.version();
+ }
+ }
+
+ if (model_version > 1)
+ {
+ std::cerr << "ERROR: Unsupported recipe version: " << model_version << std::endl;
+ return 255;
+ }
+
+ auto generated_model = tflchef::cook(model_recipe);
+
+ // Write a generated model into standard output
+ std::cout.write(generated_model.base(), generated_model.size());
+
+ return 0;
+}
diff --git a/compiler/tflchef/tools/file/CMakeLists.txt b/compiler/tflchef/tools/file/CMakeLists.txt
new file mode 100644
index 000000000..477b7d974
--- /dev/null
+++ b/compiler/tflchef/tools/file/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_executable(tflchef-file Driver.cpp)
+target_link_libraries(tflchef-file tflchef_core)
+target_link_libraries(tflchef-file safemain)
diff --git a/compiler/tflchef/tools/file/Driver.cpp b/compiler/tflchef/tools/file/Driver.cpp
new file mode 100644
index 000000000..3ef701910
--- /dev/null
+++ b/compiler/tflchef/tools/file/Driver.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflchef/ModelChef.h"
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include <fstream>
+#include <iostream>
+
+int entry(int argc, char **argv)
+{
+ if (argc != 3)
+ {
+ std::cerr << "ERROR: Failed to parse arguments" << std::endl;
+ std::cerr << std::endl;
+ std::cerr << "USAGE: " << argv[0] << " [recipe] [output]" << std::endl;
+ return 255;
+ }
+
+ int32_t model_version = 1;
+
+ ::tflchef::ModelRecipe model_recipe;
+
+ // Load model recipe from a file
+ {
+ std::ifstream is{argv[1]};
+ google::protobuf::io::IstreamInputStream iis{&is};
+ if (!google::protobuf::TextFormat::Parse(&iis, &model_recipe))
+ {
+ std::cerr << "ERROR: Failed to parse recipe '" << argv[1] << "'" << std::endl;
+ return 255;
+ }
+
+ if (model_recipe.has_version())
+ {
+ model_version = model_recipe.version();
+ }
+ }
+
+ if (model_version > 1)
+ {
+ std::cerr << "ERROR: Unsupported recipe version: " << model_version << ", '" << argv[1] << "'"
+ << std::endl;
+ return 255;
+ }
+
+ auto generated_model = tflchef::cook(model_recipe);
+
+ // Dump generated model into a file
+ {
+ std::ofstream os{argv[2], std::ios::binary};
+ os.write(generated_model.base(), generated_model.size());
+ }
+
+ return 0;
+}
diff --git a/compiler/tflchef/tools/reverse/CMakeLists.txt b/compiler/tflchef/tools/reverse/CMakeLists.txt
new file mode 100644
index 000000000..63cb36c06
--- /dev/null
+++ b/compiler/tflchef/tools/reverse/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_executable(tflchef-reverse Driver.cpp)
+target_link_libraries(tflchef-reverse tflchef_tflite)
+target_link_libraries(tflchef-reverse safemain)
diff --git a/compiler/tflchef/tools/reverse/Driver.cpp b/compiler/tflchef/tools/reverse/Driver.cpp
new file mode 100644
index 000000000..549756463
--- /dev/null
+++ b/compiler/tflchef/tools/reverse/Driver.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tflchef/RawModel.h>
+#include <tflchef/RecipeChef.h>
+
+#include <memory>
+#include <iostream>
+
+int entry(int argc, char **argv)
+{
+ if (argc != 3)
+ {
+ std::cerr << "ERROR: Failed to parse arguments" << std::endl;
+ std::cerr << std::endl;
+ std::cerr << "USAGE: " << argv[0] << " [tflite] [output]" << std::endl;
+ return 255;
+ }
+
+ // Load TF lite model from a tflite file
+ std::unique_ptr<tflchef::RawModel> rawmodel = tflchef::load_tflite(argv[1]);
+ if (rawmodel == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load tflite '" << argv[1] << "'" << std::endl;
+ return 255;
+ }
+
+ const tflite::Model *tflmodel = rawmodel->model();
+ if (tflmodel == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load tflite '" << argv[1] << "'" << std::endl;
+ return 255;
+ }
+
+ // Generate ModelRecipe recipe
+ std::unique_ptr<tflchef::ModelRecipe> recipe = tflchef::generate_recipe(tflmodel);
+ if (recipe.get() == nullptr)
+ {
+ std::cerr << "ERROR: Failed to generate recipe" << std::endl;
+ return 255;
+ }
+
+ // Save to a file
+ bool result = tflchef::write_recipe(argv[2], recipe);
+ if (!result)
+ {
+ std::cerr << "ERROR: Failed to write to recipe '" << argv[2] << "'" << std::endl;
+ return 255;
+ }
+ return 0;
+}
diff --git a/compiler/tfldump/CMakeLists.txt b/compiler/tfldump/CMakeLists.txt
new file mode 100644
index 000000000..99b6365cc
--- /dev/null
+++ b/compiler/tfldump/CMakeLists.txt
@@ -0,0 +1,14 @@
+if(NOT TARGET mio_tflite)
+ return()
+endif(NOT TARGET mio_tflite)
+
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(tfldump ${DRIVER} ${SOURCES})
+target_include_directories(tfldump PRIVATE include)
+target_link_libraries(tfldump mio_tflite)
+target_link_libraries(tfldump safemain)
+target_link_libraries(tfldump stdex)
+target_link_libraries(tfldump flatbuffers)
diff --git a/compiler/tfldump/README.md b/compiler/tfldump/README.md
new file mode 100644
index 000000000..50d003f12
--- /dev/null
+++ b/compiler/tfldump/README.md
@@ -0,0 +1,67 @@
+# tfldump
+
+### What is this?
+
+tfldump is a tool that dumps binary tflite file into human readable text to console.
+
+tfldump is implemented with C++ not python. We can do the same thing much easier
+with python but this tool doesn't need to install TensorFlow python package.
+
+Schema for FlatBuffer used is from TensorFlow v1.12.0 release.
+
+### Design philosophy
+
+Make the code simple.
+
+### To do
+
+- Print weight values other than uint8_t
+- Add more operators
+
+### How to use
+
+Command argument format:
+```
+tfldump tflite_file
+```
+
+Example output of dump `readme.tflite` file
+```
+Dump: readme.tflite
+
+Operator Codes: [order] OpCodeName (OpCode Enum)
+[0] CONV_2D (code: 3)
+
+Buffers: B(index) (length) values, if any
+B(0) (0)
+B(1) (8) 0x94 0x5b 0x95 0xbf 0x42 0xa4 0x52 0xbf ...
+B(2) (4) 0xcd 0xcc 0x8c 0x3f
+
+Operands: T(tensor index) TYPE (shape) B(buffer index) OperandName
+T(0) FLOAT32 (1, 3, 3, 2) B(0) ifm
+T(1) FLOAT32 (1, 1, 1, 2) B(1) ker
+T(2) FLOAT32 (1) B(2) bias
+T(3) FLOAT32 (1, 3, 3, 1) B(0) ofm
+
+Operators: O(operator index) OpCodeName
+ Option(values) ... <-- depending on OpCode
+ I T(tensor index) OperandName <-- as input
+ O T(tensor index) OperandName <-- as output
+O(0) CONV_2D
+ Padding(1) Stride.W(1) Stride.H(1) Activation(0)
+ I T(0) ifm
+ I T(1) ker
+ I T(2) bias
+ O T(3) ofm
+
+Inputs/Outputs: I(input)/O(output) T(tensor index) OperandName
+I T(0) ifm
+I T(1) ker
+O T(3) ofm
+```
+
+### Dependency
+
+- safemain
+- stdex
+- FlatBuffers
diff --git a/compiler/tfldump/driver/Driver.cpp b/compiler/tfldump/driver/Driver.cpp
new file mode 100644
index 000000000..2ede0fdd9
--- /dev/null
+++ b/compiler/tfldump/driver/Driver.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tflread/Model.h>
+#include <tfldump/Dump.h>
+
+#include <iostream>
+
+int entry(int argc, char **argv)
+{
+ if (argc != 2)
+ {
+ std::cerr << "ERROR: Failed to parse arguments" << std::endl;
+ std::cerr << std::endl;
+ std::cerr << "USAGE: " << argv[0] << " [tflite]" << std::endl;
+ return 255;
+ }
+
+ // Load TF lite model from a tflite file
+ std::unique_ptr<tflread::Model> model = tflread::load_tflite(argv[1]);
+ if (model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load tflite '" << argv[1] << "'" << std::endl;
+ return 255;
+ }
+
+ const tflite::Model *tflmodel = model->model();
+ if (tflmodel == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load tflite '" << argv[1] << "'" << std::endl;
+ return 255;
+ }
+
+ std::cout << "Dump: " << argv[1] << std::endl << std::endl;
+
+ std::cout << tflmodel << std::endl;
+
+ return 0;
+}
diff --git a/compiler/tfldump/include/tfldump/Dump.h b/compiler/tfldump/include/tfldump/Dump.h
new file mode 100644
index 000000000..af04bb132
--- /dev/null
+++ b/compiler/tfldump/include/tfldump/Dump.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLDUMP_DUMP_H__
+#define __TFLDUMP_DUMP_H__
+
+#include <mio/tflite/schema_generated.h>
+
+#include <ostream>
+
+namespace tfldump
+{
+
+void dump_model(std::ostream &os, const tflite::Model *model);
+}
+
+std::ostream &operator<<(std::ostream &os, const tflite::Model *model);
+
+#endif // __TFLDUMP_DUMP_H__
diff --git a/compiler/tfldump/include/tflread/Model.h b/compiler/tfldump/include/tflread/Model.h
new file mode 100644
index 000000000..c6e4a94ac
--- /dev/null
+++ b/compiler/tfldump/include/tflread/Model.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLREAD_MODEL_H__
+#define __TFLREAD_MODEL_H__
+
+#include <mio/tflite/schema_generated.h>
+
+#include <memory>
+
+namespace tflread
+{
+
+struct Model
+{
+ virtual ~Model() = default;
+
+ virtual const ::tflite::Model *model(void) const = 0;
+};
+
+/**
+ * @brief Load TensorFlow Lite model (as a raw Model) from a given path
+ *
+ * @note May return a nullptr
+ */
+std::unique_ptr<Model> load_tflite(const std::string &path);
+
+} // namespace tflread
+
+#endif // __TFLREAD_MODEL_H__
diff --git a/compiler/tfldump/requires.cmake b/compiler/tfldump/requires.cmake
new file mode 100644
index 000000000..adcae7c1f
--- /dev/null
+++ b/compiler/tfldump/requires.cmake
@@ -0,0 +1 @@
+require("mio-tflite")
diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp
new file mode 100644
index 000000000..e6b84251a
--- /dev/null
+++ b/compiler/tfldump/src/Dump.cpp
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tfldump/Dump.h>
+
+#include "Read.h"
+#include "OpPrinter.h"
+
+#include <ostream>
+
+#include <algorithm> // min
+#include <iomanip> // setfill
+
+namespace tfldump
+{
+
+void dump_buffer(std::ostream &os, const uint8_t *buffer, size_t size, size_t amount)
+{
+ std::ios_base::fmtflags saveflags(os.flags());
+
+ bool second = false;
+ bool ellipsis = amount > 0 && size > 4;
+ size_t count = ellipsis ? std::min(size, amount) : size;
+
+ for (size_t i = 0; i < count; i++)
+ {
+ if (second)
+ {
+ os << " ";
+ }
+
+ os << std::showbase << std::setfill('0') << std::setw(2);
+ os << std::hex << (uint32_t)buffer[i];
+
+ second = true;
+ }
+ if (ellipsis)
+ {
+ os << " ...";
+ }
+
+ os.flags(saveflags);
+}
+
+void dump_vector(std::ostream &os, const std::vector<int32_t> &vs)
+{
+ uint32_t seq = 0;
+ for (auto &v : vs)
+ {
+ if (seq)
+ os << ", ";
+ os << v;
+ seq++;
+ }
+}
+
+std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect)
+{
+ tfldump::dump_vector(os, vect);
+ return os;
+}
+
+template <typename T> void dump_fbvect(std::ostream &os, const flatbuffers::Vector<T> *fbvect)
+{
+ if (fbvect == nullptr)
+ return;
+
+ bool ellipsis = (fbvect->size() > 4);
+ auto limit_size = ellipsis ? 4 : fbvect->size();
+
+ if (ellipsis)
+ {
+ os << "(" << fbvect->size() << ") ";
+ }
+ for (uint32_t q = 0; q < limit_size; q++)
+ {
+ if (q)
+ os << ", ";
+ os << fbvect->Get(q);
+ }
+ if (ellipsis)
+ {
+ os << " ... ";
+ }
+}
+
+template <typename T>
+std::ostream &operator<<(std::ostream &os, const flatbuffers::Vector<T> *fbvect)
+{
+ dump_fbvect(os, fbvect);
+ return os;
+}
+
+void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
+{
+ auto tensors = reader.tensors();
+ auto operators = reader.operators();
+
+ // dump operands(tensors)
+ os << "Operands: T(subgraph index : tensor index) TYPE (shape) B(buffer index) OperandName"
+ << std::endl;
+ for (uint32_t i = 0; i < tensors->Length(); ++i)
+ {
+ // TODO refactor to some better structure
+ auto tensor = tensors->Get(i);
+ std::vector<int32_t> dims = {-1};
+
+ if (tensor->shape())
+ dims = tflread::as_index_vector(tensor->shape());
+
+ os << "T(" << reader.subgraph_index() << ":" << i << ") " << tflread::tensor_type(tensor)
+ << " ";
+ os << "(" << dims << ") ";
+ os << "B(" << tensor->buffer() << ") ";
+ os << tflread::tensor_name(tensor) << std::endl;
+
+ if (auto q_params = tensor->quantization())
+ {
+ if ((q_params->min() && q_params->max()) || (q_params->scale() && q_params->zero_point()))
+ {
+ std::string strquantiz = " Quantization: ";
+ std::string strqindent(strquantiz.size(), ' ');
+ os << strquantiz;
+
+ if (q_params->min())
+ {
+ os << "min(" << q_params->min() << ") ";
+ if (q_params->min()->size() > 1)
+ os << std::endl << strqindent;
+ }
+ if (q_params->max())
+ {
+ os << "max(" << q_params->max() << ") ";
+ if (q_params->max()->size() > 1)
+ os << std::endl << strqindent;
+ }
+ if (q_params->scale())
+ {
+ os << "scale(" << q_params->scale() << ") ";
+ if (q_params->scale()->size() > 1)
+ os << std::endl << strqindent;
+ }
+ if (q_params->zero_point())
+ os << "zeropt(" << q_params->zero_point() << ") ";
+
+ os << std::endl;
+ }
+ }
+ }
+ os << std::endl;
+
+ // dump operators
+ os << "Operators: O(subgraph index : operator index) OpCodeName " << std::endl;
+ os << " Option(values) ... <-- depending on OpCode" << std::endl;
+ os << " I T(tensor index) OperandName <-- as input" << std::endl;
+ os << " O T(tensor index) OperandName <-- as output" << std::endl;
+ for (uint32_t i = 0; i < operators->Length(); ++i)
+ {
+ const auto op = operators->Get(i);
+ tflite::BuiltinOperator builtincode = reader.builtin_code(op);
+
+ const std::vector<int32_t> &inputs = tflread::as_index_vector(op->inputs());
+ const std::vector<int32_t> &outputs = tflread::as_index_vector(op->outputs());
+ auto op_name = reader.opcode_name(op);
+
+ os << "O(" << reader.subgraph_index() << ":" << i << ") " << op_name << " ";
+ os << std::endl;
+
+ if (auto op_prn = OpPrinterRegistry::get().lookup(builtincode))
+ {
+ op_prn->options(op, os);
+ }
+
+ for (auto input : inputs)
+ {
+ os << " I T(" << input << ") ";
+ if (input >= 0)
+ {
+ auto tensor = tensors->Get(input);
+ os << tflread::tensor_name(tensor);
+ }
+ os << std::endl;
+ }
+ for (auto output : outputs)
+ {
+ os << " O T(" << output << ") ";
+ if (output >= 0)
+ {
+ auto tensor = tensors->Get(output);
+ os << tflread::tensor_name(tensor);
+ }
+ os << std::endl;
+ }
+ }
+ os << std::endl;
+
+ // dump network inputs/outputs
+ os << "Inputs/Outputs: I(input)/O(output) T(tensor index) OperandName" << std::endl;
+
+ for (const auto input : reader.inputs())
+ {
+ auto tensor = tensors->Get(input);
+ std::string name = tflread::tensor_name(tensor);
+ os << "I T(" << input << ") " << name << std::endl;
+ }
+
+ for (const auto output : reader.outputs())
+ {
+ auto tensor = tensors->Get(output);
+ std::string name = tflread::tensor_name(tensor);
+ os << "O T(" << output << ") " << name << std::endl;
+ }
+
+ os << std::endl;
+}
+
+void dump_model(std::ostream &os, const tflite::Model *model)
+{
+ tflread::Reader reader(model);
+
+ uint32_t num_subgraph = reader.num_subgraph();
+
+ // dump model version
+ os << "===================================================================" << std::endl;
+ os << "Model version: " << reader.version() << std::endl;
+ os << " # sub graphs: " << num_subgraph << std::endl;
+ os << std::endl;
+
+ auto opcodes = reader.opcodes();
+ auto buffers = reader.buffers();
+
+ // dump operator_codes
+ os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl;
+ int32_t opcode_index = 0;
+ for (auto opcode : opcodes)
+ {
+ tflite::BuiltinOperator op_code = opcode->builtin_code();
+ auto op_name = tflread::opcode_name(opcode);
+ auto op_version = opcode->version();
+
+ os << "[" << opcode_index << "] " << op_name << " (code: " << op_code
+ << ", version: " << op_version << ")" << std::endl;
+
+ opcode_index++;
+ }
+ os << std::endl;
+
+ // dump buffer
+ os << "Buffers: B(index) (length) values, if any" << std::endl;
+ for (uint32_t i = 0; i < buffers->Length(); ++i)
+ {
+ const uint8_t *buff_data;
+ size_t size = reader.buffer_info(i, &buff_data);
+
+ os << "B(" << i << ") (" << size << ") ";
+ if (buff_data != nullptr)
+ {
+ dump_buffer(os, buff_data, size, 16);
+ }
+ os << std::endl;
+ }
+ os << std::endl;
+
+ for (uint32_t sg = 0; sg < num_subgraph; ++sg)
+ {
+ reader.select_subgraph(sg);
+
+ os << "-------------------------------------------------------------------" << std::endl;
+ os << "Sub-Graph: #" << sg << " " << reader.subgraph_name() << std::endl;
+ os << std::endl;
+
+ dump_sub_graph(os, reader);
+ }
+
+ os << "===================================================================" << std::endl;
+}
+
+} // namespace tfldump
+
+std::ostream &operator<<(std::ostream &os, const tflite::Model *model)
+{
+ tfldump::dump_model(os, model);
+ return os;
+}
diff --git a/compiler/tfldump/src/Load.cpp b/compiler/tfldump/src/Load.cpp
new file mode 100644
index 000000000..fe04a5dd6
--- /dev/null
+++ b/compiler/tfldump/src/Load.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tflread/Model.h>
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+namespace
+{
+
+class MemoryMappedModel final : public tflread::Model
+{
+public:
+ /**
+ * @require fd and data SHOULD be valid
+ */
+ explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ ~MemoryMappedModel()
+ {
+ munmap(_data, _size);
+ close(_fd);
+ }
+
+public:
+ MemoryMappedModel(const MemoryMappedModel &) = delete;
+ MemoryMappedModel(MemoryMappedModel &&) = delete;
+
+public:
+ const ::tflite::Model *model(void) const override { return ::tflite::GetModel(_data); }
+
+private:
+ int _fd = -1;
+ void *_data = nullptr;
+ size_t _size = 0;
+};
+
+class FileDescriptor final
+{
+public:
+ FileDescriptor(int value) : _value{value}
+ {
+ // DO NOTHING
+ }
+
+public:
+ // NOTE Copy is not allowed
+ FileDescriptor(const FileDescriptor &) = delete;
+
+public:
+ // NOTE Move is allowed
+ FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); }
+
+public:
+ ~FileDescriptor()
+ {
+ if (_value != -1)
+ {
+      // Close on destruction
+ close(_value);
+ }
+ }
+
+public:
+ int value(void) const { return _value; }
+
+public:
+ int release(void)
+ {
+ auto res = _value;
+ _value = -1;
+ return res;
+ }
+
+private:
+ int _value = -1;
+};
+
+} // namespace
+
+namespace tflread
+{
+
+std::unique_ptr<Model> load_tflite(const std::string &path)
+{
+ FileDescriptor fd = open(path.c_str(), O_RDONLY);
+
+ if (fd.value() == -1)
+ {
+ // Return nullptr on open failure
+ return nullptr;
+ }
+
+ struct stat st;
+ if (fstat(fd.value(), &st) == -1)
+ {
+ // Return nullptr on fstat failure
+ return nullptr;
+ }
+
+ auto size = st.st_size;
+ auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0);
+
+ if (data == MAP_FAILED)
+ {
+ // Return nullptr on mmap failure
+ return nullptr;
+ }
+
+ return std::unique_ptr<tflread::Model>{new MemoryMappedModel(fd.release(), data, size)};
+}
+
+} // namespace tflread
diff --git a/compiler/tfldump/src/OpPrinter.cpp b/compiler/tfldump/src/OpPrinter.cpp
new file mode 100644
index 000000000..5120f42b1
--- /dev/null
+++ b/compiler/tfldump/src/OpPrinter.cpp
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OpPrinter.h"
+#include "Read.h"
+
+#include <stdex/Memory.h>
+
+#include <flatbuffers/flexbuffers.h>
+
+using stdex::make_unique;
+
+namespace tfldump
+{
+
+// TODO move to some header
+std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect);
+
+// TODO Re-arrange in alphabetical order
+
+class AddPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_AddOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class ArgMaxPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_ArgMaxOptions())
+ {
+ os << " ";
+ os << "OutputType(" << EnumNameTensorType(params->output_type()) << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class Conv2DPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto conv_params = op->builtin_options_as_Conv2DOptions())
+ {
+ os << " ";
+ os << "Padding(" << conv_params->padding() << ") ";
+ os << "Stride.W(" << conv_params->stride_w() << ") ";
+ os << "Stride.H(" << conv_params->stride_h() << ") ";
+ os << "Activation("
+ << EnumNameActivationFunctionType(conv_params->fused_activation_function()) << ")";
+ os << std::endl;
+ }
+ }
+};
+
+class DivPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_DivOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class Pool2DPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto pool_params = op->builtin_options_as_Pool2DOptions())
+ {
+ os << " ";
+ os << "Padding(" << pool_params->padding() << ") ";
+ os << "Stride.W(" << pool_params->stride_w() << ") ";
+ os << "Stride.H(" << pool_params->stride_h() << ") ";
+ os << "Filter.W(" << pool_params->filter_width() << ") ";
+ os << "Filter.H(" << pool_params->filter_height() << ") ";
+ os << "Activation("
+ << EnumNameActivationFunctionType(pool_params->fused_activation_function()) << ")";
+ os << std::endl;
+ }
+ }
+};
+
+class ConcatenationPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *concatenation_params = op->builtin_options_as_ConcatenationOptions())
+ {
+ os << " ";
+ os << "Activation("
+ << EnumNameActivationFunctionType(concatenation_params->fused_activation_function())
+ << ") ";
+ os << "Axis(" << concatenation_params->axis() << ")";
+ os << std::endl;
+ }
+ }
+};
+
+class ReshapePrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *reshape_params = op->builtin_options_as_ReshapeOptions())
+ {
+ auto new_shape = tflread::as_index_vector(reshape_params->new_shape());
+ os << " ";
+ os << "NewShape(" << new_shape << ")";
+ os << std::endl;
+ }
+ }
+};
+
+class DepthwiseConv2DPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto conv_params = op->builtin_options_as_DepthwiseConv2DOptions())
+ {
+ os << " ";
+ os << "Padding(" << conv_params->padding() << ") ";
+ os << "Stride.W(" << conv_params->stride_w() << ") ";
+ os << "Stride.H(" << conv_params->stride_h() << ") ";
+ os << "DepthMultiplier(" << conv_params->depth_multiplier() << ") ";
+ os << "Dilation.W(" << conv_params->dilation_w_factor() << ") ";
+ os << "Dilation.H(" << conv_params->dilation_h_factor() << ")";
+ os << "Activation("
+ << EnumNameActivationFunctionType(conv_params->fused_activation_function()) << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class FullyConnectedPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_FullyConnectedOptions())
+ {
+ os << " ";
+ os << "WeightFormat(" << EnumNameFullyConnectedOptionsWeightsFormat(params->weights_format())
+ << ") ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+
+ os << std::endl;
+ }
+ }
+};
+
+class MulPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_MulOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class PackPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_PackOptions())
+ {
+ os << " ";
+ os << "ValuesCount(" << params->values_count() << ") ";
+ os << "Axis(" << params->axis() << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class SoftmaxPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *softmax_params = op->builtin_options_as_SoftmaxOptions())
+ {
+ os << " ";
+ os << "Beta(" << softmax_params->beta() << ")";
+ os << std::endl;
+ }
+ }
+};
+
+class SubPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_SubOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class CustomOpPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (op->custom_options_format() != tflite::CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS)
+ {
+ os << " ";
+ os << "Unknown custom option format";
+ return;
+ }
+
+ const flatbuffers::Vector<uint8_t> *option_buf = op->custom_options();
+
+ if (option_buf == nullptr || option_buf->size() == 0)
+ {
+ os << "No attrs found." << std::endl;
+ return;
+ }
+
+ // printing attrs
+ // attrs of custom ops are encoded in flexbuffer format
+ auto attr_map = flexbuffers::GetRoot(option_buf->data(), option_buf->size()).AsMap();
+
+ os << " ";
+ auto keys = attr_map.Keys();
+ for (int i = 0; i < keys.size(); i++)
+ {
+ auto key = keys[i].ToString();
+ os << key << "(" << attr_map[key].ToString() << ") ";
+ }
+
+ // Note: attr in "Shape" type does not seem to be converted by tflite_convert.
+    // When the converted tflite file (with custom op) is opened with a hex editor,
+ // attrs names can be found but attr name in "Shape" type is not found.
+
+ os << std::endl;
+ }
+};
+
+OpPrinterRegistry::OpPrinterRegistry()
+{
+ _op_map[tflite::BuiltinOperator_ADD] = make_unique<AddPrinter>();
+ _op_map[tflite::BuiltinOperator_ARG_MAX] = make_unique<ArgMaxPrinter>();
+ _op_map[tflite::BuiltinOperator_AVERAGE_POOL_2D] = make_unique<Pool2DPrinter>();
+ _op_map[tflite::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>();
+ _op_map[tflite::BuiltinOperator_CONV_2D] = make_unique<Conv2DPrinter>();
+ _op_map[tflite::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>();
+ _op_map[tflite::BuiltinOperator_DIV] = make_unique<DivPrinter>();
+ _op_map[tflite::BuiltinOperator_FULLY_CONNECTED] = make_unique<FullyConnectedPrinter>();
+ _op_map[tflite::BuiltinOperator_MAX_POOL_2D] = make_unique<Pool2DPrinter>();
+ _op_map[tflite::BuiltinOperator_MUL] = make_unique<MulPrinter>();
+ _op_map[tflite::BuiltinOperator_PACK] = make_unique<PackPrinter>();
+ // There is no Option for ReLU and ReLU6
+ _op_map[tflite::BuiltinOperator_RESHAPE] = make_unique<ReshapePrinter>();
+ _op_map[tflite::BuiltinOperator_SOFTMAX] = make_unique<SoftmaxPrinter>();
+ _op_map[tflite::BuiltinOperator_SUB] = make_unique<SubPrinter>();
+ _op_map[tflite::BuiltinOperator_CUSTOM] = make_unique<CustomOpPrinter>();
+}
+
+} // namespace tfldump
diff --git a/compiler/tfldump/src/OpPrinter.h b/compiler/tfldump/src/OpPrinter.h
new file mode 100644
index 000000000..f46b22fdc
--- /dev/null
+++ b/compiler/tfldump/src/OpPrinter.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLDUMP_OPPRINTER_H__
+#define __TFLDUMP_OPPRINTER_H__
+
+#include <mio/tflite/schema_generated.h>
+
+#include <ostream>
+#include <map>
+
+namespace tfldump
+{
+
+/**
+ * @brief Base class that prints operator-specific options; default prints nothing
+ */
+class OpPrinter
+{
+public:
+  virtual void options(const tflite::Operator *, std::ostream &) const {};
+};
+
+/**
+ * @brief Maps a BuiltinOperator to its OpPrinter (accessed via the get() singleton)
+ */
+class OpPrinterRegistry
+{
+public:
+  OpPrinterRegistry();
+
+public:
+  // Returns nullptr when no printer is registered for 'op'
+  const OpPrinter *lookup(tflite::BuiltinOperator op) const
+  {
+    if (_op_map.find(op) == _op_map.end())
+      return nullptr;
+
+    return _op_map.at(op).get();
+  }
+
+public:
+  // Meyers-singleton accessor
+  static OpPrinterRegistry &get()
+  {
+    static OpPrinterRegistry me;
+    return me;
+  }
+
+private:
+  std::map<tflite::BuiltinOperator, std::unique_ptr<OpPrinter>> _op_map;
+};
+
+} // namespace tfldump
+
+#endif // __TFLDUMP_OPPRINTER_H__
diff --git a/compiler/tfldump/src/Read.cpp b/compiler/tfldump/src/Read.cpp
new file mode 100644
index 000000000..f9782d9ef
--- /dev/null
+++ b/compiler/tfldump/src/Read.cpp
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Read.h"
+
+#include <cassert>
+#include <sstream>
+#include <string>
+
+namespace tflread
+{
+
+// Returns true when the opcode falls inside the range of known builtin operators
+bool is_valid(const tflite::OperatorCode *opcode)
+{
+  tflite::BuiltinOperator code = opcode->builtin_code();
+  return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
+}
+
+// Returns true when the opcode denotes a custom (non-builtin) operator
+bool is_custom(const tflite::OperatorCode *opcode)
+{
+  tflite::BuiltinOperator code = opcode->builtin_code();
+  return (code == tflite::BuiltinOperator_CUSTOM);
+}
+
+// Human-readable name for an OperatorCode:
+//   "(invalid)" for out-of-range codes, "CUSTOM(<name>)" for custom ops,
+//   otherwise the schema enum name.
+std::string opcode_name(const tflite::OperatorCode *opcode)
+{
+  assert(opcode);
+
+  if (!is_valid(opcode))
+  {
+    std::ostringstream oss;
+    oss << "(invalid)";
+    return oss.str();
+  }
+
+  if (is_custom(opcode))
+  {
+    // A custom op without a custom_code string cannot be named
+    if (!opcode->custom_code())
+      return "(invalid custom)";
+
+    std::string custom_op = "CUSTOM(";
+    custom_op += opcode->custom_code()->c_str();
+    custom_op += ")";
+    return custom_op;
+  }
+
+  tflite::BuiltinOperator code = opcode->builtin_code();
+  return tflite::EnumNameBuiltinOperator(code);
+}
+
+// Schema enum name of the tensor's element type (e.g. "FLOAT32")
+const char *tensor_type(const tflite::Tensor *tensor)
+{
+  return tflite::EnumNameTensorType(tensor->type());
+}
+
+// Tensor's name, or "(noname)" when the model stores no name
+const char *tensor_name(const tflite::Tensor *tensor)
+{
+  static const char *kEmptyTensorName = "(noname)";
+
+  auto name = tensor->name();
+  if (name)
+    return name->c_str();
+
+  return kEmptyTensorName;
+}
+
+// Caches top-level model sections; call select_subgraph() before using
+// the per-subgraph accessors (tensors/operators/inputs/outputs).
+Reader::Reader(const tflite::Model *model)
+{
+  _version = model->version();
+  _subgraphs = model->subgraphs();
+  _buffers = model->buffers();
+
+  auto opcodes = model->operator_codes();
+  for (const ::tflite::OperatorCode *opcode : *opcodes)
+  {
+    _op_codes.push_back(opcode);
+  }
+}
+
+// Looks up buffer 'buf_idx'; on success sets *buff_data to its bytes and
+// returns its size. Returns 0 (with *buff_data = nullptr) for index 0
+// (the conventional empty buffer) or any buffer without data.
+size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
+{
+  *buff_data = nullptr;
+
+  if (buf_idx == 0)
+    return 0;
+
+  if (auto *buffer = (*_buffers)[buf_idx])
+  {
+    if (auto *array = buffer->data())
+    {
+      if (size_t size = array->size())
+      {
+        *buff_data = reinterpret_cast<const uint8_t *>(array->data());
+        return size;
+      }
+    }
+  }
+
+  return 0;
+}
+
+// Resolves an Operator's opcode index to its BuiltinOperator enum value
+tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const
+{
+  uint32_t index = op->opcode_index();
+  assert(index < _op_codes.size());
+  const tflite::OperatorCode *opcode = _op_codes.at(index);
+
+  return opcode->builtin_code();
+}
+
+// Name of the Operator's opcode; reports "(invalid: <index>)" for
+// out-of-range builtin codes instead of the free function's plain "(invalid)".
+std::string Reader::opcode_name(const tflite::Operator *op) const
+{
+  uint32_t index = op->opcode_index();
+  assert(index < _op_codes.size());
+  const tflite::OperatorCode *opcode = _op_codes.at(index);
+
+  if (!is_valid(opcode))
+  {
+    std::ostringstream oss;
+    oss << "(invalid: " << index << ")";
+    return oss.str();
+  }
+
+  return tflread::opcode_name(opcode);
+}
+
+// Switches the per-subgraph state (tensors, operators, inputs, outputs)
+// to subgraph 'sgindex'. Returns false (after asserting in debug builds)
+// when the index is out of range.
+bool Reader::select_subgraph(uint32_t sgindex)
+{
+  _subgraph_index = sgindex;
+  _tensors = nullptr;
+  _operators = nullptr;
+
+  _inputs.clear();
+  _outputs.clear();
+
+  if (_subgraphs->Length() <= sgindex)
+  {
+    assert(false);
+    return false;
+  }
+
+  const tflite::SubGraph *subgraph = (*_subgraphs)[sgindex];
+
+  auto name = subgraph->name();
+  _subgraph_name = name ? name->c_str() : "(noname)";
+
+  _tensors = subgraph->tensors();
+  _operators = subgraph->operators();
+
+  _inputs = as_index_vector(subgraph->inputs());
+  _outputs = as_index_vector(subgraph->outputs());
+
+  return true;
+}
+
+} // namespace tflread
diff --git a/compiler/tfldump/src/Read.h b/compiler/tfldump/src/Read.h
new file mode 100644
index 000000000..7af2fa59b
--- /dev/null
+++ b/compiler/tfldump/src/Read.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLREAD_READ_H__
+#define __TFLREAD_READ_H__
+
+#include <mio/tflite/schema_generated.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace tflread
+{
+
+// Copies a flatbuffers::Vector into a std::vector (element-by-element).
+// NOTE caller must pass a non-null flat_array.
+template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
+{
+  std::vector<T> ret(flat_array->Length());
+  for (uint32_t i = 0; i < flat_array->Length(); i++)
+  {
+    ret[i] = flat_array->Get(i);
+  }
+  return ret;
+}
+
+bool is_valid(const tflite::OperatorCode *opcode);
+bool is_custom(const tflite::OperatorCode *opcode);
+std::string opcode_name(const tflite::OperatorCode *opcode);
+const char *tensor_type(const tflite::Tensor *tensor);
+const char *tensor_name(const tflite::Tensor *tensor);
+
+/**
+ * @brief Loads TF lite file and provides helpers to access attributes
+ *
+ * Top-level sections (opcodes, buffers) are available right after
+ * construction; per-subgraph accessors (tensors, operators, inputs,
+ * outputs) are valid only after a successful select_subgraph() call.
+ */
+class Reader
+{
+private:
+  using TFliteSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<tflite::SubGraph>>;
+  using TFliteBuffers_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>;
+  using TFliteTensors_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>;
+  using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>;
+
+public:
+  Reader(const tflite::Model *model);
+
+  Reader() = delete;
+
+public:
+  uint32_t version() const { return _version; }
+
+  const std::vector<const tflite::OperatorCode *> &opcodes() { return _op_codes; }
+  const TFliteBuffers_t *buffers() { return _buffers; }
+  const TFliteTensors_t *tensors() { return _tensors; }
+  const TFliteOperators_t *operators() { return _operators; }
+  const std::vector<int32_t> &inputs() const { return _inputs; }
+  const std::vector<int32_t> &outputs() const { return _outputs; }
+
+  uint32_t num_subgraph() const { return _subgraphs->Length(); }
+
+  size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
+  tflite::BuiltinOperator builtin_code(const tflite::Operator *op) const;
+  std::string opcode_name(const tflite::Operator *op) const;
+
+public:
+  bool select_subgraph(uint32_t subgraph);
+  const std::string &subgraph_name(void) const { return _subgraph_name; }
+  uint32_t subgraph_index(void) const { return _subgraph_index; }
+
+private:
+  uint32_t _version;
+
+  const TFliteSubGraphs_t *_subgraphs{nullptr};
+  const TFliteBuffers_t *_buffers{nullptr};
+  const TFliteTensors_t *_tensors{nullptr};
+  const TFliteOperators_t *_operators{nullptr};
+
+  uint32_t _subgraph_index;
+  std::string _subgraph_name;
+  std::vector<const tflite::OperatorCode *> _op_codes;
+  std::vector<int32_t> _inputs;
+  std::vector<int32_t> _outputs;
+};
+
+} // namespace tflread
+
+#endif // __TFLREAD_READ_H__
diff --git a/compiler/tflite2circle-conversion-test/CMakeLists.txt b/compiler/tflite2circle-conversion-test/CMakeLists.txt
new file mode 100644
index 000000000..d7b644242
--- /dev/null
+++ b/compiler/tflite2circle-conversion-test/CMakeLists.txt
@@ -0,0 +1,94 @@
+# Conversion test: generate a .tflite from each recipe listed in test.lst
+# (via tflchef-file), then run tflite2circle on it through testall.sh,
+# registered below as a CTest test.
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS tflite2circle)
+list(APPEND REQUIRED_TARGETS tflchef)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+nncc_find_resource(TensorFlowLiteRecipes)
+
+set(TEST_REPO "${TensorFlowLiteRecipes_DIR}")
+set(TEST_RECIPE_FILENAME "test.recipe")
+
+unset(TESTCASES)
+# 'add(NAME)' is invoked from test.lst to append one testcase prefix
+macro(add NAME)
+  list(APPEND TESTCASES ${NAME})
+endmacro(add)
+
+# Read "test.lst"
+include("test.lst")
+
+unset(TEST_DEPS)
+unset(TEST_NAMES)
+
+foreach(PREFIX IN ITEMS ${TESTCASES})
+  if(NOT IS_DIRECTORY "${TEST_REPO}/${PREFIX}")
+    message(FATAL_ERROR "Missing '${PREFIX}' test")
+  endif()
+
+  set(RECIPE_SOURCE_PATH "${TEST_REPO}/${PREFIX}/${TEST_RECIPE_FILENAME}")
+  set(RECIPE_FILE "${PREFIX}.recipe")
+  set(RECIPE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE_FILE}")
+
+  set(TFLITE_FILE "${PREFIX}.tflite")
+  set(TFLITE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TFLITE_FILE}")
+
+  # Copy .recipe
+  add_custom_command(OUTPUT ${RECIPE_BINARY_PATH}
+    COMMAND ${CMAKE_COMMAND} -E copy "${RECIPE_SOURCE_PATH}" "${RECIPE_BINARY_PATH}"
+    DEPENDS ${RECIPE_SOURCE_PATH}
+    COMMENT "Generate ${RECIPE_FILE}"
+  )
+
+  # Generate .tflite
+  add_custom_command(OUTPUT ${TFLITE_OUTPUT_PATH}
+    COMMAND $<TARGET_FILE:tflchef-file> ${RECIPE_BINARY_PATH} ${TFLITE_OUTPUT_PATH}
+    DEPENDS ${RECIPE_BINARY_PATH}
+    COMMENT "Generate ${PREFIX}.tflite"
+  )
+
+  list(APPEND TEST_DEPS ${RECIPE_BINARY_PATH} ${TFLITE_OUTPUT_PATH})
+  list(APPEND TEST_NAMES ${PREFIX})
+endforeach(PREFIX IN ITEMS ${TESTCASES})
+
+##
+## Copy testall
+##
+set(TEST_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/testall.sh")
+set(TEST_RUNNER_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh")
+
+add_custom_command(
+  OUTPUT ${TEST_RUNNER}
+  COMMAND ${CMAKE_COMMAND} -E copy "${TEST_RUNNER_SOURCE}" "${TEST_RUNNER}"
+  DEPENDS ${TEST_RUNNER_SOURCE}
+  COMMENT "Generate test runner"
+)
+
+list(APPEND TEST_DEPS "${TEST_RUNNER}")
+
+###
+### Generate test.config (consumed by testall.sh via 'source')
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+add_custom_command(
+  OUTPUT ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'TFLITE2CIRCLE_PATH=\"$<TARGET_FILE:tflite2circle>\"' >> ${TEST_CONFIG}
+  DEPENDS tflite2circle
+  COMMENT "Generate test configuration"
+)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}")
+
+# This "tflite2circle_conversion_test_deps" target enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(tflite2circle_conversion_test_deps ALL DEPENDS ${TEST_DEPS})
+
+# Run tests
+add_test(
+  NAME tflite2circle_conversion_test
+  COMMAND "${TEST_RUNNER}"
+          "${TEST_CONFIG}"
+          "${CMAKE_CURRENT_BINARY_DIR}"
+          ${TEST_NAMES}
+)
diff --git a/compiler/tflite2circle-conversion-test/README.md b/compiler/tflite2circle-conversion-test/README.md
new file mode 100644
index 000000000..e6b4cd223
--- /dev/null
+++ b/compiler/tflite2circle-conversion-test/README.md
@@ -0,0 +1,3 @@
+# tflite2circle-conversion-test
+
+Run `tflite2circle` to check whether _tflite_ model is able to be converted into _circle_ model.
diff --git a/compiler/tflite2circle-conversion-test/requires.cmake b/compiler/tflite2circle-conversion-test/requires.cmake
new file mode 100644
index 000000000..730711aca
--- /dev/null
+++ b/compiler/tflite2circle-conversion-test/requires.cmake
@@ -0,0 +1,2 @@
+require("tflite2circle")
+require("tflchef")
diff --git a/compiler/tflite2circle-conversion-test/test.lst b/compiler/tflite2circle-conversion-test/test.lst
new file mode 100644
index 000000000..8b8f0aaf3
--- /dev/null
+++ b/compiler/tflite2circle-conversion-test/test.lst
@@ -0,0 +1,20 @@
+add(Add_000)
+add(AveragePool2D_000)
+add(Concatenation_000)
+add(Conv2D_000)
+add(Conv2D_001)
+add(Conv2D_U8_000)
+add(DepthwiseConv2D_000)
+add(Div_000)
+add(FullyConnected_000)
+add(FullyConnected_001)
+add(MaxPool2D_000)
+add(Quantization_000)
+add(ReLU_000)
+add(ReLU6_000)
+add(Reshape_000)
+add(Reshape_001)
+add(Reshape_U8_000)
+add(Sqrt_000)
+add(Sub_000)
+add(Sub_001)
diff --git a/compiler/tflite2circle-conversion-test/testall.sh b/compiler/tflite2circle-conversion-test/testall.sh
new file mode 100755
index 000000000..664543736
--- /dev/null
+++ b/compiler/tflite2circle-conversion-test/testall.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+# Runs tflite2circle on every given prefix's .tflite under WORKDIR and
+# reports PASSED/FAILED. Exit code 0 on all-pass, 255 otherwise.
+
+# Need at least 2 arguments
+if [[ $# -lt 2 ]]; then
+  echo "USAGE: $0 ..."
+  echo
+  echo "ARGUMENTS:"
+  echo "   [test.config path]"
+  echo "   [WORKDIR]"
+  echo "   [Prefix1]"
+  echo "   [Prefix2]"
+  echo "   ..."
+  exit 255
+fi
+
+CONFIG_PATH="$1"; shift
+WORKDIR="$1"; shift
+
+# test.config defines TFLITE2CIRCLE_PATH
+source "${CONFIG_PATH}"
+
+echo "-- Found TFLITE2CIRCLE: ${TFLITE2CIRCLE_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [[ $# -ne 0 ]]; do
+  PREFIX="$1"; shift
+
+  TESTED+=("${PREFIX}")
+
+  # Success is signalled via a marker file written from inside the subshell
+  PASSED_TAG="${PREFIX}.passed"
+
+  rm -f "${PASSED_TAG}"
+
+  # Run the conversion in a subshell; all output is captured to ${PREFIX}.log
+  cat > "${PREFIX}.log" <(
+    exec 2>&1
+
+    echo "-- Found tflite: ${PREFIX}.tflite"
+
+    # Exit immediately if any command fails
+    set -e
+    # Show commands
+    set -x
+
+    # Generate circle
+    "${TFLITE2CIRCLE_PATH}" \
+      "${WORKDIR}/${PREFIX}.tflite" \
+      "${WORKDIR}/${PREFIX}.circle"
+
+    if [[ $? -eq 0 ]]; then
+      touch "${PASSED_TAG}"
+    fi
+  )
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("$PREFIX")
+  else
+    FAILED+=("$PREFIX")
+  fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+  echo "FAILED"
+  for TEST in "${FAILED[@]}"
+  do
+    echo "- ${TEST}"
+  done
+  exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/tflite2circle/CMakeLists.txt b/compiler/tflite2circle/CMakeLists.txt
new file mode 100644
index 000000000..f846a2bc5
--- /dev/null
+++ b/compiler/tflite2circle/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Build the tflite2circle converter executable; requires the generated
+# tflite and circle flatbuffer schemas (mio_tflite / mio_circle).
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS mio_tflite)
+list(APPEND REQUIRED_TARGETS mio_circle)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+set(DRIVER "driver/Driver.cpp")
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+add_executable(tflite2circle ${DRIVER} ${SOURCES})
+target_include_directories(tflite2circle PRIVATE include)
+target_include_directories(tflite2circle PRIVATE src)
+target_link_libraries(tflite2circle safemain)
+target_link_libraries(tflite2circle stdex)
+target_link_libraries(tflite2circle mio_tflite)
+target_link_libraries(tflite2circle mio_circle)
diff --git a/compiler/tflite2circle/README.md b/compiler/tflite2circle/README.md
new file mode 100644
index 000000000..f6eba73d3
--- /dev/null
+++ b/compiler/tflite2circle/README.md
@@ -0,0 +1,11 @@
+# tflite2circle
+
+_tflite2circle_ is a TensorFlow Lite to Circle model converter.
+
+## Usage
+
+Provide _tflite_ file input path and _circle_ file output path as a parameter to convert.
+
+```
+$ tflite2circle in.tflite out.circle
+```
diff --git a/compiler/tflite2circle/driver/Driver.cpp b/compiler/tflite2circle/driver/Driver.cpp
new file mode 100644
index 000000000..826f9dee7
--- /dev/null
+++ b/compiler/tflite2circle/driver/Driver.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <vector>
+
+#include "CircleModel.h"
+#include "TFLModel.h"
+
+// Entry point (invoked by safemain): converts argv[1] (.tflite) into
+// argv[2] (.circle). Returns 0 on success, 255 on any failure.
+int entry(int argc, char **argv)
+{
+  // Expect exactly two user arguments: input tflite path, output circle path
+  if (argc != 3)
+  {
+    std::cerr << "ERROR: Failed to parse arguments" << std::endl;
+    std::cerr << std::endl;
+    std::cerr << "USAGE: " << argv[0] << " [tflite] [circle]" << std::endl;
+    return 255;
+  }
+
+  // read tflite file
+  tflite2circle::TFLModel tfl_model(argv[1]);
+  if (!tfl_model.is_valid())
+  {
+    std::cerr << "ERROR: Failed to load tflite '" << argv[1] << "'" << std::endl;
+    return 255;
+  }
+
+  // create flatbuffer builder
+  auto flatbuffer_builder = stdex::make_unique<flatbuffers::FlatBufferBuilder>(1024);
+
+  // convert tflite to circle
+  tflite2circle::CircleModel circle_model{flatbuffer_builder, tfl_model};
+
+  // NOTE std::ofstream is available via <fstream> pulled in by TFLModel.h
+  std::ofstream outfile{argv[2], std::ios::binary};
+
+  outfile.write(circle_model.base(), circle_model.size());
+  outfile.close();
+  // TODO find a better way of error handling
+  if (outfile.fail())
+  {
+    // Report the OUTPUT (circle) path on write failure (was argv[1], the input path)
+    std::cerr << "ERROR: Failed to write circle '" << argv[2] << "'" << std::endl;
+    return 255;
+  }
+
+  return 0;
+}
diff --git a/compiler/tflite2circle/include/CircleModel.h b/compiler/tflite2circle/include/CircleModel.h
new file mode 100644
index 000000000..ee1a8fe75
--- /dev/null
+++ b/compiler/tflite2circle/include/CircleModel.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_MODEL_H__
+#define __CIRCLE_MODEL_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <stdex/Memory.h>
+
+#include "TFLModel.h"
+
+namespace tflite2circle
+{
+
+using FlatBufBuilder = std::unique_ptr<flatbuffers::FlatBufferBuilder>;
+
+// Each *Link struct pairs the tflite source type (TFL) with the circle
+// destination type (CIR) for one model section; used as the T parameter
+// of Offset<T> below.
+struct OperatorCodeLink
+{
+  using TFL = flatbuffers::Offset<::tflite::OperatorCode>;
+  using CIR = flatbuffers::Offset<::circle::OperatorCode>;
+};
+
+struct SubGraphLink
+{
+  using TFL = flatbuffers::Offset<::tflite::SubGraph>;
+  using CIR = flatbuffers::Offset<::circle::SubGraph>;
+};
+
+struct BufferLink
+{
+  using TFL = flatbuffers::Offset<::tflite::Buffer>;
+  using CIR = flatbuffers::Offset<::circle::Buffer>;
+};
+
+// metadata buffers are plain indices, not flatbuffer offsets
+struct MetaDataBufferLink
+{
+  using TFL = int32_t;
+  using CIR = int32_t;
+};
+
+/**
+ * @brief Converts one tflite flatbuffer vector into its circle counterpart
+ *
+ * The constructor (defined in CircleModel.cpp, not visible here) performs
+ * the conversion; offset() returns the resulting circle vector offset.
+ */
+template <typename T> class Offset
+{
+private:
+  using TFLFlatBufVec = flatbuffers::Vector<typename T::TFL>;
+  using CIRFlatBufVecOffset = flatbuffers::Offset<flatbuffers::Vector<typename T::CIR>>;
+
+public:
+  Offset(void) = delete;
+  Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec);
+
+public:
+  CIRFlatBufVecOffset offset(void) const { return _circle_flatbuffer_vec_offset; }
+
+private:
+  CIRFlatBufVecOffset _circle_flatbuffer_vec_offset;
+};
+
+/**
+ * @brief Builds a circle model from a loaded TFLModel
+ *
+ * base()/size() expose the serialized circle flatbuffer for writing to disk.
+ */
+class CircleModel
+{
+private:
+  using Description = flatbuffers::Offset<flatbuffers::String>;
+
+public:
+  CircleModel(void) = delete;
+  CircleModel(FlatBufBuilder &fb, TFLModel &tfl_model);
+
+public:
+  void model_build(void) const;
+  const char *base(void) const;
+  size_t size(void) const;
+
+private:
+  uint32_t _version;
+  Description _description;
+  FlatBufBuilder &_fb;
+  std::unique_ptr<Offset<OperatorCodeLink>> _operator_codes_offset;
+  std::unique_ptr<Offset<SubGraphLink>> _subGraphs_offset;
+  std::unique_ptr<Offset<BufferLink>> _buffers_offset;
+  std::unique_ptr<Offset<MetaDataBufferLink>> _metadata_buffer_offset;
+};
+
+} // namespace tflite2circle
+
+#endif // __CIRCLE_MODEL_H__
diff --git a/compiler/tflite2circle/include/TFLModel.h b/compiler/tflite2circle/include/TFLModel.h
new file mode 100644
index 000000000..e53d62749
--- /dev/null
+++ b/compiler/tflite2circle/include/TFLModel.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFL_MODEL_H__
+#define __TFL_MODEL_H__
+
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include <mio/tflite/schema_generated.h>
+
+namespace tflite2circle
+{
+
+/**
+ * @brief Owns the raw bytes of a TF Lite file read from 'path'
+ *
+ * CircleModel (a friend) accesses the buffer via load_model(), whose
+ * definition is in TFLModel.cpp (not visible here).
+ */
+class TFLModel
+{
+private:
+  using DataBuffer = std::vector<char>;
+
+public:
+  TFLModel(void) = delete;
+  TFLModel(const std::string &path);
+
+public:
+  // True when the file was opened/read successfully in the constructor
+  bool is_valid(void) { return _valid; }
+
+private:
+  const tflite::Model *load_model(void);
+
+private:
+  std::ifstream _infile;
+  DataBuffer _data;
+  bool _valid;
+
+  friend class CircleModel;
+};
+
+} // namespace tflite2circle
+
+#endif // __TFL_MODEL_H__
diff --git a/compiler/tflite2circle/requires.cmake b/compiler/tflite2circle/requires.cmake
new file mode 100644
index 000000000..dd67319b8
--- /dev/null
+++ b/compiler/tflite2circle/requires.cmake
@@ -0,0 +1,4 @@
+require("mio-tflite")
+require("mio-circle")
+require("safemain")
+require("stdex")
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions.h
new file mode 100644
index 000000000..0167da284
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Umbrella header collecting every build_circle_*Options helper.
+// Fix: header guard was misspelled "BUITIN"; renamed consistently here.
+#ifndef __BUILD_BUILTIN_OPTIONS_H__
+#define __BUILD_BUILTIN_OPTIONS_H__
+
+// NOTE please add new option headers in alphabetical order
+
+#include "BuildBuiltinOptions/AbsOptions.h"
+#include "BuildBuiltinOptions/AddOptions.h"
+#include "BuildBuiltinOptions/ArgMaxOptions.h"
+#include "BuildBuiltinOptions/BatchToSpaceNDOptions.h"
+#include "BuildBuiltinOptions/CastOptions.h"
+#include "BuildBuiltinOptions/ConcatenationOptions.h"
+#include "BuildBuiltinOptions/Conv2DOptions.h"
+#include "BuildBuiltinOptions/CosOptions.h"
+#include "BuildBuiltinOptions/DepthwiseConv2DOptions.h"
+#include "BuildBuiltinOptions/DivOptions.h"
+#include "BuildBuiltinOptions/EqualOptions.h"
+#include "BuildBuiltinOptions/ExpandDimsOptions.h"
+#include "BuildBuiltinOptions/ExpOptions.h"
+#include "BuildBuiltinOptions/FillOptions.h"
+#include "BuildBuiltinOptions/FullyConnectedOptions.h"
+#include "BuildBuiltinOptions/GreaterEqualOptions.h"
+#include "BuildBuiltinOptions/LogicalNotOptions.h"
+#include "BuildBuiltinOptions/LogicalOrOptions.h"
+#include "BuildBuiltinOptions/MulOptions.h"
+#include "BuildBuiltinOptions/NotEqualOptions.h"
+#include "BuildBuiltinOptions/PackOptions.h"
+#include "BuildBuiltinOptions/PadOptions.h"
+#include "BuildBuiltinOptions/Pool2DOptions.h"
+#include "BuildBuiltinOptions/ReducerOptions.h"
+#include "BuildBuiltinOptions/ReshapeOptions.h"
+#include "BuildBuiltinOptions/ShapeOptions.h"
+#include "BuildBuiltinOptions/SoftmaxOptions.h"
+#include "BuildBuiltinOptions/SplitOptions.h"
+#include "BuildBuiltinOptions/SqueezeOptions.h"
+#include "BuildBuiltinOptions/SubOptions.h"
+#include "BuildBuiltinOptions/TransposeOptions.h"
+
+#endif // __BUILD_BUILTIN_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/AbsOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/AbsOptions.cpp
new file mode 100644
index 000000000..70aa06d3e
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/AbsOptions.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AbsOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+// AbsOptions carries no fields; emit an empty circle AbsOptions table
+flatbuffers::Offset<circle::AbsOptions> build_circle_AbsOptions(flatbuffers::FlatBufferBuilder &fb,
+                                                                const tflite::Operator *)
+{
+  circle::AbsOptionsBuilder builtin_options_builder{fb};
+  return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/AbsOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/AbsOptions.h
new file mode 100644
index 000000000..a118eb52b
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/AbsOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_ABS_OPTIONS_H__
+#define __BBO_ABS_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::AbsOptions> build_circle_AbsOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_ABS_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/AddOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/AddOptions.cpp
new file mode 100644
index 000000000..f93a0f21f
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/AddOptions.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AddOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+// Translates tflite AddOptions into circle AddOptions,
+// copying the fused activation function
+flatbuffers::Offset<circle::AddOptions> build_circle_AddOptions(flatbuffers::FlatBufferBuilder &fb,
+                                                                const tflite::Operator *op)
+{
+  auto tflite_builtin_options = op->builtin_options_as_AddOptions();
+  assert(tflite_builtin_options);
+  circle::AddOptionsBuilder builtin_options_builder{fb};
+  builtin_options_builder.add_fused_activation_function(
+      get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+  return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/AddOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/AddOptions.h
new file mode 100644
index 000000000..bbfa03f0d
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/AddOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_ADD_OPTIONS_H__
+#define __BBO_ADD_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::AddOptions> build_circle_AddOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_ADD_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMaxOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMaxOptions.cpp
new file mode 100644
index 000000000..0ccdde4cb
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMaxOptions.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ArgMaxOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+// Translates tflite ArgMaxOptions into circle ArgMaxOptions,
+// copying the output tensor type
+flatbuffers::Offset<circle::ArgMaxOptions>
+build_circle_ArgMaxOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+  auto tflite_builtin_options = op->builtin_options_as_ArgMaxOptions();
+  assert(tflite_builtin_options);
+  circle::ArgMaxOptionsBuilder builtin_options_builder{fb};
+  builtin_options_builder.add_output_type(
+      get_circle_tensortype(tflite_builtin_options->output_type()));
+  return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMaxOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMaxOptions.h
new file mode 100644
index 000000000..2c8476252
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMaxOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_ARGMAX_OPTIONS_H__
+#define __BBO_ARGMAX_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::ArgMaxOptions>
+build_circle_ArgMaxOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_ARGMAX_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/BatchToSpaceNDOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/BatchToSpaceNDOptions.cpp
new file mode 100644
index 000000000..29799de70
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/BatchToSpaceNDOptions.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchToSpaceNDOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::BatchToSpaceNDOptions>
+build_circle_BatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *)
+{
+ circle::BatchToSpaceNDOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/BatchToSpaceNDOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/BatchToSpaceNDOptions.h
new file mode 100644
index 000000000..ca720f03c
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/BatchToSpaceNDOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_BATCH_TO_SPACE_ND_OPTIONS_H__
+#define __BBO_BATCH_TO_SPACE_ND_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::BatchToSpaceNDOptions>
+build_circle_BatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_BATCH_TO_SPACE_ND_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/CastOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/CastOptions.cpp
new file mode 100644
index 000000000..f07fb3b1f
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/CastOptions.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CastOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::CastOptions>
+build_circle_CastOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_CastOptions();
+ assert(tflite_builtin_options);
+ circle::CastOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_in_data_type(
+ get_circle_tensortype(tflite_builtin_options->in_data_type()));
+ builtin_options_builder.add_out_data_type(
+ get_circle_tensortype(tflite_builtin_options->out_data_type()));
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/CastOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/CastOptions.h
new file mode 100644
index 000000000..a8cc5441b
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/CastOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_CAST_OPTIONS_H__
+#define __BBO_CAST_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::CastOptions>
+build_circle_CastOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_CAST_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ConcatenationOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/ConcatenationOptions.cpp
new file mode 100644
index 000000000..933e7cf66
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ConcatenationOptions.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConcatenationOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::ConcatenationOptions>
+build_circle_ConcatenationOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_ConcatenationOptions();
+ assert(tflite_builtin_options);
+ circle::ConcatenationOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_axis(tflite_builtin_options->axis());
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ConcatenationOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/ConcatenationOptions.h
new file mode 100644
index 000000000..004e6433c
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ConcatenationOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_CONCATENATION_OPTIONS_H__
+#define __BBO_CONCATENATION_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::ConcatenationOptions>
+build_circle_ConcatenationOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_CONCATENATION_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/Conv2DOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/Conv2DOptions.cpp
new file mode 100644
index 000000000..ace63dd26
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/Conv2DOptions.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conv2DOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::Conv2DOptions>
+build_circle_Conv2DOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_Conv2DOptions();
+ assert(tflite_builtin_options);
+ circle::Conv2DOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_padding(get_circle_padding(tflite_builtin_options->padding()));
+ builtin_options_builder.add_stride_w(tflite_builtin_options->stride_w());
+ builtin_options_builder.add_stride_h(tflite_builtin_options->stride_h());
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ builtin_options_builder.add_dilation_w_factor(tflite_builtin_options->dilation_w_factor());
+ builtin_options_builder.add_dilation_h_factor(tflite_builtin_options->dilation_h_factor());
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/Conv2DOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/Conv2DOptions.h
new file mode 100644
index 000000000..1f2bdc169
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/Conv2DOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_CONV2D_OPTIONS_H__
+#define __BBO_CONV2D_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::Conv2DOptions>
+build_circle_Conv2DOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_CONV2D_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/CosOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/CosOptions.cpp
new file mode 100644
index 000000000..9f6ebf16b
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/CosOptions.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CosOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::CosOptions> build_circle_CosOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op)
+{
+ circle::CosOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/CosOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/CosOptions.h
new file mode 100644
index 000000000..bc15573b9
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/CosOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_COS_OPTIONS_H__
+#define __BBO_COS_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::CosOptions> build_circle_CosOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_COS_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DepthwiseConv2DOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DepthwiseConv2DOptions.cpp
new file mode 100644
index 000000000..2aa35abc6
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DepthwiseConv2DOptions.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConv2DOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DepthwiseConv2DOptions>
+build_circle_DepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_DepthwiseConv2DOptions();
+ assert(tflite_builtin_options);
+ circle::DepthwiseConv2DOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_padding(get_circle_padding(tflite_builtin_options->padding()));
+ builtin_options_builder.add_stride_w(tflite_builtin_options->stride_w());
+ builtin_options_builder.add_stride_h(tflite_builtin_options->stride_h());
+ builtin_options_builder.add_depth_multiplier(tflite_builtin_options->depth_multiplier());
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ builtin_options_builder.add_dilation_w_factor(tflite_builtin_options->dilation_w_factor());
+ builtin_options_builder.add_dilation_h_factor(tflite_builtin_options->dilation_h_factor());
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DepthwiseConv2DOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/DepthwiseConv2DOptions.h
new file mode 100644
index 000000000..2d92993df
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DepthwiseConv2DOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_DEPTHWISECONV2D_OPTIONS_H__
+#define __BBO_DEPTHWISECONV2D_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DepthwiseConv2DOptions>
+build_circle_DepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_DEPTHWISECONV2D_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DivOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DivOptions.cpp
new file mode 100644
index 000000000..4272fe144
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DivOptions.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DivOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DivOptions> build_circle_DivOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_DivOptions();
+ assert(tflite_builtin_options);
+ circle::DivOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DivOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/DivOptions.h
new file mode 100644
index 000000000..2b2bc57dc
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DivOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_DIV_OPTIONS_H__
+#define __BBO_DIV_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DivOptions> build_circle_DivOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_DIV_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/EqualOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/EqualOptions.cpp
new file mode 100644
index 000000000..cc2efa4fa
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/EqualOptions.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EqualOptions.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::EqualOptions>
+build_circle_EqualOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ circle::EqualOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/EqualOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/EqualOptions.h
new file mode 100644
index 000000000..8572157f2
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/EqualOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_EQUAL_OPTIONS_H__
+#define __BBO_EQUAL_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::EqualOptions>
+build_circle_EqualOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_EQUAL_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ExpOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/ExpOptions.cpp
new file mode 100644
index 000000000..20a64c714
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ExpOptions.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExpOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::ExpOptions> build_circle_ExpOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *)
+{
+ circle::ExpOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ExpOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/ExpOptions.h
new file mode 100644
index 000000000..2279083fb
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ExpOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_EXP_OPTIONS_H__
+#define __BBO_EXP_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::ExpOptions> build_circle_ExpOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_EXP_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ExpandDimsOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/ExpandDimsOptions.cpp
new file mode 100644
index 000000000..ee2af2c09
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ExpandDimsOptions.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExpandDimsOptions.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::ExpandDimsOptions>
+build_circle_ExpandDimsOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ circle::ExpandDimsOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ExpandDimsOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/ExpandDimsOptions.h
new file mode 100644
index 000000000..814e50374
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ExpandDimsOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_EXPANDDIMS_OPTIONS_H__
+#define __BBO_EXPANDDIMS_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::ExpandDimsOptions>
+build_circle_ExpandDimsOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_EXPANDDIMS_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/FillOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/FillOptions.cpp
new file mode 100644
index 000000000..919c57cdc
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/FillOptions.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FillOptions.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::FillOptions>
+build_circle_FillOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ circle::FillOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/FillOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/FillOptions.h
new file mode 100644
index 000000000..81eb5e3a2
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/FillOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_FILL_OPTIONS_H__
+#define __BBO_FILL_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::FillOptions>
+build_circle_FillOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_FILL_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp
new file mode 100644
index 000000000..098a96a40
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::FullyConnectedOptions>
+build_circle_FullyConnectedOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_FullyConnectedOptions();
+ assert(tflite_builtin_options);
+ circle::FullyConnectedOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ // Get FullyConnectedOptionsWeightsFormat
+ auto tflite_weight_format = tflite_builtin_options->weights_format();
+ if (tflite_weight_format == tflite::FullyConnectedOptionsWeightsFormat_DEFAULT)
+ builtin_options_builder.add_weights_format(circle::FullyConnectedOptionsWeightsFormat_DEFAULT);
+ else if (tflite_weight_format == tflite::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8)
+ builtin_options_builder.add_weights_format(
+ circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8);
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.h
new file mode 100644
index 000000000..8f4ac43cf
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_FULLYCONNECTED_OPTIONS_H__
+#define __BBO_FULLYCONNECTED_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::FullyConnectedOptions>
+build_circle_FullyConnectedOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_FULLYCONNECTED_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/GreaterEqualOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/GreaterEqualOptions.cpp
new file mode 100644
index 000000000..6c890e579
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/GreaterEqualOptions.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GreaterEqualOptions.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::GreaterEqualOptions>
+build_circle_GreaterEqualOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ circle::GreaterEqualOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/GreaterEqualOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/GreaterEqualOptions.h
new file mode 100644
index 000000000..bddb55371
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/GreaterEqualOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_GREATEREQUAL_OPTIONS_H__
+#define __BBO_GREATEREQUAL_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::GreaterEqualOptions>
+build_circle_GreaterEqualOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_GREATEREQUAL_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/LogicalNotOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/LogicalNotOptions.cpp
new file mode 100644
index 000000000..fc17fd5b1
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/LogicalNotOptions.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LogicalNotOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::LogicalNotOptions>
+build_circle_LogicalNotOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ circle::LogicalNotOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/LogicalNotOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/LogicalNotOptions.h
new file mode 100644
index 000000000..da830b781
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/LogicalNotOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_LOGICALNOT_OPTIONS_H__
+#define __BBO_LOGICALNOT_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::LogicalNotOptions>
+build_circle_LogicalNotOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_LOGICALNOT_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/LogicalOrOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/LogicalOrOptions.cpp
new file mode 100644
index 000000000..6c3a6b3a6
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/LogicalOrOptions.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LogicalOrOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::LogicalOrOptions>
+build_circle_LogicalOrOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ circle::LogicalOrOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/LogicalOrOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/LogicalOrOptions.h
new file mode 100644
index 000000000..59ba914c2
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/LogicalOrOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_LOGICALOR_OPTIONS_H__
+#define __BBO_LOGICALOR_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::LogicalOrOptions>
+build_circle_LogicalOrOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_LOGICALOR_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/MulOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/MulOptions.cpp
new file mode 100644
index 000000000..009daea8b
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/MulOptions.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MulOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::MulOptions> build_circle_MulOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_MulOptions();
+ assert(tflite_builtin_options);
+ circle::MulOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/MulOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/MulOptions.h
new file mode 100644
index 000000000..18f1e4967
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/MulOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_MUL_OPTIONS_H__
+#define __BBO_MUL_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::MulOptions> build_circle_MulOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_MUL_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/NotEqualOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/NotEqualOptions.cpp
new file mode 100644
index 000000000..8ed3bb0bb
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/NotEqualOptions.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NotEqualOptions.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::NotEqualOptions>
+build_circle_NotEqualOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ circle::NotEqualOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/NotEqualOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/NotEqualOptions.h
new file mode 100644
index 000000000..f7533ec06
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/NotEqualOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_NOTEQUAL_OPTIONS_H__
+#define __BBO_NOTEQUAL_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::NotEqualOptions>
+build_circle_NotEqualOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_NOTEQUAL_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/PackOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/PackOptions.cpp
new file mode 100644
index 000000000..7950f62e9
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/PackOptions.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PackOptions.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::PackOptions>
+build_circle_PackOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_PackOptions();
+ assert(tflite_builtin_options);
+ circle::PackOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_values_count(tflite_builtin_options->values_count());
+ builtin_options_builder.add_axis(tflite_builtin_options->axis());
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/PackOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/PackOptions.h
new file mode 100644
index 000000000..7bf7643ef
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/PackOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_PACK_OPTIONS_H__
+#define __BBO_PACK_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::PackOptions>
+build_circle_PackOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_PACK_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/PadOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/PadOptions.cpp
new file mode 100644
index 000000000..1214390c1
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/PadOptions.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PadOptions.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::PadOptions> build_circle_PadOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op)
+{
+ circle::PadOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/PadOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/PadOptions.h
new file mode 100644
index 000000000..a22b0a126
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/PadOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_PAD_OPTIONS_H__
+#define __BBO_PAD_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::PadOptions> build_circle_PadOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_PAD_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/Pool2DOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/Pool2DOptions.cpp
new file mode 100644
index 000000000..6b0bd1288
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/Pool2DOptions.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pool2DOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::Pool2DOptions>
+build_circle_Pool2DOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_Pool2DOptions();
+ assert(tflite_builtin_options);
+ circle::Pool2DOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_padding(get_circle_padding(tflite_builtin_options->padding()));
+ builtin_options_builder.add_stride_w(tflite_builtin_options->stride_w());
+ builtin_options_builder.add_stride_h(tflite_builtin_options->stride_h());
+ builtin_options_builder.add_filter_width(tflite_builtin_options->filter_width());
+ builtin_options_builder.add_filter_height(tflite_builtin_options->filter_height());
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/Pool2DOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/Pool2DOptions.h
new file mode 100644
index 000000000..69119f377
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/Pool2DOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_POOL2D_OPTIONS_H__
+#define __BBO_POOL2D_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::Pool2DOptions>
+build_circle_Pool2DOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_POOL2D_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ReducerOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/ReducerOptions.cpp
new file mode 100644
index 000000000..4bcb04e5b
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ReducerOptions.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReducerOptions.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::ReducerOptions>
+build_circle_ReducerOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_ReducerOptions();
+ assert(tflite_builtin_options);
+ circle::ReducerOptionsBuilder reducer_options_builder{fb};
+ reducer_options_builder.add_keep_dims(tflite_builtin_options->keep_dims());
+ return reducer_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ReducerOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/ReducerOptions.h
new file mode 100644
index 000000000..b9b39fcb6
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ReducerOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_REDUCER_OPTIONS_H__
+#define __BBO_REDUCER_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::ReducerOptions>
+build_circle_ReducerOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_REDUCER_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ReshapeOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/ReshapeOptions.cpp
new file mode 100644
index 000000000..dc8e73633
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ReshapeOptions.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReshapeOptions.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::ReshapeOptions>
+build_circle_ReshapeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_ReshapeOptions();
+ assert(tflite_builtin_options);
+ std::vector<int32_t> new_shape_vec{tflite_builtin_options->new_shape()->begin(),
+ tflite_builtin_options->new_shape()->end()};
+ auto new_shape = fb.CreateVector(new_shape_vec);
+ circle::ReshapeOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_new_shape(new_shape);
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ReshapeOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/ReshapeOptions.h
new file mode 100644
index 000000000..55546ba3e
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ReshapeOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_RESHAPE_OPTIONS_H__
+#define __BBO_RESHAPE_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::ReshapeOptions>
+build_circle_ReshapeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_RESHAPE_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ShapeOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/ShapeOptions.cpp
new file mode 100644
index 000000000..b1cbaba05
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ShapeOptions.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ShapeOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::ShapeOptions>
+build_circle_ShapeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_ShapeOptions();
+ assert(tflite_builtin_options);
+ circle::ShapeOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_out_type(get_circle_tensortype(tflite_builtin_options->out_type()));
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ShapeOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/ShapeOptions.h
new file mode 100644
index 000000000..2c6e72074
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ShapeOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_SHAPE_OPTIONS_H__
+#define __BBO_SHAPE_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::ShapeOptions>
+build_circle_ShapeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_SHAPE_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SoftmaxOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/SoftmaxOptions.cpp
new file mode 100644
index 000000000..a1cf9abe7
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SoftmaxOptions.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SoftmaxOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::SoftmaxOptions>
+build_circle_SoftmaxOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_SoftmaxOptions();
+ assert(tflite_builtin_options);
+ circle::SoftmaxOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_beta(tflite_builtin_options->beta());
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SoftmaxOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/SoftmaxOptions.h
new file mode 100644
index 000000000..f4a876aaa
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SoftmaxOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_SOFTMAX_OPTIONS_H__
+#define __BBO_SOFTMAX_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::SoftmaxOptions>
+build_circle_SoftmaxOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_SOFTMAX_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SplitOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/SplitOptions.cpp
new file mode 100644
index 000000000..fc6c036bf
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SplitOptions.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitOptions.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::SplitOptions>
+build_circle_SplitOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_SplitOptions();
+ assert(tflite_builtin_options);
+ circle::SplitOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_num_splits(tflite_builtin_options->num_splits());
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SplitOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/SplitOptions.h
new file mode 100644
index 000000000..1bbcc397e
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SplitOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_SPLIT_OPTIONS_H__
+#define __BBO_SPLIT_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::SplitOptions>
+build_circle_SplitOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_SPLIT_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SqueezeOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/SqueezeOptions.cpp
new file mode 100644
index 000000000..ec29b9bf5
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SqueezeOptions.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SqueezeOptions.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::SqueezeOptions>
+build_circle_SqueezeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_SqueezeOptions();
+ assert(tflite_builtin_options);
+ std::vector<int32_t> squeeze_dims_vec{tflite_builtin_options->squeeze_dims()->begin(),
+ tflite_builtin_options->squeeze_dims()->end()};
+ auto squeeze_dims = fb.CreateVector(squeeze_dims_vec);
+ circle::SqueezeOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_squeeze_dims(squeeze_dims);
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SqueezeOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/SqueezeOptions.h
new file mode 100644
index 000000000..f6237c977
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SqueezeOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_SQUEEZE_OPTIONS_H__
+#define __BBO_SQUEEZE_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::SqueezeOptions>
+build_circle_SqueezeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_SQUEEZE_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SubOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/SubOptions.cpp
new file mode 100644
index 000000000..2e55f4dab
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SubOptions.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SubOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::SubOptions> build_circle_SubOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_SubOptions();
+ assert(tflite_builtin_options);
+ circle::SubOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SubOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/SubOptions.h
new file mode 100644
index 000000000..a655548d1
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SubOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_SUB_OPTIONS_H__
+#define __BBO_SUB_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::SubOptions> build_circle_SubOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_SUB_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/TransposeOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/TransposeOptions.cpp
new file mode 100644
index 000000000..427322949
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/TransposeOptions.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TransposeOptions.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::TransposeOptions>
+build_circle_TransposeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ circle::TransposeOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/TransposeOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/TransposeOptions.h
new file mode 100644
index 000000000..bc5e5317a
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/TransposeOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_TRANSPOSE_OPTIONS_H__
+#define __BBO_TRANSPOSE_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::TransposeOptions>
+build_circle_TransposeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_TRANSPOSE_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp
new file mode 100644
index 000000000..3a569323c
--- /dev/null
+++ b/compiler/tflite2circle/src/CircleModel.cpp
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include "CircleModel.h"
+#include "DataLookup.h"
+
+namespace tflite2circle
+{
+
+template <>
+Offset<MetaDataBufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+{
+ if (tflite_flatbuffer_vec == nullptr)
+ return;
+ std::vector<int32_t> metadata_buffer_vec{tflite_flatbuffer_vec->begin(),
+ tflite_flatbuffer_vec->end()};
+ _circle_flatbuffer_vec_offset = fb->CreateVector(metadata_buffer_vec);
+}
+
+template <>
+Offset<BufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+{
+ std::vector<flatbuffers::Offset<circle::Buffer>> buffers_vec;
+
+ for (auto it : *tflite_flatbuffer_vec)
+ {
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> buffer_data;
+ if (it->data())
+ {
+ std::vector<uint8_t> data_vec{it->data()->begin(), it->data()->end()};
+ buffer_data = fb->CreateVector(data_vec);
+ }
+ circle::BufferBuilder circle_buffer_builder{*fb};
+ circle_buffer_builder.add_data(buffer_data);
+ auto circle_buffers = circle_buffer_builder.Finish();
+ buffers_vec.emplace_back(circle_buffers);
+ }
+ _circle_flatbuffer_vec_offset = fb->CreateVector(buffers_vec);
+}
+
+template <>
+Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+{
+ std::vector<flatbuffers::Offset<circle::SubGraph>> subgprahs_vec;
+
+ for (auto it_sg : *tflite_flatbuffer_vec)
+ {
+ // tensors of subgraph
+ std::vector<flatbuffers::Offset<circle::Tensor>> tensor_vec;
+
+ auto tflite_tensors = it_sg->tensors();
+ for (auto it : *tflite_tensors)
+ {
+ // shape
+ std::vector<int32_t> shape_vec{it->shape()->begin(), it->shape()->end()};
+ auto shape = fb->CreateVector(shape_vec);
+ // name
+ flatbuffers::Offset<flatbuffers::String> name;
+ if (it->name())
+ name = fb->CreateString(it->name()->str());
+ // quantization
+ flatbuffers::Offset<circle::QuantizationParameters> quantization;
+ if (it->quantization())
+ {
+ std::vector<float> tfmin;
+ std::vector<float> tfmax;
+ std::vector<float> tfscale;
+ std::vector<int64_t> tfzerop;
+ flatbuffers::Offset<flatbuffers::Vector<float>> min;
+ flatbuffers::Offset<flatbuffers::Vector<float>> max;
+ flatbuffers::Offset<flatbuffers::Vector<float>> scale;
+ flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point;
+
+ if (it->quantization()->min() && it->quantization()->max())
+ {
+ auto rmin = it->quantization()->min();
+ auto rmax = it->quantization()->max();
+ tfmin = std::vector<float>{rmin->begin(), rmin->end()};
+ tfmax = std::vector<float>{rmax->begin(), rmax->end()};
+ min = fb->CreateVector(tfmin);
+ max = fb->CreateVector(tfmax);
+ }
+
+ if (it->quantization()->scale() && it->quantization()->zero_point())
+ {
+ auto rs = it->quantization()->scale();
+ auto rz = it->quantization()->zero_point();
+ tfscale = std::vector<float>{rs->begin(), rs->end()};
+ tfzerop = std::vector<int64_t>{rz->begin(), rz->end()};
+ scale = fb->CreateVector(tfscale);
+ zero_point = fb->CreateVector(tfzerop);
+ }
+
+ quantization = circle::CreateQuantizationParameters(*fb, min, max, scale, zero_point);
+ }
+ // is_variable
+ bool is_variable = it->is_variable();
+
+ circle::TensorBuilder tensor_builder{*fb};
+ tensor_builder.add_shape(shape);
+ tensor_builder.add_type(get_circle_tensortype(it->type()));
+ tensor_builder.add_buffer(it->buffer());
+ tensor_builder.add_name(name);
+ tensor_builder.add_quantization(quantization);
+ tensor_builder.add_is_variable(is_variable);
+ auto tensor = tensor_builder.Finish();
+ tensor_vec.emplace_back(tensor);
+ }
+ auto circle_tensors = fb->CreateVector(tensor_vec);
+
+ // inputs of subgraph
+ auto tflite_inputs = it_sg->inputs();
+ std::vector<int32_t> input_vec{tflite_inputs->begin(), tflite_inputs->end()};
+
+ auto circle_inputs = fb->CreateVector(input_vec);
+
+ // outputs of subgraph
+ auto tflite_outputs = it_sg->outputs();
+ std::vector<int32_t> output_vec{tflite_outputs->begin(), tflite_outputs->end()};
+
+ auto circle_outputs = fb->CreateVector(output_vec);
+
+ // operators of subgraph
+ std::vector<flatbuffers::Offset<circle::Operator>> operator_vec;
+
+ auto tflite_operators = it_sg->operators();
+ for (auto it : *tflite_operators)
+ {
+ // inputs
+ std::vector<int32_t> input_vec{it->inputs()->begin(), it->inputs()->end()};
+ auto circle_inputs = fb->CreateVector(input_vec);
+ // outputs
+ std::vector<int32_t> output_vec{it->outputs()->begin(), it->outputs()->end()};
+ auto circle_outputs = fb->CreateVector(output_vec);
+ // builtin options
+ auto circle_builtin_options = get_circle_builtin_options(*fb, it);
+ auto circle_builtin_options_type = get_circle_builtin_options_type(it);
+
+ circle::OperatorBuilder operator_builder{*fb};
+ operator_builder.add_opcode_index(it->opcode_index());
+ operator_builder.add_inputs(circle_inputs);
+ operator_builder.add_outputs(circle_outputs);
+ operator_builder.add_builtin_options(circle_builtin_options);
+ operator_builder.add_builtin_options_type(circle_builtin_options_type);
+ // TODO custom_options, mutating_variable_inputs
+ auto opeartor = operator_builder.Finish();
+ operator_vec.emplace_back(opeartor);
+ }
+ auto circle_operators = fb->CreateVector(operator_vec);
+
+ // name of subgraph
+ auto subgraphs_name = fb->CreateString(it_sg->name());
+
+ // subgraphs
+ auto circle_subgraph_builder = circle::SubGraphBuilder{*fb};
+
+ circle_subgraph_builder.add_tensors(circle_tensors);
+ circle_subgraph_builder.add_inputs(circle_inputs);
+ circle_subgraph_builder.add_outputs(circle_outputs);
+ circle_subgraph_builder.add_operators(circle_operators);
+ circle_subgraph_builder.add_name(subgraphs_name);
+ circle_subgraph_builder.add_data_format(circle::DataFormat_CHANNELS_LAST);
+
+ auto circle_subgraph = circle_subgraph_builder.Finish();
+ subgprahs_vec.emplace_back(circle_subgraph);
+ }
+ _circle_flatbuffer_vec_offset = fb->CreateVector(subgprahs_vec);
+}
+
+template <>
+Offset<OperatorCodeLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+{
+ std::vector<flatbuffers::Offset<circle::OperatorCode>> operator_code_vec;
+
+ for (auto it : *tflite_flatbuffer_vec)
+ {
+ auto custom_code = fb->CreateString(it->custom_code());
+ circle::OperatorCodeBuilder operator_code_builder{*fb};
+ operator_code_builder.add_builtin_code(get_circle_builtin_code(it->builtin_code()));
+ operator_code_builder.add_custom_code(custom_code);
+ operator_code_builder.add_version(it->version());
+ auto code = operator_code_builder.Finish();
+ operator_code_vec.emplace_back(code);
+ }
+ _circle_flatbuffer_vec_offset = fb->CreateVector(operator_code_vec);
+}
+
// Translate the given tflite model into circle form. Each top-level section
// (operator codes, subgraphs, buffers, metadata buffer) is converted by the
// matching Offset<> specialization, then model_build() serializes the final
// circle::Model table into '_fb'.
// NOTE(review): _version is fixed to 0 and _description to "nnpackage" —
// presumably intentional defaults for this converter; confirm.
CircleModel::CircleModel(FlatBufBuilder &fb, TFLModel &model)
    : _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb}
{
  const tflite::Model *tfl_model = model.load_model();
  // Section conversion order mirrors the member declaration order; all
  // offsets must exist before model_build() reads them.
  _operator_codes_offset =
      stdex::make_unique<Offset<OperatorCodeLink>>(fb, tfl_model->operator_codes());
  _subGraphs_offset = stdex::make_unique<Offset<SubGraphLink>>(fb, tfl_model->subgraphs());
  _buffers_offset = stdex::make_unique<Offset<BufferLink>>(fb, tfl_model->buffers());
  _metadata_buffer_offset =
      stdex::make_unique<Offset<MetaDataBufferLink>>(fb, tfl_model->metadata_buffer());
  model_build();
}
+
// Assemble the top-level circle::Model table from the already-converted
// section offsets and finalize the flatbuffer so it can be written out.
// Must only run after every *_offset member has been populated (the
// constructor guarantees this).
void CircleModel::model_build(void) const
{
  circle::ModelBuilder model_builder{*_fb};

  model_builder.add_version(_version);
  model_builder.add_description(_description);
  model_builder.add_operator_codes(_operator_codes_offset->offset());
  model_builder.add_subgraphs(_subGraphs_offset->offset());
  model_builder.add_buffers(_buffers_offset->offset());
  model_builder.add_metadata_buffer(_metadata_buffer_offset->offset());

  auto model = model_builder.Finish();
  circle::FinishModelBuffer(*_fb, model);
}
+
// Raw pointer to the serialized circle model bytes. Valid only after the
// buffer has been finished (model_build) and only while the builder lives.
const char *CircleModel::base(void) const
{
  return reinterpret_cast<const char *>(_fb->GetBufferPointer());
}
+
// Size in bytes of the serialized circle model buffer (pairs with base()).
size_t CircleModel::size(void) const { return _fb->GetSize(); }
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/DataLookup.cpp b/compiler/tflite2circle/src/DataLookup.cpp
new file mode 100644
index 000000000..a1b780650
--- /dev/null
+++ b/compiler/tflite2circle/src/DataLookup.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DataLookup.h"
+#include "BuildBuiltinOptions.h"
+
+namespace tflite2circle
+{
+
// Map a tflite builtin operator code onto the circle equivalent.
// The case list is generated from TFLOperator.lst via the TFL_OPERATOR
// X-macro; an operator absent from that list raises std::runtime_error.
circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop)
{
  switch (tfl_bop)
  {
#define TFL_OPERATOR(OP) \
  case tflite::BuiltinOperator_##OP: \
    return circle::BuiltinOperator_##OP;
#include "TFLOperator.lst"
#undef TFL_OPERATOR
  default:
    throw std::runtime_error("tflite2circle: wrong op");
  }
}
+
// Map a tflite tensor type onto the circle equivalent.
// The case list is generated from TFLTensorType.lst via the TFL_TENSORTYPE
// X-macro; a type absent from that list raises std::runtime_error.
circle::TensorType get_circle_tensortype(tflite::TensorType tfl_tt)
{
  switch (tfl_tt)
  {
#define TFL_TENSORTYPE(TENSORTYPE) \
  case tflite::TensorType_##TENSORTYPE: \
    return circle::TensorType_##TENSORTYPE;
#include "TFLTensorType.lst"
#undef TFL_TENSORTYPE
  default:
    throw std::runtime_error("tflite2circle: wrong tensor type");
  }
}
+
+circle::Padding get_circle_padding(tflite::Padding tfl_p)
+{
+ switch (tfl_p)
+ {
+ case tflite::Padding_SAME:
+ return circle::Padding_SAME;
+ case tflite::Padding_VALID:
+ return circle::Padding_VALID;
+ default:
+ throw std::runtime_error("tflite2circle: wrong padding");
+ }
+}
+
// Map a tflite fused-activation-function type onto the circle equivalent.
// The case list is generated from TFLActivationFunctionType.lst via the
// TFL_ACTIVATION_FUNCTION X-macro; an unknown value raises
// std::runtime_error.
circle::ActivationFunctionType
get_circle_activation_function_type(tflite::ActivationFunctionType tfl_aft)
{
  switch (tfl_aft)
  {
#define TFL_ACTIVATION_FUNCTION(TYPE) \
  case tflite::ActivationFunctionType_##TYPE: \
    return circle::ActivationFunctionType_##TYPE;
#include "TFLActivationFunctionType.lst"
#undef TFL_ACTIVATION_FUNCTION
  default:
    throw std::runtime_error("tflite2circle: wrong activation function type.");
  }
}
+
// Build the circle builtin-options table corresponding to 'op''s tflite
// builtin options and return it as a type-erased union value (.Union()).
// Each TFLBuiltinOptions.lst entry expands to a case forwarding to the
// matching build_circle_<TYPE> helper; BuiltinOptions_NONE yields a null
// offset (no options attached); an unsupported type raises
// std::runtime_error.
flatbuffers::Offset<void> get_circle_builtin_options(flatbuffers::FlatBufferBuilder &fb,
                                                     const tflite::Operator *op)
{
  auto tflite_builtin_options_type = op->builtin_options_type();
  switch (tflite_builtin_options_type)
  {
    case tflite::BuiltinOptions_NONE:
      return flatbuffers::Offset<void>();
#define TFL_BUILTIN_OPTIONS(TYPE) \
  case tflite::BuiltinOptions_##TYPE: \
    return build_circle_##TYPE(fb, op).Union();
#include "TFLBuiltinOptions.lst"
#undef TFL_BUILTIN_OPTIONS
    default:
      throw std::runtime_error("tflite2circle: wrong builtin options type.");
  }
}
+
// Return the circle BuiltinOptions union *type tag* matching 'op''s tflite
// builtin options type. Companion to get_circle_builtin_options, which
// builds the union *value* — flatbuffers stores the two as separate fields,
// and both must be set together on the circle Operator.
circle::BuiltinOptions get_circle_builtin_options_type(const tflite::Operator *op)
{
  switch (op->builtin_options_type())
  {
    case tflite::BuiltinOptions_NONE:
      return circle::BuiltinOptions_NONE;
#define TFL_BUILTIN_OPTIONS(TYPE) \
  case tflite::BuiltinOptions_##TYPE: \
    return circle::BuiltinOptions_##TYPE;
#include "TFLBuiltinOptions.lst"
#undef TFL_BUILTIN_OPTIONS
    default:
      throw std::runtime_error("tflite2circle: wrong builtin options type.");
  }
}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/DataLookup.h b/compiler/tflite2circle/src/DataLookup.h
new file mode 100644
index 000000000..3f141ec08
--- /dev/null
+++ b/compiler/tflite2circle/src/DataLookup.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DATA_LOOKUP_H__
+#define __DATA_LOOKUP_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop);
+circle::TensorType get_circle_tensortype(tflite::TensorType tfl_tt);
+circle::Padding get_circle_padding(tflite::Padding tfl_p);
+circle::ActivationFunctionType
+get_circle_activation_function_type(tflite::ActivationFunctionType tfl_aft);
+flatbuffers::Offset<void> get_circle_builtin_options(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+circle::BuiltinOptions get_circle_builtin_options_type(const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __DATA_LOOKUP_H__
diff --git a/compiler/tflite2circle/src/TFLActivationFunctionType.lst b/compiler/tflite2circle/src/TFLActivationFunctionType.lst
new file mode 100644
index 000000000..a378bdc67
--- /dev/null
+++ b/compiler/tflite2circle/src/TFLActivationFunctionType.lst
@@ -0,0 +1,12 @@
+#ifndef TFL_ACTIVATION_FUNCTION
+#error "Activation function type is not defined."
+#endif
+
+// NOTE order of function follows TensorFlow lite schema.fbs 'enum ActivationFunctionType'
+
+TFL_ACTIVATION_FUNCTION(NONE)
+TFL_ACTIVATION_FUNCTION(RELU)
+TFL_ACTIVATION_FUNCTION(RELU_N1_TO_1)
+TFL_ACTIVATION_FUNCTION(RELU6)
+TFL_ACTIVATION_FUNCTION(TANH)
+TFL_ACTIVATION_FUNCTION(SIGN_BIT)
diff --git a/compiler/tflite2circle/src/TFLBuiltinOptions.lst b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
new file mode 100644
index 000000000..65c60b8ec
--- /dev/null
+++ b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
@@ -0,0 +1,103 @@
+#ifndef TFL_BUILTIN_OPTIONS
+#error "TFL_BUILTIN_OPTIONS is not defined"
+#endif
+
+// NOTE order of option follows TensorFlow lite schema.fbs 'union BuiltinOptions'
+
+TFL_BUILTIN_OPTIONS(Conv2DOptions)
+TFL_BUILTIN_OPTIONS(DepthwiseConv2DOptions)
+//TFL_BUILTIN_OPTIONS(ConcatEmbeddingsOptions)
+//TFL_BUILTIN_OPTIONS(LSHProjectionOptions)
+TFL_BUILTIN_OPTIONS(Pool2DOptions)
+//TFL_BUILTIN_OPTIONS(SVDFOptions)
+//TFL_BUILTIN_OPTIONS(RNNOptions)
+TFL_BUILTIN_OPTIONS(FullyConnectedOptions)
+TFL_BUILTIN_OPTIONS(SoftmaxOptions)
+TFL_BUILTIN_OPTIONS(ConcatenationOptions)
+TFL_BUILTIN_OPTIONS(AddOptions)
+//TFL_BUILTIN_OPTIONS(L2NormOptions)
+//TFL_BUILTIN_OPTIONS(LocalResponseNormalizationOptions)
+//TFL_BUILTIN_OPTIONS(LSTMOptions)
+//TFL_BUILTIN_OPTIONS(ResizeBilinearOptions)
+//TFL_BUILTIN_OPTIONS(CallOptions)
+TFL_BUILTIN_OPTIONS(ReshapeOptions)
+//TFL_BUILTIN_OPTIONS(SkipGramOptions)
+//TFL_BUILTIN_OPTIONS(SpaceToDepthOptions)
+//TFL_BUILTIN_OPTIONS(EmbeddingLookupSparseOptions)
+TFL_BUILTIN_OPTIONS(MulOptions)
+TFL_BUILTIN_OPTIONS(PadOptions)
+//TFL_BUILTIN_OPTIONS(GatherOptions)
+TFL_BUILTIN_OPTIONS(BatchToSpaceNDOptions)
+//TFL_BUILTIN_OPTIONS(SpaceToBatchNDOptions)
+TFL_BUILTIN_OPTIONS(TransposeOptions)
+TFL_BUILTIN_OPTIONS(ReducerOptions)
+TFL_BUILTIN_OPTIONS(SubOptions)
+TFL_BUILTIN_OPTIONS(DivOptions)
+TFL_BUILTIN_OPTIONS(SqueezeOptions)
+//TFL_BUILTIN_OPTIONS(SequenceRNNOptions)
+//TFL_BUILTIN_OPTIONS(StridedSliceOptions)
+TFL_BUILTIN_OPTIONS(ExpOptions)
+//TFL_BUILTIN_OPTIONS(TopKV2Options)
+TFL_BUILTIN_OPTIONS(SplitOptions)
+//TFL_BUILTIN_OPTIONS(LogSoftmaxOptions)
+TFL_BUILTIN_OPTIONS(CastOptions)
+//TFL_BUILTIN_OPTIONS(DequantizeOptions)
+//TFL_BUILTIN_OPTIONS(MaximumMinimumOptions)
+TFL_BUILTIN_OPTIONS(ArgMaxOptions)
+//TFL_BUILTIN_OPTIONS(LessOptions)
+//TFL_BUILTIN_OPTIONS(NegOptions)
+//TFL_BUILTIN_OPTIONS(PadV2Options)
+//TFL_BUILTIN_OPTIONS(GreaterOptions)
+TFL_BUILTIN_OPTIONS(GreaterEqualOptions)
+//TFL_BUILTIN_OPTIONS(LessEqualOptions)
+//TFL_BUILTIN_OPTIONS(SelectOptions)
+//TFL_BUILTIN_OPTIONS(SliceOptions)
+//TFL_BUILTIN_OPTIONS(TransposeConvOptions)
+//TFL_BUILTIN_OPTIONS(SparseToDenseOptions)
+//TFL_BUILTIN_OPTIONS(TileOptions)
+TFL_BUILTIN_OPTIONS(ExpandDimsOptions)
+TFL_BUILTIN_OPTIONS(EqualOptions)
+TFL_BUILTIN_OPTIONS(NotEqualOptions)
+TFL_BUILTIN_OPTIONS(ShapeOptions)
+//TFL_BUILTIN_OPTIONS(PowOptions)
+//TFL_BUILTIN_OPTIONS(ArgMinOptions)
+//TFL_BUILTIN_OPTIONS(FakeQuantOptions)
+TFL_BUILTIN_OPTIONS(PackOptions)
+TFL_BUILTIN_OPTIONS(LogicalOrOptions)
+//TFL_BUILTIN_OPTIONS(OneHotOptions)
+//TFL_BUILTIN_OPTIONS(LogicalAndOptions)
+TFL_BUILTIN_OPTIONS(LogicalNotOptions)
+//TFL_BUILTIN_OPTIONS(UnpackOptions)
+//TFL_BUILTIN_OPTIONS(FloorDivOptions)
+//TFL_BUILTIN_OPTIONS(SquareOptions)
+//TFL_BUILTIN_OPTIONS(ZerosLikeOptions)
+TFL_BUILTIN_OPTIONS(FillOptions)
+//TFL_BUILTIN_OPTIONS(BidirectionalSequenceLSTMOptions)
+//TFL_BUILTIN_OPTIONS(BidirectionalSequenceRNNOptions)
+//TFL_BUILTIN_OPTIONS(UnidirectionalSequenceLSTMOptions)
+//TFL_BUILTIN_OPTIONS(FloorModOptions)
+//TFL_BUILTIN_OPTIONS(RangeOptions)
+//TFL_BUILTIN_OPTIONS(ResizeNearestNeighborOptions)
+//TFL_BUILTIN_OPTIONS(LeakyReluOptions)
+//TFL_BUILTIN_OPTIONS(SquaredDifferenceOptions)
+//TFL_BUILTIN_OPTIONS(MirrorPadOptions)
+TFL_BUILTIN_OPTIONS(AbsOptions)
+//TFL_BUILTIN_OPTIONS(SplitVOptions)
+//TFL_BUILTIN_OPTIONS(UniqueOptions)
+//TFL_BUILTIN_OPTIONS(ReverseV2Options)
+//TFL_BUILTIN_OPTIONS(AddNOptions)
+//TFL_BUILTIN_OPTIONS(GatherNdOptions)
+TFL_BUILTIN_OPTIONS(CosOptions)
+//TFL_BUILTIN_OPTIONS(WhereOptions)
+//TFL_BUILTIN_OPTIONS(RankOptions)
+//TFL_BUILTIN_OPTIONS(ReverseSequenceOptions)
+//TFL_BUILTIN_OPTIONS(MatrixDiagOptions)
+//TFL_BUILTIN_OPTIONS(QuantizeOptions)
+//TFL_BUILTIN_OPTIONS(MatrixSetDiagOptions)
+//TFL_BUILTIN_OPTIONS(HardSwishOptions)
+//TFL_BUILTIN_OPTIONS(IfOptions)
+//TFL_BUILTIN_OPTIONS(WhileOptions)
+//TFL_BUILTIN_OPTIONS(DepthToSpaceOptions)
+//TFL_BUILTIN_OPTIONS(NonMaxSuppressionV4Options)
+//TFL_BUILTIN_OPTIONS(NonMaxSuppressionV5Options)
+//TFL_BUILTIN_OPTIONS(ScatterNdOptions)
diff --git a/compiler/tflite2circle/src/TFLModel.cpp b/compiler/tflite2circle/src/TFLModel.cpp
new file mode 100644
index 000000000..33f11fb83
--- /dev/null
+++ b/compiler/tflite2circle/src/TFLModel.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include "TFLModel.h"
+
+namespace tflite2circle
+{
+
+TFLModel::TFLModel(const std::string &path)
+{
+ _infile.open(path, std::ios::binary | std::ios::in);
+ _valid = _infile.good();
+}
+
+const tflite::Model *TFLModel::load_model(void)
+{
+ assert(_valid == true);
+ _infile.seekg(0, std::ios::end);
+ auto fileSize = _infile.tellg();
+ _infile.seekg(0, std::ios::beg);
+ _data.resize(fileSize);
+ _infile.read(_data.data(), fileSize);
+ _infile.close();
+
+ return tflite::GetModel(_data.data());
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/TFLOperator.lst b/compiler/tflite2circle/src/TFLOperator.lst
new file mode 100644
index 000000000..ac2f9daec
--- /dev/null
+++ b/compiler/tflite2circle/src/TFLOperator.lst
@@ -0,0 +1,128 @@
+#ifndef TFL_OPERATOR
+#error "TFL_OPERATOR is not defined."
+#endif
+
+// NOTE order of operator follows TensorFlow lite schema.fbs 'enum BuiltinOperator'
+
+TFL_OPERATOR(ADD)
+TFL_OPERATOR(AVERAGE_POOL_2D)
+TFL_OPERATOR(CONCATENATION)
+TFL_OPERATOR(CONV_2D)
+TFL_OPERATOR(DEPTHWISE_CONV_2D)
+TFL_OPERATOR(DEQUANTIZE)
+TFL_OPERATOR(EMBEDDING_LOOKUP)
+TFL_OPERATOR(FLOOR)
+TFL_OPERATOR(FULLY_CONNECTED)
+TFL_OPERATOR(HASHTABLE_LOOKUP)
+TFL_OPERATOR(L2_NORMALIZATION)
+TFL_OPERATOR(L2_POOL_2D)
+TFL_OPERATOR(LOCAL_RESPONSE_NORMALIZATION)
+TFL_OPERATOR(LOGISTIC)
+TFL_OPERATOR(LSH_PROJECTION)
+TFL_OPERATOR(LSTM)
+TFL_OPERATOR(MAX_POOL_2D)
+TFL_OPERATOR(MUL)
+TFL_OPERATOR(RELU)
+TFL_OPERATOR(RELU_N1_TO_1)
+TFL_OPERATOR(RELU6)
+TFL_OPERATOR(RESHAPE)
+TFL_OPERATOR(RESIZE_BILINEAR)
+TFL_OPERATOR(RNN)
+TFL_OPERATOR(SOFTMAX)
+TFL_OPERATOR(SPACE_TO_DEPTH)
+TFL_OPERATOR(SVDF)
+TFL_OPERATOR(TANH)
+TFL_OPERATOR(CONCAT_EMBEDDINGS)
+TFL_OPERATOR(SKIP_GRAM)
+TFL_OPERATOR(CALL)
+TFL_OPERATOR(CUSTOM)
+TFL_OPERATOR(EMBEDDING_LOOKUP_SPARSE)
+TFL_OPERATOR(PAD)
+TFL_OPERATOR(UNIDIRECTIONAL_SEQUENCE_RNN)
+TFL_OPERATOR(GATHER)
+TFL_OPERATOR(BATCH_TO_SPACE_ND)
+TFL_OPERATOR(SPACE_TO_BATCH_ND)
+TFL_OPERATOR(TRANSPOSE)
+TFL_OPERATOR(MEAN)
+TFL_OPERATOR(SUB)
+TFL_OPERATOR(DIV)
+TFL_OPERATOR(SQUEEZE)
+TFL_OPERATOR(UNIDIRECTIONAL_SEQUENCE_LSTM)
+TFL_OPERATOR(STRIDED_SLICE)
+TFL_OPERATOR(BIDIRECTIONAL_SEQUENCE_RNN)
+TFL_OPERATOR(EXP)
+TFL_OPERATOR(TOPK_V2)
+TFL_OPERATOR(SPLIT)
+TFL_OPERATOR(LOG_SOFTMAX)
+TFL_OPERATOR(DELEGATE)
+TFL_OPERATOR(BIDIRECTIONAL_SEQUENCE_LSTM)
+TFL_OPERATOR(CAST)
+TFL_OPERATOR(PRELU)
+TFL_OPERATOR(MAXIMUM)
+TFL_OPERATOR(ARG_MAX)
+TFL_OPERATOR(MINIMUM)
+TFL_OPERATOR(LESS)
+TFL_OPERATOR(NEG)
+TFL_OPERATOR(PADV2)
+TFL_OPERATOR(GREATER)
+TFL_OPERATOR(GREATER_EQUAL)
+TFL_OPERATOR(LESS_EQUAL)
+TFL_OPERATOR(SELECT)
+TFL_OPERATOR(SLICE)
+TFL_OPERATOR(SIN)
+TFL_OPERATOR(TRANSPOSE_CONV)
+TFL_OPERATOR(SPARSE_TO_DENSE)
+TFL_OPERATOR(TILE)
+TFL_OPERATOR(EXPAND_DIMS)
+TFL_OPERATOR(EQUAL)
+TFL_OPERATOR(NOT_EQUAL)
+TFL_OPERATOR(LOG)
+TFL_OPERATOR(SUM)
+TFL_OPERATOR(SQRT)
+TFL_OPERATOR(RSQRT)
+TFL_OPERATOR(SHAPE)
+TFL_OPERATOR(POW)
+TFL_OPERATOR(ARG_MIN)
+TFL_OPERATOR(FAKE_QUANT)
+TFL_OPERATOR(REDUCE_PROD)
+TFL_OPERATOR(REDUCE_MAX)
+TFL_OPERATOR(PACK)
+TFL_OPERATOR(LOGICAL_OR)
+TFL_OPERATOR(ONE_HOT)
+TFL_OPERATOR(LOGICAL_AND)
+TFL_OPERATOR(LOGICAL_NOT)
+TFL_OPERATOR(UNPACK)
+TFL_OPERATOR(REDUCE_MIN)
+TFL_OPERATOR(FLOOR_DIV)
+TFL_OPERATOR(REDUCE_ANY)
+TFL_OPERATOR(SQUARE)
+TFL_OPERATOR(ZEROS_LIKE)
+TFL_OPERATOR(FILL)
+TFL_OPERATOR(FLOOR_MOD)
+TFL_OPERATOR(RANGE)
+TFL_OPERATOR(RESIZE_NEAREST_NEIGHBOR)
+TFL_OPERATOR(LEAKY_RELU)
+TFL_OPERATOR(SQUARED_DIFFERENCE)
+TFL_OPERATOR(MIRROR_PAD)
+TFL_OPERATOR(ABS)
+TFL_OPERATOR(SPLIT_V)
+TFL_OPERATOR(UNIQUE)
+TFL_OPERATOR(CEIL)
+TFL_OPERATOR(REVERSE_V2)
+TFL_OPERATOR(ADD_N)
+TFL_OPERATOR(GATHER_ND)
+TFL_OPERATOR(COS)
+TFL_OPERATOR(WHERE)
+TFL_OPERATOR(RANK)
+TFL_OPERATOR(ELU)
+TFL_OPERATOR(REVERSE_SEQUENCE)
+TFL_OPERATOR(MATRIX_DIAG)
+TFL_OPERATOR(QUANTIZE)
+TFL_OPERATOR(MATRIX_SET_DIAG)
+TFL_OPERATOR(ROUND)
+TFL_OPERATOR(HARD_SWISH)
+TFL_OPERATOR(IF)
+TFL_OPERATOR(WHILE)
+TFL_OPERATOR(NON_MAX_SUPPRESSION_V4)
+TFL_OPERATOR(NON_MAX_SUPPRESSION_V5)
+TFL_OPERATOR(SCATTER_ND)
diff --git a/compiler/tflite2circle/src/TFLTensorType.lst b/compiler/tflite2circle/src/TFLTensorType.lst
new file mode 100644
index 000000000..968153a5d
--- /dev/null
+++ b/compiler/tflite2circle/src/TFLTensorType.lst
@@ -0,0 +1,16 @@
+#ifndef TFL_TENSORTYPE
+#error "TFL_TENSORTYPE is not defined."
+#endif
+
+// NOTE order of type follows TensorFlow lite schema.fbs 'enum TensorType'
+
+TFL_TENSORTYPE(FLOAT32)
+TFL_TENSORTYPE(FLOAT16)
+TFL_TENSORTYPE(INT32)
+TFL_TENSORTYPE(UINT8)
+TFL_TENSORTYPE(INT64)
+TFL_TENSORTYPE(STRING)
+TFL_TENSORTYPE(BOOL)
+TFL_TENSORTYPE(INT16)
+TFL_TENSORTYPE(COMPLEX64)
+TFL_TENSORTYPE(INT8)
diff --git a/compiler/tfts/CMakeLists.txt b/compiler/tfts/CMakeLists.txt
new file mode 100644
index 000000000..eda39b6ef
--- /dev/null
+++ b/compiler/tfts/CMakeLists.txt
@@ -0,0 +1,30 @@
+nncc_find_resource(TensorFlowTests)
+nnas_find_package(TensorFlow QUIET)
+
+if(NOT TensorFlow_FOUND)
+ return()
+endif(NOT TensorFlow_FOUND)
+
+if(NOT TARGET tfkit)
+ return()
+endif(NOT TARGET tfkit)
+
+message(STATUS "Build tfts: TRUE")
+
+# NOTE
+#
+# tfts_check fails on UNIT_Const_001 and UNIT_Shape_000 due to
+# the limitation of "nnkit_tf_backend"
+#
+# "nnkit_tf_backend" supports only TF_FLOAT32 input/output.
+#
+# TODO Remove nnkit dependency
+add_custom_target(tfts_check
+ COMMAND
+ "${CMAKE_CURRENT_SOURCE_DIR}/check_all.sh"
+ "${TensorFlowTests_DIR}"
+ $<TARGET_FILE:tfkit>
+ $<TARGET_FILE:nnkit-run>
+ $<TARGET_FILE:nnkit_tf_backend>
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+)
diff --git a/compiler/tfts/README.md b/compiler/tfts/README.md
new file mode 100644
index 000000000..4d66589e1
--- /dev/null
+++ b/compiler/tfts/README.md
@@ -0,0 +1,3 @@
+# tfts
+
+TensorFlow Testcase Service provides various services on the TensorFlow testcases committed in this repo.
diff --git a/compiler/tfts/check_all.sh b/compiler/tfts/check_all.sh
new file mode 100755
index 000000000..f93ccce8c
--- /dev/null
+++ b/compiler/tfts/check_all.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+TESTCASE_REPO="$1"; shift
+TFKIT_PATH="$1"; shift
+NNKIT_RUN_PATH="$1"; shift
+NNKIT_TF_BACKEND_PATH="$1"; shift
+
+echo "-- Found TensorFlow testcases: '${TESTCASE_REPO}'"
+echo "-- Found tfkit: '${TFKIT_PATH}'"
+echo "-- Found nnkit-run: '${NNKIT_RUN_PATH}'"
+echo "-- Found nnkit TensorFlow backend: '${NNKIT_TF_BACKEND_PATH}'"
+
+EXITCODE=0
+
+PASSED=()
+FAILED=()
+SKIPPED=()
+
+for PREFIX in $(cd "${TESTCASE_REPO}"; ls */test.info | xargs -i dirname {} | sort); do
+ TESTCASE_DIR="${TESTCASE_REPO}/${PREFIX}"
+
+ if [[ ! -f "${TESTCASE_DIR}/customop.conf" ]]; then
+ PASSED_TAG="${PREFIX}.passed"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ rm -f "${PASSED_TAG}"
+
+ set -ex
+ # Create a pb model
+ "${TFKIT_PATH}" encode "${TESTCASE_DIR}/test.pbtxt" "${PREFIX}.pb"
+
+ # Do inference
+ "${NNKIT_RUN_PATH}" \
+ --backend "${NNKIT_TF_BACKEND_PATH}" \
+ --backend-arg "${PREFIX}.pb" \
+ --backend-arg "${TESTCASE_DIR}/test.info"
+ set +ex
+
+ touch "${PASSED_TAG}"
+ )
+
+ if [[ ! -f "${PASSED_TAG}" ]]; then
+ FAILED+=("$PREFIX")
+ RESULT="FAIL"
+ else
+ PASSED+=("$PREFIX")
+ RESULT="PASS"
+ fi
+ else
+ SKIPPED+=("$PREFIX")
+ RESULT="SKIP"
+ fi
+
+ echo "Check '${PREFIX}' testcase - ${RESULT}"
+done
+
+echo
+echo "PASSED: ${#PASSED[@]}, FAILED: ${#FAILED[@]}, SKIPPED: ${#SKIPPED[@]}"
+
+if [[ ${#FAILED[@]} -ne 0 ]]; then
+ echo
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+exit 0
diff --git a/compiler/tfts/requires.cmake b/compiler/tfts/requires.cmake
new file mode 100644
index 000000000..771418fed
--- /dev/null
+++ b/compiler/tfts/requires.cmake
@@ -0,0 +1,2 @@
+require("tfkit")
+require("nnkit")
diff --git a/compiler/v4tf/README.md b/compiler/v4tf/README.md
new file mode 100644
index 000000000..54c8bc889
--- /dev/null
+++ b/compiler/v4tf/README.md
@@ -0,0 +1,16 @@
+# v4tf
+
+## What is this?
+
+*v4tf* is a wrapper interface to use TensorFlow via its C API.
+The name was originated from the movie, *V for Vendetta*, where the main character *V* hides his face by wearing a mask.
+
+## Why do we need this?
+
+In *nncc*, some tests use TensorFlow, which uses Protocol Buffers.
+For example, TensorFlow 1.12 uses Protocol Buffers 3.5.2.
+
+Some *nncc* modules use a different version of Protocol Buffers for internal purposes.
+If such modules also try to use the TensorFlow API, errors are thrown due to resolution of wrong symbols from different versions of Protocol Buffers.
+
+To prevent these errors, *v4tf* loads TensorFlow dynamically with all of its symbols resolved.
diff --git a/compute/ARMComputeEx/CMakeLists.txt b/compute/ARMComputeEx/CMakeLists.txt
index aaebff758..8dfe0c40b 100644
--- a/compute/ARMComputeEx/CMakeLists.txt
+++ b/compute/ARMComputeEx/CMakeLists.txt
@@ -28,5 +28,9 @@ target_compile_definitions(arm_compute_ex PRIVATE EMBEDDED_KERNELS
ARM_COMPUTE_LOGGING_ENABLED>)
# Validate check functions are not used on release build
# Some parameter are used for validate check function call, and these parameter may not used on release build
-target_compile_options(arm_compute_ex PRIVATE $<$<NOT:$<CONFIG:Debug>>:-Wno-unused-parameter -Wno-unused-function>)
+# Because clang requires to add "-Wno-unused-parameter -Wno-unused-function" after "-Wall",
+# this should be after linking nnfw_common and use interface lib linking
+add_library(ignore_unused_warning INTERFACE)
+target_compile_options(ignore_unused_warning INTERFACE -Wno-unused-parameter -Wno-unused-function)
+target_link_libraries(arm_compute_ex PRIVATE $<$<NOT:$<CONFIG:Debug>>:ignore_unused_warning>)
install(TARGETS arm_compute_ex DESTINATION lib)
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h b/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
index e4e752ef9..d29886a9d 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLKernelLibraryEx.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h
index b98b174f7..9699b5c00 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLArgOperationKernel.h
* @brief This file defines CLArgOperationKernel
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h
index ab33d9d3a..bb6fcb8f5 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
#define __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h
index 16cef0b61..b0357fe99 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLCastKernel.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h
index 60ec7a82a..8615cf120 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__
#define __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
index da075db69..a614d5259 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLEmbeddingLookupKernel.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h
new file mode 100644
index 000000000..9321c3677
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNELEX_H__
+#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNELEX_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to multiply matrices
+ *
+ * @note This kernel should be used ONLY for Midgard architectures
+ *
+ * This kernel performs the following computation:
+ *
+ * -# Convert a values from int8 to int32
+ * -# Convert b values from int8 to int32
+ * -# Compute the int32 matrix product of the resulting a * b and store the result as int32
+ *
+ */
+class CLGEMMLowpMatrixMultiplyKernelEx : public ICLKernel
+{
+public:
+ /** Default Constructor */
+ CLGEMMLowpMatrixMultiplyKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyKernelEx(const CLGEMMLowpMatrixMultiplyKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyKernelEx &operator=(const CLGEMMLowpMatrixMultiplyKernelEx &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpMatrixMultiplyKernelEx(CLGEMMLowpMatrixMultiplyKernelEx &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpMatrixMultiplyKernelEx &operator=(CLGEMMLowpMatrixMultiplyKernelEx &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @note This kernel should be used ONLY for Midgard architectures
+ *
+ * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8
+ * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p
+ * input0
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type
+ * supported: S32
+ * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of
+ * the input matrices
+ */
+ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output,
+ const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLGEMMLowpMatrixMultiplyKernelEx
+ *
+ * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8
+ * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p
+ * input0
+ * @param[in] output Output tensor to store the result of matrix multiplication. Data type
+ * supported: S32
+ * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of
+ * the input matrices
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1,
+ const ITensorInfo *output,
+ const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo());
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input0;
+ const ICLTensor *_input1;
+ ICLTensor *_output;
+ bool _slide_matrix_b;
+ bool _reinterpret_input_as_3d;
+ bool _reinterpret_output_as_3d;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNELEX_H__*/
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h
index aa81a1efa..6630c7be7 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLGatherExKernel.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
index 8269e5a7a..99cfa61ec 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLHashtableLookupKernel.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h
index f5e147e03..f57e799ad 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__
#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h
new file mode 100644
index 000000000..90e8b5705
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLMULTIPLYSCALEFACTORKERNEL_H__
+#define __ARM_COMPUTE_CLMULTIPLYSCALEFACTORKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface to multiply scale factor kernel. */
+class CLMultiplyScaleFactorKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLMultiplyScaleFactorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMultiplyScaleFactorKernel(const CLMultiplyScaleFactorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMultiplyScaleFactorKernel &operator=(const CLMultiplyScaleFactorKernel &) = delete;
+ /** Default Move Constructor. */
+ CLMultiplyScaleFactorKernel(CLMultiplyScaleFactorKernel &&) = default;
+ /** Default move assignment operator */
+ CLMultiplyScaleFactorKernel &operator=(CLMultiplyScaleFactorKernel &&) = default;
+ /** Default destructor */
+ ~CLMultiplyScaleFactorKernel() = default;
+ /** Set input, output tensors.
+ *
+   * @param[in,out] input        Source tensor. Data type supported: S32.
+ * @param[in] scale_factor Scale tensor. Data type supported: F16/F32.
+ * @param[out] output Destination tensor. Data type supported: Same as @p scale_factor.
+ * @param[in] multiplier Additional scale value.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *scale_factor, ICLTensor *output,
+ float multiplier = 1.f);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLMultiplyScaleFactorKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: S32.
+ * @param[in] scale_factor Scale tensor. Data type supported: F16/F32.
+ * @param[in] output Output tensor info. Data types supported: Same as @p scale_factor.
+   * @note The @p multiplier value used by configure() is not part of this validation.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *scale_factor,
+ const ITensorInfo *output);
+
+ /**
+ * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
+ * queue.
+ * @note The queue is *not* flushed by this method, and therefore the kernel will not have
+ * been executed by the time this method returns.
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of
+ * the window returned by window()).
+   * @param[in,out] queue  Command queue on which to enqueue the kernel.
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_scale_factor;
+ ICLTensor *_output;
+ float _multiplier;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLMULTIPLYSCALEFACTORKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
index ccbea147e..fa383c0d0 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLNEGKERNEL_H__
#define __ARM_COMPUTE_CLNEGKERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h
index eff1b8bd5..dd2dbf6a4 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLPRELU_KERNEL_H__
#define __ARM_COMPUTE_CLPRELU_KERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h
new file mode 100644
index 000000000..4e1b56cba
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLQUANTIZATIONSYMMETRICKERNEL_H__
+#define __ARM_COMPUTE_CLQUANTIZATIONSYMMETRICKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the quantization layer kernel.
+ *
+ * @note The implementation supports only 2D input tensors.
+ */
+class CLQuantizationSymmetricKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLQuantizationSymmetricKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLQuantizationSymmetricKernel(const CLQuantizationSymmetricKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLQuantizationSymmetricKernel &operator=(const CLQuantizationSymmetricKernel &) = delete;
+ /** Default Move Constructor. */
+ CLQuantizationSymmetricKernel(CLQuantizationSymmetricKernel &&) = default;
+ /** Default move assignment operator */
+ CLQuantizationSymmetricKernel &operator=(CLQuantizationSymmetricKernel &&) = default;
+ /** Default destructor */
+ ~CLQuantizationSymmetricKernel() = default;
+ /** Set the input, output.
+ *
+ * @param[in] input Source tensor. Data types supported: F32/F16.
+ * @param[in] scale_factor Scale tensor of @p output. Data type supported: Same as @p input.
+ * @param[out] output Destination tensor with the same dimensions of input. Data types supported:
+ * S8.
+ *
+ * @note Output auto initialization is not supported by this kernel
+ */
+ void configure(const ICLTensor *input, const ICLTensor *scale_factor, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLQuantizationSymmetricKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: F32/F16.
+ * @param[in] scale_factor Scale tensor of @p output. Data type supported: Same as @p input.
+ * @param[in] output Destination tensor info with the same dimensions of input. Data types
+ * supported: S8.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *scale_factor,
+ const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_scale_factor;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLQUANTIZATIONSYMMETRICKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
index a26a4a7fc..9b8a239d3 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLReduceOperationKernel.h
* @brief This file defines CLReduceOperationKernel class
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h
new file mode 100644
index 000000000..4d4478ece
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLSCALEFACTORSYMM8KERNEL_H__
+#define __ARM_COMPUTE_CLSCALEFACTORSYMM8KERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to perform min max search on a 3D tensor.
+ */
+class CLScaleFactorSymm8Kernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLScaleFactorSymm8Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLScaleFactorSymm8Kernel(const CLScaleFactorSymm8Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLScaleFactorSymm8Kernel &operator=(const CLScaleFactorSymm8Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLScaleFactorSymm8Kernel(CLScaleFactorSymm8Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLScaleFactorSymm8Kernel &operator=(CLScaleFactorSymm8Kernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor with 2 dimensions. The first dimension will be interpreted as
+ * batches. Data types supported: F32.
+ * @param[out] output Output tensor with shape [batches] which stores the scale values for each 2D
+ * input tensor.
+ * The dimensions over the first must match the batched dimensions of the input
+ * tensor. Data types supported: F32.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLScaleFactorSymm8Kernel
+ *
+ * @param[in] input Input tensor info. Data types supported: F32.
+ * @param[in] output Output tensor info with shape [batches] which stores the scale values for
+ * each 2D input tensor.
+ * The dimensions over the first must match the batched dimensions of the input
+ * tensor. Data types supported: F32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ /** Resets global minimum and maximum
+ *
+ * @param[in,out] queue Command queue on which to map and unmap the min_max tensor
+ */
+ void reset(cl::CommandQueue &queue);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLSCALEFACTORSYMM8KERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h
index 577e38cc4..75ba0c074 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__
#define __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h
index be845a549..4c0a82ce1 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__
#define __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
index 8da2daecc..aa4a14812 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLTopKV2Kernel.h
* @brief This file defines classes for TopKV2Kernel
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h
index c5ef730b6..9d174deb5 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h
new file mode 100644
index 000000000..59c337a12
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CPPONEHOTERNEL_H__
+#define __ARM_COMPUTE_CPPONEHOTERNEL_H__
+
+#include "arm_compute/core/CPP/ICPPKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** CPP kernel to perform tensor OneHot operation. */
+class CPPOneHotKernelEx : public ICPPKernel
+{
+public:
+ const char *name() const override { return "CPPOneHotKernelEx"; }
+ /** Default constructor */
+ CPPOneHotKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CPPOneHotKernelEx(const CPPOneHotKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CPPOneHotKernelEx &operator=(const CPPOneHotKernelEx &) = delete;
+ /** Allow instances of this class to be moved */
+ CPPOneHotKernelEx(CPPOneHotKernelEx &&) = default;
+ /** Allow instances of this class to be moved */
+ CPPOneHotKernelEx &operator=(CPPOneHotKernelEx &&) = default;
+ /** Default destructor */
+ ~CPPOneHotKernelEx() = default;
+
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] indices A tensor for indices. Data types supported: S32
+ * @param[out] output A tensor for computed value of one hot operator
+ * @param[in] depth An int value for depth
+ * @param[in] on_value A float value for on_value
+ * @param[in] off_value A float value for off_value
+ * @param[in] axis An int value for axis
+ */
+ void configure(const ITensor *indices, ITensor *output, const int depth, const float on_value,
+ const float off_value, const int axis);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CPPOneHotKernelEx
+ *
+ * @param[in] indices A tensor for indices. Data types supported: S32
+ * @param[in] depth An int value for depth
+ * @param[in] on_value A float value for on_value
+ * @param[in] off_value A float value for off_value
+ * @param[in] axis An int value for axis
+ *
+ * @return a status
+ */
+ static Status validate(const ITensor *indices, const int depth, const float on_value,
+ const float off_value, const int axis);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ bool is_parallelisable() const override;
+
+private:
+ /** Template function to run the topKV operation. */
+ template <typename T> void run_one_hot();
+
+ const ITensor *_indices;
+ ITensor *_output;
+ int _depth;
+ float _on_value;
+ float _off_value;
+ int _axis;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CPPONEHOTKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h
index d093c22cb..d4c9c610a 100644
--- a/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__
#define __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h b/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h
index 358e0ebc6..28114f8b5 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h
new file mode 100644
index 000000000..a827f48f8
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEACTIVATIONLAYERKERNELEX_H__
+#define __ARM_COMPUTE_NEACTIVATIONLAYERKERNELEX_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/utils/misc/Traits.h"
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#include <arm_fp16.h>
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the activation layer kernel. */
+class NEActivationLayerKernelEx : public INEKernel
+{
+public:
+ const char *name() const override { return "NEActivationLayerKernelEx"; }
+ /** Constructor */
+ NEActivationLayerKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEActivationLayerKernelEx(const NEActivationLayerKernelEx &) = delete;
+ /** Default move constructor */
+ NEActivationLayerKernelEx(NEActivationLayerKernelEx &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEActivationLayerKernelEx &operator=(const NEActivationLayerKernelEx &) = delete;
+ /** Default move assignment operator */
+ NEActivationLayerKernelEx &operator=(NEActivationLayerKernelEx &&) = default;
+ /** Set the input and output tensor.
+ *
+ * @note If the output tensor is a nullptr, the activation function will be performed in-place
+ *
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this
+ * tensor will store the result
+ * of the activation function. Data types supported:
+ * QASYMM8/QSYMM16/F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] activation_info Activation layer information.
+ */
+ void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEActivationLayerKernelEx
+ *
+ * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor
+ * will store the result
+ * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input
+ * @param[in] act_info Activation layer information.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ActivationLayerInfo &act_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ using ActivationFunction = ActivationLayerInfo::ActivationFunction;
+ /** Common signature for all the specialised @ref NEActivationLayerKernelEx functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using ActivationFunctionExecutorPtr = void (NEActivationLayerKernelEx::*)(const Window &window);
+ /** Function to apply an activation function on a tensor.
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ template <ActivationLayerInfo::ActivationFunction F, typename T>
+ typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
+ activation(const Window &window);
+ /** Function to apply an activation function on a tensor.
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ template <ActivationLayerInfo::ActivationFunction F, typename T>
+ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type
+ activation(const Window &window);
+ /** Function to apply an activation function on a tensor.
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ template <ActivationLayerInfo::ActivationFunction F, typename T>
+ typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type
+ activation(const Window &window);
+
+private:
+ ITensor *_input;
+ ITensor *_output;
+ ActivationFunctionExecutorPtr _func;
+ ActivationLayerInfo _act_info;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEACTIVATIONLAYERKERNELEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h
index 61992bd50..8c544cda8 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
#define __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h
index fd2a2ee3b..4e9f097c2 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NECASTKERNEL_H__
#define __ARM_COMPUTE_NECASTKERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h
index 5b6ef6bfb..b62897e68 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__
#define __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h
index d6fad1155..57de78dd8 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__
#define __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h
index 1490e75f2..88f21c96e 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__
#define __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h
index 3fa9c6e9a..e765aa489 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h
index d8976e7d0..cb2a485d5 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__
#define __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h
index 76e2587af..8724cc69b 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__
#define __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h
index 723b14523..198b0be9d 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__
#define __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h
index 79bb78661..722efd3d0 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEPRELUKERNEL_H__
#define __ARM_COMPUTE_NEPRELUKERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h
index 590b23873..0b080cf73 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__
#define __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h
index 73991b67d..c9024fbb3 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__
#define __ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h
index 5d697c2b2..0ffcf6be8 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__
#define __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/TypesEx.h b/compute/ARMComputeEx/arm_compute/core/TypesEx.h
index 3b0902f08..faba8a449 100644
--- a/compute/ARMComputeEx/arm_compute/core/TypesEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/TypesEx.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_TYPESEX_H__
#define __ARM_COMPUTE_TYPESEX_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/UtilsEx.h b/compute/ARMComputeEx/arm_compute/core/UtilsEx.h
index 39026e6bb..d57e8fcf5 100644
--- a/compute/ARMComputeEx/arm_compute/core/UtilsEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/UtilsEx.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_UTILSEX_H__
#define __ARM_COMPUTE_UTILSEX_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h b/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
index 16fd40ed9..a9ceacbea 100644
--- a/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__
#define __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h
index d9d0d4d35..c37096f7c 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLArgOperation.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h
index d16a0762d..eed5cb8a4 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
index 061e34f26..88a9b00ec 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLBINARYLOGICALOP_H__
#define __ARM_COMPUTE_CLBINARYLOGICALOP_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
index 36acfaed7..ebe0d8a1c 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLCast.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h
index d78a6ada4..d52a538df 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLDEPTHTOSPACE_H__
#define __ARM_COMPUTE_CLDEPTHTOSPACE_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
index 257772a89..fbee7e40e 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLEmbeddingLookup.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h
new file mode 100644
index 000000000..1a0284a3e
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__
+#define __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
+#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h"
+#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h"
+#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h"
+#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h"
+
+namespace arm_compute
+{
+/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls
+ * the following kernels:
+ *
+ * -# @ref CLTransposeKernel
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class CLFullyConnectedHybridLayerReshapeWeights : public ICLSimpleFunction
+{
+public:
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
+ * S8.
+ * @param[out] output Destination tensor which stores the transposed input tensor. Data type
+ * supported: Same as @p input.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLFullyConnectedHybridLayerReshapeWeights
+ *
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
+ * S8.
+ * @param[in] output Destination tensor which stores the transposed input tensor. Data type
+ * supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+};
+
+/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following
+ * OpenCL kernels:
+ *
+ * -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer)
+ * -# @ref CLFullyConnectedHybridLayerReshapeWeights (if @p are_weights_reshaped is set to false
+ * and transpose_weights is set to true ) (called once)
+ * -# @ref CLGEMMLowpMatrixMultiplyCore (if quantized symmetric)
+ * -# @ref CLGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr)
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class CLFullyConnectedHybridLayer : public IFunction
+{
+public:
+ /** Constructor */
+ CLFullyConnectedHybridLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFullyConnectedHybridLayer(const CLFullyConnectedHybridLayer &) = delete;
+ /** Default move constructor */
+ CLFullyConnectedHybridLayer(CLFullyConnectedHybridLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFullyConnectedHybridLayer &operator=(const CLFullyConnectedHybridLayer &) = delete;
+ /** Default move assignment operator */
+ CLFullyConnectedHybridLayer &operator=(CLFullyConnectedHybridLayer &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data type supported: F16/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed)
+ * weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed)
+ * weights will have as many rows as the input's first dimension.
+ * Data type supported: S8.
+ * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix
+ * multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the
+ * function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is
+ * called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ */
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,
+ ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLFullyConnectedHybridLayer
+ *
+ * @param[in] input Source tensor info. Data type supported: F16/F32.
+ * @param[in] weights Weights tensor info. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed)
+ * weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed)
+ * weights will have as many rows as the input's first dimension.
+ * Data type supported: S8.
+ * @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor info. Its shape should be equal to the output of a
+ * matrix multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the
+ * function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is
+ * called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *biases, const ITensorInfo *output,
+ FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+
+ // Inherited methods override
+ void run() override;
+ void prepare() override;
+
+private:
+ void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output,
+ bool retain_internal_weights);
+
+ MemoryGroup _memory_group;
+ CLFullyConnectedHybridLayerReshapeWeights _reshape_weights_kernel;
+ CLScaleFactorSymm8Kernel _scale_factor_kernel;
+ CLQuantizationSymmetricKernel _quant_input_kernel;
+ CLGEMMLowpMatrixMultiplyCoreEx _mm_gemmlowp;
+ CLMultiplyScaleFactorKernel _multiply_scale_kernel;
+ CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; // TODO(COMPMID-1889): Use CLGEMM to
+ // add bias in
+ // CLFullyConnectedHybridLayer
+ CLTensor _reshape_weights_output;
+ CLTensor _quantized_input;
+ CLTensor _scale_factor;
+ CLTensor _gemmlowp_output;
+ bool _are_weights_reshaped;
+ bool _accumulate_biases;
+ bool _is_prepared;
+ const ICLTensor *_original_weights;
+};
+}
+#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h
new file mode 100644
index 000000000..e65a646dc
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLFULLYCONNECTEDLAYEREX_H__
+#define __ARM_COMPUTE_CLFULLYCONNECTEDLAYEREX_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
+#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
+#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMM.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
+#include "arm_compute/runtime/IWeightsManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
+namespace arm_compute
+{
+/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls
+ * the following kernels:
+ *
+ * -# @ref CLTransposeKernel
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class CLFullyConnectedLayerReshapeWeightsEx : public ICLSimpleFunction
+{
+public:
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
+ * QASYMM8/F16/F32.
+ * @param[out] output Destination tensor which stores the transposed input tensor. Data type
+ * supported: Same as @p input.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLFullyConnectedLayerReshapeWeightsEx
+ *
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
+ * QASYMM8/F16/F32.
+ * @param[in] output Destination tensor which stores the transposed input tensor. Data type
+ * supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+};
+
+namespace weights_transformations
+{
+/** Basic function to manage the reshape weights generated from @ref
+ * CLFullyConnectedLayerReshapeWeightsEx */
+class CLFullyConnectedLayerReshapeWeightsExManaged : public ITransformWeights
+{
+public:
+ // Inherited method override
+ void run() override
+ {
+ _output.allocator()->allocate();
+ _func.run();
+ _reshape_run = true;
+ }
+
+ // Inherited method override
+ void release() override { _output.allocator()->free(); }
+
+ // Inherited method override
+ ICLTensor *get_weights() override { return &_output; }
+
+ // Inherited method override
+ uint32_t uid() override { return _uid; }
+
+ /** Configures the @ref CLFullyConnectedLayerReshapeWeightsEx function
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
+ */
+ void configure(const ICLTensor *input) { _func.configure(input, &_output); }
+
+private:
+ static constexpr uint32_t _uid = 0x0;
+ CLTensor _output{};
+ CLFullyConnectedLayerReshapeWeightsEx _func{};
+};
+} // namespace weights_transformations
+
+/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following
+ * OpenCL kernels:
+ *
+ * -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer)
+ * -# @ref CLFullyConnectedLayerReshapeWeightsEx (if @p are_weights_reshaped is set to false and
+ * transpose_weights is set to true ) (called once)
+ * -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized
+ * asymmetric)
+ * -# @ref CLGEMMMatrixAccumulateBiasesKernel or @ref
+ * CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is
+ * not equal to nullptr)
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class CLFullyConnectedLayerEx : public IFunction
+{
+public:
+ /** Constructor */
+ CLFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr,
+ IWeightsManager *weights_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFullyConnectedLayerEx(const CLFullyConnectedLayerEx &) = delete;
+ /** Default move constructor */
+ CLFullyConnectedLayerEx(CLFullyConnectedLayerEx &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFullyConnectedLayerEx &operator=(const CLFullyConnectedLayerEx &) = delete;
+ /** Default move assignment operator */
+ CLFullyConnectedLayerEx &operator=(CLFullyConnectedLayerEx &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed)
+ * weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed)
+ * weights will have as many rows as the input's first dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix
+ * multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the
+ * function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is
+ * called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ */
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,
+ ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLFullyConnectedLayerEx
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor info. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed)
+ * weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed)
+ * weights will have as many rows as the input's first dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] biases Bias tensor info. Can be nullptr. Data type supported: Same as @p input.
+ * @param[out] output Destination tensor info. Its shape should be equal to the output of a
+ * matrix multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the
+ * function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is
+ * called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *biases, const ITensorInfo *output,
+ FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+
+ // Inherited methods override
+ void run() override;
+ void prepare() override;
+
+private:
+ void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias,
+ ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+ void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias,
+ ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+ void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias,
+ ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+
+ MemoryGroup _memory_group;
+ IWeightsManager *_weights_manager;
+ CLConvertFullyConnectedWeights _convert_weights;
+ weights_transformations::CLConvertFullyConnectedWeightsManaged _convert_weights_managed;
+ weights_transformations::CLFullyConnectedLayerReshapeWeightsExManaged
+ _reshape_weights_managed_function;
+ CLFlattenLayer _flatten_layer;
+ CLFullyConnectedLayerReshapeWeightsEx _reshape_weights_function;
+ CLGEMM _mm_gemm;
+ CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
+ CLTensor _flatten_output;
+ CLTensor _converted_weights_output;
+ CLTensor _reshape_weights_output;
+ bool _are_weights_converted;
+ bool _are_weights_reshaped;
+ bool _is_fc_after_conv;
+ bool _is_quantized;
+ bool _is_prepared;
+ const ICLTensor *_original_weights;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
index fd0a65f20..289ab167f 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
@@ -23,8 +23,8 @@
#ifndef __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__
#define __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__
-#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h>
-#include <arm_compute/runtime/misc/functions/GenericReshapeLayer.h>
+#include <arm_compute/runtime/CL/CLTensor.h>
+#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h>
#include <arm_compute/runtime/IMemoryManager.h>
namespace arm_compute
@@ -35,9 +35,16 @@ namespace arm_compute
class CLFullyConnectedReshapingLayer : public arm_compute::IFunction
{
public:
+ enum class KernelType
+ {
+ GENERAL, ///< General FC
+ PREPROCESSED_WEIGHTS ///< Weights are constant, so they can be preprocessed
+ };
+
+public:
CLFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr)
: _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{},
- _cl_fc{memory_manager}, _cl_reshape{}, _needs_reshape(false)
+ _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false)
{
// DO NOTHING
}
@@ -55,7 +62,8 @@ public:
*/
void configure(const arm_compute::ICLTensor *input, const arm_compute::ICLTensor *weights,
const arm_compute::ICLTensor *biases, arm_compute::ICLTensor *output,
- bool needs_reshape, const arm_compute::TensorShape &reshape);
+ bool needs_reshape, const arm_compute::TensorShape &reshape,
+ KernelType kernel_type);
public:
/**
@@ -79,9 +87,9 @@ private:
arm_compute::CLTensor _cl_buffer;
private:
- arm_compute::CLFullyConnectedLayer _cl_fc;
- // TODO Change to CLReshapeLayer
- arm_compute::misc::GenericReshapeLayer _cl_reshape;
+ std::shared_ptr<IMemoryManager> _memory_manager;
+ std::unique_ptr<arm_compute::IFunction> _cl_fc;
+ CLReshapeLayer _cl_reshape;
bool _needs_reshape;
};
} // namespace arm_compute
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h
new file mode 100644
index 000000000..68aba74ab
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__
+#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__
+
+#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h"
+#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
+namespace arm_compute
+{
+class IMemoryManager;
+class ICLTensor;
+
+/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. This function calls the
+ * following OpenCL kernels:
+ *
+ * -# @ref CLGEMMLowpMatrixMultiplyKernel (if the parameter "reshape_b_only_on_first_run" of
+ * GEMMInfo is FALSE)
+ * -# @ref CLGEMMLowpMatrixAReductionKernel (if the offset of matrix B is not 0)
+ * -# @ref CLGEMMLowpMatrixBReductionKernel (if the offset of matrix A is not 0)
+ *
+ */
+class CLGEMMLowpMatrixMultiplyCoreEx : public IFunction
+{
+public:
+ /** Constructor */
+ CLGEMMLowpMatrixMultiplyCoreEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyCoreEx(const CLGEMMLowpMatrixMultiplyCoreEx &) = delete;
+ /** Default move constructor */
+ CLGEMMLowpMatrixMultiplyCoreEx(CLGEMMLowpMatrixMultiplyCoreEx &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyCoreEx &operator=(const CLGEMMLowpMatrixMultiplyCoreEx &) = delete;
+ /** Default move assignment operator */
+ CLGEMMLowpMatrixMultiplyCoreEx &operator=(CLGEMMLowpMatrixMultiplyCoreEx &&) = default;
+ /** Initialise the kernel's inputs, output
+ *
+ * @note GEMMLowp: low precision GEMM kernel. [A * B + C]
+ * This kernel performs the following computations:
+ *
+ * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
+ * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
+ * -# Compute the matrix product of the resulting a * b in int32.
+ * -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE
+ *
+ * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8.
+ * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
+ * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported:
+ * S32
+ * @param[out] output Output tensor. Data type supported: S32 or QASYMM8 if
+ * gemm_info.gemmlowp_output_stage != NONE
+ * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
+ * and
+ * if the reshape of matrix B should be executed only for the first run
+ */
+ void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output,
+ const GEMMInfo &gemm_info = GEMMInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLGEMMLowpMatrixMultiplyCoreEx
+ *
+ * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8.
+ * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a
+ * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type
+ * supported: S32
+ * @param[in] output Output tensor info. Data type supported: S32 or QASYMM8 if
+ * gemm_info.gemmlowp_output_stage != NONE
+ * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
+ * and
+ * if the reshape of matrix B should be executed only for the first run
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
+ const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ MemoryGroup _memory_group;
+
+ // Kernels used
+ CLGEMMLowpMatrixMultiplyKernelEx _mm_midgard_kernel;
+ CLGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
+ CLGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
+
+ // Temporary tensors
+ CLTensor _vector_sum_col;
+ CLTensor _vector_sum_row;
+
+ int32_t _a_offset;
+ int32_t _b_offset;
+ bool _reshape_b_only_on_first_run;
+ bool _is_prepared;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
index 04d227aa7..b01ec4255 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLGatherEx.h
* @brief This file contains CLGatherEx class
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
index 65aa6cbd5..6618f5aa4 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLHashtableLookup.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h
index ed29db925..887e7aaa5 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__
#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h
index 4bf203c5a..51216715f 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLLOGICALNOT_H__
#define __ARM_COMPUTE_CLLOGICALNOT_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h
index 198a0fd4e..8ec9aa307 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLNEG_H__
#define __ARM_COMPUTE_CLNEG_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h
index 622a61b5e..7fbe558ff 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLPRELU_H__
#define __ARM_COMPUTE_CLPRELU_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
index b142d3a2e..e83fb01cd 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLPixelWiseDivision.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h
index 7e88cb369..b49cbd873 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_CLRNN_LAYER_EX_H__
#define __ARM_COMPUTE_CLRNN_LAYER_EX_H__
@@ -89,7 +105,7 @@ public:
void prepare() override;
private:
- CLMemoryGroup _memory_group;
+ MemoryGroup _memory_group;
CLGEMM _gemm_state_f;
CLSaturatedArithmeticOperationKernel _add_kernel;
CLActivationLayerKernel _activation_kernel;
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
index 1d367d56b..7dba84b12 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLReduceOperation.h
* @ingroup COM_AI_RUNTIME
@@ -26,7 +49,7 @@
#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
#include "arm_compute/core/TypesEx.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
@@ -82,7 +105,7 @@ public:
void run() override;
private:
- CLMemoryGroup _memory_group;
+ MemoryGroup _memory_group;
ICLTensor *_input;
ICLTensor *_output;
std::set<uint32_t> _axis;
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h
index 7e2df8986..09eb69ae5 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__
#define __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h
index 17f762092..2090b46fa 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLSPACETODEPTH_H__
#define __ARM_COMPUTE_CLSPACETODEPTH_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h
index 6b26a85c8..03edd15e6 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLStridedSlice.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
index 20c749e0b..e301a5152 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file CLTopKV2.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
index 340a7bfe9..54a697e69 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
#define __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
@@ -30,7 +46,6 @@
#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
@@ -144,7 +159,7 @@ public:
void prepare() override;
private:
- CLMemoryGroup _memory_group;
+ MemoryGroup _memory_group;
CLTransposeConvLayerUpsample _scale_f;
CLConvolutionLayer _conv_f;
CPPFlipWeightsKernel _flip_weights;
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h
index 4ae0e1830..7570fe76d 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
@@ -21,7 +45,6 @@
#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPOneHotEx.h b/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPOneHotEx.h
new file mode 100644
index 000000000..f132dfcc8
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPOneHotEx.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CPPONEHOT_EX_H__
+#define __ARM_COMPUTE_CPPONEHOT_EX_H__
+
+#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref CPPOneHot */
+class CPPOneHotEx : public ICPPSimpleFunction
+{
+public:
+ /** Configure the one_hot function
+ *
+ * @param[in] indices A tensor for indices. Data types supported: S32
+ * @param[out] output A tensor for computed value of one hot operator
+ * @param[in] depth An int value for depth
+ * @param[in] on_value A float value for on_value
+ * @param[in] off_value A float value for off_value
+ * @param[in] axis An int value for axis
+ */
+ void configure(const ITensor *indices, ITensor *output, const int depth, const float on_value,
+ const float off_value, const int axis);
+};
+}
+#endif /* __ARM_COMPUTE_CPPONEHOT_EX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h b/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h
index 8e7e2f937..666afef4b 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_CPPUPSAMPLE_EX_H__
#define __ARM_COMPUTE_CPPUPSAMPLE_EX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
index 37bccc52c..49504fde3 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
@@ -16,11 +16,10 @@
#ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__
#define __ARM_COMPUTE_NEFUNCTIONSEX_H__
-#include <arm_compute/runtime/NEON/functions/NEArgMinMax.h>
+#include <arm_compute/runtime/NEON/functions/NEActivationLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
#include <arm_compute/runtime/NEON/functions/NECast.h>
#include <arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h>
-#include <arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEGatherEx.h>
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h
new file mode 100644
index 000000000..6156c84f8
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__
+#define __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__
+
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Basic function to run @ref NEActivationLayerKernelEx
+ *
+ * @note The function simulates an activation layer with the specified activation function.
+ */
+class NEActivationLayerEx : public INESimpleFunctionNoBorder
+{
+public:
+ /** Constructor
+ *
+ * @param[in] ctx Runtime context to be used by the function
+ */
+ NEActivationLayerEx(IRuntimeContext *ctx = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEActivationLayerEx(const NEActivationLayerEx &) = delete;
+ /** Default move constructor */
+ NEActivationLayerEx(NEActivationLayerEx &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEActivationLayerEx &operator=(const NEActivationLayerEx &) = delete;
+ /** Default move assignment operator */
+ NEActivationLayerEx &operator=(NEActivationLayerEx &&) = default;
+ /** [NEActivationLayerEx snippet] **/
+ /** Set the input and output tensor.
+ *
+ * @note If the output tensor is a nullptr or is equal to the input, the activation function will
+ * be performed in-place
+ *
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this
+ * tensor will store the result
+ * of the activation function. Data types supported:
+ * QASYMM8/QSYMM16/F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] activation_info Activation layer parameters.
+ */
+ void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info);
+ /** [NEActivationLayerEx snippet] **/
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEActivationLayerEx
+ *
+ * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor
+ * will store the result
+ * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input
+ * @param[in] act_info Activation layer information.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ActivationLayerInfo &act_info);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h
deleted file mode 100644
index 604cd93c4..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
-#define __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to perform reduce min/max operation */
-template <ReductionOperation op> class NEArgMinMaxStatic : public IFunction
-{
-public:
- /** Constructor */
- NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
- * @param[in] axis Reduction axis.
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- */
- void configure(ITensor *input, int axis, ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEArgMinMax
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
- * @param[in] axis Reduction axis.
- * @param[in] output Destination tensor. Data type supported: Same as @p input
- *
- * @return A status
- */
- static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- NEArgMinMaxLayer _reduction_kernel;
- Tensor _reduced_out;
- NEReshapeLayer _reshape;
-};
-
-/** Basic function to run arg max. */
-using NEArgMax = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
-/** Basic function to run arg min. */
-using NEArgMin = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
index 2a624656d..026d30098 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
#define __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h
index ae2f57f19..f0f0d8114 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NECAST_H__
#define __ARM_COMPUTE_NECAST_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h
index 90c0751b8..005d85add 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__
#define __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h
index f0c8ecdb5..27a38e982 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__
#define __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h
index 0646f1668..63f7714aa 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file NEEmbeddingLookup.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
index 42a786821..39c57eb70 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEFULLYCONNECTEDHYBRIDLAYER_H__
#define __ARM_COMPUTE_NEFULLYCONNECTEDHYBRIDLAYER_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h
index 6bd67f322..8f98f220a 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEFULLYCONNECTEDLAYEREX_H__
#define __ARM_COMPUTE_NEFULLYCONNECTEDLAYEREX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h
index 414b9f7d9..d844513c9 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h
@@ -1,4 +1,20 @@
/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -21,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__
#define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__
@@ -129,7 +146,6 @@ private:
NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
- // NEActivationLayer _activation_func;
Tensor _vector_sum_col;
Tensor _vector_sum_row;
@@ -148,7 +164,6 @@ private:
bool _reshape_b_only_on_first_run;
bool _is_prepared;
bool _fuse_output_stage;
- bool _run_activation;
bool _flip_signedness;
};
} // namespace arm_compute
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h
index d95e6a81e..155a1b837 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h
index 69abf0192..521a05ad9 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
/**
* @file NEHashtableLookup.h
* @ingroup COM_AI_RUNTIME
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h
index 521f50d2f..18e813923 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYEREX_H__
#define __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYEREX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h
index 5664c57cb..ca8413352 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEPRELU_H__
#define __ARM_COMPUTE_NEPRELU_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h
index 17c37d806..8a7b17946 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NERNNLAYER_EX_H__
#define __ARM_COMPUTE_NERNNLAYER_EX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h
index 7209acf19..03ac45798 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__
#define __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
index 9c558e6a2..7f764b000 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEON_REDUCE_OPERATION_H__
#define __ARM_COMPUTE_NEON_REDUCE_OPERATION_H__
@@ -32,6 +48,7 @@
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h
index c028ea658..48b416923 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEON_REDUCE_SUM_H__
#define __ARM_COMPUTE_NEON_REDUCE_SUM_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h
index 7180742df..1693922b7 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__
#define __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h
index 302f9af2e..3b695fbc0 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__
#define __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h
index 117717b55..9f32616f3 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__
#define __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
index a50b9ea60..408d150d0 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
#define __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/misc/functions/Utils.h b/compute/ARMComputeEx/arm_compute/runtime/misc/functions/Utils.h
index 53736f55f..84ee4ce93 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/misc/functions/Utils.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/misc/functions/Utils.h
@@ -95,8 +95,8 @@ void copyCast(const FromT value, arm_compute::ITensor *to, const arm_compute::Co
float realValue = static_cast<float>(value);
// NOTE We haven't known the policy of rounding for quantization.
// So this is set to a temporary value.
- *(to->ptr_to_element(id)) =
- to->info()->quantization_info().quantize(realValue, arm_compute::RoundingPolicy::TO_ZERO);
+ *(to->ptr_to_element(id)) = quantize_qasymm8(realValue, to->info()->quantization_info(),
+ arm_compute::RoundingPolicy::TO_ZERO);
break;
}
default:
diff --git a/compute/ARMComputeEx/resolve_includes.py b/compute/ARMComputeEx/resolve_includes.py
index b3e252892..f37c2a957 100644..100755
--- a/compute/ARMComputeEx/resolve_includes.py
+++ b/compute/ARMComputeEx/resolve_includes.py
@@ -1,4 +1,17 @@
# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
# Copyright (c) 2016, 2017 ARM Limited.
#
# SPDX-License-Identifier: MIT
@@ -20,6 +33,7 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
+
import collections
import os.path
import re
diff --git a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
index 7d4760600..191a5bc2a 100644
--- a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
+++ b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
@@ -53,13 +69,16 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map
{"gather_ex", "gather_ex.cl"},
{"gather_ex_1d", "gather_ex.cl"},
{"gather_ex_1d_out", "gather_ex.cl"},
+ {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"},
{"hashtable_lookup", "hashtable_lookup.cl"},
{"instance_normalization_ex", "instance_normalization_ex.cl"},
+ {"multiply_scale_factor", "multiply_scale_factor.cl"},
{"neg_tensor", "neg_tensor.cl"},
{"permute_generic", "permute_ex.cl"},
{"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"},
{"prelu", "prelu.cl"},
{"prelu_qasymm8", "prelu_quantized.cl"},
+ {"quantization_symm8", "quantization_symm8.cl"},
{"reduce_min_max", "reduce_operation.cl"},
{"reduce_sum_mean", "reduce_operation.cl"},
{"topkv2_init", "topkv2.cl"},
@@ -71,6 +90,7 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map
{"radixsort_pastehistograms", "topkv2_radixsort.cl"},
{"radixsort_reorder", "topkv2_radixsort.cl"},
{"topkv2_quicksort", "topkv2_quicksort.cl"},
+ {"scale_factor_symm8", "scale_factor.cl"},
{"space_to_batch_4d_nchw", "space_to_batch.cl"},
{"space_to_batch_4d_nhwc", "space_to_batch.cl"},
{"space_to_depth_nchw", "space_to_depth.cl"},
@@ -100,6 +120,10 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map
#include "./cl_kernels/gather_ex.clembed"
},
{
+ "gemmlowp_ex.cl",
+#include "./cl_kernels/gemmlowp_ex.clembed"
+ },
+ {
"hashtable_lookup.cl",
#include "./cl_kernels/hashtable_lookup.clembed"
},
@@ -120,6 +144,10 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map
#include "./cl_kernels/binary_logical_op.clembed"
},
{
+ "multiply_scale_factor.cl",
+#include "./cl_kernels/multiply_scale_factor.clembed"
+ },
+ {
"neg_tensor.cl",
#include "./cl_kernels/neg_tensor.clembed"
},
@@ -132,10 +160,18 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map
#include "./cl_kernels/prelu_quantized.clembed"
},
{
+ "quantization_symm8.cl",
+#include "./cl_kernels/quantization_symm8.clembed"
+ },
+ {
"reduce_operation.cl",
#include "./cl_kernels/reduce_operation.clembed"
},
{
+ "scale_factor.cl",
+#include "./cl_kernels/scale_factor.clembed"
+ },
+ {
"space_to_batch.cl",
#include "./cl_kernels/space_to_batch.clembed"
},
@@ -180,7 +216,7 @@ Kernel CLKernelLibraryEx::create_kernel(const std::string &kernel_name,
if (_kernel_program_map.end() == kernel_program_it)
{
- ARM_COMPUTE_ERROR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
+ ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
}
std::string concat_str;
@@ -261,7 +297,7 @@ const Program &CLKernelLibraryEx::load_program(const std::string &program_name)
if (_program_source_map.end() == program_source_it)
{
- ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
+ ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
}
program = Program(_context, program_name, program_source_it->second);
@@ -282,7 +318,7 @@ const Program &CLKernelLibraryEx::load_program(const std::string &program_name)
}
else
{
- ARM_COMPUTE_ERROR("Kernel file %s does not exist.", source_name.c_str());
+ ARM_COMPUTE_ERROR_VAR("Kernel file %s does not exist.", source_name.c_str());
}
#endif /* EMBEDDED_KERNELS */
@@ -315,7 +351,7 @@ std::string CLKernelLibraryEx::get_program_source(const std::string &program_nam
if (program_source_it == _program_source_map.end())
{
- ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
+ ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
}
return program_source_it->second;
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl
index 2a6dfc91f..03717cfe9 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl
index 77e239f55..f74c1c103 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers_asymm.h"
#ifdef SATURATE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
index 8c875516d..e249663bc 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#ifndef VEC_SIZE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl
index 2342fda9f..4147a0017 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#ifndef SCALE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl
index e005322f7..0285c955b 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
index dd8cb6d93..92e5dfbee 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#ifndef VEC_SIZE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl
index 09f776156..2236021f1 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#if defined(DATA_TYPE) && defined(AXIS) && defined(INDICES_DIM)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl
new file mode 100644
index 000000000..80ba73d1d
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "helpers.h"
+
+#if defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y) && \
+ defined(COLS_A)
+#define VECTOR_CHAR VEC_DATA_TYPE(char, NUM_ELEMS_PROCESSED_PER_THREAD_X)
+#define VECTOR_INT VEC_DATA_TYPE(int, NUM_ELEMS_PROCESSED_PER_THREAD_X)
+#define VECTOR_FLOAT VEC_DATA_TYPE(float, NUM_ELEMS_PROCESSED_PER_THREAD_X)
+/** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B
+ * (src1) in case both matrices have not beed reshaped
+ *
+ * @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A
+ *
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data type:
+ * QASYMM8
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data type:
+ * same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the output tensor (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemmlowp_mm_midgard_ex(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+ IMAGE_DECLARATION(dst), uint src0_stride_z, uint src1_stride_z,
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+ )
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and Matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx;
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension
+ // in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ int end_row_vec_a = src_addr.s0 + COLS_A;
+
+ VECTOR_INT acc0 = 0;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ VECTOR_INT acc1 = 0;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ VECTOR_INT acc2 = 0;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ VECTOR_INT acc3 = 0;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ VECTOR_INT acc4 = 0;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+
+ for (; src_addr.s0 <= (end_row_vec_a - 2); src_addr += (int2)(2, 2 * src1_stride_y))
+ {
+ // Load values from matrix A
+ char2 a0 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ char2 a1 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ char2 a2 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ char2 a3 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ char2 a4 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 4 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ // Load values from matrix B
+ VECTOR_CHAR b0 =
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
+ VECTOR_CHAR b1 = VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(
+ 0, (__global char *)(src1_ptr + src_addr.s1 + src1_stride_y));
+
+ // Accumulate
+ acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0.s0;
+ acc0 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a0.s1;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a1.s0;
+ acc1 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a1.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a2.s0;
+ acc2 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a2.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a3.s0;
+ acc3 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a3.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ acc4 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a4.s0;
+ acc4 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a4.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ }
+
+ for (; src_addr.s0 < end_row_vec_a; src_addr += (int2)(1, src1_stride_y))
+ {
+ // Load values from matrix A
+ char a0 = *(__global char *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ char a1 = *(__global char *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ char a2 = *(__global char *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ char a3 = *(__global char *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ char a4 = *(__global char *)(src0_ptr + src_addr.s0 + 4 * src0_stride_y);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ // Load values from matrix B
+ VECTOR_CHAR b0 =
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
+
+ // Accumulate
+ acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a2;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a3;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ acc4 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a4;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ }
+
+ const int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension
+ // in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ uint8 zout = ((uint8)(0, 1, 2, 3, 4, 5, 6, 7) +
+ (uint8)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint8)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst.ptr += z * dst_stride_z * DEPTH_GEMM3D;
+
+ // Store the result
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc0, VECTOR_INT), 0, (__global int *)(dst.ptr + 0 * dst_stride_y + zout.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc1, VECTOR_INT), 0, (__global int *)(dst.ptr + 1 * dst_stride_y + zout.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc2, VECTOR_INT), 0, (__global int *)(dst.ptr + 2 * dst_stride_y + zout.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc3, VECTOR_INT), 0, (__global int *)(dst.ptr + 3 * dst_stride_y + zout.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc4, VECTOR_INT), 0, (__global int *)(dst.ptr + 4 * dst_stride_y + zout.s4));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst.ptr += z * dst_stride_z;
+
+ // Store the result
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc0, VECTOR_INT), 0, (__global int *)(dst.ptr + 0 * dst_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc1, VECTOR_INT), 0, (__global int *)(dst.ptr + 1 * dst_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc2, VECTOR_INT), 0, (__global int *)(dst.ptr + 2 * dst_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc3, VECTOR_INT), 0, (__global int *)(dst.ptr + 3 * dst_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc4, VECTOR_INT), 0, (__global int *)(dst.ptr + 4 * dst_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+}
+#endif // defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y) &&
+ // defined(COLS_A)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
index 73f29e3e5..a4f7dbd48 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#ifndef VEC_SIZE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
index 0e123ae0a..2d0b6a299 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
@@ -1,4 +1,20 @@
/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -21,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef ARM_COMPUTE_HELPER_H
#define ARM_COMPUTE_HELPER_H
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
index c39138caa..a83b1a8a5 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
@@ -1,4 +1,20 @@
/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -21,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef ARM_COMPUTE_HELPERS_ASYMM_H
#define ARM_COMPUTE_HELPERS_ASYMM_H
@@ -403,4 +420,4 @@ ASYMM_RESCALE_IMPL(4)
ASYMM_RESCALE_IMPL(8)
ASYMM_RESCALE_IMPL(16)
-#endif // ARM_COMPUTE_HELPERS_ASYMM_H \ No newline at end of file
+#endif // ARM_COMPUTE_HELPERS_ASYMM_H
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl
index 1d96150f8..014842680 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "helpers.h"
#if defined(VEC_SIZE) && defined(DATA_TYPE) && defined(EPSILON) && defined(DIM_X) && \
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl
new file mode 100644
index 000000000..3943fc4c2
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "helpers.h"
+
+#if defined(VEC_SIZE) && defined(DATA_TYPE)
+
+/** This kernel multiplies the input tensor by the scale factor.
+ *
+ * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
+ * -DDATA_TYPE=float
+ * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
+ * -DVEC_SIZE=16
+ * @note Quantization scale of input tensor is passed in with -DSCALE=scale.
+ *
+ * @param[in] input_ptr Pointer to the source tensor. Supported data
+ * types: S32
+ * @param[in] input_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] input_step_x input_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] input_step_y input_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in]  scale_ptr                          Pointer to the scale tensor. Supported data
+ * types: F16/F32
+ * @param[in] scale_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] scale_step_x scale_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] scale_offset_first_element_in_bytes The offset of the first element in the scale
+ * tensor
+ * @param[out] output_ptr Pointer to the destination tensor. Supported
+ * data types: F16/F32
+ * @param[in] output_stride_x Stride of the destination tensor in X dimension
+ * (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the destination tensor in Y dimension
+ * (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void multiply_scale_factor(IMAGE_DECLARATION(input), VECTOR_DECLARATION(scale),
+ IMAGE_DECLARATION(output), float multiplier)
+{
+ // Get pixels pointer
+ Image input = CONVERT_TO_IMAGE_STRUCT(input);
+ Image output = CONVERT_TO_IMAGE_STRUCT(output);
+
+#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+ // Check if access on width gets out of bounds
+ // If it does shift access vector to access elements within bounds
+ const int xi = (int)(get_global_id(0) * VEC_SIZE);
+ input.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * input_stride_x;
+ output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x;
+
+ // Load data
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ val = CONVERT(VLOAD(VEC_SIZE)(0, (__global int *)input.ptr), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE));
+
+ // Create scale vector
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ vscale = *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1));
+
+ // Dequantize
+ vscale *= (DATA_TYPE)(multiplier);
+ val *= vscale;
+
+ // Store result
+ VSTORE(VEC_SIZE)
+ (val, 0, (__global DATA_TYPE *)output.ptr);
+#else // !defined(VEC_SIZE) || !defined(LAST_ACCESSED_X)
+ *((__global DATA_TYPE *)(output.ptr)) =
+ ((DATA_TYPE)(*((__global int *)(input.ptr)))) *
+ *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)) * (DATA_TYPE)(multiplier);
+#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+}
+
+#endif // defined(VEC_SIZE) && defined(DATA_TYPE)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl
index 4aa7883c3..15c16f80c 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#ifndef VEC_SIZE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
index 2074d3ceb..76fda9041 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers_asymm.h"
#ifdef SATURATE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl
index 62a8901f6..12c8eeb79 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#ifndef VEC_SIZE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl
index 5e0abd585..a66e107d1 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#define SUB(x, y) (x) - (y)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl
new file mode 100644
index 000000000..4ae9adb0b
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "helpers.h"
+
+#define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
+#define CONVERT_RTE_VEC_STR(x, type, size) (convert_##type##size##_rte((x)))
+#define CONVERT_RTE_VEC(x, type, size) CONVERT_RTE_VEC_STR(x, type, size)
+#define MIN_QUANT_VAL -127
+#define MAX_QUANT_VAL 127
+
+#if defined(VEC_SIZE) && defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
+
+/** This performs the quantization of floating point inputs to 8-bit signed integers.
+ *
+ * @note Input data type should be given as a preprocessor argument using -DDATA_TYPE_IN=type. e.g.
+ * -DDATA_TYPE_IN=short
+ * @note Output data type should be given as a preprocessor argument using -DDATA_TYPE_OUT=type.
+ * e.g. -DDATA_TYPE_OUT=char
+ * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
+ * -DVEC_SIZE=16
+ * @note Quantization scale should be given as a preprocessor argument using -DSCALE=scale. e.g.
+ * -DSCALE=0.125
+ * @note Quantization offset should be given as a preprocessor argument using -DOFFSET=offset. e.g.
+ * -DOFFSET=125
+ * @note Minimum value for quantized type should be given as a preprocessor argument using
+ * -DMIN_QUANT_VAL=value. e.g. -DMIN_QUANT_VAL=-127
+ * @note Maximum value for quantized type should be given as a preprocessor argument using
+ * -DMAX_QUANT_VAL=value. e.g. -DMAX_QUANT_VAL=127
+ *
+ * @param[in] input_ptr Pointer to the source tensor. Supported data
+ * types: F32
+ * @param[in] input_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] input_step_x input_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] input_step_y input_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] output_ptr Pointer to the destination tensor. Supported
+ * data types: S8
+ * @param[in] output_stride_x Stride of the destination tensor in X dimension
+ * (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the destination tensor in Y dimension
+ * (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ * @param[in]  scale_ptr                          Pointer to the scale tensor. Supported data
+ * types: F32
+ * @param[in]  scale_stride_x                     Stride of the scale tensor in X dimension
+ * (in bytes)
+ * @param[in] scale_step_x scale_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ */
+__kernel void quantization_symm8(IMAGE_DECLARATION(input), VECTOR_DECLARATION(scale),
+ IMAGE_DECLARATION(output))
+{
+ // Get pixels pointer
+ Image input = CONVERT_TO_IMAGE_STRUCT(input);
+ Image output = CONVERT_TO_IMAGE_STRUCT(output);
+
+#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+ // Check if access on width gets out of bounds
+ // If it does shift access vector to access elements within bounds
+ const int xi = (int)(get_global_id(0) * VEC_SIZE);
+ input.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * input_stride_x;
+ output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x;
+
+ // Load data
+ VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE)
+ val = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr);
+
+ // Create scale vector
+ const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) vscale =
+ *(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1));
+
+ // Quantize
+ VEC_DATA_TYPE(int, VEC_SIZE)
+ res = CLAMP(CONVERT_RTE_VEC(val / vscale, int, VEC_SIZE), MIN_QUANT_VAL, MAX_QUANT_VAL);
+
+ // Store result
+ VSTORE(VEC_SIZE)
+ (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, (__global DATA_TYPE_OUT *)output.ptr);
+#else //! defined(VEC_SIZE) || !defined(LAST_ACCESSED_X)
+ *((__global DATA_TYPE_OUT *)(output.ptr)) = (DATA_TYPE_OUT)CLAMP(
+ CONVERT_RTE((*(__global DATA_TYPE_IN *)input.ptr) /
+ (*(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1))),
+ int),
+ MIN_QUANT_VAL, MAX_QUANT_VAL);
+#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+}
+#endif // defined(VEC_SIZE) && defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
index d7ea2e2c4..832ac1270 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl
new file mode 100644
index 000000000..3d5e90356
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "helpers.h"
+
+#if defined(WIDTH)
+/** This function computes the symmetric 8-bit quantization scale factor for each row of the input.
+ *
+ * @note The width of the input tensor must be provided at compile time using
+ * -DWIDTH (e.g. -DWIDTH=320)
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types:
+ * F32
+ * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[in] dst_ptr                           Pointer to the scale factor vector. One scale
+ * factor is stored per input row. Supported data types: F32.
+ * @param[in] dst_stride_x Stride of the min/max vector in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the scale
+ * factor vector
+ */
+__kernel void scale_factor_symm8(IMAGE_DECLARATION(src), VECTOR_DECLARATION(dst))
+{
+ Image src = CONVERT_TO_IMAGE_STRUCT(src);
+
+ float4 min_value = (float4)FLT_MAX;
+ float4 max_value = (float4)-FLT_MAX;
+
+ int x = 0;
+ __global float *src_addr = (__global float *)(src.ptr);
+
+ for (; x <= (int)(WIDTH - 8); x += 8)
+ {
+ float8 value = vload8(0, (__global float *)(src_addr + x));
+
+ min_value = select(value.s0123, min_value, min_value < value.s0123);
+ min_value = select(value.s4567, min_value, min_value < value.s4567);
+
+ max_value = select(value.s0123, max_value, max_value > value.s0123);
+ max_value = select(value.s4567, max_value, max_value > value.s4567);
+ }
+
+ for (; x < WIDTH; ++x)
+ {
+ float value = *(src_addr + x);
+
+ min_value.s0 = min(min_value.s0, value);
+ max_value.s0 = max(max_value.s0, value);
+ }
+
+ // Perform min/max reduction
+ min_value.s01 = min(min_value.s01, min_value.s23);
+ min_value.s0 = min(min_value.s0, min_value.s1);
+ max_value.s01 = max(max_value.s01, max_value.s23);
+ max_value.s0 = max(max_value.s0, max_value.s1);
+
+ // Extract scale
+ max_value.s0 = max(fabs(min_value.s0), fabs(max_value.s0)) / 127.0f;
+
+ // Store the per-row scale factor
+ *((__global float *)(dst_ptr) + get_global_id(1)) = max_value.s0;
+}
+#endif // defined(WIDTH)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl
index 7367da7fb..b1611043b 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BATCH_IN) && defined(HEIGHT_IN) && \
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl
index a26e762e8..eb612f834 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) && defined(Z_IN)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl
index 50472e4f9..3eb1a4ce7 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
__kernel void topkv2_init(VECTOR_DECLARATION(input), __global float *in_key_buf,
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl
index 9594daf19..460de790b 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
__global inline float *get_vec_elem(Vector *vec, int idx)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl
index f6830d229..e9d4696b4 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
// reference:
// https://code.google.com/archive/p/ocl-radix-sort/source/default/source
// OpenCL kernel sources for the CLRadixSort class
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp
index 7f4b5b0df..06eeb5b98 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
index c14e73634..bb5556888 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp
index 35f607bd0..01ea655b4 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLCastKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
@@ -52,8 +76,9 @@ void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output, SubDataT
// Create kernel
if (is_data_type_quantized_asymmetric(input->info()->data_type()))
{
- const float scale_in = input->info()->quantization_info().scale;
- const int offset_in = input->info()->quantization_info().offset;
+ UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+ const float scale_in = qinfo.scale;
+ const int offset_in = qinfo.offset;
build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
@@ -62,8 +87,10 @@ void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output, SubDataT
}
else if (is_data_type_quantized_asymmetric(output->info()->data_type()))
{
- const float scale_in = output->info()->quantization_info().scale;
- const int offset_in = output->info()->quantization_info().offset;
+ UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
+ const float scale_in = qinfo.scale;
+    const int offset_in = qinfo.offset;
+
build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp
index 2a3433c2b..389136817 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
index 0862b78bf..79f5ce065 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp
new file mode 100644
index 000000000..235e8975d
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp
@@ -0,0 +1,372 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/AccessWindowTranspose.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "support/ToolchainSupport.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <tuple>
+
+using namespace arm_compute;
+using namespace arm_compute::misc::shape_calculator;
+
+namespace arm_compute
+{
+class Coordinates;
+} // namespace arm_compute
+
+namespace
+{
+using ElementsProcessed = Steps;
+
+Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
+ const ITensorInfo *output, const GEMMReshapeInfo &gemm_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input0->num_dimensions() > 4,
+ "The number of dimensions for the matrix A must be <= 4");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3,
+ "The number of dimensions for the matrix B must be <= 3");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 2 &&
+ gemm_info.reinterpret_input_as_3d(),
+ "The input1 tensor cannot have more than 2 dimensions if input0 "
+ "has to be reinterpreted as 3D");
+
+ const int m = gemm_info.m();
+ const int n = gemm_info.n();
+ const int k = gemm_info.k();
+
+ ARM_COMPUTE_UNUSED(m);
+ ARM_COMPUTE_UNUSED(n);
+ ARM_COMPUTE_UNUSED(k);
+
+ ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != static_cast<unsigned int>(k));
+ ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != static_cast<unsigned int>(n));
+ ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(1) != static_cast<unsigned int>(k));
+ if (gemm_info.reinterpret_input_as_3d())
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) * input0->dimension(2) !=
+ static_cast<unsigned int>(m));
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != static_cast<unsigned int>(m));
+ }
+
+ if (output->total_size() != 0)
+ {
+ const TensorInfo tensor_info_output =
+ output->clone()->set_tensor_shape(compute_mm_shape(*input0, *input1, false, gemm_info));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32);
+ }
+
+ return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1,
+ ITensorInfo *output,
+ const GEMMReshapeInfo &gemm_info,
+ ElementsProcessed &num_elements_processed)
+{
+ unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
+ unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
+ bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
+ bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0);
+
+ Window win{};
+ Window win_out{};
+ bool window_changed = false;
+
+ // In case both input and output have to be reinterpreted as 3D tensors,
+ // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
+ if (reinterpret_input_as_3d == reinterpret_output_as_3d)
+ {
+ reinterpret_input_as_3d = false;
+ reinterpret_output_as_3d = false;
+ }
+
+  // Output tensor auto initialization if not yet initialized
+ auto_init_if_empty(*output,
+ input0->clone()
+ ->set_tensor_shape(compute_mm_shape(*input0, *input1, false, gemm_info))
+ .set_data_type(DataType::S32));
+
+ TensorInfo tmp_info(*output);
+
+ if (reinterpret_output_as_3d)
+ {
+ // Since the output tensor has to be reinterpreted as 3D and the execute window is based on a 2D
+ // GEMM,
+ // the window needs to be constructed on the 2D collapsed version of the tensor
+ TensorShape tmp_shape(output->tensor_shape());
+ tmp_shape.collapse(2U, 1U);
+ tmp_info.set_tensor_shape(tmp_shape);
+ }
+
+  // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor: fix num_elems_processed_per_iteration_x
+ // Note: if the dot product instruction is available, the 8x2 tile has to be used
+ num_elems_processed_per_iteration_x = 4;
+ num_elems_processed_per_iteration_y = std::min(static_cast<int>(output->dimension(1)), 4);
+
+ // Note: bottom paddings are calculated manually as the output can be reinterpreted as 3D tensor
+ // The only way to set properly the paddings, it is to set those explicitly through the
+ // AccessWindowStatic
+ const int m = reinterpret_input_as_3d ? input0->tensor_shape()[1] * input0->tensor_shape()[2]
+ : input0->tensor_shape()[1];
+ const int bottom_pad =
+ (num_elems_processed_per_iteration_y - (m % num_elems_processed_per_iteration_y)) %
+ num_elems_processed_per_iteration_y;
+
+ // Configure window
+ win = calculate_max_window(
+ tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
+ win_out = calculate_max_window(
+ *output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
+
+ AccessWindowStatic input0_access(input0, 0, 0, input0->dimension(0),
+ input0->dimension(1) + bottom_pad);
+ AccessWindowStatic input1_access(
+ input1, 0, 0, ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration_x),
+ input1->dimension(1));
+ AccessWindowStatic output_access(
+ output, 0, 0, ceil_to_multiple(output->dimension(0), num_elems_processed_per_iteration_x),
+ output->dimension(1) + bottom_pad);
+
+ window_changed =
+ update_window_and_padding(win, input0_access,
+ input1_access) || // window used by the execute_window_loop
+ update_window_and_padding(
+ win_out,
+ output_access); // window used to update the padding requirements of output tensor
+
+ Coordinates coord;
+ coord.set_num_dimensions(output->num_dimensions());
+ output_access.set_valid_region(win_out, ValidRegion(coord, output->tensor_shape()));
+
+ // Collapse along the Z direction
+ // This collapse needs to be here in order to tune the Z dimension of LWS
+ Window collapsed = win;
+ const unsigned int dimension_to_collapse =
+ std::min(static_cast<unsigned int>(output->num_dimensions()), 2u);
+ collapsed = win.collapse(win, dimension_to_collapse);
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_pair(err, collapsed);
+}
+} // namespace
+
+CLGEMMLowpMatrixMultiplyKernelEx::CLGEMMLowpMatrixMultiplyKernelEx()
+ : _input0(nullptr), _input1(nullptr), _output(nullptr), _slide_matrix_b(true),
+ _reinterpret_input_as_3d(false), _reinterpret_output_as_3d(false)
+{
+}
+
+void CLGEMMLowpMatrixMultiplyKernelEx::configure(const ICLTensor *input0, const ICLTensor *input1,
+ ICLTensor *output,
+ const GEMMReshapeInfo &gemm_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
+
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input0->info(), input1->info(), output->info(), gemm_info));
+
+ _input0 = input0;
+ _input1 = input1;
+ _output = output;
+ _reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
+ _reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0);
+
+ // In case both input and output have to be reinterpreted as 3D tensors,
+ // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
+ if (_reinterpret_input_as_3d == _reinterpret_output_as_3d)
+ {
+ _reinterpret_input_as_3d = false;
+ _reinterpret_output_as_3d = false;
+ }
+
+ // Check if we need to slide the matrix B
+ const unsigned int num_dimensions_input0 = _reinterpret_input_as_3d
+ ? _input0->info()->num_dimensions() - 1
+ : _input0->info()->num_dimensions();
+ _slide_matrix_b = (_input1->info()->num_dimensions() >= num_dimensions_input0);
+
+ ElementsProcessed num_elements_processed{};
+
+ // Configure kernel window
+ auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(),
+ gemm_info, num_elements_processed);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICLKernel::configure_internal(win_config.second);
+
+ // Create build options
+ std::string kernel_name(" ");
+ CLBuildOptions build_opts;
+ build_opts.add_option_if(_reinterpret_input_as_3d, "-DREINTERPRET_INPUT_AS_3D");
+ build_opts.add_option_if(_reinterpret_output_as_3d, "-DREINTERPRET_OUTPUT_AS_3D");
+ build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d,
+ "-DHEIGHT_GEMM3D=" +
+ support::cpp11::to_string(output->info()->dimension(1)));
+ build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d,
+ "-DDEPTH_GEMM3D=" +
+ support::cpp11::to_string(output->info()->dimension(2)));
+ build_opts.add_option_if(!_slide_matrix_b,
+ "-DMATRIX_B_DEPTH=" +
+ support::cpp11::to_string(input1->info()->dimension(2)));
+ build_opts.add_option("-DCOLS_A=" + support::cpp11::to_string(input0->info()->dimension(0)));
+ build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_X=" +
+ support::cpp11::to_string(num_elements_processed.x()));
+ build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_Y=" +
+ support::cpp11::to_string(num_elements_processed.y()));
+
+ kernel_name = "gemmlowp_mm_midgard_ex";
+
+ // Create kernel
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
+
+ // Set config_id for enabling LWS tuning
+ _config_id = kernel_name;
+ _config_id += "_";
+ _config_id += (_reinterpret_input_as_3d ? "3di_" : "");
+ _config_id += (_reinterpret_output_as_3d ? "3do_" : "");
+ _config_id += lower_string(string_from_data_type(input0->info()->data_type()));
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(output->info()->dimension(1));
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(output->info()->dimension(0));
+}
+
+Status CLGEMMLowpMatrixMultiplyKernelEx::validate(const ITensorInfo *input0,
+ const ITensorInfo *input1,
+ const ITensorInfo *output,
+ const GEMMReshapeInfo &gemm_info)
+{
+ ElementsProcessed num_elements_processed{};
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output, gemm_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(input0->clone().get(), input1->clone().get(),
+ output->clone().get(), gemm_info, num_elements_processed)
+ .first);
+
+ return Status{};
+}
+
+void CLGEMMLowpMatrixMultiplyKernelEx::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ if (_input1->info()->num_dimensions() < 3)
+ {
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
+ }
+
+ Window slice = window.first_slice_window_3D();
+ Window slice_matrix_b = slice;
+
+ slice_matrix_b.set(Window::DimX, Window::Dimension(0, 1, 1));
+ slice_matrix_b.set(Window::DimY, Window::Dimension(0, 1, 1));
+
+ if (_reinterpret_input_as_3d)
+ {
+ // Pass bottom paddings to the kernel if the input has to be reinterpreted as 3D tensor
+ const unsigned int idx0 = 3 * num_arguments_per_2D_tensor() + 3;
+ const unsigned int total_cross_plane_pad =
+ _input0->info()->padding().top + _input0->info()->padding().bottom;
+ _kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad));
+ }
+
+ if (_reinterpret_output_as_3d)
+ {
+ // Pass bottom paddings to the kernel if the output has to be reinterpreted as 3D tensor
+ const unsigned int idx0 =
+ 3 * num_arguments_per_2D_tensor() + 3 + (_reinterpret_input_as_3d ? 1 : 0);
+ const unsigned int total_cross_plane_pad =
+ _output->info()->padding().top + _output->info()->padding().bottom;
+ _kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad));
+ }
+
+ do
+ {
+ Window slice_b = slice;
+ // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A
+ // more than 2
+ // This scenario can happen when the matrix multiplication is used to perform a convolution
+ // operation
+ if (!_slide_matrix_b)
+ {
+ slice_b = slice_matrix_b;
+ }
+
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _input0, slice);
+ add_2D_tensor_argument(idx, _input1, slice_b);
+ add_2D_tensor_argument(idx, _output, slice);
+ _kernel.setArg<cl_uint>(idx++,
+ static_cast<unsigned int>(_input0->info()->strides_in_bytes()[2]));
+ _kernel.setArg<cl_uint>(idx++,
+ static_cast<unsigned int>(_input1->info()->strides_in_bytes()[2]));
+ _kernel.setArg<cl_uint>(idx++,
+ static_cast<unsigned int>(_output->info()->strides_in_bytes()[2]));
+ enqueue(queue, *this, slice, lws_hint());
+ } while (window.slide_window_slice_3D(slice));
+}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
index 718f615f9..3a25987d0 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLGatherExKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
index 31e98c9a8..7fbdcdaa7 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp
index 5db414f62..b45f6bb24 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp
new file mode 100644
index 000000000..d305896ea
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLValidate.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+using namespace arm_compute;
+
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *scale_factor,
+ const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scale_factor, 1, DataType::F16,
+ DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->num_dimensions() > 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->dimension(0) != input->dimension(1));
+ ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+
+ // Checks performed when output is configured
+ if ((output->total_size() != 0))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ }
+
+ return Status{};
+}
+
+std::tuple<Status, Window> validate_and_configure_window(const ITensorInfo *input,
+ ITensorInfo *output)
+{
+ // Configure kernel window
+ Window win = calculate_max_window(*input, Steps());
+
+ // Output tensor auto initialization if not yet initialized
+ auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32);
+
+ // CLMultiplyScaleFactorKernel doesn't need padding so update_window_and_padding() can be
+ // skipped
+ Coordinates coord;
+ coord.set_num_dimensions(output->num_dimensions());
+ output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
+
+ return std::make_tuple(Status{}, win);
+}
+} // namespace
+
+CLMultiplyScaleFactorKernel::CLMultiplyScaleFactorKernel()
+ : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
+{
+}
+
+void CLMultiplyScaleFactorKernel::configure(const ICLTensor *input, const ICLTensor *scale_factor,
+ ICLTensor *output, float multiplier)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input->info(), scale_factor->info(), output->info()));
+
+ _input = input;
+ _scale_factor = scale_factor;
+ _output = output;
+ _multiplier = multiplier;
+
+ const int vec_size_x = 16 / output->info()->element_size();
+ const int output_width_x = output->info()->tensor_shape().x();
+ const bool multi_access_x = (output_width_x / vec_size_x > 0);
+
+ // Create and update the window (if needed)
+ Window win = calculate_max_window(*output->info());
+ if (multi_access_x)
+ {
+ win.set(Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x),
+ vec_size_x));
+ }
+ ICLKernel::configure_internal(win);
+
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
+ build_opts.add_option_if(
+ multi_access_x, "-DLAST_ACCESSED_X=" +
+ support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
+
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel("multiply_scale_factor", build_opts.options()));
+}
+
+Status CLMultiplyScaleFactorKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *scale_factor,
+ const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
+ return Status{};
+}
+
+void CLMultiplyScaleFactorKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+ Window slice = window_collapsed.first_slice_window_2D();
+
+ // Set scale_factor window
+ Window win_scale = calculate_max_window(*_scale_factor->info(), Steps());
+
+ do
+ {
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _input, slice);
+ add_1D_tensor_argument(idx, _scale_factor, win_scale);
+ add_2D_tensor_argument(idx, _output, slice);
+ _kernel.setArg<float>(idx++, _multiplier);
+ enqueue(queue, *this, slice, lws_hint());
+ } while (window_collapsed.slide_window_slice_2D(slice));
+}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
index ecfe05a51..74f7b4158 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLNegKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp
index e7d587029..8910a7b80 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLPReLUKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
@@ -72,18 +96,18 @@ void CLPReLUKernel::configure(const ICLTensor *input, const ICLTensor *alpha, IC
if (is_data_type_quantized_asymmetric(input->info()->data_type()))
{
- build_opts.emplace("-DOFF_IN=" +
- support::cpp11::to_string(input->info()->quantization_info().offset));
- build_opts.emplace("-DOFF_ALPHA=" +
- support::cpp11::to_string(alpha->info()->quantization_info().offset));
- build_opts.emplace("-DOFF_OUT=" +
- support::cpp11::to_string(output->info()->quantization_info().offset));
- build_opts.emplace("-DSCALE_IN=" +
- support::cpp11::to_string(input->info()->quantization_info().scale));
- build_opts.emplace("-DSCALE_ALPHA=" +
- support::cpp11::to_string(alpha->info()->quantization_info().scale));
- build_opts.emplace("-DSCALE_OUT=" +
- support::cpp11::to_string(output->info()->quantization_info().scale));
+ build_opts.emplace("-DOFF_IN=" + support::cpp11::to_string(
+ input->info()->quantization_info().uniform().offset));
+ build_opts.emplace("-DOFF_ALPHA=" + support::cpp11::to_string(
+ alpha->info()->quantization_info().uniform().offset));
+ build_opts.emplace("-DOFF_OUT=" + support::cpp11::to_string(
+ output->info()->quantization_info().uniform().offset));
+ build_opts.emplace("-DSCALE_IN=" + support::cpp11::to_string(
+ input->info()->quantization_info().uniform().scale));
+ build_opts.emplace("-DSCALE_ALPHA=" + support::cpp11::to_string(
+ alpha->info()->quantization_info().uniform().scale));
+ build_opts.emplace("-DSCALE_OUT=" + support::cpp11::to_string(
+ output->info()->quantization_info().uniform().scale));
kernel_name += "_qasymm8";
}
_kernel =
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp
new file mode 100644
index 000000000..2d551f654
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLValidate.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+namespace arm_compute
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *scale_factor,
+ const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, scale_factor);
+ ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->num_dimensions() > 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->dimension(0) != input->dimension(1));
+
+ // Output must always be initialized
+ ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+
+ return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
+{
+ // Configure kernel window
+ Window win = calculate_max_window(*input, Steps());
+
+ const int vec_size_x = 16 / input->element_size();
+ const int input_width_x = input->tensor_shape().x();
+ const bool multi_access_x = (input_width_x / vec_size_x > 0);
+
+ if (multi_access_x)
+ {
+ win.set(Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x),
+ vec_size_x));
+ }
+
+ Coordinates coord;
+ coord.set_num_dimensions(output->num_dimensions());
+ output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
+
+ return std::make_pair(Status{}, win);
+}
+} // namespace
+
+CLQuantizationSymmetricKernel::CLQuantizationSymmetricKernel()
+ : _input(nullptr), _scale_factor(nullptr), _output(nullptr)
+{
+}
+
+void CLQuantizationSymmetricKernel::configure(const ICLTensor *input, const ICLTensor *scale_factor,
+ ICLTensor *output)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, scale_factor, output);
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input->info(), scale_factor->info(), output->info()));
+
+ _input = input;
+ _scale_factor = scale_factor;
+ _output = output;
+
+ const int vec_size_x = 16 / input->info()->element_size();
+ const int input_width_x = input->info()->tensor_shape().x();
+ const bool multi_access_x = (input_width_x / vec_size_x > 0);
+
+ // Configure kernel window
+ auto win_config = validate_and_configure_window(input->info(), output->info());
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICLKernel::configure_internal(win_config.second);
+
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+ build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE_OUT=" +
+ get_cl_type_from_data_type(output->info()->data_type()));
+ build_opts.add_option_if(
+ multi_access_x, "-DLAST_ACCESSED_X=" +
+ support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
+
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel("quantization_symm8", build_opts.options()));
+}
+
+Status CLQuantizationSymmetricKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *scale_factor,
+ const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(input->clone().get(), output->clone().get()).first);
+
+ return Status{};
+}
+
+void CLQuantizationSymmetricKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ // Support only 2D
+ Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+ Window slice = window_collapsed.first_slice_window_2D();
+
+ do
+ {
+ Window scale_slice = slice.shift_dimensions(1);
+
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _input, slice);
+ add_1D_tensor_argument(idx, _scale_factor, scale_slice);
+ add_2D_tensor_argument(idx, _output, slice);
+ enqueue(queue, *this, slice, lws_hint());
+ } while (window_collapsed.slide_window_slice_2D(slice));
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
index 24e89db28..a98318323 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp
new file mode 100644
index 000000000..ff1904abd
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include <climits>
+
+using namespace arm_compute;
+using namespace arm_compute::misc::shape_calculator;
+
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
+
+ if (output->tensor_shape().total_size() > 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+
+ TensorShape output_shape = TensorShape{input->dimension(1)};
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
+ }
+
+ return Status{};
+}
+
+std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
+{
+ TensorShape output_shape = TensorShape{input->dimension(1)};
+
+ // Output auto initialization if not yet initialized
+ auto_init_if_empty(*output, output_shape, 1, input->data_type());
+
+ const unsigned int num_elems_processed_per_iteration = 1;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+ AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
+ AccessWindowStatic output_access(output, 0, 0, output->dimension(0), 1);
+
+ bool window_changed = update_window_and_padding(win, input_access, output_access);
+
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_tuple(err, win);
+}
+} // namespace
+
+CLScaleFactorSymm8Kernel::CLScaleFactorSymm8Kernel() : _input(nullptr), _output(nullptr) {}
+
+void CLScaleFactorSymm8Kernel::configure(const ICLTensor *input, ICLTensor *output)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
+
+ _input = input;
+ _output = output;
+
+ std::set<std::string> build_opts;
+ build_opts.emplace("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
+
+ // Create kernel
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel("scale_factor_symm8", build_opts));
+
+ auto win_config = validate_and_configure_window(input->info(), output->info());
+
+ ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
+
+ ICLKernel::configure_internal(std::get<1>(win_config));
+}
+
+Status CLScaleFactorSymm8Kernel::validate(const ITensorInfo *input, const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
+
+ return Status{};
+}
+
+void CLScaleFactorSymm8Kernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+
+ Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+ Window slice = window_collapsed.first_slice_window_2D();
+ slice.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ do
+ {
+ Window output_slice = slice.shift_dimensions(1);
+
+ unsigned int idx = 0;
+ // Set inputs
+ add_2D_tensor_argument(idx, _input, slice);
+ add_1D_tensor_argument(idx, _output, output_slice);
+ enqueue(queue, *this, slice, lws_hint());
+ } while (window_collapsed.slide_window_slice_2D(slice));
+}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp
index f7836b6cd..8b9b57fd8 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
@@ -147,8 +171,8 @@ void CLSpaceToBatchNDKernel::configure(const ICLTensor *input, const ICLTensor *
build_opts.emplace("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(3)));
if (input->info()->data_type() == DataType::QASYMM8)
{
- build_opts.emplace("-DZERO_VALUE=" +
- support::cpp11::to_string(input->info()->quantization_info().offset));
+ build_opts.emplace("-DZERO_VALUE=" + support::cpp11::to_string(
+ input->info()->quantization_info().uniform().offset));
}
else
{
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp
index b085192a2..64fc0384e 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp
index 4f2b388c9..151d45e8d 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp
index 6cc8d9d13..61999cbd4 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2019 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp b/compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp
new file mode 100644
index 000000000..d6c49b2b4
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h"
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/Traits.h"
+
+namespace arm_compute
+{
+CPPOneHotKernelEx::CPPOneHotKernelEx()
+ : _indices(nullptr), _output(nullptr), _depth(0), _on_value(0), _off_value(0), _axis(-1)
+{
+}
+
+void CPPOneHotKernelEx::configure(const ITensor *indices, ITensor *output, const int depth,
+ const float on_value, const float off_value, const int axis)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(indices, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate(indices, depth, on_value, off_value, axis));
+
+ _indices = indices;
+ _output = output;
+ _depth = depth;
+ _on_value = on_value;
+ _off_value = off_value;
+ _axis = axis;
+
+ ICPPKernel::configure(Window()); // Default 1 iteration window
+}
+
+Status CPPOneHotKernelEx::validate(const ITensor *indices, const int depth, const float on_value,
+ const float off_value, const int axis)
+{
+ ARM_COMPUTE_UNUSED(on_value, off_value);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(indices, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(indices->info()->num_dimensions() != 1,
+ "Only 1D indices are supported.");
+ ARM_COMPUTE_RETURN_ERROR_ON(depth <= 0);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis != -1, "Only axis = -1 is supported.");
+ return Status{};
+}
+
+bool CPPOneHotKernelEx::is_parallelisable() const { return false; }
+
+void CPPOneHotKernelEx::run(const Window &window, const ThreadInfo &info)
+{
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IKernel::window(), window);
+
+ const auto num_indices = _indices->info()->dimension(0);
+ for (size_t i = 0; i < num_indices; ++i)
+ {
+ const auto index = *reinterpret_cast<int32_t *>(_indices->ptr_to_element(Coordinates{i}));
+ for (int d = 0; d < _depth; ++d)
+ *reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(d, i))) =
+ (d == index) ? _on_value : _off_value;
+ }
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp b/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
index 8ac667ceb..648afb304 100644
--- a/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h"
#include "arm_compute/core/Error.h"
@@ -81,7 +97,7 @@ void CPPUpsampleKernelEx::run(const Window &window, const ThreadInfo &info)
// The fill value is normally 0, but for QASYMM8 the '0' corresponds to the offset
const uint8_t fill_value =
_output->info()->data_type() == DataType::QASYMM8
- ? utility::clamp<uint8_t>(_output->info()->quantization_info().offset)
+ ? utility::clamp<uint8_t>(_output->info()->quantization_info().uniform().offset)
: 0;
// Filling a value different than 0 works only for QASYMM8 datatype since we are filling 1byte
// values in a buffer of uint8_ts
diff --git a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
index 4508f5800..254c33ea9 100644
--- a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -221,8 +236,9 @@ void elementwise_op_quantized(
const auto window_end_x = static_cast<int>(window.x().end());
const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
- const float output_scale = out->info()->quantization_info().scale;
- const int output_offset = out->info()->quantization_info().offset;
+ UniformQuantizationInfo qinfo = out->info()->quantization_info().uniform();
+ const float output_scale = qinfo.scale;
+ const int output_offset = qinfo.offset;
// Output quantization info (add 0.5 to round toward the nearest integer - 0.5 rounds away from
// zero)
@@ -238,8 +254,10 @@ void elementwise_op_quantized(
const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1;
const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
- const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info();
- const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info();
+ const UniformQuantizationInfo broadcast_qinfo =
+ broadcast_tensor->info()->quantization_info().uniform();
+ const UniformQuantizationInfo non_broadcast_qinfo =
+ non_broadcast_tensor->info()->quantization_info().uniform();
const int32x4_t voffset_non_broadcast = vdupq_n_s32(non_broadcast_qinfo.offset);
const float32x4_t vscale_non_broadcast = vdupq_n_f32(non_broadcast_qinfo.scale);
@@ -269,10 +287,8 @@ void elementwise_op_quantized(
for (; x < window_end_x; ++x)
{
const float afs =
- scvt_f32_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo.scale,
- non_broadcast_qinfo.offset);
- const float bfs =
- scvt_f32_qasymm8(broadcast_value, broadcast_qinfo.scale, broadcast_qinfo.offset);
+ dequantize_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo);
+ const float bfs = dequantize_qasymm8(broadcast_value, broadcast_qinfo);
*(output_ptr + x) =
(*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs,
out->info()->quantization_info());
@@ -283,12 +299,14 @@ void elementwise_op_quantized(
else
{
// Input1 quantization info
- const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset);
- const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale);
+ UniformQuantizationInfo qinfo = in1->info()->quantization_info().uniform();
+ const int32x4_t voffset1 = vdupq_n_s32(qinfo.offset);
+ const float32x4_t vscale1 = vdupq_n_f32(qinfo.scale);
// Input2 quantization info
- const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset);
- const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale);
+ qinfo = in2->info()->quantization_info().uniform();
+ const int32x4_t voffset2 = vdupq_n_s32(qinfo.offset);
+ const float32x4_t vscale2 = vdupq_n_f32(qinfo.scale);
// Clear X Dimension on execution window as we handle manually
input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));
@@ -301,26 +319,24 @@ void elementwise_op_quantized(
Iterator input2(in2, input2_win);
Iterator output(out, win);
- execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr());
- const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr());
- const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
+ execute_window_loop(win,
+ [&](const Coordinates &) {
+ const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr());
+ const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr());
+ const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
- int x =
- (*neon_func)(window_start_x, window_end_x, window_step_x, input1_ptr, input2_ptr,
- output_ptr, voffset1, voffset2, vscale1, vscale2, voffseto, invvscaleo);
- for (; x < window_end_x; ++x)
- {
- const float afs =
- scvt_f32_qasymm8(*(input1_ptr + x), input1_qinfo.scale, input1_qinfo.offset);
- const float bfs =
- scvt_f32_qasymm8(*(input2_ptr + x), input2_qinfo.scale, input2_qinfo.offset);
- *(output_ptr + x) = (*scalar_func)(afs, bfs, out->info()->quantization_info());
- }
- },
- input1, input2, output);
+ int x = (*neon_func)(window_start_x, window_end_x, window_step_x,
+ input1_ptr, input2_ptr, output_ptr, voffset1,
+ voffset2, vscale1, vscale2, voffseto, invvscaleo);
+ for (; x < window_end_x; ++x)
+ {
+ const float afs = dequantize_qasymm8(*(input1_ptr + x), input1_qinfo);
+ const float bfs = dequantize_qasymm8(*(input2_ptr + x), input2_qinfo);
+ *(output_ptr + x) =
+ (*scalar_func)(afs, bfs, out->info()->quantization_info());
+ }
+ },
+ input1, input2, output);
}
}
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp
new file mode 100644
index 000000000..648705ba9
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp
@@ -0,0 +1,730 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h"
+
+#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/NEAsymm.h"
+#include "arm_compute/core/NEON/NEFixedPoint.h"
+#include "arm_compute/core/NEON/NEMath.h"
+#include "arm_compute/core/NEON/NESymm.h"
+#include "arm_compute/core/NEON/wrapper/wrapper.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+#include <arm_neon.h>
+#include <array>
+#include <cmath>
+#include <map>
+#include <set>
+
+using namespace arm_compute;
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
+ const ActivationLayerInfo &activation_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
+ input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
+
+ static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations = {
+ ActivationLayerInfo::ActivationFunction::RELU,
+ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
+ ActivationLayerInfo::ActivationFunction::LOGISTIC,
+ ActivationLayerInfo::ActivationFunction::TANH};
+ static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations = {
+ ActivationLayerInfo::ActivationFunction::LOGISTIC,
+ ActivationLayerInfo::ActivationFunction::TANH};
+ const DataType data_type = input->data_type();
+ const QuantizationInfo &oq_info =
+ (output != nullptr) ? output->quantization_info() : input->quantization_info();
+ const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation();
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+ is_data_type_quantized_asymmetric(data_type) &&
+ (qasymm8_supported_activations.count(f_act) == 0),
+ "For QASYMM8 only tanh, logistic, relu and lower/upper bounded relu are supported");
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) &&
+ (qsymm16_supported_activations.count(f_act) == 0),
+ "For QSYMM16 only tanh and logistic are supported");
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) &&
+ (f_act == ActivationLayerInfo::ActivationFunction::TANH) &&
+ (oq_info != QuantizationInfo(1.f / 128.f, 128)));
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) &&
+ (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) &&
+ (oq_info != QuantizationInfo(1.f / 256.f, 0)));
+
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) &&
+ (f_act == ActivationLayerInfo::ActivationFunction::TANH) &&
+ (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) &&
+ (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) &&
+ (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
+
+ // Checks performed when output is configured
+ if ((output != nullptr) && (output->total_size() != 0))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ }
+
+ return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
+{
+ // Configure kernel window
+ Window win = calculate_max_window(*input, Steps());
+
+ if (output != nullptr)
+ {
+ // Output auto inizialitation if not yet initialized
+ auto_init_if_empty(*output, *input->clone());
+
+ // NEActivationLayerKernelEx doesn't need padding so update_window_and_padding() can be skipped
+ Coordinates coord;
+ coord.set_num_dimensions(output->num_dimensions());
+ output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
+ }
+
+ return std::make_pair(Status{}, win);
+}
+
+inline uint32x4_t vreinterpret_unsigend_int(const float32x4_t &vec)
+{
+ return vreinterpretq_u32_f32(vec);
+}
+
+inline float32x4_t vreinterpret_floating_point(const uint32x4_t &vec)
+{
+ return vreinterpretq_f32_u32(vec);
+}
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+inline uint16x8_t vreinterpret_unsigend_int(const float16x8_t &vec)
+{
+ return vreinterpretq_u16_f16(vec);
+}
+inline float16x8_t vreinterpret_floating_point(const uint16x8_t &vec)
+{
+ return vreinterpretq_f16_u16(vec);
+}
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
+} // namespace
+
+NEActivationLayerKernelEx::NEActivationLayerKernelEx()
+ : _input(nullptr), _output(nullptr), _func(nullptr), _act_info()
+{
+}
+
+void NEActivationLayerKernelEx::configure(ITensor *input, ITensor *output,
+ ActivationLayerInfo activation_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input);
+
+ _input = input;
+ _act_info = activation_info;
+ _output = input;
+
+ // Out-of-place calculation
+ if (output != nullptr)
+ {
+ _output = output;
+ }
+
+ // Disabled activation, thus no operation needed
+ if (!activation_info.enabled())
+ {
+ _func = nullptr;
+ }
+
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(
+ input->info(), (output != nullptr) ? output->info() : nullptr, activation_info));
+
+ // Activation functions : FP32
+ static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 = {
+ {ActivationFunction::ABS,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::ABS, float>},
+ {ActivationFunction::LINEAR,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LINEAR, float>},
+ {ActivationFunction::LOGISTIC,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, float>},
+ {ActivationFunction::RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, float>},
+ {ActivationFunction::BOUNDED_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, float>},
+ {ActivationFunction::LU_BOUNDED_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, float>},
+ {ActivationFunction::LEAKY_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LEAKY_RELU, float>},
+ {ActivationFunction::SOFT_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::SOFT_RELU, float>},
+ {ActivationFunction::ELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::ELU, float>},
+ {ActivationFunction::SQRT,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::SQRT, float>},
+ {ActivationFunction::SQUARE,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::SQUARE, float>},
+ {ActivationFunction::TANH,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, float>},
+ {ActivationFunction::IDENTITY,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, float>},
+ };
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ // Activation functions : FP16
+ static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 = {
+ {ActivationFunction::ABS,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::ABS, float16_t>},
+ {ActivationFunction::LINEAR,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LINEAR, float16_t>},
+ {ActivationFunction::LOGISTIC,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, float16_t>},
+ {ActivationFunction::RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, float16_t>},
+ {ActivationFunction::BOUNDED_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, float16_t>},
+ {ActivationFunction::LU_BOUNDED_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t>},
+ {ActivationFunction::LEAKY_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LEAKY_RELU, float16_t>},
+ {ActivationFunction::SOFT_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::SOFT_RELU, float16_t>},
+ {ActivationFunction::ELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::ELU, float16_t>},
+ {ActivationFunction::SQRT,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::SQRT, float16_t>},
+ {ActivationFunction::SQUARE,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::SQUARE, float16_t>},
+ {ActivationFunction::TANH,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, float16_t>},
+ {ActivationFunction::IDENTITY,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, float16_t>},
+ };
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
+
+ // Activation functions : QASYMM8
+ static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8 = {
+ {ActivationFunction::LOGISTIC,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, qasymm8_t>},
+ {ActivationFunction::BOUNDED_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, qasymm8_t>},
+ {ActivationFunction::LU_BOUNDED_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_t>},
+ {ActivationFunction::RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, qasymm8_t>},
+ {ActivationFunction::TANH,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, qasymm8_t>},
+ {ActivationFunction::IDENTITY,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, qasymm8_t>},
+ };
+
+ // Activation functions : QSYMM16
+ static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qsymm16 = {
+ {ActivationFunction::LOGISTIC,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, qsymm16_t>},
+ {ActivationFunction::TANH,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, qsymm16_t>},
+ };
+
+ switch (input->info()->data_type())
+ {
+ case DataType::QASYMM8:
+ _func = act_map_qasymm8[activation_info.activation()];
+ break;
+ case DataType::QSYMM16:
+ _func = act_map_qsymm16[activation_info.activation()];
+ break;
+ case DataType::F32:
+ _func = act_map_f32[activation_info.activation()];
+ break;
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ case DataType::F16:
+ _func = act_map_f16[activation_info.activation()];
+ break;
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ default:
+ ARM_COMPUTE_ERROR("Unsupported data type.");
+ }
+
+ // Configure kernel window
+ auto win_config =
+ validate_and_configure_window(input->info(), (output != nullptr) ? output->info() : nullptr);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICPPKernel::configure(win_config.second);
+}
+
+/** Floating-point activation implementation (F32, and F16 when
+ *  __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is available).
+ *
+ * Runs a 128-bit NEON vector loop over the X dimension, then a scalar loop for the
+ * left-over tail elements. The activation function F is a compile-time template
+ * parameter, so the per-element switch below is resolvable at compile time.
+ *
+ * @param[in] window Execution window passed by run().
+ */
+template <ActivationLayerInfo::ActivationFunction F, typename T>
+typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
+NEActivationLayerKernelEx::activation(const Window &window)
+{
+  /** NEON vector tag type. */
+  using ExactTagType =
+      typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
+
+  // Elements per 128-bit vector iteration (16 bytes / element size).
+  const int window_step_x = 16 / sizeof(T);
+  const auto window_start_x = static_cast<int>(window.x().start());
+  const auto window_end_x = static_cast<int>(window.x().end());
+  const ActivationFunction act = F;
+
+  // Collapse dimensions where possible and take over manual iteration along X.
+  Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+  win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+  Iterator input(_input, win_collapsed);
+  Iterator output(_output, win_collapsed);
+
+  // Broadcast constants used by the activation formulas below.
+  const auto infinity = wrapper::vdup_n(std::numeric_limits<T>::infinity(), ExactTagType{});
+  // Bias added under SQRT so vinvsqrt stays finite for a zero input (vector path only;
+  // the scalar tail below uses std::sqrt without it).
+  const auto epsilon = wrapper::vdup_n(static_cast<T>(1e-24), ExactTagType{});
+  const auto const_1 = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{});
+  const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
+  const auto va = wrapper::vdup_n(static_cast<T>(_act_info.a()), ExactTagType{});
+  const auto vb = wrapper::vdup_n(static_cast<T>(_act_info.b()), ExactTagType{});
+  const auto a = static_cast<T>(_act_info.a());
+  const auto b = static_cast<T>(_act_info.b());
+
+  execute_window_loop(
+      win_collapsed,
+      [&](const Coordinates &) {
+        const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
+        const auto output_ptr = reinterpret_cast<T *>(output.ptr());
+
+        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
+
+        // Compute S elements per iteration
+        int x = window_start_x;
+
+        for (; x <= (window_end_x - window_step_x); x += window_step_x)
+        {
+          const auto vin = wrapper::vloadq(input_ptr + x);
+          switch (act)
+          {
+            case ActivationFunction::ABS:
+              tmp = wrapper::vabs(vin);
+              break;
+            case ActivationFunction::LINEAR:
+              tmp = wrapper::vmla(vb, va, vin);
+              break;
+            case ActivationFunction::LOGISTIC:
+              // exp(-vin)
+              tmp = wrapper::vexpq(wrapper::vneg(vin));
+
+              // NaN -> INF
+              // Lanes where tmp != tmp (NaN, e.g. from exp overflow) are replaced by +inf
+              // so the final 1/(1+tmp) collapses to 0 for such lanes.
+              // NOTE(review): relies on helpers vreinterpret_floating_point /
+              // vreinterpret_unsigend_int, presumably declared earlier in this file
+              // (the latter name carries a spelling typo - confirm it matches).
+              tmp = vreinterpret_floating_point(wrapper::vorr(
+                  wrapper::vand(wrapper::vnot(wrapper::vceq(tmp, tmp)),
+                                vreinterpret_unsigend_int(infinity)),
+                  wrapper::vand(wrapper::vceq(tmp, tmp), vreinterpret_unsigend_int(tmp))));
+
+              // 1 / 1 + tmp
+              tmp = wrapper::vinv(wrapper::vadd(const_1, tmp));
+              break;
+            case ActivationFunction::RELU:
+              tmp = wrapper::vmax(const_0, vin);
+              break;
+            case ActivationFunction::BOUNDED_RELU:
+              tmp = wrapper::vmin(va, wrapper::vmax(const_0, vin));
+              break;
+            case ActivationFunction::LU_BOUNDED_RELU:
+              tmp = wrapper::vmin(va, wrapper::vmax(vb, vin));
+              break;
+            case ActivationFunction::LEAKY_RELU:
+              tmp = wrapper::vbsl(wrapper::vcgt(vin, const_0), vin, wrapper::vmul(va, vin));
+              break;
+            case ActivationFunction::SOFT_RELU:
+              tmp = wrapper::vlog(wrapper::vadd(const_1, wrapper::vexpq(vin)));
+              break;
+            case ActivationFunction::ELU:
+              tmp = wrapper::vbsl(wrapper::vcge(vin, const_0), vin,
+                                  wrapper::vmul(va, wrapper::vsub(wrapper::vexpq(vin), const_1)));
+              break;
+            case ActivationFunction::SQRT:
+              // sqrt(x) computed as 1 / rsqrt(x + epsilon).
+              // NOTE(review): uses raw operator+ on the NEON vector type instead of
+              // wrapper::vadd - confirm this compiles on all supported toolchains.
+              tmp = wrapper::vinv(wrapper::vinvsqrt(vin + epsilon));
+              break;
+            case ActivationFunction::SQUARE:
+              tmp = wrapper::vmul(vin, vin);
+              break;
+            case ActivationFunction::TANH:
+              tmp = wrapper::vmul(va, wrapper::vtanh(wrapper::vmul(vb, vin)));
+              break;
+            case ActivationFunction::IDENTITY:
+              tmp = vin;
+              break;
+            default:
+              ARM_COMPUTE_ERROR("Unsupported activation function");
+          }
+          wrapper::vstore(output_ptr + x, tmp);
+        }
+
+        // Compute left-over elements
+        // Scalar equivalents of the vector formulas above.
+        for (; x < window_end_x; ++x)
+        {
+          const T in = *(reinterpret_cast<const T *>(input_ptr + x));
+          T tmp;
+          switch (act)
+          {
+            case ActivationFunction::ABS:
+              tmp = std::abs(in);
+              break;
+            case ActivationFunction::LINEAR:
+              tmp = a * in + b;
+              break;
+            case ActivationFunction::LOGISTIC:
+              tmp = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-in));
+              break;
+            case ActivationFunction::RELU:
+              tmp = std::max<T>(static_cast<T>(0), in);
+              break;
+            case ActivationFunction::BOUNDED_RELU:
+              tmp = std::min<T>(a, std::max(static_cast<T>(0), in));
+              break;
+            case ActivationFunction::LU_BOUNDED_RELU:
+              tmp = std::min<T>(a, std::max<T>(b, in));
+              break;
+            case ActivationFunction::LEAKY_RELU:
+              tmp = (in > 0) ? in : a * in;
+              break;
+            case ActivationFunction::SOFT_RELU:
+              tmp = std::log(static_cast<T>(1) + std::exp(in));
+              break;
+            case ActivationFunction::ELU:
+              tmp = (in >= 0) ? in : a * (std::exp(in) - 1);
+              break;
+            case ActivationFunction::SQRT:
+              // NOTE(review): no epsilon bias here, unlike the vector path above -
+              // results differ slightly near zero between the two paths.
+              tmp = std::sqrt(in);
+              break;
+            case ActivationFunction::SQUARE:
+              tmp = in * in;
+              break;
+            case ActivationFunction::TANH:
+              tmp = a * std::tanh(b * in);
+              break;
+            case ActivationFunction::IDENTITY:
+              tmp = in;
+              break;
+            default:
+              ARM_COMPUTE_ERROR("Unsupported activation function");
+          }
+          *(output_ptr + x) = tmp;
+        }
+      },
+      input, output);
+}
+
+/** Activation implementation for QASYMM8 (asymmetric 8-bit quantized) tensors.
+ *
+ * RELU variants are computed directly in the quantized domain and then re-quantized
+ * to the output space with a fused multiply-add; LOGISTIC and TANH de-quantize to
+ * float, apply the function, and re-quantize. Other activations are unsupported.
+ *
+ * @param[in] window Execution window passed by run().
+ */
+template <ActivationLayerInfo::ActivationFunction F, typename T>
+typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type
+NEActivationLayerKernelEx::activation(const Window &window)
+{
+  // Elements per 128-bit vector iteration (16 x uint8).
+  const int window_step_x = 16 / sizeof(T);
+  const auto window_start_x = static_cast<int>(window.x().start());
+  const auto window_end_x = static_cast<int>(window.x().end());
+  const ActivationFunction act = F;
+
+  Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+  win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+  Iterator input(_input, win_collapsed);
+  Iterator output(_output, win_collapsed);
+
+  // Input/output may use different quantization parameters.
+  const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
+  const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
+  // Clamp bounds a/b quantized into the *input* space so comparisons happen on raw u8.
+  const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
+  const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
+  const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in);
+  const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in);
+  const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in);
+  const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
+  const auto vconst_1 = vdupq_n_f32(1.f);
+  const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
+  const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
+  const float a_f32 = _act_info.a();
+  const float b_f32 = _act_info.b();
+
+  // Initialise scale/offset for re-quantization
+  // out = in * s + o maps a raw input-space value to the output quantized space.
+  float s = qi_in.scale / qi_out.scale;
+  float o = -qi_in.offset * s + qi_out.offset;
+  float32x4_t vs = vdupq_n_f32(s);
+  float32x4_t vo = vdupq_n_f32(o);
+
+  execute_window_loop(
+      win_collapsed,
+      [&](const Coordinates &) {
+        const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
+        const auto output_ptr = reinterpret_cast<T *>(output.ptr());
+
+        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
+
+        // Compute S elements per iteration
+        int x = window_start_x;
+        for (; x <= (window_end_x - window_step_x); x += window_step_x)
+        {
+          const auto vin = wrapper::vloadq(input_ptr + x);
+          if (act == ActivationFunction::RELU)
+          {
+            // Perform activation
+            tmp = vmaxq_u8(vconst_0, vin);
+            // Re-quantize to new output space
+            tmp = vmlaq_qasymm8(tmp, vs, vo);
+          }
+          else if (act == ActivationFunction::BOUNDED_RELU)
+          {
+            // Perform activation
+            tmp = vminq_u8(va, vmaxq_u8(vconst_0, vin));
+            // Re-quantize to new output space
+            tmp = vmlaq_qasymm8(tmp, vs, vo);
+          }
+          else if (act == ActivationFunction::LU_BOUNDED_RELU)
+          {
+            // Perform activation
+            tmp = vminq_u8(va, vmaxq_u8(vb, vin));
+            // Re-quantize to new output space
+            tmp = vmlaq_qasymm8(tmp, vs, vo);
+          }
+          else if (act == ActivationFunction::LOGISTIC)
+          {
+            // De-quantize
+            const auto vin_deq = vdequantize(vin, qi_in);
+            // Perform activation
+            // sigmoid(x) = 1 / (1 + exp(-x)) on each of the four float32x4 lanes.
+            const float32x4x4_t tmp_dep = {{
+                wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
+                                            vin_deq.val[0])))),
+                wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
+                                            vin_deq.val[1])))),
+                wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
+                                            vin_deq.val[2])))),
+                wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
+                                            vin_deq.val[3])))),
+            }};
+            // Re-quantize to new output space
+            tmp = vquantize(tmp_dep, qi_out);
+          }
+          else if (act == ActivationFunction::TANH)
+          {
+            // De-quantize
+            const auto vin_deq = vdequantize(vin, qi_in);
+            // Perform activation
+            // a * tanh(b * x) on each of the four float32x4 lanes.
+            const float32x4x4_t tmp_dep = {{
+                wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
+                wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
+                wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))),
+                wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))),
+            }};
+            // Re-quantize to new output space
+            tmp = vquantize(tmp_dep, qi_out);
+          }
+          else
+          {
+            ARM_COMPUTE_ERROR("Unsupported activation function");
+          }
+          wrapper::vstore(output_ptr + x, tmp);
+        }
+
+        // Compute left-over elements
+        // Scalar equivalents; re-quantized values are clamped to the valid u8 range [0, 255].
+        for (; x < window_end_x; ++x)
+        {
+          T in = *(reinterpret_cast<const T *>(input_ptr + x));
+          T tmp;
+          if (act == ActivationFunction::RELU)
+          {
+            tmp = std::max(const_0, in);
+            tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
+          }
+          else if (act == ActivationFunction::BOUNDED_RELU)
+          {
+            tmp = std::min(a, std::max(const_0, in));
+            tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
+          }
+          else if (act == ActivationFunction::LU_BOUNDED_RELU)
+          {
+            tmp = std::min(a, std::max(b, in));
+            tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
+          }
+          else if (act == ActivationFunction::LOGISTIC)
+          {
+            float tmp_f = dequantize_qasymm8(in, qi_in);
+            tmp_f = 1.f / (1.f + std::exp(-tmp_f));
+            tmp = quantize_qasymm8(tmp_f, qi_out);
+          }
+          else if (act == ActivationFunction::TANH)
+          {
+            float tmp_f = dequantize_qasymm8(in, qi_in);
+            tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
+            tmp = quantize_qasymm8(tmp_f, qi_out);
+          }
+          else
+          {
+            ARM_COMPUTE_ERROR("Unsupported activation function");
+          }
+          *(output_ptr + x) = tmp;
+        }
+      },
+      input, output);
+}
+
+/** Activation implementation for QSYMM16 (symmetric 16-bit quantized) tensors.
+ *
+ * Only LOGISTIC and TANH are supported (matching act_map_qsymm16 in configure()):
+ * values are de-quantized to float, the function is applied, and the result is
+ * re-quantized with the output scale. Symmetric quantization has no offset, so
+ * only the scales are used.
+ *
+ * @param[in] window Execution window passed by run().
+ */
+template <ActivationLayerInfo::ActivationFunction F, typename T>
+typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type
+NEActivationLayerKernelEx::activation(const Window &window)
+{
+  // Elements per 128-bit vector iteration (8 x int16).
+  const int window_step_x = 16 / sizeof(T);
+  const auto window_start_x = static_cast<int>(window.x().start());
+  const auto window_end_x = static_cast<int>(window.x().end());
+  const ActivationFunction act = F;
+
+  Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+  win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+  Iterator input(_input, win_collapsed);
+  Iterator output(_output, win_collapsed);
+
+  const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
+  const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
+  const auto vconst_1 = vdupq_n_f32(1.f);
+  const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
+  const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
+  const float a_f32 = _act_info.a();
+  const float b_f32 = _act_info.b();
+
+  execute_window_loop(
+      win_collapsed,
+      [&](const Coordinates &) {
+        const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
+        const auto output_ptr = reinterpret_cast<T *>(output.ptr());
+
+        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
+        ARM_COMPUTE_UNUSED(tmp);
+
+        // Compute S elements per iteration
+        int x = window_start_x;
+        for (; x <= (window_end_x - window_step_x); x += window_step_x)
+        {
+          const auto vin = wrapper::vloadq(input_ptr + x);
+          if (act == ActivationFunction::LOGISTIC)
+          {
+            // De-quantize
+            const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
+            // Perform activation
+            // sigmoid(x) = 1 / (1 + exp(-x)) on both float32x4 halves.
+            const float32x4x2_t tmp_dep = {{
+                wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
+                                            vin_deq.val[0])))),
+                wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
+                                            vin_deq.val[1])))),
+            }};
+            // Re-quantize to new output space
+            tmp = vquantize_int16(tmp_dep, qi_out.scale);
+          }
+          else if (act == ActivationFunction::TANH)
+          {
+            // De-quantize
+            const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
+            // Perform activation
+            // a * tanh(b * x) on both float32x4 halves.
+            const float32x4x2_t tmp_dep = {{
+                wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
+                wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
+            }};
+            // Re-quantize to new output space
+            tmp = vquantize_int16(tmp_dep, qi_out.scale);
+          }
+          else
+          {
+            ARM_COMPUTE_ERROR("Unsupported activation function");
+          }
+          wrapper::vstore(output_ptr + x, tmp);
+        }
+
+        // Compute left-over elements
+        for (; x < window_end_x; ++x)
+        {
+          T in = *(reinterpret_cast<const T *>(input_ptr + x));
+          T tmp;
+          if (act == ActivationFunction::LOGISTIC)
+          {
+            float tmp_f = dequantize_qsymm16(in, qi_in.scale);
+            tmp_f = 1.f / (1.f + std::exp(-tmp_f));
+            tmp = quantize_qsymm16(tmp_f, qi_out);
+          }
+          else if (act == ActivationFunction::TANH)
+          {
+            float tmp_f = dequantize_qsymm16(in, qi_in.scale);
+            tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
+            tmp = quantize_qsymm16(tmp_f, qi_out);
+          }
+          else
+          {
+            ARM_COMPUTE_ERROR("Unsupported activation function");
+          }
+          *(output_ptr + x) = tmp;
+        }
+      },
+      input, output);
+}
+
+/** Static validation: checks input/output/activation compatibility and that a valid
+ *  execution window can be configured, without modifying any tensor.
+ *
+ * @param[in] input    Source tensor info.
+ * @param[in] output   Destination tensor info (nullptr for in-place execution).
+ * @param[in] act_info Activation layer parameters to validate.
+ *
+ * @return A Status (error on failure, empty Status on success).
+ */
+Status NEActivationLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+                                           const ActivationLayerInfo &act_info)
+{
+  // Note: act_info IS used below, so no ARM_COMPUTE_UNUSED marker is needed for it.
+  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
+  // Clone the infos so validate_and_configure_window cannot mutate the caller's objects.
+  ARM_COMPUTE_RETURN_ON_ERROR(
+      validate_and_configure_window(input->clone().get(),
+                                    (output != nullptr) ? output->clone().get() : nullptr)
+          .first);
+
+  return Status{};
+}
+
+/** Execute the kernel on the given window.
+ *
+ * Dispatches to the function pointer selected in configure() via _func.
+ *
+ * @param[in] window Region on which to execute the kernel.
+ * @param[in] info   Thread info (unused by this kernel).
+ */
+void NEActivationLayerKernelEx::run(const Window &window, const ThreadInfo &info)
+{
+  // Early exit on disabled activation
+  if (!_act_info.enabled())
+  {
+    return;
+  }
+
+  ARM_COMPUTE_UNUSED(info);
+  // Sanity checks: kernel must be configured, window must fit, dispatch target must exist.
+  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+  ARM_COMPUTE_ERROR_ON(_func == nullptr);
+
+  // Invoke the activation implementation chosen for the tensor's data type.
+  (this->*_func)(window);
+}
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
index d2f42de53..32d7d6237 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h"
#include "arm_compute/core/Error.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp
index 7e4fc129b..fbb9dbca9 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NECastKernel.h"
#include "arm_compute/core/AccessWindowStatic.h"
@@ -394,7 +410,8 @@ template <typename FromT> void run_cast(const ITensor *input, ITensor *output, c
case DataType::QASYMM8:
{
using to_vector = typename cast_vector<float>::type;
- const QuantizationInfo &qinfo_out = output->info()->quantization_info();
+ const UniformQuantizationInfo &qinfo_out =
+ output->info()->quantization_info().uniform();
const auto vf = vcast<to_vector, from_vector>(vin);
const auto vout = vquantize(vf, qinfo_out);
store_result<qasymm8_t>(reinterpret_cast<qasymm8_t *>(out.ptr()) + x, vout);
@@ -440,7 +457,8 @@ template <typename FromT> void run_cast(const ITensor *input, ITensor *output, c
case DataType::QASYMM8:
{
const QuantizationInfo &qinfo_out = output->info()->quantization_info();
- const auto qval = qinfo_out.quantize(static_cast<float>(val), rounding_policy);
+ const auto qval =
+ quantize_qasymm8(static_cast<float>(val), qinfo_out, rounding_policy);
*(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval;
break;
}
@@ -486,8 +504,8 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo
#else //__aarch64__
constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO;
#endif //__aarch64__
- const auto &qinfo_in = input->info()->quantization_info();
- const auto &qinfo_out = output->info()->quantization_info();
+ const auto &qinfo_in = input->info()->quantization_info().uniform();
+ const auto &qinfo_out = output->info()->quantization_info().uniform();
execute_window_loop(
win_collapsed,
@@ -547,7 +565,7 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo
for (; x < window_end_x; ++x)
{
qasymm8_t qval_in = *(in_ptr + x);
- const auto val = qinfo_in.dequantize(qval_in);
+ const auto val = dequantize_qasymm8(qval_in, qinfo_in);
switch (output->info()->data_type())
{
@@ -558,7 +576,7 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo
}
case DataType::QASYMM8:
{
- const auto qval_out = qinfo_out.quantize(val, rounding_policy);
+ const auto qval_out = quantize_qasymm8(val, qinfo_out, rounding_policy);
*(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval_out;
break;
}
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp
index 8a2223c26..95e269dee 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h"
#include "arm_compute/core/Helpers.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp
index cebd614df..200fc4f87 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h"
#include "arm_compute/core/CPP/Validate.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp
index 5401afea0..091d38c56 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h"
#include "arm_compute/core/Error.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
index ce2413dc1..4c0a5e799 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
#include "arm_compute/core/CPP/Validate.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
index 391337bfb..30787c0a4 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h"
#include "arm_compute/core/Error.h"
@@ -118,7 +134,7 @@ void NEHashtableLookupKernel::run(const Window &window, const ThreadInfo &info)
const size_t lookup_dim = _output->info()->num_dimensions() - 1;
const int const_0 = _output->info()->data_type() == DataType::QASYMM8
- ? _output->info()->quantization_info().offset
+ ? _output->info()->quantization_info().uniform().offset
: 0;
std::unordered_map<int32_t, size_t> key_index_map;
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp
index 1ea77fb5c..49adf1462 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h"
#include "arm_compute/core/CPP/Validate.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp
index de218d489..b92130cec 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h"
#include "arm_compute/core/Error.h"
@@ -71,12 +87,6 @@ inline int32x4x4_t load_value(const int32_t *input_ptr)
wrapper::vloadq(input_ptr + 8), wrapper::vloadq(input_ptr + 12)};
}
-inline float32x4x4_t load_value(const float *input_ptr)
-{
- return {wrapper::vloadq(input_ptr), wrapper::vloadq(input_ptr + 4),
- wrapper::vloadq(input_ptr + 8), wrapper::vloadq(input_ptr + 12)};
-}
-
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
inline const float32x4x4_t load_value(const float16_t *input_ptr)
{
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp
index ad1bb9051..641641b5a 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEPReLUKernel.h"
#include "arm_compute/core/ITensor.h"
@@ -63,7 +79,8 @@ template <ConditionalOperation op>
inline uint8_t elementwise_conditional_op_quantized_scalar(const float &a, const float &b,
QuantizationInfo qinfo)
{
- return qinfo.quantize(elementwise_conditional_op_scalar<op>(a, b), RoundingPolicy::TO_NEAREST_UP);
+ return quantize_qasymm8(elementwise_conditional_op_scalar<op>(a, b), qinfo,
+ RoundingPolicy::TO_NEAREST_UP);
}
template <ConditionalOperation op, typename VectorType>
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp
index acf0092eb..6ba0f1fd4 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h"
#include "arm_compute/core/Error.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp
index 59e7d9beb..3b65eac10 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h"
#include "arm_compute/core/CPP/Validate.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp
index 36a2f55a9..44feb200f 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h"
#include "arm_compute/core/Helpers.h"
diff --git a/compute/ARMComputeEx/src/core/UtilsEx.cpp b/compute/ARMComputeEx/src/core/UtilsEx.cpp
index 94242b56b..863316909 100644
--- a/compute/ARMComputeEx/src/core/UtilsEx.cpp
+++ b/compute/ARMComputeEx/src/core/UtilsEx.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/UtilsEx.h"
#include "arm_compute/core/Error.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp
index ae64a6edd..2d379cf36 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLArgOperation.h"
#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
index 7c5fe5eda..92ee69a36 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h"
#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
index 742fc6f59..b3118f39e 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLCast.h"
#include "arm_compute/core/CL/kernels/CLCastKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
index c2e4ca9ff..db662505a 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLDepthToSpace.h"
#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
index 2781784ca..3d9a28a48 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h"
#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
new file mode 100644
index 000000000..f098832b0
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h"
+
+#include "arm_compute/core/Size2D.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/ToolchainSupport.h"
+
+#include <algorithm>
+
+using namespace arm_compute;
+using namespace arm_compute::misc::shape_calculator;
+
+namespace
+{
+Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output)
+{
+ ARM_COMPUTE_UNUSED(input);
+ ARM_COMPUTE_UNUSED(weights);
+ ARM_COMPUTE_UNUSED(output);
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLGEMMLowpMatrixMultiplyCoreEx::validate(&input, &weights, nullptr, &output));
+
+ return Status{};
+}
+} // namespace
+
+void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
+ k->configure(input, output);
+ _kernel = std::move(k);
+}
+
+Status CLFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *input,
+ const ITensorInfo *output)
+{
+ return CLTransposeKernel::validate(input, output);
+}
+
+CLFullyConnectedHybridLayer::CLFullyConnectedHybridLayer(
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(),
+ _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(),
+ _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(),
+ _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false),
+ _original_weights(nullptr)
+{
+}
+void CLFullyConnectedHybridLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights,
+ ICLTensor *output, bool retain_internal_weights)
+{
+ ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));
+
+ ARM_COMPUTE_UNUSED(output);
+ ARM_COMPUTE_UNUSED(retain_internal_weights);
+ // Configure gemmlowp function
+ _mm_gemmlowp.configure(input, weights, nullptr, output);
+}
+
+void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTensor *weights,
+ const ICLTensor *biases, ICLTensor *output,
+ FullyConnectedLayerInfo fc_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+ // Perform validate step
+ ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedHybridLayer::validate(
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
+
+ _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
+ _accumulate_biases = false;
+ _is_prepared = fc_info.retain_internal_weights;
+ _original_weights = weights;
+
+ // Configure accumulate biases kernel for non quantized asymmetric types
+ if (biases != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
+
+ _accumulate_biases = true;
+
+ // Configure accumulate biases kernel
+ _accumulate_biases_kernel.set_target(CLScheduler::get().target());
+ _accumulate_biases_kernel.configure(output, biases);
+ }
+
+ const ICLTensor *weights_to_use = weights;
+
+ // With the Fully Connected layer we can have 4 different cases:
+ // 1) Convolution layer -> Fully Connected layer without batches
+ // 2) Fully Connected layer -> Fully Connected layer without batches
+ // 3) Convolution layer -> Fully Connected layer with batches
+ // 4) Fully Connected layer -> Fully Connected layer with batches
+
+ // Check if we have a fully connected layer with batches
+ const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
+ bool is_fc_after_conv = false;
+ if (is_batched_fc_layer)
+ {
+ is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3,
+ input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
+ }
+ else
+ {
+ is_fc_after_conv = input->info()->num_dimensions() > 1 && input->info()->dimension(1) > 1;
+ }
+ ARM_COMPUTE_ERROR_ON_MSG(is_fc_after_conv,
+ "CLFullyConnectedHybridLayer does not support after conv");
+ ARM_COMPUTE_UNUSED(is_fc_after_conv);
+
+ // Reshape weights if needed
+ if (!_are_weights_reshaped)
+ {
+ // Reshape the weights
+ _reshape_weights_output.allocator()->init(
+ weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights->info())));
+ _reshape_weights_kernel.configure(weights_to_use, &_reshape_weights_output);
+ weights_to_use = &_reshape_weights_output;
+ }
+
+ // Extract scale factor
+ _scale_factor.allocator()->init(
+ TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type()));
+ _memory_group.manage(&_scale_factor);
+ _scale_factor_kernel.configure(input, &_scale_factor);
+
+ // Quantize input
+ _quantized_input.allocator()->init(
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ _memory_group.manage(&_quantized_input);
+ _quant_input_kernel.configure(input, &_scale_factor, &_quantized_input);
+
+ // GEMMLowp
+ _gemmlowp_output.allocator()->init(
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ _memory_group.manage(&_gemmlowp_output);
+ configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output,
+ fc_info.retain_internal_weights);
+ _quantized_input.allocator()->allocate();
+
+ // Multiply scale
+ _multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output,
+ weights->info()->quantization_info().uniform().scale);
+ _gemmlowp_output.allocator()->allocate();
+ _scale_factor.allocator()->allocate();
+
+ _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
+}
+
+Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *biases, const ITensorInfo *output,
+ FullyConnectedLayerInfo fc_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
+
+ bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
+ bool is_fc_after_conv = true;
+ const GPUTarget gpu_target = CLScheduler::get().target();
+
+ const ITensorInfo &reshaped_weights =
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
+
+ // Configure accumulate biases kernel for non quantized asymmetric types
+ if (biases != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target));
+ }
+
+ // With the Fully Connected layer we can have 4 different cases:
+ // 1) Convolution layer -> Fully Connected layer without batches
+ // 2) Fully Connected layer -> Fully Connected layer without batches
+ // 3) Convolution layer -> Fully Connected layer with batches
+ // 4) Fully Connected layer -> Fully Connected layer with batches
+
+ const ITensorInfo *weights_to_use = weights;
+
+ // Check if we have a fully connected layer with batches
+ const bool is_batched_fc_layer = output->dimension(1) > 1;
+ if (is_batched_fc_layer)
+ {
+ is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->tensor_shape().cbegin() + 3, input->tensor_shape().cend(),
+ output->tensor_shape().cbegin() + 1));
+ }
+ else
+ {
+ is_fc_after_conv = input->num_dimensions() > 1 && input->dimension(1) > 1;
+ }
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_fc_after_conv,
+ "CLFullyConnectedHybridLayer does not support after conv");
+
+ if (!weights_reshaped)
+ {
+ // Validate reshape weights kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
+ weights_to_use = &reshaped_weights;
+ }
+
+ // Validate Scale factor kernel
+ const ITensorInfo &scale_factor =
+ TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type());
+ ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor));
+
+ // Validate quantization symm8 kernel
+ const ITensorInfo &quantized_input = TensorInfo(
+ input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
+
+ // Fully Connected layer after a Fully Connected Layer without batches
+ ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
+
+ // Validate matrix multiply kernel
+ const ITensorInfo &gemmlowp_output = TensorInfo(
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
+
+ // Multiply scale
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output));
+
+ return Status{};
+}
+
+void CLFullyConnectedHybridLayer::run()
+{
+ prepare();
+
+ MemoryGroupResourceScope scope_mg(_memory_group);
+
+ // Extract scale_factor
+ CLScheduler::get().enqueue(_scale_factor_kernel);
+
+ // Quantize input
+ CLScheduler::get().enqueue(_quant_input_kernel);
+
+ // Run matrix multiply
+ _mm_gemmlowp.run();
+
+ // Multiply scale factor
+ CLScheduler::get().enqueue(_multiply_scale_kernel);
+
+ // Accumulate biases if provided
+ if (_accumulate_biases)
+ {
+ CLScheduler::get().enqueue(_accumulate_biases_kernel);
+ }
+}
+
+void CLFullyConnectedHybridLayer::prepare()
+{
+ if (!_is_prepared)
+ {
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+ auto release_unused = [](CLTensor *w) {
+ if (!w->is_used())
+ {
+ CLScheduler::get().queue().finish();
+ w->allocator()->free();
+ }
+ };
+
+ // Reshape of the weights if needed (happens only once)
+ if (!_are_weights_reshaped)
+ {
+ // Run reshape weights kernel and mark weights as unused
+ _reshape_weights_output.allocator()->allocate();
+ _reshape_weights_kernel.run();
+
+ _are_weights_reshaped = true;
+ // We can not release _original_weights because it can be used in other nodes
+ }
+
+ // Prepare GEMM prepare and release unused weights
+ _mm_gemmlowp.prepare();
+
+ // Release reshaped weights if unused
+ release_unused(&_reshape_weights_output);
+
+ _is_prepared = true;
+ }
+}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
new file mode 100644
index 000000000..63e291b36
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
@@ -0,0 +1,583 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h"
+
+#include "arm_compute/core/Size2D.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/Cast.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/ToolchainSupport.h"
+
+#include <algorithm>
+
+namespace arm_compute
+{
+using namespace arm_compute::misc::shape_calculator;
+using namespace arm_compute::utils::cast;
+
+namespace
+{
+Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorInfo &weights,
+ const ITensorInfo &output,
+ GEMMLowpOutputStageInfo &gemmlowp_output_stage)
+{
+ gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+ gemmlowp_output_stage.gemmlowp_offset = 0;
+ gemmlowp_output_stage.gemmlowp_multiplier = 0;
+ gemmlowp_output_stage.gemmlowp_shift = 0;
+
+ // Configure output stage for quantized case
+ if (is_data_type_quantized_asymmetric(input.data_type()))
+ {
+ const UniformQuantizationInfo iq_info = input.quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output.quantization_info().uniform();
+
+ const auto output_quant_info = (output.total_size() == 0) ? iq_info : oq_info;
+
+ const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale;
+ int output_multiplier = 0;
+ int output_shift = 0;
+ ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(
+ multiplier, &output_multiplier, &output_shift));
+
+ // Set the GEMMLowp output stage info
+ gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
+ gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
+ gemmlowp_output_stage.gemmlowp_shift = output_shift;
+ gemmlowp_output_stage.gemmlowp_min_bound = 0;
+ gemmlowp_output_stage.gemmlowp_max_bound = 255;
+ gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier);
+ gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift);
+ }
+
+ return Status{};
+}
+
+Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo *bias,
+ const ITensorInfo &output, const FullyConnectedLayerInfo &fc_info)
+{
+ GEMMLowpOutputStageInfo gemmlowp_output_stage;
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage));
+
+ const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
+ false, // is_b_reshaped
+ true, // reshape_b_only_on_first_run
+ 0, // depth_output_gemm3d
+ false, // reinterpret_input_as_3d
+ fc_info.retain_internal_weights, // retain_internal_weights
+ gemmlowp_output_stage, // gemmlowp_output_stage
+ fc_info.fp_mixed_precision, // fp_mixed_precision
+ true, // broadcast_bias
+ ActivationLayerInfo()); // activation_info
+
+ if (is_data_type_quantized_asymmetric(input.data_type()))
+ {
+ const UniformQuantizationInfo iq_info = input.quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
+
+ // Since we need negative offsets for computing convolution, we need to change
+ // QuantizationInfo()
+ // Extract and negate input and weights offset
+ const QuantizationInfo input_quantization_info(iq_info.scale, -iq_info.offset);
+ const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset);
+
+ // Validate gemmlowp function
+ ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(
+ &input.clone()->set_quantization_info(input_quantization_info),
+ &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output,
+ gemm_info));
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info));
+ }
+
+ return Status{};
+}
+} // namespace
+
+void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
+ k->configure(input, output);
+ _kernel = std::move(k);
+}
+
+Status CLFullyConnectedLayerReshapeWeightsEx::validate(const ITensorInfo *input,
+ const ITensorInfo *output)
+{
+ return CLTransposeKernel::validate(input, output);
+}
+
+CLFullyConnectedLayerEx::CLFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager,
+ IWeightsManager *weights_manager)
+ : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(),
+ _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(),
+ _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager),
+ _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(),
+ _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true),
+ _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
+{
+}
+void CLFullyConnectedLayerEx::configure_mm(const ICLTensor *input, const ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output,
+ const FullyConnectedLayerInfo &fc_info)
+{
+ GEMMLowpOutputStageInfo gemmlowp_output_stage;
+ construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(),
+ gemmlowp_output_stage);
+
+ const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
+ false, // is_b_reshaped
+ true, // reshape_b_only_on_first_run
+ 0, // depth_output_gemm3d
+ false, // reinterpret_input_as_3d
+ fc_info.retain_internal_weights, // retain_internal_weights
+ gemmlowp_output_stage, // gemmlowp_output_stage
+ fc_info.fp_mixed_precision, // fp_mixed_precision
+ true, // broadcast_bias
+ ActivationLayerInfo()); // activation_info
+
+ if (_is_quantized)
+ {
+ // Since we need negative offsets for computing convolution, we need to change
+ // QuantizationInfo()
+ // Extract and negate input and weights offset
+ const QuantizationInfo input_quantization_info = input->info()->quantization_info();
+ const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
+
+ input->info()->set_quantization_info(QuantizationInfo(
+ input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ weights->info()->set_quantization_info(QuantizationInfo(
+ weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
+
+ // Configure gemmlowp function
+ _mm_gemmlowp.configure(input, weights, bias, output, gemm_info);
+
+ // Revert back QuantizationInfo as input and weights could be used in other fully connected
+ // layers
+ input->info()->set_quantization_info(input_quantization_info);
+ weights->info()->set_quantization_info(weights_quantization_info);
+ }
+ else
+ {
+ // Configure matrix multiply kernel
+ _mm_gemm.configure(input, weights, bias, output, 1.f, 1.f, gemm_info);
+ }
+}
+
+void CLFullyConnectedLayerEx::configure_conv_fc(const ICLTensor *input, const ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output,
+ const FullyConnectedLayerInfo &fc_info)
+{
+ ARM_COMPUTE_ERROR_ON(
+ (weights->info()->dimension(1) !=
+ (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
+
+ // If the fully connected layer is called after a convolution layer, the input tensor must be
+ // linearized
+
+ // Initialize output tensor for flatten
+ TensorShape shape_flatten = compute_flatten_shape(input->info());
+ _flatten_output.allocator()->init(input->info()
+ ->clone()
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(shape_flatten)
+ .set_data_layout(DataLayout::NCHW));
+
+ // Configure flatten kernel
+ _memory_group.manage(&_flatten_output);
+ _flatten_layer.configure(input, &_flatten_output);
+
+ // Configure matrix multiply kernel
+ configure_mm(&_flatten_output, weights, bias, output, fc_info);
+
+ // Allocate the output tensor for flatten once all the configure methods have been called
+ _flatten_output.allocator()->allocate();
+}
+
+void CLFullyConnectedLayerEx::configure_fc_fc(const ICLTensor *input, const ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output,
+ const FullyConnectedLayerInfo &fc_info)
+{
+ ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));
+
+ // Configure matrix multiply kernel
+ configure_mm(input, weights, bias, output, fc_info);
+}
+
+void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor *weights,
+ const ICLTensor *biases, ICLTensor *output,
+ FullyConnectedLayerInfo fc_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+ // Perform validate step
+ ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedLayerEx::validate(
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
+
+ _are_weights_converted = true;
+ _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
+ _is_fc_after_conv = true;
+ _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+ _is_prepared = fc_info.retain_internal_weights;
+ _original_weights = weights;
+
+ if (_weights_manager)
+ {
+ _weights_manager->manage(weights);
+ }
+
+ const ICLTensor *weights_to_use = weights;
+
+ // With the Fully Connected layer we can have 4 different cases:
+ // 1) Convolution layer -> Fully Connected layer without batches
+ // 2) Fully Connected layer -> Fully Connected layer without batches
+ // 3) Convolution layer -> Fully Connected layer with batches
+ // 4) Fully Connected layer -> Fully Connected layer with batches
+
+ // Check if we have a fully connected layer with batches
+ const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
+ if (is_batched_fc_layer)
+ {
+ _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3,
+ input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
+ }
+ else
+ {
+ _is_fc_after_conv = input->info()->num_dimensions() > 1;
+ }
+
+ // Reshape weights if needed
+ if (!_are_weights_reshaped)
+ {
+ if (_weights_manager && _weights_manager->are_weights_managed(weights))
+ {
+ _reshape_weights_managed_function.configure(weights);
+ weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(
+ _weights_manager->acquire(weights, &_reshape_weights_managed_function));
+ }
+ else
+ {
+ // Reshape the weights
+ _reshape_weights_function.configure(weights, &_reshape_weights_output);
+ weights_to_use = &_reshape_weights_output;
+ }
+ }
+
+ // Convert weights if needed
+ if (_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout))
+ {
+ if (_weights_manager && _weights_manager->are_weights_managed(weights_to_use))
+ {
+ _convert_weights_managed.configure(weights_to_use, input->info()->tensor_shape(),
+ fc_info.weights_trained_layout);
+ weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(
+ _weights_manager->acquire(weights, &_convert_weights_managed));
+ }
+ else
+ {
+ // Convert weights
+ _convert_weights.configure(weights_to_use, &_converted_weights_output,
+ input->info()->tensor_shape(), fc_info.weights_trained_layout);
+
+ weights_to_use = &_converted_weights_output;
+ }
+ _are_weights_converted = false;
+ }
+
+ if (_is_fc_after_conv)
+ {
+ // Fully Connected layer after a Convolution Layer without batches
+ configure_conv_fc(input, weights_to_use, biases, output, fc_info);
+ }
+ else
+ {
+ // Fully Connected layer after a Fully Connected Layer without batches
+ configure_fc_fc(input, weights_to_use, biases, output, fc_info);
+ }
+}
+
+Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *biases, const ITensorInfo *output,
+ FullyConnectedLayerInfo fc_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
+ DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
+
+ bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
+ bool is_fc_after_conv = true;
+
+ const ITensorInfo &flatten_input = TensorInfo(input->clone()
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(compute_flatten_shape(input))
+ .set_data_layout(DataLayout::NCHW));
+ const ITensorInfo &reshaped_weights =
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
+ const ITensorInfo &converted_weights =
+ weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
+ : TensorInfo(*reshaped_weights.clone());
+
+ // With the Fully Connected layer we can have 4 different cases:
+ // 1) Convolution layer -> Fully Connected layer without batches
+ // 2) Fully Connected layer -> Fully Connected layer without batches
+ // 3) Convolution layer -> Fully Connected layer with batches
+ // 4) Fully Connected layer -> Fully Connected layer with batches
+
+ const ITensorInfo *input_to_use = input;
+ const ITensorInfo *weights_to_use = weights;
+
+ // Check if we have a fully connected layer with batches
+ const bool is_batched_fc_layer = output->dimension(1) > 1;
+ if (is_batched_fc_layer)
+ {
+ is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->tensor_shape().cbegin() + 3, input->tensor_shape().cend(),
+ output->tensor_shape().cbegin() + 1));
+ }
+ else
+ {
+ is_fc_after_conv = input->num_dimensions() > 1;
+ }
+
+ if (!weights_reshaped)
+ {
+ // Validate reshape weights kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights));
+ weights_to_use = &reshaped_weights;
+ }
+
+ if (is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout))
+ {
+ // Validate convert weights kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(CLConvertFullyConnectedWeights::validate(
+ weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
+ weights_to_use = &converted_weights;
+ }
+
+ if (is_fc_after_conv)
+ {
+ // Fully Connected layer after a Convolution Layer without batches
+ ARM_COMPUTE_RETURN_ERROR_ON(
+ (weights_to_use->dimension(1) !=
+ (input->dimension(0) * input->dimension(1) * input->dimension(2))));
+
+ // Validate flatten kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(CLFlattenLayer::validate(input, &flatten_input));
+ input_to_use = &flatten_input;
+ }
+ else
+ {
+ // Fully Connected layer after a Fully Connected Layer without batches
+ ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
+ }
+
+ // Validate matrix multiply kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
+
+ return Status{};
+}
+
+void CLFullyConnectedLayerEx::run()
+{
+ if (!_is_prepared)
+ {
+ if (!_are_weights_reshaped)
+ _reshape_weights_output.allocator()->allocate();
+ if (!_are_weights_converted)
+ _converted_weights_output.allocator()->allocate();
+ _is_prepared = true;
+ }
+
+ {
+ if (!_weights_manager)
+ {
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+ }
+
+ // Pointer to current weights
+ const ICLTensor *cur_weights = _original_weights;
+ // Reshape of the weights
+ if (!_are_weights_reshaped)
+ {
+ if (_weights_manager && _weights_manager->are_weights_managed(cur_weights))
+ {
+ _original_weights = utils::cast::polymorphic_downcast<ICLTensor *>(
+ _weights_manager->run(cur_weights, &_reshape_weights_managed_function));
+ }
+ else
+ {
+ _reshape_weights_function.run();
+ cur_weights = &_reshape_weights_output;
+ }
+ }
+
+ // Convert weights if needed
+ if (!_are_weights_converted)
+ {
+ if (_weights_manager && _weights_manager->are_weights_managed(cur_weights))
+ {
+ _weights_manager->run(cur_weights, &_convert_weights_managed);
+ }
+ else
+ {
+ _convert_weights.run();
+ }
+ }
+
+ // Prepare the GEMM function
+ if (!_is_quantized)
+ {
+ _mm_gemm.prepare();
+ }
+ }
+
+ MemoryGroupResourceScope scope_mg(_memory_group);
+
+ // Linearize input if it comes from a convolutional layer
+ if (_is_fc_after_conv)
+ {
+ _flatten_layer.run();
+ }
+
+ // Run matrix multiply
+ if (_is_quantized)
+ {
+ _mm_gemmlowp.run();
+ }
+ else
+ {
+ _mm_gemm.run();
+ }
+}
+
+void CLFullyConnectedLayerEx::prepare()
+{
+#if 0 // TODO Remove this block
+ if(!_is_prepared)
+ {
+ if(!_weights_manager)
+ {
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+ }
+
+ auto release_unused = [](CLTensor * w)
+ {
+ if(!w->is_used())
+ {
+ CLScheduler::get().queue().finish();
+ w->allocator()->free();
+ }
+ };
+
+ // Pointer to current weights
+ const ICLTensor *cur_weights = _original_weights;
+
+ // Reshape of the weights if needed (happens only once)
+ if(!_are_weights_reshaped)
+ {
+ if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
+ {
+ cur_weights = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->run(cur_weights, &_reshape_weights_managed_function));
+ }
+ else
+ {
+ // Run reshape weights kernel and mark weights as unused
+ _reshape_weights_output.allocator()->allocate();
+ _reshape_weights_function.run();
+
+ cur_weights->mark_as_unused();
+ cur_weights = &_reshape_weights_output;
+ }
+ _are_weights_reshaped = true;
+ }
+
+ // Convert weights if needed (happens only once)
+ if(!_are_weights_converted)
+ {
+ if(_weights_manager && _weights_manager->are_weights_managed(cur_weights))
+ {
+ _weights_manager->run(cur_weights, &_convert_weights_managed);
+ }
+ else
+ {
+ _converted_weights_output.allocator()->allocate();
+ _convert_weights.run();
+ cur_weights->mark_as_unused();
+ }
+
+ _are_weights_converted = true;
+ }
+
+ // Release reshaped weights if unused
+ release_unused(&_reshape_weights_output);
+
+    // Prepare the GEMM function and release unused weights
+ if(!_is_quantized)
+ {
+ _mm_gemm.prepare();
+ }
+
+ // Release converted weights if unused
+ release_unused(&_reshape_weights_output);
+ release_unused(&_converted_weights_output);
+
+ _is_prepared = true;
+ }
+#endif
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
index c6b166163..9aebc473e 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
@@ -16,13 +16,18 @@
#include "arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h"
+#include <arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h>
+#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h>
+#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h>
+
using namespace arm_compute;
void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *input,
const arm_compute::ICLTensor *weights,
const arm_compute::ICLTensor *biases,
arm_compute::ICLTensor *output, bool needs_reshape,
- const arm_compute::TensorShape &reshape)
+ const arm_compute::TensorShape &reshape,
+ KernelType kernel_type)
{
_input = input;
_weights = weights;
@@ -30,6 +35,7 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
_output = output;
_needs_reshape = needs_reshape;
+ const ICLTensor *input_to_use = input;
if (_needs_reshape)
{
// reshape
@@ -37,16 +43,44 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
_input->info()->clone()->set_tensor_shape(reshape).set_data_layout(
_input->info()->data_layout()));
_cl_reshape.configure(_input, &_cl_buffer);
+ input_to_use = &_cl_buffer;
+ }
+
+ _cl_fc = [&]() {
+ if (kernel_type == KernelType::GENERAL)
+ {
+ auto fc = new arm_compute::CLFullyConnectedLayerEx{_memory_manager};
+ fc->configure(input_to_use, _weights, _biases, _output);
+ return std::unique_ptr<arm_compute::IFunction>(fc);
+ }
+ else
+ {
+ assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS);
+
+ bool is_hybrid = (input->info()->data_type() == DataType::F32 ||
+ input->info()->data_type() == DataType::F16) &&
+ weights->info()->data_type() == DataType::S8;
- _cl_fc.configure(&_cl_buffer, _weights, _biases, _output);
+ if (is_hybrid)
+ {
+ auto fc = new arm_compute::CLFullyConnectedHybridLayer{_memory_manager};
+ fc->configure(input_to_use, _weights, _biases, _output);
+ return std::unique_ptr<arm_compute::IFunction>(fc);
+ }
+ else
+ {
+ auto fc = new arm_compute::CLFullyConnectedLayer{_memory_manager};
+ fc->configure(input_to_use, _weights, _biases, _output);
+ return std::unique_ptr<arm_compute::IFunction>(fc);
+ }
+ }
+ }();
+ if (_needs_reshape)
+ {
// NOTE _cl_buffer is inaccessible from outside, and thus it is safe to invoke allocate here.
_cl_buffer.allocator()->allocate();
}
- else
- {
- _cl_fc.configure(_input, _weights, _biases, _output);
- }
}
void CLFullyConnectedReshapingLayer::run(void)
@@ -54,7 +88,7 @@ void CLFullyConnectedReshapingLayer::run(void)
if (_needs_reshape)
_cl_reshape.run();
- _cl_fc.run();
+ _cl_fc->run();
}
-void CLFullyConnectedReshapingLayer::prepare(void) { _cl_fc.prepare(); }
+void CLFullyConnectedReshapingLayer::prepare(void) { _cl_fc->prepare(); }
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp
new file mode 100644
index 000000000..ca5499dfc
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h"
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
+namespace arm_compute
+{
+using namespace arm_compute::misc::shape_calculator;
+using namespace arm_compute::cl_gemm;
+
+namespace
+{
+inline bool is_gemm_reshaped(bool reshape_b_only_on_first_run, GPUTarget gpu_target)
+{
+ return (get_arch_from_target(gpu_target) != GPUTarget::MIDGARD) && (reshape_b_only_on_first_run);
+}
+} // namespace
+
+CLGEMMLowpMatrixMultiplyCoreEx::CLGEMMLowpMatrixMultiplyCoreEx(
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _mm_midgard_kernel(), _mtx_a_reduction_kernel(),
+ _mtx_b_reduction_kernel(), _vector_sum_col(), _vector_sum_row(), _a_offset(0), _b_offset(0),
+ _reshape_b_only_on_first_run(false), _is_prepared(false)
+{
+}
+
+void CLGEMMLowpMatrixMultiplyCoreEx::configure(const ICLTensor *a, const ICLTensor *b,
+ const ICLTensor *c, ICLTensor *output,
+ const GEMMInfo &gemm_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
+ ARM_COMPUTE_UNUSED(c);
+ ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpMatrixMultiplyCoreEx::validate(
+ a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info));
+
+ _is_prepared = false;
+ _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
+ _a_offset = a->info()->quantization_info().uniform().offset;
+ _b_offset = b->info()->quantization_info().uniform().offset;
+
+ // Get the GPU target
+ const GPUTarget gpu_target = CLScheduler::get().target();
+
+ // Set the target for the kernels
+ _mm_midgard_kernel.set_target(gpu_target);
+
+ // GEMMRHSMatrixInfo rhs_info;
+ // GEMMLHSMatrixInfo lhs_info;
+
+ // Arguments used by GEMMReshapeInfo
+ // If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m,
+ // n, k, mult_transpose1xW_width and mult_interleave4x4_height to CLGEMMReshapeInfo
+ // in order to know how the matrices have been reshaped
+ bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
+ const unsigned int m = reinterpret_input_as_3d
+ ? (a->info()->dimension(1) * a->info()->dimension(2))
+ : a->info()->dimension(1);
+ const unsigned int n = b->info()->dimension(0);
+ const unsigned int k = a->info()->dimension(0);
+ const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
+
+ const ICLTensor *matrix_b = b;
+ // Configure matrix multiply kernel
+ _mm_midgard_kernel.configure(
+ a, matrix_b, output,
+ GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
+}
+
+Status CLGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITensorInfo *b,
+ const ITensorInfo *c, const ITensorInfo *output,
+ const GEMMInfo &gemm_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);
+ ARM_COMPUTE_UNUSED(c);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(),
+ "Matrix A already reshaped is not supported");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(),
+ "Matrix B already reshaped is not supported");
+
+ const ITensorInfo *matrix_a_info = a;
+
+ // Get the GPU target
+ const GPUTarget gpu_target = CLScheduler::get().target();
+
+ bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
+ const unsigned int m =
+ reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
+ const unsigned int n = b->dimension(0);
+ const unsigned int k = a->dimension(0);
+ const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
+
+ bool reshape_matrix_b = is_gemm_reshaped(gemm_info.reshape_b_only_on_first_run(), gpu_target);
+
+ const GEMMReshapeInfo reshape_info =
+ GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d);
+
+ TensorInfo weights_info(*b);
+ const ITensorInfo *matrix_b_info = &weights_info;
+ if (reshape_matrix_b)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(false,
+ "CLGEMMLowpMatrixMultiplyCoreEx does not support reshape_b");
+ }
+
+ // Validate matrix multiply
+ ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyKernelEx::validate(
+ matrix_a_info, matrix_b_info, output, reshape_info));
+
+ return Status{};
+}
+
+void CLGEMMLowpMatrixMultiplyCoreEx::run()
+{
+ prepare();
+
+ MemoryGroupResourceScope scope_mg(_memory_group);
+
+ // Run matrix multiply
+ CLScheduler::get().enqueue(_mm_midgard_kernel, false);
+}
+
+void CLGEMMLowpMatrixMultiplyCoreEx::prepare()
+{
+ if (!_is_prepared)
+ {
+ _is_prepared = true;
+ }
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
index 6cad9bd2e..f594d7a2e 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLGatherEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
index 7180e9356..27ed8e828 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLHashtableLookup.h"
#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
index 86ea5a66d..80393e8d1 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h"
#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
index be35ea732..28e5bc0da 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLNeg.h"
#include "arm_compute/core/CL/kernels/CLNegKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
index 38adedd10..fbb15ab1d 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLPReLU.h"
#include "arm_compute/core/CL/kernels/CLPReLUKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp
index 2a34c0664..6049b7e70 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/CL/functions/CLRNNLayerEx.h"
#include "arm_compute/core/Helpers.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
index 13a25c901..8ce2d746c 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLReduceOperation.h"
#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp
index c03826891..1f946d37b 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLSpaceToBatchND.h"
#include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp
index 0f455f96f..7d7b2264b 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h"
#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
index 80d50ad94..3ac95a8e6 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLTopKV2.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
index 40e21671d..e61746ef2 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp
index 0ce3e6700..07feb5a64 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,11 +13,37 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/core/CL/ICLTensor.h"
#include <cmath>
#include <memory>
@@ -54,7 +79,7 @@ void CLTransposeConvLayerUpsample::run()
_output->map(CLScheduler::get().queue(), true);
if (is_data_type_quantized_asymmetric(_output->info()->data_type()))
{
- const uint8_t quantized_zero = _output->info()->quantization_info().offset;
+ const uint8_t quantized_zero = _output->info()->quantization_info().uniform().offset;
std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero);
}
else
diff --git a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp
new file mode 100644
index 000000000..5405934ad
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CPP/functions/CPPOneHotEx.h"
+
+#include "arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+void CPPOneHotEx::configure(const ITensor *indices, ITensor *output, const int depth,
+ const float on_value, const float off_value, const int axis)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CPPOneHotKernelEx>();
+ k->configure(indices, output, depth, on_value, off_value, axis);
+ _kernel = std::move(k);
+}
diff --git a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp
index f8e0ef8a6..6c90ef3b4 100644
--- a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h"
#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp
new file mode 100644
index 000000000..ff81ff854
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/NEON/functions/NEActivationLayerEx.h"
+
+#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h"
+#include "arm_compute/runtime/IRuntimeContext.h"
+#include "support/ToolchainSupport.h"
+
+namespace arm_compute
+{
+NEActivationLayerEx::NEActivationLayerEx(IRuntimeContext *ctx) // NOLINT
+ : INESimpleFunctionNoBorder(ctx)
+{
+}
+void NEActivationLayerEx::configure(ITensor *input, ITensor *output,
+ ActivationLayerInfo activation_info)
+{
+ auto k = arm_compute::support::cpp14::make_unique<NEActivationLayerKernelEx>();
+ k->configure(input, output, activation_info);
+ _kernel = std::move(k);
+}
+
+Status NEActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ActivationLayerInfo &act_info)
+{
+ return NEActivationLayerKernelEx::validate(input, output, act_info);
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp
deleted file mode 100644
index 5ba465b61..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEArgMinMax.h"
-
-#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-namespace arm_compute
-{
-
-template <ReductionOperation OP>
-NEArgMinMaxStatic<OP>::NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernel(), _reduced_out(), _reshape()
-{
-}
-
-template <ReductionOperation OP>
-Status NEArgMinMaxStatic<OP>::validate(const ITensorInfo *input, int axis,
- const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
- DataType::F32);
-
- TensorShape out_shape = input->tensor_shape();
- const int input_dims = input->num_dimensions();
- int axis_local = axis;
-
- // Convert negative axis
- axis_local = wrap_around(axis_local, input_dims);
-
- ARM_COMPUTE_RETURN_ERROR_ON(axis_local > 3);
- ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local) > input->num_dimensions() - 1);
- out_shape.remove_dimension(axis_local);
-
- const TensorInfo out_info = output->clone()->set_tensor_shape(out_shape);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
-
- return Status{};
-}
-
-template <ReductionOperation OP>
-void NEArgMinMaxStatic<OP>::configure(ITensor *input, int axis, ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input);
-
- int axis_local = axis;
- const int input_dims = input->info()->num_dimensions();
-
- // Convert negative axis
- axis_local = wrap_around(axis_local, input_dims);
-
- // Perform reduction for axis
- TensorShape intermediate_shape = input->info()->tensor_shape();
- intermediate_shape.set(axis_local, 1);
- auto in = input;
-
- _reduced_out.allocator()->init(TensorInfo(intermediate_shape, output->info()->num_channels(),
- output->info()->data_type(),
- output->info()->quantization_info()));
- _memory_group.manage(&_reduced_out);
- _reduction_kernel.configure(in, axis_local, &_reduced_out, OP);
-
- // Allocate intermediate tensor
- _reduced_out.allocator()->allocate();
-
- // Configure reshape layer if we want to drop the dimensions
- TensorShape out_shape = input->info()->tensor_shape();
- out_shape.remove_dimension(axis_local);
- auto_init_if_empty(*output->info(), output->info()->clone()->set_tensor_shape(out_shape));
- _reshape.configure(&_reduced_out, output);
-}
-
-template <ReductionOperation OP> void NEArgMinMaxStatic<OP>::run()
-{
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- _reduction_kernel.run();
- _reshape.run();
-}
-
-// Supported Specializations
-template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
-template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
index 7c15fc453..e42c453cf 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h"
#include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h>
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp
index f2490e4e8..dc5c62061 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp
index db419e3a8..5ec0b8677 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp
deleted file mode 100644
index a95018a28..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h"
-
-#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
-
-namespace arm_compute
-{
-void NENegLayer::configure(const ITensor *input, ITensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<NEElementwiseUnaryKernelEx>();
- k->configure(ElementWiseUnaryEx::NEG, input, output);
- _kernel = std::move(k);
-}
-Status NENegLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
- return NEElementwiseUnaryKernelEx::validate(ElementWiseUnaryEx::NEG, input, output);
-}
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
index 00c3ed94f..53fb15081 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h"
#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
index d604fedbf..f45773251 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h"
#include "arm_compute/core/Helpers.h"
@@ -154,7 +170,7 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor
// Multiply scale
_multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output,
- weights->info()->quantization_info().scale);
+ weights->info()->quantization_info().uniform().scale);
_are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
@@ -220,7 +236,7 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate(
- &gemmlowp_output, &scale_factor, output, weights->quantization_info().scale));
+ &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
return Status{};
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
index a944f699a..cb7557a5a 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h"
#include "arm_compute/core/Helpers.h"
@@ -46,10 +62,10 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
// Since we need negative offsets for computing convolution, we need to change
// QuantizationInfo()
// Extract and negate input and weights offset
- const QuantizationInfo input_quantization_info(input.quantization_info().scale,
- -input.quantization_info().offset);
- const QuantizationInfo weights_quantization_info(weights.quantization_info().scale,
- -weights.quantization_info().offset);
+ const QuantizationInfo input_quantization_info(input.quantization_info().uniform().scale,
+ -input.quantization_info().uniform().offset);
+ const QuantizationInfo weights_quantization_info(weights.quantization_info().uniform().scale,
+ -weights.quantization_info().uniform().offset);
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(
@@ -88,10 +104,10 @@ void NEFullyConnectedLayerEx::configure_mm(const ITensor *input, const ITensor *
const QuantizationInfo input_quantization_info = input->info()->quantization_info();
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
- input->info()->set_quantization_info(
- QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
- weights->info()->set_quantization_info(
- QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+ input->info()->set_quantization_info(QuantizationInfo(
+ input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ weights->info()->set_quantization_info(QuantizationInfo(
+ weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, nullptr, output);
@@ -236,15 +252,16 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei
// Configure output stage for asymmetric quantized types
if (_is_quantized)
{
- float multiplier = input->info()->quantization_info().scale *
- weights->info()->quantization_info().scale /
- output->info()->quantization_info().scale;
+ float multiplier = input->info()->quantization_info().uniform().scale *
+ weights->info()->quantization_info().uniform().scale /
+ output->info()->quantization_info().uniform().scale;
int output_multiplier;
int output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier,
&output_shift);
_gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier,
- output_shift, output->info()->quantization_info().offset);
+ output_shift,
+ output->info()->quantization_info().uniform().offset);
_gemmlowp_output.allocator()->allocate();
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp
index 11794a1ea..1290cfd39 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h"
#include "arm_compute/core/Error.h"
@@ -50,7 +66,7 @@ NEGEMMLowpMatrixMultiplyCoreEx::NEGEMMLowpMatrixMultiplyCoreEx(
_tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0),
_b_offset(0), _run_vector_matrix_multiplication(false), _assembly_path(false),
_fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false),
- _fuse_output_stage(false), _run_activation(false), _flip_signedness(false)
+ _fuse_output_stage(false), _flip_signedness(false)
{
}
@@ -71,8 +87,8 @@ void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor *
_mtx_b_reshape_kernel = nullptr;
// Set internal variables
- _a_offset = a->info()->quantization_info().offset;
- _b_offset = b->info()->quantization_info().offset;
+ _a_offset = a->info()->quantization_info().uniform().offset;
+ _b_offset = b->info()->quantization_info().uniform().offset;
_run_vector_matrix_multiplication = a->info()->dimension(1) < 2;
_reshape_b_only_on_first_run = info.reshape_b_only_on_first_run();
_is_prepared = false;
@@ -91,7 +107,6 @@ void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor *
}
#ifdef __aarch64__
-#if 0 // Can use after arm compute library v19.11
switch (a->info()->data_type())
{
case DataType::QASYMM8:
@@ -119,8 +134,6 @@ void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor *
break;
}
}
-#endif // 0
- ARM_COMPUTE_ERROR("aarch64 not supported");
#endif /* __aarch64__ */
if (!(_assembly_path || _run_vector_matrix_multiplication))
{
@@ -277,8 +290,8 @@ Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITen
TensorInfo tmp_b_info{};
TensorInfo mm_result_s32_info{};
- int32_t a_offset = a->quantization_info().offset;
- int32_t b_offset = b->quantization_info().offset;
+ int32_t a_offset = a->quantization_info().uniform().offset;
+ int32_t b_offset = b->quantization_info().uniform().offset;
bool fuse_output_stage = info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE;
if (fuse_output_stage)
@@ -291,19 +304,16 @@ Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITen
// Check if we need to run the optimized assembly kernel
bool run_optimised = false;
bool run_optimised_requantized = false;
- const bool reshape_b_only_on_first_run = info.reshape_b_only_on_first_run();
if (a_to_use->data_type() == DataType::QASYMM8 &&
info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
{
- run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, output, 1.f, 0.f,
- reshape_b_only_on_first_run));
+ run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info));
run_optimised_requantized = run_optimised;
}
else
{
run_optimised = bool(NEGEMMAssemblyDispatch::validate(
- a_to_use, b, fuse_output_stage ? &mm_result_s32_info : output, 1.f, 0.f,
- reshape_b_only_on_first_run));
+ a_to_use, b, c, fuse_output_stage ? &mm_result_s32_info : output, gemm_info));
}
if (run_optimised)
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
index 90dabb35a..c8bb88aea 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEGatherEx.h"
#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
index 624185d2c..078019f4e 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h"
#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
index 1c2c8f027..16d74e62d 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h"
#include "arm_compute/core/Helpers.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp
index 1150cef76..dac3b849d 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEPReLU.h"
#include "arm_compute/core/NEON/kernels/NEPReLUKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp
index 84411c266..0e9a5e969 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp
index c65e93570..116bba3c0 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEReduceMeanEx.h"
#include "arm_compute/core/Helpers.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
index b36f8287a..aedb537e9 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,11 +37,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEReduceOperation.h"
#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/runtime/Tensor.h"
using namespace arm_compute;
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
index 3c18217ef..26a887912 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEReduceSum.h"
#include "arm_compute/core/CPP/Validate.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp
index c3431c418..2aa0d2d4b 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h"
#include "arm_compute/core/Helpers.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp
index c9f914fb0..198bb7672 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp
index b6ae21cc0..97697e3ea 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
index fd15ef05f..df0689273 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h"
#include "arm_compute/core/Helpers.h"
diff --git a/compute/cker/CMakeLists.txt b/compute/cker/CMakeLists.txt
index 9ddec350b..b8ceba5e3 100644
--- a/compute/cker/CMakeLists.txt
+++ b/compute/cker/CMakeLists.txt
@@ -1,11 +1,7 @@
-add_library(nnfw_lib_cker INTERFACE)
-
-nnfw_find_package(Eigen QUIET)
-option(BUILD_CKER_OPTIMIZE "Build optimize cker library" ON)
-
-if(Eigen_FOUND AND BUILD_CKER_OPTIMIZE)
- target_link_libraries(nnfw_lib_cker INTERFACE eigen)
- target_compile_definitions(nnfw_lib_cker INTERFACE CKER_OPTIMIZED_EIGEN)
-endif(Eigen_FOUND AND BUILD_CKER_OPTIMIZE)
+nnfw_find_package(Eigen REQUIRED)
+nnfw_find_package(GEMMLowp REQUIRED)
+add_library(nnfw_lib_cker INTERFACE)
+target_link_libraries(nnfw_lib_cker INTERFACE eigen)
+target_link_libraries(nnfw_lib_cker INTERFACE gemmlowp)
target_include_directories(nnfw_lib_cker INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
diff --git a/compute/cker/README.md b/compute/cker/README.md
index 149320ffc..3d98362ab 100644
--- a/compute/cker/README.md
+++ b/compute/cker/README.md
@@ -1,7 +1,7 @@
# cker
-cker - Portable CPU kernel library
+cker - CPU kernel library
__cker__ means `CPU kernel`
-Current __cker__ is porting of Tensorflow lite's reference_op kernel (Tensorflow 1.12) and gemmlow
+Current __cker__ is porting of Tensorflow lite's operation kernel
diff --git a/compute/cker/include/cker/NeonTensorUtils.h b/compute/cker/include/cker/NeonTensorUtils.h
new file mode 100644
index 000000000..4d97dd187
--- /dev/null
+++ b/compute/cker/include/cker/NeonTensorUtils.h
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_NEON_TENSOR_UTILS_H__
+#define __NNFW_CKER_NEON_TENSOR_UTILS_H__
+
+#include "cker/neon/neon_check.h"
+
+#include <cassert>
+#include <cmath>
+
+#ifdef USE_NEON
+
+#define kFloatWeightsPerNeonLane 4
+
+namespace nnfw
+{
+namespace cker
+{
+
+namespace
+{
+
+// Allocates, at least, size bytes of uninitialized storage whose alignment is
+// specified by alignment. The size parameter must be an integral multiple of
+// alignment.
+// Caller is responsible by freeing the allocated memory by calling free on
+// the passed freeing_buffer pointer.
+void *aligned_alloc(size_t alignment, size_t size, void **freeing_buffer)
+{
+ *freeing_buffer = malloc(size + alignment);
+ const size_t offset = ((uintptr_t)*freeing_buffer) % alignment; // NOLINT
+ return offset == 0 ? *freeing_buffer : ((char *)*freeing_buffer + (alignment - offset)); // NOLINT
+}
+
+} // namespace
+
+bool NeonIsZeroVector(const float *vector, int v_size)
+{
+ // If v_size is not divisible by kFloatWeightsPerNeonLane, we cannot
+ // use the main vectorized loop, and we need to process sequentially.
+ // postamble_start shows the start index where this should happen.
+ const int postamble_start = v_size - (v_size & (kFloatWeightsPerNeonLane - 1));
+
+ const float32x4_t zero_x4_float = vmovq_n_f32(0.0f);
+ for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane)
+ {
+ const float32x4_t i_x4_float = vld1q_f32(vector + v);
+ uint32x4_t cmp_result = vceqq_f32(i_x4_float, zero_x4_float);
+ if (vgetq_lane_u32(cmp_result, 0) == 0)
+ return false;
+ if (vgetq_lane_u32(cmp_result, 1) == 0)
+ return false;
+ if (vgetq_lane_u32(cmp_result, 2) == 0)
+ return false;
+ if (vgetq_lane_u32(cmp_result, 3) == 0)
+ return false;
+ }
+
+ // Postamble loop
+ for (int v = postamble_start; v < v_size; ++v)
+ {
+ if (vector[v] != 0.0)
+ return false;
+ }
+ return true;
+}
+
+void NeonSymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values,
+ float *min, float *max, float *scaling_factor)
+{
+ // TODO(raziel): vectorize min/max calculation.
+ auto minmax = std::minmax_element(values, values + size);
+ *min = *minmax.first;
+ *max = *minmax.second;
+ const int kScale = 127;
+ const float range = std::max(std::abs(*min), std::abs(*max));
+ if (range == 0)
+ {
+ memset(quantized_values, 0, size * sizeof(int8_t));
+ *scaling_factor = 1;
+ return;
+ }
+ *scaling_factor = range / kScale;
+ const float scaling_factor_inv = kScale / range;
+
+ const int postamble_start = size - (size & (2 * kFloatWeightsPerNeonLane - 1));
+
+ // Vectorized constants.
+ const float32x4_t q_factor_f32x4 = vmovq_n_f32(scaling_factor_inv);
+ const float32x4_t point5_f32x4 = vmovq_n_f32(0.5);
+ const float32x4_t zero_f32x4 = vmovq_n_f32(0.0);
+ const int32x4_t scale_i32x4 = vmovq_n_s32(kScale);
+ const int32x4_t neg_scale_i32x4 = vmovq_n_s32(-kScale);
+
+ for (int i = 0; i < postamble_start; i += 2 * kFloatWeightsPerNeonLane)
+ {
+ // Implements the vectorized version of the following:
+ // const int32_t quantized_value = static_cast<int32>(
+ // std::round(*scaling_factor * values[i]));
+ // Since the vectorized round intrinsics (vrndqa_f32) is not supported
+ // on all Neon flavors, we use the following method for rounding: if (x
+ // < 0) (int)(x - 0.5) if (x >= 0) (int)(x + 0.5)
+ float32x4_t value0_f32x4 = vld1q_f32(&values[i]);
+ float32x4_t value1_f32x4 = vld1q_f32(&values[i + kFloatWeightsPerNeonLane]);
+ float32x4_t mul0_f32x4 = vmulq_f32(value0_f32x4, q_factor_f32x4);
+ float32x4_t mul1_f32x4 = vmulq_f32(value1_f32x4, q_factor_f32x4);
+
+ int32x4_t cmp_with_zero0_ui32x4 = (int32x4_t)vcltq_f32(mul0_f32x4, zero_f32x4); // NOLINT
+ int32x4_t cmp_with_zero1_ui32x4 = (int32x4_t)vcltq_f32(mul1_f32x4, zero_f32x4); // NOLINT
+
+ float32x4_t cmp_with_zero0_f32x4 = vcvtq_f32_s32(cmp_with_zero0_ui32x4);
+ float32x4_t cmp_with_zero1_f32x4 = vcvtq_f32_s32(cmp_with_zero1_ui32x4);
+ cmp_with_zero0_f32x4 = vaddq_f32(cmp_with_zero0_f32x4, point5_f32x4);
+ cmp_with_zero1_f32x4 = vaddq_f32(cmp_with_zero1_f32x4, point5_f32x4);
+
+ mul0_f32x4 = vaddq_f32(mul0_f32x4, cmp_with_zero0_f32x4);
+ mul1_f32x4 = vaddq_f32(mul1_f32x4, cmp_with_zero1_f32x4);
+
+ int32x4_t f2i0_i32x4 = vcvtq_s32_f32(mul0_f32x4);
+ int32x4_t f2i1_i32x4 = vcvtq_s32_f32(mul1_f32x4);
+
+ // Implements the vectorized version of the folowing block:
+ // quantized_values[i] = std::min(kScale, std::max(-kScale,
+ // quantized_value));
+ int32x4_t max0_i32x4 = vmaxq_s32(f2i0_i32x4, neg_scale_i32x4);
+ int32x4_t max1_i32x4 = vmaxq_s32(f2i1_i32x4, neg_scale_i32x4);
+ int32x4_t min0_i32x4 = vminq_s32(max0_i32x4, scale_i32x4);
+ int32x4_t min1_i32x4 = vminq_s32(max1_i32x4, scale_i32x4);
+
+ int16x4_t min0_16x4 = vmovn_s32(min0_i32x4);
+ int16x4_t min1_16x4 = vmovn_s32(min1_i32x4);
+
+ int16x8_t min_16x8 = vcombine_s16(min0_16x4, min1_16x4);
+ int8x8_t min_s8x8 = vqmovn_s16(min_16x8);
+ vst1_s8(&quantized_values[i], min_s8x8);
+ }
+
+ for (int i = postamble_start; i < size; ++i)
+ {
+ const int32_t quantized_value =
+ static_cast<int32_t>(std::round(scaling_factor_inv * values[i]));
+ quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value));
+ }
+}
+
+void NeonMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix, const int m_rows,
+ const int m_cols, const int8_t *__restrict__ vectors,
+ const float *scaling_factors, int n_batch,
+ float *__restrict__ result, int result_stride)
+{
+ const int kWeightsPerUint32 = 4;
+ const int kWeightsPerNeonLane = 16;
+ // If the number of rows is not divisible by kWeightsPerUint32, we set a
+ // flag and allocate an aligned memory block. The flag is used to use the
+ // aligned memory block later in the kernel loop.
+ bool unaligned = false;
+ int8_t *aligned_row = nullptr;
+ void *aligned_row_free = nullptr;
+ if ((m_cols & (kWeightsPerUint32 - 1)) != 0)
+ {
+ unaligned = true;
+ aligned_row = (int8_t *)aligned_alloc(kWeightsPerUint32, m_cols, // NOLINT
+ &aligned_row_free);
+ }
+ void *aligned_vec_free = nullptr;
+ int8_t *aligned_vec = (int8_t *)aligned_alloc(kWeightsPerUint32, m_cols, // NOLINT
+ &aligned_vec_free);
+
+ // If m_cols is not at least kWeightsPerNeonLane, we cannot use the main
+ // vectorized loop, and we need to process sequentially. postamble_start shows
+ // the start index where this should happen.
+ const int postamble_start = m_cols - (m_cols & (kWeightsPerNeonLane - 1));
+
+ int batch, row, col;
+ for (batch = 0; batch < n_batch; ++batch)
+ {
+ const float batch_scaling_factor = scaling_factors[batch];
+ // Copy the vector data to an aligned vector.
+ memcpy(aligned_vec, vectors + batch * m_cols, sizeof(int8_t) * m_cols);
+ // Compute dot-product for every column.
+ for (row = 0; row < m_rows; ++row, result += result_stride)
+ {
+ // Get the address of the first element of the row.
+ int8_t *row_ptr = (int8_t *)matrix + row * m_cols; // NOLINT
+ if (unaligned)
+ {
+ memcpy(aligned_row, row_ptr, sizeof(int8_t) * m_cols);
+ row_ptr = aligned_row;
+ }
+
+ // Initialize the dot product sum for the row to 0.
+ int32x4_t dotprod = vmovq_n_s32(0);
+
+ // Prefetch the row to cache.
+ __builtin_prefetch(row_ptr, 0 /* prefetch for read */, 3 /* temporal locality */);
+
+ // For every block of 16 8-bit elements.
+ col = 0;
+ for (; col < postamble_start; col += kWeightsPerNeonLane)
+ {
+ // Load 16 8-bit values from the row and vector, each, to operate on.
+ // Here the assumption is that each buffer is 4-byte aligned.
+ assert(((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0);
+ const int8x16_t s1_8x16 = vld1q_s8((const int8_t *)(aligned_vec + col));
+ const int8x16_t s2_8x16 = vld1q_s8((const int8_t *)(row_ptr + col));
+ // Multiply the low bits (i.e. the lower 8 8bit numbers in the
+ // registers).
+ int16x8_t prod_16x8 = vmull_s8(vget_low_s8(s1_8x16), vget_low_s8(s2_8x16));
+ // Multiply the high bits (i.e. the lower 8 8bit numbers in the
+ // registers), and accumulate with the result of the low bits product.
+ // The assumption here is that overflow will not happen as we quantize
+ // our values to be in the range [-127, 127]. As such the sum of the 2
+ // products is always strictly smaller than 15-bits (32767 in absolute
+ // value).
+ prod_16x8 = vmlal_s8(prod_16x8, vget_high_s8(s1_8x16), vget_high_s8(s2_8x16));
+
+ dotprod = vpadalq_s16(dotprod, prod_16x8);
+ } // for col
+
+ int32_t postable_sum = 0;
+ // Postamble loop.
+ // TODO(raziel): if (ABSL_PREDICT_FALSE(postamble_start < m_rows))
+ if (postamble_start < m_cols)
+ {
+ col = postamble_start;
+ if ((m_cols - postamble_start) >= (kWeightsPerNeonLane >> 1))
+ {
+ // Load 8 8-bit values from the row and column each to operate on.
+ // Here the assumption is that each buffer is 4-bytes aligned.
+ assert(((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0);
+ const int8x8_t s1_8x8 = vld1_s8((const int8_t *)(aligned_vec + col));
+ const int8x8_t s2_8x8 = vld1_s8((const int8_t *)(row_ptr + col));
+ const int16x8_t prod_16x8 = vmull_s8(s1_8x8, s2_8x8);
+ dotprod = vpadalq_s16(dotprod, prod_16x8);
+ col += (kWeightsPerNeonLane >> 1);
+ }
+ for (; col < m_cols; ++col)
+ {
+ postable_sum += row_ptr[col] * aligned_vec[col];
+ } // for col
+ }
+ // Add the 4 intermediate sum values to get the final dot-prod value for
+ // this row.
+ int64x2_t pairwiseAdded = vpaddlq_s32(dotprod);
+ int32_t neon_sum = vgetq_lane_s64(pairwiseAdded, 0) + vgetq_lane_s64(pairwiseAdded, 1);
+
+ *result += ((neon_sum + postable_sum) * batch_scaling_factor);
+ } // for row
+ } // for batch
+
+ if (unaligned)
+ {
+ free(aligned_row_free);
+ }
+ free(aligned_vec_free);
+}
+
+void NeonMatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
+ const float *vector, int n_batch, float *result,
+ int result_stride)
+{
+ // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main
+ // vectorized loop, and we need to process sequentially. postamble_start shows
+ // the start index where this should happen.
+ const int postamble_start = m_cols - (m_cols & (kFloatWeightsPerNeonLane - 1));
+
+ for (int b = 0; b < n_batch; b++)
+ {
+ float *result_in_batch = result + b * m_rows * result_stride;
+ const float *vector_in_batch = vector + b * m_cols;
+ const float *matrix_row = matrix;
+
+ // Main matrix by vector multiplication loop
+ for (int r = 0; r < m_rows; r++)
+ {
+ float32x4_t acc_32x4 = vmovq_n_f32(0.0);
+ for (int c = 0; c < postamble_start; c += kFloatWeightsPerNeonLane)
+ {
+ // Load 4 float values from vector and matrix row.
+ float32x4_t vector_f32x4 = vld1q_f32(vector_in_batch + c);
+ float32x4_t matrix_f32x4 = vld1q_f32(matrix_row + c);
+ // Multiply the vector and matrix row and add to accumulator.
+ acc_32x4 = vmlaq_f32(acc_32x4, matrix_f32x4, vector_f32x4);
+ }
+ // Add the 4 intermediate sum values to get the final dot-prod value for
+ // this column.
+ *result_in_batch += (vgetq_lane_f32(acc_32x4, 0) + vgetq_lane_f32(acc_32x4, 1) +
+ vgetq_lane_f32(acc_32x4, 2) + vgetq_lane_f32(acc_32x4, 3));
+ for (int c = postamble_start; c < m_cols; c++)
+ {
+ *result_in_batch += matrix_row[c] * vector_in_batch[c];
+ }
+ matrix_row += m_cols;
+ result_in_batch += result_stride;
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // USE_NEON
+
+#endif // __NNFW_CKER_NEON_TENSOR_UTILS_H__
diff --git a/compute/cker/include/cker/PortableTensorUtils.h b/compute/cker/include/cker/PortableTensorUtils.h
new file mode 100644
index 000000000..87b792e6e
--- /dev/null
+++ b/compute/cker/include/cker/PortableTensorUtils.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_PORTABLE_TENSOR_UTILS_H__
+#define __NNFW_CKER_PORTABLE_TENSOR_UTILS_H__
+
+#include "cker/Types.h"
+#include "cker/neon/neon_check.h"
+
+#include <cstring>
+#include <cmath>
+
+namespace nnfw
+{
+namespace cker
+{
+
+class ActivationFunctor
+{
+public:
+ explicit ActivationFunctor(FusedActivationFunctionType act) : act_(act) {}
+
+ float operator()(float a) const
+ {
+ switch (act_)
+ {
+ case FusedActivationFunctionType::kNone:
+ return a;
+ case FusedActivationFunctionType::kRelu:
+ return a < 0.f ? 0.f : a;
+ case FusedActivationFunctionType::kRelu6:
+ return std::max(0.f, std::min(a, 6.f));
+ default:
+ // TODO(aselle): More informative fatal error!
+ exit(1);
+ }
+ }
+
+private:
+ FusedActivationFunctionType act_;
+};
+
+void PortableVectorBatchVectorAssign(const float *vector, int v_size, int n_batch,
+ float *batch_vector)
+{
+ for (int b = 0; b < n_batch; b++)
+ {
+ memcpy(batch_vector + b * v_size, vector, v_size * sizeof(float));
+ }
+}
+
+bool PortableIsZeroVector(const float *vector, int v_size)
+{
+ for (int i = 0; i < v_size; ++i)
+ {
+ if (*vector++ != 0.0f)
+ return false;
+ }
+ return true;
+}
+
+void PortableApplyActivationToVector(const float *vector, int v_size,
+ FusedActivationFunctionType activation, float *result)
+{
+ auto activation_func = ActivationFunctor(activation);
+ for (int v = 0; v < v_size; v++)
+ {
+ *result++ = (activation_func)(*vector++);
+ }
+}
+
+void PortableSymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values,
+ float *min_value, float *max_value, float *scaling_factor)
+{
+ auto minmax = std::minmax_element(values, values + size);
+ *min_value = *minmax.first;
+ *max_value = *minmax.second;
+ const int kScale = 127;
+ const float range = std::max(std::abs(*min_value), std::abs(*max_value));
+ if (range == 0)
+ {
+ memset(quantized_values, 0, size * sizeof(int8_t));
+ *scaling_factor = 1;
+ return;
+ }
+ *scaling_factor = range / kScale;
+ const float scaling_factor_inv = kScale / range;
+ for (int i = 0; i < size; ++i)
+ {
+ const int32_t quantized_value =
+ static_cast<int32_t>(std::round(values[i] * scaling_factor_inv));
+ // Clamp: just in case some odd numeric offset.
+ quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value));
+ }
+}
+
+void PortableMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
+ const int m_rows, const int m_cols,
+ const int8_t *__restrict__ vectors,
+ const float *scaling_factors, int n_batch,
+ float *__restrict__ result, int result_stride)
+{
+ int batch, row, col;
+ for (batch = 0; batch < n_batch; ++batch, vectors += m_cols)
+ {
+ const float batch_scaling_factor = scaling_factors[batch];
+ // Get the address of the first row.
+ const int8_t *row_ptr = matrix;
+ for (row = 0; row < m_rows; ++row, result += result_stride)
+ {
+ // Initialize the dot product sum for the row to 0.
+ int32_t dotprod = 0;
+#if defined(__GNUC__)
+ // Prefetch the row to cache.
+ __builtin_prefetch(row_ptr, 0 /* prefetch for read */, 3 /* temporal locality */);
+#endif
+ for (col = 0; col < m_cols; ++col, ++row_ptr)
+ {
+ dotprod += (*row_ptr) * (vectors[col]);
+ } // for col
+ *result += (dotprod * batch_scaling_factor);
+ } // for row
+ } // for batch
+}
+
+void PortableMatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
+ const float *vector, int n_batch, float *result,
+ int result_stride)
+{
+ float *result_in_batch = result;
+ for (int b = 0; b < n_batch; b++)
+ {
+ const float *matrix_ptr = matrix;
+ for (int r = 0; r < m_rows; r++)
+ {
+ float dot_prod = 0.0f;
+ const float *vector_in_batch = vector + b * m_cols;
+ for (int c = 0; c < m_cols; c++)
+ {
+ dot_prod += *matrix_ptr++ * *vector_in_batch++;
+ }
+ *result_in_batch += dot_prod;
+ result_in_batch += result_stride;
+ }
+ }
+}
+
+void PortableZeroVector(float *vector, int v_size) { std::fill_n(vector, v_size, 0); }
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_PORTABLE_TENSOR_UTILS_H__
diff --git a/compute/cker/include/cker/Shape.h b/compute/cker/include/cker/Shape.h
index 39449c68f..43b511d05 100644
--- a/compute/cker/include/cker/Shape.h
+++ b/compute/cker/include/cker/Shape.h
@@ -226,6 +226,11 @@ inline int Offset(const Shape &shape, int i0, int i1, int i2, int i3)
return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3;
}
+inline int Offset(const Shape &shape, int *index)
+{
+ return Offset(shape, index[0], index[1], index[2], index[3]);
+}
+
inline int FlatSizeSkipDim(const Shape &shape, int skip_dim)
{
const int dims_count = shape.DimensionsCount();
@@ -241,29 +246,35 @@ inline int FlatSizeSkipDim(const Shape &shape, int skip_dim)
// Flat size calculation, checking that dimensions match with one or more other
// arrays.
-inline int MatchingFlatSize(const Shape &shape, const Shape &check_shape_0)
+template <typename... Ts> inline bool checkMatching(const Shape &shape, Ts... check_shapes)
{
- UNUSED_RELEASE(check_shape_0);
- assert(shape.DimensionsCount() == check_shape_0.DimensionsCount());
- const int dims_count = shape.DimensionsCount();
- for (int i = 0; i < dims_count; ++i)
+ const Shape check_shapes_array[sizeof...(Ts)] = {std::forward<Ts>(check_shapes)...};
+ for (const auto &check_shape : check_shapes_array)
{
- assert(shape.Dims(i) == check_shape_0.Dims(i));
+ if (shape.DimensionsCount() != check_shape.DimensionsCount())
+ {
+ return false;
+ }
+ for (int i = 0; i < shape.DimensionsCount(); ++i)
+ {
+ if (shape.Dims(i) != check_shape.Dims(i))
+ {
+ return false;
+ }
+ }
}
- return shape.FlatSize();
+ return true;
}
-inline int MatchingFlatSize(const Shape &shape, const Shape &check_shape_0,
- const Shape &check_shape_1)
+struct UNUSED_ALL
{
- UNUSED_RELEASE(check_shape_0);
- assert(shape.DimensionsCount() == check_shape_0.DimensionsCount());
- const int dims_count = shape.DimensionsCount();
- for (int i = 0; i < dims_count; ++i)
- {
- assert(shape.Dims(i) == check_shape_0.Dims(i));
- }
- return MatchingFlatSize(shape, check_shape_1);
+ template <typename... Args> UNUSED_ALL(Args const &...) {}
+};
+template <typename... Ts> inline int MatchingFlatSize(const Shape &shape, Ts... check_shapes)
+{
+ UNUSED_ALL{check_shapes...};
+ assert(checkMatching(shape, std::forward<Ts>(check_shapes)...));
+ return shape.FlatSize();
}
inline int MatchingFlatSizeSkipDim(const Shape &shape, int skip_dim, const Shape &check_shape_0)
diff --git a/compute/cker/include/cker/TensorUtils.h b/compute/cker/include/cker/TensorUtils.h
new file mode 100644
index 000000000..6c5bacb8e
--- /dev/null
+++ b/compute/cker/include/cker/TensorUtils.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_TENSOR_UTILS_H__
+#define __NNFW_CKER_TENSOR_UTILS_H__
+
+#include "cker/Types.h"
+#include "cker/PortableTensorUtils.h"
+#include "cker/NeonTensorUtils.h"
+#include "cker/neon/neon_check.h"
+
+#include <cstring>
+#include <cmath>
+
+namespace nnfw
+{
+namespace cker
+{
+
+void VectorBatchVectorAssign(const float *vector, int v_size, int n_batch, float *batch_vector)
+{
+ PortableVectorBatchVectorAssign(vector, v_size, n_batch, batch_vector);
+}
+
+bool IsZeroVector(const float *vector, int v_size)
+{
+ return NEON_OR_PORTABLE(IsZeroVector, vector, v_size);
+}
+
+void ApplyActivationToVector(const float *vector, int v_size,
+ FusedActivationFunctionType activation, float *result)
+{
+ PortableApplyActivationToVector(vector, v_size, activation, result);
+}
+
+void SymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values,
+ float *min, float *max, float *scaling_factor)
+{
+ return NEON_OR_PORTABLE(SymmetricQuantizeFloats, values, size, quantized_values, min, max,
+ scaling_factor);
+}
+
+void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows, const int m_cols,
+ const int8_t *vector, const float *scaling_factors,
+ int n_batch, float *result, int result_stride)
+{
+ NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, vector,
+ scaling_factors, n_batch, result, result_stride);
+}
+
+void MatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
+ const float *vector, int n_batch, float *result,
+ int result_stride)
+{
+ NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, vector, n_batch,
+ result, result_stride);
+}
+
+void ZeroVector(float *vector, int v_size) { PortableZeroVector(vector, v_size); }
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_TENSOR_UTILS_H__
diff --git a/compute/cker/include/cker/Types.h b/compute/cker/include/cker/Types.h
index 85654b040..3c3396abe 100644
--- a/compute/cker/include/cker/Types.h
+++ b/compute/cker/include/cker/Types.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -39,6 +39,24 @@ enum class PaddingType
kValid = 2,
};
+enum class BinaryArithmeticOpType
+{
+ ADD = 0,
+ SUB = 1,
+ MUL = 2,
+ DIV = 3,
+};
+
+enum class ComparisonOpType
+{
+ Equal,
+ NotEqual,
+ Greater,
+ GreaterEqual,
+ Less,
+ LessEqual
+};
+
struct PaddingValues
{
int16_t width;
@@ -76,6 +94,217 @@ struct SoftmaxParams
int diff_min;
};
+struct PackParams
+{
+ int8_t axis;
+ // zeropoint and scale were only used to implement PackWithScaling in the legacy code of
+ // tensorflow
+ // const int32_t* input_zeropoint;
+ // const float* input_scale;
+ uint16_t inputs_count;
+ // int32_t output_zeropoint;
+ // float output_scale;
+};
+
+struct UnpackParams
+{
+ uint16_t num_split;
+ int16_t axis;
+};
+
+struct ConvParams
+{
+ PaddingType padding_type;
+ PaddingValues padding_values;
+ // TODO(starka): This was just "stride", so check that width+height is OK.
+ int16_t stride_width;
+ int16_t stride_height;
+ int16_t dilation_width_factor;
+ int16_t dilation_height_factor;
+ // uint8_t inference params.
+ // TODO(b/65838351): Use smaller types if appropriate.
+ int32_t input_offset;
+ int32_t weights_offset;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int output_shift;
+ // uint8_t, etc, activation params.
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // float activation params.
+ float float_activation_min;
+ float float_activation_max;
+ bool is_replaced_weights{false};
+};
+
+struct ComparisonParams
+{
+ ComparisonOpType type;
+ bool is_broadcast;
+};
+
+struct BinaryArithmeticOpParam
+{
+ BinaryArithmeticOpType type;
+ // Shape dependent / common to data / op types.
+ // BroadcastableOpCategory broadcast_category;
+ // uint8 inference params.
+ int32_t input1_offset;
+ int32_t input2_offset;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int32_t output_shift;
+ // Add / Sub, not Mul, uint8 inference params.
+ int32_t left_shift;
+ int32_t input1_multiplier;
+ int32_t input1_shift;
+ int32_t input2_multiplier;
+ int32_t input2_shift;
+ // uint8, etc, activation params.
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // float activation params.
+ float float_activation_min;
+ float float_activation_max;
+
+ // Processed output dimensions.
+ // Let input "a" be the one that broadcasts in the faster-changing dimension.
+ // Then, after coalescing, for shapes {a0, a1, a2, a3, a4} and
+ // {b0, b1, b2, b3, b4},
+ // broadcast_shape[4] = b0 = a0.
+ // broadcast_shape[3] = b1; a1 = 1.
+ // broadcast_shape[2] = b2 = a2.
+ // broadcast_shape[1] = a3; b3 = 1.
+ // broadcast_shape[0] = b4 = a4.
+ // int broadcast_shape[5];
+};
+
+struct TransposeParams
+{
+ int8_t perm_count;
+ int32_t perm[4];
+};
+
+struct ConcatenationParams
+{
+ int8_t axis;
+ const int32_t *input_zeropoint;
+ const float *input_scale;
+ uint16_t inputs_count;
+ int32_t output_zeropoint;
+ float output_scale;
+};
+
+struct DepthwiseConvParams
+{
+ PaddingType padding_type;
+ PaddingValues padding_values;
+ int16_t stride_width;
+ int16_t stride_height;
+ int16_t dilation_width_factor;
+ int16_t dilation_height_factor;
+ int16_t depth_multiplier;
+ // uint8 inference params.
+ // TODO(b/65838351): Use smaller types if appropriate.
+ int32_t input_offset;
+ int32_t weights_offset;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int output_shift;
+ // uint8, etc, activation params.
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // float activation params.
+ float float_activation_min;
+ float float_activation_max;
+};
+
+struct FullyConnectedParams
+{
+ FusedActivationFunctionType activation{FusedActivationFunctionType::kNone};
+ // uint8 inference params.
+ // TODO(b/65838351): Use smaller types if appropriate.
+ int32_t input_offset;
+ int32_t weights_offset;
+ float weights_scale;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int output_shift;
+ // uint8, etc, activation params.
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // float activation params.
+ float float_activation_min;
+ float float_activation_max;
+ // FullyConnectedWeightsFormat weights_format;
+};
+
+struct GatherParams
+{
+ int32_t axis;
+};
+
+struct InstanceNormParams
+{
+ float epsilon;
+ float float_activation_min;
+ float float_activation_max;
+};
+
+struct TransposeConvParams
+{
+ PaddingType padding_type;
+ PaddingValues padding_values;
+ // TODO(starka): This was just "stride", so check that width+height is OK.
+ int16_t stride_width;
+ int16_t stride_height;
+ int16_t dilation_width_factor;
+ int16_t dilation_height_factor;
+ // uint8_t inference params.
+ // TODO(b/65838351): Use smaller types if appropriate.
+ int32_t input_offset;
+ int32_t weights_offset;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int output_shift;
+ // uint8_t, etc, activation params.
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // float activation params.
+ float float_activation_min;
+ float float_activation_max;
+};
+
+struct SliceParams
+{
+ int8_t begin_count;
+ int32_t begin[4];
+ int8_t size_count;
+ int32_t size[4];
+};
+
+struct StridedSliceParams
+{
+ int8_t start_indices_count;
+ int16_t start_indices[4];
+ int8_t stop_indices_count;
+ int16_t stop_indices[4];
+ int8_t strides_count;
+ int16_t strides[4];
+
+ int16_t begin_mask;
+ int16_t ellipsis_mask;
+ int16_t end_mask;
+ int16_t new_axis_mask;
+ int16_t shrink_axis_mask;
+};
+
+struct SplitParams
+{
+ uint16_t num_split;
+ int16_t axis;
+};
+
} // namespace cker
} // namespace nnfw
diff --git a/compute/cker/include/cker/Utils.h b/compute/cker/include/cker/Utils.h
index d1f1723c4..17ef3c374 100644
--- a/compute/cker/include/cker/Utils.h
+++ b/compute/cker/include/cker/Utils.h
@@ -18,11 +18,11 @@
#ifndef __NNFW_CKER_UTILS_H__
#define __NNFW_CKER_UTILS_H__
+#include "Shape.h"
+
#include <algorithm>
#include <cstdint>
-
-#include "cker/gemmlowp/FixedPoint.h"
-#include "Shape.h"
+#include <fixedpoint/fixedpoint.h>
namespace nnfw
{
@@ -153,6 +153,106 @@ NdArrayDescsForElementwiseBroadcast(const Shape &input0_shape, const Shape &inpu
}
}
+// Gets next index to iterate through a multidimensional array.
+inline bool NextIndex(const int num_dims, const int *dims, int *current)
+{
+ if (num_dims == 0)
+ {
+ return false;
+ }
+ assert(dims != nullptr);
+ assert(current != nullptr);
+ int carry = 1;
+ for (int idx = num_dims - 1; idx >= 0; --idx)
+ {
+ int current_val = current[idx] + carry;
+ assert(dims[idx] >= current_val);
+ if (dims[idx] == current_val)
+ {
+ current[idx] = 0;
+ }
+ else
+ {
+ current[idx] = current_val;
+ carry = 0;
+ break;
+ }
+ }
+ return (carry == 0);
+}
+
+// Gets offset of index if reducing on axis. When reducing, the flattened offset
+// will not change, if the input index changes on the given axis. For example,
+// if you have a 3D tensor and you are reducing to 2D by eliminating axis 0,
+// then index (0, 1, 2) and index (1, 1, 2) will map to the same flattened
+// offset.
+// TODO(kanlig): uses Dims to represent dimensions.
+inline size_t ReducedOutputOffset(const int num_dims, const int *dims, const int *index,
+ const int num_axis, const int *axis)
+{
+ if (num_dims == 0)
+ {
+ return 0;
+ }
+
+ assert(dims != nullptr);
+ assert(index != nullptr);
+
+ size_t offset = 0;
+ for (int idx = 0; idx < num_dims; ++idx)
+ {
+ // if we need to skip this axis
+ bool is_axis = false;
+ if (axis != nullptr)
+ {
+ for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
+ {
+ if (idx == axis[axis_idx])
+ {
+ is_axis = true;
+ break;
+ }
+ }
+ }
+ if (!is_axis)
+ {
+ offset = offset * static_cast<size_t>(dims[idx]) + static_cast<size_t>(index[idx]);
+ }
+ }
+ return offset;
+}
+
+template <typename T> void optimized_ops_preload_l1_keep(const T *ptr)
+{
+#ifdef __GNUC__
+ // builtin offered by GCC-compatible compilers including clang
+ __builtin_prefetch(ptr, /* 0 means read */ 0, /* 3 means high locality */ 3);
+#else
+ (void)ptr;
+#endif
+}
+
+// Writes randomly accessed values from `input` sequentially into `output`.
+template <typename T> class SequentialTensorWriter
+{
+public:
+ SequentialTensorWriter(const T *input_data, T *output_data)
+ : input_data_(input_data), output_ptr_(output_data)
+ {
+ }
+
+ void Write(int position) { *output_ptr_++ = input_data_[position]; }
+ void WriteN(int position, int len)
+ {
+ memcpy(output_ptr_, &input_data_[position], sizeof(T) * len);
+ output_ptr_ += len;
+ }
+
+private:
+ const T *input_data_;
+ T *output_ptr_;
+};
+
} // namespace cker
} // namespace nnfw
diff --git a/compute/cker/include/cker/eigen/EigenSupport.h b/compute/cker/include/cker/eigen/EigenSupport.h
new file mode 100644
index 000000000..49c34211a
--- /dev/null
+++ b/compute/cker/include/cker/eigen/EigenSupport.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_EIGEN_EIGEN_SUPPORT_H__
+#define __NNFW_CKER_EIGEN_EIGEN_SUPPORT_H__
+
+//#if defined(CKER_OPTIMIZED_EIGEN)
+
+#include <Eigen/Core>
+#include <thread>
+#include "cker/eigen/eigen_spatial_convolutions.h"
+
+#ifdef EIGEN_USE_THREADS
+#include <unsupported/Eigen/CXX11/ThreadPool>
+#endif
+
+namespace nnfw
+{
+namespace cker
+{
+namespace eigen_support
+{
+
+// Shorthands for the types we need when interfacing with the EigenTensor
+// library.
+typedef Eigen::TensorMap<Eigen::Tensor<float, 2, Eigen::RowMajor, Eigen::DenseIndex>,
+ Eigen::Aligned>
+ EigenMatrix;
+typedef Eigen::TensorMap<Eigen::Tensor<const float, 2, Eigen::RowMajor, Eigen::DenseIndex>,
+ Eigen::Aligned>
+ ConstEigenMatrix;
+
+typedef Eigen::TensorMap<Eigen::Tensor<float, 4, Eigen::RowMajor, Eigen::DenseIndex>,
+ Eigen::Aligned>
+ EigenTensor;
+typedef Eigen::TensorMap<Eigen::Tensor<const float, 4, Eigen::RowMajor, Eigen::DenseIndex>,
+ Eigen::Aligned>
+ ConstEigenTensor;
+
+// Utility functions we need for the EigenTensor API.
+template <typename Device, typename T> struct MatMulConvFunctor
+{
+ // Computes on device "d": out = in0 * in1, where * is matrix
+ // multiplication.
+ void operator()(const Device &d, EigenMatrix out, ConstEigenMatrix in0, ConstEigenMatrix in1,
+ const Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> &dim_pair)
+ {
+ out.device(d) = in0.contract(in1, dim_pair);
+ }
+};
+
+// We have a single global threadpool for all convolution operations. This means
+// that inferences started from different threads may block each other, but
+// since the underlying resource of CPU cores should be consumed by the
+// operations anyway, it shouldn't affect overall performance.
+class EigenThreadPoolWrapper : public Eigen::ThreadPoolInterface
+{
+public:
+ // Takes ownership of 'pool'
+ explicit EigenThreadPoolWrapper(Eigen::ThreadPool *pool) : pool_(pool) {}
+ ~EigenThreadPoolWrapper() override {}
+
+ void Schedule(std::function<void()> fn) override { pool_->Schedule(std::move(fn)); }
+ int NumThreads() const override { return pool_->NumThreads(); }
+ int CurrentThreadId() const override { return pool_->CurrentThreadId(); }
+
+private:
+ std::unique_ptr<Eigen::ThreadPool> pool_;
+};
+
+struct EigenContext
+{
+ constexpr static int default_num_threadpool_threads = 4;
+ std::unique_ptr<Eigen::ThreadPoolInterface> thread_pool_wrapper;
+ std::unique_ptr<Eigen::ThreadPoolDevice> device;
+
+ EigenContext()
+ {
+ int num_threads = std::thread::hardware_concurrency();
+ if (num_threads == 0)
+ {
+ num_threads = default_num_threadpool_threads;
+ }
+ device.reset(); // destroy before we invalidate the thread pool
+ thread_pool_wrapper.reset(new EigenThreadPoolWrapper(new Eigen::ThreadPool(num_threads)));
+ device.reset(new Eigen::ThreadPoolDevice(thread_pool_wrapper.get(), num_threads));
+ }
+
+ static inline EigenContext &GetEigenContext()
+ {
+ static EigenContext instance;
+ return instance;
+ }
+};
+
+inline const Eigen::ThreadPoolDevice *GetThreadPoolDevice()
+{
+ auto &ctx = EigenContext::GetEigenContext();
+ return ctx.device.get();
+}
+
+} // namespace eigen_support
+} // namespace cker
+} // namespace nnfw
+
+//#endif // defined(CKER_OPTIMIZED_EIGEN)
+
+#endif // __NNFW_CKER_EIGEN_EIGEN_SUPPORT_H__
diff --git a/compute/cker/include/cker/eigen/Utils.h b/compute/cker/include/cker/eigen/Utils.h
index 645a61485..f9c706370 100644
--- a/compute/cker/include/cker/eigen/Utils.h
+++ b/compute/cker/include/cker/eigen/Utils.h
@@ -18,8 +18,6 @@
#ifndef __NNFW_CKER_EIGEN_UTILS_H__
#define __NNFW_CKER_EIGEN_UTILS_H__
-#if defined(CKER_OPTIMIZED_EIGEN)
-
#include <Eigen/Core>
#include <type_traits>
#include "cker/Shape.h"
@@ -30,6 +28,25 @@ namespace cker
{
// Make a local VectorMap typedef allowing to map a float array
+// as a Eigen vector expression. The std::conditional here is to
+// construct the suitable Eigen type for the constness of the
+// data. Indeed, for const data, we need to produce
+// Eigen::Map<const Eigen::Matrix<float, ...>>
+// and not the more straightforward
+// Eigen::Map<Eigen::Matrix<const float, ...>>
+template <typename Scalar>
+using VectorMap = typename std::conditional<
+ std::is_const<Scalar>::value,
+ Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type, Eigen::Dynamic, 1>>,
+ Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, 1>>>::type;
+
+template <typename Scalar> VectorMap<Scalar> MapAsVector(Scalar *data, const Shape &shape)
+{
+ const int size = shape.FlatSize();
+ return VectorMap<Scalar>(data, size, 1);
+}
+
+// Make a local VectorMap typedef allowing to map a float array
// as a Eigen matrix expression. The same explanation as for VectorMap
// above also applies here.
template <typename Scalar>
@@ -51,6 +68,4 @@ MatrixMap<Scalar> MapAsMatrixWithLastDimAsRows(Scalar *data, const Shape &shape)
} // namespace cker
} // namespace nnfw
-#endif // defined(CKER_OPTIMIZED_EIGEN)
-
#endif // __NNFW_CKER_EIGEN_UTILS_H__
diff --git a/compute/cker/include/cker/eigen/eigen_convolution_helpers.h b/compute/cker/include/cker/eigen/eigen_convolution_helpers.h
new file mode 100644
index 000000000..52bd12219
--- /dev/null
+++ b/compute/cker/include/cker/eigen/eigen_convolution_helpers.h
@@ -0,0 +1,88 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef __NNFW_CKER_EIGEN_EIGEN_CONVOLUTION_HELPERS_H__
+#define __NNFW_CKER_EIGEN_EIGEN_CONVOLUTION_HELPERS_H__
+
+namespace Eigen
+{
+namespace internal
+{
+
+// TensorEvaluatorHasPartialPacket<TensorEvaluatorType, PacketType, IndexType>
+// provides `value` that is true if TensorEvaluatorType has `PacketType
+// partialPacket<PacketType>(IndexType, unpacket_traits<PacketType>::mask_t)
+// const` and if the PacketType supports masked load.
+//
+// Partial packets are used to:
+//
+// 1) Split the packet over two columns in eigen based spatial convolution and
+// use partial loads for each individual part before combining them to get the
+// required packet. This class is used to pick the correct implementation of
+// loadPacketStandard function.
+//
+// 2) Split the packet over two rows (within the same column) in eigen based
+// cuboid convolution and use partial loads for each individual part before
+// combining them to get the required packet. This class is used to pick the
+// correct implementation of loadPacketStandard function. This usage is similar
+// to the usage in eigen based spatial convolution described above.
+//
+// 3) Finalize packing of columns in gemm_pack_colmajor after processing
+// vectorized part with full packets (see eigen_spatial_convolutions.h).
+template <typename TensorEvaluatorType, typename PacketType, typename IndexType>
+class TensorEvaluatorHasPartialPacket
+{
+public:
+ template <typename TensorEvaluatorT, typename PacketT, typename IndexT>
+ static auto functionExistsSfinae(
+ typename std::enable_if<
+ unpacket_traits<PacketT>::masked_load_available &&
+ std::is_same<
+ PacketT,
+ decltype(std::declval<const TensorEvaluatorT>().template partialPacket<PacketT>(
+ std::declval<IndexT>(),
+ std::declval<typename unpacket_traits<PacketT>::mask_t>()))>::value>::type *)
+ -> std::true_type;
+
+ template <typename TensorEvaluatorT, typename PacketT, typename IndexT>
+ static auto functionExistsSfinae(...) -> std::false_type;
+
+ typedef decltype(
+ functionExistsSfinae<TensorEvaluatorType, PacketType, IndexType>(nullptr)) status;
+
+ static const bool value = status::value;
+};
+
+// Compute a mask for loading/storing coefficients in/from a packet in a
+// [from, to) range. If the mask bit is 1, element will be loaded/stored.
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+ typename std::enable_if<unpacket_traits<Packet>::masked_load_available,
+ typename unpacket_traits<Packet>::mask_t>::type
+ mask(int from, int to)
+{
+ const Index packet_size = internal::unpacket_traits<Packet>::size;
+ eigen_assert(0 <= from && to <= (packet_size + 1) && from < to);
+
+ using Mask = typename internal::unpacket_traits<Packet>::mask_t;
+ const Mask mask_max = std::numeric_limits<Mask>::max();
+
+ return (mask_max >> (packet_size - to)) ^ (mask_max >> (packet_size - from));
+}
+
+} // namespace internal
+} // namespace Eigen
+
+#endif // __NNFW_CKER_EIGEN_EIGEN_CONVOLUTION_HELPERS_H__
diff --git a/compute/cker/include/cker/eigen/eigen_spatial_convolutions-inl.h b/compute/cker/include/cker/eigen/eigen_spatial_convolutions-inl.h
new file mode 100644
index 000000000..6536c6f3e
--- /dev/null
+++ b/compute/cker/include/cker/eigen/eigen_spatial_convolutions-inl.h
@@ -0,0 +1,1754 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef __NNFW_CKER_EIGEN_EIGEN_SPATIAL_CONVOLUTIONS_INL_H__
+#define __NNFW_CKER_EIGEN_EIGEN_SPATIAL_CONVOLUTIONS_INL_H__
+
+#include "cker/eigen/eigen_convolution_helpers.h"
+
+// Note this header is used in both TF and TFLite.
+namespace Eigen
+{
+
+namespace internal
+{
+
+// WARNING: Most of the code here implicitly assumes that the matrix is in
+// ColMajor layout. This is guaranteed by the tensor contraction (see
+// TensorContraction.h).
+//
+// Inside Eigen a tensor contraction is represented by a matrix multiplication.
+// We don't want to actually extract image patches and reshape the result into
+// a matrix (this involves allocating huge extra memory), so the patch
+// extraction and reshape operations are implicit.
+//
+// TensorContractionInputMapper takes a matrix index and returns the coefficient
+// (or the packet) of the "virtual tensor", that would be at that index if we
+// were to actually reshape the result of patch extraction.
+//
+// TensorContractionSubMapper provides a similar view into the "virtual matrix"
+// at the given vertical and horizontal offsets.
+//
+// "Virtual matrix" dimensions:
+// *0: kernelChannels * kernelRows * kernelCols;
+// 1: out_height * out_width; * OTHERS (e.g batches, etc...)
+//
+// *) extracted patches are continuous in memory (innermost dimension assuming
+// col major layout)
+//
+// With this dimensions:
+// row - offset within a single patch (in code: patchId)
+// col - index of the extracted patch (in code: patchIndex)
+// patchIndex ∈ [0..num_patches * OTHERS] (batch and other dimensions)
+//
+// TODO(ezhulenev): Consolidate this part of the code with the image patch
+// extraction code since they are both very similar.
+
+template <typename NewDimension, Index Rows, Index Cols, typename ArgType, typename Device,
+ typename Scalar_, typename Index, typename nocontract_t, typename contract_t, int Side,
+ int packet_size, bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
+class TensorContractionInputMapper<
+ Scalar_, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+ Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+{
+public:
+ typedef Scalar_ Scalar;
+
+ typedef TensorContractionInputMapper<
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+ Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ Self;
+
+ typedef TensorContractionSubMapper<
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+ Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ SubMapper;
+
+ typedef SubMapper VectorMapper;
+ typedef SubMapper LinearMapper;
+ typedef typename packet_traits<Scalar>::type Packet;
+
+ typedef TensorEvaluator<ArgType, Device> TensorEvaluatorT;
+
+ EIGEN_DEVICE_FUNC
+ TensorContractionInputMapper(
+ const TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+ Device> &tensor,
+ const nocontract_t &, const nocontract_t &, const contract_t &, const contract_t &)
+ : m_impl(tensor.impl().impl())
+ {
+ Index patch_rows;
+ Index patch_depth;
+ if (internal::traits<ArgType>::Layout == ColMajor)
+ {
+ patch_depth = tensor.impl().dimensions()[0];
+ patch_rows = tensor.impl().dimensions()[1];
+ m_patch_cols = tensor.impl().dimensions()[2];
+ m_num_patches = tensor.impl().dimensions()[3];
+ }
+ else
+ {
+ const size_t NumDims = tensor.impl().dimensions().size();
+ patch_depth = tensor.impl().dimensions()[NumDims - 1];
+ patch_rows = tensor.impl().dimensions()[NumDims - 2];
+ m_patch_cols = tensor.impl().dimensions()[NumDims - 3];
+ m_num_patches = tensor.impl().dimensions()[NumDims - 4];
+ }
+
+ // Strides for navigating through the single patch.
+ m_patch_row_stride = patch_depth;
+ m_patch_col_stride = patch_rows * m_patch_row_stride;
+
+ m_patch_row_inflate_strides = tensor.impl().rowInflateStride();
+ m_patch_col_inflate_strides = tensor.impl().colInflateStride();
+
+ m_colStride = patch_rows;
+
+ m_outputRows = tensor.impl().outputRows();
+ m_row_strides = tensor.impl().userRowStride();
+ m_col_strides = tensor.impl().userColStride();
+
+ m_in_row_strides = tensor.impl().userInRowStride();
+ m_in_col_strides = tensor.impl().userInColStride();
+
+ if (internal::traits<ArgType>::Layout == ColMajor)
+ {
+ m_inputRows = tensor.impl().impl().dimensions()[1];
+ m_inputCols = tensor.impl().impl().dimensions()[2];
+ }
+ else
+ {
+ const int NumDims = tensor.impl().impl().dimensions().size();
+ m_inputRows = tensor.impl().impl().dimensions()[NumDims - 2];
+ m_inputCols = tensor.impl().impl().dimensions()[NumDims - 3];
+ }
+
+ m_rowInputStride = patch_depth;
+ m_colInputStride = patch_depth * m_inputRows;
+ m_patchInputStride = patch_depth * m_inputRows * m_inputCols;
+
+ m_rowPaddingTop = tensor.impl().rowPaddingTop();
+ m_colPaddingLeft = tensor.impl().colPaddingLeft();
+
+ m_fastPatchRowStride = internal::TensorIntDivisor<Index>(m_patch_row_stride);
+ m_fastPatchColStride = internal::TensorIntDivisor<Index>(m_patch_col_stride);
+ m_fastInputRowStride = internal::TensorIntDivisor<Index>(m_patch_row_inflate_strides);
+ m_fastInputColStride = internal::TensorIntDivisor<Index>(m_patch_col_inflate_strides);
+ m_fastNumPatches = internal::TensorIntDivisor<Index>(m_num_patches);
+ m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride);
+ m_fastOutputRows = internal::TensorIntDivisor<Index>(m_outputRows);
+ m_fastDimZero = internal::TensorIntDivisor<Index>(patch_depth);
+ }
+
+ EIGEN_DEVICE_FUNC
+ TensorContractionInputMapper(const TensorContractionInputMapper &base_mapper)
+ : m_impl(base_mapper.m_impl)
+ {
+ m_patch_cols = base_mapper.m_patch_cols;
+ m_num_patches = base_mapper.m_num_patches;
+
+ m_patch_row_stride = base_mapper.m_patch_row_stride;
+ m_patch_col_stride = base_mapper.m_patch_col_stride;
+
+ m_patch_row_inflate_strides = base_mapper.m_patch_row_inflate_strides;
+ m_patch_col_inflate_strides = base_mapper.m_patch_col_inflate_strides;
+
+ m_colStride = base_mapper.m_colStride;
+
+ m_rowInputStride = base_mapper.m_rowInputStride;
+ m_colInputStride = base_mapper.m_colInputStride;
+ m_patchInputStride = base_mapper.m_patchInputStride;
+
+ m_inputRows = base_mapper.m_inputRows;
+ m_inputCols = base_mapper.m_inputCols;
+
+ m_outputRows = base_mapper.m_outputRows;
+ m_row_strides = base_mapper.m_row_strides;
+ m_col_strides = base_mapper.m_col_strides;
+
+ m_in_row_strides = base_mapper.m_in_row_strides;
+ m_in_col_strides = base_mapper.m_in_col_strides;
+
+ m_rowPaddingTop = base_mapper.m_rowPaddingTop;
+ m_colPaddingLeft = base_mapper.m_colPaddingLeft;
+
+ m_fastPatchRowStride = base_mapper.m_fastPatchRowStride;
+ m_fastPatchColStride = base_mapper.m_fastPatchColStride;
+ m_fastInputRowStride = base_mapper.m_fastInputRowStride;
+ m_fastInputColStride = base_mapper.m_fastInputColStride;
+ m_fastNumPatches = base_mapper.m_fastNumPatches;
+ m_fastColStride = base_mapper.m_fastColStride;
+ m_fastOutputRows = base_mapper.m_fastOutputRows;
+ m_fastDimZero = base_mapper.m_fastDimZero;
+ }
+
+ // If true, turns off some optimizations for loading packets since the image
+ // patches are "non-standard" such as there are non-trivial strides or
+ // inflations in the input.
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE bool nonStandardPatches() const
+ {
+ return m_in_row_strides != 1 || m_in_col_strides != 1 || m_patch_row_inflate_strides != 1 ||
+ m_patch_col_inflate_strides != 1;
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const
+ {
+ return SubMapper(*this, i, j);
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE LinearMapper getLinearMapper(Index i, Index j) const
+ {
+ return LinearMapper(*this, i, j);
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Scalar operator()(Index row) const
+ {
+ Index rowIndex, colIndex, otherIndex;
+ computeBaseIndices(0, rowIndex, colIndex, otherIndex);
+ return loadCoeff(row, rowIndex, colIndex, otherIndex);
+ }
+
+ // Load the coefficient at the patchIndex location instead of the usual
+ // m_rowIndex,
+ // m_colIndex, m_otherIndex. This is currently only used by the gpu code.
+ // EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar operator()(Index row, Index patchIndex) const
+ {
+ Index rowIndex, colIndex, otherIndex;
+ computeBaseIndices(patchIndex, rowIndex, colIndex, otherIndex);
+ return loadCoeff(row, rowIndex, colIndex, otherIndex);
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Packet loadPacket(Index row) const
+ {
+ Index rowIndex, colIndex, otherIndex;
+ computeBaseIndices(0, rowIndex, colIndex, otherIndex);
+ return loadPacket(row, rowIndex, colIndex, otherIndex);
+ }
+
+ // Load the packet at the patchIndex location instead of the usual m_rowIndex,
+ // m_colIndex, m_otherIndex. This is currently only used by the gpu code.
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Packet loadPacket(Index row, Index patchIndex) const
+ {
+ Index rowIndex, colIndex, otherIndex;
+ computeBaseIndices(patchIndex, rowIndex, colIndex, otherIndex);
+ return loadPacket(row, rowIndex, colIndex, otherIndex);
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE const TensorEvaluator<ArgType, Device> &impl() const { return m_impl; }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Index patchDepth() const { return m_rowInputStride; }
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Index patchRows() const { return m_colStride; }
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Index patchCols() const { return m_patch_cols; }
+
+private:
+ friend class TensorContractionSubMapper<
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+ Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>;
+
+ // Load coefficient from a patch specified by the "within patch offset"
+ // (patchId) and the precomputed indices of the first element of the patch.
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar loadCoeff(Index patchId, Index rowIndex, Index colIndex,
+ Index otherIndex) const
+ {
+ // Find the offset of the element wrt the location of the first element.
+ const Index patchOffset = patchId / m_fastDimZero;
+
+ const Index colOffset = patchOffset / m_fastColStride;
+ const Index inputCol = colIndex + colOffset * m_in_col_strides;
+ const Index origInputCol = (m_patch_col_inflate_strides == 1)
+ ? inputCol
+ : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
+
+ const Index rowOffset = patchOffset - colOffset * m_colStride;
+ const Index inputRow = rowIndex + rowOffset * m_in_row_strides;
+ const Index origInputRow = (m_patch_row_inflate_strides == 1)
+ ? inputRow
+ : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
+ if (origInputCol < 0 || origInputRow < 0 || origInputCol >= m_inputCols ||
+ origInputRow >= m_inputRows || (inputCol != origInputCol * m_patch_col_inflate_strides) ||
+ (inputRow != origInputRow * m_patch_row_inflate_strides))
+ {
+ return Scalar(0);
+ }
+ const Index depth = patchId - patchOffset * patchDepth();
+ const Index inputIndex =
+ depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex;
+ return m_impl.coeff(inputIndex);
+ }
+
+ // This is the same as loadCoeff(...), but optimized for all `inflate_strides`
+ // and `in_strides` equal to 1 (template specialization without templates).
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar loadCoeffStandard(Index patchId, Index rowIndex, Index colIndex,
+ Index otherIndex) const
+ {
+ eigen_assert(!nonStandardPatches());
+
+ // Find the offset of the element wrt the location of the first element.
+ const Index patchOffset = patchId / m_fastDimZero;
+ const Index colOffset = patchOffset / m_fastColStride;
+ const Index rowOffset = patchOffset - colOffset * m_colStride;
+ const Index inputCol = colIndex + colOffset;
+ const Index inputRow = rowIndex + rowOffset;
+ if (inputCol < 0 || inputCol >= m_inputCols || inputRow < 0 || inputRow >= m_inputRows)
+ {
+ return Scalar(0);
+ }
+ const Index depth = patchId - patchOffset * patchDepth();
+ const Index inputIndex =
+ depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
+ return m_impl.coeff(inputIndex);
+ }
+
+ // Load packet from a patch specified by the "within patch offset"
+ // (patchId) and the precomputed indices of the first element of the patch.
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Packet loadPacket(Index patchId, Index rowIndex, Index colIndex,
+ Index otherIndex) const
+ {
+ const Index packetSize = internal::unpacket_traits<Packet>::size;
+ EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
+ eigen_assert(patchId < patchDepth() * patchRows() * m_patch_cols);
+
+ if (nonStandardPatches())
+ {
+ return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex);
+ }
+ typedef decltype(m_impl) TensorEvaluatorT;
+ return loadPacketStandard<Packet, TensorEvaluatorT>(patchId, rowIndex, colIndex, otherIndex);
+ }
+
+ // Helper function to load a 'partial' packet - this is the single column
+ // part of a packet that is split across two columns. In the 'partial' packet,
+ // the elements corresponding to the column (specified through colOffset) are
+ // loaded and the rest of the elements are zero-filled into the 'partial'
+ // packet. This function is called from loadPacketStandardFromTwoColumns().
+ // This code path is exercised only when the packet type supports masked load
+ // and when the partial packet load is available in the TensorEvaluator.
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Packet loadPartialPacketStandard(Index rowIndex, Index colIndex,
+ Index otherIndex, Index patchId,
+ const Index span[],
+ const Index patchOffsets[],
+ Index colOffset) const
+ {
+ const Index inputCol = colIndex + colOffset;
+ const Index rowOffsets[2] = {patchOffsets[0] - colOffset * m_colStride,
+ patchOffsets[1] - colOffset * m_colStride};
+ const Index inputRows[2] = {rowIndex + rowOffsets[0], rowIndex + rowOffsets[1]};
+
+ if (inputRows[0] >= m_inputRows || inputRows[1] < 0 || inputCol >= m_inputCols || inputCol < 0)
+ {
+ // Partial packet is all zeros
+ return internal::pset1<Packet>(Scalar(0));
+ }
+ else if (inputRows[0] >= 0 && inputRows[1] < m_inputRows)
+ {
+ // From inputIndex-span[0], we need to load elements starting from index
+ // span[0] all the way up to (and including) span[1].
+ const Index depth = patchId - patchOffsets[0] * patchDepth();
+ const Index inputIndex =
+ depth + inputRows[0] * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
+ return m_impl.template partialPacket<Packet>(inputIndex - span[0],
+ mask<Packet>(span[0], span[1] + 1));
+ }
+ else
+ {
+ // Using slow path for this partial packet.
+ // We need to load elements starting from index span[0] all the way up to
+ // (and including) span[1]. We split this load into 3 parts:
+ // 0 : span[0]-1 - Zeros will be loaded for these indices
+ // span[0] : span[1] - Elements will be loaded here for these indices
+ // span[1]+1 : packetSize-1 - Zeros will be loaded for these indices
+ const Index packetSize = internal::unpacket_traits<Packet>::size;
+ EIGEN_ALIGN_MAX
+ typename internal::remove_const<Scalar>::type values[packetSize];
+ for (int i = 0; i < span[0]; ++i)
+ values[i] = Scalar(0);
+ for (int i = span[0]; i < span[1] + 1; ++i)
+ values[i] = loadCoeff(patchId - span[0] + i, rowIndex, colIndex, otherIndex);
+ for (int i = span[1] + 1; i < packetSize; ++i)
+ values[i] = Scalar(0);
+ return internal::pload<Packet>(values);
+ }
+ }
+
+ // Helper function to load a packet that is split across two columns.
+ // If required, this function is called from loadPacketStandard() when the
+ // packet type supports masked load and when the partial packet load is
+ // available in the TensorEvaluator.
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Packet loadPacketStandardFromTwoColumns(Index patchId, Index rowIndex,
+ Index colIndex, Index otherIndex,
+ const Index patchOffsets[],
+ const Index colOffsets[]) const
+ {
+ eigen_assert(colOffsets[1] == colOffsets[0] + 1);
+ const Index packetSize = internal::unpacket_traits<Packet>::size;
+
+ // Packet to load will be split into 2 parts where each part spans a single
+ // column. First determine where to split.
+ const Index patchIdSplit = ((colOffsets[1] * m_colStride) * m_rowInputStride) - 1;
+ const Index patchOffsetSplit = patchIdSplit / m_fastDimZero;
+
+ // patchIds[i]: patchId corresponding to partial packet i
+ // spans[i]: Start and end indices corresponding to the elements
+ // to be loaded for partial packet i
+ // patchOffsets2Cols[i]: patchOffsets corresponding to partial packet i
+ const Index patchIds[2] = {patchId, patchIdSplit + 1};
+ const Index spans[2][2] = {{0, patchIdSplit - patchId},
+ {patchIdSplit - patchId + 1, packetSize - 1}};
+ const Index patchOffsets2Cols[2][2] = {{patchOffsets[0], patchOffsetSplit},
+ {patchOffsetSplit + 1, patchOffsets[1]}};
+
+ // Load partial packets and do bit-wise OR to generate required packet
+ return internal::por<Packet>(
+ loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[0], spans[0],
+ patchOffsets2Cols[0], colOffsets[0]),
+ loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[1], spans[1],
+ patchOffsets2Cols[1], colOffsets[1]));
+ }
+
+ // Helper function to load a packet that is present in a single column.
+ // If required, this function is called from loadPacketStandard().
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Packet loadPacketStandardFromSingleColumn(Index patchId, Index rowIndex,
+ Index colIndex, Index otherIndex,
+ const Index patchOffsets[],
+ const Index colOffsets[],
+ const Index inputCols[]) const
+ {
+ eigen_assert(colOffsets[0] == colOffsets[1]);
+ const Index rowOffsets[2] = {patchOffsets[0] - colOffsets[0] * m_colStride,
+ patchOffsets[1] - colOffsets[1] * m_colStride};
+ eigen_assert(rowOffsets[0] <= rowOffsets[1]);
+ const Index inputRows[2] = {rowIndex + rowOffsets[0], rowIndex + rowOffsets[1]};
+
+ if (inputRows[0] >= m_inputRows || inputRows[1] < 0)
+ {
+ // all zeros
+ return internal::pset1<Packet>(Scalar(0)); // all zeros
+ }
+
+ if (inputRows[0] >= 0 && inputRows[1] < m_inputRows)
+ {
+ // no padding
+ const Index depth = patchId - patchOffsets[0] * patchDepth();
+ const Index inputIndex =
+ depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex;
+ return m_impl.template packet<Unaligned>(inputIndex);
+ }
+ return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex);
+ }
+
+  // Load standard packet from a patch specified by the "within patch offset"
+  // (patchId) and the precomputed indices of the first element of the patch.
+  // This function will be called if partial packet loading is not available
+  // for the TensorEvaluator or if the packet type does not support masked
+  // load.
+  template <typename PacketT, typename TensorEvaluatorT>
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if<
+    !TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
+  loadPacketStandard(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const
+  {
+    const Index packetSize = internal::unpacket_traits<Packet>::size;
+    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
+    eigen_assert(patchId < patchDepth() * patchRows() * m_patch_cols);
+
+    eigen_assert(!nonStandardPatches());
+
+    if ((patchDepth() % packetSize) == 0)
+    {
+      // All elements of the packet share a single patch offset (see the
+      // assertion in loadPacketFast), so the fast path applies.
+      return loadPacketFast(patchId, rowIndex, colIndex, otherIndex);
+    }
+
+    // Offsets and input calculation here are identical to
+    // loadCoeffStandard(...), but repeated twice: once for the first and once
+    // for the last element of the packet.
+    const Index patchOffsets[2] = {patchId / m_fastDimZero,
+                                   (patchId + packetSize - 1) / m_fastDimZero};
+    const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride,
+                                 patchOffsets[1] / m_fastColStride};
+    const Index inputCols[2] = {colIndex + colOffsets[0], colIndex + colOffsets[1]};
+
+    if (inputCols[0] >= m_inputCols || inputCols[1] < 0)
+    {
+      // The whole packet lies in the zero padding region.
+      return internal::pset1<Packet>(Scalar(0));
+    }
+    if (inputCols[0] == inputCols[1])
+    {
+      return loadPacketStandardFromSingleColumn(patchId, rowIndex, colIndex, otherIndex,
+                                                patchOffsets, colOffsets, inputCols);
+    }
+    // Packet spans multiple columns: fall back to element-by-element load.
+    return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex);
+  }
+
+  // Load standard packet from a patch specified by the "within patch offset"
+  // (patchId) and the precomputed indices of the first element of the patch.
+  // This function will be called if partial packet loading is available for
+  // the TensorEvaluator and if the packet type supports masked load.
+  // The only difference between this and the other case is that if the packet
+  // to load is split across two columns, then in this case instead of going to
+  // the slow (element-by-element) load, we load two packets - each containing
+  // elements from one of the columns (rest of the elements of the packets are
+  // zeroes), and then combine these two packets to generate the required
+  // packet. The idea is to enable fast load (if possible) of these 'partial'
+  // packets.
+  template <typename PacketT, typename TensorEvaluatorT>
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if<
+    TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
+  loadPacketStandard(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const
+  {
+    const Index packetSize = internal::unpacket_traits<PacketT>::size;
+    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
+    eigen_assert(patchId < patchDepth() * patchRows() * m_patch_cols);
+
+    eigen_assert(!nonStandardPatches());
+
+    if ((patchDepth() % packetSize) == 0)
+    {
+      // All elements of the packet share a single patch offset: fast path.
+      return loadPacketFast(patchId, rowIndex, colIndex, otherIndex);
+    }
+
+    // Offsets and input calculation here are identical to
+    // loadCoeffStandard(...), but repeated twice: once for the first and once
+    // for the last element of the packet.
+    const Index patchOffsets[2] = {patchId / m_fastDimZero,
+                                   (patchId + packetSize - 1) / m_fastDimZero};
+    const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride,
+                                 patchOffsets[1] / m_fastColStride};
+    const Index inputCols[2] = {colIndex + colOffsets[0], colIndex + colOffsets[1]};
+
+    if (inputCols[0] >= m_inputCols || inputCols[1] < 0)
+    {
+      // The whole packet lies in the zero padding region.
+      return internal::pset1<PacketT>(Scalar(0));
+    }
+    if (inputCols[0] == inputCols[1])
+    {
+      return loadPacketStandardFromSingleColumn(patchId, rowIndex, colIndex, otherIndex,
+                                                patchOffsets, colOffsets, inputCols);
+    }
+    if (inputCols[1] == inputCols[0] + 1)
+    {
+      // Packet straddles exactly two adjacent columns: combine two partial
+      // (masked) loads instead of falling back to the scalar path.
+      return loadPacketStandardFromTwoColumns(patchId, rowIndex, colIndex, otherIndex, patchOffsets,
+                                              colOffsets);
+    }
+    return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex);
+  }
+
+  // Fast path for standard patches: valid only when patchDepth() is a
+  // multiple of the packet size, so a packet never straddles a row or column
+  // boundary inside the patch (all elements share one patch offset).
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Packet loadPacketFast(Index patchId, Index rowIndex, Index colIndex,
+                                            Index otherIndex) const
+  {
+    const Index packetSize = internal::unpacket_traits<Packet>::size;
+    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
+    eigen_assert(patchId < patchDepth() * patchRows() * m_patch_cols);
+
+    eigen_assert(!nonStandardPatches());
+    eigen_assert((patchDepth() % packetSize) == 0);
+    // Find the offset of the element wrt the location of the first element.
+    const Index patchOffset = patchId / m_fastDimZero;
+    // First and last packet elements must share this offset (precondition).
+    eigen_assert((patchId + packetSize - 1) / m_fastDimZero == patchOffset);
+
+    const Index colOffset = patchOffset / m_fastColStride;
+    const Index rowOffset = patchOffset - colOffset * m_colStride;
+    const Index inputCol = colIndex + colOffset;
+    const Index inputRow = rowIndex + rowOffset;
+    if (inputCol < 0 || inputRow < 0 || inputCol >= m_inputCols || inputRow >= m_inputRows)
+    {
+      // The whole packet falls into the (implicit zero) padding region.
+      return internal::pset1<Packet>(Scalar(0));
+    }
+    // no padding
+    const Index depth = patchId - patchOffset * patchDepth();
+    const Index inputIndex =
+      depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
+    return m_impl.template packet<Unaligned>(inputIndex);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet packetWithPossibleZero(Index patchId, Index rowIndex,
+                                                                      Index colIndex,
+                                                                      Index otherIndex) const
+  {
+    // Slow-path load: gather every coefficient of the packet one at a time
+    // via loadCoeff() (which produces zeros for padded positions) into an
+    // aligned scratch buffer, then assemble the packet from that buffer.
+    const int kPacketSize = internal::unpacket_traits<Packet>::size;
+    EIGEN_ALIGN_MAX
+    typename internal::remove_const<Scalar>::type buffer[kPacketSize];
+    for (int offset = 0; offset < kPacketSize; ++offset)
+    {
+      buffer[offset] = loadCoeff(patchId + offset, rowIndex, colIndex, otherIndex);
+    }
+    return internal::pload<Packet>(buffer);
+  }
+
+  // Decompose a linear patch index into the (row, col, other) base indices of
+  // the first input element of that patch, accounting for the user-specified
+  // strides and the top/left padding.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
+  computeBaseIndices(Index patchIndex, Index &rowIndex, Index &colIndex, Index &otherIndex) const
+  {
+    const size_t kInputDims =
+      array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
+    Index patch2DIndex;
+    if (kInputDims == 3)
+    {
+      // No batch dimension: the patch index is already a pure 2D patch index.
+      otherIndex = 0;
+      patch2DIndex = patchIndex;
+    }
+    else
+    {
+      // Split off the batch ("other") component first.
+      otherIndex = patchIndex / m_fastNumPatches;
+      patch2DIndex = patchIndex - otherIndex * m_num_patches;
+    }
+    otherIndex *= m_patchInputStride;
+    const Index outCol = patch2DIndex / m_fastOutputRows;
+    const Index outRow = patch2DIndex - outCol * m_outputRows;
+    colIndex = outCol * m_col_strides - m_colPaddingLeft;
+    rowIndex = outRow * m_row_strides - m_rowPaddingTop;
+  }
+
+  Index m_patch_cols; // number of columns in the patch
+  Index m_num_patches; // number of patches to extract.
+
+  // Strides for navigating through the single patch.
+  Index m_patch_row_stride;
+  Index m_patch_col_stride;
+  // Precomputed fast integer divisors for the patch strides above.
+  internal::TensorIntDivisor<Index> m_fastPatchRowStride;
+  internal::TensorIntDivisor<Index> m_fastPatchColStride;
+
+  Index m_patch_row_inflate_strides; // the strides for row inflation in the
+                                     // image patch
+  Index m_patch_col_inflate_strides; // the strides for col inflation in the
+                                     // image patch
+  // Fast representation of inflation strides.
+  internal::TensorIntDivisor<Index> m_fastInputRowStride;
+  internal::TensorIntDivisor<Index> m_fastInputColStride;
+
+  Index m_otherStride; // stride over the non-patch ("other") dimensions
+  Index m_colStride;   // within-patch column stride (== patch rows elsewhere)
+  internal::TensorIntDivisor<Index> m_fastNumPatches; // fast divisor by m_num_patches
+  internal::TensorIntDivisor<Index> m_fastColStride;  // fast divisor by m_colStride
+
+  Index m_rowInputStride; // row stride in the input tensor
+  Index m_colInputStride; // col stride in the input tensor
+  Index m_patchInputStride; // patch stride in the input tensor
+
+  Index m_inputRows; // Number of rows in the input tensor
+  Index m_inputCols; // Number of cols in the input tensor
+
+  Index m_outputRows; // Number of patch rows
+
+  Index m_row_strides; // User specified row stride
+  Index m_col_strides; // User specified col stride
+
+  Index m_in_row_strides; // User specified input row stride
+  Index m_in_col_strides; // User specified input col stride
+
+  Index m_rowPaddingTop; // Row padding
+  Index m_colPaddingLeft; // Column padding
+
+  internal::TensorIntDivisor<Index> m_fastOutputRows; // fast divisor by m_outputRows
+  internal::TensorIntDivisor<Index> m_fastDimZero;    // fast divisor by patch depth
+
+  const TensorEvaluator<ArgType, Device> m_impl;
+};
+
+// Sub-mapper for the image-patch input mapper: a lightweight view into a
+// TensorContractionInputMapper shifted by a fixed (depth, column) offset.
+// Coefficient and packet loads forward to the parent mapper with the offsets
+// applied; the per-patch base indices are precomputed in the constructors.
+template <typename NewDimension, Index Rows, Index Cols, typename ArgType, typename Device,
+          typename Scalar, typename Index, typename nocontract_t, typename contract_t, int Side,
+          int packet_size, bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
+class TensorContractionSubMapper<
+  Scalar, Index, Side,
+  TensorEvaluator<
+    const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+    Device>,
+  nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+{
+public:
+  typedef typename packet_traits<Scalar>::type Packet;
+  typedef typename packet_traits<Scalar>::half HalfPacket;
+
+  typedef TensorContractionInputMapper<
+    Scalar, Index, Side,
+    TensorEvaluator<
+      const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+      Device>,
+    nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+    ParentMapper;
+
+  typedef TensorContractionSubMapper<
+    Scalar, Index, Side,
+    TensorEvaluator<
+      const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+      Device>,
+    nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+    Self;
+
+  typedef Self LinearMapper;
+
+  typedef typename ParentMapper::TensorEvaluatorT TensorEvaluatorT;
+
+  // Construct from a parent mapper and absolute (depth, column) offsets.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const ParentMapper &base_mapper,
+                                                                   Index vert_offset,
+                                                                   Index horiz_offset)
+    : m_depth_offset(vert_offset), m_col_offset(horiz_offset), m_base_mapper(base_mapper)
+  {
+    m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex);
+  }
+  // Construct from another sub-mapper; offsets are relative and accumulate.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const Self &base_mapper,
+                                                                   Index vert_offset,
+                                                                   Index horiz_offset)
+    : m_depth_offset(vert_offset + base_mapper.m_depth_offset),
+      m_col_offset(horiz_offset + base_mapper.m_col_offset),
+      m_base_mapper(base_mapper.m_base_mapper)
+  {
+    m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const
+  {
+    return m_base_mapper.loadCoeff(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const
+  {
+    return m_base_mapper(i + m_depth_offset, j + m_col_offset);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const
+  {
+    return m_base_mapper.loadPacket(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const
+  {
+    return m_base_mapper.template loadPacket<Alignment>(i + m_depth_offset, j + m_col_offset);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar loadCoeffStandard(Index i) const
+  {
+    return m_base_mapper.loadCoeffStandard(i + m_depth_offset, m_rowIndex, m_colIndex,
+                                           m_otherIndex);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacketFast(Index i) const
+  {
+    return m_base_mapper.loadPacketFast(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacketStandard(Index i) const
+  {
+    // Recover the evaluator type from the parent mapper so overload
+    // resolution picks the partial-packet or non-partial-packet variant.
+    typedef decltype(m_base_mapper.m_impl) TensorEvaluatorT;
+    return m_base_mapper.template loadPacketStandard<Packet, TensorEvaluatorT>(
+      i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex);
+  }
+  // Loads through this mapper are never assumed aligned.
+  template <typename Packet> EIGEN_DEVICE_FUNC bool aligned(Index) const { return false; }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE bool nonStandardPatches() const { return m_base_mapper.nonStandardPatches(); }
+
+  // Max(Col|Row|Depth): compute the upper limit for the column, row and depth
+  // index respectively that fits into the peeled_k elements starting at
+  // m_depth_offset.
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index maxCol(const Index peeled_k) const
+  {
+    const Index max_col =
+      (m_depth_offset + (peeled_k == 0 ? 0 : peeled_k - 1)) / fastPatchColStride();
+    return std::min<Index>(1 + max_col, patchCols());
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index maxRow(const Index peeled_k, const Index col) const
+  {
+    const Index max_row =
+      (m_depth_offset + (peeled_k == 0 ? 0 : peeled_k - 1) - col * patchColStride()) /
+      fastPatchRowStride();
+    return std::min<Index>(1 + max_row, patchRows());
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index maxDepth(const Index peeled_k, const Index col, Index row) const
+  {
+    const Index max_depth = m_depth_offset + peeled_k - //
+                            col * patchColStride() -    //
+                            row * patchRowStride();
+    return std::min<Index>(max_depth, patchDepth());
+  }
+
+  // MaxDepth uses only the remaining number of elements in the peeled_k.
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index maxDepth(const Index num_elements, const Index start_depth) const
+  {
+    return std::min<Index>(start_depth + num_elements, patchDepth());
+  }
+
+  // Every register matters in this code, so sometimes to prevent register
+  // spilling, instead of the variable that you would expect to see, we use
+  // another one, that is guaranteed to have the same value. E.g. patch depth is
+  // always the same as input depth, and it's also the same as input row stride.
+  // Bunch of other parameters have similar relations.
+
+  typedef internal::TensorIntDivisor<Index> IndexDivisor;
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index patchDepth() const { return m_base_mapper.m_rowInputStride; }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index patchRows() const { return m_base_mapper.m_colStride; }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index patchCols() const { return m_base_mapper.m_patch_cols; }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index patchRowStride() const
+  {
+    eigen_assert(patchDepth() == m_base_mapper.m_patch_row_stride &&
+                 "Patch depth must be equal to patch row stride.");
+    return patchDepth();
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index patchColStride() const { return m_base_mapper.m_patch_col_stride; }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE IndexDivisor fastPatchRowStride() const
+  {
+    eigen_assert(patchDepth() == m_base_mapper.m_patch_row_stride &&
+                 "Patch depth must be equal to patch row stride.");
+    return m_base_mapper.m_fastDimZero; // patch_depth
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE IndexDivisor fastPatchColStride() const
+  {
+    return m_base_mapper.m_fastPatchColStride;
+  }
+
+  // Direct (no padding check) packet/coefficient reads at a precomputed base
+  // index; callers must have established that the position is not padded.
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Packet packetNoPadding(const Index depth, const Index baseIndex) const
+  {
+    const Index inputIndex = depth + baseIndex;
+    return m_base_mapper.m_impl.template packet<Unaligned>(inputIndex);
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Scalar coeffNoPadding(const Index depth, const Index baseIndex) const
+  {
+    const Index inputIndex = depth + baseIndex;
+    return m_base_mapper.m_impl.coeff(inputIndex);
+  }
+  // Masked load of the first num_coeffs elements; only available when the
+  // evaluator supports partial packets.
+  template <typename PacketT = Packet>
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if<
+    TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
+  partialPacketNoPadding(const Index depth, const Index baseIndex, Index num_coeffs) const
+  {
+    const Index inputIndex = depth + baseIndex;
+    return m_base_mapper.m_impl.template partialPacket<PacketT>(inputIndex,
+                                                                mask<PacketT>(0, num_coeffs));
+  }
+  // True iff the given patch row falls into the padding region.
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE bool padRow(const Index row) const
+  {
+    const Index r = m_rowIndex + row;
+    return r < 0 || r >= m_base_mapper.m_inputRows;
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE bool padAnyRow(const Index first_row, const Index last_row) const
+  {
+    return m_rowIndex + first_row < 0 || m_rowIndex + last_row >= m_base_mapper.m_inputRows;
+  }
+  // Non-standard-patch variant: also maps the row through the input stride and
+  // inflation, writing the original input row to *orig_row.
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE bool padOrSkipRow(const Index row, Index *orig_row) const
+  {
+    eigen_assert(nonStandardPatches());
+
+    const Index input_row = m_rowIndex + row * m_base_mapper.m_in_row_strides;
+    *orig_row = (m_base_mapper.m_patch_row_inflate_strides == 1)
+                  ? input_row
+                  : ((input_row >= 0) ? (input_row / m_base_mapper.m_fastInputRowStride) : 0);
+
+    return (*orig_row < 0 || *orig_row >= m_base_mapper.m_inputRows) ||
+           (input_row != *orig_row * m_base_mapper.m_patch_row_inflate_strides);
+  }
+  // True iff the given patch column falls into the padding region.
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE bool padCol(const Index col) const
+  {
+    const Index c = m_colIndex + col;
+    return c < 0 || c >= m_base_mapper.m_inputCols;
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE bool padOrSkipCol(const Index col, Index *orig_col) const
+  {
+    eigen_assert(nonStandardPatches());
+
+    const Index input_col = m_colIndex + col * m_base_mapper.m_in_col_strides;
+    *orig_col = (m_base_mapper.m_patch_col_inflate_strides == 1)
+                  ? input_col
+                  : ((input_col >= 0) ? (input_col / m_base_mapper.m_fastInputColStride) : 0);
+
+    return (*orig_col < 0 || *orig_col >= m_base_mapper.m_inputCols) ||
+           (input_col != *orig_col * m_base_mapper.m_patch_col_inflate_strides);
+  }
+  // Linear input index of the first element at patch position (row, col).
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index baseIndex(const Index row, const Index col) const
+  {
+    const Index r = m_rowIndex + row;
+    const Index c = m_colIndex + col;
+    return r * m_base_mapper.m_rowInputStride + c * m_base_mapper.m_colInputStride + m_otherIndex;
+  }
+  // Compute a base index when original input row and column were precomputed
+  // using padOrSkipRow and padOrSkipCol. Used only for non standard patches.
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index origBaseIndex(const Index orig_row, const Index orig_col) const
+  {
+    return orig_row * m_base_mapper.m_rowInputStride + orig_col * m_base_mapper.m_colInputStride +
+           m_otherIndex;
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index rowStride() const { return m_base_mapper.m_row_strides; }
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index colStride() const { return m_base_mapper.m_col_strides; }
+
+  // Decompose m_depth_offset into its within-patch (row, col, depth) parts.
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index rowOffset() const
+  {
+    const Index patchOffset = m_depth_offset / m_base_mapper.m_fastDimZero;
+    const Index colOffset = patchOffset / m_base_mapper.m_fastColStride;
+    return patchOffset - colOffset * m_base_mapper.m_colStride;
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index colOffset() const
+  {
+    const Index patchOffset = m_depth_offset / m_base_mapper.m_fastDimZero;
+    const Index colOffset = patchOffset / m_base_mapper.m_fastColStride;
+    return colOffset;
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Index depthOffset() const { return m_depth_offset % patchDepth(); }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const
+  {
+    return LinearMapper(m_base_mapper, i + m_depth_offset, j + m_col_offset);
+  }
+
+private:
+  Index m_depth_offset; // First row in the input matrix
+  Index m_col_offset;   // First col in the input matrix
+
+  // Knowing that: col_offset == patchIndex * OTHERS, we keep precomputed base
+  // indices for the first element in a patch specified by col_offset
+  // (see computeBaseIndices(...) for details).
+  Index m_rowIndex;
+  Index m_colIndex;
+  Index m_otherIndex;
+
+  const ParentMapper m_base_mapper; // Keeping a copy instead of a reference
+                                    // performs better in benchmarks.
+};
+
+// Arrange a block of the right input matrix (in our case it's always a "virtual
+// matrix" constructed from extracted image patches) in contiguous memory.
+//
+// Given column major input (A0 beside A1 in memory):
+// A0 B0 C0 D0 E0 F0 G0 H0 ... Z0
+// A1 B1 C1 D1 E1 F1 G1 H1 ... Z1
+// A2 B2 C2 D2 E2 F2 G2 H2 ... Z2
+// A3 B3 C3 D3 E3 F3 G3 H3 ... Z3
+// A4 B4 C4 D4 E4 F4 G4 H4 ... Z4
+// A5 B5 C5 D5 E5 F5 G5 H5 ... Z5
+// A6 B6 C6 D6 E6 F6 G6 H6 ... Z6
+// A7 B7 C7 D7 E7 F7 G7 H7 ... Z7
+// A8 ...
+// ...
+//
+// *) A, B, C, ... - patches extracted from the original input.
+// *) A0, A1, A2 ... - values from the same patch at different offsets.
+//
+// The traversal (packed rhs memory) order (B0 beside A0 in memory):
+// A0 B0 C0 D0 A1 B1 C1 D1 ...
+// E0 F0 G0 H0 E1 F1 G1 H1 ...
+// ...
+// Z0 Z1 Z2 Z3 Z4 Z5 Z6 Z7 ... <- doesn't belong to any block (nr = 4)
+//
+// This traversal order must be the same as in default gemm_pack_rhs defined in
+// GeneralBlockPanelKernel.h.
+//
+// *) nr - number of registers along the 'n' dimension.
+// See GeneralBlockPanelKernel.h and "Anatomy of High-Performance Matrix
+// Multiplication" paper.
+template <typename NewDimension, Index Rows, Index Cols, typename ArgType, typename Device,
+          typename Scalar, typename Index, typename nocontract_t, typename contract_t,
+          int packet_size, bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment,
+          int nr>
+struct gemm_pack_rhs<
+  Scalar, Index,
+  TensorContractionSubMapper<
+    Scalar, Index, Rhs,
+    TensorEvaluator<
+      const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+      Device>,
+    nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered,
+    Alignment>,
+  nr, ColMajor, false, false>
+{
+  typedef TensorContractionSubMapper<
+    Scalar, Index, Rhs,
+    TensorEvaluator<
+      const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+      Device>,
+    nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+    SubMapper;
+  typedef SubMapper DataMapper;
+  typedef typename packet_traits<Scalar>::type Packet;
+
+  EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE)
+
+  // Pack a (depth x cols) block of the virtual rhs matrix into contiguous
+  // memory at `block`, four columns at a time (nr == 4); stride/offset are
+  // unsupported for this specialization and must be zero.
+  EIGEN_DEVICE_FUNC
+  EIGEN_DONT_INLINE void operator()(Scalar *block, const DataMapper &rhs, Index depth, Index cols,
+                                    Index stride = 0, Index offset = 0) const
+  {
+    eigen_assert(stride == 0);
+    eigen_assert(offset == 0);
+    (void)stride;
+    (void)offset;
+
+    const Index packet_cols4 = (cols / 4) * 4;
+    const Index peeled_k = (depth / packet_size) * packet_size;
+    const bool non_standard_patches = rhs.nonStandardPatches();
+
+    for (Index j2 = 0; j2 < packet_cols4; j2 += 4)
+    {
+      // One sub-mapper per column of the 4-column strip being packed.
+      const SubMapper dm0 = rhs.getLinearMapper(0, j2 + 0);
+      const SubMapper dm1 = rhs.getLinearMapper(0, j2 + 1);
+      const SubMapper dm2 = rhs.getLinearMapper(0, j2 + 2);
+      const SubMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
+
+      Index k = 0;
+      if ((packet_size % 4) == 0 && !non_standard_patches)
+      {
+        // FAST PATH:
+        // Iterate over patch columns and rows, if we know that a single
+        // packet do not span across multiple rows or columns.
+        if ((rhs.patchDepth() % packet_size) == 0)
+        {
+          const Index start_col = rhs.colOffset();
+          const Index max_col = rhs.maxCol(peeled_k);
+
+          for (Index c = start_col; c < max_col; ++c)
+          {
+            eigen_assert(k <= peeled_k);
+
+            const Index start_row = (c == start_col) ? rhs.rowOffset() : 0;
+            const Index max_row = rhs.maxRow(peeled_k, c);
+
+            const bool pad_col0 = dm0.padCol(c);
+            const bool pad_col1 = dm1.padCol(c);
+            const bool pad_col2 = dm2.padCol(c);
+            const bool pad_col3 = dm3.padCol(c);
+
+            // Check if we can squeeze reads along the `row` and `depth`
+            // dimensions (two innermost dimensions).
+            if (!pad_col0 && !pad_col1 && !pad_col2 && !pad_col3 &&     //
+                !dm0.padRow(start_row) && !dm0.padRow(max_row - 1) &&   //
+                !dm1.padRow(start_row) && !dm1.padRow(max_row - 1) &&   //
+                !dm2.padRow(start_row) && !dm2.padRow(max_row - 1) &&   //
+                !dm3.padRow(start_row) && !dm3.padRow(max_row - 1))
+            {
+              // Compute how many elements we can squeeze read.
+              const Index start_depth = (c == start_col) ? rhs.depthOffset() : 0;
+
+              // Upper bound for the number of elements in the depth dimension
+              // that we can squeeze read.
+              const Index squeeze_length = (max_row - start_row) * rhs.patchDepth() - start_depth;
+
+              // Do not overshoot beyond the block size.
+              const Index max_depth = start_depth + std::min<Index>(peeled_k - k, squeeze_length);
+              eigen_assert((max_depth - start_depth) % packet_size == 0);
+
+              const Index idx0 = dm0.baseIndex(start_row, c);
+              const Index idx1 = dm1.baseIndex(start_row, c);
+              const Index idx2 = dm2.baseIndex(start_row, c);
+              const Index idx3 = dm3.baseIndex(start_row, c);
+
+              for (Index d = start_depth; d < max_depth; d += packet_size)
+              {
+                eigen_assert(k < peeled_k);
+                // Transpose a 4xpacket_size tile so elements of the four
+                // columns end up interleaved in packed-rhs order.
+                PacketBlock<Packet, 4> kernel;
+                kernel.packet[0] = rhs.packetNoPadding(d, idx0);
+                kernel.packet[1] = rhs.packetNoPadding(d, idx1);
+                kernel.packet[2] = rhs.packetNoPadding(d, idx2);
+                kernel.packet[3] = rhs.packetNoPadding(d, idx3);
+                ptranspose(kernel);
+                pstoreu(block + 0 * packet_size, kernel.packet[0]);
+                pstoreu(block + 1 * packet_size, kernel.packet[1]);
+                pstoreu(block + 2 * packet_size, kernel.packet[2]);
+                pstoreu(block + 3 * packet_size, kernel.packet[3]);
+                block += 4 * packet_size;
+                k += packet_size;
+              }
+
+              // Go to the next column.
+              continue;
+            }
+
+            // If we can't squeeze reads, process rows one by one.
+            for (Index r = start_row; r < max_row; ++r)
+            {
+              eigen_assert(k <= peeled_k);
+
+              const bool pad0 = pad_col0 || dm0.padRow(r);
+              const bool pad1 = pad_col1 || dm1.padRow(r);
+              const bool pad2 = pad_col2 || dm2.padRow(r);
+              const bool pad3 = pad_col3 || dm3.padRow(r);
+
+              const Index idx0 = dm0.baseIndex(r, c);
+              const Index idx1 = dm1.baseIndex(r, c);
+              const Index idx2 = dm2.baseIndex(r, c);
+              const Index idx3 = dm3.baseIndex(r, c);
+
+              const Index start_depth =
+                ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0;
+              const Index max_depth = rhs.maxDepth(peeled_k - k, start_depth);
+              eigen_assert((max_depth - start_depth) % packet_size == 0);
+
+              for (Index d = start_depth; d < max_depth; d += packet_size)
+              {
+                eigen_assert(k < peeled_k);
+                // Padded positions contribute zero packets.
+                PacketBlock<Packet, 4> kernel;
+                kernel.packet[0] = pad0 ? pset1<Packet>(Scalar(0)) : rhs.packetNoPadding(d, idx0);
+                kernel.packet[1] = pad1 ? pset1<Packet>(Scalar(0)) : rhs.packetNoPadding(d, idx1);
+                kernel.packet[2] = pad2 ? pset1<Packet>(Scalar(0)) : rhs.packetNoPadding(d, idx2);
+                kernel.packet[3] = pad3 ? pset1<Packet>(Scalar(0)) : rhs.packetNoPadding(d, idx3);
+                ptranspose(kernel);
+                pstoreu(block + 0 * packet_size, kernel.packet[0]);
+                pstoreu(block + 1 * packet_size, kernel.packet[1]);
+                pstoreu(block + 2 * packet_size, kernel.packet[2]);
+                pstoreu(block + 3 * packet_size, kernel.packet[3]);
+                block += 4 * packet_size;
+                k += packet_size;
+              }
+            }
+          }
+
+          // The loop above should fill peeled_k elements.
+          eigen_assert(peeled_k == k);
+        }
+        else
+        {
+          // Packets may cross patch boundaries: use the standard loader.
+          for (; k < peeled_k; k += packet_size)
+          {
+            PacketBlock<Packet, 4> kernel;
+            kernel.packet[0] = dm0.loadPacketStandard(k);
+            kernel.packet[1] = dm1.loadPacketStandard(k);
+            kernel.packet[2] = dm2.loadPacketStandard(k);
+            kernel.packet[3] = dm3.loadPacketStandard(k);
+            ptranspose(kernel);
+            pstoreu(block + 0 * packet_size, kernel.packet[0]);
+            pstoreu(block + 1 * packet_size, kernel.packet[1]);
+            pstoreu(block + 2 * packet_size, kernel.packet[2]);
+            pstoreu(block + 3 * packet_size, kernel.packet[3]);
+            block += 4 * packet_size;
+          }
+        }
+      }
+
+      // Copy the remaining coefficients of the column block after the peeled_k.
+      if (!rhs.nonStandardPatches())
+      {
+        for (; k < depth; k++)
+        {
+          block[0] = dm0.loadCoeffStandard(k);
+          block[1] = dm1.loadCoeffStandard(k);
+          block[2] = dm2.loadCoeffStandard(k);
+          block[3] = dm3.loadCoeffStandard(k);
+          block += 4;
+        }
+      }
+      else
+      {
+        for (; k < depth; k++)
+        {
+          block[0] = dm0(k);
+          block[1] = dm1(k);
+          block[2] = dm2(k);
+          block[3] = dm3(k);
+          block += 4;
+        }
+      }
+    }
+
+    // copy the remaining columns one at a time (nr==1)
+    for (Index j2 = packet_cols4; j2 < cols; ++j2)
+    {
+      const SubMapper dm0 = rhs.getLinearMapper(0, j2);
+      for (Index k = 0; k < depth; k++)
+      {
+        *block = dm0(k);
+        block += 1;
+      }
+    }
+  }
+};
+
+// Template specialization for packet_size = 2. We must special-case packet
+// blocks with nr > packet_size, e.g. PacketBlock<Packet2d, 4>.
+template <typename NewDimension, Index Rows, Index Cols, typename ArgType, typename Device,
+ typename Scalar, typename Index, typename nocontract_t, typename contract_t,
+ bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment, int nr>
+struct gemm_pack_rhs<
+ Scalar, Index,
+ TensorContractionSubMapper<
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+ Device>,
+ nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment>,
+ nr, ColMajor, false, false>
+{
+ typedef TensorContractionSubMapper<
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+ Device>,
+ nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ SubMapper;
+ typedef SubMapper DataMapper;
+ typedef typename packet_traits<Scalar>::type Packet;
+
+ EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE)
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_DONT_INLINE void operator()(Scalar *block, const DataMapper &rhs, Index depth, Index cols,
+ Index stride = 0, Index offset = 0) const
+ {
+ eigen_assert(stride == 0);
+ eigen_assert(offset == 0);
+
+ (void)stride;
+ (void)offset;
+
+ const int packet_size = 2;
+ const Index packet_cols4 = (cols / 4) * 4;
+ const Index peeled_k = (depth / packet_size) * packet_size;
+ const bool non_standard_patches = rhs.nonStandardPatches();
+
+ for (Index j2 = 0; j2 < packet_cols4; j2 += 4)
+ {
+ const SubMapper dm0 = rhs.getLinearMapper(0, j2 + 0);
+ const SubMapper dm1 = rhs.getLinearMapper(0, j2 + 1);
+ const SubMapper dm2 = rhs.getLinearMapper(0, j2 + 2);
+ const SubMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
+
+ Index k = 0;
+ if (!non_standard_patches)
+ {
+ // FAST PATH:
+ // Iterate over patch columns and rows if we know that a single
+ // packet do not span across multiple rows or columns.
+ if ((rhs.patchDepth() % packet_size) == 0)
+ {
+ const Index start_col = rhs.colOffset();
+ const Index max_col = rhs.maxCol(peeled_k);
+
+ for (Index c = start_col; c < max_col; ++c)
+ {
+ eigen_assert(k <= peeled_k);
+
+ const Index start_row = (c == start_col) ? rhs.rowOffset() : 0;
+ const Index max_row = rhs.maxRow(peeled_k, c);
+
+ const bool pad_col0 = dm0.padCol(c);
+ const bool pad_col1 = dm1.padCol(c);
+ const bool pad_col2 = dm2.padCol(c);
+ const bool pad_col3 = dm3.padCol(c);
+
+ // We can squeeze reads along the `row` and `depth` dimensions if
+ // the row stride is `1`, which means that `row` and `depth`
+ // dimensions are contiguous (two innermost dimensions).
+ if (rhs.rowStride() == 1 && //
+ !pad_col0 && !pad_col1 && !pad_col2 && !pad_col3 && //
+ !dm0.padRow(start_row) && !dm0.padRow(max_row - 1) && //
+ !dm1.padRow(start_row) && !dm1.padRow(max_row - 1) && //
+ !dm2.padRow(start_row) && !dm2.padRow(max_row - 1) && //
+ !dm3.padRow(start_row) && !dm3.padRow(max_row - 1))
+ {
+ // Compute how many elements we can squeeze read.
+ const Index start_depth = (c == start_col) ? rhs.depthOffset() : 0;
+
+ // Upper bound for the number of elements in the depth dimension
+ // that we can squeeze read.
+ const Index squeeze_length = (max_row - start_row) * rhs.patchDepth() - start_depth;
+
+ // Do not overshoot beyond the block size.
+ const Index max_depth = start_depth + std::min<Index>(peeled_k - k, squeeze_length);
+ eigen_assert((max_depth - start_depth) % packet_size == 0);
+
+ const Index idx0 = dm0.baseIndex(start_row, c);
+ const Index idx1 = dm1.baseIndex(start_row, c);
+ const Index idx2 = dm2.baseIndex(start_row, c);
+ const Index idx3 = dm3.baseIndex(start_row, c);
+
+ for (Index d = start_depth; d < max_depth; d += packet_size)
+ {
+ PacketBlock<Packet, 2> kernel0;
+ PacketBlock<Packet, 2> kernel1;
+ kernel0.packet[0] = rhs.packetNoPadding(d, idx0);
+ kernel0.packet[1] = rhs.packetNoPadding(d, idx1);
+ kernel1.packet[0] = rhs.packetNoPadding(d, idx2);
+ kernel1.packet[1] = rhs.packetNoPadding(d, idx3);
+ ptranspose(kernel0);
+ ptranspose(kernel1);
+ pstoreu(block + 0 * packet_size, kernel0.packet[0]);
+ pstoreu(block + 1 * packet_size, kernel1.packet[0]);
+ pstoreu(block + 2 * packet_size, kernel0.packet[1]);
+ pstoreu(block + 3 * packet_size, kernel1.packet[1]);
+ block += 4 * packet_size;
+ k += packet_size;
+ }
+
+ // Go to the next column.
+ continue;
+ }
+
+ // If we can't squeeze reads, process rows one by one.
+ for (Index r = start_row; r < max_row; ++r)
+ {
+ eigen_assert(k <= peeled_k);
+
+ const bool pad0 = pad_col0 || dm0.padRow(r);
+ const bool pad1 = pad_col1 || dm1.padRow(r);
+ const bool pad2 = pad_col2 || dm2.padRow(r);
+ const bool pad3 = pad_col3 || dm3.padRow(r);
+
+ const Index idx0 = dm0.baseIndex(r, c);
+ const Index idx1 = dm1.baseIndex(r, c);
+ const Index idx2 = dm2.baseIndex(r, c);
+ const Index idx3 = dm3.baseIndex(r, c);
+
+ const Index start_depth =
+ ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0;
+ const Index max_depth = rhs.maxDepth(peeled_k - k, start_depth);
+ eigen_assert((max_depth - start_depth) % packet_size == 0);
+
+ for (Index d = start_depth; d < max_depth; d += packet_size)
+ {
+ eigen_assert(k < peeled_k);
+ PacketBlock<Packet, 2> kernel0;
+ PacketBlock<Packet, 2> kernel1;
+ kernel0.packet[0] = pad0 ? pset1<Packet>(Scalar(0)) : rhs.packetNoPadding(d, idx0);
+ kernel0.packet[1] = pad1 ? pset1<Packet>(Scalar(0)) : rhs.packetNoPadding(d, idx1);
+ kernel1.packet[0] = pad2 ? pset1<Packet>(Scalar(0)) : rhs.packetNoPadding(d, idx2);
+ kernel1.packet[1] = pad3 ? pset1<Packet>(Scalar(0)) : rhs.packetNoPadding(d, idx3);
+ ptranspose(kernel0);
+ ptranspose(kernel1);
+ pstoreu(block + 0 * packet_size, kernel0.packet[0]);
+ pstoreu(block + 1 * packet_size, kernel1.packet[0]);
+ pstoreu(block + 2 * packet_size, kernel0.packet[1]);
+ pstoreu(block + 3 * packet_size, kernel1.packet[1]);
+ block += 4 * packet_size;
+ k += packet_size;
+ }
+ }
+ }
+
+ // The loop above should fill peeled_k elements.
+ eigen_assert(peeled_k == k);
+ }
+ else
+ {
+ // Packet can span multiple rows or columns, so we have to go
+ // through the slower "standard" path.
+ for (; k < peeled_k; k += packet_size)
+ {
+ PacketBlock<Packet, 2> kernel0;
+ PacketBlock<Packet, 2> kernel1;
+ kernel0.packet[0] = dm0.loadPacketStandard(k);
+ kernel0.packet[1] = dm1.loadPacketStandard(k);
+ kernel1.packet[0] = dm2.loadPacketStandard(k);
+ kernel1.packet[1] = dm3.loadPacketStandard(k);
+ ptranspose(kernel0);
+ ptranspose(kernel1);
+ pstoreu(block + 0 * packet_size, kernel0.packet[0]);
+ pstoreu(block + 1 * packet_size, kernel1.packet[0]);
+ pstoreu(block + 2 * packet_size, kernel0.packet[1]);
+ pstoreu(block + 3 * packet_size, kernel1.packet[1]);
+ block += 4 * packet_size;
+ }
+ }
+ }
+
+ // Copy the remaining coefficients of the column block after the peeled_k.
+ if (!non_standard_patches)
+ {
+ for (; k < depth; k++)
+ {
+ block[0] = dm0.loadCoeffStandard(k);
+ block[1] = dm1.loadCoeffStandard(k);
+ block[2] = dm2.loadCoeffStandard(k);
+ block[3] = dm3.loadCoeffStandard(k);
+ block += 4;
+ }
+ }
+ else
+ {
+ for (; k < depth; k++)
+ {
+ block[0] = dm0(k);
+ block[1] = dm1(k);
+ block[2] = dm2(k);
+ block[3] = dm3(k);
+ block += 4;
+ }
+ }
+ }
+
+ // Copy the remaining columns one at a time (nr==1).
+ for (Index j2 = packet_cols4; j2 < cols; ++j2)
+ {
+ const SubMapper dm0 = rhs.getLinearMapper(0, j2);
+ for (Index k = 0; k < depth; k++)
+ {
+ *block = dm0(k);
+ block += 1;
+ }
+ }
+ }
+};
+
+// Special case for non-vectorized types such as float16.
+template <typename NewDimension, Index Rows, Index Cols, typename ArgType, typename Device,
+ typename Scalar, typename Index, typename nocontract_t, typename contract_t,
+ bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment, int nr>
+struct gemm_pack_rhs<
+ Scalar, Index,
+ TensorContractionSubMapper<
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+ Device>,
+ nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment>,
+ nr, ColMajor, false, false>
+{
+ typedef TensorContractionSubMapper<
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
+ Device>,
+ nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ SubMapper;
+ typedef SubMapper DataMapper;
+
+ EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE)
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_DONT_INLINE void operator()(Scalar *block, const DataMapper &rhs, Index depth, Index cols,
+ Index stride = 0, Index offset = 0) const
+ {
+ eigen_assert(stride == 0);
+ eigen_assert(offset == 0);
+
+ (void)offset;
+ (void)stride;
+
+ const Index packet_cols4 = (cols / 4) * 4;
+
+ for (Index j2 = 0; j2 < packet_cols4; j2 += 4)
+ {
+ const SubMapper dm0 = rhs.getLinearMapper(0, j2 + 0);
+ const SubMapper dm1 = rhs.getLinearMapper(0, j2 + 1);
+ const SubMapper dm2 = rhs.getLinearMapper(0, j2 + 2);
+ const SubMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
+
+ if (!rhs.nonStandardPatches())
+ {
+ for (Index k = 0; k < depth; k++)
+ {
+ block[0] = dm0.loadCoeffStandard(k);
+ block[1] = dm1.loadCoeffStandard(k);
+ block[2] = dm2.loadCoeffStandard(k);
+ block[3] = dm3.loadCoeffStandard(k);
+ block += 4;
+ }
+ }
+ else
+ {
+ for (Index k = 0; k < depth; k++)
+ {
+ block[0] = dm0(k);
+ block[1] = dm1(k);
+ block[2] = dm2(k);
+ block[3] = dm3(k);
+ block += 4;
+ }
+ }
+ }
+
+ // Copy the remaining columns one at a time (nr==1).
+ for (Index j2 = packet_cols4; j2 < cols; ++j2)
+ {
+ const SubMapper dm0 = rhs.getLinearMapper(0, j2);
+ for (Index k = 0; k < depth; k++)
+ {
+ *block = dm0(k);
+ block += 1;
+ }
+ }
+ }
+};
+} // end namespace internal
+
+/** SpatialConvolution
+ * \ingroup CXX11_NeuralNetworks_Module
+ *
+ * \brief Applies a 2D convolution over a multichannel input image.
+ *
+ * The input parameter is expected to be a tensor with a rank of 3 or more
+ * (channels, height, width, and optionally others)
+ * The kernel parameter is expected to be a 4D tensor (filters, channels,
+ * kernel_height, kernel_width)
+ * The input and the kernel must both be in col-major layout. The result will
+ * also be in col-major layout.
+ *
+ * If col_in_stride, row_in_stride > 1, then applies convolution with holes
+ * (aka atrous convolution), sampling every col_in_stride, row_in_stride input
+ * pixels.
+ *
+ * If padding_top, padding_bottom, padding_left, or padding_right is specified,
+ * then those paddings will be used to pad the input, and padding_type must be
+ * PADDING_VALID.
+ *
+ * The result can be assigned to a tensor of rank equal to the rank of the
+ * input. The dimensions of the result will be filters, height, width (and
+ * others if applicable).
+ *
+ * It is possible to swap the order of the width and height dimensions provided
+ * that the same order is used in the input, the kernel, and the output.
+ *
+ * It is also possible to add an output kernel to the contraction, output
+ * kernel is called by Eigen when it "finalizes" the block of an output tensor.
+ *
+ */
+template <typename Input, typename Kernel, typename OutputKernel = const NoOpOutputKernel>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static const typename internal::conditional<
+ internal::traits<Input>::Layout == ColMajor,
+ TensorReshapingOp<
+ const DSizes<typename internal::traits<Input>::Index,
+ internal::traits<Input>::NumDimensions>,
+ const TensorContractionOp<
+ const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
+ const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
+ const Kernel>,
+ const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
+ const TensorImagePatchOp<Dynamic, Dynamic, const Input>>,
+ const OutputKernel>>,
+ TensorReshapingOp<
+ const DSizes<typename internal::traits<Input>::Index,
+ internal::traits<Input>::NumDimensions>,
+ const TensorContractionOp<
+ const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
+ const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
+ const TensorImagePatchOp<Dynamic, Dynamic, const Input>>,
+ const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
+ const Kernel>,
+ const OutputKernel>>>::type
+SpatialConvolution(const Input &input, const Kernel &kernel, const Index row_stride = 1,
+ const Index col_stride = 1, const PaddingType padding_type = PADDING_SAME,
+ const Index row_in_stride = 1, const Index col_in_stride = 1,
+ const OutputKernel &output_kernel = OutputKernel(), Index padding_top = 0,
+ Index padding_bottom = 0, Index padding_left = 0, Index padding_right = 0)
+{
+ typedef typename internal::traits<Input>::Index TensorIndex;
+ TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions,
+ internal::traits<Input>::Layout, TensorIndex>>
+ in(input);
+ TensorRef<
+ Tensor<typename internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions,
+ internal::traits<Kernel>::Layout, TensorIndex>>
+ kern(kernel);
+
+ EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == internal::traits<Kernel>::Layout,
+ YOU_MADE_A_PROGRAMMING_MISTAKE)
+ const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
+
+ const int NumDims = internal::traits<Input>::NumDimensions;
+
+ // Number of filters to apply. This is the same as the output depth of the
+ // result
+ const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[3];
+ // Number of channels. This is the same as the input depth.
+ const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[2];
+ const TensorIndex kernelRows = isColMajor ? kern.dimensions()[2] : kern.dimensions()[1];
+ const TensorIndex kernelCols = isColMajor ? kern.dimensions()[3] : kern.dimensions()[0];
+
+ const Index kernelRowsEff = kernelRows + (kernelRows - 1) * (row_in_stride - 1);
+ const Index kernelColsEff = kernelCols + (kernelCols - 1) * (col_in_stride - 1);
+
+ array<IndexPair<TensorIndex>, 1> contract_dims;
+ contract_dims[0] = IndexPair<TensorIndex>(1, 0);
+
+ const TensorIndex InputRows = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2);
+ const TensorIndex InputCols = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3);
+ const bool padding_explicit = (padding_top || padding_bottom || padding_left || padding_right);
+
+ TensorIndex out_height;
+ TensorIndex out_width;
+ switch (padding_type)
+ {
+ case PADDING_VALID:
+ {
+ const TensorIndex InputRowsEff = InputRows + padding_top + padding_bottom;
+ const TensorIndex InputColsEff = InputCols + padding_left + padding_right;
+ out_height =
+ numext::ceil((InputRowsEff - kernelRowsEff + 1.f) / static_cast<float>(row_stride));
+ out_width =
+ numext::ceil((InputColsEff - kernelColsEff + 1.f) / static_cast<float>(col_stride));
+ break;
+ }
+ case PADDING_SAME:
+ {
+ eigen_assert(!padding_explicit);
+ out_height = numext::ceil(InputRows / static_cast<float>(row_stride));
+ out_width = numext::ceil(InputCols / static_cast<float>(col_stride));
+ break;
+ }
+ default:
+ {
+ // Initialize unused variables to avoid a compiler warning
+ out_height = 0;
+ out_width = 0;
+ eigen_assert(false && "unexpected padding");
+ }
+ }
+
+ // Molds the output of the patch extraction code into a 2d tensor:
+ // - the first dimension (dims[0]): the patch values to be multiplied with the
+ // kernels
+ // - the second dimension (dims[1]): everything else
+ DSizes<TensorIndex, 2> pre_contract_dims;
+ if (isColMajor)
+ {
+ pre_contract_dims[0] = kernelChannels * kernelRows * kernelCols;
+ pre_contract_dims[1] = out_height * out_width;
+ for (int i = 3; i < NumDims; ++i)
+ {
+ pre_contract_dims[1] *= in.dimension(i);
+ }
+ }
+ else
+ {
+ pre_contract_dims[1] = kernelChannels * kernelRows * kernelCols;
+ pre_contract_dims[0] = out_height * out_width;
+ for (int i = 0; i < NumDims - 3; ++i)
+ {
+ pre_contract_dims[0] *= in.dimension(i);
+ }
+ }
+
+ // Molds the output of the contraction into the shape expected by the user
+ // (assuming this is ColMajor):
+ // - 1st dim: kernel filters
+ // - 2nd dim: output height
+ // - 3rd dim: output width
+ // - 4th dim and beyond: everything else including batch size
+ DSizes<TensorIndex, NumDims> post_contract_dims;
+ if (isColMajor)
+ {
+ post_contract_dims[0] = kernelFilters;
+ post_contract_dims[1] = out_height;
+ post_contract_dims[2] = out_width;
+ for (int i = 3; i < NumDims; ++i)
+ {
+ post_contract_dims[i] = in.dimension(i);
+ }
+ }
+ else
+ {
+ post_contract_dims[NumDims - 1] = kernelFilters;
+ post_contract_dims[NumDims - 2] = out_height;
+ post_contract_dims[NumDims - 3] = out_width;
+ for (int i = 0; i < NumDims - 3; ++i)
+ {
+ post_contract_dims[i] = in.dimension(i);
+ }
+ }
+
+ DSizes<TensorIndex, 2> kernel_dims;
+ if (isColMajor)
+ {
+ kernel_dims[0] = kernelFilters;
+ kernel_dims[1] = kernelChannels * kernelRows * kernelCols;
+ }
+ else
+ {
+ kernel_dims[0] = kernelChannels * kernelRows * kernelCols;
+ kernel_dims[1] = kernelFilters;
+ }
+ if (padding_explicit)
+ {
+ return choose(
+ Cond<internal::traits<Input>::Layout == ColMajor>(),
+ kernel.reshape(kernel_dims)
+ .contract(input
+ .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
+ row_in_stride, col_in_stride,
+ /*row_inflate_stride=*/1,
+ /*col_inflate_stride=*/1, padding_top,
+ padding_bottom, padding_left, padding_right,
+ /*padding_value=*/0)
+ .reshape(pre_contract_dims),
+ contract_dims, output_kernel)
+ .reshape(post_contract_dims),
+ input
+ .extract_image_patches(
+ kernelRows, kernelCols, row_stride, col_stride, row_in_stride, col_in_stride,
+ /*row_inflate_stride=*/1,
+ /*col_inflate_stride=*/1, padding_top, padding_bottom, padding_left, padding_right,
+ /*padding_value=*/0)
+ .reshape(pre_contract_dims)
+ .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel)
+ .reshape(post_contract_dims));
+ }
+ else
+ {
+ return choose(
+ Cond<internal::traits<Input>::Layout == ColMajor>(),
+ kernel.reshape(kernel_dims)
+ .contract(input
+ .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
+ row_in_stride, col_in_stride, padding_type)
+ .reshape(pre_contract_dims),
+ contract_dims, output_kernel)
+ .reshape(post_contract_dims),
+ input
+ .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, row_in_stride,
+ col_in_stride, padding_type)
+ .reshape(pre_contract_dims)
+ .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel)
+ .reshape(post_contract_dims));
+ }
+}
+
+} // end namespace Eigen
+
+#endif // __NNFW_CKER_EIGEN_EIGEN_SPATIAL_CONVOLUTIONS_INL_H__
diff --git a/compute/cker/include/cker/eigen/eigen_spatial_convolutions.h b/compute/cker/include/cker/eigen/eigen_spatial_convolutions.h
new file mode 100644
index 000000000..5af2e4836
--- /dev/null
+++ b/compute/cker/include/cker/eigen/eigen_spatial_convolutions.h
@@ -0,0 +1,27 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef __NNFW_CKER_EGIEN_EIGEN_SPATIAL_CONVOLUTIONS_H__
+#define __NNFW_CKER_EGIEN_EIGEN_SPATIAL_CONVOLUTIONS_H__
+
+//#define EIGEN_USE_CUSTOM_THREAD_POOL
+#define EIGEN_USE_THREADS
+#include "unsupported/Eigen/CXX11/Tensor"
+
+// Note the following header is used in both TF and TFLite. Particularly, it's
+// used for float TFLite Conv2D.
+#include "cker/eigen/eigen_spatial_convolutions-inl.h"
+
+#endif // __NNFW_CKER_EGIEN_EIGEN_SPATIAL_CONVOLUTIONS_H__
diff --git a/compute/cker/include/cker/eigen/eigen_tensor_reduced_instantiations_oss.h b/compute/cker/include/cker/eigen/eigen_tensor_reduced_instantiations_oss.h
new file mode 100644
index 000000000..4ca321955
--- /dev/null
+++ b/compute/cker/include/cker/eigen/eigen_tensor_reduced_instantiations_oss.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This is essentially unsupported/CXX11/Eigen/Tensor.h
+// TODO(petewarden) - move this to a common location in Eigen itself.
+
+// clang-format off
+
+
+#ifndef __NNFW_CKER_EGIEN_EIGEN_TENSOR_REDUCED_INSTANTIATIONS_OSS_H__
+#define __NNFW_CKER_EGIEN_EIGEN_TENSOR_REDUCED_INSTANTIATIONS_OSS_H__
+
+
+#include "Eigen/Core"
+
+#if defined(EIGEN_USE_SYCL)
+#undef min
+#undef max
+#undef isnan
+#undef isinf
+#undef isfinite
+#include <CL/sycl.hpp>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <utility>
+#endif
+#include <cmath>
+#include <cstddef>
+#include <cstring>
+
+
+
+
+
+#ifdef _WIN32
+typedef __int16 int16_t;
+typedef unsigned __int16 uint16_t;
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+#include <windows.h>
+#else
+#include <stdint.h>
+#include <unistd.h>
+#endif
+
+#if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900
+#include <random>
+#endif
+
+#ifdef _WIN32
+#include <windows.h>
+#elif defined(__APPLE__)
+#include <mach/mach_time.h>
+#else
+#include <time.h>
+#endif
+
+// #if defined(EIGEN_USE_LIBXSMM)
+// #include "libxsmm.h"
+// #endif
+
+#ifdef EIGEN_USE_THREADS
+#include "unsupported/Eigen/CXX11/ThreadPool"
+#endif
+
+
+#include "Eigen/src/Core/util/DisableStupidWarnings.h"
+
+#include "unsupported/Eigen/SpecialFunctions"
+#include "unsupported/Eigen/CXX11/src/util/CXX11Meta.h"
+#include "unsupported/Eigen/CXX11/src/util/MaxSizeVector.h"
+
+
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h"
+
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorBase.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h"
+
+#undef TENSOR_CONTRACTION_DISPATCH
+#define TENSOR_CONTRACTION_DISPATCH(METHOD, ALIGNMENT, ARGS) \
+ if (this->m_lhs_inner_dim_contiguous && \
+ this->m_rhs_inner_dim_contiguous && \
+ !this->m_rhs_inner_dim_reordered) { \
+ METHOD<true, true, false, ALIGNMENT> ARGS; \
+ } else { \
+ eigen_assert(false && "Unsupported contraction formats"); \
+ }
+
+
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorScan.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/Tensor.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorMap.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorRef.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorIO.h"
+
+#include "Eigen/src/Core/util/ReenableStupidWarnings.h"
+
+
+#endif // __NNFW_CKER_EGIEN_EIGEN_TENSOR_REDUCED_INSTANTIATIONS_OSS_H__
diff --git a/compute/cker/include/cker/gemmlowp/FixedPoint.h b/compute/cker/include/cker/gemmlowp/FixedPoint.h
deleted file mode 100644
index 159e01a22..000000000
--- a/compute/cker/include/cker/gemmlowp/FixedPoint.h
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_CKER_GEMMLOWP_FIXED_POINT_H__
-#define __NNFW_CKER_GEMMLOWP_FIXED_POINT_H__
-
-#include <algorithm>
-#include <cassert>
-
-namespace nnfw
-{
-namespace cker
-{
-namespace gemmlowp
-{
-
-inline int32_t RoundingHalfSum(int32_t a, int32_t b)
-{
- int64_t a64 = a;
- int64_t b64 = b;
- int64_t sum = a64 + b64;
- int64_t sign = sum >= 0 ? 1 : -1;
- return static_cast<int32_t>((sum + sign) / 2);
-}
-
-inline int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
-{
- bool overflow = a == b && a == std::numeric_limits<int32_t>::min();
- int64_t a_64(a);
- int64_t b_64(b);
- int64_t ab_64 = a_64 * b_64;
- int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
- int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
- return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
-}
-
-// Correctly-rounded-to-nearest division by a power-of-two.
-// Also known as a rounding arithmetic right shift.
-inline int32_t RoundingDivideByPOT(int32_t x, int exponent)
-{
- assert(exponent >= 0);
- assert(exponent <= 31);
- const int32_t mask = ((1ll << exponent) - 1);
- const int32_t zero = 0;
- const int32_t one = 1;
- const int32_t remainder = x & mask;
- const int32_t threshold = (mask >> 1) + ((x < zero) ? one : zero);
- return ((x >> exponent) + ((remainder > threshold) ? one : zero));
-}
-
-// Returns the product of a run-time integer value by a compile-time power
-// of two, with either a positive exponent (equivalent to an arithmetic
-// left shift, saturating) or a negative exponent (equivalent to an arithmetic
-// right shift, rounding to nearest).
-template <int Exponent, int ExponentSign = (Exponent > 0 ? 1 : Exponent < 0 ? -1 : 0)>
-struct ImplSaturatingRoundingMultiplyByPOT
-{
-};
-
-template <int Exponent> struct ImplSaturatingRoundingMultiplyByPOT<Exponent, 0>
-{
- static int32_t eval(int32_t x) { return x; }
-};
-
-template <int Exponent> struct ImplSaturatingRoundingMultiplyByPOT<Exponent, 1>
-{
- static int32_t eval(int32_t x)
- {
- const int32_t min = (std::numeric_limits<int32_t>::min());
- const int32_t max = (std::numeric_limits<int32_t>::max());
- const int32_t threshold = ((1 << (31 - Exponent)) - 1);
- const int32_t zero = 0;
- const int32_t one = 1;
-
- const int32_t positive_mask = ((x > threshold) ? ~zero : zero);
- const int32_t negative_mask = ((x < -threshold) ? ~zero : zero);
-
- int32_t result = (x * (one << Exponent));
- result = (positive_mask ? max : result);
- result = (negative_mask ? min : result);
- return result;
- }
-};
-
-template <int Exponent> struct ImplSaturatingRoundingMultiplyByPOT<Exponent, -1>
-{
- static int32_t eval(int32_t x) { return RoundingDivideByPOT(x, -Exponent); }
-};
-
-template <int Exponent> int32_t SaturatingRoundingMultiplyByPOT(int32_t x)
-{
- return ImplSaturatingRoundingMultiplyByPOT<Exponent>::eval(x);
-}
-
-template <int tIntegerBits> class FixedPoint
-{
-public:
- static constexpr int kTotalBits = 8 * sizeof(int32_t);
- static constexpr int kIntegerBits = tIntegerBits;
- static constexpr int kFractionalBits = kTotalBits - 1 - kIntegerBits;
- static_assert(kIntegerBits >= 0 && kIntegerBits < kTotalBits, "bad IntegerBits");
-
- static int32_t ScalarRawMax() { return std::numeric_limits<int32_t>::max(); }
-
- static FixedPoint FromRaw(int32_t x)
- {
- FixedPoint retval;
- retval.raw() = x;
- return retval;
- }
-
- static FixedPoint FromScalarRaw(int32_t x) { return FromRaw(x); }
-
- template <int Exponent> static FixedPoint ConstantPOT()
- {
- static constexpr int kOffset = kFractionalBits + Exponent;
- static_assert(kOffset < 31, "Constant not exactly representable in this fixed-point format");
- return FromScalarRaw((int32_t)1 << kOffset);
- }
-
- static FixedPoint Zero() { return FromScalarRaw(0); }
-
- static FixedPoint One()
- {
- return FromScalarRaw(kIntegerBits == 0 ? ScalarRawMax() : ((int32_t)1 << kFractionalBits));
- }
-
- int32_t raw() const { return i_; }
- int32_t &raw() { return i_; }
-
-private:
- int32_t i_;
-};
-
-// A FixedPoint multiplication is just a
-// SaturatingRoundingDoublingHighMul operation on the underlying
-// raw integer values. The IntegerBits simply add up, as is obvious
-// from the fact that the range is [-2^IntegerBits, 2^IntegerBits).
-template <int tIntegerBits_a, int tIntegerBits_b>
-FixedPoint<tIntegerBits_a + tIntegerBits_b> operator*(FixedPoint<tIntegerBits_a> a,
- FixedPoint<tIntegerBits_b> b)
-{
- FixedPoint<tIntegerBits_a + tIntegerBits_b> c;
- c.raw() = SaturatingRoundingDoublingHighMul(a.raw(), b.raw());
- return c;
-}
-
-// Tweaking IntegerBits gives exact multiplication by a power of two.
-template <int tExponent, int tIntegerBits>
-FixedPoint<tExponent + tIntegerBits> ExactMulByPot(FixedPoint<tIntegerBits> a)
-{
- FixedPoint<tExponent + tIntegerBits> c;
- c.raw() = a.raw();
- return c;
-}
-
-template <int tIntegerBits>
-FixedPoint<tIntegerBits> operator+(FixedPoint<tIntegerBits> a, FixedPoint<tIntegerBits> b)
-{
- return FixedPoint<tIntegerBits>::FromRaw((a.raw() + b.raw()));
-}
-template <int tIntegerBits>
-FixedPoint<tIntegerBits> operator-(FixedPoint<tIntegerBits> a, FixedPoint<tIntegerBits> b)
-{
- return FixedPoint<tIntegerBits>::FromRaw((a.raw() - b.raw()));
-}
-template <int tIntegerBits>
-FixedPoint<tIntegerBits> operator&(FixedPoint<tIntegerBits> a, FixedPoint<tIntegerBits> b)
-{
- return FixedPoint<tIntegerBits>::FromRaw((a.raw() & b.raw()));
-}
-
-// Rescale changes the number of IntegerBits and updates the underlying
-// raw integer value accordingly.
-template <int tIntegerBitsDst, int tIntegerBitsSrc>
-FixedPoint<tIntegerBitsDst> Rescale(FixedPoint<tIntegerBitsSrc> x)
-{
- static constexpr int kExponent = tIntegerBitsSrc - tIntegerBitsDst;
- FixedPoint<tIntegerBitsDst> result;
- result.raw() = SaturatingRoundingMultiplyByPOT<kExponent>(x.raw());
- return result;
-}
-
-// Implementation of exponential function.
-
-// Returns exp(x) for x in [-1/4, 0).
-inline FixedPoint<0> exp_on_interval_between_negative_one_quarter_and_0_excl(FixedPoint<0> a)
-{
- typedef FixedPoint<0> F;
- const F constant_term = F::FromScalarRaw(RoundingDivideByPOT(1895147668, 0));
- const F constant_1_over_3 = F::FromScalarRaw(RoundingDivideByPOT(715827883, 0));
- // We're evaluating a Taylor expansion around -1/8, so we do the change of
- // variable: x = a + 1/8.
- // In fixed-point with 0 integer bits, 1/8 is represented by 1 << 28.
- F x = a + F::template ConstantPOT<-3>();
- F x2 = x * x;
- F x3 = x2 * x;
- F x4 = x2 * x2;
- F x4_over_4 = F::FromScalarRaw(SaturatingRoundingMultiplyByPOT<-2>(x4.raw()));
- F x4_over_24_plus_x3_over_6_plus_x2_over_2 = F::FromScalarRaw(
- SaturatingRoundingMultiplyByPOT<-1>((((x4_over_4 + x3) * constant_1_over_3) + x2).raw()));
- return (constant_term + constant_term * (x + x4_over_24_plus_x3_over_6_plus_x2_over_2));
-}
-
-// Returns exp(x) for x < 0.
-template <int tIntegerBits> FixedPoint<0> exp_on_negative_values(FixedPoint<tIntegerBits> a)
-{
- typedef FixedPoint<tIntegerBits> InputF;
- typedef FixedPoint<0> ResultF;
- static constexpr int kFractionalBits = InputF::kFractionalBits;
- static constexpr int kIntegerBits = InputF::kIntegerBits;
- const InputF kOneQuarter = InputF::template ConstantPOT<-2>();
- InputF mask = kOneQuarter - InputF::FromScalarRaw(1);
- InputF a_mod_quarter_minus_one_quarter = (a & mask) - kOneQuarter;
- ResultF result = exp_on_interval_between_negative_one_quarter_and_0_excl(
- Rescale<0>(a_mod_quarter_minus_one_quarter));
- int32_t remainder = (a_mod_quarter_minus_one_quarter - a).raw();
-
-#define GEMMLOWP_EXP_BARREL_SHIFTER(Exponent, FixedPointMultiplier) \
- if (kIntegerBits > Exponent) \
- { \
- const ResultF kMultiplier = \
- ResultF::FromScalarRaw(RoundingDivideByPOT(FixedPointMultiplier, 0)); \
- static constexpr int kShiftAmount = \
- ((kIntegerBits > Exponent) ? (kFractionalBits + Exponent) : 0); \
- result = ((remainder & (1 << kShiftAmount)) ? (result * kMultiplier) : result); \
- }
-
- GEMMLOWP_EXP_BARREL_SHIFTER(-2, 1672461947);
- GEMMLOWP_EXP_BARREL_SHIFTER(-1, 1302514674);
- GEMMLOWP_EXP_BARREL_SHIFTER(+0, 790015084);
- GEMMLOWP_EXP_BARREL_SHIFTER(+1, 290630308);
- GEMMLOWP_EXP_BARREL_SHIFTER(+2, 39332535);
- GEMMLOWP_EXP_BARREL_SHIFTER(+3, 720401);
- GEMMLOWP_EXP_BARREL_SHIFTER(+4, 242);
-
-#undef GEMMLOWP_EXP_BARREL_SHIFTER
-
- static constexpr int clampB = ((kIntegerBits > 5) ? (36 - kIntegerBits) : 0);
- if (kIntegerBits > 5)
- {
- const InputF clamp = InputF::FromScalarRaw(RoundingDivideByPOT(-(1 << clampB), 0));
- result.raw() = ((a.raw() < clamp.raw()) ? ResultF::Zero().raw() : result.raw());
- }
-
- result.raw() = (a.raw() ? result.raw() : ResultF::One().raw());
- return result;
-}
-
-// Returns 1 / (1 + x) for x in (0, 1).
-inline FixedPoint<0> one_over_one_plus_x_for_x_in_0_1(FixedPoint<0> a)
-{
- typedef FixedPoint<0> F0;
- typedef FixedPoint<2> F2;
- F0 half_denominator = F0::FromScalarRaw(RoundingHalfSum(a.raw(), F0::One().raw()));
- // Newton-Raphson division
- // https://en.wikipedia.org/wiki/Division_algorithm#Newton.E2.80.93Raphson_division
- // Refer to that page for the logic behind the 48/17 and 32/17 constants.
- const F2 constant_48_over_17 = F2::FromScalarRaw(RoundingDivideByPOT(1515870810, 0));
- const F2 constant_neg_32_over_17 = F2::FromScalarRaw(RoundingDivideByPOT(-1010580540, 0));
- F2 x = constant_48_over_17 + half_denominator * constant_neg_32_over_17;
- for (int i = 0; i < 3; i++)
- {
- F2 half_denominator_times_x = half_denominator * x;
- F2 one_minus_half_denominator_times_x = F2::One() - half_denominator_times_x;
- x = x + Rescale<2>(x * one_minus_half_denominator_times_x);
- }
- return Rescale<0>(ExactMulByPot<-1>(x));
-}
-
-} // namespace gemmlowp
-} // namespace cker
-} // namespace nnfw
-
-#endif // __NNFW_CKER_GEMMLOWP_FIXED_POINT_H__
diff --git a/compute/cker/include/cker/gemmlowp/GEMMSupport.h b/compute/cker/include/cker/gemmlowp/GEMMSupport.h
new file mode 100644
index 000000000..76486eded
--- /dev/null
+++ b/compute/cker/include/cker/gemmlowp/GEMMSupport.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_GEMMLOWP_GEMM_SUPPORT_H__
+#define __NNFW_CKER_GEMMLOWP_GEMM_SUPPORT_H__
+
+#include <public/gemmlowp.h>
+
+#include <memory>
+#include <thread>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace gemm_support
+{
+
+struct GemmContext
+{
+ std::unique_ptr<gemmlowp::GemmContext> gemm_context;
+ constexpr static int default_num_threadpool_threads = 4;
+
+ GemmContext()
+ {
+ int num_threads = std::thread::hardware_concurrency() / 2;
+ if (num_threads == 0)
+ {
+ num_threads = default_num_threadpool_threads;
+ }
+
+ gemm_context.reset(new gemmlowp::GemmContext());
+ gemm_context->set_max_num_threads(num_threads);
+ }
+
+ static inline GemmContext &GetGemmLowpContext()
+ {
+ static GemmContext instance;
+ return instance;
+ }
+};
+
+inline gemmlowp::GemmContext *GetGemmLowpContext()
+{
+ auto &ctx = GemmContext::GetGemmLowpContext();
+ return ctx.gemm_context.get();
+}
+
+} // namespace gemm_support
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_GEMMLOWP_GEMM_SUPPORT_H__
diff --git a/compute/cker/include/cker/neon/neon_check.h b/compute/cker/include/cker/neon/neon_check.h
new file mode 100644
index 000000000..119d82719
--- /dev/null
+++ b/compute/cker/include/cker/neon/neon_check.h
@@ -0,0 +1,48 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef __NNFW_CKER_NEON_CHECK_H__
+#define __NNFW_CKER_NEON_CHECK_H__
+
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#define USE_NEON
+#include <arm_neon.h>
+#endif
+
+// Disable X86_NEON
+// #if defined __GNUC__ && defined __SSE4_1__ && !defined TF_LITE_DISABLE_X86_NEON
+#if 0
+#define USE_NEON
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#pragma GCC diagnostic ignored "-Wattributes"
+#pragma GCC diagnostic ignored "-Wnarrowing"
+#pragma GCC diagnostic ignored "-Wsequence-point"
+#include "NEON_2_SSE.h"
+#pragma GCC diagnostic pop
+#endif
+
+// NEON_OR_PORTABLE(SomeFunc, args) calls NeonSomeFunc(args) if USE_NEON is
+// defined, PortableSomeFunc(args) otherwise.
+#ifdef USE_NEON
+// Always use Neon code
+#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__)
+
+#else
+// No NEON available: Use Portable code
+#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__)
+
+#endif // defined(USE_NEON)
+
+#endif // __NNFW_CKER_NEON_CHECK_H__
diff --git a/compute/cker/include/cker/operation/AveragePool.h b/compute/cker/include/cker/operation/AveragePool.h
index b20919429..de43ba3bd 100644
--- a/compute/cker/include/cker/operation/AveragePool.h
+++ b/compute/cker/include/cker/operation/AveragePool.h
@@ -18,30 +18,93 @@
#ifndef __NNFW_CKER_AVERAGE_POOL_H__
#define __NNFW_CKER_AVERAGE_POOL_H__
-#if defined(CKER_OPTIMIZED_EIGEN)
-#include "cker/operation/optimized/AveragePool.h"
-#endif // defined(CKER_OPTIMIZED_EIGEN)
+#include "cker/neon/neon_check.h"
+#include "cker/eigen/Utils.h"
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
-#include "cker/operation/reference/AveragePool.h"
+#include <Eigen/Core>
namespace nnfw
{
namespace cker
{
+// TODO Change to apply neon for this function if it is faster
inline void AveragePool(const PoolParams &params, const Shape &input_shape, const float *input_data,
const Shape &output_shape, float *output_data)
{
-#if defined(CKER_OPTIMIZED_EIGEN)
- optimized::AveragePool(params, input_shape, input_data, output_shape, output_data);
-#else // defined(CKER_OPTIMIZED_EIGEN)
- reference::AveragePool(params, input_shape, input_data, output_shape, output_data);
-#endif // defined(CKER_OPTIMIZED_EIGEN)
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ const int stride_height = params.stride_height;
+ const int stride_width = params.stride_width;
+
+ // TODO(benoitjacob) make this a proper reference impl without Eigen!
+ const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
+ auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
+ // TODO(benoitjacob) get rid of the dynamic memory allocation here!
+ Eigen::VectorXf out_count(out_mat.cols());
+ out_count.setZero();
+ // Prefill the output to 0.
+ out_mat.setZero();
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int h = 0; h < input_height; ++h)
+ {
+ for (int w = 0; w < input_width; ++w)
+ {
+ // (h_start, h_end) * (w_start, w_end) is the range that the input
+ // vector projects to.
+ int hpad = h + params.padding_values.height;
+ int wpad = w + params.padding_values.width;
+ int h_start =
+ (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
+ int h_end = std::min(hpad / stride_height + 1, output_height);
+ int w_start =
+ (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
+ int w_end = std::min(wpad / stride_width + 1, output_width);
+ // compute elementwise sum
+ for (int ph = h_start; ph < h_end; ++ph)
+ {
+ for (int pw = w_start; pw < w_end; ++pw)
+ {
+ int out_offset = NodeOffset(b, ph, pw, output_height, output_width);
+ out_mat.col(out_offset) += in_mat.col(NodeOffset(b, h, w, input_height, input_width));
+ out_count(out_offset)++;
+ }
+ }
+ }
+ }
+ }
+ // Divide the output by the actual number of elements being averaged over
+ assert(out_count.minCoeff() > 0);
+ out_mat.array().rowwise() /= out_count.transpose().array();
+
+ const int flat_size = output_shape.FlatSize();
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = ActivationFunctionWithMinMax(output_data[i], params.float_activation_min,
+ params.float_activation_max);
+ }
}
-inline void AveragePool(const PoolParams &params, const Shape &input_shape,
- const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
+inline void AveragePool16(const PoolParams &params, const Shape &input_shape,
+ const uint8_t *input_data, const Shape &output_shape,
+ uint8_t *output_data)
{
+ // Here, and in other pooling ops, in order to maintain locality of reference,
+ // to minimize some recalculations, and to load into NEON vector registers, we
+ // use an inner loop down the depth. Since depths can be large and hence we
+ // would need arbitrarily large temporary storage, we divide the work up into
+ // depth tranches just within the batch loop.
+ static constexpr int kPoolingAccTrancheSize = 256;
+
assert(params.quantized_activation_min <= params.quantized_activation_max);
assert(input_shape.DimensionsCount() == 4);
assert(output_shape.DimensionsCount() == 4);
@@ -53,48 +116,274 @@ inline void AveragePool(const PoolParams &params, const Shape &input_shape,
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
+
+ uint16_t acc[kPoolingAccTrancheSize];
for (int batch = 0; batch < batches; ++batch)
{
- for (int out_y = 0; out_y < output_height; ++out_y)
+ // We proceed through the depth in tranches (see comment above). The
+ // depth_base is the depth at the beginning of the tranche. The
+ // tranche_depth is the depth dimension of the tranche.
+ for (int depth_base = 0; depth_base < depth; depth_base += kPoolingAccTrancheSize)
{
- for (int out_x = 0; out_x < output_width; ++out_x)
+ const int tranche_depth = std::min(depth - depth_base, kPoolingAccTrancheSize);
+ for (int out_y = 0; out_y < output_height; ++out_y)
{
- const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
- const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
- // Compute the boundaries of the filter region clamped so as to
- // ensure that the filter window fits in the input array.
- const int filter_x_start = std::max(0, -in_x_origin);
- const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
- const int filter_y_start = std::max(0, -in_y_origin);
- const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
- int filter_count = (filter_y_end - filter_y_start) * (filter_x_end - filter_x_start);
- if (filter_count <= 0)
+ for (int out_x = 0; out_x < output_width; ++out_x)
{
- continue;
+ const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
+ const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
+ const int filter_x_start = std::max(0, -in_x_origin);
+ const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
+ const int filter_y_start = std::max(0, -in_y_origin);
+ const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
+ const int filter_count =
+ (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
+ memset(acc, 0, tranche_depth * sizeof(acc[0]));
+ const uint8_t *input_ptr =
+ input_data + depth_base +
+ depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
+ for (int fy = filter_y_start; fy < filter_y_end; fy++)
+ {
+ const uint8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start);
+ for (int fx = filter_x_start; fx < filter_x_end; fx++)
+ {
+ const uint8_t *input_channel_ptr = input_row_ptr;
+ int channel = 0;
+#ifdef USE_NEON
+ for (; channel <= tranche_depth - 16; channel += 16)
+ {
+ uint16x8_t acc_reg[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc_reg[i] = vld1q_u16(acc + channel + 8 * i);
+ }
+ uint8x16_t input_reg = vld1q_u8(input_channel_ptr);
+ input_channel_ptr += 16;
+ acc_reg[0] = vaddw_u8(acc_reg[0], vget_low_u8(input_reg));
+ acc_reg[1] = vaddw_u8(acc_reg[1], vget_high_u8(input_reg));
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_u16(acc + channel + 8 * i, acc_reg[i]);
+ }
+ }
+ for (; channel <= tranche_depth - 8; channel += 8)
+ {
+ uint16x8_t acc_reg = vld1q_u16(acc + channel);
+ uint8x8_t input_reg = vld1_u8(input_channel_ptr);
+ input_channel_ptr += 8;
+ acc_reg = vaddw_u8(acc_reg, input_reg);
+ vst1q_u16(acc + channel, acc_reg);
+ }
+#endif
+ for (; channel < tranche_depth; ++channel)
+ {
+ acc[channel] += *input_channel_ptr++;
+ }
+ input_row_ptr += depth;
+ }
+ }
+ uint8_t *output_ptr = output_data + Offset(output_shape, batch, out_y, out_x, depth_base);
+ int channel = 0;
+#ifdef USE_NEON
+#define AVGPOOL_DIVIDING_BY(FILTER_COUNT) \
+ if (filter_count == FILTER_COUNT) \
+ { \
+ for (; channel <= tranche_depth - 8; channel += 8) \
+ { \
+ uint16_t buf[8]; \
+ for (int i = 0; i < 8; i++) \
+ { \
+ buf[i] = (acc[channel + i] + FILTER_COUNT / 2) / FILTER_COUNT; \
+ } \
+ uint8x8_t buf8 = vqmovn_u16(vld1q_u16(buf)); \
+ buf8 = vmin_u8(buf8, vdup_n_u8(params.quantized_activation_max)); \
+ buf8 = vmax_u8(buf8, vdup_n_u8(params.quantized_activation_min)); \
+ vst1_u8(output_ptr + channel, buf8); \
+ } \
+ }
+ AVGPOOL_DIVIDING_BY(9)
+ AVGPOOL_DIVIDING_BY(15)
+#undef AVGPOOL_DIVIDING_BY
+ for (; channel <= tranche_depth - 8; channel += 8)
+ {
+ uint16_t buf[8];
+ for (int i = 0; i < 8; i++)
+ {
+ buf[i] = (acc[channel + i] + filter_count / 2) / filter_count;
+ }
+ uint8x8_t buf8 = vqmovn_u16(vld1q_u16(buf));
+ buf8 = vmin_u8(buf8, vdup_n_u8(params.quantized_activation_max));
+ buf8 = vmax_u8(buf8, vdup_n_u8(params.quantized_activation_min));
+ vst1_u8(output_ptr + channel, buf8);
+ }
+#endif
+ for (; channel < tranche_depth; ++channel)
+ {
+ uint8_t a = (acc[channel] + filter_count / 2) / filter_count;
+ a = std::max<uint16_t>(a, params.quantized_activation_min);
+ a = std::min<uint16_t>(a, params.quantized_activation_max);
+ output_ptr[channel] = static_cast<uint8_t>(a);
+ }
}
- for (int channel = 0; channel < depth; ++channel)
+ }
+ }
+ }
+}
+
+inline void AveragePool32(const PoolParams &params, const Shape &input_shape,
+ const uint8_t *input_data, const Shape &output_shape,
+ uint8_t *output_data)
+{
+
+ // Here, and in other pooling ops, in order to maintain locality of reference,
+ // to minimize some recalculations, and to load into NEON vector registers, we
+ // use an inner loop down the depth. Since depths can be large and hence we
+ // would need arbitrarily large temporary storage, we divide the work up into
+ // depth tranches just within the batch loop.
+ static constexpr int kPoolingAccTrancheSize = 256;
+
+ assert(params.quantized_activation_min <= params.quantized_activation_max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ const int stride_height = params.stride_height;
+ const int stride_width = params.stride_width;
+
+ uint32_t acc[kPoolingAccTrancheSize];
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ // We proceed through the depth in tranches (see comment above). The
+ // depth_base is the depth at the beginning of the tranche. The
+ // tranche_depth is the depth dimension of the tranche.
+ for (int depth_base = 0; depth_base < depth; depth_base += kPoolingAccTrancheSize)
+ {
+ const int tranche_depth = std::min(depth - depth_base, kPoolingAccTrancheSize);
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
{
- int32_t acc = 0;
- for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
+ const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
+ const int filter_x_start = std::max(0, -in_x_origin);
+ const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
+ const int filter_y_start = std::max(0, -in_y_origin);
+ const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
+ const int filter_count =
+ (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
+ memset(acc, 0, tranche_depth * sizeof(acc[0]));
+ const uint8_t *input_ptr =
+ input_data + depth_base +
+ depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
+ for (int fy = filter_y_start; fy < filter_y_end; fy++)
+ {
+ const uint8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start);
+ for (int fx = filter_x_start; fx < filter_x_end; fx++)
+ {
+ const uint8_t *input_channel_ptr = input_row_ptr;
+ int channel = 0;
+#ifdef USE_NEON
+ for (; channel <= tranche_depth - 16; channel += 16)
+ {
+ uint16x4_t acc_reg[4];
+ uint8x16_t input_reg = vld1q_u8(input_channel_ptr);
+ input_channel_ptr += 16;
+ acc_reg[0] = vget_low_u16(vmovl_u8(vget_low_u8(input_reg)));
+ acc_reg[1] = vget_high_u16(vmovl_u8(vget_low_u8(input_reg)));
+ acc_reg[2] = vget_low_u16(vmovl_u8(vget_high_u8(input_reg)));
+ acc_reg[3] = vget_high_u16(vmovl_u8(vget_high_u8(input_reg)));
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_u32(acc + channel + 4 * i,
+ vaddw_u16(vld1q_u32(acc + channel + 4 * i), acc_reg[i]));
+ }
+ }
+ for (; channel <= tranche_depth - 8; channel += 8)
+ {
+ uint16x4_t acc_reg[2];
+ uint16x8_t input_reg = vmovl_u8(vld1_u8(input_channel_ptr));
+ input_channel_ptr += 8;
+ acc_reg[0] = vget_low_u16(input_reg);
+ acc_reg[1] = vget_high_u16(input_reg);
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_u32(acc + channel + 4 * i,
+ vaddw_u16(vld1q_u32(acc + channel + 4 * i), acc_reg[i]));
+ }
+ }
+#endif
+ for (; channel < tranche_depth; ++channel)
+ {
+ acc[channel] += *input_channel_ptr++;
+ }
+ input_row_ptr += depth;
+ }
+ }
+ uint8_t *output_ptr = output_data + Offset(output_shape, batch, out_y, out_x, depth_base);
+ int channel = 0;
+#ifdef USE_NEON
+#define AVGPOOL_DIVIDING_BY(FILTER_COUNT) \
+ if (filter_count == FILTER_COUNT) \
+ { \
+ for (; channel <= tranche_depth - 8; channel += 8) \
+ { \
+ uint16_t buf[8]; \
+ for (int i = 0; i < 8; i++) \
+ { \
+ buf[i] = (acc[channel + i] + FILTER_COUNT / 2) / FILTER_COUNT; \
+ } \
+ uint8x8_t buf8 = vqmovn_u16(vld1q_u16(buf)); \
+ buf8 = vmin_u8(buf8, vdup_n_u8(params.quantized_activation_max)); \
+ buf8 = vmax_u8(buf8, vdup_n_u8(params.quantized_activation_min)); \
+ vst1_u8(output_ptr + channel, buf8); \
+ } \
+ }
+ AVGPOOL_DIVIDING_BY(9)
+ AVGPOOL_DIVIDING_BY(15)
+#undef AVGPOOL_DIVIDING_BY
+ for (; channel <= tranche_depth - 8; channel += 8)
{
- for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
+ uint16_t buf[8];
+ for (int i = 0; i < 8; i++)
{
- const int in_x = in_x_origin + filter_x;
- const int in_y = in_y_origin + filter_y;
- acc += input_data[Offset(input_shape, batch, in_y, in_x, channel)];
+ buf[i] = (acc[channel + i] + filter_count / 2) / filter_count;
}
+ uint8x8_t buf8 = vqmovn_u16(vld1q_u16(buf));
+ buf8 = vmin_u8(buf8, vdup_n_u8(params.quantized_activation_max));
+ buf8 = vmax_u8(buf8, vdup_n_u8(params.quantized_activation_min));
+ vst1_u8(output_ptr + channel, buf8);
+ }
+#endif
+ for (; channel < tranche_depth; ++channel)
+ {
+ uint16_t a = (acc[channel] + filter_count / 2) / filter_count;
+ a = std::max<uint16_t>(a, params.quantized_activation_min);
+ a = std::min<uint16_t>(a, params.quantized_activation_max);
+ output_ptr[channel] = static_cast<uint8_t>(a);
}
- acc = (acc + filter_count / 2) / filter_count;
- acc = std::max(acc, params.quantized_activation_min);
- acc = std::min(acc, params.quantized_activation_max);
- output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
- static_cast<uint8_t>(acc);
}
}
}
}
}
+inline void AveragePool(const PoolParams &params, const Shape &input_shape,
+ const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
+{
+ if (params.filter_height * params.filter_width > 16 * 16)
+ {
+ AveragePool32(params, input_shape, input_data, output_shape, output_data);
+ }
+ else
+ {
+ AveragePool16(params, input_shape, input_data, output_shape, output_data);
+ }
+}
+
} // namespace cker
} // namespace nnfw
diff --git a/compute/cker/include/cker/operation/BinaryArithmeticOps.h b/compute/cker/include/cker/operation/BinaryArithmeticOps.h
index 60dd02651..27b3fa49a 100644
--- a/compute/cker/include/cker/operation/BinaryArithmeticOps.h
+++ b/compute/cker/include/cker/operation/BinaryArithmeticOps.h
@@ -19,6 +19,8 @@
#define __NNFW_CKER_BINARY_ARITHMETIC_OPS_H__
#include <functional>
+#include "cker/operation/optimized/BinaryArithmeticOps.h"
+#include "cker/operation/reference/BinaryArithmeticOps.h"
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
@@ -28,69 +30,82 @@ namespace nnfw
namespace cker
{
-struct BinaryArithmeticOpParam
+namespace
{
- // Shape dependent / common to data / op types.
- // BroadcastableOpCategory broadcast_category;
- // uint8 inference params.
- int32_t input1_offset;
- int32_t input2_offset;
- int32_t output_offset;
- int32_t output_multiplier;
- int32_t output_shift;
- // Add / Sub, not Mul, uint8 inference params.
- int32_t left_shift;
- int32_t input1_multiplier;
- int32_t input1_shift;
- int32_t input2_multiplier;
- int32_t input2_shift;
- // uint8, etc, activation params.
- int32_t quantized_activation_min;
- int32_t quantized_activation_max;
- // float activation params.
- float float_activation_min;
- float float_activation_max;
-
- // Processed output dimensions.
- // Let input "a" be the one that broadcasts in the faster-changing dimension.
- // Then, after coalescing, for shapes {a0, a1, a2, a3, a4} and
- // {b0, b1, b2, b3, b4},
- // broadcast_shape[4] = b0 = a0.
- // broadcast_shape[3] = b1; a1 = 1.
- // broadcast_shape[2] = b2 = a2.
- // broadcast_shape[1] = a3; b3 = 1.
- // broadcast_shape[0] = b4 = a4.
- // int broadcast_shape[5];
-};
+template <typename T>
+const std::function<T(const T &, const T &)> GetBinaryArtithmeticFn(BinaryArithmeticOpType type)
+{
+ switch (type)
+ {
+ case BinaryArithmeticOpType::ADD:
+ {
+ return [](const T &a, const T &b) -> T { return a + b; };
+ }
+ case BinaryArithmeticOpType::MUL:
+ {
+ return [](const T &a, const T &b) -> T { return a * b; };
+ }
+ case BinaryArithmeticOpType::SUB:
+ {
+ return [](const T &a, const T &b) -> T { return a - b; };
+ }
+ case BinaryArithmeticOpType::DIV:
+ {
+ return [](const T &a, const T &b) -> T {
+ if (b == 0)
+ {
+ throw std::runtime_error("Divide by zero");
+ }
+ return a / b;
+ };
+ }
+ default:
+ {
+ assert(false);
+ return nullptr;
+ }
+ }
+}
+} // namespace
template <typename T>
inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
const T *input1_data, const Shape &input2_shape,
- const T *input2_data, const Shape &output_shape, T *output_data,
- const std::function<T(const T &, const T &)> &fn)
+ const T *input2_data, const Shape &output_shape, T *output_data)
{
- const int32_t flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
- for (int i = 0; i < flat_size; ++i)
- {
- output_data[i] = ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
- params.quantized_activation_min,
- params.quantized_activation_max);
- }
+ reference::BinaryArithmeticOp(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data, GetBinaryArtithmeticFn<T>(params.type));
}
template <>
inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
const float *input1_data, const Shape &input2_shape,
const float *input2_data, const Shape &output_shape,
- float *output_data,
- const std::function<float(const float &, const float &)> &fn)
+ float *output_data)
{
- const int size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
- for (int i = 0; i < size; i++)
+ // Supported type is only float now
+ switch (params.type)
{
- output_data[i] =
- ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
- params.float_activation_min, params.float_activation_max);
+ case nnfw::cker::BinaryArithmeticOpType::ADD:
+ optimized::Add(params, input1_shape, input1_data, input2_shape, input2_data, output_shape,
+ output_data);
+ break;
+ case nnfw::cker::BinaryArithmeticOpType::MUL:
+ optimized::Mul(params, input1_shape, input1_data, input2_shape, input2_data, output_shape,
+ output_data);
+ break;
+ case nnfw::cker::BinaryArithmeticOpType::SUB:
+ optimized::Sub(params, input1_shape, input1_data, input2_shape, input2_data, output_shape,
+ output_data);
+ break;
+ case nnfw::cker::BinaryArithmeticOpType::DIV:
+ reference::BinaryArithmeticOp(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data,
+ GetBinaryArtithmeticFn<float>(params.type));
+ break;
+ default:
+ assert(false);
+ break;
}
}
@@ -98,14 +113,15 @@ template <typename T>
inline void BroadcastBinaryArithmeticOpSlow(const BinaryArithmeticOpParam &params,
const Shape &input1_shape, const T *input1_data,
const Shape &input2_shape, const T *input2_data,
- const Shape &output_shape, T *output_data,
- const std::function<T(const T &, const T &)> &fn)
+ const Shape &output_shape, T *output_data)
{
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
+ const auto fn = GetBinaryArtithmeticFn<T>(params.type);
+
// Comment from tensorflow lite:
//
// In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -138,16 +154,18 @@ inline void BroadcastBinaryArithmeticOpSlow(const BinaryArithmeticOpParam &param
}
template <>
-inline void BroadcastBinaryArithmeticOpSlow(
- const BinaryArithmeticOpParam &params, const Shape &input1_shape, const float *input1_data,
- const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
- float *output_data, const std::function<float(const float &, const float &)> &fn)
+inline void BroadcastBinaryArithmeticOpSlow(const BinaryArithmeticOpParam &params,
+ const Shape &input1_shape, const float *input1_data,
+ const Shape &input2_shape, const float *input2_data,
+ const Shape &output_shape, float *output_data)
{
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
+ const auto fn = GetBinaryArtithmeticFn<float>(params.type);
+
for (int b = 0; b < extended_output_shape.Dims(0); ++b)
{
for (int y = 0; y < extended_output_shape.Dims(1); ++y)
diff --git a/compute/cker/include/cker/operation/Common.h b/compute/cker/include/cker/operation/Common.h
new file mode 100644
index 000000000..d69b38aca
--- /dev/null
+++ b/compute/cker/include/cker/operation/Common.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_COMMON_H__
+#define __NNFW_CKER_COMMON_H__
+
+#include "cker/neon/neon_check.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size, const float *bias_data,
+ int array_size, float *array_data)
+{
+ // Note: see b/132215220: in May 2019 we thought it would be OK to replace
+ // this with the Eigen one-liner:
+ // return (array.colwise() + bias).cwiseMin(clamp_max).cwiseMin(clamp_max).
+ // This turned out to severely regress performance: +4ms (i.e. 8%) on
+ // MobileNet v2 / 1.0 / 224. So we keep custom NEON code for now.
+ assert((array_size % bias_size) == 0);
+#ifdef USE_NEON
+ float *array_ptr = array_data;
+ float *array_end_ptr = array_ptr + array_size;
+ const auto clamp_min_vec = vdupq_n_f32(clamp_min);
+ const auto clamp_max_vec = vdupq_n_f32(clamp_max);
+ for (; array_ptr != array_end_ptr; array_ptr += bias_size)
+ {
+ int i = 0;
+ for (; i <= bias_size - 16; i += 16)
+ {
+ auto b0 = vld1q_f32(bias_data + i);
+ auto b1 = vld1q_f32(bias_data + i + 4);
+ auto b2 = vld1q_f32(bias_data + i + 8);
+ auto b3 = vld1q_f32(bias_data + i + 12);
+ auto a0 = vld1q_f32(array_ptr + i);
+ auto a1 = vld1q_f32(array_ptr + i + 4);
+ auto a2 = vld1q_f32(array_ptr + i + 8);
+ auto a3 = vld1q_f32(array_ptr + i + 12);
+ auto x0 = vaddq_f32(a0, b0);
+ auto x1 = vaddq_f32(a1, b1);
+ auto x2 = vaddq_f32(a2, b2);
+ auto x3 = vaddq_f32(a3, b3);
+ x0 = vmaxq_f32(clamp_min_vec, x0);
+ x1 = vmaxq_f32(clamp_min_vec, x1);
+ x2 = vmaxq_f32(clamp_min_vec, x2);
+ x3 = vmaxq_f32(clamp_min_vec, x3);
+ x0 = vminq_f32(clamp_max_vec, x0);
+ x1 = vminq_f32(clamp_max_vec, x1);
+ x2 = vminq_f32(clamp_max_vec, x2);
+ x3 = vminq_f32(clamp_max_vec, x3);
+ vst1q_f32(array_ptr + i, x0);
+ vst1q_f32(array_ptr + i + 4, x1);
+ vst1q_f32(array_ptr + i + 8, x2);
+ vst1q_f32(array_ptr + i + 12, x3);
+ }
+ for (; i <= bias_size - 4; i += 4)
+ {
+ auto b = vld1q_f32(bias_data + i);
+ auto a = vld1q_f32(array_ptr + i);
+ auto x = vaddq_f32(a, b);
+ x = vmaxq_f32(clamp_min_vec, x);
+ x = vminq_f32(clamp_max_vec, x);
+ vst1q_f32(array_ptr + i, x);
+ }
+ for (; i < bias_size; i++)
+ {
+ array_ptr[i] =
+ ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i], clamp_min, clamp_max);
+ }
+ }
+#else // not NEON
+ for (int array_offset = 0; array_offset < array_size; array_offset += bias_size)
+ {
+ for (int i = 0; i < bias_size; i++)
+ {
+ array_data[array_offset + i] = ActivationFunctionWithMinMax(
+ array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max);
+ }
+ }
+#endif
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_COMMON_H__
diff --git a/compute/cker/include/cker/operation/Comparison.h b/compute/cker/include/cker/operation/Comparison.h
new file mode 100644
index 000000000..4516358de
--- /dev/null
+++ b/compute/cker/include/cker/operation/Comparison.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_COMPARISON_H__
+#define __NNFW_CKER_COMPARISON_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename T> inline bool EqualFn(T lhs, T rhs) { return lhs == rhs; }
+template <typename T> inline bool NotEqualFn(T lhs, T rhs) { return lhs != rhs; }
+template <typename T> inline bool GreaterFn(T lhs, T rhs) { return lhs > rhs; }
+template <typename T> inline bool GreaterEqualFn(T lhs, T rhs) { return lhs >= rhs; }
+template <typename T> inline bool LessFn(T lhs, T rhs) { return lhs < rhs; }
+template <typename T> inline bool LessEqualFn(T lhs, T rhs) { return lhs <= rhs; }
+
+template <typename T> using ComparisonFn = bool (*)(T, T);
+
+template <typename T, ComparisonFn<T> F>
+inline void ComparisonImpl(const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, bool *output_data)
+{
+ const int64_t flatsize = // number of data....
+ MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ for (int64_t i = 0; i < flatsize; ++i)
+ {
+ output_data[i] = F(input1_data[i], input2_data[i]);
+ }
+}
+
+template <ComparisonFn<float> F>
+inline void Comparison(const Shape &input1_shape, const float *input1_data,
+ const Shape &input2_shape, const float *input2_data,
+ const Shape &output_shape, bool *output_data)
+{
+ ComparisonImpl<float, F>(input1_shape, input1_data, input2_shape, input2_data, output_shape,
+ output_data);
+}
+
+template <typename T, ComparisonFn<T> F>
+inline void
+BroadcastComparison4DSlowImpl(const Shape &unextended_input1_shape, const T *input1_data,
+ const Shape &unextended_input2_shape, const T *input2_data,
+ const Shape &unextended_output_shape, bool *output_data)
+{
+ assert(unextended_input1_shape.DimensionsCount() <= 4);
+ assert(unextended_input2_shape.DimensionsCount() <= 4);
+ assert(unextended_output_shape.DimensionsCount() <= 4);
+ const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+ &desc2);
+
+ for (int b = 0; b < output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < output_shape.Dims(3); ++c)
+ {
+ output_data[Offset(output_shape, b, y, x, c)] =
+ F(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
+ input2_data[SubscriptToIndex(desc2, b, y, x, c)]);
+ }
+ }
+ }
+ }
+}
+
+template <typename T, ComparisonFn<T> F>
+inline void BroadcastComparison4DSlow(const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, bool *output_data)
+{
+ BroadcastComparison4DSlowImpl<T, F>(input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+
+#define TFLITE_COMPARISON_OP(name) \
+ inline void name(const Shape &input1_shape, const float *input1_data, const Shape &input2_shape, \
+ const float *input2_data, const Shape &output_shape, bool *output_data) \
+ { \
+ Comparison<name##Fn>(input1_shape, input1_data, input2_shape, input2_data, output_shape, \
+ output_data); \
+ } \
+ template <typename T> \
+ inline void name##NoScaling(const Shape &input1_shape, const T *input1_data, \
+ const Shape &input2_shape, const T *input2_data, \
+ const Shape &output_shape, bool *output_data) \
+ { \
+ ComparisonImpl<T, name##Fn>(input1_shape, input1_data, input2_shape, input2_data, \
+ output_shape, output_data); \
+ } \
+ template <typename T> \
+ inline void Broadcast4DSlow##name##NoScaling(const Shape &input1_shape, const T *input1_data, \
+ const Shape &input2_shape, const T *input2_data, \
+ const Shape &output_shape, bool *output_data) \
+ { \
+ BroadcastComparison4DSlowImpl<T, name##Fn>(input1_shape, input1_data, input2_shape, \
+ input2_data, output_shape, output_data); \
+ } \
+ template <typename T> \
+ inline void Broadcast4DSlow##name(const Shape &input1_shape, const T *input1_data, \
+ const Shape &input2_shape, const T *input2_data, \
+ const Shape &output_shape, bool *output_data) \
+ { \
+ BroadcastComparison4DSlow<T, name##Fn>(input1_shape, input1_data, input2_shape, input2_data, \
+ output_shape, output_data); \
+ }
+
+TFLITE_COMPARISON_OP(Equal);
+TFLITE_COMPARISON_OP(NotEqual);
+TFLITE_COMPARISON_OP(Greater);
+TFLITE_COMPARISON_OP(GreaterEqual);
+TFLITE_COMPARISON_OP(Less);
+TFLITE_COMPARISON_OP(LessEqual);
+#undef TFLITE_COMPARISON_OP
+
+} // namespace cker
+} // namespace nnfw
+
+#endif
diff --git a/compute/cker/include/cker/operation/Concatenation.h b/compute/cker/include/cker/operation/Concatenation.h
index 69a179c8c..394123e30 100644
--- a/compute/cker/include/cker/operation/Concatenation.h
+++ b/compute/cker/include/cker/operation/Concatenation.h
@@ -18,25 +18,17 @@
#ifndef __NNFW_CKER_CONCATENATION_H__
#define __NNFW_CKER_CONCATENATION_H__
-#include <cstdint>
-
#include "cker/Shape.h"
+#include "cker/Types.h"
+
+#include <cstdint>
+#include <cmath>
namespace nnfw
{
namespace cker
{
-struct ConcatenationParams
-{
- int8_t axis;
- const int32_t *input_zeropoint;
- const float *input_scale;
- uint16_t inputs_count;
- int32_t output_zeropoint;
- float output_scale;
-};
-
template <typename Scalar>
inline void Concatenation(const ConcatenationParams &params, const Shape *const *input_shapes,
const Scalar *const *input_data, const Shape &output_shape,
@@ -87,6 +79,78 @@ inline void Concatenation(const ConcatenationParams &params, const Shape *const
}
}
+// quantized as it takes scale as a floating point value. This should be fixed
+// when optimizing this routine further.
+inline void ConcatenationWithScaling(const ConcatenationParams &params,
+ const Shape *const *input_shapes,
+ const uint8_t *const *input_data, const Shape &output_shape,
+ uint8_t *output_data)
+{
+ int axis = params.axis;
+ const int32_t *input_zeropoint = params.input_zeropoint;
+ const float *input_scale = params.input_scale;
+ int inputs_count = params.inputs_count;
+ const int32_t output_zeropoint = params.output_zeropoint;
+ const float output_scale = params.output_scale;
+
+ const int concat_dimensions = output_shape.DimensionsCount();
+ assert(axis <= concat_dimensions);
+
+ int64_t concat_size = 0;
+ for (int i = 0; i < inputs_count; i++)
+ {
+ assert(input_shapes[i]->DimensionsCount() == concat_dimensions);
+ for (int j = 0; j < concat_dimensions; j++)
+ {
+ if (j != axis)
+ {
+ assert(input_shapes[i]->Dims(j) == output_shape.Dims(j));
+ }
+ }
+ concat_size += input_shapes[i]->Dims(axis);
+ }
+ assert(concat_size == output_shape.Dims(axis));
+ int64_t outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ // For all input arrays,
+ // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+ int64_t base_inner_size = 1;
+ for (int i = axis + 1; i < concat_dimensions; ++i)
+ {
+ base_inner_size *= output_shape.Dims(i);
+ }
+
+ const float inverse_output_scale = 1.f / output_scale;
+ uint8_t *output_ptr = output_data;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
+ const uint8_t *input_ptr = input_data[i] + k * copy_size;
+ if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
+ {
+ memcpy(output_ptr, input_ptr, copy_size);
+ }
+ else
+ {
+ const float scale = input_scale[i] * inverse_output_scale;
+ const float bias = -input_zeropoint[i] * scale;
+ for (int j = 0; j < copy_size; ++j)
+ {
+ const int32_t value =
+ static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+ output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
+ }
+ }
+ output_ptr += copy_size;
+ }
+ }
+}
+
} // namespace cker
} // namespace nnfw
diff --git a/compute/cker/include/cker/operation/Conv.h b/compute/cker/include/cker/operation/Conv.h
index 35b0336fa..191cdb35e 100644
--- a/compute/cker/include/cker/operation/Conv.h
+++ b/compute/cker/include/cker/operation/Conv.h
@@ -21,196 +21,129 @@
#include "cker/Types.h"
#include "cker/Shape.h"
#include "cker/Utils.h"
+#include "cker/operation/reference/Conv.h"
+#include "cker/operation/optimized/Conv.h"
+#include <vector>
namespace nnfw
{
namespace cker
{
-struct ConvParams
+namespace
{
- PaddingType padding_type;
- PaddingValues padding_values;
- // TODO(starka): This was just "stride", so check that width+height is OK.
- int16_t stride_width;
- int16_t stride_height;
- int16_t dilation_width_factor;
- int16_t dilation_height_factor;
- // uint8_t inference params.
- // TODO(b/65838351): Use smaller types if appropriate.
- int32_t input_offset;
- int32_t weights_offset;
- int32_t output_offset;
- int32_t output_multiplier;
- int output_shift;
- // uint8_t, etc, activation params.
- int32_t quantized_activation_min;
- int32_t quantized_activation_max;
- // float activation params.
- float float_activation_min;
- float float_activation_max;
-};
-
-inline void Conv(const ConvParams &params, const Shape &input_shape, const float *input_data,
- const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
- const float *bias_data, const Shape &output_shape, float *output_data)
+// Naive implementation of transpose for floats. Could be optimized to be more
+// cache friendly, but for now it's a one-time cost on first run, and we would
+// prefer to remove the need to do this at all eventually.
+inline void TransposeFloatTensor(const float *input_data, const nnfw::cker::Shape &output_shape,
+ float *output_data)
{
- const int stride_width = params.stride_width;
- const int stride_height = params.stride_height;
- const int dilation_width_factor = params.dilation_width_factor;
- const int dilation_height_factor = params.dilation_height_factor;
- const int pad_width = params.padding_values.width;
- const int pad_height = params.padding_values.height;
- const float output_activation_min = params.float_activation_min;
- const float output_activation_max = params.float_activation_max;
- assert(input_shape.DimensionsCount() == 4);
- assert(filter_shape.DimensionsCount() == 4);
- assert(output_shape.DimensionsCount() == 4);
- UNUSED_RELEASE(bias_shape);
+ const int rows = output_shape.Dims(1);
+ const int cols = output_shape.Dims(0);
+ for (int i = 0; i < rows; ++i)
+ {
+ for (int j = 0; j < cols; ++j)
+ {
+ const float in_value = input_data[i * cols + j];
+ output_data[j * rows + i] = in_value;
+ }
+ }
+}
+} // namespace
- const int batches = MatchingDim(input_shape, 0, output_shape, 0);
- const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
- const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
- if (bias_data)
+class Conv
+{
+public:
+ Conv()
+ : _modified_filter_data(), _im2col_data(), _im2col_shape(4), _need_im2col(false),
+ _prepared(false)
{
- assert(bias_shape.FlatSize() == output_depth);
}
- const int input_height = input_shape.Dims(1);
- const int input_width = input_shape.Dims(2);
- const int filter_height = filter_shape.Dims(1);
- const int filter_width = filter_shape.Dims(2);
- const int output_height = output_shape.Dims(1);
- const int output_width = output_shape.Dims(2);
- for (int batch = 0; batch < batches; ++batch)
+
+ void prepare(const Shape &filter_shape, const float *filter_data, PaddingType padding_type,
+ bool &is_replaced_weights)
{
- for (int out_y = 0; out_y < output_height; ++out_y)
+ (void)filter_shape;
+ (void)filter_data;
+ (void)padding_type;
+ (void)is_replaced_weights;
+ if (!_prepared)
{
- for (int out_x = 0; out_x < output_width; ++out_x)
+ if (padding_type != PaddingType::kNone && std::thread::hardware_concurrency() > 1)
{
- for (int out_channel = 0; out_channel < output_depth; ++out_channel)
- {
- const int in_x_origin = (out_x * stride_width) - pad_width;
- const int in_y_origin = (out_y * stride_height) - pad_height;
- float total = 0.f;
- for (int filter_y = 0; filter_y < filter_height; ++filter_y)
- {
- for (int filter_x = 0; filter_x < filter_width; ++filter_x)
- {
- const int in_x = in_x_origin + dilation_width_factor * filter_x;
- const int in_y = in_y_origin + dilation_height_factor * filter_y;
- // If the location is outside the bounds of the input image,
- // use zero as a default value.
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
- {
- const int in_offset = Offset(input_shape, batch, in_y, in_x, 0);
- const int filter_offset = Offset(filter_shape, out_channel, filter_y, filter_x, 0);
- for (int in_channel = 0; in_channel < input_depth; ++in_channel)
- {
- float input_value = input_data[in_offset + in_channel];
- float filter_value = filter_data[filter_offset + in_channel];
- total += (input_value * filter_value);
- }
- }
- }
- }
- float bias_value = 0.0f;
- if (bias_data)
- {
- bias_value = bias_data[out_channel];
- }
- output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
- ActivationFunctionWithMinMax(total + bias_value, output_activation_min,
- output_activation_max);
- }
+ const auto output_depth = filter_shape.Dims(0);
+ const Shape hwcn_filter_shape{filter_shape.FlatSize() / output_depth, output_depth};
+ _modified_filter_data.resize(hwcn_filter_shape.FlatSize());
+ TransposeFloatTensor(filter_data, hwcn_filter_shape, &_modified_filter_data[0]);
+ is_replaced_weights = true;
}
+ _prepared = true;
}
}
-}
-inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8_t *input_data,
- const Shape &filter_shape, const uint8_t *filter_data, const Shape &bias_shape,
- const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
-{
- const int stride_width = params.stride_width;
- const int stride_height = params.stride_height;
- const int dilation_width_factor = params.dilation_width_factor;
- const int dilation_height_factor = params.dilation_height_factor;
- const int pad_width = params.padding_values.width;
- const int pad_height = params.padding_values.height;
- const int32_t input_offset = params.input_offset;
- const int32_t filter_offset = params.weights_offset;
- const int32_t output_offset = params.output_offset;
- const int32_t output_multiplier = params.output_multiplier;
- const int output_shift = params.output_shift;
- const int32_t output_activation_min = params.quantized_activation_min;
- const int32_t output_activation_max = params.quantized_activation_max;
- assert(output_activation_min <= output_activation_max);
-
- assert(input_shape.DimensionsCount() == 4);
- assert(filter_shape.DimensionsCount() == 4);
- assert(output_shape.DimensionsCount() == 4);
- UNUSED_RELEASE(bias_shape);
- const int batches = MatchingDim(input_shape, 0, output_shape, 0);
- const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
- const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
- if (bias_data)
+ void prepareQuant(const Shape &input_shape, const Shape &kernel_shape, const Shape &output_shape,
+ uint32_t stride_width, uint32_t stride_height)
{
- assert(bias_shape.FlatSize() == output_depth);
+ _need_im2col = stride_width != 1 || stride_height != 1 || kernel_shape.Dims(1) != 1 ||
+ kernel_shape.Dims(2) != 1;
+ if (!_prepared && _need_im2col)
+ {
+ _im2col_shape.SetDim(0, output_shape.Dims(0));
+ _im2col_shape.SetDim(1, output_shape.Dims(1));
+ _im2col_shape.SetDim(2, output_shape.Dims(2));
+ _im2col_shape.SetDim(3, input_shape.Dims(3) * kernel_shape.Dims(1) * kernel_shape.Dims(2));
+ _im2col_data.resize(_im2col_shape.FlatSize());
+ }
+ _prepared = true;
}
- const int input_height = input_shape.Dims(1);
- const int input_width = input_shape.Dims(2);
- const int filter_height = filter_shape.Dims(1);
- const int filter_width = filter_shape.Dims(2);
- const int output_height = output_shape.Dims(1);
- const int output_width = output_shape.Dims(2);
- for (int batch = 0; batch < batches; ++batch)
+
+ void operator()(const ConvParams &params, const Shape &input_shape, const float *input_data,
+ const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
+ const float *bias_data, const Shape &output_shape, float *output_data)
{
- for (int out_y = 0; out_y < output_height; ++out_y)
+ if (params.padding_type != PaddingType::kNone && std::thread::hardware_concurrency() > 1)
{
- for (int out_x = 0; out_x < output_width; ++out_x)
+ if (!_prepared)
{
- for (int out_channel = 0; out_channel < output_depth; ++out_channel)
- {
- const int in_x_origin = (out_x * stride_width) - pad_width;
- const int in_y_origin = (out_y * stride_height) - pad_height;
- int32_t acc = 0;
- for (int filter_y = 0; filter_y < filter_height; ++filter_y)
- {
- for (int filter_x = 0; filter_x < filter_width; ++filter_x)
- {
- const int in_x = in_x_origin + dilation_width_factor * filter_x;
- const int in_y = in_y_origin + dilation_height_factor * filter_y;
- // If the location is outside the bounds of the input image,
- // use zero as a default value.
- const int in_base = Offset(input_shape, batch, in_y, in_x, 0);
- const int filter_base = Offset(filter_shape, out_channel, filter_y, filter_x, 0);
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
- {
- for (int in_channel = 0; in_channel < input_depth; in_channel++)
- {
- int32_t input_val = input_data[in_channel + in_base];
- int32_t filter_val = filter_data[in_channel + filter_base];
- acc += (filter_val + filter_offset) * (input_val + input_offset);
- }
- }
- }
- }
- if (bias_data)
- {
- acc += bias_data[out_channel];
- }
- acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
- acc += output_offset;
- acc = std::max(acc, output_activation_min);
- acc = std::min(acc, output_activation_max);
- output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
- static_cast<uint8_t>(acc);
- }
+ bool not_used_condition = false;
+ prepare(filter_shape, filter_data, params.padding_type, not_used_condition);
+ _prepared = true;
}
+ multithreaded::Conv(params, input_shape, input_data, filter_shape, &_modified_filter_data[0],
+ bias_shape, bias_data, output_shape, output_data);
+ }
+ else
+ {
+ // TODO Support optimized kernel
+ reference::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+ bias_data, output_shape, output_data);
}
}
-}
+ void operator()(const ConvParams &params, const Shape &input_shape, const uint8_t *input_data,
+ const Shape &filter_shape, const uint8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
+ {
+ if (_prepared)
+ {
+ uint8_t *im2col_raw_data = _im2col_data.data();
+ optimized::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+ bias_data, output_shape, output_data, _im2col_shape, im2col_raw_data);
+ }
+ else
+ {
+ reference::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+ bias_data, output_shape, output_data);
+ }
+ }
+
+private:
+ std::vector<float> _modified_filter_data;
+ std::vector<uint8_t> _im2col_data;
+ Shape _im2col_shape;
+ bool _need_im2col;
+ bool _prepared;
+};
} // namespace cker
} // namespace nnfw
diff --git a/compute/cker/include/cker/operation/DepthwiseConv.h b/compute/cker/include/cker/operation/DepthwiseConv.h
index 7d022477d..814a9e019 100644
--- a/compute/cker/include/cker/operation/DepthwiseConv.h
+++ b/compute/cker/include/cker/operation/DepthwiseConv.h
@@ -21,118 +21,68 @@
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
+#include "cker/neon/neon_check.h"
+#include "cker/operation/optimized/DepthwiseConvUint8.h"
namespace nnfw
{
namespace cker
{
-struct DepthwiseConvParams
-{
- PaddingType padding_type;
- PaddingValues padding_values;
- int16_t stride_width;
- int16_t stride_height;
- int16_t dilation_width_factor;
- int16_t dilation_height_factor;
- int16_t depth_multiplier;
- // uint8 inference params.
- // TODO(b/65838351): Use smaller types if appropriate.
- int32_t input_offset;
- int32_t weights_offset;
- int32_t output_offset;
- int32_t output_multiplier;
- int output_shift;
- // uint8, etc, activation params.
- int32_t quantized_activation_min;
- int32_t quantized_activation_max;
- // float activation params.
- float float_activation_min;
- float float_activation_max;
-};
-
inline void DepthwiseConv(const DepthwiseConvParams &params, const Shape &input_shape,
const uint8_t *input_data, const Shape &filter_shape,
const uint8_t *filter_data, const Shape &bias_shape,
const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
{
- const int stride_width = params.stride_width;
- const int stride_height = params.stride_height;
- const int dilation_width_factor = params.dilation_width_factor;
- const int dilation_height_factor = params.dilation_height_factor;
- const int pad_width = params.padding_values.width;
- const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
- const int32_t input_offset = params.input_offset;
- const int32_t filter_offset = params.weights_offset;
- const int32_t output_offset = params.output_offset;
- const int32_t output_multiplier = params.output_multiplier;
- const int output_shift = params.output_shift;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ assert(dilation_width_factor >= 1);
+ assert(dilation_height_factor >= 1);
+ UNUSED_RELEASE(dilation_width_factor);
+ UNUSED_RELEASE(dilation_height_factor);
assert(input_shape.DimensionsCount() == 4);
assert(filter_shape.DimensionsCount() == 4);
assert(output_shape.DimensionsCount() == 4);
-
assert(output_activation_min <= output_activation_max);
- const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ UNUSED_RELEASE(output_activation_min);
+ UNUSED_RELEASE(output_activation_max);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
- const int input_height = input_shape.Dims(1);
- const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
- const int filter_height = filter_shape.Dims(1);
- const int filter_width = filter_shape.Dims(2);
- const int output_height = output_shape.Dims(1);
- const int output_width = output_shape.Dims(2);
assert(output_depth == input_depth * depth_multiplier);
assert(bias_shape.FlatSize() == output_depth);
+ UNUSED_RELEASE(input_depth);
UNUSED_RELEASE(output_depth);
- UNUSED_RELEASE(bias_shape);
+ UNUSED_RELEASE(depth_multiplier);
- for (int b = 0; b < batches; ++b)
- {
- for (int out_y = 0; out_y < output_height; ++out_y)
- {
- for (int out_x = 0; out_x < output_width; ++out_x)
- {
- for (int ic = 0; ic < input_depth; ++ic)
- {
- for (int m = 0; m < depth_multiplier; m++)
- {
- const int oc = m + ic * depth_multiplier;
- const int in_x_origin = (out_x * stride_width) - pad_width;
- const int in_y_origin = (out_y * stride_height) - pad_height;
- int32_t acc = 0;
- for (int filter_y = 0; filter_y < filter_height; ++filter_y)
- {
- for (int filter_x = 0; filter_x < filter_width; ++filter_x)
- {
- const int in_x = in_x_origin + dilation_width_factor * filter_x;
- const int in_y = in_y_origin + dilation_height_factor * filter_y;
- // If the location is outside the bounds of the input image,
- // use zero as a default value.
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
- {
- int32_t input_val = input_data[Offset(input_shape, b, in_y, in_x, ic)];
- int32_t filter_val = filter_data[Offset(filter_shape, 0, filter_y, filter_x, oc)];
- acc += (filter_val + filter_offset) * (input_val + input_offset);
- }
- }
- }
- if (bias_data)
- {
- acc += bias_data[oc];
- }
- acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
- acc += output_offset;
- acc = std::max(acc, output_activation_min);
- acc = std::min(acc, output_activation_max);
- output_data[Offset(output_shape, b, out_y, out_x, oc)] = static_cast<uint8_t>(acc);
- }
- }
- }
- }
- }
+// Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
+// Jetson TX-2. This compiler does not support the offsetof() macro.
+#if defined(__aarch64__)
+// TODO Use below codes
+
+// const int stride_width = params.stride_width;
+// const int stride_height = params.stride_height;
+// const int pad_width = params.padding_values.width;
+// const int pad_height = params.padding_values.height;
+// const int output_shift = params.output_shift;
+//
+// // Call kernel optimized for depthwise convolutions using 3x3 filters if
+// // parameters are supported.
+// if (Fast3x3FilterKernelSupported(
+// input_shape, filter_shape, stride_width, stride_height,
+// dilation_width_factor, dilation_height_factor, pad_width, pad_height,
+// depth_multiplier, output_shape, output_shift)) {
+// DepthwiseConv3x3Filter(params, input_shape, input_data, filter_shape,
+// filter_data, bias_shape, bias_data, output_shape,
+// output_data);
+// return;
+// }
+#endif
+
+ optimized::DepthwiseConvGeneral(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
}
inline void DepthwiseConv(const DepthwiseConvParams &params, const Shape &input_shape,
diff --git a/compute/cker/include/cker/operation/Elementwise.h b/compute/cker/include/cker/operation/Elementwise.h
new file mode 100644
index 000000000..83581e84b
--- /dev/null
+++ b/compute/cker/include/cker/operation/Elementwise.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_ELEMENTWISE_H__
+#define __NNFW_CKER_ELEMENTWISE_H__
+
+#include "cker/eigen/Utils.h"
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include <Eigen/Core>
+
+namespace nnfw
+{
+namespace cker
+{
+
+inline void Sin(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ float *output_data)
+{
+ const int size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < size; i++)
+ {
+ output_data[i] = std::sin(input_data[i]);
+ }
+}
+
+inline void Abs(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ float *output_data)
+{
+ auto input_map = MapAsVector(input_data, input_shape);
+ auto output_map = MapAsVector(output_data, output_shape);
+ output_map.array() = input_map.array().abs();
+}
+
+inline void Rsqrt(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ float *output_data)
+{
+ const int size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < size; i++)
+ {
+ output_data[i] = 1.f / std::sqrt(input_data[i]);
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_ELEMENTWISE_H__
diff --git a/compute/cker/include/cker/operation/Exp.h b/compute/cker/include/cker/operation/Exp.h
new file mode 100644
index 000000000..ed3c73d73
--- /dev/null
+++ b/compute/cker/include/cker/operation/Exp.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_EXP_H__
+#define __NNFW_CKER_EXP_H__
+
+#include "cker/Shape.h"
+
+#include <cmath>
+
+namespace nnfw
+{
+namespace cker
+{
+
+inline void Exp(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ float *output_data)
+{
+ const int size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < size; i++)
+ {
+ output_data[i] = std::exp(input_data[i]);
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_EXP_H__
diff --git a/compute/cker/include/cker/operation/FullyConnected.h b/compute/cker/include/cker/operation/FullyConnected.h
index 428fb1b53..01b925efb 100644
--- a/compute/cker/include/cker/operation/FullyConnected.h
+++ b/compute/cker/include/cker/operation/FullyConnected.h
@@ -19,69 +19,66 @@
#define __NNFW_CKER_FULLY_CONNECTED_H__
#include "cker/Shape.h"
+#include "cker/Types.h"
#include "cker/Utils.h"
+#include "cker/TensorUtils.h"
namespace nnfw
{
namespace cker
{
-struct FullyConnectedParams
+class FCTempArena
{
- // uint8 inference params.
- // TODO(b/65838351): Use smaller types if appropriate.
- int32_t input_offset;
- int32_t weights_offset;
- int32_t output_offset;
- int32_t output_multiplier;
- int output_shift;
- // uint8, etc, activation params.
- int32_t quantized_activation_min;
- int32_t quantized_activation_max;
- // float activation params.
- float float_activation_min;
- float float_activation_max;
- // FullyConnectedWeightsFormat weights_format;
+public:
+ FCTempArena(void) : prepared(false), input_quantized(), scaling_factors()
+ {
+ // DO NOTHING
+ }
+
+ void prepare(const Shape &input_shape, const Shape &weights_shape)
+ {
+ auto input_size = input_shape.FlatSize();
+ input_quantized.resize(input_size);
+
+ assert(weights_shape.DimensionsCount() == 2);
+ int batch_size = input_size / weights_shape.Dims(1);
+ scaling_factors.resize(batch_size);
+ prepared = true;
+ }
+
+public:
+ bool prepared;
+ std::vector<int8_t> input_quantized;
+ std::vector<float> scaling_factors;
};
inline void FullyConnected(const FullyConnectedParams &params, const Shape &input_shape,
const float *input_data, const Shape &weights_shape,
- const float *weights_data, const Shape &bias_shape,
- const float *bias_data, const Shape &output_shape, float *output_data)
+ const float *weights_data, const Shape &, const float *bias_data,
+ const Shape &, float *output_data)
{
- UNUSED_RELEASE(input_shape);
- UNUSED_RELEASE(bias_shape);
- const float output_activation_min = params.float_activation_min;
- const float output_activation_max = params.float_activation_max;
- // TODO(benoitjacob): This really should be:
- // const int batches = ArraySize(output_dims, 1);
- // but the current --variable_batch hack consists in overwriting the 3rd
- // dimension with the runtime batch size, as we don't keep track for each
- // array of which dimension is the batch dimension in it.
- const int output_dims_count = output_shape.DimensionsCount();
- const int weights_dims_count = weights_shape.DimensionsCount();
- const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
- const int output_depth =
- MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1);
- const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
- for (int b = 0; b < batches; ++b)
+ int total_input_size = input_shape.FlatSize();
+ int input_size = weights_shape.Dims(1);
+ const int batch_size = total_input_size / input_size;
+ const int num_units = weights_shape.Dims(0);
+
+ // Output = bias if bias tensor exists.
+ if (bias_data)
{
- for (int out_c = 0; out_c < output_depth; ++out_c)
- {
- float total = 0.f;
- for (int d = 0; d < accum_depth; ++d)
- {
- total += input_data[b * accum_depth + d] * weights_data[out_c * accum_depth + d];
- }
- float bias_value = 0.0f;
- if (bias_data)
- {
- bias_value = bias_data[out_c];
- }
- output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
- total + bias_value, output_activation_min, output_activation_max);
- }
+ VectorBatchVectorAssign(bias_data, num_units, batch_size, output_data);
+ }
+ else
+ {
+ ZeroVector(output_data, batch_size * num_units);
}
+
+ // Compute output += weight * input
+ MatrixBatchVectorMultiplyAccumulate(weights_data, num_units, input_size, input_data, batch_size,
+ output_data, /*result_stride=*/1);
+
+ // Apply activation function
+ ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
}
inline void FullyConnected(const FullyConnectedParams &params, const Shape &input_shape,
@@ -138,6 +135,51 @@ inline void FullyConnected(const FullyConnectedParams &params, const Shape &inpu
}
}
+inline void FullyConnectedHybrid(const FullyConnectedParams &params, const Shape &input_shape,
+ const float *input_data, const Shape &filter_shape,
+ const int8_t *filter_data, const Shape &, const float *bias_data,
+ const Shape &, float *output_data, FCTempArena &temp_arena)
+{
+ int total_input_size = input_shape.FlatSize();
+ const int input_size = filter_shape.Dims(1);
+ const int batch_size = total_input_size / input_size;
+ const int num_units = filter_shape.Dims(0);
+
+ // Output = bias if bias tensor exists.
+ VectorBatchVectorAssign(bias_data, num_units, batch_size, output_data);
+
+ // Save matrix multiplication computation for all zero input.
+ if (IsZeroVector(input_data, total_input_size))
+ {
+ ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
+ return;
+ }
+
+ // Quantize input from float to int8 + quantization params (scaling factor).
+ float unused_min, unused_max;
+ float *scaling_factors_ptr = temp_arena.scaling_factors.data();
+ int8_t *quant_data = temp_arena.input_quantized.data();
+
+ // Quantize each batch independently.
+ for (int b = 0; b < batch_size; ++b)
+ {
+ const int offset = b * input_size;
+ SymmetricQuantizeFloats(input_data + offset, input_size, quant_data + offset, &unused_min,
+ &unused_max, &scaling_factors_ptr[b]);
+ // Incorporate scaling of the filter.
+ scaling_factors_ptr[b] *= params.weights_scale;
+ }
+
+ // Compute output += weight * quantized_input
+ MatrixBatchVectorMultiplyAccumulate(filter_data, num_units, input_size, quant_data,
+ scaling_factors_ptr, batch_size, output_data,
+ /*result_stride=*/1);
+
+ // Apply activation function to floats.
+ ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
+ return;
+}
+
} // namespace cker
} // namespace nnfw
diff --git a/compute/cker/include/cker/operation/Gather.h b/compute/cker/include/cker/operation/Gather.h
index 9cd96eeb7..65a71887e 100644
--- a/compute/cker/include/cker/operation/Gather.h
+++ b/compute/cker/include/cker/operation/Gather.h
@@ -27,11 +27,6 @@ namespace nnfw
namespace cker
{
-struct GatherParams
-{
- int32_t axis;
-};
-
template <typename T, typename CoordsT = int32_t>
inline void Gather(const GatherParams &op_params, const Shape &input_shape, const T *input_data,
const Shape &coords_shape, const CoordsT *coords_data, const Shape &,
diff --git a/compute/cker/include/cker/operation/InstanceNorm.h b/compute/cker/include/cker/operation/InstanceNorm.h
index 794dcebc8..6445e8a2b 100644
--- a/compute/cker/include/cker/operation/InstanceNorm.h
+++ b/compute/cker/include/cker/operation/InstanceNorm.h
@@ -28,13 +28,6 @@ namespace nnfw
namespace cker
{
-struct InstanceNormParams
-{
- float epsilon;
- float float_activation_min;
- float float_activation_max;
-};
-
inline void InstanceNorm(const InstanceNormParams &params, const Shape &input_shape,
const float *input_data, const Shape &gamma_shape, const float *gamma_data,
const Shape &beta_shape, const float *beta_data, const Shape &output_shape,
diff --git a/compute/cker/include/cker/operation/MaxMin.h b/compute/cker/include/cker/operation/MaxMin.h
new file mode 100644
index 000000000..691b3b0b3
--- /dev/null
+++ b/compute/cker/include/cker/operation/MaxMin.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_MAXMIN_H__
+#define __NNFW_CKER_MAXMIN_H__
+
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+struct MaximumOp
+{
+ template <typename data_type> static data_type op(data_type el1, data_type el2)
+ {
+ return el1 > el2 ? el1 : el2;
+ }
+};
+
+struct MinimumOp
+{
+ template <typename data_type> static data_type op(data_type el1, data_type el2)
+ {
+ return el1 < el2 ? el1 : el2;
+ }
+};
+
+template <typename T, typename Op>
+inline void
+MaximumMinimumBroadcast4DSlow(const Shape &unextended_input1_shape, const T *input1_data,
+ const Shape &unextended_input2_shape, const T *input2_data,
+ const Shape &unextended_output_shape, T *output_data, Op op)
+{
+ assert(unextended_input1_shape.DimensionsCount() <= 4);
+ assert(unextended_input2_shape.DimensionsCount() <= 4);
+ assert(unextended_output_shape.DimensionsCount() <= 4);
+ const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+ &desc2);
+
+ for (int b = 0; b < output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < output_shape.Dims(3); ++c)
+ {
+ auto out_idx = Offset(output_shape, b, y, x, c);
+ auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+ auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+ auto in1_val = input1_data[in1_idx];
+ auto in2_val = input2_data[in2_idx];
+ output_data[out_idx] = op(in1_val, in2_val);
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+inline void Max(const Shape &unextended_input1_shape, const T *input1_data,
+ const Shape &unextended_input2_shape, const T *input2_data,
+ const Shape &unextended_output_shape, T *output_data)
+{
+ MaximumMinimumBroadcast4DSlow<T>(unextended_input1_shape, input1_data, unextended_input2_shape,
+ input2_data, unextended_output_shape, output_data,
+ MaximumOp::template op<T>);
+}
+
+template <typename T>
+inline void Min(const Shape &unextended_input1_shape, const T *input1_data,
+ const Shape &unextended_input2_shape, const T *input2_data,
+ const Shape &unextended_output_shape, T *output_data)
+{
+ MaximumMinimumBroadcast4DSlow<T>(unextended_input1_shape, input1_data, unextended_input2_shape,
+ input2_data, unextended_output_shape, output_data,
+ MinimumOp::template op<T>);
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_MAXMIN_H__
diff --git a/compute/cker/include/cker/operation/MaxPool.h b/compute/cker/include/cker/operation/MaxPool.h
index 326168b99..339547298 100644
--- a/compute/cker/include/cker/operation/MaxPool.h
+++ b/compute/cker/include/cker/operation/MaxPool.h
@@ -21,9 +21,10 @@
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
+#include "cker/neon/neon_check.h"
+#include "cker/eigen/Utils.h"
-#include "cker/operation/optimized/MaxPool.h"
-#include "cker/operation/reference/MaxPool.h"
+#include <Eigen/Core>
namespace nnfw
{
@@ -33,19 +34,70 @@ namespace cker
inline void MaxPool(const PoolParams &params, const Shape &input_shape, const float *input_data,
const Shape &output_shape, float *output_data)
{
-#if defined(CKER_OPTIMIZED_EIGEN)
- optimized::MaxPool(params, input_shape, input_data, output_shape, output_data);
-#else // defined(CKER_OPTIMIZED_EIGEN)
- reference::MaxPool(params, input_shape, input_data, output_shape, output_data);
-#endif // defined(CKER_OPTIMIZED_EIGEN)
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ const int stride_height = params.stride_height;
+ const int stride_width = params.stride_width;
+
+ const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
+ auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
+ // Prefill the output to minimum representable float value
+ out_mat.setConstant(std::numeric_limits<float>::lowest());
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int h = 0; h < input_height; ++h)
+ {
+ for (int w = 0; w < input_width; ++w)
+ {
+ // (h_start, h_end) * (w_start, w_end) is the range that the input
+ // vector projects to.
+ int hpad = h + params.padding_values.height;
+ int wpad = w + params.padding_values.width;
+ int h_start =
+ (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
+ int h_end = std::min(hpad / stride_height + 1, output_height);
+ int w_start =
+ (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
+ int w_end = std::min(wpad / stride_width + 1, output_width);
+ // compute elementwise max over the pooling window
+ for (int ph = h_start; ph < h_end; ++ph)
+ {
+ for (int pw = w_start; pw < w_end; ++pw)
+ {
+ int out_offset = NodeOffset(b, ph, pw, output_height, output_width);
+ out_mat.col(out_offset) =
+ out_mat.col(out_offset)
+ .cwiseMax(in_mat.col(NodeOffset(b, h, w, input_height, input_width)));
+ }
+ }
+ }
+ }
+ }
+ const int flat_size = output_shape.FlatSize();
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = ActivationFunctionWithMinMax(output_data[i], params.float_activation_min,
+ params.float_activation_max);
+ }
}
inline void MaxPool(const PoolParams &params, const Shape &input_shape, const uint8_t *input_data,
const Shape &output_shape, uint8_t *output_data)
{
+
+ // Here, and in other pooling ops, in order to maintain locality of reference,
+ // to minimize some recalculations, and to load into NEON vector registers, we
+ // use an inner loop down the depth. Since depths can be large and hence we
+ // would need arbitrarily large temporary storage, we divide the work up into
+ // depth tranches just within the batch loop.
+ static constexpr int kPoolingAccTrancheSize = 256;
+
assert(params.quantized_activation_min <= params.quantized_activation_max);
- assert(params.quantized_activation_min >= 0);
- assert(params.quantized_activation_max <= 255);
assert(input_shape.DimensionsCount() == 4);
assert(output_shape.DimensionsCount() == 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
@@ -56,36 +108,88 @@ inline void MaxPool(const PoolParams &params, const Shape &input_shape, const ui
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
+
+ uint8_t acc[kPoolingAccTrancheSize];
for (int batch = 0; batch < batches; ++batch)
{
- for (int out_y = 0; out_y < output_height; ++out_y)
+ // We proceed through the depth in tranches (see comment above). The
+ // depth_base is the depth at the beginning of the tranche. The
+ // tranche_depth is the depth dimension of the tranche.
+ for (int depth_base = 0; depth_base < depth; depth_base += kPoolingAccTrancheSize)
{
- for (int out_x = 0; out_x < output_width; ++out_x)
+ const int tranche_depth = std::min(depth - depth_base, kPoolingAccTrancheSize);
+ for (int out_y = 0; out_y < output_height; ++out_y)
{
- for (int channel = 0; channel < depth; ++channel)
+ for (int out_x = 0; out_x < output_width; ++out_x)
{
const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
- // Compute the boundaries of the filter region clamped so as to
- // ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
- uint8_t max = 0;
- for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ memset(acc, 0, tranche_depth * sizeof(acc[0]));
+ const uint8_t *input_ptr =
+ input_data + depth_base +
+ depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
+ for (int fy = filter_y_start; fy < filter_y_end; fy++)
{
- for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
+ const uint8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start);
+ for (int fx = filter_x_start; fx < filter_x_end; fx++)
{
- const int in_x = in_x_origin + filter_x;
- const int in_y = in_y_origin + filter_y;
- max = std::max(max, input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
+ const uint8_t *input_channel_ptr = input_row_ptr;
+ int channel = 0;
+#ifdef USE_NEON
+ for (; channel <= tranche_depth - 16; channel += 16)
+ {
+ uint8x16_t acc_reg = vld1q_u8(acc + channel);
+ uint8x16_t input_reg = vld1q_u8(input_channel_ptr);
+ input_channel_ptr += 16;
+ acc_reg = vmaxq_u8(acc_reg, input_reg);
+ vst1q_u8(acc + channel, acc_reg);
+ }
+
+ for (; channel <= tranche_depth - 8; channel += 8)
+ {
+ uint8x8_t acc_reg = vld1_u8(acc + channel);
+ uint8x8_t input_reg = vld1_u8(input_channel_ptr);
+ input_channel_ptr += 8;
+ acc_reg = vmax_u8(acc_reg, input_reg);
+ vst1_u8(acc + channel, acc_reg);
+ }
+#endif
+ for (; channel < tranche_depth; ++channel)
+ {
+ acc[channel] = std::max(acc[channel], *input_channel_ptr++);
+ }
+ input_row_ptr += depth;
}
}
- max = std::max<uint8_t>(max, params.quantized_activation_min);
- max = std::min<uint8_t>(max, params.quantized_activation_max);
- output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
- static_cast<uint8_t>(max);
+ uint8_t *output_ptr = output_data + Offset(output_shape, batch, out_y, out_x, depth_base);
+ int channel = 0;
+#ifdef USE_NEON
+ for (; channel <= tranche_depth - 16; channel += 16)
+ {
+ uint8x16_t a = vld1q_u8(acc + channel);
+ a = vminq_u8(a, vdupq_n_u8(params.quantized_activation_max));
+ a = vmaxq_u8(a, vdupq_n_u8(params.quantized_activation_min));
+ vst1q_u8(output_ptr + channel, a);
+ }
+ for (; channel <= tranche_depth - 8; channel += 8)
+ {
+ uint8x8_t a = vld1_u8(acc + channel);
+ a = vmin_u8(a, vdup_n_u8(params.quantized_activation_max));
+ a = vmax_u8(a, vdup_n_u8(params.quantized_activation_min));
+ vst1_u8(output_ptr + channel, a);
+ }
+#endif
+ for (; channel < tranche_depth; ++channel)
+ {
+ uint8_t a = acc[channel];
+ a = std::max<uint8_t>(a, params.quantized_activation_min);
+ a = std::min<uint8_t>(a, params.quantized_activation_max);
+ output_ptr[channel] = static_cast<uint8_t>(a);
+ }
}
}
}
diff --git a/compute/cker/include/cker/operation/OneHot.h b/compute/cker/include/cker/operation/OneHot.h
new file mode 100644
index 000000000..2df82d1bb
--- /dev/null
+++ b/compute/cker/include/cker/operation/OneHot.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_ONEHOT_H__
+#define __NNFW_CKER_ONEHOT_H__
+
+#include "cker/Shape.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename T, typename TI>
+void OneHot(const int32_t depth, const T on_value, const T off_value, int32_t axis,
+ const Shape &indices_shape, const TI *indices_data, const Shape &, T *output_data)
+{
+ // prefix_dim_size == # of elements before the axis
+ // depth == # of elements per axis
+ // suffix_dim_size == # of elements after the axis
+ int prefix_dim_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ prefix_dim_size *= indices_shape.Dims(i);
+ }
+ const int suffix_dim_size = indices_shape.FlatSize() / prefix_dim_size;
+
+ // View the indices as a matrix of size:
+ // prefix_dim_size x suffix_dim_size
+ // View the output as a matrix of size:
+ // prefix_dim_size x depth x suffix_dim_size
+ // Then the output is:
+ // output(i, j, k) == (indices(i, k) == j) ? on : off
+ for (int i = 0; i < prefix_dim_size; ++i)
+ {
+ for (int j = 0; j < depth; ++j)
+ {
+ for (int k = 0; k < suffix_dim_size; ++k, ++output_data)
+ {
+ *output_data =
+ static_cast<int>(indices_data[i * suffix_dim_size + k]) == j ? on_value : off_value;
+ }
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_ONEHOT_H__
diff --git a/compute/cker/include/cker/operation/Pack.h b/compute/cker/include/cker/operation/Pack.h
new file mode 100644
index 000000000..fd865047d
--- /dev/null
+++ b/compute/cker/include/cker/operation/Pack.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_PACK_H__
+#define __NNFW_CKER_PACK_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename Scalar>
+inline void Pack(const PackParams &params, const Scalar *const *input_data,
+ const Shape &output_shape, Scalar *output_data)
+{
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ int inputs_count = params.inputs_count;
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; i++)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = params.axis + 1; i < dimensions; i++)
+ {
+ copy_size *= output_shape.Dims(i);
+ }
+
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ for (int k = 0; k < outer_size; k++)
+ {
+ const Scalar *input_ptr = input_data[i] + copy_size * k;
+ int loc = k * inputs_count * copy_size + i * copy_size;
+ memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_PACK_H__
diff --git a/compute/cker/include/cker/operation/Reduce.h b/compute/cker/include/cker/operation/Reduce.h
new file mode 100644
index 000000000..52e7395e7
--- /dev/null
+++ b/compute/cker/include/cker/operation/Reduce.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_REDUCE_H__
+#define __NNFW_CKER_REDUCE_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+// A generic reduce method that can be used for reduce_sum, reduce_mean, etc.
+// This method iterates through input data and reduce elements along the
+// dimensions given in axis.
+template <typename In, typename Out>
+inline bool ReduceImpl(const In *input_data, const Shape &input_shape, const Shape &,
+ const int *axis, const int num_axis, int *input_iter,
+ Out reducer(const Out current, const In in), Out *output_data)
+{
+ const auto input_dims = input_shape.DimsData();
+ const auto input_num_dims = input_shape.DimensionsCount();
+
+ // Reset input iterator.
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ input_iter[idx] = 0;
+ }
+ // Iterate through input_data.
+ do
+ {
+ size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
+ size_t output_offset =
+ ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis);
+ output_data[output_offset] = reducer(output_data[output_offset], input_data[input_offset]);
+ } while (NextIndex(input_num_dims, input_dims, input_iter));
+ return true;
+}
+
+// This method parses the input 'axis' to remove duplicates and handle negative
+// values, and returns a valid 'out_axis'
+inline bool ResolveAxis(const int num_dims, const std::vector<int> &axes, int *out_axis,
+ int *out_num_axis)
+{
+ auto num_axis = axes.size();
+ auto axis = axes.data();
+
+ *out_num_axis = 0; // Just in case.
+ // Short-circuit axis resolution for scalars; the axis will go unused.
+ if (num_dims == 0)
+ {
+ return true;
+ }
+ // O(n^2) is fine since out_num_axis should be really small, mostly <= 4
+ for (size_t idx = 0; idx < num_axis; ++idx)
+ {
+ // Handle negative index. A positive index 'p_idx' can be represented as a
+ // negative index 'n_idx' as: n_idx = p_idx-num_dims
+ // eg: For num_dims=3, [0, 1, 2] is the same as [-3, -2, -1] */
+ int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
+ assert(current >= 0 && current < num_dims);
+ bool is_dup = false;
+ for (int j = 0; j < *out_num_axis; ++j)
+ {
+ if (out_axis[j] == current)
+ {
+ is_dup = true;
+ break;
+ }
+ }
+ if (!is_dup)
+ {
+ out_axis[*out_num_axis] = current;
+ *out_num_axis += 1;
+ }
+ }
+ return true;
+}
+
+template <typename T>
+inline bool InitTensorDataForReduce(const Shape &shape, const T init_value, T *data)
+{
+ const auto dims = shape.DimsData();
+ const auto num_dims = shape.DimensionsCount();
+ size_t num_elements = 1;
+ for (int idx = 0; idx < num_dims; ++idx)
+ {
+ size_t current = static_cast<size_t>(dims[idx]);
+ // Overflow prevention.
+ if (num_elements > std::numeric_limits<size_t>::max() / current)
+ {
+ return false;
+ }
+ num_elements *= current;
+ }
+ for (size_t idx = 0; idx < num_elements; ++idx)
+ {
+ data[idx] = init_value;
+ }
+ return true;
+}
+
+class Reduce
+{
+public:
+ Reduce() : _temp_index(), _resolved_axis(), _prepared(false) {}
+
+ void prepare(size_t temp_index_size, size_t resolved_axis_size)
+ {
+ if (_prepared)
+ return;
+
+ // prepare space for temp_index and resolved_axis
+ _temp_index.resize(temp_index_size);
+ _resolved_axis.resize(resolved_axis_size);
+ _prepared = true;
+ }
+
+ // Computes the generic value (i.e., sum/max/min/prod) of elements across
+ // dimensions given in axis. It needs to pass in init_value and reducer.
+ template <typename T>
+ inline bool ReduceGeneric(const Shape &input_shape, const T *input_data,
+ const Shape &output_shape, T *output_data, const std::vector<int> &axes,
+ bool, T init_value, T reducer(const T current, const T in))
+ {
+ // Reset output data.
+ if (!InitTensorDataForReduce(output_shape, init_value, output_data))
+ {
+ return false;
+ }
+
+ // Resolve axis.
+ int num_resolved_axis = 0;
+ if (!ResolveAxis(input_shape.DimensionsCount(), axes, _resolved_axis.data(),
+ &num_resolved_axis))
+ {
+ return false;
+ }
+
+ return ReduceImpl<T, T>(input_data, input_shape, output_shape, _resolved_axis.data(),
+ num_resolved_axis, _temp_index.data(), reducer, output_data);
+ }
+
+private:
+ std::vector<int> _temp_index;
+ std::vector<int> _resolved_axis;
+ bool _prepared;
+};
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_REDUCE_H__
diff --git a/compute/cker/include/cker/operation/Slice.h b/compute/cker/include/cker/operation/Slice.h
new file mode 100644
index 000000000..a072cff8e
--- /dev/null
+++ b/compute/cker/include/cker/operation/Slice.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_SLICE_H__
+#define __NNFW_CKER_SLICE_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename T>
+inline void Slice(const SliceParams &op_params, const Shape &input_shape,
+ SequentialTensorWriter<T> *writer)
+{
+ // TODO(dkalenichenko): This op only supports 4D tensors or smaller.
+ assert(op_params.begin_count <= 4);
+ assert(op_params.size_count <= 4);
+
+ const int begin_count = op_params.begin_count;
+ const int size_count = op_params.size_count;
+ // We front-pad the begin and size vectors.
+ const int start_b = 4 - begin_count > 0 ? 0 : op_params.begin[0];
+ const int stop_b = (4 - size_count > 0 || op_params.size[0] == -1) ? input_shape.Dims(0)
+ : start_b + op_params.size[0];
+ const int start_h = begin_count < 3 ? 0 : op_params.begin[begin_count - 3];
+ const int stop_h = (size_count < 3 || op_params.size[size_count - 3] == -1)
+ ? input_shape.Dims(1)
+ : start_h + op_params.size[size_count - 3];
+ const int start_w = begin_count < 2 ? 0 : op_params.begin[begin_count - 2];
+ const int stop_w = (size_count < 2 || op_params.size[size_count - 2] == -1)
+ ? input_shape.Dims(2)
+ : start_w + op_params.size[size_count - 2];
+ const int start_d = begin_count < 1 ? 0 : op_params.begin[begin_count - 1];
+ const int stop_d = (size_count < 1 || op_params.size[size_count - 1] == -1)
+ ? input_shape.Dims(3)
+ : start_d + op_params.size[size_count - 1];
+
+ for (int in_b = start_b; in_b < stop_b; ++in_b)
+ {
+ for (int in_h = start_h; in_h < stop_h; ++in_h)
+ {
+ for (int in_w = start_w; in_w < stop_w; ++in_w)
+ {
+ const int len = stop_d - start_d;
+ if (len > 0)
+ writer->WriteN(Offset(input_shape, in_b, in_h, in_w, start_d), len);
+ }
+ }
+ }
+}
+
+template <typename T>
+inline void Slice(const SliceParams &op_params, const Shape &input_shape, const T *input_data,
+ T *output_data)
+{
+ SequentialTensorWriter<T> writer(input_data, output_data);
+ return Slice(op_params, input_shape, &writer);
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_SLICE_H__
diff --git a/compute/cker/include/cker/operation/SoftMax.h b/compute/cker/include/cker/operation/SoftMax.h
index ea404a002..bb394619e 100644
--- a/compute/cker/include/cker/operation/SoftMax.h
+++ b/compute/cker/include/cker/operation/SoftMax.h
@@ -21,10 +21,10 @@
#include "cker/Shape.h"
#include "cker/Utils.h"
#include "cker/Types.h"
-#include "cker/gemmlowp/FixedPoint.h"
-#include "cker/operation/optimized/SoftMax.h"
-#include "cker/operation/reference/SoftMax.h"
+#include "cker/eigen/Utils.h"
+#include <Eigen/Core>
+#include <fixedpoint/fixedpoint.h>
#include <cmath>
namespace nnfw
@@ -35,11 +35,19 @@ namespace cker
inline void Softmax(const SoftmaxParams &params, const Shape &input_shape, const float *input_data,
const Shape &output_shape, float *output_data)
{
-#if defined(CKER_OPTIMIZED_EIGEN)
- optimized::Softmax(params, input_shape, input_data, output_shape, output_data);
-#else // defined(CKER_OPTIMIZED_EIGEN)
- reference::Softmax(params, input_shape, input_data, output_shape, output_data);
-#endif // defined(CKER_OPTIMIZED_EIGEN)
+ // Validate whether if shapes of input and output are the same
+ MatchingFlatSize(input_shape, output_shape);
+
+ const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
+ auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
+ // Compute the exponential first, removing the max coefficient for numerical
+ // stability.
+ out_mat = (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * params.beta;
+ // We are separating out the exp function so that exp can be vectorized.
+ out_mat = out_mat.array().exp();
+ // Normalize to get the activations.
+ Eigen::Array<float, 1, Eigen::Dynamic> scale = out_mat.array().colwise().sum().inverse();
+ out_mat.array().rowwise() *= scale;
}
inline void Softmax(const SoftmaxParams &params, const Shape &input_shape,
@@ -55,9 +63,9 @@ inline void Softmax(const SoftmaxParams &params, const Shape &input_shape,
// accumulation, but exp(-16) definitely is.
static const int kScaledDiffIntegerBits = 5;
static const int kAccumulationIntegerBits = 12;
- using FixedPointScaledDiff = gemmlowp::FixedPoint<kScaledDiffIntegerBits>;
- using FixedPointAccum = gemmlowp::FixedPoint<kAccumulationIntegerBits>;
- using FixedPoint0 = gemmlowp::FixedPoint<0>;
+ using FixedPointScaledDiff = gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
+ using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
+ using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
diff --git a/compute/cker/include/cker/operation/Split.h b/compute/cker/include/cker/operation/Split.h
new file mode 100644
index 000000000..08a436ee9
--- /dev/null
+++ b/compute/cker/include/cker/operation/Split.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_SPLIT_H__
+#define __NNFW_CKER_SPLIT_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+#include <cstring> // memcpy
+
+namespace nnfw
+{
+namespace cker
+{
+
+// Split input_data along 'axis' into params.num_split equally-sized outputs.
+//
+// The input is treated as [outer_size, Dims(axis), base_inner_size]; for each
+// outer step every output receives one contiguous slab of
+// output_shape.Dims(axis) * base_inner_size elements. All outputs are assumed
+// to share 'output_shape' (an equal split).
+template <typename Scalar>
+void Split(const SplitParams &params, const Shape &input_shape, const Scalar *input_data,
+           const Shape &output_shape, Scalar *const *output_data)
+{
+  const int split_dimensions = input_shape.DimensionsCount();
+  // Normalize a negative (Python-style) axis to its positive equivalent.
+  int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
+  int outputs_count = params.num_split;
+
+  // Product of dimensions before the split axis.
+  int64_t outer_size = 1;
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+  // For all output arrays,
+  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+  int64_t base_inner_size = 1;
+  for (int i = axis + 1; i < split_dimensions; ++i)
+  {
+    base_inner_size *= input_shape.Dims(i);
+  }
+
+  // copy_size is loop-invariant (it does not depend on k or i), so compute it
+  // once; keep it 64-bit to avoid narrowing the int64_t product to int.
+  const int64_t copy_size = output_shape.Dims(axis) * base_inner_size;
+
+  const Scalar *input_ptr = input_data;
+  for (int k = 0; k < outer_size; k++)
+  {
+    for (int i = 0; i < outputs_count; ++i)
+    {
+      memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
+      input_ptr += copy_size;
+    }
+  }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_SPLIT_H__
diff --git a/compute/cker/include/cker/operation/StridedSlice.h b/compute/cker/include/cker/operation/StridedSlice.h
new file mode 100644
index 000000000..c57b4daa0
--- /dev/null
+++ b/compute/cker/include/cker/operation/StridedSlice.h
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_STRIDEDSLICE_H__
+#define __NNFW_CKER_STRIDEDSLICE_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+
+#include <cmath>
+
+namespace nnfw
+{
+namespace cker
+{
+// Use until std::clamp() is available from C++17.
+// Returns v limited to the inclusive range [lo, hi]; asserts lo <= hi.
+inline int Clamp(const int v, const int lo, const int hi)
+{
+  assert(!(hi < lo));
+  if (hi < v)
+    return hi;
+  if (v < lo)
+    return lo;
+  return v;
+}
+
+// Pad the start/stop/stride arrays (and associated mask bits) at the front so
+// that 'p' describes exactly dim_count dimensions. Each padded leading
+// dimension is a full no-op slice: start 0, stop 1, stride 1, with its
+// begin/end mask bit set so clamping selects the whole (size-1) extent.
+inline void StridedSlicePadIndices(StridedSliceParams *p, int dim_count)
+{
+  // Add indices and mask bits to fully include extra dimensions
+  assert(dim_count <= 4);
+  assert(dim_count >= p->start_indices_count);
+  assert(p->start_indices_count == p->stop_indices_count);
+  assert(p->stop_indices_count == p->strides_count);
+
+  const int pad_count = dim_count - p->start_indices_count;
+
+  // Pad indices at start, so move arrays by pad_count.
+  // Iterate backwards so entries are not overwritten before being moved.
+  for (int i = p->start_indices_count - 1; i >= 0; --i)
+  {
+    p->strides[i + pad_count] = p->strides[i];
+    p->start_indices[i + pad_count] = p->start_indices[i];
+    p->stop_indices[i + pad_count] = p->stop_indices[i];
+  }
+  for (int i = 0; i < pad_count; ++i)
+  {
+    p->start_indices[i] = 0;
+    p->stop_indices[i] = 1;
+    p->strides[i] = 1;
+  }
+
+  // Pad masks with 0s or 1s as required.
+  p->shrink_axis_mask <<= pad_count;
+  p->ellipsis_mask <<= pad_count;
+  p->new_axis_mask <<= pad_count;
+  p->begin_mask <<= pad_count;
+  p->end_mask <<= pad_count;
+  // Set the low pad_count bits so the padded axes take their full range.
+  p->begin_mask |= (1 << pad_count) - 1;
+  p->end_mask |= (1 << pad_count) - 1;
+
+  p->start_indices_count = dim_count;
+  p->stop_indices_count = dim_count;
+  p->strides_count = dim_count;
+}
+
+// Return the index for the first element along that axis. This index will be a
+// positive integer between [0, axis_size - 1] that can be used to index
+// directly into the data.
+// Result is always clamped into [0, axis_size - 1].
+inline int StartForAxis(const StridedSliceParams &params, const Shape &input_shape, int axis)
+{
+  const auto begin_mask = params.begin_mask;
+  const auto *start_indices = params.start_indices;
+  const auto *strides = params.strides;
+  // Begin with the specified index.
+  int start = start_indices[axis];
+
+  // begin_mask override
+  if (begin_mask & 1 << axis)
+  {
+    if (strides[axis] > 0)
+    {
+      // Forward iteration - use the first element. These values will get
+      // clamped below (Note: We could have set them to 0 and axis_size-1, but
+      // use lowest() and max() to maintain symmetry with StopForAxis())
+      start = std::numeric_limits<int>::lowest();
+    }
+    else
+    {
+      // Backward iteration - use the last element.
+      start = std::numeric_limits<int>::max();
+    }
+  }
+
+  // Handle negative indices
+  // (a single wrap; anything below -axis_size still clamps to 0 below)
+  int axis_size = input_shape.Dims(axis);
+  if (start < 0)
+  {
+    start += axis_size;
+  }
+
+  // Clamping
+  start = Clamp(start, 0, axis_size - 1);
+
+  return start;
+}
+
+// Return the "real" index for the end of iteration along that axis. This is an
+// "end" in the traditional C sense, in that it points to one past the last
+// element. ie. So if you were iterating through all elements of a 1D array of
+// size 4, this function would return 4 as the stop, because it is one past the
+// "real" indices of 0, 1, 2 & 3.
+// start_for_axis must be the value already computed by StartForAxis for the
+// same axis; it is only consulted when the axis is being shrunk.
+inline int StopForAxis(const StridedSliceParams &params, const Shape &input_shape, int axis,
+                       int start_for_axis)
+{
+  const auto end_mask = params.end_mask;
+  const auto shrink_axis_mask = params.shrink_axis_mask;
+  const auto *stop_indices = params.stop_indices;
+  const auto *strides = params.strides;
+
+  // Begin with the specified index
+  const bool shrink_axis = shrink_axis_mask & (1 << axis);
+  int stop = stop_indices[axis];
+
+  // When shrinking an axis, the end position does not matter (and can be
+  // incorrect when negative indexing is used, see Issue #19260). Always use
+  // start_for_axis + 1 to generate a length 1 slice, since start_for_axis has
+  // already been adjusted for negative indices.
+  if (shrink_axis)
+  {
+    stop = start_for_axis + 1;
+  }
+
+  // end_mask override
+  if (end_mask & (1 << axis))
+  {
+    if (strides[axis] > 0)
+    {
+      // Forward iteration - use the last element. These values will get
+      // clamped below
+      stop = std::numeric_limits<int>::max();
+    }
+    else
+    {
+      // Backward iteration - use the first element.
+      stop = std::numeric_limits<int>::lowest();
+    }
+  }
+
+  // Handle negative indices
+  const int axis_size = input_shape.Dims(axis);
+  if (stop < 0)
+  {
+    stop += axis_size;
+  }
+
+  // Clamping
+  // Because the end index points one past the last element, we need slightly
+  // different clamping ranges depending on the direction.
+  if (strides[axis] > 0)
+  {
+    // Forward iteration
+    stop = Clamp(stop, 0, axis_size);
+  }
+  else
+  {
+    // Backward iteration
+    // (-1 is "one before index 0", the exclusive end for a backward scan)
+    stop = Clamp(stop, -1, axis_size - 1);
+  }
+
+  return stop;
+}
+
+// Returns true when 'index' has passed 'stop' in the direction implied by
+// 'stride', i.e. iteration along this axis should terminate.
+inline bool LoopCondition(int index, int stop, int stride)
+{
+  // True when we have reached the end of an axis and should loop.
+  return stride > 0 ? index >= stop : index <= stop;
+}
+
+// Build a StridedSliceParams from raw begin/end/stride arrays and masks for a
+// tensor of the given rank. ellipsis_mask and new_axis_mask are not yet
+// implemented and are forced to 0. Asserts that no stride is zero.
+template <typename T>
+inline StridedSliceParams
+buildStridedSliceParams(const T *begin, const T *end, const T *strides, const uint32_t begin_mask,
+                        const uint32_t end_mask, const uint32_t shrink_axis_mask,
+                        const uint8_t rank)
+{
+  StridedSliceParams op_params;
+  op_params.start_indices_count = rank;
+  op_params.stop_indices_count = rank;
+  op_params.strides_count = rank;
+
+  for (int i = 0; i < rank; ++i)
+  {
+    op_params.start_indices[i] = begin[i];
+    op_params.stop_indices[i] = end[i];
+    op_params.strides[i] = strides[i];
+
+    assert(op_params.strides[i] != 0);
+  }
+
+  op_params.begin_mask = begin_mask;
+  op_params.ellipsis_mask = 0; // NYI
+  op_params.end_mask = end_mask;
+  op_params.new_axis_mask = 0; // NYI
+  op_params.shrink_axis_mask = shrink_axis_mask;
+
+  // NOTE(review): reads like a "mask has enough bits for rank" check, but
+  // sizeof * 4 undercounts (a byte has 8 bits) -- confirm the intended bound.
+  assert(sizeof(op_params.begin_mask) * 4 >= rank);
+
+  return op_params;
+}
+
+// Debug-only validation that 'output_shape' matches the shape obtained by
+// applying op_params (begin/end/stride plus masks) to 'input_shape'. With
+// NDEBUG the asserts compile away and this becomes a no-op.
+//
+// 'inline' is required: this is a non-template function defined in a header,
+// so without it every translation unit including this file would emit its own
+// external definition and violate the One Definition Rule (duplicate symbols
+// at link time).
+inline void checkOutputSize(const StridedSliceParams &op_params, const Shape &input_shape,
+                            const Shape &output_shape, uint32_t rank)
+{
+  UNUSED_RELEASE(output_shape);
+
+  // Number of output dimensions actually produced (shrunk axes are dropped).
+  int32_t shape_size = 0;
+
+  for (uint32_t idx = 0; idx < rank; ++idx)
+  {
+    int32_t stride = op_params.strides[idx];
+    int32_t begin = StartForAxis(op_params, input_shape, idx);
+    int32_t end = StopForAxis(op_params, input_shape, idx, begin);
+
+    // When shrinking an axis, the end position does not matter (and can be
+    // incorrect when negative indexing is used, see Issue #19260). Always use
+    // begin + 1 to generate a length 1 slice, since begin has
+    // already been adjusted for negative indices by StartForAxis.
+    const bool shrink_axis = op_params.shrink_axis_mask & (1 << idx);
+    if (shrink_axis)
+    {
+      end = begin + 1;
+    }
+
+    // ceil((end - begin) / stride), clamped below at 0 for empty slices.
+    int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
+    dim_shape = dim_shape < 0 ? 0 : dim_shape;
+    if (!shrink_axis)
+    {
+      assert(output_shape.Dims(shape_size) == dim_shape);
+      shape_size++;
+    }
+  }
+
+  assert(output_shape.DimensionsCount() == shape_size);
+}
+
+// Copy the strided slice described by op_params from input_data into
+// output_data. Shapes are extended to 4-D and iterated as (b, h, w, d);
+// output elements are written contiguously in iteration order.
+template <typename T>
+inline void StridedSlice(const StridedSliceParams &op_params, const Shape &unextended_input_shape,
+                         const T *input_data, const Shape &unextended_output_shape, T *output_data)
+{
+  // Note that the output_shape is not used herein.
+  StridedSliceParams params_copy = op_params;
+
+  assert(unextended_input_shape.DimensionsCount() <= 4);
+  assert(unextended_output_shape.DimensionsCount() <= 4);
+
+  const Shape input_shape = Shape::ExtendedShape(4, unextended_input_shape);
+  const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+  // Reverse and pad to 4 dimensions because that is what the runtime code
+  // requires (ie. all shapes must be 4D and are given backwards).
+  StridedSlicePadIndices(&params_copy, 4);
+
+  // Resolve the inclusive start / exclusive stop per axis once, up front.
+  const int start_b = StartForAxis(params_copy, input_shape, 0);
+  const int stop_b = StopForAxis(params_copy, input_shape, 0, start_b);
+  const int start_h = StartForAxis(params_copy, input_shape, 1);
+  const int stop_h = StopForAxis(params_copy, input_shape, 1, start_h);
+  const int start_w = StartForAxis(params_copy, input_shape, 2);
+  const int stop_w = StopForAxis(params_copy, input_shape, 2, start_w);
+  const int start_d = StartForAxis(params_copy, input_shape, 3);
+  const int stop_d = StopForAxis(params_copy, input_shape, 3, start_d);
+
+  T *out_ptr = output_data;
+  for (int in_b = start_b; !LoopCondition(in_b, stop_b, params_copy.strides[0]);
+       in_b += params_copy.strides[0])
+  {
+    for (int in_h = start_h; !LoopCondition(in_h, stop_h, params_copy.strides[1]);
+         in_h += params_copy.strides[1])
+    {
+      for (int in_w = start_w; !LoopCondition(in_w, stop_w, params_copy.strides[2]);
+           in_w += params_copy.strides[2])
+      {
+        for (int in_d = start_d; !LoopCondition(in_d, stop_d, params_copy.strides[3]);
+             in_d += params_copy.strides[3])
+        {
+          *out_ptr++ = input_data[Offset(input_shape, in_b, in_h, in_w, in_d)];
+        }
+      }
+    }
+  }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_STRIDEDSLICE_H__
diff --git a/compute/cker/include/cker/operation/Tanh.h b/compute/cker/include/cker/operation/Tanh.h
new file mode 100644
index 000000000..8747d52b4
--- /dev/null
+++ b/compute/cker/include/cker/operation/Tanh.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_TANH_H__
+#define __NNFW_CKER_TANH_H__
+
+#include "cker/eigen/Utils.h"
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include <Eigen/Core>
+
+namespace nnfw
+{
+namespace cker
+{
+
+// Element-wise hyperbolic tangent: output = tanh(input), computed with
+// Eigen's vectorized array tanh. Input and output shapes are expected to
+// have the same flat size.
+inline void Tanh(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+                 float *output_data)
+{
+  auto input_map = MapAsVector(input_data, input_shape);
+  auto output_map = MapAsVector(output_data, output_shape);
+  output_map.array() = input_map.array().tanh();
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_TANH_H__
diff --git a/compute/cker/include/cker/operation/Transpose.h b/compute/cker/include/cker/operation/Transpose.h
new file mode 100644
index 000000000..9d8cd340d
--- /dev/null
+++ b/compute/cker/include/cker/operation/Transpose.h
@@ -0,0 +1,580 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_TRANSPOSE_H__
+#define __NNFW_CKER_TRANSPOSE_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+namespace reference
+{
+
+// Reference (naive) transpose: extend both shapes to 4-D, extend 'perm' to
+// match, then iterate every output index and gather the corresponding input
+// element. Correct for any rank <= 4 permutation; no performance tuning.
+template <typename T>
+void TransposeImpl(const TransposeParams &params, const Shape &unextended_input_shape,
+                   const T *input_data, const Shape &unextended_output_shape, T *output_data)
+{
+  const int unextended_output_size = unextended_output_shape.DimensionsCount();
+  assert(unextended_input_shape.DimensionsCount() <= 4);
+  assert(unextended_output_size <= 4);
+  assert(unextended_output_size == params.perm_count);
+  const Shape input_shape = Shape::ExtendedShape(4, unextended_input_shape);
+  const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+  const int input_ext_size = 4 - unextended_input_shape.DimensionsCount();
+  const int output_ext_size = 4 - unextended_output_size;
+
+  // The perm data is extended to match the output, each index incremented by
+  // the amount of front padding of the input shape.
+  int extended_perm[4];
+  for (int i = 0; i < output_ext_size; ++i)
+  {
+    extended_perm[i] = i;
+  }
+  for (int i = 0; i < unextended_output_size; ++i)
+  {
+    extended_perm[i + output_ext_size] = params.perm[i] + input_ext_size;
+  }
+
+  int out_sizes[4];
+  // Compute the inverse permutation array so we can do an output centered
+  // transpose. Also, check to make sure output_dims is matching input_dims.
+  for (int k = 0; k < 4; k++)
+  {
+    out_sizes[k] = MatchingDim(input_shape, extended_perm[k], output_shape, k);
+  }
+
+  // Naive transpose loop (iterate on output index and compute input index).
+  int o[4]; // loop index (on output).
+  int i[4];
+  for (o[3] = 0; o[3] < out_sizes[3]; o[3]++)
+  {
+    i[extended_perm[3]] = o[3];
+    for (o[2] = 0; o[2] < out_sizes[2]; o[2]++)
+    {
+      i[extended_perm[2]] = o[2];
+      for (o[1] = 0; o[1] < out_sizes[1]; o[1]++)
+      {
+        i[extended_perm[1]] = o[1];
+        for (o[0] = 0; o[0] < out_sizes[0]; o[0]++)
+        {
+          i[extended_perm[0]] = o[0];
+          output_data[Offset(output_shape, o)] = input_data[Offset(input_shape, i)];
+        }
+      }
+    }
+  }
+}
+
+// Dispatch by element width so only four instantiations of TransposeImpl are
+// generated regardless of T.
+template <typename T>
+void Transpose(const TransposeParams &params, const Shape &unextended_input_shape,
+               const T *input_data, const Shape &unextended_output_shape, T *output_data)
+{
+  // Transpose kernel only does rearranging values not numeric evaluations on
+  // each cell. It's safe to implement per size of scalar type and this trick
+  // keeps the total code size in a reasonable range.
+  // NOTE(review): a sizeof(T) other than 1/2/4/8 silently does nothing --
+  // confirm all supported element types are covered.
+  switch (sizeof(T))
+  {
+    case 1:
+      TransposeImpl<int8_t>(params, unextended_input_shape,
+                            reinterpret_cast<const int8_t *>(input_data), unextended_output_shape,
+                            reinterpret_cast<int8_t *>(output_data));
+      break;
+    case 2:
+      TransposeImpl<int16_t>(params, unextended_input_shape,
+                             reinterpret_cast<const int16_t *>(input_data), unextended_output_shape,
+                             reinterpret_cast<int16_t *>(output_data));
+      break;
+
+    case 4:
+      TransposeImpl<int32_t>(params, unextended_input_shape,
+                             reinterpret_cast<const int32_t *>(input_data), unextended_output_shape,
+                             reinterpret_cast<int32_t *>(output_data));
+      break;
+    case 8:
+      TransposeImpl<int64_t>(params, unextended_input_shape,
+                             reinterpret_cast<const int64_t *>(input_data), unextended_output_shape,
+                             reinterpret_cast<int64_t *>(output_data));
+      break;
+  }
+}
+} // namespace reference
+
+namespace
+{
+
+// Returns true when 'params' is effectively a 2-D transpose: either the input
+// is already rank 2, or the permutation is a rotation of (0, 1, ..., n-1) so
+// the dimensions collapse into two contiguous groups. On success the two
+// collapsed sizes are returned through dim0/dim1.
+bool IsTranspose2DApplicable(const TransposeParams &params, const Shape &input_shape, int *dim0,
+                             int *dim1)
+{
+  const int dims_cnt = input_shape.DimensionsCount();
+
+  if (dims_cnt == 2)
+  {
+    *dim0 = input_shape.Dims(0);
+    *dim1 = input_shape.Dims(1);
+    return true;
+  }
+
+  // Check that perm is (f, f+1, ..., n-1, 0, 1, ..., f-1) for some f.
+  const int first_perm = params.perm[0];
+  for (int i = 1; i < dims_cnt; ++i)
+  {
+    int rebased = params.perm[i] - first_perm;
+    if (rebased < 0)
+    {
+      rebased += dims_cnt;
+    }
+    if (rebased != i)
+    {
+      return false;
+    }
+  }
+  // Collapse dims before/after the rotation point into two block sizes.
+  *dim0 = 1;
+  *dim1 = 1;
+  for (int i = 0; i < dims_cnt; ++i)
+  {
+    if (i < first_perm)
+    {
+      *dim0 *= input_shape.Dims(i);
+    }
+    else
+    {
+      *dim1 *= input_shape.Dims(i);
+    }
+  }
+  return true;
+}
+
+// Strip size-1 dimensions from both shapes and re-map 'perm' accordingly,
+// in place. Size-1 axes contribute nothing to a transpose, so removing them
+// lets the lower-rank kernels run with better memory access patterns.
+void RemoveOneSizeDimensions(Shape *input_shape, Shape *output_shape, TransposeParams *params)
+{
+  const int dims_cnt = input_shape->DimensionsCount();
+  assert(params->perm_count == dims_cnt);
+
+  bool foundOneSizeDim = false;
+  for (int i = 0; i < dims_cnt; ++i)
+  {
+    if (input_shape->Dims(i) == 1)
+    {
+      foundOneSizeDim = true;
+      break;
+    }
+  }
+
+  // Return here if there is no one size dimension.
+  if (!foundOneSizeDim)
+    return;
+
+  // Handle the case where all the dimension size is one.
+  if (input_shape->FlatSize() == 1)
+  {
+    input_shape->Resize(1);
+    input_shape->SetDim(0, 1);
+    output_shape->Resize(1);
+    output_shape->SetDim(0, 1);
+    params->perm_count = 1;
+    params->perm[0] = 0;
+    return;
+  }
+
+  // Resize input shape.
+  int new_dims_cnt = 0;
+  for (int i = 0; i < dims_cnt; ++i)
+  {
+    if (input_shape->Dims(i) == 1)
+    {
+      continue;
+    }
+    input_shape->SetDim(new_dims_cnt, input_shape->Dims(i));
+    ++new_dims_cnt;
+  }
+  input_shape->Resize(new_dims_cnt);
+
+  // Resize output shape and re-calculate the perm parameter.
+  TransposeParams new_params;
+  new_dims_cnt = 0;
+  for (int i = 0; i < dims_cnt; ++i)
+  {
+    if (output_shape->Dims(i) == 1)
+    {
+      continue;
+    }
+    new_params.perm[new_dims_cnt] = params->perm[i];
+    output_shape->SetDim(new_dims_cnt, output_shape->Dims(i));
+    ++new_dims_cnt;
+  }
+  output_shape->Resize(new_dims_cnt);
+  new_params.perm_count = new_dims_cnt;
+
+  // Compact the surviving perm values into a dense 0..new_dims_cnt-1 range
+  // while preserving their relative order: for each target value i, find the
+  // smallest remaining perm entry that is still >= i and rewrite it to i.
+  for (int i = 0; i < new_dims_cnt; ++i)
+  {
+    int min_val_idx = -1;
+    for (int j = 0; j < new_dims_cnt; ++j)
+    {
+      if (new_params.perm[j] >= i &&
+          (min_val_idx == -1 || new_params.perm[min_val_idx] > new_params.perm[j]))
+      {
+        min_val_idx = j;
+      }
+    }
+    new_params.perm[min_val_idx] = i;
+  }
+  *params = new_params;
+}
+
+// Split off the leading axes that the permutation leaves in place
+// (perm[i] == i): those axes can be handled by repeating a smaller transpose
+// over contiguous chunks. Fills the non-flatten shapes/params with the
+// remaining (actually permuted) tail and returns the flat size of one chunk.
+size_t Flatten(const Shape &input_shape, const Shape &output_shape, const TransposeParams &params,
+               Shape *non_flatten_input_shape, Shape *non_flatten_output_shape,
+               TransposeParams *non_flatten_params)
+{
+  // Calculate the total size of non-flatten dimensions.
+  int skip_dims_cnt = 0;
+  size_t flat_size = input_shape.FlatSize();
+  for (int i = 0; i < params.perm_count; ++i)
+  {
+    if (params.perm[i] == i)
+    {
+      flat_size /= input_shape.Dims(i);
+      ++skip_dims_cnt;
+    }
+    else
+    {
+      break;
+    }
+  }
+
+  // Shrink the shapes and re-calculate the perm parameter.
+  const int new_dims_cnt = params.perm_count - skip_dims_cnt;
+  non_flatten_input_shape->Resize(new_dims_cnt);
+  non_flatten_output_shape->Resize(new_dims_cnt);
+  non_flatten_params->perm_count = new_dims_cnt;
+
+  for (int i = skip_dims_cnt; i < params.perm_count; ++i)
+  {
+    non_flatten_input_shape->SetDim(i - skip_dims_cnt, input_shape.Dims(i));
+    non_flatten_output_shape->SetDim(i - skip_dims_cnt, output_shape.Dims(i));
+    non_flatten_params->perm[i - skip_dims_cnt] = params.perm[i];
+  }
+  // Re-base the surviving perm values onto 0..new_dims_cnt-1, preserving
+  // relative order (same compaction scheme as RemoveOneSizeDimensions).
+  for (int i = 0; i < new_dims_cnt; ++i)
+  {
+    int min_val_idx = -1;
+    for (int j = 0; j < new_dims_cnt; ++j)
+    {
+      if (non_flatten_params->perm[j] >= i &&
+          (min_val_idx == -1 ||
+           non_flatten_params->perm[min_val_idx] > non_flatten_params->perm[j]))
+      {
+        min_val_idx = j;
+      }
+    }
+    non_flatten_params->perm[min_val_idx] = i;
+  }
+
+  return flat_size;
+}
+
+} // namespace anonymous (util)
+
+// Transpose2D only deals with typical 2D matrix transpose ops.
+// Perform transpose by transposing 4x4 blocks of the input, proceeding from
+// left to right (down the rows) of the input, and then from top to bottom.
+template <typename T>
+inline void Transpose2D(const Shape &input_shape, const T *input_data, const Shape &output_shape,
+                        T *output_data)
+{
+  assert(input_shape.DimensionsCount() == 2);
+  assert(output_shape.DimensionsCount() == 2);
+  UNUSED_RELEASE(output_shape);
+
+  const int d0 = input_shape.DimsData()[0];
+  const int d1 = input_shape.DimsData()[1];
+  // Process kLines rows at a time; kSkipSize advances past the remaining
+  // kLines-1 rows of a fully-consumed row band.
+  const int kLines = 4;
+  const int kSkipSize = (kLines - 1) * d1;
+
+  const T *input = input_data;
+
+  int i = 0;
+  for (; i <= d0 - kLines; i += kLines)
+  {
+    T *output = output_data + i;
+
+    // Prefetch the four input rows of this band.
+    const T *input_ptr = input;
+    optimized_ops_preload_l1_keep(input_ptr);
+    input_ptr += d1;
+    optimized_ops_preload_l1_keep(input_ptr);
+    input_ptr += d1;
+    optimized_ops_preload_l1_keep(input_ptr);
+    input_ptr += d1;
+    optimized_ops_preload_l1_keep(input_ptr);
+
+    int j = 0;
+    for (; j <= d1 - kLines; j += kLines)
+    {
+      // Load a 4x4 tile into locals, then store it transposed.
+      input_ptr = input;
+      const T a00 = input_ptr[0];
+      const T a01 = input_ptr[1];
+      const T a02 = input_ptr[2];
+      const T a03 = input_ptr[3];
+      input_ptr += d1;
+      const T a10 = input_ptr[0];
+      const T a11 = input_ptr[1];
+      const T a12 = input_ptr[2];
+      const T a13 = input_ptr[3];
+      input_ptr += d1;
+      const T a20 = input_ptr[0];
+      const T a21 = input_ptr[1];
+      const T a22 = input_ptr[2];
+      const T a23 = input_ptr[3];
+      input_ptr += d1;
+      const T a30 = input_ptr[0];
+      const T a31 = input_ptr[1];
+      const T a32 = input_ptr[2];
+      const T a33 = input_ptr[3];
+
+      output[0] = a00;
+      output[1] = a10;
+      output[2] = a20;
+      output[3] = a30;
+      output += d0;
+
+      output[0] = a01;
+      output[1] = a11;
+      output[2] = a21;
+      output[3] = a31;
+      output += d0;
+
+      output[0] = a02;
+      output[1] = a12;
+      output[2] = a22;
+      output[3] = a32;
+      output += d0;
+
+      output[0] = a03;
+      output[1] = a13;
+      output[2] = a23;
+      output[3] = a33;
+      output += d0;
+
+      input += kLines;
+    }
+    if (j == d1)
+    {
+      input += kSkipSize;
+    }
+    else
+    {
+      // Tail columns (d1 not a multiple of 4): element-by-element copy.
+      for (int p = 0; p < kLines; ++p)
+      {
+        for (int q = 0; q < d1 - j; ++q)
+        {
+          *(output + q * d0 + p) = *(input + p * d1 + q);
+        }
+      }
+      input += (d1 - j) + kSkipSize;
+    }
+  }
+  // Tail rows (d0 not a multiple of 4): one row at a time.
+  for (; i < d0; ++i)
+  {
+    T *output = output_data + i;
+    for (int j = 0; j < d1; ++j)
+    {
+      *output = *input;
+      output += d0;
+      ++input;
+    }
+  }
+}
+
+// TODO(alanchiao): see if we can reduce the number
+// of lines of code in branching without affecting latency.
+// TODO(alanchiao): see if we can reduce the number
+// of lines of code in branching without affecting latency.
+// 3-D transpose: p1/p2/p3 become the input strides corresponding to the
+// first/second/third output loop (1, s3 or s2*s3 depending on which input
+// axis each output axis maps to); the output is written contiguously.
+template <typename T>
+inline void Transpose3D(const TransposeParams &params, const Shape &input_shape,
+                        const T *input_data, const Shape &, T *output_data)
+{
+  int s2, s3;
+  s2 = input_shape.Dims(1);
+  s3 = input_shape.Dims(2);
+
+  int p1 = 0;
+  int p2 = 0;
+  int p3 = 0;
+
+  // Input axis 2 (innermost, stride 1).
+  if (params.perm[0] == 2)
+  {
+    p1 = 1;
+  }
+  else if (params.perm[1] == 2)
+  {
+    p2 = 1;
+  }
+  else
+  {
+    p3 = 1;
+  }
+
+  // Input axis 1 (stride s3).
+  if (params.perm[0] == 1)
+  {
+    p1 = s3;
+  }
+  else if (params.perm[1] == 1)
+  {
+    p2 = s3;
+  }
+  else
+  {
+    p3 = s3;
+  }
+
+  // Input axis 0 (outermost, stride s2*s3).
+  if (params.perm[0] == 0)
+  {
+    p1 = s2 * s3;
+  }
+  else if (params.perm[1] == 0)
+  {
+    p2 = s2 * s3;
+  }
+  else
+  {
+    p3 = s2 * s3;
+  }
+
+  int o_s[3];
+  o_s[0] = input_shape.Dims(params.perm[0]);
+  o_s[1] = input_shape.Dims(params.perm[1]);
+  o_s[2] = input_shape.Dims(params.perm[2]);
+
+  for (int i1 = 0; i1 < o_s[0]; ++i1)
+  {
+    for (int i2 = 0; i2 < o_s[1]; ++i2)
+    {
+      for (int i3 = 0; i3 < o_s[2]; ++i3)
+      {
+        const int i = i1 * p1 + i2 * p2 + i3 * p3;
+        const int o = i1 * o_s[1] * o_s[2] + i2 * o_s[2] + i3;
+        output_data[o] = input_data[i];
+      }
+    }
+  }
+}
+
+// Select the fastest applicable transpose kernel for the (already shrunk)
+// shapes: 2-D fast path, then 3-D, then the generic reference kernel.
+template <typename T>
+void TransposeImpl(const TransposeParams &params, const Shape &input_shape, const T *input_data,
+                   const Shape &output_shape, T *output_data)
+{
+  const int dims_cnt = input_shape.DimensionsCount();
+
+  int dim0, dim1;
+  if (IsTranspose2DApplicable(params, input_shape, &dim0, &dim1))
+  {
+    Transpose2D(Shape({dim0, dim1}), input_data, Shape({dim1, dim0}), output_data);
+    return;
+  }
+
+  // TODO(b/141217325): notably Eigen is better suited for
+  // larger inputs whereas Transpose3D is generally
+  // better for smaller ones.
+  //
+  // E.g. on Nexus 5, Eigen is better for size 96^3 and up
+  // and Transpose3D is better for 72^3 and down.
+  //
+  // 96^3 is not mobile-friendly for certain usecases
+  // (e.g. model used in beam search for seq2seq) but is in others.
+  // Consider tradeoffs.
+  if (dims_cnt == 3)
+  {
+    Transpose3D(params, input_shape, input_data, output_shape, output_data);
+    return;
+  }
+
+  // Reroute to the reference version if an optimized method for the given data
+  // is not available.
+  reference::Transpose(params, input_shape, input_data, output_shape, output_data);
+}
+
+// Public entry point: shrink away size-1 axes, short-circuit identity
+// permutations with a memcpy, peel off leading in-place axes via Flatten,
+// then run TransposeImpl on what remains.
+template <typename T>
+void Transpose(const TransposeParams &unshrunk_params, const Shape &unshrunk_input_shape,
+               const T *input_data, const Shape &unshrunk_output_shape, T *output_data)
+{
+  const int output_size = unshrunk_output_shape.DimensionsCount();
+  assert(unshrunk_input_shape.DimensionsCount() <= 4);
+  assert(output_size <= 4);
+  assert(output_size == unshrunk_params.perm_count);
+
+  Shape shrunk_input_shape = Shape(unshrunk_input_shape);
+
+  Shape shrunk_output_shape = Shape(unshrunk_output_shape);
+
+  TransposeParams shrunk_params = unshrunk_params;
+
+  // Reduce any dimensions that have one size. Lower transpose op usually
+  // performs better since memory access patterns will be improved.
+  RemoveOneSizeDimensions(&shrunk_input_shape, &shrunk_output_shape, &shrunk_params);
+
+  // Handle identity cases.
+  // TODO(b/140779653): Add an optimization pass in the conversion process to
+  // remove transpose op nodes where they do nothing like the below one.
+  bool identical = true;
+  for (int i = 0; i < shrunk_params.perm_count; ++i)
+
+  {
+    if (shrunk_params.perm[i] != i)
+
+    {
+      identical = false;
+      break;
+    }
+  }
+  if (identical)
+  {
+    memcpy(output_data, input_data, unshrunk_input_shape.FlatSize() * sizeof(T));
+    return;
+  }
+
+  // Reduce dimensions by flattening.
+  // When the leading axes stay in place, transpose each contiguous chunk
+  // independently with the smaller non-flatten params.
+  if (shrunk_params.perm[0] == 0 && output_size >= 3)
+
+  {
+    Shape non_flatten_input_shape;
+    Shape non_flatten_output_shape;
+    TransposeParams non_flatten_params;
+    const int total_size = shrunk_input_shape.FlatSize();
+
+    const int non_flatten_size =
+        Flatten(shrunk_input_shape, shrunk_output_shape, shrunk_params,
+
+                &non_flatten_input_shape, &non_flatten_output_shape, &non_flatten_params);
+    assert(non_flatten_params.perm[0] != 0);
+
+    for (int i = 0; i < total_size; i += non_flatten_size)
+    {
+      TransposeImpl(non_flatten_params, non_flatten_input_shape, input_data + i,
+                    non_flatten_output_shape, output_data + i);
+    }
+    return;
+  }
+
+  // Call non-flattened case.
+  TransposeImpl(shrunk_params, shrunk_input_shape, input_data, shrunk_output_shape,
+
+                output_data);
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_TRANSPOSE_H__
diff --git a/compute/cker/include/cker/operation/TransposeConv.h b/compute/cker/include/cker/operation/TransposeConv.h
index 535fe86cf..7db3a1179 100644
--- a/compute/cker/include/cker/operation/TransposeConv.h
+++ b/compute/cker/include/cker/operation/TransposeConv.h
@@ -27,30 +27,6 @@ namespace nnfw
namespace cker
{
-struct TransposeConvParams
-{
- PaddingType padding_type;
- PaddingValues padding_values;
- // TODO(starka): This was just "stride", so check that width+height is OK.
- int16_t stride_width;
- int16_t stride_height;
- int16_t dilation_width_factor;
- int16_t dilation_height_factor;
- // uint8_t inference params.
- // TODO(b/65838351): Use smaller types if appropriate.
- int32_t input_offset;
- int32_t weights_offset;
- int32_t output_offset;
- int32_t output_multiplier;
- int output_shift;
- // uint8_t, etc, activation params.
- int32_t quantized_activation_min;
- int32_t quantized_activation_max;
- // float activation params.
- float float_activation_min;
- float float_activation_max;
-};
-
inline void TransposeConv(const TransposeConvParams &params, const Shape &input_shape,
const float *input_data, const Shape &filter_shape,
const float *filter_data, const Shape &output_shape, float *output_data)
diff --git a/compute/cker/include/cker/operation/Unpack.h b/compute/cker/include/cker/operation/Unpack.h
new file mode 100644
index 000000000..242aadf46
--- /dev/null
+++ b/compute/cker/include/cker/operation/Unpack.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_UNPACK_H__
+#define __NNFW_CKER_UNPACK_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+// Unpack (split) the input along params.axis into params.num_split outputs,
+// dropping that axis from each output: output i receives, for every outer
+// step, the contiguous chunk at position i along the axis.
+// NOTE(review): uses memcpy and assert but the file includes neither
+// <cstring> nor <cassert> directly -- relies on transitive includes; confirm.
+template <typename Scalar>
+void Unpack(const UnpackParams &params, const Shape &input_shape, const Scalar *input_data,
+            const Shape &output_shape, Scalar *const *output_datas)
+{
+  const int dimensions = input_shape.DimensionsCount();
+  const int outputs_count = params.num_split;
+
+  // Product of dimensions before the axis.
+  int outer_size = 1;
+  for (int i = 0; i < params.axis; i++)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+  // Product of dimensions after the axis: elements copied per chunk.
+  int copy_size = 1;
+  for (int i = params.axis + 1; i < dimensions; i++)
+  {
+    copy_size *= input_shape.Dims(i);
+  }
+  assert(output_shape.FlatSize() == copy_size * outer_size);
+  UNUSED_RELEASE(output_shape);
+
+  for (int i = 0; i < outputs_count; ++i)
+  {
+    for (int k = 0; k < outer_size; k++)
+    {
+      Scalar *output_ptr = output_datas[i] + copy_size * k;
+      // Source offset: outer step k, slice i along the unpacked axis.
+      int loc = k * outputs_count * copy_size + i * copy_size;
+      memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+    }
+  }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_UNPACK_H__
diff --git a/compute/cker/include/cker/operation/optimized/AveragePool.h b/compute/cker/include/cker/operation/optimized/AveragePool.h
deleted file mode 100644
index d94a5811a..000000000
--- a/compute/cker/include/cker/operation/optimized/AveragePool.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_CKER_OPTIMIZED_AVERAGE_POOL_H__
-#define __NNFW_CKER_OPTIMIZED_AVERAGE_POOL_H__
-
-#if defined(CKER_OPTIMIZED_EIGEN)
-
-#include "cker/eigen/Utils.h"
-#include "cker/Shape.h"
-#include "cker/Types.h"
-#include "cker/Utils.h"
-#include <Eigen/Core>
-
-namespace nnfw
-{
-namespace cker
-{
-namespace optimized
-{
-
-// TODO Change to apply neon for this function if it is faster
-inline void AveragePool(const PoolParams &params, const Shape &input_shape, const float *input_data,
- const Shape &output_shape, float *output_data)
-{
- assert(input_shape.DimensionsCount() == 4);
- assert(output_shape.DimensionsCount() == 4);
- const int batches = MatchingDim(input_shape, 0, output_shape, 0);
- const int input_height = input_shape.Dims(1);
- const int input_width = input_shape.Dims(2);
- const int output_height = output_shape.Dims(1);
- const int output_width = output_shape.Dims(2);
- const int stride_height = params.stride_height;
- const int stride_width = params.stride_width;
-
- // TODO(benoitjacob) make this a proper reference impl without Eigen!
- const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
- auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
- // TODO(benoitjacob) get rid of the dynamic memory allocation here!
- Eigen::VectorXf out_count(out_mat.cols());
- out_count.setZero();
- // Prefill the output to 0.
- out_mat.setZero();
- for (int b = 0; b < batches; ++b)
- {
- for (int h = 0; h < input_height; ++h)
- {
- for (int w = 0; w < input_width; ++w)
- {
- // (h_start, h_end) * (w_start, w_end) is the range that the input
- // vector projects to.
- int hpad = h + params.padding_values.height;
- int wpad = w + params.padding_values.width;
- int h_start =
- (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
- int h_end = std::min(hpad / stride_height + 1, output_height);
- int w_start =
- (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
- int w_end = std::min(wpad / stride_width + 1, output_width);
- // compute elementwise sum
- for (int ph = h_start; ph < h_end; ++ph)
- {
- for (int pw = w_start; pw < w_end; ++pw)
- {
- int out_offset = NodeOffset(b, ph, pw, output_height, output_width);
- out_mat.col(out_offset) += in_mat.col(NodeOffset(b, h, w, input_height, input_width));
- out_count(out_offset)++;
- }
- }
- }
- }
- }
- // Divide the output by the actual number of elements being averaged over
- assert(out_count.minCoeff() > 0);
- out_mat.array().rowwise() /= out_count.transpose().array();
-
- const int flat_size = output_shape.FlatSize();
- for (int i = 0; i < flat_size; ++i)
- {
- output_data[i] = ActivationFunctionWithMinMax(output_data[i], params.float_activation_min,
- params.float_activation_max);
- }
-}
-
-} // namespace optimized
-} // namespace cker
-} // namespace nnfw
-
-#endif // defined(CKER_OPTIMIZED_EIGEN)
-
-#endif // __NNFW_CKER_OPTIMIZED_AVERAGE_POOL_H__
diff --git a/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h b/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h
new file mode 100644
index 000000000..c88706563
--- /dev/null
+++ b/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_OPTIMIZED_BINARYARITHMETICOPS_H__
+#define __NNFW_CKER_OPTIMIZED_BINARYARITHMETICOPS_H__
+
+#include "cker/neon/neon_check.h"
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+namespace optimized
+{
+
+inline void Add(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const float *input1_data, const Shape &input2_shape, const float *input2_data,
+ const Shape &output_shape, float *output_data)
+{
+ int i = 0;
+ const int size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+#ifdef USE_NEON
+ const auto activation_min = vdupq_n_f32(params.float_activation_min);
+ const auto activation_max = vdupq_n_f32(params.float_activation_max);
+ for (; i <= size - 16; i += 16)
+ {
+ auto a10 = vld1q_f32(input1_data + i);
+ auto a11 = vld1q_f32(input1_data + i + 4);
+ auto a12 = vld1q_f32(input1_data + i + 8);
+ auto a13 = vld1q_f32(input1_data + i + 12);
+ auto a20 = vld1q_f32(input2_data + i);
+ auto a21 = vld1q_f32(input2_data + i + 4);
+ auto a22 = vld1q_f32(input2_data + i + 8);
+ auto a23 = vld1q_f32(input2_data + i + 12);
+ auto x0 = vaddq_f32(a10, a20);
+ auto x1 = vaddq_f32(a11, a21);
+ auto x2 = vaddq_f32(a12, a22);
+ auto x3 = vaddq_f32(a13, a23);
+ x0 = vmaxq_f32(activation_min, x0);
+ x1 = vmaxq_f32(activation_min, x1);
+ x2 = vmaxq_f32(activation_min, x2);
+ x3 = vmaxq_f32(activation_min, x3);
+ x0 = vminq_f32(activation_max, x0);
+ x1 = vminq_f32(activation_max, x1);
+ x2 = vminq_f32(activation_max, x2);
+ x3 = vminq_f32(activation_max, x3);
+ vst1q_f32(output_data + i, x0);
+ vst1q_f32(output_data + i + 4, x1);
+ vst1q_f32(output_data + i + 8, x2);
+ vst1q_f32(output_data + i + 12, x3);
+ }
+ for (; i <= size - 4; i += 4)
+ {
+ auto a1 = vld1q_f32(input1_data + i);
+ auto a2 = vld1q_f32(input2_data + i);
+ auto x = vaddq_f32(a1, a2);
+ x = vmaxq_f32(activation_min, x);
+ x = vminq_f32(activation_max, x);
+ vst1q_f32(output_data + i, x);
+ }
+#endif // NEON
+
+ for (; i < size; i++)
+ {
+ auto x = input1_data[i] + input2_data[i];
+ output_data[i] =
+ ActivationFunctionWithMinMax(x, params.float_activation_min, params.float_activation_max);
+ }
+}
+
+inline void Sub(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const float *input1_data, const Shape &input2_shape, const float *input2_data,
+ const Shape &output_shape, float *output_data)
+{
+ int i = 0;
+ const int size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+#ifdef USE_NEON
+ const auto activation_min = vdupq_n_f32(params.float_activation_min);
+ const auto activation_max = vdupq_n_f32(params.float_activation_max);
+ for (; i <= size - 16; i += 16)
+ {
+ auto a10 = vld1q_f32(input1_data + i);
+ auto a11 = vld1q_f32(input1_data + i + 4);
+ auto a12 = vld1q_f32(input1_data + i + 8);
+ auto a13 = vld1q_f32(input1_data + i + 12);
+ auto a20 = vld1q_f32(input2_data + i);
+ auto a21 = vld1q_f32(input2_data + i + 4);
+ auto a22 = vld1q_f32(input2_data + i + 8);
+ auto a23 = vld1q_f32(input2_data + i + 12);
+ auto x0 = vsubq_f32(a10, a20);
+ auto x1 = vsubq_f32(a11, a21);
+ auto x2 = vsubq_f32(a12, a22);
+ auto x3 = vsubq_f32(a13, a23);
+ x0 = vmaxq_f32(activation_min, x0);
+ x1 = vmaxq_f32(activation_min, x1);
+ x2 = vmaxq_f32(activation_min, x2);
+ x3 = vmaxq_f32(activation_min, x3);
+ x0 = vminq_f32(activation_max, x0);
+ x1 = vminq_f32(activation_max, x1);
+ x2 = vminq_f32(activation_max, x2);
+ x3 = vminq_f32(activation_max, x3);
+ vst1q_f32(output_data + i, x0);
+ vst1q_f32(output_data + i + 4, x1);
+ vst1q_f32(output_data + i + 8, x2);
+ vst1q_f32(output_data + i + 12, x3);
+ }
+ for (; i <= size - 4; i += 4)
+ {
+ auto a1 = vld1q_f32(input1_data + i);
+ auto a2 = vld1q_f32(input2_data + i);
+ auto x = vsubq_f32(a1, a2);
+ x = vmaxq_f32(activation_min, x);
+ x = vminq_f32(activation_max, x);
+ vst1q_f32(output_data + i, x);
+ }
+#endif // NEON
+
+ for (; i < size; i++)
+ {
+ auto x = input1_data[i] - input2_data[i];
+ output_data[i] =
+ ActivationFunctionWithMinMax(x, params.float_activation_min, params.float_activation_max);
+ }
+}
+
+inline void Mul(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const float *input1_data, const Shape &input2_shape, const float *input2_data,
+ const Shape &output_shape, float *output_data)
+{
+ int i = 0;
+ const int size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+#ifdef USE_NEON
+ const auto activation_min = vdupq_n_f32(params.float_activation_min);
+ const auto activation_max = vdupq_n_f32(params.float_activation_max);
+ for (; i <= size - 16; i += 16)
+ {
+ auto a10 = vld1q_f32(input1_data + i);
+ auto a11 = vld1q_f32(input1_data + i + 4);
+ auto a12 = vld1q_f32(input1_data + i + 8);
+ auto a13 = vld1q_f32(input1_data + i + 12);
+ auto a20 = vld1q_f32(input2_data + i);
+ auto a21 = vld1q_f32(input2_data + i + 4);
+ auto a22 = vld1q_f32(input2_data + i + 8);
+ auto a23 = vld1q_f32(input2_data + i + 12);
+ auto x0 = vmulq_f32(a10, a20);
+ auto x1 = vmulq_f32(a11, a21);
+ auto x2 = vmulq_f32(a12, a22);
+ auto x3 = vmulq_f32(a13, a23);
+ x0 = vmaxq_f32(activation_min, x0);
+ x1 = vmaxq_f32(activation_min, x1);
+ x2 = vmaxq_f32(activation_min, x2);
+ x3 = vmaxq_f32(activation_min, x3);
+ x0 = vminq_f32(activation_max, x0);
+ x1 = vminq_f32(activation_max, x1);
+ x2 = vminq_f32(activation_max, x2);
+ x3 = vminq_f32(activation_max, x3);
+ vst1q_f32(output_data + i, x0);
+ vst1q_f32(output_data + i + 4, x1);
+ vst1q_f32(output_data + i + 8, x2);
+ vst1q_f32(output_data + i + 12, x3);
+ }
+ for (; i <= size - 4; i += 4)
+ {
+ auto a1 = vld1q_f32(input1_data + i);
+ auto a2 = vld1q_f32(input2_data + i);
+ auto x = vmulq_f32(a1, a2);
+ x = vmaxq_f32(activation_min, x);
+ x = vminq_f32(activation_max, x);
+ vst1q_f32(output_data + i, x);
+ }
+#endif // NEON
+
+ for (; i < size; i++)
+ {
+ auto x = input1_data[i] * input2_data[i];
+ output_data[i] =
+ ActivationFunctionWithMinMax(x, params.float_activation_min, params.float_activation_max);
+ }
+}
+
+} // namespace optimized
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_OPTIMIZED_BINARYARITHMETICOPS_H__
diff --git a/compute/cker/include/cker/operation/optimized/Conv.h b/compute/cker/include/cker/operation/optimized/Conv.h
new file mode 100644
index 000000000..0f620146c
--- /dev/null
+++ b/compute/cker/include/cker/operation/optimized/Conv.h
@@ -0,0 +1,290 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_OPTIMIZED_CONV_H__
+#define __NNFW_CKER_OPTIMIZED_CONV_H__
+
+#include "OptimizedUtils.h"
+
+#include "cker/eigen/EigenSupport.h"
+#include "cker/eigen/Utils.h"
+#include "cker/gemmlowp/GEMMSupport.h"
+#include "cker/neon/neon_check.h"
+#include "cker/operation/Common.h"
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+#include <public/gemmlowp.h>
+#include <public/map.h>
+#include <fixedpoint/fixedpoint.h>
+
+#include <vector>
+#include <tuple>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace optimized
+{
+
+struct GemmlowpOutputPipeline
+{
+ typedef gemmlowp::VectorMap<const int32_t, gemmlowp::VectorShape::Col> ColVectorMap;
+ typedef std::tuple<gemmlowp::OutputStageBiasAddition<ColVectorMap>,
+ gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent,
+ gemmlowp::OutputStageClamp, gemmlowp::OutputStageSaturatingCastToUint8>
+ Pipeline;
+ static Pipeline MakeExp(const int32_t *bias_data, int output_rows, int32_t output_offset,
+ int32_t output_multiplier, int output_left_shift,
+ int32_t output_activation_min, int32_t output_activation_max)
+ {
+ ColVectorMap bias_vector(bias_data, output_rows);
+ gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage;
+ bias_addition_stage.bias_vector = bias_vector;
+ gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent quantize_down_stage;
+ quantize_down_stage.result_offset_after_shift = output_offset;
+ quantize_down_stage.result_fixedpoint_multiplier = output_multiplier;
+ quantize_down_stage.result_exponent = output_left_shift;
+ gemmlowp::OutputStageClamp clamp_stage;
+ clamp_stage.min = output_activation_min;
+ clamp_stage.max = output_activation_max;
+ gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage;
+ return std::make_tuple(bias_addition_stage, quantize_down_stage, clamp_stage,
+ saturating_cast_stage);
+ }
+};
+
+inline void AddBiasAndEvalActivationFunction(float output_activation_min,
+ float output_activation_max, const Shape &bias_shape,
+ const float *bias_data, const Shape &array_shape,
+ float *array_data)
+{
+ BiasAndClamp(output_activation_min, output_activation_max, bias_shape.FlatSize(), bias_data,
+ array_shape.FlatSize(), array_data);
+}
+
+inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8_t *input_data,
+ const Shape &filter_shape, const uint8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data,
+ const Shape &im2col_shape, uint8_t *im2col_data)
+{
+ gemmlowp::GemmContext *gemm_context = gemm_support::GetGemmLowpContext();
+
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int32_t input_offset = params.input_offset;
+ const int32_t filter_offset = params.weights_offset;
+ const int32_t output_offset = params.output_offset;
+ const int32_t output_multiplier = params.output_multiplier;
+ const int output_shift = params.output_shift;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const uint8_t *gemm_input_data = nullptr;
+ const Shape *gemm_input_shape = nullptr;
+ const int filter_width = filter_shape.Dims(2);
+ const int filter_height = filter_shape.Dims(1);
+ const bool need_dilated_im2col = dilation_width_factor != 1 || dilation_height_factor != 1;
+ const bool need_im2col =
+ stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1;
+ if (need_dilated_im2col)
+ {
+ assert(im2col_data);
+ const int input_zero_point = -input_offset;
+ assert(input_zero_point >= 0);
+ assert(input_zero_point <= 255);
+ DilatedIm2col(params, input_zero_point, input_shape, input_data, filter_shape, output_shape,
+ im2col_data);
+ gemm_input_data = im2col_data;
+ gemm_input_shape = &im2col_shape;
+ }
+ else if (need_im2col)
+ {
+ assert(im2col_data);
+ const int input_zero_point = -input_offset;
+ assert(input_zero_point >= 0);
+ assert(input_zero_point <= 255);
+ Im2col(params, filter_height, filter_width, input_zero_point, input_shape, input_data,
+ im2col_shape, im2col_data);
+ gemm_input_data = im2col_data;
+ gemm_input_shape = &im2col_shape;
+ }
+ else
+ {
+ gemm_input_data = input_data;
+ gemm_input_shape = &input_shape;
+ }
+
+ const int gemm_input_rows = gemm_input_shape->Dims(3);
+ // Using FlatSizeSkipDim causes segfault in some contexts (see b/79927784).
+ // The root cause has not yet been identified though. Same applies below for
+ // the other calls commented out. This is a partial rollback of cl/196819423.
+ // const int gemm_input_cols = FlatSizeSkipDim(*gemm_input_shape, 3);
+ const int gemm_input_cols =
+ gemm_input_shape->Dims(0) * gemm_input_shape->Dims(1) * gemm_input_shape->Dims(2);
+ const int filter_rows = filter_shape.Dims(0);
+ // See b/79927784.
+ // const int filter_cols = FlatSizeSkipDim(filter_shape, 0);
+ const int filter_cols = filter_shape.Dims(1) * filter_shape.Dims(2) * filter_shape.Dims(3);
+ const int output_rows = output_shape.Dims(3);
+ // See b/79927784.
+ // const int output_cols = FlatSizeSkipDim(output_shape, 3);
+ const int output_cols = output_shape.Dims(0) * output_shape.Dims(1) * output_shape.Dims(2);
+ assert(output_rows == filter_rows);
+ assert(output_cols == gemm_input_cols);
+ assert(filter_cols == gemm_input_rows);
+ assert(bias_shape.FlatSize() == output_rows);
+ UNUSED_RELEASE(bias_shape);
+ gemmlowp::MatrixMap<const uint8_t, gemmlowp::MapOrder::RowMajor> filter_matrix(
+ filter_data, filter_rows, filter_cols);
+ gemmlowp::MatrixMap<const uint8_t, gemmlowp::MapOrder::ColMajor> input_matrix(
+ gemm_input_data, gemm_input_rows, gemm_input_cols);
+ gemmlowp::MatrixMap<uint8_t, gemmlowp::MapOrder::ColMajor> output_matrix(output_data, output_rows,
+ output_cols);
+ const auto &output_pipeline =
+ GemmlowpOutputPipeline::MakeExp(bias_data, output_rows, output_offset, output_multiplier,
+ output_shift, output_activation_min, output_activation_max);
+ gemmlowp::GemmWithOutputPipeline<uint8_t, uint8_t, gemmlowp::L8R8WithLhsNonzeroBitDepthParams>(
+ gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, input_offset,
+ output_pipeline);
+}
+
+} // namespace optimized
+
+namespace multithreaded
+{
+namespace
+{
+template <class T> class EigenTensorConvFunctor
+{
+private:
+ Eigen::PaddingType RuntimePadding2EigenPadding(PaddingType padding)
+ {
+ switch (padding)
+ {
+ case PaddingType::kValid:
+ return Eigen::PADDING_VALID;
+ case PaddingType::kSame:
+ return Eigen::PADDING_SAME;
+ case PaddingType::kNone:
+ assert(false); // should never get here.
+ return Eigen::PADDING_VALID;
+ }
+ return Eigen::PADDING_SAME; // Prevent compiler warning about missing
+ // return
+ }
+
+public:
+ void operator()(const Eigen::ThreadPoolDevice &device, const T *input_data, int input_batches,
+ int input_height, int input_width, int input_depth, const T *filter_data,
+ int filter_height, int filter_width, int filter_count, int stride_rows,
+ int stride_cols, int pad_height, int pad_width, nnfw::cker::PaddingType padding,
+ T *output_data, int output_height, int output_width)
+ {
+ const bool is_1x1_kernel =
+ (filter_height == 1 && filter_width == 1 && stride_rows == 1 && stride_cols == 1);
+ const bool is_same_height_width =
+ (filter_height == input_height && filter_width == input_width && pad_width == 0 &&
+ pad_height == 0);
+ if (is_1x1_kernel || is_same_height_width)
+ {
+ // is_1x1_kernel: For 1x1 kernel, the 2D convolution is reduced to matrix multiplication.
+ // - output (input_batches * conv_width, filter_count)
+ // - input (input_batches * conv_width, input_depth)
+ // - filter (input_depth, filter_count)
+ // is_same_height_width: If the input data and filter have the same height/width, the 2D
+ // convolution is reduced to matrix multiplication.
+ // - output (input_batches, filter_count)
+ // - input (input_batches, filter_width * filter_height * input_depth)
+ // - filter (filter_width * filter_height * input_depth, filter_count)
+ const int conv_width = output_height * output_width;
+ int io_col = input_batches;
+ int filter_col = input_depth * filter_width * filter_height;
+ if (is_1x1_kernel)
+ {
+ io_col *= conv_width;
+ }
+ Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
+ dim_pair[0] = Eigen::IndexPair<Eigen::DenseIndex>(1, 0);
+ eigen_support::EigenMatrix output(output_data, io_col, filter_count);
+ eigen_support::ConstEigenMatrix input(input_data, io_col, filter_col);
+ eigen_support::ConstEigenMatrix filter(filter_data, filter_col, filter_count);
+ eigen_support::MatMulConvFunctor<Eigen::ThreadPoolDevice, T>()(device, output, input, filter,
+ dim_pair);
+ }
+ else
+ {
+ eigen_support::EigenTensor output(output_data, input_batches, output_height, output_width,
+ filter_count);
+ eigen_support::ConstEigenTensor input(input_data, input_batches, input_height, input_width,
+ input_depth);
+ eigen_support::ConstEigenTensor filter(filter_data, filter_height, filter_width, input_depth,
+ filter_count);
+ output.device(device) = Eigen::SpatialConvolution(input, filter, stride_cols, stride_rows,
+ RuntimePadding2EigenPadding(padding));
+ }
+ }
+};
+} // namespace
+
+inline void Conv(const ConvParams &params, const Shape &input_shape, const float *input_data,
+ const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
+ const float *bias_data, const Shape &output_shape, float *output_data)
+{
+ const Eigen::ThreadPoolDevice &device = *eigen_support::GetThreadPoolDevice();
+
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const PaddingType padding = params.padding_type;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const float output_activation_min = params.float_activation_min;
+ const float output_activation_max = params.float_activation_max;
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+ const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+
+ EigenTensorConvFunctor<float> conv_functor;
+ conv_functor(device, input_data, batches, input_height, input_width, input_depth, filter_data,
+ filter_height, filter_width, output_depth, stride_height, stride_width, pad_height,
+ pad_width, padding, output_data, output_height, output_width);
+
+ optimized::AddBiasAndEvalActivationFunction(output_activation_min, output_activation_max,
+ bias_shape, bias_data, output_shape, output_data);
+}
+
+} // namespace multithreaded
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_OPTIMIZED_CONV_H__
diff --git a/compute/cker/include/cker/operation/optimized/DepthwiseConvUint8.h b/compute/cker/include/cker/operation/optimized/DepthwiseConvUint8.h
new file mode 100644
index 000000000..d383b126d
--- /dev/null
+++ b/compute/cker/include/cker/operation/optimized/DepthwiseConvUint8.h
@@ -0,0 +1,2123 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_UINT8_H__
+#define __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_UINT8_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+#include "cker/neon/neon_check.h"
+
+#include <fixedpoint/fixedpoint.h>
+#include <public/gemmlowp.h>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace optimized
+{
+
+// Implementation of quantized DepthwiseConv
+
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+struct QuantizedDepthwiseConvKernel
+{
+};
+
+#ifdef USE_NEON
+template <> struct QuantizedDepthwiseConvKernel<true, 8, 2>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+ // Load the filters, add filter_offset.
+ uint8x8x2_t filter_u8;
+ filter_u8.val[0] = vld1_u8(filter_ptr);
+ filter_u8.val[1] = vld1_u8(filter_ptr + 8);
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] =
+ vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8.val[i])), vdupq_n_s16(filter_offset));
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4x2_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8);
+ }
+ // Load the inputs, add input_offset.
+ const uint8x8_t input_u8 = vld1_u8(input_ptr);
+ input_ptr += input_ptr_increment;
+ const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8));
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[0].val[i] =
+ vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]), vget_low_s16(input_dup2.val[i]));
+ acc[1].val[i] =
+ vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]), vget_high_s16(input_dup2.val[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]);
+ vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 8, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+ (void)input_ptr_increment;
+ // Load the filters, add filter_offset.
+ const uint8x8_t filter_u8 = vld1_u8(filter_ptr);
+ const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8));
+ const int16x8_t filter = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset));
+
+ int outp = 0;
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ uint8x8_t input_u8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input_u8[i] = vld1_u8(input_ptr + 8 * i);
+ }
+ input_ptr += 16;
+ int16x8_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vreinterpretq_s16_u16(vmovl_u8(input_u8[i]));
+ }
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset));
+ }
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input[0]));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input[0]));
+ acc[2] = vmlal_s16(acc[2], vget_low_s16(filter), vget_low_s16(input[1]));
+ acc[3] = vmlal_s16(acc[3], vget_high_s16(filter), vget_high_s16(input[1]));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle 1 output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[2];
+ acc[0] = vld1q_s32(acc_buffer_ptr);
+ acc[1] = vld1q_s32(acc_buffer_ptr + 4);
+
+ // Load the inputs, add input_offset.
+ const uint8x8_t input_u8 = vld1_u8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8));
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input));
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc[0]);
+ vst1q_s32(acc_buffer_ptr + 4, acc[1]);
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 4, 2>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+ (void)input_ptr_increment;
+ // Load the filters, add filter_offset.
+ const uint8x8_t filter_u8 = vld1_u8(filter_ptr);
+ const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8));
+ const int16x8_t filter = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset));
+
+ int outp = 0;
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ const uint8x8_t input_u8 = vld1_u8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8));
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[2 * i + 0] =
+ vmlal_s16(acc[2 * i + 0], vget_low_s16(filter), vget_low_s16(input_dup2.val[i]));
+ acc[2 * i + 1] =
+ vmlal_s16(acc[2 * i + 1], vget_high_s16(filter), vget_high_s16(input_dup2.val[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ uint8x8_t input_u8 = vdup_n_u8(0);
+ input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0);
+ input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1);
+ input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2);
+ input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8)));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x4x2_t input_dup2 = vzip_s16(input, input);
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), input_dup2.val[0]);
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), input_dup2.val[1]);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+// Kernel for the fixed case input_depth == 2, depth_multiplier == 8
+// (16 output channels per pixel). Template arguments appear to be
+// <kAllowStrided, kFixedInputDepth, kFixedDepthMultiplier>; the primary
+// template is outside this view -- confirm there. With kAllowStrided ==
+// false the input is assumed contiguous and input_ptr_increment is unused.
+template <> struct QuantizedDepthwiseConvKernel<false, 2, 8>
+{
+  // Accumulates num_output_pixels worth of (input + input_offset) *
+  // (filter + filter_offset) products into acc_buffer_ptr (16 int32 lanes
+  // advanced per output pixel). input_ptr is consumed at 2 bytes per pixel.
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    // These runtime parameters are implied by the template arguments; the
+    // casts only silence unused-parameter warnings.
+    (void)input_depth;
+    (void)depth_multiplier;
+    (void)input_ptr_increment;
+    // Load the filters, add filter_offset.
+    int16x8_t filter[2];
+    for (int i = 0; i < 2; i++)
+    {
+      const uint8x8_t filter_u8 = vld1_u8(filter_ptr + 8 * i);
+      const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8));
+      filter[i] = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset));
+    }
+    int outp = 0;
+    // Handle two output pixels at a time.
+    for (; outp <= num_output_pixels - 2; outp += 2)
+    {
+      // Load the accumulators from acc_buffer.
+      int32x4_t acc[8];
+      for (int i = 0; i < 8; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+      // Load the inputs, add input_offset.
+      uint8x8_t input_u8 = vdup_n_u8(0);
+      input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0);
+      input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1);
+      input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2);
+      input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3);
+      input_ptr += 4;
+      const int16x4_t input_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8)));
+      const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+      // Multiply-accumulate. Lanes 0,1 are pixel 0's two channels and lanes
+      // 2,3 are pixel 1's, each broadcast against the full 16-wide filter.
+      acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
+      acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 0);
+      acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 1);
+      acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 1);
+      acc[4] = vmlal_lane_s16(acc[4], vget_low_s16(filter[0]), input, 2);
+      acc[5] = vmlal_lane_s16(acc[5], vget_high_s16(filter[0]), input, 2);
+      acc[6] = vmlal_lane_s16(acc[6], vget_low_s16(filter[1]), input, 3);
+      acc[7] = vmlal_lane_s16(acc[7], vget_high_s16(filter[1]), input, 3);
+      // Store the accumulators back to acc_buffer.
+      for (int i = 0; i < 8; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 32;
+    }
+    // Handle one output pixel at a time.
+    for (; outp < num_output_pixels; outp++)
+    {
+      // Load the accumulators from acc_buffer.
+      int32x4_t acc[4];
+      for (int i = 0; i < 4; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+      // Load the inputs, add input_offset.
+      uint8x8_t input_u8 = vdup_n_u8(0);
+      input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0);
+      input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1);
+      input_ptr += 2;
+      const int16x4_t input_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8)));
+      const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+      // Multiply-accumulate.
+      acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
+      acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 0);
+      acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 1);
+      acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 1);
+
+      // Store the accumulators back to acc_buffer.
+      for (int i = 0; i < 4; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 16;
+    }
+  }
+};
+
+// Kernel for the fixed case input_depth == 2, depth_multiplier == 2
+// (4 output channels per pixel). Non-strided: input_ptr_increment unused,
+// input consumed contiguously at 2 bytes per pixel.
+template <> struct QuantizedDepthwiseConvKernel<false, 2, 2>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    // Implied by template arguments; casts only silence warnings.
+    (void)input_depth;
+    (void)depth_multiplier;
+    (void)input_ptr_increment;
+    // Load the filters, add filter_offset. Only 4 filter values exist
+    // (2 channels x 2 multiplier); they fill lanes 0..3.
+    uint8x8_t filter_u8 = vdup_n_u8(0);
+    filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0);
+    filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1);
+    filter_u8 = vset_lane_u8(filter_ptr[2], filter_u8, 2);
+    filter_u8 = vset_lane_u8(filter_ptr[3], filter_u8, 3);
+    const int16x4_t filter_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8)));
+    const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset));
+
+    int outp = 0;
+    // Handle 4 output pixels at a time.
+    for (; outp <= num_output_pixels - 4; outp += 4)
+    {
+      // Load the accumulators from acc_buffer
+      int32x4_t acc[4];
+      for (int i = 0; i < 4; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+
+      // Load the inputs, add input_offset.
+      const uint8x8_t input_u8 = vld1_u8(input_ptr);
+      input_ptr += 8;
+      const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8));
+      const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+      // Duplicate the input values, 2-fold, so each input channel lines up
+      // with its two per-multiplier filter lanes.
+      const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+      // Multiply-accumulate
+      acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input_dup2.val[0]));
+      acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input_dup2.val[0]));
+      acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input_dup2.val[1]));
+      acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input_dup2.val[1]));
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 4; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 16;
+    }
+    // Handle one output pixel at a time.
+    for (; outp < num_output_pixels; outp++)
+    {
+      // Load the accumulators from acc_buffer
+      int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+
+      uint8x8_t input_u8 = vdup_n_u8(0);
+      input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0);
+      input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1);
+      input_ptr += 2;
+      const int16x4_t input_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8)));
+      const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+      // Duplicate the input values, 2-fold
+      const int16x4_t input_dup2 = vzip_s16(input, input).val[0];
+      // Multiply-accumulate
+      acc = vmlal_s16(acc, filter, input_dup2);
+      // Store the accumulators back to acc_buffer
+      vst1q_s32(acc_buffer_ptr, acc);
+      acc_buffer_ptr += 4;
+    }
+  }
+};
+
+// Kernel for the fixed case input_depth == 2, depth_multiplier == 1
+// (2 output channels per pixel). Non-strided: input_ptr_increment unused.
+template <> struct QuantizedDepthwiseConvKernel<false, 2, 1>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    // Implied by template arguments; casts only silence warnings.
+    (void)input_depth;
+    (void)depth_multiplier;
+    (void)input_ptr_increment;
+    // Load the filters, add filter_offset. There are only 2 filter values;
+    // lanes 2,3 intentionally repeat lanes 0,1 so a single 4-lane MAC can
+    // cover two output pixels at once.
+    uint8x8_t filter_u8 = vdup_n_u8(0);
+    filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0);
+    filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1);
+    filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 2);
+    filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 3);
+    const int16x4_t filter_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8)));
+    const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset));
+
+    int outp = 0;
+    // Handle 8 output pixels at a time.
+    for (; outp <= num_output_pixels - 8; outp += 8)
+    {
+      // Load the accumulators from acc_buffer.
+      int32x4_t acc[4];
+      for (int i = 0; i < 4; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+      // Load the inputs, add input_offset.
+      uint8x8_t input_u8[2];
+      for (int i = 0; i < 2; i++)
+      {
+        input_u8[i] = vld1_u8(input_ptr + 8 * i);
+      }
+      input_ptr += 16;
+      int16x8_t input[2];
+      for (int i = 0; i < 2; i++)
+      {
+        input[i] = vreinterpretq_s16_u16(vmovl_u8(input_u8[i]));
+      }
+      for (int i = 0; i < 2; i++)
+      {
+        input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset));
+      }
+
+      // Multiply-accumulate.
+      acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input[0]));
+      acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input[0]));
+      acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input[1]));
+      acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input[1]));
+      // Store the accumulators back to acc_buffer.
+      for (int i = 0; i < 4; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 16;
+    }
+    // Handle 4 output pixels at a time.
+    for (; outp <= num_output_pixels - 4; outp += 4)
+    {
+      // Load the accumulators from acc_buffer.
+      int32x4_t acc[2];
+      for (int i = 0; i < 2; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+      // Load the inputs, add input_offset.
+      const uint8x8_t input_u8 = vld1_u8(input_ptr);
+      input_ptr += 8;
+      const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8));
+      const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+
+      // Multiply-accumulate.
+      acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input));
+      acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input));
+      // Store the accumulators back to acc_buffer.
+      for (int i = 0; i < 2; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 8;
+    }
+    // Handle 2 output pixels at a time.
+    for (; outp <= num_output_pixels - 2; outp += 2)
+    {
+      // Load the accumulators from acc_buffer.
+      int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+      // Load the inputs, add input_offset.
+      uint8x8_t input_u8 = vdup_n_u8(0);
+      input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0);
+      input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1);
+      input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2);
+      input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3);
+      input_ptr += 4;
+      const int16x4_t input_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8)));
+      const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+      // Multiply-accumulate.
+      acc = vmlal_s16(acc, filter, input);
+      // Store the accumulators back to acc_buffer.
+      vst1q_s32(acc_buffer_ptr, acc);
+      acc_buffer_ptr += 4;
+    }
+    // Handle 1 output pixel at a time.
+    for (; outp < num_output_pixels; outp++)
+    {
+      // Load the accumulators from acc_buffer.
+      int32x2_t acc = vld1_s32(acc_buffer_ptr);
+      // Load the inputs, add input_offset.
+      uint8x8_t input_u8 = vdup_n_u8(0);
+      input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0);
+      input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1);
+      input_ptr += 2;
+      const int16x4_t input_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8)));
+      const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+      // Multiply-accumulate on a widened pair, then keep only the low half
+      // (the high half of the product is discarded; only 2 lanes are live).
+      acc = vget_low_s32(vmlal_s16(vcombine_s32(acc, acc), filter, input));
+      // Store the accumulators back to acc_buffer.
+      vst1_s32(acc_buffer_ptr, acc);
+      acc_buffer_ptr += 2;
+    }
+  }
+};
+
+// Kernel for the fixed case input_depth == 1, depth_multiplier == 2
+// (2 output channels per pixel). Non-strided: input_ptr_increment unused.
+template <> struct QuantizedDepthwiseConvKernel<false, 1, 2>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    // Implied by template arguments; casts only silence warnings.
+    (void)input_depth;
+    (void)depth_multiplier;
+    (void)input_ptr_increment;
+    // Load the filters, add filter_offset. Only 2 filter values exist;
+    // lanes 2,3 repeat lanes 0,1 so one 4-lane MAC covers two pixels.
+    uint8x8_t filter_u8 = vdup_n_u8(0);
+    filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0);
+    filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1);
+    filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 2);
+    filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 3);
+    const int16x4_t filter_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8)));
+    const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset));
+
+    int outp = 0;
+    // Handle 8 output pixels at a time.
+    for (; outp <= num_output_pixels - 8; outp += 8)
+    {
+      // Load the accumulators from acc_buffer
+      int32x4_t acc[4];
+      for (int i = 0; i < 4; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+
+      // Load the inputs, add input_offset.
+      const uint8x8_t input_u8 = vld1_u8(input_ptr);
+      input_ptr += 8;
+      const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8));
+      const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+      // Duplicate the input values, 2-fold, one copy per multiplier lane.
+      const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+      // Multiply-accumulate
+      acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input_dup2.val[0]));
+      acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input_dup2.val[0]));
+      acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input_dup2.val[1]));
+      acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input_dup2.val[1]));
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 4; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 16;
+    }
+    // Handle one output pixel at a time.
+    for (; outp < num_output_pixels; outp++)
+    {
+      // Load the accumulators from acc_buffer
+      int32x2_t acc = vld1_s32(acc_buffer_ptr);
+
+      // Load the inputs, add input_offset.
+      const uint32_t input = *input_ptr++ + input_offset;
+
+      // Multiply-accumulate: broadcast the scalar input against the filter
+      // pair; only the low 2 result lanes are live and stored.
+      acc = vget_low_s32(vmlal_n_s16(vcombine_s32(acc, acc), filter, input));
+      // Store the accumulators back to acc_buffer
+      vst1_s32(acc_buffer_ptr, acc);
+      acc_buffer_ptr += 2;
+    }
+  }
+};
+
+// Kernel for the fixed case input_depth == 1, depth_multiplier == 4
+// (4 output channels per pixel). Non-strided: input_ptr_increment unused;
+// input consumed at 1 byte per pixel.
+template <> struct QuantizedDepthwiseConvKernel<false, 1, 4>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    // Implied by template arguments; casts only silence warnings.
+    (void)input_depth;
+    (void)depth_multiplier;
+    (void)input_ptr_increment;
+    // Load the filters, add filter_offset (4 values: one per multiplier).
+    uint8x8_t filter_u8 = vdup_n_u8(0);
+    filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0);
+    filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1);
+    filter_u8 = vset_lane_u8(filter_ptr[2], filter_u8, 2);
+    filter_u8 = vset_lane_u8(filter_ptr[3], filter_u8, 3);
+    const int16x4_t filter_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8)));
+    const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset));
+
+    int outp = 0;
+    // Handle 8 output pixels at a time.
+    for (; outp <= num_output_pixels - 8; outp += 8)
+    {
+      // Load the accumulators from acc_buffer
+      int32x4_t acc[8];
+      for (int i = 0; i < 8; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+
+      // Load the inputs, add input_offset.
+      uint8x8_t input_u8 = vld1_u8(input_ptr);
+      input_ptr += 8;
+      const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8));
+      const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+
+      // Multiply-accumulate: one broadcast lane per output pixel.
+      acc[0] = vmlal_lane_s16(acc[0], filter, vget_low_s16(input), 0);
+      acc[1] = vmlal_lane_s16(acc[1], filter, vget_low_s16(input), 1);
+      acc[2] = vmlal_lane_s16(acc[2], filter, vget_low_s16(input), 2);
+      acc[3] = vmlal_lane_s16(acc[3], filter, vget_low_s16(input), 3);
+      acc[4] = vmlal_lane_s16(acc[4], filter, vget_high_s16(input), 0);
+      acc[5] = vmlal_lane_s16(acc[5], filter, vget_high_s16(input), 1);
+      acc[6] = vmlal_lane_s16(acc[6], filter, vget_high_s16(input), 2);
+      acc[7] = vmlal_lane_s16(acc[7], filter, vget_high_s16(input), 3);
+
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 8; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 32;
+    }
+    // Handle 4 output pixels at a time.
+    for (; outp <= num_output_pixels - 4; outp += 4)
+    {
+      // Load the accumulators from acc_buffer
+      int32x4_t acc[4];
+      for (int i = 0; i < 4; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+
+      // Load the inputs, add input_offset.
+      uint8x8_t input_u8 = vdup_n_u8(0);
+      input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0);
+      input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1);
+      input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2);
+      input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3);
+      input_ptr += 4;
+      const int16x4_t input_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8)));
+      const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+      // Multiply-accumulate
+      acc[0] = vmlal_lane_s16(acc[0], filter, input, 0);
+      acc[1] = vmlal_lane_s16(acc[1], filter, input, 1);
+      acc[2] = vmlal_lane_s16(acc[2], filter, input, 2);
+      acc[3] = vmlal_lane_s16(acc[3], filter, input, 3);
+
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 4; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 16;
+    }
+    // Handle one output pixel at a time.
+    for (; outp < num_output_pixels; outp++)
+    {
+      // Load the accumulators from acc_buffer
+      int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+
+      // Load the inputs, add input_offset.
+      const uint32_t input = *input_ptr++ + input_offset;
+
+      // Multiply-accumulate
+      acc = vmlal_n_s16(acc, filter, input);
+      // Store the accumulators back to acc_buffer
+      vst1q_s32(acc_buffer_ptr, acc);
+      acc_buffer_ptr += 4;
+    }
+  }
+};
+
+// Kernel for the fixed case input_depth == 4, depth_multiplier == 1
+// (4 output channels per pixel). Non-strided: input_ptr_increment unused.
+template <> struct QuantizedDepthwiseConvKernel<false, 4, 1>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    // Implied by template arguments; casts only silence warnings.
+    (void)input_depth;
+    (void)depth_multiplier;
+    (void)input_ptr_increment;
+    // Load the filters, add filter_offset.
+    uint8x8_t filter_u8 = vdup_n_u8(0);
+    filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0);
+    filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1);
+    filter_u8 = vset_lane_u8(filter_ptr[2], filter_u8, 2);
+    filter_u8 = vset_lane_u8(filter_ptr[3], filter_u8, 3);
+    const int16x4_t filter_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8)));
+    const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset));
+
+    int outp = 0;
+    // Handle 4 output pixels at a time.
+    for (; outp <= num_output_pixels - 4; outp += 4)
+    {
+      // Load the accumulators from acc_buffer
+      int32x4_t acc[4];
+      for (int i = 0; i < 4; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+      // Load the inputs, add input_offset (16 bytes = 4 pixels x 4 channels).
+      int16x8_t input[2];
+      for (int i = 0; i < 2; i++)
+      {
+        const uint8x8_t input_u8 = vld1_u8(input_ptr + 8 * i);
+        const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8));
+        input[i] = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+      }
+      input_ptr += 16;
+      // Multiply-accumulate
+      for (int i = 0; i < 2; i++)
+      {
+        acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], filter, vget_low_s16(input[i]));
+        acc[2 * i + 1] = vmlal_s16(acc[2 * i + 1], filter, vget_high_s16(input[i]));
+      }
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 4; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 16;
+    }
+    // Handle one output pixel at a time.
+    for (; outp < num_output_pixels; outp++)
+    {
+      // Load the accumulators from acc_buffer
+      int32x4_t acc;
+      acc = vld1q_s32(acc_buffer_ptr);
+
+      // Load the inputs, add input_offset.
+      uint8x8_t input_u8 = vdup_n_u8(0);
+      input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0);
+      input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1);
+      input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2);
+      input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3);
+      input_ptr += 4;
+      const int16x4_t input_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8)));
+      const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+      // Multiply-accumulate
+      acc = vmlal_s16(acc, filter, input);
+      // Store the accumulators back to acc_buffer
+      vst1q_s32(acc_buffer_ptr, acc);
+      acc_buffer_ptr += 4;
+    }
+  }
+};
+
+// Kernel for the fixed case input_depth == 4, depth_multiplier == 4
+// (16 output channels per pixel). Non-strided: input_ptr_increment unused.
+template <> struct QuantizedDepthwiseConvKernel<false, 4, 4>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    // Implied by template arguments; casts only silence warnings.
+    (void)input_depth;
+    (void)depth_multiplier;
+    (void)input_ptr_increment;
+    // Load the filters, add filter_offset (16 values across two registers).
+    int16x8_t filter[2];
+    for (int i = 0; i < 2; i++)
+    {
+      const uint8x8_t filter_u8 = vld1_u8(filter_ptr + 8 * i);
+      const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8));
+      filter[i] = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset));
+    }
+
+    int outp = 0;
+    // Handle 2 output pixels at a time.
+    for (; outp <= num_output_pixels - 2; outp += 2)
+    {
+      // Load the accumulators from acc_buffer
+      int32x4_t acc[8];
+      for (int i = 0; i < 8; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+
+      // Load the inputs, add input_offset (8 bytes = 2 pixels x 4 channels).
+      uint8x8_t input_u8 = vld1_u8(input_ptr);
+      input_ptr += 8;
+      const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8));
+      const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+
+      // Multiply-accumulate: each input channel lane is broadcast against
+      // its 4 per-multiplier filter lanes; low half = pixel 0, high = pixel 1.
+      acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), vget_low_s16(input), 0);
+      acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), vget_low_s16(input), 1);
+      acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), vget_low_s16(input), 2);
+      acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), vget_low_s16(input), 3);
+      acc[4] = vmlal_lane_s16(acc[4], vget_low_s16(filter[0]), vget_high_s16(input), 0);
+      acc[5] = vmlal_lane_s16(acc[5], vget_high_s16(filter[0]), vget_high_s16(input), 1);
+      acc[6] = vmlal_lane_s16(acc[6], vget_low_s16(filter[1]), vget_high_s16(input), 2);
+      acc[7] = vmlal_lane_s16(acc[7], vget_high_s16(filter[1]), vget_high_s16(input), 3);
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 8; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 32;
+    }
+    // Handle one output pixel at a time.
+    for (; outp < num_output_pixels; outp++)
+    {
+      // Load the accumulators from acc_buffer
+      int32x4_t acc[4];
+      for (int i = 0; i < 4; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+
+      // Load the inputs, add input_offset.
+      uint8x8_t input_u8 = vdup_n_u8(0);
+      input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0);
+      input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1);
+      input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2);
+      input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3);
+      input_ptr += 4;
+      const int16x4_t input_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8)));
+      const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+      // Multiply-accumulate
+      acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
+      acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 1);
+      acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 2);
+      acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 3);
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 4; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 16;
+    }
+  }
+};
+
+// Kernel for runtime input depth (fixed-depth arg 0) with depth_multiplier
+// == 3, strided variant: input_ptr advances by input_ptr_increment per
+// output pixel.
+template <> struct QuantizedDepthwiseConvKernel<true, 0, 3>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    // NOTE: the (void) cast on input_depth only silences a warning at this
+    // point; input_depth IS used by the channel loops below.
+    (void)input_depth;
+    (void)depth_multiplier;
+    // We will have to duplicate bytes in a NEON register, 3-fold.
+    // We will do that by register-level table-look-up using VTBL instructions.
+    // Here we prepare the registers containing the table-lookup indices.
+    static const uint8_t dup3_indices_array[3][8] = {
+      {0, 0, 0, 1, 1, 1, 2, 2}, {2, 3, 3, 3, 4, 4, 4, 5}, {5, 5, 6, 6, 6, 7, 7, 7}};
+    uint8x8_t dup3_indices[3];
+    for (int i = 0; i < 3; i++)
+    {
+      dup3_indices[i] = vld1_u8(dup3_indices_array[i]);
+    }
+
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++)
+    {
+      const uint8_t *local_filter_ptr = filter_ptr;
+      const uint8_t *local_input_ptr = input_ptr;
+      int ic = 0;
+      // Handle 8 input channels at a time.
+      for (; ic <= input_depth - 8; ic += 8)
+      {
+        // Load the filters, add filter_offset.
+        int16x8_t filter[3];
+        uint8x8x3_t filter_u8;
+        filter_u8.val[0] = vld1_u8(local_filter_ptr);
+        filter_u8.val[1] = vld1_u8(local_filter_ptr + 8);
+        filter_u8.val[2] = vld1_u8(local_filter_ptr + 16);
+        local_filter_ptr += 24;
+        for (int i = 0; i < 3; i++)
+        {
+          const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8.val[i]));
+          filter[i] = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset));
+        }
+        // Load the inputs, duplicate 3-fold, add input_offset.
+        const uint8x8_t input_u8 = vld1_u8(local_input_ptr);
+        local_input_ptr += 8;
+
+        uint8x8_t input_u8_dup3[3];
+        for (int i = 0; i < 3; i++)
+        {
+          input_u8_dup3[i] = vtbl1_u8(input_u8, dup3_indices[i]);
+        }
+        int16x8_t input_dup3[3];
+        for (int i = 0; i < 3; i++)
+        {
+          const int16x8_t input_s16_dup3 = vreinterpretq_s16_u16(vmovl_u8(input_u8_dup3[i]));
+          input_dup3[i] = vaddq_s16(input_s16_dup3, vdupq_n_s16(input_offset));
+        }
+        // Load the accumulators from acc_buffer. acc[i].val[j] holds output
+        // elements 8*j + 4*i .. 8*j + 4*i + 3 of the 24 produced per
+        // iteration, matching the VTBL-duplicated input layout above.
+        int32x4x3_t acc[2];
+        for (int i = 0; i < 2; i++)
+        {
+          acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i);
+          acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8);
+          acc[i].val[2] = vld1q_s32(acc_buffer_ptr + 4 * i + 16);
+        }
+        // Multiply-accumulate
+        for (int j = 0; j < 3; j++)
+        {
+          acc[0].val[j] =
+            vmlal_s16(acc[0].val[j], vget_low_s16(input_dup3[j]), vget_low_s16(filter[j]));
+          acc[1].val[j] =
+            vmlal_s16(acc[1].val[j], vget_high_s16(input_dup3[j]), vget_high_s16(filter[j]));
+        }
+        // Store the accumulators back to acc_buffer
+        for (int i = 0; i < 2; i++)
+        {
+          vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]);
+          vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]);
+          vst1q_s32(acc_buffer_ptr + 4 * i + 16, acc[i].val[2]);
+        }
+        acc_buffer_ptr += 24;
+      }
+      // Handle one input channel at a time (scalar tail).
+      for (; ic < input_depth; ic++)
+      {
+        const uint16_t input_val = *local_input_ptr++ + input_offset;
+        for (int i = 0; i < 3; i++)
+        {
+          const uint16_t filter_val = local_filter_ptr[i] + filter_offset;
+          *acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
+        }
+        local_filter_ptr += 3;
+      }
+      input_ptr += input_ptr_increment;
+    }
+  }
+};
+
+// Kernel for runtime input depth (fixed-depth arg 0) with depth_multiplier
+// == 2, strided variant: input_ptr advances by input_ptr_increment per
+// output pixel.
+template <> struct QuantizedDepthwiseConvKernel<true, 0, 2>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    // NOTE: input_depth is used below; the casts only silence warnings.
+    (void)input_depth;
+    (void)depth_multiplier;
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++)
+    {
+      const uint8_t *local_filter_ptr = filter_ptr;
+      const uint8_t *local_input_ptr = input_ptr;
+      int ic = 0;
+      // Handle 8 input channels at a time.
+      for (; ic <= input_depth - 8; ic += 8)
+      {
+        // Load the filters, add filter_offset.
+        int16x8_t filter[2];
+        uint8x8x2_t filter_u8;
+        filter_u8.val[0] = vld1_u8(local_filter_ptr);
+        filter_u8.val[1] = vld1_u8(local_filter_ptr + 8);
+        local_filter_ptr += 16;
+        for (int i = 0; i < 2; i++)
+        {
+          const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8.val[i]));
+          filter[i] = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset));
+        }
+        // Load the inputs, add input_offset, duplicate 2-fold.
+        const uint8x8_t input_u8 = vld1_u8(local_input_ptr);
+        local_input_ptr += 8;
+        const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8));
+        const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+        const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+        // Load the accumulators from acc_buffer.
+        int32x4x2_t acc[2];
+        for (int i = 0; i < 2; i++)
+        {
+          acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i);
+          acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8);
+        }
+        // Multiply-accumulate.
+        for (int j = 0; j < 2; j++)
+        {
+          acc[0].val[j] =
+            vmlal_s16(acc[0].val[j], vget_low_s16(filter[j]), vget_low_s16(input_dup2.val[j]));
+          acc[1].val[j] =
+            vmlal_s16(acc[1].val[j], vget_high_s16(filter[j]), vget_high_s16(input_dup2.val[j]));
+        }
+        // Store the accumulators back to acc_buffer.
+        for (int i = 0; i < 2; i++)
+        {
+          vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]);
+          vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]);
+        }
+        acc_buffer_ptr += 16;
+      }
+      // Handle one input channel at a time (scalar tail).
+      for (; ic < input_depth; ic++)
+      {
+        // Load the inputs.
+        const uint16_t input_val = *local_input_ptr++ + input_offset;
+        for (int i = 0; i < 2; i++)
+        {
+          const uint16_t filter_val = local_filter_ptr[i] + filter_offset;
+          *acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
+        }
+        local_filter_ptr += 2;
+      }
+      input_ptr += input_ptr_increment;
+    }
+  }
+};
+
+// Kernel for runtime input depth (fixed-depth arg 0) with depth_multiplier
+// == 1, strided variant: input_ptr advances by input_ptr_increment per
+// output pixel. Filter and input are consumed channel-for-channel.
+template <> struct QuantizedDepthwiseConvKernel<true, 0, 1>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    // NOTE: input_depth is used below; the casts only silence warnings.
+    (void)input_depth;
+    (void)depth_multiplier;
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++)
+    {
+      const uint8_t *local_filter_ptr = filter_ptr;
+      const uint8_t *local_input_ptr = input_ptr;
+      int ic = 0;
+      // Handle 16 input channels at a time.
+      for (; ic <= input_depth - 16; ic += 16)
+      {
+        // Load the filters, add filter_offset.
+        uint8x8_t filter_u8_0 = vld1_u8(local_filter_ptr + 8 * 0);
+        uint8x8_t filter_u8_1 = vld1_u8(local_filter_ptr + 8 * 1);
+        local_filter_ptr += 16;
+        int16x8_t filter_0 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_0));
+        int16x8_t filter_1 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_1));
+        filter_0 = vaddq_s16(filter_0, vdupq_n_s16(filter_offset));
+        filter_1 = vaddq_s16(filter_1, vdupq_n_s16(filter_offset));
+        // Load the inputs, add input_offset.
+        uint8x8_t input_u8_0 = vld1_u8(local_input_ptr + 8 * 0);
+        uint8x8_t input_u8_1 = vld1_u8(local_input_ptr + 8 * 1);
+        local_input_ptr += 16;
+        int16x8_t input_0 = vreinterpretq_s16_u16(vmovl_u8(input_u8_0));
+        int16x8_t input_1 = vreinterpretq_s16_u16(vmovl_u8(input_u8_1));
+        input_0 = vaddq_s16(input_0, vdupq_n_s16(input_offset));
+        input_1 = vaddq_s16(input_1, vdupq_n_s16(input_offset));
+        // Load the accumulators from acc_buffer
+        int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+        int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+        int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+        int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3);
+        acc_0 = vmlal_s16(acc_0, vget_low_s16(input_0), vget_low_s16(filter_0));
+        acc_1 = vmlal_s16(acc_1, vget_high_s16(input_0), vget_high_s16(filter_0));
+        acc_2 = vmlal_s16(acc_2, vget_low_s16(input_1), vget_low_s16(filter_1));
+        acc_3 = vmlal_s16(acc_3, vget_high_s16(input_1), vget_high_s16(filter_1));
+        // Store the accumulators back to acc_buffer
+        vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+        vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+        vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+        vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3);
+        acc_buffer_ptr += 16;
+      }
+      // Handle 8 input channels at a time.
+      for (; ic <= input_depth - 8; ic += 8)
+      {
+        // Load the filters, add filter_offset.
+        const uint8x8_t filter_u8 = vld1_u8(local_filter_ptr);
+        local_filter_ptr += 8;
+        const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8));
+        const int16x8_t filter = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset));
+        // Load the inputs, add input_offset.
+        const uint8x8_t input_u8 = vld1_u8(local_input_ptr);
+        local_input_ptr += 8;
+        const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8));
+        const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+        // Load the accumulators from acc_buffer
+        int32x4_t acc[2];
+        for (int i = 0; i < 2; i++)
+        {
+          acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+        }
+        // Multiply-accumulate
+        acc[0] = vmlal_s16(acc[0], vget_low_s16(input), vget_low_s16(filter));
+        acc[1] = vmlal_s16(acc[1], vget_high_s16(input), vget_high_s16(filter));
+        // Store the accumulators back to acc_buffer
+        for (int i = 0; i < 2; i++)
+        {
+          vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+        }
+        acc_buffer_ptr += 8;
+      }
+      // Handle one input channel at a time (scalar tail).
+      for (; ic < input_depth; ic++)
+      {
+        const uint16_t input_val = *local_input_ptr++ + input_offset;
+        const uint16_t filter_val = *local_filter_ptr++ + filter_offset;
+        *acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
+      }
+      input_ptr += input_ptr_increment;
+    }
+  }
+};
+
+// Kernel for the fixed case input_depth == 16, depth_multiplier == 1,
+// strided variant: input_ptr advances by input_ptr_increment per output
+// pixel. The 16 filter values are loaded once and reused for every pixel.
+template <> struct QuantizedDepthwiseConvKernel<true, 16, 1>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    // Implied by template arguments; casts only silence warnings.
+    (void)input_depth;
+    (void)depth_multiplier;
+    // Load the filters, add filter_offset.
+    uint8x8_t filter_u8[2];
+    for (int i = 0; i < 2; i++)
+    {
+      filter_u8[i] = vld1_u8(filter_ptr + 8 * i);
+    }
+    int16x8_t filter[2];
+    for (int i = 0; i < 2; i++)
+    {
+      filter[i] = vreinterpretq_s16_u16(vmovl_u8(filter_u8[i]));
+    }
+    for (int i = 0; i < 2; i++)
+    {
+      filter[i] = vaddq_s16(filter[i], vdupq_n_s16(filter_offset));
+    }
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++)
+    {
+      // Load the inputs, add input_offset.
+      uint8x8_t input_u8[2];
+      for (int i = 0; i < 2; i++)
+      {
+        input_u8[i] = vld1_u8(input_ptr + 8 * i);
+      }
+      input_ptr += input_ptr_increment;
+      int16x8_t input[2];
+      for (int i = 0; i < 2; i++)
+      {
+        input[i] = vreinterpretq_s16_u16(vmovl_u8(input_u8[i]));
+      }
+      for (int i = 0; i < 2; i++)
+      {
+        input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset));
+      }
+      // Load the accumulators from acc_buffer
+      int32x4_t acc[4];
+      for (int i = 0; i < 4; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+      // Multiply-accumulate
+      for (int i = 0; i < 2; i++)
+      {
+        acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(input[i]), vget_low_s16(filter[i]));
+        acc[2 * i + 1] =
+          vmlal_s16(acc[2 * i + 1], vget_high_s16(input[i]), vget_high_s16(filter[i]));
+      }
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 4; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 16;
+    }
+  }
+};
+
+// Kernel specialization: stride allowed, fixed input depth 8, depth
+// multiplier 1. Each output pixel consumes 8 input channels and updates 8
+// accumulator values (2 x int32x4).
+template <> struct QuantizedDepthwiseConvKernel<true, 8, 1>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    (void)input_depth;
+    (void)depth_multiplier;
+    // Load the filters, add filter_offset. Done once: filters are
+    // loop-invariant across output pixels.
+    const uint8x8_t filter_u8 = vld1_u8(filter_ptr);
+    const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8));
+    const int16x8_t filter = vaddq_s16(filter_s16, vdupq_n_s16(filter_offset));
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++)
+    {
+      // Load the inputs, add input_offset.
+      const uint8x8_t input_u8 = vld1_u8(input_ptr);
+      const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8));
+      const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+      // Load the accumulators from acc_buffer
+      int32x4_t acc[2];
+      for (int i = 0; i < 2; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+      // Multiply-accumulate (widening s16*s16 -> s32).
+      acc[0] = vmlal_s16(acc[0], vget_low_s16(input), vget_low_s16(filter));
+      acc[1] = vmlal_s16(acc[1], vget_high_s16(input), vget_high_s16(filter));
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 2; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 8;
+      input_ptr += input_ptr_increment;
+    }
+  }
+};
+
+// Kernel specialization: stride allowed, fixed input depth 1, depth
+// multiplier 16. A single scalar input value is broadcast-multiplied against
+// 16 filter values per output pixel.
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 16>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    (void)input_depth;
+    (void)depth_multiplier;
+    // Load the filters, add filter_offset.
+    uint8x8_t filter_u8[2];
+    for (int i = 0; i < 2; i++)
+    {
+      filter_u8[i] = vld1_u8(filter_ptr + 8 * i);
+    }
+    int16x8_t filter[2];
+    for (int i = 0; i < 2; i++)
+    {
+      filter[i] = vreinterpretq_s16_u16(vmovl_u8(filter_u8[i]));
+    }
+    for (int i = 0; i < 2; i++)
+    {
+      filter[i] = vaddq_s16(filter[i], vdupq_n_s16(filter_offset));
+    }
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++)
+    {
+      uint8_t input_u8 = *input_ptr;
+      input_ptr += input_ptr_increment;
+      uint16_t input = static_cast<int16_t>(input_u8 + input_offset);
+      // Load the accumulators from acc_buffer
+      int32x4_t acc[4];
+      for (int i = 0; i < 4; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+      // Multiply-accumulate: broadcast the scalar input across all filter
+      // lanes via vmlal_n_s16.
+      for (int i = 0; i < 2; i++)
+      {
+        acc[2 * i + 0] = vmlal_n_s16(acc[2 * i + 0], vget_low_s16(filter[i]), input);
+        acc[2 * i + 1] = vmlal_n_s16(acc[2 * i + 1], vget_high_s16(filter[i]), input);
+      }
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 4; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 16;
+    }
+  }
+};
+
+// Kernel specialization: stride allowed, fixed input depth 1, depth
+// multiplier 32. A single scalar input value is broadcast-multiplied against
+// 32 filter values (8 x int32x4 accumulators) per output pixel.
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 32>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    (void)input_depth;
+    (void)depth_multiplier;
+    // Load the filters, add filter_offset (loop-invariant, hoisted).
+    uint8x8_t filter_u8_0 = vld1_u8(filter_ptr + 8 * 0);
+    uint8x8_t filter_u8_1 = vld1_u8(filter_ptr + 8 * 1);
+    uint8x8_t filter_u8_2 = vld1_u8(filter_ptr + 8 * 2);
+    uint8x8_t filter_u8_3 = vld1_u8(filter_ptr + 8 * 3);
+    int16x8_t filter_0 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_0));
+    int16x8_t filter_1 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_1));
+    int16x8_t filter_2 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_2));
+    int16x8_t filter_3 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_3));
+    filter_0 = vaddq_s16(filter_0, vdupq_n_s16(filter_offset));
+    filter_1 = vaddq_s16(filter_1, vdupq_n_s16(filter_offset));
+    filter_2 = vaddq_s16(filter_2, vdupq_n_s16(filter_offset));
+    filter_3 = vaddq_s16(filter_3, vdupq_n_s16(filter_offset));
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++)
+    {
+      uint8_t input_u8 = *input_ptr;
+      input_ptr += input_ptr_increment;
+      uint16_t input = static_cast<int16_t>(input_u8 + input_offset);
+      // Load the accumulators from acc_buffer
+      int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+      int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+      int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+      int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3);
+      int32x4_t acc_4 = vld1q_s32(acc_buffer_ptr + 4 * 4);
+      int32x4_t acc_5 = vld1q_s32(acc_buffer_ptr + 4 * 5);
+      int32x4_t acc_6 = vld1q_s32(acc_buffer_ptr + 4 * 6);
+      int32x4_t acc_7 = vld1q_s32(acc_buffer_ptr + 4 * 7);
+      // Multiply-accumulate: scalar input broadcast against each filter half.
+      acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input);
+      acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input);
+      acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input);
+      acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input);
+      acc_4 = vmlal_n_s16(acc_4, vget_low_s16(filter_2), input);
+      acc_5 = vmlal_n_s16(acc_5, vget_high_s16(filter_2), input);
+      acc_6 = vmlal_n_s16(acc_6, vget_low_s16(filter_3), input);
+      acc_7 = vmlal_n_s16(acc_7, vget_high_s16(filter_3), input);
+      // Store the accumulators back to acc_buffer
+      vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+      vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+      vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+      vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3);
+      vst1q_s32(acc_buffer_ptr + 4 * 4, acc_4);
+      vst1q_s32(acc_buffer_ptr + 4 * 5, acc_5);
+      vst1q_s32(acc_buffer_ptr + 4 * 6, acc_6);
+      vst1q_s32(acc_buffer_ptr + 4 * 7, acc_7);
+      acc_buffer_ptr += 32;
+    }
+  }
+};
+
+// Kernel specialization: stride allowed, fixed input depth 1, depth
+// multiplier 20. A single scalar input value is broadcast-multiplied against
+// 20 filter values (5 x int32x4 accumulators) per output pixel.
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 20>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    (void)input_depth;
+    (void)depth_multiplier;
+    // Load the filters, add filter_offset.
+    // NEON wants to load 8 bytes at a time, but 20 is not divisible by 8.
+    // We load the first 16 bytes into filter_u8_{0,1} as usual.
+    // Then we load the 8 last bytes into filter_u8_x (x for 'extra').
+    // This is redundant: the first 4 bytes of filter_u8_x are the same
+    // as the last 4 bytes of filter_u8_1. Only the high half of filter_x is
+    // consumed below, so the overlap is harmless.
+    uint8x8_t filter_u8_0 = vld1_u8(filter_ptr + 8 * 0);
+    uint8x8_t filter_u8_1 = vld1_u8(filter_ptr + 8 * 1);
+    uint8x8_t filter_u8_x = vld1_u8(filter_ptr + 8 * 1 + 4);
+    int16x8_t filter_0 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_0));
+    int16x8_t filter_1 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_1));
+    int16x8_t filter_x = vreinterpretq_s16_u16(vmovl_u8(filter_u8_x));
+    filter_0 = vaddq_s16(filter_0, vdupq_n_s16(filter_offset));
+    filter_1 = vaddq_s16(filter_1, vdupq_n_s16(filter_offset));
+    filter_x = vaddq_s16(filter_x, vdupq_n_s16(filter_offset));
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++)
+    {
+      uint8_t input_u8 = *input_ptr;
+      input_ptr += input_ptr_increment;
+      uint16_t input = static_cast<int16_t>(input_u8 + input_offset);
+      // Load the accumulators from acc_buffer
+      int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+      int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+      int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+      int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3);
+      int32x4_t acc_4 = vld1q_s32(acc_buffer_ptr + 4 * 4);
+      // Multiply-accumulate. Note: only the high half of filter_x carries the
+      // final 4 (non-duplicated) filter values.
+      acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input);
+      acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input);
+      acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input);
+      acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input);
+      acc_4 = vmlal_n_s16(acc_4, vget_high_s16(filter_x), input);
+      // Store the accumulators back to acc_buffer
+      vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+      vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+      vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+      vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3);
+      vst1q_s32(acc_buffer_ptr + 4 * 4, acc_4);
+      acc_buffer_ptr += 20;
+    }
+  }
+};
+
+// Kernel specialization: stride allowed, fixed input depth 1, depth
+// multiplier 8. A single scalar input value is broadcast-multiplied against
+// 8 filter values per output pixel.
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 8>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    (void)input_depth;
+    (void)depth_multiplier;
+    // Load the filters, add filter_offset (hoisted out of the pixel loop).
+    const uint8x8_t filter_u8 = vld1_u8(filter_ptr);
+    const int16x8_t filter =
+        vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8)), vdupq_n_s16(filter_offset));
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++)
+    {
+      uint8_t input_u8 = *input_ptr;
+      input_ptr += input_ptr_increment;
+      uint16_t input = static_cast<int16_t>(input_u8 + input_offset);
+      // Load the accumulators from acc_buffer
+      int32x4_t acc[2];
+      for (int i = 0; i < 2; i++)
+      {
+        acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+      }
+      // Multiply-accumulate: broadcast scalar input against the filter.
+      acc[0] = vmlal_n_s16(acc[0], vget_low_s16(filter), input);
+      acc[1] = vmlal_n_s16(acc[1], vget_high_s16(filter), input);
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 2; i++)
+      {
+        vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 8;
+    }
+  }
+};
+
+// Kernel specialization: stride allowed, fixed input depth 2, depth
+// multiplier 1. Duplicates the 2-entry filter into 4 lanes so that two
+// output pixels can be processed per iteration of the main loop.
+template <> struct QuantizedDepthwiseConvKernel<true, 2, 1>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    (void)input_depth;
+    (void)depth_multiplier;
+    // Load the filters, add filter_offset.
+    // The 2 filter values are replicated into lanes {0,1} and {2,3} so one
+    // vmlal covers two pixels at once.
+    uint8x8_t filter_u8 = vdup_n_u8(0);
+    filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0);
+    filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1);
+    filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 2);
+    filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 3);
+    const int16x4_t filter_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8)));
+    const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset));
+
+    int outp = 0;
+
+    // Handle 2 output pixels at a time.
+    for (; outp <= num_output_pixels - 2; outp += 2)
+    {
+      // Load the accumulators from acc_buffer.
+      int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+      // Load the inputs, add input_offset.
+      // NOTE(review): reads 2 bytes via a uint16_t pointer; assumes the
+      // platform tolerates this possibly-unaligned access — confirm.
+      uint16x4_t input_u16 = vdup_n_u16(0);
+      input_u16 = vset_lane_u16((reinterpret_cast<const uint16_t *>(input_ptr))[0], input_u16, 0);
+      input_ptr += input_ptr_increment;
+      input_u16 = vset_lane_u16((reinterpret_cast<const uint16_t *>(input_ptr))[0], input_u16, 1);
+      input_ptr += input_ptr_increment;
+      const int16x4_t input_s16 =
+          vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u16(input_u16))));
+      const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+      // Multiply-accumulate.
+      acc = vmlal_s16(acc, filter, input);
+      // Store the accumulators back to acc_buffer.
+      vst1q_s32(acc_buffer_ptr, acc);
+      acc_buffer_ptr += 4;
+    }
+
+    // Handle 1 output pixel at a time.
+    for (; outp < num_output_pixels; outp++)
+    {
+      // Load the accumulators from acc_buffer.
+      int32x2_t acc = vld1_s32(acc_buffer_ptr);
+      // Load the inputs, add input_offset.
+      uint8x8_t input_u8 = vdup_n_u8(0);
+      input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0);
+      input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1);
+      input_ptr += input_ptr_increment;
+      const int16x4_t input_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8)));
+      const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+      // Multiply-accumulate: widen acc to a quad for vmlal, then keep only the
+      // low pair (the two real accumulators).
+      acc = vget_low_s32(vmlal_s16(vcombine_s32(acc, acc), filter, input));
+      // Store the accumulators back to acc_buffer.
+      vst1_s32(acc_buffer_ptr, acc);
+      acc_buffer_ptr += 2;
+    }
+  }
+};
+
+// Kernel specialization: stride allowed, fixed input depth 4, depth
+// multiplier 1. The main loop does a full 8-byte input load (reading 4 bytes
+// beyond the current pixel), so the last pixel is handled separately with
+// lane-by-lane loads to avoid reading past the input buffer.
+template <> struct QuantizedDepthwiseConvKernel<true, 4, 1>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    (void)input_depth;
+    (void)depth_multiplier;
+    // Early out: the epilogue below unconditionally processes one pixel.
+    if (num_output_pixels <= 0)
+    {
+      return;
+    }
+
+    // Load the filters, add filter_offset.
+    uint8x8_t filter_u8 = vdup_n_u8(0);
+    filter_u8 = vset_lane_u8(filter_ptr[0], filter_u8, 0);
+    filter_u8 = vset_lane_u8(filter_ptr[1], filter_u8, 1);
+    filter_u8 = vset_lane_u8(filter_ptr[2], filter_u8, 2);
+    filter_u8 = vset_lane_u8(filter_ptr[3], filter_u8, 3);
+    const int16x4_t filter_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(filter_u8)));
+    const int16x4_t filter = vadd_s16(filter_s16, vdup_n_s16(filter_offset));
+
+    int outp = 0;
+
+    // Handle one output pixel at a time until second to the last pixel. Second
+    // to the last because we read eight input pixels while only processing
+    // four.
+    for (; outp < num_output_pixels - 1; outp++)
+    {
+      // Load the accumulators from acc_buffer
+      int32x4_t acc;
+      acc = vld1q_s32(acc_buffer_ptr);
+
+      // Load the inputs, add input_offset.
+      uint8x8_t input_u8 = vld1_u8(input_ptr);
+      input_ptr += input_ptr_increment;
+      const int16x4_t input_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8)));
+      const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+      // Multiply-accumulate
+      acc = vmlal_s16(acc, filter, input);
+      // Store the accumulators back to acc_buffer
+      vst1q_s32(acc_buffer_ptr, acc);
+      acc_buffer_ptr += 4;
+    }
+
+    // Handle the last output pixel.
+    // Load the accumulators from acc_buffer
+    int32x4_t acc;
+    acc = vld1q_s32(acc_buffer_ptr);
+
+    // Load the inputs, add input_offset. Lane-by-lane to read exactly 4 bytes.
+    uint8x8_t input_u8 = vdup_n_u8(0);
+    input_u8 = vset_lane_u8(input_ptr[0], input_u8, 0);
+    input_u8 = vset_lane_u8(input_ptr[1], input_u8, 1);
+    input_u8 = vset_lane_u8(input_ptr[2], input_u8, 2);
+    input_u8 = vset_lane_u8(input_ptr[3], input_u8, 3);
+    const int16x4_t input_s16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8)));
+    const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+    // Multiply-accumulate
+    acc = vmlal_s16(acc, filter, input);
+    // Store the accumulators back to acc_buffer
+    vst1q_s32(acc_buffer_ptr, acc);
+  }
+};
+
+// Kernel specialization: stride 1 only (kAllowStrided=false), fixed input
+// depth 12, depth multiplier 1. Uses two overlapping 8-byte loads (offsets 0
+// and 4) to cover 12 bytes; the overlapped middle 4 bytes are simply ignored
+// by taking only the high half of the second vector.
+template <> struct QuantizedDepthwiseConvKernel<false, 12, 1>
+{
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
+                  const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+  {
+    (void)input_depth;
+    (void)depth_multiplier;
+    // Load the filters, add filter_offset. The +4 load overlaps the first by
+    // 4 bytes; filter_2 keeps only bytes 8..11.
+    uint8x8_t filter_u8_0 = vld1_u8(filter_ptr);
+    uint8x8_t filter_u8_1 = vld1_u8(filter_ptr + 4);
+    int16x8_t filter_s16_0 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_0));
+    int16x8_t filter_s16_1 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_1));
+    filter_s16_0 = vaddq_s16(filter_s16_0, vdupq_n_s16(filter_offset));
+    filter_s16_1 = vaddq_s16(filter_s16_1, vdupq_n_s16(filter_offset));
+    int16x4_t filter_0 = vget_low_s16(filter_s16_0);
+    int16x4_t filter_1 = vget_high_s16(filter_s16_0);
+    int16x4_t filter_2 = vget_high_s16(filter_s16_1);
+
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++)
+    {
+      // Load the inputs, add input_offset (same overlapping-load scheme as
+      // for the filters).
+      uint8x8_t input_u8_0 = vld1_u8(input_ptr);
+      uint8x8_t input_u8_1 = vld1_u8(input_ptr + 4);
+      input_ptr += input_ptr_increment;
+      int16x8_t input_0 = vreinterpretq_s16_u16(vmovl_u8(input_u8_0));
+      int16x8_t input_1 = vreinterpretq_s16_u16(vmovl_u8(input_u8_1));
+      input_0 = vaddq_s16(input_0, vdupq_n_s16(input_offset));
+      input_1 = vaddq_s16(input_1, vdupq_n_s16(input_offset));
+
+      // Load the accumulators from acc_buffer
+      int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+      int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+      int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+
+      // Multiply-accumulate
+      acc_0 = vmlal_s16(acc_0, vget_low_s16(input_0), filter_0);
+      acc_1 = vmlal_s16(acc_1, vget_high_s16(input_0), filter_1);
+      acc_2 = vmlal_s16(acc_2, vget_high_s16(input_1), filter_2);
+
+      // Store the accumulators back to acc_buffer
+      vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+      vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+      vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+
+      acc_buffer_ptr += 12;
+    }
+  }
+};
+#endif
+
+// Accumulates the effect of one row of the filter, on a segment of one row
+// of the output, accessing the corresponding one row of the input.
+// Accumulates the effect of one row of the filter, on a segment of one row
+// of the output, accessing the corresponding one row of the input.
+//
+// Template parameters select a fixed-size fast kernel:
+//   kAllowStrided          - whether stride != 1 is permitted.
+//   kFixedInputDepth       - compile-time input depth (0 = variable).
+//   kFixedDepthMultiplier  - compile-time depth multiplier (0 = variable).
+// For each filter_x position this computes the clamped output-x range that
+// maps inside the input row, then dispatches the per-pixel inner loop to
+// QuantizedDepthwiseConvKernel<...>::Run.
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor, int input_depth,
+                                    int input_width, const uint8_t *input_data,
+                                    int16_t input_offset, int pad_width, int depth_multiplier,
+                                    int filter_width, const uint8_t *filter_data,
+                                    int16_t filter_offset, int out_x_buffer_start,
+                                    int out_x_buffer_end, int output_depth, int32_t *acc_buffer)
+{
+  // Sanity check parameters. This is important in particular to ensure
+  // that we keep the number of template instantiations minimal, so we don't
+  // increase binary size unnecessarily.
+  static_assert(kFixedDepthMultiplier || !kFixedInputDepth, "");
+  static_assert(kFixedInputDepth || kAllowStrided, "");
+  assert(stride == 1 || kAllowStrided);
+  if (kFixedInputDepth)
+  {
+    assert(input_depth == kFixedInputDepth);
+  }
+  if (kFixedDepthMultiplier)
+  {
+    assert(depth_multiplier == kFixedDepthMultiplier);
+  }
+  assert(output_depth == input_depth * depth_multiplier);
+  const int input_ptr_increment = stride * input_depth;
+  const uint8_t *filter_base_ptr = filter_data;
+  for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+  {
+    // For the current (filter_x, filter_y) point in the filter,
+    // compute the boundaries of the corresponding output row segment.
+    int out_x_loop_start_unclampled = 0;
+    int out_x_loop_end_unclampled = 0;
+    if (kAllowStrided)
+    {
+      // Strides 2 and 4 use literal divisors so the compiler can emit
+      // shifts; other strides fall back to the generic division.
+      if (stride == 2)
+      {
+        out_x_loop_start_unclampled = (pad_width - dilation_factor * filter_x + 1) / 2;
+        out_x_loop_end_unclampled = (pad_width + input_width - dilation_factor * filter_x + 1) / 2;
+      }
+      else if (stride == 4)
+      {
+        out_x_loop_start_unclampled = (pad_width - dilation_factor * filter_x + 3) / 4;
+        out_x_loop_end_unclampled = (pad_width + input_width - dilation_factor * filter_x + 3) / 4;
+      }
+      else
+      {
+        out_x_loop_start_unclampled =
+            (pad_width - dilation_factor * filter_x + stride - 1) / stride;
+        out_x_loop_end_unclampled =
+            (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride;
+      }
+    }
+    else
+    {
+      out_x_loop_start_unclampled = pad_width - dilation_factor * filter_x;
+      out_x_loop_end_unclampled = pad_width + input_width - dilation_factor * filter_x;
+    }
+    // The kernel will have to iterate on the segment of the
+    // output row that starts at out_x_loop_start and out_x_loop_end.
+    const int out_x_loop_start = std::max(out_x_buffer_start, out_x_loop_start_unclampled);
+    const int out_x_loop_end = std::min(out_x_buffer_end, out_x_loop_end_unclampled);
+
+    int32_t *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+    const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+    const uint8_t *input_ptr = input_data + in_x_origin * input_depth;
+    const int num_output_pixels = out_x_loop_end - out_x_loop_start;
+    QuantizedDepthwiseConvKernel<kAllowStrided, kFixedInputDepth, kFixedDepthMultiplier>::Run(
+        num_output_pixels, input_depth, depth_multiplier, input_ptr, input_offset,
+        input_ptr_increment, filter_base_ptr, filter_offset, acc_buffer_ptr);
+    filter_base_ptr += output_depth;
+  }
+}
+
+// generic fallback of DepthwiseConvAccumRow, portable, non-templatized.
+// Works for any stride, input depth and depth multiplier; used when no
+// specialized NEON kernel matches. Same contract as
+// QuantizedDepthwiseConvAccumRow: accumulate one filter row's contribution
+// into acc_buffer for the output-x range [out_x_buffer_start, out_x_buffer_end).
+inline void QuantizedDepthwiseConvAccumRowGeneric(int stride, int dilation_factor, int input_depth,
+                                                  int input_width, const uint8_t *input_data,
+                                                  int16_t input_offset, int pad_width,
+                                                  int depth_multiplier, int filter_width,
+                                                  const uint8_t *filter_data, int16_t filter_offset,
+                                                  int out_x_buffer_start, int out_x_buffer_end,
+                                                  int output_depth, int32_t *acc_buffer)
+{
+  const uint8_t *filter_base_ptr = filter_data;
+  for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+  {
+    // Clamp the output-x range so every touched input-x lies in bounds.
+    const int out_x_loop_start = std::max(
+        out_x_buffer_start, (pad_width - dilation_factor * filter_x + stride - 1) / stride);
+    const int out_x_loop_end =
+        std::min(out_x_buffer_end,
+                 (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride);
+
+    int32_t *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+    const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+    const uint8_t *input_ptr = input_data + in_x_origin * input_depth;
+    // (stride - 1) * input_depth: the inner loop already advances input_ptr
+    // by input_depth, so only the remainder of the stride is added here.
+    const int input_ptr_increment = (stride - 1) * input_depth;
+    for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++)
+    {
+      const uint8_t *filter_ptr = filter_base_ptr;
+      for (int ic = 0; ic < input_depth; ++ic)
+      {
+        const int16_t input_val = *input_ptr++ + input_offset;
+        for (int m = 0; m < depth_multiplier; m++)
+        {
+          const int16_t filter_val = *filter_ptr++ + filter_offset;
+          *acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
+        }
+      }
+      input_ptr += input_ptr_increment;
+    }
+    filter_base_ptr += output_depth;
+  }
+}
+
+// Initializes the accumulator buffer with bias values.
+// Fills acc_buffer with num_output_pixels copies of the output_depth-wide
+// bias vector. NEON fast paths cover the common depths (1, 2, 4, 8, 16);
+// the trailing memcpy loop handles any remaining pixels and all other depths.
+inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth,
+                                       const int32_t *bias_data, int32_t *acc_buffer)
+{
+  int i = 0;
+#ifdef USE_NEON
+  if (output_depth == 1)
+  {
+    // Broadcast the single bias value, 16 pixels per iteration then 4.
+    const int32x4_t b = vdupq_n_s32(bias_data[0]);
+    for (; i <= num_output_pixels - 16; i += 16)
+    {
+      vst1q_s32(acc_buffer + i + 0, b);
+      vst1q_s32(acc_buffer + i + 4, b);
+      vst1q_s32(acc_buffer + i + 8, b);
+      vst1q_s32(acc_buffer + i + 12, b);
+    }
+    for (; i <= num_output_pixels - 4; i += 4)
+    {
+      vst1q_s32(acc_buffer + i, b);
+    }
+  }
+  else if (output_depth == 2)
+  {
+    // Pack the bias pair twice into one quad: {b0, b1, b0, b1} = 2 pixels.
+    int32x4_t b = vdupq_n_s32(bias_data[0]);
+    b = vsetq_lane_s32(bias_data[1], b, 1);
+    b = vsetq_lane_s32(bias_data[1], b, 3);
+    for (; i <= num_output_pixels - 8; i += 8)
+    {
+      vst1q_s32(acc_buffer + 2 * i + 0, b);
+      vst1q_s32(acc_buffer + 2 * i + 4, b);
+      vst1q_s32(acc_buffer + 2 * i + 8, b);
+      vst1q_s32(acc_buffer + 2 * i + 12, b);
+    }
+    for (; i <= num_output_pixels - 2; i += 2)
+    {
+      vst1q_s32(acc_buffer + 2 * i, b);
+    }
+  }
+  else if (output_depth == 4)
+  {
+    // One quad per pixel.
+    const int32x4_t b = vld1q_s32(bias_data);
+    for (; i <= num_output_pixels - 4; i += 4)
+    {
+      vst1q_s32(acc_buffer + 4 * i + 0, b);
+      vst1q_s32(acc_buffer + 4 * i + 4, b);
+      vst1q_s32(acc_buffer + 4 * i + 8, b);
+      vst1q_s32(acc_buffer + 4 * i + 12, b);
+    }
+    for (; i < num_output_pixels; i++)
+    {
+      vst1q_s32(acc_buffer + 4 * i, b);
+    }
+  }
+  else if (output_depth == 8)
+  {
+    // Two quads per pixel, unrolled two pixels deep.
+    const int32x4_t b0 = vld1q_s32(bias_data);
+    const int32x4_t b1 = vld1q_s32(bias_data + 4);
+    for (; i <= num_output_pixels - 2; i += 2)
+    {
+      vst1q_s32(acc_buffer + 8 * i + 0, b0);
+      vst1q_s32(acc_buffer + 8 * i + 4, b1);
+      vst1q_s32(acc_buffer + 8 * i + 8, b0);
+      vst1q_s32(acc_buffer + 8 * i + 12, b1);
+    }
+    for (; i < num_output_pixels; i++)
+    {
+      vst1q_s32(acc_buffer + 8 * i + 0, b0);
+      vst1q_s32(acc_buffer + 8 * i + 4, b1);
+    }
+  }
+  else if (output_depth == 16)
+  {
+    // Four quads per pixel.
+    const int32x4_t b0 = vld1q_s32(bias_data);
+    const int32x4_t b1 = vld1q_s32(bias_data + 4);
+    const int32x4_t b2 = vld1q_s32(bias_data + 8);
+    const int32x4_t b3 = vld1q_s32(bias_data + 12);
+    for (; i < num_output_pixels; i++)
+    {
+      vst1q_s32(acc_buffer + 16 * i + 0, b0);
+      vst1q_s32(acc_buffer + 16 * i + 4, b1);
+      vst1q_s32(acc_buffer + 16 * i + 8, b2);
+      vst1q_s32(acc_buffer + 16 * i + 12, b3);
+    }
+  }
+#endif
+  // Scalar fallback: also finishes any pixels the NEON paths left over.
+  for (; i < num_output_pixels; i++)
+  {
+    memcpy(acc_buffer + i * output_depth, bias_data, sizeof(acc_buffer[0]) * output_depth);
+  }
+}
+
+inline void DepthwiseConvGeneral(const DepthwiseConvParams &params, const Shape &input_shape,
+ const uint8_t *input_data, const Shape &filter_shape,
+ const uint8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape,
+ uint8_t *output_data)
+{
+ (void)bias_shape;
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int depth_multiplier = params.depth_multiplier;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+ const int32_t input_offset = params.input_offset;
+ const int32_t filter_offset = params.weights_offset;
+ const int32_t output_offset = params.output_offset;
+ const int32_t output_multiplier = params.output_multiplier;
+ const int output_shift = params.output_shift;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = input_shape.Dims(3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+#ifdef USE_NEON
+ const bool shift_left = (output_shift > 0);
+ const int32_t multiplier_power_of_two = shift_left ? (1 << output_shift) : 1;
+#endif
+
+ static const int kAccBufferMaxSize = 2048;
+ int32_t acc_buffer[kAccBufferMaxSize];
+ assert(kAccBufferMaxSize >= output_depth);
+ const int kOutputPixelsInAccBuffer = kAccBufferMaxSize / output_depth;
+ const int kAccBufferActualSize = kOutputPixelsInAccBuffer * output_depth;
+ assert(kOutputPixelsInAccBuffer * output_depth <= kAccBufferActualSize);
+ assert(kAccBufferActualSize <= kAccBufferMaxSize);
+ assert(kOutputPixelsInAccBuffer >= 1);
+ UNUSED_RELEASE(kAccBufferActualSize);
+
+ // row_accum_func will point to the core accumulation function to be used
+ // for this DepthwiseConv op.
+ using row_accum_func_t = decltype(&QuantizedDepthwiseConvAccumRowGeneric);
+ row_accum_func_t row_accum_func = nullptr;
+
+#define TFMINI_USE_DEPTHWISECONV_KERNEL(ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER) \
+ if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \
+ (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \
+ depth_multiplier == FIXED_DEPTH_MULTIPLIER) \
+ { \
+ row_accum_func = \
+ QuantizedDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER>; \
+ }
+
+#ifdef USE_NEON
+ // We go over our list of kernels by decreasing order of preference
+ // for the cases where multiple kernels could apply.
+
+ // Start with the fastest kernels: AllowStrided=false, fixed input depth.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 1, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 1, 4)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 4)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 12, 1)
+
+ // Next come the strided kernels: AllowStrided=true, fixed input depth.
+ // They are a bit less efficient, but allow stride!=1.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 16, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 16)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 20)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 2, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 4, 1)
+
+ // Finally, the kernels allowing a variable input depth,
+ // these are the least efficient but most general kernels.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 3)
+#endif // USE_NEON
+
+ // No matching fast kernel found, use slow fallback.
+ if (!row_accum_func)
+ {
+ row_accum_func = QuantizedDepthwiseConvAccumRowGeneric;
+ }
+
+#undef TFMINI_USE_DEPTHWISECONV_KERNEL
+
+ const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2);
+ const int input_batch_stride = input_height_stride * input_shape.Dims(1);
+ const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2);
+
+ // Now that we have determined row_accum_func, we can start work.
+ uint8_t *output_ptr = output_data;
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ const int filter_y_start =
+ std::max(0, (-in_y_origin + dilation_height_factor - 1) / dilation_height_factor);
+ const int filter_y_end =
+ std::min(filter_height, (input_height - in_y_origin + dilation_height_factor - 1) /
+ dilation_height_factor);
+ for (int out_x_buffer_start = 0; out_x_buffer_start < output_width;
+ out_x_buffer_start += kOutputPixelsInAccBuffer)
+ {
+ const int out_x_buffer_end =
+ std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer);
+ // We call a 'pixel' a group of activation that share all but the
+ // 'depth'/'channel' coordinate. num_output_pixels is the number of
+ // output pixels that we will accumulate in this loop iteration.
+ const int num_output_pixels = out_x_buffer_end - out_x_buffer_start;
+ // Initialize our local accumulator with the bias values, so we don't
+ // have to add them later.
+ DepthwiseConvInitAccBuffer(num_output_pixels, output_depth, bias_data, acc_buffer);
+ // Accumulation loop. Most of the time should be spent in here.
+ for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ row_accum_func(stride_width, dilation_width_factor, input_depth, input_width,
+ input_data + in_y * input_height_stride + b * input_batch_stride,
+ input_offset, pad_width, depth_multiplier, filter_width,
+ filter_data + filter_y * filter_height_stride, filter_offset,
+ out_x_buffer_start, out_x_buffer_end, output_depth, acc_buffer);
+ }
+ // Finished accumulating int32 values. Now need to convert them to
+ // the final 8bit form and store them.
+ const int num_output_values = output_depth * num_output_pixels;
+ int i = 0;
+#ifdef USE_NEON
+ using gemmlowp::RoundingDivideByPOT;
+ const int32x4_t output_offset_vec = vdupq_n_s32(output_offset);
+ const int32x4_t output_activation_min_vec = vdupq_n_s32(output_activation_min);
+ const int32x4_t output_activation_max_vec = vdupq_n_s32(output_activation_max);
+ // Handle 16 values at once.
+ // This allows us to issue 4 mutually independent int32
+ // multiplications (vqrdmulh), which should alleviate most of their
+ // high latency.
+ for (; i <= num_output_values - 16; i += 16)
+ {
+ int32x4_t acc[4];
+ for (int j = 0; j < 4; j++)
+ {
+ acc[j] = vld1q_s32(acc_buffer + i + 4 * j);
+ }
+
+ if (!shift_left)
+ {
+ // Fixed-point multiplication.
+ for (int j = 0; j < 4; j++)
+ {
+ acc[j] = vqrdmulhq_n_s32(acc[j], output_multiplier);
+ }
+ for (int j = 0; j < 4; j++)
+ {
+ acc[j] = RoundingDivideByPOT(acc[j], -output_shift);
+ }
+ }
+ else
+ {
+ // Fixed-point multiplication.
+ for (int j = 0; j < 4; j++)
+ {
+ acc[j] = vmulq_n_s32(acc[j], multiplier_power_of_two);
+ acc[j] = vqrdmulhq_n_s32(acc[j], output_multiplier);
+ }
+ }
+ // Add the output offset.
+ for (int j = 0; j < 4; j++)
+ {
+ acc[j] = vaddq_s32(acc[j], output_offset_vec);
+ }
+ // Apply the activation function.
+ for (int j = 0; j < 4; j++)
+ {
+ acc[j] = vmaxq_s32(acc[j], output_activation_min_vec);
+ }
+ for (int j = 0; j < 4; j++)
+ {
+ acc[j] = vminq_s32(acc[j], output_activation_max_vec);
+ }
+ // Saturating cast to uint8_t and store to destination.
+ int16x4_t acc_s16[4];
+ for (int j = 0; j < 4; j++)
+ {
+ acc_s16[j] = vqmovn_s32(acc[j]);
+ }
+ const int16x8_t res_s16_0 = vcombine_s16(acc_s16[0], acc_s16[1]);
+ const int16x8_t res_s16_1 = vcombine_s16(acc_s16[2], acc_s16[3]);
+ const uint8x8_t res_u8_0 = vqmovun_s16(res_s16_0);
+ const uint8x8_t res_u8_1 = vqmovun_s16(res_s16_1);
+ vst1q_u8(output_ptr, vcombine_u8(res_u8_0, res_u8_1));
+ output_ptr += 16;
+ }
+ // Handle 8 values at once.
+ // Not as good as 16 (now we're only issuing 2 mutually independent
+ // vqrdmulh instructions, so we're probably paying for their high
+ // latency).
+ for (; i <= num_output_values - 8; i += 8)
+ {
+ int32x4_t acc0 = vld1q_s32(acc_buffer + i);
+ int32x4_t acc1 = vld1q_s32(acc_buffer + i + 4);
+ if (!shift_left)
+ {
+ // Fixed-point multiplication.
+ acc0 = vqrdmulhq_n_s32(acc0, output_multiplier);
+ acc1 = vqrdmulhq_n_s32(acc1, output_multiplier);
+ // Rounding right shift.
+ acc0 = RoundingDivideByPOT(acc0, -output_shift);
+ acc1 = RoundingDivideByPOT(acc1, -output_shift);
+ }
+ else
+ {
+ // Fixed-point multiplication.
+ acc0 = vmulq_n_s32(acc0, multiplier_power_of_two);
+ acc0 = vqrdmulhq_n_s32(acc0, output_multiplier);
+
+ acc1 = vmulq_n_s32(acc1, multiplier_power_of_two);
+ acc1 = vqrdmulhq_n_s32(acc1, output_multiplier);
+ }
+ // Add the output offset.
+ acc0 = vaddq_s32(acc0, output_offset_vec);
+ acc1 = vaddq_s32(acc1, output_offset_vec);
+ // Apply the activation function.
+ acc0 = vmaxq_s32(acc0, output_activation_min_vec);
+ acc1 = vmaxq_s32(acc1, output_activation_min_vec);
+ acc0 = vminq_s32(acc0, output_activation_max_vec);
+ acc1 = vminq_s32(acc1, output_activation_max_vec);
+ // Saturating cast to uint8_t and store to destination.
+ const int16x4_t acc0_s16 = vqmovn_s32(acc0);
+ const int16x4_t acc1_s16 = vqmovn_s32(acc1);
+ const int16x8_t res_s16 = vcombine_s16(acc0_s16, acc1_s16);
+ const uint8x8_t res_u8 = vqmovun_s16(res_s16);
+ vst1_u8(output_ptr, res_u8);
+ output_ptr += 8;
+ }
+ // Handle 4 values at once. Now we're paying the full price of the
+ // high latency of vqrdmulh. Also, storing only 4 bytes at the end
+ // (without any alignment) can only be done 1 byte at a time.
+ // Yet, that is still worth doing to minimize the amount of leftover
+ // that will have to go through the very slow scalar code.
+ for (; i <= num_output_values - 4; i += 4)
+ {
+ int32x4_t acc = vld1q_s32(acc_buffer + i);
+ if (!shift_left)
+ {
+ // Fixed-point multiplication.
+ acc = vqrdmulhq_n_s32(acc, output_multiplier);
+ // Rounding right shift.
+ acc = RoundingDivideByPOT(acc, -output_shift);
+ }
+ else
+ {
+ // Fixed-point multiplication.
+ acc = vmulq_n_s32(acc, multiplier_power_of_two);
+ acc = vqrdmulhq_n_s32(acc, output_multiplier);
+ }
+ // Add the output offset.
+ acc = vaddq_s32(acc, output_offset_vec);
+ // Apply the activation function.
+ acc = vmaxq_s32(acc, output_activation_min_vec);
+ acc = vminq_s32(acc, output_activation_max_vec);
+ // Saturating cast to uint8_t and store to destination.
+ const int16x4_t acc_s16 = vqmovn_s32(acc);
+ const int16x8_t res_s16 = vcombine_s16(acc_s16, acc_s16);
+ const uint8x8_t res_u8 = vqmovun_s16(res_s16);
+ vst1_lane_u8(output_ptr + 0, res_u8, 0);
+ vst1_lane_u8(output_ptr + 1, res_u8, 1);
+ vst1_lane_u8(output_ptr + 2, res_u8, 2);
+ vst1_lane_u8(output_ptr + 3, res_u8, 3);
+ output_ptr += 4;
+ }
+#endif // USE_NEON
+
+ // Handle leftover values, one by one. This is very slow.
+ for (; i < num_output_values; i++)
+ {
+ int32_t acc = acc_buffer[i];
+ acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ acc += output_offset;
+ acc = std::max(acc, output_activation_min);
+ acc = std::min(acc, output_activation_max);
+ *output_ptr++ = static_cast<uint8_t>(acc);
+ }
+ }
+ }
+ }
+}
+
+} // namespace optimized
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_UINT8_H__
diff --git a/compute/cker/include/cker/operation/optimized/MaxPool.h b/compute/cker/include/cker/operation/optimized/MaxPool.h
deleted file mode 100644
index 07a14aee4..000000000
--- a/compute/cker/include/cker/operation/optimized/MaxPool.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_CKER_OPTIMIZED_MAX_POOL_H__
-#define __NNFW_CKER_OPTIMIZED_MAX_POOL_H__
-
-#if defined(CKER_OPTIMIZED_EIGEN)
-#include "cker/eigen/Utils.h"
-#include "cker/Shape.h"
-#include "cker/Types.h"
-#include "cker/Utils.h"
-#include <Eigen/Core>
-
-namespace nnfw
-{
-namespace cker
-{
-namespace optimized
-{
-
-// TODO Change to apply neon for this function if it is faster
-inline void MaxPool(const PoolParams &params, const Shape &input_shape, const float *input_data,
- const Shape &output_shape, float *output_data)
-{
- assert(input_shape.DimensionsCount() == 4);
- assert(output_shape.DimensionsCount() == 4);
- const int batches = MatchingDim(input_shape, 0, output_shape, 0);
- const int input_height = input_shape.Dims(1);
- const int input_width = input_shape.Dims(2);
- const int output_height = output_shape.Dims(1);
- const int output_width = output_shape.Dims(2);
- const int stride_height = params.stride_height;
- const int stride_width = params.stride_width;
-
- const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
- auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
- // Prefill the output to minimum representable float value
- out_mat.setConstant(std::numeric_limits<float>::lowest());
- for (int b = 0; b < batches; ++b)
- {
- for (int h = 0; h < input_height; ++h)
- {
- for (int w = 0; w < input_width; ++w)
- {
- // (h_start, h_end) * (w_start, w_end) is the range that the input
- // vector projects to.
- int hpad = h + params.padding_values.height;
- int wpad = w + params.padding_values.width;
- int h_start =
- (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
- int h_end = std::min(hpad / stride_height + 1, output_height);
- int w_start =
- (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
- int w_end = std::min(wpad / stride_width + 1, output_width);
- // compute elementwise sum
- for (int ph = h_start; ph < h_end; ++ph)
- {
- for (int pw = w_start; pw < w_end; ++pw)
- {
- int out_offset = NodeOffset(b, ph, pw, output_height, output_width);
- out_mat.col(out_offset) =
- out_mat.col(out_offset)
- .cwiseMax(in_mat.col(NodeOffset(b, h, w, input_height, input_width)));
- }
- }
- }
- }
- }
- const int flat_size = output_shape.FlatSize();
- for (int i = 0; i < flat_size; ++i)
- {
- output_data[i] = ActivationFunctionWithMinMax(output_data[i], params.float_activation_min,
- params.float_activation_max);
- }
-}
-
-} // namespace optimized
-} // namespace cker
-} // namespace nnfw
-
-#endif // defined(CKER_OPTIMIZED_EIGEN)
-
-#endif // __NNFW_CKER_OPTIMIZED_MAX_POOL_H__
diff --git a/compute/cker/include/cker/operation/optimized/OptimizedUtils.h b/compute/cker/include/cker/operation/optimized/OptimizedUtils.h
new file mode 100644
index 000000000..3f4ff8afb
--- /dev/null
+++ b/compute/cker/include/cker/operation/optimized/OptimizedUtils.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_OPTIMIZED_OPTIMIZED_UTILS_H__
+#define __NNFW_CKER_OPTIMIZED_OPTIMIZED_UTILS_H__
+
+#include "cker/Types.h"
+#include "cker/Shape.h"
+
+#include <stdexcept>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace optimized
+{
+
+// Copy one convolution input patch into a single column of the im2col buffer.
+//
+// For the output position (b, h, w), gathers the kheight x kwidth x in_depth
+// input window selected by the stride/padding parameters into
+// conv_buffer_data[buffer_id * single_buffer_length ...]. Window elements that
+// fall outside the input image are filled with `zero_byte` via memset.
+// NOTE(review): because padding is written with memset, `zero_byte` is a raw
+// byte pattern -- that is only meaningful for single-byte T (e.g. the uint8
+// zero point) or for zero_byte == 0; confirm callers respect this for wider T.
+template <typename T>
+inline void ExtractPatchIntoBufferColumn(const Shape &input_shape, int w, int h, int b, int kheight,
+                                         int kwidth, int stride_width, int stride_height,
+                                         int pad_width, int pad_height, int in_width, int in_height,
+                                         int in_depth, int single_buffer_length, int buffer_id,
+                                         const T *in_data, T *conv_buffer_data, uint8_t zero_byte)
+{
+  assert(input_shape.DimensionsCount() == 4);
+  // This chunk of code reshapes all the inputs corresponding to
+  // output (b, h, w) to a column vector in conv_buffer(:, buffer_id).
+  const int kwidth_times_indepth = kwidth * in_depth;
+  const int inwidth_times_indepth = in_width * in_depth;
+  // "Ungated" bounds may extend past the image; the *_end/*_start values below
+  // clamp them to the valid input range.
+  const int ih_ungated_start = h * stride_height - pad_height;
+  const int ih_ungated_end = (ih_ungated_start + kheight);
+  const int ih_end = std::min(ih_ungated_end, in_height);
+  const int iw_ungated_start = w * stride_width - pad_width;
+  const int iw_ungated_end = (iw_ungated_start + kwidth);
+  const int iw_end = std::min(iw_ungated_end, in_width);
+  // If the patch is off the edge of the input image, skip writing those rows
+  // and columns from the patch into the output array.
+  const int h_offset = std::max(0, -ih_ungated_start);
+  const int w_offset = std::max(0, -iw_ungated_start);
+  const int ih_start = std::max(0, ih_ungated_start);
+  const int iw_start = std::max(0, iw_ungated_start);
+  // Number of buffer elements one in-bounds input row contributes.
+  const int single_row_num = std::min(kwidth - w_offset, in_width - iw_start) * in_depth;
+  const int output_row_offset = (buffer_id * single_buffer_length);
+  int out_offset = output_row_offset + (h_offset * kwidth + w_offset) * in_depth;
+  int in_offset = Offset(input_shape, b, ih_start, iw_start, 0);
+
+  // Express all of the calculations as padding around the input patch.
+  const int top_padding = h_offset;
+  const int bottom_padding = (ih_ungated_end - ih_end);
+  const int left_padding = w_offset;
+  const int right_padding = (iw_ungated_end - iw_end);
+  assert(single_row_num == ((kwidth - (left_padding + right_padding)) * in_depth));
+
+  // Write out zeroes to the elements representing the top rows of the input
+  // patch that are off the edge of the input image.
+  if (top_padding > 0)
+  {
+    const int top_row_elements = (top_padding * kwidth * in_depth);
+    memset(conv_buffer_data + output_row_offset, zero_byte, (top_row_elements * sizeof(T)));
+  }
+
+  // If the patch is on the interior of the input image horizontally, just copy
+  // over the rows sequentially, otherwise add zero padding at the start or end.
+  if ((left_padding == 0) && (right_padding == 0))
+  {
+    for (int ih = ih_start; ih < ih_end; ++ih)
+    {
+      memcpy(conv_buffer_data + out_offset, in_data + in_offset, single_row_num * sizeof(T));
+      out_offset += kwidth_times_indepth;
+      in_offset += inwidth_times_indepth;
+    }
+  }
+  else
+  {
+    for (int ih = ih_start; ih < ih_end; ++ih)
+    {
+      if (left_padding > 0)
+      {
+        const int left_start = (out_offset - (left_padding * in_depth));
+        memset(conv_buffer_data + left_start, zero_byte, (left_padding * in_depth * sizeof(T)));
+      }
+      memcpy(conv_buffer_data + out_offset, in_data + in_offset, single_row_num * sizeof(T));
+      if (right_padding > 0)
+      {
+        const int right_start = (out_offset + single_row_num);
+        memset(conv_buffer_data + right_start, zero_byte, (right_padding * in_depth * sizeof(T)));
+      }
+      out_offset += kwidth_times_indepth;
+      in_offset += inwidth_times_indepth;
+    }
+  }
+
+  // If the bottom of the patch falls off the input image, pad the values
+  // representing those input rows with zeroes.
+  if (bottom_padding > 0)
+  {
+    const int bottom_row_elements = (bottom_padding * kwidth * in_depth);
+    const int bottom_start =
+        output_row_offset + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth);
+    memset(conv_buffer_data + bottom_start, zero_byte, (bottom_row_elements * sizeof(T)));
+  }
+}
+
+// Im2col with dilation support -- not implemented yet; unconditionally fails.
+template <typename T>
+void DilatedIm2col(const ConvParams &params, uint8_t zero_byte, const Shape &input_shape,
+                   const T *input_data, const Shape &filter_shape, const Shape &output_shape,
+                   T *im2col_data)
+{
+  // Silence unused-parameter warnings until a real implementation lands.
+  (void)params, (void)zero_byte, (void)input_shape, (void)input_data;
+  (void)filter_shape, (void)output_shape, (void)im2col_data;
+  throw std::runtime_error{"NYI: cker DilatedIm2col"};
+}
+
+// Expand the 4-D NHWC input into im2col form: one buffer column per output
+// spatial position, each column holding the kheight x kwidth x depth patch
+// that the convolution at that position reads (out-of-image elements are
+// filled with zero_byte by ExtractPatchIntoBufferColumn).
+template <typename T>
+void Im2col(const ConvParams &params, int kheight, int kwidth, uint8_t zero_byte,
+            const Shape &input_shape, const T *input_data, const Shape &output_shape,
+            T *output_data)
+{
+  assert(input_shape.DimensionsCount() == 4);
+  assert(output_shape.DimensionsCount() == 4);
+
+  const int batch_count = MatchingDim(input_shape, 0, output_shape, 0);
+  const int in_h = input_shape.Dims(1);
+  const int in_w = input_shape.Dims(2);
+  const int in_d = input_shape.Dims(3);
+  const int out_h = output_shape.Dims(1);
+  const int out_w = output_shape.Dims(2);
+  const int out_d = output_shape.Dims(3);
+
+  // Column index into the im2col buffer; advances once per output (b, y, x).
+  int column = 0;
+  for (int batch = 0; batch < batch_count; ++batch)
+  {
+    for (int y = 0; y < out_h; ++y)
+    {
+      for (int x = 0; x < out_w; ++x)
+      {
+        ExtractPatchIntoBufferColumn(input_shape, x, y, batch, kheight, kwidth,
+                                     params.stride_width, params.stride_height,
+                                     params.padding_values.width, params.padding_values.height,
+                                     in_w, in_h, in_d, out_d, column, input_data, output_data,
+                                     zero_byte);
+        ++column;
+      }
+    }
+  }
+}
+
+} // namespace optimized
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_OPTIMIZED_OPTIMIZED_UTILS_H__
diff --git a/compute/cker/include/cker/operation/optimized/SoftMax.h b/compute/cker/include/cker/operation/optimized/SoftMax.h
deleted file mode 100644
index e44f251d0..000000000
--- a/compute/cker/include/cker/operation/optimized/SoftMax.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_CKER_OPTIMIZED_SOFTMAX_H__
-#define __NNFW_CKER_OPTIMIZED_SOFTMAX_H__
-
-#if defined(CKER_OPTIMIZED_EIGEN)
-
-#include "cker/eigen/Utils.h"
-#include "cker/Shape.h"
-#include "cker/Types.h"
-#include <Eigen/Core>
-
-namespace nnfw
-{
-namespace cker
-{
-namespace optimized
-{
-
-inline void Softmax(const SoftmaxParams &params, const Shape &input_shape, const float *input_data,
- const Shape &output_shape, float *output_data)
-{
- // Validate whether if shapes of input and output are the same
- MatchingFlatSize(input_shape, output_shape);
-
- const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
- auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
- // Compute the exponential first, removing the max coefficient for numerical
- // stability.
- out_mat = (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * params.beta;
- // We are separating out the exp function so that exp can be vectorized.
- out_mat = out_mat.array().exp();
- // Normalize to get the activations.
- Eigen::Array<float, 1, Eigen::Dynamic> scale = out_mat.array().colwise().sum().inverse();
- out_mat.array().rowwise() *= scale;
-}
-
-} // namespace optimized
-} // namespace cker
-} // namespace nnfw
-
-#endif // defined(CKER_OPTIMIZED_EIGEN)
-
-#endif // __NNFW_CKER_OPTIMIZED_SOFTMAX_H__
diff --git a/compute/cker/include/cker/operation/reference/AveragePool.h b/compute/cker/include/cker/operation/reference/AveragePool.h
deleted file mode 100644
index 3ddab4b24..000000000
--- a/compute/cker/include/cker/operation/reference/AveragePool.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_CKER_REFERENCE_AVERAGE_POOL_H__
-#define __NNFW_CKER_REFERENCE_AVERAGE_POOL_H__
-
-#include "cker/Shape.h"
-#include "cker/Types.h"
-#include "cker/Utils.h"
-
-namespace nnfw
-{
-namespace cker
-{
-namespace reference
-{
-
-inline void AveragePool(const PoolParams &params, const Shape &input_shape, const float *input_data,
- const Shape &output_shape, float *output_data)
-{
- assert(input_shape.DimensionsCount() == 4);
- assert(output_shape.DimensionsCount() == 4);
- const int batches = MatchingDim(input_shape, 0, output_shape, 0);
- const int depth = MatchingDim(input_shape, 3, output_shape, 3);
- const int input_height = input_shape.Dims(1);
- const int input_width = input_shape.Dims(2);
- const int output_height = output_shape.Dims(1);
- const int output_width = output_shape.Dims(2);
- const int stride_height = params.stride_height;
- const int stride_width = params.stride_width;
- for (int batch = 0; batch < batches; ++batch)
- {
- for (int out_y = 0; out_y < output_height; ++out_y)
- {
- for (int out_x = 0; out_x < output_width; ++out_x)
- {
- const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
- const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
- // Compute the boundaries of the filter region clamped so as to
- // ensure that the filter window fits in the input array.
- const int filter_x_start = std::max(0, -in_x_origin);
- const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
- const int filter_y_start = std::max(0, -in_y_origin);
- const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
- int filter_count = (filter_y_end - filter_y_start) * (filter_x_end - filter_x_start);
- if (filter_count <= 0)
- {
- continue;
- }
- for (int channel = 0; channel < depth; ++channel)
- {
- float total = 0.f;
- for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
- {
- for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
- {
- const int in_x = in_x_origin + filter_x;
- const int in_y = in_y_origin + filter_y;
- total += input_data[Offset(input_shape, batch, in_y, in_x, channel)];
- }
- }
- const float average = total / (float)filter_count;
- output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
- ActivationFunctionWithMinMax(average, params.float_activation_min,
- params.float_activation_max);
- }
- }
- }
- }
-}
-
-} // namespace reference
-} // namespace cker
-} // namespace nnfw
-
-#endif // __NNFW_CKER_REFERENCE_AVERAGE_POOL_H__
diff --git a/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h b/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h
new file mode 100644
index 000000000..438a67124
--- /dev/null
+++ b/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__
+#define __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__
+
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+#include <cmath>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace reference
+{
+
+// Element-wise binary operation: output[i] = clamp(fn(input1[i], input2[i]))
+// using the quantized activation bounds from `params`. All three shapes must
+// have the same flat size (enforced by MatchingFlatSize); broadcasting is not
+// handled here.
+// NOTE(review): std::function is used but this header only includes
+// cker/Shape.h, cker/Utils.h and <cmath>; <functional> is presumably reached
+// transitively -- confirm, or add the include explicitly.
+template <typename T>
+inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+                               const T *input1_data, const Shape &input2_shape,
+                               const T *input2_data, const Shape &output_shape, T *output_data,
+                               const std::function<T(const T &, const T &)> &fn)
+{
+  const int32_t flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    output_data[i] = ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
+                                                  params.quantized_activation_min,
+                                                  params.quantized_activation_max);
+  }
+}
+
+// Float specialization: applies `fn` element-wise and clamps the result to
+// the float activation range carried in `params`.
+template <>
+inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+                               const float *input1_data, const Shape &input2_shape,
+                               const float *input2_data, const Shape &output_shape,
+                               float *output_data,
+                               const std::function<float(const float &, const float &)> &fn)
+{
+  const int total = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  const float act_min = params.float_activation_min;
+  const float act_max = params.float_activation_max;
+  for (int idx = 0; idx < total; idx++)
+  {
+    const float raw = fn(input1_data[idx], input2_data[idx]);
+    output_data[idx] = ActivationFunctionWithMinMax(raw, act_min, act_max);
+  }
+}
+
+} // namespace reference
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__
diff --git a/compute/cker/include/cker/operation/reference/Conv.h b/compute/cker/include/cker/operation/reference/Conv.h
new file mode 100644
index 000000000..86e8b5143
--- /dev/null
+++ b/compute/cker/include/cker/operation/reference/Conv.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_REFERENCE_CONV_H__
+#define __NNFW_CKER_REFERENCE_CONV_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+#include <cmath>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace reference
+{
+
+// Reference float 2-D convolution.
+//
+// Layouts (from the indexing below): input/output are NHWC, filter is
+// (out_channel, filter_y, filter_x, in_channel).
+// output(b, y, x, oc) = act(bias[oc] + sum over (fy, fx, ic) of
+//     input(b, y*stride_h - pad_h + fy*dilation_h,
+//              x*stride_w - pad_w + fx*dilation_w, ic) * filter(oc, fy, fx, ic))
+// clamped to [float_activation_min, float_activation_max]. Input positions
+// outside the image contribute zero (implicit zero padding). bias_data may be
+// null, in which case the bias term is 0.
+inline void Conv(const ConvParams &params, const Shape &input_shape, const float *input_data,
+                 const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
+                 const float *bias_data, const Shape &output_shape, float *output_data)
+{
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  assert(input_shape.DimensionsCount() == 4);
+  assert(filter_shape.DimensionsCount() == 4);
+  assert(output_shape.DimensionsCount() == 4);
+  // bias_shape only participates in the debug-build assert below.
+  UNUSED_RELEASE(bias_shape);
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  if (bias_data)
+  {
+    assert(bias_shape.FlatSize() == output_depth);
+  }
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  for (int batch = 0; batch < batches; ++batch)
+  {
+    for (int out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int out_channel = 0; out_channel < output_depth; ++out_channel)
+        {
+          const int in_x_origin = (out_x * stride_width) - pad_width;
+          const int in_y_origin = (out_y * stride_height) - pad_height;
+          float total = 0.f;
+          for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int in_x = in_x_origin + dilation_width_factor * filter_x;
+              const int in_y = in_y_origin + dilation_height_factor * filter_y;
+              // If the location is outside the bounds of the input image,
+              // use zero as a default value.
+              if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
+              {
+                // Base offsets for the innermost channel loop; channels are
+                // contiguous in the last dimension.
+                const int in_offset = Offset(input_shape, batch, in_y, in_x, 0);
+                const int filter_offset = Offset(filter_shape, out_channel, filter_y, filter_x, 0);
+                for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+                {
+                  float input_value = input_data[in_offset + in_channel];
+                  float filter_value = filter_data[filter_offset + in_channel];
+                  total += (input_value * filter_value);
+                }
+              }
+            }
+          }
+          float bias_value = 0.0f;
+          if (bias_data)
+          {
+            bias_value = bias_data[out_channel];
+          }
+          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
+              ActivationFunctionWithMinMax(total + bias_value, output_activation_min,
+                                           output_activation_max);
+        }
+      }
+    }
+  }
+}
+
+// Reference quantized (uint8) 2-D convolution.
+//
+// Accumulates in int32: acc = sum of (filter + filter_offset) *
+// (input + input_offset), plus the int32 bias when present. The accumulator
+// is then requantized with MultiplyByQuantizedMultiplier(output_multiplier,
+// output_shift), shifted by output_offset, clamped to
+// [quantized_activation_min, quantized_activation_max] and narrowed to uint8.
+// Out-of-image input positions are skipped (implicit zero padding).
+// NOTE(review): no overflow guard on the int32 accumulator -- presumably the
+// quantization parameters keep filter_height*filter_width*input_depth within
+// range, as in the upstream TFLite reference kernel.
+inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8_t *input_data,
+                 const Shape &filter_shape, const uint8_t *filter_data, const Shape &bias_shape,
+                 const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
+{
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int32_t input_offset = params.input_offset;
+  const int32_t filter_offset = params.weights_offset;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  assert(output_activation_min <= output_activation_max);
+
+  assert(input_shape.DimensionsCount() == 4);
+  assert(filter_shape.DimensionsCount() == 4);
+  assert(output_shape.DimensionsCount() == 4);
+  // bias_shape only participates in the debug-build assert below.
+  UNUSED_RELEASE(bias_shape);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  if (bias_data)
+  {
+    assert(bias_shape.FlatSize() == output_depth);
+  }
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  for (int batch = 0; batch < batches; ++batch)
+  {
+    for (int out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int out_channel = 0; out_channel < output_depth; ++out_channel)
+        {
+          const int in_x_origin = (out_x * stride_width) - pad_width;
+          const int in_y_origin = (out_y * stride_height) - pad_height;
+          int32_t acc = 0;
+          for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int in_x = in_x_origin + dilation_width_factor * filter_x;
+              const int in_y = in_y_origin + dilation_height_factor * filter_y;
+              // If the location is outside the bounds of the input image,
+              // use zero as a default value.
+              if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
+              {
+                // Base offsets for the innermost channel loop; channels are
+                // contiguous in the last dimension.
+                const int in_base = Offset(input_shape, batch, in_y, in_x, 0);
+                const int filter_base = Offset(filter_shape, out_channel, filter_y, filter_x, 0);
+                for (int in_channel = 0; in_channel < input_depth; in_channel++)
+                {
+                  int32_t input_val = input_data[in_channel + in_base];
+                  int32_t filter_val = filter_data[in_channel + filter_base];
+                  // Offsets re-center the uint8 values around their zero
+                  // points before the integer multiply-accumulate.
+                  acc += (filter_val + filter_offset) * (input_val + input_offset);
+                }
+              }
+            }
+          }
+          if (bias_data)
+          {
+            acc += bias_data[out_channel];
+          }
+          // Requantize from the int32 accumulator domain to uint8 output.
+          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+          acc += output_offset;
+          acc = std::max(acc, output_activation_min);
+          acc = std::min(acc, output_activation_max);
+          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
+              static_cast<uint8_t>(acc);
+        }
+      }
+    }
+  }
+}
+
+} // namespace reference
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_REFERENCE_CONV_H__
diff --git a/compute/cker/include/cker/operation/reference/MaxPool.h b/compute/cker/include/cker/operation/reference/MaxPool.h
deleted file mode 100644
index a0f0263c7..000000000
--- a/compute/cker/include/cker/operation/reference/MaxPool.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_CKER_REFERENCE_MAX_POOL_H__
-#define __NNFW_CKER_REFERENCE_MAX_POOL_H__
-
-#include "cker/Shape.h"
-#include "cker/Types.h"
-#include "cker/Utils.h"
-
-namespace nnfw
-{
-namespace cker
-{
-namespace reference
-{
-
-inline void MaxPool(const PoolParams &params, const Shape &input_shape, const float *input_data,
- const Shape &output_shape, float *output_data)
-{
- assert(input_shape.DimensionsCount() == 4);
- assert(output_shape.DimensionsCount() == 4);
- const int batches = MatchingDim(input_shape, 0, output_shape, 0);
- const int depth = MatchingDim(input_shape, 3, output_shape, 3);
- const int input_height = input_shape.Dims(1);
- const int input_width = input_shape.Dims(2);
- const int output_height = output_shape.Dims(1);
- const int output_width = output_shape.Dims(2);
- const int stride_height = params.stride_height;
- const int stride_width = params.stride_width;
- for (int batch = 0; batch < batches; ++batch)
- {
- for (int out_y = 0; out_y < output_height; ++out_y)
- {
- for (int out_x = 0; out_x < output_width; ++out_x)
- {
- for (int channel = 0; channel < depth; ++channel)
- {
- const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
- const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
- // Compute the boundaries of the filter region clamped so as to
- // ensure that the filter window fits in the input array.
- const int filter_x_start = std::max(0, -in_x_origin);
- const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
- const int filter_y_start = std::max(0, -in_y_origin);
- const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
- float max = std::numeric_limits<float>::lowest();
- for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
- {
- for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
- {
- const int in_x = in_x_origin + filter_x;
- const int in_y = in_y_origin + filter_y;
- max = std::max(max, input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
- }
- }
- output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
- ActivationFunctionWithMinMax(max, params.float_activation_min,
- params.float_activation_max);
- }
- }
- }
- }
-}
-
-} // namespace reference
-} // namespace cker
-} // namespace nnfw
-
-#endif // __NNFW_CKER_REFERENCE_MAX_POOL_H__
diff --git a/compute/cker/include/cker/operation/reference/SoftMax.h b/compute/cker/include/cker/operation/reference/SoftMax.h
deleted file mode 100644
index 420cb319b..000000000
--- a/compute/cker/include/cker/operation/reference/SoftMax.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_CKER_REFERENCE_SOFTMAX_H__
-#define __NNFW_CKER_REFERENCE_SOFTMAX_H__
-
-#include "cker/Shape.h"
-#include "cker/Types.h"
-
-#include <cmath>
-
-namespace nnfw
-{
-namespace cker
-{
-namespace reference
-{
-
-inline void Softmax(const SoftmaxParams &params, const Shape &input_shape, const float *input_data,
- const Shape &output_shape, float *output_data)
-{
- const int trailing_dim = input_shape.DimensionsCount() - 1;
- const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
- const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-
- for (int i = 0; i < outer_size; ++i)
- {
- // Find max element value which we'll use to ensure numerical stability
- // taking advantage of the following equality:
- // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
- float max = std::numeric_limits<float>::lowest();
- for (int c = 0; c < depth; ++c)
- {
- max = std::max(max, input_data[i * depth + c]);
- }
-
- // Compute sum.
- float sum = 0.f;
- for (int c = 0; c < depth; ++c)
- {
- sum += std::exp((input_data[i * depth + c] - max) * params.beta);
- }
-
- // Compute result.
- for (int c = 0; c < depth; ++c)
- {
- output_data[i * depth + c] = std::exp((input_data[i * depth + c] - max) * params.beta) / sum;
- }
- }
-}
-
-} // namespace reference
-} // namespace cker
-} // namespace nnfw
-
-#endif // __NNFW_CKER_REFERENCE_SOFTMAX_H__
diff --git a/compute/ncnn/CMakeLists.txt b/compute/ncnn/CMakeLists.txt
deleted file mode 100644
index a8f50120f..000000000
--- a/compute/ncnn/CMakeLists.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-if(NOT BUILD_SRCN_KERNEL)
- message(STATUS "SRCN kernel library build: disabled")
- return()
-else(NOT BUILD_SRCN_KERNEL)
- message(STATUS "SRCN kernel library build: OK")
-endif()
-
-# Find and use pre-installed OpenMP
-find_package(OpenMP QUIET)
-if(NOT OpenMP_FOUND)
- return()
-endif(NOT OpenMP_FOUND)
-
-file(GLOB_RECURSE SOURCES src/*.cc)
-file(GLOB_RECURSE TESTS src/*_test.cc)
-list(REMOVE_ITEM SOURCES ${TESTS})
-
-add_library(nnfw_lib_srcn STATIC ${SOURCES})
-target_include_directories(nnfw_lib_srcn PUBLIC include)
-if(NOT TARGET OpenMP::OpenMP_CXX)
- find_package(Threads REQUIRED)
- add_library(OpenMP::OpenMP_CXX IMPORTED INTERFACE)
- set_property(TARGET OpenMP::OpenMP_CXX
- PROPERTY INTERFACE_COMPILE_OPTIONS ${OpenMP_CXX_FLAGS})
- # Only works if the same flag is passed to the linker; use CMake 3.9+ otherwise (Intel, AppleClang)
- set_property(TARGET OpenMP::OpenMP_CXX
- PROPERTY INTERFACE_LINK_LIBRARIES ${OpenMP_CXX_FLAGS} Threads::Threads)
-
-endif()
-target_link_libraries(nnfw_lib_srcn PRIVATE OpenMP::OpenMP_CXX)
-target_link_libraries(nnfw_lib_srcn PRIVATE nnfw_common)
-target_compile_definitions(nnfw_lib_srcn PRIVATE TIZEN) # ANDROID or TIZEN
-#target_compile_definitions(nnfw_lib_srcn PRIVATE NCNN) # Enable if ready
-set_target_properties(nnfw_lib_srcn PROPERTIES POSITION_INDEPENDENT_CODE ON)
diff --git a/compute/ncnn/README.md b/compute/ncnn/README.md
deleted file mode 100644
index 5c39d249a..000000000
--- a/compute/ncnn/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-### NCNN compute library
-
-This compute library is based on NCNN project (https://github.com/Tencent/ncnn) with custom optimization
-
-Current base commit: https://github.com/Tencent/ncnn/commit/0219f507b71bdb945d776c8586c162f2c22bba54
-
-Added files for custom optimization is placed on
-- Headers: include/ncnn/srcn
-- Soruces: src/srcn
diff --git a/compute/ncnn/include/ncnn/layer/binaryop.h b/compute/ncnn/include/ncnn/layer/binaryop.h
deleted file mode 100644
index 4ccfd94b4..000000000
--- a/compute/ncnn/include/ncnn/layer/binaryop.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-// Tencent is pleased to support the open source community by making ncnn available.
-//
-// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
-//
-// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// https://opensource.org/licenses/BSD-3-Clause
-//
-// Unless required by applicable law or agreed to in writing, software distributed
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations under the License.
-
-#ifndef __NCNN_LAYER_BINARYOP_H__
-#define __NCNN_LAYER_BINARYOP_H__
-
-#include "ncnn/mat.h"
-
-namespace nnfw
-{
-namespace ncnn
-{
-
-enum class BinaryOp
-{
- Operation_ADD = 0,
- Operation_SUB = 1,
- Operation_MUL = 2,
- Operation_DIV = 3,
- Operation_MAX = 4,
- Operation_MIN = 5,
- Operation_POW = 6,
- Operation_SQUAREDDIFFERENCE = 7
-};
-
-struct BinaryOpParam
-{
- BinaryOp op_type;
- float b;
-
- BinaryOpParam() : op_type{BinaryOp::Operation_ADD}, b{0.0f} {}
-};
-
-int ncnn_binary_op(const BinaryOpParam &param, const Mat &bottom_blob, const Mat &bottom_blob1,
- Mat &top_blob);
-// TODO Inplace function porting
-// int ncnn_binary_op_inplace(const BinaryParam &param, Mat &bottom_top_blob) const;
-// int ncnn_binary_op_inplace(const BinaryOpParam &param, std::vector<Mat> &bottom_top_blobs) const;
-
-} // namespace ncnn
-} // naemsapce nnfw
-
-#endif // __NCNN_LAYER_BINARYOP_H__
diff --git a/compute/ncnn/include/ncnn/layer/instance_norm.h b/compute/ncnn/include/ncnn/layer/instance_norm.h
deleted file mode 100644
index b7d89281d..000000000
--- a/compute/ncnn/include/ncnn/layer/instance_norm.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-// Tencent is pleased to support the open source community by making ncnn available.
-//
-// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
-//
-// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// https://opensource.org/licenses/BSD-3-Clause
-//
-// Unless required by applicable law or agreed to in writing, software distributed
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations under the License.
-
-#ifndef __NNFW_LAYER_INSTANCE_NORM_H_
-#define __NNFW_LAYER_INSTANCE_NORM_H_
-
-#include "ncnn/mat.h"
-#ifdef __ARM_NEON
-#include <arm_neon.h>
-#endif // __ARM_NEON
-
-namespace nnfw
-{
-namespace ncnn
-{
-
-void ncnn_instance_norm_rowmajor(Mat &in_mat, Mat &out_mat, Mat &gamma_mat, Mat &beta_mat,
- int channels, float eps);
-
-void ncnn_instance_norm_colmajor(Mat &in_mat, Mat &out_mat, Mat &gamma_mat, Mat &beta_mat,
- int channels, float eps);
-
-void ncnn_instance_norm_with_relu_rowmajor(Mat &in_mat, Mat &out_mat, Mat &gamma_mat, Mat &beta_mat,
- int channels, float eps, float slope);
-
-void ncnn_instance_norm_with_relu_colmajor(Mat &in_mat, Mat &out_mat, Mat &gamma_mat, Mat &beta_mat,
- int channels, float eps, float slope);
-
-} // namespace ncnn
-
-} // namespace nnfw
-
-#endif // __NNFW_LAYER_INSTANCE_NORM_H_
diff --git a/compute/ncnn/include/ncnn/mat.h b/compute/ncnn/include/ncnn/mat.h
deleted file mode 100644
index 2a577939d..000000000
--- a/compute/ncnn/include/ncnn/mat.h
+++ /dev/null
@@ -1,738 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_NCNN_MAT_H__
-#define __NNFW_NCNN_MAT_H__
-
-#include <stdlib.h>
-#include <string.h>
-#if __ARM_NEON
-#include <arm_neon.h>
-#endif
-
-namespace nnfw
-{
-namespace ncnn
-{
-
-// the three dimension matrix
-class Mat
-{
-public:
- // empty
- Mat();
- // vec
- Mat(int w, size_t elemsize = 4);
- // image
- Mat(int w, int h, size_t elemsize = 4);
- // dim
- Mat(int w, int h, int c, size_t elemsize = 4);
- // copy
- Mat(const Mat &m);
- // external vec
- Mat(int w, void *data, size_t elemsize = 4);
- // external image
- Mat(int w, int h, void *data, size_t elemsize = 4);
- // external dim
- Mat(int w, int h, int c, void *data, size_t elemsize = 4);
- // release
- ~Mat();
- // assign
- Mat &operator=(const Mat &m);
- // set all
- void fill(float v);
- template <typename T> void fill(T v);
- // deep copy
- Mat clone() const;
- // reshape vec
- Mat reshape(int w) const;
- // reshape image
- Mat reshape(int w, int h) const;
- // reshape dim
- Mat reshape(int w, int h, int c) const;
- // allocate vec
- void create(int w, size_t elemsize = 4);
- // allocate image
- void create(int w, int h, size_t elemsize = 4);
-// allocate dim
-#ifdef _MEMORY_TO_TIME_
- void create(int w, int h, int c, size_t elemsize = 4, bool isNew = false);
-#else
- void create(int w, int h, int c, size_t elemsize = 4);
-#endif
-#ifdef USE_OPENCL_INSIDE
- void create_empity_mat(int _w, int _h, int _c, size_t _elemsize);
-#endif
-
- // refcount++
- void addref();
- // refcount--
- void release();
-
- bool empty() const;
- size_t total() const;
-
- // data reference
- Mat channel(int c);
- const Mat channel(int c) const;
- float *row(int y);
- const float *row(int y) const;
- template <typename T> T *row(int y);
- template <typename T> const T *row(int y) const;
-
- // access raw data
- template <typename T> operator T *();
- template <typename T> operator const T *() const;
-
- // convenient access float vec element
- float &operator[](int i);
- const float &operator[](int i) const;
-
- enum
- {
- PIXEL_CONVERT_SHIFT = 16,
- PIXEL_FORMAT_MASK = 0x0000ffff,
- PIXEL_CONVERT_MASK = 0xffff0000,
-
- PIXEL_RGB = 1,
- PIXEL_BGR = (1 << 1),
- PIXEL_GRAY = (1 << 2),
- PIXEL_RGBA = (1 << 3),
-
- PIXEL_RGB2BGR = PIXEL_RGB | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
- PIXEL_RGB2GRAY = PIXEL_RGB | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
-
- PIXEL_BGR2RGB = PIXEL_BGR | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
- PIXEL_BGR2GRAY = PIXEL_BGR | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
-
- PIXEL_GRAY2RGB = PIXEL_GRAY | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
- PIXEL_GRAY2BGR = PIXEL_GRAY | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
-
- PIXEL_RGBA2RGB = PIXEL_RGBA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
- PIXEL_RGBA2BGR = PIXEL_RGBA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
- PIXEL_RGBA2GRAY = PIXEL_RGBA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
- };
-
-#ifdef _MEMORY_TO_TIME_
- static void from_pixels(const unsigned char *pixels, Mat &m, int type, int w, int h);
- static void from_pixels(const unsigned char *pixels, Mat &m, int type, int w, int h, int top,
- int bottom, int left, int right);
-#endif // _MEMORY_TO_TIME_
-
- // convenient construct from pixel data
- static Mat from_pixels(const unsigned char *pixels, int type, int w, int h);
- // convenient construct from pixel data and add the padding && only supports same PIXEL_RGB2BGR
- // and PIXEL_BGR2RGB now
- static Mat from_pixels(const unsigned char *pixels, int type, int w, int h, int top, int bottom,
- int left, int right);
- // convenient construct from pixel data and resize to specific size
- static Mat from_pixels_resize(const unsigned char *pixels, int type, int w, int h,
- int target_width, int target_height);
-
- // convenient export to pixel data
- void to_pixels(unsigned char *pixels, int type);
- // convenient export to pixel data and cut the padding && only supports same PIXEL_RGB2BGR and
- // PIXEL_BGR2RGB now
- void to_pixels(unsigned char *pixels, int type, int top, int bottom, int left, int right);
- // convenient export to pixel data and resize to specific size
- void to_pixels_resize(unsigned char *pixels, int type, int target_width, int target_height);
-
- // substract channel-wise mean values, then multiply by normalize values, pass 0 to skip
- void substract_mean_normalize(const float *mean_vals, const float *norm_vals);
-
- // convenient construct from half precisoin floating point data
- static Mat from_float16(const unsigned short *data, int size);
-
- // pointer to the data
- void *data;
-
- // pointer to the reference counter
- // when points to user-allocated data, the pointer is NULL
- int *refcount;
-
- // element size in bytes
- // 4 = float32/int32
- // 2 = float16
- // 1 = int8/uint8
- // 0 = empty
- size_t elemsize;
-
- // the dimensionality
- int dims;
-
- int w;
- int h;
- int c;
-
- size_t cstep;
-};
-
-// misc function
-// image pixel bilinear resize
-void resize_bilinear_c1(const unsigned char *src, int srcw, int srch, unsigned char *dst, int w,
- int h);
-void resize_bilinear_c3(const unsigned char *src, int srcw, int srch, unsigned char *dst, int w,
- int h);
-void resize_bilinear_c4(const unsigned char *src, int srcw, int srch, unsigned char *dst, int w,
- int h);
-
-// mat process
-enum
-{
- BORDER_CONSTANT = 0,
- BORDER_REPLICATE = 1,
-};
-void copy_make_border(const Mat &src, Mat &dst, int top, int bottom, int left, int right, int type,
- float v);
-void copy_cut_border(const Mat &src, Mat &dst, int top, int bottom, int left, int right);
-void resize_bilinear(const Mat &src, Mat &dst, int w, int h);
-
-// the alignment of all the allocated buffers
-#define MALLOC_ALIGN 16
-
-// Aligns a pointer to the specified number of bytes
-// ptr Aligned pointer
-// n Alignment size that must be a power of two
-template <typename _Tp> static inline _Tp *alignPtr(_Tp *ptr, int n = (int)sizeof(_Tp))
-{
- return (_Tp *)(((size_t)ptr + n - 1) & -n);
-}
-
-// Aligns a buffer size to the specified number of bytes
-// The function returns the minimum number that is greater or equal to sz and is divisible by n
-// sz Buffer size to align
-// n Alignment size that must be a power of two
-static inline size_t alignSize(size_t sz, int n) { return (sz + n - 1) & -n; }
-
-static inline void *fastMalloc(size_t size)
-{
- unsigned char *udata = (unsigned char *)malloc(size + sizeof(void *) + MALLOC_ALIGN);
- if (!udata)
- return 0;
- unsigned char **adata = alignPtr((unsigned char **)udata + 1, MALLOC_ALIGN);
- adata[-1] = udata;
- return adata;
-}
-
-static inline void fastFree(void *ptr)
-{
- if (ptr)
- {
- unsigned char *udata = ((unsigned char **)ptr)[-1];
- free(udata);
- }
-}
-
-// exchange-add operation for atomic operations on reference counters
-#if defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32)
-// atomic increment on the linux version of the Intel(tm) compiler
-#define NCNN_XADD(addr, delta) \
- (int)_InterlockedExchangeAdd(const_cast<void *>(reinterpret_cast<volatile void *>(addr)), delta)
-#elif defined __GNUC__
-#if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && \
- !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
-#ifdef __ATOMIC_ACQ_REL
-#define NCNN_XADD(addr, delta) \
- __c11_atomic_fetch_add((_Atomic(int) *)(addr), delta, __ATOMIC_ACQ_REL)
-#else
-#define NCNN_XADD(addr, delta) __atomic_fetch_add((_Atomic(int) *)(addr), delta, 4)
-#endif
-#else
-#if defined __ATOMIC_ACQ_REL && !defined __clang__
-// version for gcc >= 4.7
-#define NCNN_XADD(addr, delta) \
- (int)__atomic_fetch_add((unsigned *)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
-#else
-#define NCNN_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned *)(addr), (unsigned)(delta))
-#endif
-#endif
-#elif defined _MSC_VER && !defined RC_INVOKED
-#include <intrin.h>
-#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile *)addr, delta)
-#else
-static inline void NCNN_XADD(int *addr, int delta)
-{
- int tmp = *addr;
- *addr += delta;
- return tmp;
-}
-#endif
-
-inline Mat::Mat() : data(0), refcount(0), elemsize(0), dims(0), w(0), h(0), c(0), cstep(0) {}
-
-inline Mat::Mat(int _w, size_t _elemsize) : data(0), refcount(0), dims(0) { create(_w, _elemsize); }
-
-inline Mat::Mat(int _w, int _h, size_t _elemsize) : data(0), refcount(0), dims(0)
-{
- create(_w, _h, _elemsize);
-}
-
-inline Mat::Mat(int _w, int _h, int _c, size_t _elemsize) : data(0), refcount(0), dims(0)
-{
- create(_w, _h, _c, _elemsize);
-}
-
-inline Mat::Mat(const Mat &m)
- : data(m.data), refcount(m.refcount), elemsize(m.elemsize), dims(m.dims)
-{
- if (refcount)
- NCNN_XADD(refcount, 1);
-
- w = m.w;
- h = m.h;
- c = m.c;
-
- cstep = m.cstep;
-}
-
-inline Mat::Mat(int _w, void *_data, size_t _elemsize)
- : data(_data), refcount(0), elemsize(_elemsize), dims(1)
-{
- w = _w;
- h = 1;
- c = 1;
-
- cstep = w;
-}
-
-inline Mat::Mat(int _w, int _h, void *_data, size_t _elemsize)
- : data(_data), refcount(0), elemsize(_elemsize), dims(2)
-{
- w = _w;
- h = _h;
- c = 1;
-
- cstep = w * h;
-}
-
-inline Mat::Mat(int _w, int _h, int _c, void *_data, size_t _elemsize)
- : data(_data), refcount(0), elemsize(_elemsize), dims(3)
-{
- w = _w;
- h = _h;
- c = _c;
-
- cstep = alignSize(w * h * elemsize, 16) / elemsize;
-}
-
-inline Mat::~Mat() { release(); }
-
-inline Mat &Mat::operator=(const Mat &m)
-{
- if (this == &m)
- return *this;
-
- if (m.refcount)
- NCNN_XADD(m.refcount, 1);
-
- release();
-
- data = m.data;
- refcount = m.refcount;
- elemsize = m.elemsize;
-
- dims = m.dims;
- w = m.w;
- h = m.h;
- c = m.c;
-
- cstep = m.cstep;
-
- return *this;
-}
-
-inline void Mat::fill(float _v)
-{
- int size = total();
- float *ptr = (float *)data;
-
-#if __ARM_NEON
- int nn = size >> 2;
- int remain = size - (nn << 2);
-#else
- int remain = size;
-#endif // __ARM_NEON
-
-#if __ARM_NEON
- float32x4_t _c = vdupq_n_f32(_v);
-#if __aarch64__
- if (nn > 0)
- {
- asm volatile("0: \n"
- "subs %w0, %w0, #1 \n"
- "st1 {%4.4s}, [%1], #16 \n"
- "bne 0b \n"
- : "=r"(nn), // %0
- "=r"(ptr) // %1
- : "0"(nn), "1"(ptr),
- "w"(_c) // %4
- : "cc", "memory");
- }
-#else
- if (nn > 0)
- {
- asm volatile("0: \n"
- "subs %0, #1 \n"
- "vst1.f32 {%e4-%f4}, [%1 :128]!\n"
- "bne 0b \n"
- : "=r"(nn), // %0
- "=r"(ptr) // %1
- : "0"(nn), "1"(ptr),
- "w"(_c) // %4
- : "cc", "memory");
- }
-#endif // __aarch64__
-#endif // __ARM_NEON
- for (; remain > 0; remain--)
- {
- *ptr++ = _v;
- }
-}
-
-template <typename T> inline void Mat::fill(T _v)
-{
- int size = total();
- T *ptr = (T *)data;
- for (int i = 0; i < size; i++)
- {
- ptr[i] = _v;
- }
-}
-
-inline Mat Mat::clone() const
-{
- if (empty())
- return Mat();
-
- Mat m;
- if (dims == 1)
- m.create(w, elemsize);
- else if (dims == 2)
- m.create(w, h, elemsize);
- else if (dims == 3)
- m.create(w, h, c, elemsize);
-
- if (total() > 0)
- {
- memcpy(m.data, data, total() * elemsize);
- }
-
- return m;
-}
-
-inline Mat Mat::reshape(int _w) const
-{
- if (w * h * c != _w)
- return Mat();
-
- if (dims == 3 && cstep != (size_t)w * h)
- {
- Mat m;
- m.create(_w, elemsize);
-
- // flatten
- for (int i = 0; i < c; i++)
- {
- const void *ptr = (unsigned char *)data + i * cstep * elemsize;
- void *mptr = (unsigned char *)m.data + i * w * h * elemsize;
- memcpy(mptr, ptr, w * h * elemsize);
- }
-
- return m;
- }
-
- Mat m = *this;
-
- m.dims = 1;
- m.w = _w;
- m.h = 1;
- m.c = 1;
-
- m.cstep = _w;
-
- return m;
-}
-
-inline Mat Mat::reshape(int _w, int _h) const
-{
- if (w * h * c != _w * _h)
- return Mat();
-
- if (dims == 3 && cstep != (size_t)w * h)
- {
- Mat m;
- m.create(_w, _h, elemsize);
-
- // flatten
- for (int i = 0; i < c; i++)
- {
- const void *ptr = (unsigned char *)data + i * cstep * elemsize;
- void *mptr = (unsigned char *)m.data + i * w * h * elemsize;
- memcpy(mptr, ptr, w * h * elemsize);
- }
-
- return m;
- }
-
- Mat m = *this;
-
- m.dims = 2;
- m.w = _w;
- m.h = _h;
- m.c = 1;
-
- m.cstep = _w * _h;
-
- return m;
-}
-
-inline Mat Mat::reshape(int _w, int _h, int _c) const
-{
- if (w * h * c != _w * _h * _c)
- return Mat();
-
- if (dims < 3)
- {
- if ((size_t)_w * _h != alignSize(_w * _h * elemsize, 16) / elemsize)
- {
- Mat m;
- m.create(_w, _h, _c, elemsize);
-
- // align channel
- for (int i = 0; i < _c; i++)
- {
- const void *ptr = (unsigned char *)data + i * _w * _h * elemsize;
- void *mptr = (unsigned char *)m.data + i * m.cstep * m.elemsize;
- memcpy(mptr, ptr, _w * _h * elemsize);
- }
-
- return m;
- }
- }
- else if (c != _c)
- {
- // flatten and then align
- Mat tmp = reshape(_w * _h * _c);
- return tmp.reshape(_w, _h, _c);
- }
-
- Mat m = *this;
-
- m.dims = 3;
- m.w = _w;
- m.h = _h;
- m.c = _c;
-
- m.cstep = alignSize(_w * _h * elemsize, 16) / elemsize;
-
- return m;
-}
-
-inline void Mat::create(int _w, size_t _elemsize)
-{
- if (dims == 1 && w == _w && elemsize == _elemsize)
- return;
-
- release();
-
- elemsize = _elemsize;
-
- dims = 1;
- w = _w;
- h = 1;
- c = 1;
-
- cstep = w;
-
- if (total() > 0)
- {
- size_t totalsize = total() * elemsize;
- data = fastMalloc(totalsize + (int)sizeof(*refcount));
- refcount = (int *)(((unsigned char *)data) + totalsize);
- *refcount = 1;
- }
-}
-
-inline void Mat::create(int _w, int _h, size_t _elemsize)
-{
- if (dims == 2 && w == _w && h == _h && elemsize == _elemsize)
- return;
-
- release();
-
- elemsize = _elemsize;
-
- dims = 2;
- w = _w;
- h = _h;
- c = 1;
-
- cstep = w * h;
-
- if (total() > 0)
- {
- size_t totalsize = total() * elemsize;
- data = fastMalloc(totalsize + (int)sizeof(*refcount));
- refcount = (int *)(((unsigned char *)data) + totalsize);
- *refcount = 1;
- }
-}
-
-#ifdef _MEMORY_TO_TIME_
-inline void Mat::create(int _w, int _h, int _c, size_t _elemsize, bool isNew)
-{
- if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize)
- return;
-
- if (!isNew && dims == 3)
- {
- elemsize = _elemsize;
-
- w = _w;
- h = _h;
- c = _c;
-
- cstep = alignSize(w * h * elemsize, 16) / elemsize;
- return;
- }
-
- release();
-
- elemsize = _elemsize;
-
- dims = 3;
- w = _w;
- h = _h;
- c = _c;
-
- cstep = alignSize(w * h * elemsize, 16) / elemsize;
-
- if (total() > 0)
- {
- size_t totalsize = total() * elemsize;
- data = fastMalloc(totalsize + (int)sizeof(*refcount));
- refcount = (int *)(((unsigned char *)data) + totalsize);
- *refcount = 1;
- }
-}
-
-#else
-inline void Mat::create(int _w, int _h, int _c, size_t _elemsize)
-{
- if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize)
- return;
-
- release();
-
- elemsize = _elemsize;
-
- dims = 3;
- w = _w;
- h = _h;
- c = _c;
-
- cstep = alignSize(w * h * elemsize, 16) / elemsize;
-
- if (total() > 0)
- {
- size_t totalsize = total() * elemsize;
- data = fastMalloc(totalsize + (int)sizeof(*refcount));
- refcount = (int *)(((unsigned char *)data) + totalsize);
- *refcount = 1;
- }
-}
-#endif //_MEMORY_TO_TIME_
-
-#ifdef USE_OPENCL_INSIDE
-inline void Mat::create_empity_mat(int _w, int _h, int _c, size_t _elemsize)
-{
- if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize)
- return;
-
- release();
-
- elemsize = _elemsize;
-
- dims = 3;
- w = _w;
- h = _h;
- c = _c;
-
- cstep = alignSize(w * h * elemsize, 16) / elemsize;
- data = NULL;
-}
-#endif // USE_OPENCL_INSIDE
-
-inline void Mat::addref()
-{
- if (refcount)
- NCNN_XADD(refcount, 1);
-}
-
-inline void Mat::release()
-{
- if (refcount && NCNN_XADD(refcount, -1) == 1)
- fastFree(data);
-
- data = 0;
-
- elemsize = 0;
-
- dims = 0;
- w = 0;
- h = 0;
- c = 0;
-
- cstep = 0;
-
- refcount = 0;
-}
-
-inline bool Mat::empty() const { return data == 0 || total() == 0; }
-
-inline size_t Mat::total() const { return cstep * c; }
-
-inline Mat Mat::channel(int c)
-{
- return Mat(w, h, (unsigned char *)data + cstep * c * elemsize, elemsize);
-}
-
-inline const Mat Mat::channel(int c) const
-{
- return Mat(w, h, (unsigned char *)data + cstep * c * elemsize, elemsize);
-}
-
-inline float *Mat::row(int y) { return (float *)data + w * y; }
-
-inline const float *Mat::row(int y) const { return (const float *)data + w * y; }
-
-template <typename T> inline T *Mat::row(int y) { return (T *)data + w * y; }
-
-template <typename T> inline const T *Mat::row(int y) const { return (const T *)data + w * y; }
-
-template <typename T> inline Mat::operator T *() { return (T *)data; }
-
-template <typename T> inline Mat::operator const T *() const { return (const T *)data; }
-
-inline float &Mat::operator[](int i) { return ((float *)data)[i]; }
-
-inline const float &Mat::operator[](int i) const { return ((const float *)data)[i]; }
-
-} // namespace ncnn
-} // namespace nnfw
-
-#endif // __NNFW_NCNN_MAT_H__
diff --git a/compute/ncnn/include/ncnn/srcn/conv_type.h b/compute/ncnn/include/ncnn/srcn/conv_type.h
deleted file mode 100644
index 59152a094..000000000
--- a/compute/ncnn/include/ncnn/srcn/conv_type.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_CONV_TYPE_H__
-#define __NNFW_SRCN_CONV_TYPE_H__
-
-namespace nnfw
-{
-namespace srcn
-{
-
-enum convType_t
-{
- row_major = 0,
- col_major
-};
-
-struct convMat_t
-{
- int w;
- int h;
- int c;
- int n;
- float *data;
-};
-
-struct convParams_t
-{
- int kernel_w;
- int kernel_h;
- int stride_w;
- int stride_h;
- int dilation_w;
- int dilation_h;
- int padding;
- int pad_w;
- int pad_h;
-};
-
-struct winogradParams_t
-{
- int kernel_w;
- int kernel_h;
- int stride_w;
- int stride_h;
- int dilation_w;
- int dilation_h;
- int batch;
- int w;
- int h;
- int inch;
- int outch;
- int num_threads;
- convType_t conv_type;
- float *weight_data;
-};
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_CONV_TYPE_H__
diff --git a/compute/ncnn/include/ncnn/srcn/srcn_conv.h b/compute/ncnn/include/ncnn/srcn/srcn_conv.h
deleted file mode 100644
index 11130c0db..000000000
--- a/compute/ncnn/include/ncnn/srcn/srcn_conv.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_CONV_H__
-#define __NNFW_SRCN_CONV_H__
-
-#include "conv_type.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-int check_winograd(winogradParams_t &params);
-
-float *trans_weight2winograd(winogradParams_t &params, unsigned int *size = NULL);
-
-void winograd_release(float *winograd_weight);
-
-void srcn_convolution2D(const convMat_t &in_mat, const convMat_t &weights_mat, convMat_t &out_mat,
- const convParams_t &in_param, const float *winograd_weight, int num_threads,
- convType_t conv_type);
-
-void srcn_deconvolution2D(const convMat_t &in_mat, const convMat_t &weights_mat, convMat_t &out_mat,
- const convParams_t &in_param, int num_threads, convType_t conv_type);
-
-void *trans_weight2sparse(const convMat_t &weights_mat);
-
-void sparse_release(const int outch, void *ptr);
-
-void srcn_sparse_convolution2D(const convMat_t &in_mat, convMat_t &out_mat,
- const convParams_t &in_param, const void *sparse_weight,
- int number_threas, convType_t conv_type);
-
-void srcn_batch_convolution2D(const convMat_t &in_mat, const convMat_t &weights_mat,
- convMat_t &out_mat, const convParams_t &in_param,
- const float *winograd_weight, int num_threads, convType_t conv_type);
-
-void srcn_convolution2D_gpu(const convMat_t &in_mat, const convMat_t &weights_mat,
- convMat_t &out_mat, const convParams_t &in_param, convType_t conv_type);
-
-void srcn_convolution2D_dpu(const convMat_t &in_mat, const convMat_t &weights_mat,
- convMat_t &out_mat, const convParams_t &in_param, convType_t conv_type);
-
-void srcn_depthwise_conv(const convMat_t &in_mat, const convMat_t &weights_mat, convMat_t &out_mat,
- const convMat_t &bias, const convParams_t &in_param, int num_threads,
- convType_t conv_type);
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_CONV_H__
diff --git a/compute/ncnn/src/layer/arm/neon_mathfun.h b/compute/ncnn/src/layer/arm/neon_mathfun.h
deleted file mode 100644
index 6e3cb66c8..000000000
--- a/compute/ncnn/src/layer/arm/neon_mathfun.h
+++ /dev/null
@@ -1,315 +0,0 @@
-/* NEON implementation of sin, cos, exp and log
- *
- * Inspired by Intel Approximate Math library, and based on the
- * corresponding algorithms of the cephes math library
- */
-
-/* Copyright (C) 2011 Julien Pommier
- *
- * This software is provided 'as-is', without any express or implied
- * warranty. In no event will the authors be held liable for any damages
- * arising from the use of this software.
- *
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- *
- * 1. The origin of this software must not be misrepresented; you must not
- * claim that you wrote the original software. If you use this software
- * in a product, an acknowledgment in the product documentation would be
- * appreciated but is not required.
- * 2. Altered source versions must be plainly marked as such, and must not be
- * misrepresented as being the original software.
- * 3. This notice may not be removed or altered from any source distribution.
- *
- * (this is the zlib license)
- */
-
-#include <arm_neon.h>
-
-#define c_inv_mant_mask ~0x7f800000u
-#define c_cephes_SQRTHF 0.707106781186547524
-#define c_cephes_log_p0 7.0376836292E-2
-#define c_cephes_log_p1 -1.1514610310E-1
-#define c_cephes_log_p2 1.1676998740E-1
-#define c_cephes_log_p3 -1.2420140846E-1
-#define c_cephes_log_p4 +1.4249322787E-1
-#define c_cephes_log_p5 -1.6668057665E-1
-#define c_cephes_log_p6 +2.0000714765E-1
-#define c_cephes_log_p7 -2.4999993993E-1
-#define c_cephes_log_p8 +3.3333331174E-1
-#define c_cephes_log_q1 -2.12194440e-4
-#define c_cephes_log_q2 0.693359375
-
-/* natural logarithm computed for 4 simultaneous float
- * return NaN for x <= 0
- */
-static inline float32x4_t log_ps(float32x4_t x)
-{
- float32x4_t one = vdupq_n_f32(1);
-
- x = vmaxq_f32(x, vdupq_n_f32(0)); /* force flush to zero on denormal values */
- uint32x4_t invalid_mask = vcleq_f32(x, vdupq_n_f32(0));
-
- int32x4_t ux = vreinterpretq_s32_f32(x);
-
- int32x4_t emm0 = vshrq_n_s32(ux, 23);
-
- /* keep only the fractional part */
- ux = vandq_s32(ux, vdupq_n_s32(c_inv_mant_mask));
- ux = vorrq_s32(ux, vreinterpretq_s32_f32(vdupq_n_f32(0.5f)));
- x = vreinterpretq_f32_s32(ux);
-
- emm0 = vsubq_s32(emm0, vdupq_n_s32(0x7f));
- float32x4_t e = vcvtq_f32_s32(emm0);
-
- e = vaddq_f32(e, one);
-
- /* part2:
- * if( x < SQRTHF ) {
- * e -= 1;
- * x = x + x - 1.0;
- * } else { x = x - 1.0; }
- */
- uint32x4_t mask = vcltq_f32(x, vdupq_n_f32(c_cephes_SQRTHF));
- float32x4_t tmp = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(x), mask));
- x = vsubq_f32(x, one);
- e = vsubq_f32(e, vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(one), mask)));
- x = vaddq_f32(x, tmp);
-
- float32x4_t z = vmulq_f32(x, x);
-
- float32x4_t y = vdupq_n_f32(c_cephes_log_p0);
- y = vmulq_f32(y, x);
- y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p1));
- y = vmulq_f32(y, x);
- y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p2));
- y = vmulq_f32(y, x);
- y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p3));
- y = vmulq_f32(y, x);
- y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p4));
- y = vmulq_f32(y, x);
- y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p5));
- y = vmulq_f32(y, x);
- y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p6));
- y = vmulq_f32(y, x);
- y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p7));
- y = vmulq_f32(y, x);
- y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p8));
- y = vmulq_f32(y, x);
-
- y = vmulq_f32(y, z);
-
- tmp = vmulq_f32(e, vdupq_n_f32(c_cephes_log_q1));
- y = vaddq_f32(y, tmp);
-
- tmp = vmulq_f32(z, vdupq_n_f32(0.5f));
- y = vsubq_f32(y, tmp);
-
- tmp = vmulq_f32(e, vdupq_n_f32(c_cephes_log_q2));
- x = vaddq_f32(x, y);
- x = vaddq_f32(x, tmp);
- x = vreinterpretq_f32_u32(
- vorrq_u32(vreinterpretq_u32_f32(x), invalid_mask)); // negative arg will be NAN
- return x;
-}
-
-#define c_exp_hi 88.3762626647949f
-#define c_exp_lo -88.3762626647949f
-
-#define c_cephes_LOG2EF 1.44269504088896341
-#define c_cephes_exp_C1 0.693359375
-#define c_cephes_exp_C2 -2.12194440e-4
-
-#define c_cephes_exp_p0 1.9875691500E-4
-#define c_cephes_exp_p1 1.3981999507E-3
-#define c_cephes_exp_p2 8.3334519073E-3
-#define c_cephes_exp_p3 4.1665795894E-2
-#define c_cephes_exp_p4 1.6666665459E-1
-#define c_cephes_exp_p5 5.0000001201E-1
-
-/* exp() computed for 4 float at once */
-static inline float32x4_t exp_ps(float32x4_t x)
-{
- float32x4_t tmp, fx;
-
- float32x4_t one = vdupq_n_f32(1);
- x = vminq_f32(x, vdupq_n_f32(c_exp_hi));
- x = vmaxq_f32(x, vdupq_n_f32(c_exp_lo));
-
- /* express exp(x) as exp(g + n*log(2)) */
- fx = vmlaq_f32(vdupq_n_f32(0.5f), x, vdupq_n_f32(c_cephes_LOG2EF));
-
- /* perform a floorf */
- tmp = vcvtq_f32_s32(vcvtq_s32_f32(fx));
-
- /* if greater, substract 1 */
- uint32x4_t mask = vcgtq_f32(tmp, fx);
- mask = vandq_u32(mask, vreinterpretq_u32_f32(one));
-
- fx = vsubq_f32(tmp, vreinterpretq_f32_u32(mask));
-
- tmp = vmulq_f32(fx, vdupq_n_f32(c_cephes_exp_C1));
- float32x4_t z = vmulq_f32(fx, vdupq_n_f32(c_cephes_exp_C2));
- x = vsubq_f32(x, tmp);
- x = vsubq_f32(x, z);
-
- static const float cephes_exp_p[6] = {c_cephes_exp_p0, c_cephes_exp_p1, c_cephes_exp_p2,
- c_cephes_exp_p3, c_cephes_exp_p4, c_cephes_exp_p5};
- float32x4_t y = vld1q_dup_f32(cephes_exp_p + 0);
- float32x4_t c1 = vld1q_dup_f32(cephes_exp_p + 1);
- float32x4_t c2 = vld1q_dup_f32(cephes_exp_p + 2);
- float32x4_t c3 = vld1q_dup_f32(cephes_exp_p + 3);
- float32x4_t c4 = vld1q_dup_f32(cephes_exp_p + 4);
- float32x4_t c5 = vld1q_dup_f32(cephes_exp_p + 5);
-
- y = vmulq_f32(y, x);
- z = vmulq_f32(x, x);
-
- y = vaddq_f32(y, c1);
- y = vmulq_f32(y, x);
- y = vaddq_f32(y, c2);
- y = vmulq_f32(y, x);
- y = vaddq_f32(y, c3);
- y = vmulq_f32(y, x);
- y = vaddq_f32(y, c4);
- y = vmulq_f32(y, x);
- y = vaddq_f32(y, c5);
-
- y = vmulq_f32(y, z);
- y = vaddq_f32(y, x);
- y = vaddq_f32(y, one);
-
- /* build 2^n */
- int32x4_t mm;
- mm = vcvtq_s32_f32(fx);
- mm = vaddq_s32(mm, vdupq_n_s32(0x7f));
- mm = vshlq_n_s32(mm, 23);
- float32x4_t pow2n = vreinterpretq_f32_s32(mm);
-
- y = vmulq_f32(y, pow2n);
- return y;
-}
-
-#define c_minus_cephes_DP1 -0.78515625
-#define c_minus_cephes_DP2 -2.4187564849853515625e-4
-#define c_minus_cephes_DP3 -3.77489497744594108e-8
-#define c_sincof_p0 -1.9515295891E-4
-#define c_sincof_p1 8.3321608736E-3
-#define c_sincof_p2 -1.6666654611E-1
-#define c_coscof_p0 2.443315711809948E-005
-#define c_coscof_p1 -1.388731625493765E-003
-#define c_coscof_p2 4.166664568298827E-002
-#define c_cephes_FOPI 1.27323954473516 // 4 / M_PI
-
-/* evaluation of 4 sines & cosines at once.
- *
- * The code is the exact rewriting of the cephes sinf function.
- * Precision is excellent as long as x < 8192 (I did not bother to
- * take into account the special handling they have for greater values
- * -- it does not return garbage for arguments over 8192, though, but
- * the extra precision is missing).
- *
- * Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
- * surprising but correct result.
- *
- * Note also that when you compute sin(x), cos(x) is available at
- * almost no extra price so both sin_ps and cos_ps make use of
- * sincos_ps..
- */
-static inline void sincos_ps(float32x4_t x, float32x4_t *ysin, float32x4_t *ycos)
-{
- // any x
- float32x4_t xmm1, xmm2, xmm3, y;
-
- uint32x4_t emm2;
-
- uint32x4_t sign_mask_sin, sign_mask_cos;
- sign_mask_sin = vcltq_f32(x, vdupq_n_f32(0));
- x = vabsq_f32(x);
-
- /* scale by 4/Pi */
- y = vmulq_f32(x, vdupq_n_f32(c_cephes_FOPI));
-
- /* store the integer part of y in mm0 */
- emm2 = vcvtq_u32_f32(y);
- /* j=(j+1) & (~1) (see the cephes sources) */
- emm2 = vaddq_u32(emm2, vdupq_n_u32(1));
- emm2 = vandq_u32(emm2, vdupq_n_u32(~1));
- y = vcvtq_f32_u32(emm2);
-
- /* get the polynom selection mask
- * there is one polynom for 0 <= x <= Pi/4
- * and another one for Pi/4<x<=Pi/2
- *
- * Both branches will be computed.
- */
- uint32x4_t poly_mask = vtstq_u32(emm2, vdupq_n_u32(2));
-
- /* The magic pass: "Extended precision modular arithmetic"
- * x = ((x - y * DP1) - y * DP2) - y * DP3; */
- xmm1 = vmulq_n_f32(y, c_minus_cephes_DP1);
- xmm2 = vmulq_n_f32(y, c_minus_cephes_DP2);
- xmm3 = vmulq_n_f32(y, c_minus_cephes_DP3);
- x = vaddq_f32(x, xmm1);
- x = vaddq_f32(x, xmm2);
- x = vaddq_f32(x, xmm3);
-
- sign_mask_sin = veorq_u32(sign_mask_sin, vtstq_u32(emm2, vdupq_n_u32(4)));
- sign_mask_cos = vtstq_u32(vsubq_u32(emm2, vdupq_n_u32(2)), vdupq_n_u32(4));
-
- /* Evaluate the first polynom (0 <= x <= Pi/4) in y1,
- * and the second polynom (Pi/4 <= x <= 0) in y2 */
- float32x4_t z = vmulq_f32(x, x);
- float32x4_t y1, y2;
-
- y1 = vmulq_n_f32(z, c_coscof_p0);
- y2 = vmulq_n_f32(z, c_sincof_p0);
- y1 = vaddq_f32(y1, vdupq_n_f32(c_coscof_p1));
- y2 = vaddq_f32(y2, vdupq_n_f32(c_sincof_p1));
- y1 = vmulq_f32(y1, z);
- y2 = vmulq_f32(y2, z);
- y1 = vaddq_f32(y1, vdupq_n_f32(c_coscof_p2));
- y2 = vaddq_f32(y2, vdupq_n_f32(c_sincof_p2));
- y1 = vmulq_f32(y1, z);
- y2 = vmulq_f32(y2, z);
- y1 = vmulq_f32(y1, z);
- y2 = vmulq_f32(y2, x);
- y1 = vsubq_f32(y1, vmulq_f32(z, vdupq_n_f32(0.5f)));
- y2 = vaddq_f32(y2, x);
- y1 = vaddq_f32(y1, vdupq_n_f32(1));
-
- /* select the correct result from the two polynoms */
- float32x4_t ys = vbslq_f32(poly_mask, y1, y2);
- float32x4_t yc = vbslq_f32(poly_mask, y2, y1);
- *ysin = vbslq_f32(sign_mask_sin, vnegq_f32(ys), ys);
- *ycos = vbslq_f32(sign_mask_cos, yc, vnegq_f32(yc));
-}
-
-static inline float32x4_t sin_ps(float32x4_t x)
-{
- float32x4_t ysin, ycos;
- sincos_ps(x, &ysin, &ycos);
- return ysin;
-}
-
-static inline float32x4_t cos_ps(float32x4_t x)
-{
- float32x4_t ysin, ycos;
- sincos_ps(x, &ysin, &ycos);
- return ycos;
-}
-
-static inline float32x4_t div_ps(float32x4_t a, float32x4_t b)
-{
- float32x4_t reciprocal = vrecpeq_f32(b);
- reciprocal = vmulq_f32(vrecpsq_f32(b, reciprocal), reciprocal);
- // reciprocal = vmulq_f32(vrecpsq_f32(b, reciprocal), reciprocal);
- return vmulq_f32(a, reciprocal);
-}
-
-static inline float32x4_t pow_ps(float32x4_t a, float32x4_t b)
-{
- // pow(x, m) = exp(m * log(x))
- return exp_ps(vmulq_f32(b, log_ps(a)));
-}
diff --git a/compute/ncnn/src/layer/binaryop.cc b/compute/ncnn/src/layer/binaryop.cc
deleted file mode 100644
index a09d55f78..000000000
--- a/compute/ncnn/src/layer/binaryop.cc
+++ /dev/null
@@ -1,1640 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-// Tencent is pleased to support the open source community by making ncnn available.
-//
-// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
-//
-// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// https://opensource.org/licenses/BSD-3-Clause
-//
-// Unless required by applicable law or agreed to in writing, software distributed
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations under the License.
-
-#include "ncnn/layer/binaryop.h"
-#include <math.h>
-#include <algorithm>
-#include <functional>
-#include <sys/time.h>
-
-#if __ARM_NEON
-#include <arm_neon.h>
-#include "arm/neon_mathfun.h"
-#endif // __ARM_NEON
-
-namespace nnfw
-{
-namespace ncnn
-{
-
-template <typename Op> static int binary_op(const Mat &a, const Mat &b, Mat &c)
-{
- Op op;
-
- int w = a.w;
- int h = a.h;
- int channels = a.c;
- int size = w * h;
-
- int w1 = b.w;
- int h1 = b.h;
- int channels1 = b.c;
- int size1 = w1 * h1;
-
- if (a.dims == 3)
- {
- c.create(w, h, channels);
- if (c.empty())
- return -100;
-
- if (b.dims == 3)
- {
- if (b.w == 1 && b.h == 1)
- {
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = a.channel(q);
- const float *ptr1 = b.channel(q);
- float *outptr = c.channel(q);
-
- float tt = *ptr1;
- for (int i = 0; i < size; i++)
- {
- outptr[i] = op(ptr[i], tt);
- }
- }
-
- return 0;
- }
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = a.channel(q);
- const float *ptr1 = b.channel(q);
- float *outptr = c.channel(q);
-
- for (int i = 0; i < size; i++)
- {
- outptr[i] = op(ptr[i], ptr1[i]);
- }
- }
-
- return 0;
- }
-
- if (b.dims == 2)
- {
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = a.channel(q);
- const float *ptr1 = (const float *)b + h * q;
- float *outptr = c.channel(q);
-
- for (int y = 0; y < h; y++)
- {
- const float b0 = ptr1[y];
- for (int x = 0; x < w; x++)
- {
- outptr[x] = op(ptr[x], b0);
- }
-
- ptr += w;
- outptr += w;
- }
- }
-
- return 0;
- }
-
- if (b.dims == 1)
- {
- if (b.w == 1)
- {
- const float b0 = b[0];
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = a.channel(q);
- float *outptr = c.channel(q);
-
- for (int i = 0; i < size; i++)
- {
- outptr[i] = op(ptr[i], b0);
- }
- }
-
- return 0;
- }
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = a.channel(q);
- const float b0 = b[q];
- float *outptr = c.channel(q);
-
- for (int i = 0; i < size; i++)
- {
- outptr[i] = op(ptr[i], b0);
- }
- }
-
- return 0;
- }
- }
- else if (a.dims == 2)
- {
- if (b.dims == 3)
- {
- c.create(w1, h1, channels1);
- if (c.empty())
- return -100;
-
-#pragma omp parallel for
- for (int q = 0; q < channels1; q++)
- {
- const float *ptr = (const float *)a + h1 * q;
- const float *ptr1 = b.channel(q);
- float *outptr = c.channel(q);
-
- for (int y = 0; y < h1; y++)
- {
- const float a0 = ptr[y];
- for (int x = 0; x < w1; x++)
- {
- outptr[x] = op(a0, ptr1[x]);
- }
-
- ptr1 += w1;
- outptr += w1;
- }
- }
-
- return 0;
- }
-
- c.create(w, h);
- if (c.empty())
- return -100;
-
- if (b.dims == 2)
- {
- for (int i = 0; i < size; i++)
- {
- c[i] = op(a[i], b[i]);
- }
-
- return 0;
- }
-
- if (b.dims == 1)
- {
- c.create(w, h);
- if (c.empty())
- return -100;
-
- if (b.w == 1)
- {
- const float b0 = b[0];
- for (int i = 0; i < size; i++)
- {
- c[i] = op(a[i], b0);
- }
-
- return 0;
- }
-
- const float *ptr = a;
- float *outptr = c;
-
- for (int y = 0; y < h; y++)
- {
- const float b0 = b[y];
- for (int x = 0; x < w; x++)
- {
- outptr[x] = op(ptr[x], b0);
- }
-
- ptr += w;
- outptr += w;
- }
-
- return 0;
- }
- }
- else if (a.dims == 1)
- {
- if (a.w == 1)
- {
- if (b.dims == 3)
- {
- c.create(w1, h1, channels1);
- if (c.empty())
- return -100;
-
- const float a0 = a[0];
-#pragma omp parallel for
- for (int q = 0; q < channels1; q++)
- {
- const float *ptr1 = b.channel(q);
- float *outptr = c.channel(q);
-
- for (int i = 0; i < size1; i++)
- {
- outptr[i] = op(a0, ptr1[i]);
- }
- }
-
- return 0;
- }
-
- if (b.dims == 2)
- {
- c.create(w1, h1);
- if (c.empty())
- return -100;
-
- const float a0 = a[0];
- for (int i = 0; i < size1; i++)
- {
- c[i] = op(a0, b[i]);
- }
-
- return 0;
- }
-
- if (b.dims == 1)
- {
- c.create(w1);
- if (c.empty())
- return -100;
-
- const float a0 = a[0];
- for (int i = 0; i < size1; i++)
- {
- c[i] = op(a0, b[i]);
- }
-
- return 0;
- }
- }
-
- if (b.dims == 3)
- {
- c.create(w1, h1, channels1);
- if (c.empty())
- return -100;
-
-#pragma omp parallel for
- for (int q = 0; q < channels1; q++)
- {
- const float a0 = a[q];
- const float *ptr1 = b.channel(q);
- float *outptr = c.channel(q);
-
- for (int i = 0; i < size1; i++)
- {
- outptr[i] = op(a0, ptr1[i]);
- }
- }
-
- return 0;
- }
-
- if (b.dims == 2)
- {
- c.create(w1, h1);
- if (c.empty())
- return -100;
-
- const float *ptr1 = b;
- float *outptr = c;
-
- for (int y = 0; y < h1; y++)
- {
- const float a0 = a[y];
- for (int x = 0; x < w1; x++)
- {
- outptr[x] = op(a0, ptr1[x]);
- }
-
- ptr1 += w1;
- outptr += w1;
- }
-
- return 0;
- }
-
- if (b.dims == 1)
- {
- c.create(w);
- if (c.empty())
- return -100;
-
- if (b.w == 1)
- {
- const float b0 = b[0];
- for (int i = 0; i < size; i++)
- {
- c[i] = op(a[i], b0);
- }
-
- return 0;
- }
-
- for (int i = 0; i < size; i++)
- {
- c[i] = op(a[i], b[i]);
- }
- }
- }
-
- return 0;
-}
-
-template <typename Op> static int binary_op_scalar_inplace(Mat &a, float b)
-{
- Op op;
-
- int w = a.w;
- int h = a.h;
- int channels = a.c;
- int size = w * h;
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- float *ptr = a.channel(q);
-
- for (int i = 0; i < size; i++)
- {
- ptr[i] = op(ptr[i], b);
- }
- }
-
- return 0;
-}
-
-template <typename T> struct binary_op_max : std::binary_function<T, T, T>
-{
- T operator()(const T &x, const T &y) const { return std::max(x, y); }
-};
-
-template <typename T> struct binary_op_min : std::binary_function<T, T, T>
-{
- T operator()(const T &x, const T &y) const { return std::min(x, y); }
-};
-
-template <typename T> struct binary_op_pow : std::binary_function<T, T, T>
-{
- T operator()(const T &x, const T &y) const { return pow(x, y); }
-};
-
-template <typename T> struct binary_op_SquaredDifference : std::binary_function<T, T, T>
-{
- T operator()(const T &x, const T &y) const { return pow((x - y), 2); }
-};
-
-int ncnn_binary_op(const BinaryOpParam &param, const Mat &bottom_blob, const Mat &bottom_blob1,
- Mat &top_blob)
-{
- int ret = 0;
- auto op_type = param.op_type;
- // auto b = param.b;
-
- // Only support add operation, none broadcasting
- // Other case, need to remove internal memory allocation and check correctness
- if (op_type != BinaryOp::Operation_ADD)
- {
- throw std::runtime_error{"NYI: Only support ADD operation"};
- }
- if (bottom_blob.dims != bottom_blob1.dims)
- {
- throw std::runtime_error{"NYI: Cannot use broadcasting"};
- }
-
-// printf("-------------------BinaryOp---------------\n");
-
-// printf("op_type = %d, ", op_type);
-// printf("in1: (%d, %d, %d), dims = %d, ", bottom_blob.w, bottom_blob.h, bottom_blob.c,
-// bottom_blob.dims);
-// printf("in2: (%d, %d, %d), dims = %d\n", bottom_blob1.w, bottom_blob1.h, bottom_blob1.c,
-// bottom_blob1.dims);
-
-#if __ARM_NEON
- int w = bottom_blob.w;
- int h = bottom_blob.h;
- int channels = bottom_blob.c;
- int size = w * h;
-
- int w1 = bottom_blob1.w;
- int h1 = bottom_blob1.h;
- int channels1 = bottom_blob1.c;
- int size1 = w1 * h1;
-
- if (op_type == BinaryOp::Operation_ADD)
- {
- if (bottom_blob.dims == 3 && bottom_blob1.dims == 3)
- {
- // Fix for nnfw: disable allocation for output
- // top_blob.create(w, h, channels);
- if (bottom_blob1.w == 1 && bottom_blob1.h == 1)
- {
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
-#if __ARM_NEON
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *out = const_cast<float *>(outptr);
- float tt = *ptr1;
-
- float32x4_t _p2 = vdupq_n_f32(tt);
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
-
- _p1 = vaddq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (*in1 + tt);
- in1++;
- out++;
- }
-
-#else
- float tt = *ptr1;
- for (int i = 0; i < size; i++)
- {
- outptr[i] = (ptr[i] + tt);
- }
-#endif
- }
-
- ret = 0;
- }
- else
- {
- if (size * bottom_blob.elemsize % 16 != 0)
- {
- throw std::runtime_error{"Unmatched alignment"};
- }
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *in2 = const_cast<float *>(ptr1);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
- float32x4_t _p2 = vld1q_f32(in2);
-
- _p1 = vaddq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- in2 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = *in1 + *in2;
- in1++;
- in2++;
- out++;
- }
- }
- }
- }
- else if (bottom_blob.dims == 3 && bottom_blob1.dims == 1)
- {
- top_blob.create(w, h, channels);
- if (bottom_blob1.w == 1)
- {
- ret = binary_op<std::plus<float>>(bottom_blob, bottom_blob1, top_blob);
- // return ret;
- goto out;
- }
- float *pt = (float *)bottom_blob1.data;
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float b0 = pt[q];
- float *outptr = top_blob.channel(q);
-
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
- float32x4_t _p2 = vdupq_n_f32(b0);
-
- _p1 = vaddq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (*in1 + b0);
- in1++;
- out++;
- }
- }
- }
- else if (bottom_blob.dims == 1 && bottom_blob1.dims == 3)
- {
- top_blob.create(w1, h1, channels1);
- if (top_blob.empty())
- return -100;
-
-#pragma omp parallel for
- for (int q = 0; q < channels1; q++)
- {
- const float a0 = bottom_blob[q];
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- int nn = size1 >> 2;
- int remain = size1 - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr1);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vdupq_n_f32(a0);
- float32x4_t _p2 = vld1q_f32(in1);
-
- _p1 = vaddq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (a0 + *in1);
- in1++;
- out++;
- }
- }
- }
- else
- ret = binary_op<std::plus<float>>(bottom_blob, bottom_blob1, top_blob);
- }
-
-#if 0 // Disable operation except Operation_ADD
-
- if (op_type == BinaryOp::Operation_SUB)
- {
- if (bottom_blob.dims == 3 && bottom_blob1.dims == 3)
- {
- top_blob.create(w, h, channels);
-
- if (bottom_blob1.w == 1 && bottom_blob1.h == 1)
- {
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
-#if __ARM_NEON
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *out = const_cast<float *>(outptr);
- float tt = *ptr1;
-
- float32x4_t _p2 = vdupq_n_f32(tt);
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
-
- _p1 = vsubq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (*in1 - tt);
- in1++;
- out++;
- }
-
-#else
- float tt = *ptr1;
- for (int i = 0; i < size; i++)
- {
- outptr[i] = (ptr[i] - tt);
- }
-#endif
- }
-
- ret = 0;
- }
- else
- {
- top_blob.create(w, h, channels);
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *in2 = const_cast<float *>(ptr1);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
- float32x4_t _p2 = vld1q_f32(in2);
-
- _p1 = vsubq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- in2 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = *in1 - *in2;
- in1++;
- in2++;
- out++;
- }
- }
- }
- }
- else if (bottom_blob.dims == 3 && bottom_blob1.dims == 1)
- {
- top_blob.create(w, h, channels);
- if (bottom_blob1.w == 1)
- {
- ret = binary_op<std::minus<float>>(bottom_blob, bottom_blob1, top_blob);
- // return ret;
- goto out;
- }
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float b0 = bottom_blob1[q];
- float *outptr = top_blob.channel(q);
-
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
- float32x4_t _p2 = vdupq_n_f32(b0);
-
- _p1 = vsubq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (*in1 - b0);
- in1++;
- out++;
- }
- }
- }
- else if (bottom_blob.dims == 1 && bottom_blob1.dims == 3)
- {
- top_blob.create(w1, h1, channels1);
- if (top_blob.empty())
- return -100;
-
-#pragma omp parallel for
- for (int q = 0; q < channels1; q++)
- {
- const float a0 = bottom_blob[q];
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- int nn = size1 >> 2;
- int remain = size1 - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr1);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vdupq_n_f32(a0);
- float32x4_t _p2 = vld1q_f32(in1);
-
- _p1 = vsubq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (a0 - *in1);
- in1++;
- out++;
- }
- }
- }
- else
- ret = binary_op<std::minus<float>>(bottom_blob, bottom_blob1, top_blob);
- }
-
- if (op_type == BinaryOp::Operation_MUL)
- {
- if (bottom_blob.dims == 3 && bottom_blob1.dims == 3)
- {
- top_blob.create(w, h, channels);
-
- if (bottom_blob1.w == 1 && bottom_blob1.h == 1)
- {
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
-#if __ARM_NEON
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *out = const_cast<float *>(outptr);
- float tt = *ptr1;
-
- float32x4_t _p2 = vdupq_n_f32(tt);
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
-
- _p1 = vmulq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (*in1 * tt);
- in1++;
- out++;
- }
-
-#else
- float tt = *ptr1;
- for (int i = 0; i < size; i++)
- {
- outptr[i] = (ptr[i] * tt);
- }
-#endif
- }
-
- ret = 0;
- }
- else
- {
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *in2 = const_cast<float *>(ptr1);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
- float32x4_t _p2 = vld1q_f32(in2);
-
- _p1 = vmulq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- in2 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = *in1 * *in2;
- in1++;
- in2++;
- out++;
- }
- }
- }
- }
- else if (bottom_blob.dims == 3 && bottom_blob1.dims == 1)
- {
- top_blob.create(w, h, channels);
- if (bottom_blob1.w == 1)
- {
- ret = binary_op<std::multiplies<float>>(bottom_blob, bottom_blob1, top_blob);
- // return ret;
- goto out;
- }
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float b0 = bottom_blob1[q];
- float *outptr = top_blob.channel(q);
-
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
- float32x4_t _p2 = vdupq_n_f32(b0);
-
- _p1 = vmulq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (*in1 * b0);
- in1++;
- out++;
- }
- }
- }
- else if (bottom_blob.dims == 1 && bottom_blob1.dims == 3)
- {
- top_blob.create(w1, h1, channels1);
- if (top_blob.empty())
- return -100;
-
- if (bottom_blob.w != bottom_blob1.c)
- {
- ret = binary_op<std::multiplies<float>>(bottom_blob, bottom_blob1, top_blob);
- goto out;
- }
-
- float *pt = (float *)bottom_blob.data;
-
-#pragma omp parallel for
- for (int q = 0; q < channels1; q++)
- {
- const float a0 = pt[q];
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- int nn = size1 >> 2;
- int remain = size1 - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr1);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vdupq_n_f32(a0);
- float32x4_t _p2 = vld1q_f32(in1);
-
- _p1 = vmulq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (a0 * *in1);
- in1++;
- out++;
- }
- }
- }
- else
- ret = binary_op<std::multiplies<float>>(bottom_blob, bottom_blob1, top_blob);
- }
-
- if (op_type == BinaryOp::Operation_DIV)
- {
- if (bottom_blob.dims == 3 && bottom_blob1.dims == 3)
- {
- top_blob.create(w, h, channels);
- if (bottom_blob1.w == 1 && bottom_blob1.h == 1)
- {
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
-#if __ARM_NEON
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *out = const_cast<float *>(outptr);
- float tt = *ptr1;
-
- float32x4_t _p2 = vdupq_n_f32(tt);
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
-
- float32x4_t _p3 = vrecpeq_f32(_p2);
- _p3 = vmulq_f32(vrecpsq_f32(_p2, _p3), _p3);
- _p1 = vmulq_f32(_p1, _p3);
-
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (*in1 / tt);
- in1++;
- out++;
- }
-
-#else
- float tt = *ptr1;
- for (int i = 0; i < size; i++)
- {
- outptr[i] = (ptr[i] / tt);
- }
-#endif
- }
-
- // return 0;
- goto out;
- }
- else
- {
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *in2 = const_cast<float *>(ptr1);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
- float32x4_t _p2 = vld1q_f32(in2);
-
- float32x4_t _p3 = vrecpeq_f32(_p2);
- _p2 = vmulq_f32(vrecpsq_f32(_p2, _p3), _p3);
- _p1 = vmulq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- in2 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = *in1 / *in2;
- in1++;
- in2++;
- out++;
- }
- }
- }
- }
- else if (bottom_blob.dims == 3 && bottom_blob1.dims == 1)
- {
- top_blob.create(w, h, channels);
- if (bottom_blob1.w == 1)
- {
- ret = binary_op<std::divides<float>>(bottom_blob, bottom_blob1, top_blob);
- // return ret;
- goto out;
- }
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float b0 = bottom_blob1[q];
- float *outptr = top_blob.channel(q);
-
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
- float32x4_t _p2 = vdupq_n_f32(b0);
-
- //_p1 = vsubq_f32(_p1, _p2);
- float32x4_t _p3 = vrecpeq_f32(_p2);
- _p2 = vmulq_f32(vrecpsq_f32(_p2, _p3), _p3);
- _p1 = vmulq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (*in1 / b0);
- in1++;
- out++;
- }
- }
- }
- else if (bottom_blob.dims == 1 && bottom_blob1.dims == 3)
- {
- top_blob.create(w1, h1, channels1);
- if (top_blob.empty())
- return -100;
-
-#pragma omp parallel for
- for (int q = 0; q < channels1; q++)
- {
- const float a0 = bottom_blob[q];
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- int nn = size1 >> 2;
- int remain = size1 - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr1);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vdupq_n_f32(a0);
- float32x4_t _p2 = vld1q_f32(in1);
-
- //_p1 = vsubq_f32(_p1, _p2);
- float32x4_t _p3 = vrecpeq_f32(_p2);
- _p2 = vmulq_f32(vrecpsq_f32(_p2, _p3), _p3);
- _p1 = vmulq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (a0 / *in1);
- in1++;
- out++;
- }
- }
- }
- else
- ret = binary_op<std::divides<float>>(bottom_blob, bottom_blob1, top_blob);
- }
-
- if (op_type == BinaryOp::Operation_MAX)
- ret = binary_op<binary_op_max<float>>(bottom_blob, bottom_blob1, top_blob);
-
- if (op_type == BinaryOp::Operation_MIN)
- ret = binary_op<binary_op_min<float>>(bottom_blob, bottom_blob1, top_blob);
-
- if (op_type == BinaryOp::Operation_POW)
- {
- if (bottom_blob.dims == 3 && bottom_blob1.dims == 3)
- {
- top_blob.create(w, h, channels);
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *in2 = const_cast<float *>(ptr1);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
- float32x4_t _p2 = vld1q_f32(in2);
-
- _p1 = pow_ps(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- in2 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = pow(*in1, *in2);
- in1++;
- in2++;
- out++;
- }
- }
- }
- else if (bottom_blob.dims == 3 && bottom_blob1.dims == 1)
- {
- top_blob.create(w, h, channels);
- if (bottom_blob1.w == 1)
- {
- ret = binary_op<binary_op_pow<float>>(bottom_blob, bottom_blob1, top_blob);
- // return ret;
- goto out;
- }
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float b0 = bottom_blob1[q];
- float *outptr = top_blob.channel(q);
-
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
- float32x4_t _p2 = vdupq_n_f32(b0);
-
- _p1 = pow_ps(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = pow(*in1, b0);
- in1++;
- out++;
- }
- }
- }
- else if (bottom_blob.dims == 1 && bottom_blob1.dims == 3)
- {
- top_blob.create(w1, h1, channels1);
- if (top_blob.empty())
- return -100;
-
-#pragma omp parallel for
- for (int q = 0; q < channels1; q++)
- {
- const float a0 = bottom_blob[q];
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- int nn = size1 >> 2;
- int remain = size1 - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr1);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vdupq_n_f32(a0);
- float32x4_t _p2 = vld1q_f32(in1);
-
- _p1 = pow_ps(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = pow(a0, *in1);
- in1++;
- out++;
- }
- }
- }
- else
- ret = binary_op<binary_op_pow<float>>(bottom_blob, bottom_blob1, top_blob);
- }
-
- if (op_type == BinaryOp::Operation_SQUAREDDIFFERENCE)
- {
- if (bottom_blob.dims == 3 && bottom_blob1.dims == 3)
- {
- top_blob.create(w, h, channels);
-
- if (bottom_blob1.w == 1 && bottom_blob1.h == 1)
- {
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
-#if __ARM_NEON
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *out = const_cast<float *>(outptr);
- float tt = *ptr1;
-
- float32x4_t _p2 = vdupq_n_f32(tt);
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
-
- _p1 = vsubq_f32(_p1, _p2);
- _p1 = vmulq_f32(_p1, _p1);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- float t2 = *in1 - tt;
- *out = t2 * t2;
- in1++;
- out++;
- }
-
-#else
- float tt = *ptr1;
- for (int i = 0; i < size; i++)
- {
- float t2 = (ptr[i] - tt);
- outptr[i] = t2 * t2;
- }
-#endif
- }
-
- ret = 0;
- }
- else
- {
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *in2 = const_cast<float *>(ptr1);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
- float32x4_t _p2 = vld1q_f32(in2);
-
- _p1 = vsubq_f32(_p1, _p2);
- _p1 = vmulq_f32(_p1, _p1);
- vst1q_f32(out, _p1);
- in1 += 4;
- in2 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (*in1 - *in2) * (*in1 - *in2);
- in1++;
- in2++;
- out++;
- }
- }
- }
- }
- else if (bottom_blob.dims == 3 && bottom_blob1.dims == 1)
- {
- top_blob.create(w, h, channels);
- if (bottom_blob1.w == 1)
- {
- ret = binary_op<binary_op_SquaredDifference<float>>(bottom_blob, bottom_blob1, top_blob);
- // return ret;
- goto out;
- }
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const float *ptr = bottom_blob.channel(q);
- const float b0 = bottom_blob1[q];
- float *outptr = top_blob.channel(q);
-
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
- float32x4_t _p2 = vdupq_n_f32(b0);
-
- _p1 = vsubq_f32(_p1, _p2);
- _p1 = vmulq_f32(_p1, _p1);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (*in1 - b0) * (*in1 - b0);
- in1++;
- out++;
- }
- }
- }
- else if (bottom_blob.dims == 1 && bottom_blob1.dims == 3)
- {
- top_blob.create(w1, h1, channels1);
- if (top_blob.empty())
- return -100;
-
-#pragma omp parallel for
- for (int q = 0; q < channels1; q++)
- {
- const float a0 = bottom_blob[q];
- const float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- int nn = size1 >> 2;
- int remain = size1 - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr1);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vdupq_n_f32(a0);
- float32x4_t _p2 = vld1q_f32(in1);
-
- _p1 = vsubq_f32(_p1, _p2);
- _p1 = vmulq_f32(_p1, _p1);
- vst1q_f32(out, _p1);
- in1 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = (a0 - *in1) * (a0 - *in1);
- in1++;
- out++;
- }
- }
- }
- else
- ret = binary_op<binary_op_SquaredDifference<float>>(bottom_blob, bottom_blob1, top_blob);
- }
-
-#endif // 0 (Disable operation except Operation_ADD)
-
-#else
-
- if (op_type == BinaryOp::Operation_ADD)
- ret = binary_op<std::plus<float>>(bottom_blob, bottom_blob1, top_blob);
-
- if (op_type == BinaryOp::Operation_SUB)
- ret = binary_op<std::minus<float>>(bottom_blob, bottom_blob1, top_blob);
-
- if (op_type == BinaryOp::Operation_MUL)
- ret = binary_op<std::multiplies<float>>(bottom_blob, bottom_blob1, top_blob);
-
- if (op_type == BinaryOp::Operation_DIV)
- ret = binary_op<std::divides<float>>(bottom_blob, bottom_blob1, top_blob);
-
- if (op_type == BinaryOp::Operation_MAX)
- ret = binary_op<binary_op_max<float>>(bottom_blob, bottom_blob1, top_blob);
-
- if (op_type == BinaryOp::Operation_MIN)
- ret = binary_op<binary_op_min<float>>(bottom_blob, bottom_blob1, top_blob);
-
- if (op_type == BinaryOp::Operation_POW)
- ret = binary_op<binary_op_pow<float>>(bottom_blob, bottom_blob1, top_blob);
- if (op_type == BinaryOp::Operation_SQUAREDDIFFERENCE)
- ret = binary_op<binary_op_SquaredDifference<float>>(bottom_blob, bottom_blob1, top_blob);
-#endif
-
-/*
-for (int p = 0; p < top_blob.c && p < 5; p++)
-{
- float* outptr = top_blob.channel(p);
- printf("channel: %d\n", p);
- for (int i = 0; i < 1; i++)
- {
- for (int j = 0; j < 5; j++)
- {
- printf("%f ", outptr[j]);
- }
- printf("\n");
- outptr += top_blob.w;
- }
-}
-printf("----------------------------\n");
-*/
-
-out:
- return ret;
-}
-
-int ncnn_binary_op_inplace(const BinaryOpParam &param, Mat &bottom_top_blob)
-{
- auto op_type = param.op_type;
- auto b = param.b;
-
- // printf("-------------------BinaryOp-----forward_inplace----------\n");
- if (op_type == BinaryOp::Operation_ADD)
- return binary_op_scalar_inplace<std::plus<float>>(bottom_top_blob, b);
-
- if (op_type == BinaryOp::Operation_SUB)
- return binary_op_scalar_inplace<std::minus<float>>(bottom_top_blob, b);
-
- if (op_type == BinaryOp::Operation_MUL)
- return binary_op_scalar_inplace<std::multiplies<float>>(bottom_top_blob, b);
-
- if (op_type == BinaryOp::Operation_DIV)
- return binary_op_scalar_inplace<std::divides<float>>(bottom_top_blob, b);
-
- if (op_type == BinaryOp::Operation_MAX)
- return binary_op_scalar_inplace<binary_op_max<float>>(bottom_top_blob, b);
-
- if (op_type == BinaryOp::Operation_MIN)
- return binary_op_scalar_inplace<binary_op_min<float>>(bottom_top_blob, b);
-
- if (op_type == BinaryOp::Operation_POW)
- return binary_op_scalar_inplace<binary_op_pow<float>>(bottom_top_blob, b);
-
- if (op_type == BinaryOp::Operation_SQUAREDDIFFERENCE)
- return binary_op_scalar_inplace<binary_op_SquaredDifference<float>>(bottom_top_blob, b);
-
- return 0;
-}
-
-int ncnn_binary_op_inplace(const BinaryOpParam &param, Mat &bottom_blob, Mat &bottom_top_blob)
-{
- int ret = 0;
-
- Mat &bottom_blob1 = bottom_top_blob;
- Mat &top_blob = bottom_top_blob;
- auto op_type = param.op_type;
-
- if (op_type == BinaryOp::Operation_ADD)
- {
- int w = bottom_blob.w;
- int h = bottom_blob.h;
- int channels = bottom_blob.c;
- int size = w * h;
-
-// Unused variables
-// int w1 = bottom_blob1.w;
-// int h1 = bottom_blob1.h;
-// int channels1 = bottom_blob1.c;
-// int size1 = w1 * h1;
-
-#if __ARM_NEON
-
- if (bottom_blob.dims == 3 && bottom_blob1.dims == 3)
- {
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- float *ptr = bottom_blob.channel(q);
- float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- int nn = size >> 2;
- int remain = size - (nn << 2);
-
- float *in1 = const_cast<float *>(ptr);
- float *in2 = const_cast<float *>(ptr1);
- float *out = const_cast<float *>(outptr);
-
- for (; nn > 0; nn--)
- {
- float32x4_t _p1 = vld1q_f32(in1);
- float32x4_t _p2 = vld1q_f32(in2);
-
- _p1 = vaddq_f32(_p1, _p2);
- vst1q_f32(out, _p1);
- in1 += 4;
- in2 += 4;
- out += 4;
- }
- for (; remain > 0; remain--)
- {
- *out = *in1 + *in2;
- in1++;
- in2++;
- out++;
- }
- }
- }
-#else
- if (bottom_blob.dims == 3 && bottom_blob1.dims == 3)
- {
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- float *ptr = bottom_blob.channel(q);
- float *ptr1 = bottom_blob1.channel(q);
- float *outptr = top_blob.channel(q);
-
- for (int i = 0; i < size; i++)
- {
- outptr[i] = ptr[i] + ptr1[i];
- }
- }
- return 0;
- }
-#endif
- }
- else
- {
- return -1;
- }
- return ret;
-}
-
-} // namespace ncnn
-} // namespace ncnn
diff --git a/compute/ncnn/src/layer/instance_norm.cc b/compute/ncnn/src/layer/instance_norm.cc
deleted file mode 100644
index 08c3f2c23..000000000
--- a/compute/ncnn/src/layer/instance_norm.cc
+++ /dev/null
@@ -1,371 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-// Tencent is pleased to support the open source community by making ncnn available.
-//
-// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
-//
-// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// https://opensource.org/licenses/BSD-3-Clause
-//
-// Unless required by applicable law or agreed to in writing, software distributed
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations under the License.
-
-#include "ncnn/layer/instance_norm.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include <math.h>
-#include "ncnn/mat.h"
-#ifdef __ARM_NEON
-#include <arm_neon.h>
-#endif // __ARM_NEON
-
-namespace nnfw
-{
-namespace ncnn
-{
-
-void ncnn_instance_norm_rowmajor(Mat &in_mat, Mat &out_mat, Mat &gamma_mat, Mat &beta_mat,
- int channels, float eps)
-{
- // x = (x - mean) / (sqrt(var) + eps) * gamma + beta
-
- int w = in_mat.w;
- int h = in_mat.h;
- int size = w * h;
-#ifdef __ARM_NEON
- int nn = size >> 2;
- int left4 = size & 3;
-#endif
-
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
-#ifdef __ARM_NEON
- float *in_ptr = in_mat.channel(q);
- float *out_ptr = out_mat.channel(q);
- float32x4_t _sum = vdupq_n_f32(0.f);
- float32x4_t _sq_sum = vdupq_n_f32(0.f);
- for (int n = nn; n > 0; n--)
- {
- float32x4_t _p = vld1q_f32(in_ptr);
- _sum = vaddq_f32(_sum, _p);
- _p = vmulq_f32(_p, _p);
- _sq_sum = vaddq_f32(_sq_sum, _p);
- in_ptr += 4;
- }
- float sum = vgetq_lane_f32(_sum, 0) + vgetq_lane_f32(_sum, 1);
- sum += vgetq_lane_f32(_sum, 2);
- sum += vgetq_lane_f32(_sum, 3);
- float sqsum = vgetq_lane_f32(_sq_sum, 0) + vgetq_lane_f32(_sq_sum, 1);
- sqsum += vgetq_lane_f32(_sq_sum, 2);
- sqsum += vgetq_lane_f32(_sq_sum, 3);
-
- for (int left = left4; left > 0; left--)
- {
- sum += *in_ptr;
- sqsum += (*in_ptr) * (*in_ptr);
- in_ptr++;
- }
-
- float mean = sum / size;
- float var = sqsum / size - mean * mean;
- float gamma = gamma_mat[q];
- float beta = beta_mat[q];
- float a = gamma / (sqrt(var + eps));
- float b = -mean * a + beta;
-
- in_ptr = in_mat.channel(q);
- float32x4_t _a = vdupq_n_f32(a);
- float32x4_t _b = vdupq_n_f32(b);
- for (int n = nn; n > 0; n--)
- {
- float32x4_t _p = vld1q_f32(in_ptr);
- _p = vmulq_f32(_p, _a);
- _p = vaddq_f32(_p, _b);
- vst1q_f32(out_ptr, _p);
- in_ptr += 4;
- out_ptr += 4;
- }
- for (int left = left4; left > 0; left--)
- {
- *out_ptr = (*in_ptr) * a + b;
- in_ptr++;
- out_ptr++;
- }
-#else
- float *in_ptr = in_mat.channel(q);
- float *out_ptr = out_mat.channel(q);
- // mean and var
- float sum = 0.f;
- float sqsum = 0.f;
- for (int i = 0; i < size; i++)
- {
- sum += in_ptr[i];
- sqsum += in_ptr[i] * in_ptr[i];
- }
- float mean = sum / size;
- float var = sqsum / size - mean * mean;
-
- float gamma = gamma_mat[q];
- float beta = beta_mat[q];
-
- float a = gamma / (sqrt(var + eps));
- float b = -mean * a + beta;
- for (int i = 0; i < size; i++)
- {
- out_ptr[i] = in_ptr[i] * a + b;
- }
-#endif
- }
-}
-
-void ncnn_instance_norm_colmajor(Mat &in_mat, Mat &out_mat, Mat &gamma_mat, Mat &beta_mat,
- int /*channels*/, float eps)
-{
- // Treat CHW layout as HWC layout
- int h = in_mat.c;
- int w = in_mat.h;
- int c = in_mat.w;
-
- int size = w * h;
- int total = size * c;
-
- float sum[c] = {};
- float sqsum[c] = {};
-
- float mean[c] = {};
- float var[c] = {};
- float a[c] = {};
- float b[c] = {};
-
- float *in_ptr = in_mat.channel(0);
- float *out_ptr = out_mat.channel(0);
-
-#pragma omp parallel for reduction(+ : sum, sqsum) schedule(guided)
- for (int i = 0; i < total; i += c)
- {
- for (int j = 0; j < c; j++)
- {
- sum[j] += in_ptr[i + j];
- sqsum[j] += in_ptr[i + j] * in_ptr[i + j];
- }
- }
-
- for (int i = 0; i < c; i++)
- {
- mean[i] = sum[i] / size;
- var[i] = sqsum[i] / size - mean[i] * mean[i];
- a[i] = gamma_mat[i] / (sqrt(var[i] + eps));
- b[i] = -mean[i] * a[i] + beta_mat[i];
- }
-
-#pragma omp parallel for schedule(guided)
- for (int i = 0; i < total; i += c)
- {
- for (int j = 0; j < c; j++)
- {
- out_ptr[i + j] = in_ptr[i + j] * a[j] + b[j];
- }
- }
-}
-
-void ncnn_instance_norm_with_relu_rowmajor(Mat &in_mat, Mat &out_mat, Mat &gamma_mat, Mat &beta_mat,
- int channels, float eps, float /*slope*/)
-{
- int w = in_mat.w;
- int h = in_mat.h;
- int size = w * h;
-#ifdef __ARM_NEON
- int nn = size >> 2;
- int left4 = size & 3;
-#endif
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
-#ifdef __ARM_NEON
- float *in_ptr = in_mat.channel(q);
- float *out_ptr = out_mat.channel(q);
- float32x4_t _sum = vdupq_n_f32(0.f);
- float32x4_t _sq_sum = vdupq_n_f32(0.f);
- for (int n = nn; n > 0; n--)
- {
- float32x4_t _p = vld1q_f32(in_ptr);
- _sum = vaddq_f32(_sum, _p);
- _p = vmulq_f32(_p, _p);
- _sq_sum = vaddq_f32(_sq_sum, _p);
- in_ptr += 4;
- }
- // float sum =
- // vgetq_lane_f32(_sum,0)+vgetq_lane_f32(_sum,1)+vgetq_lane_f32(_sum,2)+vgetq_lane_f32(_sum,3);
- // float sqsum = vgetq_lane_f32(_sq_sum,0)+vgetq_lane_f32(_sq_sum,1)+
- // vgetq_lane_f32(_sq_sum,2)+vgetq_lane_f32(_sq_sum,3);
- float sum = vgetq_lane_f32(_sum, 0) + vgetq_lane_f32(_sum, 1);
- sum += vgetq_lane_f32(_sum, 2);
- sum += vgetq_lane_f32(_sum, 3);
- float sqsum = vgetq_lane_f32(_sq_sum, 0) + vgetq_lane_f32(_sq_sum, 1);
- sqsum += vgetq_lane_f32(_sq_sum, 2);
- sqsum += vgetq_lane_f32(_sq_sum, 3);
- for (int left = left4; left > 0; left--)
- {
- sum += *in_ptr;
- sqsum += (*in_ptr) * (*in_ptr);
- in_ptr++;
- }
-
- float mean = sum / size;
- float var = sqsum / size - mean * mean;
- float gamma = gamma_mat[q];
- float beta = beta_mat[q];
- float a = gamma / (sqrt(var + eps));
- float b = -mean * a + beta;
- // TODO:slop is not used here , only for RELU which slop is always = 0;
- in_ptr = in_mat.channel(q);
- float32x4_t _a = vdupq_n_f32(a);
- float32x4_t _b = vdupq_n_f32(b);
- float32x4_t _zero = vdupq_n_f32(0.f);
- for (int n = nn; n > 0; n--)
- {
- float32x4_t _p = vld1q_f32(in_ptr);
- _p = vmulq_f32(_p, _a);
- _p = vaddq_f32(_p, _b);
- _p = vmaxq_f32(_p, _zero);
- vst1q_f32(out_ptr, _p);
- in_ptr += 4;
- out_ptr += 4;
- }
- for (int left = left4; left > 0; left--)
- {
- int temp = (*in_ptr) * a + b;
- *out_ptr = temp > 0 ? temp : 0;
- in_ptr++;
- out_ptr++;
- }
-#else
- float *in_ptr = in_mat.channel(q);
- float *out_ptr = out_mat.channel(q);
-
- // mean and var
- float sum = 0.f;
- float sqsum = 0.f;
- for (int i = 0; i < size; i++)
- {
- sum += in_ptr[i];
- sqsum += in_ptr[i] * in_ptr[i];
- }
- float mean = sum / size;
- float var = sqsum / size - mean * mean;
-
- float gamma = gamma_mat[q];
- float beta = beta_mat[q];
-
- float a = gamma / (sqrt(var + eps));
- float b = -mean * a + beta;
-
- if (slope == 0.f)
- {
- for (int i = 0; i < size; i++)
- {
- float temp = in_ptr[i] * a + b;
- out_ptr[i] = temp > 0 ? temp : 0;
- }
- }
- else
- {
- for (int i = 0; i < size; i++)
- {
- float temp = in_ptr[i] * a + b;
- out_ptr[i] = temp > 0 ? temp : temp * slope;
- }
- }
-#endif
- }
-}
-
-void ncnn_instance_norm_with_relu_colmajor(Mat &in_mat, Mat &out_mat, Mat &gamma_mat, Mat &beta_mat,
- int /*channels*/, float eps, float slope)
-{
- // Treat CHW layout as HWC layout
- int h = in_mat.c;
- int w = in_mat.h;
- int c = in_mat.w;
-
- int size = w * h;
- int total = size * c;
-
- float sum[c] = {};
- float sqsum[c] = {};
-
- float mean[c] = {};
- float var[c] = {};
- float a[c] = {};
- float b[c] = {};
-
- float *in_ptr = in_mat.channel(0);
- float *out_ptr = out_mat.channel(0);
-
-#pragma omp parallel for reduction(+ : sum, sqsum) schedule(guided)
- for (int i = 0; i < total; i += c)
- {
- for (int j = 0; j < c; j++)
- {
- sum[j] += in_ptr[i + j];
- sqsum[j] += in_ptr[i + j] * in_ptr[i + j];
- }
- }
-
- for (int i = 0; i < c; i++)
- {
- mean[i] = sum[i] / size;
- var[i] = sqsum[i] / size - mean[i] * mean[i];
- a[i] = gamma_mat[i] / (sqrt(var[i] + eps));
- b[i] = -mean[i] * a[i] + beta_mat[i];
- }
-
- if (slope == 0.f)
- {
-#pragma omp parallel for schedule(guided)
- for (int i = 0; i < total; i += c)
- {
- for (int j = 0; j < c; j++)
- {
- float temp = in_ptr[i + j] * a[j] + b[j];
- out_ptr[i + j] = temp > 0 ? temp : 0;
- }
- }
- }
- else
- {
-#pragma omp parallel for schedule(guided)
- for (int i = 0; i < total; i += c)
- {
- for (int j = 0; j < c; j++)
- {
- float temp = in_ptr[i + j] * a[j] + b[j];
- out_ptr[i + j] = temp > 0 ? temp : temp * slope;
- }
- }
- }
-}
-
-} // namespace ncnn
-
-} // namespace nnfw
diff --git a/compute/ncnn/src/mat.cc b/compute/ncnn/src/mat.cc
deleted file mode 100644
index 568378ef7..000000000
--- a/compute/ncnn/src/mat.cc
+++ /dev/null
@@ -1,940 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ncnn/mat.h"
-
-#if __ARM_NEON
-#include <arm_neon.h>
-#endif // __ARM_NEON
-
-// Fix for nnfw: comment out cpu.h
-//#include "cpu.h"
-
-namespace nnfw
-{
-namespace ncnn
-{
-
-void Mat::substract_mean_normalize(const float *mean_vals, const float *norm_vals)
-{
- int size = w * h;
-
- if (mean_vals && !norm_vals)
- {
-// substract mean only
-#pragma omp parallel for
- for (int q = 0; q < c; q++)
- {
- float *ptr = channel(q); // data + cstep * q;
- const float mean = mean_vals[q];
-
-#if __ARM_NEON
- int nn = size >> 2;
- int remain = size - (nn << 2);
-#else
- int remain = size;
-#endif // __ARM_NEON
-
-#if __ARM_NEON
-#if __aarch64__
- if (nn > 0)
- {
- asm volatile("dup v1.4s, %w4 \n"
- "0: \n"
- "prfm pldl1keep, [%1, #128] \n"
- "ld1 {v0.4s}, [%1] \n"
- "fsub v0.4s, v0.4s, v1.4s \n"
- "subs %w0, %w0, #1 \n"
- "st1 {v0.4s}, [%1], #16 \n"
- "bne 0b \n"
- : "=r"(nn), // %0
- "=r"(ptr) // %1
- : "0"(nn), "1"(ptr),
- "r"(mean) // %4
- : "cc", "memory", "v0", "v1");
- }
-#else
- if (nn > 0)
- {
- asm volatile("vdup.f32 q1, %4 \n"
- "0: \n"
- "pld [%1, #128] \n"
- "vld1.f32 {d0-d1}, [%1 :128] \n"
- "vsub.f32 q0, q0, q1 \n"
- "subs %0, #1 \n"
- "vst1.f32 {d0-d1}, [%1 :128]! \n"
- "bne 0b \n"
- : "=r"(nn), // %0
- "=r"(ptr) // %1
- : "0"(nn), "1"(ptr),
- "r"(mean) // %4
- : "cc", "memory", "q0", "q1");
- }
-#endif // __aarch64__
-#endif // __ARM_NEON
- for (; remain > 0; remain--)
- {
- *ptr -= mean;
- ptr++;
- }
- }
- }
- else if (!mean_vals && norm_vals)
- {
-// normalize only
-#pragma omp parallel for
- for (int q = 0; q < c; q++)
- {
- float *ptr = channel(q); // data + cstep * q;
- const float norm = norm_vals[q];
-
-#if __ARM_NEON
- int nn = size >> 2;
- int remain = size - (nn << 2);
-#else
- int remain = size;
-#endif // __ARM_NEON
-
-#if __ARM_NEON
-#if __aarch64__
- if (nn > 0)
- {
- asm volatile("dup v1.4s, %w4 \n"
- "0: \n"
- "prfm pldl1keep, [%1, #128] \n"
- "ld1 {v0.4s}, [%1] \n"
- "fmul v0.4s, v0.4s, v1.4s \n"
- "subs %w0, %w0, #1 \n"
- "st1 {v0.4s}, [%1], #16 \n"
- "bne 0b \n"
- : "=r"(nn), // %0
- "=r"(ptr) // %1
- : "0"(nn), "1"(ptr),
- "r"(norm) // %4
- : "cc", "memory", "v0", "v1");
- }
-#else
- if (nn > 0)
- {
- asm volatile("vdup.f32 q1, %4 \n"
- "0: \n"
- "pld [%1, #128] \n"
- "vld1.f32 {d0-d1}, [%1 :128] \n"
- "vmul.f32 q0, q0, q1 \n"
- "subs %0, #1 \n"
- "vst1.f32 {d0-d1}, [%1 :128]! \n"
- "bne 0b \n"
- : "=r"(nn), // %0
- "=r"(ptr) // %1
- : "0"(nn), "1"(ptr),
- "r"(norm) // %4
- : "cc", "memory", "q0", "q1");
- }
-#endif // __aarch64__
-#endif // __ARM_NEON
- for (; remain > 0; remain--)
- {
- *ptr *= norm;
- ptr++;
- }
- }
- }
- else if (mean_vals && norm_vals)
- {
-// substract mean and normalize
-#pragma omp parallel for
- for (int q = 0; q < c; q++)
- {
- float *ptr = channel(q); // data + cstep * q;
- const float mean = mean_vals[q];
- const float norm = norm_vals[q];
-
-#if __ARM_NEON
- int nn = size >> 2;
- int remain = size - (nn << 2);
-#else
- int remain = size;
-#endif // __ARM_NEON
-
-#if __ARM_NEON
-#if __aarch64__
- if (nn > 0)
- {
- asm volatile("dup v1.4s, %w4 \n"
- "dup v2.4s, %w5 \n"
- "0: \n"
- "prfm pldl1keep, [%1, #128] \n"
- "ld1 {v0.4s}, [%1] \n"
- "fsub v0.4s, v0.4s, v1.4s \n"
- "fmul v0.4s, v0.4s, v2.4s \n"
- "subs %w0, %w0, #1 \n"
- "st1 {v0.4s}, [%1], #16 \n"
- "bne 0b \n"
- : "=r"(nn), // %0
- "=r"(ptr) // %1
- : "0"(nn), "1"(ptr),
- "r"(mean), // %4
- "r"(norm) // %5
- : "cc", "memory", "v0", "v1", "v2");
- }
-#else
- if (nn > 0)
- {
- asm volatile("vdup.f32 q1, %4 \n"
- "vdup.f32 q2, %5 \n"
- "0: \n"
- "pld [%1, #128] \n"
- "vld1.f32 {d0-d1}, [%1 :128] \n"
- "vsub.f32 q0, q0, q1 \n"
- "vmul.f32 q0, q0, q2 \n"
- "subs %0, #1 \n"
- "vst1.f32 {d0-d1}, [%1 :128]! \n"
- "bne 0b \n"
- : "=r"(nn), // %0
- "=r"(ptr) // %1
- : "0"(nn), "1"(ptr),
- "r"(mean), // %4
- "r"(norm) // %5
- : "cc", "memory", "q0", "q1", "q2");
- }
-#endif // __aarch64__
-#endif // __ARM_NEON
- for (; remain > 0; remain--)
- {
- *ptr = (*ptr - mean) * norm;
- ptr++;
- }
- }
- }
-}
-
-// convert half precision floating point to float
-static float half2float(unsigned short value)
-{
- // 1 : 5 : 10
- unsigned short sign = (value & 0x8000) >> 15;
- unsigned short exponent = (value & 0x7c00) >> 10;
- unsigned short significand = value & 0x03FF;
-
- // fprintf(stderr, "%d %d %d\n", sign, exponent, significand);
-
- // 1 : 8 : 23
- union {
- unsigned int u;
- float f;
- } tmp;
- if (exponent == 0)
- {
- if (significand == 0)
- {
- // zero
- tmp.u = (sign << 31);
- }
- else
- {
- // denormal
- exponent = 0;
- // find non-zero bit
- while ((significand & 0x200) == 0)
- {
- significand <<= 1;
- exponent++;
- }
- significand <<= 1;
- significand &= 0x3FF;
- tmp.u = (sign << 31) | ((-exponent + (-15 + 127)) << 23) | (significand << 13);
- }
- }
- else if (exponent == 0x1F)
- {
- // infinity or NaN
- tmp.u = (sign << 31) | (0xFF << 23) | (significand << 13);
- }
- else
- {
- // normalized
- tmp.u = (sign << 31) | ((exponent + (-15 + 127)) << 23) | (significand << 13);
- }
-
- return tmp.f;
-}
-
-Mat Mat::from_float16(const unsigned short *data, int size)
-{
- Mat m(size);
- if (m.empty())
- return m;
-
- float *ptr = m; //.data;
-
-#if __ARM_NEON && (__ARM_FP & 2)
- // Fix for nnfw: Alway support vfpv4
- // int nn = cpu_support_arm_vfpv4() ? size >> 2 : 0;
- int nn = size >> 2;
- int remain = size - (nn << 2);
-#else
- int remain = size;
-#endif // __ARM_NEON
-
-#if __ARM_NEON && (__ARM_FP & 2)
-#if __aarch64__
- if (nn > 0)
- {
- asm volatile("0: \n"
- "ld1 {v0.4h}, [%1], #8 \n"
- "fcvtl v1.4s, v0.4h \n"
- "subs %w0, %w0, #1 \n"
- "st1 {v1.4s}, [%2], #16 \n"
- "bne 0b \n"
- : "=r"(nn), // %0
- "=r"(data), // %1
- "=r"(ptr) // %2
- : "0"(nn), "1"(data), "2"(ptr)
- : "cc", "memory", "v0", "v1");
- }
-#else
- if (nn > 0)
- {
- asm volatile("0: \n"
- "pld [%1, #64] \n"
- "vld1.s16 {d0}, [%1 :64]! \n"
- "vcvt.f32.f16 q1, d0 \n"
- "subs %0, #1 \n"
- "vst1.f32 {d2-d3}, [%2 :128]! \n"
- "bne 0b \n"
- : "=r"(nn), // %0
- "=r"(data), // %1
- "=r"(ptr) // %2
- : "0"(nn), "1"(data), "2"(ptr)
- : "cc", "memory", "q0", "q1");
- }
-#endif // __aarch64__
-#endif // __ARM_NEON
- for (; remain > 0; remain--)
- {
- *ptr = half2float(*data);
-
- data++;
- ptr++;
- }
-
- return m;
-}
-
-static void copy_make_border_image(const Mat &src, Mat &dst, int top, int left, int type, float v)
-{
- int w = dst.w;
- int h = dst.h;
-
- const float *ptr = src; //.data;
- float *outptr = dst; //.data;
-
- if (type == BORDER_CONSTANT)
- {
- int y = 0;
- // fill top
- for (; y < top; y++)
- {
- int x = 0;
- for (; x < w; x++)
- {
- outptr[x] = v;
- }
- outptr += w;
- }
- // fill center
- for (; y < (top + src.h); y++)
- {
- int x = 0;
- for (; x < left; x++)
- {
- outptr[x] = v;
- }
- if (src.w < 12)
- {
- for (; x < (left + src.w); x++)
- {
- outptr[x] = ptr[x - left];
- }
- }
- else
- {
- memcpy(outptr + left, ptr, src.w * sizeof(float));
- x += src.w;
- }
- for (; x < w; x++)
- {
- outptr[x] = v;
- }
- ptr += src.w;
- outptr += w;
- }
- // fill bottom
- for (; y < h; y++)
- {
- int x = 0;
- for (; x < w; x++)
- {
- outptr[x] = v;
- }
- outptr += w;
- }
- }
- else if (type == BORDER_REPLICATE)
- {
- int y = 0;
- // fill top
- for (; y < top; y++)
- {
- int x = 0;
- for (; x < left; x++)
- {
- outptr[x] = ptr[0];
- }
- if (src.w < 12)
- {
- for (; x < (left + src.w); x++)
- {
- outptr[x] = ptr[x - left];
- }
- }
- else
- {
- memcpy(outptr + left, ptr, src.w * sizeof(float));
- x += src.w;
- }
- for (; x < w; x++)
- {
- outptr[x] = ptr[src.w - 1];
- }
- outptr += w;
- }
- // fill center
- for (; y < (top + src.h); y++)
- {
- int x = 0;
- for (; x < left; x++)
- {
- outptr[x] = ptr[0];
- }
- if (src.w < 12)
- {
- for (; x < (left + src.w); x++)
- {
- outptr[x] = ptr[x - left];
- }
- }
- else
- {
- memcpy(outptr + left, ptr, src.w * sizeof(float));
- x += src.w;
- }
- for (; x < w; x++)
- {
- outptr[x] = ptr[src.w - 1];
- }
- ptr += src.w;
- outptr += w;
- }
- // fill bottom
- ptr -= src.w;
- for (; y < h; y++)
- {
- int x = 0;
- for (; x < left; x++)
- {
- outptr[x] = ptr[0];
- }
- if (src.w < 12)
- {
- for (; x < (left + src.w); x++)
- {
- outptr[x] = ptr[x - left];
- }
- }
- else
- {
- memcpy(outptr + left, ptr, src.w * sizeof(float));
- x += src.w;
- }
- for (; x < w; x++)
- {
- outptr[x] = ptr[src.w - 1];
- }
- outptr += w;
- }
- }
-}
-
-#if defined(_MEMORY_TO_TIME_) && defined(_TIME_TO_MEMORY_)
-static void copy_make_border_image_inplace(const Mat &src, Mat &dst, int top, int left, int type,
- float v)
-{
- int w = dst.w;
- int h = dst.h;
-
- const float *ptr = src;
- float *outptr = dst;
-
- if (type == BORDER_CONSTANT)
- {
- // fill bottom
- int y = src.h + top;
- outptr += y * w;
- for (; y < h; y++)
- {
- int x = 0;
- for (; x < w; x++)
- {
- outptr[x] = v;
- }
- outptr += w;
- }
-
- // fill center
- y = src.h + top - 1;
- outptr = dst;
- outptr += y * w;
- ptr += (src.h - 1) * src.w;
-
- for (; y >= top; y--)
- {
- int x = left + src.w;
- for (; x < w; x++)
- {
- outptr[x] = v;
- }
-
- x = left + src.w - 1;
-
- for (; x >= left; x--)
- {
- outptr[x] = ptr[x - left];
- }
-
- for (x = 0; x < left; x++)
- {
- outptr[x] = v;
- }
- ptr -= src.w;
- outptr -= w;
- }
-
- // fill top
- y = 0;
- outptr = dst;
- for (; y < top; y++)
- {
- int x = 0;
- for (; x < w; x++)
- {
- outptr[x] = v;
- }
- outptr += w;
- }
- }
-}
-#endif // _MEMORY_TO_TIME_ && _TIME_TO_MEMORY_
-
-void copy_make_border(const Mat &src, Mat &dst, int top, int bottom, int left, int right, int type,
- float v)
-{
- int w = src.w + left + right;
- int h = src.h + top + bottom;
-
- if (w == src.w && h == src.h)
- {
- dst = src;
- return;
- }
-
- if (src.dims == 2)
- {
- dst.create(w, h);
- if (dst.empty())
- return;
- copy_make_border_image(src, dst, top, left, type, v);
- }
- else if (src.dims == 3)
- {
- int channels = src.c;
- dst.create(w, h, channels);
- if (dst.empty())
- return;
-
- if (src.data != dst.data)
- {
-// unroll image channel
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const Mat m = src.channel(q);
- Mat borderm = dst.channel(q);
-
- copy_make_border_image(m, borderm, top, left, type, v);
- }
- }
- else
- {
-#if defined(_MEMORY_TO_TIME_) && defined(_TIME_TO_MEMORY_)
- for (int q = channels - 1; q >= 0; q--)
- {
- Mat m = src.channel(q);
- Mat borderm = dst.channel(q);
- copy_make_border_image_inplace(m, borderm, top, left, type, v);
- }
-#else
-// unroll image channel
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const Mat m = src.channel(q);
- Mat borderm = dst.channel(q);
-
- copy_make_border_image(m, borderm, top, left, type, v);
- }
-#endif // _MEMORY_TO_TIME_ && _TIME_TO_MEMORY_
- }
- }
-}
-
-static void copy_cut_border_image(const Mat &src, Mat &dst, int top, int left)
-{
- int w = dst.w;
- int h = dst.h;
-
- const float *ptr = src.row(top) + left; //.data + src.w * top + left;
- float *outptr = dst; //.data;
-
- for (int y = 0; y < h; y++)
- {
- if (w < 12)
- {
- for (int x = 0; x < w; x++)
- {
- outptr[x] = ptr[x];
- }
- }
- else
- {
- memcpy(outptr, ptr, w * sizeof(float));
- }
- outptr += w;
- ptr += src.w;
- }
-}
-
-void copy_cut_border(const Mat &src, Mat &dst, int top, int bottom, int left, int right)
-{
- int w = src.w - left - right;
- int h = src.h - top - bottom;
-
-#ifndef _MEMORY_TO_TIME_
- if (w == src.w && h == src.h)
- {
- dst = src;
- return;
- }
-#endif
-
- if (src.dims == 2)
- {
- dst.create(w, h);
- if (dst.empty())
- return;
-
- copy_cut_border_image(src, dst, top, left);
- }
- else if (src.dims == 3)
- {
- int channels = src.c;
-
- dst.create(w, h, channels);
- if (dst.empty())
- return;
-
-#if !defined(_MEMORY_TO_TIME_) || !defined(_TIME_TO_MEMORY_)
-// unroll image channel
-#pragma omp parallel for
-#endif
- for (int q = 0; q < channels; q++)
- {
- const Mat m = src.channel(q);
- Mat cutm = dst.channel(q);
-
- copy_cut_border_image(m, cutm, top, left);
- }
- }
-}
-
-static void resize_bilinear_image(const Mat &src, Mat &dst, int w, int h)
-{
- double scale_x = (double)src.w / w;
- double scale_y = (double)src.h / h;
-
- int *buf = new int[w + h + w * 2 + h * 2];
-
- int *xofs = buf; // new int[w];
- int *yofs = buf + w; // new int[h];
-
- float *alpha = (float *)(buf + w + h); // new float[w * 2];
- float *beta = (float *)(buf + w + h + w * 2); // new float[h * 2];
-
- float fx;
- float fy;
- int sx;
- int sy;
-
- for (int dx = 0; dx < w; dx++)
- {
- fx = (float)((dx + 0.5) * scale_x - 0.5);
- sx = fx; // cvFloor(fx);
- fx -= sx;
-
- if (sx >= src.w - 1)
- {
- sx = src.w - 2;
- fx = 1.f;
- }
-
- xofs[dx] = sx;
-
- alpha[dx * 2] = 1.f - fx;
- alpha[dx * 2 + 1] = fx;
- }
-
- for (int dy = 0; dy < h; dy++)
- {
- fy = (float)((dy + 0.5) * scale_y - 0.5);
- sy = fy; // cvFloor(fy);
- fy -= sy;
-
- if (sy >= src.h - 1)
- {
- sy = src.h - 2;
- fy = 1.f;
- }
-
- yofs[dy] = sy;
-
- beta[dy * 2] = 1.f - fy;
- beta[dy * 2 + 1] = fy;
- }
-
- // loop body
- Mat rowsbuf0(w + 1);
- Mat rowsbuf1(w + 1);
- float *rows0 = rowsbuf0;
- float *rows1 = rowsbuf1;
-
- int prev_sy1 = -1;
-
- for (int dy = 0; dy < h; dy++)
- {
- int sy = yofs[dy];
-
- if (sy == prev_sy1)
- {
- // hresize one row
- float *rows0_old = rows0;
- rows0 = rows1;
- rows1 = rows0_old;
- const float *S1 = src.row(sy + 1);
-
- const float *alphap = alpha;
- float *rows1p = rows1;
- int dx = 0;
-#if __ARM_NEON
- for (; dx + 1 < w; dx += 2)
- {
- int sx = xofs[dx];
- int sxn = xofs[dx + 1];
- const float *S1p = S1 + sx;
- const float *S1np = S1 + sxn;
-
- float32x4_t _a = vld1q_f32(alphap);
- float32x2_t _S1 = vld1_f32(S1p);
- float32x2_t _S1n = vld1_f32(S1np);
-
- float32x4_t _S1S1n = vcombine_f32(_S1, _S1n);
- float32x4_t _ms1 = vmulq_f32(_S1S1n, _a);
- float32x2_t _rows1 = vpadd_f32(vget_low_f32(_ms1), vget_high_f32(_ms1));
-
- vst1_f32(rows1p + dx, _rows1);
-
- alphap += 4;
- }
-#endif // __ARM_NEON
- for (; dx < w; dx++)
- {
- int sx = xofs[dx];
- const float *S1p = S1 + sx;
-
- float a0 = alphap[0];
- float a1 = alphap[1];
- rows1p[dx] = S1p[0] * a0 + S1p[1] * a1;
-
- alphap += 2;
- }
- }
- else
- {
- // hresize two rows
- const float *S0 = src.row(sy);
- const float *S1 = src.row(sy + 1);
-
- const float *alphap = alpha;
- float *rows0p = rows0;
- float *rows1p = rows1;
- int dx = 0;
-#if __ARM_NEON
- for (; dx + 1 < w; dx += 2)
- {
- int sx = xofs[dx];
- int sxn = xofs[dx + 1];
- const float *S0p = S0 + sx;
- const float *S1p = S1 + sx;
- const float *S0np = S0 + sxn;
- const float *S1np = S1 + sxn;
-
- float32x4_t _a = vld1q_f32(alphap);
- float32x2_t _S0 = vld1_f32(S0p);
- float32x2_t _S1 = vld1_f32(S1p);
- float32x2_t _S0n = vld1_f32(S0np);
- float32x2_t _S1n = vld1_f32(S1np);
-
- float32x4_t _S0S0n = vcombine_f32(_S0, _S0n);
- float32x4_t _S1S1n = vcombine_f32(_S1, _S1n);
- float32x4_t _ms0 = vmulq_f32(_S0S0n, _a);
- float32x4_t _ms1 = vmulq_f32(_S1S1n, _a);
- float32x2_t _rows0 = vpadd_f32(vget_low_f32(_ms0), vget_high_f32(_ms0));
- float32x2_t _rows1 = vpadd_f32(vget_low_f32(_ms1), vget_high_f32(_ms1));
-
- vst1_f32(rows0p + dx, _rows0);
- vst1_f32(rows1p + dx, _rows1);
-
- alphap += 4;
- }
-#endif // __ARM_NEON
- for (; dx < w; dx++)
- {
- int sx = xofs[dx];
- const float *S0p = S0 + sx;
- const float *S1p = S1 + sx;
-
- float a0 = alphap[0];
- float a1 = alphap[1];
- rows0p[dx] = S0p[0] * a0 + S0p[1] * a1;
- rows1p[dx] = S1p[0] * a0 + S1p[1] * a1;
-
- alphap += 2;
- }
- }
-
- prev_sy1 = sy + 1;
-
- // vresize
- float b0 = beta[0];
- float b1 = beta[1];
-
- float *rows0p = rows0;
- float *rows1p = rows1;
- float *Dp = dst.row(dy);
-
-#if __ARM_NEON
- int nn = w >> 3;
-#else
- int nn = 0;
-#endif
- int remain = w - (nn << 3);
-
-#if __ARM_NEON
- float32x4_t _b0 = vdupq_n_f32(b0);
- float32x4_t _b1 = vdupq_n_f32(b1);
- for (; nn > 0; nn--)
- {
- float32x4_t _rows0 = vld1q_f32(rows0p);
- float32x4_t _rows1 = vld1q_f32(rows1p);
-
- float32x4_t _D = vmulq_f32(_rows0, _b0);
- _D = vmlaq_f32(_D, _rows1, _b1);
-
- vst1q_f32(Dp, _D);
-
- float32x4_t _rows0n = vld1q_f32(rows0p + 4);
- float32x4_t _rows1n = vld1q_f32(rows1p + 4);
-
- float32x4_t _Dn = vmulq_f32(_rows0n, _b0);
- _Dn = vmlaq_f32(_Dn, _rows1n, _b1);
-
- vst1q_f32(Dp + 4, _Dn);
-
- Dp += 8;
- rows0p += 8;
- rows1p += 8;
- }
-#endif // __ARM_NEON
- for (; remain; --remain)
- {
- // D[x] = rows0[x]*b0 + rows1[x]*b1;
- *Dp++ = *rows0p++ * b0 + *rows1p++ * b1;
- }
-
- beta += 2;
- }
-
- delete[] buf;
-}
-
-void resize_bilinear(const Mat &src, Mat &dst, int w, int h)
-{
- if (w == src.w && h == src.h)
- {
- dst = src;
- return;
- }
-
- if (src.dims == 2)
- {
- dst.create(w, h);
- if (dst.empty())
- return;
-
- resize_bilinear_image(src, dst, w, h);
- }
- else if (src.dims == 3)
- {
- int channels = src.c;
-
- dst.create(w, h, channels);
- if (dst.empty())
- return;
-
-// unroll image channel
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
- const Mat m = src.channel(q);
- Mat resizem = dst.channel(q);
-
- resize_bilinear_image(m, resizem, w, h);
- }
- }
-}
-
-} // namespace ncnn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/common.h b/compute/ncnn/src/srcn/common.h
deleted file mode 100644
index 778a17a80..000000000
--- a/compute/ncnn/src/srcn/common.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_COMMON_H__
-#define __NNFW_SRCN_COMMON_H__
-
-#include <string.h>
-#include <limits>
-#include <arm_neon.h>
-
-#include "ncnn/srcn/conv_type.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-#define sizeof_RhsScalar 4
-#define sizeof_LhsScalar 4
-#define sizeof_ResScalar 4
-
-#define MIN(a, b) (a) > (b) ? (b) : (a)
-#define MAX(a, b) (a) > (b) ? (a) : (b)
-
-enum shardType_t
-{
- shardByCol = 0,
- shardByRow
-};
-
-#ifdef TIZEN
-#define L1_CACHE_SIZE (16536 * 2)
-#define L2_CACHE_SIZE (524288 * 2)
-#define L3_CACHE_SIZE (0) // no L3
-#define MAX_K (512)
-// single-thread
-#define GEN_COL (1440)
-// multi-threads
-#define MAX_COL (90)
-#define MIN_COL (32)
-#elif defined ANDROID
-#define L1_CACHE_SIZE (16536 * 4)
-#define L2_CACHE_SIZE (524288 * 8)
-#define L3_CACHE_SIZE (0) //(524288 * 8) //no L3
-#define MAX_K (512 * 2)
-// single-thread
-#define GEN_COL (1440)
-// multi-threads
-#if __aarch64__
-#define MAX_COL (1024)
-#else
-#define MAX_COL (90)
-#endif
-#define MIN_COL (32)
-#endif
-
-enum
-{
- USE_COMMON_KENEL = 0,
- USE_12BIT_KERNEL,
- USE_NONZERO_KERENL
-};
-
-template <typename T> static T divup(const T &x, const T &y)
-{
- return static_cast<T>((x + y - 1) / y);
-}
-
-#ifdef NCNN
-static inline size_t alignSize(size_t sz, int n) { return (sz + n - 1) / n * n; }
-
-static inline size_t alignBy2(size_t sz) { return (sz + 1) & -2; }
-#endif // NCNN
-
-static inline int32_t BitNot(int32_t a) { return ~a; }
-
-static inline int32_t MaskIfNonZero(int32_t a)
-{
- static int32_t zero = 0;
- return a ? BitNot(zero) : zero;
-}
-
-static inline int32_t BitAnd(int32_t a, int32_t b) { return a & b; }
-
-static inline int32_t ShiftRight(int32_t a, int offset) { return a >> offset; }
-
-static inline int32_t MaskIfLessThan(int32_t a, int32_t b) { return MaskIfNonZero(a < b); }
-
-static inline int32_t MaskIfGreaterThan(int32_t a, int32_t b) { return MaskIfNonZero(a > b); }
-
-static inline int32_t Add(int32_t a, int32_t b) { return a + b; }
-
-static inline int32_t RoundingDivideByPOT(int32_t x, int exponent)
-{
- const int32_t mask = (1ll << exponent) - 1;
- const int32_t zero = 0;
- const int32_t one = 1;
- const int32_t remainder = BitAnd(x, mask);
- const int32_t threshold = Add(ShiftRight(mask, 1), BitAnd(MaskIfLessThan(x, zero), one));
- return Add(ShiftRight(x, exponent), BitAnd(MaskIfGreaterThan(remainder, threshold), one));
-}
-static inline int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
-{
- bool overflow = a == b && a == std::numeric_limits<int32_t>::min();
- int64_t a_64(a);
- int64_t b_64(b);
- int64_t ab_64 = a_64 * b_64;
- int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
- int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
- return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
-}
-
-static inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier,
- int shift)
-{
- int left_shift = shift > 0 ? shift : 0;
- int right_shift = shift > 0 ? 0 : -shift;
- return RoundingDivideByPOT(
- SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
-}
-
-static inline int32x4_t SaturatingRoundingDoublingHighMulV(int32x4_t a, int32x4_t b)
-{
- return vqrdmulhq_s32(a, b);
-}
-
-static inline int32x4_t RoundingDivideByPOTV(int32x4_t x, int exponent)
-{
- const int32x4_t shift_vec = vdupq_n_s32(-exponent);
- const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift_vec), 31);
- const int32x4_t fixed_up_x = vqaddq_s32(x, fixup);
- return vrshlq_s32(fixed_up_x, shift_vec);
-}
-
-static inline int32x4_t MultiplyByQuantizedMultiplierV(int32x4_t x, int32_t quantized_multiplier,
- int shift)
-{
- int left_shift = shift > 0 ? shift : 0;
- int right_shift = shift > 0 ? 0 : -shift;
- return RoundingDivideByPOTV(
- SaturatingRoundingDoublingHighMulV(vrshlq_s32(x, vdupq_n_s32(left_shift)),
- vdupq_n_s32(quantized_multiplier)),
- right_shift);
-}
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_COMMON_H__
diff --git a/compute/ncnn/src/srcn/conv_sgemm_multithreads.cc b/compute/ncnn/src/srcn/conv_sgemm_multithreads.cc
deleted file mode 100644
index 21083f677..000000000
--- a/compute/ncnn/src/srcn/conv_sgemm_multithreads.cc
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include "ncnn/srcn/conv_type.h"
-#include "common.h"
-#include "sgemm_kernel.h"
-#include "sgemm_pack.h"
-#include "conv_sgemm_multithreads.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-void conv_sgemm_multithreads::param_init()
-{
-#if __aarch64__
- if (conv_type_ == row_major)
- {
- mr_ = 8;
- nr_ = 12;
- }
- else if (conv_type_ == col_major)
- {
-#ifdef BATCH_DILATION_FIX
- if (out_mat_.n > 1)
- {
-
- mr_ = 24;
- nr_ = 4;
- }
- else
-#endif // BATCH_DILATION_FIX
- {
- if (m_ > n_)
- {
- mr_ = 24;
- nr_ = 4;
- }
- else
- {
- mr_ = 12;
- nr_ = 8;
- }
- }
- }
-#else // __aarch64__
- if (conv_type_ == row_major)
- {
- mr_ = 6;
- nr_ = 8;
- }
- else if (conv_type_ == col_major)
- {
- mr_ = 8;
- nr_ = 6;
- }
-#endif // __aarch64__
- int col = n_;
-
- if (m_ > n_)
- {
- shard_type_ = shardByRow;
- col = m_;
- }
- else
- {
- shard_type_ = shardByCol;
- }
-
- int th_base = divup(col, num_threads_);
-
- th_base = MIN(MAX(th_base, MIN_COL), MAX_COL);
-
- int k_div = (nr_ * sizeof_RhsScalar);
- int k_sub = (mr_ * nr_ * sizeof_ResScalar);
-
- const int k_cache = MIN(divup((int)(L1_CACHE_SIZE - k_sub), (int)k_div * 2), MAX_K);
- bk_ = MIN(k_cache, k_);
-
- if (shard_type_ == shardByCol)
- {
- int m_sub = (bk_ * nr_ * sizeof_RhsScalar);
- int m_div = (sizeof_LhsScalar * bk_ * 2 * num_threads_);
- if (L3_CACHE_SIZE)
- m_div = (sizeof_LhsScalar * bk_ * 2);
- int m_cache = divup((L2_CACHE_SIZE - m_sub), m_div);
- bm_ = MIN(m_cache, m_);
-
- bn_ = MIN(th_base, n_);
- if (L3_CACHE_SIZE)
- {
- int n_sub = (bk_ * bm_ * sizeof_RhsScalar);
- int n_div = (sizeof_LhsScalar * bk_ * 2 * num_threads_);
- int n_cache = divup((L3_CACHE_SIZE - n_sub), n_div);
- bn_ = MIN(n_cache, bn_);
- }
- }
- else
- {
- int n_sub = (bk_ * mr_ * sizeof_LhsScalar);
- int n_div = (sizeof_LhsScalar * bk_ * 2 * num_threads_);
- if (L3_CACHE_SIZE)
- n_div = (sizeof_LhsScalar * bk_ * 2);
- int n_cache = divup((L2_CACHE_SIZE - n_sub), n_div);
- bn_ = MIN(n_cache, n_);
-
- bm_ = MIN(th_base, m_);
- if (L3_CACHE_SIZE)
- {
- int m_sub = (bk_ * bn_ * sizeof_RhsScalar);
- int m_div = (sizeof_LhsScalar * bk_ * 2 * num_threads_);
- int m_cache = divup((L3_CACHE_SIZE - m_sub), m_div);
- bm_ = MIN(m_cache, bm_);
- }
- }
-
- nm_ = divup(m_, bm_);
- nn_ = divup(n_, bn_);
- nk_ = divup(k_, bk_);
-
- rm_ = m_ % bm_;
- rn_ = n_ % bn_;
- rk_ = k_ % bk_;
-}
-
-conv_sgemm_multithreads::conv_sgemm_multithreads(const convMat_t &in_mat,
- const convMat_t &weights_mat, convMat_t &out_mat,
- const convParams_t &in_param, int num_threads,
- convType_t conv_type)
-
- : in_mat_(in_mat), weights_mat_(weights_mat), out_mat_(out_mat), in_param_(in_param),
- conv_type_(conv_type), num_threads_(num_threads)
-{
- m_ = out_mat_.c;
-#ifdef NCNN
-#ifdef WITH_DPU
- np_ = out_mat_.n * alignSize(out_mat_.h * out_mat_.w, 16 / sizeof(float));
- n_ = (np_ + 1) / 2;
-#else // WITH_DPU
- n_ = out_mat_.n * alignSize(out_mat_.h * out_mat_.w, 16 / sizeof(float));
-#endif // WITH_DPU
-#else // NCNN
-#ifdef WITH_DPU
- np_ = out_mat_.n * out_mat_.w * out_mat_.h;
- n_ = (np_ + 1) / 2;
-#else // WITH_DPU
- n_ = out_mat_.n * out_mat_.w * out_mat_.h;
-#endif // WITH_DPU
-#endif // NCNN
- k_ = in_param_.kernel_h * in_param_.kernel_w * in_mat.c;
-
- param_init();
-
- int lhs_stride = (bm_ + mr_ - 1) / mr_ * mr_ * bk_;
- int rhs_stride = (bn_ + nr_ - 1) / nr_ * nr_ * bk_;
-
- if (shard_type_ == shardByCol)
- {
- plhs_buffer_ = new float[lhs_stride * 1 * nm_];
- prhs_buffer_ = new float[rhs_stride * num_threads_];
- }
- else
- {
- plhs_buffer_ = new float[lhs_stride * num_threads_];
- prhs_buffer_ = new float[rhs_stride * 1 * nn_];
- }
-
- if (plhs_buffer_ == NULL || prhs_buffer_ == NULL)
- {
- error_ = 1;
- }
-
- if (in_param_.kernel_w != 1 || in_param_.kernel_h != 1 || in_param_.stride_w != 1 ||
- in_param_.stride_h != 1 || in_param_.padding != 0)
- {
- need_im2col_ = 1;
- }
- else
- {
- need_im2col_ = 0;
- }
-
- omp_set_num_threads(num_threads_);
-
- error_ = 0;
-}
-
-conv_sgemm_multithreads::~conv_sgemm_multithreads()
-{
- if (plhs_buffer_)
- delete[] plhs_buffer_;
- if (prhs_buffer_)
- delete[] prhs_buffer_;
-}
-
-void conv_sgemm_multithreads::run()
-{
- if (error_)
- return;
-
- if (shard_type_ == shardByCol && conv_type_ == col_major)
- {
- compute_colmajor_colshard();
- }
- else if (shard_type_ == shardByRow && conv_type_ == col_major)
- {
- compute_colmajor_rowshard();
- }
- else if (shard_type_ == shardByCol && conv_type_ == row_major)
- {
- compute_rowmajor_colshard();
- }
- else if (shard_type_ == shardByRow && conv_type_ == row_major)
- {
- compute_rowmajor_rowshard();
- }
-}
-
-void conv_sgemm_multithreads::compute_rowmajor_colshard()
-{
- int lhs_stride = (bm_ + mr_ - 1) / mr_ * mr_ * bk_;
- int rhs_stride = (bn_ + nr_ - 1) / nr_ * nr_ * bk_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
-#pragma omp parallel for
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_rowmajor_notrans_lhs(mr_, bm, bk, k_, &weights_mat_.data[i * bm_ * k_ + l * bk_],
- &plhs_buffer_[i * lhs_stride]);
- }
-
-#pragma omp parallel for
- for (int j = 0; j < nn_; j++)
- {
- int thread_num = omp_get_thread_num();
- // float *plhs_ptr = &plhs_buffer_[lhs_stride * thread_num];
- float *prhs_ptr = &prhs_buffer_[rhs_stride * thread_num];
-
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- if (need_im2col_)
- {
- if (out_mat_.n == 1)
- {
- _pack_rowmajor_image_rhs(nr_, bn, bk, l * bk_, j * bn_, const_cast<convMat_t *>(&in_mat_),
- &out_mat_, const_cast<convParams_t *>(&in_param_), prhs_ptr);
- }
- else
- {
- _pack_rowmajor_image_rhs_batch(nr_, bn, bk, l * bk_, j * bn_,
- const_cast<convMat_t *>(&in_mat_), &out_mat_,
- const_cast<convParams_t *>(&in_param_), prhs_ptr);
- }
- }
- else
- {
-#ifdef WITH_DPU
- _pack_rowmajor_notrans_rhs(nr_, bn, bk, np_, &in_mat_.data[n_ + l * bk_ * np_ + j * bn_],
- prhs_ptr);
-#else
- _pack_rowmajor_notrans_rhs(nr_, bn, bk, n_, &in_mat_.data[l * bk_ * n_ + j * bn_],
- prhs_ptr);
-#endif
- }
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
-#ifdef WITH_DPU
- _sgemm_rowmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, &plhs_buffer_[i * lhs_stride],
- prhs_ptr, &out_mat_.data[n_ + i * bm_ * np_ + j * bn_],
- l, np_, bk);
-#else // WITH_DPU
- _sgemm_rowmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, &plhs_buffer_[i * lhs_stride],
- prhs_ptr, &out_mat_.data[i * bm_ * n_ + j * bn_], l, n_,
- bk);
-#endif // WITH_DPU
- }
- }
- }
-}
-
-void conv_sgemm_multithreads::compute_rowmajor_rowshard()
-{
- int lhs_stride = (bm_ + mr_ - 1) / mr_ * mr_ * bk_;
- int rhs_stride = (bn_ + nr_ - 1) / nr_ * nr_ * bk_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
-#pragma omp parallel for
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- if (need_im2col_)
- {
- if (out_mat_.n == 1)
- {
- _pack_rowmajor_image_rhs(nr_, bn, bk, l * bk_, j * bn_, const_cast<convMat_t *>(&in_mat_),
- &out_mat_, const_cast<convParams_t *>(&in_param_),
- &prhs_buffer_[j * rhs_stride]);
- }
- else
- {
- _pack_rowmajor_image_rhs_batch(
- nr_, bn, bk, l * bk_, j * bn_, const_cast<convMat_t *>(&in_mat_), &out_mat_,
- const_cast<convParams_t *>(&in_param_), &prhs_buffer_[j * rhs_stride]);
- }
- }
- else
- {
- _pack_rowmajor_notrans_rhs(nr_, bn, bk, n_, &in_mat_.data[l * bk_ * n_ + j * bn_],
- &prhs_buffer_[j * rhs_stride]);
- }
- }
-
-#pragma omp parallel for
- for (int i = 0; i < nm_; i++)
- {
- int thread_num = omp_get_thread_num();
- float *plhs_ptr = &plhs_buffer_[lhs_stride * thread_num];
-
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_rowmajor_notrans_lhs(mr_, bm, bk, k_, &weights_mat_.data[i * bm_ * k_ + l * bk_],
- plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- _sgemm_rowmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr,
- &prhs_buffer_[j * rhs_stride],
- &out_mat_.data[i * bm_ * n_ + j * bn_], l, n_, bk);
- }
- }
- }
-}
-
-void conv_sgemm_multithreads::compute_colmajor_colshard()
-{
- int lhs_stride = (bm_ + mr_ - 1) / mr_ * mr_ * bk_;
- int rhs_stride = (bn_ + nr_ - 1) / nr_ * nr_ * bk_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
-#pragma omp parallel for
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_colmajor_notrans_lhs(mr_, bm, bk, m_, &weights_mat_.data[l * bk_ * m_ + i * bm_],
- &plhs_buffer_[i * lhs_stride]);
- }
-
-#pragma omp parallel for
- for (int j = 0; j < nn_; j++)
- {
- int thread_num = omp_get_thread_num();
- float *prhs_ptr = &prhs_buffer_[rhs_stride * thread_num];
-
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- if (need_im2col_)
- {
- if (out_mat_.n == 1)
- {
- _pack_colmajor_image_rhs(nr_, bn, bk, l * bk_, j * bn_, const_cast<convMat_t *>(&in_mat_),
- &out_mat_, const_cast<convParams_t *>(&in_param_), prhs_ptr);
- }
- else
- {
- _pack_colmajor_image_rhs_batch(nr_, bn, bk, l * bk_, j * bn_,
- const_cast<convMat_t *>(&in_mat_), &out_mat_,
- const_cast<convParams_t *>(&in_param_), prhs_ptr);
- }
- }
- else
- {
- _pack_colmajor_notrans_rhs(nr_, bn, bk, k_, &in_mat_.data[j * bn_ * k_ + l * bk_],
- prhs_ptr);
- }
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _sgemm_colmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, &plhs_buffer_[i * lhs_stride],
- prhs_ptr, &out_mat_.data[j * bn_ * m_ + i * bm_], l, m_,
- bk);
- }
- }
- }
-}
-
-void conv_sgemm_multithreads::compute_colmajor_rowshard()
-{
- int lhs_stride = (bm_ + mr_ - 1) / mr_ * mr_ * bk_;
- int rhs_stride = (bn_ + nr_ - 1) / nr_ * nr_ * bk_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
-#pragma omp parallel for
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- if (need_im2col_)
- {
- if (out_mat_.n == 1)
- {
- _pack_colmajor_image_rhs(nr_, bn, bk, l * bk_, j * bn_, const_cast<convMat_t *>(&in_mat_),
- &out_mat_, const_cast<convParams_t *>(&in_param_),
- &prhs_buffer_[j * rhs_stride]);
- }
- else
- {
- _pack_colmajor_image_rhs_batch(
- nr_, bn, bk, l * bk_, j * bn_, const_cast<convMat_t *>(&in_mat_), &out_mat_,
- const_cast<convParams_t *>(&in_param_), &prhs_buffer_[j * rhs_stride]);
- }
- }
- else
- {
- _pack_colmajor_notrans_rhs(nr_, bn, bk, k_, &in_mat_.data[j * bn_ * k_ + l * bk_],
- &prhs_buffer_[j * rhs_stride]);
- }
- }
-
-#pragma omp parallel for
- for (int i = 0; i < nm_; i++)
- {
- int thread_num = omp_get_thread_num();
- float *plhs_ptr = &plhs_buffer_[lhs_stride * thread_num];
-
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_colmajor_notrans_lhs(mr_, bm, bk, m_, &weights_mat_.data[l * bk_ * m_ + i * bm_],
- plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- _sgemm_colmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr,
- &prhs_buffer_[j * rhs_stride],
- &out_mat_.data[j * bn_ * m_ + i * bm_], l, m_, bk);
- }
- }
- }
-}
-
-} // namespace srcn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/conv_sgemm_multithreads.h b/compute/ncnn/src/srcn/conv_sgemm_multithreads.h
deleted file mode 100644
index 9c9ce7437..000000000
--- a/compute/ncnn/src/srcn/conv_sgemm_multithreads.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_CONV_SGEMM_MULTITHREADS_H__
-#define __NNFW_SRCN_CONV_SGEMM_MULTITHREADS_H__
-
-#include "ncnn/srcn/conv_type.h"
-#include "common.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-class conv_sgemm_multithreads
-{
-public:
- conv_sgemm_multithreads(const convMat_t &in_mat, const convMat_t &weights_mat, convMat_t &out_mat,
- const convParams_t &in_param, int num_threads, convType_t conv_type);
- ~conv_sgemm_multithreads();
-
- void run();
-
-private:
- void param_init();
-
- void compute_rowmajor_colshard();
- void compute_rowmajor_rowshard();
- void compute_colmajor_colshard();
- void compute_colmajor_rowshard();
-
- const convMat_t in_mat_;
- const convMat_t weights_mat_;
- convMat_t out_mat_;
- const convParams_t in_param_;
- convType_t conv_type_;
- int num_threads_;
-
- int m_;
- int n_;
-#ifdef WITH_DPU
- int np_;
-#endif
- int k_;
-
- int bm_;
- int bn_;
- int bk_;
-
- int rm_;
- int rn_;
- int rk_;
-
- int nm_;
- int nn_;
- int nk_;
-
- int mr_;
- int nr_;
-
- int need_im2col_;
- shardType_t shard_type_;
-
- float *prhs_buffer_;
- float *plhs_buffer_;
-
- int error_;
-};
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_CONV_SGEMM_MULTITHREADS_H__
diff --git a/compute/ncnn/src/srcn/conv_sgemm_singlethread.cc b/compute/ncnn/src/srcn/conv_sgemm_singlethread.cc
deleted file mode 100644
index 4cbbf217f..000000000
--- a/compute/ncnn/src/srcn/conv_sgemm_singlethread.cc
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <stdexcept>
-
-#include "common.h"
-#include "sgemm_kernel.h"
-#include "sgemm_pack.h"
-#include "conv_sgemm_singlethread.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-void conv_sgemm_singlethread::param_init()
-{
- if (n_ > 3 * m_)
- {
- shard_type_ = shardByRow;
- }
- else
- {
- shard_type_ = shardByCol;
- }
-
-#if __aarch64__
- if (conv_type_ == row_major)
- {
- if (shard_type_ == shardByRow)
- {
- mr_ = 8;
- nr_ = 12;
- }
- else
- {
- mr_ = 12;
- nr_ = 8;
- }
- }
- else if (conv_type_ == col_major)
- {
-#ifndef BATCH_DILATION_FIX
- mr_ = 12;
- nr_ = 8;
-#else // BATCH_DILATION_FIX
- // TODO: batch(dilation) + inw * inh
- if (out_mat_.n > 1)
- {
- mr_ = 24;
- nr_ = 4;
- }
- else
- {
- mr_ = 12;
- nr_ = 8;
- }
-#endif // BATCH_DILATION_FIX
- }
-#else // __aarch64__
- if (conv_type_ == row_major)
- {
- mr_ = 6;
- nr_ = 8;
- }
- else if (conv_type_ == col_major)
- {
- mr_ = 8;
- nr_ = 6;
- }
-#endif // __aarch64__
-
- int k_div = (nr_ * sizeof_RhsScalar);
- int k_sub = (mr_ * nr_ * sizeof_ResScalar);
-
- const int k_cache = MIN(divup((int)(L1_CACHE_SIZE - k_sub), (int)k_div), MAX_K);
- bk_ = MIN(k_cache, k_);
-
- if (shard_type_ == shardByCol)
- {
- int m_sub = (bk_ * nr_ * sizeof_RhsScalar);
- int m_cache = divup((L2_CACHE_SIZE - m_sub), (sizeof_LhsScalar * bk_ * 2));
- bm_ = MIN(m_cache, m_);
-
- bn_ = MIN(GEN_COL, n_);
- if (L3_CACHE_SIZE)
- {
- int n_sub = (bk_ * bm_ * sizeof_RhsScalar);
- int n_cache = divup((L3_CACHE_SIZE - n_sub), (sizeof_LhsScalar * bk_ * 2));
- bn_ = MIN(n_cache, bn_);
- }
- }
- else
- {
- int n_sub = (bk_ * mr_ * sizeof_RhsScalar);
- int n_cache = divup((L2_CACHE_SIZE - n_sub), (sizeof_LhsScalar * bk_ * 2));
- bn_ = MIN(n_cache, n_);
-
- bm_ = MIN(GEN_COL, m_);
- if (L3_CACHE_SIZE)
- {
- int m_sub = (bk_ * bn_ * sizeof_RhsScalar);
- int m_cache = divup((L3_CACHE_SIZE - m_sub), (sizeof_LhsScalar * bk_ * 2));
- bm_ = MIN(m_cache, bm_);
- }
- }
-
- nm_ = divup(m_, bm_);
- nn_ = divup(n_, bn_);
- nk_ = divup(k_, bk_);
-
- rm_ = m_ % bm_;
- rn_ = n_ % bn_;
- rk_ = k_ % bk_;
-}
-
-conv_sgemm_singlethread::conv_sgemm_singlethread(const convMat_t &in_mat,
- const convMat_t &weights_mat, convMat_t &out_mat,
- const convParams_t &in_param, convType_t conv_type)
- : in_mat_(in_mat), weights_mat_(weights_mat), out_mat_(out_mat), in_param_(in_param),
- conv_type_(conv_type)
-{
- m_ = out_mat_.c;
-#ifdef NCNN
- n_ = out_mat_.n * alignSize(out_mat_.h * out_mat_.w, 16 / sizeof(float));
-#else
- n_ = out_mat_.n * out_mat_.w * out_mat_.h;
-#endif
- k_ = in_param_.kernel_h * in_param_.kernel_w * in_mat.c;
-
- param_init();
-
- if (in_param_.kernel_w != 1 || in_param_.kernel_h != 1 || in_param_.stride_w != 1 ||
- in_param_.stride_h != 1 || in_param_.padding != 0 || out_mat_.n > 1)
- {
- need_im2col_ = 1;
- }
- else
- {
- need_im2col_ = 0;
- }
-}
-
-conv_sgemm_singlethread::~conv_sgemm_singlethread() {}
-
-void conv_sgemm_singlethread::run()
-{
- int mstride = (bm_ + mr_ - 1) / mr_ * mr_;
- int nstride = (bn_ + nr_ - 1) / nr_ * nr_;
-
- float *plhs_ptr = new float[mstride * bk_];
- float *prhs_ptr = new float[nstride * bk_];
-
- if (conv_type_ == row_major)
- {
- if (shard_type_ == shardByCol)
- {
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- if (need_im2col_)
- {
- if (out_mat_.n == 1)
- {
- _pack_rowmajor_image_rhs(nr_, bn, bk, l * bk_, j * bn_,
- const_cast<convMat_t *>(&in_mat_), &out_mat_,
- const_cast<convParams_t *>(&in_param_), prhs_ptr);
- }
- else
- {
- _pack_rowmajor_image_rhs_batch(nr_, bn, bk, l * bk_, j * bn_,
- const_cast<convMat_t *>(&in_mat_), &out_mat_,
- const_cast<convParams_t *>(&in_param_), prhs_ptr);
- }
- }
- else
- {
- _pack_rowmajor_notrans_rhs(nr_, bn, bk, n_, &in_mat_.data[l * bk_ * n_ + j * bn_],
- prhs_ptr);
- }
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_rowmajor_notrans_lhs(mr_, bm, bk, k_, &weights_mat_.data[i * bm_ * k_ + l * bk_],
- plhs_ptr);
-
- _sgemm_rowmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &out_mat_.data[i * bm_ * n_ + j * bn_], l, n_, bk);
- }
- }
- }
- }
- else if (shard_type_ == shardByRow)
- {
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_rowmajor_notrans_lhs(mr_, bm, bk, k_, &weights_mat_.data[i * bm_ * k_ + l * bk_],
- plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- if (need_im2col_)
- {
- if (out_mat_.n == 1)
- {
- _pack_rowmajor_image_rhs(nr_, bn, bk, l * bk_, j * bn_,
- const_cast<convMat_t *>(&in_mat_), &out_mat_,
- const_cast<convParams_t *>(&in_param_), prhs_ptr);
- }
- else
- {
- _pack_rowmajor_image_rhs_batch(nr_, bn, bk, l * bk_, j * bn_,
- const_cast<convMat_t *>(&in_mat_), &out_mat_,
- const_cast<convParams_t *>(&in_param_), prhs_ptr);
- }
- }
- else
- {
- _pack_rowmajor_notrans_rhs(nr_, bn, bk, n_, &in_mat_.data[l * bk_ * n_ + j * bn_],
- prhs_ptr);
- }
-
- _sgemm_rowmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &out_mat_.data[i * bm_ * n_ + j * bn_], l, n_, bk);
- }
- }
- }
- }
- else
- {
- throw std::runtime_error{"Error shrad type!"};
- }
- }
- else if (conv_type_ == col_major)
- {
- if (shard_type_ == shardByCol)
- {
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- if (need_im2col_)
- {
- if (out_mat_.n == 1)
- {
- _pack_colmajor_image_rhs(nr_, bn, bk, l * bk_, j * bn_,
- const_cast<convMat_t *>(&in_mat_), &out_mat_,
- const_cast<convParams_t *>(&in_param_), prhs_ptr);
- }
- else
- {
- _pack_colmajor_image_rhs_batch(nr_, bn, bk, l * bk_, j * bn_,
- const_cast<convMat_t *>(&in_mat_), &out_mat_,
- const_cast<convParams_t *>(&in_param_), prhs_ptr);
- }
- }
- else
- {
- _pack_colmajor_notrans_rhs(nr_, bn, bk, k_, &in_mat_.data[j * bn_ * k_ + l * bk_],
- prhs_ptr);
- }
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_colmajor_notrans_lhs(mr_, bm, bk, m_, &weights_mat_.data[l * bk_ * m_ + i * bm_],
- plhs_ptr);
-
- _sgemm_colmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &out_mat_.data[j * bn_ * m_ + i * bm_], l, m_, bk);
- }
- }
- }
- }
- else if (shard_type_ == shardByRow)
- {
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_colmajor_notrans_lhs(mr_, bm, bk, m_, &weights_mat_.data[l * bk_ * m_ + i * bm_],
- plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- if (need_im2col_)
- {
- if (out_mat_.n == 1)
- {
- _pack_colmajor_image_rhs(nr_, bn, bk, l * bk_, j * bn_,
- const_cast<convMat_t *>(&in_mat_), &out_mat_,
- const_cast<convParams_t *>(&in_param_), prhs_ptr);
- }
- else
- {
- _pack_colmajor_image_rhs_batch(nr_, bn, bk, l * bk_, j * bn_,
- const_cast<convMat_t *>(&in_mat_), &out_mat_,
- const_cast<convParams_t *>(&in_param_), prhs_ptr);
- }
- }
- else
- {
- _pack_colmajor_notrans_rhs(nr_, bn, bk, k_, &in_mat_.data[j * bn_ * k_ + l * bk_],
- prhs_ptr);
- }
-
- _sgemm_colmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &out_mat_.data[j * bn_ * m_ + i * bm_], l, m_, bk);
- }
- }
- }
- }
- else
- {
- throw std::runtime_error{"Error shrad type!"};
- }
- }
- else
- {
- throw std::runtime_error{"Error conv type!"};
- }
-
- delete[] plhs_ptr;
- delete[] prhs_ptr;
-}
-
-} // namespace srcn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/conv_sgemm_singlethread.h b/compute/ncnn/src/srcn/conv_sgemm_singlethread.h
deleted file mode 100644
index 63f8b6e66..000000000
--- a/compute/ncnn/src/srcn/conv_sgemm_singlethread.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_CONV_SGEMM_SINGLETHREAD_H__
-#define __NNFW_SRCN_CONV_SGEMM_SINGLETHREAD_H__
-
-#include "ncnn/srcn/conv_type.h"
-#include "common.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-class conv_sgemm_singlethread
-{
-public:
- conv_sgemm_singlethread(const convMat_t &in_mat, const convMat_t &weights_mat, convMat_t &out_mat,
- const convParams_t &in_param, convType_t conv_type);
- ~conv_sgemm_singlethread();
-
- void run();
-
-private:
- void param_init();
-
- const convMat_t in_mat_;
- const convMat_t weights_mat_;
- convMat_t out_mat_;
- const convParams_t in_param_;
- convType_t conv_type_;
-
- int m_;
- int n_;
- int k_;
-
- int bm_;
- int bn_;
- int bk_;
-
- int rm_;
- int rn_;
- int rk_;
-
- int nm_;
- int nn_;
- int nk_;
-
- int mr_;
- int nr_;
-
- int need_im2col_;
-
- shardType_t shard_type_;
-};
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_CONV_SGEMM_SINGLETHREAD_H__
diff --git a/compute/ncnn/src/srcn/conv_sparse.cc b/compute/ncnn/src/srcn/conv_sparse.cc
deleted file mode 100644
index 10e2a2b93..000000000
--- a/compute/ncnn/src/srcn/conv_sparse.cc
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include <stdexcept>
-
-#include "common.h"
-#include "sgemm_kernel.h"
-#include "sgemm_pack.h"
-#include "conv_sparse.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-void conv_sparse::param_init()
-{
-#ifdef NCNN
- n_ = alignSize(out_mat_.h * out_mat_.w, 16 / sizeof(float));
-#else
- n_ = out_mat_.w * out_mat_.h;
-#endif
-
- bch_ = BCH;
- nch_ = (out_mat_.c + bch_ - 1) / bch_;
-
- rch_ = out_mat_.c % bch_;
-
- bn_ = MIN(n_, L1_CACHE_SIZE / (sizeof(float) * 2));
- bn_ = MIN(bn_, (L2_CACHE_SIZE / 2 - bch_ * sizeof(weight_data_t)) / ((bch_ + 1) * sizeof(float)) /
- num_threads_);
- nn_ = (n_ + bn_ - 1) / bn_;
- rn_ = n_ % bn_;
-
- if (in_param_.kernel_w != 1 || in_param_.kernel_h != 1 || in_param_.stride_w != 1 ||
- in_param_.stride_h != 1 || in_param_.padding != 0)
- {
- need_im2col_ = 1;
- }
- else
- {
- need_im2col_ = 0;
- }
-}
-
-conv_sparse::conv_sparse(const convMat_t &in_mat, convMat_t &out_mat, const convParams_t &in_param,
- const sparse_weight_t *weights, int num_threads, convType_t conv_type)
- : in_mat_(in_mat), out_mat_(out_mat), in_param_(in_param), weights_(weights),
- num_threads_(num_threads), conv_type_(conv_type)
-{
- param_init();
-}
-
-conv_sparse::~conv_sparse() {}
-
-void conv_sparse::compute_singlethread()
-{
- if (need_im2col_)
- {
- for (int i = 0; i < nch_; i++)
- {
- const sparse_weight_t *weight_ptr = weights_ + i;
- const int mxk = weight_ptr->mxk;
- float prhs_ptr[bn_];
-
- for (int j = 0; j < nn_; j++)
- {
- int k = -1;
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- weight_data_t *lhs_ptr = weight_ptr->wdata;
-
- for (int l = 0; l < mxk; l++)
- {
- if (k != lhs_ptr->k)
- {
- k = lhs_ptr->k;
- _sparse_pack_rowmajor_image(bn, k, j * bn_, const_cast<convMat_t *>(&in_mat_),
- &out_mat_, const_cast<convParams_t *>(&in_param_),
- prhs_ptr);
- }
-
- // Why n_ = 64 x 64 is too much slower on Tizen???
- _sparse_sgemm_kernel(bn, lhs_ptr->data, prhs_ptr,
- &out_mat_.data[lhs_ptr->m * n_ + j * bn_]);
-
- lhs_ptr++;
- }
- }
- }
- }
- else
- {
- for (int i = 0; i < nch_; i++)
- {
- const sparse_weight_t *weight_ptr = weights_ + i;
- const int mxk = weight_ptr->mxk;
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- weight_data_t *lhs_ptr = weight_ptr->wdata;
- float *rhs_ptr = in_mat_.data + j * bn_;
-
- for (int l = 0; l < mxk; l++)
- {
- // Why n_ = 64 x 64 is too much slower on Tizen???
- _sparse_sgemm_kernel(bn, lhs_ptr->data, rhs_ptr + lhs_ptr->k * n_,
- &out_mat_.data[lhs_ptr->m * n_ + j * bn_]);
-
- lhs_ptr++;
- }
- }
- }
- }
-}
-
-void conv_sparse::compute_multithreads()
-{
- omp_set_num_threads(num_threads_);
-
- if (nch_ >= num_threads_ || nch_ >= nn_)
- {
- if (need_im2col_)
- {
-#pragma omp parallel for
- for (int i = 0; i < nch_; i++)
- {
- const sparse_weight_t *weight_ptr = weights_ + i;
- const int mxk = weight_ptr->mxk;
- float prhs_ptr[bn_];
-
- for (int j = 0; j < nn_; j++)
- {
- int k = -1;
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- weight_data_t *lhs_ptr = weight_ptr->wdata;
-
- for (int l = 0; l < mxk; l++)
- {
- if (k != lhs_ptr->k)
- {
- k = lhs_ptr->k;
- _sparse_pack_rowmajor_image(bn, k, j * bn_, const_cast<convMat_t *>(&in_mat_),
- &out_mat_, const_cast<convParams_t *>(&in_param_),
- prhs_ptr);
- }
-
- _sparse_sgemm_kernel(bn, lhs_ptr->data, prhs_ptr,
- &out_mat_.data[lhs_ptr->m * n_ + j * bn_]);
-
- lhs_ptr++;
- }
- }
- }
- }
- else
- {
-#pragma omp parallel for
- for (int i = 0; i < nch_; i++)
- {
- const sparse_weight_t *weight_ptr = weights_ + i;
- const int mxk = weight_ptr->mxk;
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- weight_data_t *lhs_ptr = weight_ptr->wdata;
- float *rhs_ptr = in_mat_.data + j * bn_;
-
- for (int l = 0; l < mxk; l++)
- {
- _sparse_sgemm_kernel(bn, lhs_ptr->data, rhs_ptr + lhs_ptr->k * n_,
- &out_mat_.data[lhs_ptr->m * n_ + j * bn_]);
-
- lhs_ptr++;
- }
- }
- }
- }
- }
- else
- {
- if (need_im2col_)
- {
- for (int i = 0; i < nch_; i++)
- {
- const sparse_weight_t *weight_ptr = weights_ + i;
- const int mxk = weight_ptr->mxk;
-
-#pragma omp parallel for
- for (int j = 0; j < nn_; j++)
- {
- int k = -1;
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- weight_data_t *lhs_ptr = weight_ptr->wdata;
- float prhs_ptr[bn];
-
- for (int l = 0; l < mxk; l++)
- {
- if (k != lhs_ptr->k)
- {
- k = lhs_ptr->k;
- _sparse_pack_rowmajor_image(bn, k, j * bn_, const_cast<convMat_t *>(&in_mat_),
- &out_mat_, const_cast<convParams_t *>(&in_param_),
- prhs_ptr);
- }
-
- _sparse_sgemm_kernel(bn, lhs_ptr->data, prhs_ptr,
- &out_mat_.data[lhs_ptr->m * n_ + j * bn_]);
-
- lhs_ptr++;
- }
- }
- }
- }
- else
- {
- for (int i = 0; i < nch_; i++)
- {
- const sparse_weight_t *weight_ptr = weights_ + i;
- const int mxk = weight_ptr->mxk;
-
-#pragma omp parallel for
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- weight_data_t *lhs_ptr = weight_ptr->wdata;
- float *rhs_ptr = in_mat_.data + j * bn_;
-
- for (int l = 0; l < mxk; l++)
- {
- _sparse_sgemm_kernel(bn, lhs_ptr->data, rhs_ptr + lhs_ptr->k * n_,
- &out_mat_.data[lhs_ptr->m * n_ + j * bn_]);
-
- lhs_ptr++;
- }
- }
- }
- }
- }
-}
-
-void conv_sparse::run()
-{
- if (num_threads_ == 1)
- compute_singlethread();
- else if (num_threads_ > 1)
- compute_multithreads();
- else
- throw std::runtime_error{"Invalid thread number."};
-}
-
-} // namespace srcn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/conv_sparse.h b/compute/ncnn/src/srcn/conv_sparse.h
deleted file mode 100644
index 7ac358fd8..000000000
--- a/compute/ncnn/src/srcn/conv_sparse.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_CONV_SPARSE_H__
-#define __NNFW_SRCN_CONV_SPARSE_H__
-
-#include "ncnn/srcn/conv_type.h"
-#include "common.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-#define BCH 128
-
-typedef struct
-{
- short m;
- short k;
- float data;
-} weight_data_t;
-
-typedef struct
-{
- int mxk;
- weight_data_t *wdata;
-} sparse_weight_t;
-
-class conv_sparse
-{
-public:
- conv_sparse(const convMat_t &in_mat, convMat_t &out_mat, const convParams_t &in_param,
- const sparse_weight_t *weights, int num_threads, convType_t conv_type);
- ~conv_sparse();
-
- void run();
-
-private:
- void param_init();
- void compute_singlethread();
- void compute_multithreads();
-
- const convMat_t in_mat_;
- convMat_t out_mat_;
- const convParams_t in_param_;
- const sparse_weight_t *weights_;
- int num_threads_;
- convType_t conv_type_;
-
- uint32_t n_;
- uint32_t bn_;
- int rn_;
- int nn_;
-
- int bch_;
- int rch_;
- int nch_;
-
- int need_im2col_;
-};
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_CONV_SPARSE_H__
diff --git a/compute/ncnn/src/srcn/conv_winograd.cc b/compute/ncnn/src/srcn/conv_winograd.cc
deleted file mode 100644
index 69649ea2a..000000000
--- a/compute/ncnn/src/srcn/conv_winograd.cc
+++ /dev/null
@@ -1,341 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "common.h"
-#include "conv_winograd.h"
-
-namespace std
-{
-template <typename Dtype> static inline Dtype max(Dtype a, Dtype b)
-{
- if (a > b)
- return a;
- else
- return b;
-}
-}
-
-namespace nnfw
-{
-namespace srcn
-{
-
-void conv_winograd::param_init()
-{
- if ((in_param_.kernel_w != in_param_.kernel_h) || (in_param_.stride_w != in_param_.stride_h) ||
- (in_param_.kernel_w != 3 && in_param_.kernel_w != 5) || (in_param_.stride_w != 1) ||
- (!winograd_weight_))
- {
- error_ = 1;
- return;
- }
-
- int M, N;
- const int w = in_mat_.w;
- const int h = in_mat_.h;
- const int outw = out_mat_.w;
- const int outh = out_mat_.h;
- const int pad_w = in_param_.pad_w;
- const int pad_h = in_param_.pad_h;
-
- if (in_param_.kernel_w == 3)
- {
- M = winograd_para_3x3s1::M;
- N = winograd_para_3x3s1::N;
- }
- else
- {
- M = winograd_para_5x5s1::M;
- N = winograd_para_5x5s1::N;
- }
-
- tile_h_in_ = tile_w_in_ = M;
- tile_h_out_ = tile_h_in_ - N + 1;
- tile_w_out_ = tile_w_in_ - N + 1;
- ntiles_h_ = (std::max(h + pad_h - tile_h_in_ + 1, outh) + tile_h_out_ - 1) / tile_h_out_;
- ntiles_w_ = (std::max(w + pad_w - tile_w_in_ + 1, outw) + tile_w_out_ - 1) / tile_w_out_;
-
- error_ = 0;
-}
-
-conv_winograd::conv_winograd(const convMat_t &in_mat, convMat_t &out_mat,
- const convParams_t &in_param, convType_t conv_type,
- const float *winograd_weight, int num_threads, int inc_stride,
- int outc_stride, int c_stride)
- : in_mat_(in_mat), out_mat_(out_mat), in_param_(in_param), conv_type_(conv_type),
- winograd_weight_(winograd_weight), num_threads_(num_threads), inc_stride_(inc_stride),
- outc_stride_(outc_stride), c_stride_(c_stride)
-
-{
- param_init();
-}
-
-conv_winograd::~conv_winograd() {}
-
-void conv_winograd::compute_sgemm(sgemmType_t major_type, sgemmTrans_t ltrans, sgemmTrans_t rtrans,
- const int m, const int n, const int k, const float *lhs_data,
- const float *rhs_data, float *res_data)
-{
- class sgemm_singlethread sgemm(major_type, ltrans, rtrans, m, n, k, lhs_data, rhs_data, res_data,
- num_threads_);
-
- sgemm.run();
-}
-
-void conv_winograd::winograd_input_im2col(float *col_buff)
-{
- const int w = in_mat_.w;
- const int h = in_mat_.h;
- const float *data = in_mat_.data;
- const int channels = in_mat_.c;
- const int pad_w = in_param_.pad_w;
- const int pad_h = in_param_.pad_h;
-
- if (conv_type_ == row_major)
- {
-#ifdef NCNN
- const int n = alignSize(inc_stride_, 16 / sizeof(float));
-#else // NCNN
- const int n = inc_stride_;
-#endif // NCNN
- for (int c = 0; c < channels; ++c)
- {
- for (int tile_h = 0; tile_h < ntiles_h_; ++tile_h)
- {
- for (int tile_w = 0; tile_w < ntiles_w_; ++tile_w)
- {
- for (int y = 0; y < tile_h_in_; ++y)
- {
- for (int x = 0; x < tile_w_in_; ++x)
- {
- int in_y = tile_h * tile_h_out_ + y - pad_h;
- int in_x = tile_w * tile_w_out_ + x - pad_w;
-
- if (in_y < 0 || in_x < 0 || in_y >= h || in_x >= w)
- {
- col_buff[(((c * ntiles_h_ + tile_h) * ntiles_w_ + tile_w) * tile_h_in_ + y) *
- tile_w_in_ +
- x] = 0;
- }
- else
- {
- col_buff[(((c * ntiles_h_ + tile_h) * ntiles_w_ + tile_w) * tile_h_in_ + y) *
- tile_w_in_ +
- x] = data[c * n + in_y * w + in_x];
- }
- }
- }
- }
- }
- }
- }
- else if (conv_type_ == col_major)
- {
- for (int tile_h = 0; tile_h < ntiles_h_; ++tile_h)
- {
- for (int tile_w = 0; tile_w < ntiles_w_; ++tile_w)
- {
- for (int y = 0; y < tile_h_in_; ++y)
- {
- for (int x = 0; x < tile_w_in_; ++x)
- {
- for (int c = 0; c < channels; ++c)
- {
- int in_y = tile_h * tile_h_out_ + y - pad_h;
- int in_x = tile_w * tile_w_out_ + x - pad_w;
-
- if (in_y < 0 || in_x < 0 || in_y >= h || in_x >= w)
- {
- col_buff[(((c * ntiles_h_ + tile_h) * ntiles_w_ + tile_w) * tile_h_in_ + y) *
- tile_w_in_ +
- x] = 0;
- }
- else
- {
- col_buff[(((c * ntiles_h_ + tile_h) * ntiles_w_ + tile_w) * tile_h_in_ + y) *
- tile_w_in_ +
- x] = data[c + (in_y * w + in_x) * channels];
- }
- }
- }
- }
- }
- }
- }
-}
-
-void conv_winograd::winograd_output_col2im(const float *col_buff)
-{
- int outh = out_mat_.h;
- int outw = out_mat_.w;
- float *data = out_mat_.data;
- int channels = out_mat_.c;
-
- if (conv_type_ == row_major)
- {
-#ifdef NCNN
- const int n = alignSize(outc_stride_, 16 / sizeof(float));
-#else // NCNN
- const int n = outc_stride_;
-#endif // NCNN
- for (int c = 0; c < channels; ++c)
- {
- for (int tile_h = 0; tile_h < ntiles_h_; ++tile_h)
- {
- for (int tile_w = 0; tile_w < ntiles_w_; ++tile_w)
- {
- for (int y = 0; y < tile_h_out_; ++y)
- {
- for (int x = 0; x < tile_w_out_; ++x)
- {
- int out_y = tile_h * tile_h_out_ + y;
- int out_x = tile_w * tile_w_out_ + x;
- if (out_y < outh && out_x < outw)
- {
- data[c * n + out_y * outw + out_x] =
- col_buff[(((c * ntiles_h_ + tile_h) * ntiles_w_ + tile_w) * tile_h_out_ + y) *
- tile_w_out_ +
- x];
- }
- }
- }
- }
- }
- }
- }
- else if (conv_type_ == col_major)
- {
- for (int tile_h = 0; tile_h < ntiles_h_; ++tile_h)
- {
- for (int tile_w = 0; tile_w < ntiles_w_; ++tile_w)
- {
- for (int y = 0; y < tile_h_out_; ++y)
- {
- for (int x = 0; x < tile_w_out_; ++x)
- {
- for (int c = 0; c < channels; ++c)
- {
- int out_y = tile_h * tile_h_out_ + y;
- int out_x = tile_w * tile_w_out_ + x;
- if (out_y < outh && out_x < outw)
- {
- data[c + (out_y * outw + out_x) * c_stride_] =
- col_buff[(((c * ntiles_h_ + tile_h) * ntiles_w_ + tile_w) * tile_h_out_ + y) *
- tile_w_out_ +
- x];
- }
- }
- }
- }
- }
- }
- }
-}
-
-void conv_winograd::compute_winograd()
-{
- // const int w = in_mat_.w;
- // const int h = in_mat_.h;
- const int inch = in_mat_.c;
- // const int outw = out_mat_.w;
- // const int outh = out_mat_.h;
- const int outch = out_mat_.c;
- const int kernel_size = in_param_.kernel_w;
-
- int M, N;
- const double *A;
- const double *B;
-
- if (kernel_size == 3)
- {
- M = winograd_para_3x3s1::M;
- N = winograd_para_3x3s1::N;
- B = winograd_para_3x3s1::getB();
- A = winograd_para_3x3s1::getA();
- }
- else
- {
- M = winograd_para_5x5s1::M;
- N = winograd_para_5x5s1::N;
- B = winograd_para_5x5s1::getB();
- A = winograd_para_5x5s1::getA();
- }
-
- /*Step 2: transfer image to winograd domain*/
- float *col_buff =
- new float[std::max(outch, inch) * ntiles_h_ * ntiles_w_ * tile_h_in_ * tile_w_in_];
-
- int temp1_n = inch * ntiles_h_ * ntiles_w_;
- float *temp1_ =
- new float[tile_h_in_ * tile_w_in_ * std::max(outch, inch) * ntiles_h_ * ntiles_w_];
-
- float *winograd_b = new float[M * M * M * M];
-
- if ((NULL == col_buff) || (NULL == temp1_) || (NULL == winograd_b))
- {
- delete[] col_buff;
- delete[] temp1_;
- delete[] winograd_b;
- return;
- }
-
- winograd_input_im2col(col_buff);
-
- kronecker_product(winograd_b, B, B, M, M, M, M);
-
- compute_sgemm(rowMajor, trans, trans, tile_h_in_ * tile_w_in_, temp1_n, tile_h_in_ * tile_w_in_,
- winograd_b, col_buff, temp1_);
-
- delete[] winograd_b;
-
- /*Step 3: convolution in winograd domain*/
- for (int j = 0; j < tile_h_in_ * tile_w_in_; ++j)
- {
- compute_sgemm(rowMajor, notrans, notrans, outch, ntiles_h_ * ntiles_w_, inch,
- winograd_weight_ + j * c_stride_ * inch,
- temp1_ + j * inch * ntiles_h_ * ntiles_w_,
- col_buff + j * outch * ntiles_h_ * ntiles_w_);
- }
-
- /*Step 4: transfer back to time domain*/
- float *winograd_a = new float[M * (M - N + 1) * M * (M - N + 1)];
- if (NULL == winograd_a)
- {
- delete[] col_buff;
- delete[] temp1_;
- return;
- }
- kronecker_product(winograd_a, A, A, M, M - N + 1, M, M - N + 1);
- compute_sgemm(rowMajor, trans, notrans, outch * ntiles_h_ * ntiles_w_, tile_h_out_ * tile_w_out_,
- tile_h_in_ * tile_w_in_, col_buff, winograd_a, temp1_);
- delete[] winograd_a;
- delete[] col_buff;
-
- winograd_output_col2im(temp1_);
-
- delete[] temp1_;
-}
-
-void conv_winograd::run()
-{
- if (error_)
- return;
-
- compute_winograd();
-}
-
-} // namespace srcn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/conv_winograd.h b/compute/ncnn/src/srcn/conv_winograd.h
deleted file mode 100644
index 76c2601f2..000000000
--- a/compute/ncnn/src/srcn/conv_winograd.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_CONV_WINOGRAD_H__
-#define __NNFW_SRCN_CONV_WINOGRAD_H__
-
-#include "ncnn/srcn/conv_type.h"
-#include "winograd.h"
-#include "sgemm_singlethread.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-class conv_winograd
-{
-public:
- conv_winograd(const convMat_t &in_mat, convMat_t &out_mat, const convParams_t &in_param,
- convType_t conv_type, const float *winograd_weight, int num_threads, int inc_stride,
- int outc_stride, int c_stride);
- ~conv_winograd();
-
- void run();
-
-private:
- void param_init();
- void compute_sgemm(sgemmType_t major_type, sgemmTrans_t ltrans, sgemmTrans_t rtrans, const int m,
- const int n, const int k, const float *lhs_data, const float *rhs_data,
- float *res_data);
- void winograd_input_im2col(float *col_buff);
- void winograd_output_col2im(const float *col_buff);
- void compute_winograd();
-
- const convMat_t in_mat_;
- convMat_t out_mat_;
- const convParams_t in_param_;
- convType_t conv_type_;
- const float *winograd_weight_;
- const int num_threads_;
-
- int tile_w_in_;
- int tile_h_in_;
- int tile_w_out_;
- int tile_h_out_;
- int ntiles_w_;
- int ntiles_h_;
-
- int inc_stride_;
- int outc_stride_;
- int c_stride_;
-
- int error_;
-};
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_CONV_WINOGRAD_H__
diff --git a/compute/ncnn/src/srcn/conv_winograd_batch.cc b/compute/ncnn/src/srcn/conv_winograd_batch.cc
deleted file mode 100644
index cba45c648..000000000
--- a/compute/ncnn/src/srcn/conv_winograd_batch.cc
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "common.h"
-#include "conv_winograd_batch.h"
-
-namespace std
-{
-template <typename Dtype> static inline Dtype max(Dtype a, Dtype b)
-{
- if (a > b)
- return a;
- else
- return b;
-}
-}
-
-namespace nnfw
-{
-namespace srcn
-{
-
-void conv_winograd_batch::param_init()
-{
- if ((in_param_.kernel_w != in_param_.kernel_h) || (in_param_.stride_w != in_param_.stride_h) ||
- (in_param_.kernel_w != 3 && in_param_.kernel_w != 5) || (in_param_.stride_w != 1) ||
- (!winograd_weight_))
- {
- error_ = 1;
- return;
- }
-
- int M, N;
- const int w = in_mat_.w;
- const int h = in_mat_.h;
- const int outw = out_mat_.w;
- const int outh = out_mat_.h;
- const int pad_w = in_param_.pad_w;
- const int pad_h = in_param_.pad_h;
-
- if (in_param_.kernel_w == 3)
- {
- if (w == 4)
- {
- M = winograd_para_3x3s1_2::M;
- N = winograd_para_3x3s1_2::N;
- }
- else
- {
- M = winograd_para_3x3s1::M;
- N = winograd_para_3x3s1::N;
- }
- }
- else
- {
- M = winograd_para_5x5s1::M;
- N = winograd_para_5x5s1::N;
- }
-
- tile_h_in_ = tile_w_in_ = M;
- tile_h_out_ = tile_h_in_ - N + 1;
- tile_w_out_ = tile_w_in_ - N + 1;
- ntiles_h_ = (std::max(h + pad_h - tile_h_in_ + 1, outh) + tile_h_out_ - 1) / tile_h_out_;
- ntiles_w_ = (std::max(w + pad_w - tile_w_in_ + 1, outw) + tile_w_out_ - 1) / tile_w_out_;
-
- error_ = 0;
-}
-
-conv_winograd_batch::conv_winograd_batch(const convMat_t &in_mat, convMat_t &out_mat,
- const convParams_t &in_param, convType_t conv_type,
- const float *winograd_weight, int num_threads)
- : in_mat_(in_mat), out_mat_(out_mat), in_param_(in_param), conv_type_(conv_type),
- winograd_weight_(winograd_weight), num_threads_(num_threads)
-{
- param_init();
-}
-
-conv_winograd_batch::~conv_winograd_batch() {}
-
-void conv_winograd_batch::compute_sgemm(sgemmType_t major_type, sgemmTrans_t ltrans,
- sgemmTrans_t rtrans, const int m, const int n, const int k,
- const float *lhs_data, const float *rhs_data,
- float *res_data)
-{
- class sgemm_singlethread sgemm(major_type, ltrans, rtrans, m, n, k, lhs_data, rhs_data, res_data,
- num_threads_);
-
- sgemm.run();
-}
-
-void conv_winograd_batch::winograd_input_im2col(float *col_buff)
-{
- const int w = in_mat_.w;
- const int h = in_mat_.h;
- const float *data = in_mat_.data;
- const int channels = in_mat_.c;
- const int batch = in_mat_.n;
- const int pad_w = in_param_.pad_w;
- const int pad_h = in_param_.pad_h;
-
- // TODO: row_major
- if (conv_type_ == col_major)
- {
- for (int n = 0; n < batch; n++)
- {
- for (int tile_h = 0; tile_h < ntiles_h_; ++tile_h)
- {
- for (int tile_w = 0; tile_w < ntiles_w_; ++tile_w)
- {
- for (int y = 0; y < tile_h_in_; ++y)
- {
- for (int x = 0; x < tile_w_in_; ++x)
- {
- for (int c = 0; c < channels; ++c)
- {
- int in_y = tile_h * tile_h_out_ + y - pad_h;
- int in_x = tile_w * tile_w_out_ + x - pad_w;
-
- if (in_y < 0 || in_x < 0 || in_y >= h || in_x >= w)
- {
- col_buff[((((c * batch + n) * ntiles_h_ + tile_h) * ntiles_w_ + tile_w) *
- tile_h_in_ +
- y) *
- tile_w_in_ +
- x] = 0;
- }
- else
- {
- col_buff[((((c * batch + n) * ntiles_h_ + tile_h) * ntiles_w_ + tile_w) *
- tile_h_in_ +
- y) *
- tile_w_in_ +
- x] = data[((n * h + in_y) * w + in_x) * channels + c];
- }
- }
- }
- }
- }
- }
- }
- }
-}
-
-void conv_winograd_batch::winograd_output_col2im(const float *col_buff)
-{
- int outh = out_mat_.h;
- int outw = out_mat_.w;
- float *data = out_mat_.data;
- int channels = out_mat_.c;
- int batch = out_mat_.n;
-
- // TODO: row_major
- if (conv_type_ == col_major)
- {
- for (int n = 0; n < batch; n++)
- {
- for (int tile_h = 0; tile_h < ntiles_h_; ++tile_h)
- {
- for (int tile_w = 0; tile_w < ntiles_w_; ++tile_w)
- {
- for (int y = 0; y < tile_h_out_; ++y)
- {
- for (int x = 0; x < tile_w_out_; ++x)
- {
- for (int c = 0; c < channels; ++c)
- {
- int out_y = tile_h * tile_h_out_ + y;
- int out_x = tile_w * tile_w_out_ + x;
- if (out_y < outh && out_x < outw)
- {
- data[((n * outh + out_y) * outw + out_x) * channels + c] =
- col_buff[((((c * batch + n) * ntiles_h_ + tile_h) * ntiles_w_ + tile_w) *
- tile_h_out_ +
- y) *
- tile_w_out_ +
- x];
- }
- }
- }
- }
- }
- }
- }
- }
-}
-
-void conv_winograd_batch::compute_winograd()
-{
- const int w = in_mat_.w;
- // const int h = in_mat_.h;
- const int inch = in_mat_.c;
- // const int outw = out_mat_.w;
- // const int outh = out_mat_.h;
- const int outch = out_mat_.c;
- const int kernel_size = in_param_.kernel_w;
- const int batch = in_mat_.n;
-
- int M, N;
- const double *A;
- const double *B;
-
- if (kernel_size == 3)
- {
- if (w == 4)
- {
- M = winograd_para_3x3s1_2::M;
- N = winograd_para_3x3s1_2::N;
- B = winograd_para_3x3s1_2::getB();
- A = winograd_para_3x3s1_2::getA();
- }
- else
- {
- M = winograd_para_3x3s1::M;
- N = winograd_para_3x3s1::N;
- B = winograd_para_3x3s1::getB();
- A = winograd_para_3x3s1::getA();
- }
- }
- else
- {
- M = winograd_para_5x5s1::M;
- N = winograd_para_5x5s1::N;
- B = winograd_para_5x5s1::getB();
- A = winograd_para_5x5s1::getA();
- }
-
- /*Step 2: transfer image to winograd domain*/
- float *col_buff =
- new float[std::max(outch, inch) * batch * ntiles_h_ * ntiles_w_ * tile_h_in_ * tile_w_in_];
-
- int temp1_n = batch * inch * ntiles_h_ * ntiles_w_;
- float *temp1_ =
- new float[batch * tile_h_in_ * tile_w_in_ * std::max(outch, inch) * ntiles_h_ * ntiles_w_];
-
- float *winograd_b = new float[M * M * M * M];
-
- if ((NULL == col_buff) || (NULL == temp1_) || (NULL == winograd_b))
- {
- delete[] col_buff;
- delete[] temp1_;
- delete[] winograd_b;
- return;
- }
-
- winograd_input_im2col(col_buff);
-
- kronecker_product(winograd_b, B, B, M, M, M, M);
-
- compute_sgemm(rowMajor, trans, trans, tile_h_in_ * tile_w_in_, temp1_n, tile_h_in_ * tile_w_in_,
- winograd_b, col_buff, temp1_);
- delete[] winograd_b;
-
- /*Step 3: convolution in winograd domain*/
- for (int j = 0; j < tile_h_in_ * tile_w_in_; ++j)
- {
- compute_sgemm(rowMajor, notrans, notrans, outch, batch * ntiles_h_ * ntiles_w_, inch,
- winograd_weight_ + j * outch * inch,
- temp1_ + j * batch * inch * ntiles_h_ * ntiles_w_,
- col_buff + j * batch * outch * ntiles_h_ * ntiles_w_);
- }
-
- /*Step 4: transfer back to time domain*/
- float *winograd_a = new float[M * (M - N + 1) * M * (M - N + 1)];
- if (NULL == winograd_a)
- {
- delete[] col_buff;
- delete[] temp1_;
- return;
- }
-
- kronecker_product(winograd_a, A, A, M, M - N + 1, M, M - N + 1);
- compute_sgemm(rowMajor, trans, notrans, batch * outch * ntiles_h_ * ntiles_w_,
- tile_h_out_ * tile_w_out_, tile_h_in_ * tile_w_in_, col_buff, winograd_a, temp1_);
- delete[] winograd_a;
- delete[] col_buff;
-
- winograd_output_col2im(temp1_);
-
- delete[] temp1_;
-}
-
-void conv_winograd_batch::run()
-{
- if (error_)
- return;
-
- compute_winograd();
-}
-
-} // namespace srcn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/conv_winograd_batch.h b/compute/ncnn/src/srcn/conv_winograd_batch.h
deleted file mode 100644
index a022d9c52..000000000
--- a/compute/ncnn/src/srcn/conv_winograd_batch.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_CONV_WINOGRAD_BATCH_H__
-#define __NNFW_SRCN_CONV_WINOGRAD_BATCH_H__
-
-#include "ncnn/srcn/conv_type.h"
-#include "winograd.h"
-#include "sgemm_singlethread.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-class conv_winograd_batch
-{
-public:
- conv_winograd_batch(const convMat_t &in_mat, convMat_t &out_mat, const convParams_t &in_param,
- convType_t conv_type, const float *winograd_weight, int num_threads);
- ~conv_winograd_batch();
-
- void run();
-
-private:
- void param_init();
- void compute_sgemm(sgemmType_t major_type, sgemmTrans_t ltrans, sgemmTrans_t rtrans, const int m,
- const int n, const int k, const float *lhs_data, const float *rhs_data,
- float *res_data);
- void winograd_input_im2col(float *col_buff);
- void winograd_output_col2im(const float *col_buff);
- void compute_winograd();
-
- const convMat_t in_mat_;
- convMat_t out_mat_;
- const convParams_t in_param_;
- convType_t conv_type_;
- const float *winograd_weight_;
- const int num_threads_;
-
- int tile_w_in_;
- int tile_h_in_;
- int tile_w_out_;
- int tile_h_out_;
- int ntiles_w_;
- int ntiles_h_;
-
- int error_;
-};
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_CONV_WINOGRAD_BATCH_H__
diff --git a/compute/ncnn/src/srcn/deconv_sgemm_multithreads.cc b/compute/ncnn/src/srcn/deconv_sgemm_multithreads.cc
deleted file mode 100644
index f3ccf13e5..000000000
--- a/compute/ncnn/src/srcn/deconv_sgemm_multithreads.cc
+++ /dev/null
@@ -1,387 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include "common.h"
-#include "sgemm_kernel.h"
-#include "sgemm_pack.h"
-#include "deconv_sgemm_multithreads.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-void deconv_sgemm_multithreads::param_init()
-{
-#if __aarch64__
- if (conv_type_ == row_major)
- {
- mr_ = 8;
- nr_ = 12;
- }
- else if (conv_type_ == col_major)
- {
-
- mr_ = 12;
- nr_ = 8;
- }
-#else // __aarch64__
- if (conv_type_ == row_major)
- {
- mr_ = 6;
- nr_ = 8;
- }
- else if (conv_type_ == col_major)
- {
- mr_ = 8;
- nr_ = 6;
- }
-#endif // __aarch64__
-
- int col = n_;
-
- if (m_ > n_)
- {
- shard_type_ = shardByRow;
- col = m_;
- }
- else
- {
- shard_type_ = shardByCol;
- }
-
- int th_base = divup(col, num_threads_);
-
- th_base = MIN(MAX(th_base, MIN_COL), MAX_COL);
-
- int k_div = (nr_ * sizeof_RhsScalar);
- int k_sub = (mr_ * nr_ * sizeof_ResScalar);
-
- const int k_cache = MIN(divup((int)(L1_CACHE_SIZE - k_sub), (int)k_div * 2), MAX_K);
- bk_ = MIN(k_cache, k_);
-
- if (shard_type_ == shardByCol)
- {
- int m_sub = (bk_ * nr_ * sizeof_RhsScalar);
- int m_div = (sizeof_LhsScalar * bk_ * 2 * num_threads_);
- if (L3_CACHE_SIZE)
- m_div = (sizeof_LhsScalar * bk_ * 2);
- int m_cache = divup((L2_CACHE_SIZE - m_sub), m_div);
- bm_ = MIN(m_cache, m_);
-
- bn_ = MIN(th_base, n_);
- if (L3_CACHE_SIZE)
- {
- int n_sub = (bk_ * bm_ * sizeof_RhsScalar);
- int n_div = (sizeof_LhsScalar * bk_ * 2 * num_threads_);
- int n_cache = divup((L3_CACHE_SIZE - n_sub), n_div);
- bn_ = MIN(n_cache, bn_);
- }
- }
- else
- {
- int n_sub = (bk_ * mr_ * sizeof_LhsScalar);
- int n_div = (sizeof_LhsScalar * bk_ * 2 * num_threads_);
- if (L3_CACHE_SIZE)
- n_div = (sizeof_LhsScalar * bk_ * 2);
- int n_cache = divup((L2_CACHE_SIZE - n_sub), n_div);
- bn_ = MIN(n_cache, n_);
-
- bm_ = MIN(th_base, m_);
- if (L3_CACHE_SIZE)
- {
- int m_sub = (bk_ * bn_ * sizeof_RhsScalar);
- int m_div = (sizeof_LhsScalar * bk_ * 2 * num_threads_);
- int m_cache = divup((L3_CACHE_SIZE - m_sub), m_div);
- bm_ = MIN(m_cache, bm_);
- }
- }
-
- nm_ = divup(m_, bm_);
- nn_ = divup(n_, bn_);
- nk_ = divup(k_, bk_);
-
- rm_ = m_ % bm_;
- rn_ = n_ % bn_;
- rk_ = k_ % bk_;
-}
-
-deconv_sgemm_multithreads::deconv_sgemm_multithreads(const convMat_t &in_mat,
- const convMat_t &weights_mat,
- convMat_t &out_mat,
- const convParams_t &in_param, int num_threads,
- convType_t conv_type)
-
- : in_mat_(in_mat), weights_mat_(weights_mat), out_mat_(out_mat), in_param_(in_param),
- conv_type_(conv_type), num_threads_(num_threads)
-{
- m_ = in_param_.kernel_h * in_param_.kernel_w * out_mat_.c;
-#ifdef NCNN
- n_ = alignSize(in_mat_.h * in_mat_.w, 16 / sizeof(float));
-#else // NCNN
- n_ = in_mat_.w * in_mat_.h;
-#endif // NCNN
- k_ = in_mat.c;
-
- param_init();
-
- int lhs_stride = (bm_ + mr_ - 1) / mr_ * mr_ * bk_;
- int rhs_stride = (bn_ + nr_ - 1) / nr_ * nr_ * bk_;
-
- if (shard_type_ == shardByCol)
- {
- plhs_buffer_ = new float[lhs_stride * 1 * nm_];
- prhs_buffer_ = new float[rhs_stride * num_threads_];
- }
- else
- {
- plhs_buffer_ = new float[lhs_stride * num_threads_];
- prhs_buffer_ = new float[rhs_stride * 1 * nn_];
- }
-
- pres_buffer_ = new float[bm_ * bn_ * num_threads_];
-
- if (plhs_buffer_ == NULL || prhs_buffer_ == NULL || pres_buffer_ == NULL)
- {
- error_ = 1;
- }
-
- if (in_param_.kernel_w != 1 || in_param_.kernel_h != 1 || in_param_.stride_w != 1 ||
- in_param_.stride_h != 1 || in_param_.padding != 0)
- {
- need_col2im_ = 1;
- }
- else
- {
- need_col2im_ = 0;
- }
-
- omp_set_num_threads(num_threads_);
-
- error_ = 0;
-}
-
-deconv_sgemm_multithreads::~deconv_sgemm_multithreads()
-{
- if (plhs_buffer_)
- delete[] plhs_buffer_;
- if (prhs_buffer_)
- delete[] prhs_buffer_;
- if (pres_buffer_)
- delete[] pres_buffer_;
-}
-
-void deconv_sgemm_multithreads::run()
-{
- if (error_)
- return;
-
- if (shard_type_ == shardByCol && conv_type_ == col_major)
- {
- compute_colmajor_colshard();
- }
- else if (shard_type_ == shardByRow && conv_type_ == col_major)
- {
- compute_colmajor_rowshard();
- }
- else if (shard_type_ == shardByCol && conv_type_ == row_major)
- {
- compute_rowmajor_colshard();
- }
- else if (shard_type_ == shardByRow && conv_type_ == row_major)
- {
- compute_rowmajor_rowshard();
- }
-}
-
-void deconv_sgemm_multithreads::compute_rowmajor_colshard()
-{
- int lhs_stride = (bm_ + mr_ - 1) / mr_ * mr_ * bk_;
- int rhs_stride = (bn_ + nr_ - 1) / nr_ * nr_ * bk_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
-#pragma omp parallel for
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_rowmajor_trans_lhs(mr_, bm, bk, m_, &weights_mat_.data[l * bk_ * m_ + i * bm_],
- &plhs_buffer_[i * lhs_stride]);
- }
-
-#pragma omp parallel for
- for (int j = 0; j < nn_; j++)
- {
- int thread_num = omp_get_thread_num();
- float *prhs_ptr = &prhs_buffer_[rhs_stride * thread_num];
- float *pres_ptr = &pres_buffer_[bm_ * bn_ * thread_num];
-
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- _pack_rowmajor_notrans_rhs(nr_, bn, bk, n_, &in_mat_.data[l * bk_ * n_ + j * bn_], prhs_ptr);
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _sgemm_rowmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, &plhs_buffer_[i * lhs_stride],
- prhs_ptr, pres_ptr, 0, bn, bk);
-
- if (need_col2im_)
- _unpack_rowmajor_image_res(bm, bn, i * bm_, j * bn_, const_cast<convMat_t *>(&in_mat_),
- &out_mat_, const_cast<convParams_t *>(&in_param_), pres_ptr);
- }
- }
- }
-}
-
-void deconv_sgemm_multithreads::compute_rowmajor_rowshard()
-{
- int lhs_stride = (bm_ + mr_ - 1) / mr_ * mr_ * bk_;
- int rhs_stride = (bn_ + nr_ - 1) / nr_ * nr_ * bk_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
-#pragma omp parallel for
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- _pack_rowmajor_notrans_rhs(nr_, bn, bk, n_, &in_mat_.data[l * bk_ * n_ + j * bn_],
- &prhs_buffer_[j * rhs_stride]);
- }
-
-#pragma omp parallel for
- for (int i = 0; i < nm_; i++)
- {
- int thread_num = omp_get_thread_num();
- float *plhs_ptr = &plhs_buffer_[lhs_stride * thread_num];
- float *pres_ptr = &pres_buffer_[bm_ * bn_ * thread_num];
-
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_rowmajor_trans_lhs(mr_, bm, bk, m_, &weights_mat_.data[l * bk_ * m_ + i * bm_],
- plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- _sgemm_rowmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr,
- &prhs_buffer_[j * rhs_stride], pres_ptr, 0, bn, bk);
- if (need_col2im_)
- _unpack_rowmajor_image_res(bm, bn, i * bm_, j * bn_, const_cast<convMat_t *>(&in_mat_),
- &out_mat_, const_cast<convParams_t *>(&in_param_), pres_ptr);
- }
- }
- }
-}
-
-void deconv_sgemm_multithreads::compute_colmajor_colshard()
-{
- int lhs_stride = (bm_ + mr_ - 1) / mr_ * mr_ * bk_;
- int rhs_stride = (bn_ + nr_ - 1) / nr_ * nr_ * bk_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
-#pragma omp parallel for
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_colmajor_trans_lhs(mr_, bm, bk, k_, &weights_mat_.data[i * bm_ * k_ + l * bk_],
- &plhs_buffer_[i * lhs_stride]);
- }
-
-#pragma omp parallel for
- for (int j = 0; j < nn_; j++)
- {
- int thread_num = omp_get_thread_num();
- float *prhs_ptr = &prhs_buffer_[rhs_stride * thread_num];
- float *pres_ptr = &pres_buffer_[bm_ * bn_ * thread_num];
-
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- _pack_colmajor_notrans_rhs(nr_, bn, bk, k_, &in_mat_.data[j * bn_ * k_ + l * bk_], prhs_ptr);
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _sgemm_colmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, &plhs_buffer_[i * lhs_stride],
- prhs_ptr, pres_ptr, 0, bm, bk);
-
- // Need to add lock?
- if (need_col2im_)
- _unpack_colmajor_image_res(bm, bn, i * bm_, j * bn_, const_cast<convMat_t *>(&in_mat_),
- &out_mat_, const_cast<convParams_t *>(&in_param_), pres_ptr);
- }
- }
- }
-}
-
-void deconv_sgemm_multithreads::compute_colmajor_rowshard()
-{
- int lhs_stride = (bm_ + mr_ - 1) / mr_ * mr_ * bk_;
- int rhs_stride = (bn_ + nr_ - 1) / nr_ * nr_ * bk_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
-#pragma omp parallel for
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- _pack_colmajor_notrans_rhs(nr_, bn, bk, k_, &in_mat_.data[j * bn_ * k_ + l * bk_],
- &prhs_buffer_[j * rhs_stride]);
- }
-
-#pragma omp parallel for
- for (int i = 0; i < nm_; i++)
- {
- int thread_num = omp_get_thread_num();
- float *plhs_ptr = &plhs_buffer_[lhs_stride * thread_num];
- float *pres_ptr = &pres_buffer_[bm_ * bn_ * thread_num];
-
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_colmajor_trans_lhs(mr_, bm, bk, k_, &weights_mat_.data[i * bm_ * k_ + l * bk_],
- plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- _sgemm_colmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr,
- &prhs_buffer_[j * rhs_stride], pres_ptr, 0, bm, bk);
-
- if (need_col2im_)
- _unpack_colmajor_image_res(bm, bn, i * bm_, j * bn_, const_cast<convMat_t *>(&in_mat_),
- &out_mat_, const_cast<convParams_t *>(&in_param_), pres_ptr);
- }
- }
- }
-}
-
-} // namespace srcn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/deconv_sgemm_multithreads.h b/compute/ncnn/src/srcn/deconv_sgemm_multithreads.h
deleted file mode 100644
index 762f20380..000000000
--- a/compute/ncnn/src/srcn/deconv_sgemm_multithreads.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_DECONV_SGEMM_MULTITHREADS_H__
-#define __NNFW_SRCN_DECONV_SGEMM_MULTITHREADS_H__
-
-#include "ncnn/srcn/conv_type.h"
-#include "common.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-class deconv_sgemm_multithreads
-{
-public:
- deconv_sgemm_multithreads(const convMat_t &in_mat, const convMat_t &weights_mat,
- convMat_t &out_mat, const convParams_t &in_param, int num_threads,
- convType_t conv_type);
- ~deconv_sgemm_multithreads();
-
- void run();
-
-private:
- void param_init();
-
- void compute_rowmajor_colshard();
- void compute_rowmajor_rowshard();
- void compute_colmajor_colshard();
- void compute_colmajor_rowshard();
-
- const convMat_t in_mat_;
- const convMat_t weights_mat_;
- convMat_t out_mat_;
- const convParams_t in_param_;
- convType_t conv_type_;
- const int num_threads_;
-
- int m_;
- int n_;
- int k_;
-
- int bm_;
- int bn_;
- int bk_;
-
- int rm_;
- int rn_;
- int rk_;
-
- int nm_;
- int nn_;
- int nk_;
-
- int mr_;
- int nr_;
-
- int need_col2im_;
- shardType_t shard_type_;
-
- float *prhs_buffer_;
- float *plhs_buffer_;
- float *pres_buffer_;
-
- int error_;
-};
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_DECONV_SGEMM_MULTITHREADS_H__
diff --git a/compute/ncnn/src/srcn/depthwise_conv.cc b/compute/ncnn/src/srcn/depthwise_conv.cc
deleted file mode 100644
index cd092d5ac..000000000
--- a/compute/ncnn/src/srcn/depthwise_conv.cc
+++ /dev/null
@@ -1,2684 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include <arm_neon.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "common.h"
-#include "ncnn/srcn/conv_type.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-static void depthwise_conv3x3S1_nopad(const convMat_t &in_mat, convMat_t &out_mat,
- const convMat_t &kernel, const convMat_t &bias)
-{
-#if !__aarch64__
- int w = in_mat.w;
- int h = in_mat.h;
- int outw = out_mat.w;
- int outh = out_mat.h;
- int channels = in_mat.c;
-
-#pragma omp parallel for
- for (int c = 0; c < channels; c++)
- {
- const float *filter = kernel.data + c * 9;
-#ifdef NCNN
- float *inbuf = in_mat.data + c * alignSize(w * h, 16 / sizeof(float));
- float *outbuf = out_mat.data + c * alignSize(outw * outh, 16 / sizeof(float));
-#else // NCNN
- float *inbuf = in_mat.data + c * w * h;
- float *outbuf = out_mat.data + c * outw * outh;
-#endif // NCNN
- float bias0 = bias.data ? bias.data[c] : 0.0f;
-
- register float32x4_t weight012 asm("q4") = vld1q_f32(filter);
- register float32x4_t weight345 asm("q5") = vld1q_f32(filter + 3);
- register float32x4_t weight678 asm("q6") = vld1q_f32(filter + 6);
- register float32x4_t qbias0 asm("q7") = vdupq_n_f32(bias0);
-
- float *in_ptr0 = inbuf + 0 * w;
- float *in_ptr1 = inbuf + 1 * w;
- float *in_ptr2 = inbuf + 2 * w;
- float *in_ptr3 = inbuf + 3 * w;
-
- float *out_ptr0 = outbuf + 0 * outw;
- float *out_ptr1 = outbuf + 1 * outw;
-
- int i;
- for (i = 0; i + 1 < outh; i += 2)
- {
- int nn = (outw >> 2) - 1;
- int remain = outw & 0x03;
-
- if (nn > 0)
- {
- __asm __volatile("pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr0], %[in_ptr0], #16\n"
-
- "1:\n"
- "add %[in_ptr0], %[in_ptr0], #16\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight012][0]\n"
- "vmul.f32 q11, q2, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr1], %[in_ptr1], #16\n"
-
- "vand q15, %q[qbias0], %q[qbias0]\n"
- "vmla.f32 q10, q0, %e[weight345][0]\n"
- "vmla.f32 q11, q2, %e[weight345][1]\n"
- "vmul.f32 q12, q0, %e[weight012][0]\n"
- "vmul.f32 q13, q2, %e[weight012][1]\n"
-
- "pld [%[in_ptr2], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vmla.f32 q15, q3, %f[weight012][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr2], %[in_ptr2], #16\n"
-
- "vmla.f32 q10, q0, %e[weight678][0]\n"
- "vmla.f32 q11, q2, %e[weight678][1]\n"
- "vmla.f32 q12, q0, %e[weight345][0]\n"
- "vmla.f32 q13, q2, %e[weight345][1]\n"
-
- "pld [%[in_ptr3], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr3]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vmla.f32 q15, q3, %f[weight345][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr3], %[in_ptr3], #16\n"
-
- "vmla.f32 q12, q0, %e[weight678][0]\n"
- "vmla.f32 q13, q2, %e[weight678][1]\n"
-
- "pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vmla.f32 q15, q3, %f[weight678][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
- "vadd.f32 q15, q15, q12\n"
- "vadd.f32 q15, q15, q13\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[out_ptr1]]!\n"
-
- "bne 1b\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [in_ptr2] "+r"(in_ptr2), [in_ptr3] "+r"(in_ptr3),
-
- [out_ptr0] "+r"(out_ptr0), [out_ptr1] "+r"(out_ptr1), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
-
- for (; remain > 0; remain--)
- {
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
- float32x4_t input2 = vld1q_f32(in_ptr2);
- float32x4_t input3 = vld1q_f32(in_ptr3);
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
- out0 = vmlaq_f32(out0, input2, weight678);
-
- float32x4_t out1 = vmulq_f32(input1, weight012);
- out1 = vmlaq_f32(out1, input2, weight345);
- out1 = vmlaq_f32(out1, input3, weight678);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
- out1 = vsetq_lane_f32(bias0, out1, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
- float32x2_t out11 = vadd_f32(vget_low_f32(out1), vget_high_f32(out1));
-
- float32x2_t out01 = vpadd_f32(out00, out11);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
- *out_ptr1 = vget_lane_f32(out01, 1);
-
- in_ptr0++;
- in_ptr1++;
- in_ptr2++;
- in_ptr3++;
- out_ptr0++;
- out_ptr1++;
- }
-
- in_ptr0 += w + 2;
- in_ptr1 += w + 2;
- in_ptr2 += w + 2;
- in_ptr3 += w + 2;
-
- out_ptr0 += outw;
- out_ptr1 += outw;
- }
-
- for (; i < outh; i++)
- {
- int nn = outw >> 2;
- int remain = outw & 0x03;
-
- if (nn > 0)
- {
- __asm __volatile("1:\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr0], %[in_ptr0], #16\n"
-
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmla.f32 q14, q0, %e[weight012][0]\n"
- "vmla.f32 q14, q2, %e[weight012][1]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
-
- "vld1.f32 {d0-d2}, [%[in_ptr1]]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr1], %[in_ptr1], #16\n"
-
- "vmla.f32 q14, q0, %e[weight345][0]\n"
- "vmla.f32 q14, q2, %e[weight345][1]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
-
- "vld1.f32 {d0-d2}, [%[in_ptr2]]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr2], %[in_ptr2], #16\n"
-
- "vmla.f32 q14, q0, %e[weight678][0]\n"
- "vmla.f32 q14, q2, %e[weight678][1]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
-
- "bne 1b\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [in_ptr2] "+r"(in_ptr2), [out_ptr0] "+r"(out_ptr0), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
-
- for (; remain > 0; remain--)
- {
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
- float32x4_t input2 = vld1q_f32(in_ptr2);
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
- out0 = vmlaq_f32(out0, input2, weight678);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
-
- float32x2_t out01 = vpadd_f32(out00, out00);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
-
- in_ptr0++;
- in_ptr1++;
- in_ptr2++;
- out_ptr0++;
- }
-
- in_ptr0 += 2;
- in_ptr1 += 2;
- in_ptr2 += 2;
- }
- }
-#else // __aarch64__
- (void)in_mat;
- (void)out_mat;
- (void)kernel;
- (void)bias;
-#endif // !__aarch64__
-}
-
-static void depthwise_conv3x3S1_padding(const convMat_t &in_mat, convMat_t &out_mat,
- const convMat_t &kernel, const convMat_t &bias)
-{
-#if !__aarch64__
- int w = in_mat.w;
- int h = in_mat.h;
- int outw = out_mat.w;
- int outh = out_mat.h;
- int channels = in_mat.c;
-
-#pragma omp parallel for
- for (int c = 0; c < channels; c++)
- {
- const float *filter = kernel.data + c * 9;
-#ifdef NCNN
- float *inbuf = in_mat.data + c * alignSize(w * h, 16 / sizeof(float));
- float *outbuf = out_mat.data + c * alignSize(outw * outh, 16 / sizeof(float));
-#else // NCNN
- float *inbuf = in_mat.data + c * w * h;
- float *outbuf = out_mat.data + c * outw * outh;
-#endif // NCNN
- float bias0 = bias.data ? bias.data[c] : 0.0f;
-
- register float32x4_t weight012 asm("q4") = vld1q_f32(filter);
- register float32x4_t weight345 asm("q5") = vld1q_f32(filter + 3);
- register float32x4_t weight678 asm("q6") = vld1q_f32(filter + 6);
- register float32x4_t qbias0 asm("q7") = vdupq_n_f32(bias0);
-
- float *in_ptr0 = inbuf + 0 * w;
- float *in_ptr1 = inbuf + 1 * w;
- float *in_ptr2 = inbuf + 2 * w;
- float *in_ptr3 = inbuf + 3 * w;
-
- float *out_ptr0 = outbuf + 0 * outw;
- float *out_ptr1 = outbuf + 1 * outw;
-
- int i;
- for (i = 0; i + 1 < outh; i += 2)
- {
- int nn = (outw >> 2) - 1;
- int remain = (outw & 0x03) + 4;
- if (i == 0)
- {
- if (nn > 0)
- {
- __asm __volatile("vmov.i32 q8, #0\n"
- "pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vext.32 q2, q8, q0, #3\n"
- "vext.32 q3, q0, q1, #1\n"
- "add %[in_ptr0], %[in_ptr0], #12\n"
-
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vand q15, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q2, %e[weight345][0]\n"
- "vmul.f32 q11, q0, %e[weight345][1]\n"
- "vmul.f32 q12, q2, %e[weight012][0]\n"
- "vmul.f32 q13, q0, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vmla.f32 q15, q3, %f[weight012][0]\n"
- "vext.32 q2, q8, q0, #3\n"
- "vext.32 q3, q0, q1, #1\n"
- "add %[in_ptr1], %[in_ptr1], #12\n"
-
- "vmla.f32 q10, q2, %e[weight678][0]\n"
- "vmla.f32 q11, q0, %e[weight678][1]\n"
- "vmla.f32 q12, q2, %e[weight345][0]\n"
- "vmla.f32 q13, q0, %e[weight345][1]\n"
-
- "pld [%[in_ptr2], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vmla.f32 q15, q3, %f[weight345][0]\n"
- "vext.32 q2, q8, q0, #3\n"
- "vext.32 q3, q0, q1, #1\n"
- "add %[in_ptr2], %[in_ptr2], #12\n"
-
- "vmla.f32 q12, q2, %e[weight678][0]\n"
- "vmla.f32 q13, q0, %e[weight678][1]\n"
- "vmla.f32 q15, q3, %f[weight678][0]\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
- "vadd.f32 q15, q15, q12\n"
- "vadd.f32 q15, q15, q13\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[out_ptr1]]!\n"
- "beq 2f\n"
-
- "pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
-
- "1:\n"
- "add %[in_ptr0], %[in_ptr0], #16\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vand q15, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight345][0]\n"
- "vmul.f32 q11, q2, %e[weight345][1]\n"
- "vmul.f32 q12, q0, %e[weight012][0]\n"
- "vmul.f32 q13, q2, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vmla.f32 q15, q3, %f[weight012][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr1], %[in_ptr1], #16\n"
-
- "vmla.f32 q10, q0, %e[weight678][0]\n"
- "vmla.f32 q11, q2, %e[weight678][1]\n"
- "vmla.f32 q12, q0, %e[weight345][0]\n"
- "vmla.f32 q13, q2, %e[weight345][1]\n"
-
- "pld [%[in_ptr2], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vmla.f32 q15, q3, %f[weight345][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr2], %[in_ptr2], #16\n"
-
- "vmla.f32 q12, q0, %e[weight678][0]\n"
- "vmla.f32 q13, q2, %e[weight678][1]\n"
-
- "pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vmla.f32 q15, q3, %f[weight678][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
- "vadd.f32 q15, q15, q12\n"
- "vadd.f32 q15, q15, q13\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[out_ptr1]]!\n"
- "bne 1b\n"
- "2:\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [in_ptr2] "+r"(in_ptr2), [out_ptr0] "+r"(out_ptr0),
- [out_ptr1] "+r"(out_ptr1), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
-
- for (; remain > 0; remain--)
- {
- // TODO: when nn == 0, pad_left comes here.
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
- float32x4_t input2 = vld1q_f32(in_ptr2);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- input2 = vsetq_lane_f32(0.0f, input2, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight345);
- out0 = vmlaq_f32(out0, input1, weight678);
-
- float32x4_t out1 = vmulq_f32(input0, weight012);
- out1 = vmlaq_f32(out1, input1, weight345);
- out1 = vmlaq_f32(out1, input2, weight678);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
- out1 = vsetq_lane_f32(bias0, out1, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
- float32x2_t out11 = vadd_f32(vget_low_f32(out1), vget_high_f32(out1));
-
- float32x2_t out01 = vpadd_f32(out00, out11);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
- *out_ptr1 = vget_lane_f32(out01, 1);
-
- in_ptr0++;
- in_ptr1++;
- in_ptr2++;
- out_ptr0++;
- out_ptr1++;
- }
-
- in_ptr0 += 1;
- in_ptr1 += 1;
- in_ptr2 += 1;
- in_ptr3 += w;
- }
- else if (i == outh - 2)
- {
- if (nn > 0)
- {
- __asm __volatile("vmov.i32 q8, #0\n"
- "pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vext.32 q2, q8, q0, #3\n"
- "vext.32 q3, q0, q1, #1\n"
- "add %[in_ptr0], %[in_ptr0], #12\n"
-
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q2, %e[weight012][0]\n"
- "vmul.f32 q11, q0, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q2, q8, q0, #3\n"
- "vext.32 q3, q0, q1, #1\n"
- "add %[in_ptr1], %[in_ptr1], #12\n"
-
- "vand q15, %q[qbias0], %q[qbias0]\n"
- "vmla.f32 q10, q2, %e[weight345][0]\n"
- "vmla.f32 q11, q0, %e[weight345][1]\n"
- "vmul.f32 q12, q2, %e[weight012][0]\n"
- "vmul.f32 q13, q0, %e[weight012][1]\n"
-
- "pld [%[in_ptr2], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vmla.f32 q15, q3, %f[weight012][0]\n"
- "vext.32 q2, q8, q0, #3\n"
- "vext.32 q3, q0, q1, #1\n"
- "add %[in_ptr2], %[in_ptr2], #12\n"
-
- "vmla.f32 q10, q2, %e[weight678][0]\n"
- "vmla.f32 q11, q0, %e[weight678][1]\n"
- "vmla.f32 q12, q2, %e[weight345][0]\n"
- "vmla.f32 q13, q0, %e[weight345][1]\n"
-
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vmla.f32 q15, q3, %f[weight345][0]\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
- "vadd.f32 q15, q15, q12\n"
- "vadd.f32 q15, q15, q13\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[out_ptr1]]!\n"
- "beq 2f\n"
-
- "pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
-
- "1:\n"
- "add %[in_ptr0], %[in_ptr0], #16\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight012][0]\n"
- "vmul.f32 q11, q2, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr1], %[in_ptr1], #16\n"
-
- "vand q15, %q[qbias0], %q[qbias0]\n"
- "vmla.f32 q10, q0, %e[weight345][0]\n"
- "vmla.f32 q11, q2, %e[weight345][1]\n"
- "vmul.f32 q12, q0, %e[weight012][0]\n"
- "vmul.f32 q13, q2, %e[weight012][1]\n"
-
- "pld [%[in_ptr2], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vmla.f32 q15, q3, %f[weight012][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr2], %[in_ptr2], #16\n"
-
- "vmla.f32 q10, q0, %e[weight678][0]\n"
- "vmla.f32 q11, q2, %e[weight678][1]\n"
- "vmla.f32 q12, q0, %e[weight345][0]\n"
- "vmla.f32 q13, q2, %e[weight345][1]\n"
-
- "pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vmla.f32 q15, q3, %f[weight345][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
- "vadd.f32 q15, q15, q12\n"
- "vadd.f32 q15, q15, q13\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[out_ptr1]]!\n"
- "bne 1b\n"
- "2:\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [in_ptr2] "+r"(in_ptr2), [out_ptr0] "+r"(out_ptr0),
- [out_ptr1] "+r"(out_ptr1), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
- for (; remain > 0; remain--)
- {
- // TODO: when nn == 0, pad_left comes here.
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
- float32x4_t input2 = vld1q_f32(in_ptr2);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- input2 = vsetq_lane_f32(0.0f, input2, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
- out0 = vmlaq_f32(out0, input2, weight678);
-
- float32x4_t out1 = vmulq_f32(input1, weight012);
- out1 = vmlaq_f32(out1, input2, weight345);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
- out1 = vsetq_lane_f32(bias0, out1, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
- float32x2_t out11 = vadd_f32(vget_low_f32(out1), vget_high_f32(out1));
-
- float32x2_t out01 = vpadd_f32(out00, out11);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
- *out_ptr1 = vget_lane_f32(out01, 1);
-
- in_ptr0++;
- in_ptr1++;
- in_ptr2++;
- out_ptr0++;
- out_ptr1++;
- }
- }
- else
- {
- if (nn > 0)
- {
- __asm __volatile("vmov.i32 q8, #0\n"
- "pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vext.32 q2, q8, q0, #3\n"
- "vext.32 q3, q0, q1, #1\n"
- "add %[in_ptr0], %[in_ptr0], #12\n"
-
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q2, %e[weight012][0]\n"
- "vmul.f32 q11, q0, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q2, q8, q0, #3\n"
- "vext.32 q3, q0, q1, #1\n"
- "add %[in_ptr1], %[in_ptr1], #12\n"
-
- "vand q15, %q[qbias0], %q[qbias0]\n"
- "vmla.f32 q10, q2, %e[weight345][0]\n"
- "vmla.f32 q11, q0, %e[weight345][1]\n"
- "vmul.f32 q12, q2, %e[weight012][0]\n"
- "vmul.f32 q13, q0, %e[weight012][1]\n"
-
- "pld [%[in_ptr2], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vmla.f32 q15, q3, %f[weight012][0]\n"
- "vext.32 q2, q8, q0, #3\n"
- "vext.32 q3, q0, q1, #1\n"
- "add %[in_ptr2], %[in_ptr2], #12\n"
-
- "vmla.f32 q10, q2, %e[weight678][0]\n"
- "vmla.f32 q11, q0, %e[weight678][1]\n"
- "vmla.f32 q12, q2, %e[weight345][0]\n"
- "vmla.f32 q13, q0, %e[weight345][1]\n"
-
- "pld [%[in_ptr3], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr3]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vmla.f32 q15, q3, %f[weight345][0]\n"
- "vext.32 q2, q8, q0, #3\n"
- "vext.32 q3, q0, q1, #1\n"
- "add %[in_ptr3], %[in_ptr3], #12\n"
-
- "vmla.f32 q15, q2, %e[weight678][0]\n"
- "vmla.f32 q15, q0, %e[weight678][1]\n"
- "vmla.f32 q15, q3, %f[weight678][0]\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
- "vadd.f32 q15, q15, q12\n"
- "vadd.f32 q15, q15, q13\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[out_ptr1]]!\n"
- "beq 2f\n"
-
- "pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
-
- "1:\n"
- "add %[in_ptr0], %[in_ptr0], #16\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight012][0]\n"
- "vmul.f32 q11, q2, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr1], %[in_ptr1], #16\n"
-
- "vand q15, %q[qbias0], %q[qbias0]\n"
- "vmla.f32 q10, q0, %e[weight345][0]\n"
- "vmla.f32 q11, q2, %e[weight345][1]\n"
- "vmul.f32 q12, q0, %e[weight012][0]\n"
- "vmul.f32 q13, q2, %e[weight012][1]\n"
-
- "pld [%[in_ptr2], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vmla.f32 q15, q3, %f[weight012][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr2], %[in_ptr2], #16\n"
-
- "vmla.f32 q10, q0, %e[weight678][0]\n"
- "vmla.f32 q11, q2, %e[weight678][1]\n"
- "vmla.f32 q12, q0, %e[weight345][0]\n"
- "vmla.f32 q13, q2, %e[weight345][1]\n"
-
- "pld [%[in_ptr3], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr3]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vmla.f32 q15, q3, %f[weight345][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr3], %[in_ptr3], #16\n"
-
- "vmla.f32 q15, q0, %e[weight678][0]\n"
- "vmla.f32 q15, q2, %e[weight678][1]\n"
-
- "pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vmla.f32 q15, q3, %f[weight678][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q15, q15, q12\n"
- "vadd.f32 q14, q14, q11\n"
- "vadd.f32 q15, q15, q13\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[out_ptr1]]!\n"
- "bne 1b\n"
- "2:\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [in_ptr2] "+r"(in_ptr2), [in_ptr3] "+r"(in_ptr3),
-
- [out_ptr0] "+r"(out_ptr0), [out_ptr1] "+r"(out_ptr1), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
- for (; remain > 0; remain--)
- {
- // TODO: when nn == 0, pad_left comes here.
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
- float32x4_t input2 = vld1q_f32(in_ptr2);
- float32x4_t input3 = vld1q_f32(in_ptr3);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- input2 = vsetq_lane_f32(0.0f, input2, 2);
- input3 = vsetq_lane_f32(0.0f, input3, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
- out0 = vmlaq_f32(out0, input2, weight678);
-
- float32x4_t out1 = vmulq_f32(input1, weight012);
- out1 = vmlaq_f32(out1, input2, weight345);
- out1 = vmlaq_f32(out1, input3, weight678);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
- out1 = vsetq_lane_f32(bias0, out1, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
- float32x2_t out11 = vadd_f32(vget_low_f32(out1), vget_high_f32(out1));
-
- float32x2_t out01 = vpadd_f32(out00, out11);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
- *out_ptr1 = vget_lane_f32(out01, 1);
-
- in_ptr0++;
- in_ptr1++;
- in_ptr2++;
- in_ptr3++;
- out_ptr0++;
- out_ptr1++;
- }
- in_ptr0 += w + 1;
- in_ptr1 += w + 1;
- in_ptr2 += w + 1;
- in_ptr3 += w + 1;
- }
-
- out_ptr0 += outw;
- out_ptr1 += outw;
- }
-
- for (; i < outh; i++)
- {
- // TODO:if i == 0, pad_top comes here.
- int nn = (outw >> 2) - 1;
- int remain = (outw & 0x03) + 4;
-
- if (nn > 0)
- {
- __asm __volatile("vmov.i32 q8, #0\n"
- "pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vext.32 q2, q8, q0, #3\n"
- "vext.32 q3, q0, q1, #1\n"
- "add %[in_ptr0], %[in_ptr0], #12\n"
-
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q2, %e[weight012][0]\n"
- "vmul.f32 q11, q0, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q2, q8, q0, #3\n"
- "vext.32 q3, q0, q1, #1\n"
- "add %[in_ptr1], %[in_ptr1], #12\n"
-
- "vmla.f32 q10, q2, %e[weight345][0]\n"
- "vmla.f32 q11, q0, %e[weight345][1]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "beq 2f\n"
-
- "pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
-
- "1:\n"
- "add %[in_ptr0], %[in_ptr0], #16\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight012][0]\n"
- "vmul.f32 q11, q2, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
- "add %[in_ptr1], %[in_ptr1], #16\n"
-
- "vmla.f32 q10, q0, %e[weight345][0]\n"
- "vmla.f32 q11, q2, %e[weight345][1]\n"
-
- "pld [%[in_ptr0], #192]\n"
- "vld1.f32 {d0-d2}, [%[in_ptr0]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vext.32 q2, q0, q1, #1\n"
- "vext.32 q3, q0, q1, #2\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "bne 1b\n"
- "2:\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [out_ptr0] "+r"(out_ptr0), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
- for (; remain > 0; remain--)
- {
- // TODO: when nn == 0, pad_left comes here.
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
-
- float32x2_t out01 = vpadd_f32(out00, out00);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
-
- in_ptr0++;
- in_ptr1++;
- out_ptr0++;
- out_ptr1++;
- }
- }
- }
-#else // __aarch64__
- (void)in_mat;
- (void)out_mat;
- (void)kernel;
- (void)bias;
-#endif // __aarch64__
-}
-
-static void depthwise_conv3x3S2_nopad(const convMat_t &in_mat, convMat_t &out_mat,
- const convMat_t &kernel, const convMat_t &bias)
-{
-#if !__aarch64__
- int w = in_mat.w;
- int h = in_mat.h;
- int outw = out_mat.w;
- int outh = out_mat.h;
- int channels = in_mat.c;
-
- const int tailstep = w - 2 * outw + w;
-
-#pragma omp parallel for
- for (int c = 0; c < channels; c++)
- {
- const float *filter = kernel.data + c * 9;
-#ifdef NCNN
- float *inbuf = in_mat.data + c * alignSize(w * h, 16 / sizeof(float));
- float *outbuf = out_mat.data + c * alignSize(outw * outh, 16 / sizeof(float));
-#else // NCNN
- float *inbuf = in_mat.data + c * w * h;
- float *outbuf = out_mat.data + c * outw * outh;
-#endif // NCNN
- float bias0 = bias.data ? bias.data[c] : 0.0f;
-
- register float32x4_t weight012 asm("q4") = vld1q_f32(filter);
- register float32x4_t weight345 asm("q5") = vld1q_f32(filter + 3);
- register float32x4_t weight678 asm("q6") = vld1q_f32(filter + 6);
- register float32x4_t qbias0 asm("q7") = vdupq_n_f32(bias0);
-
- float *in_ptr0 = inbuf + 0 * w;
- float *in_ptr1 = inbuf + 1 * w;
- float *in_ptr2 = inbuf + 2 * w;
-
- float *out_ptr0 = outbuf + 0 * outw;
-
- int i;
- for (i = 0; i < outh; i++)
- {
- int nn = outw >> 2;
- int remain = outw & 0x03;
-
- if (nn > 0)
- {
- __asm __volatile("pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "1:\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight012][0]\n"
- "vmul.f32 q11, q1, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight345][0]\n"
- "vmla.f32 q11, q1, %e[weight345][1]\n"
-
- "pld [%[in_ptr2], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr2]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight678][0]\n"
- "vmla.f32 q11, q1, %e[weight678][1]\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "bne 1b\n"
- "sub %[in_ptr0], %[in_ptr0], #32\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [in_ptr2] "+r"(in_ptr2), [out_ptr0] "+r"(out_ptr0), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
-
- for (; remain > 0; remain--)
- {
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
- float32x4_t input2 = vld1q_f32(in_ptr2);
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
- out0 = vmlaq_f32(out0, input2, weight678);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
-
- float32x2_t out01 = vpadd_f32(out00, out00);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
-
- in_ptr0 += 2;
- in_ptr1 += 2;
- in_ptr2 += 2;
- out_ptr0++;
- }
-
- in_ptr0 += tailstep;
- in_ptr1 += tailstep;
- in_ptr2 += tailstep;
- }
- }
-
-#else // __aarch64__
- (void)in_mat;
- (void)out_mat;
- (void)kernel;
- (void)bias;
-#endif // __aarch64__
-}
-
-static void depthwise_conv3x3S2_padding00(const convMat_t &in_mat, convMat_t &out_mat,
- const convMat_t &kernel, const convMat_t &bias)
-{
-#if !__aarch64__
- int w = in_mat.w;
- int h = in_mat.h;
- int outw = out_mat.w;
- int outh = out_mat.h;
- int channels = in_mat.c;
-
-#pragma omp parallel for
- for (int c = 0; c < channels; c++)
- {
- const float *filter = kernel.data + c * 9;
-#ifdef NCNN
- float *inbuf = in_mat.data + c * alignSize(w * h, 16 / sizeof(float));
- float *outbuf = out_mat.data + c * alignSize(outw * outh, 16 / sizeof(float));
-#else // NCNN
- float *inbuf = in_mat.data + c * w * h;
- float *outbuf = out_mat.data + c * outw * outh;
-#endif // NCNN
- float bias0 = bias.data ? bias.data[c] : 0.0f;
-
- register float32x4_t weight012 asm("q4") = vld1q_f32(filter);
- register float32x4_t weight345 asm("q5") = vld1q_f32(filter + 3);
- register float32x4_t weight678 asm("q6") = vld1q_f32(filter + 6);
- register float32x4_t qbias0 asm("q7") = vdupq_n_f32(bias0);
-
- float *in_ptr0 = inbuf + 0 * w;
- float *in_ptr1 = inbuf + 1 * w;
- float *in_ptr2 = inbuf + 2 * w;
-
- float *out_ptr0 = outbuf + 0 * outw;
-
- int i;
- for (i = 0; i < outh; i++)
- {
- int nn = (outw >> 2) - 1;
- int remain = (outw & 0x03) + 4;
-
- if (i == outh - 1)
- {
- if (nn > 0)
- {
- __asm __volatile("pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "1:\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight012][0]\n"
- "vmul.f32 q11, q1, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight345][0]\n"
- "vmla.f32 q11, q1, %e[weight345][1]\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "bne 1b\n"
- "sub %[in_ptr0], %[in_ptr0], #32\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [out_ptr0] "+r"(out_ptr0), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
- for (; remain > 0; remain--)
- {
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
-
- float32x2_t out01 = vpadd_f32(out00, out00);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
-
- in_ptr0 += 2;
- in_ptr1 += 2;
- out_ptr0++;
- }
- }
- else
- {
- if (nn > 0)
- {
- __asm __volatile("pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "1:\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight012][0]\n"
- "vmul.f32 q11, q1, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight345][0]\n"
- "vmla.f32 q11, q1, %e[weight345][1]\n"
-
- "pld [%[in_ptr2], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr2]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight678][0]\n"
- "vmla.f32 q11, q1, %e[weight678][1]\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "bne 1b\n"
- "sub %[in_ptr0], %[in_ptr0], #32\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [in_ptr2] "+r"(in_ptr2), [out_ptr0] "+r"(out_ptr0), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
- for (; remain > 0; remain--)
- {
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
- float32x4_t input2 = vld1q_f32(in_ptr2);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- input2 = vsetq_lane_f32(0.0f, input2, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
- out0 = vmlaq_f32(out0, input2, weight678);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
-
- float32x2_t out01 = vpadd_f32(out00, out00);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
-
- in_ptr0 += 2;
- in_ptr1 += 2;
- in_ptr2 += 2;
- out_ptr0++;
- }
-
- in_ptr0 += w;
- in_ptr1 += w;
- in_ptr2 += w;
- }
- }
- }
-#else // __aarch64__
- (void)in_mat;
- (void)out_mat;
- (void)kernel;
- (void)bias;
-#endif // !__aarch64__
-}
-
-static void depthwise_conv3x3S2_padding01(const convMat_t &in_mat, convMat_t &out_mat,
- const convMat_t &kernel, const convMat_t &bias)
-{
-#if !__aarch64__
- int w = in_mat.w;
- int h = in_mat.h;
- int outw = out_mat.w;
- int outh = out_mat.h;
- int channels = in_mat.c;
-
-#pragma omp parallel for
- for (int c = 0; c < channels; c++)
- {
- const float *filter = kernel.data + c * 9;
-#ifdef NCNN
- float *inbuf = in_mat.data + c * alignSize(w * h, 16 / sizeof(float));
- float *outbuf = out_mat.data + c * alignSize(outw * outh, 16 / sizeof(float));
-#else // NCNN
- float *inbuf = in_mat.data + c * w * h;
- float *outbuf = out_mat.data + c * outw * outh;
-#endif // NCNN
- float bias0 = bias.data ? bias.data[c] : 0.0f;
-
- register float32x4_t weight012 asm("q4") = vld1q_f32(filter);
- register float32x4_t weight345 asm("q5") = vld1q_f32(filter + 3);
- register float32x4_t weight678 asm("q6") = vld1q_f32(filter + 6);
- register float32x4_t qbias0 asm("q7") = vdupq_n_f32(bias0);
-
- float *in_ptr0 = inbuf + 0 * w;
- float *in_ptr1 = inbuf + 1 * w;
- float *in_ptr2 = inbuf + 2 * w;
-
- float *out_ptr0 = outbuf + 0 * outw;
-
- int i;
- for (i = 0; i < outh; i++)
- {
- int nn = (outw >> 2) - 1;
- int remain = (outw & 0x03) + 4;
-
- if (i == outh - 1)
- {
- if (nn > 0)
- {
- __asm __volatile("vmov.i32 q2, #0\n"
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]\n"
- "vext.32 q3, q2, q0, #3\n"
- "add %[in_ptr0], %[in_ptr0], #28\n"
-
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q3, %e[weight012][0]\n"
- "vmul.f32 q11, q0, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q1, %f[weight012][0]\n"
- "vext.32 q3, q2, q0, #3\n"
- "add %[in_ptr1], %[in_ptr1], #28\n"
-
- "vmla.f32 q10, q3, %e[weight345][0]\n"
- "vmla.f32 q11, q0, %e[weight345][1]\n"
- "vmla.f32 q14, q1, %f[weight345][0]\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "beq 2f\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "1:\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight012][0]\n"
- "vmul.f32 q11, q1, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight345][0]\n"
- "vmla.f32 q11, q1, %e[weight345][1]\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "bne 1b\n"
- "sub %[in_ptr0], %[in_ptr0], #32\n"
-
- "2:\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [out_ptr0] "+r"(out_ptr0), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
- for (; remain > 0; remain--)
- {
- // TODO: if nn == 0, pad_left comes here.
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
-
- float32x2_t out01 = vpadd_f32(out00, out00);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
-
- in_ptr0 += 2;
- in_ptr1 += 2;
- out_ptr0++;
- }
- }
- else
- {
- if (nn > 0)
- {
- __asm __volatile("vmov.i32 q2, #0\n"
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]\n"
- "vext.32 q3, q2, q0, #3\n"
- "add %[in_ptr0], %[in_ptr0], #28\n"
-
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q3, %e[weight012][0]\n"
- "vmul.f32 q11, q0, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q1, %f[weight012][0]\n"
- "vext.32 q3, q2, q0, #3\n"
- "add %[in_ptr1], %[in_ptr1], #28\n"
-
- "vmla.f32 q10, q3, %e[weight345][0]\n"
- "vmla.f32 q11, q0, %e[weight345][1]\n"
-
- "pld [%[in_ptr2], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q1, %f[weight345][0]\n"
- "vext.32 q3, q2, q0, #3\n"
- "add %[in_ptr2], %[in_ptr2], #28\n"
-
- "vmla.f32 q10, q3, %e[weight678][0]\n"
- "vmla.f32 q11, q0, %e[weight678][1]\n"
- "vmla.f32 q14, q1, %f[weight678][0]\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "beq 2f\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "1:\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight012][0]\n"
- "vmul.f32 q11, q1, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight345][0]\n"
- "vmla.f32 q11, q1, %e[weight345][1]\n"
-
- "pld [%[in_ptr2], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr2]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight678][0]\n"
- "vmla.f32 q11, q1, %e[weight678][1]\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "bne 1b\n"
- "sub %[in_ptr0], %[in_ptr0], #32\n"
- "2:\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [in_ptr2] "+r"(in_ptr2), [out_ptr0] "+r"(out_ptr0), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
- for (; remain > 0; remain--)
- {
- // TODO: if nn == 0, pad_left comes here.
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
- float32x4_t input2 = vld1q_f32(in_ptr2);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- input2 = vsetq_lane_f32(0.0f, input2, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
- out0 = vmlaq_f32(out0, input2, weight678);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
-
- float32x2_t out01 = vpadd_f32(out00, out00);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
-
- in_ptr0 += 2;
- in_ptr1 += 2;
- in_ptr2 += 2;
- out_ptr0++;
- }
-
- in_ptr0 += w;
- in_ptr1 += w;
- in_ptr2 += w;
- }
- }
- }
-
-#else // __aarch64__
- (void)in_mat;
- (void)out_mat;
- (void)kernel;
- (void)bias;
-#endif // __aarch64__
-}
-
-static void depthwise_conv3x3S2_padding10(const convMat_t &in_mat, convMat_t &out_mat,
- const convMat_t &kernel, const convMat_t &bias)
-{
-#if !__aarch64__
- int w = in_mat.w;
- int h = in_mat.h;
- int outw = out_mat.w;
- int outh = out_mat.h;
- int channels = in_mat.c;
-
-#pragma omp parallel for
- for (int c = 0; c < channels; c++)
- {
- const float *filter = kernel.data + c * 9;
-#ifdef NCNN
- float *inbuf = in_mat.data + c * alignSize(w * h, 16 / sizeof(float));
- float *outbuf = out_mat.data + c * alignSize(outw * outh, 16 / sizeof(float));
-#else // NCNN
- float *inbuf = in_mat.data + c * w * h;
- float *outbuf = out_mat.data + c * outw * outh;
-#endif // NCNN
- float bias0 = bias.data ? bias.data[c] : 0.0f;
-
- register float32x4_t weight012 asm("q4") = vld1q_f32(filter);
- register float32x4_t weight345 asm("q5") = vld1q_f32(filter + 3);
- register float32x4_t weight678 asm("q6") = vld1q_f32(filter + 6);
- register float32x4_t qbias0 asm("q7") = vdupq_n_f32(bias0);
-
- float *in_ptr0 = inbuf + 0 * w;
- float *in_ptr1 = inbuf + 1 * w;
- float *in_ptr2 = inbuf + 2 * w;
-
- float *out_ptr0 = outbuf + 0 * outw;
-
- int i;
- for (i = 0; i < outh; i++)
- {
- int nn = (outw >> 2) - 1;
- int remain = (outw & 0x03) + 4;
-
- // TODO: i == 0 && i == outh -1
- if (i == 0)
- {
- if (nn > 0)
- {
- __asm __volatile("pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "1:\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight345][0]\n"
- "vmul.f32 q11, q1, %e[weight345][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight678][0]\n"
- "vmla.f32 q11, q1, %e[weight678][1]\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "bne 1b\n"
- "sub %[in_ptr0], %[in_ptr0], #32\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [out_ptr0] "+r"(out_ptr0), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
- for (; remain > 0; remain--)
- {
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight345);
- out0 = vmlaq_f32(out0, input1, weight678);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
-
- float32x2_t out01 = vpadd_f32(out00, out00);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
-
- in_ptr0 += 2;
- in_ptr1 += 2;
- out_ptr0++;
- }
-
- in_ptr2 += w;
- }
- else if (i == outh - 1)
- {
- if (nn > 0)
- {
- __asm __volatile("pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "1:\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight012][0]\n"
- "vmul.f32 q11, q1, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight345][0]\n"
- "vmla.f32 q11, q1, %e[weight345][1]\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "bne 1b\n"
- "sub %[in_ptr0], %[in_ptr0], #32\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [out_ptr0] "+r"(out_ptr0), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
- for (; remain > 0; remain--)
- {
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
-
- float32x2_t out01 = vpadd_f32(out00, out00);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
-
- in_ptr0 += 2;
- in_ptr1 += 2;
- out_ptr0++;
- }
- }
- else
- {
- if (nn > 0)
- {
- __asm __volatile("pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "1:\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight012][0]\n"
- "vmul.f32 q11, q1, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight345][0]\n"
- "vmla.f32 q11, q1, %e[weight345][1]\n"
-
- "pld [%[in_ptr2], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr2]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight678][0]\n"
- "vmla.f32 q11, q1, %e[weight678][1]\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "bne 1b\n"
- "sub %[in_ptr0], %[in_ptr0], #32\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [in_ptr2] "+r"(in_ptr2), [out_ptr0] "+r"(out_ptr0), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
- for (; remain > 0; remain--)
- {
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
- float32x4_t input2 = vld1q_f32(in_ptr2);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- input2 = vsetq_lane_f32(0.0f, input2, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
- out0 = vmlaq_f32(out0, input2, weight678);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
-
- float32x2_t out01 = vpadd_f32(out00, out00);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
-
- in_ptr0 += 2;
- in_ptr1 += 2;
- in_ptr2 += 2;
- out_ptr0++;
- }
-
- in_ptr0 += w;
- in_ptr1 += w;
- in_ptr2 += w;
- }
- }
- }
-
-#else // __aarch64__
- (void)in_mat;
- (void)out_mat;
- (void)kernel;
- (void)bias;
-#endif // __aarch64__
-}
-
-static void depthwise_conv3x3S2_padding11(const convMat_t &in_mat, convMat_t &out_mat,
- const convMat_t &kernel, const convMat_t &bias)
-{
-#if !__aarch64__
- int w = in_mat.w;
- int h = in_mat.h;
- int outw = out_mat.w;
- int outh = out_mat.h;
- int channels = in_mat.c;
-
-#pragma omp parallel for
- for (int c = 0; c < channels; c++)
- {
- const float *filter = kernel.data + c * 9;
-#ifdef NCNN
- float *inbuf = in_mat.data + c * alignSize(w * h, 16 / sizeof(float));
- float *outbuf = out_mat.data + c * alignSize(outw * outh, 16 / sizeof(float));
-#else // NCNN
- float *inbuf = in_mat.data + c * w * h;
- float *outbuf = out_mat.data + c * outw * outh;
-#endif // NCNN
- float bias0 = bias.data ? bias.data[c] : 0.0f;
-
- register float32x4_t weight012 asm("q4") = vld1q_f32(filter);
- register float32x4_t weight345 asm("q5") = vld1q_f32(filter + 3);
- register float32x4_t weight678 asm("q6") = vld1q_f32(filter + 6);
- register float32x4_t qbias0 asm("q7") = vdupq_n_f32(bias0);
-
- float *in_ptr0 = inbuf + 0 * w;
- float *in_ptr1 = inbuf + 1 * w;
- float *in_ptr2 = inbuf + 2 * w;
-
- float *out_ptr0 = outbuf + 0 * outw;
-
- int i;
- for (i = 0; i < outh; i++)
- {
- int nn = (outw >> 2) - 1;
- int remain = (outw & 0x03) + 4;
-
- // TODO: i == 0 && i == outh - 1
- if (i == 0)
- {
- if (nn > 0)
- {
- __asm __volatile("vmov.i32 q2, #0\n"
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]\n"
- "vext.32 q3, q2, q0, #3\n"
- "add %[in_ptr0], %[in_ptr0], #28\n"
-
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q3, %e[weight345][0]\n"
- "vmul.f32 q11, q0, %e[weight345][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q1, %f[weight345][0]\n"
- "vext.32 q3, q2, q0, #3\n"
- "add %[in_ptr1], %[in_ptr1], #28\n"
-
- "vmla.f32 q10, q3, %e[weight678][0]\n"
- "vmla.f32 q11, q0, %e[weight678][1]\n"
- "vmla.f32 q14, q1, %f[weight678][0]\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "beq 2f\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "1:\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight345][0]\n"
- "vmul.f32 q11, q1, %e[weight345][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight678][0]\n"
- "vmla.f32 q11, q1, %e[weight678][1]\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "bne 1b\n"
- "sub %[in_ptr0], %[in_ptr0], #32\n"
- "2:\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [out_ptr0] "+r"(out_ptr0), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
- for (; remain > 0; remain--)
- {
- // TODO: if nn == 0, pad_left comes here.
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight345);
- out0 = vmlaq_f32(out0, input1, weight678);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
-
- float32x2_t out01 = vpadd_f32(out00, out00);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
-
- in_ptr0 += 2;
- in_ptr1 += 2;
- out_ptr0++;
- }
-
- in_ptr2 += w;
- }
- else if (i == outh - 1)
- {
- if (nn > 0)
- {
- __asm __volatile("vmov.i32 q2, #0\n"
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]\n"
- "vext.32 q3, q2, q0, #3\n"
- "add %[in_ptr0], %[in_ptr0], #28\n"
-
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q3, %e[weight012][0]\n"
- "vmul.f32 q11, q0, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q1, %f[weight012][0]\n"
- "vext.32 q3, q2, q0, #3\n"
- "add %[in_ptr1], %[in_ptr1], #28\n"
-
- "vmla.f32 q10, q3, %e[weight345][0]\n"
- "vmla.f32 q11, q0, %e[weight345][1]\n"
- "vmla.f32 q14, q1, %f[weight345][0]\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "beq 2f\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "1:\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight012][0]\n"
- "vmul.f32 q11, q1, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight345][0]\n"
- "vmla.f32 q11, q1, %e[weight345][1]\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "bne 1b\n"
- "sub %[in_ptr0], %[in_ptr0], #32\n"
-
- "2:\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [out_ptr0] "+r"(out_ptr0), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
- for (; remain > 0; remain--)
- {
- // TODO: if nn == 0, pad_left comes here.
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
-
- float32x2_t out01 = vpadd_f32(out00, out00);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
-
- in_ptr0 += 2;
- in_ptr1 += 2;
- out_ptr0++;
- }
- }
- else
- {
- if (nn > 0)
- {
- __asm __volatile("vmov.i32 q2, #0\n"
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]\n"
- "vext.32 q3, q2, q0, #3\n"
- "add %[in_ptr0], %[in_ptr0], #28\n"
-
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q3, %e[weight012][0]\n"
- "vmul.f32 q11, q0, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q1, %f[weight012][0]\n"
- "vext.32 q3, q2, q0, #3\n"
- "add %[in_ptr1], %[in_ptr1], #28\n"
-
- "vmla.f32 q10, q3, %e[weight345][0]\n"
- "vmla.f32 q11, q0, %e[weight345][1]\n"
-
- "pld [%[in_ptr2], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q1, %f[weight345][0]\n"
- "vext.32 q3, q2, q0, #3\n"
- "add %[in_ptr2], %[in_ptr2], #28\n"
-
- "vmla.f32 q10, q3, %e[weight678][0]\n"
- "vmla.f32 q11, q0, %e[weight678][1]\n"
- "vmla.f32 q14, q1, %f[weight678][0]\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "beq 2f\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "1:\n"
- "vand q14, %q[qbias0], %q[qbias0]\n"
- "vmul.f32 q10, q0, %e[weight012][0]\n"
- "vmul.f32 q11, q1, %e[weight012][1]\n"
-
- "pld [%[in_ptr1], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr1]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr1]]\n"
- "vmla.f32 q14, q3, %f[weight012][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight345][0]\n"
- "vmla.f32 q11, q1, %e[weight345][1]\n"
-
- "pld [%[in_ptr2], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr2]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr2]]\n"
- "vmla.f32 q14, q3, %f[weight345][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vmla.f32 q10, q0, %e[weight678][0]\n"
- "vmla.f32 q11, q1, %e[weight678][1]\n"
-
- "pld [%[in_ptr0], #256]\n"
- "vld2.f32 {d0-d3}, [%[in_ptr0]]!\n"
- "vld1.f32 {d4[0]}, [%[in_ptr0]]\n"
- "vmla.f32 q14, q3, %f[weight678][0]\n"
- "vext.32 q3, q0, q2, #1\n"
-
- "vadd.f32 q14, q14, q10\n"
- "vadd.f32 q14, q14, q11\n"
-
- "subs %[nn], %[nn], #1\n"
- "vst1.f32 {d28-d29}, [%[out_ptr0]]!\n"
- "bne 1b\n"
- "sub %[in_ptr0], %[in_ptr0], #32\n"
- "2:\n"
- : [in_ptr0] "+r"(in_ptr0), [in_ptr1] "+r"(in_ptr1),
- [in_ptr2] "+r"(in_ptr2), [out_ptr0] "+r"(out_ptr0), [nn] "+r"(nn)
- : [weight012] "w"(weight012), [weight345] "w"(weight345),
- [weight678] "w"(weight678), [qbias0] "w"(qbias0)
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
- "q15", "cc", "memory");
- }
- for (; remain > 0; remain--)
- {
- // TODO: if nn == 0, pad_left comes here.
- float32x4_t input0 = vld1q_f32(in_ptr0);
- float32x4_t input1 = vld1q_f32(in_ptr1);
- float32x4_t input2 = vld1q_f32(in_ptr2);
-
- if (remain == 1)
- {
- input0 = vsetq_lane_f32(0.0f, input0, 2);
- input1 = vsetq_lane_f32(0.0f, input1, 2);
- input2 = vsetq_lane_f32(0.0f, input2, 2);
- }
-
- float32x4_t out0 = vmulq_f32(input0, weight012);
- out0 = vmlaq_f32(out0, input1, weight345);
- out0 = vmlaq_f32(out0, input2, weight678);
-
- out0 = vsetq_lane_f32(bias0, out0, 3);
-
- float32x2_t out00 = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));
-
- float32x2_t out01 = vpadd_f32(out00, out00);
-
- *out_ptr0 = vget_lane_f32(out01, 0);
-
- in_ptr0 += 2;
- in_ptr1 += 2;
- in_ptr2 += 2;
- out_ptr0++;
- }
-
- in_ptr0 += w;
- in_ptr1 += w;
- in_ptr2 += w;
- }
- }
- }
-#else // __aarch64__
- (void)in_mat;
- (void)out_mat;
- (void)kernel;
- (void)bias;
-#endif // __aarch64__
-}
-
-static void depthwise_conv_colmajor(const convMat_t &in_mat, convMat_t &out_mat,
- const convMat_t &kernel, const convParams_t &in_param)
-{
-#if __aarch64__
- const int w = in_mat.w;
- const int h = in_mat.h;
- const int outw = out_mat.w;
- const int outh = out_mat.h;
- const int channels = out_mat.c;
- const int stridew = in_param.stride_w;
- const int strideh = in_param.stride_h;
- const int padding = in_param.padding;
- const int padw = in_param.pad_w;
- const int padh = in_param.pad_h;
-
-#pragma omp parallel for
- for (int oh = 0; oh < outh; oh++)
- {
- const float *input_data0 = in_mat.data + (oh * strideh - padh) * w * channels;
-
- memset(out_mat.data + oh * outw * channels, 0x00, outw * channels * sizeof(float));
-
- for (int kh = 0; kh < in_param.kernel_h; kh++)
- {
- for (int kw = 0; kw < in_param.kernel_w; kw++)
- {
- const float *kernel_data = kernel.data + (kh * in_param.kernel_w + kw) * channels;
- const float *input_data1 = input_data0 + (kh * w + kw) * channels;
-
- if (padding && ((oh * strideh + kh < padh) || (oh * strideh + kh >= padh + h)))
- {
- continue;
- }
-
- int ow = 0;
- for (; ow + 3 < outw; /*ow += 4*/)
- {
- if (((ow + 3) * stridew + kw < padw) || (ow * stridew + kw >= padw + w))
- {
- ow += 4;
- continue;
- }
- else if ((ow + 3) * stridew + kw >= padw + w)
- {
- break;
- }
- else if (ow * stridew + kw < padw)
- {
- int delta = (padw - kw) / stridew - ow;
- delta += (padw - kw) % stridew ? 1 : 0;
- ow += delta;
- continue;
- }
-
- int nn = channels >> 2;
- int remain = channels & 0x03;
-
- const float *input_r0 = input_data1 + (ow * stridew - padw) * channels;
-
- const float *input_r1 = input_r0 + stridew * channels;
- const float *input_r2 = input_r1 + stridew * channels;
- const float *input_r3 = input_r2 + stridew * channels;
- const float *weights_data = kernel_data;
- float *output_r0 = out_mat.data + (oh * outw + ow) * channels;
- float *output_r1 = output_r0 + channels;
- float *output_r2 = output_r1 + channels;
- float *output_r3 = output_r2 + channels;
-
- if (nn > 0)
- {
- int _n = (nn + 1) >> 1;
- int oddn = nn & 1;
-
- asm volatile("subs %[_n], %[_n], #1\n"
- "ld1 {v4.4s}, [%[weights_data]], #16\n"
- "ld1 {v5.4s}, [%[input_r0]], #16\n"
- "ld1 {v6.4s}, [%[input_r1]], #16\n"
- "ld1 {v7.4s}, [%[input_r2]], #16\n"
- "ld1 {v8.4s}, [%[input_r3]], #16\n"
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v24.4s, v25.4s}, [%[output_r0]]\n"
- "ld1 {v26.4s, v27.4s}, [%[output_r1]]\n"
- "ld1 {v28.4s, v29.4s}, [%[output_r2]]\n"
- "ld1 {v30.4s, v31.4s}, [%[output_r3]]\n"
-
- "ld1 {v9.4s}, [%[weights_data]], #16\n"
- "ld1 {v10.4s}, [%[input_r0]], #16\n"
- "ld1 {v11.4s}, [%[input_r1]], #16\n"
- "ld1 {v12.4s}, [%[input_r2]], #16\n"
- "ld1 {v13.4s}, [%[input_r3]], #16\n"
-
- "fmla v24.4s, v4.4s, v5.4s\n"
- "fmla v26.4s, v4.4s, v6.4s\n"
-
- "fmla v28.4s, v4.4s, v7.4s\n"
- "fmla v30.4s, v4.4s, v8.4s\n"
-
- "ld1 {v4.4s}, [%[weights_data]], #16\n"
- "ld1 {v5.4s}, [%[input_r0]], #16\n"
- "ld1 {v6.4s}, [%[input_r1]], #16\n"
- "ld1 {v7.4s}, [%[input_r2]], #16\n"
- "ld1 {v8.4s}, [%[input_r3]], #16\n"
-
- "fmla v25.4s, v9.4s, v10.4s\n"
- "fmla v27.4s, v9.4s, v11.4s\n"
-
- "fmla v29.4s, v9.4s, v12.4s\n"
- "fmla v31.4s, v9.4s, v13.4s\n"
-
- "st1 {v24.4s, v25.4s}, [%[output_r0]], #32\n"
- "st1 {v26.4s, v27.4s}, [%[output_r1]], #32\n"
- "st1 {v28.4s, v29.4s}, [%[output_r2]], #32\n"
- "st1 {v30.4s, v31.4s}, [%[output_r3]], #32\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v24.4s}, [%[output_r0]]\n"
- "ld1 {v26.4s}, [%[output_r1]]\n"
- "ld1 {v28.4s}, [%[output_r2]]\n"
- "ld1 {v30.4s}, [%[output_r3]]\n"
- "cmp %[oddn], #1\n"
-
- "fmla v24.4s, v4.4s, v5.4s\n"
- "fmla v26.4s, v4.4s, v6.4s\n"
-
- "fmla v28.4s, v4.4s, v7.4s\n"
- "fmla v30.4s, v4.4s, v8.4s\n"
-
- "st1 {v24.4s}, [%[output_r0]], #16\n"
- "st1 {v26.4s}, [%[output_r1]], #16\n"
- "st1 {v28.4s}, [%[output_r2]], #16\n"
- "st1 {v30.4s}, [%[output_r3]], #16\n"
-
- "beq 2f\n"
- "ld1 {v25.4s}, [%[output_r0]]\n"
- "ld1 {v27.4s}, [%[output_r1]]\n"
- "ld1 {v29.4s}, [%[output_r2]]\n"
- "ld1 {v31.4s}, [%[output_r3]]\n"
-
- "ld1 {v9.4s}, [%[weights_data]], #16\n"
- "ld1 {v10.4s}, [%[input_r0]], #16\n"
- "ld1 {v11.4s}, [%[input_r1]], #16\n"
- "ld1 {v12.4s}, [%[input_r2]], #16\n"
- "ld1 {v13.4s}, [%[input_r3]], #16\n"
-
- "fmla v25.4s, v9.4s, v10.4s\n"
- "fmla v27.4s, v9.4s, v11.4s\n"
-
- "fmla v29.4s, v9.4s, v12.4s\n"
- "fmla v31.4s, v9.4s, v13.4s\n"
-
- "st1 {v25.4s}, [%[output_r0]], #16\n"
- "st1 {v27.4s}, [%[output_r1]], #16\n"
- "st1 {v29.4s}, [%[output_r2]], #16\n"
- "st1 {v31.4s}, [%[output_r3]], #16\n"
- "2:\n"
- : [weights_data] "+r"(weights_data), [input_r0] "+r"(input_r0),
- [input_r1] "+r"(input_r1), [input_r2] "+r"(input_r2),
- [input_r3] "+r"(input_r3), [output_r0] "+r"(output_r0),
- [output_r1] "+r"(output_r1), [output_r2] "+r"(output_r2),
- [output_r3] "+r"(output_r3), [_n] "+r"(_n)
- : [oddn] "r"(oddn)
- : "cc", "memory", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
- "v13", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
- }
- if (remain >= 2)
- {
- asm volatile(
- "ld1 {v24.2s}, [%[output_r0]]\n"
- "ld1 {v26.2s}, [%[output_r1]]\n"
- "ld1 {v28.2s}, [%[output_r2]]\n"
- "ld1 {v30.2s}, [%[output_r3]]\n"
- "ld1 {v4.2s}, [%[weights_data]], #8\n"
- "ld1 {v5.2s}, [%[input_r0]], #8\n"
-
- "ld1 {v6.2s}, [%[input_r1]], #8\n"
- "ld1 {v7.2s}, [%[input_r2]], #8\n"
- "ld1 {v8.2s}, [%[input_r3]], #8\n"
-
- "fmla v24.2s, v4.2s, v5.2s\n"
- "fmla v26.2s, v4.2s, v6.2s\n"
-
- "fmla v28.2s, v4.2s, v7.2s\n"
- "fmla v30.2s, v4.2s, v8.2s\n"
-
- "st1 {v24.2s}, [%[output_r0]], #8\n"
- "st1 {v26.2s}, [%[output_r1]], #8\n"
- "st1 {v28.2s}, [%[output_r2]], #8\n"
- "st1 {v30.2s}, [%[output_r3]], #8\n"
- : [weights_data] "+r"(weights_data), [input_r0] "+r"(input_r0),
- [input_r1] "+r"(input_r1), [input_r2] "+r"(input_r2), [input_r3] "+r"(input_r3),
- [output_r0] "+r"(output_r0), [output_r1] "+r"(output_r1),
- [output_r2] "+r"(output_r2), [output_r3] "+r"(output_r3)
- :
- : "cc", "memory", "v4", "v5", "v6", "v7", "v8", "v24", "v26", "v28", "v30");
- remain -= 2;
- }
-
- if (remain > 0)
- {
- *output_r0++ += (*weights_data) * (*input_r0++);
- *output_r1++ += (*weights_data++) * (*input_r1++);
- *output_r2++ += (*weights_data) * (*input_r2++);
- *output_r3++ += (*weights_data++) * (*input_r3++);
- }
- ow += 4;
- }
-
- for (; ow + 1 < outw; /*ow += 2*/)
- {
- if (padding)
- {
- if (((ow + 1) * stridew + kw < padw) || (ow * stridew + kw >= padw + w))
- {
- ow += 2;
- continue;
- }
- else if ((ow + 1) * stridew + kw >= padw + w)
- {
- break;
- }
- else if (ow * stridew + kw < padw)
- {
- ow++;
- continue;
- }
- }
-
- int nn = channels >> 2;
- int remain = channels & 0x03;
-
- const float *input_r0 = input_data1 + (ow * stridew - padw) * channels;
-
- const float *input_r1 = input_r0 + stridew * channels;
- const float *weights_data = kernel_data;
- float *output_r0 = out_mat.data + (oh * outw + ow) * channels;
- float *output_r1 = output_r0 + channels;
-
- if (nn > 0)
- {
- int _n = (nn + 1) >> 1;
- int oddn = nn & 1;
-
- asm volatile("subs %[_n], %[_n], #1\n"
- "ld1 {v4.4s}, [%[weights_data]], #16\n"
- "ld1 {v5.4s}, [%[input_r0]], #16\n"
- "ld1 {v6.4s}, [%[input_r1]], #16\n"
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v24.4s, v25.4s}, [%[output_r0]]\n"
- "ld1 {v26.4s, v27.4s}, [%[output_r1]]\n"
-
- "ld1 {v9.4s}, [%[weights_data]], #16\n"
- "ld1 {v10.4s}, [%[input_r0]], #16\n"
- "ld1 {v11.4s}, [%[input_r1]], #16\n"
-
- "fmla v24.4s, v4.4s, v5.4s\n"
- "fmla v26.4s, v4.4s, v6.4s\n"
-
- "ld1 {v4.4s}, [%[weights_data]], #16\n"
- "ld1 {v5.4s}, [%[input_r0]], #16\n"
- "ld1 {v6.4s}, [%[input_r1]], #16\n"
-
- "fmla v25.4s, v9.4s, v10.4s\n"
- "fmla v27.4s, v9.4s, v11.4s\n"
-
- "st1 {v24.4s, v25.4s}, [%[output_r0]], #32\n"
- "st1 {v26.4s, v27.4s}, [%[output_r1]], #32\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v24.4s}, [%[output_r0]]\n"
- "ld1 {v26.4s}, [%[output_r1]]\n"
- "cmp %[oddn], #1\n"
-
- "fmla v24.4s, v4.4s, v5.4s\n"
- "fmla v26.4s, v4.4s, v6.4s\n"
-
- "st1 {v24.4s}, [%[output_r0]], #16\n"
- "st1 {v26.4s}, [%[output_r1]], #16\n"
-
- "beq 2f\n"
- "ld1 {v25.4s}, [%[output_r0]]\n"
- "ld1 {v27.4s}, [%[output_r1]]\n"
-
- "ld1 {v9.4s}, [%[weights_data]], #16\n"
- "ld1 {v10.4s}, [%[input_r0]], #16\n"
- "ld1 {v11.4s}, [%[input_r1]], #16\n"
-
- "fmla v25.4s, v9.4s, v10.4s\n"
- "fmla v27.4s, v9.4s, v11.4s\n"
-
- "st1 {v25.4s}, [%[output_r0]], #16\n"
- "st1 {v27.4s}, [%[output_r1]], #16\n"
- "2:\n"
- : [weights_data] "+r"(weights_data), [input_r0] "+r"(input_r0),
- [input_r1] "+r"(input_r1), [output_r0] "+r"(output_r0),
- [output_r1] "+r"(output_r1), [_n] "+r"(_n)
- : [oddn] "r"(oddn)
- : "cc", "memory", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
- "v13", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
- }
- if (remain >= 2)
- {
- asm volatile("ld1 {v24.2s}, [%[output_r0]]\n"
- "ld1 {v26.2s}, [%[output_r1]]\n"
- "ld1 {v4.2s}, [%[weights_data]], #8\n"
- "ld1 {v5.2s}, [%[input_r0]], #8\n"
-
- "ld1 {v6.2s}, [%[input_r1]], #8\n"
-
- "fmla v24.2s, v4.2s, v5.2s\n"
- "fmla v26.2s, v4.2s, v6.2s\n"
-
- "st1 {v24.2s}, [%[output_r0]], #8\n"
- "st1 {v26.2s}, [%[output_r1]], #8\n"
- : [weights_data] "+r"(weights_data), [input_r0] "+r"(input_r0),
- [input_r1] "+r"(input_r1), [output_r0] "+r"(output_r0),
- [output_r1] "+r"(output_r1)
- :
- : "cc", "memory", "v4", "v5", "v6", "v7", "v8", "v24", "v26", "v28",
- "v30");
- remain -= 2;
- }
-
- if (remain > 0)
- {
- *output_r0++ += (*weights_data) * (*input_r0++);
- *output_r1++ += (*weights_data++) * (*input_r1++);
- }
- ow += 2;
- }
-
- for (; ow < outw; ow++)
- {
- const float *input_data = input_data1 + (ow * stridew - padw) * channels;
-
- if (padding && ((ow * stridew + kw < padw) || (ow * strideh + kw >= padw + w)))
- {
- continue;
- }
-
- int nn = channels >> 2;
- int remain = channels & 0x03;
-
- const float *weights_data = kernel_data;
- float *output_data = out_mat.data + (oh * outw + ow) * channels;
-
- if (nn > 0)
- {
- int _n = (nn + 1) >> 1;
- int oddn = nn & 1;
-
- asm volatile("subs %[_n], %[_n], #1\n"
- "ld1 {v4.4s}, [%[weights_data]], #16\n"
- "ld1 {v5.4s}, [%[input_data]], #16\n"
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v30.4s, v31.4s}, [%[output_data]]\n"
- "ld1 {v6.4s}, [%[weights_data]], #16\n"
- "ld1 {v7.4s}, [%[input_data]], #16\n"
- "fmla v30.4s, v4.4s, v5.4s\n"
-
- "ld1 {v4.4s}, [%[weights_data]], #16\n"
- "ld1 {v5.4s}, [%[input_data]], #16\n"
- "fmla v31.4s, v6.4s, v7.4s\n"
-
- "st1 {v30.4s, v31.4s}, [%[output_data]], #32\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v30.4s}, [%[output_data]]\n"
- "cmp %[oddn], #1\n"
- "fmla v30.4s, v4.4s, v5.4s\n"
- "st1 {v30.4s}, [%[output_data]], #16\n"
- "beq 2f\n"
- "ld1 {v31.4s}, [%[output_data]]\n"
- "ld1 {v6.4s}, [%[weights_data]], #16\n"
- "ld1 {v7.4s}, [%[input_data]], #16\n"
- "fmla v31.4s, v6.4s, v7.4s\n"
-
- "st1 {v31.4s}, [%[output_data]], #16\n"
- "2:\n"
- : [weights_data] "+r"(weights_data), [input_data] "+r"(input_data),
- [output_data] "+r"(output_data), [_n] "+r"(_n)
- : [oddn] "r"(oddn)
- : "cc", "memory", "v4", "v5", "v30", "v31");
- }
- if (remain >= 2)
- {
- asm volatile("ld1 {v30.2s}, [%[output_data]]\n"
- "ld1 {v4.2s}, [%[weights_data]], #8\n"
- "ld1 {v5.2s}, [%[input_data]], #8\n"
-
- "fmla v30.2s, v4.2s, v5.2s\n"
-
- "st1 {v30.2s}, [%[output_data]], #8\n"
- : [weights_data] "+r"(weights_data), [input_data] "+r"(input_data),
- [output_data] "+r"(output_data)
- :
- : "cc", "memory", "v4", "v5", "v30");
- remain -= 2;
- }
-
- if (remain > 0)
- {
- *output_data++ += (*weights_data++) * (*input_data++);
- }
- }
- }
- }
- }
-#else // __aarch64__
- (void)in_mat;
- (void)out_mat;
- (void)kernel;
- (void)in_param;
-#endif // __aarch64__
-}
-
-void srcn_depthwise_conv(const convMat_t &in_mat, const convMat_t &weights_mat, convMat_t &out_mat,
- const convMat_t &bias, const convParams_t &in_param, int num_threads,
- convType_t conv_type)
-{
- omp_set_num_threads(num_threads);
-
- if (conv_type == col_major)
- {
- depthwise_conv_colmajor(in_mat, out_mat, weights_mat, in_param);
- return;
- }
-
- else if (conv_type == row_major)
- {
- if (in_param.kernel_w == 3 && in_param.kernel_h == 3 && in_param.dilation_w == 1 &&
- in_param.dilation_h == 1)
- {
- if (in_param.stride_w == 1 && in_param.stride_h == 1)
- {
- if (in_param.padding == 0)
- depthwise_conv3x3S1_nopad(in_mat, out_mat, weights_mat, bias);
- else
- depthwise_conv3x3S1_padding(in_mat, out_mat, weights_mat, bias);
- }
- else if (in_param.stride_w == 2 && in_param.stride_h == 2)
- {
- if (in_param.padding == 0)
- depthwise_conv3x3S2_nopad(in_mat, out_mat, weights_mat, bias);
- else
- {
- if (in_param.pad_w == 0 && in_param.pad_h == 0)
- depthwise_conv3x3S2_padding00(in_mat, out_mat, weights_mat, bias);
- else if (in_param.pad_w == 0 && in_param.pad_h == 1)
- depthwise_conv3x3S2_padding10(in_mat, out_mat, weights_mat, bias);
- else if (in_param.pad_w == 1 && in_param.pad_h == 0)
- depthwise_conv3x3S2_padding01(in_mat, out_mat, weights_mat, bias);
- else if (in_param.pad_w == 1 && in_param.pad_h == 1)
- depthwise_conv3x3S2_padding11(in_mat, out_mat, weights_mat, bias);
- }
- }
- }
- }
-}
-
-} // namespace srcn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/direct_conv_colmajor.cc b/compute/ncnn/src/srcn/direct_conv_colmajor.cc
deleted file mode 100644
index 300235222..000000000
--- a/compute/ncnn/src/srcn/direct_conv_colmajor.cc
+++ /dev/null
@@ -1,5872 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include <stdlib.h>
-#include <arm_neon.h>
-#include "ncnn/srcn/conv_type.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-#if __aarch64__
-static void direct_conv_l(const convMat_t &bottom_blob, convMat_t &top_blob,
- const convMat_t &_kernel, const int _stride, const int padding,
- const int pad_top, const int pad_left)
-{
- const int w = bottom_blob.w;
- const int h = bottom_blob.h;
- const int inch = bottom_blob.c;
- const int outw = top_blob.w;
- const int outh = top_blob.h;
- const int outch = top_blob.c;
- const int kernel_w = _kernel.w;
- const int kernel_h = _kernel.h;
-
- for (int m = 0; m < kernel_w * kernel_h; m++)
- {
- const float *_kernel0 = _kernel.data + m * inch * outch;
- const float *img0 =
- bottom_blob.data + (m / kernel_w - pad_top) * w * inch + (m % kernel_w - pad_left) * inch;
-
-#ifdef _OPENMP
-#pragma omp parallel for
-#endif // _OPENMP
- for (int p = 0; p < outh; p++)
- {
- float *out0 = top_blob.data + p * outw * outch;
-
- // clear output
- if (m == 0)
- {
- for (int j = 0; j < outw * outch; j++)
- {
- *(out0 + j) = 0.f;
- }
- }
-
- if (padding)
- {
- if (((p * _stride + m / kernel_w) < pad_top) || (p * _stride + m / kernel_w >= pad_top + h))
- {
- continue;
- }
- }
-
- const float *img1 = img0 + p * w * inch * _stride;
-
- int q = 0;
- for (; q + 3 < outw; /*q += 4*/)
- {
- if (padding)
- {
- if (((q + 3) * _stride + m % kernel_w < pad_left) ||
- (q * _stride + m % kernel_w) >= pad_left + w)
- {
- out0 += outch * 4;
- img1 += inch * _stride * 4;
- q += 4;
- continue;
- }
- else if ((q + 3) * _stride + m % kernel_w >= pad_left + w)
- {
- break;
- }
- else if (q * _stride + m % kernel_w < pad_left)
- {
- int delta = (pad_left - m % kernel_w) / _stride - q;
- delta += (pad_left - m % kernel_w) % _stride ? 1 : 0;
- out0 += outch * delta;
- img1 += inch * _stride * delta;
- q += delta;
- continue;
- }
- }
-
- const float *_x0 = img1;
- const float *_x1 = img1 + inch * _stride;
- const float *_x2 = img1 + inch * _stride * 2;
- const float *_x3 = img1 + inch * _stride * 3;
- const float *kernel0 = _kernel0;
-
- int i = 0;
- for (; i + 3 < inch; i += 4)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x4_t rx0 asm("v4") = vld1q_f32(_x0);
- register float32x4_t rx1 asm("v5") = vld1q_f32(_x1);
- register float32x4_t rx2 asm("v16") = vld1q_f32(_x2);
- register float32x4_t rx3 asm("v17") = vld1q_f32(_x3);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
- float *outptr2 = out0 + outch * 2;
- float *outptr3 = out0 + outch * 3;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v13.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v7.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v7.4s, %[rx3].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v8.4s, %[rx1].s[2]\n"
- "fmla v30.4s, v8.4s, %[rx2].s[2]\n"
- "fmla v31.4s, v8.4s, %[rx3].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v9.4s, %[rx1].s[3]\n"
- "fmla v30.4s, v9.4s, %[rx2].s[3]\n"
- "fmla v31.4s, v9.4s, %[rx3].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v10.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v10.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v11.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v11.4s, %[rx3].s[1]\n"
- "fmla v14.4s, v12.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v12.4s, %[rx1].s[2]\n"
- "fmla v30.4s, v12.4s, %[rx2].s[2]\n"
- "fmla v31.4s, v12.4s, %[rx3].s[2]\n"
- "fmla v14.4s, v13.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v13.4s, %[rx1].s[3]\n"
- "fmla v30.4s, v13.4s, %[rx2].s[3]\n"
- "fmla v31.4s, v13.4s, %[rx3].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v7.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v7.4s, %[rx3].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v8.4s, %[rx1].s[2]\n"
- "fmla v30.4s, v8.4s, %[rx2].s[2]\n"
- "fmla v31.4s, v8.4s, %[rx3].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v9.4s, %[rx1].s[3]\n"
- "fmla v30.4s, v9.4s, %[rx2].s[3]\n"
- "fmla v31.4s, v9.4s, %[rx3].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v13.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v10.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v10.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v11.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v11.4s, %[rx3].s[1]\n"
- "fmla v14.4s, v12.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v12.4s, %[rx1].s[2]\n"
- "fmla v30.4s, v12.4s, %[rx2].s[2]\n"
- "fmla v31.4s, v12.4s, %[rx3].s[2]\n"
- "fmla v14.4s, v13.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v13.4s, %[rx1].s[3]\n"
- "fmla v30.4s, v13.4s, %[rx2].s[3]\n"
- "fmla v31.4s, v13.4s, %[rx3].s[3]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v7.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v7.4s, %[rx3].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v8.4s, %[rx1].s[2]\n"
- "fmla v30.4s, v8.4s, %[rx2].s[2]\n"
- "fmla v31.4s, v8.4s, %[rx3].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v9.4s, %[rx1].s[3]\n"
- "fmla v30.4s, v9.4s, %[rx2].s[3]\n"
- "fmla v31.4s, v9.4s, %[rx3].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n), [outptr2] "+r"(outptr2),
- [outptr3] "+r"(outptr3)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn),
- [rx2] "w"(rx2), [rx3] "w"(rx3)
- : "cc", "memory", "x0", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13",
- "v14", "v15", "v30", "v31");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
- "ld1 {v15.2s}, [%[outptr1]]\n"
- "ld1 {v30.2s}, [%[outptr2]]\n"
- "ld1 {v31.2s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v15.2s, v6.2s, %[rx1].s[0]\n"
- "fmla v30.2s, v6.2s, %[rx2].s[0]\n"
- "fmla v31.2s, v6.2s, %[rx3].s[0]\n"
- "fmla v14.2s, v7.2s, %[rx0].s[1]\n"
- "fmla v15.2s, v7.2s, %[rx1].s[1]\n"
- "fmla v30.2s, v7.2s, %[rx2].s[1]\n"
- "fmla v31.2s, v7.2s, %[rx3].s[1]\n"
- "fmla v14.2s, v8.2s, %[rx0].s[2]\n"
- "fmla v15.2s, v8.2s, %[rx1].s[2]\n"
- "fmla v30.2s, v8.2s, %[rx2].s[2]\n"
- "fmla v31.2s, v8.2s, %[rx3].s[2]\n"
- "fmla v14.2s, v9.2s, %[rx0].s[3]\n"
- "fmla v15.2s, v9.2s, %[rx1].s[3]\n"
- "fmla v30.2s, v9.2s, %[rx2].s[3]\n"
- "fmla v31.2s, v9.2s, %[rx3].s[3]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- "st1 {v15.2s}, [%[outptr1]], #8\n"
- "st1 {v30.2s}, [%[outptr2]], #8\n"
- "st1 {v31.2s}, [%[outptr3]], #8\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [outptr2] "+r"(outptr2), [outptr3] "+r"(outptr3)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1),
-
- [rx2] "w"(rx2), [rx3] "w"(rx3)
- : "cc", "memory", "x0", "v6", "v7", "v8", "v9", "v14", "v15", "v30",
- "v31");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x0 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x0 + 3));
-
- *outptr1 += (*kernel0) * (*_x1) + (*(kernel0 + outch)) * (*(_x1 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x1 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x1 + 3));
-
- *outptr2 += (*kernel0) * (*_x2) + (*(kernel0 + outch)) * (*(_x2 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x2 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x2 + 3));
-
- *outptr3 += (*kernel0) * (*_x3) + (*(kernel0 + outch)) * (*(_x3 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x3 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x3 + 3));
-
- kernel0++;
- outptr0++;
- outptr1++;
- outptr2++;
- outptr3++;
- }
-
- kernel0 += outch * 3;
- _x0 += 4;
- _x1 += 4;
- _x2 += 4;
- _x3 += 4;
- }
-
- for (; i + 1 < inch; i += 2)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("v4") = vld1_f32(_x0);
- register float32x2_t rx1 asm("v5") = vld1_f32(_x1);
- register float32x2_t rx2 asm("v16") = vld1_f32(_x2);
- register float32x2_t rx3 asm("v17") = vld1_f32(_x3);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
- float *outptr2 = out0 + outch * 2;
- float *outptr3 = out0 + outch * 3;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile(
- "cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v7.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v7.4s, %[rx3].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v10.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v10.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v11.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v11.4s, %[rx3].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v7.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v7.4s, %[rx3].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v10.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v10.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v11.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v11.4s, %[rx3].s[1]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v7.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v7.4s, %[rx3].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1),
- [_n] "+r"(_n), [outptr2] "+r"(outptr2), [outptr3] "+r"(outptr3)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn),
- [rx2] "w"(rx2), [rx3] "w"(rx3)
- : "cc", "memory", "x0", "v6", "v7", "v10", "v11", "v14", "v15", "v30", "v31");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
- "ld1 {v15.2s}, [%[outptr1]]\n"
- "ld1 {v30.2s}, [%[outptr2]]\n"
- "ld1 {v31.2s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v15.2s, v6.2s, %[rx1].s[0]\n"
- "fmla v30.2s, v6.2s, %[rx2].s[0]\n"
- "fmla v31.2s, v6.2s, %[rx3].s[0]\n"
- "fmla v14.2s, v7.2s, %[rx0].s[1]\n"
- "fmla v15.2s, v7.2s, %[rx1].s[1]\n"
- "fmla v30.2s, v7.2s, %[rx2].s[1]\n"
- "fmla v31.2s, v7.2s, %[rx3].s[1]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- "st1 {v15.2s}, [%[outptr1]], #8\n"
- "st1 {v30.2s}, [%[outptr2]], #8\n"
- "st1 {v31.2s}, [%[outptr3]], #8\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [outptr2] "+r"(outptr2), [outptr3] "+r"(outptr3)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1),
-
- [rx2] "w"(rx2), [rx3] "w"(rx3)
- : "cc", "memory", "x0", "v6", "v7", "v14", "v15", "v30", "v31");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1));
- *outptr1 += (*kernel0) * (*_x1) + (*(kernel0 + outch)) * (*(_x1 + 1));
- *outptr2 += (*kernel0) * (*_x2) + (*(kernel0 + outch)) * (*(_x2 + 1));
- *outptr3 += (*kernel0) * (*_x3) + (*(kernel0 + outch)) * (*(_x3 + 1));
-
- kernel0++;
- outptr0++;
- outptr1++;
- outptr2++;
- outptr3++;
- }
-
- kernel0 += outch;
- _x0 += 2;
- _x1 += 2;
- _x2 += 2;
- _x3 += 2;
- }
-
- for (; i < inch; i++)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("v4") = vld1_dup_f32(_x0);
- register float32x2_t rx1 asm("v5") = vld1_dup_f32(_x1);
- register float32x2_t rx2 asm("v16") = vld1_dup_f32(_x2);
- register float32x2_t rx3 asm("v17") = vld1_dup_f32(_x3);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
- float *outptr2 = out0 + outch * 2;
- float *outptr3 = out0 + outch * 3;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile(
- "cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v10.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v10.4s, %[rx3].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v10.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v10.4s, %[rx3].s[0]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1),
- [_n] "+r"(_n), [outptr2] "+r"(outptr2), [outptr3] "+r"(outptr3)
- : [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn), [rx2] "w"(rx2), [rx3] "w"(rx3)
- : "cc", "memory", "x0", "v6", "v10", "v14", "v15", "v30", "v31");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
- "ld1 {v15.2s}, [%[outptr1]]\n"
- "ld1 {v30.2s}, [%[outptr2]]\n"
- "ld1 {v31.2s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v15.2s, v6.2s, %[rx1].s[0]\n"
- "fmla v30.2s, v6.2s, %[rx2].s[0]\n"
- "fmla v31.2s, v6.2s, %[rx3].s[0]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- "st1 {v15.2s}, [%[outptr1]], #8\n"
- "st1 {v30.2s}, [%[outptr2]], #8\n"
- "st1 {v31.2s}, [%[outptr3]], #8\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [outptr2] "+r"(outptr2), [outptr3] "+r"(outptr3)
- : [rx0] "w"(rx0), [rx1] "w"(rx1),
-
- [rx2] "w"(rx2), [rx3] "w"(rx3)
- : "cc", "memory", "x0", "v6", "v14", "v15", "v30", "v31");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0);
- *outptr1 += (*kernel0) * (*_x1);
- *outptr2 += (*kernel0) * (*_x2);
- *outptr3 += (*kernel0) * (*_x3);
-
- kernel0++;
- outptr0++;
- outptr1++;
- outptr2++;
- outptr3++;
- }
-
- _x0 += 1;
- _x1 += 1;
- _x2 += 1;
- _x3 += 1;
- }
-
- img1 += inch * 4 * _stride;
- out0 += outch * 4;
- q += 4;
- }
-
- for (; q + 1 < outw; /*q += 2*/)
- {
- if (padding)
- {
- if (((q + 1) * _stride + m % kernel_w < pad_left) ||
- (q * _stride + m % kernel_w) >= pad_left + w)
- {
- out0 += outch * 2;
- img1 += inch * _stride * 2;
- q += 2;
- continue;
- }
- else if ((q + 1) * _stride + m % kernel_w >= pad_left + w)
- {
- break;
- }
- else if (q * _stride + m % kernel_w < pad_left)
- {
- out0 += outch;
- img1 += inch * _stride;
- q++;
- continue;
- }
- }
-
- const float *_x0 = img1;
- const float *_x1 = img1 + inch * _stride;
- const float *kernel0 = _kernel0;
-
- int i = 0;
- for (; i + 3 < inch; i += 4)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x4_t rx0 asm("v4") = vld1q_f32(_x0);
- register float32x4_t rx1 asm("v5") = vld1q_f32(_x1);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v13.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v8.4s, %[rx1].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v9.4s, %[rx1].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
- "fmla v14.4s, v12.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v12.4s, %[rx1].s[2]\n"
- "fmla v14.4s, v13.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v13.4s, %[rx1].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v8.4s, %[rx1].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v9.4s, %[rx1].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v13.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
- "fmla v14.4s, v12.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v12.4s, %[rx1].s[2]\n"
- "fmla v14.4s, v13.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v13.4s, %[rx1].s[3]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v8.4s, %[rx1].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v9.4s, %[rx1].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn)
- : "cc", "memory", "x0", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13",
- "v14", "v15");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
- "ld1 {v15.2s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v15.2s, v6.2s, %[rx1].s[0]\n"
- "fmla v14.2s, v7.2s, %[rx0].s[1]\n"
- "fmla v15.2s, v7.2s, %[rx1].s[1]\n"
- "fmla v14.2s, v8.2s, %[rx0].s[2]\n"
- "fmla v15.2s, v8.2s, %[rx1].s[2]\n"
- "fmla v14.2s, v9.2s, %[rx0].s[3]\n"
- "fmla v15.2s, v9.2s, %[rx1].s[3]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- "st1 {v15.2s}, [%[outptr1]], #8\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1)
- : "cc", "memory", "x0", "v6", "v7", "v8", "v9", "v14", "v15");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x0 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x0 + 3));
-
- *outptr1 += (*kernel0) * (*_x1) + (*(kernel0 + outch)) * (*(_x1 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x1 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x1 + 3));
-
- kernel0++;
- outptr0++;
- outptr1++;
- }
-
- kernel0 += outch * 3;
- _x0 += 4;
- _x1 += 4;
- }
-
- for (; i + 1 < inch; i += 2)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("v4") = vld1_f32(_x0);
- register float32x2_t rx1 asm("v5") = vld1_f32(_x1);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn)
- : "cc", "memory", "x0", "v6", "v7", "v10", "v11", "v14", "v15");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
- "ld1 {v15.2s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v15.2s, v6.2s, %[rx1].s[0]\n"
- "fmla v14.2s, v7.2s, %[rx0].s[1]\n"
- "fmla v15.2s, v7.2s, %[rx1].s[1]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- "st1 {v15.2s}, [%[outptr1]], #8\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1)
- : "cc", "memory", "x0", "v6", "v7", "v14", "v15");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1));
- *outptr1 += (*kernel0) * (*_x1) + (*(kernel0 + outch)) * (*(_x1 + 1));
-
- kernel0++;
- outptr0++;
- outptr1++;
- }
-
- kernel0 += outch;
- _x0 += 2;
- _x1 += 2;
- }
-
- for (; i < inch; i++)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("v4") = vld1_dup_f32(_x0);
- register float32x2_t rx1 asm("v5") = vld1_dup_f32(_x1);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n)
- : [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn)
- : "cc", "memory", "x0", "v6", "v10", "v14", "v15");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
- "ld1 {v15.2s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v15.2s, v6.2s, %[rx1].s[0]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- "st1 {v15.2s}, [%[outptr1]], #8\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1)
- : [rx0] "w"(rx0), [rx1] "w"(rx1)
- : "cc", "memory", "x0", "v6", "v14", "v15");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0);
- *outptr1 += (*kernel0) * (*_x1);
-
- kernel0++;
- outptr0++;
- outptr1++;
- }
-
- _x0 += 1;
- _x1 += 1;
- }
-
- img1 += inch * 2 * _stride;
- out0 += outch * 2;
- q += 2;
- }
-
- for (; q < outw; q++)
- {
- if (padding)
- {
- if ((q * _stride + m % kernel_w < pad_left) ||
- (q * _stride + m % kernel_w >= pad_left + w))
- {
- img1 += inch * _stride;
- out0 += outch;
- continue;
- }
- }
-
- const float *_x0 = img1;
- const float *kernel0 = _kernel0;
-
- int i = 0;
- for (; i + 3 < inch; i += 4)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x4_t rx0 asm("v4") = vld1q_f32(_x0);
-
- float *outptr0 = out0;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v13.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v14.4s, v12.4s, %[rx0].s[2]\n"
- "fmla v14.4s, v13.4s, %[rx0].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v13.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v14.4s, v12.4s, %[rx0].s[2]\n"
- "fmla v14.4s, v13.4s, %[rx0].s[3]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [oddn] "r"(oddn)
- : "cc", "memory", "x0", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13",
- "v14");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v14.2s, v7.2s, %[rx0].s[1]\n"
- "fmla v14.2s, v8.2s, %[rx0].s[2]\n"
- "fmla v14.2s, v9.2s, %[rx0].s[3]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0)
- : [stride] "r"(stride), [rx0] "w"(rx0)
- : "cc", "memory", "x0", "v6", "v7", "v8", "v9", "v14");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x0 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x0 + 3));
-
- kernel0++;
- outptr0++;
- }
-
- kernel0 += outch * 3;
- _x0 += 4;
- }
-
- for (; i + 1 < inch; i += 2)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("v4") = vld1_f32(_x0);
-
- float *outptr0 = out0;
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [oddn] "r"(oddn)
- : "cc", "memory", "x0", "v6", "v7", "v10", "v11", "v14");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v14.2s, v7.2s, %[rx0].s[1]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0)
- : [stride] "r"(stride), [rx0] "w"(rx0)
- : "cc", "memory", "x0", "v6", "v7", "v14");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1));
-
- kernel0++;
- outptr0++;
- }
-
- kernel0 += outch;
- _x0 += 2;
- }
-
- for (; i < inch; i++)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("v4") = vld1_dup_f32(_x0);
-
- float *outptr0 = out0;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [_n] "+r"(_n)
- : [rx0] "w"(rx0), [oddn] "r"(oddn)
- : "cc", "memory", "x0", "v6", "v10", "v14");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0)
- : [rx0] "w"(rx0)
- : "cc", "memory", "x0", "v6", "v14");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0);
-
- kernel0++;
- outptr0++;
- }
-
- _x0 += 1;
- }
-
- img1 += inch * _stride;
- out0 += outch;
- }
- }
- }
-}
-
-static void direct_conv_s(const convMat_t &bottom_blob, convMat_t &top_blob,
- const convMat_t &_kernel, const int _stride, const int padding,
- const int pad_top, const int pad_left)
-{
- const int w = bottom_blob.w;
- const int h = bottom_blob.h;
- const int inch = bottom_blob.c;
- const int outw = top_blob.w;
- const int outh = top_blob.h;
- const int outch = top_blob.c;
- const int kernel_w = _kernel.w;
- const int kernel_h = _kernel.h;
-
-#ifdef _OPENMP
-#pragma omp parallel for
-#endif
- for (int p = 0; p < outh; p++)
- {
- const float *img0 = bottom_blob.data + (p * _stride - pad_top) * w * inch;
- float *out = top_blob.data + p * outw * outch;
-
- // clear output
- for (int j = 0; j < outw * outch; j++)
- {
- *(out + j) = 0.f;
- }
-
- for (int m = 0; m < kernel_w * kernel_h; m++)
- {
- if (padding)
- {
- if (((p * _stride + m / kernel_w) < pad_top) || (p * _stride + m / kernel_w >= pad_top + h))
- {
- continue;
- }
- }
-
- float *out0 = out;
- const float *_kernel0 = _kernel.data + m * inch * outch;
- const float *img1 = img0 + (m / kernel_w) * w * inch + (m % kernel_w - pad_left) * inch;
-
- int q = 0;
- for (; q + 3 < outw; /*q += 4*/)
- {
- if (padding)
- {
- if (((q + 3) * _stride + m % kernel_w < pad_left) ||
- (q * _stride + m % kernel_w) >= pad_left + w)
- {
- out0 += outch * 4;
- img1 += inch * _stride * 4;
- q += 4;
- continue;
- }
- else if ((q + 3) * _stride + m % kernel_w >= pad_left + w)
- {
- break;
- }
- else if (q * _stride + m % kernel_w < pad_left)
- {
- int delta = (pad_left - m % kernel_w) / _stride - q;
- delta += (pad_left - m % kernel_w) % _stride ? 1 : 0;
- out0 += outch * delta;
- img1 += inch * _stride * delta;
- q += delta;
- continue;
- }
- }
-
- const float *_x0 = img1;
- const float *_x1 = img1 + inch * _stride;
- const float *_x2 = img1 + inch * _stride * 2;
- const float *_x3 = img1 + inch * _stride * 3;
- const float *kernel0 = _kernel0;
-
- int i = 0;
- for (; i + 3 < inch; i += 4)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x4_t rx0 asm("v4") = vld1q_f32(_x0);
- register float32x4_t rx1 asm("v5") = vld1q_f32(_x1);
- register float32x4_t rx2 asm("v16") = vld1q_f32(_x2);
- register float32x4_t rx3 asm("v17") = vld1q_f32(_x3);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
- float *outptr2 = out0 + outch * 2;
- float *outptr3 = out0 + outch * 3;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v13.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v7.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v7.4s, %[rx3].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v8.4s, %[rx1].s[2]\n"
- "fmla v30.4s, v8.4s, %[rx2].s[2]\n"
- "fmla v31.4s, v8.4s, %[rx3].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v9.4s, %[rx1].s[3]\n"
- "fmla v30.4s, v9.4s, %[rx2].s[3]\n"
- "fmla v31.4s, v9.4s, %[rx3].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v10.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v10.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v11.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v11.4s, %[rx3].s[1]\n"
- "fmla v14.4s, v12.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v12.4s, %[rx1].s[2]\n"
- "fmla v30.4s, v12.4s, %[rx2].s[2]\n"
- "fmla v31.4s, v12.4s, %[rx3].s[2]\n"
- "fmla v14.4s, v13.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v13.4s, %[rx1].s[3]\n"
- "fmla v30.4s, v13.4s, %[rx2].s[3]\n"
- "fmla v31.4s, v13.4s, %[rx3].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v7.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v7.4s, %[rx3].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v8.4s, %[rx1].s[2]\n"
- "fmla v30.4s, v8.4s, %[rx2].s[2]\n"
- "fmla v31.4s, v8.4s, %[rx3].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v9.4s, %[rx1].s[3]\n"
- "fmla v30.4s, v9.4s, %[rx2].s[3]\n"
- "fmla v31.4s, v9.4s, %[rx3].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v13.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v10.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v10.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v11.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v11.4s, %[rx3].s[1]\n"
- "fmla v14.4s, v12.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v12.4s, %[rx1].s[2]\n"
- "fmla v30.4s, v12.4s, %[rx2].s[2]\n"
- "fmla v31.4s, v12.4s, %[rx3].s[2]\n"
- "fmla v14.4s, v13.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v13.4s, %[rx1].s[3]\n"
- "fmla v30.4s, v13.4s, %[rx2].s[3]\n"
- "fmla v31.4s, v13.4s, %[rx3].s[3]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v7.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v7.4s, %[rx3].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v8.4s, %[rx1].s[2]\n"
- "fmla v30.4s, v8.4s, %[rx2].s[2]\n"
- "fmla v31.4s, v8.4s, %[rx3].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v9.4s, %[rx1].s[3]\n"
- "fmla v30.4s, v9.4s, %[rx2].s[3]\n"
- "fmla v31.4s, v9.4s, %[rx3].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n), [outptr2] "+r"(outptr2),
- [outptr3] "+r"(outptr3)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn),
- [rx2] "w"(rx2), [rx3] "w"(rx3)
- : "cc", "memory", "x0", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13",
- "v14", "v15", "v30", "v31");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
- "ld1 {v15.2s}, [%[outptr1]]\n"
- "ld1 {v30.2s}, [%[outptr2]]\n"
- "ld1 {v31.2s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v15.2s, v6.2s, %[rx1].s[0]\n"
- "fmla v30.2s, v6.2s, %[rx2].s[0]\n"
- "fmla v31.2s, v6.2s, %[rx3].s[0]\n"
- "fmla v14.2s, v7.2s, %[rx0].s[1]\n"
- "fmla v15.2s, v7.2s, %[rx1].s[1]\n"
- "fmla v30.2s, v7.2s, %[rx2].s[1]\n"
- "fmla v31.2s, v7.2s, %[rx3].s[1]\n"
- "fmla v14.2s, v8.2s, %[rx0].s[2]\n"
- "fmla v15.2s, v8.2s, %[rx1].s[2]\n"
- "fmla v30.2s, v8.2s, %[rx2].s[2]\n"
- "fmla v31.2s, v8.2s, %[rx3].s[2]\n"
- "fmla v14.2s, v9.2s, %[rx0].s[3]\n"
- "fmla v15.2s, v9.2s, %[rx1].s[3]\n"
- "fmla v30.2s, v9.2s, %[rx2].s[3]\n"
- "fmla v31.2s, v9.2s, %[rx3].s[3]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- "st1 {v15.2s}, [%[outptr1]], #8\n"
- "st1 {v30.2s}, [%[outptr2]], #8\n"
- "st1 {v31.2s}, [%[outptr3]], #8\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [outptr2] "+r"(outptr2), [outptr3] "+r"(outptr3)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1),
-
- [rx2] "w"(rx2), [rx3] "w"(rx3)
- : "cc", "memory", "x0", "v6", "v7", "v8", "v9", "v14", "v15", "v30",
- "v31");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x0 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x0 + 3));
-
- *outptr1 += (*kernel0) * (*_x1) + (*(kernel0 + outch)) * (*(_x1 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x1 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x1 + 3));
-
- *outptr2 += (*kernel0) * (*_x2) + (*(kernel0 + outch)) * (*(_x2 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x2 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x2 + 3));
-
- *outptr3 += (*kernel0) * (*_x3) + (*(kernel0 + outch)) * (*(_x3 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x3 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x3 + 3));
-
- kernel0++;
- outptr0++;
- outptr1++;
- outptr2++;
- outptr3++;
- }
-
- kernel0 += outch * 3;
- _x0 += 4;
- _x1 += 4;
- _x2 += 4;
- _x3 += 4;
- }
-
- for (; i + 1 < inch; i += 2)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("v4") = vld1_f32(_x0);
- register float32x2_t rx1 asm("v5") = vld1_f32(_x1);
- register float32x2_t rx2 asm("v16") = vld1_f32(_x2);
- register float32x2_t rx3 asm("v17") = vld1_f32(_x3);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
- float *outptr2 = out0 + outch * 2;
- float *outptr3 = out0 + outch * 3;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile(
- "cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v7.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v7.4s, %[rx3].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v10.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v10.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v11.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v11.4s, %[rx3].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v7.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v7.4s, %[rx3].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v10.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v10.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v11.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v11.4s, %[rx3].s[1]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v30.4s, v7.4s, %[rx2].s[1]\n"
- "fmla v31.4s, v7.4s, %[rx3].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1),
- [_n] "+r"(_n), [outptr2] "+r"(outptr2), [outptr3] "+r"(outptr3)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn),
- [rx2] "w"(rx2), [rx3] "w"(rx3)
- : "cc", "memory", "x0", "v6", "v7", "v10", "v11", "v14", "v15", "v30", "v31");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
- "ld1 {v15.2s}, [%[outptr1]]\n"
- "ld1 {v30.2s}, [%[outptr2]]\n"
- "ld1 {v31.2s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v15.2s, v6.2s, %[rx1].s[0]\n"
- "fmla v30.2s, v6.2s, %[rx2].s[0]\n"
- "fmla v31.2s, v6.2s, %[rx3].s[0]\n"
- "fmla v14.2s, v7.2s, %[rx0].s[1]\n"
- "fmla v15.2s, v7.2s, %[rx1].s[1]\n"
- "fmla v30.2s, v7.2s, %[rx2].s[1]\n"
- "fmla v31.2s, v7.2s, %[rx3].s[1]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- "st1 {v15.2s}, [%[outptr1]], #8\n"
- "st1 {v30.2s}, [%[outptr2]], #8\n"
- "st1 {v31.2s}, [%[outptr3]], #8\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [outptr2] "+r"(outptr2), [outptr3] "+r"(outptr3)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1),
-
- [rx2] "w"(rx2), [rx3] "w"(rx3)
- : "cc", "memory", "x0", "v6", "v7", "v14", "v15", "v30", "v31");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1));
- *outptr1 += (*kernel0) * (*_x1) + (*(kernel0 + outch)) * (*(_x1 + 1));
- *outptr2 += (*kernel0) * (*_x2) + (*(kernel0 + outch)) * (*(_x2 + 1));
- *outptr3 += (*kernel0) * (*_x3) + (*(kernel0 + outch)) * (*(_x3 + 1));
-
- kernel0++;
- outptr0++;
- outptr1++;
- outptr2++;
- outptr3++;
- }
-
- kernel0 += outch;
- _x0 += 2;
- _x1 += 2;
- _x2 += 2;
- _x3 += 2;
- }
-
- for (; i < inch; i++)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("v4") = vld1_dup_f32(_x0);
- register float32x2_t rx1 asm("v5") = vld1_dup_f32(_x1);
- register float32x2_t rx2 asm("v16") = vld1_dup_f32(_x2);
- register float32x2_t rx3 asm("v17") = vld1_dup_f32(_x3);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
- float *outptr2 = out0 + outch * 2;
- float *outptr3 = out0 + outch * 3;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile(
- "cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v10.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v10.4s, %[rx3].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v10.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v10.4s, %[rx3].s[0]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
- "ld1 {v30.4s}, [%[outptr2]]\n"
- "ld1 {v31.4s}, [%[outptr3]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v30.4s, v6.4s, %[rx2].s[0]\n"
- "fmla v31.4s, v6.4s, %[rx3].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "st1 {v30.4s}, [%[outptr2]], #16\n"
- "st1 {v31.4s}, [%[outptr3]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1),
- [_n] "+r"(_n), [outptr2] "+r"(outptr2), [outptr3] "+r"(outptr3)
- : [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn), [rx2] "w"(rx2), [rx3] "w"(rx3)
- : "cc", "memory", "x0", "v6", "v10", "v14", "v15", "v30", "v31");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
- "ld1 {v15.2s}, [%[outptr1]]\n"
- "ld1 {v30.2s}, [%[outptr2]]\n"
- "ld1 {v31.2s}, [%[outptr3]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v15.2s, v6.2s, %[rx1].s[0]\n"
- "fmla v30.2s, v6.2s, %[rx2].s[0]\n"
- "fmla v31.2s, v6.2s, %[rx3].s[0]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- "st1 {v15.2s}, [%[outptr1]], #8\n"
- "st1 {v30.2s}, [%[outptr2]], #8\n"
- "st1 {v31.2s}, [%[outptr3]], #8\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [outptr2] "+r"(outptr2), [outptr3] "+r"(outptr3)
- : [rx0] "w"(rx0), [rx1] "w"(rx1),
-
- [rx2] "w"(rx2), [rx3] "w"(rx3)
- : "cc", "memory", "x0", "v6", "v14", "v15", "v30", "v31");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0);
- *outptr1 += (*kernel0) * (*_x1);
- *outptr2 += (*kernel0) * (*_x2);
- *outptr3 += (*kernel0) * (*_x3);
-
- kernel0++;
- outptr0++;
- outptr1++;
- outptr2++;
- outptr3++;
- }
-
- _x0 += 1;
- _x1 += 1;
- _x2 += 1;
- _x3 += 1;
- }
-
- img1 += inch * 4 * _stride;
- out0 += outch * 4;
- q += 4;
- }
-
- for (; q + 1 < outw; /*q += 2*/)
- {
- if (padding)
- {
- if (((q + 1) * _stride + m % kernel_w < pad_left) ||
- (q * _stride + m % kernel_w) >= pad_left + w)
- {
- out0 += outch * 2;
- img1 += inch * _stride * 2;
- q += 2;
- continue;
- }
- else if ((q + 1) * _stride + m % kernel_w >= pad_left + w)
- {
- break;
- }
- else if (q * _stride + m % kernel_w < pad_left)
- {
- out0 += outch;
- img1 += inch * _stride;
- q++;
- continue;
- }
- }
-
- const float *_x0 = img1;
- const float *_x1 = img1 + inch * _stride;
- const float *kernel0 = _kernel0;
-
- int i = 0;
- for (; i + 3 < inch; i += 4)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x4_t rx0 asm("v4") = vld1q_f32(_x0);
- register float32x4_t rx1 asm("v5") = vld1q_f32(_x1);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v13.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v8.4s, %[rx1].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v9.4s, %[rx1].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
- "fmla v14.4s, v12.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v12.4s, %[rx1].s[2]\n"
- "fmla v14.4s, v13.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v13.4s, %[rx1].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v8.4s, %[rx1].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v9.4s, %[rx1].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v13.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
- "fmla v14.4s, v12.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v12.4s, %[rx1].s[2]\n"
- "fmla v14.4s, v13.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v13.4s, %[rx1].s[3]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v15.4s, v8.4s, %[rx1].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
- "fmla v15.4s, v9.4s, %[rx1].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn)
- : "cc", "memory", "x0", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13",
- "v14", "v15");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
- "ld1 {v15.2s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v15.2s, v6.2s, %[rx1].s[0]\n"
- "fmla v14.2s, v7.2s, %[rx0].s[1]\n"
- "fmla v15.2s, v7.2s, %[rx1].s[1]\n"
- "fmla v14.2s, v8.2s, %[rx0].s[2]\n"
- "fmla v15.2s, v8.2s, %[rx1].s[2]\n"
- "fmla v14.2s, v9.2s, %[rx0].s[3]\n"
- "fmla v15.2s, v9.2s, %[rx1].s[3]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- "st1 {v15.2s}, [%[outptr1]], #8\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1)
- : "cc", "memory", "x0", "v6", "v7", "v8", "v9", "v14", "v15");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x0 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x0 + 3));
-
- *outptr1 += (*kernel0) * (*_x1) + (*(kernel0 + outch)) * (*(_x1 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x1 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x1 + 3));
-
- kernel0++;
- outptr0++;
- outptr1++;
- }
-
- kernel0 += outch * 3;
- _x0 += 4;
- _x1 += 4;
- }
-
- for (; i + 1 < inch; i += 2)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("v4") = vld1_f32(_x0);
- register float32x2_t rx1 asm("v5") = vld1_f32(_x1);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v11.4s, %[rx1].s[1]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v15.4s, v7.4s, %[rx1].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn)
- : "cc", "memory", "x0", "v6", "v7", "v10", "v11", "v14", "v15");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
- "ld1 {v15.2s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v15.2s, v6.2s, %[rx1].s[0]\n"
- "fmla v14.2s, v7.2s, %[rx0].s[1]\n"
- "fmla v15.2s, v7.2s, %[rx1].s[1]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- "st1 {v15.2s}, [%[outptr1]], #8\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1)
- : "cc", "memory", "x0", "v6", "v7", "v14", "v15");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1));
- *outptr1 += (*kernel0) * (*_x1) + (*(kernel0 + outch)) * (*(_x1 + 1));
-
- kernel0++;
- outptr0++;
- outptr1++;
- }
-
- kernel0 += outch;
- _x0 += 2;
- _x1 += 2;
- }
-
- for (; i < inch; i++)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("v4") = vld1_dup_f32(_x0);
- register float32x2_t rx1 asm("v5") = vld1_dup_f32(_x1);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v10.4s, %[rx1].s[0]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
- "ld1 {v15.4s}, [%[outptr1]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v15.4s, v6.4s, %[rx1].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "st1 {v15.4s}, [%[outptr1]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n)
- : [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn)
- : "cc", "memory", "x0", "v6", "v10", "v14", "v15");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
- "ld1 {v15.2s}, [%[outptr1]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v15.2s, v6.2s, %[rx1].s[0]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- "st1 {v15.2s}, [%[outptr1]], #8\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1)
- : [rx0] "w"(rx0), [rx1] "w"(rx1)
- : "cc", "memory", "x0", "v6", "v14", "v15");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0);
- *outptr1 += (*kernel0) * (*_x1);
-
- kernel0++;
- outptr0++;
- outptr1++;
- }
-
- _x0 += 1;
- _x1 += 1;
- }
-
- img1 += inch * 2 * _stride;
- out0 += outch * 2;
- q += 2;
- }
-
- for (; q < outw; q++)
- {
- if (padding)
- {
- if ((q * _stride + m % kernel_w < pad_left) ||
- (q * _stride + m % kernel_w >= pad_left + w))
- {
- img1 += inch * _stride;
- out0 += outch;
- continue;
- }
- }
-
- const float *_x0 = img1;
- const float *kernel0 = _kernel0;
-
- int i = 0;
- for (; i + 3 < inch; i += 4)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x4_t rx0 asm("v4") = vld1q_f32(_x0);
-
- float *outptr0 = out0;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v13.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v14.4s, v12.4s, %[rx0].s[2]\n"
- "fmla v14.4s, v13.4s, %[rx0].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v13.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
- "fmla v14.4s, v12.4s, %[rx0].s[2]\n"
- "fmla v14.4s, v13.4s, %[rx0].s[3]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
- "fmla v14.4s, v8.4s, %[rx0].s[2]\n"
- "fmla v14.4s, v9.4s, %[rx0].s[3]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [oddn] "r"(oddn)
- : "cc", "memory", "x0", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13",
- "v14");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v8.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v9.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v14.2s, v7.2s, %[rx0].s[1]\n"
- "fmla v14.2s, v8.2s, %[rx0].s[2]\n"
- "fmla v14.2s, v9.2s, %[rx0].s[3]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0)
- : [stride] "r"(stride), [rx0] "w"(rx0)
- : "cc", "memory", "x0", "v6", "v7", "v8", "v9", "v14");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x0 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x0 + 3));
-
- kernel0++;
- outptr0++;
- }
-
- kernel0 += outch * 3;
- _x0 += 4;
- }
-
- for (; i + 1 < inch; i += 2)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("v4") = vld1_f32(_x0);
-
- float *outptr0 = out0;
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v11.4s, %[rx0].s[1]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
- "fmla v14.4s, v7.4s, %[rx0].s[1]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [oddn] "r"(oddn)
- : "cc", "memory", "x0", "v6", "v7", "v10", "v11", "v14");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
- "add x0, x0, %[stride]\n"
- "ld1 {v7.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
- "fmla v14.2s, v7.2s, %[rx0].s[1]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0)
- : [stride] "r"(stride), [rx0] "w"(rx0)
- : "cc", "memory", "x0", "v6", "v7", "v14");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1));
-
- kernel0++;
- outptr0++;
- }
-
- kernel0 += outch;
- _x0 += 2;
- }
-
- for (; i < inch; i++)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("v4") = vld1_dup_f32(_x0);
-
- float *outptr0 = out0;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
- "beq 1f\n"
-
- "0:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v10.4s}, [x0]\n"
-
- "fmla v14.4s, v10.4s, %[rx0].s[0]\n"
-
- "cmp %[oddn], #1\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "ld1 {v6.4s}, [x0]\n"
-
- "ld1 {v14.4s}, [%[outptr0]]\n"
-
- "fmla v14.4s, v6.4s, %[rx0].s[0]\n"
-
- "st1 {v14.4s}, [%[outptr0]], #16\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [_n] "+r"(_n)
- : [rx0] "w"(rx0), [oddn] "r"(oddn)
- : "cc", "memory", "x0", "v6", "v10", "v14");
- }
-
- if (remain >= 2)
- {
- asm volatile("ld1 {v14.2s}, [%[outptr0]]\n"
-
- "mov x0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "ld1 {v6.2s}, [x0]\n"
-
- "fmla v14.2s, v6.2s, %[rx0].s[0]\n"
-
- "st1 {v14.2s}, [%[outptr0]], #8\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0)
- : [rx0] "w"(rx0)
- : "cc", "memory", "x0", "v6", "v14");
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0);
-
- kernel0++;
- outptr0++;
- }
-
- _x0 += 1;
- }
-
- img1 += inch * _stride;
- out0 += outch;
- }
- }
- }
-}
-
-#else // __aarch64__
-static void direct_conv_l(const convMat_t &bottom_blob, convMat_t &top_blob,
- const convMat_t &_kernel, const int _stride, const int padding,
- const int pad_top, const int pad_left)
-{
- const int w = bottom_blob.w;
- const int h = bottom_blob.h;
- const int inch = bottom_blob.c;
- const int outw = top_blob.w;
- const int outh = top_blob.h;
- const int outch = top_blob.c;
- const int kernel_w = _kernel.w;
- const int kernel_h = _kernel.h;
-
- for (int m = 0; m < kernel_w * kernel_h; m++)
- {
- const float *_kernel0 = _kernel.data + m * inch * outch;
- const float *img0 =
- bottom_blob.data + (m / kernel_w - pad_top) * w * inch + (m % kernel_w - pad_left) * inch;
-
-#ifdef _OPENMP
-#pragma omp parallel for
-#endif // _OPENMP
- for (int p = 0; p < outh; p++)
- {
- float *out0 = top_blob.data + p * outw * outch;
- // clear output.
- if (m == 0)
- {
- for (int j = 0; j < outw * outch; j++)
- {
- *(out0 + j) = 0.f;
- }
- }
-
- if (padding)
- {
- if (((p * _stride + m / kernel_w) < pad_top) || (p * _stride + m / kernel_w >= pad_top + h))
- {
- continue;
- }
- }
-
- const float *img1 = img0 + p * w * inch * _stride;
-
- int q = 0;
- for (; q + 1 < outw; /*q += 2*/)
- {
- if (padding)
- {
- if (((q + 1) * _stride + m % kernel_w < pad_left) ||
- (q * _stride + m % kernel_w) >= pad_left + w)
- {
- out0 += outch * 2;
- img1 += inch * _stride * 2;
- q += 2;
- continue;
- }
- else if (q * _stride + m % kernel_w < pad_left)
- {
- out0 += outch;
- img1 += inch * _stride;
- q++;
- continue;
- }
- else if ((q + 1) * _stride + m % kernel_w >= pad_left + w)
- {
- break;
- }
- }
-
- const float *_x0 = img1;
- const float *_x1 = img1 + inch * _stride;
- const float *kernel0 = _kernel0;
-
- int i = 0;
- for (; i + 3 < inch; i += 4)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x4_t rx0 asm("q4") = vld1q_f32(_x0);
- register float32x4_t rx1 asm("q5") = vld1q_f32(_x1);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d24-d25}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d26-d27}, [r0]\n"
-
- "vmla.f32 q14, q6, %e[rx0][0]\n"
- "vmla.f32 q15, q6, %e[rx1][0]\n"
- "vmla.f32 q14, q7, %e[rx0][1]\n"
- "vmla.f32 q15, q7, %e[rx1][1]\n"
- "vmla.f32 q14, q8, %f[rx0][0]\n"
- "vmla.f32 q15, q8, %f[rx1][0]\n"
- "vmla.f32 q14, q9, %f[rx0][1]\n"
- "vmla.f32 q15, q9, %f[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "vmla.f32 q14, q10, %e[rx0][0]\n"
- "vmla.f32 q15, q10, %e[rx1][0]\n"
- "vmla.f32 q14, q11, %e[rx0][1]\n"
- "vmla.f32 q15, q11, %e[rx1][1]\n"
- "vmla.f32 q14, q12, %f[rx0][0]\n"
- "vmla.f32 q15, q12, %f[rx1][0]\n"
- "vmla.f32 q14, q13, %f[rx0][1]\n"
- "vmla.f32 q15, q13, %f[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "vmla.f32 q14, q6, %e[rx0][0]\n"
- "vmla.f32 q15, q6, %e[rx1][0]\n"
- "vmla.f32 q14, q7, %e[rx0][1]\n"
- "vmla.f32 q15, q7, %e[rx1][1]\n"
- "vmla.f32 q14, q8, %f[rx0][0]\n"
- "vmla.f32 q15, q8, %f[rx1][0]\n"
- "vmla.f32 q14, q9, %f[rx0][1]\n"
- "vmla.f32 q15, q9, %f[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d24-d25}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d26-d27}, [r0]\n"
-
- "vmla.f32 q14, q10, %e[rx0][0]\n"
- "vmla.f32 q15, q10, %e[rx1][0]\n"
- "vmla.f32 q14, q11, %e[rx0][1]\n"
- "vmla.f32 q15, q11, %e[rx1][1]\n"
- "vmla.f32 q14, q12, %f[rx0][0]\n"
- "vmla.f32 q15, q12, %f[rx1][0]\n"
- "vmla.f32 q14, q13, %f[rx0][1]\n"
- "vmla.f32 q15, q13, %f[rx1][1]\n"
-
- "cmp %[oddn], #1\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "vmla.f32 q14, q6, %e[rx0][0]\n"
- "vmla.f32 q15, q6, %e[rx1][0]\n"
- "vmla.f32 q14, q7, %e[rx0][1]\n"
- "vmla.f32 q15, q7, %e[rx1][1]\n"
- "vmla.f32 q14, q8, %f[rx0][0]\n"
- "vmla.f32 q15, q8, %f[rx1][0]\n"
- "vmla.f32 q14, q9, %f[rx0][1]\n"
- "vmla.f32 q15, q9, %f[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn)
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15");
- }
-
- if (remain >= 2)
- {
- asm volatile("vld1.f32 {d28}, [%[outptr0]]\n"
- "vld1.f32 {d30}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "vld1.f32 {d12}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18}, [r0]\n"
-
- "vmla.f32 d28, d12, %e[rx0][0]\n"
- "vmla.f32 d30, d12, %e[rx1][0]\n"
- "vmla.f32 d28, d14, %e[rx0][1]\n"
- "vmla.f32 d30, d14, %e[rx1][1]\n"
- "vmla.f32 d28, d16, %f[rx0][0]\n"
- "vmla.f32 d30, d16, %f[rx1][0]\n"
- "vmla.f32 d28, d18, %f[rx0][1]\n"
- "vmla.f32 d30, d18, %f[rx1][1]\n"
-
- "vst1.f32 {d28}, [%[outptr0]]!\n"
- "vst1.f32 {d30}, [%[outptr1]]!\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1)
-#ifndef _OPENMP
-
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
- );
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x0 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x0 + 3));
-
- *outptr1 += (*kernel0) * (*_x1) + (*(kernel0 + outch)) * (*(_x1 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x1 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x1 + 3));
-
- kernel0++;
- outptr0++;
- outptr1++;
- }
-
- kernel0 += outch * 3;
- _x0 += 4;
- _x1 += 4;
- }
-
- for (; i + 1 < inch; i += 2)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("d8") = vld1_f32(_x0);
- register float32x2_t rx1 asm("d10") = vld1_f32(_x1);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q15, q6, %P[rx1][0]\n"
- "vmla.f32 q14, q7, %P[rx0][1]\n"
- "vmla.f32 q15, q7, %P[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
- "vmla.f32 q15, q10, %P[rx1][0]\n"
- "vmla.f32 q14, q11, %P[rx0][1]\n"
- "vmla.f32 q15, q11, %P[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q15, q6, %P[rx1][0]\n"
- "vmla.f32 q14, q7, %P[rx0][1]\n"
- "vmla.f32 q15, q7, %P[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
- "vmla.f32 q15, q10, %P[rx1][0]\n"
- "vmla.f32 q14, q11, %P[rx0][1]\n"
- "vmla.f32 q15, q11, %P[rx1][1]\n"
-
- "cmp %[oddn], #1\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q15, q6, %P[rx1][0]\n"
- "vmla.f32 q14, q7, %P[rx0][1]\n"
- "vmla.f32 q15, q7, %P[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q10", "q11", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
-
- );
- }
-
- if (remain >= 2)
- {
- asm volatile("vld1.f32 {d28}, [%[outptr0]]\n"
- "vld1.f32 {d30}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "vld1.f32 {d12}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14}, [r0]\n"
-
- "vmla.f32 d28, d12, %P[rx0][0]\n"
- "vmla.f32 d30, d12, %P[rx1][0]\n"
- "vmla.f32 d28, d14, %P[rx0][1]\n"
- "vmla.f32 d30, d14, %P[rx1][1]\n"
-
- "vst1.f32 {d28}, [%[outptr0]]!\n"
- "vst1.f32 {d30}, [%[outptr1]]!\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
-
- );
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1));
- *outptr1 += (*kernel0) * (*_x1) + (*(kernel0 + outch)) * (*(_x1 + 1));
-
- kernel0++;
- outptr0++;
- outptr1++;
- }
-
- kernel0 += outch;
- _x0 += 2;
- _x1 += 2;
- }
-
- for (; i < inch; i++)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("d8") = vld1_dup_f32(_x0);
- register float32x2_t rx1 asm("d10") = vld1_dup_f32(_x1);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q15, q6, %P[rx1][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
- "vmla.f32 q15, q10, %P[rx1][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q15, q6, %P[rx1][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
- "vmla.f32 q15, q10, %P[rx1][0]\n"
-
- "cmp %[oddn], #1\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q15, q6, %P[rx1][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n)
- : [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q10", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
- );
- }
-
- if (remain >= 2)
- {
- asm volatile("vld1.f32 {d28}, [%[outptr0]]\n"
- "vld1.f32 {d30}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "vld1.f32 {d12}, [r0]\n"
-
- "vmla.f32 d28, d12, %P[rx0][0]\n"
- "vmla.f32 d30, d12, %P[rx1][0]\n"
-
- "vst1.f32 {d28}, [%[outptr0]]!\n"
- "vst1.f32 {d30}, [%[outptr1]]!\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1)
- : [rx0] "w"(rx0), [rx1] "w"(rx1)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
-
- );
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0);
- *outptr1 += (*kernel0) * (*_x1);
-
- kernel0++;
- outptr0++;
- outptr1++;
- }
-
- _x0 += 1;
- _x1 += 1;
- }
-
- img1 += inch * 2 * _stride;
- out0 += outch * 2;
- q += 2;
- }
-
- for (; q < outw; q++)
- {
- if (padding)
- {
- if ((q * _stride + m % kernel_w < pad_left) ||
- (q * _stride + m % kernel_w) >= pad_left + bottom_blob.w)
- {
- img1 += inch * _stride;
- out0 += outch;
- continue;
- }
- }
-
- const float *_x0 = img1;
- const float *kernel0 = _kernel0;
-
- int i = 0;
- for (; i + 3 < inch; i += 4)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x4_t rx0 asm("q4") = vld1q_f32(_x0);
-
- float *outptr0 = out0;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d24-d25}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d26-d27}, [r0]\n"
-
- "vmla.f32 q14, q6, %e[rx0][0]\n"
- "vmla.f32 q14, q7, %e[rx0][1]\n"
- "vmla.f32 q14, q8, %f[rx0][0]\n"
- "vmla.f32 q14, q9, %f[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "vmla.f32 q14, q10, %e[rx0][0]\n"
- "vmla.f32 q14, q11, %e[rx0][1]\n"
- "vmla.f32 q14, q12, %f[rx0][0]\n"
- "vmla.f32 q14, q13, %f[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "vmla.f32 q14, q6, %e[rx0][0]\n"
- "vmla.f32 q14, q7, %e[rx0][1]\n"
- "vmla.f32 q14, q8, %f[rx0][0]\n"
- "vmla.f32 q14, q9, %f[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d24-d25}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d26-d27}, [r0]\n"
-
- "vmla.f32 q14, q10, %e[rx0][0]\n"
- "vmla.f32 q14, q11, %e[rx0][1]\n"
- "vmla.f32 q14, q12, %f[rx0][0]\n"
- "vmla.f32 q14, q13, %f[rx0][1]\n"
-
- "cmp %[oddn], #1\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "vmla.f32 q14, q6, %e[rx0][0]\n"
- "vmla.f32 q14, q7, %e[rx0][1]\n"
- "vmla.f32 q14, q8, %f[rx0][0]\n"
- "vmla.f32 q14, q9, %f[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [oddn] "r"(oddn)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
-
- );
- }
-
- if (remain >= 2)
- {
- asm volatile("vld1.f32 {d28}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "vld1.f32 {d12}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18}, [r0]\n"
-
- "vmla.f32 d28, d12, %e[rx0][0]\n"
- "vmla.f32 d28, d14, %e[rx0][1]\n"
- "vmla.f32 d28, d16, %f[rx0][0]\n"
- "vmla.f32 d28, d18, %f[rx0][1]\n"
-
- "vst1.f32 {d28}, [%[outptr0]]!\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0)
- : [stride] "r"(stride), [rx0] "w"(rx0)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
-
- );
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x0 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x0 + 3));
-
- kernel0++;
- outptr0++;
- }
-
- kernel0 += outch * 3;
- _x0 += 4;
- }
-
- for (; i + 1 < inch; i += 2)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("d8") = vld1_f32(_x0);
-
- float *outptr0 = out0;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q14, q7, %P[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
- "vmla.f32 q14, q11, %P[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q14, q7, %P[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
- "vmla.f32 q14, q11, %P[rx0][1]\n"
-
- "cmp %[oddn], #1\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "bne 3f\n"
-
- "2:\n"
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q14, q7, %P[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [oddn] "r"(oddn)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q10", "q11", "q14"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
-
- );
- }
-
- if (remain >= 2)
- {
- asm volatile("vld1.f32 {d28}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "vld1.f32 {d12}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14}, [r0]\n"
-
- "vmla.f32 d28, d12, %P[rx0][0]\n"
- "vmla.f32 d28, d14, %P[rx0][1]\n"
-
- "vst1.f32 {d28}, [%[outptr0]]!\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0)
- : [stride] "r"(stride), [rx0] "w"(rx0)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
-
- );
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1));
-
- kernel0++;
- outptr0++;
- }
-
- kernel0 += outch;
- _x0 += 2;
- }
-
- for (; i < inch; i++)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("d8") = vld1_dup_f32(_x0);
-
- float *outptr0 = out0;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
-
- "cmp %[oddn], #1\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [_n] "+r"(_n)
- : [rx0] "w"(rx0), [oddn] "r"(oddn)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q10", "q14"
-
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
- );
- }
-
- if (remain >= 2)
- {
- asm volatile("vld1.f32 {d28}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "vld1.f32 {d12}, [r0]\n"
-
- "vmla.f32 d28, d12, %P[rx0][0]\n"
-
- "vst1.f32 {d28}, [%[outptr0]]!\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0)
- : [rx0] "w"(rx0)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
-
- );
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0);
-
- kernel0++;
- outptr0++;
- }
-
- _x0 += 1;
- }
-
- img1 += inch * _stride;
- out0 += outch;
- }
- }
- }
-}
-
-static void direct_conv_s(const convMat_t &bottom_blob, convMat_t &top_blob,
- const convMat_t &_kernel, const int _stride, const int padding,
- const int pad_top, const int pad_left)
-{
- const int w = bottom_blob.w;
- const int h = bottom_blob.h;
- const int inch = bottom_blob.c;
- const int outw = top_blob.w;
- const int outh = top_blob.h;
- const int outch = top_blob.c;
- const int kernel_w = _kernel.w;
- const int kernel_h = _kernel.h;
-
-#ifdef _OPENMP
-#pragma omp parallel for
-#endif // _OPENMP
- for (int p = 0; p < outh; p++)
- {
- const float *img0 = bottom_blob.data + (p * _stride - pad_top) * w * inch;
- float *out = top_blob.data + p * outw * outch;
-
- // clear output.
- for (int j = 0; j < outw * outch; j++)
- {
- *(out + j) = 0.f;
- }
-
- for (int m = 0; m < kernel_w * kernel_h; m++)
- {
- if (padding)
- {
- if (((p * _stride + m / kernel_w) < pad_top) || (p * _stride + m / kernel_w >= pad_top + h))
- {
- continue;
- }
- }
-
- float *out0 = out;
- const float *_kernel0 = _kernel.data + m * inch * outch;
- const float *img1 = img0 + (m / kernel_w) * w * inch + (m % kernel_w - pad_left) * inch;
-
- int q = 0;
- for (; q + 1 < outw; /*q += 2*/)
- {
- if (padding)
- {
- if (((q + 1) * _stride + m % kernel_w < pad_left) ||
- (q * _stride + m % kernel_w >= pad_left + w))
- {
- out0 += outch * 2;
- img1 += inch * _stride * 2;
- q += 2;
- continue;
- }
- else if (q * _stride + m % kernel_w < pad_left)
- {
- out0 += outch;
- img1 += inch * _stride;
- q++;
- continue;
- }
- else if ((q + 1) * _stride + m % kernel_w >= pad_left + w)
- {
- break;
- }
- }
-
- const float *_x0 = img1;
- const float *_x1 = img1 + inch * _stride;
-
- const float *kernel0 = _kernel0;
-
- int i = 0;
- for (; i + 3 < inch; i += 4)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x4_t rx0 asm("q4") = vld1q_f32(_x0);
- register float32x4_t rx1 asm("q5") = vld1q_f32(_x1);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d24-d25}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d26-d27}, [r0]\n"
-
- "vmla.f32 q14, q6, %e[rx0][0]\n"
- "vmla.f32 q15, q6, %e[rx1][0]\n"
- "vmla.f32 q14, q7, %e[rx0][1]\n"
- "vmla.f32 q15, q7, %e[rx1][1]\n"
- "vmla.f32 q14, q8, %f[rx0][0]\n"
- "vmla.f32 q15, q8, %f[rx1][0]\n"
- "vmla.f32 q14, q9, %f[rx0][1]\n"
- "vmla.f32 q15, q9, %f[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "vmla.f32 q14, q10, %e[rx0][0]\n"
- "vmla.f32 q15, q10, %e[rx1][0]\n"
- "vmla.f32 q14, q11, %e[rx0][1]\n"
- "vmla.f32 q15, q11, %e[rx1][1]\n"
- "vmla.f32 q14, q12, %f[rx0][0]\n"
- "vmla.f32 q15, q12, %f[rx1][0]\n"
- "vmla.f32 q14, q13, %f[rx0][1]\n"
- "vmla.f32 q15, q13, %f[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "vmla.f32 q14, q6, %e[rx0][0]\n"
- "vmla.f32 q15, q6, %e[rx1][0]\n"
- "vmla.f32 q14, q7, %e[rx0][1]\n"
- "vmla.f32 q15, q7, %e[rx1][1]\n"
- "vmla.f32 q14, q8, %f[rx0][0]\n"
- "vmla.f32 q15, q8, %f[rx1][0]\n"
- "vmla.f32 q14, q9, %f[rx0][1]\n"
- "vmla.f32 q15, q9, %f[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d24-d25}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d26-d27}, [r0]\n"
-
- "vmla.f32 q14, q10, %e[rx0][0]\n"
- "vmla.f32 q15, q10, %e[rx1][0]\n"
- "vmla.f32 q14, q11, %e[rx0][1]\n"
- "vmla.f32 q15, q11, %e[rx1][1]\n"
- "vmla.f32 q14, q12, %f[rx0][0]\n"
- "vmla.f32 q15, q12, %f[rx1][0]\n"
- "vmla.f32 q14, q13, %f[rx0][1]\n"
- "vmla.f32 q15, q13, %f[rx1][1]\n"
-
- "cmp %[oddn], #1\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "vmla.f32 q14, q6, %e[rx0][0]\n"
- "vmla.f32 q15, q6, %e[rx1][0]\n"
- "vmla.f32 q14, q7, %e[rx0][1]\n"
- "vmla.f32 q15, q7, %e[rx1][1]\n"
- "vmla.f32 q14, q8, %f[rx0][0]\n"
- "vmla.f32 q15, q8, %f[rx1][0]\n"
- "vmla.f32 q14, q9, %f[rx0][1]\n"
- "vmla.f32 q15, q9, %f[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn)
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15");
- }
-
- if (remain >= 2)
- {
- asm volatile("vld1.f32 {d28}, [%[outptr0]]\n"
- "vld1.f32 {d30}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "vld1.f32 {d12}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18}, [r0]\n"
-
- "vmla.f32 d28, d12, %e[rx0][0]\n"
- "vmla.f32 d30, d12, %e[rx1][0]\n"
- "vmla.f32 d28, d14, %e[rx0][1]\n"
- "vmla.f32 d30, d14, %e[rx1][1]\n"
- "vmla.f32 d28, d16, %f[rx0][0]\n"
- "vmla.f32 d30, d16, %f[rx1][0]\n"
- "vmla.f32 d28, d18, %f[rx0][1]\n"
- "vmla.f32 d30, d18, %f[rx1][1]\n"
-
- "vst1.f32 {d28}, [%[outptr0]]!\n"
- "vst1.f32 {d30}, [%[outptr1]]!\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q14", "q15"
-#else
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif
- );
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x0 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x0 + 3));
-
- *outptr1 += (*kernel0) * (*_x1) + (*(kernel0 + outch)) * (*(_x1 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x1 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x1 + 3));
-
- kernel0++;
- outptr0++;
- outptr1++;
- }
-
- kernel0 += outch * 3;
- _x0 += 4;
- _x1 += 4;
- }
-
- for (; i + 1 < inch; i += 2)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("d8") = vld1_f32(_x0);
- register float32x2_t rx1 asm("d10") = vld1_f32(_x1);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q15, q6, %P[rx1][0]\n"
- "vmla.f32 q14, q7, %P[rx0][1]\n"
- "vmla.f32 q15, q7, %P[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
- "vmla.f32 q15, q10, %P[rx1][0]\n"
- "vmla.f32 q14, q11, %P[rx0][1]\n"
- "vmla.f32 q15, q11, %P[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q15, q6, %P[rx1][0]\n"
- "vmla.f32 q14, q7, %P[rx0][1]\n"
- "vmla.f32 q15, q7, %P[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
- "vmla.f32 q15, q10, %P[rx1][0]\n"
- "vmla.f32 q14, q11, %P[rx0][1]\n"
- "vmla.f32 q15, q11, %P[rx1][1]\n"
-
- "cmp %[oddn], #1\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q15, q6, %P[rx1][0]\n"
- "vmla.f32 q14, q7, %P[rx0][1]\n"
- "vmla.f32 q15, q7, %P[rx1][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q10", "q11", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
- );
- }
-
- if (remain >= 2)
- {
- asm volatile("vld1.f32 {d28}, [%[outptr0]]\n"
- "vld1.f32 {d30}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "vld1.f32 {d12}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14}, [r0]\n"
-
- "vmla.f32 d28, d12, %P[rx0][0]\n"
- "vmla.f32 d30, d12, %P[rx1][0]\n"
- "vmla.f32 d28, d14, %P[rx0][1]\n"
- "vmla.f32 d30, d14, %P[rx1][1]\n"
-
- "vst1.f32 {d28}, [%[outptr0]]!\n"
- "vst1.f32 {d30}, [%[outptr1]]!\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1)
- : [stride] "r"(stride), [rx0] "w"(rx0), [rx1] "w"(rx1)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
- );
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1));
- *outptr1 += (*kernel0) * (*_x1) + (*(kernel0 + outch)) * (*(_x1 + 1));
-
- kernel0++;
- outptr0++;
- outptr1++;
- }
-
- kernel0 += outch;
- _x0 += 2;
- _x1 += 2;
- }
-
- for (; i < inch; i++)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("d8") = vld1_dup_f32(_x0);
- register float32x2_t rx1 asm("d10") = vld1_dup_f32(_x1);
-
- float *outptr0 = out0;
- float *outptr1 = out0 + outch;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q15, q6, %P[rx1][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
- "vmla.f32 q15, q10, %P[rx1][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q15, q6, %P[rx1][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
- "vmla.f32 q15, q10, %P[rx1][0]\n"
-
- "cmp %[oddn], #1\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
- "vld1.f32 {d30-d31}, [%[outptr1]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q15, q6, %P[rx1][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vst1.f32 {d30-d31}, [%[outptr1]]!\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0),
- [outptr1] "+r"(outptr1), [_n] "+r"(_n)
- : [rx0] "w"(rx0), [rx1] "w"(rx1), [oddn] "r"(oddn)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q10", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
- );
- }
-
- if (remain >= 2)
- {
- asm volatile("vld1.f32 {d28}, [%[outptr0]]\n"
- "vld1.f32 {d30}, [%[outptr1]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "vld1.f32 {d12}, [r0]\n"
-
- "vmla.f32 d28, d12, %P[rx0][0]\n"
- "vmla.f32 d30, d12, %P[rx1][0]\n"
-
- "vst1.f32 {d28}, [%[outptr0]]!\n"
- "vst1.f32 {d30}, [%[outptr1]]!\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [outptr1] "+r"(outptr1)
- : [rx0] "w"(rx0), [rx1] "w"(rx1)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
- );
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0);
- *outptr1 += (*kernel0) * (*_x1);
-
- kernel0++;
- outptr0++;
- outptr1++;
- }
-
- _x0 += 1;
- _x1 += 1;
- }
-
- img1 += inch * 2 * _stride;
- out0 += outch * 2;
- q += 2;
- }
-
- for (; q < outw; q++)
- {
- if (padding)
- {
- if ((q * _stride + m % kernel_w < pad_left) ||
- (q * _stride + m % kernel_w >= pad_left + w))
- {
- img1 += inch * _stride;
- out0 += outch;
- continue;
- }
- }
-
- const float *_x0 = img1;
- const float *kernel0 = _kernel0;
-
- int i = 0;
- for (; i + 3 < inch; i += 4)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x4_t rx0 asm("q4") = vld1q_f32(_x0);
-
- float *outptr0 = out0;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d24-d25}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d26-d27}, [r0]\n"
-
- "vmla.f32 q14, q6, %e[rx0][0]\n"
- "vmla.f32 q14, q7, %e[rx0][1]\n"
- "vmla.f32 q14, q8, %f[rx0][0]\n"
- "vmla.f32 q14, q9, %f[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "vmla.f32 q14, q10, %e[rx0][0]\n"
- "vmla.f32 q14, q11, %e[rx0][1]\n"
- "vmla.f32 q14, q12, %f[rx0][0]\n"
- "vmla.f32 q14, q13, %f[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "vmla.f32 q14, q6, %e[rx0][0]\n"
- "vmla.f32 q14, q7, %e[rx0][1]\n"
- "vmla.f32 q14, q8, %f[rx0][0]\n"
- "vmla.f32 q14, q9, %f[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d24-d25}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d26-d27}, [r0]\n"
-
- "vmla.f32 q14, q10, %e[rx0][0]\n"
- "vmla.f32 q14, q11, %e[rx0][1]\n"
- "vmla.f32 q14, q12, %f[rx0][0]\n"
- "vmla.f32 q14, q13, %f[rx0][1]\n"
-
- "cmp %[oddn], #1\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "vmla.f32 q14, q6, %e[rx0][0]\n"
- "vmla.f32 q14, q7, %e[rx0][1]\n"
- "vmla.f32 q14, q8, %f[rx0][0]\n"
- "vmla.f32 q14, q9, %f[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [oddn] "r"(oddn)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
- );
- }
-
- if (remain >= 2)
- {
- asm volatile("vld1.f32 {d28}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "vld1.f32 {d12}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d16}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d18}, [r0]\n"
-
- "vmla.f32 d28, d12, %e[rx0][0]\n"
- "vmla.f32 d28, d14, %e[rx0][1]\n"
- "vmla.f32 d28, d16, %f[rx0][0]\n"
- "vmla.f32 d28, d18, %f[rx0][1]\n"
-
- "vst1.f32 {d28}, [%[outptr0]]!\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0)
- : [stride] "r"(stride), [rx0] "w"(rx0)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
- );
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1)) +
- (*(kernel0 + outch * 2)) * (*(_x0 + 2)) +
- (*(kernel0 + outch * 3)) * (*(_x0 + 3));
-
- kernel0++;
- outptr0++;
- }
-
- kernel0 += outch * 3;
- _x0 += 4;
- }
-
- for (; i + 1 < inch; i += 2)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("d8") = vld1_f32(_x0);
-
- float *outptr0 = out0;
-
- int stride = outch << 2;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q14, q7, %P[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
- "vmla.f32 q14, q11, %P[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q14, q7, %P[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
- "vmla.f32 q14, q11, %P[rx0][1]\n"
-
- "cmp %[oddn], #1\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "bne 3f\n"
-
- "2:\n"
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
- "vmla.f32 q14, q7, %P[rx0][1]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [_n] "+r"(_n)
- : [stride] "r"(stride), [rx0] "w"(rx0), [oddn] "r"(oddn)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q10", "q11", "q14"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
- );
- }
-
- if (remain >= 2)
- {
- asm volatile("vld1.f32 {d28}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "vld1.f32 {d12}, [r0]\n"
- "add r0, r0, %[stride]\n"
- "vld1.f32 {d14}, [r0]\n"
-
- "vmla.f32 d28, d12, %P[rx0][0]\n"
- "vmla.f32 d28, d14, %P[rx0][1]\n"
-
- "vst1.f32 {d28}, [%[outptr0]]!\n"
-
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0)
- : [stride] "r"(stride), [rx0] "w"(rx0)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
- );
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0) + (*(kernel0 + outch)) * (*(_x0 + 1));
-
- kernel0++;
- outptr0++;
- }
-
- kernel0 += outch;
- _x0 += 2;
- }
-
- for (; i < inch; i++)
- {
- int nn = outch >> 2;
- int remain = outch & 0x03;
-
- register float32x2_t rx0 asm("d8") = vld1_dup_f32(_x0);
-
- float *outptr0 = out0;
-
- if (nn > 0)
- {
- int _n = nn >> 1;
- int oddn = nn & 1;
-
- asm volatile("cmp %[_n], #0\n"
- "beq 2f\n"
- "subs %[_n], %[_n], #1\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
-
- "beq 1f\n"
-
- "0:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "subs %[_n], %[_n], #1\n"
- "bne 0b\n"
-
- "1:\n"
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d20-d21}, [r0]\n"
-
- "vmla.f32 q14, q10, %P[rx0][0]\n"
-
- "cmp %[oddn], #1\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
-
- "bne 3f\n"
-
- "2:\n"
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #16\n"
- "vld1.f32 {d12-d13}, [r0]\n"
-
- "vld1.f32 {d28-d29}, [%[outptr0]]\n"
-
- "vmla.f32 q14, q6, %P[rx0][0]\n"
-
- "vst1.f32 {d28-d29}, [%[outptr0]]!\n"
- "3:\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0), [_n] "+r"(_n)
- : [rx0] "w"(rx0), [oddn] "r"(oddn)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q10", "q14"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
- );
- }
-
- if (remain >= 2)
- {
- asm volatile("vld1.f32 {d28}, [%[outptr0]]\n"
-
- "mov r0, %[kernel0]\n"
- "add %[kernel0], %[kernel0], #8\n"
- "vld1.f32 {d12}, [r0]\n"
-
- "vmla.f32 d28, d12, %P[rx0][0]\n"
-
- "vst1.f32 {d28}, [%[outptr0]]!\n"
- : [kernel0] "+r"(kernel0), [outptr0] "+r"(outptr0)
- : [rx0] "w"(rx0)
-#ifndef _OPENMP
- : "cc", "memory", "r0", "q6", "q14", "q15"
-#else // _OPENMP
- : "cc", "memory", "r0", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13",
- "q14", "q15"
-#endif // _OPENMP
- );
- remain -= 2;
- }
-
- if (remain == 1)
- {
- *outptr0 += (*kernel0) * (*_x0);
-
- kernel0++;
- outptr0++;
- }
-
- _x0 += 1;
- }
-
- img1 += inch * _stride;
- out0 += outch;
- }
- }
- }
-}
-#endif // __aarch64__
-
-void direct_conv_colmajor(const convMat_t &bottom_blob, convMat_t &top_blob,
- const convMat_t &kernel, const convParams_t &params, int num_threads)
-{
- omp_set_num_threads(num_threads);
-
- if (bottom_blob.c * top_blob.c < 256 * 256)
- {
- direct_conv_s(bottom_blob, top_blob, kernel, params.stride_w, params.padding, params.pad_h,
- params.pad_w);
- return;
- }
-
- direct_conv_l(bottom_blob, top_blob, kernel, params.stride_w, params.padding, params.pad_h,
- params.pad_w);
-}
-
-} // namespace srcn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/direct_conv_colmajor.h b/compute/ncnn/src/srcn/direct_conv_colmajor.h
deleted file mode 100644
index 5e15192c9..000000000
--- a/compute/ncnn/src/srcn/direct_conv_colmajor.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_DIRECT_CONV_COLMAJOR_H__
-#define __NNFW_SRCN_DIRECT_CONV_COLMAJOR_H__
-
-#include "ncnn/srcn/conv_type.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-void direct_conv_colmajor(const convMat_t &, convMat_t &, const convMat_t &, const convParams_t &,
- int);
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_DIRECT_CONV_COLMAJOR_H__
diff --git a/compute/ncnn/src/srcn/sgemm_kernel.cc b/compute/ncnn/src/srcn/sgemm_kernel.cc
deleted file mode 100644
index 90c3641db..000000000
--- a/compute/ncnn/src/srcn/sgemm_kernel.cc
+++ /dev/null
@@ -1,2508 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <arm_neon.h>
-
-namespace nnfw
-{
-namespace srcn
-{
-
-#if __aarch64__
-static void sgemm_rowmajor_micro_kernel_8x12(const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k, const int k0,
- const int stride)
-{
- int oddk = (k & 1);
- int nk = ((k + 1) / 2) - 1;
-
- const int nstride = stride << 2;
-
- __asm __volatile("ld1 {v0.4s}, [%[lhs_ptr]], #16\n"
- "ld1 {v2.4s, v3.4s, v4.4s}, [%[rhs_ptr]], #48\n"
-
- "cmp %[k0], #0\n"
- "beq 0f\n"
-
- "mov x0, %[res_ptr]\n"
- "ld1 {v8.4s, v9.4s, v10.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v11.4s, v12.4s, v13.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v14.4s, v15.4s, v16.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v17.4s, v18.4s, v19.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v20.4s, v21.4s, v22.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v23.4s, v24.4s, v25.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v26.4s, v27.4s, v28.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v29.4s, v30.4s, v31.4s}, [x0]\n"
- "cbz %w[nk], 4f\n"
- "b 1f\n"
-
- "0:\n"
- "movi v8.4s, #0x0\n"
- "movi v9.4s, #0x0\n"
- "movi v10.4s, #0x0\n"
- "movi v11.4s, #0x0\n"
- "movi v12.4s, #0x0\n"
- "movi v13.4s, #0x0\n"
- "movi v14.4s, #0x0\n"
- "movi v15.4s, #0x0\n"
- "movi v16.4s, #0x0\n"
- "movi v17.4s, #0x0\n"
- "movi v18.4s, #0x0\n"
- "movi v19.4s, #0x0\n"
- "movi v20.4s, #0x0\n"
- "movi v21.4s, #0x0\n"
- "movi v22.4s, #0x0\n"
- "movi v23.4s, #0x0\n"
- "movi v24.4s, #0x0\n"
- "movi v25.4s, #0x0\n"
- "movi v26.4s, #0x0\n"
- "movi v27.4s, #0x0\n"
- "movi v28.4s, #0x0\n"
- "movi v29.4s, #0x0\n"
- "movi v30.4s, #0x0\n"
- "movi v31.4s, #0x0\n"
- "cbz %w[nk], 4f\n"
-
- "1:\n"
- "fmla v8.4s, v2.4s, v0.s[0]\n"
- "fmla v11.4s, v2.4s, v0.s[1]\n"
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
- "fmla v14.4s, v2.4s, v0.s[2]\n"
- "fmla v17.4s, v2.4s, v0.s[3]\n"
- "fmla v9.4s, v3.4s, v0.s[0]\n"
- "fmla v12.4s, v3.4s, v0.s[1]\n"
- "fmla v15.4s, v3.4s, v0.s[2]\n"
- "fmla v18.4s, v3.4s, v0.s[3]\n"
- "fmla v10.4s, v4.4s, v0.s[0]\n"
- "fmla v13.4s, v4.4s, v0.s[1]\n"
- "fmla v16.4s, v4.4s, v0.s[2]\n"
- "fmla v19.4s, v4.4s, v0.s[3]\n"
-
- "ld1 {v5.4s, v6.4s, v7.4s}, [%[rhs_ptr]], #48\n"
-
- "fmla v20.4s, v2.4s, v1.s[0]\n"
- "fmla v23.4s, v2.4s, v1.s[1]\n"
- "ld1 {v0.4s}, [%[lhs_ptr]], #16\n"
- "fmla v26.4s, v2.4s, v1.s[2]\n"
- "fmla v29.4s, v2.4s, v1.s[3]\n"
- "fmla v21.4s, v3.4s, v1.s[0]\n"
- "fmla v24.4s, v3.4s, v1.s[1]\n"
- "fmla v27.4s, v3.4s, v1.s[2]\n"
- "fmla v30.4s, v3.4s, v1.s[3]\n"
- "fmla v22.4s, v4.4s, v1.s[0]\n"
- "fmla v25.4s, v4.4s, v1.s[1]\n"
- "fmla v28.4s, v4.4s, v1.s[2]\n"
- "fmla v31.4s, v4.4s, v1.s[3]\n"
-
- "fmla v8.4s, v5.4s, v0.s[0]\n"
- "fmla v11.4s, v5.4s, v0.s[1]\n"
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
- "fmla v14.4s, v5.4s, v0.s[2]\n"
- "fmla v17.4s, v5.4s, v0.s[3]\n"
- "fmla v9.4s, v6.4s, v0.s[0]\n"
- "fmla v12.4s, v6.4s, v0.s[1]\n"
- "fmla v15.4s, v6.4s, v0.s[2]\n"
- "fmla v18.4s, v6.4s, v0.s[3]\n"
- "fmla v10.4s, v7.4s, v0.s[0]\n"
- "fmla v13.4s, v7.4s, v0.s[1]\n"
- "fmla v16.4s, v7.4s, v0.s[2]\n"
- "fmla v19.4s, v7.4s, v0.s[3]\n"
-
- "ld1 {v2.4s, v3.4s, v4.4s}, [%[rhs_ptr]], #48\n"
-
- "fmla v20.4s, v5.4s, v1.s[0]\n"
- "fmla v23.4s, v5.4s, v1.s[1]\n"
- "ld1 {v0.4s}, [%[lhs_ptr]], #16\n"
- "fmla v26.4s, v5.4s, v1.s[2]\n"
- "fmla v29.4s, v5.4s, v1.s[3]\n"
- "fmla v21.4s, v6.4s, v1.s[0]\n"
- "fmla v24.4s, v6.4s, v1.s[1]\n"
- "fmla v27.4s, v6.4s, v1.s[2]\n"
- "fmla v30.4s, v6.4s, v1.s[3]\n"
- "fmla v22.4s, v7.4s, v1.s[0]\n"
- "fmla v25.4s, v7.4s, v1.s[1]\n"
- "subs %w[nk], %w[nk], #1\n"
- "fmla v28.4s, v7.4s, v1.s[2]\n"
- "fmla v31.4s, v7.4s, v1.s[3]\n"
- "bne 1b\n"
-
- "4:\n"
- "mov x0, %[res_ptr]\n"
- "cbnz %[oddk], 2f\n"
-
- "fmla v8.4s, v2.4s, v0.s[0]\n"
- "fmla v9.4s, v3.4s, v0.s[0]\n"
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
- "fmla v10.4s, v4.4s, v0.s[0]\n"
- "fmla v11.4s, v2.4s, v0.s[1]\n"
- "fmla v12.4s, v3.4s, v0.s[1]\n"
- "fmla v13.4s, v4.4s, v0.s[1]\n"
- "ld1 {v5.4s, v6.4s, v7.4s}, [%[rhs_ptr]], #48\n"
- "fmla v14.4s, v2.4s, v0.s[2]\n"
- "fmla v15.4s, v3.4s, v0.s[2]\n"
- "fmla v16.4s, v4.4s, v0.s[2]\n"
- "fmla v17.4s, v2.4s, v0.s[3]\n"
- "fmla v18.4s, v3.4s, v0.s[3]\n"
- "fmla v19.4s, v4.4s, v0.s[3]\n"
-
- "fmla v20.4s, v2.4s, v1.s[0]\n"
- "fmla v21.4s, v3.4s, v1.s[0]\n"
- "ld1 {v0.4s}, [%[lhs_ptr]], #16\n"
- "fmla v22.4s, v4.4s, v1.s[0]\n"
- "fmla v23.4s, v2.4s, v1.s[1]\n"
- "fmla v24.4s, v3.4s, v1.s[1]\n"
- "fmla v25.4s, v4.4s, v1.s[1]\n"
- "fmla v26.4s, v2.4s, v1.s[2]\n"
- "fmla v27.4s, v3.4s, v1.s[2]\n"
- "fmla v28.4s, v4.4s, v1.s[2]\n"
- "fmla v29.4s, v2.4s, v1.s[3]\n"
- "fmla v30.4s, v3.4s, v1.s[3]\n"
- "fmla v31.4s, v4.4s, v1.s[3]\n"
-
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
- "ld1 {v2.4s, v3.4s, v4.4s}, [%[rhs_ptr]], #48\n"
-
- "fmla v8.4s, v5.4s, v0.s[0]\n"
- "fmla v9.4s, v6.4s, v0.s[0]\n"
- "fmla v10.4s, v7.4s, v0.s[0]\n"
- "st1 {v8.4s, v9.4s, v10.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v11.4s, v5.4s, v0.s[1]\n"
- "fmla v12.4s, v6.4s, v0.s[1]\n"
- "fmla v13.4s, v7.4s, v0.s[1]\n"
- "st1 {v11.4s, v12.4s, v13.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v14.4s, v5.4s, v0.s[2]\n"
- "fmla v15.4s, v6.4s, v0.s[2]\n"
- "fmla v16.4s, v7.4s, v0.s[2]\n"
- "st1 {v14.4s, v15.4s, v16.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v17.4s, v5.4s, v0.s[3]\n"
- "fmla v18.4s, v6.4s, v0.s[3]\n"
- "fmla v19.4s, v7.4s, v0.s[3]\n"
- "st1 {v17.4s, v18.4s, v19.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
-
- "fmla v20.4s, v5.4s, v1.s[0]\n"
- "fmla v21.4s, v6.4s, v1.s[0]\n"
- "fmla v22.4s, v7.4s, v1.s[0]\n"
- "st1 {v20.4s, v21.4s, v22.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v23.4s, v5.4s, v1.s[1]\n"
- "fmla v24.4s, v6.4s, v1.s[1]\n"
- "fmla v25.4s, v7.4s, v1.s[1]\n"
- "st1 {v23.4s, v24.4s, v25.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v26.4s, v5.4s, v1.s[2]\n"
- "fmla v27.4s, v6.4s, v1.s[2]\n"
- "fmla v28.4s, v7.4s, v1.s[2]\n"
- "st1 {v26.4s, v27.4s, v28.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v29.4s, v5.4s, v1.s[3]\n"
- "fmla v30.4s, v6.4s, v1.s[3]\n"
- "fmla v31.4s, v7.4s, v1.s[3]\n"
- "b 3f\n"
-
- "2:\n"
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
- "ld1 {v5.4s, v6.4s, v7.4s}, [%[rhs_ptr]], #48\n"
-
- "fmla v8.4s, v2.4s, v0.s[0]\n"
- "fmla v9.4s, v3.4s, v0.s[0]\n"
- "fmla v10.4s, v4.4s, v0.s[0]\n"
- "st1 {v8.4s, v9.4s, v10.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v11.4s, v2.4s, v0.s[1]\n"
- "fmla v12.4s, v3.4s, v0.s[1]\n"
- "fmla v13.4s, v4.4s, v0.s[1]\n"
- "st1 {v11.4s, v12.4s, v13.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v14.4s, v2.4s, v0.s[2]\n"
- "fmla v15.4s, v3.4s, v0.s[2]\n"
- "fmla v16.4s, v4.4s, v0.s[2]\n"
- "st1 {v14.4s, v15.4s, v16.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v17.4s, v2.4s, v0.s[3]\n"
- "fmla v18.4s, v3.4s, v0.s[3]\n"
- "fmla v19.4s, v4.4s, v0.s[3]\n"
- "st1 {v17.4s, v18.4s, v19.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
-
- "fmla v20.4s, v2.4s, v1.s[0]\n"
- "fmla v21.4s, v3.4s, v1.s[0]\n"
- "fmla v22.4s, v4.4s, v1.s[0]\n"
- "st1 {v20.4s, v21.4s, v22.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v23.4s, v2.4s, v1.s[1]\n"
- "fmla v24.4s, v3.4s, v1.s[1]\n"
- "fmla v25.4s, v4.4s, v1.s[1]\n"
- "st1 {v23.4s, v24.4s, v25.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v26.4s, v2.4s, v1.s[2]\n"
- "fmla v27.4s, v3.4s, v1.s[2]\n"
- "fmla v28.4s, v4.4s, v1.s[2]\n"
- "st1 {v26.4s, v27.4s, v28.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v29.4s, v2.4s, v1.s[3]\n"
- "fmla v30.4s, v3.4s, v1.s[3]\n"
- "fmla v31.4s, v4.4s, v1.s[3]\n"
-
- "3:\n"
- "st1 {v29.4s, v30.4s, v31.4s}, [x0]\n"
- : [lhs_ptr] "+r"(lhs_ptr), [rhs_ptr] "+r"(rhs_ptr), [res_ptr] "+r"(res_ptr),
- [nk] "+r"(nk)
- : [oddk] "r"(oddk), [k0] "r"(k0), [nstride] "r"(nstride)
- : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
- "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22",
- "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
-}
-
-static void sgemm_rowmajor_micro_kernel_12x8(const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k, const int k0,
- const int stride)
-{
- int oddk = (k & 1);
- int nk = ((k + 1) / 2) - 1;
-
- const int nstride = stride << 2;
-
- __asm __volatile("ld1 {v0.4s}, [%[lhs_ptr]], #16\n"
- "ld1 {v4.4s, v5.4s}, [%[rhs_ptr]], #32\n"
-
- "cmp %[k0], #0\n"
- "beq 0f\n"
-
- "mov x0, %[res_ptr]\n"
- "ld1 {v8.4s, v9.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v10.4s, v11.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v12.4s, v13.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v14.4s, v15.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v16.4s, v17.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v18.4s, v19.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v20.4s, v21.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v22.4s, v23.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v24.4s, v25.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v26.4s, v27.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v28.4s, v29.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v30.4s, v31.4s}, [x0]\n"
- "cbz %w[nk], 4f\n"
- "b 1f\n"
-
- "0:\n"
- "movi v8.4s, #0x0\n"
- "movi v9.4s, #0x0\n"
- "movi v10.4s, #0x0\n"
- "movi v11.4s, #0x0\n"
- "movi v12.4s, #0x0\n"
- "movi v13.4s, #0x0\n"
- "movi v14.4s, #0x0\n"
- "movi v15.4s, #0x0\n"
- "movi v16.4s, #0x0\n"
- "movi v17.4s, #0x0\n"
- "movi v18.4s, #0x0\n"
- "movi v19.4s, #0x0\n"
- "movi v20.4s, #0x0\n"
- "movi v21.4s, #0x0\n"
- "movi v22.4s, #0x0\n"
- "movi v23.4s, #0x0\n"
- "movi v24.4s, #0x0\n"
- "movi v25.4s, #0x0\n"
- "movi v26.4s, #0x0\n"
- "movi v27.4s, #0x0\n"
- "movi v28.4s, #0x0\n"
- "movi v29.4s, #0x0\n"
- "movi v30.4s, #0x0\n"
- "movi v31.4s, #0x0\n"
- "cbz %w[nk], 4f\n"
-
- "1:\n"
- "fmla v8.4s, v4.4s, v0.s[0]\n"
- "fmla v10.4s, v4.4s, v0.s[1]\n"
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
- "fmla v12.4s, v4.4s, v0.s[2]\n"
- "fmla v14.4s, v4.4s, v0.s[3]\n"
- "fmla v9.4s, v5.4s, v0.s[0]\n"
- "fmla v11.4s, v5.4s, v0.s[1]\n"
- "fmla v13.4s, v5.4s, v0.s[2]\n"
- "fmla v15.4s, v5.4s, v0.s[3]\n"
-
- "fmla v16.4s, v4.4s, v1.s[0]\n"
- "fmla v18.4s, v4.4s, v1.s[1]\n"
- "ld1 {v2.4s}, [%[lhs_ptr]], #16\n"
- "fmla v20.4s, v4.4s, v1.s[2]\n"
- "fmla v22.4s, v4.4s, v1.s[3]\n"
- "fmla v17.4s, v5.4s, v1.s[0]\n"
- "fmla v19.4s, v5.4s, v1.s[1]\n"
- "fmla v21.4s, v5.4s, v1.s[2]\n"
- "fmla v23.4s, v5.4s, v1.s[3]\n"
-
- "ld1 {v6.4s, v7.4s}, [%[rhs_ptr]], #32\n"
-
- "fmla v24.4s, v4.4s, v2.s[0]\n"
- "fmla v26.4s, v4.4s, v2.s[1]\n"
- "ld1 {v0.4s}, [%[lhs_ptr]], #16\n"
- "fmla v28.4s, v4.4s, v2.s[2]\n"
- "fmla v30.4s, v4.4s, v2.s[3]\n"
- "fmla v25.4s, v5.4s, v2.s[0]\n"
- "fmla v27.4s, v5.4s, v2.s[1]\n"
- "fmla v29.4s, v5.4s, v2.s[2]\n"
- "fmla v31.4s, v5.4s, v2.s[3]\n"
-
- "fmla v8.4s, v6.4s, v0.s[0]\n"
- "fmla v10.4s, v6.4s, v0.s[1]\n"
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
- "fmla v12.4s, v6.4s, v0.s[2]\n"
- "fmla v14.4s, v6.4s, v0.s[3]\n"
- "fmla v9.4s, v7.4s, v0.s[0]\n"
- "fmla v11.4s, v7.4s, v0.s[1]\n"
- "fmla v13.4s, v7.4s, v0.s[2]\n"
- "fmla v15.4s, v7.4s, v0.s[3]\n"
-
- "fmla v16.4s, v6.4s, v1.s[0]\n"
- "fmla v18.4s, v6.4s, v1.s[1]\n"
- "ld1 {v2.4s}, [%[lhs_ptr]], #16\n"
- "fmla v20.4s, v6.4s, v1.s[2]\n"
- "fmla v22.4s, v6.4s, v1.s[3]\n"
- "fmla v17.4s, v7.4s, v1.s[0]\n"
- "fmla v19.4s, v7.4s, v1.s[1]\n"
- "fmla v21.4s, v7.4s, v1.s[2]\n"
- "fmla v23.4s, v7.4s, v1.s[3]\n"
-
- "ld1 {v4.4s, v5.4s}, [%[rhs_ptr]], #32\n"
-
- "fmla v24.4s, v6.4s, v2.s[0]\n"
- "fmla v26.4s, v6.4s, v2.s[1]\n"
- "ld1 {v0.4s}, [%[lhs_ptr]], #16\n"
- "fmla v28.4s, v6.4s, v2.s[2]\n"
- "fmla v30.4s, v6.4s, v2.s[3]\n"
- "fmla v25.4s, v7.4s, v2.s[0]\n"
- "fmla v27.4s, v7.4s, v2.s[1]\n"
- "subs %w[nk], %w[nk], #1\n"
- "fmla v29.4s, v7.4s, v2.s[2]\n"
- "fmla v31.4s, v7.4s, v2.s[3]\n"
- "bne 1b\n"
-
- "4:\n"
- "mov x0, %[res_ptr]\n"
- "cbnz %[oddk], 2f\n"
-
- "fmla v8.4s, v4.4s, v0.s[0]\n"
- "fmla v9.4s, v5.4s, v0.s[0]\n"
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
- "fmla v10.4s, v4.4s, v0.s[1]\n"
- "fmla v11.4s, v5.4s, v0.s[1]\n"
- "fmla v12.4s, v4.4s, v0.s[2]\n"
- "fmla v13.4s, v5.4s, v0.s[2]\n"
- "fmla v14.4s, v4.4s, v0.s[3]\n"
- "fmla v15.4s, v5.4s, v0.s[3]\n"
-
- "fmla v16.4s, v4.4s, v1.s[0]\n"
- "fmla v17.4s, v5.4s, v1.s[0]\n"
- "ld1 {v2.4s}, [%[lhs_ptr]], #16\n"
- "fmla v18.4s, v4.4s, v1.s[1]\n"
- "fmla v19.4s, v5.4s, v1.s[1]\n"
- "fmla v20.4s, v4.4s, v1.s[2]\n"
- "fmla v21.4s, v5.4s, v1.s[2]\n"
- "fmla v22.4s, v4.4s, v1.s[3]\n"
- "fmla v23.4s, v5.4s, v1.s[3]\n"
-
- "ld1 {v6.4s, v7.4s}, [%[rhs_ptr]], #32\n"
-
- "fmla v24.4s, v4.4s, v2.s[0]\n"
- "fmla v25.4s, v5.4s, v2.s[0]\n"
- "ld1 {v0.4s}, [%[lhs_ptr]], #16\n"
- "fmla v26.4s, v4.4s, v2.s[1]\n"
- "fmla v27.4s, v5.4s, v2.s[1]\n"
- "fmla v28.4s, v4.4s, v2.s[2]\n"
- "fmla v29.4s, v5.4s, v2.s[2]\n"
- "fmla v30.4s, v4.4s, v2.s[3]\n"
- "fmla v31.4s, v5.4s, v2.s[3]\n"
-
- "fmla v8.4s, v6.4s, v0.s[0]\n"
- "fmla v9.4s, v7.4s, v0.s[0]\n"
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
- "st1 {v8.4s, v9.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v10.4s, v6.4s, v0.s[1]\n"
- "fmla v11.4s, v7.4s, v0.s[1]\n"
- "st1 {v10.4s, v11.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v12.4s, v6.4s, v0.s[2]\n"
- "fmla v13.4s, v7.4s, v0.s[2]\n"
- "st1 {v12.4s, v13.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v14.4s, v6.4s, v0.s[3]\n"
- "fmla v15.4s, v7.4s, v0.s[3]\n"
- "st1 {v14.4s, v15.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
-
- "fmla v16.4s, v6.4s, v1.s[0]\n"
- "fmla v17.4s, v7.4s, v1.s[0]\n"
- "ld1 {v2.4s}, [%[lhs_ptr]], #16\n"
- "st1 {v16.4s, v17.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v18.4s, v6.4s, v1.s[1]\n"
- "fmla v19.4s, v7.4s, v1.s[1]\n"
- "st1 {v18.4s, v19.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v20.4s, v6.4s, v1.s[2]\n"
- "fmla v21.4s, v7.4s, v1.s[2]\n"
- "st1 {v20.4s, v21.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v22.4s, v6.4s, v1.s[3]\n"
- "fmla v23.4s, v7.4s, v1.s[3]\n"
- "st1 {v22.4s, v23.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
-
- "fmla v24.4s, v6.4s, v2.s[0]\n"
- "fmla v25.4s, v7.4s, v2.s[0]\n"
- "st1 {v24.4s, v25.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v26.4s, v6.4s, v2.s[1]\n"
- "fmla v27.4s, v7.4s, v2.s[1]\n"
- "st1 {v26.4s, v27.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v28.4s, v6.4s, v2.s[2]\n"
- "fmla v29.4s, v7.4s, v2.s[2]\n"
- "st1 {v28.4s, v29.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v30.4s, v6.4s, v2.s[3]\n"
- "fmla v31.4s, v7.4s, v2.s[3]\n"
- "b 3f\n"
-
- "2:\n"
- "fmla v8.4s, v4.4s, v0.s[0]\n"
- "fmla v9.4s, v5.4s, v0.s[0]\n"
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
- "st1 {v8.4s, v9.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v10.4s, v4.4s, v0.s[1]\n"
- "fmla v11.4s, v5.4s, v0.s[1]\n"
- "st1 {v10.4s, v11.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v12.4s, v4.4s, v0.s[2]\n"
- "fmla v13.4s, v5.4s, v0.s[2]\n"
- "st1 {v12.4s, v13.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v14.4s, v4.4s, v0.s[3]\n"
- "fmla v15.4s, v5.4s, v0.s[3]\n"
- "st1 {v14.4s, v15.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
-
- "fmla v16.4s, v4.4s, v1.s[0]\n"
- "fmla v17.4s, v5.4s, v1.s[0]\n"
- "ld1 {v2.4s}, [%[lhs_ptr]], #16\n"
- "st1 {v16.4s, v17.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v18.4s, v4.4s, v1.s[1]\n"
- "fmla v19.4s, v5.4s, v1.s[1]\n"
- "st1 {v18.4s, v19.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v20.4s, v4.4s, v1.s[2]\n"
- "fmla v21.4s, v5.4s, v1.s[2]\n"
- "st1 {v20.4s, v21.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v22.4s, v4.4s, v1.s[3]\n"
- "fmla v23.4s, v5.4s, v1.s[3]\n"
- "st1 {v22.4s, v23.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
-
- "fmla v24.4s, v4.4s, v2.s[0]\n"
- "fmla v25.4s, v5.4s, v2.s[0]\n"
- "st1 {v24.4s, v25.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v26.4s, v4.4s, v2.s[1]\n"
- "fmla v27.4s, v5.4s, v2.s[1]\n"
- "st1 {v26.4s, v27.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v28.4s, v4.4s, v2.s[2]\n"
- "fmla v29.4s, v5.4s, v2.s[2]\n"
- "st1 {v28.4s, v29.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v30.4s, v4.4s, v2.s[3]\n"
- "fmla v31.4s, v5.4s, v2.s[3]\n"
-
- "3:\n"
- "st1 {v30.4s, v31.4s}, [x0]\n"
- : [lhs_ptr] "+r"(lhs_ptr), [rhs_ptr] "+r"(rhs_ptr), [res_ptr] "+r"(res_ptr),
- [nk] "+r"(nk)
- : [oddk] "r"(oddk), [k0] "r"(k0), [nstride] "r"(nstride)
- : "x0", "v0", "v1", "v2", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
- "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22",
- "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
-}
-
-#ifdef BATCH_DILATION_FIX
-// SGEMM micro-kernel for one 4x24 tile of a row-major result:
-//   res[4][24] (+)= lhs[4][k] * rhs[k][24]
-// lhs_ptr : packed LHS panel; 4 floats (one value per output row) per depth step.
-// rhs_ptr : packed RHS panel; 24 floats per depth step.
-// res_ptr : tile base; consecutive output rows are 'stride' floats apart
-//           (nstride = stride << 2 is the byte distance used by the asm).
-// k       : depth of the multiply.
-// k0      : zero means start from a zeroed tile; non-zero means accumulate onto
-//           the current contents of res_ptr (tile preloaded into v8..v31).
-// This BATCH_DILATION_FIX variant tests each 4-float LHS column for being
-// bitwise zero (labels 1:/6:) and, when it is, skips that depth step's 24 RHS
-// loads and 24 FMLAs, only advancing rhs_ptr by 96 bytes.
-// NOTE(review): presumably the zero columns come from dilation/batch padding,
-// as the macro name suggests -- confirm against the packing code.
-static void sgemm_rowmajor_micro_kernel_4x24(const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k, const int k0,
- const int stride)
-{
- // Main loop runs nk iterations of two depth steps each; the tail at labels
- // 4:/2: handles the final two steps (k even) or one step (k odd).
- int oddk = (k & 1);
- int nk = ((k + 1) / 2) - 1;
-
- // Byte stride between output rows (stride is counted in floats).
- const int nstride = stride << 2;
-
- __asm __volatile("ld1 {v0.4s}, [%[lhs_ptr]], #16\n"
-
- "cmp %[k0], #0\n"
- "beq 0f\n"
-
- // Accumulate path: preload the existing 4x24 tile, two ld1 per row.
- "mov x0, %[res_ptr]\n"
- "mov x1, x0\n"
- "ld1 {v8.4s, v9.4s, v10.4s}, [x1], #48\n"
- "ld1 {v11.4s, v12.4s, v13.4s}, [x1]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "ld1 {v14.4s, v15.4s, v16.4s}, [x1], #48\n"
- "ld1 {v17.4s, v18.4s, v19.4s}, [x1]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "ld1 {v20.4s, v21.4s, v22.4s}, [x1], #48\n"
- "ld1 {v23.4s, v24.4s, v25.4s}, [x1]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "ld1 {v26.4s, v27.4s, v28.4s}, [x1], #48\n"
- "ld1 {v29.4s, v30.4s, v31.4s}, [x1]\n"
- "cbz %w[nk], 4f\n"
- "b 1f\n"
-
- // Fresh tile: clear all 24 accumulator registers.
- "0:\n"
- "movi v8.4s, #0x0\n"
- "movi v9.4s, #0x0\n"
- "movi v10.4s, #0x0\n"
- "movi v11.4s, #0x0\n"
- "movi v12.4s, #0x0\n"
- "movi v13.4s, #0x0\n"
- "movi v14.4s, #0x0\n"
- "movi v15.4s, #0x0\n"
- "movi v16.4s, #0x0\n"
- "movi v17.4s, #0x0\n"
- "movi v18.4s, #0x0\n"
- "movi v19.4s, #0x0\n"
- "movi v20.4s, #0x0\n"
- "movi v21.4s, #0x0\n"
- "movi v22.4s, #0x0\n"
- "movi v23.4s, #0x0\n"
- "movi v24.4s, #0x0\n"
- "movi v25.4s, #0x0\n"
- "movi v26.4s, #0x0\n"
- "movi v27.4s, #0x0\n"
- "movi v28.4s, #0x0\n"
- "movi v29.4s, #0x0\n"
- "movi v30.4s, #0x0\n"
- "movi v31.4s, #0x0\n"
- "cbz %w[nk], 4f\n"
-
- // Main loop: two depth steps per iteration (LHS column in v0, then v1).
- // First step: if both 64-bit halves of v0 are zero, skip the whole step.
- "1:\n"
- "mov x0, v0.d[0]\n"
- "cmp x0, #0\n"
- "bne 5f\n"
- "mov x0, v0.d[1]\n"
- "cmp x0, #0\n"
- "bne 5f\n"
- "add %[rhs_ptr], %[rhs_ptr], #96\n"
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
- "b 6f\n"
- "5:\n"
- "ld1 {v2.4s, v3.4s, v4.4s}, [%[rhs_ptr]], #48\n"
- "fmla v8.4s, v2.4s, v0.s[0]\n"
- "fmla v14.4s, v2.4s, v0.s[1]\n"
- "fmla v20.4s, v2.4s, v0.s[2]\n"
- "fmla v26.4s, v2.4s, v0.s[3]\n"
- "ld1 {v5.4s, v6.4s, v7.4s}, [%[rhs_ptr]], #48\n"
- "fmla v9.4s, v3.4s, v0.s[0]\n"
- "fmla v15.4s, v3.4s, v0.s[1]\n"
- "fmla v21.4s, v3.4s, v0.s[2]\n"
- "fmla v27.4s, v3.4s, v0.s[3]\n"
- "fmla v10.4s, v4.4s, v0.s[0]\n"
- "fmla v16.4s, v4.4s, v0.s[1]\n"
- "fmla v22.4s, v4.4s, v0.s[2]\n"
- "fmla v28.4s, v4.4s, v0.s[3]\n"
-
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
-
- "fmla v11.4s, v5.4s, v0.s[0]\n"
- "fmla v17.4s, v5.4s, v0.s[1]\n"
- "fmla v23.4s, v5.4s, v0.s[2]\n"
- "fmla v29.4s, v5.4s, v0.s[3]\n"
- "fmla v12.4s, v6.4s, v0.s[0]\n"
- "fmla v18.4s, v6.4s, v0.s[1]\n"
- "fmla v24.4s, v6.4s, v0.s[2]\n"
- "fmla v30.4s, v6.4s, v0.s[3]\n"
- "fmla v13.4s, v7.4s, v0.s[0]\n"
- "fmla v19.4s, v7.4s, v0.s[1]\n"
- "fmla v25.4s, v7.4s, v0.s[2]\n"
- "fmla v31.4s, v7.4s, v0.s[3]\n"
-
- // Second depth step: same zero-skip test on the v1 column.
- "6:\n"
- "mov x0, v1.d[0]\n"
- "cmp x0, #0\n"
- "bne 7f\n"
- "mov x0, v1.d[1]\n"
- "cmp x0, #0\n"
- "bne 7f\n"
- "add %[rhs_ptr], %[rhs_ptr], #96\n"
- "ld1 {v0.4s}, [%[lhs_ptr]], #16\n"
- "b 8f\n"
- "7:\n"
- "ld1 {v2.4s, v3.4s, v4.4s}, [%[rhs_ptr]], #48\n"
- "fmla v8.4s, v2.4s, v1.s[0]\n"
- "fmla v14.4s, v2.4s, v1.s[1]\n"
- "fmla v20.4s, v2.4s, v1.s[2]\n"
- "fmla v26.4s, v2.4s, v1.s[3]\n"
- "ld1 {v5.4s, v6.4s, v7.4s}, [%[rhs_ptr]], #48\n"
- "fmla v9.4s, v3.4s, v1.s[0]\n"
- "fmla v15.4s, v3.4s, v1.s[1]\n"
- "fmla v21.4s, v3.4s, v1.s[2]\n"
- "fmla v27.4s, v3.4s, v1.s[3]\n"
- "fmla v10.4s, v4.4s, v1.s[0]\n"
- "fmla v16.4s, v4.4s, v1.s[1]\n"
- "fmla v22.4s, v4.4s, v1.s[2]\n"
- "fmla v28.4s, v4.4s, v1.s[3]\n"
-
- "ld1 {v0.4s}, [%[lhs_ptr]], #16\n"
-
- "fmla v11.4s, v5.4s, v1.s[0]\n"
- "fmla v17.4s, v5.4s, v1.s[1]\n"
- "fmla v23.4s, v5.4s, v1.s[2]\n"
- "fmla v29.4s, v5.4s, v1.s[3]\n"
- "fmla v12.4s, v6.4s, v1.s[0]\n"
- "fmla v18.4s, v6.4s, v1.s[1]\n"
- "fmla v24.4s, v6.4s, v1.s[2]\n"
- "fmla v30.4s, v6.4s, v1.s[3]\n"
- "fmla v13.4s, v7.4s, v1.s[0]\n"
- "fmla v19.4s, v7.4s, v1.s[1]\n"
- "fmla v25.4s, v7.4s, v1.s[2]\n"
- "fmla v31.4s, v7.4s, v1.s[3]\n"
-
- "8:\n"
- "subs %w[nk], %w[nk], #1\n"
- "bne 1b\n"
-
- // Tail: k even -> two more depth steps then store; k odd -> branch to 2:.
- "4:\n"
- "mov x0, %[res_ptr]\n"
- "cbnz %[oddk], 2f\n"
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
- "ld1 {v2.4s, v3.4s, v4.4s}, [%[rhs_ptr]], #48\n"
- "ld1 {v5.4s, v6.4s, v7.4s}, [%[rhs_ptr]], #48\n"
-
- "fmla v8.4s, v2.4s, v0.s[0]\n"
- "fmla v9.4s, v3.4s, v0.s[0]\n"
- "fmla v10.4s, v4.4s, v0.s[0]\n"
- "fmla v14.4s, v2.4s, v0.s[1]\n"
- "fmla v15.4s, v3.4s, v0.s[1]\n"
- "fmla v16.4s, v4.4s, v0.s[1]\n"
- "fmla v20.4s, v2.4s, v0.s[2]\n"
- "fmla v21.4s, v3.4s, v0.s[2]\n"
- "fmla v22.4s, v4.4s, v0.s[2]\n"
- "fmla v26.4s, v2.4s, v0.s[3]\n"
- "fmla v27.4s, v3.4s, v0.s[3]\n"
- "fmla v28.4s, v4.4s, v0.s[3]\n"
-
- "ld1 {v2.4s, v3.4s, v4.4s}, [%[rhs_ptr]], #48\n"
-
- "fmla v11.4s, v5.4s, v0.s[0]\n"
- "fmla v12.4s, v6.4s, v0.s[0]\n"
- "fmla v13.4s, v7.4s, v0.s[0]\n"
- "fmla v17.4s, v5.4s, v0.s[1]\n"
- "fmla v18.4s, v6.4s, v0.s[1]\n"
- "fmla v19.4s, v7.4s, v0.s[1]\n"
- "fmla v23.4s, v5.4s, v0.s[2]\n"
- "fmla v24.4s, v6.4s, v0.s[2]\n"
- "fmla v25.4s, v7.4s, v0.s[2]\n"
- "fmla v29.4s, v5.4s, v0.s[3]\n"
- "fmla v30.4s, v6.4s, v0.s[3]\n"
- "fmla v31.4s, v7.4s, v0.s[3]\n"
-
- "ld1 {v5.4s, v6.4s, v7.4s}, [%[rhs_ptr]], #48\n"
-
- // Final depth step: stores are interleaved with the last FMLAs.
- "fmla v8.4s, v2.4s, v1.s[0]\n"
- "fmla v9.4s, v3.4s, v1.s[0]\n"
- "fmla v10.4s, v4.4s, v1.s[0]\n"
- "mov x1, x0\n"
- "st1 {v8.4s, v9.4s, v10.4s}, [x1], #48\n"
- "fmla v11.4s, v5.4s, v1.s[0]\n"
- "fmla v12.4s, v6.4s, v1.s[0]\n"
- "fmla v13.4s, v7.4s, v1.s[0]\n"
- "st1 {v11.4s, v12.4s, v13.4s}, [x1]\n"
- "fmla v14.4s, v2.4s, v1.s[1]\n"
- "fmla v15.4s, v3.4s, v1.s[1]\n"
- "fmla v16.4s, v4.4s, v1.s[1]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "st1 {v14.4s, v15.4s, v16.4s}, [x1], #48\n"
- "fmla v17.4s, v5.4s, v1.s[1]\n"
- "fmla v18.4s, v6.4s, v1.s[1]\n"
- "fmla v19.4s, v7.4s, v1.s[1]\n"
- "st1 {v17.4s, v18.4s, v19.4s}, [x1]\n"
- "fmla v20.4s, v2.4s, v1.s[2]\n"
- "fmla v21.4s, v3.4s, v1.s[2]\n"
- "fmla v22.4s, v4.4s, v1.s[2]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "st1 {v20.4s, v21.4s, v22.4s}, [x1], #48\n"
- "fmla v23.4s, v5.4s, v1.s[2]\n"
- "fmla v24.4s, v6.4s, v1.s[2]\n"
- "fmla v25.4s, v7.4s, v1.s[2]\n"
- "st1 {v23.4s, v24.4s, v25.4s}, [x1]\n"
- "fmla v26.4s, v2.4s, v1.s[3]\n"
- "fmla v27.4s, v3.4s, v1.s[3]\n"
- "fmla v28.4s, v4.4s, v1.s[3]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "st1 {v26.4s, v27.4s, v28.4s}, [x1], #48\n"
- "fmla v29.4s, v5.4s, v1.s[3]\n"
- "fmla v30.4s, v6.4s, v1.s[3]\n"
- "fmla v31.4s, v7.4s, v1.s[3]\n"
- "b 3f\n"
-
- // Odd-k tail: one depth step, stores interleaved with the final FMLAs.
- "2:\n"
- "ld1 {v2.4s, v3.4s, v4.4s}, [%[rhs_ptr]], #48\n"
- "ld1 {v5.4s, v6.4s, v7.4s}, [%[rhs_ptr]], #48\n"
-
- "fmla v8.4s, v2.4s, v0.s[0]\n"
- "fmla v9.4s, v3.4s, v0.s[0]\n"
- "fmla v10.4s, v4.4s, v0.s[0]\n"
- "mov x1, x0\n"
- "st1 {v8.4s, v9.4s, v10.4s}, [x1], #48\n"
- "fmla v11.4s, v5.4s, v0.s[0]\n"
- "fmla v12.4s, v6.4s, v0.s[0]\n"
- "fmla v13.4s, v7.4s, v0.s[0]\n"
- "st1 {v11.4s, v12.4s, v13.4s}, [x1]\n"
- "fmla v14.4s, v2.4s, v0.s[1]\n"
- "fmla v15.4s, v3.4s, v0.s[1]\n"
- "fmla v16.4s, v4.4s, v0.s[1]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "st1 {v14.4s, v15.4s, v16.4s}, [x1], #48\n"
- "fmla v17.4s, v5.4s, v0.s[1]\n"
- "fmla v18.4s, v6.4s, v0.s[1]\n"
- "fmla v19.4s, v7.4s, v0.s[1]\n"
- "st1 {v17.4s, v18.4s, v19.4s}, [x1]\n"
- "fmla v20.4s, v2.4s, v0.s[2]\n"
- "fmla v21.4s, v3.4s, v0.s[2]\n"
- "fmla v22.4s, v4.4s, v0.s[2]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "st1 {v20.4s, v21.4s, v22.4s}, [x1], #48\n"
- "fmla v23.4s, v5.4s, v0.s[2]\n"
- "fmla v24.4s, v6.4s, v0.s[2]\n"
- "fmla v25.4s, v7.4s, v0.s[2]\n"
- "st1 {v23.4s, v24.4s, v25.4s}, [x1]\n"
- "fmla v26.4s, v2.4s, v0.s[3]\n"
- "fmla v27.4s, v3.4s, v0.s[3]\n"
- "fmla v28.4s, v4.4s, v0.s[3]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "st1 {v26.4s, v27.4s, v28.4s}, [x1], #48\n"
- "fmla v29.4s, v5.4s, v0.s[3]\n"
- "fmla v30.4s, v6.4s, v0.s[3]\n"
- "fmla v31.4s, v7.4s, v0.s[3]\n"
- "3:\n"
- "st1 {v29.4s, v30.4s, v31.4s}, [x1]\n"
- : [lhs_ptr] "+r"(lhs_ptr), [rhs_ptr] "+r"(rhs_ptr), [res_ptr] "+r"(res_ptr),
- [nk] "+r"(nk)
- : [oddk] "r"(oddk), [k0] "r"(k0), [nstride] "r"(nstride)
- : "x0", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10",
- "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21",
- "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
-}
-#else // BATCH_DILATION_FIX
-// SGEMM micro-kernel for one 4x24 tile of a row-major result:
-//   res[4][24] (+)= lhs[4][k] * rhs[k][24]
-// Same contract as the BATCH_DILATION_FIX variant: lhs_ptr supplies 4 floats
-// per depth step, rhs_ptr 24 floats per depth step, output rows are 'stride'
-// floats apart (nstride = stride << 2 bytes), and k0 selects zero-init (0)
-// versus accumulate (non-zero). Unlike the FIX variant, every depth step is
-// multiplied unconditionally, which allows the RHS loads to be pipelined
-// ahead of the FMLAs that consume them.
-static void sgemm_rowmajor_micro_kernel_4x24(const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k, const int k0,
- const int stride)
-{
- // Main loop runs nk iterations of two depth steps each; the tail at labels
- // 4:/2: handles the final two steps (k even) or one step (k odd).
- int oddk = (k & 1);
- int nk = ((k + 1) / 2) - 1;
-
- // Byte stride between output rows (stride is counted in floats).
- const int nstride = stride << 2;
-
- __asm __volatile("ld1 {v0.4s}, [%[lhs_ptr]], #16\n"
- "ld1 {v2.4s}, [%[rhs_ptr]], #16\n"
- "ld1 {v3.4s}, [%[rhs_ptr]], #16\n"
- "ld1 {v4.4s}, [%[rhs_ptr]], #16\n"
-
- "cmp %[k0], #0\n"
- "beq 0f\n"
-
- // Accumulate path: preload the existing 4x24 tile, two ld1 per row.
- "mov x0, %[res_ptr]\n"
- "mov x1, x0\n"
- "ld1 {v8.4s, v9.4s, v10.4s}, [x1], #48\n"
- "ld1 {v11.4s, v12.4s, v13.4s}, [x1]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "ld1 {v14.4s, v15.4s, v16.4s}, [x1], #48\n"
- "ld1 {v17.4s, v18.4s, v19.4s}, [x1]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "ld1 {v20.4s, v21.4s, v22.4s}, [x1], #48\n"
- "ld1 {v23.4s, v24.4s, v25.4s}, [x1]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "ld1 {v26.4s, v27.4s, v28.4s}, [x1], #48\n"
- "ld1 {v29.4s, v30.4s, v31.4s}, [x1]\n"
- "cbz %w[nk], 4f\n"
- "b 1f\n"
-
- // Fresh tile: clear all 24 accumulator registers.
- "0:\n"
- "movi v8.4s, #0x0\n"
- "movi v9.4s, #0x0\n"
- "movi v10.4s, #0x0\n"
- "movi v11.4s, #0x0\n"
- "movi v12.4s, #0x0\n"
- "movi v13.4s, #0x0\n"
- "movi v14.4s, #0x0\n"
- "movi v15.4s, #0x0\n"
- "movi v16.4s, #0x0\n"
- "movi v17.4s, #0x0\n"
- "movi v18.4s, #0x0\n"
- "movi v19.4s, #0x0\n"
- "movi v20.4s, #0x0\n"
- "movi v21.4s, #0x0\n"
- "movi v22.4s, #0x0\n"
- "movi v23.4s, #0x0\n"
- "movi v24.4s, #0x0\n"
- "movi v25.4s, #0x0\n"
- "movi v26.4s, #0x0\n"
- "movi v27.4s, #0x0\n"
- "movi v28.4s, #0x0\n"
- "movi v29.4s, #0x0\n"
- "movi v30.4s, #0x0\n"
- "movi v31.4s, #0x0\n"
- "cbz %w[nk], 4f\n"
-
- // Main loop: two depth steps per iteration (LHS column in v0, then v1);
- // RHS vectors for the next step are loaded while the current one computes.
- "1:\n"
- "fmla v8.4s, v2.4s, v0.s[0]\n"
- "fmla v14.4s, v2.4s, v0.s[1]\n"
- "fmla v20.4s, v2.4s, v0.s[2]\n"
- "fmla v26.4s, v2.4s, v0.s[3]\n"
- "fmla v9.4s, v3.4s, v0.s[0]\n"
- "fmla v15.4s, v3.4s, v0.s[1]\n"
- "ld1 {v5.4s, v6.4s, v7.4s}, [%[rhs_ptr]], #48\n"
- "fmla v21.4s, v3.4s, v0.s[2]\n"
- "fmla v27.4s, v3.4s, v0.s[3]\n"
- "fmla v10.4s, v4.4s, v0.s[0]\n"
- "fmla v16.4s, v4.4s, v0.s[1]\n"
- "fmla v22.4s, v4.4s, v0.s[2]\n"
- "fmla v28.4s, v4.4s, v0.s[3]\n"
-
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
-
- "fmla v11.4s, v5.4s, v0.s[0]\n"
- "fmla v17.4s, v5.4s, v0.s[1]\n"
- "fmla v23.4s, v5.4s, v0.s[2]\n"
- "fmla v29.4s, v5.4s, v0.s[3]\n"
- "fmla v12.4s, v6.4s, v0.s[0]\n"
- "fmla v18.4s, v6.4s, v0.s[1]\n"
- "ld1 {v2.4s, v3.4s, v4.4s}, [%[rhs_ptr]], #48\n"
- "fmla v24.4s, v6.4s, v0.s[2]\n"
- "fmla v30.4s, v6.4s, v0.s[3]\n"
- "fmla v13.4s, v7.4s, v0.s[0]\n"
- "fmla v19.4s, v7.4s, v0.s[1]\n"
- "fmla v25.4s, v7.4s, v0.s[2]\n"
- "fmla v31.4s, v7.4s, v0.s[3]\n"
-
- "fmla v8.4s, v2.4s, v1.s[0]\n"
- "fmla v14.4s, v2.4s, v1.s[1]\n"
- "fmla v20.4s, v2.4s, v1.s[2]\n"
- "fmla v26.4s, v2.4s, v1.s[3]\n"
- "fmla v9.4s, v3.4s, v1.s[0]\n"
- "fmla v15.4s, v3.4s, v1.s[1]\n"
- "ld1 {v5.4s, v6.4s, v7.4s}, [%[rhs_ptr]], #48\n"
- "fmla v21.4s, v3.4s, v1.s[2]\n"
- "fmla v27.4s, v3.4s, v1.s[3]\n"
- "fmla v10.4s, v4.4s, v1.s[0]\n"
- "fmla v16.4s, v4.4s, v1.s[1]\n"
- "fmla v22.4s, v4.4s, v1.s[2]\n"
- "fmla v28.4s, v4.4s, v1.s[3]\n"
-
- "ld1 {v0.4s}, [%[lhs_ptr]], #16\n"
-
- "fmla v11.4s, v5.4s, v1.s[0]\n"
- "fmla v17.4s, v5.4s, v1.s[1]\n"
- "fmla v23.4s, v5.4s, v1.s[2]\n"
- "fmla v29.4s, v5.4s, v1.s[3]\n"
- "fmla v12.4s, v6.4s, v1.s[0]\n"
- "fmla v18.4s, v6.4s, v1.s[1]\n"
- "ld1 {v2.4s, v3.4s, v4.4s}, [%[rhs_ptr]], #48\n"
- "fmla v24.4s, v6.4s, v1.s[2]\n"
- "fmla v30.4s, v6.4s, v1.s[3]\n"
- "fmla v13.4s, v7.4s, v1.s[0]\n"
- "fmla v19.4s, v7.4s, v1.s[1]\n"
- "subs %w[nk], %w[nk], #1\n"
- "fmla v25.4s, v7.4s, v1.s[2]\n"
- "fmla v31.4s, v7.4s, v1.s[3]\n"
- "bne 1b\n"
-
- // Tail: k even -> two more depth steps then store; k odd -> branch to 2:.
- "4:\n"
- "mov x0, %[res_ptr]\n"
- "cbnz %[oddk], 2f\n"
- "ld1 {v1.4s}, [%[lhs_ptr]], #16\n"
- "ld1 {v5.4s, v6.4s, v7.4s}, [%[rhs_ptr]], #48\n"
-
- "fmla v8.4s, v2.4s, v0.s[0]\n"
- "fmla v9.4s, v3.4s, v0.s[0]\n"
- "fmla v10.4s, v4.4s, v0.s[0]\n"
- "fmla v14.4s, v2.4s, v0.s[1]\n"
- "fmla v15.4s, v3.4s, v0.s[1]\n"
- "fmla v16.4s, v4.4s, v0.s[1]\n"
- "fmla v20.4s, v2.4s, v0.s[2]\n"
- "fmla v21.4s, v3.4s, v0.s[2]\n"
- "fmla v22.4s, v4.4s, v0.s[2]\n"
- "fmla v26.4s, v2.4s, v0.s[3]\n"
- "fmla v27.4s, v3.4s, v0.s[3]\n"
- "fmla v28.4s, v4.4s, v0.s[3]\n"
-
- "ld1 {v2.4s, v3.4s, v4.4s}, [%[rhs_ptr]], #48\n"
-
- "fmla v11.4s, v5.4s, v0.s[0]\n"
- "fmla v12.4s, v6.4s, v0.s[0]\n"
- "fmla v13.4s, v7.4s, v0.s[0]\n"
- "fmla v17.4s, v5.4s, v0.s[1]\n"
- "fmla v18.4s, v6.4s, v0.s[1]\n"
- "fmla v19.4s, v7.4s, v0.s[1]\n"
- "fmla v23.4s, v5.4s, v0.s[2]\n"
- "fmla v24.4s, v6.4s, v0.s[2]\n"
- "fmla v25.4s, v7.4s, v0.s[2]\n"
- "fmla v29.4s, v5.4s, v0.s[3]\n"
- "fmla v30.4s, v6.4s, v0.s[3]\n"
- "fmla v31.4s, v7.4s, v0.s[3]\n"
-
- "ld1 {v5.4s, v6.4s, v7.4s}, [%[rhs_ptr]], #48\n"
-
- // Final depth step: stores are interleaved with the last FMLAs.
- "fmla v8.4s, v2.4s, v1.s[0]\n"
- "fmla v9.4s, v3.4s, v1.s[0]\n"
- "fmla v10.4s, v4.4s, v1.s[0]\n"
- "mov x1, x0\n"
- "st1 {v8.4s, v9.4s, v10.4s}, [x1], #48\n"
- "fmla v11.4s, v5.4s, v1.s[0]\n"
- "fmla v12.4s, v6.4s, v1.s[0]\n"
- "fmla v13.4s, v7.4s, v1.s[0]\n"
- "st1 {v11.4s, v12.4s, v13.4s}, [x1]\n"
- "fmla v14.4s, v2.4s, v1.s[1]\n"
- "fmla v15.4s, v3.4s, v1.s[1]\n"
- "fmla v16.4s, v4.4s, v1.s[1]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "st1 {v14.4s, v15.4s, v16.4s}, [x1], #48\n"
- "fmla v17.4s, v5.4s, v1.s[1]\n"
- "fmla v18.4s, v6.4s, v1.s[1]\n"
- "fmla v19.4s, v7.4s, v1.s[1]\n"
- "st1 {v17.4s, v18.4s, v19.4s}, [x1]\n"
- "fmla v20.4s, v2.4s, v1.s[2]\n"
- "fmla v21.4s, v3.4s, v1.s[2]\n"
- "fmla v22.4s, v4.4s, v1.s[2]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "st1 {v20.4s, v21.4s, v22.4s}, [x1], #48\n"
- "fmla v23.4s, v5.4s, v1.s[2]\n"
- "fmla v24.4s, v6.4s, v1.s[2]\n"
- "fmla v25.4s, v7.4s, v1.s[2]\n"
- "st1 {v23.4s, v24.4s, v25.4s}, [x1]\n"
- "fmla v26.4s, v2.4s, v1.s[3]\n"
- "fmla v27.4s, v3.4s, v1.s[3]\n"
- "fmla v28.4s, v4.4s, v1.s[3]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "st1 {v26.4s, v27.4s, v28.4s}, [x1], #48\n"
- "fmla v29.4s, v5.4s, v1.s[3]\n"
- "fmla v30.4s, v6.4s, v1.s[3]\n"
- "fmla v31.4s, v7.4s, v1.s[3]\n"
- "b 3f\n"
-
- // Odd-k tail: one depth step, stores interleaved with the final FMLAs.
- "2:\n"
- "ld1 {v5.4s, v6.4s, v7.4s}, [%[rhs_ptr]], #48\n"
-
- "fmla v8.4s, v2.4s, v0.s[0]\n"
- "fmla v9.4s, v3.4s, v0.s[0]\n"
- "fmla v10.4s, v4.4s, v0.s[0]\n"
- "mov x1, x0\n"
- "st1 {v8.4s, v9.4s, v10.4s}, [x1], #48\n"
- "fmla v11.4s, v5.4s, v0.s[0]\n"
- "fmla v12.4s, v6.4s, v0.s[0]\n"
- "fmla v13.4s, v7.4s, v0.s[0]\n"
- "st1 {v11.4s, v12.4s, v13.4s}, [x1]\n"
- "fmla v14.4s, v2.4s, v0.s[1]\n"
- "fmla v15.4s, v3.4s, v0.s[1]\n"
- "fmla v16.4s, v4.4s, v0.s[1]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "st1 {v14.4s, v15.4s, v16.4s}, [x1], #48\n"
- "fmla v17.4s, v5.4s, v0.s[1]\n"
- "fmla v18.4s, v6.4s, v0.s[1]\n"
- "fmla v19.4s, v7.4s, v0.s[1]\n"
- "st1 {v17.4s, v18.4s, v19.4s}, [x1]\n"
- "fmla v20.4s, v2.4s, v0.s[2]\n"
- "fmla v21.4s, v3.4s, v0.s[2]\n"
- "fmla v22.4s, v4.4s, v0.s[2]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "st1 {v20.4s, v21.4s, v22.4s}, [x1], #48\n"
- "fmla v23.4s, v5.4s, v0.s[2]\n"
- "fmla v24.4s, v6.4s, v0.s[2]\n"
- "fmla v25.4s, v7.4s, v0.s[2]\n"
- "st1 {v23.4s, v24.4s, v25.4s}, [x1]\n"
- "fmla v26.4s, v2.4s, v0.s[3]\n"
- "fmla v27.4s, v3.4s, v0.s[3]\n"
- "fmla v28.4s, v4.4s, v0.s[3]\n"
- "add x0, x0, %[nstride]\n"
- "mov x1, x0\n"
- "st1 {v26.4s, v27.4s, v28.4s}, [x1], #48\n"
- "fmla v29.4s, v5.4s, v0.s[3]\n"
- "fmla v30.4s, v6.4s, v0.s[3]\n"
- "fmla v31.4s, v7.4s, v0.s[3]\n"
- "3:\n"
- "st1 {v29.4s, v30.4s, v31.4s}, [x1]\n"
- : [lhs_ptr] "+r"(lhs_ptr), [rhs_ptr] "+r"(rhs_ptr), [res_ptr] "+r"(res_ptr),
- [nk] "+r"(nk)
- : [oddk] "r"(oddk), [k0] "r"(k0), [nstride] "r"(nstride)
- : "x0", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10",
- "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21",
- "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
-}
-#endif // BATCH_DILATION_FIX
-
-// SGEMM micro-kernel for one 24x4 tile of a row-major result:
-//   res[24][4] (+)= lhs[24][k] * rhs[k][4]
-// lhs_ptr : packed LHS panel; 24 floats per depth step (held in v0..v5).
-// rhs_ptr : packed RHS panel; 4 floats per depth step.
-// res_ptr : tile base; each of the 24 output rows is a single 4-float vector,
-//           rows 'stride' floats apart (nstride = stride << 2 bytes).
-// k0 == 0 zero-initializes accumulators v8..v31, otherwise the existing tile
-// is preloaded and accumulated onto. The main loop performs two depth steps
-// per iteration (RHS vectors v6 and v7, double-buffered); the tail at labels
-// 4:/2: finishes the last two steps (k even) or one step (k odd), with the
-// row stores interleaved into the final FMLA sequence.
-static void sgemm_rowmajor_micro_kernel_24x4(const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k, const int k0,
- const int stride)
-{
- // nk double-step iterations; oddk selects the one- vs two-step tail.
- int oddk = (k & 1);
- int nk = ((k + 1) / 2) - 1;
-
- // Byte stride between output rows (stride is counted in floats).
- const int nstride = stride << 2;
-
- __asm __volatile("ld1 {v0.4s, v1.4s, v2.4s}, [%[lhs_ptr]], #48\n"
- "ld1 {v6.4s}, [%[rhs_ptr]], #16\n"
-
- "cmp %[k0], #0\n"
- "beq 0f\n"
-
- // Accumulate path: preload all 24 result rows, one vector each.
- "mov x0, %[res_ptr]\n"
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v9.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v13.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v14.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v15.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v16.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v17.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v18.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v19.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v20.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v21.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v22.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v23.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v24.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v25.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v26.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v27.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v28.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v29.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v30.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "ld1 {v31.4s}, [x0]\n"
- "cbz %w[nk], 4f\n"
- "b 1f\n"
-
- // Fresh tile: clear all 24 accumulator registers.
- "0:\n"
- "movi v8.4s, #0x0\n"
- "movi v9.4s, #0x0\n"
- "movi v10.4s, #0x0\n"
- "movi v11.4s, #0x0\n"
- "movi v12.4s, #0x0\n"
- "movi v13.4s, #0x0\n"
- "movi v14.4s, #0x0\n"
- "movi v15.4s, #0x0\n"
- "movi v16.4s, #0x0\n"
- "movi v17.4s, #0x0\n"
- "movi v18.4s, #0x0\n"
- "movi v19.4s, #0x0\n"
- "movi v20.4s, #0x0\n"
- "movi v21.4s, #0x0\n"
- "movi v22.4s, #0x0\n"
- "movi v23.4s, #0x0\n"
- "movi v24.4s, #0x0\n"
- "movi v25.4s, #0x0\n"
- "movi v26.4s, #0x0\n"
- "movi v27.4s, #0x0\n"
- "movi v28.4s, #0x0\n"
- "movi v29.4s, #0x0\n"
- "movi v30.4s, #0x0\n"
- "movi v31.4s, #0x0\n"
- "cbz %w[nk], 4f\n"
-
- // Main loop: two depth steps per iteration; RHS alternates v6/v7 and the
- // 24 LHS values stream through v0..v2 then v3..v5.
- "1:\n"
- "ld1 {v3.4s, v4.4s, v5.4s}, [%[lhs_ptr]], #48\n"
- "fmla v8.4s, v6.4s, v0.s[0]\n"
- "fmla v9.4s, v6.4s, v0.s[1]\n"
- "fmla v10.4s, v6.4s, v0.s[2]\n"
- "fmla v11.4s, v6.4s, v0.s[3]\n"
- "fmla v12.4s, v6.4s, v1.s[0]\n"
- "fmla v13.4s, v6.4s, v1.s[1]\n"
- "ld1 {v7.4s}, [%[rhs_ptr]], #16\n"
- "fmla v14.4s, v6.4s, v1.s[2]\n"
- "fmla v15.4s, v6.4s, v1.s[3]\n"
- "fmla v16.4s, v6.4s, v2.s[0]\n"
- "fmla v17.4s, v6.4s, v2.s[1]\n"
- "fmla v18.4s, v6.4s, v2.s[2]\n"
- "fmla v19.4s, v6.4s, v2.s[3]\n"
- "ld1 {v0.4s, v1.4s, v2.4s}, [%[lhs_ptr]], #48\n"
- "fmla v20.4s, v6.4s, v3.s[0]\n"
- "fmla v21.4s, v6.4s, v3.s[1]\n"
- "fmla v22.4s, v6.4s, v3.s[2]\n"
- "fmla v23.4s, v6.4s, v3.s[3]\n"
- "fmla v24.4s, v6.4s, v4.s[0]\n"
- "fmla v25.4s, v6.4s, v4.s[1]\n"
- "fmla v26.4s, v6.4s, v4.s[2]\n"
- "fmla v27.4s, v6.4s, v4.s[3]\n"
- "fmla v28.4s, v6.4s, v5.s[0]\n"
- "fmla v29.4s, v6.4s, v5.s[1]\n"
- "fmla v30.4s, v6.4s, v5.s[2]\n"
- "fmla v31.4s, v6.4s, v5.s[3]\n"
-
- "ld1 {v3.4s, v4.4s, v5.4s}, [%[lhs_ptr]], #48\n"
- "fmla v8.4s, v7.4s, v0.s[0]\n"
- "fmla v9.4s, v7.4s, v0.s[1]\n"
- "fmla v10.4s, v7.4s, v0.s[2]\n"
- "fmla v11.4s, v7.4s, v0.s[3]\n"
- "fmla v12.4s, v7.4s, v1.s[0]\n"
- "fmla v13.4s, v7.4s, v1.s[1]\n"
- "ld1 {v6.4s}, [%[rhs_ptr]], #16\n"
- "fmla v14.4s, v7.4s, v1.s[2]\n"
- "fmla v15.4s, v7.4s, v1.s[3]\n"
- "fmla v16.4s, v7.4s, v2.s[0]\n"
- "fmla v17.4s, v7.4s, v2.s[1]\n"
- "fmla v18.4s, v7.4s, v2.s[2]\n"
- "fmla v19.4s, v7.4s, v2.s[3]\n"
- "ld1 {v0.4s, v1.4s, v2.4s}, [%[lhs_ptr]], #48\n"
- "fmla v20.4s, v7.4s, v3.s[0]\n"
- "fmla v21.4s, v7.4s, v3.s[1]\n"
- "fmla v22.4s, v7.4s, v3.s[2]\n"
- "fmla v23.4s, v7.4s, v3.s[3]\n"
- "fmla v24.4s, v7.4s, v4.s[0]\n"
- "fmla v25.4s, v7.4s, v4.s[1]\n"
- "fmla v26.4s, v7.4s, v4.s[2]\n"
- "fmla v27.4s, v7.4s, v4.s[3]\n"
- "fmla v28.4s, v7.4s, v5.s[0]\n"
- "fmla v29.4s, v7.4s, v5.s[1]\n"
- "subs %w[nk], %w[nk], #1\n"
- "fmla v30.4s, v7.4s, v5.s[2]\n"
- "fmla v31.4s, v7.4s, v5.s[3]\n"
- "bne 1b\n"
-
- // Tail: k even -> two more depth steps then store; k odd -> branch to 2:.
- "4:\n"
- "mov x0, %[res_ptr]\n"
- "cbnz %[oddk], 2f\n"
-
- "ld1 {v3.4s, v4.4s, v5.4s}, [%[lhs_ptr]], #48\n"
- "fmla v8.4s, v6.4s, v0.s[0]\n"
- "fmla v9.4s, v6.4s, v0.s[1]\n"
- "fmla v10.4s, v6.4s, v0.s[2]\n"
- "fmla v11.4s, v6.4s, v0.s[3]\n"
- "fmla v12.4s, v6.4s, v1.s[0]\n"
- "fmla v13.4s, v6.4s, v1.s[1]\n"
- "ld1 {v7.4s}, [%[rhs_ptr]], #16\n"
- "fmla v14.4s, v6.4s, v1.s[2]\n"
- "fmla v15.4s, v6.4s, v1.s[3]\n"
- "fmla v16.4s, v6.4s, v2.s[0]\n"
- "fmla v17.4s, v6.4s, v2.s[1]\n"
- "fmla v18.4s, v6.4s, v2.s[2]\n"
- "fmla v19.4s, v6.4s, v2.s[3]\n"
- "ld1 {v0.4s, v1.4s, v2.4s}, [%[lhs_ptr]], #48\n"
- "fmla v20.4s, v6.4s, v3.s[0]\n"
- "fmla v21.4s, v6.4s, v3.s[1]\n"
- "fmla v22.4s, v6.4s, v3.s[2]\n"
- "fmla v23.4s, v6.4s, v3.s[3]\n"
- "fmla v24.4s, v6.4s, v4.s[0]\n"
- "fmla v25.4s, v6.4s, v4.s[1]\n"
- "fmla v26.4s, v6.4s, v4.s[2]\n"
- "fmla v27.4s, v6.4s, v4.s[3]\n"
- "fmla v28.4s, v6.4s, v5.s[0]\n"
- "fmla v29.4s, v6.4s, v5.s[1]\n"
- "fmla v30.4s, v6.4s, v5.s[2]\n"
- "fmla v31.4s, v6.4s, v5.s[3]\n"
-
- // Final depth step: each row's store follows its last FMLA.
- "ld1 {v3.4s, v4.4s, v5.4s}, [%[lhs_ptr]], #48\n"
- "fmla v8.4s, v7.4s, v0.s[0]\n"
- "fmla v9.4s, v7.4s, v0.s[1]\n"
- "st1 {v8.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v10.4s, v7.4s, v0.s[2]\n"
- "st1 {v9.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v11.4s, v7.4s, v0.s[3]\n"
- "st1 {v10.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v12.4s, v7.4s, v1.s[0]\n"
- "st1 {v11.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v13.4s, v7.4s, v1.s[1]\n"
- "st1 {v12.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v14.4s, v7.4s, v1.s[2]\n"
- "st1 {v13.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v15.4s, v7.4s, v1.s[3]\n"
- "st1 {v14.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v16.4s, v7.4s, v2.s[0]\n"
- "st1 {v15.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v17.4s, v7.4s, v2.s[1]\n"
- "st1 {v16.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v18.4s, v7.4s, v2.s[2]\n"
- "st1 {v17.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v19.4s, v7.4s, v2.s[3]\n"
- "st1 {v18.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v20.4s, v7.4s, v3.s[0]\n"
- "st1 {v19.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v21.4s, v7.4s, v3.s[1]\n"
- "st1 {v20.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v22.4s, v7.4s, v3.s[2]\n"
- "st1 {v21.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v23.4s, v7.4s, v3.s[3]\n"
- "st1 {v22.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v24.4s, v7.4s, v4.s[0]\n"
- "st1 {v23.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v25.4s, v7.4s, v4.s[1]\n"
- "st1 {v24.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v26.4s, v7.4s, v4.s[2]\n"
- "st1 {v25.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v27.4s, v7.4s, v4.s[3]\n"
- "st1 {v26.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v28.4s, v7.4s, v5.s[0]\n"
- "st1 {v27.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v29.4s, v7.4s, v5.s[1]\n"
- "st1 {v28.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v30.4s, v7.4s, v5.s[2]\n"
- "st1 {v29.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v31.4s, v7.4s, v5.s[3]\n"
- "st1 {v30.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "b 3f\n"
-
- // Odd-k tail: one depth step, stores interleaved with the final FMLAs.
- "2:\n"
- "ld1 {v3.4s, v4.4s, v5.4s}, [%[lhs_ptr]], #48\n"
- "fmla v8.4s, v6.4s, v0.s[0]\n"
- "fmla v9.4s, v6.4s, v0.s[1]\n"
- "st1 {v8.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v10.4s, v6.4s, v0.s[2]\n"
- "st1 {v9.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v11.4s, v6.4s, v0.s[3]\n"
- "st1 {v10.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v12.4s, v6.4s, v1.s[0]\n"
- "st1 {v11.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v13.4s, v6.4s, v1.s[1]\n"
- "st1 {v12.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v14.4s, v6.4s, v1.s[2]\n"
- "st1 {v13.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v15.4s, v6.4s, v1.s[3]\n"
- "st1 {v14.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v16.4s, v6.4s, v2.s[0]\n"
- "st1 {v15.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v17.4s, v6.4s, v2.s[1]\n"
- "st1 {v16.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v18.4s, v6.4s, v2.s[2]\n"
- "st1 {v17.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v19.4s, v6.4s, v2.s[3]\n"
- "st1 {v18.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v20.4s, v6.4s, v3.s[0]\n"
- "st1 {v19.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v21.4s, v6.4s, v3.s[1]\n"
- "st1 {v20.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v22.4s, v6.4s, v3.s[2]\n"
- "st1 {v21.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v23.4s, v6.4s, v3.s[3]\n"
- "st1 {v22.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v24.4s, v6.4s, v4.s[0]\n"
- "st1 {v23.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v25.4s, v6.4s, v4.s[1]\n"
- "st1 {v24.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v26.4s, v6.4s, v4.s[2]\n"
- "st1 {v25.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v27.4s, v6.4s, v4.s[3]\n"
- "st1 {v26.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v28.4s, v6.4s, v5.s[0]\n"
- "st1 {v27.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v29.4s, v6.4s, v5.s[1]\n"
- "st1 {v28.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v30.4s, v6.4s, v5.s[2]\n"
- "st1 {v29.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "fmla v31.4s, v6.4s, v5.s[3]\n"
- "st1 {v30.4s}, [x0]\n"
- "add x0, x0, %[nstride]\n"
- "3:\n"
- "st1 {v31.4s}, [x0]\n"
- : [lhs_ptr] "+r"(lhs_ptr), [rhs_ptr] "+r"(rhs_ptr), [res_ptr] "+r"(res_ptr),
- [nk] "+r"(nk)
- : [oddk] "r"(oddk), [k0] "r"(k0), [nstride] "r"(nstride)
- : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
- "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22",
- "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
-}
-
-#else // __aarch64__
-static void sgemm_rowmajor_micro_kernel_6x8(const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k, const int k0,
- const int stride)
-{
- int nk = k >> 2;
- int rk = k & 3;
-
- const int nstride = stride << 2;
-
- if (rk == 0)
- {
- nk--;
- rk = 4;
- }
-
- __asm __volatile("vld1.32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vld1.32 {d4-d5}, [%[rhs_ptr]]!\n"
-
- "cmp %[k0], #0\n"
- "beq 0f\n"
-
- "mov r0, %[res_ptr]\n"
-
- "vld1.f32 {d8-d11}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d12-d15}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d16-d19}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d20-d23}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d24-d27}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d28-d31}, [r0]\n"
- "b 1f\n"
-
- "0:\n"
- "vmov.i32 q4, #0\n"
- "vmov.i32 q5, #0\n"
- "vmov.i32 q6, #0\n"
- "pld [%[lhs_ptr], #48]\n"
- "vmov.i32 q7, #0\n"
- "pld [%[rhs_ptr], #48]\n"
- "vmov.i32 q8, #0\n"
- "pld [%[lhs_ptr], #112]\n"
- "vmov.i32 q9, #0\n"
- "pld [%[rhs_ptr], #112]\n"
- "vmov.i32 q10, #0\n"
- "vmov.i32 q11, #0\n"
- "vmov.i32 q12, #0\n"
- "vmov.i32 q13, #0\n"
- "pld [%[lhs_ptr], #176]\n"
- "vmov.i32 q14, #0\n"
- "pld [%[rhs_ptr], #176]\n"
- "vmov.i32 q15, #0\n"
-
- "1:\n"
- "cmp %[nk], #0\n"
- "beq 6f\n"
- "vmla.f32 q4, q2, d0[0]\n"
- "vld1.32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q6, q2, d0[1]\n"
- "vmla.f32 q8, q2, d1[0]\n"
- "vld1.32 {d6-d7}, [%[rhs_ptr]]!\n"
- "vmla.f32 q10, q2, d1[1]\n"
- "vmla.f32 q12, q2, d2[0]\n"
- "vmla.f32 q14, q2, d2[1]\n"
- "vld1.32 {d4-d5}, [%[rhs_ptr]]!\n"
-
- "vmla.f32 q5, q3, d0[0]\n"
- "vmla.f32 q7, q3, d0[1]\n"
- "vmla.f32 q9, q3, d1[0]\n"
- "vmla.f32 q11, q3, d1[1]\n"
- "vld1.32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vmla.f32 q13, q3, d2[0]\n"
- "vmla.f32 q15, q3, d2[1]\n"
- "vld1.32 {d6-d7}, [%[rhs_ptr]]!\n"
-
- "vmla.f32 q4, q2, d3[0]\n"
- "subs %[nk], %[nk], #1\n"
- "vmla.f32 q6, q2, d3[1]\n"
- "pld [%[lhs_ptr], #208]\n"
- "vmla.f32 q8, q2, d0[0]\n"
- "vmla.f32 q10, q2, d0[1]\n"
- "pld [%[rhs_ptr], #192]\n"
- "vmla.f32 q12, q2, d1[0]\n"
- "vmla.f32 q14, q2, d1[1]\n"
- "vld1.32 {d4-d5}, [%[rhs_ptr]]!\n"
-
- "vmla.f32 q5, q3, d3[0]\n"
- "vmla.f32 q7, q3, d3[1]\n"
- "vld1.32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q9, q3, d0[0]\n"
- "vmla.f32 q11, q3, d0[1]\n"
- "vmla.f32 q13, q3, d1[0]\n"
- "vmla.f32 q15, q3, d1[1]\n"
- "vld1.32 {d0-d1}, [%[lhs_ptr]]!\n"
-
- "vmla.f32 q4, q2, d2[0]\n"
- "vmla.f32 q6, q2, d2[1]\n"
- "vld1.32 {d6-d7}, [%[rhs_ptr]]!\n"
- "vmla.f32 q8, q2, d3[0]\n"
- "vmla.f32 q10, q2, d3[1]\n"
- "pld [%[lhs_ptr], #240]\n"
- "vmla.f32 q12, q2, d0[0]\n"
- "vmla.f32 q14, q2, d0[1]\n"
- "vld1.32 {d4-d5}, [%[rhs_ptr]]!\n"
-
- "vmla.f32 q5, q3, d2[0]\n"
- "vmla.f32 q7, q3, d2[1]\n"
- "pld [%[rhs_ptr], #208]\n"
- "vmla.f32 q9, q3, d3[0]\n"
- "vmla.f32 q11, q3, d3[1]\n"
- "vld1.32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q13, q3, d0[0]\n"
- "vmla.f32 q15, q3, d0[1]\n"
- "vld1.32 {d6-d7}, [%[rhs_ptr]]!\n"
-
- "vmla.f32 q4, q2, d1[0]\n"
- "vmla.f32 q6, q2, d1[1]\n"
- "vmla.f32 q8, q2, d2[0]\n"
- "vmla.f32 q10, q2, d2[1]\n"
- "vmla.f32 q12, q2, d3[0]\n"
- "vmla.f32 q14, q2, d3[1]\n"
- "vld1.32 {d4-d5}, [%[rhs_ptr]]!\n"
-
- "vmla.f32 q5, q3, d1[0]\n"
- "vmla.f32 q7, q3, d1[1]\n"
- "vld1.32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vmla.f32 q9, q3, d2[0]\n"
- "vmla.f32 q11, q3, d2[1]\n"
- "vmla.f32 q13, q3, d3[0]\n"
- "vmla.f32 q15, q3, d3[1]\n"
- "bne 1b\n"
-
- "6:\n"
- "mov r0, %[res_ptr]\n"
- "subs %[rk], %[rk], #1\n"
- "beq 3f\n"
-
- "vmla.f32 q4, q2, d0[0]\n"
- "vld1.32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q6, q2, d0[1]\n"
- "vmla.f32 q8, q2, d1[0]\n"
- "vld1.32 {d6-d7}, [%[rhs_ptr]]!\n"
- "vmla.f32 q10, q2, d1[1]\n"
- "vmla.f32 q12, q2, d2[0]\n"
- "subs %[rk], %[rk], #1\n"
- "vmla.f32 q14, q2, d2[1]\n"
- "vld1.32 {d4-d5}, [%[rhs_ptr]]!\n"
-
- "vmla.f32 q5, q3, d0[0]\n"
- "vmla.f32 q7, q3, d0[1]\n"
- "vmla.f32 q9, q3, d1[0]\n"
- "vmla.f32 q11, q3, d1[1]\n"
- "vld1.32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vmla.f32 q13, q3, d2[0]\n"
- "vmla.f32 q15, q3, d2[1]\n"
- "vld1.32 {d6-d7}, [%[rhs_ptr]]!\n"
- "beq 4f\n"
-
- "vmla.f32 q4, q2, d3[0]\n"
- "vmla.f32 q6, q2, d3[1]\n"
- "subs %[rk], %[rk], #1\n"
- "vmla.f32 q8, q2, d0[0]\n"
- "vmla.f32 q10, q2, d0[1]\n"
- "vmla.f32 q12, q2, d1[0]\n"
- "vmla.f32 q14, q2, d1[1]\n"
- "vld1.32 {d4-d5}, [%[rhs_ptr]]!\n"
-
- "vmla.f32 q5, q3, d3[0]\n"
- "vmla.f32 q7, q3, d3[1]\n"
- "vld1.32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q9, q3, d0[0]\n"
- "vmla.f32 q11, q3, d0[1]\n"
- "vmla.f32 q13, q3, d1[0]\n"
- "vmla.f32 q15, q3, d1[1]\n"
- "vld1.32 {d6-d7}, [%[rhs_ptr]]!\n"
- "beq 5f\n"
-
- "vld1.32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vmla.f32 q4, q2, d2[0]\n"
- "vmla.f32 q6, q2, d2[1]\n"
- "vmla.f32 q8, q2, d3[0]\n"
- "vmla.f32 q10, q2, d3[1]\n"
- "vmla.f32 q12, q2, d0[0]\n"
- "vmla.f32 q14, q2, d0[1]\n"
- "vld1.32 {d4-d5}, [%[rhs_ptr]]!\n"
-
- "vmla.f32 q5, q3, d2[0]\n"
- "vmla.f32 q7, q3, d2[1]\n"
- "vmla.f32 q9, q3, d3[0]\n"
- "vmla.f32 q11, q3, d3[1]\n"
- "vld1.32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q13, q3, d0[0]\n"
- "vmla.f32 q15, q3, d0[1]\n"
- "vld1.32 {d6-d7}, [%[rhs_ptr]]!\n"
-
- "vmla.f32 q4, q2, d1[0]\n"
- "vmla.f32 q5, q3, d1[0]\n"
- "vst1.32 {d8-d11}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q6, q2, d1[1]\n"
- "vmla.f32 q7, q3, d1[1]\n"
- "vst1.32 {d12-d15}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q8, q2, d2[0]\n"
- "vmla.f32 q9, q3, d2[0]\n"
- "vst1.32 {d16-d19}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q10, q2, d2[1]\n"
- "vmla.f32 q11, q3, d2[1]\n"
- "vst1.32 {d20-d23}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q12, q2, d3[0]\n"
- "vmla.f32 q13, q3, d3[0]\n"
- "vst1.32 {d24-d27}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q14, q2, d3[1]\n"
- "vmla.f32 q15, q3, d3[1]\n"
- "b 2f\n"
-
- "3:\n"
- "vld1.32 {d6-d7}, [%[rhs_ptr]]!\n"
- "vmla.f32 q4, q2, d0[0]\n"
- "vmla.f32 q5, q3, d0[0]\n"
- "vst1.32 {d8-d11}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q6, q2, d0[1]\n"
- "vmla.f32 q7, q3, d0[1]\n"
- "vst1.32 {d12-d15}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q8, q2, d1[0]\n"
- "vld1.32 {d2}, [%[lhs_ptr]]!\n"
- "vmla.f32 q9, q3, d1[0]\n"
- "vst1.32 {d16-d19}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q10, q2, d1[1]\n"
- "vmla.f32 q11, q3, d1[1]\n"
- "vst1.32 {d20-d23}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q12, q2, d2[0]\n"
- "vmla.f32 q13, q3, d2[0]\n"
- "vst1.32 {d24-d27}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q14, q2, d2[1]\n"
- "vmla.f32 q15, q3, d2[1]\n"
- "b 2f\n"
-
- "4:\n"
- "vmla.f32 q4, q2, d3[0]\n"
- "vmla.f32 q5, q3, d3[0]\n"
- "vst1.32 {d8-d11}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q6, q2, d3[1]\n"
- "vmla.f32 q7, q3, d3[1]\n"
- "vst1.32 {d12-d15}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q8, q2, d0[0]\n"
- "vmla.f32 q9, q3, d0[0]\n"
- "vst1.32 {d16-d19}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q10, q2, d0[1]\n"
- "vmla.f32 q11, q3, d0[1]\n"
- "vst1.32 {d20-d23}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q12, q2, d1[0]\n"
- "vmla.f32 q13, q3, d1[0]\n"
- "vst1.32 {d24-d27}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q14, q2, d1[1]\n"
- "vmla.f32 q15, q3, d1[1]\n"
- "b 2f\n"
-
- "5:\n"
- "vld1.32 {d0}, [%[lhs_ptr]]!\n"
- "vmla.f32 q4, q2, d2[0]\n"
- "vmla.f32 q5, q3, d2[0]\n"
- "vst1.32 {d8-d11}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q6, q2, d2[1]\n"
- "vmla.f32 q7, q3, d2[1]\n"
- "vst1.32 {d12-d15}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q8, q2, d3[0]\n"
- "vmla.f32 q9, q3, d3[0]\n"
- "vst1.32 {d16-d19}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q10, q2, d3[1]\n"
- "vmla.f32 q11, q3, d3[1]\n"
- "vst1.32 {d20-d23}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q12, q2, d0[0]\n"
- "vmla.f32 q13, q3, d0[0]\n"
- "vst1.32 {d24-d27}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q14, q2, d0[1]\n"
- "vmla.f32 q15, q3, d0[1]\n"
- "2:\n"
- "vst1.32 {d28-d31}, [r0]\n"
- : [lhs_ptr] "+r"(lhs_ptr), [rhs_ptr] "+r"(rhs_ptr), [res_ptr] "+r"(res_ptr),
- [nk] "+r"(nk), [rk] "+r"(rk)
- : [k0] "r"(k0), [nstride] "r"(nstride)
- : "r0", "r1", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10",
- "q11", "q12", "q13", "q14", "q15", "cc");
-}
-
-static void sgemm_rowmajor_micro_kernel_4x12(const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k, const int k0,
- const int stride)
-{
- int rk = (k & 1);
- int nk = (k + 1) / 2;
-
- const int nstride = stride << 2;
-
- asm volatile("vld1.f32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vld1.f32 {d4-d5}, [%[rhs_ptr]]!\n"
-
- "cmp %[k0], #0\n"
- "beq 0f\n"
-
- "mov r1, %[res_ptr]\n"
-
- "subs %[nk], %[nk], #1\n"
- "mov r0, r1\n"
- "vld1.f32 {d8-d9}, [r0]!\n"
- "add r1, %[nstride]\n"
- "vld1.f32 {d16-d17}, [r0]!\n"
- "vld1.f32 {d24-d25}, [r0]\n"
- "mov r0, r1\n"
- "vld1.f32 {d10-d11}, [r0]!\n"
- "add r1, %[nstride]\n"
- "vld1.f32 {d18-d19}, [r0]!\n"
- "vld1.f32 {d26-d27}, [r0]\n"
- "mov r0, r1\n"
- "vld1.f32 {d12-d13}, [r0]!\n"
- "add r1, %[nstride]\n"
- "vld1.f32 {d20-d21}, [r0]!\n"
- "vld1.f32 {d28-d29}, [r0]\n"
- "mov r0, r1\n"
- "vld1.f32 {d14-d15}, [r0]!\n"
- "vld1.f32 {d22-d23}, [r0]!\n"
- "vld1.f32 {d30-d31}, [r0]\n"
- "beq 2f\n"
-
- "b 1f\n"
-
- "0:\n"
- "veor q4, q4\n"
- "subs %[nk],%[nk], #1\n"
- "vmov.f32 q8, q4\n"
- "vmov.f32 q12, q4\n"
- "vmov.f32 q5, q4\n"
- "vmov.f32 q9, q4\n"
- "vmov.f32 q13, q4\n"
- "vmov.f32 q6, q4\n"
- "vmov.f32 q10, q4\n"
- "vmov.f32 q14, q4\n"
- "vmov.f32 q7, q4\n"
- "vmov.f32 q11, q4\n"
- "vmov.f32 q15, q4\n"
-
- "beq 2f\n"
-
- "1:\n"
- "vld1.f32 {d6-d7}, [%[rhs_ptr]]!\n"
- "vmla.f32 q4, q2, d0[0]\n"
- "vmla.f32 q5, q2, d0[1]\n"
- "vmla.f32 q6, q2, d1[0]\n"
- "vmla.f32 q7, q2, d1[1]\n"
- "vld1.f32 {d4-d5}, [%[rhs_ptr]]!\n"
- "vmla.f32 q8, q3, d0[0]\n"
- "vmla.f32 q9, q3, d0[1]\n"
- "vld1.f32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q10, q3, d1[0]\n"
- "vmla.f32 q11, q3, d1[1]\n"
- "vld1.f32 {d6-d7}, [%[rhs_ptr]]!\n"
- "vmla.f32 q12, q2, d0[0]\n"
- "vmla.f32 q13, q2, d0[1]\n"
- "pld [%[lhs_ptr], #208]\n"
- "vmla.f32 q14, q2, d1[0]\n"
- "pld [%[rhs_ptr], #192]\n"
- "vmla.f32 q15, q2, d1[1]\n"
-
- "vld1.f32 {d4-d5}, [%[rhs_ptr]]!\n"
- "vmla.f32 q4, q3, d2[0]\n"
- "vmla.f32 q5, q3, d2[1]\n"
- "vmla.f32 q6, q3, d3[0]\n"
- "vmla.f32 q7, q3, d3[1]\n"
- "vld1.f32 {d6-d7}, [%[rhs_ptr]]!\n"
- "vmla.f32 q8, q2, d2[0]\n"
- "vmla.f32 q9, q2, d2[1]\n"
- "vld1.f32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vmla.f32 q10, q2, d3[0]\n"
- "vmla.f32 q11, q2, d3[1]\n"
- "vld1.f32 {d4-d5}, [%[rhs_ptr]]!\n"
- "vmla.f32 q12, q3, d2[0]\n"
- "vmla.f32 q13, q3, d2[1]\n"
- "subs %[nk],%[nk], #1\n"
- "pld [%[lhs_ptr], #240]\n"
- "vmla.f32 q14, q3, d3[0]\n"
- "pld [%[rhs_ptr], #208]\n"
- "vmla.f32 q15, q3, d3[1]\n"
- "bne 1b\n"
-
- "2:\n"
- "cmp %[rk], #1\n"
- "beq 3f\n"
-
- "vld1.f32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vld1.f32 {d6-d7}, [%[rhs_ptr]]!\n"
- "vmla.f32 q4, q2, d0[0]\n"
- "vmla.f32 q5, q2, d0[1]\n"
- "vmla.f32 q6, q2, d1[0]\n"
- "vmla.f32 q7, q2, d1[1]\n"
- "vld1.f32 {d4-d5}, [%[rhs_ptr]]!\n"
- "vmla.f32 q8, q3, d0[0]\n"
- "vmla.f32 q9, q3, d0[1]\n"
- "vmla.f32 q10, q3, d1[0]\n"
- "vmla.f32 q11, q3, d1[1]\n"
- "vld1.f32 {d6-d7}, [%[rhs_ptr]]!\n"
- "vmla.f32 q12, q2, d0[0]\n"
- "vmla.f32 q13, q2, d0[1]\n"
- "vmla.f32 q14, q2, d1[0]\n"
- "vmla.f32 q15, q2, d1[1]\n"
-
- "vld1.f32 {d4-d5}, [%[rhs_ptr]]!\n"
- "vld1.f32 {d0-d1}, [%[rhs_ptr]]!\n"
- "mov r1, %[res_ptr]\n"
- "mov r0, r1\n"
- "vmla.f32 q4, q3, d2[0]\n"
- "vmla.f32 q8, q2, d2[0]\n"
- "vmla.f32 q12, q0, d2[0]\n"
- "vst1.f32 {d8-d9}, [r0]!\n"
- "add r1, %[nstride]\n"
- "vmla.f32 q5, q3, d2[1]\n"
- "vst1.f32 {d16-d17}, [r0]!\n"
- "vmla.f32 q9, q2, d2[1]\n"
- "vst1.f32 {d24-d25}, [r0]\n"
- "mov r0, r1\n"
- "vmla.f32 q13, q0, d2[1]\n"
- "vst1.f32 {d10-d11}, [r0]!\n"
- "vmla.f32 q6, q3, d3[0]\n"
- "add r1, %[nstride]\n"
- "vst1.f32 {d18-d19}, [r0]!\n"
- "vmla.f32 q10, q2, d3[0]\n"
- "vst1.f32 {d26-d27}, [r0]\n"
- "mov r0, r1\n"
- "vmla.f32 q14, q0, d3[0]\n"
- "vst1.f32 {d12-d13}, [r0]!\n"
- "add r1, %[nstride]\n"
- "vmla.f32 q7, q3, d3[1]\n"
- "vst1.f32 {d20-d21}, [r0]!\n"
- "vmla.f32 q11, q2, d3[1]\n"
- "vst1.f32 {d28-d29}, [r0]\n"
- "mov r0, r1\n"
- "vmla.f32 q15, q0, d3[1]\n"
- "b 4f\n"
-
- "3:\n"
- "vld1.f32 {d6-d7}, [%[rhs_ptr]]!\n"
- "vld1.f32 {d2-d3}, [%[rhs_ptr]]!\n"
- "mov r1, %[res_ptr]\n"
- "mov r0, r1\n"
- "vmla.f32 q4, q2, d0[0]\n"
- "vmla.f32 q8, q3, d0[0]\n"
- "vmla.f32 q12, q1, d0[0]\n"
- "vst1.f32 {d8-d9}, [r0]!\n"
- "add r1, %[nstride]\n"
- "vmla.f32 q5, q2, d0[1]\n"
- "vst1.f32 {d16-d17}, [r0]!\n"
- "vmla.f32 q9, q3, d0[1]\n"
- "vst1.f32 {d24-d25}, [r0]\n"
- "mov r0, r1\n"
- "vmla.f32 q13, q1, d0[1]\n"
- "vst1.f32 {d10-d11}, [r0]!\n"
- "vmla.f32 q6, q2, d1[0]\n"
- "add r1, %[nstride]\n"
- "vst1.f32 {d18-d19}, [r0]!\n"
- "vmla.f32 q10, q3, d1[0]\n"
- "vst1.f32 {d26-d27}, [r0]\n"
- "mov r0, r1\n"
- "vmla.f32 q14, q1, d1[0]\n"
- "vst1.f32 {d12-d13}, [r0]!\n"
- "add r1, %[nstride]\n"
- "vmla.f32 q7, q2, d1[1]\n"
- "vst1.f32 {d20-d21}, [r0]!\n"
- "vmla.f32 q11, q3, d1[1]\n"
- "vst1.f32 {d28-d29}, [r0]\n"
- "mov r0, r1\n"
- "vmla.f32 q15, q1, d1[1]\n"
-
- "4:\n"
- "vst1.f32 {d14-d15}, [r0]!\n"
- "vst1.f32 {d22-d23}, [r0]!\n"
- "vst1.f32 {d30-d31}, [r0]\n"
-
- : [lhs_ptr] "+r"(lhs_ptr), [rhs_ptr] "+r"(rhs_ptr), [res_ptr] "+r"(res_ptr),
- [nk] "+r"(nk), [rk] "+r"(rk)
- : [k0] "r"(k0), [nstride] "r"(nstride)
- : "r0", "r1", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10",
- "q11", "q12", "q13", "q14", "q15", "cc");
-}
-
-static void sgemm_rowmajor_micro_kernel_12x4(const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k, const int k0,
- const int stride)
-{
- int rk = (k & 1);
- int nk = (k + 1) / 2;
-
- const int nstride = stride << 2;
-
- asm volatile("vld1.f32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vld1.f32 {d4-d5}, [%[rhs_ptr]]!\n"
-
- "cmp %[k0], #0\n"
- "beq 0f\n"
-
- "mov r0, %[res_ptr]\n"
- "subs %[nk], %[nk], #1\n"
- "vld1.f32 {d8-d9}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d10-d11}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d24-d25}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d26-d27}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d28-d29}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d30-d31}, [r0]\n"
- "beq 2f\n"
- "b 1f\n"
-
- "0:\n"
- "veor q4, q4\n"
- "subs %[nk],%[nk], #1\n"
- "vmov.f32 q5, q4\n"
- "vmov.f32 q6, q4\n"
- "vmov.f32 q7, q4\n"
- "vmov.f32 q8, q4\n"
- "vmov.f32 q9, q4\n"
- "vmov.f32 q10, q4\n"
- "vmov.f32 q11, q4\n"
- "vmov.f32 q12, q4\n"
- "vmov.f32 q13, q4\n"
- "vmov.f32 q14, q4\n"
- "vmov.f32 q15, q4\n"
-
- "beq 2f\n"
-
- "1:\n"
- "vld1.f32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q4, q2, d0[0]\n"
- "vmla.f32 q5, q2, d0[1]\n"
- "vmla.f32 q6, q2, d1[0]\n"
- "vmla.f32 q7, q2, d1[1]\n"
- "vld1.f32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vmla.f32 q8, q2, d2[0]\n"
- "vmla.f32 q9, q2, d2[1]\n"
- "vld1.f32 {d6-d7}, [%[rhs_ptr]]!\n"
- "vmla.f32 q10, q2, d3[0]\n"
- "vmla.f32 q11, q2, d3[1]\n"
- "vld1.f32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q12, q2, d0[0]\n"
- "vmla.f32 q13, q2, d0[1]\n"
- "pld [%[rhs_ptr], #208]\n"
- "vmla.f32 q14, q2, d1[0]\n"
- "pld [%[lhs_ptr], #192]\n"
- "vmla.f32 q15, q2, d1[1]\n"
-
- "vld1.f32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vmla.f32 q4, q3, d2[0]\n"
- "vmla.f32 q5, q3, d2[1]\n"
- "vmla.f32 q6, q3, d3[0]\n"
- "vmla.f32 q7, q3, d3[1]\n"
- "vld1.f32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q8, q3, d0[0]\n"
- "vmla.f32 q9, q3, d0[1]\n"
- "vld1.f32 {d4-d5}, [%[rhs_ptr]]!\n"
- "vmla.f32 q10, q3, d1[0]\n"
- "vmla.f32 q11, q3, d1[1]\n"
- "vld1.f32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vmla.f32 q12, q3, d2[0]\n"
- "vmla.f32 q13, q3, d2[1]\n"
- "subs %[nk],%[nk], #1\n"
- "pld [%[rhs_ptr], #240]\n"
- "vmla.f32 q14, q3, d3[0]\n"
- "pld [%[lhs_ptr], #208]\n"
- "vmla.f32 q15, q3, d3[1]\n"
- "bne 1b\n"
-
- "2:\n"
- "cmp %[rk], #1\n"
- "beq 3f\n"
-
- "vld1.f32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q4, q2, d0[0]\n"
- "vmla.f32 q5, q2, d0[1]\n"
- "vmla.f32 q6, q2, d1[0]\n"
- "vmla.f32 q7, q2, d1[1]\n"
- "vld1.f32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vmla.f32 q8, q2, d2[0]\n"
- "vmla.f32 q9, q2, d2[1]\n"
- "vld1.f32 {d6-d7}, [%[rhs_ptr]]!\n"
- "vmla.f32 q10, q2, d3[0]\n"
- "vmla.f32 q11, q2, d3[1]\n"
- "vld1.f32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q12, q2, d0[0]\n"
- "vmla.f32 q13, q2, d0[1]\n"
- "vmla.f32 q14, q2, d1[0]\n"
- "vmla.f32 q15, q2, d1[1]\n"
-
- "mov r0, %[res_ptr]\n"
- "vld1.f32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vmla.f32 q4, q3, d2[0]\n"
- "vst1.f32 {d8-d9}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q5, q3, d2[1]\n"
- "vst1.f32 {d10-d11}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q6, q3, d3[0]\n"
- "vst1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q7, q3, d3[1]\n"
- "vst1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q8, q3, d0[0]\n"
- "vst1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q9, q3, d0[1]\n"
- "vst1.f32 {d18-d19}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q10, q3, d1[0]\n"
- "vst1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q11, q3, d1[1]\n"
- "vst1.f32 {d22-d23}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q12, q3, d2[0]\n"
- "vst1.f32 {d24-d25}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q13, q3, d2[1]\n"
- "vst1.f32 {d26-d27}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q14, q3, d3[0]\n"
- "vst1.f32 {d28-d29}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q15, q3, d3[1]\n"
- "b 4f\n"
-
- "3:\n"
- "mov r0, %[res_ptr]\n"
- "vld1.f32 {d2-d3}, [%[lhs_ptr]]!\n"
- "vmla.f32 q4, q2, d0[0]\n"
- "vst1.f32 {d8-d9}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q5, q2, d0[1]\n"
- "vst1.f32 {d10-d11}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q6, q2, d1[0]\n"
- "vst1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q7, q2, d1[1]\n"
- "vst1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vld1.f32 {d0-d1}, [%[lhs_ptr]]!\n"
- "vmla.f32 q8, q2, d2[0]\n"
- "vst1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q9, q2, d2[1]\n"
- "vst1.f32 {d18-d19}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q10, q2, d3[0]\n"
- "vst1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q11, q2, d3[1]\n"
- "vst1.f32 {d22-d23}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q12, q2, d0[0]\n"
- "vst1.f32 {d24-d25}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q13, q2, d0[1]\n"
- "vst1.f32 {d26-d27}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q14, q2, d1[0]\n"
- "vst1.f32 {d28-d29}, [r0]\n"
- "add r0, r0, %[nstride]\n"
- "vmla.f32 q15, q3, d1[1]\n"
-
- "4:\n"
- "vst1.f32 {d30-d31}, [r0]\n"
- : [lhs_ptr] "+r"(lhs_ptr), [rhs_ptr] "+r"(rhs_ptr), [res_ptr] "+r"(res_ptr),
- [nk] "+r"(nk), [rk] "+r"(rk)
- : [k0] "r"(k0), [nstride] "r"(nstride)
- : "r0", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11",
- "q12", "q13", "q14", "q15", "cc");
-}
-#endif // __aarch64__
-
-typedef void (*sgemm_rowmajoy_micro_kernel_func)(const float *, const float *, float *, const int,
- const int, const int);
-
-static sgemm_rowmajoy_micro_kernel_func sgemm_rowmajoy_micro_kernel_table[12][12] = {
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {
-
- 0, 0, 0, 0, 0,
-#if !__aarch64__
- sgemm_rowmajor_micro_kernel_4x12,
-#else // !__aarch64__
- 0,
-#endif // !__aarch64__
- 0, 0, 0, 0, 0,
-#if __aarch64__
- sgemm_rowmajor_micro_kernel_4x24
-#else // __aarch64__
- 0
-#endif // __aarch64__
- },
- {0, 0, 0,
-#if !__aarch64__
- sgemm_rowmajor_micro_kernel_6x8,
-#else // !__aarch64__
- 0,
-#endif // !__aarch64__
- 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0,
-#if __aarch64__
- sgemm_rowmajor_micro_kernel_8x12,
-#else // __aarch64__
- 0,
-#endif // __aarch64__
- 0, 0, 0, 0, 0, 0
-
- },
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-
- },
- {0,
-#if !__aarch64__
- sgemm_rowmajor_micro_kernel_12x4,
-#else // !__aarch64__
- 0,
-#endif // !__aarch64__
- 0,
-#if __aarch64__
- sgemm_rowmajor_micro_kernel_12x8,
-#else // __aarch64__
- 0,
-#endif // __aarch64__
- 0, 0, 0, 0, 0, 0, 0, 0
-
- },
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-
- },
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-
- },
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-
- },
- {0,
-#if __aarch64__
- sgemm_rowmajor_micro_kernel_24x4,
-#else // __aarch64__
- 0,
-#endif // __aarch64__
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-
- },
-
-};
-
-void _sgemm_rowmajor_macro_kernel_divnm(const int mr, const int nr, const int mb, const int nb,
- const int kb, const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k0, const int nstride,
- const int kstride)
-{
- const int nm = (mb + mr - 1) / mr;
- const int nn = (nb + nr - 1) / nr;
- const int rm = mb % mr;
- const int rn = nb % nr;
-
- sgemm_rowmajoy_micro_kernel_func sgemm_rowmajoy_micro_kernel =
- sgemm_rowmajoy_micro_kernel_table[mr / 2 - 1][nr / 2 - 1];
- if (!sgemm_rowmajoy_micro_kernel)
- return;
-
- for (int j = 0; j < nn; j++)
- {
- const int _nr = (j != nn - 1 || rn == 0) ? nr : rn;
- for (int i = 0; i < nm; i++)
- {
- const int _mr = (i != nm - 1 || rm == 0) ? mr : rm;
- if (_mr == mr && _nr == nr)
- {
- sgemm_rowmajoy_micro_kernel(&lhs_ptr[i * mr * kstride], &rhs_ptr[j * nr * kstride],
- &res_ptr[i * mr * nstride + j * nr], kb, k0, nstride);
- }
- else
- {
- float res_micro[mr * nr];
- float *res = &res_ptr[i * mr * nstride + j * nr];
-
- sgemm_rowmajoy_micro_kernel(&lhs_ptr[i * mr * kstride], &rhs_ptr[j * nr * kstride],
- res_micro, kb, 0, nr);
- if (k0 == 0)
- {
- for (int pi = 0; pi < _mr; pi++)
- {
- for (int pj = 0; pj < _nr; pj++)
- {
- res[pi * nstride + pj] = res_micro[pi * nr + pj];
- }
- }
- }
- else
- {
- for (int pi = 0; pi < _mr; pi++)
- {
- for (int pj = 0; pj < _nr; pj++)
- {
- res[pi * nstride + pj] += res_micro[pi * nr + pj];
- }
- }
- }
- }
- }
- }
-}
-
-void _sgemm_rowmajor_macro_kernel_divmn(const int mr, const int nr, const int mb, const int nb,
- const int kb, const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k0, const int nstride,
- const int kstride)
-{
- const int nm = (mb + mr - 1) / mr;
- const int nn = (nb + nr - 1) / nr;
- const int rm = mb % mr;
- const int rn = nb % nr;
-
- sgemm_rowmajoy_micro_kernel_func sgemm_rowmajoy_micro_kernel =
- sgemm_rowmajoy_micro_kernel_table[mr / 2 - 1][nr / 2 - 1];
- if (!sgemm_rowmajoy_micro_kernel)
- return;
-
- for (int j = 0; j < nm; j++)
- {
- const int _mr = (j != nm - 1 || rm == 0) ? mr : rm;
- for (int i = 0; i < nn; i++)
- {
- const int _nr = (i != nn - 1 || rn == 0) ? nr : rn;
- if (_mr == mr && _nr == nr)
- {
- sgemm_rowmajoy_micro_kernel(&lhs_ptr[j * mr * kstride], &rhs_ptr[i * nr * kstride],
- &res_ptr[j * mr * nstride + i * nr], kb, k0, nstride);
- }
- else
- {
- float res_micro[mr * nr];
- float *res = &res_ptr[j * mr * nstride + i * nr];
-
- sgemm_rowmajoy_micro_kernel(&lhs_ptr[j * mr * kstride], &rhs_ptr[i * nr * kstride],
- res_micro, kb, 0, nr);
- if (k0 == 0)
- {
- for (int pi = 0; pi < _mr; pi++)
- {
- for (int pj = 0; pj < _nr; pj++)
- {
- res[pi * nstride + pj] = res_micro[pi * nr + pj];
- }
- }
- }
- else
- {
- for (int pi = 0; pi < _mr; pi++)
- {
- for (int pj = 0; pj < _nr; pj++)
- {
- res[pi * nstride + pj] += res_micro[pi * nr + pj];
- }
- }
- }
- }
- }
- }
-}
-
-void _sgemm_colmajor_macro_kernel_divnm(const int mr, const int nr, const int mb, const int nb,
- const int kb, const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k0, const int mstride,
- const int kstride)
-{
- _sgemm_rowmajor_macro_kernel_divmn(nr, mr, nb, mb, kb, rhs_ptr, lhs_ptr, res_ptr, k0, mstride,
- kstride);
-}
-
-void _sgemm_colmajor_macro_kernel_divmn(const int mr, const int nr, const int mb, const int nb,
- const int kb, const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k0, const int mstride,
- const int kstride)
-{
- _sgemm_rowmajor_macro_kernel_divnm(nr, mr, nb, mb, kb, rhs_ptr, lhs_ptr, res_ptr, k0, mstride,
- kstride);
-}
-
-#if __aarch64__
-void _sparse_sgemm_kernel(const int nb, float lhs_data, const float *rhs_ptr, float *res_ptr)
-{
- int nn = nb >> 3;
- int rn = nb & 7;
-
- if (nn > 0)
- {
- asm volatile("mov x0, %[res_ptr]\n"
- "dup v0.2d, %[lhs_data]\n"
- "ld1 {v1.4s}, [%[rhs_ptr]], #16\n"
- "ld1 {v2.4s}, [x0], #16\n"
-
- "subs %[nn], %[nn], #1\n"
- "beq 2f\n"
-
- "1:\n"
- "ld1 {v4.4s}, [x0], #16\n"
- "ld1 {v3.4s}, [%[rhs_ptr]], #16\n"
-
- "fmla v2.4s, v1.4s, v0.s[0]\n"
- "st1 {v2.4s}, [%[res_ptr]], #16\n"
-
- "ld1 {v2.4s}, [x0], #16\n"
- "ld1 {v1.4s}, [%[rhs_ptr]], #16\n"
-
- "fmla v4.4s, v3.4s, v0.s[0]\n"
- "st1 {v4.4s}, [%[res_ptr]], #16\n"
-
- "subs %[nn], %[nn], #1\n"
- "bne 1b\n"
-
- "2:\n"
- "ld1 {v3.4s}, [%[rhs_ptr]], #16\n"
- "ld1 {v4.4s}, [x0], #16\n"
-
- "fmla v2.4s, v1.4s, v0.s[0]\n"
- "st1 {v2.4s}, [%[res_ptr]], #16\n"
-
- "fmla v4.4s, v3.4s, v0.s[0]\n"
- "st1 {v4.4s}, [%[res_ptr]], #16\n"
- : [rhs_ptr] "+r"(rhs_ptr), [res_ptr] "+r"(res_ptr), [nn] "+r"(nn)
- : [lhs_data] "r"(lhs_data)
- : "x0", "v0", "v1", "v2", "v3", "v4", "cc");
- }
- if (rn > 0)
- {
- int _nn = rn >> 2;
- int _rn = rn & 3;
-
- if (_nn > 0)
- {
- asm volatile("dup v0.2d, %[lhs_data]\n"
- "ld1 {v1.4s}, [%[rhs_ptr]], #16\n"
- "ld1 {v2.4s}, [%[res_ptr]]\n"
- "fmla v2.4s, v1.4s, v0.s[0]\n"
- "st1 {v2.4s}, [%[res_ptr]], #16\n"
- : [rhs_ptr] "+r"(rhs_ptr), [res_ptr] "+r"(res_ptr)
- : [lhs_data] "r"(lhs_data)
- : "x0", "x1", "x2", "cc");
- }
- if (_rn > 0)
- {
- for (int i = 0; i < _rn; i++)
- {
- res_ptr[i] += lhs_data * rhs_ptr[i];
- }
- }
- }
-}
-
-#else // __aarch64__
-void _sparse_sgemm_kernel(const int nb, float lhs_data, const float *rhs_ptr, float *res_ptr)
-{
- int nn = nb >> 3;
- int rn = nb & 7;
-
- if (nn > 0)
- {
- asm volatile("mov r0, %[res_ptr]\n"
- "vdup.32 d0, %[lhs_data]\n"
- "vld1.f32 {d2-d3}, [%[rhs_ptr]]!\n"
- "vld1.f32 {d4-d5}, [r0]!\n"
-
- "subs %[nn], %[nn], #1\n"
- "beq 2f\n"
-
- "1:\n"
- "vld1.f32 {d8-d9}, [r0]!\n"
- "vld1.f32 {d6-d7}, [%[rhs_ptr]]!\n"
-
- "vmla.f32 q2, q1, d0[0]\n"
- "vst1.f32 {d4-d5}, [%[res_ptr]]!\n"
-
- "vld1.f32 {d4-d5}, [r0]!\n"
- "vld1.f32 {d2-d3}, [%[rhs_ptr]]!\n"
-
- "vmla.f32 q4, q3, d0[0]\n"
- "vst1.f32 {d8-d9}, [%[res_ptr]]!\n"
-
- "subs %[nn], %[nn], #1\n"
- "bne 1b\n"
-
- "2:\n"
- "vld1.f32 {d6-d7}, [%[rhs_ptr]]!\n"
- "vld1.f32 {d8-d9}, [r0]!\n"
-
- "vmla.f32 q2, q1, d0[0]\n"
- "vst1.f32 {d4-d5}, [%[res_ptr]]!\n"
-
- "vmla.f32 q4, q3, d0[0]\n"
- "vst1.f32 {d8-d9}, [%[res_ptr]]!\n"
- : [rhs_ptr] "+r"(rhs_ptr), [res_ptr] "+r"(res_ptr), [nn] "+r"(nn)
- : [lhs_data] "r"(lhs_data)
- : "r0", "q0", "q1", "q2", "q3", "q4", "cc");
- }
- if (rn > 0)
- {
- int _nn = rn >> 2;
- int _rn = rn & 3;
-
- if (_nn > 0)
- {
- asm volatile("vdup.32 d0, %[lhs_data]\n"
- "vld1.f32 {d2-d3}, [%[rhs_ptr]]!\n"
- "vld1.f32 {d4-d5}, [%[res_ptr]]\n"
- "vmla.f32 q2, q1, d0[0]\n"
- "vst1.f32 {d4-d5}, [%[res_ptr]]!\n"
- : [rhs_ptr] "+r"(rhs_ptr), [res_ptr] "+r"(res_ptr)
- : [lhs_data] "r"(lhs_data)
- : "q0", "q1", "q2", "cc");
- }
- if (_rn > 0)
- {
- for (int i = 0; i < _rn; i++)
- {
- res_ptr[i] += lhs_data * rhs_ptr[i];
- }
- }
- }
-}
-#endif // __aarch64__
-
-} // namespace srcn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/sgemm_kernel.h b/compute/ncnn/src/srcn/sgemm_kernel.h
deleted file mode 100644
index 9e220bc33..000000000
--- a/compute/ncnn/src/srcn/sgemm_kernel.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_SGEMM_KERNEL_H__
-#define __NNFW_SRCN_SGEMM_KERNEL_H__
-
-#include "ncnn/srcn/conv_type.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-void _sgemm_rowmajor_macro_kernel_divnm(const int mr, const int nr, const int mb, const int nb,
- const int kb, const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k0, const int nstride,
- const int kstride);
-
-void _sgemm_rowmajor_macro_kernel_divmn(const int mr, const int nr, const int mb, const int nb,
- const int kb, const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k0, const int nstride,
- const int kstride);
-
-void _sgemm_colmajor_macro_kernel_divnm(const int mr, const int nr, const int mb, const int nb,
- const int kb, const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k0, const int mstride,
- const int kstride);
-
-void _sgemm_colmajor_macro_kernel_divmn(const int mr, const int nr, const int mb, const int nb,
- const int kb, const float *lhs_ptr, const float *rhs_ptr,
- float *res_ptr, const int k0, const int mstride,
- const int kstride);
-
-void _sparse_sgemm_kernel(const int nb, float lhs_data, const float *rhs_ptr, float *res_ptr);
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_SGEMM_KERNEL_H__
diff --git a/compute/ncnn/src/srcn/sgemm_pack.cc b/compute/ncnn/src/srcn/sgemm_pack.cc
deleted file mode 100644
index 8767f6c0a..000000000
--- a/compute/ncnn/src/srcn/sgemm_pack.cc
+++ /dev/null
@@ -1,2316 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <stdlib.h>
-#include <arm_neon.h>
-
-#include "ncnn/srcn/conv_type.h"
-#include "common.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-void _pack_rowmajor_notrans_lhs(const int mr, const int mb, const int kb, const int stride,
- const float *lhs_ptr, float *plhs_ptr)
-{
- const int nm = mb / mr;
- const int rm = mb % mr;
-
- switch (mr)
- {
-#if __aarch64__
- case 24:
- for (int i = 0; i < nm; i++)
- {
- int nk = kb >> 2;
- int rk = kb & 0x03;
-
- const float *lhs_temp = lhs_ptr;
- const int _stride = stride << 2;
-
- if (nk > 0)
- {
- asm volatile("0:\n"
- "mov x0, %[lhs_temp]\n"
-
- "ld1 {v4.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v5.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v4.4s, v6.4s\n"
- "zip2 v30.4s, v4.4s, v6.4s\n"
- "zip1 v29.4s, v5.4s, v7.4s\n"
- "zip2 v31.4s, v5.4s, v7.4s\n"
- "zip1 v4.4s, v28.4s, v29.4s\n"
- "zip2 v5.4s, v28.4s, v29.4s\n"
- "zip1 v6.4s, v30.4s, v31.4s\n"
- "zip2 v7.4s, v30.4s, v31.4s\n"
-
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v9.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v8.4s, v10.4s\n"
- "zip2 v30.4s, v8.4s, v10.4s\n"
- "zip1 v29.4s, v9.4s, v11.4s\n"
- "zip2 v31.4s, v9.4s, v11.4s\n"
- "zip1 v8.4s, v28.4s, v29.4s\n"
- "zip2 v9.4s, v28.4s, v29.4s\n"
- "zip1 v10.4s, v30.4s, v31.4s\n"
- "zip2 v11.4s, v30.4s, v31.4s\n"
-
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v13.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v14.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v15.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v12.4s, v14.4s\n"
- "zip2 v30.4s, v12.4s, v14.4s\n"
- "zip1 v29.4s, v13.4s, v15.4s\n"
- "zip2 v31.4s, v13.4s, v15.4s\n"
- "zip1 v12.4s, v28.4s, v29.4s\n"
- "zip2 v13.4s, v28.4s, v29.4s\n"
- "zip1 v14.4s, v30.4s, v31.4s\n"
- "zip2 v15.4s, v30.4s, v31.4s\n"
-
- "ld1 {v16.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v17.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v18.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v19.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v16.4s, v18.4s\n"
- "zip2 v30.4s, v16.4s, v18.4s\n"
- "zip1 v29.4s, v17.4s, v19.4s\n"
- "zip2 v31.4s, v17.4s, v19.4s\n"
- "zip1 v16.4s, v28.4s, v29.4s\n"
- "zip2 v17.4s, v28.4s, v29.4s\n"
- "zip1 v18.4s, v30.4s, v31.4s\n"
- "zip2 v19.4s, v30.4s, v31.4s\n"
-
- "ld1 {v20.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v21.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v22.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v23.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v20.4s, v22.4s\n"
- "zip2 v30.4s, v20.4s, v22.4s\n"
- "zip1 v29.4s, v21.4s, v23.4s\n"
- "zip2 v31.4s, v21.4s, v23.4s\n"
- "zip1 v20.4s, v28.4s, v29.4s\n"
- "zip2 v21.4s, v28.4s, v29.4s\n"
- "zip1 v22.4s, v30.4s, v31.4s\n"
- "zip2 v23.4s, v30.4s, v31.4s\n"
-
- "ld1 {v24.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v25.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v26.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v27.4s}, [x0]\n"
-
- "zip1 v28.4s, v24.4s, v26.4s\n"
- "zip2 v30.4s, v24.4s, v26.4s\n"
- "zip1 v29.4s, v25.4s, v27.4s\n"
- "zip2 v31.4s, v25.4s, v27.4s\n"
- "zip1 v24.4s, v28.4s, v29.4s\n"
- "zip2 v25.4s, v28.4s, v29.4s\n"
- "zip1 v26.4s, v30.4s, v31.4s\n"
- "zip2 v27.4s, v30.4s, v31.4s\n"
-
- "st1 {v4.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v8.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v12.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v16.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v20.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v24.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v5.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v9.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v13.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v17.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v21.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v25.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v6.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v10.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v14.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v18.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v22.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v26.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v7.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v11.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v15.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v19.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v23.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v27.4s}, [%[plhs_ptr]], #16\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[lhs_temp], %[lhs_temp], #16\n"
- "bne 0b\n"
- : [lhs_temp] "+r"(lhs_temp), [plhs_ptr] "+r"(plhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "x0", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
- "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21",
- "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
- }
-
- for (int j = 0; j < rk; j++)
- {
- plhs_ptr[0] = lhs_temp[0];
- plhs_ptr[1] = lhs_temp[stride];
- plhs_ptr[2] = lhs_temp[stride << 1];
- plhs_ptr[3] = lhs_temp[3 * stride];
- plhs_ptr[4] = lhs_temp[stride << 2];
- plhs_ptr[5] = lhs_temp[5 * stride];
- plhs_ptr[6] = lhs_temp[6 * stride];
- plhs_ptr[7] = lhs_temp[7 * stride];
- plhs_ptr[8] = lhs_temp[stride << 3];
- plhs_ptr[9] = lhs_temp[9 * stride];
- plhs_ptr[10] = lhs_temp[10 * stride];
- plhs_ptr[11] = lhs_temp[11 * stride];
- plhs_ptr[12] = lhs_temp[0];
- plhs_ptr[13] = lhs_temp[13 * stride];
- plhs_ptr[14] = lhs_temp[14 * stride];
- plhs_ptr[15] = lhs_temp[15 * stride];
- plhs_ptr[16] = lhs_temp[stride << 4];
- plhs_ptr[17] = lhs_temp[17 * stride];
- plhs_ptr[18] = lhs_temp[18 * stride];
- plhs_ptr[19] = lhs_temp[19 * stride];
- plhs_ptr[20] = lhs_temp[20 * stride];
- plhs_ptr[21] = lhs_temp[21 * stride];
- plhs_ptr[22] = lhs_temp[22 * stride];
- plhs_ptr[23] = lhs_temp[23 * stride];
- plhs_ptr += mr;
- lhs_temp++;
- }
-
- lhs_ptr += mr * stride;
- }
- break;
- case 16:
- for (int i = 0; i < nm; i++)
- {
- int nk = kb >> 2;
- int rk = kb & 0x03;
-
- const float *lhs_temp = lhs_ptr;
- const int _stride = stride << 2;
-
- if (nk > 0)
- {
- asm volatile("0:\n"
- "mov x0, %[lhs_temp]\n"
-
- "ld1 {v4.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v5.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v4.4s, v6.4s\n"
- "zip2 v30.4s, v4.4s, v6.4s\n"
- "zip1 v29.4s, v5.4s, v7.4s\n"
- "zip2 v31.4s, v5.4s, v7.4s\n"
- "zip1 v4.4s, v28.4s, v29.4s\n"
- "zip2 v5.4s, v28.4s, v29.4s\n"
- "zip1 v6.4s, v30.4s, v31.4s\n"
- "zip2 v7.4s, v30.4s, v31.4s\n"
-
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v9.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v8.4s, v10.4s\n"
- "zip2 v30.4s, v8.4s, v10.4s\n"
- "zip1 v29.4s, v9.4s, v11.4s\n"
- "zip2 v31.4s, v9.4s, v11.4s\n"
- "zip1 v8.4s, v28.4s, v29.4s\n"
- "zip2 v9.4s, v28.4s, v29.4s\n"
- "zip1 v10.4s, v30.4s, v31.4s\n"
- "zip2 v11.4s, v30.4s, v31.4s\n"
-
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v13.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v14.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v15.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v12.4s, v14.4s\n"
- "zip2 v30.4s, v12.4s, v14.4s\n"
- "zip1 v29.4s, v13.4s, v15.4s\n"
- "zip2 v31.4s, v13.4s, v15.4s\n"
- "zip1 v12.4s, v28.4s, v29.4s\n"
- "zip2 v13.4s, v28.4s, v29.4s\n"
- "zip1 v14.4s, v30.4s, v31.4s\n"
- "zip2 v15.4s, v30.4s, v31.4s\n"
-
- "ld1 {v16.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v17.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v18.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v19.4s}, [x0]\n"
-
- "zip1 v28.4s, v16.4s, v18.4s\n"
- "zip2 v30.4s, v16.4s, v18.4s\n"
- "zip1 v29.4s, v17.4s, v19.4s\n"
- "zip2 v31.4s, v17.4s, v19.4s\n"
- "zip1 v16.4s, v28.4s, v29.4s\n"
- "zip2 v17.4s, v28.4s, v29.4s\n"
- "zip1 v18.4s, v30.4s, v31.4s\n"
- "zip2 v19.4s, v30.4s, v31.4s\n"
-
- "st1 {v4.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v8.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v12.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v16.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v5.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v9.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v13.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v17.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v6.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v10.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v14.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v18.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v7.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v11.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v15.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v19.4s}, [%[plhs_ptr]], #16\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[lhs_temp], %[lhs_temp], #16\n"
- "bne 0b\n"
- : [lhs_temp] "+r"(lhs_temp), [plhs_ptr] "+r"(plhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "x0", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
- "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v28", "v29",
- "v30", "v31");
- }
-
- for (int j = 0; j < rk; j++)
- {
- plhs_ptr[0] = lhs_temp[0];
- plhs_ptr[1] = lhs_temp[stride];
- plhs_ptr[2] = lhs_temp[stride << 1];
- plhs_ptr[3] = lhs_temp[3 * stride];
- plhs_ptr[4] = lhs_temp[stride << 2];
- plhs_ptr[5] = lhs_temp[5 * stride];
- plhs_ptr[6] = lhs_temp[6 * stride];
- plhs_ptr[7] = lhs_temp[7 * stride];
- plhs_ptr[8] = lhs_temp[stride << 3];
- plhs_ptr[9] = lhs_temp[9 * stride];
- plhs_ptr[10] = lhs_temp[10 * stride];
- plhs_ptr[11] = lhs_temp[11 * stride];
- plhs_ptr[12] = lhs_temp[0];
- plhs_ptr[13] = lhs_temp[13 * stride];
- plhs_ptr[14] = lhs_temp[14 * stride];
- plhs_ptr[15] = lhs_temp[15 * stride];
- plhs_ptr += mr;
- lhs_temp++;
- }
-
- lhs_ptr += mr * stride;
- }
- break;
-#endif // __aarch64__
- case 12:
- for (int i = 0; i < nm; i++)
- {
- int nk = kb >> 2;
- int rk = kb & 0x03;
-
- const float *lhs_temp = lhs_ptr;
- const int _stride = stride << 2;
-
- if (nk > 0)
- {
-#if __aarch64__
- asm volatile("0:\n"
- "mov x0, %[lhs_temp]\n"
-
- "ld1 {v4.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v5.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v4.4s, v6.4s\n"
- "zip2 v30.4s, v4.4s, v6.4s\n"
- "zip1 v29.4s, v5.4s, v7.4s\n"
- "zip2 v31.4s, v5.4s, v7.4s\n"
- "zip1 v4.4s, v28.4s, v29.4s\n"
- "zip2 v5.4s, v28.4s, v29.4s\n"
- "zip1 v6.4s, v30.4s, v31.4s\n"
- "zip2 v7.4s, v30.4s, v31.4s\n"
-
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v9.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v8.4s, v10.4s\n"
- "zip2 v30.4s, v8.4s, v10.4s\n"
- "zip1 v29.4s, v9.4s, v11.4s\n"
- "zip2 v31.4s, v9.4s, v11.4s\n"
- "zip1 v8.4s, v28.4s, v29.4s\n"
- "zip2 v9.4s, v28.4s, v29.4s\n"
- "zip1 v10.4s, v30.4s, v31.4s\n"
- "zip2 v11.4s, v30.4s, v31.4s\n"
-
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v13.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v14.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v15.4s}, [x0]\n"
-
- "zip1 v28.4s, v12.4s, v14.4s\n"
- "zip2 v30.4s, v12.4s, v14.4s\n"
- "zip1 v29.4s, v13.4s, v15.4s\n"
- "zip2 v31.4s, v13.4s, v15.4s\n"
- "zip1 v12.4s, v28.4s, v29.4s\n"
- "zip2 v13.4s, v28.4s, v29.4s\n"
- "zip1 v14.4s, v30.4s, v31.4s\n"
- "zip2 v15.4s, v30.4s, v31.4s\n"
-
- "st1 {v4.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v8.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v12.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v5.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v9.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v13.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v6.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v10.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v14.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v7.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v11.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v15.4s}, [%[plhs_ptr]], #16\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[lhs_temp], %[lhs_temp], #16\n"
- "bne 0b\n"
- : [lhs_temp] "+r"(lhs_temp), [plhs_ptr] "+r"(plhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "x0", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
- "v12", "v13", "v14", "v15", "v28", "v29", "v30", "v31");
-#else // __aarch64__
- asm volatile("0:\n"
- "mov r0, %[lhs_temp]\n"
-
- "vld1.f32 {d8-d9}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d10-d11}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[_stride]\n"
-
- "vzip.32 q4, q6\n"
- "vzip.32 q5, q7\n"
- "vzip.32 q4, q5\n"
- "vzip.32 q6, q7\n"
-
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
- "add r0, r0, %[_stride]\n"
-
- "vzip.32 q8, q10\n"
- "vzip.32 q9, q11\n"
- "vzip.32 q8, q9\n"
- "vzip.32 q10, q11\n"
-
- "vld1.f32 {d24-d25}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d26-d27}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d28-d29}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d30-d31}, [r0]\n"
-
- "vzip.32 q12, q14\n"
- "vzip.32 q13, q15\n"
- "vzip.32 q12, q13\n"
- "vzip.32 q14, q15\n"
-
- "vst1.f32 {d8-d9}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d16-d17}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d24-d25}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d10-d11}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d18-d19}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d26-d27}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d12-d13}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d20-d21}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d28-d29}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d14-d15}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d22-d23}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d30-d31}, [%[plhs_ptr]]!\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[lhs_temp], %[lhs_temp], #16\n"
- "bne 0b\n"
- : [lhs_temp] "+r"(lhs_temp), [plhs_ptr] "+r"(plhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "r0", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11",
- "q12", "q13", "q14", "q15");
-#endif // __aarch64__
- }
-
- for (int j = 0; j < rk; j++)
- {
- plhs_ptr[0] = lhs_temp[0];
- plhs_ptr[1] = lhs_temp[stride];
- plhs_ptr[2] = lhs_temp[stride << 1];
- plhs_ptr[3] = lhs_temp[3 * stride];
- plhs_ptr[4] = lhs_temp[stride << 2];
- plhs_ptr[5] = lhs_temp[5 * stride];
- plhs_ptr[6] = lhs_temp[6 * stride];
- plhs_ptr[7] = lhs_temp[7 * stride];
- plhs_ptr[8] = lhs_temp[stride << 3];
- plhs_ptr[9] = lhs_temp[9 * stride];
- plhs_ptr[10] = lhs_temp[10 * stride];
- plhs_ptr[11] = lhs_temp[11 * stride];
- plhs_ptr += mr;
- lhs_temp++;
- }
-
- lhs_ptr += mr * stride;
- }
- break;
- case 8:
- for (int i = 0; i < nm; i++)
- {
- int nk = kb >> 2;
- int rk = kb & 0x03;
-
- const float *lhs_temp = lhs_ptr;
- const int _stride = stride << 2;
-
- if (nk > 0)
- {
-#if __aarch64__
- asm volatile("0:\n"
- "mov x0, %[lhs_temp]\n"
-
- "ld1 {v4.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v5.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v4.4s, v6.4s\n"
- "zip2 v30.4s, v4.4s, v6.4s\n"
- "zip1 v29.4s, v5.4s, v7.4s\n"
- "zip2 v31.4s, v5.4s, v7.4s\n"
- "zip1 v4.4s, v28.4s, v29.4s\n"
- "zip2 v5.4s, v28.4s, v29.4s\n"
- "zip1 v6.4s, v30.4s, v31.4s\n"
- "zip2 v7.4s, v30.4s, v31.4s\n"
-
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v9.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "zip1 v28.4s, v8.4s, v10.4s\n"
- "zip2 v30.4s, v8.4s, v10.4s\n"
- "zip1 v29.4s, v9.4s, v11.4s\n"
- "zip2 v31.4s, v9.4s, v11.4s\n"
- "zip1 v8.4s, v28.4s, v29.4s\n"
- "zip2 v9.4s, v28.4s, v29.4s\n"
- "zip1 v10.4s, v30.4s, v31.4s\n"
- "zip2 v11.4s, v30.4s, v31.4s\n"
-
- "st1 {v4.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v8.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v5.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v9.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v6.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v10.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v7.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v11.4s}, [%[plhs_ptr]], #16\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[lhs_temp], %[lhs_temp], #16\n"
- "bne 0b\n"
- : [lhs_temp] "+r"(lhs_temp), [plhs_ptr] "+r"(plhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "x0", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
- "v28", "v29", "v30", "v31");
-#else // __aarch64__
- asm volatile("0:\n"
- "mov r0, %[lhs_temp]\n"
-
- "vld1.f32 {d8-d9}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d10-d11}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[_stride]\n"
-
- "vzip.32 q4, q6\n"
- "vzip.32 q5, q7\n"
- "vzip.32 q4, q5\n"
- "vzip.32 q6, q7\n"
-
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
-
- "vzip.32 q8, q10\n"
- "vzip.32 q9, q11\n"
- "vzip.32 q8, q9\n"
- "vzip.32 q10, q11\n"
-
- "vst1.f32 {d8-d9}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d16-d17}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d10-d11}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d18-d19}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d12-d13}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d20-d21}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d14-d15}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d22-d23}, [%[plhs_ptr]]!\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[lhs_temp], %[lhs_temp], #16\n"
- "bne 0b\n"
- : [lhs_temp] "+r"(lhs_temp), [plhs_ptr] "+r"(plhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "r0", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11");
-#endif // __aarch64__
- }
-
- for (int j = 0; j < rk; j++)
- {
- plhs_ptr[0] = lhs_temp[0];
- plhs_ptr[1] = lhs_temp[stride];
- plhs_ptr[2] = lhs_temp[stride << 1];
- plhs_ptr[3] = lhs_temp[3 * stride];
- plhs_ptr[4] = lhs_temp[stride << 2];
- plhs_ptr[5] = lhs_temp[5 * stride];
- plhs_ptr[6] = lhs_temp[6 * stride];
- plhs_ptr[7] = lhs_temp[7 * stride];
- plhs_ptr += mr;
- lhs_temp++;
- }
-
- lhs_ptr += mr * stride;
- }
- break;
- case 6:
- for (int i = 0; i < nm; i++)
- {
- int nk = kb >> 2;
- int rk = kb & 0x03;
-
- const float *lhs_temp = lhs_ptr;
- const int _stride = stride << 2;
-
- if (nk > 0)
- {
-#if __aarch64__
- // TODO: 4--->6
- asm volatile("0:\n"
- "mov x0, %[lhs_temp]\n"
-
- "ld1 {v4.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v5.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v8.4s}, [x0]\n"
-
- "zip1 v28.4s, v4.4s, v6.4s\n"
- "zip2 v30.4s, v4.4s, v6.4s\n"
- "zip1 v29.4s, v5.4s, v7.4s\n"
- "zip2 v31.4s, v5.4s, v7.4s\n"
- "zip1 v4.4s, v28.4s, v29.4s\n"
- "zip2 v5.4s, v28.4s, v29.4s\n"
- "zip1 v6.4s, v30.4s, v31.4s\n"
- "zip2 v7.4s, v30.4s, v31.4s\n"
-
- "st1 {v4.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v5.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v6.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v7.4s}, [%[plhs_ptr]], #16\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[lhs_temp], %[lhs_temp], #16\n"
- "bne 0b\n"
- : [lhs_temp] "+r"(lhs_temp), [plhs_ptr] "+r"(plhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "x0", "v4", "v5", "v6", "v7", "v28", "v29", "v30", "v31");
-#else // __aarch64__
- asm volatile("0:\n"
- "mov r0, %[lhs_temp]\n"
-
- "vld1.f32 {d8-d9}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d10-d11}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "vzip.32 q4, q6\n"
- "vzip.32 q5, q7\n"
- "vzip.32 q4, q5\n"
- "vzip.32 q6, q7\n"
- "vzip.32 q8, q9\n"
-
- "vst1.f32 {d8-d9}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d16}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d10-d11}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d17}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d12-d13}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d18}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d14-d15}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d19}, [%[plhs_ptr]]!\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[lhs_temp], %[lhs_temp], #16\n"
- "bne 0b\n"
- : [lhs_temp] "+r"(lhs_temp), [plhs_ptr] "+r"(plhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "r0", "q4", "q5", "q6", "q7", "q8", "q9");
-#endif // __aarch64__
- }
-
- for (int j = 0; j < rk; j++)
- {
- plhs_ptr[0] = lhs_temp[0];
- plhs_ptr[1] = lhs_temp[stride];
- plhs_ptr[2] = lhs_temp[stride << 1];
- plhs_ptr[3] = lhs_temp[3 * stride];
- plhs_ptr[4] = lhs_temp[stride << 2];
- plhs_ptr[5] = lhs_temp[5 * stride];
- plhs_ptr += mr;
- lhs_temp++;
- }
-
- lhs_ptr += mr * stride;
- }
- break;
- case 4:
- for (int i = 0; i < nm; i++)
- {
- int nk = kb >> 2;
- int rk = kb & 0x03;
-
- const float *lhs_temp = lhs_ptr;
- const int _stride = stride << 2;
-
- if (nk > 0)
- {
-#if __aarch64__
- asm volatile("0:\n"
- "mov x0, %[lhs_temp]\n"
-
- "ld1 {v4.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v5.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "zip1 v28.4s, v4.4s, v6.4s\n"
- "zip2 v30.4s, v4.4s, v6.4s\n"
- "zip1 v29.4s, v5.4s, v7.4s\n"
- "zip2 v31.4s, v5.4s, v7.4s\n"
- "zip1 v4.4s, v28.4s, v29.4s\n"
- "zip2 v5.4s, v28.4s, v29.4s\n"
- "zip1 v6.4s, v30.4s, v31.4s\n"
- "zip2 v7.4s, v30.4s, v31.4s\n"
-
- "st1 {v4.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v5.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v6.4s}, [%[plhs_ptr]], #16\n"
- "st1 {v7.4s}, [%[plhs_ptr]], #16\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[lhs_temp], %[lhs_temp], #16\n"
- "bne 0b\n"
- : [lhs_temp] "+r"(lhs_temp), [plhs_ptr] "+r"(plhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "x0", "v4", "v5", "v6", "v7", "v28", "v29", "v30", "v31");
-#else // __aarch64__
- asm volatile("0:\n"
- "mov r0, %[lhs_temp]\n"
-
- "vld1.f32 {d8-d9}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d10-d11}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "vzip.32 q4, q6\n"
- "vzip.32 q5, q7\n"
- "vzip.32 q4, q5\n"
- "vzip.32 q6, q7\n"
-
- "vst1.f32 {d8-d9}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d10-d11}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d12-d13}, [%[plhs_ptr]]!\n"
- "vst1.f32 {d14-d15}, [%[plhs_ptr]]!\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[lhs_temp], %[lhs_temp], #16\n"
- "bne 0b\n"
- : [lhs_temp] "+r"(lhs_temp), [plhs_ptr] "+r"(plhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "r0", "q4", "q5", "q6", "q7");
-#endif // __aarch64__
- }
-
- for (int j = 0; j < rk; j++)
- {
- plhs_ptr[0] = lhs_temp[0];
- plhs_ptr[1] = lhs_temp[stride];
- plhs_ptr[2] = lhs_temp[stride << 1];
- plhs_ptr[3] = lhs_temp[3 * stride];
- plhs_ptr += mr;
- lhs_temp++;
- }
-
- lhs_ptr += mr * stride;
- }
- break;
- default:
- break;
- }
-
- if (rm > 0)
- {
- for (int j = 0; j < kb; j++)
- {
- for (int i = 0; i < rm; i++)
- {
- plhs_ptr[i] = lhs_ptr[i * stride];
- }
- for (int i = rm; i < mr; i++)
- {
- plhs_ptr[i] = 0.f;
- }
- plhs_ptr += mr;
- lhs_ptr++;
- }
- }
-}
-
/*
 * Pack a row-major, non-transposed RHS panel for the GEMM micro-kernel.
 *
 * nr     - micro-kernel panel width (one of 24/16/12/8/6/4)
 * nb     - number of RHS columns to pack
 * kb     - number of RHS rows (depth) to pack
 * stride - leading dimension of the source matrix (floats per row)
 * rhs_ptr  - source matrix block (read-only)
 * prhs_ptr - destination packed buffer, written contiguously
 *
 * Every unrolled NEON path of the original performed the same data
 * movement: for each full nr-wide column panel, copy nr contiguous floats
 * from each of the kb source rows into consecutive nr-wide slots of the
 * packed buffer.  This version expresses that movement with plain scalar
 * loops; the stores land at identical offsets with identical values.
 */
void _pack_rowmajor_notrans_rhs(const int nr, const int nb, const int kb, const int stride,
                                const float *rhs_ptr, float *prhs_ptr)
{
  const int nn = nb / nr; // number of full nr-wide panels
  const int rn = nb % nr; // trailing columns needing zero padding

  switch (nr)
  {
    // All supported panel widths share the same contiguous-copy pattern.
    case 24:
    case 16:
    case 12:
    case 8:
    case 6:
    case 4:
      for (int j = 0; j < nn; j++)
      {
        const float *src_row = rhs_ptr;
        for (int i = 0; i < kb; i++)
        {
          for (int t = 0; t < nr; t++)
          {
            prhs_ptr[t] = src_row[t];
          }
          src_row += stride; // next depth row of the source
          prhs_ptr += nr;    // next packed slot
        }
        rhs_ptr += nr; // next column panel
      }
      break;
    default:
      // Unsupported panel widths are ignored, matching the original switch.
      break;
  }

  // Trailing partial panel: copy the rn valid columns of each row and
  // zero-fill the remainder so the micro-kernel can read a full nr slot.
  if (rn > 0)
  {
    for (int i = 0; i < kb; i++)
    {
      for (int j = 0; j < rn; j++)
      {
        prhs_ptr[j] = rhs_ptr[j];
      }
      for (int j = rn; j < nr; j++)
      {
        prhs_ptr[j] = 0.f;
      }
      prhs_ptr += nr;
      rhs_ptr += stride;
    }
  }
}
-
// Pack a transposed LHS block.  Transposing the LHS makes its packing
// identical to the non-transposed RHS case, so this simply forwards with
// (mr, mb) taking the role of (nr, nb).
void _pack_rowmajor_trans_lhs(const int mr, const int mb, const int kb, const int stride,
                              const float *lhs_ptr, float *plhs_ptr)
{
  _pack_rowmajor_notrans_rhs(mr, mb, kb, stride, lhs_ptr, plhs_ptr);
}
-
// Pack a transposed RHS block.  Transposing the RHS makes its packing
// identical to the non-transposed LHS case, so this simply forwards with
// (nr, nb) taking the role of (mr, mb).
void _pack_rowmajor_trans_rhs(const int nr, const int nb, const int kb, const int stride,
                              const float *rhs_ptr, float *prhs_ptr)
{
  _pack_rowmajor_notrans_lhs(nr, nb, kb, stride, rhs_ptr, prhs_ptr);
}
-
/*
 * Scatter one nb-long row of gathered image data into the packed RHS
 * buffer.  The row is split into nr-wide pieces; piece j is written at
 * prhs_ptr + j * stride, and a trailing partial piece is zero-padded to nr.
 *
 * Every unrolled NEON case of the original copied nr contiguous floats per
 * iteration; this version performs the identical copies with a scalar loop.
 */
static inline void _pack_rowmajor_image_subn(const int nr, const int nb, const int stride,
                                             const float *buffer, float *prhs_ptr)
{
  const int nn = nb / nr; // full nr-wide pieces
  const int rn = nb % nr; // trailing columns needing zero padding

  switch (nr)
  {
    // All supported widths share the same contiguous-copy pattern.
    case 24:
    case 16:
    case 12:
    case 8:
    case 6:
    case 4:
      for (int j = 0; j < nn; j++)
      {
        for (int t = 0; t < nr; t++)
        {
          prhs_ptr[t] = buffer[t];
        }
        prhs_ptr += stride; // next packed panel slot
        buffer += nr;       // next source piece
      }
      break;
    default:
      // Unsupported widths are ignored, matching the original switch.
      break;
  }

  // Trailing partial piece: copy the rn valid values, zero-fill to nr.
  if (rn > 0)
  {
    for (int j = 0; j < rn; j++)
    {
      prhs_ptr[j] = buffer[j];
    }
    for (int j = rn; j < nr; j++)
    {
      prhs_ptr[j] = 0.f;
    }
  }
}
-
/*
 * im2col-style RHS packing: for each kernel-element index i in
 * [k0, k0 + kb), gather one nb-wide row of input samples (zero outside the
 * padded image) into a scratch row, then scatter it into the packed buffer
 * via _pack_rowmajor_image_subn with panel stride nr * kb.
 *
 * nr       - micro-kernel panel width
 * nb       - number of output positions (columns) to pack
 * kb, k0   - depth count and starting kernel-element index
 * n0       - starting output position (flattened over output rows)
 * input    - source image (w x h x c, channel-major planes)
 * output   - used only for its width, to map n0/nb to output coordinates
 * params   - convolution geometry (kernel, stride, pad, dilation)
 * prhs_ptr - destination packed buffer
 */
void _pack_rowmajor_image_rhs(const int nr, const int nb, const int kb, const int k0, const int n0,
                              convMat_t *input, convMat_t *output, convParams_t *params,
                              float *prhs_ptr)
{
  const int w = input->w;
  const int h = input->h;
  const int outw = output->w;
  const int kernel_w = params->kernel_w;
  const int kernel_h = params->kernel_h;
  const int stride_w = params->stride_w;
  const int stride_h = params->stride_h;
  const int pad_w = params->pad_w;
  const int pad_h = params->pad_h;

  // Input coordinates (pre-padding) of the first packed output position.
  const int in_row0 = n0 / outw * stride_h;
  const int in_col0 = n0 % outw * stride_w;
  // seg0: columns remaining on the first (possibly partial) output row.
  int seg0 = outw - n0 % outw;
  if (seg0 > nb)
    seg0 = nb;
  // rows: number of output rows touched; segn: columns on the last row.
  int rows = (nb - seg0 + outw - 1) / outw;
  if (seg0)
    rows++;
  const int segn = (nb - seg0) % outw;

  // Scratch for one gathered row (VLA; nb must be > 0).
  float row_data[nb];

  for (int i = k0; i < kb + k0; i++)
  {
    // Decompose the flat kernel-element index into channel and offsets.
    const int ic = i / (kernel_w * kernel_h);
    const int in_row1 = ((i / kernel_w) % kernel_h) * params->dilation_h + in_row0;
    const int in_col1 = i % kernel_w * params->dilation_w;

#ifdef NCNN
    // NCNN layout pads each channel plane to a 16-byte multiple.
    const float *input_data = input->data + ic * alignSize(w * h, 16 / sizeof(float));
#else // NCNN
    const float *input_data = input->data + ic * w * h;
#endif // NCNN
    float *buffer = row_data;
    int in_row = in_row1 - pad_h;

    for (int out_rows = rows; out_rows; out_rows--)
    {
      // Full output rows use outw columns; the last row uses segn
      // (unless segn == 0, i.e. the last row is exactly full).
      int cols = (out_rows != 1 || segn == 0) ? outw : segn;
      int in_col = in_col1 - pad_w;
      if (out_rows == rows)
      {
        // First (possibly partial) row starts mid-row at in_col0.
        cols = seg0;
        in_col += in_col0;
      }
      if ((unsigned int)in_row < (unsigned int)h) // in-bounds (also rejects negatives)
      {
        for (int out_col = cols; out_col; out_col--)
        {
          if ((unsigned int)in_col < (unsigned int)w)
            *(buffer++) = input_data[in_row * w + in_col];
          else
            *(buffer++) = 0; // horizontal padding
          in_col += stride_w;
        }
      }
      else
      {
        // Entire row falls in vertical padding.
        for (int out_col = cols; out_col; out_col--)
        {
          *(buffer++) = 0;
          in_col += stride_w;
        }
      }

      in_row += stride_h;
    }

    // Scatter the gathered row into the packed layout (panel stride nr*kb).
    _pack_rowmajor_image_subn(nr, nb, nr * kb, row_data, prhs_ptr);
    prhs_ptr += nr;
  }
}
-
/*
 * Batched variant of _pack_rowmajor_image_rhs: the packed range
 * [n0, n0 + nb) may span several images of a batch, so it is split into
 * per-image segments and each segment is packed from its own image plane.
 *
 * seg_size is the number of output positions per image; data is advanced
 * to the image containing n0 before segmentation begins.
 */
void _pack_rowmajor_image_rhs_batch(const int nr, const int nb, const int kb, const int k0,
                                    const int n0, convMat_t *input, convMat_t *output,
                                    convParams_t *params, float *prhs_ptr)
{
  const int w = input->w;
  const int h = input->h;
  const int c = input->c;

#ifdef NCNN
  const int seg_size = alignSize(output->w * output->h, 16 / sizeof(float));
#else // NCNN
  const int seg_size = output->w * output->h;
#endif // NCNN

#ifdef NCNN
  float *data = input->data + (alignSize(w * h, 16 / sizeof(float)) * c) * (n0 / seg_size);
#else // NCNN
  float *data = input->data + (w * h * c) * (n0 / seg_size);
#endif // NCNN

  // seg0: positions remaining in the first (possibly partial) image;
  // nseg: number of image segments touched; segn: size of the last one.
  int seg0 = seg_size - n0 % seg_size;
  if (seg0 > nb)
    seg0 = nb;
  int nseg = (nb - seg0 + seg_size - 1) / seg_size;
  if (seg0)
    nseg++;
  const int segn = (nb - seg0) % seg_size;
  // Single-image view into the current batch element (batch dim forced to 1).
  convMat_t _input = {w, h, c, 1, data};

  for (int i = 0; i < nseg; i++)
  {
    // NOTE(review): when (nb - seg0) is an exact multiple of seg_size,
    // segn == 0 and the last segment gets _nb == 0, unlike the sibling
    // functions which treat segn == 0 as "last row is full".  Verify
    // callers never produce this case.
    const int _nb = (i == 0 ? seg0 : (i == nseg - 1 ? segn : seg_size));
    const int _n0 = (i == 0 ? seg_size - seg0 : 0);

    // NOTE(review): prhs_ptr is passed unadvanced to every segment; confirm
    // the packed layout used by _pack_rowmajor_image_rhs places successive
    // segments in distinct regions, or that callers only ever hit nseg == 1.
    _pack_rowmajor_image_rhs(nr, _nb, kb, k0, _n0, &_input, output, params, prhs_ptr);

#ifdef NCNN
    _input.data += alignSize(w * h, 16 / sizeof(float)) * c; // next batch image
#else // NCNN
    _input.data += w * h * c; // next batch image
#endif // NCNN
  }
}
-
/*
 * col2im-style accumulation: the inverse of _pack_rowmajor_image_rhs.
 * For each kernel-element index i in [m0, m0 + mb), walk the nb packed
 * result values in pres_ptr and ADD each one into its output pixel;
 * values that map outside the (padded) output are skipped.
 *
 * mb, m0   - depth count and starting kernel-element index
 * nb, n0   - count and starting index of packed positions (flattened over
 *            input->w-wide rows)
 * input    - used only for its width, to map n0/nb to coordinates
 * output   - destination image, accumulated in place
 * params   - convolution geometry (kernel, stride, pad, dilation)
 * pres_ptr - packed result values, consumed sequentially
 */
void _unpack_rowmajor_image_res(const int mb, const int nb, const int m0, const int n0,
                                convMat_t *input, convMat_t *output, convParams_t *params,
                                float *pres_ptr)
{
  const int outw = output->w;
  const int outh = output->h;
  const int w = input->w;
  const int kernel_w = params->kernel_w;
  const int kernel_h = params->kernel_h;
  const int stride_w = params->stride_w;
  const int stride_h = params->stride_h;
  const int pad_w = params->pad_w;
  const int pad_h = params->pad_h;

  // Output coordinates (pre-padding) of the first packed position.
  const int out_row0 = n0 / w * stride_h;
  const int out_col0 = n0 % w * stride_w;
  // seg0: positions remaining on the first (possibly partial) row;
  // rows: number of rows touched; segn: positions on the last row.
  int seg0 = w - n0 % w;
  if (seg0 > nb)
    seg0 = nb;
  int rows = (nb - seg0 + w - 1) / w;
  if (seg0)
    rows++;
  const int segn = (nb - seg0) % w;

  for (int i = m0; i < mb + m0; i++)
  {
    // Decompose the flat kernel-element index into channel and offsets.
    const int oc = i / (kernel_w * kernel_h);
    const int out_row1 = ((i / kernel_w) % kernel_h) * params->dilation_h + out_row0;
    const int out_col1 = i % kernel_w * params->dilation_w;

#ifdef NCNN
    // NCNN layout pads each channel plane to a 16-byte multiple.
    float *output_data = output->data + oc * alignSize(outw * outh, 16 / sizeof(float));
#else // NCNN
    float *output_data = output->data + oc * outw * outh;
#endif // NCNN
    int out_row = out_row1 - pad_h;

    for (int in_rows = rows; in_rows; in_rows--)
    {
      // Full rows use w positions; the last row uses segn
      // (unless segn == 0, i.e. the last row is exactly full).
      int cols = (in_rows != 1 || segn == 0) ? w : segn;
      int out_col = out_col1 - pad_w;
      if (in_rows == rows)
      {
        // First (possibly partial) row starts mid-row at out_col0.
        cols = seg0;
        out_col += out_col0;
      }
      if ((unsigned int)out_row < (unsigned int)outh) // in-bounds (also rejects negatives)
      {
        for (int in_col = cols; in_col; in_col--)
        {
          if ((unsigned int)out_col < (unsigned int)outw)
            output_data[out_row * outw + out_col] += *pres_ptr++; // accumulate
          else
            pres_ptr++; // value maps into padding: consume and drop
          out_col += stride_w;
        }
      }
      else
      {
        // Entire row maps into vertical padding: skip its values.
        pres_ptr += cols;
      }
      out_row += stride_h;
    }
  }
}
-
-// TODO(review): add an aarch64 (ARMv8) path and support the remaining nr cases.
-static inline void _pack_colmajor_image_rhs_sub(const int nr, const int k, const float *buffer,
- float *prhs_ptr)
-{
- int nk = k >> 2;
- int rk = k & 0x03;
-
- const int _stride = k << 2;
-
- switch (nr)
- {
- case 12:
- if (nk > 0)
- {
-#if __aarch64__
- asm volatile("0:\n"
- "mov x0, %[buffer]\n"
-
- "ld1 {v4.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v5.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v4.4s, v6.4s\n"
- "zip2 v30.4s, v4.4s, v6.4s\n"
- "zip1 v29.4s, v5.4s, v7.4s\n"
- "zip2 v31.4s, v5.4s, v7.4s\n"
- "zip1 v4.4s, v28.4s, v29.4s\n"
- "zip2 v5.4s, v28.4s, v29.4s\n"
- "zip1 v6.4s, v30.4s, v31.4s\n"
- "zip2 v7.4s, v30.4s, v31.4s\n"
-
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v9.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v11.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v8.4s, v10.4s\n"
- "zip2 v30.4s, v8.4s, v10.4s\n"
- "zip1 v29.4s, v9.4s, v11.4s\n"
- "zip2 v31.4s, v9.4s, v11.4s\n"
- "zip1 v8.4s, v28.4s, v29.4s\n"
- "zip2 v9.4s, v28.4s, v29.4s\n"
- "zip1 v10.4s, v30.4s, v31.4s\n"
- "zip2 v11.4s, v30.4s, v31.4s\n"
-
- "ld1 {v12.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v13.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v14.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v15.4s}, [x0]\n"
-
- "zip1 v28.4s, v12.4s, v14.4s\n"
- "zip2 v30.4s, v12.4s, v14.4s\n"
- "zip1 v29.4s, v13.4s, v15.4s\n"
- "zip2 v31.4s, v13.4s, v15.4s\n"
- "zip1 v12.4s, v28.4s, v29.4s\n"
- "zip2 v13.4s, v28.4s, v29.4s\n"
- "zip1 v14.4s, v30.4s, v31.4s\n"
- "zip2 v15.4s, v30.4s, v31.4s\n"
-
- "st1 {v4.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v8.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v12.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v5.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v9.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v13.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v6.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v10.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v14.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v7.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v11.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v15.4s}, [%[prhs_ptr]], #16\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[buffer], %[buffer], #16\n"
- "bne 0b\n"
- : [buffer] "+r"(buffer), [prhs_ptr] "+r"(prhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "x0", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
- "v12", "v13", "v14", "v15", "v28", "v29", "v30", "v31");
-#else // __aarch64__
- asm volatile("0:\n"
- "mov r0, %[buffer]\n"
-
- "vld1.f32 {d8-d9}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d10-d11}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[_stride]\n"
-
- "vzip.32 q4, q6\n"
- "vzip.32 q5, q7\n"
- "vzip.32 q4, q5\n"
- "vzip.32 q6, q7\n"
-
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
- "add r0, r0, %[_stride]\n"
-
- "vzip.32 q8, q10\n"
- "vzip.32 q9, q11\n"
- "vzip.32 q8, q9\n"
- "vzip.32 q10, q11\n"
-
- "vld1.f32 {d24-d25}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d26-d27}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d28-d29}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d30-d31}, [r0]\n"
-
- "vzip.32 q12, q14\n"
- "vzip.32 q13, q15\n"
- "vzip.32 q12, q13\n"
- "vzip.32 q14, q15\n"
-
- "vst1.f32 {d8-d9}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d16-d17}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d24-d25}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d10-d11}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d18-d19}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d26-d27}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d12-d13}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d20-d21}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d28-d29}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d14-d15}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d22-d23}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d30-d31}, [%[prhs_ptr]]!\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[buffer], %[buffer], #16\n"
- "bne 0b\n"
- : [buffer] "+r"(buffer), [prhs_ptr] "+r"(prhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "r0", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11",
- "q12", "q13", "q14", "q15");
-#endif // __aarch64__
- }
-
- for (int j = 0; j < rk; j++)
- {
- prhs_ptr[0] = buffer[0];
- prhs_ptr[1] = buffer[k];
- prhs_ptr[2] = buffer[k << 1];
- prhs_ptr[3] = buffer[3 * k];
- prhs_ptr[4] = buffer[k << 2];
- prhs_ptr[5] = buffer[5 * k];
- prhs_ptr[6] = buffer[6 * k];
- prhs_ptr[7] = buffer[7 * k];
- prhs_ptr[8] = buffer[k << 3];
- prhs_ptr[9] = buffer[9 * k];
- prhs_ptr[10] = buffer[10 * k];
- prhs_ptr[11] = buffer[11 * k];
- prhs_ptr += nr;
- buffer++;
- }
- break;
-
- case 8:
- if (nk > 0)
- {
-#if __aarch64__
- asm volatile("0:\n"
- "mov x0, %[buffer]\n"
-
- "ld1 {v4.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v5.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v7.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
-
- "zip1 v28.4s, v4.4s, v6.4s\n"
- "zip2 v30.4s, v4.4s, v6.4s\n"
- "zip1 v29.4s, v5.4s, v7.4s\n"
- "zip2 v31.4s, v5.4s, v7.4s\n"
- "zip1 v4.4s, v28.4s, v29.4s\n"
- "zip2 v5.4s, v28.4s, v29.4s\n"
- "zip1 v6.4s, v30.4s, v31.4s\n"
- "zip2 v7.4s, v30.4s, v31.4s\n"
-
- "ld1 {v8.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v9.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v10.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v11.4s}, [x0]\n"
-
- "zip1 v28.4s, v8.4s, v10.4s\n"
- "zip2 v30.4s, v8.4s, v10.4s\n"
- "zip1 v29.4s, v9.4s, v11.4s\n"
- "zip2 v31.4s, v9.4s, v11.4s\n"
- "zip1 v8.4s, v28.4s, v29.4s\n"
- "zip2 v9.4s, v28.4s, v29.4s\n"
- "zip1 v10.4s, v30.4s, v31.4s\n"
- "zip2 v11.4s, v30.4s, v31.4s\n"
-
- "st1 {v4.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v8.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v5.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v9.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v6.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v10.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v7.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v11.4s}, [%[prhs_ptr]], #16\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[buffer], %[buffer], #16\n"
- "bne 0b\n"
- : [buffer] "+r"(buffer), [prhs_ptr] "+r"(prhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "x0", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
- "v28", "v29", "v30", "v31");
-#else // __aarch64__
- asm volatile("0:\n"
- "mov r0, %[buffer]\n"
-
- "vld1.f32 {d8-d9}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d10-d11}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[_stride]\n"
-
- "vzip.32 q4, q6\n"
- "vzip.32 q5, q7\n"
- "vzip.32 q4, q5\n"
- "vzip.32 q6, q7\n"
-
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d20-d21}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d22-d23}, [r0]\n"
-
- "vzip.32 q8, q10\n"
- "vzip.32 q9, q11\n"
- "vzip.32 q8, q9\n"
- "vzip.32 q10, q11\n"
-
- "vst1.f32 {d8-d9}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d16-d17}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d10-d11}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d18-d19}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d12-d13}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d20-d21}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d14-d15}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d22-d23}, [%[prhs_ptr]]!\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[buffer], %[buffer], #16\n"
- "bne 0b\n"
- : [buffer] "+r"(buffer), [prhs_ptr] "+r"(prhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "r0", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11");
-#endif // __aarch64__
- }
-
- for (int j = 0; j < rk; j++)
- {
- prhs_ptr[0] = buffer[0];
- prhs_ptr[1] = buffer[k];
- prhs_ptr[2] = buffer[k << 1];
- prhs_ptr[3] = buffer[3 * k];
- prhs_ptr[4] = buffer[k << 2];
- prhs_ptr[5] = buffer[5 * k];
- prhs_ptr[6] = buffer[6 * k];
- prhs_ptr[7] = buffer[7 * k];
- prhs_ptr += nr;
- buffer++;
- }
- break;
-#if !__aarch64__
- case 6:
- if (nk > 0)
- {
- asm volatile("0:\n"
- "mov r0, %[buffer]\n"
-
- "vld1.f32 {d8-d9}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d10-d11}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d16-d17}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d18-d19}, [r0]\n"
-
- "vzip.32 q4, q6\n"
- "vzip.32 q5, q7\n"
- "vzip.32 q4, q5\n"
- "vzip.32 q6, q7\n"
- "vzip.32 q8, q9\n"
-
- "vst1.f32 {d8-d9}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d16}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d10-d11}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d17}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d12-d13}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d18}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d14-d15}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d19}, [%[prhs_ptr]]!\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[buffer], %[buffer], #16\n"
- "bne 0b\n"
- : [buffer] "+r"(buffer), [prhs_ptr] "+r"(prhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "r0", "q4", "q5", "q6", "q7", "q8", "q9");
- }
-
- for (int j = 0; j < rk; j++)
- {
- prhs_ptr[0] = buffer[0];
- prhs_ptr[1] = buffer[k];
- prhs_ptr[2] = buffer[k << 1];
- prhs_ptr[3] = buffer[3 * k];
- prhs_ptr[4] = buffer[k << 2];
- prhs_ptr[5] = buffer[5 * k];
- prhs_ptr += nr;
- buffer++;
- }
- break;
-#endif // !__aarch64__
- case 4:
- if (nk > 0)
- {
-#if __aarch64__
- asm volatile("0:\n"
- "mov x0, %[buffer]\n"
-
- "ld1 {v4.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v5.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v6.4s}, [x0]\n"
- "add x0, x0, %[_stride]\n"
- "ld1 {v7.4s}, [x0]\n"
-
- "zip1 v28.4s, v4.4s, v6.4s\n"
- "zip2 v30.4s, v4.4s, v6.4s\n"
- "zip1 v29.4s, v5.4s, v7.4s\n"
- "zip2 v31.4s, v5.4s, v7.4s\n"
- "zip1 v4.4s, v28.4s, v29.4s\n"
- "zip2 v5.4s, v28.4s, v29.4s\n"
- "zip1 v6.4s, v30.4s, v31.4s\n"
- "zip2 v7.4s, v30.4s, v31.4s\n"
-
- "st1 {v4.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v5.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v6.4s}, [%[prhs_ptr]], #16\n"
- "st1 {v7.4s}, [%[prhs_ptr]], #16\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[buffer], %[buffer], #16\n"
- "bne 0b\n"
- : [buffer] "+r"(buffer), [prhs_ptr] "+r"(prhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "x0", "v4", "v5", "v6", "v7", "v28", "v29", "v30", "v31");
-#else // __aarch64__
- asm volatile("0:\n"
- "mov r0, %[buffer]\n"
-
- "vld1.f32 {d8-d9}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d10-d11}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d12-d13}, [r0]\n"
- "add r0, r0, %[_stride]\n"
- "vld1.f32 {d14-d15}, [r0]\n"
-
- "vzip.32 q4, q6\n"
- "vzip.32 q5, q7\n"
- "vzip.32 q4, q5\n"
- "vzip.32 q6, q7\n"
-
- "vst1.f32 {d8-d9}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d10-d11}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d12-d13}, [%[prhs_ptr]]!\n"
- "vst1.f32 {d14-d15}, [%[prhs_ptr]]!\n"
-
- "subs %[nk], %[nk], #1\n"
- "add %[buffer], %[buffer], #16\n"
- "bne 0b\n"
- : [buffer] "+r"(buffer), [prhs_ptr] "+r"(prhs_ptr), [nk] "+r"(nk)
- : [_stride] "r"(_stride)
- : "cc", "memory", "r0", "q4", "q5", "q6", "q7");
-#endif // __aarch64__
- }
-
- for (int j = 0; j < rk; j++)
- {
- prhs_ptr[0] = buffer[0];
- prhs_ptr[1] = buffer[k];
- prhs_ptr[2] = buffer[k << 1];
- prhs_ptr[3] = buffer[3 * k];
- prhs_ptr += nr;
- buffer++;
- }
- break;
- default:
- break;
- }
-}
-
-void _pack_colmajor_notrans_lhs(const int mr, const int mb, const int kb, const int stride,
- const float *lhs_ptr, float *plhs_ptr)
-{
- _pack_rowmajor_notrans_rhs(mr, mb, kb, stride, lhs_ptr, plhs_ptr);
-}
-
-void _pack_colmajor_notrans_rhs(const int nr, const int nb, const int kb, const int stride,
- const float *rhs_ptr, float *prhs_ptr)
-{
- _pack_rowmajor_notrans_lhs(nr, nb, kb, stride, rhs_ptr, prhs_ptr);
-}
-
-void _pack_colmajor_trans_lhs(const int mr, const int mb, const int kb, const int stride,
- const float *lhs_ptr, float *plhs_ptr)
-{
- _pack_rowmajor_notrans_lhs(mr, mb, kb, stride, lhs_ptr, plhs_ptr);
-}
-
-void _pack_colmajor_trans_rhs(const int nr, const int nb, const int kb, const int stride,
- const float *rhs_ptr, float *prhs_ptr)
-{
- _pack_rowmajor_notrans_rhs(nr, nb, kb, stride, rhs_ptr, prhs_ptr);
-}
-
-void _pack_colmajor_image_rhs(const int nr, const int nb, const int kb, const int k0, const int n0,
- convMat_t *input, convMat_t *output, convParams_t *params,
- float *prhs_ptr)
-{
- const int w = input->w;
- const int h = input->h;
- const int c = input->c;
- const int outw = output->w;
- const int kernel_w = params->kernel_w;
- const int kernel_h = params->kernel_h;
- const int stride_w = params->stride_w;
- const int stride_h = params->stride_h;
- const int pad_w = params->pad_w;
- const int pad_h = params->pad_h;
- const float *input_data = input->data;
-
- int c0 = c - k0 % c;
- if (c0 > kb)
- c0 = kb;
- int nc = (kb - c0 + c - 1) / c;
- if (c0)
- nc++;
- const int cn = (kb - c0) % c;
-
- int seg0 = outw - n0 % outw;
- if (seg0 > nb)
- seg0 = nb;
- int rows = (nb - seg0 + outw - 1) / outw;
- if (seg0)
- rows++;
- const int segn = (nb - seg0) % outw;
-
- const int in_row0 = n0 / outw * stride_h;
- const int in_col0 = n0 % outw * stride_w;
-
- for (int i = 0; i < nc; i++)
- {
- const int channels = (i == 0 && c0 != 0) ? c0 : ((i == nc - 1 && cn != 0) ? cn : c);
- const int c1 = (i == 0) ? k0 % c : 0;
-
- float tmp_data[channels * nr];
- int nindex = 0;
- float *buffer = tmp_data;
- float *prhs_tmp = prhs_ptr;
-
- const int in_row1 = (k0 / c + i) / kernel_w % kernel_h * params->dilation_h + in_row0;
- const int in_col1 = (k0 / c + i) % kernel_w * params->dilation_w;
-
- int in_row = in_row1 - pad_h;
-
- for (int out_rows = rows; out_rows; out_rows--)
- {
- int cols = (out_rows != 1 || segn == 0) ? outw : segn;
- int in_col = in_col1 - pad_w;
- if (out_rows == rows)
- {
- cols = seg0;
- in_col += in_col0;
- }
- if ((unsigned int)in_row < (unsigned int)h)
- {
- for (int out_col = cols; out_col; out_col--)
- {
- if ((unsigned int)in_col < (unsigned int)w)
- {
- for (int j = c1; j < c1 + channels; j++)
- {
- *(buffer++) = input_data[(in_row * w + in_col) * c + j];
- }
- }
- else
- {
- for (int j = 0; j < channels; j++)
- {
- *(buffer++) = 0;
- }
- }
- in_col += stride_w;
-
- nindex++;
- if (nindex == nr)
- {
- nindex = 0;
- buffer = tmp_data;
- _pack_colmajor_image_rhs_sub(nr, channels, tmp_data, prhs_tmp);
- prhs_tmp += kb * nr;
- }
- }
- }
- else
- {
- for (int out_col = cols; out_col; out_col--)
- {
- for (int j = 0; j < channels; j++)
- {
- *(buffer++) = 0;
- }
- in_col += stride_w;
-
- nindex++;
- if (nindex == nr)
- {
- nindex = 0;
- buffer = tmp_data;
- _pack_colmajor_image_rhs_sub(nr, channels, tmp_data, prhs_tmp);
- prhs_tmp += kb * nr;
- }
- }
- }
-
- in_row += stride_h;
- }
-
- if (nindex > 0)
- {
- float *data = tmp_data;
- for (int i = 0; i < channels; i++)
- {
- for (int j = 0; j < nindex; j++)
- {
- prhs_tmp[j] = data[j * channels];
- }
- for (int j = nindex; j < nr; j++)
- {
- prhs_tmp[j] = 0.f;
- }
- prhs_tmp += nr;
- data++;
- }
- }
-
- prhs_ptr += channels * nr;
- }
-}
-
-void _pack_colmajor_image_rhs_batch(const int nr, const int nb, const int kb, const int k0,
- const int n0, convMat_t *input, convMat_t *output,
- convParams_t *params, float *prhs_ptr)
-{
- const int w = input->w;
- const int h = input->h;
- const int c = input->c;
- const int outw = output->w;
- const int kernel_w = params->kernel_w;
- const int kernel_h = params->kernel_h;
- const int stride_w = params->stride_w;
- const int stride_h = params->stride_h;
-
- int c0 = c - k0 % c;
- if (c0 > kb)
- c0 = kb;
- int nc = (kb - c0 + c - 1) / c;
- if (c0)
- nc++;
- const int cn = (kb - c0) % c;
-
- const int seg_size = output->w * output->h;
-
- const float *indata = input->data + (w * h * c) * (n0 / seg_size);
-
- int bseg0 = seg_size - n0 % seg_size;
- if (bseg0 > nb)
- bseg0 = nb;
- int bnseg = (nb - bseg0 + seg_size - 1) / seg_size;
- if (bseg0)
- bnseg++;
- const int bsegn = (nb - bseg0) % seg_size;
-
- for (int ll = 0; ll < nc; ll++)
- {
- const float *input_data = indata;
-
- const int channels = (ll == 0 && c0 != 0) ? c0 : ((ll == nc - 1 && cn != 0) ? cn : c);
- const int c1 = (ll == 0) ? k0 % c : 0;
-
- int nindex = 0;
- float *prhs_tmp = prhs_ptr;
- float tmp_data[channels * nr];
- float *buffer = tmp_data;
-
- for (int i = 0; i < bnseg; i++)
- {
- const int _nb =
- ((i == 0 && bseg0 != 0) ? bseg0 : ((i == bnseg - 1 && bsegn != 0) ? bsegn : seg_size));
- const int _n0 = (i == 0 ? n0 % seg_size : 0);
-
- int seg0 = outw - _n0 % outw;
- if (seg0 > _nb)
- seg0 = _nb;
- int rows = (_nb - seg0 + outw - 1) / outw;
- if (seg0)
- rows++;
- const int segn = (_nb - seg0) % outw;
-
- const int in_row0 = _n0 / outw * stride_h;
- const int in_col0 = _n0 % outw * stride_w;
-
- const int in_row1 = (k0 / c + ll) / kernel_w % kernel_h + in_row0;
- const int in_col1 = (k0 / c + ll) % kernel_w;
-
- int in_row = in_row1;
-
- for (int out_rows = rows; out_rows; out_rows--)
- {
- int cols = (out_rows != 1 || segn == 0) ? outw : segn;
- int in_col = in_col1;
- if (out_rows == rows)
- {
- cols = seg0;
- in_col += in_col0;
- }
- if ((unsigned int)in_row < (unsigned int)h)
- {
- for (int out_col = cols; out_col; out_col--)
- {
- if ((unsigned int)in_col < (unsigned int)w)
- {
- for (int j = c1; j < c1 + channels; j++)
- {
- *(buffer++) = input_data[(in_row * w + in_col) * c + j];
- }
- }
- else
- {
- for (int j = 0; j < channels; j++)
- {
- *(buffer++) = 0;
- }
- }
- in_col += stride_w;
-
- nindex++;
- if (nindex == nr)
- {
- nindex = 0;
- buffer = tmp_data;
- _pack_colmajor_image_rhs_sub(nr, channels, tmp_data, prhs_tmp);
- prhs_tmp += kb * nr;
- }
- }
- }
- else
- {
- for (int out_col = cols; out_col; out_col--)
- {
- for (int j = 0; j < channels; j++)
- {
- *(buffer++) = 0;
- }
- in_col += stride_w;
-
- nindex++;
- if (nindex == nr)
- {
- nindex = 0;
- buffer = tmp_data;
- _pack_colmajor_image_rhs_sub(nr, channels, tmp_data, prhs_tmp);
- prhs_tmp += kb * nr;
- }
- }
- }
-
- in_row += stride_h;
- }
-
- input_data += w * h * c;
- }
-
- if (nindex > 0)
- {
- float *data = tmp_data;
- for (int ii = 0; ii < channels; ii++)
- {
- for (int jj = 0; jj < nindex; jj++)
- {
- prhs_tmp[jj] = data[jj * channels];
- }
- for (int jj = nindex; jj < nr; jj++)
- {
- prhs_tmp[jj] = 0.f;
- }
- prhs_tmp += nr;
- data++;
- }
- }
-
- prhs_ptr += channels * nr;
- }
-}
-
-void _unpack_colmajor_image_res(const int mb, const int nb, const int m0, const int n0,
- convMat_t *input, convMat_t *output, convParams_t *params,
- float *pres_ptr)
-{
- const int w = input->w;
- const int outw = output->w;
- const int outh = output->h;
- const int outc = output->c;
- const int kernel_w = params->kernel_w;
- const int kernel_h = params->kernel_h;
- const int stride_w = params->stride_w;
- const int stride_h = params->stride_h;
- const int pad_w = params->pad_w;
- const int pad_h = params->pad_h;
- float *output_data = output->data;
-
- int c0 = outc - m0 % outc;
- if (c0 > mb)
- c0 = mb;
- int nc = (mb - c0 + outc - 1) / outc;
- if (c0)
- nc++;
- const int cn = (mb - c0) % outc;
-
- int seg0 = w - n0 % w;
- if (seg0 > nb)
- seg0 = nb;
- int rows = (nb - seg0 + w - 1) / w;
- if (seg0)
- rows++;
- const int segn = (nb - seg0) % w;
-
- const int out_row0 = n0 / w * stride_h;
- const int out_col0 = n0 % w * stride_w;
-
- for (int i = 0; i < nc; i++)
- {
- const int channels = (i == 0 && c0 != 0) ? c0 : ((i == nc - 1 && cn != 0) ? cn : outc);
- const int c1 = (i == 0) ? m0 % outc : 0;
-
- float *buffer = pres_ptr;
-
- const int out_row1 = (m0 / outc + i) / kernel_w % kernel_h * params->dilation_h + out_row0;
- const int out_col1 = (m0 / outc + i) % kernel_w * params->dilation_w;
-
- int out_row = out_row1 - pad_h;
-
- for (int in_rows = rows; in_rows; in_rows--)
- {
- int cols = (in_rows != 1 || segn == 0) ? w : segn;
- int out_col = out_col1 - pad_w;
- if (in_rows == rows)
- {
- cols = seg0;
- out_col += out_col0;
- }
- if ((unsigned int)out_row < (unsigned int)outh)
- {
- for (int in_col = cols; in_col; in_col--)
- {
- if ((unsigned int)out_col < (unsigned int)outw)
- {
- for (int j = c1; j < c1 + channels; j++)
- {
- // Note:Data competition for multi-threads
- //#pragma omp atomic //low performance
- output_data[(out_row * outw + out_col) * outc + j] += *(buffer + j - c1);
- }
- }
- buffer += mb;
- out_col += stride_w;
- }
- }
- else
- {
- buffer += cols * mb;
- }
- out_row += stride_h;
- }
-
- pres_ptr += channels;
- }
-}
-
-void _sparse_pack_rowmajor_image(const int nb, const int k0, const int n0, convMat_t *input,
- convMat_t *output, convParams_t *params, float *prhs_ptr)
-{
- const int w = input->w;
- const int h = input->h;
- const int outw = output->w;
- const int kernel_w = params->kernel_w;
- const int kernel_h = params->kernel_h;
- const int stride_w = params->stride_w;
- const int stride_h = params->stride_h;
- const int pad_w = params->pad_w;
- const int pad_h = params->pad_h;
-
- const int in_row0 = n0 / outw * stride_h;
- const int in_col0 = n0 % outw * stride_w;
- int seg0 = outw - n0 % outw;
- if (seg0 > nb)
- seg0 = nb;
- int rows = (nb - seg0 + outw - 1) / outw;
- if (seg0)
- rows++;
- const int segn = (nb - seg0) % outw;
-
- const int ic = k0 / (kernel_w * kernel_h);
- const int in_row1 = ((k0 / kernel_w) % kernel_h) * params->dilation_h + in_row0;
- const int in_col1 = k0 % kernel_w * params->dilation_w;
-
-#ifdef NCNN
- const float *input_data = input->data + ic * alignSize(w * h, 16 / sizeof(float));
-#else // NCNN
- const float *input_data = input->data + ic * w * h;
-#endif // NCNN
-
- int in_row = in_row1 - pad_h;
-
- for (int out_rows = rows; out_rows; out_rows--)
- {
- int cols = (out_rows != 1 || segn == 0) ? outw : segn;
- int in_col = in_col1 - pad_w;
- if (out_rows == rows)
- {
- cols = seg0;
- in_col += in_col0;
- }
- if ((unsigned int)in_row < (unsigned int)h)
- {
- for (int out_col = cols; out_col; out_col--)
- {
- if ((unsigned int)in_col < (unsigned int)w)
- *(prhs_ptr++) = input_data[in_row * w + in_col];
- else
- *(prhs_ptr++) = 0;
- in_col += stride_w;
- }
- }
- else
- {
- for (int out_col = cols; out_col; out_col--)
- {
- *(prhs_ptr++) = 0;
- in_col += stride_w;
- }
- }
-
- in_row += stride_h;
- }
-}
-
-} // namespace srcn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/sgemm_pack.h b/compute/ncnn/src/srcn/sgemm_pack.h
deleted file mode 100644
index d64843ebb..000000000
--- a/compute/ncnn/src/srcn/sgemm_pack.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_SGEMM_PACK_H__
-#define __NNFW_SRCN_SGEMM_PACK_H__
-
-#include "ncnn/srcn/conv_type.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-void _pack_rowmajor_notrans_lhs(const int mr, const int mb, const int kb, const int stride,
- const float *lhs_ptr, float *plhs_ptr);
-void _pack_rowmajor_notrans_rhs(const int nr, const int nb, const int kb, const int stride,
- const float *rhs_ptr, float *prhs_ptr);
-void _pack_rowmajor_trans_lhs(const int mr, const int mb, const int kb, const int stride,
- const float *lhs_ptr, float *plhs_ptr);
-void _pack_rowmajor_trans_rhs(const int nr, const int nb, const int kb, const int stride,
- const float *rhs_ptr, float *prhs_ptr);
-void _pack_rowmajor_image_rhs(const int nr, const int nb, const int kb, const int k0, const int n0,
- convMat_t *input, convMat_t *output, convParams_t *params,
- float *prhs_ptr);
-void _pack_rowmajor_image_rhs_batch(const int nr, const int nb, const int kb, const int k0,
- const int n0, convMat_t *input, convMat_t *output,
- convParams_t *params, float *prhs_ptr);
-
-void _unpack_rowmajor_image_res(const int mb, const int nb, const int m0, const int n0,
- convMat_t *input, convMat_t *output, convParams_t *params,
- float *pres_ptr);
-
-void _pack_colmajor_notrans_lhs(const int mr, const int mb, const int kb, const int stride,
- const float *lhs_ptr, float *plhs_ptr);
-void _pack_colmajor_notrans_rhs(const int nr, const int nb, const int kb, const int stride,
- const float *rhs_ptr, float *prhs_ptr);
-void _pack_colmajor_trans_lhs(const int mr, const int mb, const int kb, const int stride,
- const float *lhs_ptr, float *plhs_ptr);
-void _pack_colmajor_trans_rhs(const int nr, const int nb, const int kb, const int stride,
- const float *rhs_ptr, float *prhs_ptr);
-
-void _pack_colmajor_image_rhs(const int nr, const int nb, const int kb, const int k0, const int n0,
- convMat_t *input, convMat_t *output, convParams_t *params,
- float *prhs_ptr);
-
-void _pack_colmajor_image_rhs_batch(const int nr, const int nb, const int kb, const int k0,
- const int n0, convMat_t *input, convMat_t *output,
- convParams_t *params, float *prhs_ptr);
-
-void _unpack_colmajor_image_res(const int mb, const int nb, const int m0, const int n0,
- convMat_t *input, convMat_t *output, convParams_t *params,
- float *pres_ptr);
-
-void _sparse_pack_rowmajor_image(const int nb, const int k0, const int n0, convMat_t *input,
- convMat_t *output, convParams_t *params, float *prhs_ptr);
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_SGEMM_PACK_H__
diff --git a/compute/ncnn/src/srcn/sgemm_singlethread.cc b/compute/ncnn/src/srcn/sgemm_singlethread.cc
deleted file mode 100644
index 3de3e1214..000000000
--- a/compute/ncnn/src/srcn/sgemm_singlethread.cc
+++ /dev/null
@@ -1,689 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <stdexcept>
-
-#include "common.h"
-#include "sgemm_kernel.h"
-#include "sgemm_pack.h"
-#include "sgemm_singlethread.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-void sgemm_singlethread::param_init()
-{
- if (n_ >= m_)
- {
- shard_type_ = shardByRow;
- }
- else
- {
- shard_type_ = shardByCol;
- }
-
-#if __aarch64__
- if (major_type_ == rowMajor)
- {
- if (shard_type_ == shardByRow)
- {
- mr_ = 8;
- nr_ = 12;
- }
- else
- {
- mr_ = 12;
- nr_ = 8;
- }
- }
- else if (major_type_ == colMajor)
- {
- mr_ = 12;
- nr_ = 8;
- }
-#else // __aarch64__
- if (major_type_ == rowMajor)
- {
- // it is a bug, but i do not know why as now.
- if (ltrans_ == notrans && rtrans_ == trans)
- {
- mr_ = 4;
- nr_ = 12;
- }
- else
- {
- mr_ = 6;
- nr_ = 8;
- }
- }
- else if (major_type_ == colMajor)
- {
- mr_ = 8;
- nr_ = 6;
- }
-#endif // __aarch64__
-
- int k_div = (nr_ * sizeof_RhsScalar);
- int k_sub = (mr_ * nr_ * sizeof_ResScalar);
-
- int gen_col = GEN_COL / cache_div_;
- int min_k = MAX_K / cache_div_;
-
- const int k_cache = MIN(divup((int)(L1_CACHE_SIZE - k_sub), (int)k_div), min_k);
- bk_ = MIN(k_cache, k_);
-
- if (shard_type_ == shardByCol)
- {
- int m_sub = (bk_ * nr_ * sizeof_RhsScalar);
- int m_div = (sizeof_LhsScalar * bk_ * 2 * cache_div_);
- if (L3_CACHE_SIZE)
- m_div = (sizeof_LhsScalar * bk_ * 2);
- int m_cache = divup((L2_CACHE_SIZE - m_sub), m_div);
- bm_ = MIN(m_cache, m_);
-
- bn_ = MIN(gen_col, n_);
- if (L3_CACHE_SIZE)
- {
- int n_sub = (bk_ * bm_ * sizeof_RhsScalar);
- int n_cache = divup((L3_CACHE_SIZE - n_sub), (sizeof_LhsScalar * bk_ * 2));
- bn_ = MIN(n_cache, bn_);
- }
- }
- else
- {
- int n_sub = (bk_ * mr_ * sizeof_RhsScalar);
- int n_div = (sizeof_LhsScalar * bk_ * 2 * cache_div_);
- if (L3_CACHE_SIZE)
- n_div = (sizeof_LhsScalar * bk_ * 2);
- int n_cache = divup((L2_CACHE_SIZE - n_sub), n_div);
- bn_ = MIN(n_cache, n_);
-
- bm_ = MIN(gen_col, m_);
- if (L3_CACHE_SIZE)
- {
- int m_sub = (bk_ * bn_ * sizeof_RhsScalar);
- int m_cache = divup((L3_CACHE_SIZE - m_sub), (sizeof_LhsScalar * bk_ * 2));
- bm_ = MIN(m_cache, bm_);
- }
- }
-
- nm_ = divup(m_, bm_);
- nn_ = divup(n_, bn_);
- nk_ = divup(k_, bk_);
-
- rm_ = m_ % bm_;
- rn_ = n_ % bn_;
- rk_ = k_ % bk_;
-}
-
-sgemm_singlethread::sgemm_singlethread(sgemmType_t major_type, sgemmTrans_t ltrans,
- sgemmTrans_t rtrans, const int m, const int n, const int k,
- const float *lhs_data, const float *rhs_data,
- float *res_data, int cache_div)
- : lhs_data_(lhs_data), rhs_data_(rhs_data), res_data_(res_data), major_type_(major_type),
- ltrans_(ltrans), rtrans_(rtrans), m_(m), n_(n), k_(k), cache_div_(cache_div)
-{
- param_init();
-}
-
-sgemm_singlethread::~sgemm_singlethread() {}
-
-void sgemm_singlethread::run()
-{
- if (major_type_ == rowMajor)
- {
- if (ltrans_ == notrans && rtrans_ == notrans)
- {
- compute_rowmajor_nn();
- }
- else if (ltrans_ == notrans && rtrans_ == trans)
- {
- compute_rowmajor_nt();
- }
- else if (ltrans_ == trans && rtrans_ == notrans)
- {
- compute_rowmajor_tn();
- }
- else if (ltrans_ == trans && rtrans_ == trans)
- {
- compute_rowmajor_tt();
- }
- else
- {
- throw std::runtime_error{"error trans type."};
- }
- }
- else if (major_type_ == colMajor)
- {
- if (ltrans_ == notrans && rtrans_ == notrans)
- {
- compute_colmajor_nn();
- }
- else if (ltrans_ == notrans && rtrans_ == trans)
- {
- compute_colmajor_nt();
- }
- else if (ltrans_ == trans && rtrans_ == notrans)
- {
- compute_colmajor_tn();
- }
- else if (ltrans_ == trans && rtrans_ == trans)
- {
- compute_colmajor_tt();
- }
- else
- {
- throw std::runtime_error{"error trans type."};
- }
- }
- else
- {
- throw std::runtime_error{"error major type."};
- }
-}
-
-void sgemm_singlethread::compute_rowmajor_nn()
-{
- int mstride = (bm_ + mr_ - 1) / mr_ * mr_;
- int nstride = (bn_ + nr_ - 1) / nr_ * nr_;
-
- float plhs_ptr[mstride * bk_];
- float prhs_ptr[nstride * bk_];
-
- if (shard_type_ == shardByCol)
- {
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_rowmajor_notrans_rhs(nr_, bn, bk, n_, &rhs_data_[l * bk_ * n_ + j * bn_], prhs_ptr);
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_rowmajor_notrans_lhs(mr_, bm, bk, k_, &lhs_data_[i * bm_ * k_ + l * bk_], plhs_ptr);
-
- _sgemm_rowmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[i * bm_ * n_ + j * bn_], l, n_, bk);
- }
- }
- }
- }
- else if (shard_type_ == shardByRow)
- {
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_rowmajor_notrans_lhs(mr_, bm, bk, k_, &lhs_data_[i * bm_ * k_ + l * bk_], plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- _pack_rowmajor_notrans_rhs(nr_, bn, bk, n_, &rhs_data_[l * bk_ * n_ + j * bn_], prhs_ptr);
-
- _sgemm_rowmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[i * bm_ * n_ + j * bn_], l, n_, bk);
- }
- }
- }
- }
- else
- {
- throw std::runtime_error{"error shard type."};
- }
-}
-
-void sgemm_singlethread::compute_rowmajor_nt()
-{
- int mstride = (bm_ + mr_ - 1) / mr_ * mr_;
- int nstride = (bn_ + nr_ - 1) / nr_ * nr_;
-
- float plhs_ptr[mstride * bk_];
- float prhs_ptr[nstride * bk_];
-
- if (shard_type_ == shardByCol)
- {
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_rowmajor_trans_rhs(nr_, bn, bk, k_, &rhs_data_[j * bn_ * k_ + l * bk_], prhs_ptr);
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_rowmajor_notrans_lhs(mr_, bm, bk, k_, &lhs_data_[i * bm_ * k_ + l * bk_], plhs_ptr);
-
- _sgemm_rowmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[i * bm_ * n_ + j * bn_], l, n_, bk);
- }
- }
- }
- }
- else if (shard_type_ == shardByRow)
- {
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_rowmajor_notrans_lhs(mr_, bm, bk, k_, &lhs_data_[i * bm_ * k_ + l * bk_], plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- _pack_rowmajor_trans_rhs(nr_, bn, bk, k_, &rhs_data_[j * bn_ * k_ + l * bk_], prhs_ptr);
-
- _sgemm_rowmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[i * bm_ * n_ + j * bn_], l, n_, bk);
- }
- }
- }
- }
- else
- {
- throw std::runtime_error{"error shard type."};
- }
-}
-
-void sgemm_singlethread::compute_rowmajor_tn()
-{
- int mstride = (bm_ + mr_ - 1) / mr_ * mr_;
- int nstride = (bn_ + nr_ - 1) / nr_ * nr_;
-
- float plhs_ptr[mstride * bk_];
- float prhs_ptr[nstride * bk_];
-
- if (shard_type_ == shardByCol)
- {
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_rowmajor_notrans_rhs(nr_, bn, bk, n_, &rhs_data_[l * bk_ * n_ + j * bn_], prhs_ptr);
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_rowmajor_trans_lhs(mr_, bm, bk, m_, &lhs_data_[l * bk_ * m_ + i * bm_], plhs_ptr);
-
- _sgemm_rowmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[i * bm_ * n_ + j * bn_], l, n_, bk);
- }
- }
- }
- }
- else if (shard_type_ == shardByRow)
- {
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_rowmajor_trans_lhs(mr_, bm, bk, m_, &lhs_data_[l * bk_ * m_ + i * bm_], plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- _pack_rowmajor_notrans_rhs(nr_, bn, bk, n_, &rhs_data_[l * bk_ * n_ + j * bn_], prhs_ptr);
-
- _sgemm_rowmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[i * bm_ * n_ + j * bn_], l, n_, bk);
- }
- }
- }
- }
- else
- {
- throw std::runtime_error{"error shard type."};
- }
-}
-
-void sgemm_singlethread::compute_rowmajor_tt()
-{
- int mstride = (bm_ + mr_ - 1) / mr_ * mr_;
- int nstride = (bn_ + nr_ - 1) / nr_ * nr_;
-
- float plhs_ptr[mstride * bk_];
- float prhs_ptr[nstride * bk_];
-
- if (shard_type_ == shardByCol)
- {
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_rowmajor_trans_rhs(nr_, bn, bk, k_, &rhs_data_[j * bn_ * k_ + l * bk_], prhs_ptr);
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_rowmajor_trans_lhs(mr_, bm, bk, m_, &lhs_data_[l * bk_ * m_ + i * bm_], plhs_ptr);
-
- _sgemm_rowmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[i * bm_ * n_ + j * bn_], l, n_, bk);
- }
- }
- }
- }
- else if (shard_type_ == shardByRow)
- {
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_rowmajor_trans_lhs(mr_, bm, bk, m_, &lhs_data_[l * bk_ * m_ + i * bm_], plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- _pack_rowmajor_trans_rhs(nr_, bn, bk, k_, &rhs_data_[j * bn_ * k_ + l * bk_], prhs_ptr);
-
- _sgemm_rowmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[i * bm_ * n_ + j * bn_], l, n_, bk);
- }
- }
- }
- }
- else
- {
- throw std::runtime_error{"error shard type."};
- }
-}
-
-void sgemm_singlethread::compute_colmajor_nn()
-{
- int mstride = (bm_ + mr_ - 1) / mr_ * mr_;
- int nstride = (bn_ + nr_ - 1) / nr_ * nr_;
-
- float plhs_ptr[mstride * bk_];
- float prhs_ptr[nstride * bk_];
-
- if (shard_type_ == shardByCol)
- {
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_colmajor_notrans_rhs(nr_, bn, bk, k_, &rhs_data_[j * bn_ * k_ + l * bk_], prhs_ptr);
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_colmajor_notrans_lhs(mr_, bm, bk, m_, &lhs_data_[l * bk_ * m_ + i * bm_], plhs_ptr);
-
- _sgemm_colmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[j * bn_ * m_ + i * bm_], l, m_, bk);
- }
- }
- }
- }
- else if (shard_type_ == shardByRow)
- {
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_colmajor_notrans_lhs(mr_, bm, bk, m_, &lhs_data_[l * bk_ * m_ + i * bm_], plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- _pack_colmajor_notrans_rhs(nr_, bn, bk, k_, &rhs_data_[j * bn_ * k_ + l * bk_], prhs_ptr);
-
- _sgemm_colmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[j * bn_ * m_ + i * bm_], l, m_, bk);
- }
- }
- }
- }
- else
- {
- throw std::runtime_error{"error shard type."};
- }
-}
-
-void sgemm_singlethread::compute_colmajor_nt()
-{
- int mstride = (bm_ + mr_ - 1) / mr_ * mr_;
- int nstride = (bn_ + nr_ - 1) / nr_ * nr_;
-
- float plhs_ptr[mstride * bk_];
- float prhs_ptr[nstride * bk_];
-
- if (shard_type_ == shardByCol)
- {
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_colmajor_trans_rhs(nr_, bn, bk, n_, &rhs_data_[l * bk_ * n_ + j * bn_], prhs_ptr);
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_colmajor_notrans_lhs(mr_, bm, bk, m_, &lhs_data_[l * bk_ * m_ + i * bm_], plhs_ptr);
-
- _sgemm_colmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[j * bn_ * m_ + i * bm_], l, m_, bk);
- }
- }
- }
- }
- else if (shard_type_ == shardByRow)
- {
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_colmajor_notrans_lhs(mr_, bm, bk, m_, &lhs_data_[l * bk_ * m_ + i * bm_], plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- _pack_colmajor_trans_rhs(nr_, bn, bk, n_, &rhs_data_[l * bk_ * n_ + j * bn_], prhs_ptr);
-
- _sgemm_colmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[j * bn_ * m_ + i * bm_], l, m_, bk);
- }
- }
- }
- }
- else
- {
- throw std::runtime_error{"error shard type."};
- }
-}
-
-void sgemm_singlethread::compute_colmajor_tn()
-{
- int mstride = (bm_ + mr_ - 1) / mr_ * mr_;
- int nstride = (bn_ + nr_ - 1) / nr_ * nr_;
-
- float plhs_ptr[mstride * bk_];
- float prhs_ptr[nstride * bk_];
-
- if (shard_type_ == shardByCol)
- {
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_colmajor_notrans_rhs(nr_, bn, bk, k_, &rhs_data_[j * bn_ * k_ + l * bk_], prhs_ptr);
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_colmajor_trans_lhs(mr_, bm, bk, k_, &lhs_data_[i * bm_ * k_ + l * bk_], plhs_ptr);
-
- _sgemm_colmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[j * bn_ * m_ + i * bm_], l, m_, bk);
- }
- }
- }
- }
- else if (shard_type_ == shardByRow)
- {
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_colmajor_trans_lhs(mr_, bm, bk, k_, &lhs_data_[i * bm_ * k_ + l * bk_], plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- _pack_colmajor_notrans_rhs(nr_, bn, bk, k_, &rhs_data_[j * bn_ * k_ + l * bk_], prhs_ptr);
-
- _sgemm_colmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[j * bn_ * m_ + i * bm_], l, m_, bk);
- }
- }
- }
- }
- else
- {
- throw std::runtime_error{"error shard type."};
- }
-}
-
-void sgemm_singlethread::compute_colmajor_tt()
-{
- int mstride = (bm_ + mr_ - 1) / mr_ * mr_;
- int nstride = (bn_ + nr_ - 1) / nr_ * nr_;
-
- float plhs_ptr[mstride * bk_];
- float prhs_ptr[nstride * bk_];
-
- if (shard_type_ == shardByCol)
- {
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_colmajor_trans_rhs(nr_, bn, bk, n_, &rhs_data_[l * bk_ * n_ + j * bn_], prhs_ptr);
-
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- _pack_colmajor_trans_lhs(mr_, bm, bk, k_, &lhs_data_[i * bm_ * k_ + l * bk_], plhs_ptr);
-
- _sgemm_colmajor_macro_kernel_divnm(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[j * bn_ * m_ + i * bm_], l, m_, bk);
- }
- }
- }
- }
- else if (shard_type_ == shardByRow)
- {
- for (int i = 0; i < nm_; i++)
- {
- const int bm = (i != nm_ - 1 || rm_ == 0) ? bm_ : rm_;
-
- for (int l = 0; l < nk_; l++)
- {
- const int bk = (l != nk_ - 1 || rk_ == 0) ? bk_ : rk_;
-
- _pack_colmajor_trans_lhs(mr_, bm, bk, k_, &lhs_data_[i * bm_ * k_ + l * bk_], plhs_ptr);
-
- for (int j = 0; j < nn_; j++)
- {
- const int bn = (j != nn_ - 1 || rn_ == 0) ? bn_ : rn_;
-
- _pack_colmajor_trans_rhs(nr_, bn, bk, n_, &rhs_data_[l * bk_ * n_ + j * bn_], prhs_ptr);
-
- _sgemm_colmajor_macro_kernel_divmn(mr_, nr_, bm, bn, bk, plhs_ptr, prhs_ptr,
- &res_data_[j * bn_ * m_ + i * bm_], l, m_, bk);
- }
- }
- }
- }
- else
- {
- throw std::runtime_error{"error shard type."};
- }
-}
-
-} // namespace srcn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/sgemm_singlethread.h b/compute/ncnn/src/srcn/sgemm_singlethread.h
deleted file mode 100644
index 47954e028..000000000
--- a/compute/ncnn/src/srcn/sgemm_singlethread.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_SGEMM_SINGLETHREAD_H__
-#define __NNFW_SRCN_SGEMM_SINGLETHREAD_H__
-
-#include "common.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-typedef enum { rowMajor = 0, colMajor } sgemmType_t;
-
-typedef enum { trans = 0, notrans } sgemmTrans_t;
-
-class sgemm_singlethread
-{
-public:
- sgemm_singlethread(sgemmType_t major_type, sgemmTrans_t ltrans, sgemmTrans_t rtrans, const int m,
- const int n, const int k, const float *lhs_data, const float *rhs_data,
- float *res_data, int cache_div);
- ~sgemm_singlethread();
-
- void run();
-
-private:
- void param_init();
-
- void compute_rowmajor_nn();
- void compute_rowmajor_nt();
- void compute_rowmajor_tn();
- void compute_rowmajor_tt();
-
- void compute_colmajor_nn();
- void compute_colmajor_nt();
- void compute_colmajor_tn();
- void compute_colmajor_tt();
-
- const float *lhs_data_;
- const float *rhs_data_;
- float *res_data_;
-
- sgemmType_t major_type_;
- sgemmTrans_t ltrans_;
- sgemmTrans_t rtrans_;
-
- int m_;
- int n_;
- int k_;
-
- int bm_;
- int bn_;
- int bk_;
-
- int rm_;
- int rn_;
- int rk_;
-
- int nm_;
- int nn_;
- int nk_;
-
- int mr_;
- int nr_;
-
- shardType_t shard_type_;
- int cache_div_;
-};
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_SGEMM_SINGLETHREAD_H__
diff --git a/compute/ncnn/src/srcn/sgemm_test.cc b/compute/ncnn/src/srcn/sgemm_test.cc
deleted file mode 100644
index 1b10970bb..000000000
--- a/compute/ncnn/src/srcn/sgemm_test.cc
+++ /dev/null
@@ -1,1883 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/time.h>
-#include <unistd.h>
-
-#include "ncnn/srcn/conv_type.h"
-#include "srcn/srcn_conv.h"
-//#include "srcn_sgemm.h"
-#include "conv_sgemm_singlethread.h"
-#include "conv_sgemm_multithreads.h"
-//#include "conv_sgemm_batch.h"
-#include "sgemm_singlethread.h"
-#include "conv_winograd.h"
-#include "winograd.h"
-
-//#include "conv_gpu.h"
-//#include "convolutiondepthwise_3x3.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-static void direct_conv_rowmajor(convMat_t *input, convMat_t *output, convMat_t *filter,
- convParams_t *params)
-{
- const int w = input->w;
- const int h = input->h;
- const int inch = input->c;
- const int outw = output->w;
- const int outh = output->h;
- const int outch = output->c;
- const int kernel_w = params->kernel_w;
- const int kernel_h = params->kernel_h;
- const int stride_w = params->stride_w;
- const int stride_h = params->stride_h;
- const int pad_w = params->pad_w;
- const int pad_h = params->pad_h;
- const int dilation_w = params->dilation_w;
- const int dilation_h = params->dilation_h;
- const float *input_data = input->data;
- const float *filter_data = filter->data;
- float *output_data = output->data;
-
- for (int out_c = 0; out_c < outch; out_c++)
- {
- for (int out_row = 0; out_row < outh; out_row++)
- {
- for (int out_col = 0; out_col < outw; out_col++)
- {
- const int in_col0 = (out_col * stride_w) - pad_w;
- const int in_row0 = (out_row * stride_h) - pad_h;
- float sum = 0.f;
- for (int in_c = 0; in_c < inch; in_c++)
- {
- for (int filter_y = 0; filter_y < kernel_h; filter_y++)
- {
- for (int filter_x = 0; filter_x < kernel_w; filter_x++)
- {
- const int in_col = in_col0 + filter_x * dilation_w;
- const int in_row = in_row0 + filter_y * dilation_h;
-
- if (((unsigned int)in_col < (unsigned int)w) &&
- ((unsigned int)in_row < (unsigned int)h))
- {
- float input_value = input_data[(in_c * h + in_row) * w + in_col];
- float filter_value =
- filter_data[((out_c * inch + in_c) * kernel_h + filter_y) * kernel_w +
- filter_x];
- sum += (input_value * filter_value);
- }
- }
- }
- }
- output_data[(out_c * outh + out_row) * outw + out_col] = sum;
- }
- }
- }
-}
-
-static void direct_deconv_rowmajor(convMat_t *input, convMat_t *output, convMat_t *filter,
- convParams_t *params)
-{
- const int w = input->w;
- const int h = input->h;
- const int inch = input->c;
- const int outw = output->w;
- const int outh = output->h;
- const int outch = output->c;
- const int kernel_w = params->kernel_w;
- const int kernel_h = params->kernel_h;
- const int stride_w = params->stride_w;
- const int stride_h = params->stride_h;
- const int pad_w = params->pad_w;
- const int pad_h = params->pad_h;
- const int dilation_w = params->dilation_w;
- const int dilation_h = params->dilation_h;
- const float *input_data = input->data;
- const float *filter_data = filter->data;
- float *output_data = output->data;
-
- for (int i = 0; i < outw * outh * outch; i++)
- {
- output_data[i] = 0;
- }
-
- for (int in_c = 0; in_c < inch; in_c++)
- {
- for (int in_row = 0; in_row < h; in_row++)
- {
- for (int in_col = 0; in_col < w; in_col++)
- {
- const int out_col0 = (in_col * stride_w) - pad_w;
- const int out_row0 = (in_row * stride_h) - pad_h;
- float in_value = input_data[(in_c * h + in_row) * w + in_col];
- for (int out_c = 0; out_c < outch; out_c++)
- {
- for (int filter_y = 0; filter_y < kernel_h; filter_y++)
- {
- for (int filter_x = 0; filter_x < kernel_w; filter_x++)
- {
- const int out_col = out_col0 + filter_x * dilation_w;
- const int out_row = out_row0 + filter_y * dilation_h;
-
- if (((unsigned int)out_col < (unsigned int)outw) &&
- ((unsigned int)out_row < (unsigned int)outh))
- {
- float filter_value =
- filter_data[((in_c * outch + out_c) * kernel_h + filter_y) * kernel_w +
- filter_x];
- output_data[(out_c * outh + out_row) * outw + out_col] += filter_value * in_value;
- }
- }
- }
- }
- }
- }
- }
-}
-
-static void direct_sgemm_rowmajor(int Atrans, int Btrans, int m, int n, int k, float *A, float *B,
- float *C)
-{
- float *aa, *bb;
-
- if (Atrans == trans)
- {
- aa = (float *)malloc(m * k * sizeof(float));
- if (!aa)
- return;
-
- for (int i = 0; i < k; i++)
- {
- for (int j = 0; j < m; j++)
- {
- aa[j * k + i] = A[i * m + j];
- }
- }
- }
- else
- {
- aa = A;
- }
-
- if (Btrans == trans)
- {
- bb = (float *)malloc(n * k * sizeof(float));
- if (!bb)
- return;
-
- for (int i = 0; i < n; i++)
- {
- for (int j = 0; j < k; j++)
- {
- bb[j * n + i] = B[i * k + j];
- }
- }
- }
- else
- {
- bb = B;
- }
-
- for (int i = 0; i < m; i++)
- {
- for (int j = 0; j < n; j++)
- {
- float res = 0.f;
- for (int l = 0; l < k; l++)
- {
- res += aa[i * k + l] * bb[l * n + j];
- }
- C[i * n + j] = res;
- }
- }
-}
-
-/*static void direct_sgemm_kernel(const int k, const int lhs_stride, const int rhs_stride, const int
-res_stride,
- const float *lhs_ptr, const float *rhs_ptr, float *res_ptr)
-{
- int lstride = lhs_stride << 2;
- int rstride = rhs_stride << 2;
- int estride = res_stride << 2;
- int rstep = rstride << 2;
-
- int nk = (k >> 2) - 1;
-
- __asm __volatile (
- "movi v16.4s, #0x0\n"
- "movi v17.4s, #0x0\n"
- "movi v18.4s, #0x0\n"
- "movi v19.4s, #0x0\n"
- "movi v20.4s, #0x0\n"
- "movi v21.4s, #0x0\n"
- "movi v22.4s, #0x0\n"
- "movi v23.4s, #0x0\n"
- "movi v24.4s, #0x0\n"
- "movi v25.4s, #0x0\n"
- "movi v26.4s, #0x0\n"
- "movi v27.4s, #0x0\n"
- "movi v28.4s, #0x0\n"
- "movi v29.4s, #0x0\n"
- "movi v30.4s, #0x0\n"
- "movi v31.4s, #0x0\n"
-
- "mov x0, %[lhs_ptr]\n"
- "add %[lhs_ptr], %[lhs_ptr], #16\n"
- "ld1 {v0.4s}, [x0]\n"
- "add x0, x0, %[lstride]\n"
- "ld1 {v1.4s}, [x0]\n"
- "add x0, x0, %[lstride]\n"
- "ld1 {v2.4s}, [x0]\n"
- "add x0, x0, %[lstride]\n"
- "ld1 {v3.4s}, [x0]\n"
- "add x0, x0, %[lstride]\n"
-
- "mov x1, %[rhs_ptr]\n"
- "add %[rhs_ptr], %[rhs_ptr], %[rstep]\n"
- "ld1 {v8.4s, v9.4s}, [x1]\n"
- "add x1, x1, %[rstride]\n"
- "ld1 {v10.4s, v11.4s}, [x1]\n"
- "add x1, x1, %[rstride]\n"
-
- "1:\n"
- "fmla v16.4s, v8.4s, v0.s[0]\n"
- "fmla v17.4s, v9.4s, v0.s[0]\n"
- "fmla v16.4s, v10.4s, v0.s[1]\n"
- "fmla v17.4s, v11.4s, v0.s[1]\n"
- "fmla v18.4s, v8.4s, v1.s[0]\n"
- "fmla v19.4s, v9.4s, v1.s[0]\n"
- "fmla v18.4s, v10.4s, v1.s[1]\n"
- "fmla v19.4s, v11.4s, v1.s[1]\n"
- "ld1 {v12.4s, v13.4s}, [x1]\n"
- "fmla v20.4s, v8.4s, v2.s[0]\n"
- "add x1, x1, %[rstride]\n"
- "fmla v21.4s, v9.4s, v2.s[0]\n"
- "ld1 {v14.4s, v15.4s}, [x1]\n"
- "fmla v20.4s, v10.4s, v2.s[1]\n"
- "add x1, x1, %[rstride]\n"
- "fmla v21.4s, v11.4s, v2.s[1]\n"
- "fmla v22.4s, v8.4s, v3.s[0]\n"
- "fmla v23.4s, v9.4s, v3.s[0]\n"
- "fmla v22.4s, v10.4s, v3.s[1]\n"
- "fmla v23.4s, v11.4s, v3.s[1]\n"
-
- "ld1 {v4.4s}, [x0]\n"
- "fmla v16.4s, v12.4s, v0.s[2]\n"
- "add x0, x0, %[lstride]\n"
- "fmla v17.4s, v13.4s, v0.s[2]\n"
- "ld1 {v5.4s}, [x0]\n"
- "fmla v16.4s, v14.4s, v0.s[3]\n"
- "add x0, x0, %[lstride]\n"
- "fmla v17.4s, v15.4s, v0.s[3]\n"
- "ld1 {v6.4s}, [x0]\n"
- "fmla v18.4s, v12.4s, v1.s[2]\n"
- "add x0, x0, %[lstride]\n"
- "fmla v19.4s, v13.4s, v1.s[2]\n"
- "ld1 {v7.4s}, [x0]\n"
- "fmla v18.4s, v14.4s, v1.s[3]\n"
- "add x0, x0, %[lstride]\n"
- "fmla v19.4s, v15.4s, v1.s[3]\n"
- "fmla v20.4s, v12.4s, v2.s[2]\n"
- "fmla v21.4s, v13.4s, v2.s[2]\n"
- "fmla v20.4s, v14.4s, v2.s[3]\n"
- "fmla v21.4s, v15.4s, v2.s[3]\n"
- "fmla v22.4s, v12.4s, v3.s[2]\n"
- "fmla v23.4s, v13.4s, v3.s[2]\n"
- "fmla v22.4s, v14.4s, v3.s[3]\n"
- "fmla v23.4s, v15.4s, v3.s[3]\n"
-
- "mov x0, %[lhs_ptr]\n"
- "add %[lhs_ptr], %[lhs_ptr], #16\n"
-
- "fmla v24.4s, v8.4s, v4.s[0]\n"
- "fmla v25.4s, v9.4s, v4.s[0]\n"
- "ld1 {v0.4s}, [x0]\n"
- "fmla v24.4s, v10.4s, v4.s[1]\n"
- "add x0, x0, %[lstride]\n"
- "fmla v25.4s, v11.4s, v4.s[1]\n"
- "ld1 {v1.4s}, [x0]\n"
- "fmla v26.4s, v8.4s, v5.s[0]\n"
- "add x0, x0, %[lstride]\n"
- "fmla v27.4s, v9.4s, v5.s[0]\n"
- "ld1 {v2.4s}, [x0]\n"
- "fmla v26.4s, v10.4s, v5.s[1]\n"
- "add x0, x0, %[lstride]\n"
- "fmla v27.4s, v11.4s, v5.s[1]\n"
- "ld1 {v3.4s}, [x0]\n"
- "fmla v28.4s, v8.4s, v6.s[0]\n"
- "add x0, x0, %[lstride]\n"
- "fmla v29.4s, v9.4s, v6.s[0]\n"
- "fmla v28.4s, v10.4s, v6.s[1]\n"
- "fmla v29.4s, v11.4s, v6.s[1]\n"
- "fmla v30.4s, v8.4s, v7.s[0]\n"
- "fmla v31.4s, v9.4s, v7.s[0]\n"
- "fmla v30.4s, v10.4s, v7.s[1]\n"
- "fmla v31.4s, v11.4s, v7.s[1]\n"
-
- "mov x1, %[rhs_ptr]\n"
- "add %[rhs_ptr], %[rhs_ptr], %[rstep]\n"
-
- "fmla v24.4s, v12.4s, v4.s[2]\n"
- "fmla v25.4s, v13.4s, v4.s[2]\n"
- "ld1 {v8.4s, v9.4s}, [x1]\n"
- "fmla v24.4s, v14.4s, v4.s[3]\n"
- "add x1, x1, %[rstride]\n"
- "fmla v25.4s, v15.4s, v4.s[3]\n"
- "ld1 {v10.4s, v11.4s}, [x1]\n"
- "fmla v26.4s, v12.4s, v5.s[2]\n"
- "add x1, x1, %[rstride]\n"
- "fmla v27.4s, v13.4s, v5.s[2]\n"
- "fmla v26.4s, v14.4s, v5.s[3]\n"
- "fmla v27.4s, v15.4s, v5.s[3]\n"
- "fmla v28.4s, v12.4s, v6.s[2]\n"
- "fmla v29.4s, v13.4s, v6.s[2]\n"
- "fmla v28.4s, v14.4s, v6.s[3]\n"
- "fmla v29.4s, v15.4s, v6.s[3]\n"
- "fmla v30.4s, v12.4s, v7.s[2]\n"
- "fmla v31.4s, v13.4s, v7.s[2]\n"
- "subs %w[nk], %w[nk], #1\n"
- "fmla v30.4s, v14.4s, v7.s[3]\n"
- "fmla v31.4s, v15.4s, v7.s[3]\n"
- "bne 1b\n"
-
- "fmla v16.4s, v8.4s, v0.s[0]\n"
- "fmla v17.4s, v9.4s, v0.s[0]\n"
- "fmla v16.4s, v10.4s, v0.s[1]\n"
- "fmla v17.4s, v11.4s, v0.s[1]\n"
- "fmla v18.4s, v8.4s, v1.s[0]\n"
- "fmla v19.4s, v9.4s, v1.s[0]\n"
- "fmla v18.4s, v10.4s, v1.s[1]\n"
- "fmla v19.4s, v11.4s, v1.s[1]\n"
- "ld1 {v12.4s, v13.4s}, [x1]\n"
- "fmla v20.4s, v8.4s, v2.s[0]\n"
- "add x1, x1, %[rstride]\n"
- "fmla v21.4s, v9.4s, v2.s[0]\n"
- "ld1 {v14.4s, v15.4s}, [x1]\n"
- "fmla v20.4s, v10.4s, v2.s[1]\n"
- "add x1, x1, %[rstride]\n"
- "fmla v21.4s, v11.4s, v2.s[1]\n"
- "fmla v22.4s, v8.4s, v3.s[0]\n"
- "fmla v23.4s, v9.4s, v3.s[0]\n"
- "fmla v22.4s, v10.4s, v3.s[1]\n"
- "fmla v23.4s, v11.4s, v3.s[1]\n"
-
- "ld1 {v4.4s}, [x0]\n"
- "fmla v16.4s, v12.4s, v0.s[2]\n"
- "add x0, x0, %[lstride]\n"
- "fmla v17.4s, v13.4s, v0.s[2]\n"
- "ld1 {v5.4s}, [x0]\n"
- "fmla v16.4s, v14.4s, v0.s[3]\n"
- "add x0, x0, %[lstride]\n"
- "fmla v17.4s, v15.4s, v0.s[3]\n"
- "ld1 {v6.4s}, [x0]\n"
- "fmla v18.4s, v12.4s, v1.s[2]\n"
- "add x0, x0, %[lstride]\n"
- "fmla v19.4s, v13.4s, v1.s[2]\n"
- "ld1 {v7.4s}, [x0]\n"
- "fmla v18.4s, v14.4s, v1.s[3]\n"
- "add x0, x0, %[lstride]\n"
- "fmla v19.4s, v15.4s, v1.s[3]\n"
- "fmla v20.4s, v12.4s, v2.s[2]\n"
- "fmla v21.4s, v13.4s, v2.s[2]\n"
- "fmla v20.4s, v14.4s, v2.s[3]\n"
- "fmla v21.4s, v15.4s, v2.s[3]\n"
- "fmla v22.4s, v12.4s, v3.s[2]\n"
- "fmla v23.4s, v13.4s, v3.s[2]\n"
- "fmla v22.4s, v14.4s, v3.s[3]\n"
- "fmla v23.4s, v15.4s, v3.s[3]\n"
-
- "mov x0, %[res_ptr]\n"
- "fmla v24.4s, v8.4s, v4.s[0]\n"
- "fmla v25.4s, v9.4s, v4.s[0]\n"
- "st1 {v16.4s, v17.4s}, [x0]\n"
- "add x0, x0, %[estride]\n"
- "fmla v24.4s, v10.4s, v4.s[1]\n"
- "fmla v25.4s, v11.4s, v4.s[1]\n"
- "st1 {v18.4s, v19.4s}, [x0]\n"
- "add x0, x0, %[estride]\n"
- "fmla v26.4s, v8.4s, v5.s[0]\n"
- "fmla v27.4s, v9.4s, v5.s[0]\n"
- "st1 {v20.4s, v21.4s}, [x0]\n"
- "add x0, x0, %[estride]\n"
- "fmla v26.4s, v10.4s, v5.s[1]\n"
- "fmla v27.4s, v11.4s, v5.s[1]\n"
- "st1 {v22.4s, v23.4s}, [x0]\n"
- "add x0, x0, %[estride]\n"
- "fmla v28.4s, v8.4s, v6.s[0]\n"
- "fmla v29.4s, v9.4s, v6.s[0]\n"
- "fmla v28.4s, v10.4s, v6.s[1]\n"
- "fmla v29.4s, v11.4s, v6.s[1]\n"
- "fmla v30.4s, v8.4s, v7.s[0]\n"
- "fmla v31.4s, v9.4s, v7.s[0]\n"
- "fmla v30.4s, v10.4s, v7.s[1]\n"
- "fmla v31.4s, v11.4s, v7.s[1]\n"
-
- "fmla v24.4s, v12.4s, v4.s[2]\n"
- "fmla v25.4s, v13.4s, v4.s[2]\n"
- "fmla v24.4s, v14.4s, v4.s[3]\n"
- "fmla v25.4s, v15.4s, v4.s[3]\n"
- "fmla v26.4s, v12.4s, v5.s[2]\n"
- "fmla v27.4s, v13.4s, v5.s[2]\n"
- "st1 {v24.4s, v25.4s}, [x0]\n"
- "add x0, x0, %[estride]\n"
- "fmla v26.4s, v14.4s, v5.s[3]\n"
- "fmla v27.4s, v15.4s, v5.s[3]\n"
- "fmla v28.4s, v12.4s, v6.s[2]\n"
- "fmla v29.4s, v13.4s, v6.s[2]\n"
- "st1 {v26.4s, v27.4s}, [x0]\n"
- "add x0, x0, %[estride]\n"
- "fmla v28.4s, v14.4s, v6.s[3]\n"
- "fmla v29.4s, v15.4s, v6.s[3]\n"
- "fmla v30.4s, v12.4s, v7.s[2]\n"
- "fmla v31.4s, v13.4s, v7.s[2]\n"
- "st1 {v28.4s, v29.4s}, [x0]\n"
- "add x0, x0, %[estride]\n"
- "fmla v30.4s, v14.4s, v7.s[3]\n"
- "fmla v31.4s, v15.4s, v7.s[3]\n"
- "st1 {v30.4s, v31.4s}, [x0]\n"
- :[lhs_ptr] "+r" (lhs_ptr), [rhs_ptr] "+r" (rhs_ptr), [res_ptr] "+r" (res_ptr),
- [nk] "+r" (nk)
- : [lstride] "r" (lstride), [rstride] "r" (rstride), [estride] "r" (estride), [rstep] "r"
-(rstep)
- : "x0", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
- "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
- );
-}*/
-
-static void direct_conv_colmajor(convMat_t *input, convMat_t *output, convMat_t *filter,
- convParams_t *params)
-{
- const int w = input->w;
- const int h = input->h;
- const int inch = input->c;
- const int outw = output->w;
- const int outh = output->h;
- const int outch = output->c;
- const int kernel_w = params->kernel_w;
- const int kernel_h = params->kernel_h;
- const int stride_w = params->stride_w;
- const int stride_h = params->stride_h;
- const int pad_w = params->pad_w;
- const int pad_h = params->pad_h;
- const int dilation_w = params->dilation_w;
- const int dilation_h = params->dilation_h;
- const float *input_data = input->data;
- const float *filter_data = filter->data;
- float *output_data = output->data;
-
- for (int out_row = 0; out_row < outh; out_row++)
- {
- for (int out_col = 0; out_col < outw; out_col++)
- {
- const int in_col0 = (out_col * stride_w) - pad_w;
- const int in_row0 = (out_row * stride_h) - pad_h;
-
- for (int out_c = 0; out_c < outch; out_c++)
- {
- float sum = 0.f;
- for (int filter_y = 0; filter_y < kernel_h; filter_y++)
- {
- for (int filter_x = 0; filter_x < kernel_w; filter_x++)
- {
- const int in_col = in_col0 + filter_x * dilation_w;
- const int in_row = in_row0 + filter_y * dilation_h;
-
- if (((unsigned int)in_col < (unsigned int)w) &&
- ((unsigned int)in_row < (unsigned int)h))
- {
- for (int in_c = 0; in_c < inch; in_c++)
- {
- float input_value = input_data[(in_row * w + in_col) * inch + in_c];
- float filter_value =
- filter_data[((filter_y * kernel_w + filter_x) * inch + in_c) * outch + out_c];
- sum += (input_value * filter_value);
- }
- }
- }
- }
- output_data[(out_row * outw + out_col) * outch + out_c] = sum;
- }
- }
- }
-}
-
-static void direct_sgemm_colmajor(int Atrans, int Btrans, int m, int n, int k, float *A, float *B,
- float *C)
-{
- float *aa, *bb;
-
- if (Atrans)
- {
- aa = (float *)malloc(m * k * sizeof(float));
- if (!aa)
- return;
-
- for (int i = 0; i < k; i++)
- {
- for (int j = 0; j < m; j++)
- {
- aa[i * m + j] = A[j * k + i];
- }
- }
- }
- else
- {
- aa = A;
- }
-
- if (Btrans)
- {
- bb = (float *)malloc(n * k * sizeof(float));
- if (!bb)
- return;
-
- for (int i = 0; i < n; i++)
- {
- for (int j = 0; j < k; j++)
- {
- bb[i * k + j] = B[j * n + i];
- }
- }
- }
- else
- {
- bb = B;
- }
-
- for (int i = 0; i < m; i++)
- {
- for (int j = 0; j < n; j++)
- {
- float res = 0.f;
- for (int l = 0; l < k; l++)
- {
- res += bb[j * k + l] * aa[l * m + i];
- }
- C[j * m + i] = res;
- }
- }
-}
-
-#if 0
-static int test_sgemm(int m, int n, int k, int loops)
-{
- struct timeval start, end;
- float total_time = 0.f;
-
- const int mb = 180;
- const int nb = 1440;
- const int kb = 512;
-
- const int mr = 4;
- const int nr = 12;
-
-#if 0
- const int pm = (m + mr - 1) / mr * mr;
- const int pn = (n + nr - 1) / nr * nr;
- const int pk = k;
-#else
- const int pm = (mb + mr - 1) / mr * mr;
- const int pn = (nb + nr - 1) / nr * nr;
- const int pk = kb;
-#endif
- const int nm = (m + mb - 1) / mb;
- const int nn = (n + nb - 1) / nb;
- const int nk = (k + kb - 1) / kb;
-
- const int rm = m % mb;
- const int rn = n % nb;
- const int rk = k % kb;
-
- float *A = (float *)malloc(m * k * sizeof(float));
- if(!A) return 0;
-
- for(int i = 0 ; i < m * k; i++)
- {
- A[i] = 0.001 + i * 0.000001;
- }
-
- float *B = (float *)malloc(k * n * sizeof(float));
- if(!B) return 0;
-
- for(int i = 0 ; i < n * k; i++)
- {
- B[i] = 0.001 - i * 0.000001;
- }
-
- float *C = (float *)malloc(m * n * sizeof(float));
- if(!C) return 0;
-
-#if 0
- float *PA = (float *)malloc(pm * pk * sizeof(float));
- if(!PA) return 0;
-
- float *PB = (float *)malloc(pk * pn * sizeof(float));
- if(!PB) return 0;
-#else
- float PA[pm * pk];
- float PB[pk * pn];
-#endif
-
- for(int nloop = 0; nloop < loops; nloop++)
-
- {
- gettimeofday(&start, NULL);
-
- //pack_rowmajor_notrans_lhs(mr, m, k, k, A, PA);
- //pack_rowmajor_notrans_rhs(nr, n, k, n, B, PB);
-#if 1
- for (int j = 0; j < nn; j++)
- {
- const int _nb = (j != nn - 1 || rn == 0) ? nb : rn;
- for (int l = 0; l < nk; l++)
- {
- const int _kb = (l != nk - 1 || rk == 0) ? kb : rk;
- pack_rowmajor_notrans_rhs(nr, _nb, _kb, 1, n, &B[l * kb * n + j * nb], PB);
- for(int i = 0; i < nm; i++)
- {
- const int _mb = (i != nm - 1 || rm == 0) ? mb : rm;
- pack_rowmajor_notrans_lhs(mr, _mb, _kb, 1, k, &A[i * mb * k + l * kb], PA);
- sgemm_rowmajor_macro_kernel_divnm(mr, nr, _mb, _nb, _kb, PA, PB, &C[i * mb * n + j * nb], l, n, _kb);
- //sgemm_rowmajor_macro_kernel_divnm(mr, nr, _mb, _nb, _kb, &PA[i * mb * k + l * kb], &PB[l * kb * pn + j * nb], &C[i * mb * n + j * nb], l, n, pk);
- }
- }
- }
-#else
- for (int j = 0; j < nm; j++)
- {
- const int _mb = (j != nm - 1 || rm == 0) ? mb : rm;
- for (int l = 0; l < nk; l++)
- {
- const int _kb = (l != nk - 1 || rk == 0) ? kb : rk;
- pack_rowmajor_notrans_lhs(mr, _mb, _kb, 1, k, &A[j * mb * k + l * kb], PA);
- for(int i = 0; i < nn; i++)
- {
- const int _nb = (i != nn - 1 || rn == 0) ? nb : rn;
- pack_rowmajor_notrans_rhs(nr, _nb, _kb, 1, n, &B[l * kb * n + i * nb], PB);
- sgemm_rowmajor_macro_kernel_divmn(mr, nr, _mb, _nb, _kb, PA, PB, &C[j * mb * n + i * nb], l, n, _kb);
- //sgemm_rowmajor_macro_kernel_divmn(mr, nr, _mb, _nb, _kb, &PA[i * mb * k + l * kb], &PB[l * kb * pn + j * nb], &C[i * mb * n + j * nb], l, n, pk);
- }
- }
- }
-#endif
- gettimeofday(&end, NULL);
- total_time += ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec))/1000;
- }
-
- int div = m * n < 16 ? m * n : 16;
- int num = m * n > 64 ? 64 : m * n;
-
- float *c_ptr = &C[0];
- for(int i = 0; i < num; i++)
- {
- printf("%f ", c_ptr[i]);
- if((i + 1) % div == 0) printf("\n");
- }
-
- printf("\n");
-
- c_ptr = &C[m * n - num];
- for(int i = 0; i < num; i++)
- {
- printf("%f ", c_ptr[i]);
- if((i + 1) % div == 0) printf("\n");
- }
-
- printf("\n");
-
- long long total_size = (long long)m *n * k * 2;
- printf("AVER Time consuming: %.2fms, total size: %lld, (GFLOP: %.2f)\n", total_time / loops , total_size, (double)total_size/(total_time / loops)/1000000);
-
- free(A);
- free(B);
- free(C);
-
- //free(PA);
- //free(PB);
-
-}
-#endif
-
-static int test_sgemm(int m, int n, int k, int type, int loops)
-{
- struct timeval start, end;
- float total_time = 0.f;
-
- // printf("1.\n");
-
- float *A = (float *)malloc(m * k * sizeof(float));
- if (!A)
- return 0;
-
- for (int i = 0; i < m * k; i++)
- {
- A[i] = 0.001 + i * 0.001; // i * 0.000001;
- }
-
- float *B = (float *)malloc(k * n * sizeof(float));
- if (!B)
- return 0;
-
- for (int i = 0; i < n * k; i++)
- {
- B[i] = 0.001 - i * 0.001; // - i * 0.000001;
- }
-
- float *C = (float *)malloc(m * n * sizeof(float));
- if (!C)
- return 0;
-
- for (int nloop = 0; nloop < loops; nloop++)
-
- {
- gettimeofday(&start, NULL);
-
- if (type == 0)
- {
- // direct_sgemm_rowmajor(notrans, notrans, m, n, k, A, B, C);
- direct_sgemm_colmajor(notrans, notrans, m, n, k, A, B, C);
- }
-
- else if (type == 1)
- {
- class sgemm_singlethread my_gemm(colMajor, notrans, notrans, m, n, k, A, B, C, 1);
- my_gemm.run();
- }
-
- /*else if(type == 2)
- {
- for(int i = 0; i < m / 8; i++)
- {
- for(int j = 0; j < n / 8; j++)
- {
- direct_sgemm_kernel(k, k, n, n, A + i * 8 * k, B + j * 8, C + i * 8 * n + j * 8);
- }
- }
- }*/
-
- gettimeofday(&end, NULL);
- total_time +=
- ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000;
- }
-
- int div = m * n < 16 ? m * n : 16;
- int num = m * n > 64 ? 64 : m * n;
-
- float *c_ptr = &C[0];
- for (int i = 0; i < num; i++)
- {
- printf("%f ", c_ptr[i]);
- if ((i + 1) % div == 0)
- printf("\n");
- }
-
- printf("\n");
-
- c_ptr = &C[m * n - num];
- for (int i = 0; i < num; i++)
- {
- printf("%f ", c_ptr[i]);
- if ((i + 1) % div == 0)
- printf("\n");
- }
-
- printf("\n");
-
- long long total_size = (long long)m * n * k * 2;
- printf("AVER Time consuming: %.2fms, total size: %lld, (GFLOP: %.2f)\n", total_time / loops,
- total_size, (double)total_size / (total_time / loops) / 1000000);
-
- free(A);
- free(B);
- free(C);
-
- return 0;
-}
-
-void weight_tensorflow2caffe(float *out, float *in, int H, int W, int C, int N)
-{ // HWCN ---> NCHW
- for (int h = 0; h < H; ++h)
- {
- for (int w = 0; w < W; ++w)
- {
- for (int c = 0; c < C; ++c)
- {
- for (int n = 0; n < N; ++n)
- {
- int index_in = h * W * C * N + w * C * N + c * N + n;
- int index_out = n * C * H * W + c * H * W + h * W + w;
- // printf("%3d <--- %3d\n", index_out, index_in);
- out[index_out] = in[index_in];
- }
- }
- }
- }
-}
-
-void trans_weight2winograd(const convMat_t &_kernel, float **winograd_weight)
-{
- const double *G;
- const int kernel_size = _kernel.h;
- const int channels = _kernel.c;
- const int num_output = _kernel.n;
-
- int tile_h_in_, tile_w_in_;
- int M, N;
-
- /*Step 1: transfer weight to winograd domain*/
- if (kernel_size == 3)
- {
- M = winograd_para_3x3s1::M;
- N = winograd_para_3x3s1::N;
- G = winograd_para_3x3s1::getG();
- }
- else
- {
- M = winograd_para_5x5s1::M;
- N = winograd_para_5x5s1::N;
- G = winograd_para_5x5s1::getG();
- }
-
- tile_h_in_ = tile_w_in_ = M;
-
- float *winograd_g = new float[M * M * N * N];
- if (NULL == winograd_g)
- return;
- kronecker_product(winograd_g, G, G, M, N, M, N);
-
- *winograd_weight = new float[tile_h_in_ * tile_w_in_ * channels * num_output];
-
- if (NULL == *winograd_weight)
- return;
-
- float *weight_data_tran = new float[_kernel.h * _kernel.w * _kernel.c * _kernel.n];
- if (NULL == weight_data_tran)
- return;
- weight_tensorflow2caffe(weight_data_tran, _kernel.data, kernel_size, kernel_size, channels,
- num_output);
-
- class sgemm_singlethread sgemm(rowMajor, notrans, trans, tile_h_in_ * tile_w_in_,
- channels * num_output, kernel_size * kernel_size, winograd_g,
- weight_data_tran, *winograd_weight, 1);
-
- sgemm.run();
-
- delete[] weight_data_tran;
-
- /*With winograd, original weight data is useless.*/
- delete[] winograd_g;
-}
-
-static int test_conv(const int w, const int h, const int kernel_size, const int stride,
- const int inch, const int outch, const int padding, const int conv_type,
- const int thread_num, const int loops)
-{
- struct timeval start, end;
- float total_time = 0.f;
-
- struct timeval start1, end1;
- float total_time1 = 0.f;
-
- const int dilation = 1;
-
- const int kernel_dilation = dilation * (kernel_size - 1) + 1;
-
- convMat_t input;
- convMat_t output;
- convMat_t filter;
- convParams_t params;
-
- int pad_l, pad_r, pad_t, pad_b;
- if (padding)
- {
- int pad_w = kernel_dilation + (w - 1) / stride * stride - w;
- int pad_h = kernel_dilation + (h - 1) / stride * stride - h;
- pad_l = pad_w / 2;
- pad_r = pad_w - pad_l;
- pad_t = pad_h / 2;
- pad_b = pad_h - pad_t;
- }
- else
- {
- pad_l = pad_r = pad_t = pad_b = 0;
- }
-
- input.w = w;
- input.h = h;
- input.c = inch;
- input.n = 1;
-#ifdef NCNN
- input.data =
- (float *)malloc(alignSize(input.w * input.h, 16 / sizeof(float)) * input.c * sizeof(float));
-#else
- input.data = (float *)malloc(input.w * input.h * input.c * sizeof(float));
-#endif
-
- if (!input.data)
- return 0;
-
- output.w = (w + pad_l + pad_r - kernel_dilation) / stride + 1;
- output.h = (h + pad_t + pad_b - kernel_dilation) / stride + 1;
- output.c = outch;
- output.n = 1;
-#ifdef NCNN
- output.data = (float *)malloc(alignSize(output.w * output.h, 16 / sizeof(float)) * output.c *
- sizeof(float));
-#else
- output.data = (float *)malloc(output.w * output.h * output.c * sizeof(float));
-#endif
-
- if (!output.data)
- return 0;
-
- for (int i = 0; i < output.w * output.h * output.c; i++)
- {
- output.data[i] = 0;
- }
-
- filter.w = kernel_size;
- filter.h = kernel_size;
- filter.c = inch;
- filter.n = outch;
- filter.data = (float *)malloc(filter.w * filter.h * filter.c * filter.n * sizeof(float));
- if (!filter.data)
- return 0;
-
- for (int i = 0; i < input.w * input.h * input.c; i++)
- {
- input.data[i] = 0.001 + i * 0.000001;
- }
-
-#if 1
- for (int i = 0; i < filter.w * filter.h * filter.c * filter.n; i++)
- {
- filter.data[i] = 0.001 - i * 0.000001;
- }
-#else
- for (int i = 0; i < filter.w * filter.h * filter.c * filter.n; i++)
- {
- if ((i + 1) % 15 == 0)
- filter.data[i] = 0.001 - i * 0.000001;
- else
- filter.data[i] = 0;
- }
-#endif
- params.kernel_w = kernel_size;
- params.kernel_h = kernel_size;
- params.stride_w = stride;
- params.stride_h = stride;
- params.padding = padding;
- params.pad_w = pad_l;
- params.pad_h = pad_t;
- params.dilation_w = dilation;
- params.dilation_h = dilation;
-
- const int m = output.c;
- const int n = output.w * output.h;
- const int k = params.kernel_h * params.kernel_w * input.c;
-
- // ocl_context_t context;
- size_t local_min[2];
- /**
- if(conv_type == 14 || conv_type == 15 || conv_type == 6)
- {
- if(init_gpu(&context) < 0) return -1;
- //if(conv_type ==14 || conv_type == 5) sgemm_ocltune(&context, m, n, (k < 1024 ? k :
- 1024), local_min);
- //else if(conv_type == 6)
- {
- if(kernel_size == 3) directconv_3x3S1_tune(&context, &input, &filter, &output,
- local_min);
- else if(kernel_size == 1) directconv_1x1S1_tune(&context, &input, &filter, &output,
- local_min);
- }
- //local_min[0] = 1; local_min[1] = 1;
- }
- **/
- if (conv_type == 0)
- {
- for (int nloop = 0; nloop < loops; nloop++)
- {
- gettimeofday(&start, NULL);
-
- direct_conv_rowmajor(&input, &output, &filter, &params);
- // direct_conv_colmajor(&input, &output, &filter, &params);
-
- gettimeofday(&end, NULL);
- total_time +=
- ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000;
- }
- }
- else if (conv_type == 1)
- {
- for (int nloop = 0; nloop < loops; nloop++)
- {
- // printf("nloop = %d, thread_num = %d\n", nloop, thread_num);
- // class srcn_sgemm my_gemm(input, filter, output, params, thread_num, col_major);
- gettimeofday(&start, NULL);
-
- /*if(thread_num == 1)
- {
- class conv_sgemm_singlethread my_gemm(input, filter, output, params, col_major);
- my_gemm.run();
- }
- else
- {
- class conv_sgemm_multithreads my_gemm(input, filter, output, params, thread_num,
- col_major);
- my_gemm.run();
- }*/
-
- srcn_convolution2D(input, filter, output, params, NULL, thread_num, row_major);
-
- // printf("sync\n");
-
- gettimeofday(&end, NULL);
- total_time +=
- ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000;
- }
- }
- else if (conv_type == 2)
- {
- float *winograd_weight;
-
- // trans_weight2winograd(filter, &winograd_weight);
-
- winogradParams_t wparams = {params.kernel_w,
- params.kernel_h,
- params.stride_w,
- params.stride_h,
- params.dilation_w,
- params.dilation_h,
- 1,
- w,
- h,
- input.c,
- output.c,
- thread_num,
- col_major,
- filter.data};
- winograd_weight = trans_weight2winograd(wparams);
-
- for (int nloop = 0; nloop < loops; nloop++)
- {
- gettimeofday(&start, NULL);
-
- // class conv_winograd my_sgemm(input, output, params, col_major, winograd_weight, thread_num,
- // w * h, n);
- // my_sgemm.run();
-
- srcn_convolution2D(input, filter, output, params, winograd_weight, thread_num, row_major);
-
- gettimeofday(&end, NULL);
- total_time +=
- ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000;
- }
- }
- else if (conv_type == 3)
- {
- void *sparse_weight = trans_weight2sparse(filter);
-
- for (int nloop = 0; nloop < loops; nloop++)
- {
- gettimeofday(&start, NULL);
-
- srcn_sparse_convolution2D(input, output, params, sparse_weight, thread_num, row_major);
-
- gettimeofday(&end, NULL);
- total_time +=
- ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000;
- }
-
- sparse_release(outch, sparse_weight);
- } /**
-else if(conv_type == 4)
-{
-#if 0
- cl_int err;
- convlib::load_opencl("./libmali.so");
- const int mpad = (m + 4 - 1) / 4 * 4;
- const int npad = (n + 4 - 1) / 4 * 4;
- cl_mem lhs_gpu = convlib::clCreateBuffer(context.context, CL_MEM_READ_WRITE |
-CL_MEM_ALLOC_HOST_PTR, mpad * k * sizeof(float), NULL, &err);
- if(err != CL_SUCCESS)
- {
- printf("err = %d@%s:%d\n", err, __FUNCTION__, __LINE__);
- return -1;
- }
-
- cl_image_format rhs_format = {CL_RGBA, CL_FLOAT};
- cl_image_desc desc =
- {
- CL_MEM_OBJECT_IMAGE2D,
- (size_t)npad / 4,
- (size_t)k,
- 0, 0,
- 0,
- 0, 0, 0, 0
- };
- cl_mem rhs_gpu = convlib::clCreateImage(context.context, CL_MEM_READ_ONLY |
-CL_MEM_ALLOC_HOST_PTR, &rhs_format, &desc, NULL, &err);
- if(err != CL_SUCCESS)
- {
- printf("err = %d@%s:%d\n", err, __FUNCTION__, __LINE__);
- return -1;
- }
-
- cl_mem rhs_gpu = convlib::clCreateBuffer(context.context, CL_MEM_READ_WRITE |
-CL_MEM_ALLOC_HOST_PTR, npad * k * sizeof(float), NULL, &err);
- if(err != CL_SUCCESS)
- {
- printf("err = %d@%s:%d\n", err, __FUNCTION__, __LINE__);
- return -1;;
- }
-
- cl_mem res_gpu = convlib::clCreateBuffer(context.context, CL_MEM_READ_WRITE |
-CL_MEM_ALLOC_HOST_PTR, mpad * npad * sizeof(float), NULL, &err);
- if(err != CL_SUCCESS)
- {
- printf("err = %d@%s:%d\n", err, __FUNCTION__, __LINE__);
- return -1;
- }
-#endif
- for(int nloop = 0; nloop < loops + 1; nloop++)
- {
- gettimeofday(&start, NULL);
-
- //cl_mem _res_gpu = conv2D_gpu_sgemm(&context, &input, &filter, &output, &params, local_min,
-lhs_gpu, rhs_gpu, res_gpu);
-
- //get_result_gpu(&context, output.data + gpu_data_off, _res_gpu, m, n);
- srcn_convolution2D_gpu(input, filter, output, params, row_major);
-
- gettimeofday(&end, NULL);
-
- if(nloop > 0) total_time += ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000
-+ start.tv_usec))/1000;
- }
-}
-else if(conv_type == 5)
-{
-
- for(int nloop = 0; nloop < loops + 1; nloop++)
- {
- gettimeofday(&start, NULL);
-
- //cl_mem res_gpu = conv2D_gpu_sgemm(&context, &input, &filter, &output, &params, local_min);
-
- //clFlush(context.cmdQueue);
- gettimeofday(&start1, NULL);
- #if 1
- srcn_convolution2D(input, filter, output, params, NULL, thread_num, row_major
-
- #endif
- //usleep(80 * 1000);
- gettimeofday(&end1, NULL);
- total_time1 += ((end1.tv_sec * 1000000 + end1.tv_usec) - (start1.tv_sec * 1000000 +
-start1.tv_usec))/1000;
-
- //get_result_gpu(&context, output.data + gpu_data_off, res_gpu, m, n);
-
- srcn_convolution2D_dpu(input, filter, output, params, row_major);
-
- gettimeofday(&end, NULL);
- if(nloop > 0) total_time += ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000
-+ start.tv_usec))/1000;
- }
-}
-else if(conv_type == 6)
-{
- for(int nloop = 0; nloop < loops; nloop++)
- {
- gettimeofday(&start, NULL);
-
- if(kernel_size == 3 && stride == 1 && padding == 0)
- {
- conv2D_gpu_directconv_3x3S1(&context, &input, &filter, &output, &params, local_min);
- }
- else if(kernel_size == 1 && stride == 1 && padding == 0)
- {
- conv2D_gpu_directconv_1x1S1(&context, &input, &filter, &output, &params, local_min);
- }
-
- gettimeofday(&end, NULL);
- total_time += ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 +
-start.tv_usec))/1000;
- }
-}**/
-
- int div = m * n < 16 ? m * n : 16;
- int num = m * n > 64 ? 64 : m * n;
-
- if (conv_type < 4)
- printf("[CPU RESULT]\n");
- else if (conv_type == 4)
- printf("[GPU RESULT]\n");
- else if (conv_type == 5)
- printf("[DPU RESULT]\n");
- float *c_ptr = output.data;
- for (int i = 0; i < num; i++)
- {
- printf("%f ", c_ptr[i]);
- if ((i + 1) % div == 0)
- printf("\n");
- }
-
- printf("\n");
-
- c_ptr = &output.data[m * n - num];
- for (int i = 0; i < num; i++)
- {
- printf("%f ", c_ptr[i]);
- if ((i + 1) % div == 0)
- printf("\n");
- }
-
- printf("\n");
-
- long long total_size = (long long)m * n * k * 2;
- printf(
- "AVER Time consuming: %.2fms, CPU Time consuming: %.2fms, total size: %lld, (GFLOP: %.2f)\n",
- total_time / loops, total_time1 / loops, total_size,
- (double)total_size / (total_time / loops) / 1000000);
-
- free(input.data);
- free(output.data);
- free(filter.data);
-
- return 0;
-}
-
-static int test_deconv(const int w, const int h, const int kernel_size, const int stride,
- const int inch, const int outch, const int padding, const int conv_type,
- const int thread_num, const int loops)
-{
- struct timeval start, end;
- float total_time = 0.f;
-
- const int dilation = 1;
-
- const int kernel_dilation = dilation * (kernel_size - 1) + 1;
-
- convMat_t input;
- convMat_t output;
- convMat_t filter;
- convParams_t params;
-
- int pad_l, pad_r, pad_t, pad_b;
- if (padding)
- {
- int pad_w = kernel_dilation - 1;
- int pad_h = kernel_dilation - 1;
- pad_l = pad_w / 2;
- pad_r = pad_w - pad_l;
- pad_t = pad_h / 2;
- pad_b = pad_h - pad_t;
- }
- else
- {
- pad_l = pad_r = pad_t = pad_b = 0;
- }
-
- input.w = w;
- input.h = h;
- input.c = inch;
- input.data = (float *)malloc(input.w * input.h * input.c * sizeof(float));
- if (!input.data)
- return 0;
-
- // output.w = (w + pad_l + pad_r - kernel_dilation) / stride + 1;
- // output.h = (h + pad_t + pad_b - kernel_dilation) / stride + 1;
- output.w = stride * (w - 1) + kernel_dilation - (pad_l + pad_r);
- output.h = stride * (h - 1) + kernel_dilation - (pad_t + pad_b);
- output.c = outch;
- output.data = (float *)malloc(output.w * output.h * output.c * sizeof(float));
- if (!output.data)
- return 0;
-
- filter.w = kernel_size;
- filter.h = kernel_size;
- filter.c = outch;
- filter.n = inch;
- filter.data = (float *)malloc(filter.w * filter.h * filter.c * filter.n * sizeof(float));
- if (!filter.data)
- return 0;
-
- for (int i = 0; i < input.w * input.h * input.c; i++)
- {
- input.data[i] = 0.001 + i * 0.000001;
- }
-
- for (int i = 0; i < filter.w * filter.h * filter.c * filter.n; i++)
- {
- filter.data[i] = 0.001 - i * 0.000001;
- }
-
- params.kernel_w = kernel_size;
- params.kernel_h = kernel_size;
- params.stride_w = stride;
- params.stride_h = stride;
- params.padding = padding;
- params.pad_w = pad_l;
- params.pad_h = pad_t;
- params.dilation_w = dilation;
- params.dilation_h = dilation;
-
- const int m = params.kernel_h * params.kernel_w * output.c;
- const int n = input.w * input.h;
- const int k = input.c;
-
- if (conv_type == 0)
- {
- for (int nloop = 0; nloop < loops; nloop++)
-
- {
- gettimeofday(&start, NULL);
-
- direct_deconv_rowmajor(&input, &output, &filter, &params);
-
- gettimeofday(&end, NULL);
- total_time +=
- ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000;
- }
- }
- else if (conv_type == 1)
- {
- for (int nloop = 0; nloop < loops; nloop++)
-
- {
- gettimeofday(&start, NULL);
-
- for (int i = 0; i < output.w * output.h * output.c; i++)
- {
- output.data[i] = 0;
- }
-
- srcn_deconvolution2D(input, filter, output, params, thread_num, row_major);
-
- gettimeofday(&end, NULL);
- total_time +=
- ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000;
- }
- }
-
- const int output_size = output.w * output.h * output.c;
-
- int div = output_size < 16 ? output_size : 16;
- int num = output_size > 64 ? 64 : output_size;
-
- float *c_ptr = output.data;
- for (int i = 0; i < num; i++)
- {
- printf("%f ", c_ptr[i]);
- if ((i + 1) % div == 0)
- printf("\n");
- }
-
- printf("\n");
-
- c_ptr = &output.data[output_size - num];
- for (int i = 0; i < num; i++)
- {
- printf("%f ", c_ptr[i]);
- if ((i + 1) % div == 0)
- printf("\n");
- }
-
- printf("\n");
-
- long long total_size = (long long)m * n * k * 2;
- printf("AVER Time consuming: %.2fms, total size: %lld, (GFLOP: %.2f)\n", total_time / loops,
- total_size, (double)total_size / (total_time / loops) / 1000000);
-
- free(input.data);
- free(output.data);
- free(filter.data);
-
- return 0;
-}
-
-static int test_batch_conv(const int batch, const int w, const int h, const int kernel_size,
- const int stride, const int inch, const int outch, const int padding,
- const int conv_type, const int thread_num, const int loops)
-{
- struct timeval start, end;
- float total_time = 0.f;
-
- const int dilation = 1;
-
- const int kernel_dilation = dilation * (kernel_size - 1) + 1;
-
- convMat_t input;
- convMat_t output;
- convMat_t filter;
- convParams_t params;
-
- int pad_l, pad_r, pad_t, pad_b;
- if (padding)
- {
- int pad_w = kernel_dilation + (w - 1) / stride * stride - w;
- int pad_h = kernel_dilation + (h - 1) / stride * stride - h;
- pad_l = pad_w / 2;
- pad_r = pad_w - pad_l;
- pad_t = pad_h / 2;
- pad_b = pad_h - pad_t;
- }
- else
- {
- pad_l = pad_r = pad_t = pad_b = 0;
- }
-
- input.w = w;
- input.h = h;
- input.c = inch;
- input.n = batch;
- input.data = (float *)malloc(input.n * input.w * input.h * input.c * sizeof(float));
- if (!input.data)
- return 0;
-
- output.w = (w + pad_l + pad_r - kernel_dilation) / stride + 1;
- output.h = (h + pad_t + pad_b - kernel_dilation) / stride + 1;
- output.c = outch;
- output.n = batch;
- output.data = (float *)malloc(output.n * output.w * output.h * output.c * sizeof(float));
- if (!output.data)
- return 0;
-
- filter.w = kernel_size;
- filter.h = kernel_size;
- filter.c = inch;
- filter.n = outch;
- filter.data = (float *)malloc(filter.w * filter.h * filter.c * filter.n * sizeof(float));
- if (!filter.data)
- return 0;
-
- for (int i = 0; i < input.w * input.h * input.c * input.n; i++)
- {
- input.data[i] = 0.001 + i * 0.000001;
- }
-
- for (int i = 0; i < filter.w * filter.h * filter.c * filter.n; i++)
- {
- filter.data[i] = 0.001 - i * 0.000001;
- }
-
- params.kernel_w = kernel_size;
- params.kernel_h = kernel_size;
- params.stride_w = stride;
- params.stride_h = stride;
- params.padding = padding;
- params.pad_w = pad_l;
- params.pad_h = pad_t;
- params.dilation_w = dilation;
- params.dilation_h = dilation;
-
- const int m = output.c;
- const int n = output.w * output.h;
- const int k = params.kernel_h * params.kernel_w * input.c;
-
- if (conv_type == 1)
- {
- for (int nloop = 0; nloop < loops; nloop++)
-
- {
- // printf("nloop = %d, thread_num = %d\n", nloop, thread_num);
- // class srcn_sgemm my_gemm(input, filter, output, params, thread_num, col_major);
-
- gettimeofday(&start, NULL);
-
- srcn_batch_convolution2D(input, filter, output, params, NULL, thread_num, col_major);
-
- gettimeofday(&end, NULL);
- total_time +=
- ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000;
- }
- }
- else if (conv_type == 2)
- {
- float *winograd_weight;
-
- // trans_weight2winograd(filter, &winograd_weight);
-
- winogradParams_t wparams = {params.kernel_w,
- params.kernel_h,
- params.stride_w,
- params.stride_h,
- params.dilation_w,
- params.dilation_h,
- input.n,
- w,
- h,
- input.c,
- output.c,
- thread_num,
- col_major,
- filter.data};
- winograd_weight = trans_weight2winograd(wparams);
-
- for (int nloop = 0; nloop < loops; nloop++)
-
- {
- gettimeofday(&start, NULL);
-
- srcn_batch_convolution2D(input, filter, output, params, winograd_weight, thread_num,
- col_major);
-
- gettimeofday(&end, NULL);
- total_time +=
- ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000;
- }
- }
-
- int div = m * n < 16 ? m * n : 16;
- int num = m * n > 64 ? 64 : m * n;
-
- float *c_ptr = output.data;
- for (int i = 0; i < num; i++)
- {
- printf("%f ", c_ptr[i]);
- if ((i + 1) % div == 0)
- printf("\n");
- }
-
- printf("\n");
-
- c_ptr = &output.data[m * n * batch - num];
- for (int i = 0; i < num; i++)
- {
- printf("%f ", c_ptr[i]);
- if ((i + 1) % div == 0)
- printf("\n");
- }
-
- printf("\n");
-
- long long total_size = (long long)batch * m * n * k * 2;
- printf("AVER Time consuming: %.2fms, total size: %lld, (GFLOP: %.2f)\n", total_time / loops,
- total_size, (double)total_size / (total_time / loops) / 1000000);
-
- free(input.data);
- free(output.data);
- free(filter.data);
-
- return 0;
-}
-
-static int test_depthwise_conv(const int w, const int h, const int kernel_size, const int stride,
- const int inch, const int outch, const int padding,
- const int conv_type, const int thread_num, const int loops)
-{
- if (outch != inch)
- return -1;
- struct timeval start, end;
- float total_time = 0.f;
-
- const int dilation = 1;
-
- const int kernel_dilation = dilation * (kernel_size - 1) + 1;
-
- convMat_t input;
- convMat_t output;
- convMat_t filter;
- convMat_t bias;
- convParams_t params;
-
- int pad_l, pad_r, pad_t, pad_b;
- if (padding)
- {
- int pad_w = kernel_dilation + (w - 1) / stride * stride - w;
- int pad_h = kernel_dilation + (h - 1) / stride * stride - h;
- pad_l = pad_w / 2;
- pad_r = pad_w - pad_l;
- pad_t = pad_h / 2;
- pad_b = pad_h - pad_t;
- }
- else
- {
- pad_l = pad_r = pad_t = pad_b = 0;
- }
-
- input.w = w;
- input.h = h;
- input.c = inch;
- input.n = 1;
-#ifdef NCNN
- input.data =
- (float *)malloc(alignSize(input.w * input.h, 16 / sizeof(float)) * input.c * sizeof(float));
-#else
- input.data = (float *)malloc(input.w * input.h * input.c * sizeof(float));
-#endif
- if (!input.data)
- return 0;
-
- output.w = (w + pad_l + pad_r - kernel_dilation) / stride + 1;
- output.h = (h + pad_t + pad_b - kernel_dilation) / stride + 1;
- output.c = outch;
- output.n = 1;
-
-#ifdef NCNN
- output.data = (float *)malloc(alignSize(output.w * output.h, 16 / sizeof(float)) * output.c *
- sizeof(float));
-#else
- output.data = (float *)malloc(output.w * output.h * output.c * sizeof(float));
-#endif
- const int gpu_data_off = output.w * output.h * output.c;
- if (!output.data)
- return 0;
-
- for (int i = 0; i < output.w * output.h * output.c; i++)
- {
- output.data[i] = 1.f;
- }
-
- filter.w = kernel_size;
- filter.h = kernel_size;
- filter.c = 1;
- filter.n = outch;
- filter.data = (float *)malloc(filter.w * filter.h * filter.c * filter.n * sizeof(float));
- if (!filter.data)
- return 0;
-
- for (int i = 0; i < input.w * input.h * input.c; i++)
- {
- input.data[i] = 0.001 + i * 0.000001;
- }
-
- for (int i = 0; i < filter.w * filter.h * filter.c * filter.n; i++)
- {
- filter.data[i] = 0.001 - i * 0.000001;
- }
-
- bias.w = outch;
- bias.data = (float *)malloc(bias.w * sizeof(float));
- if (!bias.data)
- return 0;
- for (int i = 0; i < bias.w; i++)
- {
- bias.data[i] = 0.f;
- }
-
- params.kernel_w = kernel_size;
- params.kernel_h = kernel_size;
- params.stride_w = stride;
- params.stride_h = stride;
- params.padding = padding;
- params.pad_w = pad_l;
- params.pad_h = pad_t;
- params.dilation_w = dilation;
- params.dilation_h = dilation;
-
- const int m = output.c;
- const int n = output.w * output.h;
- const int k = params.kernel_h * params.kernel_w * input.c;
-
- // ocl_context_t context;
- size_t local_min[2] = {4, 4};
- /**
- if(conv_type == 1)
- {
- if(init_gpu(&context) < 0) return -1;
- depthwise_conv_3x3S1_tune(&context, &input, &filter, &output, local_min);
- }**/
-
- gettimeofday(&start, NULL);
- if (conv_type == 0)
- srcn_depthwise_conv(input, filter, output, bias, params, 4,
- row_major); // convdw3x3s1_neon(input, output, filter, filter);
- // else if(conv_type == 1) depthwise_conv_gpu3x3S1(&context, &input, &filter, &output, &params,
- // local_min);
- else if (conv_type == 2)
- {
- for (int i = 0; i < input.c; i++)
- {
- convMat_t _input;
- convMat_t _output;
- convMat_t _filter;
- convParams_t _params = params;
-
- _input.w = input.w;
- _input.h = input.h;
- _input.c = 1;
- _input.n = 1;
-#ifdef NCNN
- _input.data = input.data + i * alignSize(input.w * input.h, 16 / sizeof(float));
-#else
- _input.data = input.data + i * input.w * input.h;
-#endif
-
- _output.w = output.w;
- _output.h = output.h;
- _output.c = 1;
- _output.n = 1;
-#ifdef NCNN
- _output.data = output.data + i * alignSize(output.w * output.h, 16 / sizeof(float));
-#else
- _output.data = output.data + i * output.w * output.h;
-#endif
- _filter.w = filter.w;
- _filter.h = filter.h;
- _filter.c = 1; // filter.c;
- _filter.n = 1; // filter.n;
- _filter.data = filter.data + i * 9;
-
- srcn_convolution2D(_input, _filter, _output, _params, NULL, 1, row_major);
- // direct_conv_rowmajor(&_input, &_output, &_filter, &_params);
- }
- }
-
- gettimeofday(&end, NULL);
- total_time +=
- ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000;
-
- int div = m * n < 16 ? m * n : 16;
- int num = m * n > 64 ? 64 : m * n;
-
- if (conv_type == 0)
- printf("[CPU RESULT]\n");
- else if (conv_type == 1)
- printf("[GPU RESULT]\n");
- float *c_ptr = output.data;
- for (int i = 0; i < num; i++)
- {
- printf("%f ", c_ptr[i]);
- if ((i + 1) % div == 0)
- printf("\n");
- }
-
- printf("\n");
-
- c_ptr = &output.data[m * n - num];
- for (int i = 0; i < num; i++)
- {
- printf("%f ", c_ptr[i]);
- if ((i + 1) % div == 0)
- printf("\n");
- }
-
- printf("\n");
-
- long long total_size = (long long)m * n * k * 2;
- printf("AVER Time consuming: %.2fms, total size: %lld, (GFLOP: %.2f)\n", total_time / loops,
- total_size, (double)total_size / (total_time / loops) / 1000000);
-
- free(input.data);
- free(output.data);
- free(filter.data);
- free(bias.data);
-
- return 0;
-}
-
-//#define TEST_SGEMM
-#define TEST_CONV
-//#define TEST_DECONV
-//#define TEST_BATCH_CONV
-//#define TEST_DEPTHWISE_CONV
-
-int main(int argc, char **argv)
-{
-#ifdef TEST_SGEMM
- if (argc < 6)
- return 0;
-
- const int m = atoi(argv[1]);
- const int n = atoi(argv[2]);
- const int k = atoi(argv[3]);
- const int type = atoi(argv[4]);
- const int loops = atoi(argv[5]);
-
- test_sgemm(m, n, k, type, loops);
-#elif (defined TEST_CONV)
- if (argc < 10)
- return 0;
- const int w = atoi(argv[1]);
- const int h = atoi(argv[2]);
- const int kernel_size = atoi(argv[3]);
- const int stride = atoi(argv[4]);
- const int outch = atoi(argv[5]);
- const int inch = atoi(argv[6]);
- const int padding = atoi(argv[7]);
- const int conv_type = atoi(argv[8]);
- const int thread_num = atoi(argv[9]);
- int loops = 1;
- if (argc > 10)
- loops = atoi(argv[10]);
- test_conv(w, h, kernel_size, stride, inch, outch, padding, conv_type, thread_num, loops);
-#elif (defined TEST_DECONV)
- if (argc < 10)
- return 0;
- const int w = atoi(argv[1]);
- const int h = atoi(argv[2]);
- const int kernel_size = atoi(argv[3]);
- const int stride = atoi(argv[4]);
- const int outch = atoi(argv[5]);
- const int inch = atoi(argv[6]);
- const int padding = atoi(argv[7]);
- const int conv_type = atoi(argv[8]);
- const int thread_num = atoi(argv[9]);
- int loops = 1;
- if (argc > 10)
- loops = atoi(argv[10]);
- test_deconv(w, h, kernel_size, stride, inch, outch, padding, conv_type, thread_num, loops);
-#elif (defined TEST_BATCH_CONV)
- if (argc < 11)
- return 0;
- const int batch = atoi(argv[1]);
- const int w = atoi(argv[2]);
- const int h = atoi(argv[3]);
- const int kernel_size = atoi(argv[4]);
- const int stride = atoi(argv[5]);
- const int outch = atoi(argv[6]);
- const int inch = atoi(argv[7]);
- const int padding = atoi(argv[8]);
- const int conv_type = atoi(argv[9]);
- const int thread_num = atoi(argv[10]);
- int loops = 1;
- if (argc > 11)
- loops = atoi(argv[11]);
- test_batch_conv(batch, w, h, kernel_size, stride, inch, outch, padding, conv_type, thread_num,
- loops);
-#elif (defined TEST_DEPTHWISE_CONV)
- if (argc < 10)
- return 0;
- const int w = atoi(argv[1]);
- const int h = atoi(argv[2]);
- const int kernel_size = atoi(argv[3]);
- const int stride = atoi(argv[4]);
- const int outch = atoi(argv[5]);
- const int inch = atoi(argv[6]);
- const int padding = atoi(argv[7]);
- const int conv_type = atoi(argv[8]);
- const int thread_num = atoi(argv[9]);
- int loops = 1;
- if (argc > 10)
- loops = atoi(argv[10]);
- test_depthwise_conv(w, h, kernel_size, stride, inch, outch, padding, conv_type, thread_num,
- loops);
-#endif
-
- return 0;
-}
-
-} // namespace srcn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/srcn_conv.cc b/compute/ncnn/src/srcn/srcn_conv.cc
deleted file mode 100644
index bb8e4f13e..000000000
--- a/compute/ncnn/src/srcn/srcn_conv.cc
+++ /dev/null
@@ -1,614 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include "ncnn/srcn/conv_type.h"
-#include "common.h"
-#include "sgemm_singlethread.h"
-#include "conv_sgemm_singlethread.h"
-#include "conv_sgemm_multithreads.h"
-#include "conv_winograd.h"
-#include "direct_conv_colmajor.h"
-#include "winograd.h"
-
-#include "deconv_sgemm_multithreads.h"
-#include "conv_sparse.h"
-#include "conv_winograd_batch.h"
-
-namespace nnfw
-{
-namespace srcn
-{
-
-static inline void weight_transfer(float *out, float *in, int H, int W, int C, int N)
-{
- // HWCN ---> NCHW
- for (int h = 0; h < H; ++h)
- {
- for (int w = 0; w < W; ++w)
- {
- for (int c = 0; c < C; ++c)
- {
- for (int n = 0; n < N; ++n)
- {
- int index_in = h * W * C * N + w * C * N + c * N + n;
- int index_out = n * C * H * W + c * H * W + h * W + w;
- out[index_out] = in[index_in];
- }
- }
- }
- }
-}
-
-int check_winograd(winogradParams_t &params)
-{
- int winograd_flag =
- ((params.kernel_w == params.kernel_h) && (params.stride_w == params.stride_h) &&
- (params.kernel_w == 3 || params.kernel_w == 5) && (params.stride_w == 1) &&
- (params.dilation_w == 1) && (params.dilation_h == 1));
-
- int winograd_channel_cond = 64 * 64;
- int winograd_image_cond = 10 * 10;
-
-#ifdef TIZEN
- if (params.num_threads > 1)
- {
- winograd_channel_cond = 128 * 128;
- winograd_image_cond = 20 * 20;
- }
-#endif // TIZEN
-
- winograd_flag &= (params.inch * params.outch >= winograd_channel_cond);
-
- if (params.w > 0 && params.h > 0 && params.batch == 1)
- {
- winograd_flag &= (params.w * params.h >= winograd_image_cond);
- }
-
- return winograd_flag;
-}
-
-float *trans_weight2winograd(winogradParams_t &params, unsigned int *size = NULL)
-{
- int M, N;
- const double *G;
-
- float *winograd_weight;
-
- int winograd_channel_cond = 64 * 64;
- int winograd_image_cond = 10 * 10;
-
-#ifdef TIZEN
- if (params.num_threads > 1)
- {
- winograd_channel_cond = 128 * 128;
- // int winograd_image_cond = 20 * 20;
- }
-#endif // TIZEN
-
- int winograd_flag =
- ((params.kernel_w == params.kernel_h) && (params.stride_w == params.stride_h) &&
- (params.kernel_w == 3 || params.kernel_w == 5) && (params.stride_w == 1) &&
- (params.dilation_w == 1) && (params.dilation_h == 1));
- if (!winograd_flag)
- return NULL;
-
- winograd_flag = (params.inch * params.outch >= winograd_channel_cond);
-
- if (!winograd_flag)
- return NULL;
-
- if (params.w > 0 && params.h > 0 && params.batch == 1)
- {
- winograd_flag &= (params.w * params.h >= winograd_image_cond);
- if (!winograd_flag)
- return NULL;
- }
-
- const int kernel_size = params.kernel_w;
- const int inch = params.inch;
- const int outch = params.outch;
- float *weight_data = params.weight_data;
-
- /*Step 1: transfer weight to winograd domain*/
- if (kernel_size == 3)
- {
- if (params.w == 4 && params.batch > 1)
- {
- M = winograd_para_3x3s1_2::M;
- N = winograd_para_3x3s1_2::N;
- G = winograd_para_3x3s1_2::getG();
- }
- else
- {
- M = winograd_para_3x3s1::M;
- N = winograd_para_3x3s1::N;
- G = winograd_para_3x3s1::getG();
- }
- }
- else
- {
- M = winograd_para_5x5s1::M;
- N = winograd_para_5x5s1::N;
- G = winograd_para_5x5s1::getG();
- }
-
- int tile_h_in_, tile_w_in_;
- tile_h_in_ = tile_w_in_ = M;
-
- if (size)
- *size = tile_h_in_ * tile_w_in_ * inch * outch;
-
- winograd_weight = new float[tile_h_in_ * tile_w_in_ * inch * outch];
- if (!winograd_weight)
- return NULL;
-
- float *winograd_g = new float[M * M * N * N];
- if (!winograd_g)
- {
- delete[] winograd_weight;
- return NULL;
- }
-
- kronecker_product(winograd_g, G, G, M, N, M, N);
-
- if (params.conv_type == col_major)
- {
- weight_data = new float[kernel_size * kernel_size * inch * outch];
- if (!weight_data)
- {
- delete[] winograd_weight;
- delete[] winograd_g;
- return NULL;
- }
- weight_transfer(weight_data, params.weight_data, kernel_size, kernel_size, inch, outch);
- }
-
- class sgemm_singlethread sgemm(rowMajor, notrans, trans, tile_h_in_ * tile_w_in_, inch * outch,
- kernel_size * kernel_size, winograd_g, weight_data,
- winograd_weight, 1);
-
- sgemm.run();
-
- if (params.conv_type == col_major)
- delete[] weight_data;
-
- delete[] winograd_g;
-
- return winograd_weight;
-}
-
-void winograd_release(float *winograd_weight)
-{
- if (winograd_weight)
- delete[] winograd_weight;
-}
-
-void srcn_convolution2D(const convMat_t &in_mat, const convMat_t &weights_mat, convMat_t &out_mat,
- const convParams_t &in_param, const float *winograd_weight, int num_threads,
- convType_t conv_type)
-{
- const int outw = out_mat.w;
- const int outh = out_mat.h;
- const int inch = in_mat.c;
- const int outch = out_mat.c;
-
- int winograd_flag =
- ((in_param.kernel_w == in_param.kernel_h) && (in_param.stride_w == in_param.stride_h) &&
- (in_param.kernel_w == 3 || in_param.kernel_w == 5) && (in_param.stride_w == 1) &&
- (winograd_weight) && (in_param.dilation_w == 1) && (in_param.dilation_h == 1));
-
- int direct_flag = ((conv_type == col_major) && (in_param.stride_w == in_param.stride_h) &&
- (in_param.dilation_w == 1) && (in_param.dilation_h == 1));
-
- int winograd_image_cond = 10 * 10;
- int winograd_channel_cond = 64 * 64;
- int direct_image_cond = 4 * 4;
- int direct_channel_cond = 16 * 16;
-
-#ifdef TIZEN
- if (num_threads > 1)
- {
- winograd_image_cond = 20 * 20;
- winograd_channel_cond = 128 * 128;
- }
-#endif
-
- winograd_flag &=
- ((outw * outh >= winograd_image_cond) && (inch * outch >= winograd_channel_cond));
- direct_flag &= ((outw * outh <= direct_image_cond) || (inch * outch <= direct_channel_cond));
-
- if (num_threads == 1)
- {
- if (winograd_flag)
- {
- class conv_winograd conv(in_mat, out_mat, in_param, conv_type, winograd_weight, num_threads,
- in_mat.w * in_mat.h, outw * outh, outch);
- conv.run();
- }
- else if (direct_flag)
- {
- direct_conv_colmajor(in_mat, out_mat, weights_mat, in_param, num_threads);
- }
- else
- {
- class conv_sgemm_singlethread conv(in_mat, weights_mat, out_mat, in_param, conv_type);
- conv.run();
- }
- }
- else if (num_threads > 1)
- {
- if (winograd_flag)
- {
- const int npart = num_threads > 4 ? 4 : num_threads;
-
- omp_set_num_threads(npart);
-
- if (conv_type == col_major)
- {
- if (outch < 512)
- {
- const int _H = (outh + npart - 1) / npart;
-
- if (_H < in_param.pad_h)
- {
- class conv_winograd conv(in_mat, out_mat, in_param, conv_type, winograd_weight, 1,
- in_mat.w * in_mat.h, outw * outh, outch);
- conv.run();
- return;
- }
-
- // const int ih = (_H - 1) * in_param.stride_w + in_param.kernel_w;
- // const int oh = _H;
- const int nh = (outh + _H - 1) / _H;
- int rh = outh % _H;
- if (rh == 0)
- rh = _H;
-
-#pragma omp parallel for
- for (int i = 0; i < nh; i++)
- {
- int pad_h_part = 0;
- convMat_t in_part;
- convMat_t out_part;
- const int oh = (i != nh - 1 || rh == 0) ? _H : rh;
- const int ih = (oh - 1) * in_param.stride_w + in_param.kernel_w;
-
- in_part.w = in_mat.w;
- in_part.c = inch;
- out_part.w = outw;
- out_part.c = outch;
- in_part.h = ih;
- out_part.h = oh;
-
- int bottom_offset = i * _H - in_param.pad_h;
- if (bottom_offset < 0)
- {
- bottom_offset = 0;
- pad_h_part = in_param.pad_h;
- }
- in_part.data = in_mat.data + bottom_offset * in_mat.w * inch * in_param.stride_w;
- if (ih + bottom_offset > in_mat.h)
- {
- in_part.h = in_mat.h - bottom_offset;
- }
-
- out_part.data = out_mat.data + i * _H * outw * outch;
-
- convParams_t params = {
- in_param.kernel_w, in_param.kernel_h, in_param.stride_w, in_param.stride_h, 1, 1,
- in_param.padding, in_param.pad_w, pad_h_part};
-
- class conv_winograd conv(in_part, out_part, params, conv_type, winograd_weight,
- num_threads, in_mat.w * in_mat.h, outw * outh, outch);
- conv.run();
- }
- }
- else
- {
- const int _OUTC = (outch + npart - 1) / npart;
-
- const int nc = (outch + _OUTC - 1) / _OUTC;
- int rc = out_mat.c % _OUTC;
- if (rc == 0)
- rc = _OUTC;
-
-#pragma omp parallel for
- for (int i = 0; i < nc; i++)
- {
- const float *weight_part;
- convMat_t out_part;
-
- const int oc = (i != nc - 1 || rc == 0) ? _OUTC : rc;
-
- out_part.w = outw;
- out_part.h = outh;
- out_part.c = oc;
- out_part.data = out_mat.data + i * _OUTC;
- weight_part = winograd_weight + i * _OUTC * inch;
- class conv_winograd conv(in_mat, out_part, in_param, conv_type, weight_part,
- num_threads, in_mat.w * in_mat.h, outw * outh, outch);
- conv.run();
- }
- }
- }
- else if (conv_type == row_major)
- {
-#ifdef TIZEN
- if (outch < 512)
-#else // TIZEN
- if (outh >= 20)
-#endif // TIZEN
- {
- const int _H = (outh + npart - 1) / npart;
-
- if (_H < in_param.pad_h)
- {
- class conv_winograd conv(in_mat, out_mat, in_param, conv_type, winograd_weight, 1,
- in_mat.w * in_mat.h, outw * outh, outch);
- conv.run();
- return;
- }
-
- // const int ih = (_H - 1) * in_param.stride_w + in_param.kernel_w;
- // const int oh = _H;
- const int nh = (outh + _H - 1) / _H;
- int rh = outh % _H;
- if (rh == 0)
- rh = _H;
-
-#pragma omp parallel for
- for (int i = 0; i < nh; i++)
- {
- int pad_h_part = 0;
- convMat_t in_part;
- convMat_t out_part;
- const int oh = (i != nh - 1 || rh == 0) ? _H : rh;
- const int ih = (oh - 1) * in_param.stride_w + in_param.kernel_w;
-
- in_part.w = in_mat.w;
- in_part.c = inch;
- out_part.w = outw;
- out_part.c = outch;
- in_part.h = ih;
- out_part.h = oh;
-
- int bottom_offset = i * _H - in_param.pad_h;
- if (bottom_offset < 0)
- {
- bottom_offset = 0;
- pad_h_part = in_param.pad_h;
- }
- in_part.data = in_mat.data + bottom_offset * in_mat.w * in_param.stride_w;
- if (ih + bottom_offset > in_mat.h)
- {
- in_part.h = in_mat.h - bottom_offset;
- }
-
- out_part.data = out_mat.data + i * _H * outw;
-
- convParams_t params = {
- in_param.kernel_w, in_param.kernel_h, in_param.stride_w, 1, 1,
- in_param.stride_h, in_param.padding, in_param.pad_w, pad_h_part};
-
- class conv_winograd conv(in_part, out_part, params, conv_type, winograd_weight,
- num_threads, in_mat.w * in_mat.h, outw * outh, outch);
- conv.run();
- }
- }
- else
- {
- const int _OUTC = (outch + npart - 1) / npart;
-
- const int nc = (outch + _OUTC - 1) / _OUTC;
- int rc = out_mat.c % _OUTC;
- if (rc == 0)
- rc = _OUTC;
-
-#pragma omp parallel for
- for (int i = 0; i < nc; i++)
- {
- const float *weight_part;
- convMat_t out_part;
-
- const int oc = (i != nc - 1 || rc == 0) ? _OUTC : rc;
-
- out_part.w = outw;
- out_part.h = outh;
- out_part.c = oc;
- out_part.data = out_mat.data + i * _OUTC * outw * outh;
- weight_part = winograd_weight + i * _OUTC * inch;
- class conv_winograd conv(in_mat, out_part, in_param, conv_type, weight_part,
- num_threads, in_mat.w * in_mat.h, outw * outh, outch);
- conv.run();
- }
- }
- }
- }
- else if (direct_flag)
- {
- direct_conv_colmajor(in_mat, out_mat, weights_mat, in_param, num_threads);
- }
- else
- {
- class conv_sgemm_multithreads conv(in_mat, weights_mat, out_mat, in_param, num_threads,
- conv_type);
- conv.run();
- }
- }
-}
-
-void srcn_deconvolution2D(const convMat_t &in_mat, const convMat_t &weights_mat, convMat_t &out_mat,
- const convParams_t &in_param, int num_threads, convType_t conv_type)
-{
- class deconv_sgemm_multithreads deconv(in_mat, weights_mat, out_mat, in_param, num_threads,
- conv_type);
- deconv.run();
-}
-
-void *trans_weight2sparse(const convMat_t &weights_mat)
-{
- const int kernel_w = weights_mat.w;
- const int kernel_h = weights_mat.h;
- const int inch = weights_mat.c;
- const int outch = weights_mat.n;
-
- const int nch = (outch + BCH - 1) / BCH;
- const int rch = outch % BCH;
-
- const float *data = weights_mat.data;
- const int klength = inch * kernel_h * kernel_w;
-
- sparse_weight_t *sparse_weight = new sparse_weight_t[nch];
- if (!sparse_weight)
- return NULL;
-
- for (int i = 0; i < nch; i++)
- {
- int _bch = (i != nch - 1 || rch == 0) ? BCH : rch;
- sparse_weight_t *sparse_weight_n = &sparse_weight[i];
- sparse_weight_n->mxk = 0;
-
- for (int j = 0; j < _bch; j++)
- {
- for (int l = 0; l < klength; l++)
- {
- float val = *(data + (i * BCH + j) * klength + l);
- if (val != 0)
- {
- sparse_weight_n->mxk++;
- }
- }
- }
- }
-
- for (int i = 0; i < nch; i++)
- {
- int _bch = (i != nch - 1 || rch == 0) ? BCH : rch;
- sparse_weight_t *sparse_weight_n = &sparse_weight[i];
- sparse_weight_n->wdata = new weight_data_t[sparse_weight_n->mxk];
- int index = 0;
-
- for (int l = 0; l < klength; l++)
- {
- for (int j = 0; j < _bch; j++)
- {
- float val = *(data + (i * BCH + j) * klength + l);
- if (val != 0)
- {
- sparse_weight_n->wdata[index].m = i * BCH + j;
- sparse_weight_n->wdata[index].k = l;
- sparse_weight_n->wdata[index++].data = val;
- }
- }
- }
- }
-
- return (void *)sparse_weight;
-}
-
-void sparse_release(const int outch, void *ptr)
-{
- sparse_weight_t *sparse_weight = (sparse_weight_t *)ptr;
- const int nch = (outch + BCH - 1) / BCH;
-
- if (!sparse_weight)
- return;
-
- for (int i = 0; i < nch; i++)
- {
- sparse_weight_t *sparse_weight_n = &sparse_weight[i];
- if (sparse_weight_n->wdata)
- delete[] sparse_weight_n->wdata;
- }
-
- if (sparse_weight)
- delete[] sparse_weight;
-}
-
-void srcn_sparse_convolution2D(const convMat_t &in_mat, convMat_t &out_mat,
- const convParams_t &in_param, const void *sparse_weight,
- int number_threas, convType_t conv_type)
-{
- class conv_sparse conv(in_mat, out_mat, in_param, (const sparse_weight_t *)sparse_weight,
- number_threas, conv_type);
-
- for (int i = 0; i < out_mat.c * out_mat.h * out_mat.w; i++)
- {
- *(out_mat.data + i) = 0;
- }
-
- conv.run();
-}
-
-void srcn_batch_convolution2D(const convMat_t &in_mat, const convMat_t &weights_mat,
- convMat_t &out_mat, const convParams_t &in_param,
- const float *winograd_weight, int num_threads, convType_t conv_type)
-{
- int winograd_flag = (winograd_weight != NULL);
-
- if (winograd_flag)
- {
- if (num_threads > 1)
- {
- omp_set_num_threads(num_threads);
- const int batch = in_mat.n;
- const int npart = (batch + num_threads - 1) / num_threads;
- const int nn = (batch + npart - 1) / npart;
- const int rn = batch % npart;
-
-#pragma omp parallel for
- for (int i = 0; i < nn; i++)
- {
- const int pn = (i != nn - 1 || rn == 0) ? npart : rn;
- convMat_t in_mat_part = {in_mat.w, in_mat.h, in_mat.c, pn,
- in_mat.data + i * npart * in_mat.w * in_mat.h * in_mat.c};
- convMat_t out_mat_part = {out_mat.w, out_mat.h, out_mat.c, pn,
- out_mat.data + i * npart * out_mat.w * out_mat.h * out_mat.c};
-
- class conv_winograd_batch conv(in_mat_part, out_mat_part, in_param, conv_type,
- winograd_weight, num_threads);
- conv.run();
- }
- }
- else
- {
- class conv_winograd_batch conv(in_mat, out_mat, in_param, conv_type, winograd_weight,
- num_threads);
- conv.run();
- }
- }
- else
- {
- if (num_threads == 1)
- {
- class conv_sgemm_singlethread conv(in_mat, weights_mat, out_mat, in_param, conv_type);
- conv.run();
- }
- else
- {
- class conv_sgemm_multithreads conv(in_mat, weights_mat, out_mat, in_param, num_threads,
- conv_type);
- conv.run();
- }
- }
-}
-
-} // namespace srcn
-} // namespace nnfw
diff --git a/compute/ncnn/src/srcn/winograd.h b/compute/ncnn/src/srcn/winograd.h
deleted file mode 100644
index 5ad8f1126..000000000
--- a/compute/ncnn/src/srcn/winograd.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SRCN_WINOGRAD_H__
-#define __NNFW_SRCN_WINOGRAD_H__
-
-namespace nnfw
-{
-namespace srcn
-{
-
-struct winograd_para_3x3s1
-{
- static const int M = 3 + 4 - 1;
- static const int N = 3;
-
- static const double *getG()
- {
- static const double G[M * N] = {
- 1. / 4., 0, 0, -1. / 6., -1. / 6., -1. / 6., -1. / 6., 1. / 6., -1. / 6.,
- 1. / 24., 1. / 12., 1. / 6., 1. / 24., -1. / 12., 1. / 6., 0, 0, 1,
- };
- return G;
- }
-
- static const double *getA()
- {
- static const double A[M * (M - N + 1)] = {
- 1, 0, 0, 0, 1, 1, 1, 1, 1, -1, 1, -1, 1, 2, 4, 8, 1, -2, 4, -8, 0, 0, 0, 1,
- };
- return A;
- }
-
- static const double *getB()
- {
- static const double B[M * M] = {
- 4, 0, 0, 0, 0, 0, 0, -4, 4, -2, 2, 4, -5, -4, -4, -1, -1, 0,
- 0, 1, -1, 2, -2, -5, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
- };
- return B;
- };
-};
-
-struct winograd_para_3x3s1_2
-{
- static const int M = 3 + 2 - 1;
- static const int N = 3;
-
- static const double *getG()
- {
- static const double G[M * N] = {
- 1, 0, 0, 1. / 2., 1. / 2., 1. / 2., 1. / 2., -1. / 2., 1. / 2., 0, 0, 1,
- };
- return G;
- }
-
- static const double *getA()
- {
- static const double A[M * (M - N + 1)] = {
- 1, 0, 1, 1, 1, -1, 0, 1,
- };
- return A;
- }
-
- static const double *getB()
- {
- static const double B[M * M] = {
- 1, 0, 0, 0, 0, 1, -1, -1, -1, 1, 1, 0, 0, 0, 0, 1,
- };
- return B;
- };
-};
-
-struct winograd_para_5x5s1
-{
- static const int M = 5 + 4 - 1;
- static const int N = 5;
-
- static const double *getG()
- {
- static const double G[M * N] = {
- 1, 0, 0, 0, 0, -2. / 9., -2. / 9., -2. / 9.,
- -2. / 9., -2. / 9., -2. / 9., 2. / 9., -2. / 9., 2. / 9., -2. / 9., 1. / 90.,
- 1. / 45., 2. / 45., 4. / 45., 8. / 45., 1. / 90., -1. / 45., 2. / 45., -4. / 45.,
- 8. / 45., 4. / 45., 2. / 45., 1. / 45., 1. / 90., 1. / 180., 4. / 45., -2. / 45.,
- 1. / 45., -1. / 90., 1. / 180., 0, 0, 0, 0, 1,
- };
- return G;
- }
-
- static const double *getA()
- {
- static const double A[M * (M - N + 1)] = {1, 0, 0, 0, 1, 1, 1, 1, 1, -1, 1, -1, 1, 2, 4, 8,
- 1, -2, 4, -8, 8, 4, 2, 1, 8, -4, 2, -1, 0, 0, 0, 1};
- return A;
- }
-
- static const double *getB()
- {
- static const double B[M * M] = {
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
- -1, 1. / 2, -1. / 2, 2, -2, -1, -21. / 4, 1, 1, 1. / 4,
- 1. / 4, 4, 4, 0, 0, -17. / 4, 17. / 4, -5. / 2, 5. / 2, -5. / 2,
- 5. / 2, 21. / 4, 21. / 4, -17. / 4, -17. / 4, -5. / 4, -5. / 4, -5, -5, 0,
- 0, 1, -1, 2, -2, 1. / 2, -1. / 2, -21. / 4, -1, 1,
- 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 1,
- };
- return B;
- }
-};
-
-static void kronecker_product(float *out, const double *in1, const double *in2, int m, int n, int p,
- int q)
-{
- for (int i = 0; i < m; ++i)
- {
- for (int j = 0; j < n; ++j)
- {
- for (int k = 0; k < p; ++k)
- {
- for (int l = 0; l < q; ++l)
- {
- out[(p * i + k) * n * q + q * j + l] = in1[n * i + j] * in2[k * q + l];
- /* compute in double precision and then convert it back to Dtype for accuracy */
- }
- }
- }
- }
-}
-
-} // namespace srcn
-} // namespace nnfw
-
-#endif // __NNFW_SRCN_WINOGRAD_H__
diff --git a/docs/HowToContribute.md b/docs/HowToContribute.md
deleted file mode 100644
index c6f89c3cf..000000000
--- a/docs/HowToContribute.md
+++ /dev/null
@@ -1,72 +0,0 @@
-_nnfw_ always welcomes your contribution, but there are basic guidelines that you should follow in
-order to make your contribution be accepted.
-
-This document explains such guidelines for beginners.
-
-# General contribution guidelines
-
-If you are not familiar with git or github, please visit
-[here](https://guides.github.com/activities/hello-world/) for basic understanding of git and github.
-
-For general rules and information in STAR regarding contribution, please see the guidelines in the
-[STAR-DeveloperGuide](https://github.sec.samsung.net/STAR/STAR-DeveloperGuide) repo.
-
-
-# HOWTO
-## How to create a Pull Request
-
-This section explains the steps to create a pull request (PR).
-
-1. Create an issue
-
- Maintainers will accept your contribution only when it is well aligned with the roadmap and
- design principles of [_nnfw_](./nnfw/roadmap.md) and [_nncc_](./nncc/roadmap.md). So, it is optional, but recommended for contributors
- to create an issue and have a discussion with maintainers before writing code.
-
-1. Create a draft PR
-
- Maintainers will accept your pull request only when it is **reasonably small** and **focused**.
- Sometimes, your contribution may require huge and loosely-coupled changes. You **should** split
- your contribution into multiple small, but focused pull requests in this case. Unfortunately, it
- is possible that maintainers reject your pull request as it is hard for them to understand the
- intuition behind these changes. So, it is optional, but recommended for contributors to present
- the full draft of your contribution and have a discussion with maintainers before creating PR(s).
-
-1. Create a commit
-
- It is time to create a commit for submission once you are convinced that your contribution is
- ready to go. Please include signed-off message at the end of commit message. If not, your pull
- request will be **rejected** by CI.
-
-1. Check code format locally
-
- _nnfw_ has its code formatting rules, and any pull request that violates these rules will be
- **rejected** by CI. So, it is optional, but recommended for contributor to check code format
- locally before submission.
-
-1. Create a PR
-
- It is time to send a pull request. Please explain your intention via description. Maintainers
- will review your pull request based on that description. Each pull request needs approval from at
- least two reviewers to be accepted. Note that **description should include at least four words**.
- If not, your pull request will be **rejected** by CI.
-
-1. Request review
-
- It is recommended to assign reviewers yourself. Maintainers will honor your review request,
- and accept your pull request only when
-
- - Approved by 1+ reviewers
- - 0 rejection(Request Changes)
- - 0 pending review request
- - All the reviewers in the list must approve your pull request
-
- You can add/remove pending review requests in the middle of the review process. Maintainers
- (or reviewers) could review your pull request even without explicit review request.
-
-1. Update per feedback
-
- Sometimes, maintainers (or reviewers) will request changes on your pull request. Please update
- your pull request upon such feedbacks. These update commits will be squashed into the first
- commit of your pull request later. Please do **NOT** include a sign-off message or write a full
- description for update commits.
diff --git a/docs/UseDoxygen.md b/docs/UseDoxygen.md
deleted file mode 100644
index 1b016c0ec..000000000
--- a/docs/UseDoxygen.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# How to generate documentation from source code using doxygen
-
-## Install doxygen
-
-If you want to use doxygen to generate documentation on Ubuntu, please install packages
-
-```
-$ sudo apt install doxygen
-```
-
-## Generate documentation
-
-### Pre-defined configuration
-
-You can find pre-defined configuration at `infra/doxygen/Doxyfile`
-
-### Option 1: Use pre-defined configuration
-
-You can use pre-defined configuration directly at nnas's root path
-
-```
-<nnas-root-path>$ doxygen infra/doxygen/Doxyfile
-```
-
-Generated documentation html is in `doxygen/html`
-
-### Option 2: Use nnas command (recommand)
-
-You can use nnas command `doxygen`
-
-```
-$ <nnas-root-path>/nnas doxygen
-```
-
-Generated documentation html is in your workspace directory: `<NNAS_WORKSPACE>/doxygen/html`
-Default workspace directory is `build`
diff --git a/docs/fig/compiler_flow.png b/docs/fig/compiler_flow.png
deleted file mode 100644
index 25daa0ca1..000000000
--- a/docs/fig/compiler_flow.png
+++ /dev/null
Binary files differ
diff --git a/docs/fig/nnfw_compiler_structure.png b/docs/fig/nnfw_compiler_structure.png
deleted file mode 100644
index 4c650c186..000000000
--- a/docs/fig/nnfw_compiler_structure.png
+++ /dev/null
Binary files differ
diff --git a/docs/fig/nnfw_compiler_structure.pptx b/docs/fig/nnfw_compiler_structure.pptx
deleted file mode 100644
index 9b5585d0c..000000000
--- a/docs/fig/nnfw_compiler_structure.pptx
+++ /dev/null
Binary files differ
diff --git a/docs/fig/nnfw_components.png b/docs/fig/nnfw_components.png
deleted file mode 100644
index 2c6bc6d97..000000000
--- a/docs/fig/nnfw_components.png
+++ /dev/null
Binary files differ
diff --git a/docs/fig/nnfw_components.pptx b/docs/fig/nnfw_components.pptx
deleted file mode 100644
index a4e86fa82..000000000
--- a/docs/fig/nnfw_components.pptx
+++ /dev/null
Binary files differ
diff --git a/docs/fig/nnfw_nativeapi_flow.png b/docs/fig/nnfw_nativeapi_flow.png
deleted file mode 100644
index 31e82900d..000000000
--- a/docs/fig/nnfw_nativeapi_flow.png
+++ /dev/null
Binary files differ
diff --git a/docs/fig/nnfw_nativeapi_flow.pptx b/docs/fig/nnfw_nativeapi_flow.pptx
deleted file mode 100644
index 27f6d6e80..000000000
--- a/docs/fig/nnfw_nativeapi_flow.pptx
+++ /dev/null
Binary files differ
diff --git a/docs/fig/nnfw_nnapi_flow.png b/docs/fig/nnfw_nnapi_flow.png
deleted file mode 100644
index 2faceb9f2..000000000
--- a/docs/fig/nnfw_nnapi_flow.png
+++ /dev/null
Binary files differ
diff --git a/docs/fig/nnfw_nnapi_flow.pptx b/docs/fig/nnfw_nnapi_flow.pptx
deleted file mode 100644
index 7407a3940..000000000
--- a/docs/fig/nnfw_nnapi_flow.pptx
+++ /dev/null
Binary files differ
diff --git a/docs/fig/nnfw_runtime_behavior.png b/docs/fig/nnfw_runtime_behavior.png
deleted file mode 100644
index 952f22c93..000000000
--- a/docs/fig/nnfw_runtime_behavior.png
+++ /dev/null
Binary files differ
diff --git a/docs/fig/nnfw_runtime_behavior.pptx b/docs/fig/nnfw_runtime_behavior.pptx
deleted file mode 100644
index 2fbcedacb..000000000
--- a/docs/fig/nnfw_runtime_behavior.pptx
+++ /dev/null
Binary files differ
diff --git a/docs/fig/nnfw_runtime_structure.png b/docs/fig/nnfw_runtime_structure.png
deleted file mode 100644
index 554b5aa04..000000000
--- a/docs/fig/nnfw_runtime_structure.png
+++ /dev/null
Binary files differ
diff --git a/docs/fig/nnfw_runtime_structure.pptx b/docs/fig/nnfw_runtime_structure.pptx
deleted file mode 100644
index 213925e91..000000000
--- a/docs/fig/nnfw_runtime_structure.pptx
+++ /dev/null
Binary files differ
diff --git a/docs/fig/runtime_nativeapi_flow.png b/docs/fig/runtime_nativeapi_flow.png
deleted file mode 100644
index 1f9c88236..000000000
--- a/docs/fig/runtime_nativeapi_flow.png
+++ /dev/null
Binary files differ
diff --git a/docs/nncc/README.md b/docs/nncc/README.md
deleted file mode 100644
index 203b4aa45..000000000
--- a/docs/nncc/README.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# 1. nnas SDK
-
-_describe simply that current version is 1.0.0, and nnas SDK has nncc and nnfw._
-
- _we use symantic versioning. Provide link to https://semver.org/_
-
-_simply mention that we go with apache license_
-
-# 2. nncc
-
-_please write a short description_
-_for example, what is this compiler_
-_design philosophy and advantages of this compiler_
-
-## 2.1. Architecture
-
-_For example, simple architecture or compiling flow, showing we're cool_
-
-## 2.2. Getting Started
-
-This section will explain how to install and compile a Tensorflow model file.
-
-### 2.2.1. Supported Environment
-
-_x86, ubuntu 16.04... versions of Tensorflow that produce models.. frozen file..., ... etc..._
-
-### 2.2.2. How to Install
-
-_please write how to install_
-
-### 2.2.3. How to Compile and Package
-
-_what is 'nnpackage'?_
-_environment variables_
-_compiling inception v3 pb file and packaging into an nnpackage_
-_explaining files in an nnpackage_
-_an example with custom op_
-
-## 2.3. List of Supported Operations
-
-_separate md file_
-_showing a list of [ tensorflow op , circle op, limitation ]_
-
-## 2.4. Benchmark
-
-_inception v3 (we have shorter ops)_
-_instance normalization (link to runtime performance)_
-_showing we have bright future_
-
-## 2.5. Support
-
-_report a bug into our github_
-
-## 2.6. Revision History
-
-_separate md file where SDK 1.0.0 and future version history are maintained_
diff --git a/docs/nncc/design.md b/docs/nncc/design.md
deleted file mode 100644
index a01d6fec4..000000000
--- a/docs/nncc/design.md
+++ /dev/null
@@ -1,10 +0,0 @@
-This document describes basic principles behind _nncc_ design.
-
-## Goals and non-goals
-
-As mentioned in README.md, _nncc_ aims to provide a general framework for compiling a given NN model
-to an artifact that runs on a target device (such as CPU, GPU, or NPU).
-
-More specifically, _nncc_ aims to create an efficient artifact (in terms of throughput or memory)
-for a specific target via focusing on a restricted set of NN operations. It is not the goal of _nncc_
-to support all the known NN operations although _nncc_ will keep trying to broaden its coverage.
diff --git a/docs/nncc/getting_started.md b/docs/nncc/getting_started.md
deleted file mode 100644
index 8f01bd2a4..000000000
--- a/docs/nncc/getting_started.md
+++ /dev/null
@@ -1,73 +0,0 @@
-#### Prerequisites
-
-The following toolchains are needed to build _nncc_ project:
- - CMake (>= 3.1)
- - g++ (>= 4.8)
-
-#### How to build _nncc_ with docker
-
-_nncc_ provides ``Dockerfile`` in order to make it easy to setup development environment.
-
-One may build ``nncc`` docker image with the following command:
-```
-nncc$ cat infra/docker/Dockerfile | docker build -t nncc -
-...
-```
-
-By default, this ``Dockerfile`` uses "archive.ubuntu.com" which may be quite slow. One may use mirror site via ``UBUNTU_MIRROR`` variable.
-For example, one may enable the use of ``kr.archive.ubuntu.com`` via the following command
-```
-nncc$ cat infra/docker/Dockerfile | docker build --build-arg UBUNTU_MIRROR="kr.archive.ubuntu.com" -t nncc -
-...
-```
-
-One who works behind proxy should provide proxy configuration via the following command:
-```
-nncc$ cat infra/docker/Dockerfile | docker build --build-arg HTTP_PROXY=<HTTP proxy address> --build-arg HTTPS_PROXY=<HTTPS proxy address> -t nncc -
-...
-```
-One may use simplified command if ``HTTP_PROXY`` and ``HTTPS_PROXY`` environment variables are already set:
-```
-nncc$ export
-...
-declare -x HTTP_PROXY=...
-declare -x HTTPS_PROXY=...
-...
-nncc$ cat infra/docker/Dockerfile | docker build --build-arg HTTP_PROXY --build-arg HTTPS_PROXY -t nncc -
-...
-```
-
-Note that these configurations are orthogonal to each other. One may freely combine these options as follows:
-```
-nncc$ cat infra/docker/Dockerfile | docker build --build-arg HTTP_PROXY --build-arg HTTPS_PROXY --build-arg UBUNTU_MIRROR="kr.archive.ubuntu.com" -t nncc -
-```
-
-One may easily build _nncc_ with the following command once ``nncc`` docker image is built.
-```
-nncc$ ./nncc docker-nncc configure
-...
-nncc$ ./nncc docker-nncc build
-...
-```
-
-#### How to build _nncc_ with ninja
-
-You may build _nncc_ with ninja (instead of make) if ninja is available. Please try the following commands:
-```
-nncc$ rm -rf build
-nncc$ ./nncc configure -G Ninja
-nncc$ ./nncc build
-```
-
-#### How to build and run _nncc_ unittests
-
-_nncc_ includes various unittests to check its correctness. One may build and run these unittests via the following command:
-```
-nncc$ rm -rf build
-nncc$ ./nncc configure -DENABLE_TEST=1
-nncc$ ./nncc build
-nncc$ ./nncc test
-```
-
-**NOTE** As _nncc_ unittests are implemented on top of google test framework (_gtest_), _nncc_ build script will automatically download _gtest_ 1.8 from public GitHub.
-If you are not able to access public GitHub from your machine, please override download URL via ``GTEST_URL`` environment variable.
diff --git a/docs/nncc/images/nncc_components.png b/docs/nncc/images/nncc_components.png
deleted file mode 100644
index becd63d14..000000000
--- a/docs/nncc/images/nncc_components.png
+++ /dev/null
Binary files differ
diff --git a/docs/nncc/images/nncc_idef0_a0.png b/docs/nncc/images/nncc_idef0_a0.png
deleted file mode 100644
index 9ba09681f..000000000
--- a/docs/nncc/images/nncc_idef0_a0.png
+++ /dev/null
Binary files differ
diff --git a/docs/nncc/images/nncc_idef0_a1.png b/docs/nncc/images/nncc_idef0_a1.png
deleted file mode 100644
index c5ebec5d9..000000000
--- a/docs/nncc/images/nncc_idef0_a1.png
+++ /dev/null
Binary files differ
diff --git a/docs/nncc/images/nncc_idef0_a12.png b/docs/nncc/images/nncc_idef0_a12.png
deleted file mode 100644
index dabcad718..000000000
--- a/docs/nncc/images/nncc_idef0_a12.png
+++ /dev/null
Binary files differ
diff --git a/docs/nncc/project/detailed_level_design.md b/docs/nncc/project/detailed_level_design.md
deleted file mode 100644
index 50fb8fa13..000000000
--- a/docs/nncc/project/detailed_level_design.md
+++ /dev/null
@@ -1,329 +0,0 @@
-# SW Detailed Level Design
-
-**Revision history**
-
-| Ver. | Date | Contents | Author | Approver |
-| ---- | ---------- | ----------------- | ----------------- | ------------ |
-| 0.1 | 2018.06.20 | Initial version | Vostokov Sergey | Sung-Jae Lee |
-| 0.2 | 2018.06.21 | SE member review | Alexey Kondrashov | |
-| 1.0 | 2018.06.22 | Final DR1 version | Vostokov Sergey | Sung-Jae Lee |
-
-**Terminology and Abbreviation**
-
-| | |
-| ------------ | ------------------------------------------------------------- |
-| OS | Operating System |
-| OS API | Application interface of OS |
-| HW | Hardware |
-| SW | Software |
-| NN | Neural Network |
-| NN model | Neural network model (Instance of NN built with ML framework) |
-| NN compiler | The compiler for neural network |
-| ML framework | The machine learning framework |
-| TF/TF Lite | Tensorflow/Tensorflow Lite ML framework |
-| IR | Intermediate representation |
-| CI/CI system | Continuous integration system |
-| UI | The user interface |
-| GUI | The graphical user interface |
-| CLI | The command-line interface |
-
-**References**
-
-\[1\] Vostokov Sergey, [SW Requirements Specification](requirements_specification.md)
-
-\[2\] Vostokov Sergey, [SW High-Level Design](high_level_design.md)
-
-## Overview
-
-### Scope
-
-The main goal of the project is to develop a compiler for neural
-networks to produce executable artefact for specified SW and HW
-platform.
-
-The development scope includes the following components:
-
- - Develop importer module to parse, verify and represent NN model for
- further optimization and compilation
- - Develop code emitters to produce executable binary for CPU and GPU
-
-
-**2018 year goals:**
-
- - Support TensorFlow Lite NN model format
- - Support Caffe NN model format
- - Support Caffe2 NN model format (Optional)
- - Support compilation of MobileNet NN
- - Support compilation of Inception v3 NN
- - Support ARM CPU
- - Support ARM GPU (Mali)
- - Support Tizen OS
  - Support SmartMachine OS (Optional)
-
-| Product | Target Model Name | Comment |
-| ------------------- | ------------------------------ | ---------------- |
-| Tizen phone | Tizen TM2 | Reference device |
-| Tizen device | Odroid XU4 | Reference board |
-| SmartMachine target | Microvision mv8890, exynos8890 | Reference device |
-
-Table 1-1. Target Model
-
-### Design Consideration
-
-Deep learning software demands reliability and performance. The common
-approach which comes from the history is to develop a SW framework
-(machine learning framework) which would compute each step of the neural
-network inference process using supported hardware. This approach is
-used in many popular solutions like Google Tensorflow/Tensorflow Lite,
-Caffe/2, etc. Traditionally, neural network developers build a
-computation graph and then an appropriate machine learning framework
-interprets it. The latest discoveries in AI field show that the
-node-visitor method of execution is inefficient. As a result, a second
-approach has been worked out by the industry, which is a neural network
-compiler that executes code more efficiently.
-
-This document presents the design of the *nncc*, a neural network
-compiler collection. The design should provide the easiest way to extend
-the functionality of the *nncc* by adding new modules with the following
-features:
-
- - Support neural networks produced by various machine learning
- frameworks;
- - Produce an artefact taking advantages of various hardware
- including specialized processors like NPU;
- - Apply new domain specific optimization techniques over given NN.
-
-Non-functional requirements to the developed software are well-described
-in the SW Requirements Specification, such requirements are not shown
-here to avoid duplication.
-
-### Constraints
-
-See constraints in SW Requirements Specification.
-
-
-<table>
-<colgroup>
-<col style="width: 24%" />
-<col style="width: 64%" />
-<col style="width: 10%" />
-</colgroup>
-<thead>
-<tr class="header">
-<th>Item</th>
-<th>Assumptions, Dependencies and the Constraints</th>
-<th>Reference</th>
-</tr>
-</thead>
-<tbody>
-<tr class="odd">
-<td>Tizen SW Platform</td>
-<td><dl>
-<dt>The following items should be provided:</dt>
-<dd><ul>
-<li>Tizen API</li>
-<li>Tizen kernel</li>
-<li>Tizen FW</li>
-<li>Tizen SDK</li>
-<li>Tizen naming convention</li>
-</ul>
-</dd>
-</dl></td>
-<td>- <a href="www.tizen.org" class="uri">www.tizen.org</a> <br>- <a href="wiki.tizen.org" class="uri">wiki.tizen.org</a> <br>- <a href="developer.tizen.org" class="uri">developer.tizen.org</a></td>
-</tr>
-<tr class="even">
-<td>SmartMachine OS Platform</td>
-<td><dl>
-<dt>The following items should be provided:</dt>
-<dd><ul>
-<li>SmartMachine API</li>
-<li>SmartMachine kernel</li>
-<li>SmartMachine FW</li>
-<li>SmartMachine SDK</li>
-<li>SmartMachine naming convention</li>
-</ul>
-</dd>
-</dl></td>
-<td>- <a href="http://suprem.sec.samsung.net/confluence/pages/viewpage.action?pageId=81833987">Platform confluence</a> <br>- <a href="https://github.sec.samsung.net/RS7-SmartMachine">Github</a> <br>- <a href="http://suprem.sec.samsung.net/confluence/display/ASEC/Adaptive+AUTOSAR">Functional Safety confluence</a></td>
-</tr>
-<tr class="odd">
-<td>Host OS</td>
-<td>Linux-based OS (Ubuntu, Archlinux, etc)</td>
-<td>- <a href="https://www.ubuntu.com/">Ubuntu site</a> <br>- <a href="https://www.archlinux.org/">Archlinux site</a></td>
-</tr>
-<tr class="even">
-<td>Tizen target HW</td>
-<td>The reference device should be provided: Tizen TM2</td>
-<td></td>
-</tr>
-<tr class="odd">
-<td>SmartMachine target HW</td>
-<td>The reference device should be provided</td>
-<td></td>
-</tr>
-</tbody>
-</table>
-Table 1-2. Assumptions, Dependencies and the Constraints
-
-## SW Detailed Structure Design
-
-### SW Block Structure
-
-Top-Level Components of the nncc described in HLD. More detailed
-structure and class diagram will be available after development
-completion.
-
-### SW Block Feature
-
-1. Initialization: configure all internal modules (see
- [{Initialization} Detailed Design](#initialization-detailed-design))
-2. Frontend: Import NN model (see [{Import NN model} Detailed
- Design](#import-nn-model-detailed-design))
- - *Caffe frontend*: includes the parser of Caffe NN model format,
-      verifier to ensure that parsed data is valid and consistent,
- and Caffe-specific IR converter
- - *Caffe2 frontend*: includes the parser of Caffe2 NN model
- format, verifier to ensure that parsed data is valid and
-      consistent, and Caffe2-specific IR converter to Model IR
- - *Tensorflow Lite frontend*: includes the parser of Tensorflow NN
- model format with automatic version recognition feature,
-      verifier to ensure that parsed data is valid and consistent,
- and Tensorflow Lite-specific IR converter to Model IR
-3. Backend: Generate the code (see [{Generate the code} Detailed
- Design](#generate-the-code-detailed-design))
- - *Interpreter:* As it was described in SW High-Level Document
-      imported NN model may proceed through three steps of Intermediate
- representation: Model IR, Coarse-Grained IR, Fine-Grained IR.
-      The Interpreter backend uses each of these IRs to do inference of
- given NN model. As the output, the user gets the resulting
- calculation of all NN ops included into original computation
- graph.
-    - *Binary*: This type refers to generating binary code that can be
- executed on the target device. NN compiler can generate code
- that is either executed solely on CPU or takes advantage of the
- GPU when possible if the corresponding target was specified. The
- user may want to incorporate 3rd party libraries included into
- target firmware or delivered with the application package. In
- this case, the compiler prepares the data following EABI
- convention and embeds an invocation of high-level functions by
- appropriate symbol.
- - *Soft*: Resulting program is a generated source code in
- high-level programming language C or C++. Here there are two
- options: the first one is to generate the source code that does
- not depend on libraries outside of itself, with the exception of
- system libraries. The second one is to include the code to
- invoke high-level functions from 3rd party libraries. For
- example, it may be an invocation of matrix multiplication from
- GEMM library.
-
-## SW Detailed Operation Design
-
-### {Initialization} Detailed Design
-
-#### Major Function
-
-To provide a valid configuration session for all modules of *nncc* using
-user input from the command line/config file/environment variables.
-
-#### Operation Sequence
-
-Initialization of the *nncc* includes command line option processing,
-configuration of its subsystems as well as any error checking possible
-at this stage. It consists of the following steps:
-
-1. Collect all command line options and verify their format for
- validity (no syntax errors etc.)
-
-2. Check for validity and then process general options
-
-3. Load subsystem modules
-
-4. For each one of them:
-
- - Configure
- - Pass command line options
- - Check command line options for validity (for example, check
- that every required option is present)
-
-At the end of this process each subsystem is configured and has access
-to all data needed for its operation.
-
-### {Import NN model} Detailed Design
-
-#### Major Function
-
-To convert given NN model from framework-specific IR to Model IR for
-further processing.
-
-#### Operation Sequence
-
-As you may see on the diagram, neural network import is the main
-function of the compiler front-end part. The result of this operation is
-a computation graph which is presented as Model IR.
-
-![image](../images/nncc_idef0_a12.png)
-
-The import process consists of three parts:
-
-1. NN model parsing
-2. Verification of the result from the previous step
-3. Converting the model to the Model IR
-
-During the first step, file or files containing the model are read and
-represented in some format specific to each NN framework.
-
-Verification step is included to ensure that:
-
- - None of the files constituting the model are damaged
- - Model format corresponds to the specified one
- - Version of the model format corresponds to the specified one
-
-The most important step is accurately converting the model from the
-framework-specific representation to the Model IR. This conversion
-includes:
-
- - *Translation of the NN model computation graph to the Model IR
- computation graph.* During the translation new nodes may be
- introduced - for example, a high-level NN operation may be split
- into a few smaller ones.
- - *NN model parameter layout conversion.* The way parameters (also
-    known as weights) of a model are laid out in each specific NN
- framework may differ, and it is necessary to convert such layout
- into a unified format.
- - *NN operation parameter conversion.* Each NN operation has a set
- of its own parameters describing the way this operation should be
- performed, and these parameters also differ between frameworks.
-
-Resulting Model IR is equivalent to the initial NN model in terms of how
-NN model inputs would be transformed into its outputs if all the
-operations in the Model IR were executed.
-
-### {Generate the code} Detailed Design
-
-Development in progress. Will be described on Completion DR.
-
-## Interface Design
-
-Development in progress. Will be described on DR2.
-
-## SW Code Structure
-
-| Directory | Description |
-| ------------------------ | -------------------------------------------------------------------- |
-| / | source codes of the build system, main README file |
-| /contrib | Incubating projects |
-| /doc | Contains the documentation of the project |
-| /doc/project | Contains project management documents (SRS, SDD, STD, HLD, DLD, etc) |
-| /libs | Contains the source of the libraries which are used by the nncc |
-| /libs/core | Contains the source code of the core library of nncc |
-| /libs/frontend | Contains the source code of supported frontend's plugins |
-| /libs/frontend/caffe | The source code for the Caffe frontend |
-| /libs/frontend/caffe2 | The source code for the Caffe2 frontend |
-| /libs/frontend/tflite | The source code for the Tensorflow Lite frontend |
-| /libs/backend | Contains the source code of supported backend’ plugins |
-| /libs/backend/cpu | Contains the source code of CPU backend |
-| /libs/backend/gpu | Contains the source code of GPU backend |
-| /libs/backend/3rd\_party | Contains the source code of backend to utilize 3rd party libraries |
-| /scripts | Various scripts for building and testing the nncc |
-| /tools | The source code of the executables |
diff --git a/docs/nncc/project/development_document.md b/docs/nncc/project/development_document.md
deleted file mode 100644
index 8315dd3b6..000000000
--- a/docs/nncc/project/development_document.md
+++ /dev/null
@@ -1,257 +0,0 @@
-# SW Development Document
-
-**Revision history**
-
-| Ver. | Date | Contents | Author | Approver |
-| ---- | ---------- | --------------------------- | --------------- | ------------ |
-| 0.1 | 2018.04.12 | Initial version | Vostokov Sergey | Sung-Jae Lee |
-| 0.2 | 2018.04.16 | SE member in-charge review | Ilya Lopatin | |
-| 1.0 | 2018.04.17 | Final Execution DR version | Vostokov Sergey | Sung-Jae Lee |
-| 1.1 | 2018.04.17 | Add SW Quality Verification | Vostokov Sergey | Sung-Jae Lee |
-
-**Terminology and Abbreviation**
-
-| | |
-| ------------ | ------------------------------------------------------------- |
-| OS | Operating System |
-| OS API | Application interface of OS |
-| HW | Hardware |
-| SW | Software |
-| NN | Neural Network |
-| NN model | Neural network model (Instance of NN built with ML framework) |
-| NN compiler | The compiler for neural network |
-| ML framework | The machine learning framework |
-| TF/TF Lite | Tensorflow/Tensorflow Lite ML framework |
-| IR | Intermediate representation |
-| CI/CI system | Continuous integration system |
-| UI | The user interface |
-| GUI | The graphical user interface |
-| CLI | The command-line interface |
-
-## Project Overview
-
-### Purpose and Scope
-
-The main goal of the project is to develop a compiler for neural networks to produce executable artefact for specified SW and HW platform.
-
-The development scope includes the following components:
-
- - Develop importer module to parse, verify and represent NN model for further optimization and compilation
- - Develop code emitters to produce executable binary for CPU and GPU
-
-
-**2018 year goals:**
-
- - Support TensorFlow Lite NN model format
- - Support Caffe NN model format
- - Support Caffe2 NN model format (Optional)
- - Support compilation of MobileNet NN
- - Support compilation of Inception v3 NN
- - Support ARM CPU
- - Support ARM GPU (Mali)
- - Support Tizen OS
- - Support SmartMachine OS (Optional)
-
-| Product | Target Model Name | Comment |
-| ------------------- | ------------------------------ | ---------------- |
-| Tizen phone | Tizen TM2 | Reference device |
-| Tizen device | Odroid XU4 | Reference board |
-| SmartMachine target | Microvision mv8890, exynos8890 | Reference device |
-
-### Assumptions, Dependencies and Constraints
-
-<table>
-<colgroup>
-<col style="width: 26%" />
-<col style="width: 46%" />
-<col style="width: 26%" />
-</colgroup>
-<thead>
-<tr class="header">
-<th>Item</th>
-<th>Assumptions, Dependencies and the Constraints</th>
-<th>Reference</th>
-</tr>
-</thead>
-<tbody>
-<tr class="odd">
-<td>Tizen SW Platform</td>
-<td><dl>
-<dt>The following items should be provided:</dt>
-<dd><ul>
-<li>Tizen API</li>
-<li>Tizen kernel</li>
-<li>Tizen FW</li>
-<li>Tizen SDK</li>
-<li>Tizen naming convention</li>
-</ul>
-</dd>
-</dl></td>
-<td><ul>
-<li><a href="www.tizen.org" class="uri">www.tizen.org</a></li>
-<li><a href="wiki.tizen.org" class="uri">wiki.tizen.org</a></li>
-<li><a href="developer.tizen.org" class="uri">developer.tizen.org</a></li>
-</ul></td>
-</tr>
-<tr class="even">
-<td>SmartMachine OS Platform</td>
-<td><dl>
-<dt>The following items should be provided:</dt>
-<dd><ul>
-<li>SmartMachine API</li>
-<li>SmartMachine kernel</li>
-<li>SmartMachine FW</li>
-<li>SmartMachine SDK</li>
-<li>SmartMachine naming convention</li>
-</ul>
-</dd>
-</dl></td>
-<td>- <a href="http://suprem.sec.samsung.net/confluence/pages/viewpage.action?pageId=81833987">Platform confluence</a> <br>- <a href="https://github.sec.samsung.net/RS7-SmartMachine">Github</a> <br>- <a href="http://suprem.sec.samsung.net/confluence/display/ASEC/Adaptive+AUTOSAR">Functional Safety confluence</a></td>
-</tr>
-<tr class="odd">
-<td>Host OS</td>
-<td>Linux-based OS (Ubuntu, Archlinux, etc)</td>
-<td>- <a href="https://www.ubuntu.com/">Ubuntu site</a> <br>- <a href="https://www.archlinux.org/">Archlinux site</a></td>
-</tr>
-<tr class="even">
-<td>Tizen target HW</td>
-<td>The reference device should be provided: Tizen TM2</td>
-<td></td>
-</tr>
-<tr class="odd">
-<td>SmartMachine target HW</td>
-<td>The reference device should be provided</td>
-<td></td>
-</tr>
-</tbody>
-</table>
-
-## Development Plan And Result
-
-### Development Schedule
-
-| Task | Deliverable | Plan start | Plan end | Result start | Result end | Responsibility |
-| ------------------------------------ | --------------------------------- | ---------- | -------- | ------------ | ---------- | -------------- |
-| Prepare SW requirements | SRS | 04.2018 | 04.2018 | | | S. Vostokov |
-| Prepare initial SW Test Document | STD | 04.2018 | 04.2018 | | | S. Vostokov |
-| Prepare Initial Project Plan | SDD | 04.2018 | 04.2018 | | | S. Vostokov |
-| Prepare SW Test Document | STD | 04.2018 | 06.2018 | | | S. Vostokov |
-| Prepare design document | HLD, DLD | 05.2018 | 08.2018 | | | S. Vostokov |
-| Prepare test result | STD, UTR | 04.2018 | 10.2018 | | | S. Vostokov |
-| Prepare project completion documents | SDD, Project completion report | 05.2018 | 12.2018 | | | S. Vostokov |
-| Implement Caffe Importer | Caffe NN model Importer | 05.2018 | 09.2018 | | | S. Vostokov |
-| Implement code emitter for CPU | Code emitter | 05.2018 | 09.2018 | | | S. Vostokov |
-| Implement TF Lite Importer | TensorFlow Lite NN model Importer | 05.2018 | 11.2018 | | | S. Vostokov |
-| Implement code emitter for GPU | Code emitter | 02.2018 | 11.2018 | | | S. Vostokov |
-
-### SW Metrics
-
-| Category | Metric | Collection Method | Collection Period | Planned | Actual | Responsibility |
-| -------- | ---------------------------------------------------------------------- | ------------------------ | ----------------------- | ----------------- | ------ | -------------- |
-| Quality | Test pass rate | GTest | 22.02.2018 - 31.12.2018 | 100% | | S. Vostokov |
-| Quality | Defects density | Defect management system | 22.02.2018 - 31.12.2018 | \<= 1 defect/KLOC | | S. Vostokov |
-| Quality | Defects removal rate | Defect management system | 22.02.2018 - 31.12.2018 | 100% | | S. Vostokov |
-| Quality | Critical defects | Static analysis | 22.02.2018 - 31.12.2018 | 0 | | S. Vostokov |
-| Quality | Major defects | Static analysis | 22.02.2018 - 31.12.2018 | 0 | | S. Vostokov |
-| Quality | Code review issue removal | Samsung Research github | 22.02.2018 - 31.12.2018 | 100% | | S. Vostokov |
-| Quality | Comments Rate | `cloc` tool | 22.02.2018 - 31.12.2018 | Exceed 20% | | S. Vostokov |
-| Quality | Cyclomatic Complexity | SVACE | 22.02.2018 - 31.12.2018 | \< 50 | | S. Vostokov |
-| Quality | Unused Items (Unused Files, Unused Functions, Unused Global Variables) | gcc/g++ | 22.02.2018 - 31.12.2018 | 0 | | S. Vostokov |
-| Process | Project On-time Completion Rate | PLM | 22.02.2018 - 31.12.2018 | 100% | | S. Vostokov |
-| Process | Milestone On-time Completion Rate | PLM | 22.02.2018 - 31.12.2018 | 100% | | S. Vostokov |
-| Process | Process compliance | Audit | 22.02.2018 - 31.12.2018 | 100% | | S. Vostokov |
-
-### SW Configurations Management
-
-#### Document
-
-| No | Configuration Item | Location | Submitter |
-| -- | ---------------------------- | -------- | ----------- |
-| 1 | SW Requirement Specification | PLM | S. Vostokov |
-| 2 | SW Development Document | PLM | S. Vostokov |
-| 3 | SW High Level Document | PLM | S. Vostokov |
-| 4 | SW Detailed Level Document | PLM | S. Vostokov |
-| 5 | SW System Test Document | PLM | S. Vostokov |
-| 6 | SW Unit Test Report | PLM | S. Vostokov |
-
-#### SW Source Code
-
-SW Repository:
-<https://github.sec.samsung.net/STAR/nncc>
-
- git clone https://github.sec.samsung.net/STAR/nncc.git
-
-#### Baseline
-
-| Phase | Baseline Name | SW Configuration Item |
-| ------------------ | ------------------ | ------------------------------------------------------------------------------------------- |
-| 04.2018 Plan | Execution DR | SW Requirement Specification, SW Development Document, System Test Document initial version |
-| 06.2018 Execution | DR1 | System Test Document |
-| 08.2018 Execution | Design document | SW High Level Document, SW Detailed Design Document |
-| 09.2018 Execution | DR2 | |
-| 10.2018 Execution | Test report | SW System Test Document (result), SW Unit Test Report |
-| 12.2018 Completion | Project Completion | Project Completion Report |
-
-## SW Quality Verification
-
-### SW Verification
-
-| No | Verification Item | Quality Goal | Tool | Phase | Development Team Member in Charge | Result | Note |
-| -- | -------------------------------- | ------------------------------------------ | -------- | --------- | --------------------------------- | ------ | ---- |
-| 1 | Open source License Verification | Clear violations of open source obligation | ProtexIP | Execution | Vostokov Sergey | | |
-| 2 | Potential Defect | Fix all defects | Svace | Test | Vostokov Sergey | | |
-| 3 | System Defect | Fix Critical/ Major defects | Github | Test | Vostokov Sergey | | |
-
-### Static Analysis
-
-| No | Activity | Schedule | Result | Comment |
-| -- | --------------------------- | ---------- | ------ | ------- |
-| 1 | SA Verification I (SVACE) | 28.09.2018 | | |
-| 2 | SA Verification II (SVACE) | 30.11.2018 | | |
-| 3  | SA Verification III (SVACE) | 31.12.2018 |        |         |
-
-### Coding Standard
-
-| No | Activity | Schedule | Result | Comment |
-| -- | ----------------------------------------------------- | -------- | ------ | ------- |
-| 1 | Coding standard enforcement with `clang-format` tool. | Regular | | |
-
-
-### Convergence (integration testing)
-
-Out of scope since the integration with other SW is not required by SW
-Requirement Specification.
-
-### Dynamic Analysis
-
-| No | Activity | Schedule | Result | Comment |
-| -- | ------------------- | ---------- | ------ | ------- |
-| 1 | DA Verification I | 28.09.2018 | | |
-| 2 | DA Verification II | 30.11.2018 | | |
-| 3  | DA Verification III | 31.12.2018 |        |         |
-
-
-### Architecture Analysis
-
-SW architecture verification is managed by HQ.
-
-### SW Security
-
-Out of the project scope since the project is not related to SW security.
-
-### Code Review
-
-| No | Activity | Schedule | Result | Comment |
-| -- | ----------- | -------- | ------ | ------------------------------------------------------------------- |
-| 1 | Code review | Regular | | All code is reviewed manually using `github` tool before committing |
-
-## Risk Management
-
-| Priority | Risk Description | Risk Reduction Solution | Schedule | Result | Responsibility |
-| -------- | ------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | ----------------- | ------ | -------------- |
-| 1 | Project scope is changed due extra HQ request | Discuss the new requirements via email and messenger, update SRS | 02.2018 - 12.2018 | | S. Vostokov |
-| 2 | Unavoidable technical difficulties during requirements implementation | Submit requirements changes and get confirmation from HQ | 02.2018 - 12.2018 | | S. Vostokov |
-| 3 | Not enough HR | Hire team members as soon as possible, request assistance from other teams | 02.2018 - 12.2018 | | S. Vostokov |
-| 4 | Use of GPL code | Minimize usage of GPL code, wrap GPL modules with well-defined interfaces so they can be easily replaced. | 02.2018 - 12.2018 | | S. Vostokov |
-| 5 | Requirements would change due external or internal circumstances, e.g. new technology or product launch | Discuss project changes and make corrections | 02.2018 - 12.2018 | | S. Vostokov |
-
diff --git a/docs/nncc/project/high_level_design.md b/docs/nncc/project/high_level_design.md
deleted file mode 100644
index a15aaca4a..000000000
--- a/docs/nncc/project/high_level_design.md
+++ /dev/null
@@ -1,457 +0,0 @@
-# SW High Level Design
-
-**Revision history**
-
-| Ver. | Date | Contents | Author | Approver |
-| ---- | ---------- | ----------------- | ----------------- | ------------ |
-| 0.1 | 2018.05.25 | Initial version | Vostokov Sergey | Sung-Jae Lee |
-| 0.2 | 2018.06.21 | SE member review | Alexey Kondrashov | |
-| 1.0 | 2018.06.22 | Final DR1 version | Vostokov Sergey | Sung-Jae Lee |
-
-**Terminology and Abbreviation**
-
-| Terminology | Description |
-| ------------ | ------------------------------------------------------------- |
-| OS | Operating System |
-| OS API | Application interface of OS |
-| HW | Hardware |
-| SW | Software |
-| NN | Neural Network |
-| NN model | Neural network model (Instance of NN built with ML framework) |
-| NN compiler | The compiler for neural network |
-| ML framework | The machine learning framework |
-| TF/TF Lite | Tensorflow/Tensorflow Lite ML framework |
-| IR | Intermediate representation |
-| CI/CI system | Continuous integration system |
-| UI | The user interface |
-| GUI | The graphical user interface |
-| CLI | The command-line interface |
-
-**References**
-
-\[1\] Vostokov Sergey, [SW Requirements Specification](requirements_specification.md)
-
-## Overview
-
-### Scope
-
-The main goal of the project is to develop a compiler for neural
-networks to produce executable artefact for specified SW and HW
-platform.
-
-The development scope includes the following components:
-
- - Develop importer module to parse, verify and represent NN model for
- further optimization and compilation
- - Develop code emitters to produce executable binary for CPU and GPU
-
-
-**2018 year goals:**
-
- - Support TensorFlow Lite NN model format
- - Support Caffe NN model format
- - Support Caffe2 NN model format (Optional)
- - Support compilation of MobileNet NN
- - Support compilation of Inception v3 NN
- - Support ARM CPU
- - Support ARM GPU (Mali)
- - Support Tizen OS
- - Support SmartMachine OS(Optional)
-
-| Product | Target Model Name | Comment |
-| ------------------- | ------------------------------ | ---------------- |
-| Tizen phone | Tizen TM2 | Reference device |
-| Tizen device | Odroid XU4 | Reference board |
-| SmartMachine target | Microvision mv8890, exynos8890 | Reference device |
-
-Table 1-1. Target Model
-
-### Design Consideration
-
-Deep learning software demands reliability and performance. The common
-approach which comes from the history is to develop a SW framework
-(machine learning framework) which would compute each step of the neural
-network inference process using supported hardware. This approach is
-used in many popular solutions like Google Tensorflow/Tensorflow Lite,
-Caffe/2, etc. Traditionally, neural network developers build a
-computation graph and then an appropriate machine learning framework
-interprets it. The latest discoveries in AI field show that the
-node-visitor method of execution is inefficient. As a result, a second
-approach has been worked out by the industry, which is a neural network
-compiler that executes code more efficiently.
-
-This document presents the design of the *nncc*, a neural network
-compiler collection. The design should provide the easiest way to extend
-the functionality of the *nncc* by adding new modules with the following
-features:
-
- - Support neural networks produced by various machine learning
- frameworks;
- - Produce an artefact taking advantages of various hardware
- including specialized processors like NPU;
- - Apply new domain specific optimization techniques over given NN.
-
-### Constraints
-
-See constraints in SW Requirements Specification.
-
-<table>
-<colgroup>
-<col style="width: 24%" />
-<col style="width: 64%" />
-<col style="width: 10%" />
-</colgroup>
-<thead>
-<tr class="header">
-<th>Item</th>
-<th>Assumptions, Dependencies and the Constraints</th>
-<th>Reference</th>
-</tr>
-</thead>
-<tbody>
-<tr class="odd">
-<td>Tizen SW Platform</td>
-<td><dl>
-<dt>The following items should be provided:</dt>
-<dd><ul>
-<li>Tizen API</li>
-<li>Tizen kernel</li>
-<li>Tizen FW</li>
-<li>Tizen SDK</li>
-<li>Tizen naming convention</li>
-</ul>
-</dd>
-</dl></td>
-<td>- <a href="www.tizen.org" class="uri">www.tizen.org</a> <br>- <a href="wiki.tizen.org" class="uri">wiki.tizen.org</a> <br>- <a href="developer.tizen.org" class="uri">developer.tizen.org</a></td>
-</tr>
-<tr class="even">
-<td>SmartMachine OS Platform</td>
-<td><dl>
-<dt>The following items should be provided:</dt>
-<dd><ul>
-<li>SmartMachine API</li>
-<li>SmartMachine kernel</li>
-<li>SmartMachine FW</li>
-<li>SmartMachine SDK</li>
-<li>SmartMachine naming convention</li>
-</ul>
-</dd>
-</dl></td>
-<td>- <a href="http://suprem.sec.samsung.net/confluence/pages/viewpage.action?pageId=81833987">Platform confluence</a> <br>- <a href="https://github.sec.samsung.net/RS7-SmartMachine">Github</a> <br>- <a href="http://suprem.sec.samsung.net/confluence/display/ASEC/Adaptive+AUTOSAR">Functional Safety confluence</a></td>
-</tr>
-<tr class="odd">
-<td>Host OS</td>
-<td>Linux-based OS (Ubuntu, Archlinux, etc)</td>
-<td>- <a href="https://www.ubuntu.com/">Ubuntu site</a> <br>- <a href="https://www.archlinux.org/">Archlinux site</a></td>
-</tr>
-<tr class="even">
-<td>Tizen target HW</td>
-<td>The reference device should be provided: Tizen TM2</td>
-<td></td>
-</tr>
-<tr class="odd">
-<td>SmartMachine target HW</td>
-<td>The reference device should be provided</td>
-<td></td>
-</tr>
-</tbody>
-</table>
-Table 1-2. Assumptions, Dependencies and the Constraints
-
-## SW System Architecture Design
-
-### Overall Architecture
-
-The picture below presents the result of high-level analysis of the
-requirements which **nncc** should satisfy. It describes the main
-function **Compilation** of the compiler collection using IDEF0
-(functional modeling) notation. The full information on IDEF family of
-modeling languages is available at this link on [Wikipedia:
-IDEF](https://en.wikipedia.org/wiki/IDEF).
-
-![image](../images/nncc_idef0_a0.png)
-
-Figure 1. Top-Level Context Diagram of compilation function.
-
-
-The short explanation of the **Figure 1**:
-
-**1. Input entities:**
-
- - *NN Model instance:* It is the main input of *nncc*. The compiler
- takes from a user information describing a neural network which
- should be compiled. In most cases, this NN is produced by a
- machine learning framework and stored in one or many files. The
- contents of these files constitute the essence of the neural
- network. Here it is denoted as an instance of NN model.
- - *Command line options:* In order to provide the most convenient
- way to use the compiler, it should be configurable. Current design
- presents a tool which has a Command Line Interface (CLI). Command
- line options are a symbolic representation of directions
- instructing the compiler how to set up a working session to get
- the desired result.
-
-**2. Output:**
-
- - *Target binaries:* Everything that is produced by the compilation
- operation. In general case the result may consist of one or more
- files. Each of them may be one of the following: an executable, a
- source code file, a log/verification/error report. For example,
- when we require the compiler to compile a neural network for
- execution on GPU, the output artefact may be OpenCL/C/C++ source
- code, or a binary containing invocation of the procedures
- delegating the calculations to GPU.
-
-**3. Rules and notations:**
-
- - *NN Model specification:* Each machine learning framework has its
- own architecture design and uses its own format to
- serialize/deserialize computation graphs which represent neural
- networks. On a storage device, it may be saved as a file or many
- files using a unique markup of binary data. To enable *nncc* to
- read such data and process it, in the future it should recognize
- the format of the container. Importer/parser subsystem of *nncc*
- stores the full knowledge of the NN specifications and is
- responsible for reading and parsing NN models (see [Import NN
- model](#import-nn-model)).
- - *High-Level and Low-Level Optimization techniques:* Before
- deployment, a neural network developer might want to verify their
- product and optimize it by size and performance. There are many
- techniques for reducing the common size of neural network weights
- and improving performance of the inference. NN optimization
- activity can be automated by implementing each technique in the
- middleend according to its specifications (see [Apply
- Optimizations](#apply-optimizations)).
- - *Target Runtime Environment (TRE):* In the case when the compiler
- produces the binary for execution on a specific SW platform, it
- should take into account the common API of this SW Platform. It
- includes the full public API of a chosen OS available to the 3rd
- party developers.
- - *Target Instruction Set Architecture (Target ISA):* Resulting
- artefact is always executed on a SW Platform using some specified
- API. The user may want to generate the artefact that would use
- OpenBlas or Arm Compute Library or something else (if supported by
- the compiler), to perform calculations. In order to provide such
- possibility, *nncc* should be aware of the API to the specified
- 3rd party libraries.
- - *Device specifications:* Some of the optimization techniques may
- take into account the technological features of the computing
- device, like the time to perform some specific calculations. Such
- information is very helpful during optimization of the final code
- of the compiled artefact because it may be used to select an
- optimal sequence of command invocations in order to achieve the
- best performance.
-
-**4. Mechanism:**
-
- - *Optimizing NN Compiler:* The implemented compiler itself. Since
- *nncc* is dedicated to producing the code for the most efficient
- execution, we may regard the tool as optimizing.
- - *Host OS:* Since the compiler is a tool that works in some SW
- Environment, the main Top-Level SW system is an Operating System.
- In the SW Requirements specification it may be defined as a
- Linux-like OS, for example Ubuntu, Archlinux, etc.
-
-### Composition of Architecture
-
-The compiler consists of three main parts: frontend, middleend, backend.
-Together they form a Neural Network instance processing pipeline.
-Moreover, there is one additional part that is in charge of the compiler
-configuration.
-
-![image](../images/nncc_components.png)
-
-Figure 2. Top-Level Components of the
-*nncc*.
-
-| Layer or Subsystem Name | Description |
-| ----------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
-| Frontend | Imports a specified Neural Network, presents it as a computation graph |
-| Middleend | Provides various optimizations over the computation graph; at the end transforms it to internal IR |
-| Backend | Produces the specified artefact as a result of compilation procedure using specified parameters describing the target OS, target HW, etc |
-| Configuration system | Accepts command line options and configures *nncc* according to their contents |
-
-
-The detailed decomposition of the main function **Compilation** is
-presented on the diagram A1 below.
-
-### Interface
-
-Similar to any console application the *nncc* CLI accepts two types of
-options:
-
- - Options that have values, for example, a name of the output executable
- - Options that don't have values (switches) that turn various features on and off
-
-Additionally, options can be general and subsystem-specific.
-
-General options direct the process of the neural network compilation as
-a whole, and also control the utility functions like the verbosity of
-the messages that *nncc* outputs during the compilation process.
-
-Subsystem-specific options control each respective subsystem:
-
- - Frontend subsystem takes options that point to the NN model to
- compile, which format it has, which version of the format and so
- on.
- - Middleend subsystem takes options that either turn on specific
- optimizations for the NN model, or just point at the more desired
- outcome, for example "target performance efficiency" or "target
- memory efficiency".
- - Backend subsystem takes options that describe the desired target
- device or architecture and so on.
-
-For better usability, high-level options are also supported. A single
-high-level option is mapped to a group of lower level options, similarly
-to how it is done with conventional compiler drivers, like gcc. This way
-by choosing a single Middleend option "target performance", nncc will
-automatically choose a number of performance optimizations by itself.
-
-## SW System Operation Design
-
-The Figure 3 presents a more detailed composition of the main function
-**Compilation**. As it was shown in previous section [Composition of
-Architecture](#composition-of-architecture) it is composed of 5
-subfunctions:
-
- - Setup and configure each module - *Block 1* (See
- [Initialization](#initialization) section)
- - Import the specified neural network - *Block 2* (See [Import NN
- model](#import-nn-model) section)
- - Apply High-Level optimizations - *Block 3* (See [Apply
- Optimizations](#apply-optimizations) section)
- - Apply Low-Level optimizations - *Block 4* (See [Apply
- Optimizations](#apply-optimizations) section)
- - Generate the output code for specified target - *Block 5* (See
- [Generate the code](#generate-the-code) section)
-
-![image](../images/nncc_idef0_a1.png)
-
-Figure 3. Decomposition of top-Level function **Compilation**.
-
-### Initialization
-
-At this stage the initialization of all submodules of the *nncc*
-happens. This procedure starts from command line option processing till
-selection of all required and correctly configured modules. At the
-parsing stage the configuration system checks its own consistency. If
-command line option set is not enough to establish a valid
-configuration the environment variables will be used. Also, almost all
-configuration options can be read from config file if it is specified in
-command line.
-
-### Import NN model
-
-The major function of the *nncc* frontend is to import specified NN
-model. It means that frontend should recognize the format of given NN
-model, parse all internal structures (load computation graph using
-framework specific IR: NN topology, NN ops, weights), verify their
-correctness and convert to Model IR.
-
-### Apply Optimizations
-
-There are two levels of neural network optimizations in *nncc*.
-
-First one is High-Level Optimizations, they are applied to the Model IR,
-which is output by the NN Import subsystem.
-
-#### High-Level Optimizations
-
-High-Level optimizations can be divided into two groups:
-
- - optimizations aimed at reducing the size of the resulting model -
- *size optimizations*
- - optimizations aimed at reducing the inference time of the model -
- *performance optimizations*
-
-These two groups are not mutually exclusive. Some optimization
-techniques positively affect both size and performance, while some of
-them might reduce the size of the model at some performance cost.
-
-High-Level Optimizations in this sense are purely
-neural-network-specific, as they attempt to improve the model by
-manipulating the computation graph and the weights. For example, some
-techniques search for unused parts of the computation graph and remove
-them, or they search for the parts of the graph that can be merged
-together and thus gain some performance. Other techniques manipulate the
-neural network weights - either reduce their amount or modify their
-values in a way that allows for the reduced storage consumption.
-
-Currently, High-Level Optimizations are out of scope of the project.
-
-#### Low-Level Optimization
-
-The Low-Level Optimizations are applied by the compiler closer to the
-end of the whole compilation process, before the executable generation.
-The input for this stage of *nncc* is the Coarse-Grained IR, which is
-output by the High-Level Optimization subsystem.
-
-### Generate the code
-
-Present architecture allows for several backend solutions, depending on
-target specified. Those solutions can be divided into 3 types:
-
- - *Interpretation.* At every step inference can be carried out by
- interpreting IR produced after that step.
- - *Soft backend.* Resulting program can be generated as source code
- in high-level programming language (e.g., C/C++) that does not
- depend on libraries outside of itself, with the exception of
- system libraries.
- - *Hardware (Binary) backend.* This type refers to generating binary
- code that can be executed on target device. NN compiler can
- generate code that is either executed solely on CPU, or takes
- advantage of the GPU when possible if corresponding target was
- specified.
-
-Third-party libraries incorporation can be done either in form of source
-code or by compiling a binary artefact.
-
-## Appendix 1. Traceability Matrix
-
-The following table shows mapping between SW Requirements Specification
-and SW High-Level Design
-Document.
-
-| Requirement | Description | Section |
-| ----------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
-| RF-1 (Frontend: Tensorflow Lite) | The compiler should support import of NN model in Tensorflow Lite format (parsing & verification of data scheme v0-v3, 50 NN ops) | [Import NN model](#import-nn-model) |
-| RF-2 (Frontend: Caffe) | The compiler should support import of NN model in Caffe format (parsing & verification) | [Import NN model](#import-nn-model) |
-| RF-3 (Frontend: Caffe2 (Optional)) | The compiler should support import of NN model in Caffe2 format (parsing & verification) | [Import NN model](#import-nn-model) |
-| RF-4 (Frontend: lossless import) | The frontend should use the lossless approach while it is converting any NN model to IR | [Import NN model](#import-nn-model) |
-| RF-5 (Frontend: Inception\_v3) | The frontend should successfully import the Inception V3 NN model | [Import NN model](#import-nn-model) |
-| RF-6 (Frontend: MobileNet) | The frontend should successfully import the MobileNet NN model | [Import NN model](#import-nn-model) |
-| RF-7 (Backend: ARM CPU) | The compiler should produce executable for ARM CPU | [Generate the code](#generate-the-code) |
-| RF-8 (Backend: ARM GPU) | The compiler should produce the binary that takes advantages of GPU when it was specified before compilation | [Generate the code](#generate-the-code) |
-| RF-9 (Backend: Artefact type) | The compiler should produce executable as a shared library or as a static library | [Generate the code](#generate-the-code) |
-| RF-10 (Backend: Inception\_v3) | The compiler should produce the valid compiled artefact for Inception v3 NN model | [Generate the code](#generate-the-code) |
-| RF-11 (Backend: MobileNet) | The compiler should produce the valid compiled artefact for MobileNet NN model | [Generate the code](#generate-the-code) |
-| RF-12 (Config: command line) | The compiler should get configuration parameters from command line | [Initialization](#initialization) |
-| RF-13 (Config: config file (Optional)) | The compiler should get configuration parameters from config file | [Initialization](#initialization) |
-| RF-14 (Config: environment variable (Optional)) | The compiler should get configuration parameters from environment variables | [Initialization](#initialization) |
-| RF-15 (Artefact: result) | The artefact should provide comparable result to the original NN model for the same input data | [Generate the code](#generate-the-code) |
-| RF-16 (Artefact: input verifications) | The artefact should verify any input data and check consistency | [Generate the code](#generate-the-code) |
-| RF-17 (Artefact: GPU) | The artefact should take advantage of the GPU for GPU-enabled operations | [Generate the code](#generate-the-code) |
-| RF-18 (Artefact: CPU) | The artefact should take advantage of CPU if it was specified | [Generate the code](#generate-the-code) |
-
-**Design Module of S/W Architecture**
-
-| Requirement | Import NN model | Generate the code | Initialization |
-| ----------------------------------------------- | --------------- | ----------------- | -------------- |
-| RF-1 (Frontend: Tensorflow Lite) | O | | |
-| RF-2 (Frontend: Caffe) | O | | |
-| RF-3 (Frontend: Caffe2 (Optional)) | O | | |
-| RF-4 (Frontend: lossless import) | O | | |
-| RF-5 (Frontend: Inception\_v3) | O | | |
-| RF-6 (Frontend: MobileNet) | O | | |
-| RF-7 (Backend: ARM CPU) | | O | |
-| RF-8 (Backend: ARM GPU) | | O | |
-| RF-9 (Backend: Artefact type) | | O | |
-| RF-10 (Backend: Inception\_v3) | | O | |
-| RF-11 (Backend: MobileNet) | | O | |
-| RF-12 (Config: command line) | | | O |
-| RF-13 (Config: config file (Optional)) | | | O |
-| RF-14 (Config: environment variable (Optional)) | | | O |
-| RF-15 (Artefact: result) | | O | |
-| RF-16 (Artefact: input verifications) | | O | |
-| RF-17 (Artefact: GPU) | | O | |
-| RF-18 (Artefact: CPU) | | O | |
diff --git a/docs/nncc/project/requirements_specification.md b/docs/nncc/project/requirements_specification.md
deleted file mode 100644
index 7a6fce762..000000000
--- a/docs/nncc/project/requirements_specification.md
+++ /dev/null
@@ -1,272 +0,0 @@
-# SW Requirements Specification
-
-
-**Revision history**
-
-| Ver. | Date | Contents | Author | Approver |
-| ---- | ---------- | ------------------------------------------ | ------------------ | ------------ |
-| 0.1 | 2018.04.11 | Initial version | Vostokov Sergey | Sung-Jae Lee |
-| 0.2 | 2018.04.11 | SE member in-charge review | Aleksei Kondrashov | |
-| 1.0 | 2018.04.13 | Final Execution DR version | Vostokov Sergey | Sung-Jae Lee |
-| 1.1 | 2018.05.24 | Add new requirement in Source code section | Vostokov Sergey | Sung-Jae Lee |
-
-## Introduction
-
-### Purpose and scope
-
-The main goal of the project is to develop a compiler for neural
-networks to produce executable artefact for specified SW and HW
-platform.
-
-The development scope includes the following components:
-
- - Develop importer module to parse, verify and represent NN model for
- further optimization and compilation
- - Develop code emitters to produce executable binary for CPU and GPU
-
-2018 year goals:
-
- - Support TensorFlow Lite NN model format
- - Support Caffe NN model format
- - Support Caffe2 NN model format (Optional)
- - Support compilation of MobileNet NN
- - Support compilation of Inception v3 NN
- - Support ARM CPU
- - Support ARM GPU (Mali)
- - Support Tizen OS
- - Support SmartMachine OS (Optional)
-
-### Terminology and Abbreviation
-
-| | |
-| ------------ | ------------------------------------------------------------- |
-| OS | Operating System |
-| OS API | Application interface of OS |
-| HW | Hardware |
-| SW | Software |
-| NN | Neural Network |
-| NN model | Neural network model (Instance of NN built with ML framework) |
-| NN compiler | The compiler for neural network |
-| ML framework | The machine learning framework |
-| TF/TF Lite | Tensorflow/Tensorflow Lite ML framework |
-| IR | Intermediate representation |
-| CI/CI system | Continuous integration system |
-| UI | The user interface |
-| GUI | The graphical user interface |
-| CLI | The command-line interface |
-
-### SW System Architecture
-
-The main components of the compiler are the following:
-
- - Configuration system
- - Importer (convert supported NN model to Model IR before
- optimization)
- - High-Level optimization (Applies HW independent optimizations)
- - Low-Level optimization (Applies optimizations appropriate to the
- specified target HW)
- - Code emitter (Produces the binary to take advantages of CPU and/or
- GPU)
-
-![image](../images/nncc_idef0_a1.png)
-
-### Relevant Industry Standards
-
-Architecture design is described using IDEF notation. Since the nncc is a part of the open source STAR Platform project,
-no other industry standards are required and/or applicable.
-
-## SW Functional Requirements
-
-### Frontend
-
-| ID | Requirement Name | Description |
-| ---- | --------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
-| RF-1 | Frontend: Tensorflow Lite | The compiler should support import of NN model in Tensorflow Lite format (parsing & verification of data scheme v0-v3, 50 NN ops) |
-| RF-2 | Frontend: Caffe | The compiler should support import of NN model in Caffe format (parsing & verification) |
-| RF-3 | Frontend: Caffe2 (Optional) | The compiler should support import of NN model in Caffe2 format (parsing & verification) |
-| RF-4 | Frontend: lossless import | The front-end should use the lossless approach while it is converting any NN model to IR |
-| RF-5 | Frontend: Inception\_v3 | The front-end should successfully import the Inception V3 NN model |
-| RF-6 | Frontend: MobileNet | The front-end should successfully import the MobileNet NN model |
-
-### High-Level optimization
-
-No special requirements
-
-### Low-Level optimization
-
-No special requirements
-
-### Backend
-
-| ID | Requirement Name | Description |
-| ----- | ---------------------- | ------------------------------------------------------------------------------------------------------------ |
-| RF-7 | Backend: ARM CPU | The compiler should produce executable for ARM CPU |
-| RF-8 | Backend: ARM GPU | The compiler should produce the binary that takes advantages of GPU when it was specified before compilation |
-| RF-9 | Backend: Artefact type | The compiler should produce executable as a shared library or as a static library |
-| RF-10 | Backend: Inception\_v3 | The compiler should produce the valid compiled artefact for Inception v3 NN model |
-| RF-11 | Backend: MobileNet | The compiler should produce the valid compiled artefact for MobileNet NN model |
-
-### Configuration
-
-| ID | Requirement Name | Description |
-| ----- | --------------------------------------- | --------------------------------------------------------------------------- |
-| RF-12 | Config: command line | The compiler should get configuration parameters from command line |
-| RF-13 | Config: config file (Optional) | The compiler should get configuration parameters from config file |
-| RF-14 | Config: environment variable (Optional) | The compiler should get configuration parameters from environment variables |
-
-### Compiled Artefact
-
-| ID | Requirement Name | Description |
-| ----- | ----------------------------- | ---------------------------------------------------------------------------------------------- |
-| RF-15 | Artefact: result | The artefact should provide comparable result to the original NN model for the same input data |
-| RF-16 | Artefact: input verifications | The artefact should verify any input data and check consistency |
-| RF-17 | Artefact: GPU | The artefact should take advantage of the GPU for GPU-enabled operations |
-| RF-18 | Artefact: CPU | The artefact should take advantage of CPU if it was specified |
-
-## SW Non-Functional Requirements
-
-### The compiler
-
-#### Performance
-
-No special requirements
-
-#### SW capacity
-
-No special requirements
-
-#### Reliability
-
-| ID | Requirement Name | Description |
-| ----- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| RNF-1 | Reliability: input | The compiler should produce correct executable in order to utilize CPU and GPU when the correct input data is provided. If the incorrect input data are provided the compiler should not produce a compiled artefact, but inform user about all errors which were met |
-
-#### Security
-
-No special requirements
-
-#### Usability
-
-No special requirements
-
-#### Availability
-
-No special requirements
-
-#### Maintainability
-
-No special
-requirements
-
-#### Extendibility
-
-| ID | Requirement Name | Description |
-| ----- | ----------------------- | ------------------------------------------------------------------------------------------------------------------------- |
-| RNF-2 | Extendibility: frontend | The compiler design and implementations should provide possibility to add new features to front-end: new NN models format |
-| RNF-3 | Extendibility: backend | The compiler design and implementations should provide possibility to add new features to backend (new targets) |
-
-#### Testability
-
-| ID | Requirement Name | Description |
-| ----- | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| RNF-4 | Testability: environment | The test environment should be built in order to verify compiler functionality, product build status, artefact build/execution status, artefact calculation result and calculation memory footprint and performance |
-
-#### Portability
-
-| ID | Requirement Name | Description |
-| ----- | ------------------ | --------------------------------------------------- |
-| RNF-5 | Portability: Linux | The compiler should be portable with Linux-based OS |
-
-#### Scalability
-
-No special requirements
-
-#### Expandability
-
-No special
-requirements
-
-#### Configurability
-
-| ID | Requirement Name | Description |
-| ----- | --------------------------------------- | --------------------------------------------------------------------------------- |
-| RNF-6 | Configurability: command line | The compiler should support applying configuration through command line options. |
-| RNF-7 | Configurability: file (Optional) | The compiler should support applying configuration through configuration file. |
-| RNF-8 | Configurability: environment (Optional) | The compiler should support applying configuration through environment variables. |
-
-### The compiled artefact
-
-No special
-requirements
-
-### The source code
-
-| ID | Requirement Name | Description |
-| ------ | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| RNF-9 | Legislation | All source code files should follow their original license and the general project license without any conflicts |
-| RNF-10 | Legitimacy | The project should have its own general license |
-| RNF-11 | Coding style | Each source code file should follow the one defined for the project coding style |
-| RNF-12 | Contrib | RNF-9, RNF-10, RNF-11 are applicable only for the final release version of source code. These requirements are not applicable to the source code placed in development branch or any folder which is used as temporary storage for the source code under development. |
-
-## SW Interface Requirements
-
-### The compiler interface
-
-#### User Interface
-
-| ID | Requirement Name | Description |
-| ----- | ---------------------------- | --------------------------------------------------------------------------------------------------------------------------------------- |
-| RIF-1 | Compiler UI: no interaction | The compiler should not require any user interaction during compilation (completed compilations, fatal exit) |
-| RIF-2 | Compiler UI: CLI | The compiler is considered to be a command line tool which processes parameters from the command line and/or config file, environment variables |
-| RIF-3 | Compiler UI: input | The compiler should provide the facility to specify NN model to be compiled |
-| RIF-4 | Compiler UI: target device | The compiler should provide the facility to specify result target device (CPU or GPU) |
-| RIF-5 | Compiler UI: target platform | The compiler should provide the facility to specify result target SW platform |
-| RIF-6 | Compiler UI: output | The compiler should provide the facility to specify result target name |
-| RIF-7 | Compiler UI: target type | The compiler should provide the facility to specify result target type: shared or static library |
-
-#### Hardware Interface
-
-| ID | Requirement Name | Description |
-| ----- | -------------------------------- | --------------------------------------------------------------------------- |
-| RIF-8 | Compiler HWI: x86\_64 executable | The solution should provide executables to run on x86\_64-compatible system |
-
-#### Software Interface
-
-| ID | Requirement Name | Description |
-| ------ | ------------------------------------------ | ------------------------------------------------------------------------------------------------ |
-| RIF-9 | Compiler SWI: frontend plugin | The compiler should provide the SW interface in order to add support of the new NN model formats |
-| RIF-10 | Compiler SWI: backend plugin (HW) | The compiler should provide the SW interface in order to add support of the new HW |
-| RIF-11 | Compiler SWI: backend plugin (SW Platform) | The compiler should provide the SW interface in order to add support of the new SW Platform |
-
-#### Communication Interface
-
-No requirements for communication interface.
-
-### The compiled artefact interface
-
-#### User Interface
-
-| ID | Requirement Name | Description |
-| ------ | ------------------- | ----------------------------------- |
-| RIF-12 | Artefact UI: no GUI | Command line UI in text is suitable |
-
-#### Hardware Interface
-
-| ID | Requirement Name | Description |
-| ------ | ----------------- | ----------------------------------------------------------------------------- |
-| RIF-13 | Artefact HWI: CPU | The artefact should use ARM CPU instruction set when it was built for ARM CPU |
-| RIF-14 | Artefact HWI: GPU | The artefact should use ARM GPU instruction set when it was built for ARM GPU |
-
-#### Software Interface
-
-| ID | Requirement Name | Description |
-| ------ | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
-| RIF-15 | Artefact SWI: GPU driver | The artefact should use ARM GPU driver to invoke calculations when it was built for ARM GPU |
-| RIF-16 | Artefact SWI: C/C++ header | The artefact should provide C/C++ interface in order to use it in other applications |
-| RIF-17 | Artefact SWI: shared type | The compiled artefact should be a shared library in order to share it between several executables when it was specified before compilation |
-| RIF-18 | Artefact SWI: static type | The compiled artefact should be a static library in order to be built-in to an executable when it was specified before compilation |
-| RIF-19 | Artefact SWI: Info | The artefact should provide SW interface in order to get the actual status of calculation process (progress, errors, final result) |
-
-#### Communication Interface
-
-No requirements for communication interface.
diff --git a/docs/nncc/project/test_plan.md b/docs/nncc/project/test_plan.md
deleted file mode 100644
index a1f0f0a97..000000000
--- a/docs/nncc/project/test_plan.md
+++ /dev/null
@@ -1,442 +0,0 @@
-# SW System Test Document
-
-**Revision history**
-
-| Ver. | Date | Contents | Author | Approver |
-| ---- | ---------- | -------------------------- | ------------------ | ------------ |
-| 0.1 | 2018.04.12 | Initial version | Vostokov Sergey | Sung-Jae Lee |
-| 0.2 | 2018.04.13 | SE member in-charge review | Aleksei Kondrashov | |
-| 1.0 | 2018.04.17 | Final Execution DR version | Vostokov Sergey | Sung-Jae Lee |
-| 1.1 | 2018.06.20 | DR1 version | Vostokov Sergey | Sung-Jae Lee |
-
-**Terminology and Abbreviation**
-
-| | |
-| ------------ | ------------------------------------------------------------- |
-| OS | Operating System |
-| OS API | Application interface of OS |
-| HW | Hardware |
-| SW | Software |
-| NN | Neural Network |
-| NN model | Neural network model (Instance of NN built with ML framework) |
-| NN compiler | The compiler for neural network |
-| ML framework | The machine learning framework |
-| TF/TF Lite | Tensorflow/Tensorflow Lite ML framework |
-| IR | Intermediate representation |
-| CI/CI system | Continuous integration system |
-| UI | The user interface |
-| GUI | The graphical user interface |
-| CLI | The command-line interface |
-
-**References**
-
-\[1\] Vostokov Sergey, [SW Requirements Specification](requirements_specification.md)
-
-## SW System Test Overview
-
-### Purpose
-
-Software testing is an investigation to provide the quality of the
-product under test and to reduce risk of its failure to users or
-customers. Purpose of testing is to detect software failures so that
-defects may be discovered and corrected.
-
-Software system test procedure is a collection of processes and methods
-used to ensure quality. An additional goal is to make sure that the
-product follows regulations and meets the quality standards expected by
-the customer.
-
-### Scope
-
-As the number of possible tests for every software is practically
-infinite, we use some strategy to select tests that are feasible for the
-available time and resources.
-
-Software system tests attempt to cover requirements listed in the [SW
-Requirement
-Specification](https://github.sec.samsung.net/STAR/nncc/doc/project/requirements_specification.md).
-
-Since the project outcome is a compiler, its testing is in a different
-domain than many other kinds of application or system testing.
-It is dedicated to finding all possible issues that cause the following
-bugs:
-
- - Compiler crashes (also known as an ICE or Internal Compiler Error)
-
- - Compiler hangs (kind of infinite loop in the compiler)
-
- - Bad code generation (a result of incorrect compiler output):
-
- - Bad code generation that leads to a crash in the application
- - “Silent” bad code generation
-
- - Compiler throughput issues (Issues that affect the amount of time
- the compiler takes to compile code )
-
- - Code quality issues (Issues that affect the performance of the
- compiled application)
-
- - Compiler feature correctness issues (This class of bugs involves the
- compiler generating correct code, but not doing what a particular
- feature specifies should be
-done)
-
-## SW System Test Items
-
-### Functions to be tested
-
-| Feature | Test Item ID | Test Item description |
-| ---------------------------------------- | ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| RF-1, RIF-3 - RIF-7 | TST-1 | Test suite checks NN ops import from Tensorflow Lite format by loading NN model that consists of a single NN op. One test for each NN op. |
-| RF-2, RIF-3 - RIF-7 | TST-2 | Test suite checks NN ops import from Caffe format by loading NN model that consists of a single NN op. One test for each NN op. |
-| RF-3, RIF-3 - RIF-7 | TST-3 | Test suite checks NN ops import from Caffe2 format by loading NN model that consists of a single NN op. One test for each NN op. |
-| RF-5, RIF-3 - RIF-7 | TST-4 | The test should verify successful loading the Inception V3 NN model |
-| RF-6, RIF-3 - RIF-7 | TST-5 | The test should verify successful loading the MobileNet NN model |
-| RF-4 | TST-6 | The test suite should automatically verify the completeness of information that was read from the raw data by comparing it with serialized raw data from Model IR |
-| RF-7, RF-18, RIF-13 | TST-7 | The unit test should automatically verify successful execution of binary on target ARM CPU |
-| RF-8, RF-17, RIF-14, RIF-15 | TST-8 | The unit test should automatically verify successful execution of calculation on GPU |
-| RF-9, RNF-1, RIF-17, RIF-18 | TST-9 | Unit test should verify the existence and format of binary (shared or static) in accordance to specified options |
-| RF-10 | TST-10 | Unit test should verify that compiler produces a compiled artefact for the Inception V3 NN model (Validity of compiled artefact is checked by other tests) |
-| RF-11 | TST-11 | Unit test should verify that compiler produces a compiled artefact for the MobileNet NN model (Validity of compiled artefact is checked by other tests) |
-| RF-12, RF-13, RF-14, RNF-6, RNF-7, RNF-8 | TST-12 | The test suite should verify correctness of configuration object by unit testing |
-| RF-15, RNF-1 | TST-13 | The test suite is to verify the correctness of calculations by comparing the result of original NN model and the result of compiled artefact on the same input data |
-| RF-16 | TST-14 | Unit test should verify that the incorrect input data is processed with an error message without unexpected termination of the application |
-| RNF-4, RNF-5, RIF-8                      | TST-15       | A Linux-based OS should be used while the test environment is built.                                                                                  |
-| RIF-16 | TST-16 | The unit test should verify the existence and validity of generated C/C++ header for compiled artefact |
-
-Table 2-1. Test Item
-
-**The following requirements can be tested only manually:**
-
- - Non-functional requirements: RNF-2, RNF-3 (They would be tested
- during development)
- - Interface requirements: RIF-1, RIF-2, RIF-9 - RIF-12, RIF-19
-
-### Functions not to be tested
-
-The following requirements cannot be tested:
-
-  - The source code requirements (RNF-9, RNF-10, RNF-11)
-
-## SW System Test Procedure
-
-### Test approaches
-
-During implementation of the project deliverables, several kinds of
-testing are used. All of them are performed automatically by the
-continuous integration system once it is set up. The CI system subscribes
-to source code modifications in the version control system. The
-configuration does not allow any changes to be merged into the main line
-if these changes do not pass the merge-mandatory tests.
-
- - **Code style check** (Merge mandatory test): to verify consistency
- of coding style
- - **Build test** (Merge mandatory test): to verify the current build
- - **Unit tests**: to verify SW system consistency. All newly implemented
-   features, code refactoring, and optimizations must not cause unit test
-   failures. Each unit test reflects the exact logic of the tested
-   component; thus, it should be adapted whenever the program logic
-   changes.
- - **System tests**: to verify the feature quality as well as
- compliance with its specified requirements.
- - **Manual-based UI testing approach**: for interface requirements,
- which cannot be automated
-
-### Test Pass/Fail Criteria
-
-All tests (unit/system) must be executed without any issues at any time
-for newly implemented, refactored, or changed code.
-
-### Test Start/Suspension/Resumption criteria
-
-Two mandatory tests (code style check and build test) are performed for
-every pull request (PR) before it is merged. The configuration of the
-continuous integration system (CI) does not allow the changes to be merged
-into the devel branch if they do not pass the tests.
-
-Unit and feature testing are performed for the devel branch
-automatically. The merge to the master branch (release) is possible only
-when all these tests have passed.
-
-### Regression Test strategy
-
-If a new issue is detected and it is not covered by an existing test,
-then a new test will be developed. Otherwise, the issue should be
-resolved.
-
-### Test tools
-
-| | |
-| ------------------------------- | ------------------------------------------------------------------------------------ |
-| Source code static verification | AEGIS (CODE pre-commit test suite: static/structure/open source violation analyzers) |
-| Test execution | CMake |
-| Defect management | Samsung Research GitHub |
-| Continuous Integration system | HQ CI (CODE) |
-
-Table 3-1. Test Tools
-
-## SW System Test Schedule Plan
-
-### Test task & schedule
-
-| | | | |
-| -------------- | ----------------------- | -------------- | -------------------------------------- |
-| Task | Schedule | Responsibility | Detailed Task |
-| Unit testing | 01.04.2018 - 31.12.2018 | All | All unit tests should be carried out |
-| System testing | 01.04.2018 - 31.12.2018 | All | All system tests should be carried out |
-
-Table 4-1. Test Tasks and Schedule
-
-### Test Resource organization plan
-
-#### Test environment
-
-| Type/Model | Operating System | Usage |
-| ---------- | --------------------------------- | ------------------------------------------------------------------------ |
-| PC/x86     | Ubuntu GNU/Linux version \>=14.04 | Build system with unit tests. Unit and system tests are performed too.   |
-| Tizen TM2 | Tizen | Unit and system testing |
-| Odroid XU4 | Tizen | Unit and system testing |
-
-Table 4-2. Hardware / Operating System
-
-| Type | Spec | Usage |
-| ------------------- | ----------------------------------------------------- | ------------------------------------------------------------------------------- |
-| Library | Google test | Organize test code and provide utility methods |
-| VCS | Samsung github | The source code version controlling system |
-| CI | CODE | The HQ CI system |
-| Build system | CMake | Run test and check status |
-| Device connectivity | sdb | Send tools to the device and provide shell to run it |
-| Management tool | The CODE (Collaborative Open Development Environment) | Source code version control, code review, issue tracker, Continuous Integration |
-
-Table 4-3. Software
-
-### Risk management plan
-
-| Risk | Description | Probability | Countermeasures |
-| ------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | --------------------------------------------------------------------------------------- |
-| SmartMachine OS SDK toolchain is not available | In order to support compilation for SmartMachine OS the SDK is required. The compiler would have dependency of a SmartMachine OS SDK toolchain. | High | Suspend support of SmartMachine OS, and make plans when SmartMachine OS SDK is released |
-| SmartMachine OS targets are not available | To perform testing of executables for SmartMachine OS the specified targets are required. | High | Request targets or SW emulator when SmartMachine OS is released |
-| HQ CI does not support target testing | Some tests required the target devices to be run on it. The provided CI system may not support such type of testing. | High | Set CI environment on site |
-| Targets for testing/development are not available | Full automatic testing may take a long time. It also required target devices to execute the binaries. | Medium | Request/Buy enough amount of devices |
-
-Table 4-5. Risk Management
-
-### SW configuration management plan
-
-#### SW Configuration items identification
-
-| No | Document number | SW configuration Item | File name |
-| -- | ------------------------- | ------------------------------ | ------------------------------------------- |
-| 1 | SRR-RAJ0118ZZ-BWRF-STD001 | System Test Document | 18 NN compiler and Optimizer (STD) v1.0.pdf |
-| 2 | SRR-RAJ0118ZZ-BWRF-STS001 | System Test Case Specification | 18 NN compiler and Optimizer (STS) v1.0.pdf |
-| 3 | SRR-RAJ0118ZZ-BWRF-UTR001 | Unit Test Report | 18 NN compiler and Optimizer (UTR) v1.0.pdf |
-
-Table 4-6. SW Configuration Items List
-
-#### Directory Structure
-
-| Directory | Description |
-| ------------------------ | -------------------------------------------------------------------- |
-| / | source codes of the build system, main README file |
-| /contrib | Incubating projects |
-| /doc | Contains the documentation of the project |
-| /doc/project | Contains project management documents (SRS, SDD, STD, HLD, DLD, etc) |
-| /libs | Contains the source of the libraries which are used by the nncc |
-| /libs/core | Contains the source code of the core library of nncc |
-| /libs/frontend | Contains the source code of supported frontend's plugins |
-| /libs/frontend/caffe | The source code for the Caffe frontend |
-| /libs/frontend/caffe2 | The source code for the Caffe2 frontend |
-| /libs/frontend/tflite | The source code for the Tensorflow Lite frontend |
-| /libs/backend | Contains the source code of supported backend plugins |
-| /libs/backend/cpu | Contains the source code of CPU backend |
-| /libs/backend/gpu | Contains the source code of GPU backend |
-| /libs/backend/3rd\_party | Contains the source code of backend to utilize 3rd party libraries |
-| /scripts | Various scripts for building and testing the nncc |
-| /tools | The source code of the executables |
-
-Table 4-7. Directory Structure
-
-#### Baseline
-
-| Test Round | Baseline Name | Configuration Item | Schedule |
-| ---------- | ------------- | ---------------------------------------------------- | ---------- |
-| Round 1 | The nncc v0.5 | SRR-RAJ0118ZZ-BWRF-STD001, SRR-RAJ0118ZZ-BWRF-UTR001 | 01.09.2018 |
-| Round 2 | The nncc v1.0 | SRR-RAJ0118ZZ-BWRF-STD002, SRR-RAJ0118ZZ-BWRF-UTR002 | 01.12.2018 |
-
-Table 4-8. Baselines
-
-## SW System Test Case
-
-| TestItem ID | Testcase ID | Test Procedures | Expected Results |
-| ----------- | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| TST-1 | TST-1-1 | Import a NN consisting of a single Tensorflow Lite ADD operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-2 | Import a NN consisting of a single Tensorflow Lite AVERAGE\_POOL\_2D operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-3 | Import a NN consisting of a single Tensorflow Lite CONCATENATION operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-4 | Import a NN consisting of a single Tensorflow Lite CONV\_2D operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-5 | Import a NN consisting of a single Tensorflow Lite DEPTHWISE\_CONV\_2D operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-6 | Import a NN consisting of a single Tensorflow Lite DEQUANTIZE operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-7 | Import a NN consisting of a single Tensorflow Lite EMBEDDING\_LOOKUP operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-8 | Import a NN consisting of a single Tensorflow Lite FULLY\_CONNECTED operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-9 | Import a NN consisting of a single Tensorflow Lite HASHTABLE\_LOOKUP operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-10 | Import a NN consisting of a single Tensorflow Lite L2\_NORMALIZATION operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-11 | Import a NN consisting of a single Tensorflow Lite L2\_POOL\_2D operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-12 | Import a NN consisting of a single Tensorflow Lite LOCAL\_RESPONSE\_NORMALIZATION operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-13 | Import a NN consisting of a single Tensorflow Lite LOGISTIC operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-14 | Import a NN consisting of a single Tensorflow Lite LSH\_PROJECTION operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-15 | Import a NN consisting of a single Tensorflow Lite LSTM operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-16 | Import a NN consisting of a single Tensorflow Lite MAX\_POOL\_2D operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-17 | Import a NN consisting of a single Tensorflow Lite MUL operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-18 | Import a NN consisting of a single Tensorflow Lite RELU operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-19 | Import a NN consisting of a single Tensorflow Lite RELU\_N1\_TO\_1 operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-20 | Import a NN consisting of a single Tensorflow Lite RELU6 operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-21 | Import a NN consisting of a single Tensorflow Lite RESHAPE operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-22 | Import a NN consisting of a single Tensorflow Lite RESIZE\_BILINEAR operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-23 | Import a NN consisting of a single Tensorflow Lite RNN operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-24 | Import a NN consisting of a single Tensorflow Lite SOFTMAX operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-25 | Import a NN consisting of a single Tensorflow Lite SPACE\_TO\_DEPTH operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-26 | Import a NN consisting of a single Tensorflow Lite SVDF operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-27 | Import a NN consisting of a single Tensorflow Lite TANH operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-28 | Import a NN consisting of a single Tensorflow Lite CONCAT\_EMBEDDINGS operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-29 | Import a NN consisting of a single Tensorflow Lite SKIP\_GRAM operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-30 | Import a NN consisting of a single Tensorflow Lite CALL operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-31 | Import a NN consisting of a single Tensorflow Lite CUSTOM operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-32 | Import a NN consisting of a single Tensorflow Lite EMBEDDING\_LOOKUP\_SPARSE operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-33 | Import a NN consisting of a single Tensorflow Lite PAD operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-34 | Import a NN consisting of a single Tensorflow Lite UNIDIRECTIONAL\_SEQUENCE\_RNN operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-35 | Import a NN consisting of a single Tensorflow Lite GATHER operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-36 | Import a NN consisting of a single Tensorflow Lite BATCH\_TO\_SPACE\_ND operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-37 | Import a NN consisting of a single Tensorflow Lite SPACE\_TO\_BATCH\_ND operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-38 | Import a NN consisting of a single Tensorflow Lite TRANSPOSE operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-39 | Import a NN consisting of a single Tensorflow Lite MEAN operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-40 | Import a NN consisting of a single Tensorflow Lite SUB operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-41 | Import a NN consisting of a single Tensorflow Lite DIV operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-42 | Import a NN consisting of a single Tensorflow Lite SQUEEZE operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-43 | Import a NN consisting of a single Tensorflow Lite UNIDIRECTIONAL\_SEQUENCE\_LSTM operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-44 | Import a NN consisting of a single Tensorflow Lite STRIDED\_SLICE operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-45 | Import a NN consisting of a single Tensorflow Lite BIDIRECTIONAL\_SEQUENCE\_RNN operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-46 | Import a NN consisting of a single Tensorflow Lite EXP operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-47 | Import a NN consisting of a single Tensorflow Lite TOPK\_V2 operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-48 | Import a NN consisting of a single Tensorflow Lite SPLIT operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-49 | Import a NN consisting of a single Tensorflow Lite LOG\_SOFTMAX operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-50 | Import a NN consisting of a single Tensorflow Lite DELEGATE operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-51 | Import a NN consisting of a single Tensorflow Lite BIDIRECTIONAL\_SEQUENCE\_LSTM operation | During import no crashes or error messages occurred |
-| TST-1 | TST-1-52 | Import a NN consisting of a single Tensorflow Lite CAST operation | During import no crashes or error messages occurred |
-| TST-2 | TST-2-1 | Import a NN consisting of Caffe ImageData layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-2 | Import a NN consisting of Caffe Data layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-3 | Import a NN consisting of Caffe HDF5Input layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-4 | Import a NN consisting of two Caffe layers - Input layer and HDF5Output layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-5 | Import a NN consisting of Caffe Input layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-6 | Import a NN consisting of Caffe WindowData layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-7 | Import a NN consisting of Caffe MemoryData layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-8 | Import a NN consisting of Caffe DummyData layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-9 | Import a NN consisting of two Caffe layers - Input layer and Convolution layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-10 | Import a NN consisting of two Caffe layers - Input layer and Pooling layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-11 | Import a NN consisting of two Caffe layers - Input layer and SPP layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-12 | Import a NN consisting of two Caffe layers - Input layer and Crop layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-13 | Import a NN consisting of two Caffe layers - Input layer and Deconvolution layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-14 | Import a NN consisting of two Caffe layers - Input layer and Im2Col layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-15 | Import a NN consisting of two Caffe layers - Input layer and Recurrent layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-16 | Import a NN consisting of two Caffe layers - Input layer and RNN layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-17 | Import a NN consisting of two Caffe layers - Input layer and LSTM layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-18 | Import a NN consisting of two Caffe layers - Input layer and InnerProduct layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-19 | Import a NN consisting of two Caffe layers - Input layer and Dropout layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-20 | Import a NN consisting of two Caffe layers - Input layer and Embed layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-21 | Import a NN consisting of two Caffe layers - Input layer and LRN layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-22 | Import a NN consisting of two Caffe layers - Input layer and MVN layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-23 | Import a NN consisting of two Caffe layers - Input layer and BatchNorm layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-24 | Import a NN consisting of two Caffe layers - Input layer and ReLU layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-25 | Import a NN consisting of two Caffe layers - Input layer and PReLU layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-26 | Import a NN consisting of two Caffe layers - Input layer and ELU layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-27 | Import a NN consisting of two Caffe layers - Input layer and Sigmoid layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-28 | Import a NN consisting of two Caffe layers - Input layer and TanH layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-29 | Import a NN consisting of two Caffe layers - Input layer and AbsVal layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-30 | Import a NN consisting of two Caffe layers - Input layer and Power layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-31 | Import a NN consisting of two Caffe layers - Input layer and Exp layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-32 | Import a NN consisting of two Caffe layers - Input layer and Log layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-33 | Import a NN consisting of two Caffe layers - Input layer and BNLL layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-34 | Import a NN consisting of two Caffe layers - Input layer and Threshold layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-35 | Import a NN consisting of two Caffe layers - Input layer and Bias layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-36 | Import a NN consisting of two Caffe layers - Input layer and Scale layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-37 | Import a NN consisting of two Caffe layers - Input layer and Flatten layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-38 | Import a NN consisting of two Caffe layers - Input layer and Reshape layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-39 | Import a NN consisting of two Caffe layers - Input layer and BatchReindex layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-40 | Import a NN consisting of two Caffe layers - Input layer and Split layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-41 | Import a NN consisting of two Caffe layers - Input layer and Concat layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-42 | Import a NN consisting of two Caffe layers - Input layer and Slice layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-43 | Import a NN consisting of two Caffe layers - Input layer and Eltwise layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-44 | Import a NN consisting of two Caffe layers - Input layer and Filter layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-45 | Import a NN consisting of two Caffe layers - Input layer and Parameter layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-46 | Import a NN consisting of two Caffe layers - Input layer and Reduction layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-47 | Import a NN consisting of two Caffe layers - Input layer and Silence layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-48 | Import a NN consisting of two Caffe layers - Input layer and ArgMax layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-49 | Import a NN consisting of two Caffe layers - Input layer and Softmax layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-50 | Import a NN consisting of two Caffe layers - Input layer and Python layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-51 | Import a NN consisting of two Caffe layers - Input layer and MultinomialLogisticLoss layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-52 | Import a NN consisting of two Caffe layers - Input layer and Infogain layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-53 | Import a NN consisting of two Caffe layers - Input layer and SoftmaxWithLoss layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-54 | Import a NN consisting of two Caffe layers - Input layer and EuclideanLoss layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-55 | Import a NN consisting of two Caffe layers - Input layer and HingeLoss layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-56 | Import a NN consisting of two Caffe layers - Input layer and SigmoidCrossEntropyLoss layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-57 | Import a NN consisting of two Caffe layers - Input layer and Accuracy layer | During import no crashes or error messages occurred |
-| TST-2 | TST-2-58 | Import a NN consisting of two Caffe layers - Input layer and ContrastiveLoss layer | During import no crashes or error messages occurred |
-| TST-3 | TST-3-1 | Import a NN consisting of a single Caffe2 Add operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-2 | Import a NN consisting of a single Caffe2 AveragePool2D operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-3 | Import a NN consisting of a single Caffe2 Concat operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-4 | Import a NN consisting of a single Caffe2 Conv2D operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-5 | Import a NN consisting of a single Caffe2 FC operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-6 | Import a NN consisting of a single Caffe2 LRN operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-7 | Import a NN consisting of a single Caffe2 Sigmoid operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-8 | Import a NN consisting of a single Caffe2 MaxPool2D operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-9 | Import a NN consisting of a single Caffe2 Mul operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-10 | Import a NN consisting of a single Caffe2 Relu operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-11 | Import a NN consisting of a single Caffe2 Reshape operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-12 | Import a NN consisting of a single Caffe2 Softmax operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-13 | Import a NN consisting of a single Caffe2 Tanh operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-14 | Import a NN consisting of a single Caffe2 PadImage operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-15 | Import a NN consisting of a single Caffe2 BatchToSpace operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-16 | Import a NN consisting of a single Caffe2 SpaceToBatch operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-17 | Import a NN consisting of a single Caffe2 Transpose operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-18 | Import a NN consisting of a single Caffe2 Mean operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-19 | Import a NN consisting of a single Caffe2 Sub operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-20 | Import a NN consisting of a single Caffe2 Div operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-21 | Import a NN consisting of a single Caffe2 Squeeze operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-22 | Import a NN consisting of a single Caffe2 Exp operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-23 | Import a NN consisting of a single Caffe2 TopK operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-24 | Import a NN consisting of a single Caffe2 Split operation | During import no crashes or error messages occurred |
-| TST-3 | TST-3-25 | Import a NN consisting of a single Caffe2 Cast operation | During import no crashes or error messages occurred |
-| TST-4 | TST-4-1 | Import Inception V3 NN model | During import no crashes or error messages occurred |
-| TST-5 | TST-5-1 | Import MobileNet NN model | During import no crashes or error messages occurred |
-| TST-6 | TST-6-1 | Import Inception V3 NN model, serialize all model weights, compare serialized data with the initial NN model | Test executed successfully, serialized weights are equal to initial model weights |
-| TST-6 | TST-6-2 | Import MobileNet NN model, serialize all model weigths, compare serialized data with the initial NN model | Test executed successfully, serialized weights are equal to initial model weights |
-| TST-7 | TST-7-1 | Generate binary for the Inception V3 NN model and run its inference on a device with ARM CPU | Test executed successfully, no crashes occurred, inference result was output, amount and format of the outputs corresponds to the expected NN model outputs |
-| TST-7 | TST-7-2 | Generate binary for the MobileNet NN model and run its inference on a device with ARM CPU | Test executed successfully, no crashes occurred, inference result was output, amount and format of the outputs corresponds to the expected NN model outputs |
-| TST-8 | TST-8-1 | Generate binary for the Inception V3 NN model and run its inference on a GPU-enabled device | Test executed successfully, no crashes occurred, inference result was output, amount and format of the outputs corresponds to the expected NN model outputs |
-| TST-8 | TST-8-2 | Generate binary for the MobileNet V3 NN model and run its inference on a GPU-enabled device | Test executed successfully, no crashes occurred, inference result was output, amount and format of the outputs corresponds to the expected NN model outputs |
-| TST-9 | TST-9-1 | Provide correct NN model, compile it as a static library, then check that corresponding binary exists and it is a static library | Test executed successfully |
-| TST-9 | TST-9-2 | Provide correct NN model, compile it as a shared library, then check that corresponding binary exists and it is a shared library | Test executed successfully |
-| TST-9 | TST-9-3 | Provide incorrect model, compile it as a static library, then check that no compiled artifact is produced | Test executed successfully |
-| TST-9 | TST-9-4 | Provide incorrect model, compile it as a shared library, then check that no compiled artifact is produced | Test executed successfully |
-| TST-10 | TST-10-1 | Check that a static library is provided after compiling Inception V3 as a static library | Test executed successfully |
-| TST-10 | TST-10-2 | Check that a shared library is provided after compiling Inception V3 as a shared library | Test executed successfully |
-| TST-11 | TST-11-1 | Check that a static library is provided after compiling MobileNet as a static library | Test executed successfully |
-| TST-11 | TST-11-2 | Check that a shared library is provided after compiling MobileNet as a shared library | Test executed successfully |
-| TST-12 | TST-12-1 | Check that configuration object is constructed correctly when getting configuration parameters from command line | Test executed successfully |
-| TST-12 | TST-12-2 | Check that configuration object is constructed correctly when getting configuration parameters from config file | Test executed successfully |
-| TST-12 | TST-12-3 | Check that configuration object is constructed correctly when getting configuration parameters from environment variables | Test executed successfully |
-| TST-13 | TST-13-1 | Compile Inception V3 as static library for CPU, provide it and the original model with same correct input data, then compare the result from original model with the result from compiled artifact | Test executed successfully, results are comparable |
-| TST-13 | TST-13-2 | Compile Inception V3 as shared library for CPU, provide it and the original model with same correct input data, then compare the result from original model with the result from compiled artifact | Test executed successfully, results are comparable |
-| TST-13 | TST-13-3 | Compile Inception V3 as static library for GPU, provide it and the original model with same correct input data, then compare the result from original model with the result from compiled artifact | Test executed successfully, results are comparable |
-| TST-13 | TST-13-4 | Compile Inception V3 as shared library for GPU, provide it and the original model with same correct input data, then compare the result from original model with the result from compiled artifact | Test executed successfully, results are comparable |
-| TST-13 | TST-13-5 | Compile MobileNet as static library for CPU, provide it and the original model with same correct input data, then compare the result from original model with the result from compiled artifact | Test executed successfully, results are comparable |
-| TST-13 | TST-13-6 | Compile MobileNet as shared library for CPU, provide it and the original model with same correct input data, then compare the result from original model with the result from compiled artifact | Test executed successfully, results are comparable |
-| TST-13 | TST-13-7 | Compile MobileNet as static library for GPU, provide it and the original model with same correct input data, then compare the result from original model with the result from compiled artifact | Test executed successfully, results are comparable |
-| TST-13 | TST-13-8 | Compile MobileNet as shared library for GPU, provide it and the original model with same correct input data, then compare the result from original model with the result from compiled artifact | Test executed successfully, results are comparable |
-| TST-14 | TST-14-1 | Provide compiled Inception V3 artifact with invalid input, check that no unexpected termination occurs | Test executed successfully |
-| TST-14 | TST-14-2 | Provide compiled Inception V3 artifact with invalid input, check that an error message is provided | Test executed successfully |
-| TST-14 | TST-14-3 | Provide compiled MobileNet artifact with invalid input, check that no unexpected termination occurs | Test executed successfully |
-| TST-14 | TST-14-4 | Provide compiled MobileNet artifact with invalid input, check that an error message is provided | Test executed successfully |
-| TST-15 | TST-15-1 | Check that the OS used during test environment build is Linux-based | Test executed successfully |
-| TST-16 | TST-16-1 | Compile a valid NN model, then check that C/C++ header corresponding to compiled artifact exists | Test executed successfully |
-| TST-16 | TST-16-2 | Compile a valid NN model, then if C/C++ header corresponding to compiled artifact exists, verify its validity | Test executed successfully |
-
-Table 5-1. System Test case
diff --git a/docs/nncc/project_guide.md b/docs/nncc/project_guide.md
deleted file mode 100644
index af6a5acfd..000000000
--- a/docs/nncc/project_guide.md
+++ /dev/null
@@ -1,27 +0,0 @@
-### How to create your own project
-_nncc_ aims to make it easy to develop optimized, retargetable NN compilers. Anyone or team interested in _nncc_ can create a new incubating project.
-
-#### Subject
-Subject is related to NN(Neural Network) compiler. Some examples are below, but not limited to:
-- NN IR(Intermediate Representation)
-- Extended frontend and backend
-- High-performance (model optimization, memory optimization, scheduling, etc.)
-- Tools (verification, benchmark, visualization, etc.)
-- Tutorial, testbed
-
-#### How to propose
-There is no formal proposal process. Anyone can submit an issue or a PR as a starting point of a proposal. It would be helpful that the submissions have documents or descriptions containing the following to share your idea and concept and attract new contributors to your project (not mandatory):
-- Overview, goal or architecture description to explain your project
-- How-to guide including building and running your programs
-
-#### Directory to use
-- A directory under `compiler/`, which starts with your project name.
-
-#### Requirement
-- A project should follow the formal review process that _nncc_ is currently using [[(How to create a Pull Request (in contribution guide)](contribution_guide.md#how-to-create-a-pull-request)].
-
-#### How to enable format checker
-- Create a `.FORMATCHECKED` file in your project directory for format checker to check the source code of the directory and its subdirectories.
-
-#### How to contribute
-Anyone who wants to contribute can create and submit PRs and issues following [nncc contribution_guide](contribution_guide.md). _nncc_ always welcomes your contribution.
diff --git a/docs/nncc/roadmap.md b/docs/nncc/roadmap.md
deleted file mode 100644
index d2227e8be..000000000
--- a/docs/nncc/roadmap.md
+++ /dev/null
@@ -1,6 +0,0 @@
-## 2018
-
-In 2018, _nncc_ will provide Caffe/TensorFlow Lite frontends and ARM CPU/GPU backends built on top of
-well-specified common (re-targetable) intermediate representation (IR) which is expressive enough to
-encode Inception(v3) and MobileNet, and is flexible enough to support next-gen H/W architectures, such
-as DSP or NPU.
diff --git a/docs/nncc/v1.0.0/getting_started.md b/docs/nncc/v1.0.0/getting_started.md
deleted file mode 100644
index ee8014042..000000000
--- a/docs/nncc/v1.0.0/getting_started.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# Getting Started
-
-## Environments
-
-Currently, Ubuntu 16.04 is officially supported as development environment.
-Other environments may be available but not confirmed.
-
-## How to compile your own model
-
-### What should we prepare
-
-- Tensorflow model file (`.pb` file)
- - TensorFlow model file should be frozen. [[How to freeze?]](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)
- - Only inference operations are supported. Training operations are not supported yet.
- - Quantization is not yet supported.
- - `device` attribute should not have `GPU` value.
-- Model information file (`.info` file)
- - `.info` file should include 4 things.
- - Specification of input or output
- - name of input/output node
- - type of input/output node
- - shape of input/output node
- - Example format is written below.
- ```
- # input/output, node_name, node_type, node_shape
-
- input, input:0, TF_FLOAT, [1, 299, 299, 3]
- output, InceptionV3/Predictions/Reshape_1:0, TF_FLOAT, [1, 1001]
- ```
-
-### How to compile
-
-1. Generate `nnpkg` using `.pb` file and `.info` file.
- ```sh
- tf2nnpkg --graphdef <model.pb> --info <model.info> -o <path/to/generate>
- ```
-
-1. Check if all files are generated correctly.
- - Directory name of `nnpkg` is prefix of `.pb` file.
- - For example, if there is `model.pb` file, directory name will be `model`.
- ```
- path/to/generate
- └ model
- ├ model.circle
- └ metadata
- └ MANIFEST
- ```
-
-1. Check if `MANIFEST` contents are correct.
- ```sh
- $ cat path/to/generate/model/metadata/MANIFEST
- {
- "major-version" : "1",
- "minor-version" : "0",
- "patch-version" : "0",
- "models" : [ "model.circle" ],
- "model-types" : [ "circle" ]
- }
- ```
diff --git a/docs/nncc/v1.0.0/operation-list.md b/docs/nncc/v1.0.0/operation-list.md
deleted file mode 100644
index 9a43eb518..000000000
--- a/docs/nncc/v1.0.0/operation-list.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# List of TensorFlow Operations Supported by nncc
-
-The list of TensorFlow operations supported by nncc is as follows:
-
-**Notice: There may be some restrictions on the support of each operation. Details will be updated soon.**
-
-- Add
-- AvgPool
-- BiasAdd
-- ConcatV2
-- Const
-- Conv2D
-- Conv2DBackpropInput
-- DepthwiseConv2dNative
-- FusedBatchNorm
-- Identity
-- MaxPool
-- Mean
-- Mul
-- Pad
-- Placeholder
-- RealDiv
-- Relu
-- Relu6
-- Reshape
-- Rsqrt
-- Shape
-- Softmax
-- Sqrt
-- SquaredDifference
-- Squeeze
-- StopGradient
-- Sub
-- Tanh
diff --git a/docs/nncc/v1.0.0/tutorial.md b/docs/nncc/v1.0.0/tutorial.md
deleted file mode 100644
index 9d1f97e67..000000000
--- a/docs/nncc/v1.0.0/tutorial.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Tutorial
-
-Let's compile Inception_v3 model and make a nnpackage!
-
-## Prepare inception_v3 files
-
-1. Download pre-trained `inception_v3.pb` model file.
- ```sh
- $ wget https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v3_2018_04_27.tgz
- $ tar -xvf inception_v3_2018_04_27.tgz
- ```
-1. Create model information file as `inception_v3.info`.
- ```
- $ cat > inception_v3.info << "END"
- input, input:0, TF_FLOAT, [1, 299, 299, 3]
- output, InceptionV3/Predictions/Reshape_1:0, TF_FLOAT, [1, 1001]
- END
- ```
-
-## Let's compile inception_v3
-
-1. Generate `nnpkg`. In this tutorial, let's generate to current directory.
- ```sh
- tf2nnpkg --use-tf2circle \
- --graphdef inception_v3.pb \
- --info inception_v3.info \
- -o .
- ```
-
-## Check whether compilation is well done
-
-- Check if all files are generated correctly.
- ```
- inception_v3
- ├ inception_v3.circle
- └ metadata
- └ MANIFEST
- ```
-- Check if `MANIFEST` contents are correct.
- ```sh
- $ cat inception_v3/metadata/MANIFEST
- {
- "major-version" : "1",
- "minor-version" : "0",
- "patch-version" : "0",
- "models" : [ "inception_v3.circle" ],
- "model-types" : [ "circle" ]
- }
- ```
diff --git a/docs/nncc/v1.1.0/nncc_in_tizen_studio.md b/docs/nncc/v1.1.0/nncc_in_tizen_studio.md
deleted file mode 100644
index d0f89a49b..000000000
--- a/docs/nncc/v1.1.0/nncc_in_tizen_studio.md
+++ /dev/null
@@ -1,52 +0,0 @@
-# nncc for Tizen Studio Plugin
-
-## Environments
-
-- Windows 10
-
-## How to install nncc in Tizen Studio
-
-### Things to prepare
-
-- Tizen Studio with IDE
-- Tizen Studio Package Manager
- - Will be automatically installed when Tizen Studio is installed
-- Firewall Registration
- - To add a repository at Package Manager, firewall registration must be applied in advance.
- - IP Address : 107.110.2.162
- - Service Port : 80(TCP)
-
-### Installation of SDK
-
-1. Execute Package Manager of Tizen Studio.
-1. Click cogwheel at right-top side.
-1. Click `Extension SDK`.
-1. Click `+` button.
-1. Write `http://107.110.2.162/packages/ai_tool_ext/` at `Repository`, and anything at `Name`.
-1. Click `OK`. And then click `OK` again. Refresh progress will be run.
-1. At `Extension SDK` tab, click `install` of `nnas`
-
-## Tutorial
-Let's create nnpackage in Tizen Studio!
-
-1. Enter [File] - [New] - [Tizen Project].
-1. Select `Sample` and click `Next`.
-1. Select `Mobile` with any version and click `Next`.
-1. Select `Web Application` and click `Next`.
-1. Select `Application` - `App Callee` and click `Next`.
-1. Write `AppCallee` at `Project name` and click `Finish`.
-1. Click `Finish`. (Default project name is `AppCallee`)
-1. After project `AppCallee` was created, click `AppCallee` at Project Explorer.
-1. Click `AI extension` (AI chip icon) at the top.
-1. Give `.pb` file path to `Model File` and `.info` file path to `info file`.
- - Information about `.pb` and `.info`, please refer to [Getting Started](../v1.0.0/getting_started.md#10)
-1. Click `OK`. Generating circle file progress will be done.
-1. Check whether nnpackage is created in `AppCallee\res\shared` folder.
- - Suppose that `model.pb` and `model.info` were used
- ```
- AppCallee\res\shared
- └ model
- ├ model.circle
- └ metadata
- └ MANIFEST
- ``` \ No newline at end of file
diff --git a/docs/nncc/v1.1.0/nncc_in_visual_studio.md b/docs/nncc/v1.1.0/nncc_in_visual_studio.md
deleted file mode 100644
index bc9e59fa9..000000000
--- a/docs/nncc/v1.1.0/nncc_in_visual_studio.md
+++ /dev/null
@@ -1,61 +0,0 @@
-# nncc for Visual Studio Tizen Extension
-
-## Environments
-
-- Windows 10
-
-## How to install nncc in Visual Studio
-
-### Things to prepare
-
-- Visual Studio 2019 for Windows
- - Version Status
- - Community version : Not available yet
- - Professional version : Available
- - Enterprise version : Available
- - Needed Workload
- - .NET Desktop Development
- - If above workload was not installed, please install it using Visual Studio Installer.
- - Under 2019 version, some details can be different
- - Express version : Not available
- - Other versions : Not confirmed
- - Refer to https://developer.tizen.org/development/visual-studio-tools-tizen/installing-visual-studio-tools-tizen
-- Tizen Baseline SDK
- - Install `nnas` by using Package Manager. For details, [click here.](nncc_in_tizen_studio.md)
-
-### Installation
-
-1. Download `VisualStudioToolsForTizen_2019AI_3.1.0116.1.vsix` from the release page.
-1. Execute the `vsix` file.
- - Do not execute Visual Studio during this step. If executed, the process will wait infinitely.
-1. Open Visual Studio and click `Continue without code`.
-1. Enter [Tools] - [NuGet Package Manager] - [Package Manager Settings] - [NuGet Package Manager - Package Sources]
-1. Click green `+` button to add new package source.
-1. Set like the following. Then, click `Update`.
- - `Name` : write `Tizen.NET.SDK`
- - `Source`: write `https://tizen.myget.org/F/dotnet/api/v3/index.json`
-1. <b>Only when</b> `nuget.org` is not found in `Available package sources`, follow below three steps.
- - Click green `+` button
- - Set `Name` as `nuget.org` and set `Source` as `https://api.nuget.org/v3/index.json`
- - Click `Update`
-1. Click `OK`.
-
-## Tutorial
-Let's create nnpackage in Visual Studio!
-
-1. Open Visual Studio.
-1. Enter [File] - [New] - [Project].
-1. Select `AI App Project` and click `Next`.
-1. Click `Create`. (Default project name is `AIAppTemplate`)
-1. A dialog pops up. Enter the path of your `model.pb` and `model.info` into the dialog.
- - In this version, names of model file and info file <b>must be</b> `model.pb` and `model.info`.
- - Detailed information about `.pb` file and `.info` file is in [getting_started](../v1.0.0/getting_started.md#12)
-1. Open `AIAppTemplate_App.cs` in `AIAppTemplate` and build it.
-1. If build succeeded, nnpackage will be found at `AIAppTemplate\res\shared` folder.
- ```
- AIAppTemplate\res\shared
- └ model
- ├ model.circle
- └ metadata
- └ MANIFEST
- ```
diff --git a/docs/nnfw/2018/fig/nnfw_architecture.png b/docs/nnfw/2018/fig/nnfw_architecture.png
deleted file mode 100644
index d183e2b56..000000000
--- a/docs/nnfw/2018/fig/nnfw_architecture.png
+++ /dev/null
Binary files differ
diff --git a/docs/nnfw/2018/fig/nnfw_architecture.pptx b/docs/nnfw/2018/fig/nnfw_architecture.pptx
deleted file mode 100644
index 3e5b4fad5..000000000
--- a/docs/nnfw/2018/fig/nnfw_architecture.pptx
+++ /dev/null
Binary files differ
diff --git a/docs/nnfw/2018/roadmap.md b/docs/nnfw/2018/roadmap.md
deleted file mode 100644
index aca206889..000000000
--- a/docs/nnfw/2018/roadmap.md
+++ /dev/null
@@ -1,123 +0,0 @@
-This document describes roadmap of 2018 NN Runtime (or _nnfw_) project.
-
-# Goal
-This project _nnfw_ aims at providing a high-performance, on-device neural network (NN) inference
-framework that performs inference of a given NN model on processors, such as CPU, GPU, or NPU, in
-the target platform, such as Tizen and SmartMachine Platform (SMP).
-
-# Architecture
-![nnfw_architecture](./fig/nnfw_architecture.png)
-
-The figure above illustrates the overall architecture and scope of _nnfw_, which consists of ML
-Framework and NN Runtime, as well as NN Compute that is provided by the platform:
-1. ML Framework
- - Provide TensorFlow (TF) Lite on Tizen and SMP
- - We chose TF Lite as a standard ML framework in _nnfw_ for this year, since TF Lite is
- lightweight compared to other ML frameworks and its community is rapidly growing. We expect
- supporting TF Lite on Samsung's OS platforms would be beneficial to Samsung's diverse
- business areas and AI solutions.
- - Provide TF Lite C# API for Tizen .NET
- - Considering the existing TF Lite supports only C++ and Java API, C# API for TF Lite would
- be a great complement to TF Lite and natural extension for Tizen.
-1. NN Runtime
- - Provide a common runtime interface, which is Android NN API
- - Android NN API (NN API for short) was selected for seamless integration with TF Lite. As
- long as our NN runtime provides NN API as an interface, TF Lite can link to our NN runtime
- without any modification.
- - Although we borrowed NN API as the runtime's interface, we plan to design and implement the
- runtime itself by ourselves. For the implementation, we will utilize ARM Compute Library
- (ACL) for NN operation acceleration on ARM CPU and GPU.
-1. NN Compute
- - Provide computation acceleration library, such as ACL, or device driver for NPU
- - This layer will be provided by OS platform, and we will use the library or device driver as it
- is. We may request a specific version to the Platform team, but we don't expect we will be
- modifying the library.
-
-# Deliverables
-- On-Device AI SW Stack (a.k.a STAR Lite) for Tizen
-- On-Device AI SW Stack for SMP
-- ML Framework that can run ADAS models
-
-# Milestones
-## Project Milestones
-- Support all 50 TF Lite operations on ARM CPU and GPU
-- Support all 29 operations of NN API on ARM CPU and GPU
-- Support InceptionV3 and MobileNet, written in TF Lite model format, on ARM CPU and GPU
-
-## Monthly Milestones
-(These will be updated as we proceed with the project and can estimate development time more
-accurately.)
-- March: Set up milestones, tasks, workgroups, initial code structure, and build/test infra
-- April: Run InceptionV3 using ACL on the Tizen TM2 and ODroid XU4
- - Mid of April: Establish a full SW stack that is ready to run InceptionV3
-- May: Run MobileNet on Tizen / Tizen M1 release
-- June: Run ADAS models on Tizen / STAR Platform 2nd release
-- September: Tizen M2 release / STAR Platform 3rd release
-- October: SMP v1.0 release / STAR Platform v1.0 release
-
-# Tasks
-Below is an overall list of major topics (tasks) throughout the project this year. For the details
-of each topic, please visit each topic's issue page.
-Please note that the list might not be complete and thus it could be updated as we make progress in
-the project and discuss more about the implementation details.
-
-## ML Framework
-### Technical Goals
-- Provide TF Lite on Tizen and SMP
-- Develop TF Lite C# API for Tizen .NET
-
-### Milestones
-- March
- 1. Enable Tizen build / C# API / test code
- 1. Complete enabling Tizen build and test codes / Test infra / Benchmark
-- Mid April
- 1. Complete all tasks needed to run InceptionV3
-- May
- 1. Support custom operators to run ADAS models
- 1. Complete all test codes and benchmarks
-
-### Tasks
-- Visit [#74](https://github.sec.samsung.net/STAR/nnfw/issues/74) for the list of tasks, issue
- tracking, and discussions.
-
-## NN Runtime
-- NN Runtime is an actual implementation of NN API.
-
-### Technical Goals
-- Develop an NN model interpreter targeting ARM CPU and GPU
-- Develop a device memory manager
-- Develop an operation scheduler supporting both CPU and GPU
-
-### Milestones
-- March: Run simple NN with CPU backend
- 1. Prepare a working vertical SW stack of NN runtime
-- Mid of April (for testing): Run InceptionV3 with ACL backend and CPU backend
- 1. Evaluate performance of InceptionV3 and improve performance for ADAS if necessary
-- May (Tizen M1)
- 1. Optimize NN runtime (improving interpreter or using IR from
- [nncc](https://github.sec.samsung.net/STAR/nncc))
- 1. Implement more operators of NN API
-
-### Tasks
-- Visit [#72](https://github.sec.samsung.net/STAR/nnfw/issues/72) for the list of tasks, issue
- tracking, and discussions.
-
-## NN API Operations
-### Technical Goals
-- Implement NN operations optimized for ARM CPU and GPU
-
-### Milestones
-- March: Run convolution using `tflite_run`
- - Test framework: ?
-- Mid of April : InceptionV3 complete on CPU/GPU
- - For ADAS, we need to make the performance as good as we can.
-- May: optimized kernels for InceptionV3 on CPU/GPU
-
-### Tasks
-- Visit [#73](https://github.sec.samsung.net/STAR/nnfw/issues/73) for the list of tasks, issue
- tracking, and discussions.
-
-# Workgroups (WGs)
-- We organize WGs for major topics above, and each WG will be working on its own major topic by
- breaking it into small tasks/issues, performing them inside WG, and collaborating between WGs.
-- The WG information can be found [here](workgroups.md).
diff --git a/docs/nnfw/HowToImplementOperatorKernel.md b/docs/nnfw/HowToImplementOperatorKernel.md
deleted file mode 100644
index 715575a5f..000000000
--- a/docs/nnfw/HowToImplementOperatorKernel.md
+++ /dev/null
@@ -1 +0,0 @@
-Under preparation. Coming soon!
diff --git a/docs/nnfw/fig/nnfw_architecture.png b/docs/nnfw/fig/nnfw_architecture.png
deleted file mode 100644
index 566151e4a..000000000
--- a/docs/nnfw/fig/nnfw_architecture.png
+++ /dev/null
Binary files differ
diff --git a/docs/nnfw/fig/nnfw_architecture.pptx b/docs/nnfw/fig/nnfw_architecture.pptx
deleted file mode 100644
index 9a4e8fbb7..000000000
--- a/docs/nnfw/fig/nnfw_architecture.pptx
+++ /dev/null
Binary files differ
diff --git a/docs/nnfw/fig/nnfw_behavior.png b/docs/nnfw/fig/nnfw_behavior.png
deleted file mode 100644
index b7527b48c..000000000
--- a/docs/nnfw/fig/nnfw_behavior.png
+++ /dev/null
Binary files differ
diff --git a/docs/nnfw/fig/nnfw_behavior.pptx b/docs/nnfw/fig/nnfw_behavior.pptx
deleted file mode 100644
index bac51f363..000000000
--- a/docs/nnfw/fig/nnfw_behavior.pptx
+++ /dev/null
Binary files differ
diff --git a/docs/nnfw/howto.md b/docs/nnfw/howto.md
deleted file mode 100644
index 2c28453bd..000000000
--- a/docs/nnfw/howto.md
+++ /dev/null
@@ -1,38 +0,0 @@
-## Build Requires
-
-If you are building this project, then the following modules must be installed on your system:
-
-- CMake
-- Boost C++ libraries
-
-```
-$ sudo apt-get install cmake libboost-all-dev
-```
-
-## How to use (simple) NNAPI Binding
-
-This repo provides a T/F Lite Model loader(named ``tflite_run``), and simple NNAPI binding.
-
-Let's type the following commands, and see what happens!
-```
-$ make install
-$ USE_NNAPI=1 LD_LIBRARY_PATH="$(pwd)/Product/obj/runtimes/logging:$(pwd)/Product/out/lib" Product/out/bin/tflite_run [T/F Lite Flatbuffer Model Path]
-```
-
-## How to get pre-built T/F Lite Flatbuffer models?
-Google provides several pre-built T/F Lite models. Please check [this page](https://www.tensorflow.org/lite/models)
-
-
-## Build How-to
-- [Cross building for ARM](howto/CrossBuildForArm.md)
-- [Cross building for AARCH64](howto/CrossBuildForAarch64.md)
-- [Build using prebuilt docker image](howto/HowToUseDockerImage.md)
-
-
-## Other how-to documents
-- [Building TensorFlow and TOCO from source](howto/BuildTFfromSource.md)
-- [How to setup XU3 with Ubuntu 16.04](howto/device/xu3_ubuntu.md)
-- [How to setup XU4 with Ubuntu 16.04](howto/device/xu4_ubuntu.md)
-- [How to add unittest using gtest](howto/HowToAddUnittest.md)
-- [How to manually test NNFW on single model/input pair](howto/HowToTestManualy.md)
-- [How to use nnfw API](howto/HowToUseNNFWAPI.md)
diff --git a/docs/nnfw/howto/BuildTFfromSource.md b/docs/nnfw/howto/BuildTFfromSource.md
deleted file mode 100644
index 3880d5ab9..000000000
--- a/docs/nnfw/howto/BuildTFfromSource.md
+++ /dev/null
@@ -1,66 +0,0 @@
-# Building TensorFlow and TOCO from source
-
-You can build TensorFlow and tools including `TOCO` from source.
-Please read
-[Installing TensorFlow from Sources](https://www.tensorflow.org/install/install_sources)
-for full description.
-
-## Install Bazel
-
-Follow [Installing Bazel](https://docs.bazel.build/versions/master/install.html)
-- For Ubuntu, follow [Installing Bazel on Ubuntu](https://docs.bazel.build/versions/master/install-ubuntu.html)
-
-These are the actual steps to install using apt package manager:
-```
-sudo apt-get install openjdk-8-jdk
-```
-```
-echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" \
-| sudo tee /etc/apt/sources.list.d/bazel.list
-curl https://bazel.build/bazel-release.pub.gpg | sudo apt-key add -
-```
-```
-sudo apt-get update && sudo apt-get install bazel
-```
-```
-sudo apt-get upgrade bazel
-```
-
-## Install python packages
-
-```
-sudo apt-get install python-numpy python-dev python-pip python-wheel
-```
-
-## Configure
-
-```
-cd external/tensorflow
-./configure
-```
-
-Select options like this page: https://www.tensorflow.org/install/install_sources#ConfigureInstallation
-
-## Build with Bazel
-
-```
-bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package
-bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
-```
-
-If you have any problems while building, please fire an issue.
-
-## Uninstall if already installed
-
-You may skip this if you haven't installed
-```
-pip uninstall /tmp/tensorflow_pkg/tensorflow-1.6.0rc1-cp27-cp27mu-linux_x86_64.whl
-```
-
-## Install TensorFlow and tools
-
-```
-pip install /tmp/tensorflow_pkg/tensorflow-1.6.0rc1-cp27-cp27mu-linux_x86_64.whl --user
-```
-
-You should see installed `toco` at `~/.local/bin` folder.
diff --git a/docs/nnfw/howto/CrossBuildForAarch64.md b/docs/nnfw/howto/CrossBuildForAarch64.md
deleted file mode 100644
index 9f0af85b8..000000000
--- a/docs/nnfw/howto/CrossBuildForAarch64.md
+++ /dev/null
@@ -1,77 +0,0 @@
-# Cross building for AARCH64 (ARM64)
-
-In nnfw, we use `AARCH64` in build files such as Makefile, CMakeLists.txt and so on.
-
-## Prepare Ubuntu RootFS
-
-Install required packages
-
-```
-sudo apt-get install qemu qemu-user-static binfmt-support debootstrap
-```
-
-Use `build_rootfs.sh` script to prepare Root File System. You should have `sudo`
-
-```
-sudo ./tools/cross/build_rootfs.sh aarch64
-```
-- supports `arm`(default) and `aarch64` architectures for now
-- supports `xenial`(default) and `trusty` releases
-
-To see the options,
-```
-./tools/cross/build_rootfs.sh -h
-```
-
-RootFS will be prepared at `tools/cross/rootfs/aarch64` folder.
-
-### Prepare RootFS at alternative folder
-
-Use `ROOTFS_DIR` to a full path to prepare at alternative path.
-
-```
-ROOTFS_DIR=/home/user/rootfs/aarch64-xenial sudo ./tools/cross/build_rootfs.sh aarch64
-```
-
-### Using proxy
-
-If you need to use proxy server while building the rootfs, use `--setproxy` option.
-
-```
-# for example,
-sudo ./tools/cross/build_rootfs.sh aarch64 --setproxy="1.2.3.4:8080"
-# or
-sudo ./tools/cross/build_rootfs.sh aarch64 --setproxy="proxy.server.com:8888"
-```
-
-This will put `apt` proxy settings in `rootfs/etc/apt/apt.conf.d/90proxy` file
-for `http`, `https` and `ftp` protocol.
-
-## Cross build for AARCH64
-
-Install cross compilers
-```
-sudo apt-get install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
-```
-
-Build and install ARM Compute Library
-```
-CROSS_BUILD=1 TARGET_ARCH=aarch64 make acl
-```
-Mostly you only need to build ACL once. This will build and install it to the
-`Product/(target_arch-os)/out/bin` folder.
-- this is required for `AARCH64` on Ubuntu
-
-Give `TARGET_ARCH` variable to set the target architecture
-```
-CROSS_BUILD=1 TARGET_ARCH=aarch64 make
-CROSS_BUILD=1 TARGET_ARCH=aarch64 make install
-```
-- supports `armv7l` and `aarch64` for now
-
-If you used `ROOTFS_DIR` to prepare in alternative folder,
-you should also give this to makefile.
-```
-CROSS_BUILD=1 ROOTFS_DIR=/home/user/rootfs/aarch64-xenial TARGET_ARCH=aarch64 make
-CROSS_BUILD=1 ROOTFS_DIR=/home/user/rootfs/aarch64-xenial TARGET_ARCH=aarch64 make install
-```
diff --git a/docs/nnfw/howto/CrossBuildForAndroid.md b/docs/nnfw/howto/CrossBuildForAndroid.md
deleted file mode 100644
index ab9d04e92..000000000
--- a/docs/nnfw/howto/CrossBuildForAndroid.md
+++ /dev/null
@@ -1,52 +0,0 @@
-# Cross building for Android
-
-Supported Architecture : AARCH64 only (ARM32 is not supported yet)
-
-## Prepare Android NDK
-
-Use the `tools/cross/build_android_ndk.sh` script to prepare the Android NDK. This is the recommended way to set up the Android NDK.
-You may download it yourself from the official Android NDK website, but the script does a little more than just downloading and unzipping.
-
-## Build
-
-### Host Environment Requirements
-
-With Ubuntu 16.04, everything is fine except one. CMake 3.6.0 or later is required for Android NDK CMake support.
-So if you want to use Docker, please use `infra/docker/Dockerfile.1804` which is based on Ubuntu 18.04. It has CMake 3.10.2.
-
-```bash
-docker build --network host -t nnas1804 -f infra/docker/Dockerfile.1804 infra/docker
-```
-
-### Get prebuilt ARM Compute Library
-
-Download prebuilt binary from [github](https://github.com/ARM-software/ComputeLibrary/releases). Check the version we support and platform(Android).
-
-Then extract the tarball and we will use the ones in `lib/android-arm64-v8a-neon-cl`. The following files are used.
-
-```
-libarm_compute_core.so
-libarm_compute_graph.so
-libarm_compute.so
-```
-
-### Build and install the runtime
-
-Some tools/libs are still not supported and those are not built by default - mostly due to dependency on Boost library.
-Please refer to `infra/nnfw/cmake/options/options_aarch64-android.cmake` for details.
-
-Different from cross build for linux,
-
-- `NDK_DIR` is required
-
-Here is an example of using Makefile.
-
-```bash
-cp -n Makefile.template Makefile
-
-TARGET_OS=android \
-CROSS_BUILD=1 \
-NDK_DIR=/path/android-tools/r20/ndk \
-EXT_ACL_FOLDER=/path/arm_compute-v19.05-bin-android/lib/android-arm64-v8a-neon-cl \
-make install
-```
diff --git a/docs/nnfw/howto/CrossBuildForArm.md b/docs/nnfw/howto/CrossBuildForArm.md
deleted file mode 100644
index 07b4a17b3..000000000
--- a/docs/nnfw/howto/CrossBuildForArm.md
+++ /dev/null
@@ -1,118 +0,0 @@
-# Cross building for ARM
-
-## Prepare Ubuntu RootFS
-
-Install required packages
-
-```
-sudo apt-get install qemu qemu-user-static binfmt-support debootstrap
-```
-
-Use `build_rootfs.sh` script to prepare Root File System. You should have `sudo`
-
-```
-sudo ./tools/cross/build_rootfs.sh arm
-```
-- supports `arm`(default) and `aarch64` architectures for now
-- supports `xenial`(default), `trusty`, and `bionic` releases
-
-To see the options,
-```
-./tools/cross/build_rootfs.sh -h
-```
-
-RootFS will be prepared at `tools/cross/rootfs/arm` folder.
-
-### Prepare RootFS at alternative folder
-
-Use `ROOTFS_DIR` to a full path to prepare at alternative path.
-
-```
-ROOTFS_DIR=/home/user/rootfs/arm-xenial sudo ./tools/cross/build_rootfs.sh arm
-```
-
-### Using proxy
-
-If you need to use proxy server while building the rootfs, use `--setproxy` option.
-
-```
-# for example,
-sudo ./tools/cross/build_rootfs.sh arm --setproxy="1.2.3.4:8080"
-# or
-sudo ./tools/cross/build_rootfs.sh arm --setproxy="proxy.server.com:8888"
-```
-
-This will put `apt` proxy settings in `rootfs/etc/apt/apt.conf.d/90proxy` file
-for `http`, `https` and `ftp` protocol.
-
-## Install ARM Cross Toolchain
-
-We recommend you have g++ >= 6 installed on your system because NN generated tests require it.
-
-- On Ubuntu 16.04 or older, follow the next steps:
-
-```
-cd ~/your/path
-wget https://releases.linaro.org/components/toolchain/binaries/7.2-2017.11/arm-linux-gnueabihf/gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf.tar.xz
-tar xvf gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf.tar.xz
-echo 'PATH=~/your/path/gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf/bin:$PATH' >> ~/.bashrc
-```
-
-- On Ubuntu 18.04 LTS, you can install using `apt-get`.
-Choose g++ version whatever you prefer: 6, 7 or 8.
-
-```
-sudo apt-get install g++-{6,7,8}-arm-linux-gnueabihf
-```
-
-Make sure you get `libstdc++.so` updated on your target with your new toolchain's corresponding one.
-
-For example, if you installed gcc-linaro-7.2.1-2017.11 above, do
-
-```
-wget https://releases.linaro.org/components/toolchain/binaries/7.2-2017.11/arm-linux-gnueabihf/runtime-gcc-linaro-7.2.1-2017.11-arm-linux-gnueabihf.tar.xz
-tar xvf runtime-gcc-linaro-7.2.1-2017.11-arm-linux-gnueabihf.tar.xz
-```
-
-Then, copy `libstdc++.so.6.0.24` into `/usr/lib/arm-linux-gnueabihf`, and update symbolic links on your device.
-
-## Build and install ARM Compute Library
-
-Mostly you only need to build ACL once.
-
-ACL will be automatically installed in `externals/acl` when you build nnfw without any changes.
-
-You can check ACL source information in `cmake/packages/ARMComputeSourceConfig.cmake`
-
-## Build nnfw
-
-Give `TARGET_ARCH` variable to set the target architecture.
-
-If you used `ROOTFS_DIR` to prepare in alternative folder, you should also give this to makefile.
-
-```
-CROSS_BUILD=1 TARGET_ARCH=armv7l make all install
-
-# If ROOTFS_DIR is in alternative folder
-ROOTFS_DIR=/path/to/your/rootfs/arm \
-CROSS_BUILD=1 TARGET_ARCH=armv7l make all install
-```
-
-You can also omit the `CROSS_BUILD=1` option if you explicitly pass `ROOTFS_DIR`. In that case, if
-the `TARGET_ARCH` differs from the host architecture, the make script automatically applies
-`CROSS_BUILD=1`. So, if you set `ROOTFS_DIR` as an environment variable, you can simply perform
-normal build and cross build as follows.
-
-```
-export ROOTFS_DIR = xxx
-...
-make all install # do normal build
-TARGET_ARCH = armv7l make all install # do cross build
-```
-
-## Run test
-
-```
- ./tests/scripts/test_driver.sh --artifactpath=. \
- --frameworktest_list_file=tests/scripts/list/neurun_frameworktest_list.armv7l.acl_cl.txt
-```
diff --git a/docs/nnfw/howto/HowToAddUnittest.md b/docs/nnfw/howto/HowToAddUnittest.md
deleted file mode 100644
index 5bb75b258..000000000
--- a/docs/nnfw/howto/HowToAddUnittest.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# How to Add Unittest using gtest(googletest)
-
-### 1. make own test code
-```
-#include "gtest/gtest.h"
-
-TEST(TFLite_test_case, simple_test)
-{
- EXPECT_EQ(1, 1);
-}
-```
-
-### 2. Find and prepare package `googletest` to your test executable
-```
-find_nnfw_package(GTest QUITE)
-if(NOT GTest_FOUND)
- ## Cannot find and prepare googletest package
- return()
-endif(NOT GTest_FOUND)
-add_executable($YOURTEST_TARGET yourtest1.cc yourtest2.cc)
-```
-
-### 3. Link test executable against libgtest.a and libgtest_main.a (+ pthread)
-```
-target_link_libraries($YOURTEST_TARGET gtest gtest_main pthread)
-```
-
-### 4. Install test executable into Product/out/unittest
-```
-install(TARGETS $YOURTEST_TARGET DESTINATION unittest)
-```
diff --git a/docs/nnfw/howto/HowToRunNnpackge.md b/docs/nnfw/howto/HowToRunNnpackge.md
deleted file mode 100644
index 93dd74e83..000000000
--- a/docs/nnfw/howto/HowToRunNnpackge.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# How To Run 'nnpackage' (for beginners)
-
-## 0. Environment
-
-This document is based on an experience with ...
-
-```
-- Architecture : armhf
-- OS : ubuntu 18.04
-```
-
-## 1. What is 'nnpackage'?
-
-'nnpackage' is the input of nnfw and the output of nncc.
-
-'nnpackage' contains all data (such as model, MANIFEST, custom_op) that is required to run a given model.
-
-'nnpackage' is a Zip archive in the following structure:
-
-```
-nnpackage
-├── custom_op
-├── metadata
-│ └── MANIFEST
-└── mymodel.model
-```
-
-For more information, find the document [nnpackage/spec/10_packaging_and_manifest.md](../../../nnpackage/spec/10_packaging_and_manifest.md)
-
-## 2. How to generate nnpackage?
-
-'nnpackage' can be generated from either '.circle' or '.tflite'.
-
-In this example, we generate 'nnpackage' from '.tflite'.
-
- [1] Find 'model2nnpkg.sh'.
- ```
- nnfw/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
- ```
-
- [2] Get any \*.tflite model file.
- You can simply use a file in test framework directory, 'nnfw/tests/framework/cache/'.
- If you don't have /cache directory, download them with command
- ```
- cd nnfw
- MODELFILE_SERVER={MODELFILE_SERVER_LINK} ./tests/framework/run_test.sh --download=on
-
- For {MODELFILE_SERVER_LINK}, put appropriate server link.
- ```
- In this example, we will use 'nnfw/tests/framework/cache/add/1D/add_test1.tflite'
-
- [3] Simply run.
- ```
- $./model2nnpkg.sh add_test1
- ```
 Now you have an add_test1 directory. Look inside the directory to find the hierarchical structure.
-
-## 3. How to set up an environment and run?
-
- [1] Build 'nnfw'.
-
- After build, you can see an execution file 'nnfw/Product/armv7l-linux.debug/out/bin/nnpackage_run'.
- For how to build, check out the document [docs/nnfw/howto/CrossBuildForArm.md](../../../docs/nnfw/howto/CrossBuildForArm.md).
-
- [2] Install package 'libhdf5-cpp-100'.
- ```
- $ sudo apt install libhdf5-cpp-100
- ```
-
- [3] Run nnpackage.
- ```
- $ ./nnpackage_run add_test1
- ```
 Note that you need to pass the whole 'add_test1' directory,
 because 'nnpackage' means an archive, not a single file.
diff --git a/docs/nnfw/howto/HowToTestManualy.md b/docs/nnfw/howto/HowToTestManualy.md
deleted file mode 100644
index bb36cc67b..000000000
--- a/docs/nnfw/howto/HowToTestManualy.md
+++ /dev/null
@@ -1,62 +0,0 @@
-# How to test NNFW on single model/input pair
-
-1. Select backend through environment variables:
- * acl_cl: `export OP_BACKEND_ALLOPS=acl_cl`
- * acl_neon: `export OP_BACKEND_ALLOPS=acl_neon`
- * cpu: `export OP_BACKEND_ALLOPS=cpu`
- * different backends for different operations:
- ```
- unset OP_BACKEND_ALLOPS
- export OP_BACKEND_Conv2D=cpu
- export OP_BACKEND_MaxPool2D=acl_cl
- export OP_BACKEND_AvgPool2D=acl_neon
- ```
-
-2. Select executor through environment variable:
- * linear: `export EXECUTOR=Linear`
- * dataflow: `export EXECUTOR=Dataflow`
- * parallel: `export EXECUTOR=Parallel`
-
-## Test NNFW through NNAPI
-
-### Testing on random input
-1. Generate random input, get reference result using tflite interpreter, dump input and result into file:
- ```
- /path/to/tflite_run --tflite /path/to/model.tflite --dump /path/to/out.dat
- ```
-2. Inference with NNFW NNAPI and compare result with reference one:
- ```
 USE_NNAPI=1 /path/to/tflite_run --tflite /path/to/model.tflite --compare /path/to/out.dat
- ```
-
-### Testing on particular input
-1. Prepare input:
-
- `tflite_run` consumes input as sequence of floats.
-
- For example, you could convert `.jpg` image into such format file with next python3 script:
- ```
- from PIL import Image
- import numpy as np
-
- img = Image.open("./image.jpg")
- np_img = np.array(img.getdata()).reshape(img.size[0], img.size[1], 3).astype(np.float32) / 255.
-
- with open('./converted_image.dat', 'wb') as f:
- for i in np_img.flatten('C'):
- f.write(i)
- ```
-
-2. Get reference result using tflite interpreter, dump input and result into file:
-
- ```
- /path/to/tflite_run --tflite /path/to/model.tflite --input /path/to/input.dat --dump /path/to/out.dat
- ```
-3. Inference with NNFW NNAPI and compare result with reference one:
- ```
 USE_NNAPI=1 /path/to/tflite_run --tflite /path/to/model.tflite --compare /path/to/out.dat
- ```
-
-## Test NNFW through NNPackage
-
-TODO: fill this section when NNPackage will be implemented
diff --git a/docs/nnfw/howto/HowToUseDockerImage.md b/docs/nnfw/howto/HowToUseDockerImage.md
deleted file mode 100644
index 2c8d98f58..000000000
--- a/docs/nnfw/howto/HowToUseDockerImage.md
+++ /dev/null
@@ -1,154 +0,0 @@
-# How to use docker image of nnfw
-
-We have a docker image to build `nnfw` repo.
-
-This docker image is built from https://github.sec.samsung.net/STAR/nnfw/blob/master/infra/docker/Dockerfile and based on Ubuntu 16.04.
-And prebuilt docker image is available from Samsung private docker registry.
-
-This document describes how to use prebuilt docker image when developing `nnfw`.
-
-## How to install docker
-
-Follow [Installing Docker](https://docs.docker.com/)
-
-- For Ubuntu, follow [Installing Docker on Ubuntu](https://docs.docker.com/install/linux/docker-ce/ubuntu/)
-
-These are the actual steps to install using apt package manager:
-```
-$ sudo apt-get install \
- apt-transport-https \
- ca-certificates \
- curl \
- software-properties-common
-$ curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
-$ sudo apt-key fingerprint 0EBFCD88
-```
-```
-$ sudo add-apt-repository \
- "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
- $(lsb_release -cs) \
- stable"
-$ sudo apt-get update
-```
-```
-$ sudo apt-get install docker-ce
-```
-
-## Configure docker daemon
-
-1. Set HTTP/HTTPS proxy
-
- * For Ubuntu, follow [Setting HTTP/HTTPS proxy environment variables](https://docs.docker.com/v17.09/engine/admin/systemd/#httphttps-proxy)
-
-If you are behind an HTTP or HTTPS proxy server, you will need to add this configuration in the Docker systemd service file.
-These are the actual steps to set an HTTP/HTTPS proxy environment variable:
-```
-$ sudo mkdir -p /etc/systemd/system/docker.service.d
-$ sudo vi /etc/systemd/system/docker.service.d/http-proxy.conf
-```
-```
-[Service]
-Environment="HTTP_PROXY=http://10.112.1.184:8080/" "HTTPS_PROXY=https://10.112.1.184:8080/" "NO_PROXY=localhost,127.0.0.1"
-```
-```
-$ sudo systemctl daemon-reload
-$ sudo systemctl restart docker
-$ systemctl show --property=Environment docker
-```
-
-2. Edit configuration file of docker daemon
-
-First you have to add the Samsung private docker registry to your docker daemon.
-Depending on your docker daemon installed, there are two ways of configuration.
-
-
-If there is a `/etc/default/docker`, please edit the file as below.
-```
-$ sudo vi /etc/default/docker
-
-DOCKER_OPTS="--insecure-registry npuci.mooo.com:5000"
-```
-
-If there is a `/etc/docker/daemon.json`, please edit the file as below.
-```
-{
- ...,
- "insecure-registries": [..., "npuci.mooo.com:5000"]
-}
-```
-
-3. Then restart docker daemon as below.
-
-```
-$ sudo service docker restart // Ubuntu 14.04
-
-or
-
-$ sudo systemctl restart docker // Ubuntu 16.04
-```
-
-## Install docker image of `nnfw`
-
-Let's pull docker image for `nnfw` repo and tag it to `nnas:latest`
-
-```
-$ docker pull npuci.mooo.com:5000/star/nnfw/nnas:latest
-$ docker tag npuci.mooo.com:5000/star/nnfw/nnas:latest nnas:latest
-```
-
-## Build docker image instead of pull
-
-You can build docker image in your environment instead of pull docker image from server.
-
-```
-$ cd nnfw
-$ ./nnas build-docker-image
-```
-
-Default docker image name is `nnas`. If you want to change image name, set environment variable `DOCKER_IMAGE_NAME`
-
-```
-$ cd nnfw
-$ DOCKER_IMAGE_NAME=nnas_test ./nnas build-docker-image
-```
-
-You can use options supported by `docker build` command (ex. `--network` or `--build-arg` option)
-
-In case of error with a message : 'Temporary failure resolving..', try to build with '--network host' option
-
-```
-$ cd nnfw
-$ ./nnas build-docker-image --network host --build-arg UBUNTU_MIRROR="kr.archive.ubuntu.com"
-```
-
-## Use docker image to build `neurun`
-Three different targets for `nnfw` can be built using docker image.
-
-1. Build `neurun` for `x86_64` target
-```
-$ cd nnfw
-$ docker run --rm -v $(pwd):/opt/nnfw -w /opt/nnfw nnas make install
-```
-or use `docker_build_test_x64.sh` for convenience as below.
-```
-$ cd nnfw
-$ ./infra/scripts/docker_build_test_x64.sh
-```
-You can find built artifacts at `nnfw/Product/x86_64-linux.debug`.
-
-2. Cross build `neurun` for ARM on x86_64 host
-
-You should prepare RootFS, following [Cross Building for ARM](./CrossBuildForArm.md) except ACL build and cross build steps. Then execute below commands. If your RootFS directory is different with below directory, change it to correct path and ensure the path is absolute.
-```
-$ cd nnfw
-$ ROOTFS_DIR=$(pwd)/tools/cross/rootfs/arm \
-./infra/scripts/docker_build_cross_arm_neurun.sh
-```
-You can find built artifacts at `nnfw/Product/armv7l-linux.debug/`.
-
-3. Build `neurun` for Tizen ARM package on x86_64 host
-```
-$ cd nnfw
-$ ./infra/scripts/docker_build_tizen_gbs.sh
-```
-You can find built artifacts at `Product/out/rpm`.
diff --git a/docs/nnfw/howto/HowToUseNNFWAPI.md b/docs/nnfw/howto/HowToUseNNFWAPI.md
deleted file mode 100644
index e09343275..000000000
--- a/docs/nnfw/howto/HowToUseNNFWAPI.md
+++ /dev/null
@@ -1,63 +0,0 @@
-# Prepare nnpackage
-
-## Convert tensorflow pb file to nnpackage
-Follow the [compiler guide](https://github.sec.samsung.net/STAR/nnfw/blob/master/docs/nncc/Release_2019/tutorial.md) to generate nnpackge from tensorflow pb file
-
-## Convert tflite file to nnpackage
-Please see [model2nnpkg](https://github.sec.samsung.net/STAR/nnfw/tree/master/tools/nnpackage_tool/model2nnpkg) for converting from tflite model file.
-
-# Build app with nnfw API
-
-Here are basic steps to build app with [nnfw C API](https://github.sec.samsung.net/STAR/nnfw/blob/master/runtime/neurun/api/include/nnfw.h)
-
-1) Initialize nnfw_session
-``` c
-nnfw_session *session = nullptr;
-nnfw_create_session(&session);
-```
-2) Load nnpackage
-``` c
-nnfw_load_model_from_file(session, nnpackage_path);
-```
-3) (Optional) Assign a specific backend to operations
-``` c
 // Use the acl_neon backend for CONV_2D and acl_cl otherwise.
 // Note that the default backend is acl_cl
- nnfw_set_op_backend(session, "CONV_2D", "acl_neon");
-```
-
-4) Compilation
-``` c
- // Compile model
- nnfw_prepare(session);
-```
-
-5) Prepare Input/Output
-``` c
- // Prepare input. Here we just allocate dummy input arrays.
- std::vector<float> input;
- nnfw_tensorinfo ti;
- nnfw_input_tensorinfo(session, 0, &ti); // get first input's info
- uint32_t input_elements = num_elems(&ti);
- input.resize(input_elements);
- // TODO: Please add initialization for your input.
- nnfw_set_input(session, 0, ti.dtype, input.data(), sizeof(float) * input_elements);
-
- // Prepare output
- std::vector<float> output;
- nnfw_output_tensorinfo(session, 0, &ti); // get first output's info
- uint32_t output_elements = num_elems(&ti);
- output.resize(output_elements);
- nnfw_set_output(session, 0, ti.dtype, output.data(), sizeof(float) * output_elements);
-```
-6) Inference
-``` c
- // Do inference
- nnfw_run(session);
-```
-## Run Inference with app on the target devices
-reference app : [minimal app](https://github.sec.samsung.net/STAR/nnfw/blob/master/runtime/neurun/sample/minimal)
-
-```
-$ ./minimal path_to_nnpackage_directory
-```
diff --git a/docs/nnfw/howto/HowtoMakeSampleAppOnNnfw.md b/docs/nnfw/howto/HowtoMakeSampleAppOnNnfw.md
deleted file mode 100644
index d272a8390..000000000
--- a/docs/nnfw/howto/HowtoMakeSampleAppOnNnfw.md
+++ /dev/null
@@ -1,132 +0,0 @@
-# How to make a sample app on nnfw
-
-Our runtime `neurun` currently supports `NNAPI` as its interface. To use `NNAPI` efficiently, one solution is to use tensorflow lite. We provide an additional library in `/libs/tflite` to help with using tensorflow lite. (This library is not officially supported.)
-
-To use tensorflow lite, you need to prepare tensorflow lite model file, and you should know input/output tensor name. Then write sample app.
-
-## Prepare loaded tensorflow lite model object
-
-You can select one of kernel register: tensorflow lite official kernel register or extended register (for pre-implemented custom op)
-```
-#include "tensorflow/lite/kernels/register.h"
-#include "tflite/ext/kernels/register.h"
-```
-
-To use tensorflow lite interpreter, need tensorflow lite interpreter session header
-```
-#include "tflite/InterpreterSession.h"
-```
-
-For NNAPI usage, need NNAPI session header
-```
-#include "tflite/NNAPISession.h"
-```
-
-Load the model object into `FlatBuffer`, create a tensorflow lite operator resolver `BuiltinOpResolver` and construct a tensorflow interpreter builder using them:
-```
-tflite::StderrReporter error_reporter;
-auto model = tflite::FlatBufferModel::BuildFromFile(model_file.c_str(), &error_reporter);
-
-// TODO: determine which BuiltinOpResolver and prepend namespace
-BuiltinOpResolver resolver;
-
-tflite::InterpreterBuilder builder(*model, resolver);
-```
-
-Create a tensorflow interpreter and init the builder using it:
-```
-std::unique_ptr<tflite::Interpreter> interpreter;
-builder(&interpreter);
-```
-
-Create a tensorflow lite session to use NNAPI:
-```
-std::shared_ptr<nnfw::tflite::Session> sess = std::make_shared<nnfw::tflite::NNAPISession>(interpreter.get());
-```
-
-If you want to use tensorflow lite interpreter instead of NNAPI, then:
-```
-std::shared_ptr<nnfw::tflite::Session> sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
-```
-
-`NNAPISession` constructs a computational graph from the interpreter and builds the model.
-
-## Prepare tensors memory allocation and model input for inference
-
-Allocate the memory for tensors of `tflite::Interpreter`:
-```
-sess->prepare();
-```
-
-Prepare inputs. How to prepare is out of scope and task specific.<br/>
-Copy the input data into model, i.e. into `interpreter->inputs`. This is tensorflow specific, not nnfw, so one can use any method, that is applicable to Tensorflow, e.g.:
-```
-for (const auto &id : interpreter->inputs())
-{
- if (interpreter->tensor(id)->name == input_name)
- {
- float *p = interpreter->tensor(id)->data.f;
-
- for (int y = 0; y < height; ++y)
- {
- for (int x = 0; x < width; ++x)
- {
- for (int c = 0; c < channel; ++c)
- {
- *p++ = data[y * width * channel + x * channel + c];
- }
- }
- }
- }
-}
-```
-where:<br/>
-`input_name` - name of the inputs of the model;<br/>
-`data` - source vector of size `height * width * channel`.
-
-## Run the inference and get outputs
-
-Run the inference
-```
-sess->run();
-```
-
-Get the result from `interpreter->outputs()`. This is tensorflow lite specific, not nnfw, so one can use any method, that is applicable to tensorflow lite, e.g.:
-```
-for (const auto &id : interpreter->outputs())
-{
- if (interpreter->tensor(id)->name == output_name)
- {
- float *p = interpreter->tensor(id)->data.f;
-
- for (int i = 0; i < result.capacity(); ++i)
- {
- result.push_back(p[i]);
- }
- }
-}
-```
-where:<br/>
-`output_name` - name of the outputs of the model;<br/>
-`result` - float vector, where to put output. Its size can be calculated using
-```
-for (const auto &id : interpreter->outputs())
-{
- if (interpreter->tensor(id)->name == output_name)
- {
- TfLiteTensor *t = interpreter->tensor(id);
- int v = 1;
- for (int i = 0; i < t->dims->size; ++i)
- {
- v *= t->dims->data[i];
- }
- return v;
- }
-}
-return -1;
-```
-
-Release the session
-```
-sess->teardown();
-```
diff --git a/docs/nnfw/howto/RemoteDebuggingForVSCode.md b/docs/nnfw/howto/RemoteDebuggingForVSCode.md
deleted file mode 100644
index c83a09bd5..000000000
--- a/docs/nnfw/howto/RemoteDebuggingForVSCode.md
+++ /dev/null
@@ -1,147 +0,0 @@
-# Remote Debugging for Visual Studio Code
-
-This document describes how to debug nnfw on arm devices using visual studio code.
-
-## Install gdb-multiarch on build host
-
-1. Install `gdb-multiarch`
-
-```bash
-$ sudo apt install gdb-multiarch
-```
-
-## Configure VS code on build host
-
-1. Install `Native Debug` extension on VS code
-
-2. Setup GDB environment on VS code
-
-- Debug -> Add configuration -> GDB: Connect to gdbserver
-- Change configuration as below
- - Change `<TARGET_IP>` to IP of your target
- - The default port number for gdbserver is 2345. You can change this number.
- - You can change `executable` configuration from `tflite_run` to other binaries you want to debug.
-
-```json
-{
- "version": "0.2.0",
- "configurations": [
- {
- "type": "gdb",
- "request": "attach",
- "name": "Attach to gdbserver",
- "gdbpath": "/usr/bin/gdb-multiarch",
- "executable": "./Product/armv7l-linux.debug/out/bin/tflite_run",
- "target": "<TARGET_IP>:2345",
- "remote": true,
- "printCalls": true,
- "cwd": "${workspaceRoot}",
- "valuesFormatting": "parseText"
- }
- ]
-}
-```
-
-## Install gdbserver and debugging symbols at target
-
-You need to setup a target device for remote debugging.
-
-1. Install `gdbserver`
-```bash
-$ sudo apt install gdbserver
-```
-
-2. Install `libc6-dbg` and copy debugging symbols
-```bash
-$ sudo apt install libc6-dbg
-$ sudo mkdir -p /lib/.debug
-$ sudo ln -s /usr/lib/debug/lib/arm-linux-gnueabihf/ld-2.27.so /lib/.debug
-```
-
-## Run remote debugging
-
-1. Start gdbserver on target
-
-```bash
-gdbserver --multi :<PORT> <BINARY_PATH> <EXECUTION_ARGUMENTS>
-```
-
-Example
-```bash
-gdbserver --multi :2345 Product/armv7l-linux.debug/out/bin/tflite_run ../models/slice_test.tflite
-```
-
-2. Connect to gdbserver using VS code
-
-- Setup breakpoints on any code you want.
-
-- Click F5 to start remote debugging.
-
-- Program will execute and exit if no breakpoint exists.
-
-## Optional: Setup rootfs on build host
-
-When debugging starts, `gdb` downloads shared libraries that nnfw uses from the target device.
-This process makes `gdb` to wait for shared library download to finish for every debugging start.
-
-To reduce shared library loading, you can setup an arm root file system on your build host and use it.
-
-1. Create arm root file system
-
-Following [CrossBuildForArm](docs/nnfw/howto/CrossBuildForArm.md) to create an arm root file system.
-
-You can use an arm root file system created for arm cross-compile.
-
-2. Install `libc6-dbg` on arm root file system
-
-`<ROOTFS_DIR>` should point to the ARM root file system.
-
-Default path is `tools/cross/rootfs/arm` folder.
-
-```bash
-$ sudo chroot <ROOTFS_DIR>
-$ apt install libc6-dbg
-$ exit
-```
-
-3. Create symbolic link of nnfw on arm rootfs
-
-`gdb` will use source code folder at sysroot.
-
-```bash
-$ ln -s <NNFW_DIR> <ROOTFS_DIR>/<NNFW_DIR>
-```
-Example
-```bash
-$ ln -s /home/user/nnfw /home/user/nnfw/tools/cross/rootfs/arm/home/user/nnfw
-```
-
-4. Setup `.gdbinit` file on nnfw folder
-
-`gdb` will use `<ROOTFS_DIR>` to find arm related symbols.
-
-```bash
-set sysroot <ROOTFS_DIR>
-set debug-file-directory <ROOTFS_DIR>/usr/lib/debug
-```
-
-# Troubleshooting
-
-### Unable to open 'unordered_map.h'
-
-If you are using docker to build nnfw, you should download and decompress gcc-linaro at `/opt` folder
-
-```bash
-wget https://releases.linaro.org/components/toolchain/binaries/6.3-2017.02/arm-linux-gnueabihf/gcc-linaro-6.3.1-2017.02-x86_64_arm-linux-gnueabihf.tar.xz -O gcc-hardfp.tar.xz
-sudo tar -xf gcc-hardfp.tar.xz -C /opt/ && sudo rm -rf gcc-hardfp.tar.xz
-```
-
-### Skip STL files
-
-Step into (F11) will debug STL files such as `unordered_map` or `vector`.
-
-To skip those files from debugging, you can add below line to `.gdbinit` file.
-
-```bash
-skip -gfile /opt/gcc-linaro-6.3.1-2017.02-x86_64_arm-linux-gnueabihf/arm-linux-gnueabihf/include/c++/6.3.1/bits/*
-```
diff --git a/docs/nnfw/howto/device/xu3-dip.png b/docs/nnfw/howto/device/xu3-dip.png
deleted file mode 100644
index 59c0be3f2..000000000
--- a/docs/nnfw/howto/device/xu3-dip.png
+++ /dev/null
Binary files differ
diff --git a/docs/nnfw/howto/device/xu3_tizen.md b/docs/nnfw/howto/device/xu3_tizen.md
deleted file mode 100644
index 6473ab9a8..000000000
--- a/docs/nnfw/howto/device/xu3_tizen.md
+++ /dev/null
@@ -1,140 +0,0 @@
-# About
-
-This will describe how to flash microSD with Tizen-5.5 for ODroid XU3.
-
-Host environment is Ubuntu 18.04
-
-This document covers only the eMMC + XU3 case.
-
-# Download files
-
-## Images
-
-Boot
-- https://download.tizen.org/snapshots/tizen/unified/latest/images/standard/tv-boot-armv7l-odroidxu3/
-- download the biggest file
-
-Root FS
-- https://download.tizen.org/snapshots/tizen/unified/latest/images/standard/tv-wayland-armv7l-odroidu3/
-- download the biggest file
-
-U-Boot images
-```
-wget https://github.com/hardkernel/u-boot/raw/odroidxu3-v2012.07/sd_fuse/hardkernel_1mb_uboot/bl1.bin.hardkernel
-wget https://github.com/hardkernel/u-boot/raw/odroidxu3-v2012.07/sd_fuse/hardkernel_1mb_uboot/bl2.bin.hardkernel.1mb_uboot
-wget https://github.com/hardkernel/u-boot/raw/odroidxu3-v2012.07/sd_fuse/hardkernel_1mb_uboot/tzsw.bin.hardkernel
-```
-
-You also need `u-boot-mmc.bin` that is inside `tizen-unified_20180425.2_tv-boot-armv7l-odroidxu3.tar.gz` file.
-```
-tar xvf tizen-unified_20180425.2_tv-boot-armv7l-odroidxu3.tar.gz u-boot-mmc.bin
-```
-
-
-## Flashing script
-
-Download [sd_fusing_xu4.sh](https://git.tizen.org/cgit/platform/kernel/u-boot/plain/scripts/tizen/sd_fusing_xu4.sh?h=tizen)
-
-This file name contains `xu4`, but it also works on xu3.
-
-
-## Files
-
-```
-dragon@loki:~/Works/tizen/odroid-xu3/flashing$ ls -l
-total 1316
--rw-rw-r-- 1 dragon dragon 15616 9월 5 14:41 bl1.bin.hardkernel
--rw-rw-r-- 1 dragon dragon 14592 9월 5 14:41 bl2.bin.hardkernel.1mb_uboot
--rw-rw-r-- 1 dragon dragon 262144 9월 5 14:41 tzsw.bin.hardkernel
--rwxr-xr-x 1 dragon dragon 1048576 9월 4 15:17 u-boot-mmc.bin
-```
-
-# Flash
-
-Host environment
-- Ubuntu 18.04
-- eMMC connected through microUSB from xu3 to host
-
-## Flash boot files
-
-on target
-```
-...
-
-CPU: Exynos5422 @ 800 MHz
-
-Model: Odroid XU3 based on EXYNOS5422
-Board: Odroid XU3 based on EXYNOS5422
-Type: xu3
-DRAM: 2 GiB
-MMC: EXYNOS DWMMC: 0, EXYNOS DWMMC: 1
-In: serial
-Out: serial
-Err: serial
-Net: No ethernet found.
-Hit any key to stop autoboot: 0
-ODROID-XU3 #
-
-ODROID-XU3 # mmc list
-EXYNOS DWMMC: 0 (eMMC)
-EXYNOS DWMMC: 1
-
-ODROID-XU3 # ums 0 mmc 0
-
-UMS: LUN 0, dev 0, hwpart 0, sector 0x0, count 0x1d5a000
-
-/
-```
-
-then on host
-```
-$ sudo fdisk -l
-..........
-
-Partition table entries are not in disk order
-
-Disk /dev/sdh: 32.0 GB, 32010928128 bytes
-
-64 heads, 32 sectors/track, 30528 cylinders, total 62521344 sectors
-
-Units = sectors of 1 * 512 = 512 bytes
-
-Sector size (logical/physical): 512 bytes / 512 bytes
-
-I/O size (minimum/optimal): 512 bytes / 512 bytes
-
-Disk identifier: 0x00000000
-
-
-Device Boot Start End Blocks Id System
-
-/dev/sdh1 * 8192 139263 65536 e W95 FAT16 (LBA) ..........
-```
-
-```
-$ sudo ../sd_fusing_xu4.sh -d /dev/sdh --format \
- -b bl1.bin.hardkernel bl2.bin.hardkernel.1mb_uboot tzsw.bin.hardkernel u-boot-mmc.bin
-...
-```
-
-The `--format` option will 1) delete the current partitions, 2) create a new partition table, and 3) format each partition.
-
-- If you meet `./sd_fusing_xu4-u1604.sh: line 147: pv: command not found` message and want to remove this message, install pv package by `sudo apt-get install pv`
-
-## Flash image files
-```
-$ sudo ../sd_fusing_xu4.sh -d /dev/sdh \
- -b tizen-unified_20190905.1_tv-boot-armv7l-odroidxu3.tar.gz \
- tizen-unified_20190905.1_tv-wayland-armv7l-odroidxu3.tar.gz
-```
-
-# After boot
-
-Follow [xu4_tizen](xu4_tizen.md)
-
-# References
-
-- http://suprem.sec.samsung.net/confluence/display/KS/Odroid+XU3
-- http://suprem.sec.samsung.net/confluence/pages/viewpage.action?pageId=104635990
-- http://suprem.sec.samsung.net/confluence/pages/viewpage.action?spaceKey=TPLAB&title=XU3+Image+Flashing
-- http://download.tizen.org/snapshots/tizen/unified/latest/images/standard/
diff --git a/docs/nnfw/howto/device/xu3_ubuntu.md b/docs/nnfw/howto/device/xu3_ubuntu.md
deleted file mode 100644
index 38dbc69b0..000000000
--- a/docs/nnfw/howto/device/xu3_ubuntu.md
+++ /dev/null
@@ -1,114 +0,0 @@
-## How to setup XU3 with Ubuntu 16.04
-
-Ref: https://wiki.odroid.com/old_product/odroid-xu3/odroid-xu3
-
-MicroSD card images
-- https://dn.odroid.com/5422/ODROID-XU3/Ubuntu/
-
-Latest image (as of writing this file)
-- https://dn.odroid.com/5422/ODROID-XU3/Ubuntu/ubuntu-16.04.3-4.14-minimal-odroid-xu4-20171213.img.xz
-- Flash with `WinFlashTool`
-
-MicroSD boot DIP settings
-- ![image](xu3-dip.png)
-
-SW1-1,2 | 1st Boot media
--- | --
-ON ON | eMMC
-OFF ON | MicroSD card
-
-Boot
-- login with serial console
-- password: `root`/`odroid`
-
-Set ethernet
-`/etc/network/interfaces`
-```
-# interfaces(5) file used by ifup(8) and ifdown(8)
-# Include files from /etc/network/interfaces.d:
-source-directory /etc/network/interfaces.d
-
-auto lo eth0
-iface lo inet loopback
-
-iface eth0 inet static
- address 10.113.xxx.yyy
- netmask 255.255.255.0
- network 10.113.xxx.0
- broadcast 10.113.xxx.255
- gateway 10.113.xxx.1
- dns-nameservers 10.32.192.11 10.32.193.11 8.8.8.8
-```
-Change `xxx.yyy` to your IP address.
-
-Reboot and login with SSH
-
-### Add proxy settings
-
-Add `/etc/apt/apt.conf.d/90proxies`
-```
-Acquire::http::proxy "http://10.112.1.184:8080/";
-Acquire::https::proxy "http://10.112.1.184:8080/";
-Acquire::ftp::proxy "ftp://10.112.1.184:8080/";
-```
-
-Add `/etc/profile.d/proxy.sh`
-```
-#!/bin/bash
-
-# Proxy
-export HTTP_PROXY=http://10.112.1.184:8080/
-export HTTPS_PROXY=https://10.112.1.184:8080/
-```
-
-### Update and install programs
-
-```
-sudo apt-get update
-sudo apt-get upgrade
-sudo apt-get install vim nfs-common
-```
-
-### For convenience
-
-Edit `~/.profile`
-```
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:.
-```
-
-### MALI GPU driver
-
-https://developer.arm.com/products/software/mali-drivers/user-space
-
-Download at `Odroid-XU3` section
-- https://developer.arm.com/-/media/Files/downloads/mali-drivers/user-space/odroid-xu3/malit62xr12p004rel0linux1fbdev.tar.gz?revision=b4f9b859-ac02-408e-9729-c1e50d3a9c6c
-
-Extract and copy to `/usr/lib/fbdev`
-
-File list
-```
-$ll /usr/lib/fbdev/
-
-total 22520
-drwxr-xr-x 2 root root 4096 Feb 21 02:35 ./
-drwxr-xr-x 57 root root 4096 Feb 21 08:33 ../
-lrwxrwxrwx 1 root root 11 Feb 21 02:35 libEGL.so -> libEGL.so.1*
-lrwxrwxrwx 1 root root 10 Feb 21 02:35 libEGL.so.1 -> libmali.so*
-lrwxrwxrwx 1 root root 17 Feb 21 02:35 libGLESv1_CM.so -> libGLESv1_CM.so.1*
-lrwxrwxrwx 1 root root 10 Feb 21 02:35 libGLESv1_CM.so.1 -> libmali.so*
-lrwxrwxrwx 1 root root 14 Feb 21 02:35 libGLESv2.so -> libGLESv2.so.2*
-lrwxrwxrwx 1 root root 10 Feb 21 02:35 libGLESv2.so.2 -> libmali.so*
-lrwxrwxrwx 1 root root 14 Feb 21 02:35 libOpenCL.so -> libOpenCL.so.1*
-lrwxrwxrwx 1 root root 10 Feb 21 02:35 libOpenCL.so.1 -> libmali.so*
--rwxr-xr-x 1 root root 21471208 Feb 21 02:35 libmali.so*
--rwxr-xr-x 1 root root 1580048 Feb 21 02:35 liboffline_compiler_api.so*
-```
-
-Add `/etc/ld.so.conf.d/malifbdev.conf`
-```
-# arm mali
-/usr/lib/fbdev
-```
-
-Rename `arm-linux-gnueabihf_EGL.conf` to `arm-linux-gnueabihf_EGL.conf.not`
-- This is to disable mesa (software emulator of EGL)
diff --git a/docs/nnfw/howto/device/xu4_tizen.md b/docs/nnfw/howto/device/xu4_tizen.md
deleted file mode 100644
index a270bef1b..000000000
--- a/docs/nnfw/howto/device/xu4_tizen.md
+++ /dev/null
@@ -1,228 +0,0 @@
-# About
-
-This will describe how to flash microSD with Tizen for ODroid XU4.
-
-Tested host environment is Ubuntu 16.04, target environment is Tizen 5.5
-
-# Download files
-
-## Images
-
-Boot
-- https://download.tizen.org/snapshots/tizen/unified/latest/images/standard/tv-boot-armv7l-odroidxu3/
-- download the biggest file
-
-Root FS
-- https://download.tizen.org/snapshots/tizen/unified/latest/images/standard/tv-wayland-armv7l-odroidxu3/
-- download the biggest file
-
-If you cannot access directories `tv-boot-armv7l-odroidxu3` or `tv-wayland-armv7l-odroidxu3`, or cannot find images in those directories, go to https://download.tizen.org/snapshots/tizen/unified/ and find latest snapshot including images for Odroid-XU3.
-
-U-Boot images
-```
-wget https://github.com/hardkernel/u-boot/raw/odroidxu3-v2012.07/sd_fuse/hardkernel_1mb_uboot/bl1.bin.hardkernel
-wget https://github.com/hardkernel/u-boot/raw/odroidxu3-v2012.07/sd_fuse/hardkernel_1mb_uboot/bl2.bin.hardkernel.1mb_uboot
-wget https://github.com/hardkernel/u-boot/raw/odroidxu3-v2012.07/sd_fuse/hardkernel_1mb_uboot/tzsw.bin.hardkernel
-```
-
-## Flashing script
-
-Download `sd_fusing_xu4.sh` from https://git.tizen.org/cgit/platform/kernel/u-boot/plain/scripts/tizen/sd_fusing_xu4.sh?h=tizen
-
-This file works on Ubuntu 16.04 and 18.04
-
-Make it executable
-```
-chmod u+x sd_fusing_xu4.sh
-```
-
-
-## Files
-
-You should see like this
-```
--rw-r--r-- 1 hseok82 hseok82 15616 11월 5 13:56 bl1.bin.hardkernel
--rw-r--r-- 1 hseok82 hseok82 14592 11월 5 13:56 bl2.bin.hardkernel.1mb_uboot
--rwxrwxr-x 1 hseok82 hseok82 8040 11월 5 13:53 sd_fusing_xu4.sh
--rw-rw-r-- 1 hseok82 hseok82 10515369 11월 5 14:01 tizen-unified_20191105.1_tv-boot-armv7l-odroidxu3.tar.gz
--rw-rw-r-- 1 hseok82 hseok82 465487683 11월 5 14:01 tizen-unified_20191105.1_tv-wayland-armv7l-odroidxu3.tar.gz
--rw-r--r-- 1 hseok82 hseok82 262144 11월 5 13:56 tzsw.bin.hardkernel
-```
-
-# Flash
-
-Host environment
-- Ubuntu 16.04
-- microSD connected through USB Reader as `/dev/sdd` file.
-
-## Flash boot files and image files
-
-Give `--format` if it's a new flash memory.
-```
-sudo ./sd_fusing_xu4.sh --format \
--d /dev/sdd \
--b bl1.bin.hardkernel bl2.bin.hardkernel.1mb_uboot tzsw.bin.hardkernel \
-tizen-unified_20191105.1_tv-boot-armv7l-odroidxu3.tar.gz \
-tizen-unified_20191105.1_tv-wayland-armv7l-odroidxu3.tar.gz
-```
-Change `/dev/sdd` to your configuration.
-
-You will be asked to confirm format when used `--format`. Please type `y` to continue.
-```
-/dev/sdd will be formatted, Is it OK? [y/n]
-y
-```
-
-You can omit `--format` from the second time and followings.
-```
-sudo ./sd_fusing_xu4.sh \
--d /dev/sdd \
--b bl1.bin.hardkernel bl2.bin.hardkernel.1mb_uboot tzsw.bin.hardkernel \
-tizen-unified_20191105.1_tv-boot-armv7l-odroidxu3.tar.gz \
-tizen-unified_20191105.1_tv-wayland-armv7l-odroidxu3.tar.gz
-```
-`--format` option will, 1) delete current partition 2) create new partition table, 3) format each partitions.
-
-- If you meet `./sd_fusing_xu4.sh: line 147: pv: command not found` message and want to remove this message, install pv package by `sudo apt-get install pv`
-
-# Boot with Tizen
-
-Follow the steps
-
-Step 1.
-- Take out eMMC memory card if you have any
-
-Step 2.
-- Plug-In microSD with Tizen
-
-Step 3. Set boot switch
-- Refer https://wiki.odroid.com/odroid-xu4/hardware/hardware
-- Set `Boot mode selector` switch on the bottom of the board to `uSD`
-
-Step 4. Connect Serial Console port with USB of Host computer
-- Install `minicom`
-```
-sudo apt-get install minicom
-```
-- Add yourself to the group `dialout`
- - `sudo vi /etc/group`
-- Use serial terminal program like `minicom` (note that `/dev/ttyUSB1` might be different in your environment.)
-```
-minicom --baudrate 115200 --device /dev/ttyUSB1
-```
-- Use `CTRL-a z o` > `Serial port setup` to enter the dialog
-- Baud should be `115200-8N1`
-- Set configuration `Hardware Flow Control` to `No` to enable communication(keyboard typing..)
-- `Save setup as dfl` in configuration
-- If you are connecting from Windows or Mac my need to install the driver
- - https://www.silabs.com/products/development-tools/software/usb-to-uart-bridge-vcp-drivers
- - Use `PuTTY` for Windows.
-
-Step 5. Connect Power
-- You should see the boot logs...
-
-Step 6. Login root
-- login `root` pwd `tizen`
-
-# After boot
-
-## Slow down the fan speed
-
-If the fan noise is disturbing, you can slow down a little.
-
-```
-echo "100" > /sys/devices/platform/pwm-fan/hwmon/hwmon0/pwm1
-```
-This will slow down the speed to 100. Range is from 0 to 255. "0" to make it stop. "255" for maximum speed.
-This value resets automatically and after reboot so may have to set the value every time you reboot and when fan noise loud again.
-
-Other solution is changing cpu governors policy for big core to `ondemand`
-
-```
-echo ondemand | tee /sys/devices/system/cpu/cpu{0..7}/cpufreq/scaling_governor
-```
-
-## Remount root file system writable
-
-Default ROOT FS (except `/opt/usr`) is read-only. If you want to modify FS, you need to remount as wriable.
-
-```
-mount -o remount,rw /
-```
-
-This is resets after reboot so need to fix `/etc/fstab` when you want to mount FS with wriable on every boot
-
-## Wide console
-
-```
-stty cols 200
-```
-
-## Setting IP Address of Target Device
-
-Use `connmanctl`
-
-**CAUTION** PLEASE DO THIS IN YOUR TARGET DEVICE. RUNNING THIS IN YOUR HOST MAY DAMAGE.
-
-Step 1. Get the service name
-- You first need to connect Ethernet cable.
-```
-connmanctl services
-```
-Will drop something like this
-```
-*AR Wired ethernet_1a43230d5dfa_cable
-```
-
-Step 2. Use `config` to set the IP address
-```
-connmanctl config ethernet_1a43230d5dfa_cable --ipv4 manual 10.113.XXX.YYY 255.255.255.0 10.113.XXX.1
-connmanctl config ethernet_1a43230d5dfa_cable --nameservers 10.32.192.11 10.32.193.11
-```
-where `XXX.YYY` is your address for the target board.
-
-Setting for proxy can be done with connmanctl but don't know how to check.
-```
-connmanctl config ethernet_1a43230d5dfa_cable --proxy manual http://10.112.1.184:8080/
-```
-You can use environment variable but still don't know how to check.
-
-
-This information remains after reboot.
-
-# Connecting with SDB
-
-Default Tizen image has running SDBD in the device with default port (26101).
-
-In your Linux or Windows with `sdb` command,
-```
-sdb connect 10.113.XXX.YYY
-```
-Result will be something like
-```
-* Server is not running. Start it now on port 26099 *
-* Server has started successfully *
-connecting to 10.113.xxx.yyy:26101 ...
-connected to 10.113.xxx.yyy:26101
-```
-With `sdb devices`,
-```
-sdb devices
-List of devices attached
-10.113.xxx.yyy:26101 device xu3
-```
-It comes up with `xu3` as our `xu4` also uses same image `xu3` image.
-
-# (Optional) Install OpenCL
-
-To use arm compute CL backend, install OpenCL.
-You can get OpenCL for tizen in Tizen Mali DDK.
-
-# Known issue
-- `ls -al` of root folder shows strange output.
-
-# Reference
-- https://wiki.tizen.org/Quick_guide_for_odroidxu4
-- and the mail got from "김석원님"
-- https://magazine.odroid.com/wp-content/uploads/odroid-xu4-user-manual.pdf
- - https://magazine.odroid.com/odroid-xu4
diff --git a/docs/nnfw/howto/device/xu4_ubuntu.md b/docs/nnfw/howto/device/xu4_ubuntu.md
deleted file mode 100644
index 7b8a3aa2b..000000000
--- a/docs/nnfw/howto/device/xu4_ubuntu.md
+++ /dev/null
@@ -1,99 +0,0 @@
-## How to use XU4 with Ubuntu 16.04
-
-Ref: https://wiki.odroid.com/odroid-xu4/odroid-xu4
-
-eMMC card pre-installed Ubuntu 16.04
-
-Preparation for IO via serial cable
-- Refer to `minicom` section in xu4_tizen.md
-- To find the name of serial device, plug your odroid into your host machine and power it on. Then, run the following on your host:
- ```
- $ dmesg | grep tty
- [ 0.000000] console [tty0] enabled
- [322282.017985] usb 2-1: cp210x converter now attached to ttyUSB0
- ```
-- Use `CTRL-a z o` > `Serial port setup` to enter the dialog
-- Set configuration `Serial Device` to `/dev/ttyUSB0` for the name of serial device
-- Baud should be `115200-8N1`
-- Set configuration `Hardware Flow Control` to `No` to enable communication(keyboard typing..)
-
-Connect
-- Connect eMMC to bottom of the board
-- Connect Serial Console to Host USB
-- Connect power and boot
-
-Login with serial console. you can login with `root` or default `odroid` account
-- `root` password: `odroid`
-- `odroid `password: `odroid`
-
-Set ethernet
-`/etc/network/interfaces`
-```
-# interfaces(5) file used by ifup(8) and ifdown(8)
-# Include files from /etc/network/interfaces.d:
-source-directory /etc/network/interfaces.d
-
-auto lo eth0
-iface lo inet loopback
-
-iface eth0 inet static
- address 10.113.xxx.yyy
- netmask 255.255.255.0
- network 10.113.xxx.0
- broadcast 10.113.xxx.255
- gateway 10.113.xxx.1
- dns-nameservers 10.32.192.11 10.32.193.11 8.8.8.8
-```
-Change `xxx.yyy` to your IP address.
-
-Reboot and login with SSH
-
-### Add proxy settings
-
-Add `/etc/apt/apt.conf.d/90proxies`
-```
-Acquire::http::proxy "http://10.112.1.184:8080/";
-Acquire::https::proxy "http://10.112.1.184:8080/";
-Acquire::ftp::proxy "ftp://10.112.1.184:8080/";
-```
-
-Add `/etc/profile.d/proxy.sh`
-```
-#!/bin/bash
-
-# Proxy
-export HTTP_PROXY=http://10.112.1.184:8080/
-export HTTPS_PROXY=https://10.112.1.184:8080/
-```
-
-### Update and install programs
-
-```
-sudo apt-get update
-sudo apt-get upgrade
-sudo apt-get install vim nfs-common
-```
-
-### MALI GPU driver
-
-Driver files are pre-installed in eMMC as follows
-```
-odroid@odroid:/usr/lib/arm-linux-gnueabihf/mali-egl$ ll
-total 20136
-drwxr-xr-x 2 root root 4096 Aug 20 2017 ./
-drwxr-xr-x 106 root root 90112 Mar 26 08:32 ../
--rw-r--r-- 1 root root 38 Apr 30 2017 ld.so.conf
--rwxr-xr-x 1 root root 2752 Apr 30 2017 libEGL.so*
-lrwxrwxrwx 1 root root 9 Apr 30 2017 libEGL.so.1 -> libEGL.so*
-lrwxrwxrwx 1 root root 9 Apr 30 2017 libEGL.so.1.4 -> libEGL.so*
--rwxr-xr-x 1 root root 2752 Apr 30 2017 libGLESv1_CM.so*
-lrwxrwxrwx 1 root root 15 Apr 30 2017 libGLESv1_CM.so.1 -> libGLESv1_CM.so*
-lrwxrwxrwx 1 root root 15 Apr 30 2017 libGLESv1_CM.so.1.1 -> libGLESv1_CM.so*
--rwxr-xr-x 1 root root 2752 Apr 30 2017 libGLESv2.so*
-lrwxrwxrwx 1 root root 12 Apr 30 2017 libGLESv2.so.2 -> libGLESv2.so*
-lrwxrwxrwx 1 root root 12 Apr 30 2017 libGLESv2.so.2.0 -> libGLESv2.so*
--rwxr-xr-x 1 root root 20493444 May 8 2017 libmali.so*
--rwxr-xr-x 1 root root 2752 Apr 30 2017 libOpenCL.so*
-lrwxrwxrwx 1 root root 12 Apr 30 2017 libOpenCL.so.1 -> libOpenCL.so*
-lrwxrwxrwx 1 root root 12 Apr 30 2017 libOpenCL.so.1.1 -> libOpenCL.so*
-```
diff --git a/docs/nnfw/op_list.md b/docs/nnfw/op_list.md
deleted file mode 100644
index a19c0937a..000000000
--- a/docs/nnfw/op_list.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# List of Operations Supported by Runtime
-
-The list is based on commit 6f09c89f90216aed7df792.
-
-**Notice: There may be some restrictions on the support of each operation. Details will be updated soon.**
-
-
-| Operaion Name | acl_cl | acl_neon | srcn | cpu |
-| -------------------------- | --- | ----- | -- | --- |
-| Abs | O | O | | |
-| Add | O | O | O | O |
-| ArgMax | O | O | | |
-| AvgPool2D | O | O | | |
-| BatchToSpaceND | O | O | | |
-| Cast | O | O | | |
-| Comparison | O | O | | |
-| Concat | O | O | | O |
-| Conv2D | O | O | O | O |
-| Custom | | | | O |
-| DepthToSpace | O | O | | |
-| DepthwiseConv2D | O | O | O | O |
-| Dequantize | O | O | | |
-| Div | O | O | | |
-| EmbeddingLookup | O | O | | |
-| Exp | O | O | | |
-| Floor | O | O | | |
-| FullyConnected | O | O | | O |
-| Gather | O | O | | O |
-| HashtableLookup | O | O | | |
-| InstanceNorm | O | O | O | |
-| L2Normalization | O | O | | |
-| L2Pool2D | O | O | | |
-| LSTM | O | O | | |
-| LocalResponseNormalization | O | O | | |
-| LogicalAnd | O | O | | |
-| LogicalNot | O | O | | |
-| LogicalOr | O | O | | |
-| Logistic | O | O | | O |
-| Max | O | O | | |
-| MaxPool2D | O | O | | O |
-| Mean | O | O | | |
-| Min | O | O | | |
-| Mul | O | O | | O |
-| Neg | O | O | | |
-| PReLU | O | O | | |
-| Pack | O | O | | |
-| Pad | O | O | | O |
-| Permute | O | O | | O |
-| RNN | O | O | | |
-| RSQRT | O | O | | |
-| ReLU | O | O | | |
-| ReLU1 | O | O | | |
-| ReLU6 | O | O | | |
-| ReduceMax | O | O | | |
-| ReduceMin | O | O | | |
-| ReduceSum | O | O | | |
-| Reshape | O | O | | O |
-| ResizeBilinear | O | O | | |
-| SQRT | O | O | | |
-| Softmax | O | O | | O |
-| SpaceToBatchND | O | O | | |
-| SpaceToDepth | O | O | | |
-| Split | O | O | | |
-| SquaredDifference | O | O | | |
-| Squeeze | O | O | | O |
-| StridedSlice | O | O | | |
-| Sub | O | O | | O |
-| Tanh | O | O | | |
-| TopKV2 | O | | | |
-| Transpose | O | O | | |
-| TransposeConv | O | O | O | |
diff --git a/docs/nnfw/roadmap.md b/docs/nnfw/roadmap.md
deleted file mode 100644
index c04bab66b..000000000
--- a/docs/nnfw/roadmap.md
+++ /dev/null
@@ -1,76 +0,0 @@
-This document describes roadmap of 2019 NN Runtime (or _nnfw_) project.
-
-# Goal
-
-This project _nnfw_ aims at providing a high-performance, on-device neural network (NN) inference
-framework that performs inference of a given NN model on processors, such as CPU, GPU, or NPU, in
-the target platform, such as Tizen and Android.
-
-Last year in 2018, we already saw significant gains in accelerating with a single CPU or GPU
-back-end. Now we want to gain more benefits by using a mixture of CPU and GPU according to each
-operation characteristic. It could give us an opportunity to have a high degree of freedom in terms
-of operator coverage, and possibly provide better performance compared to single back-end
-acceleration.
-
-On the other hand, we are going to introduce a new compiler to the front-end. This will support a
-variety of deep learning frameworks in relatively spacious host PC environments, while the runtime
-running on the target device is intended to take a smaller burden. In this process, the compiler and
-the runtime will effectively share information among themselves by the Common IR, which is referred
-to as the NN Package.
-
-# Architecture
-
-![nnfw_architecture](./fig/nnfw_architecture.png)
-
-The figure above illustrates the overall architecture and scope of _nnfw_, along with _nncc_, a
-sibling project, to help understand. In this document, we will deal specifically with _nnfw_.
-
-The _nnfw_ can be divided into three parts which is NN API and NN Runtime, as well as NN Compute
-that is provided by the platform.
-
-1. NN API
- - Provide a common interface to application.
- - Last year, Android NN API was selected for seamless integration with TF Lite. As long as our
- NN runtime provides Android NN API as an interface, TF Lite can link to our NN runtime without
- any modification.
- - In choosing Android NN API, we expected standardization and rapid adoption. But the results
- were far less than that. We could not control its specifications, and its growth rate was too
- slow to accommodate our needs. So we try to define our own new one, NN Runtime API, in this
- year. (Once the new API is stable, we provide a way to replace the Android NN API and it will
- naturally be deprecated.)
-1. NN Runtime
- - It already provides significant performance improvements using CPU or GPU acceleration. Now we
- want to add the flexibility to this by providing various functions suitable to specific device
- configuration.
- - Mixed back-end acceleration enables various usage scenarios according to device-specific CPU
- or GPU configurations and usage conditions.
- - By introducing an interpreter, it will respond to dynamic conditions that the compiler can not
- handle, and will effectively utilize the memory through the memory manager.
-1. NN Compute
- - Provide computation acceleration library, such as ACL, or device driver for NPU.
- - This layer will be provided by OS platform, and we will use the library or device driver as it
- is. We may request a specific version to the Platform team, but we don't expect we will be
- modifying the library.
- - In this year, we will also introduce an extension mechanism to support custom operations on
- this part.
-
-# Deliverables
-
-- On-Device AI SW stack for Tizen
- + Advanced runtime support with interpreter, memory manager, and execution planner.
- + Provides back-end flexibility, such as CPU/GPU mixed acceleration
- + Well designed custom op support.
- + Basic infrastructure for NPU support.
-- Specification and implementation of Common IR and Runtime API
-
-# Milestones
-
-- [Project Milestones](https://github.sec.samsung.net/orgs/STAR/projects/1)
-- [Monthly Milestones](https://github.sec.samsung.net/STAR/nnfw/projects/25)
-
-# Workgroups (WGs)
-
-- We organize WGs for major topics, and each WG will be working on its own major topic by breaking
- it into small tasks/issues, performing them inside WG, and collaborating between WGs.
-- The WG information can be found [here](workgroups.md).
-
diff --git a/docs/nnfw/tests/Convolution_manual_3x3.xlsx b/docs/nnfw/tests/Convolution_manual_3x3.xlsx
deleted file mode 100644
index 7211f6ab3..000000000
--- a/docs/nnfw/tests/Convolution_manual_3x3.xlsx
+++ /dev/null
Binary files differ
diff --git a/docs/nnfw/tests/Softmax_manual.xlsx b/docs/nnfw/tests/Softmax_manual.xlsx
deleted file mode 100644
index 5ad4b8b2b..000000000
--- a/docs/nnfw/tests/Softmax_manual.xlsx
+++ /dev/null
Binary files differ
diff --git a/docs/release/release_note_1.0.0.md b/docs/release/release_note_1.0.0.md
deleted file mode 100644
index e5f58d1fa..000000000
--- a/docs/release/release_note_1.0.0.md
+++ /dev/null
@@ -1,65 +0,0 @@
-# NNAS 1.0.0 Release Note
-Welcome to the first release of NNAS !
-
-## Feature Highlights
-
-- `nnpackage` : package format for NNAS
-- `nncc` : compiler collection for converting neural network model to `nnpackage`
- - Currently supports 28 operations and 3 models
- - Model optimization
-- `nnfw` : on-device runtime for runnning `nnpackage` on multiple devices
- - Currently supports 63 operations
- - Heterogeneous Execution
- - (Experimental) Support custom operation
-
-## nnpackage
-`nnpackage` is our new package format for handling various formats easily in NNAS.
-
-Please refer to `nnpackage`'s [spec documentation](https://github.sec.samsung.net/STAR/nnfw/blob/master/nnpackage/spec) for the details.
-
-## nncc
-
-### Guide
-- Compilation tutorial : [inception_v3 compilation](https://github.sec.samsung.net/STAR/nnfw/blob/master/docs/nncc/v1.0.0/tutorial.md)
-- Detailed compilation guide : [getting started](https://github.sec.samsung.net/STAR/nnfw/blob/master/docs/nncc/v1.0.0/getting_started.md)
-
-### Supported Operations and Models
-
-#### Operations
-Compiler supports total [28 operations](https://github.sec.samsung.net/STAR/nnfw/blob/master/docs/nncc/v1.0.0/operation-list.md).
-
-#### Models
-_Note that compiler does not support quantized model(e.g. QASYMM8) yet._
-
-Officially, compiler supports the following models :
-- Inception V3 (FLOAT32 model)
-- MobileNet V1 (FLOAT32 model)
-- Style Transfer (FLOAT32 model)
-
-
-### Model Optimizations
-- Constant Folding
-- Remove dead operation
-- Remove `Identity`
-- Resolve duplicate `Reshape`
-- Resolve redundant `Reshape`
-- Merge `Concat`
-- Fuse some fusible operations
-
-## nnfw
-
-### Guide
-- User can run own app with nnpackage via [nnfw API](https://github.sec.samsung.net/STAR/nnfw/blob/master/runtime/neurun/api/include/nnfw.h). You can find a guide in [Usage guide](https://github.sec.samsung.net/STAR/nnfw/blob/master/docs/nnfw/howto).
-- For building an app with nnfw API, a minimal sample app is also provided at [minimal app](https://github.sec.samsung.net/STAR/nnfw/blob/master/runtime/neurun/sample/minimal).
-
-### Target Devices
-Runtime does not restrict which target devices your app can run on as far as our backends support the target devices. However, dev team uses [odroid-xu4](https://www.hardkernel.com/shop/odroid-xu4-special-price/) as a reference target. For setting odroid-xu4 board, you can find a guide on [arm Ubuntu guide](https://github.sec.samsung.net/STAR/nnfw/blob/master/docs/nnfw/howto/device/xu4_ubuntu.md) and [arm Tizen guide](https://github.sec.samsung.net/STAR/nnfw/blob/master/docs/nnfw/howto/device/xu4_tizen.md).
-
-### Supported Operations
-Runtime supports 63 NN operations. Note that operation coverage differs per backend. Please refer to [Runtime OP table](https://github.sec.samsung.net/STAR/nnfw/blob/master/docs/nnfw/op_list.md) for full list.
-
-### Heterogeneous Execution
-Runtime provides 4 backends : CPU, Compute Library OpenCL(acl_cl), Compute Library NEON(acl_neon), SRCN. Each backend has their own characteristic. In order to exploit the characteristic, runtime provides a way to assign specific backend at operation level. Please see `nnfw_set_op_backend` function of [nnfw API](https://github.sec.samsung.net/STAR/nnfw/blob/master/runtime/neurun/api/include/nnfw.h) for details. For concrete example, refer to [minimal app](https://github.sec.samsung.net/STAR/nnfw/blob/master/runtime/neurun/sample/minimal), please.
-
-### (Experimental) Custom Operator
-If your model has unsupported operation by runtime, you can still run this model with custom operator. Custom operator allows for users to provide your own implementation of such operations. For more details, refer to [custom operator documentation](https://github.sec.samsung.net/STAR/nnfw/blob/master/nnpackage/spec/30_custom_op.md). Note that this feature is experimental and subject to change.
diff --git a/docs/release/release_note_1.1.0.md b/docs/release/release_note_1.1.0.md
deleted file mode 100644
index f267c4cbe..000000000
--- a/docs/release/release_note_1.1.0.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# NNAS 1.1.0 Release Note
-
-## Feature Highlights
-
-- `nncc`
- - Available for Tizen Studio in Windows
- - Available for Visual Studio in Windows
-- `nnfw`
- - Interpreter supports more operations
- - CPU Arithmetic kernels support broadcasing
- - Fully Connected Operation supports hybrid quantization
-
-
-## nncc
-
-### Available for Tizen Studio in Windows
-We now support `nncc` in Tizen Studio as plugin. For detailed information and simple tutorial, please refer to [NNCC Installation Guide for Tizen Studio](../nncc/v1.1.0/nncc_in_tizen_studio).
-
-#### Known Issues
-- Output directory of nnpackage is fixed to `res/shared`.
-
-### Available for Visual Studio in Windows
-We now support `nncc` in Visual Studio as Tizen extension program. For detailed information and simple tutorial, please refer to [NNCC Installation Guide for Visual Studio](../nncc/v1.1.0/nncc_in_visual_studio).
-
-#### Known Issues
-- `nncc` in Visual Studio extension program only accepts `model.pb` and `model.info` in `model` folder.
- - If user want to create nnpackage using `model2.pb` and `model2.info`, user should change the names as `model.pb` and `model.info` first.
-- Output directory of nnpackage is fixed to `res/shared/model`.
-
-## nnfw
-
- ### The following operations are supported on Interpreter :
- - Activation : Relu, Relu1, Relu6, Tanh
- - Logistics
- - Gather
- - Instance Normalization
- - Transpose Convolution
- ### CPU Arithmetic kernels support broadcasing
- ### Fully Connected Operation supports hybrid quantization
- Note that this support is only for acl_neon backend. See [hybrid quatization document](https://www.tensorflow.org/lite/performance/post_training_quantization#weight_quantization) for more about hybrid quantization. \ No newline at end of file
diff --git a/docs/release/release_note_1.4.0.md b/docs/release/release_note_1.4.0.md
new file mode 100644
index 000000000..b061e17cf
--- /dev/null
+++ b/docs/release/release_note_1.4.0.md
@@ -0,0 +1,23 @@
+# ONE 1.4.0 Release Note
+
+## Feature Highlights
+
+- ONE runtime(a.k.a `onert`)
+ - CPU backend supports more operations
+ - (Experimental) Android package is released
+
+## ONE Runtime
+
+#### CPU backend supports more operations
+
+The following operations are supported on CPU backend :
+- Rsqrt
+- Abs
+- Sin
+- StrideSlice
+- Shape
+
+#### (Experimental) Android package is released
+
+- For building onert from scratch, read [CrossBuildForAndroid.md](https://github.com/Samsung/ONE/blob/master/docs/nnfw/howto/CrossBuildForAndroid.md).
+- For developing application with onert, please read [HowToUseONEAPI.md](https://github.com/Samsung/ONE/blob/master/docs/nnfw/howto/CrossBuildForAndroid.md).
diff --git a/infra/cmake/modules/ExternalBuildTools.cmake b/infra/cmake/modules/ExternalBuildTools.cmake
index e76cd6bc3..4f2027b4b 100644
--- a/infra/cmake/modules/ExternalBuildTools.cmake
+++ b/infra/cmake/modules/ExternalBuildTools.cmake
@@ -63,7 +63,12 @@ function(ExternalBuild_CMake)
message(FATAL_ERROR "${ARG_PKG_NAME} Package: Build failed (check '${BUILD_LOG_PATH}' for details)")
endif(NOT BUILD_EXITCODE EQUAL 0)
- execute_process(COMMAND ${CMAKE_COMMAND} --build . -- install
+ set(NUM_BUILD_THREADS 1)
+ if(DEFINED EXTERNALS_BUILD_THREADS)
+ set(NUM_BUILD_THREADS ${EXTERNALS_BUILD_THREADS})
+ endif(DEFINED EXTERNALS_BUILD_THREADS)
+
+ execute_process(COMMAND ${CMAKE_COMMAND} --build . -- -j${NUM_BUILD_THREADS} install
OUTPUT_FILE ${INSTALL_LOG_PATH}
ERROR_FILE ${INSTALL_LOG_PATH}
WORKING_DIRECTORY ${ARG_BUILD_DIR}
diff --git a/infra/cmake/packages/ARMComputeConfig.cmake b/infra/cmake/packages/ARMComputeConfig.cmake
index f014f3eac..83c8d20f6 100644
--- a/infra/cmake/packages/ARMComputeConfig.cmake
+++ b/infra/cmake/packages/ARMComputeConfig.cmake
@@ -101,6 +101,7 @@ function(_ARMCompute_Build ARMCompute_INSTALL_PREFIX)
find_program(SCONS_PATH scons)
if(NOT SCONS_PATH)
+ message(WARNING "SCONS NOT FOUND. Please install SCONS to build ARMCompute.")
return()
endif(NOT SCONS_PATH)
@@ -152,7 +153,7 @@ function(_ARMCompute_Build ARMCompute_INSTALL_PREFIX)
# Copy externals/SConstruct to externals/acl/ for Tizen build support.
# TODO The change of externals/SConstruct should be upstreamed to ARM Compute Library community layer.
- execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_LIST_DIR}/ARMCompute/SConstruct" "${ARMComputeSource_DIR}")
+ # execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_LIST_DIR}/ARMCompute/SConstruct" "${ARMComputeSource_DIR}")
# Build ARMCompute libraries with SCONS
# NOTE ARMCompute SConstruct unconditioanlly appends "arm-linux-gnueabihf-" prefix for linux
diff --git a/infra/cmake/packages/ARMComputeSourceConfig.cmake b/infra/cmake/packages/ARMComputeSourceConfig.cmake
index 2720bf7da..45d8727d4 100644
--- a/infra/cmake/packages/ARMComputeSourceConfig.cmake
+++ b/infra/cmake/packages/ARMComputeSourceConfig.cmake
@@ -3,7 +3,7 @@ function(_ARMComputeSource_import)
nnas_include(OptionTools)
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v19.05.tar.gz)
+ set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v19.11.1.tar.gz)
ExternalSource_Get(ARMCOMPUTE ${DOWNLOAD_ARMCOMPUTE} ${ARMCOMPUTE_URL})
set(ARMComputeSource_DIR ${ARMCOMPUTE_SOURCE_DIR} PARENT_SCOPE)
diff --git a/infra/cmake/packages/BoostConfig.cmake b/infra/cmake/packages/BoostConfig.cmake
index 0023b6b20..c4d7d5857 100644
--- a/infra/cmake/packages/BoostConfig.cmake
+++ b/infra/cmake/packages/BoostConfig.cmake
@@ -27,8 +27,16 @@ function(_Boost_Build Boost_PREFIX)
set(JAM_FILENAME ${BoostBuild_DIR}/user-config.jam)
- file(WRITE ${JAM_FILENAME} "using gcc : local : ${CMAKE_CXX_COMPILER} ;\n")
- list(APPEND Boost_Options toolset=gcc-local)
+ if(ANDROID)
+ set(NDK_CXX ${NDK_DIR}/toolchains/llvm/prebuilt/linux-x86_64/bin/${TARGET_ARCH}-linux-android${ANDROID_API_LEVEL}-clang++)
+ file(WRITE ${JAM_FILENAME} "using clang : arm64v8a : ${NDK_CXX} ;")
+ list(APPEND Boost_Options toolset=clang-arm64v8a)
+ # without target-os=android, it complains it cannot find -lrt.
+ list(APPEND Boost_Options target-os=android)
+ else()
+ file(WRITE ${JAM_FILENAME} "using gcc : local : ${CMAKE_CXX_COMPILER} ;\n")
+ list(APPEND Boost_Options toolset=gcc-local)
+ endif(ANDROID)
# Install Boost libraries
execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${BoostInstall_DIR}")
@@ -38,9 +46,13 @@ function(_Boost_Build Boost_PREFIX)
endfunction(_Boost_Build)
# Find pre-installed boost library and update Boost variables.
-find_package(Boost 1.58.0 QUIET COMPONENTS log program_options filesystem system)
-if(Boost_FOUND)
- return()
+if (NOT BUILD_BOOST)
+ # BoostConfig.cmake does not honor QUIET argument at least till cmake 1.70.0.
+ # Thus, don't try to find_package if you're not entirely sure you have boost.
+ find_package(Boost 1.58.0 QUIET COMPONENTS log program_options filesystem system)
+ if(Boost_FOUND)
+ return()
+ endif()
endif()
set(Boost_PREFIX ${CMAKE_INSTALL_PREFIX})
@@ -53,6 +65,13 @@ if(BUILD_BOOST)
# in ${Boost_PREFIX} directory
list(APPEND CMAKE_PREFIX_PATH "${Boost_PREFIX}")
+ # Without Boost_INCLUDE_DIR, it complains the variable is missing during find_package.
+ set(Boost_INCLUDE_DIR ${CMAKE_INSTALL_PREFIX}/include)
+
+ # 1) without static build, it will complain it cannot find libc++_shared.so.
+ # 2) We use static libraries for the other libraries.
+ set(Boost_USE_STATIC_LIBS ON)
+
# We built boost library so update Boost variables.
find_package(Boost 1.58.0 QUIET COMPONENTS log program_options filesystem system)
endif(BUILD_BOOST)
diff --git a/infra/cmake/packages/EigenConfig.cmake b/infra/cmake/packages/EigenConfig.cmake
index 424db1841..18aee13d7 100644
--- a/infra/cmake/packages/EigenConfig.cmake
+++ b/infra/cmake/packages/EigenConfig.cmake
@@ -9,6 +9,8 @@ function(_Eigen_import)
if(NOT TARGET eigen)
add_library(eigen INTERFACE)
target_include_directories(eigen INTERFACE "${EigenSource_DIR}")
+ # Add EIGEN_MPL2_ONLY to remove the possibility of a license issue
+ target_compile_definitions(eigen INTERFACE EIGEN_MPL2_ONLY)
endif(NOT TARGET eigen)
set(EigenSource_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/cmake/packages/NNPACKSourceConfig.cmake b/infra/cmake/packages/NNPACKSourceConfig.cmake
index 68c823443..10a08ea7c 100644
--- a/infra/cmake/packages/NNPACKSourceConfig.cmake
+++ b/infra/cmake/packages/NNPACKSourceConfig.cmake
@@ -1,7 +1,7 @@
function(_NNPACKSource_import)
if(NOT DOWNLOAD_NNPACK)
set(NNPACKSource_FOUND FALSE PARENT_SCOPE)
- message(WARN "NNPACK not downloaded")
+ message(WARNING "NNPACK not downloaded")
return()
endif(NOT DOWNLOAD_NNPACK)
diff --git a/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake
new file mode 100644
index 000000000..0d2a95056
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake
@@ -0,0 +1,18 @@
+function(_TensorFlowSource_import)
+ if(NOT DOWNLOAD_TENSORFLOW)
+ set(TensorFlowSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_TENSORFLOW)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(TENSORFLOW_2_1_0_URL https://github.com/tensorflow/tensorflow/archive/v2.1.0.tar.gz)
+
+ ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.1.0 ${TENSORFLOW_2_1_0_URL})
+
+ set(TensorFlowSource_DIR ${TENSORFLOW_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowSource_import)
+
+_TensorFlowSource_import()
diff --git a/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfigVersion.cmake
new file mode 100644
index 000000000..80f43dde8
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.1.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowVersionChecker.c b/infra/cmake/packages/TensorFlowVersionChecker.c
index 6161ef74a..3759c2d56 100644
--- a/infra/cmake/packages/TensorFlowVersionChecker.c
+++ b/infra/cmake/packages/TensorFlowVersionChecker.c
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include <string.h>
#include <tensorflow/c/c_api.h>
diff --git a/infra/command/format b/infra/command/format
index cc1fc0959..9fe475371 100644
--- a/infra/command/format
+++ b/infra/command/format
@@ -2,7 +2,56 @@
INVALID_EXIT=0
FILES_TO_CHECK=()
+DIRECTORIES_TO_BE_TESTED=()
DIRECTORIES_NOT_TO_BE_TESTED=()
+CLANG_FORMAT_CANDIDATES=()
+PATCH_FILE=format.patch
+CHECK_DIFF_ONLY="0"
+CHECK_STAGED_ONLY="0"
+
+function Usage()
+{
+ echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS] [<file|dir> ...]"
+ echo "If no arguments are specified, it formats all nnas codes"
+ echo "If <file>s are given, it reformats the files"
+ echo ""
+ echo "Options:"
+ echo " --clang-format <TOOL> clang format bin (default: clang-format-3.9, clang-format)"
+ echo " --diff-only check diff files with master"
+ echo " --staged-only check git staged files"
+}
+
+while [[ $# -gt 0 ]]
+do
+ arg="$1"
+ case $arg in
+ -h|--help|help)
+ Usage
+ exit 0
+ ;;
+ --clang-format)
+ CLANG_FORMAT_CANDIDATES=($2)
+ shift 2
+ ;;
+ --clang-format=*)
+ CLANG_FORMAT_CANDIDATES=(${1#*=})
+ shift
+ ;;
+ --staged-only)
+ CHECK_STAGED_ONLY="1"
+ CHECK_DIFF_ONLY="1"
+ shift
+ ;;
+ --diff-only)
+ CHECK_DIFF_ONLY="1"
+ shift
+ ;;
+ *)
+ DIRECTORIES_TO_BE_TESTED+=($1)
+ shift
+ ;;
+ esac
+done
function pushd () {
command pushd "$@" > /dev/null
@@ -29,6 +78,12 @@ function check_newline() {
for f in ${FILES_TO_FIX[@]}; do
tr '\r' '\n' < $f > $f.fixed && cat $f.fixed > $f && rm $f.fixed
done
+ # Append a newline to files that are missing one at end of file
+ for f in ${FILES_TO_CHECK[@]}; do
+ if diff /dev/null "$f" | tail -1 | grep '^\\ No newline' > /dev/null; then
+ echo >> "$f"
+ fi
+ done
}
function check_permission() {
@@ -58,13 +113,13 @@ function check_cpp_files() {
return
fi
- CLANG_FORMAT_CANDIDATES=()
- CLANG_FORMAT_CANDIDATES+=("clang-format")
CLANG_FORMAT_CANDIDATES+=("clang-format-3.9")
+ CLANG_FORMAT_CANDIDATES+=("clang-format")
for CLANG_FORMAT_CANDIDATE in ${CLANG_FORMAT_CANDIDATES[@]}; do
if command_exists ${CLANG_FORMAT_CANDIDATE} ; then
CLANG_FORMAT="${CLANG_FORMAT_CANDIDATE}"
+ break
fi
done
@@ -139,15 +194,15 @@ function check_python_files() {
pushd ${NNAS_PROJECT_PATH}
-if [ -n "$(git diff)" ]; then
+if [[ -n "$(git diff)" ]] && { [[ "${CHECK_DIFF_ONLY}" != "1" ]] || [[ "${CHECK_STAGED_ONLY}" != "1" ]]; }; then
echo "[WARNING] Commit all the changes before running format check"
- echo " format.patch file will contain unstaged files"
+ echo " ${PATCH_FILE} file will contain unstaged files"
fi
__Check_CPP=${CHECK_CPP:-"1"}
__Check_PYTHON=${CHECK_PYTHON:-"1"}
-FILES_TO_CHECK=$(git ls-files -c --exclude-standard)
+FILES_TO_CHECK=$(git ls-files -c --exclude-standard ${DIRECTORIES_TO_BE_TESTED[@]})
if [[ "${CHECK_DIFF_ONLY}" = "1" ]]; then
MASTER_EXIST=$(git rev-parse --verify master)
CURRENT_BRANCH=$(git branch | grep \* | cut -d ' ' -f2-)
@@ -157,7 +212,11 @@ if [[ "${CHECK_DIFF_ONLY}" = "1" ]]; then
elif [[ "${CURRENT_BRANCH}" = "master" ]]; then
echo "Current branch is master"
else
- FILES_TO_CHECK=$(git diff --name-only --diff-filter=d HEAD~${DIFF_COMMITS})
+ if [[ "${CHECK_STAGED_ONLY}" = "1" ]]; then
+ FILES_TO_CHECK=$(git diff --staged --name-only --diff-filter=d)
+ else
+ FILES_TO_CHECK=$(git diff --name-only --diff-filter=d HEAD~${DIFF_COMMITS})
+ fi
fi
fi
@@ -170,7 +229,13 @@ check_permission
check_cpp_files
check_python_files
-DIFF=$(git diff | tee format.patch)
+if [[ "${CHECK_DIFF_ONLY}" = "1" ]] && [[ "${CHECK_STAGED_ONLY}" = "1" ]]; then
+ if [[ ! -z "${FILES_TO_CHECK}" ]]; then
+ DIFF=$(git diff ${FILES_TO_CHECK} | tee ${PATCH_FILE})
+ fi
+else
+ DIFF=$(git diff | tee ${PATCH_FILE})
+fi
popd
@@ -186,9 +251,9 @@ if [[ ! -z "${CRCHECK}" ]]; then
echo "${CRCHECK}"
fi
-if [[ ${PATCHFILE_SIZE} -ne 0 ]]; then
+if [[ -s ${PATCH_FILE} ]]; then
echo "[FAILED] Format checker failed and update code to follow convention."
- echo " You can find changes in format.patch"
+ echo " You can find changes in ${PATCH_FILE}"
fi
if [[ ${INVALID_EXIT} -ne 0 ]]; then
diff --git a/infra/command/gen-coverage-report b/infra/command/gen-coverage-report
index 5d3552422..c3a8202e7 100644
--- a/infra/command/gen-coverage-report
+++ b/infra/command/gen-coverage-report
@@ -70,6 +70,10 @@ done
"${LCOV_PATH}" -r "${EXTRACTED_COVERAGE_INFO_PATH}" -o "${EXCLUDED_COVERAGE_INFO_PATH}" \
'*.test.cpp'
+# Exclude flatbuffer generated files from coverage report
+"${LCOV_PATH}" -r "${EXTRACTED_COVERAGE_INFO_PATH}" -o "${EXCLUDED_COVERAGE_INFO_PATH}" \
+ '*_schema_generated.h'
+
# Final coverage data
cp -v ${EXCLUDED_COVERAGE_INFO_PATH} ${COVERAGE_INFO_PATH}
diff --git a/infra/command/install-githooks b/infra/command/install-githooks
index e624aa6d7..909a16542 100644
--- a/infra/command/install-githooks
+++ b/infra/command/install-githooks
@@ -1,15 +1,65 @@
#!/usr/bin/env bash
+function Usage()
+{
+ echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [<OPTIONS>]"
+ echo ""
+ echo "Options:"
+ echo " --no-pre-push don't install pre-push hook"
+ echo " --no-pre-commit don't install pre-commit hook"
+}
+
+SKIP_PREPUSH_INSTALL="0"
+SKIP_PRECOMMIT_INSTALL="0"
+
+while [[ $# -gt 0 ]]
+do
+ arg="$1"
+ case $arg in
+ -h|--help|help)
+ Usage
+ exit 1
+ ;;
+ --no-pre-push)
+ SKIP_PREPUSH_INSTALL="1"
+ shift
+ ;;
+ --no-pre-commit)
+ SKIP_PRECOMMIT_INSTALL="1"
+ shift
+ ;;
+ *)
+ echo "ERROR: invalid option"
+ exit 255
+ ;;
+ esac
+done
+
REPO_HOOKS_PATH=$NNAS_PROJECT_PATH/infra/git-hooks
GIT_HOOKS_PATH=$NNAS_PROJECT_PATH/.git/hooks
-# Create symbolic links to hooks dir
-if [ -e $GIT_HOOKS_PATH/pre-push ]; then
- echo "Backup old $GIT_HOOKS_PATH/pre-push to $GIT_HOOKS_PATH/pre-push~"
- mv -v $GIT_HOOKS_PATH/pre-push $GIT_HOOKS_PATH/pre-push~
-elif [ -h $GIT_HOOKS_PATH/pre-push ]; then
- ls -l $GIT_HOOKS_PATH/pre-push
- echo "Remove broken symlink $GIT_HOOKS_PATH/pre-push"
- rm -v $GIT_HOOKS_PATH/pre-push
+if [ $SKIP_PREPUSH_INSTALL == "0" ]; then
+ # Create symbolic links to hooks dir
+ if [ -e $GIT_HOOKS_PATH/pre-push ]; then
+ echo "Backup old $GIT_HOOKS_PATH/pre-push to $GIT_HOOKS_PATH/pre-push~"
+ mv -v $GIT_HOOKS_PATH/pre-push $GIT_HOOKS_PATH/pre-push~
+ elif [ -h $GIT_HOOKS_PATH/pre-push ]; then
+ ls -l $GIT_HOOKS_PATH/pre-push
+ echo "Remove broken symlink $GIT_HOOKS_PATH/pre-push"
+ rm -v $GIT_HOOKS_PATH/pre-push
+ fi
+ ln -sv $REPO_HOOKS_PATH/pre-push.sh $GIT_HOOKS_PATH/pre-push
+fi
+
+if [ $SKIP_PRECOMMIT_INSTALL == "0" ]; then
+ # Create symbolic links to hooks dir
+ if [ -e $GIT_HOOKS_PATH/pre-commit ]; then
+ echo "Backup old $GIT_HOOKS_PATH/pre-commit to $GIT_HOOKS_PATH/pre-commit~"
+ mv -v $GIT_HOOKS_PATH/pre-commit $GIT_HOOKS_PATH/pre-commit~
+ elif [ -h $GIT_HOOKS_PATH/pre-commit ]; then
+ ls -l $GIT_HOOKS_PATH/pre-commit
+ echo "Remove broken symlink $GIT_HOOKS_PATH/pre-commit"
+ rm -v $GIT_HOOKS_PATH/pre-commit
+ fi
+ ln -sv $REPO_HOOKS_PATH/pre-commit.sh $GIT_HOOKS_PATH/pre-commit
fi
-ln -sv $REPO_HOOKS_PATH/pre-push.sh $GIT_HOOKS_PATH/pre-push
diff --git a/infra/docker/Dockerfile b/infra/docker/Dockerfile
index 3b11e3845..e675b53ad 100644
--- a/infra/docker/Dockerfile
+++ b/infra/docker/Dockerfile
@@ -21,7 +21,7 @@ RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoo
RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
# Additonal tools
-RUN apt-get update && apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 python3 python3-pip hdf5-tools pylint
+RUN apt-get update && apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 python3 python3-pip python3-venv hdf5-tools pylint
RUN pip3 install yapf==0.22.0 numpy
# Install google test (source)
diff --git a/infra/docker/Dockerfile.1804 b/infra/docker/Dockerfile.1804
index 856295760..fc6fc9a1a 100644
--- a/infra/docker/Dockerfile.1804
+++ b/infra/docker/Dockerfile.1804
@@ -12,7 +12,7 @@ RUN if [ -n "$UBUNTU_MIRROR" ] ; then sed "s/archive.ubuntu.com/${UBUNTU_MIRROR}
RUN apt-get update && apt-get -qqy install software-properties-common
# Build tool
-RUN apt-get update && apt-get -qqy install build-essential cmake scons git lcov g++-7-arm-linux-gnueabihf g++-7-aarch64-linux-gnu
+RUN apt-get update && apt-get -qqy install build-essential cmake scons git lcov g++-arm-linux-gnueabihf g++-aarch64-linux-gnu
# Install extra dependencies (Caffe, nnkit)
RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoogle-glog-dev libatlas-base-dev libhdf5-dev
@@ -21,7 +21,7 @@ RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoo
RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
# Additonal tools
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 python3 python3-pip hdf5-tools pylint
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 python3 python3-pip python3-venv hdf5-tools pylint
RUN pip3 install yapf==0.22.0 numpy
# Install google test (source)
diff --git a/infra/doxygen/Doxyfile b/infra/doxygen/Doxyfile
index 844c441b2..0dc6fdfff 100644
--- a/infra/doxygen/Doxyfile
+++ b/infra/doxygen/Doxyfile
@@ -1487,7 +1487,7 @@ DISABLE_INDEX = NO
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.
-GENERATE_TREEVIEW = NO
+GENERATE_TREEVIEW = YES
# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
# doxygen will group on one line in the generated HTML documentation.
diff --git a/infra/git-hooks/pre-commit.sh b/infra/git-hooks/pre-commit.sh
new file mode 100755
index 000000000..127245da1
--- /dev/null
+++ b/infra/git-hooks/pre-commit.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+# An example hook script to verify what is about to be committed. Called by
+# "git commit" before the commit is created. If this script exits with a
+# non-zero status, the commit will be aborted.
+#
+# Unlike the pre-push hook, git invokes pre-commit with no arguments; the
+# two positional parameters read below are kept only for symmetry with the
+# pre-push hook script and will normally be empty.
+#
+# This hook runs the project format checker against the files currently
+# staged in the index (via "nnas format --staged-only") so that style
+# violations are caught before they enter the history.
+#
+# To bypass this hook for a single commit, use:
+#
+# git commit --no-verify
+#
+# This script is installed into .git/hooks/pre-commit by the
+# "install-githooks" command.
+
+remote="$1"
+url="$2"
+
+# RUN FORMAT CHECKER
+
+REPO_PATH=$(git rev-parse --show-toplevel)
+cd $REPO_PATH
+
+./nnas format --staged-only
+
+exit $?
diff --git a/infra/git-hooks/pre-push.sh b/infra/git-hooks/pre-push.sh
index ce751333a..d64c72317 100755
--- a/infra/git-hooks/pre-push.sh
+++ b/infra/git-hooks/pre-push.sh
@@ -27,6 +27,6 @@ url="$2"
REPO_PATH=$(git rev-parse --show-toplevel)
cd $REPO_PATH
-CHECK_DIFF_ONLY=1 ./nnas format
+./nnas format --diff-only
exit $?
diff --git a/infra/nncc/CMakeLists.txt b/infra/nncc/CMakeLists.txt
index aa84391c8..4bcb616e8 100644
--- a/infra/nncc/CMakeLists.txt
+++ b/infra/nncc/CMakeLists.txt
@@ -4,7 +4,7 @@ project(nncc)
enable_testing()
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 14)
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
diff --git a/infra/nncc/command/utcount b/infra/nncc/command/utcount
new file mode 100644
index 000000000..3b3baa067
--- /dev/null
+++ b/infra/nncc/command/utcount
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+import "build.configuration"
+
+BUILD_WORKSPACE_PATH="${NNCC_PROJECT_PATH}/${BUILD_WORKSPACE_RPATH}"
+
+if [[ ! -d "${BUILD_WORKSPACE_PATH}" ]]; then
+ echo "'${BUILD_WORKSPACE_RPATH}' does not exist. Please run 'configure' first"
+ exit 255
+fi
+
+BUILD_ITEMS="angkor cwrap pepper-str pepper-strcast pp stdex \
+oops pepper-assert \
+hermes hermes-std \
+loco locop locomotiv logo-core logo \
+safemain mio-circle mio-tflite \
+tflite2circle \
+luci \
+circle2circle"
+
+function count_all()
+{
+ local result=`ctest --verbose | grep -c '\[ RUN \]'`
+ echo $result
+}
+
+function count_neg()
+{
+ local result=`ctest --verbose | grep '\[ RUN \]' | grep -c '_NEG'`
+ echo $result
+}
+
+export CTEST_OUTPUT_ON_FAILURE=0
+
+for item in $BUILD_ITEMS
+do
+ cd ${BUILD_WORKSPACE_PATH}/compiler/$item &&
+ printf "$item = " &&
+ res="$(count_all)" && printf "$res " &&
+ res="$(count_neg)" && echo "$res"
+done
diff --git a/infra/nnfw/CMakeLists.txt b/infra/nnfw/CMakeLists.txt
index 6b5c465d2..437573541 100644
--- a/infra/nnfw/CMakeLists.txt
+++ b/infra/nnfw/CMakeLists.txt
@@ -48,7 +48,7 @@ macro(nnas_find_package PREFIX)
)
endmacro(nnas_find_package)
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_EXTENSIONS OFF)
# This feature works with CMake 3.5.2 or later. However, using previous versions does not produce
@@ -92,6 +92,12 @@ if(ENABLE_STRICT_BUILD)
target_compile_options(nnfw_common INTERFACE -Werror -Wall -Wextra)
endif(ENABLE_STRICT_BUILD)
+macro(nnfw_strict_build TARGET)
+ if(ENABLE_STRICT_BUILD)
+ target_compile_options(${TARGET} PRIVATE -Werror -Wall -Wextra)
+ endif(ENABLE_STRICT_BUILD)
+endmacro(nnfw_strict_build)
+
# TODO Replace using default build option setting in cmake/buildtool/config/config_linux.cmake
# to link nnfw_coverage on each module which want to check coverage
add_library(nnfw_coverage INTERFACE)
diff --git a/infra/nnfw/cmake/ApplyCompileFlags.cmake b/infra/nnfw/cmake/ApplyCompileFlags.cmake
index 956ddfe1c..0ee7e8ef0 100644
--- a/infra/nnfw/cmake/ApplyCompileFlags.cmake
+++ b/infra/nnfw/cmake/ApplyCompileFlags.cmake
@@ -16,7 +16,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG")
#
# Platform specific compile flag setting
#
-include("cmake/buildtool/config/config_${TARGET_PLATFORM}.cmake")
+include("${CMAKE_CURRENT_LIST_DIR}/buildtool/config/config_${TARGET_PLATFORM}.cmake")
#
# Apply compile flags
diff --git a/infra/nnfw/cmake/CfgOptionFlags.cmake b/infra/nnfw/cmake/CfgOptionFlags.cmake
index 4d15d9a10..794fcc355 100644
--- a/infra/nnfw/cmake/CfgOptionFlags.cmake
+++ b/infra/nnfw/cmake/CfgOptionFlags.cmake
@@ -13,7 +13,7 @@ include("cmake/options/options_${TARGET_PLATFORM}.cmake")
option(ENABLE_STRICT_BUILD "Treat warning as error" ON)
option(ENABLE_COVERAGE "Build for coverage test" OFF)
option(BUILD_EXT_MULTITHREAD "Build external build using multi thread" ON)
-option(BUILD_NEURUN "Build neurun" ON)
+option(BUILD_ONERT "Build onert" ON)
option(BUILD_LOGGING "Build logging runtime" ON)
option(BUILD_PURE_ARM_COMPUTE "Build pure_arm_compute runtime" OFF)
CMAKE_DEPENDENT_OPTION(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test"
@@ -22,6 +22,7 @@ CMAKE_DEPENDENT_OPTION(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated
ON "CMAKE_COMPILER_IS_GNUCC;NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2"
# Otherwise set BUILD_RUNTIME_NNAPI_TEST as OFF
OFF)
+option(BUILD_RUNTIME_NNFW_API_TEST "Build Runtime NNFW API Tests" ON)
option(BUILD_TFLITE_RUN "Build tflite-run" ON)
option(BUILD_TFLITE_BENCHMARK "Build tflite benchmark" ON)
option(BUILD_TFLITE_BENCHMARK_MODEL "Build tflite benchmark model" OFF)
@@ -31,7 +32,8 @@ option(BUILD_TFLITE_LOADER "Build TensorFlow Lite loader" ON)
option(BUILD_CIRCLE_LOADER "Build circle loader" ON)
option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" ON)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" ON)
-option(ENVVAR_NEURUN_CONFIG "Use environment variable for neurun configuration" ON)
+option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON)
+option(INSTALL_TEST_SCRIPTS "Install test scripts" ON)
#
# Default build configuration for contrib
#
@@ -41,6 +43,7 @@ option(BUILD_BENCHMARK_ACL "Build ARM Compute Library Benchmarks" OFF)
option(BUILD_DETECTION_APP "Build detection example app" OFF)
option(BUILD_HEAP_TRACE "Build heap trace tool" OFF)
option(BUILD_LABS "Build lab projects" OFF)
+option(BUILD_STYLE_TRANSFER_APP "Build style transfer app" OFF)
option(BUILD_TFLITE_TEST "Build tensorflow lite test" OFF)
option(BUILD_TFLITE_CLASSIFY_APP "Build tflite_classify app" OFF)
option(BUILD_UBEN "Build micro-benchmark (uben) suite" OFF)
@@ -61,7 +64,7 @@ option(DOWNLOAD_EIGEN "Download Eigen source" ON)
option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
option(DOWNLOAD_GEMMLOWP "Download GEMM low precesion library source" ON)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" ON)
-option(DOWNLOAD_NNPACK "Download NNPACK source" ON)
+option(DOWNLOAD_NNPACK "Download NNPACK source" OFF)
option(DOWNLOAD_FLATBUFFERS "Download FlatBuffers source" ON)
option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" ON)
option(DOWNLOAD_NONIUS "Download nonius source" ON)
diff --git a/infra/nnfw/cmake/buildtool/config/config_armv7l-linux.cmake b/infra/nnfw/cmake/buildtool/config/config_armv7l-linux.cmake
index 6d6459f0f..9a0b5e528 100644
--- a/infra/nnfw/cmake/buildtool/config/config_armv7l-linux.cmake
+++ b/infra/nnfw/cmake/buildtool/config/config_armv7l-linux.cmake
@@ -5,7 +5,7 @@
message(STATUS "Building for ARMv7l Linux")
# include linux common
-include("cmake/buildtool/config/config_linux.cmake")
+include("${CMAKE_CURRENT_LIST_DIR}/config_linux.cmake")
# addition for arm-linux
set(FLAGS_COMMON ${FLAGS_COMMON}
diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-android.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-android.cmake
index 2b2792a6a..4da261878 100644
--- a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-android.cmake
+++ b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-android.cmake
@@ -11,7 +11,8 @@ if(NOT DEFINED NDK_DIR)
endif(NOT DEFINED NDK_DIR)
set(ANDROID_ABI arm64-v8a)
-set(ANDROID_PLATFORM android-27)
+set(ANDROID_API_LEVEL 29)
+set(ANDROID_PLATFORM android-${ANDROID_API_LEVEL})
# Find package in the host. `nnfw_find_package` won't work without this
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE NEVER)
diff --git a/infra/nnfw/cmake/options/options_aarch64-android.cmake b/infra/nnfw/cmake/options/options_aarch64-android.cmake
index 2393764a1..903267b72 100644
--- a/infra/nnfw/cmake/options/options_aarch64-android.cmake
+++ b/infra/nnfw/cmake/options/options_aarch64-android.cmake
@@ -13,5 +13,3 @@ option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" OFF)
option(BUILD_TFLITE_RUN "Build tflite-run" OFF)
option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
-
-option(ENABLE_STRICT_BUILD "Treat warning as error" OFF)
diff --git a/infra/nnfw/cmake/options/options_aarch64-tizen.cmake b/infra/nnfw/cmake/options/options_aarch64-tizen.cmake
index 23eecbc46..4499fcd1b 100644
--- a/infra/nnfw/cmake/options/options_aarch64-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_aarch64-tizen.cmake
@@ -12,4 +12,4 @@ option(BUILD_TFLITE_RUN "Build tflite-run" OFF)
option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" OFF)
option(BUILD_SRCN_KERNEL "Build srcn kernel" ON)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
-option(ENVVAR_NEURUN_CONFIG "Use environment variable for neurun configuration" OFF)
+option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
diff --git a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
index af31958ff..e1e4e68f5 100644
--- a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
@@ -3,7 +3,6 @@
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
-option(DOWNLOAD_EIGEN "Download Eigen source" OFF)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
option(DOWNLOAD_NNPACK "Download NNPACK source" OFF)
@@ -11,4 +10,4 @@ option(BUILD_LOGGING "Build logging runtime" OFF)
option(BUILD_TFLITE_RUN "Build tflite-run" OFF)
option(BUILD_SRCN_KERNEL "Build srcn kernel" ON)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
-option(ENVVAR_NEURUN_CONFIG "Use environment variable for neurun configuration" OFF)
+option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
diff --git a/infra/nnfw/cmake/packages/EigenConfig.cmake b/infra/nnfw/cmake/packages/EigenConfig.cmake
index 7a8d3d5c6..23e38cda8 100644
--- a/infra/nnfw/cmake/packages/EigenConfig.cmake
+++ b/infra/nnfw/cmake/packages/EigenConfig.cmake
@@ -9,6 +9,8 @@ function(_Eigen_import)
if(NOT TARGET eigen)
add_library(eigen INTERFACE)
target_include_directories(eigen SYSTEM INTERFACE "${EigenSource_DIR}")
+ # Add EIGEN_MPL2_ONLY to remove the possibility of a license issue
+ target_compile_definitions(eigen INTERFACE EIGEN_MPL2_ONLY)
endif(NOT TARGET eigen)
set(Eigen_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/nnfw/cmake/packages/GEMMLowpConfig.cmake b/infra/nnfw/cmake/packages/GEMMLowpConfig.cmake
new file mode 100644
index 000000000..ddfcc787e
--- /dev/null
+++ b/infra/nnfw/cmake/packages/GEMMLowpConfig.cmake
@@ -0,0 +1,20 @@
+function(_GEMMLowp_import)
+ nnfw_find_package(GEMMLowpSource QUIET)
+
+ if(NOT GEMMLowpSource_FOUND)
+ set(GEMMLowp_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT GEMMLowpSource_FOUND)
+
+ if(NOT TARGET gemmlowp)
+ find_package(Threads REQUIRED)
+
+ add_library(gemmlowp INTERFACE)
+ target_include_directories(gemmlowp SYSTEM INTERFACE ${GEMMLowpSource_DIR})
+ target_link_libraries(gemmlowp INTERFACE ${LIB_PTHREAD})
+ endif(NOT TARGET gemmlowp)
+
+ set(GEMMLowp_FOUND TRUE PARENT_SCOPE)
+endfunction(_GEMMLowp_import)
+
+_GEMMLowp_import()
diff --git a/infra/nnfw/command/copyright-check b/infra/nnfw/command/copyright-check
index 79ead2e31..b9ae5b46a 100644
--- a/infra/nnfw/command/copyright-check
+++ b/infra/nnfw/command/copyright-check
@@ -3,7 +3,7 @@
INVALID_EXIT=0
check_copyright() {
- DIRECTORIES_NOT_TO_BE_TESTED=$2
+ DIRECTORIES_NOT_TO_BE_TESTED=$1
CORRECT_COPYRIGHT="Copyright \(c\) [0-9]+ Samsung Electronics Co\., Ltd\. All Rights Reserved"
FILES_TO_CHECK=$(git ls-files -c --exclude-standard)
@@ -33,8 +33,9 @@ check_copyright() {
DIRECTORIES_NOT_TO_BE_TESTED=()
-for DIR_NOT_TO_BE_TESTED in $(find -name '.FORMATDENY' -exec dirname {} \;); do
+for DIR_NOT_TO_BE_TESTED in $(git ls-files -co --exclude-standard '*/.FORMATDENY'); do
DIRECTORIES_NOT_TO_BE_TESTED+=("$DIR_NOT_TO_BE_TESTED")
+ DIRECTORIES_NOT_TO_BE_TESTED+=($(dirname "${DIR_NOT_TO_BE_TESTED}"))
done
check_copyright $DIRECTORIES_NOT_TO_BE_TESTED
diff --git a/infra/nnfw/command/gen-coverage-report b/infra/nnfw/command/gen-coverage-report
deleted file mode 100644
index 8fd398db3..000000000
--- a/infra/nnfw/command/gen-coverage-report
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/bash
-
-# This file is based on https://github.sec.samsung.net/STAR/nncc/pull/80
-
-LCOV_PATH=$(command -v lcov)
-GENHTML_PATH=$(command -v genhtml)
-
-SRC_PREFIX=${SRC_PREFIX:-${NNFW_PROJECT_PATH}}
-
-if [[ -z "${LCOV_PATH}" ]]; then
- echo "ERROR: 'lcov' is not found"
- exit 255
-fi
-
-if [[ -z "${GENHTML_PATH}" ]]; then
- echo "ERROR: 'genhtml' is not found"
- exit 255
-fi
-
-if [[ -z "${GCOV_PATH}" ]]; then
- GCOV_PATH=$(command -v gcov)
- if [[ -z "${GCOV_PATH}" ]]; then
- echo "ERROR: 'gcov' is not found"
- exit 255
- fi
-fi
-
-OUTPUT_PATH="$1"
-
-if [[ -z "${OUTPUT_PATH}" ]]; then
- OUTPUT_PATH="$NNFW_PROJECT_PATH/coverage"
-fi
-
-if [[ -e "${OUTPUT_PATH}" ]]; then
- echo "ERROR: '${OUTPUT_PATH}' already exists"
- exit 255
-fi
-
-mkdir -p "${OUTPUT_PATH}"
-
-RAW_COVERAGE_INFO_PATH="${OUTPUT_PATH}/coverage.raw.info"
-LIBS_COVERAGE_INFO_PATH="${OUTPUT_PATH}/coverage.libs.info"
-INCLUDE_COVERAGE_INFO_PATH="${OUTPUT_PATH}/coverage.include.info"
-RUNTIMES_COVERAGE_INFO_PATH="${OUTPUT_PATH}/coverage.runtimes.info"
-TOOLS_COVERAGE_INFO_PATH="${OUTPUT_PATH}/coverage.tools.info"
-FINAL_COVERAGE_INFO_PATH="${OUTPUT_PATH}/coverage.info"
-HTML_PATH="${OUTPUT_PATH}/html"
-COVERTURA_PATH="${OUTPUT_PATH}/nnfw_coverage.xml"
-
-"${LCOV_PATH}" -c -d "${NNFW_PROJECT_PATH}" --gcov-tool ${GCOV_PATH} -o "${RAW_COVERAGE_INFO_PATH}"
-#"${LCOV_PATH}" -e "${RAW_COVERAGE_INFO_PATH}" -o "${LIBS_COVERAGE_INFO_PATH}" "${SRC_PREFIX}/libs/*"
-#"${LCOV_PATH}" -e "${RAW_COVERAGE_INFO_PATH}" -o "${INCLUDE_COVERAGE_INFO_PATH}" "${SRC_PREFIX}/include/*"
-"${LCOV_PATH}" -e "${RAW_COVERAGE_INFO_PATH}" -o "${RUNTIMES_COVERAGE_INFO_PATH}" "${SRC_PREFIX}/runtimes/*"
-"${LCOV_PATH}" -e "${RAW_COVERAGE_INFO_PATH}" -o "${TOOLS_COVERAGE_INFO_PATH}" "${SRC_PREFIX}/tests/tools/*"
-#"${LCOV_PATH}" -a "${LIBS_COVERAGE_INFO_PATH}" -a "${INCLUDE_COVERAGE_INFO_PATH}" \
-# -a "${RUNTIMES_COVERAGE_INFO_PATH}" -a "${TOOLS_COVERAGE_INFO_PATH}" \
-# -o "${FINAL_COVERAGE_INFO_PATH}"
-"${LCOV_PATH}" -a "${RUNTIMES_COVERAGE_INFO_PATH}" -a "${TOOLS_COVERAGE_INFO_PATH}" -o "${FINAL_COVERAGE_INFO_PATH}"
-"${GENHTML_PATH}" "${FINAL_COVERAGE_INFO_PATH}" --output-directory "${HTML_PATH}" ${GENHTML_FLAG:-}
diff --git a/infra/nnfw/config/docker.configuration b/infra/nnfw/config/docker.configuration
index 862e5f8b5..962c02c7f 100644
--- a/infra/nnfw/config/docker.configuration
+++ b/infra/nnfw/config/docker.configuration
@@ -24,6 +24,8 @@ DOCKER_ENV_VARS+=" -e no_proxy"
DOCKER_ENV_VARS+=" -e GIT_SSL_NO_VERIFY"
DOCKER_ENV_VARS+=" -e EXTERNAL_DOWNLOAD_SERVER"
DOCKER_ENV_VARS+=" -e NNFW_WORKSPACE"
+DOCKER_ENV_VARS+=" -e EXT_ACL_FOLDER"
+DOCKER_ENV_VARS+=" -e NDK_DIR"
DOCKER_RUN_OPTS="${DOCKER_OPTS}"
DOCKER_RUN_OPTS+=" --rm"
diff --git a/infra/packaging/preset/20191231_windows b/infra/packaging/preset/20191231_windows
index 7b511196f..aad64ea72 100644
--- a/infra/packaging/preset/20191231_windows
+++ b/infra/packaging/preset/20191231_windows
@@ -23,6 +23,8 @@ function preset_configure()
# Tools
REQUIRED_UNITS+=("tf2circle")
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
# TODO Use "nncc configure" and "nncc build"
cmake \
-G "MSYS Makefiles" \
@@ -37,6 +39,7 @@ function preset_configure()
-DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
-DCMAKE_BUILD_TYPE=release \
-DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
${EXTRA_OPTIONS[@]} \
"${NNAS_PROJECT_PATH}/infra/nncc"
}
diff --git a/infra/packaging/preset/20200115_windows b/infra/packaging/preset/20200115_windows
index da349bb3a..f71b7643d 100644
--- a/infra/packaging/preset/20200115_windows
+++ b/infra/packaging/preset/20200115_windows
@@ -23,6 +23,8 @@ function preset_configure()
# Tools
REQUIRED_UNITS+=("tf2nnpkg")
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
# TODO Use "nncc configure" and "nncc build"
cmake \
-G "MSYS Makefiles" \
@@ -38,6 +40,7 @@ function preset_configure()
-DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
-DCMAKE_BUILD_TYPE=release \
-DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
${EXTRA_OPTIONS[@]} \
"${NNAS_PROJECT_PATH}/infra/nncc"
}
diff --git a/infra/packaging/preset/20200220 b/infra/packaging/preset/20200220
new file mode 100644
index 000000000..411f3771c
--- /dev/null
+++ b/infra/packaging/preset/20200220
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+PRESET="20200220"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "bino" "cwrap" "fipe" "pepper-str" "pepper-strcast" "pp" "stdex")
+ REQUIRED_UNITS+=("oops" "pepper-assert")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # loco IR extension: Custom Op Support
+ REQUIRED_UNITS+=("locoex-customop")
+ # TensorFlow Libraries
+ REQUIRED_UNITS+=("tfinfo" "plier-tf")
+ # TensorFlow GraphDef I/O
+ REQUIRED_UNITS+=("mio-tf")
+ # TensorFlow Frontend (.pb/.pbtxt -> loco.canonical)
+ REQUIRED_UNITS+=("moco-log" "moco" "moco-tf")
+ # TensorFlow Lite/Circle Backend (loco.canonical -> .tflite, loco.circle -> .circle)
+ REQUIRED_UNITS+=("exo")
+ # Tools
+ REQUIRED_UNITS+=("tf2circle")
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
diff --git a/infra/packaging/res/tf2nnpkg.20200220 b/infra/packaging/res/tf2nnpkg.20200220
new file mode 100644
index 000000000..0875bad2f
--- /dev/null
+++ b/infra/packaging/res/tf2nnpkg.20200220
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+ if [ "$#" -le 0 ]; then
+ return 1
+ fi
+ command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+ echo "Convert TensorFlow model to nnpackage."
+ echo "Usage: tf2nnpkg --info <path/to/info> --graphdef <path/to/pb> [OPTION] -o <path/to/nnpkg/directory>"
+ echo "option:"
+ echo " --customop <path/to/customop.conf>"
+ exit 0
+}
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--info')
+ export INFO_FILE="$2"
+ shift 2
+ ;;
+ '--graphdef')
+ export GRAPHDEF_FILE="$2"
+ shift 2
+ ;;
+ '--customop')
+ export CUSTOMOP_CONF_FILE="$2"
+ shift 2
+ ;;
+ '-o')
+ export OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ '--use-tf2circle')
+ echo "WARNING! --use-tf2circle is deprecated"
+ shift 1
+ ;;
+ *)
+ echo "${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+ echo "pb is not found. Please check --graphdef is correct."
+ exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+ echo "info is not found. Please check --info is correct."
+ exit 2
+fi
+
+# optional param
+if [ ${CUSTOMOP_CONF_FILE} ]; then
+ if [ ! -e ${CUSTOMOP_CONF_FILE} ]; then
+ echo "customop.conf is not found. Please check --customop is correct."
+ exit 2
+ fi
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+if [ ${CUSTOMOP_CONF_FILE} ]; then
+ "${ROOT}/bin/tf2circle" "${INFO_FILE}" "${GRAPHDEF_FILE}" "${TMPDIR}/${MODEL_NAME}.circle" \
+ "--customop" "${CUSTOMOP_CONF_FILE}"
+else
+ "${ROOT}/bin/tf2circle" "${INFO_FILE}" "${GRAPHDEF_FILE}" "${TMPDIR}/${MODEL_NAME}.circle"
+fi
+
+"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/scripts/build_android_runtime_release.sh b/infra/scripts/build_android_runtime_release.sh
new file mode 100755
index 000000000..fe933c648
--- /dev/null
+++ b/infra/scripts/build_android_runtime_release.sh
@@ -0,0 +1,21 @@
+[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
+
+CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ROOT_PATH="$CURRENT_PATH/../../"
+
+# prepare pre-built armcompute library
+# android build requires pre-built armcompute library
+if [ ! -n "$EXT_ACL_FOLDER" ]; then
+ echo "Please set EXT_ACL_FOLDER to use pre-built armcompute library"
+ exit 1
+fi
+
+# prepare ndk
+if [ ! -n "$NDK_DIR" ]; then
+ export NDK_DIR=$ROOT_PATH/tools/cross/ndk/r20/ndk
+ echo "It will use default external path"
+fi
+
+export TARGET_OS=android
+export CROSS_BUILD=1
+make -f Makefile.template
diff --git a/infra/scripts/common.sh b/infra/scripts/common.sh
index edd21163c..edd21163c 100644..100755
--- a/infra/scripts/common.sh
+++ b/infra/scripts/common.sh
diff --git a/infra/scripts/docker_build_cross_aarch64_runtime.sh b/infra/scripts/docker_build_cross_aarch64_runtime.sh
new file mode 100755
index 000000000..7da673601
--- /dev/null
+++ b/infra/scripts/docker_build_cross_aarch64_runtime.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
+
+CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ROOT_PATH="$CURRENT_PATH/../../"
+
+# prepare rootfs
+if [ ! -d $ROOTFS_DIR ]; then
+ echo "It will use default rootfs path"
+else
+ DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
+ DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
+fi
+
+# mount volume (or directory) for externals
+if [ -n "$EXTERNAL_VOLUME" ]; then
+ DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
+ DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
+else
+ echo "It will use default external path"
+fi
+
+# docker image name
+if [[ -z $DOCKER_IMAGE_NAME ]]; then
+ echo "It will use default docker image name"
+fi
+
+# Mirror server setting
+if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
+ echo "It will not use mirror server"
+fi
+
+DOCKER_ENV_VARS+=" -e TARGET_ARCH=aarch64"
+DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
+
+set -e
+
+pushd $ROOT_PATH > /dev/null
+
+# TODO use command instead of makefile
+export DOCKER_ENV_VARS
+export DOCKER_VOLUMES
+CMD="cp -nv Makefile.template Makefile && \
+ make all install build_test_suite"
+./nnfw docker-run bash -c "$CMD"
+
+popd > /dev/null
diff --git a/infra/scripts/docker_build_cross_arm_benchmark_model.sh b/infra/scripts/docker_build_cross_arm_benchmark_model.sh
index 9a511741d..f63edf118 100755
--- a/infra/scripts/docker_build_cross_arm_benchmark_model.sh
+++ b/infra/scripts/docker_build_cross_arm_benchmark_model.sh
@@ -43,7 +43,7 @@ pushd $ROOT_PATH > /dev/null
# TODO use command instead of makefile
export DOCKER_ENV_VARS
export DOCKER_VOLUMES
-CMD="export OPTIONS='-DBUILD_PURE_ARM_COMPUTE=ON -DBUILD_NEURUN=OFF -DBUILD_TFLITE_BENCHMARK_MODEL=ON -DBUILD_TFLITE_LOADER=OFF' && \
+CMD="export OPTIONS='-DBUILD_PURE_ARM_COMPUTE=ON -DBUILD_ONERT=OFF -DBUILD_TFLITE_BENCHMARK_MODEL=ON -DBUILD_TFLITE_LOADER=OFF' && \
cp -nv Makefile.template Makefile && \
make all install build_test_suite"
./nnfw docker-run bash -c "$CMD"
diff --git a/infra/scripts/docker_build_cross_arm_neurun.sh b/infra/scripts/docker_build_cross_arm_runtime.sh
index f1f666aa3..f1f666aa3 100755
--- a/infra/scripts/docker_build_cross_arm_neurun.sh
+++ b/infra/scripts/docker_build_cross_arm_runtime.sh
diff --git a/infra/scripts/docker_build_cross_arm_neurun_release.sh b/infra/scripts/docker_build_cross_arm_runtime_release.sh
index ea66f1774..ea66f1774 100755
--- a/infra/scripts/docker_build_cross_arm_neurun_release.sh
+++ b/infra/scripts/docker_build_cross_arm_runtime_release.sh
diff --git a/infra/scripts/docker_build_test_x64.sh b/infra/scripts/docker_build_test_x64.sh
index ac1998ee3..eb620df95 100755
--- a/infra/scripts/docker_build_test_x64.sh
+++ b/infra/scripts/docker_build_test_x64.sh
@@ -31,6 +31,7 @@ export DOCKER_ENV_VARS
export DOCKER_VOLUMES
# Disable nnpackage_run build: mismatch between buildtool for CI and installed hdf5
CMD="export OPTIONS='-DBUILD_NNPACKAGE_RUN=OFF' && \
+ export BUILD_TYPE=Release && \
cp -nv Makefile.template Makefile && \
make all install build_test_suite"
./nnfw docker-run bash -c "$CMD"
@@ -42,7 +43,7 @@ if [[ -z $MODELFILE_SERVER ]]; then
fi
export DOCKER_ENV_VARS=" -e MODELFILE_SERVER=$MODELFILE_SERVER"
-./nnfw docker-run-user bash -c "./infra/scripts/test_x64_neurun_cpu.sh"
-./nnfw docker-run-user bash -c "./infra/scripts/test_neurun_interp.sh"
+./nnfw docker-run-user ./infra/scripts/test_ubuntu_runtime.sh --backend cpu
+./nnfw docker-run-user ./infra/scripts/test_ubuntu_runtime_interp.sh
popd > /dev/null
diff --git a/infra/scripts/test_arm_neurun_acl_cl.sh b/infra/scripts/test_arm_neurun_acl_cl.sh
deleted file mode 100755
index e709132b5..000000000
--- a/infra/scripts/test_arm_neurun_acl_cl.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-
-set -eo pipefail
-source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
-
-CheckTestPrepared
-
-BACKEND="acl_cl"
-UNITTEST_SKIPLIST="Product/out/unittest/nnapi_gtest.skip.armv7l-linux"
-FRAMEWORK_TESTLIST="tests/scripts/list/neurun_frameworktest_list.armv7l.acl_cl.txt"
-REPORT_BASE="report/${BACKEND}"
-EXECUTORS=("Linear" "Dataflow" "Parallel")
-
-for EXECUTOR in "${EXECUTORS[@]}";
-do
- echo "[EXECUTOR]: ${EXECUTOR}"
- export EXECUTOR="${EXECUTOR}"
- Unittests "${BACKEND}" "${UNITTEST_SKIPLIST}" "${REPORT_BASE}/${EXECUTOR}"
- TFLiteModelVerification "${BACKEND}" "${FRAMEWORK_TESTLIST}" "${REPORT_BASE}/${EXECUTOR}"
-done
-unset EXECUTOR
-
-# Test tflite_loader
-pushd ${ROOT_PATH} > /dev/null
-./tests/scripts/test-driver.sh \
- --frameworktest \
- --framework_driverbin="$ROOT_PATH/Product/out/bin/tflite_loader_test_tool" \
- --frameworktest_list_file=tests/scripts/list/tflite_loader_list.armv7l.txt \
- --reportdir="$ROOT_PATH/report/acl_cl/tfliteloader" .
-popd > /dev/null
-
diff --git a/infra/scripts/test_arm_neurun_acl_neon.sh b/infra/scripts/test_arm_neurun_acl_neon.sh
deleted file mode 100755
index c2cd805a8..000000000
--- a/infra/scripts/test_arm_neurun_acl_neon.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-
-set -eo pipefail
-source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
-
-CheckTestPrepared
-
-BACKEND="acl_neon"
-UNITTEST_SKIPLIST="Product/out/unittest/nnapi_gtest.skip.armv7l-linux.acl_neon"
-FRAMEWORK_TESTLIST="tests/scripts/list/neurun_frameworktest_list.armv7l.acl_neon.txt"
-REPORT_BASE="report/${BACKEND}"
-EXECUTORS=("Linear" "Dataflow" "Parallel")
-
-for EXECUTOR in "${EXECUTORS[@]}";
-do
- echo "[EXECUTOR]: ${EXECUTOR}"
- export EXECUTOR="${EXECUTOR}"
- Unittests "${BACKEND}" "${UNITTEST_SKIPLIST}" "${REPORT_BASE}/${EXECUTOR}"
- TFLiteModelVerification "${BACKEND}" "${FRAMEWORK_TESTLIST}" "${REPORT_BASE}/${EXECUTOR}"
-done
-unset EXECUTOR
-
-# NCNN backend test
-# TODO Independent script when supporting ops are increased
-export NCNN_LAYOUT="NCHW"
-Unittests "srcn" "Product/out/unittest/nnapi_gtest.skip.armv7l-linux.ncnn" "report/ncnn"
-TFLiteModelVerification "srcn" "tests/scripts/list/neurun_frameworktest_list.armv7l.ncnn.txt" "report/ncnn"
diff --git a/infra/scripts/test_arm_neurun_cpu.sh b/infra/scripts/test_arm_neurun_cpu.sh
deleted file mode 100755
index 616cba624..000000000
--- a/infra/scripts/test_arm_neurun_cpu.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-
-set -eo pipefail
-source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
-
-CheckTestPrepared
-
-BACKEND="cpu"
-UNITTEST_SKIPLIST="Product/out/unittest/nnapi_gtest.skip.armv7l-linux.cpu"
-FRAMEWORK_TESTLIST="tests/scripts/list/neurun_frameworktest_list.armv7l.cpu.txt"
-REPORT_BASE="report/${BACKEND}"
-EXECUTORS=("Linear" "Dataflow" "Parallel")
-
-for EXECUTOR in "${EXECUTORS[@]}";
-do
- echo "[EXECUTOR]: ${EXECUTOR}"
- export EXECUTOR="${EXECUTOR}"
- Unittests "${BACKEND}" "${UNITTEST_SKIPLIST}" "${REPORT_BASE}/${EXECUTOR}"
- TFLiteModelVerification "${BACKEND}" "${FRAMEWORK_TESTLIST}" "${REPORT_BASE}/${EXECUTOR}"
-done
-
-# Test custom op
-./Product/out/tests/FillFrom_runner
diff --git a/infra/scripts/test_arm_neurun_mixed.sh b/infra/scripts/test_arm_neurun_mixed.sh
deleted file mode 100755
index de141c5a0..000000000
--- a/infra/scripts/test_arm_neurun_mixed.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-
-set -eo pipefail
-source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
-
-CheckTestPrepared
-
-pushd ${ROOT_PATH}
-
-# NOTE Fixed backend assignment by type of operation
-# TODO Enhance this with randomized test
-BACKENDS=(cpu acl_neon acl_cl)
-
-# Get the intersect of framework test list files
-TESTLIST_PREFIX="tests/scripts/list/neurun_frameworktest_list.armv7l"
-cat $TESTLIST_PREFIX.${BACKENDS[0]}.txt | sort > $TESTLIST_PREFIX.intersect.txt
-for BACKEND in "${BACKENDS[@]}"; do
- comm -12 <(sort $TESTLIST_PREFIX.intersect.txt) <(sort $TESTLIST_PREFIX.$BACKEND.txt) > $TESTLIST_PREFIX.intersect.next.txt
- mv $TESTLIST_PREFIX.intersect.next.txt $TESTLIST_PREFIX.intersect.txt
-done
-popd > /dev/null
-
-# Run the test
-export OP_BACKEND_Conv2D="cpu"
-export OP_BACKEND_MaxPool2D="acl_cl"
-export OP_BACKEND_AvgPool2D="acl_neon"
-export ACL_LAYOUT="NCHW"
-export NCNN_LAYOUT="NCHW"
-Unittests "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.armv7l-linux" "report/mixed"
-TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.txt" "report/mixed"
diff --git a/infra/scripts/test_arm_nnpkg.sh b/infra/scripts/test_arm_nnpkg.sh
index 03ad00862..d6561ff6b 100755
--- a/infra/scripts/test_arm_nnpkg.sh
+++ b/infra/scripts/test_arm_nnpkg.sh
@@ -3,7 +3,7 @@
set -eo pipefail
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
-BACKENDS=("acl_cl" "acl_neon" "srcn" "cpu")
+BACKENDS=("acl_cl" "acl_neon" "cpu")
for BACKEND in "${BACKENDS[@]}";
do
diff --git a/infra/scripts/test_coverage.sh b/infra/scripts/test_coverage.sh
index 0a41080d2..773122e9b 100755
--- a/infra/scripts/test_coverage.sh
+++ b/infra/scripts/test_coverage.sh
@@ -24,19 +24,19 @@ if [[ ! -e $ROOT_PATH/tests/scripts/build_path_depth.txt ]]; then
fi
export GCOV_PREFIX_STRIP=`cat $ROOT_PATH/tests/scripts/build_path_depth.txt`
-./infra/scripts/test_arm_neurun_acl_cl.sh
-./infra/scripts/test_arm_neurun_acl_neon.sh
-./infra/scripts/test_arm_neurun_cpu.sh
+./infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --tflite-loader
+./infra/scripts/test_ubuntu_runtime.sh --backend acl_neon
+./infra/scripts/test_ubuntu_runtime.sh --backend cpu
# Enable all logs (mixed backend)
-NEURUN_LOG_ENABLE=1 GRAPH_DOT_DUMP=1 ./infra/scripts/test_arm_neurun_mixed.sh
+TENSOR_LOGGING=trace_log.txt ONERT_LOG_ENABLE=1 GRAPH_DOT_DUMP=1 ./infra/scripts/test_ubuntu_runtime_mixed.sh
# Enable trace event (acl_cl default backend)
export TRACE_FILEPATH=trace.json
-TFLiteModelVerification "acl_cl" "tests/scripts/list/neurun_frameworktest_list.armv7l.acl_cl.txt" "report/acl_cl/trace"
+TFLiteModelVerification "acl_cl" "tests/scripts/list/frameworktest_list.armv7l.acl_cl.txt" "report/acl_cl/trace"
unset TRACE_FILEPATH
# Interpreter
-./infra/scripts/test_neurun_interp.sh
+./infra/scripts/test_ubuntu_runtime_interp.sh
# nnpackage test suite
if [[ -e ${ARCHIVE_PATH}/nnpkg-test-suite.tar.gz ]]; then
diff --git a/infra/scripts/test_neurun_interp.sh b/infra/scripts/test_neurun_interp.sh
deleted file mode 100755
index 5ee212d41..000000000
--- a/infra/scripts/test_neurun_interp.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-set -eo pipefail
-source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
-
-export DISABLE_COMPILE=1
-CheckTestPrepared
-Unittests "cpu" "Product/out/unittest/nnapi_gtest.skip.noarch.interp" "report/interp"
-TFLiteModelVerification "cpu" "tests/scripts/list/neurun_frameworktest_list.noarch.interp.txt" "report/interp"
-
-unset DISABLE_COMPILE
diff --git a/infra/scripts/test_ubuntu_runtime.sh b/infra/scripts/test_ubuntu_runtime.sh
new file mode 100755
index 000000000..d4190bd80
--- /dev/null
+++ b/infra/scripts/test_ubuntu_runtime.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+set -eo pipefail
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+
+BACKEND="cpu"
+TEST_ARCH=$(uname -m | tr '[:upper:]' '[:lower:]')
+TEST_OS="linux"
+TFLITE_LOADER="0"
+LINEAR_ONLY="0"
+
+function Usage()
+{
+ echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --backend <BACKEND> Runtime backend to test (default: ${BACKEND})"
+ echo " --tflite-loader Enable TFLite Loader test"
+ echo " --linear-only Use Linear executor only"
+}
+
+while [[ $# -gt 0 ]]
+do
+ arg="$1"
+ case $arg in
+ -h|--help|help)
+ Usage
+ exit 0
+ ;;
+ --backend)
+ BACKEND=$(echo $2 | tr '[:upper:]' '[:lower:]')
+ shift 2
+ ;;
+ --backend=*)
+ BACKEND=$(echo ${1#*=} | tr '[:upper:]' '[:lower:]')
+ shift
+ ;;
+ --tflite-loader)
+ TFLITE_LOADER="1"
+ shift
+ ;;
+ --linear-only)
+ LINEAR_ONLY="1"
+ shift
+ ;;
+ *)
+ # Ignore
+ shift
+ ;;
+ esac
+done
+
+CheckTestPrepared
+echo "[[ ${TEST_ARCH}-${TEST_OS}: ${BACKEND} backend test ]]"
+UNITTEST_SKIPLIST="Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.${BACKEND}"
+FRAMEWORK_TESTLIST="tests/scripts/list/frameworktest_list.${TEST_ARCH}.${BACKEND}.txt"
+REPORT_BASE="report/${BACKEND}"
+EXECUTORS=("Linear" "Dataflow" "Parallel")
+if [ $LINEAR_ONLY = "1" ]; then
+ EXECUTORS=("Linear")
+fi
+
+for EXECUTOR in "${EXECUTORS[@]}";
+do
+ echo "[EXECUTOR]: ${EXECUTOR}"
+ export EXECUTOR="${EXECUTOR}"
+ Unittests "${BACKEND}" "${UNITTEST_SKIPLIST}" "${REPORT_BASE}/${EXECUTOR}"
+ TFLiteModelVerification "${BACKEND}" "${FRAMEWORK_TESTLIST}" "${REPORT_BASE}/${EXECUTOR}"
+ unset EXECUTOR
+done
+
+if [[ $TFLITE_LOADER = "1" ]]; then
+ # Test tflite_loader
+ pushd ${ROOT_PATH} > /dev/null
+ ./tests/scripts/test-driver.sh \
+ --frameworktest \
+ --framework_driverbin="$ROOT_PATH/Product/out/bin/tflite_loader_test_tool" \
+ --frameworktest_list_file=tests/scripts/list/tflite_loader_list.${TEST_ARCH}.txt \
+ --reportdir="$ROOT_PATH/report/tfliteloader" .
+
+ # Test custom op
+ ./Product/out/tests/FillFrom_runner
+ popd > /dev/null
+fi
diff --git a/infra/scripts/test_ubuntu_runtime_interp.sh b/infra/scripts/test_ubuntu_runtime_interp.sh
new file mode 100755
index 000000000..4113b0653
--- /dev/null
+++ b/infra/scripts/test_ubuntu_runtime_interp.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+set -eo pipefail
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+
+export DISABLE_COMPILE=1
+CheckTestPrepared
+echo "[[ Interpreter test ]]"
+Unittests "cpu" "Product/out/unittest/nnapi_gtest.skip.noarch.interp" "report/interp"
+TFLiteModelVerification "cpu" "tests/scripts/list/frameworktest_list.noarch.interp.txt" "report/interp"
+
+unset DISABLE_COMPILE
diff --git a/infra/scripts/test_ubuntu_runtime_mixed.sh b/infra/scripts/test_ubuntu_runtime_mixed.sh
new file mode 100755
index 000000000..71b33deb5
--- /dev/null
+++ b/infra/scripts/test_ubuntu_runtime_mixed.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+set -eo pipefail
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+
+CheckTestPrepared
+
+# TODO Get argument for mix configuration
+TEST_ARCH=$(uname -m | tr '[:upper:]' '[:lower:]')
+TEST_OS="linux"
+
+pushd ${ROOT_PATH}
+
+# NOTE Fixed backend assignment by type of operation
+# TODO Enhance this with randomized test
+BACKENDS=(acl_cl acl_neon cpu)
+
+# Get the intersect of framework test list files
+TESTLIST_PREFIX="tests/scripts/list/frameworktest_list.${TEST_ARCH}"
+SKIPLIST_PREFIX="Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}"
+sort $TESTLIST_PREFIX.${BACKENDS[0]}.txt > $TESTLIST_PREFIX.intersect.txt
+sort $SKIPLIST_PREFIX.${BACKENDS[0]} > $SKIPLIST_PREFIX.union
+for BACKEND in "${BACKENDS[@]:1}"; do
+ comm -12 <(sort $TESTLIST_PREFIX.intersect.txt) <(sort $TESTLIST_PREFIX.$BACKEND.txt) > $TESTLIST_PREFIX.intersect.next.txt
+ comm <(sort $SKIPLIST_PREFIX.union) <(sort $SKIPLIST_PREFIX.$BACKEND) | tr -d "[:blank:]" > $SKIPLIST_PREFIX.union.next
+ mv $TESTLIST_PREFIX.intersect.next.txt $TESTLIST_PREFIX.intersect.txt
+ mv $SKIPLIST_PREFIX.union.next $SKIPLIST_PREFIX.union
+done
+popd > /dev/null
+
+# Run the test
+export OP_BACKEND_Conv2D="cpu"
+export OP_BACKEND_MaxPool2D="acl_cl"
+export OP_BACKEND_AvgPool2D="acl_neon"
+export ACL_LAYOUT="NCHW"
+Unittests "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
+TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.txt" "report/mixed"
diff --git a/infra/scripts/test_x64_neurun_cpu.sh b/infra/scripts/test_x64_neurun_cpu.sh
deleted file mode 100755
index b75578390..000000000
--- a/infra/scripts/test_x64_neurun_cpu.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-set -eo pipefail
-source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
-
-pushd ${ROOT_PATH} > /dev/null
-
-CheckTestPrepared
-Unittests "cpu" "Product/out/unittest/nnapi_gtest.skip.x86_64-linux" "report"
-TFLiteModelVerification "cpu" "tests/scripts/list/neurun_frameworktest_list.x86-64.cpu.txt" "report"
-
-popd > /dev/null
diff --git a/infra/scripts/tizen_xu4_test.sh b/infra/scripts/tizen_xu4_test.sh
index f3c9bd51e..6a479b789 100755
--- a/infra/scripts/tizen_xu4_test.sh
+++ b/infra/scripts/tizen_xu4_test.sh
@@ -31,9 +31,9 @@ function prepare_rpm_test()
# download tflite model files
pushd $HOST_HOME
- tests/framework/run_test.sh --download=on
+ tests/scripts/framework/run_test.sh --download=on
find tests -name "*.zip" -exec rm {} \;
- tar -zcf cache.tar.gz tests/framework/cache
+ tar -zcf cache.tar.gz tests/scripts/framework/cache
$SDB_CMD push cache.tar.gz $TEST_ROOT/.
rm -rf cache.tar.gz
$SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT
@@ -52,9 +52,9 @@ function prepare_suite_test()
# download tflite model files
pushd $HOST_HOME
- tests/framework/run_test.sh --download=on
+ tests/scripts/framework/run_test.sh --download=on
find tests -name "*.zip" -exec rm {} \;
- tar -zcf cache.tar.gz tests/framework/cache
+ tar -zcf cache.tar.gz tests/scripts/framework/cache
$SDB_CMD push cache.tar.gz $TEST_ROOT/.
rm -rf cache.tar.gz
$SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT
@@ -120,11 +120,11 @@ else
fi
if [ -z "${GCOV_DIR}" ]; then
- ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_arm_neurun_acl_cl.sh"
- ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_arm_neurun_acl_neon.sh"
- ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_arm_neurun_cpu.sh"
- ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_arm_neurun_mixed.sh"
- ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_neurun_interp.sh"
+ ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --tflite-loader"
+ ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_neon"
+ ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend cpu"
+ ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime_mixed.sh"
+ ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime_interp.sh"
else
mkdir -p ${GCOV_DIR}
rm -rf ${GCOV_DIR}/*
@@ -136,11 +136,11 @@ else
GCOV_DATA_PATH="/opt/usr/nnfw-gcov"
# TODO For coverage check, we run acl_cl and mixed test
- ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_arm_neurun_acl_cl.sh"
- ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_arm_neurun_acl_neon.sh"
- ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_arm_neurun_cpu.sh"
- ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_arm_neurun_mixed.sh"
- ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_neurun_interp.sh"
+ ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --tflite-loader"
+ ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_neon"
+ ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend cpu"
+ ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime_mixed.sh"
+ ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime_interp.sh"
# More test to check coverage
${SDB_CMD} shell "rm -rf ${GCOV_DATA_PATH} && mkdir -p ${GCOV_DATA_PATH}"
diff --git a/nnpackage/schema/circle_schema.fbs b/nnpackage/schema/circle_schema.fbs
index fe0d9447d..42bce8411 100644
--- a/nnpackage/schema/circle_schema.fbs
+++ b/nnpackage/schema/circle_schema.fbs
@@ -1,4 +1,4 @@
-// Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright (c) 2019~2020 Samsung Electronics Co., Ltd. All Rights Reserved
// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,7 +14,18 @@
// limitations under the License.
// Revision History
-// Version 0: Initial version. Based on TensorFlow Lite v1.13.1 schema.
+//
+// Version Major.Minor
+//
+// Major version is schema version.
+// We keep schema version if it is compatible
+// Minor version is for human communication
+// It will not be stored in circle model.
+//
+// Version 0.0: Initial version. Based on TensorFlow Lite v1.13.1 schema.
+// Version 0.1: Based on TF v2.2-rc2 + more (from TensorFlow `56d281c`)
+// `BATCH_MATMUL` operator, `FLOAT64` tensor type,
+// `asymmetric_quantize_inputs` for several operator options
namespace circle;
@@ -38,6 +49,7 @@ enum TensorType : byte {
INT16 = 7,
COMPLEX64 = 8,
INT8 = 9,
+ FLOAT64 = 10,
}
// Custom quantization parameters for experimenting with new quantization
@@ -62,9 +74,119 @@ table QuantizationParameters {
scale:[float]; // For dequantizing the tensor's values.
zero_point:[long];
- // If this is not none, the quantization parameters above are ignored and the
- // value of the QuantizationDetails union below should be used.
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+  // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+  // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+ quantized_dimension:int;
+}
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+//    compression technique is the same as what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because tensor's shape is a int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+  // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+  //   permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
}
table Tensor {
@@ -86,19 +208,28 @@ table Tensor {
quantization:QuantizationParameters; // Optional.
is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
}
// A list of builtin operators. Builtin operators are slightly faster than custom
// ones, but not by much. Moreover, while custom operators accept an opaque
// object containing configuration parameters, builtins have a predetermined
// set of acceptable options.
+
enum BuiltinOperator : ubyte {
ADD = 0,
AVERAGE_POOL_2D = 1,
CONCATENATION = 2,
CONV_2D = 3,
DEPTHWISE_CONV_2D = 4,
- // DEPTH_TO_SPACE = 5,
+ DEPTH_TO_SPACE = 5,
DEQUANTIZE = 6,
EMBEDDING_LOOKUP = 7,
FLOOR = 8,
@@ -203,6 +334,30 @@ enum BuiltinOperator : ubyte {
MIRROR_PAD = 100,
ABS = 101,
SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
INSTANCE_NORM = 254,
}
@@ -287,7 +442,29 @@ union BuiltinOptions {
MirrorPadOptions,
AbsOptions,
SplitVOptions,
- InstanceNormOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ InstanceNormOptions = 254,
}
enum Padding : byte { SAME, VALID }
@@ -324,6 +501,9 @@ table DepthwiseConv2DOptions {
padding:Padding;
stride_w:int;
stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
depth_multiplier:int;
fused_activation_function:ActivationFunctionType;
// Parameters for DepthwiseConv version 2 or above.
@@ -350,17 +530,22 @@ table LSHProjectionOptions {
table SVDFOptions {
rank:int;
fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for non
+ // constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
}
// An implementation of TensorFlow RNNCell.
table RNNOptions {
fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
}
// An implementation of TensorFlow dynamic_rnn with RNNCell.
table SequenceRNNOptions {
time_major:bool;
fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
}
// An implementation of TensorFlow bidrectional_dynamic_rnn with RNNCell.
@@ -368,6 +553,7 @@ table BidirectionalSequenceRNNOptions {
time_major:bool;
fused_activation_function:ActivationFunctionType;
merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
}
enum FullyConnectedOptionsWeightsFormat: byte {
@@ -382,6 +568,16 @@ table FullyConnectedOptions {
// Parameters for FullyConnected version 2 or above.
weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimension is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
}
table SoftmaxOptions {
@@ -430,6 +626,9 @@ table LSTMOptions {
// Parameters for LSTM version 2 or above.
// Basic kernel is only supported in version 2 or above.
kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
}
// An implementation of TensorFlow dynamic_rnn with LSTMCell.
@@ -440,21 +639,35 @@ table UnidirectionalSequenceLSTMOptions {
// If true then first dimension is sequence, otherwise batch.
time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
}
table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
fused_activation_function:ActivationFunctionType;
cell_clip: float; // Optional, 0.0 means no clipping
proj_clip: float; // Optional, 0.0 means no clipping
// If true, store the outputs of both directions into the first output.
merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
}
table ResizeBilinearOptions {
new_height: int (deprecated);
new_width: int (deprecated);
align_corners: bool;
+ half_pixel_centers: bool;
}
table ResizeNearestNeighborOptions {
@@ -493,6 +706,10 @@ table SpaceToDepthOptions {
block_size: int;
}
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
table SubOptions {
fused_activation_function:ActivationFunctionType;
}
@@ -524,6 +741,9 @@ table TransposeOptions {
table ExpOptions {
}
+table CosOptions {
+}
+
table ReducerOptions {
keep_dims: bool;
}
@@ -618,6 +838,9 @@ table ShapeOptions {
out_type : TensorType;
}
+table RankOptions {
+}
+
table PowOptions {
}
@@ -647,6 +870,9 @@ table AbsOptions {
}
+table HardSwishOptions {
+}
+
table LogicalAndOptions {
}
@@ -694,6 +920,69 @@ table MirrorPadOptions {
mode:MirrorPadMode;
}
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adjoint_lhs:bool;
+ adjoint_rhs:bool;
+}
+
table InstanceNormOptions {
epsilon:float;
fused_activation_function:ActivationFunctionType;
@@ -732,7 +1021,7 @@ table Operator {
// complicate map lookups.
opcode_index:uint;
- // Optional input and output tensors are indicated by -1.
+ // Optional input are indicated by -1.
inputs:[int];
outputs:[int];
@@ -749,6 +1038,15 @@ table Operator {
// If the list is empty, no variable is mutated in this operator.
// The list either has the same length as `inputs`, or is empty.
mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
}
// The root type, defining a subgraph, which typically represents an entire
@@ -782,6 +1080,13 @@ table Buffer {
data:[ubyte] (force_align: 16);
}
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
table Model {
// Version of the schema.
version:uint;
@@ -804,8 +1109,12 @@ table Model {
// their buffer.
buffers:[Buffer];
- // Metadata about the model. Indirects into the existings buffers list.
+ // Metadata about the model. Indirects into the existings buffers list.
+ // Deprecated, prefer to use metadata field.
metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
}
root_type Model;
diff --git a/nnpackage/schema/circle_schema_v0.fbs b/nnpackage/schema/circle_schema_v0.fbs
new file mode 100644
index 000000000..fe0d9447d
--- /dev/null
+++ b/nnpackage/schema/circle_schema_v0.fbs
@@ -0,0 +1,811 @@
+// Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version. Based on TensorFlow Lite v1.13.1 schema.
+
+namespace circle;
+
+// This corresponds to the version.
+file_identifier "CIR0";
+// File extension of any written files.
+file_extension "circle";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the quantization parameters above are ignored and the
+ // value of the QuantizationDetails union below should be used.
+ details:QuantizationDetails;
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+enum BuiltinOperator : ubyte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ // DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ INSTANCE_NORM = 254,
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ InstanceNormOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow bidrectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table InstanceNormOptions {
+ epsilon:float;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+
+ // The version of the operator. The version need to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+enum DataFormat : byte {
+ // For 2D data, NHWC(batch, height, width, channels)
+ // For 3D data, NDHWC(batch, depth, height, width, channels)
+ CHANNELS_LAST = 0,
+ // For 2D data, NCHW(batch, channels, height, width)
+ // For 3D data, NCDHW(batch, channels, depth, height, width)
+ CHANNELS_FIRST = 1,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operations is configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicate map lookups.
+ opcode_index:uint;
+
+ // Optional input and output tensors are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator.(e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+
+ // Data format for input/output of SubGraph
+ data_format: DataFormat;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existings buffers list.
+ metadata_buffer:[int];
+}
+
+root_type Model;
diff --git a/nnpackage/spec/30_custom_op.md b/nnpackage/spec/30_custom_op.md
index 58e0acddb..504695fdf 100644
--- a/nnpackage/spec/30_custom_op.md
+++ b/nnpackage/spec/30_custom_op.md
@@ -60,7 +60,7 @@ FillFrom
└── MANIFEST
```
-All custom operator libraries are put under `{nnpackage_root}/custom_op/lib{customop_name}.{arch}-{os}-{buildtype}.a`.
+All custom operator libraries are put under `{nnpackage_root}/custom_op/lib{customop_name}.{arch}-{os}.{buildtype}.a`.
## How to use custom op in app
diff --git a/packaging/eigen.tar.gz b/packaging/eigen.tar.gz
new file mode 100644
index 000000000..65337b4d2
--- /dev/null
+++ b/packaging/eigen.tar.gz
Binary files differ
diff --git a/packaging/gemmlowp.tar.gz b/packaging/gemmlowp.tar.gz
new file mode 100644
index 000000000..68339cdb1
--- /dev/null
+++ b/packaging/gemmlowp.tar.gz
Binary files differ
diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec
index 9d53d4c42..e12632d8e 100644
--- a/packaging/nnfw.spec
+++ b/packaging/nnfw.spec
@@ -1,6 +1,6 @@
Name: nnfw
Summary: nnfw
-Version: 1.1.0
+Version: 1.4.0
Release: 1
Group: Development
License: Apache-2.0 and MIT and BSD-2-Clause
@@ -10,14 +10,19 @@ Source1: %{name}.manifest
Source1001: flatbuffers.tar.gz
Source1002: nnapi_test_generated.tar.gz
Source1003: gtest.tar.gz
+Source1004: eigen.tar.gz
+Source1005: gemmlowp.tar.gz
Source2001: nnfw.pc.in
+%{!?build_type: %define build_type Release}
+%{!?coverage_build: %define coverage_build 0}
+%{!?test_build: %define test_build 1}
+%{!?extra_option: %define extra_option %{nil}}
+%if %{coverage_build} == 1
+%define test_build 1
+%endif
+
BuildRequires: cmake
-BuildRequires: boost-devel
-BuildRequires: tensorflow-lite-devel
-BuildRequires: hdf5-devel
-BuildRequires: libaec-devel
-BuildRequires: zlib-devel
%ifarch %{arm} aarch64
# Require python for acl-ex library build pre-process
@@ -28,9 +33,14 @@ BuildRequires: libarmcl-devel
Requires(post): /sbin/ldconfig
Requires(postun): /sbin/ldconfig
-%{!?build_type: %define build_type Release}
-%{!?coverage_build: %define coverage_build 0}
-%{!?extra_option: %define extra_option %{nil}}
+%if %{test_build} == 1
+BuildRequires: boost-devel
+BuildRequires: tensorflow-lite-devel
+BuildRequires: hdf5-devel
+BuildRequires: libaec-devel
+BuildRequires: zlib-devel
+BuildRequires: libjpeg-devel
+%endif
%description
nnfw is a high-performance, on-device neural network framework for Tizen
@@ -42,11 +52,13 @@ Requires: %{name} = %{version}-%{release}
%description devel
NNFW devel package.
+%if %{test_build} == 1
%package test
Summary: NNFW Test
%description test
NNFW test rpm. It does not depends on nnfw rpm since it contains nnfw runtime.
+%endif
%ifarch %{arm}
%define target_arch armv7l
@@ -61,20 +73,21 @@ NNFW test rpm. It does not depends on nnfw rpm since it contains nnfw runtime.
%define install_dir %{_prefix}
%define install_path %{buildroot}%{install_dir}
%define build_env NNFW_WORKSPACE=build
-%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen
+%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENABLE_TEST=off
# Set option for test build (and coverage test build)
%define test_install_home /opt/usr/nnfw-test
%define test_install_dir %{test_install_home}/Product/out
%define test_install_path %{buildroot}%{test_install_dir}
%define coverage_option %{nil}
-%define test_suite_list infra/scripts tests/scripts tests/framework
+%define test_suite_list infra/scripts tests/scripts
+%define test_build_type %{build_type}
%if %{coverage_build} == 1
%define coverage_option -DENABLE_COVERAGE=ON
%define test_build_type Debug
%endif
%define test_build_env NNFW_INSTALL_PREFIX=%{test_install_path} NNFW_WORKSPACE=build_for_test
-%define test_build_options %{coverage_option} -DCMAKE_BUILD_TYPE=%{test_build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENVVAR_NEURUN_CONFIG=ON
+%define test_build_options %{coverage_option} -DCMAKE_BUILD_TYPE=%{test_build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENVVAR_ONERT_CONFIG=ON
%prep
%setup -q
@@ -83,6 +96,8 @@ mkdir ./externals
tar -xf %{SOURCE1001} -C ./externals
tar -xf %{SOURCE1002} -C ./tests/nnapi/src/
tar -xf %{SOURCE1003} -C ./externals
+tar -xf %{SOURCE1004} -C ./externals
+tar -xf %{SOURCE1005} -C ./externals
%build
%ifarch arm armv7l aarch64
@@ -93,23 +108,25 @@ tar -xf %{SOURCE1003} -C ./externals
# TODO Set install path
%{build_env} ./nnfw install
+%if %{test_build} == 1
# test runtime
# TODO remove duplicated build process
%{test_build_env} ./nnfw configure %{test_build_options} %{extra_option}
%{test_build_env} ./nnfw build
%if %{coverage_build} == 1
pwd > tests/scripts/build_path.txt
-%endif
-tar -zcf test-suite.tar.gz infra/scripts tests/scripts tests/framework
-%endif
+%endif # coverage_build
+tar -zcf test-suite.tar.gz infra/scripts tests/scripts
+%endif # test_build
+%endif # arm armv7l aarch64
%install
%ifarch arm armv7l aarch64
mkdir -p %{buildroot}%{_libdir}
-mkdir -p %{buildroot}%{_includedir}
+mkdir -p %{buildroot}%{_includedir}/nnfw
install -m 644 build/out/lib/*.so %{buildroot}%{_libdir}
-cp -r build/out/include/* %{buildroot}%{_includedir}/
+cp -r build/out/include/nnfw/* %{buildroot}%{_includedir}/nnfw/
# For developer
cp %{SOURCE2001} .
@@ -118,16 +135,18 @@ sed -i 's:@libdir@:%{_libdir}:g
mkdir -p %{buildroot}%{_libdir}/pkgconfig
install -m 0644 ./nnfw.pc.in %{buildroot}%{_libdir}/pkgconfig/nnfw.pc
+%if %{test_build} == 1
%{test_build_env} ./nnfw install
# Share test script with ubuntu (ignore error if there is no list for target)
cp tests/nnapi/nnapi_gtest.skip.* %{buildroot}%{test_install_dir}/unittest/.
-cp %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip.%{target_arch}-linux || true
+cp %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip
tar -zxf test-suite.tar.gz -C %{buildroot}%{test_install_home}
%if %{coverage_build} == 1
mkdir -p %{buildroot}%{test_install_home}/gcov
find . -name "*.gcno" -exec xargs cp {} %{buildroot}%{test_install_home}/gcov/. \;
-%endif
+%endif # coverage_build
+%endif # test_build
%endif
@@ -150,6 +169,7 @@ find . -name "*.gcno" -exec xargs cp {} %{buildroot}%{test_install_home}/gcov/.
%{_libdir}/pkgconfig/nnfw.pc
%endif
+%if %{test_build} == 1
%files test
%manifest %{name}.manifest
%defattr(-,root,root,-)
@@ -157,7 +177,8 @@ find . -name "*.gcno" -exec xargs cp {} %{buildroot}%{test_install_home}/gcov/.
%dir %{test_install_home}
%{test_install_home}/*
%exclude %{_libdir}/debug
-%endif
+%endif # arm armv7l aarch64
+%endif # test_build
%changelog
* Thu Mar 15 2018 Chunseok Lee <chunseok.lee@samsung.com>
diff --git a/res/ONNXTests/UNIT_Gemm_000/test.pbtxt b/res/ONNXTests/UNIT_Gemm_000/test.pbtxt
new file mode 100644
index 000000000..6fd497f89
--- /dev/null
+++ b/res/ONNXTests/UNIT_Gemm_000/test.pbtxt
@@ -0,0 +1,79 @@
+# This testcase is compatible with ONNX 1.4.1 or newer
+ir_version: 5
+
+opset_import {
+ version: 7
+}
+
+graph {
+ name: "Gemm_000"
+
+ node {
+ input: "input"
+ input: "weight"
+ input: "bias"
+ output: "output"
+ op_type: "Gemm"
+ attribute {
+ name: "alpha"
+ f: 1.5
+ type: FLOAT
+ }
+ attribute {
+ name: "beta"
+ f: 1.5
+ type: FLOAT
+ }
+ attribute {
+ name: "transA"
+ i: 1
+ type: INT
+ }
+ }
+
+# Initializers generated by python helper script:
+# a = np.ones((2,2), dtype = np.float32)
+# onnx.numpy_helper.from_array(a)
+#
+ initializer {
+ dims: 3
+ dims: 2
+ data_type: 1
+ name: "weight"
+ raw_data: "\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?"
+ }
+
+ initializer {
+ dims: 2
+ dims: 2
+ data_type: 1
+ name: "bias"
+ raw_data: "\000\000\200?\000\000\200?\000\000\200?\000\000\200?"
+ }
+
+ input {
+ name: "input"
+ type {
+ tensor_type {
+ elem_type: 1 # FLOAT type
+ shape {
+ dim { dim_value: 3 }
+ dim { dim_value: 2 }
+ }
+ }
+ }
+ }
+
+ output {
+ name: "output"
+ type {
+ tensor_type {
+ elem_type: 1 # FLOAT type
+ shape {
+ dim { dim_value: 2 }
+ dim { dim_value: 2 }
+ }
+ }
+ }
+ }
+}
diff --git a/res/ONNXTests/UNIT_Gemm_001/test.pbtxt b/res/ONNXTests/UNIT_Gemm_001/test.pbtxt
new file mode 100644
index 000000000..0371fc250
--- /dev/null
+++ b/res/ONNXTests/UNIT_Gemm_001/test.pbtxt
@@ -0,0 +1,70 @@
+# This testcase is compatible with ONNX 1.4.1 or newer
+ir_version: 5
+
+opset_import {
+ version: 11
+}
+
+graph {
+ name: "Gemm_001"
+
+ node {
+ input: "input"
+ input: "weight"
+ output: "output"
+ op_type: "Gemm"
+ attribute {
+ name: "alpha"
+ f: 1.5
+ type: FLOAT
+ }
+ attribute {
+ name: "beta"
+ f: 1.5
+ type: FLOAT
+ }
+ attribute {
+ name: "transA"
+ i: 1
+ type: INT
+ }
+ }
+
+# Initializers generated by python helper script:
+# a = np.ones((2,2), dtype = np.float32)
+# onnx.numpy_helper.from_array(a)
+#
+ initializer {
+ dims: 3
+ dims: 2
+ data_type: 1
+ name: "weight"
+ raw_data: "\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?"
+ }
+
+ input {
+ name: "input"
+ type {
+ tensor_type {
+ elem_type: 1 # FLOAT type
+ shape {
+ dim { dim_value: 3 }
+ dim { dim_value: 2 }
+ }
+ }
+ }
+ }
+
+ output {
+ name: "output"
+ type {
+ tensor_type {
+ elem_type: 1 # FLOAT type
+ shape {
+ dim { dim_value: 2 }
+ dim { dim_value: 2 }
+ }
+ }
+ }
+ }
+}
diff --git a/res/TensorFlowLiteRecipes/Abs_000/test.recipe b/res/TensorFlowLiteRecipes/Abs_000/test.recipe
new file mode 100644
index 000000000..0603a43fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Abs_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Abs"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Abs_000/test.reverse b/res/TensorFlowLiteRecipes/Abs_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Abs_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Add_000/test.recipe b/res/TensorFlowLiteRecipes/Add_000/test.recipe
new file mode 100644
index 000000000..54018446a
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Add_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Add_000/test.reverse b/res/TensorFlowLiteRecipes/Add_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Add_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Add_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Add_U8_000/test.recipe
new file mode 100644
index 000000000..f4ccc3cc8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Add_U8_000/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ifm2"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Add_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Add_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Add_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ArgMax_000/test.recipe b/res/TensorFlowLiteRecipes/ArgMax_000/test.recipe
new file mode 100644
index 000000000..2883e1853
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_000/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { }
+}
+operand {
+ name: "argmax/dim"
+ type: INT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ }
+}
+operation {
+ type: "ArgMax"
+ argmax_options {
+ output_type: INT64
+ }
+ input: "ifm"
+ input: "argmax/dim"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ArgMax_000/test.reverse b/res/TensorFlowLiteRecipes/ArgMax_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ArgMax_001/test.recipe b/res/TensorFlowLiteRecipes/ArgMax_001/test.recipe
new file mode 100644
index 000000000..1f3961cae
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_001/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 dim: 5 }
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { dim: 5 }
+}
+operand {
+ name: "argmax/dim"
+ type: INT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ }
+}
+operation {
+ type: "ArgMax"
+ argmax_options {
+ output_type: INT64
+ }
+ input: "ifm"
+ input: "argmax/dim"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ArgMax_001/test.reverse b/res/TensorFlowLiteRecipes/ArgMax_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ArgMax_002/test.recipe b/res/TensorFlowLiteRecipes/ArgMax_002/test.recipe
new file mode 100644
index 000000000..56d951f3d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_002/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 dim: 5 }
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { dim: 4 }
+}
+operand {
+ name: "argmax/dim"
+ type: INT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operation {
+ type: "ArgMax"
+ argmax_options {
+ output_type: INT64
+ }
+ input: "ifm"
+ input: "argmax/dim"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ArgMax_002/test.reverse b/res/TensorFlowLiteRecipes/ArgMax_002/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_002/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ArgMax_003/test.recipe b/res/TensorFlowLiteRecipes/ArgMax_003/test.recipe
new file mode 100644
index 000000000..cb34e3824
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_003/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 dim: 5 dim: 6 }
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { dim: 4 dim: 6 }
+}
+operand {
+ name: "argmax/dim"
+ type: INT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operation {
+ type: "ArgMax"
+ argmax_options {
+ output_type: INT64
+ }
+ input: "ifm"
+ input: "argmax/dim"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ArgMax_003/test.reverse b/res/TensorFlowLiteRecipes/ArgMax_003/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_003/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ArgMax_U8_000/test.recipe b/res/TensorFlowLiteRecipes/ArgMax_U8_000/test.recipe
new file mode 100644
index 000000000..12e2b250b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_U8_000/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 4 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { }
+}
+operand {
+ name: "argmax/dim"
+ type: INT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ }
+}
+operation {
+ type: "ArgMax"
+ argmax_options {
+ output_type: INT64
+ }
+ input: "ifm"
+ input: "argmax/dim"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ArgMax_U8_000/test.reverse b/res/TensorFlowLiteRecipes/ArgMax_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ArgMax_U8_001/test.recipe b/res/TensorFlowLiteRecipes/ArgMax_U8_001/test.recipe
new file mode 100644
index 000000000..78a519304
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_U8_001/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 4 dim: 5 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { dim: 5 }
+}
+operand {
+ name: "argmax/dim"
+ type: INT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ }
+}
+operation {
+ type: "ArgMax"
+ argmax_options {
+ output_type: INT64
+ }
+ input: "ifm"
+ input: "argmax/dim"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ArgMax_U8_001/test.reverse b/res/TensorFlowLiteRecipes/ArgMax_U8_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_U8_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ArgMax_U8_002/test.recipe b/res/TensorFlowLiteRecipes/ArgMax_U8_002/test.recipe
new file mode 100644
index 000000000..3f1e5ec53
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_U8_002/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 4 dim: 5 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { dim: 4 }
+}
+operand {
+ name: "argmax/dim"
+ type: INT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operation {
+ type: "ArgMax"
+ argmax_options {
+ output_type: INT64
+ }
+ input: "ifm"
+ input: "argmax/dim"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ArgMax_U8_002/test.reverse b/res/TensorFlowLiteRecipes/ArgMax_U8_002/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_U8_002/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ArgMax_U8_003/test.recipe b/res/TensorFlowLiteRecipes/ArgMax_U8_003/test.recipe
new file mode 100644
index 000000000..2ef292045
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_U8_003/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 4 dim: 5 dim: 6 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { dim: 4 dim: 6 }
+}
+operand {
+ name: "argmax/dim"
+ type: INT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operation {
+ type: "ArgMax"
+ argmax_options {
+ output_type: INT64
+ }
+ input: "ifm"
+ input: "argmax/dim"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ArgMax_U8_003/test.reverse b/res/TensorFlowLiteRecipes/ArgMax_U8_003/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_U8_003/test.reverse
diff --git a/res/TensorFlowLiteRecipes/AveragePool2D_000/test.recipe b/res/TensorFlowLiteRecipes/AveragePool2D_000/test.recipe
new file mode 100644
index 000000000..746c34334
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/AveragePool2D_000/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "AveragePool2D"
+ averagepool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/AveragePool2D_000/test.reverse b/res/TensorFlowLiteRecipes/AveragePool2D_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/AveragePool2D_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/BatchToSpaceND_000/test.recipe b/res/TensorFlowLiteRecipes/BatchToSpaceND_000/test.recipe
new file mode 100644
index 000000000..3d7c28cb0
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/BatchToSpaceND_000/test.recipe
@@ -0,0 +1,38 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 dim: 1 dim: 1 dim: 1 }
+}
+operand {
+ name: "crops"
+ type: INT32
+ shape { dim: 2 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0"
+ arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "block_shape"
+ type: INT32
+ shape { dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "2" arg: "2"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 1 }
+}
+operation {
+ type: "BatchToSpaceND"
+ input: "ifm"
+ input: "block_shape"
+ input: "crops"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/BatchToSpaceND_000/test.reverse b/res/TensorFlowLiteRecipes/BatchToSpaceND_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/BatchToSpaceND_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Concatenation_000/test.recipe b/res/TensorFlowLiteRecipes/Concatenation_000/test.recipe
new file mode 100644
index 000000000..35641bd07
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Concatenation_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Concatenation"
+ concatenation_options {
+ axis: 3
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Concatenation_000/test.reverse b/res/TensorFlowLiteRecipes/Concatenation_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Concatenation_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Concatenation_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Concatenation_U8_000/test.recipe
new file mode 100644
index 000000000..3ae21e356
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Concatenation_U8_000/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ifm2"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 2 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Concatenation"
+ concatenation_options {
+ axis: 3
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Conv2D_000/test.recipe b/res/TensorFlowLiteRecipes/Conv2D_000/test.recipe
new file mode 100644
index 000000000..9cf8a0f69
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Conv2D_000/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Conv2D_000/test.reverse b/res/TensorFlowLiteRecipes/Conv2D_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Conv2D_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Conv2D_001/test.recipe b/res/TensorFlowLiteRecipes/Conv2D_001/test.recipe
new file mode 100644
index 000000000..bc41a3fc0
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Conv2D_001/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+input: "ker"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Conv2D_001/test.reverse b/res/TensorFlowLiteRecipes/Conv2D_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Conv2D_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Conv2D_002/test.recipe b/res/TensorFlowLiteRecipes/Conv2D_002/test.recipe
new file mode 100644
index 000000000..1901ead3b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Conv2D_002/test.recipe
@@ -0,0 +1,45 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: SAME
+ stride_w: 2
+ stride_h: 2
+ activation: RELU
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Conv2D_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Conv2D_U8_000/test.recipe
new file mode 100644
index 000000000..9a8e47853
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Conv2D_U8_000/test.recipe
@@ -0,0 +1,48 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ quant { min: 0 max: 1 scale: 0.004 zero_point: 0 }
+}
+operand {
+ name: "ker"
+ type: UINT8
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "102"
+ arg: "32"
+ }
+ quant { min: -4 max: 6 scale: 0.039215686 zero_point: 102 }
+}
+operand {
+ name: "bias"
+ type: INT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0"
+ arg: "1024"
+ }
+ quant { scale: 0.00015686276310589164 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+ quant { min: -4 max: 6 scale: 0.039215686 zero_point: 102 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Cos_000/test.recipe b/res/TensorFlowLiteRecipes/Cos_000/test.recipe
new file mode 100644
index 000000000..6fa8ac9b8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Cos_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Cos"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Cos_000/test.reverse b/res/TensorFlowLiteRecipes/Cos_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Cos_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/DepthwiseConv2D_000/test.recipe b/res/TensorFlowLiteRecipes/DepthwiseConv2D_000/test.recipe
new file mode 100644
index 000000000..17a3b06c7
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/DepthwiseConv2D_000/test.recipe
@@ -0,0 +1,41 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 8 }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ depth_multiplier: 1
+ activation : RELU6
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+input: "ker"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/DepthwiseConv2D_000/test.reverse b/res/TensorFlowLiteRecipes/DepthwiseConv2D_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/DepthwiseConv2D_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/DepthwiseConv2D_U8_000/test.recipe b/res/TensorFlowLiteRecipes/DepthwiseConv2D_U8_000/test.recipe
new file mode 100644
index 000000000..9a4ddf155
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/DepthwiseConv2D_U8_000/test.recipe
@@ -0,0 +1,46 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ker"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 8 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "bias"
+ type: INT32
+ shape { dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0"
+ arg: "1024"
+ }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ depth_multiplier: 1
+ activation : RELU6
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+input: "ker"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/DepthwiseConv2D_U8_000/test.reverse b/res/TensorFlowLiteRecipes/DepthwiseConv2D_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/DepthwiseConv2D_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Div_000/test.recipe b/res/TensorFlowLiteRecipes/Div_000/test.recipe
new file mode 100644
index 000000000..4fb76d467
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Div_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Div"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ div_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Div_000/test.reverse b/res/TensorFlowLiteRecipes/Div_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Div_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Equal_000/test.recipe b/res/TensorFlowLiteRecipes/Equal_000/test.recipe
new file mode 100644
index 000000000..dcc81f9f5
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Equal_000/test.recipe
@@ -0,0 +1,26 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Equal"
+ equal_options {
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Equal_000/test.reverse b/res/TensorFlowLiteRecipes/Equal_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Equal_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Exp_000/test.recipe b/res/TensorFlowLiteRecipes/Exp_000/test.recipe
new file mode 100644
index 000000000..06e0054db
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Exp_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Exp"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Exp_000/test.reverse b/res/TensorFlowLiteRecipes/Exp_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Exp_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_000/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_000/test.recipe
new file mode 100644
index 000000000..dca4c09f0
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_000/test.recipe
@@ -0,0 +1,34 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 8 dim: 64 }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 8 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+input: "weight"
+input: "bias"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_000/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_001/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_001/test.recipe
new file mode 100644
index 000000000..e404f759f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_001/test.recipe
@@ -0,0 +1,34 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 4 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 2 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+input: "weight"
+input: "bias"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_001/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_U8_000/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_U8_000/test.recipe
new file mode 100644
index 000000000..3c996218f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_U8_000/test.recipe
@@ -0,0 +1,35 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 }
+}
+operand {
+ name: "weight"
+ type: UINT8
+ shape { dim: 8 dim: 64 }
+ quant { min: 0 max: 1 scale: 0.000553869 zero_point: 0 }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 8 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+input: "weight"
+input: "bias"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_U8_000/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/LogicalNot_000/test.recipe b/res/TensorFlowLiteRecipes/LogicalNot_000/test.recipe
new file mode 100644
index 000000000..da02bd9af
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/LogicalNot_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: BOOL
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "LogicalNot"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/LogicalNot_000/test.reverse b/res/TensorFlowLiteRecipes/LogicalNot_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/LogicalNot_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/LogicalOr_000/test.recipe b/res/TensorFlowLiteRecipes/LogicalOr_000/test.recipe
new file mode 100644
index 000000000..636c8b857
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/LogicalOr_000/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm1"
+ type: BOOL
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: BOOL
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "LogicalOr"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/LogicalOr_000/test.reverse b/res/TensorFlowLiteRecipes/LogicalOr_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/LogicalOr_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/MaxPool2D_000/test.recipe b/res/TensorFlowLiteRecipes/MaxPool2D_000/test.recipe
new file mode 100644
index 000000000..718630f08
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/MaxPool2D_000/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "MaxPool2D"
+ maxpool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/MaxPool2D_000/test.reverse b/res/TensorFlowLiteRecipes/MaxPool2D_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/MaxPool2D_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/MaxPool2D_U8_000/test.recipe b/res/TensorFlowLiteRecipes/MaxPool2D_U8_000/test.recipe
new file mode 100644
index 000000000..a736988e9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/MaxPool2D_U8_000/test.recipe
@@ -0,0 +1,26 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+ quant { min: 0 max: 1 scale: 0.004 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+ quant { min: 0 max: 1 scale: 0.004 zero_point: 0 }
+}
+operation {
+ type: "MaxPool2D"
+ maxpool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Mean_000/test.recipe b/res/TensorFlowLiteRecipes/Mean_000/test.recipe
new file mode 100644
index 000000000..d383997d3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Mean_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Mean_000/test.reverse b/res/TensorFlowLiteRecipes/Mean_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Mean_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Mul_000/test.recipe b/res/TensorFlowLiteRecipes/Mul_000/test.recipe
new file mode 100644
index 000000000..43ca30dec
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Mul_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ mul_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Mul_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Mul_U8_000/test.recipe
new file mode 100644
index 000000000..2fbf96d29
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Mul_U8_000/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 0.5 zero_point: 0 }
+}
+operand {
+ name: "ifm2"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 0.5 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 0.5 zero_point: 0 }
+}
+operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ mul_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Pack_000/test.recipe b/res/TensorFlowLiteRecipes/Pack_000/test.recipe
new file mode 100644
index 000000000..5d9141d9e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Pack_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "input"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 dim: 3 }
+}
+operand {
+ name: "input_1"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 dim: 3 }
+}
+operand {
+ name: "stack_4d"
+ type: FLOAT32
+ shape { dim: 2 dim: 2 dim: 4 dim: 3 }
+}
+operation {
+ type: "Pack"
+ pack_options {
+ values_count: 2,
+ axis: 1
+ }
+ input: "input"
+ input: "input_1"
+ output: "stack_4d"
+}
+input: "input"
+input: "input_1"
+output: "stack_4d"
diff --git a/res/TensorFlowLiteRecipes/Pack_000/test.reverse b/res/TensorFlowLiteRecipes/Pack_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Pack_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Pack_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Pack_U8_000/test.recipe
new file mode 100644
index 000000000..f00199980
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Pack_U8_000/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "input"
+ type: UINT8
+ shape { dim: 2 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "input_1"
+ type: UINT8
+ shape { dim: 2 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "stack_4d"
+ type: UINT8
+ shape { dim: 2 dim: 2 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Pack"
+ pack_options {
+ values_count: 2,
+ axis: 1
+ }
+ input: "input"
+ input: "input_1"
+ output: "stack_4d"
+}
+input: "input"
+input: "input_1"
+output: "stack_4d"
diff --git a/res/TensorFlowLiteRecipes/Pack_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Pack_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Pack_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Pad_000/test.recipe b/res/TensorFlowLiteRecipes/Pad_000/test.recipe
new file mode 100644
index 000000000..2cc980b9c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Pad_000/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "padding"
+ type: INT32
+ shape { dim: 4 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0"
+ arg: "1" arg: "1"
+ arg: "2" arg: "2"
+ arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 7 dim: 2 }
+}
+operation {
+ type: "Pad"
+ input: "ifm"
+ input: "padding"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Pad_000/test.reverse b/res/TensorFlowLiteRecipes/Pad_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Pad_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Pad_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Pad_U8_000/test.recipe
new file mode 100644
index 000000000..7a835bdb8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Pad_U8_000/test.recipe
@@ -0,0 +1,32 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "padding"
+ type: INT32
+ shape { dim: 4 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0"
+ arg: "1" arg: "1"
+ arg: "2" arg: "2"
+ arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 5 dim: 7 dim: 2 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Pad"
+ input: "ifm"
+ input: "padding"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Pad_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Pad_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Pad_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quantization_000/test.recipe b/res/TensorFlowLiteRecipes/Quantization_000/test.recipe
new file mode 100644
index 000000000..be5d222a2
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quantization_000/test.recipe
@@ -0,0 +1,46 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ quant { min: 0 max: 128 scale: 2 zero_point: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+ quant { min: 0 max: 80 scale: 1.5 zero_point: 3 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quantization_000/test.reverse b/res/TensorFlowLiteRecipes/Quantization_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quantization_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReLU6_000/test.recipe b/res/TensorFlowLiteRecipes/ReLU6_000/test.recipe
new file mode 100644
index 000000000..226593593
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReLU6_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU6"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReLU6_000/test.reverse b/res/TensorFlowLiteRecipes/ReLU6_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReLU6_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReLU_000/test.recipe b/res/TensorFlowLiteRecipes/ReLU_000/test.recipe
new file mode 100644
index 000000000..8eaa3602f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReLU_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReLU_000/test.reverse b/res/TensorFlowLiteRecipes/ReLU_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReLU_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Reshape_000/test.recipe b/res/TensorFlowLiteRecipes/Reshape_000/test.recipe
new file mode 100644
index 000000000..cdca58980
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Reshape_000/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 10 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 10
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Reshape_000/test.reverse b/res/TensorFlowLiteRecipes/Reshape_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Reshape_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Reshape_001/test.recipe b/res/TensorFlowLiteRecipes/Reshape_001/test.recipe
new file mode 100644
index 000000000..bd5213f39
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Reshape_001/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 10 }
+}
+operand {
+ name: "shape"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "-1" arg: "10" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 10 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: -1
+ new_shape: 10
+ }
+ input: "ifm"
+ input: "shape"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Reshape_001/test.reverse b/res/TensorFlowLiteRecipes/Reshape_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Reshape_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Reshape_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Reshape_U8_000/test.recipe
new file mode 100644
index 000000000..5fe10e599
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Reshape_U8_000/test.recipe
@@ -0,0 +1,22 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 1 dim: 1 dim: 10 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 10 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 10
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Reshape_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Reshape_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Reshape_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Rsqrt_000/test.recipe b/res/TensorFlowLiteRecipes/Rsqrt_000/test.recipe
new file mode 100644
index 000000000..ba16f2b6b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Rsqrt_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Rsqrt_000/test.reverse b/res/TensorFlowLiteRecipes/Rsqrt_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Rsqrt_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Softmax_000/test.recipe b/res/TensorFlowLiteRecipes/Softmax_000/test.recipe
new file mode 100644
index 000000000..ce9abf555
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Softmax_000/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Softmax"
+ softmax_options {
+ beta: 0.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Softmax_000/test.reverse b/res/TensorFlowLiteRecipes/Softmax_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Softmax_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Softmax_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Softmax_U8_000/test.recipe
new file mode 100644
index 000000000..a753ca437
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Softmax_U8_000/test.recipe
@@ -0,0 +1,22 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 1001 }
+ quant { min: -6.02353 max: 5.97647 scale: 0.0470588 zero_point: 128 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 1001 }
+ quant { min: 0 max: 0.996094 scale: 0.00390625 zero_point: 0 }
+}
+operation {
+ type: "Softmax"
+ softmax_options {
+ beta: 1.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Softmax_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Softmax_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Softmax_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Sqrt_000/test.recipe b/res/TensorFlowLiteRecipes/Sqrt_000/test.recipe
new file mode 100644
index 000000000..1754f9a58
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Sqrt_000/test.recipe
@@ -0,0 +1,18 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ filler { tag: "constant" arg: "3.5" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Sqrt_000/test.reverse b/res/TensorFlowLiteRecipes/Sqrt_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Sqrt_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Sub_000/test.recipe b/res/TensorFlowLiteRecipes/Sub_000/test.recipe
new file mode 100644
index 000000000..c934bb8aa
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Sub_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim:2 dim:3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim:2 dim:3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim:2 dim:3 }
+}
+operation {
+ type: "Sub"
+ sub_options {
+ activation: 0
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Sub_000/test.reverse b/res/TensorFlowLiteRecipes/Sub_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Sub_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Sub_001/test.recipe b/res/TensorFlowLiteRecipes/Sub_001/test.recipe
new file mode 100644
index 000000000..09f46e4f6
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Sub_001/test.recipe
@@ -0,0 +1,42 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim:2 dim:3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim:2 dim:3 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim:2 dim:3 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim:2 dim:3 }
+}
+operation {
+ type: "Sub"
+ sub_options {
+ activation: 0
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm1"
+}
+operation {
+ type: "Sub"
+ sub_options {
+ activation: 0
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm2"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm1"
+output: "ofm2"
diff --git a/res/TensorFlowLiteRecipes/Sub_001/test.reverse b/res/TensorFlowLiteRecipes/Sub_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Sub_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Sub_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Sub_U8_000/test.recipe
new file mode 100644
index 000000000..eeeb8aacc
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Sub_U8_000/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ifm2"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Sub"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ sub_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Sub_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Sub_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Sub_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Transpose_000/test.recipe b/res/TensorFlowLiteRecipes/Transpose_000/test.recipe
new file mode 100644
index 000000000..82a85c13b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Transpose_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 8 dim: 1 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "0" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 8 dim: 1 dim: 3 }
+}
+
+operation {
+ type: "Transpose"
+ transpose_options {
+ }
+ input: "ifm"
+ input: "perm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Transpose_000/test.reverse b/res/TensorFlowLiteRecipes/Transpose_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Transpose_000/test.reverse
diff --git a/res/TensorFlowLiteSchema/1.13.1/schema.fbs b/res/TensorFlowLiteSchema/1.13.1/schema.fbs
new file mode 100644
index 000000000..980f13b19
--- /dev/null
+++ b/res/TensorFlowLiteSchema/1.13.1/schema.fbs
@@ -0,0 +1,794 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the quantization parameters above are ignored and the
+ // value of the QuantizationDetails union below should be used.
+ details:QuantizationDetails;
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+enum BuiltinOperator : byte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ // DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow bidrectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+
+  // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation is configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+  // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional input and output tensors are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+  // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+  // Metadata about the model. Indirects into the existing buffers list.
+ metadata_buffer:[int];
+}
+
+root_type Model;
diff --git a/res/TensorFlowLiteSchema/1.14.0/schema.fbs b/res/TensorFlowLiteSchema/1.14.0/schema.fbs
new file mode 100644
index 000000000..b5fc0f31b
--- /dev/null
+++ b/res/TensorFlowLiteSchema/1.14.0/schema.fbs
@@ -0,0 +1,873 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3
+ quantized_dimension:int;
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+enum BuiltinOperator : byte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ // DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+
+  // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation is configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+  // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional input and output tensors are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+  // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+  // Metadata about the model. Indirects into the existing buffers list.
+ metadata_buffer:[int];
+}
+
+root_type Model;
diff --git a/res/TensorFlowLiteSchema/1.15.2/schema.fbs b/res/TensorFlowLiteSchema/1.15.2/schema.fbs
new file mode 100644
index 000000000..d63386035
--- /dev/null
+++ b/res/TensorFlowLiteSchema/1.15.2/schema.fbs
@@ -0,0 +1,922 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3
+ quantized_dimension:int;
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+enum BuiltinOperator : byte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimension is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+
+ // The version of the operator. The version need to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operations is configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+  // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional input and output tensors are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator.(e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+  // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+}
+
+root_type Model;
diff --git a/res/TensorFlowLiteSchema/2.1.0/schema.fbs b/res/TensorFlowLiteSchema/2.1.0/schema.fbs
new file mode 100644
index 000000000..f1fbfc655
--- /dev/null
+++ b/res/TensorFlowLiteSchema/2.1.0/schema.fbs
@@ -0,0 +1,940 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3
+ quantized_dimension:int;
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+enum BuiltinOperator : byte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimension is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+
+ // The version of the operator. The version need to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operations is configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+  // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional input and output tensors are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator.(e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+  // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+}
+
+root_type Model;
diff --git a/res/TensorFlowLiteSchema/README.md b/res/TensorFlowLiteSchema/README.md
new file mode 100644
index 000000000..b99cfe4a6
--- /dev/null
+++ b/res/TensorFlowLiteSchema/README.md
@@ -0,0 +1,7 @@
+# TensorFlow Lite Schema
+
+A collection of T/F Lite schema files (for each version)
+
+## How to add a new schema?
+
+Update [SCHEMA.lst](SCHEMA.lst) and run "download.sh".
diff --git a/res/TensorFlowLiteSchema/SCHEMA.lst b/res/TensorFlowLiteSchema/SCHEMA.lst
new file mode 100644
index 000000000..f264b0c9f
--- /dev/null
+++ b/res/TensorFlowLiteSchema/SCHEMA.lst
@@ -0,0 +1,5 @@
+VERSION,URL
+1.13.1,https://raw.githubusercontent.com/tensorflow/tensorflow/v1.13.1/tensorflow/lite/schema/schema.fbs
+1.14.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v1.14.0/tensorflow/lite/schema/schema.fbs
+1.15.2,https://raw.githubusercontent.com/tensorflow/tensorflow/v1.15.2/tensorflow/lite/schema/schema.fbs
+2.1.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.1.0/tensorflow/lite/schema/schema.fbs
diff --git a/res/TensorFlowLiteSchema/download.sh b/res/TensorFlowLiteSchema/download.sh
new file mode 100755
index 000000000..9a946c3d9
--- /dev/null
+++ b/res/TensorFlowLiteSchema/download.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+while IFS=',' read -r VERSION URL
+do
+ echo "Download ${VERSION} from '${URL}'"
+ mkdir -p "${VERSION}"
+ wget -nv -O "${VERSION}/schema.fbs" "${URL}"
+ echo "Download ${VERSION} from '${URL}' - Done"
+done < <(cat SCHEMA.lst | tail -n +2)
diff --git a/res/TensorFlowPythonExamples/.gitignore b/res/TensorFlowPythonExamples/.gitignore
new file mode 100644
index 000000000..bee8a64b7
--- /dev/null
+++ b/res/TensorFlowPythonExamples/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/res/TensorFlowPythonExamples/README.md b/res/TensorFlowPythonExamples/README.md
new file mode 100644
index 000000000..63e249b93
--- /dev/null
+++ b/res/TensorFlowPythonExamples/README.md
@@ -0,0 +1,31 @@
+# TensorFlow Python Examples
+
+## Prerequisite
+
+- Python 3.X
+- TensorFlow 1.13.1
+
+## Directory Layout
+
+```
+tfpem.py <- TensorFlow Python Example Manager
+examples/
+ [EXAMPLE NAME]/
+ __init__.py
+```
+
+## HOWTO: Create a Python environment
+
+TBA
+
+## HOWTO: Generate a pbtxt from examples
+
+```
+$ /path/to/python -B <path/to/tfpem.py> [EXAMPLE NAME 1] [EXAMPLE NAME 2] ...
+```
+
+NOTE: Pass the "-B" option to prevent Python from generating "__pycache__" directories.
+
+## HOWTO: Add a new example
+
+TBA
diff --git a/res/TensorFlowPythonExamples/examples/abs/__init__.py b/res/TensorFlowPythonExamples/examples/abs/__init__.py
new file mode 100755
index 000000000..fd5515595
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/abs/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+abs_ = tf.compat.v1.abs(in_)
diff --git a/res/TensorFlowPythonExamples/examples/add/__init__.py b/res/TensorFlowPythonExamples/examples/add/__init__.py
new file mode 100755
index 000000000..7e283f35f
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/add/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.add(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/argmax/__init__.py b/res/TensorFlowPythonExamples/examples/argmax/__init__.py
new file mode 100755
index 000000000..059df97f9
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/argmax/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
+op_ = tf.compat.v1.math.argmax(in_)
diff --git a/res/TensorFlowPythonExamples/examples/biasadd/__init__.py b/res/TensorFlowPythonExamples/examples/biasadd/__init__.py
new file mode 100755
index 000000000..eb8a69bc3
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/biasadd/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1, 2, 3), name="Hole")
+op_ = tf.nn.bias_add(in_, bias=[1.0, 1.0, -1.0], data_format="NHWC")
diff --git a/res/TensorFlowPythonExamples/examples/cos/__init__.py b/res/TensorFlowPythonExamples/examples/cos/__init__.py
new file mode 100755
index 000000000..cfce5d830
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/cos/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.cos(in_)
diff --git a/res/TensorFlowPythonExamples/examples/div/__init__.py b/res/TensorFlowPythonExamples/examples/div/__init__.py
new file mode 100755
index 000000000..2887771ff
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/div/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.div(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/elu/__init__.py b/res/TensorFlowPythonExamples/examples/elu/__init__.py
new file mode 100755
index 000000000..b41f65111
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/elu/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
+elu_ = tf.compat.v1.nn.elu(in_)
diff --git a/res/TensorFlowPythonExamples/examples/exp/__init__.py b/res/TensorFlowPythonExamples/examples/exp/__init__.py
new file mode 100644
index 000000000..e83638436
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/exp/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.exp(in_)
diff --git a/res/TensorFlowPythonExamples/examples/floor/__init__.py b/res/TensorFlowPythonExamples/examples/floor/__init__.py
new file mode 100755
index 000000000..3b3f5bfc3
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/floor/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
+op_ = tf.compat.v1.floor(in_)
diff --git a/res/TensorFlowPythonExamples/examples/floordiv/__init__.py b/res/TensorFlowPythonExamples/examples/floordiv/__init__.py
new file mode 100755
index 000000000..34f413f2b
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/floordiv/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.floordiv(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/greater/__init__.py b/res/TensorFlowPythonExamples/examples/greater/__init__.py
new file mode 100755
index 000000000..e88f57471
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/greater/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.greater(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py b/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py
new file mode 100755
index 000000000..b15fbd324
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.greater_equal(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py b/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py
new file mode 100755
index 000000000..d595edbd0
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
+op_ = tf.compat.v1.nn.leaky_relu(in_)
diff --git a/res/TensorFlowPythonExamples/examples/less/__init__.py b/res/TensorFlowPythonExamples/examples/less/__init__.py
new file mode 100755
index 000000000..41ba18c62
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/less/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.less(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/less_equal/__init__.py b/res/TensorFlowPythonExamples/examples/less_equal/__init__.py
new file mode 100755
index 000000000..d60bf2a73
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/less_equal/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.less_equal(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/logical_not/__init__.py b/res/TensorFlowPythonExamples/examples/logical_not/__init__.py
new file mode 100755
index 000000000..f1bcc2c8f
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/logical_not/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.logical_not(in_)
diff --git a/res/TensorFlowPythonExamples/examples/logical_or/__init__.py b/res/TensorFlowPythonExamples/examples/logical_or/__init__.py
new file mode 100755
index 000000000..991d61ab9
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/logical_or/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+lhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
+rhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.logical_or(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/matmul/__init__.py b/res/TensorFlowPythonExamples/examples/matmul/__init__.py
new file mode 100755
index 000000000..760241de7
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/matmul/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 4), name="Hole")
+rhs_ = tf.compat.v1.constant(dtype=tf.float32, shape=(4, 4), name="Hole", value=1.0)
+op_ = tf.compat.v1.matmul(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/multiply/__init__.py b/res/TensorFlowPythonExamples/examples/multiply/__init__.py
new file mode 100755
index 000000000..da8885660
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/multiply/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.multiply(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/not_equal/__init__.py b/res/TensorFlowPythonExamples/examples/not_equal/__init__.py
new file mode 100755
index 000000000..95073fe4a
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/not_equal/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.not_equal(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/pack/__init__.py b/res/TensorFlowPythonExamples/examples/pack/__init__.py
new file mode 100755
index 000000000..609bc9b76
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/pack/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+in_1 = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole")
+in_2 = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole")
+op_ = tf.compat.v1.stack([in_1, in_2])
diff --git a/res/TensorFlowPythonExamples/examples/pad/__init__.py b/res/TensorFlowPythonExamples/examples/pad/__init__.py
new file mode 100755
index 000000000..ac5cf81fa
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/pad/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+tensor_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole")
+paddings_ = tf.compat.v1.constant([[1, 1], [2, 2]], name="Hole")
+op_ = tf.compat.v1.pad(tensor_, paddings_)
diff --git a/res/TensorFlowPythonExamples/examples/pow/__init__.py b/res/TensorFlowPythonExamples/examples/pow/__init__.py
new file mode 100755
index 000000000..960032a84
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/pow/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.pow(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/prelu/__init__.py b/res/TensorFlowPythonExamples/examples/prelu/__init__.py
new file mode 100755
index 000000000..2ab030265
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/prelu/__init__.py
@@ -0,0 +1,7 @@
+import tensorflow as tf
+from tensorflow.compat.v1.keras import layers
+
+model = tf.compat.v1.keras.Sequential()
+model.add(layers.PReLU())
+# TODO Find a way to freeze Keras model for inference
+model.build((1, 1))
diff --git a/res/TensorFlowPythonExamples/examples/relu/__init__.py b/res/TensorFlowPythonExamples/examples/relu/__init__.py
new file mode 100755
index 000000000..a144a1212
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/relu/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
+op_ = tf.compat.v1.nn.relu(in_)
diff --git a/res/TensorFlowPythonExamples/examples/relu6/__init__.py b/res/TensorFlowPythonExamples/examples/relu6/__init__.py
new file mode 100755
index 000000000..f58ae7c2c
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/relu6/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
+op_ = tf.compat.v1.nn.relu6(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reshape/__init.py__ b/res/TensorFlowPythonExamples/examples/reshape/__init.py__
new file mode 100644
index 000000000..3afe7efa9
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/reshape/__init.py__
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.reshape(in_, shape=[2,2,2,2])
diff --git a/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py b/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py
new file mode 100755
index 000000000..422bf1db5
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 3), name="Hole")
+op_ = tf.compat.v1.image.resize_bilinear(in_, [16, 16])
diff --git a/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py b/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py
new file mode 100755
index 000000000..a14022948
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 3), name="Hole")
+op_ = tf.compat.v1.image.resize_nearest_neighbor(in_, [16, 16])
diff --git a/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py b/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py
new file mode 100755
index 000000000..90500bd11
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
+op_ = tf.compat.v1.rsqrt(in_)
diff --git a/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py b/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py
new file mode 100755
index 000000000..43328f2cb
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
+op_ = tf.compat.v1.nn.sigmoid(in_)
diff --git a/res/TensorFlowPythonExamples/examples/softmax/__init__.py b/res/TensorFlowPythonExamples/examples/softmax/__init__.py
new file mode 100755
index 000000000..5b8d1cdfb
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/softmax/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
+op_ = tf.compat.v1.nn.softmax(in_)
diff --git a/res/TensorFlowPythonExamples/examples/sqrt/__init__.py b/res/TensorFlowPythonExamples/examples/sqrt/__init__.py
new file mode 100755
index 000000000..4aab5da9c
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/sqrt/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
+op_ = tf.compat.v1.sqrt(in_)
diff --git a/res/TensorFlowPythonExamples/examples/subtract/__init__.py b/res/TensorFlowPythonExamples/examples/subtract/__init__.py
new file mode 100755
index 000000000..feb11b12e
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/subtract/__init__.py
@@ -0,0 +1,5 @@
+import tensorflow as tf
+
+lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.compat.v1.subtract(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/tanh/__init__.py b/res/TensorFlowPythonExamples/examples/tanh/__init__.py
new file mode 100755
index 000000000..dd202a78d
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/tanh/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
+op_ = tf.compat.v1.tanh(in_)
diff --git a/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py b/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py
new file mode 100755
index 000000000..5230bbac6
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py
@@ -0,0 +1,4 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 16, 16, 3), name="Hole")
+op_ = tf.compat.v1.image.yuv_to_rgb(in_)
diff --git a/res/TensorFlowPythonExamples/requirements.txt b/res/TensorFlowPythonExamples/requirements.txt
new file mode 100644
index 000000000..2a938941f
--- /dev/null
+++ b/res/TensorFlowPythonExamples/requirements.txt
@@ -0,0 +1,18 @@
+absl-py==0.9.0
+astor==0.8.1
+gast==0.3.3
+grpcio==1.27.2
+h5py==2.10.0
+Keras-Applications==1.0.8
+Keras-Preprocessing==1.1.0
+Markdown==3.2.1
+mock==4.0.2
+numpy==1.18.2
+pkg-resources==0.0.0
+protobuf==3.11.3
+six==1.14.0
+tensorboard==1.13.1
+tensorflow==1.13.1
+tensorflow-estimator==1.13.0
+termcolor==1.1.0
+Werkzeug==1.0.0
diff --git a/res/TensorFlowPythonExamples/tfpem.py b/res/TensorFlowPythonExamples/tfpem.py
new file mode 100755
index 000000000..514200c19
--- /dev/null
+++ b/res/TensorFlowPythonExamples/tfpem.py
@@ -0,0 +1,25 @@
+# TensorFlow Python Example Manager
+
+import tensorflow as tf
+import importlib
+import argparse
+
+parser = argparse.ArgumentParser(description='Process TensorFlow Python Examples')
+
+parser.add_argument('--mode', metavar='MODE', choices=['pbtxt'], default='pbtxt')
+parser.add_argument('examples', metavar='EXAMPLES', nargs='+')
+
+args = parser.parse_args()
+
+if args.mode == 'pbtxt':
+ for example in args.examples:
+ print("Generate '" + example + ".pbtxt'")
+
+ tf.compat.v1.reset_default_graph()
+ # https://stackoverflow.com/questions/37808866/proper-way-to-dynamically-import-a-module-with-relative-imports
+ importlib.import_module("examples." + example)
+
+ with open(example + ".pbtxt", "w") as f:
+ f.write(str(tf.compat.v1.get_default_graph().as_graph_def(add_shapes=True)))
+
+ print("Generate '" + example + ".pbtxt' - Done")
diff --git a/res/TensorFlowTests/NET_0003/test.py b/res/TensorFlowTests/NET_0003/test.py
index b5bad2dae..b5bad2dae 100644..100755
--- a/res/TensorFlowTests/NET_0003/test.py
+++ b/res/TensorFlowTests/NET_0003/test.py
diff --git a/res/TensorFlowTests/NET_0004/test.py b/res/TensorFlowTests/NET_0004/test.py
index a0c790d79..a0c790d79 100644..100755
--- a/res/TensorFlowTests/NET_0004/test.py
+++ b/res/TensorFlowTests/NET_0004/test.py
diff --git a/res/TensorFlowTests/UNIT_Maximum_000/test.info b/res/TensorFlowTests/UNIT_Maximum_000/test.info
new file mode 100644
index 000000000..f8f74e382
--- /dev/null
+++ b/res/TensorFlowTests/UNIT_Maximum_000/test.info
@@ -0,0 +1,3 @@
+input, input_01:0, TF_FLOAT, [1, 3, 3, 1]
+input, input_02:0, TF_FLOAT, [1, 3, 3, 1]
+output, maximum:0, TF_FLOAT, [1, 3, 3, 1]
diff --git a/res/TensorFlowTests/UNIT_Maximum_000/test.pbtxt b/res/TensorFlowTests/UNIT_Maximum_000/test.pbtxt
new file mode 100644
index 000000000..9ab81b52b
--- /dev/null
+++ b/res/TensorFlowTests/UNIT_Maximum_000/test.pbtxt
@@ -0,0 +1,70 @@
+node {
+ name: "input_01"
+ op: "Placeholder"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 1
+ }
+ dim {
+ size: 3
+ }
+ dim {
+ size: 3
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+}
+node {
+ name: "input_02"
+ op: "Placeholder"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 1
+ }
+ dim {
+ size: 3
+ }
+ dim {
+ size: 3
+ }
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+}
+node {
+ name: "maximum"
+ op: "Maximum"
+ input: "input_01"
+ input: "input_02"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
diff --git a/res/TensorFlowTests/UNIT_Maximum_001/test.info b/res/TensorFlowTests/UNIT_Maximum_001/test.info
new file mode 100644
index 000000000..24dc2be07
--- /dev/null
+++ b/res/TensorFlowTests/UNIT_Maximum_001/test.info
@@ -0,0 +1,3 @@
+input, input_01:0, TF_FLOAT, [1, 3, 3, 5]
+input, input_02:0, TF_FLOAT, [1, 1, 1, 5]
+output, maximum:0, TF_FLOAT, [1, 3, 3, 5]
diff --git a/res/TensorFlowTests/UNIT_Maximum_001/test.pbtxt b/res/TensorFlowTests/UNIT_Maximum_001/test.pbtxt
new file mode 100644
index 000000000..29e90ab2f
--- /dev/null
+++ b/res/TensorFlowTests/UNIT_Maximum_001/test.pbtxt
@@ -0,0 +1,70 @@
+node {
+ name: "input_01"
+ op: "Placeholder"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 1
+ }
+ dim {
+ size: 3
+ }
+ dim {
+ size: 3
+ }
+ dim {
+ size: 5
+ }
+ }
+ }
+ }
+}
+node {
+ name: "input_02"
+ op: "Placeholder"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 1
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 1
+ }
+ dim {
+ size: 5
+ }
+ }
+ }
+ }
+}
+node {
+ name: "maximum"
+ op: "Maximum"
+ input: "input_01"
+ input: "input_02"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
diff --git a/res/TensorFlowTests/UNIT_Maximum_002/test.info b/res/TensorFlowTests/UNIT_Maximum_002/test.info
new file mode 100644
index 000000000..44ddd085a
--- /dev/null
+++ b/res/TensorFlowTests/UNIT_Maximum_002/test.info
@@ -0,0 +1,3 @@
+input, input_01:0, TF_FLOAT, [1, 3, 3, 5]
+input, input_02:0, TF_FLOAT, [5]
+output, maximum:0, TF_FLOAT, [1, 3, 3, 5]
diff --git a/res/TensorFlowTests/UNIT_Maximum_002/test.pbtxt b/res/TensorFlowTests/UNIT_Maximum_002/test.pbtxt
new file mode 100644
index 000000000..ca4ae80de
--- /dev/null
+++ b/res/TensorFlowTests/UNIT_Maximum_002/test.pbtxt
@@ -0,0 +1,61 @@
+node {
+ name: "input_01"
+ op: "Placeholder"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 1
+ }
+ dim {
+ size: 3
+ }
+ dim {
+ size: 3
+ }
+ dim {
+ size: 5
+ }
+ }
+ }
+ }
+}
+node {
+ name: "input_02"
+ op: "Placeholder"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 5
+ }
+ }
+ }
+ }
+}
+node {
+ name: "maximum"
+ op: "Maximum"
+ input: "input_01"
+ input: "input_02"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+}
diff --git a/runtime/contrib/README.md b/runtime/contrib/README.md
deleted file mode 100644
index 2f8b709eb..000000000
--- a/runtime/contrib/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# nnfw contrib
-
-The `contrib` directory is basically a contribution channel where contributors can create a project
-and start the code development. The projects in the `contrib` directory may not be directly related
-to `nnfw` but should have its own purpose that could augment the nnfw project.
-
-If you are interested in proposing a new project, please create a pull request (PR) with a new
-project directory under `contrib` including the description of proposing project. The PR will be
-reviewed by reviewers in `nnfw`, and the acceptance of new project will be determined based on the
-PR reviews.
diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h
index 405ca9879..69dfcc7b2 100644
--- a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h
+++ b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd All Rights Reserved
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the License);
* you may not use this file except in compliance with the License.
diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h
index af1947ff0..b099ba9ba 100644
--- a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h
+++ b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd All Rights Reserved
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the License);
* you may not use this file except in compliance with the License.
diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp b/runtime/contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp
index 0304720f7..576a659ac 100644
--- a/runtime/contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp
+++ b/runtime/contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd All Rights Reserved
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the License);
* you may not use this file except in compliance with the License.
diff --git a/runtime/contrib/android_benchmark_app/CMakeLists.txt b/runtime/contrib/android_benchmark_app/CMakeLists.txt
index 8e9d3c7a1..ac511cfa3 100644
--- a/runtime/contrib/android_benchmark_app/CMakeLists.txt
+++ b/runtime/contrib/android_benchmark_app/CMakeLists.txt
@@ -81,11 +81,11 @@ add_custom_target(android-benchmark-apk ALL
COMMAND ${CMAKE_COMMAND} -E copy ${CORE_LIBRARY} ${LIB_DIR}
COMMAND ${CMAKE_COMMAND} -E copy ${RUNTIME_LIBRARY} ${LIB_DIR}
COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:android_benchmark_native> ${LIB_DIR}
- COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:neurun> ${LIB_DIR}
- COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:neurun_backend_acl_cl> ${LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:onert> ${LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:onert_backend_acl_cl> ${LIB_DIR}
COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:arm_compute_ex> ${LIB_DIR}
- COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:neurun_backend_acl_neon> ${LIB_DIR}
- COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:neurun_backend_cpu> ${LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:onert_backend_acl_neon> ${LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:onert_backend_cpu> ${LIB_DIR}
COMMAND ${CMAKE_COMMAND} -E echo ${ANDROID_BUILD_TOOLS_DIR}/aapt package -f -0 tflite -M ${CMAKE_CURRENT_SOURCE_DIR}/AndroidManifest.xml -S ${CMAKE_CURRENT_SOURCE_DIR}/res/ -I ${ANDROID_PLATFORM_DIR}/android.jar -F ${CMAKE_CURRENT_BINARY_DIR}/android-benchmark.unsigned.pkg ${PKG_DIR}
COMMAND ${ANDROID_BUILD_TOOLS_DIR}/aapt package -f
-0 tflite
@@ -94,4 +94,4 @@ add_custom_target(android-benchmark-apk ALL
-I ${ANDROID_PLATFORM_DIR}/android.jar
-F ${CMAKE_CURRENT_BINARY_DIR}/android-benchmark.unsigned.pkg
${PKG_DIR}
- DEPENDS android_benchmark_native neurun neurun_backend_acl_cl neurun_backend_acl_neon neurun_backend_cpu)
+ DEPENDS android_benchmark_native onert onert_backend_acl_cl onert_backend_acl_neon onert_backend_cpu)
diff --git a/runtime/contrib/android_benchmark_app/README.md b/runtime/contrib/android_benchmark_app/README.md
index 2868e0ada..ce165cd5d 100644
--- a/runtime/contrib/android_benchmark_app/README.md
+++ b/runtime/contrib/android_benchmark_app/README.md
@@ -5,7 +5,7 @@ An Android sample app that run `.tflite` and measure performance.
You can run with two engines.
- Tensorflow Lite Interpreter
-- NN API Delegate (neurun)
+- NN API Delegate (onert)
## Build
@@ -28,7 +28,7 @@ make TARGET_OS=android \
EXT_ACL_FOLDER=/home/hanjoung/ws/temp/arm_compute-v19.05-bin-android/lib/android-arm64-v8a-neon-cl \
ANDROID_BUILD_TOOLS_DIR=/home/hanjoung/ws/android-tools/sdk/build-tools/27.0.3/ \
ANDROID_SDK_DIR=/home/hanjoung/ws/android-tools/sdk \
- TFLITE_MODEL_PATH=/Users/hanjoung/ws/ghent/STAR/nnfw/tests/framework/cache/MODELS/mobilenet/mobilenet_v1_0.25_128.tflite \
+ TFLITE_MODEL_PATH=/Users/hanjoung/ws/ghent/STAR/nnfw/tests/scripts/framework/cache/MODELS/mobilenet/mobilenet_v1_0.25_128.tflite \
ANDROID_BOOST_ROOT=/home/hanjoung/ws/gh/moritz-wundke/Boost-for-Android/build/out/arm64-v8a
```
diff --git a/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp b/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp
index f2ca1312c..4b0e4395f 100644
--- a/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp
+++ b/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include "ndk_main.h"
#include "tensorflow/lite/kernels/register.h"
diff --git a/runtime/contrib/android_benchmark_app/cpp/ndk_main.h b/runtime/contrib/android_benchmark_app/cpp/ndk_main.h
index 8de39ce9f..8f2969870 100644
--- a/runtime/contrib/android_benchmark_app/cpp/ndk_main.h
+++ b/runtime/contrib/android_benchmark_app/cpp/ndk_main.h
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class com_ndk_tflbench_MainActivity */
diff --git a/runtime/contrib/android_tflite/builtin_ops_jni.cc b/runtime/contrib/android_tflite/builtin_ops_jni.cc
index af9d3325d..5770701ea 100644
--- a/runtime/contrib/android_tflite/builtin_ops_jni.cc
+++ b/runtime/contrib/android_tflite/builtin_ops_jni.cc
@@ -1,18 +1,19 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- Copyright 2019 Samsung Electronics Co., Ltd. All Rights Reserved
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#include "tensorflow/lite/kernels/register.h"
#include "tflite/ext/kernels/register.h"
diff --git a/runtime/contrib/custom_op/customOp-workflow.png b/runtime/contrib/custom_op/customOp-workflow.png
deleted file mode 100644
index 0487f5b63..000000000
--- a/runtime/contrib/custom_op/customOp-workflow.png
+++ /dev/null
Binary files differ
diff --git a/runtime/contrib/heap_trace/CMakeLists.txt b/runtime/contrib/heap_trace/CMakeLists.txt
index 1f18152d8..3043e2c0c 100644
--- a/runtime/contrib/heap_trace/CMakeLists.txt
+++ b/runtime/contrib/heap_trace/CMakeLists.txt
@@ -4,12 +4,17 @@ endif(NOT BUILD_HEAP_TRACE)
add_library(heap_trace SHARED src/cl_create_buffer_stub.cc
src/cl_release_mem_object.cc
+ src/cl_retain_mem_object_stub.cc
src/free_stub.cc
src/malloc_stub.cc
src/realloc_stub.cc
src/valloc_stub.cc
+ src/calloc_stub.cc
+ src/posix_memalign_stub.cc
+ src/aligned_alloc_stub.cc
src/symbol_searcher.cc
src/trace.cc
+ src/memory_pool_for_symbol_searcher_internals.cc
)
target_link_libraries(heap_trace PRIVATE ${CMAKE_DL_LIBS})
diff --git a/runtime/contrib/heap_trace/src/aligned_alloc_stub.cc b/runtime/contrib/heap_trace/src/aligned_alloc_stub.cc
new file mode 100644
index 000000000..c70c24e1e
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/aligned_alloc_stub.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trace.h"
+#include "function_resolver.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
+
+#include <memory>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+extern "C" {
+
+void *aligned_alloc(size_t alignment, size_t sz) noexcept
+{
+ static auto isOriginalFunctionCallSuccessful = [](void *result) -> bool { return result; };
+
+ if (isCurrentAllocationForSymbolSearcherInternalUsage())
+ {
+ return MemoryPoolForSymbolSearcherInternals{}.allocate(sz);
+ }
+
+ static auto originalFunction = findFunctionByName<void *, size_t, size_t>("aligned_alloc");
+ void *result = originalFunction(alignment, sz);
+ if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
+ {
+ GlobalTrace->logAllocationEvent(result, sz);
+ }
+
+ return result;
+}
+}
diff --git a/runtime/contrib/heap_trace/src/calloc_stub.cc b/runtime/contrib/heap_trace/src/calloc_stub.cc
new file mode 100644
index 000000000..adc93a209
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/calloc_stub.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trace.h"
+#include "function_resolver.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
+
+#include <memory>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+extern "C" {
+
+void *calloc(size_t number_of_elements, size_t size_of_one_element) noexcept
+{
+ static auto isOriginalFunctionCallSuccessful = [](void *result) -> bool { return result; };
+
+ if (isCurrentAllocationForSymbolSearcherInternalUsage())
+ {
+ return MemoryPoolForSymbolSearcherInternals{}.allocate(number_of_elements *
+ size_of_one_element);
+ }
+
+ static auto originalFunction = findFunctionByName<void *, size_t, size_t>("calloc");
+ void *result = originalFunction(number_of_elements, size_of_one_element);
+ if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
+ {
+ GlobalTrace->logAllocationEvent(result, number_of_elements * size_of_one_element);
+ }
+
+ return result;
+}
+}
diff --git a/runtime/contrib/heap_trace/src/cl_retain_mem_object_stub.cc b/runtime/contrib/heap_trace/src/cl_retain_mem_object_stub.cc
new file mode 100644
index 000000000..b1b56b60c
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/cl_retain_mem_object_stub.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trace.h"
+#include "function_resolver.h"
+
+#include <CL/cl.h>
+
+#include <memory>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+extern "C" {
+
+cl_int clRetainMemObject(cl_mem mem)
+{
+ static auto isOriginalFunctionCallSuccessful = [](cl_int result) -> bool {
+ return result == CL_SUCCESS;
+ };
+
+ auto originalFunction = findFunctionByName<cl_int, cl_mem>("clRetainMemObject");
+ cl_int result = originalFunction(mem);
+ if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
+ {
+ GlobalTrace->logAllocationEvent(mem, 0);
+ }
+
+ return result;
+}
+}
diff --git a/runtime/contrib/heap_trace/src/free_stub.cc b/runtime/contrib/heap_trace/src/free_stub.cc
index 31af63c8a..21c311dfd 100644
--- a/runtime/contrib/heap_trace/src/free_stub.cc
+++ b/runtime/contrib/heap_trace/src/free_stub.cc
@@ -16,6 +16,7 @@
#include "trace.h"
#include "function_resolver.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
#include <memory>
@@ -25,6 +26,12 @@ extern "C" {
void free(void *p) noexcept
{
+ MemoryPoolForSymbolSearcherInternals pool;
+ if (pool.containsMemorySpaceStartedFromPointer(p))
+ {
+ return pool.deallocate(p);
+ }
+
static auto originalFunction = findFunctionByName<void, void *>("free");
originalFunction(p);
if (!Trace::Guard{}.isActive())
diff --git a/runtime/contrib/heap_trace/src/malloc_stub.cc b/runtime/contrib/heap_trace/src/malloc_stub.cc
index 50124d164..c27dcaa14 100644
--- a/runtime/contrib/heap_trace/src/malloc_stub.cc
+++ b/runtime/contrib/heap_trace/src/malloc_stub.cc
@@ -16,6 +16,7 @@
#include "trace.h"
#include "function_resolver.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
#include <memory>
@@ -27,6 +28,11 @@ void *malloc(size_t sz) noexcept
{
static auto isOriginalFunctionCallSuccessful = [](void *result) -> bool { return result; };
+ if (isCurrentAllocationForSymbolSearcherInternalUsage())
+ {
+ return MemoryPoolForSymbolSearcherInternals{}.allocate(sz);
+ }
+
static auto originalFunction = findFunctionByName<void *, size_t>("malloc");
void *result = originalFunction(sz);
if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
diff --git a/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.cc b/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.cc
new file mode 100644
index 000000000..9778d0f7b
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.cc
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "memory_pool_for_symbol_searcher_internals.h"
+
+uint8_t MemoryPoolForSymbolSearcherInternals::_buffer[MAX_SIZE] = {0};
+uint8_t *volatile MemoryPoolForSymbolSearcherInternals::_ptr_to_free_space_start = _buffer;
+size_t volatile MemoryPoolForSymbolSearcherInternals::_size_of_last_allocated_space = 0;
diff --git a/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h b/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h
new file mode 100644
index 000000000..89797ad50
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MEMORY_POOL_FOR_SYMBOL_SEARCHER_INTERNALS_H
+#define MEMORY_POOL_FOR_SYMBOL_SEARCHER_INTERNALS_H
+
+#include <cstddef>
+#include <cstdint>
+
+// TODO this class possibly should be thread safe (or all symbols should be resolved at the start of
+// application as alternative)
+class MemoryPoolForSymbolSearcherInternals
+{
+ static constexpr size_t MAX_SIZE = 65536;
+
+public:
+ bool containsMemorySpaceStartedFromPointer(void *ptr) noexcept
+ {
+ return ptr >= _buffer && ptr < _buffer + MAX_SIZE;
+ }
+
+  // TODO this function should return an aligned ptr to avoid potential problems
+ void *allocate(size_t size) noexcept
+ {
+ if (isSpaceOfRequiredSizeNotAvailable(size))
+ {
+ // TODO need to signalize about error
+ }
+
+ uint8_t *ptr_to_memory_space_begin = _ptr_to_free_space_start;
+ _ptr_to_free_space_start += size;
+ _size_of_last_allocated_space = size;
+ return ptr_to_memory_space_begin;
+ }
+
+ void deallocate(void *p) noexcept
+ {
+ if (p == _ptr_to_free_space_start - _size_of_last_allocated_space)
+ {
+ _ptr_to_free_space_start -= _size_of_last_allocated_space;
+ _size_of_last_allocated_space = 0;
+ }
+ }
+
+private:
+  bool isSpaceOfRequiredSizeNotAvailable(size_t size)
+  {
+    uint8_t *ptr_to_the_free_space_after_allocation = _ptr_to_free_space_start + size;
+    size_t size_of_reserved_space_after_allocation =
+        ptr_to_the_free_space_after_allocation - _buffer;
+    if (size_of_reserved_space_after_allocation >= MAX_SIZE)
+    {
+      return true; // pool exhausted: the requested space is NOT available
+    }
+
+    return false; // enough free space remains for this allocation
+  }
+
+private:
+ static uint8_t _buffer[MAX_SIZE];
+ static uint8_t *volatile _ptr_to_free_space_start;
+ static volatile size_t _size_of_last_allocated_space;
+};
+
+#endif // ! MEMORY_POOL_FOR_SYMBOL_SEARCHER_INTERNALS_H
diff --git a/runtime/contrib/heap_trace/src/posix_memalign_stub.cc b/runtime/contrib/heap_trace/src/posix_memalign_stub.cc
new file mode 100644
index 000000000..d68c5736a
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/posix_memalign_stub.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trace.h"
+#include "function_resolver.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
+
+#include <memory>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+extern "C" {
+
+int posix_memalign(void **memptr, size_t alignment, size_t size) noexcept
+{
+ static auto isOriginalFunctionCallSuccessful = [](int result) -> bool { return result == 0; };
+
+ if (isCurrentAllocationForSymbolSearcherInternalUsage())
+ {
+ *memptr = MemoryPoolForSymbolSearcherInternals{}.allocate(size);
+ return 0;
+ }
+
+ static auto originalFunction = findFunctionByName<int, void **, size_t, size_t>("posix_memalign");
+ int result = originalFunction(memptr, alignment, size);
+ if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
+ {
+ GlobalTrace->logAllocationEvent(*memptr, size);
+ }
+
+ return result;
+}
+}
diff --git a/runtime/contrib/heap_trace/src/realloc_stub.cc b/runtime/contrib/heap_trace/src/realloc_stub.cc
index ce4569b0e..636cfebfe 100644
--- a/runtime/contrib/heap_trace/src/realloc_stub.cc
+++ b/runtime/contrib/heap_trace/src/realloc_stub.cc
@@ -16,6 +16,7 @@
#include "trace.h"
#include "function_resolver.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
#include <memory>
@@ -27,6 +28,11 @@ void *realloc(void *ptr, size_t sz) noexcept
{
static auto isOriginalFunctionCallSuccessful = [](void *result) -> bool { return result; };
+ if (isCurrentAllocationForSymbolSearcherInternalUsage())
+ {
+ return MemoryPoolForSymbolSearcherInternals{}.allocate(sz);
+ }
+
static auto originalFunction = findFunctionByName<void *, void *, size_t>("realloc");
void *result = originalFunction(ptr, sz);
if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
diff --git a/runtime/contrib/heap_trace/src/symbol_searcher.cc b/runtime/contrib/heap_trace/src/symbol_searcher.cc
index cf83f2f7b..9aaae6a2a 100644
--- a/runtime/contrib/heap_trace/src/symbol_searcher.cc
+++ b/runtime/contrib/heap_trace/src/symbol_searcher.cc
@@ -36,13 +36,14 @@ static int checkIfLibraryContainsSymbol(InfoAboutLoadedLib library_description,
static bool isSymbolAddressNotInTheSameTranslationUnit(SymbolDescription *symbol);
void *findSymbol(const char *name)
{
+ signalizeThatNextAllocationsWillBeForSymbolSearcherInternalUsage();
SymbolDescription symbol(name);
tryToFindSymbolInLinkedLibraries(symbol);
if (!symbol.address)
{
tryToFindSymbolInAllLoadedLibraries(symbol);
}
-
+ signalizeThatSymbolSearcherEndedOfWork();
return symbol.address;
}
@@ -80,3 +81,22 @@ static bool isSymbolAddressNotInTheSameTranslationUnit(SymbolDescription *symbol
return addressInTheSameTranslationUnit == nullptr ||
addressInTheSameTranslationUnit != symbol->address;
}
+
+// TODO should be thread_local (or all symbols should be resolved at the start of application as
+// alternative)
+static volatile bool are_next_allocations_will_be_for_symbol_searcher_internal_usage = false;
+
+void signalizeThatNextAllocationsWillBeForSymbolSearcherInternalUsage()
+{
+ are_next_allocations_will_be_for_symbol_searcher_internal_usage = true;
+}
+
+void signalizeThatSymbolSearcherEndedOfWork()
+{
+ are_next_allocations_will_be_for_symbol_searcher_internal_usage = false;
+}
+
+bool isCurrentAllocationForSymbolSearcherInternalUsage()
+{
+ return are_next_allocations_will_be_for_symbol_searcher_internal_usage;
+}
diff --git a/runtime/contrib/heap_trace/src/symbol_searcher.h b/runtime/contrib/heap_trace/src/symbol_searcher.h
index f70a4e46f..2408232d6 100644
--- a/runtime/contrib/heap_trace/src/symbol_searcher.h
+++ b/runtime/contrib/heap_trace/src/symbol_searcher.h
@@ -17,6 +17,10 @@
#ifndef SYMBOL_SEARCHER_H
#define SYMBOL_SEARCHER_H
+void signalizeThatNextAllocationsWillBeForSymbolSearcherInternalUsage();
+void signalizeThatSymbolSearcherEndedOfWork();
+bool isCurrentAllocationForSymbolSearcherInternalUsage();
+
void *findSymbol(const char *name);
#endif // ! SYMBOL_SEARCHER_H
diff --git a/runtime/contrib/heap_trace/src/trace.cc b/runtime/contrib/heap_trace/src/trace.cc
index 82f2915cb..020aeb90e 100644
--- a/runtime/contrib/heap_trace/src/trace.cc
+++ b/runtime/contrib/heap_trace/src/trace.cc
@@ -48,7 +48,7 @@ void Trace::logAllocationEvent(void *memory_ptr, size_t size_of_allocated_space_
_peak_heap_usage_on_cpu = _total_allocated_bytes_on_cpu - _total_deallocated_bytes_on_cpu;
}
_memory_in_use_on_cpu[memory_ptr] = size_of_allocated_space_in_bytes;
- Guard{}.signalizeThatDangerOfRecursionHAsPassed();
+ Guard{}.signalizeThatDangerOfRecursionHasPassed();
}
void Trace::logDeallocationEvent(void *memory_ptr)
@@ -61,20 +61,29 @@ void Trace::logDeallocationEvent(void *memory_ptr)
_total_deallocated_bytes_on_cpu += found_memory_space_description->second;
_memory_in_use_on_cpu.erase(found_memory_space_description);
}
- Guard{}.signalizeThatDangerOfRecursionHAsPassed();
+ Guard{}.signalizeThatDangerOfRecursionHasPassed();
}
void Trace::logAllocationEvent(cl_mem memory_ptr, size_t size_of_allocated_space_in_bytes)
{
Guard{}.signalizeAboutPossibleRecursion();
std::lock_guard<std::mutex> guard(_lock);
- _total_allocated_bytes_on_gpu += size_of_allocated_space_in_bytes;
- if (_peak_heap_usage_on_gpu < _total_allocated_bytes_on_gpu - _total_deallocated_bytes_on_gpu)
+ auto found_memory_space_description = _memory_in_use_on_gpu.find(memory_ptr);
+ if (found_memory_space_description == _memory_in_use_on_gpu.end())
+ {
+ _memory_in_use_on_gpu.insert(
+ std::make_pair(memory_ptr, MemoryTraits(1, size_of_allocated_space_in_bytes)));
+ _total_allocated_bytes_on_gpu += size_of_allocated_space_in_bytes;
+ if (_peak_heap_usage_on_gpu < _total_allocated_bytes_on_gpu - _total_deallocated_bytes_on_gpu)
+ {
+ _peak_heap_usage_on_gpu = _total_allocated_bytes_on_gpu - _total_deallocated_bytes_on_gpu;
+ }
+ }
+ else
{
- _peak_heap_usage_on_gpu = _total_allocated_bytes_on_gpu - _total_deallocated_bytes_on_gpu;
+ ++found_memory_space_description->second.ref_counter;
}
- _memory_in_use_on_gpu[memory_ptr] = size_of_allocated_space_in_bytes;
- Guard{}.signalizeThatDangerOfRecursionHAsPassed();
+ Guard{}.signalizeThatDangerOfRecursionHasPassed();
}
void Trace::logDeallocationEvent(cl_mem memory_ptr)
@@ -84,10 +93,13 @@ void Trace::logDeallocationEvent(cl_mem memory_ptr)
auto found_memory_space_description = _memory_in_use_on_gpu.find(memory_ptr);
if (found_memory_space_description != _memory_in_use_on_gpu.end())
{
- _total_deallocated_bytes_on_gpu += found_memory_space_description->second;
- _memory_in_use_on_gpu.erase(found_memory_space_description);
+ if (--found_memory_space_description->second.ref_counter == 0)
+ {
+ _total_deallocated_bytes_on_gpu += found_memory_space_description->second.size;
+ _memory_in_use_on_gpu.erase(found_memory_space_description);
+ }
}
- Guard{}.signalizeThatDangerOfRecursionHAsPassed();
+ Guard{}.signalizeThatDangerOfRecursionHasPassed();
}
Trace::~Trace()
diff --git a/runtime/contrib/heap_trace/src/trace.h b/runtime/contrib/heap_trace/src/trace.h
index f03a65a58..647c51d54 100644
--- a/runtime/contrib/heap_trace/src/trace.h
+++ b/runtime/contrib/heap_trace/src/trace.h
@@ -25,6 +25,17 @@
class Trace
{
+ struct MemoryTraits
+ {
+ size_t ref_counter;
+ size_t size;
+
+ MemoryTraits(size_t init_counter_value, size_t size_of_allocated_memory)
+ : ref_counter(init_counter_value), size(size_of_allocated_memory)
+ {
+ }
+ };
+
public:
class Guard
{
@@ -37,7 +48,7 @@ public:
void markTraceAsReady() { _is_trace_not_available = false; }
void markTraceAsNotReady() { _is_trace_not_available = true; }
void signalizeAboutPossibleRecursion() { _is_recursion_detected = true; }
- void signalizeThatDangerOfRecursionHAsPassed() { _is_recursion_detected = false; }
+ void signalizeThatDangerOfRecursionHasPassed() { _is_recursion_detected = false; }
private:
static bool _is_trace_not_available;
@@ -69,7 +80,7 @@ private:
size_t _total_deallocated_bytes_on_gpu = 0;
size_t _peak_heap_usage_on_gpu = 0;
std::unordered_map<void *, size_t> _memory_in_use_on_cpu;
- std::unordered_map<cl_mem, size_t> _memory_in_use_on_gpu;
+ std::unordered_map<cl_mem, MemoryTraits> _memory_in_use_on_gpu;
};
#endif // !TRACE_H
diff --git a/runtime/contrib/heap_trace/src/valloc_stub.cc b/runtime/contrib/heap_trace/src/valloc_stub.cc
index 24e91bd11..344591630 100644
--- a/runtime/contrib/heap_trace/src/valloc_stub.cc
+++ b/runtime/contrib/heap_trace/src/valloc_stub.cc
@@ -16,6 +16,7 @@
#include "trace.h"
#include "function_resolver.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
#include <memory>
@@ -27,6 +28,11 @@ void *valloc(size_t sz) noexcept
{
static auto isOriginalFunctionCallSuccessful = [](void *result) -> bool { return result; };
+ if (isCurrentAllocationForSymbolSearcherInternalUsage())
+ {
+ return MemoryPoolForSymbolSearcherInternals{}.allocate(sz);
+ }
+
static auto originalFunction = findFunctionByName<void *, size_t>("valloc");
void *result = originalFunction(sz);
if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
diff --git a/runtime/contrib/heap_trace/tests/CMakeLists.txt b/runtime/contrib/heap_trace/tests/CMakeLists.txt
index 8fbe5dec1..14e083e60 100644
--- a/runtime/contrib/heap_trace/tests/CMakeLists.txt
+++ b/runtime/contrib/heap_trace/tests/CMakeLists.txt
@@ -14,6 +14,7 @@ add_library(test_sample4 SHARED src/test_sample4/test_sample4.cc)
add_executable(${HEAP_TRACE_TESTS} src/cl_release_mem_object_interception_test.cc
src/symbol_searcher_test.cc
src/cl_create_buffer_interception_test.cc
+ src/cl_retain_mem_object_interception_test.cc
src/free_interception_test.cc
src/main.cc
src/common_test_environment.cc
@@ -21,7 +22,11 @@ add_executable(${HEAP_TRACE_TESTS} src/cl_release_mem_object_interception_test.c
src/malloc_interception_test.cc
src/realloc_interception_test.cc
src/valloc_interception_test.cc
+ src/calloc_interception_test.cc
+ src/posix_memalign_interception_test.cc
+ src/aligned_alloc_interception_test.cc
src/file_content_manipulations.cc
+ src/memory_pool_for_symbol_searcher_internals_test.cc
)
target_include_directories(${HEAP_TRACE_TESTS} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../src)
target_link_libraries(${HEAP_TRACE_TESTS} test_sample1)
diff --git a/runtime/contrib/heap_trace/tests/src/aligned_alloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/aligned_alloc_interception_test.cc
new file mode 100644
index 000000000..29e9a855d
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/aligned_alloc_interception_test.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_test_environment.h"
+#include "file_content_manipulations.h"
+
+#include "trace.h"
+#include "symbol_searcher.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
+
+#include <limits>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+namespace backstage
+{
+
+struct AlignedAllocStub : public TestEnv
+{
+ AlignedAllocStub() : TestEnv("aligned_alloc_interception_test.log") {}
+};
+
+TEST_F(AlignedAllocStub, must_allocate_space_as_standard_aligned_alloc)
+{
+ void *p = aligned_alloc(4, 128);
+
+ ASSERT_TRUE(p);
+ free(p);
+}
+
+TEST_F(AlignedAllocStub, must_log_allocation_events_if_trace_is_ready_for_it)
+{
+ GlobalTrace.reset();
+ void *p1 = aligned_alloc(4, 1024);
+
+ GlobalTrace.reset(new Trace);
+ void *p2 = aligned_alloc(4, 128);
+ void *p3 = aligned_alloc(4, 64);
+ GlobalTrace.reset();
+
+ ASSERT_TRUE(p1 && p2 && p3);
+ ASSERT_STREQ(getContentOfFile("aligned_alloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 192 B, Total allocated: 192 B, Total deallocated: 0 "
+ "B\nOn GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+ free(p1);
+ free(p2);
+ free(p3);
+}
+
+TEST_F(AlignedAllocStub, must_not_do_the_record_about_allocation_event_if_original_function_failed)
+{
+ void *p = aligned_alloc(1, std::numeric_limits<size_t>::max());
+ GlobalTrace.reset();
+
+ ASSERT_FALSE(p);
+ ASSERT_STREQ(getContentOfFile("aligned_alloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+TEST_F(AlignedAllocStub,
+ should_allocate_memory_from_pool_for_symbol_searcher_internal_usage_if_need)
+{
+ signalizeThatNextAllocationsWillBeForSymbolSearcherInternalUsage();
+ void *p = aligned_alloc(4, 1024);
+ signalizeThatSymbolSearcherEndedOfWork();
+ GlobalTrace.reset();
+
+ MemoryPoolForSymbolSearcherInternals pool;
+ ASSERT_TRUE(p);
+ ASSERT_TRUE(pool.containsMemorySpaceStartedFromPointer(p));
+ ASSERT_STREQ(getContentOfFile("aligned_alloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/calloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/calloc_interception_test.cc
new file mode 100644
index 000000000..56765e977
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/calloc_interception_test.cc
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_test_environment.h"
+#include "file_content_manipulations.h"
+
+#include "trace.h"
+#include "symbol_searcher.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
+
+#include <limits>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+namespace backstage
+{
+
+struct CallocStub : public TestEnv
+{
+ CallocStub() : TestEnv("calloc_interception_test.log") {}
+};
+
+TEST_F(CallocStub, must_allocate_space_as_standard_calloc)
+{
+ std::array<char, 128 * 4> reference_data;
+ reference_data.fill(0);
+ void *p = calloc(128, 4);
+ ASSERT_TRUE(p);
+ ASSERT_TRUE(memcmp(p, reference_data.data(), reference_data.size()) == 0);
+ free(p);
+}
+
+TEST_F(CallocStub, must_log_allocation_events_if_trace_is_ready_for_it)
+{
+ GlobalTrace.reset();
+ void *p1 = calloc(1024, 4);
+
+ GlobalTrace.reset(new Trace);
+ void *p2 = calloc(128, 4);
+ void *p3 = calloc(64, 4);
+ GlobalTrace.reset();
+
+ ASSERT_TRUE(p1 && p2 && p3);
+ ASSERT_STREQ(getContentOfFile("calloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 768 B, Total allocated: 768 B, Total deallocated: 0 "
+ "B\nOn GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+ free(p1);
+ free(p2);
+ free(p3);
+}
+
+TEST_F(CallocStub, must_not_do_the_record_about_allocation_event_if_original_function_failed)
+{
+ void *p = calloc(1, std::numeric_limits<size_t>::max());
+ GlobalTrace.reset();
+
+ ASSERT_FALSE(p);
+ ASSERT_STREQ(getContentOfFile("calloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+TEST_F(CallocStub, should_allocate_memory_from_pool_for_symbol_searcher_internal_usage_if_need)
+{
+ signalizeThatNextAllocationsWillBeForSymbolSearcherInternalUsage();
+ void *p = calloc(10, 102);
+ signalizeThatSymbolSearcherEndedOfWork();
+ GlobalTrace.reset();
+
+ MemoryPoolForSymbolSearcherInternals pool;
+ ASSERT_TRUE(p);
+ ASSERT_TRUE(pool.containsMemorySpaceStartedFromPointer(p));
+ ASSERT_STREQ(getContentOfFile("calloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc b/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc
index ddd1a0d5c..49b8fd994 100644
--- a/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc
@@ -76,6 +76,29 @@ TEST_F(ClReleaseMemObjectStub, must_log_deallocation_events_if_trace_is_ready_fo
"GPU - Peak mem usage: 192 B, Total allocated: 192 B, Total deallocated: 192 B\n");
}
+TEST_F(ClReleaseMemObjectStub, must_log_deallocation_event_only_if_reference_counter_equals_to_zero)
+{
+ cl_mem mem = clCreateBuffer(context, CL_MEM_READ_WRITE, 1024, NULL, NULL);
+ clRetainMemObject(mem);
+ clReleaseMemObject(mem);
+ GlobalTrace.reset();
+ ASSERT_STREQ(getContentOfFile("./cl_release_mem_object_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 1024 B, Total allocated: 1024 B, Total deallocated: 0 B\n");
+ clReleaseMemObject(mem);
+
+ GlobalTrace.reset(new Trace);
+ mem = clCreateBuffer(context, CL_MEM_READ_WRITE, 1024, NULL, NULL);
+ clRetainMemObject(mem);
+ clReleaseMemObject(mem);
+ clReleaseMemObject(mem);
+ GlobalTrace.reset();
+ ASSERT_STREQ(
+ getContentOfFile("./cl_release_mem_object_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 1024 B, Total allocated: 1024 B, Total deallocated: 1024 B\n");
+}
+
TEST_F(ClReleaseMemObjectStub, must_not_log_deallocation_event_if_original_function_failed)
{
cl_mem mem;
diff --git a/runtime/contrib/heap_trace/tests/src/cl_retain_mem_object_interception_test.cc b/runtime/contrib/heap_trace/tests/src/cl_retain_mem_object_interception_test.cc
new file mode 100644
index 000000000..f0b1902dc
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/cl_retain_mem_object_interception_test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_test_environment.h"
+#include "file_content_manipulations.h"
+
+#include "trace.h"
+
+#include <CL/cl.h>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+namespace backstage
+{
+
+struct ClRetainMemObjectStub : public TestEnv
+{
+ cl_context context;
+
+ ClRetainMemObjectStub() : TestEnv("cl_retain_mem_object_interception_test.log") {}
+
+ void SetUp() final
+ {
+ cl_device_id device_id;
+ int err = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_GPU, 1, &device_id, nullptr);
+ context = clCreateContext(0, 1, &device_id, nullptr, nullptr, &err);
+ TestEnv::SetUp();
+ }
+
+ void TearDown() final
+ {
+ TestEnv::TearDown();
+ clReleaseContext(context);
+ }
+};
+
+TEST_F(ClRetainMemObjectStub, must_work_as_standard_version)
+{
+ cl_mem mem = clCreateBuffer(context, CL_MEM_READ_WRITE, 1024 * 1024, nullptr, nullptr);
+ cl_int retain_mem_result = clRetainMemObject(mem);
+ cl_int release_mem_result1 = clReleaseMemObject(mem);
+ cl_int release_mem_result2 = clReleaseMemObject(mem);
+
+ cl_mem bad_mem_object = nullptr;
+ cl_int retain_mem_result_with_bad_mem_object = clRetainMemObject(bad_mem_object);
+
+ ASSERT_TRUE(mem);
+ ASSERT_TRUE(retain_mem_result == CL_SUCCESS);
+ ASSERT_TRUE(release_mem_result1 == CL_SUCCESS);
+ ASSERT_TRUE(release_mem_result2 == CL_SUCCESS);
+ ASSERT_TRUE(retain_mem_result_with_bad_mem_object == CL_INVALID_MEM_OBJECT);
+}
+
+TEST_F(ClRetainMemObjectStub, must_do_not_log_new_allocation_event_just_increase_reference_count)
+{
+ GlobalTrace.reset();
+ cl_mem mem = clCreateBuffer(context, CL_MEM_READ_WRITE, 1024, nullptr, nullptr);
+
+ GlobalTrace.reset(new Trace);
+ clRetainMemObject(mem);
+ GlobalTrace.reset();
+
+ cl_int release_mem_result1 = clReleaseMemObject(mem);
+ cl_int release_mem_result2 = clReleaseMemObject(mem);
+ ASSERT_TRUE(release_mem_result1 == CL_SUCCESS);
+ ASSERT_TRUE(release_mem_result2 == CL_SUCCESS);
+ ASSERT_STREQ(getContentOfFile("cl_retain_mem_object_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/free_interception_test.cc b/runtime/contrib/heap_trace/tests/src/free_interception_test.cc
index e690718d0..07aa88680 100644
--- a/runtime/contrib/heap_trace/tests/src/free_interception_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/free_interception_test.cc
@@ -18,6 +18,7 @@
#include "file_content_manipulations.h"
#include "trace.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
extern std::unique_ptr<Trace> GlobalTrace;
@@ -57,4 +58,20 @@ TEST_F(FreeStub, must_log_deallocation_events_if_trace_is_ready_for_it)
"B\nOn GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
}
+TEST_F(FreeStub, can_deallocate_memory_using_pool_for_symbol_searcher_internals)
+{
+
+ MemoryPoolForSymbolSearcherInternals pool;
+ void *volatile p1 = pool.allocate(1024);
+ free(p1);
+ void *volatile p2 = pool.allocate(1024);
+ free(p2);
+ GlobalTrace.reset();
+
+ ASSERT_TRUE(p1 == p2);
+ ASSERT_STREQ(getContentOfFile("./free_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 "
+ "B\nOn GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc
index 7a2f5a915..ea3eb8256 100644
--- a/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc
@@ -18,8 +18,9 @@
#include "file_content_manipulations.h"
#include "trace.h"
+#include "symbol_searcher.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
-#include <experimental/filesystem>
#include <limits>
extern std::unique_ptr<Trace> GlobalTrace;
@@ -70,6 +71,21 @@ TEST_F(MallocStub, must_not_do_the_record_about_allocation_event_if_original_fun
"GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
}
+TEST_F(MallocStub, should_allocate_memory_from_pool_for_symbol_searcher_internal_usage_if_need)
+{
+ signalizeThatNextAllocationsWillBeForSymbolSearcherInternalUsage();
+ void *p = malloc(1024);
+ signalizeThatSymbolSearcherEndedOfWork();
+ GlobalTrace.reset();
+
+ MemoryPoolForSymbolSearcherInternals pool;
+ ASSERT_TRUE(p);
+ ASSERT_TRUE(pool.containsMemorySpaceStartedFromPointer(p));
+ ASSERT_STREQ(getContentOfFile("./malloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
TEST_F(
MallocStub,
should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero)
diff --git a/runtime/contrib/heap_trace/tests/src/memory_pool_for_symbol_searcher_internals_test.cc b/runtime/contrib/heap_trace/tests/src/memory_pool_for_symbol_searcher_internals_test.cc
new file mode 100644
index 000000000..c0e2e1d3c
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/memory_pool_for_symbol_searcher_internals_test.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+
+#include "memory_pool_for_symbol_searcher_internals.h"
+
+namespace backstage
+{
+
+struct MemoryPoolForSymbolSearcherInternals : public ::testing::Test
+{
+};
+
+TEST_F(MemoryPoolForSymbolSearcherInternals, can_help_users_allocate_deallocate_memory)
+{
+ ::MemoryPoolForSymbolSearcherInternals memory;
+
+ void *p1 = memory.allocate(1024);
+
+ ASSERT_TRUE(p1);
+ memory.deallocate(p1);
+}
+
+TEST_F(MemoryPoolForSymbolSearcherInternals,
+ should_reuse_memory_if_it_deallocated_just_after_allocations)
+{
+ ::MemoryPoolForSymbolSearcherInternals memory;
+
+ void *p1 = memory.allocate(1024);
+ memory.deallocate(p1);
+ void *p2 = memory.allocate(128);
+ memory.deallocate(p2);
+ void *p3 = memory.allocate(3467);
+ memory.deallocate(p3);
+
+ ASSERT_TRUE(p1 && p2 && p3);
+ ASSERT_TRUE(p1 == p2);
+ ASSERT_TRUE(p2 == p3);
+}
+
+TEST_F(MemoryPoolForSymbolSearcherInternals,
+ can_define_either_contains_memory_starting_from_incoming_pointer_or_not)
+{
+ ::MemoryPoolForSymbolSearcherInternals memory;
+
+ void *p1 = memory.allocate(1024);
+ void *p2 = malloc(1024);
+
+ ASSERT_TRUE(memory.containsMemorySpaceStartedFromPointer(p1));
+ ASSERT_FALSE(memory.containsMemorySpaceStartedFromPointer(p2));
+
+ memory.deallocate(p1);
+ free(p2);
+}
+} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/posix_memalign_interception_test.cc b/runtime/contrib/heap_trace/tests/src/posix_memalign_interception_test.cc
new file mode 100644
index 000000000..52c06e3de
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/posix_memalign_interception_test.cc
@@ -0,0 +1,101 @@
+
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_test_environment.h"
+#include "file_content_manipulations.h"
+
+#include "trace.h"
+#include "symbol_searcher.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
+
+#include <limits>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+namespace backstage
+{
+
+struct PosixMemalignStub : public TestEnv
+{
+ PosixMemalignStub() : TestEnv("posix_memalign_interception_test.log") {}
+};
+
+TEST_F(PosixMemalignStub, must_allocate_space_as_standard_posix_memalign)
+{
+ void *p = nullptr;
+ int res = posix_memalign(&p, 4, 12);
+
+ ASSERT_TRUE(res == 0);
+ ASSERT_TRUE(p);
+ free(p);
+}
+
+TEST_F(PosixMemalignStub, must_log_allocation_events_if_trace_is_ready_for_it)
+{
+ GlobalTrace.reset();
+ void *p1 = nullptr;
+ int res1 = posix_memalign(&p1, 4, 1024);
+
+ GlobalTrace.reset(new Trace);
+ void *p2 = nullptr;
+ int res2 = posix_memalign(&p2, 4, 128);
+ void *p3 = nullptr;
+ int res3 = posix_memalign(&p3, 4, 64);
+ GlobalTrace.reset();
+
+ ASSERT_TRUE(res1 == 0 && res2 == 0 && res3 == 0);
+ ASSERT_TRUE(p1 && p2 && p3);
+ ASSERT_STREQ(getContentOfFile("posix_memalign_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 192 B, Total allocated: 192 B, Total deallocated: 0 "
+ "B\nOn GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+ free(p1);
+ free(p2);
+ free(p3);
+}
+
+TEST_F(PosixMemalignStub, must_not_do_the_record_about_allocation_event_if_original_function_failed)
+{
+ void *p = nullptr;
+ int res = posix_memalign(&p, 1, std::numeric_limits<size_t>::max());
+ GlobalTrace.reset();
+
+ ASSERT_FALSE(res == 0);
+ ASSERT_FALSE(p);
+ ASSERT_STREQ(getContentOfFile("posix_memalign_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+TEST_F(PosixMemalignStub,
+ should_allocate_memory_from_pool_for_symbol_searcher_internal_usage_if_need)
+{
+ signalizeThatNextAllocationsWillBeForSymbolSearcherInternalUsage();
+ void *p = nullptr;
+ int res = posix_memalign(&p, 1, 1024);
+ signalizeThatSymbolSearcherEndedOfWork();
+ GlobalTrace.reset();
+
+ MemoryPoolForSymbolSearcherInternals pool;
+ ASSERT_TRUE(res == 0);
+ ASSERT_TRUE(p);
+ ASSERT_TRUE(pool.containsMemorySpaceStartedFromPointer(p));
+ ASSERT_STREQ(getContentOfFile("posix_memalign_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc
index 160a19beb..59660fad4 100644
--- a/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc
@@ -18,10 +18,10 @@
#include "file_content_manipulations.h"
#include "trace.h"
+#include "symbol_searcher.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
-#include <experimental/filesystem>
#include <limits>
-#include <cstring>
extern std::unique_ptr<Trace> GlobalTrace;
@@ -107,4 +107,20 @@ TEST_F(
"GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
}
+TEST_F(ReallocStub, should_allocate_memory_from_pool_for_symbol_searcher_internal_usage_if_need)
+{
+ signalizeThatNextAllocationsWillBeForSymbolSearcherInternalUsage();
+ void *p = malloc(128);
+ p = realloc(p, 1024);
+ signalizeThatSymbolSearcherEndedOfWork();
+ GlobalTrace.reset();
+
+ MemoryPoolForSymbolSearcherInternals pool;
+ ASSERT_TRUE(p);
+ ASSERT_TRUE(pool.containsMemorySpaceStartedFromPointer(p));
+ ASSERT_STREQ(getContentOfFile("./realloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc b/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc
index d615cc928..59fdeedc9 100644
--- a/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc
@@ -14,23 +14,25 @@
* limitations under the License.
*/
-#include "gtest/gtest.h"
+#include "common_test_environment.h"
+#include "file_content_manipulations.h"
#include "test_sample1.h"
#include "test_sample2.h"
#include "test_sample4.h"
#include "symbol_searcher.h"
+#include "trace.h"
#include <dlfcn.h>
#include <linux/limits.h>
#include <unistd.h>
-#include <cstdlib>
-
#include <experimental/filesystem>
namespace fs = std::experimental::filesystem;
+extern std::unique_ptr<Trace> GlobalTrace;
+
fs::path exePath()
{
char result[PATH_MAX] = {0};
@@ -41,8 +43,9 @@ fs::path exePath()
namespace backstage
{
-struct SymbolSearcher : public ::testing::Test
+struct SymbolSearcher : TestEnv
{
+ SymbolSearcher() : TestEnv("./symbol_searcher_test.log") {}
};
TEST_F(SymbolSearcher, should_find_symbol_in_linked_library)
@@ -76,4 +79,21 @@ TEST_F(SymbolSearcher,
dlclose(test_sample2_handle);
}
+TEST_F(SymbolSearcher, should_give_an_opportunity_do_not_log_its_internal_allocations)
+{
+ GlobalTrace.reset();
+ fs::path pathToTestLib = exePath() / "libtest_sample2.so";
+ void *handle = dlopen(pathToTestLib.c_str(), RTLD_NOW);
+
+ GlobalTrace.reset(new Trace);
+ void *symbolAddress = findSymbol("funcDefinedOnlyInTestSample2");
+ GlobalTrace.reset();
+
+ ASSERT_STREQ(getContentOfFile("./symbol_searcher_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+
+ dlclose(handle);
+}
+
} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/valloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/valloc_interception_test.cc
index 6b3d6df82..7a409ab25 100644
--- a/runtime/contrib/heap_trace/tests/src/valloc_interception_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/valloc_interception_test.cc
@@ -18,8 +18,9 @@
#include "file_content_manipulations.h"
#include "trace.h"
+#include "symbol_searcher.h"
+#include "memory_pool_for_symbol_searcher_internals.h"
-#include <experimental/filesystem>
#include <limits>
extern std::unique_ptr<Trace> GlobalTrace;
@@ -70,4 +71,19 @@ TEST_F(VallocStub, must_not_do_the_record_about_allocation_event_if_original_fun
"GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
}
+TEST_F(VallocStub, should_allocate_memory_from_pool_for_symbol_searcher_internal_usage_if_need)
+{
+ signalizeThatNextAllocationsWillBeForSymbolSearcherInternalUsage();
+ void *p = valloc(1024);
+ signalizeThatSymbolSearcherEndedOfWork();
+ GlobalTrace.reset();
+
+ MemoryPoolForSymbolSearcherInternals pool;
+ ASSERT_TRUE(p);
+ ASSERT_TRUE(pool.containsMemorySpaceStartedFromPointer(p));
+ ASSERT_STREQ(getContentOfFile("./valloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
} // namespace backstage
diff --git a/runtime/contrib/hi_perf_cpu/CMakeLists.txt b/runtime/contrib/hi_perf_cpu/CMakeLists.txt
new file mode 100644
index 000000000..141698a8a
--- /dev/null
+++ b/runtime/contrib/hi_perf_cpu/CMakeLists.txt
@@ -0,0 +1,47 @@
+set(LIB_ONERT_BACKEND_HI_PERF_CPU onert_backend_hi_perf)
+
+nnas_find_package(NNPACK QUIET)
+
+option(BUILD_ONERT_HI_PERF_CPU_BACKEND
+ "Build onert HI_PERF_CPU backend"
+ ${NNPACK_FOUND} # Default value when there is no explicit user request
+)
+
+message(STATUS "Build onert HI_PERF_CPU backend: ${BUILD_ONERT_HI_PERF_CPU_BACKEND}")
+
+if(NOT BUILD_ONERT_HI_PERF_CPU_BACKEND)
+ return()
+endif(NOT BUILD_ONERT_HI_PERF_CPU_BACKEND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+file(GLOB_RECURSE TESTS "*.test.cc")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(${LIB_ONERT_BACKEND_HI_PERF_CPU} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_HI_PERF_CPU} PRIVATE nnfw_lib_misc)
+target_link_libraries(${LIB_ONERT_BACKEND_HI_PERF_CPU} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_HI_PERF_CPU} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_HI_PERF_CPU} PRIVATE nnpack pthreadpool cpuinfo)
+target_link_libraries(${LIB_ONERT_BACKEND_HI_PERF_CPU} PRIVATE nnfw_coverage)
+target_include_directories(${LIB_ONERT_BACKEND_HI_PERF_CPU} PRIVATE ${NNPACK_INCLUDE_DIRS})
+
+set_target_properties(${LIB_ONERT_BACKEND_HI_PERF_CPU} PROPERTIES OUTPUT_NAME backend_NNPACK)
+
+install(TARGETS ${LIB_ONERT_BACKEND_HI_PERF_CPU} DESTINATION lib)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Unit Tests
+set(TEST_ONERT_BACKEND_HI_PERF_CPU test_onert_backend_hi_perf)
+
+add_executable(${TEST_ONERT_BACKEND_HI_PERF_CPU} ${TESTS})
+
+target_link_libraries(${TEST_ONERT_BACKEND_HI_PERF_CPU} ${LIB_ONERT_BACKEND_HI_PERF_CPU})
+target_link_libraries(${TEST_ONERT_BACKEND_HI_PERF_CPU} gtest gtest_main ${LIB_PTHREAD})
+target_link_libraries(${TEST_ONERT_BACKEND_HI_PERF_CPU} nnpack)
+
+add_test(${TEST_ONERT_BACKEND_HI_PERF_CPU} ${TEST_ONERT_BACKEND_HI_PERF_CPU})
+install(TARGETS ${TEST_ONERT_BACKEND_HI_PERF_CPU} DESTINATION unittest)
diff --git a/runtime/neurun/backend/hi_perf_cpu/HighPerformanceBackend.test.cc b/runtime/contrib/hi_perf_cpu/HighPerformanceBackend.test.cc
index 625fe1c36..625fe1c36 100644
--- a/runtime/neurun/backend/hi_perf_cpu/HighPerformanceBackend.test.cc
+++ b/runtime/contrib/hi_perf_cpu/HighPerformanceBackend.test.cc
diff --git a/runtime/neurun/backend/hi_perf_cpu/KernelGenerator.cc b/runtime/contrib/hi_perf_cpu/KernelGenerator.cc
index a13fe12b9..a13fe12b9 100644
--- a/runtime/neurun/backend/hi_perf_cpu/KernelGenerator.cc
+++ b/runtime/contrib/hi_perf_cpu/KernelGenerator.cc
diff --git a/runtime/contrib/hi_perf_cpu/KernelGenerator.h b/runtime/contrib/hi_perf_cpu/KernelGenerator.h
new file mode 100644
index 000000000..67278521d
--- /dev/null
+++ b/runtime/contrib/hi_perf_cpu/KernelGenerator.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_HI_PERF_CPU_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_HI_PERF_CPU_KERNEL_GENERATOR_H__
+
+#include <backend/IKernelGenerator.h>
+
+#include "ir/Operands.h"
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace hi_perf_cpu
+{
+
+class KernelGenerator : public IKernelGenerator
+{
+public:
+ KernelGenerator(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder);
+ // TODO add more ops
+
+private:
+ const ir::Operands &_ctx;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+};
+
+} // namespace hi_perf_cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_HI_PERF_CPU_KERNEL_GENERATOR_H__
diff --git a/runtime/neurun/backend/hi_perf_cpu/TensorBuilder.cc b/runtime/contrib/hi_perf_cpu/TensorBuilder.cc
index e6ebf5f0b..e6ebf5f0b 100644
--- a/runtime/neurun/backend/hi_perf_cpu/TensorBuilder.cc
+++ b/runtime/contrib/hi_perf_cpu/TensorBuilder.cc
diff --git a/runtime/contrib/hi_perf_cpu/TensorBuilder.h b/runtime/contrib/hi_perf_cpu/TensorBuilder.h
new file mode 100644
index 000000000..17ebb00cf
--- /dev/null
+++ b/runtime/contrib/hi_perf_cpu/TensorBuilder.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NNFW_TENSORBUILDER_H
+#define NNFW_TENSORBUILDER_H
+
+#include <unordered_map>
+
+#include <backend/ITensorBuilder.h>
+#include "ir/OperandIndexMap.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace hi_perf_cpu
+{
+
+class TensorBuilder : public ITensorBuilder
+{
+public:
+ TensorBuilder();
+
+private:
+};
+
+} // namespace hi_perf_cpu
+} // namespace backend
+} // namespace onert
+
+#endif // NNFW_TENSORBUILDER_H
diff --git a/runtime/contrib/labs/jniacl/src/jniacl_main.cc b/runtime/contrib/labs/jniacl/src/jniacl_main.cc
index 4e5f10d1f..01b928981 100644
--- a/runtime/contrib/labs/jniacl/src/jniacl_main.cc
+++ b/runtime/contrib/labs/jniacl/src/jniacl_main.cc
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include <jni.h>
#include <string>
@@ -10,11 +26,11 @@ extern "C" JNIEXPORT jstring JNICALL
Java_com_samsung_testaclexec_ActivityMain_RunACLJNI(JNIEnv *env, jobject)
{
using arm_compute::DataType;
- using arm_compute::graph::Tensor;
- using arm_compute::graph::TargetHint;
- using arm_compute::graph::Graph;
using arm_compute::TensorInfo;
using arm_compute::TensorShape;
+ using arm_compute::graph::Graph;
+ using arm_compute::graph::TargetHint;
+ using arm_compute::graph::Tensor;
arm_compute::graph::Graph graph;
TargetHint target_hint = TargetHint::OPENCL;
@@ -27,7 +43,6 @@ Java_com_samsung_testaclexec_ActivityMain_RunACLJNI(JNIEnv *env, jobject)
std::unique_ptr<BiasAccessor>(new BiasAccessor()),
arm_compute::PadStrideInfo(1, 1, 0, 0))
<< Tensor(std::unique_ptr<OutputAccessor>(new OutputAccessor()));
- ;
graph.run();
diff --git a/runtime/contrib/labs/tflite_examples/src/conv.cpp b/runtime/contrib/labs/tflite_examples/src/conv.cpp
index 3117c316c..e8542c3f5 100644
--- a/runtime/contrib/labs/tflite_examples/src/conv.cpp
+++ b/runtime/contrib/labs/tflite_examples/src/conv.cpp
@@ -33,7 +33,7 @@ template <typename T> struct View
virtual int32_t size(void) const = 0;
virtual T at(uint32_t off) const = 0;
};
-}
+} // namespace vector
namespace feature
{
@@ -52,7 +52,7 @@ template <typename T> struct View
virtual const Shape &shape(void) const = 0;
virtual T at(uint32_t ch, uint32_t row, uint32_t col) const = 0;
};
-}
+} // namespace feature
namespace kernel
{
@@ -72,7 +72,7 @@ template <typename T> struct View
virtual const Shape &shape(void) const = 0;
virtual T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const = 0;
};
-}
+} // namespace kernel
const int32_t N = 1;
const int32_t C = 2;
diff --git a/runtime/contrib/logging/src/nnapi_logging.cc b/runtime/contrib/logging/src/nnapi_logging.cc
index 14f2369ec..370e72a0a 100644
--- a/runtime/contrib/logging/src/nnapi_logging.cc
+++ b/runtime/contrib/logging/src/nnapi_logging.cc
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include <NeuralNetworks.h>
#include <NeuralNetworksEx.h>
@@ -109,7 +125,7 @@ std::string OperandCodeResolver::resolve(int code) const
return it->second;
}
-}
+} // namespace
//
// Asynchronous Event
diff --git a/runtime/contrib/mlapse/tfl/CMakeLists.txt b/runtime/contrib/mlapse/tfl/CMakeLists.txt
index 36f32d7ef..06e8df311 100644
--- a/runtime/contrib/mlapse/tfl/CMakeLists.txt
+++ b/runtime/contrib/mlapse/tfl/CMakeLists.txt
@@ -6,7 +6,6 @@ add_executable(mlapse-tfl ${SOURCES})
target_include_directories(mlapse-tfl PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(mlapse-tfl nnfw_lib_tflite)
target_link_libraries(mlapse-tfl nnfw_lib_misc)
-target_link_libraries(mlapse-tfl nnfw_lib_cpp14)
target_link_libraries(mlapse-tfl tensorflow-lite)
install(TARGETS mlapse-tfl DESTINATION bin)
diff --git a/runtime/contrib/mlapse/tfl/driver.cc b/runtime/contrib/mlapse/tfl/driver.cc
index 867a6051a..1ba23178c 100644
--- a/runtime/contrib/mlapse/tfl/driver.cc
+++ b/runtime/contrib/mlapse/tfl/driver.cc
@@ -24,8 +24,7 @@
#include <tflite/InterpreterSession.h>
#include <tflite/NNAPISession.h>
-// From 'nnfw_lib_cpp14'
-#include <cpp14/memory.h>
+#include <memory>
// From C++ Standard Library
#include <cassert>
@@ -119,9 +118,9 @@ public:
switch (_type)
{
case SessionType::Interp:
- return nnfw::cpp14::make_unique<nnfw::tflite::InterpreterSession>(interp);
+ return std::make_unique<nnfw::tflite::InterpreterSession>(interp);
case SessionType::NNAPI:
- return nnfw::cpp14::make_unique<nnfw::tflite::NNAPISession>(interp);
+ return std::make_unique<nnfw::tflite::NNAPISession>(interp);
default:
break;
}
@@ -159,7 +158,7 @@ int entry(const int argc, char **argv)
// Create an observer
mlapse::MulticastObserver observer;
- observer.append(nnfw::cpp14::make_unique<ConsoleReporter>());
+ observer.append(std::make_unique<ConsoleReporter>());
// Set default parameters
std::string model_path;
@@ -195,7 +194,7 @@ int entry(const int argc, char **argv)
};
opts["--csv-report"] = [&observer](const char *const *tok) {
- observer.append(nnfw::cpp14::make_unique<mlapse::CSVReportGenerator>(tok[0]));
+ observer.append(std::make_unique<mlapse::CSVReportGenerator>(tok[0]));
return 1;
};
diff --git a/runtime/contrib/mlapse/tfl/mlapse/benchmark_observer.h b/runtime/contrib/mlapse/tfl/mlapse/benchmark_observer.h
index 8fc570d24..792911157 100644
--- a/runtime/contrib/mlapse/tfl/mlapse/benchmark_observer.h
+++ b/runtime/contrib/mlapse/tfl/mlapse/benchmark_observer.h
@@ -72,6 +72,6 @@ struct BenchmarkObserver
virtual void notify(const NotificationArg<IterationEnd> &arg) = 0;
};
-} // namespace mlpase
+} // namespace mlapse
#endif // __MLAPSE_BENCHMARK_OBSERVER_H__
diff --git a/runtime/contrib/mlapse/tfl/mlapse/benchmark_runner.h b/runtime/contrib/mlapse/tfl/mlapse/benchmark_runner.h
index fcbb41d1b..7f10c059f 100644
--- a/runtime/contrib/mlapse/tfl/mlapse/benchmark_runner.h
+++ b/runtime/contrib/mlapse/tfl/mlapse/benchmark_runner.h
@@ -58,6 +58,6 @@ private:
BenchmarkObserver *_observer = nullptr;
};
-} // namespace mlpase
+} // namespace mlapse
#endif // __MLAPSE_BENCHMARK_RUNNER_H__
diff --git a/runtime/contrib/pure_arm_compute/CMakeLists.txt b/runtime/contrib/pure_arm_compute/CMakeLists.txt
index f9cdfcba9..3189a8b93 100644
--- a/runtime/contrib/pure_arm_compute/CMakeLists.txt
+++ b/runtime/contrib/pure_arm_compute/CMakeLists.txt
@@ -15,7 +15,7 @@ if(BUILD_TFLITE_BENCHMARK_MODEL)
endif()
target_link_libraries(nnapi_pure_arm_compute nnfw-nnapi-header)
target_link_libraries(nnapi_pure_arm_compute arm_compute arm_compute_ex)
-target_link_libraries(nnapi_pure_arm_compute nnfw_lib_cpp14 nnfw_lib_misc nnfw_lib_profiling)
+target_link_libraries(nnapi_pure_arm_compute nnfw_lib_misc nnfw_lib_profiling)
if(ANDROID)
target_link_libraries(nnapi_pure_arm_compute log)
diff --git a/runtime/contrib/pure_arm_compute/src/compilation.cc b/runtime/contrib/pure_arm_compute/src/compilation.cc
index 8cc86ebae..b97fab547 100644
--- a/runtime/contrib/pure_arm_compute/src/compilation.cc
+++ b/runtime/contrib/pure_arm_compute/src/compilation.cc
@@ -57,7 +57,7 @@
#include "misc/feature/IndexIterator.h"
#include "misc/tensor/IndexIterator.h"
-#include <cpp14/memory.h>
+#include <memory>
#include "compilation.h"
#include "model.h"
@@ -380,7 +380,7 @@ void ActivationBuilder::appendReLU(::arm_compute::ITensor *ifm_alloc)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), nullptr, act_info);
@@ -388,7 +388,7 @@ void ActivationBuilder::appendReLU(::arm_compute::ITensor *ifm_alloc)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, nullptr, act_info);
@@ -403,7 +403,7 @@ void ActivationBuilder::appendReLU1(::arm_compute::ITensor *ifm_alloc)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), nullptr, act_info);
@@ -411,7 +411,7 @@ void ActivationBuilder::appendReLU1(::arm_compute::ITensor *ifm_alloc)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, nullptr, act_info);
@@ -426,7 +426,7 @@ void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), nullptr, act_info);
@@ -434,7 +434,7 @@ void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, nullptr, act_info);
@@ -613,7 +613,7 @@ void Planner::visit(const ::internal::tflite::op::Add::Node &node)
{
if (::internal::arm_compute::isGpuMode())
{
- auto l = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticAddition>();
+ auto l = std::make_unique<::arm_compute::CLArithmeticAddition>();
// TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
l->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc),
@@ -623,7 +623,7 @@ void Planner::visit(const ::internal::tflite::op::Add::Node &node)
}
else // NEON
{
- auto l = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticAddition>();
+ auto l = std::make_unique<::arm_compute::NEArithmeticAddition>();
// TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
l->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE);
@@ -693,7 +693,7 @@ void Planner::visit(const ::internal::tflite::op::Sub::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticSubtraction>();
+ auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>();
// TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc),
@@ -703,7 +703,7 @@ void Planner::visit(const ::internal::tflite::op::Sub::Node &node)
}
else // NEON
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticSubtraction>();
+ auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
// TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE);
@@ -767,14 +767,13 @@ void Planner::visit(const ::internal::tflite::op::Mul::Node &node)
param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
-
auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
auto lhs_input_alloc = ctx.at(::internal::tflite::operand::Index{param.lhs_index});
auto rhs_input_alloc = ctx.at(::internal::tflite::operand::Index{param.rhs_index});
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPixelWiseMultiplication>();
+ auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>();
fn->configure(CAST_CL(lhs_input_alloc), CAST_CL(rhs_input_alloc), CAST_CL(output_alloc),
1.0, // scale
@@ -785,7 +784,7 @@ void Planner::visit(const ::internal::tflite::op::Mul::Node &node)
}
else // NEON
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPixelWiseMultiplication>();
+ auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>();
fn->configure(lhs_input_alloc, rhs_input_alloc, output_alloc,
1.0, // scale
@@ -856,7 +855,7 @@ void Planner::visit(const ::internal::tflite::op::Div::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticDivision>();
+ auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>();
fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc));
@@ -1321,7 +1320,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Implicit::Nod
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+ auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), CAST_CL(bias_alloc), CAST_CL(ofm_alloc),
conv_info, param.multipler);
@@ -1330,7 +1329,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Implicit::Nod
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
+ auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info, param.multipler);
@@ -1464,7 +1463,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Nod
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+ auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), CAST_CL(bias_alloc), CAST_CL(ofm_alloc),
conv_info, param.multipler);
@@ -1473,7 +1472,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Nod
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
+ auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info, param.multipler);
@@ -1527,7 +1526,7 @@ void Planner::visit(const ::internal::tflite::op::Dequantize::Node &node)
{
if (::internal::arm_compute::isGpuMode())
{
- auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>();
+ auto l = std::make_unique<::arm_compute::CLCast>();
l->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
fn = std::move(l);
@@ -2157,7 +2156,7 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node)
auto weight_alloc = ctx.at(::internal::tflite::operand::Index{param.weight_index});
auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index});
- auto fn = nnfw::cpp14::make_unique<arm_compute::CLFullyConnectedReshapingLayer>();
+ auto fn = std::make_unique<arm_compute::CLFullyConnectedReshapingLayer>();
fn->configure(CAST_CL(input_alloc), CAST_CL(weight_alloc), CAST_CL(bias_alloc),
CAST_CL(output_alloc), needs_reshape, asTensorShape(reshape));
@@ -2209,7 +2208,7 @@ void Planner::visit(const ::internal::tflite::op::ResizeBilinear::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLScale>();
+ auto fn = std::make_unique<::arm_compute::CLScale>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc),
::arm_compute::InterpolationPolicy::BILINEAR,
@@ -2262,7 +2261,7 @@ void Planner::visit(const ::internal::tflite::op::Reshape::Node &node)
if (::internal::arm_compute::isGpuMode())
{
// GenericReshape first apply NCHW->NHWC permutation, and apply reshape
- auto fn = nnfw::cpp14::make_unique<GenericReshapeLayer>();
+ auto fn = std::make_unique<GenericReshapeLayer>();
fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
@@ -2270,7 +2269,7 @@ void Planner::visit(const ::internal::tflite::op::Reshape::Node &node)
}
else
{
- auto fn = nnfw::cpp14::make_unique<GenericReshapeLayer>();
+ auto fn = std::make_unique<GenericReshapeLayer>();
fn->configure(input_alloc, output_alloc);
@@ -2316,7 +2315,7 @@ void Planner::visit(const ::internal::tflite::op::Squeeze::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReshapeLayer>();
+ auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
@@ -2324,7 +2323,7 @@ void Planner::visit(const ::internal::tflite::op::Squeeze::Node &node)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReshapeLayer>();
+ auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();
fn->configure(input_alloc, output_alloc);
@@ -2375,7 +2374,7 @@ void Planner::visit(const ::internal::tflite::op::Softmax::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSoftmaxLayer>();
+ auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>();
fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.scale);
@@ -2383,7 +2382,7 @@ void Planner::visit(const ::internal::tflite::op::Softmax::Node &node)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NESoftmaxLayer>();
+ auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>();
fn->configure(input_alloc, output_alloc, param.scale);
@@ -2518,7 +2517,7 @@ void Planner::visit(const ::internal::tflite::op::StridedSlice::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLStridedSlice>();
+ auto fn = std::make_unique<::arm_compute::CLStridedSlice>();
fn->configure(CAST_CL(inputData_alloc), CAST_CL(outputData_alloc), starts, ends, strides,
param.beginMask, param.endMask, param.shrinkAxisMask);
@@ -2645,7 +2644,7 @@ void Planner::visit(const ::internal::tflite::op::ReduceMin::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>();
+ auto fn = std::make_unique<::arm_compute::CLReduceOperation>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
::arm_compute::ReduceOperation::MIN);
@@ -2772,7 +2771,7 @@ void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>();
+ auto fn = std::make_unique<::arm_compute::CLReduceOperation>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
::arm_compute::ReduceOperation::MAX);
@@ -2823,7 +2822,7 @@ void Planner::visit(const ::internal::tflite::op::Cast::Node &node)
{
if (::internal::arm_compute::isGpuMode())
{
- auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>();
+ auto l = std::make_unique<::arm_compute::CLCast>();
l->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
fn = std::move(l);
@@ -2892,7 +2891,7 @@ void Planner::visit(const ::internal::tflite::op::TopKV2::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLTopKV2>();
+ auto fn = std::make_unique<::arm_compute::CLTopKV2>();
fn->configure(CAST_CL(input_alloc), param.k, CAST_CL(values_alloc), CAST_CL(indices_alloc));
@@ -2969,7 +2968,7 @@ void Planner::visit(const ::internal::tflite::op::Gather::Node &node)
{
std::unique_ptr<::arm_compute::IFunction> fn;
- auto l = nnfw::cpp14::make_unique<GenericGather>();
+ auto l = std::make_unique<GenericGather>();
l->configure(CAST_CL(ifm_alloc), CAST_CL(indices_alloc), CAST_CL(ofm_alloc), param.axis);
fn = std::move(l);
builder.append("Gather", std::move(fn));
@@ -3032,7 +3031,7 @@ void Planner::visit(const ::internal::tflite::op::PReLU::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPReLU>();
+ auto fn = std::make_unique<::arm_compute::CLPReLU>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(alpha_alloc), CAST_CL(ofm_alloc));
builder.append("PReLU", std::move(fn));
}
@@ -3082,7 +3081,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
@@ -3090,7 +3089,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU::Node &node)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, ofm_alloc, act_info);
@@ -3136,7 +3135,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU1::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
@@ -3144,7 +3143,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU1::Node &node)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, ofm_alloc, act_info);
@@ -3190,7 +3189,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU6::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
@@ -3198,7 +3197,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU6::Node &node)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, ofm_alloc, act_info);
@@ -3244,7 +3243,7 @@ void Planner::visit(const ::internal::tflite::op::Tanh::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
@@ -3252,7 +3251,7 @@ void Planner::visit(const ::internal::tflite::op::Tanh::Node &node)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, ofm_alloc, act_info);
@@ -3298,7 +3297,7 @@ void Planner::visit(const ::internal::tflite::op::Logistic::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
@@ -3306,7 +3305,7 @@ void Planner::visit(const ::internal::tflite::op::Logistic::Node &node)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, ofm_alloc, act_info);
@@ -3442,7 +3441,7 @@ void Planner::visit(const ::internal::tflite::op::Mean::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceMean>();
+ auto fn = std::make_unique<::arm_compute::CLReduceMean>();
fn->configure(CAST_CL(ifm_alloc), reduction_axis, param.keep_dims, CAST_CL(ofm_alloc));
@@ -3630,13 +3629,12 @@ void Planner::visit(const ::internal::tflite::op::Transpose::Node &node)
param.rank = _ctx.at(ifm_index).shape().rank();
auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
-
auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
const auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPermute>();
+ auto fn = std::make_unique<::arm_compute::CLPermute>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc),
getARMComputePermutationVector(param.rank, param.pv));
@@ -3647,7 +3645,6 @@ void Planner::visit(const ::internal::tflite::op::Transpose::Node &node)
{
throw std::runtime_error("Not supported, yet");
}
-
};
_builder.addStage(stage);
@@ -3685,7 +3682,7 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLFloor>();
+ auto fn = std::make_unique<::arm_compute::CLFloor>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc));
@@ -3693,7 +3690,7 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEFloor>();
+ auto fn = std::make_unique<::arm_compute::NEFloor>();
fn->configure(ifm_alloc, ofm_alloc);
@@ -3766,7 +3763,7 @@ void Planner::visit(const ::internal::tflite::op::ArgMax::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArgOperation>();
+ auto fn = std::make_unique<::arm_compute::CLArgOperation>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
::arm_compute::ArgOperation::MAX);
@@ -3819,7 +3816,7 @@ void Planner::visit(const ::internal::tflite::op::SQRT::Node &node)
{
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), act_info);
@@ -3827,7 +3824,7 @@ void Planner::visit(const ::internal::tflite::op::SQRT::Node &node)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(input_alloc, output_alloc, act_info);
@@ -3873,7 +3870,7 @@ void Planner::visit(const ::internal::tflite::op::RSQRT::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLRsqrtLayer>();
+ auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>();
fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
@@ -3936,7 +3933,7 @@ void Planner::visit(const ::internal::tflite::op::Equal::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLComparison>();
+ auto fn = std::make_unique<::arm_compute::CLComparison>();
fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc),
::arm_compute::ComparisonOperation::Equal);
@@ -4046,7 +4043,7 @@ void Planner::visit(const ::internal::tflite::op::TransposeConv::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLTransposeConvLayer>();
+ auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>();
auto symmetric_tconv_info = asPadStrideInfo(param.padding, param.stride);
@@ -4111,7 +4108,7 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
+ auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc));
builder.append("SquaredDifference", std::move(fn));
@@ -4121,7 +4118,6 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node)
// TODO Enable NEON Support
throw std::runtime_error("Not supported, yet");
}
-
};
_builder.addStage(stage);
@@ -4244,7 +4240,7 @@ void Planner::visit(const ::internal::tflite::op::Pad::Node &node)
{
if (::internal::arm_compute::isGpuMode()) // GPU
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPadLayer>();
+ auto fn = std::make_unique<::arm_compute::CLPadLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.padding_list,
param.pixel_value);
@@ -4314,7 +4310,7 @@ void Planner::visit(const ::internal::tflite::op::SpaceToDepth::Node &node)
{
if (::internal::arm_compute::isGpuMode()) // GPU
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToDepth>();
+ auto fn = std::make_unique<::arm_compute::CLSpaceToDepth>();
fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.block_size);
@@ -4448,7 +4444,7 @@ void Planner::visit(const ::internal::tflite::op::SpaceToBatchND::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToBatchND>();
+ auto fn = std::make_unique<::arm_compute::CLSpaceToBatchND>();
fn->configure(CAST_CL(input_alloc), CAST_CL(block_size_alloc), CAST_CL(padding_size_alloc),
CAST_CL(output_alloc));
@@ -4543,7 +4539,7 @@ void Planner::visit(const ::internal::tflite::op::BatchToSpaceNd::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBatchToSpaceLayer>();
+ auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>();
fn->configure(CAST_CL(input_alloc), CAST_CL(block_size_alloc), CAST_CL(output_alloc));
builder.append("BatchToSpaceND", std::move(fn));
@@ -4608,7 +4604,7 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayer>();
+ auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info);
@@ -4616,7 +4612,7 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayer>();
+ auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
fn->configure(ifm_alloc, ofm_alloc, norm_info);
@@ -4889,7 +4885,7 @@ void Planner::visit(const ::internal::tflite::op::EmbeddingLookup::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLEmbeddingLookup>();
+ auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>();
fn->configure(CAST_CL(values_alloc), CAST_CL(output_alloc), CAST_CL(lookups_alloc));
@@ -4985,7 +4981,7 @@ void Planner::visit(const ::internal::tflite::op::HashtableLookup::Node &node)
if (::internal::arm_compute::isGpuMode()) // GPU
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLHashtableLookup>();
+ auto fn = std::make_unique<::arm_compute::CLHashtableLookup>();
fn->configure(CAST_CL(lookups_alloc), CAST_CL(keys_alloc), CAST_CL(values_alloc),
CAST_CL(output_alloc), CAST_CL(hits_alloc));
@@ -5049,7 +5045,7 @@ void Planner::visit(const ::internal::tflite::op::LocalResponseNormalization::No
param.beta, param.bias, false);
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayer>();
+ auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info);
@@ -5057,7 +5053,7 @@ void Planner::visit(const ::internal::tflite::op::LocalResponseNormalization::No
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayer>();
+ auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
fn->configure(ifm_alloc, ofm_alloc, norm_info);
@@ -5121,7 +5117,7 @@ void Planner::visit(const ::internal::tflite::op::DepthToSpace::Node &node)
{
if (::internal::arm_compute::isGpuMode()) // GPU
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthToSpace>();
+ auto fn = std::make_unique<::arm_compute::CLDepthToSpace>();
fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.block_size);
@@ -5194,7 +5190,7 @@ void Planner::visit(const ::internal::tflite::op::Unpack::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLUnstack>();
+ auto fn = std::make_unique<::arm_compute::CLUnstack>();
std::vector<::arm_compute::ICLTensor *> outputs;
for (const auto &index : param.ofm_indexes)
{
@@ -5282,7 +5278,7 @@ void Planner::visit(const ::internal::tflite::op::Pack::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLStackLayer>();
+ auto fn = std::make_unique<::arm_compute::CLStackLayer>();
std::vector<::arm_compute::ICLTensor *> inputs;
for (const auto &index : param.ifm_indexes)
{
@@ -5340,7 +5336,7 @@ void Planner::visit(const ::internal::tflite::op::Neg::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNeg>();
+ auto fn = std::make_unique<::arm_compute::CLNeg>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc));
builder.append("Neg", std::move(fn));
@@ -5350,7 +5346,6 @@ void Planner::visit(const ::internal::tflite::op::Neg::Node &node)
// TODO Enable NEON Support
throw std::runtime_error("Not supported, yet");
}
-
};
_builder.addStage(stage);
}
@@ -5387,7 +5382,7 @@ void Planner::visit(const ::internal::tflite::op::Exp::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLExpLayer>();
+ auto fn = std::make_unique<::arm_compute::CLExpLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc));
@@ -5512,7 +5507,7 @@ void Planner::visit(const ::internal::tflite::op::ReduceSum::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>();
+ auto fn = std::make_unique<::arm_compute::CLReduceOperation>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
::arm_compute::ReduceOperation::SUM);
@@ -5563,7 +5558,7 @@ void Planner::visit(const ::internal::tflite::op::Abs::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), act_info);
@@ -5571,7 +5566,7 @@ void Planner::visit(const ::internal::tflite::op::Abs::Node &node)
}
else
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(input_alloc, output_alloc, act_info);
@@ -5632,7 +5627,7 @@ void Planner::visit(const ::internal::tflite::op::NotEqual::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLComparison>();
+ auto fn = std::make_unique<::arm_compute::CLComparison>();
fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc),
::arm_compute::ComparisonOperation::NotEqual);
@@ -5701,7 +5696,7 @@ void Planner::visit(const ::internal::tflite::op::LogicalAnd::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>();
+ auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc),
::arm_compute::BinaryLogicalOperation::AND);
@@ -5750,7 +5745,7 @@ void Planner::visit(const ::internal::tflite::op::LogicalNot::Node &node)
auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBitwiseNot>();
+ auto fn = std::make_unique<::arm_compute::CLBitwiseNot>();
fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
@@ -5818,7 +5813,7 @@ void Planner::visit(const ::internal::tflite::op::LogicalOr::Node &node)
auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index});
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>();
+ auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc),
::arm_compute::BinaryLogicalOperation::OR);
diff --git a/runtime/contrib/pure_arm_compute/src/internal/MatrixSink.h b/runtime/contrib/pure_arm_compute/src/internal/MatrixSink.h
index 23ecc112b..b33556697 100644
--- a/runtime/contrib/pure_arm_compute/src/internal/MatrixSink.h
+++ b/runtime/contrib/pure_arm_compute/src/internal/MatrixSink.h
@@ -62,10 +62,10 @@ public:
assert(tensor.info()->dimension(0) == _width);
assert(tensor.info()->dimension(1) == _height);
- using ::arm_compute::Window;
- using ::arm_compute::Iterator;
using ::arm_compute::Coordinates;
using ::arm_compute::execute_window_loop;
+ using ::arm_compute::Iterator;
+ using ::arm_compute::Window;
Window window;
diff --git a/runtime/contrib/pure_arm_compute/src/internal/MatrixSource.h b/runtime/contrib/pure_arm_compute/src/internal/MatrixSource.h
index 71d6a804f..c19d6af01 100644
--- a/runtime/contrib/pure_arm_compute/src/internal/MatrixSource.h
+++ b/runtime/contrib/pure_arm_compute/src/internal/MatrixSource.h
@@ -54,10 +54,10 @@ public:
*/
void push(::arm_compute::ITensor &tensor) const override
{
- using ::arm_compute::Window;
- using ::arm_compute::Iterator;
using ::arm_compute::Coordinates;
using ::arm_compute::execute_window_loop;
+ using ::arm_compute::Iterator;
+ using ::arm_compute::Window;
Window window;
window.use_tensor_dimensions(tensor.info()->tensor_shape(), ::arm_compute::Window::DimY);
diff --git a/runtime/contrib/pure_arm_compute/src/internal/Sinks.h b/runtime/contrib/pure_arm_compute/src/internal/Sinks.h
index 7317c67c1..8e8d775ba 100644
--- a/runtime/contrib/pure_arm_compute/src/internal/Sinks.h
+++ b/runtime/contrib/pure_arm_compute/src/internal/Sinks.h
@@ -68,8 +68,8 @@ public:
const ::internal::arm_compute::tensor::View<T> from{&tensor};
::internal::nnapi::tensor::View<T> into{_shape, _base, _size};
- using ::nnfw::misc::tensor::iterate;
using ::nnfw::misc::tensor::Index;
+ using ::nnfw::misc::tensor::iterate;
const uint32_t rank = _shape.rank();
diff --git a/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSink.h b/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSink.h
index 1e14e2d6c..e47130860 100644
--- a/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSink.h
+++ b/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSink.h
@@ -57,10 +57,10 @@ public:
*/
void pull(::arm_compute::ITensor &tensor) const override
{
- using ::arm_compute::Window;
- using ::arm_compute::Iterator;
using ::arm_compute::Coordinates;
using ::arm_compute::execute_window_loop;
+ using ::arm_compute::Iterator;
+ using ::arm_compute::Window;
Window window;
diff --git a/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSource.h b/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSource.h
index 3d8d1b958..1e7fb5272 100644
--- a/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSource.h
+++ b/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSource.h
@@ -57,10 +57,10 @@ public:
*/
void push(::arm_compute::ITensor &tensor) const override
{
- using ::arm_compute::Window;
- using ::arm_compute::Iterator;
using ::arm_compute::Coordinates;
using ::arm_compute::execute_window_loop;
+ using ::arm_compute::Iterator;
+ using ::arm_compute::Window;
Window window;
diff --git a/runtime/contrib/pure_arm_compute/src/internal/arm_compute.cc b/runtime/contrib/pure_arm_compute/src/internal/arm_compute.cc
index a7be2068d..e1a903e52 100644
--- a/runtime/contrib/pure_arm_compute/src/internal/arm_compute.cc
+++ b/runtime/contrib/pure_arm_compute/src/internal/arm_compute.cc
@@ -45,7 +45,7 @@ void Object::access(const std::function<void(::arm_compute::ITensor &tensor)> &f
}
} // namespace operand
-} // namepsace arm_compute
+} // namespace arm_compute
} // namespace internal
namespace internal
@@ -65,7 +65,7 @@ Context &Context::set(const ::internal::tflite::operand::Index &id,
}
} // namespace operand
-} // namepsace arm_compute
+} // namespace arm_compute
} // namespace internal
namespace internal
@@ -83,5 +83,5 @@ bool isGpuMode()
return true;
}
-} // namepsace arm_compute
+} // namespace arm_compute
} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/arm_compute.h b/runtime/contrib/pure_arm_compute/src/internal/arm_compute.h
index fb6acaf81..64827476c 100644
--- a/runtime/contrib/pure_arm_compute/src/internal/arm_compute.h
+++ b/runtime/contrib/pure_arm_compute/src/internal/arm_compute.h
@@ -68,7 +68,7 @@ public:
};
} // namespace operand
-} // namepsace arm_compute
+} // namespace arm_compute
} // namespace internal
#include "internal/Model.h"
@@ -131,7 +131,7 @@ private:
};
} // namespace operand
-} // namepsace arm_compute
+} // namespace arm_compute
} // namespace internal
#include <arm_compute/runtime/IFunction.h>
@@ -192,13 +192,14 @@ public:
* @return The operation index as reference
*/
int &op_idx() { return _op_idx; }
+
private:
int _op_idx;
#endif
};
} // namespace op
-} // namepsace arm_compute
+} // namespace arm_compute
} // namespace internal
namespace internal
@@ -252,7 +253,7 @@ private:
};
} // namespace op
-} // namepsace arm_compute
+} // namespace arm_compute
} // namespace internal
namespace internal
@@ -312,7 +313,7 @@ private:
op::Sequence _ops;
};
-} // namepsace arm_compute
+} // namespace arm_compute
} // namespace internal
#include <arm_compute/core/ITensor.h>
@@ -331,7 +332,7 @@ bool isGpuMode();
#define CAST_CL(tensor) static_cast<::arm_compute::CLTensor *>(tensor)
#define CAST_NE(tensor) static_cast<::arm_compute::Tensor *>(tensor)
-} // namepsace arm_compute
+} // namespace arm_compute
} // namespace internal
#endif // __INTERNAL_ARM_COMPUTE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/arm_compute/Cast.h b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/Cast.h
index 211a6ac87..a407d5fad 100644
--- a/runtime/contrib/pure_arm_compute/src/internal/arm_compute/Cast.h
+++ b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/Cast.h
@@ -49,11 +49,11 @@
const ::arm_compute::Coordinates &axises);
/**
-* @brief Generate arm compute permutation vector from runtime permutation vector
-* @param[in] rank Rank number supported upto 4
-* @param[in] runtime_pv Integer array for runtime permutation vector
-* @return Permutation vector of arm compute
-*/
+ * @brief Generate arm compute permutation vector from runtime permutation vector
+ * @param[in] rank Rank number supported upto 4
+ * @param[in] runtime_pv Integer array for runtime permutation vector
+ * @return Permutation vector of arm compute
+ */
::arm_compute::PermutationVector getARMComputePermutationVector(uint32_t rank,
const int32_t *runtime_pv);
/**
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc b/runtime/contrib/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc
index 0768039d0..f9cea8250 100644
--- a/runtime/contrib/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc
@@ -1,4 +1,5 @@
-/*Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Mean.h b/runtime/contrib/pure_arm_compute/src/internal/op/Mean.h
index f8e7ed308..998ba6693 100644
--- a/runtime/contrib/pure_arm_compute/src/internal/op/Mean.h
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Mean.h
@@ -51,12 +51,12 @@ struct Param
*/
Param() = default;
/**
- * @brief Construct a new Param object with params
- * @param[in] inputCount Count of inputs
- * @param[in] inputs Pointer of inputs
- * @param[in] outputCount Count of outputs
- * @param[in] outputs Pointer of outputs
- */
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
diff --git a/runtime/contrib/pure_arm_compute/src/memory.cc b/runtime/contrib/pure_arm_compute/src/memory.cc
index 9e999661a..6699a00c8 100644
--- a/runtime/contrib/pure_arm_compute/src/memory.cc
+++ b/runtime/contrib/pure_arm_compute/src/memory.cc
@@ -18,7 +18,7 @@
#include <sys/mman.h>
#include <memory>
-#include "cpp14/memory.h"
+#include <memory>
#include "memory.h"
int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset,
@@ -31,7 +31,7 @@ int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t
// Use unique pointer to avoid memory leak
std::unique_ptr<ANeuralNetworksMemory> memory_ptr =
- nnfw::cpp14::make_unique<ANeuralNetworksMemory>(size, protect, fd, offset);
+ std::make_unique<ANeuralNetworksMemory>(size, protect, fd, offset);
if (memory_ptr == nullptr)
{
return ANEURALNETWORKS_OUT_OF_MEMORY;
diff --git a/runtime/contrib/pure_arm_compute/src/model.cc b/runtime/contrib/pure_arm_compute/src/model.cc
index ddca589db..0ecd838b3 100644
--- a/runtime/contrib/pure_arm_compute/src/model.cc
+++ b/runtime/contrib/pure_arm_compute/src/model.cc
@@ -188,8 +188,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
assert(inputCount == 3);
assert(outputCount == 1);
- using internal::tflite::op::Add::Param;
using internal::tflite::op::Add::Node;
+ using internal::tflite::op::Add::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -203,8 +203,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
assert(inputCount == 3);
assert(outputCount == 1);
- using internal::tflite::op::Sub::Param;
using internal::tflite::op::Sub::Node;
+ using internal::tflite::op::Sub::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -218,8 +218,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
assert(inputCount == 3);
assert(outputCount == 1);
- using internal::tflite::op::Mul::Param;
using internal::tflite::op::Mul::Node;
+ using internal::tflite::op::Mul::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -233,8 +233,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
assert(inputCount == 3);
assert(outputCount == 1);
- using internal::tflite::op::Div::Param;
using internal::tflite::op::Div::Node;
+ using internal::tflite::op::Div::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -253,8 +253,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
if (inputCount == 7)
{
- using internal::tflite::op::Conv2D::Implicit::Param;
using internal::tflite::op::Conv2D::Implicit::Node;
+ using internal::tflite::op::Conv2D::Implicit::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -263,8 +263,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
else
{
- using internal::tflite::op::Conv2D::Explicit::Param;
using internal::tflite::op::Conv2D::Explicit::Node;
+ using internal::tflite::op::Conv2D::Explicit::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -284,8 +284,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
if (inputCount == 8)
{
- using internal::tflite::op::DepthwiseConv2D::Implicit::Param;
using internal::tflite::op::DepthwiseConv2D::Implicit::Node;
+ using internal::tflite::op::DepthwiseConv2D::Implicit::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -294,8 +294,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
else
{
- using internal::tflite::op::DepthwiseConv2D::Explicit::Param;
using internal::tflite::op::DepthwiseConv2D::Explicit::Node;
+ using internal::tflite::op::DepthwiseConv2D::Explicit::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -315,8 +315,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
if (inputCount == 7)
{
- using internal::tflite::op::MaxPool2D::Implicit::Param;
using internal::tflite::op::MaxPool2D::Implicit::Node;
+ using internal::tflite::op::MaxPool2D::Implicit::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -325,8 +325,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
else
{
- using internal::tflite::op::MaxPool2D::Explicit::Param;
using internal::tflite::op::MaxPool2D::Explicit::Node;
+ using internal::tflite::op::MaxPool2D::Explicit::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -339,8 +339,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
case ANEURALNETWORKS_DEQUANTIZE:
{
assert(outputCount == 1 && inputCount == 1);
- using internal::tflite::op::Dequantize::Param;
using internal::tflite::op::Dequantize::Node;
+ using internal::tflite::op::Dequantize::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -359,8 +359,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
if (inputCount == 7)
{
- using internal::tflite::op::AvgPool2D::Implicit::Param;
using internal::tflite::op::AvgPool2D::Implicit::Node;
+ using internal::tflite::op::AvgPool2D::Implicit::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -369,8 +369,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
else
{
- using internal::tflite::op::AvgPool2D::Explicit::Param;
using internal::tflite::op::AvgPool2D::Explicit::Node;
+ using internal::tflite::op::AvgPool2D::Explicit::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -382,8 +382,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_CONCATENATION:
{
- using internal::tflite::op::Concat::Param;
using internal::tflite::op::Concat::Node;
+ using internal::tflite::op::Concat::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -394,8 +394,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_RESIZE_BILINEAR:
{
- using internal::tflite::op::ResizeBilinear::Param;
using internal::tflite::op::ResizeBilinear::Node;
+ using internal::tflite::op::ResizeBilinear::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -406,8 +406,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_RESHAPE:
{
- using internal::tflite::op::Reshape::Param;
using internal::tflite::op::Reshape::Node;
+ using internal::tflite::op::Reshape::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -418,8 +418,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_SQUEEZE:
{
- using internal::tflite::op::Squeeze::Param;
using internal::tflite::op::Squeeze::Node;
+ using internal::tflite::op::Squeeze::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -430,8 +430,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_FULLY_CONNECTED:
{
- using internal::tflite::op::FullyConnected::Param;
using internal::tflite::op::FullyConnected::Node;
+ using internal::tflite::op::FullyConnected::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -442,8 +442,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_SOFTMAX:
{
- using internal::tflite::op::Softmax::Param;
using internal::tflite::op::Softmax::Node;
+ using internal::tflite::op::Softmax::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -454,8 +454,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_RELU:
{
- using internal::tflite::op::ReLU::Param;
using internal::tflite::op::ReLU::Node;
+ using internal::tflite::op::ReLU::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -466,8 +466,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_RELU1:
{
- using internal::tflite::op::ReLU1::Param;
using internal::tflite::op::ReLU1::Node;
+ using internal::tflite::op::ReLU1::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -478,8 +478,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_RELU6:
{
- using internal::tflite::op::ReLU6::Param;
using internal::tflite::op::ReLU6::Node;
+ using internal::tflite::op::ReLU6::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -490,8 +490,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_TANH:
{
- using internal::tflite::op::Tanh::Param;
using internal::tflite::op::Tanh::Node;
+ using internal::tflite::op::Tanh::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -502,8 +502,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_STRIDED_SLICE:
{
- using internal::tflite::op::StridedSlice::Param;
using internal::tflite::op::StridedSlice::Node;
+ using internal::tflite::op::StridedSlice::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -514,8 +514,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_LOGISTIC:
{
- using internal::tflite::op::Logistic::Param;
using internal::tflite::op::Logistic::Node;
+ using internal::tflite::op::Logistic::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -526,8 +526,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_MEAN:
{
- using internal::tflite::op::Mean::Param;
using internal::tflite::op::Mean::Node;
+ using internal::tflite::op::Mean::Param;
auto &operations = model->deref().operations();
operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
@@ -536,8 +536,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_RNN:
{
- using internal::tflite::op::RNN::Param;
using internal::tflite::op::RNN::Node;
+ using internal::tflite::op::RNN::Param;
auto &operations = model->deref().operations();
@@ -547,8 +547,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_TRANSPOSE:
{
- using internal::tflite::op::Transpose::Param;
using internal::tflite::op::Transpose::Node;
+ using internal::tflite::op::Transpose::Param;
auto &operations = model->deref().operations();
operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
@@ -557,8 +557,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_LSTM:
{
- using internal::tflite::op::LSTM::Param;
using internal::tflite::op::LSTM::Node;
+ using internal::tflite::op::LSTM::Param;
auto &operations = model->deref().operations();
@@ -568,8 +568,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_FLOOR:
{
- using internal::tflite::op::Floor::Param;
using internal::tflite::op::Floor::Node;
+ using internal::tflite::op::Floor::Param;
auto &operations = model->deref().operations();
@@ -581,8 +581,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
{
assert(inputCount == 2 && outputCount == 1);
- using internal::tflite::op::Pad::Param;
using internal::tflite::op::Pad::Node;
+ using internal::tflite::op::Pad::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -593,8 +593,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_SPACE_TO_DEPTH:
{
- using internal::tflite::op::SpaceToDepth::Param;
using internal::tflite::op::SpaceToDepth::Node;
+ using internal::tflite::op::SpaceToDepth::Param;
auto &operations = model->deref().operations();
@@ -604,8 +604,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_SPACE_TO_BATCH_ND:
{
- using internal::tflite::op::SpaceToBatchND::Param;
using internal::tflite::op::SpaceToBatchND::Node;
+ using internal::tflite::op::SpaceToBatchND::Param;
auto &operations = model->deref().operations();
@@ -615,8 +615,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_BATCH_TO_SPACE_ND:
{
- using internal::tflite::op::BatchToSpaceNd::Param;
using internal::tflite::op::BatchToSpaceNd::Node;
+ using internal::tflite::op::BatchToSpaceNd::Param;
auto &operations = model->deref().operations();
@@ -633,8 +633,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
if (inputCount == 7)
{
- using internal::tflite::op::L2Pool2D::Implicit::Param;
using internal::tflite::op::L2Pool2D::Implicit::Node;
+ using internal::tflite::op::L2Pool2D::Implicit::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -643,8 +643,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
else
{
- using internal::tflite::op::L2Pool2D::Explicit::Param;
using internal::tflite::op::L2Pool2D::Explicit::Node;
+ using internal::tflite::op::L2Pool2D::Explicit::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -659,8 +659,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
assert(inputCount == 2);
assert(outputCount == 1);
- using internal::tflite::op::EmbeddingLookup::Param;
using internal::tflite::op::EmbeddingLookup::Node;
+ using internal::tflite::op::EmbeddingLookup::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -673,8 +673,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
{
assert(inputCount == 1 && outputCount == 1);
- using internal::tflite::op::L2Normalization::Param;
using internal::tflite::op::L2Normalization::Node;
+ using internal::tflite::op::L2Normalization::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -688,8 +688,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
assert(inputCount == 3);
assert(outputCount == 2);
- using internal::tflite::op::HashtableLookup::Param;
using internal::tflite::op::HashtableLookup::Node;
+ using internal::tflite::op::HashtableLookup::Param;
auto &operations = model->deref().operations();
@@ -700,8 +700,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
case ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION:
{
- using internal::tflite::op::LocalResponseNormalization::Param;
using internal::tflite::op::LocalResponseNormalization::Node;
+ using internal::tflite::op::LocalResponseNormalization::Param;
auto &operations = model->deref().operations();
@@ -711,8 +711,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_DEPTH_TO_SPACE:
{
- using internal::tflite::op::DepthToSpace::Param;
using internal::tflite::op::DepthToSpace::Node;
+ using internal::tflite::op::DepthToSpace::Param;
auto &operations = model->deref().operations();
@@ -741,8 +741,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
{
case ANEURALNETWORKS_CAST_EX:
{
- using internal::tflite::op::Cast::Param;
using internal::tflite::op::Cast::Node;
+ using internal::tflite::op::Cast::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -753,8 +753,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_REDUCE_MIN_EX:
{
- using internal::tflite::op::ReduceMin::Param;
using internal::tflite::op::ReduceMin::Node;
+ using internal::tflite::op::ReduceMin::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -765,8 +765,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_REDUCE_MAX_EX:
{
- using internal::tflite::op::ReduceMax::Param;
using internal::tflite::op::ReduceMax::Node;
+ using internal::tflite::op::ReduceMax::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -777,8 +777,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_PRELU_EX:
{
- using internal::tflite::op::PReLU::Param;
using internal::tflite::op::PReLU::Node;
+ using internal::tflite::op::PReLU::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -789,8 +789,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_TRANSPOSE_CONV_EX:
{
- using internal::tflite::op::TransposeConv::Param;
using internal::tflite::op::TransposeConv::Node;
+ using internal::tflite::op::TransposeConv::Param;
auto &operations = model->deref().operations();
@@ -800,8 +800,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_LOGICAL_AND_EX:
{
- using internal::tflite::op::LogicalAnd::Param;
using internal::tflite::op::LogicalAnd::Node;
+ using internal::tflite::op::LogicalAnd::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -812,8 +812,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_LOGICAL_OR_EX:
{
- using internal::tflite::op::LogicalOr::Param;
using internal::tflite::op::LogicalOr::Node;
+ using internal::tflite::op::LogicalOr::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -824,8 +824,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_LOGICAL_NOT_EX:
{
- using internal::tflite::op::LogicalNot::Param;
using internal::tflite::op::LogicalNot::Node;
+ using internal::tflite::op::LogicalNot::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -836,8 +836,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_RSQRT_EX:
{
- using internal::tflite::op::RSQRT::Param;
using internal::tflite::op::RSQRT::Node;
+ using internal::tflite::op::RSQRT::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -848,8 +848,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_SQRT_EX:
{
- using internal::tflite::op::SQRT::Param;
using internal::tflite::op::SQRT::Node;
+ using internal::tflite::op::SQRT::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -860,8 +860,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_EQUAL_EX:
{
- using internal::tflite::op::Equal::Param;
using internal::tflite::op::Equal::Node;
+ using internal::tflite::op::Equal::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -872,8 +872,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_SQUARED_DIFFERENCE_EX:
{
- using internal::tflite::op::SquaredDifference::Param;
using internal::tflite::op::SquaredDifference::Node;
+ using internal::tflite::op::SquaredDifference::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -884,8 +884,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_TOPK_V2_EX:
{
- using internal::tflite::op::TopKV2::Param;
using internal::tflite::op::TopKV2::Node;
+ using internal::tflite::op::TopKV2::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -896,8 +896,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_GATHER_EX:
{
- using internal::tflite::op::Gather::Param;
using internal::tflite::op::Gather::Node;
+ using internal::tflite::op::Gather::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -908,8 +908,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_SPLIT_EX:
{
- using internal::tflite::op::Split::Param;
using internal::tflite::op::Split::Node;
+ using internal::tflite::op::Split::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -920,8 +920,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_UNPACK_EX:
{
- using internal::tflite::op::Unpack::Param;
using internal::tflite::op::Unpack::Node;
+ using internal::tflite::op::Unpack::Param;
auto &operations = model->deref().operations();
@@ -931,8 +931,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_NEG_EX:
{
- using internal::tflite::op::Neg::Param;
using internal::tflite::op::Neg::Node;
+ using internal::tflite::op::Neg::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -943,8 +943,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_EXP_EX:
{
- using internal::tflite::op::Exp::Param;
using internal::tflite::op::Exp::Node;
+ using internal::tflite::op::Exp::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -955,8 +955,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_REDUCE_SUM_EX:
{
- using internal::tflite::op::ReduceSum::Param;
using internal::tflite::op::ReduceSum::Node;
+ using internal::tflite::op::ReduceSum::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -967,8 +967,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_PACK_EX:
{
- using internal::tflite::op::Pack::Param;
using internal::tflite::op::Pack::Node;
+ using internal::tflite::op::Pack::Param;
auto &operations = model->deref().operations();
@@ -978,8 +978,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_ABS_EX:
{
- using internal::tflite::op::Abs::Param;
using internal::tflite::op::Abs::Node;
+ using internal::tflite::op::Abs::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -990,8 +990,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_ARGMAX_EX:
{
- using internal::tflite::op::ArgMax::Param;
using internal::tflite::op::ArgMax::Node;
+ using internal::tflite::op::ArgMax::Param;
// Add 'operations'
auto &operations = model->deref().operations();
@@ -1002,8 +1002,8 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
}
case ANEURALNETWORKS_NOT_EQUAL_EX:
{
- using internal::tflite::op::NotEqual::Param;
using internal::tflite::op::NotEqual::Node;
+ using internal::tflite::op::NotEqual::Param;
// Add 'operations'
auto &operations = model->deref().operations();
diff --git a/runtime/contrib/style_transfer_app/CMakeLists.txt b/runtime/contrib/style_transfer_app/CMakeLists.txt
new file mode 100644
index 000000000..4896e6710
--- /dev/null
+++ b/runtime/contrib/style_transfer_app/CMakeLists.txt
@@ -0,0 +1,40 @@
+if(NOT BUILD_STYLE_TRANSFER_APP)
+ return()
+endif(NOT BUILD_STYLE_TRANSFER_APP)
+
+if(NOT BUILD_ONERT)
+ return()
+endif(NOT BUILD_ONERT)
+
+find_package(JPEG)
+if(JPEG_FOUND)
+ add_definitions(-DNNFW_ST_APP_JPEG_SUPPORTED)
+else(JPEG_FOUND)
+ message(WARNING "JPEG not found.")
+endif(JPEG_FOUND)
+
+list(APPEND STYLE_TRANSFER_APP_SRCS "src/style_transfer_app.cc")
+list(APPEND STYLE_TRANSFER_APP_SRCS "src/args.cc")
+list(APPEND STYLE_TRANSFER_APP_SRCS "src/bitmap_helper.cc")
+if(JPEG_FOUND)
+ list(APPEND STYLE_TRANSFER_APP_SRCS "src/jpeg_helper.cc")
+endif(JPEG_FOUND)
+
+nnas_find_package(Boost REQUIRED)
+
+add_executable(style_transfer_app ${STYLE_TRANSFER_APP_SRCS})
+target_include_directories(style_transfer_app PRIVATE src)
+target_include_directories(style_transfer_app PRIVATE ${Boost_INCLUDE_DIRS})
+if(JPEG_FOUND)
+ target_include_directories(style_transfer_app PRIVATE ${JPEG_INCLUDE_DIRS})
+endif(JPEG_FOUND)
+
+target_link_libraries(style_transfer_app onert_core onert tflite_loader)
+target_link_libraries(style_transfer_app tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite)
+target_link_libraries(style_transfer_app nnfw-dev)
+target_link_libraries(style_transfer_app boost_program_options boost_system boost_filesystem)
+if(JPEG_FOUND)
+ target_link_libraries(style_transfer_app ${JPEG_LIBRARIES})
+endif(JPEG_FOUND)
+
+install(TARGETS style_transfer_app DESTINATION bin)
diff --git a/runtime/contrib/style_transfer_app/README.md b/runtime/contrib/style_transfer_app/README.md
new file mode 100644
index 000000000..f56cb806e
--- /dev/null
+++ b/runtime/contrib/style_transfer_app/README.md
@@ -0,0 +1,23 @@
+# style_transfer_app
+
+A sample app that runs `style transfer models`
+
+It reads a neural network model from an `nnpackage` and an input image, converts the image through the network, and produces an output image.
+
+It supports both JPG and BMP image formats. It uses **runtime API** internally.
+
+## How to use
+
+```
+$ ./style_transfer_app --nnpackage path_to_nnpackage --input input_image --output output_image
+```
+
+## Install libjpeg
+
+To read/write JPG images, you should install `libjpeg` on the host and target device.
+
+```bash
+$ sudo apt-get install libjpeg-dev
+```
+
+If `libjpeg` is not installed on the host or target, this app only supports the BMP file format.
diff --git a/runtime/contrib/style_transfer_app/src/args.cc b/runtime/contrib/style_transfer_app/src/args.cc
new file mode 100644
index 000000000..e194b8203
--- /dev/null
+++ b/runtime/contrib/style_transfer_app/src/args.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <iostream>
+#include <boost/filesystem.hpp>
+
+namespace StyleTransferApp
+{
+
+Args::Args(const int argc, char **argv) noexcept
+{
+ Initialize();
+ Parse(argc, argv);
+}
+
+void Args::Initialize(void)
+{
+ // General options
+ po::options_description general("General options");
+
+ // clang-format off
+ general.add_options()
+ ("help,h", "Display available options")
+ ("nnpackage", po::value<std::string>()->required(), "nnpackage path")
+ ("input,i", po::value<std::string>()->required(), "Input image path")
+ ("output,o", po::value<std::string>()->required(), "Output image path");
+ // clang-format on
+
+ _options.add(general);
+ _positional.add("nnpackage", 1);
+}
+
+void Args::Parse(const int argc, char **argv)
+{
+
+ po::variables_map vm;
+ try
+ {
+ po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
+ vm);
+
+ if (vm.count("help"))
+ {
+ std::cout << "style_transfer_app\n\n";
+ std::cout << "Usage: " << argv[0] << " path to nnpackage root directory [<options>]\n\n";
+ std::cout << _options;
+ std::cout << "\n";
+
+ exit(0);
+ }
+
+ po::notify(vm);
+
+ if (vm.count("input"))
+ {
+ _input_filename = vm["input"].as<std::string>();
+ }
+
+ if (vm.count("output"))
+ {
+ _output_filename = vm["output"].as<std::string>();
+ }
+
+ if (vm.count("nnpackage"))
+ {
+ _package_filename = vm["nnpackage"].as<std::string>();
+
+ if (!boost::filesystem::exists(_package_filename))
+ {
+ std::cerr << "nnpackage not found: " << _package_filename << "\n";
+ }
+ }
+ }
+ catch (const boost::program_options::required_option &e)
+ {
+ std::cerr << e.what() << std::endl;
+ return exit(-1);
+ }
+}
+
+} // namespace StyleTransferApp
diff --git a/runtime/contrib/style_transfer_app/src/args.h b/runtime/contrib/style_transfer_app/src/args.h
new file mode 100644
index 000000000..75cdc6fdd
--- /dev/null
+++ b/runtime/contrib/style_transfer_app/src/args.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __STYLE_TRANSFER_APP_ARGS_H__
+#define __STYLE_TRANSFER_APP_ARGS_H__
+
+#include <string>
+#include <boost/program_options.hpp>
+
+namespace po = boost::program_options;
+
+namespace StyleTransferApp
+{
+
/**
 * @brief Command-line argument holder for style_transfer_app.
 *
 * The constructor declares and parses all options; on parse failure the
 * process exits, so a constructed Args always holds the parsed paths.
 */
class Args
{
public:
  Args(const int argc, char **argv) noexcept;
  void print(void);

  // Accessors for the parsed paths (empty when the option was not given).
  const std::string &getPackageFilename(void) const { return _package_filename; }
  const std::string &getInputFilename(void) const { return _input_filename; }
  const std::string &getOutputFilename(void) const { return _output_filename; }

private:
  void Initialize();
  void Parse(const int argc, char **argv);

private:
  po::positional_options_description _positional;
  po::options_description _options;

  std::string _package_filename;
  std::string _input_filename;
  std::string _output_filename;
};
+
+} // end of namespace StyleTransferApp
+
+#endif // __STYLE_TRANSFER_APP_ARGS_H__
diff --git a/runtime/contrib/style_transfer_app/src/bitmap_helper.cc b/runtime/contrib/style_transfer_app/src/bitmap_helper.cc
new file mode 100644
index 000000000..6211ea476
--- /dev/null
+++ b/runtime/contrib/style_transfer_app/src/bitmap_helper.cc
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>

#include <unistd.h> // NOLINT(build/include_order)

#include "bitmap_helper.h"
+
+#define LOG(x) std::cerr
+
+namespace StyleTransferApp
+{
+
// Build the 14-byte BMP file header.
// Returns a pointer to a function-local static buffer: the contents are
// overwritten by the next call, must not be freed, and are not thread-safe.
unsigned char *BitmapHelper::createBitmapFileHeader(int height, int width, int paddingSize)
{
  // Total file size = both headers plus the padded pixel rows.
  int fileSize = fileHeaderSize + infoHeaderSize + (bytesPerPixel * width + paddingSize) * height;

  static unsigned char fileHeader[] = {
      0, 0,       /// signature
      0, 0, 0, 0, /// image file size in bytes
      0, 0, 0, 0, /// reserved
      0, 0, 0, 0, /// start of pixel array
  };

  fileHeader[0] = (unsigned char)('B');
  fileHeader[1] = (unsigned char)('M');
  // Multi-byte fields are stored little-endian, one byte at a time.
  fileHeader[2] = (unsigned char)(fileSize);
  fileHeader[3] = (unsigned char)(fileSize >> 8);
  fileHeader[4] = (unsigned char)(fileSize >> 16);
  fileHeader[5] = (unsigned char)(fileSize >> 24);
  // Pixel data starts immediately after the two headers.
  fileHeader[10] = (unsigned char)(fileHeaderSize + infoHeaderSize);

  return fileHeader;
}
+
// Build the 40-byte BITMAPINFOHEADER.
// Returns a pointer to a function-local static buffer: overwritten by the
// next call, must not be freed, and not thread-safe.
unsigned char *BitmapHelper::createBitmapInfoHeader(int height, int width)
{
  static unsigned char infoHeader[] = {
      0, 0, 0, 0, /// header size
      0, 0, 0, 0, /// image width
      0, 0, 0, 0, /// image height
      0, 0,       /// number of color planes
      0, 0,       /// bits per pixel
      0, 0, 0, 0, /// compression
      0, 0, 0, 0, /// image size
      0, 0, 0, 0, /// horizontal resolution
      0, 0, 0, 0, /// vertical resolution
      0, 0, 0, 0, /// colors in color table
      0, 0, 0, 0, /// important color count
  };

  // Minus height means top to bottom write
  height = -height;

  infoHeader[0] = (unsigned char)(infoHeaderSize);
  // Width and (negated) height are stored little-endian.
  infoHeader[4] = (unsigned char)(width);
  infoHeader[5] = (unsigned char)(width >> 8);
  infoHeader[6] = (unsigned char)(width >> 16);
  infoHeader[7] = (unsigned char)(width >> 24);
  infoHeader[8] = (unsigned char)(height);
  infoHeader[9] = (unsigned char)(height >> 8);
  infoHeader[10] = (unsigned char)(height >> 16);
  infoHeader[11] = (unsigned char)(height >> 24);
  infoHeader[12] = (unsigned char)(1);                 // color planes: must be 1
  infoHeader[14] = (unsigned char)(bytesPerPixel * 8); // bits per pixel

  return infoHeader;
}
+
// Convert raw BMP pixel rows (row-padded, BGR(A)-ordered, possibly stored
// bottom-up) into a tightly packed buffer of size height*width*channels with
// RGB(A)/grayscale channel order.
// `row_size` is the padded byte length of one source row; `top_down` tells
// whether the first stored row is the top image row.
std::vector<uint8_t> BitmapHelper::decode_bmp(const uint8_t *input, int row_size, int width,
                                              int height, int channels, bool top_down)
{
  std::vector<uint8_t> output(height * width * channels);
  for (int i = 0; i < height; i++)
  {
    int src_pos;
    int dst_pos;

    for (int j = 0; j < width; j++)
    {
      if (!top_down)
      {
        // Bottom-up files store the last image row first.
        src_pos = ((height - 1 - i) * row_size) + j * channels;
      }
      else
      {
        src_pos = i * row_size + j * channels;
      }

      dst_pos = (i * width + j) * channels;

      switch (channels)
      {
        case 1:
          output[dst_pos] = input[src_pos];
          break;
        case 3:
          // BGR -> RGB
          output[dst_pos] = input[src_pos + 2];
          output[dst_pos + 1] = input[src_pos + 1];
          output[dst_pos + 2] = input[src_pos];
          break;
        case 4:
          // BGRA -> RGBA
          output[dst_pos] = input[src_pos + 2];
          output[dst_pos + 1] = input[src_pos + 1];
          output[dst_pos + 2] = input[src_pos];
          output[dst_pos + 3] = input[src_pos + 3];
          break;
        default:
          LOG(FATAL) << "Unexpected number of channels: " << channels;
          break;
      }
    }
  }
  return output;
}
+
+int BitmapHelper::read_bmp(const std::string &input_bmp_name, std::vector<float> &input,
+ int model_width, int model_height)
+{
+ int begin, end;
+ int width, height, channels;
+
+ std::ifstream file(input_bmp_name, std::ios::in | std::ios::binary);
+ if (!file)
+ {
+ LOG(FATAL) << "Error opening " << input_bmp_name << "\n";
+ exit(-1);
+ }
+
+ begin = file.tellg();
+ file.seekg(0, std::ios::end);
+ end = file.tellg();
+ size_t len = end - begin;
+
+ std::vector<uint8_t> img_bytes(len);
+ file.seekg(0, std::ios::beg);
+ file.read(reinterpret_cast<char *>(img_bytes.data()), len);
+ const int32_t header_size = *(reinterpret_cast<const int32_t *>(img_bytes.data() + 10));
+ width = *(reinterpret_cast<const int32_t *>(img_bytes.data() + 18));
+ height = *(reinterpret_cast<const int32_t *>(img_bytes.data() + 22));
+ const int32_t bpp = *(reinterpret_cast<const int32_t *>(img_bytes.data() + 28));
+ channels = bpp / 8;
+
+ // TODO: Implement resize function
+ assert(model_width == width);
+ assert(model_height == height);
+
+ // there may be padding bytes when the width is not a multiple of 4 bytes
+ // 8 * channels == bits per pixel
+ const int row_size = (8 * channels * width + 31) / 32 * 4;
+
+ // if height is negative, data layout is top down
+ // otherwise, it's bottom up
+ bool top_down = (height < 0);
+
+ // Decode image, allocating tensor once the image size is known
+ const uint8_t *bmp_pixels = &img_bytes[header_size];
+ std::vector<uint8_t> bmp =
+ decode_bmp(bmp_pixels, row_size, width, abs(height), channels, top_down);
+ for (uint32_t j = 0; j < bmp.size(); j++)
+ {
+ input.push_back(static_cast<float>(bmp[j]));
+ }
+ return 0;
+}
+
+int BitmapHelper::write_bmp(const std::string &output_bmp_name, std::vector<float> &output,
+ int width, int height, int channels)
+{
+ std::ofstream file(output_bmp_name, std::ios::out | std::ios::binary);
+ if (!file)
+ {
+ LOG(FATAL) << "Error opening " << output_bmp_name << "\n";
+ exit(-1);
+ }
+
+ unsigned char padding[3] = {0, 0, 0};
+ int paddingSize = (4 - (width * channels) % 4) % 4;
+
+ const unsigned char *fileHeader = createBitmapFileHeader(height, width, paddingSize);
+ const unsigned char *infoHeader = createBitmapInfoHeader(height, width);
+
+ file.write((char *)fileHeader, fileHeaderSize);
+ file.write((char *)infoHeader, infoHeaderSize);
+
+ // RGB to BGR
+ for (int i = 0; i < output.size(); i += 3)
+ {
+ file << static_cast<unsigned char>(output[i + 2]);
+ file << static_cast<unsigned char>(output[i + 1]);
+ file << static_cast<unsigned char>(output[i]);
+ for (int j = 0; j < paddingSize; j++)
+ {
+ file << padding;
+ }
+ }
+ file.close();
+ return 0;
+}
+
+} // namespace StyleTransferApp
diff --git a/runtime/contrib/style_transfer_app/src/bitmap_helper.h b/runtime/contrib/style_transfer_app/src/bitmap_helper.h
new file mode 100644
index 000000000..f499fff62
--- /dev/null
+++ b/runtime/contrib/style_transfer_app/src/bitmap_helper.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef __STYLE_TRANSFER_APP_BITMAP_HELPER_H__
+#define __STYLE_TRANSFER_APP_BITMAP_HELPER_H__
+
#include <cstdint>
#include <string>
#include <vector>
+
+namespace StyleTransferApp
+{
+
/**
 * @brief Reads and writes 24-bit BMP files into/from flat float pixel
 *        buffers (one float per channel sample, values in [0, 255]).
 */
class BitmapHelper
{
public:
  BitmapHelper(){/* DO NOTHING */};
  // Both return 0 on success; I/O failures terminate the process.
  int read_bmp(const std::string &input_bmp_name, std::vector<float> &input, int model_width,
               int model_height);
  int write_bmp(const std::string &output_bmp_name, std::vector<float> &output, int width,
                int height, int channels);

private:
  // Header builders return pointers to function-local static buffers.
  unsigned char *createBitmapFileHeader(int height, int width, int paddingSize);
  unsigned char *createBitmapInfoHeader(int height, int width);
  std::vector<uint8_t> decode_bmp(const uint8_t *input, int row_size, int width, int height,
                                  int channels, bool top_down);

  // BMP format constants: header sizes and 3 bytes/pixel (24-bit color).
  const int fileHeaderSize = 14;
  const int infoHeaderSize = 40;
  const int bytesPerPixel = 3;
};
+
+} // namespace StyleTransferApp
+
+#endif // __STYLE_TRANSFER_APP_BITMAP_HELPER_H__
diff --git a/runtime/contrib/style_transfer_app/src/jpeg_helper.cc b/runtime/contrib/style_transfer_app/src/jpeg_helper.cc
new file mode 100644
index 000000000..ed5ae25a1
--- /dev/null
+++ b/runtime/contrib/style_transfer_app/src/jpeg_helper.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jpeg_helper.h"
+
+#include <cassert>
+#include <stdio.h>
+#include <jpeglib.h>
+#include <stdlib.h>
+#include <vector>
+
+namespace StyleTransferApp
+{
+
// Construct a helper for a specific pixel layout, e.g. (3, JCS_RGB) for
// color or (1, JCS_GRAYSCALE) for grayscale images.
JpegHelper::JpegHelper(int bytes_per_pixel, J_COLOR_SPACE color_space)
    : _bytes_per_pixel(bytes_per_pixel), _color_space(color_space)
{
  // DO NOTHING
}
+
+int JpegHelper::readJpeg(const std::string filename, std::vector<float> &raw_image, int width,
+ int height)
+{
+ struct jpeg_decompress_struct cinfo;
+ struct jpeg_error_mgr jerr;
+
+ FILE *infile = fopen(filename.c_str(), "rb");
+ unsigned long location = 0;
+ int i = 0;
+
+ if (!infile)
+ {
+ printf("Error opening jpeg file %s\n!", filename);
+ return -1;
+ }
+
+ cinfo.err = jpeg_std_error(&jerr);
+
+ jpeg_create_decompress(&cinfo);
+
+ jpeg_stdio_src(&cinfo, infile);
+
+ jpeg_read_header(&cinfo, TRUE);
+
+ jpeg_start_decompress(&cinfo);
+
+ // TODO: Implement resize function
+ assert(cinfo.output_width == width);
+ assert(cinfo.output_height == height);
+
+ raw_image.resize(cinfo.output_width * cinfo.output_height * cinfo.num_components);
+
+ unsigned char *ptr = new unsigned char[cinfo.output_width * cinfo.num_components];
+
+ while (cinfo.output_scanline < cinfo.image_height)
+ {
+ jpeg_read_scanlines(&cinfo, &ptr, 1);
+ for (i = 0; i < cinfo.image_width * cinfo.num_components; i++)
+ {
+ raw_image[location++] = static_cast<float>(ptr[i]);
+ }
+ }
+
+ jpeg_finish_decompress(&cinfo);
+ jpeg_destroy_decompress(&cinfo);
+ delete (ptr);
+ fclose(infile);
+
+ return 1;
+}
+
+int JpegHelper::writeJpeg(const std::string filename, std::vector<float> &raw_image, int width,
+ int height)
+{
+ struct jpeg_compress_struct cinfo;
+ struct jpeg_error_mgr jerr;
+ unsigned long location = 0;
+
+ FILE *outfile = fopen(filename.c_str(), "wb");
+
+ if (!outfile)
+ {
+ printf("Error opening output jpeg file %s\n!", filename);
+ return -1;
+ }
+ cinfo.err = jpeg_std_error(&jerr);
+ jpeg_create_compress(&cinfo);
+ jpeg_stdio_dest(&cinfo, outfile);
+
+ cinfo.image_width = width;
+ cinfo.image_height = height;
+ cinfo.input_components = _bytes_per_pixel;
+ cinfo.in_color_space = _color_space;
+
+ jpeg_set_defaults(&cinfo);
+
+ jpeg_start_compress(&cinfo, TRUE);
+
+ unsigned char *ptr = new unsigned char[cinfo.image_width * cinfo.input_components];
+
+ while (cinfo.next_scanline < cinfo.image_height)
+ {
+ for (int i = 0; i < cinfo.image_width * cinfo.input_components; i++)
+ {
+ ptr[i] = static_cast<unsigned char>(raw_image[location++]);
+ }
+ jpeg_write_scanlines(&cinfo, &ptr, 1);
+ }
+
+ jpeg_finish_compress(&cinfo);
+ fclose(outfile);
+ delete (ptr);
+
+ jpeg_destroy_compress(&cinfo);
+
+ return 1;
+}
+
+} // namespace StyleTransferApp
diff --git a/runtime/contrib/style_transfer_app/src/jpeg_helper.h b/runtime/contrib/style_transfer_app/src/jpeg_helper.h
new file mode 100644
index 000000000..e6d7845b5
--- /dev/null
+++ b/runtime/contrib/style_transfer_app/src/jpeg_helper.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __STYLE_TRANSFER_APP_JPEG_HELPER_H__
+#define __STYLE_TRANSFER_APP_JPEG_HELPER_H__
+
+#include <vector>
+#include <string>
+#include <jpeglib.h>
+
+namespace StyleTransferApp
+{
+
/**
 * @brief Reads and writes JPEG files into/from flat float pixel buffers
 *        using libjpeg.
 */
class JpegHelper
{
public:
  JpegHelper(){/* DO NOTHING */};
  JpegHelper(int bytes_per_pixel, J_COLOR_SPACE color_space);

  // Both return 1 on success and -1 when the file cannot be opened.
  int readJpeg(const std::string filename, std::vector<float> &raw_image, int width, int height);
  int writeJpeg(const std::string filename, std::vector<float> &raw_image, int width, int height);

private:
  int _bytes_per_pixel = 3;             /* or 1 for GRAYSCALE images */
  J_COLOR_SPACE _color_space = JCS_RGB; /* or JCS_GRAYSCALE for grayscale images */
};
+
+} // namespace StyleTransferApp
+
+#endif // __STYLE_TRANSFER_APP_JPEG_HELPER_H__
diff --git a/runtime/contrib/style_transfer_app/src/style_transfer_app.cc b/runtime/contrib/style_transfer_app/src/style_transfer_app.cc
new file mode 100644
index 000000000..eed0c4288
--- /dev/null
+++ b/runtime/contrib/style_transfer_app/src/style_transfer_app.cc
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+#include "bitmap_helper.h"
+#include "nnfw.h"
+
+#ifdef NNFW_ST_APP_JPEG_SUPPORTED
+#include "jpeg_helper.h"
+#endif
+
+#include <cassert>
+#include <chrono>
+#include <cstdlib>
+#include <iostream>
+#include <stdexcept>
+#include <unordered_map>
+#include <math.h>
+
+#define NNPR_ENSURE_STATUS(a) \
+ do \
+ { \
+ if ((a) != NNFW_STATUS_NO_ERROR) \
+ { \
+ exit(-1); \
+ } \
+ } while (0)
+
// Supported image container formats, selected by file extension
// (see get_image_format below).
enum ImageFormat
{
  JPEG = 0,
  BMP,
  OTHERS
};
+
// Current time in microseconds since the high_resolution_clock epoch.
// The clock's native precision is typically nanoseconds; the cast
// truncates to microseconds.
uint64_t NowMicros()
{
  using std::chrono::duration_cast;
  using std::chrono::microseconds;
  const auto now = std::chrono::high_resolution_clock::now();
  return duration_cast<microseconds>(now.time_since_epoch()).count();
}
+
+uint64_t num_elems(const nnfw_tensorinfo *ti)
+{
+ uint64_t n = 1;
+ for (uint32_t i = 0; i < ti->rank; ++i)
+ {
+ assert(ti->dims[i] >= 0);
+ n *= ti->dims[i];
+ }
+ return n;
+}
+
+NNFW_STATUS resolve_op_backend(nnfw_session *session)
+{
+ static std::unordered_map<std::string, std::string> operation_map = {
+ {"TRANSPOSE_CONV", "OP_BACKEND_TransposeConv"}, {"CONV_2D", "OP_BACKEND_Conv2D"},
+ {"DEPTHWISE_CONV_2D", "OP_BACKEND_DepthwiseConv2D"}, {"MEAN", "OP_BACKEND_Mean"},
+ {"AVERAGE_POOL_2D", "OP_BACKEND_AvgPool2D"}, {"MAX_POOL_2D", "OP_BACKEND_MaxPool2D"},
+ {"INSTANCE_NORM", "OP_BACKEND_InstanceNorm"}, {"ADD", "OP_BACKEND_Add"}};
+
+ for (auto i : operation_map)
+ {
+ char *default_backend = std::getenv(i.second.c_str());
+ if (default_backend)
+ {
+ NNFW_STATUS return_result = nnfw_set_op_backend(session, i.first.c_str(), default_backend);
+ if (return_result == NNFW_STATUS_ERROR)
+ return return_result;
+ }
+ }
+
+ return NNFW_STATUS_NO_ERROR;
+}
+
+ImageFormat get_image_format(const std::string &FileName)
+{
+ std::string ext;
+ if (FileName.find_last_of(".") != std::string::npos)
+ ext = FileName.substr(FileName.find_last_of(".") + 1);
+
+ if (ext == "jpeg" || ext == "jpg")
+ return ImageFormat::JPEG;
+ else if (ext == "bmp")
+ return ImageFormat::BMP;
+ else
+ return ImageFormat::OTHERS;
+}
+
// Map each activation through tanh and rescale into the displayable pixel
// range: tanh(x) in [-1, 1] -> x*150 + 127.5 in [-22.5, 277.5], then
// clamped to [0, 255].  Operates in place; always returns 0.
static int vector_tanh(std::vector<float> &a)
{
  const int count = a.size();

#pragma omp parallel for
  for (int idx = 0; idx < count; idx++)
  {
    float scaled = tanh(a[idx]) * 150 + 127.5f;
    if (scaled > 255)
      scaled = 255;
    else if (scaled < 0)
      scaled = 0;
    a[idx] = scaled;
  }

  return 0;
}
+
// Entry point: load an nnpackage style-transfer model, feed it the input
// image, run inference, post-process with tanh scaling, and write the
// stylized output image.  All failures terminate the process with a
// non-zero exit code.
int main(const int argc, char **argv)
{
  StyleTransferApp::Args args(argc, argv);
  auto nnpackage_path = args.getPackageFilename();

  // Create a session and apply backend selection from the environment
  // (BACKENDS plus per-operation OP_BACKEND_* variables).
  nnfw_session *session = nullptr;
  NNPR_ENSURE_STATUS(nnfw_create_session(&session));
  char *available_backends = std::getenv("BACKENDS");
  if (available_backends)
    NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
  NNPR_ENSURE_STATUS(resolve_op_backend(session));

  NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, nnpackage_path.c_str()));

  uint32_t num_inputs;
  NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));

  // verify input and output

  if (num_inputs == 0)
  {
    std::cerr << "[ ERROR ] "
              << "No inputs in model => execution is not possible" << std::endl;
    exit(1);
  }

  // Only float32 tensors are supported on either side of the model.
  auto verifyInputTypes = [session]() {
    uint32_t sz;
    NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
    for (uint32_t i = 0; i < sz; ++i)
    {
      nnfw_tensorinfo ti;
      NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
      if (ti.dtype != NNFW_TYPE_TENSOR_FLOAT32)
      {
        std::cerr << "Only float 32bit is supported." << std::endl;
        exit(-1);
      }
    }
  };

  auto verifyOutputTypes = [session]() {
    uint32_t sz;
    NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));

    for (uint32_t i = 0; i < sz; ++i)
    {
      nnfw_tensorinfo ti;
      NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
      if (ti.dtype != NNFW_TYPE_TENSOR_FLOAT32)
      {
        std::cerr << "Only float 32bit is supported." << std::endl;
        exit(-1);
      }
    }
  };

  verifyInputTypes();
  verifyOutputTypes();

  // prepare execution

  uint64_t prepare_us = NowMicros();
  NNPR_ENSURE_STATUS(nnfw_prepare(session));
  prepare_us = NowMicros() - prepare_us;

  // prepare input

  std::vector<std::vector<float>> inputs(num_inputs);

  // Decode the image file into each input buffer; the decoder is selected
  // by file extension.  NOTE(review): ti.dims is indexed here as
  // [.., height, width, ..] (dims[1]/dims[2]) with a channels-last layout
  // set below — confirm against the model's actual layout.
  auto loadInputs = [session, num_inputs, &inputs](const std::string &filename) {
    for (uint32_t i = 0; i < num_inputs; ++i)
    {
      nnfw_tensorinfo ti;
      NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));

      ImageFormat format = get_image_format(filename);
      switch (format)
      {
        case ImageFormat::JPEG:
        {
#ifdef NNFW_ST_APP_JPEG_SUPPORTED
          StyleTransferApp::JpegHelper jpeg_helper;
          jpeg_helper.readJpeg(filename, inputs[i], ti.dims[2], ti.dims[1]);
#else
          std::cerr << "JPEG format not supported. Install libjpeg to read/write jpeg images."
                    << std::endl;
          exit(-1);
#endif
          break;
        }
        case ImageFormat::BMP:
        {
          StyleTransferApp::BitmapHelper bitmap_helper;
          bitmap_helper.read_bmp(filename, inputs[i], ti.dims[2], ti.dims[1]);
          break;
        }
        default:
          std::cerr << "Unsupported image format." << std::endl;
          exit(-1);
          break;
      }

      NNPR_ENSURE_STATUS(nnfw_set_input(session, i, NNFW_TYPE_TENSOR_FLOAT32, inputs[i].data(),
                                        sizeof(float) * num_elems(&ti)));
      NNPR_ENSURE_STATUS(nnfw_set_input_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
    }
  };

  if (!args.getInputFilename().empty())
    loadInputs(args.getInputFilename());
  else
    std::exit(-1);

  // prepare output

  uint32_t num_outputs = 0;
  NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
  std::vector<std::vector<float>> outputs(num_outputs);

  for (uint32_t i = 0; i < num_outputs; i++)
  {
    nnfw_tensorinfo ti;
    NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
    auto output_num_elements = num_elems(&ti);
    outputs[i].resize(output_num_elements);
    NNPR_ENSURE_STATUS(nnfw_set_output(session, i, NNFW_TYPE_TENSOR_FLOAT32, outputs[i].data(),
                                       sizeof(float) * output_num_elements));
    NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
  }

  uint64_t run_us = NowMicros();
  NNPR_ENSURE_STATUS(nnfw_run(session));
  run_us = NowMicros() - run_us;

  // dump output tensors

  // Post-process each output (tanh -> [0, 255]) and encode it to the
  // requested image file.
  auto dumpOutputs = [session, num_outputs, &outputs](const std::string &filename) {
    for (uint32_t i = 0; i < num_outputs; ++i)
    {
      nnfw_tensorinfo ti;
      NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));

      vector_tanh(outputs[i]);

      ImageFormat format = get_image_format(filename);
      switch (format)
      {
        case ImageFormat::JPEG:
        {
#ifdef NNFW_ST_APP_JPEG_SUPPORTED
          StyleTransferApp::JpegHelper jpeg_helper;
          jpeg_helper.writeJpeg(filename, outputs[i], ti.dims[2], ti.dims[1]);
#else
          std::cerr << "JPEG format not supported. Install libjpeg to read/write jpeg images."
                    << std::endl;
          exit(-1);
#endif
          break;
        }
        case ImageFormat::BMP:
        {
          StyleTransferApp::BitmapHelper bitmap_helper;
          bitmap_helper.write_bmp(filename, outputs[i], ti.dims[2], ti.dims[1], ti.dims[3]);
          break;
        }
        default:
          std::cerr << "Unsupported image format." << std::endl;
          exit(-1);
          break;
      }
    }
  };

  if (!args.getOutputFilename().empty())
    dumpOutputs(args.getOutputFilename());

  std::cout << "nnfw_prepare takes " << prepare_us / 1e3 << " ms" << std::endl;
  std::cout << "nnfw_run takes " << run_us / 1e3 << " ms" << std::endl;

  NNPR_ENSURE_STATUS(nnfw_close_session(session));

  return 0;
}
diff --git a/runtime/contrib/tflite_classify/src/tflite_classify.cc b/runtime/contrib/tflite_classify/src/tflite_classify.cc
index 40c15f331..51758e2a6 100644
--- a/runtime/contrib/tflite_classify/src/tflite_classify.cc
+++ b/runtime/contrib/tflite_classify/src/tflite_classify.cc
@@ -38,18 +38,8 @@ int main(const int argc, char **argv)
const int FRAME_WIDTH = 640;
const int FRAME_HEIGHT = 480;
- bool use_nnapi = false;
- bool debug_mode = false;
-
- if (std::getenv("USE_NNAPI") != nullptr)
- {
- use_nnapi = true;
- }
-
- if (std::getenv("DEBUG_MODE") != nullptr)
- {
- debug_mode = true;
- }
+ const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false);
+ const bool debug_mode = nnfw::misc::EnvVar("DEBUG_MODE").asBool(false);
std::cout << "USE_NNAPI : " << use_nnapi << std::endl;
std::cout << "DEBUG_MODE : " << debug_mode << std::endl;
diff --git a/runtime/contrib/tflite_test/tflite_test.cpp b/runtime/contrib/tflite_test/tflite_test.cpp
index 80ba448c6..9b6f07717 100644
--- a/runtime/contrib/tflite_test/tflite_test.cpp
+++ b/runtime/contrib/tflite_test/tflite_test.cpp
@@ -94,12 +94,7 @@ uint32_t count_elements(const TfLiteTensor *tensor)
int main(int argc, char **argv)
{
- bool use_nnapi = false;
-
- if (std::getenv("USE_NNAPI") != nullptr)
- {
- use_nnapi = true;
- }
+ const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false);
if (argc < 3)
{
diff --git a/runtime/libs/benchmark/include/benchmark/CsvWriter.h b/runtime/libs/benchmark/include/benchmark/CsvWriter.h
index 3e141216b..5c259d7ed 100644
--- a/runtime/libs/benchmark/include/benchmark/CsvWriter.h
+++ b/runtime/libs/benchmark/include/benchmark/CsvWriter.h
@@ -54,6 +54,7 @@ private:
std::ofstream _ofs;
uint32_t _header_size;
uint32_t _col_idx;
+ uint32_t _row_idx;
};
} // namespace benchmark
diff --git a/runtime/libs/benchmark/include/benchmark/Phase.h b/runtime/libs/benchmark/include/benchmark/Phase.h
index bea9a87b2..ed8c8869e 100644
--- a/runtime/libs/benchmark/include/benchmark/Phase.h
+++ b/runtime/libs/benchmark/include/benchmark/Phase.h
@@ -48,4 +48,18 @@ inline std::string getPhaseString(Phase phase)
} // namespace benchmark
namespace std
{

// Allow benchmark::Phase to be used as a key in unordered containers by
// hashing its underlying integer value.
template <> struct hash<benchmark::Phase>
{
  size_t operator()(benchmark::Phase value) const noexcept
  {
    using type = typename std::underlying_type<benchmark::Phase>::type;
    return hash<type>()(static_cast<type>(value));
  }
};

} // namespace std
+
#endif // __NNFW_BENCHMARK_PHASE_H__
diff --git a/runtime/libs/benchmark/include/benchmark/Result.h b/runtime/libs/benchmark/include/benchmark/Result.h
index 570fa2114..2d86d95ec 100644
--- a/runtime/libs/benchmark/include/benchmark/Result.h
+++ b/runtime/libs/benchmark/include/benchmark/Result.h
@@ -39,7 +39,7 @@ uint32_t maxMemory(const std::unordered_map<benchmark::Phase, uint32_t> &map)
return answer.second;
}
-} // namespace anonymous
+} // namespace
namespace benchmark
{
diff --git a/runtime/libs/benchmark/include/benchmark/Util.h b/runtime/libs/benchmark/include/benchmark/Util.h
index b10360fa0..2e1f985b1 100644
--- a/runtime/libs/benchmark/include/benchmark/Util.h
+++ b/runtime/libs/benchmark/include/benchmark/Util.h
@@ -90,11 +90,10 @@ inline void writeResult(const Result &result, const std::string &exec, const std
bool done = writer.done();
- std::cout << "Writing to " << csv_filename << " is ";
- if (done)
- std::cout << "done" << std::endl;
- else
- std::cout << "failed" << std::endl;
+ if (!done)
+ {
+ std::cerr << "Writing to " << csv_filename << " is failed" << std::endl;
+ }
}
} // namespace benchmark
diff --git a/runtime/libs/benchmark/src/CsvWriter.cpp b/runtime/libs/benchmark/src/CsvWriter.cpp
index 9f2c5b09d..5f47c6511 100644
--- a/runtime/libs/benchmark/src/CsvWriter.cpp
+++ b/runtime/libs/benchmark/src/CsvWriter.cpp
@@ -24,7 +24,7 @@ const std::vector<std::string> csv_header{
#include "benchmark/CsvHeader.lst"
};
-} // namespace anonymous
+} // namespace
namespace benchmark
{
@@ -35,7 +35,7 @@ CsvWriter::CsvWriter(const std::string &csv_filename) : CsvWriter(csv_filename,
}
CsvWriter::CsvWriter(const std::string &csv_filename, const std::vector<std::string> &header)
- : _ofs(csv_filename), _header_size(header.size()), _col_idx(0)
+ : _ofs(csv_filename), _header_size(header.size()), _col_idx(0), _row_idx(0)
{
assert(csv_filename.empty() == false);
assert(header.size() != 0);
@@ -61,6 +61,7 @@ void CsvWriter::postWrite()
if (++_col_idx == _header_size)
{
_ofs << newline;
+ _row_idx += 1;
_col_idx = 0;
}
else
@@ -93,7 +94,7 @@ void CsvWriter::write(char val)
postWrite();
}
-bool CsvWriter::done() { return _col_idx == 0; }
+bool CsvWriter::done() { return (_col_idx == 0) && (_row_idx == 2); }
CsvWriter &operator<<(CsvWriter &csvw, const std::string &val)
{
diff --git a/runtime/libs/benchmark/src/MemoryPoller.cpp b/runtime/libs/benchmark/src/MemoryPoller.cpp
index 436d536e4..95fc34bb2 100644
--- a/runtime/libs/benchmark/src/MemoryPoller.cpp
+++ b/runtime/libs/benchmark/src/MemoryPoller.cpp
@@ -82,7 +82,7 @@ std::vector<std::string> getValueFromFileStatus(const std::string &file, const s
return val;
}
-} // namespace anonymous
+} // namespace
namespace benchmark
{
diff --git a/runtime/libs/cpp14/CMakeLists.txt b/runtime/libs/cpp14/CMakeLists.txt
deleted file mode 100644
index bba9e132d..000000000
--- a/runtime/libs/cpp14/CMakeLists.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-add_library(nnfw_lib_cpp14 INTERFACE)
-target_include_directories(nnfw_lib_cpp14 INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
diff --git a/runtime/libs/cpp14/include/cpp14/memory.h b/runtime/libs/cpp14/include/cpp14/memory.h
deleted file mode 100644
index 7070e1c99..000000000
--- a/runtime/libs/cpp14/include/cpp14/memory.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file memory.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains @c make_unique which is not supported by C++11
- * @details Implementation is based on http://isocpp.org/files/papers/N3656.txt
- */
-#ifndef __NNFW_CPP14_MEMORY_H__
-#define __NNFW_CPP14_MEMORY_H__
-
-#include <memory>
-
-namespace nnfw
-{
-namespace cpp14
-{
-
-template <typename T> struct _Unique_if
-{
- typedef std::unique_ptr<T> _Single_object;
-};
-
-template <typename T> struct _Unique_if<T[]>
-{
- typedef std::unique_ptr<T[]> _Unknown_bound;
-};
-
-template <typename T, size_t N> struct _Unique_if<T[N]>
-{
- typedef void _Known_bound;
-};
-
-template <typename T, typename... Args>
-typename _Unique_if<T>::_Single_object make_unique(Args &&... args)
-{
- return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
-}
-
-template <typename T> typename _Unique_if<T>::_Unknown_bound make_unique(size_t n)
-{
- typedef typename std::remove_extent<T>::type U;
- return std::unique_ptr<T>(new U[n]());
-}
-
-template <typename T, typename... Args>
-typename _Unique_if<T>::_Known_bound make_unique(Args &&...) = delete;
-
-} // namespace cpp14
-} // namespace nnfw
-
-#endif // __NNFW_CPP14_MEMORY_H__
diff --git a/runtime/libs/jsoncpp/.FORMATDENY b/runtime/libs/jsoncpp/.FORMATDENY
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/runtime/libs/jsoncpp/.FORMATDENY
diff --git a/runtime/libs/misc/CMakeLists.txt b/runtime/libs/misc/CMakeLists.txt
index 557d403ec..5efa300f8 100644
--- a/runtime/libs/misc/CMakeLists.txt
+++ b/runtime/libs/misc/CMakeLists.txt
@@ -7,5 +7,11 @@ set_target_properties(nnfw_lib_misc PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(nnfw_lib_misc PRIVATE nnfw_common)
target_link_libraries(nnfw_lib_misc PRIVATE nnfw_coverage)
+install(TARGETS nnfw_lib_misc ARCHIVE DESTINATION lib)
+install(DIRECTORY "include/misc"
+ DESTINATION "include/onert" # FIXME This is only for onert developers
+ FILES_MATCHING PATTERN "*.h"
+ )
+
add_executable(nnfw_tensor_index_iterator "examples/tensor_index_iterator.cpp")
target_link_libraries(nnfw_tensor_index_iterator nnfw_lib_misc)
diff --git a/runtime/libs/misc/examples/tensor_index_iterator.cpp b/runtime/libs/misc/examples/tensor_index_iterator.cpp
index d94da9f49..590b433df 100644
--- a/runtime/libs/misc/examples/tensor_index_iterator.cpp
+++ b/runtime/libs/misc/examples/tensor_index_iterator.cpp
@@ -31,8 +31,8 @@ void test_iterate(void)
array.fill(0);
- using nnfw::misc::tensor::iterate;
using nnfw::misc::tensor::Index;
+ using nnfw::misc::tensor::iterate;
iterate(shape) << [&](const Index &index) {
assert(index.rank() == shape.rank());
diff --git a/runtime/libs/misc/include/misc/EventRecorder.h b/runtime/libs/misc/include/misc/EventRecorder.h
index 1e621fdf8..35d4074bb 100644
--- a/runtime/libs/misc/include/misc/EventRecorder.h
+++ b/runtime/libs/misc/include/misc/EventRecorder.h
@@ -57,6 +57,7 @@ public:
void emit(const CounterEvent &evt);
public:
+ bool empty() { return _ss.str().empty(); }
void writeToFile(std::ostream &os);
private:
diff --git a/runtime/libs/misc/include/misc/benchmark.h b/runtime/libs/misc/include/misc/benchmark.h
index fe5b97585..aa487aca9 100644
--- a/runtime/libs/misc/include/misc/benchmark.h
+++ b/runtime/libs/misc/include/misc/benchmark.h
@@ -69,9 +69,9 @@ private:
template <typename T, typename Callable>
Accumulator<T> &operator<<(Accumulator<T> &&acc, Callable cb)
{
- auto begin = std::chrono::steady_clock::now();
+ auto begin = std::chrono::high_resolution_clock::now();
cb();
- auto end = std::chrono::steady_clock::now();
+ auto end = std::chrono::high_resolution_clock::now();
acc() += std::chrono::duration_cast<T>(end - begin);
diff --git a/runtime/libs/misc/include/misc/string_helpers.h b/runtime/libs/misc/include/misc/string_helpers.h
index e42a12754..6aac3a83b 100644
--- a/runtime/libs/misc/include/misc/string_helpers.h
+++ b/runtime/libs/misc/include/misc/string_helpers.h
@@ -36,7 +36,7 @@ template <typename Arg, typename... Args> void _str(std::ostream &os, Arg &&arg,
_str(os, std::forward<Args>(args)...);
}
-} // namespace {anonymous}
+} // namespace
namespace nnfw
{
diff --git a/runtime/libs/misc/src/tensor/Comparator.cpp b/runtime/libs/misc/src/tensor/Comparator.cpp
index e765e77b2..80a18c11a 100644
--- a/runtime/libs/misc/src/tensor/Comparator.cpp
+++ b/runtime/libs/misc/src/tensor/Comparator.cpp
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include "misc/tensor/Comparator.h"
#include "misc/tensor/Zipper.h"
diff --git a/runtime/libs/profiling/include/profiling/profiling.h b/runtime/libs/profiling/include/profiling/profiling.h
index ee0df1338..79ad060c5 100644
--- a/runtime/libs/profiling/include/profiling/profiling.h
+++ b/runtime/libs/profiling/include/profiling/profiling.h
@@ -25,7 +25,7 @@ namespace profiling
{
class Profiler; // forward declaration
}
-}
+} // namespace tflite
namespace profiling
{
diff --git a/runtime/libs/profiling/include/profiling/time.h b/runtime/libs/profiling/include/profiling/time.h
index 200563aa6..03d18ddc8 100644
--- a/runtime/libs/profiling/include/profiling/time.h
+++ b/runtime/libs/profiling/include/profiling/time.h
@@ -1,17 +1,19 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
// NOTE To minimize diff with upstream tensorflow, disable clang-format
// clang-format off
diff --git a/runtime/libs/profiling/src/profiling/time.cpp b/runtime/libs/profiling/src/profiling/time.cpp
index 761023e6d..4e045556e 100644
--- a/runtime/libs/profiling/src/profiling/time.cpp
+++ b/runtime/libs/profiling/src/profiling/time.cpp
@@ -1,17 +1,19 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
// NOTE To minimize diff with upstream tensorflow, disable clang-format
// clang-format off
diff --git a/runtime/libs/rua/core/include/rua/Service.h b/runtime/libs/rua/core/include/rua/Service.h
index a79524a8a..2129b7ac2 100644
--- a/runtime/libs/rua/core/include/rua/Service.h
+++ b/runtime/libs/rua/core/include/rua/Service.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/runtime/libs/rua/dyn/include/rua/DynamicBinder.h b/runtime/libs/rua/dyn/include/rua/DynamicBinder.h
index 8ce0c42f8..1e2d30665 100644
--- a/runtime/libs/rua/dyn/include/rua/DynamicBinder.h
+++ b/runtime/libs/rua/dyn/include/rua/DynamicBinder.h
@@ -30,6 +30,6 @@ struct DynamicBinder
static const rua::RuntimeService *get(void);
};
-} // namespace
+} // namespace rua
#endif // __NNFW_RUA_DYNAMIC_BINDER_H__
diff --git a/runtime/libs/rua/dyn/src/DynamicBinder.cpp b/runtime/libs/rua/dyn/src/DynamicBinder.cpp
index 68dae6262..fa3f0bb1e 100644
--- a/runtime/libs/rua/dyn/src/DynamicBinder.cpp
+++ b/runtime/libs/rua/dyn/src/DynamicBinder.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/runtime/libs/rua/shim/include/rua/Shim.h b/runtime/libs/rua/shim/include/rua/Shim.h
index 07a4bb2fd..755803e3b 100644
--- a/runtime/libs/rua/shim/include/rua/Shim.h
+++ b/runtime/libs/rua/shim/include/rua/Shim.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/runtime/libs/tflite/CMakeLists.txt b/runtime/libs/tflite/CMakeLists.txt
index b5a16bcd9..04ced8e0f 100644
--- a/runtime/libs/tflite/CMakeLists.txt
+++ b/runtime/libs/tflite/CMakeLists.txt
@@ -1,3 +1,9 @@
+nnfw_find_package(TensorFlowLite QUIET)
+if(NOT TensorFlowLite_FOUND)
+ message(STATUS "Check tensorflow lite library extension build: need tensorflow lite library")
+ return()
+endif(NOT TensorFlowLite_FOUND)
+
add_subdirectory(port)
file(GLOB_RECURSE SOURCES "src/*.cpp")
@@ -13,5 +19,9 @@ target_link_libraries(nnfw_lib_tflite PRIVATE ${LIB_PTHREAD} dl)
target_link_libraries(nnfw_lib_tflite PRIVATE nnfw_common)
target_link_libraries(nnfw_lib_tflite PRIVATE nnfw_coverage)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_executable(nnfw_lib_tflite_test_TensorView src/TensorView.test.cpp)
target_link_libraries(nnfw_lib_tflite_test_TensorView nnfw_lib_tflite)
diff --git a/runtime/libs/tflite/include/tflite/Diff.h b/runtime/libs/tflite/include/tflite/Diff.h
index eca2fd502..38011b65d 100644
--- a/runtime/libs/tflite/include/tflite/Diff.h
+++ b/runtime/libs/tflite/include/tflite/Diff.h
@@ -139,6 +139,7 @@ private:
template <> uint8_t RandomGenerator::generate<uint8_t>(void);
template <> bool RandomGenerator::generate<bool>(void);
+template <> int32_t RandomGenerator::generate<int32_t>(void);
/**
* @brief Structure for NNAPI correctness test
diff --git a/runtime/libs/tflite/port/1.13.1/CMakeLists.txt b/runtime/libs/tflite/port/1.13.1/CMakeLists.txt
index 311e11cae..e3cf97569 100644
--- a/runtime/libs/tflite/port/1.13.1/CMakeLists.txt
+++ b/runtime/libs/tflite/port/1.13.1/CMakeLists.txt
@@ -2,8 +2,6 @@ if(NOT SUPPORT_TFLITE_VERSION VERSION_EQUAL 1.13.1)
return()
endif(NOT SUPPORT_TFLITE_VERSION VERSION_EQUAL 1.13.1)
-nnfw_find_package(TensorFlowLite REQUIRED)
-
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_library(tensorflow-lite-ex STATIC ${SOURCES})
diff --git a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/Abs.h b/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/Abs.h
deleted file mode 100644
index 697ba33e9..000000000
--- a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/Abs.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_TFLITE_EXT_KERNELS_ABS_H__
-#define __NNFW_TFLITE_EXT_KERNELS_ABS_H__
-
-#include "tensorflow/lite/context.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace Abs
-{
-
-void *InitAbs(TfLiteContext *context, const char *buffer, size_t length);
-void FreeAbs(TfLiteContext *context, void *buffer);
-TfLiteStatus PrepareAbs(TfLiteContext *context, TfLiteNode *node);
-TfLiteStatus EvalAbs(TfLiteContext *context, TfLiteNode *node);
-
-} // namespace Abs
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_KERNELS_ABS_H__
diff --git a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/CustomOps.h b/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/CustomOps.h
index 3370db778..c073ad58e 100644
--- a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/CustomOps.h
+++ b/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/CustomOps.h
@@ -24,10 +24,7 @@
#define __NNFW_TFLITE_EXT_KERNELS_CUSTOM_OP_H__
#include "tensorflow/lite/context.h"
-#include "tflite/ext/kernels/TensorFlowMax.h"
#include "tflite/ext/kernels/SquaredDifference.h"
-#include "tflite/ext/kernels/TensorFlowSum.h"
-#include "tflite/ext/kernels/Abs.h"
namespace nnfw
{
@@ -48,10 +45,7 @@ namespace custom
return &r; \
}
-REGISTER_FUNCTION(TensorFlowMax)
REGISTER_FUNCTION(SquaredDifference)
-REGISTER_FUNCTION(TensorFlowSum)
-REGISTER_FUNCTION(Abs)
#undef REGISTER_FUNCTION
diff --git a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/TensorFlowMax.h b/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/TensorFlowMax.h
deleted file mode 100644
index d573308ed..000000000
--- a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/TensorFlowMax.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorFlowMax.h
- * @brief This file contains TensorFlowMax namespace and TensorFlowMax function definitions
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_MAX_H__
-#define __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_MAX_H__
-
-#include "tensorflow/lite/context.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace TensorFlowMax
-{
-
-/**
- * @brief Initialize TensorFlowMax operand using the contents of buffer
- * @param[in] context The TfLite context
- * @param[in] buffer The buffer with contents
- * @param[in] length The buffer length
- * @return The void pointer for user data
- */
-void *InitTensorFlowMax(TfLiteContext *context, const char *buffer, size_t length);
-
-/**
- * @brief Release any memory it might have allocated via 'InitTensorFlowMax'
- * @param[in] context The TfLite context
- * @param[in] buffer The buffer with contents
- * @return N/A
- */
-void FreeTensorFlowMax(TfLiteContext *context, void *buffer);
-
-/**
- * @brief Prepare the TensorFlowMax operand for execution
- * @param[in] context The TfLite context
- * @param[in] node The operand node
- * @return The TfLite status
- */
-TfLiteStatus PrepareTensorFlowMax(TfLiteContext *context, TfLiteNode *node);
-
-/**
- * @brief Evaluation the TensorFlowMax operand for execution
- * @param[in] context The TfLite context
- * @param[in] node The operand node
- * @return The TfLite status
- */
-TfLiteStatus EvalTensorFlowMax(TfLiteContext *context, TfLiteNode *node);
-
-} // namespace TensorFlowMax
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_MAX_H__
diff --git a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/TensorFlowSum.h b/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/TensorFlowSum.h
deleted file mode 100644
index 29455aac5..000000000
--- a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/TensorFlowSum.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_SUM_H__
-#define __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_SUM_H__
-
-#include "tensorflow/lite/context.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace TensorFlowSum
-{
-
-void *InitTensorFlowSum(TfLiteContext *context, const char *buffer, size_t length);
-void FreeTensorFlowSum(TfLiteContext *context, void *buffer);
-TfLiteStatus PrepareTensorFlowSum(TfLiteContext *context, TfLiteNode *node);
-TfLiteStatus EvalTensorFlowSum(TfLiteContext *context, TfLiteNode *node);
-
-} // namespace TensorFlowSum
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_SUM_H__
diff --git a/runtime/libs/tflite/port/1.13.1/src/kernels/Abs.cpp b/runtime/libs/tflite/port/1.13.1/src/kernels/Abs.cpp
deleted file mode 100644
index 61181465d..000000000
--- a/runtime/libs/tflite/port/1.13.1/src/kernels/Abs.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/Abs.h"
-#include "tensorflow/lite/kernels/kernel_util.h"
-
-#include <iostream>
-#include <cmath>
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace Abs
-{
-
-void *InitAbs(TfLiteContext *, const char *, size_t) { return nullptr; }
-
-void FreeAbs(TfLiteContext *, void *) {}
-
-TfLiteStatus PrepareAbs(TfLiteContext *context, TfLiteNode *node)
-{
- TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 1);
- TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1);
-
- const TfLiteTensor *input = ::tflite::GetInput(context, node, 0);
- TfLiteTensor *output = ::tflite::GetOutput(context, node, 0);
-
- TF_LITE_ENSURE_EQ(context, input->type, output->type);
-
- return context->ResizeTensor(context, output, TfLiteIntArrayCopy(input->dims));
-}
-
-TfLiteStatus EvalAbs(TfLiteContext *context, TfLiteNode *node)
-{
- const TfLiteTensor *input = ::tflite::GetInput(context, node, 0);
- TfLiteTensor *output = ::tflite::GetOutput(context, node, 0);
- size_t elements = ::tflite::NumElements(input);
- switch (input->type)
- {
- case kTfLiteFloat32:
- {
- auto *in = input->data.f;
- auto *in_end = in + elements;
- auto *out = output->data.f;
- for (; in < in_end; in++, out++)
- *out = std::abs(*in);
- return kTfLiteOk;
- }
- case kTfLiteInt32:
- {
- auto *in = input->data.i32;
- auto *in_end = in + elements;
- auto *out = output->data.i32;
- for (; in < in_end; in++, out++)
- *out = std::abs(*in);
- return kTfLiteOk;
- }
- case kTfLiteInt64:
- {
- auto *in = input->data.i64;
- auto *in_end = in + elements;
- auto *out = output->data.i64;
- for (; in < in_end; in++, out++)
- *out = std::abs(*in);
- return kTfLiteOk;
- }
- case kTfLiteUInt8:
- {
- auto *in = input->data.uint8;
- auto *in_end = in + elements;
- auto *out = output->data.uint8;
- for (; in < in_end; in++, out++)
- *out = *in;
- return kTfLiteOk;
- }
- default:
- {
- context->ReportError(context, "Input type %d is not supported", input->type);
- return kTfLiteError;
- }
- }
-}
-
-} // namespace Abs
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
diff --git a/runtime/libs/tflite/port/1.13.1/src/kernels/TensorFlowMax.cpp b/runtime/libs/tflite/port/1.13.1/src/kernels/TensorFlowMax.cpp
deleted file mode 100644
index 207de98f5..000000000
--- a/runtime/libs/tflite/port/1.13.1/src/kernels/TensorFlowMax.cpp
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/TensorFlowMax.h"
-#include "tensorflow/lite/kernels/kernel_util.h"
-
-#include <iostream>
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace TensorFlowMax
-{
-
-struct TensorFlowMaxOp
-{
- TensorFlowMaxOp(TfLiteContext *context, TfLiteNode *node)
- {
- input = ::tflite::GetInput(context, node, 0);
- axis = ::tflite::GetInput(context, node, 1);
- output = ::tflite::GetOutput(context, node, 0);
- }
- const TfLiteTensor *input;
- const TfLiteTensor *axis;
- TfLiteTensor *output;
-};
-
-void *InitTensorFlowMax(TfLiteContext *context, const char *, size_t)
-{
- // Creates two temp tensors to store index and axis for internal
- // implementation only.
- auto *scratch_tensor_index = new int;
- context->AddTensors(context, 2, scratch_tensor_index);
- return scratch_tensor_index;
-}
-
-void FreeTensorFlowMax(TfLiteContext *, void *buffer)
-{
- delete static_cast<TensorFlowMaxOp *>(buffer);
-}
-
-// Resizes the temp tensor that stores resolved axis.
-TfLiteStatus ResizeTempAxis(TfLiteContext *context, TensorFlowMaxOp *op_context,
- TfLiteTensor *resolved_axis)
-{
- TfLiteIntArray *axis_size = TfLiteIntArrayCreate(1);
- axis_size->data[0] = static_cast<int>(::tflite::NumElements(op_context->axis));
- return context->ResizeTensor(context, resolved_axis, axis_size);
-}
-
-// Resizes output array based on the input size and resolved axis.
-TfLiteStatus ResizeOutputTensor(TfLiteContext *context, TensorFlowMaxOp *op_context)
-{
- int64_t num_axis = ::tflite::NumElements(op_context->axis);
- TfLiteIntArray *input_dims = op_context->input->dims;
- int input_num_dims = ::tflite::NumDimensions(op_context->input);
- const int *axis = op_context->axis->data.i32;
-
- {
- // Calculates size of reducing axis.
- int64_t num_reduce_axis = num_axis;
- for (int64_t i = 0; i < num_axis; ++i)
- {
- int current = axis[i];
- if (current < 0)
- {
- current += input_num_dims;
- }
- TF_LITE_ENSURE(context, current >= 0 && current < input_num_dims);
- for (int64_t j = 0; j < i; ++j)
- {
- int previous = axis[j];
- if (previous < 0)
- {
- previous += input_num_dims;
- }
- if (current == previous)
- {
- --num_reduce_axis;
- break;
- }
- }
- }
- // Determines output dimensions.
- int output_num_dims = ::tflite::NumDimensions(op_context->output);
- TF_LITE_ENSURE(context, (input_num_dims == output_num_dims) ||
- (input_num_dims - num_reduce_axis == output_num_dims));
-
- if (input_num_dims == output_num_dims)
- {
- TfLiteIntArray *output_dims = TfLiteIntArrayCopy(input_dims);
- for (int64_t axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- int current = axis[axis_idx];
- output_dims->data[current] = 1;
- }
- return context->ResizeTensor(context, op_context->output, output_dims);
- }
- else
- {
- TfLiteIntArray *output_dims = TfLiteIntArrayCreate(output_num_dims);
- int num_skip_axis = 0;
- for (int idx = 0; idx < input_num_dims; ++idx)
- {
- bool is_axis = false;
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- if (axis[axis_idx] == idx || axis[axis_idx] + input_num_dims == idx)
- {
- ++num_skip_axis;
- is_axis = true;
- break;
- }
- }
- if (!is_axis)
- {
- output_dims->data[idx - num_skip_axis] = input_dims->data[idx];
- }
- }
- return context->ResizeTensor(context, op_context->output, output_dims);
- }
- }
-}
-
-// Initializes temp tensors to store index and resolved axis.
-TfLiteStatus InitializeTemporaries(TfLiteContext *context, TfLiteNode *node,
- TensorFlowMaxOp *op_context)
-{
- // Creates a temp index to iterate through input data.
- int *scratch_tensor_index = reinterpret_cast<int *>(node->user_data);
- TfLiteIntArrayFree(node->temporaries);
- node->temporaries = TfLiteIntArrayCreate(2);
- node->temporaries->data[0] = *scratch_tensor_index;
- TfLiteTensor *scratch_tensor = &context->tensors[node->temporaries->data[0]];
- scratch_tensor->type = kTfLiteInt32;
- scratch_tensor->allocation_type = kTfLiteArenaRw;
- TfLiteIntArray *index_size = TfLiteIntArrayCreate(1);
- index_size->data[0] = ::tflite::NumDimensions(op_context->input);
- TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scratch_tensor, index_size));
-
- // Creates a temp tensor to store resolved axis given input data.
- node->temporaries->data[1] = *scratch_tensor_index + 1;
- TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]];
- resolved_axis->type = kTfLiteInt32;
- return kTfLiteOk;
-}
-
-TfLiteStatus PrepareTensorFlowMax(TfLiteContext *context, TfLiteNode *node)
-{
- TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 2);
- TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1);
-
- TensorFlowMaxOp op_context(context, node);
- TF_LITE_ENSURE_OK(context, InitializeTemporaries(context, node, &op_context));
-
- TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]];
- // Leaves work to Eval if axis is not constant; else resizes output.
- if (!::tflite::IsConstantTensor(op_context.axis))
- {
- ::tflite::SetTensorToDynamic(op_context.output);
- ::tflite::SetTensorToDynamic(resolved_axis);
- return kTfLiteOk;
- }
- resolved_axis->allocation_type = kTfLiteArenaRw;
- TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis));
- return ResizeOutputTensor(context, &op_context);
-}
-
-// Gets offset of index if expanded on axis. When expanded, the flattened offset
-// will not change, if the output index changes on the given axis. For example,
-// if you have a 2D tensor and you are expanding to 3D on axis 0,
-// then index (0, 1, 2) and index (1, 1, 2) will map from the same flattened
-// offset.
-inline size_t ExpandedInputOffset(const int num_dims, const int *dims, const int *index,
- const int num_axis, const int *axis)
-{
- size_t offset = 0;
- int out_idx = 0;
- for (int in_idx = 0; in_idx < num_dims; ++in_idx)
- {
- // if we need to expand this axis
- bool is_axis = false;
- if (axis != nullptr)
- {
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- if (in_idx == axis[axis_idx])
- {
- is_axis = true;
- break;
- }
- }
- }
- if (!is_axis)
- {
- offset = offset * static_cast<size_t>(dims[in_idx]) + static_cast<size_t>(index[out_idx]);
- out_idx++;
- }
- else
- {
- offset = offset * static_cast<size_t>(dims[in_idx]);
- }
- }
- return offset;
-}
-
-// Gets offset of index if reducing on axis. When reducing, the flattened offset
-// will not change, if the input index changes on the given axis. For example,
-// if you have a 3D tensor and you are reducing to 2D by eliminating axis 0,
-// then index (0, 1, 2) and index (1, 1, 2) will map to the same flattened
-// offset.
-// TODO(kanlig): uses Dims to represent dimensions.
-inline size_t ReducedOutputOffset(const int num_dims, const int *dims, const int *index,
- const int num_axis, const int *axis)
-{
- size_t offset = 0;
- for (int idx = 0; idx < num_dims; ++idx)
- {
- // if we need to skip this axis
- bool is_axis = false;
- if (axis != nullptr)
- {
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- if (idx == axis[axis_idx])
- {
- is_axis = true;
- break;
- }
- }
- }
- if (!is_axis)
- {
- offset = offset * static_cast<size_t>(dims[idx]) + static_cast<size_t>(index[idx]);
- }
- }
- return offset;
-}
-
-// Gets next index to iterate through a multidimensional array.
-inline bool NextIndex(TfLiteContext *context, const int num_dims, const int *dims, int *current)
-{
- int carry = 1;
- for (int idx = num_dims - 1; idx >= 0; --idx)
- {
- int current_val = current[idx] + carry;
- TF_LITE_ENSURE(context, (dims[idx] >= current_val));
- if (dims[idx] == current_val)
- {
- current[idx] = 0;
- }
- else
- {
- current[idx] = current_val;
- carry = 0;
- break;
- }
- }
- return (carry == 0);
-}
-
-template <typename T>
-inline TfLiteStatus
-CustomMax(TfLiteContext *context, T *input_data, const int *input_dims, const int input_num_dims,
- T *output_data, const int *output_dims, const int output_num_dims, const int *axis,
- const int num_axis_dimensions, bool /*keep_dims*/, int *temp_index, int *resolved_axis)
-{
- // resolves axis.
- int num_resolved_axis = 0;
- for (int idx = 0; idx < num_axis_dimensions; ++idx)
- {
- int current = axis[idx];
- TF_LITE_ENSURE(context, (current < input_num_dims && current + input_num_dims >= 0));
- if (current < 0)
- {
- current += input_num_dims;
- }
- bool is_dup = false;
- for (int j = 0; j < num_resolved_axis; ++j)
- {
- if (resolved_axis[j] == current)
- {
- is_dup = true;
- break;
- }
- }
- if (!is_dup)
- {
- resolved_axis[num_resolved_axis++] = current;
- }
- }
-
- TF_LITE_ENSURE(context, (input_num_dims > 0));
- TF_LITE_ENSURE(context, (input_dims != nullptr));
- TF_LITE_ENSURE(context, (temp_index != nullptr));
-
- // resets output data.
- for (int idx = 0; idx < output_num_dims; ++idx)
- {
- temp_index[idx] = 0;
- }
- for (bool has_next = true; has_next;
- has_next = NextIndex(context, output_num_dims, output_dims, temp_index))
- {
- size_t output_offset =
- ReducedOutputOffset(output_num_dims, output_dims, temp_index, 0, nullptr);
- size_t input_offset = ExpandedInputOffset(input_num_dims, input_dims, temp_index,
- num_resolved_axis, resolved_axis);
- output_data[output_offset] = input_data[input_offset];
- }
-
- // resets temp index.
- for (int idx = 0; idx < input_num_dims; ++idx)
- {
- temp_index[idx] = 0;
- }
-
- // iterates through input_data.
- for (bool has_next = true; has_next;
- has_next = NextIndex(context, input_num_dims, input_dims, temp_index))
- {
- size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, temp_index, 0, nullptr);
- size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims, temp_index,
- num_resolved_axis, resolved_axis);
- if (output_data[output_offset] < input_data[input_offset])
- {
- output_data[output_offset] = input_data[input_offset];
- }
- }
-
- return kTfLiteOk;
-}
-
-TfLiteStatus EvalTensorFlowMax(TfLiteContext *context, TfLiteNode *node)
-{
-
- TensorFlowMaxOp op_context(context, node);
- int num_axis = static_cast<int>(::tflite::NumElements(op_context.axis));
- TfLiteTensor *temp_index = &context->tensors[node->temporaries->data[0]];
- TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]];
- // Resize the output tensor if the output tensor is dynamic.
- if (::tflite::IsDynamicTensor(op_context.output))
- {
- TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis));
- TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context));
- }
-
- TfLiteStatus returnStatus = kTfLiteOk;
- switch (op_context.input->type)
- {
- case kTfLiteFloat32:
- returnStatus = CustomMax<float>(
- context, op_context.input->data.f, op_context.input->dims->data,
- op_context.input->dims->size, op_context.output->data.f, op_context.output->dims->data,
- op_context.output->dims->size, op_context.axis->data.i32, num_axis, false,
- temp_index->data.i32, resolved_axis->data.i32);
- break;
- case kTfLiteInt32:
- returnStatus = CustomMax<int>(context, op_context.input->data.i32,
- op_context.input->dims->data, op_context.input->dims->size,
- op_context.output->data.i32, op_context.output->dims->data,
- op_context.output->dims->size, op_context.axis->data.i32,
- num_axis, false, temp_index->data.i32, resolved_axis->data.i32);
- break;
- case kTfLiteUInt8:
- returnStatus = CustomMax<uint8_t>(
- context, op_context.input->data.uint8, op_context.input->dims->data,
- op_context.input->dims->size, op_context.output->data.uint8,
- op_context.output->dims->data, op_context.output->dims->size, op_context.axis->data.i32,
- num_axis, false, temp_index->data.i32, resolved_axis->data.i32);
- break;
- case kTfLiteInt64:
- returnStatus = CustomMax<int64_t>(
- context, op_context.input->data.i64, op_context.input->dims->data,
- op_context.input->dims->size, op_context.output->data.i64, op_context.output->dims->data,
- op_context.output->dims->size, op_context.axis->data.i32, num_axis, false,
- temp_index->data.i32, resolved_axis->data.i32);
- break;
- default:
- returnStatus = kTfLiteError;
- }
-
- return returnStatus;
-}
-
-} // namespace TensorFlowMax
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
diff --git a/runtime/libs/tflite/port/1.13.1/src/kernels/TensorFlowSum.cpp b/runtime/libs/tflite/port/1.13.1/src/kernels/TensorFlowSum.cpp
deleted file mode 100644
index 40f266baa..000000000
--- a/runtime/libs/tflite/port/1.13.1/src/kernels/TensorFlowSum.cpp
+++ /dev/null
@@ -1,400 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/TensorFlowSum.h"
-#include "tensorflow/lite/kernels/kernel_util.h"
-
-#include <iostream>
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace TensorFlowSum
-{
-
-struct TensorFlowSumOp
-{
- TensorFlowSumOp(TfLiteContext *context, TfLiteNode *node)
- {
- input = ::tflite::GetInput(context, node, 0);
- axis = ::tflite::GetInput(context, node, 1);
- output = ::tflite::GetOutput(context, node, 0);
- }
- const TfLiteTensor *input;
- const TfLiteTensor *axis;
- TfLiteTensor *output;
-};
-
-void *InitTensorFlowSum(TfLiteContext *context, const char *, size_t)
-{
- // Creates two temp tensors to store index and axis for internal
- // implementation only.
- auto *scratch_tensor_index = new int;
- context->AddTensors(context, 2, scratch_tensor_index);
- return scratch_tensor_index;
-}
-
-void FreeTensorFlowSum(TfLiteContext *, void *buffer)
-{
- delete static_cast<TensorFlowSumOp *>(buffer);
-}
-
-// Resizes the temp tensor that stores resolved axis.
-TfLiteStatus ResizeTempAxis(TfLiteContext *context, TensorFlowSumOp *op_context,
- TfLiteTensor *resolved_axis)
-{
- TfLiteIntArray *axis_size = TfLiteIntArrayCreate(1);
- axis_size->data[0] = static_cast<int>(::tflite::NumElements(op_context->axis));
- return context->ResizeTensor(context, resolved_axis, axis_size);
-}
-
-// Resizes output array based on the input size and resolved axis.
-TfLiteStatus ResizeOutputTensor(TfLiteContext *context, TensorFlowSumOp *op_context)
-{
- int64_t num_axis = ::tflite::NumElements(op_context->axis);
- TfLiteIntArray *input_dims = op_context->input->dims;
- int input_num_dims = ::tflite::NumDimensions(op_context->input);
- const int *axis = op_context->axis->data.i32;
-
- {
- // Calculates size of reducing axis.
- int64_t num_reduce_axis = num_axis;
- for (int64_t i = 0; i < num_axis; ++i)
- {
- int current = axis[i];
- if (current < 0)
- {
- current += input_num_dims;
- }
- TF_LITE_ENSURE(context, current >= 0 && current < input_num_dims);
- for (int64_t j = 0; j < i; ++j)
- {
- int previous = axis[j];
- if (previous < 0)
- {
- previous += input_num_dims;
- }
- if (current == previous)
- {
- --num_reduce_axis;
- break;
- }
- }
- }
- // Determines output dimensions.
- int output_num_dims = ::tflite::NumDimensions(op_context->output);
- TF_LITE_ENSURE(context, (input_num_dims == output_num_dims) ||
- (input_num_dims - num_reduce_axis == output_num_dims));
-
- if (input_num_dims == output_num_dims)
- {
- TfLiteIntArray *output_dims = TfLiteIntArrayCopy(input_dims);
- for (int64_t axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- int current = axis[axis_idx];
- output_dims->data[current] = 1;
- }
- return context->ResizeTensor(context, op_context->output, output_dims);
- }
- else
- {
- TfLiteIntArray *output_dims = TfLiteIntArrayCreate(output_num_dims);
- int num_skip_axis = 0;
- for (int idx = 0; idx < input_num_dims; ++idx)
- {
- bool is_axis = false;
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- if (axis[axis_idx] == idx || axis[axis_idx] + input_num_dims == idx)
- {
- ++num_skip_axis;
- is_axis = true;
- break;
- }
- }
- if (!is_axis)
- {
- output_dims->data[idx - num_skip_axis] = input_dims->data[idx];
- }
- }
- return context->ResizeTensor(context, op_context->output, output_dims);
- }
- }
-}
-
-// Initializes temp tensors to store index and resolved axis.
-TfLiteStatus InitializeTemporaries(TfLiteContext *context, TfLiteNode *node,
- TensorFlowSumOp *op_context)
-{
- // Creates a temp index to iterate through input data.
- int *scratch_tensor_index = reinterpret_cast<int *>(node->user_data);
- TfLiteIntArrayFree(node->temporaries);
- node->temporaries = TfLiteIntArrayCreate(2);
- node->temporaries->data[0] = *scratch_tensor_index;
- TfLiteTensor *scratch_tensor = &context->tensors[node->temporaries->data[0]];
- scratch_tensor->type = kTfLiteInt32;
- scratch_tensor->allocation_type = kTfLiteArenaRw;
- TfLiteIntArray *index_size = TfLiteIntArrayCreate(1);
- index_size->data[0] = ::tflite::NumDimensions(op_context->input);
- TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scratch_tensor, index_size));
-
- // Creates a temp tensor to store resolved axis given input data.
- node->temporaries->data[1] = *scratch_tensor_index + 1;
- TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]];
- resolved_axis->type = kTfLiteInt32;
- return kTfLiteOk;
-}
-
-TfLiteStatus PrepareTensorFlowSum(TfLiteContext *context, TfLiteNode *node)
-{
- TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 2);
- TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1);
-
- TensorFlowSumOp op_context(context, node);
- TF_LITE_ENSURE_OK(context, InitializeTemporaries(context, node, &op_context));
-
- TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]];
- // Leaves work to Eval if axis is not constant; else resizes output.
- if (!::tflite::IsConstantTensor(op_context.axis))
- {
- ::tflite::SetTensorToDynamic(op_context.output);
- ::tflite::SetTensorToDynamic(resolved_axis);
- return kTfLiteOk;
- }
- resolved_axis->allocation_type = kTfLiteArenaRw;
- TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis));
- return ResizeOutputTensor(context, &op_context);
-}
-
-// Gets offset of index if expanded on axis. When expanded, the flattened offset
-// will not change, if the output index changes on the given axis. For example,
-// if you have a 2D tensor and you are expanding to 3D on axis 0,
-// then index (0, 1, 2) and index (1, 1, 2) will map from the same flattened
-// offset.
-inline size_t ExpandedInputOffset(const int num_dims, const int *dims, const int *index,
- const int num_axis, const int *axis)
-{
- size_t offset = 0;
- int out_idx = 0;
- for (int in_idx = 0; in_idx < num_dims; ++in_idx)
- {
- // if we need to expand this axis
- bool is_axis = false;
- if (axis != nullptr)
- {
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- if (in_idx == axis[axis_idx])
- {
- is_axis = true;
- break;
- }
- }
- }
- if (!is_axis)
- {
- offset = offset * static_cast<size_t>(dims[in_idx]) + static_cast<size_t>(index[out_idx]);
- out_idx++;
- }
- else
- {
- offset = offset * static_cast<size_t>(dims[in_idx]);
- }
- }
- return offset;
-}
-
-// Gets offset of index if reducing on axis. When reducing, the flattened offset
-// will not change, if the input index changes on the given axis. For example,
-// if you have a 3D tensor and you are reducing to 2D by eliminating axis 0,
-// then index (0, 1, 2) and index (1, 1, 2) will map to the same flattened
-// offset.
-// TODO(kanlig): uses Dims to represent dimensions.
-inline size_t ReducedOutputOffset(const int num_dims, const int *dims, const int *index,
- const int num_axis, const int *axis)
-{
- size_t offset = 0;
- for (int idx = 0; idx < num_dims; ++idx)
- {
- // if we need to skip this axis
- bool is_axis = false;
- if (axis != nullptr)
- {
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- if (idx == axis[axis_idx])
- {
- is_axis = true;
- break;
- }
- }
- }
- if (!is_axis)
- {
- offset = offset * static_cast<size_t>(dims[idx]) + static_cast<size_t>(index[idx]);
- }
- }
- return offset;
-}
-
-// Gets next index to iterate through a multidimensional array.
-inline bool NextIndex(TfLiteContext *context, const int num_dims, const int *dims, int *current)
-{
- int carry = 1;
- for (int idx = num_dims - 1; idx >= 0; --idx)
- {
- int current_val = current[idx] + carry;
- TF_LITE_ENSURE(context, (dims[idx] >= current_val));
- if (dims[idx] == current_val)
- {
- current[idx] = 0;
- }
- else
- {
- current[idx] = current_val;
- carry = 0;
- break;
- }
- }
- return (carry == 0);
-}
-
-template <typename T>
-inline TfLiteStatus
-CustomSum(TfLiteContext *context, T *input_data, const int *input_dims, const int input_num_dims,
- T *output_data, const int *output_dims, const int output_num_dims, const int *axis,
- const int num_axis_dimensions, bool /*keep_dims*/, int *temp_index, int *resolved_axis)
-{
- // resolves axis.
- int num_resolved_axis = 0;
- for (int idx = 0; idx < num_axis_dimensions; ++idx)
- {
- int current = axis[idx];
- TF_LITE_ENSURE(context, (current < input_num_dims && current + input_num_dims >= 0));
- if (current < 0)
- {
- current += input_num_dims;
- }
- bool is_dup = false;
- for (int j = 0; j < num_resolved_axis; ++j)
- {
- if (resolved_axis[j] == current)
- {
- is_dup = true;
- break;
- }
- }
- if (!is_dup)
- {
- resolved_axis[num_resolved_axis++] = current;
- }
- }
-
- TF_LITE_ENSURE(context, (input_num_dims > 0));
- TF_LITE_ENSURE(context, (input_dims != nullptr));
- TF_LITE_ENSURE(context, (temp_index != nullptr));
-
- // resets output data.
- for (int idx = 0; idx < output_num_dims; ++idx)
- {
- temp_index[idx] = 0;
- }
- for (bool has_next = true; has_next;
- has_next = NextIndex(context, output_num_dims, output_dims, temp_index))
- {
- size_t output_offset =
- ReducedOutputOffset(output_num_dims, output_dims, temp_index, 0, nullptr);
- output_data[output_offset] = 0;
- }
-
- // resets temp index.
- for (int idx = 0; idx < input_num_dims; ++idx)
- {
- temp_index[idx] = 0;
- }
-
- // iterates through input_data.
- for (bool has_next = true; has_next;
- has_next = NextIndex(context, input_num_dims, input_dims, temp_index))
- {
- size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, temp_index, 0, nullptr);
- size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims, temp_index,
- num_resolved_axis, resolved_axis);
- output_data[output_offset] += input_data[input_offset];
- }
-
- return kTfLiteOk;
-}
-
-TfLiteStatus EvalTensorFlowSum(TfLiteContext *context, TfLiteNode *node)
-{
-
- TensorFlowSumOp op_context(context, node);
- int num_axis = static_cast<int>(::tflite::NumElements(op_context.axis));
- TfLiteTensor *temp_index = &context->tensors[node->temporaries->data[0]];
- TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]];
- // Resize the output tensor if the output tensor is dynamic.
- if (::tflite::IsDynamicTensor(op_context.output))
- {
- TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis));
- TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context));
- }
-
- TfLiteStatus returnStatus = kTfLiteOk;
- switch (op_context.input->type)
- {
- case kTfLiteFloat32:
- returnStatus = CustomSum<float>(
- context, op_context.input->data.f, op_context.input->dims->data,
- op_context.input->dims->size, op_context.output->data.f, op_context.output->dims->data,
- op_context.output->dims->size, op_context.axis->data.i32, num_axis, false,
- temp_index->data.i32, resolved_axis->data.i32);
- break;
- case kTfLiteInt32:
- returnStatus = CustomSum<int>(context, op_context.input->data.i32,
- op_context.input->dims->data, op_context.input->dims->size,
- op_context.output->data.i32, op_context.output->dims->data,
- op_context.output->dims->size, op_context.axis->data.i32,
- num_axis, false, temp_index->data.i32, resolved_axis->data.i32);
- break;
- case kTfLiteUInt8:
- returnStatus = CustomSum<uint8_t>(
- context, op_context.input->data.uint8, op_context.input->dims->data,
- op_context.input->dims->size, op_context.output->data.uint8,
- op_context.output->dims->data, op_context.output->dims->size, op_context.axis->data.i32,
- num_axis, false, temp_index->data.i32, resolved_axis->data.i32);
- break;
- case kTfLiteInt64:
- returnStatus = CustomSum<int64_t>(
- context, op_context.input->data.i64, op_context.input->dims->data,
- op_context.input->dims->size, op_context.output->data.i64, op_context.output->dims->data,
- op_context.output->dims->size, op_context.axis->data.i32, num_axis, false,
- temp_index->data.i32, resolved_axis->data.i32);
- break;
- default:
- returnStatus = kTfLiteError;
- }
-
- return returnStatus;
-}
-
-} // namespace TensorFlowSum
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
diff --git a/runtime/libs/tflite/port/1.13.1/src/kernels/register.cpp b/runtime/libs/tflite/port/1.13.1/src/kernels/register.cpp
index b2088b277..89f81b612 100644
--- a/runtime/libs/tflite/port/1.13.1/src/kernels/register.cpp
+++ b/runtime/libs/tflite/port/1.13.1/src/kernels/register.cpp
@@ -296,10 +296,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
AddBuiltin(BuiltinOperator_FILL, Register_FILL());
AddBuiltin(BuiltinOperator_MIRROR_PAD, Register_MIRROR_PAD());
- AddCustom("TensorFlowMax", nnfw::tflite::custom::Register_TensorFlowMax());
AddCustom("SquaredDifference", nnfw::tflite::custom::Register_SquaredDifference());
- AddCustom("TensorFlowSum", nnfw::tflite::custom::Register_TensorFlowSum());
- AddCustom("Abs", nnfw::tflite::custom::Register_Abs());
// TODO(andrewharp, ahentz): Move these somewhere more appropriate so that
// custom ops aren't always included by default.
diff --git a/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate.cpp b/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate.cpp
index 99272f0e5..2924c44e9 100644
--- a/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate.cpp
+++ b/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate.cpp
@@ -35,6 +35,8 @@ limitations under the License.
#include <sys/system_properties.h>
#endif
+#include <memory>
+
namespace nnfw {
namespace tflite {
@@ -159,6 +161,9 @@ TfLiteStatus addTensorOperands(::tflite::Subgraph* subgraph,
uint32_t* no_of_operands_added,
std::vector<int64_t>* nnapi_ids) {
uint32_t next_id = 0;
+ // Allocate temporary buffer to save casted boolean tensor
+ std::unordered_map<size_t, std::unique_ptr<uint8_t[]>> const_boolean_tensors;
+
for (size_t i = 0; i < subgraph->tensors_size(); i++) {
// Skip temporaries and RNN back-edges.
if ((*nnapi_ids)[i] == kOperandNotNeeded) continue;
@@ -196,9 +201,7 @@ TfLiteStatus addTensorOperands(::tflite::Subgraph* subgraph,
case kTfLiteBool:
// Workaround to pass bool type under NNAPI
// Use bool type using ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with scale = 1.0f and zero_point = 0
- nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
- scale = 1.0f;
- zeroPoint = 0;
+ nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
break;
default:
logError("Unsupported tensor type %d", tensor->type);
@@ -243,7 +246,19 @@ TfLiteStatus addTensorOperands(::tflite::Subgraph* subgraph,
// TODO(aselle): Based on Michael's suggestion, limiting this to read
// only memory
if (tensor->allocation_type == kTfLiteMmapRo) {
- if (const NNAPIAllocation* alloc = dynamic_cast<const NNAPIAllocation*>(
+ if (tensor->type == kTfLiteBool)
+ {
+ // ANEURALNETWORKS_TENSOR_BOOL8 tensor element size is 8 bits
+ size_t elements = tensor->bytes / sizeof(bool);
+ const_boolean_tensors[i] = std::make_unique<uint8_t[]>(elements);
+ for (size_t idx = 0; idx < elements; idx++)
+ {
+ const_boolean_tensors[i].get()[idx] = (tensor->data.b[idx] ? 0x00 : 0xff);
+ }
+ RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_setOperandValue(
+ nn_model, next_id, const_boolean_tensors[i].get(), tensor->bytes));
+ }
+ else if (const NNAPIAllocation* alloc = dynamic_cast<const NNAPIAllocation*>(
static_cast<const ::tflite::Allocation*>(tensor->allocation))) {
RETURN_ERROR_IF_NN_FAILED(
ANeuralNetworksModel_setOperandValueFromMemory(
@@ -703,19 +718,32 @@ TfLiteStatus AddOpsAndParams(
nnapi_version = 12; // require NNAPI 1.2
nn_op_type = ANEURALNETWORKS_TOPK_V2;
break;
+ case tflite::BuiltinOperator_GREATER:
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_GREATER;
+ break;
+ case tflite::BuiltinOperator_GREATER_EQUAL:
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_GREATER_EQUAL;
+ break;
+ case tflite::BuiltinOperator_LESS:
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_LESS;
+ break;
+ case tflite::BuiltinOperator_LESS_EQUAL:
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_LESS_EQUAL;
+ break;
case tflite::BuiltinOperator_GATHER:
nnapi_version = 12; // require NNAPI 1.2
nn_op_type = ANEURALNETWORKS_GATHER;
add_gather_params(node.builtin_data);
break;
case tflite::BuiltinOperator_SPLIT:
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_SPLIT;
add_split_params(node.builtin_data);
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_SPLIT_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
+ break;
case tflite::BuiltinOperator_NEG:
nnapi_version = 12; // require NNAPI 1.2
nn_op_type = ANEURALNETWORKS_NEG;
@@ -733,21 +761,14 @@ TfLiteStatus AddOpsAndParams(
reinterpret_cast<uint32_t*>(node.outputs->data)));
continue;
case tflite::BuiltinOperator_PRELU:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_PRELU_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_PRELU;
+ break;
case tflite::BuiltinOperator_ARG_MAX:
check_arg_max_input(node.builtin_data);
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_ARGMAX_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_ARGMAX;
+ break;
case tflite::BuiltinOperator_PACK:
add_pack_ex_params(node.builtin_data);
CHECK_NN(ANeuralNetworksModel_addOperationEx(
@@ -773,66 +794,40 @@ TfLiteStatus AddOpsAndParams(
nn_op_type = ANEURALNETWORKS_RSQRT;
break;
case tflite::BuiltinOperator_EQUAL:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_EQUAL_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_EQUAL;
+ break;
case tflite::BuiltinOperator_NOT_EQUAL:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_NOT_EQUAL_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_NOT_EQUAL;
+ break;
case tflite::BuiltinOperator_SUM:
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_REDUCE_SUM;
add_reducer_params(node.builtin_data);
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_REDUCE_SUM_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
+ break;
case tflite::BuiltinOperator_REDUCE_MAX:
- add_reducer_v12_params(node.builtin_data);
+ add_reducer_params(node.builtin_data);
nnapi_version = 12; // require NNAPI 1.2
nn_op_type = ANEURALNETWORKS_REDUCE_MAX;
break;
case tflite::BuiltinOperator_REDUCE_MIN:
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_REDUCE_MIN;
add_reducer_params(node.builtin_data);
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_REDUCE_MIN_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
+ break;
case tflite::BuiltinOperator_LOGICAL_AND:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_LOGICAL_AND_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_LOGICAL_AND;
+ break;
case tflite::BuiltinOperator_LOGICAL_OR:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_LOGICAL_OR_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_LOGICAL_OR;
+ break;
case tflite::BuiltinOperator_LOGICAL_NOT:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_LOGICAL_NOT_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_LOGICAL_NOT;
+ break;
case tflite::BuiltinOperator_SQUARED_DIFFERENCE:
CHECK_NN(ANeuralNetworksModel_addOperationEx(
nn_model, ANEURALNETWORKS_SQUARED_DIFFERENCE_EX,
@@ -851,6 +846,26 @@ TfLiteStatus AddOpsAndParams(
nnapi_version = 12; // require NNAPI 1.2
nn_op_type = ANEURALNETWORKS_ABS;
break;
+ case tflite::BuiltinOperator_ONE_HOT:
+ add_one_hot_tensor_inputs_as_scalar();
+ add_one_hot_params(node.builtin_data);
+ CHECK_NN(ANeuralNetworksModel_addOperationEx(
+ nn_model, ANEURALNETWORKS_ONE_HOT_EX,
+ static_cast<uint32_t>(augmented_inputs.size()),
+ augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
+ reinterpret_cast<uint32_t*>(node.outputs->data)));
+ continue; // _EX operator should use `continue` to skip addOperanation.
+ case tflite::BuiltinOperator_SIN:
+ nnapi_version = 12; // require NNAPI 1.2
+ nn_op_type = ANEURALNETWORKS_SIN;
+ break;
+ case tflite::BuiltinOperator_SHAPE:
+ CHECK_NN(ANeuralNetworksModel_addOperationEx(
+ nn_model, ANEURALNETWORKS_SHAPE_EX,
+ static_cast<uint32_t>(augmented_inputs.size()),
+ augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
+ reinterpret_cast<uint32_t*>(node.outputs->data)));
+ continue; // _EX operator should use `continue` to skip addOperanation.
case tflite::BuiltinOperator_CONCAT_EMBEDDINGS:
case tflite::BuiltinOperator_LSH_PROJECTION:
case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN:
@@ -881,14 +896,14 @@ TfLiteStatus AddOpsAndParams(
//case tflite::BuiltinOperator_MINIMUM:
//case tflite::BuiltinOperator_ARG_MAX:
case tflite::BuiltinOperator_ARG_MIN:
- case tflite::BuiltinOperator_GREATER:
- case tflite::BuiltinOperator_GREATER_EQUAL:
- case tflite::BuiltinOperator_LESS:
- case tflite::BuiltinOperator_LESS_EQUAL:
+ //case tflite::BuiltinOperator_GREATER:
+ //case tflite::BuiltinOperator_GREATER_EQUAL:
+ //case tflite::BuiltinOperator_LESS:
+ //case tflite::BuiltinOperator_LESS_EQUAL:
//case tflite::BuiltinOperator_NEG:
case tflite::BuiltinOperator_SELECT:
// case tflite::BuiltinOperator_SLICE:
- case tflite::BuiltinOperator_SIN:
+ //case tflite::BuiltinOperator_SIN:
case tflite::BuiltinOperator_LOG:
//case tflite::BuiltinOperator_TRANSPOSE_CONV:
case tflite::BuiltinOperator_TILE:
@@ -902,12 +917,12 @@ TfLiteStatus AddOpsAndParams(
case tflite::BuiltinOperator_REDUCE_PROD:
//case tflite::BuiltinOperator_SQRT:
//case tflite::BuiltinOperator_RSQRT:
- case tflite::BuiltinOperator_SHAPE:
+ //case tflite::BuiltinOperator_SHAPE:
case tflite::BuiltinOperator_POW:
case tflite::BuiltinOperator_FAKE_QUANT:
//case tflite::BuiltinOperator_PACK:
//case tflite::BuiltinOperator_LOGICAL_OR:
- case tflite::BuiltinOperator_ONE_HOT:
+ //case tflite::BuiltinOperator_ONE_HOT:
//case tflite::BuiltinOperator_LOGICAL_AND:
//case tflite::BuiltinOperator_LOGICAL_NOT:
//case tflite::BuiltinOperator_UNPACK:
@@ -928,13 +943,7 @@ TfLiteStatus AddOpsAndParams(
break;
case tflite::BuiltinOperator_CUSTOM: {
std::string custom_name(registration.custom_name);
- if (custom_name.compare("TensorFlowMax") == 0) {
- add_reducer_v12_params(node.builtin_data);
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_REDUCE_MAX;
- break;
- }
- else if (custom_name.compare("SquaredDifference") == 0) {
+ if (custom_name.compare("SquaredDifference") == 0) {
CHECK_NN(ANeuralNetworksModel_addOperationEx(
nn_model, ANEURALNETWORKS_SQUARED_DIFFERENCE_EX,
static_cast<uint32_t>(augmented_inputs.size()),
@@ -943,21 +952,6 @@ TfLiteStatus AddOpsAndParams(
reinterpret_cast<uint32_t*>(node.outputs->data)));
continue;
}
- else if (custom_name.compare("TensorFlowSum") == 0) {
- add_reducer_params(node.builtin_data);
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_REDUCE_SUM_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- }
- else if (custom_name.compare("Abs") == 0) {
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_ABS;
- break;
- }
logError("Custom operations are not supported when using NNAPI.");
return kTfLiteError;
break;
@@ -1110,6 +1104,7 @@ TfLiteStatus NNAPIDelegate::Invoke(::tflite::Subgraph* subgraph) {
// TODO(aselle): This should be called setInputValue maybe to be cons.
TfLiteTensor* tensor = subgraph->tensor(input);
// Workaround to pass bool type under NNAPI
+ // ANEURALNETWORKS_TENSOR_BOOL8 tensor element size is 8 bits
if (tensor->type == kTfLiteBool)
{
CHECK_NN(ANeuralNetworksExecution_setInput(
@@ -1128,6 +1123,7 @@ TfLiteStatus NNAPIDelegate::Invoke(::tflite::Subgraph* subgraph) {
TfLiteTensor* tensor = subgraph->tensor(output);
// Workaround to pass bool type under NNAPI
+ // ANEURALNETWORKS_TENSOR_BOOL8 tensor element size is 8 bits
if (tensor->type == kTfLiteBool)
{
CHECK_NN(ANeuralNetworksExecution_setOutput(
diff --git a/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate_ex_AddOpsAndParams_lambda.inc b/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate_ex_AddOpsAndParams_lambda.inc
index 5b718029b..ee758105f 100644
--- a/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate_ex_AddOpsAndParams_lambda.inc
+++ b/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate_ex_AddOpsAndParams_lambda.inc
@@ -135,7 +135,7 @@
assert(count == 1);
};
- auto add_reducer_v12_params = [&add_scalar_bool8](void* data) {
+ auto add_reducer_params = [&add_scalar_bool8](void* data) {
auto builtin = reinterpret_cast<TfLiteReducerParams*>(data);
if (builtin == nullptr)
{
@@ -147,14 +147,24 @@
}
};
- auto add_reducer_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteReducerParams*>(data);
- if (builtin == nullptr)
- {
- add_scalar_int32(0);
- }
- else
- {
- add_scalar_int32(builtin->keep_dims);
- }
+ auto add_one_hot_tensor_inputs_as_scalar = [subgraph, &node, &augmented_inputs,
+ &add_scalar_float32]() {
+ assert(augmented_inputs.size() == 4);
+ const auto on_value_idx = node.inputs->data[2];
+ const auto off_value_idx = node.inputs->data[3];
+ const auto on_value_tensor = subgraph->tensor(on_value_idx);
+ const auto off_value_tensor = subgraph->tensor(off_value_idx);
+ assert(on_value_tensor->type == kTfLiteFloat32);
+ assert(off_value_tensor->type == kTfLiteFloat32);
+ const auto on_value = *on_value_tensor->data.f;
+ const auto off_value = *off_value_tensor->data.f;
+ augmented_inputs.pop_back();
+ augmented_inputs.pop_back();
+ add_scalar_float32(on_value);
+ add_scalar_float32(off_value);
+ };
+
+ auto add_one_hot_params = [&add_scalar_int32](void* data) {
+ const auto* builtin = reinterpret_cast<TfLiteOneHotParams*>(data);
+ add_scalar_int32(builtin->axis);
};
diff --git a/runtime/libs/tflite/src/Diff.cpp b/runtime/libs/tflite/src/Diff.cpp
index 879de0735..9e66bbb5d 100644
--- a/runtime/libs/tflite/src/Diff.cpp
+++ b/runtime/libs/tflite/src/Diff.cpp
@@ -86,8 +86,8 @@ bool TfLiteInterpMatchApp::compareSingleTensorView(const nnfw::tflite::TensorVie
std::vector<nnfw::misc::tensor::Diff<T>> diffs;
assert(expected.shape() == obtained.shape());
- using nnfw::misc::tensor::zip;
using nnfw::misc::tensor::Index;
+ using nnfw::misc::tensor::zip;
zip(expected.shape(), expected, obtained)
<< [&](const Index &index, T expected_value, T obtained_value) {
@@ -296,6 +296,18 @@ template <> bool RandomGenerator::generate<bool>(void)
return dist(_rand);
}
+template <> int32_t RandomGenerator::generate<int32_t>(void)
+{
+ // Instead of INT_MAX, 4096 is chosen because int32_t input does not mean
+ // that the model can have any value in int32_t can hold.
+ // For example, one_hot operation gets indices as int32_t tensor.
+ // However, we usually expect it would hold a value in [0..depth).
+ // In our given model, depth was 10137.
+ const int int32_random_max = 4096;
+ std::uniform_int_distribution<> dist(0, int32_random_max);
+ return dist(_rand);
+}
+
#include "tflite/TensorLogger.h"
//
// Random Test Runner
@@ -615,7 +627,8 @@ RandomTestRunner RandomTestRunner::make(uint32_t seed)
param.verbose = nnfw::misc::EnvVar("VERBOSE").asInt(0);
param.tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1);
- ;
+ param.tensor_logging = nnfw::misc::EnvVar("TENSOR_LOGGING").asBool(false);
+ param.log_path = nnfw::misc::EnvVar("TENSOR_LOGGING").asString("tensor_log.txt");
return RandomTestRunner{seed, param};
}
diff --git a/runtime/libs/tflite/src/TensorShapeUtils.cpp b/runtime/libs/tflite/src/TensorShapeUtils.cpp
index 29628cd26..689b6151b 100644
--- a/runtime/libs/tflite/src/TensorShapeUtils.cpp
+++ b/runtime/libs/tflite/src/TensorShapeUtils.cpp
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include "tflite/TensorShapeUtils.h"
namespace nnfw
diff --git a/runtime/neurun/CMakeLists.txt b/runtime/neurun/CMakeLists.txt
deleted file mode 100644
index 359006d54..000000000
--- a/runtime/neurun/CMakeLists.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-if(NOT BUILD_NEURUN)
- return()
-endif(NOT BUILD_NEURUN)
-
-# Add cpu
-
-# TODO Remove this variable as adding include dirs is done with target_link_libraries
-# (currently used by cpu/acl_cl kernel module which is not proper)
-set(NEURUN_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/core/include)
-
-add_subdirectory(backend)
-add_subdirectory(frontend)
-add_subdirectory(core)
-add_subdirectory(api)
-add_subdirectory(test)
-add_subdirectory(sample)
diff --git a/runtime/neurun/api/CMakeLists.txt b/runtime/neurun/api/CMakeLists.txt
deleted file mode 100644
index c3f7702ad..000000000
--- a/runtime/neurun/api/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-file(GLOB_RECURSE API_SRC "*.cc")
-
-set(NEURUN_DEV nnfw-dev)
-add_library(${NEURUN_DEV} SHARED ${API_SRC})
-
-# Public headers to publish
-# nnfw_debug.h is header for runtime developer, so it will not be installed
-# But runtime developer can use nnfw_debug.h by linking nnfw-dev
-set(NNFW_API_HEADERS include/nnfw.h include/nnfw_dev.h)
-
-target_link_libraries(${NEURUN_DEV} PUBLIC nnfw-nnapi-header)
-target_link_libraries(${NEURUN_DEV} PRIVATE neurun_core)
-target_link_libraries(${NEURUN_DEV} PRIVATE jsoncpp tflite_loader circle_loader ${LIB_PTHREAD})
-target_link_libraries(${NEURUN_DEV} PRIVATE nnfw_common)
-target_link_libraries(${NEURUN_DEV} PRIVATE nnfw_coverage)
-target_include_directories(${NEURUN_DEV} PUBLIC include)
-set_target_properties(${NEURUN_DEV} PROPERTIES PUBLIC_HEADER "${NNFW_API_HEADERS}")
-
-install(TARGETS ${NEURUN_DEV}
- LIBRARY DESTINATION lib
- PUBLIC_HEADER DESTINATION include/nnfw)
diff --git a/runtime/neurun/api/include/nnfw.h b/runtime/neurun/api/include/nnfw.h
deleted file mode 100644
index c903fbcad..000000000
--- a/runtime/neurun/api/include/nnfw.h
+++ /dev/null
@@ -1,378 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file nnfw.h
- * @brief This file describes runtime API
- */
-#ifndef __NNFW_H__
-#define __NNFW_H__
-
-#include <stddef.h>
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * @brief Session to query with runtime
- *
- * <p>nnfw_session is started and passed by calling {@link nnfw_create_session}.
- * Each session has its own inference environment, such as model to inference, backend usage, etc.
- *
- * <p>Load model by calling {@link nnfw_load_model_from_file}
- *
- * <p>After loading, prepare inference by calling {@link nnfw_prepare}.
- * Application can set runtime environment before prepare by calling
- * {@link nnfw_set_available_backends} and {@link nnfw_set_op_backend}, and it is optional.
- *
- * <p>Application can inference by calling {@link nnfw_run}.
- * Before inference, application has responsibility to set input tensor to set input data by calling
- * {@link nnfw_set_output}, and output tensor to get output by calling {@link nnfw_set_input}
- *
- * <p>To support input and output setting, application can get
- * input and output tensor information by calling<ul>
- * <li>{@link nnfw_input_size}</li>
- * <li>{@link nnfw_output_size}</li>
- * <li>{@link nnfw_input_tensorinfo}</li>
- * <li>{@link nnfw_output_tensorinfo}</li>
- * </ul>
- *
- * <p>Application can inference many times using one session,
- * but next inference can do after prior inference end
- *
- * <p>Application cannot use muitiple model using one session
- */
-typedef struct nnfw_session nnfw_session;
-
-/**
- * @brief Tensor types
- *
- * The type of tensor represented in {@link nnfw_tensorinfo}
- */
-typedef enum {
- /** A tensor of 32 bit floating point */
- NNFW_TYPE_TENSOR_FLOAT32 = 0,
- /** A tensor of 32 bit signed integer */
- NNFW_TYPE_TENSOR_INT32 = 1,
- /**
- * A tensor of 8 bit integers that represent real numbers.
- *
- * real_value = (integer_value - zeroPoint) * scale.
- */
- NNFW_TYPE_TENSOR_QUANT8_ASYMM = 2,
- /** A tensor of boolean */
- NNFW_TYPE_TENSOR_BOOL = 3,
- /** A tensor of 8 bit unsigned integer */
- NNFW_TYPE_TENSOR_UINT8 = 4,
-} NNFW_TYPE;
-
-/**
- * @brief Result Values
- */
-typedef enum {
- /** Successful */
- NNFW_STATUS_NO_ERROR = 0,
- /** Failed */
- NNFW_STATUS_ERROR = 1,
-} NNFW_STATUS;
-
-/**
- * @brief Data format of a tensor
- */
-typedef enum {
- /** Don't care layout */
- NNFW_LAYOUT_NONE = 0,
- /**
- * Channel last layout
- * If rank is 4, layout is NHWC
- */
- NNFW_LAYOUT_CHANNELS_LAST = 1,
- /**
- * Channel first layout
- * If rank is 4, layout is NCHW
- */
- NNFW_LAYOUT_CHANNELS_FIRST = 2,
-} NNFW_LAYOUT;
-
-/**
- * @brief tensor info describes the type and shape of tensors
- *
- * <p>This structure is used to describe input and output tensors.
- * Application can get input and output tensor type and shape described in model by using
- * {@link nnfw_input_tensorinfo} and {@link nnfw_output_tensorinfo}
- *
- * <p>Maximum rank is 6. And tensor's dimension value is filled in 'dims' field from index 0.
- * For example, if tensor's rank is 4,
- * application can get dimension value from dims[0], dims[1], dims[2], and dims[3]
- */
-typedef struct nnfw_tensorinfo
-{
- /** The data type */
- NNFW_TYPE dtype;
- /** The number of dimensions (rank) */
- int32_t rank;
- /**
- * The dimension of tensor.
- * Maximum rank is 6.
- */
- int32_t dims[6];
-} nnfw_tensorinfo;
-
-/**
- * @brief Create a new session instance.
- *
- * <p>This only creates a session.
- * Model is loaded after {@link nnfw_load_model_from_file} is invoked.
- * And inference is performed after {@link nnfw_run} is invoked.
- *
- * <p>{@link nnfw_close_session} should be called once
- * if session is no longer need
- *
- * @param[out] session The session to be created
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_create_session(nnfw_session **session);
-
-/**
- * @brief Close a session instance
- *
- * After called, access to closed session by application will be invalid
- *
- * @param[in] session The session to be closed
- * @return @c NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_close_session(nnfw_session *session);
-
-/**
- * @brief Load model from nnpackage file or directory
- *
- * @param[in] session nnfw_session loading the given nnpackage file/dir
- * @param[in] package_file_path Path to the nnpackage file or unzipped directory to be loaded
- *
- * @return @c NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *package_file_path);
-
-/**
- * @brief Apply i-th input's tensor info to resize input tensor
- *
- * This function should be called before {@link nnfw_prepare} is invoked, and
- * should be called after {@link nnfw_load_model_from_file} is invoked
- * See {@link nnfw_prepare} for information applying updated tensor info
- * If this function is called many times for same index, tensor info is overwritten
- *
- * @param[in] session Session to the input tensor info is to be set
- * @param[in] index Index of input to be applied (0-indexed)
- * @param[in] tensor_info Tensor info to be applied
- * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
- */
-NNFW_STATUS nnfw_apply_tensorinfo(nnfw_session *session, uint32_t index,
- nnfw_tensorinfo tensor_info);
-
-/**
- * @brief Prepare session to be ready for inference
- *
- * This phase may finalize model compilation, scheduling, and additional settings.
- * If {@link nnfw_apply_tensor} is called to apply input tensor info different with model
- * before this function, tries to resize all tensors.
- *
- * @param[in] session the session to be prepared
- * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
- */
-NNFW_STATUS nnfw_prepare(nnfw_session *session);
-
-/**
- * @brief Run inference
- *
- * <p>This function should be called after model is loaded by {@link nnfw_load_model_from_file},
- * session is prepared for inference by {@link nnfw_prepare}, set input and output buffers
- * by {@link nnfw_set_input} and {@link nnfw_set_output}.</p>
- *
- * <p>This function return after inference is finished.</p>
- *
- * @param[in] session The session to run inference
- * @return @c NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_run(nnfw_session *session);
-
-/**
- * @brief Set input buffer
- *
- * This function should be called after {@link nnfw_prepare}, and before first inference
- * on session by {@link nnfw_run}. Application can reuse buffer for many inferences.
- *
- * @param[in] session Session to the input is to be set
- * @param[in] index Index of input to be set (0-indexed)
- * @param[in] type Type of the input
- * @param[in] buffer Raw buffer for input
- * @param[in] length Size of bytes of input buffer
- *
- * @return @c NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_set_input(nnfw_session *session, uint32_t index, NNFW_TYPE type,
- const void *buffer, size_t length);
-
-/**
- * @brief Set output buffer
- *
- * This function should be called after {@link nnfw_prepare}, and before first inference
- * on session by {@link nnfw_run}. Application can reuse buffer for many inferences.
- *
- * @param[in] session Session from inference output is to be extracted
- * @param[in] index Index of output to be set (0-indexed)
- * @param[in] type Type of the output
- * @param[out] buffer Raw buffer for output
- * @param[in] length Size of bytes of output buffer
- *
- * @return @c NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_set_output(nnfw_session *session, uint32_t index, NNFW_TYPE type, void *buffer,
- size_t length);
-
-/**
- * @brief Get the number of inputs
- *
- * Application can call this function to get number of inputs defined in loaded model.
- * This function should be called after {@link nnfw_load_model_from_file} is invoked to load model
- *
- * @param[in] session Session from input information is to be extracted
- * @param[out] number Variable which the number of inputs is put into
- *
- * @return @c NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_input_size(nnfw_session *session, uint32_t *number);
-
-/**
- * @brief Get the number of outputs
- *
- * Application can call this function to get number of outputs defined in loaded model.
- * This function should be called after {@link nnfw_load_model_from_file} is invoked to load model
- *
- * @param[in] session Session from output information is to be extracted
- * @param[out] number Variable which the number of outputs is put into
- *
- * @return @c NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_output_size(nnfw_session *session, uint32_t *number);
-
-/**
- * @brief Set the layout of an input
- *
- * The input that does not call this has NNFW_LAYOUT_NHWC layout
- *
- * @param[in] session session from inference input is to be extracted
- * @param[in] index index of input to be set (0-indexed)
- * @param[in] layout layout to set to target input
- *
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_set_input_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout);
-
-/**
- * @brief Set the layout of an output
- *
- * The output that does not call this has NNFW_LAYOUT_NHWC layout
- *
- * @param[in] session session from inference output is to be extracted
- * @param[in] index index of output to be set (0-indexed)
- * @param[in] layout layout to set to target output
- *
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_set_output_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout);
-
-/**
- * @brief Get i-th input tensor info
- *
- * <p>Before {@link nnfw_prepare} is invoked, this function return tensor info in model,
- * so updated tensor info by {@link nnfw_apply_tensorinfo} is not returned.</p>
- *
- * <p>After {@link nnfw_prepare} is invoked, this function return updated tensor info
- * if tensor info is updated by {@link nnfw_apply_tensorinfo}.</p>
- *
- * @param[in] session Session from input information is to be extracted
- * @param[in] index Index of input
- * @param[out] tensor_info Tensor info (shape, type, etc)
- *
- * @return @c NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_input_tensorinfo(nnfw_session *session, uint32_t index,
- nnfw_tensorinfo *tensor_info);
-
-/**
- * @brief Get i-th output tensor info
- *
- * <p>Before {@link nnfw_prepare} is invoked, this function return tensor info in model,
- * so updated tensor info by {@link nnfw_apply_tensorinfo} is not returned.</p>
- *
- * <p>After {@link nnfw_prepare} is invoked, this function return updated tensor info
- * if tensor info is updated by {@link nnfw_apply_tensorinfo}.</p>
- *
- * @param[in] session Session from output information is to be extracted
- * @param[in] index Index of output
- * @param[out] tensor_info Tensor info (shape, type, etc)
- *
- * @return @c NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_output_tensorinfo(nnfw_session *session, uint32_t index,
- nnfw_tensorinfo *tensor_info);
-
-/**
- * @brief Set available backends
- *
- * This function should be called before {@link nnfw_prepare} is invoked.
- *
- * <p>Supported backends differs on each platforms.
- * For example, `x86_64` supports "cpu" only.
- * Can set multiple backends by semicolon (ex: "acl_cl;cpu").
- * Among the multiple backends, the 1st element is used as default backend.</p>
- *
- * @note Possible backend strings are: "cpu", "acl_cl", "acl_neon", "srcn"
- *
- * @param[in] session session to which avilable backends are set
- * @param[in] backends available backends on which nnfw uses
- *
- * @return @c NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_set_available_backends(nnfw_session *session, const char *backends);
-
-/**
- * @brief Set the operation's backend
- *
- * This function should be called before {@link nnfw_prepare} is invoked.
- *
- * <p>Supported backends differs on each platforms.
- * For example, `x86_64` supports "cpu" only.
- * The backend for op has higher priority than default backend specified by
- * nnfw_set_default_backend.</p>
- *
- * @note Possible backend strings are: "cpu", "acl_cl", "acl_neon"
- *
- * @param[in] session session to be modified
- * @param[in] op operation to be set
- * @param[in] backend bakcend on which operation run
- *
- * @return @c NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_set_op_backend(nnfw_session *session, const char *op, const char *backend);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/runtime/neurun/api/include/nnfw_debug.h b/runtime/neurun/api/include/nnfw_debug.h
deleted file mode 100644
index eefca0d29..000000000
--- a/runtime/neurun/api/include/nnfw_debug.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_DEBUG_H__
-#define __NNFW_DEBUG_H__
-
-#include "nnfw.h"
-
-NNFW_STATUS nnfw_create_debug_session(nnfw_session **session);
-
-#endif // __NNFW_DEBUG_H__
diff --git a/runtime/neurun/api/src/CustomKernel.cc b/runtime/neurun/api/src/CustomKernel.cc
deleted file mode 100644
index 60ddeedc2..000000000
--- a/runtime/neurun/api/src/CustomKernel.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CustomKernel.h"
-
-namespace neurun
-{
-namespace frontend
-{
-namespace custom
-{
-
-using namespace backend::custom;
-
-class APIConverter
-{
-public:
- static nnfw_operand convertOperand(void *alloc, const TypeInfo &type)
- {
- nnfw_operand api_operand;
- api_operand.allocation = alloc;
- api_operand.type = convertType(type);
- return api_operand;
- }
-
- static nnfw_tensorinfo convertType(const TypeInfo &type)
- {
- nnfw_tensorinfo api_type;
- api_type.rank = type.shape.rank();
- assert(type.shape.rank() <= 6);
- std::copy(type.shape.dims().begin(), type.shape.dims().end(), std::begin(api_type.dims));
-
- switch (type.dtype)
- {
- case ir::DataType::FLOAT32:
- api_type.dtype = NNFW_TYPE_TENSOR_FLOAT32;
- break;
- case ir::DataType::INT32:
- api_type.dtype = NNFW_TYPE_TENSOR_INT32;
- break;
- case ir::DataType::QUANT8_ASYMM:
- api_type.dtype = NNFW_TYPE_TENSOR_QUANT8_ASYMM;
- break;
- case ir::DataType::BOOL8:
- api_type.dtype = NNFW_TYPE_TENSOR_BOOL;
- break;
- default:
- throw std::runtime_error("Unsupported tensor datatype");
- }
- return api_type;
- }
-};
-
-Kernel::Kernel(const nnfw_custom_eval evalFunction)
- : _params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
-{
-}
-
-void Kernel::configure(CustomKernelConfigParams &&inParams)
-{
- _userdata = inParams.userdata;
- _userdata_size = inParams.userdata_size;
-
- _params.ninputs = inParams.input_allocations.size();
- _params.inputs = new nnfw_operand[_params.ninputs];
- for (size_t i = 0; i < _params.ninputs; ++i)
- {
- _params.inputs[i] =
- APIConverter::convertOperand(inParams.input_allocations[i], inParams.input_types[i]);
- }
-
- _params.noutputs = inParams.output_allocations.size();
- _params.outputs = new nnfw_operand[_params.noutputs];
- for (size_t i = 0; i < _params.noutputs; ++i)
- {
- _params.outputs[i] =
- APIConverter::convertOperand(inParams.output_allocations[i], inParams.output_types[i]);
- }
-}
-
-void Kernel::run() { _evalFunction(&_params, _userdata, _userdata_size); }
-
-} // namespace custom
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/api/src/CustomKernel.h b/runtime/neurun/api/src/CustomKernel.h
deleted file mode 100644
index 8cafc2061..000000000
--- a/runtime/neurun/api/src/CustomKernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CUSTOM_KERNEL_H__
-#define __NEURUN_BACKEND_CUSTOM_KERNEL_H__
-
-#include "nnfw_dev.h"
-
-#include "backend/CustomKernelBuilder.h"
-
-#include <vector>
-
-namespace neurun
-{
-namespace frontend
-{
-namespace custom
-{
-
-class Kernel : public ::neurun::exec::IFunction
-{
-public:
- explicit Kernel(nnfw_custom_eval evalFunction);
-
- nnfw_custom_kernel_params _params;
- char *_userdata;
- size_t _userdata_size;
-
- nnfw_custom_eval _evalFunction;
- // nnfw_custom_type_infer _type_infer_function; //Unused for now
-
- /**
- * Fills _params field used later by user specified eval function
- * @param inParams custom kernel parameters
- */
- virtual void configure(backend::custom::CustomKernelConfigParams &&inParams);
-
- void run() override;
- void runSync() override { run(); }
-};
-
-} // namespace custom
-} // namespace frontend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CUSTOM_KERNEL_H__
diff --git a/runtime/neurun/api/src/CustomKernelRegistry.cc b/runtime/neurun/api/src/CustomKernelRegistry.cc
deleted file mode 100644
index b223682b8..000000000
--- a/runtime/neurun/api/src/CustomKernelRegistry.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CustomKernelRegistry.h"
-
-#include "cpp14/memory.h"
-
-namespace neurun
-{
-namespace frontend
-{
-namespace custom
-{
-
-void KernelRegistry::registerKernel(const std::string &id, nnfw_custom_eval evalFunction)
-{
- _storage.emplace(id, evalFunction);
-}
-
-std::shared_ptr<backend::custom::IKernelBuilder> KernelRegistry::getBuilder()
-{
- return nnfw::cpp14::make_unique<KernelBuilder>(this);
-}
-
-std::unique_ptr<Kernel> KernelRegistry::buildKernelForOp(const std::string &id)
-{
- auto it = _storage.find(id);
- if (it == _storage.end())
- {
- throw std::runtime_error("Unable to find associated kernel for op");
- }
-
- return nnfw::cpp14::make_unique<Kernel>(it->second);
-}
-
-// Kernel builder
-std::unique_ptr<exec::IFunction>
-KernelBuilder::buildKernel(const std::string &id,
- backend::custom::CustomKernelConfigParams &&params) const
-{
- auto kernel = _registry->buildKernelForOp(id);
- kernel->configure(std::move(params));
-
- return kernel;
-}
-
-KernelBuilder::KernelBuilder(KernelRegistry *registry) : _registry(registry) {}
-
-} // namespace custom
-} // namespace frontend
-} // namespace neurun
diff --git a/runtime/neurun/api/src/CustomKernelRegistry.h b/runtime/neurun/api/src/CustomKernelRegistry.h
deleted file mode 100644
index 207a82a0a..000000000
--- a/runtime/neurun/api/src/CustomKernelRegistry.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
-#define __NEURUN_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
-
-#include "CustomKernel.h"
-
-#include <unordered_map>
-#include <functional>
-#include <memory>
-
-#include <iostream>
-
-namespace neurun
-{
-namespace frontend
-{
-namespace custom
-{
-
-class KernelRegistry
-{
-public:
- void registerKernel(const std::string &id, nnfw_custom_eval evalFunction);
-
- std::shared_ptr<backend::custom::IKernelBuilder> getBuilder();
- std::unique_ptr<Kernel> buildKernelForOp(const std::string &id);
-
-private:
- std::unordered_map<std::string, nnfw_custom_eval> _storage;
-};
-
-class KernelBuilder : public backend::custom::IKernelBuilder
-{
-public:
- KernelBuilder(KernelRegistry *registry);
-
- std::unique_ptr<exec::IFunction>
- buildKernel(const std::string &id,
- backend::custom::CustomKernelConfigParams &&params) const override;
-
-private:
- KernelRegistry *_registry;
-};
-
-} // namespace custom
-} // namespace frontend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
diff --git a/runtime/neurun/api/src/OpMap.lst b/runtime/neurun/api/src/OpMap.lst
deleted file mode 100644
index 5e93275b8..000000000
--- a/runtime/neurun/api/src/OpMap.lst
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MAP_MACRO
-#error Define MAP_MACRO before including this file
-#endif
-
-// circle operation | neurun internal operation
-MAP_MACRO(ADD , Add)
-MAP_MACRO(SUB , Sub)
-MAP_MACRO(BATCH_TO_SPACE_ND , BatchToSpaceND)
-MAP_MACRO(CAST , Cast)
-MAP_MACRO(CONV_2D , Conv2D)
-MAP_MACRO(DEPTHWISE_CONV_2D , DepthwiseConv2D)
-MAP_MACRO(AVERAGE_POOL_2D , AvgPool2D)
-MAP_MACRO(MAX_POOL_2D , MaxPool2D)
-MAP_MACRO(CONCATENATION , Concat)
-MAP_MACRO(FULLY_CONNECTED , FullyConnected)
-MAP_MACRO(SUM , ReduceSum)
-MAP_MACRO(RESHAPE , Reshape)
-MAP_MACRO(MUL , Mul)
-MAP_MACRO(SOFTMAX , Softmax)
-MAP_MACRO(SQUEEZE , Squeeze)
-MAP_MACRO(SLICE , Slice)
-MAP_MACRO(STRIDED_SLICE , StridedSlice)
-MAP_MACRO(TANH , Tanh)
-MAP_MACRO(LOGISTIC , Logistic)
-MAP_MACRO(DIV , Div)
-MAP_MACRO(TRANSPOSE , Transpose)
-MAP_MACRO(EXP , Exp)
-MAP_MACRO(REDUCE_MAX , ReduceMax)
-// UNMATCHED
-//MAP_MACRO(Comparison)
-MAP_MACRO(LOGICAL_AND , LogicalAnd)
-MAP_MACRO(LOGICAL_OR , LogicalOr)
-MAP_MACRO(LOGICAL_NOT , LogicalNot)
-MAP_MACRO(LSTM , LSTM)
-MAP_MACRO(RSQRT , RSQRT)
-MAP_MACRO(RELU , ReLU)
-MAP_MACRO(RESIZE_BILINEAR , ResizeBilinear)
-MAP_MACRO(RELU_N1_TO_1 , ReLU1)
-MAP_MACRO(RELU6 , ReLU6)
-MAP_MACRO(RNN , RNN)
-MAP_MACRO(FLOOR , Floor)
-MAP_MACRO(SPACE_TO_BATCH_ND , SpaceToBatchND)
-MAP_MACRO(SPACE_TO_DEPTH , SpaceToDepth)
-MAP_MACRO(L2_POOL_2D , L2Pool2D)
-MAP_MACRO(EMBEDDING_LOOKUP , EmbeddingLookup)
-MAP_MACRO(L2_NORMALIZATION , L2Normalization)
-MAP_MACRO(HASHTABLE_LOOKUP , HashtableLookup)
-MAP_MACRO(INSTANCE_NORM , InstanceNorm)
-MAP_MACRO(PRELU , PReLU)
-MAP_MACRO(TRANSPOSE_CONV , TransposeConv)
-MAP_MACRO(SQRT , SQRT)
-MAP_MACRO(SQUARED_DIFFERENCE , SquaredDifference)
-MAP_MACRO(TOPK_V2 , TopKV2)
-MAP_MACRO(GATHER , Gather)
-MAP_MACRO(NEG , Neg)
-MAP_MACRO(ABS , Abs)
-MAP_MACRO(ARG_MAX , ArgMax)
-MAP_MACRO(DEQUANTIZE , Dequantize)
-MAP_MACRO(MEAN , Mean)
-MAP_MACRO(LOCAL_RESPONSE_NORMALIZATION , LocalResponseNormalization)
-// UNDEFINED IN CIRCLE
-//MAP_MACRO(DepthToSpace)
-MAP_MACRO(PACK , Pack)
-MAP_MACRO(REDUCE_MIN , ReduceMin)
-MAP_MACRO(SPLIT , Split)
-MAP_MACRO(UNPACK , Unpack)
-MAP_MACRO(PAD , Pad)
-MAP_MACRO(CUSTOM , Custom)
-// UNDEFINED IN CIRCLE
-//MAP_MACRO(Permute)
-MAP_MACRO(MINIMUM , Min)
-MAP_MACRO(MAXIMUM , Max)
-MAP_MACRO(ONE_HOT , OneHot)
diff --git a/runtime/neurun/api/src/nnfw_api.cc b/runtime/neurun/api/src/nnfw_api.cc
deleted file mode 100644
index bdac4c89b..000000000
--- a/runtime/neurun/api/src/nnfw_api.cc
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "nnfw_api_internal.h"
-
-/*
- * Create a new session instance
- *
- * @param session the session to be created
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_create_session(nnfw_session **session)
-{
- *session = new nnfw_session();
-
- return NNFW_STATUS_NO_ERROR;
-}
-
-/*
- * Close a session instance
- *
- * @param session the session to be closed
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_close_session(nnfw_session *session)
-{
- delete session;
- return NNFW_STATUS_NO_ERROR;
-}
-
-#define NNFW_RETURN_ERROR_IF_NULL(p) \
- do \
- { \
- if ((p) == NULL) \
- return NNFW_STATUS_ERROR; \
- } while (0)
-
-/*
- * Load model from nnpackage file or directory
- *
- * @param session nnfw_session loading the given nnpackage file/dir
- * @param package_file_path path to the nnpackage file or unzipped directory to be loaded
- *
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *pacakge_file_path)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->load_model_from_file(pacakge_file_path);
-}
-
-/*
- * Prepare session to be ready for inference
- * This phase may finalize model compilation, scheduling, and additional settings.
- *
- * @param session the session to be prepared
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_prepare(nnfw_session *session)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->prepare();
-}
-
-/*
- * Run inference
- *
- * @param session the session to run inference
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_run(nnfw_session *session)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->run();
-}
-
-/*
- * Set input
- *
- * @param session session to the input is to be set
- * @param index index of input to be set (0-indexed)
- * @param type type of the input
- * @param buffer raw buffer for input
- * @param length size of bytes of output
- *
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-
-NNFW_STATUS nnfw_set_input(nnfw_session *session, uint32_t index, NNFW_TYPE type,
- const void *buffer, size_t length)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->set_input(index, type, buffer, length);
-}
-
-/*
- * Set output
- *
- * @param session session from inference output is to be extracted
- * @param index index of output to be set (0-indexed)
- * @param type type of the output
- * @param buffer raw buffer for output
- * @param length size of bytes of output
- *
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-
-NNFW_STATUS nnfw_set_output(nnfw_session *session, uint32_t index, NNFW_TYPE type, void *buffer,
- size_t length)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->set_output(index, type, buffer, length);
-}
-
-/*
- * Get the number of inputs
- *
- * @param[in] session session from input information is to be extracted
- * @param[out] number variable which the number of inputs is put into
- *
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-
-NNFW_STATUS nnfw_input_size(nnfw_session *session, uint32_t *number)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->input_size(number);
-}
-
-/*
- * Get the number of outputs
- *
- * @param[in] session session from output information is to be extracted
- * @param[out] number variable which the number of outputs is put into
- *
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_output_size(nnfw_session *session, uint32_t *number)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->output_size(number);
-}
-
-/*
- * Set the layout of an input
- * @note The input that does not call this has NNFW_LAYOUT_CHANNELS_LAST layout
- *
- * @param[in] session session from inference input is to be extracted
- * @param[in] index index of input to be set (0-indexed)
- * @param[in] layout layout to set to target input
- *
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_set_input_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->set_input_layout(index, layout);
-}
-
-/*
- * Set the layout of an output
- * @note The output that does not call this has NNFW_LAYOUT_CHANNELS_LAST layout
- *
- * @param[in] session session from inference output is to be extracted
- * @param[in] index index of output to be set (0-indexed)
- * @param[in] layout layout to set to target output
- *
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_set_output_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->set_output_layout(index, layout);
-}
-
-/*
- * Get i-th input tensor info
- *
- * @param[in] session session from input information is to be extracted
- * @param[in] index index of input
- * @param[out] tensor_info nnfw_tensor_info
- *
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_input_tensorinfo(nnfw_session *session, uint32_t index,
- nnfw_tensorinfo *tensor_info)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->input_tensorinfo(index, tensor_info);
-}
-
-/*
- * Get i-th output tensor info
- *
- * @param[in] session session from output information is to be extracted
- * @param[in] index index of output
- * @param[out] tensor_info nnfw_tensor_info
- *
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_output_tensorinfo(nnfw_session *session, uint32_t index,
- nnfw_tensorinfo *tensor_info)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->output_tensorinfo(index, tensor_info);
-}
-
-/*
- * Register custom operation
- * @param session session to register this operation
- * @param id operation id
- * @param info registration info ( eval function, etc. )
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_register_custom_op_info(nnfw_session *session, const char *id,
- custom_kernel_registration_info *info)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->register_custom_operation(id, info->eval_function);
-}
-
-NNFW_STATUS nnfw_apply_tensorinfo(nnfw_session *session, uint32_t index,
- nnfw_tensorinfo tensor_info)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->apply_tensorinfo(index, tensor_info);
-}
-
-/*
- * Set available backends
- *
- * @param[in] session session to which a avilable backends are set
- * @param[in] backends available backends on which nnfw uses
- */
-NNFW_STATUS nnfw_set_available_backends(nnfw_session *session, const char *backends)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->set_available_backends(backends);
-}
-
-/*
- * Set the operation's backend
- *
- * @param[in] session session to be modified
- * @param[in] op operation to be set
- * @param[in] backend bakcend on which operation run
- *
- * @return NNFW_STATUS_NO_ERROR if successful
- */
-NNFW_STATUS nnfw_set_op_backend(nnfw_session *session, const char *op, const char *backend)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
- return session->set_op_backend(op, backend);
-}
diff --git a/runtime/neurun/api/src/nnfw_api_internal.cc b/runtime/neurun/api/src/nnfw_api_internal.cc
deleted file mode 100644
index 037cd3bca..000000000
--- a/runtime/neurun/api/src/nnfw_api_internal.cc
+++ /dev/null
@@ -1,435 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "nnfw_api_internal.h"
-#include "CustomKernelRegistry.h"
-#include "compiler/Compiler.h"
-#include "exec/Execution.h"
-#include "circle_loader.h"
-#include "tflite_loader.h"
-#include "json/json.h"
-#include <fstream>
-#include <iostream>
-#include <string>
-#include <dirent.h>
-#include <util/ConfigSource.h>
-
-/*
- * API does not accept string argument longer than max length below
- */
-#define MAX_BACKEND_NAME_LENGTH 32
-#define MAX_OP_NAME_LENGTH 64
-
-// Is null-terminating in length ?
-static bool null_terminating(const char *str, uint32_t length)
-{
- for (uint32_t i = 0; i < length; i++)
- {
- if (*(str + i) == '\0')
- {
- return true;
- }
- }
- return false;
-}
-
-static neurun::ir::Layout convertLayout(NNFW_LAYOUT layout)
-{
- if (layout == NNFW_LAYOUT_CHANNELS_LAST)
- {
- return neurun::ir::Layout::NHWC;
- }
- else if (layout == NNFW_LAYOUT_CHANNELS_FIRST)
- {
- return neurun::ir::Layout::NCHW;
- }
- return neurun::ir::Layout::UNKNOWN;
-}
-
-nnfw_session::nnfw_session()
- : _graph{nullptr}, _execution{nullptr},
- _kernel_registry{std::make_shared<neurun::frontend::custom::KernelRegistry>()},
- _source{nnfw::cpp14::make_unique<neurun::util::GeneralConfigSource>()}
-{
- // DO NOTHING
-}
-
-NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
-{
- // TODO : add support for zipped package file load
- DIR *dir;
- if (!(dir = opendir(package_dir)))
- {
- std::cerr << "invalid nnpackge directory: " << package_dir << std::endl;
- return NNFW_STATUS_ERROR;
- }
- closedir(dir);
-
- try
- {
- std::string manifest_file_name(package_dir);
- manifest_file_name += "/metadata/MANIFEST";
- std::ifstream mfs(manifest_file_name);
-
- // extract the filename of the first(index 0) model
- // e.g. In MANIFEST file, { "models" : [ "firstmodel.tflite", "2nd.tflite" ] }
- Json::Value root;
- mfs >> root;
- Json::Value models = root["models"];
- Json::Value model_types = root["model-types"];
-
- auto model_file_path = package_dir + std::string("/") + models[0].asString(); // first model
- auto model_type = model_types[0].asString(); // first model's type
- if (model_type == "tflite")
- {
- _graph = neurun::tflite_loader::loadModel(model_file_path.c_str());
- }
- else if (model_type == "circle")
- {
- _graph = neurun::circle_loader::loadModel(model_file_path.c_str());
- }
- else
- {
- std::cerr << "Unsupported model type in MANIFEST" << std::endl;
- return NNFW_STATUS_ERROR;
- }
- _graph->bindKernelBuilder(_kernel_registry->getBuilder());
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during model loading : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
- return NNFW_STATUS_NO_ERROR;
-}
-
-NNFW_STATUS nnfw_session::prepare()
-{
- // TODO : add additional setting routine(executor type, backend)
- // Note that we assume acl_cl backend
-
- try
- {
- // config_source setting
- using neurun::util::config_source;
- config_source(std::move(_source));
-
- auto compiler = nnfw::cpp14::make_unique<neurun::compiler::Compiler>(_graph);
- compiler->compile();
- std::shared_ptr<neurun::exec::IExecutor> executor;
- compiler->release(executor);
- _execution = std::make_shared<neurun::exec::Execution>(executor);
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during model prepare : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
- return NNFW_STATUS_NO_ERROR;
-}
-
-NNFW_STATUS nnfw_session::run()
-{
- try
- {
- _execution->execute();
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
- return NNFW_STATUS_NO_ERROR;
-}
-
-NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const void *buffer,
- size_t length)
-{
- try
- {
- _execution->setInput(neurun::ir::IOIndex(index), buffer, length);
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during nnfw_session::set_input : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
- return NNFW_STATUS_NO_ERROR;
-}
-
-NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *buffer,
- size_t length)
-{
- try
- {
- _execution->setOutput(neurun::ir::IOIndex(index), buffer, length);
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during nnfw_session::set_output : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
- return NNFW_STATUS_NO_ERROR;
-}
-
-NNFW_STATUS nnfw_session::input_size(uint32_t *number)
-{
- try
- {
- if (number == nullptr)
- {
- std::cerr << "Error during nnfw_session::input_size, number is null pointer." << std::endl;
- return NNFW_STATUS_ERROR;
- }
- *number = _graph->getInputs().size();
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during nnfw_session::input_size : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
- return NNFW_STATUS_NO_ERROR;
-}
-
-NNFW_STATUS nnfw_session::output_size(uint32_t *number)
-{
- try
- {
- if (number == nullptr)
- {
- std::cerr << "Error during nnfw_session::output_size, number is null pointer." << std::endl;
- return NNFW_STATUS_ERROR;
- }
- *number = _graph->getOutputs().size();
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during nnfw_session::output_size" << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
- return NNFW_STATUS_NO_ERROR;
-}
-
-NNFW_STATUS nnfw_session::set_input_layout(uint32_t index, NNFW_LAYOUT layout)
-{
- try
- {
- if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST &&
- layout != NNFW_LAYOUT_CHANNELS_LAST)
- {
- std::cerr << "Error during nnfw_session::set_input_layout, not supported layout" << std::endl;
- return NNFW_STATUS_ERROR;
- }
- _execution->setInputLayout(neurun::ir::IOIndex(index), convertLayout(layout));
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during nnfw_session::set_input_layout : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
- return NNFW_STATUS_NO_ERROR;
-}
-
-NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout)
-{
- try
- {
- if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST &&
- layout != NNFW_LAYOUT_CHANNELS_LAST)
- {
- std::cerr << "Error during nnfw_session::set_output_layout, not supported layout"
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
- _execution->setOutputLayout(neurun::ir::IOIndex(index), convertLayout(layout));
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during nnfw_session::set_output_layout : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
- return NNFW_STATUS_NO_ERROR;
-}
-
-static NNFW_TYPE datatype_to_nnfw_dtype(neurun::ir::DataType dt)
-{
- using neurun::ir::DataType;
- switch (dt)
- {
- case DataType::FLOAT32:
- return NNFW_TYPE_TENSOR_FLOAT32;
- case DataType::INT32:
- return NNFW_TYPE_TENSOR_INT32;
- case DataType::QUANT8_ASYMM:
- return NNFW_TYPE_TENSOR_QUANT8_ASYMM;
- case DataType::BOOL8:
- return NNFW_TYPE_TENSOR_BOOL;
- case DataType::UINT8:
- return NNFW_TYPE_TENSOR_UINT8;
- case DataType::UINT32:
- case DataType::QUANT8_SYMM:
- default:
- std::cerr << "Error: Model has type that runtime API does not support." << std::endl;
- exit(-1);
- }
-}
-
-NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t /*index*/, nnfw_tensorinfo /*ti*/)
-{
- std::cerr << "Error: NYI" << std::endl;
- return NNFW_STATUS_ERROR;
-}
-
-NNFW_STATUS nnfw_session::input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
-{
- try
- {
- if (ti == nullptr)
- {
- std::cerr << "Error during nnfw_session::input_tensorinfo, tensorinfo is null pointer."
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
- if (index >= _graph->getInputs().size())
- {
- std::cerr << "Error during nnfw_session::input_tensorinfo, index is out of range."
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
- auto opidx = _graph->getInputs().at(index);
- auto shape = _graph->operands().at(opidx).shape();
- ti->rank = shape.rank();
- for (int j = 0; j < ti->rank; ++j)
- {
- ti->dims[j] = shape.dim(j);
- }
- ti->dtype = datatype_to_nnfw_dtype(_graph->operands().at(opidx).typeInfo().type());
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during nnfw_session::input_tensorinfo : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
- return NNFW_STATUS_NO_ERROR;
-}
-
-NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
-{
- try
- {
- if (ti == nullptr)
- {
- std::cerr << "Error during nnfw_session::output_tensorinfo, tensorinfo is null pointer."
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
- if (index >= _graph->getOutputs().size())
- {
- std::cerr << "Error during nnfw_session::output_tensorinfo, index is out of range."
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
- auto opidx = _graph->getOutputs().at(index);
- auto shape = _graph->operands().at(opidx).shape();
- ti->rank = shape.rank();
- for (int j = 0; j < ti->rank; ++j)
- {
- ti->dims[j] = shape.dim(j);
- }
- ti->dtype = datatype_to_nnfw_dtype(_graph->operands().at(opidx).typeInfo().type());
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during nnfw_session::output_tensorinfo : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
- return NNFW_STATUS_NO_ERROR;
-}
-NNFW_STATUS nnfw_session::register_custom_operation(const std::string &id,
- nnfw_custom_eval eval_func)
-{
- _kernel_registry->registerKernel(id, eval_func);
- return NNFW_STATUS_NO_ERROR;
-}
-
-static std::string get_op_backend_string(std::string op)
-{
-#define MAP_MACRO(CircleName, NeurunName) {#CircleName, "OP_BACKEND_" #NeurunName},
-
- static std::unordered_map<std::string, std::string> operation_map = {
-#include "OpMap.lst"
- };
-
-#undef MAP_MACRO
-
- auto n = operation_map.find(op);
-
- if (n == operation_map.end())
- {
- // this return value is handled by a caller to return error code
- return std::string("");
- }
- else
- {
- return n->second;
- }
-}
-
-NNFW_STATUS nnfw_session::set_available_backends(const char *backends)
-{
- try
- {
- if (!backends || null_terminating(backends, MAX_BACKEND_NAME_LENGTH) == false)
- {
- return NNFW_STATUS_ERROR;
- }
-
- _source->set("BACKENDS", backends);
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during nnfw_session::set_available_backends : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
- return NNFW_STATUS_NO_ERROR;
-}
-
-NNFW_STATUS nnfw_session::set_op_backend(const char *op, const char *backend)
-{
- try
- {
- if (!op || !null_terminating(op, MAX_OP_NAME_LENGTH) || !backend ||
- !null_terminating(backend, MAX_BACKEND_NAME_LENGTH))
- {
- return NNFW_STATUS_ERROR;
- }
-
- auto key = get_op_backend_string(op);
-
- if (key.empty())
- {
- return NNFW_STATUS_ERROR;
- }
-
- _source->set(key, backend);
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during nnfw_session::set_op_backend : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
- return NNFW_STATUS_NO_ERROR;
-}
diff --git a/runtime/neurun/api/src/nnfw_api_internal.h b/runtime/neurun/api/src/nnfw_api_internal.h
deleted file mode 100644
index 40069cc55..000000000
--- a/runtime/neurun/api/src/nnfw_api_internal.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __API_NNFW_API_INTERNAL_H__
-#define __API_NNFW_API_INTERNAL_H__
-
-#include "nnfw.h"
-#include "nnfw_dev.h"
-
-#include <util/GeneralConfigSource.h>
-
-#include <string>
-#include <memory>
-
-namespace neurun
-{
-namespace frontend
-{
-namespace custom
-{
-class KernelRegistry;
-}
-} // namespace frontend
-namespace exec
-{
-class Execution;
-}
-namespace ir
-{
-class Graph;
-} // namespace ir
-} // namespace neurun
-
-struct nnfw_session
-{
-public:
- nnfw_session();
-
- NNFW_STATUS load_model_from_file(const char *package_file_path);
- NNFW_STATUS prepare();
- NNFW_STATUS run();
-
- NNFW_STATUS set_input(uint32_t index, NNFW_TYPE type, const void *buffer, size_t length);
- NNFW_STATUS set_output(uint32_t index, NNFW_TYPE type, void *buffer, size_t length);
-
- NNFW_STATUS input_size(uint32_t *number);
- NNFW_STATUS output_size(uint32_t *number);
-
- NNFW_STATUS set_input_layout(uint32_t index, NNFW_LAYOUT layout);
- NNFW_STATUS set_output_layout(uint32_t index, NNFW_LAYOUT layout);
-
- NNFW_STATUS apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti);
-
- NNFW_STATUS input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
- NNFW_STATUS output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
-
- NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
-
- NNFW_STATUS set_available_backends(const char *backends);
- NNFW_STATUS set_op_backend(const char *op, const char *backend);
-
-private:
- std::shared_ptr<neurun::ir::Graph> _graph;
- std::shared_ptr<neurun::exec::Execution> _execution;
- std::shared_ptr<neurun::frontend::custom::KernelRegistry> _kernel_registry;
-
-protected:
- std::unique_ptr<neurun::util::GeneralConfigSource> _source;
-};
-
-#endif // __API_NNFW_API_INTERNAL_H__
diff --git a/runtime/neurun/api/src/nnfw_debug.cc b/runtime/neurun/api/src/nnfw_debug.cc
deleted file mode 100644
index 4ea0a203f..000000000
--- a/runtime/neurun/api/src/nnfw_debug.cc
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "nnfw_debug_internal.h"
-
-NNFW_STATUS nnfw_create_debug_session(nnfw_session **session)
-{
- *session = new nnfw_debug_session();
-
- return NNFW_STATUS_NO_ERROR;
-}
diff --git a/runtime/neurun/api/src/nnfw_debug_internal.cc b/runtime/neurun/api/src/nnfw_debug_internal.cc
deleted file mode 100644
index 778efbc5c..000000000
--- a/runtime/neurun/api/src/nnfw_debug_internal.cc
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "nnfw_debug_internal.h"
-#include "util/EnvConfigSource.h"
-
-#include <cpp14/memory.h>
-
-nnfw_debug_session::nnfw_debug_session() : nnfw_session()
-{
- _source = nnfw::cpp14::make_unique<neurun::util::EnvConfigSource>();
-}
diff --git a/runtime/neurun/backend/CMakeLists.txt b/runtime/neurun/backend/CMakeLists.txt
deleted file mode 100644
index fc363e031..000000000
--- a/runtime/neurun/backend/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-set(LIB_NEURUN_BACKEND_ACL_COMMON neurun_backend_acl_common)
-set(LIB_NEURUN_BACKEND_CPU_COMMON neurun_backend_cpu_common)
-
-add_subdirectory(cpu)
-add_subdirectory(cpu_common)
-add_subdirectory(acl_cl)
-add_subdirectory(acl_neon)
-add_subdirectory(acl_common)
-add_subdirectory(hi_perf_cpu)
-add_subdirectory(srcn)
diff --git a/runtime/neurun/backend/acl_cl/Backend.h b/runtime/neurun/backend/acl_cl/Backend.h
deleted file mode 100644
index 2033b42e7..000000000
--- a/runtime/neurun/backend/acl_cl/Backend.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_CL_BACKEND_H__
-#define __NEURUN_BACKEND_ACL_CL_BACKEND_H__
-
-#include <memory>
-#include <backend/Backend.h>
-#include <ir/Operands.h>
-
-#include "Config.h"
-#include "ConstantInitializer.h"
-#include "KernelGenerator.h"
-#include "ShapeFixer.h"
-#include "TensorManager.h"
-#include "TensorRegister.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-class Backend : public ::neurun::backend::Backend
-{
-public:
- Backend() : _config{std::make_shared<Config>()} {}
-
- std::shared_ptr<IConfig> config() const override { return _config; }
-
- std::unique_ptr<BackendContext>
- newContext(const ir::Operands &operands,
- const std::shared_ptr<custom::IKernelBuilder> &) const override
- {
- auto tensor_builder = std::make_shared<TensorBuilder>(createTensorManager());
- return std::unique_ptr<BackendContext>{new BackendContext{
- this, tensor_builder, std::make_shared<ConstantInitializer>(operands, tensor_builder),
- std::make_shared<KernelGenerator>(operands, tensor_builder),
- std::make_shared<ShapeFixer>(operands, tensor_builder),
- std::make_shared<TensorRegister>(operands, tensor_builder)}};
- }
-
-private:
- std::shared_ptr<IConfig> _config;
-};
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_CL_BACKEND_H__
diff --git a/runtime/neurun/backend/acl_cl/CLTimer.h b/runtime/neurun/backend/acl_cl/CLTimer.h
deleted file mode 100644
index 3939ee722..000000000
--- a/runtime/neurun/backend/acl_cl/CLTimer.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_CL_CLTIMER_H__
-#define __NEURUN_BACKEND_ACL_CL_CLTIMER_H__
-
-#include <util/ITimer.h>
-#include <arm_compute/core/CL/OpenCL.h>
-#include <arm_compute/runtime/CL/CLScheduler.h>
-#include <chrono>
-#include <list>
-#include <sstream>
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-/**
- * @brief Class to measure CL kernels execution time
- */
-class CLTimer : public util::ITimer
-{
-public:
- /**
- * @brief This function replaces CL function, which enqueues a command to execute a kernel
- * with a wrapper which remembers enqueued kernels
- */
- void handleBegin() override
- {
- _measured_events.clear();
-
- _origin_enqueue_function = arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr;
-
- auto _timer_enqueue_function = [this](cl_command_queue command_queue, cl_kernel kernel,
- cl_uint work_dim, const size_t *gwo, const size_t *gws,
- const size_t *lws, cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list, cl_event *usr_event) {
- cl_event event;
- cl_int enqueue_res =
- this->_origin_enqueue_function(command_queue, kernel, work_dim, gwo, gws, lws,
- num_events_in_wait_list, event_wait_list, &event);
- this->_measured_events.emplace_back(event);
-
- // According to spec, if NULL was provided in usr_event - event shouldn't be returned
- if (usr_event != nullptr)
- {
- clRetainEvent(event);
- *usr_event = event;
- }
- return enqueue_res;
- };
- arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr = _timer_enqueue_function;
-
- // Set CL_QUEUE_PROFILING_ENABLE flag for the CL command-queue, if it isn't already set
- auto &cl_scheduler = arm_compute::CLScheduler::get();
- auto props = cl_scheduler.queue().getInfo<CL_QUEUE_PROPERTIES>();
- if ((props & CL_QUEUE_PROFILING_ENABLE) == 0)
- {
- cl_scheduler.set_queue(
- cl::CommandQueue(cl_scheduler.context(), props | CL_QUEUE_PROFILING_ENABLE));
- }
- };
-
- /**
- * @brief Get timer result by addition executed CL kernels durations
- */
- void handleEnd() override
- {
- _timer_res = 0;
- for (auto const &event : _measured_events)
- {
- cl_ulong start;
- cl_ulong end;
- event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
- event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
- _timer_res += (end - start) / 1000.f; // nanoseconds -> microseconds
- }
-
- // Restore origin CL enqueue function
- arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr = _origin_enqueue_function;
- };
-
-private:
- std::function<decltype(clEnqueueNDRangeKernel)> _origin_enqueue_function;
- std::list<::cl::Event> _measured_events;
-};
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_CL_CLTIMER_H__
diff --git a/runtime/neurun/backend/acl_cl/CMakeLists.txt b/runtime/neurun/backend/acl_cl/CMakeLists.txt
deleted file mode 100644
index aaf6a4d62..000000000
--- a/runtime/neurun/backend/acl_cl/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-# Unsupported architecture
-nnas_find_package(ARMCompute QUIET)
-if(NOT ARMCompute_FOUND)
- return()
-endif(NOT ARMCompute_FOUND)
-
-set(LIB_NEURUN_BACKEND_ACL_CL neurun_backend_acl_cl)
-
-file(GLOB_RECURSE SOURCES "*.cc")
-
-add_library(${LIB_NEURUN_BACKEND_ACL_CL} SHARED ${SOURCES})
-
-target_include_directories(${LIB_NEURUN_BACKEND_ACL_CL} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(${LIB_NEURUN_BACKEND_ACL_CL} PRIVATE neurun_core)
-target_link_libraries(${LIB_NEURUN_BACKEND_ACL_CL} PRIVATE ${LIB_NEURUN_BACKEND_ACL_COMMON})
-target_link_libraries(${LIB_NEURUN_BACKEND_ACL_CL} PRIVATE nnfw_common)
-target_link_libraries(${LIB_NEURUN_BACKEND_ACL_CL} PRIVATE nnfw_coverage)
-
-set_target_properties(${LIB_NEURUN_BACKEND_ACL_CL} PROPERTIES OUTPUT_NAME backend_acl_cl)
-
-install(TARGETS ${LIB_NEURUN_BACKEND_ACL_CL} DESTINATION lib)
diff --git a/runtime/neurun/backend/acl_cl/Config.cc b/runtime/neurun/backend/acl_cl/Config.cc
deleted file mode 100644
index 36bf83686..000000000
--- a/runtime/neurun/backend/acl_cl/Config.cc
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// For CLKernelLibraryEx initialization
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-
-#include <arm_compute/runtime/CL/CLScheduler.h>
-
-#include "Config.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-bool Config::initialize()
-{
- if (!arm_compute::opencl_is_available())
- {
- return false;
- }
- arm_compute::CLScheduler::get().default_init();
- // NOTE CLKernelLibraryEx must use the same context as CLScheduler
- // It did not check whether another device is available.
- arm_compute::CLKernelLibraryEx::get().init(
- "./cl_kernels/", arm_compute::CLScheduler::get().context(), cl::Device::getDefault());
-
- return true;
-}
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_cl/Config.h b/runtime/neurun/backend/acl_cl/Config.h
deleted file mode 100644
index a7ceaac26..000000000
--- a/runtime/neurun/backend/acl_cl/Config.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_CL_CONFIG_H__
-#define __NEURUN_BACKEND_ACL_CL_CONFIG_H__
-
-#include "CLTimer.h"
-#include <cpp14/memory.h>
-#include <backend/IConfig.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-class Config : public IConfig
-{
-public:
- std::string id() override { return "acl_cl"; }
- bool initialize() override;
- bool SupportPermutation() override { return true; }
- bool SupportSubTensorAlloc() override { return true; }
- std::unique_ptr<util::ITimer> timer() override { return nnfw::cpp14::make_unique<CLTimer>(); }
-};
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_CL_CONFIG_H__
diff --git a/runtime/neurun/backend/acl_cl/ConstantInitializer.cc b/runtime/neurun/backend/acl_cl/ConstantInitializer.cc
deleted file mode 100644
index 165b17cd1..000000000
--- a/runtime/neurun/backend/acl_cl/ConstantInitializer.cc
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ConstantInitializer.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _operands{operands}, _tensor_builder{tensor_builder}
-{
- // DO NOTHING
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
- const auto &block_size_obj = _operands.at(block_size_index);
-
- if (block_size_obj.isConstant())
- {
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::operand::ITensor &obj) {
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data().base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](::neurun::backend::operand::ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
- }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerPermuteInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerPermuteInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::EmbeddingLookup &node)
-{
- const auto &lookups_index = node.getInputs().at(ir::operation::EmbeddingLookup::LOOKUPS);
- const auto &lookups_obj = _operands.at(lookups_index);
- registerCopyInitializer(lookups_index, lookups_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto &weight_obj = _operands.at(weight_index);
- registerCopyInitializer(weight_index, weight_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::Gather &node)
-{
- const auto &indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
- const auto &indices_obj = _operands.at(indices_index);
- registerCopyInitializer(indices_index, indices_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::HashtableLookup &node)
-{
- const auto &lookups_index = node.getInputs().at(ir::operation::HashtableLookup::LOOKUPS);
- const auto &lookups_obj = _operands.at(lookups_index);
- registerCopyInitializer(lookups_index, lookups_obj);
-
- const auto &keys_index = node.getInputs().at(ir::operation::HashtableLookup::KEYS);
- const auto &keys_obj = _operands.at(keys_index);
- registerCopyInitializer(keys_index, keys_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
- const auto &input_to_input_weights_index =
- node.getInputs().at(ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
- const auto &input_to_input_weights_obj = _operands.at(input_to_input_weights_index);
- registerCopyInitializer(input_to_input_weights_index, input_to_input_weights_obj);
-
- const auto &input_to_forget_weights_index =
- node.getInputs().at(ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
- const auto &input_to_forget_weights_obj = _operands.at(input_to_forget_weights_index);
- registerCopyInitializer(input_to_forget_weights_index, input_to_forget_weights_obj);
-
- const auto &input_to_cell_weights_index =
- node.getInputs().at(ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
- const auto &input_to_cell_weights_obj = _operands.at(input_to_cell_weights_index);
- registerCopyInitializer(input_to_cell_weights_index, input_to_cell_weights_obj);
-
- const auto &input_to_output_weights_index =
- node.getInputs().at(ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
- const auto &input_to_output_weights_obj = _operands.at(input_to_output_weights_index);
- registerCopyInitializer(input_to_output_weights_index, input_to_output_weights_obj);
-
- const auto &recurrent_to_input_weights_index =
- node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
- const auto &recurrent_to_input_weights_obj = _operands.at(recurrent_to_input_weights_index);
- registerCopyInitializer(recurrent_to_input_weights_index, recurrent_to_input_weights_obj);
-
- const auto &recurrent_to_forget_weights_index =
- node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
- const auto &recurrent_to_forget_weights_obj = _operands.at(recurrent_to_forget_weights_index);
- registerCopyInitializer(recurrent_to_forget_weights_index, recurrent_to_forget_weights_obj);
-
- const auto &recurrent_to_cell_weights_index =
- node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
- const auto &recurrent_to_cell_weights_obj = _operands.at(recurrent_to_cell_weights_index);
- registerCopyInitializer(recurrent_to_cell_weights_index, recurrent_to_cell_weights_obj);
-
- const auto &recurrent_to_output_weights_index =
- node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
- const auto &recurrent_to_output_weights_obj = _operands.at(recurrent_to_output_weights_index);
- registerCopyInitializer(recurrent_to_output_weights_index, recurrent_to_output_weights_obj);
-
- const auto &cell_to_input_weights_index =
- node.getInputs().at(ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
- const auto &cell_to_input_weights_obj = _operands.at(cell_to_input_weights_index);
- registerCopyInitializer(cell_to_input_weights_index, cell_to_input_weights_obj);
-
- const auto &cell_to_forget_weights_index =
- node.getInputs().at(ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
- const auto &cell_to_forget_weights_obj = _operands.at(cell_to_forget_weights_index);
- registerCopyInitializer(cell_to_forget_weights_index, cell_to_forget_weights_obj);
-
- const auto &cell_to_output_weights_index =
- node.getInputs().at(ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
- const auto &cell_to_output_weights_obj = _operands.at(cell_to_output_weights_index);
- registerCopyInitializer(cell_to_output_weights_index, cell_to_output_weights_obj);
-
- const auto &input_gate_bias_index = node.getInputs().at(ir::operation::LSTM::INPUT_GATE_BIAS);
- const auto &input_gate_bias_obj = _operands.at(input_gate_bias_index);
- registerCopyInitializer(input_gate_bias_index, input_gate_bias_obj);
-
- const auto &forget_gate_bias_index = node.getInputs().at(ir::operation::LSTM::FORGET_GATE_BIAS);
- const auto &forget_gate_bias_obj = _operands.at(forget_gate_bias_index);
- registerCopyInitializer(forget_gate_bias_index, forget_gate_bias_obj);
-
- const auto &output_gate_bias_index = node.getInputs().at(ir::operation::LSTM::OUTPUT_GATE_BIAS);
- const auto &output_gate_bias_obj = _operands.at(output_gate_bias_index);
- registerCopyInitializer(output_gate_bias_index, output_gate_bias_obj);
-
- const auto &projection_weights_index =
- node.getInputs().at(ir::operation::LSTM::PROJECTION_WEIGHTS);
- const auto &projection_weights_obj = _operands.at(projection_weights_index);
- registerCopyInitializer(projection_weights_index, projection_weights_obj);
-
- const auto &projection_bias_index = node.getInputs().at(ir::operation::LSTM::PROJECTION_BIAS);
- const auto &projection_bias_obj = _operands.at(projection_bias_index);
- registerCopyInitializer(projection_bias_index, projection_bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
- const auto &weights_index = node.getInputs().at(ir::operation::RNN::WEIGHTS);
- const auto &weights_obj = _operands.at(weights_index);
- registerCopyInitializer(weights_index, weights_obj);
-
- const auto &recurrent_weights_index = node.getInputs().at(ir::operation::RNN::RECURRENT_WEIGHTS);
- const auto &recurrent_weights_obj = _operands.at(recurrent_weights_index);
- registerCopyInitializer(recurrent_weights_index, recurrent_weights_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::RNN::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
-{
- const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
- const auto &block_size_obj = _operands.at(block_size_index);
-
- if (block_size_obj.isConstant())
- {
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::operand::ITensor &obj) {
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data().base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](::neurun::backend::operand::ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
- }
-
- const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS);
- const auto &paddings_obj = _operands.at(paddings_index);
- if (paddings_obj.isConstant())
- {
- _init_map[paddings_index] = [](const ir::Operand &model_obj, backend::operand::ITensor &obj) {
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data().base());
- assert(model_obj.shape().rank() == 2);
- assert(obj.dimension(0) == 2);
- obj.access([&](::neurun::backend::operand::ITensor &tensor) {
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- for (auto j = 0; j < shape.dim(1); ++j)
- {
- const int32_t value = base[i * 2 + j];
- int32_t *into = reinterpret_cast<int32_t *>(
- tensor.buffer() + tensor.calcOffset({shape.dim(0) - i - 1, j}));
- *into = value;
- }
- }
- });
- };
- }
-}
-
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerPermuteInitializer(kernel_index, kernel_obj);
-}
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_cl/ConstantInitializer.h b/runtime/neurun/backend/acl_cl/ConstantInitializer.h
deleted file mode 100644
index 5965d2046..000000000
--- a/runtime/neurun/backend/acl_cl/ConstantInitializer.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
-#define __NEURUN_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
-
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-class ConstantInitializer : public IConstantInitializer
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
-
-public:
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::HashtableLookup &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::TransposeConv &) override;
-
-private:
- const ir::Operands &operands() const override { return _operands; }
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
-
-private:
- const ir::Operands &_operands;
- std::shared_ptr<TensorBuilder> _tensor_builder;
-};
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
diff --git a/runtime/neurun/backend/acl_cl/KernelGenerator.cc b/runtime/neurun/backend/acl_cl/KernelGenerator.cc
deleted file mode 100644
index bffb60b61..000000000
--- a/runtime/neurun/backend/acl_cl/KernelGenerator.cc
+++ /dev/null
@@ -1,2151 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "KernelGenerator.h"
-
-#include <arm_compute/runtime/CL/CLFunctions.h> // Include all ARM Compute CL functions
-#include <arm_compute/runtime/CL/CLFunctionsEx.h> // Include all ARM Compute EX CL functions
-
-#include <AclFunction.h>
-#include <Convert.h>
-#include <Swizzle.h>
-
-#include "ir/Index.h"
-#include "ir/DataType.h"
-#include "ir/InternalType.h"
-#include "compiler/IExecutionBuilder.h"
-#include "exec/NopFunction.h"
-#include "util/logging.h"
-#include "util/Utils.h"
-#include "util/Padding.h"
-
-using ::neurun::compiler::IExecutionBuilder;
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-using ::neurun::backend::acl_common::asAclFunction;
-
-//
-// ActivationBuilder
-//
-class ActivationBuilder
-{
-public:
- explicit ActivationBuilder(IExecutionBuilder &builder) : _builder(builder)
- {
- // DO NOTHING
- }
-
-private:
- void appendReLU(::arm_compute::ICLTensor *ifm_alloc);
- void appendReLU1(::arm_compute::ICLTensor *ifm_alloc);
- void appendReLU6(::arm_compute::ICLTensor *ifm_alloc);
-
-public:
- void append(ir::Activation code, ::arm_compute::ICLTensor *ifm_alloc);
-
-private:
- IExecutionBuilder &_builder;
-};
-
-void ActivationBuilder::appendReLU(::arm_compute::ICLTensor *ifm_alloc)
-{
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_alloc, nullptr, act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _builder.append(std::move(acl_fn));
-}
-
-void ActivationBuilder::appendReLU1(::arm_compute::ICLTensor *ifm_alloc)
-{
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_alloc, nullptr, act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _builder.append(std::move(acl_fn));
-}
-
-void ActivationBuilder::appendReLU6(::arm_compute::ICLTensor *ifm_alloc)
-{
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_alloc, nullptr, act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _builder.append(std::move(acl_fn));
-}
-
-void ActivationBuilder::append(ir::Activation code, ::arm_compute::ICLTensor *ifm_alloc)
-{
- switch (code)
- {
- case ir::Activation::NONE:
- {
- // DO NOTHING
- break;
- }
- case ir::Activation::RELU:
- {
- appendReLU(ifm_alloc);
- break;
- }
- case ir::Activation::RELU1:
- {
- appendReLU1(ifm_alloc);
- break;
- }
- case ir::Activation::RELU6:
- {
- appendReLU6(ifm_alloc);
- break;
- }
- default:
- {
- throw std::runtime_error("Not supported, yet");
- }
- }
-}
-
-//
-// KernelGenerator
-//
-KernelGenerator::KernelGenerator(const ir::Operands &ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _ctx(ctx), _tensor_builder(tensor_builder), _current_subg_layout(ir::Layout::UNKNOWN)
-{
- // DO NOTHING
-}
-
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
-{
- _current_subg_layout = op_seq.getLayout();
- for (const auto &e : op_seq.operations())
- {
- const auto &node = *(e.node);
- _tensor_builder->preVisit(node);
- node.accept(*this);
- _tensor_builder->postVisit(node);
- }
-}
-
-void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
- const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
-
- assert(_ctx.at(block_size_index).isConstant());
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBatchToSpaceLayer>();
-
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Cast &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- const auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8
- ? arm_compute::SubDataType::BOOL
- : arm_compute::SubDataType::NONE;
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLCast>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Conv2D &node)
-{
- using ir::operation::Conv2D;
-
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
- const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
- const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
- const auto &ker_shape = _ctx.at(ker_index).shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
-
- const auto stride = node.param().stride;
- const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
- stride, ker_width, ker_height);
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
-
- const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
- const auto act_info = acl_common::asActivationLayerInfo(activation);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
- conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
-{
- using ir::operation::DepthwiseConv2D;
-
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
- const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
- const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- // Kernel format is [1, kernel_height, kernel_width, depth_out].
- const auto &ker_shape = _ctx.at(ker_index).shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
-
- const auto stride = node.param().stride;
- const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
- stride, ker_width, ker_height);
- const auto multiplier = node.param().multiplier;
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
-
- const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
- const auto act_info = acl_common::asActivationLayerInfo(activation);
-
- if (ker_height == 3 && ker_width == 3)
- {
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer3x3>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
- }
- else
- {
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
- }
-}
-
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
- ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append((std::move(acl_fn)));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append((std::move(acl_fn)));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Concat &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
-
- std::vector<ir::OperandIndex> input_indexes;
-
- for (const auto &input : node.getInputs())
- input_indexes.emplace_back(input);
-
- const auto axis = node.param().axis;
-
- // If tensor allocator allocate as subtensor
- bool canEliminate = true;
- for (auto &ifm_ind : input_indexes)
- {
- if (!_tensor_builder->isSubTensorOf(ofm_index, ifm_ind))
- {
- canEliminate = false;
- break;
- }
- }
- if (canEliminate)
- {
- // If concat eliminated, return a NOP IFunction
- _execution_builder->append(nnfw::cpp14::make_unique<exec::NopFunction>());
- return;
- }
-
- auto output_alloc = _tensor_builder->at(ofm_index).get();
- std::vector<::arm_compute::ICLTensor *> input_tensors;
- for (auto &ifm_ind : input_indexes)
- input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
-
- std::unique_ptr<::arm_compute::IFunction> fn;
- if (input_indexes.size() < 2)
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::CLCopy>();
- l->configure(input_tensors.at(0), output_alloc->handle());
- fn = std::move(l);
- }
- else
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::CLConcatenateLayer>();
- const auto rank = node.param().rank;
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = output_alloc->layout();
- const auto fixed_axis =
- acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_alloc->handle(), fixed_axis);
- fn = std::move(l);
- }
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::FullyConnected &node)
-{
- using ir::operation::FullyConnected;
-
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
- const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
- const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
-
- const auto input_rank = _ctx.at(input_index).shape().rank();
- // TODO Currently we are not handling where the case is that the input's rank is 3.
- // The handling should be added in the future.
- assert(input_rank != 3);
-
- const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
- UNUSED_RELEASE(output_size);
- assert(_ctx.at(bias_index).shape().dim(0) == output_size);
- assert(_ctx.at(weight_index).shape().dim(0) == output_size);
- const auto batch_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2);
- const auto input_size =
- _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1);
-
- // Check for reshaping input's shape into rank-2
- bool needs_reshape = false;
- ir::Shape reshape(2);
- if (input_rank == 4)
- {
- const auto feature_size = _ctx.at(input_index).shape().num_elements();
-
- UNUSED_RELEASE(feature_size);
- assert(batch_size >= 0 && input_size >= 0);
- assert(feature_size == static_cast<uint64_t>(batch_size) * static_cast<uint64_t>(input_size));
-
- // for reshaping
- needs_reshape = true;
- reshape.dim(0) = batch_size; /* H */
- reshape.dim(1) = input_size; /* W */
- }
-
- const auto activation = node.param().activation;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- const auto input_alloc = _tensor_builder->at(input_index).get();
- const auto weight_alloc = _tensor_builder->at(weight_index).get();
- const auto bias_alloc = _tensor_builder->at(bias_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto acl_layout = output_alloc->handle()->info()->data_layout();
-
- auto fn = nnfw::cpp14::make_unique<arm_compute::CLFullyConnectedReshapingLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(
- input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(),
- needs_reshape,
- ::neurun::backend::acl_common::asTensorShape(
- reshape, frontend_layout, ::neurun::backend::acl_common::asRuntimeLayout(acl_layout)));
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, output_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPixelWiseMultiplication>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::ReduceSum &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReduceSum::Input::INPUT)};
- const auto &axes{node.param().axes};
- const auto keep_dims{node.param().keep_dims};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = input_alloc->layout();
-
- // Convert to ACL axes taking into account negative values and possible duplicates.
- std::set<std::uint32_t> acl_axes;
- const int input_rank = node.param().rank;
- for (int axis : axes)
- {
- if (axis < 0)
- axis += input_rank;
- acl_axes.insert(
- acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value());
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), acl_axes, keep_dims,
- ::arm_compute::ReduceOperation::SUM);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Reshape &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- // NOTE This operation must not be changed the layout from frontend to backend
- // So, PermutationOperationPass makes layouts of frontend and backend the same.
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = output_alloc->layout();
- assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
- frontend_layout == backend_layout);
- UNUSED_RELEASE(frontend_layout);
- UNUSED_RELEASE(backend_layout);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReshapeLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Squeeze &node)
-{
- // Squeeze is identical to reshape except that it has an optional dimensions input.
- // In addition, optional dims_index is ignored since output tensor already has squeezed shape
- // by freezer and toco
- // TODO Support multi-layout for frontend and backend
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- const auto dims{node.param().dims};
- const auto ndim{node.param().ndim};
- (void)dims;
- (void)ndim;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto fn = nnfw::cpp14::make_unique<arm_compute::CLReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
- auto acl_fn = asAclFunction(std::move(fn));
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<arm_compute::CLActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Softmax &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
-
- const auto beta = node.param().beta;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), beta);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Slice &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
- const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
- const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
-
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = inputData_alloc->layout();
-
- // Set initializers for indices data such as order of inputData
- int input_rank = node.param().rank;
- std::vector<int32_t> starts;
- std::vector<int32_t> ends;
- starts.resize(input_rank, 0);
- ends.resize(input_rank, 0);
- {
- auto beginData_base = _ctx.at(begins_index).data().base();
- auto sizeData_base = _ctx.at(sizes_index).data().base();
- const int beginData_size = _ctx.at(begins_index).shape().num_elements();
- const int sizeData_size = _ctx.at(sizes_index).shape().num_elements();
-
- using ir::DataType;
-
- UNUSED_RELEASE(beginData_size);
- UNUSED_RELEASE(sizeData_size);
-
- assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32);
- assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32);
- assert(beginData_size == input_rank);
- assert(sizeData_size == input_rank);
-
- assert(beginData_base != nullptr);
- for (int n = 0; n < input_rank; ++n)
- {
- auto axis = ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
- backend_layout)
- .value();
-
- int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
- starts[axis] = begin_value;
-
- int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n);
- ends[axis] = begin_value + size_value;
- }
- }
-
- ::arm_compute::Coordinates starts_set;
- ::arm_compute::Coordinates ends_set;
-
- for (size_t i = 0; i < starts.size(); ++i)
- {
- starts_set.set(i, starts[i]);
- ends_set.set(i, ends[i]);
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSlice>();
-
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::StridedSlice &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
- const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
- const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
-
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = inputData_alloc->layout();
-
- // Set initializers for indices data such as order of inputData
- int input_rank = node.param().rank;
- std::vector<int32_t> starts;
- std::vector<int32_t> ends;
- std::vector<int32_t> strides;
- starts.resize(input_rank, 0);
- ends.resize(input_rank, 0);
- strides.resize(input_rank, 0);
- {
- auto startData_base = _ctx.at(starts_index).data().base();
- auto endData_base = _ctx.at(ends_index).data().base();
- auto stridesData_base = _ctx.at(strides_index).data().base();
- const int startData_size = _ctx.at(starts_index).shape().num_elements();
- const int endData_size = _ctx.at(ends_index).shape().num_elements();
- const int stridesData_size = _ctx.at(strides_index).shape().num_elements();
-
- using ir::DataType;
-
- UNUSED_RELEASE(startData_size);
- UNUSED_RELEASE(endData_size);
- UNUSED_RELEASE(stridesData_size);
-
- assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32);
- assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32);
- assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32);
- assert(startData_size == input_rank);
- assert(endData_size == input_rank);
- assert(stridesData_size == input_rank);
-
- assert(startData_base != nullptr);
- for (int n = 0; n < input_rank; ++n)
- {
- auto axis = ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
- backend_layout)
- .value();
-
- int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
- starts[axis] = start_value;
-
- int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
- ends[axis] = end_value;
-
- int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
- strides[axis] = strides_value;
- }
- }
-
- // Set mask bits such as order of inputData
- const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank,
- frontend_layout, backend_layout);
- const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank,
- frontend_layout, backend_layout);
- const auto shrink_axis_mask = acl_common::ReorderBits<int32_t>(
- node.param().shrink_axis_mask, input_rank, frontend_layout, backend_layout);
-
- ::arm_compute::Coordinates starts_set;
- ::arm_compute::Coordinates ends_set;
- ::arm_compute::BiStrides strides_set;
-
- for (size_t i = 0; i < starts.size(); ++i)
- {
- starts_set.set(i, starts[i]);
- ends_set.set(i, ends[i]);
- strides_set.set(i, strides[i]);
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLStridedSlice>();
-
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set,
- strides_set, begin_mask, end_mask, shrink_axis_mask);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Transpose &node)
-{
- const auto ofm_idx{node.getOutputs().at(0)};
- const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &perm{node.param().perm};
-
- const auto rank = node.param().rank;
-
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
-
- std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
- // Reversed
- auto backend_pv = ::neurun::backend::acl_common::getARMComputePermutationVector(
- rank, pv, frontend_layout, backend_layout);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPermute>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticAddition>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticSubtraction>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticDivision>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Exp &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLExpLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
- const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
- const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto gamma_alloc = _tensor_builder->at(gamma_index).get();
- auto beta_alloc = _tensor_builder->at(beta_index).get();
- auto epsilon = node.param().epsilon;
- auto activation = node.param().activation;
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(),
- beta_alloc->handle(), epsilon);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>();
-
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
- ::arm_compute::BinaryLogicalOperation::AND);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LSTM &node)
-{
- // TODO Support dynamic rnn
- // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
- const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
- const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
- const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
- const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
- const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
- const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
- const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
- const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
- const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
- const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
- const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
- const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
- const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
- const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
- const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
- const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
- const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
- const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
- const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
- const auto cell_threshold = node.param().cell_threshold;
- const auto projection_threshold = node.param().projection_threshold;
-
- bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
- bool has_recurrent_to_input_weights =
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
- bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0;
- bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
-
- // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
- // true: no CIFG
- // false: CIFG
- // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
- bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
-
- // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
- // But the cell_to_input_weights does not exist in regular CIFG although peephole.
- // true: peephole
- // false: no peephole
- bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
-
- // NOTE Although the projection weights has data the projection bias may not have data.
- bool has_projection_param = has_projection_weights;
-
- const auto activation = node.param().activation;
- const auto cell_clip = cell_threshold;
- const auto projection_clip = projection_threshold;
- assert(cell_clip >= 0.f && projection_clip >= 0.f);
-
- auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get();
- auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get();
- auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get();
- auto output_alloc = _tensor_builder->at(output_index).get();
-
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get();
- auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get();
- auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get();
- auto recurrent_to_forget_weights_alloc =
- _tensor_builder->at(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get();
- auto recurrent_to_output_weights_alloc =
- _tensor_builder->at(recurrent_to_output_weights_index).get();
-
- auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get();
- auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get();
- auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get();
- auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get();
- auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get();
-
- auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(activation);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLLSTMLayer>();
-
- ::arm_compute::LSTMParams<::arm_compute::ICLTensor> lstm_params{};
- if (has_cifg_param)
- {
- auto input_to_input_weights_alloc =
- _tensor_builder->at(input_to_input_weights_index).get(); // optional
- auto recurrent_to_input_weights_alloc =
- _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
- auto cell_to_input_weights_handle =
- has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle()
- : nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional
- lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(),
- recurrent_to_input_weights_alloc->handle(),
- cell_to_input_weights_handle, input_gate_bias_alloc->handle());
- }
- if (has_peephole_param)
- {
- auto cell_to_forget_weights_alloc =
- _tensor_builder->at(cell_to_forget_weights_index).get(); // optional
- auto cell_to_output_weights_alloc =
- _tensor_builder->at(cell_to_output_weights_index).get(); // optional
- lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(),
- cell_to_output_weights_alloc->handle());
- }
- if (has_projection_param)
- {
- auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional
- auto projection_bias_handle = has_projection_bias
- ? _tensor_builder->at(projection_bias_index).get()->handle()
- : nullptr; // optional
- lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle);
- }
-
- fn->configure(
- input_alloc->handle(), input_to_forget_weights_alloc->handle(),
- input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(),
- recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(),
- recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(),
- cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(),
- cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(),
- output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(),
- lstm_params, act_info, cell_clip, projection_clip);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReduceMax &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReduceMax::Input::INPUT)};
- const auto &axes{node.param().axes};
- const auto keep_dims{node.param().keep_dims};
-
- auto ofm_alloc = _tensor_builder->at(output_index).get();
- auto ifm_alloc = _tensor_builder->at(input_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
-
- // Convert to ACL axes taking into account negative values and possible duplicates.
- std::set<std::uint32_t> acl_axes;
- const int ifm_rank = node.param().rank;
- for (int axis : axes)
- {
- if (axis < 0)
- axis += ifm_rank;
- acl_axes.insert(
- acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), acl_axes, keep_dims,
- arm_compute::ReduceOperation::MAX);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Comparison &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
-
- const auto comparison_type = node.param().comparison_type;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLComparison>();
-
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Pack &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- auto axis{node.param().axis};
-
- const auto output_rank = node.param().rank;
-
- std::vector<ir::OperandIndex> input_indexes;
- for (const auto &input_index : node.getInputs())
- input_indexes.emplace_back(input_index);
-
- auto output = _tensor_builder->at(output_index).get()->handle();
- std::vector<arm_compute::ICLTensor *> inputs;
- for (const auto &input_index : input_indexes)
- inputs.emplace_back(_tensor_builder->at(input_index)->handle());
-
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
-
- if (axis < 0)
- axis += output_rank;
- axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLStackLayer>();
-
- fn->configure(inputs, axis, output);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::Permute &node)
-{
- const auto ofm_idx{node.getOutputs().at(0)};
- const auto ifm_idx{node.getInputs().at(0)};
- const auto permute_type = node.getPermuteType();
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
- const auto rank = _ctx.at(ofm_idx).shape().rank();
- assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
-
- std::unique_ptr<::arm_compute::IFunction> fn;
- arm_compute::PermutationVector pv;
- if (permute_type == ir::operation::Permute::Type::NCHW_TO_NHWC && rank == 4)
- {
- // WHCN -> CWHN
- pv = arm_compute::PermutationVector{2, 0, 1};
-
- auto l = nnfw::cpp14::make_unique<::arm_compute::CLPermute>();
-
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
-
- fn = std::move(l);
- }
- else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
- {
- // CWHN -> WHCN
- pv = arm_compute::PermutationVector{1, 2, 0};
-
- auto l = nnfw::cpp14::make_unique<::arm_compute::CLPermute>();
-
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
-
- fn = std::move(l);
- }
- else
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::CLCopy>();
-
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- fn = std::move(l);
- }
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLRsqrtLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<arm_compute::CLActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
-
- const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLScale>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(),
- ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RNN &node)
-{
- const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
- const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
- const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
- const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
- const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
- const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
-
- const auto activation = node.param().activation;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get();
-
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto weights_alloc = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get();
- auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(activation);
-
- auto copy_layer = nnfw::cpp14::make_unique<::arm_compute::CLCopy>();
- copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle());
- _execution_builder->append(asAclFunction(std::move(copy_layer)));
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLRNNLayerEx>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(),
- bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(),
- act_info);
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::Floor &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLFloor>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
- const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
- const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
- auto paddings_alloc = _tensor_builder->at(paddings_index).get();
-
- assert(_ctx.at(block_size_index).isConstant());
- assert(_ctx.at(paddings_index).isConstant());
-
- std::unique_ptr<::arm_compute::IFunction> fn;
- if (_ctx.at(ofm_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM)
- {
- // NOTE CLSpaceToBatchLayer has a bug that padding's values are 0 even when zero point of
- // QASYMM8 is not 0.
- auto l = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToBatchND>();
- l->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(),
- ofm_alloc->handle());
- fn = std::move(l);
- }
- else
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToBatchLayer>();
- l->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(),
- ofm_alloc->handle());
- fn = std::move(l);
- }
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
-
- auto block_size = node.param().block_size;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToDepth>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
-
- uint32_t kw = node.param().kw;
- uint32_t kh = node.param().kh;
- const auto stride = node.param().stride;
- const auto padding =
- neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh},
- ::neurun::backend::acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLEmbeddingLookup>();
-
- fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::L2Normalization &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
-
- // {CL|Neon}L2Normalization performs the reduction only along dimension 0
- // L2 Normalization always performs the reduction along the depth axis
- // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by
- // choosing normalization parameters as below
-
- const auto &ifm_shape = _ctx.at(ifm_index).shape();
- // TODO Support optional constant dimension that normalization would be performed on
- const auto normalization_axis = node.param().rank - 1;
- int32_t radius =
- 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
- float alpha = 1.0f; // In the implementation to make alpha_ become 1
- float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
- float bias = 0.0f; // Don't offset the reduction.
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
- radius, alpha, beta, bias, false);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
-{
- const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
- const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
-
- const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
- const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
- const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hits_alloc = _tensor_builder->at(hits_index).get();
-
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto keys_alloc = _tensor_builder->at(keys_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLHashtableLookup>();
-
- fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(),
- output_alloc->handle(), hits_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::PReLU &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
- const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto alpha_alloc = _tensor_builder->at(alpha_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPReLU>();
-
- fn->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::TransposeConv &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto output_shape_index{
- node.getInputs().at(ir::operation::TransposeConv::Input::OUTPUT_SHAPE)};
- const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
- const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_subg_layout);
-
- const auto stride = node.param().stride;
-
- assert((node.param().padding.type == ir::PaddingType::SAME) ||
- (node.param().padding.type == ir::PaddingType::VALID));
- auto padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
- ker_shape.W, ker_shape.H);
-
- uint32_t invalid_horizontal = 0;
- uint32_t invalid_vertical = 0;
- if (node.param().padding.type == ir::PaddingType::VALID)
- {
- invalid_horizontal =
- ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
- invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
- }
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
-
- const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLTransposeConvLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
- invalid_horizontal, invalid_vertical);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBitwiseOr>();
-
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBitwiseNot>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::TopKV2 &node)
-{
- const auto outputValues_index{node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_VALUES)};
- const auto outputIndices_index{
- node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};
-
- const auto inputData_index{node.getInputs().at(ir::operation::TopKV2::Input::INPUT)};
-
- // Currently, we only support the vector input.
- assert(_ctx.at(inputData_index).shape().rank() == 1 ||
- _ctx.at(inputData_index).shape().rank() == 2);
-
- const auto k = node.param().k;
-
- auto values_alloc = _tensor_builder->at(outputValues_index).get();
- auto indices_alloc = _tensor_builder->at(outputIndices_index).get();
- auto input_alloc = _tensor_builder->at(inputData_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLTopKV2>();
-
- fn->configure(input_alloc->handle(), k, values_alloc->handle(), indices_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Gather &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
-
- const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape();
-
- const auto ifm_rank = node.param().rank;
- const auto axis_raw = node.param().axis;
- const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
- const int axis = ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto indices_alloc = _tensor_builder->at(indices_index).get();
-
- // NOTE The frontend layout and backend layout must be the same for this operation.
- // If not the same, we have to add a stage(?) to perform permutation of output tensor. It
- // is not not efficient even if it works well. If so, it would be better to set the
- // layout of these backend tensors to the same layout.
- // There is also one thing we have to think about. This operation depends on the layout of
- // a model. For example, if a model in NHWC has this operation as output rank == 4, indices
- // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
- // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
- const auto backend_layout = ofm_alloc->layout();
- UNUSED_RELEASE(backend_layout);
- assert(backend_layout == ifm_alloc->layout());
- assert(backend_layout == indices_alloc->layout());
- assert(ifm_rank < 4 || _current_subg_layout == backend_layout);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLGatherEx>();
-
- fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNeg>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
-
- auto ifm_shape = _ctx.at(ifm_index).shape();
- auto ofm_shape = _ctx.at(ofm_index).shape();
-
- assert((ifm_shape.rank() - 1) == ofm_shape.rank());
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- const auto ifm_rank = node.param().rank;
- auto frontend_layout = _current_subg_layout;
- auto backend_layout = ifm_alloc->layout();
-
- int axis_value = node.param().axis;
- if (axis_value < 0)
- {
- axis_value += ifm_rank;
- }
-
- auto acl_axis =
- acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArgOperation>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), {acl_axis},
- ::arm_compute::ArgOperation::MAX);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLCast>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), arm_compute::SubDataType::NONE);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Mean &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Mean::Input::INPUT)};
- const auto &axes{node.param().axes};
- const auto keep_dims{node.param().keep_dims};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
-
- // Convert to ACL axes taking into account negative values and possible duplicates.
- std::set<std::uint32_t> acl_axes;
- const int ifm_rank = node.param().rank;
- for (int axis : axes)
- {
- if (axis < 0)
- axis += ifm_rank;
- acl_axes.insert(
- acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
- }
-
- arm_compute::Coordinates reduce_axes;
- for (const auto axis : acl_axes)
- {
- reduce_axes.set(reduce_axes.num_dimensions(), axis);
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceMean>();
-
- fn->configure(ifm_alloc->handle(), reduce_axes, keep_dims, ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{
- node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
-
- auto radius = node.param().radius;
- auto alpha = node.param().alpha;
- auto beta = node.param().beta;
- auto bias = node.param().bias;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const auto norm_info = ::arm_compute::NormalizationLayerInfo(
- ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
-
- auto block_size = node.param().block_size;
- assert(block_size > 0);
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthToSpace>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), block_size);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReduceMin &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReduceMin::Input::INPUT)};
- const auto &axes{node.param().axes};
- const auto keep_dims{node.param().keep_dims};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
-
- // Convert to ACL axes taking into account negative values and possible duplicates.
- std::set<std::uint32_t> acl_axes;
- const int ifm_rank = node.param().rank;
- for (int axis : axes)
- {
- if (axis < 0)
- axis += ifm_rank;
- acl_axes.insert(
- acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), acl_axes, keep_dims,
- ::arm_compute::ReduceOperation::MIN);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Split &node)
-{
- const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
-
- assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
-
- const auto ifm_rank = node.param().rank;
- std::vector<ir::OperandIndex> output_indexes;
- for (const auto &output : node.getOutputs())
- output_indexes.emplace_back(output);
-
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- std::vector<arm_compute::ICLTensor *> output_allocs;
- for (const auto &ofm_ind : output_indexes)
- output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
-
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
- auto axis = node.param().axis;
- if (axis < 0)
- axis += ifm_rank;
- axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSplit>();
-
- fn->configure(ifm_alloc->handle(), output_allocs, axis);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::Unpack &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
- auto axis{node.param().axis};
-
- const auto input_rank = node.param().rank;
-
- std::vector<ir::OperandIndex> output_indexes;
- for (const auto &output_index : node.getOutputs())
- output_indexes.emplace_back(output_index);
-
- auto input = _tensor_builder->at(input_index).get()->handle();
- std::vector<arm_compute::ICLTensor *> outputs;
- for (const auto &output_index : output_indexes)
- outputs.emplace_back(_tensor_builder->at(output_index)->handle());
-
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
- if (axis < 0)
- axis += input_rank;
- axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLUnstack>();
-
- fn->configure(input, outputs, axis);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::Pad &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
- const auto output_index{node.getOutputs().at(0)};
- assert(_ctx.at(pad_index).isConstant());
-
- auto rank = node.param().rank;
- auto pad_base = _ctx.at(pad_index).data().base();
-
- auto input_type = _ctx.at(input_index).typeInfo();
- auto data_type = acl_common::asDataType(input_type.type());
- auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset());
- const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
-
- auto input = _tensor_builder->at(input_index).get()->handle();
- auto output = _tensor_builder->at(output_index).get()->handle();
-
- ::arm_compute::PaddingList padding_list;
- padding_list.resize(rank);
- for (int32_t n = 0; n < rank; ++n)
- {
- const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
-
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
- const auto axis =
- acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
- padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
- }
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPadLayer>();
- fn->configure(input, output, padding_list, pixel_value);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLElementwiseMin>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLElementwiseMax>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_cl/KernelGenerator.h b/runtime/neurun/backend/acl_cl/KernelGenerator.h
deleted file mode 100644
index a577f1ebc..000000000
--- a/runtime/neurun/backend/acl_cl/KernelGenerator.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
-#define __NEURUN_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
-
-#include <backend/IKernelGenerator.h>
-
-#include "ir/Operands.h"
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-class KernelGenerator : public IKernelGenerator
-{
-public:
- KernelGenerator(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder);
-
- void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::ReduceSum &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Tanh &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::Slice &) override;
- void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::Transpose &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Cast &) override;
- void visit(const ir::operation::Div &) override;
- void visit(const ir::operation::Exp &) override;
- void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::Logistic &) override;
- void visit(const ir::operation::ReduceMax &) override;
- void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::LogicalAnd &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::Pack &) override;
- void visit(const ir::operation::Permute &) override;
- void visit(const ir::operation::RSQRT &) override;
- void visit(const ir::operation::ReLU &) override;
- void visit(const ir::operation::ResizeBilinear &) override;
- void visit(const ir::operation::ReLU1 &) override;
- void visit(const ir::operation::ReLU6 &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::Floor &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::L2Pool2D &) override;
- void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::L2Normalization &) override;
- void visit(const ir::operation::HashtableLookup &) override;
- void visit(const ir::operation::PReLU &) override;
- void visit(const ir::operation::TransposeConv &) override;
- void visit(const ir::operation::SQRT &) override;
- void visit(const ir::operation::LogicalOr &) override;
- void visit(const ir::operation::LogicalNot &) override;
- void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::TopKV2 &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::Neg &) override;
- void visit(const ir::operation::Abs &) override;
- void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::Dequantize &) override;
- void visit(const ir::operation::Mean &) override;
- void visit(const ir::operation::LocalResponseNormalization &) override;
- void visit(const ir::operation::DepthToSpace &) override;
- void visit(const ir::operation::ReduceMin &) override;
- void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Max &) override;
-
-private:
- const ir::Operands &_ctx;
- std::shared_ptr<TensorBuilder> _tensor_builder;
- ir::Layout _current_subg_layout;
-};
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
diff --git a/runtime/neurun/backend/acl_cl/PluginClassesAllocator.cc b/runtime/neurun/backend/acl_cl/PluginClassesAllocator.cc
deleted file mode 100644
index ac3f0acff..000000000
--- a/runtime/neurun/backend/acl_cl/PluginClassesAllocator.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <util/logging.h>
-
-#include "Backend.h"
-
-extern "C" {
-neurun::backend::Backend *neurun_backend_create()
-{
- VERBOSE(neurun_backend_create) << "'acl_cl' loaded\n";
- return new neurun::backend::acl_cl::Backend;
-}
-
-void neurun_backend_destroy(neurun::backend::Backend *backend)
-{
- VERBOSE(neurun_backend_create) << "'acl_cl' unloaded\n";
- delete backend;
-}
-}
diff --git a/runtime/neurun/backend/acl_cl/ShapeFixer.cc b/runtime/neurun/backend/acl_cl/ShapeFixer.cc
deleted file mode 100644
index e1cbeab6c..000000000
--- a/runtime/neurun/backend/acl_cl/ShapeFixer.cc
+++ /dev/null
@@ -1,434 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ShapeFixer.h"
-
-#include <arm_compute/runtime/CL/CLFunctions.h> // Include all ARM Compute CL functions
-#include <arm_compute/runtime/CL/CLFunctionsEx.h> // Include all ARM Compute EX CL functions
-
-#include <AclFunction.h>
-#include <Convert.h>
-#include <Swizzle.h>
-
-#include "ir/Index.h"
-#include "compiler/IExecutionBuilder.h"
-#include "exec/NopFunction.h"
-#include "util/logging.h"
-#include "util/Utils.h"
-#include "util/Padding.h"
-
-using ::neurun::compiler::IExecutionBuilder;
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-using ::neurun::backend::acl_common::asAclFunction;
-
-ShapeFixer::ShapeFixer(const ir::Operands &ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _ctx(ctx), _tensor_builder(tensor_builder)
-{
- assert(tensor_builder);
-}
-
-void ShapeFixer::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
- _tensor_builder->dimCorrection(ofm_index, false);
- _tensor_builder->dimCorrection(ifm_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Cast &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Conv2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::DepthwiseConv2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::MaxPool2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::AvgPool2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Concat &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- _tensor_builder->dimCorrection(ofm_index, false);
- for (const auto &input : node.getInputs())
- _tensor_builder->dimCorrection(input, false);
-}
-
-void ShapeFixer::visit(const ir::operation::FullyConnected &node)
-{
- using ir::operation::FullyConnected;
- const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
- const auto input_rank = _ctx.at(input_index).shape().rank();
- if (input_rank == 4)
- _tensor_builder->dimCorrection(input_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Mul &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::ReduceSum &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Reshape &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- _tensor_builder->dimCorrection(input_index, false);
- _tensor_builder->dimCorrection(output_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Squeeze &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).shape().rank() == 0)
- const_cast<ir::Shape &>(_ctx.at(output_index).shape()).extendRank(1);
- const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- _tensor_builder->dimCorrection(input_index, false);
- _tensor_builder->dimCorrection(output_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Tanh &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Softmax &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Slice &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::StridedSlice &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- _tensor_builder->dimCorrection(ofm_index, false);
- _tensor_builder->dimCorrection(ifm_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Transpose &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Add &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Sub &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Div &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Exp &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::InstanceNorm &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Logistic &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::LogicalAnd &node)
-{
- const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
- if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::LSTM &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::ReduceMax &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Comparison &node)
-{
- const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
-
- if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Pack &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- _tensor_builder->dimCorrection(ofm_index, false);
- for (const auto &inputs : node.getInputs())
- {
- _tensor_builder->dimCorrection(inputs, false);
- const auto ofm_rank = _ctx.at(ofm_index).shape().rank();
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(inputs).shape()).extendRank(ofm_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Permute &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::RSQRT &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::ReLU &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::ResizeBilinear &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::ReLU1 &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::ReLU6 &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::RNN &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Floor &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::SpaceToBatchND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
- _tensor_builder->dimCorrection(ofm_index, false);
- _tensor_builder->dimCorrection(ifm_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::SpaceToDepth &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
- _tensor_builder->dimCorrection(ofm_index, false);
- _tensor_builder->dimCorrection(ifm_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::L2Pool2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::EmbeddingLookup &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- _tensor_builder->dimCorrection(values_index, false);
- _tensor_builder->dimCorrection(output_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::L2Normalization &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::HashtableLookup &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::PReLU &node)
-{
- const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
- const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
-
- if (!(_ctx.at(ifm_index).shape() == _ctx.at(alpha_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(ifm_index).shape().rank(), _ctx.at(alpha_index).shape().rank());
- const_cast<ir::Shape &>(_ctx.at(ifm_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(alpha_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::TransposeConv &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::SQRT &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::LogicalOr &node)
-{
- const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
- if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
- const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::LogicalNot &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::SquaredDifference &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::TopKV2 &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Gather &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- _tensor_builder->dimCorrection(ofm_index, false);
- _tensor_builder->dimCorrection(ifm_index, false);
- _tensor_builder->dimCorrection(indices_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Neg &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Abs &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::ArgMax &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
- _tensor_builder->dimCorrection(ofm_index, false);
- _tensor_builder->dimCorrection(ifm_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Dequantize &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Mean &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::LocalResponseNormalization &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::DepthToSpace &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::ReduceMin &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Split &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
- _tensor_builder->dimCorrection(input_index, false);
- for (const auto &output : node.getOutputs())
- _tensor_builder->dimCorrection(output, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Unpack &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
- _tensor_builder->dimCorrection(input_index, false);
- for (const auto &output_index : node.getOutputs())
- _tensor_builder->dimCorrection(output_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Pad &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto output_index{node.getOutputs().at(0)};
- _tensor_builder->dimCorrection(input_index, false);
- _tensor_builder->dimCorrection(output_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Min &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Max &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_cl/ShapeFixer.h b/runtime/neurun/backend/acl_cl/ShapeFixer.h
deleted file mode 100644
index ec5f5c896..000000000
--- a/runtime/neurun/backend/acl_cl/ShapeFixer.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_CL_SHAPE_FIXER_H__
-#define __NEURUN_BACKEND_ACL_CL_SHAPE_FIXER_H__
-
-#include <backend/IShapeFixer.h>
-
-#include "ir/Operands.h"
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-class ShapeFixer : public IShapeFixer
-{
-public:
- ShapeFixer(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder);
-
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::ReduceSum &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Tanh &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::Slice &) override;
- void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::Transpose &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Cast &) override;
- void visit(const ir::operation::Div &) override;
- void visit(const ir::operation::Exp &) override;
- void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::Logistic &) override;
- void visit(const ir::operation::ReduceMax &) override;
- void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::LogicalAnd &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::Pack &) override;
- void visit(const ir::operation::Permute &) override;
- void visit(const ir::operation::RSQRT &) override;
- void visit(const ir::operation::ReLU &) override;
- void visit(const ir::operation::ResizeBilinear &) override;
- void visit(const ir::operation::ReLU1 &) override;
- void visit(const ir::operation::ReLU6 &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::Floor &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::L2Pool2D &) override;
- void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::L2Normalization &) override;
- void visit(const ir::operation::HashtableLookup &) override;
- void visit(const ir::operation::PReLU &) override;
- void visit(const ir::operation::TransposeConv &) override;
- void visit(const ir::operation::SQRT &) override;
- void visit(const ir::operation::LogicalOr &) override;
- void visit(const ir::operation::LogicalNot &) override;
- void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::TopKV2 &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::Neg &) override;
- void visit(const ir::operation::Abs &) override;
- void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::Dequantize &) override;
- void visit(const ir::operation::Mean &) override;
- void visit(const ir::operation::LocalResponseNormalization &) override;
- void visit(const ir::operation::DepthToSpace &) override;
- void visit(const ir::operation::ReduceMin &) override;
- void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Max &) override;
-
-private:
- const ir::Operands &_ctx;
- std::shared_ptr<TensorBuilder> _tensor_builder;
-};
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_CL_SHAPE_FIXER_H__
diff --git a/runtime/neurun/backend/acl_cl/TensorBuilder.h b/runtime/neurun/backend/acl_cl/TensorBuilder.h
deleted file mode 100644
index b9a0dd4a6..000000000
--- a/runtime/neurun/backend/acl_cl/TensorBuilder.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_CL_TENSOR_BUILDER_H__
-#define __NEURUN_BACKEND_ACL_CL_TENSOR_BUILDER_H__
-
-#include <TemplTensorBuilder.h>
-
-#include "operand/CLTensor.h"
-#include "operand/CLSubTensor.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-using TensorBuilder =
- acl_common::TemplTensorBuilder<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_CL_TENSOR_BUILDER_H__
diff --git a/runtime/neurun/backend/acl_cl/TensorManager.h b/runtime/neurun/backend/acl_cl/TensorManager.h
deleted file mode 100644
index fd2a9059c..000000000
--- a/runtime/neurun/backend/acl_cl/TensorManager.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_CL_TENSOR_MANAGER_H__
-#define __NEURUN_BACKEND_ACL_CL_TENSOR_MANAGER_H__
-
-#include <arm_compute/runtime/CL/CLBufferAllocator.h>
-#include <arm_compute/runtime/PoolManager.h>
-#include <arm_compute/runtime/BlobLifetimeManager.h>
-#include <arm_compute/runtime/MemoryManagerOnDemand.h>
-#include <arm_compute/runtime/CL/CLMemoryGroup.h>
-
-#include <AclMemoryManager.h>
-#include <AclLinearMemoryManager.h>
-#include <AclInternalBufferManager.h>
-#include <AclTensorManager.h>
-
-#include "operand/CLTensor.h"
-#include "operand/CLSubTensor.h"
-
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-using MemoryManager =
- acl_common::AclMemoryManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
-
-using LinearMemoryManager = acl_common::AclLinearMemoryManager<
- operand::ICLTensor, operand::CLTensor, operand::CLSubTensor,
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator,
- ::arm_compute::CLMemoryGroup>;
-
-using InternalBufferManager = acl_common::AclInternalBufferManager<
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator>;
-
-using TensorManager =
- acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
-
-TensorManager *createTensorManager()
-{
- const std::string executor_str = util::getConfigString(util::config::EXECUTOR);
-
- if (executor_str == "Linear")
- {
- VERBOSE(acl_cl_createTensorManager) << "AclTensorManager as Linear" << std::endl;
- return new TensorManager(new MemoryManager(), new LinearMemoryManager(),
- new InternalBufferManager());
- }
- else
- {
- VERBOSE(acl_cl_createTensorManager) << "AclTensorManager" << std::endl;
- return new TensorManager(new MemoryManager(), new MemoryManager(), new InternalBufferManager());
- }
-}
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_CL_TENSOR_MANAGER_H__
diff --git a/runtime/neurun/backend/acl_cl/TensorRegister.h b/runtime/neurun/backend/acl_cl/TensorRegister.h
deleted file mode 100644
index 02de45580..000000000
--- a/runtime/neurun/backend/acl_cl/TensorRegister.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_CL_TENSOR_REGISTER_H__
-#define __NEURUN_BACKEND_ACL_CL_TENSOR_REGISTER_H__
-
-#include <AclTensorRegister.h>
-#include <misc/polymorphic_downcast.h>
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-class TensorRegister : public acl_common::AclTensorRegister
-{
-public:
- TensorRegister(const ir::Operands &operands, const std::shared_ptr<TensorBuilder> &tensor_builder)
- : acl_common::AclTensorRegister{operands, tensor_builder}
- {
- // DO NOTHING
- }
-
- void setUsesCount(const ir::OperandIndex &ind, size_t num_uses) const override
- {
- nnfw::misc::polymorphic_downcast<TensorBuilder *>(tensor_builder().get())
- ->setUsesCount(ind, num_uses);
- }
-};
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_CL_TENSOR_REGISTER_H__
diff --git a/runtime/neurun/backend/acl_cl/operand/CLSubTensor.cc b/runtime/neurun/backend/acl_cl/operand/CLSubTensor.cc
deleted file mode 100644
index 70c8829d9..000000000
--- a/runtime/neurun/backend/acl_cl/operand/CLSubTensor.cc
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CLSubTensor.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-namespace operand
-{
-
-CLSubTensor::CLSubTensor(ICLTensor *parent, const arm_compute::TensorShape &tensor_shape,
- const arm_compute::Coordinates &coords, size_t rank, bool extend_parent)
- : _cl_sub_tensor(std::make_shared<arm_compute::CLSubTensor>(parent->handle(), tensor_shape,
- coords, extend_parent)),
- _rank{rank}
-{
- // DO NOTHING
-}
-
-const arm_compute::CLSubTensor *CLSubTensor::handle() const { return _cl_sub_tensor.get(); }
-
-arm_compute::CLSubTensor *CLSubTensor::handle() { return _cl_sub_tensor.get(); }
-
-} // namespace operand
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_cl/operand/CLSubTensor.h b/runtime/neurun/backend/acl_cl/operand/CLSubTensor.h
deleted file mode 100644
index 8eba3760f..000000000
--- a/runtime/neurun/backend/acl_cl/operand/CLSubTensor.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_CL_OPERAND_CL_SUB_TENSOR_H__
-#define __NEURUN_BACKEND_ACL_CL_OPERAND_CL_SUB_TENSOR_H__
-
-#include <arm_compute/runtime/CL/CLSubTensor.h>
-#include "ICLTensor.h"
-#include "compiler/SubTensorInfo.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-namespace operand
-{
-
-class CLSubTensor : public ICLTensor
-{
-public:
- CLSubTensor() = delete;
-
-public:
- CLSubTensor(ICLTensor *parent, const arm_compute::TensorShape &tensor_shape,
- const arm_compute::Coordinates &coords, size_t rank, bool extend_parent = false);
-
-public:
- size_t num_dimensions() const final { return _rank; }
-
-public:
- const arm_compute::CLSubTensor *handle() const override;
- arm_compute::CLSubTensor *handle() override;
-
-public:
- // This method is used to prevent the use of memcpy for SubTensor
- bool has_padding() const override { return true; }
-
-private:
- std::shared_ptr<arm_compute::CLSubTensor> _cl_sub_tensor;
- size_t _rank;
-};
-
-} // namespace operand
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_CL_OPERAND_CL_SUB_TENSOR_H__
diff --git a/runtime/neurun/backend/acl_cl/operand/CLTensor.cc b/runtime/neurun/backend/acl_cl/operand/CLTensor.cc
deleted file mode 100644
index dab74e65f..000000000
--- a/runtime/neurun/backend/acl_cl/operand/CLTensor.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CLTensor.h"
-
-#include <arm_compute/runtime/CL/CLScheduler.h>
-#include <arm_compute/runtime/CL/CLMemory.h>
-#include <arm_compute/runtime/CL/CLMemoryRegion.h>
-
-#include <Convert.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-namespace operand
-{
-
-CLTensor::CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses)
- : _cl_tensor(std::make_shared<arm_compute::CLTensor>()), _rank{rank}, _num_uses{num_uses}
-{
- allocator()->init(info);
-}
-
-const arm_compute::CLTensor *CLTensor::handle() const { return _cl_tensor.get(); }
-
-arm_compute::CLTensor *CLTensor::handle() { return _cl_tensor.get(); }
-
-arm_compute::CLTensorAllocator *CLTensor::allocator() { return _cl_tensor->allocator(); }
-
-void CLTensor::map(bool blocking) { _cl_tensor->map(blocking); }
-
-void CLTensor::unmap() { _cl_tensor->unmap(); }
-
-void CLTensor::setBuffer(void *host_ptr)
-{
- // Constructs a Buffer on a user-supplied memory
- auto buffer = cl::Buffer(arm_compute::CLScheduler::get().context(),
- CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, info()->total_size(), host_ptr);
- // import memory
- allocator()->import_memory(buffer);
-}
-
-} // namespace operand
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_cl/operand/CLTensor.h b/runtime/neurun/backend/acl_cl/operand/CLTensor.h
deleted file mode 100644
index 8518bf0c3..000000000
--- a/runtime/neurun/backend/acl_cl/operand/CLTensor.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_CL_OPERAND_CL_TENSOR_H__
-#define __NEURUN_BACKEND_ACL_CL_OPERAND_CL_TENSOR_H__
-
-#include <arm_compute/core/TensorInfo.h>
-#include <arm_compute/runtime/CL/CLTensor.h>
-#include <arm_compute/runtime/CL/CLScheduler.h>
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "ICLTensor.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-namespace operand
-{
-
-class CLTensor : public ICLTensor
-{
-public:
- CLTensor() = delete;
-
-public:
- CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses);
-
-public:
- size_t num_dimensions() const final { return _rank; }
-
-public:
- const arm_compute::CLTensor *handle() const override;
- arm_compute::CLTensor *handle() override;
- size_t num_uses() const { return _num_uses; }
-
-public:
- arm_compute::CLTensorAllocator *allocator();
- void map(bool blocking = true);
- void unmap();
- /** Set given buffer as the buffer of the tensor
- *
- * @note Ownership of the memory is not transferred to this object.
- * Thus management (allocate/free) should be done by the client.
- *
- * @param[in] host_ptr Storage to be used.
- */
- void setBuffer(void *host_ptr);
-
-private:
- std::shared_ptr<arm_compute::CLTensor> _cl_tensor;
- size_t _rank;
- size_t _num_uses;
-};
-
-} // namespace operand
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_CL_OPERAND_CL_TENSOR_H__
diff --git a/runtime/neurun/backend/acl_cl/operand/ICLTensor.cc b/runtime/neurun/backend/acl_cl/operand/ICLTensor.cc
deleted file mode 100644
index 6b14584e0..000000000
--- a/runtime/neurun/backend/acl_cl/operand/ICLTensor.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ICLTensor.h"
-
-#include <arm_compute/runtime/CL/CLScheduler.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-namespace operand
-{
-
-void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn)
-{
- auto &queue = ::arm_compute::CLScheduler::get().queue();
-
- // This is an optional input
- if (total_size() == 0)
- return;
-
- map(queue);
- fn(*this);
- unmap(queue);
-}
-} // namespace operand
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_cl/operand/ICLTensor.h b/runtime/neurun/backend/acl_cl/operand/ICLTensor.h
deleted file mode 100644
index 68e4e7fc5..000000000
--- a/runtime/neurun/backend/acl_cl/operand/ICLTensor.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_CL_OPERAND_I_CL_TENSOR_H__
-#define __NEURUN_BACKEND_ACL_CL_OPERAND_I_CL_TENSOR_H__
-
-#include <arm_compute/core/CL/ICLTensor.h>
-
-#include <IACLTensor.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_cl
-{
-namespace operand
-{
-
-class ICLTensor : public acl_common::IACLTensor
-{
-public:
- const arm_compute::ICLTensor *handle() const override = 0;
- arm_compute::ICLTensor *handle() override = 0;
-
-public:
- void map(cl::CommandQueue &q, bool blocking = true) { return handle()->map(q, blocking); }
- void unmap(cl::CommandQueue &q) { return handle()->unmap(q); }
- void access(const std::function<void(ITensor &tensor)> &fn) final;
-};
-
-} // namespace operand
-} // namespace acl_cl
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_CL_OPERAND_I_CL_TENSOR_H__
diff --git a/runtime/neurun/backend/acl_common/AclFunction.h b/runtime/neurun/backend/acl_common/AclFunction.h
deleted file mode 100644
index a63f3807b..000000000
--- a/runtime/neurun/backend/acl_common/AclFunction.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_COMMON_KERNEL_ACL_FUNCTION_H__
-#define __NEURUN_BACKEND_ACL_COMMON_KERNEL_ACL_FUNCTION_H__
-
-#include <exec/IFunction.h>
-#include <arm_compute/runtime/IFunction.h>
-#include <arm_compute/runtime/CL/CLScheduler.h>
-#include <memory>
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-class AclFunction : public ::neurun::exec::IFunction
-{
-public:
- AclFunction() = delete;
-
-public:
- AclFunction(std::unique_ptr<::arm_compute::IFunction> &&func) : _func(std::move(func))
- {
- // DO NOTHING
- }
-
-public:
- void run() override { _func->run(); }
- void runSync() override
- {
- run();
- arm_compute::CLScheduler::get().sync();
- }
- void prepare() override { _func->prepare(); }
-
-private:
- std::unique_ptr<::arm_compute::IFunction> _func;
-};
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_COMMON_KERNEL_ACL_FUNCTION_H__
diff --git a/runtime/neurun/backend/acl_common/AclInternalBufferManager.h b/runtime/neurun/backend/acl_common/AclInternalBufferManager.h
deleted file mode 100644
index b7183c86e..000000000
--- a/runtime/neurun/backend/acl_common/AclInternalBufferManager.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_COMMON_INTERNAL_BUFFER_MANAGER_H__
-#define __NEURUN_BACKEND_ACL_COMMON_INTERNAL_BUFFER_MANAGER_H__
-
-#include <arm_compute/runtime/IMemoryManager.h>
-#include <cassert>
-#include <memory>
-#include <backend/IMemoryManager.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-// NOTE. If any backend can use something like InternalBufferManager,
-// this interface can be moved to core/include/backend/
-/**
- * @brief Interface for InternalBufferManager which has ::arm_compute::IMemoryManager pointer
- */
-struct IInternalBufferManager : public backend::IMemoryManager
-{
- virtual ~IInternalBufferManager() = default;
-
- /**
- * @brief Get shared_ptr of ::arm_compute::IMemoryManager
- */
- virtual std::shared_ptr<::arm_compute::IMemoryManager> internal_buffer_manager(void) = 0;
-};
-
-/**
- * @brief class for InternalBufferManager which has ::arm_compute::IMemoryManager pointer
- */
-template <typename T_MemoryManager, typename T_PoolManager, typename T_LifetimeManager,
- typename T_Allocator>
-class AclInternalBufferManager : public IInternalBufferManager
-{
-public:
- AclInternalBufferManager() : _allocator{nullptr}
- {
- std::shared_ptr<T_LifetimeManager> lifetime_mgr = std::make_shared<T_LifetimeManager>();
- std::shared_ptr<T_PoolManager> pool_mgr = std::make_shared<T_PoolManager>();
-
- _internal_manager = std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
- assert(_internal_manager);
- }
-
- virtual ~AclInternalBufferManager() = default;
-
- /**
- * @brief Allocate the internal buffer manager on acl
- */
- void allocate(void) override
- {
- _allocator = std::make_shared<T_Allocator>();
- _internal_manager->populate(*_allocator, 1);
- }
-
- /**
- * @brief Deallocate the internal buffer manager on acl
- */
- void deallocate(void) override { _internal_manager->clear(); }
-
- /**
- * @brief Get shared_ptr of ::arm_compute::IMemoryManager
- */
- std::shared_ptr<::arm_compute::IMemoryManager> internal_buffer_manager(void) override
- {
- return _internal_manager;
- }
-
-private:
- std::shared_ptr<T_Allocator> _allocator;
- std::shared_ptr<T_MemoryManager> _internal_manager;
-};
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_COMMON_INTERNAL_BUFFER_MANAGER_H__
diff --git a/runtime/neurun/backend/acl_common/AclLinearMemoryManager.h b/runtime/neurun/backend/acl_common/AclLinearMemoryManager.h
deleted file mode 100644
index 7ed719bc3..000000000
--- a/runtime/neurun/backend/acl_common/AclLinearMemoryManager.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_COMMON_LINEAR_MEMORY_MANAGER_H__
-#define __NEURUN_BACKEND_ACL_COMMON_LINEAR_MEMORY_MANAGER_H__
-
-#include <cassert>
-
-#include "AclMemoryManager.h"
-#include "ir/OperandIndexMap.h"
-#include "util/logging.h"
-
-namespace
-{
-
-template <typename T_MemoryManager, typename T_PoolManager, typename T_LifetimeManager>
-std::shared_ptr<T_MemoryManager> createMemoryManager()
-{
- std::shared_ptr<T_LifetimeManager> lifetime_mgr = std::make_shared<T_LifetimeManager>();
- std::shared_ptr<T_PoolManager> pool_mgr = std::make_shared<T_PoolManager>();
-
- std::shared_ptr<T_MemoryManager> mem_mgr =
- std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
- return mem_mgr;
-}
-
-} // namespace anonymous
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor, typename T_MemoryManager,
- typename T_PoolManager, typename T_LifetimeManager, typename T_Allocator,
- typename T_MemoryGroup>
-class AclLinearMemoryManager : public AclMemoryManager<T_ITensor, T_Tensor, T_SubTensor>
-{
-public:
- AclLinearMemoryManager()
- : _allocator{nullptr},
- _io_manager{createMemoryManager<T_MemoryManager, T_PoolManager, T_LifetimeManager>()},
- _io_group{std::make_shared<T_MemoryGroup>(_io_manager)}
- {
- // DO NOTHING
- }
-
- virtual ~AclLinearMemoryManager() = default;
-
- void allocate(void) override
- {
- _allocator = std::make_shared<T_Allocator>();
- _io_manager->populate(*_allocator, 1);
- _io_group->acquire();
- }
-
- void deallocate(void) override
- {
- _io_group->release();
- _io_manager->clear();
- }
-
- void startLifetime(const ir::OperandIndex &ind) override
- {
- auto &tensors = this->tensors();
- assert(tensors.find(ind) != tensors.end());
-
- auto tensor = tensors[ind];
- assert(tensor->handle());
-
- _io_group->manage(tensor->handle());
- }
-
- void finishLifetime(const ir::OperandIndex &ind) override
- {
- auto &tensors = this->tensors();
- assert(tensors.find(ind) != tensors.end());
-
- auto tensor = tensors[ind];
- assert(tensor->allocator());
-
- tensor->allocator()->allocate();
- }
-
-private:
- std::shared_ptr<T_Allocator> _allocator;
- std::shared_ptr<T_MemoryManager> _io_manager;
- std::shared_ptr<T_MemoryGroup> _io_group;
-};
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_COMMON_LINEAR_MEMORY_MANAGER_H__
diff --git a/runtime/neurun/backend/acl_common/AclMemoryManager.h b/runtime/neurun/backend/acl_common/AclMemoryManager.h
deleted file mode 100644
index af7f3a460..000000000
--- a/runtime/neurun/backend/acl_common/AclMemoryManager.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__
-#define __NEURUN_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__
-
-#include <arm_compute/core/Types.h>
-#include <arm_compute/runtime/IMemoryManager.h>
-#include <cassert>
-
-#include "backend/IMemoryManager.h"
-#include "ir/OperandIndexMap.h"
-#include "Convert.h"
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclMemoryManager : public backend::IMemoryManager
-{
-public:
- AclMemoryManager()
- {
- // DO NOTHING
- }
-
- virtual ~AclMemoryManager() = default;
-
- void allocate(void) override
- {
- for (const auto &tensor_entry : _tensors)
- {
- auto tensor = tensor_entry.second;
- tensor->allocator()->allocate();
- }
- }
-
- void deallocate(void) override
- {
- for (const auto &tensor_entry : _tensors)
- {
- auto tensor = tensor_entry.second;
- tensor->allocator()->free();
- }
- }
-
- virtual void startLifetime(const ir::OperandIndex &) { /* DO NOTHING */}
- virtual void finishLifetime(const ir::OperandIndex &) { /* DO NOTHING */}
-
- void buildTensor(const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank,
- size_t num_uses)
- {
- auto tensor = std::make_shared<T_Tensor>(info, rank, num_uses);
- _tensors[ind] = tensor;
- }
-
- void buildSubtensor(std::shared_ptr<T_ITensor> parent_tensor, const ir::OperandIndex &child_ind,
- const ::arm_compute::TensorShape &shape,
- const ::arm_compute::Coordinates &coordinates, size_t rank,
- bool extent_parent)
- {
- auto subtensor =
- std::make_shared<T_SubTensor>(parent_tensor.get(), shape, coordinates, rank, extent_parent);
- _subtensors[child_ind] = subtensor;
- }
-
- ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &tensors(void) { return _tensors; }
-
- ir::OperandIndexMap<std::shared_ptr<T_SubTensor>> &subtensors(void) { return _subtensors; }
-
-private:
- ir::OperandIndexMap<std::shared_ptr<T_Tensor>> _tensors;
- ir::OperandIndexMap<std::shared_ptr<T_SubTensor>> _subtensors;
-};
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__
diff --git a/runtime/neurun/backend/acl_common/AclTensorManager.h b/runtime/neurun/backend/acl_common/AclTensorManager.h
deleted file mode 100644
index 48a4c2599..000000000
--- a/runtime/neurun/backend/acl_common/AclTensorManager.h
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
-#define __NEURUN_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
-
-#include <arm_compute/runtime/IMemoryManager.h>
-
-#include "backend/ITensorManager.h"
-#include "AclMemoryManager.h"
-#include "AclInternalBufferManager.h"
-#include "ir/OperandIndexMap.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclTensorManager : public backend::ITensorManager
-{
-public:
- using T_AclMemoryManager = AclMemoryManager<T_ITensor, T_Tensor, T_SubTensor>;
-
- AclTensorManager(T_AclMemoryManager *const_mgr, T_AclMemoryManager *nonconst_mgr,
- IInternalBufferManager *inter_mgr);
-
- virtual ~AclTensorManager() = default;
-
- void allocateConsts(void) override;
- void allocateNonconsts(void) override;
- void deallocateConsts(void) override;
- void deallocateNonconsts(void) override;
-
- void allocateInternalBufferManager(void);
- void deallocateInternalBufferManager(void);
-
- void buildTensor(const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank,
- bool as_const, size_t num_uses);
- void buildSubtensor(const ir::OperandIndex &parent, const ir::OperandIndex &child,
- const ::arm_compute::TensorShape &shape,
- const ::arm_compute::Coordinates &coordinates, size_t rank,
- bool extent_parent);
-
- std::shared_ptr<T_ITensor> findTensorAsParent(const ir::OperandIndex &ind);
-
- void startLifetime(const ir::OperandIndex &ind);
- void finishLifetime(const ir::OperandIndex &ind);
-
- std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind);
-
- ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &constTensors(void);
- ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &nonconstTensors(void);
- ir::OperandIndexMap<std::shared_ptr<T_SubTensor>> &nonconstSubtensors(void);
-
- std::shared_ptr<::arm_compute::IMemoryManager> internal_buffer_manager(void);
-
- void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
-
- void tryDeallocConstants(void);
-
-private:
- std::unique_ptr<T_AclMemoryManager> _const_mgr;
- std::unique_ptr<T_AclMemoryManager> _nonconst_mgr;
- std::unique_ptr<IInternalBufferManager> _inter_mgr;
- ir::OperandIndexMap<T_AclMemoryManager &> _ind_to_mgr;
-};
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
-
-#include <cassert>
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::AclTensorManager(
- T_AclMemoryManager *const_mgr, T_AclMemoryManager *nonconst_mgr,
- IInternalBufferManager *inter_mgr)
- : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}, _inter_mgr{inter_mgr}
-{
- // DO NOTHING
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::allocateConsts(void)
-{
- _const_mgr->allocate();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::allocateNonconsts(void)
-{
- _nonconst_mgr->allocate();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::deallocateConsts(void)
-{
- _const_mgr->deallocate();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::deallocateNonconsts(void)
-{
- _nonconst_mgr->deallocate();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::allocateInternalBufferManager(void)
-{
- _inter_mgr->allocate();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::deallocateInternalBufferManager(void)
-{
- _inter_mgr->deallocate();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::buildTensor(
- const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank, bool as_const,
- size_t num_uses)
-{
- assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
- if (as_const)
- {
- _const_mgr->buildTensor(ind, info, rank, num_uses);
- _ind_to_mgr.insert({ind, *_const_mgr});
- }
- else
- {
- _nonconst_mgr->buildTensor(ind, info, rank, num_uses);
- _ind_to_mgr.insert({ind, *_nonconst_mgr});
- }
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::buildSubtensor(
- const ir::OperandIndex &parent, const ir::OperandIndex &child,
- const ::arm_compute::TensorShape &shape, const ::arm_compute::Coordinates &coordinates,
- size_t rank, bool extent_parent)
-{
- assert(_ind_to_mgr.find(child) == _ind_to_mgr.end());
- std::shared_ptr<T_ITensor> parent_tensor = findTensorAsParent(parent);
- assert(parent_tensor);
- _nonconst_mgr->buildSubtensor(parent_tensor, child, shape, coordinates, rank, extent_parent);
- _ind_to_mgr.insert({child, *_nonconst_mgr});
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<T_ITensor>
-AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::findTensorAsParent(const ir::OperandIndex &ind)
-{
-
- auto &tensors = _nonconst_mgr->tensors();
- auto &subtensors = _nonconst_mgr->subtensors();
- if (tensors.find(ind) != tensors.end())
- {
- // Parent is allocated as tensor
- return tensors[ind];
- }
- else if (subtensors.find(ind) != subtensors.end())
- {
- // Parent is allocated as subtensor
- return subtensors[ind];
- }
- else
- {
- return nullptr;
- }
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::startLifetime(const ir::OperandIndex &ind)
-{
- assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
- _ind_to_mgr.at(ind).startLifetime(ind);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::finishLifetime(const ir::OperandIndex &ind)
-{
- assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
- _ind_to_mgr.at(ind).finishLifetime(ind);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<T_ITensor>
-AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind)
-{
- assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
-
- auto &tensors = _ind_to_mgr.at(ind).tensors();
- if (tensors.find(ind) != tensors.end())
- {
- return tensors.at(ind);
- }
- else
- {
- return _ind_to_mgr.at(ind).subtensors().at(ind);
- }
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &
-AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::constTensors(void)
-{
- return _const_mgr->tensors();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &
-AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::nonconstTensors(void)
-{
- return _nonconst_mgr->tensors();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-ir::OperandIndexMap<std::shared_ptr<T_SubTensor>> &
-AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::nonconstSubtensors(void)
-{
- return _nonconst_mgr->subtensors();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<::arm_compute::IMemoryManager>
-AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::internal_buffer_manager(void)
-{
- return _inter_mgr->internal_buffer_manager();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::iterate(
- const std::function<void(const ir::OperandIndex &)> &fn)
-{
- for (auto it : _nonconst_mgr->tensors())
- fn(it.first);
-
- for (auto it : _nonconst_mgr->subtensors())
- fn(it.first);
-
- for (auto it : _const_mgr->tensors())
- fn(it.first);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::tryDeallocConstants(void)
-{
- auto &tensors = _const_mgr->tensors();
-
- for (auto it = tensors.begin(); it != tensors.end();)
- {
- const auto &ind = it->first;
- auto tensor = it->second;
- // NOTE The condition "tensor->num_uses() < 2" is used to prevent deallocating a constant tensor
- // used in several nodes.
- if (tensor->handle() && !tensor->handle()->is_used() && tensor->num_uses() < 2)
- {
- VERBOSE(AclTensorManager) << "Tensor #" << ind.value()
- << " will be deallocated as an unused constant tensor" << std::endl;
- tensor->allocator()->free();
- tensor.reset();
- it = tensors.erase(it);
- }
- else
- {
- ++it;
- }
- }
-}
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
diff --git a/runtime/neurun/backend/acl_common/AclTensorRegister.cc b/runtime/neurun/backend/acl_common/AclTensorRegister.cc
deleted file mode 100644
index 07cbece1e..000000000
--- a/runtime/neurun/backend/acl_common/AclTensorRegister.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AclTensorRegister.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-AclTensorRegister::AclTensorRegister(const ir::Operands &operands,
- const std::shared_ptr<ITensorBuilder> &tensor_builder)
- : _operands{operands}, _tensor_builder{tensor_builder}
-{
- assert(tensor_builder != nullptr);
-}
-
-void AclTensorRegister::visit(const ir::OpSequence &op_seq)
-{
- for (const auto &e : op_seq.operations())
- {
- const auto &node = *(e.node);
- node.accept(*this);
- // Set count of nodes to use operand
- for (const auto &input : node.getInputs())
- {
- setUsesCount(input, _operands.at(input).getUses().size());
- }
- }
-}
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_common/AclTensorRegister.h b/runtime/neurun/backend/acl_common/AclTensorRegister.h
deleted file mode 100644
index 905e19adb..000000000
--- a/runtime/neurun/backend/acl_common/AclTensorRegister.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_COMMON_TENSOR_REGISTER_H__
-#define __NEURUN_BACKEND_ACL_COMMON_TENSOR_REGISTER_H__
-
-#include <backend/ITensorRegister.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-class AclTensorRegister : public ITensorRegister
-{
-protected:
- AclTensorRegister(const ir::Operands &operands,
- const std::shared_ptr<ITensorBuilder> &tensor_builder);
-
-public:
- virtual ~AclTensorRegister() = default;
-
-protected:
- void visit(const ir::OpSequence &op_seq);
- virtual void setUsesCount(const ir::OperandIndex &ind, size_t num_uses) const = 0;
-
-protected:
- const ir::Operands &operands() const override { return _operands; }
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
- bool supportSubTensor() const final { return true; }
-
-private:
- const ir::Operands &_operands;
- const std::shared_ptr<ITensorBuilder> _tensor_builder;
-};
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_COMMON_TENSOR_REGISTER_H__
diff --git a/runtime/neurun/backend/acl_common/CMakeLists.txt b/runtime/neurun/backend/acl_common/CMakeLists.txt
deleted file mode 100644
index 32d01291f..000000000
--- a/runtime/neurun/backend/acl_common/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Unsupported architecture
-nnas_find_package(ARMCompute QUIET)
-if(NOT ARMCompute_FOUND)
- return()
-endif(NOT ARMCompute_FOUND)
-
-file(GLOB SOURCES "*.cc")
-
-add_library(${LIB_NEURUN_BACKEND_ACL_COMMON} STATIC ${SOURCES})
-
-target_include_directories(${LIB_NEURUN_BACKEND_ACL_COMMON} PUBLIC ${NEURUN_INCLUDE_DIR})
-target_include_directories(${LIB_NEURUN_BACKEND_ACL_COMMON} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(${LIB_NEURUN_BACKEND_ACL_COMMON} PUBLIC arm_compute arm_compute_ex)
-target_link_libraries(${LIB_NEURUN_BACKEND_ACL_COMMON} PUBLIC nnfw_lib_misc nnfw_lib_cpp14)
-target_link_libraries(${LIB_NEURUN_BACKEND_ACL_COMMON} PRIVATE nnfw_common)
-target_link_libraries(${LIB_NEURUN_BACKEND_ACL_COMMON} PRIVATE nnfw_coverage)
-
-set_target_properties(${LIB_NEURUN_BACKEND_ACL_COMMON} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-set_target_properties(${LIB_NEURUN_BACKEND_ACL_COMMON} PROPERTIES OUTPUT_NAME backend_acl_common)
diff --git a/runtime/neurun/backend/acl_common/Convert.cc b/runtime/neurun/backend/acl_common/Convert.cc
deleted file mode 100644
index fc5be72a2..000000000
--- a/runtime/neurun/backend/acl_common/Convert.cc
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Convert.h"
-
-#include "Swizzle.h"
-#include "ir/DataType.h"
-#include <cpp14/memory.h>
-
-namespace
-{
-
-::arm_compute::DataLayout asDataLayout(neurun::ir::Layout layout)
-{
- switch (layout)
- {
- case neurun::ir::Layout::NHWC:
- return ::arm_compute::DataLayout::NHWC;
- case neurun::ir::Layout::NCHW:
- return ::arm_compute::DataLayout::NCHW;
- default:
- return ::arm_compute::DataLayout::UNKNOWN;
- }
-}
-
-} // namespace
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-::arm_compute::TensorShape asTensorShape(const ir::Shape &shape, ir::Layout frontend_layout,
- ir::Layout backend_layout, bool apply_dim_correction)
-{
- const uint32_t rank = shape.rank();
-
- ::arm_compute::TensorShape res{};
-
- res.set_num_dimensions(rank);
-
- for (uint32_t axis = 0; axis < rank; ++axis)
- {
- // NOTE In some cases, in incorrect dimensions is required.
- // For example, intput_size is 1 in LSTM. The input-to-input weights([num_units, input_size]) of
- // LSTM is used as the weight of the FullyConnected.
- // The FullyConnected's weight must be greater or equal than 2-dimensions.
- // However, if the dimension correction is applied to input_to_input_weights with input_size
- // equal to 1, it will be changed to 1-D.
- // So input_to_input_weights is not used by the weight of FullyConnected.
- res.set(ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(), shape.dim(axis),
- apply_dim_correction);
- }
-
- return res;
-}
-
-::arm_compute::Coordinates asTensorCoordinate(const ::neurun::util::Coordinates &coord,
- ir::Layout frontend_layout, ir::Layout backend_layout)
-{
- const uint32_t rank = coord.size();
-
- ::arm_compute::Coordinates res{};
-
- res.set_num_dimensions(rank);
-
- for (uint32_t axis = 0; axis < rank; ++axis)
- {
- res.set(ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(), coord[axis]);
- }
-
- return res;
-}
-
-::arm_compute::DataType asDataType(const ir::DataType type)
-{
- switch (type)
- {
- case ir::DataType::FLOAT32:
- return ::arm_compute::DataType::F32;
- case ir::DataType::INT32:
- return ::arm_compute::DataType::S32;
- case ir::DataType::UINT32:
- return ::arm_compute::DataType::U32;
- case ir::DataType::QUANT8_ASYMM:
- return ::arm_compute::DataType::QASYMM8;
- case ir::DataType::BOOL8:
- case ir::DataType::UINT8:
- return ::arm_compute::DataType::U8;
- case ir::DataType::QUANT8_SYMM:
- return ::arm_compute::DataType::S8;
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
-}
-
-::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset)
-{
- return ::arm_compute::QuantizationInfo(scale, offset);
-}
-
-::arm_compute::TensorInfo asTensorInfo(const ir::Shape &shape, const ir::TypeInfo &typeInfo,
- ir::Layout frontend_layout, ir::Layout backend_layout,
- bool apply_dim_correction)
-{
- ::arm_compute::TensorInfo info(
- asTensorShape(shape, frontend_layout, backend_layout, apply_dim_correction), 1,
- asDataType(typeInfo.type()), asQuantizationInfo(typeInfo.scale(), typeInfo.offset()));
- info.set_data_layout(asDataLayout(backend_layout));
- return info;
-}
-
-::arm_compute::PadStrideInfo asPadStrideInfo(const ir::ExplicitPadding &padding,
- const ir::Stride &stride)
-{
- return ::arm_compute::PadStrideInfo{stride.horizontal,
- stride.vertical,
- padding.left,
- padding.right,
- padding.top,
- padding.bottom,
- ::arm_compute::DimensionRoundingType::FLOOR};
-}
-
-::arm_compute::ActivationLayerInfo asActivationLayerInfo(const ir::Activation act_code)
-{
- switch (act_code)
- {
- case ir::Activation::NONE:
- return ::arm_compute::ActivationLayerInfo{};
- case ir::Activation::RELU:
- return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
- case ir::Activation::RELU1:
- return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
- case ir::Activation::RELU6:
- return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
- // Cases for activation of LSTM.
- case ir::Activation::TANH:
- return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
- case ir::Activation::SIGMOID:
- // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
- // TODO In ACL and nnapi sepc, currently, Logistic's L always is 1, k always is 1, x0 always
- // 0(always sigmoid) regardless of values of the parameter.
- // If ACL support non-sigmoid logistic, should fix param values.
- return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
- default:
- throw std::runtime_error{"Not supported, yet"};
- break;
- }
-}
-
-std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer)
-{
- return nnfw::cpp14::make_unique<AclFunction>(std::move(layer));
-}
-
-ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout)
-{
- switch (data_layout)
- {
- case ::arm_compute::DataLayout::NHWC:
- return ir::Layout::NHWC;
- case ::arm_compute::DataLayout::NCHW:
- return ir::Layout::NCHW;
- default:
- return ir::Layout::UNKNOWN;
- }
-}
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_common/Convert.h b/runtime/neurun/backend/acl_common/Convert.h
deleted file mode 100644
index 4c638157b..000000000
--- a/runtime/neurun/backend/acl_common/Convert.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_COMMON_CONVERT_H__
-#define __NEURUN_BACKEND_ACL_COMMON_CONVERT_H__
-
-#include <arm_compute/core/TensorInfo.h>
-#include <arm_compute/core/SubTensorInfo.h>
-#include <arm_compute/core/TensorShape.h>
-
-#include "ir/Layout.h"
-#include "ir/InternalType.h"
-#include "ir/Operand.h"
-#include "ir/Shape.h"
-#include "ir/TypeInfo.h"
-#include "misc/feature/Shape.h"
-#include "misc/kernel/Shape.h"
-
-#include "util/Padding.h"
-#include "util/Coordinates.h"
-
-#include "AclFunction.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-::arm_compute::TensorShape asTensorShape(const ir::Shape &shape, ir::Layout frontend_layout,
- ir::Layout backend_layout,
- bool apply_dim_correction = true);
-::arm_compute::Coordinates asTensorCoordinate(const ::neurun::util::Coordinates &coord,
- ir::Layout frontend_layout,
- ir::Layout backend_layout);
-::arm_compute::DataType asDataType(ir::DataType type);
-::arm_compute::TensorInfo asTensorInfo(const ir::Shape &shape, const ir::TypeInfo &typeInfo,
- ir::Layout frontend_layout, ir::Layout backend_layout,
- bool apply_dim_correction = true);
-
-::arm_compute::PadStrideInfo asPadStrideInfo(const ir::ExplicitPadding &padding,
- const ir::Stride &stride);
-
-::arm_compute::ActivationLayerInfo asActivationLayerInfo(ir::Activation act_code);
-
-std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
-
-ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout);
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_COMMON_CONVERT_H__
diff --git a/runtime/neurun/backend/acl_common/IACLTensor.cc b/runtime/neurun/backend/acl_common/IACLTensor.cc
deleted file mode 100644
index 70ffbdcf0..000000000
--- a/runtime/neurun/backend/acl_common/IACLTensor.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "IACLTensor.h"
-#include "Convert.h"
-#include "Swizzle.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-size_t IACLTensor::num_dimensions() const
-{
- throw std::runtime_error("No definition of num_dimensions()");
- return 0;
-}
-
-size_t IACLTensor::dimension(size_t index) const
-{
- // Assume that the front is higher dimensional.
- // i.g. N: 0, C: 1, H: 2, W: 3 for NCHW layout
- // NOTE This tensor must not be applied dim correction
- assert(num_dimensions() > index);
- const ARMComputeAxis reversed{(static_cast<uint32_t>(num_dimensions() - index) - 1)};
- return info()->dimension(reversed.value());
-}
-
-size_t IACLTensor::calcOffset(const neurun::util::Coordinates &coords) const
-{
- const auto rank = num_dimensions();
- assert(rank == coords.size());
-
- ::arm_compute::Coordinates acl_coords;
- for (uint32_t i = 0; i < rank; ++i)
- {
- const ARMComputeAxis reversed{static_cast<uint32_t>((rank - i) - 1)};
- acl_coords.set(reversed.value(), coords[i]);
- }
-
- return info()->offset_element_in_bytes(acl_coords);
-}
-
-ir::Layout IACLTensor::layout() const { return acl_common::asRuntimeLayout(info()->data_layout()); }
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_common/IACLTensor.h b/runtime/neurun/backend/acl_common/IACLTensor.h
deleted file mode 100644
index b0dcbb409..000000000
--- a/runtime/neurun/backend/acl_common/IACLTensor.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_COMMON_I_ACL_TENSOR_H__
-#define __NEURUN_BACKEND_ACL_COMMON_I_ACL_TENSOR_H__
-
-#include <backend/operand/ITensor.h>
-#include <arm_compute/core/ITensor.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-class IACLTensor : public operand::ITensor
-{
-public:
- IACLTensor() = default;
- IACLTensor(const IACLTensor &) = delete;
- IACLTensor &operator=(const IACLTensor &) = delete;
- IACLTensor(IACLTensor &&) = default;
- IACLTensor &operator=(IACLTensor &&) = default;
-
-public:
- uint8_t *buffer() const final { return handle()->buffer(); }
- size_t total_size() const final { return info()->total_size(); }
- size_t dimension(size_t index) const final;
- size_t num_dimensions() const override;
- size_t calcOffset(const neurun::util::Coordinates &coords) const final;
- ir::Layout layout() const final;
- bool has_padding() const override { return info()->has_padding(); }
-
-public:
- virtual const arm_compute::ITensor *handle() const = 0;
- virtual arm_compute::ITensor *handle() = 0;
-
- const arm_compute::ITensorInfo *info() const { return handle()->info(); }
- arm_compute::ITensorInfo *info() { return handle()->info(); }
-
- arm_compute::DataType data_type() const { return info()->data_type(); }
-};
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
-
-#endif //__NEURUN_BACKEND_ACL_COMMON_I_ACL_TENSOR_H__
diff --git a/runtime/neurun/backend/acl_common/Swizzle.h b/runtime/neurun/backend/acl_common/Swizzle.h
deleted file mode 100644
index 11874b592..000000000
--- a/runtime/neurun/backend/acl_common/Swizzle.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_COMMON_SWIZZLE_H__
-#define __NEURUN_BACKEND_ACL_COMMON_SWIZZLE_H__
-
-#include <cassert>
-#include <ir/Layout.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-class ARMComputeAxis
-{
-public:
- ARMComputeAxis() = default;
-
-public:
- explicit ARMComputeAxis(uint32_t value) : _value{value}
- {
- // DO NOTHING
- }
-
-public:
- uint32_t value(void) const { return _value; }
-
-private:
- uint32_t _value;
-};
-
-// Convert axis in acl order
-inline ARMComputeAxis ToARMComputeAxis(uint32_t rank, uint32_t axis,
- const ir::Layout org_layout = ir::Layout::UNKNOWN,
- const ir::Layout acl_layout = ir::Layout::UNKNOWN)
-{
- assert(rank > axis);
-
- const ARMComputeAxis reversed{(rank - axis) - 1};
-
- if (rank >= 4 && org_layout == ir::Layout::NHWC && acl_layout == ir::Layout::NCHW)
- {
- // NHWC -> WHCN
- // DEPTH
- if (0 == reversed.value())
- {
- return ARMComputeAxis{2};
- }
- // WIDTH
- if (1 == reversed.value())
- {
- return ARMComputeAxis{0};
- }
- // HEIGHT
- if (2 == reversed.value())
- {
- return ARMComputeAxis{1};
- }
- }
- if (rank >= 4 && org_layout == ir::Layout::NCHW && acl_layout == ir::Layout::NHWC)
- {
- // NCHW -> CWHN
- // WIDTH
- if (0 == reversed.value())
- {
- return ARMComputeAxis{1};
- }
- // HEIGHT
- if (1 == reversed.value())
- {
- return ARMComputeAxis{2};
- }
- // DEPTH
- if (2 == reversed.value())
- {
- return ARMComputeAxis{0};
- }
- }
-
- return reversed;
-}
-
-inline ::arm_compute::Coordinates
-getARMComputeAxises(uint32_t rank, const ir::Layout org_layout = ir::Layout::UNKNOWN,
- const ir::Layout acl_layout = ir::Layout::UNKNOWN)
-{
- ::arm_compute::Coordinates res{};
-
- res.set_num_dimensions(rank);
-
- for (uint32_t axis = 0; axis < rank; ++axis)
- {
- res.set(axis, ToARMComputeAxis(rank, axis, org_layout, acl_layout).value());
- }
-
- return res;
-}
-
-// Restructure runtime_permutationVector to ACL_permutationVector
-inline ::arm_compute::PermutationVector
-getARMComputePermutationVector(uint32_t rank, const std::vector<int32_t> runtime_pv,
- const ir::Layout org_layout = ir::Layout::UNKNOWN,
- const ir::Layout acl_layout = ir::Layout::UNKNOWN)
-{
- // rank upto 4 is supported
- assert(rank <= 4);
- assert(runtime_pv.size() > 0);
-
- int new_pv[4] = {0};
- ::arm_compute::Coordinates axises = getARMComputeAxises(rank, org_layout, acl_layout);
-
- for (uint32_t i = 0; i < rank; ++i)
- {
- new_pv[axises[i]] = ToARMComputeAxis(rank, runtime_pv[i], org_layout, acl_layout).value();
- }
-
- ::arm_compute::PermutationVector ACL_PV =
- ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
- ACL_PV.set_num_dimensions(rank);
-
- return ACL_PV;
-}
-
-template <typename T>
-inline T ReorderBits(T in, size_t numOfBits, const ir::Layout org_layout = ir::Layout::UNKNOWN,
- const ir::Layout acl_layout = ir::Layout::UNKNOWN)
-{
- assert(numOfBits > 0);
- T out = 0;
- for (int32_t i = numOfBits - 1; i >= 0; --i)
- {
- const uint32_t toShift =
- numOfBits - ToARMComputeAxis(numOfBits, i, org_layout, acl_layout).value() - 1;
- out += ((in & 1) << toShift);
- in >>= 1;
- }
- return out;
-}
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_COMMON_SWIZZLE_H__
diff --git a/runtime/neurun/backend/acl_common/TemplTensorBuilder.h b/runtime/neurun/backend/acl_common/TemplTensorBuilder.h
deleted file mode 100644
index bb43823ed..000000000
--- a/runtime/neurun/backend/acl_common/TemplTensorBuilder.h
+++ /dev/null
@@ -1,612 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
-#define __NEURUN_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
-
-#include <memory>
-#include <queue>
-
-#include <arm_compute/core/Types.h>
-#include <backend/ITensorBuilder.h>
-#include "ir/OperandIndexMap.h"
-#include "AclTensorManager.h"
-#include "cpp14/memory.h"
-#include <util/Utils.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-enum class UsesType
-{
- FIRST,
- LAST
-};
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class TemplTensorBuilder : public ITensorBuilder
-{
-public:
- using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
-
- TemplTensorBuilder(T_AclTensorManager *tensor_mgr);
-
- /**
- * @brief Register tensor information to allocate on ACL-CL backend
- * @param[in] ind Operand index
- * @param[in] info Tensor information
- * @param[in] layout Tensor data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout, bool as_const) override;
- /**
- * @brief Register subtensor information to allocate on ACL-CL backend
- * @param[in] ind Operand index
- * @param[in] info Tensor information
- */
- void registerSubTensorInfo(const ir::OperandIndex &ind,
- const compiler::SubTensorInfo &info) override;
-
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
-
- bool isRegistered(const ir::OperandIndex &) const override;
-
- void prepare(void) override;
- void allocateConsts() override;
- void allocateNonconsts() override;
- void postFunctionPrepare() override;
- void finalize() override;
-
- std::shared_ptr<::neurun::backend::operand::ITensor>
- tensorAt(const ir::OperandIndex &ind) override;
- void iterate(const IterateFunction &fn) override;
-
- void preVisit(const ir::Operation &node) override;
- void postVisit(const ir::Operation &node) override;
-
- std::unique_ptr<ITensorManager> releaseTensorManager(void) override;
-
- std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind);
- /**
- * @brief Check child tensor is allocated as subtensor of parent tensor
- * @param[in] parent Index of parent
- * @param[in] child Index of child
- * @return @c true if child is allocated as subtensor of parent, otherwise @c false
- */
- bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
-
- void dimCorrection(const ir::OperandIndex &index, bool apply_dim_correction);
-
- T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
-
- void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
- {
- assert(_uses_count_map.find(index) != _uses_count_map.end() ? _uses_count_map[index] == num_uses
- : true);
- _uses_count_map[index] = num_uses;
- }
-
-private:
- void buildTensors(void);
- void buildSubtensors(void);
- void validate(void);
- ir::OperandIndex findRootParent(ir::OperandIndex index);
-
-private:
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
- ir::OperandIndexMap<compiler::SubTensorInfo> _subtensor_info_map;
- ir::OperandIndexMap<bool> _apply_dim_correction_map;
- ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
- ir::OperandIndexMap<size_t> _uses_count_map;
-
- std::unique_ptr<T_AclTensorManager> _tensor_mgr;
- ir::OperandIndexSequence _constants;
-
- // TODO Consider dividing TensorBuilder into Linear and others
- const std::string _executor_str;
-
- // for linear executor
- std::queue<std::pair<UsesType, ir::OperandIndex>> _uses_queue;
- uint32_t _first_uses_num;
- ir::OperandIndexMap<bool> _first_uses_visit;
-
- // for subtensors
- ir::OperandIndexMap<uint32_t> _parent_def;
- ir::OperandIndexMap<uint32_t> _parent_uses;
-};
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
-
-#include <cassert>
-#include <stack>
-
-#include "Convert.h"
-
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_common
-{
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::TemplTensorBuilder(
- T_AclTensorManager *tensor_mgr)
- : _tensor_mgr{tensor_mgr}, _executor_str(util::getConfigString(util::config::EXECUTOR)),
- _first_uses_num(0)
-{
- assert(_tensor_mgr);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerTensorInfo(
- const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout backend_layout,
- bool as_const)
-{
- assert(_tensor_mgr->constTensors().size() == 0);
- assert(_tensor_mgr->nonconstTensors().size() == 0);
-
- _tensor_info_map.emplace(ind, info);
- _apply_dim_correction_map.emplace(ind, true);
- _tensor_layout_map.insert({ind, backend_layout});
- if (as_const)
- _constants.append(ind);
-
- assert(_first_uses_visit.find(ind) == _first_uses_visit.end());
- _first_uses_visit[ind] = false;
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerSubTensorInfo(
- const ir::OperandIndex &ind, const compiler::SubTensorInfo &info)
-{
- assert(_tensor_mgr->constTensors().size() == 0);
- assert(_tensor_mgr->nonconstTensors().size() == 0);
-
- _subtensor_info_map.emplace(ind, info);
- _apply_dim_correction_map.emplace(ind, true);
-
- assert(_first_uses_visit.find(ind) == _first_uses_visit.end());
- _first_uses_visit[ind] = false;
-
- const auto &parent_ind = info.parent();
-
- // parent_def
- _parent_def[parent_ind] = 1;
-
- // parent_use
- if (_parent_uses.find(parent_ind) == _parent_uses.end())
- _parent_uses[parent_ind] = 1; // 1 means including parent it-self
- _parent_uses[parent_ind]++;
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::notifyFirstUse(
- const ir::OperandIndex &ind)
-{
- _first_uses_num++;
- _uses_queue.emplace(UsesType::FIRST, ind);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::notifyLastUse(
- const ir::OperandIndex &ind)
-{
- _uses_queue.emplace(UsesType::LAST, ind);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-bool TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isRegistered(
- const ir::OperandIndex &ind) const
-{
- return _tensor_info_map.find(ind) != _tensor_info_map.end() ||
- _subtensor_info_map.find(ind) != _subtensor_info_map.end();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::prepare(void)
-{
- buildTensors();
- buildSubtensors();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::allocateConsts(void)
-{
- assert(_constants.size() == _tensor_mgr->constTensors().size());
- _tensor_mgr->allocateConsts();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::allocateNonconsts(void)
-{
- assert(_tensor_info_map.size() == _tensor_mgr->nonconstTensors().size() + _constants.size());
- _tensor_mgr->allocateNonconsts();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(void)
-{
- _tensor_mgr->tryDeallocConstants();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::finalize(void)
-{
- validate();
- _tensor_mgr->allocateInternalBufferManager();
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<::neurun::backend::operand::ITensor>
-TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::tensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_mgr->at(ind);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::iterate(const IterateFunction &fn)
-{
- _tensor_mgr->iterate(fn);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<T_ITensor>
-TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind)
-{
- return _tensor_mgr->at(ind);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-bool TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isSubTensorOf(
- const ir::OperandIndex &parent, const ir::OperandIndex &child)
-{
- if (_subtensor_info_map.find(child) == _subtensor_info_map.end())
- {
- return false;
- }
-
- auto &subtensors = _tensor_mgr->nonconstSubtensors();
- if (subtensors.find(child) == subtensors.end())
- {
- return false;
- }
-
- if (_subtensor_info_map.at(child).parent() != parent)
- {
- return false;
- }
-
- return true;
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::dimCorrection(
- const ir::OperandIndex &index, bool apply_dim_correction)
-{
- _apply_dim_correction_map[index] = apply_dim_correction;
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::unique_ptr<ITensorManager>
-TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseTensorManager(void)
-{
- return std::move(_tensor_mgr);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
-{
- assert(_tensor_mgr->constTensors().size() == 0);
- assert(_tensor_mgr->nonconstTensors().size() == 0);
-
- for (auto &entry : _tensor_info_map)
- {
- auto ind = entry.first;
- const auto &info = entry.second;
- // NOTE SubTensor's layout must be the same with layout of parent tensor
- const auto &root_parent = findRootParent(ind);
- const auto &backend_layout = _tensor_layout_map[root_parent];
- auto tensor_info = asTensorInfo(info.shape(), info.typeInfo(), ir::Layout::UNKNOWN,
- backend_layout, _apply_dim_correction_map[ind]);
- _tensor_mgr->buildTensor(ind, tensor_info, info.shape().rank(), _constants.contains(ind),
- _uses_count_map[ind]);
- }
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildSubtensors(void)
-{
- // TODO Handle SubTensor(subsumption)
- // Currently this TemplTensorBuilder does not have subsumption info yet
- // Allocated subtensor will be mapped to _subtensors instead of _tensors
- assert(_tensor_mgr->nonconstSubtensors().size() == 0);
-
- // To make subtensor, parent tensor must be made first
- // For this condition, use stack
- // 1) Push one subtensor index to stack (iterate subtensors)
- // 2) If tensor at stack top is already made, pop and go to 4)
- // 3) If tensor pushed at 1) is not made, check parent tensor
- // 3-1) If parent tensor is already made, we can make child tensor
- // Make child tensor and pop, go to 4)
- // 3-2) If parent tensor is not made, we can't make child tensor yet
- // Push parent tensor index to stack and return to 4)
- // 4) If stack is empty, return to 1), else return to 2)
- auto &subtensors = _tensor_mgr->nonconstSubtensors();
- for (auto &entry : _subtensor_info_map)
- {
- ir::OperandIndex ind = entry.first;
-
- std::stack<ir::OperandIndex> stack;
- stack.push(ind);
-
- while (!stack.empty())
- {
- const auto current = stack.top();
- const auto &info = _subtensor_info_map.at(current);
-
- // Already generated SubTensor
- if (subtensors.find(current) != subtensors.end())
- {
- stack.pop();
- continue;
- }
-
- auto parent = info.parent();
- std::shared_ptr<T_ITensor> parent_tensor = _tensor_mgr->findTensorAsParent(parent);
- if (!parent_tensor)
- {
- // Cannot find allocated parent tensor: allocate parent first
- assert(_subtensor_info_map.find(parent) != _subtensor_info_map.end());
- stack.push(parent);
- continue;
- }
- assert(parent_tensor != nullptr);
-
- // Child's type should be same with parent
- assert(info.type().offset() == parent_tensor->info()->quantization_info().offset);
- assert(info.type().scale() == parent_tensor->info()->quantization_info().scale);
- assert(asDataType(info.type().type()) == parent_tensor->info()->data_type());
-
- // NOTE SubTensor's layout must be the same with layout of parent tensor
- const auto &root_parent = findRootParent(parent);
- const auto &backend_layout = _tensor_layout_map[root_parent];
-
- auto shape = asTensorShape(info.shape(), ir::Layout::UNKNOWN, backend_layout,
- _apply_dim_correction_map[current]);
- ::arm_compute::Coordinates coordinates =
- asTensorCoordinate(info.offset(), ir::Layout::UNKNOWN, backend_layout);
- _tensor_mgr->buildSubtensor(parent, current, shape, coordinates, info.shape().rank(), true);
- stack.pop();
- }
- }
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::preVisit(const ir::Operation &node)
-{
- // For now others executor doesn't need this step
- if (_executor_str != "Linear")
- {
- return;
- }
-
- std::function<void(const ir::OperandIndex &ind)> def_handler =
- [this, &def_handler](const ir::OperandIndex &ind) {
- bool is_subtensor = _subtensor_info_map.find(ind) != _subtensor_info_map.end();
- bool is_parent = _parent_def.find(ind) != _parent_def.end();
- if (!is_subtensor && !is_parent)
- {
- _tensor_mgr->startLifetime(ind);
- return;
- }
-
- if (is_parent)
- {
- if (_parent_def[ind] == 0)
- return;
-
- _parent_def[ind] = 0;
-
- if (is_subtensor)
- {
- const auto &it = _parent_def.find(ind);
- _parent_def.erase(it);
- def_handler(ind);
- }
- else
- {
- _tensor_mgr->startLifetime(ind);
- }
- }
- else if (is_subtensor)
- {
- const ir::OperandIndex &parent_ind = _subtensor_info_map.at(ind).parent();
- if (_parent_def[parent_ind] == 0)
- return;
- def_handler(parent_ind);
- }
- };
-
- // See #5642
- ir::OperandIndexMap<bool> outputs_map;
- for (const auto &ind : node.getOutputs())
- {
- assert(_first_uses_visit.find(ind) != _first_uses_visit.end());
- outputs_map[ind] = _first_uses_visit[ind];
- }
-
- // outputs_map's all elements are true?
- auto outputs_map_all_check = [&outputs_map]() {
- return std::all_of(outputs_map.begin(), outputs_map.end(),
- [](std::pair<const ir::OperandIndex, bool> it) { return it.second; });
- };
-
- std::pair<UsesType, ir::OperandIndex> peak;
- while (!outputs_map_all_check() && (peak = _uses_queue.front()).first == UsesType::FIRST)
- {
- _uses_queue.pop();
- _first_uses_num--;
-
- const auto &popped_idx = peak.second;
- def_handler(popped_idx);
-
- outputs_map[popped_idx] = true;
- _first_uses_visit[popped_idx] = true;
- }
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postVisit(const ir::Operation &node)
-{
- // For now others executor doesn't need this step
- if (_executor_str != "Linear")
- {
- return;
- }
-
- std::function<void(const ir::OperandIndex &ind)> use_handler =
- [this, &use_handler](const ir::OperandIndex &ind) {
- bool is_subtensor = _subtensor_info_map.find(ind) != _subtensor_info_map.end();
- bool is_parent = _parent_uses.find(ind) != _parent_uses.end();
- if (!is_subtensor && !is_parent)
- {
- _tensor_mgr->finishLifetime(ind);
- return;
- }
-
- // This handler shall be executed by the linear executor so that
- // The parent operand will always be done after the subtensor
- if (is_parent)
- {
- --_parent_uses[ind];
- assert(_parent_uses[ind] == 0);
-
- if (is_subtensor)
- {
- const auto &it = _parent_uses.find(ind);
- _parent_uses.erase(it);
- use_handler(ind);
- }
- else
- {
- _tensor_mgr->finishLifetime(ind);
- }
- }
- else if (is_subtensor)
- {
- const ir::OperandIndex &parent_ind = _subtensor_info_map.at(ind).parent();
- --_parent_uses[parent_ind];
- assert(_parent_uses[parent_ind] > 0);
- }
- };
-
- // See #5642
- const auto &inputs = node.getInputs();
- std::pair<UsesType, ir::OperandIndex> peak;
- while ((peak = _uses_queue.front()).first == UsesType::LAST)
- {
- const auto &popped_idx = peak.second;
- if (inputs.contains(popped_idx))
- {
- _uses_queue.pop();
- use_handler(popped_idx);
- }
- else
- {
- break;
- }
- }
-
- if (_first_uses_num == 0)
- {
- while (!_uses_queue.empty())
- {
- peak = _uses_queue.front();
- assert(peak.first == UsesType::LAST);
-
- _uses_queue.pop();
-
- use_handler(peak.second);
- }
- }
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::validate(void)
-{
- // For now others executor doesn't need this step
- if (_executor_str != "Linear")
- {
- return;
- }
-
- for (auto it : _tensor_info_map)
- {
- assert(_first_uses_visit.find(it.first) != _first_uses_visit.end());
- assert(_first_uses_visit[it.first]);
- }
-
- for (auto it : _subtensor_info_map)
- {
- assert(_first_uses_visit.find(it.first) != _first_uses_visit.end());
- assert(_first_uses_visit[it.first]);
- }
-
- for (auto it : _tensor_layout_map)
- {
- assert(_first_uses_visit.find(it.first) != _first_uses_visit.end());
- assert(_first_uses_visit[it.first]);
- UNUSED_RELEASE(it);
- }
-
- assert(_uses_queue.size() == 0);
- assert(_first_uses_num == 0);
-
- assert(
- std::all_of(_parent_def.begin(), _parent_def.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
- assert(
- std::all_of(_parent_uses.begin(), _parent_uses.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-ir::OperandIndex
-TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::findRootParent(ir::OperandIndex ind)
-{
- if (_subtensor_info_map.find(ind) == _subtensor_info_map.end())
- return ind;
-
- const auto &parent_ind = _subtensor_info_map.at(ind).parent();
- return findRootParent(parent_ind);
-}
-
-} // namespace acl_common
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
diff --git a/runtime/neurun/backend/acl_neon/Backend.h b/runtime/neurun/backend/acl_neon/Backend.h
deleted file mode 100644
index 2fcf66933..000000000
--- a/runtime/neurun/backend/acl_neon/Backend.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_NEON_BACKEND_H__
-#define __NEURUN_BACKEND_ACL_NEON_BACKEND_H__
-
-#include <memory>
-#include <backend/Backend.h>
-#include <ir/Operands.h>
-
-#include "Config.h"
-#include "ConstantInitializer.h"
-#include "KernelGenerator.h"
-#include "ShapeFixer.h"
-#include "TensorManager.h"
-#include "TensorRegister.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-class Backend : public ::neurun::backend::Backend
-{
-public:
- Backend() : _config{std::make_shared<Config>()} {}
-
- std::shared_ptr<IConfig> config() const override { return _config; }
-
- std::unique_ptr<BackendContext>
- newContext(const ir::Operands &operands,
- const std::shared_ptr<custom::IKernelBuilder> &) const override
- {
- auto tensor_builder = std::make_shared<TensorBuilder>(createTensorManager());
- return std::unique_ptr<BackendContext>{new BackendContext{
- this, tensor_builder, std::make_shared<ConstantInitializer>(operands, tensor_builder),
- std::make_shared<KernelGenerator>(operands, tensor_builder),
- std::make_shared<ShapeFixer>(operands, tensor_builder),
- std::make_shared<TensorRegister>(operands, tensor_builder)}};
- }
-
-private:
- std::shared_ptr<IConfig> _config;
-};
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_NEON_BACKEND_H__
diff --git a/runtime/neurun/backend/acl_neon/CMakeLists.txt b/runtime/neurun/backend/acl_neon/CMakeLists.txt
deleted file mode 100644
index 061246d36..000000000
--- a/runtime/neurun/backend/acl_neon/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-# Unsupported architecture
-nnas_find_package(ARMCompute QUIET)
-if(NOT ARMCompute_FOUND)
- return()
-endif(NOT ARMCompute_FOUND)
-
-set(LIB_NEURUN_BACKEND_ACL_NEON neurun_backend_acl_neon)
-
-file(GLOB_RECURSE SOURCES "*.cc")
-
-add_library(${LIB_NEURUN_BACKEND_ACL_NEON} SHARED ${SOURCES})
-
-target_include_directories(${LIB_NEURUN_BACKEND_ACL_NEON} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(${LIB_NEURUN_BACKEND_ACL_NEON} PRIVATE neurun_core)
-target_link_libraries(${LIB_NEURUN_BACKEND_ACL_NEON} PRIVATE ${LIB_NEURUN_BACKEND_ACL_COMMON})
-target_link_libraries(${LIB_NEURUN_BACKEND_ACL_NEON} PRIVATE nnfw_common)
-target_link_libraries(${LIB_NEURUN_BACKEND_ACL_NEON} PRIVATE nnfw_coverage)
-
-set_target_properties(${LIB_NEURUN_BACKEND_ACL_NEON} PROPERTIES OUTPUT_NAME backend_acl_neon)
-
-install(TARGETS ${LIB_NEURUN_BACKEND_ACL_NEON} DESTINATION lib)
diff --git a/runtime/neurun/backend/acl_neon/Config.cc b/runtime/neurun/backend/acl_neon/Config.cc
deleted file mode 100644
index 352bc0b41..000000000
--- a/runtime/neurun/backend/acl_neon/Config.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Config.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-bool Config::initialize() { return true; }
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_neon/Config.h b/runtime/neurun/backend/acl_neon/Config.h
deleted file mode 100644
index 430c194ee..000000000
--- a/runtime/neurun/backend/acl_neon/Config.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_NEON_CONFIG_H__
-#define __NEURUN_BACKEND_ACL_NEON_CONFIG_H__
-
-#include <backend/IConfig.h>
-#include <cpp14/memory.h>
-#include <util/ITimer.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-class Config : public IConfig
-{
-public:
- std::string id() override { return "acl_neon"; }
- bool initialize() override;
- bool SupportPermutation() override { return true; }
- bool SupportSubTensorAlloc() override { return true; }
-
- std::unique_ptr<util::ITimer> timer() override
- {
- return nnfw::cpp14::make_unique<util::CPUTimer>();
- }
-};
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_NEON_CONFIG_H__
diff --git a/runtime/neurun/backend/acl_neon/ConstantInitializer.cc b/runtime/neurun/backend/acl_neon/ConstantInitializer.cc
deleted file mode 100644
index 9a74bda29..000000000
--- a/runtime/neurun/backend/acl_neon/ConstantInitializer.cc
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ConstantInitializer.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _operands{operands}, _tensor_builder{tensor_builder}
-{
- // DO NOTHING
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
- const auto &block_size_obj = _operands.at(block_size_index);
-
- if (block_size_obj.isConstant())
- {
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::operand::ITensor &obj) {
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data().base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](::neurun::backend::operand::ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
- }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerPermuteInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerPermuteInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto &weight_obj = _operands.at(weight_index);
- registerCopyInitializer(weight_index, weight_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
- const auto &input_to_input_weights_index =
- node.getInputs().at(ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
- const auto &input_to_input_weights_obj = _operands.at(input_to_input_weights_index);
- registerCopyInitializer(input_to_input_weights_index, input_to_input_weights_obj);
-
- const auto &input_to_forget_weights_index =
- node.getInputs().at(ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
- const auto &input_to_forget_weights_obj = _operands.at(input_to_forget_weights_index);
- registerCopyInitializer(input_to_forget_weights_index, input_to_forget_weights_obj);
-
- const auto &input_to_cell_weights_index =
- node.getInputs().at(ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
- const auto &input_to_cell_weights_obj = _operands.at(input_to_cell_weights_index);
- registerCopyInitializer(input_to_cell_weights_index, input_to_cell_weights_obj);
-
- const auto &input_to_output_weights_index =
- node.getInputs().at(ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
- const auto &input_to_output_weights_obj = _operands.at(input_to_output_weights_index);
- registerCopyInitializer(input_to_output_weights_index, input_to_output_weights_obj);
-
- const auto &recurrent_to_input_weights_index =
- node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
- const auto &recurrent_to_input_weights_obj = _operands.at(recurrent_to_input_weights_index);
- registerCopyInitializer(recurrent_to_input_weights_index, recurrent_to_input_weights_obj);
-
- const auto &recurrent_to_forget_weights_index =
- node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
- const auto &recurrent_to_forget_weights_obj = _operands.at(recurrent_to_forget_weights_index);
- registerCopyInitializer(recurrent_to_forget_weights_index, recurrent_to_forget_weights_obj);
-
- const auto &recurrent_to_cell_weights_index =
- node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
- const auto &recurrent_to_cell_weights_obj = _operands.at(recurrent_to_cell_weights_index);
- registerCopyInitializer(recurrent_to_cell_weights_index, recurrent_to_cell_weights_obj);
-
- const auto &recurrent_to_output_weights_index =
- node.getInputs().at(ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
- const auto &recurrent_to_output_weights_obj = _operands.at(recurrent_to_output_weights_index);
- registerCopyInitializer(recurrent_to_output_weights_index, recurrent_to_output_weights_obj);
-
- const auto &cell_to_input_weights_index =
- node.getInputs().at(ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
- const auto &cell_to_input_weights_obj = _operands.at(cell_to_input_weights_index);
- registerCopyInitializer(cell_to_input_weights_index, cell_to_input_weights_obj);
-
- const auto &cell_to_forget_weights_index =
- node.getInputs().at(ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
- const auto &cell_to_forget_weights_obj = _operands.at(cell_to_forget_weights_index);
- registerCopyInitializer(cell_to_forget_weights_index, cell_to_forget_weights_obj);
-
- const auto &cell_to_output_weights_index =
- node.getInputs().at(ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
- const auto &cell_to_output_weights_obj = _operands.at(cell_to_output_weights_index);
- registerCopyInitializer(cell_to_output_weights_index, cell_to_output_weights_obj);
-
- const auto &input_gate_bias_index = node.getInputs().at(ir::operation::LSTM::INPUT_GATE_BIAS);
- const auto &input_gate_bias_obj = _operands.at(input_gate_bias_index);
- registerCopyInitializer(input_gate_bias_index, input_gate_bias_obj);
-
- const auto &forget_gate_bias_index = node.getInputs().at(ir::operation::LSTM::FORGET_GATE_BIAS);
- const auto &forget_gate_bias_obj = _operands.at(forget_gate_bias_index);
- registerCopyInitializer(forget_gate_bias_index, forget_gate_bias_obj);
-
- const auto &output_gate_bias_index = node.getInputs().at(ir::operation::LSTM::OUTPUT_GATE_BIAS);
- const auto &output_gate_bias_obj = _operands.at(output_gate_bias_index);
- registerCopyInitializer(output_gate_bias_index, output_gate_bias_obj);
-
- const auto &projection_weights_index =
- node.getInputs().at(ir::operation::LSTM::PROJECTION_WEIGHTS);
- const auto &projection_weights_obj = _operands.at(projection_weights_index);
- registerCopyInitializer(projection_weights_index, projection_weights_obj);
-
- const auto &projection_bias_index = node.getInputs().at(ir::operation::LSTM::PROJECTION_BIAS);
- const auto &projection_bias_obj = _operands.at(projection_bias_index);
- registerCopyInitializer(projection_bias_index, projection_bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
- const auto &weights_index = node.getInputs().at(ir::operation::RNN::WEIGHTS);
- const auto &weights_obj = _operands.at(weights_index);
- registerCopyInitializer(weights_index, weights_obj);
-
- const auto &recurrent_weights_index = node.getInputs().at(ir::operation::RNN::RECURRENT_WEIGHTS);
- const auto &recurrent_weights_obj = _operands.at(recurrent_weights_index);
- registerCopyInitializer(recurrent_weights_index, recurrent_weights_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::RNN::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
-{
- const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
- const auto &block_size_obj = _operands.at(block_size_index);
-
- if (block_size_obj.isConstant())
- {
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::operand::ITensor &obj) {
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data().base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](::neurun::backend::operand::ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
- }
-
- const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS);
- const auto &paddings_obj = _operands.at(paddings_index);
- if (paddings_obj.isConstant())
- {
- _init_map[paddings_index] = [](const ir::Operand &model_obj, backend::operand::ITensor &obj) {
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data().base());
- assert(model_obj.shape().rank() == 2);
- assert(shape.dim(0) == 2);
- assert(shape.dim(1) == 2);
- obj.access([&](::neurun::backend::operand::ITensor &tensor) {
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- for (auto j = 0; j < shape.dim(1); ++j)
- {
- const int32_t value = base[i * 2 + j];
- int32_t *into = reinterpret_cast<int32_t *>(
- // The coordinates of NETensor are different from the coordiantes of CLTensor in
- // this operand.
- // NEON : {j, reversed i}
- // CL : {reversed i, j}
- tensor.buffer() + tensor.calcOffset({j, shape.dim(0) - i - 1}));
- *into = value;
- }
- }
- });
- };
- }
-}
-
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerPermuteInitializer(kernel_index, kernel_obj);
-}
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_neon/ConstantInitializer.h b/runtime/neurun/backend/acl_neon/ConstantInitializer.h
deleted file mode 100644
index 0f2b2d05b..000000000
--- a/runtime/neurun/backend/acl_neon/ConstantInitializer.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
-#define __NEURUN_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
-
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-class ConstantInitializer : public IConstantInitializer
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
-
-public:
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::TransposeConv &) override;
-
-private:
- const ir::Operands &operands() const override { return _operands; }
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
-
-private:
- const ir::Operands &_operands;
- std::shared_ptr<TensorBuilder> _tensor_builder;
-};
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
diff --git a/runtime/neurun/backend/acl_neon/KernelGenerator.cc b/runtime/neurun/backend/acl_neon/KernelGenerator.cc
deleted file mode 100644
index 85c6a0633..000000000
--- a/runtime/neurun/backend/acl_neon/KernelGenerator.cc
+++ /dev/null
@@ -1,2152 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "KernelGenerator.h"
-
-#include <arm_compute/runtime/NEON/NEFunctions.h> // Include all ARM Compute NEON functions
-#include <arm_compute/runtime/NEON/NEFunctionsEx.h> // Include all ARM Compute EX NEON functions
-
-#include <Convert.h>
-#include <Swizzle.h>
-
-#include "util/Padding.h"
-#include "ir/Index.h"
-#include "ir/DataType.h"
-#include "ir/InternalType.h"
-#include "compiler/IExecutionBuilder.h"
-#include "exec/NopFunction.h"
-#include "util/logging.h"
-#include "util/Utils.h"
-
-using ::neurun::compiler::IExecutionBuilder;
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-using ::neurun::backend::acl_common::asAclFunction;
-
-//
-// ActivationBuilder
-//
-class ActivationBuilder
-{
-public:
- ActivationBuilder(IExecutionBuilder &builder) : _builder(builder)
- {
- // DO NOTHING
- }
-
-private:
- void appendReLU(::arm_compute::ITensor *ifm_alloc);
- void appendReLU1(::arm_compute::ITensor *ifm_alloc);
- void appendReLU6(::arm_compute::ITensor *ifm_alloc);
-
-public:
- void append(ir::Activation act, ::arm_compute::ITensor *ifm_alloc);
-
-private:
- IExecutionBuilder &_builder;
-};
-
-void ActivationBuilder::appendReLU(::arm_compute::ITensor *ifm_alloc)
-{
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_alloc, nullptr, act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _builder.append(std::move(acl_fn));
-}
-
-void ActivationBuilder::appendReLU1(::arm_compute::ITensor *ifm_alloc)
-{
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_alloc, nullptr, act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _builder.append(std::move(acl_fn));
-}
-
-void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc)
-{
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_alloc, nullptr, act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _builder.append(std::move(acl_fn));
-}
-
-void ActivationBuilder::append(ir::Activation act, ::arm_compute::ITensor *ifm_alloc)
-{
- switch (act)
- {
- case ir::Activation::NONE:
- {
- // DO NOTHING
- break;
- }
- case ir::Activation::RELU:
- {
- appendReLU(ifm_alloc);
- break;
- }
- case ir::Activation::RELU1:
- {
- appendReLU1(ifm_alloc);
- break;
- }
- case ir::Activation::RELU6:
- {
- appendReLU6(ifm_alloc);
- break;
- }
- default:
- {
- throw std::runtime_error("Not supported, yet");
- }
- }
-}
-
-//
-// KernelGenerator
-//
-KernelGenerator::KernelGenerator(const ir::Operands &ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _ctx(ctx), _tensor_builder(tensor_builder), _current_subg_layout(ir::Layout::UNKNOWN)
-{
- // DO NOTHING
-}
-
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
-{
- _current_subg_layout = op_seq.getLayout();
- for (const auto &e : op_seq.operations())
- {
- const auto &node = *(e.node);
- _tensor_builder->preVisit(node);
- node.accept(*this);
- _tensor_builder->postVisit(node);
- }
-}
-
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
-
- const auto ifm_rank = node.param().rank;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto frontend_layout = _current_subg_layout;
- auto backend_layout = ifm_alloc->layout();
-
- int axis_value = node.param().axis;
- if (axis_value < 0)
- {
- axis_value += ifm_rank;
- }
- assert(axis_value >= 0 && axis_value < ifm_rank);
- const auto fixed_axis =
- acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
-
- // auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMinMaxLayer>();
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMax>();
-
- // NOTE
- // if (ofm_alloc->info()->data_type() == arm_compute::DataType::S32)
- //{
- ofm_alloc->info()->set_data_type(arm_compute::DataType::U32);
- //}
- fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle());
- // fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(),
- // arm_compute::ReductionOperation::ARG_IDX_MAX);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
- const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
-
- assert(_ctx.at(block_size_index).isConstant());
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEBatchToSpaceLayer>();
-
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Cast &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NECast>();
-
- auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8
- ? arm_compute::SubDataType::BOOL
- : arm_compute::SubDataType::NONE;
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Conv2D &node)
-{
- using ir::operation::Conv2D;
-
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
- const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
- const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
- const auto &ker_shape = _ctx.at(ker_index).shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
-
- const auto stride = node.param().stride;
- const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
- stride, ker_width, ker_height);
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
-
- const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
- const auto act_info = acl_common::asActivationLayerInfo(activation);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
- conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
-
- auto block_size = node.param().block_size;
- assert(block_size > 0);
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthToSpaceLayerEx>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), block_size);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
-{
- using ir::operation::DepthwiseConv2D;
-
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
- const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
- const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- // Kernel format is [1, kernel_height, kernel_width, depth_out].
- const auto &ker_shape = _ctx.at(ker_index).shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
-
- const auto stride = node.param().stride;
- const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
- stride, ker_width, ker_height);
- const auto multiplier = node.param().multiplier;
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
-
- const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
- const auto act_info = acl_common::asActivationLayerInfo(activation);
-
- if (ker_height == 3 && ker_width == 3)
- {
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer3x3>();
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
- }
- else
- {
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
- }
-}
-
-void KernelGenerator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDequantizationLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
- ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append((std::move(acl_fn)));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Mean &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Mean::Input::INPUT)};
- const auto &axes{node.param().axes};
- const auto keep_dims{node.param().keep_dims};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
-
- // Convert to ACL axes taking into account negative values and possible duplicates.
- std::set<std::uint32_t> acl_axes;
- const int ifm_rank = node.param().rank;
- for (int axis : axes)
- {
- if (axis < 0)
- axis += ifm_rank;
- acl_axes.insert(
- acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
- }
-
- arm_compute::Coordinates fixed_axis;
- for (const auto axis : acl_axes)
- {
- fixed_axis.set(fixed_axis.num_dimensions(), axis);
- }
-
- // NOTE NEReduceMean has a bug that does not support NHWC layout
- // NEReduceMean intermediate tensors are always NCHW layout
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReduceMeanEx>();
-
- fn->configure(ifm_alloc->handle(), fixed_axis, keep_dims, ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append((std::move(acl_fn)));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Concat &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
-
- std::vector<ir::OperandIndex> input_indexes;
- for (const auto &input : node.getInputs())
- input_indexes.emplace_back(input);
-
- const auto axis = node.param().axis;
-
- // If tensor allocator allocate as subtensor
- bool canEliminate = true;
- for (auto ifm_ind : input_indexes)
- {
- if (!_tensor_builder->isSubTensorOf(ofm_index, ifm_ind))
- {
- canEliminate = false;
- break;
- }
- }
- if (canEliminate)
- {
- // If concat eliminated, return a NOP IFunction
- _execution_builder->append(nnfw::cpp14::make_unique<exec::NopFunction>());
- return;
- }
-
- auto output_alloc = _tensor_builder->at(ofm_index).get();
- std::vector<::arm_compute::ITensor *> input_tensors;
- for (const auto &ifm_ind : input_indexes)
- input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
-
- std::unique_ptr<::arm_compute::IFunction> fn;
- if (input_indexes.size() < 2)
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::NECopy>();
- l->configure(input_tensors.at(0), output_alloc->handle());
- fn = std::move(l);
- }
- else
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::NEConcatenateLayer>();
- const auto rank = node.param().rank;
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = output_alloc->layout();
- const auto fixed_axis =
- acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_alloc->handle(), fixed_axis);
- fn = std::move(l);
- }
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEEmbeddingLookup>();
-
- fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Floor &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEFloor>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::FullyConnected &node)
-{
- using ir::operation::FullyConnected;
-
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
- const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
- const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
-
- const auto input_rank = _ctx.at(input_index).shape().rank();
-
- const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
- UNUSED_RELEASE(output_size);
- assert(_ctx.at(bias_index).shape().dim(0) == output_size);
- assert(_ctx.at(weight_index).shape().dim(0) == output_size);
- const auto batch_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2);
- const auto input_size =
- _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1);
-
- // Check for reshaping input's shape into rank-2
- bool needs_reshape = false;
- ir::Shape reshape(2);
- if (input_rank == 3 || input_rank == 4)
- {
- const auto &ifm_shape = _ctx.at(input_index).shape();
- auto feature_size = 1;
- for (int i = 0; i < ifm_shape.rank(); ++i)
- {
- feature_size *= ifm_shape.dim(i);
- }
-
- UNUSED_RELEASE(feature_size);
- assert(feature_size == batch_size * input_size);
-
- // for reshaping
- needs_reshape = true;
- reshape.dim(0) = batch_size; /* H */
- reshape.dim(1) = input_size; /* W */
- }
-
- const auto activation = node.param().activation;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- const auto input_alloc = _tensor_builder->at(input_index).get();
- const auto weight_alloc = _tensor_builder->at(weight_index).get();
- const auto bias_alloc = _tensor_builder->at(bias_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto acl_layout = output_alloc->handle()->info()->data_layout();
-
- auto fn = nnfw::cpp14::make_unique<arm_compute::NEFullyConnectedReshapingLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- arm_compute::NEFullyConnectedReshapingLayer::KernelType kernel_type =
- _ctx.at(weight_index).isConstant()
- ? arm_compute::NEFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS
- : arm_compute::NEFullyConnectedReshapingLayer::KernelType::GENERAL;
-
- fn->configure(
- input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(),
- needs_reshape,
- ::neurun::backend::acl_common::asTensorShape(
- reshape, frontend_layout, ::neurun::backend::acl_common::asRuntimeLayout(acl_layout)),
- kernel_type);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, output_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
-{
- const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
- const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
-
- const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
- const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
- const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hits_alloc = _tensor_builder->at(hits_index).get();
-
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto keys_alloc = _tensor_builder->at(keys_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEHashtableLookup>();
-
- fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(),
- output_alloc->handle(), hits_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Gather &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
-
- const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape();
-
- const auto ifm_rank = node.param().rank;
- const auto axis_raw = node.param().axis;
- const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
- // Converting in reverse order
- const int axis = ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto indices_alloc = _tensor_builder->at(indices_index).get();
- const auto backend_layout = ofm_alloc->layout();
- UNUSED_RELEASE(backend_layout);
-
- // NOTE The frontend layout and backend layout must be the same for this operation.
- // If not the same, we have to add a stage(?) to perform permutation of output tensor. It
- // is not not efficient even if it works well. If so, it would be better to set the
- // layout of these backend tensors to the same layout.
- // There is also one thing we have to think about. This operation depends on the layout of
- // a model. For example, if a model in NHWC has this operation as output rank == 4, indices
- // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
- // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
- assert(backend_layout == ifm_alloc->layout());
- assert(backend_layout == indices_alloc->layout());
- assert(ifm_rank < 4 || _current_subg_layout == backend_layout);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEGatherEx>();
-
- fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
- const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
- const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto gamma_alloc = _tensor_builder->at(gamma_index).get();
- auto beta_alloc = _tensor_builder->at(beta_index).get();
- auto epsilon = node.param().epsilon;
- auto activation = node.param().activation;
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(),
- beta_alloc->handle(), epsilon);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::L2Normalization &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
-
- // {CL|Neon}L2Normalization performs the reduction only along dimension 0
- // L2 Normalization always performs the reduction along the depth axis
- // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by
- // choosing normalization parameters as below
-
- const auto &ifm_shape = _ctx.at(ifm_index).shape();
- // TODO Support optional constant dimension that normalization would be performed on
- const auto normalization_axis = node.param().rank - 1;
- int32_t radius =
- 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
- float alpha = 1.0f; // In the implementation to make alpha_ become 1
- float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
- float bias = 0.0f; // Don't offset the reduction.
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
- radius, alpha, beta, bias, false);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
-
- uint32_t kw = node.param().kw;
- uint32_t kh = node.param().kh;
- const auto stride = node.param().stride;
- const auto padding =
- neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh},
- ::neurun::backend::acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{
- node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
-
- auto radius = node.param().radius;
- auto alpha = node.param().alpha;
- auto beta = node.param().beta;
- auto bias = node.param().bias;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const auto norm_info = ::arm_compute::NormalizationLayerInfo(
- ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NELogicalAnd>();
-
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEBitwiseNot>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NELogicalOr>();
-
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LSTM &node)
-{
- // TODO Support dynamic rnn
- // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
- const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
- const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
- const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
- const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
- const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
- const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
- const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
- const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
- const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
- const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
- const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
- const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
- const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
- const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
- const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
- const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
- const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
- const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
- const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
- const auto cell_threshold = node.param().cell_threshold;
- const auto projection_threshold = node.param().projection_threshold;
-
- bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
- bool has_recurrent_to_input_weights =
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
- bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0;
- bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
-
- // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
- // true: no CIFG
- // false: CIFG
- // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
- bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
-
- // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
- // But the cell_to_input_weights does not exist in regular CIFG although peephole.
- // true: peephole
- // false: no peephole
- bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
-
- // NOTE Although the projection weights has data the projection bias may not have data.
- bool has_projection_param = has_projection_weights;
-
- const auto activation = node.param().activation;
- const auto cell_clip = cell_threshold;
- const auto projection_clip = projection_threshold;
- assert(cell_clip >= 0.f && projection_clip >= 0.f);
-
- auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get();
- auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get();
- auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get();
- auto output_alloc = _tensor_builder->at(output_index).get();
-
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get();
- auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get();
- auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get();
- auto recurrent_to_forget_weights_alloc =
- _tensor_builder->at(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get();
- auto recurrent_to_output_weights_alloc =
- _tensor_builder->at(recurrent_to_output_weights_index).get();
-
- auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get();
- auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get();
- auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get();
- auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get();
- auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get();
-
- auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(activation);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NELSTMLayer>();
-
- ::arm_compute::LSTMParams<::arm_compute::ITensor> lstm_params{};
- if (has_cifg_param)
- {
- auto input_to_input_weights_alloc =
- _tensor_builder->at(input_to_input_weights_index).get(); // optional
- auto recurrent_to_input_weights_alloc =
- _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
- auto cell_to_input_weights_handle =
- has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle()
- : nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional
- lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(),
- recurrent_to_input_weights_alloc->handle(),
- cell_to_input_weights_handle, input_gate_bias_alloc->handle());
- }
- if (has_peephole_param)
- {
- auto cell_to_forget_weights_alloc =
- _tensor_builder->at(cell_to_forget_weights_index).get(); // optional
- auto cell_to_output_weights_alloc =
- _tensor_builder->at(cell_to_output_weights_index).get(); // optional
- lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(),
- cell_to_output_weights_alloc->handle());
- }
- if (has_projection_param)
- {
- auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional
- auto projection_bias_handle = has_projection_bias
- ? _tensor_builder->at(projection_bias_index).get()->handle()
- : nullptr; // optional
- lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle);
- }
-
- fn->configure(
- input_alloc->handle(), input_to_forget_weights_alloc->handle(),
- input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(),
- recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(),
- recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(),
- cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(),
- cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(),
- output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(),
- lstm_params, act_info, cell_clip, projection_clip);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPixelWiseMultiplication>();
-
- // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NENegLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Pack &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- auto axis{node.param().axis};
-
- const auto output_rank = node.param().rank;
-
- std::vector<ir::OperandIndex> input_indexes;
- for (const auto &input_index : node.getInputs())
- input_indexes.emplace_back(input_index);
-
- auto output = _tensor_builder->at(output_index).get()->handle();
- std::vector<arm_compute::ITensor *> inputs;
- for (const auto &input_index : input_indexes)
- inputs.emplace_back(_tensor_builder->at(input_index)->handle());
-
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
-
- if (axis < 0)
- axis += output_rank;
- axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEStackLayer>();
-
- fn->configure(inputs, axis, output);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::Pad &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
- const auto output_index{node.getOutputs().at(0)};
- assert(_ctx.at(pad_index).isConstant());
-
- auto rank = node.param().rank;
- auto pad_base = _ctx.at(pad_index).data().base();
-
- auto input = _tensor_builder->at(input_index).get()->handle();
- auto output = _tensor_builder->at(output_index).get()->handle();
-
- ::arm_compute::PaddingList padding_list;
- padding_list.resize(rank);
- for (int32_t n = 0; n < rank; ++n)
- {
- const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
-
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
- const auto axis =
- acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
- padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
- }
-
- const auto input_type = _ctx.at(input_index).typeInfo();
- UNUSED_RELEASE(input_type);
- assert(input->info()->data_type() == acl_common::asDataType(input_type.type()));
- assert(input->info()->quantization_info() ==
- ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset()));
- const auto pixel_value =
- ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPadLayer>();
- fn->configure(input, output, padding_list, pixel_value);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::Permute &node)
-{
- const auto ofm_idx{node.getOutputs().at(0)};
- const auto ifm_idx{node.getInputs().at(0)};
- const auto permute_type = node.getPermuteType();
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
- const auto rank = _ctx.at(ofm_idx).shape().rank();
- assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
-
- std::unique_ptr<::arm_compute::IFunction> fn;
- arm_compute::PermutationVector pv;
- if (permute_type == ir::operation::Permute::Type::NCHW_TO_NHWC && rank == 4)
- {
- // WHCN -> CWHN
- pv = arm_compute::PermutationVector{2, 0, 1};
-
- auto l = nnfw::cpp14::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
-
- fn = std::move(l);
- }
- else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
- {
- // CWHN -> WHCN
- pv = arm_compute::PermutationVector{1, 2, 0};
-
- auto l = nnfw::cpp14::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
-
- fn = std::move(l);
- }
- else
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::NECopy>();
-
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- fn = std::move(l);
- }
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::PReLU &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
- const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto alpha_alloc = _tensor_builder->at(alpha_index).get();
-
- std::unique_ptr<::arm_compute::IFunction> fn;
-
- auto l = nnfw::cpp14::make_unique<::arm_compute::NEPReLU>();
-
- l->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle());
-
- fn = std::move(l);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReduceMax &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReduceMax::Input::INPUT)};
- const auto &axes{node.param().axes};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
-
- // Convert to ACL axes taking into account negative values and possible duplicates.
- std::set<std::uint32_t> acl_axes;
- const int ifm_rank = node.param().rank;
- for (int axis : axes)
- {
- if (axis < 0)
- axis += ifm_rank;
- acl_axes.insert(
- acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
- }
-
- arm_compute::Coordinates reduce_axes;
- for (const auto axis : acl_axes)
- {
- reduce_axes.set(reduce_axes.num_dimensions(), axis);
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReduceOperation>();
-
- fn->configure(ifm_alloc->handle(), reduce_axes, false, ofm_alloc->handle(),
- ::arm_compute::ReduceOperation::MAX);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReduceMin &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReduceMin::Input::INPUT)};
- const auto &axes{node.param().axes};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
-
- // Convert to ACL axes taking into account negative values and possible duplicates.
- std::set<std::uint32_t> acl_axes;
- const int ifm_rank = node.param().rank;
- for (int axis : axes)
- {
- if (axis < 0)
- axis += ifm_rank;
- acl_axes.insert(
- acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
- }
-
- arm_compute::Coordinates reduce_axes;
- for (const auto axis : acl_axes)
- {
- reduce_axes.set(reduce_axes.num_dimensions(), axis);
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReduceOperation>();
-
- fn->configure(ifm_alloc->handle(), reduce_axes, false, ofm_alloc->handle(),
- ::arm_compute::ReduceOperation::MIN);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReduceSum &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReduceSum::Input::INPUT)};
- const auto &axes{node.param().axes};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = input_alloc->layout();
-
- // Convert to ACL axes taking into account negative values and possible duplicates.
- std::set<std::uint32_t> acl_axes;
- const int input_rank = node.param().rank;
- for (int axis : axes)
- {
- if (axis < 0)
- axis += input_rank;
- acl_axes.insert(
- acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value());
- }
-
- arm_compute::Coordinates fixed_axes;
- for (const auto axis : acl_axes)
- {
- fixed_axes.set(fixed_axes.num_dimensions(), axis);
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReduceSum>();
-
- fn->configure(input_alloc->handle(), fixed_axes, false, output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<arm_compute::NEActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Reshape &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- // NOTE This operation must not be changed the layout from frontend to backend
- // So, PermutationOperationPass makes layouts of frontend and backend the same.
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = output_alloc->layout();
- assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
- frontend_layout == backend_layout);
- UNUSED_RELEASE(frontend_layout);
- UNUSED_RELEASE(backend_layout);
-
- auto fn = nnfw::cpp14::make_unique<arm_compute::NEReshapeLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
-
- const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEScale>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(),
- ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RNN &node)
-{
- const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
- const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
- const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
- const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
- const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
- const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
-
- const auto activation = node.param().activation;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get();
-
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto weights_alloc = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get();
- auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(activation);
-
- auto copy_layer = nnfw::cpp14::make_unique<::arm_compute::NECopy>();
- copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle());
- _execution_builder->append(asAclFunction(std::move(copy_layer)));
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NERNNLayerEx>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(),
- bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(),
- act_info);
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NERsqrtLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::Squeeze &node)
-{
- // Squeeze is identical to reshape except that it has an optional dimensions input.
- // In addition, optional dims_index is ignored since output tensor already has squeezed shape
- // by freezer and toco
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- const auto dims{node.param().dims};
- const auto ndim{node.param().ndim};
- (void)dims;
- (void)ndim;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto fn = nnfw::cpp14::make_unique<arm_compute::NEReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
- auto acl_fn = asAclFunction(std::move(fn));
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<arm_compute::NEActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Softmax &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
- const auto beta = node.param().beta;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NESoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), beta);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
- const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
- const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
- auto paddings_alloc = _tensor_builder->at(paddings_index).get();
-
- assert(_ctx.at(block_size_index).isConstant());
- assert(_ctx.at(paddings_index).isConstant());
-
- // NESpaceToBatchLayer has a bug that padding's values are 0 even when zero point of QASYMM8 is
- // not 0.
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NESpaceToBatchLayerEx>();
-
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(),
- ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
-
- auto block_size = node.param().block_size;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NESpaceToDepthLayerEx>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Split &node)
-{
- // TODO Support this op by SubTensor
- const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
-
- assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
-
- const auto ifm_rank = node.param().rank;
- std::vector<ir::OperandIndex> output_indexes;
- for (const auto &output : node.getOutputs())
- output_indexes.emplace_back(output);
-
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- std::vector<arm_compute::ITensor *> output_allocs;
- for (const auto &ofm_ind : output_indexes)
- output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
-
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
- auto axis = node.param().axis;
- if (axis < 0)
- axis += ifm_rank;
- axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NESplit>();
-
- fn->configure(ifm_alloc->handle(), output_allocs, axis);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticSubtraction>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Slice &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
- const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
- const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
-
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = inputData_alloc->layout();
-
- // Set initializers for indices data such as order of inputData
- int input_rank = node.param().rank;
- std::vector<int32_t> starts;
- std::vector<int32_t> ends;
- starts.resize(input_rank, 0);
- ends.resize(input_rank, 0);
- {
- auto beginData_base = _ctx.at(begins_index).data().base();
- auto sizeData_base = _ctx.at(sizes_index).data().base();
- const int beginData_size = _ctx.at(begins_index).shape().num_elements();
- const int sizeData_size = _ctx.at(sizes_index).shape().num_elements();
-
- using ir::DataType;
-
- UNUSED_RELEASE(beginData_size);
- UNUSED_RELEASE(sizeData_size);
-
- assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32);
- assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32);
- assert(beginData_size == input_rank);
- assert(sizeData_size == input_rank);
-
- assert(beginData_base != nullptr);
- for (int n = 0; n < input_rank; ++n)
- {
- auto axis = ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
- backend_layout)
- .value();
-
- int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
- starts[axis] = begin_value;
-
- int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n);
- ends[axis] = begin_value + size_value;
- }
- }
-
- ::arm_compute::Coordinates starts_set;
- ::arm_compute::Coordinates ends_set;
-
- for (size_t i = 0; i < starts.size(); ++i)
- {
- starts_set.set(i, starts[i]);
- ends_set.set(i, ends[i]);
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NESlice>();
-
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::StridedSlice &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
- const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
- const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
-
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = inputData_alloc->layout();
-
- // Set initializers for indices data such as order of inputData
- int input_rank = node.param().rank;
- std::vector<int32_t> starts;
- std::vector<int32_t> ends;
- std::vector<int32_t> strides;
- starts.resize(input_rank, 0);
- ends.resize(input_rank, 0);
- strides.resize(input_rank, 0);
- {
- auto startData_base = _ctx.at(starts_index).data().base();
- auto endData_base = _ctx.at(ends_index).data().base();
- auto stridesData_base = _ctx.at(strides_index).data().base();
- const int startData_size = _ctx.at(starts_index).shape().num_elements();
- const int endData_size = _ctx.at(ends_index).shape().num_elements();
- const int stridesData_size = _ctx.at(strides_index).shape().num_elements();
-
- using ir::DataType;
-
- UNUSED_RELEASE(startData_size);
- UNUSED_RELEASE(endData_size);
- UNUSED_RELEASE(stridesData_size);
-
- assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32);
- assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32);
- assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32);
- assert(startData_size == input_rank);
- assert(endData_size == input_rank);
- assert(stridesData_size == input_rank);
-
- assert(startData_base != nullptr);
- for (int n = 0; n < input_rank; ++n)
- {
- auto axis = ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
- backend_layout)
- .value();
-
- int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
- starts[axis] = start_value;
-
- int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
- ends[axis] = end_value;
-
- int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
- strides[axis] = strides_value;
- }
- }
-
- // Set mask bits such as order of inputData
- // FIXME Take the layouts into account.
- const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
- const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
- const auto shrink_axis_mask =
- acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
-
- ::arm_compute::Coordinates starts_set;
- ::arm_compute::Coordinates ends_set;
- ::arm_compute::BiStrides strides_set;
-
- for (size_t i = 0; i < starts.size(); ++i)
- {
- starts_set.set(i, starts[i]);
- ends_set.set(i, ends[i]);
- strides_set.set(i, strides[i]);
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEStridedSlice>();
-
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set,
- strides_set, begin_mask, end_mask, shrink_axis_mask);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::TransposeConv &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto output_shape_index{
- node.getInputs().at(ir::operation::TransposeConv::Input::OUTPUT_SHAPE)};
- const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
- const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_subg_layout);
-
- const auto stride = node.param().stride;
-
- assert((node.param().padding.type == ir::PaddingType::SAME) ||
- (node.param().padding.type == ir::PaddingType::VALID));
- auto padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
- ker_shape.W, ker_shape.H);
-
- uint32_t invalid_horizontal = 0;
- uint32_t invalid_vertical = 0;
- if (node.param().padding.type == ir::PaddingType::VALID)
- {
- invalid_horizontal =
- ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
- invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
- }
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
-
- const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NETransposeConvLayer>();
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
- invalid_horizontal, invalid_vertical);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Transpose &node)
-{
- const auto ofm_idx{node.getOutputs().at(0)};
- const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &perm{node.param().perm};
-
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- const auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
-
- const auto rank = node.param().rank;
- std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
- auto backend_pv = ::neurun::backend::acl_common::getARMComputePermutationVector(
- rank, pv, frontend_layout, backend_layout);
-
- std::unique_ptr<::arm_compute::IFunction> fn;
-
- if (ifm_alloc->num_dimensions() <= 2 && ofm_alloc->num_dimensions() <= 2)
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::NETranspose>();
-
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- fn = std::move(l);
- }
- else
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv);
-
- fn = std::move(l);
- }
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Unpack &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
- auto axis{node.param().axis};
-
- const auto input_rank = node.param().rank;
-
- std::vector<ir::OperandIndex> output_indexes;
- for (const auto &output_index : node.getOutputs())
- output_indexes.emplace_back(output_index);
-
- auto input = _tensor_builder->at(input_index).get()->handle();
- std::vector<arm_compute::ITensor *> outputs;
- for (const auto &output_index : output_indexes)
- outputs.emplace_back(_tensor_builder->at(output_index)->handle());
-
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
- if (axis < 0)
- axis += input_rank;
- axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEUnstack>();
-
- fn->configure(input, outputs, axis);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticAddition>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseDivision>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Exp &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEExpLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Comparison &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
-
- const auto comparison_type = node.param().comparison_type;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseComparison>();
-
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseMin>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseMax>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_neon/KernelGenerator.h b/runtime/neurun/backend/acl_neon/KernelGenerator.h
deleted file mode 100644
index f041fb725..000000000
--- a/runtime/neurun/backend/acl_neon/KernelGenerator.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
-#define __NEURUN_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
-
-#include <backend/IKernelGenerator.h>
-
-#include "ir/Operands.h"
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-class KernelGenerator : public IKernelGenerator
-{
-public:
- KernelGenerator(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder);
-
- void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::Abs &) override;
- void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Cast &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthToSpace &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Dequantize &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::Mean &) override;
- void visit(const ir::operation::AvgPool2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::Floor &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::HashtableLookup &) override;
- void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::L2Normalization &) override;
- void visit(const ir::operation::L2Pool2D &) override;
- void visit(const ir::operation::LocalResponseNormalization &) override;
- void visit(const ir::operation::LogicalAnd &) override;
- void visit(const ir::operation::LogicalNot &) override;
- void visit(const ir::operation::LogicalOr &) override;
- void visit(const ir::operation::Logistic &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::Neg &) override;
- void visit(const ir::operation::Pack &) override;
- void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Permute &) override;
- void visit(const ir::operation::PReLU &) override;
- void visit(const ir::operation::ReduceMax &) override;
- void visit(const ir::operation::ReduceMin &) override;
- void visit(const ir::operation::ReduceSum &) override;
- void visit(const ir::operation::ReLU &) override;
- void visit(const ir::operation::ReLU1 &) override;
- void visit(const ir::operation::ReLU6 &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::ResizeBilinear &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::RSQRT &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Tanh &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::SQRT &) override;
- void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Slice &) override;
- void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::TransposeConv &) override;
- void visit(const ir::operation::Transpose &) override;
- void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Div &) override;
- void visit(const ir::operation::Exp &) override;
- void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Max &) override;
-
-private:
- const ir::Operands &_ctx;
- std::shared_ptr<TensorBuilder> _tensor_builder;
- ir::Layout _current_subg_layout;
-};
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
diff --git a/runtime/neurun/backend/acl_neon/PluginClassesAllocator.cc b/runtime/neurun/backend/acl_neon/PluginClassesAllocator.cc
deleted file mode 100644
index 75f2e9797..000000000
--- a/runtime/neurun/backend/acl_neon/PluginClassesAllocator.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <util/logging.h>
-
-#include "Backend.h"
-
-extern "C" {
-neurun::backend::Backend *neurun_backend_create()
-{
- VERBOSE(neurun_backend_create) << "'acl_neon' loaded\n";
- return new neurun::backend::acl_neon::Backend;
-}
-
-void neurun_backend_destroy(neurun::backend::Backend *backend)
-{
- VERBOSE(neurun_backend_create) << "'acl_neon' unloaded\n";
- delete backend;
-}
-}
diff --git a/runtime/neurun/backend/acl_neon/ShapeFixer.cc b/runtime/neurun/backend/acl_neon/ShapeFixer.cc
deleted file mode 100644
index 1d80e57e9..000000000
--- a/runtime/neurun/backend/acl_neon/ShapeFixer.cc
+++ /dev/null
@@ -1,439 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ShapeFixer.h"
-
-#include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h>
-#include <arm_compute/runtime/NEON/functions/NEArithmeticAddition.h>
-#include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h>
-#include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h>
-#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h>
-#include <arm_compute/runtime/NEON/functions/NEActivationLayer.h>
-#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
-#include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h>
-#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
-#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
-#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
-
-#include <Convert.h>
-#include <Swizzle.h>
-
-#include "util/Padding.h"
-#include "ir/Index.h"
-#include "compiler/IExecutionBuilder.h"
-#include "exec/NopFunction.h"
-#include "util/logging.h"
-#include "util/Utils.h"
-
-using ::neurun::compiler::IExecutionBuilder;
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-using ::neurun::backend::acl_common::asAclFunction;
-
-ShapeFixer::ShapeFixer(const ir::Operands &ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _ctx(ctx), _tensor_builder(tensor_builder)
-{
- assert(tensor_builder);
-}
-
-void ShapeFixer::visit(const ir::operation::Abs &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::ArgMax &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
- _tensor_builder->dimCorrection(ofm_index, false);
- _tensor_builder->dimCorrection(ifm_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Cast &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Conv2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::DepthToSpace &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::DepthwiseConv2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Dequantize &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::MaxPool2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Mean &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::AvgPool2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Concat &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- _tensor_builder->dimCorrection(ofm_index, false);
- for (const auto &inputs : node.getInputs())
- _tensor_builder->dimCorrection(inputs, false);
-}
-
-void ShapeFixer::visit(const ir::operation::EmbeddingLookup &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- _tensor_builder->dimCorrection(values_index, false);
- _tensor_builder->dimCorrection(output_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Exp &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Floor &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::FullyConnected &node)
-{
- using ir::operation::FullyConnected;
- const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
- const auto input_rank = _ctx.at(input_index).shape().rank();
- // Check for reshaping input's shape into rank-2
- if (input_rank == 3 || input_rank == 4)
- _tensor_builder->dimCorrection(input_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::HashtableLookup &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- _tensor_builder->dimCorrection(values_index, false);
- _tensor_builder->dimCorrection(output_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Gather &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- _tensor_builder->dimCorrection(ofm_index, false);
- _tensor_builder->dimCorrection(ifm_index, false);
- _tensor_builder->dimCorrection(indices_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::InstanceNorm &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::L2Normalization &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::L2Pool2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::LocalResponseNormalization &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::LogicalAnd &node)
-{
- const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
- if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::LogicalNot &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::LogicalOr &node)
-{
- const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
- if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Logistic &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::LSTM &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Pack &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- _tensor_builder->dimCorrection(ofm_index, false);
- for (const auto &inputs : node.getInputs())
- {
- _tensor_builder->dimCorrection(inputs, false);
- const auto ofm_rank = _ctx.at(ofm_index).shape().rank();
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(inputs).shape()).extendRank(ofm_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Pad &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto output_index{node.getOutputs().at(0)};
- _tensor_builder->dimCorrection(input_index, false);
- _tensor_builder->dimCorrection(output_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Mul &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Neg &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Permute &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::PReLU &node)
-{
- const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
- const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
-
- if (!(_ctx.at(ifm_index).shape() == _ctx.at(alpha_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(ifm_index).shape().rank(), _ctx.at(alpha_index).shape().rank());
- const_cast<ir::Shape &>(_ctx.at(ifm_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(alpha_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::ReduceMax &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::ReduceMin &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::ReduceSum &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::ReLU &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::ReLU1 &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::ReLU6 &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Reshape &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
-
- _tensor_builder->dimCorrection(input_index, false);
- _tensor_builder->dimCorrection(output_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::ResizeBilinear &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::RNN &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Comparison &node)
-{
- const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
-
- if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::RSQRT &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Squeeze &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- _tensor_builder->dimCorrection(input_index, false);
- _tensor_builder->dimCorrection(output_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Tanh &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Slice &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::StridedSlice &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- _tensor_builder->dimCorrection(ofm_index, false);
- _tensor_builder->dimCorrection(ifm_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Softmax &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::SpaceToBatchND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
- _tensor_builder->dimCorrection(ofm_index, false);
- _tensor_builder->dimCorrection(ifm_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::SpaceToDepth &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Split &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
- _tensor_builder->dimCorrection(input_index, false);
- for (const auto &output : node.getOutputs())
- _tensor_builder->dimCorrection(output, false);
-}
-
-void ShapeFixer::visit(const ir::operation::SQRT &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::SquaredDifference &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Sub &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::TransposeConv &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Transpose &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Unpack &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
- _tensor_builder->dimCorrection(input_index, false);
- for (const auto &output_index : node.getOutputs())
- _tensor_builder->dimCorrection(output_index, false);
-}
-
-void ShapeFixer::visit(const ir::operation::Add &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Div &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Min &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Max &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
- {
- const auto broadcast_rank =
- std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
-
- // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
- // a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
- const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
- }
-}
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_neon/ShapeFixer.h b/runtime/neurun/backend/acl_neon/ShapeFixer.h
deleted file mode 100644
index aa1f8f75a..000000000
--- a/runtime/neurun/backend/acl_neon/ShapeFixer.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_NEON_SHAPE_FIXER_H__
-#define __NEURUN_BACKEND_ACL_NEON_SHAPE_FIXER_H__
-
-#include <backend/IShapeFixer.h>
-
-#include "ir/Operands.h"
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-class ShapeFixer : public IShapeFixer
-{
-public:
- ShapeFixer(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder);
-
- void visit(const ir::operation::Abs &) override;
- void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Cast &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthToSpace &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Dequantize &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::Mean &) override;
- void visit(const ir::operation::AvgPool2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::Exp &) override;
- void visit(const ir::operation::Floor &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::HashtableLookup &) override;
- void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::L2Normalization &) override;
- void visit(const ir::operation::L2Pool2D &) override;
- void visit(const ir::operation::LocalResponseNormalization &) override;
- void visit(const ir::operation::LogicalAnd &) override;
- void visit(const ir::operation::LogicalNot &) override;
- void visit(const ir::operation::LogicalOr &) override;
- void visit(const ir::operation::Logistic &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::Neg &) override;
- void visit(const ir::operation::Pack &) override;
- void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Permute &) override;
- void visit(const ir::operation::PReLU &) override;
- void visit(const ir::operation::ReduceMax &) override;
- void visit(const ir::operation::ReduceMin &) override;
- void visit(const ir::operation::ReduceSum &) override;
- void visit(const ir::operation::ReLU &) override;
- void visit(const ir::operation::ReLU1 &) override;
- void visit(const ir::operation::ReLU6 &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::ResizeBilinear &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::RSQRT &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Tanh &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::SQRT &) override;
- void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Slice &) override;
- void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::TransposeConv &) override;
- void visit(const ir::operation::Transpose &) override;
- void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Div &) override;
- void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Max &) override;
-
-private:
- const ir::Operands &_ctx;
- std::shared_ptr<TensorBuilder> _tensor_builder;
-};
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_NEON_SHAPE_FIXER_H__
diff --git a/runtime/neurun/backend/acl_neon/TensorBuilder.h b/runtime/neurun/backend/acl_neon/TensorBuilder.h
deleted file mode 100644
index 0a6b4921d..000000000
--- a/runtime/neurun/backend/acl_neon/TensorBuilder.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_NEON_TENSOR_BUILDER_H__
-#define __NEURUN_BACKEND_ACL_NEON_TENSOR_BUILDER_H__
-
-#include <TemplTensorBuilder.h>
-
-#include "operand/NETensor.h"
-#include "operand/NESubTensor.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-using TensorBuilder =
- acl_common::TemplTensorBuilder<operand::INETensor, operand::NETensor, operand::NESubTensor>;
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_NEON_TENSOR_BUILDER_H__
diff --git a/runtime/neurun/backend/acl_neon/TensorManager.h b/runtime/neurun/backend/acl_neon/TensorManager.h
deleted file mode 100644
index 725275cef..000000000
--- a/runtime/neurun/backend/acl_neon/TensorManager.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_NEON_TENSOR_MANAGER_H__
-#define __NEURUN_BACKEND_ACL_NEON_TENSOR_MANAGER_H__
-
-#include <arm_compute/runtime/Allocator.h>
-#include <arm_compute/runtime/PoolManager.h>
-#include <arm_compute/runtime/OffsetLifetimeManager.h>
-#include <arm_compute/runtime/MemoryManagerOnDemand.h>
-#include <arm_compute/runtime/MemoryGroup.h>
-
-#include <AclMemoryManager.h>
-#include <AclLinearMemoryManager.h>
-#include <AclInternalBufferManager.h>
-#include <AclTensorManager.h>
-
-#include "operand/NETensor.h"
-#include "operand/NESubTensor.h"
-
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-using MemoryManager =
- acl_common::AclMemoryManager<operand::INETensor, operand::NETensor, operand::NESubTensor>;
-
-using LinearMemoryManager = acl_common::AclLinearMemoryManager<
- operand::INETensor, operand::NETensor, operand::NESubTensor,
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator, ::arm_compute::MemoryGroup>;
-
-using InternalBufferManager = acl_common::AclInternalBufferManager<
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator>;
-
-using TensorManager = acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
- operand::NESubTensor>;
-
-TensorManager *createTensorManager()
-{
- const std::string executor_str = util::getConfigString(util::config::EXECUTOR);
- if (executor_str == "Linear")
- {
- VERBOSE(acl_neon_createTensorManager) << "AclTensorManager as Linear" << std::endl;
- return new TensorManager(new MemoryManager(), new LinearMemoryManager(),
- new InternalBufferManager());
- }
- else
- {
- VERBOSE(acl_neon_createTensorManager) << "AclTensorManager" << std::endl;
- return new TensorManager(new MemoryManager(), new MemoryManager(), new InternalBufferManager());
- }
-}
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_NEON_TENSOR_MANAGER_H__
diff --git a/runtime/neurun/backend/acl_neon/TensorRegister.cc b/runtime/neurun/backend/acl_neon/TensorRegister.cc
deleted file mode 100644
index fe766cdf9..000000000
--- a/runtime/neurun/backend/acl_neon/TensorRegister.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorRegister.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-// NOTHING
-
-} // srcn
-} // backend
-} // neurun
diff --git a/runtime/neurun/backend/acl_neon/TensorRegister.h b/runtime/neurun/backend/acl_neon/TensorRegister.h
deleted file mode 100644
index 115e05dee..000000000
--- a/runtime/neurun/backend/acl_neon/TensorRegister.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_NEON_TENSOR_REGISTER_H__
-#define __NEURUN_BACKEND_ACL_NEON_TENSOR_REGISTER_H__
-
-#include <AclTensorRegister.h>
-#include <misc/polymorphic_downcast.h>
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-class TensorRegister : public acl_common::AclTensorRegister
-{
-public:
- TensorRegister(const ir::Operands &operands, const std::shared_ptr<TensorBuilder> &tensor_builder)
- : acl_common::AclTensorRegister{operands, tensor_builder}
- {
- // DO NOTHING
- }
-
- void setUsesCount(const ir::OperandIndex &ind, size_t num_uses) const override
- {
- nnfw::misc::polymorphic_downcast<TensorBuilder *>(tensor_builder().get())
- ->setUsesCount(ind, num_uses);
- }
-};
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_NEON_TENSOR_REGISTER_H__
diff --git a/runtime/neurun/backend/acl_neon/operand/INETensor.cc b/runtime/neurun/backend/acl_neon/operand/INETensor.cc
deleted file mode 100644
index fdb20970d..000000000
--- a/runtime/neurun/backend/acl_neon/operand/INETensor.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "INETensor.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-namespace operand
-{
-
-void INETensor::access(const std::function<void(ITensor &tensor)> &fn) { fn(*this); }
-
-} // namespace operand
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_neon/operand/INETensor.h b/runtime/neurun/backend/acl_neon/operand/INETensor.h
deleted file mode 100644
index 22b1140cf..000000000
--- a/runtime/neurun/backend/acl_neon/operand/INETensor.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_NEON_OPERAND_I_NE_TENSOR_H__
-#define __NEURUN_BACKEND_ACL_NEON_OPERAND_I_NE_TENSOR_H__
-
-#include <arm_compute/core/ITensor.h>
-
-#include <IACLTensor.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-namespace operand
-{
-
-class INETensor : public acl_common::IACLTensor
-{
-public:
- const arm_compute::ITensor *handle() const override = 0;
- arm_compute::ITensor *handle() override = 0;
- void access(const std::function<void(ITensor &tensor)> &fn) final;
-};
-
-} // namespace operand
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_NEON_OPERAND_I_NE_TENSOR_H__
diff --git a/runtime/neurun/backend/acl_neon/operand/NESubTensor.cc b/runtime/neurun/backend/acl_neon/operand/NESubTensor.cc
deleted file mode 100644
index a36af609c..000000000
--- a/runtime/neurun/backend/acl_neon/operand/NESubTensor.cc
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "NESubTensor.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-namespace operand
-{
-
-NESubTensor::NESubTensor(INETensor *parent, const arm_compute::TensorShape &tensor_shape,
- const arm_compute::Coordinates &coords, size_t rank, bool extend_parent)
- : _ne_sub_tensor(std::make_shared<arm_compute::SubTensor>(parent->handle(), tensor_shape,
- coords, extend_parent)),
- _rank{rank}
-{
- // DO NOTHING
-}
-
-const arm_compute::SubTensor *NESubTensor::handle() const { return _ne_sub_tensor.get(); }
-
-arm_compute::SubTensor *NESubTensor::handle() { return _ne_sub_tensor.get(); }
-
-} // namespace operand
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_neon/operand/NESubTensor.h b/runtime/neurun/backend/acl_neon/operand/NESubTensor.h
deleted file mode 100644
index 010e4deda..000000000
--- a/runtime/neurun/backend/acl_neon/operand/NESubTensor.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_NEON_OPERAND_NE_SUB_TENSOR_H__
-#define __NEURUN_BACKEND_ACL_NEON_OPERAND_NE_SUB_TENSOR_H__
-
-#include <arm_compute/runtime/SubTensor.h>
-#include "INETensor.h"
-#include "compiler/SubTensorInfo.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-namespace operand
-{
-
-class NESubTensor : public INETensor
-{
-public:
- NESubTensor() = delete;
-
-public:
- NESubTensor(INETensor *parent, const arm_compute::TensorShape &tensor_shape,
- const arm_compute::Coordinates &coords, size_t rank, bool extend_parent = false);
-
-public:
- size_t num_dimensions() const final { return _rank; }
-
-public:
- const arm_compute::SubTensor *handle() const override;
- arm_compute::SubTensor *handle() override;
-
-public:
- // This method is used to prevent the use of memcpy for SubTensor
- bool has_padding() const override { return true; }
-
-private:
- std::shared_ptr<arm_compute::SubTensor> _ne_sub_tensor;
- size_t _rank;
-};
-
-} // namespace operand
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_NEON_OPERAND_NE_SUB_TENSOR_H__
diff --git a/runtime/neurun/backend/acl_neon/operand/NETensor.cc b/runtime/neurun/backend/acl_neon/operand/NETensor.cc
deleted file mode 100644
index 8a9ece88f..000000000
--- a/runtime/neurun/backend/acl_neon/operand/NETensor.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <arm_compute/runtime/Memory.h>
-#include <arm_compute/runtime/MemoryRegion.h>
-#include "NETensor.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-namespace operand
-{
-
-NETensor::NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses)
- : _ne_tensor(std::make_shared<arm_compute::Tensor>()), _rank{rank}, _num_uses{num_uses}
-{
- allocator()->init(info);
-}
-
-const arm_compute::Tensor *NETensor::handle() const { return _ne_tensor.get(); }
-
-arm_compute::Tensor *NETensor::handle() { return _ne_tensor.get(); }
-
-arm_compute::TensorAllocator *NETensor::allocator() { return _ne_tensor->allocator(); }
-
-} // namespace operand
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/acl_neon/operand/NETensor.h b/runtime/neurun/backend/acl_neon/operand/NETensor.h
deleted file mode 100644
index 3de4695e9..000000000
--- a/runtime/neurun/backend/acl_neon/operand/NETensor.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ACL_NEON_OPERAND_NE_TENSOR_H__
-#define __NEURUN_BACKEND_ACL_NEON_OPERAND_NE_TENSOR_H__
-
-#include <arm_compute/core/TensorInfo.h>
-#include <arm_compute/runtime/Tensor.h>
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "INETensor.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-namespace operand
-{
-
-class NETensor : public INETensor
-{
-public:
- NETensor() = delete;
-
-public:
- NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses);
-
-public:
- size_t num_dimensions() const final { return _rank; }
-
-public:
- const arm_compute::Tensor *handle() const override;
- arm_compute::Tensor *handle() override;
- size_t num_uses() const { return _num_uses; }
-
-public:
- arm_compute::TensorAllocator *allocator();
-
-private:
- std::shared_ptr<arm_compute::Tensor> _ne_tensor;
- size_t _rank;
- size_t _num_uses;
-};
-
-} // namespace operand
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ACL_NEON_OPERAND_NE_TENSOR_H__
diff --git a/runtime/neurun/backend/cpu/Backend.h b/runtime/neurun/backend/cpu/Backend.h
deleted file mode 100644
index e52a776b9..000000000
--- a/runtime/neurun/backend/cpu/Backend.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_BACKEND_H__
-#define __NEURUN_BACKEND_CPU_BACKEND_H__
-
-#include <memory>
-#include <backend/Backend.h>
-#include <ir/Operands.h>
-
-#include "Config.h"
-#include "ConstantInitializer.h"
-#include "KernelGenerator.h"
-#include "ShapeFixer.h"
-#include "TensorRegister.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-class Backend : public ::neurun::backend::Backend
-{
-public:
- Backend() : _config{std::make_shared<Config>()} {}
-
- std::shared_ptr<IConfig> config() const override { return _config; }
-
- std::unique_ptr<BackendContext>
- newContext(const ir::Operands &operands,
- const std::shared_ptr<custom::IKernelBuilder> &kb) const override
- {
- auto tensor_builder = std::make_shared<TensorBuilder>();
- return std::unique_ptr<BackendContext>{new BackendContext{
- this, tensor_builder, std::make_shared<ConstantInitializer>(operands, tensor_builder),
- std::make_shared<KernelGenerator>(operands, tensor_builder, kb),
- std::make_shared<ShapeFixer>(operands, tensor_builder),
- std::make_shared<TensorRegister>(operands, tensor_builder)}};
- }
-
-private:
- std::shared_ptr<IConfig> _config;
-};
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_BACKEND_H__
diff --git a/runtime/neurun/backend/cpu/CMakeLists.txt b/runtime/neurun/backend/cpu/CMakeLists.txt
deleted file mode 100644
index 82c838b15..000000000
--- a/runtime/neurun/backend/cpu/CMakeLists.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-set(LIB_NEURUN_BACKEND_CPU neurun_backend_cpu)
-
-file(GLOB_RECURSE SOURCES "*.cc")
-
-add_library(${LIB_NEURUN_BACKEND_CPU} SHARED ${SOURCES})
-
-target_link_libraries(${LIB_NEURUN_BACKEND_CPU} PUBLIC nnfw_lib_cpp14)
-target_link_libraries(${LIB_NEURUN_BACKEND_CPU} PRIVATE nnfw_lib_misc nnfw_lib_cker)
-target_link_libraries(${LIB_NEURUN_BACKEND_CPU} PRIVATE neurun_core)
-target_link_libraries(${LIB_NEURUN_BACKEND_CPU} PRIVATE ${LIB_NEURUN_BACKEND_CPU_COMMON})
-target_link_libraries(${LIB_NEURUN_BACKEND_CPU} PRIVATE nnfw_common)
-target_link_libraries(${LIB_NEURUN_BACKEND_CPU} PRIVATE nnfw_coverage)
-
-set_target_properties(${LIB_NEURUN_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu)
-
-install(TARGETS ${LIB_NEURUN_BACKEND_CPU} DESTINATION lib)
diff --git a/runtime/neurun/backend/cpu/Config.cc b/runtime/neurun/backend/cpu/Config.cc
deleted file mode 100644
index 39127406f..000000000
--- a/runtime/neurun/backend/cpu/Config.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Config.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-bool Config::initialize() { return true; }
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/Config.h b/runtime/neurun/backend/cpu/Config.h
deleted file mode 100644
index be303b556..000000000
--- a/runtime/neurun/backend/cpu/Config.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_CONFIG_H__
-#define __NEURUN_BACKEND_CPU_CONFIG_H__
-
-#include <backend/IConfig.h>
-#include <cpp14/memory.h>
-#include <util/ITimer.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-class Config : public IConfig
-{
-public:
- std::string id() override { return "cpu"; }
- bool initialize() override;
- bool SupportPermutation() override { return true; }
- bool SupportSubTensorAlloc() override
- {
- // NOTE CPU allocator cannot support subtensor allocation yet
- return false;
- }
-
- std::unique_ptr<util::ITimer> timer() override
- {
- return nnfw::cpp14::make_unique<util::CPUTimer>();
- }
-};
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_CONFIG_H__
diff --git a/runtime/neurun/backend/cpu/ConstantInitializer.cc b/runtime/neurun/backend/cpu/ConstantInitializer.cc
deleted file mode 100644
index e6e7d8deb..000000000
--- a/runtime/neurun/backend/cpu/ConstantInitializer.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ConstantInitializer.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _operands{operands}, _tensor_builder{tensor_builder}
-{
- // DO NOTHING
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerCopyInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerCopyInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto &weight_obj = _operands.at(weight_index);
- registerCopyInitializer(weight_index, weight_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/ConstantInitializer.h b/runtime/neurun/backend/cpu/ConstantInitializer.h
deleted file mode 100644
index a53321997..000000000
--- a/runtime/neurun/backend/cpu/ConstantInitializer.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_COMPILER_CPU_CONSTANT_INITIALIZER_H__
-#define __NEURUN_COMPILER_CPU_CONSTANT_INITIALIZER_H__
-
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-class ConstantInitializer : public IConstantInitializer
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
-
-public:
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::FullyConnected &) override;
-
-private:
- const ir::Operands &operands() const override { return _operands; }
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
-
-private:
- const ir::Operands &_operands;
- std::shared_ptr<TensorBuilder> _tensor_builder;
-};
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_CPU_CONSTANT_INITIALIZER_H__
diff --git a/runtime/neurun/backend/cpu/KernelGenerator.cc b/runtime/neurun/backend/cpu/KernelGenerator.cc
deleted file mode 100644
index 09bd1367d..000000000
--- a/runtime/neurun/backend/cpu/KernelGenerator.cc
+++ /dev/null
@@ -1,624 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "KernelGenerator.h"
-
-#include <stdexcept>
-
-#include "cpp14/memory.h"
-#include "util/Padding.h"
-#include "kernel/OperationUtils.h"
-#include "kernel/ConvolutionLayer.h"
-#include "kernel/AvgPoolLayer.h"
-#include "kernel/MaxPoolLayer.h"
-#include "kernel/ConcatLayer.h"
-#include "kernel/FullyConnectedLayer.h"
-#include "kernel/ReshapeLayer.h"
-#include "kernel/SoftMaxLayer.h"
-#include "kernel/PermuteLayer.h"
-#include "kernel/DepthwiseConvolutionLayer.h"
-#include "kernel/AddLayer.h"
-#include "kernel/SubLayer.h"
-#include "kernel/MulLayer.h"
-#include "kernel/GatherLayer.h"
-#include "kernel/LogisticLayer.h"
-#include "kernel/PadLayer.h"
-
-#include <backend/Backend.h>
-#include <backend/IConfig.h>
-
-#include "util/logging.h"
-
-#include "util/Utils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-KernelGenerator::KernelGenerator(
- const ir::Operands &operand_ctx, const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builer)
- : _ctx(operand_ctx), _tensor_builder(tensor_builder), _kernel_builder(kernel_builer),
- _current_subg_layout(ir::Layout::UNKNOWN)
-{
- // DO NOTHING
-}
-
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
-{
- _current_subg_layout = op_seq.getLayout();
- for (const auto &e : op_seq.operations())
- {
- const auto &node = *(e.node);
- _tensor_builder->preVisit(node);
- node.accept(*this);
- _tensor_builder->postVisit(node);
- }
-}
-
-void KernelGenerator::visit(const ir::operation::Conv2D &node)
-{
- using ir::operation::Conv2D;
-
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
- const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
- const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
-
- const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
- const auto &ker_shape = _ctx.at(ker_index).shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
- stride, ker_width, ker_height);
- const auto activation = node.param().activation;
-
- const auto ofm_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout);
- const auto ifm_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ifm_index), _current_subg_layout);
- const auto ker_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ker_index), ir::Layout::UNKNOWN);
- const auto bias_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(bias_index), ir::Layout::UNKNOWN);
-
- auto ofm_alloc = _tensor_builder->at(ofm_index);
- auto ifm_alloc = _tensor_builder->at(ifm_index);
- auto ker_alloc = _tensor_builder->at(ker_index);
- auto bias_alloc = _tensor_builder->at(bias_index);
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ConvolutionLayer>();
-
- fn->configure(ifm_alloc->buffer(), ifm_backend_descr, ker_alloc->buffer(), ker_backend_descr,
- bias_alloc->buffer(), bias_backend_descr, padding.left, padding.right, padding.top,
- padding.bottom, stride.horizontal, stride.vertical, activation, ofm_alloc->buffer(),
- ofm_backend_descr);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
-{
- using ir::operation::DepthwiseConv2D;
-
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
- const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
- const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
-
- const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- // Kernel format is [1, kernel_height, kernel_width, depth_out].
- const auto &ker_shape = _ctx.at(ker_index).shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
- stride, ker_width, ker_height);
-
- const auto ofm_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout);
- const auto ifm_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ifm_index), _current_subg_layout);
- const auto ker_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ker_index), ir::Layout::UNKNOWN);
- const auto bias_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(bias_index), ir::Layout::UNKNOWN);
-
- const auto multiplier = node.param().multiplier;
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index);
- auto ifm_alloc = _tensor_builder->at(ifm_index);
- auto ker_alloc = _tensor_builder->at(ker_index);
- auto bias_alloc = _tensor_builder->at(bias_index);
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::DepthwiseConvolutionLayer>();
-
- fn->configure(ifm_alloc->buffer(), ifm_backend_descr, ker_alloc->buffer(), ker_backend_descr,
- bias_alloc->buffer(), bias_backend_descr, padding.left, padding.right, padding.top,
- padding.bottom, stride.horizontal, stride.vertical, multiplier, activation,
- ofm_alloc->buffer(), ofm_backend_descr);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
-
- const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- const auto padding =
- neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- const auto ofm_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout);
- const auto ifm_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ifm_index), _current_subg_layout);
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::MaxPoolLayer>();
-
- fn->configure(ifm_alloc->buffer(), ifm_backend_descr, padding.left, padding.right, padding.top,
- padding.bottom, stride.horizontal, stride.vertical, kw, kh, activation,
- ofm_alloc->buffer(), ofm_backend_descr);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- const auto padding =
- neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- const auto ofm_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout);
- const auto ifm_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ifm_index), _current_subg_layout);
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::AvgPoolLayer>();
-
- fn->configure(ifm_alloc->buffer(), ifm_backend_descr, padding.left, padding.right, padding.top,
- padding.bottom, stride.horizontal, stride.vertical, kw, kh, activation,
- ofm_alloc->buffer(), ofm_backend_descr);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Concat &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
-
- const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto axis =
- ::neurun::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_subg_layout);
-
- const auto ofm_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout);
- std::vector<::neurun::backend::cpu::kernel::TensorDescriptor> ifm_backend_descrs;
- for (auto &in_idx : node.getInputs())
- ifm_backend_descrs.emplace_back(
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(in_idx), _current_subg_layout));
-
- auto output_alloc = _tensor_builder->at(ofm_index).get();
-
- std::vector<const uint8_t *> input_buffers;
- for (auto &ifm_idx : node.getInputs())
- input_buffers.emplace_back(_tensor_builder->at(ifm_idx).get()->buffer());
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ConcatLayer>();
-
- fn->configure(input_buffers, ifm_backend_descrs, axis, output_alloc->buffer(), ofm_backend_descr);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::FullyConnected &node)
-{
- using ir::operation::FullyConnected;
-
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
- const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
- const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
-
- const auto ofm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(output_index), _current_subg_layout);
- const auto ifm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(input_index), _current_subg_layout);
- const auto weight_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(weight_index), ir::Layout::UNKNOWN);
- const auto bias_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(bias_index), ir::Layout::UNKNOWN);
-
- const auto activation = node.param().activation;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto weight_alloc = _tensor_builder->at(weight_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::FullyConnectedLayer>();
-
- fn->configure(input_alloc->buffer(), ifm_backend_descr, weight_alloc->buffer(),
- weight_backend_descr, bias_alloc->buffer(), bias_backend_descr, activation,
- output_alloc->buffer(), ofm_backend_descr);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Reshape &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
-
- const auto ofm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(output_index), _current_subg_layout);
- const auto ifm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(input_index), _current_subg_layout);
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ReshapeLayer>();
-
- fn->configure(input_alloc->buffer(), ifm_backend_descr, output_alloc->buffer(),
- ofm_backend_descr);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Squeeze &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
-
- const auto ofm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(output_index), _current_subg_layout);
- const auto ifm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(input_index), _current_subg_layout);
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- // Squeeze can share same kernel with reshape
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ReshapeLayer>();
-
- fn->configure(input_alloc->buffer(), ifm_backend_descr, output_alloc->buffer(),
- ofm_backend_descr);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Softmax &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
-
- const auto ofm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(output_index), _current_subg_layout);
- const auto ifm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(input_index), _current_subg_layout);
-
- const auto beta = node.param().beta;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::SoftMaxLayer>();
-
- fn->configure(input_alloc->buffer(), ifm_backend_descr, beta, output_alloc->buffer(),
- ofm_backend_descr);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- const auto ofm_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout);
- const auto lhs_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(lhs_index), _current_subg_layout);
- const auto rhs_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(rhs_index), _current_subg_layout);
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::AddLayer>();
-
- fn->configure(lhs_alloc->buffer(), lhs_backend_descr, rhs_alloc->buffer(), rhs_backend_descr,
- activation, ofm_alloc->buffer(), ofm_backend_descr);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Gather &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
-
- const auto output_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(output_index), _current_subg_layout);
- const auto input_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(input_index), _current_subg_layout);
- const auto indices_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(indices_index), _current_subg_layout);
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto indices_alloc = _tensor_builder->at(indices_index).get();
-
- const auto backend_layout = output_alloc->layout();
- UNUSED_RELEASE(backend_layout);
-
- // NOTE The frontend layout and backend layout must be the same for this operation.
- // If not the same, we have to add a stage(?) to perform permutation of output tensor. It
- // is not not efficient even if it works well. If so, it would be better to set the
- // layout of these backend tensors to the same layout.
- // There is also one thing we have to think about. This operation depends on the layout of
- // a model. For example, if a model in NHWC has this operation as output rank == 4, indices
- // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
- // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
- assert(backend_layout == input_alloc->layout());
- assert(backend_layout == indices_alloc->layout());
- const auto &input_shape = _ctx.at(input_index).shape();
- UNUSED_RELEASE(input_shape);
- assert(input_shape.rank() < 4 || _current_subg_layout == backend_layout);
-
- const auto axis_raw = node.param().axis;
- const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::GatherLayer>();
-
- fn->configure(input_alloc->buffer(), input_backend_descr, indices_alloc->buffer(),
- indices_backend_descr, output_alloc->buffer(), output_backend_descr, axis_value);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto ofm_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout);
- const auto lhs_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(lhs_index), _current_subg_layout);
- const auto rhs_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(rhs_index), _current_subg_layout);
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::SubLayer>();
-
- fn->configure(lhs_alloc->buffer(), lhs_backend_descr, rhs_alloc->buffer(), rhs_backend_descr,
- activation, ofm_alloc->buffer(), ofm_backend_descr);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto ofm_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout);
- const auto lhs_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(lhs_index), _current_subg_layout);
- const auto rhs_backend_descr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(rhs_index), _current_subg_layout);
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::MulLayer>();
-
- fn->configure(lhs_alloc->buffer(), lhs_backend_descr, rhs_alloc->buffer(), rhs_backend_descr,
- activation, ofm_alloc->buffer(), ofm_backend_descr);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Permute &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- const auto &shape = _ctx.at(output_index).shape();
- const auto input_backend_ctx = node.param().input_backend_ctx;
- const auto output_backend_ctx = node.param().output_backend_ctx;
- const auto data_type = node.getDataType();
-
- output_backend_ctx->tensor_builder->preVisit(node);
-
- auto output_object = output_backend_ctx->tensor_builder->tensorAt(output_index);
- auto input_object = input_backend_ctx->tensor_builder->tensorAt(input_index);
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::PermuteLayer>();
-
- // TODO Support NCHW frontend
- auto out_shape = shape;
- if (shape.rank() == 4 && output_object->layout() == ir::Layout::NCHW)
- {
- out_shape.dim(1) = shape.dim(3);
- out_shape.dim(2) = shape.dim(1);
- out_shape.dim(3) = shape.dim(2);
- }
-
- const auto permute_type = node.getPermuteType();
- // Check Permutation Type
- const auto inferPermuteType = [&]() {
- if (input_object->layout() == ir::Layout::NHWC && output_object->layout() == ir::Layout::NCHW)
- {
- return ir::operation::Permute::Type::NHWC_TO_NCHW;
- }
- else if (input_object->layout() == ir::Layout::NCHW &&
- output_object->layout() == ir::Layout::NHWC)
- {
- return ir::operation::Permute::Type::NCHW_TO_NHWC;
- }
- else
- {
- return ir::operation::Permute::Type::COPY;
- }
- }();
- UNUSED_RELEASE(inferPermuteType);
- assert(permute_type == inferPermuteType);
-
- fn->configure(input_object, output_object, out_shape, permute_type, data_type);
-
- input_backend_ctx->tensor_builder->postVisit(node);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Custom &node)
-{
- auto get_type_info = [this](const ir::Operand &operand) -> custom::TypeInfo {
- auto backendDescr =
- ::neurun::backend::cpu::kernel::getTensorDescriptor(operand, _current_subg_layout);
-
- custom::Shape shape(backendDescr.dimensions.size());
- for (size_t d = 0; d < backendDescr.dimensions.size(); ++d)
- {
- shape.dim(d) = backendDescr.dimensions[d];
- }
-
- return {shape, backendDescr.type};
- };
-
- auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
- std::vector<custom::TypeInfo> &types, std::vector<void *> &allocs) {
- for (auto &idx : opSeq)
- {
- const auto &operand = _ctx.at(idx);
- // TODO make sure using `_current_subg_layout` is correct for custom operations
- types.emplace_back(get_type_info(operand));
- auto in_alloc = _tensor_builder->at(idx)->buffer();
- allocs.emplace_back(in_alloc);
- }
- };
-
- backend::custom::CustomKernelConfigParams params{};
-
- fill_op_info(node.getInputs(), params.input_types, params.input_allocations);
- fill_op_info(node.getOutputs(), params.output_types, params.output_allocations);
-
- params.userdata = node.userdata().data;
- params.userdata_size = node.userdata().size;
-
- auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
- const auto ofm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(output_index), _current_subg_layout);
- const auto ifm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(input_index), _current_subg_layout);
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::LogisticLayer>();
-
- fn->configure(input_alloc->buffer(), ifm_backend_descr, output_alloc->buffer(),
- ofm_backend_descr);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Pad &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
- const auto output_index{node.getOutputs().at(0)};
- assert(_ctx.at(pad_index).isConstant());
-
- auto input = _tensor_builder->at(input_index).get();
- auto output = _tensor_builder->at(output_index).get();
- auto pad_rank = _ctx.at(pad_index).shape().dim(0);
- auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data().base());
- const auto ofm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(output_index), _current_subg_layout);
- const auto ifm_backend_descr = ::neurun::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(input_index), _current_subg_layout);
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::PadLayer>();
-
- fn->configure(input->buffer(), ifm_backend_descr, output->buffer(), ofm_backend_descr, pad_base,
- pad_rank);
-
- _execution_builder->append(std::move(fn));
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/KernelGenerator.h b/runtime/neurun/backend/cpu/KernelGenerator.h
deleted file mode 100644
index 0083d6791..000000000
--- a/runtime/neurun/backend/cpu/KernelGenerator.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_GENERATOR_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_GENERATOR_H__
-
-#include "backend/IKernelGenerator.h"
-#include "ir/Operands.h"
-#include "operand/Tensor.h"
-#include "backend/CustomKernelBuilder.h"
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-class KernelGenerator : public IKernelGenerator
-{
-public:
- KernelGenerator(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<custom::IKernelBuilder> &kernel_builder);
-
- using IKernelGenerator::visit;
-
- void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::Permute &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::Custom &node) override;
- void visit(const ir::operation::Logistic &) override;
- void visit(const ir::operation::Pad &) override;
-
-private:
- const ir::Operands &_ctx;
- std::shared_ptr<TensorBuilder> _tensor_builder;
- std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
- ir::Layout _current_subg_layout;
-};
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_GENERATOR_H__
diff --git a/runtime/neurun/backend/cpu/MemoryManager.cc b/runtime/neurun/backend/cpu/MemoryManager.cc
deleted file mode 100644
index 926d8fb4c..000000000
--- a/runtime/neurun/backend/cpu/MemoryManager.cc
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MemoryManager.h"
-
-#include <cassert>
-
-#include <MemoryPlannerFactory.h>
-#include "util/logging.h"
-#include "util/ConfigSource.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()}
-{
- // DO NOTHING
-}
-
-MemoryManager::MemoryManager(const std::string planner_id)
- : _mem_planner{createMemoryPlanner(planner_id)}
-{
- // DO NOTHING
-}
-
-cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner()
-{
- auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER);
- return cpu_common::MemoryPlannerFactory::get().create(planner_id);
-}
-
-cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id)
-{
- return cpu_common::MemoryPlannerFactory::get().create(planner_id);
-}
-
-void MemoryManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info)
-{
- auto tensor = std::make_shared<operand::Tensor>(info);
- _tensors[ind] = tensor;
-}
-
-void MemoryManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- _mem_planner->claim(ind, size);
-}
-
-void MemoryManager::releasePlan(const ir::OperandIndex &ind) { _mem_planner->release(ind); }
-
-void MemoryManager::allocate(void)
-{
- _mem_alloc = std::make_shared<cpu_common::Allocator>(_mem_planner->capacity());
- assert(_mem_alloc->base());
-
- for (auto &mem_plan : _mem_planner->memory_plans())
- {
- auto ind = mem_plan.first;
- auto mem_blk = mem_plan.second;
-
- uint8_t *buffer = _mem_alloc->base() + mem_blk.offset;
- auto tensor = _tensors[ind];
- tensor->setBuffer(buffer);
-
- VERBOSE(CPU_MEMORYMANAGER) << "TENSOR(#" << ind.value() << "): " << static_cast<void *>(buffer)
- << std::endl;
-
- // If we do not make tensor here currently, kernel generation would cause segmentation fault.
- // See also : Comments in `allocate` method.
- }
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/MemoryManager.h b/runtime/neurun/backend/cpu/MemoryManager.h
deleted file mode 100644
index 267e8001b..000000000
--- a/runtime/neurun/backend/cpu/MemoryManager.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_MEMORY_MANAGER_H__
-#define __NEURUN_BACKEND_CPU_MEMORY_MANAGER_H__
-
-#include "backend/IMemoryManager.h"
-#include <MemoryPlanner.h>
-#include "operand/Tensor.h"
-#include "ir/OperandIndexMap.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-class MemoryManager : public backend::IMemoryManager
-{
-public:
- MemoryManager();
- MemoryManager(const std::string);
- virtual ~MemoryManager() = default;
-
- void allocate(void) override;
- void deallocate(void) override { _mem_alloc->release(); }
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info);
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
- ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &tensors(void) { return _tensors; }
-
-private:
- cpu_common::IMemoryPlanner *createMemoryPlanner();
- cpu_common::IMemoryPlanner *createMemoryPlanner(const std::string);
-
-private:
- ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> _tensors;
- ir::OperandIndexMap<cpu_common::Block> _tensor_mem_map;
- std::shared_ptr<cpu_common::IMemoryPlanner> _mem_planner;
- std::shared_ptr<cpu_common::Allocator> _mem_alloc;
-};
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_MEMORY_MANAGER_H__
diff --git a/runtime/neurun/backend/cpu/PluginClassesAllocator.cc b/runtime/neurun/backend/cpu/PluginClassesAllocator.cc
deleted file mode 100644
index caba78447..000000000
--- a/runtime/neurun/backend/cpu/PluginClassesAllocator.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <util/logging.h>
-
-#include "Backend.h"
-
-extern "C" {
-neurun::backend::Backend *neurun_backend_create()
-{
- VERBOSE(neurun_backend_create) << "'cpu' loaded\n";
- return new neurun::backend::cpu::Backend;
-}
-
-void neurun_backend_destroy(neurun::backend::Backend *backend)
-{
- VERBOSE(neurun_backend_create) << "'cpu' unloaded\n";
- delete backend;
-}
-}
diff --git a/runtime/neurun/backend/cpu/ShapeFixer.cc b/runtime/neurun/backend/cpu/ShapeFixer.cc
deleted file mode 100644
index 835592b30..000000000
--- a/runtime/neurun/backend/cpu/ShapeFixer.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ShapeFixer.h"
-
-#include <stdexcept>
-
-#include "cpp14/memory.h"
-#include "util/Padding.h"
-#include "kernel/OperationUtils.h"
-#include "kernel/ConvolutionLayer.h"
-#include "kernel/AvgPoolLayer.h"
-#include "kernel/MaxPoolLayer.h"
-#include "kernel/ConcatLayer.h"
-#include "kernel/FullyConnectedLayer.h"
-#include "kernel/ReshapeLayer.h"
-#include "kernel/SoftMaxLayer.h"
-#include "kernel/PermuteLayer.h"
-#include "kernel/DepthwiseConvolutionLayer.h"
-#include "kernel/AddLayer.h"
-#include "kernel/SubLayer.h"
-#include "kernel/MulLayer.h"
-#include "kernel/GatherLayer.h"
-
-#include <backend/Backend.h>
-#include <backend/IConfig.h>
-#include "compiler/IExecutionBuilder.h"
-
-#include "util/logging.h"
-
-#include "util/Utils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-ShapeFixer::ShapeFixer(const ir::Operands &operand_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _ctx(operand_ctx), _tensor_builder(tensor_builder)
-{
- assert(tensor_builder);
-}
-
-void ShapeFixer::visit(const ir::operation::Conv2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::DepthwiseConv2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::MaxPool2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::AvgPool2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Concat &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::FullyConnected &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Reshape &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Squeeze &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Softmax &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Gather &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Add &node)
-{
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-
- // Quantization : not supported
- if (_ctx.at(lhs_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM)
- {
- throw std::runtime_error{"ShapeFixer: NYI for quantized Add"};
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Permute &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Sub &node)
-{
- // The same as Add
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
-
- // Quantization : not supported
- if (_ctx.at(lhs_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM)
- {
- throw std::runtime_error{"ShapeFixer: NYI for quantized Sub"};
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Mul &node)
-{
- // The same as Add
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
-
- // Quantization : not supported
- if (_ctx.at(lhs_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM)
- {
- throw std::runtime_error{"ShapeFixer: NYI for quantized Mul"};
- }
-}
-
-void ShapeFixer::visit(const ir::operation::Custom &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Logistic &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Pad &node)
-{
- // TODO: empty this method when quantization is supported
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
-
- // Quantization : not supported
- if (_ctx.at(lhs_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM)
- {
- throw std::runtime_error{"ShapeFixer: NYI for quantized Pad"};
- }
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/ShapeFixer.h b/runtime/neurun/backend/cpu/ShapeFixer.h
deleted file mode 100644
index bbf48498e..000000000
--- a/runtime/neurun/backend/cpu/ShapeFixer.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_SHAPE_FIXER_H__
-#define __NEURUN_BACKEND_CPU_SHAPE_FIXER_H__
-
-#include <backend/IShapeFixer.h>
-
-#include "ir/Operands.h"
-#include "operand/Tensor.h"
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-class ShapeFixer : public IShapeFixer
-{
-public:
- ShapeFixer(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder);
-
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::Permute &) override;
- void visit(const ir::operation::Custom &) override;
- void visit(const ir::operation::Logistic &) override;
- void visit(const ir::operation::Pad &) override;
-
-private:
- const ir::Operands &_ctx;
- std::shared_ptr<TensorBuilder> _tensor_builder;
-};
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_SHAPE_FIXER_H__
diff --git a/runtime/neurun/backend/cpu/TensorBuilder.cc b/runtime/neurun/backend/cpu/TensorBuilder.cc
deleted file mode 100644
index 2c654c256..000000000
--- a/runtime/neurun/backend/cpu/TensorBuilder.cc
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorBuilder.h"
-
-#include <cassert>
-
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-TensorBuilder::TensorBuilder() : _tensor_mgr{new TensorManager()}
-{
- // DO NOTHING
-}
-
-void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout, bool as_const)
-{
- _tensor_info_map.emplace(ind, info);
-
- if (as_const)
- _constants.append(ind);
-}
-
-void TensorBuilder::registerSubTensorInfo(const ir::OperandIndex &, const compiler::SubTensorInfo &)
-{
- // Not supported yet
- assert(false);
-}
-
-void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
-{
- assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
- const auto tensor_info = _tensor_info_map.at(ind);
- const auto size = tensor_info.total_size();
- _tensor_mgr->buildTensor(ind, tensor_info, _constants.contains(ind));
- _tensor_mgr->claimPlan(ind, size);
-}
-
-void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) { _tensor_mgr->releasePlan(ind); }
-
-bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
-{
- return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-void TensorBuilder::prepare(void)
-{
- _tensor_mgr->allocateConsts();
- _tensor_mgr->allocateNonconsts();
-}
-
-void TensorBuilder::allocateConsts()
-{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
-}
-
-void TensorBuilder::allocateNonconsts()
-{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
-}
-
-std::shared_ptr<::neurun::backend::operand::ITensor>
-TensorBuilder::tensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_mgr->at(ind);
-}
-
-void TensorBuilder::iterate(const IterateFunction &fn) { _tensor_mgr->iterate(fn); }
-
-std::shared_ptr<operand::Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
-{
- return _tensor_mgr->at(ind);
-}
-
-std::unique_ptr<ITensorManager> TensorBuilder::releaseTensorManager(void)
-{
- return std::move(_tensor_mgr);
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/TensorBuilder.h b/runtime/neurun/backend/cpu/TensorBuilder.h
deleted file mode 100644
index a08db8c9a..000000000
--- a/runtime/neurun/backend/cpu/TensorBuilder.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_TENSOR_BUILDER_H__
-#define __NEURUN_BACKEND_CPU_TENSOR_BUILDER_H__
-
-#include <unordered_map>
-
-#include <backend/ITensorBuilder.h>
-#include "operand/Tensor.h"
-#include "ir/OperandIndexMap.h"
-#include "TensorManager.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-class TensorBuilder : public ITensorBuilder
-{
-public:
- TensorBuilder();
-
- /**
- * @brief Register tensor information to allocate on CPU backend
- * @param[in] ind Operand index
- * @param[in] info Operand information
- * @param[in] layout Operand data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout, bool as_const) override;
- /**
- * @brief Register subtensor information to allocate on CPU backend
- * @param[in] ind Operand index
- * @param[in] info Tensor information
- */
- void registerSubTensorInfo(const ir::OperandIndex &ind,
- const compiler::SubTensorInfo &info) override;
-
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
-
- bool isRegistered(const ir::OperandIndex &) const override;
-
- void prepare(void) override;
- void allocateConsts() override;
- void allocateNonconsts() override;
- void postFunctionPrepare() override { /* DO NOTHING */}
- void finalize() override { /* DO NOTHING */}
-
- std::shared_ptr<::neurun::backend::operand::ITensor>
- tensorAt(const ir::OperandIndex &ind) override;
-
- void iterate(const IterateFunction &fn) override;
-
- void preVisit(const ir::Operation &) override { /* DO NOTHING */}
- void postVisit(const ir::Operation &) override { /* DO NOTHING */}
-
- std::unique_ptr<ITensorManager> releaseTensorManager(void) override;
-
- std::shared_ptr<operand::Tensor> at(const ir::OperandIndex &ind);
-
-private:
- std::unique_ptr<TensorManager> _tensor_mgr;
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
- ir::OperandIndexSequence _constants;
-};
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_TENSOR_BUILDER_H__
diff --git a/runtime/neurun/backend/cpu/TensorManager.cc b/runtime/neurun/backend/cpu/TensorManager.cc
deleted file mode 100644
index a346e2cb4..000000000
--- a/runtime/neurun/backend/cpu/TensorManager.cc
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorManager.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-TensorManager::TensorManager()
- : _const_mgr{new MemoryManager("Bump")}, _nonconst_mgr{new MemoryManager()}
-{
- // DO NOTHING
-}
-
-void TensorManager::allocateConsts(void) { _const_mgr->allocate(); }
-
-void TensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); }
-
-void TensorManager::deallocateConsts(void) { _const_mgr->deallocate(); }
-
-void TensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
-
-void TensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- bool as_const)
-{
- assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
- if (as_const)
- {
- _const_mgr->buildTensor(ind, tensor_info);
- _ind_to_mgr.insert({ind, *_const_mgr});
- }
- else
- {
- _nonconst_mgr->buildTensor(ind, tensor_info);
- _ind_to_mgr.insert({ind, *_nonconst_mgr});
- }
-}
-
-void TensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
- _ind_to_mgr.at(ind).claimPlan(ind, size);
-}
-
-void TensorManager::releasePlan(const ir::OperandIndex &ind)
-{
- assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
- _ind_to_mgr.at(ind).releasePlan(ind);
-}
-
-std::shared_ptr<operand::Tensor> TensorManager::at(const ir::OperandIndex &ind)
-{
- assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
- return _ind_to_mgr.at(ind).tensors().at(ind);
-}
-
-ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &TensorManager::constTensors(void)
-{
- return _const_mgr->tensors();
-}
-
-ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &TensorManager::nonconstTensors(void)
-{
- return _nonconst_mgr->tensors();
-}
-
-void TensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
-{
- for (auto it : _nonconst_mgr->tensors())
- fn(it.first);
-
- for (auto it : _const_mgr->tensors())
- fn(it.first);
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/TensorManager.h b/runtime/neurun/backend/cpu/TensorManager.h
deleted file mode 100644
index c3ef70663..000000000
--- a/runtime/neurun/backend/cpu/TensorManager.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_TENSOR_MANAGER_H__
-#define __NEURUN_BACKEND_CPU_TENSOR_MANAGER_H__
-
-#include "backend/ITensorManager.h"
-#include "MemoryManager.h"
-#include "ir/OperandIndexMap.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-class TensorManager : public backend::ITensorManager
-{
-public:
- TensorManager();
- virtual ~TensorManager() = default;
-
- void allocateConsts(void) override;
- void allocateNonconsts(void) override;
- void deallocateConsts(void) override;
- void deallocateNonconsts(void) override;
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, bool as_const);
-
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
- std::shared_ptr<operand::Tensor> at(const ir::OperandIndex &ind);
-
- ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &constTensors(void);
- ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &nonconstTensors(void);
-
- void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
-
-private:
- std::unique_ptr<MemoryManager> _const_mgr;
- std::unique_ptr<MemoryManager> _nonconst_mgr;
- ir::OperandIndexMap<MemoryManager &> _ind_to_mgr;
-};
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_TENSOR_MANAGER_H__
diff --git a/runtime/neurun/backend/cpu/TensorRegister.cc b/runtime/neurun/backend/cpu/TensorRegister.cc
deleted file mode 100644
index 2701503f5..000000000
--- a/runtime/neurun/backend/cpu/TensorRegister.cc
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorRegister.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-TensorRegister::TensorRegister(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _operands{operands}, _tensor_builder{tensor_builder}
-{
- assert(tensor_builder != nullptr);
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/TensorRegister.h b/runtime/neurun/backend/cpu/TensorRegister.h
deleted file mode 100644
index 1bda9fca3..000000000
--- a/runtime/neurun/backend/cpu/TensorRegister.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_TENSOR_REGISTER_H__
-#define __NEURUN_BACKEND_CPU_TENSOR_REGISTER_H__
-
-#include <backend/ITensorRegister.h>
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-
-class TensorRegister : public ITensorRegister
-{
-public:
- TensorRegister(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
-
-private:
- const ir::Operands &operands() const override { return _operands; }
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
- bool supportSubTensor() const final { return false; }
-
-private:
- const ir::Operands &_operands;
- const std::shared_ptr<TensorBuilder> _tensor_builder;
-};
-
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_TENSOR_REGISTER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/AddLayer.cc b/runtime/neurun/backend/cpu/kernel/AddLayer.cc
deleted file mode 100644
index 8a2d872e5..000000000
--- a/runtime/neurun/backend/cpu/kernel/AddLayer.cc
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AddLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-void AddLayer::addFloat32()
-{
- float output_activation_min, output_activation_max;
- CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) {
- return a + b;
- };
-
- if (!HaveSameShapes(&_lhsDescr, &_rhsDescr))
- {
- nnfw::cker::BroadcastBinaryArithmeticOpSlow(
- op_params, convertToExtendedCkerShape(_lhsDescr), _lhsData.f,
- convertToExtendedCkerShape(_rhsDescr), _rhsData.f, convertToExtendedCkerShape(_outputDescr),
- _outputData.f, fn);
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr),
- _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
- _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
- _outputData.f, fn);
-}
-
-void AddLayer::addQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
- &output_activation_max);
- // nnfw::cker::BinaryArithmeticOpParam op_params;
- // op_params.quantized_activation_max = output_activation_max;
- // op_params.quantized_activation_min = output_activation_min;
-
- // cker quant8 add is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void AddLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
- const TensorDescriptor &rhsDescr, const ir::Activation activation,
- uint8_t *outputData, const TensorDescriptor &outputDescr)
-{
- _lhsData.u8 = lhsData;
- _lhsDescr = lhsDescr;
- _rhsData.u8 = rhsData;
- _rhsDescr = rhsDescr;
- _inputType = lhsDescr.type;
- _activation = activation;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void AddLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- addFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- addQuant8();
- }
-}
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/AddLayer.h b/runtime/neurun/backend/cpu/kernel/AddLayer.h
deleted file mode 100644
index 7018e4c48..000000000
--- a/runtime/neurun/backend/cpu/kernel/AddLayer.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class AddLayer : public ::neurun::exec::IFunction
-{
-public:
- AddLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr()
- {
- // DO NOTHING
- }
-
-public:
- void addFloat32();
-
- void addQuant8();
-
- void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
- const TensorDescriptor &rhsDescr, const ir::Activation activation,
- uint8_t *outputData, const TensorDescriptor &outputDescr);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _lhsData;
- DataPtr _rhsData;
- DataPtr _outputData;
-
- TensorDescriptor _lhsDescr;
- TensorDescriptor _rhsDescr;
- TensorDescriptor _outputDescr;
-
- ir::Activation _activation{ir::Activation::NONE};
-
- OperandType _inputType{OperandType::FLOAT32};
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc b/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc
deleted file mode 100644
index 389955796..000000000
--- a/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AvgPoolLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/AveragePool.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-#define AVGPOOLING_PARAMETERS \
- nnfw::cker::PoolParams op_params; \
- op_params.stride_height = _strideHeight; \
- op_params.stride_width = _strideWidth; \
- op_params.filter_height = _kernelHeight; \
- op_params.filter_width = _kernelWidth; \
- op_params.padding_values.height = (int8_t)_paddingTop; \
- op_params.padding_values.width = (int8_t)_paddingLeft;
-
-AvgPoolLayer::AvgPoolLayer()
- : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _paddingLeft(0), _paddingTop(0),
- _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0),
- _kernelHeight(0), _activation(ir::Activation::NONE), _inputType(OperandType::FLOAT32)
-{
- // DO NOTHING
-}
-
-void AvgPoolLayer::averagePoolFloat32()
-{
- AVGPOOLING_PARAMETERS
- float output_activation_min, output_activation_max;
- CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
-
- nnfw::cker::AveragePool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
-}
-void AvgPoolLayer::averagePoolQuant8()
-{
- AVGPOOLING_PARAMETERS
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
- &output_activation_max);
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
-
- nnfw::cker::AveragePool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.u8,
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8);
-}
-
-void AvgPoolLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
- const uint32_t paddingLeft, const uint32_t paddingRight,
- const uint32_t paddingTop, const uint32_t paddingBottom,
- const uint32_t strideWidth, const uint32_t strideHeight,
- const uint32_t kernelWidth, const uint32_t kernelHeight,
- const ir::Activation activation, uint8_t *outputData,
- const TensorDescriptor outputDescr)
-{
- _inputData.u8 = inputData;
- _inputDescr = inputDescr;
- _inputType = inputDescr.type;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _kernelWidth = kernelWidth;
- _kernelHeight = kernelHeight;
- _activation = activation;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void AvgPoolLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- averagePoolFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- averagePoolQuant8();
- }
-}
-
-#undef AVGPOOLING_PARAMETERS
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.h b/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.h
deleted file mode 100644
index 6339efa41..000000000
--- a/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class AvgPoolLayer : public ::neurun::exec::IFunction
-{
-public:
- AvgPoolLayer();
-
-public:
- void averagePoolFloat32();
-
- void averagePoolQuant8();
-
- void configure(uint8_t *inputData, const TensorDescriptor inputDescr, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation, uint8_t *outputData,
- const TensorDescriptor outputDescr);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _outputData;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _outputDescr;
-
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
- uint32_t _kernelWidth;
- uint32_t _kernelHeight;
-
- ir::Activation _activation;
-
- OperandType _inputType;
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/ConcatLayer.cc b/runtime/neurun/backend/cpu/kernel/ConcatLayer.cc
deleted file mode 100644
index 471c9b3bb..000000000
--- a/runtime/neurun/backend/cpu/kernel/ConcatLayer.cc
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ConcatLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Concatenation.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-ConcatLayer::ConcatLayer()
- : _inputDataPtrs(), _outputData(), _axis(0), _inputDescriptors(), _outputDescr(),
- _inputType(OperandType::FLOAT32)
-{
- // DO NOTHING
-}
-
-void ConcatLayer::concatenationFloat32()
-{
- uint32_t num_inputs = _inputDescriptors.size();
-
- nnfw::cker::ConcatenationParams op_params;
- op_params.axis = _axis;
- op_params.inputs_count = num_inputs;
-
- std::vector<nnfw::cker::Shape *> inputDimsPtr;
- std::vector<nnfw::cker::Shape> inputDims;
- inputDimsPtr.reserve(num_inputs);
- inputDims.reserve(num_inputs);
-
- for (uint32_t i = 0; i < num_inputs; i++)
- {
- inputDims.push_back(convertTensorDescriptorToCkerShape(_inputDescriptors[i]));
- inputDimsPtr.push_back(&inputDims[i]);
- }
-
- std::vector<const float *> inputFloatPtrs;
-
- for (auto ptr : _inputDataPtrs)
- {
- inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr));
- }
-
- nnfw::cker::Concatenation<float>(op_params, inputDimsPtr.data(), inputFloatPtrs.data(),
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
-}
-void ConcatLayer::concatenationQuant8()
-{
- uint32_t num_inputs = _inputDescriptors.size();
-
- std::vector<int32_t> input_zeropoints(num_inputs);
- std::vector<float> input_scales(num_inputs);
- for (uint32_t i = 0; i < num_inputs; i++)
- {
- input_zeropoints[i] = _inputDescriptors[i].offset;
- input_scales[i] = _inputDescriptors[i].scale;
- }
-
- nnfw::cker::ConcatenationParams op_params;
- op_params.axis = _axis;
- op_params.inputs_count = num_inputs;
- op_params.input_zeropoint = input_zeropoints.data();
- op_params.input_scale = input_scales.data();
- op_params.output_zeropoint = _outputDescr.offset;
- op_params.output_scale = _outputDescr.scale;
-
- std::vector<nnfw::cker::Shape *> inputDimsPtr;
- std::vector<nnfw::cker::Shape> inputDims;
- inputDimsPtr.reserve(num_inputs);
- inputDims.reserve(num_inputs);
- for (uint32_t i = 0; i < num_inputs; i++)
- {
- inputDims.push_back(convertTensorDescriptorToCkerShape(_inputDescriptors[i]));
- inputDimsPtr.push_back(&inputDims[i]);
- }
-
- nnfw::cker::Concatenation<uint8_t>(op_params, inputDimsPtr.data(), _inputDataPtrs.data(),
- convertTensorDescriptorToCkerShape(_outputDescr),
- _outputData.u8);
-}
-
-void ConcatLayer::configure(const std::vector<const uint8_t *> &inputDataPtrs,
- const std::vector<TensorDescriptor> &inputDescriptors, int32_t axis,
- uint8_t *outputData, const TensorDescriptor outputDescr)
-{
- _inputDataPtrs = inputDataPtrs;
-
- for (auto inputDescr : inputDescriptors)
- {
- _inputDescriptors.emplace_back(inputDescr);
- _inputType = inputDescr.type;
- }
-
- _axis = axis;
-
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void ConcatLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- concatenationFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- concatenationQuant8();
- }
-}
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/ConcatLayer.h b/runtime/neurun/backend/cpu/kernel/ConcatLayer.h
deleted file mode 100644
index 048aa4208..000000000
--- a/runtime/neurun/backend/cpu/kernel/ConcatLayer.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_CONCATLAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_CONCATLAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class ConcatLayer : public ::neurun::exec::IFunction
-{
-public:
- ConcatLayer();
-
-public:
- void concatenationFloat32();
-
- void concatenationQuant8();
-
- void configure(const std::vector<const uint8_t *> &inputDataPtrs,
- const std::vector<TensorDescriptor> &inputDescriptors, int32_t axis,
- uint8_t *outputData, const TensorDescriptor outputDescr);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- std::vector<const uint8_t *> _inputDataPtrs;
- DataPtr _outputData;
-
- int32_t _axis;
-
- std::vector<TensorDescriptor> _inputDescriptors;
- TensorDescriptor _outputDescr;
-
- OperandType _inputType;
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_CONCATLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.cc b/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.cc
deleted file mode 100644
index 2fdb0baf7..000000000
--- a/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.cc
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ConvolutionLayer.h"
-
-#include <cker/operation/Conv.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-ConvolutionLayer::ConvolutionLayer()
- : _inputData(), _kernelData(), _outputData(), _biasData(), _inputDescr(), _kernelDescr(),
- _outputDescr(), _biasDescr(), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE),
- _inputType(OperandType::FLOAT32)
-{
- // DO NOTHING
-}
-
-void ConvolutionLayer::convFloat32()
-{
- float output_activation_min, output_activation_max;
- CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
-
- nnfw::cker::ConvParams op_params;
- op_params.padding_values.width = _paddingLeft;
- op_params.padding_values.height = _paddingTop;
- op_params.stride_width = _strideWidth;
- op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
-
- nnfw::cker::Conv(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
- convertTensorDescriptorToCkerShape(_kernelDescr), _kernelData.f,
- convertTensorDescriptorToCkerShape(_biasDescr), _biasData.f,
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
-}
-
-void ConvolutionLayer::convQuant8()
-{
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
- &output_activation_max);
-
- float real_multiplier = 0.0;
- int32_t output_multiplier = 0;
- int32_t output_shift = 0;
- GetQuantizedConvolutionMultiplier(_inputDescr, _kernelDescr, _biasDescr, _outputDescr,
- &real_multiplier);
- QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
-
- nnfw::cker::ConvParams op_params;
- op_params.stride_width = _strideWidth;
- op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
- op_params.padding_values.width = _paddingLeft;
- op_params.padding_values.height = _paddingTop;
- op_params.input_offset = -_inputDescr.offset;
- op_params.weights_offset = -_kernelDescr.offset;
- op_params.output_offset = _outputDescr.offset;
- op_params.output_multiplier = output_multiplier;
- op_params.output_shift = output_shift;
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
-
- nnfw::cker::Conv(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.u8,
- convertTensorDescriptorToCkerShape(_kernelDescr), _kernelData.u8,
- convertTensorDescriptorToCkerShape(_biasDescr), _biasData.i32,
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8);
-}
-
-void ConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
- uint8_t *kernelData, const TensorDescriptor kernelDescr,
- uint8_t *biasData, const TensorDescriptor biasDescr,
- const uint32_t paddingLeft, const uint32_t paddingRight,
- const uint32_t paddingTop, const uint32_t paddingBottom,
- const uint32_t strideWidth, const uint32_t strideHeight,
- const ir::Activation activation, uint8_t *outputData,
- const TensorDescriptor outputDescr)
-{
- _inputData.u8 = inputData;
- _inputDescr = inputDescr;
- _inputType = inputDescr.type;
- _kernelData.u8 = kernelData;
- _kernelDescr = kernelDescr;
- _biasData.u8 = biasData;
- _biasDescr = biasDescr;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _activation = activation;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void ConvolutionLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- convFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- convQuant8();
- }
-}
-
-#undef ANDROID_NN_CONV_PARAMETERS
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.h b/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.h
deleted file mode 100644
index 16669f316..000000000
--- a/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class ConvolutionLayer : public ::neurun::exec::IFunction
-{
-public:
- ConvolutionLayer();
-
-public:
- void convFloat32();
-
- void convQuant8();
-
- void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData,
- const TensorDescriptor kernelDescr, uint8_t *biasData,
- const TensorDescriptor biasDescr, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
- const ir::Activation activation, uint8_t *outputData,
- const TensorDescriptor outputDescr);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _kernelData;
- DataPtr _outputData;
- DataPtr _biasData;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _kernelDescr;
- TensorDescriptor _outputDescr;
- TensorDescriptor _biasDescr;
-
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
-
- ir::Activation _activation;
-
- OperandType _inputType;
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.cc b/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.cc
deleted file mode 100644
index e33e3465e..000000000
--- a/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.cc
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DepthwiseConvolutionLayer.h"
-
-#include <cker/operation/DepthwiseConv.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-DepthwiseConvolutionLayer::DepthwiseConvolutionLayer()
- : _inputData(), _kernelData(), _outputData(), _biasData(), _inputDescr(), _kernelDescr(),
- _outputDescr(), _biasDescr(), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _multiplier(0),
- _activation(ir::Activation::NONE), _inputType(OperandType::FLOAT32)
-{
- // DO NOTHING
-}
-
-void DepthwiseConvolutionLayer::convFloat32()
-{
- float output_activation_min, output_activation_max;
- CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
-
- nnfw::cker::DepthwiseConvParams op_params;
- op_params.stride_width = _strideWidth;
- op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
- op_params.padding_values.width = _paddingLeft;
- op_params.padding_values.height = _paddingTop;
- op_params.depth_multiplier = _multiplier;
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
-
- nnfw::cker::DepthwiseConv(op_params, convertTensorDescriptorToCkerShape(_inputDescr),
- _inputData.f, convertTensorDescriptorToCkerShape(_kernelDescr),
- _kernelData.f, convertTensorDescriptorToCkerShape(_biasDescr),
- _biasData.f, convertTensorDescriptorToCkerShape(_outputDescr),
- _outputData.f);
-}
-
-void DepthwiseConvolutionLayer::convQuant8()
-{
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
- &output_activation_max);
-
- float real_multiplier = 0.0;
- int32_t output_multiplier = 0;
- int32_t output_shift = 0;
- GetQuantizedConvolutionMultiplier(_inputDescr, _kernelDescr, _biasDescr, _outputDescr,
- &real_multiplier);
- QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
-
- nnfw::cker::DepthwiseConvParams op_params;
- op_params.stride_width = _strideWidth;
- op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
- op_params.padding_values.width = _paddingLeft;
- op_params.padding_values.height = _paddingTop;
- op_params.depth_multiplier = _multiplier;
- op_params.input_offset = -_inputDescr.offset;
- op_params.weights_offset = -_kernelDescr.offset;
- op_params.output_offset = _outputDescr.offset;
- op_params.output_multiplier = output_multiplier;
- op_params.output_shift = output_shift;
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
-
- nnfw::cker::DepthwiseConv(op_params, convertTensorDescriptorToCkerShape(_inputDescr),
- _inputData.u8, convertTensorDescriptorToCkerShape(_kernelDescr),
- _kernelData.u8, convertTensorDescriptorToCkerShape(_biasDescr),
- _biasData.i32, convertTensorDescriptorToCkerShape(_outputDescr),
- _outputData.u8);
-}
-
-void DepthwiseConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
- uint8_t *kernelData, const TensorDescriptor kernelDescr,
- uint8_t *biasData, const TensorDescriptor biasDescr,
- const uint32_t paddingLeft, const uint32_t paddingRight,
- const uint32_t paddingTop, const uint32_t paddingBottom,
- const uint32_t strideWidth, const uint32_t strideHeight,
- const uint32_t multiplier,
- const ir::Activation activation, uint8_t *outputData,
- const TensorDescriptor outputDescr)
-{
- _inputData.u8 = inputData;
- _inputDescr = inputDescr;
- _inputType = inputDescr.type;
- _kernelData.u8 = kernelData;
- _kernelDescr = kernelDescr;
- _biasData.u8 = biasData;
- _biasDescr = biasDescr;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _multiplier = multiplier;
- _activation = activation;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void DepthwiseConvolutionLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- convFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- convQuant8();
- }
-}
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.h b/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.h
deleted file mode 100644
index 575cc0ab1..000000000
--- a/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_KERNEL_CPU_DEPTHWISECONVOLUTIONLAYER_H__
-#define __NEURUN_KERNEL_CPU_DEPTHWISECONVOLUTIONLAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class DepthwiseConvolutionLayer : public ::neurun::exec::IFunction
-{
-public:
- DepthwiseConvolutionLayer();
-
-public:
- void convFloat32();
-
- void convQuant8();
-
- void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData,
- const TensorDescriptor kernelDescr, uint8_t *biasData,
- const TensorDescriptor biasDescr, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
- const uint32_t multiplier, const ir::Activation activation, uint8_t *outputData,
- const TensorDescriptor outputDescr);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _kernelData;
- DataPtr _outputData;
- DataPtr _biasData;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _kernelDescr;
- TensorDescriptor _outputDescr;
- TensorDescriptor _biasDescr;
-
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
-
- uint32_t _multiplier;
-
- ir::Activation _activation;
-
- OperandType _inputType;
-};
-
-} // namespace kernel
-} // namespace cpu
-} // backend
-} // namespace neurun
-
-#endif // __NEURUN_KERNEL_CPU_DEPTHWISECONVOLUTIONLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.cc b/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.cc
deleted file mode 100644
index 055f71590..000000000
--- a/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.cc
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "FullyConnectedLayer.h"
-
-#include <cker/operation/FullyConnected.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-FullyConnectedLayer::FullyConnectedLayer()
- : _inputData(), _weightsData(), _biasData(), _outputData(), _inputDescr(), _weightsDescr(),
- _biasDescr(), _outputDescr(), _activation(ir::Activation::NONE),
- _inputType(OperandType::FLOAT32)
-{
- // DO NOTHING
-}
-
-void FullyConnectedLayer::fullyConnectedFloat32()
-{
- float output_activation_min, output_activation_max;
- CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
-
- nnfw::cker::FullyConnectedParams op_params;
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
-
- nnfw::cker::FullyConnected(op_params, convertToExtendedCkerShape(_inputDescr), _inputData.f,
- convertToExtendedCkerShape(_weightsDescr), _weightsData.f,
- convertToExtendedCkerShape(_biasDescr), _biasData.f,
- convertToExtendedCkerShape(_outputDescr), _outputData.f);
-}
-
-// executionMutex is used to protect concurrent access of non-threadsafe resources
-// like gemmlowp::GemmContext.
-void FullyConnectedLayer::fullyConnectedQuant8()
-{
- float real_multiplier = 0.0;
- int32_t output_multiplier = 0;
- int32_t output_shift = 0;
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- GetQuantizedConvolutionMultiplier(_inputDescr, _weightsDescr, _biasDescr, _outputDescr,
- &real_multiplier);
- QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
- CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
- &output_activation_max);
-
- nnfw::cker::FullyConnectedParams op_params;
- op_params.input_offset = -_inputDescr.offset;
- op_params.weights_offset = -_weightsDescr.offset;
- op_params.output_offset = _outputDescr.offset;
- op_params.output_multiplier = output_multiplier;
- op_params.output_shift = output_shift;
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
-
- nnfw::cker::FullyConnected(op_params, convertToExtendedCkerShape(_inputDescr), _inputData.u8,
- convertToExtendedCkerShape(_weightsDescr), _weightsData.u8,
- convertToExtendedCkerShape(_biasDescr), _biasData.i32,
- convertToExtendedCkerShape(_outputDescr), _outputData.u8);
-}
-
-void FullyConnectedLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
- uint8_t *weightsData, const TensorDescriptor weightsDescr,
- uint8_t *biasData, const TensorDescriptor biasDescr,
- ir::Activation activation, uint8_t *outputData,
- const TensorDescriptor outputDescr)
-{
- _inputData.u8 = inputData;
- _inputDescr = inputDescr;
- _inputType = inputDescr.type;
- _weightsData.u8 = weightsData;
- _weightsDescr = weightsDescr;
- _biasData.u8 = biasData;
- _biasDescr = biasDescr;
- _activation = activation;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void FullyConnectedLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- fullyConnectedFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- fullyConnectedQuant8();
- }
-}
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.h b/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.h
deleted file mode 100644
index 9fdc393a4..000000000
--- a/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class FullyConnectedLayer : public ::neurun::exec::IFunction
-{
-public:
- FullyConnectedLayer();
-
-public:
- void fullyConnectedFloat32();
-
- void fullyConnectedQuant8();
-
- void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *weightsData,
- const TensorDescriptor weightsDescr, uint8_t *biasData,
- const TensorDescriptor biasDescr, ir::Activation activation, uint8_t *outputData,
- const TensorDescriptor outputDescr);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _weightsData;
- DataPtr _biasData;
- DataPtr _outputData;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _weightsDescr;
- TensorDescriptor _biasDescr;
- TensorDescriptor _outputDescr;
-
- ir::Activation _activation;
-
- OperandType _inputType;
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/GatherLayer.cc b/runtime/neurun/backend/cpu/kernel/GatherLayer.cc
deleted file mode 100644
index b29acba79..000000000
--- a/runtime/neurun/backend/cpu/kernel/GatherLayer.cc
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GatherLayer.h"
-
-#include <cker/operation/Gather.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-void GatherLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr,
- uint8_t *indicesData, const TensorDescriptor &indicesDescr,
- uint8_t *outputData, const TensorDescriptor &outputDescr, int32_t axis)
-{
- _inputData.u8 = inputData;
- _inputDescr = inputDescr;
- _indicesData.u8 = indicesData;
- _indicesDescr = indicesDescr;
- _axis = axis;
- _inputType = inputDescr.type;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void GatherLayer::run()
-{
- nnfw::cker::GatherParams op_params;
- op_params.axis = _axis;
-
- switch (_inputType)
- {
- case OperandType::FLOAT32:
- nnfw::cker::Gather<float>(op_params, convertTensorDescriptorToCkerShape(_inputDescr),
- _inputData.f, convertTensorDescriptorToCkerShape(_indicesDescr),
- _indicesData.i32, convertTensorDescriptorToCkerShape(_outputDescr),
- _outputData.f);
- break;
- case OperandType::QUANT8_ASYMM:
- nnfw::cker::Gather<uint8_t>(op_params, convertTensorDescriptorToCkerShape(_inputDescr),
- _inputData.u8, convertTensorDescriptorToCkerShape(_indicesDescr),
- _indicesData.i32,
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8);
- break;
- case OperandType::INT32:
- nnfw::cker::Gather<int32_t>(
- op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.i32,
- convertTensorDescriptorToCkerShape(_indicesDescr), _indicesData.i32,
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.i32);
- break;
- default:
- throw std::runtime_error("Gather NYI for this operand type!");
- }
-}
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/GatherLayer.h b/runtime/neurun/backend/cpu/kernel/GatherLayer.h
deleted file mode 100644
index af4f8b8f6..000000000
--- a/runtime/neurun/backend/cpu/kernel/GatherLayer.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_GATHERLAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_GATHERLAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class GatherLayer : public ::neurun::exec::IFunction
-{
-public:
- GatherLayer()
- : _inputData{nullptr}, _indicesData{nullptr}, _outputData{nullptr}, _axis{-1},
- _inputType{OperandType::FLOAT32}
- {
- // DO NOTHING
- }
-
-public:
- void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, uint8_t *indicesData,
- const TensorDescriptor &indicesDescr, uint8_t *outputData,
- const TensorDescriptor &outputDescr, int32_t axis);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _indicesData;
- DataPtr _outputData;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _indicesDescr;
- TensorDescriptor _outputDescr;
-
- int32_t _axis;
- OperandType _inputType;
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_GATHERLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/LogisticLayer.cc b/runtime/neurun/backend/cpu/kernel/LogisticLayer.cc
deleted file mode 100644
index d9916964e..000000000
--- a/runtime/neurun/backend/cpu/kernel/LogisticLayer.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogisticLayer.h"
-
-#include <cker/operation/Logistic.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-LogisticLayer::LogisticLayer()
- : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _inputType(OperandType::FLOAT32)
-{
- // DO NOTHING
-}
-
-void LogisticLayer::logisticFloat32()
-{
- nnfw::cker::Logistic(convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
-}
-
-void LogisticLayer::logisticQuant8()
-{
- // cker quant8 logistic is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void LogisticLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr,
- uint8_t *outputData, const TensorDescriptor &outputDescr)
-{
- _inputData.u8 = inputData;
- _inputDescr = inputDescr;
- _inputType = inputDescr.type;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void LogisticLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- logisticFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- logisticQuant8();
- }
-}
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/LogisticLayer.h b/runtime/neurun/backend/cpu/kernel/LogisticLayer.h
deleted file mode 100644
index 33fcd6fed..000000000
--- a/runtime/neurun/backend/cpu/kernel/LogisticLayer.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class LogisticLayer : public ::neurun::exec::IFunction
-{
-public:
- LogisticLayer();
-
-public:
- void logisticFloat32();
-
- void logisticQuant8();
-
- void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, uint8_t *outputData,
- const TensorDescriptor &outputDescr);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _outputData;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _outputDescr;
-
- OperandType _inputType;
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc
deleted file mode 100644
index 095cd6d1d..000000000
--- a/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MaxPoolLayer.h"
-
-#include <cker/operation/MaxPool.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-#define MAXPOOLING_PARAMETERS \
- nnfw::cker::PoolParams op_params; \
- op_params.stride_height = _strideHeight; \
- op_params.stride_width = _strideWidth; \
- op_params.filter_height = _kernelHeight; \
- op_params.filter_width = _kernelWidth; \
- op_params.padding_values.height = (int8_t)_paddingTop; \
- op_params.padding_values.width = (int8_t)_paddingLeft;
-
-MaxPoolLayer::MaxPoolLayer()
- : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _paddingLeft(0), _paddingTop(0),
- _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0),
- _kernelHeight(0), _activation(ir::Activation::NONE), _inputType(OperandType::FLOAT32)
-{
- // DO NOTHING
-}
-
-void MaxPoolLayer::maxPoolFloat32()
-{
- MAXPOOLING_PARAMETERS
- float output_activation_min, output_activation_max;
- CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
-
- nnfw::cker::MaxPool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
-}
-void MaxPoolLayer::maxPoolQuant8()
-{
- MAXPOOLING_PARAMETERS
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
- &output_activation_max);
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
-
- nnfw::cker::MaxPool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.u8,
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8);
-}
-
-void MaxPoolLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
- const uint32_t paddingLeft, const uint32_t paddingRight,
- const uint32_t paddingTop, const uint32_t paddingBottom,
- const uint32_t strideWidth, const uint32_t strideHeight,
- const uint32_t kernelWidth, const uint32_t kernelHeight,
- const ir::Activation activation, uint8_t *outputData,
- const TensorDescriptor outputDescr)
-{
- _inputData.u8 = inputData;
-
- _inputDescr = inputDescr;
- _inputType = inputDescr.type;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _kernelWidth = kernelWidth;
- _kernelHeight = kernelHeight;
- _activation = activation;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void MaxPoolLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- maxPoolFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- maxPoolQuant8();
- }
-}
-
-#undef MAXPOOLING_PARAMETERS
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h
deleted file mode 100644
index 88a574c42..000000000
--- a/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class MaxPoolLayer : public ::neurun::exec::IFunction
-{
-public:
- MaxPoolLayer();
-
-public:
- void maxPoolFloat32();
-
- void maxPoolQuant8();
-
- void configure(uint8_t *inputData, const TensorDescriptor inputDescr, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation, uint8_t *outputData,
- const TensorDescriptor outputDescr);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _outputData;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _outputDescr;
-
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
- uint32_t _kernelWidth;
- uint32_t _kernelHeight;
-
- ir::Activation _activation;
-
- OperandType _inputType;
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/MulLayer.cc b/runtime/neurun/backend/cpu/kernel/MulLayer.cc
deleted file mode 100644
index d6ce2cfad..000000000
--- a/runtime/neurun/backend/cpu/kernel/MulLayer.cc
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MulLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-void MulLayer::mulFloat32()
-{
- float output_activation_min, output_activation_max;
- CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) {
- return a * b;
- };
-
- if (!HaveSameShapes(&_lhsDescr, &_rhsDescr))
- {
- nnfw::cker::BroadcastBinaryArithmeticOpSlow(
- op_params, convertToExtendedCkerShape(_lhsDescr), _lhsData.f,
- convertToExtendedCkerShape(_rhsDescr), _rhsData.f, convertToExtendedCkerShape(_outputDescr),
- _outputData.f, fn);
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr),
- _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
- _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
- _outputData.f, fn);
-}
-
-void MulLayer::mulQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
- &output_activation_max);
- // nnfw::cker::BinaryArithmeticOpParam op_params;
- // op_params.quantized_activation_max = output_activation_max;
- // op_params.quantized_activation_min = output_activation_min;
-
- // cker quant8 mul is not implemented yet
- throw std::runtime_error{"Mull NYI for quantized"};
-}
-
-void MulLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
- const TensorDescriptor &rhsDescr, const ir::Activation activation,
- uint8_t *outputData, const TensorDescriptor &outputDescr)
-{
- _lhsData.u8 = lhsData;
- _lhsDescr = lhsDescr;
- _rhsData.u8 = rhsData;
- _rhsDescr = rhsDescr;
- _inputType = lhsDescr.type;
- _activation = activation;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void MulLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- mulFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- mulQuant8();
- }
-}
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/MulLayer.h b/runtime/neurun/backend/cpu/kernel/MulLayer.h
deleted file mode 100644
index 05fc3052f..000000000
--- a/runtime/neurun/backend/cpu/kernel/MulLayer.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class MulLayer : public ::neurun::exec::IFunction
-{
-public:
- MulLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr()
- {
- // DO NOTHING
- }
-
-public:
- void mulFloat32();
-
- void mulQuant8();
-
- void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
- const TensorDescriptor &rhsDescr, const ir::Activation activation,
- uint8_t *outputData, const TensorDescriptor &outputDescr);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _lhsData;
- DataPtr _rhsData;
- DataPtr _outputData;
-
- TensorDescriptor _lhsDescr;
- TensorDescriptor _rhsDescr;
- TensorDescriptor _outputDescr;
-
- ir::Activation _activation{ir::Activation::NONE};
-
- OperandType _inputType{OperandType::FLOAT32};
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/OperationUtils.cc b/runtime/neurun/backend/cpu/kernel/OperationUtils.cc
deleted file mode 100644
index 8aa15dcbd..000000000
--- a/runtime/neurun/backend/cpu/kernel/OperationUtils.cc
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtils.h"
-
-#include <cmath>
-#include <algorithm>
-#include <cassert>
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-uint32_t getNumberOfDimensions(const TensorDescriptor &descr) { return descr.dimensions.size(); }
-
-uint32_t getNumberOfElements(const TensorDescriptor &descr)
-{
- uint32_t count = 1;
- for (size_t i = 0; i < descr.dimensions.size(); i++)
- {
- count *= descr.dimensions[i];
- }
- return count;
-}
-
-uint32_t getSizeOfDimension(const TensorDescriptor &descr, uint32_t dimensionIdx)
-{
- if (dimensionIdx >= descr.dimensions.size())
- {
- // TODO, log the error
- return 0;
- }
- return descr.dimensions[dimensionIdx];
-}
-
-void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
-{
- if (double_multiplier == 0.)
- {
- *quantized_multiplier = 0;
- *shift = 0;
- return;
- }
- const double q = std::frexp(double_multiplier, shift);
- auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
-
- assert(q_fixed <= (1ll << 31));
- if (q_fixed == (1ll << 31))
- {
- q_fixed /= 2;
- ++*shift;
- }
- assert(q_fixed <= std::numeric_limits<int32_t>::max());
- *quantized_multiplier = static_cast<int32_t>(q_fixed);
-}
-
-void GetQuantizedConvolutionMultiplier(const TensorDescriptor &inputDescr,
- const TensorDescriptor &filterDescr,
- const TensorDescriptor &biasDescr,
- const TensorDescriptor &outputDescr, float *multiplier)
-{
- const float input_product_scale = inputDescr.scale * filterDescr.scale;
- const float bias_scale = biasDescr.scale;
- const float output_scale = outputDescr.scale;
- // The following conditions must be guaranteed by the training pipeline.
- UNUSED_RELEASE(bias_scale);
- assert(std::abs(input_product_scale - bias_scale) <=
- 1e-6 * std::min(input_product_scale, bias_scale));
- assert(input_product_scale >= 0);
- assert(input_product_scale < output_scale);
- *multiplier = input_product_scale / output_scale;
-}
-
-void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
- int *left_shift)
-{
- assert(double_multiplier > 1.);
- const double q = std::frexp(double_multiplier, left_shift);
- int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
- assert(q_fixed <= (1ll << 31));
- if (q_fixed == (1ll << 31))
- {
- q_fixed /= 2;
- ++*left_shift;
- }
- assert(*left_shift >= 0);
- assert(q_fixed <= std::numeric_limits<int32_t>::max());
- *quantized_multiplier = static_cast<int32_t>(q_fixed);
-}
-
-void CalculateActivationRangeFloat(ir::Activation activation, float *activation_min,
- float *activation_max)
-{
- if (activation == ir::Activation::RELU)
- {
- *activation_min = 0.f;
- *activation_max = std::numeric_limits<float>::max();
- }
- else if (activation == ir::Activation::RELU6)
- {
- *activation_min = 0.f;
- *activation_max = 6.f;
- }
- else if (activation == ir::Activation::RELU1)
- {
- *activation_min = -1.f;
- *activation_max = 1.f;
- }
- else if (activation == ir::Activation::SIGMOID)
- {
- *activation_min = 0.f;
- *activation_max = 1.f;
- }
- else if (activation == ir::Activation::NONE)
- {
- *activation_min = std::numeric_limits<float>::lowest();
- *activation_max = std::numeric_limits<float>::max();
- }
- else
- {
- std::cout << "Unsupported fused activation function." << std::endl;
- }
-}
-
-void CalculateActivationRangeUint8(ir::Activation activation, const TensorDescriptor &outputDescr,
- int32_t *act_min, int32_t *act_max)
-{
- const int32_t qmin = std::numeric_limits<uint8_t>::min();
- const int32_t qmax = std::numeric_limits<uint8_t>::max();
- const auto scale = outputDescr.scale;
- const auto zero_point = outputDescr.offset;
- auto quantize = [scale, zero_point](float f) {
- return zero_point + static_cast<int32_t>(std::round(f / scale));
- };
- if (activation == ir::Activation::RELU)
- {
- *act_min = std::max(qmin, quantize(0.0));
- *act_max = qmax;
- }
- else if (activation == ir::Activation::RELU6)
- {
- *act_min = std::max(qmin, quantize(0.0));
- *act_max = std::min(qmax, quantize(6.0));
- }
- else if (activation == ir::Activation::RELU1)
- {
- *act_min = std::max(qmin, quantize(-1.0));
- *act_max = std::min(qmax, quantize(1.0));
- }
- else if (activation == ir::Activation::SIGMOID)
- {
- *act_min = std::max(qmin, quantize(0.0));
- *act_max = std::min(qmax, quantize(1.0));
- }
- else if (activation == ir::Activation::NONE)
- {
- *act_min = qmin;
- *act_max = qmax;
- }
- else
- {
- std::cout << "Unsupported fused activation function." << std::endl;
- }
-}
-
-bool HaveSameShapes(const TensorDescriptor *input1, const TensorDescriptor *input2)
-{
- if (input1 == input2)
- return true;
- if (input2 == NULL || input2 == NULL)
- return false;
-
- if (input1 == NULL)
- {
- return (getNumberOfDimensions(*input2) == 0);
- }
-
- if (getNumberOfDimensions(*input1) != getNumberOfDimensions(*input2))
- return false;
-
- for (uint32_t i = 0; i < getNumberOfDimensions(*input1); i++)
- if (input1->dimensions[i] != input2->dimensions[i])
- return false;
-
- return true;
-}
-
-int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift)
-{
- const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
- (1ll << (31 - input_integer_bits)) / (1ll << input_left_shift);
- // Tighten bound using floor. Suppose that we could use the exact value.
- // After scaling the difference, the result would be at the maximum. Thus we
- // must ensure that our value has lower magnitude.
- return static_cast<int32_t>(std::floor(max_input_rescaled));
-}
-
-TensorDescriptor getTensorDescriptor(const ir::Operand &o, ir::Layout frontend_layout)
-{
- TensorDescriptor descriptor;
-
- auto dims = o.shape().dims();
- if (frontend_layout == ir::Layout::NCHW && o.shape().rank() == 4)
- {
- // NCHW -> NHWC
- uint32_t permutation[4] = {0, 2, 3, 1};
- for (int i = 0; i < o.shape().rank(); ++i)
- {
- dims.at(i) = o.shape().dim(permutation[i]);
- }
- }
- descriptor.dimensions = std::vector<uint32_t>(dims.begin(), dims.end());
- descriptor.type = static_cast<OperandType>(static_cast<int32_t>(o.typeInfo().type()));
- descriptor.scale = o.typeInfo().scale();
- descriptor.offset = o.typeInfo().offset();
-
- // CPU backend assume that neurun internal shape's rank is always same or less than 4
- assert(descriptor.dimensions.size() <= 4);
-
- return descriptor;
-}
-
-uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions)
-{
- uint32_t size = 4;
-
- switch (type)
- {
- case OperandType::FLOAT32:
- case OperandType::INT32:
- case OperandType::UINT32:
- size = 4;
- break;
- case OperandType::BOOL8:
- case OperandType::QUANT8_ASYMM:
- case OperandType::QUANT8_SYMM:
- size = 1;
- break;
- default:
- throw std::runtime_error("Not supported operand type.");
- break;
- }
-
- for (auto d : dimensions)
- {
- size *= d;
- }
-
- return size;
-}
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/OperationUtils.h b/runtime/neurun/backend/cpu/kernel/OperationUtils.h
deleted file mode 100644
index b9e8c8974..000000000
--- a/runtime/neurun/backend/cpu/kernel/OperationUtils.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__
-#define __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__
-
-#include <iostream>
-#include <limits>
-#include <vector>
-
-#include <cker/Shape.h>
-
-#include "ir/Operand.h"
-#include "ir/DataType.h"
-#include <ir/InternalType.h>
-
-using OperandType = neurun::ir::DataType;
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-struct TensorDescriptor
-{
- OperandType type;
- std::vector<uint32_t> dimensions;
- float scale;
- int32_t offset;
-};
-
-union DataPtr {
- uint8_t *u8;
- int8_t *i8;
- int32_t *i32;
- float *f;
- void *v;
-};
-
-uint32_t getNumberOfDimensions(const TensorDescriptor &descr);
-
-uint32_t getNumberOfElements(const TensorDescriptor &descr);
-
-uint32_t getSizeOfDimension(const TensorDescriptor &descr, uint32_t dimensionIdx);
-
-inline nnfw::cker::Shape convertToExtendedCkerShape(const TensorDescriptor &descr)
-{
- std::vector<int32_t> raw_shape;
- raw_shape.resize(4);
-
- uint32_t src = 4 - descr.dimensions.size();
- for (uint32_t i = 0; i < 4; ++i)
- {
- if (i < src)
- {
- raw_shape[i] = 1;
- }
- else
- {
- raw_shape[i] = descr.dimensions[i - src];
- }
- }
-
- return nnfw::cker::GetShape(raw_shape);
-}
-
-inline nnfw::cker::Shape convertTensorDescriptorToCkerShape(const TensorDescriptor &descr)
-{
- std::vector<int32_t> raw_shape;
- raw_shape.resize(4);
-
- for (uint32_t i = 0; i < 4; ++i)
- {
- if (i >= descr.dimensions.size())
- {
- raw_shape[i] = 1;
- }
- else
- {
- raw_shape[i] = descr.dimensions[i];
- }
- }
-
- return nnfw::cker::GetShape(raw_shape);
-}
-
-inline int32_t getAxis(uint32_t rank, int32_t axis, ir::Layout frontend_layout)
-{
- auto ret = axis;
-
- if (axis < 0)
- {
- ret += rank;
- }
-
- // NCHW -> NHWC
- if (frontend_layout == ir::Layout::NCHW)
- {
- int32_t permutation[4] = {0, 3, 1, 2};
- ret = permutation[ret];
- }
-
- return ret;
-}
-
-void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
-
-void GetQuantizedConvolutionMultiplier(const TensorDescriptor &inputDescr,
- const TensorDescriptor &filterDescr,
- const TensorDescriptor &biasDescr,
- const TensorDescriptor &outputDescr, float *multiplier);
-
-void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
- int *left_shift);
-
-void CalculateActivationRangeFloat(ir::Activation activation, float *activation_min,
- float *activation_max);
-
-void CalculateActivationRangeUint8(ir::Activation activation, const TensorDescriptor &outputDescr,
- int32_t *act_min, int32_t *act_max);
-
-bool HaveSameShapes(const TensorDescriptor *input1, const TensorDescriptor *input2);
-
-int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift);
-
-TensorDescriptor getTensorDescriptor(const ir::Operand &o, ir::Layout frontend_layout);
-
-uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions);
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__
diff --git a/runtime/neurun/backend/cpu/kernel/PadLayer.cc b/runtime/neurun/backend/cpu/kernel/PadLayer.cc
deleted file mode 100644
index 1fd9429b5..000000000
--- a/runtime/neurun/backend/cpu/kernel/PadLayer.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "PadLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Pad.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-PadLayer::PadLayer()
- : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _padData(), _padRank(),
- _constantValueData(), _inputType(OperandType::FLOAT32)
-{
- // DO NOTHING
-}
-
-void PadLayer::padFloat32()
-{
- nnfw::cker::Pad(_padData, _padRank, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f,
- _constantValueData.f);
-}
-void PadLayer::padQuant8() { throw std::runtime_error("Quantized Pad isn't supported NYI"); }
-
-void PadLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *outputData,
- const TensorDescriptor outputDescr, const int32_t *padData,
- int32_t padRank, uint8_t *constantValueData)
-{
- _inputData.u8 = inputData;
- _inputDescr = inputDescr;
- _inputType = inputDescr.type;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
- _padData = padData;
- _padRank = padRank;
- _constantValueData.u8 = constantValueData;
-}
-
-void PadLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- padFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- padQuant8();
- }
-}
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/PadLayer.h b/runtime/neurun/backend/cpu/kernel/PadLayer.h
deleted file mode 100644
index f4413a8ed..000000000
--- a/runtime/neurun/backend/cpu/kernel/PadLayer.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_PADLAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_PADLAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-// Note, this is pad with mode=`CONSTANT`: it doesn't support `REFLECT` and `SYMMETRIC`
-class PadLayer : public ::neurun::exec::IFunction
-{
-public:
- PadLayer();
-
-public:
- void padFloat32();
-
- void padQuant8();
-
- void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *outputData,
- const TensorDescriptor outputDescr, const int32_t *padData, int32_t padRank,
- uint8_t *constantValueData = nullptr);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _outputData;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _outputDescr;
-
- const int32_t *_padData;
- int32_t _padRank;
- DataPtr _constantValueData;
-
- OperandType _inputType;
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_PADLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/PermuteLayer.cc b/runtime/neurun/backend/cpu/kernel/PermuteLayer.cc
deleted file mode 100644
index 6f28d8436..000000000
--- a/runtime/neurun/backend/cpu/kernel/PermuteLayer.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "PermuteLayer.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-using Type = ir::operation::Permute::Type;
-
-void PermuteLayer::configure(std::shared_ptr<backend::operand::ITensor> input,
- std::shared_ptr<backend::operand::ITensor> output,
- const ir::Shape &output_shape, Type type, ir::DataType dataType)
-{
- _input = input;
- _output = output;
- _output_shape = output_shape;
- _type = type;
- _dataType = dataType;
-}
-
-void PermuteLayer::run()
-{
- using ir::DataType;
- switch (_dataType)
- {
- case DataType::FLOAT32:
- runTempl<float>();
- break;
- case DataType::INT32:
- runTempl<int32_t>();
- break;
- case DataType::UINT32:
- runTempl<uint32_t>();
- break;
- case DataType::BOOL8:
- case DataType::QUANT8_ASYMM:
- runTempl<uint8_t>();
- break;
- case DataType::QUANT8_SYMM:
- runTempl<int8_t>();
- break;
- default:
- throw std::runtime_error("NYI");
- break;
- }
-}
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/PermuteLayer.h b/runtime/neurun/backend/cpu/kernel/PermuteLayer.h
deleted file mode 100644
index 1f9110807..000000000
--- a/runtime/neurun/backend/cpu/kernel/PermuteLayer.h
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "util/feature/nhwc/View.h"
-#include "OperationUtils.h"
-#include "ir/operation/Permute.h"
-#include "util/feature/nhwc/Reader.h"
-#include "util/feature/nchw/View.h"
-#include "util/Coordinates.h"
-
-#include <misc/feature/IndexIterator.h>
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class PermuteLayer : public ::neurun::exec::IFunction
-{
-public:
- PermuteLayer() = default;
-
-public:
- void configure(std::shared_ptr<backend::operand::ITensor> input,
- std::shared_ptr<backend::operand::ITensor> output, const ir::Shape &output_shape,
- ir::operation::Permute::Type type, ir::DataType dataType);
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- template <class T> void runTempl()
- {
- auto rank = _output_shape.rank();
- auto fn = [&](::neurun::backend::operand::ITensor &in_tensor) {
- _output->access([&](::neurun::backend::operand::ITensor &out_tensor) {
- auto input_buffer = in_tensor.buffer();
- auto input_size = in_tensor.total_size();
- auto output_buffer = out_tensor.buffer();
- if (_type == ir::operation::Permute::Type::COPY)
- {
- assert(in_tensor.layout() == out_tensor.layout());
- if (!in_tensor.has_padding() && !out_tensor.has_padding())
- {
- assert(input_size == out_tensor.total_size());
- memcpy(output_buffer, input_buffer, input_size);
- return;
- }
- }
- switch (rank)
- {
- case 0:
- case 1:
- {
- const int32_t copy_len = _output_shape.dim(0);
-
- memcpy(output_buffer, input_buffer, copy_len);
- break;
- }
- case 2:
- {
- const int32_t copy_len = _output_shape.dim(1);
-
- for (auto i = 0; i < _output_shape.dim(0); ++i)
- {
- neurun::util::Coordinates coords{i, 0};
- memcpy(output_buffer + out_tensor.calcOffset(coords),
- input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t copy_len = _output_shape.dim(2);
-
- for (auto i = 0; i < _output_shape.dim(0); ++i)
- {
- for (auto j = 0; j < _output_shape.dim(1); ++j)
- {
- neurun::util::Coordinates coords{i, j, 0};
- memcpy(output_buffer + out_tensor.calcOffset(coords),
- input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- // TODO Unify permute type and remove switch case
- switch (_type)
- {
- case ir::operation::Permute::Type::NHWC_TO_NCHW:
- {
- for (auto n = 0; n < _output_shape.dim(0); ++n)
- {
- for (auto c = 0; c < _output_shape.dim(1); ++c)
- {
- for (auto h = 0; h < _output_shape.dim(2); ++h)
- {
- for (auto w = 0; w < _output_shape.dim(3); ++w)
- {
- const neurun::util::Coordinates in_coords{n, h, w, c};
- const auto out_coords =
- convertCoordinates(in_coords, in_tensor.layout(), out_tensor.layout());
- const auto value =
- *reinterpret_cast<T *>(input_buffer + in_tensor.calcOffset(in_coords));
- *reinterpret_cast<T *>(output_buffer + out_tensor.calcOffset(out_coords)) =
- value;
- }
- }
- }
- }
- break;
- }
- case ir::operation::Permute::Type::NCHW_TO_NHWC:
- {
- for (auto n = 0; n < _output_shape.dim(0); ++n)
- {
- for (auto h = 0; h < _output_shape.dim(1); ++h)
- {
- for (auto w = 0; w < _output_shape.dim(2); ++w)
- {
- for (auto c = 0; c < _output_shape.dim(3); ++c)
- {
- const neurun::util::Coordinates in_coords{n, c, h, w};
- const auto out_coords =
- convertCoordinates(in_coords, in_tensor.layout(), out_tensor.layout());
- const auto value =
- *reinterpret_cast<T *>(input_buffer + in_tensor.calcOffset(in_coords));
- *reinterpret_cast<T *>(output_buffer + out_tensor.calcOffset(out_coords)) =
- value;
- }
- }
- }
- }
- break;
- }
- case ir::operation::Permute::Type::COPY:
- {
- const int32_t copy_len = _output_shape.dim(3);
-
- for (auto i = 0; i < _output_shape.dim(0); ++i)
- {
- for (auto j = 0; j < _output_shape.dim(1); ++j)
- {
- for (auto k = 0; k < _output_shape.dim(2); ++k)
- {
- neurun::util::Coordinates coords{i, j, k, 0};
- memcpy(output_buffer + out_tensor.calcOffset(coords),
- input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T));
- }
- }
- }
- break;
- }
- default:
- throw std::runtime_error("NYI");
- break;
- }
- break;
- }
- default:
- throw std::runtime_error("NYI");
- break;
- }
- });
- };
- _input->access(fn);
- }
-
-private:
- std::shared_ptr<backend::operand::ITensor> _input{nullptr};
- std::shared_ptr<backend::operand::ITensor> _output{nullptr};
- ir::Shape _output_shape{};
- ir::operation::Permute::Type _type{ir::operation::Permute::Type::COPY};
- ir::DataType _dataType{ir::DataType::FLOAT32};
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc
deleted file mode 100644
index caeee9f12..000000000
--- a/runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ReshapeLayer.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-ReshapeLayer::ReshapeLayer() : _inputData(), _outputData(), _inputDescr(), _outputDescr()
-{
- // DO NOTHING
-}
-
-void ReshapeLayer::reshapeGeneric()
-{
- size_t count = sizeOfData(_inputDescr.type, _inputDescr.dimensions);
- memcpy(_outputData.v, _inputData.v, count);
-}
-
-void ReshapeLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr,
- uint8_t *outputData, const TensorDescriptor &outputDescr)
-{
- _inputData.u8 = inputData;
- _inputDescr = inputDescr;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void ReshapeLayer::run() { reshapeGeneric(); }
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/ReshapeLayer.h b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.h
deleted file mode 100644
index 25dd851b2..000000000
--- a/runtime/neurun/backend/cpu/kernel/ReshapeLayer.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_RESHAPELAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_RESHAPELAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class ReshapeLayer : public ::neurun::exec::IFunction
-{
-public:
- ReshapeLayer();
-
-public:
- void reshapeGeneric();
-
- void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, uint8_t *outputData,
- const TensorDescriptor &outputDescr);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _outputData;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _outputDescr;
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_RESHAPELAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc
deleted file mode 100644
index 58ba109b4..000000000
--- a/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SoftMaxLayer.h"
-
-#include <cker/operation/SoftMax.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-SoftMaxLayer::SoftMaxLayer()
- : _inputData(), _outputData(), _beta(0.0), _inputDescr(), _outputDescr(),
- _inputType(OperandType::FLOAT32)
-{
- // DO NOTHING
-}
-
-// Performs softmax along the input of size (input_size * batch_size).
-void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
- float *out)
-{
- assert(input_size > 0);
-
- // For each batch
- for (int b = 0; b < batch_size; b++)
- {
- // Find the max coeff.
- float max_coeff = in[0];
- for (int i = 1; i < input_size; i++)
- {
- if (in[i] > max_coeff)
- max_coeff = in[i];
- }
-
- // Compute the normalized sum of exps.
- float exp_sum = 0.0;
- for (int i = 0; i < input_size; i++)
- {
- out[i] = std::exp((in[i] - max_coeff) * beta);
- exp_sum += out[i];
- }
-
- // Divide by the sum of exps.
- float reciprocal_sum_exp = 1.f / exp_sum;
- for (int i = 0; i < input_size; i++)
- {
- out[i] *= reciprocal_sum_exp;
- }
-
- // Advance in and out pointers for the next batch.
- in += input_size;
- out += input_size;
- }
-}
-
-void SoftMaxLayer::softmaxFloat32()
-{
- TensorDescriptor descrIn4D;
-
- if (getNumberOfDimensions(_inputDescr) == 2)
- {
- uint32_t batch_size = getSizeOfDimension(_inputDescr, 0);
- if (batch_size == 0)
- throw std::runtime_error("batch_size should not be 0");
-
- uint32_t input_size = getNumberOfElements(_inputDescr) / batch_size;
- Softmax(_inputData.f, input_size, batch_size, _beta, _outputData.f);
- }
- else if (getNumberOfDimensions(_inputDescr) == 4)
- {
- nnfw::cker::SoftmaxParams op_params;
- op_params.beta = _beta;
- nnfw::cker::Softmax(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f,
- convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
- }
- else
- {
- throw std::runtime_error{"only 2D and 4D tensors supported"};
- }
-}
-
-void SoftMaxLayer::softmaxQuant8()
-{
- TensorDescriptor descrIn4D = _inputDescr;
-
- if (getNumberOfDimensions(_inputDescr) == 2)
- {
- uint32_t batch_size = getSizeOfDimension(_inputDescr, 0);
- if (batch_size == 0)
- throw std::runtime_error("batch_size should not be 0");
-
- uint32_t input_size = getNumberOfElements(_inputDescr) / batch_size;
- descrIn4D.dimensions = {batch_size, 1, 1, input_size};
- }
- else if (getNumberOfDimensions(_inputDescr) == 4)
- {
- descrIn4D = _inputDescr;
- }
- else
- {
- throw std::runtime_error{"only 2D and 4D tensors supported"};
- }
- if (_outputDescr.offset != 0 || _outputDescr.scale != 1.f / 256)
- {
- throw std::runtime_error{"incorrect scale / offset for output"};
- }
- static const int32_t kScaledDiffIntegerBits = 5;
- const double input_beta_real_multiplier = std::min(
- 1.0 * _beta * _inputDescr.scale * (1 << (31 - kScaledDiffIntegerBits)), (1ll << 31) - 1.0);
- int32_t input_multiplier = 0;
- int32_t input_left_shift = 0;
- QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, &input_multiplier,
- &input_left_shift);
- float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
-
- nnfw::cker::SoftmaxParams op_params;
- op_params.input_multiplier = input_multiplier;
- op_params.input_left_shift = input_left_shift;
- op_params.diff_min = diff_min;
- nnfw::cker::Softmax(op_params, convertTensorDescriptorToCkerShape(descrIn4D), _inputData.u8,
- convertTensorDescriptorToCkerShape(descrIn4D), _outputData.u8);
-}
-
-void SoftMaxLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr,
- const float beta, uint8_t *outputData,
- const TensorDescriptor &outputDescr)
-{
- _inputData.u8 = inputData;
- _inputDescr = inputDescr;
- _inputType = inputDescr.type;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
- _beta = beta;
-}
-
-void SoftMaxLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- softmaxFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- softmaxQuant8();
- }
-}
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h
deleted file mode 100644
index 4723afb72..000000000
--- a/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class SoftMaxLayer : public ::neurun::exec::IFunction
-{
-public:
- SoftMaxLayer();
-
-public:
- void softmaxFloat32();
-
- void softmaxQuant8();
-
- void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, const float beta,
- uint8_t *outputData, const TensorDescriptor &outputDescr);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _outputData;
-
- float _beta;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _outputDescr;
-
- OperandType _inputType;
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/SubLayer.cc b/runtime/neurun/backend/cpu/kernel/SubLayer.cc
deleted file mode 100644
index c6f7188e0..000000000
--- a/runtime/neurun/backend/cpu/kernel/SubLayer.cc
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SubLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-void SubLayer::subFloat32()
-{
- float output_activation_min, output_activation_max;
- CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
- const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) {
- return a - b;
- };
-
- if (!HaveSameShapes(&_lhsDescr, &_rhsDescr))
- {
- nnfw::cker::BroadcastBinaryArithmeticOpSlow(
- op_params, convertToExtendedCkerShape(_lhsDescr), _lhsData.f,
- convertToExtendedCkerShape(_rhsDescr), _rhsData.f, convertToExtendedCkerShape(_outputDescr),
- _outputData.f, fn);
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr),
- _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
- _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
- _outputData.f, fn);
-}
-
-void SubLayer::subQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
- &output_activation_max);
- // nnfw::cker::SubParam op_params;
- // op_params.quantized_activation_max = output_activation_max;
- // op_params.quantized_activation_min = output_activation_min;
-
- // cker quant8 sub is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void SubLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
- const TensorDescriptor &rhsDescr, const ir::Activation activation,
- uint8_t *outputData, const TensorDescriptor &outputDescr)
-{
- _lhsData.u8 = lhsData;
- _lhsDescr = lhsDescr;
- _rhsData.u8 = rhsData;
- _rhsDescr = rhsDescr;
- _inputType = lhsDescr.type;
- _activation = activation;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void SubLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- subFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- subQuant8();
- }
-}
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/SubLayer.h b/runtime/neurun/backend/cpu/kernel/SubLayer.h
deleted file mode 100644
index c9abdb48c..000000000
--- a/runtime/neurun/backend/cpu/kernel/SubLayer.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_KERNEL_SUBLAYER_H__
-#define __NEURUN_BACKEND_CPU_KERNEL_SUBLAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace kernel
-{
-
-class SubLayer : public ::neurun::exec::IFunction
-{
-public:
- SubLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr()
- {
- // DO NOTHING
- }
-
-public:
- void subFloat32();
-
- void subQuant8();
-
- void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
- const TensorDescriptor &rhsDescr, const ir::Activation activation,
- uint8_t *outputData, const TensorDescriptor &outputDescr);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _lhsData;
- DataPtr _rhsData;
- DataPtr _outputData;
-
- TensorDescriptor _lhsDescr;
- TensorDescriptor _rhsDescr;
- TensorDescriptor _outputDescr;
-
- ir::Activation _activation{ir::Activation::NONE};
-
- OperandType _inputType{OperandType::FLOAT32};
-};
-
-} // namespace kernel
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_KERNEL_SUBLAYER_H__
diff --git a/runtime/neurun/backend/cpu/operand/Tensor.cc b/runtime/neurun/backend/cpu/operand/Tensor.cc
deleted file mode 100644
index 21d4a9dce..000000000
--- a/runtime/neurun/backend/cpu/operand/Tensor.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Tensor.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace operand
-{
-
-size_t Tensor::calcOffset(const neurun::util::Coordinates &coords) const
-{
- size_t rank = num_dimensions();
- size_t offset = 0;
- for (size_t i = 0; i < rank; ++i)
- {
- offset = offset * dimension(i) + coords[i];
- }
- offset *= sizeOfDataType(data_type());
- return offset;
-}
-
-void Tensor::access(const std::function<void(ITensor &)> &fn) { fn(*this); }
-
-} // namespace operand
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/operand/Tensor.h b/runtime/neurun/backend/cpu/operand/Tensor.h
deleted file mode 100644
index dec680873..000000000
--- a/runtime/neurun/backend/cpu/operand/Tensor.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_OPERAND_TENSOR_H__
-#define __NEURUN_BACKEND_CPU_OPERAND_TENSOR_H__
-
-#include <backend/operand/ITensor.h>
-#include "ir/OperandInfo.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu
-{
-namespace operand
-{
-
-class Tensor : public ::neurun::backend::operand::ITensor
-{
-public:
- Tensor() = delete;
-
-public:
- Tensor(const ir::OperandInfo &info) : _info(info)
- {
- // DO NOTHING
- }
-
-public:
- void setBuffer(uint8_t *buffer) { _buffer = buffer; }
- ir::DataType data_type() const { return _info.typeInfo().type(); }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
- /**
- * @brief Get dimension by index
- *
- * @param index Index to get diemension
- * @return size_t Dimension at index
- * @note N : dimension(0)
- * H : dimension(1)
- * W : dimension(2)
- * C : dimension(3)
- */
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t total_size() const override { return _info.total_size(); }
- size_t calcOffset(const neurun::util::Coordinates &coords) const override;
- ir::Layout layout() const override { return ir::Layout::NHWC; }
- bool has_padding() const override { return false; }
- void access(const std::function<void(ITensor &tensor)> &fn) final;
-
-private:
- ir::OperandInfo _info;
- uint8_t *_buffer = nullptr;
-};
-
-} // namespace operand
-} // namespace cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_OPERAND_TENSOR_H__
diff --git a/runtime/neurun/backend/cpu_common/CMakeLists.txt b/runtime/neurun/backend/cpu_common/CMakeLists.txt
deleted file mode 100644
index 5b6161ba7..000000000
--- a/runtime/neurun/backend/cpu_common/CMakeLists.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-file(GLOB SOURCES "*.cc")
-file(GLOB_RECURSE TESTS "*.test.cc")
-list(REMOVE_ITEM SOURCES ${TESTS})
-
-add_library(${LIB_NEURUN_BACKEND_CPU_COMMON} STATIC ${SOURCES})
-
-target_include_directories(${LIB_NEURUN_BACKEND_CPU_COMMON} PUBLIC ${NEURUN_INCLUDE_DIR})
-target_include_directories(${LIB_NEURUN_BACKEND_CPU_COMMON} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(${LIB_NEURUN_BACKEND_CPU_COMMON} PUBLIC nnfw_lib_cpp14)
-target_link_libraries(${LIB_NEURUN_BACKEND_CPU_COMMON} PRIVATE nnfw_lib_misc)
-target_link_libraries(${LIB_NEURUN_BACKEND_CPU_COMMON} PRIVATE neurun_core)
-target_link_libraries(${LIB_NEURUN_BACKEND_CPU_COMMON} PRIVATE nnfw_common)
-target_link_libraries(${LIB_NEURUN_BACKEND_CPU_COMMON} PRIVATE nnfw_coverage)
-
-set_target_properties(${LIB_NEURUN_BACKEND_CPU_COMMON} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-set_target_properties(${LIB_NEURUN_BACKEND_CPU_COMMON} PROPERTIES OUTPUT_NAME backend_cpu_common)
-
-# Unit Tests
-set(TEST_NEURUN_BACKEND_CPU_COMMON test_neurun_backend_cpu_common)
-
-add_executable(${TEST_NEURUN_BACKEND_CPU_COMMON} ${TESTS})
-
-target_link_libraries(${TEST_NEURUN_BACKEND_CPU_COMMON} ${LIB_NEURUN_BACKEND_CPU_COMMON})
-target_link_libraries(${TEST_NEURUN_BACKEND_CPU_COMMON} gtest gtest_main dl ${LIB_PTHREAD})
-target_include_directories(${TEST_NEURUN_BACKEND_CPU_COMMON} PRIVATE ${NEURUN_INCLUDE_DIR})
-
-add_test(${TEST_NEURUN_BACKEND_CPU_COMMON} ${TEST_NEURUN_BACKEND_CPU_COMMON})
-install(TARGETS ${TEST_NEURUN_BACKEND_CPU_COMMON} DESTINATION unittest)
diff --git a/runtime/neurun/backend/cpu_common/MemoryPlanner.cc b/runtime/neurun/backend/cpu_common/MemoryPlanner.cc
deleted file mode 100644
index 19961362e..000000000
--- a/runtime/neurun/backend/cpu_common/MemoryPlanner.cc
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MemoryPlanner.h"
-#include "util/logging.h"
-#include <cassert>
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-Allocator::Allocator(uint32_t capacity)
-{
- _base = nnfw::cpp14::make_unique<uint8_t[]>(capacity);
-
- VERBOSE(ALLOC) << "allocation capacity: " << capacity << std::endl;
- VERBOSE(ALLOC) << "base pointer: " << static_cast<void *>(_base.get()) << std::endl;
-}
-
-void BumpPlanner::claim(const ir::OperandIndex &ind, size_t size)
-{
- assert(size != 0);
-
- Block blk{_capacity, size};
- _mem_plans[ind] = blk;
- _capacity += size;
-
- VERBOSE(BP_PLANNER) << "CLAIM(#" << ind.value() << "): " << blk.offset << ", " << blk.size
- << std::endl;
-}
-
-void BumpPlanner::release(const ir::OperandIndex &ind)
-{
- VERBOSE(BP_PLANNER) << "RELEASE(#" << ind.value() << "): "
- << "NOTHING does" << std::endl;
-}
-
-// There are some assumptions for claiming memory(== making a reservation for memory).
-// 1. About _claim_table(std::map).
-// - The table's data structure is std::map so that it always sorts
-// value(OperandIndex) by key(base_offset).
-// - This claim() inserts key/value into _claim_table and the release() removes the key/value from
-// _claim_table.
-// - _claim_table shows the memory status at a certain point in time. Therefore,
-// - If _claim_table has an offset and a certain size at a certain point in time,
-// it means the place at the offset has been already claimed(== can't claim now. need to find
-// someplace new).
-// - If _claim_table doesn't have any element for an offset and a certain size at a certain
-// point in time, it means the place at the offset can be claimed.
-// 2. In the loop for _claim_table, we can assume the current claim_base_offset value is bigger than
-// the previous claim_base_offset.
-void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size)
-{
- assert(size != 0);
-
- // Find the right position for claiming
- uint32_t next_offset = 0;
- for (auto &mem_claim : _claim_table)
- {
- auto claimed_base_offset = mem_claim.first;
- auto claimed_size = _mem_plans[mem_claim.second].size;
- if (next_offset + size <= claimed_base_offset)
- {
- break;
- }
- else
- {
- next_offset = claimed_base_offset + claimed_size;
- }
- }
-
- // Now next_offset is set to the proper offset
- _claim_table[next_offset] = ind;
- _mem_plans[ind] = {next_offset, size};
-
- VERBOSE(FF_PLANNER) << "claim(#" << ind.value() << "): [+" << next_offset << ", " << size << "sz]"
- << std::endl;
-
- if (_capacity < next_offset + size)
- {
- _capacity = next_offset + size;
- }
-}
-
-void FirstFitPlanner::release(const ir::OperandIndex &ind)
-{
- for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it)
- {
- if (it->second == ind)
- {
- uint32_t offset = it->first;
- uint32_t index = ind.value();
- uint32_t size = _mem_plans[ind].size;
-
- _claim_table.erase(it);
-
- VERBOSE(FF_PLANNER) << "release(#" << index << "): [+" << offset << ", " << size << "sz]"
- << std::endl;
- return;
- }
- }
- assert(!"Cannot release for given index. It has been not claimed or released already.");
-}
-
-WICPlanner::WICPlanner()
- : _initialized(false), _capacity(0), _mem_plans(), _live_operands(), _interference_graph(),
- _map_size_to_operands(), _claim_table()
-{
- // DO NOTHING
-}
-
-void WICPlanner::claim(const ir::OperandIndex &ind, size_t size)
-{
- assert(size != 0);
-
- _map_size_to_operands.insert({size, ind});
- for (auto &live_operand : _live_operands)
- {
- _interference_graph[live_operand].insert(ind);
- _interference_graph[ind].insert(live_operand);
- }
- _live_operands.insert(ind);
-
- VERBOSE(WIC_PLANNER) << "claim(#" << ind.value() << "): [" << size << "sz]" << std::endl;
-}
-
-void WICPlanner::release(const ir::OperandIndex &ind)
-{
- _live_operands.erase(ind);
- VERBOSE(WIC_PLANNER) << "release(#" << ind.value() << ")" << std::endl;
-}
-
-/*
- * Build memory plans using liveness and size of operands
- * 1. Build inference graph at claim
- * - Two operands interfere if they have overlapped live range
- * 2. Sort operands descending order of size
- * - Use std::multimap to sort operands
- * 3. Allocate memory block for sorted operands
- * - Find free memory block which does not overlap with interfered operands
- */
-void WICPlanner::buildMemoryPlans()
-{
- for (auto &size_to_operand : _map_size_to_operands)
- {
- uint32_t size = size_to_operand.first;
- ir::OperandIndex ind = size_to_operand.second;
- VERBOSE(WIC_PLANNER) << "build_plan(#" << ind.value() << "): [" << size << "sz]" << std::endl;
-
- // Find firstfit which does not interfere with live operands
- uint32_t next_offset = 0;
- if (_interference_graph.find(ind) != _interference_graph.end())
- {
- std::unordered_set<ir::OperandIndex> &interferences = _interference_graph.find(ind)->second;
- for (auto &mem_claim : _claim_table)
- {
- if (interferences.find(mem_claim.second) != interferences.end())
- {
- auto claimed_base_offset = mem_claim.first;
- auto claimed_size = _mem_plans[mem_claim.second].size;
- VERBOSE(WIC_PLANNER) << "interfere (#" << mem_claim.second.value() << "): [+"
- << claimed_base_offset << ", " << claimed_size << "sz]" << std::endl;
- if (next_offset + size <= claimed_base_offset)
- {
- break;
- }
- else if (next_offset < claimed_base_offset + claimed_size)
- {
- next_offset = claimed_base_offset + claimed_size;
- }
- }
- }
- }
- else
- {
- VERBOSE(WIC_PLANNER) << "No interference" << std::endl;
- }
-
- _claim_table.insert({next_offset, ind});
- _mem_plans[ind] = {next_offset, size};
- VERBOSE(WIC_PLANNER) << "alloc(#" << ind.value() << "): [+" << next_offset << ", " << size
- << "sz]" << std::endl;
-
- if (_capacity < next_offset + size)
- {
- _capacity = next_offset + size;
- }
- }
- _initialized = true;
- _interference_graph.clear();
- _map_size_to_operands.clear();
- _claim_table.clear();
-}
-
-WICPlanner::MemoryPlans &WICPlanner::memory_plans()
-{
- if (!_initialized)
- buildMemoryPlans();
- return _mem_plans;
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu_common/MemoryPlanner.h b/runtime/neurun/backend/cpu_common/MemoryPlanner.h
deleted file mode 100644
index c4f5e6a9e..000000000
--- a/runtime/neurun/backend/cpu_common/MemoryPlanner.h
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file        MemoryPlanner.h
- * @brief       This file contains Memory Planning related classes
- */
-
-#ifndef __NEURUN_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
-#define __NEURUN_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
-
-#include <map>
-#include <unordered_set>
-#include <cpp14/memory.h>
-
-#include "ir/OperandIndexMap.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-/**
- * @brief Structure to have memory offset and size
- */
-struct Block
-{
- uint32_t offset;
- size_t size;
-};
-
-/**
- * @brief Class to allocate memory
- */
-class Allocator
-{
-public:
- Allocator(uint32_t capacity);
- /**
- * @brief Get memory base pointer
- * @return base pointer
- */
- uint8_t *base() const { return _base.get(); }
- void release() { _base.reset(); }
-
-private:
- std::unique_ptr<uint8_t[]> _base;
-};
-
-/**
- * @brief Interface to plan memory
- */
-struct IMemoryPlanner
-{
- using MemoryPlans = ir::OperandIndexMap<Block>;
-
- /**
- * @brief Claim memory for operand
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- virtual void claim(const ir::OperandIndex &, size_t) = 0;
- /**
- * @brief Release memory for operand
- * @param[in] index The operand index
- */
- virtual void release(const ir::OperandIndex &) = 0;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- virtual uint32_t capacity() = 0;
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- virtual MemoryPlans &memory_plans() = 0;
-
- virtual ~IMemoryPlanner() = default;
-};
-
-/**
- * @brief Class to plan memory by bump way
- */
-class BumpPlanner : public IMemoryPlanner
-{
-public:
- /**
- * @brief Claim memory for operand by bump way
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- void claim(const ir::OperandIndex &, size_t) override;
- /**
- * @brief Release memory for operand by bump way
- * @param[in] index The operand index
- */
- void release(const ir::OperandIndex &) override;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- uint32_t capacity() override { return _capacity; }
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- MemoryPlans &memory_plans() override { return _mem_plans; }
-
-private:
- uint32_t _capacity = 0;
- MemoryPlans _mem_plans;
-};
-
-/**
- * @brief Class to plan memory by firstfit way
- */
-class FirstFitPlanner : public IMemoryPlanner
-{
-public:
- /**
- * @brief Claim memory for operand by firstfit way
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- void claim(const ir::OperandIndex &, size_t) override;
- /**
- * @brief Release memory for operand by firstfit way
- * @param[in] index The operand index
- */
- void release(const ir::OperandIndex &) override;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- uint32_t capacity() override { return _capacity; }
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- MemoryPlans &memory_plans() override { return _mem_plans; }
-
-private:
- uint32_t _capacity = 0;
- MemoryPlans _mem_plans;
- // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset)
- std::map<uint32_t, ir::OperandIndex> _claim_table;
-};
-
-/**
- * @brief Class to plan memory by Weighted Interval Color algorithm
- */
-class WICPlanner : public IMemoryPlanner
-{
-public:
- WICPlanner();
-
- /**
- * @brief Claim memory for operand by WIC algorithm
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- void claim(const ir::OperandIndex &, size_t) override;
- /**
- * @brief Release memory for operand by WIC algorithm
- * @param[in] index The operand index
- */
- void release(const ir::OperandIndex &) override;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- uint32_t capacity() override
- {
- if (!_initialized)
- buildMemoryPlans();
- return _capacity;
- }
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- MemoryPlans &memory_plans() override;
-
-private:
- void buildMemoryPlans();
-
- bool _initialized;
- uint32_t _capacity;
- MemoryPlans _mem_plans;
- std::unordered_set<ir::OperandIndex> _live_operands;
- ir::OperandIndexMap<std::unordered_set<ir::OperandIndex>> _interference_graph;
- // Sort operands by descending order of size
- std::multimap<uint32_t, ir::OperandIndex, std::greater<uint32_t>> _map_size_to_operands;
- std::multimap<uint32_t, ir::OperandIndex> _claim_table;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
diff --git a/runtime/neurun/backend/cpu_common/MemoryPlanner.test.cc b/runtime/neurun/backend/cpu_common/MemoryPlanner.test.cc
deleted file mode 100644
index b2be7db24..000000000
--- a/runtime/neurun/backend/cpu_common/MemoryPlanner.test.cc
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "MemoryPlanner.h"
-#include "ir/Index.h"
-
-TEST(Allocator, allocate_test)
-{
- ::neurun::backend::cpu_common::Allocator allocator(1024);
- ASSERT_NE(allocator.base(), nullptr);
-}
-
-TEST(BumpPlanner, claim_test)
-{
- ::neurun::backend::cpu_common::BumpPlanner planner;
-
- auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
- neurun::ir::OperandIndex mem_idx(index);
- planner.claim(mem_idx, size);
- auto mem_blk = planner.memory_plans()[mem_idx];
- ASSERT_EQ(mem_blk.offset, expected_offset);
- ASSERT_EQ(mem_blk.size, size);
- };
-
- claim(0, 10, 0);
- claim(1, 20, 10);
- claim(2, 30, 30);
-}
-
-TEST(FirstFitPlanner, claim_release_test)
-{
- ::neurun::backend::cpu_common::FirstFitPlanner planner;
-
- auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
- neurun::ir::OperandIndex mem_idx(index);
- planner.claim(mem_idx, size);
- auto mem_blk = planner.memory_plans()[mem_idx];
- ASSERT_EQ(mem_blk.offset, expected_offset);
- ASSERT_EQ(mem_blk.size, size);
- };
-
- auto release = [&planner](uint32_t index) {
- neurun::ir::OperandIndex mem_idx(index);
- planner.release(mem_idx);
- };
-
- // 0 CLAIM - 10
- claim(0, 10, 0);
-
- // 1 CLAIM - 20
- claim(1, 20, 10);
-
- // 2 CLAIM - 30
- claim(2, 30, 30);
-
- // 0 RELEASE - 10
- release(0);
-
- // 3 CLAIM - 20
- claim(3, 20, 60);
-
- // 4 CLAIM - 5
- claim(4, 5, 0);
-
- // 5 CLAIM - 10
- claim(5, 10, 80);
-
- // 6 CLAIM - 5
- claim(6, 5, 5);
-
- // 2 RELEASE - 30
- release(2);
-
- // 7 CLAIM - 35
- claim(7, 35, 90);
-
- // 8 CLAIM - 10
- claim(8, 10, 30);
-
- // 4 RELEASE - 5
- release(4);
-
- // 9 CLAIM - 10
- claim(9, 10, 40);
-
- // 10 CLAIM - 10
- claim(10, 10, 50);
-
- // 6 RELEASE
- release(6);
-
- // 1 RELEASE
- release(1);
-
- // 8 RELEASE
- release(8);
-
- // 9 RELEASE
- release(9);
-
- // 10 RELEASE
- release(10);
-
- // 3 RELEASE
- release(3);
-
- // 5 RELEASE
- release(5);
-
- // 7 RELEASE
- release(7);
-}
-
-TEST(WICPlanner, claim_release_test)
-{
- ::neurun::backend::cpu_common::WICPlanner planner;
-
- auto claim = [&planner](uint32_t index, size_t size) {
- neurun::ir::OperandIndex mem_idx(index);
- planner.claim(mem_idx, size);
- };
-
- auto release = [&planner](uint32_t index) {
- neurun::ir::OperandIndex mem_idx(index);
- planner.release(mem_idx);
- };
-
- auto verify = [&planner](uint32_t index, uint32_t size, uint32_t expected_offset) {
- neurun::ir::OperandIndex mem_idx(index);
- auto mem_blk = planner.memory_plans()[mem_idx];
- ASSERT_EQ(mem_blk.offset, expected_offset);
- ASSERT_EQ(mem_blk.size, size);
- };
-
- auto capacity = [&planner](uint32_t expected_capacity) {
- auto actual_capacity = planner.capacity();
- ASSERT_EQ(actual_capacity, expected_capacity);
- };
-
- claim(0, 20);
- claim(1, 5);
- release(0);
- claim(2, 10);
- release(1);
- claim(3, 10);
- release(2);
- claim(4, 10);
- release(3);
- claim(5, 20);
- release(4);
- claim(6, 20);
- release(5);
- release(7);
-
- // VERIFY 0 - 0
- verify(0, 20, 0);
-
- // VERIFY 1 - 20
- verify(1, 5, 20);
-
- // VERIFY 2 - 0
- verify(2, 10, 0);
-
- // VERIFY 3 - 10
- verify(3, 10, 10);
-
- // VERIFY 4 - 20
- verify(4, 10, 20);
-
- // VERIFY 5 - 0
- verify(5, 20, 0);
-
- // VERIFY 6 - 20
- verify(6, 20, 20);
-
- // CAPACITY - 40
- capacity(40);
-}
diff --git a/runtime/neurun/backend/cpu_common/MemoryPlannerFactory.cc b/runtime/neurun/backend/cpu_common/MemoryPlannerFactory.cc
deleted file mode 100644
index 9da987aa4..000000000
--- a/runtime/neurun/backend/cpu_common/MemoryPlannerFactory.cc
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MemoryPlannerFactory.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-MemoryPlannerFactory &MemoryPlannerFactory::get()
-{
- static MemoryPlannerFactory instance;
- return instance;
-}
-
-IMemoryPlanner *MemoryPlannerFactory::create(const std::string &key)
-{
- if (key == "FirstFit")
- {
- return new FirstFitPlanner;
- }
- else if (key == "Bump")
- {
- return new BumpPlanner;
- }
- else if (key == "WIC")
- {
- return new WICPlanner;
- }
- return new FirstFitPlanner; // Default Planner
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/cpu_common/MemoryPlannerFactory.h b/runtime/neurun/backend/cpu_common/MemoryPlannerFactory.h
deleted file mode 100644
index 829600e27..000000000
--- a/runtime/neurun/backend/cpu_common/MemoryPlannerFactory.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
-#define __NEURUN_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
-
-#include "MemoryPlanner.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class MemoryPlannerFactory
-{
-public:
- static MemoryPlannerFactory &get();
-
-private:
- MemoryPlannerFactory() = default;
-
-public:
- IMemoryPlanner *create(const std::string &key);
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
diff --git a/runtime/neurun/backend/hi_perf_cpu/CMakeLists.txt b/runtime/neurun/backend/hi_perf_cpu/CMakeLists.txt
deleted file mode 100644
index 816edba5e..000000000
--- a/runtime/neurun/backend/hi_perf_cpu/CMakeLists.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-set(LIB_NEURUN_BACKEND_HI_PERF_CPU neurun_backend_hi_perf)
-
-nnfw_find_package(NNPACK QUIET)
-
-option(BUILD_NEURUN_HI_PERF_CPU_BACKEND
- "Build neurun HI_PERF_CPU backend"
- ${NNPACK_FOUND} # Default value when there is no explicit user request
-)
-
-message(STATUS "Build neurun HI_PERF_CPU backend: ${BUILD_NEURUN_HI_PERF_CPU_BACKEND}")
-
-if(NOT BUILD_NEURUN_HI_PERF_CPU_BACKEND)
- return()
-endif(NOT BUILD_NEURUN_HI_PERF_CPU_BACKEND)
-
-file(GLOB_RECURSE SOURCES "*.cc")
-file(GLOB_RECURSE TESTS "*.test.cc")
-list(REMOVE_ITEM SOURCES ${TESTS})
-
-add_library(${LIB_NEURUN_BACKEND_HI_PERF_CPU} SHARED ${SOURCES})
-
-target_link_libraries(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE nnfw_lib_misc)
-target_link_libraries(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE nnfw_lib_cpp14)
-target_link_libraries(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE neurun_core)
-target_link_libraries(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE nnfw_common)
-target_link_libraries(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE nnpack pthreadpool cpuinfo)
-target_link_libraries(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE nnfw_coverage)
-target_include_directories(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PRIVATE ${NNPACK_INCLUDE_DIRS})
-
-set_target_properties(${LIB_NEURUN_BACKEND_HI_PERF_CPU} PROPERTIES OUTPUT_NAME backend_NNPACK)
-
-install(TARGETS ${LIB_NEURUN_BACKEND_HI_PERF_CPU} DESTINATION lib)
-
-# Unit Tests
-set(TEST_NEURUN_BACKEND_HI_PERF_CPU test_neurun_backend_hi_perf)
-
-add_executable(${TEST_NEURUN_BACKEND_HI_PERF_CPU} ${TESTS})
-
-target_link_libraries(${TEST_NEURUN_BACKEND_HI_PERF_CPU} ${LIB_NEURUN_BACKEND_HI_PERF_CPU})
-target_link_libraries(${TEST_NEURUN_BACKEND_HI_PERF_CPU} gtest gtest_main ${LIB_PTHREAD})
-target_link_libraries(${TEST_NEURUN_BACKEND_HI_PERF_CPU} nnpack)
-
-add_test(${TEST_NEURUN_BACKEND_HI_PERF_CPU} ${TEST_NEURUN_BACKEND_HI_PERF_CPU})
-install(TARGETS ${TEST_NEURUN_BACKEND_HI_PERF_CPU} DESTINATION unittest)
diff --git a/runtime/neurun/backend/hi_perf_cpu/KernelGenerator.h b/runtime/neurun/backend/hi_perf_cpu/KernelGenerator.h
deleted file mode 100644
index 3197995e1..000000000
--- a/runtime/neurun/backend/hi_perf_cpu/KernelGenerator.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_HI_PERF_CPU_KERNEL_GENERATOR_H__
-#define __NEURUN_BACKEND_HI_PERF_CPU_KERNEL_GENERATOR_H__
-
-#include <backend/IKernelGenerator.h>
-
-#include "ir/Operands.h"
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace hi_perf_cpu
-{
-
-class KernelGenerator : public IKernelGenerator
-{
-public:
- KernelGenerator(const Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder);
- // TODO add more ops
-
-private:
- const Operands &_ctx;
- std::shared_ptr<TensorBuilder> _tensor_builder;
-};
-
-} // namespace hi_perf_cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_HI_PERF_CPU_KERNEL_GENERATOR_H__
diff --git a/runtime/neurun/backend/hi_perf_cpu/TensorBuilder.h b/runtime/neurun/backend/hi_perf_cpu/TensorBuilder.h
deleted file mode 100644
index af879a41e..000000000
--- a/runtime/neurun/backend/hi_perf_cpu/TensorBuilder.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef NNFW_TENSORBUILDER_H
-#define NNFW_TENSORBUILDER_H
-
-#include <unordered_map>
-
-#include <backend/ITensorBuilder.h>
-#include "ir/OperandIndexMap.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace hi_perf_cpu
-{
-
-class TensorBuilder : public ITensorBuilder
-{
-public:
- TensorBuilder();
-
-private:
-};
-
-} // namespace hi_perf_cpu
-} // namespace backend
-} // namespace neurun
-
-#endif // NNFW_TENSORBUILDER_H
diff --git a/runtime/neurun/backend/srcn/Backend.h b/runtime/neurun/backend/srcn/Backend.h
deleted file mode 100644
index bc76a7ed2..000000000
--- a/runtime/neurun/backend/srcn/Backend.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_BACKEND_H__
-#define __NEURUN_BACKEND_SRCN_BACKEND_H__
-
-#include <memory>
-#include <backend/Backend.h>
-#include <ir/Operands.h>
-
-#include "Config.h"
-#include "ConstantInitializer.h"
-#include "KernelGenerator.h"
-#include "ShapeFixer.h"
-#include "TensorRegister.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-class Backend : public ::neurun::backend::Backend
-{
-public:
- Backend() : _config{std::make_shared<Config>()} {}
-
- std::shared_ptr<IConfig> config() const override { return _config; }
-
- std::unique_ptr<BackendContext>
- newContext(const ir::Operands &operands,
- const std::shared_ptr<custom::IKernelBuilder> &kb) const override
- {
- auto tensor_builder = std::make_shared<TensorBuilder>();
- return std::unique_ptr<BackendContext>{new BackendContext{
- this, tensor_builder, std::make_shared<ConstantInitializer>(operands, tensor_builder),
- std::make_shared<KernelGenerator>(operands, tensor_builder, kb),
- std::make_shared<ShapeFixer>(operands, tensor_builder),
- std::make_shared<TensorRegister>(operands, tensor_builder)}};
- }
-
-private:
- std::shared_ptr<IConfig> _config;
-};
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_BACKEND_H__
diff --git a/runtime/neurun/backend/srcn/CMakeLists.txt b/runtime/neurun/backend/srcn/CMakeLists.txt
deleted file mode 100644
index 3b0440c8e..000000000
--- a/runtime/neurun/backend/srcn/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-if(NOT BUILD_SRCN_KERNEL)
- message(STATUS "Skip building SRCN backend: SRCN kernel library is not build")
- return()
-endif()
-
-set(LIB_NEURUN_BACKEND_SRCN neurun_backend_srcn)
-
-file(GLOB_RECURSE SOURCES "*.cc")
-
-add_library(${LIB_NEURUN_BACKEND_SRCN} SHARED ${SOURCES})
-
-target_link_libraries(${LIB_NEURUN_BACKEND_SRCN} PUBLIC nnfw_lib_cpp14)
-target_link_libraries(${LIB_NEURUN_BACKEND_SRCN} PRIVATE nnfw_lib_srcn)
-target_link_libraries(${LIB_NEURUN_BACKEND_SRCN} PRIVATE neurun_core)
-target_link_libraries(${LIB_NEURUN_BACKEND_SRCN} PRIVATE ${LIB_NEURUN_BACKEND_CPU_COMMON})
-target_link_libraries(${LIB_NEURUN_BACKEND_SRCN} PRIVATE nnfw_common)
-target_link_libraries(${LIB_NEURUN_BACKEND_SRCN} PRIVATE nnfw_coverage)
-
-set_target_properties(${LIB_NEURUN_BACKEND_SRCN} PROPERTIES OUTPUT_NAME backend_srcn)
-
-install(TARGETS ${LIB_NEURUN_BACKEND_SRCN} DESTINATION lib)
diff --git a/runtime/neurun/backend/srcn/Config.cc b/runtime/neurun/backend/srcn/Config.cc
deleted file mode 100644
index 6865657e7..000000000
--- a/runtime/neurun/backend/srcn/Config.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Config.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-bool Config::initialize() { return true; }
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/Config.h b/runtime/neurun/backend/srcn/Config.h
deleted file mode 100644
index efc77fde2..000000000
--- a/runtime/neurun/backend/srcn/Config.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_CONFIG_H__
-#define __NEURUN_BACKEND_SRCN_CONFIG_H__
-
-#include <backend/IConfig.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-class Config : public IConfig
-{
-public:
- std::string id() override { return "srcn"; }
- bool initialize() override;
- bool SupportPermutation() override { return false; }
- bool SupportSubTensorAlloc() override
- {
- // NOTE srcn allocator cannot support subtensor allocation yet
- return false;
- }
-};
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_CONFIG_H__
diff --git a/runtime/neurun/backend/srcn/ConstantInitializer.cc b/runtime/neurun/backend/srcn/ConstantInitializer.cc
deleted file mode 100644
index f03628b1f..000000000
--- a/runtime/neurun/backend/srcn/ConstantInitializer.cc
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ConstantInitializer.h"
-
-#include "kernel/OperationUtils.h"
-
-namespace
-{
-
-template <typename T>
-static void PermuteKernel(const neurun::ir::Operand &model_obj,
- neurun::backend::operand::ITensor &obj,
- const std::vector<int32_t> &permutation)
-{
- const auto shape = model_obj.shape();
- auto base = reinterpret_cast<const T *>(model_obj.data().base());
-
- assert(shape.rank() == 4);
- assert(permutation.size() == 4);
- assert(permutation[0] != permutation[1] && permutation[0] != permutation[2] &&
- permutation[0] != permutation[3]);
- assert(permutation[1] != permutation[2] && permutation[1] != permutation[3]);
- assert(permutation[2] != permutation[3]);
- assert(permutation[0] < 4 && permutation[1] < 4 && permutation[2] < 4 && permutation[3] < 4);
-
- obj.access([&](::neurun::backend::operand::ITensor &tensor) {
- if (permutation[0] == 0 && permutation[1] == 1 && permutation[2] == 2 && permutation[3] == 3)
- {
- memcpy(tensor.buffer(), base, shape.num_elements() * sizeof(T));
- }
- else
- {
- const int32_t dim0 = shape.dim(0);
- const int32_t dim1 = shape.dim(1);
- const int32_t dim2 = shape.dim(2);
- const int32_t dim3 = shape.dim(3);
- for (auto i = 0; i < dim0; ++i)
- {
- for (auto j = 0; j < dim1; ++j)
- {
- for (auto k = 0; k < dim2; ++k)
- {
- for (auto l = 0; l < dim3; ++l)
- {
- Coordinates frontend_coords{i, j, k, l};
- Coordinates coords = frontend_coords;
- coords.set(0, frontend_coords[permutation[0]]);
- coords.set(1, frontend_coords[permutation[1]]);
- coords.set(2, frontend_coords[permutation[2]]);
- coords.set(3, frontend_coords[permutation[3]]);
- T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset(coords));
- T value = *(base + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3 + l);
- *into = value;
- }
- }
- }
- }
- }
- });
-}
-}
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _operands{operands}, _tensor_builder{tensor_builder}
-{
- // DO NOTHING
-}
-
-void ConstantInitializer::registerPermuteKernelInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj,
- const std::vector<int32_t> &permutation)
-{
- // For only CONSTANTS
- if (!obj.isConstant())
- return;
-
- VERBOSE(FillOperandData) << "[SRCN] Fill data for operand " << index.value() << std::endl;
-
- const auto type = obj.typeInfo().type();
- using ir::DataType;
- using namespace std::placeholders;
-
- switch (type)
- {
- case DataType::FLOAT32:
- _init_map[index] = std::bind(PermuteKernel<float>, _1, _2, permutation);
- break;
- case DataType::INT32:
- _init_map[index] = std::bind(PermuteKernel<int32_t>, _1, _2, permutation);
- break;
- case DataType::UINT32:
- _init_map[index] = std::bind(PermuteKernel<uint32_t>, _1, _2, permutation);
- break;
- case DataType::BOOL8:
- case DataType::QUANT8_ASYMM:
- _init_map[index] = std::bind(PermuteKernel<uint8_t>, _1, _2, permutation);
- break;
- case DataType::QUANT8_SYMM:
- _init_map[index] = std::bind(PermuteKernel<int8_t>, _1, _2, permutation);
- break;
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- util::Coordinates permutation{0, 1, 2, 3};
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = _tensor_builder->tensorAt(kernel_index)->layout();
- assert(frontend_layout == ir::Layout::NHWC || frontend_layout == ir::Layout::NCHW);
- assert(backend_layout == ir::Layout::NHWC || backend_layout == ir::Layout::NCHW);
- const auto frontend_filter_layout =
- frontend_layout == ir::Layout::NHWC ? kernel::FilterLayout::OHWI : kernel::FilterLayout::OIHW;
- const auto backend_filter_layout =
- backend_layout == ir::Layout::NHWC ? kernel::FilterLayout::HWIO : kernel::FilterLayout::OIHW;
- registerPermuteKernelInitializer(
- kernel_index, kernel_obj,
- kernel::getFilterPermutation(frontend_filter_layout, backend_filter_layout));
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- util::Coordinates permutation{0, 1, 2, 3};
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = _tensor_builder->tensorAt(kernel_index)->layout();
- assert(frontend_layout == ir::Layout::NHWC || frontend_layout == ir::Layout::NCHW);
- assert(backend_layout == ir::Layout::NHWC || backend_layout == ir::Layout::NCHW);
- const auto frontend_filter_layout =
- frontend_layout == ir::Layout::NHWC ? kernel::FilterLayout::OHWI : kernel::FilterLayout::OIHW;
- const auto backend_filter_layout =
- backend_layout == ir::Layout::NHWC ? kernel::FilterLayout::HWIO : kernel::FilterLayout::OIHW;
- registerPermuteKernelInitializer(
- kernel_index, kernel_obj,
- kernel::getFilterPermutation(frontend_filter_layout, backend_filter_layout));
-}
-
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
- // NOTE The srcn deconvolution layer takes a HWOI layout as kernel filter even though image layout
- // is NHWC.
- // This policy is the same with the tensorflow policy.
- // So for using srcn library, we need to change kernel layout to HWOI from OHWI or OIHW in
- // this case.
- // Also the srcn deconvolution layer takes a OIHW layout as kernel filter if image's layout
- // is NCHW
- const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = _tensor_builder->tensorAt(kernel_index)->layout();
- assert(frontend_layout == ir::Layout::NHWC || frontend_layout == ir::Layout::NCHW);
- assert(backend_layout == ir::Layout::NHWC || backend_layout == ir::Layout::NCHW);
- const auto frontend_filter_layout =
- frontend_layout == ir::Layout::NHWC ? kernel::FilterLayout::OHWI : kernel::FilterLayout::OIHW;
- const auto backend_filter_layout =
- backend_layout == ir::Layout::NHWC ? kernel::FilterLayout::HWOI : kernel::FilterLayout::IOHW;
- registerPermuteKernelInitializer(
- kernel_index, kernel_obj,
- kernel::getFilterPermutation(frontend_filter_layout, backend_filter_layout));
-}
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/ConstantInitializer.h b/runtime/neurun/backend/srcn/ConstantInitializer.h
deleted file mode 100644
index eadfe2ae1..000000000
--- a/runtime/neurun/backend/srcn/ConstantInitializer.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_COMPILER_SRCN_CONSTANT_INITIALIZER_H__
-#define __NEURUN_COMPILER_SRCN_CONSTANT_INITIALIZER_H__
-
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
-#include <util/Coordinates.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-class ConstantInitializer : public IConstantInitializer
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
-
-public:
- void registerPermuteKernelInitializer(const ir::OperandIndex &index, const ir::Operand &obj,
- const std::vector<int32_t> &permutation);
-
-public:
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::TransposeConv &) override;
-
-private:
- const ir::Operands &operands() const override { return _operands; }
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
-
-private:
- const ir::Operands &_operands;
- std::shared_ptr<TensorBuilder> _tensor_builder;
-};
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_SRCN_CONSTANT_INITIALIZER_H__
diff --git a/runtime/neurun/backend/srcn/Convert.cc b/runtime/neurun/backend/srcn/Convert.cc
deleted file mode 100644
index 1d80b2c7c..000000000
--- a/runtime/neurun/backend/srcn/Convert.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Convert.h"
-
-#include <cassert>
-#include <cpp14/memory.h>
-#include <ir/DataType.h>
-#include "Swizzle.h"
-#include <vector>
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-ir::Shape asKernelShape(const ir::Shape &shape, kernel::FilterLayout frontend_layout,
- kernel::FilterLayout backend_layout)
-{
- assert(shape.rank() == 4);
- if (frontend_layout == backend_layout)
- {
- return ir::Shape{shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3)};
- }
-
- const auto permutation = getFilterPermutation(frontend_layout, backend_layout);
- if (permutation.size() == 0)
- {
- throw std::runtime_error("Not supported FilterLayout");
- }
- return ir::Shape{shape.dim(permutation[0]), shape.dim(permutation[1]), shape.dim(permutation[2]),
- shape.dim(permutation[3])};
-}
-
-ir::Shape asTensorShape(const ir::Shape &shape, ir::Layout frontend_layout,
- ir::Layout backend_layout)
-{
- const uint32_t rank = shape.rank();
-
- ir::Shape ret(rank);
- for (uint32_t axis = 0; axis < rank; ++axis)
- {
- const auto ncnn_axis = ToNCNNAxis(rank, axis, frontend_layout, backend_layout);
- ret.dim(ncnn_axis) = shape.dim(axis);
- }
-
- return ret;
-}
-
-ir::OperandInfo asTensorInfo(const ir::Shape &shape, const ir::TypeInfo &typeInfo,
- ir::Layout frontend_layout, ir::Layout backend_layout)
-{
- ir::OperandInfo info(asTensorShape(shape, frontend_layout, backend_layout), typeInfo);
-
- return info;
-}
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/Convert.h b/runtime/neurun/backend/srcn/Convert.h
deleted file mode 100644
index 64be46e60..000000000
--- a/runtime/neurun/backend/srcn/Convert.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_CONVERT_H__
-#define __NEURUN_BACKEND_SRCN_CONVERT_H__
-
-#include "kernel/OperationUtils.h"
-#include <ir/Layout.h>
-#include <ir/Shape.h>
-#include <ir/TypeInfo.h>
-#include <ir/OperandInfo.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-ir::Shape asKernelShape(const ir::Shape &shape, kernel::FilterLayout frontend_layout,
- kernel::FilterLayout backend_layout);
-
-ir::Shape asTensorShape(const ir::Shape &shape, ir::Layout frontend_layout,
- ir::Layout backend_layout);
-
-ir::OperandInfo asTensorInfo(const ir::Shape &shape, const ir::TypeInfo &typeInfo,
- ir::Layout frontend_layout, ir::Layout backend_layout);
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_CONVERT_H__
diff --git a/runtime/neurun/backend/srcn/KernelGenerator.cc b/runtime/neurun/backend/srcn/KernelGenerator.cc
deleted file mode 100644
index c096f9230..000000000
--- a/runtime/neurun/backend/srcn/KernelGenerator.cc
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "KernelGenerator.h"
-
-#include <stdexcept>
-
-#include "cpp14/memory.h"
-#include "util/Padding.h"
-#include "kernel/ConvolutionLayer.h"
-#include "kernel/DepthwiseConvolutionLayer.h"
-#include "kernel/InstanceNormLayer.h"
-#include "kernel/TransposeConvLayer.h"
-#include "kernel/AddLayer.h"
-
-#include <backend/Backend.h>
-#include <backend/IConfig.h>
-
-#include "util/logging.h"
-
-#include "util/Utils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-KernelGenerator::KernelGenerator(const ir::Operands &operand_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<custom::IKernelBuilder> &kb)
- : _ctx(operand_ctx), _tensor_builder(tensor_builder), _kernel_builder(kb),
- _current_subg_layout(ir::Layout::UNKNOWN)
-{
- // DO NOTHING
-}
-
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
-{
- _current_subg_layout = op_seq.getLayout();
- for (const auto &e : op_seq.operations())
- {
- const auto &node = *(e.node);
- _tensor_builder->preVisit(node);
- node.accept(*this);
- _tensor_builder->postVisit(node);
- }
-}
-
-void KernelGenerator::visit(const ir::operation::Conv2D &node)
-{
- using ir::operation::Conv2D;
-
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
- const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
- const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in] if NHWC and [depth_out,
- // depth_in, kernel_height, kernel_width] if NCHW.
- const auto &ker_shape = _ctx.at(ker_index).shape().asFeature(_current_subg_layout);
- const auto ker_height = ker_shape.H;
- const auto ker_width = ker_shape.W;
- const auto stride = node.param().stride;
- const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
- stride, ker_width, ker_height);
- const int has_padding = padding.left + padding.right + padding.top + padding.bottom;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index);
- auto ifm_alloc = _tensor_builder->at(ifm_index);
- auto ker_alloc = _tensor_builder->at(ker_index);
- auto bias_alloc = _tensor_builder->at(bias_index);
- const auto backend_layout = ifm_alloc->layout();
- assert(backend_layout == ofm_alloc->layout());
-
- const auto ofm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(ofm_index), _current_subg_layout, backend_layout);
- const auto ifm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(ifm_index), _current_subg_layout, backend_layout);
- const auto ker_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(ker_index), _current_subg_layout, backend_layout);
- const auto bias_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(bias_index), _current_subg_layout, backend_layout);
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::srcn::kernel::ConvolutionLayer>();
-
- // TODO Support activation
- fn->configure(ifm_alloc->buffer(), ifm_backend_descr, ker_alloc->buffer(), ker_backend_descr,
- bias_alloc->buffer(), bias_backend_descr, has_padding, padding.left, padding.right,
- padding.top, padding.bottom, stride.horizontal, stride.vertical,
- /*activation,*/ ofm_alloc->buffer(), ofm_backend_descr, backend_layout);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
-{
- using ir::operation::InstanceNorm;
-
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(InstanceNorm::Input::INPUT)};
- const auto gamma_index{node.getInputs().at(InstanceNorm::Input::GAMMA)};
- const auto beta_index{node.getInputs().at(InstanceNorm::Input::BETA)};
-
- const auto epsilon = node.param().epsilon;
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index);
- auto ifm_alloc = _tensor_builder->at(ifm_index);
- auto gamma_alloc = _tensor_builder->at(gamma_index);
- auto beta_alloc = _tensor_builder->at(beta_index);
-
- const auto backend_layout = ofm_alloc->layout();
-
- const auto ofm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(ofm_index), _current_subg_layout, backend_layout);
- const auto ifm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(ifm_index), _current_subg_layout, backend_layout);
- const auto gamma_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(gamma_index), _current_subg_layout, backend_layout);
- const auto beta_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(beta_index), _current_subg_layout, backend_layout);
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::srcn::kernel::InstanceNormLayer>();
-
- fn->configure(ifm_alloc->buffer(), ifm_backend_descr, gamma_alloc->buffer(), gamma_backend_descr,
- beta_alloc->buffer(), beta_backend_descr, ofm_alloc->buffer(), ofm_backend_descr,
- epsilon, activation, backend_layout);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
-{
- using ir::operation::DepthwiseConv2D;
-
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
- const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
- const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in] if NHWC and [depth_out,
- // depth_in, kernel_height, kernel_width] if NCHW.
- const auto &ker_shape = _ctx.at(ker_index).shape().asFeature(_current_subg_layout);
- const auto ker_height = ker_shape.H;
- const auto ker_width = ker_shape.W;
- const auto stride = node.param().stride;
- const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
- stride, ker_width, ker_height);
- const int padding_type = padding.left + padding.right + padding.top + padding.bottom;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index);
- auto ifm_alloc = _tensor_builder->at(ifm_index);
- auto ker_alloc = _tensor_builder->at(ker_index);
- auto bias_alloc = _tensor_builder->at(bias_index);
- const auto backend_layout = ifm_alloc->layout();
- assert(backend_layout == ofm_alloc->layout());
-
- const auto ofm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(ofm_index), _current_subg_layout, backend_layout);
- const auto ifm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(ifm_index), _current_subg_layout, backend_layout);
- const auto ker_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(ker_index), _current_subg_layout, backend_layout);
- const auto bias_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(bias_index), _current_subg_layout, backend_layout);
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::srcn::kernel::DepthwiseConvolutionLayer>();
-
- // TODO Support activation
- fn->configure(ifm_alloc->buffer(), ifm_backend_descr, ker_alloc->buffer(), ker_backend_descr,
- bias_alloc->buffer(), bias_backend_descr, padding_type, padding.left, padding.right,
- padding.top, padding.bottom, stride.horizontal, stride.vertical,
- /*activation,*/ ofm_alloc->buffer(), ofm_backend_descr, backend_layout);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::TransposeConv &node)
-{
- using ir::operation::TransposeConv;
-
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(TransposeConv::Input::INPUT)};
- const auto ker_index{node.getInputs().at(TransposeConv::Input::KERNEL)};
- const auto output_shape_index{node.getInputs().at(TransposeConv::Input::OUTPUT_SHAPE)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in] if NHWC and [depth_out,
- // depth_in, kernel_height, kernel_width] if NCHW.
- const auto &ker_shape = _ctx.at(ker_index).shape().asFeature(_current_subg_layout);
- const auto ker_height = ker_shape.H;
- const auto ker_width = ker_shape.W;
- const auto stride = node.param().stride;
- const int padding_type = (node.param().padding.type == ir::PaddingType::SAME);
- const auto padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape,
- stride, ker_width, ker_height);
-
- auto ofm_alloc = _tensor_builder->at(ofm_index);
- auto ifm_alloc = _tensor_builder->at(ifm_index);
- auto ker_alloc = _tensor_builder->at(ker_index);
- const auto backend_layout = ofm_alloc->layout();
- assert(backend_layout == ifm_alloc->layout());
-
- const auto ofm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(ofm_index), _current_subg_layout, backend_layout);
- const auto ifm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(ifm_index), _current_subg_layout, backend_layout);
- const auto ker_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(ker_index), _current_subg_layout, backend_layout);
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::srcn::kernel::TransposeConvLayer>();
-
- fn->configure(ifm_alloc->buffer(), ifm_backend_descr, ker_alloc->buffer(), ker_backend_descr,
- padding_type, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, ofm_alloc->buffer(), ofm_backend_descr,
- backend_layout);
-
- _execution_builder->append(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
- using ir::operation::Add;
-
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(Add::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- const auto backend_layout = ofm_alloc->layout();
- assert(backend_layout == lhs_alloc->layout() && backend_layout == rhs_alloc->layout());
-
- const auto ofm_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(ofm_index), _current_subg_layout, backend_layout);
- const auto lhs_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(lhs_index), _current_subg_layout, backend_layout);
- const auto rhs_backend_descr = ::neurun::backend::srcn::kernel::getTensorDescriptor(
- _ctx.at(rhs_index), _current_subg_layout, backend_layout);
-
- auto fn = nnfw::cpp14::make_unique<::neurun::backend::srcn::kernel::AddLayer>();
-
- fn->configure(lhs_alloc->buffer(), lhs_backend_descr, rhs_alloc->buffer(), rhs_backend_descr,
- activation, ofm_alloc->buffer(), ofm_backend_descr, backend_layout);
-
- _execution_builder->append(std::move(fn));
-}
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/KernelGenerator.h b/runtime/neurun/backend/srcn/KernelGenerator.h
deleted file mode 100644
index 879aeaf5b..000000000
--- a/runtime/neurun/backend/srcn/KernelGenerator.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_KERNEL_GENERATOR_H__
-#define __NEURUN_BACKEND_SRCN_KERNEL_GENERATOR_H__
-
-#include "backend/IKernelGenerator.h"
-#include "ir/Operands.h"
-#include "operand/Tensor.h"
-#include "backend/CustomKernelBuilder.h"
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-class KernelGenerator : public IKernelGenerator
-{
-public:
- KernelGenerator(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<custom::IKernelBuilder> &kb);
-
- using IKernelGenerator::visit;
-
- void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::TransposeConv &) override;
- void visit(const ir::operation::Add &) override;
-
-private:
- const ir::Operands &_ctx;
- std::shared_ptr<TensorBuilder> _tensor_builder;
- std::shared_ptr<custom::IKernelBuilder> _kernel_builder;
- ir::Layout _current_subg_layout;
-};
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_KERNEL_GENERATOR_H__
diff --git a/runtime/neurun/backend/srcn/MemoryManager.cc b/runtime/neurun/backend/srcn/MemoryManager.cc
deleted file mode 100644
index aa07ab168..000000000
--- a/runtime/neurun/backend/srcn/MemoryManager.cc
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MemoryManager.h"
-
-#include <cassert>
-
-#include <MemoryPlannerFactory.h>
-#include "util/logging.h"
-#include "util/ConfigSource.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()}
-{
- // DO NOTHING
-}
-
-MemoryManager::MemoryManager(const std::string planner_id)
- : _mem_planner{createMemoryPlanner(planner_id)}
-{
- // DO NOTHING
-}
-
-cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner()
-{
- auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER);
- return cpu_common::MemoryPlannerFactory::get().create(planner_id);
-}
-
-cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id)
-{
- return cpu_common::MemoryPlannerFactory::get().create(planner_id);
-}
-
-void MemoryManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout layout)
-{
- auto tensor = std::make_shared<operand::Tensor>(info, layout);
- _tensors[ind] = tensor;
-}
-
-void MemoryManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- _mem_planner->claim(ind, size);
-}
-
-void MemoryManager::releasePlan(const ir::OperandIndex &ind) { _mem_planner->release(ind); }
-
-void MemoryManager::allocate(void)
-{
- _mem_alloc = std::make_shared<cpu_common::Allocator>(_mem_planner->capacity());
- assert(_mem_alloc->base());
-
- for (auto &mem_plan : _mem_planner->memory_plans())
- {
- auto ind = mem_plan.first;
- auto mem_blk = mem_plan.second;
-
- uint8_t *buffer = _mem_alloc->base() + mem_blk.offset;
- auto tensor = _tensors[ind];
- tensor->setBuffer(buffer);
-
- VERBOSE(CPU_MEMORYMANAGER) << "TENSOR(#" << ind.value() << "): " << static_cast<void *>(buffer)
- << std::endl;
-
- // If we do not make tensor here currently, kernel generation would cause segmentation fault.
- // See also : Comments in `allocate` method.
- }
-}
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/MemoryManager.h b/runtime/neurun/backend/srcn/MemoryManager.h
deleted file mode 100644
index 05fa07622..000000000
--- a/runtime/neurun/backend/srcn/MemoryManager.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_MEMORY_MANAGER_H__
-#define __NEURUN_BACKEND_SRCN_MEMORY_MANAGER_H__
-
-#include "backend/IMemoryManager.h"
-#include <MemoryPlanner.h>
-#include "operand/Tensor.h"
-#include "ir/OperandIndexMap.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-class MemoryManager : public backend::IMemoryManager
-{
-public:
- MemoryManager();
- MemoryManager(const std::string);
- virtual ~MemoryManager() = default;
-
- void allocate(void) override;
- void deallocate(void) override { _mem_alloc->release(); }
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout layout);
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
- ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &tensors(void) { return _tensors; }
-
-private:
- cpu_common::IMemoryPlanner *createMemoryPlanner();
- cpu_common::IMemoryPlanner *createMemoryPlanner(std::string);
-
-private:
- ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> _tensors;
- ir::OperandIndexMap<cpu_common::Block> _tensor_mem_map;
- std::shared_ptr<cpu_common::IMemoryPlanner> _mem_planner;
- std::shared_ptr<cpu_common::Allocator> _mem_alloc;
-};
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_MEMORY_MANAGER_H__
diff --git a/runtime/neurun/backend/srcn/PluginClassesAllocator.cc b/runtime/neurun/backend/srcn/PluginClassesAllocator.cc
deleted file mode 100644
index 9efc6aaaa..000000000
--- a/runtime/neurun/backend/srcn/PluginClassesAllocator.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <util/logging.h>
-
-#include "Backend.h"
-
-extern "C" {
-neurun::backend::Backend *neurun_backend_create()
-{
- VERBOSE(neurun_backend_create) << "'srcn' loaded\n";
- return new neurun::backend::srcn::Backend;
-}
-
-void neurun_backend_destroy(neurun::backend::Backend *backend)
-{
- VERBOSE(neurun_backend_create) << "'srcn' unloaded\n";
- delete backend;
-}
-}
diff --git a/runtime/neurun/backend/srcn/ShapeFixer.cc b/runtime/neurun/backend/srcn/ShapeFixer.cc
deleted file mode 100644
index 0ef190f28..000000000
--- a/runtime/neurun/backend/srcn/ShapeFixer.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ShapeFixer.h"
-
-#include <stdexcept>
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-ShapeFixer::ShapeFixer(const ir::Operands &operand_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _ctx(operand_ctx), _tensor_builder(tensor_builder)
-{
- assert(tensor_builder);
-}
-
-void ShapeFixer::visit(const ir::operation::Conv2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::DepthwiseConv2D &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::InstanceNorm &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::TransposeConv &) { /* DO NOTHING */}
-
-void ShapeFixer::visit(const ir::operation::Add &) { /* DO NOTHING */}
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/ShapeFixer.h b/runtime/neurun/backend/srcn/ShapeFixer.h
deleted file mode 100644
index 7da1ae731..000000000
--- a/runtime/neurun/backend/srcn/ShapeFixer.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_SHAPE_FIXER_H__
-#define __NEURUN_BACKEND_SRCN_SHAPE_FIXER_H__
-
-#include <backend/IShapeFixer.h>
-
-#include "ir/Operands.h"
-#include "operand/Tensor.h"
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-class ShapeFixer : public IShapeFixer
-{
-public:
- ShapeFixer(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder);
-
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::TransposeConv &) override;
- void visit(const ir::operation::Add &) override;
-
-private:
- const ir::Operands &_ctx;
- std::shared_ptr<TensorBuilder> _tensor_builder;
-};
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_SHAPE_FIXER_H__
diff --git a/runtime/neurun/backend/srcn/Swizzle.h b/runtime/neurun/backend/srcn/Swizzle.h
deleted file mode 100644
index d1f922367..000000000
--- a/runtime/neurun/backend/srcn/Swizzle.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_SWIZZLE_H__
-#define __NEURUN_BACKEND_SRCN_SWIZZLE_H__
-
-#include <cassert>
-#include <ir/Layout.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-// Convert axis in ncnn order
-inline uint32_t ToNCNNAxis(uint32_t rank, uint32_t axis,
- const ir::Layout org_layout = ir::Layout::UNKNOWN,
- const ir::Layout ncnn_layout = ir::Layout::UNKNOWN)
-{
- assert(rank > axis);
-
- if (rank >= 4 && org_layout == ir::Layout::NHWC && ncnn_layout == ir::Layout::NCHW)
- {
- // NHWC -> NCHW
- // DEPTH
- if (axis == 3)
- {
- return 1;
- }
- // WIDTH
- if (axis == 2)
- {
- return 3;
- }
- // HEIGHT
- if (axis == 1)
- {
- return 2;
- }
- }
-
- if (rank >= 4 && org_layout == ir::Layout::NCHW && ncnn_layout == ir::Layout::NHWC)
- {
- // NCHW -> NHWC
- // WIDTH
- if (axis == 3)
- {
- return 2;
- }
- // HEIGHT
- if (axis == 2)
- {
- return 1;
- }
- // DEPTH
- if (axis == 1)
- {
- return 3;
- }
- }
-
- return axis;
-}
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_SWIZZLE_H__
diff --git a/runtime/neurun/backend/srcn/TensorBuilder.cc b/runtime/neurun/backend/srcn/TensorBuilder.cc
deleted file mode 100644
index 5ac25c33e..000000000
--- a/runtime/neurun/backend/srcn/TensorBuilder.cc
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorBuilder.h"
-
-#include <cassert>
-#include "Convert.h"
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-TensorBuilder::TensorBuilder() : _tensor_mgr{new TensorManager()}
-{
- // DO NOTHING
-}
-
-void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout, bool as_const)
-{
- _tensor_info_map.emplace(ind, tensor_info);
- _tensor_layout_map.emplace(ind, backend_layout);
-
- if (as_const)
- _constants.append(ind);
-}
-
-void TensorBuilder::registerSubTensorInfo(const ir::OperandIndex &, const compiler::SubTensorInfo &)
-{
- // Not supported yet
- assert(false);
-}
-
-void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
-{
- assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
- const auto &tensor_info = _tensor_info_map.at(ind);
- const auto size = tensor_info.total_size();
- const auto &backend_layout = _tensor_layout_map.at(ind);
- _tensor_mgr->buildTensor(ind, tensor_info, backend_layout, _constants.contains(ind));
- _tensor_mgr->claimPlan(ind, size);
-}
-
-void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) { _tensor_mgr->releasePlan(ind); }
-
-bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
-{
- return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-void TensorBuilder::prepare(void)
-{
- _tensor_mgr->allocateConsts();
- _tensor_mgr->allocateNonconsts();
-}
-
-void TensorBuilder::allocateConsts()
-{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because SRCN kernels require `ITensor`s to be allocated before Kernel Generation.
-}
-
-void TensorBuilder::allocateNonconsts()
-{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because SRCN kernels require `ITensor`s to be allocated before Kernel Generation.
-}
-
-std::shared_ptr<::neurun::backend::operand::ITensor>
-TensorBuilder::tensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_mgr->at(ind);
-}
-
-void TensorBuilder::iterate(const IterateFunction &fn) { _tensor_mgr->iterate(fn); }
-
-std::shared_ptr<operand::Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
-{
- return _tensor_mgr->at(ind);
-}
-
-std::unique_ptr<ITensorManager> TensorBuilder::releaseTensorManager(void)
-{
- return std::move(_tensor_mgr);
-}
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/TensorBuilder.h b/runtime/neurun/backend/srcn/TensorBuilder.h
deleted file mode 100644
index 38bd6dd89..000000000
--- a/runtime/neurun/backend/srcn/TensorBuilder.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_TENSOR_BUILDER_H__
-#define __NEURUN_BACKEND_SRCN_TENSOR_BUILDER_H__
-
-#include <unordered_map>
-
-#include <backend/ITensorBuilder.h>
-#include "operand/Tensor.h"
-#include "ir/OperandIndexMap.h"
-#include "TensorManager.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-class TensorBuilder : public ITensorBuilder
-{
-public:
- TensorBuilder();
-
- /**
- * @brief Register tensor information to allocate on CPU backend
- * @param[in] ind Operand index
- * @param[in] info Operand information
- * @param[in] layout Operand data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout, bool as_const) override;
- /**
- * @brief Register subtensor information to allocate on CPU backend
- * @param[in] ind Operand index
- * @param[in] info Tensor information
- */
- void registerSubTensorInfo(const ir::OperandIndex &ind,
- const compiler::SubTensorInfo &info) override;
-
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
-
- bool isRegistered(const ir::OperandIndex &) const override;
-
- void prepare(void) override;
- void allocateConsts() override;
- void allocateNonconsts() override;
- void postFunctionPrepare() override { /* DO NOTHING */}
- void finalize() override { /* DO NOTHING */}
-
- std::shared_ptr<::neurun::backend::operand::ITensor>
- tensorAt(const ir::OperandIndex &ind) override;
-
- void iterate(const IterateFunction &fn) override;
-
- void preVisit(const ir::Operation &) override { /* DO NOTHING */}
- void postVisit(const ir::Operation &) override { /* DO NOTHING */}
-
- std::unique_ptr<ITensorManager> releaseTensorManager(void) override;
-
- std::shared_ptr<operand::Tensor> at(const ir::OperandIndex &ind);
-
-private:
- std::unique_ptr<TensorManager> _tensor_mgr;
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
- ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
- ir::OperandIndexSequence _constants;
-};
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_TENSOR_BUILDER_H__
diff --git a/runtime/neurun/backend/srcn/TensorManager.cc b/runtime/neurun/backend/srcn/TensorManager.cc
deleted file mode 100644
index 717f1bbbc..000000000
--- a/runtime/neurun/backend/srcn/TensorManager.cc
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorManager.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-TensorManager::TensorManager()
- : _const_mgr{new MemoryManager("Bump")}, _nonconst_mgr{new MemoryManager()}
-{
- // DO NOTHING
-}
-
-void TensorManager::allocateConsts(void) { _const_mgr->allocate(); }
-
-void TensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); }
-
-void TensorManager::deallocateConsts(void) { _const_mgr->deallocate(); }
-
-void TensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
-
-void TensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout layout, bool as_const)
-{
- assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
- if (as_const)
- {
- _const_mgr->buildTensor(ind, tensor_info, layout);
- _ind_to_mgr.insert({ind, *_const_mgr});
- }
- else
- {
- _nonconst_mgr->buildTensor(ind, tensor_info, layout);
- _ind_to_mgr.insert({ind, *_nonconst_mgr});
- }
-}
-
-void TensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
- _ind_to_mgr.at(ind).claimPlan(ind, size);
-}
-
-void TensorManager::releasePlan(const ir::OperandIndex &ind)
-{
- assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
- _ind_to_mgr.at(ind).releasePlan(ind);
-}
-
-std::shared_ptr<operand::Tensor> TensorManager::at(const ir::OperandIndex &ind)
-{
- assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
- return _ind_to_mgr.at(ind).tensors().at(ind);
-}
-
-ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &TensorManager::constTensors(void)
-{
- return _const_mgr->tensors();
-}
-
-ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &TensorManager::nonconstTensors(void)
-{
- return _nonconst_mgr->tensors();
-}
-
-void TensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
-{
- for (auto it : _nonconst_mgr->tensors())
- fn(it.first);
-
- for (auto it : _const_mgr->tensors())
- fn(it.first);
-}
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/TensorManager.h b/runtime/neurun/backend/srcn/TensorManager.h
deleted file mode 100644
index d4390d80c..000000000
--- a/runtime/neurun/backend/srcn/TensorManager.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_TENSOR_MANAGER_H__
-#define __NEURUN_BACKEND_SRCN_TENSOR_MANAGER_H__
-
-#include "backend/ITensorManager.h"
-#include "MemoryManager.h"
-#include "ir/OperandIndexMap.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-class TensorManager : public backend::ITensorManager
-{
-public:
- TensorManager();
- virtual ~TensorManager() = default;
-
- void allocateConsts(void) override;
- void allocateNonconsts(void) override;
- void deallocateConsts(void) override;
- void deallocateNonconsts(void) override;
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout layout, bool as_const);
-
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
- std::shared_ptr<operand::Tensor> at(const ir::OperandIndex &ind);
-
- ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &constTensors(void);
- ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> &nonconstTensors(void);
-
- void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
-
-private:
- std::unique_ptr<MemoryManager> _const_mgr;
- std::unique_ptr<MemoryManager> _nonconst_mgr;
- ir::OperandIndexMap<MemoryManager &> _ind_to_mgr;
-};
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_TENSOR_MANAGER_H__
diff --git a/runtime/neurun/backend/srcn/TensorRegister.cc b/runtime/neurun/backend/srcn/TensorRegister.cc
deleted file mode 100644
index 8c2f59aef..000000000
--- a/runtime/neurun/backend/srcn/TensorRegister.cc
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorRegister.h"
-
-#include "Convert.h"
-#include "kernel/OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-TensorRegister::TensorRegister(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _operands{operands}, _tensor_builder{tensor_builder}
-{
- assert(tensor_builder != nullptr);
-}
-
-void TensorRegister::visit(const ir::operation::Conv2D &node)
-{
- // General cases
- defaultRegisterTensorInfo(node.getInputs().at(ir::operation::Conv2D::INPUT));
- defaultRegisterTensorInfo(node.getInputs().at(ir::operation::Conv2D::BIAS));
- defaultRegisterTensorInfo(node.getOutputs().at(0));
-
- // Special case
- const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
-
- const auto frontend_layout = frontendLayout();
- assert(frontend_layout == ir::Layout::NCHW || frontend_layout == ir::Layout::NHWC);
- const auto frontend_filter_layout =
- frontend_layout == ir::Layout::NHWC ? kernel::FilterLayout::OHWI : kernel::FilterLayout::OIHW;
- const auto backend_layout = backendLayout(kernel_index);
- assert(backend_layout == ir::Layout::NCHW || backend_layout == ir::Layout::NHWC);
- const auto backend_filter_layout =
- backend_layout == ir::Layout::NHWC ? kernel::FilterLayout::HWIO : kernel::FilterLayout::OIHW;
-
- ir::OperandInfo backend_info{
- asKernelShape(kernel_obj.shape(), frontend_filter_layout, backend_filter_layout),
- kernel_obj.info().typeInfo()};
- _tensor_builder->registerTensorInfo(kernel_index, backend_info, backend_layout,
- kernel_obj.isConstant());
-}
-
-void TensorRegister::visit(const ir::operation::DepthwiseConv2D &node)
-{
- // General cases
- defaultRegisterTensorInfo(node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT));
- defaultRegisterTensorInfo(node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS));
- defaultRegisterTensorInfo(node.getOutputs().at(0));
-
- // Special case
- const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
-
- const auto frontend_layout = frontendLayout();
- assert(frontend_layout == ir::Layout::NCHW || frontend_layout == ir::Layout::NHWC);
- const auto frontend_filter_layout =
- frontend_layout == ir::Layout::NHWC ? kernel::FilterLayout::OHWI : kernel::FilterLayout::OIHW;
- const auto backend_layout = backendLayout(kernel_index);
- assert(backend_layout == ir::Layout::NCHW || backend_layout == ir::Layout::NHWC);
- const auto backend_filter_layout =
- backend_layout == ir::Layout::NHWC ? kernel::FilterLayout::HWIO : kernel::FilterLayout::OIHW;
-
- ir::OperandInfo backend_info{
- asKernelShape(kernel_obj.shape(), frontend_filter_layout, backend_filter_layout),
- kernel_obj.info().typeInfo()};
- _tensor_builder->registerTensorInfo(kernel_index, backend_info, backend_layout,
- kernel_obj.isConstant());
-}
-
-void TensorRegister::visit(const ir::operation::TransposeConv &node)
-{
- // General cases
- defaultRegisterTensorInfo(node.getInputs().at(ir::operation::TransposeConv::INPUT));
- defaultRegisterTensorInfo(node.getOutputs().at(0));
-
- // Special case
- const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
-
- const auto frontend_layout = frontendLayout();
- assert(frontend_layout == ir::Layout::NCHW || frontend_layout == ir::Layout::NHWC);
- const auto frontend_filter_layout =
- frontend_layout == ir::Layout::NHWC ? kernel::FilterLayout::OHWI : kernel::FilterLayout::OIHW;
- const auto backend_layout = backendLayout(kernel_index);
- assert(backend_layout == ir::Layout::NCHW || backend_layout == ir::Layout::NHWC);
- const auto backend_filter_layout =
- backend_layout == ir::Layout::NHWC ? kernel::FilterLayout::HWOI : kernel::FilterLayout::IOHW;
-
- ir::OperandInfo backend_info{
- asKernelShape(kernel_obj.shape(), frontend_filter_layout, backend_filter_layout),
- kernel_obj.info().typeInfo()};
- _tensor_builder->registerTensorInfo(kernel_index, backend_info, backend_layout,
- kernel_obj.isConstant());
-}
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/TensorRegister.h b/runtime/neurun/backend/srcn/TensorRegister.h
deleted file mode 100644
index 765f29567..000000000
--- a/runtime/neurun/backend/srcn/TensorRegister.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_TENSOR_REGISTER_H__
-#define __NEURUN_BACKEND_SRCN_TENSOR_REGISTER_H__
-
-#include <backend/ITensorRegister.h>
-#include "TensorBuilder.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-
-class TensorRegister : public ITensorRegister
-{
-public:
- TensorRegister(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
-
-public:
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::TransposeConv &) override;
-
-private:
- const ir::Operands &operands() const override { return _operands; }
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
- bool supportSubTensor() const final { return false; }
-
-private:
- const ir::Operands &_operands;
- const std::shared_ptr<TensorBuilder> _tensor_builder;
-};
-
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_TENSOR_REGISTER_H__
diff --git a/runtime/neurun/backend/srcn/kernel/AddLayer.cc b/runtime/neurun/backend/srcn/kernel/AddLayer.cc
deleted file mode 100644
index b53dfe89d..000000000
--- a/runtime/neurun/backend/srcn/kernel/AddLayer.cc
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AddLayer.h"
-
-#include "OperationUtils.h"
-#include "ncnn/layer/binaryop.h"
-
-#include "cpp14/memory.h"
-
-namespace
-{
-std::unique_ptr<nnfw::ncnn::Mat>
-convertMatIgnoreLayout(neurun::backend::srcn::kernel::TensorDescriptor &desc, void *data)
-{
- if (desc.dimensions.size() == 1)
- {
- return nnfw::cpp14::make_unique<nnfw::ncnn::Mat>(desc.dimensions[0], data);
- }
- else if (desc.dimensions.size() == 2)
- {
- return nnfw::cpp14::make_unique<nnfw::ncnn::Mat>(desc.dimensions[1], desc.dimensions[0], data);
- }
- else if (desc.dimensions.size() == 3)
- {
- return nnfw::cpp14::make_unique<nnfw::ncnn::Mat>(desc.dimensions[2], desc.dimensions[1],
- desc.dimensions[0], data);
- }
- else // rank == 4 and N == 1
- {
- return nnfw::cpp14::make_unique<nnfw::ncnn::Mat>(desc.dimensions[3], desc.dimensions[2],
- desc.dimensions[1], data);
- }
-}
-} // namespace
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace kernel
-{
-
-void AddLayer::addFloat32()
-{
- assert(_activation == ir::Activation::NONE);
-
- // ncnn kernel support
- // 1. rank < 4
- // 2. broadcasting
- // 2-1 lhs, rhs have same rank, or
- // 2-2 model layout and backend layout is same
- // For safety, block all broadcasting (enable when ready)
-
- assert(_lhsDescr.dimensions.size() < 4 ||
- (_lhsDescr.dimensions.size() == 4 && _lhsDescr.dimensions[0] == 1));
- assert(_rhsDescr.dimensions.size() < 4 ||
- (_rhsDescr.dimensions.size() == 4 && _rhsDescr.dimensions[0] == 1));
- assert((_lhsDescr.dimensions.size() == _rhsDescr.dimensions.size()));
-
- nnfw::ncnn::BinaryOpParam param;
- param.op_type = nnfw::ncnn::BinaryOp::Operation_ADD;
-
- auto lhs_mat = convertMatIgnoreLayout(_lhsDescr, _lhsData.v);
- auto rhs_mat = convertMatIgnoreLayout(_rhsDescr, _rhsData.v);
- auto out_mat = convertMatIgnoreLayout(_outputDescr, _outputData.v);
-
- ::nnfw::ncnn::ncnn_binary_op(param, *lhs_mat.get(), *rhs_mat.get(), *out_mat.get());
-}
-
-void AddLayer::addQuant8()
-{
- // quant8 add is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void AddLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
- const TensorDescriptor &rhsDescr, const ir::Activation activation,
- uint8_t *outputData, const TensorDescriptor &outputDescr,
- const ir::Layout backendLayout)
-{
- _lhsData.u8 = lhsData;
- _lhsDescr = lhsDescr;
- _rhsData.u8 = rhsData;
- _rhsDescr = rhsDescr;
- _inputType = lhsDescr.type;
- _activation = activation;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
- _backendLayout = backendLayout;
-}
-
-void AddLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- addFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- addQuant8();
- }
-}
-
-} // namespace kernel
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/kernel/AddLayer.h b/runtime/neurun/backend/srcn/kernel/AddLayer.h
deleted file mode 100644
index 1cae171b5..000000000
--- a/runtime/neurun/backend/srcn/kernel/AddLayer.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_KERNEL_ADD_LAYER_H__
-#define __NEURUN_BACKEND_SRCN_KERNEL_ADD_LAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace kernel
-{
-
-class AddLayer : public ::neurun::exec::IFunction
-{
-public:
- AddLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr()
- {
- // DO NOTHING
- }
-
-public:
- void addFloat32();
-
- void addQuant8();
-
- void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
- const TensorDescriptor &rhsDescr, const ir::Activation activation,
- uint8_t *outputData, const TensorDescriptor &outputDescr,
- const ir::Layout backendLayout);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _lhsData;
- DataPtr _rhsData;
- DataPtr _outputData;
-
- TensorDescriptor _lhsDescr;
- TensorDescriptor _rhsDescr;
- TensorDescriptor _outputDescr;
-
- ir::Activation _activation{ir::Activation::NONE};
-
- OperandType _inputType{OperandType::FLOAT32};
-
- ir::Layout _backendLayout{ir::Layout::UNKNOWN};
-};
-
-} // namespace kernel
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_KERNEL_ADD_LAYER_H__
diff --git a/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc b/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc
deleted file mode 100644
index 4e70f6319..000000000
--- a/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ConvolutionLayer.h"
-
-#include "OperationUtils.h"
-#include <ncnn/srcn/srcn_conv.h>
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace kernel
-{
-
-ConvolutionLayer::ConvolutionLayer()
- : _inputData(), _kernelData(), _biasData(), _outputData(), _inputDescr(), _kernelDescr(),
- _biasDescr(), _outputDescr(), _paddingType(0), _paddingLeft(0), _paddingTop(0),
- _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0),
- _inputType(OperandType::FLOAT32), _layout(nnfw::srcn::col_major), _winograd_weights(nullptr),
- _sparse_weights(nullptr)
-{
- // DO NOTHING
-}
-
-ConvolutionLayer::~ConvolutionLayer()
-{
- // TODO Move managing constant _winograd_data and sparse
- nnfw::srcn::winograd_release(_winograd_weights);
- size_t depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;
- nnfw::srcn::sparse_release(_outputDescr.dimensions[depth_index], _sparse_weights);
-}
-
-void ConvolutionLayer::convFloat32()
-{
- nnfw::srcn::convMat_t in_mat, out_mat, kernel_mat;
- nnfw::srcn::convParams_t in_param;
-
- assert(_layout == nnfw::srcn::col_major || _layout == nnfw::srcn::row_major);
- size_t height_index = _layout == nnfw::srcn::col_major ? 1 : 2;
- size_t width_index = _layout == nnfw::srcn::col_major ? 2 : 3;
- size_t depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;
- size_t kernel_input_depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;
- size_t kernel_output_depth_index = 0;
-
- const int batches = MatchingDim(_inputDescr, 0, _outputDescr, 0);
- const int input_height = _inputDescr.dimensions[height_index];
- const int input_width = _inputDescr.dimensions[width_index];
- const int input_depth =
- MatchingDim(_inputDescr, depth_index, _kernelDescr, kernel_input_depth_index);
- in_mat.c = input_depth;
- in_mat.w = input_width;
- in_mat.h = input_height;
- in_mat.n = batches;
- in_mat.data = _inputData.f;
-
- const int output_height = _outputDescr.dimensions[height_index];
- const int output_width = _outputDescr.dimensions[width_index];
- const int output_depth =
- MatchingDim(_kernelDescr, kernel_output_depth_index, _outputDescr, depth_index);
- out_mat.c = output_depth;
- out_mat.w = output_width;
- out_mat.h = output_height;
- out_mat.n = batches;
- out_mat.data = _outputData.f;
-
- const int outch = _kernelDescr.dimensions[kernel_output_depth_index];
- const int inch = _kernelDescr.dimensions[kernel_input_depth_index];
- const int ker_height = _kernelDescr.dimensions[height_index];
- const int ker_width = _kernelDescr.dimensions[width_index];
- kernel_mat.c = input_depth;
- kernel_mat.w = ker_width;
- kernel_mat.h = ker_height;
- kernel_mat.n = output_depth;
- kernel_mat.data = _kernelData.f;
-
- in_param.kernel_w = ker_width;
- in_param.kernel_h = ker_height;
- in_param.stride_w = _strideWidth;
- in_param.stride_h = _strideHeight;
- in_param.padding = _paddingType;
- in_param.pad_w = _paddingLeft;
- in_param.pad_h = _paddingTop;
- in_param.dilation_w = 1;
- in_param.dilation_h = 1;
-
- nnfw::srcn::winogradParams_t winograd_param;
- winograd_param.kernel_w = ker_width;
- winograd_param.kernel_h = ker_height;
- winograd_param.stride_w = _strideWidth;
- winograd_param.stride_h = _strideHeight;
- winograd_param.dilation_w = 1;
- winograd_param.dilation_h = 1;
- winograd_param.batch = batches;
- winograd_param.w = ker_width;
- winograd_param.h = ker_height;
- winograd_param.inch = inch;
- winograd_param.outch = outch;
- winograd_param.num_threads = 4;
-
- winograd_param.conv_type = _layout;
- winograd_param.weight_data = _kernelData.f;
-
- // Without winograd
- if (nnfw::srcn::check_winograd(winograd_param))
- {
- _winograd_weights = nnfw::srcn::trans_weight2winograd(winograd_param, nullptr);
- }
- _sparse_weights = nnfw::srcn::trans_weight2sparse(kernel_mat);
-
- nnfw::srcn::srcn_convolution2D(in_mat, kernel_mat, out_mat, in_param, _winograd_weights, 4,
- _layout);
-
- // Add biases
- if (_biasData.f == nullptr)
- {
- return;
- }
- // TODO Optimize
- uint32_t strides[4] = {
- _outputDescr.dimensions[1] * _outputDescr.dimensions[2] * _outputDescr.dimensions[3],
- _outputDescr.dimensions[2] * _outputDescr.dimensions[3], _outputDescr.dimensions[3], 1};
- if (_layout == nnfw::srcn::convType_t::col_major)
- {
- for (uint32_t c = 0; c < _outputDescr.dimensions[3]; ++c)
- {
- if (_biasData.f[c] != 0)
- {
- for (uint32_t b = 0; b < _outputDescr.dimensions[0]; ++b)
- {
- for (uint32_t h = 0; h < _outputDescr.dimensions[1]; ++h)
- {
- for (uint32_t w = 0; w < _outputDescr.dimensions[2]; ++w)
- {
- _outputData.f[b * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]] +=
- _biasData.f[c];
- }
- }
- }
- }
- }
- }
- else if (_layout == nnfw::srcn::convType_t::row_major)
- {
- for (uint32_t c = 0; c < _outputDescr.dimensions[1]; ++c)
- {
- if (_biasData.f[c] != 0)
- {
- for (uint32_t b = 0; b < _outputDescr.dimensions[0]; ++b)
- {
- for (uint32_t h = 0; h < _outputDescr.dimensions[2]; ++h)
- {
- for (uint32_t w = 0; w < _outputDescr.dimensions[3]; ++w)
- {
- _outputData.f[b * strides[0] + c * strides[1] + h * strides[2] + w * strides[3]] +=
- _biasData.f[c];
- }
- }
- }
- }
- }
- }
- else
- {
- throw std::runtime_error("Wrong Layout");
- }
-}
-
-void ConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
- uint8_t *kernelData, const TensorDescriptor kernelDescr,
- uint8_t *biasData, const TensorDescriptor biasDescr,
- const uint32_t paddingType, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, uint8_t *outputData,
- const TensorDescriptor outputDescr, ir::Layout layout)
-{
- assert(inputDescr.dimensions.size() == 4);
- assert(kernelDescr.dimensions.size() == 4);
- assert(biasDescr.dimensions.size() == 1);
- assert(outputDescr.dimensions.size() == 4);
- assert(inputDescr.type == kernelDescr.type && inputDescr.type == outputDescr.type);
- // TODO Add assertions validating height and width with padding
- _layout = convertLayout(layout);
- _inputData.u8 = inputData;
- _inputDescr = inputDescr;
- _inputType = inputDescr.type;
- _kernelData.u8 = kernelData;
- _kernelDescr = kernelDescr;
- _biasData.u8 = biasData;
- _biasDescr = biasDescr;
- _paddingType = paddingType;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void ConvolutionLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- convFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- throw std::runtime_error("NYI");
- }
-}
-
-} // namespace kernel
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.h b/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.h
deleted file mode 100644
index 4edafaa87..000000000
--- a/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_KERNEL_CONVOLUTION_LAYER_H__
-#define __NEURUN_BACKEND_SRCN_KERNEL_CONVOLUTION_LAYER_H__
-
-#include <exec/IFunction.h>
-#include <ncnn/srcn/conv_type.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace kernel
-{
-
-class ConvolutionLayer : public ::neurun::exec::IFunction
-{
-public:
- ConvolutionLayer();
- ~ConvolutionLayer();
-
-public:
- void convFloat32();
- void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData,
- const TensorDescriptor kernelDescr, uint8_t *biasData,
- const TensorDescriptor biasDescr, const uint32_t paddingType,
- const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
- uint8_t *outputData, const TensorDescriptor outputDescr, ir::Layout layout);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _kernelData;
- DataPtr _biasData;
- DataPtr _outputData;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _kernelDescr;
- TensorDescriptor _biasDescr;
- TensorDescriptor _outputDescr;
-
- uint32_t _paddingType;
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
-
- OperandType _inputType;
- nnfw::srcn::convType_t _layout;
-
- float *_winograd_weights;
- void *_sparse_weights;
-};
-
-} // namespace kernel
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_KERNEL_CONVOLUTION_LAYER_H__
diff --git a/runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.cc b/runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.cc
deleted file mode 100644
index a1718c500..000000000
--- a/runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.cc
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DepthwiseConvolutionLayer.h"
-
-#include <ncnn/srcn/srcn_conv.h>
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace kernel
-{
-
-DepthwiseConvolutionLayer::DepthwiseConvolutionLayer()
- : _inputData(), _kernelData(), _biasData(), _outputData(), _inputDescr(), _kernelDescr(),
- _biasDescr(), _outputDescr(), _paddingType(0), _paddingLeft(0), _paddingTop(0),
- _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0),
- _inputType(OperandType::FLOAT32), _layout(nnfw::srcn::col_major)
-{
- // DO NOTHING
-}
-
-void DepthwiseConvolutionLayer::convFloat32()
-{
- nnfw::srcn::convMat_t in_mat, out_mat, kernel_mat, bias_mat;
- nnfw::srcn::convParams_t in_param;
-
- assert(_layout == nnfw::srcn::col_major || _layout == nnfw::srcn::row_major);
- size_t height_index = _layout == nnfw::srcn::col_major ? 1 : 2;
- size_t width_index = _layout == nnfw::srcn::col_major ? 2 : 3;
- size_t depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;
-
- const int batches = MatchingDim(_inputDescr, 0, _outputDescr, 0);
- const int input_height = _inputDescr.dimensions[height_index];
- const int input_width = _inputDescr.dimensions[width_index];
- const int input_depth = _inputDescr.dimensions[depth_index];
- in_mat.c = input_depth;
- in_mat.w = input_width;
- in_mat.h = input_height;
- in_mat.n = batches;
- in_mat.data = _inputData.f;
-
- const int output_height = _outputDescr.dimensions[height_index];
- const int output_width = _outputDescr.dimensions[width_index];
- const int output_depth = MatchingDim(_kernelDescr, depth_index, _outputDescr, depth_index);
- out_mat.c = output_depth;
- out_mat.w = output_width;
- out_mat.h = output_height;
- out_mat.n = batches;
- out_mat.data = _outputData.f;
-
- const int ker_height = _kernelDescr.dimensions[height_index];
- const int ker_width = _kernelDescr.dimensions[width_index];
- kernel_mat.c = MatchingDim(_kernelDescr, depth_index, _outputDescr, depth_index);
- kernel_mat.w = ker_width;
- kernel_mat.h = ker_height;
- kernel_mat.n = 1;
- kernel_mat.data = _kernelData.f;
-
- const int bias_depth = MatchingDim(_biasDescr, 0, _outputDescr, depth_index);
- bias_mat.c = bias_depth;
- bias_mat.data = _biasData.f;
-
- in_param.kernel_w = ker_width;
- in_param.kernel_h = ker_height;
- in_param.stride_w = _strideWidth;
- in_param.stride_h = _strideHeight;
- in_param.padding = _paddingType;
- in_param.pad_w = _paddingLeft;
- in_param.pad_h = _paddingTop;
- in_param.dilation_w = 1;
- in_param.dilation_h = 1;
-
- nnfw::srcn::srcn_depthwise_conv(in_mat, kernel_mat, out_mat, bias_mat, in_param, 4, _layout);
-
- // Add biases
- if (_biasData.f == nullptr)
- {
- return;
- }
- // TODO Optimize
- uint32_t strides[4] = {
- _outputDescr.dimensions[1] * _outputDescr.dimensions[2] * _outputDescr.dimensions[3],
- _outputDescr.dimensions[2] * _outputDescr.dimensions[3], _outputDescr.dimensions[3], 1};
- if (_layout == nnfw::srcn::convType_t::col_major)
- {
- for (uint32_t c = 0; c < _outputDescr.dimensions[3]; ++c)
- {
- if (_biasData.f[c] != 0)
- {
- for (uint32_t b = 0; b < _outputDescr.dimensions[0]; ++b)
- {
- for (uint32_t h = 0; h < _outputDescr.dimensions[1]; ++h)
- {
- for (uint32_t w = 0; w < _outputDescr.dimensions[2]; ++w)
- {
- _outputData.f[b * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]] +=
- _biasData.f[c];
- }
- }
- }
- }
- }
- }
- else if (_layout == nnfw::srcn::convType_t::row_major)
- {
- for (uint32_t c = 0; c < _outputDescr.dimensions[1]; ++c)
- {
- if (_biasData.f[c] != 0)
- {
- for (uint32_t b = 0; b < _outputDescr.dimensions[0]; ++b)
- {
- for (uint32_t h = 0; h < _outputDescr.dimensions[2]; ++h)
- {
- for (uint32_t w = 0; w < _outputDescr.dimensions[3]; ++w)
- {
- _outputData.f[b * strides[0] + c * strides[1] + h * strides[2] + w * strides[3]] +=
- _biasData.f[c];
- }
- }
- }
- }
- }
- }
- else
- {
- throw std::runtime_error("Wrong Layout");
- }
-}
-
-void DepthwiseConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
- uint8_t *kernelData, const TensorDescriptor kernelDescr,
- uint8_t *biasData, const TensorDescriptor biasDescr,
- const uint32_t paddingType, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, uint8_t *outputData,
- const TensorDescriptor outputDescr, ir::Layout layout)
-{
- assert(layout == ir::Layout::NHWC || layout == ir::Layout::NCHW);
-
- const auto height_index = layout == ir::Layout::NHWC ? 1 : 2;
- const auto width_index = layout == ir::Layout::NHWC ? 2 : 3;
-
- if (layout == ir::Layout::NHWC)
- {
- throw std::runtime_error("DepthwiseConv of ncnn does not support layout yet in NHWC");
- }
-
- if (kernelDescr.dimensions[height_index] != 3 || kernelDescr.dimensions[width_index] != 3)
- {
- throw std::runtime_error("DepthwiseConv of ncnn supports only 3x3 kernel now");
- }
-
- assert(inputDescr.dimensions.size() == 4);
- assert(kernelDescr.dimensions.size() == 4);
- assert(biasDescr.dimensions.size() == 1);
- assert(outputDescr.dimensions.size() == 4);
- assert(inputDescr.type == kernelDescr.type && inputDescr.type == outputDescr.type);
- // TODO Add assertions validating height and width with padding
- _layout = convertLayout(layout);
- _inputData.u8 = inputData;
- _inputDescr = inputDescr;
- _inputType = inputDescr.type;
- _kernelData.u8 = kernelData;
- _kernelDescr = kernelDescr;
- _biasData.u8 = biasData;
- _biasDescr = biasDescr;
- _paddingType = paddingType;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void DepthwiseConvolutionLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- convFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- throw std::runtime_error("NYI");
- }
-}
-
-} // namespace kernel
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.h b/runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.h
deleted file mode 100644
index e94acff08..000000000
--- a/runtime/neurun/backend/srcn/kernel/DepthwiseConvolutionLayer.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_KERNEL_DEPTHWISE_CONVOLUTION_LAYER_H__
-#define __NEURUN_BACKEND_SRCN_KERNEL_DEPTHWISE_CONVOLUTION_LAYER_H__
-
-#include <exec/IFunction.h>
-#include <ncnn/srcn/conv_type.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace kernel
-{
-
-class DepthwiseConvolutionLayer : public ::neurun::exec::IFunction
-{
-public:
- DepthwiseConvolutionLayer();
-
-public:
- void convFloat32();
- void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData,
- const TensorDescriptor kernelDescr, uint8_t *biasData,
- const TensorDescriptor biasDescr, const uint32_t paddingType,
- const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
- uint8_t *outputData, const TensorDescriptor outputDescr, ir::Layout layout);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _kernelData;
- DataPtr _biasData;
- DataPtr _outputData;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _kernelDescr;
- TensorDescriptor _biasDescr;
- TensorDescriptor _outputDescr;
-
- uint32_t _paddingType;
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
-
- OperandType _inputType;
- nnfw::srcn::convType_t _layout;
-};
-
-} // namespace kernel
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_KERNEL_DEPTHWISE_CONVOLUTION_LAYER_H__
diff --git a/runtime/neurun/backend/srcn/kernel/InstanceNormLayer.cc b/runtime/neurun/backend/srcn/kernel/InstanceNormLayer.cc
deleted file mode 100644
index c83fe6d67..000000000
--- a/runtime/neurun/backend/srcn/kernel/InstanceNormLayer.cc
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "InstanceNormLayer.h"
-
-#include "OperationUtils.h"
-#include "ncnn/layer/instance_norm.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace kernel
-{
-
-InstanceNormLayer::InstanceNormLayer()
- : _inputData(), _gammaData(), _betaData(), _outputData(), _inputDescr(), _gammaDescr(),
- _betaDescr(), _outputDescr(), _epsilon(1e-5), _activation(ir::Activation::NONE),
- _inputType(OperandType::FLOAT32), _backendLayout(ir::Layout::UNKNOWN)
-{
- // DO NOTHING
-}
-
-void InstanceNormLayer::instanceNormFloat32()
-{
- // Call kernel for NCHW data layout
- if (_backendLayout == ir::Layout::NCHW)
- {
- // Supports single batch only
- assert(_inputDescr.dimensions[0] == 1);
- const int input_channels = _inputDescr.dimensions[1];
- const int input_height = _inputDescr.dimensions[2];
- const int input_width = _inputDescr.dimensions[3];
- nnfw::ncnn::Mat in_mat(input_width, input_height, input_channels, _inputData.f);
-
- const int gamma_channels = _gammaDescr.dimensions[0];
- nnfw::ncnn::Mat gamma_mat(gamma_channels, _gammaData.f);
-
- const int beta_channels = _betaDescr.dimensions[0];
- nnfw::ncnn::Mat beta_mat(beta_channels, _betaData.f);
-
- assert(_outputDescr.dimensions[0] == 1);
- const int output_channels = _outputDescr.dimensions[1];
- const int output_height = _outputDescr.dimensions[2];
- const int output_width = _outputDescr.dimensions[3];
- nnfw::ncnn::Mat out_mat(output_width, output_height, output_channels, _outputData.f);
-
- if (_activation == ir::Activation::NONE)
- {
- nnfw::ncnn::ncnn_instance_norm_rowmajor(in_mat, out_mat, gamma_mat, beta_mat, input_channels,
- _epsilon);
- }
- else if (_activation == ir::Activation::RELU)
- {
- nnfw::ncnn::ncnn_instance_norm_with_relu_rowmajor(in_mat, out_mat, gamma_mat, beta_mat,
- input_channels, _epsilon, 0.f);
- }
- else
- {
- std::runtime_error("Unsupported activation type");
- }
- }
- // Call kernel for NHWC data layout
- else if (_backendLayout == ir::Layout::NHWC)
- {
- // Supports single batch only
- assert(_inputDescr.dimensions[0] == 1);
- const int input_height = _inputDescr.dimensions[1];
- const int input_width = _inputDescr.dimensions[2];
- const int input_channels = _inputDescr.dimensions[3];
- nnfw::ncnn::Mat in_mat(input_channels, input_width, input_height, _inputData.f);
-
- const int gamma_channels = _gammaDescr.dimensions[0];
- nnfw::ncnn::Mat gamma_mat(gamma_channels, _gammaData.f);
-
- const int beta_channels = _betaDescr.dimensions[0];
- nnfw::ncnn::Mat beta_mat(beta_channels, _betaData.f);
-
- assert(_outputDescr.dimensions[0] == 1);
- const int output_height = _outputDescr.dimensions[1];
- const int output_width = _outputDescr.dimensions[2];
- const int output_channels = _outputDescr.dimensions[3];
- nnfw::ncnn::Mat out_mat(output_channels, output_width, output_height, _outputData.f);
-
- if (_activation == ir::Activation::NONE)
- {
- nnfw::ncnn::ncnn_instance_norm_colmajor(in_mat, out_mat, gamma_mat, beta_mat, input_channels,
- _epsilon);
- }
- else if (_activation == ir::Activation::RELU)
- {
- nnfw::ncnn::ncnn_instance_norm_with_relu_colmajor(in_mat, out_mat, gamma_mat, beta_mat,
- input_channels, _epsilon, 0.f);
- }
- {
- std::runtime_error("Unsupported activation type");
- }
- }
- else
- {
- std::runtime_error("Unsupported backend layout");
- }
-}
-
-void InstanceNormLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
- uint8_t *gammaData, const TensorDescriptor gammaDescr,
- uint8_t *betaData, const TensorDescriptor betaDescr,
- uint8_t *outputData, const TensorDescriptor outputDescr,
- float epsilon, ir::Activation activation,
- ir::Layout backendLayout)
-{
- _inputData.u8 = inputData;
- _inputDescr = inputDescr;
- _gammaData.u8 = gammaData;
- _gammaDescr = gammaDescr;
- _betaData.u8 = betaData;
- _betaDescr = betaDescr;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
- _epsilon = epsilon;
- _activation = activation;
- _backendLayout = backendLayout;
-}
-
-void InstanceNormLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- instanceNormFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- throw std::runtime_error("NYI");
- }
-}
-
-} // namespace kernel
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/kernel/InstanceNormLayer.h b/runtime/neurun/backend/srcn/kernel/InstanceNormLayer.h
deleted file mode 100644
index 0ac0cef3f..000000000
--- a/runtime/neurun/backend/srcn/kernel/InstanceNormLayer.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_KERNEL_INSTANCENORM_LAYER_H__
-#define __NEURUN_BACKEND_SRCN_KERNEL_INSTANCENORM_LAYER_H__
-
-#include <exec/IFunction.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace kernel
-{
-
-class InstanceNormLayer : public ::neurun::exec::IFunction
-{
-public:
- InstanceNormLayer();
-
-public:
- void instanceNormFloat32();
- void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *gammaData,
- const TensorDescriptor gammaDescr, uint8_t *betaData,
- const TensorDescriptor betaDescr, uint8_t *outputData,
- const TensorDescriptor outputDescr, float epsilon, ir::Activation activation,
- ir::Layout backendLayout);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _gammaData;
- DataPtr _betaData;
- DataPtr _outputData;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _gammaDescr;
- TensorDescriptor _betaDescr;
- TensorDescriptor _outputDescr;
-
- float _epsilon;
- ir::Activation _activation;
-
- OperandType _inputType;
- ir::Layout _backendLayout;
-};
-
-} // namespace kernel
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_KERNEL_INSTANCENORM_LAYER_H__
diff --git a/runtime/neurun/backend/srcn/kernel/OperationUtils.cc b/runtime/neurun/backend/srcn/kernel/OperationUtils.cc
deleted file mode 100644
index 684573a51..000000000
--- a/runtime/neurun/backend/srcn/kernel/OperationUtils.cc
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtils.h"
-
-#include <cmath>
-#include <algorithm>
-#include <cassert>
-#include <map>
-
-#include "util/Utils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace kernel
-{
-
-uint32_t MatchingDim(const TensorDescriptor &descr1, int index1, const TensorDescriptor &descr2,
- int index2)
-{
- UNUSED_RELEASE(descr2);
- UNUSED_RELEASE(index2);
- assert(descr1.dimensions[index1] == descr2.dimensions[index2]);
- return descr1.dimensions[index1];
-}
-
-std::vector<int32_t> getFilterPermutation(FilterLayout from_layout, FilterLayout to_layout)
-{
- static std::map<std::pair<FilterLayout, FilterLayout>, std::vector<int32_t>> filter_permutation =
- {{std::make_pair(FilterLayout::OHWI, FilterLayout::HWOI), {1, 2, 0, 3}},
- {std::make_pair(FilterLayout::OHWI, FilterLayout::HWIO), {1, 2, 3, 0}},
- {std::make_pair(FilterLayout::OHWI, FilterLayout::OIHW), {0, 3, 1, 2}},
- {std::make_pair(FilterLayout::OHWI, FilterLayout::IOHW), {3, 0, 1, 2}},
- {std::make_pair(FilterLayout::OIHW, FilterLayout::HWOI), {2, 3, 0, 1}},
- {std::make_pair(FilterLayout::OIHW, FilterLayout::HWIO), {2, 3, 1, 0}},
- {std::make_pair(FilterLayout::OIHW, FilterLayout::OHWI), {0, 2, 3, 1}},
- {std::make_pair(FilterLayout::OIHW, FilterLayout::IOHW), {1, 0, 2, 3}}};
-
- const auto pair = std::make_pair(from_layout, to_layout);
- const auto it = filter_permutation.find(pair);
- if (it == filter_permutation.end())
- {
- return std::vector<int32_t>{};
- }
- return it->second;
-}
-
-Coordinates convertCoordinates(const Coordinates &coordinates, FilterLayout from_layout,
- FilterLayout to_layout)
-{
- assert(coordinates.size() == 4);
- if (from_layout == to_layout)
- {
- return coordinates;
- }
-
- const auto permutation = getFilterPermutation(from_layout, to_layout);
- if (permutation.size() == 0)
- {
- throw std::runtime_error("Not supported FilterLayout");
- }
- return Coordinates{coordinates[permutation[0]], coordinates[permutation[1]],
- coordinates[permutation[2]], coordinates[permutation[3]]};
-}
-
-nnfw::srcn::convType_t convertLayout(ir::Layout layout)
-{
- if (layout == ir::Layout::NHWC)
- {
- return nnfw::srcn::col_major;
- }
- else if (layout == ir::Layout::NCHW)
- {
- return nnfw::srcn::row_major;
- }
- else
- {
- throw std::runtime_error("Not supported layout");
- }
-}
-
-TensorDescriptor getTensorDescriptor(const ir::Operand &o, ir::Layout frontend_layout,
- ir::Layout backend_layout)
-{
- TensorDescriptor descriptor;
-
- auto dims = o.shape().dims();
- if (frontend_layout == ir::Layout::NHWC && backend_layout == ir::Layout::NCHW &&
- o.shape().rank() == 4)
- {
- // NHWC -> NCHW
- uint32_t permutation[4] = {0, 3, 1, 2};
- for (int i = 0; i < o.shape().rank(); ++i)
- {
- dims.at(i) = o.shape().dim(permutation[i]);
- }
- }
- else if (frontend_layout == ir::Layout::NCHW && backend_layout == ir::Layout::NHWC &&
- o.shape().rank() == 4)
- {
- // NCHW -> NHWC
- uint32_t permutation[4] = {0, 2, 3, 1};
- for (int i = 0; i < o.shape().rank(); ++i)
- {
- dims.at(i) = o.shape().dim(permutation[i]);
- }
- }
- descriptor.dimensions = std::vector<uint32_t>(dims.begin(), dims.end());
- descriptor.type = static_cast<OperandType>(static_cast<int32_t>(o.typeInfo().type()));
- descriptor.scale = o.typeInfo().scale();
- descriptor.offset = o.typeInfo().offset();
-
- // CPU backend assume that neurun internal descriptor's rank is always same or less than 4
- assert(descriptor.dimensions.size() <= 4);
-
- return descriptor;
-}
-
-} // namespace kernel
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/kernel/OperationUtils.h b/runtime/neurun/backend/srcn/kernel/OperationUtils.h
deleted file mode 100644
index aa163a1f3..000000000
--- a/runtime/neurun/backend/srcn/kernel/OperationUtils.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_OPERATION_UTILS_H__
-#define __NEURUN_BACKEND_SRCN_OPERATION_UTILS_H__
-
-#include <iostream>
-#include <limits>
-#include <vector>
-
-#include "ir/Operand.h"
-#include "ir/DataType.h"
-#include <ir/InternalType.h>
-#include <ncnn/srcn/conv_type.h>
-
-using OperandType = neurun::ir::DataType;
-using neurun::util::Coordinates;
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace kernel
-{
-
-struct TensorDescriptor
-{
- OperandType type;
- std::vector<uint32_t> dimensions;
- float scale;
- int32_t offset;
-};
-
-union DataPtr {
- uint8_t *u8;
- int8_t *i8;
- int32_t *i32;
- float *f;
- void *v;
-};
-
-enum FilterLayout
-{
- OHWI = 0, // TfLite Kernel Layout when using NHWC image layout
- HWOI, // SRCN Transpose Conv Kernel Layout when using NHWC image layout
- OIHW, // SRCN Conv Kernel Layout when using NCHW image layout
- HWIO, // SRCN Conv Kernel Layout when using NHWC image layout
- IOHW, // SRCN Transpose Conv Kernel Layout when using NCHW image layout
-};
-
-uint32_t MatchingDim(const TensorDescriptor &shape1, int index1, const TensorDescriptor &shape2,
- int index2);
-
-std::vector<int32_t> getFilterPermutation(FilterLayout from_layout, FilterLayout to_layout);
-
-Coordinates convertCoordinates(const Coordinates &from_coordinates, FilterLayout from_layout,
- FilterLayout to_layout);
-
-nnfw::srcn::convType_t convertLayout(ir::Layout layout);
-
-TensorDescriptor getTensorDescriptor(const ir::Operand &o, ir::Layout frontend_layout,
- ir::Layout backend_layout);
-
-} // namespace kernel
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_OPERATION_UTILS_H__
diff --git a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc
deleted file mode 100644
index 26469f728..000000000
--- a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.cc
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TransposeConvLayer.h"
-
-#include <cstring>
-#include "OperationUtils.h"
-#include "ncnn/srcn/srcn_conv.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace kernel
-{
-
-TransposeConvLayer::TransposeConvLayer()
- : _inputData(), _kernelData(), _outputData(), _inputDescr(), _kernelDescr(), _outputDescr(),
- _paddingType(0), _paddingLeft(0), _paddingTop(0), _paddingRight(0), _paddingBottom(0),
- _strideWidth(0), _strideHeight(0), _inputType(OperandType::FLOAT32),
- _layout(nnfw::srcn::col_major)
-{
- // DO NOTHING
-}
-
-void TransposeConvLayer::convFloat32()
-{
- nnfw::srcn::convMat_t in_mat, out_mat, kernel_mat;
- nnfw::srcn::convParams_t in_param;
-
- assert(_layout == nnfw::srcn::col_major || _layout == nnfw::srcn::row_major);
- size_t height_index = _layout == nnfw::srcn::col_major ? 1 : 2;
- size_t width_index = _layout == nnfw::srcn::col_major ? 2 : 3;
- size_t depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;
- size_t kernel_input_depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;
- size_t kernel_output_depth_index = 0;
- const int batches = MatchingDim(_inputDescr, 0, _outputDescr, 0);
- const int input_height = _inputDescr.dimensions[height_index];
- const int input_width = _inputDescr.dimensions[width_index];
- const int input_depth =
- MatchingDim(_inputDescr, depth_index, _kernelDescr, kernel_input_depth_index);
- in_mat.c = input_depth;
- in_mat.w = input_width;
- in_mat.h = input_height;
- in_mat.n = batches;
- in_mat.data = _inputData.f;
-
- const int output_height = _outputDescr.dimensions[height_index];
- const int output_width = _outputDescr.dimensions[width_index];
- const int output_depth =
- MatchingDim(_kernelDescr, kernel_output_depth_index, _outputDescr, depth_index);
- out_mat.c = output_depth;
- out_mat.w = output_width;
- out_mat.h = output_height;
- out_mat.n = batches;
- out_mat.data = _outputData.f;
-
- const int ker_height = _kernelDescr.dimensions[height_index];
- const int ker_width = _kernelDescr.dimensions[width_index];
- kernel_mat.c = input_depth;
- kernel_mat.w = ker_width;
- kernel_mat.h = ker_height;
- kernel_mat.n = output_depth;
- kernel_mat.data = _kernelData.f;
-
- in_param.kernel_w = ker_width;
- in_param.kernel_h = ker_height;
- in_param.stride_w = _strideWidth;
- in_param.stride_h = _strideHeight;
- in_param.padding = _paddingType;
- in_param.pad_w = _paddingLeft;
- in_param.pad_h = _paddingTop;
- in_param.dilation_w = 1;
- in_param.dilation_h = 1;
-
- memset(_outputData.f, 0, out_mat.n * out_mat.h * out_mat.w * out_mat.c * sizeof(float));
-
- nnfw::srcn::srcn_deconvolution2D(in_mat, kernel_mat, out_mat, in_param, 4, _layout);
-}
-
-void TransposeConvLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
- uint8_t *kernelData, const TensorDescriptor kernelDescr,
- const uint32_t paddingType, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, uint8_t *outputData,
- const TensorDescriptor outputDescr, ir::Layout layout)
-{
- _layout = convertLayout(layout);
- _inputData.u8 = inputData;
- _inputDescr = inputDescr;
- _inputType = inputDescr.type;
- _kernelData.u8 = kernelData;
- _kernelDescr = kernelDescr;
- _paddingType = paddingType;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _outputData.u8 = outputData;
- _outputDescr = outputDescr;
-}
-
-void TransposeConvLayer::run()
-{
- if (_inputType == OperandType::FLOAT32)
- {
- convFloat32();
- }
- else if (_inputType == OperandType::QUANT8_ASYMM)
- {
- throw std::runtime_error("NYI");
- }
-}
-
-} // namespace kernel
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h b/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h
deleted file mode 100644
index cd88d4127..000000000
--- a/runtime/neurun/backend/srcn/kernel/TransposeConvLayer.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_KERNEL_TRANSPOSECONV_LAYER_H__
-#define __NEURUN_BACKEND_SRCN_KERNEL_TRANSPOSECONV_LAYER_H__
-
-#include <exec/IFunction.h>
-#include <ncnn/srcn/conv_type.h>
-
-#include "OperationUtils.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace kernel
-{
-
-class TransposeConvLayer : public ::neurun::exec::IFunction
-{
-public:
- TransposeConvLayer();
-
-public:
- void convFloat32();
- void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData,
- const TensorDescriptor kernelDescr, const uint32_t paddingType,
- const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
- uint8_t *outputData, const TensorDescriptor outputDescr,
- ir::Layout backend_layout);
-
- void run();
- void runSync()
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-
-private:
- DataPtr _inputData;
- DataPtr _kernelData;
- DataPtr _outputData;
-
- TensorDescriptor _inputDescr;
- TensorDescriptor _kernelDescr;
- TensorDescriptor _outputDescr;
-
- uint32_t _paddingType;
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
-
- OperandType _inputType;
- nnfw::srcn::convType_t _layout;
-};
-
-} // namespace kernel
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_KERNEL_TRANSPOSECONV_LAYER_H__
diff --git a/runtime/neurun/backend/srcn/operand/Tensor.cc b/runtime/neurun/backend/srcn/operand/Tensor.cc
deleted file mode 100644
index 8a53f97c5..000000000
--- a/runtime/neurun/backend/srcn/operand/Tensor.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Tensor.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace operand
-{
-
-size_t Tensor::calcOffset(const neurun::util::Coordinates &coords) const
-{
- size_t rank = num_dimensions();
- size_t offset = 0;
- for (size_t i = 0; i < rank; ++i)
- {
- offset = offset * dimension(i) + coords[i];
- }
- offset *= sizeOfDataType(data_type());
- return offset;
-}
-
-void Tensor::access(const std::function<void(ITensor &)> &fn) { fn(*this); }
-
-} // namespace operand
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/backend/srcn/operand/Tensor.h b/runtime/neurun/backend/srcn/operand/Tensor.h
deleted file mode 100644
index e16234a81..000000000
--- a/runtime/neurun/backend/srcn/operand/Tensor.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_SRCN_OPERAND_TENSOR_H__
-#define __NEURUN_BACKEND_SRCN_OPERAND_TENSOR_H__
-
-#include <backend/operand/ITensor.h>
-#include <ir/Layout.h>
-#include "ir/OperandInfo.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace srcn
-{
-namespace operand
-{
-
-class Tensor : public ::neurun::backend::operand::ITensor
-{
-public:
- Tensor() = delete;
-
-public:
- Tensor(const ir::OperandInfo &info, ir::Layout layout) : _info(info), _layout(layout)
- {
- // DO NOTHING
- }
-
-public:
- void setBuffer(uint8_t *buffer) { _buffer = buffer; }
- ir::DataType data_type() const { return _info.typeInfo().type(); }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
- /**
- * @brief Get dimension by index
- *
- * @param index Index to get diemension
- * @return size_t Dimension at index
- * @note N : dimension(0)
- * H : dimension(1)
- * W : dimension(2)
- * C : dimension(3)
- */
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t total_size() const override { return _info.total_size(); }
- size_t calcOffset(const neurun::util::Coordinates &coords) const override;
- ir::Layout layout() const override { return _layout; }
- bool has_padding() const override { return false; }
- void access(const std::function<void(ITensor &tensor)> &fn) final;
-
-private:
- ir::OperandInfo _info;
- uint8_t *_buffer = nullptr;
- ir::Layout _layout;
-};
-
-} // namespace operand
-} // namespace srcn
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_SRCN_OPERAND_TENSOR_H__
diff --git a/runtime/neurun/core/CMakeLists.txt b/runtime/neurun/core/CMakeLists.txt
deleted file mode 100644
index 422d292bb..000000000
--- a/runtime/neurun/core/CMakeLists.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-file(GLOB_RECURSE SOURCES "src/*.cc")
-
-add_library(neurun_core SHARED ${SOURCES})
-set_target_properties(neurun_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(neurun_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_include_directories(neurun_core PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
-target_link_libraries(neurun_core PUBLIC nnfw_lib_misc)
-target_link_libraries(neurun_core PUBLIC nnfw_lib_cpp14)
-target_link_libraries(neurun_core PRIVATE nnfw_lib_cker)
-target_link_libraries(neurun_core PRIVATE nnfw_common)
-target_link_libraries(neurun_core PRIVATE nnfw_coverage)
-target_link_libraries(neurun_core PRIVATE dl ${LIB_PTHREAD})
-
-if(ENVVAR_NEURUN_CONFIG)
- target_compile_definitions(neurun_core PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
-endif(ENVVAR_NEURUN_CONFIG)
-
-install(TARGETS neurun_core DESTINATION lib)
diff --git a/runtime/neurun/core/include/backend/Backend.h b/runtime/neurun/core/include/backend/Backend.h
deleted file mode 100644
index 9c4484fca..000000000
--- a/runtime/neurun/core/include/backend/Backend.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_BACKEND_H__
-#define __NEURUN_BACKEND_BACKEND_H__
-
-#include <memory>
-
-#include "ir/Operands.h"
-
-namespace neurun
-{
-namespace backend
-{
-
-namespace custom
-{
-class IKernelBuilder;
-}
-
-class Backend;
-struct IConfig;
-class IConstantInitializer;
-class IKernelGenerator;
-class IShapeFixer;
-class ITensorRegister;
-struct ITensorBuilder;
-
-class BackendContext
-{
-public:
- const Backend *backend;
- std::shared_ptr<ITensorBuilder> tensor_builder;
- std::shared_ptr<IConstantInitializer> constant_initializer;
- std::shared_ptr<IKernelGenerator> kernel_gen;
- std::shared_ptr<IShapeFixer> shape_fixer;
- std::shared_ptr<ITensorRegister> tensor_register;
-};
-
-class Backend
-{
-public:
- virtual ~Backend() = default;
- virtual std::shared_ptr<neurun::backend::IConfig> config() const = 0;
-
- virtual std::unique_ptr<BackendContext>
- newContext(const ir::Operands &operands,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kb) const = 0;
-};
-
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_BACKEND_H__
diff --git a/runtime/neurun/core/include/backend/CustomKernelBuilder.h b/runtime/neurun/core/include/backend/CustomKernelBuilder.h
deleted file mode 100644
index 848ebd595..000000000
--- a/runtime/neurun/core/include/backend/CustomKernelBuilder.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_CUSTOM_KERNEL_BUILDER_H__
-#define __NEURUN_BACKEND_CUSTOM_KERNEL_BUILDER_H__
-
-#include "exec/IFunction.h"
-
-#include "misc/tensor/Shape.h"
-#include "ir/DataType.h"
-
-#include <vector>
-#include <memory>
-
-namespace neurun
-{
-namespace backend
-{
-namespace custom
-{
-
-using Shape = nnfw::misc::tensor::Shape;
-
-struct TypeInfo
-{
- Shape shape;
- ir::DataType dtype;
-};
-
-struct CustomKernelConfigParams
-{
- std::vector<void *> input_allocations;
- std::vector<TypeInfo> input_types;
-
- std::vector<void *> output_allocations;
- std::vector<TypeInfo> output_types;
-
- char *userdata;
- size_t userdata_size;
-};
-
-class IKernelBuilder
-{
-public:
- virtual ~IKernelBuilder() = default;
- virtual std::unique_ptr<exec::IFunction> buildKernel(const std::string &id,
- CustomKernelConfigParams &&params) const = 0;
-};
-
-} // namespace custom
-
-} // namespace backend
-
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_CUSTOM_KERNEL_BUILDER_H__
diff --git a/runtime/neurun/core/include/backend/ExecTime.h b/runtime/neurun/core/include/backend/ExecTime.h
deleted file mode 100644
index 4eaf49fab..000000000
--- a/runtime/neurun/core/include/backend/ExecTime.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_EXEC_TIME_H__
-#define __NEURUN_BACKEND_EXEC_TIME_H__
-
-#include "backend/Backend.h"
-#include "backend/IConfig.h"
-#include "JSONExecTime.h"
-#include <memory>
-#include <limits>
-#include <map>
-#include <unordered_map>
-#include <vector>
-
-namespace neurun
-{
-namespace backend
-{
-class ExecTime
-{
-public:
- explicit ExecTime(const std::vector<const Backend *> &backends) : _json(backends, _measurements)
- {
- }
-
-public:
- /**
- * @brief Get exec time of an operation with input size
- * or linearly interpolated value based on size if there is no record for given size
- *
- * @param[in] backend id of a backend
- * @param[in] operation name of an operation
- * @param[in] quant if input type quantized
- * @param[in] op_size sum of operation's flattened sizes of inputs and outputs
- * @return execution time for given input sizes
- * -1 if there are no records for given parameters (backend, op, quantization).
- */
- int64_t getOperationExecTime(const Backend *backend, const std::string &operation, bool quant,
- uint32_t op_size) const;
- /**
- * @brief Update exec time of the operation on a backend with given input size or
- * add new entity if there is no one.
- *
- * @param[in] backend id of a backend
- * @param[in] operation name of an operation
- * @param[in] quant if input type quantized
- * @param[in] op_size sum of operation's flattened sizes of inputs and outputs
- * @param[in] time real measured value
- */
- void updateOperationExecTime(const Backend *backend, const std::string &operation, bool quant,
- uint32_t op_size, int64_t time);
- /**
- * @brief Get the permute time from one backend to another
- *
- * @param[in] from_backend
- * @param[in] to_backend
- * @param[in] quant if input type quantized
- * @param[in] op_size sum of operation's flattened sizes of inputs and outputs
- * @return permutation time for operation size
- */
- int64_t getPermuteTime(const Backend *from_backend, const Backend *to_backend, bool quant,
- uint32_t op_size) const;
- /**
- * @brief Update permute time from one backend to another
- *
- * @param[in] from_backend
- * @param[in] to_backend
- * @param[in] quant if input type quantized
- * @param[in] time measured permutation time
- * @param[in] op_size sum of operation's flattened sizes of inputs and outputs
- */
- void updatePermuteTime(const Backend *from_backend, const Backend *to_backend, bool quant,
- uint32_t op_size, int64_t time);
- /**
- * @brief Get the max value of int32_t in int64_t
- * @return max value
- */
- static int64_t getMax() { return _MAX; }
- /**
- * @brief Update metrics file with new data.
- */
- void uploadOperationsExecTime() const { _json.uploadOperationsExecTime(); }
- static const int64_t NOT_FOUND = -1;
-
-private:
- /// @brief Measurement data, which is shared with serializer
- MeasurementData _measurements;
- // int64_t::max may cause integer overflow
- static const int64_t _MAX = std::numeric_limits<int32_t>::max();
- /// @brief Serializer
- JSON _json;
-};
-
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_EXEC_TIME_H__
diff --git a/runtime/neurun/core/include/backend/IConfig.h b/runtime/neurun/core/include/backend/IConfig.h
deleted file mode 100644
index 855f31ef9..000000000
--- a/runtime/neurun/core/include/backend/IConfig.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ICONFIG_H__
-#define __NEURUN_BACKEND_ICONFIG_H__
-
-#include "util/ITimer.h"
-#include <memory>
-#include <string>
-
-namespace neurun
-{
-namespace backend
-{
-
-struct IConfig
-{
- virtual ~IConfig() = default;
-
- virtual std::string id() = 0;
- virtual bool initialize() = 0;
- // Support permute kernel
- virtual bool SupportPermutation() = 0;
- // Support subtensor allocation
- virtual bool SupportSubTensorAlloc() = 0;
-
- // Timer is used for backend profiling. In case of default (nullptr) timer profiler won't work.
- virtual std::unique_ptr<util::ITimer> timer() { return nullptr; }
-};
-
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ICONFIG_H__
diff --git a/runtime/neurun/core/include/backend/IConstantInitializer.h b/runtime/neurun/core/include/backend/IConstantInitializer.h
deleted file mode 100644
index 3cc770b29..000000000
--- a/runtime/neurun/core/include/backend/IConstantInitializer.h
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ICONSTANT_INITIALIZER_H__
-#define __NEURUN_BACKEND_ICONSTANT_INITIALIZER_H__
-
-#include <unordered_map>
-#include <functional>
-
-#include "ITensorBuilder.h"
-#include "ir/Layout.h"
-#include "ir/Operand.h"
-#include "ir/Operands.h"
-#include "ir/OperationVisitor.h"
-#include "ir/OpSequence.h"
-#include "util/logging.h"
-#include "util/Utils.h"
-
-namespace
-{
-template <typename T>
-static void Init(const neurun::ir::Operand &model_obj, neurun::backend::operand::ITensor &obj,
- const bool copy,
- const neurun::ir::Layout frontend_layout = neurun::ir::Layout::UNKNOWN)
-{
- const auto shape = model_obj.shape();
- auto base = reinterpret_cast<const T *>(model_obj.data().base());
-
- obj.access([&](::neurun::backend::operand::ITensor &tensor) {
- switch (shape.rank())
- {
- case 0:
- {
- assert(model_obj.data().size() == sizeof(T));
- const auto value = *reinterpret_cast<const T *>(base);
- T *into = reinterpret_cast<T *>(tensor.buffer());
- *into = value;
- break;
- }
- case 1:
- {
- auto vec_size = shape.dim(0);
- for (int32_t n = 0; n < vec_size; ++n)
- {
- const T *from = reinterpret_cast<const T *>(base) + n;
- const auto value = *from;
-
- T *into = reinterpret_cast<T *>(tensor.buffer()) + n;
-
- *into = value;
- }
- break;
- }
- case 2:
- {
- const int32_t copy_len = shape.dim(1);
-
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- neurun::util::Coordinates coords{i, 0};
- memcpy(tensor.buffer() + tensor.calcOffset(coords), base + i * copy_len,
- copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t width = shape.dim(1);
- const int32_t copy_len = shape.dim(2);
-
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- for (auto j = 0; j < shape.dim(1); ++j)
- {
- neurun::util::Coordinates coords{i, j, 0};
- memcpy(tensor.buffer() + tensor.calcOffset(coords),
- base + i * width * copy_len + j * copy_len, copy_len * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- const int32_t height = shape.dim(1);
- const int32_t width = shape.dim(2);
- const int32_t copy_len = shape.dim(3);
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- for (auto j = 0; j < shape.dim(1); ++j)
- {
- for (auto k = 0; k < shape.dim(2); ++k)
- {
- if (copy)
- {
- neurun::util::Coordinates coords{i, j, k, 0};
- memcpy(tensor.buffer() + tensor.calcOffset(coords),
- base + i * height * width * copy_len + j * width * copy_len + k * copy_len,
- copy_len * sizeof(T));
- }
- else
- {
- for (auto l = 0; l < shape.dim(3); ++l)
- {
- const auto coords = neurun::util::convertCoordinates(
- {i, j, k, l}, frontend_layout, tensor.layout());
- T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset(coords));
- T value = *(base + i * height * width * copy_len + j * width * copy_len +
- k * copy_len + l);
- *into = value;
- }
- }
- }
- }
- }
- break;
- }
- default:
- throw std::runtime_error{"Not yet supported"};
- }
- });
-}
-
-template <typename T>
-void copyInit(const neurun::ir::Operand &model_obj, neurun::backend::operand::ITensor &obj)
-{
- Init<T>(model_obj, obj, true);
-}
-
-template <typename T>
-void permuteInit(const neurun::ir::Operand &model_obj, neurun::backend::operand::ITensor &obj,
- const neurun::ir::Layout frontend_layout)
-{
- const bool copy = frontend_layout == obj.layout();
- Init<T>(model_obj, obj, copy, frontend_layout);
-}
-
-} // namespace
-
-namespace neurun
-{
-namespace backend
-{
-
-class IConstantInitializer : ir::OperationVisitor
-{
-public:
- virtual ~IConstantInitializer() = default;
-
-public:
- void run()
- {
- assert(tensor_builder().get());
- for (const auto &it : _init_map)
- {
- const auto &ind = it.first;
- const auto &fn = it.second;
-
- const auto &model_obj = operands().at(ind);
- auto tensor_obj = tensor_builder()->tensorAt(ind);
- fn(model_obj, *tensor_obj);
- VERBOSE(FillOperandData) << "Fill data for operand " << ind.value() << std::endl;
- }
- _init_map.clear();
- }
-
-public:
- using Initializer = std::function<void(const ir::Operand &, backend::operand::ITensor &)>;
-
- void generate(const ir::OpSequence &subg, const ir::Operands &operands)
- {
- _current_subg_layout = subg.getLayout();
- subg.accept(*this);
- for (const auto &e : subg.operations())
- {
- for (const auto &ind : e.node->getInputs())
- {
- const auto &obj = operands.at(ind);
- if (obj.isConstant() && !exist(ind))
- {
- registerPermuteInitializer(ind, obj);
- }
- }
- }
- }
-
-protected:
- using OperationVisitor::visit;
-
-protected:
- virtual const ir::Operands &operands() const = 0;
- virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0;
-
-protected:
- void registerCopyInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
- {
- // For only CONSTANTS
- // TODO Add to check if tensor has been allocated
- if (!obj.isConstant())
- return;
-
- const auto type = obj.typeInfo().type();
- using ir::DataType;
-
- switch (type)
- {
- case DataType::FLOAT32:
- _init_map[index] = copyInit<float>;
- break;
- case DataType::INT32:
- _init_map[index] = copyInit<int32_t>;
- break;
- case DataType::UINT32:
- _init_map[index] = copyInit<uint32_t>;
- break;
- case DataType::BOOL8:
- case DataType::QUANT8_ASYMM:
- _init_map[index] = copyInit<uint8_t>;
- break;
- case DataType::QUANT8_SYMM:
- _init_map[index] = copyInit<int8_t>;
- break;
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
- }
-
-protected:
- void registerPermuteInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
- {
- // For only CONSTANTS
- // TODO Add to check if tensor has been allocated
- if (!obj.isConstant())
- return;
-
- const auto type = obj.typeInfo().type();
- using ir::DataType;
- using namespace std::placeholders;
-
- switch (type)
- {
- case DataType::FLOAT32:
- _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_subg_layout);
- break;
- case DataType::INT32:
- _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_subg_layout);
- break;
- case DataType::UINT32:
- _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_subg_layout);
- break;
- case DataType::BOOL8:
- case DataType::QUANT8_ASYMM:
- _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_subg_layout);
- break;
- case DataType::QUANT8_SYMM:
- _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_subg_layout);
- break;
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
- }
-
-private:
- bool exist(const ir::OperandIndex &ind) { return _init_map.find(ind) != _init_map.end(); }
-
-protected:
- std::unordered_map<ir::OperandIndex, Initializer> _init_map;
- ir::Layout _current_subg_layout;
-};
-
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ICONSTANT_INITIALIZER_H__
diff --git a/runtime/neurun/core/include/backend/IKernelGenerator.h b/runtime/neurun/core/include/backend/IKernelGenerator.h
deleted file mode 100644
index 4540e81d2..000000000
--- a/runtime/neurun/core/include/backend/IKernelGenerator.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_IKERNEL_GENERATOR_H__
-#define __NEURUN_BACKEND_IKERNEL_GENERATOR_H__
-
-#include <memory>
-#include <functional>
-
-#include "ITensorBuilder.h"
-#include "compiler/IExecutionBuilder.h"
-#include "ir/OperationVisitor.h"
-#include "ir/OpSequence.h"
-#include "cpp14/memory.h"
-
-namespace neurun
-{
-namespace backend
-{
-
-class IKernelGenerator : public ir::OperationVisitor
-{
-public:
- virtual ~IKernelGenerator() = default;
-
- void generate(const ir::OpSequence &subg, neurun::compiler::IExecutionBuilder *executionBuilder)
- {
- _execution_builder = executionBuilder;
- subg.accept(*this);
- }
-
-protected:
- using OperationVisitor::visit;
-
-#define OP(InternalName) \
- void visit(const ir::operation::InternalName &) override \
- { \
- throw std::runtime_error("KernelGenerator: NYI for operation '" #InternalName "'"); \
- }
-#include "ir/Operations.lst"
-#undef OP
-
-protected:
- neurun::compiler::IExecutionBuilder *_execution_builder;
-};
-
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_IKERNEL_GENERATOR_H__
diff --git a/runtime/neurun/core/include/backend/IMemoryManager.h b/runtime/neurun/core/include/backend/IMemoryManager.h
deleted file mode 100644
index b06bab872..000000000
--- a/runtime/neurun/core/include/backend/IMemoryManager.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_IMEMORY_MANAGER_H__
-#define __NEURUN_BACKEND_IMEMORY_MANAGER_H__
-
-namespace neurun
-{
-namespace backend
-{
-
-struct IMemoryManager
-{
- virtual ~IMemoryManager() = default;
-
- virtual void allocate(void) = 0;
- virtual void deallocate(void) = 0;
-};
-
-} // namespace backend
-} // namespace neurun
-
-#include <unordered_set>
-#include <memory>
-
-namespace neurun
-{
-namespace backend
-{
-
-using MemoryManagerSet = std::unordered_set<std::unique_ptr<backend::IMemoryManager>>;
-
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_IMEMORY_MANAGER_H__
diff --git a/runtime/neurun/core/include/backend/IShapeFixer.h b/runtime/neurun/core/include/backend/IShapeFixer.h
deleted file mode 100644
index 8168e2a11..000000000
--- a/runtime/neurun/core/include/backend/IShapeFixer.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ISHAPE_FIXER_H__
-#define __NEURUN_BACKEND_ISHAPE_FIXER_H__
-
-#include <memory>
-#include <functional>
-
-#include "ir/LowerInfoMap.h"
-#include "ITensorBuilder.h"
-#include "ir/OperationVisitor.h"
-#include "ir/OpSequence.h"
-#include "cpp14/memory.h"
-
-namespace neurun
-{
-namespace backend
-{
-
-class IShapeFixer : public ir::OperationVisitor
-{
-public:
- virtual ~IShapeFixer() = default;
-
-protected:
-#define OP(InternalName) \
- void visit(const ir::operation::InternalName &) override \
- { \
- throw std::runtime_error("ShapeFixer: NYI for operation '" #InternalName "'"); \
- }
-#include "ir/Operations.lst"
-#undef OP
-
-public:
- void fix(const ir::OpSequence &op_seq) { op_seq.accept(*this); };
-};
-
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ISHAPE_FIXER_H__
diff --git a/runtime/neurun/core/include/backend/ITensorBuilder.h b/runtime/neurun/core/include/backend/ITensorBuilder.h
deleted file mode 100644
index 5eb4ab2d8..000000000
--- a/runtime/neurun/core/include/backend/ITensorBuilder.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ITENSOR_BUILDER_H__
-#define __NEURUN_BACKEND_ITENSOR_BUILDER_H__
-
-#include <map>
-
-#include "ir/Index.h"
-#include "ir/OperandInfo.h"
-#include "ir/Operation.h"
-#include "ir/Layout.h"
-#include "operand/ITensor.h"
-#include "compiler/SubTensorInfo.h"
-#include "ITensorManager.h"
-
-namespace neurun
-{
-namespace backend
-{
-
-struct ITensorBuilder
-{
- using IterateFunction = std::function<void(const ir::OperandIndex &)>;
-
- virtual ~ITensorBuilder(void) = default;
-
- // TODO Merge registerTensorInfo and registerSubTensorInfo using abstraction by internal class
- /**
- * @brief Register tensor information to allocate on backend
- */
- virtual void registerTensorInfo(const ir::OperandIndex &, const ir::OperandInfo &,
- ir::Layout backend_layout, bool as_const) = 0;
- /**
- * @brief Register subtensor information to allocate on backend
- */
- virtual void registerSubTensorInfo(const ir::OperandIndex &, const compiler::SubTensorInfo &) = 0;
-
- virtual void notifyFirstUse(const ir::OperandIndex &) = 0;
- virtual void notifyLastUse(const ir::OperandIndex &) = 0;
-
- virtual bool isRegistered(const ir::OperandIndex &) const = 0;
-
- virtual void prepare(void) = 0;
- virtual void allocateConsts() = 0;
- virtual void allocateNonconsts() = 0;
- virtual void postFunctionPrepare() = 0;
- virtual void finalize() = 0;
-
- virtual std::shared_ptr<::neurun::backend::operand::ITensor>
- tensorAt(const ir::OperandIndex &ind) = 0;
- virtual void iterate(const IterateFunction &fn) = 0;
-
- virtual void preVisit(const ir::Operation &) = 0;
- virtual void postVisit(const ir::Operation &) = 0;
-
- virtual std::unique_ptr<ITensorManager> releaseTensorManager(void) = 0;
-};
-
-} // namespace backend
-} // namespace neurun
-
-#include <unordered_set>
-#include <memory>
-
-namespace neurun
-{
-namespace backend
-{
-
-using TensorBuilderSet = std::unordered_set<std::shared_ptr<backend::ITensorBuilder>>;
-
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ITENSOR_BUILDER_H__
diff --git a/runtime/neurun/core/include/backend/ITensorManager.h b/runtime/neurun/core/include/backend/ITensorManager.h
deleted file mode 100644
index 74506ef59..000000000
--- a/runtime/neurun/core/include/backend/ITensorManager.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ITENSOR_MANAGER_H__
-#define __NEURUN_BACKEND_ITENSOR_MANAGER_H__
-
-namespace neurun
-{
-namespace backend
-{
-
-// NOTE This name ITensorManager has been discussed whether or not the name is proper.
-// Anyone can argue with any better name.
-/**
- * @brief Interface as an abstract tensor manager which has MemoryManager
- */
-struct ITensorManager
-{
- virtual ~ITensorManager() = default;
-
- virtual void allocateConsts(void) = 0;
- virtual void allocateNonconsts(void) = 0;
- virtual void deallocateConsts(void) = 0;
- virtual void deallocateNonconsts(void) = 0;
-};
-
-} // namespace backend
-} // namespace neurun
-
-#include <unordered_set>
-#include <memory>
-
-namespace neurun
-{
-namespace backend
-{
-
-using TensorManagerSet = std::unordered_set<std::unique_ptr<backend::ITensorManager>>;
-
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ITENSOR_MANAGER_H__
diff --git a/runtime/neurun/core/include/backend/ITensorRegister.h b/runtime/neurun/core/include/backend/ITensorRegister.h
deleted file mode 100644
index 9e554ab77..000000000
--- a/runtime/neurun/core/include/backend/ITensorRegister.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_ITENSOR_REGISTER_H__
-#define __NEURUN_BACKEND_ITENSOR_REGISTER_H__
-
-#include "compiler/SubTensorInfo.h"
-#include "ir/LowerInfoMap.h"
-#include "ir/operand/ParentInfo.h"
-#include "ITensorBuilder.h"
-#include "ir/Layout.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/OperandInfo.h"
-#include "ir/Operands.h"
-#include "ir/OperationVisitor.h"
-
-namespace
-{
-
-neurun::ir::Shape permuteTensorShape(const neurun::ir::Shape &shape,
- neurun::ir::Layout frontend_layout,
- neurun::ir::Layout backend_layout)
-{
- assert(shape.rank() <= 4);
- neurun::ir::Shape backend_shape{shape};
- if (shape.rank() == 4 && frontend_layout == neurun::ir::Layout::NHWC &&
- backend_layout == neurun::ir::Layout::NCHW)
- {
- backend_shape.dim(1) = shape.dim(3);
- backend_shape.dim(2) = shape.dim(1);
- backend_shape.dim(3) = shape.dim(2);
- }
- else if (shape.rank() == 4 && frontend_layout == neurun::ir::Layout::NCHW &&
- backend_layout == neurun::ir::Layout::NHWC)
- {
- backend_shape.dim(1) = shape.dim(2);
- backend_shape.dim(2) = shape.dim(3);
- backend_shape.dim(3) = shape.dim(1);
- }
- return backend_shape;
-}
-} // namespace
-
-namespace neurun
-{
-namespace backend
-{
-
-class ITensorRegister : public ir::OperationVisitor
-{
-public:
- virtual ~ITensorRegister() = default;
-
-public:
- void registerTensors(const ir::OpSequence &subg, const ir::LowerInfoMap *lower_info_map)
- {
- _current_subg_layout = subg.getLayout();
- _lower_info_map = lower_info_map;
- assert(_lower_info_map != nullptr);
- assert(tensor_builder().get() != nullptr);
- subg.accept(*this);
- }
-
-protected:
- virtual const ir::Operands &operands() const = 0;
- virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0;
- virtual bool supportSubTensor() const = 0;
-
-protected:
-#define OP(InternalName) \
- void visit(const ir::operation::InternalName &node) override \
- { \
- ir::OperandIndexSequence indices{node.getInputs()}; \
- indices.append(node.getOutputs()); \
- for (const auto &index : indices) \
- { \
- defaultRegisterTensorInfo(index); \
- } \
- }
-#include "ir/Operations.lst"
-#undef OP
-
-protected:
- void defaultRegisterTensorInfo(const ir::OperandIndex &index) const
- {
- if (tensor_builder()->isRegistered(index))
- {
- return;
- }
-
- const auto &obj = operands().at(index);
- const auto frontend_layout = frontendLayout();
- const auto backend_layout = backendLayout(index);
- if (supportSubTensor() && obj.parent_info() != nullptr)
- {
- tensor_builder()->registerSubTensorInfo(
- index, generateSubTensorInfo(obj, frontend_layout, backend_layout));
- }
- else
- {
- ir::OperandInfo backend_info{permuteTensorShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo()};
- tensor_builder()->registerTensorInfo(index, backend_info, backend_layout, obj.isConstant());
- }
- }
-
-protected:
- ir::Layout frontendLayout() const { return _current_subg_layout; }
- ir::Layout backendLayout(const ir::OperandIndex &index) const
- {
- assert(_lower_info_map != nullptr);
- const auto lower_info = _lower_info_map->operand.at(index).get();
- return lower_info->def_factors().getOnlyElement().layout();
- }
-
-private:
- compiler::SubTensorInfo generateSubTensorInfo(const ir::Operand &obj, ir::Layout frontend_layout,
- ir::Layout backend_layout) const
- {
- assert(obj.shape().rank() <= 4);
- const auto parent_index = obj.parent_info()->parent();
- auto shape = obj.shape();
- auto offset = obj.parent_info()->offset();
- if (operands().at(parent_index).shape().rank() == 4 && frontend_layout == ir::Layout::NHWC &&
- backend_layout == ir::Layout::NCHW)
- {
- shape.extendRank(4);
- offset = {offset[0], offset[3], offset[1], offset[2]};
- }
- else if (operands().at(parent_index).shape().rank() == 4 &&
- frontend_layout == ir::Layout::NHWC && backend_layout == ir::Layout::NCHW)
- {
- shape.extendRank(4);
- offset = {offset[0], offset[2], offset[3], offset[1]};
- }
- ir::Operand subtensor_obj{permuteTensorShape(shape, frontend_layout, backend_layout),
- obj.typeInfo()};
- subtensor_obj.parent_info(
- nnfw::cpp14::make_unique<ir::operand::ParentInfo>(parent_index, offset));
- return compiler::SubTensorInfo{subtensor_obj};
- }
-
-private:
- ir::Layout _current_subg_layout;
- const ir::LowerInfoMap *_lower_info_map{nullptr};
-};
-
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_ITENSOR_REGISTER_H__
diff --git a/runtime/neurun/core/include/backend/JSONExecTime.h b/runtime/neurun/core/include/backend/JSONExecTime.h
deleted file mode 100644
index 84505e10f..000000000
--- a/runtime/neurun/core/include/backend/JSONExecTime.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_JSON_EXEC_TIME_H__
-#define __NEURUN_BACKEND_JSON_EXEC_TIME_H__
-
-#include <fstream>
-#include <unordered_map>
-#include <map>
-#include <vector>
-#include "backend/Backend.h"
-#include "backend/IConfig.h"
-
-namespace neurun
-{
-namespace backend
-{
-
-/**
- * @brief table, that contains execution time of an operation on some backend for different input
- * sizes and transfer time from one backend to another for various input sizes (permutation time)
- *
- * backend -> op -> quant-> size --> time
- * _measurements[Backend*]["string"][bool][uint32_t] = int64_t
- */
-using MeasurementData = std::unordered_map<
- const Backend *,
- std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>;
-
-class JSON
-{
-public:
- explicit JSON(const std::vector<const Backend *> &backends, MeasurementData &measurements)
- : _measurement_file("exec_time.json"), _backends(), _measurements(measurements)
- {
- for (const auto b : backends)
- {
- _backends.emplace(b->config()->id(), b);
- }
- loadOperationsExecTime();
- };
- /**
- * @brief Update _operations_exec_time_file with new data.
- */
- void uploadOperationsExecTime() const;
-
-private:
- ///@brief file containing measurements
- std::string _measurement_file;
- std::unordered_map<std::string, const Backend *> _backends;
- std::unordered_map<
- const Backend *,
- std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>
- &_measurements;
- /**
- * @brief Helper function for inserting data to OperationExecTimes
- *
- * @param backend String name of backend
- * @param operation String name of operation
- * @param quant if input type quantized
- * @param stream File stream
- */
- void readOperation(const std::string &backend, const std::string &operation, bool quant,
- std::ifstream &stream);
-
- /**
- * @brief Helper function for writing OperationExecTimes to stream
- *
- * @param operation_info Map of operations execution information
- * @param stream File stream
- */
- void printOperation(const std::map<uint32_t, int64_t> &operation_info,
- std::ofstream &stream) const;
- /**
- * @brief Parse and load operations_exec_time from _operations_exec_time_file.
- */
- void loadOperationsExecTime();
-};
-
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_JSON_EXEC_TIME_H__
diff --git a/runtime/neurun/core/include/backend/operand/ITensor.h b/runtime/neurun/core/include/backend/operand/ITensor.h
deleted file mode 100644
index c278b01a0..000000000
--- a/runtime/neurun/core/include/backend/operand/ITensor.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_OPERAND_I_TENSOR_H__
-#define __NEURUN_BACKEND_OPERAND_I_TENSOR_H__
-
-#include <cstring>
-#include <cstdint>
-#include <functional>
-
-#include "ir/Layout.h"
-#include "util/Coordinates.h"
-
-namespace neurun
-{
-namespace backend
-{
-namespace operand
-{
-
-class ITensor
-{
-public:
- virtual ~ITensor() = default;
-
-public:
- virtual uint8_t *buffer() const = 0;
- virtual size_t total_size() const = 0;
- virtual size_t dimension(size_t index) const = 0;
- virtual size_t num_dimensions() const = 0;
- virtual size_t calcOffset(const neurun::util::Coordinates &coords) const = 0;
- virtual ir::Layout layout() const = 0;
- virtual bool has_padding() const = 0;
- virtual void access(const std::function<void(ITensor &tensor)> &fn) = 0;
-};
-
-} // namespace operand
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_OPERAND_I_TENSOR_H__
diff --git a/runtime/neurun/core/include/compiler/Compiler.h b/runtime/neurun/core/include/compiler/Compiler.h
deleted file mode 100644
index 797fc77ad..000000000
--- a/runtime/neurun/core/include/compiler/Compiler.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Compiler.h
- * @brief This file contains Compiler class to define and run compilation phase
- */
-
-#ifndef __NEURUN_COMPILER_COMPILE_H_
-#define __NEURUN_COMPILER_COMPILE_H_
-
-#include "ir/Graph.h"
-#include "exec/IExecutor.h"
-
-namespace neurun
-{
-
-namespace compiler
-{
-
-enum class State
-{
- CREATED, // Before compilation
- STARTED, // Compile is started
- LOWERED, // Backend is decided
- COMPILED // Success compilation
-};
-
-/**
- * @brief Class to compile graph model
- */
-class Compiler
-{
-public:
- /**
- * @brief Construct a new Compiler object
- * @param[in] model Graph model
- */
- Compiler(const std::shared_ptr<ir::Graph> &graph)
- : _graph{graph}, _executor{nullptr}, _state{State::CREATED}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Run compilation. Compilation result will be saved in _plan
- */
- void compile(void);
- /**
- * @brief Pass plan reference
- * @param[out] plan Plan reference to return\n
- * Set nullptr if compile is not run yet
- */
- void release(std::shared_ptr<exec::IExecutor> &executor) { executor = _executor; }
-
- void state(State state) { _state = state; }
- State state(void) const { return _state; }
-
- /**
- * @brief Check if model can compile
- * @return @c true if model can compile, otherwise @c false
- * @note This method don't check model correctness,\n
- * so model verification should be done before calling this method
- */
- bool checkCompilable();
-
-private:
- std::shared_ptr<ir::Graph> _graph;
- std::shared_ptr<exec::IExecutor> _executor;
- State _state;
-};
-
-} // namespace compiler
-
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_COMPILE_H_
diff --git a/runtime/neurun/core/include/compiler/IExecutionBuilder.h b/runtime/neurun/core/include/compiler/IExecutionBuilder.h
deleted file mode 100644
index c5a06fec0..000000000
--- a/runtime/neurun/core/include/compiler/IExecutionBuilder.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_COMPILER_I_EXECUTION_BUILDER_H__
-#define __NEURUN_COMPILER_I_EXECUTION_BUILDER_H__
-
-#include <memory>
-
-#include "exec/IFunction.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-struct IExecutionBuilder
-{
- virtual ~IExecutionBuilder() = default;
-
- virtual void append(std::unique_ptr<::neurun::exec::IFunction> &&f) = 0;
-};
-
-} // namespace compiler
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_I_EXECUTION_BUILDER_H__
diff --git a/runtime/neurun/core/include/compiler/SubTensorInfo.h b/runtime/neurun/core/include/compiler/SubTensorInfo.h
deleted file mode 100644
index 18cab466b..000000000
--- a/runtime/neurun/core/include/compiler/SubTensorInfo.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file SubTensorInfo.h
- * @brief This file contains SubTensorInfo to represent subsumption between tensors
- * for backend tensor allocation
- */
-#ifndef __NEURUN_COMPILER_SUBTENSOR_INFO_H__
-#define __NEURUN_COMPILER_SUBTENSOR_INFO_H__
-
-#include "ir/Operand.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-/**
- * @brief Class to represent information of subtensor
- */
-class SubTensorInfo
-{
-public:
- SubTensorInfo() = delete;
-
- /**
- * @brief Construct a new SubTensorInfo object
- * @param[in] obj SubTensor object
- */
- SubTensorInfo(const ir::Operand &obj)
- : _parent{obj.parent_info()->parent()}, _shape{obj.shape()}, _type{obj.typeInfo()},
- _offset{obj.parent_info()->offset()}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return parent tensor index
- * @return Parent tensor index
- */
- const ir::OperandIndex parent(void) const { return _parent; }
- /**
- * @brief Return tensor shape
- * @return Tensor shape
- */
- const ir::Shape &shape(void) const { return _shape; }
- /**
- * @brief Return tensor type
- * @return Tensor type
- */
- const ir::TypeInfo &type(void) const { return _type; }
- /**
- * @brief Return tensor's offset in parent tensor
- * @return Tensor offset
- */
- const neurun::util::Coordinates offset(void) const { return _offset; }
-
-private:
- const ir::OperandIndex _parent;
- const ir::Shape _shape;
- const ir::TypeInfo _type;
- const neurun::util::Coordinates _offset;
-};
-
-} // compiler
-} // neurun
-
-#endif // __NEURUN_COMPILER_SUBTENSOR_INFO_H__
diff --git a/runtime/neurun/core/include/exec/Execution.h b/runtime/neurun/core/include/exec/Execution.h
deleted file mode 100644
index 7304f8aab..000000000
--- a/runtime/neurun/core/include/exec/Execution.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Execution.h
- * @brief This file defines execution
- */
-#ifndef __NEURUN_EXEC_EXECUTION_H__
-#define __NEURUN_EXEC_EXECUTION_H__
-
-#include "ir/Layout.h"
-#include "exec/IExecutor.h"
-#include "IODescription.h"
-
-#include <thread>
-
-namespace neurun
-{
-namespace exec
-{
-
-/**
- * @brief Class to define execution instance to collect input/output information for inference
- * and prepare executor run (TODO)
- */
-class Execution
-{
-
-public:
- /**
- * @brief Construct a new Execution object
- * @param[in] executor Model executor
- */
- Execution(const std::shared_ptr<IExecutor> &executor);
-
-public:
- /**
- * @brief Returns graph object
- * @return Graph object
- */
- const ir::Graph &graph() const { return _executor->graph(); }
- /**
- * @brief Set input data's information
- * @param[in] index Input index
- * @param[in] buffer Input data's buffer pointer
- * @param[in] length Input data's length
- * @param[in] layout Input data's data format
- */
- void setInput(const ir::IOIndex &index, const void *buffer, size_t length,
- ir::Layout layout = ir::Layout::NHWC);
- /**
- * @brief Set input data's information, especially to specify unknown dimensions on model
- * build time.
- * @param[in] index Input index
- * @param[in] type Input data's type info
- * @param[in] shape Input data's shape
- * @param[in] buffer Input data's buffer pointer
- * @param[in] length Input data's length
- * @param[in] layout Input data's data format
- */
- void setInput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape,
- const void *buffer, size_t length, ir::Layout layout = ir::Layout::NHWC);
- /**
- * @brief Set output data's information
- * @param[in] index Output index
- * @param[in] buffer Output data's buffer pointer
- * @param[in] length Output data's length
- * @param[in] layout Output data's data format
- */
- void setOutput(const ir::IOIndex &index, void *buffer, size_t length,
- ir::Layout layout = ir::Layout::NHWC);
- /**
- * @brief Set output data's information, especially to specify unknown dimensions on model
- * build time.
- * @param[in] index Output index
- * @param[in] type Output data's type info
- * @param[in] shape Output data's shape
- * @param[in] buffer Output data's buffer pointer
- * @param[in] length Output data's length
- * @param[in] layout Output data's data format
- */
- void setOutput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape,
- void *buffer, size_t length, ir::Layout layout = ir::Layout::NHWC);
- /**
- * @brief Set input data's data format
- * @param[in] index Input index
- * @param[in] layout Input data's data format
- */
- void setInputLayout(const ir::IOIndex &index, ir::Layout layout);
- /**
- * @brief Set output data's data format
- * @param[in] index Output index
- * @param[in] layout Output data's data format
- */
- void setOutputLayout(const ir::IOIndex &index, ir::Layout layout);
- /**
- * @brief Execution
- * @note It should be called after setting input and output buffer
- */
- void execute();
-
- /**
- * @brief Start asynchronous execution
- * @note It returns after execution thread is started
- * It should be called after setting input and output buffer
- */
- void startExecute(void);
-
- /**
- * @brief Return when execution is finished
- * @note It waits until execution is finished
- */
- void waitFinish(void);
-
- /**
- * @brief Check execution is finished
- * @return @c true if execution is finished, otherwise @c false
- */
- bool isFinished(void) const;
-
-private:
- const std::shared_ptr<IExecutor> _executor;
- IODescription _io_desc;
- std::unique_ptr<std::thread> _exec_thread;
- bool finished{false};
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_EXECUTION_H__
diff --git a/runtime/neurun/core/include/exec/ExecutionObservers.h b/runtime/neurun/core/include/exec/ExecutionObservers.h
deleted file mode 100644
index ca658c706..000000000
--- a/runtime/neurun/core/include/exec/ExecutionObservers.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_OBSREVERS_H__
-#define __NEURUN_EXEC_OBSREVERS_H__
-
-#include "exec/IFunction.h"
-#include "ir/OpSequence.h"
-#include "backend/ExecTime.h"
-#include "util/ITimer.h"
-#include "IExecutor.h"
-#include "misc/EventCollector.h"
-#include "misc/EventRecorder.h"
-
-namespace neurun
-{
-namespace exec
-{
-class IExecutionObserver
-{
-public:
- /// @brief Invoked just before model (not individual operation) execution begins
- virtual void handleBegin(IExecutor *) { return; }
-
- virtual void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0;
- virtual void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0;
-
- /// @brief Invoked just after model (not individual operation) execution ends
- virtual void handleEnd(IExecutor *) { return; }
-
- virtual ~IExecutionObserver() = default;
-};
-
-class ProfileObserver : public IExecutionObserver
-{
-public:
- explicit ProfileObserver(std::shared_ptr<backend::ExecTime> et) : _et(std::move(et)) {}
- void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
-
- void handleEnd(IExecutor *) override { _et->uploadOperationsExecTime(); }
-
-private:
- std::unique_ptr<util::ITimer> _timer;
- std::shared_ptr<backend::ExecTime> _et;
-};
-
-class ChromeTracingObserver : public IExecutionObserver
-{
-public:
- ChromeTracingObserver(const std::string &filepath);
- ~ChromeTracingObserver();
- void handleBegin(IExecutor *) override;
- void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *) override;
-
-private:
- static std::string subgraphTag(const ir::OpSequence *op_seq);
-
-private:
- std::ofstream _ofs;
- EventRecorder _recorder;
- EventCollector _collector;
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_OBSREVERS_H__
diff --git a/runtime/neurun/core/include/exec/IExecutor.h b/runtime/neurun/core/include/exec/IExecutor.h
deleted file mode 100644
index de3291388..000000000
--- a/runtime/neurun/core/include/exec/IExecutor.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file IExecutor.h
- * @brief This file defines interface of Executor
- */
-#ifndef __NEURUN_EXEC_I_EXECUTOR_H_
-#define __NEURUN_EXEC_I_EXECUTOR_H_
-
-#include "ir/Graph.h"
-#include "IFunction.h"
-#include "IODescription.h"
-#include "ir/OperationIndexMap.h"
-
-namespace neurun
-{
-namespace exec
-{
-class IExecutionObserver;
-/**
- * @brief Struct to define interface of Executor
- */
-struct IExecutor
-{
- /**
- * @brief Construct a new IExecutor object
- */
- IExecutor() = default;
- /**
- * @brief Destroy the IExecutor object
- */
- virtual ~IExecutor() = default;
-
- /**
- * @brief Returns graph object
- *
- * @return Graph object
- */
- virtual const ir::Graph &graph() = 0;
-
- /**
- * @brief Set an ordering on operations
- * @param[in] ranks The table encoding the ordering
- */
- virtual void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) = 0;
-
- /**
- * @brief Start execution
- * @param[in] desc Input and output description
- * @note This method should be thread-safe
- */
- virtual void execute(const IODescription &desc) = 0;
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_I_EXECUTOR_H_
diff --git a/runtime/neurun/core/include/exec/IFunction.h b/runtime/neurun/core/include/exec/IFunction.h
deleted file mode 100644
index 5cc29ea75..000000000
--- a/runtime/neurun/core/include/exec/IFunction.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_I_FUNCTION_H__
-#define __NEURUN_EXEC_I_FUNCTION_H__
-
-namespace neurun
-{
-namespace exec
-{
-
-class IFunction
-{
-public:
- virtual ~IFunction() = default;
- virtual void run() = 0;
- virtual void runSync() = 0;
- virtual void prepare() {}
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_I_FUNCTION_H__
diff --git a/runtime/neurun/core/include/exec/IODescription.h b/runtime/neurun/core/include/exec/IODescription.h
deleted file mode 100644
index bdcc78176..000000000
--- a/runtime/neurun/core/include/exec/IODescription.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_IO_DESCRIPTION_H__
-#define __NEURUN_EXEC_IO_DESCRIPTION_H__
-
-#include <vector>
-
-#include "ir/OperandInfo.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-struct InputDesc
-{
- const ir::OperandInfo info;
- const void *buffer;
- const size_t size;
- const ir::Layout layout;
-
- InputDesc(void) = delete;
- InputDesc(const ir::OperandInfo &info, const void *buffer, const size_t size, ir::Layout layout)
- : info(info), buffer(buffer), size(size), layout(layout)
- {
- }
-};
-
-struct OutputDesc
-{
- const ir::OperandInfo info;
- void *buffer;
- const size_t size;
- const ir::Layout layout;
-
- OutputDesc(void) = delete;
- OutputDesc(const ir::OperandInfo &info, void *buffer, const size_t size, ir::Layout layout)
- : info(info), buffer(buffer), size(size), layout(layout)
- {
- }
-};
-
-struct IODescription
-{
- std::vector<std::unique_ptr<InputDesc>> inputs;
- std::vector<std::unique_ptr<OutputDesc>> outputs;
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_IO_DESCRIPTION_H__
diff --git a/runtime/neurun/core/include/exec/NopFunction.h b/runtime/neurun/core/include/exec/NopFunction.h
deleted file mode 100644
index 5cbd7e5ce..000000000
--- a/runtime/neurun/core/include/exec/NopFunction.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file NopFunction.h
- * @brief This file defines NopFunction
- */
-#ifndef __NEURUN_EXEC_NOP_FUNCTION_H_
-#define __NEURUN_EXEC_NOP_FUNCTION_H_
-
-#include "IFunction.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-/**
- * @brief A derivative of IFunction tha does nothing
- *
- */
-class NopFunction : public IFunction
-{
-public:
- NopFunction() = default;
- void run() override
- {
- // DO NOTHING
- }
- void runSync() override
- {
- // this abstract method is used just for profiling and called for
- // backend::acl_common::AclFunction
- run();
- }
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_NOP_FUNCTION_H_
diff --git a/runtime/neurun/core/include/ir/BackendSet.h b/runtime/neurun/core/include/ir/BackendSet.h
deleted file mode 100644
index 4979286df..000000000
--- a/runtime/neurun/core/include/ir/BackendSet.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_BACKEND_SET_H__
-#define __NEURUN_IR_BACKEND_SET_H__
-
-#include "util/Set.h"
-
-namespace neurun
-{
-namespace backend
-{
-class Backend;
-} // namespace backend
-} // namespace neurun
-
-namespace neurun
-{
-namespace ir
-{
-
-using BackendSet = util::Set<const backend::Backend *>;
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_BACKEND_SET_H__
diff --git a/runtime/neurun/core/include/ir/Data.h b/runtime/neurun/core/include/ir/Data.h
deleted file mode 100644
index a0a489553..000000000
--- a/runtime/neurun/core/include/ir/Data.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_DATA_H__
-#define __NEURUN_IR_DATA_H__
-
-#include <algorithm>
-
-namespace neurun
-{
-namespace ir
-{
-
-struct Data
-{
- virtual ~Data() = default;
-
- virtual size_t size(void) const = 0;
- virtual const uint8_t *base(void) const = 0;
-};
-
-class CachedData final : public Data
-{
-public:
- CachedData(const uint8_t *base, size_t size) : _base{new uint8_t[size]}, _size{size}
- {
- std::copy(base, base + size, _base);
- }
-
-public:
- ~CachedData() { delete[] _base; }
-
-public:
- size_t size(void) const override { return _size; }
- const uint8_t *base(void) const override { return _base; }
-
-private:
- uint8_t *_base;
- size_t _size;
-};
-
-class ExternalData final : public Data
-{
-public:
- ExternalData(const uint8_t *base, size_t size) : _base{base}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- size_t size(void) const override { return _size; }
- const uint8_t *base(void) const override { return _base; }
-
-private:
- const uint8_t *_base;
- const size_t _size;
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_DATA_H__
diff --git a/runtime/neurun/core/include/ir/DataType.h b/runtime/neurun/core/include/ir/DataType.h
deleted file mode 100644
index 6eca6eb27..000000000
--- a/runtime/neurun/core/include/ir/DataType.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_DATATYPE_H__
-#define __NEURUN_IR_DATATYPE_H__
-
-#include <stdexcept>
-
-namespace neurun
-{
-namespace ir
-{
-
-enum class DataType
-{
- FLOAT32 = 0,
- INT32 = 1,
- UINT32 = 2,
- QUANT8_ASYMM = 3,
- BOOL8 = 4,
- UINT8 = 5,
- QUANT8_SYMM = 6,
-};
-
-inline size_t sizeOfDataType(DataType data_type)
-{
- switch (data_type)
- {
- case DataType::FLOAT32:
- return sizeof(float);
- case DataType::INT32:
- return sizeof(int32_t);
- case DataType::UINT32:
- return sizeof(uint32_t);
- case DataType::BOOL8:
- case DataType::QUANT8_ASYMM:
- case DataType::UINT8:
- return sizeof(uint8_t);
- case DataType::QUANT8_SYMM:
- return sizeof(int8_t);
- default:
- throw std::runtime_error{"Unsupported type size"};
- }
-}
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_DATATYPE_H__
diff --git a/runtime/neurun/core/include/ir/Graph.h b/runtime/neurun/core/include/ir/Graph.h
deleted file mode 100644
index 5105c3a42..000000000
--- a/runtime/neurun/core/include/ir/Graph.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_GRAPH_H__
-#define __NEURUN_IR_GRAPH_H__
-
-#include <functional>
-
-#include "ir/Operands.h"
-#include "ir/Operations.h"
-#include "ir/LowerInfoMap.h"
-#include "ir/OpSequence.h"
-#include "ir/Subgraphs.h"
-
-namespace neurun
-{
-namespace compiler
-{
-class BackendResolver;
-} // namespace compiler
-} // namespace neurun
-
-namespace neurun
-{
-namespace backend
-{
-namespace custom
-{
-class IKernelBuilder;
-} // namespace custom
-} // namespace backend
-} // namespace neurun
-
-namespace neurun
-{
-namespace ir
-{
-
-class Graph
-{
-private:
- enum class Phase
- {
- BUILDING,
- MODEL
- };
-
-public:
- Graph(void);
- ~Graph(void);
-
- // Graph Building
-public:
- OperandIndex addOperand(const Shape &shape, const TypeInfo &type);
- OperationIndex addOperation(std::unique_ptr<Operation> &&node);
- void setOperandValue(const OperandIndex &ind, std::unique_ptr<Data> &&data);
- void addInput(const OperandIndex &ind);
- void addOutput(const OperandIndex &ind);
- void finishBuilding(void);
- void lower(void);
- void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
- bool isBuildingPhase(void) const { return _phase == Phase::BUILDING; }
-
-private:
- void initializeUseDef();
-
- // Custom operations support
-public:
- void
- bindKernelBuilder(const std::shared_ptr<neurun::backend::custom::IKernelBuilder> &kernel_builder)
- {
- _kernel_builder = kernel_builder;
- }
-
- const std::shared_ptr<backend::custom::IKernelBuilder> &getKernelBuilder() const
- {
- return _kernel_builder;
- }
-
-private:
- std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
-
- // Accessors
-public:
- const OperandIndexSequence &getInputs() const { return _inputs; }
- OperandIndexSequence &getInputs() { return _inputs; }
- const OperandIndexSequence &getOutputs() const { return _outputs; }
- OperandIndexSequence &getOutputs() { return _outputs; }
- const Operands &operands() const { return _operands; }
- Operands &operands() { return _operands; } // TODO Remove this non-const accessor
- const Operations &operations() const { return _operations; }
- Operations &operations() { return _operations; }
- const compiler::BackendResolver *backend_resolver() const { return _backend_resolver.get(); }
-
-private:
- Phase _phase{Phase::BUILDING};
- Operations _operations;
- Operands _operands;
- OperandIndexSequence _inputs;
- OperandIndexSequence _outputs;
-
- // For LOWERED phase
-public:
- const LowerInfoMap *getLowerInfo() const { return _lower_info_map.get(); }
- const operation::LowerInfo *getLowerInfo(const SubgraphIndex &subg_index) const;
- void setLowerInfo(const SubgraphIndex &subg_index,
- std::unique_ptr<operation::LowerInfo> &&lower_info);
- void removeLowerInfo(const SubgraphIndex &subg_index);
- const operand::LowerInfo *getLowerInfo(const OperandIndex &index) const;
- operand::LowerInfo *getLowerInfo(const OperandIndex &index);
- void setLowerInfo(const OperandIndex &index, std::unique_ptr<operand::LowerInfo> &&lower_info);
- void removeLowerInfo(const OperandIndex &index);
- Subgraphs &subgraphs()
- {
- assert(_op_seqs);
- return *_op_seqs;
- }
- const Subgraphs *subgraphs() const { return _op_seqs.get(); }
- void setBackendResolver(std::unique_ptr<compiler::BackendResolver> &&br);
-
-private:
- void makeSubgraphs(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info);
- void
- manipulateLowerInfo(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info);
- void dumpLowerInfo();
- bool mergeable(const SubgraphIndex &subg_index, const OperationIndex &node_index, Layout layout);
- SubgraphIndex appendFreshSingleOpSubgraph(const OperationIndex &node_index, const Operation &node,
- Layout layout);
-
-private:
- std::unique_ptr<compiler::BackendResolver> _backend_resolver;
- std::unique_ptr<LowerInfoMap> _lower_info_map;
- // Pass(for Perm) can accept only graph so that Graph has Subgraphs as a member
- std::unique_ptr<Subgraphs> _op_seqs;
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_GRAPH_H__
diff --git a/runtime/neurun/core/include/ir/Index.h b/runtime/neurun/core/include/ir/Index.h
deleted file mode 100644
index aebc64dcd..000000000
--- a/runtime/neurun/core/include/ir/Index.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERAND_INDEX_H__
-#define __NEURUN_IR_OPERAND_INDEX_H__
-
-#include "util/Index.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-struct OperationIndexTag;
-using OperationIndex = ::neurun::util::Index<uint32_t, OperationIndexTag>;
-
-struct OperandIndexTag;
-using OperandIndex = ::neurun::util::Index<uint32_t, OperandIndexTag>;
-
-struct IOIndexTag;
-using IOIndex = ::neurun::util::Index<uint32_t, IOIndexTag>;
-
-struct SubgraphIndexTag;
-using SubgraphIndex = ::neurun::util::Index<uint32_t, SubgraphIndexTag>;
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERAND_INDEX_H__
diff --git a/runtime/neurun/core/include/ir/InternalType.h b/runtime/neurun/core/include/ir/InternalType.h
deleted file mode 100644
index 91085f2f3..000000000
--- a/runtime/neurun/core/include/ir/InternalType.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_INTERNAL_TYPE_H__
-#define __NEURUN_IR_INTERNAL_TYPE_H__
-
-#include <cstdint>
-
-namespace neurun
-{
-namespace ir
-{
-
-enum class Activation
-{
- NONE = 0,
- RELU = 1,
- RELU1 = 2,
- RELU6 = 3,
- TANH = 4,
- SIGMOID = 5
-};
-
-enum class PaddingType
-{
- EXPLICIT = 0,
- SAME = 1,
- VALID = 2
-};
-
-struct ExplicitPadding
-{
- uint32_t left;
- uint32_t right;
- uint32_t top;
- uint32_t bottom;
-};
-
-// TODO Resolve explicit padding param at frontend and save in value field
-struct Padding
-{
- PaddingType type;
- ExplicitPadding param;
-};
-
-struct Stride
-{
- uint32_t vertical;
- uint32_t horizontal;
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_INTERNAL_TYPE_H__
diff --git a/runtime/neurun/core/include/ir/Layout.h b/runtime/neurun/core/include/ir/Layout.h
deleted file mode 100644
index c4edb70db..000000000
--- a/runtime/neurun/core/include/ir/Layout.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_LAYOUT_H__
-#define __NEURUN_IR_LAYOUT_H__
-
-#include <functional>
-#include <string>
-
-namespace neurun
-{
-namespace ir
-{
-
-enum class Layout
-{
- UNKNOWN = 0,
- NHWC,
- NCHW
-};
-
-inline std::string to_string(Layout layout)
-{
- switch (layout)
- {
- case Layout::NHWC:
- return std::string{"NHWC"};
- case Layout::NCHW:
- return std::string{"NCHW"};
- case Layout::UNKNOWN:
- return std::string{"UNKNOWN"};
- default:
- throw std::runtime_error("WRONG LAYOUT");
- }
-}
-
-} // namespace ir
-} // namespace neurun
-
-namespace std
-{
-
-template <> struct hash<neurun::ir::Layout>
-{
- size_t operator()(neurun::ir::Layout value) const noexcept
- {
- using type = typename std::underlying_type<neurun::ir::Layout>::type;
- return hash<type>()(static_cast<type>(value));
- }
-};
-
-} // namespace std
-
-#endif // __NEURUN_IR_LAYOUT_H__
diff --git a/runtime/neurun/core/include/ir/LowerInfoMap.h b/runtime/neurun/core/include/ir/LowerInfoMap.h
deleted file mode 100644
index a8fd818b8..000000000
--- a/runtime/neurun/core/include/ir/LowerInfoMap.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_LOWER_INFO_MAP_H__
-#define __NEURUN_IR_LOWER_INFO_MAP_H__
-
-#include <memory>
-#include <unordered_map>
-
-#include "ir/operand/LowerInfo.h"
-#include "ir/operation/LowerInfo.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/Index.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-struct LowerInfoMap
-{
- std::unordered_map<SubgraphIndex, std::unique_ptr<operation::LowerInfo>> operation;
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operand;
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_LOWER_INFO_MAP_H__
diff --git a/runtime/neurun/core/include/ir/OpCode.h b/runtime/neurun/core/include/ir/OpCode.h
deleted file mode 100644
index 2b466a212..000000000
--- a/runtime/neurun/core/include/ir/OpCode.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OP_CODE_H__
-#define __NEURUN_IR_OP_CODE_H__
-
-#include <functional>
-#include <stdint.h>
-
-namespace neurun
-{
-namespace ir
-{
-
-enum class OpCode
-{
- Invalid, //< Unused
-#define OP(Name) Name, //< All operations
-#include "ir/Operations.lst"
-#undef OP
- COUNT
-};
-
-const char *toString(OpCode opcode);
-
-} // namespace ir
-} // namespace neurun
-
-namespace std
-{
-
-template <> struct hash<neurun::ir::OpCode>
-{
- size_t operator()(neurun::ir::OpCode value) const noexcept
- {
- using type = typename std::underlying_type<neurun::ir::OpCode>::type;
- return hash<type>()(static_cast<type>(value));
- }
-};
-
-} // namespace std
-
-#endif // __NEURUN_IR_OP_CODE_H__
diff --git a/runtime/neurun/core/include/ir/OpSequence.h b/runtime/neurun/core/include/ir/OpSequence.h
deleted file mode 100644
index 68632e589..000000000
--- a/runtime/neurun/core/include/ir/OpSequence.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OP_SEQUENCE_H__
-#define __NEURUN_IR_OP_SEQUENCE_H__
-
-#include <vector>
-#include <string>
-#include <memory>
-
-#include "ir/Layout.h"
-#include "ir/Index.h"
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-// To support ValueSwappable, Element doesn't have members which are classes
-// as value(or can have members which are classes as value and the classes
-// support Swappable)
-struct Element
-{
- OperationIndex index;
- const Operation *node;
-
- Element(const OperationIndex *i, const Operation *n) : index{*i}, node{n}
- {
- // DO NOTHING
- }
-};
-
-class OpSequence
-{
-public:
- explicit OpSequence(Layout layout);
- OpSequence(const OpSequence &) = delete;
-
-public:
- void accept(OperationVisitor &v) const;
-
-public:
- const OperandIndexSequence &getInputs() const { return _inputs; }
- const OperandIndexSequence &getOutputs() const { return _outputs; }
- void setInputs(const OperandIndexSequence &indexes) { _inputs = indexes; }
- void setOutputs(const OperandIndexSequence &indexes) { _outputs = indexes; }
- void replaceInput(const OperandIndex &from, const OperandIndex &to) { _inputs.replace(from, to); }
- void replaceOutput(const OperandIndex &from, const OperandIndex &to)
- {
- _outputs.replace(from, to);
- }
-
- void appendOperation(const OperationIndex &index, const Operation &node)
- {
- _operations.emplace_back(&index, &node);
- }
-
- std::vector<Element> &operations(void) { return _operations; }
-
- const std::vector<Element> &operations(void) const { return _operations; }
-
- uint32_t size(void) const { return _operations.size(); }
-
- // TODO: Impl Dumper instead of this method
- std::string getStr(void) const;
-
-public:
- void remove(const OperationIndex &index);
-
-public:
- Layout getLayout() const { return _layout; }
-
-public:
- std::vector<Element>::const_iterator begin() const { return _operations.begin(); }
- std::vector<Element>::const_iterator end() const { return _operations.end(); }
-
-private:
- bool exist(const OperationIndex &index) const;
-
-private:
- OperandIndexSequence _inputs;
- OperandIndexSequence _outputs;
- std::vector<Element> _operations;
-
-private:
- Layout _layout;
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OP_SEQUENCE_H__
diff --git a/runtime/neurun/core/include/ir/Operand.h b/runtime/neurun/core/include/ir/Operand.h
deleted file mode 100644
index b1f28de48..000000000
--- a/runtime/neurun/core/include/ir/Operand.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERAND_H__
-#define __NEURUN_IR_OPERAND_H__
-
-#include <cassert>
-#include <cstdint>
-#include <cpp14/memory.h>
-#include <algorithm>
-
-#include "ir/Data.h"
-#include "ir/DataType.h"
-#include "ir/OperandInfo.h"
-#include "ir/operand/ParentInfo.h" // TODO Remove this dependency
-#include "ir/OperationIndexList.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-class Operand
-{
-public:
- explicit Operand(const Shape &shape, const TypeInfo &type) : _info{shape, type}
- {
- // DO NOTHING
- }
-
-public:
- const Shape &shape(void) const { return _info.shape(); }
- const TypeInfo &typeInfo(void) const { return _info.typeInfo(); }
- const OperandInfo &info(void) const { return _info; }
- size_t operandSize(void) const;
-
- const OperationIndexList &getUses() const { return _uses; }
- const OperationIndexList &getDef() const { return _def; }
- void appendUse(const OperationIndex &idx);
- void removeUse(const OperationIndex &idx);
- void appendDef(const OperationIndex &idx);
- void removeDef(const OperationIndex &idx);
-
-public:
- void type(const DataType type) { _info.type(type); };
-
-public:
- void data(std::unique_ptr<Data> &&data) { _data = std::move(data); }
- const Data &data(void) const
- {
- assert(_data);
- return *_data;
- }
-
- /**
- * @brief Get true if Operand has data, otherwise @c false
- a @return @c true if Operand has data, otherwise @c false
- */
- bool isConstant(void) const { return _data != nullptr; }
-
-public:
- template <typename T, typename... Args> void data(Args &&... args)
- {
- data(nnfw::cpp14::make_unique<T>(std::forward<Args>(args)...));
- }
-
-public:
- template <typename T> T asScalar(void) const
- {
- assert((shape().rank() == 0) || ((shape().rank() == 1) && (shape().dim(0) == 1)));
- assert(_data != nullptr);
- assert((_data->base() != nullptr) && (_data->size() == sizeof(T)));
-
- return *(reinterpret_cast<const T *>(_data->base()));
- }
-
- template <typename T> std::vector<T> asVector() const
- {
- assert(isConstant());
- assert(_data->size() % sizeof(T) == 0);
-
- const auto *base = reinterpret_cast<const T *>(_data->base());
- const std::size_t size = _data->size() / sizeof(T);
- return std::vector<T>(base, base + size);
- }
-
-public:
- /**
- * @brief Set parent information
- * @param[in] parent_info Parent information
- */
- void parent_info(std::unique_ptr<operand::ParentInfo> &&parent_info);
- /**
- * @brief Return parent information pointer as constant
- * @return Parent information pointer
- */
- const operand::ParentInfo *parent_info() const;
- /**
- * @brief Return parent information pointer
- * @return Perent information pointer
- */
- operand::ParentInfo *parent_info();
-
-private:
- OperandInfo _info;
- std::shared_ptr<Data> _data;
-
- OperationIndexList _uses;
- OperationIndexList _def; // size is 0 (constant) or 1 (from def operation)
-
- std::shared_ptr<operand::ParentInfo> _parent_info;
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERAND_H__
diff --git a/runtime/neurun/core/include/ir/OperandConstraint.h b/runtime/neurun/core/include/ir/OperandConstraint.h
deleted file mode 100644
index de6f21634..000000000
--- a/runtime/neurun/core/include/ir/OperandConstraint.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_MODEL_OPERAND_CONSTRAINT_H__
-#define __NEURUN_MODEL_OPERAND_CONSTRAINT_H__
-
-#include <stdint.h>
-#include <limits>
-#include <set>
-
-namespace neurun
-{
-namespace ir
-{
-
-class OperandConstraint
-{
-private:
- static const uint32_t INF = std::numeric_limits<uint32_t>::max();
-
-public:
- static OperandConstraint createAny() { return OperandConstraint{0u, INF}; }
- static OperandConstraint createExact(uint32_t exact) { return OperandConstraint{exact, exact}; }
- static OperandConstraint createAtMost(uint32_t end) { return OperandConstraint{0u, end}; }
- static OperandConstraint createAtLeast(uint32_t begin) { return OperandConstraint{begin, INF}; }
- static OperandConstraint createInRange(uint32_t begin, uint32_t end)
- {
- return OperandConstraint{begin, end};
- }
-
-private:
- OperandConstraint(uint32_t begin, uint32_t end) : _begin{begin}, _end{end} {}
-
-public:
- bool check(uint32_t ind) const { return _begin <= ind && ind <= _end; }
-
-private:
- uint32_t _begin;
- uint32_t _end;
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_MODEL_OPERAND_CONSTRAINT_H__
diff --git a/runtime/neurun/core/include/ir/OperandIndexMap.h b/runtime/neurun/core/include/ir/OperandIndexMap.h
deleted file mode 100644
index c9234128e..000000000
--- a/runtime/neurun/core/include/ir/OperandIndexMap.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERAND_INDEX_MAP_H__
-#define __NEURUN_IR_OPERAND_INDEX_MAP_H__
-
-#include <unordered_map>
-
-#include "ir/Index.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-template <typename T> using OperandIndexMap = std::unordered_map<OperandIndex, T>;
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERAND_INDEX_MAP_H__
diff --git a/runtime/neurun/core/include/ir/OperandIndexSequence.h b/runtime/neurun/core/include/ir/OperandIndexSequence.h
deleted file mode 100644
index 7f8cec844..000000000
--- a/runtime/neurun/core/include/ir/OperandIndexSequence.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_MODEL_OPERAND_INDEX_SEQUENCE_H__
-#define __NEURUN_MODEL_OPERAND_INDEX_SEQUENCE_H__
-
-#include <initializer_list>
-#include <vector>
-
-#include "ir/Index.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-class OperandIndexSequence
-{
-public:
- OperandIndexSequence(void) = default;
- OperandIndexSequence(std::initializer_list<OperandIndex> list);
- OperandIndexSequence(std::initializer_list<int32_t> list);
- OperandIndexSequence(std::initializer_list<uint32_t> list);
-
-public:
- void append(const OperandIndex &index) { _set.emplace_back(index); }
- void append(const OperandIndexSequence &l) { _set.insert(_set.end(), l.begin(), l.end()); }
-
-public:
- uint32_t size() const { return static_cast<uint32_t>(_set.size()); }
- const OperandIndex &at(IOIndex set_index) const { return _set.at(set_index.value()); }
- const OperandIndex &at(uint32_t index) const { return _set.at(index); }
- bool contains(const OperandIndex &index) const;
- void replace(const OperandIndex &from, const OperandIndex &to);
-
-public:
- std::vector<OperandIndex>::const_iterator begin(void) const { return _set.begin(); }
- std::vector<OperandIndex>::const_iterator end(void) const { return _set.end(); }
-
-private:
- std::vector<OperandIndex> _set;
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_MODEL_OPERAND_INDEX_SET_H__
diff --git a/runtime/neurun/core/include/ir/OperandInfo.h b/runtime/neurun/core/include/ir/OperandInfo.h
deleted file mode 100644
index 82ad7ef0f..000000000
--- a/runtime/neurun/core/include/ir/OperandInfo.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file OperandInfo.h
- * @brief This file contains OperandInfo class
- */
-#ifndef __NEURUN_IR_OPERAND_INFO_H__
-#define __NEURUN_IR_OPERAND_INFO_H__
-
-#include "ir/Shape.h"
-#include "ir/TypeInfo.h"
-#include "ir/Layout.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-/**
- * @brief Class to save tensor's shape and type
- */
-class OperandInfo
-{
-public:
- /**
- * @brief Construct a new OperandInfo object (deleted)
- */
- OperandInfo() = delete;
- /**
- * @brief Construct a new OperandInfo object
- * @param[in] shape Tensor shape
- * @param[in] typeInfo Tensor data type
- */
- OperandInfo(const Shape &shape, const TypeInfo &typeInfo) : _shape(shape), _typeInfo(typeInfo)
- {
- // DO NOTHING
- }
- /**
- * @brief Construct a new OperandInfo object
- * @param[in] origin info for copy
- */
- OperandInfo(const OperandInfo &origin) : _shape(origin.shape()), _typeInfo(origin.typeInfo())
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return tensor shape
- * @return Tensor shape
- */
- const Shape &shape() const { return _shape; }
- /**
- * @brief Return tensor data type info
- * @return Tensor data type
- */
- const TypeInfo &typeInfo() const { return _typeInfo; }
- /**
- * @brief Set tensor data type
- */
- void type(const DataType type) { _typeInfo.type(type); }
- /**
- * @brief Return size of tensor (bytes)
- * @return Tensor size
- */
- size_t total_size() const { return _shape.num_elements() * sizeOfDataType(_typeInfo.type()); }
-
-private:
- Shape _shape;
- TypeInfo _typeInfo;
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERAND_INFO_H__
diff --git a/runtime/neurun/core/include/ir/Operands.h b/runtime/neurun/core/include/ir/Operands.h
deleted file mode 100644
index c8d68c088..000000000
--- a/runtime/neurun/core/include/ir/Operands.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERANDS_H__
-#define __NEURUN_IR_OPERANDS_H__
-
-#include <memory>
-#include <unordered_map>
-
-#include "ir/Operand.h"
-#include "ir/Index.h"
-#include "util/ObjectManager.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-class Operands : public util::ObjectManager<OperandIndex, Operand>
-{
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_MODEL_OPERAND_SET_H__
diff --git a/runtime/neurun/core/include/ir/Operation.h b/runtime/neurun/core/include/ir/Operation.h
deleted file mode 100644
index a02f980a5..000000000
--- a/runtime/neurun/core/include/ir/Operation.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_H__
-#define __NEURUN_IR_OPERATION_H__
-
-#include <memory>
-
-#include "ir/OpCode.h"
-#include "ir/Operand.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/OperandConstraint.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-struct OperationVisitor;
-
-class Operation
-{
-public:
- Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs);
- explicit Operation(OperandConstraint input_constr);
-
- Operation(const Operation &) = delete;
- Operation(Operation &&) = default;
- Operation &operator=(const Operation &) = delete;
- Operation &operator=(Operation &&) = default;
-
- virtual ~Operation();
-
-public:
- virtual void accept(OperationVisitor &v) const = 0;
- virtual std::string name() const { return std::string{toString(opcode())}; }
- virtual OpCode opcode() const = 0;
-
-public:
- void replaceInput(const OperandIndex &from, const OperandIndex &to);
- void replaceOutput(const OperandIndex &from, const OperandIndex &to);
- const OperandIndexSequence &getInputs() const { return _inputs; }
- const OperandIndexSequence &getOutputs() const { return _outputs; }
- // It's for only input/output tensors but const data.
- void setInputs(const OperandIndexSequence &indexes);
- void setOutputs(const OperandIndexSequence &indexes);
-
-private:
- OperandConstraint _input_constr;
- OperandIndexSequence _inputs;
- OperandIndexSequence _outputs;
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_H__
diff --git a/runtime/neurun/core/include/ir/OperationIndexList.h b/runtime/neurun/core/include/ir/OperationIndexList.h
deleted file mode 100644
index bf3ea3d6b..000000000
--- a/runtime/neurun/core/include/ir/OperationIndexList.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_MODEL_OPERATION_INDEX_LIST_H__
-#define __NEURUN_MODEL_OPERATION_INDEX_LIST_H__
-
-#include <algorithm>
-#include <cassert>
-#include <initializer_list>
-#include <list>
-
-#include "ir/Index.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-class OperationIndexList
-{
-public:
- OperationIndexList(void) = default;
- OperationIndexList(std::initializer_list<OperationIndex> list);
-
-public:
- void append(const OperationIndex &index) { _list.push_back(index); }
- void remove(const OperationIndex &index)
- {
- auto itr = std::find(_list.begin(), _list.end(), index);
- assert(itr != _list.end());
- _list.erase(itr);
- }
-
-public:
- uint32_t size() const { return static_cast<uint32_t>(_list.size()); }
- const std::list<OperationIndex> &list() const { return _list; }
- bool contains(const OperationIndex &index) const;
-
-private:
- std::list<OperationIndex> _list;
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_MODEL_OPERATION_INDEX_LIST_H__
diff --git a/runtime/neurun/core/include/ir/OperationIndexMap.h b/runtime/neurun/core/include/ir/OperationIndexMap.h
deleted file mode 100644
index 50b1db527..000000000
--- a/runtime/neurun/core/include/ir/OperationIndexMap.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_INDEX_MAP_H__
-#define __NEURUN_IR_OPERATION_INDEX_MAP_H__
-
-#include <unordered_map>
-
-#include "ir/Index.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-template <typename T> using OperationIndexMap = std::unordered_map<OperationIndex, T>;
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_INDEX_MAP_H__
diff --git a/runtime/neurun/core/include/ir/OperationVisitor.h b/runtime/neurun/core/include/ir/OperationVisitor.h
deleted file mode 100644
index 0eb6de2d3..000000000
--- a/runtime/neurun/core/include/ir/OperationVisitor.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_VISITOR_H__
-#define __NEURUN_IR_OPERATION_VISITOR_H__
-
-#include "ir/Operations.Include.h"
-#include "ir/OpSequence.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-struct OperationVisitor
-{
- virtual ~OperationVisitor() = default;
-
-#define OP(InternalName) \
- virtual void visit(const operation::InternalName &) {}
-#include "ir/Operations.lst"
-#undef OP
-
- // This OpSequence node should be handled specially so that
- // Op.lst doesn't have OpSequence
- // TODO Remove by pushing it down to derived classes.
- virtual void visit(const OpSequence &op_seq)
- {
- for (const auto &e : op_seq.operations())
- {
- e.node->accept(*this);
- }
- }
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_VISITOR_H__
diff --git a/runtime/neurun/core/include/ir/Operations.Include.h b/runtime/neurun/core/include/ir/Operations.Include.h
deleted file mode 100644
index e14e18cc1..000000000
--- a/runtime/neurun/core/include/ir/Operations.Include.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// This file has no ifdef guard intentionally
-
-#include "ir/operation/BatchToSpaceND.h"
-#include "ir/operation/Conv2D.h"
-#include "ir/operation/MaxPool2D.h"
-#include "ir/operation/AvgPool2D.h"
-#include "ir/operation/Concat.h"
-#include "ir/operation/Reshape.h"
-#include "ir/operation/FullyConnected.h"
-#include "ir/operation/Softmax.h"
-#include "ir/operation/Transpose.h"
-#include "ir/operation/Permute.h"
-#include "ir/operation/ReduceSum.h"
-#include "ir/operation/Add.h"
-#include "ir/operation/Sub.h"
-#include "ir/operation/DepthwiseConv2D.h"
-#include "ir/operation/Slice.h"
-#include "ir/operation/StridedSlice.h"
-#include "ir/operation/Mul.h"
-#include "ir/operation/Squeeze.h"
-#include "ir/operation/Tanh.h"
-#include "ir/operation/Logistic.h"
-#include "ir/operation/Cast.h"
-#include "ir/operation/Div.h"
-#include "ir/operation/Exp.h"
-#include "ir/operation/ReduceMax.h"
-#include "ir/operation/Comparison.h"
-#include "ir/operation/LogicalAnd.h"
-#include "ir/operation/LogicalOr.h"
-#include "ir/operation/LogicalNot.h"
-#include "ir/operation/LSTM.h"
-#include "ir/operation/RSQRT.h"
-#include "ir/operation/ReLU.h"
-#include "ir/operation/ResizeBilinear.h"
-#include "ir/operation/ReLU1.h"
-#include "ir/operation/ReLU6.h"
-#include "ir/operation/RNN.h"
-#include "ir/operation/Floor.h"
-#include "ir/operation/SpaceToBatchND.h"
-#include "ir/operation/SpaceToDepth.h"
-#include "ir/operation/L2Pool2D.h"
-#include "ir/operation/EmbeddingLookup.h"
-#include "ir/operation/L2Normalization.h"
-#include "ir/operation/HashtableLookup.h"
-#include "ir/operation/InstanceNorm.h"
-#include "ir/operation/PReLU.h"
-#include "ir/operation/TransposeConv.h"
-#include "ir/operation/SQRT.h"
-#include "ir/operation/SquaredDifference.h"
-#include "ir/operation/TopKV2.h"
-#include "ir/operation/Gather.h"
-#include "ir/operation/Neg.h"
-#include "ir/operation/Abs.h"
-#include "ir/operation/ArgMax.h"
-#include "ir/operation/Dequantize.h"
-#include "ir/operation/Mean.h"
-#include "ir/operation/LocalResponseNormalization.h"
-#include "ir/operation/DepthToSpace.h"
-#include "ir/operation/Pack.h"
-#include "ir/operation/ReduceMin.h"
-#include "ir/operation/Split.h"
-#include "ir/operation/Unpack.h"
-#include "ir/operation/Pad.h"
-#include "ir/operation/Min.h"
-#include "ir/operation/Max.h"
-#include "ir/operation/Custom.h"
-#include "ir/operation/OneHot.h"
diff --git a/runtime/neurun/core/include/ir/Operations.h b/runtime/neurun/core/include/ir/Operations.h
deleted file mode 100644
index 9e2aecb43..000000000
--- a/runtime/neurun/core/include/ir/Operations.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATIONS_H__
-#define __NEURUN_IR_OPERATIONS_H__
-
-#include "ir/Index.h"
-#include "ir/Operation.h"
-#include "util/ObjectManager.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-class Operations : public util::ObjectManager<OperationIndex, Operation>
-{
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_MODEL_OPERATION_MANAGER_H__
diff --git a/runtime/neurun/core/include/ir/Operations.lst b/runtime/neurun/core/include/ir/Operations.lst
deleted file mode 100644
index 8c02857d9..000000000
--- a/runtime/neurun/core/include/ir/Operations.lst
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef OP
-#error Define OP before including this file
-#endif
-
-// Internal Name
-OP(Add)
-OP(Sub)
-OP(BatchToSpaceND)
-OP(Cast)
-OP(Conv2D)
-OP(DepthwiseConv2D)
-OP(AvgPool2D)
-OP(MaxPool2D)
-OP(Concat)
-OP(FullyConnected)
-OP(ReduceSum)
-OP(Reshape)
-OP(Mul)
-OP(Softmax)
-OP(Squeeze)
-OP(Slice)
-OP(StridedSlice)
-OP(Tanh)
-OP(Logistic)
-OP(Div)
-OP(Transpose)
-OP(Exp)
-OP(ReduceMax)
-OP(Comparison)
-OP(LogicalAnd)
-OP(LogicalOr)
-OP(LogicalNot)
-OP(LSTM)
-OP(RSQRT)
-OP(ReLU)
-OP(ResizeBilinear)
-OP(ReLU1)
-OP(ReLU6)
-OP(RNN)
-OP(Floor)
-OP(SpaceToBatchND)
-OP(SpaceToDepth)
-OP(L2Pool2D)
-OP(EmbeddingLookup)
-OP(L2Normalization)
-OP(HashtableLookup)
-OP(InstanceNorm)
-OP(PReLU)
-OP(TransposeConv)
-OP(SQRT)
-OP(SquaredDifference)
-OP(TopKV2)
-OP(Gather)
-OP(Neg)
-OP(Abs)
-OP(ArgMax)
-OP(Dequantize)
-OP(Mean)
-OP(LocalResponseNormalization)
-OP(DepthToSpace)
-OP(Pack)
-OP(ReduceMin)
-OP(Split)
-OP(Unpack)
-OP(Pad)
-OP(Custom)
-OP(Permute)
-OP(Min)
-OP(Max)
-OP(OneHot)
diff --git a/runtime/neurun/core/include/ir/Shape.h b/runtime/neurun/core/include/ir/Shape.h
deleted file mode 100644
index a58af38ad..000000000
--- a/runtime/neurun/core/include/ir/Shape.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_SHAPE_H__
-#define __NEURUN_IR_SHAPE_H__
-
-#include "ir/Layout.h"
-#include "misc/feature/Shape.h"
-
-#include <cstdint>
-#include <vector>
-
-namespace neurun
-{
-namespace ir
-{
-
-// TODO Remove this dependency.
-using FeatureShape = nnfw::misc::feature::Shape;
-
-struct Shape
-{
-public:
- Shape() = default;
-
- explicit Shape(int rank) : _dimensions(rank) {}
-
- Shape(std::initializer_list<int32_t> dimensions) : _dimensions(dimensions) {}
-
- int rank() const { return _dimensions.size(); }
-
- const std::vector<int32_t> &dims() const { return _dimensions; }
-
- int32_t dim(int i) const { return _dimensions.at(i); }
-
- int32_t &dim(int i) { return _dimensions.at(i); }
-
- uint64_t num_elements() const;
-
-public:
- FeatureShape asFeature(Layout layout) const;
-
- /**
- * @brief Add dimension to the beginning
- * @param[in] d dimension to add to the beginning
- */
- void prepend(int32_t d) { _dimensions.insert(_dimensions.cbegin(), d); }
-
- /**
- * @brief Add dimension to the end
- * @param[in] d dimension to add to the end
- */
- void append(int32_t d) { _dimensions.emplace_back(d); }
-
- /**
- * @brief Extend rank of Shape object for operand with param.
- * @param[in] to_rank The rank value to be extended to
- */
- void extendRank(int to_rank);
-
-private:
- std::vector<int32_t> _dimensions;
-};
-
-inline bool operator==(const Shape &lhs, const Shape &rhs) { return lhs.dims() == rhs.dims(); }
-inline bool operator!=(const Shape &lhs, const Shape &rhs) { return lhs.dims() != rhs.dims(); }
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_SHAPE_H__
diff --git a/runtime/neurun/core/include/ir/Subgraphs.h b/runtime/neurun/core/include/ir/Subgraphs.h
deleted file mode 100644
index 716f09bcf..000000000
--- a/runtime/neurun/core/include/ir/Subgraphs.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_SUBGRAPHS_H__
-#define __NEURUN_IR_SUBGRAPHS_H__
-
-#include "ir/Index.h"
-#include "ir/OpSequence.h"
-#include "util/ObjectManager.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-/**
- * @brief Class that manages OpSequence objects
- */
-class Subgraphs : public util::ObjectManager<SubgraphIndex, OpSequence>
-{
-public:
- /**
- * @brief Create an instance of OpSequence with given op and push it to objects
- *
- * @param[in] op_idx Operation index that is emplaced
- * @param[in] op Operation that is emplaced
- * @param[in] layout OpSequence's layout
- * @return SubgraphIndex
- */
- SubgraphIndex emplace(const OperationIndex &op_index, const Operation &op, Layout layout);
-
- /**
- * @brief Push an instance of OpSequence to objects
- *
- * @param[in] subg An instance of OpSequence
- * @return SubgraphIndex
- */
- SubgraphIndex emplace(std::unique_ptr<OpSequence> &&subg);
-
- /**
- * @brief Check if an operation does exist in any subgraphs
- *
- * @param operation_index Operation index to find
- * @return true If such operation exists in any subgraphs otherwise false
- */
- bool containsOperation(const OperationIndex &operation_index) const;
- /**
- * @brief Find an operation from all subgraphs
- *
- * @param operation_index Operation index to find
- * @return SubgraphIndex Index of OpSequence that contains given operation index
- */
- SubgraphIndex getOperation(const OperationIndex &operation_index) const;
- /**
- * @brief Dump subgraphs
- *
- * @param msg Message that will be displayed
- */
- void dump(const std::string &msg) const;
- /**
- * @brief Remove an operation from OpSequence
- *
- * @param operation_index Operation index to be removed
- */
- void removeFromSubgraph(const OperationIndex &operation_index);
-
-private:
- SubgraphIndex findOperation(const OperationIndex &operation_index) const;
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_SUBGRAPHS_H__
diff --git a/runtime/neurun/core/include/ir/TypeInfo.h b/runtime/neurun/core/include/ir/TypeInfo.h
deleted file mode 100644
index 5b35046bb..000000000
--- a/runtime/neurun/core/include/ir/TypeInfo.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_TYPEINFO_H__
-#define __NEURUN_IR_TYPEINFO_H__
-
-#include <cstdint>
-
-#include "ir/DataType.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-class TypeInfo
-{
-public:
- TypeInfo() = delete;
-
- explicit TypeInfo(DataType type, float scale = 0, int32_t offset = 0)
- : _type(type), _scale(scale), _offset(offset)
- {
- }
-
-public:
- DataType type() const { return _type; }
- float scale() const { return _scale; }
- int32_t offset() const { return _offset; }
-
-public:
- void type(const DataType type) { _type = type; }
-
-private:
- DataType _type;
- float _scale;
- int32_t _offset;
-};
-
-bool operator==(const TypeInfo &lhs, const TypeInfo &rhs);
-bool operator!=(const TypeInfo &lhs, const TypeInfo &rhs);
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_TYPEINFO_H__
diff --git a/runtime/neurun/core/include/ir/operand/LowerInfo.h b/runtime/neurun/core/include/ir/operand/LowerInfo.h
deleted file mode 100644
index e0fb2c5c8..000000000
--- a/runtime/neurun/core/include/ir/operand/LowerInfo.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERAND_LOWER_INFO_H__
-#define __NEURUN_IR_OPERAND_LOWER_INFO_H__
-
-#include <functional>
-#include <stdint.h>
-
-#include "ir/operand/PermuteFactor.h"
-#include "util/Set.h"
-
-namespace neurun
-{
-namespace backend
-{
-class Backend;
-} // namespace backend
-} // namespace neurun
-
-namespace neurun
-{
-namespace ir
-{
-namespace operand
-{
-using PermuteFactorSet = util::Set<PermuteFactor>;
-
-class LowerInfo
-{
-public:
- class Shape4D
- {
- public:
- Shape4D(uint32_t n, uint32_t h, uint32_t w, uint32_t c) : _n{n}, _h{h}, _w{w}, _c{c}
- {
- // DO NOTHING
- }
-
- public:
- uint32_t n(void) const { return _n; }
- uint32_t h(void) const { return _h; }
- uint32_t w(void) const { return _w; }
- uint32_t c(void) const { return _c; }
-
- private:
- uint32_t _n;
- uint32_t _h;
- uint32_t _w;
- uint32_t _c;
- };
-
-public:
- LowerInfo(const Shape4D &shape) : _shape{shape}
- {
- // DO NOTHING
- }
-
-public:
- const Shape4D &shape(void) const { return _shape; }
- const PermuteFactorSet &def_factors(void) const { return _def_factors; }
- const PermuteFactorSet &use_factors(void) const { return _use_factors; }
-
-public:
- void addDefPermuteFactor(const PermuteFactor &factor) { _def_factors.add(factor); }
- void addUsePermuteFactor(const PermuteFactor &factor) { _use_factors.add(factor); }
- void removeDefPermuteFactor(const PermuteFactor &factor) { _def_factors.remove(factor); }
- void removeUsePermuteFactor(const PermuteFactor &factor) { _use_factors.remove(factor); }
-
-private:
- Shape4D _shape;
- PermuteFactorSet _def_factors;
- PermuteFactorSet _use_factors;
-};
-
-} // namespace operand
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERAND_LOWER_INFO_H__
diff --git a/runtime/neurun/core/include/ir/operand/ParentInfo.h b/runtime/neurun/core/include/ir/operand/ParentInfo.h
deleted file mode 100644
index 92dac2b63..000000000
--- a/runtime/neurun/core/include/ir/operand/ParentInfo.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file ParentInfo.h
- * @brief This file contains ParentInfo class and internal Coordinate4D class
- * to represent subsumption between operand
- */
-
-#ifndef __NEURUN_IR_OPERAND_PARENT_INFO_H__
-#define __NEURUN_IR_OPERAND_PARENT_INFO_H__
-
-#include <stdint.h>
-
-#include "ir/Index.h"
-#include "util/Coordinates.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operand
-{
-
-/**
- * @brief Class to represent parent operand in child operand
- */
-class ParentInfo
-{
-public:
- /**
- * @brief Construct a new ParentInfo object
- * @param[in] parent Index of parent operand
- * @param[in] coordinate Offset of child operand in parent operand
- * @return
- */
- ParentInfo(const OperandIndex parent, const util::Coordinates &coordinate)
- : _parent{parent}, _coordinate{coordinate}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return parent index
- * @return Parent index
- */
- OperandIndex parent(void) const { return _parent; }
- /**
- * @brief Retern offset in parent
- * @return Offset
- */
- util::Coordinates offset(void) const { return _coordinate; }
-
-private:
- OperandIndex _parent;
- util::Coordinates _coordinate;
-};
-
-} // namespace operand
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERAND_PARENT_INFO_H__
diff --git a/runtime/neurun/core/include/ir/operand/PermuteFactor.h b/runtime/neurun/core/include/ir/operand/PermuteFactor.h
deleted file mode 100644
index 60d926b2d..000000000
--- a/runtime/neurun/core/include/ir/operand/PermuteFactor.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file PermuteFactor.h
- * @brief This file contains neurun::ir::operand::PermuteFactor class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NEURUN_IR_OPERAND_PERMUTE_FACTOR_H__
-#define __NEURUN_IR_OPERAND_PERMUTE_FACTOR_H__
-
-#include <functional>
-
-#include "ir/Layout.h"
-
-namespace neurun
-{
-namespace backend
-{
-class Backend;
-} // namespace backend
-} // namespace neurun
-
-namespace neurun
-{
-namespace ir
-{
-namespace operand
-{
-
-/**
- * @brief Class that has factors of permutation
- */
-class PermuteFactor
-{
-public:
- /**
- * @brief Construct PermuteFactor object.
- * @param backend The backend factor
- * @param backend The layout factor
- */
- PermuteFactor(const backend::Backend *backend, Layout layout) : _backend{backend}, _layout{layout}
- {
- // DO NOTHING
- }
- /**
- * @brief Construct PermuteFactor object by copy semantics.
- */
- PermuteFactor(const PermuteFactor &f) : _backend{f._backend}, _layout{f._layout}
- {
- // DO NOTHING
- }
- /**
- * @brief Construct PermuteFactor object by move semantics.
- */
- PermuteFactor(PermuteFactor &&) = default;
-
-public:
- /**
- * @brief Get backend
- *
- * @return Backend factor
- */
- const backend::Backend *backend() const { return _backend; }
- /**
- * @brief Get layout
- *
- * @return Layout factor
- */
- Layout layout() const { return _layout; }
-
-public:
- /**
- * @brief operator overloading function for `==`
- *
- * @return Whether two PermuteFactor are the same
- */
- bool operator==(const PermuteFactor &other) const
- {
- return _backend == other.backend() && _layout == other.layout();
- }
- /**
- * @brief operator overloading function for `!=`
- *
- * @return Whether two PermuteFactor are differenct
- */
- bool operator!=(const PermuteFactor &other) const { return !(*this == other); }
-
-private:
- const backend::Backend *_backend{nullptr};
- Layout _layout{Layout::UNKNOWN};
-};
-
-} // namespace operand
-} // namespace ir
-} // namespace neurun
-
-namespace std
-{
-
-/**
- * @brief Structure that provides hash value of PermuteFactor
- */
-template <> struct hash<neurun::ir::operand::PermuteFactor>
-{
- size_t operator()(const neurun::ir::operand::PermuteFactor &factor) const noexcept
- {
- hash<const neurun::backend::Backend *> b_hash{};
- hash<neurun::ir::Layout> l_hash{};
- return b_hash(factor.backend()) ^ (l_hash(factor.layout()) << 1);
- }
-};
-
-} // namespace std
-
-#endif // __NEURUN_IR_OPERAND_PERMUTE_FACTOR_H__
diff --git a/runtime/neurun/core/include/ir/operation/Abs.h b/runtime/neurun/core/include/ir/operation/Abs.h
deleted file mode 100644
index 97293823b..000000000
--- a/runtime/neurun/core/include/ir/operation/Abs.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_ABS_H__
-#define __NEURUN_IR_OPERATION_ABS_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Abs : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Abs; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_ABS_H__
diff --git a/runtime/neurun/core/include/ir/operation/Add.h b/runtime/neurun/core/include/ir/operation/Add.h
deleted file mode 100644
index fc4d6a7e7..000000000
--- a/runtime/neurun/core/include/ir/operation/Add.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_ADD_H__
-#define __NEURUN_IR_OPERATION_ADD_H__
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Add : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
- struct Param
- {
- Activation activation;
- };
-
-public:
- Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Add; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_ADD_H__
diff --git a/runtime/neurun/core/include/ir/operation/ArgMax.h b/runtime/neurun/core/include/ir/operation/ArgMax.h
deleted file mode 100644
index 23f52710f..000000000
--- a/runtime/neurun/core/include/ir/operation/ArgMax.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_ARG_MAX_H__
-#define __NEURUN_IR_OPERATION_ARG_MAX_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class ArgMax : public Operation
-{
-public:
- enum Input
- {
- INPUT
- };
-
- struct Param
- {
- int axis;
- int rank;
- };
-
-public:
- ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ArgMax; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_ARG_MAX_H__
diff --git a/runtime/neurun/core/include/ir/operation/AvgPool2D.h b/runtime/neurun/core/include/ir/operation/AvgPool2D.h
deleted file mode 100644
index a03628184..000000000
--- a/runtime/neurun/core/include/ir/operation/AvgPool2D.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_AVGPOOL2D_H__
-#define __NEURUN_IR_OPERATION_AVGPOOL2D_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class AvgPool2D : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
- struct Param
- {
- uint32_t kh;
- uint32_t kw;
-
- Stride stride;
- Padding padding;
- Activation activation;
- };
-
-public:
- AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::AvgPool2D; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_AVGPOOL2D_H__
diff --git a/runtime/neurun/core/include/ir/operation/BatchToSpaceND.h b/runtime/neurun/core/include/ir/operation/BatchToSpaceND.h
deleted file mode 100644
index b90d2871d..000000000
--- a/runtime/neurun/core/include/ir/operation/BatchToSpaceND.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_BATCH_TO_SPACE_ND_H__
-#define __NEURUN_IR_OPERATION_BATCH_TO_SPACE_ND_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class BatchToSpaceND : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- BLOCK_SIZE = 1
- };
-
-public:
- BatchToSpaceND(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::BatchToSpaceND; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_BATCH_TO_SPACE_ND_H__
diff --git a/runtime/neurun/core/include/ir/operation/Cast.h b/runtime/neurun/core/include/ir/operation/Cast.h
deleted file mode 100644
index a71087dd0..000000000
--- a/runtime/neurun/core/include/ir/operation/Cast.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_CAST_H__
-#define __NEURUN_IR_OPERATION_CAST_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Cast : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Cast; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_CAST_H__
diff --git a/runtime/neurun/core/include/ir/operation/Comparison.h b/runtime/neurun/core/include/ir/operation/Comparison.h
deleted file mode 100644
index 23c775c42..000000000
--- a/runtime/neurun/core/include/ir/operation/Comparison.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_COMPARISON_H__
-#define __NEURUN_IR_OPERATION_COMPARISON_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Comparison : public Operation
-{
-public:
- enum Input
- {
- INPUT0 = 0,
- INPUT1
- };
-
- enum class ComparisonType
- {
- Equal,
- NotEqual,
- Greater,
- GreaterEqual,
- Less,
- LessEqual
- };
-
- struct Param
- {
- ComparisonType comparison_type;
- };
-
-public:
- Comparison(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Comparison; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_COMPARISON_H__
diff --git a/runtime/neurun/core/include/ir/operation/Concat.h b/runtime/neurun/core/include/ir/operation/Concat.h
deleted file mode 100644
index 8628ed398..000000000
--- a/runtime/neurun/core/include/ir/operation/Concat.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_CONCAT_H__
-#define __NEURUN_IR_OPERATION_CONCAT_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Concat : public Operation
-{
-public:
- struct Param
- {
- int32_t axis;
- int32_t rank;
- };
-
-public:
- Concat(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Concat; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_CONCAT_H__
diff --git a/runtime/neurun/core/include/ir/operation/Conv2D.h b/runtime/neurun/core/include/ir/operation/Conv2D.h
deleted file mode 100644
index 1215666e9..000000000
--- a/runtime/neurun/core/include/ir/operation/Conv2D.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_CONV2D_H__
-#define __NEURUN_IR_OPERATION_CONV2D_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Conv2D : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- KERNEL,
- BIAS
- };
-
- struct Param
- {
- Stride stride;
- Padding padding;
- Activation activation;
- };
-
-public:
- Conv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Conv2D; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_CONV2D_H__
diff --git a/runtime/neurun/core/include/ir/operation/Custom.h b/runtime/neurun/core/include/ir/operation/Custom.h
deleted file mode 100644
index 03501e8ef..000000000
--- a/runtime/neurun/core/include/ir/operation/Custom.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __NEURUN_IR_OPERATION_CUSTOM_H__
-#define __NEURUN_IR_OPERATION_CUSTOM_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Custom : public Operation
-{
-public:
- struct Userdata
- {
- char *data;
- size_t size;
- };
-
- Custom(OperandConstraint input_constr, const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs, std::string id, const Userdata &userdata);
-
- void accept(OperationVisitor &v) const override;
-
-public:
- /**
- * @return unique operation identifier
- */
- const std::string &id() const;
-
- std::string name() const override;
- OpCode opcode() const final { return OpCode::Custom; }
-
- /**
- * @return user-provided data
- */
- const Userdata &userdata() const;
-
- ~Custom() override;
-
-private:
- std::string _id;
- Userdata _userdata;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-#endif // __NEURUN_IR_OPERATION_CUSTOM_H__
diff --git a/runtime/neurun/core/include/ir/operation/DepthToSpace.h b/runtime/neurun/core/include/ir/operation/DepthToSpace.h
deleted file mode 100644
index 6e7aaf249..000000000
--- a/runtime/neurun/core/include/ir/operation/DepthToSpace.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_DEPTH_TO_SPACE_H__
-#define __NEURUN_IR_OPERATION_DEPTH_TO_SPACE_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class DepthToSpace : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
- struct Param
- {
- std::int32_t block_size;
- };
-
-public:
- DepthToSpace(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::DepthToSpace; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_DEPTH_TO_SPACE_H__
diff --git a/runtime/neurun/core/include/ir/operation/DepthwiseConv2D.h b/runtime/neurun/core/include/ir/operation/DepthwiseConv2D.h
deleted file mode 100644
index 1f0926fa8..000000000
--- a/runtime/neurun/core/include/ir/operation/DepthwiseConv2D.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_DEPTHWISECONV2D_H__
-#define __NEURUN_IR_OPERATION_DEPTHWISECONV2D_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class DepthwiseConv2D : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- KERNEL,
- BIAS
- };
-
- struct Param
- {
- Stride stride;
- Padding padding;
- uint32_t multiplier;
- Activation activation;
- };
-
-public:
- DepthwiseConv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::DepthwiseConv2D; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_DEPTHWISECONV2D_H__
diff --git a/runtime/neurun/core/include/ir/operation/Dequantize.h b/runtime/neurun/core/include/ir/operation/Dequantize.h
deleted file mode 100644
index dfca278cd..000000000
--- a/runtime/neurun/core/include/ir/operation/Dequantize.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_DEQUANTIZE_H__
-#define __NEURUN_IR_OPERATION_DEQUANTIZE_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Dequantize : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Dequantize; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_DEQUANTIZE_H__
diff --git a/runtime/neurun/core/include/ir/operation/Div.h b/runtime/neurun/core/include/ir/operation/Div.h
deleted file mode 100644
index d3e744472..000000000
--- a/runtime/neurun/core/include/ir/operation/Div.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_DIV_H__
-#define __NEURUN_IR_OPERATION_DIV_H__
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Div : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
- struct Param
- {
- Activation activation;
- };
-
-public:
- Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Div; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_DIV_H__
diff --git a/runtime/neurun/core/include/ir/operation/EmbeddingLookup.h b/runtime/neurun/core/include/ir/operation/EmbeddingLookup.h
deleted file mode 100644
index 968b7b35a..000000000
--- a/runtime/neurun/core/include/ir/operation/EmbeddingLookup.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_EMBEDDING_LOOKUP_H__
-#define __NEURUN_IR_OPERATION_EMBEDDING_LOOKUP_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class EmbeddingLookup : public Operation
-{
-public:
- enum Input
- {
- LOOKUPS = 0,
- VALUES = 1
- };
-
-public:
- EmbeddingLookup(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::EmbeddingLookup; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_EMBEDDING_LOOKUP_H__
diff --git a/runtime/neurun/core/include/ir/operation/Exp.h b/runtime/neurun/core/include/ir/operation/Exp.h
deleted file mode 100644
index 8e04f3f7f..000000000
--- a/runtime/neurun/core/include/ir/operation/Exp.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_EXP_H__
-#define __NEURUN_IR_OPERATION_EXP_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Exp : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Exp; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_EXP_H__
diff --git a/runtime/neurun/core/include/ir/operation/Floor.h b/runtime/neurun/core/include/ir/operation/Floor.h
deleted file mode 100644
index ca4cf9881..000000000
--- a/runtime/neurun/core/include/ir/operation/Floor.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_FLOOR_H__
-#define __NEURUN_IR_OPERATION_FLOOR_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Floor : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Floor; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_FLOOR_H__
diff --git a/runtime/neurun/core/include/ir/operation/FullyConnected.h b/runtime/neurun/core/include/ir/operation/FullyConnected.h
deleted file mode 100644
index 1ffa1318d..000000000
--- a/runtime/neurun/core/include/ir/operation/FullyConnected.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_FULLYCONNECTED_H__
-#define __NEURUN_IR_OPERATION_FULLYCONNECTED_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class FullyConnected : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- WEIGHT,
- BIAS
- };
-
- struct Param
- {
- Activation activation;
- };
-
-public:
- FullyConnected(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::FullyConnected; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_FULLYCONNECTED_H__
diff --git a/runtime/neurun/core/include/ir/operation/Gather.h b/runtime/neurun/core/include/ir/operation/Gather.h
deleted file mode 100644
index 13540d413..000000000
--- a/runtime/neurun/core/include/ir/operation/Gather.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_GATHER_H__
-#define __NEURUN_IR_OPERATION_GATHER_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Gather : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- INDICES,
- };
-
- struct Param
- {
- int32_t axis;
- int32_t rank;
- };
-
-public:
- Gather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Gather; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_GATHER_H__
diff --git a/runtime/neurun/core/include/ir/operation/HashtableLookup.h b/runtime/neurun/core/include/ir/operation/HashtableLookup.h
deleted file mode 100644
index fb6c97607..000000000
--- a/runtime/neurun/core/include/ir/operation/HashtableLookup.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_HASHTABLE_LOOKUP_H__
-#define __NEURUN_IR_OPERATION_HASHTABLE_LOOKUP_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class HashtableLookup : public Operation
-{
-public:
- enum Input
- {
- LOOKUPS = 0,
- KEYS = 1,
- VALUES = 2
- };
-
- enum Output
- {
- OUTPUT = 0,
- HITS = 1
- };
-
-public:
- HashtableLookup(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::HashtableLookup; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_HASHTABLE_LOOKUP_H__
diff --git a/runtime/neurun/core/include/ir/operation/InstanceNorm.h b/runtime/neurun/core/include/ir/operation/InstanceNorm.h
deleted file mode 100644
index cbd03ad1f..000000000
--- a/runtime/neurun/core/include/ir/operation/InstanceNorm.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_INSTANCE_NORM_H__
-#define __NEURUN_IR_OPERATION_INSTANCE_NORM_H__
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class InstanceNorm : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- GAMMA,
- BETA
- };
-
- struct Param
- {
- Activation activation;
- float epsilon;
- };
-
-public:
- InstanceNorm(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::InstanceNorm; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_INSTANCE_NORM_H__
diff --git a/runtime/neurun/core/include/ir/operation/L2Normalization.h b/runtime/neurun/core/include/ir/operation/L2Normalization.h
deleted file mode 100644
index e2c1f4eee..000000000
--- a/runtime/neurun/core/include/ir/operation/L2Normalization.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_L2_NORMALIZATION_H__
-#define __NEURUN_IR_OPERATION_L2_NORMALIZATION_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class L2Normalization : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- struct Param
- {
- int32_t rank;
- };
-
-public:
- L2Normalization(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::L2Normalization; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_L2_NORMALIZATION_H__
diff --git a/runtime/neurun/core/include/ir/operation/L2Pool2D.h b/runtime/neurun/core/include/ir/operation/L2Pool2D.h
deleted file mode 100644
index 6d4d72ee2..000000000
--- a/runtime/neurun/core/include/ir/operation/L2Pool2D.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_L2_POOL_2D_H__
-#define __NEURUN_IR_OPERATION_L2_POOL_2D_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class L2Pool2D : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- };
-
- struct Param
- {
- Padding padding;
- Stride stride;
- uint32_t kw;
- uint32_t kh;
- Activation activation;
- };
-
-public:
- L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::L2Pool2D; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_L2_POOL_2D_H__
diff --git a/runtime/neurun/core/include/ir/operation/LSTM.h b/runtime/neurun/core/include/ir/operation/LSTM.h
deleted file mode 100644
index 2ea09b1b7..000000000
--- a/runtime/neurun/core/include/ir/operation/LSTM.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __NEURUN_IR_OPERATION_LSTM_H__
-#define __NEURUN_IR_OPERATION_LSTM_H__
-
-#include "ir/InternalType.h"
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class LSTM : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- INPUT_TO_INPUT_WEIGHTS = 1,
- INPUT_TO_FORGET_WEIGHTS = 2,
- INPUT_TO_CELL_WEIGHTS = 3,
- INPUT_TO_OUTPUT_WEIGHTS = 4,
- RECURRENT_TO_INPUT_WEIGHTS = 5,
- RECURRENT_TO_FORGET_WEIGHTS = 6,
- RECURRENT_TO_CELL_WEIGHTS = 7,
- RECURRENT_TO_OUTPUT_WEIGHTS = 8,
- CELL_TO_INPUT_WEIGHTS = 9,
- CELL_TO_FORGET_WEIGHTS = 10,
- CELL_TO_OUTPUT_WEIGHTS = 11,
- INPUT_GATE_BIAS = 12,
- FORGET_GATE_BIAS = 13,
- CELL_BIAS = 14,
- OUTPUT_GATE_BIAS = 15,
- PROJECTION_WEIGHTS = 16,
- PROJECTION_BIAS = 17,
- OUTPUT_STATE_IN = 18,
- CELL_STATE_IN = 19,
- };
-
- enum Output
- {
- SCRATCH_BUFFER = 0,
- OUTPUT_STATE_OUT = 1,
- CELL_STATE_OUT = 2,
- OUTPUT = 3
- };
-
- struct Param
- {
- Activation activation;
- float cell_threshold;
- float projection_threshold;
- };
-
-public:
- LSTM(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LSTM; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_LSTM_H__
diff --git a/runtime/neurun/core/include/ir/operation/LocalResponseNormalization.h b/runtime/neurun/core/include/ir/operation/LocalResponseNormalization.h
deleted file mode 100644
index 3fbf2e4ae..000000000
--- a/runtime/neurun/core/include/ir/operation/LocalResponseNormalization.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_LOCAL_RESPONSE_NORMALIZATION_H__
-#define __NEURUN_IR_OPERATION_LOCAL_RESPONSE_NORMALIZATION_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class LocalResponseNormalization : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
- struct Param
- {
- int radius;
- float bias;
- float alpha;
- float beta;
- };
-
-public:
- LocalResponseNormalization(const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LocalResponseNormalization; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_LOCAL_RESPONSE_NORMALIZATION_H__
diff --git a/runtime/neurun/core/include/ir/operation/LogicalAnd.h b/runtime/neurun/core/include/ir/operation/LogicalAnd.h
deleted file mode 100644
index f7b03d62d..000000000
--- a/runtime/neurun/core/include/ir/operation/LogicalAnd.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_LOGICAL_AND_H__
-#define __NEURUN_IR_OPERATION_LOGICAL_AND_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalAnd : public Operation
-{
-public:
- enum Input
- {
- INPUT0 = 0,
- INPUT1 = 1,
- };
-
-public:
- LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LogicalAnd; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_LOGICAL_AND_H__
diff --git a/runtime/neurun/core/include/ir/operation/LogicalNot.h b/runtime/neurun/core/include/ir/operation/LogicalNot.h
deleted file mode 100644
index e689d57b2..000000000
--- a/runtime/neurun/core/include/ir/operation/LogicalNot.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_LOGICAL_NOT_H__
-#define __NEURUN_IR_OPERATION_LOGICAL_NOT_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalNot : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- };
-
-public:
- LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LogicalNot; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_LOGICAL_NOT_H__
diff --git a/runtime/neurun/core/include/ir/operation/LogicalOr.h b/runtime/neurun/core/include/ir/operation/LogicalOr.h
deleted file mode 100644
index fda6b20d7..000000000
--- a/runtime/neurun/core/include/ir/operation/LogicalOr.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_LOGICAL_OR_H__
-#define __NEURUN_IR_OPERATION_LOGICAL_OR_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalOr : public Operation
-{
-public:
- enum Input
- {
- INPUT0 = 0,
- INPUT1 = 1,
- };
-
-public:
- LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LogicalOr; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_LOGICAL_OR_H__
diff --git a/runtime/neurun/core/include/ir/operation/Logistic.h b/runtime/neurun/core/include/ir/operation/Logistic.h
deleted file mode 100644
index b23e7ef3f..000000000
--- a/runtime/neurun/core/include/ir/operation/Logistic.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_LOGISTIC_H__
-#define __NEURUN_IR_OPERATION_LOGISTIC_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Logistic : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Logistic; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_LOGISTIC_H__
diff --git a/runtime/neurun/core/include/ir/operation/LowerInfo.h b/runtime/neurun/core/include/ir/operation/LowerInfo.h
deleted file mode 100644
index 856976a0c..000000000
--- a/runtime/neurun/core/include/ir/operation/LowerInfo.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_LOWER_INFO_H__
-#define __NEURUN_IR_OPERATION_LOWER_INFO_H__
-
-#include <string>
-
-#include <ir/operand/PermuteFactor.h>
-
-namespace neurun
-{
-namespace backend
-{
-class Backend;
-} // namespace backend
-} // namespace neurun
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class LowerInfo
-{
-public:
- LowerInfo(const backend::Backend *backend, Layout layout);
- const backend::Backend *backend() const { return _permute_factor.backend(); }
- Layout layout() const { return _permute_factor.layout(); }
-
-private:
- operand::PermuteFactor _permute_factor;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_LOWER_INFO_H__
diff --git a/runtime/neurun/core/include/ir/operation/Max.h b/runtime/neurun/core/include/ir/operation/Max.h
deleted file mode 100644
index 1675f9f72..000000000
--- a/runtime/neurun/core/include/ir/operation/Max.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_MAX_H__
-#define __NEURUN_IR_OPERATION_MAX_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Max : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
-public:
- Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Max; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_MAX_H__
diff --git a/runtime/neurun/core/include/ir/operation/MaxPool2D.h b/runtime/neurun/core/include/ir/operation/MaxPool2D.h
deleted file mode 100644
index c0f0939aa..000000000
--- a/runtime/neurun/core/include/ir/operation/MaxPool2D.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_MAXPOOL2D_H__
-#define __NEURUN_IR_OPERATION_MAXPOOL2D_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class MaxPool2D : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
- struct Param
- {
- uint32_t kh;
- uint32_t kw;
- Stride stride;
- Padding padding;
- Activation activation;
- };
-
-public:
- MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::MaxPool2D; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_MAXPOOL2D_H__
diff --git a/runtime/neurun/core/include/ir/operation/Mean.h b/runtime/neurun/core/include/ir/operation/Mean.h
deleted file mode 100644
index cce8de377..000000000
--- a/runtime/neurun/core/include/ir/operation/Mean.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_MEAN_H__
-#define __NEURUN_IR_OPERATION_MEAN_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Mean : public Operation
-{
-public:
- enum Input
- {
- INPUT
- };
-
- struct Param
- {
- std::vector<int> axes;
- bool keep_dims;
- int32_t rank;
- };
-
-public:
- Mean(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Mean; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_MEAN_H__
diff --git a/runtime/neurun/core/include/ir/operation/Min.h b/runtime/neurun/core/include/ir/operation/Min.h
deleted file mode 100644
index fac901f21..000000000
--- a/runtime/neurun/core/include/ir/operation/Min.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_MIN_H__
-#define __NEURUN_IR_OPERATION_MIN_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Min : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
-public:
- Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Min; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_MIN_H__
diff --git a/runtime/neurun/core/include/ir/operation/Mul.h b/runtime/neurun/core/include/ir/operation/Mul.h
deleted file mode 100644
index f6cfe2afb..000000000
--- a/runtime/neurun/core/include/ir/operation/Mul.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_MUL_H__
-#define __NEURUN_IR_OPERATION_MUL_H__
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Mul : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
- struct Param
- {
- Activation activation;
- };
-
-public:
- Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Mul; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_MUL_H__
diff --git a/runtime/neurun/core/include/ir/operation/Neg.h b/runtime/neurun/core/include/ir/operation/Neg.h
deleted file mode 100644
index ec364f8ad..000000000
--- a/runtime/neurun/core/include/ir/operation/Neg.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_NEG_H__
-#define __NEURUN_IR_OPERATION_NEG_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Neg : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Neg; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_NEG_H__
diff --git a/runtime/neurun/core/include/ir/operation/OneHot.h b/runtime/neurun/core/include/ir/operation/OneHot.h
deleted file mode 100644
index 5fbc5d45f..000000000
--- a/runtime/neurun/core/include/ir/operation/OneHot.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_ONEHOT_H__
-#define __NEURUN_IR_OPERATION_ONEHOT_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class OneHot : public Operation
-{
-public:
- enum Input
- {
- INDICES = 0,
- DEPTH = 1,
- ON_VALUE = 2,
- OFF_VALUE = 3,
- };
-
- struct Param
- {
- int axis;
- };
-
-public:
- OneHot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::OneHot; }
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_ONEHOT_H__
diff --git a/runtime/neurun/core/include/ir/operation/PReLU.h b/runtime/neurun/core/include/ir/operation/PReLU.h
deleted file mode 100644
index 8c00c46fd..000000000
--- a/runtime/neurun/core/include/ir/operation/PReLU.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_PRELU_H__
-#define __NEURUN_IR_OPERATION_PRELU_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class PReLU : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- ALPHA = 1
- };
-
-public:
- PReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::PReLU; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_PRELU_H__
diff --git a/runtime/neurun/core/include/ir/operation/Pack.h b/runtime/neurun/core/include/ir/operation/Pack.h
deleted file mode 100644
index ccf73fe5c..000000000
--- a/runtime/neurun/core/include/ir/operation/Pack.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __NEURUN_IR_OPERATION_PACK_H__
-#define __NEURUN_IR_OPERATION_PACK_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-class Pack : public Operation
-{
-public:
- struct Param
- {
- int32_t num;
- int32_t axis;
- int32_t rank;
- };
-
-public:
- Pack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Pack; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-#endif // __NEURUN_IR_OPERATION_PACK_H__
diff --git a/runtime/neurun/core/include/ir/operation/Pad.h b/runtime/neurun/core/include/ir/operation/Pad.h
deleted file mode 100644
index 8e8304fae..000000000
--- a/runtime/neurun/core/include/ir/operation/Pad.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_PAD_H__
-#define __NEURUN_IR_OPERATION_PAD_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Pad : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- PAD = 1,
- // VALUE = 2 Not allow padding value operand yet
- };
-
-public:
- struct Param
- {
- int32_t rank;
- };
-
-public:
- Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Pad; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_PAD_H__
diff --git a/runtime/neurun/core/include/ir/operation/Permute.h b/runtime/neurun/core/include/ir/operation/Permute.h
deleted file mode 100644
index f91f9571b..000000000
--- a/runtime/neurun/core/include/ir/operation/Permute.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_PERMUTE_H__
-#define __NEURUN_IR_OPERATION_PERMUTE_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace backend
-{
-class BackendContext;
-} // namespace backend
-} // namespace neurun
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Permute : public Operation
-{
-public:
- enum class Type
- {
- NHWC_TO_NCHW,
- NCHW_TO_NHWC,
- COPY
- };
-
- struct Param
- {
- const backend::BackendContext *input_backend_ctx;
- const backend::BackendContext *output_backend_ctx;
- };
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Permute; }
-
-public:
- Permute(const OperandIndex &input, const OperandIndex &output,
- const backend::BackendContext *input_backend_ctx,
- const backend::BackendContext *output_backend_ctx, Type type,
- DataType data_type = DataType::FLOAT32);
-
-public:
- const Param &param() const { return _param; }
- DataType getDataType() const { return _dataType; }
- Type getPermuteType() const { return _type; }
-
-private:
- Param _param;
- Type _type;
- DataType _dataType;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_PERMUTE_H__
diff --git a/runtime/neurun/core/include/ir/operation/RNN.h b/runtime/neurun/core/include/ir/operation/RNN.h
deleted file mode 100644
index d812a6fc3..000000000
--- a/runtime/neurun/core/include/ir/operation/RNN.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __NEURUN_IR_OPERATION_RNN_H__
-#define __NEURUN_IR_OPERATION_RNN_H__
-
-#include "ir/InternalType.h"
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class RNN : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- WEIGHTS = 1,
- RECURRENT_WEIGHTS = 2,
- BIAS = 3,
- HIDDEN_STATE_IN = 4
- };
-
- enum Output
- {
- OUTPUT = 0,
- HIDDEN_STATE_OUT = 1
- };
-
- struct Param
- {
- Activation activation;
- };
-
-public:
- RNN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::RNN; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_RNN_H__
diff --git a/runtime/neurun/core/include/ir/operation/RSQRT.h b/runtime/neurun/core/include/ir/operation/RSQRT.h
deleted file mode 100644
index 33648555a..000000000
--- a/runtime/neurun/core/include/ir/operation/RSQRT.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_RSQRT_H__
-#define __NEURUN_IR_OPERATION_RSQRT_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class RSQRT : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::RSQRT; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_RSQRT_H__
diff --git a/runtime/neurun/core/include/ir/operation/ReLU.h b/runtime/neurun/core/include/ir/operation/ReLU.h
deleted file mode 100644
index b6c7fdf01..000000000
--- a/runtime/neurun/core/include/ir/operation/ReLU.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_RELU_H__
-#define __NEURUN_IR_OPERATION_RELU_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReLU; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_RELU_H__
diff --git a/runtime/neurun/core/include/ir/operation/ReLU1.h b/runtime/neurun/core/include/ir/operation/ReLU1.h
deleted file mode 100644
index ac45fda05..000000000
--- a/runtime/neurun/core/include/ir/operation/ReLU1.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_ReLU1_H__
-#define __NEURUN_IR_OPERATION_ReLU1_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU1 : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReLU1; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_ReLU1_H__
diff --git a/runtime/neurun/core/include/ir/operation/ReLU6.h b/runtime/neurun/core/include/ir/operation/ReLU6.h
deleted file mode 100644
index 4d98dad55..000000000
--- a/runtime/neurun/core/include/ir/operation/ReLU6.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_ReLU6_H__
-#define __NEURUN_IR_OPERATION_ReLU6_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU6 : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReLU6; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_ReLU6_H__
diff --git a/runtime/neurun/core/include/ir/operation/ReduceMax.h b/runtime/neurun/core/include/ir/operation/ReduceMax.h
deleted file mode 100644
index da4d7c4cc..000000000
--- a/runtime/neurun/core/include/ir/operation/ReduceMax.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_REDUCEMAX_H__
-#define __NEURUN_IR_OPERATION_REDUCEMAX_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReduceMax : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
- struct Param
- {
- std::vector<int> axes;
- bool keep_dims;
- int32_t rank;
- };
-
-public:
- ReduceMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReduceMax; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_REDUCEMAX_H__
diff --git a/runtime/neurun/core/include/ir/operation/ReduceMin.h b/runtime/neurun/core/include/ir/operation/ReduceMin.h
deleted file mode 100644
index f79fdeaea..000000000
--- a/runtime/neurun/core/include/ir/operation/ReduceMin.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_REDUCEMIN_H__
-#define __NEURUN_IR_OPERATION_REDUCEMIN_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReduceMin : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
- struct Param
- {
- std::vector<int> axes;
- bool keep_dims;
- int32_t rank;
- };
-
-public:
- ReduceMin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReduceMin; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_REDUCEMIN_H__
diff --git a/runtime/neurun/core/include/ir/operation/ReduceSum.h b/runtime/neurun/core/include/ir/operation/ReduceSum.h
deleted file mode 100644
index b5ab8ee75..000000000
--- a/runtime/neurun/core/include/ir/operation/ReduceSum.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_REDUCE_SUM_H__
-#define __NEURUN_IR_OPERATION_REDUCE_SUM_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReduceSum : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
- struct Param
- {
- std::vector<int> axes;
- bool keep_dims;
- int32_t rank;
- };
-
-public:
- ReduceSum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReduceSum; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_REDUCE_SUM_H__
diff --git a/runtime/neurun/core/include/ir/operation/Reshape.h b/runtime/neurun/core/include/ir/operation/Reshape.h
deleted file mode 100644
index e476d7fe1..000000000
--- a/runtime/neurun/core/include/ir/operation/Reshape.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_RESHAPE_H__
-#define __NEURUN_IR_OPERATION_RESHAPE_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Reshape : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Reshape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Reshape; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_RESHAPE_H__
diff --git a/runtime/neurun/core/include/ir/operation/ResizeBilinear.h b/runtime/neurun/core/include/ir/operation/ResizeBilinear.h
deleted file mode 100644
index d937da00c..000000000
--- a/runtime/neurun/core/include/ir/operation/ResizeBilinear.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_RESIZE_BILINEAR_H__
-#define __NEURUN_IR_OPERATION_RESIZE_BILINEAR_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class ResizeBilinear : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
- struct Param
- {
- int32_t height_out;
- int32_t width_out;
- };
-
-public:
- ResizeBilinear(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ResizeBilinear; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_RESIZE_BILINEAR_H__
diff --git a/runtime/neurun/core/include/ir/operation/SQRT.h b/runtime/neurun/core/include/ir/operation/SQRT.h
deleted file mode 100644
index 5e21315b4..000000000
--- a/runtime/neurun/core/include/ir/operation/SQRT.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_SQRT_H__
-#define __NEURUN_IR_OPERATION_SQRT_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class SQRT : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::SQRT; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_SQRT_H__
diff --git a/runtime/neurun/core/include/ir/operation/Slice.h b/runtime/neurun/core/include/ir/operation/Slice.h
deleted file mode 100644
index 4b79f42a6..000000000
--- a/runtime/neurun/core/include/ir/operation/Slice.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_SLICE_H__
-#define __NEURUN_IR_OPERATION_SLICE_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Slice : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- BEGINS = 1,
- SIZES = 2,
- };
-
-public:
- struct Param
- {
- int32_t rank;
- };
-
-public:
- Slice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Slice; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_SLICE_H__
diff --git a/runtime/neurun/core/include/ir/operation/Softmax.h b/runtime/neurun/core/include/ir/operation/Softmax.h
deleted file mode 100644
index a3e896fed..000000000
--- a/runtime/neurun/core/include/ir/operation/Softmax.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_SOFTMAX_H__
-#define __NEURUN_IR_OPERATION_SOFTMAX_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Softmax : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
- struct Param
- {
- float beta;
- };
-
-public:
- Softmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Softmax; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_SOFTMAX_H__
diff --git a/runtime/neurun/core/include/ir/operation/SpaceToBatchND.h b/runtime/neurun/core/include/ir/operation/SpaceToBatchND.h
deleted file mode 100644
index 4ca0978b0..000000000
--- a/runtime/neurun/core/include/ir/operation/SpaceToBatchND.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_SPACE_TO_BATCH_ND_H__
-#define __NEURUN_IR_OPERATION_SPACE_TO_BATCH_ND_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class SpaceToBatchND : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- BLOCK_SIZE = 1,
- PADDINGS = 2
- };
-
-public:
- SpaceToBatchND(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::SpaceToBatchND; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_SPACE_TO_BATCH_ND_H__
diff --git a/runtime/neurun/core/include/ir/operation/SpaceToDepth.h b/runtime/neurun/core/include/ir/operation/SpaceToDepth.h
deleted file mode 100644
index 9e77bdae0..000000000
--- a/runtime/neurun/core/include/ir/operation/SpaceToDepth.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_SPACE_TO_DEPTH_H__
-#define __NEURUN_IR_OPERATION_SPACE_TO_DEPTH_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class SpaceToDepth : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
- struct Param
- {
- std::int32_t block_size;
- };
-
-public:
- SpaceToDepth(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::SpaceToDepth; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_SPACE_TO_DEPTH_H__
diff --git a/runtime/neurun/core/include/ir/operation/Split.h b/runtime/neurun/core/include/ir/operation/Split.h
deleted file mode 100644
index 7a2749e84..000000000
--- a/runtime/neurun/core/include/ir/operation/Split.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __NEURUN_IR_OPERATION_SPLIT_H__
-#define __NEURUN_IR_OPERATION_SPLIT_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-class Split : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
- struct Param
- {
- int axis;
- int num_splits;
- int rank;
- };
-
-public:
- Split(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Split; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-#endif // __NEURUN_IR_OPERATION_SPLIT_H__
diff --git a/runtime/neurun/core/include/ir/operation/SquaredDifference.h b/runtime/neurun/core/include/ir/operation/SquaredDifference.h
deleted file mode 100644
index 46df419f5..000000000
--- a/runtime/neurun/core/include/ir/operation/SquaredDifference.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_SQUARED_DIFFERENCE_H__
-#define __NEURUN_IR_OPERATION_SQUARED_DIFFERENCE_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class SquaredDifference : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
-public:
- SquaredDifference(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::SquaredDifference; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_SQUARED_DIFFERENCE_H__
diff --git a/runtime/neurun/core/include/ir/operation/Squeeze.h b/runtime/neurun/core/include/ir/operation/Squeeze.h
deleted file mode 100644
index d27b315b5..000000000
--- a/runtime/neurun/core/include/ir/operation/Squeeze.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_SQUEEZE_H__
-#define __NEURUN_IR_OPERATION_SQUEEZE_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Squeeze : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
- struct Param
- {
- // Please see tensorflow/lite/c/builtin_op_data.h and squeeze.cc.
- // tensorflow lite supports only for ndim <= 8.
- int dims[8];
- int ndim;
- };
-
-public:
- Squeeze(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Squeeze; }
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_SQUEEZE_H__
diff --git a/runtime/neurun/core/include/ir/operation/StridedSlice.h b/runtime/neurun/core/include/ir/operation/StridedSlice.h
deleted file mode 100644
index 868bda72c..000000000
--- a/runtime/neurun/core/include/ir/operation/StridedSlice.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_STRIDED_SLICE_H__
-#define __NEURUN_IR_OPERATION_STRIDED_SLICE_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class StridedSlice : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- STARTS = 1,
- ENDS = 2,
- STRIDES = 3
- };
-
- struct Param
- {
- std::int32_t begin_mask;
- std::int32_t end_mask;
- std::int32_t shrink_axis_mask;
- int32_t rank;
- };
-
-public:
- StridedSlice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::StridedSlice; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_STRIDED_SLICE_H__
diff --git a/runtime/neurun/core/include/ir/operation/Sub.h b/runtime/neurun/core/include/ir/operation/Sub.h
deleted file mode 100644
index e5850af8c..000000000
--- a/runtime/neurun/core/include/ir/operation/Sub.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_SUB_H__
-#define __NEURUN_IR_OPERATION_SUB_H__
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Sub : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
- struct Param
- {
- Activation activation;
- };
-
-public:
- Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Sub; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_SUB_H__
diff --git a/runtime/neurun/core/include/ir/operation/Tanh.h b/runtime/neurun/core/include/ir/operation/Tanh.h
deleted file mode 100644
index 814ceec5a..000000000
--- a/runtime/neurun/core/include/ir/operation/Tanh.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_TANH_H__
-#define __NEURUN_IR_OPERATION_TANH_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Tanh : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Tanh; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_TANH_H__
diff --git a/runtime/neurun/core/include/ir/operation/TopKV2.h b/runtime/neurun/core/include/ir/operation/TopKV2.h
deleted file mode 100644
index a6971e843..000000000
--- a/runtime/neurun/core/include/ir/operation/TopKV2.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_TOPK_V2_H__
-#define __NEURUN_IR_OPERATION_TOPK_V2_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class TopKV2 : public Operation
-{
-public:
- enum Input
- {
- INPUT
- };
-
- enum Output
- {
- OUTPUT_VALUES = 0,
- OUTPUT_INDICES,
- };
-
- struct Param
- {
- std::int32_t k;
- };
-
-public:
- TopKV2(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::TopKV2; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_TOPK_V2_H__
diff --git a/runtime/neurun/core/include/ir/operation/Transpose.h b/runtime/neurun/core/include/ir/operation/Transpose.h
deleted file mode 100644
index b1e08a506..000000000
--- a/runtime/neurun/core/include/ir/operation/Transpose.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_TRANSPOSE_H__
-#define __NEURUN_IR_OPERATION_TRANSPOSE_H__
-
-#include "ir/Operation.h"
-
-#include <utility>
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class Transpose : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0, // for an n-D tensor, specifying the tensor to be transposed.
- };
-
- struct Param
- {
- std::vector<int> perm;
- int32_t rank;
- };
-
-public:
- Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Transpose; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_TRANSPOSE_H__
diff --git a/runtime/neurun/core/include/ir/operation/TransposeConv.h b/runtime/neurun/core/include/ir/operation/TransposeConv.h
deleted file mode 100644
index a561db4e4..000000000
--- a/runtime/neurun/core/include/ir/operation/TransposeConv.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_OPERATION_TRANSPOSE_CONV_H__
-#define __NEURUN_IR_OPERATION_TRANSPOSE_CONV_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-class TransposeConv : public Operation
-{
-public:
- enum Input
- {
- OUTPUT_SHAPE = 0,
- KERNEL,
- INPUT
- };
-
- struct Param
- {
- Padding padding;
- Stride stride;
- };
-
-public:
- TransposeConv(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::TransposeConv; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_OPERATION_TRANSPOSE_CONV_H__
diff --git a/runtime/neurun/core/include/ir/operation/Unpack.h b/runtime/neurun/core/include/ir/operation/Unpack.h
deleted file mode 100644
index fa698d3af..000000000
--- a/runtime/neurun/core/include/ir/operation/Unpack.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __NEURUN_IR_OPERATION_UNPACK_H__
-#define __NEURUN_IR_OPERATION_UNPACK_H__
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-class Unpack : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
- struct Param
- {
- int32_t num;
- int32_t axis;
- int32_t rank;
- };
-
-public:
- Unpack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Unpack; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-} // namespace operation
-} // namespace ir
-} // namespace neurun
-#endif // __NEURUN_IR_OPERATION_UNPACK_H__
diff --git a/runtime/neurun/core/include/util/Config.lst b/runtime/neurun/core/include/util/Config.lst
deleted file mode 100644
index 046a0c4a8..000000000
--- a/runtime/neurun/core/include/util/Config.lst
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef CONFIG
-#error Define CONFIG before including this file
-#endif
-
-// Name | Type | Default
-CONFIG(GRAPH_DOT_DUMP , int , "0")
-CONFIG(BACKENDS , std::string , "acl_cl;acl_neon;cpu;srcn")
-CONFIG(OP_BACKEND_ALLOPS , std::string , "")
-CONFIG(OP_BACKEND_MAP , std::string , "")
-CONFIG(DISABLE_COMPILE , bool , "0")
-CONFIG(NEURUN_LOG_ENABLE , bool , "0")
-CONFIG(CPU_MEMORY_PLANNER , std::string , "WIC")
-CONFIG(EXECUTOR , std::string , "Linear")
-CONFIG(ACL_LAYOUT , std::string , "none")
-CONFIG(NCNN_LAYOUT , std::string , "NCHW")
-CONFIG(PROFILING_MODE , bool , "0")
-CONFIG(USE_SCHEDULER , bool , "0")
-CONFIG(SUBG_MAX_NODE , int , "0")
-CONFIG(TRACE_FILEPATH , std::string , "")
-
-// Auto-generate all operations
-
-#define OP(InternalName) \
- CONFIG(OP_BACKEND_ ## InternalName, std::string, "")
-#include "ir/Operations.lst"
-#undef OP
-
diff --git a/runtime/neurun/core/include/util/ConfigSource.h b/runtime/neurun/core/include/util/ConfigSource.h
deleted file mode 100644
index b1fa9a87d..000000000
--- a/runtime/neurun/core/include/util/ConfigSource.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_CONFIG_SOURCE_H__
-#define __NEURUN_UTIL_CONFIG_SOURCE_H__
-
-#include <memory>
-
-#include "IConfigSource.h"
-
-namespace neurun
-{
-namespace util
-{
-
-void config_source(std::unique_ptr<IConfigSource> &&source);
-
-bool getConfigBool(const std::string &key);
-int getConfigInt(const std::string &key);
-std::string getConfigString(const std::string &key);
-
-} // namespace util
-} // namespace neurun
-
-namespace neurun
-{
-namespace util
-{
-namespace config
-{
-
-#define CONFIG(Name, Type, Default) extern const char *Name;
-
-#include "Config.lst"
-
-#undef CONFIG
-
-} // namespace config
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_CONFIG_SOURCE_H__
diff --git a/runtime/neurun/core/include/util/Coordinates.h b/runtime/neurun/core/include/util/Coordinates.h
deleted file mode 100644
index 67947138f..000000000
--- a/runtime/neurun/core/include/util/Coordinates.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_COORDINATES_H__
-#define __NEURUN_UTIL_COORDINATES_H__
-
-#include <cassert>
-#include <stdint.h>
-#include <vector>
-
-namespace neurun
-{
-namespace util
-{
-
-/**
- * @brief Class to represent position(offset) of tensor.\n
- * Assume that the front is higher dimensional.
- * i.g. N: 0, C: 1, H: 2, W: 3 for NCHW layout
- */
-class Coordinates final
-{
-public:
- static constexpr size_t num_max_dimensions = 4;
-
-public:
- /**
- * @brief Construct a new Coordinates object
- * @param[in] init The initialzer_list with coordinates
- * @return
- */
- Coordinates(std::initializer_list<int32_t> init) : _coordinates{init}
- {
- assert(init.size() <= num_max_dimensions);
- }
-
-public:
- /**
- * @brief Set the coordinate of one of the coordinates.
- *
- * @param[in] dimension Dimension for which the coordinate is set.
- * @param[in] Coordinate Coordinate to be set for the dimension.
- */
- void set(size_t dimension, int32_t coordinate)
- {
- assert(dimension < num_max_dimensions);
- if (dimension >= _coordinates.size())
- {
- _coordinates.resize(dimension + 1, 0);
- }
- _coordinates[dimension] = coordinate;
- }
-
-public:
- /**
- * @brief Return size of coordinates
- *
- * @return size of coordinates
- */
- size_t size() const { return _coordinates.size(); }
-
-public:
- int32_t operator[](size_t dimension) const
- {
- assert(dimension < _coordinates.size());
- return _coordinates[dimension];
- }
-
-public:
- /**
- * @brief begin() of const_iterator for this class
- *
- * @return The first iterator of the coordinates
- */
- std::vector<int32_t>::const_iterator begin() const { return _coordinates.begin(); }
- /**
- * @brief end() of const_iterator for this class
- *
- * @return The last iterator of the coordinates
- */
- std::vector<int32_t>::const_iterator end() const { return _coordinates.end(); }
-
-private:
- std::vector<int32_t> _coordinates;
-};
-
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_COORDINATES_H__
diff --git a/runtime/neurun/core/include/util/EnvConfigSource.h b/runtime/neurun/core/include/util/EnvConfigSource.h
deleted file mode 100644
index 77be15c4e..000000000
--- a/runtime/neurun/core/include/util/EnvConfigSource.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_ENV_CONFIG_SOURCE_H__
-#define __NEURUN_UTIL_ENV_CONFIG_SOURCE_H__
-
-#include <unordered_map>
-
-#include "util/GeneralConfigSource.h"
-
-namespace neurun
-{
-namespace util
-{
-
-class EnvConfigSource final : public GeneralConfigSource
-{
-public:
- std::string get(const std::string &key) const override;
-
-private:
- std::unordered_map<std::string, std::string> _default_attributes;
-};
-
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_ENV_CONFIG_SOURCE_H__
diff --git a/runtime/neurun/core/include/util/EventCollectorGlobal.h b/runtime/neurun/core/include/util/EventCollectorGlobal.h
deleted file mode 100644
index 15e40844e..000000000
--- a/runtime/neurun/core/include/util/EventCollectorGlobal.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_EVENT_COLLECTOR_GLOBAL_H__
-#define __NEURUN_UTIL_EVENT_COLLECTOR_GLOBAL_H__
-
-#include "misc/EventRecorder.h"
-#include "misc/EventCollector.h"
-
-namespace neurun
-{
-namespace util
-{
-
-/**
- * @brief Singleton class for event collection from anywhere in code
- *
- */
-class EventCollectorGlobal
-{
-public:
- /**
- * @brief Get the singleton object of this class
- *
- * @return EventCollectorGlobal& Singleton object
- */
- static EventCollectorGlobal &get();
-
-public:
- /**
- * @brief Getter for event collector object
- *
- * @return EventCollector& Collector object
- */
- EventCollector &collector() { return _collector; }
-
-private:
- EventCollectorGlobal();
- ~EventCollectorGlobal();
-
-private:
- EventRecorder _recorder;
- EventCollector _collector;
-};
-
-/**
- * @brief Helper class for emitting duration event which is handled automatically with ctor/dtor
- *
- */
-class EventDurationBlock
-{
-public:
- /**
- * @brief Raise a duration event with type of BEGIN
- *
- * @param tag A label for the duration event
- */
- EventDurationBlock(const std::string &tag);
- /**
- * @brief Raise a duration event with type of END
- *
- */
- ~EventDurationBlock();
-
-private:
- std::string _tag;
-};
-
-/**
- * @brief Helper class for emitting duration event which is handled manually
- *
- * Usage:
- * {
- * ...
- * EventDurationManual duration("some tag");
- * duration.begin();
- * ...
- * ... // Code for duration
- * ...
- * duration.end();
- * }
- *
- */
-class EventDurationManual
-{
-public:
- /**
- * @brief Construct a new Event Duration Manual object
- *
- * @param tag A label for the duration object
- */
- EventDurationManual(const std::string &tag);
- /**
- * @brief Destroy the Event Duration Manual object
- *
- */
- ~EventDurationManual();
-
- /**
- * @brief Raise a duration event with type of BEGIN
- *
- */
- void begin();
- /**
- * @brief Raise a duration event with type of END
- *
- */
- void end();
-
-private:
- std::string _tag;
- bool _pair;
-};
-
-} // namespace util
-} // namespace neurun
-
-/**
- * Helper Macro Definitions
- *
- * HOW TO USE
- *
- * void f(args)
- * {
- * EVENT_DURATION_FUNCTION();
- * ...
- * if(cond)
- * {
- * EVENT_DURATION_REGION("if branch");
- * ...
- * }
- * ...
- * }
- */
-
-#define EVENT_DURATION_FUNCTION() \
- ::neurun::util::EventDurationBlock __event_duration__##__LINE__ { __FUNCTION__ }
-
-#define EVENT_DURATION_REGION(tag) \
- ::neurun::util::EventDurationBlock __event_duration__##__LINE__ { tag }
-
-#endif // __NEURUN_UTIL_EVENT_COLLECTOR_GLOBAL_H__
diff --git a/runtime/neurun/core/include/util/GeneralConfigSource.h b/runtime/neurun/core/include/util/GeneralConfigSource.h
deleted file mode 100644
index 04e3332b3..000000000
--- a/runtime/neurun/core/include/util/GeneralConfigSource.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_GLOBAL_CONFIG_SOURCE_H__
-#define __NEURUN_UTIL_GLOBAL_CONFIG_SOURCE_H__
-
-#include <unordered_map>
-
-#include "util/IConfigSource.h"
-
-namespace neurun
-{
-namespace util
-{
-
-class GeneralConfigSource : public IConfigSource
-{
-public:
- GeneralConfigSource() = default;
-
- std::string get(const std::string &key) const override;
- void set(const std::string &key, const std::string &val);
-
-private:
- std::unordered_map<std::string, std::string> _map;
-};
-
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_GLOBAL_CONFIG_SOURCE_H__
diff --git a/runtime/neurun/core/include/util/IConfigSource.h b/runtime/neurun/core/include/util/IConfigSource.h
deleted file mode 100644
index a52d87097..000000000
--- a/runtime/neurun/core/include/util/IConfigSource.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_I_CONFIG_SOURCE_H__
-#define __NEURUN_UTIL_I_CONFIG_SOURCE_H__
-
-#include <string>
-
-namespace neurun
-{
-namespace util
-{
-
-struct IConfigSource
-{
- /**
- * @brief Destroy the IConfigSource object
- */
- virtual ~IConfigSource() = default;
-
- /**
- * @brief get the value for the matching key
- *
- * @param key string key to search
- * @return string value associated with the key
- */
- virtual std::string get(const std::string &key) const = 0;
-};
-
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_I_CONFIG_SOURCE_H__
diff --git a/runtime/neurun/core/include/util/ITimer.h b/runtime/neurun/core/include/util/ITimer.h
deleted file mode 100644
index 79ecdd0ca..000000000
--- a/runtime/neurun/core/include/util/ITimer.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_ITIMER_H__
-#define __NEURUN_UTIL_ITIMER_H__
-
-#include <chrono>
-
-namespace neurun
-{
-namespace util
-{
-
-class ITimer
-{
-public:
- virtual void handleBegin() = 0;
- virtual void handleEnd() = 0;
- int getTime() { return _timer_res; };
-
- virtual ~ITimer() = default;
-
-protected:
- int _timer_res{0};
-};
-
-class CPUTimer : public ITimer
-{
-public:
- void handleBegin() override { _start_time = std::chrono::steady_clock::now(); };
-
- void handleEnd() override
- {
- const auto end_time = std::chrono::steady_clock::now();
- _timer_res =
- std::chrono::duration_cast<std::chrono::microseconds>(end_time - _start_time).count();
- };
-
-private:
- std::chrono::steady_clock::time_point _start_time; // in microseconds
-};
-
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_ITIMER_H__
diff --git a/runtime/neurun/core/include/util/Index.h b/runtime/neurun/core/include/util/Index.h
deleted file mode 100644
index bd8eeb38c..000000000
--- a/runtime/neurun/core/include/util/Index.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_INDEX_H__
-#define __NEURUN_UTIL_INDEX_H__
-
-#include <functional>
-#include <limits>
-#include <stdint.h>
-
-namespace neurun
-{
-namespace util
-{
-
-/**
- * @brief A wrapper class for unsigned integral Index
- * NOTE : Max value of the underlying type is used as the invalid value
- *
- * @tparam T Underlying type. Must be unsigned integral type otherwise its behavior is undefined.
- * @tparam DummyTag Dummy type to distinguish types with a same underlying type. Using an opaque
- * type is recommended.
- */
-template <typename T, typename DummyTag> class Index
-{
-private:
- static const T UNDEFINED = std::numeric_limits<T>::max();
-
-public:
- /**
- * @brief Construct a new Index object
- */
- explicit Index(void) : _index{UNDEFINED} {}
- /**
- * @brief Construct a new Index object with a value in the underlying type
- *
- * @param o Value in the underlying type
- */
- explicit Index(const T o) : _index{o} {}
- /**
- * @brief Copy Constructor
- *
- * @param o Object to be copied
- */
- Index(const Index &o) = default;
-
- /**
- * @brief Assign a value in the underlying time
- *
- * @param o Value in the underlying type
- * @return Index& Reference of this pointer
- */
- Index &operator=(const T o)
- {
- _index = o;
- return *this;
- }
-
- /**
- * @brief Copy assignment operator
- *
- * @param o Object to be copied
- * @return Index& Reference of this pointer
- */
- Index &operator=(const Index &o) = default;
-
- /**
- * @brief Equality operator
- *
- * @param o The other value in the underlying type to compare
- * @return true if underlying value is the same, false otherwise
- */
- bool operator==(T o) const { return _index == o; }
- /**
- * @brief Equality operator
- *
- * @param o The other object to compare
- * @return true if underlying value is the same, false otherwise
- */
- bool operator==(const Index &o) const { return _index == o._index; }
- /**
- * @brief Inquality operator
- *
- * @param o The other value in the underlying type to compare
- * @return true if underlying value is different, false otherwise
- */
- bool operator!=(T o) const { return !(*this == o); }
- /**
- * @brief Inquality operator
- *
- * @param o The other object to compare
- * @return true if underlying value is different, false otherwise
- */
- bool operator!=(const Index &o) const { return !(*this == o); }
-
- /**
- * @brief Post increment operator
- *
- * @return Index Index before increment
- */
- Index operator++(int)
- {
- Index temp = *this;
- _index++;
- return temp;
- }
-
- /**
- * @brief Check whether the value is valid or not
- *
- * @return true if valid, false otherwise
- */
- bool valid() const { return _index != UNDEFINED; }
- /**
- * @brief Return underlying value
- *
- * @return T Underlying value
- */
- T value() const { return _index; }
-
-private:
- T _index;
-};
-
-} // namespace util
-} // namespace neurun
-
-namespace std
-{
-
-template <typename T, typename Tag> struct hash<::neurun::util::Index<T, Tag>>
-{
- size_t operator()(const ::neurun::util::Index<T, Tag> &index) const noexcept
- {
- return hash<T>()(index.value());
- }
-};
-
-} // namespace std
-
-#endif // __NEURUN_UTIL_INDEX_H__
diff --git a/runtime/neurun/core/include/util/ObjectManager.h b/runtime/neurun/core/include/util/ObjectManager.h
deleted file mode 100644
index fd2c3f295..000000000
--- a/runtime/neurun/core/include/util/ObjectManager.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_OBJECT_MANAGER_H__
-#define __NEURUN_UTIL_OBJECT_MANAGER_H__
-
-#include <unordered_map>
-#include <memory>
-
-namespace neurun
-{
-namespace util
-{
-
-/**
- * @brief Class that owns objects and maps them with indices as a handle for them
- *
- */
-template <typename Index, typename Object> class ObjectManager
-{
-public:
- ObjectManager() : _index_count{0u} {}
-
-public:
- /**
- * @brief Create an object with args and put it in the container with a new Index for that
- *
- * @param[in] args Arguments for creating Operand object
- * @return Created index that is associated to the object
- */
- template <class... Args> Index emplace(Args &&... args)
- {
- auto index = generateIndex();
- _objects.emplace(index, nnfw::cpp14::make_unique<Object>(std::forward<Args>(args)...));
- return index;
- }
-
- /**
- * @brief Put object in the container with a new Index for that
- *
- * @param[in] object Object to be pushed
- * @return Created index that is associated to the object
- */
- Index push(std::unique_ptr<Object> &&object)
- {
- auto index = generateIndex();
- _objects.emplace(index, std::move(object));
- return index;
- }
-
- /**
- * @brief Remove the object that is associated with the given index
- *
- * @param[in] index Index of the object to be removed
- * @return N/A
- */
- void remove(const Index &index) { _objects.erase(index); };
-
- /**
- * @brief Get the object that is associated with the given index
- *
- * @param[in] index Index of the object to be returned
- * @return Object
- */
- const Object &at(const Index &index) const { return *(_objects.at(index)); }
- /**
- * @brief Get the object that is associated with the given index
- *
- * @param[in] index Index of the object to be returned
- * @return Object
- */
- Object &at(const Index &index) { return *(_objects.at(index)); }
- /**
- * @brief Get the object that is associated with the given index
- *
- * @param[in] index Index of the object to be returned
- * @return true if such entry exists otherwise false
- */
- bool exist(const Index &index) const
- {
- auto it = _objects.find(index);
- return it != _objects.end();
- }
- /**
- * @brief Iterate over the container with given function
- *
- * @param[in] fn Function to be run for every container entry
- * @return N/A
- */
- void iterate(const std::function<void(const Index &, const Object &)> &fn) const
- {
- for (const auto &e : _objects)
- {
- fn(e.first, *e.second);
- }
- }
- /**
- * @brief Iterate over the container with given function
- *
- * @param[in] fn Function to be run for every container entry
- * @return N/A
- */
- void iterate(const std::function<void(const Index &, Object &)> &fn)
- {
- // TODO Remove this workaround
- // This implementation is a workaround in case of adding operands while iteration
- std::list<Index> l;
-
- for (auto &e : _objects)
- {
- l.push_back(e.first);
- }
-
- for (auto index : l)
- {
- fn(index, *_objects[index]);
- }
- }
-
-private:
- Index generateIndex() { return Index{_index_count++}; }
-
-private:
- std::unordered_map<Index, std::unique_ptr<Object>> _objects;
- uint32_t _index_count;
-};
-
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_OBJECT_MANAGER_H__
diff --git a/runtime/neurun/core/include/util/Padding.h b/runtime/neurun/core/include/util/Padding.h
deleted file mode 100644
index 3c707b873..000000000
--- a/runtime/neurun/core/include/util/Padding.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_PADDING_H__
-#define __NEURUN_UTIL_PADDING_H__
-
-#include <stdint.h>
-
-#include "ir/Shape.h"
-#include "ir/InternalType.h"
-
-namespace neurun
-{
-namespace util
-{
-
-ir::ExplicitPadding validPadding(void);
-ir::ExplicitPadding samePadding(const ir::FeatureShape &ifm_shape,
- const ir::FeatureShape &ofm_shape, const ir::Stride &stride,
- uint32_t kw, uint32_t kh);
-ir::ExplicitPadding calculatePadding(const ir::Padding &padding, const ir::FeatureShape &ifm_shape,
- const ir::FeatureShape &ofm_shape, const ir::Stride &stride,
- uint32_t kw, uint32_t kh);
-
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_PADDING_H__
diff --git a/runtime/neurun/core/include/util/Set.h b/runtime/neurun/core/include/util/Set.h
deleted file mode 100644
index 13213511d..000000000
--- a/runtime/neurun/core/include/util/Set.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Set.h
- * @brief This file contains neurun::util::Set class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NEURUN_UTIL_SET_H__
-#define __NEURUN_UTIL_SET_H__
-
-#include <cassert>
-#include <unordered_set>
-
-namespace neurun
-{
-namespace util
-{
-
-/**
- * @brief Class for set of custom element
- & @tparam Element Key type of Set
- */
-template <typename Element> class Set
-{
-public:
- /**
- * @brief Construct default Set object.
- */
- Set() = default;
- /**
- * @brief Construct Set object by copy semantics.
- */
- Set(const Set<Element> &) = default;
- /**
- * @brief Construct move Set object by move semantics.
- */
- Set(Set<Element> &&) = default;
-
-public:
- /**
- * @brief Add a given element to the set
- *
- * @param e Element added
- */
- void add(const Element &e) { _set.insert(e); }
- /**
- * @brief remove a given element from the set
- *
- * @param e Element removed
- */
- void remove(const Element &e) { _set.erase(e); }
- /**
- * @brief Get size of the set
- *
- * @return The size of the set
- */
- uint32_t size() const { return static_cast<uint32_t>(_set.size()); }
- /**
- * @brief Get whether the set is empty
- *
- * @return Whether the set is empty
- */
- bool empty() const { return _set.empty(); }
- /**
- * @brief Get whether a given element exists in the set
- *
- * @param e A given element
- *
- * @return Whether a given element exists in the set
- */
- bool contains(const Element &e) const { return _set.find(e) != _set.end(); }
- /**
- * @brief Get first element of the set
- *
- * @return first element of the set
- */
- const Element &getOnlyElement() const
- {
- assert(_set.size() == 1u);
- return *_set.begin();
- }
-
-public:
- /**
- * @brief operator overloading function for `|`
- *
- * @return A set with two sets combined
- */
- Set<Element> operator|(const Set<Element> &other) const // Union
- {
- auto ret = *this;
- for (auto e : other)
- {
- ret.add(e);
- }
- return ret;
- }
- /**
- * @brief operator overloading function for `&`
- *
- * @return A set of elements that overlap in two sets
- */
- Set<Element> operator&(const Set<Element> &other) const // Intersect
- {
- Set<Element> ret;
- for (auto e : other)
- {
- if (contains(e))
- {
- ret.add(e);
- }
- }
- return ret;
- }
- /**
- * @brief operator overloading function for `-`
- *
- * @return A set of subtracted from another set
- */
- Set<Element> operator-(const Set<Element> &other) const // Minus
- {
- auto ret = *this;
- for (auto e : other)
- {
- ret.remove(e);
- }
- return ret;
- }
-
-public:
- /**
- * @brief begin() of const_iterator for this class
- *
- * @return The first iterator of the set
- */
- typename std::unordered_set<Element>::const_iterator begin() const { return _set.begin(); }
- /**
- * @brief end() of const_iterator for this class
- *
- * @return The last iterator of the set
- */
- typename std::unordered_set<Element>::const_iterator end() const { return _set.end(); }
-
-private:
- std::unordered_set<Element> _set;
-};
-
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_SET_H__
diff --git a/runtime/neurun/core/include/util/ShapeInference.h b/runtime/neurun/core/include/util/ShapeInference.h
deleted file mode 100644
index 097d61643..000000000
--- a/runtime/neurun/core/include/util/ShapeInference.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_GRAPH_SHAPE_INFERENCE_H__
-#define __NEURUN_GRAPH_SHAPE_INFERENCE_H__
-
-#include "ir/operation/AvgPool2D.h"
-#include "ir/operation/Concat.h"
-#include "ir/operation/MaxPool2D.h"
-#include "ir/operation/Conv2D.h"
-#include "ir/operation/DepthwiseConv2D.h"
-#include "ir/Operands.h"
-#include "ir/Index.h"
-#include "ir/Layout.h"
-
-namespace neurun
-{
-namespace shape_inference
-{
-
-using Shapes = std::vector<ir::Shape>;
-
-Shapes inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape);
-
-Shapes inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param,
- ir::Layout layout = ir::Layout::NHWC);
-
-Shapes inferConcatShape(const Shapes &in_shapes, const ir::operation::Concat::Param &param);
-
-Shapes inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param,
- ir::Layout layout = ir::Layout::NHWC);
-
-Shapes inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape,
- const ir::operation::Conv2D::Param &param,
- ir::Layout layout = ir::Layout::NHWC);
-
-Shapes inferDepthwiseConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape,
- const ir::operation::DepthwiseConv2D::Param &param,
- ir::Layout layout = ir::Layout::NHWC);
-
-Shapes inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape);
-
-} // namespace shape_inference
-} // namespace neurun
-
-#endif // __NEURUN_GRAPH_SHAPE_INFERENCE_H__
diff --git a/runtime/neurun/core/include/util/Utils.h b/runtime/neurun/core/include/util/Utils.h
deleted file mode 100644
index e7468dabd..000000000
--- a/runtime/neurun/core/include/util/Utils.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Utils.h
- * @brief This file contains utility functions
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NEURUN_UTIL_UTILS_H__
-#define __NEURUN_UTIL_UTILS_H__
-
-#include "ir/InternalType.h"
-#include "ir/Layout.h"
-#include "ir/Operand.h"
-#include "util/Coordinates.h"
-
-#define UNUSED_RELEASE(a) (void)(a)
-
-namespace neurun
-{
-namespace util
-{
-
-/**
- * @brief Converts a internal padding type to const char*
- * @param[in] type Padding type to be converted
- * @return A string holding the converted value
- */
-const char *to_string(ir::PaddingType type);
-
-Coordinates convertCoordinates(const Coordinates &from_coordinates, ir::Layout from_layout,
- ir::Layout to_layout);
-
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_UTILS_H__
diff --git a/runtime/neurun/core/include/util/feature/Coordinate4D.h b/runtime/neurun/core/include/util/feature/Coordinate4D.h
deleted file mode 100644
index b020ed239..000000000
--- a/runtime/neurun/core/include/util/feature/Coordinate4D.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_FEATURE_COORDINATE_4D_H__
-#define __NEURUN_UTIL_FEATURE_COORDINATE_4D_H__
-
-#include <stdint.h>
-
-namespace neurun
-{
-namespace util
-{
-namespace feature
-{
-
-/**
- * @brief Class to represent position(offset) of subtensor.\n
- * Assume that parent and child are already lowered (can get Shape4D).
- */
-class Coordinate4D
-{
-public:
- /**
- * @brief Construct a new Coordinate4D object
- */
- Coordinate4D(void) : _n{0}, _h{0}, _w{0}, _c{0}
- {
- // DO NOTHING
- }
- /**
- * @brief Construct a new Coordinate4D object
- * @param[in] n Batch offset
- * @param[in] h Height offset
- * @param[in] w Width offset
- * @param[in] c Channel offset
- * @return
- */
- Coordinate4D(int32_t n, int32_t h, int32_t w, int32_t c) : _n{n}, _h{h}, _w{w}, _c{c}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Set batch offset
- * @param[in] n Batch offset
- */
- void n(int32_t n) { _n = n; }
- /**
- * @brief Set height offset
- * @param[in] h Height offset
- */
- void h(int32_t h) { _h = h; }
- /**
- * @brief Set width offset
- * @param[in] w Width offset
- */
- void w(int32_t w) { _w = w; }
- /**
- * @brief Set channel offset
- * @param[in] c Channel offset
- */
- void c(int32_t c) { _c = c; }
-
-public:
- /**
- * @brief Return batch offset
- * @return Batch offset
- */
- int32_t n(void) const { return _n; }
- /**
- * @brief Return height offset
- * @return Height offset
- */
- int32_t h(void) const { return _h; }
- /**
- * @brief Return width offset
- * @return Width offset
- */
- int32_t w(void) const { return _w; }
- /**
- * @brief Return channel offset
- * @return Channel offset
- */
- int32_t c(void) const { return _c; }
-
-private:
- int32_t _n;
- int32_t _h;
- int32_t _w;
- int32_t _c;
-};
-
-} // namespace feature
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_FEATURE_COORDINATE_4D_H__
diff --git a/runtime/neurun/core/include/util/feature/nchw/Reader.h b/runtime/neurun/core/include/util/feature/nchw/Reader.h
deleted file mode 100644
index 0305bdf69..000000000
--- a/runtime/neurun/core/include/util/feature/nchw/Reader.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_FEATURE_NCHW_READER_H__
-#define __NEURUN_UTIL_FEATURE_NCHW_READER_H__
-
-#include <cassert>
-
-#include "backend/operand/ITensor.h"
-#include "misc/feature/Reader.h"
-#include "misc/feature/Shape.h"
-#include "util/Coordinates.h"
-#include "util/Utils.h"
-
-namespace neurun
-{
-namespace util
-{
-namespace feature
-{
-namespace nchw
-{
-
-template <typename T> class Reader final : public nnfw::misc::feature::Reader<T>
-{
-public:
- // Construct for buffer of model inputs
- Reader(const ::nnfw::misc::feature::Shape &shape, const T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
- {
- assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
-
- // No padding
- _strides.W = sizeof(T);
- _strides.H = shape.W * sizeof(T);
- _strides.C = shape.W * shape.H * sizeof(T);
- _strides.N = shape.W * shape.H * shape.C * sizeof(T);
- }
-
- // Construct for backend tensor
- Reader(backend::operand::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
- {
- assert(tensor->layout() == ir::Layout::NCHW);
-
- const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.W = tensor->dimension(3);
- _shape.H = tensor->dimension(2);
- _shape.C = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
- }
-
-public:
- T at(uint32_t ch, uint32_t row, uint32_t col) const override
- {
- const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
- }
- T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
- {
- const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
- }
-
-private:
- size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
- {
- assert(1u * _shape.N > batch); // shape.N > batch
- assert(1u * _shape.C > ch); // shape.C > ch
- assert(1u * _shape.H > row); // shape.H > row
- assert(1u * _shape.W > col); // shape.W > col
-
- uint32_t res = 0;
- res += batch * _strides.N;
- res += ch * _strides.C;
- res += row * _strides.H;
- res += col * _strides.W;
-
- return res;
- }
-
-private:
- // TODO Remove _shape
- nnfw::misc::feature::Shape _shape;
- using Strides = nnfw::misc::feature::Shape;
- Strides _strides;
- const uint8_t *_ptr;
- size_t _len;
-};
-
-} // namespace nchw
-} // namespace feature
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_FEATURE_NCHW_READER_H__
diff --git a/runtime/neurun/core/include/util/feature/nchw/View.h b/runtime/neurun/core/include/util/feature/nchw/View.h
deleted file mode 100644
index d747937ee..000000000
--- a/runtime/neurun/core/include/util/feature/nchw/View.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_FEATURE_NCHW_VIEW_H__
-#define __NEURUN_UTIL_FEATURE_NCHW_VIEW_H__
-
-#include "misc/feature/Reader.h"
-#include "misc/feature/Shape.h"
-
-#include "backend/operand/ITensor.h"
-#include "util/Coordinates.h"
-#include "util/Utils.h"
-
-#include <cassert>
-
-namespace neurun
-{
-namespace util
-{
-namespace feature
-{
-namespace nchw
-{
-
-template <typename T> class View final : public nnfw::misc::feature::Reader<T>
-{
-public:
- // Construct for buffer of model inputs
- View(const ::nnfw::misc::feature::Shape &shape, T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len}
- {
- assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
-
- _strides.W = sizeof(T);
- _strides.H = shape.W * sizeof(T);
- _strides.C = shape.W * shape.H * sizeof(T);
- _strides.N = shape.W * shape.H * shape.C * sizeof(T);
- }
-
- // Construct for backend tensor
- View(::neurun::backend::operand::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
- {
- assert(tensor->layout() == ir::Layout::NCHW);
-
- const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.W = tensor->dimension(3);
- _shape.H = tensor->dimension(2);
- _shape.C = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
- }
-
-public:
- T at(uint32_t ch, uint32_t row, uint32_t col) const override
- {
- const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
- T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
- {
- const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
-
-public:
- T &at(uint32_t ch, uint32_t row, uint32_t col)
- {
- const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
- T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col)
- {
- const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
-
-private:
- size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
- {
- assert(1u * _shape.N > batch); // shape.N > batch
- assert(1u * _shape.C > ch); // shape.C > ch
- assert(1u * _shape.H > row); // shape.H > row
- assert(1u * _shape.W > col); // shape.W > col
-
- uint32_t res = 0;
- res += batch * _strides.N;
- res += ch * _strides.C;
- res += row * _strides.H;
- res += col * _strides.W;
-
- return res;
- }
-
-private:
- // TODO Remove _shape
- nnfw::misc::feature::Shape _shape;
- using Strides = nnfw::misc::feature::Shape;
- Strides _strides;
- uint8_t *_ptr;
- size_t _len;
-};
-
-} // namespace nchw
-} // namespace feature
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_FEATURE_NCHW_VIEW_H__
diff --git a/runtime/neurun/core/include/util/feature/nhwc/Reader.h b/runtime/neurun/core/include/util/feature/nhwc/Reader.h
deleted file mode 100644
index 0df7be4be..000000000
--- a/runtime/neurun/core/include/util/feature/nhwc/Reader.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_FEATURE_NHWC_READER_H__
-#define __NEURUN_UTIL_FEATURE_NHWC_READER_H__
-
-#include <cassert>
-
-#include "backend/operand/ITensor.h"
-#include "misc/feature/Reader.h"
-#include "misc/feature/Shape.h"
-#include "util/Coordinates.h"
-#include "util/Utils.h"
-
-namespace neurun
-{
-namespace util
-{
-namespace feature
-{
-namespace nhwc
-{
-
-template <typename T> class Reader final : public nnfw::misc::feature::Reader<T>
-{
-public:
- // Construct for buffer of model inputs
- Reader(const ::nnfw::misc::feature::Shape &shape, const T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
- {
- UNUSED_RELEASE(len); // Workaround for unused variable in release mode
- assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
-
- // No padding
- _strides.C = sizeof(T);
- _strides.W = shape.C * sizeof(T);
- _strides.H = shape.C * shape.W * sizeof(T);
- _strides.N = shape.C * shape.W * shape.H * sizeof(T);
- }
-
- // Construct for backend tensor
- Reader(const backend::operand::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
- {
- assert(tensor->layout() == ir::Layout::NHWC);
-
- const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.C = tensor->dimension(3);
- _shape.W = tensor->dimension(2);
- _shape.H = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
- }
-
-public:
- T at(uint32_t row, uint32_t col, uint32_t ch) const override
- {
- const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
- }
- T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override
- {
- const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
- }
-
-private:
- size_t feature_index_to_byte_offset(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const
- {
- assert(1u * _shape.N > batch); // shape.N > batch
- assert(1u * _shape.H > row); // shape.H > row
- assert(1u * _shape.W > col); // shape.W > col
- assert(1u * _shape.C > ch); // shape.C > ch
-
- uint32_t res = 0;
- res += batch * _strides.N;
- res += row * _strides.H;
- res += col * _strides.W;
- res += ch * _strides.C;
-
- return res;
- }
-
-private:
- // TODO Remove _shape
- nnfw::misc::feature::Shape _shape;
- using Strides = nnfw::misc::feature::Shape;
- Strides _strides;
- const uint8_t *_ptr;
- size_t _len;
-};
-
-} // namespace nhwc
-} // namespace feature
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_FEATURE_NHWC_READER_H__
diff --git a/runtime/neurun/core/include/util/feature/nhwc/View.h b/runtime/neurun/core/include/util/feature/nhwc/View.h
deleted file mode 100644
index b9d98e9fa..000000000
--- a/runtime/neurun/core/include/util/feature/nhwc/View.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_FEATURE_NHWC_VIEW_H__
-#define __NEURUN_UTIL_FEATURE_NHWC_VIEW_H__
-
-#include <cassert>
-#include <cstddef>
-
-#include "backend/operand/ITensor.h"
-#include "misc/feature/Reader.h"
-#include "misc/feature/Shape.h"
-#include "util/Coordinates.h"
-#include "util/Utils.h"
-
-namespace neurun
-{
-namespace util
-{
-namespace feature
-{
-namespace nhwc
-{
-
-template <typename T> class View final : public nnfw::misc::feature::Reader<T>
-{
-public:
- // Construct for buffer of model inputs
- View(const ::nnfw::misc::feature::Shape &shape, T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len}
- {
- UNUSED_RELEASE(len); // Workaround for unused variable in release mode
- assert(shape.N * shape.H * shape.W * shape.C * sizeof(T) == len);
-
- // No padding
- _strides.C = sizeof(T);
- _strides.W = shape.C * sizeof(T);
- _strides.H = shape.C * shape.W * sizeof(T);
- _strides.N = shape.C * shape.W * shape.H * sizeof(T);
- }
-
- // Construct for backend tensor
- View(backend::operand::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
- {
- assert(tensor->layout() == ir::Layout::NHWC);
-
- const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.C = tensor->dimension(3);
- _shape.W = tensor->dimension(2);
- _shape.H = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
- }
-
-public:
- T at(uint32_t row, uint32_t col, uint32_t ch) const override
- {
- const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
- }
- T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override
- {
- const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
- }
-
- T &at(uint32_t row, uint32_t col, uint32_t ch)
- {
- const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
-
- T &at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch)
- {
- const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
-
-private:
- size_t feature_index_to_byte_offset(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const
- {
- assert(1u * _shape.N > batch); // shape.N > batch
- assert(1u * _shape.H > row); // shape.H > row
- assert(1u * _shape.W > col); // shape.W > col
- assert(1u * _shape.C > ch); // shape.C > ch
-
- uint32_t res = 0;
- res += batch * _strides.N;
- res += row * _strides.H;
- res += col * _strides.W;
- res += ch * _strides.C;
-
- return res;
- }
-
-private:
- // TODO Remove _shape
- nnfw::misc::feature::Shape _shape;
- using Strides = nnfw::misc::feature::Shape;
- Strides _strides;
- uint8_t *_ptr;
- size_t _len;
-};
-
-} // namespace nhwc
-} // namespace feature
-} // namespace util
-} // namespace neurun
-
-#endif // __NEURUN_UTIL_FEATURE_NHWC_VIEW_H__
diff --git a/runtime/neurun/core/include/util/logging.h b/runtime/neurun/core/include/util/logging.h
deleted file mode 100644
index 8ecd0ac12..000000000
--- a/runtime/neurun/core/include/util/logging.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_UTIL_LOGGING_H__
-#define __NEURUN_UTIL_LOGGING_H__
-
-#include <iostream>
-
-#include "util/ConfigSource.h"
-
-namespace neurun
-{
-namespace util
-{
-namespace logging
-{
-
-class Context
-{
-public:
- Context() noexcept : _enabled{false}
- {
- const auto env = util::getConfigBool(util::config::NEURUN_LOG_ENABLE);
-
- if (env)
- {
- _enabled = true;
- }
- }
-
- static Context &get() noexcept;
-
-public:
- bool enabled(void) const { return _enabled; }
-
-private:
- bool _enabled;
-};
-
-static Context &ctx = Context::get();
-
-} // namespace logging
-} // namespace util
-} // namespace neurun
-
-#define VERBOSE(name) \
- if (::neurun::util::logging::ctx.enabled()) \
- std::cout << "[" << #name << "] "
-
-#endif // __NEURUN_UTIL_LOGGING_H__
diff --git a/runtime/neurun/core/src/backend/Backend.cc b/runtime/neurun/core/src/backend/Backend.cc
deleted file mode 100644
index c2f745f8f..000000000
--- a/runtime/neurun/core/src/backend/Backend.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/Backend.h"
-
-#include "backend/IConfig.h"
-#include "backend/ITensorBuilder.h"
-#include "backend/IKernelGenerator.h"
-#include "backend/IShapeFixer.h"
-
-namespace neurun
-{
-namespace backend
-{
-
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/core/src/backend/BackendManager.cc b/runtime/neurun/core/src/backend/BackendManager.cc
deleted file mode 100644
index 32086e8b6..000000000
--- a/runtime/neurun/core/src/backend/BackendManager.cc
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <memory>
-#include <dlfcn.h>
-#include "BackendManager.h"
-
-#include "backend/Backend.h"
-#include "backend/IConfig.h"
-#include "util/logging.h"
-#include "util/ConfigSource.h"
-#include "misc/string_helpers.h"
-
-namespace neurun
-{
-namespace backend
-{
-
-BackendManager &BackendManager::get()
-{
- static BackendManager object;
- return object;
-}
-
-template <typename T, class... Types>
-void BackendManager::loadObjectFromPlugin(std::shared_ptr<T> &object_of_plugin_class,
- const std::string obj_creator_func_name, void *handle,
- Types &&... args)
-{
- T *(*allocate_obj)(Types && ... Args);
- // load object creator function
- allocate_obj = (T * (*)(Types && ... Args))dlsym(handle, obj_creator_func_name.c_str());
- if (allocate_obj == nullptr)
- {
- fprintf(stderr, "BackendManager: unable to open function %s: %s\n",
- obj_creator_func_name.c_str(), dlerror());
- abort();
- }
-
- object_of_plugin_class.reset(allocate_obj(args...));
-}
-
-void BackendManager::loadBackend(const std::string &backend)
-{
- const std::string backend_plugin = "libbackend_" + backend + ".so";
- void *handle = dlopen(backend_plugin.c_str(), RTLD_LAZY | RTLD_LOCAL);
- if (handle == nullptr)
- {
- VERBOSE(BackendManager::loadBackend) << "loadBackend failed to load plugin of "
- << backend.c_str() << " backend: " << dlerror()
- << std::endl;
- return;
- }
-
- VERBOSE(BackendManager::loadBackend) << "loaded " << backend_plugin << " as a plugin of "
- << backend << " backend\n";
-
- {
- // load object creator function
- auto backend_create = (backend_create_t)dlsym(handle, "neurun_backend_create");
- if (backend_create == nullptr)
- {
- fprintf(stderr, "BackendManager: unable to open function neurun_backend_create : %s\n",
- dlerror());
- abort();
- }
-
- // load object creator function
- auto backend_destroy = (backend_destroy_t)dlsym(handle, "neurun_backend_destroy");
- if (backend_destroy == nullptr)
- {
- fprintf(stderr, "BackendManager: unable to open function neurun_backend_destroy : %s\n",
- dlerror());
- abort();
- }
-
- auto backend_object =
- std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
- auto backend_object_raw = backend_object.get();
- bool initialized = backend_object->config()->initialize(); // Call initialize here?
- if (!initialized)
- {
- VERBOSE(BackendManager::loadBackend)
- << backend.c_str() << " backend initialization failed. Don't use this backend"
- << std::endl;
- dlclose(handle);
- return;
- }
- _gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
- _available_backends.push_back(backend_object_raw);
- }
-
- // Save backend handle (avoid warning by handle lost without dlclose())
- auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [](void *h) { dlclose(h); }};
- _handle_map.emplace(backend, std::move(u_handle));
-}
-
-BackendManager::BackendManager()
-{
- const auto backends = util::getConfigString(util::config::BACKENDS);
- for (auto &backend_id : nnfw::misc::split(backends, ';'))
- {
- loadBackend(backend_id);
- }
-
- // No loaded backend
- if (_available_backends.empty())
- {
- VERBOSE(BackendManager::loadBackend) << "There is no loaded backend\n";
- abort();
- }
-}
-
-Backend *BackendManager::get(const std::string &key)
-{
- if (_gen_map.find(key) != _gen_map.end())
- {
- return _gen_map.at(key).get();
- }
-
- return nullptr;
-}
-
-const Backend *BackendManager::get(const std::string &key) const
-{
- if (_gen_map.find(key) != _gen_map.end())
- {
- return _gen_map.at(key).get();
- }
-
- return nullptr;
-}
-
-const Backend *BackendManager::getDefault() const { return get("cpu"); }
-
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/core/src/backend/BackendManager.h b/runtime/neurun/core/src/backend/BackendManager.h
deleted file mode 100644
index 9c6483f9d..000000000
--- a/runtime/neurun/core/src/backend/BackendManager.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_BACKEND_BACKEND_MANAGER_H__
-#define __NEURUN_BACKEND_BACKEND_MANAGER_H__
-
-#include <memory>
-#include <map>
-
-#include "ir/Operands.h"
-#include "backend/Backend.h"
-
-namespace neurun
-{
-namespace backend
-{
-
-class BackendManager
-{
-public:
- using backend_create_t = Backend *(*)();
- using backend_destroy_t = void (*)(Backend *);
- using dlhandle_destroy_t = void (*)(void *);
-
- static BackendManager &get();
-
-public:
- Backend *get(const std::string &key);
- const Backend *get(const std::string &key) const;
- const Backend *getDefault() const;
- const std::vector<const Backend *> &getAll() const { return _available_backends; };
-
-private:
- BackendManager();
-
-private:
- std::vector<const Backend *> _available_backends;
- std::map<std::string, std::unique_ptr<void, dlhandle_destroy_t>> _handle_map;
- std::map<std::string, std::unique_ptr<Backend, backend_destroy_t>> _gen_map;
- /**
- * @brief Allocate an object of a class of a plugin by loading a plugin function, that does
- * allocation, and calling it
- *
- * @param object_of_plugin_class target object
- * @param obj_creator_func_name name of the plugin function, that allocates an object
- * @param handle handle of the plugin
- * @param args arguments to pass to constructor of the plugin class
- *
- * @return
- */
- template <typename T, class... Types>
- void loadObjectFromPlugin(std::shared_ptr<T> &object_of_plugin_class,
- const std::string obj_creator_func_name, void *handle,
- Types &&... args);
-
- /**
- * @brief load backend plugin
- *
- * @param backend backend to be loaded
- *
- * @return
- */
- void loadBackend(const std::string &backend);
-};
-
-} // namespace backend
-} // namespace neurun
-
-#endif // __NEURUN_BACKEND_BACKEND_MANAGER_H__
diff --git a/runtime/neurun/core/src/backend/ExecTime.cc b/runtime/neurun/core/src/backend/ExecTime.cc
deleted file mode 100644
index d5aa679d7..000000000
--- a/runtime/neurun/core/src/backend/ExecTime.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/ExecTime.h"
-
-#include <fstream>
-#include <cassert>
-#include <limits>
-#include <algorithm>
-
-namespace neurun
-{
-namespace backend
-{
-
-int64_t ExecTime::getOperationExecTime(const Backend *backend, const std::string &operation,
- bool quant, uint32_t op_size) const
-{
- auto found_backend = _measurements.find(backend);
- if (found_backend == _measurements.end())
- return NOT_FOUND; // no execution time for this backend
-
- auto found_operation_with_type = found_backend->second.find(operation);
- if (found_operation_with_type == found_backend->second.end())
- // no execution time for this operation
- return NOT_FOUND;
-
- auto found_operation = found_operation_with_type->second.find(quant);
- if (found_operation == found_operation_with_type->second.end())
- // no execution time for this operation
- return NOT_FOUND;
-
- auto found_size = found_operation->second.find(op_size);
- if (found_size != found_operation->second.end())
- return found_size->second; // found execution time
-
- // Try to interpolate
- if (found_operation->second.size() < 2)
- // not possible to do linear interpolation
- return found_operation->second.begin()->second;
-
- // if we reach here, then this means, that there is no record, that is equal to op_size
- auto upper_bound = found_operation->second.upper_bound(op_size); // > op_size
- auto lower_bound = upper_bound;
-
- if (upper_bound == found_operation->second.end()) // all values <= op_size
- {
- upper_bound--;
- lower_bound = upper_bound;
- lower_bound--;
- }
- else if (upper_bound == found_operation->second.begin()) // all values > op_size
- {
- upper_bound++;
- }
- else // op_size between
- {
- lower_bound--;
- }
-
- // Linear interpolation
- const auto x0 = static_cast<int64_t>(lower_bound->first); // size
- const auto x1 = static_cast<int64_t>(upper_bound->first); // size
- const int64_t y0 = lower_bound->second; // time
- const int64_t y1 = upper_bound->second; // time
- const auto x = static_cast<int64_t>(op_size);
-
- int64_t interpolated_value = y0 + (x - x0) * (y1 - y0) / (x1 - x0);
-
- // In some cases ops with smaller inputs is executed slower than the one
- // with larger inputs, more likely because of a backend's load difference
- if (interpolated_value < 0 && x > x1)
- {
- return y0;
- }
- // It must be non-positive ONLY if it's lesser than both of them
- assert(interpolated_value > 0 || x < x0);
-
- // execution time must be non-negative
- return std::max<int64_t>(interpolated_value, 1);
-}
-
-void ExecTime::updateOperationExecTime(const Backend *backend, const std::string &operation,
- bool quant, uint32_t op_size, int64_t time)
-{
- // If the op is not implemented for some input, it should not be scheduled
- const auto &recs = _measurements[backend][operation][quant];
- if (time == getMax() ||
- std::any_of(recs.begin(), recs.end(),
- [](std::pair<const uint32_t, const int64_t> p) { return p.second == getMax(); }))
- {
- _measurements[backend][operation][quant].clear();
- _measurements[backend][operation][quant].emplace(op_size, getMax());
- }
- else
- {
- auto it = _measurements[backend][operation][quant].emplace(op_size, time);
- if (!it.second)
- {
- // affect of the last measurement is bigger than the previous ones:
- // this prefers new metrics than older once, so will adapt backend changes
- it.first->second = (it.first->second + time) / 2;
- }
- }
-}
-
-void ExecTime::updatePermuteTime(const Backend *from_backend, const Backend *to_backend, bool quant,
- uint32_t op_size, int64_t time)
-{
- updateOperationExecTime(from_backend, to_backend->config()->id(), quant, op_size, time);
-}
-
-int64_t ExecTime::getPermuteTime(const Backend *from_backend, const Backend *to_backend, bool quant,
- uint32_t op_size) const
-{
- return getOperationExecTime(from_backend, to_backend->config()->id(), quant, op_size);
-}
-
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/core/src/backend/JSONExecTime.cc b/runtime/neurun/core/src/backend/JSONExecTime.cc
deleted file mode 100644
index e2404b2c8..000000000
--- a/runtime/neurun/core/src/backend/JSONExecTime.cc
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/JSONExecTime.h"
-#include "backend/IConfig.h"
-#include <fstream>
-
-namespace neurun
-{
-namespace backend
-{
-/**
- * @brief Helper function for reading string from stream
- *
- * @param str Output string
- * @param stream File stream
- */
-void readString(std::string &str, std::ifstream &stream)
-{
- str.clear();
- char buf;
- while (stream.good())
- {
- stream.get(buf);
- if (buf == '"')
- break;
- str.push_back(buf);
- }
-}
-
-/**
- * @brief Helper function for reading bool from stream
- *
- * @param quant Output bool
- * @param stream File stream
- */
-void readBool(bool &quant, std::ifstream &stream)
-{
- char buf;
- stream.get(buf);
- quant = (buf == '1');
- stream.get(buf);
-}
-
-void printString(const std::string &str, std::ofstream &stream) { stream << "\"" << str << "\""; }
-
-void printBool(bool quant, std::ofstream &stream) { stream << "\"" << quant << "\""; }
-
-void JSON::readOperation(const std::string &backend, const std::string &operation, bool quant,
- std::ifstream &stream)
-{
- uint32_t size = 0;
- int64_t time = 0;
-
- std::string int_buf;
- char buf;
- int number_of_closed_braces = 0;
- int number_of_commas = 0;
-
- while (stream.good())
- {
- stream.get(buf);
-
- switch (buf)
- {
- case ']':
- {
- number_of_closed_braces++;
- break;
- }
- case '[':
- {
- number_of_closed_braces--;
- break;
- }
- default:
- {
- if (std::isdigit(buf))
- {
- int_buf.push_back(buf);
- }
- break;
- }
- }
-
- if (number_of_closed_braces == 1)
- break;
-
- if ((buf == ']' && number_of_closed_braces == 0) ||
- (buf == ',' && number_of_closed_braces == -1))
- {
- switch (number_of_commas % 2)
- {
- case 0:
- {
- size = static_cast<uint32_t>(std::atoi(int_buf.c_str()));
- break;
- }
- case 1:
- {
- time = static_cast<int64_t>(std::atol(int_buf.c_str()));
- auto bf = _backends.find(backend);
- if (bf != _backends.end())
- {
- _measurements[bf->second][operation][quant][size] = time;
- } // we ignore the records for unsupported backends
- break;
- }
- }
- number_of_commas++;
- int_buf.clear();
- }
- }
-}
-void JSON::printOperation(const std::map<uint32_t, int64_t> &operation_info,
- std::ofstream &stream) const
-{
- for (const auto &items : operation_info)
- {
- stream << "[" << items.first << ", " << items.second << "], ";
- }
- stream.seekp(-2, std::ofstream::end);
-}
-
-void JSON::uploadOperationsExecTime() const
-{
- std::ofstream stream(_measurement_file);
- if (!stream.is_open())
- {
- throw std::runtime_error("Failed to save backend config file");
- }
- else
- {
- stream << "{";
- for (const auto &backend : _measurements)
- {
- printString(backend.first->config()->id(), stream);
- stream << ": {";
- for (const auto &operation : backend.second)
- {
- printString(operation.first, stream);
- stream << ": {";
- for (const auto &type : operation.second)
- {
- printBool(type.first, stream);
- stream << ": [";
- printOperation(type.second, stream);
- stream << "], ";
- }
- stream.seekp(-2, std::ofstream::end);
- stream << "}, ";
- }
- stream.seekp(-2, std::ofstream::end);
- stream << "}, ";
- }
- stream.seekp(-2, std::ofstream::end);
- stream << "}";
- stream.close();
- }
-}
-
-void JSON::loadOperationsExecTime()
-{
- std::ifstream stream(_measurement_file);
- if (stream.is_open())
- {
- std::string backend;
- std::string operation;
- bool quant = false;
- char buf;
- int number_of_open_braces = 0;
-
- while (stream.good())
- {
- stream.get(buf);
- switch (buf)
- {
- case '{':
- number_of_open_braces++;
- break;
- case '}':
- number_of_open_braces--;
- break;
- case '"':
- {
- if (number_of_open_braces == 1)
- {
- // read backend string
- readString(backend, stream);
- }
- if (number_of_open_braces == 2)
- {
- // read operation string
- readString(operation, stream);
- }
- if (number_of_open_braces == 3)
- {
- // read operation string
- readBool(quant, stream);
- }
- break;
- }
- case '[':
- {
- // reading and creating all info for operation
- readOperation(backend, operation, quant, stream);
- break;
- }
- default:
- break;
- }
- }
- stream.close();
- }
-}
-
-} // namespace backend
-} // namespace neurun
diff --git a/runtime/neurun/core/src/compiler/BackendResolver.cc b/runtime/neurun/core/src/compiler/BackendResolver.cc
deleted file mode 100644
index 0c544190c..000000000
--- a/runtime/neurun/core/src/compiler/BackendResolver.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "BackendResolver.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-BackendResolver::BackendResolver(const BackendResolver &obj)
- : _context_manager{}, _gen_map{obj._gen_map}
-{
- for (const auto &e : obj._context_manager)
- {
- _context_manager.emplace(e.first, nnfw::cpp14::make_unique<backend::BackendContext>(*e.second));
- }
-}
-
-BackendResolver &BackendResolver::operator=(const BackendResolver &obj)
-{
- _gen_map = obj._gen_map;
-
- _context_manager.clear();
- for (const auto &e : obj._context_manager)
- {
- _context_manager.emplace(e.first, nnfw::cpp14::make_unique<backend::BackendContext>(*e.second));
- }
-
- return *this;
-}
-
-} // namespace compiler
-} // namespace neurun
diff --git a/runtime/neurun/core/src/compiler/BackendResolver.h b/runtime/neurun/core/src/compiler/BackendResolver.h
deleted file mode 100644
index ddcae793a..000000000
--- a/runtime/neurun/core/src/compiler/BackendResolver.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_COMPILER_BACKEND_RESOLVER_H__
-#define __NEURUN_COMPILER_BACKEND_RESOLVER_H__
-
-#include <unordered_map>
-#include <typeindex>
-
-#include "util/logging.h"
-#include "backend/Backend.h"
-#include "backend/BackendManager.h"
-#include "backend/ITensorBuilder.h"
-#include "ir/OperationIndexMap.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-class BackendResolver
-{
-public:
- BackendResolver(const ir::Operands &operands,
- const std::vector<const backend::Backend *> &backends,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kb)
- {
- for (const auto backend : backends)
- {
- _context_manager.emplace(backend, backend->newContext(operands, kb));
- }
- }
-
- ~BackendResolver() = default;
- BackendResolver(const BackendResolver &obj);
- BackendResolver(BackendResolver &&obj) = default;
- BackendResolver &operator=(const BackendResolver &obj);
- BackendResolver &operator=(BackendResolver &&obj) = default;
-
-public:
- const backend::BackendContext *getBackendContext(const ir::OperationIndex &index) const
- {
- return _context_manager.at(_gen_map.at(index)).get();
- }
-
- const backend::BackendContext *getBackendContext(const backend::Backend *backend) const
- {
- return _context_manager.at(backend).get();
- }
-
- backend::TensorBuilderSet tensor_builders() const
- {
- backend::TensorBuilderSet ret;
- for (const auto &e : _context_manager)
- {
- ret.insert(e.second->tensor_builder);
- }
- return ret;
- }
-
- const backend::Backend *getBackend(const ir::OperationIndex &index) const
- {
- return getBackendContext(index)->backend;
- }
-
- void setBackend(const ir::OperationIndex &index, const backend::Backend *backend)
- {
- _gen_map[index] = backend;
- }
-
- void iterate(const std::function<void(const ir::OperationIndex &,
- const backend::BackendContext &)> &fn) const
- {
- for (const auto &e : _gen_map)
- {
- fn(e.first, *_context_manager.at(e.second));
- }
- }
-
-private:
- std::unordered_map<const backend::Backend *, std::unique_ptr<backend::BackendContext>>
- _context_manager;
- ir::OperationIndexMap<const backend::Backend *> _gen_map;
-};
-
-} // namespace compiler
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_BACKEND_RESOLVER_H__
diff --git a/runtime/neurun/core/src/compiler/CodeWithInfo.h b/runtime/neurun/core/src/compiler/CodeWithInfo.h
deleted file mode 100644
index 73dd1055b..000000000
--- a/runtime/neurun/core/src/compiler/CodeWithInfo.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_COMPILER_CODE_WITH_INFO_H__
-#define __NEURUN_COMPILER_CODE_WITH_INFO_H__
-
-#include <memory>
-
-#include "compiler/Linear.h"
-#include "exec/IFunction.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-struct CodeWithInfo
-{
- Linear::Element elem;
- std::unique_ptr<exec::IFunction> fn;
-
- CodeWithInfo(const Linear::Element &elem, std::unique_ptr<exec::IFunction> &&fn)
- : elem{elem}, fn{std::move(fn)}
- {
- }
-};
-
-} // namespace compiler
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_CODE_WITH_INFO_H__
diff --git a/runtime/neurun/core/src/compiler/Compiler.cc b/runtime/neurun/core/src/compiler/Compiler.cc
deleted file mode 100644
index d5ee39d85..000000000
--- a/runtime/neurun/core/src/compiler/Compiler.cc
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "compiler/Compiler.h"
-
-#include "BackendResolver.h"
-#include "ParamChecker.h"
-#include "ExecutorFactory.h"
-#include "OperationValidator.h"
-
-#include "compiler/IScheduler.h"
-#include "compiler/ManualScheduler.h"
-#include "compiler/HEScheduler.h"
-#include "backend/ExecTime.h"
-#include "ir/operation/LowerInfo.h"
-#include "dumper/dot/DotDumper.h"
-#include "compiler/Linear.h"
-#include "exec/interp/ExecManager.h"
-#include "backend/ExecTime.h"
-#include "util/ConfigSource.h"
-
-namespace neurun
-{
-
-namespace compiler
-{
-
-static void checkProfilerConditions()
-{
- if (!util::getConfigBool(util::config::USE_SCHEDULER))
- throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
-
- if (util::getConfigString(util::config::EXECUTOR) != "Dataflow")
- throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
-}
-
-void Compiler::compile(void)
-{
- _state = State::STARTED;
-
- /***************************************************
- * Prepare compilation phase
- ***************************************************/
-
- // Operation validation check
- OperationValidator{*_graph}();
-
- // Compilable check
- if (!checkCompilable())
- {
- _executor = std::make_shared<exec::interp::ExecManager>(*_graph);
- return;
- }
-
- // Mode check
- if (util::getConfigBool(util::config::PROFILING_MODE))
- checkProfilerConditions();
-
- /***************************************************
- * Backend independent analysis & optimization phase
- ***************************************************/
- // Schedule
- std::unique_ptr<BackendResolver> br;
- std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks;
- if (util::getConfigBool(util::config::USE_SCHEDULER))
- {
- auto scheduler = compiler::HEScheduler(
- _graph->operands(), backend::BackendManager::get().getAll(), _graph->getKernelBuilder());
- br = scheduler.schedule(*_graph);
- indexed_ranks = scheduler.getIndexedRanks();
- }
- else
- {
- auto scheduler = compiler::ManualScheduler();
- br = scheduler.schedule(*_graph);
- }
- _graph->setBackendResolver(std::move(br));
- /*************************************************************
- * Backend independent analysis & optimization phase finished
- *************************************************************/
-
- // dump graph to .dot
- auto dump_level =
- static_cast<dumper::dot::DotDumper::Level>(util::getConfigInt(util::config::GRAPH_DOT_DUMP));
- neurun::dumper::dot::DotDumper dot_dumper(*_graph, dump_level);
- dot_dumper.dump("before_lower");
-
- // Lower: decide backend
- _graph->lower();
- _state = State::LOWERED;
-
- dot_dumper.dump("after_lower");
-
- const std::string executor_str = util::getConfigString(util::config::EXECUTOR);
-
- _executor =
- std::shared_ptr<exec::IExecutor>{ExecutorFactory::get().create(executor_str, *_graph)};
- _executor->setIndexedRanks(indexed_ranks);
- /********************************
- * Code generation phase finished
- ********************************/
- _state = State::COMPILED;
-}
-
-bool Compiler::checkCompilable()
-{
- // Disable compile phase
- // When ready to use interpreter backend, remove this config and use backend setting
- const auto env_disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
- if (env_disable_compile)
- {
- return false;
- }
-
- // TODO check unspecified operand shape
-
- // Check compilable parameter
- ParamChecker paramChecker{_graph};
- paramChecker();
- if (paramChecker.haveNoneConstParam())
- {
- return false;
- }
-
- return true;
-}
-
-} // namespace compiler
-
-} // namespace neurun
diff --git a/runtime/neurun/core/src/compiler/ExecutorFactory.cc b/runtime/neurun/core/src/compiler/ExecutorFactory.cc
deleted file mode 100644
index 59de6c4a4..000000000
--- a/runtime/neurun/core/src/compiler/ExecutorFactory.cc
+++ /dev/null
@@ -1,379 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ExecutorFactory.h"
-
-#include <functional>
-#include "exec/ExecutionObservers.h"
-#include "exec/LinearExecutor.h"
-#include "exec/DataflowExecutor.h"
-#include "exec/ParallelExecutor.h"
-#include "compiler/BackendResolver.h"
-#include "backend/ExecTime.h"
-#include "compiler/Linear.h"
-#include "ir/dumper/Dumper.h"
-#include "SubTensorAnalyzer.h"
-#include "backend/IConstantInitializer.h"
-#include "backend/IKernelGenerator.h"
-#include "backend/IShapeFixer.h"
-#include "backend/ITensorRegister.h"
-#include "cpp14/memory.h"
-#include "CodeWithInfo.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-ExecutorFactory &ExecutorFactory::get()
-{
- static ExecutorFactory singleton;
- return singleton;
-}
-
-ExecutorFactory::ExecutorFactory()
-{
- _map["Linear"] = createLinearExecutor;
- _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, false);
- _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, true);
-}
-
-exec::IExecutor *ExecutorFactory::create(const std::string &id, ir::Graph &graph)
-{
- return _map.at(id)(graph);
-}
-
-exec::IExecutor *ExecutorFactory::createLinearExecutor(ir::Graph &graph)
-{
- auto operand_context = std::make_shared<OperandContext>();
-
- // linearize
- assert(!graph.isBuildingPhase());
- auto linear = nnfw::cpp14::make_unique<Linear>(graph);
-
- // Dump ops
- linear->accept(ir::dumper::Dumper{});
-
- /*************************************************
- * Backend dependent analysis & optimization phase
- *************************************************/
-
- // SubTensorInfo should be generated after lower, before shape correction and finalize
- // because SubTensorAnalyzer assume that insert permutation is already finished
- // lower: decide backend and insert permutation
- // fix shapes: prepare codegen to optimization
- // generate tensor objects: generate tensor using subtensor info
- // generate kernels
- // allocate tesor memory
- // constant intialization: fill the constants with values
- // Generated SubTensorInfo is in operand(Object)
- // for easy pass SubTensorInfo to plan builder and tensor builder
- linear->accept(SubTensorAnalyzer{graph});
-
- /**********************************************************
- * Backend dependent analysis & optimization phase finished
- **********************************************************/
-
- /***********************
- * Code generation phase
- ***********************/
-
- // Fix shapes
- linear->iterate([&](const compiler::Linear::Element &element) {
- auto backend = element.lower_info->backend();
- auto shape_fixer = graph.backend_resolver()->getBackendContext(backend)->shape_fixer;
- shape_fixer->fix(*element.op_seq);
- });
-
- linear->planTensors();
-
- auto tensor_builders = graph.backend_resolver()->tensor_builders();
-
- // Prepare tensors
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_builder->prepare();
- }
-
- // Generate initializers
- linear->generateConstantInitializers();
-
- class ExecutionBuilder final : public IExecutionBuilder
- {
- public:
- void append(std::unique_ptr<exec::IFunction> &&f) override
- {
- _code.emplace_back(_next_elem, std::move(f));
- }
-
- void setNextElem(const compiler::Linear::Element &next_elem) { _next_elem = next_elem; }
- std::vector<CodeWithInfo> releaseCode() { return std::move(_code); }
-
- private:
- compiler::Linear::Element _next_elem;
- std::vector<CodeWithInfo> _code;
- };
-
- ExecutionBuilder builder;
-
- // Generate kernels
- linear->iterate([&](const compiler::Linear::Element &element) {
- auto backend = element.lower_info->backend();
- builder.setNextElem(element);
- auto kernel_gen = graph.backend_resolver()->getBackendContext(backend)->kernel_gen;
- kernel_gen->generate(*element.op_seq, &builder);
- });
-
- auto code = builder.releaseCode();
-
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocateConsts();
- }
-
- // TODO Add optimization passes
-
- // Initialize constant tensors
- for (const auto backend : backend::BackendManager::get().getAll())
- {
- graph.backend_resolver()->getBackendContext(backend)->constant_initializer->run();
- }
-
- for (auto &&e : code)
- {
- e.fn->prepare();
- auto backend = e.elem.lower_info->backend();
- auto tensor_builder = graph.backend_resolver()->getBackendContext(backend)->tensor_builder;
- tensor_builder->postFunctionPrepare();
- }
-
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocateNonconsts();
- }
-
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_builder->finalize();
- }
-
- // Wrap tensors as Object and store them to plan
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_builder->iterate([&](const ir::OperandIndex &index) {
- auto object = tensor_builder->tensorAt(index);
- operand_context->set(index, object);
- });
- }
-
- // Prepare each TensorManager on each backend
- auto tensor_mgrs = nnfw::cpp14::make_unique<backend::TensorManagerSet>();
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_mgrs->insert(tensor_builder->releaseTensorManager());
- }
-
- auto exec =
- new exec::LinearExecutor{graph, operand_context, std::move(tensor_mgrs), std::move(code)};
-
- const std::string trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
- if (!trace_filepath.empty())
- {
- std::unique_ptr<exec::IExecutionObserver> ctp =
- nnfw::cpp14::make_unique<exec::ChromeTracingObserver>(trace_filepath);
- exec->addObserver(std::move(ctp));
- }
-
- return exec;
-}
-
-exec::IExecutor *ExecutorFactory::createDataflowExecutor(ir::Graph &graph, bool parallel)
-{
- auto operand_context = std::make_shared<OperandContext>();
-
- graph.subgraphs().iterate([&](const ir::SubgraphIndex &, const ir::OpSequence &subg) {
- auto subtensor_analyzer = SubTensorAnalyzer{graph};
- subg.accept(subtensor_analyzer);
- });
-
- // Fix shapes and register tensors
- graph.subgraphs().iterate([&](const ir::SubgraphIndex &subg_index, const ir::OpSequence &subg) {
- auto backend = graph.getLowerInfo(subg_index)->backend();
- auto shape_fixer = graph.backend_resolver()->getBackendContext(backend)->shape_fixer;
- shape_fixer->fix(subg);
- const auto tensor_register =
- graph.backend_resolver()->getBackendContext(backend)->tensor_register;
- tensor_register->registerTensors(subg, graph.getLowerInfo());
- });
-
- graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- const auto lower_info = graph.getLowerInfo(ind);
- for (auto factor : lower_info->def_factors())
- {
- auto backend = factor.backend();
- auto tensor_builder = graph.backend_resolver()->getBackendContext(backend)->tensor_builder;
-
- if (!tensor_builder->isRegistered(ind))
- {
- // These tensors do not exist in any op_seq (No use and def)
- // These tensors cannot be a SubTensor
- assert(obj.parent_info() == nullptr);
-
- const auto info = obj.info();
- const auto backend_layout = lower_info->def_factors().getOnlyElement().layout();
- // TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, backend_layout, obj.isConstant());
- }
-
- // Is not SubTensor?
- if (!backend->config()->SupportSubTensorAlloc() || obj.parent_info() == nullptr)
- {
- // To make this never be deallocated, this is a workaround to use static memory planner
- tensor_builder->notifyFirstUse(ind);
- }
- }
- });
-
- auto tensor_builders = graph.backend_resolver()->tensor_builders();
-
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_builder->prepare();
- }
-
- class ExecutionBuilder : public IExecutionBuilder
- {
- public:
- void append(std::unique_ptr<exec::IFunction> &&fn) override
- {
- auto itr = _code_map.find(_next_index);
- if (itr == _code_map.end())
- {
- _code_map[_next_index] = nnfw::cpp14::make_unique<exec::FunctionSequence>();
- }
- _code_map[_next_index]->append(std::move(fn));
- };
-
- // TODO Remove this method and make `append` to get index value as an argument
- void setNextIndex(const ir::SubgraphIndex next_index) { _next_index = next_index; }
-
- exec::DataflowExecutor::CodeMap &&releaseCodeMap() { return std::move(_code_map); }
-
- private:
- ir::SubgraphIndex _next_index;
- exec::DataflowExecutor::CodeMap _code_map;
- };
-
- auto execution_builder = nnfw::cpp14::make_unique<ExecutionBuilder>();
-
- // Generate kernels
- graph.subgraphs().iterate([&](const ir::SubgraphIndex &subg_index, const ir::OpSequence &subg) {
- auto backend = graph.getLowerInfo(subg_index)->backend();
- auto constant_initializer =
- graph.backend_resolver()->getBackendContext(backend)->constant_initializer;
- constant_initializer->generate(subg, graph.operands());
- // TODO This approach is temporal. See declaration of `setNextIndex`.
- execution_builder->setNextIndex(subg_index);
- auto kernel_gen = graph.backend_resolver()->getBackendContext(backend)->kernel_gen;
- kernel_gen->generate(subg, execution_builder.get());
- });
-
- for (const auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocateConsts();
- }
-
- // Initialize constant tensors
- for (const auto backend : backend::BackendManager::get().getAll())
- {
- graph.backend_resolver()->getBackendContext(backend)->constant_initializer->run();
- }
-
- exec::DataflowExecutor::CodeMap code_map = execution_builder->releaseCodeMap();
-
- for (auto &it : code_map)
- {
- auto subg_index = it.first;
- auto &function_sequence = *(it.second);
-
- function_sequence.iterate([&](exec::IFunction &ifunc) {
- // NOTE. It may need avoiding prepare() for some operations
- // Ref: https://github.sec.samsung.net/STAR/nnfw/issues/7326
- ifunc.prepare();
- auto backend = graph.getLowerInfo(subg_index)->backend();
- auto tensor_builder = graph.backend_resolver()->getBackendContext(backend)->tensor_builder;
- tensor_builder->postFunctionPrepare();
- });
- }
-
- for (const auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocateNonconsts();
- }
-
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_builder->finalize();
- }
-
- // Wrap tensors as Object and store them to plan
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_builder->iterate([&](const ir::OperandIndex &index) {
- auto object = tensor_builder->tensorAt(index);
- operand_context->set(index, object);
- });
- }
-
- // Prepare each TensorManager on each backend
- auto tensor_mgrs = nnfw::cpp14::make_unique<backend::TensorManagerSet>();
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_mgrs->insert(tensor_builder->releaseTensorManager());
- }
-
- exec::ExecutorBase *exec = nullptr;
- if (parallel)
- {
- exec = new exec::ParallelExecutor{graph, operand_context, std::move(tensor_mgrs),
- std::move(code_map)};
- }
- else
- {
- exec = new exec::DataflowExecutor{graph, operand_context, std::move(tensor_mgrs),
- std::move(code_map)};
- if (util::getConfigBool(util::config::PROFILING_MODE))
- {
- auto et = std::make_shared<backend::ExecTime>(backend::BackendManager::get().getAll());
- std::unique_ptr<exec::IExecutionObserver> obs =
- nnfw::cpp14::make_unique<exec::ProfileObserver>(et);
- exec->addObserver(std::move(obs));
- }
- }
-
- const std::string trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
- if (!trace_filepath.empty())
- {
- std::unique_ptr<exec::IExecutionObserver> ctp =
- nnfw::cpp14::make_unique<exec::ChromeTracingObserver>(trace_filepath);
- exec->addObserver(std::move(ctp));
- }
-
- return exec;
-}
-
-} // namespace compiler
-} // namespace neurun
diff --git a/runtime/neurun/core/src/compiler/ExecutorFactory.h b/runtime/neurun/core/src/compiler/ExecutorFactory.h
deleted file mode 100644
index 6da4ffc5e..000000000
--- a/runtime/neurun/core/src/compiler/ExecutorFactory.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_COMPILER_EXECUTOR_FACTORY_H__
-#define __NEURUN_COMPILER_EXECUTOR_FACTORY_H__
-
-#include <unordered_map>
-
-#include "exec/IExecutor.h"
-#include "ir/Graph.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-class ExecutorFactory
-{
-public:
- static ExecutorFactory &get();
-
-public:
- exec::IExecutor *create(const std::string &id, ir::Graph &graph);
-
-private:
- ExecutorFactory();
-
-private:
- static exec::IExecutor *createLinearExecutor(ir::Graph &graph);
- static exec::IExecutor *createDataflowExecutor(ir::Graph &graph, bool parallel);
-
-private:
- std::unordered_map<std::string, std::function<exec::IExecutor *(ir::Graph &)>> _map;
-};
-
-} // namespace compiler
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_EXECUTOR_FACTORY_H__
diff --git a/runtime/neurun/core/src/compiler/HEScheduler.cc b/runtime/neurun/core/src/compiler/HEScheduler.cc
deleted file mode 100644
index aec68d655..000000000
--- a/runtime/neurun/core/src/compiler/HEScheduler.cc
+++ /dev/null
@@ -1,628 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/Operand.h"
-#include "compiler/HEScheduler.h"
-#include "ir/Graph.h"
-#include "util/ConfigSource.h"
-#include "compiler/IExecutionBuilder.h"
-#include "compiler/BackendResolver.h"
-#include "backend/IShapeFixer.h"
-#include "util/logging.h"
-#include "util/Utils.h"
-#include "exec/FunctionSequence.h"
-#include <cassert>
-#include <cmath>
-#include <chrono>
-
-namespace neurun
-{
-
-namespace compiler
-{
-static uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::Operation &node)
-{
- uint32_t size = 0;
- for (const auto &input : node.getInputs())
- {
- size += graph.operands().at(input).info().total_size();
- }
- for (const auto &output : node.getOutputs())
- {
- size += graph.operands().at(output).info().total_size();
- }
- return size;
-}
-
-static bool isQuant(const ir::Graph &graph, const ir::Operation &node)
-{
- for (const auto &input : node.getInputs())
- {
- const auto &obj = graph.operands().at(input);
- if (obj.typeInfo().type() == ir::DataType::QUANT8_ASYMM)
- {
- return true;
- }
- }
- return false;
-}
-
-static bool isWorkaroundSkip(const ir::Graph &graph, const backend::Backend *backend,
- const ir::Operation &node, bool quant)
-{
- /* TODO: this is workaround, come up with better solution if have.
- Adding exception in stage doesn't help. Because if there is a record for add without
- broadcast, scheduling will select it since it doesn't distinguish broadcast and
- non-broadcast like it does for quant non-quantized*/
- if (backend->config()->id() == "cpu" &&
- (node.opcode() == ir::OpCode::Add || node.opcode() == ir::OpCode::Sub ||
- node.opcode() == ir::OpCode::Mul))
- {
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
- /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record with and
- * without broadcasting*/
- if (!(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
- {
- return true;
- }
- }
- /* TODO: this is workaround, come up with better solution if have.
- Adding exception in stage doesn't help. Because if there is a record for Mul without
- broadcast, scheduling will select it since it doesn't distinguish broadcast and
- non-broadcast like it does for quant non-quantized*/
- else if (backend->config()->id() == "acl_neon" && node.opcode() == ir::OpCode::Mul)
- {
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- // Nontrivial broadcasting isn't supported yet
- if (quant ||
- !(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
- {
- return true;
- }
- }
- return false;
-}
-
-// if a node can be merged into op_seq
-static bool isMergeable(const ir::Graph &graph, const ir::Operation &node)
-{
- size_t prev_op_cnt = 0;
- for (const auto &input : node.getInputs())
- {
- // only valid_inputs
- const auto &operand = graph.operands().at(input);
- if (operand.isConstant())
- continue;
-
- // This operand is output of operation, not weight or bias
- if (operand.getDef().list().size() > 0)
- ++prev_op_cnt;
-
- // Current node has multiple inputs as concat or at the beginning of the separated branch
- if (prev_op_cnt > 1 || operand.getUses().list().size() > 1)
- {
- return false;
- }
- }
- return true;
-}
-
-void HEScheduler::scheduleShufflingBackends()
-{
- VERBOSE(HEScheduler::schedule)
- << "Started task scheduling: uses all backends to get more metrics for data transfer"
- << std::endl;
- size_t backend_ind = 0;
- for (const auto &rank : _rank_to_op)
- {
- VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second.value() << ")" << std::endl;
- const auto &node = _graph->operations().at(rank.second);
- const bool quant = isQuant(*_graph, node);
- const auto size = getOperationsFlattenedIOSize(*_graph, node);
- for (size_t i = 0;; ++i)
- {
- if (i == _all_backends.size())
- {
- // wasn't able to find backend
- assert(false);
- break;
- }
- if (backend_ind == _all_backends.size())
- {
- backend_ind = 0;
- }
- if (isWorkaroundSkip(*_graph, _all_backends[backend_ind], node, quant))
- {
- ++backend_ind;
- continue;
- }
- const auto exec_time =
- _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size);
- // Scheduling to measure data transfer must be done after measuring all backends separately
- assert(exec_time != _exec_time->NOT_FOUND);
- if (exec_time == _exec_time->getMax())
- {
- ++backend_ind;
- continue;
- }
- _backend_resolver->setBackend(rank.second, _all_backends[backend_ind]);
- VERBOSE(HEScheduler::schedule) << "backend for " << node.name() << " is "
- << _all_backends[backend_ind]->config()->id() << std::endl;
- ++backend_ind;
- break;
- }
- }
-}
-
-bool HEScheduler::isNodeProfiled(const ir::Operation &node)
-{
- const bool quant = isQuant(*_graph, node);
- const auto size = getOperationsFlattenedIOSize(*_graph, node);
- for (const auto *backend : _all_backends)
- {
- const auto exec_time = _exec_time->getOperationExecTime(backend, node.name(), quant, size);
- if (exec_time == _exec_time->NOT_FOUND)
- return false;
- }
- return true;
-}
-
-void HEScheduler::scheduleBranch(const ir::OperationIndex &index,
- ir::OperationIndexMap<bool> &scheduled)
-{
- auto loc_index = index;
- const backend::Backend *parent_backend = nullptr;
- while (true)
- {
- if (scheduled[loc_index])
- {
- return;
- }
- if (!schedule(loc_index, parent_backend))
- {
- return;
- }
- scheduled[loc_index] = true;
- parent_backend = _backend_resolver->getBackend(loc_index);
-
- const auto &node = _graph->operations().at(loc_index);
- /* get the only output operand, that is input of the next single operation
- * and just this nodes output.*/
- if (node.getOutputs().size() != 1)
- {
- return;
- }
- const auto &only_out_operand = _graph->operands().at(*node.getOutputs().begin());
- loc_index = only_out_operand.getUses().list().front();
- /* verify, that next node is neither beginning nor ending node of a branch*/
- const auto &next_node = _graph->operations().at(loc_index);
- if (!isMergeable(*_graph, next_node))
- {
- return;
- }
- }
-}
-
-std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const ir::Graph &graph)
-{
- _graph = &graph;
- VERBOSE(HEScheduler::schedule) << "task scheduling started" << std::endl;
- // Make ranks and save in descending order
- makeRank();
-
- for (const auto *backend : _all_backends)
- {
- _backends_avail_time.emplace(backend, std::map<int64_t, int64_t>{{0, 0}});
- }
-
- const bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE);
- if (is_profiling)
- {
- // Check if profiling info about all backend/node pairs already exists
- bool all_nodes_are_profiled = true;
- _graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
- if (all_nodes_are_profiled)
- all_nodes_are_profiled = isNodeProfiled(op);
- });
-
- // If all nodes are already profiled - schedule backends in such order, so more profiling
- // information about between-backends data transfer could be collected
- if (all_nodes_are_profiled)
- {
- scheduleShufflingBackends();
- VERBOSE(HEScheduler::schedule) << "task scheduling finished" << std::endl;
- return std::move(_backend_resolver);
- }
- }
-
- ir::OperationIndexMap<bool> visited;
- graph.operations().iterate(
- [&](const ir::OperationIndex &index, const ir::Operation &) { visited[index] = false; });
- // for each task select the backend with the smallest earliest finishing time(eft)
- for (const auto &rank : _rank_to_op)
- {
- scheduleBranch(rank.second, visited);
- }
- VERBOSE(HEScheduler::schedule) << "task scheduling finished" << std::endl;
- return std::move(_backend_resolver);
-}
-
-int64_t HEScheduler::getOpTime(const backend::Backend *backend, const std::string &operation,
- bool quant, uint32_t size)
-{
- const auto time = _exec_time->getOperationExecTime(backend, operation, quant, size);
- if (time != _exec_time->NOT_FOUND)
- return time;
-
- return _is_supported.at(backend).at(operation) ? 1 : _exec_time->getMax();
-}
-
-int64_t HEScheduler::getPermuteTime(const backend::Backend *src_backend,
- const backend::Backend *dst_backend, bool quant, uint32_t size)
-{
- const auto time = _exec_time->getPermuteTime(src_backend, dst_backend, quant, size);
- if (time != _exec_time->NOT_FOUND)
- return time;
-
- // Makes the scheduler prefer keeping computations on one backend
- return size / 200;
-}
-
-int64_t HEScheduler::tryBackend(const ir::Operation &node, const backend::Backend *backend)
-{
- // if there is no profiling info don't use this backend during scheduling
- if (!util::getConfigBool(util::config::PROFILING_MODE))
- {
- VERBOSE(HEScheduler::tryBackend)
- << "Trying to HE schedule while there is no profiling info for " << node.name()
- << " on backend " << backend->config()->id() << ". So this backend won't be used. "
- << std::endl;
- _is_supported[backend][node.name()] = false;
- return _exec_time->getMax();
- }
- auto iter = _is_supported.find(backend);
- if (iter != _is_supported.end())
- {
- auto it2 = iter->second.find(node.name());
- if (it2 != iter->second.end())
- {
- return _is_supported[backend][node.name()] ? 1 : _exec_time->getMax();
- }
- }
- try
- {
- node.accept(*_backend_resolver->getBackendContext(backend)->shape_fixer);
-
- _is_supported[backend][node.name()] = true;
- }
- catch (std::runtime_error &e)
- {
- _is_supported[backend][node.name()] = false;
- }
- return _is_supported[backend][node.name()] ? 1 : _exec_time->getMax();
-}
-
-void HEScheduler::makeRank()
-{
- VERBOSE(HEScheduler::makeRank) << "task prioritizing" << std::endl;
-
- _graph->operations().iterate(
- [&](const ir::OperationIndex &index, const ir::Operation &) { DFSMaxRank(index); });
-
- // Check that ranks are calculated for all operations(nodes)
- _graph->operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
- UNUSED_RELEASE(index);
- assert(_op_to_rank->find(index) != _op_to_rank->end());
- });
- VERBOSE(HEScheduler::makeRank) << "task prioritizing finished" << std::endl;
-}
-
-int64_t HEScheduler::DFSMaxRank(const ir::OperationIndex &index)
-{
- auto op_to_rank_it = _op_to_rank->find(index);
- if (op_to_rank_it != _op_to_rank->end())
- return op_to_rank_it->second;
-
- const auto &node = _graph->operations().at(index);
- int64_t rank = 0;
- const bool quant = isQuant(*_graph, node);
- const auto size = getOperationsFlattenedIOSize(*_graph, node);
- auto supported_backends_quantity = static_cast<int64_t>(_all_backends.size());
-
- const auto max_child_rank = DFSChildrenMaxRank(index);
-
- // get average exec time of this op
- for (const auto &backend : _all_backends)
- {
- auto exec_time = _exec_time->getOperationExecTime(backend, node.name(), quant, size);
- if (exec_time == _exec_time->NOT_FOUND)
- {
- exec_time = tryBackend(node, backend);
- }
- if (exec_time < _exec_time->getMax())
- {
- rank += exec_time;
- }
- else
- {
- // this operation isn't supported in this backend
- --supported_backends_quantity;
- }
- }
- if (supported_backends_quantity == 0)
- {
- throw std::runtime_error{"Encountered unsupported op: " + node.name()};
- }
- rank /= supported_backends_quantity;
-
- // get standard deviation
- int64_t std = 0;
- for (const auto backend : _all_backends)
- {
- const auto exec_time = getOpTime(backend, node.name(), quant, size);
- if (exec_time < _exec_time->getMax())
- {
- std += (exec_time - rank) * (exec_time - rank);
- }
- }
- std /= supported_backends_quantity;
- if (std > 0)
- {
- std = static_cast<int>(std::sqrt(std));
- rank *= std;
- }
- rank += max_child_rank;
-
- assert(rank >= 0);
- _rank_to_op.emplace(rank, index);
- _op_to_rank->emplace(index, rank);
- VERBOSE(HEScheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.name()
- << " is " << rank << std::endl;
-
- return rank;
-}
-
-int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index)
-{
- const auto &node = _graph->operations().at(index);
- int64_t max_child_rank = 0;
- for (const auto &output : node.getOutputs())
- {
- const auto &operand = _graph->operands().at(output);
- const bool quant = operand.typeInfo().type() == ir::DataType::QUANT8_ASYMM;
- // average data transfer cost of this operand's data
- int64_t avg_transfer_cost = 1;
- for (const auto *backend : _all_backends)
- {
- for (const auto *other_backend : _all_backends)
- {
- if (backend == other_backend)
- {
- continue;
- }
- auto transfer_cost =
- _exec_time->getPermuteTime(backend, other_backend, quant, operand.info().total_size());
- if (transfer_cost == _exec_time->NOT_FOUND)
- {
- // Makes the scheduler prefer keeping computations on one backend
- transfer_cost = operand.info().total_size() / 100;
- }
- avg_transfer_cost += transfer_cost;
- }
- }
- avg_transfer_cost /= _all_backends.size();
- for (const auto &use : operand.getUses().list())
- {
- const auto cur_child_rank = DFSMaxRank(use);
- max_child_rank = std::max(max_child_rank, cur_child_rank + avg_transfer_cost);
- }
- }
- return max_child_rank;
-}
-
-int64_t HEScheduler::backendAvailableTime(const backend::Backend *backend,
- const int64_t &starting_time, const int64_t &time_amount)
-{
- const auto backend_times = _backends_avail_time.at(backend);
- // finishing and starting times of an op, that will come after current op
- auto next_op_fst = backend_times.upper_bound(starting_time);
- // finishing time of an op, that will come before current op
- auto prev_op_ft = starting_time;
- // until reach the "hole/gap", that is enough to run this op
- while (next_op_fst != backend_times.end() && next_op_fst->second - prev_op_ft <= time_amount)
- {
- prev_op_ft = next_op_fst->first + 1;
- ++next_op_fst;
- }
- return prev_op_ft;
-}
-
-bool HEScheduler::schedule(const ir::OperationIndex &index, const backend::Backend *parent_backend)
-{
- VERBOSE(HEScheduler::schedule) << "scheduling (" << index.value() << ")" << std::endl;
- int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
- const auto &node = _graph->operations().at(index);
-
- std::multimap<int64_t, int64_t> selected_transfer_st_exec_time;
- // select the backend with the smallest eft of this task
- const backend::Backend *chosen_backend = nullptr;
- for (const auto *backend : _all_backends)
- {
- std::multimap<int64_t, int64_t> transfer_st_exec_time;
- const auto est_and_et = ESTAndExecTime(backend, index, transfer_st_exec_time);
-
- if (eft > est_and_et.first + est_and_et.second)
- {
- eft = est_and_et.first + est_and_et.second;
- selected_exec_time = est_and_et.second;
- chosen_backend = backend;
- selected_transfer_st_exec_time = transfer_st_exec_time;
- }
- }
-
- if (chosen_backend == nullptr)
- {
- throw std::runtime_error{"Fail to choose backend on scheduler"};
- }
-
- // this is part of a branch and it is assigned another backend
- if (parent_backend && parent_backend != chosen_backend)
- {
- return false;
- }
- for (const auto &it : selected_transfer_st_exec_time)
- {
- auto prev_op_ft = backendAvailableTime(_cpu_backend, it.first, it.second);
- _backends_avail_time[_cpu_backend].insert({prev_op_ft + it.second, prev_op_ft});
- }
-
- _ops_eft[index] = eft;
- _backends_avail_time[chosen_backend].emplace(eft, eft - selected_exec_time);
- _backend_resolver->setBackend(index, chosen_backend);
-
- VERBOSE(HEScheduler::schedule) << "backend for " << node.name() << " is "
- << chosen_backend->config()->id() << ". Its eft: " << eft
- << std::endl;
- return true;
-}
-
-std::pair<int64_t, int64_t>
-HEScheduler::ESTAndExecTime(const backend::Backend *backend, const ir::OperationIndex &index,
- std::multimap<int64_t, int64_t> &transfer_st_exec_time)
-{
- const bool is_linear_exec = "Linear" == util::getConfigString(util::config::EXECUTOR);
- const bool is_parallel_exec = "Parallel" == util::getConfigString(util::config::EXECUTOR);
- // Permutation will cause creating a separate op_seq that contains just this permutation node.
- // This isn't needed for Linear executor since it doesn't use subgraphs
- // Number 1 ms is picked experimentally
- int64_t permute_fine = 1000;
- // Multiply cpu operations' exec time by 2 because in parallel executor it might be busy with
- // permutation on other branches or non-nnfw specific tasks and have to wait for it.
- // Number 2 is picked experimentally
- const int64_t CPU_DELAY = 2;
- const auto &node = _graph->operations().at(index);
- const bool quant = isQuant(*_graph, node);
- const auto size = getOperationsFlattenedIOSize(*_graph, node);
- // if this node can be part of a op_seq, then assigning different backend will cause creating
- // another op_seq
- if (isMergeable(*_graph, node))
- {
- permute_fine *= 2;
- }
- if (isWorkaroundSkip(*_graph, backend, node, quant))
- {
- return {_exec_time->getMax(), _exec_time->getMax()};
- }
- // get average exec time of the op on this backend
- auto exec_time = getOpTime(backend, node.name(), quant, size);
- if (backend->config()->id() == "cpu" && is_parallel_exec)
- {
- exec_time *= CPU_DELAY;
- }
-
- // get max eft of direct (one level above) predecessors
- auto max_pred_eft = predMaxEFT(backend, node, transfer_st_exec_time);
-
- int64_t total_transfer_cost = 0;
- std::vector<std::multimap<int64_t, int64_t>::iterator> inserted_permutations;
- // Find free time for data transferring and insert it into backend taskset. This is needed:
- // 1. Time for multiple permutations for this node's input is found correctly
- // 2. If backend==cpu, then free time for this node must come after permutations
- for (auto &it : transfer_st_exec_time)
- {
- if (is_parallel_exec)
- {
- it.second *= CPU_DELAY;
- }
- if (!is_linear_exec)
- {
- it.second += permute_fine;
- }
- total_transfer_cost += it.second;
-
- const auto prev_op_ft = backendAvailableTime(_cpu_backend, it.first, it.second);
-
- max_pred_eft = std::max(max_pred_eft, prev_op_ft + it.second);
-
- const auto tmp = _backends_avail_time[_cpu_backend].emplace(prev_op_ft + it.second, prev_op_ft);
- inserted_permutations.push_back(tmp.first);
- }
- // find the hole/gap, where this op can be put or the finishing time of the last assigned op
- auto prev_op_ft = backendAvailableTime(backend, max_pred_eft, exec_time);
-
- // Remove inserted permutation from cpu's task set
- for (const auto &it : inserted_permutations)
- {
- _backends_avail_time[_cpu_backend].erase(it);
- }
-
- /* In case non-parallel executor measure just exec time and data transfer time
- * because EFT(prev_op_ft) is the same for all backends. Since two operations
- * can't be run simultaneously, finish of running operation must be waited for.
- * When an operation starts, all backends are free. So, they need time just for
- * data transfer.*/
- if (!is_parallel_exec)
- {
- VERBOSE(HEScheduler::ESTAndExecTime)
- << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
- << backend->config()->id() << " is " << exec_time
- << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
-
- return {total_transfer_cost, exec_time};
- }
- VERBOSE(HEScheduler::ESTAndExecTime)
- << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
- << backend->config()->id() << ": " << exec_time
- << " microseconds. Backend available time: " << prev_op_ft
- << " Parent's max eft: " << max_pred_eft - total_transfer_cost
- << " data transfer cost: " << total_transfer_cost << std::endl;
-
- return {prev_op_ft, exec_time};
-}
-
-int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::Operation &node,
- std::multimap<int64_t, int64_t> &transfer_st_exec_time)
-{
- int64_t max_pred_eft = 0;
- for (const auto &input_operand_idx : node.getInputs())
- {
- const auto &input_operand = _graph->operands().at(input_operand_idx);
- const bool quant = input_operand.typeInfo().type() == ir::DataType::QUANT8_ASYMM;
-
- for (const auto &input_node_idx : input_operand.getDef().list())
- {
- // Data transfer cost from parent's node backend to current node's backend:
- auto parent_backend = _backend_resolver->getBackend(input_node_idx);
-
- max_pred_eft = std::max(max_pred_eft, _ops_eft.at(input_node_idx));
- if (parent_backend != backend)
- {
- // Multiply operand size by 2 because size must describe input+output size
- int64_t transfer_cost =
- getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
- transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost);
- }
- }
- }
- return max_pred_eft;
-}
-
-} // namespace compiler
-
-} // namespace neurun
diff --git a/runtime/neurun/core/src/compiler/HEScheduler.h b/runtime/neurun/core/src/compiler/HEScheduler.h
deleted file mode 100644
index 538427065..000000000
--- a/runtime/neurun/core/src/compiler/HEScheduler.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file HEScheduler.h
- * @brief This file contains HEScheduler class to define and run task Heterogeneous Execution
- * Scheduler
- */
-
-#ifndef __NEURUN_COMPILER_H_E_SCHEDULER_H_
-#define __NEURUN_COMPILER_H_E_SCHEDULER_H_
-
-#include "compiler/IScheduler.h"
-#include "ir/Graph.h"
-#include "backend/ExecTime.h"
-#include "backend/Backend.h"
-#include "cpp14/memory.h"
-#include "ir/OperationIndexMap.h"
-#include <map>
-#include <memory>
-
-namespace neurun
-{
-
-namespace compiler
-{
-/**
- * @brief Class to schedule tasks
- */
-class HEScheduler : IScheduler
-{
-public:
- /**
- * @brief Construct a new Heterogeneous Execution Scheduler object
- * @param[in] model Graph model
- * @param[in] backend_resolver backend resolver
- */
- HEScheduler(const ir::Operands &operands, std::vector<const backend::Backend *> backends,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kb)
- : _is_supported{}, _backends_avail_time{}, _ops_eft{},
- _op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()},
- _all_backends(std::move(backends))
- {
- _backend_resolver =
- nnfw::cpp14::make_unique<compiler::BackendResolver>(operands, _all_backends, kb);
- _exec_time = nnfw::cpp14::make_unique<backend::ExecTime>(_all_backends);
-
- // Find cpu backend
- auto cpu_backend_it = std::find_if(
- _all_backends.begin(), _all_backends.end(),
- [](const backend::Backend *backend) { return backend->config()->id() == "cpu"; });
- if (cpu_backend_it == _all_backends.end())
- throw std::runtime_error("HEScheduler could be used only if 'cpu' backend is available");
- _cpu_backend = *cpu_backend_it;
- }
-
-public:
- /**
- * @brief Task scheduling
- *
- * @note The main idea is taken from HSIP algo:
- * https://www.hindawi.com/journals/sp/2016/3676149/
- */
- std::unique_ptr<compiler::BackendResolver> schedule(const ir::Graph &graph) final;
- std::shared_ptr<ir::OperationIndexMap<int64_t>> getIndexedRanks() { return _op_to_rank; }
-
-private:
- bool isNodeProfiled(const ir::Operation &);
-
- bool schedule(const ir::OperationIndex &, const backend::Backend *parent_backend);
- /**
- * @brief Get earliest starting time and execution time of an operation on a backend.
- *
- * @note Returns a time when operation's inputs are ready and backend is available
- * It also returns exec time. If this is "cpu" backend, then exec_time*CPU_DELAY
- *
- * @param[in] backend: backend, for which to return the time
- * @param[in] index: index of an operation
- * @param[out] transfer_st_exec_time: est and exec time of data transfer operation
- *
- * @return earliest starting time and execution time
- */
- std::pair<int64_t, int64_t>
- ESTAndExecTime(const backend::Backend *backend, const ir::OperationIndex &index,
- std::multimap<int64_t, int64_t> &transfer_st_exec_time);
- /**
- * @brief Returns the latest finishing time of parents of a node.
- *
- * @param[in] backend: backend, for which to return the time
- * @param[in] node: node to get eft of parents
- * @param[out] transfer_st_exec_time: est and exec time of data transfer operation
- *
- * @return earliest finishing time of parent nodes
- */
- int64_t predMaxEFT(const backend::Backend *backend, const ir::Operation &node,
- std::multimap<int64_t, int64_t> &transfer_st_exec_time);
-
- void makeRank();
-
- int64_t DFSMaxRank(const ir::OperationIndex &index);
-
- int64_t DFSChildrenMaxRank(const ir::OperationIndex &index);
- /**
- * @brief Returns the time, when backend is available for at least given amount of time.
- *
- * @note Returns either hole/gap between two performing two already scheduled operations,
- * or the finishing time of the last scheduled operation
- *
- * @param[in] backend backend, for which to return the time
- * @param[in] starting_time time, starting which to look for gap
- * @param[in] time_amount amount of the time, for which to look gap
- *
- * @return time, when backend has at least time_amount free time
- */
- int64_t backendAvailableTime(const backend::Backend *backend, const int64_t &starting_time,
- const int64_t &time_amount);
-
- int64_t getOpTime(const backend::Backend *backend, const std::string &operation, bool quant,
- uint32_t size);
-
- int64_t getPermuteTime(const backend::Backend *src_backend, const backend::Backend *dst_backend,
- bool quant, uint32_t size);
-
- void scheduleShufflingBackends();
-
- int64_t tryBackend(const ir::Operation &node, const backend::Backend *backend);
-
- /**
- * @brief Schedule a node and its successor until:
- * 1. there is no branching or connection of multiple branches
- * 2. for subsequent nodes: other than predecessor's backend is prefered
- *
- * @param[in] index: index of an operation
- * @param[in] scheduled: a map to check if this node has already been scheduled
- *
- * @return N/A
- */
- void scheduleBranch(const ir::OperationIndex &index, ir::OperationIndexMap<bool> &scheduled);
-
-private:
- // This variable stores backend/node pairs with unknown execution time, and hints scheduler
- // whether it should assign these backends to these nodes:
- // * It stores false for unsupported nodes
- // * During rank calculation with enabled profiling mode it stores true for supported nodes
- std::unordered_map<const backend::Backend *, std::unordered_map<std::string, bool>> _is_supported;
- // Finishing and starting time of each backend
- std::unordered_map<const backend::Backend *, std::map<int64_t, int64_t>> _backends_avail_time;
- ir::OperationIndexMap<int64_t> _ops_eft;
- std::multimap<int64_t, ir::OperationIndex, std::greater<int64_t>> _rank_to_op;
- std::shared_ptr<ir::OperationIndexMap<int64_t>> _op_to_rank;
- std::unique_ptr<compiler::BackendResolver> _backend_resolver;
- std::unique_ptr<backend::ExecTime> _exec_time;
- const ir::Graph *_graph{nullptr};
- const std::vector<const backend::Backend *> _all_backends;
- const backend::Backend *_cpu_backend{nullptr};
-};
-
-} // namespace compiler
-
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_H_E_SCHEDULER_H_
diff --git a/runtime/neurun/core/src/compiler/IScheduler.h b/runtime/neurun/core/src/compiler/IScheduler.h
deleted file mode 100644
index dc1373ff9..000000000
--- a/runtime/neurun/core/src/compiler/IScheduler.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_CORE_COMPILER_I_SCHEDULER_H__
-#define __NEURUN_CORE_COMPILER_I_SCHEDULER_H__
-
-#include "BackendResolver.h"
-#include "ir/Graph.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-struct IScheduler
-{
- virtual ~IScheduler() = default;
-
- virtual std::unique_ptr<BackendResolver> schedule(const ir::Graph &graph) = 0;
-};
-
-} // namespace compiler
-} // namespace neurun
-
-#endif // __NEURUN_CORE_COMPILER_I_SCHEDULER_H__
diff --git a/runtime/neurun/core/src/compiler/Linear.cc b/runtime/neurun/core/src/compiler/Linear.cc
deleted file mode 100644
index b18dcea02..000000000
--- a/runtime/neurun/core/src/compiler/Linear.cc
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <algorithm>
-
-#include "Linear.h"
-
-#include "backend/IShapeFixer.h"
-#include "backend/IConfig.h"
-#include "backend/IConstantInitializer.h"
-#include "backend/ITensorRegister.h"
-#include "backend/Backend.h"
-#include "compiler/SubTensorInfo.h"
-
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-Linear::Linear(ir::Graph &graph) : _graph(graph)
-{
- // Get SubgraphSequence by topological sorting
- {
- ir::Subgraphs &subgraphs = _graph.subgraphs();
- ir::Operands &operands = _graph.operands();
- // subgraphs can't access a op_seq by an operand so that input_to_subgs can offer it
- std::unordered_map<ir::OperandIndex, std::list<ir::SubgraphIndex>> input_to_subgs;
-
- // Get the relations between input/op_seq to be used for dfs-post-iter
- //
- // [0] # input -> _input_to_op_seqes[0] = {SUBG0}
- // |
- // [SUBG0]
- // |
- // [1]-----. # input -> _input_to_op_seqes[1] = {SUBG1, SUBG2}
- // | |
- // [SUBG1] [SUBG2]
- // | |
- // [2] [3] # input -> _input_to_op_seqes[2] = {SUBG3}
- // \ / # input -> _input_to_op_seqes[3] = {SUBG3}
- // [SUBG3]
- // |
- // [4]
- subgraphs.iterate([&](const ir::SubgraphIndex &subg_idx, ir::OpSequence &subg) {
- for (auto input : subg.getInputs())
- {
- // only valid_inputs
- const auto &operand = operands.at(input);
- if (operand.isConstant())
- continue;
-
- auto it = input_to_subgs.find(input);
- if (it == input_to_subgs.end())
- {
- std::list<ir::SubgraphIndex> list{subg_idx};
- input_to_subgs[input] = list;
- }
- else
- {
- it->second.push_back(subg_idx);
- }
- }
- });
-
- std::unordered_map<ir::SubgraphIndex, bool> visited;
- subgraphs.iterate(
- [&](const ir::SubgraphIndex &index, const ir::OpSequence &) { visited[index] = false; });
-
- std::function<void(const ir::SubgraphIndex &, ir::OpSequence &)> dfs_recursive =
- [&](const ir::SubgraphIndex &index, ir::OpSequence &subg) -> void {
- if (visited[index])
- return;
- visited[index] = true;
-
- // The outputs should be not constants
- for (auto output : subg.getOutputs())
- {
- const auto it = input_to_subgs.find(output);
- if (it != input_to_subgs.end())
- {
- const auto &subg_index_list = it->second;
- for (const auto &index : subg_index_list)
- {
- auto &subg = subgraphs.at(index);
- dfs_recursive(index, subg);
- }
- }
- }
-
- _elements.emplace_back(&subgraphs.at(index), _graph.getLowerInfo(index));
- };
-
- subgraphs.iterate(dfs_recursive);
-
- // All of the nodes must have been visited.
- assert(std::all_of(visited.begin(), visited.end(),
- [](const std::pair<const ir::SubgraphIndex, bool> &v) { return v.second; }));
-
- // NOTE. Now these op_seq are on the reverse order
- std::reverse(_elements.begin(), _elements.end());
- }
-
- {
- const auto &backendToString = [](const neurun::backend::Backend *backend) {
- assert(backend);
- std::string str;
- str += backend->config()->id();
- str += " ";
- return "{ " + str + "}";
- };
-
- VERBOSE(Linear) << "Final SubgraphSequence" << std::endl;
- for (const auto &element : _elements)
- {
- const auto subg = element.op_seq;
- const auto lower_info = element.lower_info;
- VERBOSE(Linear) << "* SUBG"
- << " " << backendToString(lower_info->backend()) << " " << subg->getStr()
- << std::endl;
- }
- }
-}
-
-void Linear::accept(ir::OperationVisitor &&visitor) const
-{
- for (const auto &e : _elements)
- {
- e.op_seq->accept(visitor);
- }
-}
-
-void Linear::planTensors()
-{
- ir::OperandIndexMap<std::shared_ptr<backend::ITensorBuilder>> tensor_builder_map;
-
- // NOTE
- // While current ITensorBuilder exposes registerSubTensorInfo for subtensor,
- // this stage uses registerSubTensorInfo() and notify{First|Last}Use()
- // but handling subtensor should be processed on each backend. See #5726.
- ir::OperandIndexMap<uint32_t> uses_map;
- ir::OperandIndexMap<uint32_t> def_map;
- ir::OperandIndexSequence constants;
-
- iterate([&](const neurun::compiler::Linear::Element &element) {
- const auto backend = element.lower_info->backend();
- const auto tensor_register =
- _graph.backend_resolver()->getBackendContext(backend)->tensor_register;
- tensor_register->registerTensors(*element.op_seq, _graph.getLowerInfo());
- });
-
- // Prepare scanning
- _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- const auto lower_info = _graph.getLowerInfo(ind);
- // TODO Remove if neurun doesn't support anymore such as
- // GeneratedTests.reshape_quant8_weights_as_inputs
- if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 &&
- !_graph.getInputs().contains(ind))
- {
- VERBOSE(LINEAR) << "Operand #" << ind.value() << " will be not used. no more process."
- << std::endl;
- return;
- }
-
- uses_map[ind] = obj.getUses().size();
- def_map[ind] = obj.getDef().size(); // should be 1 or 0
-
- bool is_const = obj.isConstant();
- if (is_const)
- {
- constants.append(ind);
- }
-
- for (auto factor : lower_info->def_factors())
- {
- auto backend = factor.backend();
- auto tensor_builder = _graph.backend_resolver()->getBackendContext(backend)->tensor_builder;
-
- if (!tensor_builder->isRegistered(ind))
- {
- // These tensors do not exist in any op_seq (No use and def)
- // These tensors cannot be a SubTensor
- assert(obj.parent_info() == nullptr);
-
- const auto info = obj.info();
- const auto backend_layout = lower_info->def_factors().getOnlyElement().layout();
- // TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, backend_layout, is_const);
- }
-
- tensor_builder_map[ind] = tensor_builder;
- }
- });
-
- // If a tensor is model output, increase the use of the tensor.
- // This aim is same to above one.
- for (const auto &ind : _graph.getOutputs())
- {
- uses_map[ind]++;
- }
-
- // Start scanning to do notify{First|Last}Use for each tensor
-
- // If a tensor is a constant, increase the use of the tensor.
- // It makes the tensor not be dealloced. It means these will be deallocated last.
- // And allocate constant operands first
- VERBOSE(LINEAR) << "TENSORS as CONSTANT" << std::endl;
- for (const auto &ind : constants)
- {
- uses_map[ind]++;
- tensor_builder_map[ind]->notifyFirstUse(ind);
- }
-
- // Allocate Model's inputs
- VERBOSE(LINEAR) << "TENSORS as MODEL INPUT" << std::endl;
- for (const auto &ind : _graph.getInputs())
- {
- auto tensor_builder = tensor_builder_map[ind];
- if (!tensor_builder) // for GeneratedTests.xxx_weights_as_inputs
- continue;
- tensor_builder->notifyFirstUse(ind);
- }
-
- // At each operation,
- // 1. Scan DEF of outputs. If the DEF, allocate it
- // 2. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- VERBOSE(LINEAR) << "TENSORS" << std::endl;
- for (const auto &e : _elements)
- {
- for (const auto &op : e.op_seq->operations())
- {
- for (const auto &ind : op.node->getOutputs())
- {
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder_map[ind]->notifyFirstUse(ind);
- }
- }
-
- for (const auto &ind : op.node->getInputs())
- {
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- tensor_builder_map[ind]->notifyLastUse(ind);
- }
- }
- }
- }
-
- // Dispose and validate
- for (const auto &ind : _graph.getOutputs())
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder_map[ind]->notifyLastUse(ind);
- }
- }
-
- for (const auto &ind : constants)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder_map[ind]->notifyLastUse(ind);
- }
- }
-
- assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
- assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
-void Linear::iterate(const std::function<void(const Element &element)> &fn) const
-{
- for (const auto &e : _elements)
- {
- fn(e);
- }
-}
-
-void Linear::generateConstantInitializers(void) const
-{
- iterate([&](const compiler::Linear::Element &element) {
- auto backend = element.lower_info->backend();
-
- auto constant_initializer =
- _graph.backend_resolver()->getBackendContext(backend)->constant_initializer;
- constant_initializer->generate(*element.op_seq, _graph.operands());
- });
-}
-
-} // namespace compiler
-} // namespace neurun
diff --git a/runtime/neurun/core/src/compiler/Linear.h b/runtime/neurun/core/src/compiler/Linear.h
deleted file mode 100644
index e10d03695..000000000
--- a/runtime/neurun/core/src/compiler/Linear.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_COMPILER_LINEAR_H__
-#define __NEURUN_COMPILER_LINEAR_H__
-
-#include <vector>
-#include <memory>
-
-#include "ir/Subgraphs.h"
-#include "backend/ITensorBuilder.h"
-#include "ir/Graph.h"
-#include "compiler/BackendResolver.h"
-
-namespace neurun
-{
-namespace ir
-{
-struct OperationVisitor;
-} // namespace ir
-} // namespace neurun
-
-namespace neurun
-{
-namespace compiler
-{
-
-class Linear
-{
-public:
- struct Element
- {
- const ir::OpSequence *op_seq;
- const ir::operation::LowerInfo *lower_info;
-
- Element() : op_seq{nullptr}, lower_info{nullptr} {}
-
- Element(const ir::OpSequence *op_seq, const ir::operation::LowerInfo *lower_info)
- : op_seq{op_seq}, lower_info{lower_info}
- {
- // DO NOTHING
- }
- };
-
-public:
- Linear(ir::Graph &graph);
-
-public:
- Linear(const Linear &linear) = delete;
-
-public:
- void accept(ir::OperationVisitor &&visitor) const;
-
- void planTensors();
-
- void iterate(const std::function<void(const Element &element)> &fn) const;
-
- void generateConstantInitializers(void) const;
-
-private:
- ir::Graph &_graph;
- std::vector<Element> _elements;
-};
-
-} // namespace compiler
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_LINEAR_H__
diff --git a/runtime/neurun/core/src/compiler/ManualScheduler.cc b/runtime/neurun/core/src/compiler/ManualScheduler.cc
deleted file mode 100644
index 1fb9d3759..000000000
--- a/runtime/neurun/core/src/compiler/ManualScheduler.cc
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ManualScheduler.h"
-#include "ir/OpCode.h"
-#include "ir/Operations.Include.h"
-#include "backend/Backend.h"
-#include "backend/BackendManager.h"
-#include "backend/IConfig.h"
-#include "util/ConfigSource.h"
-#include "misc/string_helpers.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &graph)
-{
- auto backend_resolver = nnfw::cpp14::make_unique<compiler::BackendResolver>(
- graph.operands(), backend::BackendManager::get().getAll(), graph.getKernelBuilder());
-
- // 1. Backend for All operations
- auto backend_all_str = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
- backend::Backend *backend_all = nullptr;
-
- if (backend_all_str.empty())
- {
- const auto backends_str = util::getConfigString(util::config::BACKENDS);
- size_t prev_pos = 0;
- auto pos = backends_str.find(";");
- while (pos != std::string::npos)
- {
- backend_all_str = backends_str.substr(prev_pos, pos - prev_pos);
- backend_all = backend::BackendManager::get().get(backend_all_str);
- if (backend_all != nullptr)
- break;
-
- prev_pos = pos + 1;
- pos = backends_str.find(";", prev_pos);
- }
- // if backends doesn't terminate with ";"
- if (backend_all == nullptr && prev_pos < backends_str.size())
- {
- backend_all_str = backends_str.substr(prev_pos);
- backend_all = backend::BackendManager::get().get(backend_all_str);
- }
- }
- else
- {
- backend_all = backend::BackendManager::get().get(backend_all_str);
- }
-
- VERBOSE(ManualScheduler) << "Default backend for all ops: " << backend_all_str << std::endl;
-
- graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
- backend_resolver->setBackend(index, backend_all);
- });
-
- // 2. Backend per operation type
- std::unordered_map<ir::OpCode, backend::Backend *> op_type_map;
- // By default, Custom uses cpu backend
- op_type_map[ir::OpCode::Custom] = backend::BackendManager::get().get("cpu");
-
-#define OP(InternalName) \
- { \
- const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##InternalName); \
- if (!backend_str.empty()) \
- { \
- auto backend = backend::BackendManager::get().get(backend_str); \
- VERBOSE(Lower) << "backend for " << #InternalName << ": " << backend_str << std::endl; \
- op_type_map[ir::OpCode::InternalName] = backend; \
- } \
- }
-#include "ir/Operations.lst"
-#undef OP
-
- graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &operation) {
- auto itr = op_type_map.find(operation.opcode());
- if (itr != op_type_map.end())
- {
- backend_resolver->setBackend(index, itr->second);
- }
- });
-
- // 3. Backend per operation
- try
- {
- auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
- auto key_val_list = nnfw::misc::split(map_str, ';');
- for (const auto &key_val_str : key_val_list)
- {
- if (key_val_str.empty())
- {
- continue;
- }
-
- auto key_val = nnfw::misc::split(key_val_str, '=');
- const auto &key_str = key_val.at(0);
- const auto &val = key_val.at(1);
- auto key = static_cast<uint32_t>(std::stoi(key_str));
-
- graph.operations().at(ir::OperationIndex{key}); // Check if exist, or this wil throw
- backend_resolver->setBackend(ir::OperationIndex{key},
- backend::BackendManager::get().get(val));
- }
- }
- catch (...)
- {
- VERBOSE(ManualScheduler) << "Invalid value from " << util::config::OP_BACKEND_MAP
- << ". Some of the given values are ignored" << std::endl;
- }
-
- // 4. Operations that are specially handled
- // All configuration above will be ignored(overwritten)
- op_type_map[ir::OpCode::Permute] = backend::BackendManager::get().get("cpu");
-
- // Dump final assignment
- backend_resolver->iterate(
- [&](const ir::OperationIndex &index, const backend::BackendContext &backend_ctx) {
- VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": "
- << backend_ctx.backend->config()->id() << std::endl;
- });
-
- return backend_resolver;
-}
-
-} // namespace compiler
-} // namespace neurun
diff --git a/runtime/neurun/core/src/compiler/ManualScheduler.h b/runtime/neurun/core/src/compiler/ManualScheduler.h
deleted file mode 100644
index a442cec08..000000000
--- a/runtime/neurun/core/src/compiler/ManualScheduler.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_CORE_COMPILER_MANUAL_SCHEDULER_H__
-#define __NEURUN_CORE_COMPILER_MANUAL_SCHEDULER_H__
-
-#include "IScheduler.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-class ManualScheduler : public IScheduler
-{
-public:
- std::unique_ptr<BackendResolver> schedule(const ir::Graph &graph) override;
-};
-
-} // namespace compiler
-} // namespace neurun
-
-#endif // __NEURUN_CORE_COMPILER_MANUAL_SCHEDULER_H__
diff --git a/runtime/neurun/core/src/compiler/OperandContext.cc b/runtime/neurun/core/src/compiler/OperandContext.cc
deleted file mode 100644
index c06f6157b..000000000
--- a/runtime/neurun/core/src/compiler/OperandContext.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperandContext.h"
-
-#include <cassert>
-
-namespace neurun
-{
-namespace compiler
-{
-
-OperandContext &OperandContext::set(const ir::OperandIndex &id,
- const std::shared_ptr<backend::operand::ITensor> &tensor)
-{
- // Only one tensor for an id
- assert(_tensors.find(id) == _tensors.end());
- _tensors[id] = tensor;
- return (*this);
-}
-
-void OperandContext::iterate(
- const std::function<void(const ir::OperandIndex &, backend::operand::ITensor &)> &fn)
-{
- for (auto &e : _tensors)
- {
- fn(e.first, *e.second);
- }
-}
-
-} // namespace compiler
-} // namespace neurun
diff --git a/runtime/neurun/core/src/compiler/OperandContext.h b/runtime/neurun/core/src/compiler/OperandContext.h
deleted file mode 100644
index da1a51bb9..000000000
--- a/runtime/neurun/core/src/compiler/OperandContext.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_COMPILER_OPERAND_CONTEXT_H__
-#define __NEURUN_COMPILER_OPERAND_CONTEXT_H__
-
-#include "backend/operand/ITensor.h"
-#include "ir/OperandIndexMap.h"
-#include <unordered_map>
-#include <memory>
-
-namespace neurun
-{
-namespace compiler
-{
-
-class OperandContext
-{
-public:
- OperandContext &set(const ir::OperandIndex &ind,
- const std::shared_ptr<backend::operand::ITensor> &tensor);
-
-public:
- bool exist(const ir::OperandIndex &ind) const { return _tensors.find(ind) != _tensors.end(); }
-
-public:
- std::shared_ptr<backend::operand::ITensor> at(const ir::OperandIndex &ind) const
- {
- return _tensors.at(ind);
- }
-
- std::shared_ptr<backend::operand::ITensor> &at(const ir::OperandIndex &ind)
- {
- return _tensors.at(ind);
- }
-
- void
- iterate(const std::function<void(const ir::OperandIndex &, backend::operand::ITensor &)> &fn);
-
-private:
- ir::OperandIndexMap<std::shared_ptr<backend::operand::ITensor>> _tensors;
-};
-
-} // namespace compiler
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_OPERAND_CONTEXT_H__
diff --git a/runtime/neurun/core/src/compiler/OperationValidator.cc b/runtime/neurun/core/src/compiler/OperationValidator.cc
deleted file mode 100644
index 74f76bdbe..000000000
--- a/runtime/neurun/core/src/compiler/OperationValidator.cc
+++ /dev/null
@@ -1,985 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationValidator.h"
-
-#include <typeinfo>
-
-#include "ir/Graph.h"
-#include "ir/operation/LowerInfo.h"
-
-#include "util/logging.h"
-#include "util/Utils.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-OperationValidator::OperationValidator(const ir::Graph &graph)
- : _graph{graph}, _ctx{graph.operands()}, _current_subg_layout{ir::Layout::UNKNOWN}
-{
-}
-
-void OperationValidator::operator()()
-{
- // TODO Get frontend layout from graph
- _current_subg_layout = ir::Layout::NHWC;
-
- _graph.operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
-}
-
-void OperationValidator::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
- const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
-
- const auto frontend_layout = _current_subg_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
-
- UNUSED_RELEASE(input_shape);
- UNUSED_RELEASE(output_shape);
-
- // All assertions as per NNAPI specification.
- assert(_ctx.at(ifm_index).shape().rank() == 4);
- assert(_ctx.at(ofm_index).shape().rank() == 4);
- assert(_ctx.at(block_size_index).shape().rank() == 1);
-
- assert(_ctx.at(block_size_index).shape().dim(0) == 2);
-
- assert(_ctx.at(block_size_index).isConstant());
-
- assert(input_shape.C == output_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::Cast &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(input_index);
-
- assert(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
-void OperationValidator::visit(const ir::operation::Comparison &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
- const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
-
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(lhs_index);
- UNUSED_RELEASE(rhs_index);
-
- assert(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- assert(_ctx.at(output_index).typeInfo().type() == ir::DataType::BOOL8);
-}
-
-void OperationValidator::visit(const ir::operation::Softmax &node)
-{
- VERBOSE(Softmax) << "Configure SOFTMAX operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(input_index);
-
- assert(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::InstanceNorm &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
- const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
- const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
-
- UNUSED_RELEASE(ofm_index);
- UNUSED_RELEASE(ifm_index);
- UNUSED_RELEASE(gamma_index);
- UNUSED_RELEASE(beta_index);
-
- assert(_ctx.at(ifm_index).shape().rank() == 4);
- assert(_ctx.at(ifm_index).shape() == _ctx.at(ofm_index).shape());
- assert(_ctx.at(gamma_index).shape().rank() == 1);
- assert(_ctx.at(beta_index).shape().rank() == 1);
-}
-
-void OperationValidator::visit(const ir::operation::Permute &node)
-{
- VERBOSE(Permute) << "Configure Permute operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(input_index);
-
- assert(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::ReduceSum &node)
-{
- VERBOSE(Permute) << "Configure ReduceSum operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReduceSum::Input::INPUT)};
- const auto &axes = node.param().axes;
-
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(input_index);
- UNUSED_RELEASE(axes);
-
- const auto input_shape = _ctx.at(input_index).shape();
- const auto output_shape = _ctx.at(output_index).shape();
-
- UNUSED_RELEASE(output_shape);
- UNUSED_RELEASE(input_shape);
-
- assert(input_shape.rank() <= 4);
- assert(output_shape.rank() <= input_shape.rank());
-
- // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
- // supports cases reducing height and width or reducing depth.
- // TODO We have to support all cases of dimensions up to 4.
- // For correct permuting, we have to set output's shape to be equal in dimension position of the
- // input. But the positions of the same dimensions in the input and output may be set differently.
- // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
- // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
- // extend it in 4 dimensions, it should be {1,1,3,5}.
- // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
- // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
- // next operation is not desired.
- if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank())
- {
- if (output_shape.rank() == 2)
- {
- // Reducing HW
- assert(input_shape.dim(0) == output_shape.dim(0) &&
- input_shape.dim(3) == output_shape.dim(1));
- }
- else if (output_shape.rank() == 3)
- {
- // Reducing C or
- // (Reducing H and C(input and output) == 1) or (Reducing W and C(input and output) == 1)
- assert((input_shape.dim(0) == output_shape.dim(0) &&
- input_shape.dim(1) == output_shape.dim(1) &&
- input_shape.dim(2) == output_shape.dim(2)) ||
- (input_shape.dim(0) == output_shape.dim(0) &&
- (input_shape.dim(1) == output_shape.dim(1) ||
- input_shape.dim(2) == output_shape.dim(1)) &&
- input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::Transpose &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &perm{node.param().perm};
-
- const auto &output_shape = _ctx.at(output_index).shape();
- const auto &input_shape = _ctx.at(input_index).shape();
-
- UNUSED_RELEASE(output_shape);
- UNUSED_RELEASE(input_shape);
- UNUSED_RELEASE(perm);
-
- assert(input_shape.rank() == static_cast<int>(perm.size()));
- assert(input_shape.rank() == output_shape.rank());
-}
-
-void OperationValidator::visit(const ir::operation::ReduceMax &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReduceMax::Input::INPUT)};
- const auto &axes = node.param().axes;
-
- auto output_shape = _ctx.at(output_index).shape();
- auto input_shape = _ctx.at(input_index).shape();
-
- UNUSED_RELEASE(output_shape);
- UNUSED_RELEASE(input_shape);
- UNUSED_RELEASE(axes);
-
- assert(input_shape.rank() <= 4);
- assert(output_shape.rank() <= input_shape.rank());
-
- // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
- // supports cases reducing height and width or reducing depth.
- // TODO We have to support all cases of dimensions up to 4.
- // For correct permuting, we have to set output's shape to be equal in dimension position of the
- // input. But the positions of the same dimensions in the input and output may be set differently.
- // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
- // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
- // extend it in 4 dimensions, it should be {1,1,3,5}.
- // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
- // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
- // next operation is not desired.
- if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank())
- {
- if (output_shape.rank() == 2)
- {
- // Reducing HW
- assert(input_shape.dim(0) == output_shape.dim(0) &&
- input_shape.dim(3) == output_shape.dim(1));
- }
- else if (output_shape.rank() == 3)
- {
- // Reducing C or
- // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1)
- assert((input_shape.dim(0) == output_shape.dim(0) &&
- input_shape.dim(1) == output_shape.dim(1) &&
- input_shape.dim(2) == output_shape.dim(2)) ||
- (input_shape.dim(0) == output_shape.dim(0) &&
- (input_shape.dim(1) == output_shape.dim(1) ||
- input_shape.dim(2) == output_shape.dim(1)) &&
- input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::RNN &node)
-{
- // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
- // TODO Support dynamic rnn
- const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
- const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
- const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
- const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
- const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
- const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
-
- const auto batch_size = _ctx.at(output_index).shape().dim(0);
- const auto num_units = _ctx.at(output_index).shape().dim(1);
-
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(hidden_state_out_index);
- UNUSED_RELEASE(input_index);
- UNUSED_RELEASE(weights_index);
- UNUSED_RELEASE(recurrent_weights_index);
- UNUSED_RELEASE(bias_index);
- UNUSED_RELEASE(hidden_state_in_index);
- UNUSED_RELEASE(batch_size);
- UNUSED_RELEASE(num_units);
-
- assert(_ctx.at(output_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_out_index).shape().rank() == 2 &&
- _ctx.at(input_index).shape().rank() == 2 && _ctx.at(weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_weights_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_in_index).shape().rank() == 2);
- assert(_ctx.at(bias_index).shape().rank() == 1);
-
- assert(batch_size == _ctx.at(input_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_out_index).shape().dim(0));
- assert(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1));
-
- assert(num_units == _ctx.at(weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(0) &&
- num_units == _ctx.at(bias_index).shape().dim(0));
- assert(num_units == _ctx.at(output_index).shape().dim(1) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_in_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
-}
-
-void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
- const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
- const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
-
- const auto frontend_layout = _current_subg_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
-
- UNUSED_RELEASE(input_shape);
- UNUSED_RELEASE(output_shape);
-
- // All assertions as per NNAPI specification.
- assert(_ctx.at(ifm_index).shape().rank() == 4);
- assert(_ctx.at(ofm_index).shape().rank() == 4);
- assert(_ctx.at(block_size_index).shape().rank() == 1);
- assert(_ctx.at(paddings_index).shape().rank() == 2);
-
- assert(_ctx.at(block_size_index).shape().dim(0) == 2);
- assert(_ctx.at(paddings_index).shape().dim(0) == 2);
- assert(_ctx.at(paddings_index).shape().dim(1) == 2);
-
- assert(_ctx.at(block_size_index).isConstant());
- assert(_ctx.at(paddings_index).isConstant());
-
- assert(input_shape.C == output_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::SpaceToDepth &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
-
- const auto frontend_layout = _current_subg_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
- const auto block_size = node.param().block_size;
-
- UNUSED_RELEASE(input_shape);
- UNUSED_RELEASE(output_shape);
- UNUSED_RELEASE(block_size);
-
- // All assertions as per NNAPI specification.
- assert(_ctx.at(ifm_index).shape().rank() == 4);
- assert(_ctx.at(ofm_index).shape().rank() == 4);
- assert((block_size >= 1) && (input_shape.H % block_size == 0) &&
- (input_shape.W % block_size == 0));
- assert(input_shape.N == output_shape.N);
- assert(input_shape.C * block_size * block_size == output_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
-
- const auto &output_obj = _ctx.at(output_index);
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &values_obj = _ctx.at(values_index);
-
- UNUSED_RELEASE(output_obj);
- UNUSED_RELEASE(lookups_obj);
- UNUSED_RELEASE(values_obj);
-
- // Verify operand here, not at SimpleEmbeddingLookup::configure() to avoid acl's modifying
- // TensorShape sometimes(Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729)
- {
- assert(lookups_obj.typeInfo().type() == ir::DataType::INT32);
-
- const auto &output_shape = output_obj.shape();
- const auto &lookups_shape = lookups_obj.shape();
- const auto &values_shape = values_obj.shape();
-
- UNUSED_RELEASE(output_shape);
- UNUSED_RELEASE(lookups_shape);
- UNUSED_RELEASE(values_shape);
-
- assert(lookups_shape.rank() == 1);
- assert(values_shape.rank() >= 2);
-
- // output should be a n-D tensor with the same rank and shape as the values tensor, except for
- // the first dimension which has the same size as lookups' only dimension.
- assert(output_shape.rank() == values_shape.rank());
- assert(output_shape.dim(0) == lookups_shape.dim(0));
- for (int n = 1; n < output_shape.rank(); ++n)
- {
- assert(output_shape.dim(n) == values_shape.dim(n));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::Exp &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(input_index);
-
- assert(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
- assert(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-}
-
-void OperationValidator::visit(const ir::operation::Floor &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
-
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(input_index);
-
- assert(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
- assert(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-}
-
-void OperationValidator::visit(const ir::operation::HashtableLookup &node)
-{
- const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
- const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
-
- const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
- const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
- const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
-
- const auto &output_obj = _ctx.at(output_index);
- const auto &hits_obj = _ctx.at(hits_index);
-
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &keys_obj = _ctx.at(keys_index);
- const auto &values_obj = _ctx.at(values_index);
-
- assert(lookups_obj.typeInfo().type() == ir::DataType::INT32);
- assert(keys_obj.typeInfo().type() == ir::DataType::INT32);
- assert(hits_obj.typeInfo().type() == ir::DataType::QUANT8_ASYMM);
-
- const auto &output_shape = output_obj.shape();
- const auto &hits_shape = hits_obj.shape();
-
- const auto &lookups_shape = lookups_obj.shape();
- const auto &keys_shape = keys_obj.shape();
- const auto &values_shape = values_obj.shape();
-
- UNUSED_RELEASE(output_shape);
- UNUSED_RELEASE(hits_shape);
- UNUSED_RELEASE(lookups_shape);
- UNUSED_RELEASE(keys_shape);
- UNUSED_RELEASE(values_shape);
-
- assert(values_shape.rank() == output_shape.rank());
- assert(lookups_shape.rank() == 1);
- assert(keys_shape.rank() == 1);
- assert(values_shape.dim(0) == keys_shape.dim(0));
- assert(lookups_shape.dim(0) == output_shape.dim(0));
-}
-
-void OperationValidator::visit(const ir::operation::TransposeConv &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
- const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
-
- // Only 4D tensors are supported
- assert(_ctx.at(ofm_index).shape().rank() == 4);
- assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank());
- assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank());
-
- const auto frontend_layout = _current_subg_layout;
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- // The kernel has only IHWO layout on frontend
- // So ker_shape is treated here below
- // I -> N
- // H -> H
- // W -> W
- // O -> C
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(ir::Layout::NHWC);
-
- UNUSED_RELEASE(ofm_shape);
- UNUSED_RELEASE(ifm_shape);
- UNUSED_RELEASE(ker_shape);
-
- assert((node.param().padding.type == ir::PaddingType::SAME) ||
- (node.param().padding.type == ir::PaddingType::VALID));
- assert(ifm_shape.N == ofm_shape.N);
- assert(ifm_shape.C == ker_shape.C);
- assert(ker_shape.N == ofm_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::Gather &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
-
- const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
-
- const auto axis = node.param().axis;
-
- const auto ifm_shape = _ctx.at(ifm_index).shape();
- const auto indices_shape = _ctx.at(indices_index).shape();
- const auto ofm_shape = _ctx.at(ofm_index).shape();
-
- UNUSED_RELEASE(ifm_shape);
- UNUSED_RELEASE(indices_shape);
- UNUSED_RELEASE(ofm_shape);
- UNUSED_RELEASE(axis);
-
- assert(ifm_shape.rank() <= 4);
- assert(indices_shape.rank() <= 3);
- assert(ofm_shape.rank() <= 4);
-}
-
-void OperationValidator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(input_index);
-
- assert(_ctx.at(input_index).shape().rank() <= 4);
- assert(_ctx.at(input_index).shape() == _ctx.at(output_index).shape());
- assert(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM);
- assert(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
-}
-
-void OperationValidator::visit(const ir::operation::Mean &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Mean::Input::INPUT)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape();
- const auto ofm_shape = _ctx.at(ofm_index).shape();
-
- // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
- // supports cases reducing height and width or reducing depth.
- // TODO We have to support all cases of dimensions up to 4.
- // For correct permuting, we have to set output's shape to be equal in dimension position of the
- // input. But the positions of the same dimensions in the input and output may be set differently.
- // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
- // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
- // extend it in 4 dimensions, it should be {1,1,3,5}.
- // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
- // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
- // next operation is not desired.
- if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
- {
- if (ofm_shape.rank() == 2)
- {
- // Reducing HW
- assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
- }
- else if (ofm_shape.rank() == 3)
- {
- // Reducing C or
- // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1)
- assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
- ifm_shape.dim(2) == ofm_shape.dim(2)) ||
- (ifm_shape.dim(0) == ofm_shape.dim(0) &&
- (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
- ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::DepthToSpace &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
-
- const auto frontend_layout = _current_subg_layout;
- const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout);
- const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout);
-
- UNUSED_RELEASE(output_shape);
- UNUSED_RELEASE(input_shape);
-
- assert(_ctx.at(input_index).shape().rank() == 4);
- assert(_ctx.at(output_index).shape().rank() == 4);
-
- int32_t block_size = node.param().block_size;
-
- UNUSED_RELEASE(block_size);
-
- assert(block_size > 0);
-
- { // assertions block
- assert(output_shape.N == input_shape.N);
- assert(output_shape.H == input_shape.H * block_size);
- assert(output_shape.W == input_shape.W * block_size);
- assert(input_shape.C % (block_size * block_size) == 0);
- assert(output_shape.C == input_shape.C / (block_size * block_size));
- }
-}
-
-void OperationValidator::visit(const ir::operation::Pack &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto num{node.param().num};
- const auto axis{node.param().axis};
-
- const auto &output_shape = _ctx.at(output_index).shape();
- const auto output_rank = static_cast<int32_t>(output_shape.rank());
-
- const auto input1_index{node.getInputs().at(0)};
- const auto input_shape = _ctx.at(input1_index).shape();
-
- UNUSED_RELEASE(num);
- UNUSED_RELEASE(axis);
- UNUSED_RELEASE(output_rank);
-
- assert(num == static_cast<int32_t>(node.getInputs().size()));
- assert(axis >= -output_rank && axis < output_rank);
- for (const auto &index : node.getInputs())
- {
- UNUSED_RELEASE(index);
- assert(input_shape == _ctx.at(index).shape());
- }
-}
-
-void OperationValidator::visit(const ir::operation::ReduceMin &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReduceMin::Input::INPUT)};
- const auto &axes = node.param().axes;
-
- auto ifm_shape = _ctx.at(ifm_index).shape();
- auto ofm_shape = _ctx.at(ofm_index).shape();
-
- UNUSED_RELEASE(ifm_shape);
- UNUSED_RELEASE(ofm_shape);
- UNUSED_RELEASE(axes);
-
- assert(ifm_shape.rank() <= 4);
- assert(ofm_shape.rank() <= ifm_shape.rank());
-
- // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
- // supports cases reducing height and width or reducing depth.
- // TODO We have to support all cases of dimensions up to 4.
- // For correct permuting, we have to set output's shape to be equal in dimension position of the
- // input. But the positions of the same dimensions in the input and output may be set differently.
- // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
- // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
- // extend it in 4 dimensions, it should be {1,1,3,5}.
- // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
- // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
- // next operation is not desired.
- if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
- {
- if (ofm_shape.rank() == 2)
- {
- // Reducing HW
- assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
- }
- else if (ofm_shape.rank() == 3)
- {
- // Reducing C or
- // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1)
- assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
- ifm_shape.dim(2) == ofm_shape.dim(2)) ||
- (ifm_shape.dim(0) == ofm_shape.dim(0) &&
- (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
- ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::LSTM &node)
-{
- // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
- // TODO Support dynamic rnn
- const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
- const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
- const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
- const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
- const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
- const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
- const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
- const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
- const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)};
- const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)};
- const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)};
- const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
- const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
- const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
- const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
- const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)};
- const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)};
- const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
- const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
-
- UNUSED_RELEASE(scratch_buffer_index);
- UNUSED_RELEASE(output_state_out_index);
- UNUSED_RELEASE(cell_state_out_index);
- UNUSED_RELEASE(output_index);
-
- UNUSED_RELEASE(input_index);
- UNUSED_RELEASE(input_to_input_weights_index);
- UNUSED_RELEASE(input_to_forget_weights_index);
- UNUSED_RELEASE(input_to_cell_weights_index);
- UNUSED_RELEASE(input_to_output_weights_index);
- UNUSED_RELEASE(recurrent_to_input_weights_index);
- UNUSED_RELEASE(recurrent_to_forget_weights_index);
- UNUSED_RELEASE(recurrent_to_cell_weights_index);
- UNUSED_RELEASE(recurrent_to_output_weights_index);
- UNUSED_RELEASE(cell_to_input_weights_index);
- UNUSED_RELEASE(cell_to_forget_weights_index);
- UNUSED_RELEASE(cell_to_output_weights_index);
- UNUSED_RELEASE(input_gate_bias_index);
- UNUSED_RELEASE(forget_gate_bias_index);
- UNUSED_RELEASE(cell_bias_index);
- UNUSED_RELEASE(output_gate_bias_index);
- UNUSED_RELEASE(projection_weights_index);
- UNUSED_RELEASE(projection_bias_index);
- UNUSED_RELEASE(output_state_in_index);
- UNUSED_RELEASE(cell_state_in_index);
-
- assert(_ctx.at(scratch_buffer_index).shape().rank() == 2 &&
- _ctx.at(output_state_out_index).shape().rank() == 2 &&
- _ctx.at(cell_state_out_index).shape().rank() == 2 &&
- _ctx.at(output_index).shape().rank() == 2 && _ctx.at(input_index).shape().rank() == 2 &&
- _ctx.at(input_to_input_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_output_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
- _ctx.at(projection_weights_index).shape().rank() == 2 &&
- _ctx.at(output_state_in_index).shape().rank() == 2 &&
- _ctx.at(cell_state_in_index).shape().rank() == 2);
-
- assert(_ctx.at(cell_to_input_weights_index).shape().rank() == 1 &&
- _ctx.at(cell_to_forget_weights_index).shape().rank() == 1 &&
- _ctx.at(cell_to_output_weights_index).shape().rank() == 1 &&
- _ctx.at(input_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(forget_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(cell_bias_index).shape().rank() == 1 &&
- _ctx.at(output_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(projection_bias_index).shape().rank() == 1);
-
- // CIFG assertion
- assert((_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) == 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0 &&
- _ctx.at(input_gate_bias_index).shape().dim(0) == 0 &&
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0) ||
- (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0 &&
- _ctx.at(input_gate_bias_index).shape().dim(0) != 0));
-
- // Peephole assertion
- assert((_ctx.at(cell_to_forget_weights_index).shape().dim(0) == 0 &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) == 0) ||
- (_ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0 &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0));
-
- bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
- bool has_recurrent_to_input_weights =
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
- bool has_input_gate_bias = _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
- bool has_cell_to_input_weights = _ctx.at(cell_to_input_weights_index).shape().dim(0) != 0;
- bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0;
- bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
-
- // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
- // true: no CIFG
- // false: CIFG
- bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
-
- // NOTE The cell_to_input_weights do not exist in regular CIFG although peephole.
- // true: peephole
- // false: no peephole
- bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
-
- // NOTE The projection weights may have data but the projection bias may not.
- bool has_projection_param = has_projection_weights;
-
- UNUSED_RELEASE(has_input_to_input_weights);
- UNUSED_RELEASE(has_recurrent_to_input_weights);
- UNUSED_RELEASE(has_input_gate_bias);
- UNUSED_RELEASE(has_cell_to_input_weights);
- UNUSED_RELEASE(has_cell_to_forget_weights);
- UNUSED_RELEASE(has_cell_to_output_weights);
- UNUSED_RELEASE(has_projection_weights);
- UNUSED_RELEASE(has_projection_bias);
- UNUSED_RELEASE(has_cifg_param);
- UNUSED_RELEASE(has_peephole_param);
- UNUSED_RELEASE(has_projection_param);
-
- const auto batch_size = _ctx.at(input_index).shape().dim(0);
- UNUSED_RELEASE(batch_size);
- assert(batch_size == _ctx.at(output_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(cell_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(scratch_buffer_index).shape().dim(0) &&
- batch_size == _ctx.at(output_state_out_index).shape().dim(0) &&
- batch_size == _ctx.at(cell_state_out_index).shape().dim(0) &&
- batch_size == _ctx.at(output_index).shape().dim(0));
-
- const auto input_size = _ctx.at(input_index).shape().dim(1);
- UNUSED_RELEASE(input_size);
- assert(input_size == _ctx.at(input_to_forget_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_cell_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_output_weights_index).shape().dim(1));
-
- const auto num_units = _ctx.at(cell_state_out_index).shape().dim(1);
- UNUSED_RELEASE(num_units);
- assert(num_units == _ctx.at(input_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(input_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(input_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(forget_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_bias_index).shape().dim(0) &&
- num_units == _ctx.at(output_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_state_in_index).shape().dim(1) &&
- (((num_units * 3) == _ctx.at(scratch_buffer_index).shape().dim(1)) ||
- ((num_units * 4) == _ctx.at(scratch_buffer_index).shape().dim(1))));
-
- const auto output_size = _ctx.at(output_index).shape().dim(1);
- UNUSED_RELEASE(output_size);
- assert(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) &&
- output_size == _ctx.at(output_state_in_index).shape().dim(1) &&
- output_size == _ctx.at(output_state_out_index).shape().dim(1));
-
- if (has_cifg_param)
- {
- assert(input_size == _ctx.at(input_to_input_weights_index).shape().dim(1));
- assert(num_units == _ctx.at(input_to_input_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_input_weights_index).shape().dim(0) &&
- (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* non-peephole */) &&
- num_units == _ctx.at(input_gate_bias_index).shape().dim(0));
- assert(output_size == _ctx.at(recurrent_to_input_weights_index).shape().dim(1));
- assert(has_input_to_input_weights && has_recurrent_to_input_weights && has_input_gate_bias);
- if (has_cell_to_input_weights)
- {
- // NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole.
- assert(has_peephole_param);
- }
- assert(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
- }
- else
- {
- assert(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
- }
-
- if (has_peephole_param)
- {
- assert(num_units == _ctx.at(cell_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(cell_to_output_weights_index).shape().dim(0) &&
- (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
- }
-
- if (has_projection_param)
- {
- assert(num_units == _ctx.at(projection_weights_index).shape().dim(1));
- assert(output_size == _ctx.at(projection_weights_index).shape().dim(0));
- if (has_projection_bias)
- {
- assert(output_size == _ctx.at(projection_bias_index).shape().dim(0));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::Unpack &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
- const auto num{node.param().num};
- const auto axis{node.param().axis};
-
- const auto &input_shape = _ctx.at(input_index).shape();
- const auto input_rank = static_cast<int32_t>(input_shape.rank());
-
- UNUSED_RELEASE(num);
- UNUSED_RELEASE(axis);
- UNUSED_RELEASE(input_rank);
-
- assert(num == static_cast<int32_t>(node.getOutputs().size()));
- assert(axis >= -input_rank && axis < input_rank);
-}
-
-void OperationValidator::visit(const ir::operation::Pad &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
- const auto output_index{node.getInputs().at(0)};
-
- const auto &pad_shape = _ctx.at(pad_index).shape();
- const auto input_rank = static_cast<int32_t>(_ctx.at(input_index).shape().rank());
-
- UNUSED_RELEASE(pad_shape);
- UNUSED_RELEASE(input_rank);
- UNUSED_RELEASE(output_index);
-
- assert(pad_shape.rank() == 2);
- assert(pad_shape.dim(0) == input_rank);
- assert(pad_shape.dim(1) == 2);
- assert(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32);
- assert(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::Min &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(lhs_index);
- UNUSED_RELEASE(rhs_index);
-
- assert(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- assert(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
-void OperationValidator::visit(const ir::operation::Max &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(lhs_index);
- UNUSED_RELEASE(rhs_index);
-
- assert(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- assert(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
-} // namespace compiler
-} // namespace neurun
diff --git a/runtime/neurun/core/src/compiler/OperationValidator.h b/runtime/neurun/core/src/compiler/OperationValidator.h
deleted file mode 100644
index 6ff3c7815..000000000
--- a/runtime/neurun/core/src/compiler/OperationValidator.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_COMPILER_OPERATION_VALIDATOR_H__
-#define __NEURUN_COMPILER_OPERATION_VALIDATOR_H__
-
-#include "ir/Layout.h"
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-class Graph;
-class Operands;
-} // namespace ir
-} // namespace neurun
-
-namespace neurun
-{
-namespace compiler
-{
-
-class OperationValidator : public ir::OperationVisitor
-{
-public:
- OperationValidator(void) = delete;
- OperationValidator(const ir::Graph &graph);
-
-public:
- void operator()();
-
-public:
- void visit(const ir::operation::BatchToSpaceND &node) override;
- void visit(const ir::operation::Cast &node) override;
- void visit(const ir::operation::Comparison &node) override;
- void visit(const ir::operation::Softmax &node) override;
- void visit(const ir::operation::InstanceNorm &node) override;
- void visit(const ir::operation::Permute &node) override;
- void visit(const ir::operation::ReduceSum &node) override;
- void visit(const ir::operation::Transpose &node) override;
- void visit(const ir::operation::ReduceMax &node) override;
- void visit(const ir::operation::RNN &node) override;
- void visit(const ir::operation::SpaceToBatchND &node) override;
- void visit(const ir::operation::SpaceToDepth &node) override;
- void visit(const ir::operation::EmbeddingLookup &node) override;
- void visit(const ir::operation::Exp &node) override;
- void visit(const ir::operation::Floor &node) override;
- void visit(const ir::operation::HashtableLookup &node) override;
- void visit(const ir::operation::TransposeConv &node) override;
- void visit(const ir::operation::Gather &node) override;
- void visit(const ir::operation::Dequantize &node) override;
- void visit(const ir::operation::Mean &node) override;
- void visit(const ir::operation::DepthToSpace &node) override;
- void visit(const ir::operation::Pack &node) override;
- void visit(const ir::operation::ReduceMin &node) override;
- void visit(const ir::operation::LSTM &node) override;
- void visit(const ir::operation::Unpack &node) override;
- void visit(const ir::operation::Pad &node) override;
- void visit(const ir::operation::Min &node) override;
- void visit(const ir::operation::Max &node) override;
-
-private:
- // TODO Remove _ctx field
- const ir::Graph &_graph;
- const ir::Operands &_ctx;
- ir::Layout _current_subg_layout;
-};
-
-} // namespace compiler
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_OPERATION_VALIDATOR_H__
diff --git a/runtime/neurun/core/src/compiler/ParamChecker.cc b/runtime/neurun/core/src/compiler/ParamChecker.cc
deleted file mode 100644
index bf172b53f..000000000
--- a/runtime/neurun/core/src/compiler/ParamChecker.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ParamChecker.h"
-
-#include "ir/Graph.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-void ParamChecker::operator()()
-{
- _model->operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
-}
-
-} // namespace compiler
-} // namespace neurun
diff --git a/runtime/neurun/core/src/compiler/ParamChecker.h b/runtime/neurun/core/src/compiler/ParamChecker.h
deleted file mode 100644
index 7e88ff185..000000000
--- a/runtime/neurun/core/src/compiler/ParamChecker.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file ParamChecker.h
- * @brief This file contains ParamChecker to check\n
- * operations' parameters are compilable at machine independent phase\n
- * ex) Check param is constant
- */
-#ifndef __NEURUN_COMPILER_PARAM_CHECKER_H__
-#define __NEURUN_COMPILER_PARAM_CHECKER_H__
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-class Graph;
-} // namespace ir
-} // namespace neurun
-
-namespace neurun
-{
-namespace compiler
-{
-
-class ParamChecker : public ir::OperationVisitor
-{
-public:
- /**
- * @brief Construct a new Param Checker object (deleted)
- */
- ParamChecker(void) = delete;
- /**
- * @brief Construct a new Param Checker object
- * @param[in] model Graph model to check
- */
- ParamChecker(std::shared_ptr<ir::Graph> model) : _model{model} {}
-
-public:
- /**
- * @brief Run parameter analysis
- */
- void operator()();
- /**
- * @brief Return analysis result if model have non-const parameter
- * @return @c true if there is non-const parameter, otherwise @c false
- */
- bool haveNoneConstParam(void) { return _nonConstParam; }
-
-private:
- const std::shared_ptr<ir::Graph> _model;
- bool _nonConstParam{false};
-};
-
-} // namespace compiler
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_OPERATION_VALIDATOR_H__
diff --git a/runtime/neurun/core/src/compiler/SubTensorAnalyzer.cc b/runtime/neurun/core/src/compiler/SubTensorAnalyzer.cc
deleted file mode 100644
index dae1a74ff..000000000
--- a/runtime/neurun/core/src/compiler/SubTensorAnalyzer.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SubTensorAnalyzer.h"
-
-#include <typeinfo>
-
-#include "cpp14/memory.h"
-#include "ir/OperandIndexSequence.h"
-#include "util/logging.h"
-#include "util/Coordinates.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-void SubTensorAnalyzer::visit(const ir::operation::Concat &node)
-{
- // If operator is concat (or other operators related with subsumption), fill subsumption info
- // TODO: if one tensor is subset of many parents or model input
- // Solution 1. Handle 1st parent only, ignore others (need to invert for other children)
- // Solution 2. Insert copy operation for other parents
- int32_t axis_raw = node.param().axis;
-
- const auto &output_index = node.getOutputs().at(0);
- const auto &inputs = node.getInputs();
-
- int32_t axis_point = 0;
- const auto rank = _graph.operands().at(output_index).shape().rank();
- int32_t axis = axis_raw < 0 ? (axis_raw + rank) : axis_raw;
- assert(rank > axis);
-
- for (const auto &input_index : inputs)
- {
- // NOTE Not support multiple parent tensor yet
- //
- // Let us consider the following example (where OP_i is not a CONCAT):
- //
- // %0 = OP_0
- // %1 = OP_1
- // %2 = OP_2
- // %3 = CONCAT(%0, %1)
- // %4 = CONCAT(%0, %2)
- //
- // %0 and %1 SHOULD be consecutive in order to eliminate the former CONCAT operation,
- // which makes it impossible to eliminate the latter CONCAT operation.
- // - Note that %0 and %2 cannot be consecutive.
- if (_graph.operands().at(input_index).parent_info() != nullptr)
- {
- return;
- }
-
- // NOTE Not support the case that concat's input is a constant or a input of model
- if (_graph.operands().at(input_index).isConstant() || _graph.getInputs().contains(input_index))
- {
- return;
- }
- }
-
- for (const auto &input_index : inputs)
- {
- auto input_shape = _graph.operands().at(input_index).shape();
- assert(rank == input_shape.rank());
-
- neurun::util::Coordinates coordinate_info{};
- for (int i = 0; i < rank; i++)
- {
- coordinate_info.set(i, 0);
- }
- coordinate_info.set(axis, axis_point);
-
- auto parentInfo =
- nnfw::cpp14::make_unique<ir::operand::ParentInfo>(output_index, coordinate_info);
-
- _graph.operands().at(input_index).parent_info(std::move(parentInfo));
-
- axis_point += input_shape.dim(axis);
- }
-}
-
-} // namespace compiler
-} // namespace neurun
diff --git a/runtime/neurun/core/src/compiler/SubTensorAnalyzer.h b/runtime/neurun/core/src/compiler/SubTensorAnalyzer.h
deleted file mode 100644
index 54d41e460..000000000
--- a/runtime/neurun/core/src/compiler/SubTensorAnalyzer.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file SubTensorAnalyzer.h
- * @brief This file contains SubTensorAnalyzer to analyze tensor subsumption
- * using operation visitor
- */
-
-#ifndef __NEURUN_COMPILER_SUBTENSOR_ANALYZER_H__
-#define __NEURUN_COMPILER_SUBTENSOR_ANALYZER_H__
-
-#include "ir/OperationVisitor.h"
-#include "ir/Graph.h"
-
-namespace neurun
-{
-namespace compiler
-{
-
-/**
- * @brief Class to analyze tensor subsumption
- */
-class SubTensorAnalyzer : public ir::OperationVisitor
-{
-public:
- /**
- * @brief Construct a new SubTensorAnalyzer object
- * @param[in] ctx Graph operand set
- */
- SubTensorAnalyzer(ir::Graph &graph) : _graph{graph}
- {
- // DO NOTHING
- }
-
-public:
- void visit(const ir::operation::Concat &) override;
-
-private:
- ir::Graph &_graph; // TODO Refactor : Do not update Operands
-};
-
-} // namespace compiler
-} // namespace neurun
-
-#endif // __NEURUN_COMPILER_SUBTENSOR_ANALYZER_H__
diff --git a/runtime/neurun/core/src/dumper/dot/DotBuilder.cc b/runtime/neurun/core/src/dumper/dot/DotBuilder.cc
deleted file mode 100644
index 4c7089a9c..000000000
--- a/runtime/neurun/core/src/dumper/dot/DotBuilder.cc
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DotBuilder.h"
-
-namespace neurun
-{
-namespace dumper
-{
-namespace dot
-{
-
-// DotDumper
-DotBuilder::DotBuilder() {}
-
-void DotBuilder::update(const Node &node_info)
-{
- add(node_info);
- for (auto edge : node_info.edges())
- {
- addEdge(node_info, *edge);
- }
-}
-
-void DotBuilder::addSubgraph(const DotSubgraphInfo &subgraph_info)
-{
- _dot << "op_seq cluster_" << subgraph_info.index().value() << " {\n";
- _dot << " label=\"" << subgraph_info.label() << "\";\n";
- _dot << " style=filled;\n";
- _dot << " color=lightgrey;\n";
- _dot << " ";
- for (auto op : subgraph_info.operations())
- {
- _dot << "operation" << op.value() << "; ";
- }
- for (auto op : subgraph_info.operands())
- {
- _dot << "operand" << op.value() << "; ";
- }
- _dot << "\n";
- _dot << "}\n";
-}
-
-void DotBuilder::writeDot(std::ostream &os)
-{
- os << "digraph D {\n"
- << _dot.str() << "\n"
- << "}\n";
-}
-
-void DotBuilder::add(const Node &node)
-{
- _dot << node.id();
- std::stringstream ss;
- _dot << "[";
- for (auto attr : node.attributes())
- {
- _dot << attr.first << "=\"" << attr.second << "\" ";
- }
- _dot << "];\n";
-}
-
-void DotBuilder::addEdge(const Node &node1, const Node &node2)
-{
- _dot << node1.id() << " -> " << node2.id() << ";\n";
-}
-
-} // namespace dot
-} // namespace dumper
-} // namespace neurun
diff --git a/runtime/neurun/core/src/dumper/dot/DotBuilder.h b/runtime/neurun/core/src/dumper/dot/DotBuilder.h
deleted file mode 100644
index c04f6bc52..000000000
--- a/runtime/neurun/core/src/dumper/dot/DotBuilder.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_DUMPER_DOT_DOT_BUILDER_H__
-#define __NEURUN_DUMPER_DOT_DOT_BUILDER_H__
-
-#include <sstream>
-
-#include "ir/Index.h"
-#include "ir/Operation.h"
-#include "ir/Operand.h"
-
-#include "OperationNode.h"
-#include "OperandNode.h"
-#include "DotSubgraphInfo.h"
-
-using Operation = neurun::ir::Operation;
-using Object = neurun::ir::Operand;
-
-namespace neurun
-{
-namespace dumper
-{
-namespace dot
-{
-
-class DotBuilder
-{
-public:
- DotBuilder();
-
-public:
- void update(const Node &dotinfo);
- void addSubgraph(const DotSubgraphInfo &subgraph_info);
-
- void writeDot(std::ostream &os);
-
-private:
- void add(const Node &dotinfo);
- void addEdge(const Node &dotinfo1, const Node &dotinfo2);
-
- std::stringstream _dot;
-};
-
-} // namespace dot
-} // namespace dumper
-} // namespace neurun
-
-#endif // __NEURUN_DUMPER_DOT_DOT_BUILDER_H__
diff --git a/runtime/neurun/core/src/dumper/dot/DotDumper.cc b/runtime/neurun/core/src/dumper/dot/DotDumper.cc
deleted file mode 100644
index 44313a657..000000000
--- a/runtime/neurun/core/src/dumper/dot/DotDumper.cc
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <fstream>
-#include <unordered_map>
-
-#include "DotDumper.h"
-#include "DotBuilder.h"
-#include "DotSubgraphInfo.h"
-#include "ir/OpSequence.h"
-#include "ir/OperationIndexMap.h"
-#include "backend/Backend.h"
-#include "backend/BackendManager.h"
-#include "backend/IConfig.h"
-
-namespace neurun
-{
-namespace dumper
-{
-namespace dot
-{
-
-void DotDumper::dump(const std::string &tag)
-{
- if (_level == Level::OFF)
- {
- return;
- }
-
- neurun::dumper::dot::DotBuilder dot_builder;
-
- auto &operations = _graph.operations();
- auto &operands = _graph.operands();
-
- ir::OperationIndexMap<std::unique_ptr<Operation>> operation_nodes;
- std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> operand_nodes;
-
- operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &op) {
- auto node = nnfw::cpp14::make_unique<Operation>(index, op);
-
- for (auto output : op.getOutputs())
- {
- using neurun::dumper::dot::Operand;
- auto child = std::make_shared<Operand>(output, Operand::Type::MODEL_OUTPUT);
- node->addEdge(child);
- }
-
- operation_nodes.emplace(index, std::move(node));
- });
-
- auto backend_to_fillcolor = [](const backend::Backend *backend) {
- static const auto map = []() {
- std::unordered_map<const backend::Backend *, std::string> ret;
- uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :(
- for (const auto backend : backend::BackendManager::get().getAll())
- {
- ret.emplace(backend, Node::BG_COLORS[index]);
- index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0]));
- }
- return ret;
- }();
-
- auto itr = map.find(backend);
- if (itr == map.end())
- {
- return Node::DEFAULT_FILLCOLOR;
- }
- else
- {
- return itr->second;
- }
- };
-
- util::Set<ir::OperandIndex> shown_operand_set;
-
- operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &object) {
- bool showing_cond = false;
- if (_level == Level::ALL)
- {
- showing_cond = true;
- }
- else
- {
- showing_cond = !object.isConstant();
- }
- if (object.isConstant() || _graph.getInputs().contains(index))
- {
- showing_cond = showing_cond && (object.getUses().size() > 0);
- }
- if (showing_cond)
- {
- shown_operand_set.add(index);
-
- auto type = [&]() {
- using neurun::dumper::dot::Operand;
- if (_graph.getInputs().contains(index))
- return Operand::Type::MODEL_INPUT;
- if (_graph.getOutputs().contains(index))
- return Operand::Type::MODEL_OUTPUT;
- return Operand::Type::INTERNAL;
- }();
-
- auto lower_info = _graph.getLowerInfo(index);
- auto node = nnfw::cpp14::make_unique<Operand>(index, type);
-
- {
- // Display LowerInfo attributes
- std::string label = std::to_string(index.value());
- std::string fillcolor = "";
- if (lower_info)
- {
- const auto &def_factors = lower_info->def_factors();
- if (def_factors.size() > 0)
- {
- label += "\\n[";
- label += def_factors.getOnlyElement().backend()->config()->id();
- label += "]";
-
- fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend());
- }
- }
- node->setAttribute("label", label);
- node->setAttribute("fillcolor", fillcolor);
- }
-
- for (auto operation_index : object.getUses().list())
- {
- auto &operation = operations.at(operation_index);
- auto child = std::make_shared<Operation>(operation_index, operation);
- node->addEdge(child);
- }
-
- operand_nodes.emplace(index, std::move(node));
- }
- });
-
- const auto subgraphs = _graph.subgraphs();
- if (subgraphs)
- {
- subgraphs->iterate([&](const ir::SubgraphIndex &index, const ir::OpSequence &op_seq) {
- const auto lower_info = _graph.getLowerInfo(index);
- auto fillcolor = backend_to_fillcolor(lower_info->backend());
- std::string label =
- std::to_string(index.value()) + " [" + lower_info->backend()->config()->id() + "]";
- DotSubgraphInfo subgraph_info{index, op_seq, shown_operand_set};
- subgraph_info.label(label);
- subgraph_info.fillcolor(fillcolor);
- dot_builder.addSubgraph(subgraph_info);
-
- // Set fillcolor of all operations in the op_seq
- for (const auto &op : op_seq.operations())
- {
- auto found = operation_nodes.find(op.index);
- if (found != operation_nodes.end())
- {
- auto &&op = found->second;
- op->setAttribute("fillcolor", fillcolor);
- }
- }
- });
- }
-
- for (const auto &e : operation_nodes)
- dot_builder.update(*e.second);
- for (const auto &e : operand_nodes)
- dot_builder.update(*e.second);
-
- // Dump to file
- {
- std::string file_name;
- file_name += tag;
- file_name += ".dot";
- std::filebuf fb;
-
- fb.open(file_name, std::ios::out);
- std::ostream os(&fb);
-
- dot_builder.writeDot(os);
-
- fb.close();
- }
-}
-
-} // namespace dot
-} // namespace dumper
-} // namespace neurun
diff --git a/runtime/neurun/core/src/dumper/dot/DotDumper.h b/runtime/neurun/core/src/dumper/dot/DotDumper.h
deleted file mode 100644
index ec4d2b967..000000000
--- a/runtime/neurun/core/src/dumper/dot/DotDumper.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/Graph.h"
-
-#ifndef __NEURUN_DUMPER_DOT_DOT_DUMPER_H__
-#define __NEURUN_DUMPER_DOT_DOT_DUMPER_H__
-
-namespace neurun
-{
-namespace dumper
-{
-namespace dot
-{
-
-class DotDumper
-{
-public:
- enum Level
- {
- OFF = 0, //< Do not dump
- ALL_BUT_CONSTANTS = 1, //< Emit all operations and operands but constants
- ALL = 2 //< Emit all operations and operands
- };
-
-public:
- DotDumper(const ir::Graph &graph, Level level) : _graph(graph), _level{level} {}
-
-public:
- /**
- * @brief Dump to dot file as tag name if "GRAPH_DOT_DUMP" is set
- *
- * @param[in] tag The name of dot file that would be created
- * @return N/A
- */
- void dump(const std::string &tag);
-
-private:
- const ir::Graph &_graph;
- Level _level;
-};
-
-} // namespace dot
-} // namespace dumper
-} // namespace neurun
-
-#endif // __NEURUN_DUMPER_DOT_DOT_DUMPER_H__
diff --git a/runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.cc b/runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.cc
deleted file mode 100644
index 8cfe35900..000000000
--- a/runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DotSubgraphInfo.h"
-
-#include <sstream>
-
-namespace neurun
-{
-namespace dumper
-{
-namespace dot
-{
-
-DotSubgraphInfo::DotSubgraphInfo(const ir::SubgraphIndex &index, const ir::OpSequence &op_seq,
- const util::Set<ir::OperandIndex> &shown_operands)
- : _index{index}
-{
- for (const auto &element : op_seq.operations())
- {
- _operations.insert(element.index);
- for (auto o : element.node->getInputs())
- {
- // Must be a shown operand, not op_seq's inputs
- if (shown_operands.contains(o) && !op_seq.getInputs().contains(o))
- {
- _operands.insert(o);
- }
- }
- for (auto o : element.node->getOutputs())
- {
- // Must be a shown operand, not op_seq's inputs
- if (shown_operands.contains(o) && !op_seq.getOutputs().contains(o))
- {
- _operands.insert(o);
- }
- }
- }
-}
-
-} // namespace dot
-} // namespace dumper
-} // namespace neurun
diff --git a/runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.h b/runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.h
deleted file mode 100644
index 0aa7c6ddf..000000000
--- a/runtime/neurun/core/src/dumper/dot/DotSubgraphInfo.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
-#define __NEURUN_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
-
-#include <unordered_set>
-
-#include "ir/Index.h"
-#include "ir/OpSequence.h"
-#include "util/Set.h"
-
-namespace neurun
-{
-namespace dumper
-{
-namespace dot
-{
-
-class DotSubgraphInfo
-{
-public:
- DotSubgraphInfo(const ir::SubgraphIndex &index, const ir::OpSequence &op_seq,
- const util::Set<ir::OperandIndex> &shown_operands);
-
- ir::SubgraphIndex index() const { return _index; }
- std::string label() const { return _label; }
- void label(const std::string &val) { _label = val; }
- std::string fillcolor() const { return _fillcolor; }
- void fillcolor(const std::string &val) { _fillcolor = val; }
- const std::unordered_set<ir::OperationIndex> &operations() const { return _operations; }
- const std::unordered_set<ir::OperandIndex> &operands() const { return _operands; }
-
-private:
- ir::SubgraphIndex _index;
- std::string _label;
- std::string _fillcolor;
- std::unordered_set<ir::OperationIndex> _operations;
- std::unordered_set<ir::OperandIndex> _operands;
-};
-
-} // namespace dot
-} // namespace dumper
-} // namespace neurun
-
-#endif // __NEURUN_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
diff --git a/runtime/neurun/core/src/dumper/dot/Node.cc b/runtime/neurun/core/src/dumper/dot/Node.cc
deleted file mode 100644
index 166f0f40f..000000000
--- a/runtime/neurun/core/src/dumper/dot/Node.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Node.h"
-
-namespace neurun
-{
-namespace dumper
-{
-namespace dot
-{
-
-const std::string Node::DEFAULT_COLORSCHEME = "x11";
-const std::string Node::DEFAULT_FILLCOLOR = "white";
-// RED, BLUE, GREEN, PURPLE, ORANGE, YELLOW, BROWN, PINK
-const std::string Node::BG_COLORS[8] = {"1", "2", "3", "4", "5", "6", "7", "8"};
-
-Node::Node(const std::string &id) : _id{id}
-{
- // Set default values
- _attributes["style"] = "filled";
- _attributes["colorscheme"] = DEFAULT_COLORSCHEME;
- _attributes["fillcolor"] = DEFAULT_FILLCOLOR;
-}
-
-void Node::setAttribute(const std::string &key, const std::string &val) { _attributes[key] = val; }
-
-std::string Node::getAttribute(const std::string &key)
-{
- auto itr = _attributes.find(key);
- if (itr == _attributes.end())
- {
- return "";
- }
- else
- {
- return itr->second;
- }
-}
-
-} // namespace dot
-} // namespace dumper
-} // namespace neurun
diff --git a/runtime/neurun/core/src/dumper/dot/Node.h b/runtime/neurun/core/src/dumper/dot/Node.h
deleted file mode 100644
index 364cb08a4..000000000
--- a/runtime/neurun/core/src/dumper/dot/Node.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Node.h
- * @brief This file contains Node class
- * @ingroup COM_AI_RUNTIME
- *
- */
-
-#ifndef __NEURUN_DUMPER_DOT_NODE_H__
-#define __NEURUN_DUMPER_DOT_NODE_H__
-
-#include <string>
-#include <memory>
-#include <vector>
-#include <unordered_map>
-
-namespace neurun
-{
-namespace dumper
-{
-namespace dot
-{
-
-enum BGCOLORS : int
-{
- RED,
- BLUE,
- GREEN,
- PUPLE,
- ORANGE,
- YELLOW,
- BROWN,
- PINK
-};
-
-/**
- * @brief Class that represents a Node in "dot" format
- *
-*/
-class Node
-{
-public:
- const static std::string DEFAULT_FILLCOLOR;
- const static std::string DEFAULT_COLORSCHEME;
- const static std::string BG_COLORS[8];
-
-public:
- /**
- * @brief Destroy the Node object
- *
- */
- virtual ~Node() = default;
-
- /**
- * @brief Construct a new Node object
- *
- * @param id
- */
- Node(const std::string &id);
-
- /**
- * @brief return id
- *
- * @return id
- */
- std::string id() const { return _id; }
-
- /**
- * @brief return attributes
- *
- * @return const reference of attributes object
- */
- const std::unordered_map<std::string, std::string> &attributes() const { return _attributes; }
- /**
- * @brief Store an attribute with key-value pair
- *
- * @param[in] key attribute's key
- * @param[in] val attribute's value that is associated with the key
- */
- void setAttribute(const std::string &key, const std::string &val);
- /**
- * @brief Get the attributte value that is associated with key
- *
- * @param[in] key key of the attribute
- * @return value that is associated with the key
- */
- std::string getAttribute(const std::string &key);
-
- /**
- * @brief Add an edge in the graph, which is an outgoing edge
- *
- * @param[in] dotinfo A node that the new edge will be connected to
- */
- void addEdge(std::shared_ptr<Node> dotinfo) { _children.emplace_back(dotinfo); }
- /**
- * @brief Return list of edges
- *
- * @return Edges
- */
- const std::vector<std::shared_ptr<Node>> &edges() const { return _children; }
-
-private:
- std::string _id;
- std::unordered_map<std::string, std::string> _attributes;
- std::vector<std::shared_ptr<Node>> _children;
-};
-
-} // namespace dot
-} // namespace dumper
-} // namespace neurun
-
-#endif // __NEURUN_DUMPER_DOT_NODE_H__
diff --git a/runtime/neurun/core/src/dumper/dot/OperandNode.cc b/runtime/neurun/core/src/dumper/dot/OperandNode.cc
deleted file mode 100644
index 76d2c704c..000000000
--- a/runtime/neurun/core/src/dumper/dot/OperandNode.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <sstream>
-
-#include "OperandNode.h"
-#include "ir/Graph.h"
-#include "ir/operand/LowerInfo.h"
-
-namespace neurun
-{
-namespace dumper
-{
-namespace dot
-{
-
-const std::string Operand::INPUT_SHAPE = "doublecircle";
-const std::string Operand::OUTPUT_SHAPE = "doublecircle";
-const std::string Operand::OPERAND_SHAPE = "ellipse";
-const std::string Operand::BG_COLOR_SCHEME = "set18";
-
-Operand::Operand(const ir::OperandIndex &index, Type type)
- : Node{"operand" + std::to_string(index.value())}
-{
- {
- auto type_to_shape = [](Type type) {
- switch (type)
- {
- case Type::MODEL_INPUT:
- return INPUT_SHAPE;
- case Type::MODEL_OUTPUT:
- return OUTPUT_SHAPE;
- case Type::UNDEFINED:
- case Type::INTERNAL:
- default:
- return OPERAND_SHAPE;
- }
- };
- setAttribute("shape", type_to_shape(type));
- }
-
- setAttribute("colorscheme", BG_COLOR_SCHEME);
-}
-
-} // namespace dot
-} // namespace dumper
-} // namespace neurun
diff --git a/runtime/neurun/core/src/dumper/dot/OperandNode.h b/runtime/neurun/core/src/dumper/dot/OperandNode.h
deleted file mode 100644
index 5ebd651b6..000000000
--- a/runtime/neurun/core/src/dumper/dot/OperandNode.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Operand.h
- * @brief This file contains Operand
- * @ingroup COM_AI_RUNTIME
- *
- */
-
-#ifndef __NEURUN_DUMPER_DOT_DOT_OPERAND_INFO_H__
-#define __NEURUN_DUMPER_DOT_DOT_OPERAND_INFO_H__
-
-#include <vector>
-
-#include "Node.h"
-#include "ir/Operand.h"
-#include "ir/Index.h"
-
-namespace neurun
-{
-namespace dumper
-{
-namespace dot
-{
-
-/**
- * @brief Class that represents an Operand
- *
- */
-class Operand : public Node
-{
-public:
- enum class Type
- {
- UNDEFINED,
- MODEL_INPUT,
- MODEL_OUTPUT,
- INTERNAL
- };
-
-public:
- static const std::string INPUT_SHAPE;
- static const std::string OUTPUT_SHAPE;
- static const std::string OPERAND_SHAPE;
- static const std::string BG_COLOR_SCHEME;
-
-public:
- /**
- * @brief Construct a new Operand Node object
- *
- * @param[in] index Operand index
- * @param[in] type Operand type
- * @param[in] lower_info Operand LowerInfo
- */
- Operand(const ir::OperandIndex &index, Type type);
-
-private:
- void addBackendLabel();
-};
-
-} // namespace dot
-} // namespace dumper
-} // namespace neurun
-
-#endif // __NEURUN_DUMPER_DOT_DOT_OPERAND_INFO_H__
diff --git a/runtime/neurun/core/src/dumper/dot/OperationNode.cc b/runtime/neurun/core/src/dumper/dot/OperationNode.cc
deleted file mode 100644
index ca870ba05..000000000
--- a/runtime/neurun/core/src/dumper/dot/OperationNode.cc
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <sstream>
-
-#include "OperationNode.h"
-#include "ir/Graph.h"
-#include "ir/operation/LowerInfo.h"
-#include "backend/IConfig.h"
-#include "backend/Backend.h"
-
-namespace neurun
-{
-namespace dumper
-{
-namespace dot
-{
-
-const std::string Operation::OPERATION_SHAPE = "rect";
-const std::string Operation::BG_COLOR_SCHEME = "pastel18";
-
-Operation::Operation(const ir::OperationIndex &index, const ir::Operation &node)
- : Node{"operation" + std::to_string(index.value())}
-{
- setAttribute("label", std::to_string(index.value()) + " : " + node.name());
- setAttribute("shape", OPERATION_SHAPE);
- setAttribute("colorscheme", BG_COLOR_SCHEME);
- setAttribute("fillcolor", DEFAULT_FILLCOLOR);
-}
-
-} // namespace dot
-} // namespace dumper
-} // namespace neurun
diff --git a/runtime/neurun/core/src/dumper/dot/OperationNode.h b/runtime/neurun/core/src/dumper/dot/OperationNode.h
deleted file mode 100644
index ba0713790..000000000
--- a/runtime/neurun/core/src/dumper/dot/OperationNode.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Operation.h
- * @brief This file contains Operation
- * @ingroup COM_AI_RUNTIME
- *
- */
-
-#ifndef __NEURUN_DUMPER_DOT_DOT_NODE_INFO_H__
-#define __NEURUN_DUMPER_DOT_DOT_NODE_INFO_H__
-
-#include "Node.h"
-#include "ir/Operation.h"
-#include "ir/Index.h"
-
-namespace neurun
-{
-namespace dumper
-{
-namespace dot
-{
-
-/**
- * @brief Class that represents an Operation
- *
- */
-class Operation : public Node
-{
-public:
- static const std::string OPERATION_SHAPE;
- static const std::string BG_COLOR_SCHEME;
-
-public:
- /**
- * @brief Construct a new Operation Node object
- *
- * @param[in] index operation index
- * @param[in] node operation object
- */
- Operation(const ir::OperationIndex &index, const ir::Operation &node);
-};
-
-} // namespace dot
-} // namespace dumper
-} // namespace neurun
-
-#endif // __NEURUN_DUMPER_DOT_DOT_NODE_INFO_H__
diff --git a/runtime/neurun/core/src/exec/DataflowExecutor.cc b/runtime/neurun/core/src/exec/DataflowExecutor.cc
deleted file mode 100644
index e22d41031..000000000
--- a/runtime/neurun/core/src/exec/DataflowExecutor.cc
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DataflowExecutor.h"
-
-#include <cassert>
-
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-int64_t DataflowExecutor::calculateRank(const std::vector<ir::Element> &operations)
-{
- int64_t rank = 0;
- if (!_indexed_ranks)
- {
- return rank;
- }
- for (const auto &element : operations)
- {
- auto it = _indexed_ranks->find(element.index);
- if (it == _indexed_ranks->end())
- {
- assert(element.node->opcode() == ir::OpCode::Permute);
- // assign int32_t::max to prevent integer overflow
- rank += std::numeric_limits<int32_t>::max();
- }
- else
- {
- rank += it->second;
- }
- }
- return rank;
-}
-
-void DataflowExecutor::emplaceToReadyJobs(const uint32_t &id)
-{
- auto &job = _waiting_jobs[id];
- assert(job != nullptr);
- auto &subg = _graph.subgraphs()->at(_job_to_op_seq[job->index()]);
- auto rank = calculateRank(subg.operations());
- _ready_jobs.emplace(rank, std::move(job));
-}
-
-void DataflowExecutor::notify(uint32_t finished_job_id)
-{
- for (auto id : _output_info[finished_job_id])
- {
- assert(_input_info[id] > 0);
- auto count = --_input_info[id];
- if (count == 0) // No dependent jobs left, ready for execution
- {
- emplaceToReadyJobs(id);
- }
- }
-}
-bool DataflowExecutor::noWaitingJobs()
-{
- return std::all_of(_waiting_jobs.begin(), _waiting_jobs.end(),
- [](const std::unique_ptr<Job> &job) { return job == nullptr; });
-}
-
-DataflowExecutor::DataflowExecutor(const ir::Graph &graph,
- const std::shared_ptr<compiler::OperandContext> &operand_context,
- std::unique_ptr<backend::TensorManagerSet> tensor_mgrs,
- CodeMap &&code_map)
- : ExecutorBase{graph, operand_context, std::move(tensor_mgrs)}, _code_map{std::move(code_map)}
-{
- VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
-
- const ir::Subgraphs *subgraphs = _graph.subgraphs();
- // Assign jobs convert SubgraphIndex to job index(uint32_t)
- uint32_t next_job_index = 0;
- std::unordered_map<ir::SubgraphIndex, uint32_t> subgraph_to_job;
- subgraphs->iterate([&](const ir::SubgraphIndex &subg_index, const ir::OpSequence &) {
- VERBOSE(DataflowExecutor) << "Create a job #" << next_job_index << " with SubgraphIndex "
- << subg_index.value() << std::endl;
- _finished_jobs.emplace_back(
- nnfw::cpp14::make_unique<Job>(next_job_index, _code_map.at(subg_index).get()));
- subgraph_to_job[subg_index] = next_job_index++;
- });
-
- _waiting_jobs.resize(next_job_index);
- _output_info.resize(next_job_index);
- _initial_input_info.resize(next_job_index, 0);
-
- subgraphs->iterate([&](const ir::SubgraphIndex &subg_index, const ir::OpSequence &subg) {
- auto job_index = subgraph_to_job[subg_index];
- for (auto output : subg.getOutputs())
- {
- // Update output and input info
- subgraphs->iterate(
- [&](const ir::SubgraphIndex &subg_cur_index, const ir::OpSequence &subg_cur) {
- if (subg_cur.getInputs().contains(output))
- {
- auto dep_index = subgraph_to_job[subg_cur_index];
- ++_initial_input_info[dep_index];
- _output_info[job_index].push_back(dep_index);
- }
- });
- }
- });
- for (const auto &s : subgraph_to_job)
- _job_to_op_seq.emplace(s.second, s.first);
-
- _input_info = _initial_input_info;
-}
-
-void DataflowExecutor::executeImpl()
-{
- assert(noWaitingJobs());
-
- // Execution setup
- _waiting_jobs.swap(_finished_jobs); // Move finished jobs to waiting jobs
-
- for (uint32_t i = 0; i < _waiting_jobs.size(); ++i)
- {
- if (_input_info[i] == 0)
- {
- emplaceToReadyJobs(i);
- }
- }
- assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs
- bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE);
-
- _subject.notifyModelBegin(this);
-
- while (!_ready_jobs.empty())
- {
- auto job = std::move((_ready_jobs.begin())->second);
- _ready_jobs.erase(_ready_jobs.begin());
- auto job_index = job->index();
- VERBOSE(DataflowExecutor) << "Run job #" << job_index << std::endl;
-
- auto subgraph_index = _job_to_op_seq[job_index];
- auto op_seq = &_graph.subgraphs()->at(subgraph_index);
- const backend::Backend *backend =
- _graph.getLowerInfo()->operation.at(subgraph_index)->backend();
-
- _subject.notifyJobBegin(this, op_seq, backend);
-
- if (is_profiling)
- job->fn()->runSync();
- else
- job->run();
-
- _subject.notifyJobEnd(this, op_seq, backend);
- notify(job_index);
- _finished_jobs[job_index] = std::move(job);
- }
- assert(noWaitingJobs());
-
- _subject.notifyModelEnd(this);
-
- // Reset input info for the next execution
- _input_info = _initial_input_info;
-}
-
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/DataflowExecutor.h b/runtime/neurun/core/src/exec/DataflowExecutor.h
deleted file mode 100644
index 6c12093fd..000000000
--- a/runtime/neurun/core/src/exec/DataflowExecutor.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__
-#define __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__
-
-#include <list>
-#include <map>
-#include <unordered_map>
-
-#include "FunctionSequence.h"
-#include "Job.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/Index.h"
-#include "cpp14/memory.h"
-#include "exec/ExecutorBase.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-class DataflowExecutor : public ExecutorBase
-{
-public:
- using CodeMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<FunctionSequence>>;
-
-protected:
- virtual void notify(uint32_t finished_job_id);
- bool noWaitingJobs();
-
-public:
- /**
- * @brief Constructs a DataflowExecutor object
- *
- * @param graph Graph object
- * @param operand_context (Only for input/output operand data access)
- * @param code_map Compiled code map
- * @param ranks Operation ranks for ordering execution
- */
- DataflowExecutor(const ir::Graph &graph,
- const std::shared_ptr<compiler::OperandContext> &operand_context,
- std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, CodeMap &&code_map);
-
- void executeImpl() override;
-
-protected:
- int64_t calculateRank(const std::vector<ir::Element> &operations);
- void emplaceToReadyJobs(const uint32_t &id);
-
-protected:
- CodeMap _code_map;
- /**
- * @brief A vector of finished jobs for current execution
- * After a run it has all the jobs of this execution for the next run
- */
- std::vector<std::unique_ptr<Job>> _finished_jobs;
- /**
- * @brief A vector of waiting jobs for current execution
- * All the jobs are moved from #_finished_jobs to it when start a run
- */
- std::vector<std::unique_ptr<Job>> _waiting_jobs;
- /**
- * @brief Jobs' output info
- * Used for notifying after finishing a job
- */
- std::vector<std::list<uint32_t>> _output_info;
- std::vector<uint32_t> _initial_input_info;
- std::vector<uint32_t> _input_info;
- /**
- * @brief A collection of jobs that are ready for execution
- * Jobs in it are ready to be scheduled.
- * Ordered by priority from `_indexed_ranks`
- */
- std::multimap<int64_t, std::unique_ptr<Job>, std::greater<int64_t>> _ready_jobs;
-
- /// @brief Which job runs which op and function.
- std::unordered_map<uint32_t, ir::SubgraphIndex> _job_to_op_seq;
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_DATAFLOW_EXECUTOR_H__
diff --git a/runtime/neurun/core/src/exec/Execution.cc b/runtime/neurun/core/src/exec/Execution.cc
deleted file mode 100644
index bc7bbd160..000000000
--- a/runtime/neurun/core/src/exec/Execution.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "exec/Execution.h"
-
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-Execution::Execution(const std::shared_ptr<IExecutor> &executor) : _executor{executor}
-{
- _io_desc.inputs.resize(_executor->graph().getInputs().size());
- _io_desc.outputs.resize(_executor->graph().getOutputs().size());
-}
-
-// TODO Remove default parameter
-void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length,
- ir::Layout layout)
-{
- const auto input_index = graph().getInputs().at(index);
- const auto info = graph().operands().at(input_index).info();
-
- if (length < info.total_size())
- {
- throw std::runtime_error{"Too small length"};
- }
-
- _io_desc.inputs.at(index.value()) =
- nnfw::cpp14::make_unique<InputDesc>(info, buffer, length, layout);
-}
-
-// TODO Remove default parameter
-void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape,
- const void *buffer, size_t length, ir::Layout layout)
-{
- const ir::OperandInfo info{shape, type};
-
- if (length < info.total_size())
- {
- throw std::runtime_error{"Too small length"};
- }
-
- _io_desc.inputs.at(index.value()) =
- nnfw::cpp14::make_unique<InputDesc>(info, buffer, length, layout);
-}
-
-// TODO Remove default parameter
-void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout)
-{
- const auto output_index = graph().getOutputs().at(index);
- const auto info = graph().operands().at(output_index).info();
-
- if (length < info.total_size())
- {
- throw std::runtime_error{"Too small length"};
- }
-
- _io_desc.outputs.at(index.value()) =
- nnfw::cpp14::make_unique<OutputDesc>(info, buffer, length, layout);
-}
-
-// TODO Remove default parameter
-void Execution::setOutput(const ir::IOIndex &index, const ir::TypeInfo &type,
- const ir::Shape &shape, void *buffer, size_t length, ir::Layout layout)
-{
- const ir::OperandInfo info{shape, type};
-
- if (length < info.total_size())
- {
- throw std::runtime_error{"Too small length"};
- }
-
- _io_desc.outputs.at(index.value()) =
- nnfw::cpp14::make_unique<OutputDesc>(info, buffer, length, layout);
-}
-
-void Execution::setInputLayout(const ir::IOIndex &index, ir::Layout layout)
-{
- const auto &input_desc = _io_desc.inputs.at(index.value());
- _io_desc.inputs.at(index.value()) = nnfw::cpp14::make_unique<InputDesc>(
- input_desc->info, input_desc->buffer, input_desc->size, layout);
-}
-
-void Execution::setOutputLayout(const ir::IOIndex &index, ir::Layout layout)
-{
- const auto &output_desc = _io_desc.outputs.at(index.value());
- _io_desc.outputs.at(index.value()) = nnfw::cpp14::make_unique<OutputDesc>(
- output_desc->info, output_desc->buffer, output_desc->size, layout);
-}
-
-void Execution::execute()
-{
- VERBOSE(Execution) << "Start execution" << std::endl;
-
- _executor->execute(_io_desc);
- finished = true;
-
- VERBOSE(Execution) << "Execution finished" << std::endl;
-}
-
-void Execution::startExecute()
-{
- VERBOSE(Execution) << "Create asynchronous execution thread" << std::endl;
-
- _exec_thread = nnfw::cpp14::make_unique<std::thread>(&Execution::execute, this);
-}
-
-void Execution::waitFinish()
-{
- VERBOSE(Execution) << "Wait to finish execution" << std::endl;
-
- _exec_thread->join();
- finished = true;
-}
-
-bool Execution::isFinished(void) const { return finished; }
-
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ExecutionObservee.cc b/runtime/neurun/core/src/exec/ExecutionObservee.cc
deleted file mode 100644
index 3b342d703..000000000
--- a/runtime/neurun/core/src/exec/ExecutionObservee.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ExecutionObservee.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-void ExecutionObservee::add(std::unique_ptr<IExecutionObserver> observer)
-{
- _observers.emplace_back(std::move(observer));
-}
-
-void ExecutionObservee::notifyModelBegin(IExecutor *executor)
-{
- for (auto &o : _observers)
- {
- o->handleBegin(executor);
- }
-}
-
-void ExecutionObservee::notifyModelEnd(IExecutor *executor)
-{
- for (auto &o : _observers)
- {
- o->handleEnd(executor);
- }
-}
-
-void ExecutionObservee::notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
-{
- for (auto &o : _observers)
- {
- o->handleBegin(executor, op_seq, backend);
- }
-}
-
-void ExecutionObservee::notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
-{
- for (auto &o : _observers)
- {
- o->handleEnd(executor, op_seq, backend);
- }
-}
-
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ExecutionObservee.h b/runtime/neurun/core/src/exec/ExecutionObservee.h
deleted file mode 100644
index dafeef55b..000000000
--- a/runtime/neurun/core/src/exec/ExecutionObservee.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_EXECUTION_OBSERVEE_H__
-#define __NEURUN_EXEC_EXECUTION_OBSERVEE_H__
-
-#include <list>
-
-#include "exec/ExecutionObservers.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-/**
- * @brief Class that
- *
- */
-class ExecutionObservee
-{
-public:
- /**
- * @brief Register an observer
- *
- * @param observer Observer to be added
- */
- void add(std::unique_ptr<IExecutionObserver> observer);
- void notifyModelBegin(IExecutor *executor);
- void notifyModelEnd(IExecutor *executor);
- void notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
- const backend::Backend *backend);
- void notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
- const backend::Backend *backend);
-
-private:
- std::list<std::unique_ptr<IExecutionObserver>> _observers;
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_EXECUTION_OBSERVEE__
diff --git a/runtime/neurun/core/src/exec/ExecutionObservers.cc b/runtime/neurun/core/src/exec/ExecutionObservers.cc
deleted file mode 100644
index 071a9e228..000000000
--- a/runtime/neurun/core/src/exec/ExecutionObservers.cc
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "exec/ExecutionObservers.h"
-
-#include <string>
-
-#include "util/logging.h"
-#include "ir/operation/Permute.h"
-#include "exec/IExecutor.h"
-#include "misc/polymorphic_downcast.h"
-#include "ir/OpSequence.h"
-
-namespace neurun
-{
-
-namespace exec
-{
-
-void ProfileObserver::handleBegin(neurun::exec::IExecutor *, const ir::OpSequence *,
- const neurun::backend::Backend *backend)
-{
- _timer = backend->config()->timer();
- if (_timer == nullptr)
- throw std::runtime_error("To profile backend timer() method must be implemented");
- _timer->handleBegin();
-}
-
-void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
-{
- _timer->handleEnd();
- const auto timer_res = _timer->getTime();
-
- // NOTE This assumes there is just one operation in a op_seq
- auto node = op_seq->operations().at(0).node;
- auto node_name = node->name();
- VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << timer_res << std::endl;
-
- // fill ExecTime:
- bool is_quantized = exec->graph().operands().at(node->getInputs().at(0)).typeInfo().type() ==
- ir::DataType::QUANT8_ASYMM;
-
- uint32_t size = 0;
- for (const auto &input : node->getInputs())
- {
- size += exec->graph().operands().at(input).info().total_size();
- }
- for (const auto &output : node->getOutputs())
- {
- size += exec->graph().operands().at(output).info().total_size();
- }
- if (node_name == "Permute")
- {
- auto *permute_node = nnfw::misc::polymorphic_downcast<const ir::operation::Permute *>(node);
- assert(permute_node != nullptr);
- _et->updatePermuteTime(permute_node->param().input_backend_ctx->backend,
- permute_node->param().output_backend_ctx->backend, is_quantized, size,
- timer_res);
- }
- else
- {
- _et->updateOperationExecTime(backend, node_name, is_quantized, size, timer_res);
- }
-};
-
-ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath)
- : _ofs{filepath, std::ofstream::out}, _recorder{}, _collector{&_recorder}
-{
-}
-
-ChromeTracingObserver::~ChromeTracingObserver() { _recorder.writeToFile(_ofs); }
-
-void ChromeTracingObserver::handleBegin(IExecutor *)
-{
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"});
-}
-
-void ChromeTracingObserver::handleBegin(IExecutor *, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
-{
- std::string backend_id = backend->config()->id();
- _collector.onEvent(
- EventCollector::Event{EventCollector::Edge::BEGIN, backend_id, subgraphTag(op_seq)});
-}
-
-void ChromeTracingObserver::handleEnd(IExecutor *, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
-{
- std::string backend_id = backend->config()->id();
- _collector.onEvent(
- EventCollector::Event{EventCollector::Edge::END, backend_id, subgraphTag(op_seq)});
-}
-
-void ChromeTracingObserver::handleEnd(IExecutor *)
-{
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"});
-}
-
-std::string ChromeTracingObserver::subgraphTag(const ir::OpSequence *op_seq)
-{
- if (op_seq->size() == 0)
- return "Empty OpSequence";
-
- auto first_op = op_seq->operations().at(0);
- std::string tag = "$" + std::to_string(first_op.index.value());
- tag += " " + first_op.node->name();
- if (op_seq->size() > 1)
- {
- tag += " (+" + std::to_string(op_seq->size() - 1) + ")";
- }
- return tag;
-}
-
-} // namespace exec
-
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ExecutorBase.cc b/runtime/neurun/core/src/exec/ExecutorBase.cc
deleted file mode 100644
index 9692c2ba7..000000000
--- a/runtime/neurun/core/src/exec/ExecutorBase.cc
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ExecutorBase.h"
-#include "util/logging.h"
-namespace neurun
-{
-namespace exec
-{
-
-ExecutorBase::ExecutorBase(const ir::Graph &graph,
- const std::shared_ptr<compiler::OperandContext> &operand_context,
- std::unique_ptr<backend::TensorManagerSet> tensor_mgrs)
- : _graph{graph}, _operand_context{operand_context}, _tensor_mgrs{std::move(tensor_mgrs)},
- _mutex()
-{
- // DO NOTHING
-}
-
-std::unique_ptr<ISource> ExecutorBase::source(const ir::IOIndex &index, const ir::TypeInfo &type,
- const void *buffer, size_t length,
- ir::Layout io_layout)
-{
- using ir::DataType;
- switch (type.type())
- {
- case DataType::FLOAT32:
- return source<float>(index, buffer, length, io_layout);
- case DataType::INT32:
- return source<int32_t>(index, buffer, length, io_layout);
- case DataType::UINT32:
- return source<uint32_t>(index, buffer, length, io_layout);
- case DataType::BOOL8:
- case DataType::QUANT8_ASYMM:
- case DataType::UINT8:
- return source<uint8_t>(index, buffer, length, io_layout);
- case DataType::QUANT8_SYMM:
- return source<int8_t>(index, buffer, length, io_layout);
- default:
- throw std::runtime_error("Not supported yet");
- }
-}
-
-std::unique_ptr<ISink> ExecutorBase::sink(const ir::IOIndex &index, const ir::TypeInfo &type,
- void *buffer, size_t length, ir::Layout io_layout)
-{
- using ir::DataType;
- switch (type.type())
- {
- case DataType::FLOAT32:
- return sink<float>(index, buffer, length, io_layout);
- case DataType::INT32:
- return sink<int32_t>(index, buffer, length, io_layout);
- case DataType::UINT32:
- return sink<uint32_t>(index, buffer, length, io_layout);
- case DataType::BOOL8:
- case DataType::QUANT8_ASYMM:
- case DataType::UINT8:
- return sink<uint8_t>(index, buffer, length, io_layout);
- case DataType::QUANT8_SYMM:
- return sink<int8_t>(index, buffer, length, io_layout);
- default:
- throw std::runtime_error("Not supported yet");
- }
-}
-
-void ExecutorBase::execute(const IODescription &desc)
-{
- // For thread-safe, use mutex
- // TODO: if all used backends on this executor are thread-safe,
- // do not need to use mutex (otherwise, use mutex)
- std::lock_guard<std::mutex> lock(_mutex);
-
- std::vector<std::unique_ptr<ISource>> sources{_graph.getInputs().size()};
- std::vector<std::unique_ptr<ISink>> sinks{_graph.getOutputs().size()};
-
- // Set input(s)
- for (uint32_t n = 0; n < _graph.getInputs().size(); ++n)
- {
- ir::IOIndex input_index{n};
- ir::OperandIndex index{_graph.getInputs().at(input_index)};
-
- if (desc.inputs.at(n) == nullptr)
- {
- // Optional input
- continue;
- }
-
- const auto operand_li = _graph.getLowerInfo()->operand.at(index).get();
- if (operand_li->def_factors().empty())
- {
- // This input is not used (i.e. constant, EX. reshape's axis)
- continue;
- }
-
- const auto &input = *desc.inputs.at(n);
- sources.at(n) =
- source(input_index, input.info.typeInfo(), input.buffer, input.size, input.layout);
-
- auto setter = [&](::neurun::backend::operand::ITensor &tensor) { sources.at(n)->push(tensor); };
-
- auto object = _operand_context->at(index);
-
- object->access(setter);
- }
-
- executeImpl();
-
- // Get output(s)
- for (uint32_t n = 0; n < _graph.getOutputs().size(); ++n)
- {
- ir::IOIndex output_index{n};
- // Optional output
- if (desc.outputs.at(n) == nullptr)
- {
- continue;
- }
- const auto &output = *desc.outputs.at(n);
- sinks.at(n) =
- sink(output_index, output.info.typeInfo(), output.buffer, output.size, output.layout);
-
- auto getter = [&](::neurun::backend::operand::ITensor &tensor) { sinks.at(n)->pull(tensor); };
-
- ir::OperandIndex index{_graph.getOutputs().at(output_index)};
- auto object = _operand_context->at(index);
-
- object->access(getter);
- }
-}
-
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ExecutorBase.h b/runtime/neurun/core/src/exec/ExecutorBase.h
deleted file mode 100644
index a93e036a5..000000000
--- a/runtime/neurun/core/src/exec/ExecutorBase.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_EXECUTOR_BASE_H__
-#define __NEURUN_EXEC_EXECUTOR_BASE_H__
-
-#include <mutex>
-
-#include "Source.h"
-#include "exec/ExecutionObservers.h"
-#include "Sink.h"
-#include "exec/IExecutor.h"
-#include "ir/Graph.h"
-#include "ir/LowerInfoMap.h"
-#include "backend/IConfig.h"
-#include "backend/Backend.h"
-#include "compiler/OperandContext.h"
-#include "backend/ExecTime.h"
-#include "exec/IFunction.h"
-#include "backend/ITensorManager.h"
-#include "exec/ExecutionObservee.h"
-#include <list>
-
-namespace neurun
-{
-namespace exec
-{
-
-class ExecutorBase : public IExecutor
-{
-public:
- ExecutorBase(const ir::Graph &graph,
- const std::shared_ptr<compiler::OperandContext> &operand_context,
- std::unique_ptr<backend::TensorManagerSet> tensor_mgrs);
-
- virtual ~ExecutorBase() = default;
-
- const ir::Graph &graph() final { return _graph; }
-
- void execute(const IODescription &desc) final;
-
- // Used only in Dataflow and Parallel Executors
- void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final
- {
- _indexed_ranks = std::move(ranks);
- };
-
- virtual void executeImpl(void) = 0;
-
- void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
-
-private:
- std::unique_ptr<ISource> source(const ir::IOIndex &index, const ir::TypeInfo &type,
- const void *buffer, size_t length, ir::Layout io_layout);
- std::unique_ptr<ISink> sink(const ir::IOIndex &index, const ir::TypeInfo &type, void *buffer,
- size_t length, ir::Layout io_layout);
-
- template <typename T>
- std::unique_ptr<ISource> source(const ir::IOIndex &index, const void *buffer, size_t length,
- ir::Layout io_layout)
- {
- const auto operand_index = _graph.getInputs().at(index);
- const auto &operand = _graph.operands().at(operand_index);
-
- const auto tensor = _operand_context->at(operand_index);
- const auto tensor_layout = tensor->layout();
-
- if (((io_layout == ir::Layout::NHWC) && (tensor_layout == ir::Layout::NCHW)) ||
- ((io_layout == ir::Layout::NCHW) && (tensor_layout == ir::Layout::NHWC)))
- {
- return nnfw::cpp14::make_unique<PermutateSource<T>>(buffer, length, operand.shape(),
- io_layout);
- }
- // TODO Change this to return error
- assert(io_layout != ir::Layout::UNKNOWN ||
- (tensor_layout != ir::Layout::NCHW && tensor_layout != ir::Layout::NCHW));
-
- return nnfw::cpp14::make_unique<CopySource<T>>(buffer, length, operand.shape());
- }
-
- template <typename T>
- std::unique_ptr<ISink> sink(const ir::IOIndex &index, void *buffer, size_t length,
- ir::Layout io_layout)
- {
- const auto operand_index = _graph.getOutputs().at(index);
- const auto &operand = _graph.operands().at(operand_index);
- const auto tensor = _operand_context->at(operand_index);
- const auto tensor_layout = tensor->layout();
-
- if (((tensor_layout == ir::Layout::NCHW) && (io_layout == ir::Layout::NHWC)) ||
- ((tensor_layout == ir::Layout::NHWC) && (io_layout == ir::Layout::NCHW)))
- {
- return nnfw::cpp14::make_unique<PermutateSink<T>>(buffer, length, operand.shape(), io_layout);
- }
- // TODO Change this to return error
- assert(io_layout != ir::Layout::UNKNOWN ||
- (tensor_layout != ir::Layout::NCHW && tensor_layout != ir::Layout::NCHW));
-
- return nnfw::cpp14::make_unique<CopySink<T>>(buffer, length, operand.shape());
- }
-
-protected:
- ExecutionObservee _subject;
- std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
- const ir::Graph &_graph;
- std::shared_ptr<compiler::OperandContext> _operand_context;
- std::unique_ptr<backend::TensorManagerSet> _tensor_mgrs;
- std::mutex _mutex;
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_EXECUTOR_BASE_H__
diff --git a/runtime/neurun/core/src/exec/FunctionSequence.cc b/runtime/neurun/core/src/exec/FunctionSequence.cc
deleted file mode 100644
index 00214fcfa..000000000
--- a/runtime/neurun/core/src/exec/FunctionSequence.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "FunctionSequence.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-void FunctionSequence::run()
-{
- for (const auto &function : _functions)
- {
- function->run();
- }
-}
-
-void FunctionSequence::runSync()
-{
- for (const auto &function : _functions)
- {
- function->runSync();
- }
-}
-
-void FunctionSequence::prepare()
-{
- for (const auto &function : _functions)
- {
- function->prepare();
- }
-}
-
-void FunctionSequence::append(std::unique_ptr<IFunction> &&function)
-{
- _functions.push_back(std::move(function));
-}
-
-void FunctionSequence::iterate(const std::function<void(IFunction &)> &fn)
-{
- for (const auto &func : _functions)
- {
- fn(*func);
- }
-}
-
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/FunctionSequence.h b/runtime/neurun/core/src/exec/FunctionSequence.h
deleted file mode 100644
index 2ba5c0b08..000000000
--- a/runtime/neurun/core/src/exec/FunctionSequence.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_FUNCTION_SEQUENCE_H__
-#define __NEURUN_EXEC_FUNCTION_SEQUENCE_H__
-
-#include <memory>
-#include <vector>
-#include <functional>
-
-#include "exec/IFunction.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-class FunctionSequence : public IFunction
-{
-public:
- virtual ~FunctionSequence() = default;
-
- void run() override;
- void runSync() override;
- void prepare() override;
-
- /**
- * @brief Appends an IFunction object to the function sequence
- *
- * @param function IFunction object to be appended
- */
- void append(std::unique_ptr<IFunction> &&function);
-
- void iterate(const std::function<void(IFunction &)> &fn);
-
-private:
- std::vector<std::unique_ptr<IFunction>> _functions;
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_FUNCTION_SEQUENCE_H__
diff --git a/runtime/neurun/core/src/exec/Job.cc b/runtime/neurun/core/src/exec/Job.cc
deleted file mode 100644
index ba02daf30..000000000
--- a/runtime/neurun/core/src/exec/Job.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Job.h"
-
-#include <cassert>
-
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-Job::Job(uint32_t index, IFunction *fn) : _index{index}, _fn{fn} {}
-
-void Job::run() { _fn->run(); }
-
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/Job.h b/runtime/neurun/core/src/exec/Job.h
deleted file mode 100644
index 1516b9281..000000000
--- a/runtime/neurun/core/src/exec/Job.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_JOB_H__
-#define __NEURUN_EXEC_JOB_H__
-
-#include <unordered_set>
-
-#include "exec/IFunction.h"
-#include "ir/Index.h"
-#include "ir/OperandIndexSequence.h"
-#include "backend/Backend.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-class Job
-{
-public:
- /**
- * @brief Constructs a Job object
- *
- * @param index Operation index for this job
- * @param fn compiled code to run this job
- * @param inputs Input operand list
- * @param outputs Output operand list
- */
- Job(uint32_t index, IFunction *fn);
- /**
- * @brief Execute the compiled code
- */
- void run();
- /**
- * @brief Return job index
- *
- * @return Job index
- */
- uint32_t index() const { return _index; }
- /**
- * @brief Return the function to be executed
- *
- * @return Pointer of the function
- */
- IFunction *fn() { return _fn; }
-
-private:
- uint32_t _index;
- IFunction *_fn;
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_JOB_H__
diff --git a/runtime/neurun/core/src/exec/LinearExecutor.cc b/runtime/neurun/core/src/exec/LinearExecutor.cc
deleted file mode 100644
index d41dba880..000000000
--- a/runtime/neurun/core/src/exec/LinearExecutor.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LinearExecutor.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-void LinearExecutor::executeImpl()
-{
- _subject.notifyModelBegin(this);
- for (auto &&code : _code)
- {
- const auto op_seq = code.elem.op_seq;
- const auto backend = code.elem.lower_info->backend();
- _subject.notifyJobBegin(this, op_seq, backend);
- code.fn->run();
- _subject.notifyJobEnd(this, op_seq, backend);
- }
- _subject.notifyModelEnd(this);
-}
-
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/LinearExecutor.h b/runtime/neurun/core/src/exec/LinearExecutor.h
deleted file mode 100644
index baf063a12..000000000
--- a/runtime/neurun/core/src/exec/LinearExecutor.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file LinearExecutor.h
- * @brief This file contains LinearExecutor class to define and run execution phase
- */
-
-#ifndef __NEURUN_EXEC_EXECUTOR_H_
-#define __NEURUN_EXEC_EXECUTOR_H_
-
-#include "ExecutorBase.h"
-#include "compiler/Linear.h"
-#include "exec/FunctionSequence.h"
-#include "compiler/CodeWithInfo.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-/**
- * @brief Class to handle execution phase. Simple run the sequence of operations that is sorted in
- * topological order
- */
-class LinearExecutor final : public ExecutorBase
-{
-public:
- /**
- * @brief Construct a new LinearExecutor object
- * @param[in] plan Execution plan generated by compiled result
- */
- LinearExecutor(const ir::Graph &graph,
- const std::shared_ptr<compiler::OperandContext> &operand_context,
- std::unique_ptr<backend::TensorManagerSet> tensor_mgrs,
- std::vector<compiler::CodeWithInfo> &&code)
- : ExecutorBase{graph, operand_context, std::move(tensor_mgrs)}, _code{std::move(code)}
- {
- }
-
-public:
- void executeImpl(void) override;
-
-private:
- std::vector<compiler::CodeWithInfo> _code;
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_EXECUTOR_H_
diff --git a/runtime/neurun/core/src/exec/ParallelExecutor.cc b/runtime/neurun/core/src/exec/ParallelExecutor.cc
deleted file mode 100644
index c73c353d3..000000000
--- a/runtime/neurun/core/src/exec/ParallelExecutor.cc
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ParallelExecutor.h"
-
-#include <cassert>
-
-#include "util/logging.h"
-#include "exec/IFunction.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-class HookFunction : public IFunction
-{
-public:
- HookFunction(IFunction *fn, const std::function<void()> &setup,
- const std::function<void()> &teardown)
- : _fn{fn}, _setup{setup}, _teardown{teardown}
- {
- }
-
-public:
- void run() override
- {
- _setup();
- _fn->run();
- _teardown();
- }
- void runSync() override { throw("runSync is needed just for profiling in Dataflow executor"); }
-
-private:
- IFunction *_fn;
- std::function<void()> _setup;
- std::function<void()> _teardown;
-};
-
-void ParallelExecutor::notify(uint32_t finished_job_id)
-{
- std::unique_lock<std::mutex> lock{_mu_jobs};
-
- DataflowExecutor::notify(finished_job_id);
-
- lock.unlock();
- _cv_jobs.notify_all();
-}
-
-ParallelExecutor::ParallelExecutor(const ir::Graph &graph,
- const std::shared_ptr<compiler::OperandContext> &operand_context,
- std::unique_ptr<backend::TensorManagerSet> tensor_mgrs,
- CodeMap &&code_map)
- : DataflowExecutor{graph, operand_context, std::move(tensor_mgrs), std::move(code_map)}
-{
- VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
-}
-
-void ParallelExecutor::executeImpl()
-{
- // Init scheduler
- // TODO Consider to have distinct backend set in LowerInfoMap
- ir::BackendSet backends;
- for (auto &itr : _graph.getLowerInfo()->operation)
- {
- backends.add(itr.second->backend());
- }
- _scheduler = nnfw::cpp14::make_unique<ParallelScheduler>(backends);
-
- assert(noWaitingJobs());
-
- // Execution setup
- _waiting_jobs.swap(_finished_jobs); // Move finished jobs to waiting jobs
-
- for (uint32_t i = 0; i < _waiting_jobs.size(); ++i)
- {
- VERBOSE(ParallelExecutor) << i << ": " << _input_info[i] << std::endl;
- if (_input_info[i] == 0)
- {
- emplaceToReadyJobs(i);
- }
- }
- assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs
-
- VERBOSE(ParallelExecutor) << "INITIAL JOBS : " << _ready_jobs.size() << std::endl;
-
- _subject.notifyModelBegin(this);
- while (true)
- {
- std::unique_lock<std::mutex> lock{_mu_jobs};
-
- if (_ready_jobs.empty())
- {
- _cv_jobs.wait(lock, [this] { return !_ready_jobs.empty() || noWaitingJobs(); });
- // Check finish condition
- if (_ready_jobs.empty() && noWaitingJobs())
- {
- break;
- }
- }
-
- auto job = std::move(_ready_jobs.begin()->second);
- _ready_jobs.erase(_ready_jobs.begin());
-
- lock.unlock();
-
- VERBOSE(ParallelExecutor) << "Assigning fn #" << job->index() << std::endl;
-
- auto job_index = job->index();
- auto subgraph_index = _job_to_op_seq[job_index];
- auto op_seq = &_graph.subgraphs()->at(subgraph_index);
- auto backend = _graph.getLowerInfo()->operation.at(subgraph_index)->backend();
- auto setup = [&, op_seq, backend]() { _subject.notifyJobBegin(this, op_seq, backend); };
- auto teardown = [&, job_index, op_seq, backend]() {
- _subject.notifyJobEnd(this, op_seq, backend);
- notify(job_index);
- };
-
- _scheduler->assign(nnfw::cpp14::make_unique<HookFunction>(job->fn(), setup, teardown), backend);
- _finished_jobs[job_index] = std::move(job);
- }
-
- assert(noWaitingJobs());
-
- // Wait for all the jobs done
- _scheduler->finish();
- _subject.notifyModelEnd(this);
-
- // Reset input info for the next execution
- _input_info = _initial_input_info;
-}
-
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ParallelExecutor.h b/runtime/neurun/core/src/exec/ParallelExecutor.h
deleted file mode 100644
index 54377fd9e..000000000
--- a/runtime/neurun/core/src/exec/ParallelExecutor.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_PARALLEL_EXECUTOR_H__
-#define __NEURUN_EXEC_PARALLEL_EXECUTOR_H__
-
-#include <list>
-#include <queue>
-#include <unordered_map>
-
-#include "FunctionSequence.h"
-#include "Job.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/Index.h"
-#include "cpp14/memory.h"
-#include "exec/DataflowExecutor.h"
-#include "ParallelScheduler.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-/**
- * @brief Class to execute Graph in parallel
- */
-class ParallelExecutor : public DataflowExecutor
-{
-protected:
- void notify(uint32_t finished_job_id) override;
-
-public:
- /**
- * @brief Constructs a ParallelExecutor object
- *
- * @param graph Graph object
- * @param operand_context (Only for input/output operand data access)
- * @param code_map Compiled code map
- * @param ranks Operation ranks for ordering execution
- */
- ParallelExecutor(const ir::Graph &graph,
- const std::shared_ptr<compiler::OperandContext> &operand_context,
- std::unique_ptr<backend::TensorManagerSet> tensor_mgrs, CodeMap &&code_map);
-
- void executeImpl() override;
-
-private:
- std::condition_variable _cv_jobs;
- std::mutex _mu_jobs;
- std::unique_ptr<ParallelScheduler> _scheduler;
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_PARALLEL_EXECUTOR_H__
diff --git a/runtime/neurun/core/src/exec/ParallelScheduler.cc b/runtime/neurun/core/src/exec/ParallelScheduler.cc
deleted file mode 100644
index 5f9e9e013..000000000
--- a/runtime/neurun/core/src/exec/ParallelScheduler.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ParallelScheduler.h"
-
-#include <cassert>
-
-#include "cpp14/memory.h"
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-ParallelScheduler::ParallelScheduler(const ir::BackendSet &backends)
-{
- assert(!backends.empty());
-
- for (auto backend : backends)
- {
- _thread_pools[backend] = nnfw::cpp14::make_unique<ThreadPool>();
- }
-}
-
-void ParallelScheduler::assign(std::unique_ptr<IFunction> &&fn, const backend::Backend *backend)
-{
- assert(!_thread_pools.empty());
-
- _thread_pools.at(backend)->enqueue(std::move(fn));
-}
-
-void ParallelScheduler::finish()
-{
- for (auto &itr : _thread_pools)
- {
- itr.second->finish();
- }
-}
-
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ParallelScheduler.h b/runtime/neurun/core/src/exec/ParallelScheduler.h
deleted file mode 100644
index af1103750..000000000
--- a/runtime/neurun/core/src/exec/ParallelScheduler.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_PARALLEL_SCHEDULER_H__
-#define __NEURUN_EXEC_PARALLEL_SCHEDULER_H__
-
-#include <unordered_map>
-#include <memory>
-
-#include "exec/IFunction.h"
-#include "ir/BackendSet.h"
-#include "ThreadPool.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-class ParallelScheduler
-{
-public:
- /**
- * @brief Constructs ParallelScheduler object
- *
- * @param backends Backend set
- */
- ParallelScheduler(const ir::BackendSet &backends);
- /**
- * @brief Assign a task to the given backend
- *
- * @param[in] fn Function to be assigned
- * @param[in] fn Target backend
- */
- void assign(std::unique_ptr<IFunction> &&fn, const backend::Backend *backend);
- /**
- * @brief Block until all jobs are finished
- */
- void finish();
-
-private:
- std::unordered_map<const backend::Backend *, std::unique_ptr<ThreadPool>> _thread_pools;
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_PARALLEL_SCHEDULER_H__
diff --git a/runtime/neurun/core/src/exec/Sink.h b/runtime/neurun/core/src/exec/Sink.h
deleted file mode 100644
index bb2a6c58a..000000000
--- a/runtime/neurun/core/src/exec/Sink.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_SINK_H__
-#define __NEURUN_EXEC_SINK_H__
-
-#include <cassert>
-
-#include "cpp14/memory.h"
-#include "util/feature/nchw/Reader.h"
-#include "util/feature/nchw/View.h"
-#include "util/feature/nhwc/Reader.h"
-#include "util/feature/nhwc/View.h"
-#include "util/Utils.h"
-#include <misc/feature/IndexIterator.h>
-
-namespace neurun
-{
-namespace exec
-{
-struct ISink
-{
- virtual ~ISink() = default;
-
- virtual void pull(::neurun::backend::operand::ITensor &tensor) const = 0;
-};
-
-// Create second lever inheritance: the first lever is used as a reference type in use-case places
-template <typename T> class ITemplSink : public ISink
-{
-public:
- ITemplSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
- const bool copy, ir::Layout io_layout)
- : _output_buffer{reinterpret_cast<T *>(output_buffer)}, _output_size{output_size},
- _shape{shape}, _copy{copy}, _io_layout{io_layout}
- {
- }
-
-protected:
- void pullUnif(neurun::backend::operand::ITensor &tensor) const
- {
- assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
- (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
- _copy);
- auto input_buffer = tensor.buffer();
- auto rank = _shape.rank();
-
- if (!tensor.has_padding() && rank < 4 + _copy)
- {
- memcpy(_output_buffer, input_buffer, _output_size);
- return;
- }
-
- switch (rank)
- {
- case 0:
- case 1:
- {
- memcpy(_output_buffer, input_buffer, _output_size);
- break;
- }
- case 2:
- {
- const int32_t copy_len = _shape.dim(1);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- neurun::util::Coordinates coords{i, 0};
- memcpy(_output_buffer + i * copy_len, input_buffer + tensor.calcOffset(coords),
- copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- neurun::util::Coordinates coords{i, j, 0};
- memcpy(_output_buffer + i * dim1 * dim2 + j * dim2,
- input_buffer + tensor.calcOffset(coords), dim2 * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- if (_copy)
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
- const int32_t dim3 = _shape.dim(3);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- for (auto k = 0; k < _shape.dim(2); ++k)
- {
- neurun::util::Coordinates coords{i, j, k, 0};
- memcpy(_output_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
- input_buffer + tensor.calcOffset(coords), dim3 * sizeof(T));
- }
- }
- }
- }
- else
- {
- const auto shape = _shape.asFeature(_io_layout);
-
- if (_io_layout == ir::Layout::NHWC)
- {
- const util::feature::nchw::Reader<T> from(&tensor);
- util::feature::nhwc::View<T> into(shape, _output_buffer, _output_size);
- ::nnfw::misc::feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, row, col, ch) = value;
- };
- }
- else if (_io_layout == ir::Layout::NCHW)
- {
- const util::feature::nhwc::Reader<T> from(&tensor);
- util::feature::nchw::View<T> into(shape, _output_buffer, _output_size);
- ::nnfw::misc::feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, row, col, ch);
- into.at(batch, ch, row, col) = value;
- };
- }
- else
- {
- throw std::runtime_error("Wrong Layout");
- }
- }
- break;
- }
- default:
- throw std::runtime_error("NYI");
- break;
- }
- }
-
-private:
- T *_output_buffer;
- const size_t _output_size;
- const ir::Shape _shape;
- const bool _copy;
- const ir::Layout _io_layout;
-};
-
-template <typename T> class PermutateSink final : public ITemplSink<T>
-{
-public:
- PermutateSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
- ir::Layout io_layout)
- : ITemplSink<T>(output_buffer, output_size, shape, false, io_layout)
- {
- }
-
-public:
- void pull(neurun::backend::operand::ITensor &tensor) const override
- {
- ITemplSink<T>::pullUnif(tensor);
- }
-};
-
-// Only supports NHWC format front-end(NNAPI) now
-template <typename T> class CopySink final : public ITemplSink<T>
-{
-public:
- CopySink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
- ir::Layout io_layout = ir::Layout::UNKNOWN)
- : ITemplSink<T>(output_buffer, output_size, shape, true, io_layout)
- {
- }
-
-public:
- void pull(neurun::backend::operand::ITensor &tensor) const override
- {
- ITemplSink<T>::pullUnif(tensor);
- }
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_SINK_H__
diff --git a/runtime/neurun/core/src/exec/Source.h b/runtime/neurun/core/src/exec/Source.h
deleted file mode 100644
index fd52dd546..000000000
--- a/runtime/neurun/core/src/exec/Source.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_SOURCE_H__
-#define __NEURUN_EXEC_SOURCE_H__
-
-#include <cassert>
-
-#include "cpp14/memory.h"
-#include "util/feature/nchw/Reader.h"
-#include "util/feature/nchw/View.h"
-#include "util/feature/nhwc/Reader.h"
-#include "util/feature/nhwc/View.h"
-#include "util/Utils.h"
-#include <misc/feature/IndexIterator.h>
-#include <ir/Layout.h>
-#include "ir/Shape.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-struct ISource
-{
- virtual ~ISource() = default;
-
- virtual void push(::neurun::backend::operand::ITensor &tensor) const = 0;
-};
-
-// Create second lever inheritance: the first lever is used as a reference type in use-case places
-template <typename T> class ITemplSource : public ISource
-{
-public:
- ITemplSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
- const bool copy, ir::Layout io_layout)
- : _input_buffer{reinterpret_cast<const T *>(input_buffer)}, _input_size{input_size},
- _shape{shape}, _copy(copy), _io_layout{io_layout}
- {
- }
-
- virtual void push(::neurun::backend::operand::ITensor &tensor) const = 0;
-
-protected:
- void pushUnif(neurun::backend::operand::ITensor &tensor) const
- {
- assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
- (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
- _copy);
- auto output_buffer = tensor.buffer();
- auto rank = _shape.rank();
-
- if (!tensor.has_padding() && rank < 4 + _copy)
- {
- memcpy(output_buffer, _input_buffer, _input_size);
- return;
- }
-
- switch (rank)
- {
- case 0:
- case 1:
- {
- memcpy(output_buffer, _input_buffer, _input_size);
- break;
- }
- case 2:
- {
- const int32_t copy_len = _shape.dim(1);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- neurun::util::Coordinates coords{i, 0};
- memcpy(output_buffer + tensor.calcOffset(coords), _input_buffer + i * copy_len,
- copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- neurun::util::Coordinates coords{i, j, 0};
- memcpy(output_buffer + tensor.calcOffset(coords),
- _input_buffer + i * dim1 * dim2 + j * dim2, dim2 * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- if (_copy)
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
- const int32_t dim3 = _shape.dim(3);
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- for (auto k = 0; k < _shape.dim(2); ++k)
- {
- neurun::util::Coordinates coords{i, j, k, 0};
- memcpy(output_buffer + tensor.calcOffset(coords),
- _input_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
- dim3 * sizeof(T));
- }
- }
- }
- }
- else
- {
- const auto shape = _shape.asFeature(_io_layout);
-
- if (_io_layout == ir::Layout::NCHW)
- {
- const util::feature::nchw::Reader<T> from(shape, _input_buffer, _input_size);
- util::feature::nhwc::View<T> into(&tensor);
- ::nnfw::misc::feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, row, col, ch) = value;
- };
- }
- else if (_io_layout == ir::Layout::NHWC)
- {
- const util::feature::nhwc::Reader<T> from(shape, _input_buffer, _input_size);
- util::feature::nchw::View<T> into(&tensor);
- ::nnfw::misc::feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, row, col, ch);
- into.at(batch, ch, row, col) = value;
- };
- }
- else
- {
- throw std::runtime_error("Wrong Layout");
- }
- }
-
- break;
- }
- default:
- throw std::runtime_error("NYI");
- break;
- }
- }
-
-private:
- const T *_input_buffer;
- const size_t _input_size;
- const ir::Shape _shape;
- const bool _copy;
- const ir::Layout _io_layout;
-};
-
-template <typename T> class PermutateSource final : public ITemplSource<T>
-{
-public:
- PermutateSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
- ir::Layout io_layout)
- : ITemplSource<T>(input_buffer, input_size, shape, false, io_layout)
- {
- }
-
-public:
- void push(neurun::backend::operand::ITensor &tensor) const override
- {
- // do NHWC_TO_NCHW or NCHW_TO_NHWC permutation
- ITemplSource<T>::pushUnif(tensor);
- }
-};
-
-template <typename T> class CopySource final : public ITemplSource<T>
-{
-public:
- CopySource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
- ir::Layout io_layout = ir::Layout::UNKNOWN)
- : ITemplSource<T>(input_buffer, input_size, shape, true, io_layout)
- {
- }
-
-public:
- void push(neurun::backend::operand::ITensor &tensor) const override
- {
- ITemplSource<T>::pushUnif(tensor);
- }
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_SOURCE_H__
diff --git a/runtime/neurun/core/src/exec/ThreadPool.cc b/runtime/neurun/core/src/exec/ThreadPool.cc
deleted file mode 100644
index d8c706e30..000000000
--- a/runtime/neurun/core/src/exec/ThreadPool.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ThreadPool.h"
-
-#include <cassert>
-
-namespace neurun
-{
-namespace exec
-{
-
-ThreadPool::ThreadPool(uint32_t num_threads)
-{
- assert(num_threads >= 1);
-
- for (uint32_t i = 0; i < num_threads; i++)
- {
- _threads.emplace_back(std::ref(_worker));
- }
-}
-
-ThreadPool::~ThreadPool()
-{
- if (!_threads.empty())
- {
- _worker.terminate();
- join();
- }
-}
-
-void ThreadPool::enqueue(std::unique_ptr<IFunction> &&fn) { _worker.enqueue(std::move(fn)); }
-
-uint32_t ThreadPool::numJobsInQueue() { return _worker.numJobsInQueue(); }
-
-void ThreadPool::join()
-{
- for (auto &thread : _threads)
- {
- thread.join();
- }
- _threads.clear();
-}
-
-void ThreadPool::finish()
-{
- _worker.finish();
- join();
-}
-
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/ThreadPool.h b/runtime/neurun/core/src/exec/ThreadPool.h
deleted file mode 100644
index a1a027617..000000000
--- a/runtime/neurun/core/src/exec/ThreadPool.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_THREAD_POOL_H__
-#define __NEURUN_EXEC_THREAD_POOL_H__
-
-#include <thread>
-#include <memory>
-#include <vector>
-
-#include "WorkQueue.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-class ThreadPool
-{
-public:
- /**
- * @brief Coustruct ThreadPool object
- *
- * @param num_threads Number of threads
- */
- ThreadPool(uint32_t num_threads = 1);
- /**
- * @brief Destroy ThreadPool object
- */
- ~ThreadPool();
- /**
- * @brief Enqueue a function
- *
- * @param fn A function to be queued
- */
- void enqueue(std::unique_ptr<IFunction> &&fn);
- /**
- * @brief Get number of jobs in worker's queue
- *
- * @return Number of jobs
- */
- uint32_t numJobsInQueue();
-
- /**
- * @brief Block until all jobs are finished
- */
- void finish();
-
-private:
- void join();
-
-private:
- WorkQueue _worker;
- std::vector<std::thread> _threads;
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_THREAD_POOL_H__
diff --git a/runtime/neurun/core/src/exec/WorkQueue.cc b/runtime/neurun/core/src/exec/WorkQueue.cc
deleted file mode 100644
index 6712554ac..000000000
--- a/runtime/neurun/core/src/exec/WorkQueue.cc
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "WorkQueue.h"
-
-#include <cassert>
-
-namespace neurun
-{
-namespace exec
-{
-
-WorkQueue::~WorkQueue()
-{
- {
- std::unique_lock<std::mutex> lock(_mu);
- _state = State::FORCE_FINISHING;
- }
- _cv.notify_all();
-}
-
-void WorkQueue::operator()()
-{
- while (true)
- {
- std::unique_ptr<IFunction> fn = nullptr;
-
- {
- std::unique_lock<std::mutex> lock{_mu};
- _cv.wait(lock, [this] {
- return (_state == State::FORCE_FINISHING) || (_state == State::FINISHING) ||
- (_state == State::ONLINE && !_functions.empty());
- });
-
- if (_state == State::FORCE_FINISHING)
- {
- assert(_functions.empty() && "Terminating with unfinished jobs");
- return;
- }
- else if (_state == State::FINISHING && _functions.empty())
- {
- return;
- }
- else
- {
- assert(((_state == State::FINISHING) || (_state == State::ONLINE)) && !_functions.empty());
- fn = std::move(_functions.front());
- _functions.pop();
- }
- }
-
- assert(fn);
- fn->run();
- }
-}
-
-void WorkQueue::enqueue(std::unique_ptr<IFunction> &&fn)
-{
- {
- std::unique_lock<std::mutex> lock{_mu};
- _functions.emplace(std::move(fn));
- }
- _cv.notify_one();
-}
-
-void WorkQueue::terminate()
-{
- {
- std::unique_lock<std::mutex> lock{_mu};
- _state = State::FORCE_FINISHING;
- }
- _cv.notify_all();
-}
-
-void WorkQueue::finish()
-{
- {
- std::unique_lock<std::mutex> lock{_mu};
- _state = State::FINISHING;
- }
- _cv.notify_all();
-}
-
-uint32_t WorkQueue::numJobsInQueue()
-{
- std::unique_lock<std::mutex> lock{_mu};
- return _functions.size();
-}
-
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/WorkQueue.h b/runtime/neurun/core/src/exec/WorkQueue.h
deleted file mode 100644
index cdbadfb8f..000000000
--- a/runtime/neurun/core/src/exec/WorkQueue.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_WORK_QUEUE_H__
-#define __NEURUN_EXEC_WORK_QUEUE_H__
-
-#include <condition_variable>
-#include <memory>
-#include <mutex>
-#include <queue>
-
-#include "exec/IFunction.h"
-
-namespace neurun
-{
-namespace exec
-{
-
-class WorkQueue
-{
-public:
- enum class State
- {
- ONLINE,
- FINISHING,
- FORCE_FINISHING
- };
-
-public:
- /**
- * @brief Create WorkQueue object
- */
- WorkQueue() = default;
- /**
- * @brief Destroy WorkQueue object
- */
- ~WorkQueue();
- /**
- * @brief Thread entry function
- */
- void operator()();
- /**
- * @brief Push the given Task to the job queue
- *
- * @param fn Function to be executed(a job)
- */
- void enqueue(std::unique_ptr<IFunction> &&fn);
- /**
- * @brief Flag as terminating so all the worker threads can terminate
- */
- void terminate();
- /**
- * @brief Flag as terminating so all the worker threads can terminate
- */
- void finish();
- /**
- * @brief Check if it has pending jobs. Even if this returns fals, WorkQueue threads may be still
- * running
- *
- * @return true if the job queue not empty otherwise false
- */
- uint32_t numJobsInQueue();
-
-private:
- State _state{State::ONLINE};
- std::queue<std::unique_ptr<IFunction>> _functions;
- std::mutex _mu;
- std::condition_variable _cv;
-};
-
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_WORK_QUEUE_H__
diff --git a/runtime/neurun/core/src/exec/interp/Buffer.h b/runtime/neurun/core/src/exec/interp/Buffer.h
deleted file mode 100644
index d60b59a2f..000000000
--- a/runtime/neurun/core/src/exec/interp/Buffer.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Buffer.h
- * @brief This file contains Buffer interface and InternalBuffer, ExternalBuffer class
- */
-#ifndef __NEURUN_EXEC_INTERP_BUFFER_H__
-#define __NEURUN_EXEC_INTERP_BUFFER_H__
-
-#include <cpp14/memory.h>
-
-#include "ir/Data.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-
-/**
- * @brief Interface for writable data area
- */
-class Buffer : public ir::Data
-{
-public:
- /**
- * @brief Return writable pointer for data area
- * @return Writable pointer
- */
- virtual uint8_t *baseWritable(void) const = 0;
-};
-
-/**
- * @brief Class for internally allocated data area
- */
-class InternalBuffer final : public Buffer
-{
-public:
- InternalBuffer(size_t size) : _base{nnfw::cpp14::make_unique<uint8_t[]>(size)}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- size_t size(void) const override { return _size; }
- const uint8_t *base(void) const override { return _base.get(); }
- uint8_t *baseWritable(void) const override { return _base.get(); }
-
-private:
- std::unique_ptr<uint8_t[]> _base;
- size_t _size;
-};
-
-/**
- * @brief Class for data area from outside
- */
-class ExternalBuffer final : public Buffer
-{
-public:
- ExternalBuffer(uint8_t *base, size_t size) : _base{base}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- size_t size(void) const override { return _size; }
- const uint8_t *base(void) const override { return _base; }
- uint8_t *baseWritable(void) const override { return _base; }
-
-private:
- uint8_t *_base;
- size_t _size;
-};
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_INTERP_BUFFER_H__
diff --git a/runtime/neurun/core/src/exec/interp/ExecEnv.h b/runtime/neurun/core/src/exec/interp/ExecEnv.h
deleted file mode 100644
index 0f7d45e2a..000000000
--- a/runtime/neurun/core/src/exec/interp/ExecEnv.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file ExecEnv.h
- * @brief This file contains ExecEnv to access interpreter tensor and execution status
- */
-#ifndef __NEURUN_EXEC_INTERP_EXEC_ENV_H_
-#define __NEURUN_EXEC_INTERP_EXEC_ENV_H_
-
-#include <unordered_set>
-
-#include "ir/Graph.h"
-#include "Tensor.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-
-/**
- * @brief Class to gather interpreter execution environment
- * Each interpreter instance own execution environment
- */
-class ExecEnv
-{
-public:
- /**
- * @brief Construct a new Exec Env object (deleted)
- */
- ExecEnv(void) = delete;
- /**
- * @brief Construct a new ExecEnv object
- * @param[in] graph Graph to execute by interpreter
- */
- explicit ExecEnv(const ir::Graph &graph) : _graph(graph)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return graph to execute
- * @return Graph
- */
- const ir::Graph &graph(void) const { return _graph; }
- /**
- * @brief Assign tensor to environment which have allocated or assigned buffer
- * @param[in] index Tensor index
- * @param[in] tensor Tensor
- */
- void assignTensor(const ir::OperandIndex index, std::shared_ptr<ITensor> tensor)
- {
- assert(tensor->bufferRO() != nullptr);
- _tensors.emplace(index, tensor);
- }
-
- /**
- * @brief Return tensor pointer in environment
- * @param[in] index Tensor index
- * @return Tensor pointer
- */
- const ITensor *tensorAt(const ir::OperandIndex index) const { return _tensors.at(index).get(); }
-
- /**
- * @brief Check environment contains tensor
- * @param[in] index Tensor index
- * @return @c true if environment contain tensor, otherwise @c false
- */
- bool contains(const ir::OperandIndex index) const
- {
- return (_tensors.find(index) != _tensors.end());
- }
-
- /**
- * @brief Allocate tensor using operand info
- * @param[in] index Tensor index
- * @param[in] info Operand info
- * @note If already allocated, just return
- * @TODO More smart allocation policy
- */
- void allocateIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info)
- {
- // already allocated, or constant
- if (contains(index))
- {
- return;
- }
-
- auto tensor = std::make_shared<Tensor>(info);
- tensor->setBuffer(std::make_shared<InternalBuffer>(tensor->total_size()));
- assignTensor(index, tensor);
- _buffers.insert(index);
- }
-
- /**
- * @brief Allocate read-only tensor and share data with other tensor
- * @param[in] index Tensor index
- * @param[in] info Operand info
- * @param[in] index_to_share Tensor index that have data to share
- */
- void allocateAndShareIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info,
- const ir::OperandIndex index_to_share)
- {
- if (!contains(index_to_share))
- {
- throw std::runtime_error{"Cannot find tensor to share data"};
- }
-
- // already allocated
- if (contains(index))
- {
- return;
- }
- else
- {
- auto tensor = std::make_shared<ROTensor>(info);
- tensor->setData(tensorAt(index_to_share)->shareData());
- assignTensor(index, tensor);
- _buffers.insert(index);
- }
- }
-
- /**
- * @brief Free buffer if allocated by allocateIfNeed
- * @param[in] index Tensor index
- * @note If allocated by outside, just return
- */
- void freeIfAllocated(const ir::OperandIndex index)
- {
- if (_buffers.find(index) != _buffers.end())
- {
- _tensors.at(index)->releaseData();
- }
- }
-
-private:
- const ir::Graph &_graph;
- // Tensor map to use in interpreter
- // It should map tensors that have allocated or assigned buffer pointer
- std::unordered_map<ir::OperandIndex, std::shared_ptr<ITensor>> _tensors;
- // Tensors allocated by allocateIfNeed (buffer)
- std::unordered_set<ir::OperandIndex> _buffers;
-};
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_INTERP_EXEC_ENV_H_
diff --git a/runtime/neurun/core/src/exec/interp/ExecManager.cc b/runtime/neurun/core/src/exec/interp/ExecManager.cc
deleted file mode 100644
index 92f182c06..000000000
--- a/runtime/neurun/core/src/exec/interp/ExecManager.cc
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ExecManager.h"
-#include "ExecEnv.h"
-#include "Interpreter.h"
-
-#include "util/logging.h"
-
-#include <cpp14/memory.h>
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-
-void ExecManager::execute(const IODescription &desc)
-{
- /************************************************************************
- * Prepare execution model (submodel)
- It may execute divided model
- but now consider model inference is done at interpreter
- ***********************************************************************/
- ir::OperandIndexMap<std::shared_ptr<ITensor>> tensor_map;
-
- for (uint32_t n = 0; n < _graph.getInputs().size(); n++)
- {
- ir::IOIndex index{n};
- const auto input_index = _graph.getInputs().at(index);
- const auto &input = *desc.inputs.at(n);
-
- auto input_tensor = std::make_shared<ROTensor>(input.info);
- input_tensor->setData(std::make_shared<const ir::ExternalData>(
- reinterpret_cast<const uint8_t *>(input.buffer), input.size));
- tensor_map[input_index] = input_tensor;
- }
-
- for (uint32_t n = 0; n < _graph.getOutputs().size(); n++)
- {
- ir::IOIndex index{n};
- const auto output_index = _graph.getOutputs().at(index);
- const auto &output = *desc.outputs.at(n);
-
- auto output_tensor = std::make_shared<Tensor>(output.info);
- output_tensor->setBuffer(
- std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output.buffer), output.size));
- tensor_map[output_index] = output_tensor;
- }
-
- /************************************************************************
- * Prepare execution environment
- Execution environment will be assigned to invoked interpreter instance
- ***********************************************************************/
-
- std::unique_ptr<ExecEnv> interp_env = nnfw::cpp14::make_unique<ExecEnv>(_graph);
-
- // Assign input tensor into interpreter execution environment
- for (auto index : _graph.getInputs())
- {
- if (tensor_map.find(index) != tensor_map.end())
- {
- VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index.value() << std::endl;
- interp_env->assignTensor(index, tensor_map.at(index));
- }
- }
-
- // Assign output tensor into interpreter execution environment
- for (auto index : _graph.getOutputs())
- {
- if (tensor_map.find(index) != tensor_map.end())
- {
- VERBOSE(INTERPRETER) << "Assign output tensor. operand index: " << index.value() << std::endl;
- interp_env->assignTensor(index, tensor_map.at(index));
- }
- }
-
- // Allocate constant tensor
- _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (obj.isConstant())
- {
- VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind.value()
- << std::endl;
-
- auto const_tensor = std::make_shared<ROTensor>(obj.info());
- // Assume that interpreter's tensor layout is same with model (NHWC)
- const_tensor->setData(
- std::make_shared<ir::ExternalData>(obj.data().base(), obj.info().total_size()));
- interp_env->assignTensor(ind, const_tensor);
- }
- });
-
- /*****************************************************************************
- * Invoke interpreter
- ****************************************************************************/
-
- Interpreter interp(std::move(interp_env));
- interp.run();
-
- /*****************************************************************************
- * Invoked interpreter run is finished
- ****************************************************************************/
-
- // If interpreter execute submodel
- // 1. Get tensor output of submodel into tensor_map to save result
- // 2. Generate new ExecEnv for next interpretation
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/ExecManager.h b/runtime/neurun/core/src/exec/interp/ExecManager.h
deleted file mode 100644
index f952abf02..000000000
--- a/runtime/neurun/core/src/exec/interp/ExecManager.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file ExecManager.h
- * @brief This file contains ExecManager class\n
- * to manage interpreter execution and environment
- */
-#ifndef __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_
-#define __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_
-
-#include "ir/Graph.h"
-#include "exec/IExecutor.h"
-#include "Tensor.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-
-/**
- * @brief Class to execute model using interpreter
- */
-class ExecManager final : public IExecutor
-{
-public:
- explicit ExecManager(const ir::Graph &graph) : _graph(graph)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return graph object
- * @return Graph object
- */
- const ir::Graph &graph() final { return _graph; }
- void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{
- // Not implemented
- };
- /**
- * @brief Start execution
- * @note It should be called after setting input and output buffer
- */
- void execute(const IODescription &desc) final;
-
-private:
- const ir::Graph &_graph;
- ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map;
-};
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_INTERP_EXEC_MANAGER_H_
diff --git a/runtime/neurun/core/src/exec/interp/Interpreter.cc b/runtime/neurun/core/src/exec/interp/Interpreter.cc
deleted file mode 100644
index 8373419f6..000000000
--- a/runtime/neurun/core/src/exec/interp/Interpreter.cc
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Interpreter.h"
-
-#include <stack>
-#include <unordered_set>
-
-#include "Registration.h"
-
-#include "ir/OperandIndexMap.h"
-#include "util/logging.h"
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-
-// TODO more structured execution kernel implementation
-// TODO use cker for execution
-// TODO divide tensor prepare and execution
-// TODO introduce memory manager (buffer allocate and free)
-class OperationExecutor : ir::OperationVisitor
-{
-public:
- OperationExecutor(ExecEnv *env) : _env{env}
- {
- _kernels[ir::OpCode::Add] = getAdd();
- _kernels[ir::OpCode::Sub] = getSub();
- _kernels[ir::OpCode::Mul] = getMul();
- _kernels[ir::OpCode::Conv2D] = getConv2D();
- _kernels[ir::OpCode::MaxPool2D] = getMaxPool2D();
- _kernels[ir::OpCode::Concat] = getConcat();
- _kernels[ir::OpCode::Gather] = getGather();
- _kernels[ir::OpCode::AvgPool2D] = getAvgPool2D();
- _kernels[ir::OpCode::FullyConnected] = getFullyConnected();
- _kernels[ir::OpCode::InstanceNorm] = getInstanceNorm();
- _kernels[ir::OpCode::Softmax] = getSoftMax();
- _kernels[ir::OpCode::Reshape] = getReshape();
- _kernels[ir::OpCode::DepthwiseConv2D] = getDepthwiseConv();
- _kernels[ir::OpCode::TransposeConv] = getTransposeConv();
- _kernels[ir::OpCode::Logistic] = getLogistic();
- _kernels[ir::OpCode::Pad] = getPad();
- _kernels[ir::OpCode::ReLU] = getReLU();
- _kernels[ir::OpCode::ReLU1] = getReLU1();
- _kernels[ir::OpCode::ReLU6] = getReLU6();
- _kernels[ir::OpCode::Tanh] = getTanh();
- }
-
- void execute(const ir::OperationIndex &idx)
- {
- const auto nodeName = _env->graph().operations().at(idx).name();
- VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName
- << " operation (id: " << idx.value() << ")" << std::endl;
- _env->graph().operations().at(idx).accept(*this);
- }
-
-private:
-#define OP(InternalName) \
- void visit(const ir::operation::InternalName &node) override \
- { \
- if (_kernels[ir::OpCode::InternalName]->prepare != nullptr) \
- { \
- _kernels[ir::OpCode::InternalName]->prepare(_env, node); \
- } \
- _kernels[ir::OpCode::InternalName]->invoke(_env, node); \
- }
-#include "ir/Operations.lst"
-#undef OP
-
-private:
- ExecEnv *_env;
- std::unordered_map<ir::OpCode, OpKernel *> _kernels;
-};
-
-void Interpreter::run()
-{
- VERBOSE(INTERPRETER) << "Interpreter is invoked " << std::endl;
-
- // operand_stack: save operands prepared to use
- std::stack<ir::OperandIndex> operand_stack;
-
- // Note: We should push input first, then constant.
- // We use use-def for find operators ready to execution,
- // but Use-Def cannot handle parameters (maybe constant, but not always)
- // Note: If all model inputs are constant, it may not work (depend on tensors' order).
- // But that scenario may not exist
- for (auto ind : _env->graph().getInputs())
- {
- VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind.value() << std::endl;
-
- operand_stack.push(ind);
- }
-
- _env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (obj.isConstant())
- {
- VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind.value() << std::endl;
-
- operand_stack.push(ind);
- }
- });
-
- // Execution
- std::unordered_set<ir::OperandIndex> ready_check;
- std::unordered_set<ir::OperationIndex> executed;
- OperationExecutor executor{_env.get()};
- while (!operand_stack.empty())
- {
- const auto current_operand_index = operand_stack.top();
- operand_stack.pop();
- VERBOSE(INTERPRETER) << "Poped operand " << current_operand_index.value()
- << " is checked ready to use" << std::endl;
-
- assert(ready_check.find(current_operand_index) == ready_check.end());
- ready_check.insert(current_operand_index);
-
- // Find prepared operations by scan use of current operand
- std::stack<ir::OperationIndex> operation_stack;
- auto use_operators = std::list<ir::OperationIndex>(
- _env->graph().operands().at(current_operand_index).getUses().list());
- // Remove operation index duplication
- // If one operation uses same operand tensor for multiple input,
- // use-list have duplicated operation index
- use_operators.unique();
- for (auto use_operator : use_operators)
- {
- // Assumption: all parameters are ready to use
- bool operator_ready = true;
- for (auto input_index : _env->graph().operations().at(use_operator).getInputs())
- {
- if (ready_check.find(input_index) == ready_check.end())
- {
- operator_ready = false;
- break;
- }
- }
-
- if (operator_ready)
- {
- VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator.value() << std::endl;
- operation_stack.push(use_operator);
- }
- }
-
- while (!operation_stack.empty())
- {
- const auto current_operation_index = operation_stack.top();
- operation_stack.pop();
- VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index.value() << "("
- << _env->graph().operations().at(current_operation_index).name() << ")"
- << std::endl;
-
- // execution
- // 1. Prepare output tensor
- // 2. Call operation kernel
- executor.execute(current_operation_index);
- executed.insert(current_operation_index);
-
- // 3. Push each output into operand stack
- const auto def_operands = _env->graph().operations().at(current_operation_index).getOutputs();
- for (auto def_operand : def_operands)
- {
- VERBOSE(INTERPRETER) << "Buffer: Push to operand stack " << def_operand.value()
- << std::endl;
- operand_stack.push(def_operand);
- }
-
- // 4. Free if lifetime of buffer operands used by input is finished
- for (auto input_index : _env->graph().operations().at(current_operation_index).getInputs())
- {
- const auto use_operators = _env->graph().operands().at(input_index).getUses();
- bool dead_buffer = true;
- for (auto use_operator : use_operators.list())
- {
- if (executed.find(use_operator) == executed.end())
- {
- dead_buffer = false;
- break;
- }
- }
-
- if (dead_buffer)
- {
- _env->freeIfAllocated(input_index);
- }
- }
- }
- }
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/Interpreter.h b/runtime/neurun/core/src/exec/interp/Interpreter.h
deleted file mode 100644
index 1b73592b3..000000000
--- a/runtime/neurun/core/src/exec/interp/Interpreter.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Interpreter.h
- * @brief This file contains Interpreter class for interpretation
- */
-#ifndef __NEURUN_EXEC_INTERP_INTERPRETER_H__
-#define __NEURUN_EXEC_INTERP_INTERPRETER_H__
-
-#include "ExecEnv.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-
-/**
- * @brief Class for interpretation
- */
-class Interpreter
-{
-
-public:
- /**
- * @brief Construct a new Interpreter object (deleted)
- */
- Interpreter() = delete;
- /**
- * @brief Construct a new Interpreter object
- * @param[in] env Execution environment variable for interpreter object
- */
- Interpreter(std::unique_ptr<ExecEnv> env) : _env{std::move(env)}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Run interpreter until there is no operation to execute
- */
- void run();
-
-private:
- std::unique_ptr<ExecEnv> _env;
-};
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_INTERP_INTERPRETER_H__
diff --git a/runtime/neurun/core/src/exec/interp/Registration.h b/runtime/neurun/core/src/exec/interp/Registration.h
deleted file mode 100644
index 3ebe3bc9f..000000000
--- a/runtime/neurun/core/src/exec/interp/Registration.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_INTERP_REGISTRATION_H__
-#define __NEURUN_EXEC_INTERP_REGISTRATION_H__
-
-#include "ExecEnv.h"
-
-#include "ir/Operation.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-
-struct OpKernel
-{
- std::function<void(ExecEnv *, const ir::Operation &)> prepare;
- std::function<void(const ExecEnv *, const ir::Operation &)> invoke;
-};
-
-// Defined in operations/ directory
-OpKernel *getAdd();
-OpKernel *getSub();
-OpKernel *getMul();
-OpKernel *getConv2D();
-OpKernel *getMaxPool2D();
-OpKernel *getConcat();
-OpKernel *getGather();
-OpKernel *getAvgPool2D();
-OpKernel *getFullyConnected();
-OpKernel *getInstanceNorm();
-OpKernel *getSoftMax();
-OpKernel *getDepthwiseConv();
-OpKernel *getReshape();
-OpKernel *getTransposeConv();
-OpKernel *getLogistic();
-OpKernel *getPad();
-OpKernel *getReLU();
-OpKernel *getReLU1();
-OpKernel *getReLU6();
-OpKernel *getTanh();
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_INTERP_REGISTRATION_H__
diff --git a/runtime/neurun/core/src/exec/interp/Tensor.cc b/runtime/neurun/core/src/exec/interp/Tensor.cc
deleted file mode 100644
index 5c1da3587..000000000
--- a/runtime/neurun/core/src/exec/interp/Tensor.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Tensor.h"
-
-#define NO_USE(a) (void)(a)
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-
-void ITensor::access(const std::function<void(backend::operand::ITensor &tensor)> &fn)
-{
- fn(*this);
-}
-
-size_t ROTensor::calcOffset(const neurun::util::Coordinates &coords) const
-{
- NO_USE(coords);
- throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
-}
-
-size_t Tensor::calcOffset(const neurun::util::Coordinates &coords) const
-{
- NO_USE(coords);
- throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
-}
-
-ir::Layout ROTensor::layout() const
-{
- // TODO Changes to return frontend layout
- return ir::Layout::NHWC;
-}
-
-ir::Layout Tensor::layout() const
-{
- // TODO Changes to return frontend layout
- return ir::Layout::NHWC;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/Tensor.h b/runtime/neurun/core/src/exec/interp/Tensor.h
deleted file mode 100644
index c53fd46a6..000000000
--- a/runtime/neurun/core/src/exec/interp/Tensor.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Tensor.h
- * @brief This file contains ITensor interface, ROTensor class, and Tensor class
- */
-#ifndef __NEURUN_EXEC_INTERP_TENSOR_H__
-#define __NEURUN_EXEC_INTERP_TENSOR_H__
-
-#include "Buffer.h"
-
-#include "ir/OperandInfo.h"
-#include "backend/operand/ITensor.h"
-#include "ir/Layout.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-
-/**
- * @brief Interface to handle Tensor in interpreter
- */
-class ITensor : public backend::operand::ITensor
-{
-public:
- virtual ~ITensor() = default;
-
-public:
- virtual uint8_t *buffer() const = 0;
- /**
- * @brief Return shared pointer for buffer
- * @return Buffer shared pointer
- */
- virtual std::shared_ptr<const Buffer> shareBuffer() const = 0;
- /**
- * @brief Return read-only buffer pointer
- * @return Read-only buffer pointer
- */
- virtual const uint8_t *bufferRO() const = 0;
- /**
- * @brief Return shared pointer for data
- * @return Data shared pointer
- */
- virtual std::shared_ptr<const ir::Data> shareData() const = 0;
- /**
- * @brief Set internal/external buffer
- * @param[in] buffer Buffer pointer
- */
- virtual void setBuffer(std::shared_ptr<const Buffer> buffer) = 0;
- /**
- * @brief Set data reference (including constant, input)
- * @param[in] data Data pointer
- */
- virtual void setData(std::shared_ptr<const ir::Data> data) = 0;
- virtual void releaseData() = 0;
-
- virtual size_t total_size() const = 0;
- virtual size_t dimension(size_t index) const = 0;
- virtual size_t num_dimensions() const = 0;
- virtual size_t calcOffset(const util::Coordinates &coords) const = 0;
-
- virtual bool has_padding() const = 0;
- /**
- * @brief Return data type of tensor
- * @return Data type of tensor
- */
- virtual ir::DataType data_type() const = 0;
- /**
- * @brief Return TensorInfo
- * @return TensorInfo
- */
- virtual const ir::OperandInfo &tensorInfo() const = 0;
- /**
- * @brief Return number of elements
- * @return Number of elements
- */
- virtual uint64_t num_elements() const = 0;
- void access(const std::function<void(backend::operand::ITensor &tensor)> &fn) final;
-};
-
-/**
- * @brief Class to handle tensor in interpreter as read-only
- */
-class ROTensor final : public ITensor
-{
-public:
- ROTensor() = delete;
- ROTensor(const ir::OperandInfo &info) : _info(info)
- {
- // DO NOTHING
- }
-
-public:
- uint8_t *buffer() const override { throw std::runtime_error{"Read only tensor"}; }
- std::shared_ptr<const Buffer> shareBuffer() const override
- {
- throw std::runtime_error{"Read only tensor"};
- }
- const uint8_t *bufferRO() const override { return _data->base(); }
- std::shared_ptr<const ir::Data> shareData() const override { return _data; }
- void setBuffer(std::shared_ptr<const Buffer> buffer) override { _data = buffer; }
- void setData(std::shared_ptr<const ir::Data> data) override { _data = data; }
- void releaseData() override { _data = nullptr; }
-
- size_t total_size() const override { return _info.total_size(); }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t calcOffset(const util::Coordinates &coords) const override;
- ir::Layout layout() const override;
- bool has_padding() const override { return false; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- const ir::OperandInfo &tensorInfo() const override { return _info; }
- uint64_t num_elements() const override { return _info.shape().num_elements(); };
-
-private:
- const ir::OperandInfo _info;
- std::shared_ptr<const ir::Data> _data{nullptr};
-};
-
-/**
- * @brief Class to handle tensor in interpreter as writable
- */
-class Tensor final : public ITensor
-{
-public:
- Tensor() = delete;
- Tensor(const ir::OperandInfo &info) : _info(info)
- {
- // DO NOTHING
- }
-
-public:
- uint8_t *buffer() const override { return _buffer->baseWritable(); }
- std::shared_ptr<const Buffer> shareBuffer() const override { return _buffer; };
- const uint8_t *bufferRO() const override { return _buffer->base(); }
- std::shared_ptr<const ir::Data> shareData() const override { return _buffer; }
- void setBuffer(std::shared_ptr<const Buffer> buffer) override { _buffer = buffer; }
- void setData(std::shared_ptr<const ir::Data>) override
- {
- throw std::runtime_error{"Passed data may read-only"};
- }
- void releaseData() override { _buffer = nullptr; }
-
- size_t total_size() const override { return _info.total_size(); }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t calcOffset(const util::Coordinates &coords) const override;
- ir::Layout layout() const override;
- bool has_padding() const override { return false; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- const ir::OperandInfo &tensorInfo() const override { return _info; }
- uint64_t num_elements() const override { return _info.shape().num_elements(); };
-
-private:
- const ir::OperandInfo _info;
- std::shared_ptr<const Buffer> _buffer{nullptr};
-};
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_INTERP_TENSOR_H__
diff --git a/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc b/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc
deleted file mode 100644
index bd396491f..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/AvgPool2D.cc
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/AveragePool.h>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-#include "ir/operation/AvgPool2D.h"
-#include "util/Utils.h"
-#include "util/Padding.h"
-#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace avgpool2d
-{
-
-void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- UNUSED_RELEASE(in_tensor);
-
- assert(in_tensor->num_dimensions() == 4);
-
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto &avgpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
- const auto infered_output_shapes =
- shape_inference::inferAvgPoolShape(in_tensor->tensorInfo().shape(), avgpool_node.param());
- env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
-}
-
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
- const ir::operation::AvgPool2D::Param &param)
-{
- // TODO Support NCHW frontend
- const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape,
- param.stride, param.kw, param.kh);
- // Calculate
- nnfw::cker::PoolParams cker_param;
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
- cker_param.filter_width = param.kw;
- cker_param.filter_height = param.kh;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
-
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
- nnfw::cker::AveragePool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
-}
-
-void invokeAvgPool2D(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &avgpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- // Check lhs shape is same with rhs (with broadcast)
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- const auto data_type = in_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(in_tensor, out_tensor, avgpool_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float only"};
- }
-}
-} // namespace avgpool2d
-
-OpKernel *getAvgPool2D()
-{
- static OpKernel kernel = {avgpool2d::prepareAvgPool2D, avgpool2d::invokeAvgPool2D};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc b/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc
deleted file mode 100644
index 16469b9db..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/BinaryArithmeticOps.cc
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-#include "ir/operation/Add.h"
-#include "ir/operation/Sub.h"
-#include "ir/operation/Mul.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace
-{
-
-enum class OpType
-{
- ADD,
- SUB,
- MUL
-};
-
-template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation &node)
-{
- const auto &add_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
-
- const auto lhs_index = node.getInputs().at(add_node.LHS);
- const auto rhs_index = node.getInputs().at(add_node.RHS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto lhs_tensor = env->tensorAt(lhs_index);
- const auto rhs_tensor = env->tensorAt(rhs_index);
-
- // Check shape and type lhs is same with rhs
- // TODO Util function to compare TensorInfo
- if (lhs_tensor->data_type() != rhs_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Add): Different input types"};
- }
-
- bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape());
- if (try_broadcast)
- {
- bool success = true;
- auto out_shape = calcBroadcastShape(lhs_tensor->tensorInfo().shape(),
- rhs_tensor->tensorInfo().shape(), success);
- if (!success)
- {
- throw std::runtime_error{"Interp(Add): Fail to brodcasting"};
- }
-
- auto output_info = ir::OperandInfo(out_shape, lhs_tensor->tensorInfo().typeInfo());
- // We can handle already allocated (ex. model output)
- env->allocateIfNeeded(out_index, output_info);
- }
- else
- {
- // Output's shape and type is same with input
- auto output_info = lhs_tensor->tensorInfo();
- // We can handle already allocated (ex. model output)
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- // Check shape and type lhs is same with output
- // TODO Util function to compare TensorInfo
- if (lhs_tensor->data_type() != out_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Add): Invalid output type"};
- }
-}
-
-inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params)
-{
- params->float_activation_min = min;
- params->float_activation_max = max;
-}
-
-inline void setActivationParams(int32_t min, int32_t max,
- nnfw::cker::BinaryArithmeticOpParam *params)
-{
- params->quantized_activation_min = min;
- params->quantized_activation_max = max;
-}
-
-template <typename raw_type, typename param_type, OpType op_type>
-void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor,
- const param_type &param)
-{
- const auto lhs_buffer = lhs_tensor->bufferRO();
- const auto rhs_buffer = rhs_tensor->bufferRO();
- auto out_buffer = out_tensor->buffer();
-
- nnfw::cker::BinaryArithmeticOpParam cker_param;
- raw_type activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
- setActivationParams(activation_min, activation_max, &cker_param);
- const raw_type *lhs_ptr = reinterpret_cast<const raw_type *>(lhs_buffer);
- const raw_type *rhs_ptr = reinterpret_cast<const raw_type *>(rhs_buffer);
- raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer);
-
- // Calculate
- const std::function<raw_type(const raw_type &, const raw_type &)> fn_add =
- [](const raw_type &a, const raw_type &b) { return a + b; };
- const std::function<raw_type(const raw_type &, const raw_type &)> fn_sub =
- [](const raw_type &a, const raw_type &b) { return a - b; };
- const std::function<raw_type(const raw_type &, const raw_type &)> fn_mul =
- [](const raw_type &a, const raw_type &b) { return a * b; };
-
- const std::function<raw_type(const raw_type &, const raw_type &)> fn =
- (op_type == OpType::ADD) ? fn_add : ((op_type == OpType::SUB) ? fn_sub : fn_mul);
-
- if (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape())
- {
- const auto lhs_shape = convertExtendShape(lhs_tensor->tensorInfo().shape());
- const auto rhs_shape = convertExtendShape(rhs_tensor->tensorInfo().shape());
- const auto out_shape = convertExtendShape(out_tensor->tensorInfo().shape());
- nnfw::cker::BroadcastBinaryArithmeticOpSlow(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr,
- out_shape, out_ptr, fn);
- return;
- }
-
- const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape());
- const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- nnfw::cker::BinaryArithmeticOp(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, out_shape,
- out_ptr, fn);
-}
-
-template <typename node_type, typename param_type, OpType op_type>
-void invokeAdd(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &arithmetic_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
-
- const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
- const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
- const auto out_index = node.getOutputs().at(0);
- const auto lhs_tensor = env->tensorAt(lhs_index);
- const auto rhs_tensor = env->tensorAt(rhs_index);
- const auto out_tensor = env->tensorAt(out_index);
- const auto data_type = lhs_tensor->data_type();
-
- if (data_type == ir::DataType::INT32)
- {
- invoke<int32_t, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor,
- arithmetic_node.param());
- }
- else if (data_type == ir::DataType::FLOAT32)
- {
- invoke<float, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, arithmetic_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Unsupported data type"};
- }
-}
-} // namespace add
-
-OpKernel *getAdd()
-{
- static OpKernel kernel = {prepareAdd<ir::operation::Add>,
- invokeAdd<ir::operation::Add, ir::operation::Add::Param, OpType::ADD>};
- return &kernel;
-}
-
-OpKernel *getSub()
-{
- static OpKernel kernel = {prepareAdd<ir::operation::Sub>,
- invokeAdd<ir::operation::Sub, ir::operation::Sub::Param, OpType::SUB>};
- return &kernel;
-}
-
-OpKernel *getMul()
-{
- static OpKernel kernel = {prepareAdd<ir::operation::Mul>,
- invokeAdd<ir::operation::Mul, ir::operation::Mul::Param, OpType::MUL>};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/Concat.cc b/runtime/neurun/core/src/exec/interp/operations/Concat.cc
deleted file mode 100644
index a127e5f30..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/Concat.cc
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Concatenation.h>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-#include "ir/operation/Concat.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace concat
-{
-
-void prepareConcat(ExecEnv *env, const ir::Operation &node)
-{
- const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
-
- const auto first_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto first_tensor = env->tensorAt(first_index);
- uint32_t out_axis_dimension = 0;
- const int32_t axis_raw = concat_node.param().axis;
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + first_tensor->num_dimensions()) : axis_raw;
-
- // All inputs shape should be same except axis dimension
- // All inputs type should be same
- for (auto input : node.getInputs())
- {
- assert(first_tensor->num_dimensions() == env->tensorAt(input)->num_dimensions());
- assert(first_tensor->data_type() == env->tensorAt(input)->data_type());
- for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
- {
- if (i == axis)
- {
- out_axis_dimension += env->tensorAt(input)->dimension(i);
- continue;
- }
- assert(first_tensor->dimension(i) == env->tensorAt(input)->dimension(i));
- }
- }
-
- // Make output tensor info using first input tensor info, and accumulated axis dimension value
- auto out_shape = first_tensor->tensorInfo().shape();
- out_shape.dim(axis) = out_axis_dimension;
- env->allocateIfNeeded(out_index,
- ir::OperandInfo{out_shape, first_tensor->tensorInfo().typeInfo()});
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Output shape should be same with input except axis dimension
- // Output type should be same with input
- assert(first_tensor->data_type() == out_tensor->data_type());
- for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
- {
- if (i == axis)
- {
- continue;
- }
- assert(first_tensor->dimension(i) == out_tensor->dimension(i));
- }
-}
-
-void invoke(const std::vector<const ITensor *> in_tensors, const ITensor *out_tensor, uint32_t axis)
-{
- const uint32_t count = in_tensors.size();
-
- // Calculate
- nnfw::cker::ConcatenationParams cker_param;
- cker_param.axis = (int8_t)axis;
- cker_param.inputs_count = count;
-
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-
- std::vector<nnfw::cker::Shape> in_shapes;
- std::vector<const nnfw::cker::Shape *> in_shape_ptrs;
- in_shapes.reserve(count);
- in_shape_ptrs.reserve(count);
- std::vector<const float *> in_ptrs;
- for (uint32_t i = 0; i < count; i++)
- {
- in_shapes.push_back(convertShape(in_tensors[i]->tensorInfo().shape()));
- in_shape_ptrs.push_back(&in_shapes[i]);
- in_ptrs.push_back(reinterpret_cast<const float *>(in_tensors[i]->bufferRO()));
- }
-
- auto out_buffer = out_tensor->buffer();
- float *out_ptr = reinterpret_cast<float *>(out_buffer);
-
- nnfw::cker::Concatenation<float>(cker_param, in_shape_ptrs.data(), in_ptrs.data(), out_shape,
- out_ptr);
-}
-
-void invokeConcat(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
- const int32_t axis_raw = concat_node.param().axis;
-
- std::vector<const ITensor *> in_tensors;
- for (const auto &e : concat_node.getInputs())
- {
- in_tensors.emplace_back(env->tensorAt(e));
- }
-
- const auto out_index = node.getOutputs().at(0);
- const auto out_tensor = env->tensorAt(out_index);
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->num_dimensions()) : axis_raw;
-
- const auto data_type = in_tensors[0]->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(in_tensors, out_tensor, axis);
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-} // namespace concat
-
-OpKernel *getConcat()
-{
- static OpKernel kernel = {concat::prepareConcat, concat::invokeConcat};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc b/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc
deleted file mode 100644
index 5242247a4..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/Conv2D.cc
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Conv.h>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-#include "ir/operation/Conv2D.h"
-#include "util/Utils.h"
-#include "util/Padding.h"
-#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace conv2d
-{
-
-void prepareConv2D(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
- const auto kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto kernel_tensor = env->tensorAt(kernel_index);
- const auto bias_tensor = env->tensorAt(bias_index);
-
- assert(in_tensor->num_dimensions() == 4);
- assert(kernel_tensor->num_dimensions() == 4);
- assert(bias_tensor->num_dimensions() == 1);
-
- UNUSED_RELEASE(in_tensor);
- UNUSED_RELEASE(kernel_tensor);
- UNUSED_RELEASE(bias_tensor);
-
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
- const auto infered_output_shapes = shape_inference::inferConv2DShape(
- in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param());
- env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::Conv2D::Param &param)
-{
- // TODO Support NCHW frontned
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
- const auto &ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape,
- param.stride, ker_width, ker_height);
-
- // Calculate
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::ConvParams cker_param;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::Conv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, cker_bias_shape,
- bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeConv2D(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-} // namespace conv2d
-
-OpKernel *getConv2D()
-{
- static OpKernel kernel = {conv2d::prepareConv2D, conv2d::invokeConv2D};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc b/runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc
deleted file mode 100644
index 1d3649f48..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/DepthwiseConv.cc
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/DepthwiseConv.h>
-#include <misc/polymorphic_downcast.h>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-#include "ir/operation/DepthwiseConv2D.h"
-#include "util/Padding.h"
-#include "util/Utils.h"
-#include "util/ShapeInference.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-
-namespace
-{
-
-void prepareDepthwiseConv(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
- const auto kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto kernel_tensor = env->tensorAt(kernel_index);
- const auto bias_tensor = env->tensorAt(bias_index);
-
- assert(in_tensor->num_dimensions() == 4);
- assert(kernel_tensor->num_dimensions() == 4);
- assert(bias_tensor->num_dimensions() == 1);
-
- UNUSED_RELEASE(in_tensor);
- UNUSED_RELEASE(kernel_tensor);
- UNUSED_RELEASE(bias_tensor);
-
- // TODO handle unspecified output shape:
- // calculate output shape using ifm shape, kernel shape, padding, stride
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto &depth_conv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
- const auto infered_output_shapes = shape_inference::inferDepthwiseConv2DShape(
- in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(),
- depth_conv_node.param());
- env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::DepthwiseConv2D::Param &param)
-{
- // TODO Support NCHW frontend
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [1, kernel_height, kernel_width, depth_out].
- const auto &ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape,
- param.stride, ker_width, ker_height);
-
- // Calculate
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::DepthwiseConvParams cker_param;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.depth_multiplier = param.multiplier;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &conv_node = static_cast<const ir::operation::DepthwiseConv2D &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getDepthwiseConv()
-{
- static OpKernel kernel = {prepareDepthwiseConv, invokeDepthwiseConv};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc b/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc
deleted file mode 100644
index 9c1c5d4e2..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/FullyConnected.cc
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/FullyConnected.h>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-#include "ir/operation/FullyConnected.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace fc
-{
-
-void prepareFC(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
- const auto kernel_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto kernel_tensor = env->tensorAt(kernel_index);
- const auto bias_tensor = env->tensorAt(bias_index);
-
- UNUSED_RELEASE(in_tensor);
- UNUSED_RELEASE(kernel_tensor);
- UNUSED_RELEASE(bias_tensor);
-
- assert(in_tensor->num_dimensions() >= 2);
- assert(kernel_tensor->num_dimensions() == 2);
- assert(bias_tensor->num_dimensions() == 1);
-
- const auto input_size_with_batch = in_tensor->num_elements();
- const auto num_units = kernel_tensor->dimension(0);
- const auto input_size = kernel_tensor->dimension(1);
- const auto batch_size = input_size_with_batch / input_size;
- assert(input_size_with_batch % input_size == 0);
- assert(num_units == bias_tensor->dimension(0));
-
- // Make output tensor info
- ir::Shape output_shape(2);
- output_shape.dim(0) = batch_size;
- output_shape.dim(1) = num_units;
- const ir::OperandInfo out_info{output_shape, in_tensor->tensorInfo().typeInfo()};
- env->allocateIfNeeded(out_index, out_info);
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 2);
- assert(out_tensor->dimension(0) == batch_size);
- assert(out_tensor->dimension(1) == num_units);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::FullyConnected::Param &param)
-{
- const auto ifm_buffer = ifm_tensor->bufferRO();
- const auto ker_buffer = ker_tensor->bufferRO();
- const auto bias_buffer = bias_tensor->bufferRO();
- auto ofm_buffer = ofm_tensor->buffer();
-
- // Calculate
- nnfw::cker::FullyConnectedParams cker_param;
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
- const auto cker_ifm_shape = convertExtendShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertExtendShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertExtendShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertExtendShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_buffer);
- const float *ker_ptr = reinterpret_cast<const float *>(ker_buffer);
- const float *bias_ptr = reinterpret_cast<const float *>(bias_buffer);
- float *ofm_ptr = reinterpret_cast<float *>(ofm_buffer);
-
- nnfw::cker::FullyConnected(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeFC(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &conv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float only"};
- }
-}
-} // namespace fc
-
-OpKernel *getFullyConnected()
-{
- static OpKernel kernel = {fc::prepareFC, fc::invokeFC};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/Gather.cc b/runtime/neurun/core/src/exec/interp/operations/Gather.cc
deleted file mode 100644
index 8b64d1937..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/Gather.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Gather.h>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-#include "ir/operation/Gather.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace
-{
-
-void prepareGather(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
- const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto indices_tensor = env->tensorAt(indices_index);
-
- // TODO handle unspecified output shape:
- // calculate output shape using ifm shape, kernel shape, padding, stride
- const auto output_info = env->graph().operands().at(output_index).info();
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(Gather): NYI for unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- if (indices_tensor->data_type() != ir::DataType::INT32)
- {
- throw std::runtime_error{"Interp(Gather): Invalid indices data type"};
- }
-
- auto output_tensor = env->tensorAt(output_index);
- auto output_rank = input_tensor->num_dimensions() + indices_tensor->num_dimensions() - 1;
-
- if (output_rank != output_tensor->num_dimensions())
- {
- throw std::runtime_error{"Interp(Gather): Invalid output rank"};
- }
- if (output_tensor->data_type() != input_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Gather): Invalid output data type"};
- }
-
- if (input_tensor->data_type() == ir::DataType::QUANT8_ASYMM &&
- input_tensor->tensorInfo().typeInfo() != output_tensor->tensorInfo().typeInfo())
- {
- throw std::runtime_error{
- "Interp(Gather): Cannot handle different I/O QUANT8_ASYMM scale/offset"};
- }
-}
-
-template <typename raw_type>
-void invoke(const ITensor *input_tensors, const ITensor *indices_tensors,
- const ITensor *output_tensor, uint32_t axis)
-{
- // Calculate
- nnfw::cker::GatherParams cker_param;
- cker_param.axis = (int8_t)axis;
-
- const auto cker_input_shapes = convertShape(input_tensors->tensorInfo().shape());
- const auto cker_indices_shape = convertShape(indices_tensors->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const raw_type *input_ptr = reinterpret_cast<const raw_type *>(input_tensors->bufferRO());
- const int32_t *indices_ptr = reinterpret_cast<const int32_t *>(indices_tensors->bufferRO());
- raw_type *output_ptr = reinterpret_cast<raw_type *>(output_tensor->buffer());
-
- nnfw::cker::Gather<raw_type>(cker_param, cker_input_shapes, input_ptr, cker_indices_shape,
- indices_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeGather(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &gather_node = nnfw::misc::polymorphic_downcast<const ir::operation::Gather &>(node);
- const int32_t axis_raw = gather_node.param().axis;
-
- const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
- const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto indices_tensor = env->tensorAt(indices_index);
- const auto output_tensor = env->tensorAt(output_index);
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + input_tensor->num_dimensions()) : axis_raw;
-
- const auto data_type = input_tensor->data_type();
-
- switch (data_type)
- {
- case ir::DataType::FLOAT32:
- invoke<float>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- case ir::DataType::INT32:
- invoke<int32_t>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- case ir::DataType::QUANT8_ASYMM:
- invoke<uint8_t>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- default:
- throw std::runtime_error{"Interp(Gather): NYI - Not supported type"};
- }
-}
-
-} // namespace concat
-
-OpKernel *getGather()
-{
- static OpKernel kernel = {prepareGather, invokeGather};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc b/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc
deleted file mode 100644
index d1623d53c..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/InstanceNorm.cc
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/InstanceNorm.h>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-#include "ir/operation/InstanceNorm.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace instancenorm
-{
-
-void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node)
-{
- const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
-
- const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
- const auto output_index = node.getOutputs().at(0);
- const auto input_tensor = env->tensorAt(input_index);
-
- if (input_tensor->num_dimensions() != 4)
- {
- throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"};
- }
-
- // Output shape should be same with input
- env->allocateIfNeeded(output_index, input_tensor->tensorInfo());
-
- auto output_tensor = env->tensorAt(output_index);
- UNUSED_RELEASE(output_tensor);
-
- // Handle same ifm & ofm data type only
- assert(input_tensor->data_type() == output_tensor->data_type());
- assert(input_tensor->tensorInfo().shape() == output_tensor->tensorInfo().shape());
-}
-
-inline void setActivationParams(float min, float max, nnfw::cker::InstanceNormParams *params)
-{
- params->float_activation_min = min;
- params->float_activation_max = max;
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *gamma_tensor, const ITensor *beta_tensor,
- const ITensor *output_tensor, const ir::operation::InstanceNorm::Param &param)
-{
- // Calculate
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::InstanceNormParams cker_param;
- cker_param.epsilon = param.epsilon;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_gamma_shape = convertShape(gamma_tensor->tensorInfo().shape());
- const auto cker_beta_shape = convertShape(beta_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_tensor->bufferRO());
- const float *gamma_ptr = reinterpret_cast<const float *>(gamma_tensor->bufferRO());
- const float *beta_ptr = reinterpret_cast<const float *>(beta_tensor->bufferRO());
- float *output_ptr = reinterpret_cast<float *>(output_tensor->buffer());
-
- nnfw::cker::InstanceNorm(cker_param, cker_input_shape, input_ptr, cker_gamma_shape, gamma_ptr,
- cker_beta_shape, beta_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
-
- const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
- const auto gamma_index = node.getInputs().at(instancenorm_node.GAMMA);
- const auto beta_index = node.getInputs().at(instancenorm_node.BETA);
- const auto out_index = node.getOutputs().at(0);
- const auto input_tensor = env->tensorAt(input_index);
- const auto gamma_tensor = env->tensorAt(gamma_index);
- const auto beta_tensor = env->tensorAt(beta_index);
- const auto out_tensor = env->tensorAt(out_index);
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, gamma_tensor, beta_tensor, out_tensor, instancenorm_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Unsupported data type"};
- }
-}
-} // namespace instancenorm
-
-OpKernel *getInstanceNorm()
-{
- static OpKernel kernel = {instancenorm::prepareInstanceNorm, instancenorm::invokeInstanceNorm};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/Logistic.cc b/runtime/neurun/core/src/exec/interp/operations/Logistic.cc
deleted file mode 100644
index 2fc68ffd2..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/Logistic.cc
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Logistic.h>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-#include "ir/operation/Logistic.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace
-{
-
-void prepareLogistic(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
-
- // Check shape and type lhs is same with rhs
- // TODO Util function to compare TensorInfo
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Logistic): Invalid output type"};
- }
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *output_tensor)
-{
- const auto input_buffer = input_tensor->bufferRO();
- auto output_buffer = output_tensor->buffer();
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
- float *output_ptr = reinterpret_cast<float *>(output_buffer);
-
- nnfw::cker::Logistic(cker_input_shape, input_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeLogistic(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, output_tensor);
- }
- else
- {
- throw std::runtime_error{"Interp(Logistic): NYI - Unsupported data type"};
- }
-}
-} // namespace
-
-OpKernel *getLogistic()
-{
- static OpKernel kernel = {prepareLogistic, invokeLogistic};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc b/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc
deleted file mode 100644
index 3e1711d8e..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/MaxPool2D.cc
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/MaxPool.h>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-#include "ir/operation/MaxPool2D.h"
-#include "util/Utils.h"
-#include "util/Padding.h"
-#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace
-{
-
-void prepareMaxPool2D(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
-
- assert(in_tensor->num_dimensions() == 4);
- UNUSED_RELEASE(in_tensor);
-
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto &maxpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
- const auto infered_output_shapes =
- shape_inference::inferMaxPoolShape(in_tensor->tensorInfo().shape(), maxpool_node.param());
- env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
-}
-
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
- const ir::operation::MaxPool2D::Param &param)
-{
- // TODO support NCHW frontend
- const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto padding = neurun::util::calculatePadding(param.padding, ifm_shape, ofm_shape,
- param.stride, param.kw, param.kh);
- // Calculate
- nnfw::cker::PoolParams cker_param;
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
- cker_param.filter_width = param.kw;
- cker_param.filter_height = param.kh;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
-
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
- nnfw::cker::MaxPool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
-}
-
-void invokeMaxPool2D(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &maxpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- const auto data_type = in_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(in_tensor, out_tensor, maxpool_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-} // namespace
-
-OpKernel *getMaxPool2D()
-{
- static OpKernel kernel = {prepareMaxPool2D, invokeMaxPool2D};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h b/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h
deleted file mode 100644
index 5f4146bb8..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/OperationUtil.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_
-#define __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_
-
-#include "ir/Shape.h"
-#include "ir/InternalType.h"
-
-#include <cker/Shape.h>
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-
-inline nnfw::cker::Shape convertShape(const ir::Shape &shape)
-{
- auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
-
- std::vector<int32_t> raw_shape;
- raw_shape.resize(4);
-
- for (uint32_t i = 0; i < 4; ++i)
- {
- if (i >= dimensions.size())
- {
- raw_shape[i] = 1;
- }
- else
- {
- raw_shape[i] = dimensions[i];
- }
- }
-
- return nnfw::cker::GetShape(raw_shape);
-}
-
-inline nnfw::cker::Shape convertExtendShape(const ir::Shape &shape)
-{
- auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
-
- std::vector<int32_t> raw_shape;
- raw_shape.resize(4);
- uint32_t start = 4 - dimensions.size();
-
- for (uint32_t i = 0; i < 4; ++i)
- {
- if (i < start)
- {
- raw_shape[i] = 1;
- }
- else
- {
- raw_shape[i] = dimensions[i - start];
- }
- }
-
- return nnfw::cker::GetShape(raw_shape);
-}
-
-template <typename T>
-void calculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
- if (activation == ir::Activation::RELU)
- {
- *activation_min = 0;
- *activation_max = std::numeric_limits<T>::max();
- }
- else if (activation == ir::Activation::RELU6)
- {
- *activation_min = 0;
- *activation_max = 6;
- }
- else if (activation == ir::Activation::RELU1)
- {
- *activation_min = -1;
- *activation_max = 1;
- }
- else if (activation == ir::Activation::NONE)
- {
- *activation_min = std::numeric_limits<T>::lowest();
- *activation_max = std::numeric_limits<T>::max();
- }
- else
- {
- throw std::runtime_error{"Unsupported activation type"};
- }
-}
-
-inline ir::Shape calcBroadcastShape(const ir::Shape &lhs, const ir::Shape &rhs, bool &success)
-{
- int lhs_rank = lhs.rank();
- int rhs_rank = rhs.rank();
-
- int out_rank = (lhs_rank > rhs_rank ? lhs_rank : rhs_rank);
- ir::Shape out_shape(out_rank);
-
- int lhs_idim = lhs_rank - 1;
- int rhs_idim = rhs_rank - 1;
- success = true;
- for (int out_idim = out_rank - 1; out_idim >= 0; out_idim--)
- {
- if (lhs_idim == -1 && rhs_idim == -1)
- {
- // invalid result
- success = false;
- break;
- }
-
- if (lhs_idim == -1)
- {
- out_shape.dim(out_idim) = rhs.dim(rhs_idim);
- rhs_idim--;
- }
- else if (rhs_idim == -1)
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- }
- else
- {
- if (lhs.dim(lhs_idim) == rhs.dim(rhs_idim))
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else if (lhs.dim(lhs_idim) == 1)
- {
- out_shape.dim(out_idim) = rhs.dim(rhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else if (rhs.dim(rhs_idim) == 1)
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else
- {
- // invalid result
- success = false;
- break;
- }
- }
- }
-
- if (lhs_idim != -1 || rhs_idim != -1)
- {
- // invalid result
- success = false;
- }
- return out_shape;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
-
-#endif // __NEURUN_EXEC_INTERP_OPERATIONS_OPERATION_UTILS_H_
diff --git a/runtime/neurun/core/src/exec/interp/operations/Pad.cc b/runtime/neurun/core/src/exec/interp/operations/Pad.cc
deleted file mode 100644
index 0c8267a90..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/Pad.cc
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Pad.h>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-#include "ir/operation/Pad.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace
-{
-
-void preparePad(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
-
- // Check shape and type lhs is same with rhs
- // TODO Util function to compare TensorInfo
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(Pad): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Pad): Invalid output type"};
- }
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *pad_tensor, const ITensor *output_tensor)
-{
- const auto input_buffer = input_tensor->bufferRO();
- const auto pad_buffer = pad_tensor->bufferRO();
- auto output_buffer = output_tensor->buffer();
-
- int32_t pad_rank = pad_tensor->dimension(0);
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
- const int32_t *pad_ptr = reinterpret_cast<const int32_t *>(pad_buffer);
- float *output_ptr = reinterpret_cast<float *>(output_buffer);
-
- nnfw::cker::Pad(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape, output_ptr,
- nullptr);
-}
-
-void invokePad(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
- const auto pad_index = node.getInputs().at(ir::operation::Pad::PAD);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto pad_tensor = env->tensorAt(pad_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, pad_tensor, output_tensor);
- }
- else
- {
- throw std::runtime_error{"Interp(Pad): NYI - Unsupported data type"};
- }
-}
-} // namespace
-
-OpKernel *getPad()
-{
- static OpKernel kernel = {preparePad, invokePad};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/Reshape.cc b/runtime/neurun/core/src/exec/interp/operations/Reshape.cc
deleted file mode 100644
index a160232de..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/Reshape.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "exec/interp/Registration.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace
-{
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- // Unspecified shape is not supported in operation node spec now
- const auto output_info = env->graph().operands().at(out_index).info();
- env->allocateAndShareIfNeeded(out_index, output_info, in_index);
-
- assert(output_info.total_size() == env->graph().operands().at(in_index).info().total_size());
-}
-
-void invoke(const ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- if (env->tensorAt(in_index)->bufferRO() == env->tensorAt(out_index)->bufferRO())
- {
- // Same data
- return;
- }
-
- const auto output_info = env->graph().operands().at(out_index).info();
- memcpy(env->tensorAt(out_index)->buffer(), env->tensorAt(in_index)->bufferRO(),
- output_info.total_size());
-}
-
-} // namespace {anonymous}
-
-OpKernel *getReshape()
-{
- static OpKernel kernel = {prepare, invoke};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/SoftMax.cc b/runtime/neurun/core/src/exec/interp/operations/SoftMax.cc
deleted file mode 100644
index 91d98889f..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/SoftMax.cc
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/SoftMax.h>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-#include "ir/operation/Softmax.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace
-{
-
-void Softmax2D(const float *in, const int input_size, const int batch_size, const float beta,
- float *out)
-{
- assert(input_size > 0);
-
- // For each batch
- for (int b = 0; b < batch_size; b++)
- {
- // Find the max coeff.
- float max_coeff = in[0];
- for (int i = 1; i < input_size; i++)
- {
- if (in[i] > max_coeff)
- max_coeff = in[i];
- }
-
- // Compute the normalized sum of exps.
- float exp_sum = 0.0;
- for (int i = 0; i < input_size; i++)
- {
- out[i] = std::exp((in[i] - max_coeff) * beta);
- exp_sum += out[i];
- }
-
- // Divide by the sum of exps.
- float reciprocal_sum_exp = 1.f / exp_sum;
- for (int i = 0; i < input_size; i++)
- {
- out[i] *= reciprocal_sum_exp;
- }
-
- // Advance in and out pointers for the next batch.
- in += input_size;
- out += input_size;
- }
-}
-
-void prepareSoftMax(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- UNUSED_RELEASE(in_tensor);
-
- assert((in_tensor->num_dimensions() == 4) || (in_tensor->num_dimensions() == 2));
-
- // Output shape should be same with input
- // Output type is pre-defined in model
- const auto output_shape = env->graph().operands().at(in_index).info().shape();
- const auto output_type = env->graph().operands().at(out_index).info().typeInfo();
-
- const ir::OperandInfo output_info{output_shape, output_type};
- env->allocateIfNeeded(out_index, output_info);
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Check output shape is same with input
- assert(out_tensor->num_dimensions() == out_tensor->num_dimensions());
- for (uint32_t i = 0; i < in_tensor->num_dimensions(); i++)
- {
- assert(in_tensor->dimension(i) == out_tensor->dimension(i));
- }
-}
-
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
- const ir::operation::Softmax::Param &param)
-{
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
- float beta = param.beta;
-
- if (in_tensor->num_dimensions() == 2)
- {
- uint32_t batch_size = in_tensor->dimension(0);
- uint32_t input_size = in_tensor->dimension(1);
-
- Softmax2D(in_ptr, input_size, batch_size, beta, out_ptr);
- }
- else if (in_tensor->num_dimensions() == 4)
- {
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-
- nnfw::cker::SoftmaxParams cker_param;
- cker_param.beta = beta;
-
- nnfw::cker::Softmax(cker_param, in_shape, in_ptr, out_shape, out_ptr);
- }
- else
- {
- throw std::runtime_error{"Unsuported input dimension: support 2D or 4D"};
- }
-}
-
-void invokeSoftMax(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &softmax_node = nnfw::misc::polymorphic_downcast<const ir::operation::Softmax &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- const auto in_data_type = in_tensor->data_type();
- const auto out_data_type = out_tensor->data_type();
- if ((in_data_type == ir::DataType::FLOAT32) && (out_data_type == ir::DataType::FLOAT32))
- {
- invoke(in_tensor, out_tensor, softmax_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getSoftMax()
-{
- static OpKernel kernel = {prepareSoftMax, invokeSoftMax};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc b/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc
deleted file mode 100644
index 70b72c88d..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/TransposeConv.cc
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/TransposeConv.h>
-#include <misc/polymorphic_downcast.h>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-#include "ir/operation/TransposeConv.h"
-#include "util/Padding.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace
-{
-
-void prepareTransposeConv(ExecEnv *env, const ir::Operation &node)
-{
- const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto ofm_shape_index = node.getInputs().at(ir::operation::TransposeConv::OUTPUT_SHAPE);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto ofm_shape_tensor = env->tensorAt(ofm_shape_index);
-
- assert(ifm_tensor->num_dimensions() == 4);
- assert(ker_tensor->num_dimensions() == 4);
- assert(ofm_shape_tensor->num_dimensions() == 1);
-
- UNUSED_RELEASE(ifm_tensor);
- UNUSED_RELEASE(ker_tensor);
- UNUSED_RELEASE(ofm_shape_tensor);
-
- const auto output_info = env->graph().operands().at(ofm_index).info();
- if (output_info.total_size() == 0)
- {
- // TODO: Handle unspecified output shape
- throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(ofm_index, output_info);
- }
-
- auto ofm_tensor = env->tensorAt(ofm_index);
- UNUSED_RELEASE(ofm_tensor);
-
- // Handle same ifm & ofm data type only
- if (ifm_tensor->data_type() != ofm_tensor->data_type())
- {
- throw std::runtime_error{"Interp(TConv): Different I/O data dype"};
- }
-
- if (ofm_tensor->num_dimensions() != 4)
- {
- throw std::runtime_error{"Interp(TConv): Invalid output rank"};
- }
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *ofm_tensor,
- const ir::operation::TransposeConv::Param &param)
-{
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
- const auto ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding = neurun::util::calculatePadding(param.padding, ofm_shape, ifm_shape,
- param.stride, ker_width, ker_height);
-
- nnfw::cker::TransposeConvParams cker_param;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::TransposeConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_ofm_shape, ofm_ptr);
-}
-
-void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &tconv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, ofm_tensor, tconv_node.param());
- }
- else
- {
- throw std::runtime_error{"Interp(TConv): Support float32 only"};
- }
-}
-
-} // namespace transposeconv
-
-OpKernel *getTransposeConv()
-{
- static OpKernel kernel = {prepareTransposeConv, invokeTransposeConv};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc b/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc
deleted file mode 100644
index 116806fc4..000000000
--- a/runtime/neurun/core/src/exec/interp/operations/UnaryActivations.cc
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cmath>
-
-#include "OperationUtil.h"
-
-#include "exec/interp/Registration.h"
-
-#include "ir/operation/ReLU.h"
-#include "ir/operation/ReLU1.h"
-#include "ir/operation/ReLU6.h"
-#include "ir/operation/Tanh.h"
-
-namespace neurun
-{
-namespace exec
-{
-namespace interp
-{
-namespace
-{
-
-enum class ActivationType
-{
- ReLU,
- ReLU1,
- ReLU6,
- Tanh
-};
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
- if (output_info.total_size() == 0)
- {
- // Output's shape and type is same with input
- auto input_info = input_tensor->tensorInfo();
- // We can handle already allocated (ex. model output)
- env->allocateIfNeeded(output_index, input_info);
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
- // Check shape and type lhs is same with output
- // TODO Util function to compare TensorInfo
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Activations): Invalid output type"};
- }
-}
-
-template <ActivationType act_type>
-void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements)
-{
- std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); };
- switch (act_type)
- {
- case ActivationType::ReLU:
- fn = [](const float &in) { return std::max(0.f, in); };
- break;
- case ActivationType::ReLU1:
- fn = [](const float &in) { return std::min(std::max(-1.f, in), 1.f); };
- break;
- case ActivationType::ReLU6:
- fn = [](const float &in) { return std::min(std::max(0.f, in), 6.f); };
- break;
- case ActivationType::Tanh:
- fn = [](const float &in) { return std::tanh(in); };
- break;
- default:
- throw std::runtime_error{"Interp(Activations): NYI - Unsupported activation"};
- break;
- }
-
- const float *input_end = input_ptr + num_elements;
- for (; input_ptr < input_end; input_ptr++, output_ptr++)
- {
- *output_ptr = fn(*input_ptr);
- }
-}
-
-template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- // Check lhs shape is same with rhs (with broadcast)
- const auto input_tensor = env->tensorAt(input_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- uint64_t elements = input_tensor->num_elements();
- const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO());
- float *out = reinterpret_cast<float *>(output_tensor->buffer());
-
- evalFloat<act_type>(input_start, out, elements);
- }
- else
- {
- throw std::runtime_error{"Interp(ReLU6): NYI - Support float only"};
- }
-}
-
-} // namespace
-
-OpKernel *getReLU()
-{
- static OpKernel kernel = {prepare, invoke<ActivationType::ReLU>};
- return &kernel;
-}
-
-OpKernel *getReLU1()
-{
- static OpKernel kernel = {prepare, invoke<ActivationType::ReLU1>};
- return &kernel;
-}
-
-OpKernel *getReLU6()
-{
- static OpKernel kernel = {prepare, invoke<ActivationType::ReLU6>};
- return &kernel;
-}
-
-OpKernel *getTanh()
-{
- static OpKernel kernel = {prepare, invoke<ActivationType::Tanh>};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace exec
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/Graph.cc b/runtime/neurun/core/src/ir/Graph.cc
deleted file mode 100644
index a84ebb68b..000000000
--- a/runtime/neurun/core/src/ir/Graph.cc
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/Graph.h"
-
-#include <algorithm>
-#include <bitset>
-#include <sstream>
-
-#include "util/logging.h"
-#include "verifier/Verifier.h"
-#include "cpp14/memory.h"
-#include "ir/operation/LowerInfo.h"
-#include "ir/operand/LowerInfo.h"
-#include "ir/operand/PermuteFactor.h"
-#include "ir/GraphIterator.h"
-#include "operand/Shape4DConvert.h"
-#include "compiler/BackendResolver.h"
-#include "backend/IConfig.h"
-#include "pass/ConstantInsertionPass.h"
-#include "pass/PermutationInsertionPass.h"
-#include "pass/PermutationEliminationPass.h"
-#include "pass/PermutationOperationPass.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-Graph::Graph() = default;
-
-Graph::~Graph(void) = default;
-
-OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type)
-{
- return _operands.emplace(shape, type);
-}
-
-OperationIndex Graph::addOperation(std::unique_ptr<Operation> &&node)
-{
- assert(isBuildingPhase());
- return _operations.push(std::move(node));
-}
-
-void Graph::setOperandValue(const OperandIndex &ind, std::unique_ptr<Data> &&data)
-{
- assert(isBuildingPhase());
- assert(_operands.exist(ind));
- _operands.at(ind).data(std::move(data));
-}
-
-void Graph::addInput(const OperandIndex &ind)
-{
- assert(isBuildingPhase());
- _inputs.append(ind);
-}
-
-void Graph::addOutput(const OperandIndex &ind)
-{
- assert(isBuildingPhase());
- _outputs.append(ind);
-}
-
-void Graph::finishBuilding(void)
-{
- assert(isBuildingPhase());
- _phase = Phase::MODEL;
-
- // Initialize operand use-def
- initializeUseDef();
-
- // Call graph verifications for the MODEL phase
- {
- assert(verifier::DAGChecker().verify(*this));
- assert(verifier::EdgeConsistencyChecker().verify(*this));
- }
-}
-
-void Graph::lower(void)
-{
- assert(_phase == Phase::MODEL);
-
- _op_seqs = nnfw::cpp14::make_unique<Subgraphs>();
-
- // Lower
- {
- // operand::LowerInfo holder
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operands_lower_info;
-
- _operands.iterate([&](const OperandIndex &index, const Operand &object) {
- operands_lower_info[index] =
- nnfw::cpp14::make_unique<operand::LowerInfo>(operand::asShape4D(object.shape()));
- });
-
- _lower_info_map = nnfw::cpp14::make_unique<LowerInfoMap>();
-
- // Make subgraphs while checking whether a node can be merged into a op_seq.
- makeSubgraphs(operands_lower_info);
-
- _op_seqs->iterate([&](const SubgraphIndex &, OpSequence &subg) {
- assert(subg.operations().size() > 0);
- std::reverse(std::begin(subg.operations()), std::end(subg.operations()));
- });
-
- _op_seqs->dump("merged and sorted operations without permutation");
-
- pass::ConstantInsertionPass ci_pass(*this);
- ci_pass.run();
-
- // Set LowerInfo for each operand from the operand::LowerInfo holder
- manipulateLowerInfo(operands_lower_info);
-
- dumpLowerInfo();
- }
-
- // Run Permutation Passes
- {
- pass::PermutationOperationPass po_pass(*this);
- po_pass.run();
-
- pass::PermutationInsertionPass pi_pass(*this);
- pi_pass.run();
- // Implemented code no longer works.
- // pass::PermutationEliminationPass pe_pass(*this);
- // pe_pass.run();
-
- // TODO merge perm subgraphs if possible
- _op_seqs->dump("merged and sorted operations with permutation");
- }
-
- // Graph verifications for the LOWERED phase
- {
- assert(verifier::DAGChecker().verify(*this));
- assert(verifier::EdgeConsistencyChecker().verify(*this));
- }
-}
-
-void Graph::initializeUseDef()
-{
- operations().iterate([&](const OperationIndex &index, const Operation &node) -> void {
- auto outputs = node.getOutputs();
- for (auto output : outputs)
- {
- operands().at(output).appendDef(index);
- }
-
- auto inputs = node.getInputs();
- for (auto input : inputs)
- {
- operands().at(input).appendUse(index);
- }
- });
-}
-
-const operation::LowerInfo *Graph::getLowerInfo(const SubgraphIndex &subg_index) const
-{
- if (!_lower_info_map)
- return nullptr;
- auto itr = _lower_info_map->operation.find(subg_index);
- if (itr == _lower_info_map->operation.end())
- return nullptr;
- return itr->second.get();
-}
-
-void Graph::setLowerInfo(const SubgraphIndex &subg_index,
- std::unique_ptr<operation::LowerInfo> &&lower_info)
-{
- assert(_lower_info_map);
- _lower_info_map->operation.insert(std::make_pair(subg_index, std::move(lower_info)));
-}
-
-void Graph::removeLowerInfo(const SubgraphIndex &subg_index)
-{
- auto &subg_lower_info = _lower_info_map->operation;
- assert(subg_lower_info.find(subg_index) != subg_lower_info.end());
- for (auto it = subg_lower_info.begin(); it != subg_lower_info.end(); ++it)
- {
- if (it->first == subg_index)
- {
- subg_lower_info.erase(it);
- break;
- }
- }
-}
-
-const operand::LowerInfo *Graph::getLowerInfo(const OperandIndex &index) const
-{
- if (!_lower_info_map)
- return nullptr;
- auto itr = _lower_info_map->operand.find(index);
- if (itr == _lower_info_map->operand.end())
- return nullptr;
- return itr->second.get();
-}
-
-operand::LowerInfo *Graph::getLowerInfo(const OperandIndex &index)
-{
- if (!_lower_info_map)
- return nullptr;
- auto itr = _lower_info_map->operand.find(index);
- if (itr == _lower_info_map->operand.end())
- return nullptr;
- return itr->second.get();
-}
-
-void Graph::setLowerInfo(const OperandIndex &index,
- std::unique_ptr<operand::LowerInfo> &&lower_info)
-{
- assert(_lower_info_map);
- _lower_info_map->operand.insert(std::make_pair(index, std::move(lower_info)));
-}
-
-void Graph::removeLowerInfo(const OperandIndex &index) { _lower_info_map->operand.erase(index); }
-
-void Graph::makeSubgraphs(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info)
-{
- // if SUBG_MAX_NODE == 0, no limit on nodes of a op_seq
- const int subg_max_node = util::getConfigInt(util::config::SUBG_MAX_NODE);
- assert(subg_max_node >= 0);
-
- bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE);
- OpSequence *subg = nullptr;
- SubgraphIndex subg_index;
-
- // NOTE: The below method appends nodes while making one op_seq if needed. If something better
- // ways, happy to update this code.
- PostDfsConstIterator{}.iterate(*this, [&](const OperationIndex &node_index,
- const Operation &node) {
- // LowerInfo for in/output operands
- auto backend = _backend_resolver->getBackend(node_index);
-
- // TODO How to get frontend layout of this node from IR
- auto frontend_layout = Layout::NHWC;
- auto backend_layout = frontend_layout;
-
- // The layout of each backend should be set at another place
- // TODO Change setting layout of each backend at another place
- // TODO Remove getting id of backend
- if (backend->config()->id() == "acl_cl" || backend->config()->id() == "acl_neon")
- {
- const std::string acl_layout_str = util::getConfigString(util::config::ACL_LAYOUT);
- if (acl_layout_str == "NHWC")
- {
- backend_layout = Layout::NHWC;
- }
- else if (acl_layout_str == "NCHW")
- {
- backend_layout = Layout::NCHW;
- }
- }
- else if (backend->config()->id() == "srcn")
- {
- const std::string ncnn_layout_str = util::getConfigString(util::config::NCNN_LAYOUT);
- if (ncnn_layout_str == "NHWC")
- {
- backend_layout = Layout::NHWC;
- }
- else if (ncnn_layout_str == "NCHW")
- {
- backend_layout = Layout::NCHW;
- }
- }
- else if (backend->config()->id() == "cpu")
- {
- backend_layout = Layout::NHWC;
- }
-
- for (auto operand : node.getInputs())
- {
- auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addUsePermuteFactor(operand::PermuteFactor{backend, backend_layout});
- }
- for (auto operand : node.getOutputs())
- {
- auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addDefPermuteFactor(operand::PermuteFactor{backend, backend_layout});
- }
-
- bool new_subg =
- (subg == nullptr ||
- (subg_max_node != 0 && subg->operations().size() >= static_cast<size_t>(subg_max_node)));
-
- // for profiling each op_seq must contain just one node,
- // so that we can measure a node separately
- if (new_subg || is_profiling || !mergeable(subg_index, node_index, backend_layout))
- {
- auto new_subg_index = appendFreshSingleOpSubgraph(node_index, node, frontend_layout);
-
- // OpSequence LowerInfo
- setLowerInfo(new_subg_index,
- nnfw::cpp14::make_unique<operation::LowerInfo>(backend, backend_layout));
-
- subg_index = new_subg_index;
- subg = &(_op_seqs->at(new_subg_index));
-
- VERBOSE(Lower) << "SUBG#" << subg_index.value() << " is created for "
- << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
- }
- else
- {
- subg->appendOperation(node_index, node);
- subg->setInputs(node.getInputs());
-
- VERBOSE(Lower) << "SUBG#" << subg_index.value() << " merges "
- << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
- }
- });
-}
-
-void Graph::manipulateLowerInfo(
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info)
-{
- const auto default_backend = backend::BackendManager::get().getDefault();
- for (auto index : _inputs)
- {
- // Pick just any one from the uses, here the first one is chosen
- // For the other uses, Permute operations will be inserted later
- auto &&lower_info = operands_lower_info.at(index);
- assert(lower_info->use_factors().size() > 0);
- lower_info->addDefPermuteFactor(*lower_info->use_factors().begin());
- }
- for (auto index : _outputs)
- {
- auto &&lower_info = operands_lower_info.at(index);
- if (_operands.at(index).isConstant())
- {
- lower_info->addDefPermuteFactor(operand::PermuteFactor{
- default_backend,
- Layout::NHWC // TODO Get frontend layout of this node from IR
- });
- }
- }
-
- // Set LowerInfo for each operand from the operand::LowerInfo holder
- _operands.iterate([&](const OperandIndex &index, Operand &) {
- setLowerInfo(index, std::move(operands_lower_info[index]));
- });
-}
-
-void Graph::dumpLowerInfo()
-{
- if (::neurun::util::logging::ctx.enabled() == false)
- return;
-
- std::map<uint32_t, std::string> dumps;
-
- _operands.iterate([&](const OperandIndex &index, Operand &object) {
- std::stringstream sstream;
- if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty())
- {
- auto factors_to_string = [](const operand::PermuteFactorSet &factors) {
- std::string str;
- for (auto factor : factors)
- {
- str += factor.backend()->config()->id();
- str += "(" + to_string(factor.layout()) + ")";
- str += " ";
- }
- return "{ " + str + "}";
- };
-
- auto operation_index_to_string = [](const OperationIndexList &operations) {
- std::string str;
- for (auto op : operations.list())
- {
- str += std::to_string(op.value());
- str += " ";
- }
- return "{ " + str + "}";
- };
-
- const auto lower_info = getLowerInfo(index);
- const auto &shape = object.shape();
- const auto &lower_shape = lower_info->shape();
- std::string def_ops = operation_index_to_string(object.getDef());
- std::string use_ops = operation_index_to_string(object.getUses());
- std::string def_layouts = factors_to_string(lower_info->def_factors());
- std::string use_layouts = factors_to_string(lower_info->use_factors());
- sstream << "Operand #" << index.value() << " LowerInfo" << std::endl;
- sstream << " - Shape : { " << (shape.rank() > 0 ? shape.dim(0) : 0) << " "
- << (shape.rank() > 1 ? shape.dim(1) : 0) << " "
- << (shape.rank() > 2 ? shape.dim(2) : 0) << " "
- << (shape.rank() > 3 ? shape.dim(3) : 0) << " "
- << "}" << std::endl;
- sstream << " - Def Operations : " << def_ops << std::endl;
- sstream << " - Use Operations : " << use_ops << std::endl;
- sstream << " - Lower Info" << std::endl;
- sstream << " - 4D Shape (NHWC) : { " << lower_shape.n() << " " << lower_shape.h() << " "
- << lower_shape.w() << " " << lower_shape.c() << " "
- << "}" << std::endl;
- sstream << " - Def Backends : " << def_layouts << std::endl;
- sstream << " - Use Backends : " << use_layouts << std::endl;
- }
- dumps.emplace(index.value(), sstream.str());
- });
-
- for (const auto &e : dumps)
- {
- if (!e.second.empty())
- {
- VERBOSE(Lower) << e.second;
- }
- }
-}
-
-bool Graph::mergeable(const SubgraphIndex &subg_index, const OperationIndex &node_index,
- Layout layout)
-{
- // Are they mergeable?
- // 1. the same backend id and layout?
- // 2. Is op_seq or node branched?
- // 3. if 1 is true, the subg and a node are connected?
- const auto &subg = _op_seqs->at(subg_index);
- const auto &node = _operations.at(node_index);
-
- // The same backend id and layout?
- {
- const auto subg_backend_layout = getLowerInfo(subg_index)->layout();
- const auto &subg_backend_id = getLowerInfo(subg_index)->backend()->config()->id();
- const auto &node_backend_id = _backend_resolver->getBackend(node_index)->config()->id();
- VERBOSE(Lower) << "SUBG#" << subg_index.value() << " { " << subg_backend_id << "("
- << to_string(subg_backend_layout) << ") } "
- << " NODE#" << node_index.value() << " (" << node.name() << ") { "
- << node_backend_id << "(" << to_string(layout) << ") } " << std::endl;
- if (subg_backend_id != node_backend_id || subg_backend_layout != layout)
- return false;
- }
-
- // Branched?
- {
- std::unordered_set<OperationIndex> branched_set;
-
- // Check for branching up
- const auto &inputs = subg.getInputs();
- for (const auto &input : inputs)
- {
- const auto &input_obj = _operands.at(input);
- for (const auto &def : input_obj.getDef().list())
- {
- branched_set.insert(def);
- if (branched_set.size() > 1)
- {
- return false;
- }
- }
- }
- branched_set.clear();
-
- // Check for branching down
- const auto &outputs = node.getOutputs();
- for (const auto &output : outputs)
- {
- const auto &output_obj = _operands.at(output);
- for (const auto &use : output_obj.getUses().list())
- {
- branched_set.insert(use);
- if (branched_set.size() > 1)
- {
- return false;
- }
- }
- }
- }
-
- // Connected?
- // an input of one node is an output of the other node? or vice-versa?
- {
- const auto &node_inputs = node.getInputs();
- const auto &node_outputs = node.getOutputs();
-
- // subg's operations are in order so that we just check the first and the last
- std::vector<Element> subg_ops{subg.operations()[0]};
- if (subg.operations().size() > 1)
- subg_ops.emplace_back(subg.operations()[subg.operations().size() - 1]);
-
- for (const auto &elem : subg_ops)
- {
- const auto &n_index = elem.index;
- const auto &n = *elem.node;
-
- // node's output == subg's input?
- const auto &n_inputs = n.getInputs();
- for (auto input : n_inputs)
- {
- if (node_outputs.contains(input))
- {
- VERBOSE(Lower) << "SUBG#" << subg_index.value() << " 's NODE#" << n_index.value() << "("
- << n.name() << ") is connected to NODE#" << node_index.value() << "("
- << node.name() << ")" << std::endl;
- return true;
- }
- }
-
- // node's input == subg's output?
- const auto &n_outputs = n.getOutputs();
- for (auto output : n_outputs)
- {
- if (node_inputs.contains(output))
- {
- VERBOSE(Lower) << "SUBG#" << subg_index.value() << " 's NODE#" << n_index.value() << " ("
- << n.name() << ") is connected to NODE#" << node_index.value()
- << std::endl;
- return true;
- }
- }
- }
-
- VERBOSE(Lower) << "SUBG#" << subg_index.value() << " is not connected to NODE#"
- << node_index.value() << "(" << node.name() << ")" << std::endl;
- }
-
- return false;
-}
-
-SubgraphIndex Graph::appendFreshSingleOpSubgraph(const OperationIndex &node_index,
- const Operation &node, Layout layout)
-{
- // Create a fresh op_seq with one operation, and append it to subgraphs
- // Create a fresh op_seq
- auto subg = nnfw::cpp14::make_unique<OpSequence>(layout);
-
- // Add an operation
- subg->appendOperation(node_index, node);
-
- // Update input/output
- subg->setOutputs(node.getOutputs());
- subg->setInputs(node.getInputs());
-
- return _op_seqs->emplace(std::move(subg));
-}
-
-void Graph::setBackendResolver(std::unique_ptr<compiler::BackendResolver> &&br)
-{
- _backend_resolver = std::move(br);
-}
-
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/GraphIterator.cc b/runtime/neurun/core/src/ir/GraphIterator.cc
deleted file mode 100644
index ce20787ae..000000000
--- a/runtime/neurun/core/src/ir/GraphIterator.cc
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GraphIterator.h"
-
-#include "ir/OperationIndexMap.h"
-#include "ir/Graph.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-// Explicit instantiations to have implementation in the source file.
-
-template class DefaultIterator<true>;
-template class DefaultIterator<false>;
-
-template class PostDfsIterator<true>;
-template class PostDfsIterator<false>;
-
-//
-// Graph::DefaultIterator
-//
-
-template <bool is_const>
-void DefaultIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const
-{
- graph.operations().iterate(
- [&](const OperationIndex &index, NodeRef node) -> void { fn(index, node); });
-}
-
-//
-// Graph::PostDfsIterator
-//
-
-template <bool is_const>
-void PostDfsIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const
-{
- assert(!graph.isBuildingPhase()); // Restrict iteration condition
-
- OperationIndexMap<bool> visited;
- graph.operations().iterate([&](const OperationIndex &index, NodeRef) { visited[index] = false; });
-
- std::function<void(const OperationIndex &, NodeRef)> dfs_recursive =
- [&](const OperationIndex &index, NodeRef node) -> void {
- if (visited[index])
- return;
- visited[index] = true;
-
- for (auto output : node.getOutputs())
- {
- const auto &operand = graph.operands().at(output);
- for (const auto &use : operand.getUses().list())
- {
- dfs_recursive(use, graph.operations().at(use));
- }
- }
-
- fn(index, node);
- };
-
- graph.operations().iterate(dfs_recursive);
-
- // All of the operations(nodes) must have been visited.
- assert(std::all_of(visited.begin(), visited.end(),
- [](const std::pair<const OperationIndex, bool> &v) { return v.second; }));
-}
-
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/GraphIterator.h b/runtime/neurun/core/src/ir/GraphIterator.h
deleted file mode 100644
index a5bf1c323..000000000
--- a/runtime/neurun/core/src/ir/GraphIterator.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_GRAPH_ITERATOR_H__
-#define __NEURUN_IR_GRAPH_ITERATOR_H__
-
-#include <type_traits>
-
-#include "ir/Index.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-class Graph;
-class Operation;
-
-template <bool is_const> class Iterator
-{
-public:
- using GraphRef = typename std::conditional<is_const, const Graph &, Graph &>::type;
- using IndexRef = const OperationIndex &;
- using NodeRef = typename std::conditional<is_const, const Operation &, Operation &>::type;
- using IterFn = std::function<void(IndexRef, NodeRef)>;
-
-public:
- virtual ~Iterator() = default;
- virtual void iterate(GraphRef graph, const IterFn &fn) const = 0;
-};
-
-template <bool is_const = false> class DefaultIterator final : public Iterator<is_const>
-{
-public:
- using GraphRef = typename Iterator<is_const>::GraphRef;
- using IndexRef = typename Iterator<is_const>::IndexRef;
- using NodeRef = typename Iterator<is_const>::NodeRef;
- using IterFn = typename Iterator<is_const>::IterFn;
-
-public:
- void iterate(GraphRef graph, const IterFn &fn) const;
-};
-using DefaultConstIterator = DefaultIterator<true>;
-
-template <bool is_const = false> class PostDfsIterator final : public Iterator<is_const>
-{
-public:
- using GraphRef = typename Iterator<is_const>::GraphRef;
- using IndexRef = typename Iterator<is_const>::IndexRef;
- using NodeRef = typename Iterator<is_const>::NodeRef;
- using IterFn = typename Iterator<is_const>::IterFn;
-
-public:
- void iterate(GraphRef graph, const IterFn &fn) const;
-};
-using PostDfsConstIterator = PostDfsIterator<true>;
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_GRAPH_ITERATOR_H__
diff --git a/runtime/neurun/core/src/ir/LayoutSet.cc b/runtime/neurun/core/src/ir/LayoutSet.cc
deleted file mode 100644
index 025ba45dc..000000000
--- a/runtime/neurun/core/src/ir/LayoutSet.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LayoutSet.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-LayoutSet::LayoutSet(std::initializer_list<Layout> layouts)
-{
- for (auto layout : layouts)
- {
- _set.insert(layout);
- }
-}
-
-LayoutSet LayoutSet::operator|(const LayoutSet &other) const
-{
- auto ret = *this;
- for (auto layout : other)
- {
- ret.add(layout);
- }
- return ret;
-}
-
-LayoutSet LayoutSet::operator&(const LayoutSet &other) const
-{
- LayoutSet ret;
- for (auto layout : other)
- {
- if (contains(layout))
- {
- ret.add(layout);
- }
- }
- return ret;
-}
-
-LayoutSet LayoutSet::operator-(const LayoutSet &other) const
-{
- auto ret = *this;
- for (auto layout : other)
- {
- ret.remove(layout);
- }
- return ret;
-}
-
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/LayoutSet.h b/runtime/neurun/core/src/ir/LayoutSet.h
deleted file mode 100644
index e38ef3ce2..000000000
--- a/runtime/neurun/core/src/ir/LayoutSet.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_IR_LAYOUT_SET_H__
-#define __NEURUN_IR_LAYOUT_SET_H__
-
-#include <initializer_list>
-#include <unordered_set>
-
-#include "ir/Layout.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-class LayoutSet
-{
-public:
- LayoutSet() = default;
- LayoutSet(std::initializer_list<Layout> layouts);
-
-public:
- void add(const Layout &layout) { _set.insert(layout); }
- void remove(const Layout &layout) { _set.erase(layout); }
- uint32_t size() const { return static_cast<uint32_t>(_set.size()); }
- bool contains(const Layout &layout) const { return _set.find(layout) != _set.end(); }
-
-public:
- LayoutSet operator|(const LayoutSet &other) const; // Union
- LayoutSet operator&(const LayoutSet &other) const; // Intersect
- LayoutSet operator-(const LayoutSet &other) const; // Minus
-
-public:
- std::unordered_set<Layout>::const_iterator begin() const { return _set.begin(); }
- std::unordered_set<Layout>::const_iterator end() const { return _set.end(); }
-
-private:
- std::unordered_set<Layout> _set;
-};
-
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_IR_LAYOUT_SET_H__
diff --git a/runtime/neurun/core/src/ir/OpCode.cc b/runtime/neurun/core/src/ir/OpCode.cc
deleted file mode 100644
index e6552a275..000000000
--- a/runtime/neurun/core/src/ir/OpCode.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/OpCode.h"
-
-#include <unordered_map>
-
-namespace neurun
-{
-namespace ir
-{
-
-const char *toString(OpCode opcode)
-{
- static const std::unordered_map<OpCode, const char *> map{{OpCode::Invalid, "Invalid"},
-#define OP(Name) {OpCode::Name, #Name},
-#include "ir/Operations.lst"
-#undef OP
- {OpCode::COUNT, "COUNT"}};
- return map.at(opcode);
-}
-
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/OpSequence.cc b/runtime/neurun/core/src/ir/OpSequence.cc
deleted file mode 100644
index 13a6cbe27..000000000
--- a/runtime/neurun/core/src/ir/OpSequence.cc
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/OpSequence.h"
-#include "ir/OperationVisitor.h"
-#include <sstream>
-
-namespace neurun
-{
-namespace ir
-{
-
-OpSequence::OpSequence(Layout layout) : _layout{layout}
-{
- // DO NOTHING
-}
-
-void OpSequence::accept(OperationVisitor &v) const { v.visit(*this); }
-
-// TODO: Impl Dumper instead of this method
-std::string OpSequence::getStr() const
-{
- // " OpSequence IN(xx,xx,xx) -> { op0, op1, op2 } -> OUT(yy,yy,yy)"
- std::stringstream ss;
- ss << " OpSequence IN(";
- for (const auto &index : getInputs())
- {
- ss << " " << index.value();
- }
- ss << " ) -> {";
- for (const auto &elem : _operations)
- {
- ss << " " << elem.index.value() << "(" << elem.node->name() << ")";
- }
- ss << " } -> OUT(";
- for (const auto &index : getOutputs())
- {
- ss << " " << index.value();
- }
- ss << " )";
- return ss.str();
-}
-
-void OpSequence::remove(const OperationIndex &index)
-{
- assert(exist(index));
- for (auto it = _operations.cbegin(); it != _operations.cend(); ++it)
- {
- if (it->index == index)
- {
- _operations.erase(it);
- break;
- }
- }
-}
-
-bool OpSequence::exist(const OperationIndex &index) const
-{
- for (const auto &element : _operations)
- {
- if (element.index == index)
- {
- return true;
- }
- }
- return false;
-}
-
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/Operand.cc b/runtime/neurun/core/src/ir/Operand.cc
deleted file mode 100644
index 335dd17b9..000000000
--- a/runtime/neurun/core/src/ir/Operand.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/Operand.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-size_t Operand::operandSize(void) const
-{
- const uint32_t ranks = shape().rank();
- int32_t elements = 1;
-
- for (uint32_t rank = 0; rank < ranks; rank++)
- {
- elements *= shape().dim(rank);
- }
-
- DataType type = typeInfo().type();
- size_t element_size = sizeOfDataType(type);
-
- // Value of type is matched with OperandCode enum in NeuralNetworks.h
- return element_size * elements;
-}
-
-void Operand::appendUse(const OperationIndex &idx) { _uses.append(idx); }
-
-void Operand::removeUse(const OperationIndex &idx) { _uses.remove(idx); }
-
-void Operand::appendDef(const OperationIndex &idx)
-{
- assert(!isConstant());
- assert(_def.size() == 0);
-
- _def.append(idx);
-}
-
-void Operand::removeDef(const OperationIndex &idx)
-{
- assert(_def.contains(idx));
-
- _def.remove(idx);
-}
-
-void Operand::parent_info(std::unique_ptr<operand::ParentInfo> &&parent_info)
-{
- _parent_info = std::move(parent_info);
-}
-
-const operand::ParentInfo *Operand::parent_info() const { return _parent_info.get(); }
-
-operand::ParentInfo *Operand::parent_info() { return _parent_info.get(); }
-
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/OperandIndexSequence.cc b/runtime/neurun/core/src/ir/OperandIndexSequence.cc
deleted file mode 100644
index 302444125..000000000
--- a/runtime/neurun/core/src/ir/OperandIndexSequence.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/OperandIndexSequence.h"
-
-#include <algorithm>
-
-namespace neurun
-{
-namespace ir
-{
-
-OperandIndexSequence::OperandIndexSequence(std::initializer_list<OperandIndex> list) : _set(list)
-{
- // DO NOTHING
-}
-
-OperandIndexSequence::OperandIndexSequence(std::initializer_list<int32_t> list)
-{
- for (auto val : list)
- {
- _set.emplace_back(static_cast<uint32_t>(val));
- }
-}
-
-OperandIndexSequence::OperandIndexSequence(std::initializer_list<uint32_t> list)
-{
- for (auto val : list)
- {
- _set.emplace_back(val);
- }
-}
-
-bool OperandIndexSequence::contains(const OperandIndex &index) const
-{
- return std::find(_set.begin(), _set.end(), index) != _set.end();
-}
-
-void OperandIndexSequence::replace(const OperandIndex &from, const OperandIndex &to)
-{
- std::replace(_set.begin(), _set.end(), from, to);
-}
-
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/Operation.cc b/runtime/neurun/core/src/ir/Operation.cc
deleted file mode 100644
index 3e4b606f2..000000000
--- a/runtime/neurun/core/src/ir/Operation.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/Operation.h"
-
-#include <cassert>
-
-namespace neurun
-{
-namespace ir
-{
-
-Operation::Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs)
- : _input_constr{input_constr}, _inputs{inputs}, _outputs{outputs}
-{
-}
-
-Operation::Operation(OperandConstraint input_constr) : _input_constr{input_constr} {}
-
-Operation::~Operation() = default;
-
-void Operation::setInputs(const OperandIndexSequence &indexes)
-{
- assert(_input_constr.check(indexes.size()));
- _inputs = indexes;
-}
-
-void Operation::setOutputs(const OperandIndexSequence &indexes) { _outputs = indexes; }
-
-void Operation::replaceInput(const OperandIndex &from, const OperandIndex &to)
-{
- _inputs.replace(from, to);
-}
-
-void Operation::replaceOutput(const OperandIndex &from, const OperandIndex &to)
-{
- _outputs.replace(from, to);
-}
-
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/OperationIndexList.cc b/runtime/neurun/core/src/ir/OperationIndexList.cc
deleted file mode 100644
index 261cc5ce6..000000000
--- a/runtime/neurun/core/src/ir/OperationIndexList.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/OperationIndexList.h"
-
-#include <algorithm>
-
-namespace neurun
-{
-namespace ir
-{
-
-OperationIndexList::OperationIndexList(std::initializer_list<OperationIndex> list) : _list(list)
-{
- // DO NOTHING
-}
-
-bool OperationIndexList::contains(const OperationIndex &index) const
-{
- return std::find(_list.begin(), _list.end(), index) != _list.end();
-}
-
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/Shape.cc b/runtime/neurun/core/src/ir/Shape.cc
deleted file mode 100644
index 2679f83c6..000000000
--- a/runtime/neurun/core/src/ir/Shape.cc
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/Shape.h"
-#include "util/Utils.h"
-
-#include <cassert>
-#include <functional>
-#include <numeric>
-
-namespace neurun
-{
-namespace ir
-{
-
-FeatureShape Shape::asFeature(Layout layout) const
-{
- assert(rank() == 4);
-
- if (layout == Layout::NHWC)
- {
- // Feature Map in NHWC layout
- // - Dimension(0) -> Batch
- // - Dimension(1) -> Height
- // - Dimension(2) -> Width
- // - Dimension(3) -> Depth
- const auto batch = dim(0);
- const auto depth = dim(3);
- const auto height = dim(1);
- const auto width = dim(2);
-
- return {batch, depth, height, width};
- }
- else if (layout == Layout::NCHW)
- {
- // Feature Map in NHWC layout
- // - Dimension(0) -> Batch
- // - Dimension(1) -> Depth
- // - Dimension(2) -> Height
- // - Dimension(3) -> Width
- const auto batch = dim(0);
- const auto depth = dim(1);
- const auto height = dim(2);
- const auto width = dim(3);
-
- return {batch, depth, height, width};
- }
- else
- {
- throw std::runtime_error("Wrong Layout");
- }
-}
-
-// Extended dimension is filled with 1.
-void Shape::extendRank(int to_rank)
-{
- assert(to_rank - rank() >= 0);
- _dimensions.insert(_dimensions.cbegin(), to_rank - rank(), 1);
-}
-
-uint64_t Shape::num_elements() const
-{
- // All of the nodes must have non-negative dimension
- assert(std::all_of(_dimensions.begin(), _dimensions.end(),
- [](const int32_t &v) { return (v >= 0); }));
-
- return std::accumulate(_dimensions.cbegin(), _dimensions.cend(), UINT64_C(1),
- std::multiplies<uint64_t>());
-}
-
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/Subgraphs.cc b/runtime/neurun/core/src/ir/Subgraphs.cc
deleted file mode 100644
index 780fc8c28..000000000
--- a/runtime/neurun/core/src/ir/Subgraphs.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/Subgraphs.h"
-#include "util/logging.h"
-#include "cpp14/memory.h"
-
-#include <cassert>
-#include <string>
-
-namespace neurun
-{
-namespace ir
-{
-
-SubgraphIndex Subgraphs::emplace(const OperationIndex &index, const Operation &node, Layout layout)
-{
- std::unique_ptr<OpSequence> subg = nnfw::cpp14::make_unique<OpSequence>(layout);
- subg->appendOperation(index, node);
- return push(std::move(subg));
-}
-
-SubgraphIndex Subgraphs::emplace(std::unique_ptr<OpSequence> &&subg)
-{
- return push(std::move(subg));
-}
-
-bool Subgraphs::containsOperation(const OperationIndex &operation_index) const
-{
- return findOperation(operation_index).valid();
-}
-
-SubgraphIndex Subgraphs::getOperation(const OperationIndex &operation_index) const
-{
- SubgraphIndex ret = findOperation(operation_index);
- assert(ret.valid());
- return ret;
-}
-
-// TODO: Extract this into external helper function
-void Subgraphs::dump(const std::string &msg) const
-{
- VERBOSE(Subgraphs) << "Subgraphs(" << msg << ")" << std::endl;
- iterate([&](const SubgraphIndex &idx, const OpSequence &subg) {
- VERBOSE(Subgraphs) << idx.value() << "] " << subg.getStr() << std::endl;
- });
-}
-
-void Subgraphs::removeFromSubgraph(const OperationIndex &operation_index)
-{
- const auto subg_index = findOperation(operation_index);
- auto &subg = at(subg_index);
- subg.remove(operation_index);
- if (subg.size() == 0)
- {
- remove(subg_index);
- }
-}
-
-SubgraphIndex Subgraphs::findOperation(const OperationIndex &operation_index) const
-{
- SubgraphIndex ret;
- iterate([&](const SubgraphIndex &index, const OpSequence &object) {
- for (const auto &elem : object.operations())
- {
- if (elem.index == operation_index)
- ret = index;
- }
- });
- return ret;
-}
-
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/TypeInfo.cc b/runtime/neurun/core/src/ir/TypeInfo.cc
deleted file mode 100644
index 280146b51..000000000
--- a/runtime/neurun/core/src/ir/TypeInfo.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/TypeInfo.h"
-
-namespace neurun
-{
-namespace ir
-{
-
-bool operator==(const TypeInfo &lhs, const TypeInfo &rhs)
-{
- if (lhs.type() != rhs.type())
- {
- return false;
- }
-
- if (lhs.offset() != rhs.offset())
- {
- return false;
- }
-
- if (lhs.scale() != rhs.scale())
- {
- return false;
- }
-
- return true;
-}
-
-bool operator!=(const TypeInfo &lhs, const TypeInfo &rhs) { return !(lhs == rhs); }
-
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/dumper/Dumper.cc b/runtime/neurun/core/src/ir/dumper/Dumper.cc
deleted file mode 100644
index ddfd1a47a..000000000
--- a/runtime/neurun/core/src/ir/dumper/Dumper.cc
+++ /dev/null
@@ -1,633 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Dumper.h"
-
-#include <string>
-
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace dumper
-{
-
-using namespace operation;
-
-void Dumper::visit(const Abs &node)
-{
- VERBOSE(LIR) << "* Abs" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Abs::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Add &node)
-{
- VERBOSE(LIR) << "* Add" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Add::Input::LHS).value() << ", "
- << node.getInputs().at(Add::Input::RHS).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const ArgMax &node)
-{
- VERBOSE(LIR) << "* ArgMax" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMax::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const AvgPool2D &node)
-{
- VERBOSE(LIR) << "* AvgPool2D(Implicit)" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(AvgPool2D::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Cast &node)
-{
- VERBOSE(LIR) << "* Cast" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Cast::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Comparison &node)
-{
- VERBOSE(LIR) << "* Comparison" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Comparison::Input::INPUT0).value()
- << ", " << node.getInputs().at(Comparison::Input::INPUT1).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Concat &node)
-{
- VERBOSE(LIR) << "* Concat" << std::endl;
- std::string inputs;
- for (auto i : node.getInputs())
- {
- inputs += std::to_string(i.value()) + ",";
- }
- VERBOSE(LIR) << " - Inputs : IFM(" << inputs << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Conv2D &node)
-{
- std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
- VERBOSE(LIR) << "* Conv2D(" << padding_type << ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT).value()
- << ") Kernel(" << node.getInputs().at(Conv2D::Input::KERNEL).value() << ") Bias("
- << node.getInputs().at(Conv2D::Input::BIAS).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const DepthToSpace &node)
-{
- VERBOSE(LIR) << "* DepthToSpace" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(DepthToSpace::Input::INPUT).value()
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const DepthwiseConv2D &node)
-{
- std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
- VERBOSE(LIR) << "* DepthwiseConv2D(" << padding_type << ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(DepthwiseConv2D::Input::INPUT).value()
- << ") Kernel(" << node.getInputs().at(DepthwiseConv2D::Input::KERNEL).value()
- << ") Bias(" << node.getInputs().at(DepthwiseConv2D::Input::BIAS).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Dequantize &node)
-{
- VERBOSE(LIR) << "* Dequantize" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Dequantize::Input::INPUT).value()
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Div &node)
-{
- VERBOSE(LIR) << "* Div" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Div::Input::LHS).value() << ", "
- << node.getInputs().at(Div::Input::RHS).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const EmbeddingLookup &node)
-{
- VERBOSE(LIR) << "* EmbeddingLookup" << std::endl;
- VERBOSE(LIR) << " - Inputs : Lookups("
- << node.getInputs().at(EmbeddingLookup::Input::LOOKUPS).value() << ") VALUES("
- << node.getInputs().at(EmbeddingLookup::Input::VALUES).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Exp &node)
-{
- VERBOSE(LIR) << "* Exp" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Exp::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Floor &node)
-{
- VERBOSE(LIR) << "* Floor" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Floor::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const FullyConnected &node)
-{
- VERBOSE(LIR) << "* FullyConnected" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(FullyConnected::Input::INPUT).value()
- << ") Weight(" << node.getInputs().at(FullyConnected::Input::WEIGHT).value()
- << ") Bias(" << node.getInputs().at(FullyConnected::Input::BIAS).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Gather &node)
-{
- VERBOSE(LIR) << "* Gather" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Gather::Input::INPUT).value()
- << ") Indices(" << node.getInputs().at(Gather::Input::INDICES).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const HashtableLookup &node)
-{
- VERBOSE(LIR) << "* HashTableLookup" << std::endl;
- VERBOSE(LIR) << " - Inputs : Lookups("
- << node.getInputs().at(HashtableLookup::Input::LOOKUPS).value() << ") Keys("
- << node.getInputs().at(HashtableLookup::Input::KEYS).value() << ") Values("
- << node.getInputs().at(HashtableLookup::Input::VALUES).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Outputs : Output("
- << node.getInputs().at(HashtableLookup::Output::OUTPUT).value() << ") Hits("
- << node.getInputs().at(HashtableLookup::Output::HITS).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const InstanceNorm &node)
-{
- VERBOSE(LIR) << "* InstanceNorm" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(InstanceNorm::Input::INPUT).value()
- << ") Gamma(" << node.getInputs().at(InstanceNorm::Input::GAMMA).value() << ") Beta("
- << node.getInputs().at(InstanceNorm::Input::BETA).value() << ") Epsilon("
- << node.param().epsilon << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const L2Normalization &node)
-{
- VERBOSE(LIR) << "* L2Normalization" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input("
- << node.getInputs().at(L2Normalization::Input::INPUT).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const L2Pool2D &node)
-{
- VERBOSE(LIR) << "* L2Pool2D" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(L2Pool2D::Input::INPUT).value()
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const LocalResponseNormalization &node)
-{
- VERBOSE(LIR) << "* LocalResponseNormalization" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input("
- << node.getInputs().at(LocalResponseNormalization::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const LSTM &node)
-{
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT).value()
- << ") Input To Input Weights("
- << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS).value()
- << ") Input To Forget Weights("
- << node.getInputs().at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS).value()
- << ") Input To Cell Weights("
- << node.getInputs().at(LSTM::Input::INPUT_TO_CELL_WEIGHTS).value()
- << ") Input To Output Weights("
- << node.getInputs().at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS).value()
- << ") Recurrent To Input Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS).value()
- << ") Recurrent To Forget Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS).value()
- << ") Recurrent To Cell Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS).value()
- << ") Recurrent To Output Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS).value()
- << ") Cell To Input Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_INPUT_WEIGHTS).value()
- << ") Cell To Forget Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_FORGET_WEIGHTS).value()
- << ") Cell To OUTPUT Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS).value()
- << ") Input Gate Bias(" << node.getInputs().at(LSTM::Input::INPUT_GATE_BIAS).value()
- << ") Forget Gate Bias("
- << node.getInputs().at(LSTM::Input::FORGET_GATE_BIAS).value() << ") Cell Bias("
- << node.getInputs().at(LSTM::Input::CELL_BIAS).value() << ") Output Gate Bias("
- << node.getInputs().at(LSTM::Input::OUTPUT_GATE_BIAS).value()
- << ") Projection Weights("
- << node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS).value()
- << ") Projection Bias(" << node.getInputs().at(LSTM::Input::PROJECTION_BIAS).value()
- << ") Output State In(" << node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN).value()
- << ") Cell State In(" << node.getInputs().at(LSTM::Input::CELL_STATE_IN).value()
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Scratch Buffer("
- << node.getOutputs().at(LSTM::Output::SCRATCH_BUFFER).value()
- << ") Output State Out("
- << node.getInputs().at(LSTM::Output::OUTPUT_STATE_OUT).value() << ") Cell State Out("
- << node.getInputs().at(LSTM::Output::CELL_STATE_OUT).value() << ") Output("
- << node.getInputs().at(LSTM::Output::OUTPUT).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const LogicalAnd &node)
-{
- VERBOSE(LIR) << "* LogicalAnd" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalAnd::Input::INPUT0).value()
- << ", " << node.getInputs().at(LogicalAnd::Input::INPUT1).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const LogicalNot &node)
-{
- VERBOSE(LIR) << "* LogicalNot" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalNot::Input::INPUT).value()
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const LogicalOr &node)
-{
- VERBOSE(LIR) << "* LogicalOr" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalOr::Input::INPUT0).value()
- << ", " << node.getInputs().at(LogicalOr::Input::INPUT1).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Logistic &node)
-{
- VERBOSE(LIR) << "* Logistic" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Logistic::Input::INPUT).value()
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const MaxPool2D &node)
-{
- std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
- VERBOSE(LIR) << "* MaxPool2D(" << padding_type << ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(MaxPool2D::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Mean &node)
-{
- VERBOSE(LIR) << "* Mean" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Mean::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Mul &node)
-{
- VERBOSE(LIR) << "* Mul" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Mul::Input::LHS).value() << ", "
- << node.getInputs().at(Mul::Input::RHS).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Neg &node)
-{
- VERBOSE(LIR) << "* Neg" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Neg::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Pack &node)
-{
- VERBOSE(LIR) << "* Pack" << std::endl;
- std::string inputs;
- const auto &input_indices = node.getInputs();
- for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
- {
- inputs += std::to_string(it->value());
- if (std::next(it) != std::end(input_indices))
- inputs += ", ";
- }
- VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Permute &node)
-{
- std::string permute_type = "Unknown";
- switch (node.getPermuteType())
- {
- case Permute::Type::COPY:
- permute_type = "Copy";
- break;
- case Permute::Type::NHWC_TO_NCHW:
- permute_type = "NHWC to NCHW";
- break;
- case Permute::Type::NCHW_TO_NHWC:
- permute_type = "NCHW to NHWC";
- break;
- }
-
- VERBOSE(LIR) << "* Permute(" + permute_type + ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const PReLU &node)
-{
- VERBOSE(LIR) << "* PReLU" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(PReLU::Input::INPUT).value()
- << ") Alpha(" << node.getInputs().at(PReLU::Input::ALPHA).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const ReduceMax &node)
-{
- VERBOSE(LIR) << "* ReduceMax" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReduceMax::Input::INPUT).value()
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const ReduceMin &node)
-{
- VERBOSE(LIR) << "* ReduceMin" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReduceMin::Input::INPUT).value()
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const ReduceSum &node)
-{
- VERBOSE(LIR) << "* ReduceSum" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReduceSum::Input::INPUT).value()
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const ReLU &node)
-{
- VERBOSE(LIR) << "* ReLU" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const ReLU1 &node)
-{
- VERBOSE(LIR) << "* ReLU1" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU1::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const ReLU6 &node)
-{
- VERBOSE(LIR) << "* ReLU6" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU6::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Reshape &node)
-{
- VERBOSE(LIR) << "* Reshape" << std::endl;
- // TODO The shape index should be "node.getInputs().at(1).value()" but not valid for now
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reshape::Input::INPUT).value()
- << ") Shape("
- << "?"
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const ResizeBilinear &node)
-{
- VERBOSE(LIR) << "* ResizeBilinear" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ResizeBilinear::Input::INPUT).value()
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const RNN &node)
-{
- VERBOSE(LIR) << "* RNN" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(RNN::Input::INPUT).value()
- << ") Weights" << node.getInputs().at(RNN::Input::WEIGHTS).value()
- << ") Recurrent Weights"
- << node.getInputs().at(RNN::Input::RECURRENT_WEIGHTS).value() << ") Bias"
- << node.getInputs().at(RNN::Input::BIAS).value() << ") Hidden State"
- << node.getInputs().at(RNN::Input::HIDDEN_STATE_IN).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(RNN::Output::OUTPUT).value()
- << ") Hidden State" << node.getInputs().at(RNN::Output::HIDDEN_STATE_OUT).value()
- << ")" << std::endl;
-}
-
-void Dumper::visit(const RSQRT &node)
-{
- VERBOSE(LIR) << "* RSQRT" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(RSQRT::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Softmax &node)
-{
- VERBOSE(LIR) << "* Softmax" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Softmax::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const SpaceToDepth &node)
-{
- VERBOSE(LIR) << "* SpaceToDepth" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SpaceToDepth::Input::INPUT).value()
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Split &node)
-{
- VERBOSE(LIR) << "* Split" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Split::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const SQRT &node)
-{
- VERBOSE(LIR) << "* SQRT" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SQRT::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const SquaredDifference &node)
-{
- VERBOSE(LIR) << "* SquaredDifference" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input("
- << node.getInputs().at(SquaredDifference::Input::LHS).value() << ", "
- << node.getInputs().at(SquaredDifference::Input::RHS).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Squeeze &node)
-{
- VERBOSE(LIR) << "* Squeeze" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Squeeze::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Slice &node)
-{
- VERBOSE(LIR) << "* Slice" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Slice::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const StridedSlice &node)
-{
- VERBOSE(LIR) << "* StridedSlice" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StridedSlice::Input::INPUT).value()
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Sub &node)
-{
- VERBOSE(LIR) << "* Sub" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Sub::Input::LHS).value() << ", "
- << node.getInputs().at(Sub::Input::RHS).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Tanh &node)
-{
- VERBOSE(LIR) << "* TanH" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Tanh::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const TopKV2 &node)
-{
- VERBOSE(LIR) << "* TopKV2" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(TopKV2::Input::INPUT).value() << ")"
- << std::endl;
- VERBOSE(LIR) << " - Outputs : Values("
- << node.getOutputs().at(TopKV2::Output::OUTPUT_VALUES).value() << ") Indices("
- << node.getOutputs().at(TopKV2::Output::OUTPUT_INDICES).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const TransposeConv &node)
-{
- std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
- VERBOSE(LIR) << "* TransposeConv(" << padding_type << ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : Output Shape("
- << node.getInputs().at(TransposeConv::Input::OUTPUT_SHAPE).value() << ") KERNEL("
- << node.getInputs().at(TransposeConv::Input::KERNEL).value() << ") IFM("
- << node.getInputs().at(TransposeConv::Input::INPUT).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Transpose &node)
-{
- VERBOSE(LIR) << "* Transpose" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Transpose::Input::INPUT).value()
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Unpack &node)
-{
- VERBOSE(LIR) << "* Unpack" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT).value() << ")"
- << std::endl;
- std::string outputs;
- const auto &output_indices = node.getOutputs();
- for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
- {
- outputs += std::to_string(it->value());
- if (std::next(it) != std::end(output_indices))
- outputs += ", ";
- }
- VERBOSE(LIR) << " - Outputs : Outputs(" << outputs << ")" << std::endl;
-}
-
-void Dumper::visit(const Min &node)
-{
- VERBOSE(LIR) << "* Min" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Min::Input::LHS).value() << ", "
- << node.getInputs().at(Min::Input::RHS).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const Max &node)
-{
- VERBOSE(LIR) << "* Max" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Max::Input::LHS).value() << ", "
- << node.getInputs().at(Max::Input::RHS).value() << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-void Dumper::visit(const OneHot &node)
-{
- VERBOSE(LIR) << "* OneHot" << std::endl;
- VERBOSE(LIR) << " - Inputs : "
- << "Indices(" << node.getInputs().at(OneHot::Input::INDICES).value() << ") "
- << "Depth(" << node.getInputs().at(OneHot::Input::DEPTH).value() << ") "
- << "OnValue(" << node.getInputs().at(OneHot::Input::ON_VALUE).value() << ") "
- << "OffValue(" << node.getInputs().at(OneHot::Input::OFF_VALUE).value() << ") "
- << "Axis(" << node.param().axis << ") " << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
-}
-
-} // namespace dumper
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/dumper/Dumper.h b/runtime/neurun/core/src/ir/dumper/Dumper.h
deleted file mode 100644
index 458f1c81f..000000000
--- a/runtime/neurun/core/src/ir/dumper/Dumper.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_GRAPH_DUMPER_H__
-#define __NEURUN_GRAPH_DUMPER_H__
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace dumper
-{
-
-class Dumper : public OperationVisitor
-{
-public:
- Dumper() = default;
-
-public:
- void visit(const operation::Abs &) override;
- void visit(const operation::Add &node) override;
- void visit(const operation::ArgMax &) override;
- void visit(const operation::AvgPool2D &node) override;
- void visit(const operation::Cast &) override;
- void visit(const operation::Comparison &) override;
- void visit(const operation::Concat &node) override;
- void visit(const operation::Conv2D &node) override;
- void visit(const operation::DepthToSpace &) override;
- void visit(const operation::DepthwiseConv2D &node) override;
- void visit(const operation::Dequantize &) override;
- void visit(const operation::Div &) override;
- void visit(const operation::EmbeddingLookup &) override;
- void visit(const operation::Exp &) override;
- void visit(const operation::Floor &) override;
- void visit(const operation::FullyConnected &node) override;
- void visit(const operation::Gather &) override;
- void visit(const operation::HashtableLookup &) override;
- void visit(const operation::InstanceNorm &) override;
- void visit(const operation::L2Normalization &) override;
- void visit(const operation::L2Pool2D &) override;
- void visit(const operation::LocalResponseNormalization &) override;
- void visit(const operation::LogicalAnd &) override;
- void visit(const operation::LogicalNot &) override;
- void visit(const operation::LogicalOr &) override;
- void visit(const operation::Logistic &) override;
- void visit(const operation::LSTM &) override;
- void visit(const operation::MaxPool2D &node) override;
- void visit(const operation::Mean &) override;
- void visit(const operation::Mul &) override;
- void visit(const operation::Neg &) override;
- void visit(const operation::Pack &) override;
- void visit(const operation::Permute &node) override;
- void visit(const operation::PReLU &) override;
- void visit(const operation::ReduceMax &) override;
- void visit(const operation::ReduceMin &) override;
- void visit(const operation::ReduceSum &) override;
- void visit(const operation::ReLU &) override;
- void visit(const operation::ReLU1 &) override;
- void visit(const operation::ReLU6 &) override;
- void visit(const operation::Reshape &node) override;
- void visit(const operation::ResizeBilinear &) override;
- void visit(const operation::RNN &) override;
- void visit(const operation::RSQRT &) override;
- void visit(const operation::Softmax &node) override;
- void visit(const operation::SpaceToDepth &) override;
- void visit(const operation::Split &) override;
- void visit(const operation::SQRT &) override;
- void visit(const operation::SquaredDifference &) override;
- void visit(const operation::Squeeze &) override;
- void visit(const operation::Slice &) override;
- void visit(const operation::StridedSlice &) override;
- void visit(const operation::Sub &) override;
- void visit(const operation::Tanh &) override;
- void visit(const operation::TopKV2 &) override;
- void visit(const operation::TransposeConv &) override;
- void visit(const operation::Transpose &) override;
- void visit(const operation::Unpack &) override;
- void visit(const operation::Min &) override;
- void visit(const operation::Max &) override;
- void visit(const operation::OneHot &) override;
-};
-
-} // namespace dumper
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_GRAPH_DUMPER_H__
diff --git a/runtime/neurun/core/src/ir/operand/Shape4DConvert.h b/runtime/neurun/core/src/ir/operand/Shape4DConvert.h
deleted file mode 100644
index feffee89f..000000000
--- a/runtime/neurun/core/src/ir/operand/Shape4DConvert.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_GRAPH_OPERAND_SHAPE4D_CONVERT_H__
-#define __NEURUN_GRAPH_OPERAND_SHAPE4D_CONVERT_H__
-
-#include "ir/operand/LowerInfo.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operand
-{
-
-inline LowerInfo::Shape4D asShape4D(const Shape &shape)
-{
- switch (shape.rank())
- {
- case 0u:
- return LowerInfo::Shape4D(1, 1, 1, 1);
-
- case 1u:
- return LowerInfo::Shape4D(1, 1, 1, shape.dim(0));
-
- case 2u:
- return LowerInfo::Shape4D(1, 1, shape.dim(0), shape.dim(1));
-
- case 3u:
- return LowerInfo::Shape4D(1, shape.dim(0), shape.dim(1), shape.dim(2));
-
- case 4u:
- return LowerInfo::Shape4D(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3));
-
- default:
- throw "Unsupported rank > 4";
- }
-}
-
-} // namespace operand
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_GRAPH_OPERAND_SHAPE4D_CONVERT_H__
diff --git a/runtime/neurun/core/src/ir/operation/Abs.cc b/runtime/neurun/core/src/ir/operation/Abs.cc
deleted file mode 100644
index 9506f83d2..000000000
--- a/runtime/neurun/core/src/ir/operation/Abs.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Abs.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Abs::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Abs::Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Add.cc b/runtime/neurun/core/src/ir/operation/Add.cc
deleted file mode 100644
index a7c40c37a..000000000
--- a/runtime/neurun/core/src/ir/operation/Add.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Add.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Add::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Add::Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/ArgMax.cc b/runtime/neurun/core/src/ir/operation/ArgMax.cc
deleted file mode 100644
index 200abc7dd..000000000
--- a/runtime/neurun/core/src/ir/operation/ArgMax.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ArgMax.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void ArgMax::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ArgMax::ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/AvgPool2D.cc b/runtime/neurun/core/src/ir/operation/AvgPool2D.cc
deleted file mode 100644
index 21ec052eb..000000000
--- a/runtime/neurun/core/src/ir/operation/AvgPool2D.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/AvgPool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void AvgPool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-AvgPool2D::AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/BatchToSpaceND.cc b/runtime/neurun/core/src/ir/operation/BatchToSpaceND.cc
deleted file mode 100644
index 042144c12..000000000
--- a/runtime/neurun/core/src/ir/operation/BatchToSpaceND.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/BatchToSpaceND.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void BatchToSpaceND::accept(OperationVisitor &v) const { v.visit(*this); }
-
-BatchToSpaceND::BatchToSpaceND(const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Cast.cc b/runtime/neurun/core/src/ir/operation/Cast.cc
deleted file mode 100644
index 095225eca..000000000
--- a/runtime/neurun/core/src/ir/operation/Cast.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Cast.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Cast::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Cast::Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Comparison.cc b/runtime/neurun/core/src/ir/operation/Comparison.cc
deleted file mode 100644
index 995d56764..000000000
--- a/runtime/neurun/core/src/ir/operation/Comparison.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Comparison.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Comparison::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Comparison::Comparison(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Concat.cc b/runtime/neurun/core/src/ir/operation/Concat.cc
deleted file mode 100644
index 1772da1fc..000000000
--- a/runtime/neurun/core/src/ir/operation/Concat.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Concat.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Concat::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Concat::Concat(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Conv2D.cc b/runtime/neurun/core/src/ir/operation/Conv2D.cc
deleted file mode 100644
index 505e916a9..000000000
--- a/runtime/neurun/core/src/ir/operation/Conv2D.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Conv2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Conv2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Conv2D::Conv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Custom.cc b/runtime/neurun/core/src/ir/operation/Custom.cc
deleted file mode 100644
index 67f36d588..000000000
--- a/runtime/neurun/core/src/ir/operation/Custom.cc
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Custom.h"
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Custom::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Custom::Custom(OperandConstraint input_constr, const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs, std::string id, const Userdata &userdata)
- : Operation{input_constr, inputs, outputs}, _id(std::move(id)), _userdata(userdata)
-{
-}
-
-const std::string &Custom::id() const { return _id; }
-
-const Custom::Userdata &Custom::userdata() const { return _userdata; }
-
-Custom::~Custom() { delete[] _userdata.data; }
-
-std::string Custom::name() const { return id(); }
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/DepthToSpace.cc b/runtime/neurun/core/src/ir/operation/DepthToSpace.cc
deleted file mode 100644
index fd1d1f1aa..000000000
--- a/runtime/neurun/core/src/ir/operation/DepthToSpace.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/DepthToSpace.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void DepthToSpace::accept(OperationVisitor &v) const { v.visit(*this); }
-
-DepthToSpace::DepthToSpace(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/DepthwiseConv2D.cc b/runtime/neurun/core/src/ir/operation/DepthwiseConv2D.cc
deleted file mode 100644
index ed76594a3..000000000
--- a/runtime/neurun/core/src/ir/operation/DepthwiseConv2D.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/DepthwiseConv2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void DepthwiseConv2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-DepthwiseConv2D::DepthwiseConv2D(const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Dequantize.cc b/runtime/neurun/core/src/ir/operation/Dequantize.cc
deleted file mode 100644
index e99a59cb7..000000000
--- a/runtime/neurun/core/src/ir/operation/Dequantize.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Dequantize.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Dequantize::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Dequantize::Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Div.cc b/runtime/neurun/core/src/ir/operation/Div.cc
deleted file mode 100644
index 484406ff3..000000000
--- a/runtime/neurun/core/src/ir/operation/Div.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Div.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Div::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Div::Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/EmbeddingLookup.cc b/runtime/neurun/core/src/ir/operation/EmbeddingLookup.cc
deleted file mode 100644
index 206e6bfaa..000000000
--- a/runtime/neurun/core/src/ir/operation/EmbeddingLookup.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/EmbeddingLookup.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void EmbeddingLookup::accept(OperationVisitor &v) const { v.visit(*this); }
-
-EmbeddingLookup::EmbeddingLookup(const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Exp.cc b/runtime/neurun/core/src/ir/operation/Exp.cc
deleted file mode 100644
index 3c0e0cf9b..000000000
--- a/runtime/neurun/core/src/ir/operation/Exp.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Exp.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Exp::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Exp::Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Floor.cc b/runtime/neurun/core/src/ir/operation/Floor.cc
deleted file mode 100644
index 75373cd41..000000000
--- a/runtime/neurun/core/src/ir/operation/Floor.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Floor.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Floor::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Floor::Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/FullyConnected.cc b/runtime/neurun/core/src/ir/operation/FullyConnected.cc
deleted file mode 100644
index 9560c0593..000000000
--- a/runtime/neurun/core/src/ir/operation/FullyConnected.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/FullyConnected.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void FullyConnected::accept(OperationVisitor &v) const { v.visit(*this); }
-
-FullyConnected::FullyConnected(const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Gather.cc b/runtime/neurun/core/src/ir/operation/Gather.cc
deleted file mode 100644
index f98cef9ae..000000000
--- a/runtime/neurun/core/src/ir/operation/Gather.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Gather.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Gather::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Gather::Gather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/HashtableLookup.cc b/runtime/neurun/core/src/ir/operation/HashtableLookup.cc
deleted file mode 100644
index ecb9d3195..000000000
--- a/runtime/neurun/core/src/ir/operation/HashtableLookup.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/HashtableLookup.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void HashtableLookup::accept(OperationVisitor &v) const { v.visit(*this); }
-
-HashtableLookup::HashtableLookup(const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/InstanceNorm.cc b/runtime/neurun/core/src/ir/operation/InstanceNorm.cc
deleted file mode 100644
index 69e47abd4..000000000
--- a/runtime/neurun/core/src/ir/operation/InstanceNorm.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/InstanceNorm.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void InstanceNorm::accept(OperationVisitor &v) const { v.visit(*this); }
-
-InstanceNorm::InstanceNorm(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/L2Normalization.cc b/runtime/neurun/core/src/ir/operation/L2Normalization.cc
deleted file mode 100644
index 67085989e..000000000
--- a/runtime/neurun/core/src/ir/operation/L2Normalization.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/L2Normalization.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void L2Normalization::accept(OperationVisitor &v) const { v.visit(*this); }
-
-L2Normalization::L2Normalization(const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/L2Pool2D.cc b/runtime/neurun/core/src/ir/operation/L2Pool2D.cc
deleted file mode 100644
index 0815cb5ab..000000000
--- a/runtime/neurun/core/src/ir/operation/L2Pool2D.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/L2Pool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void L2Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-L2Pool2D::L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/LSTM.cc b/runtime/neurun/core/src/ir/operation/LSTM.cc
deleted file mode 100644
index 58e2aa32e..000000000
--- a/runtime/neurun/core/src/ir/operation/LSTM.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LSTM.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void LSTM::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LSTM::LSTM(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(23u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/LocalResponseNormalization.cc b/runtime/neurun/core/src/ir/operation/LocalResponseNormalization.cc
deleted file mode 100644
index dcba7f1cb..000000000
--- a/runtime/neurun/core/src/ir/operation/LocalResponseNormalization.cc
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LocalResponseNormalization.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void LocalResponseNormalization::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LocalResponseNormalization::LocalResponseNormalization(const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/LogicalAnd.cc b/runtime/neurun/core/src/ir/operation/LogicalAnd.cc
deleted file mode 100644
index 51f4f0ee0..000000000
--- a/runtime/neurun/core/src/ir/operation/LogicalAnd.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalAnd.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalAnd::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalAnd::LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/LogicalNot.cc b/runtime/neurun/core/src/ir/operation/LogicalNot.cc
deleted file mode 100644
index 48c25142a..000000000
--- a/runtime/neurun/core/src/ir/operation/LogicalNot.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalNot.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalNot::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalNot::LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/LogicalOr.cc b/runtime/neurun/core/src/ir/operation/LogicalOr.cc
deleted file mode 100644
index 663b7deb5..000000000
--- a/runtime/neurun/core/src/ir/operation/LogicalOr.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalOr.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalOr::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalOr::LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Logistic.cc b/runtime/neurun/core/src/ir/operation/Logistic.cc
deleted file mode 100644
index 3ed2f3453..000000000
--- a/runtime/neurun/core/src/ir/operation/Logistic.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Logistic.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Logistic::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Logistic::Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/LowerInfo.cc b/runtime/neurun/core/src/ir/operation/LowerInfo.cc
deleted file mode 100644
index 6133be3f8..000000000
--- a/runtime/neurun/core/src/ir/operation/LowerInfo.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LowerInfo.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-LowerInfo::LowerInfo(const backend::Backend *backend, Layout layout)
- : _permute_factor{backend, layout}
-{
- // DO NOTHING
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Max.cc b/runtime/neurun/core/src/ir/operation/Max.cc
deleted file mode 100644
index be4bdd365..000000000
--- a/runtime/neurun/core/src/ir/operation/Max.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Max.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Max::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Max::Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/MaxPool2D.cc b/runtime/neurun/core/src/ir/operation/MaxPool2D.cc
deleted file mode 100644
index 8f1b70cd6..000000000
--- a/runtime/neurun/core/src/ir/operation/MaxPool2D.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/MaxPool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void MaxPool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-MaxPool2D::MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Mean.cc b/runtime/neurun/core/src/ir/operation/Mean.cc
deleted file mode 100644
index 016b5dd85..000000000
--- a/runtime/neurun/core/src/ir/operation/Mean.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Mean.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Mean::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Mean::Mean(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Min.cc b/runtime/neurun/core/src/ir/operation/Min.cc
deleted file mode 100644
index a864405dc..000000000
--- a/runtime/neurun/core/src/ir/operation/Min.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Min.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Min::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Min::Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Mul.cc b/runtime/neurun/core/src/ir/operation/Mul.cc
deleted file mode 100644
index 0b2d67a9d..000000000
--- a/runtime/neurun/core/src/ir/operation/Mul.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Mul.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Mul::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Mul::Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Neg.cc b/runtime/neurun/core/src/ir/operation/Neg.cc
deleted file mode 100644
index 65922d57c..000000000
--- a/runtime/neurun/core/src/ir/operation/Neg.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Neg.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Neg::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Neg::Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/OneHot.cc b/runtime/neurun/core/src/ir/operation/OneHot.cc
deleted file mode 100644
index 0ba3c9d60..000000000
--- a/runtime/neurun/core/src/ir/operation/OneHot.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/OneHot.h"
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void OneHot::accept(OperationVisitor &v) const { v.visit(*this); }
-
-OneHot::OneHot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/PReLU.cc b/runtime/neurun/core/src/ir/operation/PReLU.cc
deleted file mode 100644
index b8555ccbd..000000000
--- a/runtime/neurun/core/src/ir/operation/PReLU.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/PReLU.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void PReLU::accept(OperationVisitor &v) const { v.visit(*this); }
-
-PReLU::PReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Pack.cc b/runtime/neurun/core/src/ir/operation/Pack.cc
deleted file mode 100644
index 412c744ea..000000000
--- a/runtime/neurun/core/src/ir/operation/Pack.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "ir/operation/Pack.h"
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-void Pack::accept(OperationVisitor &v) const { v.visit(*this); }
-Pack::Pack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createAtLeast(3u), inputs, outputs}, _param{param}
-{
-}
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Pad.cc b/runtime/neurun/core/src/ir/operation/Pad.cc
deleted file mode 100644
index a08be12a6..000000000
--- a/runtime/neurun/core/src/ir/operation/Pad.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Pad.h"
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Pad::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Permute.cc b/runtime/neurun/core/src/ir/operation/Permute.cc
deleted file mode 100644
index ec3d969c8..000000000
--- a/runtime/neurun/core/src/ir/operation/Permute.cc
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Permute.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Permute::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Permute::Permute(const OperandIndex &input, const OperandIndex &output,
- const backend::BackendContext *input_backend_ctx,
- const backend::BackendContext *output_backend_ctx, Type type, DataType data_type)
- : Operation{OperandConstraint::createExact(1u)}, _param{input_backend_ctx, output_backend_ctx},
- _type{type}, _dataType{data_type}
-{
- setInputs({input});
- setOutputs({output});
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/RNN.cc b/runtime/neurun/core/src/ir/operation/RNN.cc
deleted file mode 100644
index 8db5cbceb..000000000
--- a/runtime/neurun/core/src/ir/operation/RNN.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/RNN.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void RNN::accept(OperationVisitor &v) const { v.visit(*this); }
-
-RNN::RNN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/RSQRT.cc b/runtime/neurun/core/src/ir/operation/RSQRT.cc
deleted file mode 100644
index ec13b20ec..000000000
--- a/runtime/neurun/core/src/ir/operation/RSQRT.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/RSQRT.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void RSQRT::accept(OperationVisitor &v) const { v.visit(*this); }
-
-RSQRT::RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/ReLU.cc b/runtime/neurun/core/src/ir/operation/ReLU.cc
deleted file mode 100644
index 6b3f7e72d..000000000
--- a/runtime/neurun/core/src/ir/operation/ReLU.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU::ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/ReLU1.cc b/runtime/neurun/core/src/ir/operation/ReLU1.cc
deleted file mode 100644
index d7b4e1b11..000000000
--- a/runtime/neurun/core/src/ir/operation/ReLU1.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU1.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU1::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU1::ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/ReLU6.cc b/runtime/neurun/core/src/ir/operation/ReLU6.cc
deleted file mode 100644
index 245eb923f..000000000
--- a/runtime/neurun/core/src/ir/operation/ReLU6.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU6.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU6::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU6::ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/ReduceMax.cc b/runtime/neurun/core/src/ir/operation/ReduceMax.cc
deleted file mode 100644
index b7ef2c5a9..000000000
--- a/runtime/neurun/core/src/ir/operation/ReduceMax.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReduceMax.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReduceMax::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReduceMax::ReduceMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/ReduceMin.cc b/runtime/neurun/core/src/ir/operation/ReduceMin.cc
deleted file mode 100644
index 84d7e0cc5..000000000
--- a/runtime/neurun/core/src/ir/operation/ReduceMin.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReduceMin.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReduceMin::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReduceMin::ReduceMin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/ReduceSum.cc b/runtime/neurun/core/src/ir/operation/ReduceSum.cc
deleted file mode 100644
index 7e3b19bd6..000000000
--- a/runtime/neurun/core/src/ir/operation/ReduceSum.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReduceSum.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReduceSum::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReduceSum::ReduceSum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Reshape.cc b/runtime/neurun/core/src/ir/operation/Reshape.cc
deleted file mode 100644
index bae37e12f..000000000
--- a/runtime/neurun/core/src/ir/operation/Reshape.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Reshape.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Reshape::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Reshape::Reshape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/ResizeBilinear.cc b/runtime/neurun/core/src/ir/operation/ResizeBilinear.cc
deleted file mode 100644
index 55ae4815d..000000000
--- a/runtime/neurun/core/src/ir/operation/ResizeBilinear.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ResizeBilinear.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void ResizeBilinear::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ResizeBilinear::ResizeBilinear(const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/SQRT.cc b/runtime/neurun/core/src/ir/operation/SQRT.cc
deleted file mode 100644
index 6c6daa3a0..000000000
--- a/runtime/neurun/core/src/ir/operation/SQRT.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/SQRT.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void SQRT::accept(OperationVisitor &v) const { v.visit(*this); }
-
-SQRT::SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Slice.cc b/runtime/neurun/core/src/ir/operation/Slice.cc
deleted file mode 100644
index 88014d1e4..000000000
--- a/runtime/neurun/core/src/ir/operation/Slice.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Slice.h"
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Slice::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Slice::Slice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Softmax.cc b/runtime/neurun/core/src/ir/operation/Softmax.cc
deleted file mode 100644
index 6b3a6b164..000000000
--- a/runtime/neurun/core/src/ir/operation/Softmax.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Softmax.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Softmax::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Softmax::Softmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/SpaceToBatchND.cc b/runtime/neurun/core/src/ir/operation/SpaceToBatchND.cc
deleted file mode 100644
index a07453504..000000000
--- a/runtime/neurun/core/src/ir/operation/SpaceToBatchND.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/SpaceToBatchND.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void SpaceToBatchND::accept(OperationVisitor &v) const { v.visit(*this); }
-
-SpaceToBatchND::SpaceToBatchND(const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/SpaceToDepth.cc b/runtime/neurun/core/src/ir/operation/SpaceToDepth.cc
deleted file mode 100644
index ca16bd92f..000000000
--- a/runtime/neurun/core/src/ir/operation/SpaceToDepth.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/SpaceToDepth.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void SpaceToDepth::accept(OperationVisitor &v) const { v.visit(*this); }
-
-SpaceToDepth::SpaceToDepth(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Split.cc b/runtime/neurun/core/src/ir/operation/Split.cc
deleted file mode 100644
index a4b15a9b2..000000000
--- a/runtime/neurun/core/src/ir/operation/Split.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "ir/operation/Split.h"
-#include <cassert>
-#include "ir/OperationVisitor.h"
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-void Split::accept(OperationVisitor &v) const { v.visit(*this); }
-Split::Split(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/SquaredDifference.cc b/runtime/neurun/core/src/ir/operation/SquaredDifference.cc
deleted file mode 100644
index 141fb7560..000000000
--- a/runtime/neurun/core/src/ir/operation/SquaredDifference.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/SquaredDifference.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void SquaredDifference::accept(OperationVisitor &v) const { v.visit(*this); }
-
-SquaredDifference::SquaredDifference(const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Squeeze.cc b/runtime/neurun/core/src/ir/operation/Squeeze.cc
deleted file mode 100644
index 22ee5763d..000000000
--- a/runtime/neurun/core/src/ir/operation/Squeeze.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Squeeze.h"
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Squeeze::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Squeeze::Squeeze(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param(param)
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/StridedSlice.cc b/runtime/neurun/core/src/ir/operation/StridedSlice.cc
deleted file mode 100644
index f764dccc0..000000000
--- a/runtime/neurun/core/src/ir/operation/StridedSlice.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/StridedSlice.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void StridedSlice::accept(OperationVisitor &v) const { v.visit(*this); }
-
-StridedSlice::StridedSlice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Sub.cc b/runtime/neurun/core/src/ir/operation/Sub.cc
deleted file mode 100644
index 7d83e3d74..000000000
--- a/runtime/neurun/core/src/ir/operation/Sub.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Sub.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Sub::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Sub::Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Tanh.cc b/runtime/neurun/core/src/ir/operation/Tanh.cc
deleted file mode 100644
index a3125e947..000000000
--- a/runtime/neurun/core/src/ir/operation/Tanh.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Tanh.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Tanh::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Tanh::Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/TopKV2.cc b/runtime/neurun/core/src/ir/operation/TopKV2.cc
deleted file mode 100644
index 6fabd34a3..000000000
--- a/runtime/neurun/core/src/ir/operation/TopKV2.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/TopKV2.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void TopKV2::accept(OperationVisitor &v) const { v.visit(*this); }
-
-TopKV2::TopKV2(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Transpose.cc b/runtime/neurun/core/src/ir/operation/Transpose.cc
deleted file mode 100644
index 74239b0f6..000000000
--- a/runtime/neurun/core/src/ir/operation/Transpose.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Transpose.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void Transpose::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Transpose::Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/TransposeConv.cc b/runtime/neurun/core/src/ir/operation/TransposeConv.cc
deleted file mode 100644
index 30664e974..000000000
--- a/runtime/neurun/core/src/ir/operation/TransposeConv.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/TransposeConv.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-
-void TransposeConv::accept(OperationVisitor &v) const { v.visit(*this); }
-
-TransposeConv::TransposeConv(const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/operation/Unpack.cc b/runtime/neurun/core/src/ir/operation/Unpack.cc
deleted file mode 100644
index 7c2c24892..000000000
--- a/runtime/neurun/core/src/ir/operation/Unpack.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "ir/operation/Unpack.h"
-#include "ir/OperationVisitor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace operation
-{
-void Unpack::accept(OperationVisitor &v) const { v.visit(*this); }
-Unpack::Unpack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-} // namespace operation
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/pass/ConstantInsertionPass.cc b/runtime/neurun/core/src/ir/pass/ConstantInsertionPass.cc
deleted file mode 100644
index 8f8ebff1b..000000000
--- a/runtime/neurun/core/src/ir/pass/ConstantInsertionPass.cc
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ConstantInsertionPass.h"
-
-#include "backend/Backend.h"
-#include <ir/Graph.h>
-#include "ir/operand/Shape4DConvert.h"
-#include <util/Utils.h>
-
-namespace neurun
-{
-namespace ir
-{
-namespace pass
-{
-
-void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation &node)
-{
- const auto &subgraph_index = _graph.subgraphs().getOperation(node_index);
- const auto subg_lower_info = _graph.getLowerInfo(subgraph_index);
- const auto backend = subg_lower_info->backend();
- const auto layout = subg_lower_info->layout();
- const auto factor = operand::PermuteFactor{backend, layout};
-
- for (const auto input : node.getInputs())
- {
- auto &object = _graph.operands().at(input);
-
- if (object.isConstant())
- {
- const auto key = ReplaceKey{input, factor};
- if (_replace_operands_map.count(key) == 0)
- {
- auto new_object = object;
- // TODO Remove const_case
- const_cast<std::list<OperationIndex> &>(new_object.getDef().list()).clear();
- const_cast<std::list<OperationIndex> &>(new_object.getUses().list()).clear();
- const auto new_index = _graph.operands().emplace(new_object);
- _replace_operands_map[key] = new_index;
-
- _graph.setLowerInfo(new_index, nnfw::cpp14::make_unique<operand::LowerInfo>(
- operand::asShape4D(new_object.shape())));
- _graph.getLowerInfo(new_index)->addDefPermuteFactor(factor);
- }
-
- const auto replaced_input = _replace_operands_map[key];
- // Update op_seq
- if (_graph.subgraphs().at(subgraph_index).getInputs().contains(input))
- {
- _graph.subgraphs().at(subgraph_index).replaceInput(input, replaced_input);
- }
-
- // Update node
- node.replaceInput(input, replaced_input);
-
- // Update operand
- auto &replaced_object = _graph.operands().at(replaced_input);
- replaced_object.appendUse(node_index);
-
- // Update lower_info
- auto replaced_lower_info = _graph.getLowerInfo(replaced_input);
- replaced_lower_info->addUsePermuteFactor(factor);
-
- // Remove this node from def and uses of origin operand
- if (object.getDef().contains(node_index))
- {
- object.removeDef(node_index);
- }
- object.removeUse(node_index);
-
- // Remove origin operand
- if (object.getDef().size() == 0 && object.getUses().size() == 0)
- {
- _graph.removeOperand(input);
- _graph.removeLowerInfo(input);
- }
- }
- }
-
- // Now this runtime does not support the node making output as constant
- for (const auto &output : node.getOutputs())
- {
- UNUSED_RELEASE(output);
- assert(!_graph.operands().at(output).isConstant());
- }
-}
-
-} // namespace pass
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/pass/ConstantInsertionPass.h b/runtime/neurun/core/src/ir/pass/ConstantInsertionPass.h
deleted file mode 100644
index 40476b20e..000000000
--- a/runtime/neurun/core/src/ir/pass/ConstantInsertionPass.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
-#define __NEURUN_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
-
-#include <ir/operand/PermuteFactor.h>
-#include <ir/Index.h>
-#include "OperationPass.h"
-#include <unordered_map>
-#include <utility>
-
-namespace neurun
-{
-namespace ir
-{
-namespace pass
-{
-
-class ConstantInsertionPass : public OperationPass
-{
-public:
- using OperationPass::OperationPass;
-
-public:
- std::string id() final { return "ConstantInsertionPass"; }
-
-public:
- void callback(const OperationIndex &index, Operation &node) final;
-
-private:
- struct ReplaceKey
- {
- OperandIndex index;
- operand::PermuteFactor factor;
-
- bool operator==(const ReplaceKey &other) const
- {
- return index == other.index && factor == other.factor;
- }
- };
-
- /**
- * @brief Structure that provides hash function of ReplaceKey
- */
- struct KeyHasher
- {
- std::size_t operator()(const ReplaceKey &key) const noexcept
- {
- using std::hash;
- return hash<OperandIndex>()(key.index) ^ (hash<operand::PermuteFactor>()(key.factor) << 1);
- }
- };
-
- std::unordered_map<ReplaceKey, OperandIndex, KeyHasher> _replace_operands_map;
-};
-
-} // namespace pass
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
diff --git a/runtime/neurun/core/src/ir/pass/OperandPass.cc b/runtime/neurun/core/src/ir/pass/OperandPass.cc
deleted file mode 100644
index f31d0d850..000000000
--- a/runtime/neurun/core/src/ir/pass/OperandPass.cc
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperandPass.h"
-
-#include "ir/Graph.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace pass
-{
-
-void OperandPass::run()
-{
- _graph.operands().iterate(
- [&](const OperandIndex &index, Operand &object) { callback(index, object); });
-}
-
-} // namespace pass
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/pass/OperandPass.h b/runtime/neurun/core/src/ir/pass/OperandPass.h
deleted file mode 100644
index c9fbf541d..000000000
--- a/runtime/neurun/core/src/ir/pass/OperandPass.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_GRAPH_PASS_OPERAND_PASS_H__
-#define __NEURUN_GRAPH_PASS_OPERAND_PASS_H__
-
-#include "Pass.h"
-#include "ir/Index.h"
-
-namespace neurun
-{
-namespace ir
-{
-class Operand;
-} // namespace ir
-} // namespace neurun
-
-namespace neurun
-{
-namespace ir
-{
-namespace pass
-{
-
-class OperandPass : public Pass
-{
-public:
- using Pass::Pass;
-
-public:
- std::string id() override = 0;
- void run() override final;
- virtual void callback(const OperandIndex &i, Operand &o) = 0;
-};
-
-} // namespace pass
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_GRAPH_PASS_OPERAND_PASS_H__
diff --git a/runtime/neurun/core/src/ir/pass/OperationPass.cc b/runtime/neurun/core/src/ir/pass/OperationPass.cc
deleted file mode 100644
index c9438ee39..000000000
--- a/runtime/neurun/core/src/ir/pass/OperationPass.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationPass.h"
-
-#include "ir/Index.h"
-#include "ir/Operation.h"
-#include "ir/Graph.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace pass
-{
-
-void OperationPass::run()
-{
- _graph.operations().iterate(
- [&](const OperationIndex &index, Operation &node) { callback(index, node); });
-}
-
-} // namespace pass
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/pass/OperationPass.h b/runtime/neurun/core/src/ir/pass/OperationPass.h
deleted file mode 100644
index 4b7de7109..000000000
--- a/runtime/neurun/core/src/ir/pass/OperationPass.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file OperationPass.h
- * @brief This file contains OperationPass class
- */
-
-#ifndef __NEURUN_GRAPH_PASS_OPERATION_PASS_H__
-#define __NEURUN_GRAPH_PASS_OPERATION_PASS_H__
-
-#include "Pass.h"
-#include "ir/Index.h"
-
-namespace neurun
-{
-namespace ir
-{
-class Operation;
-} // namespace ir
-} // namespace neurun
-
-namespace neurun
-{
-namespace ir
-{
-namespace pass
-{
-
-/**
- * @brief Class to iterate over operations and calls callback() method
- */
-class OperationPass : public Pass
-{
-public:
- using Pass::Pass;
-
-public:
- /**
- * @brief Returns string id for this pass. Same with class name.
- *
- * @return string id
- */
- std::string id() override = 0;
-
- /**
- * @brief Be called for all nodes of graph.
- * @param index is the index of a node in graph
- * @param node is the node in graph
- */
- virtual void callback(const OperationIndex &index, Operation &node) = 0;
-
- /**
- * @brief Run the pass
- */
- void run() final;
-};
-
-} // namespace pass
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_GRAPH_PASS_OPERATION_PASS_H__
diff --git a/runtime/neurun/core/src/ir/pass/Pass.h b/runtime/neurun/core/src/ir/pass/Pass.h
deleted file mode 100644
index 0aa0f36a6..000000000
--- a/runtime/neurun/core/src/ir/pass/Pass.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_GRAPH_PASS_PASS_H__
-#define __NEURUN_GRAPH_PASS_PASS_H__
-
-#include <string>
-
-namespace neurun
-{
-namespace ir
-{
-class Graph;
-} // namespace ir
-} // namespace neurun
-
-namespace neurun
-{
-namespace ir
-{
-namespace pass
-{
-
-class Pass
-{
-public:
- Pass(Graph &graph) : _graph{graph} {}
- virtual ~Pass() = default;
-
-public:
- virtual std::string id() = 0;
- virtual void run() = 0;
-
-protected:
- Graph &_graph;
-};
-
-} // namespace pass
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_GRAPH_PASS_PASS_H__
diff --git a/runtime/neurun/core/src/ir/pass/PermutationEliminationPass.cc b/runtime/neurun/core/src/ir/pass/PermutationEliminationPass.cc
deleted file mode 100644
index 71f3d7e82..000000000
--- a/runtime/neurun/core/src/ir/pass/PermutationEliminationPass.cc
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "PermutationEliminationPass.h"
-
-#include "ir/Operand.h"
-#include "ir/operand/LowerInfo.h"
-#include "ir/Graph.h"
-#include "backend/IConfig.h"
-#include "util/logging.h"
-#include "compiler/BackendResolver.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace pass
-{
-void PermutationEliminationPass::callback(const OperandIndex &inp_index, Operand &object)
-{
- if (_graph.getInputs().contains(inp_index))
- {
- eliminateInput(inp_index, object);
- }
- else if (_graph.getOutputs().contains(inp_index))
- {
- eliminateOutput(inp_index, object);
- }
-}
-
-void PermutationEliminationPass::eliminateInput(const OperandIndex &inp_index, Operand &object)
-{
- auto &model_inputs = _graph.getInputs();
-
- // get uses of the model's given input
- auto uses = object.getUses();
-
- // input must be used just by permutation
- if (uses.size() != 1)
- {
- return;
- }
-
- for (auto input_use : uses.list())
- {
- auto &perm_operation = _graph.operations().at(input_use);
- auto perm_inputs = perm_operation.getInputs();
-
- auto perm_outputs = perm_operation.getOutputs();
-
- if (!isPermuteLayerToEliminate(perm_inputs, perm_outputs, true))
- {
- return;
- }
-
- assert(perm_inputs.at(0) == inp_index);
-
- VERBOSE(PermutationEliminationPass::EliminateInput) << "remove NHWC_TO_NCHW permutation\n";
-
- // set model's new input, which was output of permutation
- model_inputs.replace(inp_index, perm_outputs.at(0));
-
- // remove model's input, which is also input of permutation
- _graph.removeOperand(inp_index);
-
- // remove permutation operation
- assert(_graph.subgraphs().containsOperation(input_use));
- auto subg_idx = _graph.subgraphs().getOperation(input_use);
- _graph.subgraphs().remove(subg_idx);
- _graph.operations().remove(input_use);
-
- VERBOSE(PermutationEliminationPass::EliminateInput)
- << inp_index.value() << " is model's input and is removed. New input is "
- << perm_outputs.at(0).value() << "\n"
- << input_use.value() << " is removed permutation operation\n";
- }
-}
-
-void PermutationEliminationPass::eliminateOutput(const OperandIndex &out_index, Operand &object)
-{
- auto &model_outputs = _graph.getOutputs();
-
- // get defs of the model's given output
- auto defs = object.getDef();
-
- // output must use just permutation
- if (defs.size() != 1)
- {
- return;
- }
-
- for (auto output_def : defs.list())
- {
- auto &perm_operation = _graph.operations().at(output_def);
- auto perm_outputs = perm_operation.getOutputs();
-
- auto perm_inputs = perm_operation.getInputs();
- if (!isPermuteLayerToEliminate(perm_inputs, perm_outputs, false))
- {
- return;
- }
-
- assert(perm_outputs.at(0) == out_index);
-
- VERBOSE(PermutationEliminationPass::EliminateOutput) << "remove NCHW_TO_NHWC permutation\n";
-
- // Update operations' output that is used by permute operand
- for (auto perm_input_index : perm_inputs)
- {
- auto &perm_input_operand = _graph.operands().at(perm_input_index);
- perm_input_operand.removeUse(output_def);
- }
-
- // set model's new output, which was input of permutation
- model_outputs.replace(out_index, perm_inputs.at(0));
-
- // remove model's output, which is also output of permutation
- _graph.removeOperand(out_index);
-
- // remove permutation operation
- assert(_graph.subgraphs().containsOperation(output_def));
- auto subg_idx = _graph.subgraphs().getOperation(output_def);
- _graph.subgraphs().remove(subg_idx);
- _graph.operations().remove(output_def);
-
- VERBOSE(PermutationEliminationPass::EliminateOutput)
- << out_index.value() << " is model's output and is removed. New output is "
- << perm_inputs.at(0).value() << "\n"
- << output_def.value() << " is removed permutation operation\n";
- }
-}
-
-bool PermutationEliminationPass::isPermuteLayerToEliminate(const OperandIndexSequence &inp_indexes,
- const OperandIndexSequence &out_indexes,
- bool is_for_model_input)
-{
- auto input_def_factors = _graph.getLowerInfo(inp_indexes.at(0))->def_factors();
- auto output_def_factors = _graph.getLowerInfo(out_indexes.at(0))->def_factors();
-
- auto input_layout = input_def_factors.getOnlyElement().layout();
- auto output_layout = output_def_factors.getOnlyElement().layout();
-
- if (input_def_factors.size() != 1 || output_def_factors.size() != 1)
- {
- return false;
- }
-
- // all operands' factor must be the same
- for (auto index : inp_indexes)
- {
- auto op_factor_set = _graph.getLowerInfo(index)->def_factors();
- if (op_factor_set.size() != 1 ||
- input_layout != _graph.getLowerInfo(index)->def_factors().getOnlyElement().layout())
- {
- return false;
- }
- }
- // all operands' factor must be the same
- for (auto index : out_indexes)
- {
- auto op_factor_set = _graph.getLowerInfo(index)->def_factors();
- if (op_factor_set.size() != 1 ||
- output_layout != _graph.getLowerInfo(index)->def_factors().getOnlyElement().layout())
- {
- return false;
- }
- }
-
- if (is_for_model_input)
- {
- // check if this is NHWC_TO_NCHW permutation: must have single input, which is model's input
- return (inp_indexes.size() == 1 && input_layout == Layout::NHWC &&
- output_layout == Layout::NCHW);
- }
-
- // check if this is NCHW_TO_NHWC permutation: must have single output, which is model's output
- return (out_indexes.size() == 1 && input_layout == Layout::NCHW && output_layout == Layout::NHWC);
-}
-
-} // namespace pass
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/pass/PermutationEliminationPass.h b/runtime/neurun/core/src/ir/pass/PermutationEliminationPass.h
deleted file mode 100644
index 4431eabbc..000000000
--- a/runtime/neurun/core/src/ir/pass/PermutationEliminationPass.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
-#define __NEURUN_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
-
-#include "OperandPass.h"
-#include "ir/Operand.h"
-#include "ir/OperandIndexSequence.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace pass
-{
-
-class PermutationEliminationPass : public OperandPass
-{
-public:
- using OperandPass::OperandPass;
-
-public:
- std::string id() override { return "PermutationEliminationPass"; }
-
- void callback(const OperandIndex &index, Operand &object) override;
-
-private:
- /**
- * @brief Remove Permute operation that permutates input
- *
- * Note: This function aslo removes model's input and
- * sets output of permutation as model's new input
- *
- * @param inp_index is the target operand index for the elimination
- * @param object is the target operand object for the elimination
- *
- * @return
- */
- void eliminateInput(const OperandIndex &inp_index, Operand &object);
-
- /**
- * @brief Remove Permute operation that permutates output of a model
- *
- * Note: This function aslo removes model's output and
- * sets input of permutation as model's new output
- *
- * @param out_index is the target operand index for the elimination
- * @param object is the target operand object for the elimination
- *
- * @return
- */
- void eliminateOutput(const OperandIndex &out_index, Operand &object);
-
- /**
- * @brief Determine if passed operands are permute layer's input and output, that must be
- * eliminated
- *
- * @param inp_index indexes of the input operand to operation
- * @param out_index indexes of the output operand to operation
- * @param is_for_model_input checking for model's input or output
- *
- * @return if it is permutation layer
- */
- bool isPermuteLayerToEliminate(const OperandIndexSequence &inp_indexes,
- const OperandIndexSequence &out_indexes, bool is_for_model_input);
-};
-
-} // namespace pass
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
diff --git a/runtime/neurun/core/src/ir/pass/PermutationInsertionPass.cc b/runtime/neurun/core/src/ir/pass/PermutationInsertionPass.cc
deleted file mode 100644
index 052e3026a..000000000
--- a/runtime/neurun/core/src/ir/pass/PermutationInsertionPass.cc
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "PermutationInsertionPass.h"
-
-#include <cassert>
-#include <utility>
-#include <unordered_map>
-
-#include "ir/Operand.h"
-#include "ir/operation/LowerInfo.h"
-#include "ir/Graph.h"
-#include "backend/IConfig.h"
-#include "util/logging.h"
-#include "cpp14/memory.h"
-#include "ir/operation/Permute.h"
-#include "ir/operand/Shape4DConvert.h"
-#include "compiler/BackendResolver.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace pass
-{
-
-void PermutationInsertionPass::callback(const OperandIndex &index, Operand &object)
-{
- auto &&operand_li = _graph.getLowerInfo(index);
- assert(operand_li);
-
- // NOTE Later, constants also will have Def
- // Ignore constants
- if (operand_li->def_factors().size() == 0)
- {
- return;
- }
-
- std::list<OperationIndex> permute_indexes;
-
- // Build a map for all necessary type of operands
- std::unordered_map<operand::PermuteFactor, OperandIndex> factor_to_index;
- {
- assert(operand_li->def_factors().size() == 1);
- for (auto factor : operand_li->def_factors())
- {
- factor_to_index.emplace(factor, index);
- }
-
- auto insert_set = operand_li->use_factors() - operand_li->def_factors();
- for (auto factor : insert_set)
- {
- const auto permute_operation_index = insertPermute(index, factor);
- permute_indexes.push_back(permute_operation_index);
- VERBOSE(PermutationInsertionPass) << "Insert 'Permute' operation for operand "
- << index.value() << std::endl;
- const auto &permute_operation = _graph.operations().at(permute_operation_index);
- const auto permuted_operand_index = permute_operation.getOutputs().at(0);
- factor_to_index.emplace(factor, permuted_operand_index);
- }
- }
-
- // Update operations' input that uses this operand
- {
- std::list<OperationIndex> remove_list;
-
- auto uses = object.getUses();
- for (auto use : uses.list())
- {
- // If permute operation, ignore it
- if (std::find(permute_indexes.begin(), permute_indexes.end(), use) != permute_indexes.end())
- continue;
-
- auto &operation = _graph.operations().at(use);
- assert(_graph.subgraphs().containsOperation(use));
- auto subg_index = _graph.subgraphs().getOperation(use);
- auto subg_li = _graph.getLowerInfo(subg_index);
- assert(subg_li);
- const auto subg_layout = subg_li->layout();
- const backend::Backend *backend = subg_li->backend();
- assert(backend);
- auto use_node_inputs = operation.getInputs();
- assert(use_node_inputs.contains(index));
-
- auto new_index = factor_to_index.at({backend, subg_layout});
- if (index != new_index)
- {
- // Update from op_seq
- _graph.subgraphs().at(subg_index).replaceInput(index, new_index);
-
- // Update from operation
- operation.replaceInput(index, new_index);
-
- // Update from operand
- remove_list.push_back(
- use); // Removal should be done in another loop since we are in the loop
- _graph.operands().at(new_index).appendUse(use);
- }
- }
-
- for (auto &operation : remove_list)
- {
- object.removeUse(operation);
- }
- }
-}
-
-OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &operand_index,
- const operand::PermuteFactor &factor)
-{
- assert(!_graph.isBuildingPhase());
-
- auto &operand = _graph.operands().at(operand_index);
-
- // Generate output operand and permute operation
- auto out_operand_index = _graph.addOperand(operand.shape(), operand.typeInfo());
- // change model output if operand_index is model output index
- auto &model_outputs = _graph.getOutputs();
- if (model_outputs.contains(operand_index))
- {
- model_outputs.replace(operand_index, out_operand_index);
- }
-
- // Find Permute information
- auto input_backend = _graph.getLowerInfo(operand_index)->def_factors().getOnlyElement().backend();
- auto output_backend = factor.backend();
- // NOTE Permute may not have specific layout because the layout of input and output may be
- // different.
- const auto permute_node_layout = Layout::UNKNOWN;
- const auto permute_node_backend = backend::BackendManager::get().getDefault();
- const operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
-
- // Update LowerInfo of input operand
- auto operand_lower_info = _graph.getLowerInfo(operand_index);
- operand_lower_info->removeUsePermuteFactor(factor);
- operand_lower_info->addUsePermuteFactor(permute_node_factor);
-
- // Update LowerInfo of output operand
- auto out_operand_li =
- nnfw::cpp14::make_unique<operand::LowerInfo>(operand::asShape4D(operand.shape()));
-
- // The input and output factors of all nodes will be the same except Permute. So Tensor's
- // allocators allocates memory using only the information of def permutation factor now.
- // TODO Change param to permute_node_factor
- out_operand_li->addDefPermuteFactor(factor);
- out_operand_li->addUsePermuteFactor(factor);
- _graph.setLowerInfo(out_operand_index, std::move(out_operand_li));
-
- auto input_backend_ctx = _graph.backend_resolver()->getBackendContext(input_backend);
- auto output_backend_ctx = _graph.backend_resolver()->getBackendContext(output_backend);
-
- // Insert permute operation to the graph
- const auto input_layout =
- _graph.getLowerInfo(operand_index)->def_factors().getOnlyElement().layout();
- const auto output_layout = factor.layout();
- using Permute = operation::Permute;
- const auto permute_type = [&]() {
- if (input_layout == Layout::NHWC && output_layout == Layout::NCHW)
- {
- return Permute::Type::NHWC_TO_NCHW;
- }
- else if (input_layout == Layout::NCHW && output_layout == Layout::NHWC)
- {
- return Permute::Type::NCHW_TO_NHWC;
- }
- else
- {
- return Permute::Type::COPY;
- }
- }();
- auto insert_node = nnfw::cpp14::make_unique<Permute>(
- operand_index, out_operand_index, input_backend_ctx, output_backend_ctx, permute_type);
-
- auto node_index = _graph.operations().push(std::move(insert_node));
- const auto &node = _graph.operations().at(node_index);
-
- // OpSequence
- {
- auto subg_index = _graph.subgraphs().emplace(node_index, node, permute_node_layout);
- auto &subg = _graph.subgraphs().at(subg_index);
- subg.setInputs(node.getInputs());
- subg.setOutputs(node.getOutputs());
- _graph.setLowerInfo(subg_index, nnfw::cpp14::make_unique<operation::LowerInfo>(
- permute_node_backend, permute_node_layout));
- }
-
- // Update Use/Def info
- {
- _graph.operands().at(operand_index).appendUse(node_index);
- _graph.operands().at(out_operand_index).appendDef(node_index);
- }
- return node_index;
-}
-} // namespace pass
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/pass/PermutationInsertionPass.h b/runtime/neurun/core/src/ir/pass/PermutationInsertionPass.h
deleted file mode 100644
index 4065fc6ac..000000000
--- a/runtime/neurun/core/src/ir/pass/PermutationInsertionPass.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
-#define __NEURUN_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
-
-#include "OperandPass.h"
-#include "ir/Operand.h" //for OperationIndex
-#include "backend/BackendManager.h"
-#include "ir/operand/PermuteFactor.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace pass
-{
-
-class PermutationInsertionPass : public OperandPass
-{
-public:
- using OperandPass::OperandPass;
-
-public:
- std::string id() override { return "PermutationInsertionPass"; }
- void callback(const OperandIndex &index, Operand &object) override;
-
- /**
- * @brief Insert Permute operation that has given operand as input
- *
- * @param operand_index is the target operand index for the insertion
- * @param factor is the output operand's backend type and layout
- *
- * @return OperationIndex
- */
- OperationIndex insertPermute(const OperandIndex &operand_index,
- const operand::PermuteFactor &factor);
-
-private:
-};
-
-} // namespace pass
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
diff --git a/runtime/neurun/core/src/ir/pass/PermutationOperationPass.cc b/runtime/neurun/core/src/ir/pass/PermutationOperationPass.cc
deleted file mode 100644
index 41a1ad903..000000000
--- a/runtime/neurun/core/src/ir/pass/PermutationOperationPass.cc
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "PermutationOperationPass.h"
-
-#include "backend/Backend.h"
-#include "backend/IConfig.h"
-#include "ir/Graph.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace pass
-{
-
-void PermutationOperationPass::callback(const OperationIndex &, Operation &node)
-{
- node.accept(*this);
-};
-
-void PermutationOperationPass::changeToKeepLayout(const Operation &node)
-{
- const auto &output_ind = node.getOutputs().at(0);
- const auto &output_obj = _graph.operands().at(output_ind);
-
- assert(output_obj.getDef().size() == 1);
- const auto &node_index = output_obj.getDef().list().front();
- const auto &subg_index = _graph.subgraphs().getOperation(node_index);
-
- const auto frontend_layout = _graph.subgraphs().at(subg_index).getLayout();
- const auto backend_layout = _graph.getLowerInfo(subg_index)->layout();
-
- if (frontend_layout == backend_layout)
- {
- return;
- }
-
- // CPU supports only NHWC now
- if (_graph.getLowerInfo(subg_index)->backend()->config()->id() != "cpu")
- {
- // TODO Change backend of this node
- assert(frontend_layout == Layout::NHWC || backend_layout == Layout::UNKNOWN);
- }
-
- // Divide op_seq based on target operation
- {
- auto &above_subg = _graph.subgraphs().at(subg_index);
-
- // Create new op_seq and move information from existing op_seq to new op_seq if target
- // node is the end of op_seq
- auto it = above_subg.begin();
- // Find iterator of target node in op_seq
- while ((it++)->index != node_index)
- ;
- if (it != above_subg.end())
- {
- const auto &below_subg_index =
- _graph.subgraphs().emplace(it->index, *it->node, above_subg.getLayout());
- auto &below_subg = _graph.subgraphs().at(below_subg_index);
- below_subg.setInputs(it->node->getInputs());
- below_subg.setOutputs(it->node->getOutputs());
-
- std::vector<OperationIndex> remove_list;
- remove_list.emplace_back(it->index);
- while (++it != above_subg.end())
- {
- below_subg.appendOperation(it->index, *it->node);
- below_subg.setOutputs(it->node->getOutputs());
- remove_list.emplace_back(it->index);
- }
-
- above_subg.setOutputs(node.getOutputs());
- for (const auto &index : remove_list)
- {
- above_subg.remove(index);
- }
-
- const auto subg_li = _graph.getLowerInfo(subg_index);
- _graph.setLowerInfo(below_subg_index, nnfw::cpp14::make_unique<operation::LowerInfo>(
- subg_li->backend(), subg_li->layout()));
- }
- }
-
- // Remove target operation from op_seq and insert the target operation to new op_seq
- {
- const auto backend = _graph.getLowerInfo(subg_index)->backend();
-
- // Remove target operation from subraph
- _graph.subgraphs().removeFromSubgraph(node_index);
-
- if (!_graph.subgraphs().exist(subg_index))
- {
- // Remove lowerinfo for op_seq of target operation if the op_seq does not exist
- _graph.removeLowerInfo(subg_index);
- }
- else
- {
- // Update op_seq of target operation if the op_seq exists
- auto &above_subg = _graph.subgraphs().at(subg_index);
- const auto last_node = (--above_subg.end())->node;
- above_subg.setOutputs(last_node->getOutputs());
- }
-
- // Create new op_seq and set information to the op_seq
- auto new_subg_index = _graph.subgraphs().emplace(node_index, node, frontend_layout);
- auto &new_subg = _graph.subgraphs().at(new_subg_index);
- new_subg.setInputs(node.getInputs());
- new_subg.setOutputs(node.getOutputs());
- _graph.setLowerInfo(new_subg_index,
- nnfw::cpp14::make_unique<operation::LowerInfo>(backend, frontend_layout));
- }
-
- // Change PermuteFactors of operands of target node
- {
- const auto &subg_index = _graph.subgraphs().getOperation(node_index);
- const auto subg_li = _graph.getLowerInfo(subg_index);
- const auto backend = subg_li->backend();
- const operand::PermuteFactor removed_factor{backend, backend_layout};
- const operand::PermuteFactor new_factor{backend, frontend_layout};
- for (const auto &input : node.getInputs())
- {
- bool canRemove = true;
- for (const auto &use : _graph.operands().at(input).getUses().list())
- {
- if (use != node_index)
- {
- const auto &use_subg_index = _graph.subgraphs().getOperation(use);
- auto use_subg_li = _graph.getLowerInfo(use_subg_index);
- if (use_subg_li->backend() == backend && use_subg_li->layout() == backend_layout)
- {
- canRemove = false;
- break;
- }
- }
- }
-
- auto lower_info = _graph.getLowerInfo(input);
- if (canRemove)
- {
- lower_info->removeUsePermuteFactor(removed_factor);
- }
- lower_info->addUsePermuteFactor(new_factor);
-
- // Whether if node's input is an input of model or a constant
- if (_graph.operands().at(input).getDef().size() == 0)
- {
- assert(_graph.getInputs().contains(input) || _graph.operands().at(input).isConstant());
- lower_info->removeDefPermuteFactor(removed_factor);
- lower_info->addDefPermuteFactor(new_factor);
- }
- }
-
- for (const auto &output : node.getOutputs())
- {
- auto lower_info = _graph.getLowerInfo(output);
- lower_info->removeDefPermuteFactor(removed_factor);
- lower_info->addDefPermuteFactor(new_factor);
-
- // Whether if node's output is an output of model
- if (_graph.operands().at(output).getUses().size() == 0)
- {
- assert(_graph.getOutputs().contains(output));
- lower_info->removeUsePermuteFactor(removed_factor);
- lower_info->addUsePermuteFactor(new_factor);
- }
- }
- }
-}
-
-void PermutationOperationPass::visit(const operation::FullyConnected &node)
-{
- const auto &input_ind = node.getInputs().at(operation::FullyConnected::Input::INPUT);
- const auto &input_obj = _graph.operands().at(input_ind);
- const auto &input_shape = input_obj.shape();
-
- if (input_shape.rank() == 4)
- {
- changeToKeepLayout(node);
- }
-}
-
-void PermutationOperationPass::visit(const operation::Gather &node)
-{
- const auto &input_ind = node.getInputs().at(operation::Gather::Input::INPUT);
- const auto &input_obj = _graph.operands().at(input_ind);
- const auto &input_shape = input_obj.shape();
-
- const auto &output_ind = node.getOutputs().at(0);
- const auto &output_obj = _graph.operands().at(output_ind);
- const auto &output_shape = output_obj.shape();
-
- if (input_shape.rank() >= 4 || output_shape.rank() >= 4)
- {
- changeToKeepLayout(node);
- }
-}
-
-void PermutationOperationPass::visit(const operation::Reshape &node)
-{
- const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
- const auto &input_obj = _graph.operands().at(input_ind);
- const auto &input_shape = input_obj.shape();
-
- const auto &output_ind = node.getOutputs().at(0);
- const auto &output_obj = _graph.operands().at(output_ind);
- const auto &output_shape = output_obj.shape();
-
- if (input_shape.rank() >= 4 || output_shape.rank() >= 4)
- {
- changeToKeepLayout(node);
- }
-}
-
-} // namespace pass
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/pass/PermutationOperationPass.h b/runtime/neurun/core/src/ir/pass/PermutationOperationPass.h
deleted file mode 100644
index 896e0176a..000000000
--- a/runtime/neurun/core/src/ir/pass/PermutationOperationPass.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
-#define __NEURUN_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
-
-#include "ir/OperationVisitor.h"
-#include "OperationPass.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace pass
-{
-
-class PermutationOperationPass : public OperationPass, public OperationVisitor
-{
-public:
- using OperationPass::OperationPass;
-
-public:
- std::string id() final { return "PermutationOperationPass"; }
-
-public:
- void callback(const OperationIndex &i, Operation &n) final;
-
-public:
- void visit(const operation::FullyConnected &) final;
- void visit(const operation::Gather &) final;
- void visit(const operation::Reshape &) final;
-
-private:
- void changeToKeepLayout(const Operation &);
-};
-
-} // namespace pass
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
diff --git a/runtime/neurun/core/src/ir/verifier/Verifier.cc b/runtime/neurun/core/src/ir/verifier/Verifier.cc
deleted file mode 100644
index 7bd8ac512..000000000
--- a/runtime/neurun/core/src/ir/verifier/Verifier.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Verifier.h"
-
-#include "ir/Graph.h"
-#include "ir/OperationIndexMap.h"
-
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace ir
-{
-namespace verifier
-{
-
-//
-// DAGChecker
-//
-
-bool DAGChecker::verify(const Graph &graph) const
-{
- auto &operations = graph.operations();
- bool cyclic = false;
-
- OperationIndexMap<bool> visited;
- operations.iterate(
- [&](const OperationIndex &index, const Operation &) { visited[index] = false; });
- OperationIndexMap<bool> on_stack = visited; // Copy from visited
-
- std::function<void(const OperationIndex &index, const Operation &)> dfs_recursive =
- [&](const OperationIndex &index, const Operation &node) -> void {
- if (on_stack[index])
- cyclic = true;
- if (visited[index])
- return;
- visited[index] = true;
- on_stack[index] = true;
-
- for (auto output : node.getOutputs())
- {
- const auto &operand = graph.operands().at(output);
- for (const auto &use : operand.getUses().list())
- {
- dfs_recursive(use, graph.operations().at(use));
- }
- }
-
- on_stack[index] = false;
- };
-
- operations.iterate(dfs_recursive);
-
- return !cyclic;
-}
-
-//
-// EdgeConsistencyVerifier
-//
-
-bool EdgeConsistencyChecker::verify(const Graph &graph) const
-{
- auto &operations = graph.operations();
- uint32_t mismatches = 0;
- operations.iterate([&](const OperationIndex &index, const Operation &node) {
- for (auto operand_index : node.getInputs())
- {
- auto &operand = graph.operands().at(operand_index);
- mismatches += (operand.getUses().contains(index) ? 0 : 1);
- }
- for (auto operand_index : node.getOutputs())
- {
- auto &operand = graph.operands().at(operand_index);
- mismatches += (operand.getDef().contains(index) ? 0 : 1);
- }
- });
- return mismatches == 0;
-}
-
-} // namespace verifier
-} // namespace ir
-} // namespace neurun
diff --git a/runtime/neurun/core/src/ir/verifier/Verifier.h b/runtime/neurun/core/src/ir/verifier/Verifier.h
deleted file mode 100644
index 0993a239e..000000000
--- a/runtime/neurun/core/src/ir/verifier/Verifier.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_GRAPH_VERIFIER_VERIFIER_H__
-#define __NEURUN_GRAPH_VERIFIER_VERIFIER_H__
-
-namespace neurun
-{
-namespace ir
-{
-class Graph;
-} // namespace ir
-} // namespace neurun
-
-namespace neurun
-{
-namespace ir
-{
-namespace verifier
-{
-
-struct IVerifier
-{
- virtual ~IVerifier() = default;
- virtual bool verify(const Graph &graph) const = 0;
-};
-
-} // namespace verifier
-} // namespace ir
-} // namespace neurun
-
-namespace neurun
-{
-namespace ir
-{
-namespace verifier
-{
-
-class DAGChecker : public IVerifier
-{
-public:
- bool verify(const Graph &graph) const override;
-};
-
-class EdgeConsistencyChecker : public IVerifier
-{
-public:
- bool verify(const Graph &graph) const override;
-};
-
-} // namespace verifier
-} // namespace ir
-} // namespace neurun
-
-#endif // __NEURUN_GRAPH_VERIFIER_VERIFIER_H__
diff --git a/runtime/neurun/core/src/library_info.cc b/runtime/neurun/core/src/library_info.cc
deleted file mode 100644
index 601d09185..000000000
--- a/runtime/neurun/core/src/library_info.cc
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-volatile const char info[] = "library information : runtime=neurun";
diff --git a/runtime/neurun/core/src/util/ConfigSource.cc b/runtime/neurun/core/src/util/ConfigSource.cc
deleted file mode 100644
index 7d57ec178..000000000
--- a/runtime/neurun/core/src/util/ConfigSource.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/ConfigSource.h"
-#include "util/GeneralConfigSource.h"
-#include "util/EnvConfigSource.h"
-
-#include <array>
-#include <algorithm>
-#include <cassert>
-
-#include "cpp14/memory.h"
-
-namespace neurun
-{
-namespace util
-{
-
-static std::unique_ptr<IConfigSource> _source;
-
-void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
-
-static IConfigSource *config_source()
-{
- if (!_source)
- {
-#ifdef ENVVAR_FOR_DEFAULT_CONFIG
- // Default ConfigSource is EnvConfigSource
- _source = nnfw::cpp14::make_unique<EnvConfigSource>();
-#else
- _source = nnfw::cpp14::make_unique<GeneralConfigSource>();
-#endif // ENVVAR_FOR_DEFAULT_CONFIG
- }
- return _source.get();
-}
-
-static std::string getConfigOrDefault(const std::string &key)
-{
- static std::unordered_map<std::string, std::string> defaults;
- if (defaults.empty())
- {
-#define CONFIG(Name, Type, Default) \
- { \
- auto name = std::string{#Name}; \
- defaults.emplace(name, std::string{Default}); \
- }
-
-#include "util/Config.lst"
-
-#undef CONFIG
- }
-
- // Treat empty string and absence of the value to be the same
- auto ret = config_source()->get(key);
- if (ret.empty())
- {
- auto itr = defaults.find(key);
- if (itr != defaults.end())
- {
- // Return the default value if exists
- ret = itr->second;
- }
- }
-
- return ret;
-}
-
-bool getConfigBool(const std::string &key)
-{
- auto raw = getConfigOrDefault(key);
- static const std::array<std::string, 5> false_list{"0", "OFF", "FALSE", "N", "NO"};
- auto false_found = std::find(false_list.begin(), false_list.end(), raw);
-
- return (false_found == false_list.end());
-}
-
-int getConfigInt(const std::string &key)
-{
- auto raw = getConfigOrDefault(key);
- return std::stoi(raw);
-}
-
-std::string getConfigString(const std::string &key) { return getConfigOrDefault(key); }
-
-} // namespace util
-} // namespace neurun
-
-namespace neurun
-{
-namespace util
-{
-namespace config
-{
-
-#define CONFIG(Name, Type, Default) const char *Name = #Name;
-
-#include "util/Config.lst"
-
-#undef CONFIG
-
-} // namespace config
-} // namespace util
-} // namespace neurun
diff --git a/runtime/neurun/core/src/util/EnvConfigSource.cc b/runtime/neurun/core/src/util/EnvConfigSource.cc
deleted file mode 100644
index 52a6bf2a4..000000000
--- a/runtime/neurun/core/src/util/EnvConfigSource.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/EnvConfigSource.h"
-
-#include <cstdlib>
-
-namespace neurun
-{
-namespace util
-{
-
-std::string EnvConfigSource::get(const std::string &key) const
-{
- const char *value = std::getenv(key.c_str());
- if (value != nullptr)
- {
- return value;
- }
- else
- {
- return GeneralConfigSource::get(key);
- }
-}
-
-} // namespace util
-} // namespace neurun
diff --git a/runtime/neurun/core/src/util/EventCollectorGlobal.cc b/runtime/neurun/core/src/util/EventCollectorGlobal.cc
deleted file mode 100644
index 6c3594f5f..000000000
--- a/runtime/neurun/core/src/util/EventCollectorGlobal.cc
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/EventCollectorGlobal.h"
-
-#include <cassert>
-#include <fstream>
-
-#include "util/ConfigSource.h"
-
-namespace neurun
-{
-namespace util
-{
-
-EventCollectorGlobal::EventCollectorGlobal() : _recorder{}, _collector{&_recorder}
-{
- // DO NOTHING
-}
-
-EventCollectorGlobal::~EventCollectorGlobal()
-{
- auto path = util::getConfigString(util::config::TRACE_FILEPATH);
- if (!path.empty())
- {
- // TODO Need better way for saved file path than just appending ".global" to the trace file path
- std::ofstream ofs{path + ".global"};
- _recorder.writeToFile(ofs);
- }
-}
-
-EventCollectorGlobal &EventCollectorGlobal::get()
-{
- static EventCollectorGlobal instance;
- return instance;
-}
-
-EventDurationBlock::EventDurationBlock(const std::string &tag) : _tag{tag}
-{
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
-}
-EventDurationBlock::~EventDurationBlock()
-{
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
-}
-
-EventDurationManual::EventDurationManual(const std::string &tag) : _tag{tag}, _pair{true} {}
-
-EventDurationManual::~EventDurationManual()
-{
- // Check if it has called begin-end pair
- assert(_pair);
-}
-
-void EventDurationManual::begin()
-{
- _pair = false;
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
-}
-
-void EventDurationManual::end()
-{
- assert(!_pair);
- _pair = true;
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
-}
-
-} // namespace util
-} // namespace neurun
diff --git a/runtime/neurun/core/src/util/GeneralConfigSource.cc b/runtime/neurun/core/src/util/GeneralConfigSource.cc
deleted file mode 100644
index 084e4c109..000000000
--- a/runtime/neurun/core/src/util/GeneralConfigSource.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/GeneralConfigSource.h"
-#include "util/logging.h"
-
-namespace neurun
-{
-namespace util
-{
-
-std::string GeneralConfigSource::get(const std::string &key) const
-{
- auto itr = _map.find(key);
- if (itr == _map.end())
- {
- return "";
- }
- else
- {
- return itr->second;
- }
-}
-
-void GeneralConfigSource::set(const std::string &key, const std::string &val)
-{
- VERBOSE(GeneralConfigSource) << key << " : " << val << std::endl;
- _map[key] = val;
-}
-
-} // namespace util
-} // namespace neurun
diff --git a/runtime/neurun/core/src/util/Padding.cc b/runtime/neurun/core/src/util/Padding.cc
deleted file mode 100644
index 2e2202b58..000000000
--- a/runtime/neurun/core/src/util/Padding.cc
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/Padding.h"
-#include "util/Utils.h"
-
-#include <algorithm>
-#include <stdexcept>
-
-namespace neurun
-{
-namespace util
-{
-
-ir::ExplicitPadding validPadding(void)
-{
- //
- // ANEURALNETWORKS_PADDING_VALID
- //
- // VALID padding. No padding.
- //
- // When the input size is not evenly divisible by the filter size,
- // the input at the end that could not fill the whole filter tile
- // will simply be ignored.
- //
- ir::ExplicitPadding padding;
-
- padding.top = 0;
- padding.bottom = 0;
- padding.left = 0;
- padding.right = 0;
-
- return padding;
-}
-
-ir::ExplicitPadding samePaddingUsingIFM(const ir::FeatureShape &ifm_shape, const ir::Stride &stride,
- uint32_t kw, uint32_t kh)
-{
- ir::ExplicitPadding padding;
-
- // ANEURALNETWORKS_PADDING_SAME (from NNAPI spec)
- //
- // SAME padding. Padding on both ends are the "same":
- //
- // padding_to_beginning = total_padding / 2
- // padding_to_end = (total_padding + 1)/2.
- //
- const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
- const int32_t horizontal_expected_output =
- (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
-
- const int32_t vertical_needed_input = (vertical_expected_output - 1) * stride.vertical + kh;
- const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H);
-
- const int32_t horizontal_needed_input = (horizontal_expected_output - 1) * stride.horizontal + kw;
- const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W);
-
- padding.top = vertical_total_padding / 2;
- padding.bottom = (vertical_total_padding + 1) / 2;
- padding.left = horizontal_total_padding / 2;
- padding.right = (horizontal_total_padding + 1) / 2;
-
- return padding;
-}
-
-ir::ExplicitPadding samePadding(const ir::FeatureShape &ifm_shape,
- const ir::FeatureShape &ofm_shape, const ir::Stride &stride,
- uint32_t kw, uint32_t kh)
-{
- const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
- const int32_t horizontal_expected_output =
- (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
- assert(vertical_expected_output == ofm_shape.H);
- assert(horizontal_expected_output == ofm_shape.W);
-
- UNUSED_RELEASE(ofm_shape);
- UNUSED_RELEASE(vertical_expected_output);
- UNUSED_RELEASE(horizontal_expected_output);
-
- return samePaddingUsingIFM(ifm_shape, stride, kw, kh);
-}
-
-ir::ExplicitPadding calculatePadding(const ir::Padding &padding, const ir::FeatureShape &ifm_shape,
- const ir::FeatureShape &ofm_shape, const ir::Stride &stride,
- uint32_t kw, uint32_t kh)
-{
- if (padding.type == ir::PaddingType::EXPLICIT)
- {
- return padding.param;
- }
- else if (padding.type == ir::PaddingType::SAME)
- {
- return samePadding(ifm_shape, ofm_shape, stride, kw, kh);
- }
- else if (padding.type == ir::PaddingType::VALID)
- {
- return validPadding();
- }
- else
- {
- throw std::runtime_error{"Cannot handle padding type"};
- }
-}
-
-} // namespace util
-} // namespace neurun
diff --git a/runtime/neurun/core/src/util/ShapeInference.cc b/runtime/neurun/core/src/util/ShapeInference.cc
deleted file mode 100644
index 6fa29e7d8..000000000
--- a/runtime/neurun/core/src/util/ShapeInference.cc
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/Utils.h"
-#include "ir/InternalType.h"
-#include "ir/Shape.h"
-#include "ir/operation/AvgPool2D.h"
-#include "ir/operation/MaxPool2D.h"
-#include "util/ShapeInference.h"
-
-namespace neurun
-{
-namespace shape_inference
-{
-
-//
-// Helper functions
-//
-
-namespace
-{
-
-template <typename T, typename U>
-typename std::enable_if<std::is_integral<T>::value && std::is_integral<U>::value,
- typename std::common_type<T, U>::type>::type
-ceil_div(T dividend, U divisor)
-{
- assert(dividend > 0 && divisor > 0 && "this implementations is for positive numbers only");
- return (dividend + divisor - 1) / divisor;
-}
-
-// Calculate the result of broadcast of two shapes
-ir::Shape broadcastShapes(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape)
-{
- ir::Shape out_shape;
- auto max_rank = std::max(lhs_shape.rank(), rhs_shape.rank());
-
- for (int idx = 0; idx < max_rank; ++idx)
- {
- // Go over operands dimensions from right to left
- int lhs_idx = lhs_shape.rank() - idx - 1;
- int rhs_idx = rhs_shape.rank() - idx - 1;
-
- int32_t lhs_dim = lhs_idx >= 0 ? lhs_shape.dim(lhs_idx) : 1;
- int32_t rhs_dim = rhs_idx >= 0 ? rhs_shape.dim(rhs_idx) : 1;
-
- if (lhs_dim != 1 && rhs_dim != 1 && lhs_dim != rhs_dim)
- throw std::runtime_error("Incompatible shapes for broadcast");
-
- out_shape.prepend(std::max(lhs_dim, rhs_dim));
- }
-
- return out_shape;
-}
-
-// Calculate output height and width of convolution-like operation
-std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, const int ker_h,
- const int ker_w, const ir::Padding pad,
- const ir::Stride stride)
-{
- int32_t out_h = 0, out_w = 0;
-
- switch (pad.type)
- {
- case ir::PaddingType::SAME:
- out_h = ceil_div(in_h, stride.vertical);
- out_w = ceil_div(in_w, stride.horizontal);
- break;
- case ir::PaddingType::VALID:
- out_h = ceil_div(in_h - ker_h + 1, stride.vertical);
- out_w = ceil_div(in_w - ker_w + 1, stride.horizontal);
- break;
- case ir::PaddingType::EXPLICIT:
- out_h = (in_h + pad.param.top + pad.param.bottom - ker_h) / stride.vertical + 1;
- out_w = (in_w + pad.param.left + pad.param.right - ker_w) / stride.horizontal + 1;
- break;
- default:
- assert(false);
- }
-
- return {out_h, out_w};
-}
-
-} // namespace
-
-//
-// Shape inference
-//
-
-Shapes inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape)
-{
- return {broadcastShapes(lhs_shape, rhs_shape)};
-}
-
-Shapes inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param,
- const ir::Layout layout)
-{
- assert(layout == ir::Layout::NHWC);
- auto ifm_shape = in_shape.asFeature(layout);
- const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
- param.padding, param.stride);
- // Pooling don't change number of channels and batch size
- return {ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C}};
-}
-
-Shapes inferConcatShape(const Shapes &in_shapes, const ir::operation::Concat::Param &param)
-{
- const int32_t concat_axis = param.axis;
- const auto &first_in_shape = in_shapes[0];
-
- // Check that all shapes are equal except for concat axis dimension
- for (const auto &in_shape : in_shapes)
- {
- assert(in_shape.rank() == first_in_shape.rank());
- for (int64_t dim_idx = 0; dim_idx < in_shape.rank(); ++dim_idx)
- assert(dim_idx == concat_axis || in_shape.dim(dim_idx) == first_in_shape.dim(dim_idx));
- }
-
- // Calculate output shape
- ir::Shape out_shape(first_in_shape);
- out_shape.dim(concat_axis) = 0;
- for (const auto &in_shape : in_shapes)
- out_shape.dim(concat_axis) += in_shape.dim(concat_axis);
- return {out_shape};
-}
-
-Shapes inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param,
- const ir::Layout layout)
-{
- assert(layout == ir::Layout::NHWC);
- auto ifm_shape = in_shape.asFeature(layout);
- const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
- param.padding, param.stride);
- // Pooling don't change number of channels and batch size
- return {ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C}};
-}
-
-Shapes inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape,
- const ir::operation::Conv2D::Param &param, ir::Layout layout)
-{
- assert(layout == ir::Layout::NHWC);
- auto ifm_shape = in_shape.asFeature(layout);
-
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]
- auto kf_shape = ker_shape.asFeature(layout);
- assert(ifm_shape.C == kf_shape.C);
-
- const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W,
- param.padding, param.stride);
-
- return {ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.N}};
-}
-
-Shapes inferDepthwiseConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape,
- const ir::operation::DepthwiseConv2D::Param &param,
- ir::Layout layout)
-{
- assert(layout == ir::Layout::NHWC);
- auto ifm_shape = in_shape.asFeature(layout);
-
- // Kernel format is [1, kernel_height, kernel_width, depth_out]
- auto kf_shape = ker_shape.asFeature(layout);
- assert(kf_shape.C == static_cast<int32_t>(ifm_shape.C * param.multiplier));
- assert(kf_shape.N == 1);
-
- const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W,
- param.padding, param.stride);
-
- return {ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.C}};
-}
-
-Shapes inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape)
-{
- assert(in_shape.rank() >= 2);
- assert(ker_shape.rank() == 2);
-
- const auto input_size_with_batch = in_shape.num_elements();
- const auto num_units = ker_shape.dim(0);
- const auto input_size = ker_shape.dim(1);
- const auto batch_size = input_size_with_batch / input_size;
- assert(input_size_with_batch % input_size == 0);
-
- return {{ir::Shape({static_cast<int32_t>(batch_size), num_units})}};
-}
-
-} // namespace shape_inference
-} // namespace neurun
diff --git a/runtime/neurun/core/src/util/Utils.cc b/runtime/neurun/core/src/util/Utils.cc
deleted file mode 100644
index 1e24e28d4..000000000
--- a/runtime/neurun/core/src/util/Utils.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/Utils.h"
-
-#include <cassert>
-
-namespace neurun
-{
-namespace util
-{
-
-const char *to_string(const ir::PaddingType type)
-{
- assert((type == ir::PaddingType::EXPLICIT) || (type == ir::PaddingType::SAME) ||
- (type == ir::PaddingType::VALID));
-
- switch (type)
- {
- case ir::PaddingType::EXPLICIT:
- return "Padding::EXPLICIT";
- case ir::PaddingType::SAME:
- return "Padding::SAME";
- case ir::PaddingType::VALID:
- return "Padding::VALID";
- }
-
- return nullptr;
-}
-
-Coordinates convertCoordinates(const Coordinates &from_coordinates, ir::Layout from_layout,
- ir::Layout to_layout)
-{
- assert(from_coordinates.size() == 4);
- Coordinates to{from_coordinates};
- if (from_layout == ir::Layout::NHWC && to_layout == ir::Layout::NCHW)
- {
- to.set(0, from_coordinates[0]);
- to.set(1, from_coordinates[3]);
- to.set(2, from_coordinates[1]);
- to.set(3, from_coordinates[2]);
- }
- else if (from_layout == ir::Layout::NCHW && to_layout == ir::Layout::NHWC)
- {
- to.set(0, from_coordinates[0]);
- to.set(1, from_coordinates[2]);
- to.set(2, from_coordinates[3]);
- to.set(3, from_coordinates[1]);
- }
-
- return to;
-}
-
-} // namespace util
-} // namespace neurun
diff --git a/runtime/neurun/core/src/util/logging.cc b/runtime/neurun/core/src/util/logging.cc
deleted file mode 100644
index c23e2b53c..000000000
--- a/runtime/neurun/core/src/util/logging.cc
+++ /dev/null
@@ -1,7 +0,0 @@
-#include "util/logging.h"
-
-neurun::util::logging::Context &neurun::util::logging::Context::get() noexcept
-{
- static Context ctx;
- return ctx;
-}
diff --git a/runtime/neurun/frontend/base_loader/CMakeLists.txt b/runtime/neurun/frontend/base_loader/CMakeLists.txt
deleted file mode 100644
index 358fc2646..000000000
--- a/runtime/neurun/frontend/base_loader/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-if(NOT BUILD_TFLITE_LOADER AND NOT BUILD_CIRCLE_LOADER)
- return()
-endif(NOT BUILD_TFLITE_LOADER AND NOT BUILD_CIRCLE_LOADER)
-
-add_library(base_loader INTERFACE)
-target_include_directories(base_loader INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(base_loader INTERFACE neurun_core nnfw_lib_cpp14)
diff --git a/runtime/neurun/frontend/base_loader/include/base_loader.h b/runtime/neurun/frontend/base_loader/include/base_loader.h
deleted file mode 100644
index ae1562f6c..000000000
--- a/runtime/neurun/frontend/base_loader/include/base_loader.h
+++ /dev/null
@@ -1,1278 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __BASE_LOADER_BASE_LOADER_H__
-#define __BASE_LOADER_BASE_LOADER_H__
-
-#include "ir/Graph.h"
-#include "ir/Operations.Include.h"
-
-#include <map>
-#include <cpp14/memory.h>
-#include <fstream>
-#include <limits>
-
-namespace neurun
-{
-namespace base_loader
-{
-
-template <typename LoaderDomain, typename SpecificLoader> class BaseLoader
-{
- using Verifier = typename LoaderDomain::Verifier;
- using ActivationFunctionType = typename LoaderDomain::ActivationFunctionType;
- using Buffer = typename LoaderDomain::Buffer;
- using BuiltinOperator = typename LoaderDomain::BuiltinOperator;
- using CustomOptionsFormat = typename LoaderDomain::CustomOptionsFormat;
- using Model = typename LoaderDomain::Model;
- using Operator = typename LoaderDomain::Operator;
- using Padding = typename LoaderDomain::Padding;
- using Pool2DOptions = typename LoaderDomain::Pool2DOptions;
- using SubGraph = typename LoaderDomain::SubGraph;
- using Tensor = typename LoaderDomain::Tensor;
- using TensorType = typename LoaderDomain::TensorType;
-
-public:
- /**
- * @brief Construct a new Loader object
- *
- * @param graph reference on graph
- */
- explicit BaseLoader(ir::Graph &graph) : _graph(graph), _model{nullptr} {}
-
- /**
- * @brief Load a model from file
- *
- * @param file_path
- */
- void loadFromFile(const char *file_path);
-
-protected:
- ~BaseLoader() = default;
-
- void loadModel();
-
- // Helper functions
- ir::Activation convertActivation(ActivationFunctionType type);
- ir::DataType tensorTypeToDataType(TensorType type);
-
- // Create operands form tflite::Tensor
- ir::OperandIndex loadOperand(const Tensor *tensor);
- void loadOperationIO(const Operator *op, ir::OperandIndexSequence &inputs,
- ir::OperandIndexSequence &outputs);
- // Create operations from Operator
- void loadOperation(const Operator *op);
- // Load Strides and Paddings from options to param
- template <typename Param, typename OptionsType>
- void loadStridesAndPaddings(Param &param, const OptionsType *options);
- // Load Pool2D param
- template <typename Param> void loadPool2D(Param &param, const Pool2DOptions *options);
-
- // Operations
- void loadConv2D(const Operator *op);
- void loadDepthwiseConv2D(const Operator *op);
- void loadTransposeConv(const Operator *op);
- void loadAvgPool2D(const Operator *op);
- void loadReshape(const Operator *op);
- void loadSoftmax(const Operator *op);
- void loadMaxPool2D(const Operator *op);
- void loadConcatenation(const Operator *op);
- void loadInstanceNorm(const Operator *op);
- void loadFC(const Operator *op);
- void loadAdd(const Operator *op);
- void loadSub(const Operator *op);
- void loadMul(const Operator *op);
- void loadDiv(const Operator *op);
- void loadPack(const Operator *op);
- void loadRelu(const Operator *op);
- void loadRelu6(const Operator *op);
- void loadResizeBilinear(const Operator *op);
- void loadRsqrt(const Operator *op);
- void loadSqrt(const Operator *op);
- void loadSquaredDifference(const Operator *op);
- void loadTanh(const Operator *op);
- void loadTranspose(const Operator *op);
- void loadMean(const Operator *op);
- void loadReduceMax(const Operator *op);
- void loadPad(const Operator *op);
- void loadLogistic(const Operator *op);
- void loadExp(const Operator *op);
- void loadGather(const Operator *op);
- void loadCustom(const Operator *op);
- void loadSpaceToBatchND(const Operator *op);
- void loadBatchToSpaceND(const Operator *op);
- void loadReduceSum(const Operator *op);
- void loadSqueeze(const Operator *op);
- void loadPrelu(const Operator *op);
- void loadSplit(const Operator *op);
- void loadSlice(const Operator *op);
- void loadStridedSlice(const Operator *op);
- void loadUnpack(const Operator *op);
- void loadMinimum(const Operator *op);
- void loadMaximum(const Operator *op);
- void loadCast(const Operator *op);
- void loadComparison(const Operator *op);
- void loadOneHot(const Operator *op);
-
-protected:
- // Buffer for loading (if needed)
- std::vector<char> _buffer;
- // Reference on loadable Graph
- ir::Graph &_graph;
- const Model *_model;
- // Maps Tensor indices to neurun Operands.
- std::vector<ir::OperandIndex> _tensor_to_operand;
- // Verifier
- std::unique_ptr<Verifier> _verifier;
-};
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::loadFromFile(const char *file_path)
-{
- std::ifstream stream(file_path, std::fstream::in | std::fstream::binary);
-
- if (!stream)
- {
- std::string msg = "Failed to open file `";
- msg += file_path;
- msg += "`";
- throw std::runtime_error{msg};
- }
-
- stream.seekg(0, stream.end);
- auto size = stream.tellg();
- stream.seekg(0, stream.beg);
-
- _buffer.resize(size);
- stream.read(_buffer.data(), size);
-
- stream.close();
-
- // Prepare verifier
- _verifier = nnfw::cpp14::make_unique<Verifier>(
- reinterpret_cast<const std::uint8_t *>(_buffer.data()), _buffer.size());
-
- loadModel();
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-ir::Activation BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::convertActivation(
- const ActivationFunctionType type)
-{
- switch (type)
- {
- case ActivationFunctionType::ActivationFunctionType_NONE:
- return ir::Activation::NONE;
- case ActivationFunctionType::ActivationFunctionType_RELU:
- return ir::Activation::RELU;
- case ActivationFunctionType::ActivationFunctionType_RELU_N1_TO_1:
- return ir::Activation::RELU1;
- case ActivationFunctionType::ActivationFunctionType_RELU6:
- return ir::Activation::RELU6;
- case ActivationFunctionType::ActivationFunctionType_TANH:
- return ir::Activation::TANH;
- default:
- throw std::runtime_error(std::string("Unsupported activation type: ")
- .append(EnumNameActivationFunctionType(type)));
- }
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-ir::DataType
-BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::tensorTypeToDataType(const TensorType type)
-{
- switch (type)
- {
- case TensorType::TensorType_FLOAT32:
- return ir::DataType::FLOAT32;
- case TensorType::TensorType_INT32:
- return ir::DataType::INT32;
- case TensorType::TensorType_BOOL:
- return ir::DataType::BOOL8;
- case TensorType::TensorType_UINT8:
- return ir::DataType::UINT8;
- default:
- throw std::runtime_error(
- std::string("Unsupported tensor type: ").append(EnumNameTensorType(type)));
- }
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Tensor *tensor)
-{
- ir::Shape shape;
- // Shape
- const auto *tensor_shape = tensor->shape();
- for (const auto &dim : *tensor_shape)
- {
- shape.append(dim);
- }
- // Type
- ir::DataType data_type = tensorTypeToDataType(tensor->type());
- // Quantization
- auto q_params = tensor->quantization();
- float scale = 0.0;
- long zero_point = 0;
- if (q_params != nullptr)
- {
- if (q_params->scale())
- {
- if (q_params->scale()->size() != 1)
- {
- throw std::runtime_error("Only 1 scale for a tensor is supported.");
- }
- scale = q_params->scale()->Get(0);
- }
-
- if (q_params->zero_point())
- {
- if (q_params->zero_point()->size() != 1)
- {
- throw std::runtime_error("Only 1 zero_point value for a tensor is supported.");
- }
- zero_point = q_params->zero_point()->Get(0);
- // zero_point is long while TypeInfo.zero_point is defined as int32_t.
- assert(zero_point >= std::numeric_limits<int32_t>::min());
- assert(zero_point <= std::numeric_limits<int32_t>::max());
- }
- auto details = q_params->details_as_CustomQuantization();
- if (details != nullptr)
- throw std::runtime_error("Custom Quantization is not supported");
-
- if (q_params->scale() && q_params->zero_point())
- {
- data_type = ir::DataType::QUANT8_ASYMM;
- }
- }
- // Create TypeInfo
- ir::TypeInfo type_info(data_type, scale, zero_point);
- // Create operand
- const auto operand_index = _graph.addOperand(shape, type_info);
-
- // Constant tensors are indicated by non-empty data.
- const auto *data = _model->buffers()->Get(tensor->buffer())->data();
- if (data != nullptr)
- {
- auto ptr = nnfw::cpp14::make_unique<ir::CachedData>(data->data(), data->size());
- _graph.setOperandValue(operand_index, std::move(ptr));
- }
-
- // Name unused
- // auto name = tensor->name();
- // Variablie
- if (tensor->is_variable())
- throw std::runtime_error("Variable tensor not supported!");
-
- return operand_index;
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadOperationIO(const Operator *op,
- ir::OperandIndexSequence &inputs,
- ir::OperandIndexSequence &outputs)
-{
- for (const std::int32_t idx : *op->inputs())
- {
- inputs.append(_tensor_to_operand[idx]);
- }
-
- for (const std::int32_t idx : *op->outputs())
- {
- outputs.append(_tensor_to_operand[idx]);
- }
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-template <typename Param, typename OptionsType>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadStridesAndPaddings(Param &param,
- const OptionsType *options)
-{
- // Strides
- param.stride.vertical = options->stride_w();
- param.stride.horizontal = options->stride_h();
- // Paddings
- if (options->padding() == Padding::Padding_SAME)
- param.padding.type = ir::PaddingType::SAME;
- if (options->padding() == Padding::Padding_VALID)
- param.padding.type = ir::PaddingType::VALID;
- // param paddings indexes unused
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-template <typename Param>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(Param &param,
- const Pool2DOptions *options)
-{
- // Strides and Paddings
- loadStridesAndPaddings(param, options);
- // Filter width and height
- // Strides
- param.kw = options->filter_width();
- param.kh = options->filter_height();
- // Activation
- param.activation = convertActivation(options->fused_activation_function());
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadConv2D(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Conv2D::Param param;
- const auto *options = op->builtin_options_as_Conv2DOptions();
- param.activation = convertActivation(options->fused_activation_function());
- loadStridesAndPaddings(param, options);
- // Dilation h/w factor unused
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Conv2D(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadDepthwiseConv2D(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::DepthwiseConv2D::Param param;
- const auto *options = op->builtin_options_as_DepthwiseConv2DOptions();
- param.activation = convertActivation(options->fused_activation_function());
- loadStridesAndPaddings(param, options);
- // Multiplier
- param.multiplier = options->depth_multiplier();
- // Dilation h/w factor unused
- std::unique_ptr<ir::Operation> new_op(new ir::operation::DepthwiseConv2D(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadTransposeConv(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::TransposeConv::Param param;
- const auto *options = op->builtin_options_as_TransposeConvOptions();
- loadStridesAndPaddings(param, options);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::TransposeConv(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAvgPool2D(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::AvgPool2D::Param param;
- const auto *options = op->builtin_options_as_Pool2DOptions();
-
- loadPool2D(param, options);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::AvgPool2D(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReshape(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- // const auto *options = op->builtin_options_as_ReshapeOptions();
- // No params
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Reshape(inputs, outputs));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSoftmax(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Softmax::Param param;
- const auto *options = op->builtin_options_as_SoftmaxOptions();
- // Beta
- param.beta = options->beta();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Softmax(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMaxPool2D(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::MaxPool2D::Param param;
- const auto *options = op->builtin_options_as_Pool2DOptions();
-
- loadPool2D(param, options);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::MaxPool2D(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadConcatenation(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Concat::Param param;
- const auto *options = op->builtin_options_as_ConcatenationOptions();
- // Axis
- param.axis = options->axis();
- param.rank = _graph.operands().at(outputs.at(0)).shape().rank();
- // activation unused
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Concat(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadInstanceNorm(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::InstanceNorm::Param param;
- const auto *options = op->builtin_options_as_InstanceNormOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
- // Use default value 1e-5 if value of epsilon is zero
- param.epsilon = options->epsilon() == 0.f ? 1e-5 : options->epsilon();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::InstanceNorm(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadFC(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- const auto &input_operand = _graph.operands().at(inputs.at(ir::operation::FullyConnected::INPUT));
- auto &weights_operand = _graph.operands().at(inputs.at(ir::operation::FullyConnected::WEIGHT));
- if (input_operand.typeInfo().type() == ir::DataType::FLOAT32 &&
- weights_operand.typeInfo().type() == ir::DataType::QUANT8_ASYMM)
- {
- weights_operand.type(ir::DataType::QUANT8_SYMM);
- }
-
- ir::operation::FullyConnected::Param param;
- const auto *options = op->builtin_options_as_FullyConnectedOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
- // weights_format unused
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::FullyConnected(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAdd(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Add::Param param;
- const auto *options = op->builtin_options_as_AddOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Add(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSub(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Sub::Param param;
- const auto *options = op->builtin_options_as_SubOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Sub(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMul(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Mul::Param param;
- const auto *options = op->builtin_options_as_MulOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Mul(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadDiv(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Div::Param param;
- const auto *options = op->builtin_options_as_DivOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Div(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPack(const Operator *op)
-{
- // This runtime_error will be removed if the one of backend supports this operation
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Pack::Param param;
- const auto *options = op->builtin_options_as_PackOptions();
- param.num = options->values_count();
- param.axis = options->axis();
- param.rank = _graph.operands().at(outputs.at(0)).shape().rank();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Pack(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU(inputs, outputs));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu6(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU6(inputs, outputs));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeBilinear(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto size = inputs.at(1);
-
- // FIXME Handle ResizeBilinearOptions.
- if (!_graph.operands().at(size).isConstant())
- throw std::runtime_error("ResizeBilinear: non-constant 'size' is not supported.");
-
- std::vector<std::int32_t> size_v = _graph.operands().at(size).template asVector<std::int32_t>();
-
- ir::operation::ResizeBilinear::Param param;
- param.height_out = size_v[0];
- param.width_out = size_v[1];
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ResizeBilinear({input}, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRsqrt(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::RSQRT(inputs, outputs));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSqrt(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::SQRT(inputs, outputs));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSquaredDifference(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::SquaredDifference(inputs, outputs));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadTanh(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Tanh(inputs, outputs));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto perm = inputs.at(1);
-
- if (!_graph.operands().at(perm).isConstant())
- throw std::runtime_error("Transpose: non-constant 'perm' is not supported.");
-
- ir::operation::Transpose::Param param;
- param.perm = _graph.operands().at(perm).template asVector<int>();
- param.rank = _graph.operands().at(inputs.at(0)).shape().rank();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Transpose({input}, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMean(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto axes = inputs.at(1);
-
- if (!_graph.operands().at(axes).isConstant())
- throw std::runtime_error("Mean: non-constant 'axes' is not supported.");
-
- ir::operation::Mean::Param param;
- param.axes = _graph.operands().at(axes).template asVector<int>();
- param.keep_dims = op->builtin_options_as_ReducerOptions()->keep_dims();
- param.rank = _graph.operands().at(inputs.at(0)).shape().rank();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Mean({input}, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReduceMax(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto axes = inputs.at(1);
-
- // FIXME Handle ReducerOptions.
- if (!_graph.operands().at(axes).isConstant())
- throw std::runtime_error("ReduceSum: non-constant 'axes' is not supported.");
-
- ir::operation::ReduceMax::Param param;
- param.axes = _graph.operands().at(axes).template asVector<int>();
- param.keep_dims = op->builtin_options_as_ReducerOptions()->keep_dims();
- param.rank = _graph.operands().at(inputs.at(0)).shape().rank();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ReduceMax({input}, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPad(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Pad::Param param;
- param.rank = _graph.operands().at(inputs.at(0)).shape().rank();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Pad(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogistic(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Logistic(inputs, outputs));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadExp(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Exp(inputs, outputs));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadGather(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- ir::operation::Gather::Param param;
- param.axis = op->builtin_options_as_GatherOptions()->axis();
- param.rank = _graph.operands().at(inputs.at(0)).shape().rank();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Gather(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSpaceToBatchND(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op{new ir::operation::SpaceToBatchND{inputs, outputs}};
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBatchToSpaceND(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto block_shape = inputs.at(1);
- auto crops = inputs.at(2);
-
- if (!_graph.operands().at(crops).isConstant())
- throw std::runtime_error("BatchToSpaceND: non-constant 'crops' is not supported.");
-
- std::vector<std::int32_t> crops_v = _graph.operands().at(crops).template asVector<std::int32_t>();
- assert(crops_v.size() == 4);
- if (crops_v != std::vector<std::int32_t>{0, 0, 0, 0})
- throw std::runtime_error("BatchToSpaceND: 'crops' other than {0, 0, 0, 0} is not supported.");
-
- std::unique_ptr<ir::Operation> new_op{
- new ir::operation::BatchToSpaceND{{input, block_shape}, outputs}};
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReduceSum(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto axes = inputs.at(1);
-
- // FIXME Handle ReducerOptions.
- if (!_graph.operands().at(axes).isConstant())
- throw std::runtime_error("ReduceSum: non-constant 'axes' is not supported.");
-
- ir::operation::ReduceSum::Param param;
- param.axes = _graph.operands().at(axes).template asVector<int>();
- param.keep_dims = op->builtin_options_as_ReducerOptions()->keep_dims();
- param.rank = _graph.operands().at(inputs.at(0)).shape().rank();
-
- std::unique_ptr<ir::Operation> new_op{new ir::operation::ReduceSum{{input}, outputs, param}};
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- auto *op_code = _model->operator_codes()->Get(op->opcode_index());
- auto custom_op_id = op_code->custom_code()->str();
-
- auto constraint = ir::OperandConstraint::createExact(inputs.size());
-
- assert(op->custom_options_format() == CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS &&
- "Unsupported custom operation options format");
-
- size_t custom_op_data_size = op->custom_options()->size();
- auto custom_op_data = new char[custom_op_data_size];
- std::copy(op->custom_options()->begin(), op->custom_options()->end(), custom_op_data);
-
- ir::operation::Custom::Userdata userdata{};
- userdata.data = custom_op_data;
- userdata.size = custom_op_data_size;
-
- auto new_op = nnfw::cpp14::make_unique<ir::operation::Custom>(constraint, inputs, outputs,
- custom_op_id, userdata);
-
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSqueeze(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Squeeze::Param param{};
- const auto *options = op->builtin_options_as_SqueezeOptions();
- const auto *dims = options->squeeze_dims();
- if (dims)
- {
- if (dims->Length() > sizeof(param.dims) / sizeof(param.dims[0]))
- throw std::runtime_error("Squeeze: 'param.ndims' is out of range.");
- param.ndim = dims->Length();
- for (int i = 0; i < param.ndim; ++i)
- param.dims[i] = dims->Get(i);
- }
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Squeeze(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPrelu(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::PReLU(inputs, outputs));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSplit(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- // Notice : input order is strange for tflite split
- auto input = inputs.at(1);
- auto axis = inputs.at(0);
-
- // FIXME Handle SplitOptions.
- if (!_graph.operands().at(axis).isConstant())
- throw std::runtime_error("Split: non-constant 'axis' is not supported.");
-
- ir::operation::Split::Param param{};
- param.axis = _graph.operands().at(axis).template asScalar<int>();
- const auto *options = op->builtin_options_as_SplitOptions();
- param.num_splits = options->num_splits();
- param.rank = _graph.operands().at(inputs.at(0)).shape().rank();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Split({input}, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSlice(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Slice::Param param;
- param.rank = _graph.operands().at(inputs.at(0)).shape().rank();
-
- std::unique_ptr<ir::Operation> new_op{new ir::operation::Slice{inputs, outputs, param}};
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadStridedSlice(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::StridedSlice::Param param;
-
- const auto *options = op->builtin_options_as_StridedSliceOptions();
- param.begin_mask = options->begin_mask();
- param.end_mask = options->end_mask();
- param.shrink_axis_mask = options->shrink_axis_mask();
- param.rank = _graph.operands().at(inputs.at(0)).shape().rank();
-
- std::unique_ptr<ir::Operation> new_op{new ir::operation::StridedSlice{inputs, outputs, param}};
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadUnpack(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Unpack::Param param;
- const auto *options = op->builtin_options_as_UnpackOptions();
- param.num = options->num();
- param.axis = options->axis();
- param.rank = _graph.operands().at(inputs.at(0)).shape().rank();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Unpack(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMinimum(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Min(inputs, outputs));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMaximum(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Max(inputs, outputs));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadCast(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Cast(inputs, outputs));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadComparison(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Comparison::Param param;
-
- const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
-
- switch (builtin_op)
- {
- case BuiltinOperator::BuiltinOperator_EQUAL:
- param.comparison_type = ir::operation::Comparison::ComparisonType::Equal;
- break;
- case BuiltinOperator::BuiltinOperator_GREATER_EQUAL:
- param.comparison_type = ir::operation::Comparison::ComparisonType::GreaterEqual;
- break;
- case BuiltinOperator::BuiltinOperator_GREATER:
- param.comparison_type = ir::operation::Comparison::ComparisonType::Greater;
- break;
- case BuiltinOperator::BuiltinOperator_LESS_EQUAL:
- param.comparison_type = ir::operation::Comparison::ComparisonType::LessEqual;
- break;
- case BuiltinOperator::BuiltinOperator_LESS:
- param.comparison_type = ir::operation::Comparison::ComparisonType::Less;
- break;
- default:
- throw std::runtime_error(
- std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
- }
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Comparison(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadOneHot(const Operator *op)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::OneHot::Param param{};
- const auto *options = op->builtin_options_as_OneHotOptions();
- const auto axis = options->axis();
- const auto &indices = _graph.operands().at(inputs.at(ir::operation::OneHot::INDICES));
- auto indices_dims = indices.shape().rank();
- param.axis = (axis == -1) ? indices_dims : axis;
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::OneHot(inputs, outputs, param));
- _graph.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op)
-{
- const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
-
- switch (builtin_op)
- {
- case BuiltinOperator::BuiltinOperator_CONV_2D:
- loadConv2D(op);
- return;
- case BuiltinOperator::BuiltinOperator_AVERAGE_POOL_2D:
- loadAvgPool2D(op);
- return;
- case BuiltinOperator::BuiltinOperator_DEPTHWISE_CONV_2D:
- loadDepthwiseConv2D(op);
- return;
- case BuiltinOperator::BuiltinOperator_TRANSPOSE_CONV:
- loadTransposeConv(op);
- return;
- case BuiltinOperator::BuiltinOperator_RESHAPE:
- loadReshape(op);
- return;
- case BuiltinOperator::BuiltinOperator_SOFTMAX:
- loadSoftmax(op);
- return;
- case BuiltinOperator::BuiltinOperator_MAX_POOL_2D:
- loadMaxPool2D(op);
- return;
- case BuiltinOperator::BuiltinOperator_CONCATENATION:
- loadConcatenation(op);
- return;
- case BuiltinOperator::BuiltinOperator_FULLY_CONNECTED:
- loadFC(op);
- return;
- case BuiltinOperator::BuiltinOperator_ADD:
- loadAdd(op);
- return;
- case BuiltinOperator::BuiltinOperator_SUB:
- loadSub(op);
- return;
- case BuiltinOperator::BuiltinOperator_MUL:
- loadMul(op);
- return;
- case BuiltinOperator::BuiltinOperator_DIV:
- loadDiv(op);
- return;
- case BuiltinOperator::BuiltinOperator_PACK:
- loadPack(op);
- return;
- case BuiltinOperator::BuiltinOperator_RELU:
- loadRelu(op);
- return;
- case BuiltinOperator::BuiltinOperator_RELU6:
- loadRelu6(op);
- return;
- case BuiltinOperator::BuiltinOperator_RESIZE_BILINEAR:
- loadResizeBilinear(op);
- return;
- case BuiltinOperator::BuiltinOperator_RSQRT:
- loadRsqrt(op);
- return;
- case BuiltinOperator::BuiltinOperator_SQRT:
- loadSqrt(op);
- return;
- case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE:
- loadSquaredDifference(op);
- return;
- case BuiltinOperator::BuiltinOperator_TANH:
- loadTanh(op);
- return;
- case BuiltinOperator::BuiltinOperator_TRANSPOSE:
- loadTranspose(op);
- return;
- case BuiltinOperator::BuiltinOperator_MEAN:
- loadMean(op);
- return;
- case BuiltinOperator::BuiltinOperator_REDUCE_MAX:
- loadReduceMax(op);
- return;
- case BuiltinOperator::BuiltinOperator_PAD:
- loadPad(op);
- return;
- case BuiltinOperator::BuiltinOperator_LOGISTIC:
- loadLogistic(op);
- return;
- case BuiltinOperator::BuiltinOperator_EXP:
- loadExp(op);
- return;
- case BuiltinOperator::BuiltinOperator_GATHER:
- loadGather(op);
- return;
- case BuiltinOperator::BuiltinOperator_SPACE_TO_BATCH_ND:
- loadSpaceToBatchND(op);
- return;
- case BuiltinOperator::BuiltinOperator_BATCH_TO_SPACE_ND:
- loadBatchToSpaceND(op);
- return;
- case BuiltinOperator::BuiltinOperator_SUM:
- loadReduceSum(op);
- return;
- case BuiltinOperator::BuiltinOperator_CUSTOM:
- loadCustom(op);
- return;
- case BuiltinOperator::BuiltinOperator_SQUEEZE:
- loadSqueeze(op);
- return;
- case BuiltinOperator::BuiltinOperator_PRELU:
- loadPrelu(op);
- return;
- case BuiltinOperator::BuiltinOperator_SPLIT:
- loadSplit(op);
- return;
- case BuiltinOperator::BuiltinOperator_SLICE:
- loadSlice(op);
- return;
- case BuiltinOperator::BuiltinOperator_STRIDED_SLICE:
- loadStridedSlice(op);
- return;
- case BuiltinOperator::BuiltinOperator_UNPACK:
- loadUnpack(op);
- return;
- case BuiltinOperator::BuiltinOperator_MINIMUM:
- loadMinimum(op);
- return;
- case BuiltinOperator::BuiltinOperator_MAXIMUM:
- loadMaximum(op);
- return;
- case BuiltinOperator::BuiltinOperator_CAST:
- loadCast(op);
- return;
- case BuiltinOperator::BuiltinOperator_EQUAL:
- case BuiltinOperator::BuiltinOperator_GREATER_EQUAL:
- case BuiltinOperator::BuiltinOperator_GREATER:
- case BuiltinOperator::BuiltinOperator_LESS_EQUAL:
- case BuiltinOperator::BuiltinOperator_LESS:
- loadComparison(op);
- return;
- case BuiltinOperator::BuiltinOperator_ONE_HOT:
- loadOneHot(op);
- return;
- default:
- throw std::runtime_error(
- std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
- }
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadModel()
-{
- LoaderDomain::VerifyModelBuffer(*_verifier.get());
- _model = LoaderDomain::GetModel(_buffer.data());
- // Version unused
- // const auto version = _model->version();
- // Description unused
- // const auto *description = _model->description();
- // Metabuffer unsued
- // const auto *metadata_buffer = _model->metadata_buffer();
- // Load subgraphs and mapping from op to subgraph
- for (const auto *subgraph : *_model->subgraphs())
- {
- static_cast<SpecificLoader *>(this)->loadSubgraph(subgraph);
- }
-
- _graph.finishBuilding();
-}
-
-} // namespace base_loader
-} // namespace neurun
-
-#endif //__BASE_LOADER_BASE_LOADER_H__
diff --git a/runtime/neurun/frontend/circle/CMakeLists.txt b/runtime/neurun/frontend/circle/CMakeLists.txt
deleted file mode 100644
index fea9725c2..000000000
--- a/runtime/neurun/frontend/circle/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-if (NOT BUILD_CIRCLE_LOADER)
- return()
-endif ()
-
-nnfw_find_package(FlatBuffersSource REQUIRED)
-
-set(CIRCLE_LOADER_SOURCES src/circle_loader.cc)
-
-add_library(circle_loader SHARED ${CIRCLE_LOADER_SOURCES})
-
-target_include_directories(circle_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_include_directories(circle_loader PRIVATE ${FlatBuffersSource_DIR}/include)
-
-target_link_libraries(circle_loader PUBLIC neurun_core)
-target_link_libraries(circle_loader PRIVATE base_loader nnfw_lib_cpp14 nnfw_common nnfw_coverage)
-
-install(TARGETS circle_loader DESTINATION lib)
diff --git a/runtime/neurun/frontend/circle/include/circle_loader.h b/runtime/neurun/frontend/circle/include/circle_loader.h
deleted file mode 100644
index 898bd32b1..000000000
--- a/runtime/neurun/frontend/circle/include/circle_loader.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLE_CIRCLE_LOADER_H__
-#define __CIRCLE_CIRCLE_LOADER_H__
-
-#include "ir/Graph.h"
-
-#include <memory>
-
-namespace neurun
-{
-namespace circle_loader
-{
-std::unique_ptr<ir::Graph> loadModel(const char *filename);
-} // namespace circle_loader
-} // namespace neurun
-
-#endif // __CIRCLE_CIRCLE_LOADER_H__
diff --git a/runtime/neurun/frontend/circle/src/circle_loader.cc b/runtime/neurun/frontend/circle/src/circle_loader.cc
deleted file mode 100644
index cc48a793d..000000000
--- a/runtime/neurun/frontend/circle/src/circle_loader.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "circle_loader.h"
-#include "base_loader.h"
-#include "circle_schema_generated.h"
-
-namespace neurun
-{
-namespace circle_loader
-{
-
-namespace
-{
-
-struct LoaderDomain
-{
- using Verifier = flatbuffers::Verifier;
- using ActivationFunctionType = circle::ActivationFunctionType;
- using Buffer = circle::Buffer;
- using BuiltinOperator = circle::BuiltinOperator;
- using CustomOptionsFormat = circle::CustomOptionsFormat;
- using Model = circle::Model;
- using Operator = circle::Operator;
- using Padding = circle::Padding;
- using Pool2DOptions = circle::Pool2DOptions;
- using Tensor = circle::Tensor;
- using TensorType = circle::TensorType;
- using SubGraph = circle::SubGraph;
-
- static const char *EnumNameBuiltinOperator(BuiltinOperator e)
- {
- return circle::EnumNameBuiltinOperator(e);
- }
- static const char *EnumNameActivationFunctionType(ActivationFunctionType e)
- {
- return circle::EnumNameActivationFunctionType(e);
- }
- static const char *EnumNameTensorType(TensorType e) { return circle::EnumNameTensorType(e); }
- static const Model *GetModel(const void *buf) { return circle::GetModel(buf); }
- static bool VerifyModelBuffer(Verifier &verifier) { return circle::VerifyModelBuffer(verifier); }
-};
-
-class CircleLoader final : public base_loader::BaseLoader<LoaderDomain, CircleLoader>
-{
-public:
- using BaseLoader::BaseLoader;
-
- void loadSubgraph(const circle::SubGraph *subgraph)
- {
- // Load tensors
- _tensor_to_operand.resize(subgraph->tensors()->size());
- for (flatbuffers::uoffset_t i = 0; i < subgraph->tensors()->size(); ++i)
- {
- _tensor_to_operand[i] = loadOperand(subgraph->tensors()->Get(i));
- }
- // Set inputs
- for (const std::int32_t input_ind : *subgraph->inputs())
- {
- _graph.addInput(_tensor_to_operand[input_ind]);
- }
- // Set outputs
- for (const std::int32_t output_ind : *subgraph->outputs())
- {
- _graph.addOutput(_tensor_to_operand[output_ind]);
- }
- // Create operations
- for (const auto *op : *subgraph->operators())
- {
- CircleLoader::loadOperation(op);
- }
-
- (void)subgraph->data_format();
- }
-
- void loadOperation(const circle::Operator *op)
- {
- const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
-
- switch (builtin_op)
- {
- case circle::BuiltinOperator::BuiltinOperator_INSTANCE_NORM:
- loadInstanceNorm(op);
- return;
- default:
- BaseLoader::loadOperation(op);
- return;
- }
- }
-};
-
-} // namespace
-
-std::unique_ptr<ir::Graph> loadModel(const char *filename)
-{
- auto graph = nnfw::cpp14::make_unique<ir::Graph>();
- CircleLoader loader(*graph);
- loader.loadFromFile(filename);
- return graph;
-}
-
-} // namespace circle_loader
-} // namespace neurun
diff --git a/runtime/neurun/frontend/circle/src/circle_schema_generated.h b/runtime/neurun/frontend/circle/src/circle_schema_generated.h
deleted file mode 100644
index 5f7aad462..000000000
--- a/runtime/neurun/frontend/circle/src/circle_schema_generated.h
+++ /dev/null
@@ -1,7546 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-// automatically generated by the FlatBuffers compiler, do not modify
-
-#ifndef FLATBUFFERS_GENERATED_CIRCLESCHEMA_CIRCLE_H_
-#define FLATBUFFERS_GENERATED_CIRCLESCHEMA_CIRCLE_H_
-
-#include "flatbuffers/flatbuffers.h"
-
-namespace circle
-{
-
-struct CustomQuantization;
-
-struct QuantizationParameters;
-
-struct Tensor;
-
-struct Conv2DOptions;
-
-struct Pool2DOptions;
-
-struct DepthwiseConv2DOptions;
-
-struct ConcatEmbeddingsOptions;
-
-struct LSHProjectionOptions;
-
-struct SVDFOptions;
-
-struct RNNOptions;
-
-struct SequenceRNNOptions;
-
-struct BidirectionalSequenceRNNOptions;
-
-struct FullyConnectedOptions;
-
-struct SoftmaxOptions;
-
-struct ConcatenationOptions;
-
-struct AddOptions;
-
-struct MulOptions;
-
-struct L2NormOptions;
-
-struct LocalResponseNormalizationOptions;
-
-struct LSTMOptions;
-
-struct UnidirectionalSequenceLSTMOptions;
-
-struct BidirectionalSequenceLSTMOptions;
-
-struct ResizeBilinearOptions;
-
-struct ResizeNearestNeighborOptions;
-
-struct CallOptions;
-
-struct PadOptions;
-
-struct PadV2Options;
-
-struct ReshapeOptions;
-
-struct SpaceToBatchNDOptions;
-
-struct BatchToSpaceNDOptions;
-
-struct SkipGramOptions;
-
-struct SpaceToDepthOptions;
-
-struct SubOptions;
-
-struct DivOptions;
-
-struct TopKV2Options;
-
-struct EmbeddingLookupSparseOptions;
-
-struct GatherOptions;
-
-struct TransposeOptions;
-
-struct ExpOptions;
-
-struct ReducerOptions;
-
-struct SqueezeOptions;
-
-struct SplitOptions;
-
-struct SplitVOptions;
-
-struct StridedSliceOptions;
-
-struct LogSoftmaxOptions;
-
-struct CastOptions;
-
-struct DequantizeOptions;
-
-struct MaximumMinimumOptions;
-
-struct TileOptions;
-
-struct ArgMaxOptions;
-
-struct ArgMinOptions;
-
-struct GreaterOptions;
-
-struct GreaterEqualOptions;
-
-struct LessOptions;
-
-struct LessEqualOptions;
-
-struct NegOptions;
-
-struct SelectOptions;
-
-struct SliceOptions;
-
-struct TransposeConvOptions;
-
-struct ExpandDimsOptions;
-
-struct SparseToDenseOptions;
-
-struct EqualOptions;
-
-struct NotEqualOptions;
-
-struct ShapeOptions;
-
-struct PowOptions;
-
-struct FakeQuantOptions;
-
-struct PackOptions;
-
-struct LogicalOrOptions;
-
-struct OneHotOptions;
-
-struct AbsOptions;
-
-struct LogicalAndOptions;
-
-struct LogicalNotOptions;
-
-struct UnpackOptions;
-
-struct FloorDivOptions;
-
-struct SquareOptions;
-
-struct ZerosLikeOptions;
-
-struct FillOptions;
-
-struct FloorModOptions;
-
-struct RangeOptions;
-
-struct LeakyReluOptions;
-
-struct SquaredDifferenceOptions;
-
-struct MirrorPadOptions;
-
-struct InstanceNormOptions;
-
-struct OperatorCode;
-
-struct Operator;
-
-struct SubGraph;
-
-struct Buffer;
-
-struct Model;
-
-enum TensorType
-{
- TensorType_FLOAT32 = 0,
- TensorType_FLOAT16 = 1,
- TensorType_INT32 = 2,
- TensorType_UINT8 = 3,
- TensorType_INT64 = 4,
- TensorType_STRING = 5,
- TensorType_BOOL = 6,
- TensorType_INT16 = 7,
- TensorType_COMPLEX64 = 8,
- TensorType_INT8 = 9,
- TensorType_MIN = TensorType_FLOAT32,
- TensorType_MAX = TensorType_INT8
-};
-
-inline const TensorType (&EnumValuesTensorType())[10]
-{
- static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32,
- TensorType_UINT8, TensorType_INT64, TensorType_STRING,
- TensorType_BOOL, TensorType_INT16, TensorType_COMPLEX64,
- TensorType_INT8};
- return values;
-}
-
-inline const char *const *EnumNamesTensorType()
-{
- static const char *const names[] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", "INT64", "STRING",
- "BOOL", "INT16", "COMPLEX64", "INT8", nullptr};
- return names;
-}
-
-inline const char *EnumNameTensorType(TensorType e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesTensorType()[index];
-}
-
-enum QuantizationDetails
-{
- QuantizationDetails_NONE = 0,
- QuantizationDetails_CustomQuantization = 1,
- QuantizationDetails_MIN = QuantizationDetails_NONE,
- QuantizationDetails_MAX = QuantizationDetails_CustomQuantization
-};
-
-inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2]
-{
- static const QuantizationDetails values[] = {QuantizationDetails_NONE,
- QuantizationDetails_CustomQuantization};
- return values;
-}
-
-inline const char *const *EnumNamesQuantizationDetails()
-{
- static const char *const names[] = {"NONE", "CustomQuantization", nullptr};
- return names;
-}
-
-inline const char *EnumNameQuantizationDetails(QuantizationDetails e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesQuantizationDetails()[index];
-}
-
-template <typename T> struct QuantizationDetailsTraits
-{
- static const QuantizationDetails enum_value = QuantizationDetails_NONE;
-};
-
-template <> struct QuantizationDetailsTraits<CustomQuantization>
-{
- static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
-};
-
-bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
- QuantizationDetails type);
-bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
- const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
- const flatbuffers::Vector<uint8_t> *types);
-
-enum BuiltinOperator
-{
- BuiltinOperator_ADD = 0,
- BuiltinOperator_AVERAGE_POOL_2D = 1,
- BuiltinOperator_CONCATENATION = 2,
- BuiltinOperator_CONV_2D = 3,
- BuiltinOperator_DEPTHWISE_CONV_2D = 4,
- BuiltinOperator_DEQUANTIZE = 6,
- BuiltinOperator_EMBEDDING_LOOKUP = 7,
- BuiltinOperator_FLOOR = 8,
- BuiltinOperator_FULLY_CONNECTED = 9,
- BuiltinOperator_HASHTABLE_LOOKUP = 10,
- BuiltinOperator_L2_NORMALIZATION = 11,
- BuiltinOperator_L2_POOL_2D = 12,
- BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION = 13,
- BuiltinOperator_LOGISTIC = 14,
- BuiltinOperator_LSH_PROJECTION = 15,
- BuiltinOperator_LSTM = 16,
- BuiltinOperator_MAX_POOL_2D = 17,
- BuiltinOperator_MUL = 18,
- BuiltinOperator_RELU = 19,
- BuiltinOperator_RELU_N1_TO_1 = 20,
- BuiltinOperator_RELU6 = 21,
- BuiltinOperator_RESHAPE = 22,
- BuiltinOperator_RESIZE_BILINEAR = 23,
- BuiltinOperator_RNN = 24,
- BuiltinOperator_SOFTMAX = 25,
- BuiltinOperator_SPACE_TO_DEPTH = 26,
- BuiltinOperator_SVDF = 27,
- BuiltinOperator_TANH = 28,
- BuiltinOperator_CONCAT_EMBEDDINGS = 29,
- BuiltinOperator_SKIP_GRAM = 30,
- BuiltinOperator_CALL = 31,
- BuiltinOperator_CUSTOM = 32,
- BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33,
- BuiltinOperator_PAD = 34,
- BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35,
- BuiltinOperator_GATHER = 36,
- BuiltinOperator_BATCH_TO_SPACE_ND = 37,
- BuiltinOperator_SPACE_TO_BATCH_ND = 38,
- BuiltinOperator_TRANSPOSE = 39,
- BuiltinOperator_MEAN = 40,
- BuiltinOperator_SUB = 41,
- BuiltinOperator_DIV = 42,
- BuiltinOperator_SQUEEZE = 43,
- BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
- BuiltinOperator_STRIDED_SLICE = 45,
- BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46,
- BuiltinOperator_EXP = 47,
- BuiltinOperator_TOPK_V2 = 48,
- BuiltinOperator_SPLIT = 49,
- BuiltinOperator_LOG_SOFTMAX = 50,
- BuiltinOperator_DELEGATE = 51,
- BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52,
- BuiltinOperator_CAST = 53,
- BuiltinOperator_PRELU = 54,
- BuiltinOperator_MAXIMUM = 55,
- BuiltinOperator_ARG_MAX = 56,
- BuiltinOperator_MINIMUM = 57,
- BuiltinOperator_LESS = 58,
- BuiltinOperator_NEG = 59,
- BuiltinOperator_PADV2 = 60,
- BuiltinOperator_GREATER = 61,
- BuiltinOperator_GREATER_EQUAL = 62,
- BuiltinOperator_LESS_EQUAL = 63,
- BuiltinOperator_SELECT = 64,
- BuiltinOperator_SLICE = 65,
- BuiltinOperator_SIN = 66,
- BuiltinOperator_TRANSPOSE_CONV = 67,
- BuiltinOperator_SPARSE_TO_DENSE = 68,
- BuiltinOperator_TILE = 69,
- BuiltinOperator_EXPAND_DIMS = 70,
- BuiltinOperator_EQUAL = 71,
- BuiltinOperator_NOT_EQUAL = 72,
- BuiltinOperator_LOG = 73,
- BuiltinOperator_SUM = 74,
- BuiltinOperator_SQRT = 75,
- BuiltinOperator_RSQRT = 76,
- BuiltinOperator_SHAPE = 77,
- BuiltinOperator_POW = 78,
- BuiltinOperator_ARG_MIN = 79,
- BuiltinOperator_FAKE_QUANT = 80,
- BuiltinOperator_REDUCE_PROD = 81,
- BuiltinOperator_REDUCE_MAX = 82,
- BuiltinOperator_PACK = 83,
- BuiltinOperator_LOGICAL_OR = 84,
- BuiltinOperator_ONE_HOT = 85,
- BuiltinOperator_LOGICAL_AND = 86,
- BuiltinOperator_LOGICAL_NOT = 87,
- BuiltinOperator_UNPACK = 88,
- BuiltinOperator_REDUCE_MIN = 89,
- BuiltinOperator_FLOOR_DIV = 90,
- BuiltinOperator_REDUCE_ANY = 91,
- BuiltinOperator_SQUARE = 92,
- BuiltinOperator_ZEROS_LIKE = 93,
- BuiltinOperator_FILL = 94,
- BuiltinOperator_FLOOR_MOD = 95,
- BuiltinOperator_RANGE = 96,
- BuiltinOperator_RESIZE_NEAREST_NEIGHBOR = 97,
- BuiltinOperator_LEAKY_RELU = 98,
- BuiltinOperator_SQUARED_DIFFERENCE = 99,
- BuiltinOperator_MIRROR_PAD = 100,
- BuiltinOperator_ABS = 101,
- BuiltinOperator_SPLIT_V = 102,
- BuiltinOperator_INSTANCE_NORM = 254,
- BuiltinOperator_MIN = BuiltinOperator_ADD,
- BuiltinOperator_MAX = BuiltinOperator_INSTANCE_NORM
-};
-
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[103]
-{
- static const BuiltinOperator values[] = {BuiltinOperator_ADD,
- BuiltinOperator_AVERAGE_POOL_2D,
- BuiltinOperator_CONCATENATION,
- BuiltinOperator_CONV_2D,
- BuiltinOperator_DEPTHWISE_CONV_2D,
- BuiltinOperator_DEQUANTIZE,
- BuiltinOperator_EMBEDDING_LOOKUP,
- BuiltinOperator_FLOOR,
- BuiltinOperator_FULLY_CONNECTED,
- BuiltinOperator_HASHTABLE_LOOKUP,
- BuiltinOperator_L2_NORMALIZATION,
- BuiltinOperator_L2_POOL_2D,
- BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
- BuiltinOperator_LOGISTIC,
- BuiltinOperator_LSH_PROJECTION,
- BuiltinOperator_LSTM,
- BuiltinOperator_MAX_POOL_2D,
- BuiltinOperator_MUL,
- BuiltinOperator_RELU,
- BuiltinOperator_RELU_N1_TO_1,
- BuiltinOperator_RELU6,
- BuiltinOperator_RESHAPE,
- BuiltinOperator_RESIZE_BILINEAR,
- BuiltinOperator_RNN,
- BuiltinOperator_SOFTMAX,
- BuiltinOperator_SPACE_TO_DEPTH,
- BuiltinOperator_SVDF,
- BuiltinOperator_TANH,
- BuiltinOperator_CONCAT_EMBEDDINGS,
- BuiltinOperator_SKIP_GRAM,
- BuiltinOperator_CALL,
- BuiltinOperator_CUSTOM,
- BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
- BuiltinOperator_PAD,
- BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
- BuiltinOperator_GATHER,
- BuiltinOperator_BATCH_TO_SPACE_ND,
- BuiltinOperator_SPACE_TO_BATCH_ND,
- BuiltinOperator_TRANSPOSE,
- BuiltinOperator_MEAN,
- BuiltinOperator_SUB,
- BuiltinOperator_DIV,
- BuiltinOperator_SQUEEZE,
- BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
- BuiltinOperator_STRIDED_SLICE,
- BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
- BuiltinOperator_EXP,
- BuiltinOperator_TOPK_V2,
- BuiltinOperator_SPLIT,
- BuiltinOperator_LOG_SOFTMAX,
- BuiltinOperator_DELEGATE,
- BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM,
- BuiltinOperator_CAST,
- BuiltinOperator_PRELU,
- BuiltinOperator_MAXIMUM,
- BuiltinOperator_ARG_MAX,
- BuiltinOperator_MINIMUM,
- BuiltinOperator_LESS,
- BuiltinOperator_NEG,
- BuiltinOperator_PADV2,
- BuiltinOperator_GREATER,
- BuiltinOperator_GREATER_EQUAL,
- BuiltinOperator_LESS_EQUAL,
- BuiltinOperator_SELECT,
- BuiltinOperator_SLICE,
- BuiltinOperator_SIN,
- BuiltinOperator_TRANSPOSE_CONV,
- BuiltinOperator_SPARSE_TO_DENSE,
- BuiltinOperator_TILE,
- BuiltinOperator_EXPAND_DIMS,
- BuiltinOperator_EQUAL,
- BuiltinOperator_NOT_EQUAL,
- BuiltinOperator_LOG,
- BuiltinOperator_SUM,
- BuiltinOperator_SQRT,
- BuiltinOperator_RSQRT,
- BuiltinOperator_SHAPE,
- BuiltinOperator_POW,
- BuiltinOperator_ARG_MIN,
- BuiltinOperator_FAKE_QUANT,
- BuiltinOperator_REDUCE_PROD,
- BuiltinOperator_REDUCE_MAX,
- BuiltinOperator_PACK,
- BuiltinOperator_LOGICAL_OR,
- BuiltinOperator_ONE_HOT,
- BuiltinOperator_LOGICAL_AND,
- BuiltinOperator_LOGICAL_NOT,
- BuiltinOperator_UNPACK,
- BuiltinOperator_REDUCE_MIN,
- BuiltinOperator_FLOOR_DIV,
- BuiltinOperator_REDUCE_ANY,
- BuiltinOperator_SQUARE,
- BuiltinOperator_ZEROS_LIKE,
- BuiltinOperator_FILL,
- BuiltinOperator_FLOOR_MOD,
- BuiltinOperator_RANGE,
- BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
- BuiltinOperator_LEAKY_RELU,
- BuiltinOperator_SQUARED_DIFFERENCE,
- BuiltinOperator_MIRROR_PAD,
- BuiltinOperator_ABS,
- BuiltinOperator_SPLIT_V,
- BuiltinOperator_INSTANCE_NORM};
- return values;
-}
-
-inline const char *const *EnumNamesBuiltinOperator()
-{
- static const char *const names[] = {"ADD",
- "AVERAGE_POOL_2D",
- "CONCATENATION",
- "CONV_2D",
- "DEPTHWISE_CONV_2D",
- "",
- "DEQUANTIZE",
- "EMBEDDING_LOOKUP",
- "FLOOR",
- "FULLY_CONNECTED",
- "HASHTABLE_LOOKUP",
- "L2_NORMALIZATION",
- "L2_POOL_2D",
- "LOCAL_RESPONSE_NORMALIZATION",
- "LOGISTIC",
- "LSH_PROJECTION",
- "LSTM",
- "MAX_POOL_2D",
- "MUL",
- "RELU",
- "RELU_N1_TO_1",
- "RELU6",
- "RESHAPE",
- "RESIZE_BILINEAR",
- "RNN",
- "SOFTMAX",
- "SPACE_TO_DEPTH",
- "SVDF",
- "TANH",
- "CONCAT_EMBEDDINGS",
- "SKIP_GRAM",
- "CALL",
- "CUSTOM",
- "EMBEDDING_LOOKUP_SPARSE",
- "PAD",
- "UNIDIRECTIONAL_SEQUENCE_RNN",
- "GATHER",
- "BATCH_TO_SPACE_ND",
- "SPACE_TO_BATCH_ND",
- "TRANSPOSE",
- "MEAN",
- "SUB",
- "DIV",
- "SQUEEZE",
- "UNIDIRECTIONAL_SEQUENCE_LSTM",
- "STRIDED_SLICE",
- "BIDIRECTIONAL_SEQUENCE_RNN",
- "EXP",
- "TOPK_V2",
- "SPLIT",
- "LOG_SOFTMAX",
- "DELEGATE",
- "BIDIRECTIONAL_SEQUENCE_LSTM",
- "CAST",
- "PRELU",
- "MAXIMUM",
- "ARG_MAX",
- "MINIMUM",
- "LESS",
- "NEG",
- "PADV2",
- "GREATER",
- "GREATER_EQUAL",
- "LESS_EQUAL",
- "SELECT",
- "SLICE",
- "SIN",
- "TRANSPOSE_CONV",
- "SPARSE_TO_DENSE",
- "TILE",
- "EXPAND_DIMS",
- "EQUAL",
- "NOT_EQUAL",
- "LOG",
- "SUM",
- "SQRT",
- "RSQRT",
- "SHAPE",
- "POW",
- "ARG_MIN",
- "FAKE_QUANT",
- "REDUCE_PROD",
- "REDUCE_MAX",
- "PACK",
- "LOGICAL_OR",
- "ONE_HOT",
- "LOGICAL_AND",
- "LOGICAL_NOT",
- "UNPACK",
- "REDUCE_MIN",
- "FLOOR_DIV",
- "REDUCE_ANY",
- "SQUARE",
- "ZEROS_LIKE",
- "FILL",
- "FLOOR_MOD",
- "RANGE",
- "RESIZE_NEAREST_NEIGHBOR",
- "LEAKY_RELU",
- "SQUARED_DIFFERENCE",
- "MIRROR_PAD",
- "ABS",
- "SPLIT_V",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "INSTANCE_NORM",
- nullptr};
- return names;
-}
-
-inline const char *EnumNameBuiltinOperator(BuiltinOperator e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesBuiltinOperator()[index];
-}
-
-enum BuiltinOptions
-{
- BuiltinOptions_NONE = 0,
- BuiltinOptions_Conv2DOptions = 1,
- BuiltinOptions_DepthwiseConv2DOptions = 2,
- BuiltinOptions_ConcatEmbeddingsOptions = 3,
- BuiltinOptions_LSHProjectionOptions = 4,
- BuiltinOptions_Pool2DOptions = 5,
- BuiltinOptions_SVDFOptions = 6,
- BuiltinOptions_RNNOptions = 7,
- BuiltinOptions_FullyConnectedOptions = 8,
- BuiltinOptions_SoftmaxOptions = 9,
- BuiltinOptions_ConcatenationOptions = 10,
- BuiltinOptions_AddOptions = 11,
- BuiltinOptions_L2NormOptions = 12,
- BuiltinOptions_LocalResponseNormalizationOptions = 13,
- BuiltinOptions_LSTMOptions = 14,
- BuiltinOptions_ResizeBilinearOptions = 15,
- BuiltinOptions_CallOptions = 16,
- BuiltinOptions_ReshapeOptions = 17,
- BuiltinOptions_SkipGramOptions = 18,
- BuiltinOptions_SpaceToDepthOptions = 19,
- BuiltinOptions_EmbeddingLookupSparseOptions = 20,
- BuiltinOptions_MulOptions = 21,
- BuiltinOptions_PadOptions = 22,
- BuiltinOptions_GatherOptions = 23,
- BuiltinOptions_BatchToSpaceNDOptions = 24,
- BuiltinOptions_SpaceToBatchNDOptions = 25,
- BuiltinOptions_TransposeOptions = 26,
- BuiltinOptions_ReducerOptions = 27,
- BuiltinOptions_SubOptions = 28,
- BuiltinOptions_DivOptions = 29,
- BuiltinOptions_SqueezeOptions = 30,
- BuiltinOptions_SequenceRNNOptions = 31,
- BuiltinOptions_StridedSliceOptions = 32,
- BuiltinOptions_ExpOptions = 33,
- BuiltinOptions_TopKV2Options = 34,
- BuiltinOptions_SplitOptions = 35,
- BuiltinOptions_LogSoftmaxOptions = 36,
- BuiltinOptions_CastOptions = 37,
- BuiltinOptions_DequantizeOptions = 38,
- BuiltinOptions_MaximumMinimumOptions = 39,
- BuiltinOptions_ArgMaxOptions = 40,
- BuiltinOptions_LessOptions = 41,
- BuiltinOptions_NegOptions = 42,
- BuiltinOptions_PadV2Options = 43,
- BuiltinOptions_GreaterOptions = 44,
- BuiltinOptions_GreaterEqualOptions = 45,
- BuiltinOptions_LessEqualOptions = 46,
- BuiltinOptions_SelectOptions = 47,
- BuiltinOptions_SliceOptions = 48,
- BuiltinOptions_TransposeConvOptions = 49,
- BuiltinOptions_SparseToDenseOptions = 50,
- BuiltinOptions_TileOptions = 51,
- BuiltinOptions_ExpandDimsOptions = 52,
- BuiltinOptions_EqualOptions = 53,
- BuiltinOptions_NotEqualOptions = 54,
- BuiltinOptions_ShapeOptions = 55,
- BuiltinOptions_PowOptions = 56,
- BuiltinOptions_ArgMinOptions = 57,
- BuiltinOptions_FakeQuantOptions = 58,
- BuiltinOptions_PackOptions = 59,
- BuiltinOptions_LogicalOrOptions = 60,
- BuiltinOptions_OneHotOptions = 61,
- BuiltinOptions_LogicalAndOptions = 62,
- BuiltinOptions_LogicalNotOptions = 63,
- BuiltinOptions_UnpackOptions = 64,
- BuiltinOptions_FloorDivOptions = 65,
- BuiltinOptions_SquareOptions = 66,
- BuiltinOptions_ZerosLikeOptions = 67,
- BuiltinOptions_FillOptions = 68,
- BuiltinOptions_BidirectionalSequenceLSTMOptions = 69,
- BuiltinOptions_BidirectionalSequenceRNNOptions = 70,
- BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71,
- BuiltinOptions_FloorModOptions = 72,
- BuiltinOptions_RangeOptions = 73,
- BuiltinOptions_ResizeNearestNeighborOptions = 74,
- BuiltinOptions_LeakyReluOptions = 75,
- BuiltinOptions_SquaredDifferenceOptions = 76,
- BuiltinOptions_MirrorPadOptions = 77,
- BuiltinOptions_AbsOptions = 78,
- BuiltinOptions_SplitVOptions = 79,
- BuiltinOptions_InstanceNormOptions = 80,
- BuiltinOptions_MIN = BuiltinOptions_NONE,
- BuiltinOptions_MAX = BuiltinOptions_InstanceNormOptions
-};
-
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[81]
-{
- static const BuiltinOptions values[] = {BuiltinOptions_NONE,
- BuiltinOptions_Conv2DOptions,
- BuiltinOptions_DepthwiseConv2DOptions,
- BuiltinOptions_ConcatEmbeddingsOptions,
- BuiltinOptions_LSHProjectionOptions,
- BuiltinOptions_Pool2DOptions,
- BuiltinOptions_SVDFOptions,
- BuiltinOptions_RNNOptions,
- BuiltinOptions_FullyConnectedOptions,
- BuiltinOptions_SoftmaxOptions,
- BuiltinOptions_ConcatenationOptions,
- BuiltinOptions_AddOptions,
- BuiltinOptions_L2NormOptions,
- BuiltinOptions_LocalResponseNormalizationOptions,
- BuiltinOptions_LSTMOptions,
- BuiltinOptions_ResizeBilinearOptions,
- BuiltinOptions_CallOptions,
- BuiltinOptions_ReshapeOptions,
- BuiltinOptions_SkipGramOptions,
- BuiltinOptions_SpaceToDepthOptions,
- BuiltinOptions_EmbeddingLookupSparseOptions,
- BuiltinOptions_MulOptions,
- BuiltinOptions_PadOptions,
- BuiltinOptions_GatherOptions,
- BuiltinOptions_BatchToSpaceNDOptions,
- BuiltinOptions_SpaceToBatchNDOptions,
- BuiltinOptions_TransposeOptions,
- BuiltinOptions_ReducerOptions,
- BuiltinOptions_SubOptions,
- BuiltinOptions_DivOptions,
- BuiltinOptions_SqueezeOptions,
- BuiltinOptions_SequenceRNNOptions,
- BuiltinOptions_StridedSliceOptions,
- BuiltinOptions_ExpOptions,
- BuiltinOptions_TopKV2Options,
- BuiltinOptions_SplitOptions,
- BuiltinOptions_LogSoftmaxOptions,
- BuiltinOptions_CastOptions,
- BuiltinOptions_DequantizeOptions,
- BuiltinOptions_MaximumMinimumOptions,
- BuiltinOptions_ArgMaxOptions,
- BuiltinOptions_LessOptions,
- BuiltinOptions_NegOptions,
- BuiltinOptions_PadV2Options,
- BuiltinOptions_GreaterOptions,
- BuiltinOptions_GreaterEqualOptions,
- BuiltinOptions_LessEqualOptions,
- BuiltinOptions_SelectOptions,
- BuiltinOptions_SliceOptions,
- BuiltinOptions_TransposeConvOptions,
- BuiltinOptions_SparseToDenseOptions,
- BuiltinOptions_TileOptions,
- BuiltinOptions_ExpandDimsOptions,
- BuiltinOptions_EqualOptions,
- BuiltinOptions_NotEqualOptions,
- BuiltinOptions_ShapeOptions,
- BuiltinOptions_PowOptions,
- BuiltinOptions_ArgMinOptions,
- BuiltinOptions_FakeQuantOptions,
- BuiltinOptions_PackOptions,
- BuiltinOptions_LogicalOrOptions,
- BuiltinOptions_OneHotOptions,
- BuiltinOptions_LogicalAndOptions,
- BuiltinOptions_LogicalNotOptions,
- BuiltinOptions_UnpackOptions,
- BuiltinOptions_FloorDivOptions,
- BuiltinOptions_SquareOptions,
- BuiltinOptions_ZerosLikeOptions,
- BuiltinOptions_FillOptions,
- BuiltinOptions_BidirectionalSequenceLSTMOptions,
- BuiltinOptions_BidirectionalSequenceRNNOptions,
- BuiltinOptions_UnidirectionalSequenceLSTMOptions,
- BuiltinOptions_FloorModOptions,
- BuiltinOptions_RangeOptions,
- BuiltinOptions_ResizeNearestNeighborOptions,
- BuiltinOptions_LeakyReluOptions,
- BuiltinOptions_SquaredDifferenceOptions,
- BuiltinOptions_MirrorPadOptions,
- BuiltinOptions_AbsOptions,
- BuiltinOptions_SplitVOptions,
- BuiltinOptions_InstanceNormOptions};
- return values;
-}
-
-inline const char *const *EnumNamesBuiltinOptions()
-{
- static const char *const names[] = {"NONE",
- "Conv2DOptions",
- "DepthwiseConv2DOptions",
- "ConcatEmbeddingsOptions",
- "LSHProjectionOptions",
- "Pool2DOptions",
- "SVDFOptions",
- "RNNOptions",
- "FullyConnectedOptions",
- "SoftmaxOptions",
- "ConcatenationOptions",
- "AddOptions",
- "L2NormOptions",
- "LocalResponseNormalizationOptions",
- "LSTMOptions",
- "ResizeBilinearOptions",
- "CallOptions",
- "ReshapeOptions",
- "SkipGramOptions",
- "SpaceToDepthOptions",
- "EmbeddingLookupSparseOptions",
- "MulOptions",
- "PadOptions",
- "GatherOptions",
- "BatchToSpaceNDOptions",
- "SpaceToBatchNDOptions",
- "TransposeOptions",
- "ReducerOptions",
- "SubOptions",
- "DivOptions",
- "SqueezeOptions",
- "SequenceRNNOptions",
- "StridedSliceOptions",
- "ExpOptions",
- "TopKV2Options",
- "SplitOptions",
- "LogSoftmaxOptions",
- "CastOptions",
- "DequantizeOptions",
- "MaximumMinimumOptions",
- "ArgMaxOptions",
- "LessOptions",
- "NegOptions",
- "PadV2Options",
- "GreaterOptions",
- "GreaterEqualOptions",
- "LessEqualOptions",
- "SelectOptions",
- "SliceOptions",
- "TransposeConvOptions",
- "SparseToDenseOptions",
- "TileOptions",
- "ExpandDimsOptions",
- "EqualOptions",
- "NotEqualOptions",
- "ShapeOptions",
- "PowOptions",
- "ArgMinOptions",
- "FakeQuantOptions",
- "PackOptions",
- "LogicalOrOptions",
- "OneHotOptions",
- "LogicalAndOptions",
- "LogicalNotOptions",
- "UnpackOptions",
- "FloorDivOptions",
- "SquareOptions",
- "ZerosLikeOptions",
- "FillOptions",
- "BidirectionalSequenceLSTMOptions",
- "BidirectionalSequenceRNNOptions",
- "UnidirectionalSequenceLSTMOptions",
- "FloorModOptions",
- "RangeOptions",
- "ResizeNearestNeighborOptions",
- "LeakyReluOptions",
- "SquaredDifferenceOptions",
- "MirrorPadOptions",
- "AbsOptions",
- "SplitVOptions",
- "InstanceNormOptions",
- nullptr};
- return names;
-}
-
-inline const char *EnumNameBuiltinOptions(BuiltinOptions e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesBuiltinOptions()[index];
-}
-
-template <typename T> struct BuiltinOptionsTraits
-{
- static const BuiltinOptions enum_value = BuiltinOptions_NONE;
-};
-
-template <> struct BuiltinOptionsTraits<Conv2DOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions;
-};
-
-template <> struct BuiltinOptionsTraits<DepthwiseConv2DOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ConcatEmbeddingsOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LSHProjectionOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions;
-};
-
-template <> struct BuiltinOptionsTraits<Pool2DOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SVDFOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions;
-};
-
-template <> struct BuiltinOptionsTraits<RNNOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions;
-};
-
-template <> struct BuiltinOptionsTraits<FullyConnectedOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SoftmaxOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ConcatenationOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions;
-};
-
-template <> struct BuiltinOptionsTraits<AddOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_AddOptions;
-};
-
-template <> struct BuiltinOptionsTraits<L2NormOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LocalResponseNormalizationOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LSTMOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ResizeBilinearOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions;
-};
-
-template <> struct BuiltinOptionsTraits<CallOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_CallOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ReshapeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SkipGramOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SpaceToDepthOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions;
-};
-
-template <> struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions;
-};
-
-template <> struct BuiltinOptionsTraits<MulOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_MulOptions;
-};
-
-template <> struct BuiltinOptionsTraits<PadOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_PadOptions;
-};
-
-template <> struct BuiltinOptionsTraits<GatherOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions;
-};
-
-template <> struct BuiltinOptionsTraits<BatchToSpaceNDOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SpaceToBatchNDOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions;
-};
-
-template <> struct BuiltinOptionsTraits<TransposeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ReducerOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SubOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SubOptions;
-};
-
-template <> struct BuiltinOptionsTraits<DivOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_DivOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SqueezeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SequenceRNNOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions;
-};
-
-template <> struct BuiltinOptionsTraits<StridedSliceOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ExpOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions;
-};
-
-template <> struct BuiltinOptionsTraits<TopKV2Options>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options;
-};
-
-template <> struct BuiltinOptionsTraits<SplitOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LogSoftmaxOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions;
-};
-
-template <> struct BuiltinOptionsTraits<CastOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_CastOptions;
-};
-
-template <> struct BuiltinOptionsTraits<DequantizeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<MaximumMinimumOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ArgMaxOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LessOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LessOptions;
-};
-
-template <> struct BuiltinOptionsTraits<NegOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_NegOptions;
-};
-
-template <> struct BuiltinOptionsTraits<PadV2Options>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options;
-};
-
-template <> struct BuiltinOptionsTraits<GreaterOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions;
-};
-
-template <> struct BuiltinOptionsTraits<GreaterEqualOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LessEqualOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SelectOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SliceOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions;
-};
-
-template <> struct BuiltinOptionsTraits<TransposeConvOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SparseToDenseOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions;
-};
-
-template <> struct BuiltinOptionsTraits<TileOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_TileOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ExpandDimsOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions;
-};
-
-template <> struct BuiltinOptionsTraits<EqualOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions;
-};
-
-template <> struct BuiltinOptionsTraits<NotEqualOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ShapeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<PowOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_PowOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ArgMinOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions;
-};
-
-template <> struct BuiltinOptionsTraits<FakeQuantOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions;
-};
-
-template <> struct BuiltinOptionsTraits<PackOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_PackOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LogicalOrOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions;
-};
-
-template <> struct BuiltinOptionsTraits<OneHotOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LogicalAndOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LogicalNotOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions;
-};
-
-template <> struct BuiltinOptionsTraits<UnpackOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions;
-};
-
-template <> struct BuiltinOptionsTraits<FloorDivOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SquareOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ZerosLikeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<FillOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_FillOptions;
-};
-
-template <> struct BuiltinOptionsTraits<BidirectionalSequenceLSTMOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions;
-};
-
-template <> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
-};
-
-template <> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
-};
-
-template <> struct BuiltinOptionsTraits<FloorModOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions;
-};
-
-template <> struct BuiltinOptionsTraits<RangeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ResizeNearestNeighborOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LeakyReluOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SquaredDifferenceOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions;
-};
-
-template <> struct BuiltinOptionsTraits<MirrorPadOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions;
-};
-
-template <> struct BuiltinOptionsTraits<AbsOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SplitVOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions;
-};
-
-template <> struct BuiltinOptionsTraits<InstanceNormOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_InstanceNormOptions;
-};
-
-bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
-bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
- const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
- const flatbuffers::Vector<uint8_t> *types);
-
-enum Padding
-{
- Padding_SAME = 0,
- Padding_VALID = 1,
- Padding_MIN = Padding_SAME,
- Padding_MAX = Padding_VALID
-};
-
-inline const Padding (&EnumValuesPadding())[2]
-{
- static const Padding values[] = {Padding_SAME, Padding_VALID};
- return values;
-}
-
-inline const char *const *EnumNamesPadding()
-{
- static const char *const names[] = {"SAME", "VALID", nullptr};
- return names;
-}
-
-inline const char *EnumNamePadding(Padding e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesPadding()[index];
-}
-
-enum ActivationFunctionType
-{
- ActivationFunctionType_NONE = 0,
- ActivationFunctionType_RELU = 1,
- ActivationFunctionType_RELU_N1_TO_1 = 2,
- ActivationFunctionType_RELU6 = 3,
- ActivationFunctionType_TANH = 4,
- ActivationFunctionType_SIGN_BIT = 5,
- ActivationFunctionType_MIN = ActivationFunctionType_NONE,
- ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT
-};
-
-inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6]
-{
- static const ActivationFunctionType values[] = {
- ActivationFunctionType_NONE, ActivationFunctionType_RELU,
- ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6,
- ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
- return values;
-}
-
-inline const char *const *EnumNamesActivationFunctionType()
-{
- static const char *const names[] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6",
- "TANH", "SIGN_BIT", nullptr};
- return names;
-}
-
-inline const char *EnumNameActivationFunctionType(ActivationFunctionType e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesActivationFunctionType()[index];
-}
-
-enum LSHProjectionType
-{
- LSHProjectionType_UNKNOWN = 0,
- LSHProjectionType_SPARSE = 1,
- LSHProjectionType_DENSE = 2,
- LSHProjectionType_MIN = LSHProjectionType_UNKNOWN,
- LSHProjectionType_MAX = LSHProjectionType_DENSE
-};
-
-inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3]
-{
- static const LSHProjectionType values[] = {LSHProjectionType_UNKNOWN, LSHProjectionType_SPARSE,
- LSHProjectionType_DENSE};
- return values;
-}
-
-inline const char *const *EnumNamesLSHProjectionType()
-{
- static const char *const names[] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
- return names;
-}
-
-inline const char *EnumNameLSHProjectionType(LSHProjectionType e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesLSHProjectionType()[index];
-}
-
-enum FullyConnectedOptionsWeightsFormat
-{
- FullyConnectedOptionsWeightsFormat_DEFAULT = 0,
- FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1,
- FullyConnectedOptionsWeightsFormat_MIN = FullyConnectedOptionsWeightsFormat_DEFAULT,
- FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8
-};
-
-inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2]
-{
- static const FullyConnectedOptionsWeightsFormat values[] = {
- FullyConnectedOptionsWeightsFormat_DEFAULT,
- FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8};
- return values;
-}
-
-inline const char *const *EnumNamesFullyConnectedOptionsWeightsFormat()
-{
- static const char *const names[] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr};
- return names;
-}
-
-inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesFullyConnectedOptionsWeightsFormat()[index];
-}
-
-enum LSTMKernelType
-{
- LSTMKernelType_FULL = 0,
- LSTMKernelType_BASIC = 1,
- LSTMKernelType_MIN = LSTMKernelType_FULL,
- LSTMKernelType_MAX = LSTMKernelType_BASIC
-};
-
-inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2]
-{
- static const LSTMKernelType values[] = {LSTMKernelType_FULL, LSTMKernelType_BASIC};
- return values;
-}
-
-inline const char *const *EnumNamesLSTMKernelType()
-{
- static const char *const names[] = {"FULL", "BASIC", nullptr};
- return names;
-}
-
-inline const char *EnumNameLSTMKernelType(LSTMKernelType e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesLSTMKernelType()[index];
-}
-
-enum CombinerType
-{
- CombinerType_SUM = 0,
- CombinerType_MEAN = 1,
- CombinerType_SQRTN = 2,
- CombinerType_MIN = CombinerType_SUM,
- CombinerType_MAX = CombinerType_SQRTN
-};
-
-inline const CombinerType (&EnumValuesCombinerType())[3]
-{
- static const CombinerType values[] = {CombinerType_SUM, CombinerType_MEAN, CombinerType_SQRTN};
- return values;
-}
-
-inline const char *const *EnumNamesCombinerType()
-{
- static const char *const names[] = {"SUM", "MEAN", "SQRTN", nullptr};
- return names;
-}
-
-inline const char *EnumNameCombinerType(CombinerType e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesCombinerType()[index];
-}
-
-enum MirrorPadMode
-{
- MirrorPadMode_REFLECT = 0,
- MirrorPadMode_SYMMETRIC = 1,
- MirrorPadMode_MIN = MirrorPadMode_REFLECT,
- MirrorPadMode_MAX = MirrorPadMode_SYMMETRIC
-};
-
-inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2]
-{
- static const MirrorPadMode values[] = {MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC};
- return values;
-}
-
-inline const char *const *EnumNamesMirrorPadMode()
-{
- static const char *const names[] = {"REFLECT", "SYMMETRIC", nullptr};
- return names;
-}
-
-inline const char *EnumNameMirrorPadMode(MirrorPadMode e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesMirrorPadMode()[index];
-}
-
-enum CustomOptionsFormat
-{
- CustomOptionsFormat_FLEXBUFFERS = 0,
- CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,
- CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS
-};
-
-inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1]
-{
- static const CustomOptionsFormat values[] = {CustomOptionsFormat_FLEXBUFFERS};
- return values;
-}
-
-inline const char *const *EnumNamesCustomOptionsFormat()
-{
- static const char *const names[] = {"FLEXBUFFERS", nullptr};
- return names;
-}
-
-inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesCustomOptionsFormat()[index];
-}
-
-enum DataFormat
-{
- DataFormat_CHANNELS_LAST = 0,
- DataFormat_CHANNELS_FIRST = 1,
- DataFormat_MIN = DataFormat_CHANNELS_LAST,
- DataFormat_MAX = DataFormat_CHANNELS_FIRST
-};
-
-inline const DataFormat (&EnumValuesDataFormat())[2]
-{
- static const DataFormat values[] = {DataFormat_CHANNELS_LAST, DataFormat_CHANNELS_FIRST};
- return values;
-}
-
-inline const char *const *EnumNamesDataFormat()
-{
- static const char *const names[] = {"CHANNELS_LAST", "CHANNELS_FIRST", nullptr};
- return names;
-}
-
-inline const char *EnumNameDataFormat(DataFormat e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesDataFormat()[index];
-}
-
-struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_CUSTOM = 4
- };
- const flatbuffers::Vector<uint8_t> *custom() const
- {
- return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CUSTOM) &&
- verifier.VerifyVector(custom()) && verifier.EndTable();
- }
-};
-
-struct CustomQuantizationBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom)
- {
- fbb_.AddOffset(CustomQuantization::VT_CUSTOM, custom);
- }
- explicit CustomQuantizationBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &);
- flatbuffers::Offset<CustomQuantization> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<CustomQuantization>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<CustomQuantization>
-CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom = 0)
-{
- CustomQuantizationBuilder builder_(_fbb);
- builder_.add_custom(custom);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<CustomQuantization>
-CreateCustomQuantizationDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<uint8_t> *custom = nullptr)
-{
- return circle::CreateCustomQuantization(_fbb, custom ? _fbb.CreateVector<uint8_t>(*custom) : 0);
-}
-
-struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_MIN = 4,
- VT_MAX = 6,
- VT_SCALE = 8,
- VT_ZERO_POINT = 10,
- VT_DETAILS_TYPE = 12,
- VT_DETAILS = 14
- };
- const flatbuffers::Vector<float> *min() const
- {
- return GetPointer<const flatbuffers::Vector<float> *>(VT_MIN);
- }
- const flatbuffers::Vector<float> *max() const
- {
- return GetPointer<const flatbuffers::Vector<float> *>(VT_MAX);
- }
- const flatbuffers::Vector<float> *scale() const
- {
- return GetPointer<const flatbuffers::Vector<float> *>(VT_SCALE);
- }
- const flatbuffers::Vector<int64_t> *zero_point() const
- {
- return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT);
- }
- QuantizationDetails details_type() const
- {
- return static_cast<QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
- }
- const void *details() const { return GetPointer<const void *>(VT_DETAILS); }
- template <typename T> const T *details_as() const;
- const CustomQuantization *details_as_CustomQuantization() const
- {
- return details_type() == QuantizationDetails_CustomQuantization
- ? static_cast<const CustomQuantization *>(details())
- : nullptr;
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_MIN) &&
- verifier.VerifyVector(min()) && VerifyOffset(verifier, VT_MAX) &&
- verifier.VerifyVector(max()) && VerifyOffset(verifier, VT_SCALE) &&
- verifier.VerifyVector(scale()) && VerifyOffset(verifier, VT_ZERO_POINT) &&
- verifier.VerifyVector(zero_point()) && VerifyField<uint8_t>(verifier, VT_DETAILS_TYPE) &&
- VerifyOffset(verifier, VT_DETAILS) &&
- VerifyQuantizationDetails(verifier, details(), details_type()) && verifier.EndTable();
- }
-};
-
-template <>
-inline const CustomQuantization *QuantizationParameters::details_as<CustomQuantization>() const
-{
- return details_as_CustomQuantization();
-}
-
-struct QuantizationParametersBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min)
- {
- fbb_.AddOffset(QuantizationParameters::VT_MIN, min);
- }
- void add_max(flatbuffers::Offset<flatbuffers::Vector<float>> max)
- {
- fbb_.AddOffset(QuantizationParameters::VT_MAX, max);
- }
- void add_scale(flatbuffers::Offset<flatbuffers::Vector<float>> scale)
- {
- fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale);
- }
- void add_zero_point(flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point)
- {
- fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point);
- }
- void add_details_type(QuantizationDetails details_type)
- {
- fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE,
- static_cast<uint8_t>(details_type), 0);
- }
- void add_details(flatbuffers::Offset<void> details)
- {
- fbb_.AddOffset(QuantizationParameters::VT_DETAILS, details);
- }
- explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &);
- flatbuffers::Offset<QuantizationParameters> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<QuantizationParameters>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<QuantizationParameters>
-CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
- flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
- QuantizationDetails details_type = QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0)
-{
- QuantizationParametersBuilder builder_(_fbb);
- builder_.add_details(details);
- builder_.add_zero_point(zero_point);
- builder_.add_scale(scale);
- builder_.add_max(max);
- builder_.add_min(min);
- builder_.add_details_type(details_type);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
- const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
- const std::vector<int64_t> *zero_point = nullptr,
- QuantizationDetails details_type = QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0)
-{
- return circle::CreateQuantizationParameters(
- _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0,
- scale ? _fbb.CreateVector<float>(*scale) : 0,
- zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details);
-}
-
-struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_SHAPE = 4,
- VT_TYPE = 6,
- VT_BUFFER = 8,
- VT_NAME = 10,
- VT_QUANTIZATION = 12,
- VT_IS_VARIABLE = 14
- };
- const flatbuffers::Vector<int32_t> *shape() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
- }
- TensorType type() const { return static_cast<TensorType>(GetField<int8_t>(VT_TYPE, 0)); }
- uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
- const flatbuffers::String *name() const
- {
- return GetPointer<const flatbuffers::String *>(VT_NAME);
- }
- const QuantizationParameters *quantization() const
- {
- return GetPointer<const QuantizationParameters *>(VT_QUANTIZATION);
- }
- bool is_variable() const { return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SHAPE) &&
- verifier.VerifyVector(shape()) && VerifyField<int8_t>(verifier, VT_TYPE) &&
- VerifyField<uint32_t>(verifier, VT_BUFFER) && VerifyOffset(verifier, VT_NAME) &&
- verifier.VerifyString(name()) && VerifyOffset(verifier, VT_QUANTIZATION) &&
- verifier.VerifyTable(quantization()) && VerifyField<uint8_t>(verifier, VT_IS_VARIABLE) &&
- verifier.EndTable();
- }
-};
-
-struct TensorBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape)
- {
- fbb_.AddOffset(Tensor::VT_SHAPE, shape);
- }
- void add_type(TensorType type)
- {
- fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0);
- }
- void add_buffer(uint32_t buffer) { fbb_.AddElement<uint32_t>(Tensor::VT_BUFFER, buffer, 0); }
- void add_name(flatbuffers::Offset<flatbuffers::String> name)
- {
- fbb_.AddOffset(Tensor::VT_NAME, name);
- }
- void add_quantization(flatbuffers::Offset<QuantizationParameters> quantization)
- {
- fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization);
- }
- void add_is_variable(bool is_variable)
- {
- fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0);
- }
- explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- TensorBuilder &operator=(const TensorBuilder &);
- flatbuffers::Offset<Tensor> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Tensor>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Tensor>
-CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
- TensorType type = TensorType_FLOAT32, uint32_t buffer = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0,
- flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false)
-{
- TensorBuilder builder_(_fbb);
- builder_.add_quantization(quantization);
- builder_.add_name(name);
- builder_.add_buffer(buffer);
- builder_.add_shape(shape);
- builder_.add_is_variable(is_variable);
- builder_.add_type(type);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Tensor> CreateTensorDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
- TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr,
- flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false)
-{
- return circle::CreateTensor(_fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer,
- name ? _fbb.CreateString(name) : 0, quantization, is_variable);
-}
-
-struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_PADDING = 4,
- VT_STRIDE_W = 6,
- VT_STRIDE_H = 8,
- VT_FUSED_ACTIVATION_FUNCTION = 10,
- VT_DILATION_W_FACTOR = 12,
- VT_DILATION_H_FACTOR = 14
- };
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
- int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
- int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
- int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
- VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
- }
-};
-
-struct Conv2DOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
- {
- fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
- }
- void add_stride_w(int32_t stride_w)
- {
- fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_W, stride_w, 0);
- }
- void add_stride_h(int32_t stride_h)
- {
- fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0);
- }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_dilation_w_factor(int32_t dilation_w_factor)
- {
- fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
- }
- void add_dilation_h_factor(int32_t dilation_h_factor)
- {
- fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
- }
- explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &);
- flatbuffers::Offset<Conv2DOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Conv2DOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Conv2DOptions>
-CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
-{
- Conv2DOptionsBuilder builder_(_fbb);
- builder_.add_dilation_h_factor(dilation_h_factor);
- builder_.add_dilation_w_factor(dilation_w_factor);
- builder_.add_stride_h(stride_h);
- builder_.add_stride_w(stride_w);
- builder_.add_fused_activation_function(fused_activation_function);
- builder_.add_padding(padding);
- return builder_.Finish();
-}
-
-struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_PADDING = 4,
- VT_STRIDE_W = 6,
- VT_STRIDE_H = 8,
- VT_FILTER_WIDTH = 10,
- VT_FILTER_HEIGHT = 12,
- VT_FUSED_ACTIVATION_FUNCTION = 14
- };
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
- int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
- int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
- int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); }
- int32_t filter_height() const { return GetField<int32_t>(VT_FILTER_HEIGHT, 0); }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
- VerifyField<int32_t>(verifier, VT_FILTER_WIDTH) &&
- VerifyField<int32_t>(verifier, VT_FILTER_HEIGHT) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct Pool2DOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
- {
- fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
- }
- void add_stride_w(int32_t stride_w)
- {
- fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_W, stride_w, 0);
- }
- void add_stride_h(int32_t stride_h)
- {
- fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_H, stride_h, 0);
- }
- void add_filter_width(int32_t filter_width)
- {
- fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0);
- }
- void add_filter_height(int32_t filter_height)
- {
- fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
- }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &);
- flatbuffers::Offset<Pool2DOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Pool2DOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Pool2DOptions>
-CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0,
- int32_t filter_height = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- Pool2DOptionsBuilder builder_(_fbb);
- builder_.add_filter_height(filter_height);
- builder_.add_filter_width(filter_width);
- builder_.add_stride_h(stride_h);
- builder_.add_stride_w(stride_w);
- builder_.add_fused_activation_function(fused_activation_function);
- builder_.add_padding(padding);
- return builder_.Finish();
-}
-
-struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_PADDING = 4,
- VT_STRIDE_W = 6,
- VT_STRIDE_H = 8,
- VT_DEPTH_MULTIPLIER = 10,
- VT_FUSED_ACTIVATION_FUNCTION = 12,
- VT_DILATION_W_FACTOR = 14,
- VT_DILATION_H_FACTOR = 16
- };
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
- int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
- int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
- int32_t depth_multiplier() const { return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0); }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
- int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
- VerifyField<int32_t>(verifier, VT_DEPTH_MULTIPLIER) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
- VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
- }
-};
-
-struct DepthwiseConv2DOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
- {
- fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
- }
- void add_stride_w(int32_t stride_w)
- {
- fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0);
- }
- void add_stride_h(int32_t stride_h)
- {
- fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0);
- }
- void add_depth_multiplier(int32_t depth_multiplier)
- {
- fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0);
- }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_dilation_w_factor(int32_t dilation_w_factor)
- {
- fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
- }
- void add_dilation_h_factor(int32_t dilation_h_factor)
- {
- fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
- }
- explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &);
- flatbuffers::Offset<DepthwiseConv2DOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<DepthwiseConv2DOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
- flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0,
- int32_t stride_h = 0, int32_t depth_multiplier = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
-{
- DepthwiseConv2DOptionsBuilder builder_(_fbb);
- builder_.add_dilation_h_factor(dilation_h_factor);
- builder_.add_dilation_w_factor(dilation_w_factor);
- builder_.add_depth_multiplier(depth_multiplier);
- builder_.add_stride_h(stride_h);
- builder_.add_stride_w(stride_w);
- builder_.add_fused_activation_function(fused_activation_function);
- builder_.add_padding(padding);
- return builder_.Finish();
-}
-
-struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_NUM_CHANNELS = 4,
- VT_NUM_COLUMNS_PER_CHANNEL = 6,
- VT_EMBEDDING_DIM_PER_CHANNEL = 8
- };
- int32_t num_channels() const { return GetField<int32_t>(VT_NUM_CHANNELS, 0); }
- const flatbuffers::Vector<int32_t> *num_columns_per_channel() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NUM_COLUMNS_PER_CHANNEL);
- }
- const flatbuffers::Vector<int32_t> *embedding_dim_per_channel() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_EMBEDDING_DIM_PER_CHANNEL);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_CHANNELS) &&
- VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) &&
- verifier.VerifyVector(num_columns_per_channel()) &&
- VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) &&
- verifier.VerifyVector(embedding_dim_per_channel()) && verifier.EndTable();
- }
-};
-
-struct ConcatEmbeddingsOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_num_channels(int32_t num_channels)
- {
- fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);
- }
- void add_num_columns_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
- {
- fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
- }
- void add_embedding_dim_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
- {
- fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL,
- embedding_dim_per_channel);
- }
- explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &);
- flatbuffers::Offset<ConcatEmbeddingsOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ConcatEmbeddingsOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
-{
- ConcatEmbeddingsOptionsBuilder builder_(_fbb);
- builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
- builder_.add_num_columns_per_channel(num_columns_per_channel);
- builder_.add_num_channels(num_channels);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<ConcatEmbeddingsOptions>
-CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
- const std::vector<int32_t> *num_columns_per_channel = nullptr,
- const std::vector<int32_t> *embedding_dim_per_channel = nullptr)
-{
- return circle::CreateConcatEmbeddingsOptions(
- _fbb, num_channels,
- num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0,
- embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0);
-}
-
-struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_TYPE = 4
- };
- LSHProjectionType type() const
- {
- return static_cast<LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_TYPE) &&
- verifier.EndTable();
- }
-};
-
-struct LSHProjectionOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_type(LSHProjectionType type)
- {
- fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0);
- }
- explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &);
- flatbuffers::Offset<LSHProjectionOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LSHProjectionOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LSHProjectionOptions>
-CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb,
- LSHProjectionType type = LSHProjectionType_UNKNOWN)
-{
- LSHProjectionOptionsBuilder builder_(_fbb);
- builder_.add_type(type);
- return builder_.Finish();
-}
-
-struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_RANK = 4,
- VT_FUSED_ACTIVATION_FUNCTION = 6
- };
- int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RANK) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct SVDFOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_rank(int32_t rank) { fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0); }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &);
- flatbuffers::Offset<SVDFOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SVDFOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SVDFOptions>
-CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- SVDFOptionsBuilder builder_(_fbb);
- builder_.add_rank(rank);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct RNNOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- RNNOptionsBuilder &operator=(const RNNOptionsBuilder &);
- flatbuffers::Offset<RNNOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<RNNOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<RNNOptions>
-CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- RNNOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_TIME_MAJOR = 4,
- VT_FUSED_ACTIVATION_FUNCTION = 6
- };
- bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct SequenceRNNOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_time_major(bool time_major)
- {
- fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major),
- 0);
- }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit SequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &);
- flatbuffers::Offset<SequenceRNNOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SequenceRNNOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- SequenceRNNOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- builder_.add_time_major(time_major);
- return builder_.Finish();
-}
-
-struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_TIME_MAJOR = 4,
- VT_FUSED_ACTIVATION_FUNCTION = 6,
- VT_MERGE_OUTPUTS = 8
- };
- bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) && verifier.EndTable();
- }
-};
-
-struct BidirectionalSequenceRNNOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_time_major(bool time_major)
- {
- fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR,
- static_cast<uint8_t>(time_major), 0);
- }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_merge_outputs(bool merge_outputs)
- {
- fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_MERGE_OUTPUTS,
- static_cast<uint8_t>(merge_outputs), 0);
- }
- explicit BidirectionalSequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &);
- flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<BidirectionalSequenceRNNOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- bool merge_outputs = false)
-{
- BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
- builder_.add_merge_outputs(merge_outputs);
- builder_.add_fused_activation_function(fused_activation_function);
- builder_.add_time_major(time_major);
- return builder_.Finish();
-}
-
-struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4,
- VT_WEIGHTS_FORMAT = 6
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- FullyConnectedOptionsWeightsFormat weights_format() const
- {
- return static_cast<FullyConnectedOptionsWeightsFormat>(GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<int8_t>(verifier, VT_WEIGHTS_FORMAT) && verifier.EndTable();
- }
-};
-
-struct FullyConnectedOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_weights_format(FullyConnectedOptionsWeightsFormat weights_format)
- {
- fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT,
- static_cast<int8_t>(weights_format), 0);
- }
- explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &);
- flatbuffers::Offset<FullyConnectedOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<FullyConnectedOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT)
-{
- FullyConnectedOptionsBuilder builder_(_fbb);
- builder_.add_weights_format(weights_format);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_BETA = 4
- };
- float beta() const { return GetField<float>(VT_BETA, 0.0f); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_BETA) &&
- verifier.EndTable();
- }
-};
-
-struct SoftmaxOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_beta(float beta) { fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f); }
- explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &);
- flatbuffers::Offset<SoftmaxOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SoftmaxOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SoftmaxOptions>
-CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f)
-{
- SoftmaxOptionsBuilder builder_(_fbb);
- builder_.add_beta(beta);
- return builder_.Finish();
-}
-
-struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_AXIS = 4,
- VT_FUSED_ACTIVATION_FUNCTION = 6
- };
- int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct ConcatenationOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0); }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &);
- flatbuffers::Offset<ConcatenationOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ConcatenationOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- ConcatenationOptionsBuilder builder_(_fbb);
- builder_.add_axis(axis);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct AddOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- AddOptionsBuilder &operator=(const AddOptionsBuilder &);
- flatbuffers::Offset<AddOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<AddOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<AddOptions>
-CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- AddOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct MulOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- MulOptionsBuilder &operator=(const MulOptionsBuilder &);
- flatbuffers::Offset<MulOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<MulOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<MulOptions>
-CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- MulOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct L2NormOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &);
- flatbuffers::Offset<L2NormOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<L2NormOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<L2NormOptions>
-CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- L2NormOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_RADIUS = 4,
- VT_BIAS = 6,
- VT_ALPHA = 8,
- VT_BETA = 10
- };
- int32_t radius() const { return GetField<int32_t>(VT_RADIUS, 0); }
- float bias() const { return GetField<float>(VT_BIAS, 0.0f); }
- float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); }
- float beta() const { return GetField<float>(VT_BETA, 0.0f); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RADIUS) &&
- VerifyField<float>(verifier, VT_BIAS) && VerifyField<float>(verifier, VT_ALPHA) &&
- VerifyField<float>(verifier, VT_BETA) && verifier.EndTable();
- }
-};
-
-struct LocalResponseNormalizationOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_radius(int32_t radius)
- {
- fbb_.AddElement<int32_t>(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0);
- }
- void add_bias(float bias)
- {
- fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f);
- }
- void add_alpha(float alpha)
- {
- fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f);
- }
- void add_beta(float beta)
- {
- fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
- }
- explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LocalResponseNormalizationOptionsBuilder &
- operator=(const LocalResponseNormalizationOptionsBuilder &);
- flatbuffers::Offset<LocalResponseNormalizationOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LocalResponseNormalizationOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LocalResponseNormalizationOptions>
-CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t radius = 0,
- float bias = 0.0f, float alpha = 0.0f, float beta = 0.0f)
-{
- LocalResponseNormalizationOptionsBuilder builder_(_fbb);
- builder_.add_beta(beta);
- builder_.add_alpha(alpha);
- builder_.add_bias(bias);
- builder_.add_radius(radius);
- return builder_.Finish();
-}
-
-struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4,
- VT_CELL_CLIP = 6,
- VT_PROJ_CLIP = 8,
- VT_KERNEL_TYPE = 10
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
- float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
- LSTMKernelType kernel_type() const
- {
- return static_cast<LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<float>(verifier, VT_CELL_CLIP) &&
- VerifyField<float>(verifier, VT_PROJ_CLIP) &&
- VerifyField<int8_t>(verifier, VT_KERNEL_TYPE) && verifier.EndTable();
- }
-};
-
-struct LSTMOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_cell_clip(float cell_clip)
- {
- fbb_.AddElement<float>(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
- }
- void add_proj_clip(float proj_clip)
- {
- fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
- }
- void add_kernel_type(LSTMKernelType kernel_type)
- {
- fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0);
- }
- explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &);
- flatbuffers::Offset<LSTMOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LSTMOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LSTMOptions>
-CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f,
- LSTMKernelType kernel_type = LSTMKernelType_FULL)
-{
- LSTMOptionsBuilder builder_(_fbb);
- builder_.add_proj_clip(proj_clip);
- builder_.add_cell_clip(cell_clip);
- builder_.add_kernel_type(kernel_type);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4,
- VT_CELL_CLIP = 6,
- VT_PROJ_CLIP = 8,
- VT_TIME_MAJOR = 10
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
- float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
- bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<float>(verifier, VT_CELL_CLIP) &&
- VerifyField<float>(verifier, VT_PROJ_CLIP) &&
- VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) && verifier.EndTable();
- }
-};
-
-struct UnidirectionalSequenceLSTMOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_cell_clip(float cell_clip)
- {
- fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
- }
- void add_proj_clip(float proj_clip)
- {
- fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
- }
- void add_time_major(bool time_major)
- {
- fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_TIME_MAJOR,
- static_cast<uint8_t>(time_major), 0);
- }
- explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- UnidirectionalSequenceLSTMOptionsBuilder &
- operator=(const UnidirectionalSequenceLSTMOptionsBuilder &);
- flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
-CreateUnidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false)
-{
- UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
- builder_.add_proj_clip(proj_clip);
- builder_.add_cell_clip(cell_clip);
- builder_.add_time_major(time_major);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4,
- VT_CELL_CLIP = 6,
- VT_PROJ_CLIP = 8,
- VT_MERGE_OUTPUTS = 10
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
- float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
- bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<float>(verifier, VT_CELL_CLIP) &&
- VerifyField<float>(verifier, VT_PROJ_CLIP) &&
- VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) && verifier.EndTable();
- }
-};
-
-struct BidirectionalSequenceLSTMOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_cell_clip(float cell_clip)
- {
- fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
- }
- void add_proj_clip(float proj_clip)
- {
- fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
- }
- void add_merge_outputs(bool merge_outputs)
- {
- fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS,
- static_cast<uint8_t>(merge_outputs), 0);
- }
- explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- BidirectionalSequenceLSTMOptionsBuilder &
- operator=(const BidirectionalSequenceLSTMOptionsBuilder &);
- flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<BidirectionalSequenceLSTMOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false)
-{
- BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
- builder_.add_proj_clip(proj_clip);
- builder_.add_cell_clip(cell_clip);
- builder_.add_merge_outputs(merge_outputs);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_ALIGN_CORNERS = 8
- };
- bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
- verifier.EndTable();
- }
-};
-
-struct ResizeBilinearOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_align_corners(bool align_corners)
- {
- fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_ALIGN_CORNERS,
- static_cast<uint8_t>(align_corners), 0);
- }
- explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &);
- flatbuffers::Offset<ResizeBilinearOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ResizeBilinearOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ResizeBilinearOptions>
-CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false)
-{
- ResizeBilinearOptionsBuilder builder_(_fbb);
- builder_.add_align_corners(align_corners);
- return builder_.Finish();
-}
-
-struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_ALIGN_CORNERS = 4
- };
- bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
- verifier.EndTable();
- }
-};
-
-struct ResizeNearestNeighborOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_align_corners(bool align_corners)
- {
- fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS,
- static_cast<uint8_t>(align_corners), 0);
- }
- explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &);
- flatbuffers::Offset<ResizeNearestNeighborOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ResizeNearestNeighborOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ResizeNearestNeighborOptions>
-CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false)
-{
- ResizeNearestNeighborOptionsBuilder builder_(_fbb);
- builder_.add_align_corners(align_corners);
- return builder_.Finish();
-}
-
-struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_SUBGRAPH = 4
- };
- uint32_t subgraph() const { return GetField<uint32_t>(VT_SUBGRAPH, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_SUBGRAPH) &&
- verifier.EndTable();
- }
-};
-
-struct CallOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_subgraph(uint32_t subgraph)
- {
- fbb_.AddElement<uint32_t>(CallOptions::VT_SUBGRAPH, subgraph, 0);
- }
- explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- CallOptionsBuilder &operator=(const CallOptionsBuilder &);
- flatbuffers::Offset<CallOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<CallOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb,
- uint32_t subgraph = 0)
-{
- CallOptionsBuilder builder_(_fbb);
- builder_.add_subgraph(subgraph);
- return builder_.Finish();
-}
-
-struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct PadOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- PadOptionsBuilder &operator=(const PadOptionsBuilder &);
- flatbuffers::Offset<PadOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<PadOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- PadOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct PadV2OptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &);
- flatbuffers::Offset<PadV2Options> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<PadV2Options>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb)
-{
- PadV2OptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_NEW_SHAPE = 4
- };
- const flatbuffers::Vector<int32_t> *new_shape() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NEW_SHAPE);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NEW_SHAPE) &&
- verifier.VerifyVector(new_shape()) && verifier.EndTable();
- }
-};
-
-struct ReshapeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape)
- {
- fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape);
- }
- explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &);
- flatbuffers::Offset<ReshapeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ReshapeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ReshapeOptions>
-CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape = 0)
-{
- ReshapeOptionsBuilder builder_(_fbb);
- builder_.add_new_shape(new_shape);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<ReshapeOptions>
-CreateReshapeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<int32_t> *new_shape = nullptr)
-{
- return circle::CreateReshapeOptions(_fbb, new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0);
-}
-
-struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct SpaceToBatchNDOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &);
- flatbuffers::Offset<SpaceToBatchNDOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SpaceToBatchNDOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SpaceToBatchNDOptions>
-CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- SpaceToBatchNDOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct BatchToSpaceNDOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
- flatbuffers::Offset<BatchToSpaceNDOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<BatchToSpaceNDOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<BatchToSpaceNDOptions>
-CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- BatchToSpaceNDOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_NGRAM_SIZE = 4,
- VT_MAX_SKIP_SIZE = 6,
- VT_INCLUDE_ALL_NGRAMS = 8
- };
- int32_t ngram_size() const { return GetField<int32_t>(VT_NGRAM_SIZE, 0); }
- int32_t max_skip_size() const { return GetField<int32_t>(VT_MAX_SKIP_SIZE, 0); }
- bool include_all_ngrams() const { return GetField<uint8_t>(VT_INCLUDE_ALL_NGRAMS, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NGRAM_SIZE) &&
- VerifyField<int32_t>(verifier, VT_MAX_SKIP_SIZE) &&
- VerifyField<uint8_t>(verifier, VT_INCLUDE_ALL_NGRAMS) && verifier.EndTable();
- }
-};
-
-struct SkipGramOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_ngram_size(int32_t ngram_size)
- {
- fbb_.AddElement<int32_t>(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0);
- }
- void add_max_skip_size(int32_t max_skip_size)
- {
- fbb_.AddElement<int32_t>(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0);
- }
- void add_include_all_ngrams(bool include_all_ngrams)
- {
- fbb_.AddElement<uint8_t>(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS,
- static_cast<uint8_t>(include_all_ngrams), 0);
- }
- explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
- flatbuffers::Offset<SkipGramOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SkipGramOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SkipGramOptions>
-CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size = 0,
- int32_t max_skip_size = 0, bool include_all_ngrams = false)
-{
- SkipGramOptionsBuilder builder_(_fbb);
- builder_.add_max_skip_size(max_skip_size);
- builder_.add_ngram_size(ngram_size);
- builder_.add_include_all_ngrams(include_all_ngrams);
- return builder_.Finish();
-}
-
-struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_BLOCK_SIZE = 4
- };
- int32_t block_size() const { return GetField<int32_t>(VT_BLOCK_SIZE, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) &&
- verifier.EndTable();
- }
-};
-
-struct SpaceToDepthOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_block_size(int32_t block_size)
- {
- fbb_.AddElement<int32_t>(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0);
- }
- explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
- flatbuffers::Offset<SpaceToDepthOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SpaceToDepthOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SpaceToDepthOptions>
-CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_size = 0)
-{
- SpaceToDepthOptionsBuilder builder_(_fbb);
- builder_.add_block_size(block_size);
- return builder_.Finish();
-}
-
-struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct SubOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SubOptionsBuilder &operator=(const SubOptionsBuilder &);
- flatbuffers::Offset<SubOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SubOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SubOptions>
-CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- SubOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct DivOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit DivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- DivOptionsBuilder &operator=(const DivOptionsBuilder &);
- flatbuffers::Offset<DivOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<DivOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<DivOptions>
-CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- DivOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct TopKV2OptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &);
- flatbuffers::Offset<TopKV2Options> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<TopKV2Options>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb)
-{
- TopKV2OptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_COMBINER = 4
- };
- CombinerType combiner() const
- {
- return static_cast<CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_COMBINER) &&
- verifier.EndTable();
- }
-};
-
-struct EmbeddingLookupSparseOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_combiner(CombinerType combiner)
- {
- fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER,
- static_cast<int8_t>(combiner), 0);
- }
- explicit EmbeddingLookupSparseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &);
- flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<EmbeddingLookupSparseOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
-CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
- CombinerType combiner = CombinerType_SUM)
-{
- EmbeddingLookupSparseOptionsBuilder builder_(_fbb);
- builder_.add_combiner(combiner);
- return builder_.Finish();
-}
-
-struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_AXIS = 4
- };
- int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
- verifier.EndTable();
- }
-};
-
-struct GatherOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); }
- explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- GatherOptionsBuilder &operator=(const GatherOptionsBuilder &);
- flatbuffers::Offset<GatherOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<GatherOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t axis = 0)
-{
- GatherOptionsBuilder builder_(_fbb);
- builder_.add_axis(axis);
- return builder_.Finish();
-}
-
-struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct TransposeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &);
- flatbuffers::Offset<TransposeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<TransposeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<TransposeOptions>
-CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- TransposeOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct ExpOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ExpOptionsBuilder &operator=(const ExpOptionsBuilder &);
- flatbuffers::Offset<ExpOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ExpOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- ExpOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_KEEP_DIMS = 4
- };
- bool keep_dims() const { return GetField<uint8_t>(VT_KEEP_DIMS, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_KEEP_DIMS) &&
- verifier.EndTable();
- }
-};
-
-struct ReducerOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_keep_dims(bool keep_dims)
- {
- fbb_.AddElement<uint8_t>(ReducerOptions::VT_KEEP_DIMS, static_cast<uint8_t>(keep_dims), 0);
- }
- explicit ReducerOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &);
- flatbuffers::Offset<ReducerOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ReducerOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ReducerOptions>
-CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = false)
-{
- ReducerOptionsBuilder builder_(_fbb);
- builder_.add_keep_dims(keep_dims);
- return builder_.Finish();
-}
-
-struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_SQUEEZE_DIMS = 4
- };
- const flatbuffers::Vector<int32_t> *squeeze_dims() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SQUEEZE_DIMS);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SQUEEZE_DIMS) &&
- verifier.VerifyVector(squeeze_dims()) && verifier.EndTable();
- }
-};
-
-struct SqueezeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims)
- {
- fbb_.AddOffset(SqueezeOptions::VT_SQUEEZE_DIMS, squeeze_dims);
- }
- explicit SqueezeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &);
- flatbuffers::Offset<SqueezeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SqueezeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SqueezeOptions>
-CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims = 0)
-{
- SqueezeOptionsBuilder builder_(_fbb);
- builder_.add_squeeze_dims(squeeze_dims);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<SqueezeOptions>
-CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<int32_t> *squeeze_dims = nullptr)
-{
- return circle::CreateSqueezeOptions(_fbb,
- squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0);
-}
-
-struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_NUM_SPLITS = 4
- };
- int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) &&
- verifier.EndTable();
- }
-};
-
-struct SplitOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_num_splits(int32_t num_splits)
- {
- fbb_.AddElement<int32_t>(SplitOptions::VT_NUM_SPLITS, num_splits, 0);
- }
- explicit SplitOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SplitOptionsBuilder &operator=(const SplitOptionsBuilder &);
- flatbuffers::Offset<SplitOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SplitOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t num_splits = 0)
-{
- SplitOptionsBuilder builder_(_fbb);
- builder_.add_num_splits(num_splits);
- return builder_.Finish();
-}
-
-struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_NUM_SPLITS = 4
- };
- int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) &&
- verifier.EndTable();
- }
-};
-
-struct SplitVOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_num_splits(int32_t num_splits)
- {
- fbb_.AddElement<int32_t>(SplitVOptions::VT_NUM_SPLITS, num_splits, 0);
- }
- explicit SplitVOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &);
- flatbuffers::Offset<SplitVOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SplitVOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t num_splits = 0)
-{
- SplitVOptionsBuilder builder_(_fbb);
- builder_.add_num_splits(num_splits);
- return builder_.Finish();
-}
-
-struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_BEGIN_MASK = 4,
- VT_END_MASK = 6,
- VT_ELLIPSIS_MASK = 8,
- VT_NEW_AXIS_MASK = 10,
- VT_SHRINK_AXIS_MASK = 12
- };
- int32_t begin_mask() const { return GetField<int32_t>(VT_BEGIN_MASK, 0); }
- int32_t end_mask() const { return GetField<int32_t>(VT_END_MASK, 0); }
- int32_t ellipsis_mask() const { return GetField<int32_t>(VT_ELLIPSIS_MASK, 0); }
- int32_t new_axis_mask() const { return GetField<int32_t>(VT_NEW_AXIS_MASK, 0); }
- int32_t shrink_axis_mask() const { return GetField<int32_t>(VT_SHRINK_AXIS_MASK, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BEGIN_MASK) &&
- VerifyField<int32_t>(verifier, VT_END_MASK) &&
- VerifyField<int32_t>(verifier, VT_ELLIPSIS_MASK) &&
- VerifyField<int32_t>(verifier, VT_NEW_AXIS_MASK) &&
- VerifyField<int32_t>(verifier, VT_SHRINK_AXIS_MASK) && verifier.EndTable();
- }
-};
-
-struct StridedSliceOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_begin_mask(int32_t begin_mask)
- {
- fbb_.AddElement<int32_t>(StridedSliceOptions::VT_BEGIN_MASK, begin_mask, 0);
- }
- void add_end_mask(int32_t end_mask)
- {
- fbb_.AddElement<int32_t>(StridedSliceOptions::VT_END_MASK, end_mask, 0);
- }
- void add_ellipsis_mask(int32_t ellipsis_mask)
- {
- fbb_.AddElement<int32_t>(StridedSliceOptions::VT_ELLIPSIS_MASK, ellipsis_mask, 0);
- }
- void add_new_axis_mask(int32_t new_axis_mask)
- {
- fbb_.AddElement<int32_t>(StridedSliceOptions::VT_NEW_AXIS_MASK, new_axis_mask, 0);
- }
- void add_shrink_axis_mask(int32_t shrink_axis_mask)
- {
- fbb_.AddElement<int32_t>(StridedSliceOptions::VT_SHRINK_AXIS_MASK, shrink_axis_mask, 0);
- }
- explicit StridedSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &);
- flatbuffers::Offset<StridedSliceOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<StridedSliceOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<StridedSliceOptions>
-CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t begin_mask = 0,
- int32_t end_mask = 0, int32_t ellipsis_mask = 0,
- int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0)
-{
- StridedSliceOptionsBuilder builder_(_fbb);
- builder_.add_shrink_axis_mask(shrink_axis_mask);
- builder_.add_new_axis_mask(new_axis_mask);
- builder_.add_ellipsis_mask(ellipsis_mask);
- builder_.add_end_mask(end_mask);
- builder_.add_begin_mask(begin_mask);
- return builder_.Finish();
-}
-
-struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct LogSoftmaxOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &);
- flatbuffers::Offset<LogSoftmaxOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LogSoftmaxOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LogSoftmaxOptions>
-CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- LogSoftmaxOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_IN_DATA_TYPE = 4,
- VT_OUT_DATA_TYPE = 6
- };
- TensorType in_data_type() const
- {
- return static_cast<TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
- }
- TensorType out_data_type() const
- {
- return static_cast<TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_IN_DATA_TYPE) &&
- VerifyField<int8_t>(verifier, VT_OUT_DATA_TYPE) && verifier.EndTable();
- }
-};
-
-struct CastOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_in_data_type(TensorType in_data_type)
- {
- fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, static_cast<int8_t>(in_data_type), 0);
- }
- void add_out_data_type(TensorType out_data_type)
- {
- fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, static_cast<int8_t>(out_data_type), 0);
- }
- explicit CastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- CastOptionsBuilder &operator=(const CastOptionsBuilder &);
- flatbuffers::Offset<CastOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<CastOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<CastOptions>
-CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType in_data_type = TensorType_FLOAT32,
- TensorType out_data_type = TensorType_FLOAT32)
-{
- CastOptionsBuilder builder_(_fbb);
- builder_.add_out_data_type(out_data_type);
- builder_.add_in_data_type(in_data_type);
- return builder_.Finish();
-}
-
-struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct DequantizeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &);
- flatbuffers::Offset<DequantizeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<DequantizeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<DequantizeOptions>
-CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- DequantizeOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct MaximumMinimumOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &);
- flatbuffers::Offset<MaximumMinimumOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<MaximumMinimumOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<MaximumMinimumOptions>
-CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- MaximumMinimumOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct TileOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- TileOptionsBuilder &operator=(const TileOptionsBuilder &);
- flatbuffers::Offset<TileOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<TileOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- TileOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_OUTPUT_TYPE = 4
- };
- TensorType output_type() const
- {
- return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) &&
- verifier.EndTable();
- }
-};
-
-struct ArgMaxOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_output_type(TensorType output_type)
- {
- fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
- }
- explicit ArgMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &);
- flatbuffers::Offset<ArgMaxOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ArgMaxOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ArgMaxOptions>
-CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType output_type = TensorType_FLOAT32)
-{
- ArgMaxOptionsBuilder builder_(_fbb);
- builder_.add_output_type(output_type);
- return builder_.Finish();
-}
-
-struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_OUTPUT_TYPE = 4
- };
- TensorType output_type() const
- {
- return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) &&
- verifier.EndTable();
- }
-};
-
-struct ArgMinOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_output_type(TensorType output_type)
- {
- fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
- }
- explicit ArgMinOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &);
- flatbuffers::Offset<ArgMinOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ArgMinOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ArgMinOptions>
-CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType output_type = TensorType_FLOAT32)
-{
- ArgMinOptionsBuilder builder_(_fbb);
- builder_.add_output_type(output_type);
- return builder_.Finish();
-}
-
-struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct GreaterOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &);
- flatbuffers::Offset<GreaterOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<GreaterOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<GreaterOptions>
-CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- GreaterOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct GreaterEqualOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &);
- flatbuffers::Offset<GreaterEqualOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<GreaterEqualOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<GreaterEqualOptions>
-CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- GreaterEqualOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct LessOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LessOptionsBuilder &operator=(const LessOptionsBuilder &);
- flatbuffers::Offset<LessOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LessOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- LessOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct LessEqualOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &);
- flatbuffers::Offset<LessEqualOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LessEqualOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LessEqualOptions>
-CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- LessEqualOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct NegOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- NegOptionsBuilder &operator=(const NegOptionsBuilder &);
- flatbuffers::Offset<NegOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<NegOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- NegOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct SelectOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SelectOptionsBuilder &operator=(const SelectOptionsBuilder &);
- flatbuffers::Offset<SelectOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SelectOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- SelectOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct SliceOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SliceOptionsBuilder &operator=(const SliceOptionsBuilder &);
- flatbuffers::Offset<SliceOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SliceOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- SliceOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_PADDING = 4,
- VT_STRIDE_W = 6,
- VT_STRIDE_H = 8
- };
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
- int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
- int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_H) && verifier.EndTable();
- }
-};
-
-struct TransposeConvOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
- {
- fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
- }
- void add_stride_w(int32_t stride_w)
- {
- fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_W, stride_w, 0);
- }
- void add_stride_h(int32_t stride_h)
- {
- fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_H, stride_h, 0);
- }
- explicit TransposeConvOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &);
- flatbuffers::Offset<TransposeConvOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<TransposeConvOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<TransposeConvOptions>
-CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0)
-{
- TransposeConvOptionsBuilder builder_(_fbb);
- builder_.add_stride_h(stride_h);
- builder_.add_stride_w(stride_w);
- builder_.add_padding(padding);
- return builder_.Finish();
-}
-
-struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct ExpandDimsOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &);
- flatbuffers::Offset<ExpandDimsOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ExpandDimsOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ExpandDimsOptions>
-CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- ExpandDimsOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_VALIDATE_INDICES = 4
- };
- bool validate_indices() const { return GetField<uint8_t>(VT_VALIDATE_INDICES, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_VALIDATE_INDICES) &&
- verifier.EndTable();
- }
-};
-
-struct SparseToDenseOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_validate_indices(bool validate_indices)
- {
- fbb_.AddElement<uint8_t>(SparseToDenseOptions::VT_VALIDATE_INDICES,
- static_cast<uint8_t>(validate_indices), 0);
- }
- explicit SparseToDenseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &);
- flatbuffers::Offset<SparseToDenseOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SparseToDenseOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SparseToDenseOptions>
-CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, bool validate_indices = false)
-{
- SparseToDenseOptionsBuilder builder_(_fbb);
- builder_.add_validate_indices(validate_indices);
- return builder_.Finish();
-}
-
-struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct EqualOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- EqualOptionsBuilder &operator=(const EqualOptionsBuilder &);
- flatbuffers::Offset<EqualOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<EqualOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- EqualOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct NotEqualOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &);
- flatbuffers::Offset<NotEqualOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<NotEqualOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<NotEqualOptions>
-CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- NotEqualOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_OUT_TYPE = 4
- };
- TensorType out_type() const { return static_cast<TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0)); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUT_TYPE) &&
- verifier.EndTable();
- }
-};
-
-struct ShapeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_out_type(TensorType out_type)
- {
- fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0);
- }
- explicit ShapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &);
- flatbuffers::Offset<ShapeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ShapeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ShapeOptions>
-CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = TensorType_FLOAT32)
-{
- ShapeOptionsBuilder builder_(_fbb);
- builder_.add_out_type(out_type);
- return builder_.Finish();
-}
-
-struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct PowOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- PowOptionsBuilder &operator=(const PowOptionsBuilder &);
- flatbuffers::Offset<PowOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<PowOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- PowOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_MIN = 4,
- VT_MAX = 6,
- VT_NUM_BITS = 8,
- VT_NARROW_RANGE = 10
- };
- float min() const { return GetField<float>(VT_MIN, 0.0f); }
- float max() const { return GetField<float>(VT_MAX, 0.0f); }
- int32_t num_bits() const { return GetField<int32_t>(VT_NUM_BITS, 0); }
- bool narrow_range() const { return GetField<uint8_t>(VT_NARROW_RANGE, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_MIN) &&
- VerifyField<float>(verifier, VT_MAX) && VerifyField<int32_t>(verifier, VT_NUM_BITS) &&
- VerifyField<uint8_t>(verifier, VT_NARROW_RANGE) && verifier.EndTable();
- }
-};
-
-struct FakeQuantOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_min(float min) { fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f); }
- void add_max(float max) { fbb_.AddElement<float>(FakeQuantOptions::VT_MAX, max, 0.0f); }
- void add_num_bits(int32_t num_bits)
- {
- fbb_.AddElement<int32_t>(FakeQuantOptions::VT_NUM_BITS, num_bits, 0);
- }
- void add_narrow_range(bool narrow_range)
- {
- fbb_.AddElement<uint8_t>(FakeQuantOptions::VT_NARROW_RANGE, static_cast<uint8_t>(narrow_range),
- 0);
- }
- explicit FakeQuantOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &);
- flatbuffers::Offset<FakeQuantOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<FakeQuantOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<FakeQuantOptions>
-CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, float min = 0.0f, float max = 0.0f,
- int32_t num_bits = 0, bool narrow_range = false)
-{
- FakeQuantOptionsBuilder builder_(_fbb);
- builder_.add_num_bits(num_bits);
- builder_.add_max(max);
- builder_.add_min(min);
- builder_.add_narrow_range(narrow_range);
- return builder_.Finish();
-}
-
-struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_VALUES_COUNT = 4,
- VT_AXIS = 6
- };
- int32_t values_count() const { return GetField<int32_t>(VT_VALUES_COUNT, 0); }
- int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_VALUES_COUNT) &&
- VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
- }
-};
-
-struct PackOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_values_count(int32_t values_count)
- {
- fbb_.AddElement<int32_t>(PackOptions::VT_VALUES_COUNT, values_count, 0);
- }
- void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(PackOptions::VT_AXIS, axis, 0); }
- explicit PackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- PackOptionsBuilder &operator=(const PackOptionsBuilder &);
- flatbuffers::Offset<PackOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<PackOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<PackOptions>
-CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t values_count = 0, int32_t axis = 0)
-{
- PackOptionsBuilder builder_(_fbb);
- builder_.add_axis(axis);
- builder_.add_values_count(values_count);
- return builder_.Finish();
-}
-
-struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct LogicalOrOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &);
- flatbuffers::Offset<LogicalOrOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LogicalOrOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LogicalOrOptions>
-CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- LogicalOrOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_AXIS = 4
- };
- int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
- verifier.EndTable();
- }
-};
-
-struct OneHotOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0); }
- explicit OneHotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &);
- flatbuffers::Offset<OneHotOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<OneHotOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t axis = 0)
-{
- OneHotOptionsBuilder builder_(_fbb);
- builder_.add_axis(axis);
- return builder_.Finish();
-}
-
-struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct AbsOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- AbsOptionsBuilder &operator=(const AbsOptionsBuilder &);
- flatbuffers::Offset<AbsOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<AbsOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- AbsOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct LogicalAndOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &);
- flatbuffers::Offset<LogicalAndOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LogicalAndOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LogicalAndOptions>
-CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- LogicalAndOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct LogicalNotOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &);
- flatbuffers::Offset<LogicalNotOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LogicalNotOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LogicalNotOptions>
-CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- LogicalNotOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_NUM = 4,
- VT_AXIS = 6
- };
- int32_t num() const { return GetField<int32_t>(VT_NUM, 0); }
- int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM) &&
- VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
- }
-};
-
-struct UnpackOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_num(int32_t num) { fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0); }
- void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(UnpackOptions::VT_AXIS, axis, 0); }
- explicit UnpackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &);
- flatbuffers::Offset<UnpackOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<UnpackOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t num = 0, int32_t axis = 0)
-{
- UnpackOptionsBuilder builder_(_fbb);
- builder_.add_axis(axis);
- builder_.add_num(num);
- return builder_.Finish();
-}
-
-struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct FloorDivOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &);
- flatbuffers::Offset<FloorDivOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<FloorDivOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<FloorDivOptions>
-CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- FloorDivOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct SquareOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SquareOptionsBuilder &operator=(const SquareOptionsBuilder &);
- flatbuffers::Offset<SquareOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SquareOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- SquareOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct ZerosLikeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &);
- flatbuffers::Offset<ZerosLikeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ZerosLikeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ZerosLikeOptions>
-CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- ZerosLikeOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct FillOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- FillOptionsBuilder &operator=(const FillOptionsBuilder &);
- flatbuffers::Offset<FillOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<FillOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- FillOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct FloorModOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
- flatbuffers::Offset<FloorModOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<FloorModOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<FloorModOptions>
-CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- FloorModOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct RangeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- RangeOptionsBuilder &operator=(const RangeOptionsBuilder &);
- flatbuffers::Offset<RangeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<RangeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- RangeOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_ALPHA = 4
- };
- float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_ALPHA) &&
- verifier.EndTable();
- }
-};
-
-struct LeakyReluOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_alpha(float alpha) { fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); }
- explicit LeakyReluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &);
- flatbuffers::Offset<LeakyReluOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LeakyReluOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LeakyReluOptions>
-CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, float alpha = 0.0f)
-{
- LeakyReluOptionsBuilder builder_(_fbb);
- builder_.add_alpha(alpha);
- return builder_.Finish();
-}
-
-struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct SquaredDifferenceOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &);
- flatbuffers::Offset<SquaredDifferenceOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SquaredDifferenceOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SquaredDifferenceOptions>
-CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- SquaredDifferenceOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_MODE = 4
- };
- MirrorPadMode mode() const { return static_cast<MirrorPadMode>(GetField<int8_t>(VT_MODE, 0)); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_MODE) &&
- verifier.EndTable();
- }
-};
-
-struct MirrorPadOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_mode(MirrorPadMode mode)
- {
- fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0);
- }
- explicit MirrorPadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &);
- flatbuffers::Offset<MirrorPadOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<MirrorPadOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<MirrorPadOptions>
-CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
- MirrorPadMode mode = MirrorPadMode_REFLECT)
-{
- MirrorPadOptionsBuilder builder_(_fbb);
- builder_.add_mode(mode);
- return builder_.Finish();
-}
-
-struct InstanceNormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_EPSILON = 4,
- VT_FUSED_ACTIVATION_FUNCTION = 6
- };
- float epsilon() const { return GetField<float>(VT_EPSILON, 0.0f); }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_EPSILON) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct InstanceNormOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_epsilon(float epsilon)
- {
- fbb_.AddElement<float>(InstanceNormOptions::VT_EPSILON, epsilon, 0.0f);
- }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(InstanceNormOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit InstanceNormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- InstanceNormOptionsBuilder &operator=(const InstanceNormOptionsBuilder &);
- flatbuffers::Offset<InstanceNormOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<InstanceNormOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<InstanceNormOptions> CreateInstanceNormOptions(
- flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- InstanceNormOptionsBuilder builder_(_fbb);
- builder_.add_epsilon(epsilon);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_BUILTIN_CODE = 4,
- VT_CUSTOM_CODE = 6,
- VT_VERSION = 8
- };
- BuiltinOperator builtin_code() const
- {
- return static_cast<BuiltinOperator>(GetField<uint8_t>(VT_BUILTIN_CODE, 0));
- }
- const flatbuffers::String *custom_code() const
- {
- return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
- }
- int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_BUILTIN_CODE) &&
- VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) &&
- VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable();
- }
-};
-
-struct OperatorCodeBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_builtin_code(BuiltinOperator builtin_code)
- {
- fbb_.AddElement<uint8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<uint8_t>(builtin_code), 0);
- }
- void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code)
- {
- fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code);
- }
- void add_version(int32_t version)
- {
- fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
- }
- explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
- flatbuffers::Offset<OperatorCode> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<OperatorCode>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb,
- BuiltinOperator builtin_code = BuiltinOperator_ADD,
- flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1)
-{
- OperatorCodeBuilder builder_(_fbb);
- builder_.add_version(version);
- builder_.add_custom_code(custom_code);
- builder_.add_builtin_code(builtin_code);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb,
- BuiltinOperator builtin_code = BuiltinOperator_ADD,
- const char *custom_code = nullptr, int32_t version = 1)
-{
- return circle::CreateOperatorCode(_fbb, builtin_code,
- custom_code ? _fbb.CreateString(custom_code) : 0, version);
-}
-
-struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_OPCODE_INDEX = 4,
- VT_INPUTS = 6,
- VT_OUTPUTS = 8,
- VT_BUILTIN_OPTIONS_TYPE = 10,
- VT_BUILTIN_OPTIONS = 12,
- VT_CUSTOM_OPTIONS = 14,
- VT_CUSTOM_OPTIONS_FORMAT = 16,
- VT_MUTATING_VARIABLE_INPUTS = 18
- };
- uint32_t opcode_index() const { return GetField<uint32_t>(VT_OPCODE_INDEX, 0); }
- const flatbuffers::Vector<int32_t> *inputs() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
- }
- const flatbuffers::Vector<int32_t> *outputs() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
- }
- BuiltinOptions builtin_options_type() const
- {
- return static_cast<BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
- }
- const void *builtin_options() const { return GetPointer<const void *>(VT_BUILTIN_OPTIONS); }
- template <typename T> const T *builtin_options_as() const;
- const Conv2DOptions *builtin_options_as_Conv2DOptions() const
- {
- return builtin_options_type() == BuiltinOptions_Conv2DOptions
- ? static_cast<const Conv2DOptions *>(builtin_options())
- : nullptr;
- }
- const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
- {
- return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions
- ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
- : nullptr;
- }
- const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions
- ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
- : nullptr;
- }
- const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LSHProjectionOptions
- ? static_cast<const LSHProjectionOptions *>(builtin_options())
- : nullptr;
- }
- const Pool2DOptions *builtin_options_as_Pool2DOptions() const
- {
- return builtin_options_type() == BuiltinOptions_Pool2DOptions
- ? static_cast<const Pool2DOptions *>(builtin_options())
- : nullptr;
- }
- const SVDFOptions *builtin_options_as_SVDFOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SVDFOptions
- ? static_cast<const SVDFOptions *>(builtin_options())
- : nullptr;
- }
- const RNNOptions *builtin_options_as_RNNOptions() const
- {
- return builtin_options_type() == BuiltinOptions_RNNOptions
- ? static_cast<const RNNOptions *>(builtin_options())
- : nullptr;
- }
- const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
- {
- return builtin_options_type() == BuiltinOptions_FullyConnectedOptions
- ? static_cast<const FullyConnectedOptions *>(builtin_options())
- : nullptr;
- }
- const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SoftmaxOptions
- ? static_cast<const SoftmaxOptions *>(builtin_options())
- : nullptr;
- }
- const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ConcatenationOptions
- ? static_cast<const ConcatenationOptions *>(builtin_options())
- : nullptr;
- }
- const AddOptions *builtin_options_as_AddOptions() const
- {
- return builtin_options_type() == BuiltinOptions_AddOptions
- ? static_cast<const AddOptions *>(builtin_options())
- : nullptr;
- }
- const L2NormOptions *builtin_options_as_L2NormOptions() const
- {
- return builtin_options_type() == BuiltinOptions_L2NormOptions
- ? static_cast<const L2NormOptions *>(builtin_options())
- : nullptr;
- }
- const LocalResponseNormalizationOptions *
- builtin_options_as_LocalResponseNormalizationOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions
- ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options())
- : nullptr;
- }
- const LSTMOptions *builtin_options_as_LSTMOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LSTMOptions
- ? static_cast<const LSTMOptions *>(builtin_options())
- : nullptr;
- }
- const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions
- ? static_cast<const ResizeBilinearOptions *>(builtin_options())
- : nullptr;
- }
- const CallOptions *builtin_options_as_CallOptions() const
- {
- return builtin_options_type() == BuiltinOptions_CallOptions
- ? static_cast<const CallOptions *>(builtin_options())
- : nullptr;
- }
- const ReshapeOptions *builtin_options_as_ReshapeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ReshapeOptions
- ? static_cast<const ReshapeOptions *>(builtin_options())
- : nullptr;
- }
- const SkipGramOptions *builtin_options_as_SkipGramOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SkipGramOptions
- ? static_cast<const SkipGramOptions *>(builtin_options())
- : nullptr;
- }
- const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions
- ? static_cast<const SpaceToDepthOptions *>(builtin_options())
- : nullptr;
- }
- const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const
- {
- return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions
- ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options())
- : nullptr;
- }
- const MulOptions *builtin_options_as_MulOptions() const
- {
- return builtin_options_type() == BuiltinOptions_MulOptions
- ? static_cast<const MulOptions *>(builtin_options())
- : nullptr;
- }
- const PadOptions *builtin_options_as_PadOptions() const
- {
- return builtin_options_type() == BuiltinOptions_PadOptions
- ? static_cast<const PadOptions *>(builtin_options())
- : nullptr;
- }
- const GatherOptions *builtin_options_as_GatherOptions() const
- {
- return builtin_options_type() == BuiltinOptions_GatherOptions
- ? static_cast<const GatherOptions *>(builtin_options())
- : nullptr;
- }
- const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
- {
- return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions
- ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
- : nullptr;
- }
- const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions
- ? static_cast<const SpaceToBatchNDOptions *>(builtin_options())
- : nullptr;
- }
- const TransposeOptions *builtin_options_as_TransposeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_TransposeOptions
- ? static_cast<const TransposeOptions *>(builtin_options())
- : nullptr;
- }
- const ReducerOptions *builtin_options_as_ReducerOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ReducerOptions
- ? static_cast<const ReducerOptions *>(builtin_options())
- : nullptr;
- }
- const SubOptions *builtin_options_as_SubOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SubOptions
- ? static_cast<const SubOptions *>(builtin_options())
- : nullptr;
- }
- const DivOptions *builtin_options_as_DivOptions() const
- {
- return builtin_options_type() == BuiltinOptions_DivOptions
- ? static_cast<const DivOptions *>(builtin_options())
- : nullptr;
- }
- const SqueezeOptions *builtin_options_as_SqueezeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SqueezeOptions
- ? static_cast<const SqueezeOptions *>(builtin_options())
- : nullptr;
- }
- const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SequenceRNNOptions
- ? static_cast<const SequenceRNNOptions *>(builtin_options())
- : nullptr;
- }
- const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
- {
- return builtin_options_type() == BuiltinOptions_StridedSliceOptions
- ? static_cast<const StridedSliceOptions *>(builtin_options())
- : nullptr;
- }
- const ExpOptions *builtin_options_as_ExpOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ExpOptions
- ? static_cast<const ExpOptions *>(builtin_options())
- : nullptr;
- }
- const TopKV2Options *builtin_options_as_TopKV2Options() const
- {
- return builtin_options_type() == BuiltinOptions_TopKV2Options
- ? static_cast<const TopKV2Options *>(builtin_options())
- : nullptr;
- }
- const SplitOptions *builtin_options_as_SplitOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SplitOptions
- ? static_cast<const SplitOptions *>(builtin_options())
- : nullptr;
- }
- const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions
- ? static_cast<const LogSoftmaxOptions *>(builtin_options())
- : nullptr;
- }
- const CastOptions *builtin_options_as_CastOptions() const
- {
- return builtin_options_type() == BuiltinOptions_CastOptions
- ? static_cast<const CastOptions *>(builtin_options())
- : nullptr;
- }
- const DequantizeOptions *builtin_options_as_DequantizeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_DequantizeOptions
- ? static_cast<const DequantizeOptions *>(builtin_options())
- : nullptr;
- }
- const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
- {
- return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions
- ? static_cast<const MaximumMinimumOptions *>(builtin_options())
- : nullptr;
- }
- const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ArgMaxOptions
- ? static_cast<const ArgMaxOptions *>(builtin_options())
- : nullptr;
- }
- const LessOptions *builtin_options_as_LessOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LessOptions
- ? static_cast<const LessOptions *>(builtin_options())
- : nullptr;
- }
- const NegOptions *builtin_options_as_NegOptions() const
- {
- return builtin_options_type() == BuiltinOptions_NegOptions
- ? static_cast<const NegOptions *>(builtin_options())
- : nullptr;
- }
- const PadV2Options *builtin_options_as_PadV2Options() const
- {
- return builtin_options_type() == BuiltinOptions_PadV2Options
- ? static_cast<const PadV2Options *>(builtin_options())
- : nullptr;
- }
- const GreaterOptions *builtin_options_as_GreaterOptions() const
- {
- return builtin_options_type() == BuiltinOptions_GreaterOptions
- ? static_cast<const GreaterOptions *>(builtin_options())
- : nullptr;
- }
- const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
- {
- return builtin_options_type() == BuiltinOptions_GreaterEqualOptions
- ? static_cast<const GreaterEqualOptions *>(builtin_options())
- : nullptr;
- }
- const LessEqualOptions *builtin_options_as_LessEqualOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LessEqualOptions
- ? static_cast<const LessEqualOptions *>(builtin_options())
- : nullptr;
- }
- const SelectOptions *builtin_options_as_SelectOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SelectOptions
- ? static_cast<const SelectOptions *>(builtin_options())
- : nullptr;
- }
- const SliceOptions *builtin_options_as_SliceOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SliceOptions
- ? static_cast<const SliceOptions *>(builtin_options())
- : nullptr;
- }
- const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
- {
- return builtin_options_type() == BuiltinOptions_TransposeConvOptions
- ? static_cast<const TransposeConvOptions *>(builtin_options())
- : nullptr;
- }
- const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SparseToDenseOptions
- ? static_cast<const SparseToDenseOptions *>(builtin_options())
- : nullptr;
- }
- const TileOptions *builtin_options_as_TileOptions() const
- {
- return builtin_options_type() == BuiltinOptions_TileOptions
- ? static_cast<const TileOptions *>(builtin_options())
- : nullptr;
- }
- const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ExpandDimsOptions
- ? static_cast<const ExpandDimsOptions *>(builtin_options())
- : nullptr;
- }
- const EqualOptions *builtin_options_as_EqualOptions() const
- {
- return builtin_options_type() == BuiltinOptions_EqualOptions
- ? static_cast<const EqualOptions *>(builtin_options())
- : nullptr;
- }
- const NotEqualOptions *builtin_options_as_NotEqualOptions() const
- {
- return builtin_options_type() == BuiltinOptions_NotEqualOptions
- ? static_cast<const NotEqualOptions *>(builtin_options())
- : nullptr;
- }
- const ShapeOptions *builtin_options_as_ShapeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ShapeOptions
- ? static_cast<const ShapeOptions *>(builtin_options())
- : nullptr;
- }
- const PowOptions *builtin_options_as_PowOptions() const
- {
- return builtin_options_type() == BuiltinOptions_PowOptions
- ? static_cast<const PowOptions *>(builtin_options())
- : nullptr;
- }
- const ArgMinOptions *builtin_options_as_ArgMinOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ArgMinOptions
- ? static_cast<const ArgMinOptions *>(builtin_options())
- : nullptr;
- }
- const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
- {
- return builtin_options_type() == BuiltinOptions_FakeQuantOptions
- ? static_cast<const FakeQuantOptions *>(builtin_options())
- : nullptr;
- }
- const PackOptions *builtin_options_as_PackOptions() const
- {
- return builtin_options_type() == BuiltinOptions_PackOptions
- ? static_cast<const PackOptions *>(builtin_options())
- : nullptr;
- }
- const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LogicalOrOptions
- ? static_cast<const LogicalOrOptions *>(builtin_options())
- : nullptr;
- }
- const OneHotOptions *builtin_options_as_OneHotOptions() const
- {
- return builtin_options_type() == BuiltinOptions_OneHotOptions
- ? static_cast<const OneHotOptions *>(builtin_options())
- : nullptr;
- }
- const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LogicalAndOptions
- ? static_cast<const LogicalAndOptions *>(builtin_options())
- : nullptr;
- }
- const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LogicalNotOptions
- ? static_cast<const LogicalNotOptions *>(builtin_options())
- : nullptr;
- }
- const UnpackOptions *builtin_options_as_UnpackOptions() const
- {
- return builtin_options_type() == BuiltinOptions_UnpackOptions
- ? static_cast<const UnpackOptions *>(builtin_options())
- : nullptr;
- }
- const FloorDivOptions *builtin_options_as_FloorDivOptions() const
- {
- return builtin_options_type() == BuiltinOptions_FloorDivOptions
- ? static_cast<const FloorDivOptions *>(builtin_options())
- : nullptr;
- }
- const SquareOptions *builtin_options_as_SquareOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SquareOptions
- ? static_cast<const SquareOptions *>(builtin_options())
- : nullptr;
- }
- const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ZerosLikeOptions
- ? static_cast<const ZerosLikeOptions *>(builtin_options())
- : nullptr;
- }
- const FillOptions *builtin_options_as_FillOptions() const
- {
- return builtin_options_type() == BuiltinOptions_FillOptions
- ? static_cast<const FillOptions *>(builtin_options())
- : nullptr;
- }
- const BidirectionalSequenceLSTMOptions *
- builtin_options_as_BidirectionalSequenceLSTMOptions() const
- {
- return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions
- ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
- }
- const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const
- {
- return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions
- ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options())
- : nullptr;
- }
- const UnidirectionalSequenceLSTMOptions *
- builtin_options_as_UnidirectionalSequenceLSTMOptions() const
- {
- return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions
- ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
- }
- const FloorModOptions *builtin_options_as_FloorModOptions() const
- {
- return builtin_options_type() == BuiltinOptions_FloorModOptions
- ? static_cast<const FloorModOptions *>(builtin_options())
- : nullptr;
- }
- const RangeOptions *builtin_options_as_RangeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_RangeOptions
- ? static_cast<const RangeOptions *>(builtin_options())
- : nullptr;
- }
- const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions
- ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options())
- : nullptr;
- }
- const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LeakyReluOptions
- ? static_cast<const LeakyReluOptions *>(builtin_options())
- : nullptr;
- }
- const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions
- ? static_cast<const SquaredDifferenceOptions *>(builtin_options())
- : nullptr;
- }
- const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
- {
- return builtin_options_type() == BuiltinOptions_MirrorPadOptions
- ? static_cast<const MirrorPadOptions *>(builtin_options())
- : nullptr;
- }
- const AbsOptions *builtin_options_as_AbsOptions() const
- {
- return builtin_options_type() == BuiltinOptions_AbsOptions
- ? static_cast<const AbsOptions *>(builtin_options())
- : nullptr;
- }
- const SplitVOptions *builtin_options_as_SplitVOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SplitVOptions
- ? static_cast<const SplitVOptions *>(builtin_options())
- : nullptr;
- }
- const InstanceNormOptions *builtin_options_as_InstanceNormOptions() const
- {
- return builtin_options_type() == BuiltinOptions_InstanceNormOptions
- ? static_cast<const InstanceNormOptions *>(builtin_options())
- : nullptr;
- }
- const flatbuffers::Vector<uint8_t> *custom_options() const
- {
- return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
- }
- CustomOptionsFormat custom_options_format() const
- {
- return static_cast<CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
- }
- const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const
- {
- return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_MUTATING_VARIABLE_INPUTS);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_OPCODE_INDEX) &&
- VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) &&
- VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
- VerifyField<uint8_t>(verifier, VT_BUILTIN_OPTIONS_TYPE) &&
- VerifyOffset(verifier, VT_BUILTIN_OPTIONS) &&
- VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) &&
- VerifyOffset(verifier, VT_CUSTOM_OPTIONS) && verifier.VerifyVector(custom_options()) &&
- VerifyField<int8_t>(verifier, VT_CUSTOM_OPTIONS_FORMAT) &&
- VerifyOffset(verifier, VT_MUTATING_VARIABLE_INPUTS) &&
- verifier.VerifyVector(mutating_variable_inputs()) && verifier.EndTable();
- }
-};
-
-template <> inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>() const
-{
- return builtin_options_as_Conv2DOptions();
-}
-
-template <>
-inline const DepthwiseConv2DOptions *Operator::builtin_options_as<DepthwiseConv2DOptions>() const
-{
- return builtin_options_as_DepthwiseConv2DOptions();
-}
-
-template <>
-inline const ConcatEmbeddingsOptions *Operator::builtin_options_as<ConcatEmbeddingsOptions>() const
-{
- return builtin_options_as_ConcatEmbeddingsOptions();
-}
-
-template <>
-inline const LSHProjectionOptions *Operator::builtin_options_as<LSHProjectionOptions>() const
-{
- return builtin_options_as_LSHProjectionOptions();
-}
-
-template <> inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>() const
-{
- return builtin_options_as_Pool2DOptions();
-}
-
-template <> inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const
-{
- return builtin_options_as_SVDFOptions();
-}
-
-template <> inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const
-{
- return builtin_options_as_RNNOptions();
-}
-
-template <>
-inline const FullyConnectedOptions *Operator::builtin_options_as<FullyConnectedOptions>() const
-{
- return builtin_options_as_FullyConnectedOptions();
-}
-
-template <> inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>() const
-{
- return builtin_options_as_SoftmaxOptions();
-}
-
-template <>
-inline const ConcatenationOptions *Operator::builtin_options_as<ConcatenationOptions>() const
-{
- return builtin_options_as_ConcatenationOptions();
-}
-
-template <> inline const AddOptions *Operator::builtin_options_as<AddOptions>() const
-{
- return builtin_options_as_AddOptions();
-}
-
-template <> inline const L2NormOptions *Operator::builtin_options_as<L2NormOptions>() const
-{
- return builtin_options_as_L2NormOptions();
-}
-
-template <>
-inline const LocalResponseNormalizationOptions *
-Operator::builtin_options_as<LocalResponseNormalizationOptions>() const
-{
- return builtin_options_as_LocalResponseNormalizationOptions();
-}
-
-template <> inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const
-{
- return builtin_options_as_LSTMOptions();
-}
-
-template <>
-inline const ResizeBilinearOptions *Operator::builtin_options_as<ResizeBilinearOptions>() const
-{
- return builtin_options_as_ResizeBilinearOptions();
-}
-
-template <> inline const CallOptions *Operator::builtin_options_as<CallOptions>() const
-{
- return builtin_options_as_CallOptions();
-}
-
-template <> inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>() const
-{
- return builtin_options_as_ReshapeOptions();
-}
-
-template <> inline const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>() const
-{
- return builtin_options_as_SkipGramOptions();
-}
-
-template <>
-inline const SpaceToDepthOptions *Operator::builtin_options_as<SpaceToDepthOptions>() const
-{
- return builtin_options_as_SpaceToDepthOptions();
-}
-
-template <>
-inline const EmbeddingLookupSparseOptions *
-Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const
-{
- return builtin_options_as_EmbeddingLookupSparseOptions();
-}
-
-template <> inline const MulOptions *Operator::builtin_options_as<MulOptions>() const
-{
- return builtin_options_as_MulOptions();
-}
-
-template <> inline const PadOptions *Operator::builtin_options_as<PadOptions>() const
-{
- return builtin_options_as_PadOptions();
-}
-
-template <> inline const GatherOptions *Operator::builtin_options_as<GatherOptions>() const
-{
- return builtin_options_as_GatherOptions();
-}
-
-template <>
-inline const BatchToSpaceNDOptions *Operator::builtin_options_as<BatchToSpaceNDOptions>() const
-{
- return builtin_options_as_BatchToSpaceNDOptions();
-}
-
-template <>
-inline const SpaceToBatchNDOptions *Operator::builtin_options_as<SpaceToBatchNDOptions>() const
-{
- return builtin_options_as_SpaceToBatchNDOptions();
-}
-
-template <> inline const TransposeOptions *Operator::builtin_options_as<TransposeOptions>() const
-{
- return builtin_options_as_TransposeOptions();
-}
-
-template <> inline const ReducerOptions *Operator::builtin_options_as<ReducerOptions>() const
-{
- return builtin_options_as_ReducerOptions();
-}
-
-template <> inline const SubOptions *Operator::builtin_options_as<SubOptions>() const
-{
- return builtin_options_as_SubOptions();
-}
-
-template <> inline const DivOptions *Operator::builtin_options_as<DivOptions>() const
-{
- return builtin_options_as_DivOptions();
-}
-
-template <> inline const SqueezeOptions *Operator::builtin_options_as<SqueezeOptions>() const
-{
- return builtin_options_as_SqueezeOptions();
-}
-
-template <>
-inline const SequenceRNNOptions *Operator::builtin_options_as<SequenceRNNOptions>() const
-{
- return builtin_options_as_SequenceRNNOptions();
-}
-
-template <>
-inline const StridedSliceOptions *Operator::builtin_options_as<StridedSliceOptions>() const
-{
- return builtin_options_as_StridedSliceOptions();
-}
-
-template <> inline const ExpOptions *Operator::builtin_options_as<ExpOptions>() const
-{
- return builtin_options_as_ExpOptions();
-}
-
-template <> inline const TopKV2Options *Operator::builtin_options_as<TopKV2Options>() const
-{
- return builtin_options_as_TopKV2Options();
-}
-
-template <> inline const SplitOptions *Operator::builtin_options_as<SplitOptions>() const
-{
- return builtin_options_as_SplitOptions();
-}
-
-template <> inline const LogSoftmaxOptions *Operator::builtin_options_as<LogSoftmaxOptions>() const
-{
- return builtin_options_as_LogSoftmaxOptions();
-}
-
-template <> inline const CastOptions *Operator::builtin_options_as<CastOptions>() const
-{
- return builtin_options_as_CastOptions();
-}
-
-template <> inline const DequantizeOptions *Operator::builtin_options_as<DequantizeOptions>() const
-{
- return builtin_options_as_DequantizeOptions();
-}
-
-template <>
-inline const MaximumMinimumOptions *Operator::builtin_options_as<MaximumMinimumOptions>() const
-{
- return builtin_options_as_MaximumMinimumOptions();
-}
-
-template <> inline const ArgMaxOptions *Operator::builtin_options_as<ArgMaxOptions>() const
-{
- return builtin_options_as_ArgMaxOptions();
-}
-
-template <> inline const LessOptions *Operator::builtin_options_as<LessOptions>() const
-{
- return builtin_options_as_LessOptions();
-}
-
-template <> inline const NegOptions *Operator::builtin_options_as<NegOptions>() const
-{
- return builtin_options_as_NegOptions();
-}
-
-template <> inline const PadV2Options *Operator::builtin_options_as<PadV2Options>() const
-{
- return builtin_options_as_PadV2Options();
-}
-
-template <> inline const GreaterOptions *Operator::builtin_options_as<GreaterOptions>() const
-{
- return builtin_options_as_GreaterOptions();
-}
-
-template <>
-inline const GreaterEqualOptions *Operator::builtin_options_as<GreaterEqualOptions>() const
-{
- return builtin_options_as_GreaterEqualOptions();
-}
-
-template <> inline const LessEqualOptions *Operator::builtin_options_as<LessEqualOptions>() const
-{
- return builtin_options_as_LessEqualOptions();
-}
-
-template <> inline const SelectOptions *Operator::builtin_options_as<SelectOptions>() const
-{
- return builtin_options_as_SelectOptions();
-}
-
-template <> inline const SliceOptions *Operator::builtin_options_as<SliceOptions>() const
-{
- return builtin_options_as_SliceOptions();
-}
-
-template <>
-inline const TransposeConvOptions *Operator::builtin_options_as<TransposeConvOptions>() const
-{
- return builtin_options_as_TransposeConvOptions();
-}
-
-template <>
-inline const SparseToDenseOptions *Operator::builtin_options_as<SparseToDenseOptions>() const
-{
- return builtin_options_as_SparseToDenseOptions();
-}
-
-template <> inline const TileOptions *Operator::builtin_options_as<TileOptions>() const
-{
- return builtin_options_as_TileOptions();
-}
-
-template <> inline const ExpandDimsOptions *Operator::builtin_options_as<ExpandDimsOptions>() const
-{
- return builtin_options_as_ExpandDimsOptions();
-}
-
-template <> inline const EqualOptions *Operator::builtin_options_as<EqualOptions>() const
-{
- return builtin_options_as_EqualOptions();
-}
-
-template <> inline const NotEqualOptions *Operator::builtin_options_as<NotEqualOptions>() const
-{
- return builtin_options_as_NotEqualOptions();
-}
-
-template <> inline const ShapeOptions *Operator::builtin_options_as<ShapeOptions>() const
-{
- return builtin_options_as_ShapeOptions();
-}
-
-template <> inline const PowOptions *Operator::builtin_options_as<PowOptions>() const
-{
- return builtin_options_as_PowOptions();
-}
-
-template <> inline const ArgMinOptions *Operator::builtin_options_as<ArgMinOptions>() const
-{
- return builtin_options_as_ArgMinOptions();
-}
-
-template <> inline const FakeQuantOptions *Operator::builtin_options_as<FakeQuantOptions>() const
-{
- return builtin_options_as_FakeQuantOptions();
-}
-
-template <> inline const PackOptions *Operator::builtin_options_as<PackOptions>() const
-{
- return builtin_options_as_PackOptions();
-}
-
-template <> inline const LogicalOrOptions *Operator::builtin_options_as<LogicalOrOptions>() const
-{
- return builtin_options_as_LogicalOrOptions();
-}
-
-template <> inline const OneHotOptions *Operator::builtin_options_as<OneHotOptions>() const
-{
- return builtin_options_as_OneHotOptions();
-}
-
-template <> inline const LogicalAndOptions *Operator::builtin_options_as<LogicalAndOptions>() const
-{
- return builtin_options_as_LogicalAndOptions();
-}
-
-template <> inline const LogicalNotOptions *Operator::builtin_options_as<LogicalNotOptions>() const
-{
- return builtin_options_as_LogicalNotOptions();
-}
-
-template <> inline const UnpackOptions *Operator::builtin_options_as<UnpackOptions>() const
-{
- return builtin_options_as_UnpackOptions();
-}
-
-template <> inline const FloorDivOptions *Operator::builtin_options_as<FloorDivOptions>() const
-{
- return builtin_options_as_FloorDivOptions();
-}
-
-template <> inline const SquareOptions *Operator::builtin_options_as<SquareOptions>() const
-{
- return builtin_options_as_SquareOptions();
-}
-
-template <> inline const ZerosLikeOptions *Operator::builtin_options_as<ZerosLikeOptions>() const
-{
- return builtin_options_as_ZerosLikeOptions();
-}
-
-template <> inline const FillOptions *Operator::builtin_options_as<FillOptions>() const
-{
- return builtin_options_as_FillOptions();
-}
-
-template <>
-inline const BidirectionalSequenceLSTMOptions *
-Operator::builtin_options_as<BidirectionalSequenceLSTMOptions>() const
-{
- return builtin_options_as_BidirectionalSequenceLSTMOptions();
-}
-
-template <>
-inline const BidirectionalSequenceRNNOptions *
-Operator::builtin_options_as<BidirectionalSequenceRNNOptions>() const
-{
- return builtin_options_as_BidirectionalSequenceRNNOptions();
-}
-
-template <>
-inline const UnidirectionalSequenceLSTMOptions *
-Operator::builtin_options_as<UnidirectionalSequenceLSTMOptions>() const
-{
- return builtin_options_as_UnidirectionalSequenceLSTMOptions();
-}
-
-template <> inline const FloorModOptions *Operator::builtin_options_as<FloorModOptions>() const
-{
- return builtin_options_as_FloorModOptions();
-}
-
-template <> inline const RangeOptions *Operator::builtin_options_as<RangeOptions>() const
-{
- return builtin_options_as_RangeOptions();
-}
-
-template <>
-inline const ResizeNearestNeighborOptions *
-Operator::builtin_options_as<ResizeNearestNeighborOptions>() const
-{
- return builtin_options_as_ResizeNearestNeighborOptions();
-}
-
-template <> inline const LeakyReluOptions *Operator::builtin_options_as<LeakyReluOptions>() const
-{
- return builtin_options_as_LeakyReluOptions();
-}
-
-template <>
-inline const SquaredDifferenceOptions *
-Operator::builtin_options_as<SquaredDifferenceOptions>() const
-{
- return builtin_options_as_SquaredDifferenceOptions();
-}
-
-template <> inline const MirrorPadOptions *Operator::builtin_options_as<MirrorPadOptions>() const
-{
- return builtin_options_as_MirrorPadOptions();
-}
-
-template <> inline const AbsOptions *Operator::builtin_options_as<AbsOptions>() const
-{
- return builtin_options_as_AbsOptions();
-}
-
-template <> inline const SplitVOptions *Operator::builtin_options_as<SplitVOptions>() const
-{
- return builtin_options_as_SplitVOptions();
-}
-
-template <>
-inline const InstanceNormOptions *Operator::builtin_options_as<InstanceNormOptions>() const
-{
- return builtin_options_as_InstanceNormOptions();
-}
-
-struct OperatorBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_opcode_index(uint32_t opcode_index)
- {
- fbb_.AddElement<uint32_t>(Operator::VT_OPCODE_INDEX, opcode_index, 0);
- }
- void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs)
- {
- fbb_.AddOffset(Operator::VT_INPUTS, inputs);
- }
- void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs)
- {
- fbb_.AddOffset(Operator::VT_OUTPUTS, outputs);
- }
- void add_builtin_options_type(BuiltinOptions builtin_options_type)
- {
- fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE,
- static_cast<uint8_t>(builtin_options_type), 0);
- }
- void add_builtin_options(flatbuffers::Offset<void> builtin_options)
- {
- fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options);
- }
- void add_custom_options(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options)
- {
- fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options);
- }
- void add_custom_options_format(CustomOptionsFormat custom_options_format)
- {
- fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT,
- static_cast<int8_t>(custom_options_format), 0);
- }
- void add_mutating_variable_inputs(
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
- {
- fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs);
- }
- explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- OperatorBuilder &operator=(const OperatorBuilder &);
- flatbuffers::Offset<Operator> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Operator>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Operator>
-CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
- CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0)
-{
- OperatorBuilder builder_(_fbb);
- builder_.add_mutating_variable_inputs(mutating_variable_inputs);
- builder_.add_custom_options(custom_options);
- builder_.add_builtin_options(builtin_options);
- builder_.add_outputs(outputs);
- builder_.add_inputs(inputs);
- builder_.add_opcode_index(opcode_index);
- builder_.add_custom_options_format(custom_options_format);
- builder_.add_builtin_options_type(builtin_options_type);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Operator>
-CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- const std::vector<int32_t> *inputs = nullptr,
- const std::vector<int32_t> *outputs = nullptr,
- BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- const std::vector<uint8_t> *custom_options = nullptr,
- CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
- const std::vector<uint8_t> *mutating_variable_inputs = nullptr)
-{
- return circle::CreateOperator(
- _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options,
- custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format,
- mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0);
-}
-
-struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_TENSORS = 4,
- VT_INPUTS = 6,
- VT_OUTPUTS = 8,
- VT_OPERATORS = 10,
- VT_NAME = 12,
- VT_DATA_FORMAT = 14
- };
- const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *tensors() const
- {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(VT_TENSORS);
- }
- const flatbuffers::Vector<int32_t> *inputs() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
- }
- const flatbuffers::Vector<int32_t> *outputs() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
- }
- const flatbuffers::Vector<flatbuffers::Offset<Operator>> *operators() const
- {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(VT_OPERATORS);
- }
- const flatbuffers::String *name() const
- {
- return GetPointer<const flatbuffers::String *>(VT_NAME);
- }
- DataFormat data_format() const
- {
- return static_cast<DataFormat>(GetField<int8_t>(VT_DATA_FORMAT, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_TENSORS) &&
- verifier.VerifyVector(tensors()) && verifier.VerifyVectorOfTables(tensors()) &&
- VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) &&
- VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
- VerifyOffset(verifier, VT_OPERATORS) && verifier.VerifyVector(operators()) &&
- verifier.VerifyVectorOfTables(operators()) && VerifyOffset(verifier, VT_NAME) &&
- verifier.VerifyString(name()) && VerifyField<int8_t>(verifier, VT_DATA_FORMAT) &&
- verifier.EndTable();
- }
-};
-
-struct SubGraphBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors)
- {
- fbb_.AddOffset(SubGraph::VT_TENSORS, tensors);
- }
- void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs)
- {
- fbb_.AddOffset(SubGraph::VT_INPUTS, inputs);
- }
- void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs)
- {
- fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
- }
- void
- add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators)
- {
- fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
- }
- void add_name(flatbuffers::Offset<flatbuffers::String> name)
- {
- fbb_.AddOffset(SubGraph::VT_NAME, name);
- }
- void add_data_format(DataFormat data_format)
- {
- fbb_.AddElement<int8_t>(SubGraph::VT_DATA_FORMAT, static_cast<int8_t>(data_format), 0);
- }
- explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SubGraphBuilder &operator=(const SubGraphBuilder &);
- flatbuffers::Offset<SubGraph> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SubGraph>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SubGraph> CreateSubGraph(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0,
- DataFormat data_format = DataFormat_CHANNELS_LAST)
-{
- SubGraphBuilder builder_(_fbb);
- builder_.add_name(name);
- builder_.add_operators(operators);
- builder_.add_outputs(outputs);
- builder_.add_inputs(inputs);
- builder_.add_tensors(tensors);
- builder_.add_data_format(data_format);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<SubGraph>
-CreateSubGraphDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr,
- const std::vector<int32_t> *inputs = nullptr,
- const std::vector<int32_t> *outputs = nullptr,
- const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr,
- const char *name = nullptr, DataFormat data_format = DataFormat_CHANNELS_LAST)
-{
- return circle::CreateSubGraph(
- _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
- inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
- operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0,
- name ? _fbb.CreateString(name) : 0, data_format);
-}
-
-struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_DATA = 4
- };
- const flatbuffers::Vector<uint8_t> *data() const
- {
- return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_DATA);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_DATA) &&
- verifier.VerifyVector(data()) && verifier.EndTable();
- }
-};
-
-struct BufferBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data)
- {
- fbb_.AddOffset(Buffer::VT_DATA, data);
- }
- explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- BufferBuilder &operator=(const BufferBuilder &);
- flatbuffers::Offset<Buffer> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Buffer>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Buffer>
-CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data = 0)
-{
- BufferBuilder builder_(_fbb);
- builder_.add_data(data);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Buffer> CreateBufferDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<uint8_t> *data = nullptr)
-{
- return circle::CreateBuffer(_fbb, data ? _fbb.CreateVector<uint8_t>(*data) : 0);
-}
-
-struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_VERSION = 4,
- VT_OPERATOR_CODES = 6,
- VT_SUBGRAPHS = 8,
- VT_DESCRIPTION = 10,
- VT_BUFFERS = 12,
- VT_METADATA_BUFFER = 14
- };
- uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); }
- const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const
- {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>(
- VT_OPERATOR_CODES);
- }
- const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const
- {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(VT_SUBGRAPHS);
- }
- const flatbuffers::String *description() const
- {
- return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
- }
- const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *buffers() const
- {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(VT_BUFFERS);
- }
- const flatbuffers::Vector<int32_t> *metadata_buffer() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_VERSION) &&
- VerifyOffset(verifier, VT_OPERATOR_CODES) && verifier.VerifyVector(operator_codes()) &&
- verifier.VerifyVectorOfTables(operator_codes()) &&
- VerifyOffset(verifier, VT_SUBGRAPHS) && verifier.VerifyVector(subgraphs()) &&
- verifier.VerifyVectorOfTables(subgraphs()) && VerifyOffset(verifier, VT_DESCRIPTION) &&
- verifier.VerifyString(description()) && VerifyOffset(verifier, VT_BUFFERS) &&
- verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) &&
- VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) &&
- verifier.EndTable();
- }
-};
-
-struct ModelBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); }
- void add_operator_codes(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes)
- {
- fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
- }
- void
- add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs)
- {
- fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
- }
- void add_description(flatbuffers::Offset<flatbuffers::String> description)
- {
- fbb_.AddOffset(Model::VT_DESCRIPTION, description);
- }
- void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers)
- {
- fbb_.AddOffset(Model::VT_BUFFERS, buffers);
- }
- void add_metadata_buffer(flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer)
- {
- fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer);
- }
- explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ModelBuilder &operator=(const ModelBuilder &);
- flatbuffers::Offset<Model> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Model>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Model> CreateModel(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0,
- flatbuffers::Offset<flatbuffers::String> description = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0)
-{
- ModelBuilder builder_(_fbb);
- builder_.add_metadata_buffer(metadata_buffer);
- builder_.add_buffers(buffers);
- builder_.add_description(description);
- builder_.add_subgraphs(subgraphs);
- builder_.add_operator_codes(operator_codes);
- builder_.add_version(version);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Model>
-CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes = nullptr,
- const std::vector<flatbuffers::Offset<SubGraph>> *subgraphs = nullptr,
- const char *description = nullptr,
- const std::vector<flatbuffers::Offset<Buffer>> *buffers = nullptr,
- const std::vector<int32_t> *metadata_buffer = nullptr)
-{
- return circle::CreateModel(
- _fbb, version,
- operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0,
- subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0,
- description ? _fbb.CreateString(description) : 0,
- buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0,
- metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0);
-}
-
-inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
- QuantizationDetails type)
-{
- switch (type)
- {
- case QuantizationDetails_NONE:
- {
- return true;
- }
- case QuantizationDetails_CustomQuantization:
- {
- auto ptr = reinterpret_cast<const CustomQuantization *>(obj);
- return verifier.VerifyTable(ptr);
- }
- default:
- return false;
- }
-}
-
-inline bool
-VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
- const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
- const flatbuffers::Vector<uint8_t> *types)
-{
- if (!values || !types)
- return !values && !types;
- if (values->size() != types->size())
- return false;
- for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i)
- {
- if (!VerifyQuantizationDetails(verifier, values->Get(i),
- types->GetEnum<QuantizationDetails>(i)))
- {
- return false;
- }
- }
- return true;
-}
-
-inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj,
- BuiltinOptions type)
-{
- switch (type)
- {
- case BuiltinOptions_NONE:
- {
- return true;
- }
- case BuiltinOptions_Conv2DOptions:
- {
- auto ptr = reinterpret_cast<const Conv2DOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_DepthwiseConv2DOptions:
- {
- auto ptr = reinterpret_cast<const DepthwiseConv2DOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ConcatEmbeddingsOptions:
- {
- auto ptr = reinterpret_cast<const ConcatEmbeddingsOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LSHProjectionOptions:
- {
- auto ptr = reinterpret_cast<const LSHProjectionOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_Pool2DOptions:
- {
- auto ptr = reinterpret_cast<const Pool2DOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SVDFOptions:
- {
- auto ptr = reinterpret_cast<const SVDFOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_RNNOptions:
- {
- auto ptr = reinterpret_cast<const RNNOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_FullyConnectedOptions:
- {
- auto ptr = reinterpret_cast<const FullyConnectedOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SoftmaxOptions:
- {
- auto ptr = reinterpret_cast<const SoftmaxOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ConcatenationOptions:
- {
- auto ptr = reinterpret_cast<const ConcatenationOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_AddOptions:
- {
- auto ptr = reinterpret_cast<const AddOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_L2NormOptions:
- {
- auto ptr = reinterpret_cast<const L2NormOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LocalResponseNormalizationOptions:
- {
- auto ptr = reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LSTMOptions:
- {
- auto ptr = reinterpret_cast<const LSTMOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ResizeBilinearOptions:
- {
- auto ptr = reinterpret_cast<const ResizeBilinearOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_CallOptions:
- {
- auto ptr = reinterpret_cast<const CallOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ReshapeOptions:
- {
- auto ptr = reinterpret_cast<const ReshapeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SkipGramOptions:
- {
- auto ptr = reinterpret_cast<const SkipGramOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SpaceToDepthOptions:
- {
- auto ptr = reinterpret_cast<const SpaceToDepthOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_EmbeddingLookupSparseOptions:
- {
- auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_MulOptions:
- {
- auto ptr = reinterpret_cast<const MulOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_PadOptions:
- {
- auto ptr = reinterpret_cast<const PadOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_GatherOptions:
- {
- auto ptr = reinterpret_cast<const GatherOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_BatchToSpaceNDOptions:
- {
- auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SpaceToBatchNDOptions:
- {
- auto ptr = reinterpret_cast<const SpaceToBatchNDOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_TransposeOptions:
- {
- auto ptr = reinterpret_cast<const TransposeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ReducerOptions:
- {
- auto ptr = reinterpret_cast<const ReducerOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SubOptions:
- {
- auto ptr = reinterpret_cast<const SubOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_DivOptions:
- {
- auto ptr = reinterpret_cast<const DivOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SqueezeOptions:
- {
- auto ptr = reinterpret_cast<const SqueezeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SequenceRNNOptions:
- {
- auto ptr = reinterpret_cast<const SequenceRNNOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_StridedSliceOptions:
- {
- auto ptr = reinterpret_cast<const StridedSliceOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ExpOptions:
- {
- auto ptr = reinterpret_cast<const ExpOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_TopKV2Options:
- {
- auto ptr = reinterpret_cast<const TopKV2Options *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SplitOptions:
- {
- auto ptr = reinterpret_cast<const SplitOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LogSoftmaxOptions:
- {
- auto ptr = reinterpret_cast<const LogSoftmaxOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_CastOptions:
- {
- auto ptr = reinterpret_cast<const CastOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_DequantizeOptions:
- {
- auto ptr = reinterpret_cast<const DequantizeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_MaximumMinimumOptions:
- {
- auto ptr = reinterpret_cast<const MaximumMinimumOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ArgMaxOptions:
- {
- auto ptr = reinterpret_cast<const ArgMaxOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LessOptions:
- {
- auto ptr = reinterpret_cast<const LessOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_NegOptions:
- {
- auto ptr = reinterpret_cast<const NegOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_PadV2Options:
- {
- auto ptr = reinterpret_cast<const PadV2Options *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_GreaterOptions:
- {
- auto ptr = reinterpret_cast<const GreaterOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_GreaterEqualOptions:
- {
- auto ptr = reinterpret_cast<const GreaterEqualOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LessEqualOptions:
- {
- auto ptr = reinterpret_cast<const LessEqualOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SelectOptions:
- {
- auto ptr = reinterpret_cast<const SelectOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SliceOptions:
- {
- auto ptr = reinterpret_cast<const SliceOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_TransposeConvOptions:
- {
- auto ptr = reinterpret_cast<const TransposeConvOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SparseToDenseOptions:
- {
- auto ptr = reinterpret_cast<const SparseToDenseOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_TileOptions:
- {
- auto ptr = reinterpret_cast<const TileOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ExpandDimsOptions:
- {
- auto ptr = reinterpret_cast<const ExpandDimsOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_EqualOptions:
- {
- auto ptr = reinterpret_cast<const EqualOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_NotEqualOptions:
- {
- auto ptr = reinterpret_cast<const NotEqualOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ShapeOptions:
- {
- auto ptr = reinterpret_cast<const ShapeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_PowOptions:
- {
- auto ptr = reinterpret_cast<const PowOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ArgMinOptions:
- {
- auto ptr = reinterpret_cast<const ArgMinOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_FakeQuantOptions:
- {
- auto ptr = reinterpret_cast<const FakeQuantOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_PackOptions:
- {
- auto ptr = reinterpret_cast<const PackOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LogicalOrOptions:
- {
- auto ptr = reinterpret_cast<const LogicalOrOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_OneHotOptions:
- {
- auto ptr = reinterpret_cast<const OneHotOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LogicalAndOptions:
- {
- auto ptr = reinterpret_cast<const LogicalAndOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LogicalNotOptions:
- {
- auto ptr = reinterpret_cast<const LogicalNotOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_UnpackOptions:
- {
- auto ptr = reinterpret_cast<const UnpackOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_FloorDivOptions:
- {
- auto ptr = reinterpret_cast<const FloorDivOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SquareOptions:
- {
- auto ptr = reinterpret_cast<const SquareOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ZerosLikeOptions:
- {
- auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_FillOptions:
- {
- auto ptr = reinterpret_cast<const FillOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_BidirectionalSequenceLSTMOptions:
- {
- auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_BidirectionalSequenceRNNOptions:
- {
- auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_UnidirectionalSequenceLSTMOptions:
- {
- auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_FloorModOptions:
- {
- auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_RangeOptions:
- {
- auto ptr = reinterpret_cast<const RangeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ResizeNearestNeighborOptions:
- {
- auto ptr = reinterpret_cast<const ResizeNearestNeighborOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LeakyReluOptions:
- {
- auto ptr = reinterpret_cast<const LeakyReluOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SquaredDifferenceOptions:
- {
- auto ptr = reinterpret_cast<const SquaredDifferenceOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_MirrorPadOptions:
- {
- auto ptr = reinterpret_cast<const MirrorPadOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_AbsOptions:
- {
- auto ptr = reinterpret_cast<const AbsOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SplitVOptions:
- {
- auto ptr = reinterpret_cast<const SplitVOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_InstanceNormOptions:
- {
- auto ptr = reinterpret_cast<const InstanceNormOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- default:
- return false;
- }
-}
-
-inline bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
- const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
- const flatbuffers::Vector<uint8_t> *types)
-{
- if (!values || !types)
- return !values && !types;
- if (values->size() != types->size())
- return false;
- for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i)
- {
- if (!VerifyBuiltinOptions(verifier, values->Get(i), types->GetEnum<BuiltinOptions>(i)))
- {
- return false;
- }
- }
- return true;
-}
-
-inline const circle::Model *GetModel(const void *buf)
-{
- return flatbuffers::GetRoot<circle::Model>(buf);
-}
-
-inline const circle::Model *GetSizePrefixedModel(const void *buf)
-{
- return flatbuffers::GetSizePrefixedRoot<circle::Model>(buf);
-}
-
-inline const char *ModelIdentifier() { return "CIR0"; }
-
-inline bool ModelBufferHasIdentifier(const void *buf)
-{
- return flatbuffers::BufferHasIdentifier(buf, ModelIdentifier());
-}
-
-inline bool VerifyModelBuffer(flatbuffers::Verifier &verifier)
-{
- return verifier.VerifyBuffer<circle::Model>(ModelIdentifier());
-}
-
-inline bool VerifySizePrefixedModelBuffer(flatbuffers::Verifier &verifier)
-{
- return verifier.VerifySizePrefixedBuffer<circle::Model>(ModelIdentifier());
-}
-
-inline const char *ModelExtension() { return "circle"; }
-
-inline void FinishModelBuffer(flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<circle::Model> root)
-{
- fbb.Finish(root, ModelIdentifier());
-}
-
-inline void FinishSizePrefixedModelBuffer(flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<circle::Model> root)
-{
- fbb.FinishSizePrefixed(root, ModelIdentifier());
-}
-
-} // namespace circle
-
-#endif // FLATBUFFERS_GENERATED_CIRCLESCHEMA_CIRCLE_H_
diff --git a/runtime/neurun/frontend/nnapi/CMakeLists.txt b/runtime/neurun/frontend/nnapi/CMakeLists.txt
deleted file mode 100644
index 3c3411e05..000000000
--- a/runtime/neurun/frontend/nnapi/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-file(GLOB_RECURSE SOURCES_FRONTEND "*.cc")
-file(GLOB_RECURSE TESTS_FRONTEND "*.test.cc")
-list(REMOVE_ITEM SOURCES_FRONTEND ${TESTS_FRONTEND})
-
-set(LIB_NEURUN neurun)
-
-add_library(${LIB_NEURUN} SHARED ${SOURCES_FRONTEND})
-target_link_libraries(${LIB_NEURUN} PUBLIC nnfw-nnapi-header)
-target_link_libraries(${LIB_NEURUN} PUBLIC neurun_core) # TODO Link PRIVATE neurun_core
-target_link_libraries(${LIB_NEURUN} PRIVATE nnfw_common)
-target_link_libraries(${LIB_NEURUN} PRIVATE nnfw_coverage)
-
-set_target_properties(${LIB_NEURUN} PROPERTIES OUTPUT_NAME neuralnetworks)
-
-install(TARGETS ${LIB_NEURUN} DESTINATION lib)
-
-add_executable(test_neurun_frontend_nnapi ${TESTS_FRONTEND})
-
-target_link_libraries(test_neurun_frontend_nnapi PRIVATE ${LIB_NEURUN} dl)
-target_link_libraries(test_neurun_frontend_nnapi PRIVATE gtest)
-target_link_libraries(test_neurun_frontend_nnapi PRIVATE gtest_main)
-
-install(TARGETS test_neurun_frontend_nnapi DESTINATION unittest)
diff --git a/runtime/neurun/frontend/nnapi/compilation.cc b/runtime/neurun/frontend/nnapi/compilation.cc
deleted file mode 100644
index 164158f19..000000000
--- a/runtime/neurun/frontend/nnapi/compilation.cc
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <NeuralNetworks.h>
-
-#include <new>
-
-#include "wrapper/ANeuralNetworksModel.h"
-#include "wrapper/ANeuralNetworksCompilation.h"
-#include "util/logging.h"
-
-//
-// NNAPI Implementation
-//
-int ANeuralNetworksCompilation_create(ANeuralNetworksModel *model,
- ANeuralNetworksCompilation **compilation)
-{
- if ((model == nullptr) || (compilation == nullptr))
- {
- VERBOSE(NNAPI::Compilation) << "create: Incorrect null pointer parameter(s)" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if (!model->isFinished())
- {
- VERBOSE(NNAPI::Compilation) << "create: Model define is not finished" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- std::shared_ptr<neurun::ir::Graph> internal;
-
- model->release(internal);
-
- *compilation = new (std::nothrow) ANeuralNetworksCompilation(internal);
- if (*compilation == nullptr)
- {
- VERBOSE(NNAPI::Compilation) << "create: ail to create compilation object" << std::endl;
- return ANEURALNETWORKS_OUT_OF_MEMORY;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation)
-{
- if (compilation == nullptr)
- {
- VERBOSE(NNAPI::Compilation) << "finish: Incorrect null pointer parameter" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if (compilation->state() != ::neurun::compiler::State::CREATED)
- {
- VERBOSE(NNAPI::Compilation) << "finish: Already finished" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- if (!compilation->finish())
- {
- VERBOSE(NNAPI::Compilation) << "finish: Fail to compile" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation *compilation)
-{
- delete compilation;
-}
-
-int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation *compilation,
- int32_t preference)
-{
- if (compilation == nullptr)
- {
- VERBOSE(NNAPI::Compilation) << "setPreference: Incorrect null pointer parameter" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if (compilation->state() != ::neurun::compiler::State::CREATED)
- {
- VERBOSE(NNAPI::Compilation) << "setPreference: Already finished" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- const PreferenceCode FIRST_PREFERENCE_CODE = ANEURALNETWORKS_PREFER_LOW_POWER;
- const PreferenceCode LAST_PREFERENCE_CODE = ANEURALNETWORKS_PREFER_SUSTAINED_SPEED;
- if ((preference < FIRST_PREFERENCE_CODE) || (preference > LAST_PREFERENCE_CODE))
- {
- VERBOSE(NNAPI::Compilation) << "setPreference: Incorrect preference code" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- // NYI: nothing to set
- return ANEURALNETWORKS_NO_ERROR;
-}
diff --git a/runtime/neurun/frontend/nnapi/execution.cc b/runtime/neurun/frontend/nnapi/execution.cc
deleted file mode 100644
index 08f2df4c2..000000000
--- a/runtime/neurun/frontend/nnapi/execution.cc
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <NeuralNetworks.h>
-
-#include <new>
-
-#include "wrapper/ANeuralNetworksCompilation.h"
-#include "wrapper/ANeuralNetworksExecution.h"
-#include "wrapper/ANeuralNetworksMemory.h"
-#include "wrapper/ANeuralNetworksEvent.h"
-#include "wrapper/NNAPIConvert.h"
-#include "util/logging.h"
-
-//
-// NNAPI Implementation
-//
-int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
- ANeuralNetworksExecution **execution)
-{
- if ((compilation == nullptr) || (execution == nullptr))
- {
- VERBOSE(NNAPI::Execution) << "create: Incorrect null pointer parameter(s)" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- std::shared_ptr<neurun::exec::IExecutor> executor;
-
- compilation->publish(executor);
-
- if (executor == nullptr)
- {
- VERBOSE(NNAPI::Execution) << "create: Never compiled yet" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- *execution = new (std::nothrow) ANeuralNetworksExecution{executor};
- if (*execution == nullptr)
- {
- VERBOSE(NNAPI::Execution) << "create: Fail to create execution object" << std::endl;
- return ANEURALNETWORKS_OUT_OF_MEMORY;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-// NOTE Handle optional input
-// Unspecified shape on model build
-// Optional and omitted input on execution: skip input setting (workaround for LSTM)
-// Optional but not omitted input on execution: cannot handle
-// Normal input on execution: cannot handle
-// Fully specified shape on model build
-// Optional input on execution: cannot handle
-// Normal input: handle normally
-int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32_t index,
- const ANeuralNetworksOperandType *type, const void *buffer,
- size_t length)
-{
- // Don't check type
- // Comment about ANeuralNetworksOperandType in NeuralNetworks.h:
- // If the input or output is optional and omitted then it need not have a fully specified tensor
- // operand type
- if ((execution == nullptr) || ((buffer == nullptr) && (length != 0)))
- {
- VERBOSE(NNAPI::Execution) << "setInput: Incorrect null pointer parameter(s)" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if ((buffer != nullptr) && (length == 0))
- {
- VERBOSE(NNAPI::Execution) << "setInput: Zero length input" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- const auto operand_index = execution->getInputOperandIndex(index);
- if (!operand_index.valid())
- {
- VERBOSE(NNAPI::Execution) << "setInput: Invalid input index" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- // Omitted optional input
- // LSTM operation's some inputs can be optional input
- if ((buffer == nullptr) && (length == 0))
- {
- if (execution->haveUnspecifiedDims(operand_index))
- {
- return ANEURALNETWORKS_NO_ERROR;
- }
- else
- {
- VERBOSE(NNAPI::Execution) << "setInput: Cannot handle fully-specified shape on model build "
- "but omitted input on execution"
- << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
-
- if (type != nullptr)
- {
- if (!execution->compareDataType(type, operand_index))
- {
- VERBOSE(NNAPI::Execution) << "setInput: Data type mismatch" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (!execution->compareShape(type, operand_index))
- {
- VERBOSE(NNAPI::Execution) << "setInput: Shape mismatch" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (NNAPIConvert::calculateSizeFromType(type) != length)
- {
- VERBOSE(NNAPI::Execution) << "setInput: Invalid length" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
- else
- {
- if (execution->haveUnspecifiedDims(operand_index))
- {
- VERBOSE(NNAPI::Execution) << "setInput: Unspecified dimension value" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (execution->getOperandSize(operand_index) != length)
- {
- VERBOSE(NNAPI::Execution) << "setInput: Invalid length" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
-
- if (!execution->setInput(index, type, buffer, length))
- {
- VERBOSE(NNAPI::Execution) << "setInput: Fail to set input" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int32_t index,
- const ANeuralNetworksOperandType *type, void *buffer,
- size_t length)
-{
- // Don't check type
- // Comment about ANeuralNetworksOperandType in NeuralNetworks.h:
- // If the input or output is optional and omitted then it need not have a fully specified tensor
- // operand type
- if ((execution == nullptr) || ((buffer == nullptr) && (length != 0)))
- {
- VERBOSE(NNAPI::Execution) << "setOutput: Incorrect null pointer parameter(s)" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if ((buffer != nullptr) && (length == 0))
- {
- VERBOSE(NNAPI::Execution) << "setOutput: Zero length output" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- // Handle optional output
- if (buffer == nullptr)
- {
- return ANEURALNETWORKS_NO_ERROR;
- }
-
- const auto operand_index = execution->getOutputOperandIndex(index);
- if (!operand_index.valid())
- {
- VERBOSE(NNAPI::Execution) << "setOutput: Invalid output index" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (type != nullptr)
- {
- if (!execution->compareDataType(type, operand_index))
- {
- VERBOSE(NNAPI::Execution) << "setOutput: Data type mismatch" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (!execution->compareShape(type, operand_index))
- {
- VERBOSE(NNAPI::Execution) << "setOutput: Shape mismatch" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (NNAPIConvert::calculateSizeFromType(type) != length)
- {
- VERBOSE(NNAPI::Execution) << "setOutput: Invalid length" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
- else
- {
- if (execution->haveUnspecifiedDims(operand_index))
- {
- VERBOSE(NNAPI::Execution) << "setOutput: Unspecified dimension value" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (execution->getOperandSize(operand_index) != length)
- {
- VERBOSE(NNAPI::Execution) << "setInput: Invalid length" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
-
- if (!execution->setOutput(index, type, buffer, length))
- {
- VERBOSE(NNAPI::Execution) << "setOutput: Fail to set output" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution,
- ANeuralNetworksEvent **event)
-{
- if ((execution == nullptr) || (event == nullptr))
- {
- VERBOSE(NNAPI::Execution) << "startCompute: Incorrect null pointer parameter(s)" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- // TODO: Handle event
- auto instance = execution->instance();
- *event = new (std::nothrow) ANeuralNetworksEvent{instance};
- if (*event == nullptr)
- {
- VERBOSE(NNAPI::Execution) << "startCompute: Fail to create event" << std::endl;
- return ANEURALNETWORKS_OUT_OF_MEMORY;
- }
-
- if (!execution->startExecute())
- {
- VERBOSE(NNAPI::Execution) << "startCompute: Fail to start execution" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksExecution_compute(ANeuralNetworksExecution *execution)
-{
- if (execution == nullptr)
- {
- VERBOSE(NNAPI::Execution) << "Compute: Incorrect null pointer parameter" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if (!execution->execute())
- {
- VERBOSE(NNAPI::Execution) << "Compute: Fail to execution" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-void ANeuralNetworksExecution_free(ANeuralNetworksExecution *execution) { delete execution; }
-
-int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution *execution, int32_t index,
- const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset,
- size_t length)
-{
- if ((execution == nullptr) || (memory == nullptr))
- {
- VERBOSE(NNAPI::Execution) << "setInputFromMemory: Incorrect null pointer parameter(s)"
- << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if (length == 0)
- {
- VERBOSE(NNAPI::Execution) << "setInputFromMemory: Zero length input" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- const auto operand_index = execution->getInputOperandIndex(index);
- if (!operand_index.valid())
- {
- VERBOSE(NNAPI::Execution) << "setInputFromMemory: Invalid input index" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (type != nullptr)
- {
- if (!execution->compareDataType(type, operand_index))
- {
- VERBOSE(NNAPI::Execution) << "setInputFromMemory: Data type mismatch" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (!execution->compareShape(type, operand_index))
- {
- VERBOSE(NNAPI::Execution) << "setInputFromMemory: Shape mismatch" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (NNAPIConvert::calculateSizeFromType(type) != length)
- {
- VERBOSE(NNAPI::Execution) << "setInputFromMemory: Invalid length" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
- else
- {
- if (execution->haveUnspecifiedDims(operand_index))
- {
- VERBOSE(NNAPI::Execution) << "setInputFromMemory: Unspecified dimension value" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (execution->getOperandSize(operand_index) != length)
- {
- VERBOSE(NNAPI::Execution) << "setInputFromMemory: Invalid length" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
-
- if (!memory->vaildAccess(offset, length))
- {
- VERBOSE(NNAPI::Execution) << "setInputFromMemory: Invalid memory access" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (!execution->setInput(index, type, reinterpret_cast<const void *>(memory->base() + offset),
- length))
- {
- VERBOSE(NNAPI::Execution) << "setInputFromMemory: Fail to set input" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution *execution, int32_t index,
- const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset,
- size_t length)
-{
- if ((execution == nullptr) || (memory == nullptr))
- {
- VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Incorrect null pointer parameter(s)"
- << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if (length == 0)
- {
- VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Zero length input" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- const auto operand_index = execution->getOutputOperandIndex(index);
- if (!operand_index.valid())
- {
- VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Invalid output index" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (type != nullptr)
- {
- if (!execution->compareDataType(type, operand_index))
- {
- VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Data type mismatch" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (!execution->compareShape(type, operand_index))
- {
- VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Shape mismatch" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (NNAPIConvert::calculateSizeFromType(type) != length)
- {
- VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Invalid length" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
- else
- {
- if (execution->haveUnspecifiedDims(operand_index))
- {
- VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Unspecified dimension value" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (execution->getOperandSize(operand_index) != length)
- {
- VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Invalid length" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
-
- if (!memory->vaildAccess(offset, length))
- {
- VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Invalid memory access" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (!execution->setOutput(index, type, reinterpret_cast<void *>(memory->base() + offset), length))
- {
- VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Fail to set input" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution *execution,
- int32_t index, uint32_t *rank)
-{
- if ((execution == nullptr) || (rank == nullptr))
- {
- VERBOSE(NNAPI::Execution) << "getOutputOperandRank: Incorrect null pointer parameter(s)"
- << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- const auto operand_index = execution->getOutputOperandIndex(index);
- if (!operand_index.valid())
- {
- VERBOSE(NNAPI::Execution) << "getOutputOperandRank: Invalid output index" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (!execution->getOutputOperandRank(index, rank))
- {
- VERBOSE(NNAPI::Execution) << "getOutputOperandRank: Fail to get rank" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksExecution_getOutputOperandDimensions(ANeuralNetworksExecution *execution,
- int32_t index, uint32_t *dimensions)
-{
- if ((execution == nullptr) || (dimensions == nullptr))
- {
- VERBOSE(NNAPI::Execution) << "getOutputOperandDimensions: Incorrect null pointer parameter(s)"
- << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- const auto operand_index = execution->getOutputOperandIndex(index);
- if (!operand_index.valid())
- {
- VERBOSE(NNAPI::Execution) << "getOutputOperandDimensions: Invalid output index" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (!execution->getOutputOperandDimensions(index, dimensions))
- {
- VERBOSE(NNAPI::Execution) << "getOutputOperandDimensions: Fail to get rank" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
diff --git a/runtime/neurun/frontend/nnapi/memory.cc b/runtime/neurun/frontend/nnapi/memory.cc
deleted file mode 100644
index fbe1a48e8..000000000
--- a/runtime/neurun/frontend/nnapi/memory.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <NeuralNetworks.h>
-#include <sys/mman.h>
-#include <new>
-#include <memory>
-
-#include "cpp14/memory.h"
-#include "wrapper/ANeuralNetworksMemory.h"
-
-int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset,
- ANeuralNetworksMemory **memory)
-{
- if (memory == nullptr)
- {
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- *memory = new (std::nothrow) ANeuralNetworksMemory{size, protect, fd, offset};
- if (*memory == nullptr)
- {
- return ANEURALNETWORKS_OUT_OF_MEMORY;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-void ANeuralNetworksMemory_free(ANeuralNetworksMemory *memory) { delete memory; }
diff --git a/runtime/neurun/frontend/nnapi/model.cc b/runtime/neurun/frontend/nnapi/model.cc
deleted file mode 100644
index 72a66e630..000000000
--- a/runtime/neurun/frontend/nnapi/model.cc
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <NeuralNetworks.h>
-#include <NeuralNetworksEx.h>
-
-#include <new>
-
-#include "wrapper/ANeuralNetworksModel.h"
-#include "wrapper/ANeuralNetworksMemory.h"
-#include "util/logging.h"
-
-int ANeuralNetworksModel_create(ANeuralNetworksModel **model)
-{
- if (model == nullptr)
- {
- VERBOSE(NNAPI::Model) << "create: Incorrect null pointer parameter" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- *model = new (std::nothrow) ANeuralNetworksModel{};
- if (*model == nullptr)
- {
- VERBOSE(NNAPI::Model) << "create: Fail to create model object" << std::endl;
- return ANEURALNETWORKS_OUT_OF_MEMORY;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-void ANeuralNetworksModel_free(ANeuralNetworksModel *model) { delete model; }
-
-int ANeuralNetworksModel_addOperand(ANeuralNetworksModel *model,
- const ANeuralNetworksOperandType *type)
-{
- if ((model == nullptr) || (type == nullptr))
- {
- VERBOSE(NNAPI::Model) << "addOperand: Incorrect null pointer parameter(s)" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if (model->isFinished())
- {
- VERBOSE(NNAPI::Model) << "addOperand: Already finished" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- // scale and zeroPoint should be zero for scalars and non-fixed point tensors
- // Quantized:
- // scale: a 32 bit floating point value greater than zero
- // zeroPoint: a 32 bit integer, in range [0, 255]
- if (type->type == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM)
- {
- if (!(type->scale > 0.0f))
- {
- VERBOSE(NNAPI::Model) << "addOperand: Incorrect scale value for quantization" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if ((type->zeroPoint < 0) || (type->zeroPoint > 255))
- {
- VERBOSE(NNAPI::Model) << "addOperand: Incorrect zeroPoint value for quantization"
- << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
- // NOTE Validation of scale and zeroPoint would be skipped for a while.
- // We do not know whether scalar type can have scale and zeroPoint.
- // To pass ValidationTest and GeneratedTest, this validation code
- // would not be implemented until we can define this issue clearly.
- //
- // scale and zeroPoint should be zero for scalars and non-fixed point tensors
- // else if ((type->scale != 0.0f) || (type->zeroPoint != 0))
- // {
- // return ANEURALNETWORKS_BAD_DATA;
- // }
-
- // dimensionCount should be zero for scalars
- if ((type->dimensionCount != 0) &&
- ((type->type == ANEURALNETWORKS_FLOAT32) || (type->type == ANEURALNETWORKS_INT32) ||
- (type->type == ANEURALNETWORKS_UINT32)))
- {
- VERBOSE(NNAPI::Model) << "addOperand: Incorrect data type" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (!model->addOperand(type))
- {
- VERBOSE(NNAPI::Model) << "addOperand: Fail to add operand" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel *model, int32_t index,
- const void *buffer, size_t length)
-{
- const bool optional_operand = ((buffer == nullptr) && (length == 0));
-
- if ((model == nullptr) || ((buffer == nullptr) && (length != 0)))
- {
- VERBOSE(NNAPI::Model) << "setOperandValue: Incorrect null pointer parameter(s)" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if (model->isFinished())
- {
- VERBOSE(NNAPI::Model) << "setOperandValue: Already finished" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- // Negative index value is not allowed
- if (index < 0)
- {
- VERBOSE(NNAPI::Model) << "setOperandValue: Invalid index value (negative)" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- // NOTE OperandIndex uses uint32_t as its underlying type as various NNAPI
- // functions such as ANeuralNetworksModel_addOperation use uint32_t to represent operand
- // index
- // ANeuralNetworksModel_setOperandValue, however, uses int32_t to represent operand index.
- //
- // Below, static_cast<uint32_t>(...) is introduced to eliminate compiler warning.
- uint32_t ind = static_cast<uint32_t>(index);
-
- if (!model->isExistOperand(ind))
- {
- VERBOSE(NNAPI::Model) << "setOperandValue: Invalid index value (not exist)" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (!optional_operand && (model->operandSize(ind) != length))
- {
- VERBOSE(NNAPI::Model) << "setOperandValue: Invalid data length" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (model->isUsageSet(ind))
- {
- VERBOSE(NNAPI::Model) << "setOperandValue: Already set operand" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- // NNAPI spec in NeuralNetworks.h
- // For values of length greater than ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES,
- // the application is responsible for not changing the content of this region
- // until all executions using this model have completed
- bool copy_value = false;
- if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES)
- {
- copy_value = true;
- }
-
- if (!model->setOperandValue(ind, buffer, length, optional_operand, copy_value))
- {
- VERBOSE(NNAPI::Model) << "setOperandValue: Fail to set operand value" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel *model, int32_t index,
- const ANeuralNetworksMemory *memory,
- size_t offset, size_t length)
-{
- if ((model == nullptr) || (memory == nullptr))
- {
- VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Incorrect null pointer parameter(s)"
- << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if (model->isFinished())
- {
- VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Already finished" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- // Negative index value is not allowed
- if (index < 0)
- {
- VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Invalid index value (negative)"
- << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- // NOTE OperandIndex uses uint32_t as its underlying type as various NNAPI
- // functions such as ANeuralNetworksModel_addOperation use uint32_t to represent operand
- // index
- // ANeuralNetworksModel_setOperandValue, however, uses int32_t to represent operand index.
- //
- // Below, static_cast<uint32_t>(...) is introduced to eliminate compiler warning.
- uint32_t ind = static_cast<uint32_t>(index);
-
- if (!model->isExistOperand(ind))
- {
- VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Invalid index value (not exist)"
- << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if ((model->operandSize(ind) != length) || (memory->size() < (offset + length)))
- {
- VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Invalid data length" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (model->isUsageSet(ind))
- {
- VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Already set operand" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (!model->setOperandValue(ind, memory->base() + offset, length))
- {
- VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Fail to set operand value" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
- ANeuralNetworksOperationType type, uint32_t inputCount,
- const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- if ((model == nullptr) || (inputs == nullptr) || (outputs == nullptr))
- {
- VERBOSE(NNAPI::Model) << "addOperation: Incorrect null pointer parameter(s)" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if (model->isFinished())
- {
- VERBOSE(NNAPI::Model) << "addOperation: Already finished" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- const ANeuralNetworksOperationType FIRST_OPERATION = ANEURALNETWORKS_ADD;
- const ANeuralNetworksOperationType LAST_OPERATION = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR;
- if ((type < FIRST_OPERATION) || (type > LAST_OPERATION))
- {
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- for (uint32_t i = 0; i < outputCount; i++)
- {
- if (model->isUsageSet(outputs[i]))
- {
- VERBOSE(NNAPI::Model) << "addOperation: Already set output operand" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
-
- if (!model->addOperation(type, inputCount, inputs, outputCount, outputs))
- {
- VERBOSE(NNAPI::Model) << "addOperation: Fail to add operation" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
- ANeuralNetworksOperationTypeEx type, uint32_t inputCount,
- const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- if ((model == nullptr) || (inputs == nullptr) || (outputs == nullptr))
- {
- VERBOSE(NNAPI::Model) << "addOperation: Incorrect null pointer parameter(s)" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if (model->isFinished())
- {
- VERBOSE(NNAPI::Model) << "addOperation: Already finished" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- const ANeuralNetworksOperationTypeEx FIRST_OPERATION = ANEURALNETWORKS_CAST_EX;
- const ANeuralNetworksOperationTypeEx LAST_OPERATION = ANEURALNETWORKS_LESS_EX;
- if ((type < FIRST_OPERATION) || (type > LAST_OPERATION))
- {
- VERBOSE(NNAPI::Model) << "addOperation: Invalid operation type" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- for (uint32_t i = 0; i < outputCount; i++)
- {
- if (model->isUsageSet(outputs[i]))
- {
- VERBOSE(NNAPI::Model) << "addOperation: Already set output operand" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
-
- if (!model->addOperationEx(type, inputCount, inputs, outputCount, outputs))
- {
- VERBOSE(NNAPI::Model) << "addOperation: Fail to add operation" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel *model, uint32_t inputCount,
- const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- if ((model == nullptr) || (inputs == nullptr) || (outputs == nullptr))
- {
- VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Incorrect null pointer parameter(s)"
- << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if (model->isFinished())
- {
- VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Already finished" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- for (uint32_t n = 0; n < inputCount; ++n)
- {
- uint32_t ind = inputs[n];
- if (model->isUsageSet(ind))
- {
- VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Already set input operand" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (!model->addModelInput(ind))
- {
- VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Fail to add input" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
-
- for (uint32_t n = 0; n < outputCount; ++n)
- {
- uint32_t ind = outputs[n];
-
- if (!model->isOperationOutput(ind))
- {
- VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Need to set output operand" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
-
- if (!model->addModelOutput(ind))
- {
- VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Fail to add output" << std::endl;
- return ANEURALNETWORKS_BAD_DATA;
- }
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksModel_finish(ANeuralNetworksModel *model)
-{
- if (model == nullptr)
- {
- VERBOSE(NNAPI::Model) << "finish: Incorrect null pointer parameter" << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- if (model->isFinished())
- {
- VERBOSE(NNAPI::Model) << "finish: Already finished" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- if (!model->finish())
- {
- VERBOSE(NNAPI::Model) << "finish: Fail to generate internal graph" << std::endl;
- return ANEURALNETWORKS_BAD_STATE;
- }
-
- return ANEURALNETWORKS_NO_ERROR;
-}
-
-int ANeuralNetworksModel_relaxComputationFloat32toFloat16(ANeuralNetworksModel *model, bool)
-{
- if (model == nullptr)
- {
- VERBOSE(NNAPI::Model) << "relaxComputationFloat32toFloat16: Incorrect null pointer parameter"
- << std::endl;
- return ANEURALNETWORKS_UNEXPECTED_NULL;
- }
-
- // NYI: nothing to set
- VERBOSE(NNAPI::Model) << "relaxComputationFloat32toFloat16: Do nothing yet" << std::endl;
-
- return ANEURALNETWORKS_NO_ERROR;
-}
diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
deleted file mode 100644
index 1aa1583aa..000000000
--- a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ANeuralNetworksCompilation.h"
-
-#include "util/logging.h"
-
-ANeuralNetworksCompilation::ANeuralNetworksCompilation(
- const std::shared_ptr<neurun::ir::Graph> &model) noexcept
- : _compiler{new neurun::compiler::Compiler{model}}
-{
- // DO NOTHING
-}
-
-bool ANeuralNetworksCompilation::finish() noexcept
-{
- try
- {
- _compiler->compile();
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
deleted file mode 100644
index 56b402d16..000000000
--- a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __COMPILATION_H__
-#define __COMPILATION_H__
-
-#include "compiler/Compiler.h"
-#include "ir/Graph.h"
-#include "exec/IExecutor.h"
-
-struct ANeuralNetworksCompilation
-{
-public:
- ANeuralNetworksCompilation(const std::shared_ptr<neurun::ir::Graph> &graph) noexcept;
-
-public:
- bool finish() noexcept;
-
- neurun::compiler::State state(void) noexcept { return _compiler->state(); }
- void publish(std::shared_ptr<neurun::exec::IExecutor> &executor) noexcept
- {
- _compiler->release(executor);
- }
-
-private:
- std::shared_ptr<neurun::compiler::Compiler> _compiler;
-};
-
-#endif
diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc
deleted file mode 100644
index b09f9abe6..000000000
--- a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ANeuralNetworksEvent.h"
-
-#include "exec/Execution.h"
-#include "util/logging.h"
-
-ANeuralNetworksEvent::ANeuralNetworksEvent(
- const std::shared_ptr<neurun::exec::Execution> &execution)
- : _execution{execution}
-{
- // DO NOTHING
-}
-
-bool ANeuralNetworksEvent::waitFinish(void) noexcept
-{
- try
- {
- _execution->waitFinish();
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.h b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.h
deleted file mode 100644
index e499bab77..000000000
--- a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksEvent.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __EVENT_H__
-#define __EVENT_H__
-
-#include <NeuralNetworks.h>
-
-#include <memory>
-
-namespace neurun
-{
-namespace exec
-{
-class Execution;
-} // namespace exec
-} // namespace neurun
-
-struct ANeuralNetworksEvent
-{
-public:
- ANeuralNetworksEvent(const std::shared_ptr<neurun::exec::Execution> &execution);
-
-public:
- bool waitFinish(void) noexcept;
-
-private:
- const std::shared_ptr<neurun::exec::Execution> _execution;
-};
-
-#endif
diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc
deleted file mode 100644
index b8e43a691..000000000
--- a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ANeuralNetworksExecution.h"
-#include "NNAPIConvert.h"
-#include "util/logging.h"
-
-const neurun::ir::OperandIndex
-ANeuralNetworksExecution::getInputOperandIndex(int32_t index) noexcept
-{
- if (index < 0)
- {
- // Negative index: return invalid index
- return neurun::ir::OperandIndex{};
- }
-
- uint32_t cast_index = static_cast<uint32_t>(index);
- if (cast_index >= _execution->graph().getInputs().size())
- {
- // Return invalid index
- return neurun::ir::OperandIndex{};
- }
-
- neurun::ir::IOIndex input_index{cast_index};
- const auto operand_index = _execution->graph().getInputs().at(input_index);
- return operand_index;
-}
-
-const neurun::ir::OperandIndex
-ANeuralNetworksExecution::getOutputOperandIndex(int32_t index) noexcept
-{
- if (index < 0)
- {
- // Negative index: return invalid index
- return neurun::ir::OperandIndex{};
- }
-
- uint32_t cast_index = static_cast<uint32_t>(index);
- if (cast_index >= _execution->graph().getOutputs().size())
- {
- // Return invalid index
- return neurun::ir::OperandIndex{};
- }
-
- neurun::ir::IOIndex output_index{cast_index};
- const auto operand_index = _execution->graph().getOutputs().at(output_index);
- return operand_index;
-}
-
-bool ANeuralNetworksExecution::compareDataType(const ANeuralNetworksOperandType *type,
- const neurun::ir::OperandIndex index) noexcept
-{
- try
- {
- const auto operand_type = _execution->graph().operands().at(index).typeInfo();
- const auto typeInfo = NNAPIConvert::getTypeInfo(type);
-
- if (operand_type != typeInfo)
- {
- // Data type mismatch
- return false;
- }
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
-
-bool ANeuralNetworksExecution::compareShape(const ANeuralNetworksOperandType *type,
- const neurun::ir::OperandIndex index) noexcept
-{
- // Passed shape should be specified
- if (haveUnspecifiedDims(index))
- {
- return false;
- }
-
- const auto &operand_shape = _execution->graph().operands().at(index).shape();
- const auto &shape_from_type = NNAPIConvert::getShape(type);
-
- return operand_shape == shape_from_type;
-}
-
-bool ANeuralNetworksExecution::haveUnspecifiedDims(const neurun::ir::OperandIndex index) noexcept
-{
- const auto operand_shape = _execution->graph().operands().at(index).shape();
-
- return operand_shape.num_elements() == 0;
-}
-
-size_t ANeuralNetworksExecution::getOperandSize(const neurun::ir::OperandIndex index) noexcept
-{
- try
- {
- return _execution->graph().operands().at(index).operandSize();
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return 0;
- }
-}
-
-bool ANeuralNetworksExecution::setInput(uint32_t index, const ANeuralNetworksOperandType *type,
- const void *buffer, size_t length) noexcept
-{
- try
- {
- neurun::ir::IOIndex input_index{index};
- const auto operand_index = getInputOperandIndex(index);
-
- const auto type_info = _execution->graph().operands().at(operand_index).typeInfo();
- const auto shape = (type != nullptr) ? NNAPIConvert::getShape(type)
- : _execution->graph().operands().at(operand_index).shape();
-
- // NOTE The nnapi does not provide setting io_layout and not support changing layout. In other
- // words, we can assume that io_layout from nnapi always is the same as layout of the used
- // model.
- // TODO Set layout of model
- _execution->setInput(input_index, type_info, shape, buffer, length, neurun::ir::Layout::NHWC);
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
-
-bool ANeuralNetworksExecution::setOutput(uint32_t index, const ANeuralNetworksOperandType *type,
- void *buffer, size_t length) noexcept
-{
- try
- {
- neurun::ir::IOIndex output_index{index};
- const auto operand_index = getOutputOperandIndex(index);
-
- const auto type_info = _execution->graph().operands().at(operand_index).typeInfo();
- const auto shape = (type != nullptr) ? NNAPIConvert::getShape(type)
- : _execution->graph().operands().at(operand_index).shape();
-
- // NOTE The nnapi does not provide setting io_layout and not support changing layout. In other
- // words, we can assume that io_layout from nnapi always is the same as layout of the used
- // model.
- // TODO Set layout of model
- _execution->setOutput(output_index, type_info, shape, buffer, length, neurun::ir::Layout::NHWC);
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
-
-bool ANeuralNetworksExecution::startExecute(void) noexcept
-{
- try
- {
- _execution->startExecute();
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
-
-bool ANeuralNetworksExecution::execute(void) noexcept
-{
- try
- {
- _execution->execute();
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
-
-const std::shared_ptr<neurun::exec::Execution> ANeuralNetworksExecution::instance(void) noexcept
-{
- return _execution;
-}
-
-bool ANeuralNetworksExecution::getOutputOperandRank(uint32_t index, uint32_t *rank) noexcept
-{
- try
- {
- neurun::ir::IOIndex output_index{index};
- const auto operand_index = getOutputOperandIndex(index);
- bool unspecified = haveUnspecifiedDims(operand_index);
-
- // TODO Get unspecified output operand's rank
- if (unspecified)
- {
- throw std::runtime_error{"Unsupport feature"};
- }
-
- // Check execution is finished
- // Output rank and shape may be decided after execution if output is unspecified operand
- if (!_execution->isFinished())
- {
- return false;
- }
-
- *rank = _execution->graph().operands().at(operand_index).shape().rank();
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
-
-bool ANeuralNetworksExecution::getOutputOperandDimensions(uint32_t index, uint32_t *dimensions)
-{
- try
- {
- neurun::ir::IOIndex output_index{index};
- const auto operand_index = getOutputOperandIndex(index);
- bool unspecified = haveUnspecifiedDims(operand_index);
- if (unspecified)
- {
- throw std::runtime_error{"NYI: Models with unspecified output dimensions"};
- }
-
- // Check execution is finished
- // Output rank and shape may be decided after execution if output is unspecified operand
- if (!_execution->isFinished())
- {
- return false;
- }
-
- auto shape = _execution->graph().operands().at(operand_index).shape();
- for (int i = 0; i < shape.rank(); i++)
- {
- auto dim = shape.dim(i);
-
- if (dim <= 0)
- {
- throw std::runtime_error{"Invalid dimension value"};
- }
-
- dimensions[i] = static_cast<uint32_t>(dim);
- }
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
deleted file mode 100644
index ecffedc0a..000000000
--- a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __EXECUTION_H__
-#define __EXECUTION_H__
-
-#include <NeuralNetworks.h>
-
-#include <memory>
-
-#include "exec/Execution.h"
-
-struct ANeuralNetworksExecution
-{
-public:
- ANeuralNetworksExecution(const std::shared_ptr<neurun::exec::IExecutor> &executor)
- : _execution{std::make_shared<neurun::exec::Execution>(executor)}
- {
- // DO NOTHING
- }
-
-public:
- bool setInput(uint32_t index, const ANeuralNetworksOperandType *type, const void *buffer,
- size_t length) noexcept;
- bool setOutput(uint32_t index, const ANeuralNetworksOperandType *type, void *buffer,
- size_t length) noexcept;
- bool startExecute(void) noexcept;
- bool execute(void) noexcept;
-
- const neurun::ir::OperandIndex getInputOperandIndex(int32_t index) noexcept;
- const neurun::ir::OperandIndex getOutputOperandIndex(int32_t index) noexcept;
- bool compareDataType(const ANeuralNetworksOperandType *type,
- const neurun::ir::OperandIndex index) noexcept;
- bool compareShape(const ANeuralNetworksOperandType *type,
- const neurun::ir::OperandIndex index) noexcept;
- bool haveUnspecifiedDims(const neurun::ir::OperandIndex index) noexcept;
- size_t getOperandSize(const neurun::ir::OperandIndex index) noexcept;
- const std::shared_ptr<neurun::exec::Execution> instance(void) noexcept;
-
- /**
- * @brief Get output operand's rank
- * @param[in] index Output index
- * @param[out] rank Output operand's rank
- * @return @c true if success to get rank, otherwise @c false
- */
- bool getOutputOperandRank(uint32_t index, uint32_t *rank) noexcept;
- /**
- * @brief Get dimensions of the output operand
- * @param[in] index Output index
- * @param[out] dimensions Output operand's dimensions
- * @return @c true if success to get rank, otherwise @c false
- * @note This must be called after execution is finished to get resolved output shape
- * unspecified in model
- */
- bool getOutputOperandDimensions(uint32_t index, uint32_t *dimensions);
-
-private:
- std::shared_ptr<neurun::exec::Execution> _execution;
-};
-
-#endif
diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.cc b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
deleted file mode 100644
index 5542a2e83..000000000
--- a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ANeuralNetworksModel.h"
-#include "OperationFactory.h"
-#include "NNAPIConvert.h"
-
-#include "ir/Operations.Include.h"
-#include "util/logging.h"
-
-#include "cpp14/memory.h"
-
-//
-// ANeuralNetworksModel
-//
-ANeuralNetworksModel::ANeuralNetworksModel() noexcept : _optional_operands{}, _operand_usages{}
-{
- _graph = std::make_shared<neurun::ir::Graph>();
-}
-
-bool ANeuralNetworksModel::addOperand(const ANeuralNetworksOperandType *type) noexcept
-{
- try
- {
- const auto shape = NNAPIConvert::getShape(type);
- const auto typeInfo = NNAPIConvert::getTypeInfo(type);
- _graph->addOperand(shape, typeInfo);
- _operand_usages.emplace_back(OperandUsage::NOT_DEFINED);
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
-
-bool ANeuralNetworksModel::setOperandValue(uint32_t index, const void *buffer, size_t length,
- bool optional, bool copy) noexcept
-{
- const neurun::ir::OperandIndex ind{index};
-
- try
- {
- _operand_usages[index] = OperandUsage::CONSTANT;
-
- // Remain operands.at(ind).data()->base() as nullptr for optional operand
- // This will be filled when model finished
- if (optional)
- {
- setOptionalOperand(ind);
- }
-
- using neurun::ir::CachedData;
- using neurun::ir::ExternalData;
- if (copy)
- {
- _graph->operands().at(ind).data(
- nnfw::cpp14::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length));
- }
- else
- {
- _graph->operands().at(ind).data(nnfw::cpp14::make_unique<ExternalData>(
- reinterpret_cast<const uint8_t *>(buffer), length));
- }
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
-
-bool ANeuralNetworksModel::addOperation(ANeuralNetworksOperationType type, uint32_t inputCount,
- const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs) noexcept
-{
- try
- {
- for (uint32_t i = 0; i < outputCount; i++)
- {
- _operand_usages[outputs[i]] = OperandUsage::OPERATION_OUTPUT;
- }
-
- auto &factory = OperationFactory::get();
- OperationFactory::Param param{inputCount, inputs, outputCount, outputs};
-
- auto node = factory.create(type, param, _graph->operands());
- _graph->addOperation(std::unique_ptr<neurun::ir::Operation>{node});
-
- // TODO Move these codes to delegate.cpp
- if (type == ANEURALNETWORKS_FULLY_CONNECTED)
- {
- const auto &input_operand =
- _graph->operands().at(node->getInputs().at(neurun::ir::operation::FullyConnected::INPUT));
- auto &weights_operand = _graph->operands().at(
- node->getInputs().at(neurun::ir::operation::FullyConnected::WEIGHT));
- if (input_operand.typeInfo().type() == neurun::ir::DataType::FLOAT32 &&
- weights_operand.typeInfo().type() == neurun::ir::DataType::QUANT8_ASYMM)
- {
- weights_operand.type(neurun::ir::DataType::QUANT8_SYMM);
- }
- }
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
-
-bool ANeuralNetworksModel::addOperationEx(ANeuralNetworksOperationTypeEx type, uint32_t inputCount,
- const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs) noexcept
-{
- try
- {
- for (uint32_t i = 0; i < outputCount; i++)
- {
- _operand_usages[outputs[i]] = OperandUsage::OPERATION_OUTPUT;
- }
-
- auto &factory = OperationFactory::get();
- OperationFactory::Param param{inputCount, inputs, outputCount, outputs};
-
- auto node = factory.create(type, param, _graph->operands());
- _graph->addOperation(std::unique_ptr<neurun::ir::Operation>{node});
- }
- catch (const std::exception &e)
- {
- return false;
- }
- return true;
-}
-
-bool ANeuralNetworksModel::addModelInput(uint32_t index) noexcept
-{
- try
- {
- _operand_usages[index] = OperandUsage::MODEL_INPUT;
-
- const neurun::ir::OperandIndex ind{index};
- _graph->addInput(ind);
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
-bool ANeuralNetworksModel::addModelOutput(uint32_t index) noexcept
-{
- try
- {
- const neurun::ir::OperandIndex ind{index};
-
- // Duplicated output is not allowed
- if (_graph->getOutputs().contains(ind))
- {
- return false;
- }
-
- _graph->addOutput(ind);
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << std::endl;
-
- return false;
- }
-
- return true;
-}
-
-bool ANeuralNetworksModel::finish() noexcept
-{
- try
- {
- fillOptionalOperand();
-
- _graph->finishBuilding();
-
- _operand_usages.clear();
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << '\n';
-
- return false;
- }
-
- return true;
-}
-
-bool ANeuralNetworksModel::isFinished() noexcept { return !_graph->isBuildingPhase(); }
-
-bool ANeuralNetworksModel::isExistOperand(uint32_t index) noexcept
-{
- return _graph->operands().exist(neurun::ir::OperandIndex{index});
-}
-
-size_t ANeuralNetworksModel::operandSize(uint32_t index) noexcept
-{
- try
- {
- return _graph->operands().at(neurun::ir::OperandIndex{index}).operandSize();
- }
- catch (const std::exception &e)
- {
- VERBOSE(EXCEPTION) << e.what() << '\n';
-
- return 0;
- }
-}
-
-bool ANeuralNetworksModel::isUsageSet(uint32_t index) noexcept
-{
- return (_operand_usages[index] != OperandUsage::NOT_DEFINED);
-}
-
-bool ANeuralNetworksModel::isOperationOutput(uint32_t index) noexcept
-{
- return (_operand_usages[index] == OperandUsage::OPERATION_OUTPUT);
-}
-
-void ANeuralNetworksModel::setOptionalOperand(const neurun::ir::OperandIndex idx)
-{
- _optional_operands.insert(idx);
-}
-
-void ANeuralNetworksModel::fillOptionalOperand(void)
-{
- _graph->operations().iterate(
- [&](const neurun::ir::OperationIndex &, neurun::ir::Operation &node) {
- for (auto input : node.getInputs())
- {
- // TODO fill default value for optional operands
- if (_optional_operands.find(input) != _optional_operands.end())
- {
- throw std::runtime_error{"Optional operand is not supported yet"};
- }
- }
- });
-}
diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.h b/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.h
deleted file mode 100644
index d364ee39e..000000000
--- a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksModel.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __MODEL_H__
-#define __MODEL_H__
-
-#include <unordered_set>
-#include <NeuralNetworks.h>
-#include <NeuralNetworksEx.h>
-
-#include "ir/Graph.h"
-
-struct ANeuralNetworksModel
-{
-public:
- enum class OperandUsage
- {
- NOT_DEFINED = 0,
- MODEL_INPUT,
- CONSTANT,
- OPERATION_OUTPUT,
- };
-
-public:
- ANeuralNetworksModel() noexcept;
-
-public:
- bool addOperand(const ANeuralNetworksOperandType *type) noexcept;
- bool setOperandValue(uint32_t index, const void *buffer, size_t length, bool optional = false,
- bool copy = false) noexcept;
- bool addOperation(ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t *inputs,
- uint32_t outputCount, const uint32_t *outputs) noexcept;
- bool addOperationEx(ANeuralNetworksOperationTypeEx type, uint32_t inputCount,
- const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs) noexcept;
- bool addModelInput(uint32_t index) noexcept;
- bool addModelOutput(uint32_t index) noexcept;
- bool finish() noexcept;
-
- neurun::ir::Graph &deref(void) { return *_graph; }
- bool isFinished() noexcept;
- bool isExistOperand(uint32_t index) noexcept;
- size_t operandSize(uint32_t index) noexcept;
- bool isUsageSet(uint32_t index) noexcept;
- bool isOperationOutput(uint32_t index) noexcept;
- void release(std::shared_ptr<neurun::ir::Graph> &graph) { graph = _graph; }
-
-private:
- void setOptionalOperand(const neurun::ir::OperandIndex idx);
- void fillOptionalOperand(void);
-
-private:
- std::shared_ptr<neurun::ir::Graph> _graph;
- std::unordered_set<neurun::ir::OperandIndex> _optional_operands;
- std::vector<OperandUsage> _operand_usages;
-};
-
-#endif // __MODEL_H__
diff --git a/runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.cc b/runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.cc
deleted file mode 100644
index 79589be75..000000000
--- a/runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.cc
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "NNAPIConvert.h"
-
-#include <numeric>
-
-using namespace neurun::ir;
-
-DataType NNAPIConvert::getDataType(OperandCode type)
-{
- switch (type)
- {
- case ANEURALNETWORKS_FLOAT32:
- case ANEURALNETWORKS_TENSOR_FLOAT32:
- return DataType::FLOAT32;
- case ANEURALNETWORKS_INT32:
- case ANEURALNETWORKS_TENSOR_INT32:
- return DataType::INT32;
- case ANEURALNETWORKS_UINT32:
- return DataType::UINT32;
- case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
- return DataType::QUANT8_ASYMM;
- case ANEURALNETWORKS_TENSOR_QUANT8_SYMM:
- return DataType::QUANT8_SYMM;
- case ANEURALNETWORKS_BOOL:
- case ANEURALNETWORKS_TENSOR_BOOL8:
- return DataType::BOOL8;
- default:
- throw std::runtime_error("Unsupported type");
- }
-}
-
-TypeInfo NNAPIConvert::getTypeInfo(const ANeuralNetworksOperandType *type)
-{
- return TypeInfo(getDataType((OperandCode)(type->type)), type->scale, type->zeroPoint);
-}
-
-Shape NNAPIConvert::getShape(const ANeuralNetworksOperandType *type)
-{
- Shape shape(type->dimensionCount);
-
- for (uint32_t axis = 0; axis < type->dimensionCount; ++axis)
- {
- shape.dim(axis) = type->dimensions[axis];
- }
-
- return shape;
-}
-
-size_t NNAPIConvert::calculateSizeFromType(const ANeuralNetworksOperandType *type)
-{
- auto shape = getShape(type);
- auto data_type = getDataType((OperandCode)(type->type));
-
- return shape.num_elements() * sizeOfDataType(data_type);
-}
-
-Activation NNAPIConvert::getFusedActivation(FuseCode act)
-{
- switch (act)
- {
- case ANEURALNETWORKS_FUSED_NONE:
- return Activation::NONE;
- case ANEURALNETWORKS_FUSED_RELU:
- return Activation::RELU;
- case ANEURALNETWORKS_FUSED_RELU1:
- return Activation::RELU1;
- case ANEURALNETWORKS_FUSED_RELU6:
- return Activation::RELU6;
- default:
- throw std::runtime_error("Unsupported activation type");
- }
-}
-
-PaddingType NNAPIConvert::getPaddingType(PaddingCode type)
-{
- switch (type)
- {
- case ANEURALNETWORKS_PADDING_SAME:
- return PaddingType::SAME;
- case ANEURALNETWORKS_PADDING_VALID:
- return PaddingType::VALID;
- default:
- throw std::runtime_error("Unsupported type");
- }
-}
diff --git a/runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.h b/runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.h
deleted file mode 100644
index 91f84b983..000000000
--- a/runtime/neurun/frontend/nnapi/wrapper/NNAPIConvert.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file NNAPIConvert.h
- * @brief This file contains convereter(s)\n
- * from NNAPI frontend's struct to neurun's internal struct
- */
-#ifndef __NEURUN_NNAPI_CONVERT_H__
-#define __NEURUN_NNAPI_CONVERT_H__
-
-#include <NeuralNetworks.h>
-
-#include <ir/TypeInfo.h>
-#include <ir/Shape.h>
-#include <ir/InternalType.h>
-
-class NNAPIConvert
-{
-
-public:
- /**
- * @brief Convert data type from NNAPI to internal data type
- * @param[in] type NNAPI's data type
- * @return neurun's internal data type
- */
- static neurun::ir::DataType getDataType(OperandCode type);
-
- /**
- * @brief Convert operand type info from NNAPI to interanl operand type info
- * @param[in] type NNAPI's operand type
- * @return neurun's internal operand type info
- */
- static neurun::ir::TypeInfo getTypeInfo(const ANeuralNetworksOperandType *type);
-
- /**
- * @brief Convert operand shape info from NNAPI to internal operand shape
- * @param[in] type NNAPI's operand type
- * @return neurun's internal operand shape
- */
- static neurun::ir::Shape getShape(const ANeuralNetworksOperandType *type);
-
- /**
- * @brief Calcaulate operand size from NNAPI type
- * @param[in] type NNAPI's operand type
- * @return Operand size
- */
- static size_t calculateSizeFromType(const ANeuralNetworksOperandType *type);
-
- /**
- * @brief Convert NNAPI FuseCode to internal activation type
- * @param[in] act NNAPI's FuseCode type
- * @return neurun's internal activation type
- */
- static neurun::ir::Activation getFusedActivation(FuseCode act);
-
- /**
- * @brief Convert NNAPI PaddingCode to internal padding type
- * @param[in] type NNAPI's PaddingCode type
- * @return neurun's internal padding type
- */
- static neurun::ir::PaddingType getPaddingType(PaddingCode type);
-};
-
-#endif // __NEURUN_NNAPI_CONVERT_H__
diff --git a/runtime/neurun/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/neurun/frontend/nnapi/wrapper/OperationFactory.cc
deleted file mode 100644
index 84f876e86..000000000
--- a/runtime/neurun/frontend/nnapi/wrapper/OperationFactory.cc
+++ /dev/null
@@ -1,1680 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationFactory.h"
-#include "NNAPIConvert.h"
-
-#include <ir/Operations.Include.h>
-#include <string.h>
-
-namespace
-{
-using namespace neurun::ir;
-
-void replaceDataType(Operands &operands, const OperandIndex &index, const DataType type)
-{
- assert(operands.exist(index));
- operands.at(index).type(type);
-}
-
-ExplicitPadding makeExplicitPadding(Operands &operands, const OperandIndex &left_index,
- const OperandIndex &right_index, const OperandIndex &top_index,
- const OperandIndex &bottom_index)
-{
- auto left = operands.at(left_index).asScalar<int32_t>();
- auto right = operands.at(right_index).asScalar<int32_t>();
- auto top = operands.at(top_index).asScalar<int32_t>();
- auto bottom = operands.at(bottom_index).asScalar<int32_t>();
-
- if (left < 0 || right < 0 || top < 0 || bottom < 0)
- {
- throw std::runtime_error{"Cannot handle negative explicit padding value"};
- }
-
- ExplicitPadding param;
- param.left = static_cast<uint32_t>(left);
- param.right = static_cast<uint32_t>(right);
- param.top = static_cast<uint32_t>(top);
- param.bottom = static_cast<uint32_t>(bottom);
-
- return param;
-}
-
-Stride makeStride(Operands &operands, const OperandIndex &horizontal_index,
- const OperandIndex &vertical_index)
-{
- auto horizontal = operands.at(horizontal_index).asScalar<int32_t>();
- auto vertical = operands.at(vertical_index).asScalar<int32_t>();
-
- if (vertical < 0 || horizontal < 0)
- {
- throw std::runtime_error{"Cannot handle negative stride value"};
- }
-
- Stride stride;
- stride.horizontal = static_cast<uint32_t>(horizontal);
- stride.vertical = static_cast<uint32_t>(vertical);
-
- return stride;
-}
-
-uint32_t getUint32Scalar(Operands &operands, const OperandIndex index)
-{
- auto int32_value = operands.at(index).asScalar<int32_t>();
- if (int32_value < 0)
- {
- throw std::runtime_error{"Cannot handle negative value"};
- }
-
- return static_cast<uint32_t>(int32_value);
-}
-
-} // namespace
-
-OperationFactory &OperationFactory::get()
-{
- static OperationFactory factory;
- return factory;
-}
-
-OperationFactory::OperationFactory()
-{
- _map[ANEURALNETWORKS_BATCH_TO_SPACE_ND] = [](const OperationFactory::Param &init_param,
- Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Block size Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::BatchToSpaceND{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_DEPTHWISE_CONV_2D] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert((init_param.input_count == 8 || init_param.input_count == 11) &&
- init_param.output_count == 1);
-
- // In common
- // 0 -> IFM Tensor Index
- // 1 -> Kernel Tensor Index
- // 2 -> Bias Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::DepthwiseConv2D::Param param;
- if (init_param.input_count == 8)
- {
- // Imlicit Padding case
- // Each input should be interpreted as follows:
- //
- // 3 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 4 -> Stride (width) Index
- // 5 -> Stride (height) INdex
- // 6 -> Depthwise multiplier
- // 7 -> Activation Index
-
- const auto padding_index = OperandIndex{init_param.inputs[3]};
- const auto hstride_index = OperandIndex{init_param.inputs[4]};
- const auto vstride_index = OperandIndex{init_param.inputs[5]};
- const auto multiplier_index = OperandIndex{init_param.inputs[6]};
- const auto activation_index = OperandIndex{init_param.inputs[7]};
-
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.multiplier = getUint32Scalar(operands, multiplier_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else
- {
- // Explicit Padding case
- // Each input should be interpreted as follows:
- //
- // 3 -> Padding On the Left
- // 4 -> Padding On the Right
- // 5 -> Padding On the Top
- // 6 -> Padding On the Bottom
- // 7 -> Stride (width) Index
- // 8 -> Stride (height) Index
- // 9 -> Depthwise multiplier
- // 10-> Activation Index
-
- const auto padding_left_index = OperandIndex{init_param.inputs[3]};
- const auto padding_right_index = OperandIndex{init_param.inputs[4]};
- const auto padding_top_index = OperandIndex{init_param.inputs[5]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[6]};
- const auto hstride_index = OperandIndex{init_param.inputs[7]};
- const auto vstride_index = OperandIndex{init_param.inputs[8]};
- const auto multiplier_index = OperandIndex{init_param.inputs[9]};
- const auto activation_index = OperandIndex{init_param.inputs[10]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.multiplier = getUint32Scalar(operands, multiplier_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
-
- return new operation::DepthwiseConv2D{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_MAX_POOL_2D] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 7 || init_param.input_count == 10);
- assert(init_param.output_count == 1);
-
- // In common
- // 0 -> IFM Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::MaxPool2D::Param param;
- if (init_param.input_count == 7) // support implicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
-
- const auto padding_index = OperandIndex{init_param.inputs[1]};
- const auto hstride_index = OperandIndex{init_param.inputs[2]};
- const auto vstride_index = OperandIndex{init_param.inputs[3]};
- const auto kw_index = OperandIndex{init_param.inputs[4]};
- const auto kh_index = OperandIndex{init_param.inputs[5]};
- const auto activation_index = OperandIndex{init_param.inputs[6]};
-
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = operands.at(kh_index).asScalar<uint32_t>();
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else if (init_param.input_count == 10) // support explicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
-
- const auto padding_left_index = OperandIndex{init_param.inputs[1]};
- const auto padding_right_index = OperandIndex{init_param.inputs[2]};
- const auto padding_top_index = OperandIndex{init_param.inputs[3]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
- const auto hstride_index = OperandIndex{init_param.inputs[5]};
- const auto vstride_index = OperandIndex{init_param.inputs[6]};
- const auto kw_index = OperandIndex{init_param.inputs[7]};
- const auto kh_index = OperandIndex{init_param.inputs[8]};
- const auto activation_index = OperandIndex{init_param.inputs[9]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- return new operation::MaxPool2D{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- // TODO We may reuse code here for MAX_POOL_2D. Seems like these two are identical
- assert(init_param.input_count == 7 || init_param.input_count == 10);
- assert(init_param.output_count == 1);
-
- // In common
- // 0 -> IFM Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::AvgPool2D::Param param;
- if (init_param.input_count == 7) // support implicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
-
- const auto padding_index = OperandIndex{init_param.inputs[1]};
- const auto hstride_index = OperandIndex{init_param.inputs[2]};
- const auto vstride_index = OperandIndex{init_param.inputs[3]};
- const auto kw_index = OperandIndex{init_param.inputs[4]};
- const auto kh_index = OperandIndex{init_param.inputs[5]};
- const auto activation_index = OperandIndex{init_param.inputs[6]};
-
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else if (init_param.input_count == 10) // support explicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
-
- const auto padding_left_index = OperandIndex{init_param.inputs[1]};
- const auto padding_right_index = OperandIndex{init_param.inputs[2]};
- const auto padding_top_index = OperandIndex{init_param.inputs[3]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
- const auto hstride_index = OperandIndex{init_param.inputs[5]};
- const auto vstride_index = OperandIndex{init_param.inputs[6]};
- const auto kw_index = OperandIndex{init_param.inputs[7]};
- const auto kh_index = OperandIndex{init_param.inputs[8]};
- const auto activation_index = OperandIndex{init_param.inputs[9]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
-
- return new operation::AvgPool2D{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_CONCATENATION] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count >= 2); // At least one one input tensor and axis
- assert(init_param.output_count == 1);
-
- // When there are N + 1 inputs, each input should be interpreted as follows:
- //
- // [0, N) -> Input tensors
- // N -> Axis
- //
-
- OperandIndexSequence inputs;
- for (uint32_t n = 0; n < init_param.input_count - 1; ++n)
- {
- inputs.append(OperandIndex{init_param.inputs[n]});
- }
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::Concat::Param param;
- const OperandIndex axis_index{init_param.inputs[init_param.input_count - 1]};
- param.axis = operands.at(axis_index).asScalar<int32_t>();
- param.rank = operands.at(outputs.at(0)).shape().rank();
-
- return new operation::Concat{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_RESHAPE] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- // Each input should be interpreted as follows:
- //
- // 0 -> A tensor, specifying the tensor to be reshaped.
- // 1 -> A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32, defining the shape of the output
- // tensor
-
- // TODO Second input should be shape tensor (init_param.inputs[1])
- // Currently unused since assume that it is same with output tensor size
- OperandIndexSequence inputs{init_param.inputs[0] /* , init_param.inputs[1] */};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Reshape{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_FULLY_CONNECTED] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 4 && init_param.output_count == 1);
-
- // Each input should be interpreted as follows:
- //
- // 0 -> A tensor, specifying the input.
- // 1 -> A 2-D tensor, specifying the weights
- // 2 -> A 1-D tensor, specifying the bias
- // 3 -> An INT32 value, and has to be one of the FuseCode values
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::FullyConnected::Param param;
- const auto activation_index = OperandIndex{init_param.inputs[3]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::FullyConnected{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_SOFTMAX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- // Each input should be interpreted as follows:
- //
- // 0 -> A 2-D or 4-D tensor, specifying the tensor to be reshaped.
- // 1 -> FLOAT32 value, specifying the positive scaling factor for the exponent, beta.
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- const auto beta_index = OperandIndex{init_param.inputs[1]};
-
- operation::Softmax::Param param;
- param.beta = operands.at(beta_index).asScalar<float>();
-
- return new operation::Softmax{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_CAST] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- // NNAPI uses QUANT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's input/output
- if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT8_ASYMM)
- {
- replaceDataType(operands, inputs.at(0), DataType::UINT8);
- }
- if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT8_ASYMM)
- {
- replaceDataType(operands, outputs.at(0), DataType::UINT8);
- }
-
- return new operation::Cast{inputs, outputs};
- };
-
- // ANEURALNETWORKS_CAST_EX is deprecated
- // TODO Remove ANEURALNETWORKS_CAST_EX
- _map[ANEURALNETWORKS_CAST_EX] = _map[ANEURALNETWORKS_CAST];
-
- _map[ANEURALNETWORKS_CONV_2D] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- using operation::Conv2D;
-
- // inputCount is either 7 or 10 acccording to NN API specification.
- // - Padding is implicit when inputCount is 7
- // - Padding is explicit when inputCount is 10
- assert(init_param.input_count == 7 || init_param.input_count == 10);
- assert(init_param.output_count == 1);
-
- // 0 -> IFM Tensor Index
- // 1 -> Kernel Tensor Index
- // 2 -> Bias Tensor Index
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- Conv2D::Param param;
-
- if (init_param.input_count == 7) // support implicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 3 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 4 -> Stride (width) Index
- // 5 -> Stride (height) INdex
- // 6 -> Activation Index
-
- const auto padding_index = OperandIndex{init_param.inputs[3]};
- const auto hstride_index = OperandIndex{init_param.inputs[4]};
- const auto vstride_index = OperandIndex{init_param.inputs[5]};
- const auto activation_index = OperandIndex{init_param.inputs[6]};
-
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else if (init_param.input_count == 10) // support explicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 3 -> Padding_left index
- // 4 -> Padding_right index
- // 5 -> Padding_top index
- // 6 -> Padding_bottom index
- // 7 -> Stride (width) Index
- // 8 -> Stride (height) INdex
- // 9 -> Activation Index
-
- const auto padding_left_index = OperandIndex{init_param.inputs[3]};
- const auto padding_right_index = OperandIndex{init_param.inputs[4]};
- const auto padding_top_index = OperandIndex{init_param.inputs[5]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[6]};
- const auto hstride_index = OperandIndex{init_param.inputs[7]};
- const auto vstride_index = OperandIndex{init_param.inputs[8]};
- const auto activation_index = OperandIndex{init_param.inputs[9]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
-
- return new Conv2D{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_ADD] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3);
- assert(init_param.output_count == 1);
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Lefthand side operand
- // 1 -> Righthand side operand
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::Add::Param param;
-
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::Add{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_REDUCE_SUM_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 3);
- assert(init_param.output_count == 1);
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Reduced Axes Tensor Index
- // 2 -> keep_dims Index
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
- std::vector<std::int32_t> axes =
- operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>();
-
- operation::ReduceSum::Param param;
- param.axes.assign(axes.cbegin(), axes.cend());
- param.keep_dims = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>() != 0;
- param.rank = operands.at(inputs.at(0)).shape().rank();
-
- return new operation::ReduceSum{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_SUB] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3);
- assert(init_param.output_count == 1);
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Lefthand side operand
- // 1 -> Righthand side operand
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::Sub::Param param;
-
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::Sub{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Begins Tensor Index
- // 2 -> Sizes Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
-
- operation::Slice::Param param;
- param.rank = operands.at(inputs.at(0)).shape().rank();
-
- return new operation::Slice{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_STRIDED_SLICE] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 7 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2],
- init_param.inputs[3]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 1 -> A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the starts of
- // the dimensions of the input tensor to be sliced. The length must be
- // of rank(input0).
- // 2 -> A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the ends of
- // the dimensions of the input tensor to be sliced. The length must be
- // of rank(input0).
- // 3 -> A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the strides of
- // the dimensions of the input tensor to be sliced. The length must be
- // of rank(input0).
- // 4 -> An {@link ANEURALNETWORKS_INT32} scalar, begin_mask. If the ith bit
- // of begin_mask is set, begin[i] is ignored and the fullest possible
- // range in that dimension is used instead.
- // 5 -> An {@link ANEURALNETWORKS_INT32} scalar, end_mask. If the ith bit of
- // end_mask is set, end[i] is ignored and the fullest possible range in
- // that dimension is used instead.
- // 6 -> An {@link ANEURALNETWORKS_INT32} scalar, shrink_axis_mask. An int32
- // mask. If the ith bit of shrink_axis_mask is set, it implies that the
- // ith specification shrinks the dimensionality by 1. A slice of size 1
- // starting from begin[i] in the dimension must be preserved.
-
- operation::StridedSlice::Param param;
-
- param.begin_mask = operands.at(OperandIndex{init_param.inputs[4]}).asScalar<std::int32_t>();
- param.end_mask = operands.at(OperandIndex{init_param.inputs[5]}).asScalar<std::int32_t>();
- param.shrink_axis_mask =
- operands.at(OperandIndex{init_param.inputs[6]}).asScalar<std::int32_t>();
- param.rank = operands.at(inputs.at(0)).shape().rank();
-
- return new operation::StridedSlice{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_TRANSPOSE] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- // TODO make this work with init_param.input_count == 1 (when permutation vector is optional)
-
- // Inputs
- // 0: An n-D tensor, specifying the tensor to be transposed.
- // 1: An optional 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32},
- // the permutation of the dimensions of the input tensor.
- // The returned tensor's dimension i corresponds to the input dimension
- // perm[i]. If perm is not given, it is set to (n-1...0), where n is the
- // rank of the input tensor. Hence by default, this operation performs a
- // regular matrix transpose on 2-D input Tensors.
- assert(init_param.input_count == 2);
- assert(init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
- std::vector<std::int32_t> perm =
- operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>();
-
- operation::Transpose::Param param;
- param.perm.assign(perm.cbegin(), perm.cend());
- param.rank = operands.at(inputs.at(0)).shape().rank();
-
- return new operation::Transpose{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_MUL] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> LHS Tensor Index
- // 1 -> RHS Tensor Index
- // 2 -> Activation Index
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Mul::Param param;
-
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::Mul{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 1 || init_param.input_count == 2);
- assert(init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> An n-D tensor, the tensor to be squeezed.
- // 1 -> An optional 1-D tensor of ANEURALNETWORKS_TENSOR_INT32. The dimensions to squeeze.
- // If specified only squeezes the dimensions listed. Otherwise, squeezes all dimensions.
- // The dimension index starts at 0. An error must be reported if squeezing a dimension that
- // is not 1.
-
- // Add mandatory input index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- // Add dims index if specified
- operation::Squeeze::Param param{};
- if (init_param.input_count == 2)
- {
- auto squeeze_dims_idx = OperandIndex{init_param.inputs[1]};
- assert(operands.at(squeeze_dims_idx).shape().rank() == 1);
- assert(operands.at(squeeze_dims_idx).shape().dim(0) >= 0);
- assert(static_cast<uint32_t>(operands.at(squeeze_dims_idx).shape().dim(0)) <=
- sizeof(param.dims));
- param.ndim = operands.at(squeeze_dims_idx).shape().dim(0);
- if (param.ndim > 0)
- memcpy(param.dims, operands.at(squeeze_dims_idx).data().base(),
- param.ndim * sizeof(param.dims[0]));
- }
-
- return new operation::Squeeze{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_TANH] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Tanh{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_LOGISTIC] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Logistic{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_DIV] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> LHS Tensor Index
- // 1 -> RHS Tensor Index
- // 2 -> Activation Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Div::Param param;
-
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::Div{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_EXP] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Exp{inputs, outputs};
- };
-
- // ANEURALNETWORKS_EXP_EX is deprecated
- // TODO Remove ANEURALNETWORKS_EXP_EX
- _map[ANEURALNETWORKS_EXP_EX] = _map[ANEURALNETWORKS_EXP];
-
- _map[ANEURALNETWORKS_GREATER_EQUAL_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::GreaterEqual;
-
- // Output operand type must be boolean
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_LESS_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Less;
-
- // Output operand type must be boolean
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_REDUCE_MAX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Axis Tensor Index
- // 2 -> keep_dims Index
- OperandIndexSequence inputs{init_param.inputs[0]};
- std::vector<std::int32_t> axes =
- operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>();
-
- operation::ReduceMax::Param param;
- param.axes.assign(axes.cbegin(), axes.cend());
- param.keep_dims = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int8_t>() != 0;
- param.rank = operands.at(inputs.at(0)).shape().rank();
-
- return new operation::ReduceMax{inputs, outputs, param};
- };
-
- // ANEURALNETWORKS_REDUCE_MAX_EX is deprecated
- // TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX
- _map[ANEURALNETWORKS_REDUCE_MAX_EX] = _map[ANEURALNETWORKS_REDUCE_MAX];
-
- _map[ANEURALNETWORKS_NOT_EQUAL_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input1 Tensor Index
- // 1 -> input2 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::NotEqual;
-
- // Output operand type must be boolean
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_LOGICAL_AND_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- // This operation's operands must be boolean type.
- replaceDataType(operands, inputs.at(0), DataType::BOOL8);
- replaceDataType(operands, inputs.at(1), DataType::BOOL8);
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::LogicalAnd{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_RSQRT] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::RSQRT{inputs, outputs};
- };
-
- // ANEURALNETWORKS_RSQRT_EX is deprecated
- // TODO Remove ANEURALNETWORKS_RSQRT_EX
- _map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT];
-
- _map[ANEURALNETWORKS_RELU] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::ReLU{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> IFM Index
- // 1 -> Height Index
- // 2 -> Width Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- operation::ResizeBilinear::Param param;
- param.height_out = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<int32_t>();
- param.width_out = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>();
-
- return new operation::ResizeBilinear{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_RELU1] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::ReLU1{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_RELU6] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::ReLU6{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_RNN] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 6 && init_param.output_count == 2);
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Weights Tensor Index
- // 2 -> Recurrent Weights Tensor Index
- // 3 -> Bias Tensor Index
- // 4 -> Hidden state (in) Index
- // 5 -> Activation Index
-
- OperandIndexSequence inputs;
- for (uint32_t n = 0; n < init_param.input_count - 1; ++n)
- {
- inputs.append(OperandIndex{init_param.inputs[n]});
- }
- OperandIndexSequence outputs;
- for (uint32_t n = 0; n < init_param.output_count; ++n)
- {
- outputs.append(OperandIndex{init_param.outputs[n]});
- }
-
- operation::RNN::Param param;
- const auto activation_index = OperandIndex{init_param.inputs[5]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::RNN{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_FLOOR] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Floor{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param,
- Operands &) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Block size Index
- // 2 -> Paddings Index
- OperandIndexSequence inputs;
- for (uint32_t n = 0; n < init_param.input_count; ++n)
- {
- inputs.append(OperandIndex{init_param.inputs[n]});
- }
-
- return new operation::SpaceToBatchND{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_SPACE_TO_DEPTH] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Block size Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- operation::SpaceToDepth::Param param;
- param.block_size = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
-
- return new operation::SpaceToDepth{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_L2_POOL_2D] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 10 || init_param.input_count == 7);
- assert(init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> IFM Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- operation::L2Pool2D::Param param;
-
- if (init_param.input_count == 7) // Imlicit Padding case
- {
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
- const auto padding_index = OperandIndex{init_param.inputs[1]};
- const auto hstride_index = OperandIndex{init_param.inputs[2]};
- const auto vstride_index = OperandIndex{init_param.inputs[3]};
- const auto kw_index = OperandIndex{init_param.inputs[4]};
- const auto kh_index = OperandIndex{init_param.inputs[5]};
- const auto activation_index = OperandIndex{init_param.inputs[6]};
-
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else // Explicit Padding case
- {
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
- const auto padding_left_index = OperandIndex{init_param.inputs[1]};
- const auto padding_right_index = OperandIndex{init_param.inputs[2]};
- const auto padding_top_index = OperandIndex{init_param.inputs[3]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
- const auto hstride_index = OperandIndex{init_param.inputs[5]};
- const auto vstride_index = OperandIndex{init_param.inputs[6]};
- const auto kw_index = OperandIndex{init_param.inputs[7]};
- const auto kh_index = OperandIndex{init_param.inputs[8]};
- const auto activation_index = OperandIndex{init_param.inputs[9]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
-
- return new operation::L2Pool2D{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_EMBEDDING_LOOKUP] = [](const OperationFactory::Param &init_param,
- Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Lookups Index
- // 1 -> Values Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::EmbeddingLookup{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_L2_NORMALIZATION] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- operation::L2Normalization::Param param;
- param.rank = operands.at(inputs.at(0)).shape().rank();
-
- return new operation::L2Normalization{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_HASHTABLE_LOOKUP] = [](const OperationFactory::Param &init_param,
- Operands &) {
- assert(init_param.input_count == 3 && init_param.output_count == 2);
-
- // Each output should be interpreted as follows:
- //
- // 0 -> Output Index
- // 1 -> Hits Index
- OperandIndexSequence outputs{init_param.outputs[0], init_param.outputs[1]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Lookups Index
- // 1 -> Keys Index
- // 2 -> Values Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
-
- return new operation::HashtableLookup{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_PRELU_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- // 1 -> alpha Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::PReLU{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_TRANSPOSE_CONV_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 6 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Output Shape Index
- // 1 -> Weights Index
- // 2 -> Input Tensor Index
- // 3 -> Padding Type
- // 4 -> Stride width
- // 5 -> Stride height
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
-
- operation::TransposeConv::Param param;
-
- const auto padding_index = OperandIndex{init_param.inputs[3]};
- const auto hstride_index = OperandIndex{init_param.inputs[4]};
- const auto vstride_index = OperandIndex{init_param.inputs[5]};
-
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
-
- return new operation::TransposeConv{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_SQRT] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- return new operation::SQRT{inputs, outputs};
- };
-
- // ANEURALNETWORKS_SQRT_EX is deprecated
- // TODO Remove ANEURALNETWORKS_SQRT_EX
- _map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT];
-
- _map[ANEURALNETWORKS_LOGICAL_OR_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- // This operation's operands must be boolean type.
- replaceDataType(operands, inputs.at(0), DataType::BOOL8);
- replaceDataType(operands, inputs.at(1), DataType::BOOL8);
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::LogicalOr{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_LOGICAL_NOT_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- // This operation's operands must be boolean type.
- replaceDataType(operands, inputs.at(0), DataType::BOOL8);
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::LogicalNot{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_LSTM] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 23 && init_param.output_count == 4);
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Input to Input Tensor Index
- // 2 -> Input to Forget Tensor Index
- // 3 -> Input to Cell Tensor Index
- // 4 -> Input to Output Tensor Index
- // 5 -> Recurrent to Input Weights Tensor Index
- // 6 -> Recurrent to Forget Weights Tensor Index
- // 7 -> Recurrent to Cell Weights Tensor Index
- // 8 -> Recurrent to Output Weights Tensor Index
- // 9 -> Cell to Input Weights Tensor Index
- // 10 -> Cell to Forget Weights Tensor Index
- // 11 -> Cell to Output Weights Tensor Index
- // 12 -> Input Gate Bias Tensor Index
- // 13 -> Forget Gate Bias Tensor Index
- // 14 -> Cell Bias Tensor Index
- // 15 -> Output Gate Bias Tensor Index
- // 16 -> Projection Weights Tensor Index
- // 17 -> Projection Bias Tensor Index
- // 18 -> Output State In Tensor Index
- // 19 -> Cell State In Tensor Index
- OperandIndexSequence inputs;
- for (uint32_t n = 0; n < init_param.input_count - 3; ++n)
- {
- inputs.append(OperandIndex{init_param.inputs[n]});
- }
-
- // Each output should be interpreted as follows:
- //
- // 0 -> Scratch Buffer Tensor Index
- // 1 -> Output State Out Tensor Index
- // 2 -> Cell State Out Tensor Index
- // 3 -> Output Tensor Index
- OperandIndexSequence outputs;
- for (uint32_t n = 0; n < init_param.output_count; ++n)
- {
- outputs.append(OperandIndex{init_param.outputs[n]});
- }
-
- operation::LSTM::Param param;
- const auto activation_index = OperandIndex{init_param.inputs[20]};
- switch (operands.at(activation_index).asScalar<int32_t>())
- {
- case 0:
- param.activation = Activation::NONE;
- break;
- case 1:
- param.activation = Activation::RELU;
- break;
- case 2:
- param.activation = Activation::RELU1;
- break;
- case 3:
- param.activation = Activation::RELU6;
- break;
- case 4:
- param.activation = Activation::TANH;
- break;
- case 6:
- param.activation = Activation::SIGMOID;
- break;
- default:
- throw std::runtime_error("Unsupported activation type");
- break;
- }
- param.cell_threshold = operands.at(OperandIndex{init_param.inputs[21]}).asScalar<float>();
- param.projection_threshold = operands.at(OperandIndex{init_param.inputs[22]}).asScalar<float>();
-
- return new operation::LSTM{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_EQUAL_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Equal;
-
- // Output operand type must be boolean
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_SQUARED_DIFFERENCE_EX] = [](const OperationFactory::Param &init_param,
- Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> LHS Tensor Index
- // 1 -> RHS Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::SquaredDifference{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_TOPK_V2] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 2);
-
- // Each output should be interpreted as follows:
- //
- // 0 -> Index for Output Values
- // 1 -> Index for Output Indices
- OperandIndexSequence outputs{init_param.outputs[0], init_param.outputs[1]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Index for Input Data
- // 1 -> Index for K
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- operation::TopKV2::Param param;
- param.k = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
-
- return new operation::TopKV2{inputs, outputs, param};
- };
-
- // ANEURALNETWORKS_CAST_EX is deprecated
- // TODO Remove ANEURALNETWORKS_CAST_EX
- _map[ANEURALNETWORKS_TOPK_V2_EX] = _map[ANEURALNETWORKS_TOPK_V2];
-
- _map[ANEURALNETWORKS_GATHER] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- // 1 -> axis Index
- // 2 -> indices Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[2]};
-
- operation::Gather::Param param;
- param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<int32_t>();
- param.rank = operands.at(inputs.at(0)).shape().rank();
-
- return new operation::Gather{inputs, outputs, param};
- };
-
- // ANEURALNETWORKS_GATHER_EX is deprecated
- // TODO Remove ANEURALNETWORKS_GATHER_EX
- _map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER];
-
- _map[ANEURALNETWORKS_NEG] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Neg{inputs, outputs};
- };
-
- // ANEURALNETWORKS_NEG_EX is deprecated
- // TODO Remove ANEURALNETWORKS_NEG_EX
- _map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG];
-
- _map[ANEURALNETWORKS_ABS] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Abs{inputs, outputs};
- };
-
- // ANEURALNETWORKS_ABS_EX is deprecated
- // TODO Remove ANEURALNETWORKS_ABS_EX
- _map[ANEURALNETWORKS_ABS_EX] = _map[ANEURALNETWORKS_ABS];
-
- _map[ANEURALNETWORKS_ARGMAX_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Axis Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- operation::ArgMax::Param param;
- param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
- param.rank = operands.at(inputs.at(0)).shape().rank();
-
- return new operation::ArgMax{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_DEQUANTIZE] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Dequantize{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> ifm Tensor Index
- // 1 -> axis Tensor Index
- // 2 -> keep_dims Index
- OperandIndexSequence inputs{init_param.inputs[0]};
- std::vector<std::int32_t> axes =
- operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>();
-
- operation::Mean::Param param;
- param.axes.assign(axes.cbegin(), axes.cend());
- param.keep_dims = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>() != 0;
- param.rank = operands.at(inputs.at(0)).shape().rank();
-
- return new operation::Mean{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 5 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- operation::LocalResponseNormalization::Param param;
- param.radius = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
- param.bias = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<float>();
- param.alpha = operands.at(OperandIndex{init_param.inputs[3]}).asScalar<float>();
- param.beta = operands.at(OperandIndex{init_param.inputs[4]}).asScalar<float>();
-
- return new operation::LocalResponseNormalization{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_DEPTH_TO_SPACE] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Block size Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- operation::DepthToSpace::Param param;
- param.block_size = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
-
- return new operation::DepthToSpace{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_PACK_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count >= 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
- OperandIndexSequence inputs;
- for (uint32_t n = 0; n < init_param.input_count - 2; ++n)
- {
- inputs.append(OperandIndex{init_param.inputs[n]});
- }
-
- operation::Pack::Param param;
- const auto num_index = OperandIndex{init_param.inputs[init_param.input_count - 2]};
- const auto axis_index = OperandIndex{init_param.inputs[init_param.input_count - 1]};
- param.num = operands.at(num_index).asScalar<int32_t>();
- param.axis = operands.at(axis_index).asScalar<int32_t>();
- param.rank = operands.at(outputs.at(0)).shape().rank();
-
- return new operation::Pack{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_REDUCE_MIN_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Axis Tensor Index
- // 2 -> keep_dims Index
- OperandIndexSequence inputs{init_param.inputs[0]};
- std::vector<std::int32_t> axes =
- operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>();
-
- operation::ReduceMin::Param param;
- param.axes.assign(axes.cbegin(), axes.cend());
- param.keep_dims = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>() != 0;
- param.rank = operands.at(inputs.at(0)).shape().rank();
-
- return new operation::ReduceMin{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_SPLIT_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 3);
- assert(init_param.output_count >= 1); // At least one output tensor and axis
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs;
- for (uint32_t n = 0; n < init_param.output_count; ++n)
- {
- outputs.append(OperandIndex{init_param.outputs[n]});
- }
-
- operation::Split::Param param;
- param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
- param.num_splits = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<std::int32_t>();
- param.rank = operands.at(inputs.at(0)).shape().rank();
-
- return new operation::Split{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_UNPACK_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count >= 1);
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs;
- for (uint32_t n = 0; n < init_param.output_count; ++n)
- {
- outputs.append(OperandIndex{init_param.outputs[n]});
- }
-
- operation::Unpack::Param param;
- const auto num_index = OperandIndex{init_param.inputs[1]};
- const auto axis_index = OperandIndex{init_param.inputs[2]};
- param.num = operands.at(num_index).asScalar<int32_t>();
- param.axis = operands.at(axis_index).asScalar<int32_t>();
- param.rank = operands.at(inputs.at(0)).shape().rank();
-
- return new operation::Unpack{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_PAD] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count >= 1);
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::Pad::Param param;
- param.rank = operands.at(inputs.at(0)).shape().rank();
-
- return new operation::Pad{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_MINIMUM] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Min{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_MAXIMUM] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Max{inputs, outputs};
- };
-}
-
-Operation *OperationFactory::create(ANeuralNetworksOperationType type,
- const OperationFactory::Param &param, Operands &operands)
-{
- auto it = _map.find(type);
- if (it == _map.end())
- {
- throw std::runtime_error("Unsupported operation type: " + std::to_string(type));
- }
- return it->second(param, operands);
-}
diff --git a/runtime/neurun/frontend/nnapi/wrapper/OperationFactory.h b/runtime/neurun/frontend/nnapi/wrapper/OperationFactory.h
deleted file mode 100644
index 003e4eb7a..000000000
--- a/runtime/neurun/frontend/nnapi/wrapper/OperationFactory.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __OPERATION_FACTORY_H__
-#define __OPERATION_FACTORY_H__
-
-#include <unordered_map>
-
-#include "ir/Operands.h"
-#include "ir/Operation.h"
-#include "NeuralNetworks.h"
-#include "NeuralNetworksEx.h"
-
-/**
- * @brief A class to create a neurun operation object from NN API input parameters
- */
-class OperationFactory
-{
-public:
- struct Param
- {
- uint32_t input_count;
- const uint32_t *inputs;
- uint32_t output_count;
- const uint32_t *outputs;
- };
-
-public:
- using Generator = std::function<neurun::ir::Operation *(const OperationFactory::Param &,
- neurun::ir::Operands &)>;
-
-public:
- static OperationFactory &get();
-
-private:
- OperationFactory();
-
-public:
- neurun::ir::Operation *create(ANeuralNetworksOperationType, const OperationFactory::Param &param,
- neurun::ir::Operands &operands);
- // TODO add "register" method for separating registration, possibly supporting custom-ops
-
-private:
- std::unordered_map<ANeuralNetworksOperationType, Generator> _map;
-};
-
-#endif // __OPERATION_FACTORY_H__
diff --git a/runtime/neurun/frontend/tflite/CMakeLists.txt b/runtime/neurun/frontend/tflite/CMakeLists.txt
deleted file mode 100644
index 5157869f3..000000000
--- a/runtime/neurun/frontend/tflite/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-if(NOT BUILD_TFLITE_LOADER)
- return()
-endif(NOT BUILD_TFLITE_LOADER)
-
-nnfw_find_package(FlatBuffersSource REQUIRED)
-
-set(TFLITE_LOADER_SOURCES src/tflite_loader.cc)
-
-add_library(tflite_loader SHARED ${TFLITE_LOADER_SOURCES})
-
-target_include_directories(tflite_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_include_directories(tflite_loader PRIVATE ${FlatBuffersSource_DIR}/include)
-
-target_link_libraries(tflite_loader PUBLIC neurun_core)
-target_link_libraries(tflite_loader PRIVATE base_loader nnfw_lib_cpp14 nnfw_common nnfw_coverage)
-
-install(TARGETS tflite_loader DESTINATION lib)
diff --git a/runtime/neurun/frontend/tflite/include/tflite_loader.h b/runtime/neurun/frontend/tflite/include/tflite_loader.h
deleted file mode 100644
index 033230b4b..000000000
--- a/runtime/neurun/frontend/tflite/include/tflite_loader.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __TFLITE_TFLITE_LOADER_H__
-#define __TFLITE_TFLITE_LOADER_H__
-
-#include "ir/Graph.h"
-
-#include <memory>
-
-namespace neurun
-{
-namespace tflite_loader
-{
-
-std::unique_ptr<ir::Graph> loadModel(const char *filename);
-
-} // namespace tflite_loader
-} // namespace neurun
-
-#endif // __TFLITE_TFLITE_LOADER_H__
diff --git a/runtime/neurun/frontend/tflite/src/tflite_loader.cc b/runtime/neurun/frontend/tflite/src/tflite_loader.cc
deleted file mode 100644
index 10a4fc095..000000000
--- a/runtime/neurun/frontend/tflite/src/tflite_loader.cc
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite_loader.h"
-#include "base_loader.h"
-#include "tflite_schema_generated.h"
-
-namespace neurun
-{
-namespace tflite_loader
-{
-
-namespace
-{
-
-struct LoaderDomain
-{
- using Verifier = flatbuffers::Verifier;
- using ActivationFunctionType = neurun_tflite::ActivationFunctionType;
- using Buffer = neurun_tflite::Buffer;
- using BuiltinOperator = neurun_tflite::BuiltinOperator;
- using CustomOptionsFormat = neurun_tflite::CustomOptionsFormat;
- using Model = neurun_tflite::Model;
- using Operator = neurun_tflite::Operator;
- using Padding = neurun_tflite::Padding;
- using Pool2DOptions = neurun_tflite::Pool2DOptions;
- using Tensor = neurun_tflite::Tensor;
- using TensorType = neurun_tflite::TensorType;
- using SubGraph = neurun_tflite::SubGraph;
-
- static const char *EnumNameBuiltinOperator(BuiltinOperator e)
- {
- return neurun_tflite::EnumNameBuiltinOperator(e);
- }
- static const char *EnumNameActivationFunctionType(ActivationFunctionType e)
- {
- return neurun_tflite::EnumNameActivationFunctionType(e);
- }
- static const char *EnumNameTensorType(TensorType e)
- {
- return neurun_tflite::EnumNameTensorType(e);
- }
- static const Model *GetModel(const void *buf) { return neurun_tflite::GetModel(buf); }
- static bool VerifyModelBuffer(Verifier &verifier)
- {
- return neurun_tflite::VerifyModelBuffer(verifier);
- }
-};
-
-class TFLiteLoader final : public base_loader::BaseLoader<LoaderDomain, TFLiteLoader>
-{
-public:
- using BaseLoader::BaseLoader;
-
- void loadSubgraph(const neurun_tflite::SubGraph *subgraph)
- {
- // Load tensors
- _tensor_to_operand.resize(subgraph->tensors()->size());
- for (flatbuffers::uoffset_t i = 0; i < subgraph->tensors()->size(); ++i)
- {
- _tensor_to_operand[i] = loadOperand(subgraph->tensors()->Get(i));
- }
- // Set inputs
- for (const std::int32_t input_ind : *subgraph->inputs())
- {
- _graph.addInput(_tensor_to_operand[input_ind]);
- }
- // Set outputs
- for (const std::int32_t output_ind : *subgraph->outputs())
- {
- _graph.addOutput(_tensor_to_operand[output_ind]);
- }
- // Create operations
- for (const auto *op : *subgraph->operators())
- {
- loadOperation(op);
- }
- }
-};
-
-} // namespace
-
-std::unique_ptr<ir::Graph> loadModel(const char *filename)
-{
- auto graph = nnfw::cpp14::make_unique<ir::Graph>();
- TFLiteLoader loader(*graph);
- loader.loadFromFile(filename);
- return graph;
-}
-
-} // namespace tflite_loader
-} // namespace neurun
diff --git a/runtime/neurun/frontend/tflite/src/tflite_schema_generated.h b/runtime/neurun/frontend/tflite/src/tflite_schema_generated.h
deleted file mode 100644
index 21669e2ff..000000000
--- a/runtime/neurun/frontend/tflite/src/tflite_schema_generated.h
+++ /dev/null
@@ -1,7275 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-// automatically generated by the FlatBuffers compiler, do not modify
-
-#ifndef FLATBUFFERS_GENERATED_SCHEMA_NEURUN_TFLITE_H_
-#define FLATBUFFERS_GENERATED_SCHEMA_NEURUN_TFLITE_H_
-
-#include "flatbuffers/flatbuffers.h"
-
-namespace neurun_tflite
-{
-
-struct CustomQuantization;
-
-struct QuantizationParameters;
-
-struct Tensor;
-
-struct Conv2DOptions;
-
-struct Pool2DOptions;
-
-struct DepthwiseConv2DOptions;
-
-struct ConcatEmbeddingsOptions;
-
-struct LSHProjectionOptions;
-
-struct SVDFOptions;
-
-struct RNNOptions;
-
-struct SequenceRNNOptions;
-
-struct BidirectionalSequenceRNNOptions;
-
-struct FullyConnectedOptions;
-
-struct SoftmaxOptions;
-
-struct ConcatenationOptions;
-
-struct AddOptions;
-
-struct MulOptions;
-
-struct L2NormOptions;
-
-struct LocalResponseNormalizationOptions;
-
-struct LSTMOptions;
-
-struct UnidirectionalSequenceLSTMOptions;
-
-struct BidirectionalSequenceLSTMOptions;
-
-struct ResizeBilinearOptions;
-
-struct ResizeNearestNeighborOptions;
-
-struct CallOptions;
-
-struct PadOptions;
-
-struct PadV2Options;
-
-struct ReshapeOptions;
-
-struct SpaceToBatchNDOptions;
-
-struct BatchToSpaceNDOptions;
-
-struct SkipGramOptions;
-
-struct SpaceToDepthOptions;
-
-struct SubOptions;
-
-struct DivOptions;
-
-struct TopKV2Options;
-
-struct EmbeddingLookupSparseOptions;
-
-struct GatherOptions;
-
-struct TransposeOptions;
-
-struct ExpOptions;
-
-struct ReducerOptions;
-
-struct SqueezeOptions;
-
-struct SplitOptions;
-
-struct SplitVOptions;
-
-struct StridedSliceOptions;
-
-struct LogSoftmaxOptions;
-
-struct CastOptions;
-
-struct DequantizeOptions;
-
-struct MaximumMinimumOptions;
-
-struct TileOptions;
-
-struct ArgMaxOptions;
-
-struct ArgMinOptions;
-
-struct GreaterOptions;
-
-struct GreaterEqualOptions;
-
-struct LessOptions;
-
-struct LessEqualOptions;
-
-struct NegOptions;
-
-struct SelectOptions;
-
-struct SliceOptions;
-
-struct TransposeConvOptions;
-
-struct ExpandDimsOptions;
-
-struct SparseToDenseOptions;
-
-struct EqualOptions;
-
-struct NotEqualOptions;
-
-struct ShapeOptions;
-
-struct PowOptions;
-
-struct FakeQuantOptions;
-
-struct PackOptions;
-
-struct LogicalOrOptions;
-
-struct OneHotOptions;
-
-struct AbsOptions;
-
-struct LogicalAndOptions;
-
-struct LogicalNotOptions;
-
-struct UnpackOptions;
-
-struct FloorDivOptions;
-
-struct SquareOptions;
-
-struct ZerosLikeOptions;
-
-struct FillOptions;
-
-struct FloorModOptions;
-
-struct RangeOptions;
-
-struct LeakyReluOptions;
-
-struct SquaredDifferenceOptions;
-
-struct MirrorPadOptions;
-
-struct OperatorCode;
-
-struct Operator;
-
-struct SubGraph;
-
-struct Buffer;
-
-struct Model;
-
-enum TensorType
-{
- TensorType_FLOAT32 = 0,
- TensorType_FLOAT16 = 1,
- TensorType_INT32 = 2,
- TensorType_UINT8 = 3,
- TensorType_INT64 = 4,
- TensorType_STRING = 5,
- TensorType_BOOL = 6,
- TensorType_INT16 = 7,
- TensorType_COMPLEX64 = 8,
- TensorType_INT8 = 9,
- TensorType_MIN = TensorType_FLOAT32,
- TensorType_MAX = TensorType_INT8
-};
-
-inline const TensorType (&EnumValuesTensorType())[10]
-{
- static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32,
- TensorType_UINT8, TensorType_INT64, TensorType_STRING,
- TensorType_BOOL, TensorType_INT16, TensorType_COMPLEX64,
- TensorType_INT8};
- return values;
-}
-
-inline const char *const *EnumNamesTensorType()
-{
- static const char *const names[] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", "INT64", "STRING",
- "BOOL", "INT16", "COMPLEX64", "INT8", nullptr};
- return names;
-}
-
-inline const char *EnumNameTensorType(TensorType e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesTensorType()[index];
-}
-
-enum QuantizationDetails
-{
- QuantizationDetails_NONE = 0,
- QuantizationDetails_CustomQuantization = 1,
- QuantizationDetails_MIN = QuantizationDetails_NONE,
- QuantizationDetails_MAX = QuantizationDetails_CustomQuantization
-};
-
-inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2]
-{
- static const QuantizationDetails values[] = {QuantizationDetails_NONE,
- QuantizationDetails_CustomQuantization};
- return values;
-}
-
-inline const char *const *EnumNamesQuantizationDetails()
-{
- static const char *const names[] = {"NONE", "CustomQuantization", nullptr};
- return names;
-}
-
-inline const char *EnumNameQuantizationDetails(QuantizationDetails e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesQuantizationDetails()[index];
-}
-
-template <typename T> struct QuantizationDetailsTraits
-{
- static const QuantizationDetails enum_value = QuantizationDetails_NONE;
-};
-
-template <> struct QuantizationDetailsTraits<CustomQuantization>
-{
- static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
-};
-
-bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
- QuantizationDetails type);
-bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
- const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
- const flatbuffers::Vector<uint8_t> *types);
-
-enum BuiltinOperator
-{
- BuiltinOperator_ADD = 0,
- BuiltinOperator_AVERAGE_POOL_2D = 1,
- BuiltinOperator_CONCATENATION = 2,
- BuiltinOperator_CONV_2D = 3,
- BuiltinOperator_DEPTHWISE_CONV_2D = 4,
- BuiltinOperator_DEQUANTIZE = 6,
- BuiltinOperator_EMBEDDING_LOOKUP = 7,
- BuiltinOperator_FLOOR = 8,
- BuiltinOperator_FULLY_CONNECTED = 9,
- BuiltinOperator_HASHTABLE_LOOKUP = 10,
- BuiltinOperator_L2_NORMALIZATION = 11,
- BuiltinOperator_L2_POOL_2D = 12,
- BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION = 13,
- BuiltinOperator_LOGISTIC = 14,
- BuiltinOperator_LSH_PROJECTION = 15,
- BuiltinOperator_LSTM = 16,
- BuiltinOperator_MAX_POOL_2D = 17,
- BuiltinOperator_MUL = 18,
- BuiltinOperator_RELU = 19,
- BuiltinOperator_RELU_N1_TO_1 = 20,
- BuiltinOperator_RELU6 = 21,
- BuiltinOperator_RESHAPE = 22,
- BuiltinOperator_RESIZE_BILINEAR = 23,
- BuiltinOperator_RNN = 24,
- BuiltinOperator_SOFTMAX = 25,
- BuiltinOperator_SPACE_TO_DEPTH = 26,
- BuiltinOperator_SVDF = 27,
- BuiltinOperator_TANH = 28,
- BuiltinOperator_CONCAT_EMBEDDINGS = 29,
- BuiltinOperator_SKIP_GRAM = 30,
- BuiltinOperator_CALL = 31,
- BuiltinOperator_CUSTOM = 32,
- BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33,
- BuiltinOperator_PAD = 34,
- BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35,
- BuiltinOperator_GATHER = 36,
- BuiltinOperator_BATCH_TO_SPACE_ND = 37,
- BuiltinOperator_SPACE_TO_BATCH_ND = 38,
- BuiltinOperator_TRANSPOSE = 39,
- BuiltinOperator_MEAN = 40,
- BuiltinOperator_SUB = 41,
- BuiltinOperator_DIV = 42,
- BuiltinOperator_SQUEEZE = 43,
- BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
- BuiltinOperator_STRIDED_SLICE = 45,
- BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46,
- BuiltinOperator_EXP = 47,
- BuiltinOperator_TOPK_V2 = 48,
- BuiltinOperator_SPLIT = 49,
- BuiltinOperator_LOG_SOFTMAX = 50,
- BuiltinOperator_DELEGATE = 51,
- BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52,
- BuiltinOperator_CAST = 53,
- BuiltinOperator_PRELU = 54,
- BuiltinOperator_MAXIMUM = 55,
- BuiltinOperator_ARG_MAX = 56,
- BuiltinOperator_MINIMUM = 57,
- BuiltinOperator_LESS = 58,
- BuiltinOperator_NEG = 59,
- BuiltinOperator_PADV2 = 60,
- BuiltinOperator_GREATER = 61,
- BuiltinOperator_GREATER_EQUAL = 62,
- BuiltinOperator_LESS_EQUAL = 63,
- BuiltinOperator_SELECT = 64,
- BuiltinOperator_SLICE = 65,
- BuiltinOperator_SIN = 66,
- BuiltinOperator_TRANSPOSE_CONV = 67,
- BuiltinOperator_SPARSE_TO_DENSE = 68,
- BuiltinOperator_TILE = 69,
- BuiltinOperator_EXPAND_DIMS = 70,
- BuiltinOperator_EQUAL = 71,
- BuiltinOperator_NOT_EQUAL = 72,
- BuiltinOperator_LOG = 73,
- BuiltinOperator_SUM = 74,
- BuiltinOperator_SQRT = 75,
- BuiltinOperator_RSQRT = 76,
- BuiltinOperator_SHAPE = 77,
- BuiltinOperator_POW = 78,
- BuiltinOperator_ARG_MIN = 79,
- BuiltinOperator_FAKE_QUANT = 80,
- BuiltinOperator_REDUCE_PROD = 81,
- BuiltinOperator_REDUCE_MAX = 82,
- BuiltinOperator_PACK = 83,
- BuiltinOperator_LOGICAL_OR = 84,
- BuiltinOperator_ONE_HOT = 85,
- BuiltinOperator_LOGICAL_AND = 86,
- BuiltinOperator_LOGICAL_NOT = 87,
- BuiltinOperator_UNPACK = 88,
- BuiltinOperator_REDUCE_MIN = 89,
- BuiltinOperator_FLOOR_DIV = 90,
- BuiltinOperator_REDUCE_ANY = 91,
- BuiltinOperator_SQUARE = 92,
- BuiltinOperator_ZEROS_LIKE = 93,
- BuiltinOperator_FILL = 94,
- BuiltinOperator_FLOOR_MOD = 95,
- BuiltinOperator_RANGE = 96,
- BuiltinOperator_RESIZE_NEAREST_NEIGHBOR = 97,
- BuiltinOperator_LEAKY_RELU = 98,
- BuiltinOperator_SQUARED_DIFFERENCE = 99,
- BuiltinOperator_MIRROR_PAD = 100,
- BuiltinOperator_ABS = 101,
- BuiltinOperator_SPLIT_V = 102,
- BuiltinOperator_MIN = BuiltinOperator_ADD,
- BuiltinOperator_MAX = BuiltinOperator_SPLIT_V
-};
-
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[102]
-{
- static const BuiltinOperator values[] = {BuiltinOperator_ADD,
- BuiltinOperator_AVERAGE_POOL_2D,
- BuiltinOperator_CONCATENATION,
- BuiltinOperator_CONV_2D,
- BuiltinOperator_DEPTHWISE_CONV_2D,
- BuiltinOperator_DEQUANTIZE,
- BuiltinOperator_EMBEDDING_LOOKUP,
- BuiltinOperator_FLOOR,
- BuiltinOperator_FULLY_CONNECTED,
- BuiltinOperator_HASHTABLE_LOOKUP,
- BuiltinOperator_L2_NORMALIZATION,
- BuiltinOperator_L2_POOL_2D,
- BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
- BuiltinOperator_LOGISTIC,
- BuiltinOperator_LSH_PROJECTION,
- BuiltinOperator_LSTM,
- BuiltinOperator_MAX_POOL_2D,
- BuiltinOperator_MUL,
- BuiltinOperator_RELU,
- BuiltinOperator_RELU_N1_TO_1,
- BuiltinOperator_RELU6,
- BuiltinOperator_RESHAPE,
- BuiltinOperator_RESIZE_BILINEAR,
- BuiltinOperator_RNN,
- BuiltinOperator_SOFTMAX,
- BuiltinOperator_SPACE_TO_DEPTH,
- BuiltinOperator_SVDF,
- BuiltinOperator_TANH,
- BuiltinOperator_CONCAT_EMBEDDINGS,
- BuiltinOperator_SKIP_GRAM,
- BuiltinOperator_CALL,
- BuiltinOperator_CUSTOM,
- BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
- BuiltinOperator_PAD,
- BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
- BuiltinOperator_GATHER,
- BuiltinOperator_BATCH_TO_SPACE_ND,
- BuiltinOperator_SPACE_TO_BATCH_ND,
- BuiltinOperator_TRANSPOSE,
- BuiltinOperator_MEAN,
- BuiltinOperator_SUB,
- BuiltinOperator_DIV,
- BuiltinOperator_SQUEEZE,
- BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
- BuiltinOperator_STRIDED_SLICE,
- BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
- BuiltinOperator_EXP,
- BuiltinOperator_TOPK_V2,
- BuiltinOperator_SPLIT,
- BuiltinOperator_LOG_SOFTMAX,
- BuiltinOperator_DELEGATE,
- BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM,
- BuiltinOperator_CAST,
- BuiltinOperator_PRELU,
- BuiltinOperator_MAXIMUM,
- BuiltinOperator_ARG_MAX,
- BuiltinOperator_MINIMUM,
- BuiltinOperator_LESS,
- BuiltinOperator_NEG,
- BuiltinOperator_PADV2,
- BuiltinOperator_GREATER,
- BuiltinOperator_GREATER_EQUAL,
- BuiltinOperator_LESS_EQUAL,
- BuiltinOperator_SELECT,
- BuiltinOperator_SLICE,
- BuiltinOperator_SIN,
- BuiltinOperator_TRANSPOSE_CONV,
- BuiltinOperator_SPARSE_TO_DENSE,
- BuiltinOperator_TILE,
- BuiltinOperator_EXPAND_DIMS,
- BuiltinOperator_EQUAL,
- BuiltinOperator_NOT_EQUAL,
- BuiltinOperator_LOG,
- BuiltinOperator_SUM,
- BuiltinOperator_SQRT,
- BuiltinOperator_RSQRT,
- BuiltinOperator_SHAPE,
- BuiltinOperator_POW,
- BuiltinOperator_ARG_MIN,
- BuiltinOperator_FAKE_QUANT,
- BuiltinOperator_REDUCE_PROD,
- BuiltinOperator_REDUCE_MAX,
- BuiltinOperator_PACK,
- BuiltinOperator_LOGICAL_OR,
- BuiltinOperator_ONE_HOT,
- BuiltinOperator_LOGICAL_AND,
- BuiltinOperator_LOGICAL_NOT,
- BuiltinOperator_UNPACK,
- BuiltinOperator_REDUCE_MIN,
- BuiltinOperator_FLOOR_DIV,
- BuiltinOperator_REDUCE_ANY,
- BuiltinOperator_SQUARE,
- BuiltinOperator_ZEROS_LIKE,
- BuiltinOperator_FILL,
- BuiltinOperator_FLOOR_MOD,
- BuiltinOperator_RANGE,
- BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
- BuiltinOperator_LEAKY_RELU,
- BuiltinOperator_SQUARED_DIFFERENCE,
- BuiltinOperator_MIRROR_PAD,
- BuiltinOperator_ABS,
- BuiltinOperator_SPLIT_V};
- return values;
-}
-
-inline const char *const *EnumNamesBuiltinOperator()
-{
- static const char *const names[] = {"ADD",
- "AVERAGE_POOL_2D",
- "CONCATENATION",
- "CONV_2D",
- "DEPTHWISE_CONV_2D",
- "",
- "DEQUANTIZE",
- "EMBEDDING_LOOKUP",
- "FLOOR",
- "FULLY_CONNECTED",
- "HASHTABLE_LOOKUP",
- "L2_NORMALIZATION",
- "L2_POOL_2D",
- "LOCAL_RESPONSE_NORMALIZATION",
- "LOGISTIC",
- "LSH_PROJECTION",
- "LSTM",
- "MAX_POOL_2D",
- "MUL",
- "RELU",
- "RELU_N1_TO_1",
- "RELU6",
- "RESHAPE",
- "RESIZE_BILINEAR",
- "RNN",
- "SOFTMAX",
- "SPACE_TO_DEPTH",
- "SVDF",
- "TANH",
- "CONCAT_EMBEDDINGS",
- "SKIP_GRAM",
- "CALL",
- "CUSTOM",
- "EMBEDDING_LOOKUP_SPARSE",
- "PAD",
- "UNIDIRECTIONAL_SEQUENCE_RNN",
- "GATHER",
- "BATCH_TO_SPACE_ND",
- "SPACE_TO_BATCH_ND",
- "TRANSPOSE",
- "MEAN",
- "SUB",
- "DIV",
- "SQUEEZE",
- "UNIDIRECTIONAL_SEQUENCE_LSTM",
- "STRIDED_SLICE",
- "BIDIRECTIONAL_SEQUENCE_RNN",
- "EXP",
- "TOPK_V2",
- "SPLIT",
- "LOG_SOFTMAX",
- "DELEGATE",
- "BIDIRECTIONAL_SEQUENCE_LSTM",
- "CAST",
- "PRELU",
- "MAXIMUM",
- "ARG_MAX",
- "MINIMUM",
- "LESS",
- "NEG",
- "PADV2",
- "GREATER",
- "GREATER_EQUAL",
- "LESS_EQUAL",
- "SELECT",
- "SLICE",
- "SIN",
- "TRANSPOSE_CONV",
- "SPARSE_TO_DENSE",
- "TILE",
- "EXPAND_DIMS",
- "EQUAL",
- "NOT_EQUAL",
- "LOG",
- "SUM",
- "SQRT",
- "RSQRT",
- "SHAPE",
- "POW",
- "ARG_MIN",
- "FAKE_QUANT",
- "REDUCE_PROD",
- "REDUCE_MAX",
- "PACK",
- "LOGICAL_OR",
- "ONE_HOT",
- "LOGICAL_AND",
- "LOGICAL_NOT",
- "UNPACK",
- "REDUCE_MIN",
- "FLOOR_DIV",
- "REDUCE_ANY",
- "SQUARE",
- "ZEROS_LIKE",
- "FILL",
- "FLOOR_MOD",
- "RANGE",
- "RESIZE_NEAREST_NEIGHBOR",
- "LEAKY_RELU",
- "SQUARED_DIFFERENCE",
- "MIRROR_PAD",
- "ABS",
- "SPLIT_V",
- nullptr};
- return names;
-}
-
-inline const char *EnumNameBuiltinOperator(BuiltinOperator e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesBuiltinOperator()[index];
-}
-
-enum BuiltinOptions
-{
- BuiltinOptions_NONE = 0,
- BuiltinOptions_Conv2DOptions = 1,
- BuiltinOptions_DepthwiseConv2DOptions = 2,
- BuiltinOptions_ConcatEmbeddingsOptions = 3,
- BuiltinOptions_LSHProjectionOptions = 4,
- BuiltinOptions_Pool2DOptions = 5,
- BuiltinOptions_SVDFOptions = 6,
- BuiltinOptions_RNNOptions = 7,
- BuiltinOptions_FullyConnectedOptions = 8,
- BuiltinOptions_SoftmaxOptions = 9,
- BuiltinOptions_ConcatenationOptions = 10,
- BuiltinOptions_AddOptions = 11,
- BuiltinOptions_L2NormOptions = 12,
- BuiltinOptions_LocalResponseNormalizationOptions = 13,
- BuiltinOptions_LSTMOptions = 14,
- BuiltinOptions_ResizeBilinearOptions = 15,
- BuiltinOptions_CallOptions = 16,
- BuiltinOptions_ReshapeOptions = 17,
- BuiltinOptions_SkipGramOptions = 18,
- BuiltinOptions_SpaceToDepthOptions = 19,
- BuiltinOptions_EmbeddingLookupSparseOptions = 20,
- BuiltinOptions_MulOptions = 21,
- BuiltinOptions_PadOptions = 22,
- BuiltinOptions_GatherOptions = 23,
- BuiltinOptions_BatchToSpaceNDOptions = 24,
- BuiltinOptions_SpaceToBatchNDOptions = 25,
- BuiltinOptions_TransposeOptions = 26,
- BuiltinOptions_ReducerOptions = 27,
- BuiltinOptions_SubOptions = 28,
- BuiltinOptions_DivOptions = 29,
- BuiltinOptions_SqueezeOptions = 30,
- BuiltinOptions_SequenceRNNOptions = 31,
- BuiltinOptions_StridedSliceOptions = 32,
- BuiltinOptions_ExpOptions = 33,
- BuiltinOptions_TopKV2Options = 34,
- BuiltinOptions_SplitOptions = 35,
- BuiltinOptions_LogSoftmaxOptions = 36,
- BuiltinOptions_CastOptions = 37,
- BuiltinOptions_DequantizeOptions = 38,
- BuiltinOptions_MaximumMinimumOptions = 39,
- BuiltinOptions_ArgMaxOptions = 40,
- BuiltinOptions_LessOptions = 41,
- BuiltinOptions_NegOptions = 42,
- BuiltinOptions_PadV2Options = 43,
- BuiltinOptions_GreaterOptions = 44,
- BuiltinOptions_GreaterEqualOptions = 45,
- BuiltinOptions_LessEqualOptions = 46,
- BuiltinOptions_SelectOptions = 47,
- BuiltinOptions_SliceOptions = 48,
- BuiltinOptions_TransposeConvOptions = 49,
- BuiltinOptions_SparseToDenseOptions = 50,
- BuiltinOptions_TileOptions = 51,
- BuiltinOptions_ExpandDimsOptions = 52,
- BuiltinOptions_EqualOptions = 53,
- BuiltinOptions_NotEqualOptions = 54,
- BuiltinOptions_ShapeOptions = 55,
- BuiltinOptions_PowOptions = 56,
- BuiltinOptions_ArgMinOptions = 57,
- BuiltinOptions_FakeQuantOptions = 58,
- BuiltinOptions_PackOptions = 59,
- BuiltinOptions_LogicalOrOptions = 60,
- BuiltinOptions_OneHotOptions = 61,
- BuiltinOptions_LogicalAndOptions = 62,
- BuiltinOptions_LogicalNotOptions = 63,
- BuiltinOptions_UnpackOptions = 64,
- BuiltinOptions_FloorDivOptions = 65,
- BuiltinOptions_SquareOptions = 66,
- BuiltinOptions_ZerosLikeOptions = 67,
- BuiltinOptions_FillOptions = 68,
- BuiltinOptions_BidirectionalSequenceLSTMOptions = 69,
- BuiltinOptions_BidirectionalSequenceRNNOptions = 70,
- BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71,
- BuiltinOptions_FloorModOptions = 72,
- BuiltinOptions_RangeOptions = 73,
- BuiltinOptions_ResizeNearestNeighborOptions = 74,
- BuiltinOptions_LeakyReluOptions = 75,
- BuiltinOptions_SquaredDifferenceOptions = 76,
- BuiltinOptions_MirrorPadOptions = 77,
- BuiltinOptions_AbsOptions = 78,
- BuiltinOptions_SplitVOptions = 79,
- BuiltinOptions_MIN = BuiltinOptions_NONE,
- BuiltinOptions_MAX = BuiltinOptions_SplitVOptions
-};
-
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[80]
-{
- static const BuiltinOptions values[] = {BuiltinOptions_NONE,
- BuiltinOptions_Conv2DOptions,
- BuiltinOptions_DepthwiseConv2DOptions,
- BuiltinOptions_ConcatEmbeddingsOptions,
- BuiltinOptions_LSHProjectionOptions,
- BuiltinOptions_Pool2DOptions,
- BuiltinOptions_SVDFOptions,
- BuiltinOptions_RNNOptions,
- BuiltinOptions_FullyConnectedOptions,
- BuiltinOptions_SoftmaxOptions,
- BuiltinOptions_ConcatenationOptions,
- BuiltinOptions_AddOptions,
- BuiltinOptions_L2NormOptions,
- BuiltinOptions_LocalResponseNormalizationOptions,
- BuiltinOptions_LSTMOptions,
- BuiltinOptions_ResizeBilinearOptions,
- BuiltinOptions_CallOptions,
- BuiltinOptions_ReshapeOptions,
- BuiltinOptions_SkipGramOptions,
- BuiltinOptions_SpaceToDepthOptions,
- BuiltinOptions_EmbeddingLookupSparseOptions,
- BuiltinOptions_MulOptions,
- BuiltinOptions_PadOptions,
- BuiltinOptions_GatherOptions,
- BuiltinOptions_BatchToSpaceNDOptions,
- BuiltinOptions_SpaceToBatchNDOptions,
- BuiltinOptions_TransposeOptions,
- BuiltinOptions_ReducerOptions,
- BuiltinOptions_SubOptions,
- BuiltinOptions_DivOptions,
- BuiltinOptions_SqueezeOptions,
- BuiltinOptions_SequenceRNNOptions,
- BuiltinOptions_StridedSliceOptions,
- BuiltinOptions_ExpOptions,
- BuiltinOptions_TopKV2Options,
- BuiltinOptions_SplitOptions,
- BuiltinOptions_LogSoftmaxOptions,
- BuiltinOptions_CastOptions,
- BuiltinOptions_DequantizeOptions,
- BuiltinOptions_MaximumMinimumOptions,
- BuiltinOptions_ArgMaxOptions,
- BuiltinOptions_LessOptions,
- BuiltinOptions_NegOptions,
- BuiltinOptions_PadV2Options,
- BuiltinOptions_GreaterOptions,
- BuiltinOptions_GreaterEqualOptions,
- BuiltinOptions_LessEqualOptions,
- BuiltinOptions_SelectOptions,
- BuiltinOptions_SliceOptions,
- BuiltinOptions_TransposeConvOptions,
- BuiltinOptions_SparseToDenseOptions,
- BuiltinOptions_TileOptions,
- BuiltinOptions_ExpandDimsOptions,
- BuiltinOptions_EqualOptions,
- BuiltinOptions_NotEqualOptions,
- BuiltinOptions_ShapeOptions,
- BuiltinOptions_PowOptions,
- BuiltinOptions_ArgMinOptions,
- BuiltinOptions_FakeQuantOptions,
- BuiltinOptions_PackOptions,
- BuiltinOptions_LogicalOrOptions,
- BuiltinOptions_OneHotOptions,
- BuiltinOptions_LogicalAndOptions,
- BuiltinOptions_LogicalNotOptions,
- BuiltinOptions_UnpackOptions,
- BuiltinOptions_FloorDivOptions,
- BuiltinOptions_SquareOptions,
- BuiltinOptions_ZerosLikeOptions,
- BuiltinOptions_FillOptions,
- BuiltinOptions_BidirectionalSequenceLSTMOptions,
- BuiltinOptions_BidirectionalSequenceRNNOptions,
- BuiltinOptions_UnidirectionalSequenceLSTMOptions,
- BuiltinOptions_FloorModOptions,
- BuiltinOptions_RangeOptions,
- BuiltinOptions_ResizeNearestNeighborOptions,
- BuiltinOptions_LeakyReluOptions,
- BuiltinOptions_SquaredDifferenceOptions,
- BuiltinOptions_MirrorPadOptions,
- BuiltinOptions_AbsOptions,
- BuiltinOptions_SplitVOptions};
- return values;
-}
-
-inline const char *const *EnumNamesBuiltinOptions()
-{
- static const char *const names[] = {"NONE",
- "Conv2DOptions",
- "DepthwiseConv2DOptions",
- "ConcatEmbeddingsOptions",
- "LSHProjectionOptions",
- "Pool2DOptions",
- "SVDFOptions",
- "RNNOptions",
- "FullyConnectedOptions",
- "SoftmaxOptions",
- "ConcatenationOptions",
- "AddOptions",
- "L2NormOptions",
- "LocalResponseNormalizationOptions",
- "LSTMOptions",
- "ResizeBilinearOptions",
- "CallOptions",
- "ReshapeOptions",
- "SkipGramOptions",
- "SpaceToDepthOptions",
- "EmbeddingLookupSparseOptions",
- "MulOptions",
- "PadOptions",
- "GatherOptions",
- "BatchToSpaceNDOptions",
- "SpaceToBatchNDOptions",
- "TransposeOptions",
- "ReducerOptions",
- "SubOptions",
- "DivOptions",
- "SqueezeOptions",
- "SequenceRNNOptions",
- "StridedSliceOptions",
- "ExpOptions",
- "TopKV2Options",
- "SplitOptions",
- "LogSoftmaxOptions",
- "CastOptions",
- "DequantizeOptions",
- "MaximumMinimumOptions",
- "ArgMaxOptions",
- "LessOptions",
- "NegOptions",
- "PadV2Options",
- "GreaterOptions",
- "GreaterEqualOptions",
- "LessEqualOptions",
- "SelectOptions",
- "SliceOptions",
- "TransposeConvOptions",
- "SparseToDenseOptions",
- "TileOptions",
- "ExpandDimsOptions",
- "EqualOptions",
- "NotEqualOptions",
- "ShapeOptions",
- "PowOptions",
- "ArgMinOptions",
- "FakeQuantOptions",
- "PackOptions",
- "LogicalOrOptions",
- "OneHotOptions",
- "LogicalAndOptions",
- "LogicalNotOptions",
- "UnpackOptions",
- "FloorDivOptions",
- "SquareOptions",
- "ZerosLikeOptions",
- "FillOptions",
- "BidirectionalSequenceLSTMOptions",
- "BidirectionalSequenceRNNOptions",
- "UnidirectionalSequenceLSTMOptions",
- "FloorModOptions",
- "RangeOptions",
- "ResizeNearestNeighborOptions",
- "LeakyReluOptions",
- "SquaredDifferenceOptions",
- "MirrorPadOptions",
- "AbsOptions",
- "SplitVOptions",
- nullptr};
- return names;
-}
-
-inline const char *EnumNameBuiltinOptions(BuiltinOptions e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesBuiltinOptions()[index];
-}
-
-template <typename T> struct BuiltinOptionsTraits
-{
- static const BuiltinOptions enum_value = BuiltinOptions_NONE;
-};
-
-template <> struct BuiltinOptionsTraits<Conv2DOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions;
-};
-
-template <> struct BuiltinOptionsTraits<DepthwiseConv2DOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ConcatEmbeddingsOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LSHProjectionOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions;
-};
-
-template <> struct BuiltinOptionsTraits<Pool2DOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SVDFOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions;
-};
-
-template <> struct BuiltinOptionsTraits<RNNOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions;
-};
-
-template <> struct BuiltinOptionsTraits<FullyConnectedOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SoftmaxOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ConcatenationOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions;
-};
-
-template <> struct BuiltinOptionsTraits<AddOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_AddOptions;
-};
-
-template <> struct BuiltinOptionsTraits<L2NormOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LocalResponseNormalizationOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LSTMOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ResizeBilinearOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions;
-};
-
-template <> struct BuiltinOptionsTraits<CallOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_CallOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ReshapeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SkipGramOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SpaceToDepthOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions;
-};
-
-template <> struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions;
-};
-
-template <> struct BuiltinOptionsTraits<MulOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_MulOptions;
-};
-
-template <> struct BuiltinOptionsTraits<PadOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_PadOptions;
-};
-
-template <> struct BuiltinOptionsTraits<GatherOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions;
-};
-
-template <> struct BuiltinOptionsTraits<BatchToSpaceNDOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SpaceToBatchNDOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions;
-};
-
-template <> struct BuiltinOptionsTraits<TransposeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ReducerOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SubOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SubOptions;
-};
-
-template <> struct BuiltinOptionsTraits<DivOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_DivOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SqueezeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SequenceRNNOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions;
-};
-
-template <> struct BuiltinOptionsTraits<StridedSliceOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ExpOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions;
-};
-
-template <> struct BuiltinOptionsTraits<TopKV2Options>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options;
-};
-
-template <> struct BuiltinOptionsTraits<SplitOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LogSoftmaxOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions;
-};
-
-template <> struct BuiltinOptionsTraits<CastOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_CastOptions;
-};
-
-template <> struct BuiltinOptionsTraits<DequantizeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<MaximumMinimumOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ArgMaxOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LessOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LessOptions;
-};
-
-template <> struct BuiltinOptionsTraits<NegOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_NegOptions;
-};
-
-template <> struct BuiltinOptionsTraits<PadV2Options>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options;
-};
-
-template <> struct BuiltinOptionsTraits<GreaterOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions;
-};
-
-template <> struct BuiltinOptionsTraits<GreaterEqualOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LessEqualOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SelectOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SliceOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions;
-};
-
-template <> struct BuiltinOptionsTraits<TransposeConvOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SparseToDenseOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions;
-};
-
-template <> struct BuiltinOptionsTraits<TileOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_TileOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ExpandDimsOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions;
-};
-
-template <> struct BuiltinOptionsTraits<EqualOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions;
-};
-
-template <> struct BuiltinOptionsTraits<NotEqualOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ShapeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<PowOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_PowOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ArgMinOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions;
-};
-
-template <> struct BuiltinOptionsTraits<FakeQuantOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions;
-};
-
-template <> struct BuiltinOptionsTraits<PackOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_PackOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LogicalOrOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions;
-};
-
-template <> struct BuiltinOptionsTraits<OneHotOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LogicalAndOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LogicalNotOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions;
-};
-
-template <> struct BuiltinOptionsTraits<UnpackOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions;
-};
-
-template <> struct BuiltinOptionsTraits<FloorDivOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SquareOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ZerosLikeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<FillOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_FillOptions;
-};
-
-template <> struct BuiltinOptionsTraits<BidirectionalSequenceLSTMOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions;
-};
-
-template <> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
-};
-
-template <> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
-};
-
-template <> struct BuiltinOptionsTraits<FloorModOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions;
-};
-
-template <> struct BuiltinOptionsTraits<RangeOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions;
-};
-
-template <> struct BuiltinOptionsTraits<ResizeNearestNeighborOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions;
-};
-
-template <> struct BuiltinOptionsTraits<LeakyReluOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SquaredDifferenceOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions;
-};
-
-template <> struct BuiltinOptionsTraits<MirrorPadOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions;
-};
-
-template <> struct BuiltinOptionsTraits<AbsOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions;
-};
-
-template <> struct BuiltinOptionsTraits<SplitVOptions>
-{
- static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions;
-};
-
-bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
-bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
- const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
- const flatbuffers::Vector<uint8_t> *types);
-
-enum Padding
-{
- Padding_SAME = 0,
- Padding_VALID = 1,
- Padding_MIN = Padding_SAME,
- Padding_MAX = Padding_VALID
-};
-
-inline const Padding (&EnumValuesPadding())[2]
-{
- static const Padding values[] = {Padding_SAME, Padding_VALID};
- return values;
-}
-
-inline const char *const *EnumNamesPadding()
-{
- static const char *const names[] = {"SAME", "VALID", nullptr};
- return names;
-}
-
-inline const char *EnumNamePadding(Padding e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesPadding()[index];
-}
-
-enum ActivationFunctionType
-{
- ActivationFunctionType_NONE = 0,
- ActivationFunctionType_RELU = 1,
- ActivationFunctionType_RELU_N1_TO_1 = 2,
- ActivationFunctionType_RELU6 = 3,
- ActivationFunctionType_TANH = 4,
- ActivationFunctionType_SIGN_BIT = 5,
- ActivationFunctionType_MIN = ActivationFunctionType_NONE,
- ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT
-};
-
-inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6]
-{
- static const ActivationFunctionType values[] = {
- ActivationFunctionType_NONE, ActivationFunctionType_RELU,
- ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6,
- ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
- return values;
-}
-
-inline const char *const *EnumNamesActivationFunctionType()
-{
- static const char *const names[] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6",
- "TANH", "SIGN_BIT", nullptr};
- return names;
-}
-
-inline const char *EnumNameActivationFunctionType(ActivationFunctionType e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesActivationFunctionType()[index];
-}
-
-enum LSHProjectionType
-{
- LSHProjectionType_UNKNOWN = 0,
- LSHProjectionType_SPARSE = 1,
- LSHProjectionType_DENSE = 2,
- LSHProjectionType_MIN = LSHProjectionType_UNKNOWN,
- LSHProjectionType_MAX = LSHProjectionType_DENSE
-};
-
-inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3]
-{
- static const LSHProjectionType values[] = {LSHProjectionType_UNKNOWN, LSHProjectionType_SPARSE,
- LSHProjectionType_DENSE};
- return values;
-}
-
-inline const char *const *EnumNamesLSHProjectionType()
-{
- static const char *const names[] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
- return names;
-}
-
-inline const char *EnumNameLSHProjectionType(LSHProjectionType e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesLSHProjectionType()[index];
-}
-
-enum FullyConnectedOptionsWeightsFormat
-{
- FullyConnectedOptionsWeightsFormat_DEFAULT = 0,
- FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1,
- FullyConnectedOptionsWeightsFormat_MIN = FullyConnectedOptionsWeightsFormat_DEFAULT,
- FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8
-};
-
-inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2]
-{
- static const FullyConnectedOptionsWeightsFormat values[] = {
- FullyConnectedOptionsWeightsFormat_DEFAULT,
- FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8};
- return values;
-}
-
-inline const char *const *EnumNamesFullyConnectedOptionsWeightsFormat()
-{
- static const char *const names[] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr};
- return names;
-}
-
-inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesFullyConnectedOptionsWeightsFormat()[index];
-}
-
-enum LSTMKernelType
-{
- LSTMKernelType_FULL = 0,
- LSTMKernelType_BASIC = 1,
- LSTMKernelType_MIN = LSTMKernelType_FULL,
- LSTMKernelType_MAX = LSTMKernelType_BASIC
-};
-
-inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2]
-{
- static const LSTMKernelType values[] = {LSTMKernelType_FULL, LSTMKernelType_BASIC};
- return values;
-}
-
-inline const char *const *EnumNamesLSTMKernelType()
-{
- static const char *const names[] = {"FULL", "BASIC", nullptr};
- return names;
-}
-
-inline const char *EnumNameLSTMKernelType(LSTMKernelType e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesLSTMKernelType()[index];
-}
-
-enum CombinerType
-{
- CombinerType_SUM = 0,
- CombinerType_MEAN = 1,
- CombinerType_SQRTN = 2,
- CombinerType_MIN = CombinerType_SUM,
- CombinerType_MAX = CombinerType_SQRTN
-};
-
-inline const CombinerType (&EnumValuesCombinerType())[3]
-{
- static const CombinerType values[] = {CombinerType_SUM, CombinerType_MEAN, CombinerType_SQRTN};
- return values;
-}
-
-inline const char *const *EnumNamesCombinerType()
-{
- static const char *const names[] = {"SUM", "MEAN", "SQRTN", nullptr};
- return names;
-}
-
-inline const char *EnumNameCombinerType(CombinerType e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesCombinerType()[index];
-}
-
-enum MirrorPadMode
-{
- MirrorPadMode_REFLECT = 0,
- MirrorPadMode_SYMMETRIC = 1,
- MirrorPadMode_MIN = MirrorPadMode_REFLECT,
- MirrorPadMode_MAX = MirrorPadMode_SYMMETRIC
-};
-
-inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2]
-{
- static const MirrorPadMode values[] = {MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC};
- return values;
-}
-
-inline const char *const *EnumNamesMirrorPadMode()
-{
- static const char *const names[] = {"REFLECT", "SYMMETRIC", nullptr};
- return names;
-}
-
-inline const char *EnumNameMirrorPadMode(MirrorPadMode e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesMirrorPadMode()[index];
-}
-
-enum CustomOptionsFormat
-{
- CustomOptionsFormat_FLEXBUFFERS = 0,
- CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,
- CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS
-};
-
-inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1]
-{
- static const CustomOptionsFormat values[] = {CustomOptionsFormat_FLEXBUFFERS};
- return values;
-}
-
-inline const char *const *EnumNamesCustomOptionsFormat()
-{
- static const char *const names[] = {"FLEXBUFFERS", nullptr};
- return names;
-}
-
-inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e)
-{
- const size_t index = static_cast<int>(e);
- return EnumNamesCustomOptionsFormat()[index];
-}
-
-struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_CUSTOM = 4
- };
- const flatbuffers::Vector<uint8_t> *custom() const
- {
- return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CUSTOM) &&
- verifier.VerifyVector(custom()) && verifier.EndTable();
- }
-};
-
-struct CustomQuantizationBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom)
- {
- fbb_.AddOffset(CustomQuantization::VT_CUSTOM, custom);
- }
- explicit CustomQuantizationBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &);
- flatbuffers::Offset<CustomQuantization> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<CustomQuantization>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<CustomQuantization>
-CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom = 0)
-{
- CustomQuantizationBuilder builder_(_fbb);
- builder_.add_custom(custom);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<CustomQuantization>
-CreateCustomQuantizationDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<uint8_t> *custom = nullptr)
-{
- return neurun_tflite::CreateCustomQuantization(_fbb,
- custom ? _fbb.CreateVector<uint8_t>(*custom) : 0);
-}
-
-struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_MIN = 4,
- VT_MAX = 6,
- VT_SCALE = 8,
- VT_ZERO_POINT = 10,
- VT_DETAILS_TYPE = 12,
- VT_DETAILS = 14
- };
- const flatbuffers::Vector<float> *min() const
- {
- return GetPointer<const flatbuffers::Vector<float> *>(VT_MIN);
- }
- const flatbuffers::Vector<float> *max() const
- {
- return GetPointer<const flatbuffers::Vector<float> *>(VT_MAX);
- }
- const flatbuffers::Vector<float> *scale() const
- {
- return GetPointer<const flatbuffers::Vector<float> *>(VT_SCALE);
- }
- const flatbuffers::Vector<int64_t> *zero_point() const
- {
- return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT);
- }
- QuantizationDetails details_type() const
- {
- return static_cast<QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
- }
- const void *details() const { return GetPointer<const void *>(VT_DETAILS); }
- template <typename T> const T *details_as() const;
- const CustomQuantization *details_as_CustomQuantization() const
- {
- return details_type() == QuantizationDetails_CustomQuantization
- ? static_cast<const CustomQuantization *>(details())
- : nullptr;
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_MIN) &&
- verifier.VerifyVector(min()) && VerifyOffset(verifier, VT_MAX) &&
- verifier.VerifyVector(max()) && VerifyOffset(verifier, VT_SCALE) &&
- verifier.VerifyVector(scale()) && VerifyOffset(verifier, VT_ZERO_POINT) &&
- verifier.VerifyVector(zero_point()) && VerifyField<uint8_t>(verifier, VT_DETAILS_TYPE) &&
- VerifyOffset(verifier, VT_DETAILS) &&
- VerifyQuantizationDetails(verifier, details(), details_type()) && verifier.EndTable();
- }
-};
-
-template <>
-inline const CustomQuantization *QuantizationParameters::details_as<CustomQuantization>() const
-{
- return details_as_CustomQuantization();
-}
-
-struct QuantizationParametersBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min)
- {
- fbb_.AddOffset(QuantizationParameters::VT_MIN, min);
- }
- void add_max(flatbuffers::Offset<flatbuffers::Vector<float>> max)
- {
- fbb_.AddOffset(QuantizationParameters::VT_MAX, max);
- }
- void add_scale(flatbuffers::Offset<flatbuffers::Vector<float>> scale)
- {
- fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale);
- }
- void add_zero_point(flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point)
- {
- fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point);
- }
- void add_details_type(QuantizationDetails details_type)
- {
- fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE,
- static_cast<uint8_t>(details_type), 0);
- }
- void add_details(flatbuffers::Offset<void> details)
- {
- fbb_.AddOffset(QuantizationParameters::VT_DETAILS, details);
- }
- explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &);
- flatbuffers::Offset<QuantizationParameters> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<QuantizationParameters>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<QuantizationParameters>
-CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
- flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
- QuantizationDetails details_type = QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0)
-{
- QuantizationParametersBuilder builder_(_fbb);
- builder_.add_details(details);
- builder_.add_zero_point(zero_point);
- builder_.add_scale(scale);
- builder_.add_max(max);
- builder_.add_min(min);
- builder_.add_details_type(details_type);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
- const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
- const std::vector<int64_t> *zero_point = nullptr,
- QuantizationDetails details_type = QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0)
-{
- return neurun_tflite::CreateQuantizationParameters(
- _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0,
- scale ? _fbb.CreateVector<float>(*scale) : 0,
- zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details);
-}
-
-struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_SHAPE = 4,
- VT_TYPE = 6,
- VT_BUFFER = 8,
- VT_NAME = 10,
- VT_QUANTIZATION = 12,
- VT_IS_VARIABLE = 14
- };
- const flatbuffers::Vector<int32_t> *shape() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
- }
- TensorType type() const { return static_cast<TensorType>(GetField<int8_t>(VT_TYPE, 0)); }
- uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
- const flatbuffers::String *name() const
- {
- return GetPointer<const flatbuffers::String *>(VT_NAME);
- }
- const QuantizationParameters *quantization() const
- {
- return GetPointer<const QuantizationParameters *>(VT_QUANTIZATION);
- }
- bool is_variable() const { return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SHAPE) &&
- verifier.VerifyVector(shape()) && VerifyField<int8_t>(verifier, VT_TYPE) &&
- VerifyField<uint32_t>(verifier, VT_BUFFER) && VerifyOffset(verifier, VT_NAME) &&
- verifier.VerifyString(name()) && VerifyOffset(verifier, VT_QUANTIZATION) &&
- verifier.VerifyTable(quantization()) && VerifyField<uint8_t>(verifier, VT_IS_VARIABLE) &&
- verifier.EndTable();
- }
-};
-
-struct TensorBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape)
- {
- fbb_.AddOffset(Tensor::VT_SHAPE, shape);
- }
- void add_type(TensorType type)
- {
- fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0);
- }
- void add_buffer(uint32_t buffer) { fbb_.AddElement<uint32_t>(Tensor::VT_BUFFER, buffer, 0); }
- void add_name(flatbuffers::Offset<flatbuffers::String> name)
- {
- fbb_.AddOffset(Tensor::VT_NAME, name);
- }
- void add_quantization(flatbuffers::Offset<QuantizationParameters> quantization)
- {
- fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization);
- }
- void add_is_variable(bool is_variable)
- {
- fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0);
- }
- explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- TensorBuilder &operator=(const TensorBuilder &);
- flatbuffers::Offset<Tensor> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Tensor>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Tensor>
-CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
- TensorType type = TensorType_FLOAT32, uint32_t buffer = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0,
- flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false)
-{
- TensorBuilder builder_(_fbb);
- builder_.add_quantization(quantization);
- builder_.add_name(name);
- builder_.add_buffer(buffer);
- builder_.add_shape(shape);
- builder_.add_is_variable(is_variable);
- builder_.add_type(type);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Tensor> CreateTensorDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
- TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr,
- flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false)
-{
- return neurun_tflite::CreateTensor(_fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type,
- buffer, name ? _fbb.CreateString(name) : 0, quantization,
- is_variable);
-}
-
-struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_PADDING = 4,
- VT_STRIDE_W = 6,
- VT_STRIDE_H = 8,
- VT_FUSED_ACTIVATION_FUNCTION = 10,
- VT_DILATION_W_FACTOR = 12,
- VT_DILATION_H_FACTOR = 14
- };
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
- int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
- int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
- int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
- VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
- }
-};
-
-struct Conv2DOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
- {
- fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
- }
- void add_stride_w(int32_t stride_w)
- {
- fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_W, stride_w, 0);
- }
- void add_stride_h(int32_t stride_h)
- {
- fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0);
- }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_dilation_w_factor(int32_t dilation_w_factor)
- {
- fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
- }
- void add_dilation_h_factor(int32_t dilation_h_factor)
- {
- fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
- }
- explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &);
- flatbuffers::Offset<Conv2DOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Conv2DOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Conv2DOptions>
-CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
-{
- Conv2DOptionsBuilder builder_(_fbb);
- builder_.add_dilation_h_factor(dilation_h_factor);
- builder_.add_dilation_w_factor(dilation_w_factor);
- builder_.add_stride_h(stride_h);
- builder_.add_stride_w(stride_w);
- builder_.add_fused_activation_function(fused_activation_function);
- builder_.add_padding(padding);
- return builder_.Finish();
-}
-
-struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_PADDING = 4,
- VT_STRIDE_W = 6,
- VT_STRIDE_H = 8,
- VT_FILTER_WIDTH = 10,
- VT_FILTER_HEIGHT = 12,
- VT_FUSED_ACTIVATION_FUNCTION = 14
- };
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
- int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
- int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
- int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); }
- int32_t filter_height() const { return GetField<int32_t>(VT_FILTER_HEIGHT, 0); }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
- VerifyField<int32_t>(verifier, VT_FILTER_WIDTH) &&
- VerifyField<int32_t>(verifier, VT_FILTER_HEIGHT) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct Pool2DOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
- {
- fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
- }
- void add_stride_w(int32_t stride_w)
- {
- fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_W, stride_w, 0);
- }
- void add_stride_h(int32_t stride_h)
- {
- fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_H, stride_h, 0);
- }
- void add_filter_width(int32_t filter_width)
- {
- fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0);
- }
- void add_filter_height(int32_t filter_height)
- {
- fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
- }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &);
- flatbuffers::Offset<Pool2DOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Pool2DOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Pool2DOptions>
-CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0,
- int32_t filter_height = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- Pool2DOptionsBuilder builder_(_fbb);
- builder_.add_filter_height(filter_height);
- builder_.add_filter_width(filter_width);
- builder_.add_stride_h(stride_h);
- builder_.add_stride_w(stride_w);
- builder_.add_fused_activation_function(fused_activation_function);
- builder_.add_padding(padding);
- return builder_.Finish();
-}
-
-struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_PADDING = 4,
- VT_STRIDE_W = 6,
- VT_STRIDE_H = 8,
- VT_DEPTH_MULTIPLIER = 10,
- VT_FUSED_ACTIVATION_FUNCTION = 12,
- VT_DILATION_W_FACTOR = 14,
- VT_DILATION_H_FACTOR = 16
- };
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
- int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
- int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
- int32_t depth_multiplier() const { return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0); }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
- int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
- VerifyField<int32_t>(verifier, VT_DEPTH_MULTIPLIER) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
- VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
- }
-};
-
-struct DepthwiseConv2DOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
- {
- fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
- }
- void add_stride_w(int32_t stride_w)
- {
- fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0);
- }
- void add_stride_h(int32_t stride_h)
- {
- fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0);
- }
- void add_depth_multiplier(int32_t depth_multiplier)
- {
- fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0);
- }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_dilation_w_factor(int32_t dilation_w_factor)
- {
- fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
- }
- void add_dilation_h_factor(int32_t dilation_h_factor)
- {
- fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
- }
- explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &);
- flatbuffers::Offset<DepthwiseConv2DOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<DepthwiseConv2DOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
- flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0,
- int32_t stride_h = 0, int32_t depth_multiplier = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
-{
- DepthwiseConv2DOptionsBuilder builder_(_fbb);
- builder_.add_dilation_h_factor(dilation_h_factor);
- builder_.add_dilation_w_factor(dilation_w_factor);
- builder_.add_depth_multiplier(depth_multiplier);
- builder_.add_stride_h(stride_h);
- builder_.add_stride_w(stride_w);
- builder_.add_fused_activation_function(fused_activation_function);
- builder_.add_padding(padding);
- return builder_.Finish();
-}
-
-struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_NUM_CHANNELS = 4,
- VT_NUM_COLUMNS_PER_CHANNEL = 6,
- VT_EMBEDDING_DIM_PER_CHANNEL = 8
- };
- int32_t num_channels() const { return GetField<int32_t>(VT_NUM_CHANNELS, 0); }
- const flatbuffers::Vector<int32_t> *num_columns_per_channel() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NUM_COLUMNS_PER_CHANNEL);
- }
- const flatbuffers::Vector<int32_t> *embedding_dim_per_channel() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_EMBEDDING_DIM_PER_CHANNEL);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_CHANNELS) &&
- VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) &&
- verifier.VerifyVector(num_columns_per_channel()) &&
- VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) &&
- verifier.VerifyVector(embedding_dim_per_channel()) && verifier.EndTable();
- }
-};
-
-struct ConcatEmbeddingsOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_num_channels(int32_t num_channels)
- {
- fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);
- }
- void add_num_columns_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
- {
- fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
- }
- void add_embedding_dim_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
- {
- fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL,
- embedding_dim_per_channel);
- }
- explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &);
- flatbuffers::Offset<ConcatEmbeddingsOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ConcatEmbeddingsOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
-{
- ConcatEmbeddingsOptionsBuilder builder_(_fbb);
- builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
- builder_.add_num_columns_per_channel(num_columns_per_channel);
- builder_.add_num_channels(num_channels);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<ConcatEmbeddingsOptions>
-CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
- const std::vector<int32_t> *num_columns_per_channel = nullptr,
- const std::vector<int32_t> *embedding_dim_per_channel = nullptr)
-{
- return neurun_tflite::CreateConcatEmbeddingsOptions(
- _fbb, num_channels,
- num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0,
- embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0);
-}
-
-struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_TYPE = 4
- };
- LSHProjectionType type() const
- {
- return static_cast<LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_TYPE) &&
- verifier.EndTable();
- }
-};
-
-struct LSHProjectionOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_type(LSHProjectionType type)
- {
- fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0);
- }
- explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &);
- flatbuffers::Offset<LSHProjectionOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LSHProjectionOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LSHProjectionOptions>
-CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb,
- LSHProjectionType type = LSHProjectionType_UNKNOWN)
-{
- LSHProjectionOptionsBuilder builder_(_fbb);
- builder_.add_type(type);
- return builder_.Finish();
-}
-
-struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_RANK = 4,
- VT_FUSED_ACTIVATION_FUNCTION = 6
- };
- int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RANK) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct SVDFOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_rank(int32_t rank) { fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0); }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &);
- flatbuffers::Offset<SVDFOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SVDFOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SVDFOptions>
-CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- SVDFOptionsBuilder builder_(_fbb);
- builder_.add_rank(rank);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct RNNOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- RNNOptionsBuilder &operator=(const RNNOptionsBuilder &);
- flatbuffers::Offset<RNNOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<RNNOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<RNNOptions>
-CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- RNNOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_TIME_MAJOR = 4,
- VT_FUSED_ACTIVATION_FUNCTION = 6
- };
- bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct SequenceRNNOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_time_major(bool time_major)
- {
- fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major),
- 0);
- }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit SequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &);
- flatbuffers::Offset<SequenceRNNOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SequenceRNNOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- SequenceRNNOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- builder_.add_time_major(time_major);
- return builder_.Finish();
-}
-
-struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_TIME_MAJOR = 4,
- VT_FUSED_ACTIVATION_FUNCTION = 6,
- VT_MERGE_OUTPUTS = 8
- };
- bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) && verifier.EndTable();
- }
-};
-
-struct BidirectionalSequenceRNNOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_time_major(bool time_major)
- {
- fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR,
- static_cast<uint8_t>(time_major), 0);
- }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_merge_outputs(bool merge_outputs)
- {
- fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_MERGE_OUTPUTS,
- static_cast<uint8_t>(merge_outputs), 0);
- }
- explicit BidirectionalSequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &);
- flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<BidirectionalSequenceRNNOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- bool merge_outputs = false)
-{
- BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
- builder_.add_merge_outputs(merge_outputs);
- builder_.add_fused_activation_function(fused_activation_function);
- builder_.add_time_major(time_major);
- return builder_.Finish();
-}
-
-struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4,
- VT_WEIGHTS_FORMAT = 6
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- FullyConnectedOptionsWeightsFormat weights_format() const
- {
- return static_cast<FullyConnectedOptionsWeightsFormat>(GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<int8_t>(verifier, VT_WEIGHTS_FORMAT) && verifier.EndTable();
- }
-};
-
-struct FullyConnectedOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_weights_format(FullyConnectedOptionsWeightsFormat weights_format)
- {
- fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT,
- static_cast<int8_t>(weights_format), 0);
- }
- explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &);
- flatbuffers::Offset<FullyConnectedOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<FullyConnectedOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT)
-{
- FullyConnectedOptionsBuilder builder_(_fbb);
- builder_.add_weights_format(weights_format);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_BETA = 4
- };
- float beta() const { return GetField<float>(VT_BETA, 0.0f); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_BETA) &&
- verifier.EndTable();
- }
-};
-
-struct SoftmaxOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_beta(float beta) { fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f); }
- explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &);
- flatbuffers::Offset<SoftmaxOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SoftmaxOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SoftmaxOptions>
-CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f)
-{
- SoftmaxOptionsBuilder builder_(_fbb);
- builder_.add_beta(beta);
- return builder_.Finish();
-}
-
-struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_AXIS = 4,
- VT_FUSED_ACTIVATION_FUNCTION = 6
- };
- int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct ConcatenationOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0); }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &);
- flatbuffers::Offset<ConcatenationOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ConcatenationOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- ConcatenationOptionsBuilder builder_(_fbb);
- builder_.add_axis(axis);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct AddOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- AddOptionsBuilder &operator=(const AddOptionsBuilder &);
- flatbuffers::Offset<AddOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<AddOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<AddOptions>
-CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- AddOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct MulOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- MulOptionsBuilder &operator=(const MulOptionsBuilder &);
- flatbuffers::Offset<MulOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<MulOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<MulOptions>
-CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- MulOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct L2NormOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &);
- flatbuffers::Offset<L2NormOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<L2NormOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<L2NormOptions>
-CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- L2NormOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_RADIUS = 4,
- VT_BIAS = 6,
- VT_ALPHA = 8,
- VT_BETA = 10
- };
- int32_t radius() const { return GetField<int32_t>(VT_RADIUS, 0); }
- float bias() const { return GetField<float>(VT_BIAS, 0.0f); }
- float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); }
- float beta() const { return GetField<float>(VT_BETA, 0.0f); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RADIUS) &&
- VerifyField<float>(verifier, VT_BIAS) && VerifyField<float>(verifier, VT_ALPHA) &&
- VerifyField<float>(verifier, VT_BETA) && verifier.EndTable();
- }
-};
-
-struct LocalResponseNormalizationOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_radius(int32_t radius)
- {
- fbb_.AddElement<int32_t>(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0);
- }
- void add_bias(float bias)
- {
- fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f);
- }
- void add_alpha(float alpha)
- {
- fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f);
- }
- void add_beta(float beta)
- {
- fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
- }
- explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LocalResponseNormalizationOptionsBuilder &
- operator=(const LocalResponseNormalizationOptionsBuilder &);
- flatbuffers::Offset<LocalResponseNormalizationOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LocalResponseNormalizationOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LocalResponseNormalizationOptions>
-CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t radius = 0,
- float bias = 0.0f, float alpha = 0.0f, float beta = 0.0f)
-{
- LocalResponseNormalizationOptionsBuilder builder_(_fbb);
- builder_.add_beta(beta);
- builder_.add_alpha(alpha);
- builder_.add_bias(bias);
- builder_.add_radius(radius);
- return builder_.Finish();
-}
-
-struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4,
- VT_CELL_CLIP = 6,
- VT_PROJ_CLIP = 8,
- VT_KERNEL_TYPE = 10
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
- float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
- LSTMKernelType kernel_type() const
- {
- return static_cast<LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<float>(verifier, VT_CELL_CLIP) &&
- VerifyField<float>(verifier, VT_PROJ_CLIP) &&
- VerifyField<int8_t>(verifier, VT_KERNEL_TYPE) && verifier.EndTable();
- }
-};
-
-struct LSTMOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_cell_clip(float cell_clip)
- {
- fbb_.AddElement<float>(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
- }
- void add_proj_clip(float proj_clip)
- {
- fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
- }
- void add_kernel_type(LSTMKernelType kernel_type)
- {
- fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0);
- }
- explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &);
- flatbuffers::Offset<LSTMOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LSTMOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LSTMOptions>
-CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f,
- LSTMKernelType kernel_type = LSTMKernelType_FULL)
-{
- LSTMOptionsBuilder builder_(_fbb);
- builder_.add_proj_clip(proj_clip);
- builder_.add_cell_clip(cell_clip);
- builder_.add_kernel_type(kernel_type);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4,
- VT_CELL_CLIP = 6,
- VT_PROJ_CLIP = 8,
- VT_TIME_MAJOR = 10
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
- float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
- bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<float>(verifier, VT_CELL_CLIP) &&
- VerifyField<float>(verifier, VT_PROJ_CLIP) &&
- VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) && verifier.EndTable();
- }
-};
-
-struct UnidirectionalSequenceLSTMOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_cell_clip(float cell_clip)
- {
- fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
- }
- void add_proj_clip(float proj_clip)
- {
- fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
- }
- void add_time_major(bool time_major)
- {
- fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_TIME_MAJOR,
- static_cast<uint8_t>(time_major), 0);
- }
- explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- UnidirectionalSequenceLSTMOptionsBuilder &
- operator=(const UnidirectionalSequenceLSTMOptionsBuilder &);
- flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
-CreateUnidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false)
-{
- UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
- builder_.add_proj_clip(proj_clip);
- builder_.add_cell_clip(cell_clip);
- builder_.add_time_major(time_major);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4,
- VT_CELL_CLIP = 6,
- VT_PROJ_CLIP = 8,
- VT_MERGE_OUTPUTS = 10
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
- float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
- bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
- VerifyField<float>(verifier, VT_CELL_CLIP) &&
- VerifyField<float>(verifier, VT_PROJ_CLIP) &&
- VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) && verifier.EndTable();
- }
-};
-
-struct BidirectionalSequenceLSTMOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- void add_cell_clip(float cell_clip)
- {
- fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
- }
- void add_proj_clip(float proj_clip)
- {
- fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
- }
- void add_merge_outputs(bool merge_outputs)
- {
- fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS,
- static_cast<uint8_t>(merge_outputs), 0);
- }
- explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- BidirectionalSequenceLSTMOptionsBuilder &
- operator=(const BidirectionalSequenceLSTMOptionsBuilder &);
- flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<BidirectionalSequenceLSTMOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false)
-{
- BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
- builder_.add_proj_clip(proj_clip);
- builder_.add_cell_clip(cell_clip);
- builder_.add_merge_outputs(merge_outputs);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_ALIGN_CORNERS = 8
- };
- bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
- verifier.EndTable();
- }
-};
-
-struct ResizeBilinearOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_align_corners(bool align_corners)
- {
- fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_ALIGN_CORNERS,
- static_cast<uint8_t>(align_corners), 0);
- }
- explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &);
- flatbuffers::Offset<ResizeBilinearOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ResizeBilinearOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ResizeBilinearOptions>
-CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false)
-{
- ResizeBilinearOptionsBuilder builder_(_fbb);
- builder_.add_align_corners(align_corners);
- return builder_.Finish();
-}
-
-struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_ALIGN_CORNERS = 4
- };
- bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
- verifier.EndTable();
- }
-};
-
-struct ResizeNearestNeighborOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_align_corners(bool align_corners)
- {
- fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS,
- static_cast<uint8_t>(align_corners), 0);
- }
- explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &);
- flatbuffers::Offset<ResizeNearestNeighborOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ResizeNearestNeighborOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ResizeNearestNeighborOptions>
-CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false)
-{
- ResizeNearestNeighborOptionsBuilder builder_(_fbb);
- builder_.add_align_corners(align_corners);
- return builder_.Finish();
-}
-
-struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_SUBGRAPH = 4
- };
- uint32_t subgraph() const { return GetField<uint32_t>(VT_SUBGRAPH, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_SUBGRAPH) &&
- verifier.EndTable();
- }
-};
-
-struct CallOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_subgraph(uint32_t subgraph)
- {
- fbb_.AddElement<uint32_t>(CallOptions::VT_SUBGRAPH, subgraph, 0);
- }
- explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- CallOptionsBuilder &operator=(const CallOptionsBuilder &);
- flatbuffers::Offset<CallOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<CallOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb,
- uint32_t subgraph = 0)
-{
- CallOptionsBuilder builder_(_fbb);
- builder_.add_subgraph(subgraph);
- return builder_.Finish();
-}
-
-struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct PadOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- PadOptionsBuilder &operator=(const PadOptionsBuilder &);
- flatbuffers::Offset<PadOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<PadOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- PadOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct PadV2OptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &);
- flatbuffers::Offset<PadV2Options> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<PadV2Options>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb)
-{
- PadV2OptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_NEW_SHAPE = 4
- };
- const flatbuffers::Vector<int32_t> *new_shape() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NEW_SHAPE);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NEW_SHAPE) &&
- verifier.VerifyVector(new_shape()) && verifier.EndTable();
- }
-};
-
-struct ReshapeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape)
- {
- fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape);
- }
- explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &);
- flatbuffers::Offset<ReshapeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ReshapeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ReshapeOptions>
-CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape = 0)
-{
- ReshapeOptionsBuilder builder_(_fbb);
- builder_.add_new_shape(new_shape);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<ReshapeOptions>
-CreateReshapeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<int32_t> *new_shape = nullptr)
-{
- return neurun_tflite::CreateReshapeOptions(
- _fbb, new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0);
-}
-
-struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct SpaceToBatchNDOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &);
- flatbuffers::Offset<SpaceToBatchNDOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SpaceToBatchNDOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SpaceToBatchNDOptions>
-CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- SpaceToBatchNDOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct BatchToSpaceNDOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
- flatbuffers::Offset<BatchToSpaceNDOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<BatchToSpaceNDOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<BatchToSpaceNDOptions>
-CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- BatchToSpaceNDOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_NGRAM_SIZE = 4,
- VT_MAX_SKIP_SIZE = 6,
- VT_INCLUDE_ALL_NGRAMS = 8
- };
- int32_t ngram_size() const { return GetField<int32_t>(VT_NGRAM_SIZE, 0); }
- int32_t max_skip_size() const { return GetField<int32_t>(VT_MAX_SKIP_SIZE, 0); }
- bool include_all_ngrams() const { return GetField<uint8_t>(VT_INCLUDE_ALL_NGRAMS, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NGRAM_SIZE) &&
- VerifyField<int32_t>(verifier, VT_MAX_SKIP_SIZE) &&
- VerifyField<uint8_t>(verifier, VT_INCLUDE_ALL_NGRAMS) && verifier.EndTable();
- }
-};
-
-struct SkipGramOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_ngram_size(int32_t ngram_size)
- {
- fbb_.AddElement<int32_t>(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0);
- }
- void add_max_skip_size(int32_t max_skip_size)
- {
- fbb_.AddElement<int32_t>(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0);
- }
- void add_include_all_ngrams(bool include_all_ngrams)
- {
- fbb_.AddElement<uint8_t>(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS,
- static_cast<uint8_t>(include_all_ngrams), 0);
- }
- explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
- flatbuffers::Offset<SkipGramOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SkipGramOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SkipGramOptions>
-CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size = 0,
- int32_t max_skip_size = 0, bool include_all_ngrams = false)
-{
- SkipGramOptionsBuilder builder_(_fbb);
- builder_.add_max_skip_size(max_skip_size);
- builder_.add_ngram_size(ngram_size);
- builder_.add_include_all_ngrams(include_all_ngrams);
- return builder_.Finish();
-}
-
-struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_BLOCK_SIZE = 4
- };
- int32_t block_size() const { return GetField<int32_t>(VT_BLOCK_SIZE, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) &&
- verifier.EndTable();
- }
-};
-
-struct SpaceToDepthOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_block_size(int32_t block_size)
- {
- fbb_.AddElement<int32_t>(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0);
- }
- explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
- flatbuffers::Offset<SpaceToDepthOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SpaceToDepthOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SpaceToDepthOptions>
-CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_size = 0)
-{
- SpaceToDepthOptionsBuilder builder_(_fbb);
- builder_.add_block_size(block_size);
- return builder_.Finish();
-}
-
-struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct SubOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SubOptionsBuilder &operator=(const SubOptionsBuilder &);
- flatbuffers::Offset<SubOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SubOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SubOptions>
-CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- SubOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_FUSED_ACTIVATION_FUNCTION = 4
- };
- ActivationFunctionType fused_activation_function() const
- {
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
- }
-};
-
-struct DivOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
- {
- fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION,
- static_cast<int8_t>(fused_activation_function), 0);
- }
- explicit DivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- DivOptionsBuilder &operator=(const DivOptionsBuilder &);
- flatbuffers::Offset<DivOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<DivOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<DivOptions>
-CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
-{
- DivOptionsBuilder builder_(_fbb);
- builder_.add_fused_activation_function(fused_activation_function);
- return builder_.Finish();
-}
-
-struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct TopKV2OptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &);
- flatbuffers::Offset<TopKV2Options> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<TopKV2Options>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb)
-{
- TopKV2OptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_COMBINER = 4
- };
- CombinerType combiner() const
- {
- return static_cast<CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_COMBINER) &&
- verifier.EndTable();
- }
-};
-
-struct EmbeddingLookupSparseOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_combiner(CombinerType combiner)
- {
- fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER,
- static_cast<int8_t>(combiner), 0);
- }
- explicit EmbeddingLookupSparseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &);
- flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<EmbeddingLookupSparseOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
-CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
- CombinerType combiner = CombinerType_SUM)
-{
- EmbeddingLookupSparseOptionsBuilder builder_(_fbb);
- builder_.add_combiner(combiner);
- return builder_.Finish();
-}
-
-struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_AXIS = 4
- };
- int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
- verifier.EndTable();
- }
-};
-
-struct GatherOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); }
- explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- GatherOptionsBuilder &operator=(const GatherOptionsBuilder &);
- flatbuffers::Offset<GatherOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<GatherOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t axis = 0)
-{
- GatherOptionsBuilder builder_(_fbb);
- builder_.add_axis(axis);
- return builder_.Finish();
-}
-
-struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct TransposeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &);
- flatbuffers::Offset<TransposeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<TransposeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<TransposeOptions>
-CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- TransposeOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct ExpOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ExpOptionsBuilder &operator=(const ExpOptionsBuilder &);
- flatbuffers::Offset<ExpOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ExpOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- ExpOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_KEEP_DIMS = 4
- };
- bool keep_dims() const { return GetField<uint8_t>(VT_KEEP_DIMS, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_KEEP_DIMS) &&
- verifier.EndTable();
- }
-};
-
-struct ReducerOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_keep_dims(bool keep_dims)
- {
- fbb_.AddElement<uint8_t>(ReducerOptions::VT_KEEP_DIMS, static_cast<uint8_t>(keep_dims), 0);
- }
- explicit ReducerOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &);
- flatbuffers::Offset<ReducerOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ReducerOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ReducerOptions>
-CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = false)
-{
- ReducerOptionsBuilder builder_(_fbb);
- builder_.add_keep_dims(keep_dims);
- return builder_.Finish();
-}
-
-struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_SQUEEZE_DIMS = 4
- };
- const flatbuffers::Vector<int32_t> *squeeze_dims() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SQUEEZE_DIMS);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SQUEEZE_DIMS) &&
- verifier.VerifyVector(squeeze_dims()) && verifier.EndTable();
- }
-};
-
-struct SqueezeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims)
- {
- fbb_.AddOffset(SqueezeOptions::VT_SQUEEZE_DIMS, squeeze_dims);
- }
- explicit SqueezeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &);
- flatbuffers::Offset<SqueezeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SqueezeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SqueezeOptions>
-CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims = 0)
-{
- SqueezeOptionsBuilder builder_(_fbb);
- builder_.add_squeeze_dims(squeeze_dims);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<SqueezeOptions>
-CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<int32_t> *squeeze_dims = nullptr)
-{
- return neurun_tflite::CreateSqueezeOptions(
- _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0);
-}
-
-struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_NUM_SPLITS = 4
- };
- int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) &&
- verifier.EndTable();
- }
-};
-
-struct SplitOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_num_splits(int32_t num_splits)
- {
- fbb_.AddElement<int32_t>(SplitOptions::VT_NUM_SPLITS, num_splits, 0);
- }
- explicit SplitOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SplitOptionsBuilder &operator=(const SplitOptionsBuilder &);
- flatbuffers::Offset<SplitOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SplitOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t num_splits = 0)
-{
- SplitOptionsBuilder builder_(_fbb);
- builder_.add_num_splits(num_splits);
- return builder_.Finish();
-}
-
-struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_NUM_SPLITS = 4
- };
- int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) &&
- verifier.EndTable();
- }
-};
-
-struct SplitVOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_num_splits(int32_t num_splits)
- {
- fbb_.AddElement<int32_t>(SplitVOptions::VT_NUM_SPLITS, num_splits, 0);
- }
- explicit SplitVOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &);
- flatbuffers::Offset<SplitVOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SplitVOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t num_splits = 0)
-{
- SplitVOptionsBuilder builder_(_fbb);
- builder_.add_num_splits(num_splits);
- return builder_.Finish();
-}
-
-struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_BEGIN_MASK = 4,
- VT_END_MASK = 6,
- VT_ELLIPSIS_MASK = 8,
- VT_NEW_AXIS_MASK = 10,
- VT_SHRINK_AXIS_MASK = 12
- };
- int32_t begin_mask() const { return GetField<int32_t>(VT_BEGIN_MASK, 0); }
- int32_t end_mask() const { return GetField<int32_t>(VT_END_MASK, 0); }
- int32_t ellipsis_mask() const { return GetField<int32_t>(VT_ELLIPSIS_MASK, 0); }
- int32_t new_axis_mask() const { return GetField<int32_t>(VT_NEW_AXIS_MASK, 0); }
- int32_t shrink_axis_mask() const { return GetField<int32_t>(VT_SHRINK_AXIS_MASK, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BEGIN_MASK) &&
- VerifyField<int32_t>(verifier, VT_END_MASK) &&
- VerifyField<int32_t>(verifier, VT_ELLIPSIS_MASK) &&
- VerifyField<int32_t>(verifier, VT_NEW_AXIS_MASK) &&
- VerifyField<int32_t>(verifier, VT_SHRINK_AXIS_MASK) && verifier.EndTable();
- }
-};
-
-struct StridedSliceOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_begin_mask(int32_t begin_mask)
- {
- fbb_.AddElement<int32_t>(StridedSliceOptions::VT_BEGIN_MASK, begin_mask, 0);
- }
- void add_end_mask(int32_t end_mask)
- {
- fbb_.AddElement<int32_t>(StridedSliceOptions::VT_END_MASK, end_mask, 0);
- }
- void add_ellipsis_mask(int32_t ellipsis_mask)
- {
- fbb_.AddElement<int32_t>(StridedSliceOptions::VT_ELLIPSIS_MASK, ellipsis_mask, 0);
- }
- void add_new_axis_mask(int32_t new_axis_mask)
- {
- fbb_.AddElement<int32_t>(StridedSliceOptions::VT_NEW_AXIS_MASK, new_axis_mask, 0);
- }
- void add_shrink_axis_mask(int32_t shrink_axis_mask)
- {
- fbb_.AddElement<int32_t>(StridedSliceOptions::VT_SHRINK_AXIS_MASK, shrink_axis_mask, 0);
- }
- explicit StridedSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &);
- flatbuffers::Offset<StridedSliceOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<StridedSliceOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<StridedSliceOptions>
-CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t begin_mask = 0,
- int32_t end_mask = 0, int32_t ellipsis_mask = 0,
- int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0)
-{
- StridedSliceOptionsBuilder builder_(_fbb);
- builder_.add_shrink_axis_mask(shrink_axis_mask);
- builder_.add_new_axis_mask(new_axis_mask);
- builder_.add_ellipsis_mask(ellipsis_mask);
- builder_.add_end_mask(end_mask);
- builder_.add_begin_mask(begin_mask);
- return builder_.Finish();
-}
-
-struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct LogSoftmaxOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &);
- flatbuffers::Offset<LogSoftmaxOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LogSoftmaxOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LogSoftmaxOptions>
-CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- LogSoftmaxOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_IN_DATA_TYPE = 4,
- VT_OUT_DATA_TYPE = 6
- };
- TensorType in_data_type() const
- {
- return static_cast<TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
- }
- TensorType out_data_type() const
- {
- return static_cast<TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_IN_DATA_TYPE) &&
- VerifyField<int8_t>(verifier, VT_OUT_DATA_TYPE) && verifier.EndTable();
- }
-};
-
-struct CastOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_in_data_type(TensorType in_data_type)
- {
- fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, static_cast<int8_t>(in_data_type), 0);
- }
- void add_out_data_type(TensorType out_data_type)
- {
- fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, static_cast<int8_t>(out_data_type), 0);
- }
- explicit CastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- CastOptionsBuilder &operator=(const CastOptionsBuilder &);
- flatbuffers::Offset<CastOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<CastOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<CastOptions>
-CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType in_data_type = TensorType_FLOAT32,
- TensorType out_data_type = TensorType_FLOAT32)
-{
- CastOptionsBuilder builder_(_fbb);
- builder_.add_out_data_type(out_data_type);
- builder_.add_in_data_type(in_data_type);
- return builder_.Finish();
-}
-
-struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct DequantizeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &);
- flatbuffers::Offset<DequantizeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<DequantizeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<DequantizeOptions>
-CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- DequantizeOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct MaximumMinimumOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &);
- flatbuffers::Offset<MaximumMinimumOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<MaximumMinimumOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<MaximumMinimumOptions>
-CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- MaximumMinimumOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct TileOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- TileOptionsBuilder &operator=(const TileOptionsBuilder &);
- flatbuffers::Offset<TileOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<TileOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- TileOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_OUTPUT_TYPE = 4
- };
- TensorType output_type() const
- {
- return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) &&
- verifier.EndTable();
- }
-};
-
-struct ArgMaxOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_output_type(TensorType output_type)
- {
- fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
- }
- explicit ArgMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &);
- flatbuffers::Offset<ArgMaxOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ArgMaxOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ArgMaxOptions>
-CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType output_type = TensorType_FLOAT32)
-{
- ArgMaxOptionsBuilder builder_(_fbb);
- builder_.add_output_type(output_type);
- return builder_.Finish();
-}
-
-struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_OUTPUT_TYPE = 4
- };
- TensorType output_type() const
- {
- return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) &&
- verifier.EndTable();
- }
-};
-
-struct ArgMinOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_output_type(TensorType output_type)
- {
- fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
- }
- explicit ArgMinOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &);
- flatbuffers::Offset<ArgMinOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ArgMinOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ArgMinOptions>
-CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType output_type = TensorType_FLOAT32)
-{
- ArgMinOptionsBuilder builder_(_fbb);
- builder_.add_output_type(output_type);
- return builder_.Finish();
-}
-
-struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct GreaterOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &);
- flatbuffers::Offset<GreaterOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<GreaterOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<GreaterOptions>
-CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- GreaterOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct GreaterEqualOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &);
- flatbuffers::Offset<GreaterEqualOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<GreaterEqualOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<GreaterEqualOptions>
-CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- GreaterEqualOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct LessOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LessOptionsBuilder &operator=(const LessOptionsBuilder &);
- flatbuffers::Offset<LessOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LessOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- LessOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct LessEqualOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &);
- flatbuffers::Offset<LessEqualOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LessEqualOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LessEqualOptions>
-CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- LessEqualOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct NegOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- NegOptionsBuilder &operator=(const NegOptionsBuilder &);
- flatbuffers::Offset<NegOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<NegOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- NegOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct SelectOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SelectOptionsBuilder &operator=(const SelectOptionsBuilder &);
- flatbuffers::Offset<SelectOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SelectOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- SelectOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct SliceOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SliceOptionsBuilder &operator=(const SliceOptionsBuilder &);
- flatbuffers::Offset<SliceOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SliceOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- SliceOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_PADDING = 4,
- VT_STRIDE_W = 6,
- VT_STRIDE_H = 8
- };
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
- int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
- int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_H) && verifier.EndTable();
- }
-};
-
-struct TransposeConvOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
- {
- fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
- }
- void add_stride_w(int32_t stride_w)
- {
- fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_W, stride_w, 0);
- }
- void add_stride_h(int32_t stride_h)
- {
- fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_H, stride_h, 0);
- }
- explicit TransposeConvOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &);
- flatbuffers::Offset<TransposeConvOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<TransposeConvOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<TransposeConvOptions>
-CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0)
-{
- TransposeConvOptionsBuilder builder_(_fbb);
- builder_.add_stride_h(stride_h);
- builder_.add_stride_w(stride_w);
- builder_.add_padding(padding);
- return builder_.Finish();
-}
-
-struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct ExpandDimsOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &);
- flatbuffers::Offset<ExpandDimsOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ExpandDimsOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ExpandDimsOptions>
-CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- ExpandDimsOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_VALIDATE_INDICES = 4
- };
- bool validate_indices() const { return GetField<uint8_t>(VT_VALIDATE_INDICES, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_VALIDATE_INDICES) &&
- verifier.EndTable();
- }
-};
-
-struct SparseToDenseOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_validate_indices(bool validate_indices)
- {
- fbb_.AddElement<uint8_t>(SparseToDenseOptions::VT_VALIDATE_INDICES,
- static_cast<uint8_t>(validate_indices), 0);
- }
- explicit SparseToDenseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &);
- flatbuffers::Offset<SparseToDenseOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SparseToDenseOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SparseToDenseOptions>
-CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, bool validate_indices = false)
-{
- SparseToDenseOptionsBuilder builder_(_fbb);
- builder_.add_validate_indices(validate_indices);
- return builder_.Finish();
-}
-
-struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct EqualOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- EqualOptionsBuilder &operator=(const EqualOptionsBuilder &);
- flatbuffers::Offset<EqualOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<EqualOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- EqualOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct NotEqualOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &);
- flatbuffers::Offset<NotEqualOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<NotEqualOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<NotEqualOptions>
-CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- NotEqualOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_OUT_TYPE = 4
- };
- TensorType out_type() const { return static_cast<TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0)); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUT_TYPE) &&
- verifier.EndTable();
- }
-};
-
-struct ShapeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_out_type(TensorType out_type)
- {
- fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0);
- }
- explicit ShapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &);
- flatbuffers::Offset<ShapeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ShapeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ShapeOptions>
-CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = TensorType_FLOAT32)
-{
- ShapeOptionsBuilder builder_(_fbb);
- builder_.add_out_type(out_type);
- return builder_.Finish();
-}
-
-struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct PowOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- PowOptionsBuilder &operator=(const PowOptionsBuilder &);
- flatbuffers::Offset<PowOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<PowOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- PowOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_MIN = 4,
- VT_MAX = 6,
- VT_NUM_BITS = 8,
- VT_NARROW_RANGE = 10
- };
- float min() const { return GetField<float>(VT_MIN, 0.0f); }
- float max() const { return GetField<float>(VT_MAX, 0.0f); }
- int32_t num_bits() const { return GetField<int32_t>(VT_NUM_BITS, 0); }
- bool narrow_range() const { return GetField<uint8_t>(VT_NARROW_RANGE, 0) != 0; }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_MIN) &&
- VerifyField<float>(verifier, VT_MAX) && VerifyField<int32_t>(verifier, VT_NUM_BITS) &&
- VerifyField<uint8_t>(verifier, VT_NARROW_RANGE) && verifier.EndTable();
- }
-};
-
-struct FakeQuantOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_min(float min) { fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f); }
- void add_max(float max) { fbb_.AddElement<float>(FakeQuantOptions::VT_MAX, max, 0.0f); }
- void add_num_bits(int32_t num_bits)
- {
- fbb_.AddElement<int32_t>(FakeQuantOptions::VT_NUM_BITS, num_bits, 0);
- }
- void add_narrow_range(bool narrow_range)
- {
- fbb_.AddElement<uint8_t>(FakeQuantOptions::VT_NARROW_RANGE, static_cast<uint8_t>(narrow_range),
- 0);
- }
- explicit FakeQuantOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &);
- flatbuffers::Offset<FakeQuantOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<FakeQuantOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<FakeQuantOptions>
-CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, float min = 0.0f, float max = 0.0f,
- int32_t num_bits = 0, bool narrow_range = false)
-{
- FakeQuantOptionsBuilder builder_(_fbb);
- builder_.add_num_bits(num_bits);
- builder_.add_max(max);
- builder_.add_min(min);
- builder_.add_narrow_range(narrow_range);
- return builder_.Finish();
-}
-
-struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_VALUES_COUNT = 4,
- VT_AXIS = 6
- };
- int32_t values_count() const { return GetField<int32_t>(VT_VALUES_COUNT, 0); }
- int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_VALUES_COUNT) &&
- VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
- }
-};
-
-struct PackOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_values_count(int32_t values_count)
- {
- fbb_.AddElement<int32_t>(PackOptions::VT_VALUES_COUNT, values_count, 0);
- }
- void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(PackOptions::VT_AXIS, axis, 0); }
- explicit PackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- PackOptionsBuilder &operator=(const PackOptionsBuilder &);
- flatbuffers::Offset<PackOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<PackOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<PackOptions>
-CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t values_count = 0, int32_t axis = 0)
-{
- PackOptionsBuilder builder_(_fbb);
- builder_.add_axis(axis);
- builder_.add_values_count(values_count);
- return builder_.Finish();
-}
-
-struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct LogicalOrOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &);
- flatbuffers::Offset<LogicalOrOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LogicalOrOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LogicalOrOptions>
-CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- LogicalOrOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_AXIS = 4
- };
- int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
- verifier.EndTable();
- }
-};
-
-struct OneHotOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0); }
- explicit OneHotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &);
- flatbuffers::Offset<OneHotOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<OneHotOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t axis = 0)
-{
- OneHotOptionsBuilder builder_(_fbb);
- builder_.add_axis(axis);
- return builder_.Finish();
-}
-
-struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct AbsOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- AbsOptionsBuilder &operator=(const AbsOptionsBuilder &);
- flatbuffers::Offset<AbsOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<AbsOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- AbsOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct LogicalAndOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &);
- flatbuffers::Offset<LogicalAndOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LogicalAndOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LogicalAndOptions>
-CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- LogicalAndOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct LogicalNotOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &);
- flatbuffers::Offset<LogicalNotOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LogicalNotOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LogicalNotOptions>
-CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- LogicalNotOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_NUM = 4,
- VT_AXIS = 6
- };
- int32_t num() const { return GetField<int32_t>(VT_NUM, 0); }
- int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM) &&
- VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
- }
-};
-
-struct UnpackOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_num(int32_t num) { fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0); }
- void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(UnpackOptions::VT_AXIS, axis, 0); }
- explicit UnpackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &);
- flatbuffers::Offset<UnpackOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<UnpackOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t num = 0, int32_t axis = 0)
-{
- UnpackOptionsBuilder builder_(_fbb);
- builder_.add_axis(axis);
- builder_.add_num(num);
- return builder_.Finish();
-}
-
-struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct FloorDivOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &);
- flatbuffers::Offset<FloorDivOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<FloorDivOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<FloorDivOptions>
-CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- FloorDivOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct SquareOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SquareOptionsBuilder &operator=(const SquareOptionsBuilder &);
- flatbuffers::Offset<SquareOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SquareOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- SquareOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct ZerosLikeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &);
- flatbuffers::Offset<ZerosLikeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<ZerosLikeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<ZerosLikeOptions>
-CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- ZerosLikeOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct FillOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- FillOptionsBuilder &operator=(const FillOptionsBuilder &);
- flatbuffers::Offset<FillOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<FillOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- FillOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct FloorModOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
- flatbuffers::Offset<FloorModOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<FloorModOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<FloorModOptions>
-CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- FloorModOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct RangeOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- RangeOptionsBuilder &operator=(const RangeOptionsBuilder &);
- flatbuffers::Offset<RangeOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<RangeOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- RangeOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_ALPHA = 4
- };
- float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_ALPHA) &&
- verifier.EndTable();
- }
-};
-
-struct LeakyReluOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_alpha(float alpha) { fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); }
- explicit LeakyReluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &);
- flatbuffers::Offset<LeakyReluOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<LeakyReluOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<LeakyReluOptions>
-CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, float alpha = 0.0f)
-{
- LeakyReluOptionsBuilder builder_(_fbb);
- builder_.add_alpha(alpha);
- return builder_.Finish();
-}
-
-struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && verifier.EndTable();
- }
-};
-
-struct SquaredDifferenceOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &);
- flatbuffers::Offset<SquaredDifferenceOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SquaredDifferenceOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SquaredDifferenceOptions>
-CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb)
-{
- SquaredDifferenceOptionsBuilder builder_(_fbb);
- return builder_.Finish();
-}
-
-struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_MODE = 4
- };
- MirrorPadMode mode() const { return static_cast<MirrorPadMode>(GetField<int8_t>(VT_MODE, 0)); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_MODE) &&
- verifier.EndTable();
- }
-};
-
-struct MirrorPadOptionsBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_mode(MirrorPadMode mode)
- {
- fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0);
- }
- explicit MirrorPadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &);
- flatbuffers::Offset<MirrorPadOptions> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<MirrorPadOptions>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<MirrorPadOptions>
-CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
- MirrorPadMode mode = MirrorPadMode_REFLECT)
-{
- MirrorPadOptionsBuilder builder_(_fbb);
- builder_.add_mode(mode);
- return builder_.Finish();
-}
-
-struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_BUILTIN_CODE = 4,
- VT_CUSTOM_CODE = 6,
- VT_VERSION = 8
- };
- BuiltinOperator builtin_code() const
- {
- return static_cast<BuiltinOperator>(GetField<int8_t>(VT_BUILTIN_CODE, 0));
- }
- const flatbuffers::String *custom_code() const
- {
- return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
- }
- int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_BUILTIN_CODE) &&
- VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) &&
- VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable();
- }
-};
-
-struct OperatorCodeBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_builtin_code(BuiltinOperator builtin_code)
- {
- fbb_.AddElement<int8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int8_t>(builtin_code), 0);
- }
- void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code)
- {
- fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code);
- }
- void add_version(int32_t version)
- {
- fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
- }
- explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
- flatbuffers::Offset<OperatorCode> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<OperatorCode>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb,
- BuiltinOperator builtin_code = BuiltinOperator_ADD,
- flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1)
-{
- OperatorCodeBuilder builder_(_fbb);
- builder_.add_version(version);
- builder_.add_custom_code(custom_code);
- builder_.add_builtin_code(builtin_code);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb,
- BuiltinOperator builtin_code = BuiltinOperator_ADD,
- const char *custom_code = nullptr, int32_t version = 1)
-{
- return neurun_tflite::CreateOperatorCode(
- _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version);
-}
-
-struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_OPCODE_INDEX = 4,
- VT_INPUTS = 6,
- VT_OUTPUTS = 8,
- VT_BUILTIN_OPTIONS_TYPE = 10,
- VT_BUILTIN_OPTIONS = 12,
- VT_CUSTOM_OPTIONS = 14,
- VT_CUSTOM_OPTIONS_FORMAT = 16,
- VT_MUTATING_VARIABLE_INPUTS = 18
- };
- uint32_t opcode_index() const { return GetField<uint32_t>(VT_OPCODE_INDEX, 0); }
- const flatbuffers::Vector<int32_t> *inputs() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
- }
- const flatbuffers::Vector<int32_t> *outputs() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
- }
- BuiltinOptions builtin_options_type() const
- {
- return static_cast<BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
- }
- const void *builtin_options() const { return GetPointer<const void *>(VT_BUILTIN_OPTIONS); }
- template <typename T> const T *builtin_options_as() const;
- const Conv2DOptions *builtin_options_as_Conv2DOptions() const
- {
- return builtin_options_type() == BuiltinOptions_Conv2DOptions
- ? static_cast<const Conv2DOptions *>(builtin_options())
- : nullptr;
- }
- const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
- {
- return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions
- ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
- : nullptr;
- }
- const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions
- ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
- : nullptr;
- }
- const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LSHProjectionOptions
- ? static_cast<const LSHProjectionOptions *>(builtin_options())
- : nullptr;
- }
- const Pool2DOptions *builtin_options_as_Pool2DOptions() const
- {
- return builtin_options_type() == BuiltinOptions_Pool2DOptions
- ? static_cast<const Pool2DOptions *>(builtin_options())
- : nullptr;
- }
- const SVDFOptions *builtin_options_as_SVDFOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SVDFOptions
- ? static_cast<const SVDFOptions *>(builtin_options())
- : nullptr;
- }
- const RNNOptions *builtin_options_as_RNNOptions() const
- {
- return builtin_options_type() == BuiltinOptions_RNNOptions
- ? static_cast<const RNNOptions *>(builtin_options())
- : nullptr;
- }
- const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
- {
- return builtin_options_type() == BuiltinOptions_FullyConnectedOptions
- ? static_cast<const FullyConnectedOptions *>(builtin_options())
- : nullptr;
- }
- const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SoftmaxOptions
- ? static_cast<const SoftmaxOptions *>(builtin_options())
- : nullptr;
- }
- const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ConcatenationOptions
- ? static_cast<const ConcatenationOptions *>(builtin_options())
- : nullptr;
- }
- const AddOptions *builtin_options_as_AddOptions() const
- {
- return builtin_options_type() == BuiltinOptions_AddOptions
- ? static_cast<const AddOptions *>(builtin_options())
- : nullptr;
- }
- const L2NormOptions *builtin_options_as_L2NormOptions() const
- {
- return builtin_options_type() == BuiltinOptions_L2NormOptions
- ? static_cast<const L2NormOptions *>(builtin_options())
- : nullptr;
- }
- const LocalResponseNormalizationOptions *
- builtin_options_as_LocalResponseNormalizationOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions
- ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options())
- : nullptr;
- }
- const LSTMOptions *builtin_options_as_LSTMOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LSTMOptions
- ? static_cast<const LSTMOptions *>(builtin_options())
- : nullptr;
- }
- const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions
- ? static_cast<const ResizeBilinearOptions *>(builtin_options())
- : nullptr;
- }
- const CallOptions *builtin_options_as_CallOptions() const
- {
- return builtin_options_type() == BuiltinOptions_CallOptions
- ? static_cast<const CallOptions *>(builtin_options())
- : nullptr;
- }
- const ReshapeOptions *builtin_options_as_ReshapeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ReshapeOptions
- ? static_cast<const ReshapeOptions *>(builtin_options())
- : nullptr;
- }
- const SkipGramOptions *builtin_options_as_SkipGramOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SkipGramOptions
- ? static_cast<const SkipGramOptions *>(builtin_options())
- : nullptr;
- }
- const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions
- ? static_cast<const SpaceToDepthOptions *>(builtin_options())
- : nullptr;
- }
- const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const
- {
- return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions
- ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options())
- : nullptr;
- }
- const MulOptions *builtin_options_as_MulOptions() const
- {
- return builtin_options_type() == BuiltinOptions_MulOptions
- ? static_cast<const MulOptions *>(builtin_options())
- : nullptr;
- }
- const PadOptions *builtin_options_as_PadOptions() const
- {
- return builtin_options_type() == BuiltinOptions_PadOptions
- ? static_cast<const PadOptions *>(builtin_options())
- : nullptr;
- }
- const GatherOptions *builtin_options_as_GatherOptions() const
- {
- return builtin_options_type() == BuiltinOptions_GatherOptions
- ? static_cast<const GatherOptions *>(builtin_options())
- : nullptr;
- }
- const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
- {
- return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions
- ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
- : nullptr;
- }
- const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions
- ? static_cast<const SpaceToBatchNDOptions *>(builtin_options())
- : nullptr;
- }
- const TransposeOptions *builtin_options_as_TransposeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_TransposeOptions
- ? static_cast<const TransposeOptions *>(builtin_options())
- : nullptr;
- }
- const ReducerOptions *builtin_options_as_ReducerOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ReducerOptions
- ? static_cast<const ReducerOptions *>(builtin_options())
- : nullptr;
- }
- const SubOptions *builtin_options_as_SubOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SubOptions
- ? static_cast<const SubOptions *>(builtin_options())
- : nullptr;
- }
- const DivOptions *builtin_options_as_DivOptions() const
- {
- return builtin_options_type() == BuiltinOptions_DivOptions
- ? static_cast<const DivOptions *>(builtin_options())
- : nullptr;
- }
- const SqueezeOptions *builtin_options_as_SqueezeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SqueezeOptions
- ? static_cast<const SqueezeOptions *>(builtin_options())
- : nullptr;
- }
- const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SequenceRNNOptions
- ? static_cast<const SequenceRNNOptions *>(builtin_options())
- : nullptr;
- }
- const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
- {
- return builtin_options_type() == BuiltinOptions_StridedSliceOptions
- ? static_cast<const StridedSliceOptions *>(builtin_options())
- : nullptr;
- }
- const ExpOptions *builtin_options_as_ExpOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ExpOptions
- ? static_cast<const ExpOptions *>(builtin_options())
- : nullptr;
- }
- const TopKV2Options *builtin_options_as_TopKV2Options() const
- {
- return builtin_options_type() == BuiltinOptions_TopKV2Options
- ? static_cast<const TopKV2Options *>(builtin_options())
- : nullptr;
- }
- const SplitOptions *builtin_options_as_SplitOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SplitOptions
- ? static_cast<const SplitOptions *>(builtin_options())
- : nullptr;
- }
- const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions
- ? static_cast<const LogSoftmaxOptions *>(builtin_options())
- : nullptr;
- }
- const CastOptions *builtin_options_as_CastOptions() const
- {
- return builtin_options_type() == BuiltinOptions_CastOptions
- ? static_cast<const CastOptions *>(builtin_options())
- : nullptr;
- }
- const DequantizeOptions *builtin_options_as_DequantizeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_DequantizeOptions
- ? static_cast<const DequantizeOptions *>(builtin_options())
- : nullptr;
- }
- const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
- {
- return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions
- ? static_cast<const MaximumMinimumOptions *>(builtin_options())
- : nullptr;
- }
- const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ArgMaxOptions
- ? static_cast<const ArgMaxOptions *>(builtin_options())
- : nullptr;
- }
- const LessOptions *builtin_options_as_LessOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LessOptions
- ? static_cast<const LessOptions *>(builtin_options())
- : nullptr;
- }
- const NegOptions *builtin_options_as_NegOptions() const
- {
- return builtin_options_type() == BuiltinOptions_NegOptions
- ? static_cast<const NegOptions *>(builtin_options())
- : nullptr;
- }
- const PadV2Options *builtin_options_as_PadV2Options() const
- {
- return builtin_options_type() == BuiltinOptions_PadV2Options
- ? static_cast<const PadV2Options *>(builtin_options())
- : nullptr;
- }
- const GreaterOptions *builtin_options_as_GreaterOptions() const
- {
- return builtin_options_type() == BuiltinOptions_GreaterOptions
- ? static_cast<const GreaterOptions *>(builtin_options())
- : nullptr;
- }
- const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
- {
- return builtin_options_type() == BuiltinOptions_GreaterEqualOptions
- ? static_cast<const GreaterEqualOptions *>(builtin_options())
- : nullptr;
- }
- const LessEqualOptions *builtin_options_as_LessEqualOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LessEqualOptions
- ? static_cast<const LessEqualOptions *>(builtin_options())
- : nullptr;
- }
- const SelectOptions *builtin_options_as_SelectOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SelectOptions
- ? static_cast<const SelectOptions *>(builtin_options())
- : nullptr;
- }
- const SliceOptions *builtin_options_as_SliceOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SliceOptions
- ? static_cast<const SliceOptions *>(builtin_options())
- : nullptr;
- }
- const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
- {
- return builtin_options_type() == BuiltinOptions_TransposeConvOptions
- ? static_cast<const TransposeConvOptions *>(builtin_options())
- : nullptr;
- }
- const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SparseToDenseOptions
- ? static_cast<const SparseToDenseOptions *>(builtin_options())
- : nullptr;
- }
- const TileOptions *builtin_options_as_TileOptions() const
- {
- return builtin_options_type() == BuiltinOptions_TileOptions
- ? static_cast<const TileOptions *>(builtin_options())
- : nullptr;
- }
- const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ExpandDimsOptions
- ? static_cast<const ExpandDimsOptions *>(builtin_options())
- : nullptr;
- }
- const EqualOptions *builtin_options_as_EqualOptions() const
- {
- return builtin_options_type() == BuiltinOptions_EqualOptions
- ? static_cast<const EqualOptions *>(builtin_options())
- : nullptr;
- }
- const NotEqualOptions *builtin_options_as_NotEqualOptions() const
- {
- return builtin_options_type() == BuiltinOptions_NotEqualOptions
- ? static_cast<const NotEqualOptions *>(builtin_options())
- : nullptr;
- }
- const ShapeOptions *builtin_options_as_ShapeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ShapeOptions
- ? static_cast<const ShapeOptions *>(builtin_options())
- : nullptr;
- }
- const PowOptions *builtin_options_as_PowOptions() const
- {
- return builtin_options_type() == BuiltinOptions_PowOptions
- ? static_cast<const PowOptions *>(builtin_options())
- : nullptr;
- }
- const ArgMinOptions *builtin_options_as_ArgMinOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ArgMinOptions
- ? static_cast<const ArgMinOptions *>(builtin_options())
- : nullptr;
- }
- const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
- {
- return builtin_options_type() == BuiltinOptions_FakeQuantOptions
- ? static_cast<const FakeQuantOptions *>(builtin_options())
- : nullptr;
- }
- const PackOptions *builtin_options_as_PackOptions() const
- {
- return builtin_options_type() == BuiltinOptions_PackOptions
- ? static_cast<const PackOptions *>(builtin_options())
- : nullptr;
- }
- const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LogicalOrOptions
- ? static_cast<const LogicalOrOptions *>(builtin_options())
- : nullptr;
- }
- const OneHotOptions *builtin_options_as_OneHotOptions() const
- {
- return builtin_options_type() == BuiltinOptions_OneHotOptions
- ? static_cast<const OneHotOptions *>(builtin_options())
- : nullptr;
- }
- const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LogicalAndOptions
- ? static_cast<const LogicalAndOptions *>(builtin_options())
- : nullptr;
- }
- const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LogicalNotOptions
- ? static_cast<const LogicalNotOptions *>(builtin_options())
- : nullptr;
- }
- const UnpackOptions *builtin_options_as_UnpackOptions() const
- {
- return builtin_options_type() == BuiltinOptions_UnpackOptions
- ? static_cast<const UnpackOptions *>(builtin_options())
- : nullptr;
- }
- const FloorDivOptions *builtin_options_as_FloorDivOptions() const
- {
- return builtin_options_type() == BuiltinOptions_FloorDivOptions
- ? static_cast<const FloorDivOptions *>(builtin_options())
- : nullptr;
- }
- const SquareOptions *builtin_options_as_SquareOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SquareOptions
- ? static_cast<const SquareOptions *>(builtin_options())
- : nullptr;
- }
- const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ZerosLikeOptions
- ? static_cast<const ZerosLikeOptions *>(builtin_options())
- : nullptr;
- }
- const FillOptions *builtin_options_as_FillOptions() const
- {
- return builtin_options_type() == BuiltinOptions_FillOptions
- ? static_cast<const FillOptions *>(builtin_options())
- : nullptr;
- }
- const BidirectionalSequenceLSTMOptions *
- builtin_options_as_BidirectionalSequenceLSTMOptions() const
- {
- return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions
- ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
- }
- const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const
- {
- return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions
- ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options())
- : nullptr;
- }
- const UnidirectionalSequenceLSTMOptions *
- builtin_options_as_UnidirectionalSequenceLSTMOptions() const
- {
- return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions
- ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
- }
- const FloorModOptions *builtin_options_as_FloorModOptions() const
- {
- return builtin_options_type() == BuiltinOptions_FloorModOptions
- ? static_cast<const FloorModOptions *>(builtin_options())
- : nullptr;
- }
- const RangeOptions *builtin_options_as_RangeOptions() const
- {
- return builtin_options_type() == BuiltinOptions_RangeOptions
- ? static_cast<const RangeOptions *>(builtin_options())
- : nullptr;
- }
- const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const
- {
- return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions
- ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options())
- : nullptr;
- }
- const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
- {
- return builtin_options_type() == BuiltinOptions_LeakyReluOptions
- ? static_cast<const LeakyReluOptions *>(builtin_options())
- : nullptr;
- }
- const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions
- ? static_cast<const SquaredDifferenceOptions *>(builtin_options())
- : nullptr;
- }
- const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
- {
- return builtin_options_type() == BuiltinOptions_MirrorPadOptions
- ? static_cast<const MirrorPadOptions *>(builtin_options())
- : nullptr;
- }
- const AbsOptions *builtin_options_as_AbsOptions() const
- {
- return builtin_options_type() == BuiltinOptions_AbsOptions
- ? static_cast<const AbsOptions *>(builtin_options())
- : nullptr;
- }
- const SplitVOptions *builtin_options_as_SplitVOptions() const
- {
- return builtin_options_type() == BuiltinOptions_SplitVOptions
- ? static_cast<const SplitVOptions *>(builtin_options())
- : nullptr;
- }
- const flatbuffers::Vector<uint8_t> *custom_options() const
- {
- return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
- }
- CustomOptionsFormat custom_options_format() const
- {
- return static_cast<CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
- }
- const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const
- {
- return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_MUTATING_VARIABLE_INPUTS);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_OPCODE_INDEX) &&
- VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) &&
- VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
- VerifyField<uint8_t>(verifier, VT_BUILTIN_OPTIONS_TYPE) &&
- VerifyOffset(verifier, VT_BUILTIN_OPTIONS) &&
- VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) &&
- VerifyOffset(verifier, VT_CUSTOM_OPTIONS) && verifier.VerifyVector(custom_options()) &&
- VerifyField<int8_t>(verifier, VT_CUSTOM_OPTIONS_FORMAT) &&
- VerifyOffset(verifier, VT_MUTATING_VARIABLE_INPUTS) &&
- verifier.VerifyVector(mutating_variable_inputs()) && verifier.EndTable();
- }
-};
-
-template <> inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>() const
-{
- return builtin_options_as_Conv2DOptions();
-}
-
-template <>
-inline const DepthwiseConv2DOptions *Operator::builtin_options_as<DepthwiseConv2DOptions>() const
-{
- return builtin_options_as_DepthwiseConv2DOptions();
-}
-
-template <>
-inline const ConcatEmbeddingsOptions *Operator::builtin_options_as<ConcatEmbeddingsOptions>() const
-{
- return builtin_options_as_ConcatEmbeddingsOptions();
-}
-
-template <>
-inline const LSHProjectionOptions *Operator::builtin_options_as<LSHProjectionOptions>() const
-{
- return builtin_options_as_LSHProjectionOptions();
-}
-
-template <> inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>() const
-{
- return builtin_options_as_Pool2DOptions();
-}
-
-template <> inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const
-{
- return builtin_options_as_SVDFOptions();
-}
-
-template <> inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const
-{
- return builtin_options_as_RNNOptions();
-}
-
-template <>
-inline const FullyConnectedOptions *Operator::builtin_options_as<FullyConnectedOptions>() const
-{
- return builtin_options_as_FullyConnectedOptions();
-}
-
-template <> inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>() const
-{
- return builtin_options_as_SoftmaxOptions();
-}
-
-template <>
-inline const ConcatenationOptions *Operator::builtin_options_as<ConcatenationOptions>() const
-{
- return builtin_options_as_ConcatenationOptions();
-}
-
-template <> inline const AddOptions *Operator::builtin_options_as<AddOptions>() const
-{
- return builtin_options_as_AddOptions();
-}
-
-template <> inline const L2NormOptions *Operator::builtin_options_as<L2NormOptions>() const
-{
- return builtin_options_as_L2NormOptions();
-}
-
-template <>
-inline const LocalResponseNormalizationOptions *
-Operator::builtin_options_as<LocalResponseNormalizationOptions>() const
-{
- return builtin_options_as_LocalResponseNormalizationOptions();
-}
-
-template <> inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const
-{
- return builtin_options_as_LSTMOptions();
-}
-
-template <>
-inline const ResizeBilinearOptions *Operator::builtin_options_as<ResizeBilinearOptions>() const
-{
- return builtin_options_as_ResizeBilinearOptions();
-}
-
-template <> inline const CallOptions *Operator::builtin_options_as<CallOptions>() const
-{
- return builtin_options_as_CallOptions();
-}
-
-template <> inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>() const
-{
- return builtin_options_as_ReshapeOptions();
-}
-
-template <> inline const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>() const
-{
- return builtin_options_as_SkipGramOptions();
-}
-
-template <>
-inline const SpaceToDepthOptions *Operator::builtin_options_as<SpaceToDepthOptions>() const
-{
- return builtin_options_as_SpaceToDepthOptions();
-}
-
-template <>
-inline const EmbeddingLookupSparseOptions *
-Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const
-{
- return builtin_options_as_EmbeddingLookupSparseOptions();
-}
-
-template <> inline const MulOptions *Operator::builtin_options_as<MulOptions>() const
-{
- return builtin_options_as_MulOptions();
-}
-
-template <> inline const PadOptions *Operator::builtin_options_as<PadOptions>() const
-{
- return builtin_options_as_PadOptions();
-}
-
-template <> inline const GatherOptions *Operator::builtin_options_as<GatherOptions>() const
-{
- return builtin_options_as_GatherOptions();
-}
-
-template <>
-inline const BatchToSpaceNDOptions *Operator::builtin_options_as<BatchToSpaceNDOptions>() const
-{
- return builtin_options_as_BatchToSpaceNDOptions();
-}
-
-template <>
-inline const SpaceToBatchNDOptions *Operator::builtin_options_as<SpaceToBatchNDOptions>() const
-{
- return builtin_options_as_SpaceToBatchNDOptions();
-}
-
-template <> inline const TransposeOptions *Operator::builtin_options_as<TransposeOptions>() const
-{
- return builtin_options_as_TransposeOptions();
-}
-
-template <> inline const ReducerOptions *Operator::builtin_options_as<ReducerOptions>() const
-{
- return builtin_options_as_ReducerOptions();
-}
-
-template <> inline const SubOptions *Operator::builtin_options_as<SubOptions>() const
-{
- return builtin_options_as_SubOptions();
-}
-
-template <> inline const DivOptions *Operator::builtin_options_as<DivOptions>() const
-{
- return builtin_options_as_DivOptions();
-}
-
-template <> inline const SqueezeOptions *Operator::builtin_options_as<SqueezeOptions>() const
-{
- return builtin_options_as_SqueezeOptions();
-}
-
-template <>
-inline const SequenceRNNOptions *Operator::builtin_options_as<SequenceRNNOptions>() const
-{
- return builtin_options_as_SequenceRNNOptions();
-}
-
-template <>
-inline const StridedSliceOptions *Operator::builtin_options_as<StridedSliceOptions>() const
-{
- return builtin_options_as_StridedSliceOptions();
-}
-
-template <> inline const ExpOptions *Operator::builtin_options_as<ExpOptions>() const
-{
- return builtin_options_as_ExpOptions();
-}
-
-template <> inline const TopKV2Options *Operator::builtin_options_as<TopKV2Options>() const
-{
- return builtin_options_as_TopKV2Options();
-}
-
-template <> inline const SplitOptions *Operator::builtin_options_as<SplitOptions>() const
-{
- return builtin_options_as_SplitOptions();
-}
-
-template <> inline const LogSoftmaxOptions *Operator::builtin_options_as<LogSoftmaxOptions>() const
-{
- return builtin_options_as_LogSoftmaxOptions();
-}
-
-template <> inline const CastOptions *Operator::builtin_options_as<CastOptions>() const
-{
- return builtin_options_as_CastOptions();
-}
-
-template <> inline const DequantizeOptions *Operator::builtin_options_as<DequantizeOptions>() const
-{
- return builtin_options_as_DequantizeOptions();
-}
-
-template <>
-inline const MaximumMinimumOptions *Operator::builtin_options_as<MaximumMinimumOptions>() const
-{
- return builtin_options_as_MaximumMinimumOptions();
-}
-
-template <> inline const ArgMaxOptions *Operator::builtin_options_as<ArgMaxOptions>() const
-{
- return builtin_options_as_ArgMaxOptions();
-}
-
-template <> inline const LessOptions *Operator::builtin_options_as<LessOptions>() const
-{
- return builtin_options_as_LessOptions();
-}
-
-template <> inline const NegOptions *Operator::builtin_options_as<NegOptions>() const
-{
- return builtin_options_as_NegOptions();
-}
-
-template <> inline const PadV2Options *Operator::builtin_options_as<PadV2Options>() const
-{
- return builtin_options_as_PadV2Options();
-}
-
-template <> inline const GreaterOptions *Operator::builtin_options_as<GreaterOptions>() const
-{
- return builtin_options_as_GreaterOptions();
-}
-
-template <>
-inline const GreaterEqualOptions *Operator::builtin_options_as<GreaterEqualOptions>() const
-{
- return builtin_options_as_GreaterEqualOptions();
-}
-
-template <> inline const LessEqualOptions *Operator::builtin_options_as<LessEqualOptions>() const
-{
- return builtin_options_as_LessEqualOptions();
-}
-
-template <> inline const SelectOptions *Operator::builtin_options_as<SelectOptions>() const
-{
- return builtin_options_as_SelectOptions();
-}
-
-template <> inline const SliceOptions *Operator::builtin_options_as<SliceOptions>() const
-{
- return builtin_options_as_SliceOptions();
-}
-
-template <>
-inline const TransposeConvOptions *Operator::builtin_options_as<TransposeConvOptions>() const
-{
- return builtin_options_as_TransposeConvOptions();
-}
-
-template <>
-inline const SparseToDenseOptions *Operator::builtin_options_as<SparseToDenseOptions>() const
-{
- return builtin_options_as_SparseToDenseOptions();
-}
-
-template <> inline const TileOptions *Operator::builtin_options_as<TileOptions>() const
-{
- return builtin_options_as_TileOptions();
-}
-
-template <> inline const ExpandDimsOptions *Operator::builtin_options_as<ExpandDimsOptions>() const
-{
- return builtin_options_as_ExpandDimsOptions();
-}
-
-template <> inline const EqualOptions *Operator::builtin_options_as<EqualOptions>() const
-{
- return builtin_options_as_EqualOptions();
-}
-
-template <> inline const NotEqualOptions *Operator::builtin_options_as<NotEqualOptions>() const
-{
- return builtin_options_as_NotEqualOptions();
-}
-
-template <> inline const ShapeOptions *Operator::builtin_options_as<ShapeOptions>() const
-{
- return builtin_options_as_ShapeOptions();
-}
-
-template <> inline const PowOptions *Operator::builtin_options_as<PowOptions>() const
-{
- return builtin_options_as_PowOptions();
-}
-
-template <> inline const ArgMinOptions *Operator::builtin_options_as<ArgMinOptions>() const
-{
- return builtin_options_as_ArgMinOptions();
-}
-
-template <> inline const FakeQuantOptions *Operator::builtin_options_as<FakeQuantOptions>() const
-{
- return builtin_options_as_FakeQuantOptions();
-}
-
-template <> inline const PackOptions *Operator::builtin_options_as<PackOptions>() const
-{
- return builtin_options_as_PackOptions();
-}
-
-template <> inline const LogicalOrOptions *Operator::builtin_options_as<LogicalOrOptions>() const
-{
- return builtin_options_as_LogicalOrOptions();
-}
-
-template <> inline const OneHotOptions *Operator::builtin_options_as<OneHotOptions>() const
-{
- return builtin_options_as_OneHotOptions();
-}
-
-template <> inline const LogicalAndOptions *Operator::builtin_options_as<LogicalAndOptions>() const
-{
- return builtin_options_as_LogicalAndOptions();
-}
-
-template <> inline const LogicalNotOptions *Operator::builtin_options_as<LogicalNotOptions>() const
-{
- return builtin_options_as_LogicalNotOptions();
-}
-
-template <> inline const UnpackOptions *Operator::builtin_options_as<UnpackOptions>() const
-{
- return builtin_options_as_UnpackOptions();
-}
-
-template <> inline const FloorDivOptions *Operator::builtin_options_as<FloorDivOptions>() const
-{
- return builtin_options_as_FloorDivOptions();
-}
-
-template <> inline const SquareOptions *Operator::builtin_options_as<SquareOptions>() const
-{
- return builtin_options_as_SquareOptions();
-}
-
-template <> inline const ZerosLikeOptions *Operator::builtin_options_as<ZerosLikeOptions>() const
-{
- return builtin_options_as_ZerosLikeOptions();
-}
-
-template <> inline const FillOptions *Operator::builtin_options_as<FillOptions>() const
-{
- return builtin_options_as_FillOptions();
-}
-
-template <>
-inline const BidirectionalSequenceLSTMOptions *
-Operator::builtin_options_as<BidirectionalSequenceLSTMOptions>() const
-{
- return builtin_options_as_BidirectionalSequenceLSTMOptions();
-}
-
-template <>
-inline const BidirectionalSequenceRNNOptions *
-Operator::builtin_options_as<BidirectionalSequenceRNNOptions>() const
-{
- return builtin_options_as_BidirectionalSequenceRNNOptions();
-}
-
-template <>
-inline const UnidirectionalSequenceLSTMOptions *
-Operator::builtin_options_as<UnidirectionalSequenceLSTMOptions>() const
-{
- return builtin_options_as_UnidirectionalSequenceLSTMOptions();
-}
-
-template <> inline const FloorModOptions *Operator::builtin_options_as<FloorModOptions>() const
-{
- return builtin_options_as_FloorModOptions();
-}
-
-template <> inline const RangeOptions *Operator::builtin_options_as<RangeOptions>() const
-{
- return builtin_options_as_RangeOptions();
-}
-
-template <>
-inline const ResizeNearestNeighborOptions *
-Operator::builtin_options_as<ResizeNearestNeighborOptions>() const
-{
- return builtin_options_as_ResizeNearestNeighborOptions();
-}
-
-template <> inline const LeakyReluOptions *Operator::builtin_options_as<LeakyReluOptions>() const
-{
- return builtin_options_as_LeakyReluOptions();
-}
-
-template <>
-inline const SquaredDifferenceOptions *
-Operator::builtin_options_as<SquaredDifferenceOptions>() const
-{
- return builtin_options_as_SquaredDifferenceOptions();
-}
-
-template <> inline const MirrorPadOptions *Operator::builtin_options_as<MirrorPadOptions>() const
-{
- return builtin_options_as_MirrorPadOptions();
-}
-
-template <> inline const AbsOptions *Operator::builtin_options_as<AbsOptions>() const
-{
- return builtin_options_as_AbsOptions();
-}
-
-template <> inline const SplitVOptions *Operator::builtin_options_as<SplitVOptions>() const
-{
- return builtin_options_as_SplitVOptions();
-}
-
-struct OperatorBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_opcode_index(uint32_t opcode_index)
- {
- fbb_.AddElement<uint32_t>(Operator::VT_OPCODE_INDEX, opcode_index, 0);
- }
- void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs)
- {
- fbb_.AddOffset(Operator::VT_INPUTS, inputs);
- }
- void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs)
- {
- fbb_.AddOffset(Operator::VT_OUTPUTS, outputs);
- }
- void add_builtin_options_type(BuiltinOptions builtin_options_type)
- {
- fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE,
- static_cast<uint8_t>(builtin_options_type), 0);
- }
- void add_builtin_options(flatbuffers::Offset<void> builtin_options)
- {
- fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options);
- }
- void add_custom_options(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options)
- {
- fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options);
- }
- void add_custom_options_format(CustomOptionsFormat custom_options_format)
- {
- fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT,
- static_cast<int8_t>(custom_options_format), 0);
- }
- void add_mutating_variable_inputs(
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
- {
- fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs);
- }
- explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- OperatorBuilder &operator=(const OperatorBuilder &);
- flatbuffers::Offset<Operator> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Operator>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Operator>
-CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
- CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0)
-{
- OperatorBuilder builder_(_fbb);
- builder_.add_mutating_variable_inputs(mutating_variable_inputs);
- builder_.add_custom_options(custom_options);
- builder_.add_builtin_options(builtin_options);
- builder_.add_outputs(outputs);
- builder_.add_inputs(inputs);
- builder_.add_opcode_index(opcode_index);
- builder_.add_custom_options_format(custom_options_format);
- builder_.add_builtin_options_type(builtin_options_type);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Operator>
-CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- const std::vector<int32_t> *inputs = nullptr,
- const std::vector<int32_t> *outputs = nullptr,
- BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- const std::vector<uint8_t> *custom_options = nullptr,
- CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
- const std::vector<uint8_t> *mutating_variable_inputs = nullptr)
-{
- return neurun_tflite::CreateOperator(
- _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options,
- custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format,
- mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0);
-}
-
-struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_TENSORS = 4,
- VT_INPUTS = 6,
- VT_OUTPUTS = 8,
- VT_OPERATORS = 10,
- VT_NAME = 12
- };
- const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *tensors() const
- {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(VT_TENSORS);
- }
- const flatbuffers::Vector<int32_t> *inputs() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
- }
- const flatbuffers::Vector<int32_t> *outputs() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
- }
- const flatbuffers::Vector<flatbuffers::Offset<Operator>> *operators() const
- {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(VT_OPERATORS);
- }
- const flatbuffers::String *name() const
- {
- return GetPointer<const flatbuffers::String *>(VT_NAME);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_TENSORS) &&
- verifier.VerifyVector(tensors()) && verifier.VerifyVectorOfTables(tensors()) &&
- VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) &&
- VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
- VerifyOffset(verifier, VT_OPERATORS) && verifier.VerifyVector(operators()) &&
- verifier.VerifyVectorOfTables(operators()) && VerifyOffset(verifier, VT_NAME) &&
- verifier.VerifyString(name()) && verifier.EndTable();
- }
-};
-
-struct SubGraphBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors)
- {
- fbb_.AddOffset(SubGraph::VT_TENSORS, tensors);
- }
- void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs)
- {
- fbb_.AddOffset(SubGraph::VT_INPUTS, inputs);
- }
- void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs)
- {
- fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
- }
- void
- add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators)
- {
- fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
- }
- void add_name(flatbuffers::Offset<flatbuffers::String> name)
- {
- fbb_.AddOffset(SubGraph::VT_NAME, name);
- }
- explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- SubGraphBuilder &operator=(const SubGraphBuilder &);
- flatbuffers::Offset<SubGraph> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<SubGraph>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<SubGraph> CreateSubGraph(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0)
-{
- SubGraphBuilder builder_(_fbb);
- builder_.add_name(name);
- builder_.add_operators(operators);
- builder_.add_outputs(outputs);
- builder_.add_inputs(inputs);
- builder_.add_tensors(tensors);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<SubGraph>
-CreateSubGraphDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr,
- const std::vector<int32_t> *inputs = nullptr,
- const std::vector<int32_t> *outputs = nullptr,
- const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr,
- const char *name = nullptr)
-{
- return neurun_tflite::CreateSubGraph(
- _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
- inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
- operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0,
- name ? _fbb.CreateString(name) : 0);
-}
-
-struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_DATA = 4
- };
- const flatbuffers::Vector<uint8_t> *data() const
- {
- return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_DATA);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_DATA) &&
- verifier.VerifyVector(data()) && verifier.EndTable();
- }
-};
-
-struct BufferBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data)
- {
- fbb_.AddOffset(Buffer::VT_DATA, data);
- }
- explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- BufferBuilder &operator=(const BufferBuilder &);
- flatbuffers::Offset<Buffer> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Buffer>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Buffer>
-CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data = 0)
-{
- BufferBuilder builder_(_fbb);
- builder_.add_data(data);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Buffer> CreateBufferDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<uint8_t> *data = nullptr)
-{
- return neurun_tflite::CreateBuffer(_fbb, data ? _fbb.CreateVector<uint8_t>(*data) : 0);
-}
-
-struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
-{
- enum
- {
- VT_VERSION = 4,
- VT_OPERATOR_CODES = 6,
- VT_SUBGRAPHS = 8,
- VT_DESCRIPTION = 10,
- VT_BUFFERS = 12,
- VT_METADATA_BUFFER = 14
- };
- uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); }
- const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const
- {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>(
- VT_OPERATOR_CODES);
- }
- const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const
- {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(VT_SUBGRAPHS);
- }
- const flatbuffers::String *description() const
- {
- return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
- }
- const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *buffers() const
- {
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(VT_BUFFERS);
- }
- const flatbuffers::Vector<int32_t> *metadata_buffer() const
- {
- return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER);
- }
- bool Verify(flatbuffers::Verifier &verifier) const
- {
- return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_VERSION) &&
- VerifyOffset(verifier, VT_OPERATOR_CODES) && verifier.VerifyVector(operator_codes()) &&
- verifier.VerifyVectorOfTables(operator_codes()) &&
- VerifyOffset(verifier, VT_SUBGRAPHS) && verifier.VerifyVector(subgraphs()) &&
- verifier.VerifyVectorOfTables(subgraphs()) && VerifyOffset(verifier, VT_DESCRIPTION) &&
- verifier.VerifyString(description()) && VerifyOffset(verifier, VT_BUFFERS) &&
- verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) &&
- VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) &&
- verifier.EndTable();
- }
-};
-
-struct ModelBuilder
-{
- flatbuffers::FlatBufferBuilder &fbb_;
- flatbuffers::uoffset_t start_;
- void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); }
- void add_operator_codes(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes)
- {
- fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
- }
- void
- add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs)
- {
- fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
- }
- void add_description(flatbuffers::Offset<flatbuffers::String> description)
- {
- fbb_.AddOffset(Model::VT_DESCRIPTION, description);
- }
- void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers)
- {
- fbb_.AddOffset(Model::VT_BUFFERS, buffers);
- }
- void add_metadata_buffer(flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer)
- {
- fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer);
- }
- explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
- {
- start_ = fbb_.StartTable();
- }
- ModelBuilder &operator=(const ModelBuilder &);
- flatbuffers::Offset<Model> Finish()
- {
- const auto end = fbb_.EndTable(start_);
- auto o = flatbuffers::Offset<Model>(end);
- return o;
- }
-};
-
-inline flatbuffers::Offset<Model> CreateModel(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0,
- flatbuffers::Offset<flatbuffers::String> description = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0)
-{
- ModelBuilder builder_(_fbb);
- builder_.add_metadata_buffer(metadata_buffer);
- builder_.add_buffers(buffers);
- builder_.add_description(description);
- builder_.add_subgraphs(subgraphs);
- builder_.add_operator_codes(operator_codes);
- builder_.add_version(version);
- return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Model>
-CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes = nullptr,
- const std::vector<flatbuffers::Offset<SubGraph>> *subgraphs = nullptr,
- const char *description = nullptr,
- const std::vector<flatbuffers::Offset<Buffer>> *buffers = nullptr,
- const std::vector<int32_t> *metadata_buffer = nullptr)
-{
- return neurun_tflite::CreateModel(
- _fbb, version,
- operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0,
- subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0,
- description ? _fbb.CreateString(description) : 0,
- buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0,
- metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0);
-}
-
-inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
- QuantizationDetails type)
-{
- switch (type)
- {
- case QuantizationDetails_NONE:
- {
- return true;
- }
- case QuantizationDetails_CustomQuantization:
- {
- auto ptr = reinterpret_cast<const CustomQuantization *>(obj);
- return verifier.VerifyTable(ptr);
- }
- default:
- return false;
- }
-}
-
-inline bool
-VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
- const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
- const flatbuffers::Vector<uint8_t> *types)
-{
- if (!values || !types)
- return !values && !types;
- if (values->size() != types->size())
- return false;
- for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i)
- {
- if (!VerifyQuantizationDetails(verifier, values->Get(i),
- types->GetEnum<QuantizationDetails>(i)))
- {
- return false;
- }
- }
- return true;
-}
-
-inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj,
- BuiltinOptions type)
-{
- switch (type)
- {
- case BuiltinOptions_NONE:
- {
- return true;
- }
- case BuiltinOptions_Conv2DOptions:
- {
- auto ptr = reinterpret_cast<const Conv2DOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_DepthwiseConv2DOptions:
- {
- auto ptr = reinterpret_cast<const DepthwiseConv2DOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ConcatEmbeddingsOptions:
- {
- auto ptr = reinterpret_cast<const ConcatEmbeddingsOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LSHProjectionOptions:
- {
- auto ptr = reinterpret_cast<const LSHProjectionOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_Pool2DOptions:
- {
- auto ptr = reinterpret_cast<const Pool2DOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SVDFOptions:
- {
- auto ptr = reinterpret_cast<const SVDFOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_RNNOptions:
- {
- auto ptr = reinterpret_cast<const RNNOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_FullyConnectedOptions:
- {
- auto ptr = reinterpret_cast<const FullyConnectedOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SoftmaxOptions:
- {
- auto ptr = reinterpret_cast<const SoftmaxOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ConcatenationOptions:
- {
- auto ptr = reinterpret_cast<const ConcatenationOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_AddOptions:
- {
- auto ptr = reinterpret_cast<const AddOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_L2NormOptions:
- {
- auto ptr = reinterpret_cast<const L2NormOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LocalResponseNormalizationOptions:
- {
- auto ptr = reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LSTMOptions:
- {
- auto ptr = reinterpret_cast<const LSTMOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ResizeBilinearOptions:
- {
- auto ptr = reinterpret_cast<const ResizeBilinearOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_CallOptions:
- {
- auto ptr = reinterpret_cast<const CallOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ReshapeOptions:
- {
- auto ptr = reinterpret_cast<const ReshapeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SkipGramOptions:
- {
- auto ptr = reinterpret_cast<const SkipGramOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SpaceToDepthOptions:
- {
- auto ptr = reinterpret_cast<const SpaceToDepthOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_EmbeddingLookupSparseOptions:
- {
- auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_MulOptions:
- {
- auto ptr = reinterpret_cast<const MulOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_PadOptions:
- {
- auto ptr = reinterpret_cast<const PadOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_GatherOptions:
- {
- auto ptr = reinterpret_cast<const GatherOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_BatchToSpaceNDOptions:
- {
- auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SpaceToBatchNDOptions:
- {
- auto ptr = reinterpret_cast<const SpaceToBatchNDOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_TransposeOptions:
- {
- auto ptr = reinterpret_cast<const TransposeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ReducerOptions:
- {
- auto ptr = reinterpret_cast<const ReducerOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SubOptions:
- {
- auto ptr = reinterpret_cast<const SubOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_DivOptions:
- {
- auto ptr = reinterpret_cast<const DivOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SqueezeOptions:
- {
- auto ptr = reinterpret_cast<const SqueezeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SequenceRNNOptions:
- {
- auto ptr = reinterpret_cast<const SequenceRNNOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_StridedSliceOptions:
- {
- auto ptr = reinterpret_cast<const StridedSliceOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ExpOptions:
- {
- auto ptr = reinterpret_cast<const ExpOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_TopKV2Options:
- {
- auto ptr = reinterpret_cast<const TopKV2Options *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SplitOptions:
- {
- auto ptr = reinterpret_cast<const SplitOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LogSoftmaxOptions:
- {
- auto ptr = reinterpret_cast<const LogSoftmaxOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_CastOptions:
- {
- auto ptr = reinterpret_cast<const CastOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_DequantizeOptions:
- {
- auto ptr = reinterpret_cast<const DequantizeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_MaximumMinimumOptions:
- {
- auto ptr = reinterpret_cast<const MaximumMinimumOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ArgMaxOptions:
- {
- auto ptr = reinterpret_cast<const ArgMaxOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LessOptions:
- {
- auto ptr = reinterpret_cast<const LessOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_NegOptions:
- {
- auto ptr = reinterpret_cast<const NegOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_PadV2Options:
- {
- auto ptr = reinterpret_cast<const PadV2Options *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_GreaterOptions:
- {
- auto ptr = reinterpret_cast<const GreaterOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_GreaterEqualOptions:
- {
- auto ptr = reinterpret_cast<const GreaterEqualOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LessEqualOptions:
- {
- auto ptr = reinterpret_cast<const LessEqualOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SelectOptions:
- {
- auto ptr = reinterpret_cast<const SelectOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SliceOptions:
- {
- auto ptr = reinterpret_cast<const SliceOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_TransposeConvOptions:
- {
- auto ptr = reinterpret_cast<const TransposeConvOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SparseToDenseOptions:
- {
- auto ptr = reinterpret_cast<const SparseToDenseOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_TileOptions:
- {
- auto ptr = reinterpret_cast<const TileOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ExpandDimsOptions:
- {
- auto ptr = reinterpret_cast<const ExpandDimsOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_EqualOptions:
- {
- auto ptr = reinterpret_cast<const EqualOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_NotEqualOptions:
- {
- auto ptr = reinterpret_cast<const NotEqualOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ShapeOptions:
- {
- auto ptr = reinterpret_cast<const ShapeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_PowOptions:
- {
- auto ptr = reinterpret_cast<const PowOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ArgMinOptions:
- {
- auto ptr = reinterpret_cast<const ArgMinOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_FakeQuantOptions:
- {
- auto ptr = reinterpret_cast<const FakeQuantOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_PackOptions:
- {
- auto ptr = reinterpret_cast<const PackOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LogicalOrOptions:
- {
- auto ptr = reinterpret_cast<const LogicalOrOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_OneHotOptions:
- {
- auto ptr = reinterpret_cast<const OneHotOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LogicalAndOptions:
- {
- auto ptr = reinterpret_cast<const LogicalAndOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LogicalNotOptions:
- {
- auto ptr = reinterpret_cast<const LogicalNotOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_UnpackOptions:
- {
- auto ptr = reinterpret_cast<const UnpackOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_FloorDivOptions:
- {
- auto ptr = reinterpret_cast<const FloorDivOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SquareOptions:
- {
- auto ptr = reinterpret_cast<const SquareOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ZerosLikeOptions:
- {
- auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_FillOptions:
- {
- auto ptr = reinterpret_cast<const FillOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_BidirectionalSequenceLSTMOptions:
- {
- auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_BidirectionalSequenceRNNOptions:
- {
- auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_UnidirectionalSequenceLSTMOptions:
- {
- auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_FloorModOptions:
- {
- auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_RangeOptions:
- {
- auto ptr = reinterpret_cast<const RangeOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_ResizeNearestNeighborOptions:
- {
- auto ptr = reinterpret_cast<const ResizeNearestNeighborOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_LeakyReluOptions:
- {
- auto ptr = reinterpret_cast<const LeakyReluOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SquaredDifferenceOptions:
- {
- auto ptr = reinterpret_cast<const SquaredDifferenceOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_MirrorPadOptions:
- {
- auto ptr = reinterpret_cast<const MirrorPadOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_AbsOptions:
- {
- auto ptr = reinterpret_cast<const AbsOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- case BuiltinOptions_SplitVOptions:
- {
- auto ptr = reinterpret_cast<const SplitVOptions *>(obj);
- return verifier.VerifyTable(ptr);
- }
- default:
- return false;
- }
-}
-
-inline bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
- const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
- const flatbuffers::Vector<uint8_t> *types)
-{
- if (!values || !types)
- return !values && !types;
- if (values->size() != types->size())
- return false;
- for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i)
- {
- if (!VerifyBuiltinOptions(verifier, values->Get(i), types->GetEnum<BuiltinOptions>(i)))
- {
- return false;
- }
- }
- return true;
-}
-
-inline const neurun_tflite::Model *GetModel(const void *buf)
-{
- return flatbuffers::GetRoot<neurun_tflite::Model>(buf);
-}
-
-inline const neurun_tflite::Model *GetSizePrefixedModel(const void *buf)
-{
- return flatbuffers::GetSizePrefixedRoot<neurun_tflite::Model>(buf);
-}
-
-inline const char *ModelIdentifier() { return "TFL3"; }
-
-inline bool ModelBufferHasIdentifier(const void *buf)
-{
- return flatbuffers::BufferHasIdentifier(buf, ModelIdentifier());
-}
-
-inline bool VerifyModelBuffer(flatbuffers::Verifier &verifier)
-{
- return verifier.VerifyBuffer<neurun_tflite::Model>(ModelIdentifier());
-}
-
-inline bool VerifySizePrefixedModelBuffer(flatbuffers::Verifier &verifier)
-{
- return verifier.VerifySizePrefixedBuffer<neurun_tflite::Model>(ModelIdentifier());
-}
-
-inline const char *ModelExtension() { return "tflite"; }
-
-inline void FinishModelBuffer(flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<neurun_tflite::Model> root)
-{
- fbb.Finish(root, ModelIdentifier());
-}
-
-inline void FinishSizePrefixedModelBuffer(flatbuffers::FlatBufferBuilder &fbb,
- flatbuffers::Offset<neurun_tflite::Model> root)
-{
- fbb.FinishSizePrefixed(root, ModelIdentifier());
-}
-
-} // namespace neurun_tflite
-
-#endif // FLATBUFFERS_GENERATED_SCHEMA_NEURUN_TFLITE_H_
diff --git a/runtime/neurun/frontend/tflite/tflite_schema.fbs b/runtime/neurun/frontend/tflite/tflite_schema.fbs
deleted file mode 100644
index ede4dfa3a..000000000
--- a/runtime/neurun/frontend/tflite/tflite_schema.fbs
+++ /dev/null
@@ -1,795 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Revision History
-// Version 0: Initial version.
-// Version 1: Add subgraphs to schema.
-// Version 2: Rename operators to conform to NN API.
-// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
-
-// Change namespace to neurun_tflite
-namespace neurun_tflite;
-
-// This corresponds to the version.
-file_identifier "TFL3";
-// File extension of any written files.
-file_extension "tflite";
-
-// IMPORTANT: All new members of tables, enums and unions must be added at the
-// end to ensure backwards compatibility.
-
-// The type of data stored in a tensor.
-enum TensorType : byte {
- FLOAT32 = 0,
- FLOAT16 = 1,
- INT32 = 2,
- UINT8 = 3,
- INT64 = 4,
- STRING = 5,
- BOOL = 6,
- INT16 = 7,
- COMPLEX64 = 8,
- INT8 = 9,
-}
-
-// Custom quantization parameters for experimenting with new quantization
-// techniques.
-table CustomQuantization {
- custom:[ubyte] (force_align: 16);
-}
-
-// Represents a specific quantization technique's parameters.
-union QuantizationDetails {
- CustomQuantization,
-}
-
-// Parameters for converting a quantized tensor back to float.
-table QuantizationParameters {
- // These four parameters are the asymmetric linear quantization parameters.
- // Given a quantized value q, the corresponding float value f should be:
- // f = scale * (q - zero_point)
- // For other quantization types, the QuantizationDetails below is used.
- min:[float]; // For importing back into tensorflow.
- max:[float]; // For importing back into tensorflow.
- scale:[float]; // For dequantizing the tensor's values.
- zero_point:[long];
-
- // If this is not none, the quantization parameters above are ignored and the
- // value of the QuantizationDetails union below should be used.
- details:QuantizationDetails;
-}
-
-table Tensor {
- // The tensor shape. The meaning of each entry is operator-specific but
- // builtin ops use: [batch size, height, width, number of channels] (That's
- // Tensorflow's NHWC).
- shape:[int];
- type:TensorType;
- // An index that refers to the buffers table at the root of the model. Or,
- // if there is no data buffer associated (i.e. intermediate results), then
- // this is 0 (which refers to an always existent empty buffer).
- //
- // The data_buffer itself is an opaque container, with the assumption that the
- // target device is little-endian. In addition, all builtin operators assume
- // the memory is ordered such that if `shape` is [4, 3, 2], then index
- // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
- buffer:uint;
- name:string; // For debugging and importing back into tensorflow.
- quantization:QuantizationParameters; // Optional.
-
- is_variable:bool = false;
-}
-
-// A list of builtin operators. Builtin operators are slightly faster than custom
-// ones, but not by much. Moreover, while custom operators accept an opaque
-// object containing configuration parameters, builtins have a predetermined
-// set of acceptable options.
-enum BuiltinOperator : byte {
- ADD = 0,
- AVERAGE_POOL_2D = 1,
- CONCATENATION = 2,
- CONV_2D = 3,
- DEPTHWISE_CONV_2D = 4,
- // DEPTH_TO_SPACE = 5,
- DEQUANTIZE = 6,
- EMBEDDING_LOOKUP = 7,
- FLOOR = 8,
- FULLY_CONNECTED = 9,
- HASHTABLE_LOOKUP = 10,
- L2_NORMALIZATION = 11,
- L2_POOL_2D = 12,
- LOCAL_RESPONSE_NORMALIZATION = 13,
- LOGISTIC = 14,
- LSH_PROJECTION = 15,
- LSTM = 16,
- MAX_POOL_2D = 17,
- MUL = 18,
- RELU = 19,
- // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
- // since different model developers use RELU1 in different ways. Never
- // create another op called RELU1.
- RELU_N1_TO_1 = 20,
- RELU6 = 21,
- RESHAPE = 22,
- RESIZE_BILINEAR = 23,
- RNN = 24,
- SOFTMAX = 25,
- SPACE_TO_DEPTH = 26,
- SVDF = 27,
- TANH = 28,
- // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
- CONCAT_EMBEDDINGS = 29,
- SKIP_GRAM = 30,
- CALL = 31,
- CUSTOM = 32,
- EMBEDDING_LOOKUP_SPARSE = 33,
- PAD = 34,
- UNIDIRECTIONAL_SEQUENCE_RNN = 35,
- GATHER = 36,
- BATCH_TO_SPACE_ND = 37,
- SPACE_TO_BATCH_ND = 38,
- TRANSPOSE = 39,
- MEAN = 40,
- SUB = 41,
- DIV = 42,
- SQUEEZE = 43,
- UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
- STRIDED_SLICE = 45,
- BIDIRECTIONAL_SEQUENCE_RNN = 46,
- EXP = 47,
- TOPK_V2 = 48,
- SPLIT = 49,
- LOG_SOFTMAX = 50,
- // DELEGATE is a special op type for the operations which are delegated to
- // other backends.
- // WARNING: Experimental interface, subject to change
- DELEGATE = 51,
- BIDIRECTIONAL_SEQUENCE_LSTM = 52,
- CAST = 53,
- PRELU = 54,
- MAXIMUM = 55,
- ARG_MAX = 56,
- MINIMUM = 57,
- LESS = 58,
- NEG = 59,
- PADV2 = 60,
- GREATER = 61,
- GREATER_EQUAL = 62,
- LESS_EQUAL = 63,
- SELECT = 64,
- SLICE = 65,
- SIN = 66,
- TRANSPOSE_CONV = 67,
- SPARSE_TO_DENSE = 68,
- TILE = 69,
- EXPAND_DIMS = 70,
- EQUAL = 71,
- NOT_EQUAL = 72,
- LOG = 73,
- SUM = 74,
- SQRT = 75,
- RSQRT = 76,
- SHAPE = 77,
- POW = 78,
- ARG_MIN = 79,
- FAKE_QUANT = 80,
- REDUCE_PROD = 81,
- REDUCE_MAX = 82,
- PACK = 83,
- LOGICAL_OR = 84,
- ONE_HOT = 85,
- LOGICAL_AND = 86,
- LOGICAL_NOT = 87,
- UNPACK = 88,
- REDUCE_MIN = 89,
- FLOOR_DIV = 90,
- REDUCE_ANY = 91,
- SQUARE = 92,
- ZEROS_LIKE = 93,
- FILL = 94,
- FLOOR_MOD = 95,
- RANGE = 96,
- RESIZE_NEAREST_NEIGHBOR = 97,
- LEAKY_RELU = 98,
- SQUARED_DIFFERENCE = 99,
- MIRROR_PAD = 100,
- ABS = 101,
- SPLIT_V = 102,
-}
-
-// Options for the builtin operators.
-union BuiltinOptions {
- Conv2DOptions,
- DepthwiseConv2DOptions,
- ConcatEmbeddingsOptions,
- LSHProjectionOptions,
- Pool2DOptions,
- SVDFOptions,
- RNNOptions,
- FullyConnectedOptions,
- SoftmaxOptions,
- ConcatenationOptions,
- AddOptions,
- L2NormOptions,
- LocalResponseNormalizationOptions,
- LSTMOptions,
- ResizeBilinearOptions,
- CallOptions,
- ReshapeOptions,
- SkipGramOptions,
- SpaceToDepthOptions,
- EmbeddingLookupSparseOptions,
- MulOptions,
- PadOptions,
- GatherOptions,
- BatchToSpaceNDOptions,
- SpaceToBatchNDOptions,
- TransposeOptions,
- ReducerOptions,
- SubOptions,
- DivOptions,
- SqueezeOptions,
- SequenceRNNOptions,
- StridedSliceOptions,
- ExpOptions,
- TopKV2Options,
- SplitOptions,
- LogSoftmaxOptions,
- CastOptions,
- DequantizeOptions,
- MaximumMinimumOptions,
- ArgMaxOptions,
- LessOptions,
- NegOptions,
- PadV2Options,
- GreaterOptions,
- GreaterEqualOptions,
- LessEqualOptions,
- SelectOptions,
- SliceOptions,
- TransposeConvOptions,
- SparseToDenseOptions,
- TileOptions,
- ExpandDimsOptions,
- EqualOptions,
- NotEqualOptions,
- ShapeOptions,
- PowOptions,
- ArgMinOptions,
- FakeQuantOptions,
- PackOptions,
- LogicalOrOptions,
- OneHotOptions,
- LogicalAndOptions,
- LogicalNotOptions,
- UnpackOptions,
- FloorDivOptions,
- SquareOptions,
- ZerosLikeOptions,
- FillOptions,
- BidirectionalSequenceLSTMOptions,
- BidirectionalSequenceRNNOptions,
- UnidirectionalSequenceLSTMOptions,
- FloorModOptions,
- RangeOptions,
- ResizeNearestNeighborOptions,
- LeakyReluOptions,
- SquaredDifferenceOptions,
- MirrorPadOptions,
- AbsOptions,
- SplitVOptions,
-}
-
-enum Padding : byte { SAME, VALID }
-
-enum ActivationFunctionType : byte {
- NONE = 0,
- RELU = 1,
- RELU_N1_TO_1 = 2,
- RELU6 = 3,
- TANH = 4,
- SIGN_BIT = 5,
-}
-
-table Conv2DOptions {
- padding:Padding;
- stride_w:int;
- stride_h:int;
- fused_activation_function:ActivationFunctionType;
- dilation_w_factor:int = 1;
- dilation_h_factor:int = 1;
-}
-
-table Pool2DOptions {
- padding:Padding;
- stride_w:int;
- stride_h:int;
- filter_width:int;
- filter_height:int;
- fused_activation_function:ActivationFunctionType;
-}
-
-table DepthwiseConv2DOptions {
- // Parameters for DepthwiseConv version 1 or above.
- padding:Padding;
- stride_w:int;
- stride_h:int;
- depth_multiplier:int;
- fused_activation_function:ActivationFunctionType;
- // Parameters for DepthwiseConv version 2 or above.
- dilation_w_factor:int = 1;
- dilation_h_factor:int = 1;
-}
-
-table ConcatEmbeddingsOptions {
- num_channels:int;
- num_columns_per_channel:[int];
- embedding_dim_per_channel:[int]; // This could be inferred from parameters.
-}
-
-enum LSHProjectionType: byte {
- UNKNOWN = 0,
- SPARSE = 1,
- DENSE = 2,
-}
-
-table LSHProjectionOptions {
- type: LSHProjectionType;
-}
-
-table SVDFOptions {
- rank:int;
- fused_activation_function:ActivationFunctionType;
-}
-
-// An implementation of TensorFlow RNNCell.
-table RNNOptions {
- fused_activation_function:ActivationFunctionType;
-}
-
-// An implementation of TensorFlow dynamic_rnn with RNNCell.
-table SequenceRNNOptions {
- time_major:bool;
- fused_activation_function:ActivationFunctionType;
-}
-
-// An implementation of TensorFlow bidrectional_dynamic_rnn with RNNCell.
-table BidirectionalSequenceRNNOptions {
- time_major:bool;
- fused_activation_function:ActivationFunctionType;
- merge_outputs: bool;
-}
-
-enum FullyConnectedOptionsWeightsFormat: byte {
- DEFAULT = 0,
- SHUFFLED4x16INT8 = 1,
-}
-
-// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
-table FullyConnectedOptions {
- // Parameters for FullyConnected version 1 or above.
- fused_activation_function:ActivationFunctionType;
-
- // Parameters for FullyConnected version 2 or above.
- weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
-}
-
-table SoftmaxOptions {
- beta: float;
-}
-
-// An implementation of TensorFlow concat.
-table ConcatenationOptions {
- axis:int;
- fused_activation_function:ActivationFunctionType;
-}
-
-table AddOptions {
- fused_activation_function:ActivationFunctionType;
-}
-
-table MulOptions {
- fused_activation_function:ActivationFunctionType;
-}
-
-table L2NormOptions {
- fused_activation_function:ActivationFunctionType;
-}
-
-table LocalResponseNormalizationOptions {
- radius:int;
- bias:float;
- alpha:float;
- beta:float;
-}
-
-enum LSTMKernelType : byte {
- // Full LSTM kernel which supports peephole and projection.
- FULL = 0,
- // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
- BASIC = 1,
-}
-
-// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
-table LSTMOptions {
- // Parameters for LSTM version 1 or above.
- fused_activation_function:ActivationFunctionType;
- cell_clip: float; // Optional, 0.0 means no clipping
- proj_clip: float; // Optional, 0.0 means no clipping
-
- // Parameters for LSTM version 2 or above.
- // Basic kernel is only supported in version 2 or above.
- kernel_type: LSTMKernelType = FULL;
-}
-
-// An implementation of TensorFlow dynamic_rnn with LSTMCell.
-table UnidirectionalSequenceLSTMOptions {
- fused_activation_function:ActivationFunctionType;
- cell_clip: float; // Optional, 0.0 means no clipping
- proj_clip: float; // Optional, 0.0 means no clipping
-
- // If true then first dimension is sequence, otherwise batch.
- time_major:bool;
-}
-
-table BidirectionalSequenceLSTMOptions {
- fused_activation_function:ActivationFunctionType;
- cell_clip: float; // Optional, 0.0 means no clipping
- proj_clip: float; // Optional, 0.0 means no clipping
-
- // If true, store the outputs of both directions into the first output.
- merge_outputs: bool;
-}
-
-table ResizeBilinearOptions {
- new_height: int (deprecated);
- new_width: int (deprecated);
- align_corners: bool;
-}
-
-table ResizeNearestNeighborOptions {
- align_corners: bool;
-}
-
-// A call operation options
-table CallOptions {
- // The subgraph index that needs to be called.
- subgraph:uint;
-}
-
-table PadOptions {
-}
-
-table PadV2Options {
-}
-
-table ReshapeOptions {
- new_shape:[int];
-}
-
-table SpaceToBatchNDOptions {
-}
-
-table BatchToSpaceNDOptions {
-}
-
-table SkipGramOptions {
- ngram_size: int;
- max_skip_size: int;
- include_all_ngrams: bool;
-}
-
-table SpaceToDepthOptions {
- block_size: int;
-}
-
-table SubOptions {
- fused_activation_function:ActivationFunctionType;
-}
-
-table DivOptions {
- fused_activation_function:ActivationFunctionType;
-}
-
-table TopKV2Options {
-}
-
-enum CombinerType : byte {
- SUM = 0,
- MEAN = 1,
- SQRTN = 2,
-}
-
-table EmbeddingLookupSparseOptions {
- combiner:CombinerType;
-}
-
-table GatherOptions {
- axis: int;
-}
-
-table TransposeOptions {
-}
-
-table ExpOptions {
-}
-
-table ReducerOptions {
- keep_dims: bool;
-}
-
-table SqueezeOptions {
- squeeze_dims:[int];
-}
-
-table SplitOptions {
- num_splits: int;
-}
-
-table SplitVOptions {
- num_splits: int;
-}
-
-table StridedSliceOptions {
- begin_mask: int;
- end_mask: int;
- ellipsis_mask: int;
- new_axis_mask: int;
- shrink_axis_mask: int;
-}
-
-table LogSoftmaxOptions {
-}
-
-table CastOptions {
- in_data_type: TensorType;
- out_data_type: TensorType;
-}
-
-table DequantizeOptions {
-}
-
-table MaximumMinimumOptions {
-}
-
-table TileOptions {
-}
-
-table ArgMaxOptions {
- output_type : TensorType;
-}
-
-table ArgMinOptions {
- output_type : TensorType;
-}
-
-table GreaterOptions {
-}
-
-table GreaterEqualOptions {
-}
-
-table LessOptions {
-}
-
-table LessEqualOptions {
-}
-
-table NegOptions {
-}
-
-table SelectOptions {
-}
-
-table SliceOptions {
-}
-
-table TransposeConvOptions {
- padding:Padding;
- stride_w:int;
- stride_h:int;
-}
-
-table ExpandDimsOptions {
-}
-
-table SparseToDenseOptions {
- validate_indices:bool;
-}
-
-table EqualOptions {
-}
-
-table NotEqualOptions {
-}
-
-table ShapeOptions {
- // Optional output type of the operation (int32 or int64). Defaults to int32.
- out_type : TensorType;
-}
-
-table PowOptions {
-}
-
-table FakeQuantOptions {
- // Parameters supported by version 1:
- min:float;
- max:float;
- num_bits:int;
-
- // Parameters supported by version 2:
- narrow_range:bool;
-}
-
-table PackOptions {
- values_count:int;
- axis:int;
-}
-
-table LogicalOrOptions {
-}
-
-table OneHotOptions {
- axis:int;
-}
-
-table AbsOptions {
-}
-
-
-table LogicalAndOptions {
-}
-
-table LogicalNotOptions {
-}
-
-table UnpackOptions {
- num:int;
- axis:int;
-}
-
-table FloorDivOptions {
-}
-
-table SquareOptions {
-}
-
-table ZerosLikeOptions {
-}
-
-table FillOptions {
-}
-
-table FloorModOptions {
-}
-
-table RangeOptions {
-}
-
-table LeakyReluOptions {
- alpha:float;
-}
-
-table SquaredDifferenceOptions {
-}
-
-enum MirrorPadMode : byte {
- // Doesn't include borders.
- REFLECT = 0,
- // Includes borders.
- SYMMETRIC = 1,
-}
-
-table MirrorPadOptions {
- mode:MirrorPadMode;
-}
-
-// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
-// builtin, or a string if the operator is custom.
-table OperatorCode {
- builtin_code:BuiltinOperator;
- custom_code:string;
-
- // The version of the operator. The version need to be bumped whenever new
- // parameters are introduced into an op.
- version:int = 1;
-}
-
-enum CustomOptionsFormat : byte {
- FLEXBUFFERS = 0,
-}
-
-// An operator takes tensors as inputs and outputs. The type of operation being
-// performed is determined by an index into the list of valid OperatorCodes,
-// while the specifics of each operations is configured using builtin_options
-// or custom_options.
-table Operator {
- // Index into the operator_codes array. Using an integer here avoids
- // complicate map lookups.
- opcode_index:uint;
-
- // Optional input and output tensors are indicated by -1.
- inputs:[int];
- outputs:[int];
-
- builtin_options:BuiltinOptions;
- custom_options:[ubyte];
- custom_options_format:CustomOptionsFormat;
-
- // A list of booleans indicating the input tensors which are being mutated by
- // this operator.(e.g. used by RNN and LSTM).
- // For example, if the "inputs" array refers to 5 tensors and the second and
- // fifth are mutable variables, then this list will contain
- // [false, true, false, false, true].
- //
- // If the list is empty, no variable is mutated in this operator.
- // The list either has the same length as `inputs`, or is empty.
- mutating_variable_inputs:[bool];
-}
-
-// The root type, defining a subgraph, which typically represents an entire
-// model.
-table SubGraph {
- // A list of all tensors used in this subgraph.
- tensors:[Tensor];
-
- // Indices of the tensors that are inputs into this subgraph. Note this is
- // the list of non-static tensors that feed into the subgraph for inference.
- inputs:[int];
-
- // Indices of the tensors that are outputs out of this subgraph. Note this is
- // the list of output tensors that are considered the product of the
- // subgraph's inference.
- outputs:[int];
-
- // All operators, in execution order.
- operators:[Operator];
-
- // Name of this subgraph (used for debugging).
- name:string;
-}
-
-// Table of raw data buffers (used for constant tensors). Referenced by tensors
-// by index. The generous alignment accommodates mmap-friendly data structures.
-table Buffer {
- data:[ubyte] (force_align: 16);
-}
-
-table Model {
- // Version of the schema.
- version:uint;
-
- // A list of all operator codes used in this model. This is
- // kept in order because operators carry an index into this
- // vector.
- operator_codes:[OperatorCode];
-
- // All the subgraphs of the model. The 0th is assumed to be the main
- // model.
- subgraphs:[SubGraph];
-
- // A description of the model.
- description:string;
-
- // Buffers of the model.
- // Note the 0th entry of this array must be an empty buffer (sentinel).
- // This is a convention so that tensors without a buffer can provide 0 as
- // their buffer.
- buffers:[Buffer];
-
- // Metadata about the model. Indirects into the existings buffers list.
- metadata_buffer:[int];
-}
-
-root_type Model;
diff --git a/runtime/neurun/sample/minimal/src/minimal.cc b/runtime/neurun/sample/minimal/src/minimal.cc
deleted file mode 100644
index 003c8a323..000000000
--- a/runtime/neurun/sample/minimal/src/minimal.cc
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "nnfw.h"
-#include <vector>
-
-uint64_t num_elems(const nnfw_tensorinfo *ti)
-{
- uint64_t n = 1;
- for (uint32_t i = 0; i < ti->rank; ++i)
- {
- n *= ti->dims[i];
- }
- return n;
-}
-
-int main(const int argc, char **argv)
-{
- nnfw_session *session = nullptr;
- nnfw_create_session(&session);
-
- // Loading nnpackage
- nnfw_load_model_from_file(session, argv[1]);
-
- // Use acl_neon backend for CONV_2D and acl_cl for otherwise.
- // Note that defalut backend is acl_cl
- nnfw_set_op_backend(session, "CONV_2D", "acl_neon");
-
- // Compile model
- nnfw_prepare(session);
-
- // Prepare input. Here we just allocate dummy input arrays.
- std::vector<float> input;
- nnfw_tensorinfo ti;
- nnfw_input_tensorinfo(session, 0, &ti); // get first input's info
- uint32_t input_elements = num_elems(&ti);
- input.resize(input_elements);
- // TODO: Please add initialization for your input.
- nnfw_set_input(session, 0, ti.dtype, input.data(), sizeof(float) * input_elements);
-
- // Prepare output
- std::vector<float> output;
- nnfw_output_tensorinfo(session, 0, &ti); // get first output's info
- uint32_t output_elements = num_elems(&ti);
- output.resize(output_elements);
- nnfw_set_output(session, 0, ti.dtype, output.data(), sizeof(float) * output_elements);
-
- // Do inference
- nnfw_run(session);
-
- // TODO: Please print or compare the output value in your way.
-
- return 0;
-}
diff --git a/runtime/neurun/test/CMakeLists.txt b/runtime/neurun/test/CMakeLists.txt
deleted file mode 100644
index 815173937..000000000
--- a/runtime/neurun/test/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-set(TEST_NEURUN test_neurun)
-
-file(GLOB_RECURSE TESTS "*.cc")
-
-add_executable(${TEST_NEURUN} ${TESTS})
-
-target_include_directories(${TEST_NEURUN} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../core/src)
-
-target_link_libraries(${TEST_NEURUN} neurun_core)
-target_link_libraries(${TEST_NEURUN} gtest)
-target_link_libraries(${TEST_NEURUN} gtest_main)
-target_link_libraries(${TEST_NEURUN} ${LIB_PTHREAD} dl)
-add_test(${TEST_NEURUN} ${TEST_NEURUN})
-
-install(TARGETS ${TEST_NEURUN} DESTINATION unittest)
diff --git a/runtime/neurun/test/core/backend/ExecTime.test.cc b/runtime/neurun/test/core/backend/ExecTime.test.cc
deleted file mode 100644
index 4b89e64d2..000000000
--- a/runtime/neurun/test/core/backend/ExecTime.test.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/ExecTime.h"
-#include "backend/IConfig.h"
-#include "backend/Backend.h"
-#include <gtest/gtest.h>
-#include <string>
-
-namespace
-{
-using namespace neurun;
-using namespace backend;
-
-struct MockConfig : public IConfig
-{
- std::string id() override { return "b1"; }
- bool initialize() override { return true; };
- bool SupportPermutation() override { return false; }
- bool SupportSubTensorAlloc() override { return false; }
-};
-
-struct MockBackend : public ::neurun::backend::Backend
-{
- std::shared_ptr<neurun::backend::IConfig> config() const override
- {
- return std::make_shared<MockConfig>();
- }
- std::unique_ptr<BackendContext>
- newContext(const ir::Operands &, const std::shared_ptr<custom::IKernelBuilder> &kb) const override
- {
- return nullptr;
- }
-};
-
-TEST(ExecTime, roundtrip_ok)
-{
- const auto *b = new MockBackend();
- std::vector<const Backend *> bs = {b};
- {
- ExecTime et(bs);
- et.updateOperationExecTime(b, "op1", true, 100, 100);
- et.updateOperationExecTime(b, "op1", true, 200, 200);
- et.updateOperationExecTime(b, "op1", false, 100, 888);
- et.uploadOperationsExecTime();
- }
- {
- ExecTime et(bs);
- auto time = et.getOperationExecTime(b, "op1", true, 100);
- ASSERT_EQ(time, 100);
- // Check interpolation
- time = et.getOperationExecTime(b, "op1", true, 150);
- ASSERT_EQ(time, 150);
- time = et.getOperationExecTime(b, "op1", false, 100);
- ASSERT_EQ(time, 888);
- et.uploadOperationsExecTime();
- }
- // clean up
- EXPECT_EQ(remove("exec_time.json"), 0);
-}
-
-TEST(ExecTime, structure)
-{
-
- const auto *b = new MockBackend();
- std::vector<const Backend *> bs = {b};
- {
- ExecTime et(bs);
- et.updateOperationExecTime(b, "op1", true, 100, 100);
- et.updateOperationExecTime(b, "op1", true, 200, 200);
- et.uploadOperationsExecTime();
- }
- {
- ExecTime et(bs);
- auto time = et.getOperationExecTime(b, "op1", true, 100);
- ASSERT_EQ(time, 100);
- // Check interpolation
- time = et.getOperationExecTime(b, "op1", true, 200);
- ASSERT_EQ(time, 200);
- et.uploadOperationsExecTime();
- }
- // clean up
- EXPECT_EQ(remove("exec_time.json"), 0);
-}
-} // unnamed namespace
diff --git a/runtime/neurun/test/core/compiler/Scheduler.cc b/runtime/neurun/test/core/compiler/Scheduler.cc
deleted file mode 100644
index a4fbfeb2c..000000000
--- a/runtime/neurun/test/core/compiler/Scheduler.cc
+++ /dev/null
@@ -1,550 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <compiler/HEScheduler.h>
-#include <backend/ExecTime.h>
-#include <backend/IShapeFixer.h>
-
-#include <ir/Shape.h>
-#include <ir/InternalType.h>
-#include <ir/TypeInfo.h>
-#include <ir/DataType.h>
-
-#include <ir/operation/Add.h>
-#include <ir/operation/Sub.h>
-#include <ir/operation/Mul.h>
-#include <ir/operation/FullyConnected.h>
-
-#include <gtest/gtest.h>
-
-namespace
-{
-using namespace neurun;
-using namespace ir;
-using namespace backend;
-using namespace operation;
-
-//
-// Mock backends classes
-//
-
-// Backend could be created without ShapeFixer.
-// But it is used by scheduler to detect which operations are supported by backend.
-struct MockShapeFixer : IShapeFixer
-{
- void visit(const Add &) override {}
- void visit(const Sub &) override {}
- void visit(const Mul &) override {}
- void visit(const FullyConnected &) override {}
-};
-
-struct MockConfigCPU : public IConfig
-{
- std::string id() override { return "cpu"; }
- bool initialize() override { return true; };
- bool SupportPermutation() override { return false; }
- bool SupportSubTensorAlloc() override { return false; }
-};
-
-struct MockBackendCPU : public Backend
-{
- std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
- std::unique_ptr<BackendContext>
- newContext(const Operands &, const std::shared_ptr<custom::IKernelBuilder> &) const override
- {
- return std::unique_ptr<BackendContext>(
- new BackendContext{this, nullptr, nullptr, nullptr, std::make_shared<MockShapeFixer>()});
- }
-};
-
-struct MockConfigGPU : public IConfig
-{
- std::string id() override { return "gpu"; }
- bool initialize() override { return true; };
- bool SupportPermutation() override { return false; }
- bool SupportSubTensorAlloc() override { return false; }
-};
-
-struct MockBackendGPU : public Backend
-{
- std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
- std::unique_ptr<BackendContext>
- newContext(const Operands &, const std::shared_ptr<custom::IKernelBuilder> &) const override
- {
- return std::unique_ptr<BackendContext>(
- new BackendContext{this, nullptr, nullptr, nullptr, std::make_shared<MockShapeFixer>()});
- }
-};
-
-struct MockConfigNPU : public IConfig
-{
- std::string id() override { return "npu"; }
- bool initialize() override { return true; };
- bool SupportPermutation() override { return false; }
- bool SupportSubTensorAlloc() override { return false; }
-};
-
-struct MockBackendNPU : public Backend
-{
- std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
- std::unique_ptr<BackendContext>
- newContext(const Operands &, const std::shared_ptr<custom::IKernelBuilder> &) const override
- {
- return std::unique_ptr<BackendContext>(
- new BackendContext{this, nullptr, nullptr, nullptr, std::make_shared<MockShapeFixer>()});
- }
-};
-
-//
-// Constants
-//
-
-const int OPERAND_ELEMS = 268203;
-const int OPERAND_SIZE = OPERAND_ELEMS * 4;
-const int OPERATION_SIZE = OPERAND_SIZE * 3;
-
-const std::string LINEAR("Linear");
-const std::string DATAFLOW("Dataflow");
-const std::string PARALLEL("Parallel");
-
-//
-// Helper functions
-//
-
-// Set executor through environment variable
-void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); }
-
-// Set profiling mode through environment variable
-void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); }
-
-// Calculate operation size by addition sizes of all input and output operands
-uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
-{
- uint32_t size = 0;
- for (const auto &input : graph->operations().at(op_idx).getInputs())
- size += graph->operands().at(input).info().total_size();
- for (const auto &output : graph->operations().at(op_idx).getOutputs())
- size += graph->operands().at(output).info().total_size();
- return size;
-}
-
-// Set execution operation time. This method is needed since ExecutionTime has only
-// 'updateOperationExecTime' method.
-void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
- bool quant, uint32_t op_size, int64_t time)
-{
- // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
- assert(time > 0);
- int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
- int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
- et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
- assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
-}
-
-// Set same execution time for all given backends/operations
-void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
- const std::vector<std::string> &op_names,
- const std::vector<uint32_t> &op_sizes, int64_t exec_time)
-{
- assert(op_names.size() == op_sizes.size());
- ExecTime et(backends);
- for (int i = 0; i < op_names.size(); ++i)
- {
- for (auto &backend : backends)
- setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
- }
- et.uploadOperationsExecTime();
-}
-
-// Set permute time from one backend to another. This method is needed since ExecutionTime has only
-// 'updatePermuteTime' method.
-void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
- bool quant, uint32_t op_size, int64_t time)
-{
- // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
- assert(time > 0);
- int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
- int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
- et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
- assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
-}
-
-// Set same permutation time between all given backends
-void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
- const int operand_size, const int64_t exec_time)
-{
- ExecTime et(backends);
- for (const auto &backend : backends)
- {
- for (auto &other_backend : backends)
- {
- if (backend == other_backend)
- continue;
- setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
- }
- }
- et.uploadOperationsExecTime();
-}
-
-//
-// Functions for creating graphs
-//
-
-using OIS = OperandIndexSequence;
-
-template <typename NodeT, typename... Types>
-OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
-{
- typename NodeT::Param op_params{Activation::NONE};
- auto op = nnfw::cpp14::make_unique<NodeT>(std::forward<Types>(args)..., op_params);
- auto op_idx = graph->addOperation(std::move(op));
- // For now in scheduler test all operations in tested graphs has same size (for simplicity)
- assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
- return op_idx;
-}
-
-// Create straight graph: Add->Sub->Mul
-std::shared_ptr<Graph> createStraightGraph()
-{
- auto graph = std::make_shared<Graph>();
- const TypeInfo float_op(DataType::FLOAT32);
-
- // Create add node
- auto add_lhs_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- auto add_rhs_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- auto add_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
-
- // Create sub node
- auto sub_const_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- auto sub_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- create<Sub>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx});
-
- // Create mul node
- auto mul_const_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- auto mul_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- create<Mul>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx});
-
- graph->finishBuilding();
- return graph;
-}
-
-/* Create branched graph:
- * [Add]
- * // \\
- * [Mul1] [FC2]
- * || ||
- * [Mul2] [FC2]
- * \\ //
- * [Sub]
- */
-std::shared_ptr<Graph> createBranchedGraph()
-{
- auto graph = std::make_shared<Graph>();
- const TypeInfo float_op(DataType::FLOAT32);
-
- // Create add node
- auto add_lhs_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- auto add_rhs_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- auto add_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
-
- // Create mul1 node
- auto mul1_const_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- auto mul1_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- create<Mul>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx});
-
- // Create mul2 node
- auto mul2_const_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- auto mul2_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- create<Mul>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx});
-
- // Create fc1 node
- auto fc1_const_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- auto fc1_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx});
-
- // Create fc2 node
- auto fc2_const_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- auto fc2_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx});
-
- // Create add2 node
- auto sub_out_idx = graph->addOperand(Shape{OPERAND_ELEMS}, float_op);
- create<Sub>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx});
-
- graph->finishBuilding();
- return graph;
-}
-
-//
-// Tests setup/teardown
-//
-
-// SetUp/TearDown methods runs before/after each test and performs actions common for each test
-class SchedulerTest : public ::testing::Test
-{
-protected:
- void SetUp() override
- {
- // Initialize mock backends
- _cpu_backend = new MockBackendCPU();
- _gpu_backend = new MockBackendGPU();
- _npu_backend = new MockBackendNPU();
- _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
-
- // Remove previous profile data if it exists
- if (!remove("exec_time.json"))
- {
- // DO NOTHING (no profile data)
- }
-
- // Remember original value of 'EXECUTOR' environment variable
- char *executor = std::getenv("EXECUTOR");
- _original_executor = executor == nullptr ? "" : executor;
-
- // Remember original value of 'PROFILING_MODE' environment variable
- char *profiling_mode = std::getenv("PROFILING_MODE");
- _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
- }
-
- void TearDown() override
- {
- delete _cpu_backend;
- delete _gpu_backend;
- delete _npu_backend;
- EXPECT_EQ(remove("exec_time.json"), 0);
- setenv("EXECUTOR", _original_executor.c_str(), true);
- setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
- }
-
- const MockBackendCPU *_cpu_backend{nullptr};
- const MockBackendGPU *_gpu_backend{nullptr};
- const MockBackendNPU *_npu_backend{nullptr};
- std::vector<const Backend *> _mock_backends;
-
- std::string _original_executor;
- std::string _original_profiling_mode;
-};
-
-class SchedulerTestWithExecutorParam : public SchedulerTest,
- public testing::WithParamInterface<std::string>
-{
-};
-
-//
-// HEScheduler tests
-//
-
-// Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
-TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time)
-{
- setExecutor(GetParam());
-
- // Prepare graph
- auto graph(createStraightGraph());
- OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
-
- // Set default execution and transfer time
- setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
- setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
- {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
-
- // Test 1
- // Expected behaviour: scheduler assigns different backend to each node
- {
- // For each backend reduce execution time of one node
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
- setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
- setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
- et.uploadOperationsExecTime();
-
- // Test scheduler
- auto scheduler = compiler::HEScheduler(graph->operands(), _mock_backends, nullptr);
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
- ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
- }
-
- // Test 2
- // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time
- {
- // Increase transfer time
- setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
-
- // Test scheduler
- auto scheduler = compiler::HEScheduler(graph->operands(), _mock_backends, nullptr);
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
- ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
- }
-}
-
-// Test scheduler behavior for branched graph with known execution time of all nodes and permutes
-TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
-{
- const int64_t NPU_ET = 5000;
- setExecutor(GetParam());
-
- // Prepare graph
- auto graph(createBranchedGraph());
- OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
- sub_op_idx(5);
-
- // Set default execution and transfer time
- setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
- setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
- {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
-
- // Test 1
- // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all
- // nodes, in case of parallel executor scheduler assigns different backends to branches.
- {
- // Reduce execution time
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
- setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
- et.uploadOperationsExecTime();
-
- // Test scheduler
- auto scheduler = compiler::HEScheduler(graph->operands(), _mock_backends, nullptr);
- const auto br = scheduler.schedule(*graph);
-
- std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
- if (GetParam() == PARALLEL)
- {
- branch1_expected_backend =
- br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
- branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
- }
-
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
- ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
- ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
- ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
- }
-
- // Test 2
- // Expected behaviour: scheduler assigns single backend to all nodes
- {
- // Increase execution time for GPU backend
- ExecTime et(_mock_backends);
- /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt *
- * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the
- * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter
- * branching or scheduler assigns another backend to a node*/
- setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
- setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
- et.uploadOperationsExecTime();
-
- // Test scheduler
- auto scheduler = compiler::HEScheduler(graph->operands(), _mock_backends, nullptr);
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
- }
-}
-
-// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
-// one time for each executor
-INSTANTIATE_TEST_CASE_P(AllExecutors, SchedulerTestWithExecutorParam,
- testing::Values(LINEAR, DATAFLOW, PARALLEL));
-
-// Test scheduler behavior for branched graph and enabled profiling mode
-TEST_F(SchedulerTest, branched_graph_profiling_mode)
-{
- const int ET = 1e5;
-
- // Turn on profiling mode
- setProfilingMode(true);
- setExecutor(DATAFLOW);
-
- // Prepare graph
- auto graph(createBranchedGraph());
- OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
- sub_op_idx(5);
-
- // Test 1
- // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
- {
- // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
- setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
- setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
- et.uploadOperationsExecTime();
-
- // Test scheduler
- auto scheduler = compiler::HEScheduler(graph->operands(), _mock_backends, nullptr);
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
- ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
- }
-
- // Test 2
- // Expected behaviour: scheduler shuffling backends, so different backends are assigned to
- // neighbor nodes
- {
- // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
- setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- et.uploadOperationsExecTime();
-
- // Test scheduler
- auto scheduler = compiler::HEScheduler(graph->operands(), _mock_backends, nullptr);
- const auto br = scheduler.schedule(*graph);
- ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
- br->getBackend(mul1_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
- br->getBackend(fc1_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
- br->getBackend(mul2_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
- br->getBackend(fc2_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
- br->getBackend(sub_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
- br->getBackend(sub_op_idx)->config()->id());
- }
-}
-
-// TODO: Add tests with unknown execution and permutation time
-
-} // unnamed namespace
diff --git a/runtime/neurun/test/core/exec/ExecInstance.cc b/runtime/neurun/test/core/exec/ExecInstance.cc
deleted file mode 100644
index 49d561226..000000000
--- a/runtime/neurun/test/core/exec/ExecInstance.cc
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include <thread>
-
-#include "ir/Graph.h"
-#include "compiler/Compiler.h"
-#include "exec/Execution.h"
-#include "ir/operation/Add.h"
-
-namespace
-{
-
-using namespace neurun::ir;
-
-class CompiledMockUpModel
-{
-public:
- CompiledMockUpModel()
- {
- // Model: two elementwise add operation
- // model input: lhs, rhs1
- // model output: second add result (result2)
- // constant: rhs2
- // result1 <= (lhs + rhs)
- // result2 <= (result1 + rhs2)
- // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1}
- // activation: none (constant)
- graph = std::make_shared<Graph>();
- // 1st add operands (result1 <= lhs + rhs1)
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- static float rhs2_data[4] = {3, 1, -1, 5};
- auto operand_lhs = graph->addOperand(shape, type);
- auto operand_rhs1 = graph->addOperand(shape, type);
- auto operand_result1 = graph->addOperand(shape, type);
- auto operand_rhs2 = graph->addOperand(shape, type);
- auto operand_result2 = graph->addOperand(shape, type);
- graph->operands()
- .at(operand_rhs2)
- .data(nnfw::cpp14::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data),
- 16));
- // 2nd add operations (result2 <= result1 + rhs2)
- operation::Add::Param param1;
- param1.activation = Activation::NONE;
- auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
- auto output_set1 = OperandIndexSequence{operand_result1};
- graph->addOperation(nnfw::cpp14::make_unique<operation::Add>(input_set1, output_set1, param1));
- operation::Add::Param param2;
- param2.activation = Activation::NONE;
- auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
- auto output_set2 = OperandIndexSequence{operand_result2};
- graph->addOperation(nnfw::cpp14::make_unique<operation::Add>(input_set2, output_set2, param2));
- // Identify model inputs and outputs
- graph->addInput(operand_lhs);
- graph->addInput(operand_rhs1);
- graph->addOutput(operand_result2);
- graph->finishBuilding();
-
- // Compile
- auto compiler = new neurun::compiler::Compiler{graph};
- compiler->compile();
- compiler->release(executor);
- delete compiler;
- }
-
-public:
- std::shared_ptr<Graph> graph;
- std::shared_ptr<neurun::exec::IExecutor> executor;
-};
-
-TEST(ExecInstance, simple)
-{
- auto mockup = CompiledMockUpModel();
- auto graph = mockup.graph;
- auto executor = mockup.executor;
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output = IOIndex{0};
-
- const float input1_buffer[4] = {1, 0, -1, -2};
- const float input2_buffer[4] = {1, -3, 2, -4};
- float output_buffer[4] = {};
- const float output_expected[4] = {5, -2, 0, -1};
-
- auto execution = new neurun::exec::Execution(executor);
-
- execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
- execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
- execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
- execution->execute();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(output_buffer[i], output_expected[i]);
- }
-
- delete execution;
-}
-
-TEST(ExecInstance, twoCompile)
-{
- auto mockup = CompiledMockUpModel();
- auto graph = mockup.graph;
- auto executor1 = mockup.executor;
- auto execution1 = new neurun::exec::Execution(executor1);
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output = IOIndex{0};
-
- const float exe1_input1_buffer[4] = {1, 0, -1, -2};
- const float exe1_input2_buffer[4] = {1, -3, 2, -4};
- float exe1_output_buffer[4] = {};
- const float exe1_output_expected[4] = {5, -2, 0, -1};
-
- execution1->setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
- execution1->setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
- execution1->setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
-
- // Make new executor: compile again
- auto compiler = new neurun::compiler::Compiler{graph};
- compiler->compile();
- std::shared_ptr<neurun::exec::IExecutor> executor2;
- compiler->release(executor2);
- auto execution2 = new neurun::exec::Execution(executor2);
-
- const float exe2_input1_buffer[4] = {2, 1, -2, 0};
- const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
- float exe2_output_buffer[4] = {};
- const float exe2_output_expected[4] = {2, 5, -2, 7};
-
- execution2->setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
- execution2->setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
- execution2->setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
-
- execution1->execute();
- execution2->execute();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
- EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
- }
-
- delete compiler;
- delete execution1;
- delete execution2;
-}
-
-// Support two initialized execution instance then ordered execution
-TEST(ExecInstance, twoExecution)
-{
- auto mockup = CompiledMockUpModel();
- auto executor = mockup.executor;
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output1 = IOIndex{0};
-
- const float exe1_input1_buffer[4] = {1, 0, -1, -2};
- const float exe1_input2_buffer[4] = {1, -3, 2, -4};
- float exe1_output_buffer[4] = {};
- const float exe1_output_expected[4] = {5, -2, 0, -1};
- const float exe2_output_expected[4] = {2, 5, -2, 7};
-
- auto execution1 = new neurun::exec::Execution(executor);
- execution1->setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
- execution1->setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
- execution1->setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
-
- const float exe2_input1_buffer[4] = {2, 1, -2, 0};
- const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
- float exe2_output_buffer[4] = {};
-
- // Make new execution
- auto execution2 = new neurun::exec::Execution(executor);
- execution2->setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
- execution2->setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
- execution2->setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
-
- execution1->execute();
- execution2->execute();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
- EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
- }
-
- delete execution1;
- delete execution2;
-}
-
-class Inference
-{
-public:
- Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
- std::shared_ptr<neurun::exec::IExecutor> &executor)
- : _input1{input1}, _input2{input2}, _output{output}, _executor{executor}
- {
- // DO NOTHING
- }
-
- void inference(void)
- {
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output1 = IOIndex{0};
-
- auto execution = new neurun::exec::Execution(_executor);
- execution->setInput(input1, reinterpret_cast<const void *>(_input1), 16);
- execution->setInput(input2, reinterpret_cast<const void *>(_input2), 16);
- execution->setOutput(output1, reinterpret_cast<void *>(_output), 16);
-
- execution->execute();
-
- delete execution;
- }
-
-private:
- const float (&_input1)[4];
- const float (&_input2)[4];
- float (&_output)[4];
- std::shared_ptr<neurun::exec::IExecutor> &_executor;
-};
-
-// Support multi-thread execution
-TEST(ExecInstance, twoThreads)
-{
- auto mockup = CompiledMockUpModel();
- auto executor = mockup.executor;
-
- const float exe1_input1_buffer[4] = {1, 0, -1, -2};
- const float exe1_input2_buffer[4] = {1, -3, 2, -4};
- float exe1_output_buffer[4] = {};
- const float exe1_output_expected[4] = {5, -2, 0, -1};
-
- Inference execution1{exe1_input1_buffer, exe1_input2_buffer, exe1_output_buffer, executor};
-
- const float exe2_input1_buffer[4] = {2, 1, -2, 0};
- const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
- float exe2_output_buffer[4] = {};
- const float exe2_output_expected[4] = {2, 5, -2, 7};
-
- Inference execution2{exe2_input1_buffer, exe2_input2_buffer, exe2_output_buffer, executor};
-
- std::thread t1{&Inference::inference, &execution1};
- std::thread t2{&Inference::inference, &execution2};
-
- t1.join();
- t2.join();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
- EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
- }
-}
-
-// Support asynchronous execution
-TEST(ExecInstance, async)
-{
- auto mockup = CompiledMockUpModel();
- auto graph = mockup.graph;
- auto executor = mockup.executor;
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output = IOIndex{0};
-
- const float input1_buffer[4] = {1, 0, -1, -2};
- const float input2_buffer[4] = {1, -3, 2, -4};
- float output_buffer[4] = {};
- const float output_expected[4] = {5, -2, 0, -1};
-
- auto execution = new neurun::exec::Execution(executor);
-
- execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
- execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
- execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
- execution->startExecute();
- execution->waitFinish();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(output_buffer[i], output_expected[i]);
- }
-
- delete execution;
-}
-
-} // namespace
diff --git a/runtime/neurun/test/core/exec/interp/ExecManager.cc b/runtime/neurun/test/core/exec/interp/ExecManager.cc
deleted file mode 100644
index aba64e28f..000000000
--- a/runtime/neurun/test/core/exec/interp/ExecManager.cc
+++ /dev/null
@@ -1,334 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include <cpp14/memory.h>
-
-#include "ir/Graph.h"
-#include "exec/interp/ExecManager.h"
-#include "exec/Execution.h"
-#include "ir/operation/Add.h"
-
-namespace
-{
-
-using namespace neurun::ir;
-using ExecManager = neurun::exec::interp::ExecManager;
-using Execution = neurun::exec::Execution;
-
-class InterpExecManagerTest : public ::testing::Test
-{
-protected:
- virtual void SetUp() {}
- void CreateSimpleModel()
- {
- // Model: one elementwise add operation
- // model input: lhs, rhs
- // model output: add result
- // lhs, rhs, result shape: {1, 2, 2, 1}
- // activation: none (constant)
- _graph = nnfw::cpp14::make_unique<Graph>();
-
- // Add operands
-
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs = _graph->addOperand(shape, type);
- auto operand_result = _graph->addOperand(shape, type);
-
- // Add operations
-
- operation::Add::Param param;
- param.activation = Activation::NONE;
- auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
- auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(nnfw::cpp14::make_unique<operation::Add>(input_set, output_set, param));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs);
- _graph->getOutputs().append(operand_result);
-
- _graph->finishBuilding();
-
- _executor = nnfw::cpp14::make_unique<ExecManager>(*_graph);
- }
-
- void CreateTwoStepModel()
- {
- // Model: two elementwise add operation
- // model input: lhs, rhs1
- // model output: second add result (result2)
- // constant: rhs2
- // result1 <= (lhs + rhs)
- // result2 <= (result1 + rhs2)
- // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1}
- // activation: none (constant)
- _graph = nnfw::cpp14::make_unique<Graph>();
-
- // 1st add operands (result1 <= lhs + rhs1)
-
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- static int32_t rhs2_data[4] = {3, 1, -1, 5};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs1 = _graph->addOperand(shape, type);
- auto operand_result1 = _graph->addOperand(shape, type);
- auto operand_rhs2 = _graph->addOperand(shape, type);
- auto operand_result2 = _graph->addOperand(shape, type);
- _graph->operands()
- .at(operand_rhs2)
- .data(nnfw::cpp14::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data),
- 16));
-
- // 2nd add operations (result2 <= result1 + rhs2)
-
- operation::Add::Param param1;
- param1.activation = Activation::NONE;
- auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
- auto output_set1 = OperandIndexSequence{operand_result1};
- _graph->addOperation(nnfw::cpp14::make_unique<operation::Add>(input_set1, output_set1, param1));
-
- operation::Add::Param param2;
- param2.activation = Activation::NONE;
- auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
- auto output_set2 = OperandIndexSequence{operand_result2};
- _graph->addOperation(nnfw::cpp14::make_unique<operation::Add>(input_set2, output_set2, param2));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs1);
- _graph->getOutputs().append(operand_result2);
-
- _graph->finishBuilding();
-
- _executor = nnfw::cpp14::make_unique<ExecManager>(*_graph);
- }
-
- void CreateUnspecifiedDimensionsModel()
- {
- // Model: one elementwise add operation
- // model input: lhs, rhs
- // model output: add result
- // lhs, rhs, result shape: {1, unknown, 2, 1}
- // activation: none (constant)
- _graph = nnfw::cpp14::make_unique<Graph>();
-
- // Add operands
-
- Shape shape{1, 0, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs = _graph->addOperand(shape, type);
-
- auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
- _graph->operands()
- .at(operand_activation)
- .data(nnfw::cpp14::make_unique<CachedData>(
- reinterpret_cast<const uint8_t *>(&_activation_value), 4));
-
- auto operand_result = _graph->addOperand(shape, type);
-
- // Add operations
-
- operation::Add::Param param;
- param.activation = Activation::NONE;
- auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
- auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(nnfw::cpp14::make_unique<operation::Add>(input_set, output_set, param));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs);
- _graph->getOutputs().append(operand_result);
-
- _graph->finishBuilding();
-
- _executor = nnfw::cpp14::make_unique<ExecManager>(*_graph);
- }
-
- void createExecution() { _execution = nnfw::cpp14::make_unique<Execution>(_executor); }
-
- virtual void TearDown() { _executor = nullptr; }
-
- std::unique_ptr<Graph> _graph{nullptr};
- std::shared_ptr<ExecManager> _executor{nullptr};
- std::unique_ptr<Execution> _execution{nullptr};
- const int32_t _activation_value{0};
-};
-
-TEST_F(InterpExecManagerTest, create_empty)
-{
- Graph graph;
- graph.finishBuilding();
- _executor = nnfw::cpp14::make_unique<ExecManager>(graph);
- ASSERT_NE(_executor, nullptr);
-}
-
-TEST_F(InterpExecManagerTest, create_simple)
-{
- CreateSimpleModel();
- ASSERT_NE(_executor, nullptr);
-}
-
-TEST_F(InterpExecManagerTest, setInput)
-{
- CreateSimpleModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
- EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecManagerTest, setOutput)
-{
- CreateSimpleModel();
- createExecution();
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecManagerTest, setInputForUnspecifiedDimensions)
-{
- CreateUnspecifiedDimensionsModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
- TypeInfo operand_type{DataType::INT32};
- Shape operand_shape{1, 2, 2, 1};
-
- EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecManagerTest, setOutputForUnspecifiedDimensions)
-{
- CreateUnspecifiedDimensionsModel();
- createExecution();
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- TypeInfo operand_type{DataType::INT32};
- Shape operand_shape{1, 2, 2, 1};
-
- int32_t output_buffer[4] = {};
-
- EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecManagerTest, execute)
-{
- CreateSimpleModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto input1_idx = _graph->getInputs().at(input1);
- auto input2_idx = _graph->getInputs().at(input2);
-
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
- const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
- EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
- EXPECT_NO_THROW(_execution->execute());
- EXPECT_EQ(output_buffer[0], 2);
- EXPECT_EQ(output_buffer[1], -3);
- EXPECT_EQ(output_buffer[2], 1);
- EXPECT_EQ(output_buffer[3], -6);
-}
-
-TEST_F(InterpExecManagerTest, executeTwoStep)
-{
- CreateTwoStepModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto input1_idx = _graph->getInputs().at(input1);
- auto input2_idx = _graph->getInputs().at(input2);
-
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
- const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
- EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
- EXPECT_NO_THROW(_execution->execute());
- EXPECT_EQ(output_buffer[0], 5);
- EXPECT_EQ(output_buffer[1], -2);
- EXPECT_EQ(output_buffer[2], 0);
- EXPECT_EQ(output_buffer[3], -1);
-}
-
-} // namespace
diff --git a/runtime/neurun/test/graph/Graph.cc b/runtime/neurun/test/graph/Graph.cc
deleted file mode 100644
index faf84df9c..000000000
--- a/runtime/neurun/test/graph/Graph.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-
-TEST(Graph, inputs_and_outputs)
-{
- neurun::ir::Graph graph;
-
- neurun::ir::OperandIndex index0{0u};
- neurun::ir::OperandIndex index1{1u};
-
- graph.addInput({index0});
- graph.addInput({index1});
-
- neurun::ir::OperandIndex index10{10u};
- neurun::ir::OperandIndex index11{11u};
- neurun::ir::OperandIndex index12{12u};
-
- graph.addOutput({index10});
- graph.addOutput({index11});
- graph.addOutput({index12});
-
- ASSERT_EQ(graph.getInputs().size(), 2);
- ASSERT_EQ(graph.getOutputs().size(), 3);
-
- neurun::ir::IOIndex io_index0{0};
- neurun::ir::IOIndex io_index1{1};
- neurun::ir::IOIndex io_index2{2};
-
- ASSERT_EQ(graph.getInputs().at(io_index0), 0);
- ASSERT_EQ(graph.getInputs().at(io_index1), 1);
-
- ASSERT_EQ(graph.getOutputs().at(io_index0), 10);
- ASSERT_EQ(graph.getOutputs().at(io_index1), 11);
- ASSERT_EQ(graph.getOutputs().at(io_index2), 12);
-}
diff --git a/runtime/neurun/test/graph/Index.cc b/runtime/neurun/test/graph/Index.cc
deleted file mode 100644
index c24c2dd5e..000000000
--- a/runtime/neurun/test/graph/Index.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "util/Index.h"
-
-using Index = ::neurun::util::Index<uint32_t, struct TestTag>;
-
-TEST(Index, index_test)
-{
- Index idx1{1u};
- Index idx2{2u};
- Index idx3{idx1};
-
- ASSERT_EQ(idx1, 1);
- ASSERT_EQ(idx1, 1u);
- ASSERT_EQ(idx1.value(), 1u);
- ASSERT_NE(idx1, idx2);
- ASSERT_EQ(idx1, idx3);
-}
diff --git a/runtime/neurun/test/graph/MockNode.h b/runtime/neurun/test/graph/MockNode.h
deleted file mode 100644
index 67f4c049d..000000000
--- a/runtime/neurun/test/graph/MockNode.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NEURUN_TEST_GRAPH_MOCK_NODE_H__
-#define __NEURUN_TEST_GRAPH_MOCK_NODE_H__
-
-#include "ir/Operation.h"
-#include "ir/OperandIndexSequence.h"
-
-namespace neurun_test
-{
-namespace ir
-{
-
-class SimpleMock : public neurun::ir::Operation
-{
-public:
- SimpleMock(const neurun::ir::OperandIndexSequence &inputs,
- const neurun::ir::OperandIndexSequence &outputs)
- : Operation{neurun::ir::OperandConstraint::createAny()}
- {
- setInputs(inputs);
- setOutputs(outputs);
- }
-
-public:
- void accept(neurun::ir::OperationVisitor &) const override {}
- neurun::ir::OpCode opcode() const final { return neurun::ir::OpCode::Invalid; }
-};
-
-} // namespace ir
-} // namespace neurun_test
-
-#endif // __NEURUN_TEST_GRAPH_MOCK_NODE_H__
diff --git a/runtime/neurun/test/graph/operand/IndexSet.cc b/runtime/neurun/test/graph/operand/IndexSet.cc
deleted file mode 100644
index 969290fe1..000000000
--- a/runtime/neurun/test/graph/operand/IndexSet.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/OperandIndexSequence.h"
-
-using neurun::ir::OperandIndex;
-using neurun::ir::OperandIndexSequence;
-
-TEST(graph_OperandIndexSequence, append)
-{
- OperandIndexSequence iset{0, 2, 4, 8};
-
- ASSERT_EQ(iset.size(), 4);
-
- iset.append(OperandIndex{10});
-
- ASSERT_EQ(iset.size(), 5);
-
- neurun::ir::IOIndex index1{1};
- neurun::ir::IOIndex index2{4};
-
- ASSERT_EQ(iset.at(index1), 2);
- ASSERT_EQ(iset.at(index2), 10);
-
- ASSERT_TRUE(iset.contains(OperandIndex{2}));
- ASSERT_TRUE(iset.contains(OperandIndex{10}));
- ASSERT_FALSE(iset.contains(OperandIndex{11}));
-}
-
-TEST(graph_OperandIndexSequence, replace)
-{
- OperandIndexSequence iset{0, 1, 2, 3};
-
- iset.replace(OperandIndex{1}, OperandIndex{9});
- ASSERT_FALSE(iset.contains(OperandIndex{1}));
- ASSERT_TRUE(iset.contains(OperandIndex{9}));
-}
diff --git a/runtime/neurun/test/graph/operand/LayoutSet.cc b/runtime/neurun/test/graph/operand/LayoutSet.cc
deleted file mode 100644
index 7b0dcc9f7..000000000
--- a/runtime/neurun/test/graph/operand/LayoutSet.cc
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/LayoutSet.h"
-
-using neurun::ir::Layout;
-using neurun::ir::LayoutSet;
-
-TEST(graph_operand_LayoutSet, layout_set_operators)
-{
- LayoutSet set1{Layout::NCHW};
- LayoutSet set2{Layout::NHWC};
- LayoutSet set3 = set1 | set2;
-
- ASSERT_EQ(set3.size(), 2);
-
- ASSERT_EQ((set3 - set1).size(), 1);
- ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true);
- ASSERT_EQ((set3 - set2).size(), 1);
- ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true);
- ASSERT_EQ((set3 - set3).size(), 0);
-
- ASSERT_EQ((set3 & set1).size(), 1);
- ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true);
- ASSERT_EQ((set3 & set2).size(), 1);
- ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true);
- ASSERT_EQ((set1 & set2).size(), 0);
-}
diff --git a/runtime/neurun/test/graph/operand/Set.cc b/runtime/neurun/test/graph/operand/Set.cc
deleted file mode 100644
index e30a5b7f7..000000000
--- a/runtime/neurun/test/graph/operand/Set.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Operands.h"
-
-TEST(graph_operand_Set, set_test)
-{
- neurun::ir::Operands set;
-
- neurun::ir::Shape shape0{1, 2, 3};
-
- neurun::ir::Shape shape1(4);
- shape1.dim(0) = 10;
- shape1.dim(1) = 20;
- shape1.dim(2) = 30;
- shape1.dim(3) = 40;
-
- neurun::ir::TypeInfo type{neurun::ir::DataType::INT32};
-
- set.emplace(shape0, type);
- set.emplace(shape1, type);
-
- ASSERT_EQ(set.exist(neurun::ir::OperandIndex{0u}), true);
- ASSERT_EQ(set.exist(neurun::ir::OperandIndex{1u}), true);
- ASSERT_EQ(set.exist(neurun::ir::OperandIndex{2u}), false);
-
- ASSERT_EQ(set.at(neurun::ir::OperandIndex{0u}).shape().dim(0), 1);
- ASSERT_EQ(set.at(neurun::ir::OperandIndex{0u}).shape().dim(1), 2);
- ASSERT_EQ(set.at(neurun::ir::OperandIndex{0u}).shape().dim(2), 3);
-}
diff --git a/runtime/neurun/test/graph/operand/UseDef.cc b/runtime/neurun/test/graph/operand/UseDef.cc
deleted file mode 100644
index c59032824..000000000
--- a/runtime/neurun/test/graph/operand/UseDef.cc
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
-#include "cpp14/memory.h"
-#include "../MockNode.h"
-
-#include <typeindex>
-
-namespace
-{
-
-using IndexSet = neurun::ir::OperandIndexSequence;
-using Mock = neurun_test::ir::SimpleMock;
-
-} // namespace anonymous
-
-TEST(graph_operand_usedef, usedef_test)
-{
- neurun::ir::Graph graph;
- neurun::ir::verifier::DAGChecker verifier;
-
- neurun::ir::Shape shape(3);
- neurun::ir::TypeInfo type{neurun::ir::DataType::INT32};
-
- // Model Input/Output
- auto input_operand = graph.addOperand(shape, type);
- auto output_operand = graph.addOperand(shape, type);
-
- graph.addInput(input_operand);
- graph.addOutput(output_operand);
-
- // MockNode1
- auto operand_index1 = graph.addOperand(shape, type);
- auto mocknode_index1 = graph.addOperation(
- nnfw::cpp14::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
-
- // MockNode2
- auto operand_index2 = graph.addOperand(shape, type);
- auto mocknode_index2 = graph.addOperation(
- nnfw::cpp14::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
-
- // MockNode3(two input)
- auto multiinput_index = graph.addOperation(nnfw::cpp14::make_unique<Mock>(
- IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
-
- graph.finishBuilding();
-
- ASSERT_EQ(verifier.verify(graph), true);
-
- // Check def
- ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(mocknode_index1), true);
- ASSERT_EQ(graph.operands().at(operand_index2).getDef().contains(mocknode_index2), true);
- ASSERT_EQ(graph.operands().at(output_operand).getDef().contains(multiinput_index), true);
-
- ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(mocknode_index2), false);
- ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(multiinput_index), false);
-
- // Check use
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true);
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false);
- ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true);
- ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true);
-
- ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2);
- ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1);
- ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0);
-}
diff --git a/runtime/neurun/test/graph/operation/Set.cc b/runtime/neurun/test/graph/operation/Set.cc
deleted file mode 100644
index fb3d54298..000000000
--- a/runtime/neurun/test/graph/operation/Set.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "../MockNode.h"
-#include "ir/Operations.h"
-
-using neurun::ir::Operations;
-using neurun::ir::Operation;
-using neurun::ir::OperationIndex;
-
-TEST(graph_operation_Set, operation_test)
-{
- Operations ops;
- ops.push(std::unique_ptr<Operation>(new neurun_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
- OperationIndex idx{0u};
- ASSERT_EQ(ops.at(idx).getInputs().size(), 4);
- ASSERT_EQ(ops.at(idx).getOutputs().size(), 3);
-}
diff --git a/runtime/neurun/test/graph/operation/SetIO.cc b/runtime/neurun/test/graph/operation/SetIO.cc
deleted file mode 100644
index 110accfac..000000000
--- a/runtime/neurun/test/graph/operation/SetIO.cc
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/Index.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/operation/Conv2D.h"
-#include "ir/operation/Concat.h"
-
-#include <cpp14/memory.h>
-
-#include <stdexcept>
-
-using Index = neurun::ir::IOIndex;
-using IndexSet = neurun::ir::OperandIndexSequence;
-
-TEST(graph_operation_setIO, operation_setIO_conv)
-{
- neurun::ir::Graph graph;
-
- neurun::ir::Shape shape{3};
- neurun::ir::TypeInfo type{neurun::ir::DataType::INT32};
-
- // Add Conv
- using Graph = neurun::ir::operation::Conv2D;
-
- auto input_operand = graph.addOperand(shape, type);
- auto kernel_operand = graph.addOperand(shape, type);
- auto bias_operand = graph.addOperand(shape, type);
- IndexSet inputs{input_operand, kernel_operand, bias_operand};
-
- Graph::Param conv_params;
- conv_params.padding.type = neurun::ir::PaddingType::SAME;
- conv_params.stride.horizontal = 1;
- conv_params.stride.vertical = 1;
- conv_params.activation = neurun::ir::Activation::NONE;
-
- auto output_operand = graph.addOperand(shape, type).value();
- IndexSet outputs{output_operand};
-
- auto conv = nnfw::cpp14::make_unique<Graph>(inputs, outputs, conv_params);
-
- ASSERT_NE(conv, nullptr);
- ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
- conv->setInputs({8, 9, 10});
- ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
- ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
-}
-
-TEST(graph_operation_setIO, operation_setIO_concat)
-{
- neurun::ir::Graph graph;
-
- neurun::ir::Shape shape{3};
-
- neurun::ir::TypeInfo type{neurun::ir::DataType::INT32};
-
- using Graph = neurun::ir::operation::Concat;
-
- // Add Concat
- IndexSet inputs;
- for (int i = 0; i < 6; ++i)
- {
- inputs.append(graph.addOperand(shape, type));
- }
-
- Graph::Param concat_params{0};
-
- auto output_operand = graph.addOperand(shape, type).value();
- IndexSet outputs{output_operand};
-
- auto concat = nnfw::cpp14::make_unique<Graph>(inputs, outputs, concat_params);
-
- ASSERT_NE(concat, nullptr);
- ASSERT_EQ(concat->getInputs().size(), 6);
- ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
-
- concat->setInputs({80, 6, 9, 11});
- ASSERT_EQ(concat->getInputs().size(), 4);
- ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
- ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
- ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
- ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range);
-}
diff --git a/runtime/neurun/test/graph/verifier/Verifier.cc b/runtime/neurun/test/graph/verifier/Verifier.cc
deleted file mode 100644
index cbda31dfe..000000000
--- a/runtime/neurun/test/graph/verifier/Verifier.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Operation.h"
-#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
-#include "cpp14/memory.h"
-#include "ir/Operand.h"
-#include "../MockNode.h"
-
-using IndexSet = neurun::ir::OperandIndexSequence;
-using Mock = neurun_test::ir::SimpleMock;
-
-TEST(Verifier, dag_checker)
-{
- neurun::ir::Graph graph;
-
- neurun::ir::Shape shape{3};
- neurun::ir::TypeInfo type{neurun::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- graph.addOperation(nnfw::cpp14::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}));
-
- graph.finishBuilding();
-
- neurun::ir::verifier::DAGChecker verifier;
-
- ASSERT_EQ(verifier.verify(graph), true);
-}
diff --git a/runtime/neurun/test/util/ShapeInference.cc b/runtime/neurun/test/util/ShapeInference.cc
deleted file mode 100644
index a8dea3a79..000000000
--- a/runtime/neurun/test/util/ShapeInference.cc
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Layout.h"
-#include "util/ShapeInference.h"
-
-using namespace neurun::ir;
-
-TEST(ShapeInference, Elementwise)
-{
- Shape lhs_shape{1, 299, 299, 3};
- Shape rhs_shape{3};
- auto infered_shapes = neurun::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
- auto infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.dim(0), 1);
- ASSERT_EQ(infered_out_shape.dim(1), 299);
- ASSERT_EQ(infered_out_shape.dim(2), 299);
- ASSERT_EQ(infered_out_shape.dim(3), 3);
-}
-
-TEST(ShapeInference, IncorrectElementwise)
-{
- Shape lhs_shape{1, 299, 299, 3};
- Shape rhs_shape{5, 3};
- ASSERT_THROW(neurun::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape),
- std::runtime_error);
-}
-
-TEST(ShapeInference, Pool2DNodeSame)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{3, 7};
- Padding padding{PaddingType::SAME};
-
- operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
- auto infered_shapes = neurun::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
- auto infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
- infered_shapes = neurun::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
- infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Pool2DNodeValid)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{3, 7};
- Padding padding{PaddingType::VALID};
-
- operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
- auto infered_shapes = neurun::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
- auto infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
- infered_shapes = neurun::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
- infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Pool2DNodeExplicit)
-{
- Shape in_shape{10, 3, 5, 20};
-
- Stride stride{3, 7};
- Padding padding{PaddingType::EXPLICIT, {4, 3, 2, 1}};
-
- operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
- auto infered_shapes = neurun::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
- auto infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
- infered_shapes = neurun::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
- infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Conv2D)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{30, 3, 6, 20};
-
- operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE};
- auto infered_shapes = neurun::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
- auto infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-
- param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE};
- infered_shapes = neurun::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
- infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-
- param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::EXPLICIT, {4, 3, 2, 1}},
- Activation::NONE};
- infered_shapes = neurun::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
- infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-}
-
-TEST(ShapeInference, DepthwiseConv2D)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{1, 3, 6, 60};
-
- operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
- Activation::NONE};
- auto infered_shapes =
- neurun::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
- auto infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-
- param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
- Activation::NONE};
- infered_shapes = neurun::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
- infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-
- param = operation::DepthwiseConv2D::Param{
- Stride{3, 7}, Padding{PaddingType::EXPLICIT, {4, 3, 2, 1}}, 3, Activation::NONE};
- infered_shapes = neurun::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
- infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-}
-
-TEST(ShapeInference, Concat)
-{
- Shape in1{10, 20, 30, 3, 50};
- Shape in2{10, 20, 30, 2, 50};
- Shape in3{10, 20, 30, 2, 50};
-
- operation::Concat::Param param{3};
- auto infered_shapes = neurun::shape_inference::inferConcatShape({in1, in2, in3}, param);
- auto infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 5);
- ASSERT_EQ(infered_out_shape.dim(0), 10);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 30);
- ASSERT_EQ(infered_out_shape.dim(3), 7);
- ASSERT_EQ(infered_out_shape.dim(4), 50);
-}
-
-TEST(ShapeInference, FullyConnected)
-{
- Shape in_shape{3, 4, 5, 6};
- Shape ker_shape{3, 10};
- auto infered_shapes = neurun::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
- auto infered_out_shape = infered_shapes[0];
-
- ASSERT_EQ(infered_out_shape.rank(), 2);
- ASSERT_EQ(infered_out_shape.dim(0), 36);
- ASSERT_EQ(infered_out_shape.dim(1), 3);
-}
diff --git a/runtime/nnapi-header/include/NeuralNetworksEx.h b/runtime/nnapi-header/include/NeuralNetworksEx.h
index 7a054a1a4..99f015d0f 100644
--- a/runtime/nnapi-header/include/NeuralNetworksEx.h
+++ b/runtime/nnapi-header/include/NeuralNetworksEx.h
@@ -71,24 +71,11 @@ typedef enum {
ANEURALNETWORKS_REDUCE_MAX_EX = 50003,
/**
- * Splits a tensor along a given axis into num_splits subtensors.
*
- * Supported tensor {@link OperandCode}:
- * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_INT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
- *
- * Supported tensor rank: from 1
- *
- * Inputs:
- * * 0: An n-D tensor to split.
- * * 1: An {@link ANEURALNETWORKS_INT32} scalar specifying the axis along
- * which to split.
- * * 2: An {@link ANEURALNETWORKS_INT32} scalar indicating the number of
- * splits along given axis. Must evenly divide axis size.
+ * IMPORTANT NOTICE:
+ * ANEURALNETWORKS_SPLIT_EX operation is DEPRECATED
+ * Use ANEURALNETWORKS_SPLIT instead
*
- * Outputs:
- * * 0 ~ (num_splits - 1): Resulting subtensors.
*/
ANEURALNETWORKS_SPLIT_EX = 50004, /**< Splits a tensor into sub tensors */
@@ -150,24 +137,11 @@ typedef enum {
ANEURALNETWORKS_EXP_EX = 50008,
/**
- * Computes the sum of elements across dimensions of a tensor.
- *
- * Reduces the input tensor along the given dimensions to reduce.
*
- * Supported tensor {@link OperandCode}:
- * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- *
- * Supported tensor rank: up to 4
- *
- * Inputs:
- * * 0: A tensor, specifying the input.
- * * 1: A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}. The dimensions
- * to reduce.
- * * 2: An {@link ANEURALNETWORKS_BOOL} scalar, keep_dims. If true,
- * retains reduced dimensions with length 1.
+ * IMPORTANT NOTICE:
+ * ANEURALNETWORKS_REDUCE_SUM_EX operation is DEPRECATED
+ * Use ANEURALNETWORKS_REDUCE_SUM instead
*
- * Outputs:
- * * 0: A tensor of the same {@link OperandCode} as input0.
*/
ANEURALNETWORKS_REDUCE_SUM_EX = 50009,
@@ -199,36 +173,11 @@ typedef enum {
ANEURALNETWORKS_TRANSPOSE_CONV_EX = 50010,
/**
- * Computes element-wise truth value by comparing the two input tensors for equality.
- *
- * Takes two input tensors of identical {@link OperandCode} and compatible dimensions.
- * The output is the result of comparison of two input tensors.
- *
- * Two dimensions are compatible when:
- * 1. they are equal, or
- * 2. one of them is 1
*
- * The size of the output is the maximum size along each dimension of the
- * input operands. It starts with the trailing dimensions, and works its way
- * forward.
- *
- * Supported tensor {@link OperandCode}:
- * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_INT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
- *
- * Supported tensor rank: up to 4
- *
- * Inputs:
- * * 0: An n-D tensor, specifying the first input.
- * * 1: A tensor of the same {@link OperandCode}, and compatible dimensions
- * as input0.
+ * IMPORTANT NOTICE:
+ * ANEURALNETWORKS_EQUAL_EX operation is DEPRECATED
+ * Use ANEURALNETWORKS_EQUAL instead
*
- * Outputs:
- * * 0: A boolean tensor indicating the truth value of (x == y)
- * Stored as {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} with offset 0
- * and scale 1.0f.
- * A non-zero byte represents True, a hit. A zero indicates otherwise.
*/
ANEURALNETWORKS_EQUAL_EX = 50011,
@@ -296,23 +245,11 @@ typedef enum {
ANEURALNETWORKS_UNPACK_EX = 50014,
/**
- * Returns the index of the largest element along an axis.
- *
- * Supported tensor {@link OperandCode}:
- * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_INT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
*
- * Supported tensor rank: from 1
- *
- * Inputs:
- * * 0: An n-D tensor, specifying the input.
- * * 1: An {@link ANEURALNETWORKS_INT32} scalar specifying the axis to
- * reduce across. Negative index is used to specify axis from the
- * end (e.g. -1 for the last axis). Must be in the range [-n, n).
+ * IMPORTANT NOTICE:
+ * ANEURALNETWORKS_ARGMAX_EX operation is DEPRECATED
+ * Use ANEURALNETWORKS_ARGMAX instead
*
- * Outputs:
- * * 0: An (n - 1)-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor.
*/
ANEURALNETWORKS_ARGMAX_EX = 50015,
@@ -326,191 +263,56 @@ typedef enum {
ANEURALNETWORKS_SQRT_EX = 50016,
/**
- * Computes element-wise truth value by comparing the input tensors for non-equality.
- *
- * Takes two input tensors of identical {@link OperandCode} and compatible dimensions.
- * The output is the result of comparison of two input tensors.
- *
- * Two dimensions are compatible when:
- * 1. they are equal, or
- * 2. one of them is 1
- *
- * The size of the output is the maximum size along each dimension of the
- * input operands. It starts with the trailing dimensions, and works its way
- * forward.
- *
- * Supported tensor {@link OperandCode}:
- * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_INT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
- *
- * Supported tensor rank: up to 4
- *
- * Inputs:
- * * 0: An n-D tensor, specifying the first input.
- * * 1: A tensor of the same {@link OperandCode}, and compatible dimensions
- * as input0.
- *
- * Outputs:
- * * 0: A boolean tensor indicating the truth value of non-equality of input tensors
- * Stored as {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} with offset 0
- * and scale 1.0f.
- * A non-zero byte represents True, a hit. A zero indicates otherwise.
- */
+ *
+ * IMPORTANT NOTICE:
+ * ANEURALNETWORKS_NOT_EQUAL_EX operation is DEPRECATED
+ * Use ANEURALNETWORKS_NOT_EQUAL instead
+ *
+ */
ANEURALNETWORKS_NOT_EQUAL_EX = 50017,
/**
- * Computes element-wise truth value of the input tensor negation.
- *
- * Takes one input tensor.
- * The output is the negation, which is logical complement, of the input tensor.
- *
- * Supported tensor {@link OperandCode}:
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
- *
- * Supported tensor rank: up to 4
*
- * Inputs:
- * * 0: An n-D boolean tensor, specifying the input.
- * Stored as {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} with offset 0
- * and scale 1.0f.
- * A non-zero byte represents True. A zero indicates otherwise.
+ * IMPORTANT NOTICE:
+ * ANEURALNETWORKS_LOGICAL_NOT_EX operation is DEPRECATED
+ * Use ANEURALNETWORKS_LOGICAL_NOT instead
*
- * Outputs:
- * * 0: A boolean tensor of the same size as input indicating the truth value of (NOT x)
- * Stored as {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} with offset 0
- * and scale 1.0f.
- * A non-zero byte represents True. A zero indicates otherwise.
*/
ANEURALNETWORKS_LOGICAL_NOT_EX = 50018,
/**
- * Computes element-wise truth value of two input tensors for LOGICAL AND.
- *
- * Takes two input tensors of identical {@link OperandCode} and compatible dimensions.
- * The output is the result of comparison of two input tensors.
- *
- * Two dimensions are compatible when:
- * 1. they are equal, or
- * 2. one of them is 1
- *
- * The size of the output is the maximum size along each dimension of the
- * input operands. It starts with the trailing dimensions, and works its way
- * forward.
- *
- * Supported tensor {@link OperandCode}:
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
*
- * Supported tensor rank: up to 4
- *
- * Inputs:
- * * 0: An n-D boolean tensor, specifying the first input.
- * Stored as {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} with offset 0
- * and scale 1.0f.
- * A non-zero byte represents True, a hit. A zero indicates otherwise.
- * * 1: A tensor of the same {@link OperandCode}, and compatible dimensions
- * as input0.
+ * IMPORTANT NOTICE:
+ * ANEURALNETWORKS_LOGICAL_AND_EX operation is DEPRECATED
+ * Use ANEURALNETWORKS_LOGICAL_AND instead
*
- * Outputs:
- * * 0: A boolean tensor indicating the truth value of two input tensors for LOGICAL AND.
- * Stored as {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} with offset 0
- * and scale 1.0f.
- * A non-zero byte represents True, a hit. A zero indicates otherwise.
*/
ANEURALNETWORKS_LOGICAL_AND_EX = 50019,
/**
- * Computes element-wise truth value of two input tensors for LOGICAL OR.
- *
- * Takes two input tensors of identical {@link OperandCode} and compatible dimensions.
- * The output is the result of comparison of two input tensors.
- *
- * Two dimensions are compatible when:
- * 1. they are equal, or
- * 2. one of them is 1
- *
- * The size of the output is the maximum size along each dimension of the
- * input operands. It starts with the trailing dimensions, and works its way
- * forward.
- *
- * Supported tensor {@link OperandCode}:
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
- *
- * Supported tensor rank: up to 4
*
- * Inputs:
- * * 0: An n-D boolean tensor, specifying the first input.
- * Stored as {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} with offset 0
- * and scale 1.0f.
- * A non-zero byte represents True, a hit. A zero indicates otherwise.
- * * 1: A tensor of the same {@link OperandCode}, and compatible dimensions
- * as input0.
+ * IMPORTANT NOTICE:
+ * ANEURALNETWORKS_LOGICAL_OR_EX operation is DEPRECATED
+ * Use ANEURALNETWORKS_LOGICAL_OR instead
*
- * Outputs:
- * * 0: A boolean tensor indicating the truth value of two input tensors for LOGICAL OR.
- * Stored as {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} with offset 0
- * and scale 1.0f.
- * A non-zero byte represents True, a hit. A zero indicates otherwise.
*/
ANEURALNETWORKS_LOGICAL_OR_EX = 50020,
/**
- * Computes the minimum of elements across dimensions of a tensor.
*
- * Reduces the input tensor along the given dimensions to reduce.
- *
- * Supported tensor {@link OperandCode}:
- * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_INT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
- *
- * Supported tensor rank: up to 4
- *
- * Inputs:
- * * 0: A tensor, specifying the input.
- * * 1: A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}. The dimensions
- * to reduce.
- * * 2: An {@link ANEURALNETWORKS_BOOL} scalar, keep_dims. If true,
- * retains reduced dimensions with length 1.
+ * IMPORTANT NOTICE:
+ * ANEURALNETWORKS_REDUCE_MIN_EX operation is DEPRECATED
+ * Use ANEURALNETWORKS_REDUCE_MIN instead
*
- * Outputs:
- * * 0: A tensor of the same {@link OperandCode} as input0.
*/
ANEURALNETWORKS_REDUCE_MIN_EX = 50021,
/**
- * Parametric Rectified Linear Unit.
*
- * It follows: f(x) = alpha * x for x < 0, f(x) = x for x >= 0, where alpha
- * is a learned array with the same {@link OperandCode} and compatible
- * dimensions as input x.
- *
- * Two dimensions are compatible when:
- * 1. they are equal, or
- * 2. one of them is 1
- *
- * The size of the output is the maximum size along each dimension of the
- * input operands. It starts with the trailing dimensions, and works its way
- * forward.
- *
- * Example:
- * input.dimension = {4, 1, 2}
- * alpha.dimension = {5, 4, 3, 1}
- * output.dimension = {5, 4, 3, 2}
- *
- * Supported tensor {@link OperandCode}:
- * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
- *
- * Supported tensor rank: up to 4
- *
- * Inputs:
- * * 0: A tensor, specifying the input.
- * * 1: A tensor of the same {@link OperandCode}, and compatible dimensions
- * as input0, specifying the alpha.
+ * IMPORTANT NOTICE:
+ * ANEURALNETWORKS_PRELU_EX operation is DEPRECATED
+ * Use ANEURALNETWORKS_PRELU instead
*
- * Outputs:
- * * 0: A tensor of the same {@link OperandCode} as input0.
*/
ANEURALNETWORKS_PRELU_EX = 50022,
@@ -546,50 +348,43 @@ typedef enum {
ANEURALNETWORKS_ONE_HOT_EX = 50023,
/**
- * For input tensors x and y, computes x >= y elementwise.
- *
- * Supported tensor {@link OperandCode}:
- * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_INT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
*
- * Supported tensor rank: from 1
+ * IMPORTANT NOTICE:
+ * ANEURALNETWORKS_GREATER_EQUAL_EX operation is DEPRECATED
+ * Use ANEURALNETWORKS_GREATER_EQUAL instead
*
- * This operation supports broadcasting.
+ */
+ ANEURALNETWORKS_GREATER_EQUAL_EX = 50024,
+
+ /**
*
- * Inputs:
- * * 0: A tensor.
- * * 1: A tensor of the same {@link OperandCode} and dimensions compatible
- * with input0.
+ * IMPORTANT NOTICE:
+ * ANEURALNETWORKS_LESS_EX operation is DEPRECATED
+ * Use ANEURALNETWORKS_LESS instead
*
- * Outputs:
- * * 0: A boolean tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
- * with offset 0 and scale 1.0f.
*/
- ANEURALNETWORKS_GREATER_EQUAL_EX = 50024,
+ ANEURALNETWORKS_LESS_EX = 50025,
/**
- * For input tensors x and y, computes x < y elementwise.
+ * Returns the input tensor's shape as a rank 1 output tensor
+ * If the input shape is [D0, D1, ..., D(N-1) ] and rank is N,
+ * the output tensor is [D0, D1, ... D(N-1)], shape is [N] and rank is 1.
*
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
*
- * Supported tensor rank: from 1
- *
- * This operation supports broadcasting.
+ * Supported tensor rank: virtually unlimited
*
* Inputs:
- * * 0: A tensor.
- * * 1: A tensor of the same {@link OperandCode} and dimensions compatible
- * with input0.
+ * * 0: The input tensor.
*
* Outputs:
- * * 0: A tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
- * with offset 0 and scale 1.0f.
+ * * 0: The rank-1 shape tensor.
*/
- ANEURALNETWORKS_LESS_EX = 50025,
+ ANEURALNETWORKS_SHAPE_EX = 50026,
+
} OperationCodeEx; // extends OperationCode
typedef OperationCodeEx ANeuralNetworksOperationTypeEx;
diff --git a/runtime/onert/CMakeLists.txt b/runtime/onert/CMakeLists.txt
new file mode 100644
index 000000000..88d52a5bd
--- /dev/null
+++ b/runtime/onert/CMakeLists.txt
@@ -0,0 +1,15 @@
+if(NOT BUILD_ONERT)
+ return()
+endif(NOT BUILD_ONERT)
+
+add_subdirectory(backend)
+add_subdirectory(frontend)
+add_subdirectory(core)
+add_subdirectory(api)
+add_subdirectory(sample)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+add_subdirectory(test)
diff --git a/runtime/onert/api/CMakeLists.txt b/runtime/onert/api/CMakeLists.txt
new file mode 100644
index 000000000..3132646c4
--- /dev/null
+++ b/runtime/onert/api/CMakeLists.txt
@@ -0,0 +1,21 @@
+file(GLOB_RECURSE API_SRC "*.cc")
+
+set(ONERT_DEV nnfw-dev)
+add_library(${ONERT_DEV} SHARED ${API_SRC})
+
+# Public headers to publish
+# nnfw_debug.h is header for runtime developer, so it will not be installed
+# But runtime developer can use nnfw_debug.h by linking nnfw-dev
+set(NNFW_API_HEADERS include/nnfw.h include/nnfw_dev.h)
+
+target_link_libraries(${ONERT_DEV} PUBLIC nnfw-nnapi-header)
+target_link_libraries(${ONERT_DEV} PRIVATE onert_core)
+target_link_libraries(${ONERT_DEV} PRIVATE jsoncpp tflite_loader circle_loader ${LIB_PTHREAD})
+target_link_libraries(${ONERT_DEV} PRIVATE nnfw_common)
+target_link_libraries(${ONERT_DEV} PRIVATE nnfw_coverage)
+target_include_directories(${ONERT_DEV} PUBLIC include)
+set_target_properties(${ONERT_DEV} PROPERTIES PUBLIC_HEADER "${NNFW_API_HEADERS}")
+
+install(TARGETS ${ONERT_DEV}
+ LIBRARY DESTINATION lib
+ PUBLIC_HEADER DESTINATION include/nnfw)
diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h
new file mode 100644
index 000000000..7d9fc048f
--- /dev/null
+++ b/runtime/onert/api/include/nnfw.h
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file nnfw.h
+ * @brief This file describes runtime API
+ */
+#ifndef __NNFW_H__
+#define __NNFW_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Session to query with runtime
+ *
+ * <p>nnfw_session is started and passed by calling {@link nnfw_create_session}.
+ * Each session has its own inference environment, such as model to inference, backend usage, etc.
+ *
+ * <p>Load model by calling {@link nnfw_load_model_from_file}
+ *
+ * <p>After loading, prepare inference by calling {@link nnfw_prepare}.
+ * Application can set runtime environment before prepare by calling
+ * {@link nnfw_set_available_backends} and {@link nnfw_set_op_backend}, and it is optional.
+ *
+ * <p>Application can inference by calling {@link nnfw_run}.
+ * Before inference, application has responsibility to set input tensor to set input data by calling
+ * {@link nnfw_set_output}, and output tensor to get output by calling {@link nnfw_set_input}
+ *
+ * <p>To support input and output setting, application can get
+ * input and output tensor information by calling<ul>
+ * <li>{@link nnfw_input_size}</li>
+ * <li>{@link nnfw_output_size}</li>
+ * <li>{@link nnfw_input_tensorinfo}</li>
+ * <li>{@link nnfw_output_tensorinfo}</li>
+ * </ul>
+ *
+ * <p>Application can inference many times using one session,
+ * but next inference can do after prior inference end
+ *
+ * <p>Application cannot use muitiple model using one session
+ */
+typedef struct nnfw_session nnfw_session;
+
+/**
+ * @brief Tensor types
+ *
+ * The type of tensor represented in {@link nnfw_tensorinfo}
+ */
+typedef enum {
+ /** A tensor of 32 bit floating point */
+ NNFW_TYPE_TENSOR_FLOAT32 = 0,
+ /** A tensor of 32 bit signed integer */
+ NNFW_TYPE_TENSOR_INT32 = 1,
+ /**
+ * A tensor of 8 bit integers that represent real numbers.
+ *
+ * real_value = (integer_value - zeroPoint) * scale.
+ */
+ NNFW_TYPE_TENSOR_QUANT8_ASYMM = 2,
+ /** A tensor of boolean */
+ NNFW_TYPE_TENSOR_BOOL = 3,
+ /** A tensor of 8 bit unsigned integer */
+ NNFW_TYPE_TENSOR_UINT8 = 4,
+} NNFW_TYPE;
+
+/**
+ * @brief Result Values
+ */
+typedef enum {
+ /** Successful */
+ NNFW_STATUS_NO_ERROR = 0,
+ /** Failed */
+ NNFW_STATUS_ERROR = 1,
+} NNFW_STATUS;
+
+/**
+ * @brief Data format of a tensor
+ */
+typedef enum {
+ /** Don't care layout */
+ NNFW_LAYOUT_NONE = 0,
+ /**
+ * Channel last layout
+ * If rank is 4, layout is NHWC
+ */
+ NNFW_LAYOUT_CHANNELS_LAST = 1,
+ /**
+ * Channel first layout
+ * If rank is 4, layout is NCHW
+ */
+ NNFW_LAYOUT_CHANNELS_FIRST = 2,
+} NNFW_LAYOUT;
+
+/**
+ * @brief Information ID for retrieving information on nnfw (e.g. version)
+ */
+typedef enum {
+ /** nnfw runtime version
+ * Its value is uint32 in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch.
+ */
+ NNFW_INFO_ID_VERSION = 0,
+} NNFW_INFO_ID;
+
+/**
+ * @brief Maximum rank expressible with nnfw
+ */
+#define NNFW_MAX_RANK (6)
+
+/**
+ * @brief tensor info describes the type and shape of tensors
+ *
+ * <p>This structure is used to describe input and output tensors.
+ * Application can get input and output tensor type and shape described in model by using
+ * {@link nnfw_input_tensorinfo} and {@link nnfw_output_tensorinfo}
+ *
+ * <p>Maximum rank is 6 (NNFW_MAX_RANK). And tensor's dimension value is filled in 'dims' field from
+ * index 0.
+ * For example, if tensor's rank is 4,
+ * application can get dimension value from dims[0], dims[1], dims[2], and dims[3]
+ */
+typedef struct nnfw_tensorinfo
+{
+ /** The data type */
+ NNFW_TYPE dtype;
+ /** The number of dimensions (rank) */
+ int32_t rank;
+ /**
+ * The dimension of tensor.
+ * Maximum rank is 6 (NNFW_MAX_RANK).
+ */
+ int32_t dims[NNFW_MAX_RANK];
+} nnfw_tensorinfo;
+
+/**
+ * @brief Create a new session instance.
+ *
+ * <p>This only creates a session.
+ * Model is loaded after {@link nnfw_load_model_from_file} is invoked.
+ * And inference is performed after {@link nnfw_run} is invoked.
+ *
+ * <p>{@link nnfw_close_session} should be called once
+ * if session is no longer need
+ *
+ * @param[out] session The session to be created
+ * @return NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_create_session(nnfw_session **session);
+
+/**
+ * @brief Close a session instance
+ *
+ * After called, access to closed session by application will be invalid
+ *
+ * @param[in] session The session to be closed
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_close_session(nnfw_session *session);
+
+/**
+ * @brief Load model from nnpackage file or directory
+ *
+ * @param[in] session nnfw_session loading the given nnpackage file/dir
+ * @param[in] package_file_path Path to the nnpackage file or unzipped directory to be loaded
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *package_file_path);
+
+/**
+ * @brief Apply i-th input's tensor info to resize input tensor
+ *
+ * This function should be called before {@link nnfw_prepare} is invoked, and
+ * should be called after {@link nnfw_load_model_from_file} is invoked
+ * See {@link nnfw_prepare} for information applying updated tensor info
+ * If this function is called many times for same index, tensor info is overwritten
+ *
+ * @param[in] session Session to the input tensor info is to be set
+ * @param[in] index Index of input to be applied (0-indexed)
+ * @param[in] tensor_info Tensor info to be applied
+ * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
+ */
+NNFW_STATUS nnfw_apply_tensorinfo(nnfw_session *session, uint32_t index,
+ nnfw_tensorinfo tensor_info);
+
+/**
+ * @brief Prepare session to be ready for inference
+ *
+ * This phase may finalize model compilation, scheduling, and additional settings.
+ * If {@link nnfw_apply_tensor} is called to apply input tensor info different with model
+ * before this function, tries to resize all tensors.
+ *
+ * @param[in] session the session to be prepared
+ * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
+ */
+NNFW_STATUS nnfw_prepare(nnfw_session *session);
+
+/**
+ * @brief Run inference
+ *
+ * <p>This function should be called after model is loaded by {@link nnfw_load_model_from_file},
+ * session is prepared for inference by {@link nnfw_prepare}, set input and output buffers
+ * by {@link nnfw_set_input} and {@link nnfw_set_output}.</p>
+ *
+ * <p>This function return after inference is finished.</p>
+ *
+ * @param[in] session The session to run inference
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_run(nnfw_session *session);
+
+/**
+ * @brief Set input buffer
+ *
+ * This function should be called after {@link nnfw_prepare}, and before first inference
+ * on session by {@link nnfw_run}. Application can reuse buffer for many inferences.
+ *
+ * @param[in] session Session to the input is to be set
+ * @param[in] index Index of input to be set (0-indexed)
+ * @param[in] type Type of the input
+ * @param[in] buffer Raw buffer for input
+ * @param[in] length Size of bytes of input buffer
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_set_input(nnfw_session *session, uint32_t index, NNFW_TYPE type,
+ const void *buffer, size_t length);
+
+/**
+ * @brief Set output buffer
+ *
+ * This function should be called after {@link nnfw_prepare}, and before first inference
+ * on session by {@link nnfw_run}. Application can reuse buffer for many inferences.
+ *
+ * @param[in] session Session from inference output is to be extracted
+ * @param[in] index Index of output to be set (0-indexed)
+ * @param[in] type Type of the output
+ * @param[out] buffer Raw buffer for output
+ * @param[in] length Size of bytes of output buffer
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_set_output(nnfw_session *session, uint32_t index, NNFW_TYPE type, void *buffer,
+ size_t length);
+
+/**
+ * @brief Get the number of inputs
+ *
+ * Application can call this function to get number of inputs defined in loaded model.
+ * This function should be called after {@link nnfw_load_model_from_file} is invoked to load model
+ *
+ * @param[in] session Session from input information is to be extracted
+ * @param[out] number Variable which the number of inputs is put into
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_input_size(nnfw_session *session, uint32_t *number);
+
+/**
+ * @brief Get the number of outputs
+ *
+ * Application can call this function to get number of outputs defined in loaded model.
+ * This function should be called after {@link nnfw_load_model_from_file} is invoked to load model
+ *
+ * @param[in] session Session from output information is to be extracted
+ * @param[out] number Variable which the number of outputs is put into
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_output_size(nnfw_session *session, uint32_t *number);
+
+/**
+ * @brief Set the layout of an input
+ *
+ * The input that does not call this has NNFW_LAYOUT_NHWC layout
+ *
+ * @param[in] session session from inference input is to be extracted
+ * @param[in] index index of input to be set (0-indexed)
+ * @param[in] layout layout to set to target input
+ *
+ * @return NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_set_input_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout);
+
+/**
+ * @brief Set the layout of an output
+ *
+ * The output that does not call this has NNFW_LAYOUT_NHWC layout
+ *
+ * @param[in] session session from inference output is to be extracted
+ * @param[in] index index of output to be set (0-indexed)
+ * @param[in] layout layout to set to target output
+ *
+ * @return NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_set_output_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout);
+
+/**
+ * @brief Get i-th input tensor info
+ *
+ * <p>Before {@link nnfw_prepare} is invoked, this function return tensor info in model,
+ * so updated tensor info by {@link nnfw_apply_tensorinfo} is not returned.</p>
+ *
+ * <p>After {@link nnfw_prepare} is invoked, this function return updated tensor info
+ * if tensor info is updated by {@link nnfw_apply_tensorinfo}.</p>
+ *
+ * @param[in] session Session from input information is to be extracted
+ * @param[in] index Index of input
+ * @param[out] tensor_info Tensor info (shape, type, etc)
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_input_tensorinfo(nnfw_session *session, uint32_t index,
+ nnfw_tensorinfo *tensor_info);
+
+/**
+ * @brief Get i-th output tensor info
+ *
+ * <p>Before {@link nnfw_prepare} is invoked, this function return tensor info in model,
+ * so updated tensor info by {@link nnfw_apply_tensorinfo} is not returned.</p>
+ *
+ * <p>After {@link nnfw_prepare} is invoked, this function return updated tensor info
+ * if tensor info is updated by {@link nnfw_apply_tensorinfo}.</p>
+ *
+ * @param[in] session Session from output information is to be extracted
+ * @param[in] index Index of output
+ * @param[out] tensor_info Tensor info (shape, type, etc)
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_output_tensorinfo(nnfw_session *session, uint32_t index,
+ nnfw_tensorinfo *tensor_info);
+
+/**
+ * @brief Set available backends
+ *
+ * This function should be called before {@link nnfw_prepare} is invoked.
+ *
+ * <p>Supported backends differs on each platforms.
+ * For example, `x86_64` supports "cpu" only.
+ * Can set multiple backends by semicolon (ex: "acl_cl;cpu").
+ * Among the multiple backends, the 1st element is used as default backend.</p>
+ *
+ * @note Possible backend strings are: "cpu", "acl_cl", "acl_neon", "srcn"
+ *
+ * @param[in] session session to which available backends are set
+ * @param[in] backends available backends on which nnfw uses
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_set_available_backends(nnfw_session *session, const char *backends);
+
+/**
+ * @brief Set the operation's backend
+ *
+ * This function should be called before {@link nnfw_prepare} is invoked.
+ *
+ * <p>Supported backends differs on each platforms.
+ * For example, `x86_64` supports "cpu" only.
+ * The backend for op has higher priority than available backends specified by
+ * nnfw_set_available_backends.</p>
+ *
+ * @note Possible backend strings are: "cpu", "acl_cl", "acl_neon"
+ *
+ * @param[in] session session to be modified
+ * @param[in] op operation to be set
+ * @param[in] backend backend on which the operation runs
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_set_op_backend(nnfw_session *session, const char *op, const char *backend);
+
+/**
+ * @brief Retrieve uint32 type of nnfw information for given information ID.
+ *
+ * <p>Retrieves the information of property given by information id </p>
+ *
+ * @note The input session could be null for global information (e.g. runtime version).
+ *
+ * @param[in] session session to be queried on.
+ * @param[in] id information ID to be queried
+ * @param[out] val uint32 value to be returned.
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_query_info_u32(nnfw_session *session, NNFW_INFO_ID id, uint32_t *val);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/runtime/onert/api/include/nnfw_debug.h b/runtime/onert/api/include/nnfw_debug.h
new file mode 100644
index 000000000..fe335ec4e
--- /dev/null
+++ b/runtime/onert/api/include/nnfw_debug.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_DEBUG_H__
+#define __NNFW_DEBUG_H__
+
+#include "nnfw.h"
+
+NNFW_STATUS nnfw_create_debug_session(nnfw_session **session);
+
+NNFW_STATUS nnfw_set_config(nnfw_session *session, const char *key, const char *value);
+
+#endif // __NNFW_DEBUG_H__
diff --git a/runtime/neurun/api/include/nnfw_dev.h b/runtime/onert/api/include/nnfw_dev.h
index ecf0597cf..ecf0597cf 100644
--- a/runtime/neurun/api/include/nnfw_dev.h
+++ b/runtime/onert/api/include/nnfw_dev.h
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
new file mode 100644
index 000000000..25be998a2
--- /dev/null
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_VERSION_H__
+#define __NNFW_VERSION_H__
+
+/**
+ * NNFW_VERSION is a uint32 value representing nnfw runtime version
+ * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
+ */
+#define NNFW_VERSION 0x01000300
+
+#endif // __NNFW_VERSION_H__
diff --git a/runtime/onert/api/src/CustomKernel.cc b/runtime/onert/api/src/CustomKernel.cc
new file mode 100644
index 000000000..a383dfe9c
--- /dev/null
+++ b/runtime/onert/api/src/CustomKernel.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CustomKernel.h"
+
+namespace onert
+{
+namespace frontend
+{
+namespace custom
+{
+
+using namespace backend::custom;
+
+// Converts the runtime's internal custom-op descriptors (backend::custom / ir
+// types) into the public C API structs (nnfw_operand / nnfw_tensorinfo) that
+// are handed to user-supplied custom kernels.
+class APIConverter
+{
+public:
+  // Wrap a raw allocation pointer together with its type description into an nnfw_operand.
+  static nnfw_operand convertOperand(void *alloc, const TypeInfo &type)
+  {
+    nnfw_operand api_operand;
+    api_operand.allocation = alloc;
+    api_operand.type = convertType(type);
+    return api_operand;
+  }
+
+  // Translate an internal TypeInfo (shape + dtype) into the public nnfw_tensorinfo.
+  // Throws std::runtime_error for datatypes without a public equivalent.
+  static nnfw_tensorinfo convertType(const TypeInfo &type)
+  {
+    nnfw_tensorinfo api_type;
+    api_type.rank = type.shape.rank();
+    assert(type.shape.rank() <= 6); // rank must fit the fixed-size dims array (bound asserted here)
+    std::copy(type.shape.dims().begin(), type.shape.dims().end(), std::begin(api_type.dims));
+
+    switch (type.dtype)
+    {
+      case ir::DataType::FLOAT32:
+        api_type.dtype = NNFW_TYPE_TENSOR_FLOAT32;
+        break;
+      case ir::DataType::INT32:
+        api_type.dtype = NNFW_TYPE_TENSOR_INT32;
+        break;
+      case ir::DataType::QUANT8_ASYMM:
+        api_type.dtype = NNFW_TYPE_TENSOR_QUANT8_ASYMM;
+        break;
+      case ir::DataType::BOOL8:
+        api_type.dtype = NNFW_TYPE_TENSOR_BOOL;
+        break;
+      default:
+        throw std::runtime_error("Unsupported tensor datatype");
+    }
+    return api_type;
+  }
+};
+
+// Store the user eval callback; _params and _userdata are filled in later by configure().
+Kernel::Kernel(const nnfw_custom_eval evalFunction)
+    : _params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
+{
+}
+
+// Fill _params (input/output operand descriptors) and the userdata pointer from the
+// given config so run() can later invoke the user eval function.
+// NOTE(review): the `new nnfw_operand[...]` arrays are never deleted in this file;
+// calling configure() a second time would leak the previous arrays — confirm ownership.
+void Kernel::configure(CustomKernelConfigParams &&inParams)
+{
+  _userdata = inParams.userdata;
+  _userdata_size = inParams.userdata_size;
+
+  // Wrap each input allocation + type into the public nnfw_operand form.
+  _params.ninputs = inParams.input_allocations.size();
+  _params.inputs = new nnfw_operand[_params.ninputs];
+  for (size_t i = 0; i < _params.ninputs; ++i)
+  {
+    _params.inputs[i] =
+        APIConverter::convertOperand(inParams.input_allocations[i], inParams.input_types[i]);
+  }
+
+  // Same for outputs.
+  _params.noutputs = inParams.output_allocations.size();
+  _params.outputs = new nnfw_operand[_params.noutputs];
+  for (size_t i = 0; i < _params.noutputs; ++i)
+  {
+    _params.outputs[i] =
+        APIConverter::convertOperand(inParams.output_allocations[i], inParams.output_types[i]);
+  }
+}
+
+// Invoke the user-supplied eval function with the configured params and userdata.
+void Kernel::run() { _evalFunction(&_params, _userdata, _userdata_size); }
+
+} // namespace custom
+} // namespace frontend
+} // namespace onert
diff --git a/runtime/onert/api/src/CustomKernel.h b/runtime/onert/api/src/CustomKernel.h
new file mode 100644
index 000000000..2acf5979e
--- /dev/null
+++ b/runtime/onert/api/src/CustomKernel.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CUSTOM_KERNEL_H__
+#define __ONERT_BACKEND_CUSTOM_KERNEL_H__
+
+#include "nnfw_dev.h"
+
+#include "backend/CustomKernelBuilder.h"
+#include "exec/IFunction.h"
+
+#include <vector>
+
+namespace onert
+{
+namespace frontend
+{
+namespace custom
+{
+
+// IFunction implementation that forwards execution to a user-registered custom
+// kernel callback (nnfw_custom_eval, declared in nnfw_dev.h).
+class Kernel : public ::onert::exec::IFunction
+{
+public:
+  explicit Kernel(nnfw_custom_eval evalFunction);
+
+  nnfw_custom_kernel_params _params; // operand descriptors handed to the callback
+  char *_userdata;                   // opaque user data (no ownership taken in this class)
+  size_t _userdata_size;
+
+  nnfw_custom_eval _evalFunction; // user callback invoked by run()
+  // nnfw_custom_type_infer _type_infer_function; //Unused for now
+
+  /**
+   * Fills _params field used later by user specified eval function
+   * @param inParams custom kernel parameters
+   */
+  virtual void configure(backend::custom::CustomKernelConfigParams &&inParams);
+
+  void run() override;
+  void runSync() override { run(); } // the callback is synchronous, so sync == plain run
+};
+
+} // namespace custom
+} // namespace frontend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CUSTOM_KERNEL_H__
diff --git a/runtime/onert/api/src/CustomKernelRegistry.cc b/runtime/onert/api/src/CustomKernelRegistry.cc
new file mode 100644
index 000000000..7812609d1
--- /dev/null
+++ b/runtime/onert/api/src/CustomKernelRegistry.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CustomKernelRegistry.h"
+
+#include <memory>
+
+namespace onert
+{
+namespace frontend
+{
+namespace custom
+{
+
+// Register the eval function for the given op id.
+// NOTE: unordered_map::emplace does not overwrite — re-registering an existing id
+// keeps the first registration.
+void KernelRegistry::registerKernel(const std::string &id, nnfw_custom_eval evalFunction)
+{
+  _storage.emplace(id, evalFunction);
+}
+
+// Create a builder view over this registry. The builder stores a raw pointer back
+// to the registry, so the registry must outlive the returned builder.
+std::shared_ptr<backend::custom::IKernelBuilder> KernelRegistry::getBuilder()
+{
+  return std::make_unique<KernelBuilder>(this);
+}
+
+// Look up the eval function registered for `id` and wrap it in a fresh Kernel.
+// Throws std::runtime_error when no kernel was registered under that id.
+std::unique_ptr<Kernel> KernelRegistry::buildKernelForOp(const std::string &id)
+{
+  auto it = _storage.find(id);
+  if (it == _storage.end())
+  {
+    throw std::runtime_error("Unable to find associated kernel for op");
+  }
+
+  return std::make_unique<Kernel>(it->second);
+}
+
+// Kernel builder: fetch the kernel registered for `id` and apply the given
+// configuration so it is ready to run.
+std::unique_ptr<exec::IFunction>
+KernelBuilder::buildKernel(const std::string &id,
+                           backend::custom::CustomKernelConfigParams &&params) const
+{
+  auto kernel = _registry->buildKernelForOp(id);
+  kernel->configure(std::move(params));
+
+  return kernel;
+}
+
+// Non-owning: `registry` must outlive this builder.
+KernelBuilder::KernelBuilder(KernelRegistry *registry) : _registry(registry) {}
+
+} // namespace custom
+} // namespace frontend
+} // namespace onert
diff --git a/runtime/onert/api/src/CustomKernelRegistry.h b/runtime/onert/api/src/CustomKernelRegistry.h
new file mode 100644
index 000000000..fe60d5bcc
--- /dev/null
+++ b/runtime/onert/api/src/CustomKernelRegistry.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
+#define __ONERT_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
+
+#include "CustomKernel.h"
+
+#include <unordered_map>
+#include <functional>
+#include <memory>
+
+#include <iostream>
+
+namespace onert
+{
+namespace frontend
+{
+namespace custom
+{
+
+// Owns the mapping from custom-op id to the user-provided eval function, and
+// hands out Kernel instances / builders based on it.
+class KernelRegistry
+{
+public:
+  // Register an eval function under `id` (first registration wins; see .cc).
+  void registerKernel(const std::string &id, nnfw_custom_eval evalFunction);
+
+  // Builder view over this registry; the registry must outlive it.
+  std::shared_ptr<backend::custom::IKernelBuilder> getBuilder();
+  // Create a Kernel for `id`; throws std::runtime_error when unregistered.
+  std::unique_ptr<Kernel> buildKernelForOp(const std::string &id);
+
+private:
+  std::unordered_map<std::string, nnfw_custom_eval> _storage;
+};
+
+// IKernelBuilder adapter that resolves custom kernels from a KernelRegistry.
+class KernelBuilder : public backend::custom::IKernelBuilder
+{
+public:
+  // `registry` is borrowed, not owned; it must outlive this builder.
+  KernelBuilder(KernelRegistry *registry);
+
+  std::unique_ptr<exec::IFunction>
+  buildKernel(const std::string &id,
+              backend::custom::CustomKernelConfigParams &&params) const override;
+
+private:
+  KernelRegistry *_registry; // non-owning
+};
+
+} // namespace custom
+} // namespace frontend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
diff --git a/runtime/onert/api/src/OpMap.lst b/runtime/onert/api/src/OpMap.lst
new file mode 100644
index 000000000..a3d1b25ea
--- /dev/null
+++ b/runtime/onert/api/src/OpMap.lst
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MAP_MACRO
+#error Define MAP_MACRO before including this file
+#endif
+
+// circle operation | onert internal operation
+MAP_MACRO(ADD , Add)
+MAP_MACRO(SUB , Sub)
+MAP_MACRO(BATCH_TO_SPACE_ND , BatchToSpaceND)
+MAP_MACRO(CAST , Cast)
+MAP_MACRO(CONV_2D , Conv2D)
+MAP_MACRO(DEPTHWISE_CONV_2D , DepthwiseConv2D)
+MAP_MACRO(AVERAGE_POOL_2D , AvgPool2D)
+MAP_MACRO(MAX_POOL_2D , MaxPool2D)
+MAP_MACRO(CONCATENATION , Concat)
+MAP_MACRO(FULLY_CONNECTED , FullyConnected)
+MAP_MACRO(SUM , ReduceSum)
+MAP_MACRO(RESHAPE , Reshape)
+MAP_MACRO(MUL , Mul)
+MAP_MACRO(SOFTMAX , Softmax)
+MAP_MACRO(SQUEEZE , Squeeze)
+MAP_MACRO(SLICE , Slice)
+MAP_MACRO(STRIDED_SLICE , StridedSlice)
+MAP_MACRO(TANH , Tanh)
+MAP_MACRO(LOGISTIC , Logistic)
+MAP_MACRO(DIV , Div)
+MAP_MACRO(TRANSPOSE , Transpose)
+MAP_MACRO(EXP , Exp)
+MAP_MACRO(REDUCE_MAX , ReduceMax)
+// UNMATCHED
+//MAP_MACRO(Comparison)
+MAP_MACRO(LOGICAL_AND , LogicalAnd)
+MAP_MACRO(LOGICAL_OR , LogicalOr)
+MAP_MACRO(LOGICAL_NOT , LogicalNot)
+MAP_MACRO(LSTM , LSTM)
+MAP_MACRO(RSQRT , RSQRT)
+MAP_MACRO(RELU , ReLU)
+MAP_MACRO(RESIZE_BILINEAR , ResizeBilinear)
+MAP_MACRO(RELU_N1_TO_1 , ReLU1)
+MAP_MACRO(RELU6 , ReLU6)
+MAP_MACRO(RNN , RNN)
+MAP_MACRO(FLOOR , Floor)
+MAP_MACRO(SPACE_TO_BATCH_ND , SpaceToBatchND)
+MAP_MACRO(SPACE_TO_DEPTH , SpaceToDepth)
+MAP_MACRO(L2_POOL_2D , L2Pool2D)
+MAP_MACRO(EMBEDDING_LOOKUP , EmbeddingLookup)
+MAP_MACRO(L2_NORMALIZATION , L2Normalization)
+MAP_MACRO(HASHTABLE_LOOKUP , HashtableLookup)
+MAP_MACRO(INSTANCE_NORM , InstanceNorm)
+MAP_MACRO(PRELU , PReLU)
+MAP_MACRO(TRANSPOSE_CONV , TransposeConv)
+MAP_MACRO(SQRT , SQRT)
+MAP_MACRO(SQUARED_DIFFERENCE , SquaredDifference)
+MAP_MACRO(TOPK_V2 , TopKV2)
+MAP_MACRO(GATHER , Gather)
+MAP_MACRO(NEG , Neg)
+MAP_MACRO(ABS , Abs)
+MAP_MACRO(ARG_MAX , ArgMax)
+MAP_MACRO(DEQUANTIZE , Dequantize)
+MAP_MACRO(MEAN , Mean)
+MAP_MACRO(LOCAL_RESPONSE_NORMALIZATION , LocalResponseNormalization)
+// UNDEFINED IN CIRCLE
+//MAP_MACRO(DepthToSpace)
+MAP_MACRO(PACK , Pack)
+MAP_MACRO(REDUCE_MIN , ReduceMin)
+MAP_MACRO(SPLIT , Split)
+MAP_MACRO(UNPACK , Unpack)
+MAP_MACRO(PAD , Pad)
+MAP_MACRO(CUSTOM , Custom)
+// UNDEFINED IN CIRCLE
+//MAP_MACRO(Permute)
+MAP_MACRO(MINIMUM , Min)
+MAP_MACRO(MAXIMUM , Max)
+MAP_MACRO(ONE_HOT , OneHot)
diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc
new file mode 100644
index 000000000..7e6792ff3
--- /dev/null
+++ b/runtime/onert/api/src/nnfw_api.cc
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnfw_api_internal.h"
+#include "nnfw_version.h"
+
+#include <new>
+
+/*
+ * Create a new session instance
+ *
+ * @param session the session to be created
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR otherwise
+ */
+NNFW_STATUS nnfw_create_session(nnfw_session **session)
+{
+  if (session == nullptr)
+    return NNFW_STATUS_ERROR;
+
+  // This is a C API boundary: an allocation failure must surface as an error
+  // code instead of letting std::bad_alloc propagate into C callers.
+  try
+  {
+    *session = new nnfw_session();
+  }
+  catch (const std::bad_alloc &)
+  {
+    *session = nullptr;
+    return NNFW_STATUS_ERROR;
+  }
+
+  return NNFW_STATUS_NO_ERROR;
+}
+
+/*
+ * Close a session instance
+ *
+ * @param session the session to be closed
+ * @return NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_close_session(nnfw_session *session)
+{
+  // `delete nullptr` is well-defined, so closing a null session is a safe no-op.
+  delete session;
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Shared guard for the API entry points below: return NNFW_STATUS_ERROR when the
+// given pointer argument is null.
+#define NNFW_RETURN_ERROR_IF_NULL(p) \
+  do                                 \
+  {                                  \
+    if ((p) == NULL)                 \
+      return NNFW_STATUS_ERROR;      \
+  } while (0)
+
+/*
+ * Load model from nnpackage file or directory
+ *
+ * @param session nnfw_session loading the given nnpackage file/dir
+ * @param package_file_path path to the nnpackage file or unzipped directory to be loaded
+ *
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *package_file_path)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->load_model_from_file(package_file_path);
+}
+
+/*
+ * Prepare session to be ready for inference
+ * This phase may finalize model compilation, scheduling, and additional settings.
+ *
+ * @param session the session to be prepared
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+NNFW_STATUS nnfw_prepare(nnfw_session *session)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->prepare();
+}
+
+/*
+ * Run inference
+ *
+ * @param session the session to run inference
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+NNFW_STATUS nnfw_run(nnfw_session *session)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->run();
+}
+
+/*
+ * Set input
+ *
+ * @param session session to the input is to be set
+ * @param index index of input to be set (0-indexed)
+ * @param type type of the input
+ * @param buffer raw buffer for input
+ * @param length size of bytes of input
+ *
+ * @note buffer is forwarded to the session as-is; presumably it must stay valid
+ *       until inference runs — confirm in nnfw_session::set_input.
+ *
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+
+NNFW_STATUS nnfw_set_input(nnfw_session *session, uint32_t index, NNFW_TYPE type,
+                           const void *buffer, size_t length)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->set_input(index, type, buffer, length);
+}
+
+/*
+ * Set output
+ *
+ * @param session session from inference output is to be extracted
+ * @param index index of output to be set (0-indexed)
+ * @param type type of the output
+ * @param buffer raw buffer for output
+ * @param length size of bytes of output
+ *
+ * @note buffer is forwarded to the session as-is; presumably it must stay valid
+ *       until inference runs — confirm in nnfw_session::set_output.
+ *
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+
+NNFW_STATUS nnfw_set_output(nnfw_session *session, uint32_t index, NNFW_TYPE type, void *buffer,
+                            size_t length)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->set_output(index, type, buffer, length);
+}
+
+/*
+ * Get the number of inputs
+ *
+ * @param[in] session session from input information is to be extracted
+ * @param[out] number variable which the number of inputs is put into
+ *
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+
+NNFW_STATUS nnfw_input_size(nnfw_session *session, uint32_t *number)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->input_size(number);
+}
+
+/*
+ * Get the number of outputs
+ *
+ * @param[in] session session from output information is to be extracted
+ * @param[out] number variable which the number of outputs is put into
+ *
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+NNFW_STATUS nnfw_output_size(nnfw_session *session, uint32_t *number)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->output_size(number);
+}
+
+/*
+ * Set the layout of an input
+ * @note The input that does not call this has NNFW_LAYOUT_CHANNELS_LAST layout
+ *
+ * @param[in] session session from inference input is to be extracted
+ * @param[in] index index of input to be set (0-indexed)
+ * @param[in] layout layout to set to target input
+ *
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+NNFW_STATUS nnfw_set_input_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->set_input_layout(index, layout);
+}
+
+/*
+ * Set the layout of an output
+ * @note The output that does not call this has NNFW_LAYOUT_CHANNELS_LAST layout
+ *
+ * @param[in] session session from inference output is to be extracted
+ * @param[in] index index of output to be set (0-indexed)
+ * @param[in] layout layout to set to target output
+ *
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+NNFW_STATUS nnfw_set_output_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->set_output_layout(index, layout);
+}
+
+/*
+ * Get i-th input tensor info
+ *
+ * @param[in] session session from input information is to be extracted
+ * @param[in] index index of input
+ * @param[out] tensor_info nnfw_tensorinfo to be filled
+ *
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+NNFW_STATUS nnfw_input_tensorinfo(nnfw_session *session, uint32_t index,
+                                  nnfw_tensorinfo *tensor_info)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->input_tensorinfo(index, tensor_info);
+}
+
+/*
+ * Get i-th output tensor info
+ *
+ * @param[in] session session from output information is to be extracted
+ * @param[in] index index of output
+ * @param[out] tensor_info nnfw_tensorinfo to be filled
+ *
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+NNFW_STATUS nnfw_output_tensorinfo(nnfw_session *session, uint32_t index,
+                                   nnfw_tensorinfo *tensor_info)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->output_tensorinfo(index, tensor_info);
+}
+
+/*
+ * Register custom operation
+ * @param session session to register this operation
+ * @param id operation id
+ * @param info registration info ( eval function, etc. )
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+// NOTE(review): `info` is dereferenced below without a null check — confirm callers
+// always pass a valid pointer, or guard it like `session`.
+NNFW_STATUS nnfw_register_custom_op_info(nnfw_session *session, const char *id,
+                                         custom_kernel_registration_info *info)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->register_custom_operation(id, info->eval_function);
+}
+
+/*
+ * Apply i-th input's tensor info to resize the input tensor
+ *
+ * @param[in] session session to which the input tensor info is applied
+ * @param[in] index index of input to be applied (0-indexed)
+ * @param[in] tensor_info tensor info to be applied
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+NNFW_STATUS nnfw_apply_tensorinfo(nnfw_session *session, uint32_t index,
+                                  nnfw_tensorinfo tensor_info)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->apply_tensorinfo(index, tensor_info);
+}
+
+/*
+ * Set available backends
+ *
+ * @param[in] session session to which available backends are set
+ * @param[in] backends available backends on which nnfw uses
+ *
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+NNFW_STATUS nnfw_set_available_backends(nnfw_session *session, const char *backends)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->set_available_backends(backends);
+}
+
+/*
+ * Set the operation's backend
+ *
+ * @param[in] session session to be modified
+ * @param[in] op operation to be set
+ * @param[in] backend backend on which the operation runs
+ *
+ * @return NNFW_STATUS_NO_ERROR if successful, NNFW_STATUS_ERROR when session is null
+ */
+NNFW_STATUS nnfw_set_op_backend(nnfw_session *session, const char *op, const char *backend)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->set_op_backend(op, backend);
+}
+
+/*
+ * Retrieve uint32 type of nnfw information for given information ID.
+ *
+ * @param[in] session session to be queried on; may be null (currently unused)
+ * @param[in] id information ID to be queried
+ * @param[out] val uint32 value to be returned
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_query_info_u32(nnfw_session *session, NNFW_INFO_ID id, uint32_t *val)
+{
+  (void)session; // the only supported query (version) is global, not per-session
+  switch (id)
+  {
+    case NNFW_INFO_ID_VERSION:
+      if (val)
+      {
+        *val = NNFW_VERSION;
+        return NNFW_STATUS_NO_ERROR;
+      }
+      break;
+    default:
+      return NNFW_STATUS_ERROR;
+  }
+  // Reached only when `val` is null for a recognized ID.
+  return NNFW_STATUS_ERROR;
+}
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc
new file mode 100644
index 000000000..4fff90443
--- /dev/null
+++ b/runtime/onert/api/src/nnfw_api_internal.cc
@@ -0,0 +1,518 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnfw_api_internal.h"
+#include "CustomKernelRegistry.h"
+#include "compiler/Compiler.h"
+#include "util/ConfigSource.h"
+#include "exec/Execution.h"
+#include "circle_loader.h"
+#include "tflite_loader.h"
+#include "json/json.h"
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <dirent.h>
+#include <util/ConfigSource.h>
+
+/*
+ * API does not accept string argument longer than max length below
+ */
+#define MAX_BACKEND_NAME_LENGTH 32
+#define MAX_OP_NAME_LENGTH 64
+
+// Returns true iff a '\0' byte appears within the first `length` bytes of `str`
+// (i.e. the string is properly terminated inside the allowed budget).
+static bool null_terminating(const char *str, uint32_t length)
+{
+  for (uint32_t pos = 0; pos < length; ++pos)
+  {
+    if (str[pos] == '\0')
+      return true;
+  }
+  return false;
+}
+
+// Translate an API-level layout enum into the runtime IR layout.
+// Anything other than channels-last/first (e.g. NNFW_LAYOUT_NONE) maps to UNKNOWN.
+static onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
+{
+  switch (layout)
+  {
+    case NNFW_LAYOUT_CHANNELS_LAST:
+      return onert::ir::Layout::NHWC;
+    case NNFW_LAYOUT_CHANNELS_FIRST:
+      return onert::ir::Layout::NCHW;
+    default:
+      return onert::ir::Layout::UNKNOWN;
+  }
+}
+
+// Create an empty session: no model loaded, no compiler, no executor yet.
+// A fresh kernel registry (for custom ops) and a general config source are
+// installed; the derived nnfw_debug_session replaces _source with an
+// EnvConfigSource in its own constructor.
+nnfw_session::nnfw_session()
+    : _primary_subgraph{nullptr}, _execution{nullptr},
+      _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()},
+      _source{std::make_unique<onert::util::GeneralConfigSource>()}
+{
+  // DO NOTHING
+}
+
+nnfw_session::~nnfw_session() = default;
+
+// Load the first model listed in <package_dir>/metadata/MANIFEST, bind the
+// custom-kernel builder to it, and create the compiler for prepare().
+//
+// @param[in] package_dir path to an (unzipped) nnpackage directory
+// @return NNFW_STATUS_NO_ERROR on success, NNFW_STATUS_ERROR on any failure
+NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
+{
+  // TODO : add support for zipped package file load
+  DIR *dir;
+  if (!(dir = opendir(package_dir)))
+  {
+    std::cerr << "invalid nnpackage directory: " << package_dir << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+  closedir(dir);
+
+  try
+  {
+    std::string manifest_file_name(package_dir);
+    manifest_file_name += "/metadata/MANIFEST";
+    std::ifstream mfs(manifest_file_name);
+    if (!mfs.is_open())
+    {
+      // Without this check a missing MANIFEST silently parses as an empty JSON
+      // document and fails later with a confusing model-load error
+      std::cerr << "cannot open MANIFEST file: " << manifest_file_name << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+
+    // extract the filename of the first(index 0) model
+    // e.g. In MANIFEST file, { "models" : [ "firstmodel.tflite", "2nd.tflite" ] }
+    Json::Value root;
+    mfs >> root;
+    Json::Value models = root["models"];
+    Json::Value model_types = root["model-types"];
+
+    auto model_file_path = package_dir + std::string("/") + models[0].asString(); // first model
+    auto model_type = model_types[0].asString();                                  // first model's type
+    if (model_type == "tflite")
+    {
+      _primary_subgraph = onert::tflite_loader::loadModel(model_file_path.c_str());
+    }
+    else if (model_type == "circle")
+    {
+      _primary_subgraph = onert::circle_loader::loadModel(model_file_path.c_str());
+    }
+    else
+    {
+      std::cerr << "Unsupported model type in MANIFEST" << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+    // Custom operations in the model are resolved through this session's registry
+    _primary_subgraph->bindKernelBuilder(_kernel_registry->getBuilder());
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during model loading : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  _compiler = std::make_unique<onert::compiler::Compiler>(_primary_subgraph);
+
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Compile the loaded model and create the executor. Valid exactly once, and
+// only after load_model_from_file succeeded.
+NNFW_STATUS nnfw_session::prepare()
+{
+  if (!_primary_subgraph || _primary_subgraph->isBuildingPhase())
+  {
+    std::cerr << "Error during model prepare : "
+              << "prepare should be run after load_model" << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  // NOTE. If users want to run prepare() more than one time, this could be removed.
+  // _source is moved into the global config below, so a second call observes a
+  // null _source (or an existing _execution) and fails here.
+  if (!_source || _execution)
+  {
+    std::cerr << "Error during model prepare : "
+              << "prepare should be run once" << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  // TODO : add additional setting routine(executor type, backend)
+  // Note that we assume acl_cl backend
+
+  _source->set("DELETE_CACHED_DATA", "1");
+
+  try
+  {
+    // config_source setting: hand this session's config over to the runtime
+    using onert::util::config_source;
+    config_source(std::move(_source));
+
+    _compiler->compile();
+    std::shared_ptr<onert::exec::IExecutor> executor;
+    _compiler->release(executor); // compiler transfers executor ownership out
+    _execution = std::make_shared<onert::exec::Execution>(executor);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during model prepare : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Execute one synchronous inference with the buffers set via set_input/set_output.
+// Requires prepare() to have succeeded (_execution must exist).
+NNFW_STATUS nnfw_session::run()
+{
+  if (!_execution)
+  {
+    std::cerr << "Error during nnfw_session::run : "
+              << "run should be run after prepare" << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  try
+  {
+    _execution->execute();
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Bind a user buffer to input tensor `index`. The declared `type` is currently
+// ignored. Requires prepare() to have succeeded.
+NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const void *buffer,
+                                    size_t length)
+{
+  // _execution is only created by prepare(); dereferencing a null shared_ptr is
+  // undefined behavior and is NOT caught by the catch block below
+  if (!_execution)
+  {
+    std::cerr << "Error during nnfw_session::set_input : "
+              << "set_input should be run after prepare" << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  try
+  {
+    _execution->setInput(onert::ir::IOIndex(index), buffer, length);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during nnfw_session::set_input : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Bind a user buffer to output tensor `index`. The declared `type` is currently
+// ignored. Requires prepare() to have succeeded.
+NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *buffer,
+                                     size_t length)
+{
+  // Guard against use before prepare(): a null _execution would be undefined
+  // behavior, not an exception, so the catch below would not help
+  if (!_execution)
+  {
+    std::cerr << "Error during nnfw_session::set_output : "
+              << "set_output should be run after prepare" << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  try
+  {
+    _execution->setOutput(onert::ir::IOIndex(index), buffer, length);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during nnfw_session::set_output : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Report the number of model inputs into *number. Requires a loaded model.
+NNFW_STATUS nnfw_session::input_size(uint32_t *number)
+{
+  try
+  {
+    if (number == nullptr)
+    {
+      std::cerr << "Error during nnfw_session::input_size, number is null pointer." << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+    // Guard: before load_model, _primary_subgraph is null and dereferencing it
+    // would crash rather than throw
+    if (!_primary_subgraph)
+    {
+      std::cerr << "Error during nnfw_session::input_size : "
+                << "input_size should be run after load_model" << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+    *number = _primary_subgraph->getInputs().size();
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during nnfw_session::input_size : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Report the number of model outputs into *number. Requires a loaded model.
+NNFW_STATUS nnfw_session::output_size(uint32_t *number)
+{
+  try
+  {
+    if (number == nullptr)
+    {
+      std::cerr << "Error during nnfw_session::output_size, number is null pointer." << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+    // Guard: before load_model, _primary_subgraph is null and dereferencing it
+    // would crash rather than throw
+    if (!_primary_subgraph)
+    {
+      std::cerr << "Error during nnfw_session::output_size : "
+                << "output_size should be run after load_model" << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+    *number = _primary_subgraph->getOutputs().size();
+  }
+  catch (const std::exception &e)
+  {
+    // " : " separator added for consistency with every other error message here
+    std::cerr << "Error during nnfw_session::output_size : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Set the data layout expected for input tensor `index`.
+// Accepts NONE / CHANNELS_FIRST / CHANNELS_LAST; requires prepare() to have run.
+NNFW_STATUS nnfw_session::set_input_layout(uint32_t index, NNFW_LAYOUT layout)
+{
+  // Guard against use before prepare(): null _execution is UB, not an exception
+  if (!_execution)
+  {
+    std::cerr << "Error during nnfw_session::set_input_layout : "
+              << "set_input_layout should be run after prepare" << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  try
+  {
+    if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST &&
+        layout != NNFW_LAYOUT_CHANNELS_LAST)
+    {
+      std::cerr << "Error during nnfw_session::set_input_layout, not supported layout" << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+    _execution->setInputLayout(onert::ir::IOIndex(index), convertLayout(layout));
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during nnfw_session::set_input_layout : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Set the data layout expected for output tensor `index`.
+// Accepts NONE / CHANNELS_FIRST / CHANNELS_LAST; requires prepare() to have run.
+NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout)
+{
+  // Guard against use before prepare(): null _execution is UB, not an exception
+  if (!_execution)
+  {
+    std::cerr << "Error during nnfw_session::set_output_layout : "
+              << "set_output_layout should be run after prepare" << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  try
+  {
+    if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST &&
+        layout != NNFW_LAYOUT_CHANNELS_LAST)
+    {
+      std::cerr << "Error during nnfw_session::set_output_layout, not supported layout"
+                << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+    _execution->setOutputLayout(onert::ir::IOIndex(index), convertLayout(layout));
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during nnfw_session::set_output_layout : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Map an IR tensor data type to the public NNFW_TYPE enum.
+// NOTE(review): unsupported types terminate the whole process via exit(-1);
+// throwing instead would let the callers' existing catch blocks turn this into
+// NNFW_STATUS_ERROR — consider changing, but that alters observable behavior.
+static NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt)
+{
+  using onert::ir::DataType;
+  switch (dt)
+  {
+    case DataType::FLOAT32:
+      return NNFW_TYPE_TENSOR_FLOAT32;
+    case DataType::INT32:
+      return NNFW_TYPE_TENSOR_INT32;
+    case DataType::QUANT8_ASYMM:
+      return NNFW_TYPE_TENSOR_QUANT8_ASYMM;
+    case DataType::BOOL8:
+      return NNFW_TYPE_TENSOR_BOOL;
+    case DataType::UINT8:
+      return NNFW_TYPE_TENSOR_UINT8;
+    case DataType::UINT32:
+    case DataType::QUANT8_SYMM:
+    default:
+      std::cerr << "Error: Model has type that runtime API does not support." << std::endl;
+      exit(-1);
+  }
+}
+
+// NYI: changing an input tensor's shape/type after load is not implemented yet;
+// always reports an error.
+NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t /*index*/, nnfw_tensorinfo /*ti*/)
+{
+  std::cerr << "Error: NYI" << std::endl;
+  return NNFW_STATUS_ERROR;
+}
+
+// Fill *ti with the rank, dimensions, and dtype of input tensor `index`.
+// Fails when ti is null or index is out of range.
+// NOTE(review): assumes load_model has run (_primary_subgraph non-null) — a
+// pre-load call would crash, not throw; confirm callers enforce this.
+NNFW_STATUS nnfw_session::input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
+{
+  try
+  {
+    if (ti == nullptr)
+    {
+      std::cerr << "Error during nnfw_session::input_tensorinfo, tensorinfo is null pointer."
+                << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+    if (index >= _primary_subgraph->getInputs().size())
+    {
+      std::cerr << "Error during nnfw_session::input_tensorinfo, index is out of range."
+                << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+    auto opidx = _primary_subgraph->getInputs().at(index);
+    auto shape = _primary_subgraph->operands().at(opidx).shape();
+    ti->rank = shape.rank();
+    // Copy each dimension into the fixed-size dims array of the public struct
+    for (int j = 0; j < ti->rank; ++j)
+    {
+      ti->dims[j] = shape.dim(j);
+    }
+    ti->dtype = datatype_to_nnfw_dtype(_primary_subgraph->operands().at(opidx).typeInfo().type());
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during nnfw_session::input_tensorinfo : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Fill *ti with the rank, dimensions, and dtype of output tensor `index`.
+// Fails when ti is null or index is out of range.
+// NOTE(review): assumes load_model has run (_primary_subgraph non-null) — a
+// pre-load call would crash, not throw; confirm callers enforce this.
+NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
+{
+  try
+  {
+    if (ti == nullptr)
+    {
+      std::cerr << "Error during nnfw_session::output_tensorinfo, tensorinfo is null pointer."
+                << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+    if (index >= _primary_subgraph->getOutputs().size())
+    {
+      std::cerr << "Error during nnfw_session::output_tensorinfo, index is out of range."
+                << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+    auto opidx = _primary_subgraph->getOutputs().at(index);
+    auto shape = _primary_subgraph->operands().at(opidx).shape();
+    ti->rank = shape.rank();
+    // Copy each dimension into the fixed-size dims array of the public struct
+    for (int j = 0; j < ti->rank; ++j)
+    {
+      ti->dims[j] = shape.dim(j);
+    }
+    ti->dtype = datatype_to_nnfw_dtype(_primary_subgraph->operands().at(opidx).typeInfo().type());
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during nnfw_session::output_tensorinfo : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+  return NNFW_STATUS_NO_ERROR;
+}
+// Register a user-provided eval function for custom operation `id`.
+// The loader resolves custom ops through this registry's KernelBuilder
+// (bound in load_model_from_file), so registration must happen before load.
+NNFW_STATUS nnfw_session::register_custom_operation(const std::string &id,
+                                                    nnfw_custom_eval eval_func)
+{
+  _kernel_registry->registerKernel(id, eval_func);
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Map a model operation name (e.g. "CONV_2D") to its per-op backend config key
+// ("OP_BACKEND_..." form), using the pairs generated from OpMap.lst.
+// Returns an empty string for unknown ops; the caller treats "" as an error.
+static std::string get_op_backend_string(std::string op)
+{
+#define MAP_MACRO(CircleName, OneRTName) {#CircleName, "OP_BACKEND_" #OneRTName},
+
+  static std::unordered_map<std::string, std::string> operation_map = {
+#include "OpMap.lst"
+  };
+
+#undef MAP_MACRO
+
+  auto entry = operation_map.find(op);
+  return (entry == operation_map.end()) ? std::string("") : entry->second;
+}
+
+// Store the "BACKENDS" config entry used later during prepare().
+// Rejects a null string or one without a '\0' in the first
+// MAX_BACKEND_NAME_LENGTH bytes.
+// NOTE(review): `backends` appears to be a list of backends (see header doc),
+// yet the length budget is the single-name limit — confirm the intended bound.
+NNFW_STATUS nnfw_session::set_available_backends(const char *backends)
+{
+  try
+  {
+    if (!backends || null_terminating(backends, MAX_BACKEND_NAME_LENGTH) == false)
+    {
+      return NNFW_STATUS_ERROR;
+    }
+
+    _source->set("BACKENDS", backends);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during nnfw_session::set_available_backends : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Store a per-operation backend assignment ("OP_BACKEND_<op>" -> backend) in the
+// config source. Both strings must be non-null and '\0'-terminated within their
+// respective length budgets; an unknown op name is rejected.
+NNFW_STATUS nnfw_session::set_op_backend(const char *op, const char *backend)
+{
+  try
+  {
+    if (!op || !null_terminating(op, MAX_OP_NAME_LENGTH) || !backend ||
+        !null_terminating(backend, MAX_BACKEND_NAME_LENGTH))
+    {
+      return NNFW_STATUS_ERROR;
+    }
+
+    auto key = get_op_backend_string(op);
+
+    // Empty key means the op name was not found in OpMap.lst
+    if (key.empty())
+    {
+      return NNFW_STATUS_ERROR;
+    }
+
+    _source->set(key, backend);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during nnfw_session::set_op_backend : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Apply a single key/value configuration entry directly to the compiler
+// options. Only valid after load_model (needs _compiler); unknown keys fail.
+// NOTE(review): `key == config::X` relies on the config constants being
+// std::string (so operator== does a content compare); if they were const char*
+// this would compare pointers. Also assumes key/value are non-null — confirm.
+NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
+{
+  // The session must be in the state after model load
+  if (!_compiler)
+    return NNFW_STATUS_ERROR;
+
+  auto &options = _compiler->options();
+
+  using namespace onert::util;
+
+  if (key == config::TRACE_FILEPATH)
+  {
+    options.trace_filepath = value;
+  }
+  else if (key == config::GRAPH_DOT_DUMP)
+  {
+    options.graph_dump_level = toInt(value);
+  }
+  else if (key == config::OP_SEQ_MAX_NODE)
+  {
+    options.op_seq_max_node = toInt(value);
+  }
+  else if (key == config::EXECUTOR)
+  {
+    options.executor = value;
+  }
+  else if (key == config::OP_BACKEND_ALLOPS)
+  {
+    options.manual_scheduler_options.backend_for_all = value;
+  }
+  else if (key == config::USE_SCHEDULER)
+  {
+    options.he_scheduler = toBool(value);
+  }
+  else if (key == config::PROFILING_MODE)
+  {
+    options.he_profiling_mode = toBool(value);
+  }
+  else if (key == config::DELETE_CACHED_DATA)
+  {
+    options.delete_cached_data = toBool(value);
+  }
+  else if (key == config::DISABLE_COMPILE)
+  {
+    options.disable_compile = toBool(value);
+  }
+  else
+  {
+    // Unrecognized configuration key
+    return NNFW_STATUS_ERROR;
+  }
+  return NNFW_STATUS_NO_ERROR;
+}
diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h
new file mode 100644
index 000000000..7fe4e0a26
--- /dev/null
+++ b/runtime/onert/api/src/nnfw_api_internal.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __API_NNFW_API_INTERNAL_H__
+#define __API_NNFW_API_INTERNAL_H__
+
+#include "nnfw.h"
+#include "nnfw_dev.h"
+
+#include <util/GeneralConfigSource.h>
+
+#include <string>
+#include <memory>
+
+namespace onert
+{
+namespace frontend
+{
+namespace custom
+{
+class KernelRegistry;
+}
+} // namespace frontend
+namespace exec
+{
+class Execution;
+} // namespace exec
+namespace ir
+{
+class Graph;
+} // namespace ir
+namespace compiler
+{
+class Compiler;
+} // namespace compiler
+} // namespace onert
+
+// Implementation of the opaque session handle exposed by the public nnfw C API.
+// Lifecycle: load_model_from_file -> prepare -> set_input/set_output -> run.
+struct nnfw_session
+{
+public:
+  nnfw_session();
+  ~nnfw_session();
+
+  // Load the first model of an nnpackage directory and create the compiler
+  NNFW_STATUS load_model_from_file(const char *package_file_path);
+  // Compile the model and create the executor (valid once, after load)
+  NNFW_STATUS prepare();
+  // Run one synchronous inference
+  NNFW_STATUS run();
+
+  NNFW_STATUS set_input(uint32_t index, NNFW_TYPE type, const void *buffer, size_t length);
+  NNFW_STATUS set_output(uint32_t index, NNFW_TYPE type, void *buffer, size_t length);
+
+  NNFW_STATUS input_size(uint32_t *number);
+  NNFW_STATUS output_size(uint32_t *number);
+
+  NNFW_STATUS set_input_layout(uint32_t index, NNFW_LAYOUT layout);
+  NNFW_STATUS set_output_layout(uint32_t index, NNFW_LAYOUT layout);
+
+  // NYI in the current implementation
+  NNFW_STATUS apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti);
+
+  NNFW_STATUS input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
+  NNFW_STATUS output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
+
+  NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
+
+  NNFW_STATUS set_available_backends(const char *backends);
+  NNFW_STATUS set_op_backend(const char *op, const char *backend);
+
+  NNFW_STATUS set_config(const char *key, const char *value);
+
+private:
+  std::shared_ptr<onert::ir::Graph> _primary_subgraph;  // loaded model; null until load
+  std::unique_ptr<onert::compiler::Compiler> _compiler; // created by load_model_from_file
+  std::shared_ptr<onert::exec::Execution> _execution;   // created by prepare()
+  std::shared_ptr<onert::frontend::custom::KernelRegistry> _kernel_registry;
+
+protected:
+  // Config source consumed (moved out) by prepare(); protected so that
+  // nnfw_debug_session can substitute an EnvConfigSource
+  std::unique_ptr<onert::util::GeneralConfigSource> _source;
+};
+
+#endif // __API_NNFW_API_INTERNAL_H__
diff --git a/runtime/onert/api/src/nnfw_debug.cc b/runtime/onert/api/src/nnfw_debug.cc
new file mode 100644
index 000000000..73d0e9c86
--- /dev/null
+++ b/runtime/onert/api/src/nnfw_debug.cc
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnfw_debug_internal.h"
+
+// Create a debug session (config read from environment variables) and return
+// it through *session. Fails on a null out-parameter instead of crashing.
+NNFW_STATUS nnfw_create_debug_session(nnfw_session **session)
+{
+  if (session == nullptr)
+    return NNFW_STATUS_ERROR;
+
+  *session = new nnfw_debug_session();
+
+  return NNFW_STATUS_NO_ERROR;
+}
+
+// Forward a key/value config entry to the session.
+// Fails on a null session instead of crashing (key/value are validated by the
+// session-side set_config).
+NNFW_STATUS nnfw_set_config(nnfw_session *session, const char *key, const char *value)
+{
+  if (session == nullptr)
+    return NNFW_STATUS_ERROR;
+
+  return session->set_config(key, value);
+}
diff --git a/runtime/onert/api/src/nnfw_debug_internal.cc b/runtime/onert/api/src/nnfw_debug_internal.cc
new file mode 100644
index 000000000..4fddfc93d
--- /dev/null
+++ b/runtime/onert/api/src/nnfw_debug_internal.cc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnfw_debug_internal.h"
+#include "util/EnvConfigSource.h"
+
+#include <memory>
+
+// Debug sessions read configuration from environment variables: the base
+// constructor installs a GeneralConfigSource, which is replaced here with an
+// EnvConfigSource before prepare() consumes it.
+nnfw_debug_session::nnfw_debug_session() : nnfw_session()
+{
+  _source = std::make_unique<onert::util::EnvConfigSource>();
+}
diff --git a/runtime/neurun/api/src/nnfw_debug_internal.h b/runtime/onert/api/src/nnfw_debug_internal.h
index f4984e7a1..f4984e7a1 100644
--- a/runtime/neurun/api/src/nnfw_debug_internal.h
+++ b/runtime/onert/api/src/nnfw_debug_internal.h
diff --git a/runtime/onert/backend/CMakeLists.txt b/runtime/onert/backend/CMakeLists.txt
new file mode 100644
index 000000000..7310571d8
--- /dev/null
+++ b/runtime/onert/backend/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Library target names shared with the backend subdirectories below
+set(LIB_ONERT_BACKEND_ACL_COMMON onert_backend_acl_common)
+set(LIB_ONERT_BACKEND_CPU_COMMON onert_backend_cpu_common)
+
+# Build every backend; ACL backends skip themselves when ARMCompute is absent
+add_subdirectory(cpu)
+add_subdirectory(cpu_common)
+add_subdirectory(acl_cl)
+add_subdirectory(acl_neon)
+add_subdirectory(acl_common)
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
new file mode 100644
index 000000000..fabcae38e
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_BACKEND_H__
+#define __ONERT_BACKEND_ACL_CL_BACKEND_H__
+
+#include <memory>
+#include <backend/Backend.h>
+
+#include "Config.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "ShapeFixer.h"
+#include "TensorManager.h"
+#include "Optimizer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+// ACL OpenCL backend plugin: exposes its Config and builds a per-graph
+// BackendContext wired with this backend's tensor builder, initializers,
+// kernel generator, shape fixer and optimizer.
+class Backend : public ::onert::backend::Backend
+{
+public:
+  Backend() : _config{std::make_shared<Config>()} {}
+
+  std::shared_ptr<IConfig> config() const override { return _config; }
+
+  // Custom kernel builder parameter is unused: acl_cl has no custom-op support here
+  std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
+                                             const std::shared_ptr<custom::IKernelBuilder> &,
+                                             bool is_linear_executor) const override
+  {
+    const auto &operands = graph.operands();
+    auto context = std::make_unique<BackendContext>(this, &graph);
+    // The tensor manager flavor depends on whether the executor is linear
+    auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+    context->tensor_builder = tb;
+    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
+    context->kernel_gen = std::make_shared<KernelGenerator>(operands, tb);
+    context->shape_fixer = std::make_shared<ShapeFixer>(operands, tb);
+    context->tensor_register = nullptr; // no special tensor registration for acl_cl
+    context->optimizer = std::make_shared<Optimizer>(context.get());
+    return context;
+  }
+
+private:
+  std::shared_ptr<IConfig> _config;
+};
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_BACKEND_H__
diff --git a/runtime/onert/backend/acl_cl/CLTimer.h b/runtime/onert/backend/acl_cl/CLTimer.h
new file mode 100644
index 000000000..722dc68ef
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/CLTimer.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_CLTIMER_H__
+#define __ONERT_BACKEND_ACL_CL_CLTIMER_H__
+
+#include <util/ITimer.h>
+#include <arm_compute/core/CL/OpenCL.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <chrono>
+#include <list>
+#include <sstream>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+/**
+ * @brief Class to measure CL kernels execution time
+ *
+ * Works by swapping the clEnqueueNDRangeKernel function pointer in ACL's
+ * CLSymbols table for a recording wrapper between handleBegin and handleEnd.
+ * NOTE(review): the wrapper captures `this`, so the timer must outlive any
+ * enqueue between begin and end, and concurrent use looks unsafe — confirm.
+ */
+class CLTimer : public util::ITimer
+{
+public:
+  /**
+   * @brief This function replaces CL function, which enqueues a command to execute a kernel
+   * with a wrapper which remembers enqueued kernels
+   */
+  void handleBegin() override
+  {
+    _measured_events.clear();
+
+    // Keep the original enqueue pointer so handleEnd can restore it
+    _origin_enqueue_function = arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr;
+
+    auto _timer_enqueue_function = [this](cl_command_queue command_queue, cl_kernel kernel,
+                                          cl_uint work_dim, const size_t *gwo, const size_t *gws,
+                                          const size_t *lws, cl_uint num_events_in_wait_list,
+                                          const cl_event *event_wait_list, cl_event *usr_event) {
+      cl_event event;
+      // Always request an event so profiling info can be queried later
+      cl_int enqueue_res =
+          this->_origin_enqueue_function(command_queue, kernel, work_dim, gwo, gws, lws,
+                                         num_events_in_wait_list, event_wait_list, &event);
+      this->_measured_events.emplace_back(event);
+
+      // According to spec, if NULL was provided in usr_event - event shouldn't be returned
+      if (usr_event != nullptr)
+      {
+        // Caller also wants the event: bump its refcount before sharing it
+        clRetainEvent(event);
+        *usr_event = event;
+      }
+      return enqueue_res;
+    };
+    arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr = _timer_enqueue_function;
+
+    // Set CL_QUEUE_PROFILING_ENABLE flag for the CL command-queue, if it isn't already set
+    auto &cl_scheduler = arm_compute::CLScheduler::get();
+    auto props = cl_scheduler.queue().getInfo<CL_QUEUE_PROPERTIES>();
+    if ((props & CL_QUEUE_PROFILING_ENABLE) == 0)
+    {
+      cl_scheduler.set_queue(
+          cl::CommandQueue(cl_scheduler.context(), props | CL_QUEUE_PROFILING_ENABLE));
+    }
+  };
+
+  /**
+   * @brief Get timer result by addition executed CL kernels durations
+   */
+  void handleEnd() override
+  {
+    _timer_res = 0;
+    for (auto const &event : _measured_events)
+    {
+      cl_ulong start;
+      cl_ulong end;
+      event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
+      event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
+      _timer_res += (end - start) / 1000.f; // nanoseconds -> microseconds
+    }
+
+    // Restore origin CL enqueue function
+    arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr = _origin_enqueue_function;
+  };
+
+private:
+  std::function<decltype(clEnqueueNDRangeKernel)> _origin_enqueue_function;
+  std::list<::cl::Event> _measured_events; // events recorded between begin and end
+};
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_CLTIMER_H__
diff --git a/runtime/onert/backend/acl_cl/CMakeLists.txt b/runtime/onert/backend/acl_cl/CMakeLists.txt
new file mode 100644
index 000000000..3b56dca81
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Unsupported architecture: the acl_cl backend is simply skipped when
+# ARMCompute is not available on this platform
+nnas_find_package(ARMCompute QUIET)
+if(NOT ARMCompute_FOUND)
+  return()
+endif(NOT ARMCompute_FOUND)
+
+set(LIB_ONERT_BACKEND_ACL_CL onert_backend_acl_cl)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+# Built as a shared library so the runtime can load it as a backend plugin
+add_library(${LIB_ONERT_BACKEND_ACL_CL} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_CL} PRIVATE ${LIB_ONERT_BACKEND_ACL_COMMON})
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_CL} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_CL} PRIVATE nnfw_coverage)
+
+# Installed file is named libbackend_acl_cl, not the target name
+set_target_properties(${LIB_ONERT_BACKEND_ACL_CL} PROPERTIES OUTPUT_NAME backend_acl_cl)
+
+install(TARGETS ${LIB_ONERT_BACKEND_ACL_CL} DESTINATION lib)
diff --git a/runtime/onert/backend/acl_cl/Config.cc b/runtime/onert/backend/acl_cl/Config.cc
new file mode 100644
index 000000000..1cc3cb401
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/Config.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// For CLKernelLibraryEx initialization
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+#include "Config.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+// One-time backend initialization: verify OpenCL is present, start the ACL CL
+// scheduler, and point CLKernelLibraryEx at the kernel sources.
+// Returns false (backend unusable) when no OpenCL runtime is available.
+bool Config::initialize()
+{
+  if (!arm_compute::opencl_is_available())
+  {
+    return false;
+  }
+  arm_compute::CLScheduler::get().default_init();
+  // NOTE CLKernelLibraryEx must use the same context as CLScheduler
+  // It did not check whether another device is available.
+  arm_compute::CLKernelLibraryEx::get().init(
+      "./cl_kernels/", arm_compute::CLScheduler::get().context(), cl::Device::getDefault());
+
+  return true;
+}
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/Config.h b/runtime/onert/backend/acl_cl/Config.h
new file mode 100644
index 000000000..a64c7923e
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/Config.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_CONFIG_H__
+#define __ONERT_BACKEND_ACL_CL_CONFIG_H__
+
+#include "CLTimer.h"
+#include <memory>
+#include <backend/IConfig.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+// Backend configuration for acl_cl: identifies the backend, performs OpenCL
+// initialization, and provides a CLTimer for kernel profiling.
+class Config : public IConfig
+{
+public:
+  std::string id() override { return "acl_cl"; }
+  bool initialize() override;
+  // Layout permutation is supported by this backend
+  bool SupportPermutation() override { return true; }
+  std::unique_ptr<util::ITimer> timer() override { return std::make_unique<CLTimer>(); }
+};
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_CONFIG_H__
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
new file mode 100644
index 000000000..d7f5f8031
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstantInitializer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+// Constructs the constant initializer over the model operands; backend tensors
+// are later retrieved through the given TensorBuilder. Body is intentionally empty.
+ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
+                                         const std::shared_ptr<TensorBuilder> &tensor_builder)
+    : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+{
+  // DO NOTHING
+}
+
+// Registers a plain element-copy initializer for the node's index-th input operand.
+void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
+{
+  assert(node.getInputs().size() > index);
+
+  const auto &input_index = node.getInputs().at(index);
+  const auto &input_obj = _operands.at(input_index);
+  registerCopyInitializer(input_index, input_obj);
+}
+
+// Registers a layout-permuting initializer for the node's index-th input operand
+// (used for operands whose frontend/backend layouts differ, e.g. conv kernels).
+void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
+{
+  assert(node.getInputs().size() > index);
+
+  const auto &input_index = node.getInputs().at(index);
+  const auto &input_obj = _operands.at(input_index);
+  registerPermuteInitializer(input_index, input_obj);
+}
+
+// Registers a custom initializer for a constant BLOCK_SIZE operand: the rank-1
+// int32 vector is written into the backend tensor with its element order reversed.
+void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
+{
+  const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
+  const auto &block_size_obj = _operands.at(block_size_index);
+
+  if (block_size_obj.isConstant())
+  {
+    _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
+      assert(model_obj.data());
+      const auto &shape = model_obj.shape();
+      const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
+      assert(model_obj.shape().rank() == 1);
+      obj.access([&](ITensor &tensor) {
+        for (size_t i = 0; i < shape.num_elements(); ++i)
+        {
+          // Reversed element order: destination element i takes source element (N - i - 1).
+          const int32_t value = base[shape.num_elements() - i - 1];
+          int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
+                                                      tensor.calcOffset({static_cast<int32_t>(i)}));
+          *into = value;
+        }
+      });
+    };
+  }
+}
+
+// Conv2D: KERNEL needs layout permutation; BIAS is copied as-is.
+void ConstantInitializer::visit(const ir::operation::Conv2D &node)
+{
+  permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
+  copyInputInitialize(node, ir::operation::Conv2D::BIAS);
+}
+
+// DepthwiseConv2D: KERNEL needs layout permutation; BIAS is copied as-is.
+void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
+{
+  permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
+  copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
+}
+
+// EmbeddingLookup: constant LOOKUPS indices are copied verbatim.
+void ConstantInitializer::visit(const ir::operation::EmbeddingLookup &node)
+{
+  copyInputInitialize(node, ir::operation::EmbeddingLookup::LOOKUPS);
+}
+
+// FullyConnected: WEIGHT and BIAS are copied without layout permutation.
+void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
+{
+  copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
+  copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
+}
+
+// Gather: constant INDICES are copied verbatim.
+void ConstantInitializer::visit(const ir::operation::Gather &node)
+{
+  copyInputInitialize(node, ir::operation::Gather::INDICES);
+}
+
+// HashtableLookup: constant LOOKUPS and KEYS are copied verbatim.
+void ConstantInitializer::visit(const ir::operation::HashtableLookup &node)
+{
+  copyInputInitialize(node, ir::operation::HashtableLookup::LOOKUPS);
+  copyInputInitialize(node, ir::operation::HashtableLookup::KEYS);
+}
+
+// LSTM: all constant weight and bias inputs are copied verbatim (no permutation).
+void ConstantInitializer::visit(const ir::operation::LSTM &node)
+{
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
+  copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
+  copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
+  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
+}
+
+// RNN: constant weights and bias are copied verbatim.
+void ConstantInitializer::visit(const ir::operation::RNN &node)
+{
+  copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
+  copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::RNN::BIAS);
+}
+
+// SpaceToBatchND: registers custom initializers for the constant BLOCK_SIZE
+// (rank-1 int32, written in reversed element order) and PADDINGS (rank-2 int32,
+// rows written in reversed order, columns kept) operands.
+void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
+{
+  const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
+  const auto &block_size_obj = _operands.at(block_size_index);
+
+  if (block_size_obj.isConstant())
+  {
+    _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
+      assert(model_obj.data());
+      const auto &shape = model_obj.shape();
+      const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
+      assert(model_obj.shape().rank() == 1);
+      obj.access([&](ITensor &tensor) {
+        for (size_t i = 0; i < shape.num_elements(); ++i)
+        {
+          // Reversed element order, same scheme as BatchToSpaceND above.
+          const int32_t value = base[shape.num_elements() - i - 1];
+          int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
+                                                      tensor.calcOffset({static_cast<int32_t>(i)}));
+          *into = value;
+        }
+      });
+    };
+  }
+
+  const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS);
+  const auto &paddings_obj = _operands.at(paddings_index);
+  if (paddings_obj.isConstant())
+  {
+    _init_map[paddings_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
+      assert(model_obj.data());
+      const auto &shape = model_obj.shape();
+      const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
+      assert(model_obj.shape().rank() == 2);
+      assert(obj.dimension(0) == 2);
+      obj.access([&](ITensor &tensor) {
+        for (auto i = 0; i < shape.dim(0); ++i)
+        {
+          for (auto j = 0; j < shape.dim(1); ++j)
+          {
+            // NOTE(review): index assumes the inner dimension is 2 (pairs of paddings)
+            const int32_t value = base[i * 2 + j];
+            // Rows are written in reversed order; column index is preserved.
+            int32_t *into = reinterpret_cast<int32_t *>(
+                tensor.buffer() + tensor.calcOffset({shape.dim(0) - i - 1, j}));
+            *into = value;
+          }
+        }
+      });
+    };
+  }
+}
+
+// TransposeConv: the KERNEL operand is registered for layout permutation.
+// Unlike the helpers above, this registers unconditionally (no index assert needed).
+void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
+{
+  const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
+  const auto &kernel_obj = _operands.at(kernel_index);
+  registerPermuteInitializer(kernel_index, kernel_obj);
+}
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h
new file mode 100644
index 000000000..c51f72b11
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+
+#include <backend/IConstantInitializer.h>
+#include <ir/Operands.h>
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+// Visitor that registers per-operand initializer functions for constant inputs
+// of the operations it overrides; other operations fall back to the base class.
+class ConstantInitializer : public IConstantInitializer
+{
+public:
+  ConstantInitializer(const ir::Operands &operands,
+                      const std::shared_ptr<TensorBuilder> &tensor_builder);
+
+public:
+  void visit(const ir::operation::BatchToSpaceND &) override;
+  void visit(const ir::operation::Conv2D &) override;
+  void visit(const ir::operation::DepthwiseConv2D &) override;
+  void visit(const ir::operation::EmbeddingLookup &) override;
+  void visit(const ir::operation::FullyConnected &) override;
+  void visit(const ir::operation::Gather &) override;
+  void visit(const ir::operation::HashtableLookup &) override;
+  void visit(const ir::operation::LSTM &) override;
+  void visit(const ir::operation::RNN &) override;
+  void visit(const ir::operation::SpaceToBatchND &) override;
+  void visit(const ir::operation::TransposeConv &) override;
+
+private:
+  // Exposes the backend tensor builder to the base class.
+  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+  // Registers a plain copy initializer for the node's index-th input.
+  void copyInputInitialize(const ir::Operation &node, uint32_t index);
+  // Registers a layout-permuting initializer for the node's index-th input.
+  void permuteInputInitialize(const ir::Operation &node, uint32_t index);
+
+private:
+  std::shared_ptr<TensorBuilder> _tensor_builder;
+};
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
new file mode 100644
index 000000000..25588fea7
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -0,0 +1,2023 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include <arm_compute/runtime/CL/CLFunctions.h> // Include all ARM Compute CL functions
+#include <arm_compute/runtime/CL/CLFunctionsEx.h> // Include all ARM Compute EX CL functions
+
+#include <AclActivationBuilder.h>
+#include <AclFunction.h>
+#include <Convert.h>
+#include <Swizzle.h>
+
+#include "ir/Index.h"
+#include "ir/DataType.h"
+#include "ir/InternalType.h"
+#include "exec/NopFunction.h"
+#include "exec/FunctionSequence.h"
+#include "util/logging.h"
+#include "util/Utils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+using ::onert::backend::acl_common::asAclClFunction;
+using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
+ ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclClFunction>;
+
+// Constructs the kernel generator over the model operands; the op-sequence
+// layout starts UNKNOWN and is set per sequence in visit(OpSequence).
+KernelGenerator::KernelGenerator(const ir::Operands &ctx,
+                                 const std::shared_ptr<TensorBuilder> &tensor_builder)
+    : _ctx(ctx), _tensor_builder(tensor_builder), _current_op_seq_layout(ir::Layout::UNKNOWN)
+{
+  // DO NOTHING
+}
+
+// Generates one FunctionSequence for the whole op sequence: records the
+// sequence layout, then visits each node and appends the function it produced.
+void KernelGenerator::visit(const ir::OpSequence &op_seq)
+{
+  // TODO Move this to IKernelGenerator
+  // (all derivatives have the same implementation for this)
+  assert(!_return_fn_seq);
+  _return_fn_seq = std::make_unique<exec::FunctionSequence>();
+  _current_op_seq_layout = op_seq.getLayout();
+  for (const auto &e : op_seq.operations())
+  {
+    const auto &node = *(e.node);
+    node.accept(*this);
+    _return_fn_seq->append(releaseFunction());
+  }
+}
+
+// BatchToSpaceND -> CLBatchToSpaceLayer. The block-size operand must be
+// constant (asserted); its data was prepared by ConstantInitializer.
+void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
+  const auto block_size_index{
+      node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  auto block_size_alloc = _tensor_builder->at(block_size_index).get();
+
+  assert(_ctx.at(block_size_index).data());
+
+  auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>();
+
+  fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle());
+
+  auto acl_fn = asAclClFunction(std::move(fn));
+
+  _return_fn = std::move(acl_fn);
+}
+
+// Cast -> CLCast. BOOL8 inputs are flagged with SubDataType::BOOL so the
+// kernel can interpret the byte values correctly.
+void KernelGenerator::visit(const ir::operation::Cast &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  const auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8
+                                  ? arm_compute::SubDataType::BOOL
+                                  : arm_compute::SubDataType::NONE;
+
+  auto fn = std::make_unique<::arm_compute::CLCast>();
+
+  fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type);
+
+  auto acl_fn = asAclClFunction(std::move(fn));
+
+  _return_fn = std::move(acl_fn);
+}
+
+// Conv2D -> CLConvolutionLayer. Padding is computed from the frontend shapes
+// and the fused activation is passed directly to the ACL layer (act_info).
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+  using ir::operation::Conv2D;
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+  const auto &ker_shape = _ctx.at(ker_index).shape();
+  const auto ker_height = ker_shape.dim(1);
+  const auto ker_width = ker_shape.dim(2);
+
+  const auto stride = node.param().stride;
+  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
+                                            ker_width, ker_height);
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  auto ker_alloc = _tensor_builder->at(ker_index).get();
+  auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+  const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
+  const auto act_info = acl_common::asActivationLayerInfo(activation);
+
+  // The layer uses the shared internal buffer manager for its scratch memory.
+  auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+
+  // Size2D(1U, 1U) means no dilation; WeightsInfo() uses defaults.
+  fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
+                conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+
+  _return_fn = asAclClFunction(std::move(fn));
+}
+
+// DepthwiseConv2D -> CLDepthwiseConvolutionLayer, with fused activation and
+// channel multiplier taken from the node parameters.
+void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
+{
+  using ir::operation::DepthwiseConv2D;
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
+  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  // Kernel format is [1, kernel_height, kernel_width, depth_out].
+  const auto &ker_shape = _ctx.at(ker_index).shape();
+  const auto ker_height = ker_shape.dim(1);
+  const auto ker_width = ker_shape.dim(2);
+
+  const auto stride = node.param().stride;
+  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
+                                            ker_width, ker_height);
+  const auto multiplier = node.param().multiplier;
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  auto ker_alloc = _tensor_builder->at(ker_index).get();
+  auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+  const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
+  const auto act_info = acl_common::asActivationLayerInfo(activation);
+
+  // Extra scope has no functional effect; kept as written in the original.
+  {
+    auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+
+    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
+                  ofm_alloc->handle(), conv_info, multiplier, act_info);
+
+    _return_fn = asAclClFunction(std::move(fn));
+  }
+}
+
+// MaxPool2D -> CLPoolingLayer (PoolingType::MAX). The activation is not fused:
+// it is appended as a second function in the returned FunctionSequence.
+void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
+
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+
+  const auto kh = node.param().kh;
+  const auto kw = node.param().kw;
+  const auto stride = node.param().stride;
+  const auto padding =
+      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+  const auto activation = node.param().activation;
+
+  VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
+  VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
+  VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
+  VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
+  VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
+  VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
+  VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
+  VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
+  VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl;
+  VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl;
+  VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl;
+  VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
+                                       ::arm_compute::Size2D{kw, kh},
+                                       acl_common::asPadStrideInfo(padding, stride)};
+
+  auto fn = std::make_unique<::arm_compute::CLPoolingLayer>();
+
+  fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
+
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+      asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+// AvgPool2D -> CLPoolingLayer (PoolingType::AVG, exclude_padding = true so
+// padded cells are not averaged in). Activation is appended afterwards, as in
+// MaxPool2D.
+void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
+
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+
+  const auto kh = node.param().kh;
+  const auto kw = node.param().kw;
+  const auto stride = node.param().stride;
+  const auto padding =
+      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+  const auto activation = node.param().activation;
+
+  VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
+  VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
+  VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
+  VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
+  VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl;
+  VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
+  VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
+  VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
+  VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl;
+  VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl;
+  VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl;
+  VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  ::arm_compute::PoolingLayerInfo info{
+      ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh},
+      acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
+
+  auto fn = std::make_unique<::arm_compute::CLPoolingLayer>();
+
+  fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
+
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+      asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+// Concat: three cases. (1) If all inputs are sub-tensors of the output the
+// concat is already realized by memory layout and a NOP is emitted. (2) A
+// single input becomes a CLCopy. (3) Otherwise CLConcatenateLayer is used with
+// the axis converted to the backend layout.
+void KernelGenerator::visit(const ir::operation::Concat &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+
+  std::vector<ir::OperandIndex> input_indexes;
+
+  for (const auto &input : node.getInputs())
+    input_indexes.emplace_back(input);
+
+  const auto axis = node.param().axis;
+
+  // Concat elimination check
+  bool eliminated = _tensor_builder->areSubTensorsOf(ofm_index, node.getInputs());
+  if (eliminated)
+  {
+    // If concat eliminated, return a NOP IFunction
+    VERBOSE(acl_cl_KernelGenerator_Concat) << "Concat eliminated" << std::endl;
+    _return_fn = std::make_unique<exec::NopFunction>();
+    return;
+  }
+
+  auto output_alloc = _tensor_builder->at(ofm_index).get();
+  std::vector<::arm_compute::ICLTensor *> input_tensors;
+  for (auto &ifm_ind : input_indexes)
+    input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+
+  std::unique_ptr<::arm_compute::IFunction> fn;
+  if (input_indexes.size() < 2)
+  {
+    auto l = std::make_unique<::arm_compute::CLCopy>();
+    l->configure(input_tensors.at(0), output_alloc->handle());
+    fn = std::move(l);
+  }
+  else
+  {
+    auto l = std::make_unique<::arm_compute::CLConcatenateLayer>();
+    const auto rank = node.param().rank;
+    const auto frontend_layout = _current_op_seq_layout;
+    const auto backend_layout = output_alloc->layout();
+    // Map the frontend axis to the corresponding ACL axis for the backend layout.
+    const auto fixed_axis =
+        acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
+    l->configure(input_tensors, output_alloc->handle(), fixed_axis);
+    fn = std::move(l);
+  }
+
+  auto acl_fn = asAclClFunction(std::move(fn));
+
+  _return_fn = std::move(acl_fn);
+}
+
+// FullyConnected -> CLFullyConnectedReshapingLayer. Rank-3/4 inputs are
+// flattened to rank-2 [batch, input_size] before the GEMM; constant weights
+// select the PREPROCESSED_WEIGHTS kernel path. Activation is appended as a
+// separate function.
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+  using ir::operation::FullyConnected;
+
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+
+  const auto input_rank = _ctx.at(input_index).shape().rank();
+
+  const auto output_size =
+      _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
+  UNUSED_RELEASE(output_size);
+  assert(_ctx.at(bias_index).shape().dim(0) == output_size);
+  assert(_ctx.at(weight_index).shape().dim(0) == output_size);
+  const auto batch_size =
+      _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2);
+  const auto input_size =
+      _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1);
+
+  // Check for reshaping input's shape into rank-2
+  bool needs_reshape = false;
+  ir::Shape reshape(2);
+  if (input_rank == 3 || input_rank == 4)
+  {
+    const auto &ifm_shape = _ctx.at(input_index).shape();
+    auto feature_size = 1;
+    for (int i = 0; i < ifm_shape.rank(); ++i)
+    {
+      feature_size *= ifm_shape.dim(i);
+    }
+
+    UNUSED_RELEASE(feature_size);
+    // The flattened element count must match the rank-2 target shape.
+    assert(feature_size == batch_size * input_size);
+
+    // for reshaping
+    needs_reshape = true;
+    reshape.dim(0) = batch_size; /* H */
+    reshape.dim(1) = input_size; /* W */
+  }
+
+  const auto activation = node.param().activation;
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  const auto input_alloc = _tensor_builder->at(input_index).get();
+  const auto weight_alloc = _tensor_builder->at(weight_index).get();
+  const auto bias_alloc = _tensor_builder->at(bias_index).get();
+  const auto frontend_layout = _current_op_seq_layout;
+  const auto acl_layout = output_alloc->handle()->info()->data_layout();
+
+  auto fn = std::make_unique<arm_compute::CLFullyConnectedReshapingLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+
+  arm_compute::CLFullyConnectedReshapingLayer::KernelType kernel_type =
+      arm_compute::CLFullyConnectedReshapingLayer::KernelType::GENERAL;
+  if (_ctx.at(weight_index).isConstant())
+  {
+    // Constant weights can be preprocessed once at prepare time.
+    kernel_type = arm_compute::CLFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS;
+    assert(_ctx.at(weight_index).data());
+  }
+  fn->configure(
+      input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(),
+      needs_reshape,
+      ::onert::backend::acl_common::asTensorShape(
+          reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
+      kernel_type);
+
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+      asAclClFunction(std::move(fn)),
+      ActivationBuilder::generate(activation, output_alloc->handle()));
+}
+
+// Mul -> CLPixelWiseMultiplication with scale 1.0, saturating convert policy
+// and round-to-nearest-even. Activation is appended as a separate function.
+void KernelGenerator::visit(const ir::operation::Mul &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
+
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>();
+
+  fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale
+                arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+      asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+// ReduceSum -> CLReduceOperation (ReduceOperation::SUM). Axes are normalized
+// (negative values wrapped), converted to ACL axes and de-duplicated in a set.
+void KernelGenerator::visit(const ir::operation::ReduceSum &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::ReduceSum::Input::INPUT)};
+  const auto &axes{node.param().axes};
+  const auto keep_dims{node.param().keep_dims};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+  const auto frontend_layout = _current_op_seq_layout;
+  const auto backend_layout = input_alloc->layout();
+
+  // Convert to ACL axes taking into account negative values and possible duplicates.
+  std::set<std::uint32_t> acl_axes;
+  const int input_rank = node.param().rank;
+  for (int axis : axes)
+  {
+    if (axis < 0)
+      axis += input_rank;
+    acl_axes.insert(
+        acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value());
+  }
+
+  auto fn = std::make_unique<::arm_compute::CLReduceOperation>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+
+  fn->configure(input_alloc->handle(), output_alloc->handle(), acl_axes, keep_dims,
+                ::arm_compute::ReduceOperation::SUM);
+
+  auto acl_fn = asAclClFunction(std::move(fn));
+
+  _return_fn = std::move(acl_fn);
+}
+
+// Reshape -> CLReshapeLayer. Frontend and backend layouts must agree for
+// rank >= 4 tensors (asserted) since the layer does not permute data.
+void KernelGenerator::visit(const ir::operation::Reshape &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  // NOTE This operation must not be changed the layout from frontend to backend
+  // So, PermutationOperationPass makes layouts of frontend and backend the same.
+  const auto frontend_layout = _current_op_seq_layout;
+  const auto backend_layout = output_alloc->layout();
+  assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
+         frontend_layout == backend_layout);
+  UNUSED_RELEASE(frontend_layout);
+  UNUSED_RELEASE(backend_layout);
+
+  auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
+
+  fn->configure(input_alloc->handle(), output_alloc->handle());
+
+  auto acl_fn = asAclClFunction(std::move(fn));
+
+  _return_fn = std::move(acl_fn);
+}
+
+// Squeeze -> CLReshapeLayer; the dims/ndim parameters are intentionally ignored
+// because the output tensor shape is already squeezed (see comment below).
+void KernelGenerator::visit(const ir::operation::Squeeze &node)
+{
+  // Squeeze is identical to reshape except that it has an optional dimensions input.
+  // In addition, optional dims_index is ignored since output tensor already has squeezed shape
+  // by freezer and toco
+  // TODO Support multi-layout for frontend and backend
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
+  const auto dims{node.param().dims};
+  const auto ndim{node.param().ndim};
+  (void)dims;
+  (void)ndim;
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+  auto fn = std::make_unique<arm_compute::CLReshapeLayer>();
+  fn->configure(input_alloc->handle(), output_alloc->handle());
+  auto acl_fn = asAclClFunction(std::move(fn));
+  _return_fn = std::move(acl_fn);
+}
+
+// Tanh -> CLActivationLayer with the TANH function (a = 1.0, b = 1.0, i.e.
+// plain tanh(x)).
+void KernelGenerator::visit(const ir::operation::Tanh &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<arm_compute::CLActivationLayer>();
+
+  const ::arm_compute::ActivationLayerInfo act_info{
+      ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
+
+  fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+
+  auto acl_fn = asAclClFunction(std::move(fn));
+
+  _return_fn = std::move(acl_fn);
+}
+
+// Softmax -> CLSoftmaxLayer with the node's beta scaling factor.
+void KernelGenerator::visit(const ir::operation::Softmax &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
+
+  const auto beta = node.param().beta;
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+
+  fn->configure(input_alloc->handle(), output_alloc->handle(), beta);
+
+  auto acl_fn = asAclClFunction(std::move(fn));
+
+  _return_fn = std::move(acl_fn);
+}
+
+// Slice -> CLSlice. The BEGINS/SIZES operands must be constant int32 vectors;
+// each (begin, size) pair is converted to (start, end = begin + size) and
+// stored at the ACL axis position for the backend layout.
+void KernelGenerator::visit(const ir::operation::Slice &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
+  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
+  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
+
+  auto outputData_alloc = _tensor_builder->at(output_index).get();
+  auto inputData_alloc = _tensor_builder->at(input_index).get();
+  const auto frontend_layout = _current_op_seq_layout;
+  const auto backend_layout = inputData_alloc->layout();
+
+  // Set initializers for indices data such as order of inputData
+  int input_rank = node.param().rank;
+  std::vector<int32_t> starts;
+  std::vector<int32_t> ends;
+  starts.resize(input_rank, 0);
+  ends.resize(input_rank, 0);
+  {
+    assert(_ctx.at(begins_index).data());
+    assert(_ctx.at(sizes_index).data());
+    auto beginData_base = _ctx.at(begins_index).data()->base();
+    auto sizeData_base = _ctx.at(sizes_index).data()->base();
+    const int beginData_size = _ctx.at(begins_index).shape().num_elements();
+    const int sizeData_size = _ctx.at(sizes_index).shape().num_elements();
+
+    using ir::DataType;
+
+    UNUSED_RELEASE(beginData_size);
+    UNUSED_RELEASE(sizeData_size);
+
+    assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32);
+    assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32);
+    assert(beginData_size == input_rank);
+    assert(sizeData_size == input_rank);
+
+    assert(beginData_base != nullptr);
+    for (int n = 0; n < input_rank; ++n)
+    {
+      // Frontend axis n is stored at its corresponding ACL axis position.
+      auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
+                                                                backend_layout)
+                      .value();
+
+      int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
+      starts[axis] = begin_value;
+
+      int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n);
+      ends[axis] = begin_value + size_value;
+    }
+  }
+
+  ::arm_compute::Coordinates starts_set;
+  ::arm_compute::Coordinates ends_set;
+
+  for (size_t i = 0; i < starts.size(); ++i)
+  {
+    starts_set.set(i, starts[i]);
+    ends_set.set(i, ends[i]);
+  }
+
+  auto fn = std::make_unique<::arm_compute::CLSlice>();
+
+  fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set);
+
+  auto acl_fn = asAclClFunction(std::move(fn));
+
+  _return_fn = std::move(acl_fn);
+}
+
// Generate a CLStridedSlice function for a StridedSlice node.
// STARTS/ENDS/STRIDES must be constant operands; both the per-axis values and the
// begin/end/shrink-axis mask bits are remapped from frontend to backend axis order.
void KernelGenerator::visit(const ir::operation::StridedSlice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};

  auto outputData_alloc = _tensor_builder->at(output_index).get();
  auto inputData_alloc = _tensor_builder->at(input_index).get();
  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = inputData_alloc->layout();

  // Set initializers for indices data such as order of inputData
  int input_rank = node.param().rank;
  std::vector<int32_t> starts;
  std::vector<int32_t> ends;
  std::vector<int32_t> strides;
  starts.resize(input_rank, 0);
  ends.resize(input_rank, 0);
  strides.resize(input_rank, 0);
  {
    // The index operands are read at compile time, so they must carry constant data.
    assert(_ctx.at(starts_index).data());
    assert(_ctx.at(ends_index).data());
    assert(_ctx.at(strides_index).data());
    auto startData_base = _ctx.at(starts_index).data()->base();
    auto endData_base = _ctx.at(ends_index).data()->base();
    auto stridesData_base = _ctx.at(strides_index).data()->base();
    const int startData_size = _ctx.at(starts_index).shape().num_elements();
    const int endData_size = _ctx.at(ends_index).shape().num_elements();
    const int stridesData_size = _ctx.at(strides_index).shape().num_elements();

    using ir::DataType;

    // Only used in asserts below; avoid unused-variable warnings in release builds.
    UNUSED_RELEASE(startData_size);
    UNUSED_RELEASE(endData_size);
    UNUSED_RELEASE(stridesData_size);

    assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32);
    assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32);
    assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32);
    assert(startData_size == input_rank);
    assert(endData_size == input_rank);
    assert(stridesData_size == input_rank);

    assert(startData_base != nullptr);
    for (int n = 0; n < input_rank; ++n)
    {
      // Remap frontend axis n to the corresponding backend (ACL) axis.
      auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
                                                                 backend_layout)
                      .value();

      int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
      starts[axis] = start_value;

      int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
      ends[axis] = end_value;

      int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
      strides[axis] = strides_value;
    }
  }

  // Set mask bits such as order of inputData
  // Mask bit positions refer to frontend axes, so they are reordered the same way.
  const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank,
                                                           frontend_layout, backend_layout);
  const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank,
                                                         frontend_layout, backend_layout);
  const auto shrink_axis_mask = acl_common::ReorderBits<int32_t>(
      node.param().shrink_axis_mask, input_rank, frontend_layout, backend_layout);

  ::arm_compute::Coordinates starts_set;
  ::arm_compute::Coordinates ends_set;
  ::arm_compute::BiStrides strides_set;

  for (size_t i = 0; i < starts.size(); ++i)
  {
    starts_set.set(i, starts[i]);
    ends_set.set(i, ends[i]);
    strides_set.set(i, strides[i]);
  }

  auto fn = std::make_unique<::arm_compute::CLStridedSlice>();

  fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set,
                strides_set, begin_mask, end_mask, shrink_axis_mask);

  auto acl_fn = asAclClFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
+
+void KernelGenerator::visit(const ir::operation::Transpose &node)
+{
+ const auto ofm_idx{node.getOutputs().at(0)};
+ const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
+ const auto &perm{node.param().perm};
+
+ const auto rank = node.param().rank;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = ifm_alloc->layout();
+
+ std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
+ // Reversed
+ auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
+ rank, pv, frontend_layout, backend_layout);
+
+ auto fn = std::make_unique<::arm_compute::CLPermute>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Add &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>();
+
+ fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
+
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+void KernelGenerator::visit(const ir::operation::Sub &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>();
+
+ fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
+
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+void KernelGenerator::visit(const ir::operation::Div &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>();
+
+ fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+void KernelGenerator::visit(const ir::operation::Exp &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLExpLayer>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle());
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
+ const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
+ const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto gamma_alloc = _tensor_builder->at(gamma_index).get();
+ auto beta_alloc = _tensor_builder->at(beta_index).get();
+ auto epsilon = node.param().epsilon;
+ auto activation = node.param().activation;
+
+ auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(),
+ beta_alloc->handle(), epsilon);
+
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+void KernelGenerator::visit(const ir::operation::Logistic &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
+
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input0_alloc = _tensor_builder->at(input0_index).get();
+ auto input1_alloc = _tensor_builder->at(input1_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
+
+ fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
+ ::arm_compute::BinaryLogicalOperation::AND);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::LSTM &node)
+{
+ // TODO Support dynamic rnn
+ // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
+ const auto scratch_buffer_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+ const auto output_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ const auto cell_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+ const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto input_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
+ const auto input_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ const auto input_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ const auto input_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
+ const auto recurrent_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ const auto recurrent_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ const auto recurrent_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto cell_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
+ const auto cell_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
+ const auto cell_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
+ const auto input_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+ const auto forget_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
+ const auto output_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ const auto projection_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
+ const auto projection_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
+ const auto output_state_in_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
+ const auto cell_threshold = node.param().cell_threshold;
+ const auto projection_threshold = node.param().projection_threshold;
+
+ bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
+ bool has_recurrent_to_input_weights =
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
+ bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(projection_weights_index).shape().dim(1) != 0;
+ bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
+
+ // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
+ // true: no CIFG
+ // false: CIFG
+ // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+
+ // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
+ // But the cell_to_input_weights does not exist in regular CIFG although peephole.
+ // true: peephole
+ // false: no peephole
+ bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
+
+ // NOTE Although the projection weights has data the projection bias may not have data.
+ bool has_projection_param = has_projection_weights;
+
+ const auto activation = node.param().activation;
+ const auto cell_clip = cell_threshold;
+ const auto projection_clip = projection_threshold;
+ assert(cell_clip >= 0.f && projection_clip >= 0.f);
+
+ auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get();
+ auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get();
+ auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get();
+ auto output_alloc = _tensor_builder->at(output_index).get();
+
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get();
+ auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get();
+ auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get();
+ auto recurrent_to_forget_weights_alloc =
+ _tensor_builder->at(recurrent_to_forget_weights_index).get();
+ auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get();
+ auto recurrent_to_output_weights_alloc =
+ _tensor_builder->at(recurrent_to_output_weights_index).get();
+
+ auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get();
+ auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get();
+ auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get();
+ auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get();
+ auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get();
+
+ auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
+
+ auto fn = std::make_unique<::arm_compute::CLLSTMLayer>();
+
+ ::arm_compute::LSTMParams<::arm_compute::ICLTensor> lstm_params{};
+ if (has_cifg_param)
+ {
+ auto input_to_input_weights_alloc =
+ _tensor_builder->at(input_to_input_weights_index).get(); // optional
+ auto recurrent_to_input_weights_alloc =
+ _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
+ auto cell_to_input_weights_handle =
+ has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle()
+ : nullptr; // optional (non-cifg && peephole)
+ auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional
+ lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(),
+ recurrent_to_input_weights_alloc->handle(),
+ cell_to_input_weights_handle, input_gate_bias_alloc->handle());
+ }
+ if (has_peephole_param)
+ {
+ auto cell_to_forget_weights_alloc =
+ _tensor_builder->at(cell_to_forget_weights_index).get(); // optional
+ auto cell_to_output_weights_alloc =
+ _tensor_builder->at(cell_to_output_weights_index).get(); // optional
+ lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(),
+ cell_to_output_weights_alloc->handle());
+ }
+ if (has_projection_param)
+ {
+ auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional
+ auto projection_bias_handle = has_projection_bias
+ ? _tensor_builder->at(projection_bias_index).get()->handle()
+ : nullptr; // optional
+ lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle);
+ }
+
+ fn->configure(
+ input_alloc->handle(), input_to_forget_weights_alloc->handle(),
+ input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(),
+ recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(),
+ recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(),
+ cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(),
+ cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(),
+ output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(),
+ lstm_params, act_info, cell_clip, projection_clip);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ReduceMax &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ReduceMax::Input::INPUT)};
+ const auto &axes{node.param().axes};
+ const auto keep_dims{node.param().keep_dims};
+
+ auto ofm_alloc = _tensor_builder->at(output_index).get();
+ auto ifm_alloc = _tensor_builder->at(input_index).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = ifm_alloc->layout();
+
+ // Convert to ACL axes taking into account negative values and possible duplicates.
+ std::set<std::uint32_t> acl_axes;
+ const int ifm_rank = node.param().rank;
+ for (int axis : axes)
+ {
+ if (axis < 0)
+ axis += ifm_rank;
+ acl_axes.insert(
+ acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
+ }
+
+ auto fn = std::make_unique<::arm_compute::CLReduceOperation>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), acl_axes, keep_dims,
+ arm_compute::ReduceOperation::MAX);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Comparison &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
+
+ const auto comparison_type = node.param().comparison_type;
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input0_alloc = _tensor_builder->at(input0_index).get();
+ auto input1_alloc = _tensor_builder->at(input1_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLComparison>();
+
+ fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Pack &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ auto axis{node.param().axis};
+
+ const auto output_rank = node.param().rank;
+
+ std::vector<ir::OperandIndex> input_indexes;
+ for (const auto &input_index : node.getInputs())
+ input_indexes.emplace_back(input_index);
+
+ auto output = _tensor_builder->at(output_index).get()->handle();
+ std::vector<arm_compute::ICLTensor *> inputs;
+ for (const auto &input_index : input_indexes)
+ inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+
+ if (axis < 0)
+ axis += output_rank;
+ axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
+
+ auto fn = std::make_unique<::arm_compute::CLStackLayer>();
+
+ fn->configure(inputs, axis, output);
+
+ _return_fn = asAclClFunction(std::move(fn));
+}
+
// Generate a layout-conversion function for a Permute node.
// Rank-4 NCHW<->NHWC conversions become a CLPermute with a fixed permutation vector
// (expressed below in ACL's reversed WHCN-style axis order); every other case is a
// plain element copy via CLCopy.
void KernelGenerator::visit(const ir::operation::Permute &node)
{
  const auto ofm_idx{node.getOutputs().at(0)};
  const auto ifm_idx{node.getInputs().at(0)};
  const auto permute_type = node.getPermuteType();
  auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
  auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
  const auto rank = _ctx.at(ofm_idx).shape().rank();
  assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());

  std::unique_ptr<::arm_compute::IFunction> fn;
  arm_compute::PermutationVector pv;
  if (permute_type == ir::operation::Permute::Type::NCHW_TO_NHWC && rank == 4)
  {
    // WHCN -> CWHN
    pv = arm_compute::PermutationVector{2, 0, 1};

    auto l = std::make_unique<::arm_compute::CLPermute>();

    l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);

    fn = std::move(l);
  }
  else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
  {
    // CWHN -> WHCN
    pv = arm_compute::PermutationVector{1, 2, 0};

    auto l = std::make_unique<::arm_compute::CLPermute>();

    l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);

    fn = std::move(l);
  }
  else
  {
    // COPY permute type, or a rank where no axis reordering is needed.
    auto l = std::make_unique<::arm_compute::CLCopy>();

    l->configure(ifm_alloc->handle(), ofm_alloc->handle());

    fn = std::move(l);
  }

  auto acl_fn = asAclClFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
+
+void KernelGenerator::visit(const ir::operation::RSQRT &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+
+ _return_fn = asAclClFunction(std::move(fn));
+}
+
+void KernelGenerator::visit(const ir::operation::ReLU &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<arm_compute::CLActivationLayer>();
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+
+ fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+
+ const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLScale>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(),
+ ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
+ ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ReLU1 &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
+
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ReLU6 &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
+
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::RNN &node)
+{
+ const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
+ const auto hidden_state_out_index{
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
+ const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
+ const auto recurrent_weights_index{
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
+ const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
+
+ const auto activation = node.param().activation;
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get();
+
+ auto input_alloc = _tensor_builder->at(input_index).get();
+ auto weights_alloc = _tensor_builder->at(weights_index).get();
+ auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get();
+ auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get();
+ auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
+
+ auto copy_layer = std::make_unique<::arm_compute::CLCopy>();
+ copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle());
+ _return_fn = asAclClFunction(std::move(copy_layer));
+
+ auto fn = std::make_unique<::arm_compute::CLRNNLayerEx>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+ fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(),
+ bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(),
+ act_info);
+ _return_fn = asAclClFunction(std::move(fn));
+}
+
+void KernelGenerator::visit(const ir::operation::Floor &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLFloor>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
+ const auto block_size_index{
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto block_size_alloc = _tensor_builder->at(block_size_index).get();
+ auto paddings_alloc = _tensor_builder->at(paddings_index).get();
+
+ assert(_ctx.at(block_size_index).data());
+ assert(_ctx.at(paddings_index).data());
+
+ std::unique_ptr<::arm_compute::IFunction> fn;
+ if (_ctx.at(ofm_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM)
+ {
+ // NOTE CLSpaceToBatchLayer has a bug that padding's values are 0 even when zero point of
+ // QASYMM8 is not 0.
+ auto l = std::make_unique<::arm_compute::CLSpaceToBatchND>();
+ l->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(),
+ ofm_alloc->handle());
+ fn = std::move(l);
+ }
+ else
+ {
+ auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>();
+ l->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(),
+ ofm_alloc->handle());
+ fn = std::move(l);
+ }
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for SpaceToDepth using CLSpaceToDepth with the
+// node's scalar block_size parameter.
+void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
+
+ auto block_size = node.param().block_size;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLSpaceToDepth>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for L2Pool2D: CLPoolingLayer with PoolingType::L2,
+// followed by the node's fused activation appended as a second function.
+void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)};
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+
+ uint32_t kw = node.param().kw;
+ uint32_t kh = node.param().kh;
+ const auto stride = node.param().stride;
+ // Resolve explicit padding values from the node's padding mode and shapes.
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ ::arm_compute::PoolingLayerInfo info{
+ ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh},
+ ::onert::backend::acl_common::asPadStrideInfo(padding, stride)};
+
+ auto fn = std::make_unique<::arm_compute::CLPoolingLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
+
+ // Pooling + activation are returned as a two-element function sequence.
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+// Emit an ACL-CL kernel for EmbeddingLookup.
+// Note CLEmbeddingLookup::configure takes (values, output, lookups) in that order.
+void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto lookups_alloc = _tensor_builder->at(lookups_index).get();
+ auto values_alloc = _tensor_builder->at(values_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>();
+
+ fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle());
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for L2Normalization by repurposing CLNormalizationLayer
+// (CROSS_MAP) with parameters chosen so the cross-map LRN formula reduces to an
+// L2 normalization over the last (depth) dimension.
+void KernelGenerator::visit(const ir::operation::L2Normalization &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
+
+ // {CL|Neon}L2Normalization performs the reduction only along dimension 0
+ // L2 Normalization always performs the reduction along the depth axis
+ // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by
+ // choosing normalization parameters as below
+
+ const auto &ifm_shape = _ctx.at(ifm_index).shape();
+ // TODO Support optional constant dimension that normalization would be performed on
+ const auto normalization_axis = node.param().rank - 1;
+ int32_t radius =
+ 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
+ float alpha = 1.0f; // In the implementation to make alpha_ become 1
+ float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
+ float bias = 0.0f; // Don't offset the reduction.
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
+ radius, alpha, beta, bias, false);
+
+ auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for HashtableLookup: produces both the looked-up
+// values (OUTPUT) and the per-lookup hit flags (HITS).
+void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
+{
+ const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
+ const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
+
+ const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
+ const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
+ const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto hits_alloc = _tensor_builder->at(hits_index).get();
+
+ auto lookups_alloc = _tensor_builder->at(lookups_index).get();
+ auto keys_alloc = _tensor_builder->at(keys_index).get();
+ auto values_alloc = _tensor_builder->at(values_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLHashtableLookup>();
+
+ fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(),
+ output_alloc->handle(), hits_alloc->handle());
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for PReLU; ALPHA is a separate tensor input, so the
+// slope may vary per element/channel.
+void KernelGenerator::visit(const ir::operation::PReLU &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
+ const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto alpha_alloc = _tensor_builder->at(alpha_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLPReLU>();
+
+ fn->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle());
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for TransposeConv (deconvolution).
+// For VALID padding, the "invalid" right/bottom margins — output cells the
+// stride pattern cannot reach — are computed and passed to the ACL layer.
+void KernelGenerator::visit(const ir::operation::TransposeConv &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
+ const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);
+
+ const auto stride = node.param().stride;
+
+ // Only SAME and VALID padding modes are supported here.
+ assert((node.param().padding.type == ir::PaddingType::SAME) ||
+ (node.param().padding.type == ir::PaddingType::VALID));
+ // NOTE ofm/ifm are swapped vs. forward conv: padding is derived from the output side.
+ auto padding = ir::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
+ ker_shape.W, ker_shape.H);
+
+ uint32_t invalid_horizontal = 0;
+ uint32_t invalid_vertical = 0;
+ if (node.param().padding.type == ir::PaddingType::VALID)
+ {
+ invalid_horizontal =
+ ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
+ invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
+ }
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ker_alloc = _tensor_builder->at(ker_index).get();
+
+ const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
+
+ auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+
+ // Third argument (bias) is nullptr: this op has no bias input.
+ fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
+ invalid_horizontal, invalid_vertical);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for SQRT via CLActivationLayer with the SQRT
+// activation function.
+void KernelGenerator::visit(const ir::operation::SQRT &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
+
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for LogicalOr.
+// NOTE(review): implemented with CLBitwiseOr — correct for logical OR only if
+// boolean tensors use an all-zero/nonzero byte encoding where bitwise OR of any
+// two "true" encodings is still "true"; confirm the backend's bool8 encoding.
+void KernelGenerator::visit(const ir::operation::LogicalOr &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input0_alloc = _tensor_builder->at(input0_index).get();
+ auto input1_alloc = _tensor_builder->at(input1_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLBitwiseOr>();
+
+ fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for LogicalNot.
+// NOTE(review): implemented with CLBitwiseNot — bitwise NOT equals logical NOT
+// only when "true" is encoded as 0xFF (so ~0xFF == 0x00); with a 0/1 encoding
+// ~0x01 == 0xFE, which is still truthy. Confirm the backend's bool8 encoding.
+void KernelGenerator::visit(const ir::operation::LogicalNot &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLBitwiseNot>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle());
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for SquaredDifference: out = (lhs - rhs)^2 element-wise.
+void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
+
+ fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for TopKV2: top-k values and their indices.
+void KernelGenerator::visit(const ir::operation::TopKV2 &node)
+{
+ const auto outputValues_index{node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_VALUES)};
+ const auto outputIndices_index{
+ node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};
+
+ const auto inputData_index{node.getInputs().at(ir::operation::TopKV2::Input::INPUT)};
+
+ // Currently, we only support the vector input.
+ // NOTE(review): the assert below also admits rank 2, which contradicts the
+ // "vector only" comment above — confirm whether rank-2 input is actually supported.
+ assert(_ctx.at(inputData_index).shape().rank() == 1 ||
+ _ctx.at(inputData_index).shape().rank() == 2);
+
+ const auto k = node.param().k;
+
+ auto values_alloc = _tensor_builder->at(outputValues_index).get();
+ auto indices_alloc = _tensor_builder->at(outputIndices_index).get();
+ auto input_alloc = _tensor_builder->at(inputData_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLTopKV2>();
+
+ fn->configure(input_alloc->handle(), k, values_alloc->handle(), indices_alloc->handle());
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for Gather; the negative axis is normalized to a
+// non-negative value, then converted to the ACL axis numbering.
+void KernelGenerator::visit(const ir::operation::Gather &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+
+ const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
+ const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape();
+
+ const auto ifm_rank = node.param().rank;
+ const auto axis_raw = node.param().axis;
+ const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
+ const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto indices_alloc = _tensor_builder->at(indices_index).get();
+
+ // NOTE The frontend layout and backend layout must be the same for this operation.
+ // If not the same, we have to add a stage(?) to perform permutation of output tensor. It
+ // is not not efficient even if it works well. If so, it would be better to set the
+ // layout of these backend tensors to the same layout.
+ // There is also one thing we have to think about. This operation depends on the layout of
+ // a model. For example, if a model in NHWC has this operation as output rank == 4, indices
+ // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
+ // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
+ const auto backend_layout = ofm_alloc->layout();
+ UNUSED_RELEASE(backend_layout);
+ assert(backend_layout == ifm_alloc->layout());
+ assert(backend_layout == indices_alloc->layout());
+ assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
+
+ auto fn = std::make_unique<::arm_compute::CLGatherEx>();
+
+ fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for element-wise negation via CLNeg.
+void KernelGenerator::visit(const ir::operation::Neg &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLNeg>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for Abs via CLActivationLayer with the ABS
+// activation function.
+void KernelGenerator::visit(const ir::operation::Abs &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
+
+ auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for ArgMax via CLArgOperation with ArgOperation::MAX.
+// The axis is normalized (negative values wrapped) and mapped between frontend
+// and backend layouts before being handed to ACL.
+void KernelGenerator::visit(const ir::operation::ArgMax &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
+
+ auto ifm_shape = _ctx.at(ifm_index).shape();
+ auto ofm_shape = _ctx.at(ofm_index).shape();
+
+ // Output has exactly one dimension fewer than the input (the reduced axis).
+ assert((ifm_shape.rank() - 1) == ofm_shape.rank());
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ const auto ifm_rank = node.param().rank;
+ auto frontend_layout = _current_op_seq_layout;
+ auto backend_layout = ifm_alloc->layout();
+
+ int axis_value = node.param().axis;
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+
+ auto acl_axis =
+ acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+
+ auto fn = std::make_unique<::arm_compute::CLArgOperation>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), {acl_axis},
+ ::arm_compute::ArgOperation::MAX);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for Dequantize.
+// NOTE(review): implemented as a CLCast with SubDataType::NONE — presumably the
+// cast kernel applies the input's quantization info when converting to float;
+// confirm against the backend's CLCast implementation.
+void KernelGenerator::visit(const ir::operation::Dequantize &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLCast>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle(), arm_compute::SubDataType::NONE);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for Mean (reduce-mean over the given axes) via
+// CLReduceMean. Axes are normalized, layout-converted, and de-duplicated
+// through a std::set before being packed into ACL Coordinates.
+void KernelGenerator::visit(const ir::operation::Mean &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Mean::Input::INPUT)};
+ const auto &axes{node.param().axes};
+ const auto keep_dims{node.param().keep_dims};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = ifm_alloc->layout();
+
+ // Convert to ACL axes taking into account negative values and possible duplicates.
+ std::set<std::uint32_t> acl_axes;
+ const int ifm_rank = node.param().rank;
+ for (int axis : axes)
+ {
+ if (axis < 0)
+ axis += ifm_rank;
+ acl_axes.insert(
+ acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
+ }
+
+ arm_compute::Coordinates reduce_axes;
+ for (const auto axis : acl_axes)
+ {
+ reduce_axes.set(reduce_axes.num_dimensions(), axis);
+ }
+
+ auto fn = std::make_unique<::arm_compute::CLReduceMean>();
+
+ fn->configure(ifm_alloc->handle(), reduce_axes, keep_dims, ofm_alloc->handle());
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for LocalResponseNormalization via CLNormalizationLayer.
+// ACL's norm_size counts the full window, so the node's one-sided radius is
+// converted with norm_size = radius * 2 + 1.
+void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{
+ node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
+
+ auto radius = node.param().radius;
+ auto alpha = node.param().alpha;
+ auto beta = node.param().beta;
+ auto bias = node.param().bias;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ const auto norm_info = ::arm_compute::NormalizationLayerInfo(
+ ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
+
+ auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for DepthToSpace using CLDepthToSpace with the
+// node's scalar block_size parameter.
+void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
+
+ auto block_size = node.param().block_size;
+ // A non-positive block size would be a malformed model.
+ assert(block_size > 0);
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLDepthToSpace>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle(), block_size);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for ReduceMin via CLReduceOperation(MIN).
+// Axis handling mirrors visit(Mean): normalize negatives, convert layouts,
+// and de-duplicate through a std::set.
+void KernelGenerator::visit(const ir::operation::ReduceMin &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ReduceMin::Input::INPUT)};
+ const auto &axes{node.param().axes};
+ const auto keep_dims{node.param().keep_dims};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = ifm_alloc->layout();
+
+ // Convert to ACL axes taking into account negative values and possible duplicates.
+ std::set<std::uint32_t> acl_axes;
+ const int ifm_rank = node.param().rank;
+ for (int axis : axes)
+ {
+ if (axis < 0)
+ axis += ifm_rank;
+ acl_axes.insert(
+ acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
+ }
+
+ auto fn = std::make_unique<::arm_compute::CLReduceOperation>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), acl_axes, keep_dims,
+ ::arm_compute::ReduceOperation::MIN);
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for Split: one input tensor is split evenly into
+// num_splits outputs along the (layout-converted) axis.
+void KernelGenerator::visit(const ir::operation::Split &node)
+{
+ const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+
+ // The model must declare exactly one output per split.
+ assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
+
+ const auto ifm_rank = node.param().rank;
+ std::vector<ir::OperandIndex> output_indexes;
+ for (const auto &output : node.getOutputs())
+ output_indexes.emplace_back(output);
+
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ std::vector<arm_compute::ICLTensor *> output_allocs;
+ for (const auto &ofm_ind : output_indexes)
+ output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = ifm_alloc->layout();
+ auto axis = node.param().axis;
+ if (axis < 0)
+ axis += ifm_rank;
+ axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
+
+ auto fn = std::make_unique<::arm_compute::CLSplit>();
+
+ fn->configure(ifm_alloc->handle(), output_allocs, axis);
+
+ _return_fn = asAclClFunction(std::move(fn));
+}
+
+// Emit an ACL-CL kernel for Unpack (unstack): the input is split along the
+// (layout-converted) axis into one output per slice via CLUnstack.
+void KernelGenerator::visit(const ir::operation::Unpack &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
+ auto axis{node.param().axis};
+
+ const auto input_rank = node.param().rank;
+
+ std::vector<ir::OperandIndex> output_indexes;
+ for (const auto &output_index : node.getOutputs())
+ output_indexes.emplace_back(output_index);
+
+ auto input = _tensor_builder->at(input_index).get()->handle();
+ std::vector<arm_compute::ICLTensor *> outputs;
+ for (const auto &output_index : output_indexes)
+ outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ // Normalize a negative axis, then convert to ACL's axis numbering.
+ if (axis < 0)
+ axis += input_rank;
+ axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
+
+ auto fn = std::make_unique<::arm_compute::CLUnstack>();
+
+ fn->configure(input, outputs, axis);
+
+ _return_fn = asAclClFunction(std::move(fn));
+}
+
+// Emit an ACL-CL kernel for Pad via CLPadLayer.
+// The PAD input must be a constant operand holding int32 (before, after) pairs,
+// one pair per dimension; each pair is remapped to the backend-layout axis.
+void KernelGenerator::visit(const ir::operation::Pad &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
+ const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
+ const auto output_index{node.getOutputs().at(0)};
+ assert(_ctx.at(pad_index).data());
+
+ auto rank = node.param().rank;
+ auto pad_base = _ctx.at(pad_index).data()->base();
+
+ auto input_type = _ctx.at(input_index).typeInfo();
+ auto data_type = acl_common::asDataType(input_type.type());
+ auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset());
+ // Padding fills with constant 0 in the input's data type / quantization.
+ const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
+
+ auto input = _tensor_builder->at(input_index).get()->handle();
+ auto output = _tensor_builder->at(output_index).get()->handle();
+
+ ::arm_compute::PaddingList padding_list;
+ padding_list.resize(rank);
+ for (int32_t n = 0; n < rank; ++n)
+ {
+ // Each dimension contributes two int32 values: (pad_before, pad_after).
+ const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto axis =
+ acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
+ padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
+ }
+ auto fn = std::make_unique<::arm_compute::CLPadLayer>();
+ fn->configure(input, output, padding_list, pixel_value);
+
+ _return_fn = asAclClFunction(std::move(fn));
+}
+
+// Emit an ACL-CL kernel for element-wise minimum via CLElementwiseMin.
+void KernelGenerator::visit(const ir::operation::Min &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLElementwiseMin>();
+
+ fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Emit an ACL-CL kernel for element-wise maximum via CLElementwiseMax.
+void KernelGenerator::visit(const ir::operation::Max &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::arm_compute::CLElementwiseMax>();
+
+ fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+
+ auto acl_fn = asAclClFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h
new file mode 100644
index 000000000..c7e1a2178
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
+
+#include <backend/IKernelGenerator.h>
+
+#include "ir/Operands.h"
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+// Visitor that translates each supported IR operation into an executable
+// ACL-CL kernel function; the result of each visit is stored by the base
+// class machinery (see the matching definitions in KernelGenerator.cc).
+class KernelGenerator : public IKernelGenerator
+{
+public:
+ KernelGenerator(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder);
+
+ void visit(const ir::OpSequence &) override;
+ void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::DepthwiseConv2D &) override;
+ void visit(const ir::operation::MaxPool2D &) override;
+ void visit(const ir::operation::AvgPool2D &) override;
+ void visit(const ir::operation::Concat &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::Mul &) override;
+ void visit(const ir::operation::ReduceSum &) override;
+ void visit(const ir::operation::Reshape &) override;
+ void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::Tanh &) override;
+ void visit(const ir::operation::Softmax &) override;
+ void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::StridedSlice &) override;
+ void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::Add &) override;
+ void visit(const ir::operation::Sub &) override;
+ void visit(const ir::operation::Cast &) override;
+ void visit(const ir::operation::Div &) override;
+ void visit(const ir::operation::Exp &) override;
+ void visit(const ir::operation::InstanceNorm &) override;
+ void visit(const ir::operation::Logistic &) override;
+ void visit(const ir::operation::ReduceMax &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::LogicalAnd &) override;
+ void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::Pack &) override;
+ void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::RSQRT &) override;
+ void visit(const ir::operation::ReLU &) override;
+ void visit(const ir::operation::ResizeBilinear &) override;
+ void visit(const ir::operation::ReLU1 &) override;
+ void visit(const ir::operation::ReLU6 &) override;
+ void visit(const ir::operation::RNN &) override;
+ void visit(const ir::operation::Floor &) override;
+ void visit(const ir::operation::SpaceToBatchND &) override;
+ void visit(const ir::operation::SpaceToDepth &) override;
+ void visit(const ir::operation::L2Pool2D &) override;
+ void visit(const ir::operation::EmbeddingLookup &) override;
+ void visit(const ir::operation::L2Normalization &) override;
+ void visit(const ir::operation::HashtableLookup &) override;
+ void visit(const ir::operation::PReLU &) override;
+ void visit(const ir::operation::TransposeConv &) override;
+ void visit(const ir::operation::SQRT &) override;
+ void visit(const ir::operation::LogicalOr &) override;
+ void visit(const ir::operation::LogicalNot &) override;
+ void visit(const ir::operation::SquaredDifference &) override;
+ void visit(const ir::operation::TopKV2 &) override;
+ void visit(const ir::operation::Gather &) override;
+ void visit(const ir::operation::Neg &) override;
+ void visit(const ir::operation::Abs &) override;
+ void visit(const ir::operation::ArgMax &) override;
+ void visit(const ir::operation::Dequantize &) override;
+ void visit(const ir::operation::Mean &) override;
+ void visit(const ir::operation::LocalResponseNormalization &) override;
+ void visit(const ir::operation::DepthToSpace &) override;
+ void visit(const ir::operation::ReduceMin &) override;
+ void visit(const ir::operation::Split &) override;
+ void visit(const ir::operation::Unpack &) override;
+ void visit(const ir::operation::Pad &) override;
+ void visit(const ir::operation::Min &) override;
+ void visit(const ir::operation::Max &) override;
+
+private:
+ const ir::Operands &_ctx; // All model operands (shapes, types, constant data)
+ std::shared_ptr<TensorBuilder> _tensor_builder; // Maps operand indices to backend tensors
+ ir::Layout _current_op_seq_layout; // Frontend layout of the op sequence being visited
+};
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/acl_cl/Optimizer.cc b/runtime/onert/backend/acl_cl/Optimizer.cc
new file mode 100644
index 000000000..6ba3143e8
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/Optimizer.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Optimizer.h"
+
+#include "ParentInfo.h"
+
+#include <cassert>
+#include <ir/LoweredGraph.h>
+#include <util/logging.h>
+#include "AclSubTensorAnalyzer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+// Construct the acl_cl optimizer for a backend context; caches the context's
+// tensor builder (downcast to the acl_cl TensorBuilder).
+Optimizer::Optimizer(BackendContext *context)
+ : _context{context},
+ _tensor_builder{std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
+{
+ assert(context);
+}
+
+// Run backend-level graph optimizations. Currently this only builds subtensor
+// (parent/child) info so Concat inputs can alias slices of the output tensor
+// instead of being copied ("concat elimination").
+void Optimizer::optimize()
+{
+ // Concat elimination (build subtensor info)
+ {
+ acl_common::AclSubTensorAnalyzer sa{*_context->graph()};
+ for (auto op_info : _context->operation_list())
+ {
+ auto &op = _context->graph()->operations().at(op_info.index);
+ sa.setLayout(op_info.layout);
+ op.accept(sa);
+ }
+
+ // Hand the discovered parent map to the tensor builder for subtensor allocation.
+ _tensor_builder->parent_map(sa.releaseParentMap());
+ }
+}
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/Optimizer.h b/runtime/onert/backend/acl_cl/Optimizer.h
new file mode 100644
index 000000000..18d38ec1b
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/Optimizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
+#define __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
+
+#include <backend/IOptimizer.h>
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+class Optimizer : public IOptimizer
+{
+public:
+ Optimizer(BackendContext *context);
+
+ void optimize() override;
+
+private:
+ BackendContext *_context;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+};
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
diff --git a/runtime/onert/backend/acl_cl/ShapeFixer.cc b/runtime/onert/backend/acl_cl/ShapeFixer.cc
new file mode 100644
index 000000000..d73cf151d
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/ShapeFixer.cc
@@ -0,0 +1,431 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ShapeFixer.h"
+
+#include <arm_compute/runtime/CL/CLFunctions.h> // Include all ARM Compute CL functions
+#include <arm_compute/runtime/CL/CLFunctionsEx.h> // Include all ARM Compute EX CL functions
+
+#include <AclFunction.h>
+#include <Convert.h>
+#include <Swizzle.h>
+
+#include "ir/Index.h"
+#include "exec/NopFunction.h"
+#include "util/logging.h"
+#include "util/Utils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+using ::onert::backend::acl_common::asAclFunction;
+
+ShapeFixer::ShapeFixer(const ir::Operands &ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder)
+ : _ctx(ctx), _tensor_builder(tensor_builder)
+{
+ assert(tensor_builder);
+}
+
+void ShapeFixer::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
+ _tensor_builder->dimCorrection(ofm_index, false);
+ _tensor_builder->dimCorrection(ifm_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::Cast &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Conv2D &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::DepthwiseConv2D &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::MaxPool2D &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::AvgPool2D &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Concat &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ _tensor_builder->dimCorrection(ofm_index, false);
+ for (const auto &input : node.getInputs())
+ _tensor_builder->dimCorrection(input, false);
+}
+
+void ShapeFixer::visit(const ir::operation::FullyConnected &node)
+{
+ using ir::operation::FullyConnected;
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto input_rank = _ctx.at(input_index).shape().rank();
+ // Check for reshaping input's shape into rank-2
+ if (input_rank == 3 || input_rank == 4)
+ _tensor_builder->dimCorrection(input_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::Mul &node)
+{
+ const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
+
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+void ShapeFixer::visit(const ir::operation::ReduceSum &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Reshape &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
+ _tensor_builder->dimCorrection(input_index, false);
+ _tensor_builder->dimCorrection(output_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::Squeeze &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ if (_ctx.at(output_index).shape().rank() == 0)
+ const_cast<ir::Shape &>(_ctx.at(output_index).shape()).extendRank(1);
+ const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
+ _tensor_builder->dimCorrection(input_index, false);
+ _tensor_builder->dimCorrection(output_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::Tanh &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Softmax &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Slice &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::StridedSlice &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+ _tensor_builder->dimCorrection(ofm_index, false);
+ _tensor_builder->dimCorrection(ifm_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::Transpose &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Add &node)
+{
+ const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+ const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+void ShapeFixer::visit(const ir::operation::Sub &node)
+{
+ const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
+
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+void ShapeFixer::visit(const ir::operation::Div &node)
+{
+ const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
+
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+void ShapeFixer::visit(const ir::operation::Exp &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::InstanceNorm &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Logistic &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::LogicalAnd &node)
+{
+ const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
+
+ if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+void ShapeFixer::visit(const ir::operation::LSTM &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ReduceMax &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Comparison &node)
+{
+ const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
+
+ if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+void ShapeFixer::visit(const ir::operation::Pack &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ _tensor_builder->dimCorrection(ofm_index, false);
+ for (const auto &inputs : node.getInputs())
+ {
+ _tensor_builder->dimCorrection(inputs, false);
+ const auto ofm_rank = _ctx.at(ofm_index).shape().rank();
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<ir::Shape &>(_ctx.at(inputs).shape()).extendRank(ofm_rank);
+ }
+}
+
+void ShapeFixer::visit(const ir::operation::Permute &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::RSQRT &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ReLU &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ResizeBilinear &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ReLU1 &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ReLU6 &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::RNN &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Floor &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::SpaceToBatchND &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
+ _tensor_builder->dimCorrection(ofm_index, false);
+ _tensor_builder->dimCorrection(ifm_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::SpaceToDepth &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
+ _tensor_builder->dimCorrection(ofm_index, false);
+ _tensor_builder->dimCorrection(ifm_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::L2Pool2D &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::EmbeddingLookup &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+ _tensor_builder->dimCorrection(values_index, false);
+ _tensor_builder->dimCorrection(output_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::L2Normalization &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::HashtableLookup &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::PReLU &node)
+{
+ const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
+ const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
+
+ if (!(_ctx.at(ifm_index).shape() == _ctx.at(alpha_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(ifm_index).shape().rank(), _ctx.at(alpha_index).shape().rank());
+ const_cast<ir::Shape &>(_ctx.at(ifm_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(alpha_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+void ShapeFixer::visit(const ir::operation::TransposeConv &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::SQRT &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::LogicalOr &node)
+{
+ const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
+
+ if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
+ const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+void ShapeFixer::visit(const ir::operation::LogicalNot &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::SquaredDifference &node)
+{
+ const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
+
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+ const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+void ShapeFixer::visit(const ir::operation::TopKV2 &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Gather &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
+ const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
+ _tensor_builder->dimCorrection(ofm_index, false);
+ _tensor_builder->dimCorrection(ifm_index, false);
+ _tensor_builder->dimCorrection(indices_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::Neg &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Abs &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ArgMax &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
+ _tensor_builder->dimCorrection(ofm_index, false);
+ _tensor_builder->dimCorrection(ifm_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::Dequantize &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Mean &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::LocalResponseNormalization &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::DepthToSpace &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ReduceMin &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Split &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+ _tensor_builder->dimCorrection(input_index, false);
+ for (const auto &output : node.getOutputs())
+ _tensor_builder->dimCorrection(output, false);
+}
+
+void ShapeFixer::visit(const ir::operation::Unpack &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
+ _tensor_builder->dimCorrection(input_index, false);
+ for (const auto &output_index : node.getOutputs())
+ _tensor_builder->dimCorrection(output_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::Pad &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ _tensor_builder->dimCorrection(input_index, false);
+ _tensor_builder->dimCorrection(output_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::Min &node)
+{
+ const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
+
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+void ShapeFixer::visit(const ir::operation::Max &node)
+{
+ const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
+
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/ShapeFixer.h b/runtime/onert/backend/acl_cl/ShapeFixer.h
new file mode 100644
index 000000000..5b3e9b248
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/ShapeFixer.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_SHAPE_FIXER_H__
+#define __ONERT_BACKEND_ACL_CL_SHAPE_FIXER_H__
+
+#include <backend/IShapeFixer.h>
+
+#include "ir/Operands.h"
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+class ShapeFixer : public IShapeFixer
+{
+public:
+ ShapeFixer(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder);
+
+ void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::DepthwiseConv2D &) override;
+ void visit(const ir::operation::MaxPool2D &) override;
+ void visit(const ir::operation::AvgPool2D &) override;
+ void visit(const ir::operation::Concat &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::Mul &) override;
+ void visit(const ir::operation::ReduceSum &) override;
+ void visit(const ir::operation::Reshape &) override;
+ void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::Tanh &) override;
+ void visit(const ir::operation::Softmax &) override;
+ void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::StridedSlice &) override;
+ void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::Add &) override;
+ void visit(const ir::operation::Sub &) override;
+ void visit(const ir::operation::Cast &) override;
+ void visit(const ir::operation::Div &) override;
+ void visit(const ir::operation::Exp &) override;
+ void visit(const ir::operation::InstanceNorm &) override;
+ void visit(const ir::operation::Logistic &) override;
+ void visit(const ir::operation::ReduceMax &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::LogicalAnd &) override;
+ void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::Pack &) override;
+ void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::RSQRT &) override;
+ void visit(const ir::operation::ReLU &) override;
+ void visit(const ir::operation::ResizeBilinear &) override;
+ void visit(const ir::operation::ReLU1 &) override;
+ void visit(const ir::operation::ReLU6 &) override;
+ void visit(const ir::operation::RNN &) override;
+ void visit(const ir::operation::Floor &) override;
+ void visit(const ir::operation::SpaceToBatchND &) override;
+ void visit(const ir::operation::SpaceToDepth &) override;
+ void visit(const ir::operation::L2Pool2D &) override;
+ void visit(const ir::operation::EmbeddingLookup &) override;
+ void visit(const ir::operation::L2Normalization &) override;
+ void visit(const ir::operation::HashtableLookup &) override;
+ void visit(const ir::operation::PReLU &) override;
+ void visit(const ir::operation::TransposeConv &) override;
+ void visit(const ir::operation::SQRT &) override;
+ void visit(const ir::operation::LogicalOr &) override;
+ void visit(const ir::operation::LogicalNot &) override;
+ void visit(const ir::operation::SquaredDifference &) override;
+ void visit(const ir::operation::TopKV2 &) override;
+ void visit(const ir::operation::Gather &) override;
+ void visit(const ir::operation::Neg &) override;
+ void visit(const ir::operation::Abs &) override;
+ void visit(const ir::operation::ArgMax &) override;
+ void visit(const ir::operation::Dequantize &) override;
+ void visit(const ir::operation::Mean &) override;
+ void visit(const ir::operation::LocalResponseNormalization &) override;
+ void visit(const ir::operation::DepthToSpace &) override;
+ void visit(const ir::operation::ReduceMin &) override;
+ void visit(const ir::operation::Split &) override;
+ void visit(const ir::operation::Unpack &) override;
+ void visit(const ir::operation::Pad &) override;
+ void visit(const ir::operation::Min &) override;
+ void visit(const ir::operation::Max &) override;
+
+private:
+ const ir::Operands &_ctx;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+};
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_SHAPE_FIXER_H__
diff --git a/runtime/onert/backend/acl_cl/TensorBuilder.h b/runtime/onert/backend/acl_cl/TensorBuilder.h
new file mode 100644
index 000000000..91502d39a
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/TensorBuilder.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_ACL_CL_TENSOR_BUILDER_H__
+
+#include <AclTensorBuilder.h>
+
+#include "operand/CLTensor.h"
+#include "operand/CLSubTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+using TensorBuilder =
+ acl_common::AclTensorBuilder<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/acl_cl/TensorManager.h b/runtime/onert/backend/acl_cl/TensorManager.h
new file mode 100644
index 000000000..bdbd0364e
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/TensorManager.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_TENSOR_MANAGER_H__
+#define __ONERT_BACKEND_ACL_CL_TENSOR_MANAGER_H__
+
+#include <arm_compute/runtime/CL/CLBufferAllocator.h>
+#include <arm_compute/runtime/PoolManager.h>
+#include <arm_compute/runtime/BlobLifetimeManager.h>
+#include <arm_compute/runtime/MemoryManagerOnDemand.h>
+#include <arm_compute/runtime/MemoryGroup.h>
+
+#include <AclMemoryManager.h>
+#include <AclLinearMemoryManager.h>
+#include <AclInternalBufferManager.h>
+#include <AclTensorManager.h>
+
+#include "operand/CLTensor.h"
+#include "operand/CLSubTensor.h"
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+using MemoryManager =
+ acl_common::AclMemoryManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
+
+using LinearMemoryManager = acl_common::AclLinearMemoryManager<
+ operand::ICLTensor, operand::CLTensor, operand::CLSubTensor,
+ ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
+ ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator,
+ ::arm_compute::MemoryGroup>;
+
+using InternalBufferManager = acl_common::AclInternalBufferManager<
+ ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
+ ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator>;
+
+using TensorManager =
+ acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
+
+TensorManager *createTensorManager(bool is_linear_executor)
+{
+ if (is_linear_executor)
+ {
+ VERBOSE(acl_cl_createTensorManager) << "AclTensorManager as Linear" << std::endl;
+ return new TensorManager(new MemoryManager(), new LinearMemoryManager(),
+ new InternalBufferManager());
+ }
+ else
+ {
+ VERBOSE(acl_cl_createTensorManager) << "AclTensorManager" << std::endl;
+ return new TensorManager(new MemoryManager(), new MemoryManager(), new InternalBufferManager());
+ }
+}
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_TENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/acl_cl/acl_cl.cc b/runtime/onert/backend/acl_cl/acl_cl.cc
new file mode 100644
index 000000000..88378b13a
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/acl_cl.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <util/logging.h>
+
+#include "Backend.h"
+
+extern "C" {
+onert::backend::Backend *onert_backend_create()
+{
+ VERBOSE(onert_backend_create) << "'acl_cl' loaded\n";
+ return new onert::backend::acl_cl::Backend;
+}
+
+void onert_backend_destroy(onert::backend::Backend *backend)
+{
+ VERBOSE(onert_backend_create) << "'acl_cl' unloaded\n";
+ delete backend;
+}
+}
diff --git a/runtime/onert/backend/acl_cl/operand/CLSubTensor.cc b/runtime/onert/backend/acl_cl/operand/CLSubTensor.cc
new file mode 100644
index 000000000..234229787
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/operand/CLSubTensor.cc
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CLSubTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+namespace operand
+{
+
+CLSubTensor::CLSubTensor(ICLTensor *parent, const arm_compute::TensorShape &tensor_shape,
+ const arm_compute::Coordinates &coords, size_t rank, bool extend_parent)
+ : _cl_sub_tensor(std::make_shared<arm_compute::CLSubTensor>(parent->handle(), tensor_shape,
+ coords, extend_parent)),
+ _rank{rank}
+{
+ // DO NOTHING
+}
+
+const arm_compute::CLSubTensor *CLSubTensor::handle() const { return _cl_sub_tensor.get(); }
+
+arm_compute::CLSubTensor *CLSubTensor::handle() { return _cl_sub_tensor.get(); }
+
+} // namespace operand
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/operand/CLSubTensor.h b/runtime/onert/backend/acl_cl/operand/CLSubTensor.h
new file mode 100644
index 000000000..fedc17fc2
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/operand/CLSubTensor.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_OPERAND_CL_SUB_TENSOR_H__
+#define __ONERT_BACKEND_ACL_CL_OPERAND_CL_SUB_TENSOR_H__
+
+#include <arm_compute/runtime/CL/CLSubTensor.h>
+#include "ICLTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+namespace operand
+{
+
+class CLSubTensor : public ICLTensor
+{
+public:
+ CLSubTensor() = delete;
+
+public:
+ CLSubTensor(ICLTensor *parent, const arm_compute::TensorShape &tensor_shape,
+ const arm_compute::Coordinates &coords, size_t rank, bool extend_parent = false);
+
+public:
+ size_t num_dimensions() const final { return _rank; }
+
+public:
+ const arm_compute::CLSubTensor *handle() const override;
+ arm_compute::CLSubTensor *handle() override;
+
+public:
+ // This method is used to prevent the use of memcpy for SubTensor
+ bool has_padding() const override { return true; }
+
+private:
+ std::shared_ptr<arm_compute::CLSubTensor> _cl_sub_tensor;
+ size_t _rank;
+};
+
+} // namespace operand
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_OPERAND_CL_SUB_TENSOR_H__
diff --git a/runtime/onert/backend/acl_cl/operand/CLTensor.cc b/runtime/onert/backend/acl_cl/operand/CLTensor.cc
new file mode 100644
index 000000000..cd5aaa1fd
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/operand/CLTensor.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CLTensor.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <arm_compute/runtime/CL/CLMemory.h>
+#include <arm_compute/runtime/CL/CLMemoryRegion.h>
+
+#include <Convert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+namespace operand
+{
+
+CLTensor::CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses)
+ : _cl_tensor(std::make_shared<arm_compute::CLTensor>()), _rank{rank}, _num_uses{num_uses}
+{
+ allocator()->init(info);
+}
+
+const arm_compute::CLTensor *CLTensor::handle() const { return _cl_tensor.get(); }
+
+arm_compute::CLTensor *CLTensor::handle() { return _cl_tensor.get(); }
+
+arm_compute::CLTensorAllocator *CLTensor::allocator() { return _cl_tensor->allocator(); }
+
+void CLTensor::map(bool blocking) { _cl_tensor->map(blocking); }
+
+void CLTensor::unmap() { _cl_tensor->unmap(); }
+
+void CLTensor::setBuffer(void *host_ptr)
+{
+ // Constructs a Buffer on a user-supplied memory
+ auto buffer = cl::Buffer(arm_compute::CLScheduler::get().context(),
+ CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, info()->total_size(), host_ptr);
+ // import memory
+ allocator()->import_memory(buffer);
+}
+
+} // namespace operand
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/operand/CLTensor.h b/runtime/onert/backend/acl_cl/operand/CLTensor.h
new file mode 100644
index 000000000..abad866c3
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/operand/CLTensor.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_OPERAND_CL_TENSOR_H__
+#define __ONERT_BACKEND_ACL_CL_OPERAND_CL_TENSOR_H__
+
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/CL/CLTensor.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "ICLTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+namespace operand
+{
+
+class CLTensor : public ICLTensor
+{
+public:
+ CLTensor() = delete;
+
+public:
+ CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses);
+
+public:
+ size_t num_dimensions() const final { return _rank; }
+
+public:
+ const arm_compute::CLTensor *handle() const override;
+ arm_compute::CLTensor *handle() override;
+ size_t num_uses() const { return _num_uses; }
+
+public:
+ arm_compute::CLTensorAllocator *allocator();
+ void map(bool blocking = true);
+ void unmap();
+ /** Set given buffer as the buffer of the tensor
+ *
+ * @note Ownership of the memory is not transferred to this object.
+ * Thus management (allocate/free) should be done by the client.
+ *
+ * @param[in] host_ptr Storage to be used.
+ */
+ void setBuffer(void *host_ptr);
+
+private:
+ std::shared_ptr<arm_compute::CLTensor> _cl_tensor;
+ size_t _rank;
+ size_t _num_uses;
+};
+
+} // namespace operand
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_OPERAND_CL_TENSOR_H__
diff --git a/runtime/onert/backend/acl_cl/operand/ICLTensor.cc b/runtime/onert/backend/acl_cl/operand/ICLTensor.cc
new file mode 100644
index 000000000..b400ef9cf
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/operand/ICLTensor.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ICLTensor.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+namespace operand
+{
+
+void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn)
+{
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+
+ // This is an optional input
+ if (total_size() == 0)
+ return;
+
+ map(queue);
+ fn(*this);
+ unmap(queue);
+}
+} // namespace operand
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/operand/ICLTensor.h b/runtime/onert/backend/acl_cl/operand/ICLTensor.h
new file mode 100644
index 000000000..f709fe465
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/operand/ICLTensor.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_OPERAND_I_CL_TENSOR_H__
+#define __ONERT_BACKEND_ACL_CL_OPERAND_I_CL_TENSOR_H__
+
+#include <arm_compute/core/CL/ICLTensor.h>
+
+#include <IACLTensor.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+namespace operand
+{
+
+class ICLTensor : public acl_common::IACLTensor
+{
+public:
+ const arm_compute::ICLTensor *handle() const override = 0;
+ arm_compute::ICLTensor *handle() override = 0;
+
+public:
+ void map(cl::CommandQueue &q, bool blocking = true) { return handle()->map(q, blocking); }
+ void unmap(cl::CommandQueue &q) { return handle()->unmap(q); }
+ void access(const std::function<void(ITensor &tensor)> &fn) final;
+};
+
+} // namespace operand
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_OPERAND_I_CL_TENSOR_H__
diff --git a/runtime/onert/backend/acl_common/AclActivationBuilder.h b/runtime/onert/backend/acl_common/AclActivationBuilder.h
new file mode 100644
index 000000000..bfdea6ea0
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclActivationBuilder.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_ACTIVATION_BUILDER_H__
+#define __ONERT_BACKEND_ACL_COMMON_ACL_ACTIVATION_BUILDER_H__
+
+#include <memory>
+
+#include <ir/InternalType.h>
+#include <exec/IFunction.h>
+#include <exec/NopFunction.h>
+
+#include "Convert.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+template <typename T_Tensor, typename T_ActivationLayer, typename T_ExecFunction>
+class AclActivationBuilder
+{
+private:
+ static std::unique_ptr<exec::IFunction> generateReLU(T_Tensor *ifm_alloc);
+ static std::unique_ptr<exec::IFunction> generateReLU1(T_Tensor *ifm_alloc);
+ static std::unique_ptr<exec::IFunction> generateReLU6(T_Tensor *ifm_alloc);
+
+public:
+ static std::unique_ptr<exec::IFunction> generate(ir::Activation code, T_Tensor *ifm_alloc);
+};
+
+template <typename T_Tensor, typename T_ActivationLayer, typename T_ExecFunction>
+std::unique_ptr<exec::IFunction>
+AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU(T_Tensor *ifm_alloc)
+{
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+
+ auto fn = std::make_unique<T_ActivationLayer>();
+
+ fn->configure(ifm_alloc, nullptr, act_info);
+
+ return asFunction<T_ExecFunction>(std::move(fn));
+}
+
+template <typename T_Tensor, typename T_ActivationLayer, typename T_ExecFunction>
+std::unique_ptr<exec::IFunction>
+AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU1(
+ T_Tensor *ifm_alloc)
+{
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
+
+ auto fn = std::make_unique<T_ActivationLayer>();
+
+ fn->configure(ifm_alloc, nullptr, act_info);
+
+ return asFunction<T_ExecFunction>(std::move(fn));
+}
+
+template <typename T_Tensor, typename T_ActivationLayer, typename T_ExecFunction>
+std::unique_ptr<exec::IFunction>
+AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU6(
+ T_Tensor *ifm_alloc)
+{
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
+
+ auto fn = std::make_unique<T_ActivationLayer>();
+
+ fn->configure(ifm_alloc, nullptr, act_info);
+
+ return asFunction<T_ExecFunction>(std::move(fn));
+}
+
+template <typename T_Tensor, typename T_ActivationLayer, typename T_ExecFunction>
+std::unique_ptr<exec::IFunction>
+AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generate(ir::Activation code,
+ T_Tensor *ifm_alloc)
+{
+ switch (code)
+ {
+ case ir::Activation::NONE:
+ {
+ return std::make_unique<exec::NopFunction>();
+ }
+ case ir::Activation::RELU:
+ {
+ return generateReLU(ifm_alloc);
+ }
+ case ir::Activation::RELU1:
+ {
+ return generateReLU1(ifm_alloc);
+ }
+ case ir::Activation::RELU6:
+ {
+ return generateReLU6(ifm_alloc);
+ }
+ default:
+ {
+ throw std::runtime_error("Not supported, yet");
+ }
+ }
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACL_ACTIVATION_BUILDER_H__
diff --git a/runtime/onert/backend/acl_common/AclFunction.h b/runtime/onert/backend/acl_common/AclFunction.h
new file mode 100644
index 000000000..99972ac9c
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclFunction.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_KERNEL_ACL_FUNCTION_H__
+#define __ONERT_BACKEND_ACL_COMMON_KERNEL_ACL_FUNCTION_H__
+
+#include <exec/IFunction.h>
+#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+class AclFunction : public ::onert::exec::IFunction
+{
+public:
+ AclFunction() = delete;
+
+public:
+ AclFunction(std::unique_ptr<::arm_compute::IFunction> &&func) : _func(std::move(func))
+ {
+ // DO NOTHING
+ }
+
+public:
+ void run() override { _func->run(); }
+ void runSync() override { run(); }
+ void prepare() override { _func->prepare(); }
+
+private:
+ std::unique_ptr<::arm_compute::IFunction> _func;
+};
+
+class AclClFunction : public AclFunction
+{
+public:
+ using AclFunction::AclFunction;
+
+public:
+ void runSync() final
+ {
+ run();
+ arm_compute::CLScheduler::get().sync();
+ }
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_KERNEL_ACL_FUNCTION_H__
diff --git a/runtime/onert/backend/acl_common/AclInternalBufferManager.h b/runtime/onert/backend/acl_common/AclInternalBufferManager.h
new file mode 100644
index 000000000..f893bb44b
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclInternalBufferManager.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_INTERNAL_BUFFER_MANAGER_H__
+#define __ONERT_BACKEND_ACL_COMMON_INTERNAL_BUFFER_MANAGER_H__
+
+#include <arm_compute/runtime/IMemoryManager.h>
+#include <cassert>
+#include <memory>
+#include <backend/IMemoryManager.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+// NOTE. If any backend can use something like InternalBufferManager,
+// this interface can be moved to core/include/backend/
+/**
+ * @brief Interface for InternalBufferManager which has ::arm_compute::IMemoryManager pointer
+ */
+struct IInternalBufferManager : public backend::IMemoryManager
+{
+ virtual ~IInternalBufferManager() = default;
+
+ /**
+ * @brief Get shared_ptr of ::arm_compute::IMemoryManager
+ */
+ virtual std::shared_ptr<::arm_compute::IMemoryManager> internal_buffer_manager(void) = 0;
+};
+
+/**
+ * @brief class for InternalBufferManager which has ::arm_compute::IMemoryManager pointer
+ */
+template <typename T_MemoryManager, typename T_PoolManager, typename T_LifetimeManager,
+ typename T_Allocator>
+class AclInternalBufferManager : public IInternalBufferManager
+{
+public:
+ AclInternalBufferManager() : _allocator{nullptr}
+ {
+ std::shared_ptr<T_LifetimeManager> lifetime_mgr = std::make_shared<T_LifetimeManager>();
+ std::shared_ptr<T_PoolManager> pool_mgr = std::make_shared<T_PoolManager>();
+
+ _internal_manager = std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
+ assert(_internal_manager);
+ }
+
+ virtual ~AclInternalBufferManager() = default;
+
+ /**
+ * @brief Allocate the internal buffer manager on acl
+ */
+ void allocate(void) override
+ {
+ _allocator = std::make_shared<T_Allocator>();
+ _internal_manager->populate(*_allocator, 1);
+ }
+
+ /**
+ * @brief Deallocate the internal buffer manager on acl
+ */
+ void deallocate(void) override { _internal_manager->clear(); }
+
+ /**
+ * @brief Get shared_ptr of ::arm_compute::IMemoryManager
+ */
+ std::shared_ptr<::arm_compute::IMemoryManager> internal_buffer_manager(void) override
+ {
+ return _internal_manager;
+ }
+
+private:
+ std::shared_ptr<T_Allocator> _allocator;
+ std::shared_ptr<T_MemoryManager> _internal_manager;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_INTERNAL_BUFFER_MANAGER_H__
diff --git a/runtime/onert/backend/acl_common/AclLinearMemoryManager.h b/runtime/onert/backend/acl_common/AclLinearMemoryManager.h
new file mode 100644
index 000000000..09f25e7a8
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclLinearMemoryManager.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_LINEAR_MEMORY_MANAGER_H__
+#define __ONERT_BACKEND_ACL_COMMON_LINEAR_MEMORY_MANAGER_H__
+
+#include <cassert>
+
+#include "AclMemoryManager.h"
+#include "ir/OperandIndexMap.h"
+#include "util/logging.h"
+
+namespace
+{
+
+template <typename T_MemoryManager, typename T_PoolManager, typename T_LifetimeManager>
+std::shared_ptr<T_MemoryManager> createMemoryManager()
+{
+ std::shared_ptr<T_LifetimeManager> lifetime_mgr = std::make_shared<T_LifetimeManager>();
+ std::shared_ptr<T_PoolManager> pool_mgr = std::make_shared<T_PoolManager>();
+
+ std::shared_ptr<T_MemoryManager> mem_mgr =
+ std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
+ return mem_mgr;
+}
+
+} // namespace
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor, typename T_MemoryManager,
+ typename T_PoolManager, typename T_LifetimeManager, typename T_Allocator,
+ typename T_MemoryGroup>
+class AclLinearMemoryManager : public AclMemoryManager<T_ITensor, T_Tensor, T_SubTensor>
+{
+public:
+ AclLinearMemoryManager()
+ : _allocator{nullptr},
+ _io_manager{createMemoryManager<T_MemoryManager, T_PoolManager, T_LifetimeManager>()},
+ _io_group{std::make_shared<T_MemoryGroup>(_io_manager)}
+ {
+ // DO NOTHING
+ }
+
+ virtual ~AclLinearMemoryManager() = default;
+
+ void allocate(void) override
+ {
+ _allocator = std::make_shared<T_Allocator>();
+ _io_manager->populate(*_allocator, 1);
+ _io_group->acquire();
+ }
+
+ void deallocate(void) override
+ {
+ _io_group->release();
+ _io_manager->clear();
+ }
+
+ void startLifetime(const ir::OperandIndex &ind) override
+ {
+ auto &tensors = this->tensors();
+ assert(tensors.find(ind) != tensors.end());
+
+ auto tensor = tensors[ind];
+ assert(tensor->handle());
+
+ _io_group->manage(tensor->handle());
+ }
+
+ void finishLifetime(const ir::OperandIndex &ind) override
+ {
+ auto &tensors = this->tensors();
+ assert(tensors.find(ind) != tensors.end());
+
+ auto tensor = tensors[ind];
+ assert(tensor->allocator());
+
+ tensor->allocator()->allocate();
+ }
+
+private:
+ std::shared_ptr<T_Allocator> _allocator;
+ std::shared_ptr<T_MemoryManager> _io_manager;
+ std::shared_ptr<T_MemoryGroup> _io_group;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_LINEAR_MEMORY_MANAGER_H__
diff --git a/runtime/onert/backend/acl_common/AclMemoryManager.h b/runtime/onert/backend/acl_common/AclMemoryManager.h
new file mode 100644
index 000000000..eefcec130
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclMemoryManager.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__
+#define __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__
+
+#include <arm_compute/core/Types.h>
+#include <arm_compute/runtime/IMemoryManager.h>
+#include <cassert>
+
+#include "backend/IMemoryManager.h"
+#include "ir/OperandIndexMap.h"
+#include "Convert.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+class AclMemoryManager : public backend::IMemoryManager
+{
+public:
+ AclMemoryManager()
+ {
+ // DO NOTHING
+ }
+
+ virtual ~AclMemoryManager() = default;
+
+ void allocate(void) override
+ {
+ for (const auto &tensor_entry : _tensors)
+ {
+ auto tensor = tensor_entry.second;
+ tensor->allocator()->allocate();
+ }
+ }
+
+ void deallocate(void) override
+ {
+ for (const auto &tensor_entry : _tensors)
+ {
+ auto tensor = tensor_entry.second;
+ tensor->allocator()->free();
+ }
+ }
+
+ virtual void startLifetime(const ir::OperandIndex &) { /* DO NOTHING */}
+ virtual void finishLifetime(const ir::OperandIndex &) { /* DO NOTHING */}
+
+ void buildTensor(const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank,
+ size_t num_uses)
+ {
+ auto tensor = std::make_shared<T_Tensor>(info, rank, num_uses);
+ _tensors[ind] = tensor;
+ }
+
+ void buildSubtensor(std::shared_ptr<T_ITensor> parent_tensor, const ir::OperandIndex &child_ind,
+ const ::arm_compute::TensorShape &shape,
+ const ::arm_compute::Coordinates &coordinates, size_t rank,
+ bool extent_parent)
+ {
+ auto subtensor =
+ std::make_shared<T_SubTensor>(parent_tensor.get(), shape, coordinates, rank, extent_parent);
+ _subtensors[child_ind] = subtensor;
+ }
+
+ ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &tensors(void) { return _tensors; }
+
+ ir::OperandIndexMap<std::shared_ptr<T_SubTensor>> &subtensors(void) { return _subtensors; }
+
+private:
+ ir::OperandIndexMap<std::shared_ptr<T_Tensor>> _tensors;
+ ir::OperandIndexMap<std::shared_ptr<T_SubTensor>> _subtensors;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__
diff --git a/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h b/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h
new file mode 100644
index 000000000..83d7ad6fd
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_SUB_TENSOR_ANALYZER_H__
+#define __ONERT_BACKEND_ACL_COMMON_ACL_SUB_TENSOR_ANALYZER_H__
+
+#include <ir/OperationVisitor.h>
+#include <ir/Graph.h>
+#include "ParentInfo.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+/**
+ * @brief Class to analyze tensor subsumption
+ */
+class AclSubTensorAnalyzer : public ir::OperationVisitor
+{
+public:
+ /**
+ * @brief Construct a new SubTensorAnalyzer object
+ * @param[in] ctx Graph operand set
+ */
+ AclSubTensorAnalyzer(const ir::Graph &graph) : _graph{graph}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void setLayout(ir::Layout layout) { _current_op_layout = layout; }
+
+ void visit(const ir::operation::Concat &node) override
+ {
+ // If operator is concat, fill subsumption info
+ int32_t axis_raw = node.param().axis;
+
+ const auto &output_index = node.getOutputs().at(0);
+ const auto &inputs = node.getInputs();
+
+ int32_t axis_point = 0;
+ const auto rank = _graph.operands().at(output_index).shape().rank();
+ int32_t axis = axis_raw < 0 ? (axis_raw + rank) : axis_raw;
+ assert(rank > axis);
+
+ for (const auto &ind : inputs)
+ {
+ // NOTE Not support the case that concat's input is a constant or a input of model
+ if (_graph.operands().at(ind).isConstant() || _graph.getInputs().contains(ind))
+ {
+ return;
+ }
+ }
+
+ for (const auto &input_index : inputs)
+ {
+ auto input_shape = _graph.operands().at(input_index).shape();
+ assert(rank == input_shape.rank());
+
+ ir::Coordinates coordinate_info{};
+ for (int i = 0; i < rank; i++)
+ {
+ coordinate_info.set(i, 0);
+ }
+ coordinate_info.set(axis, axis_point);
+
+ _parent_map.emplace(
+ input_index, acl_common::ParentInfo{output_index, _current_op_layout, coordinate_info});
+
+ axis_point += input_shape.dim(axis);
+ }
+ }
+
+ std::unordered_map<ir::OperandIndex, ParentInfo> &&releaseParentMap()
+ {
+ return std::move(_parent_map);
+ }
+
+private:
+ const ir::Graph &_graph;
+ std::unordered_map<ir::OperandIndex, ParentInfo> _parent_map;
+ ir::Layout _current_op_layout{ir::Layout::UNKNOWN};
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACL_SUB_TENSOR_ANALYZER_H__
diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h
new file mode 100644
index 000000000..be8d43209
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h
@@ -0,0 +1,483 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
+
+#include <memory>
+#include <queue>
+
+#include <arm_compute/core/Types.h>
+#include <backend/ITensorBuilder.h>
+#include "ir/OperandIndexMap.h"
+#include <ir/Operands.h>
+#include "AclTensorManager.h"
+#include <memory>
+#include "ParentInfo.h"
+#include <util/Utils.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+// Marks whether a lifetime-sequence entry is an operand's first or last use
+enum class UsesType
+{
+ FIRST,
+ LAST
+};
+
+/**
+ * @brief Tensor builder for ACL backends
+ *
+ * Collects tensor info/layout/lifetime data during compilation, then builds
+ * tensors (and subtensors for concat elimination) through an AclTensorManager.
+ */
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+class AclTensorBuilder : public ITensorBuilder
+{
+public:
+ using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
+
+ AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
+
+ /**
+ * @brief Register tensor information to allocate on ACL-CL backend
+ * @param[in] ind Operand index
+ * @param[in] info Tensor information
+ * @param[in] layout Tensor data layout
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout, bool as_const) override;
+
+ // Append first/last-use events to the lifetime sequence (linear executor)
+ void notifyFirstUse(const ir::OperandIndex &) override;
+ void notifyLastUse(const ir::OperandIndex &) override;
+
+ bool isRegistered(const ir::OperandIndex &) const override;
+
+ // prepare() builds tensor objects; allocate() computes lifetimes and allocates memory
+ void prepare(void) override;
+ void allocate() override;
+ void postFunctionPrepare() override;
+
+ std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
+ void iterate(const IterateFunction &fn) override;
+
+ std::unique_ptr<ITensorManager> releaseTensorManager(void) override;
+
+ std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind);
+
+ // Enable/disable ACL dimension correction for the given operand
+ void dimCorrection(const ir::OperandIndex &index, bool apply_dim_correction);
+
+ T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
+
+ void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
+ {
+ // If a count was already recorded it must agree with the new one
+ assert(_uses_count_map.find(index) != _uses_count_map.end() ? _uses_count_map[index] == num_uses
+ : true);
+ _uses_count_map[index] = num_uses;
+ }
+
+ // Install subtensor info produced by AclSubTensorAnalyzer::releaseParentMap()
+ void parent_map(std::unordered_map<ir::OperandIndex, ParentInfo> &&parent_map)
+ {
+ _parent_map = std::move(parent_map);
+ }
+
+ bool areSubTensorsOf(const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq);
+
+ /**
+ * @brief Check child tensor is allocated as subtensor of parent tensor
+ * @param[in] parent Index of parent
+ * @param[in] child Index of child
+ * @return @c true if child is allocated as subtensor of parent, otherwise @c false
+ */
+ bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
+
+private:
+ void buildTensors(void);
+ // Follow _parent_map links until an operand with no parent is found
+ ir::OperandIndex findRootParent(ir::OperandIndex index);
+
+private:
+ const ir::Operands &_operands;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+ ir::OperandIndexMap<bool> _apply_dim_correction_map;
+ ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
+ ir::OperandIndexMap<size_t> _uses_count_map;
+
+ std::unique_ptr<T_AclTensorManager> _tensor_mgr;
+ ir::OperandIndexSequence _constants;
+
+ // for linear executor
+ std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
+
+ // Extra info for concat elimination
+ ir::OperandIndexMap<ParentInfo> _parent_map;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#include <cassert>
+#include <stack>
+
+#include "Convert.h"
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+// Takes ownership of `tensor_mgr` (stored in a unique_ptr); `operands` must
+// outlive this builder (held by reference).
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
+ T_AclTensorManager *tensor_mgr)
+ : _operands{operands}, _tensor_mgr{tensor_mgr}
+{
+ assert(_tensor_mgr);
+}
+
+/**
+ * @brief Record shape/type/layout info for an operand before any tensor is built
+ *
+ * Operands present in _parent_map are registered as subtensors: their offset
+ * coordinates are permuted when the frontend and backend layouts differ.
+ */
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerTensorInfo(
+ const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout backend_layout,
+ bool as_const)
+{
+ // Registration must happen before prepare()/allocate() materialize anything
+ assert(_tensor_mgr->constTensors().size() == 0);
+ assert(_tensor_mgr->nonconstTensors().size() == 0);
+
+ _uses_count_map[ind] = _operands.at(ind).getUses().size();
+
+ if (_parent_map.count(ind) == 0)
+ {
+ // Normal Tensors
+ _tensor_info_map.emplace(ind, info);
+ _apply_dim_correction_map.emplace(ind, true);
+ _tensor_layout_map.insert({ind, backend_layout});
+ if (as_const)
+ _constants.append(ind);
+ }
+ else
+ {
+ // SubTensors
+
+ assert(!as_const && "Subtensors of constants are not supported yet.");
+
+ // Update offset info and emplace
+ auto &parent_info = _parent_map[ind];
+ const auto &obj = _operands.at(ind);
+ auto parent_index = parent_info.parent;
+ auto &offset = parent_info.coordinates;
+ auto frontend_layout = parent_info.frontend_layout;
+
+ assert(obj.shape().rank() <= 4);
+ auto shape = obj.shape();
+ if (_operands.at(parent_index).shape().rank() == 4 && frontend_layout == ir::Layout::NHWC &&
+ backend_layout == ir::Layout::NCHW)
+ {
+ // NHWC -> NCHW: move the channel coordinate from last to position 1
+ shape.extendRank(4);
+ offset = {offset[0], offset[3], offset[1], offset[2]};
+ }
+ else if (_operands.at(parent_index).shape().rank() == 4 &&
+ frontend_layout == ir::Layout::NCHW && backend_layout == ir::Layout::NHWC)
+ {
+ // NCHW -> NHWC: move the channel coordinate from position 1 to last.
+ // NOTE(fix) This branch previously repeated the NHWC->NCHW condition above,
+ // making it unreachable dead code; the {N,H,W,C} permutation below only
+ // applies to the NCHW->NHWC direction.
+ shape.extendRank(4);
+ offset = {offset[0], offset[2], offset[3], offset[1]};
+ }
+ auto new_shape = permuteShape(shape, frontend_layout, backend_layout);
+ ir::OperandInfo oi{new_shape, obj.typeInfo()};
+ _tensor_info_map.emplace(ind, oi);
+
+ _apply_dim_correction_map.emplace(ind, true);
+ }
+}
+
+// Record the point in the linear execution order where this operand is first used
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ _lifetime_seq.emplace_back(UsesType::FIRST, ind);
+}
+
+// Record the point in the linear execution order where this operand is last used
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::notifyLastUse(const ir::OperandIndex &ind)
+{
+ _lifetime_seq.emplace_back(UsesType::LAST, ind);
+}
+
+// An operand counts as registered iff registerTensorInfo() stored its info
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isRegistered(
+ const ir::OperandIndex &ind) const
+{
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+// Build the ACL tensor objects; memory allocation itself happens in allocate()
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::prepare(void)
+{
+ buildTensors();
+}
+
+/**
+ * @brief Compute per-root lifetimes from the recorded use sequence, then allocate memory
+ *
+ * Subtensors share their parent's storage, so lifetime events are collapsed onto
+ * each subtensor family's root: the root starts at the earliest FIRST use and
+ * ends at the latest LAST use of any family member.
+ */
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::allocate(void)
+{
+ // Update lifetime sequence to apply subtensor optimization
+
+ // Memoized lookup of an operand's root parent (itself when it has no parent)
+ std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map;
+ std::function<ir::OperandIndex &(ir::OperandIndex)> find_root =
+ [&](ir::OperandIndex ind) -> ir::OperandIndex & {
+ ir::OperandIndex &ret = root_map[ind];
+
+ // We know the root parent value already
+ if (ret.valid())
+ return ret;
+
+ auto itr = _parent_map.find(ind);
+ if (itr == _parent_map.end())
+ {
+ // If there is no parent, let's store the value of itself
+ return ret = ind;
+ }
+ else
+ {
+ return ret = find_root(itr->second.parent);
+ }
+ };
+
+ // Forward scan: keep only the earliest FIRST event per root
+ ir::OperandIndexMap<bool> first_use_check;
+ ir::OperandIndexMap<bool> last_use_check;
+ std::map<size_t, std::pair<UsesType, ir::OperandIndex>> lifetime_map;
+ for (size_t i = 0; i < _lifetime_seq.size(); i++)
+ {
+ auto &entry = _lifetime_seq[i];
+ if (entry.first != UsesType::FIRST)
+ continue;
+ auto root_ind = find_root(entry.second);
+ if (first_use_check[root_ind])
+ continue;
+ first_use_check[root_ind] = true;
+ lifetime_map[i] = {UsesType::FIRST, root_ind};
+ }
+
+ // Backward scan: keep only the latest LAST event per root
+ for (int i = _lifetime_seq.size() - 1; i >= 0; i--)
+ {
+ auto &entry = _lifetime_seq[i];
+ if (entry.first != UsesType::LAST)
+ continue;
+ auto root_ind = find_root(entry.second);
+ if (last_use_check[root_ind])
+ continue;
+ last_use_check[root_ind] = true;
+ lifetime_map[i] = {UsesType::LAST, root_ind};
+ }
+
+ // Replay the collapsed events in sequence order (std::map iterates keys sorted)
+ for (auto &entry : lifetime_map)
+ {
+ auto &use = entry.second;
+ auto use_type = use.first;
+ auto use_index = use.second;
+ assert(use_index.valid());
+ if (use_type == UsesType::FIRST)
+ _tensor_mgr->startLifetime(use_index);
+ else
+ _tensor_mgr->finishLifetime(use_index);
+ }
+
+ assert(_constants.size() == _tensor_mgr->constTensors().size());
+ _tensor_mgr->allocateConsts();
+
+ // TODO Since `_parent_map` is filled for all Concat nodes even if the node this backend uses
+ // After refactoring BackendContext we can uncomment this
+ // assert(_tensor_info_map.size() ==
+ // _tensor_mgr->nonconstTensors().size() + _constants.size() + _parent_map.size());
+ _tensor_mgr->allocateNonconsts();
+
+ _tensor_mgr->allocateInternalBufferManager();
+}
+
+// After kernel functions are prepared, constants that are no longer referenced
+// may be released (see AclTensorManager::tryDeallocConstants)
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(void)
+{
+ _tensor_mgr->tryDeallocConstants();
+}
+
+// Generic ITensor lookup; may return nullptr when the index is unknown
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+std::shared_ptr<ITensor>
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::tensorAt(const ir::OperandIndex &ind)
+{
+ return _tensor_mgr->at(ind);
+}
+
+// Apply `fn` to every operand index managed by the tensor manager
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::iterate(const IterateFunction &fn)
+{
+ _tensor_mgr->iterate(fn);
+}
+
+// Backend-typed lookup; unlike tensorAt(), the tensor is asserted to exist
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+std::shared_ptr<T_ITensor>
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind)
+{
+ auto ret = _tensor_mgr->at(ind);
+ assert(ret != nullptr);
+ return ret;
+}
+
+// Toggle ACL's dimension correction for this operand (see asTensorShape)
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::dimCorrection(
+ const ir::OperandIndex &index, bool apply_dim_correction)
+{
+ _apply_dim_correction_map[index] = apply_dim_correction;
+}
+
+// Transfer ownership of the tensor manager to the caller; this builder keeps none
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+std::unique_ptr<ITensorManager>
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseTensorManager(void)
+{
+ return std::move(_tensor_mgr);
+}
+
+/**
+ * @brief Create all registered tensors: plain tensors first, then subtensors
+ *
+ * Subtensors require their parent to exist first, so a stack is used to build
+ * each parent chain bottom-up (see the numbered algorithm comment below).
+ */
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
+{
+ assert(_tensor_mgr->constTensors().size() == 0);
+ assert(_tensor_mgr->nonconstTensors().size() == 0);
+
+ // Normal tensors
+ for (auto &entry : _tensor_info_map)
+ {
+ auto ind = entry.first;
+ // Operands with a parent are built later as subtensors
+ if (_parent_map.count(ind) > 0)
+ continue;
+
+ const auto &info = entry.second;
+ const auto &backend_layout = _tensor_layout_map[ind];
+ auto tensor_info = asTensorInfo(info.shape(), info.typeInfo(), ir::Layout::UNKNOWN,
+ backend_layout, _apply_dim_correction_map[ind]);
+ _tensor_mgr->buildTensor(ind, tensor_info, info.shape().rank(), _constants.contains(ind),
+ _uses_count_map[ind]);
+ }
+
+ // Subtensors
+ assert(_tensor_mgr->nonconstSubtensors().size() == 0);
+ // TODO Iterate `_parent_map` instead, once the optimizer bug is fixed
+ // `Optimizer` iterates the entire OpSequences, so there is a bug if iterating _parent_map
+ for (auto &entry : _tensor_info_map)
+ {
+ auto ind = entry.first;
+ if (_parent_map.count(ind) == 0)
+ continue;
+
+ // To make subtensor, parent tensor must be made first
+ // For this condition, use stack
+ // 1) Push one subtensor index to stack (iterate subtensors)
+ // 2) If tensor at stack top is already made, pop and go to 4)
+ // 3) If tensor pushed at 1) is not made, check parent tensor
+ // 3-1) If parent tensor is already made, we can make child tensor
+ // Make child tensor and pop, go to 4)
+ // 3-2) If parent tensor is not made, we can't make child tensor yet
+ // Push parent tensor index to stack and return to 4)
+ // 4) If stack is empty, return to 1), else return to 2)
+ auto &subtensors = _tensor_mgr->nonconstSubtensors();
+
+ std::stack<ir::OperandIndex> stack;
+ stack.push(ind);
+
+ while (!stack.empty())
+ {
+ const auto current = stack.top();
+ const auto &tensor_info = _tensor_info_map.at(current);
+ const auto &parent_info = _parent_map.at(current);
+
+ // Already generated SubTensor
+ if (subtensors.find(current) != subtensors.end())
+ {
+ stack.pop();
+ continue;
+ }
+
+ auto parent = parent_info.parent;
+ std::shared_ptr<T_ITensor> parent_tensor = _tensor_mgr->findTensorAsParent(parent);
+ if (!parent_tensor)
+ {
+ // Cannot find allocated parent tensor: allocate parent first
+ assert(_parent_map.count(parent) > 0);
+ stack.push(parent);
+ continue;
+ }
+ assert(parent_tensor != nullptr);
+
+ // Child's type should be same with parent
+ assert(tensor_info.typeInfo().offset() ==
+ parent_tensor->info()->quantization_info().uniform().offset);
+ assert(tensor_info.typeInfo().scale() ==
+ parent_tensor->info()->quantization_info().uniform().scale);
+ assert(asDataType(tensor_info.typeInfo().type()) == parent_tensor->info()->data_type());
+
+ // NOTE SubTensor's layout must be the same with layout of parent tensor
+ const auto &root_parent = findRootParent(parent);
+ const auto &backend_layout = _tensor_layout_map[root_parent];
+
+ auto shape = asTensorShape(tensor_info.shape(), ir::Layout::UNKNOWN, backend_layout,
+ _apply_dim_correction_map[current]);
+ ::arm_compute::Coordinates coordinates =
+ asTensorCoordinate(parent_info.coordinates, ir::Layout::UNKNOWN, backend_layout);
+ _tensor_mgr->buildSubtensor(parent, current, shape, coordinates, tensor_info.shape().rank(),
+ true);
+ stack.pop();
+ }
+ }
+}
+
+// True iff every operand in `seq` is registered as a direct subtensor of `parent`
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::areSubTensorsOf(
+ const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq)
+{
+ for (auto &cand : seq)
+ {
+ if (!isSubTensorOf(parent, cand))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+// True iff `child` has a _parent_map entry whose parent is exactly `parent`
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isSubTensorOf(
+ const ir::OperandIndex &parent, const ir::OperandIndex &child)
+{
+ auto itr = _parent_map.find(child);
+ if (itr == _parent_map.end())
+ {
+ return false;
+ }
+
+ return itr->second.parent == parent;
+}
+
+// Walk _parent_map links until reaching an operand with no parent (the root)
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+ir::OperandIndex
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::findRootParent(ir::OperandIndex ind)
+{
+ if (_parent_map.find(ind) == _parent_map.end())
+ return ind;
+
+ auto parent_ind = _parent_map.at(ind).parent;
+ return findRootParent(parent_ind);
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/acl_common/AclTensorManager.h b/runtime/onert/backend/acl_common/AclTensorManager.h
new file mode 100644
index 000000000..b999a39a9
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclTensorManager.h
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
+#define __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
+
+#include <arm_compute/runtime/IMemoryManager.h>
+
+#include "backend/ITensorManager.h"
+#include "AclMemoryManager.h"
+#include "AclInternalBufferManager.h"
+#include "ir/OperandIndexMap.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+/**
+ * @brief Owns the constant and non-constant memory managers for an ACL backend
+ *
+ * Routes per-operand operations to the right AclMemoryManager via _ind_to_mgr,
+ * and manages ACL's internal scratch-buffer manager.
+ */
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+class AclTensorManager : public backend::ITensorManager
+{
+public:
+ using T_AclMemoryManager = AclMemoryManager<T_ITensor, T_Tensor, T_SubTensor>;
+
+ AclTensorManager(T_AclMemoryManager *const_mgr, T_AclMemoryManager *nonconst_mgr,
+ IInternalBufferManager *inter_mgr);
+
+ virtual ~AclTensorManager() = default;
+
+ void allocateConsts(void);
+ void allocateNonconsts(void);
+ void deallocateConsts(void);
+ void deallocateNonconsts(void);
+
+ void allocateInternalBufferManager(void);
+ void deallocateInternalBufferManager(void);
+
+ void buildTensor(const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank,
+ bool as_const, size_t num_uses);
+ void buildSubtensor(const ir::OperandIndex &parent, const ir::OperandIndex &child,
+ const ::arm_compute::TensorShape &shape,
+ const ::arm_compute::Coordinates &coordinates, size_t rank,
+ bool extent_parent);
+
+ // Look up a non-constant tensor or subtensor usable as a subtensor's parent
+ std::shared_ptr<T_ITensor> findTensorAsParent(const ir::OperandIndex &ind);
+
+ void startLifetime(const ir::OperandIndex &ind);
+ void finishLifetime(const ir::OperandIndex &ind);
+
+ std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind);
+
+ ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &constTensors(void);
+ ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &nonconstTensors(void);
+ ir::OperandIndexMap<std::shared_ptr<T_SubTensor>> &nonconstSubtensors(void);
+
+ std::shared_ptr<::arm_compute::IMemoryManager> internal_buffer_manager(void);
+
+ void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
+
+ void tryDeallocConstants(void);
+
+private:
+ std::unique_ptr<T_AclMemoryManager> _const_mgr;
+ std::unique_ptr<T_AclMemoryManager> _nonconst_mgr;
+ std::unique_ptr<IInternalBufferManager> _inter_mgr;
+ // operand index -> the memory manager (const or non-const) that owns it
+ ir::OperandIndexMap<T_AclMemoryManager &> _ind_to_mgr;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#include <cassert>
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+// Takes ownership of all three manager pointers (stored in unique_ptrs)
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::AclTensorManager(
+ T_AclMemoryManager *const_mgr, T_AclMemoryManager *nonconst_mgr,
+ IInternalBufferManager *inter_mgr)
+ : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}, _inter_mgr{inter_mgr}
+{
+ // DO NOTHING
+}
+
+// Thin forwarders to the underlying memory managers
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::allocateConsts(void)
+{
+ _const_mgr->allocate();
+}
+
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::allocateNonconsts(void)
+{
+ _nonconst_mgr->allocate();
+}
+
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::deallocateConsts(void)
+{
+ _const_mgr->deallocate();
+}
+
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::deallocateNonconsts(void)
+{
+ _nonconst_mgr->deallocate();
+}
+
+// ACL internal scratch buffers (e.g. for workspace memory) are handled separately
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::allocateInternalBufferManager(void)
+{
+ _inter_mgr->allocate();
+}
+
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::deallocateInternalBufferManager(void)
+{
+ _inter_mgr->deallocate();
+}
+
+// Create a tensor in the const or non-const manager and remember which one owns it
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::buildTensor(
+ const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank, bool as_const,
+ size_t num_uses)
+{
+ // Each operand may be built at most once
+ assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
+ if (as_const)
+ {
+ _const_mgr->buildTensor(ind, info, rank, num_uses);
+ _ind_to_mgr.insert({ind, *_const_mgr});
+ }
+ else
+ {
+ _nonconst_mgr->buildTensor(ind, info, rank, num_uses);
+ _ind_to_mgr.insert({ind, *_nonconst_mgr});
+ }
+}
+
+// Create a subtensor view of an existing (non-constant) parent tensor
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::buildSubtensor(
+ const ir::OperandIndex &parent, const ir::OperandIndex &child,
+ const ::arm_compute::TensorShape &shape, const ::arm_compute::Coordinates &coordinates,
+ size_t rank, bool extent_parent)
+{
+ assert(_ind_to_mgr.find(child) == _ind_to_mgr.end());
+ // The parent must already exist; see AclTensorBuilder::buildTensors()
+ std::shared_ptr<T_ITensor> parent_tensor = findTensorAsParent(parent);
+ assert(parent_tensor);
+ _nonconst_mgr->buildSubtensor(parent_tensor, child, shape, coordinates, rank, extent_parent);
+ _ind_to_mgr.insert({child, *_nonconst_mgr});
+}
+
+// Return the non-constant tensor or subtensor for `ind`, or nullptr when it
+// does not exist yet (a subtensor's parent may itself be a subtensor)
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+std::shared_ptr<T_ITensor>
+AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::findTensorAsParent(const ir::OperandIndex &ind)
+{
+
+ auto &tensors = _nonconst_mgr->tensors();
+ auto &subtensors = _nonconst_mgr->subtensors();
+ if (tensors.find(ind) != tensors.end())
+ {
+ // Parent is allocated as tensor
+ return tensors[ind];
+ }
+ else if (subtensors.find(ind) != subtensors.end())
+ {
+ // Parent is allocated as subtensor
+ return subtensors[ind];
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+// Delegate lifetime events to the manager that owns the operand
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::startLifetime(const ir::OperandIndex &ind)
+{
+ assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
+ _ind_to_mgr.at(ind).startLifetime(ind);
+}
+
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::finishLifetime(const ir::OperandIndex &ind)
+{
+ assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
+ _ind_to_mgr.at(ind).finishLifetime(ind);
+}
+
+// Look up `ind` in its owning manager: plain tensors first, then subtensors;
+// returns nullptr for an unknown index
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+std::shared_ptr<T_ITensor>
+AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind)
+{
+ if (_ind_to_mgr.find(ind) == _ind_to_mgr.end())
+ return nullptr;
+
+ auto &tensors = _ind_to_mgr.at(ind).tensors();
+ if (tensors.find(ind) != tensors.end())
+ {
+ return tensors.at(ind);
+ }
+ else
+ {
+ return _ind_to_mgr.at(ind).subtensors().at(ind);
+ }
+}
+
+// Accessors exposing the underlying managers' tensor maps (by reference)
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &
+AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::constTensors(void)
+{
+ return _const_mgr->tensors();
+}
+
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &
+AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::nonconstTensors(void)
+{
+ return _nonconst_mgr->tensors();
+}
+
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+ir::OperandIndexMap<std::shared_ptr<T_SubTensor>> &
+AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::nonconstSubtensors(void)
+{
+ return _nonconst_mgr->subtensors();
+}
+
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+std::shared_ptr<::arm_compute::IMemoryManager>
+AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::internal_buffer_manager(void)
+{
+ return _inter_mgr->internal_buffer_manager();
+}
+
+// Apply `fn` to every registered operand index: non-constant tensors and
+// subtensors first, then constants. Iterate by const reference — the map
+// entries hold shared_ptr values, so copying each pair (as the original
+// `for (auto it : ...)` did) would churn refcounts for no benefit.
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::iterate(
+ const std::function<void(const ir::OperandIndex &)> &fn)
+{
+ for (const auto &it : _nonconst_mgr->tensors())
+ fn(it.first);
+
+ for (const auto &it : _nonconst_mgr->subtensors())
+ fn(it.first);
+
+ for (const auto &it : _const_mgr->tensors())
+ fn(it.first);
+}
+
+// Free constant tensors that are no longer referenced by any ACL function.
+// Safe to call repeatedly; erases freed entries from the const manager's map.
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
+void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::tryDeallocConstants(void)
+{
+ auto &tensors = _const_mgr->tensors();
+
+ for (auto it = tensors.begin(); it != tensors.end();)
+ {
+ const auto &ind = it->first;
+ auto tensor = it->second;
+ // NOTE The condition "tensor->num_uses() < 2" is used to prevent deallocating a constant tensor
+ // used in several nodes.
+ if (tensor->handle() && !tensor->handle()->is_used() && tensor->num_uses() < 2)
+ {
+ VERBOSE(AclTensorManager) << "Tensor #" << ind.value()
+ << " will be deallocated as an unused constant tensor" << std::endl;
+ tensor->allocator()->free();
+ tensor.reset();
+ // erase() returns the next valid iterator, keeping the loop well-defined
+ it = tensors.erase(it);
+ }
+ else
+ {
+ ++it;
+ }
+ }
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/acl_common/CMakeLists.txt b/runtime/onert/backend/acl_common/CMakeLists.txt
new file mode 100644
index 000000000..b87db2621
--- /dev/null
+++ b/runtime/onert/backend/acl_common/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Unsupported architecture: skip this backend entirely when ACL is unavailable
+nnas_find_package(ARMCompute QUIET)
+if(NOT ARMCompute_FOUND)
+ return()
+endif(NOT ARMCompute_FOUND)
+
+file(GLOB SOURCES "*.cc")
+
+# Static support library shared by the acl_cl / acl_neon backends
+add_library(${LIB_ONERT_BACKEND_ACL_COMMON} STATIC ${SOURCES})
+
+target_include_directories(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC arm_compute arm_compute_ex)
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC nnfw_lib_misc)
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PRIVATE nnfw_coverage)
+
+# PIC because the consuming backend libraries are shared objects
+set_target_properties(${LIB_ONERT_BACKEND_ACL_COMMON} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+set_target_properties(${LIB_ONERT_BACKEND_ACL_COMMON} PROPERTIES OUTPUT_NAME backend_acl_common)
diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc
new file mode 100644
index 000000000..0ea2be010
--- /dev/null
+++ b/runtime/onert/backend/acl_common/Convert.cc
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+#include "Swizzle.h"
+#include "ir/DataType.h"
+#include <memory>
+
+namespace
+{
+
+// Map onert's layout enum to ACL's; layouts ACL has no equivalent for become UNKNOWN
+::arm_compute::DataLayout asDataLayout(onert::ir::Layout layout)
+{
+ switch (layout)
+ {
+ case onert::ir::Layout::NHWC:
+ return ::arm_compute::DataLayout::NHWC;
+ case onert::ir::Layout::NCHW:
+ return ::arm_compute::DataLayout::NCHW;
+ default:
+ return ::arm_compute::DataLayout::UNKNOWN;
+ }
+}
+
+} // namespace
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+// Convert an onert shape to an ACL TensorShape, remapping each axis from the
+// frontend layout to the backend layout (see ToARMComputeAxis in Swizzle.h)
+::arm_compute::TensorShape asTensorShape(const ir::Shape &shape, ir::Layout frontend_layout,
+ ir::Layout backend_layout, bool apply_dim_correction)
+{
+ const uint32_t rank = shape.rank();
+
+ ::arm_compute::TensorShape res{};
+
+ res.set_num_dimensions(rank);
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ // NOTE In some cases, keeping an "incorrect" number of dimensions is required.
+ // For example, input_size is 1 in LSTM. The input-to-input weights([num_units, input_size]) of
+ // LSTM is used as the weight of the FullyConnected.
+ // The FullyConnected's weight must be greater or equal than 2-dimensions.
+ // However, if the dimension correction is applied to input_to_input_weights with input_size
+ // equal to 1, it will be changed to 1-D.
+ // So input_to_input_weights is not used by the weight of FullyConnected.
+ res.set(ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(), shape.dim(axis),
+ apply_dim_correction);
+ }
+
+ return res;
+}
+
+// Convert onert coordinates to ACL coordinates with the same axis remapping
+::arm_compute::Coordinates asTensorCoordinate(const ir::Coordinates &coord,
+ ir::Layout frontend_layout, ir::Layout backend_layout)
+{
+ const uint32_t rank = coord.size();
+
+ ::arm_compute::Coordinates res{};
+
+ res.set_num_dimensions(rank);
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ res.set(ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(), coord[axis]);
+ }
+
+ return res;
+}
+
+// Map an onert data type to the corresponding ACL data type; throws for
+// types this backend does not handle
+::arm_compute::DataType asDataType(const ir::DataType type)
+{
+ switch (type)
+ {
+ case ir::DataType::FLOAT32:
+ return ::arm_compute::DataType::F32;
+ case ir::DataType::INT32:
+ return ::arm_compute::DataType::S32;
+ case ir::DataType::UINT32:
+ return ::arm_compute::DataType::U32;
+ case ir::DataType::QUANT8_ASYMM:
+ return ::arm_compute::DataType::QASYMM8;
+ // BOOL8 is represented as one byte, same as U8
+ case ir::DataType::BOOL8:
+ case ir::DataType::UINT8:
+ return ::arm_compute::DataType::U8;
+ case ir::DataType::QUANT8_SYMM:
+ return ::arm_compute::DataType::S8;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+// Wrap (scale, zero-point offset) into ACL's QuantizationInfo
+::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset)
+{
+ return ::arm_compute::QuantizationInfo(scale, offset);
+}
+
+// Build a full ACL TensorInfo (shape + type + quantization + data layout)
+::arm_compute::TensorInfo asTensorInfo(const ir::Shape &shape, const ir::TypeInfo &typeInfo,
+ ir::Layout frontend_layout, ir::Layout backend_layout,
+ bool apply_dim_correction)
+{
+ ::arm_compute::TensorInfo info(
+ asTensorShape(shape, frontend_layout, backend_layout, apply_dim_correction), 1,
+ asDataType(typeInfo.type()), asQuantizationInfo(typeInfo.scale(), typeInfo.offset()));
+ info.set_data_layout(asDataLayout(backend_layout));
+ return info;
+}
+
+// Combine explicit padding and stride into ACL's PadStrideInfo (FLOOR rounding)
+::arm_compute::PadStrideInfo asPadStrideInfo(const ir::ExplicitPadding &padding,
+ const ir::Stride &stride)
+{
+ return ::arm_compute::PadStrideInfo{stride.horizontal,
+ stride.vertical,
+ padding.left,
+ padding.right,
+ padding.top,
+ padding.bottom,
+ ::arm_compute::DimensionRoundingType::FLOOR};
+}
+
+// Map an onert activation to ACL's ActivationLayerInfo; RELU1/RELU6 become
+// bounded ReLU with the matching clamp limits. Throws for unsupported codes.
+::arm_compute::ActivationLayerInfo asActivationLayerInfo(const ir::Activation act_code)
+{
+ switch (act_code)
+ {
+ case ir::Activation::NONE:
+ return ::arm_compute::ActivationLayerInfo{};
+ case ir::Activation::RELU:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ case ir::Activation::RELU1:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
+ case ir::Activation::RELU6:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
+ // Cases for activation of LSTM.
+ case ir::Activation::TANH:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
+ case ir::Activation::SIGMOID:
+ // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
+ // TODO In ACL and nnapi sepc, currently, Logistic's L always is 1, k always is 1, x0 always
+ // 0(always sigmoid) regardless of values of the parameter.
+ // If ACL support non-sigmoid logistic, should fix param values.
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
+ default:
+ throw std::runtime_error{"Not supported, yet"};
+ break;
+ }
+}
+
+// Wrap a raw ACL IFunction into onert's AclFunction adapter (takes ownership)
+std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer)
+{
+ return std::make_unique<AclFunction>(std::move(layer));
+}
+
+// Same as asAclFunction, but produces the CL-specific adapter
+std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer)
+{
+ return std::make_unique<AclClFunction>(std::move(layer));
+}
+
+// Inverse of asDataLayout: map ACL's layout enum back to onert's
+ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout)
+{
+ switch (data_layout)
+ {
+ case ::arm_compute::DataLayout::NHWC:
+ return ir::Layout::NHWC;
+ case ::arm_compute::DataLayout::NCHW:
+ return ir::Layout::NCHW;
+ default:
+ return ir::Layout::UNKNOWN;
+ }
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_common/Convert.h b/runtime/onert/backend/acl_common/Convert.h
new file mode 100644
index 000000000..760a73f51
--- /dev/null
+++ b/runtime/onert/backend/acl_common/Convert.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_CONVERT_H__
+#define __ONERT_BACKEND_ACL_COMMON_CONVERT_H__
+
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/core/SubTensorInfo.h>
+#include <arm_compute/core/TensorShape.h>
+
+#include "ir/Layout.h"
+#include "ir/InternalType.h"
+#include "ir/Operand.h"
+#include "ir/Shape.h"
+#include "ir/TypeInfo.h"
+#include "ir/Coordinates.h"
+#include "ir/Padding.h"
+#include "misc/feature/Shape.h"
+#include "misc/kernel/Shape.h"
+
+#include "AclFunction.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+// Shape/coordinate/type conversions between runtime IR types and ACL types.
+// `frontend_layout`/`backend_layout` select an axis permutation when the two
+// layouts disagree (e.g. NHWC frontend, NCHW backend).
+::arm_compute::TensorShape asTensorShape(const ir::Shape &shape, ir::Layout frontend_layout,
+                                         ir::Layout backend_layout,
+                                         bool apply_dim_correction = true);
+::arm_compute::Coordinates asTensorCoordinate(const ir::Coordinates &coord,
+                                              ir::Layout frontend_layout,
+                                              ir::Layout backend_layout);
+::arm_compute::DataType asDataType(ir::DataType type);
+::arm_compute::TensorInfo asTensorInfo(const ir::Shape &shape, const ir::TypeInfo &typeInfo,
+                                       ir::Layout frontend_layout, ir::Layout backend_layout,
+                                       bool apply_dim_correction = true);
+
+// Convert explicit padding + stride into ACL's combined PadStrideInfo.
+::arm_compute::PadStrideInfo asPadStrideInfo(const ir::ExplicitPadding &padding,
+                                             const ir::Stride &stride);
+
+// Convert a fused-activation code into ACL's ActivationLayerInfo.
+::arm_compute::ActivationLayerInfo asActivationLayerInfo(ir::Activation act_code);
+
+// Ownership-transferring wrappers around a raw ACL IFunction.
+std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
+std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
+
+// Generic variant of asAclFunction/asAclClFunction for any wrapper type.
+template <typename T_Function>
+std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction> &&fn)
+{
+  return std::make_unique<T_Function>(std::move(fn));
+}
+
+// Reverse direction: map an ACL data layout back to the runtime enum.
+ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout);
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_CONVERT_H__
diff --git a/runtime/onert/backend/acl_common/IACLTensor.cc b/runtime/onert/backend/acl_common/IACLTensor.cc
new file mode 100644
index 000000000..3796b9e4b
--- /dev/null
+++ b/runtime/onert/backend/acl_common/IACLTensor.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IACLTensor.h"
+#include "Convert.h"
+#include "Swizzle.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+// Base implementation intentionally rejects calls: concrete ACL tensor
+// classes are expected to override this with the actual rank.
+size_t IACLTensor::num_dimensions() const
+{
+  throw std::runtime_error("No definition of num_dimensions()");
+  return 0;
+}
+
+// Extent of the `index`-th dimension, with `index` given in runtime order.
+// The runtime puts the highest dimension first while ACL stores it last,
+// so the axis is reversed before querying ACL's TensorInfo.
+size_t IACLTensor::dimension(size_t index) const
+{
+  // Assume that the front is higher dimensional.
+  // i.g. N: 0, C: 1, H: 2, W: 3 for NCHW layout
+  // NOTE This tensor must not be applied dim correction
+  assert(num_dimensions() > index);
+  const ARMComputeAxis reversed{(static_cast<uint32_t>(num_dimensions() - index) - 1)};
+  return info()->dimension(reversed.value());
+}
+
+// Compute the byte offset of the element at `coords` (runtime axis order)
+// by reversing each axis into ACL order and delegating to ACL's
+// ITensorInfo::offset_element_in_bytes.
+size_t IACLTensor::calcOffset(const ir::Coordinates &coords) const
+{
+  const auto rank = num_dimensions();
+  assert(rank == coords.size());
+
+  ::arm_compute::Coordinates acl_coords;
+  for (uint32_t i = 0; i < rank; ++i)
+  {
+    // Runtime axis i maps to ACL axis (rank - i - 1).
+    const ARMComputeAxis reversed{static_cast<uint32_t>((rank - i) - 1)};
+    acl_coords.set(reversed.value(), coords[i]);
+  }
+
+  return info()->offset_element_in_bytes(acl_coords);
+}
+
+ir::Layout IACLTensor::layout() const { return acl_common::asRuntimeLayout(info()->data_layout()); }
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_common/IACLTensor.h b/runtime/onert/backend/acl_common/IACLTensor.h
new file mode 100644
index 000000000..36a5d2107
--- /dev/null
+++ b/runtime/onert/backend/acl_common/IACLTensor.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_I_ACL_TENSOR_H__
+#define __ONERT_BACKEND_ACL_COMMON_I_ACL_TENSOR_H__
+
+#include <backend/ITensor.h>
+#include <arm_compute/core/ITensor.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+/**
+ * @brief Class representing Tensor for ACL
+ * @todo Override is_dynamic() method. We don't support dynamic tensor for ACL yet as of Apr, 2020.
+ * FYI, ACL ITensorInfo has is_dynamic() method, which seems currently not used.
+ * Maybe for ACL, this method can be implemented using ITensorInfo::is_dynamic() in future.
+ */
+class IACLTensor : public ITensor
+{
+public:
+  IACLTensor() = default;
+  // Copying is forbidden: this object fronts a unique ACL tensor handle.
+  IACLTensor(const IACLTensor &) = delete;
+  IACLTensor &operator=(const IACLTensor &) = delete;
+  IACLTensor(IACLTensor &&) = default;
+  IACLTensor &operator=(IACLTensor &&) = default;
+
+public:
+  // Raw byte buffer of the underlying ACL tensor.
+  uint8_t *buffer() const final { return handle()->buffer(); }
+  // Total allocated size in bytes, including any padding ACL added.
+  size_t total_size() const final { return info()->total_size(); }
+  size_t dimension(size_t index) const final;
+  // Overridable: base implementation throws (see IACLTensor.cc).
+  size_t num_dimensions() const override;
+  size_t calcOffset(const ir::Coordinates &coords) const final;
+  ir::Layout layout() const final;
+  bool has_padding() const override { return info()->has_padding(); }
+
+public:
+  // Access to the wrapped ACL tensor; implemented by concrete subclasses.
+  virtual const arm_compute::ITensor *handle() const = 0;
+  virtual arm_compute::ITensor *handle() = 0;
+
+  const arm_compute::ITensorInfo *info() const { return handle()->info(); }
+  arm_compute::ITensorInfo *info() { return handle()->info(); }
+
+  arm_compute::DataType data_type() const { return info()->data_type(); }
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif //__ONERT_BACKEND_ACL_COMMON_I_ACL_TENSOR_H__
diff --git a/runtime/onert/backend/acl_common/ParentInfo.h b/runtime/onert/backend/acl_common/ParentInfo.h
new file mode 100644
index 000000000..708436327
--- /dev/null
+++ b/runtime/onert/backend/acl_common/ParentInfo.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
+#define __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
+
+#include <ir/Index.h>
+#include <ir/Coordinates.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+/**
+ * @brief Struct to represent parent operand in child operand
+ */
+struct ParentInfo
+{
+  ir::OperandIndex parent;     // operand this child is a sub-tensor of
+  ir::Layout frontend_layout;  // frontend layout associated with the parent
+  // NOTE(review): presumably the child's starting coordinates within the
+  // parent operand — confirm against the sub-tensor registration code.
+  ir::Coordinates coordinates;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
diff --git a/runtime/onert/backend/acl_common/Swizzle.h b/runtime/onert/backend/acl_common/Swizzle.h
new file mode 100644
index 000000000..e1c7f8041
--- /dev/null
+++ b/runtime/onert/backend/acl_common/Swizzle.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_SWIZZLE_H__
+#define __ONERT_BACKEND_ACL_COMMON_SWIZZLE_H__
+
+#include <cassert>
+#include <ir/Layout.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+// Thin value wrapper for an axis index expressed in ACL's axis order.
+// Exists mainly to make "ACL axis" vs "runtime axis" explicit in signatures.
+class ARMComputeAxis
+{
+public:
+  ARMComputeAxis() = default;
+  explicit ARMComputeAxis(uint32_t value) : _value{value} {}
+
+  // Underlying ACL axis index.
+  uint32_t value(void) const { return _value; }
+
+private:
+  uint32_t _value;
+};
+
+// Translate an axis index in runtime (frontend) order into ACL's reversed
+// axis order. When the frontend and ACL layouts disagree (NHWC <-> NCHW)
+// and rank >= 4, the spatial/channel axes are additionally permuted; the
+// batch axis (reversed value 3) falls through unchanged.
+inline ARMComputeAxis ToARMComputeAxis(uint32_t rank, uint32_t axis,
+                                       const ir::Layout org_layout = ir::Layout::UNKNOWN,
+                                       const ir::Layout acl_layout = ir::Layout::UNKNOWN)
+{
+  assert(rank > axis);
+
+  // Plain order reversal: runtime axis 0 is ACL axis (rank - 1).
+  const ARMComputeAxis reversed{(rank - axis) - 1};
+
+  if (rank >= 4 && org_layout == ir::Layout::NHWC && acl_layout == ir::Layout::NCHW)
+  {
+    // NHWC -> WHCN
+    // DEPTH
+    if (0 == reversed.value())
+    {
+      return ARMComputeAxis{2};
+    }
+    // WIDTH
+    if (1 == reversed.value())
+    {
+      return ARMComputeAxis{0};
+    }
+    // HEIGHT
+    if (2 == reversed.value())
+    {
+      return ARMComputeAxis{1};
+    }
+  }
+  if (rank >= 4 && org_layout == ir::Layout::NCHW && acl_layout == ir::Layout::NHWC)
+  {
+    // NCHW -> CWHN
+    // WIDTH
+    if (0 == reversed.value())
+    {
+      return ARMComputeAxis{1};
+    }
+    // HEIGHT
+    if (1 == reversed.value())
+    {
+      return ARMComputeAxis{2};
+    }
+    // DEPTH
+    if (2 == reversed.value())
+    {
+      return ARMComputeAxis{0};
+    }
+  }
+
+  return reversed;
+}
+
+// Build a Coordinates vector whose i-th entry is the ACL axis corresponding
+// to runtime axis i, honoring an optional layout conversion.
+inline ::arm_compute::Coordinates
+getARMComputeAxises(uint32_t rank, const ir::Layout org_layout = ir::Layout::UNKNOWN,
+                    const ir::Layout acl_layout = ir::Layout::UNKNOWN)
+{
+  ::arm_compute::Coordinates res{};
+
+  res.set_num_dimensions(rank);
+
+  for (uint32_t axis = 0; axis < rank; ++axis)
+  {
+    res.set(axis, ToARMComputeAxis(rank, axis, org_layout, acl_layout).value());
+  }
+
+  return res;
+}
+
+// Restructure a runtime permutation vector into an ACL PermutationVector:
+// both the positions and the values of `runtime_pv` are translated into
+// ACL axis order. Only ranks up to 4 are supported.
+// NOTE `runtime_pv` is taken by const reference to avoid copying the
+// vector on every call (the original signature copied it by value).
+inline ::arm_compute::PermutationVector
+getARMComputePermutationVector(uint32_t rank, const std::vector<int32_t> &runtime_pv,
+                               const ir::Layout org_layout = ir::Layout::UNKNOWN,
+                               const ir::Layout acl_layout = ir::Layout::UNKNOWN)
+{
+  // rank upto 4 is supported
+  assert(rank <= 4);
+  assert(runtime_pv.size() > 0);
+
+  int new_pv[4] = {0};
+  ::arm_compute::Coordinates axises = getARMComputeAxises(rank, org_layout, acl_layout);
+
+  for (uint32_t i = 0; i < rank; ++i)
+  {
+    // Position i moves to ACL axis axises[i]; the value itself is converted too.
+    new_pv[axises[i]] = ToARMComputeAxis(rank, runtime_pv[i], org_layout, acl_layout).value();
+  }
+
+  ::arm_compute::PermutationVector ACL_PV =
+      ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
+  ACL_PV.set_num_dimensions(rank);
+
+  return ACL_PV;
+}
+
+// Reorder the low `numOfBits` bits of `in` so that bit i moves to the slot
+// of its corresponding ACL axis (presumably used for per-axis bitmask
+// parameters — confirm at call sites). Bits beyond `numOfBits` are dropped.
+template <typename T>
+inline T ReorderBits(T in, size_t numOfBits, const ir::Layout org_layout = ir::Layout::UNKNOWN,
+                     const ir::Layout acl_layout = ir::Layout::UNKNOWN)
+{
+  assert(numOfBits > 0);
+  T out = 0;
+  for (int32_t i = numOfBits - 1; i >= 0; --i)
+  {
+    // Destination bit position of runtime axis i in ACL order.
+    const uint32_t toShift =
+        numOfBits - ToARMComputeAxis(numOfBits, i, org_layout, acl_layout).value() - 1;
+    out += ((in & 1) << toShift);
+    in >>= 1;
+  }
+  return out;
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_SWIZZLE_H__
diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h
new file mode 100644
index 000000000..609545dd9
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/Backend.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_H__
+#define __ONERT_BACKEND_ACL_NEON_BACKEND_H__
+
+#include <memory>
+#include <backend/Backend.h>
+#include <ir/Operands.h>
+
+#include "Config.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "ShapeFixer.h"
+#include "TensorManager.h"
+#include "Optimizer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+  Backend() : _config{std::make_shared<Config>()} {}
+
+  std::shared_ptr<IConfig> config() const override { return _config; }
+
+  // Assemble a BackendContext wiring together the acl_neon tensor builder,
+  // constant initializer, kernel generator, shape fixer and optimizer for
+  // the given graph. The custom kernel builder parameter is unused by this
+  // backend, and no tensor register is provided (nullptr).
+  std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
+                                             const std::shared_ptr<custom::IKernelBuilder> &,
+                                             bool is_linear_executor) const override
+  {
+    const auto &operands = graph.operands();
+    auto context = std::make_unique<BackendContext>(this, &graph);
+    // The tensor manager variant depends on whether the executor is linear.
+    auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+    context->tensor_builder = tb;
+    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
+    context->kernel_gen = std::make_shared<KernelGenerator>(operands, tb);
+    context->shape_fixer = std::make_shared<ShapeFixer>(operands, tb);
+    context->tensor_register = nullptr;
+    context->optimizer = std::make_shared<Optimizer>(context.get());
+    return context;
+  }
+
+private:
+  std::shared_ptr<IConfig> _config;
+};
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_BACKEND_H__
diff --git a/runtime/onert/backend/acl_neon/CMakeLists.txt b/runtime/onert/backend/acl_neon/CMakeLists.txt
new file mode 100644
index 000000000..05343dd8c
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Unsupported architecture
+# Skip this backend entirely when the ARM Compute Library is not available.
+nnas_find_package(ARMCompute QUIET)
+if(NOT ARMCompute_FOUND)
+  return()
+endif(NOT ARMCompute_FOUND)
+
+set(LIB_ONERT_BACKEND_ACL_NEON onert_backend_acl_neon)
+
+# Every .cc under this directory belongs to the backend library.
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_ACL_NEON} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_NEON} PRIVATE ${LIB_ONERT_BACKEND_ACL_COMMON})
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_NEON} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_NEON} PRIVATE nnfw_coverage)
+
+# Installed/loaded as "backend_acl_neon" rather than the target name.
+set_target_properties(${LIB_ONERT_BACKEND_ACL_NEON} PROPERTIES OUTPUT_NAME backend_acl_neon)
+
+install(TARGETS ${LIB_ONERT_BACKEND_ACL_NEON} DESTINATION lib)
diff --git a/runtime/onert/backend/acl_neon/Config.cc b/runtime/onert/backend/acl_neon/Config.cc
new file mode 100644
index 000000000..2033f5b4e
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/Config.cc
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Config.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+bool Config::initialize() { return true; }
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/Config.h b/runtime/onert/backend/acl_neon/Config.h
new file mode 100644
index 000000000..a6d6b4673
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/Config.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_CONFIG_H__
+#define __ONERT_BACKEND_ACL_NEON_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+class Config : public IConfig
+{
+public:
+  // Identifier used to select this backend at runtime.
+  std::string id() override { return "acl_neon"; }
+  bool initialize() override;
+  // NOTE(review): presumably indicates the backend accepts permuted
+  // (layout-converted) operation — confirm against IConfig's contract.
+  bool SupportPermutation() override { return true; }
+
+  // NEON kernels run on the CPU, so a CPU timer is used for profiling.
+  std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_CONFIG_H__
diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.cc b/runtime/onert/backend/acl_neon/ConstantInitializer.cc
new file mode 100644
index 000000000..4191b277f
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ConstantInitializer.cc
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstantInitializer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+// Store the operand table (via the base class) and the tensor builder used
+// to locate backend tensors during initialization.
+ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
+                                         const std::shared_ptr<TensorBuilder> &tensor_builder)
+    : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+{
+  // DO NOTHING
+}
+
+// Register a plain copy initializer for the `index`-th input of `node`.
+void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
+{
+  assert(node.getInputs().size() > index);
+
+  const auto &input_index = node.getInputs().at(index);
+  const auto &input_obj = _operands.at(input_index);
+  registerCopyInitializer(input_index, input_obj);
+}
+
+// Register a layout-permuting initializer for the `index`-th input of `node`.
+void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
+{
+  assert(node.getInputs().size() > index);
+
+  const auto &input_index = node.getInputs().at(index);
+  const auto &input_obj = _operands.at(input_index);
+  registerPermuteInitializer(input_index, input_obj);
+}
+
+// Register a special initializer for a constant BLOCK_SIZE input: the
+// rank-1 i32 values are written in reversed element order (runtime axis
+// order is reversed with respect to ACL's).
+void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
+{
+  const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
+  const auto &block_size_obj = _operands.at(block_size_index);
+
+  if (block_size_obj.isConstant())
+  {
+    _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
+      assert(model_obj.data());
+      const auto &shape = model_obj.shape();
+      const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
+      assert(model_obj.shape().rank() == 1);
+      obj.access([&](ITensor &tensor) {
+        for (size_t i = 0; i < shape.num_elements(); ++i)
+        {
+          // Element i receives the value from the mirrored position.
+          const int32_t value = base[shape.num_elements() - i - 1];
+          int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
+                                                      tensor.calcOffset({static_cast<int32_t>(i)}));
+          *into = value;
+        }
+      });
+    };
+  }
+}
+
+// Conv2D: the kernel needs a layout permutation; the bias is copied as-is.
+void ConstantInitializer::visit(const ir::operation::Conv2D &node)
+{
+  permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
+  copyInputInitialize(node, ir::operation::Conv2D::BIAS);
+}
+
+// DepthwiseConv2D: kernel is permuted, bias copied (same scheme as Conv2D).
+void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
+{
+  permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
+  copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
+}
+
+// FullyConnected: weight and bias are layout-independent, so both are copied.
+void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
+{
+  copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
+  copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
+}
+
+// LSTM: every constant weight/bias input is copied without permutation.
+void ConstantInitializer::visit(const ir::operation::LSTM &node)
+{
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
+  copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
+  copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
+  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
+}
+
+// RNN: constant weights and bias are copied without permutation.
+void ConstantInitializer::visit(const ir::operation::RNN &node)
+{
+  copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
+  copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::RNN::BIAS);
+}
+
+// Register initializers for the constant BLOCK_SIZE and PADDINGS inputs.
+void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
+{
+  const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
+  const auto &block_size_obj = _operands.at(block_size_index);
+
+  if (block_size_obj.isConstant())
+  {
+    // BLOCK_SIZE: rank-1 i32 tensor written in reversed element order to
+    // match ACL's reversed axis convention.
+    _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
+      assert(model_obj.data());
+      const auto &shape = model_obj.shape();
+      const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
+      assert(model_obj.shape().rank() == 1);
+      obj.access([&](ITensor &tensor) {
+        for (size_t i = 0; i < shape.num_elements(); ++i)
+        {
+          const int32_t value = base[shape.num_elements() - i - 1];
+          int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
+                                                      tensor.calcOffset({static_cast<int32_t>(i)}));
+          *into = value;
+        }
+      });
+    };
+  }
+
+  const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS);
+  const auto &paddings_obj = _operands.at(paddings_index);
+  if (paddings_obj.isConstant())
+  {
+    // PADDINGS: 2x2 i32 tensor; rows are reversed and the row/column roles
+    // swapped relative to the CL backend (see inline note below).
+    _init_map[paddings_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
+      assert(model_obj.data());
+      const auto &shape = model_obj.shape();
+      const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
+      assert(model_obj.shape().rank() == 2);
+      assert(shape.dim(0) == 2);
+      assert(shape.dim(1) == 2);
+      obj.access([&](ITensor &tensor) {
+        for (auto i = 0; i < shape.dim(0); ++i)
+        {
+          for (auto j = 0; j < shape.dim(1); ++j)
+          {
+            const int32_t value = base[i * 2 + j];
+            int32_t *into = reinterpret_cast<int32_t *>(
+                // The coordinates of NETensor are different from the coordiantes of CLTensor in
+                // this operand.
+                // NEON : {j, reversed i}
+                // CL : {reversed i, j}
+                tensor.buffer() + tensor.calcOffset({j, shape.dim(0) - i - 1}));
+            *into = value;
+          }
+        }
+      });
+    };
+  }
+}
+
+// TransposeConv: only the kernel is constant-initialized, with permutation.
+void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
+{
+  permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
+}
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h
new file mode 100644
index 000000000..6b4c1f145
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
+#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
+
+#include <backend/IConstantInitializer.h>
+#include <ir/Operands.h>
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+class ConstantInitializer : public IConstantInitializer
+{
+public:
+  ConstantInitializer(const ir::Operands &operands,
+                      const std::shared_ptr<TensorBuilder> &tensor_builder);
+
+public:
+  // Operations whose constant inputs need copy, permute, or element-order
+  // rewriting register their initializers through these visitors.
+  void visit(const ir::operation::BatchToSpaceND &) override;
+  void visit(const ir::operation::Conv2D &) override;
+  void visit(const ir::operation::DepthwiseConv2D &) override;
+  void visit(const ir::operation::FullyConnected &) override;
+  void visit(const ir::operation::LSTM &) override;
+  void visit(const ir::operation::RNN &) override;
+  void visit(const ir::operation::SpaceToBatchND &) override;
+  void visit(const ir::operation::TransposeConv &) override;
+
+private:
+  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+  // Register a plain copy initializer for the `index`-th input of `node`.
+  void copyInputInitialize(const ir::Operation &node, uint32_t index);
+  // Register a layout-permuting initializer for the `index`-th input of `node`.
+  void permuteInputInitialize(const ir::Operation &node, uint32_t index);
+
+private:
+  std::shared_ptr<TensorBuilder> _tensor_builder;
+};
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc
new file mode 100644
index 000000000..42a115438
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc
@@ -0,0 +1,2030 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include <arm_compute/runtime/NEON/NEFunctions.h> // Include all ARM Compute NEON functions
+#include <arm_compute/runtime/NEON/NEFunctionsEx.h> // Include all ARM Compute EX NEON functions
+#include <arm_compute/runtime/CPP/functions/CPPOneHotEx.h>
+
+#include <AclActivationBuilder.h>
+#include <AclFunction.h>
+#include <Convert.h>
+#include <Swizzle.h>
+
+#include "ir/Index.h"
+#include "ir/DataType.h"
+#include "ir/InternalType.h"
+#include "exec/NopFunction.h"
+#include "util/logging.h"
+#include "util/Utils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+using ::onert::backend::acl_common::asAclFunction;
+using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
+ ::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>;
+
+KernelGenerator::KernelGenerator(const ir::Operands &ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder)
+ : _ctx(ctx), _tensor_builder(tensor_builder), _current_op_seq_layout(ir::Layout::UNKNOWN)
+{
+ // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::OpSequence &op_seq)
+{
+ // TODO Move this to IKernelGenerator
+ // (all derivatives have the same implementation for this)
+ assert(!_return_fn_seq);
+ _return_fn_seq = std::make_unique<exec::FunctionSequence>();
+ _current_op_seq_layout = op_seq.getLayout();
+ for (const auto &e : op_seq.operations())
+ {
+ const auto &node = *(e.node);
+ node.accept(*this);
+ _return_fn_seq->append(releaseFunction());
+ }
+}
+
+void KernelGenerator::visit(const ir::operation::Abs &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
+
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ArgMax &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
+
+ const auto ifm_rank = node.param().rank;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto frontend_layout = _current_op_seq_layout;
+ auto backend_layout = ifm_alloc->layout();
+
+ int axis_value = node.param().axis;
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+ assert(axis_value >= 0 && axis_value < ifm_rank);
+ const auto fixed_axis =
+ acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+
+ auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>();
+
+ fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(),
+ arm_compute::ReductionOperation::ARG_IDX_MAX);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
+ const auto block_size_index{
+ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto block_size_alloc = _tensor_builder->at(block_size_index).get();
+
+ assert(_ctx.at(block_size_index).data());
+
+ auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>();
+
+ fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Cast &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NECast>();
+
+ auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8
+ ? arm_compute::SubDataType::BOOL
+ : arm_compute::SubDataType::NONE;
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+ using ir::operation::Conv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ const auto stride = node.param().stride;
+ const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
+ ker_width, ker_height);
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ker_alloc = _tensor_builder->at(ker_index).get();
+ auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+ const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
+ const auto act_info = acl_common::asActivationLayerInfo(activation);
+
+ auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+
+ fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
+ conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
+
+ auto block_size = node.param().block_size;
+ assert(block_size > 0);
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayerEx>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle(), block_size);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
+{
+ using ir::operation::DepthwiseConv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ // Kernel format is [1, kernel_height, kernel_width, depth_out].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ const auto stride = node.param().stride;
+ const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
+ ker_width, ker_height);
+ const auto multiplier = node.param().multiplier;
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ker_alloc = _tensor_builder->at(ker_index).get();
+ auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+ const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
+ const auto act_info = acl_common::asActivationLayerInfo(activation);
+
+ {
+ auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
+
+ fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
+ ofm_alloc->handle(), conv_info, multiplier, act_info);
+
+ _return_fn = asAclFunction(std::move(fn));
+ }
+}
+
+void KernelGenerator::visit(const ir::operation::Dequantize &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+ const auto stride = node.param().stride;
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
+
+ VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
+ VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
+ VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
+ VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
+ VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
+ VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
+ VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
+ VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
+ VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl;
+ VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl;
+ VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl;
+ VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
+ ::arm_compute::Size2D{kw, kh},
+ acl_common::asPadStrideInfo(padding, stride)};
+
+ auto fn = std::make_unique<::arm_compute::NEPoolingLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
+
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+void KernelGenerator::visit(const ir::operation::Mean &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Mean::Input::INPUT)};
+ const auto &axes{node.param().axes};
+ const auto keep_dims{node.param().keep_dims};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = ifm_alloc->layout();
+
+ // Convert to ACL axes taking into account negative values and possible duplicates.
+ std::set<std::uint32_t> acl_axes;
+ const int ifm_rank = node.param().rank;
+ for (int axis : axes)
+ {
+ if (axis < 0)
+ axis += ifm_rank;
+ acl_axes.insert(
+ acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
+ }
+
+ arm_compute::Coordinates fixed_axis;
+ for (const auto axis : acl_axes)
+ {
+ fixed_axis.set(fixed_axis.num_dimensions(), axis);
+ }
+
+ // NOTE NEReduceMean has a bug: it does not support NHWC layout
+ // NEReduceMean intermediate tensors are always NCHW layout
+ auto fn = std::make_unique<::arm_compute::NEReduceMeanEx>();
+
+ fn->configure(ifm_alloc->handle(), fixed_axis, keep_dims, ofm_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+ const auto stride = node.param().stride;
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
+
+ VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
+ VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
+ VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
+ VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
+ VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl;
+ VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
+ VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
+ VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
+ VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl;
+ VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl;
+ VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl;
+ VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ ::arm_compute::PoolingLayerInfo info{
+ ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh},
+ acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
+
+ auto fn = std::make_unique<::arm_compute::NEPoolingLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
+
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+void KernelGenerator::visit(const ir::operation::Concat &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+
+ std::vector<ir::OperandIndex> input_indexes;
+ for (const auto &input : node.getInputs())
+ input_indexes.emplace_back(input);
+
+ const auto axis = node.param().axis;
+
+ // Concat elimination check
+ bool eliminated = _tensor_builder->areSubTensorsOf(ofm_index, node.getInputs());
+ if (eliminated)
+ {
+ // If concat eliminated, return a NOP IFunction
+ VERBOSE(acl_neon_KernelGenerator_Concat) << "Concat eliminated" << std::endl;
+ _return_fn = std::make_unique<exec::NopFunction>();
+ return;
+ }
+
+ auto output_alloc = _tensor_builder->at(ofm_index).get();
+ std::vector<::arm_compute::ITensor *> input_tensors;
+ for (const auto &ifm_ind : input_indexes)
+ input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+
+ std::unique_ptr<::arm_compute::IFunction> fn;
+ if (input_indexes.size() < 2)
+ {
+ auto l = std::make_unique<::arm_compute::NECopy>();
+ l->configure(input_tensors.at(0), output_alloc->handle());
+ fn = std::move(l);
+ }
+ else
+ {
+ auto l = std::make_unique<::arm_compute::NEConcatenateLayer>();
+ const auto rank = node.param().rank;
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = output_alloc->layout();
+ const auto fixed_axis =
+ acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
+ l->configure(input_tensors, output_alloc->handle(), fixed_axis);
+ fn = std::move(l);
+ }
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto lookups_alloc = _tensor_builder->at(lookups_index).get();
+ auto values_alloc = _tensor_builder->at(values_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>();
+
+ fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Floor &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEFloor>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+ using ir::operation::FullyConnected;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+
+ const auto input_rank = _ctx.at(input_index).shape().rank();
+
+ const auto output_size =
+ _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
+ UNUSED_RELEASE(output_size);
+ assert(_ctx.at(bias_index).shape().dim(0) == output_size);
+ assert(_ctx.at(weight_index).shape().dim(0) == output_size);
+ const auto batch_size =
+ _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2);
+ const auto input_size =
+ _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1);
+
+ // Check for reshaping input's shape into rank-2
+ bool needs_reshape = false;
+ ir::Shape reshape(2);
+ if (input_rank == 3 || input_rank == 4)
+ {
+ const auto &ifm_shape = _ctx.at(input_index).shape();
+ auto feature_size = 1;
+ for (int i = 0; i < ifm_shape.rank(); ++i)
+ {
+ feature_size *= ifm_shape.dim(i);
+ }
+
+ UNUSED_RELEASE(feature_size);
+ assert(feature_size == batch_size * input_size);
+
+ // for reshaping
+ needs_reshape = true;
+ reshape.dim(0) = batch_size; /* H */
+ reshape.dim(1) = input_size; /* W */
+ }
+
+ const auto activation = node.param().activation;
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ const auto input_alloc = _tensor_builder->at(input_index).get();
+ const auto weight_alloc = _tensor_builder->at(weight_index).get();
+ const auto bias_alloc = _tensor_builder->at(bias_index).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto acl_layout = output_alloc->handle()->info()->data_layout();
+
+ auto fn = std::make_unique<arm_compute::NEFullyConnectedReshapingLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+
+ arm_compute::NEFullyConnectedReshapingLayer::KernelType kernel_type =
+ arm_compute::NEFullyConnectedReshapingLayer::KernelType::GENERAL;
+ if (_ctx.at(weight_index).isConstant())
+ {
+ kernel_type = arm_compute::NEFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS;
+ assert(_ctx.at(weight_index).data());
+ }
+
+ fn->configure(
+ input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(),
+ needs_reshape,
+ ::onert::backend::acl_common::asTensorShape(
+ reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
+ kernel_type);
+
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, output_alloc->handle()));
+}
+
+void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
+{
+ const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
+ const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
+
+ const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
+ const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
+ const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto hits_alloc = _tensor_builder->at(hits_index).get();
+
+ auto lookups_alloc = _tensor_builder->at(lookups_index).get();
+ auto keys_alloc = _tensor_builder->at(keys_index).get();
+ auto values_alloc = _tensor_builder->at(values_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEHashtableLookup>();
+
+ fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(),
+ output_alloc->handle(), hits_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Gather &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+
+ const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
+ const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape();
+
+ const auto ifm_rank = node.param().rank;
+ const auto axis_raw = node.param().axis;
+ const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
+ // Converting in reverse order
+ const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto indices_alloc = _tensor_builder->at(indices_index).get();
+ const auto backend_layout = ofm_alloc->layout();
+ UNUSED_RELEASE(backend_layout);
+
+ // NOTE The frontend layout and backend layout must be the same for this operation.
+ // If not the same, we have to add a stage(?) to perform permutation of output tensor. It
+ // is not efficient even if it works well. If so, it would be better to set the
+ // layout of these backend tensors to the same layout.
+ // There is also one thing we have to think about. This operation depends on the layout of
+ // a model. For example, if a model in NHWC has this operation as output rank == 4, indices
+ // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
+ // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
+ assert(backend_layout == ifm_alloc->layout());
+ assert(backend_layout == indices_alloc->layout());
+ assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
+
+ auto fn = std::make_unique<::arm_compute::NEGatherEx>();
+
+ fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
+ const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
+ const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto gamma_alloc = _tensor_builder->at(gamma_index).get();
+ auto beta_alloc = _tensor_builder->at(beta_index).get();
+ auto epsilon = node.param().epsilon;
+ auto activation = node.param().activation;
+
+ auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(),
+ beta_alloc->handle(), epsilon);
+
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+void KernelGenerator::visit(const ir::operation::L2Normalization &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
+
+ // {CL|Neon}L2Normalization performs the reduction only along dimension 0
+ // L2 Normalization always performs the reduction along the depth axis
+ // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by
+ // choosing normalization parameters as below
+
+ const auto &ifm_shape = _ctx.at(ifm_index).shape();
+ // TODO Support optional constant dimension that normalization would be performed on
+ const auto normalization_axis = node.param().rank - 1;
+ int32_t radius =
+ 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
+ float alpha = 1.0f; // In the implementation to make alpha_ become 1
+ float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
+ float bias = 0.0f; // Don't offset the reduction.
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
+ radius, alpha, beta, bias, false);
+
+ auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)};
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+
+ uint32_t kw = node.param().kw;
+ uint32_t kh = node.param().kh;
+ const auto stride = node.param().stride;
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ ::arm_compute::PoolingLayerInfo info{
+ ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh},
+ ::onert::backend::acl_common::asPadStrideInfo(padding, stride)};
+
+ auto fn = std::make_unique<::arm_compute::NEPoolingLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
+
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{
+ node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
+
+ auto radius = node.param().radius;
+ auto alpha = node.param().alpha;
+ auto beta = node.param().beta;
+ auto bias = node.param().bias;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ const auto norm_info = ::arm_compute::NormalizationLayerInfo(
+ ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
+
+ auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input0_alloc = _tensor_builder->at(input0_index).get();
+ auto input1_alloc = _tensor_builder->at(input1_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NELogicalAnd>();
+
+ fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::LogicalNot &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEBitwiseNot>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::LogicalOr &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input0_alloc = _tensor_builder->at(input0_index).get();
+ auto input1_alloc = _tensor_builder->at(input1_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NELogicalOr>();
+
+ fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Logistic &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
+
+ // NOTE NEActivationLayer can produce erroneous results; this is caused by 'vexpq_f32()'.
+ // The neon function returns a value outside of the limit of representation in float as 'NaN'
+ // instead of 'INF', and then the result of this op will be erroneous due to the 'NaN'.
+ auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for LSTM via ::arm_compute::NELSTMLayer.
+// Optional inputs (CIFG / peephole / projection) are detected by checking
+// whether the corresponding operand shapes are non-empty, and are passed to
+// ACL through LSTMParams only when present.
+void KernelGenerator::visit(const ir::operation::LSTM &node)
+{
+ // TODO Support dynamic rnn
+ // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
+ const auto scratch_buffer_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+ const auto output_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ const auto cell_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+ const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto input_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
+ const auto input_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ const auto input_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ const auto input_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
+ const auto recurrent_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ const auto recurrent_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ const auto recurrent_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto cell_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
+ const auto cell_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
+ const auto cell_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
+ const auto input_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+ const auto forget_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
+ const auto output_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ const auto projection_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
+ const auto projection_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
+ const auto output_state_in_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
+ const auto cell_threshold = node.param().cell_threshold;
+ const auto projection_threshold = node.param().projection_threshold;
+
+ // Optional operands are modeled as zero-sized tensors; a non-zero shape
+ // means the operand actually carries data.
+ bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
+ bool has_recurrent_to_input_weights =
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
+ bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(projection_weights_index).shape().dim(1) != 0;
+ // NOTE(review): implicit int -> bool conversion here; all the other has_*
+ // checks spell out '!= 0' explicitly — consider making this consistent.
+ bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
+
+ // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
+ // true: no CIFG
+ // false: CIFG
+ // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+
+ // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
+ // But the cell_to_input_weights does not exist in regular CIFG although peephole.
+ // true: peephole
+ // false: no peephole
+ bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
+
+ // NOTE Although the projection weights has data the projection bias may not have data.
+ bool has_projection_param = has_projection_weights;
+
+ const auto activation = node.param().activation;
+ const auto cell_clip = cell_threshold;
+ const auto projection_clip = projection_threshold;
+ assert(cell_clip >= 0.f && projection_clip >= 0.f);
+
+ auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get();
+ auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get();
+ auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get();
+ auto output_alloc = _tensor_builder->at(output_index).get();
+
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get();
+ auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get();
+ auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get();
+ auto recurrent_to_forget_weights_alloc =
+ _tensor_builder->at(recurrent_to_forget_weights_index).get();
+ auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get();
+ auto recurrent_to_output_weights_alloc =
+ _tensor_builder->at(recurrent_to_output_weights_index).get();
+
+ auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get();
+ auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get();
+ auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get();
+ auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get();
+ auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get();
+
+ auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
+
+ auto fn = std::make_unique<::arm_compute::NELSTMLayer>();
+
+ // Collect the optional-parameter groups ACL expects via LSTMParams.
+ ::arm_compute::LSTMParams<::arm_compute::ITensor> lstm_params{};
+ if (has_cifg_param)
+ {
+ auto input_to_input_weights_alloc =
+ _tensor_builder->at(input_to_input_weights_index).get(); // optional
+ auto recurrent_to_input_weights_alloc =
+ _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
+ auto cell_to_input_weights_handle =
+ has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle()
+ : nullptr; // optional (non-cifg && peephole)
+ auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional
+ lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(),
+ recurrent_to_input_weights_alloc->handle(),
+ cell_to_input_weights_handle, input_gate_bias_alloc->handle());
+ }
+ if (has_peephole_param)
+ {
+ auto cell_to_forget_weights_alloc =
+ _tensor_builder->at(cell_to_forget_weights_index).get(); // optional
+ auto cell_to_output_weights_alloc =
+ _tensor_builder->at(cell_to_output_weights_index).get(); // optional
+ lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(),
+ cell_to_output_weights_alloc->handle());
+ }
+ if (has_projection_param)
+ {
+ auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional
+ auto projection_bias_handle = has_projection_bias
+ ? _tensor_builder->at(projection_bias_index).get()->handle()
+ : nullptr; // optional
+ lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle);
+ }
+
+ fn->configure(
+ input_alloc->handle(), input_to_forget_weights_alloc->handle(),
+ input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(),
+ recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(),
+ recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(),
+ cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(),
+ cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(),
+ output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(),
+ lstm_params, act_info, cell_clip, projection_clip);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for element-wise Mul. The fused activation from the
+// node parameters is appended as a second function in a FunctionSequence.
+void KernelGenerator::visit(const ir::operation::Mul &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>();
+
+ // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO
+ fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
+
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+// Generate a NEON kernel for element-wise negation via NENegLayer.
+void KernelGenerator::visit(const ir::operation::Neg &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NENegLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for Pack (stack N inputs along a new axis) via
+// NEStackLayer. The frontend axis (possibly negative) is normalized and then
+// converted to the backend axis order with ToARMComputeAxis.
+void KernelGenerator::visit(const ir::operation::Pack &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ auto axis{node.param().axis};
+
+ const auto output_rank = node.param().rank;
+
+ std::vector<ir::OperandIndex> input_indexes;
+ for (const auto &input_index : node.getInputs())
+ input_indexes.emplace_back(input_index);
+
+ auto output = _tensor_builder->at(output_index).get()->handle();
+ std::vector<arm_compute::ITensor *> inputs;
+ for (const auto &input_index : input_indexes)
+ inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+
+ // Normalize a negative axis, then map to the ACL axis numbering.
+ if (axis < 0)
+ axis += output_rank;
+ axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
+
+ auto fn = std::make_unique<::arm_compute::NEStackLayer>();
+
+ fn->configure(inputs, axis, output);
+
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+// Generate a NEON kernel for Pad. The pad operand must be constant data
+// (asserted); each rank's (before, after) pair is read from it and remapped
+// to the backend axis order. Padding is filled with a zero pixel value that
+// carries the input's data type and quantization info.
+void KernelGenerator::visit(const ir::operation::Pad &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
+ const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
+ const auto output_index{node.getOutputs().at(0)};
+ assert(_ctx.at(pad_index).data());
+
+ auto rank = node.param().rank;
+ auto pad_base = _ctx.at(pad_index).data()->base();
+
+ auto input = _tensor_builder->at(input_index).get()->handle();
+ auto output = _tensor_builder->at(output_index).get()->handle();
+
+ ::arm_compute::PaddingList padding_list;
+ padding_list.resize(rank);
+ for (int32_t n = 0; n < rank; ++n)
+ {
+ // Each axis stores two int32 values: pad-before and pad-after.
+ const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto axis =
+ acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
+ padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
+ }
+
+ const auto input_type = _ctx.at(input_index).typeInfo();
+ UNUSED_RELEASE(input_type);
+ assert(input->info()->data_type() == acl_common::asDataType(input_type.type()));
+ assert(input->info()->quantization_info() ==
+ ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset()));
+ const auto pixel_value =
+ ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
+
+ auto fn = std::make_unique<::arm_compute::NEPadLayer>();
+ fn->configure(input, output, padding_list, pixel_value);
+
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+// Generate a NEON kernel for layout Permute. Rank-4 NCHW<->NHWC conversions
+// use NEPermute with a fixed permutation vector; every other case falls back
+// to a plain NECopy (no element reordering).
+void KernelGenerator::visit(const ir::operation::Permute &node)
+{
+ const auto ofm_idx{node.getOutputs().at(0)};
+ const auto ifm_idx{node.getInputs().at(0)};
+ const auto permute_type = node.getPermuteType();
+ auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ const auto rank = _ctx.at(ofm_idx).shape().rank();
+ assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
+
+ std::unique_ptr<::arm_compute::IFunction> fn;
+ arm_compute::PermutationVector pv;
+ if (permute_type == ir::operation::Permute::Type::NCHW_TO_NHWC && rank == 4)
+ {
+ // WHCN -> CWHN (ACL dimension order is reversed w.r.t. the frontend)
+ pv = arm_compute::PermutationVector{2, 0, 1};
+
+ auto l = std::make_unique<::arm_compute::NEPermute>();
+
+ l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+
+ fn = std::move(l);
+ }
+ else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
+ {
+ // CWHN -> WHCN
+ pv = arm_compute::PermutationVector{1, 2, 0};
+
+ auto l = std::make_unique<::arm_compute::NEPermute>();
+
+ l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+
+ fn = std::move(l);
+ }
+ else
+ {
+ // Same layout (or rank != 4): a straight copy suffices.
+ auto l = std::make_unique<::arm_compute::NECopy>();
+
+ l->configure(ifm_alloc->handle(), ofm_alloc->handle());
+
+ fn = std::move(l);
+ }
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for PReLU (parametric ReLU with a learned alpha
+// tensor) via NEPReLU.
+void KernelGenerator::visit(const ir::operation::PReLU &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
+ const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto alpha_alloc = _tensor_builder->at(alpha_index).get();
+
+ std::unique_ptr<::arm_compute::IFunction> fn;
+
+ auto l = std::make_unique<::arm_compute::NEPReLU>();
+
+ l->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle());
+
+ fn = std::move(l);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for ReduceMax via NEReduceOperation(MAX).
+// Axes are normalized (negative -> positive), deduplicated via a std::set,
+// and converted to the backend axis order. keep_dims is false.
+void KernelGenerator::visit(const ir::operation::ReduceMax &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ReduceMax::Input::INPUT)};
+ const auto &axes{node.param().axes};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = ifm_alloc->layout();
+
+ // Convert to ACL axes taking into account negative values and possible duplicates.
+ std::set<std::uint32_t> acl_axes;
+ const int ifm_rank = node.param().rank;
+ for (int axis : axes)
+ {
+ if (axis < 0)
+ axis += ifm_rank;
+ acl_axes.insert(
+ acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
+ }
+
+ arm_compute::Coordinates reduce_axes;
+ for (const auto axis : acl_axes)
+ {
+ reduce_axes.set(reduce_axes.num_dimensions(), axis);
+ }
+
+ auto fn = std::make_unique<::arm_compute::NEReduceOperation>();
+
+ fn->configure(ifm_alloc->handle(), reduce_axes, false, ofm_alloc->handle(),
+ ::arm_compute::ReduceOperation::MAX);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for ReduceMin via NEReduceOperation(MIN).
+// Mirrors the ReduceMax visitor: normalize/dedup axes, map to the backend
+// axis order, keep_dims = false.
+void KernelGenerator::visit(const ir::operation::ReduceMin &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ReduceMin::Input::INPUT)};
+ const auto &axes{node.param().axes};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = ifm_alloc->layout();
+
+ // Convert to ACL axes taking into account negative values and possible duplicates.
+ std::set<std::uint32_t> acl_axes;
+ const int ifm_rank = node.param().rank;
+ for (int axis : axes)
+ {
+ if (axis < 0)
+ axis += ifm_rank;
+ acl_axes.insert(
+ acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
+ }
+
+ arm_compute::Coordinates reduce_axes;
+ for (const auto axis : acl_axes)
+ {
+ reduce_axes.set(reduce_axes.num_dimensions(), axis);
+ }
+
+ auto fn = std::make_unique<::arm_compute::NEReduceOperation>();
+
+ fn->configure(ifm_alloc->handle(), reduce_axes, false, ofm_alloc->handle(),
+ ::arm_compute::ReduceOperation::MIN);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for ReduceSum via NEReduceSum. Same axis
+// normalization/dedup/mapping scheme as ReduceMax/ReduceMin; keep_dims false.
+void KernelGenerator::visit(const ir::operation::ReduceSum &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ReduceSum::Input::INPUT)};
+ const auto &axes{node.param().axes};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = input_alloc->layout();
+
+ // Convert to ACL axes taking into account negative values and possible duplicates.
+ std::set<std::uint32_t> acl_axes;
+ const int input_rank = node.param().rank;
+ for (int axis : axes)
+ {
+ if (axis < 0)
+ axis += input_rank;
+ acl_axes.insert(
+ acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value());
+ }
+
+ arm_compute::Coordinates fixed_axes;
+ for (const auto axis : acl_axes)
+ {
+ fixed_axes.set(fixed_axes.num_dimensions(), axis);
+ }
+
+ auto fn = std::make_unique<::arm_compute::NEReduceSum>();
+
+ fn->configure(input_alloc->handle(), fixed_axes, false, output_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for ReLU via NEActivationLayer with the RELU
+// activation function.
+void KernelGenerator::visit(const ir::operation::ReLU &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<arm_compute::NEActivationLayer>();
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+
+ fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for ReLU1 via NEActivationLayer using
+// LU_BOUNDED_RELU with upper bound 1.0f and lower bound -1.0f,
+// i.e. output clamped to [-1, 1].
+void KernelGenerator::visit(const ir::operation::ReLU1 &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
+
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for ReLU6 via NEActivationLayer using BOUNDED_RELU
+// with bound 6.0f (output clamped to [0, 6]).
+void KernelGenerator::visit(const ir::operation::ReLU6 &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
+
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for Reshape via NEReshapeLayer (pure metadata /
+// copy reshape; no data reordering).
+void KernelGenerator::visit(const ir::operation::Reshape &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ // NOTE This operation must not be changed the layout from frontend to backend
+ // So, PermutationOperationPass makes layouts of frontend and backend the same.
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = output_alloc->layout();
+ assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
+ frontend_layout == backend_layout);
+ UNUSED_RELEASE(frontend_layout);
+ UNUSED_RELEASE(backend_layout);
+
+ auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for ResizeBilinear via NEScale with BILINEAR
+// interpolation, REPLICATE border mode and TOP_LEFT sampling. The target
+// size is implied by the output tensor's shape.
+void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+
+ const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEScale>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(),
+ ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
+ ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for a basic RNN step via NERNNLayerEx, preceded by
+// copying hidden_state_in into hidden_state_out (NERNNLayerEx then updates
+// hidden_state_out in place).
+void KernelGenerator::visit(const ir::operation::RNN &node)
+{
+ const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
+ const auto hidden_state_out_index{
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
+ const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
+ const auto recurrent_weights_index{
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
+ const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
+
+ const auto activation = node.param().activation;
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get();
+
+ auto input_alloc = _tensor_builder->at(input_index).get();
+ auto weights_alloc = _tensor_builder->at(weights_index).get();
+ auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get();
+ auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get();
+ auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
+
+ // NOTE(review): this first _return_fn assignment is overwritten below, so the
+ // copy layer is discarded rather than sequenced before the RNN function —
+ // verify whether a FunctionSequence (as in the Mul/Sub visitors) was intended.
+ auto copy_layer = std::make_unique<::arm_compute::NECopy>();
+ copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle());
+ _return_fn = asAclFunction(std::move(copy_layer));
+
+ auto fn = std::make_unique<::arm_compute::NERNNLayerEx>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+ fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(),
+ bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(),
+ act_info);
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+// Generate a NEON kernel for element-wise reciprocal square root via
+// NERsqrtLayer.
+void KernelGenerator::visit(const ir::operation::RSQRT &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+// Generate a NEON kernel for Squeeze, lowered to a plain reshape: the dims
+// parameter is deliberately ignored because the output tensor already has
+// the squeezed shape (see comment below).
+void KernelGenerator::visit(const ir::operation::Squeeze &node)
+{
+ // Squeeze is identical to reshape except that it has an optional dimensions input.
+ // In addition, optional dims_index is ignored since output tensor already has squeezed shape
+ // by freezer and toco
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
+ const auto dims{node.param().dims};
+ const auto ndim{node.param().ndim};
+ (void)dims;
+ (void)ndim;
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+ auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
+ fn->configure(input_alloc->handle(), output_alloc->handle());
+ auto acl_fn = asAclFunction(std::move(fn));
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for Tanh via NEActivationLayer with the TANH
+// activation and scale parameters a = 1.0f, b = 1.0f (plain tanh(x)).
+void KernelGenerator::visit(const ir::operation::Tanh &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<arm_compute::NEActivationLayer>();
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
+
+ fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for Softmax via NESoftmaxLayer with the node's beta
+// scaling factor; the layer shares the backend's internal buffer manager for
+// its scratch memory.
+void KernelGenerator::visit(const ir::operation::Softmax &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
+ const auto beta = node.param().beta;
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+
+ fn->configure(input_alloc->handle(), output_alloc->handle(), beta);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for SpaceToBatchND. Block-size and paddings inputs
+// must be constant operands (asserted); the Ex layer variant works around a
+// quantized-padding bug (see note below).
+void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
+ const auto block_size_index{
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto block_size_alloc = _tensor_builder->at(block_size_index).get();
+ auto paddings_alloc = _tensor_builder->at(paddings_index).get();
+
+ assert(_ctx.at(block_size_index).data());
+ assert(_ctx.at(paddings_index).data());
+
+ // NESpaceToBatchLayer has a bug that padding's values are 0 even when zero point of QASYMM8 is
+ // not 0.
+ auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayerEx>();
+
+ fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(),
+ ofm_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for SpaceToDepth via NESpaceToDepthLayerEx with the
+// node's scalar block_size parameter.
+void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
+
+ auto block_size = node.param().block_size;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayerEx>();
+
+ fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for Split via NESplit: one input split evenly into
+// num_splits outputs along a single axis (normalized and remapped to the
+// backend axis order).
+void KernelGenerator::visit(const ir::operation::Split &node)
+{
+ // TODO Support this op by SubTensor
+ const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+
+ assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
+
+ const auto ifm_rank = node.param().rank;
+ std::vector<ir::OperandIndex> output_indexes;
+ for (const auto &output : node.getOutputs())
+ output_indexes.emplace_back(output);
+
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ std::vector<arm_compute::ITensor *> output_allocs;
+ for (const auto &ofm_ind : output_indexes)
+ output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = ifm_alloc->layout();
+ auto axis = node.param().axis;
+ if (axis < 0)
+ axis += ifm_rank;
+ axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
+
+ auto fn = std::make_unique<::arm_compute::NESplit>();
+
+ fn->configure(ifm_alloc->handle(), output_allocs, axis);
+
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+// Generate a NEON kernel for element-wise square root via NEActivationLayer
+// with the SQRT activation function.
+void KernelGenerator::visit(const ir::operation::SQRT &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
+
+ auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for element-wise SquaredDifference, (lhs - rhs)^2,
+// via NEElementwiseSquaredDiff.
+void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
+
+ fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a NEON kernel for element-wise Sub with SATURATE convert policy.
+// The fused activation is appended as a second function in a FunctionSequence,
+// mirroring the Mul visitor.
+void KernelGenerator::visit(const ir::operation::Sub &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
+
+ fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
+
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+// Generate an NEON Slice kernel. The constant BEGINS/SIZES operands are read
+// at compile time and converted into (start, end) coordinates per axis, with
+// each frontend axis remapped to the backend layout via ToARMComputeAxis.
+void KernelGenerator::visit(const ir::operation::Slice &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
+ const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
+ const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
+
+ auto outputData_alloc = _tensor_builder->at(output_index).get();
+ auto inputData_alloc = _tensor_builder->at(input_index).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = inputData_alloc->layout();
+
+ // Set initializers for indices data such as order of inputData
+ int input_rank = node.param().rank;
+ std::vector<int32_t> starts;
+ std::vector<int32_t> ends;
+ starts.resize(input_rank, 0);
+ ends.resize(input_rank, 0);
+ {
+ // BEGINS/SIZES must be compile-time constants: data() is dereferenced here.
+ auto beginData_base = _ctx.at(begins_index).data()->base();
+ auto sizeData_base = _ctx.at(sizes_index).data()->base();
+ const int beginData_size = _ctx.at(begins_index).shape().num_elements();
+ const int sizeData_size = _ctx.at(sizes_index).shape().num_elements();
+
+ using ir::DataType;
+
+ UNUSED_RELEASE(beginData_size);
+ UNUSED_RELEASE(sizeData_size);
+
+ assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32);
+ assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32);
+ assert(beginData_size == input_rank);
+ assert(sizeData_size == input_rank);
+
+ // NOTE(review): only beginData_base is null-checked; sizeData_base is
+ // dereferenced below without an equivalent assert — consider adding one.
+ assert(beginData_base != nullptr);
+ for (int n = 0; n < input_rank; ++n)
+ {
+ // Map frontend axis n to the corresponding ACL axis for this layout pair.
+ auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
+ backend_layout)
+ .value();
+
+ int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
+ starts[axis] = begin_value;
+
+ // NOTE(review): end = begin + size assumes size >= 0; a size of -1
+ // ("to the end" in some frontends) would not be handled here — confirm
+ // upstream guarantees non-negative sizes.
+ int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n);
+ ends[axis] = begin_value + size_value;
+ }
+ }
+
+ ::arm_compute::Coordinates starts_set;
+ ::arm_compute::Coordinates ends_set;
+
+ for (size_t i = 0; i < starts.size(); ++i)
+ {
+ starts_set.set(i, starts[i]);
+ ends_set.set(i, ends[i]);
+ }
+
+ auto fn = std::make_unique<::arm_compute::NESlice>();
+
+ fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate an NEON StridedSlice kernel. Constant STARTS/ENDS/STRIDES operands
+// are read at compile time and remapped per axis to the backend layout; the
+// begin/end/shrink-axis mask bits are reordered to match the permuted axes.
+void KernelGenerator::visit(const ir::operation::StridedSlice &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+ const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
+ const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
+ const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
+
+ auto outputData_alloc = _tensor_builder->at(output_index).get();
+ auto inputData_alloc = _tensor_builder->at(input_index).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = inputData_alloc->layout();
+
+ // Set initializers for indices data such as order of inputData
+ int input_rank = node.param().rank;
+ std::vector<int32_t> starts;
+ std::vector<int32_t> ends;
+ std::vector<int32_t> strides;
+ starts.resize(input_rank, 0);
+ ends.resize(input_rank, 0);
+ strides.resize(input_rank, 0);
+ {
+ // STARTS/ENDS/STRIDES must be compile-time constants: data() dereferenced.
+ auto startData_base = _ctx.at(starts_index).data()->base();
+ auto endData_base = _ctx.at(ends_index).data()->base();
+ auto stridesData_base = _ctx.at(strides_index).data()->base();
+ const int startData_size = _ctx.at(starts_index).shape().num_elements();
+ const int endData_size = _ctx.at(ends_index).shape().num_elements();
+ const int stridesData_size = _ctx.at(strides_index).shape().num_elements();
+
+ using ir::DataType;
+
+ UNUSED_RELEASE(startData_size);
+ UNUSED_RELEASE(endData_size);
+ UNUSED_RELEASE(stridesData_size);
+
+ assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32);
+ assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32);
+ assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32);
+ assert(startData_size == input_rank);
+ assert(endData_size == input_rank);
+ assert(stridesData_size == input_rank);
+
+ // NOTE(review): endData_base / stridesData_base are dereferenced below but
+ // only startData_base is null-checked — consider matching asserts.
+ assert(startData_base != nullptr);
+ for (int n = 0; n < input_rank; ++n)
+ {
+ // Map frontend axis n to the corresponding ACL axis for this layout pair.
+ auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
+ backend_layout)
+ .value();
+
+ int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
+ starts[axis] = start_value;
+
+ int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
+ ends[axis] = end_value;
+
+ int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
+ strides[axis] = strides_value;
+ }
+ }
+
+ // Set mask bits such as order of inputData
+ // FIXME Take the layouts into account.
+ const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
+ const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
+ const auto shrink_axis_mask =
+ acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
+
+ ::arm_compute::Coordinates starts_set;
+ ::arm_compute::Coordinates ends_set;
+ ::arm_compute::BiStrides strides_set;
+
+ for (size_t i = 0; i < starts.size(); ++i)
+ {
+ starts_set.set(i, starts[i]);
+ ends_set.set(i, ends[i]);
+ strides_set.set(i, strides[i]);
+ }
+
+ auto fn = std::make_unique<::arm_compute::NEStridedSlice>();
+
+ fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set,
+ strides_set, begin_mask, end_mask, shrink_axis_mask);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate an NEON transposed-convolution (deconvolution) kernel.
+// For VALID padding, the amount of output that the stride/kernel combination
+// cannot produce is passed to ACL as "invalid" right/bottom border sizes.
+void KernelGenerator::visit(const ir::operation::TransposeConv &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
+ const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);
+
+ const auto stride = node.param().stride;
+
+ assert((node.param().padding.type == ir::PaddingType::SAME) ||
+ (node.param().padding.type == ir::PaddingType::VALID));
+ auto padding = ir::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
+ ker_shape.W, ker_shape.H);
+
+ uint32_t invalid_horizontal = 0;
+ uint32_t invalid_vertical = 0;
+ if (node.param().padding.type == ir::PaddingType::VALID)
+ {
+ // Portion of the requested output extent not covered by the upsampled
+ // input plus kernel footprint.
+ invalid_horizontal =
+ ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
+ invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
+ }
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ker_alloc = _tensor_builder->at(ker_index).get();
+
+ const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
+
+ auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();
+
+ // Third argument (bias) is nullptr: no bias input is wired here.
+ fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
+ invalid_horizontal, invalid_vertical);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a kernel permuting tensor dimensions: NETranspose for the rank<=2
+// case (2D matrix transpose), NEPermute with a layout-adjusted permutation
+// vector otherwise.
+void KernelGenerator::visit(const ir::operation::Transpose &node)
+{
+ const auto ofm_idx{node.getOutputs().at(0)};
+ const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
+ const auto &perm{node.param().perm};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
+ const auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = ifm_alloc->layout();
+
+ const auto rank = node.param().rank;
+ std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
+ // Translate the frontend permutation into ACL axis order; only used by the
+ // NEPermute branch below.
+ auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
+ rank, pv, frontend_layout, backend_layout);
+
+ std::unique_ptr<::arm_compute::IFunction> fn;
+
+ if (ifm_alloc->num_dimensions() <= 2 && ofm_alloc->num_dimensions() <= 2)
+ {
+ auto l = std::make_unique<::arm_compute::NETranspose>();
+
+ l->configure(ifm_alloc->handle(), ofm_alloc->handle());
+
+ fn = std::move(l);
+ }
+ else
+ {
+ auto l = std::make_unique<::arm_compute::NEPermute>();
+
+ l->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv);
+
+ fn = std::move(l);
+ }
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate an NEON Unstack kernel splitting the input along `axis` into the
+// node's N output tensors. A negative axis is normalized, then remapped to
+// the backend layout's axis numbering.
+void KernelGenerator::visit(const ir::operation::Unpack &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
+ auto axis{node.param().axis};
+
+ const auto input_rank = node.param().rank;
+
+ std::vector<ir::OperandIndex> output_indexes;
+ for (const auto &output_index : node.getOutputs())
+ output_indexes.emplace_back(output_index);
+
+ auto input = _tensor_builder->at(input_index).get()->handle();
+ std::vector<arm_compute::ITensor *> outputs;
+ for (const auto &output_index : output_indexes)
+ outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ // Normalize negative axis, then convert to the ACL axis for this layout pair.
+ if (axis < 0)
+ axis += input_rank;
+ axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
+
+ auto fn = std::make_unique<::arm_compute::NEUnstack>();
+
+ fn->configure(input, outputs, axis);
+
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+// Generate an NEON kernel for element-wise addition (saturating), with the
+// node's fused activation appended as a second function in the sequence.
+void KernelGenerator::visit(const ir::operation::Add &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();
+
+ // SATURATE clamps on overflow instead of wrapping.
+ fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
+
+ // Run the addition, then the activation in-place on the output tensor.
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+// Generate an NEON kernel for element-wise division, with the node's fused
+// activation appended as a second function in the sequence.
+void KernelGenerator::visit(const ir::operation::Div &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();
+
+ fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+
+ // Run the division, then the activation in-place on the output tensor.
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+}
+
+// Generate an NEON kernel computing element-wise exp(x).
+void KernelGenerator::visit(const ir::operation::Exp &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEExpLayer>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate an NEON kernel for element-wise comparison (==, !=, <, <=, ...).
+void KernelGenerator::visit(const ir::operation::Comparison &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
+
+ const auto comparison_type = node.param().comparison_type;
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input0_alloc = _tensor_builder->at(input0_index).get();
+ auto input1_alloc = _tensor_builder->at(input1_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();
+
+ // NOTE(review): the C-style cast assumes the IR ComparisonType enumerator
+ // ordinals match arm_compute::ComparisonOperation — confirm the two enums
+ // are kept in sync.
+ fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate an NEON kernel computing element-wise min(lhs, rhs).
+void KernelGenerator::visit(const ir::operation::Min &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();
+
+ fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate an NEON kernel computing element-wise max(lhs, rhs).
+void KernelGenerator::visit(const ir::operation::Max &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();
+
+ fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _return_fn = std::move(acl_fn);
+}
+
+// Generate a one-hot encoding kernel. Uses the project's CPP reference
+// implementation (CPPOneHotEx) rather than a NEON function.
+void KernelGenerator::visit(const ir::operation::OneHot &node)
+{
+ const auto out_idx{node.getOutputs().at(0)};
+ const auto indices_idx{node.getInputs().at(ir::operation::OneHot::Input::INDICES)};
+ const auto depth = node.param().depth;
+ const auto on_value = node.param().on_value;
+ const auto off_value = node.param().off_value;
+ const auto axis = node.param().axis;
+
+ auto output_tensor = _tensor_builder->at(out_idx).get();
+ auto indices_tensor = _tensor_builder->at(indices_idx).get();
+
+ auto fn = std::make_unique<::arm_compute::CPPOneHotEx>();
+ fn->configure(indices_tensor->handle(), output_tensor->handle(), depth, on_value, off_value,
+ axis);
+ auto acl_fn = asAclFunction(std::move(fn));
+ _return_fn = std::move(acl_fn);
+}
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h
new file mode 100644
index 000000000..a29b07805
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
+
+#include <backend/IKernelGenerator.h>
+
+#include "ir/Operands.h"
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+// Visitor that lowers each supported IR operation to an arm_compute NEON
+// function object. visit(OpSequence) drives the per-operation visit overloads;
+// each overload leaves the generated function in the IKernelGenerator result
+// slot (_return_fn, per the visit implementations).
+class KernelGenerator : public IKernelGenerator
+{
+public:
+ KernelGenerator(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder);
+
+ void visit(const ir::OpSequence &) override;
+ void visit(const ir::operation::Abs &) override;
+ void visit(const ir::operation::ArgMax &) override;
+ void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::Cast &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::DepthToSpace &) override;
+ void visit(const ir::operation::DepthwiseConv2D &) override;
+ void visit(const ir::operation::Dequantize &) override;
+ void visit(const ir::operation::MaxPool2D &) override;
+ void visit(const ir::operation::Mean &) override;
+ void visit(const ir::operation::AvgPool2D &) override;
+ void visit(const ir::operation::Concat &) override;
+ void visit(const ir::operation::EmbeddingLookup &) override;
+ void visit(const ir::operation::Floor &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::Gather &) override;
+ void visit(const ir::operation::HashtableLookup &) override;
+ void visit(const ir::operation::InstanceNorm &) override;
+ void visit(const ir::operation::L2Normalization &) override;
+ void visit(const ir::operation::L2Pool2D &) override;
+ void visit(const ir::operation::LocalResponseNormalization &) override;
+ void visit(const ir::operation::LogicalAnd &) override;
+ void visit(const ir::operation::LogicalNot &) override;
+ void visit(const ir::operation::LogicalOr &) override;
+ void visit(const ir::operation::Logistic &) override;
+ void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::Mul &) override;
+ void visit(const ir::operation::Neg &) override;
+ void visit(const ir::operation::Pack &) override;
+ void visit(const ir::operation::Pad &) override;
+ void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::PReLU &) override;
+ void visit(const ir::operation::ReduceMax &) override;
+ void visit(const ir::operation::ReduceMin &) override;
+ void visit(const ir::operation::ReduceSum &) override;
+ void visit(const ir::operation::ReLU &) override;
+ void visit(const ir::operation::ReLU1 &) override;
+ void visit(const ir::operation::ReLU6 &) override;
+ void visit(const ir::operation::Reshape &) override;
+ void visit(const ir::operation::ResizeBilinear &) override;
+ void visit(const ir::operation::RNN &) override;
+ void visit(const ir::operation::RSQRT &) override;
+ void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::Tanh &) override;
+ void visit(const ir::operation::Softmax &) override;
+ void visit(const ir::operation::SpaceToBatchND &) override;
+ void visit(const ir::operation::SpaceToDepth &) override;
+ void visit(const ir::operation::Split &) override;
+ void visit(const ir::operation::SQRT &) override;
+ void visit(const ir::operation::SquaredDifference &) override;
+ void visit(const ir::operation::Sub &) override;
+ void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::StridedSlice &) override;
+ void visit(const ir::operation::TransposeConv &) override;
+ void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::Unpack &) override;
+ void visit(const ir::operation::Add &) override;
+ void visit(const ir::operation::Div &) override;
+ void visit(const ir::operation::Exp &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Min &) override;
+ void visit(const ir::operation::Max &) override;
+ void visit(const ir::operation::OneHot &) override;
+
+private:
+ // Operand table of the graph being compiled (constant data, shapes, types).
+ const ir::Operands &_ctx;
+ // Provides backend tensor objects (handle/layout) per operand index.
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+ // Layout of the op sequence currently being visited; used for axis remapping.
+ ir::Layout _current_op_seq_layout;
+};
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/acl_neon/Optimizer.cc b/runtime/onert/backend/acl_neon/Optimizer.cc
new file mode 100644
index 000000000..2948cab09
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/Optimizer.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Optimizer.h"
+
+#include "ParentInfo.h"
+
+#include <cassert>
+#include <ir/LoweredGraph.h>
+#include <util/logging.h>
+#include "AclSubTensorAnalyzer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+// Caches the backend context and downcasts its tensor builder to the
+// acl_neon TensorBuilder.
+// NOTE(review): context is dereferenced in the member-init list before the
+// assert runs, so the assert only documents the precondition; it does not
+// guard the dereference.
+Optimizer::Optimizer(BackendContext *context)
+ : _context{context},
+ _tensor_builder{std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
+{
+ assert(context);
+}
+
+// Runs the concat-elimination analysis: walks every operation assigned to
+// this backend with AclSubTensorAnalyzer and hands the resulting
+// parent/sub-tensor map to the tensor builder, so concat inputs can be
+// allocated as sub-tensors of the concat output.
+void Optimizer::optimize()
+{
+ // Concat elimination (build subtensor info)
+ {
+ acl_common::AclSubTensorAnalyzer sa{*_context->graph()};
+ for (auto op_info : _context->operation_list())
+ {
+ auto &op = _context->graph()->operations().at(op_info.index);
+ sa.setLayout(op_info.layout);
+ op.accept(sa);
+ }
+
+ _tensor_builder->parent_map(sa.releaseParentMap());
+ }
+}
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/Optimizer.h b/runtime/onert/backend/acl_neon/Optimizer.h
new file mode 100644
index 000000000..5fe0d519c
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/Optimizer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__
+#define __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__
+
+#include <backend/IOptimizer.h>
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+// acl_neon backend optimizer; currently performs sub-tensor (concat
+// elimination) analysis in optimize().
+class Optimizer : public IOptimizer
+{
+public:
+ Optimizer(BackendContext *context);
+
+ void optimize() override;
+
+private:
+ // Backend context owning the graph and operation list (not owned here).
+ BackendContext *_context;
+ // acl_neon tensor builder that receives the parent/sub-tensor map.
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+};
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__
diff --git a/runtime/onert/backend/acl_neon/ShapeFixer.cc b/runtime/onert/backend/acl_neon/ShapeFixer.cc
new file mode 100644
index 000000000..756222f4b
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ShapeFixer.cc
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ShapeFixer.h"
+
+#include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEArithmeticAddition.h>
+#include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h>
+#include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h>
+#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEActivationLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
+
+#include <Convert.h>
+#include <Swizzle.h>
+
+#include "ir/Index.h"
+#include "exec/NopFunction.h"
+#include "util/logging.h"
+#include "util/Utils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+using ::onert::backend::acl_common::asAclFunction;
+
+// Stores the operand table and tensor builder used by the visit overloads.
+ShapeFixer::ShapeFixer(const ir::Operands &ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder)
+ : _ctx(ctx), _tensor_builder(tensor_builder)
+{
+ assert(tensor_builder);
+}
+
+// The following operations need no shape fixing on acl_neon.
+void ShapeFixer::visit(const ir::operation::Abs &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ArgMax &) { /* DO NOTHING */}
+
+// Disable rank-extension ("dim correction") on both tensors so the backend
+// keeps their frontend shapes as-is.
+void ShapeFixer::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
+ _tensor_builder->dimCorrection(ofm_index, false);
+ _tensor_builder->dimCorrection(ifm_index, false);
+}
+
+// The following operations need no shape fixing on acl_neon.
+void ShapeFixer::visit(const ir::operation::Cast &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Conv2D &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::DepthToSpace &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::DepthwiseConv2D &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Dequantize &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::MaxPool2D &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Mean &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::AvgPool2D &) { /* DO NOTHING */}
+
+// Disable dim correction on the output and every input so concat operands
+// keep their frontend shapes (required for sub-tensor/concat elimination).
+void ShapeFixer::visit(const ir::operation::Concat &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ _tensor_builder->dimCorrection(ofm_index, false);
+ for (const auto &inputs : node.getInputs())
+ _tensor_builder->dimCorrection(inputs, false);
+}
+
+// Disable dim correction on the VALUES input and the output.
+void ShapeFixer::visit(const ir::operation::EmbeddingLookup &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+ _tensor_builder->dimCorrection(values_index, false);
+ _tensor_builder->dimCorrection(output_index, false);
+}
+
+// The following operations need no shape fixing on acl_neon.
+void ShapeFixer::visit(const ir::operation::Exp &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Floor &) { /* DO NOTHING */}
+
+// Disable dim correction on rank-3/4 inputs, which the kernel generator will
+// flatten to rank-2 for the fully-connected computation.
+void ShapeFixer::visit(const ir::operation::FullyConnected &node)
+{
+ using ir::operation::FullyConnected;
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto input_rank = _ctx.at(input_index).shape().rank();
+ // Check for reshaping input's shape into rank-2
+ if (input_rank == 3 || input_rank == 4)
+ _tensor_builder->dimCorrection(input_index, false);
+}
+
+// Disable dim correction on the VALUES input and the output.
+// FIX(review): the original read the input via
+// ir::operation::EmbeddingLookup::Input::VALUES on a HashtableLookup node —
+// an apparent copy/paste from the EmbeddingLookup visitor above. The two
+// operations declare separate Input enums, so the EmbeddingLookup ordinal
+// selects the wrong operand of this node; use HashtableLookup's own VALUES.
+void ShapeFixer::visit(const ir::operation::HashtableLookup &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
+ _tensor_builder->dimCorrection(values_index, false);
+ _tensor_builder->dimCorrection(output_index, false);
+}
+
+// Disable dim correction on output, input, and indices tensors.
+void ShapeFixer::visit(const ir::operation::Gather &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
+ const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
+ _tensor_builder->dimCorrection(ofm_index, false);
+ _tensor_builder->dimCorrection(ifm_index, false);
+ _tensor_builder->dimCorrection(indices_index, false);
+}
+
+// The following operations need no shape fixing on acl_neon.
+void ShapeFixer::visit(const ir::operation::InstanceNorm &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::L2Normalization &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::L2Pool2D &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::LocalResponseNormalization &) { /* DO NOTHING */}
+
+// If the two inputs' shapes differ, extend both to the larger rank so the
+// backend can broadcast them.
+void ShapeFixer::visit(const ir::operation::LogicalAnd &node)
+{
+ const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
+
+ if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+void ShapeFixer::visit(const ir::operation::LogicalNot &) { /* DO NOTHING */}
+
+// If the two inputs' shapes differ, extend both to the larger rank so the
+// backend can broadcast them.
+void ShapeFixer::visit(const ir::operation::LogicalOr &node)
+{
+ const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
+
+ if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+// The following operations need no shape fixing on acl_neon.
+void ShapeFixer::visit(const ir::operation::Logistic &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::LSTM &) { /* DO NOTHING */}
+
+// Disable dim correction on the output and every input, and extend each
+// input's rank to the output rank so they stack consistently.
+void ShapeFixer::visit(const ir::operation::Pack &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ _tensor_builder->dimCorrection(ofm_index, false);
+ for (const auto &inputs : node.getInputs())
+ {
+ _tensor_builder->dimCorrection(inputs, false);
+ const auto ofm_rank = _ctx.at(ofm_index).shape().rank();
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<ir::Shape &>(_ctx.at(inputs).shape()).extendRank(ofm_rank);
+ }
+}
+
+// Disable dim correction on the input and output tensors.
+void ShapeFixer::visit(const ir::operation::Pad &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ _tensor_builder->dimCorrection(input_index, false);
+ _tensor_builder->dimCorrection(output_index, false);
+}
+
+// If the two inputs' shapes differ, extend both to the larger rank so the
+// backend can broadcast them.
+void ShapeFixer::visit(const ir::operation::Mul &node)
+{
+ const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
+
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+ const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
+void ShapeFixer::visit(const ir::operation::Neg &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Permute &) { /* DO NOTHING */}
+
+// PReLU: when input and alpha shapes differ, extend both operands to the
+// larger rank so they share a common rank (broadcast preparation).
+void ShapeFixer::visit(const ir::operation::PReLU &node)
+{
+  const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
+  const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
+
+  if (!(_ctx.at(ifm_index).shape() == _ctx.at(alpha_index).shape()))
+  {
+    const auto broadcast_rank =
+        std::max(_ctx.at(ifm_index).shape().rank(), _ctx.at(alpha_index).shape().rank());
+    // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+    // a node to extend shape may be inserted in front of this operation
+    const_cast<ir::Shape &>(_ctx.at(ifm_index).shape()).extendRank(broadcast_rank);
+    const_cast<ir::Shape &>(_ctx.at(alpha_index).shape()).extendRank(broadcast_rank);
+  }
+}
+
+void ShapeFixer::visit(const ir::operation::ReduceMax &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ReduceMin &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ReduceSum &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ReLU &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ReLU1 &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ReLU6 &) { /* DO NOTHING */}
+
+// Reshape: disable ACL dimension correction for both input and output so
+// their declared shapes are kept as-is across the reshape.
+void ShapeFixer::visit(const ir::operation::Reshape &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
+
+  _tensor_builder->dimCorrection(input_index, false);
+  _tensor_builder->dimCorrection(output_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::ResizeBilinear &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::RNN &) { /* DO NOTHING */}
+
+// Comparison: when the two input shapes differ, extend both operands to the
+// larger rank so they share a common rank (broadcast preparation).
+void ShapeFixer::visit(const ir::operation::Comparison &node)
+{
+  const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
+  const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
+
+  if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
+  {
+    const auto broadcast_rank =
+        std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
+
+    // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+    // a node to extend shape may be inserted in front of this operation
+    const_cast<ir::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
+    const_cast<ir::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
+  }
+}
+
+void ShapeFixer::visit(const ir::operation::RSQRT &) { /* DO NOTHING */}
+
+// Squeeze: disable ACL dimension correction for both input and output.
+void ShapeFixer::visit(const ir::operation::Squeeze &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
+  _tensor_builder->dimCorrection(input_index, false);
+  _tensor_builder->dimCorrection(output_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::Tanh &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Slice &) { /* DO NOTHING */}
+
+// StridedSlice: disable ACL dimension correction for both input and output.
+void ShapeFixer::visit(const ir::operation::StridedSlice &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+  _tensor_builder->dimCorrection(ofm_index, false);
+  _tensor_builder->dimCorrection(ifm_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::Softmax &) { /* DO NOTHING */}
+
+// SpaceToBatchND: disable ACL dimension correction for both input and output.
+void ShapeFixer::visit(const ir::operation::SpaceToBatchND &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
+  _tensor_builder->dimCorrection(ofm_index, false);
+  _tensor_builder->dimCorrection(ifm_index, false);
+}
+
+void ShapeFixer::visit(const ir::operation::SpaceToDepth &) { /* DO NOTHING */}
+
+// Split: disable ACL dimension correction for the input and every output.
+void ShapeFixer::visit(const ir::operation::Split &node)
+{
+  const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+  _tensor_builder->dimCorrection(input_index, false);
+  for (const auto &output : node.getOutputs())
+    _tensor_builder->dimCorrection(output, false);
+}
+
+void ShapeFixer::visit(const ir::operation::SQRT &) { /* DO NOTHING */}
+
+// SquaredDifference: when LHS/RHS shapes differ, extend both operands to the
+// larger rank so they share a common rank (broadcast preparation).
+void ShapeFixer::visit(const ir::operation::SquaredDifference &node)
+{
+  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
+
+  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+  {
+    const auto broadcast_rank =
+        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+
+    // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+    // a node to extend shape may be inserted in front of this operation
+    const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+    const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+  }
+}
+
+// Sub: when LHS/RHS shapes differ, extend both operands to the larger rank
+// so they share a common rank (broadcast preparation).
+void ShapeFixer::visit(const ir::operation::Sub &node)
+{
+  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
+
+  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+  {
+    const auto broadcast_rank =
+        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+    // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+    // a node to extend shape may be inserted in front of this operation
+    const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+    const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+  }
+}
+
+void ShapeFixer::visit(const ir::operation::TransposeConv &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Transpose &) { /* DO NOTHING */}
+
+// Unpack: disable ACL dimension correction for the input and every output.
+void ShapeFixer::visit(const ir::operation::Unpack &node)
+{
+  const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
+  _tensor_builder->dimCorrection(input_index, false);
+  for (const auto &output_index : node.getOutputs())
+    _tensor_builder->dimCorrection(output_index, false);
+}
+
+// Add: when LHS/RHS shapes differ, extend both operands to the larger rank
+// so they share a common rank (broadcast preparation).
+void ShapeFixer::visit(const ir::operation::Add &node)
+{
+  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+
+  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+  {
+    const auto broadcast_rank =
+        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+    // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+    // a node to extend shape may be inserted in front of this operation
+    const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+    const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+  }
+}
+
+// Div: when LHS/RHS shapes differ, extend both operands to the larger rank
+// so they share a common rank (broadcast preparation).
+void ShapeFixer::visit(const ir::operation::Div &node)
+{
+  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
+
+  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+  {
+    const auto broadcast_rank =
+        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+
+    // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+    // a node to extend shape may be inserted in front of this operation
+    const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+    const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+  }
+}
+
+// Min: when LHS/RHS shapes differ, extend both operands to the larger rank
+// so they share a common rank (broadcast preparation).
+void ShapeFixer::visit(const ir::operation::Min &node)
+{
+  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
+
+  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+  {
+    const auto broadcast_rank =
+        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+
+    // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+    // a node to extend shape may be inserted in front of this operation
+    const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+    const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+  }
+}
+
+// Max: when LHS/RHS shapes differ, extend both operands to the larger rank
+// so they share a common rank (broadcast preparation).
+void ShapeFixer::visit(const ir::operation::Max &node)
+{
+  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
+
+  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+  {
+    const auto broadcast_rank =
+        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+
+    // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+    // a node to extend shape may be inserted in front of this operation
+    const_cast<ir::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+    const_cast<ir::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+  }
+}
+
+void ShapeFixer::visit(const ir::operation::OneHot &) { /* DO NOTHING */}
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/ShapeFixer.h b/runtime/onert/backend/acl_neon/ShapeFixer.h
new file mode 100644
index 000000000..434cfd240
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ShapeFixer.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_SHAPE_FIXER_H__
+#define __ONERT_BACKEND_ACL_NEON_SHAPE_FIXER_H__
+
+#include <backend/IShapeFixer.h>
+
+#include "ir/Operands.h"
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+// Shape-fixing visitor for the acl_neon backend. Before kernel generation it
+// adjusts operand shapes per operation: rank extension of mismatched operand
+// shapes (broadcast preparation) and disabling of ACL dimension correction.
+// Operations that need no adjustment have empty visit() implementations.
+class ShapeFixer : public IShapeFixer
+{
+public:
+  // ctx: operand table of the graph (shapes are mutated via const_cast in the
+  // implementation — see the TODOs there); tensor_builder: receives the
+  // dimCorrection flags.
+  ShapeFixer(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder);
+
+  void visit(const ir::operation::Abs &) override;
+  void visit(const ir::operation::ArgMax &) override;
+  void visit(const ir::operation::BatchToSpaceND &) override;
+  void visit(const ir::operation::Cast &) override;
+  void visit(const ir::operation::Conv2D &) override;
+  void visit(const ir::operation::DepthToSpace &) override;
+  void visit(const ir::operation::DepthwiseConv2D &) override;
+  void visit(const ir::operation::Dequantize &) override;
+  void visit(const ir::operation::MaxPool2D &) override;
+  void visit(const ir::operation::Mean &) override;
+  void visit(const ir::operation::AvgPool2D &) override;
+  void visit(const ir::operation::Concat &) override;
+  void visit(const ir::operation::EmbeddingLookup &) override;
+  void visit(const ir::operation::Exp &) override;
+  void visit(const ir::operation::Floor &) override;
+  void visit(const ir::operation::FullyConnected &) override;
+  void visit(const ir::operation::Gather &) override;
+  void visit(const ir::operation::HashtableLookup &) override;
+  void visit(const ir::operation::InstanceNorm &) override;
+  void visit(const ir::operation::L2Normalization &) override;
+  void visit(const ir::operation::L2Pool2D &) override;
+  void visit(const ir::operation::LocalResponseNormalization &) override;
+  void visit(const ir::operation::LogicalAnd &) override;
+  void visit(const ir::operation::LogicalNot &) override;
+  void visit(const ir::operation::LogicalOr &) override;
+  void visit(const ir::operation::Logistic &) override;
+  void visit(const ir::operation::LSTM &) override;
+  void visit(const ir::operation::Mul &) override;
+  void visit(const ir::operation::Neg &) override;
+  void visit(const ir::operation::Pack &) override;
+  void visit(const ir::operation::Pad &) override;
+  void visit(const ir::operation::Permute &) override;
+  void visit(const ir::operation::PReLU &) override;
+  void visit(const ir::operation::ReduceMax &) override;
+  void visit(const ir::operation::ReduceMin &) override;
+  void visit(const ir::operation::ReduceSum &) override;
+  void visit(const ir::operation::ReLU &) override;
+  void visit(const ir::operation::ReLU1 &) override;
+  void visit(const ir::operation::ReLU6 &) override;
+  void visit(const ir::operation::Reshape &) override;
+  void visit(const ir::operation::ResizeBilinear &) override;
+  void visit(const ir::operation::RNN &) override;
+  void visit(const ir::operation::RSQRT &) override;
+  void visit(const ir::operation::Squeeze &) override;
+  void visit(const ir::operation::Tanh &) override;
+  void visit(const ir::operation::Softmax &) override;
+  void visit(const ir::operation::SpaceToBatchND &) override;
+  void visit(const ir::operation::SpaceToDepth &) override;
+  void visit(const ir::operation::Split &) override;
+  void visit(const ir::operation::SQRT &) override;
+  void visit(const ir::operation::SquaredDifference &) override;
+  void visit(const ir::operation::Sub &) override;
+  void visit(const ir::operation::Slice &) override;
+  void visit(const ir::operation::StridedSlice &) override;
+  void visit(const ir::operation::TransposeConv &) override;
+  void visit(const ir::operation::Transpose &) override;
+  void visit(const ir::operation::Unpack &) override;
+  void visit(const ir::operation::Add &) override;
+  void visit(const ir::operation::Div &) override;
+  void visit(const ir::operation::Comparison &) override;
+  void visit(const ir::operation::Min &) override;
+  void visit(const ir::operation::Max &) override;
+  void visit(const ir::operation::OneHot &) override;
+
+private:
+  const ir::Operands &_ctx;               // operand table (held by reference)
+  std::shared_ptr<TensorBuilder> _tensor_builder;  // receives dimCorrection flags
+};
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_SHAPE_FIXER_H__
diff --git a/runtime/onert/backend/acl_neon/TensorBuilder.h b/runtime/onert/backend/acl_neon/TensorBuilder.h
new file mode 100644
index 000000000..070dc20ac
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/TensorBuilder.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_ACL_NEON_TENSOR_BUILDER_H__
+
+#include <AclTensorBuilder.h>
+
+#include "operand/NETensor.h"
+#include "operand/NESubTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+using TensorBuilder =
+ acl_common::AclTensorBuilder<operand::INETensor, operand::NETensor, operand::NESubTensor>;
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/acl_neon/TensorManager.h b/runtime/onert/backend/acl_neon/TensorManager.h
new file mode 100644
index 000000000..3ec9efa8f
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/TensorManager.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_TENSOR_MANAGER_H__
+#define __ONERT_BACKEND_ACL_NEON_TENSOR_MANAGER_H__
+
+#include <arm_compute/runtime/Allocator.h>
+#include <arm_compute/runtime/PoolManager.h>
+#include <arm_compute/runtime/OffsetLifetimeManager.h>
+#include <arm_compute/runtime/MemoryManagerOnDemand.h>
+#include <arm_compute/runtime/MemoryGroup.h>
+
+#include <AclMemoryManager.h>
+#include <AclLinearMemoryManager.h>
+#include <AclInternalBufferManager.h>
+#include <AclTensorManager.h>
+
+#include "operand/NETensor.h"
+#include "operand/NESubTensor.h"
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+using MemoryManager =
+ acl_common::AclMemoryManager<operand::INETensor, operand::NETensor, operand::NESubTensor>;
+
+using LinearMemoryManager = acl_common::AclLinearMemoryManager<
+ operand::INETensor, operand::NETensor, operand::NESubTensor,
+ ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
+ ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator, ::arm_compute::MemoryGroup>;
+
+using InternalBufferManager = acl_common::AclInternalBufferManager<
+ ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
+ ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator>;
+
+using TensorManager = acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
+ operand::NESubTensor>;
+
+// Factory for the acl_neon TensorManager; the caller owns the returned
+// pointer. For a linear executor the offset-lifetime-based LinearMemoryManager
+// is used for the second manager slot; otherwise plain MemoryManagers are
+// used for both.
+TensorManager *createTensorManager(bool is_linear_executor)
+{
+  if (is_linear_executor)
+  {
+    VERBOSE(acl_neon_createTensorManager) << "AclTensorManager as Linear" << std::endl;
+    return new TensorManager(new MemoryManager(), new LinearMemoryManager(),
+                             new InternalBufferManager());
+  }
+  else
+  {
+    VERBOSE(acl_neon_createTensorManager) << "AclTensorManager" << std::endl;
+    return new TensorManager(new MemoryManager(), new MemoryManager(), new InternalBufferManager());
+  }
+}
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_TENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/acl_neon/acl_neon.cc b/runtime/onert/backend/acl_neon/acl_neon.cc
new file mode 100644
index 000000000..f490d132d
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/acl_neon.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <util/logging.h>
+
+#include "Backend.h"
+
+extern "C" {
+// Plugin entry point: instantiates the acl_neon backend for the loader.
+onert::backend::Backend *onert_backend_create()
+{
+  VERBOSE(onert_backend_create) << "'acl_neon' loaded\n";
+  return new onert::backend::acl_neon::Backend;
+}
+
+// Plugin exit point: destroys a backend returned by onert_backend_create().
+void onert_backend_destroy(onert::backend::Backend *backend)
+{
+  // Fix: the unload path previously logged under the onert_backend_create tag.
+  VERBOSE(onert_backend_destroy) << "'acl_neon' unloaded\n";
+  delete backend;
+}
+}
diff --git a/runtime/onert/backend/acl_neon/operand/INETensor.cc b/runtime/onert/backend/acl_neon/operand/INETensor.cc
new file mode 100644
index 000000000..179ed691a
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/operand/INETensor.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "INETensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+namespace operand
+{
+
+void INETensor::access(const std::function<void(ITensor &tensor)> &fn) { fn(*this); }
+
+} // namespace operand
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/operand/INETensor.h b/runtime/onert/backend/acl_neon/operand/INETensor.h
new file mode 100644
index 000000000..db0ce6fdc
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/operand/INETensor.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_OPERAND_I_NE_TENSOR_H__
+#define __ONERT_BACKEND_ACL_NEON_OPERAND_I_NE_TENSOR_H__
+
+#include <arm_compute/core/ITensor.h>
+
+#include <IACLTensor.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+namespace operand
+{
+
+// Base class for acl_neon tensor operands (NETensor / NESubTensor). Concrete
+// subclasses supply the underlying arm_compute::ITensor handle; access()
+// invokes the given callback directly on this tensor (fn(*this), see
+// INETensor.cc).
+class INETensor : public acl_common::IACLTensor
+{
+public:
+  const arm_compute::ITensor *handle() const override = 0;
+  arm_compute::ITensor *handle() override = 0;
+  void access(const std::function<void(ITensor &tensor)> &fn) final;
+};
+
+} // namespace operand
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_OPERAND_I_NE_TENSOR_H__
diff --git a/runtime/onert/backend/acl_neon/operand/NESubTensor.cc b/runtime/onert/backend/acl_neon/operand/NESubTensor.cc
new file mode 100644
index 000000000..457addd55
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/operand/NESubTensor.cc
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NESubTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+namespace operand
+{
+
+// Wraps an arm_compute::SubTensor over `parent`'s handle at `coords` with the
+// given shape. `rank` is recorded only for num_dimensions(); `extend_parent`
+// is forwarded to arm_compute::SubTensor.
+NESubTensor::NESubTensor(INETensor *parent, const arm_compute::TensorShape &tensor_shape,
+                         const arm_compute::Coordinates &coords, size_t rank, bool extend_parent)
+    : _ne_sub_tensor(std::make_shared<arm_compute::SubTensor>(parent->handle(), tensor_shape,
+                                                              coords, extend_parent)),
+      _rank{rank}
+{
+  // DO NOTHING
+}
+
+const arm_compute::SubTensor *NESubTensor::handle() const { return _ne_sub_tensor.get(); }
+
+arm_compute::SubTensor *NESubTensor::handle() { return _ne_sub_tensor.get(); }
+
+} // namespace operand
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/operand/NESubTensor.h b/runtime/onert/backend/acl_neon/operand/NESubTensor.h
new file mode 100644
index 000000000..e7f77d7fc
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/operand/NESubTensor.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_OPERAND_NE_SUB_TENSOR_H__
+#define __ONERT_BACKEND_ACL_NEON_OPERAND_NE_SUB_TENSOR_H__
+
+#include <arm_compute/runtime/SubTensor.h>
+#include "INETensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+namespace operand
+{
+
+// A view into a parent INETensor, backed by arm_compute::SubTensor.
+class NESubTensor : public INETensor
+{
+public:
+  NESubTensor() = delete;
+
+public:
+  // rank: operand rank reported by num_dimensions(); extend_parent is passed
+  // through to arm_compute::SubTensor.
+  NESubTensor(INETensor *parent, const arm_compute::TensorShape &tensor_shape,
+              const arm_compute::Coordinates &coords, size_t rank, bool extend_parent = false);
+
+public:
+  size_t num_dimensions() const final { return _rank; }
+
+public:
+  const arm_compute::SubTensor *handle() const override;
+  arm_compute::SubTensor *handle() override;
+
+public:
+  // This method is used to prevent the use of memcpy for SubTensor
+  bool has_padding() const override { return true; }
+
+private:
+  std::shared_ptr<arm_compute::SubTensor> _ne_sub_tensor;  // underlying ACL sub-tensor
+  size_t _rank;  // rank of the operand this sub-tensor represents
+};
+
+} // namespace operand
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_OPERAND_NE_SUB_TENSOR_H__
diff --git a/runtime/onert/backend/acl_neon/operand/NETensor.cc b/runtime/onert/backend/acl_neon/operand/NETensor.cc
new file mode 100644
index 000000000..53dbb3021
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/operand/NETensor.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <arm_compute/runtime/Memory.h>
+#include <arm_compute/runtime/MemoryRegion.h>
+#include "NETensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+namespace operand
+{
+
+// Creates the backing arm_compute::Tensor and initializes its allocator with
+// `info`. Memory is not allocated here — init() records metadata only
+// (NOTE(review): confirm against arm_compute TensorAllocator docs). `rank`
+// and `num_uses` are stored for later queries.
+NETensor::NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses)
+    : _ne_tensor(std::make_shared<arm_compute::Tensor>()), _rank{rank}, _num_uses{num_uses}
+{
+  allocator()->init(info);
+}
+
+const arm_compute::Tensor *NETensor::handle() const { return _ne_tensor.get(); }
+
+arm_compute::Tensor *NETensor::handle() { return _ne_tensor.get(); }
+
+arm_compute::TensorAllocator *NETensor::allocator() { return _ne_tensor->allocator(); }
+
+} // namespace operand
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/operand/NETensor.h b/runtime/onert/backend/acl_neon/operand/NETensor.h
new file mode 100644
index 000000000..0dd81afec
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/operand/NETensor.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_OPERAND_NE_TENSOR_H__
+#define __ONERT_BACKEND_ACL_NEON_OPERAND_NE_TENSOR_H__
+
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/Tensor.h>
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "INETensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+namespace operand
+{
+
+// Concrete acl_neon tensor owning an arm_compute::Tensor.
+class NETensor : public INETensor
+{
+public:
+  NETensor() = delete;
+
+public:
+  // rank: operand rank reported by num_dimensions(); num_uses: use count of
+  // the operand (presumably for lifetime planning — confirm against callers).
+  NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses);
+
+public:
+  size_t num_dimensions() const final { return _rank; }
+
+public:
+  const arm_compute::Tensor *handle() const override;
+  arm_compute::Tensor *handle() override;
+  size_t num_uses() const { return _num_uses; }
+
+public:
+  arm_compute::TensorAllocator *allocator();
+
+private:
+  std::shared_ptr<arm_compute::Tensor> _ne_tensor;  // owned ACL tensor
+  size_t _rank;      // operand rank
+  size_t _num_uses;  // operand use count
+};
+
+} // namespace operand
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_OPERAND_NE_TENSOR_H__
diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
new file mode 100644
index 000000000..d12c809cd
--- /dev/null
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_BACKEND_H__
+#define __ONERT_BACKEND_CPU_BACKEND_H__
+
+#include "Config.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "ShapeFixer.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+// CPU backend plugin: builds a per-graph BackendContext wired with the cpu
+// TensorBuilder, ConstantInitializer, KernelGenerator and ShapeFixer.
+class Backend : public ::onert::backend::Backend
+{
+public:
+  Backend() : _config{std::make_shared<Config>()} {}
+
+  std::shared_ptr<IConfig> config() const override { return _config; }
+
+  // Creates the context for `graph`. `kb` (custom-kernel builder) is
+  // forwarded to the KernelGenerator; the trailing unnamed bool is ignored
+  // by this backend.
+  std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
+                                             const std::shared_ptr<custom::IKernelBuilder> &kb,
+                                             bool) const override
+  {
+    const auto &operands = graph.operands();
+    auto context = std::make_unique<BackendContext>(this, &graph);
+    auto tb = std::make_shared<TensorBuilder>();
+    context->tensor_builder = tb;
+    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
+    context->kernel_gen = std::make_shared<KernelGenerator>(operands, tb, kb);
+    context->shape_fixer = std::make_shared<ShapeFixer>(operands);
+    // Not used by the cpu backend.
+    context->tensor_register = nullptr;
+    context->optimizer = nullptr;
+    return context;
+  }
+
+private:
+  std::shared_ptr<IConfig> _config;  // shared so config() can hand it out
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_BACKEND_H__
diff --git a/runtime/onert/backend/cpu/CMakeLists.txt b/runtime/onert/backend/cpu/CMakeLists.txt
new file mode 100644
index 000000000..ed2dcd110
--- /dev/null
+++ b/runtime/onert/backend/cpu/CMakeLists.txt
@@ -0,0 +1,14 @@
+set(LIB_ONERT_BACKEND_CPU onert_backend_cpu) # logical CMake target name
+
+file(GLOB_RECURSE SOURCES "*.cc") # NOTE glob runs at configure time; new files need re-running cmake
+
+add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_misc nnfw_lib_cker)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ${LIB_ONERT_BACKEND_CPU_COMMON})
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage)
+
+set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu) # file name "backend_cpu"
+
+install(TARGETS ${LIB_ONERT_BACKEND_CPU} DESTINATION lib)
diff --git a/runtime/onert/backend/cpu/Config.cc b/runtime/onert/backend/cpu/Config.cc
new file mode 100644
index 000000000..cea8095b1
--- /dev/null
+++ b/runtime/onert/backend/cpu/Config.cc
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Config.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+bool Config::initialize() { return true; } // no cpu-specific setup; always succeeds
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/Config.h b/runtime/onert/backend/cpu/Config.h
new file mode 100644
index 000000000..e7818fa04
--- /dev/null
+++ b/runtime/onert/backend/cpu/Config.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_CONFIG_H__
+#define __ONERT_BACKEND_CPU_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+class Config : public IConfig // trivial configuration of the cpu backend
+{
+public:
+  std::string id() override { return "cpu"; } // fixed backend identifier
+  bool initialize() override;
+  bool SupportPermutation() override { return true; }
+
+  std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_CONFIG_H__
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc
new file mode 100644
index 000000000..a77a48b30
--- /dev/null
+++ b/runtime/onert/backend/cpu/ConstantInitializer.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstantInitializer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
+                                         const std::shared_ptr<TensorBuilder> &tensor_builder)
+    : IConstantInitializer{operands}, _tensor_builder{tensor_builder} // kept for tensor_builder()
+{
+  // DO NOTHING
+}
+
+void ConstantInitializer::visit(const ir::operation::Conv2D &node) // copy-register kernel and bias constants
+{
+  const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
+  const auto &kernel_obj = _operands.at(kernel_index);
+  registerCopyInitializer(kernel_index, kernel_obj);
+
+  const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
+  const auto &bias_obj = _operands.at(bias_index);
+  registerCopyInitializer(bias_index, bias_obj);
+}
+
+void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) // same copy scheme as Conv2D
+{
+  const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
+  const auto &kernel_obj = _operands.at(kernel_index);
+  registerCopyInitializer(kernel_index, kernel_obj);
+
+  const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
+  const auto &bias_obj = _operands.at(bias_index);
+  registerCopyInitializer(bias_index, bias_obj);
+}
+
+void ConstantInitializer::visit(const ir::operation::FullyConnected &node) // copy-register weight and bias
+{
+  const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
+  const auto &weight_obj = _operands.at(weight_index);
+  registerCopyInitializer(weight_index, weight_obj);
+
+  const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
+  const auto &bias_obj = _operands.at(bias_index);
+  registerCopyInitializer(bias_index, bias_obj);
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h
new file mode 100644
index 000000000..bd06c64d1
--- /dev/null
+++ b/runtime/onert/backend/cpu/ConstantInitializer.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
+#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
+
+#include "TensorBuilder.h"
+
+#include <backend/IConstantInitializer.h>
+#include <ir/Operands.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+class ConstantInitializer : public IConstantInitializer // registers copy-initializers for const weights/biases
+{
+public:
+  ConstantInitializer(const ir::Operands &operands,
+                      const std::shared_ptr<TensorBuilder> &tensor_builder);
+
+public:
+  void visit(const ir::operation::Conv2D &) override;
+  void visit(const ir::operation::DepthwiseConv2D &) override;
+  void visit(const ir::operation::FullyConnected &) override;
+
+private:
+  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+
+private:
+  std::shared_ptr<TensorBuilder> _tensor_builder;
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
new file mode 100644
index 000000000..86764dd06
--- /dev/null
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -0,0 +1,932 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "kernel/AbsLayer.h"
+#include "kernel/AddLayer.h"
+#include "kernel/AvgPoolLayer.h"
+#include "kernel/CastLayer.h"
+#include "kernel/CompareLayer.h"
+#include "kernel/ConcatLayer.h"
+#include "kernel/ConvolutionLayer.h"
+#include "kernel/DepthwiseConvolutionLayer.h"
+#include "kernel/DivLayer.h"
+#include "kernel/ExpLayer.h"
+#include "kernel/FullyConnectedLayer.h"
+#include "kernel/GatherLayer.h"
+#include "kernel/LogisticLayer.h"
+#include "kernel/MaxLayer.h"
+#include "kernel/MaxPoolLayer.h"
+#include "kernel/MinLayer.h"
+#include "kernel/MulLayer.h"
+#include "kernel/OneHotLayer.h"
+#include "kernel/OperationUtils.h"
+#include "kernel/PackLayer.h"
+#include "kernel/PadLayer.h"
+#include "kernel/PermuteLayer.h"
+#include "kernel/ReduceLayer.h"
+#include "kernel/ReshapeLayer.h"
+#include "kernel/RsqrtLayer.h"
+#include "kernel/ShapeLayer.h"
+#include "kernel/SinLayer.h"
+#include "kernel/SliceLayer.h"
+#include "kernel/SoftMaxLayer.h"
+#include "kernel/StridedSliceLayer.h"
+#include "kernel/SplitLayer.h"
+#include "kernel/SubLayer.h"
+#include "kernel/TanhLayer.h"
+#include "kernel/TransposeLayer.h"
+#include "kernel/UnpackLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+KernelGenerator::KernelGenerator(
+    const ir::Operands &operand_ctx, const std::shared_ptr<TensorBuilder> &tensor_builder,
+    const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builer)
+    : _ctx(operand_ctx), _tensor_builder(tensor_builder), _kernel_builder(kernel_builer),
+      _current_op_seq_layout(ir::Layout::UNKNOWN) // set per-sequence in visit(OpSequence)
+{
+  // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::OpSequence &op_seq) // build a FunctionSequence for every op in order
+{
+  // TODO Move this to IKernelGenerator
+  // (all derivatives have the same implementation for this)
+  assert(!_return_fn_seq);
+  _return_fn_seq = std::make_unique<exec::FunctionSequence>();
+  _current_op_seq_layout = op_seq.getLayout();
+  for (const auto &e : op_seq.operations())
+  {
+    const auto &node = *(e.node);
+    node.accept(*this); // dispatches to a visit() below, which sets _return_fn
+    _return_fn_seq->append(releaseFunction());
+
+    // NOTE Permute node has tensors of the other backends
+    if (node.opcode() != ir::OpCode::Permute)
+    {
+      for (const auto &ind : node.getInputs() + node.getOutputs())
+      {
+        auto tensor = _tensor_builder->at(ind);
+        if (tensor)
+        {
+          tensor->increase_ref(); // bump ref count for each tensor this op touches
+        }
+      }
+    }
+  }
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node) // configure a ConvolutionLayer kernel
+{
+  using ir::operation::Conv2D;
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+  const auto stride = node.param().stride;
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+  const auto &ker_shape = _ctx.at(ker_index).shape();
+  const auto ker_height = ker_shape.dim(1);
+  const auto ker_width = ker_shape.dim(2);
+  const auto padding_type = node.param().padding.type;
+  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
+                                            ker_width, ker_height); // resolves implicit padding
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  auto ker_alloc = _tensor_builder->at(ker_index).get();
+  auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::ConvolutionLayer>();
+
+  fn->configure(ifm_alloc, ker_alloc, bias_alloc, padding_type, padding.left, padding.right,
+                padding.top, padding.bottom, stride.horizontal, stride.vertical, activation,
+                ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) // like Conv2D plus a multiplier
+{
+  using ir::operation::DepthwiseConv2D;
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
+  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+
+  const auto stride = node.param().stride;
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  // Kernel format is [1, kernel_height, kernel_width, depth_out].
+  const auto &ker_shape = _ctx.at(ker_index).shape();
+  const auto ker_height = ker_shape.dim(1);
+  const auto ker_width = ker_shape.dim(2);
+  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
+                                            ker_width, ker_height);
+  const auto multiplier = node.param().multiplier; // depth multiplier
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  auto ker_alloc = _tensor_builder->at(ker_index).get();
+  auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::DepthwiseConvolutionLayer>();
+
+  fn->configure(ifm_alloc, ker_alloc, bias_alloc, padding.left, padding.right, padding.top,
+                padding.bottom, stride.horizontal, stride.vertical, multiplier, activation,
+                ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::MaxPool2D &node) // configure a MaxPoolLayer kernel
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
+
+  const auto kh = node.param().kh; // pooling window height
+  const auto kw = node.param().kw; // pooling window width
+
+  const auto stride = node.param().stride;
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  const auto padding =
+      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::MaxPoolLayer>();
+
+  fn->configure(ifm_alloc, padding.left, padding.right, padding.top, padding.bottom,
+                stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::AvgPool2D &node) // mirrors MaxPool2D with AvgPoolLayer
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
+
+  const auto kh = node.param().kh;
+  const auto kw = node.param().kw;
+  const auto stride = node.param().stride;
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  const auto padding =
+      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::AvgPoolLayer>();
+
+  fn->configure(ifm_alloc, padding.left, padding.right, padding.top, padding.bottom,
+                stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Concat &node) // N inputs concatenated along one axis
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+
+  const auto rank = _ctx.at(ofm_index).shape().rank();
+  const auto axis =
+      ::onert::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_op_seq_layout);
+
+  auto output_alloc = _tensor_builder->at(ofm_index).get();
+
+  std::vector<const operand::Tensor *> input_tensors;
+  for (auto &ifm_idx : node.getInputs())
+    input_tensors.emplace_back(_tensor_builder->at(ifm_idx).get());
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::ConcatLayer>();
+
+  fn->configure(input_tensors, axis, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node) // configure FullyConnectedLayer
+{
+  using ir::operation::FullyConnected;
+
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+  const auto activation = node.param().activation;
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+  auto weight_alloc = _tensor_builder->at(weight_index).get();
+  auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::FullyConnectedLayer>();
+
+  fn->configure(input_alloc, weight_alloc, bias_alloc, activation, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Reshape &node) // shape may come as an optional 2nd input
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  // optional 2nd input
+  operand::Tensor *shape_alloc = nullptr; // nullptr means "no runtime shape tensor"
+
+  if (node.getInputs().size() == 2)
+  {
+    const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
+    shape_alloc = _tensor_builder->at(shape_index).get();
+  }
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::ReshapeLayer>();
+
+  fn->configure(input_alloc, shape_alloc, output_alloc);
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Squeeze &node) // implemented as a plain reshape
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  // Squeeze can share same kernel with reshape
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::ReshapeLayer>();
+
+  fn->configure(input_alloc, nullptr, output_alloc); // no shape tensor needed
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Softmax &node) // softmax with scaling factor beta
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
+
+  const auto beta = node.param().beta;
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::SoftMaxLayer>();
+
+  fn->configure(input_alloc, beta, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Add &node) // elementwise add with fused activation
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::AddLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Comparison &node) // elementwise compare (EQ/NE/...)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto comparison_type = node.param().comparison_type;
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::CompareLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, comparison_type, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Gather &node) // gather along axis; layouts must agree
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
+  const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+  auto indices_alloc = _tensor_builder->at(indices_index).get();
+
+  const auto backend_layout = output_alloc->layout();
+  UNUSED_RELEASE(backend_layout); // only consumed by asserts below
+
+  // NOTE The frontend layout and backend layout must be the same for this operation.
+  //      If not the same, we have to add a stage(?) to perform permutation of output tensor. It
+  //      is not not efficient even if it works well. If so, it would be better to set the
+  //      layout of these backend tensors to the same layout.
+  //      There is also one thing we have to think about. This operation depends on the layout of
+  //      a model. For example, if a model in NHWC has this operation as output rank == 4, indices
+  //      rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
+  //      and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
+  assert(backend_layout == input_alloc->layout());
+  assert(backend_layout == indices_alloc->layout());
+  const auto &input_shape = _ctx.at(input_index).shape();
+  UNUSED_RELEASE(input_shape);
+  assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
+
+  const auto axis_raw = node.param().axis;
+  const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw); // normalize negatives
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::GatherLayer>();
+
+  fn->configure(input_alloc, indices_alloc, output_alloc, axis_value);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Sub &node) // elementwise subtract with fused activation
+{
+  // The same as Add
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
+
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::SubLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Mul &node) // elementwise multiply with fused activation
+{
+  // The same as Add
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
+
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::MulLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::OneHot &node) // one-hot encode int32 indices
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
+
+  const auto depth = node.param().depth;
+  const auto on_value = node.param().on_value;
+  const auto off_value = node.param().off_value;
+  const auto axis = node.param().axis;
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto indices_alloc = _tensor_builder->at(indices_index).get();
+
+  assert(indices_alloc->data_type() == OperandType::INT32); // only int32 indices supported
+  assert(axis <= static_cast<int>(indices_alloc->num_dimensions()));
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::OneHotLayer>();
+
+  fn->configure(indices_alloc, output_alloc, depth, on_value, off_value, axis);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Div &node) // elementwise divide with fused activation
+{
+  // The same as Add
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
+
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::DivLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Permute &node) // cross-backend layout copy/permute
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(0)};
+
+  const auto &shape = _ctx.at(output_index).shape();
+  const auto input_backend_ctx = node.param().input_backend_ctx;
+  const auto output_backend_ctx = node.param().output_backend_ctx;
+  const auto data_type = node.getDataType();
+
+  auto output_tensor = output_backend_ctx->tensor_builder->tensorAt(output_index); // other backend's tensor
+  auto input_tensor = input_backend_ctx->tensor_builder->tensorAt(input_index);    // other backend's tensor
+  assert(output_tensor != nullptr);
+  assert(input_tensor != nullptr);
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::PermuteLayer>();
+
+  // TODO Support NCHW frontend
+  auto out_shape = shape;
+  if (shape.rank() == 4 && output_tensor->layout() == ir::Layout::NCHW)
+  {
+    out_shape.dim(1) = shape.dim(3); // NHWC -> NCHW dim reorder of the output shape
+    out_shape.dim(2) = shape.dim(1);
+    out_shape.dim(3) = shape.dim(2);
+  }
+
+  const auto permute_type = node.getPermuteType();
+  // Check Permutation Type
+  const auto inferPermuteType = [&]() { // re-derive type from layouts to cross-check node data
+    if (input_tensor->layout() == ir::Layout::NHWC && output_tensor->layout() == ir::Layout::NCHW)
+    {
+      return ir::operation::Permute::Type::NHWC_TO_NCHW;
+    }
+    else if (input_tensor->layout() == ir::Layout::NCHW &&
+             output_tensor->layout() == ir::Layout::NHWC)
+    {
+      return ir::operation::Permute::Type::NCHW_TO_NHWC;
+    }
+    else
+    {
+      return ir::operation::Permute::Type::COPY;
+    }
+  }();
+  UNUSED_RELEASE(inferPermuteType); // only consumed by the assert below
+  assert(permute_type == inferPermuteType);
+
+  fn->configure(input_tensor, output_tensor, out_shape, permute_type, data_type);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Custom &node) // delegate to the user's kernel builder
+{
+  auto get_type_info = [](const ir::Operand &operand) -> custom::TypeInfo {
+    const auto &frontend_shape = operand.shape();
+    custom::Shape shape(frontend_shape.rank());
+    for (auto d = 0; d < frontend_shape.rank(); ++d)
+    {
+      shape.dim(d) = frontend_shape.dim(d);
+    }
+
+    return {shape, operand.typeInfo().type()};
+  };
+
+  auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
+                          std::vector<custom::TypeInfo> &types, std::vector<void *> &allocs) {
+    for (auto &idx : opSeq)
+    {
+      const auto &operand = _ctx.at(idx);
+      // TODO make sure using `_current_op_seq_layout` is correct for custom operations
+      types.emplace_back(get_type_info(operand));
+      auto in_alloc = _tensor_builder->at(idx)->buffer(); // raw buffer handed to the custom kernel
+      allocs.emplace_back(in_alloc);
+    }
+  };
+
+  backend::custom::CustomKernelConfigParams params{};
+
+  fill_op_info(node.getInputs(), params.input_types, params.input_allocations);
+  fill_op_info(node.getOutputs(), params.output_types, params.output_allocations);
+
+  params.userdata = node.userdata().data;
+  params.userdata_size = node.userdata().size;
+
+  auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Exp &node) // elementwise exp
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::ExpLayer>();
+
+  fn->configure(input_alloc, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Logistic &node) // elementwise logistic (sigmoid)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::LogisticLayer>();
+
+  fn->configure(input_alloc, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Tanh &node) // elementwise tanh
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::TanhLayer>();
+
+  fn->configure(input_alloc, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Pack &node) // stack N inputs along a new axis
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+
+  const auto rank = node.param().rank;
+  const auto axis =
+      ::onert::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_op_seq_layout);
+
+  assert(-rank <= axis && axis < rank); // axis may still be negative here
+
+  auto output_alloc = _tensor_builder->at(ofm_index).get();
+
+  std::vector<const operand::Tensor *> input_tensors;
+  for (auto &ifm_idx : node.getInputs())
+    input_tensors.emplace_back(_tensor_builder->at(ifm_idx).get());
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::PackLayer>();
+
+  fn->configure(input_tensors, axis, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Unpack &node) // split one input into N outputs along an axis
+{
+  const auto input_index{node.getInputs().at(0)};
+
+  const auto rank = node.param().rank;
+  const auto axis =
+      ::onert::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_op_seq_layout);
+
+  assert(-rank <= axis && axis < rank);
+
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  std::vector<operand::Tensor *> output_tensors;
+  for (auto &output_idx : node.getOutputs())
+    output_tensors.emplace_back(_tensor_builder->at(output_idx).get());
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::UnpackLayer>();
+
+  uint32_t axis_resolved = (axis < 0 ? axis + rank : axis); // normalize to non-negative before configure
+
+  fn->configure(input_alloc, axis_resolved, node.param().num, output_tensors);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Pad &node) // pad with a constant pad table operand
+{
+  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
+  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
+  const auto output_index{node.getOutputs().at(0)};
+  assert(_ctx.at(pad_index).data()); // pad table must be a compile-time constant
+
+  auto input = _tensor_builder->at(input_index).get();
+  auto output = _tensor_builder->at(output_index).get();
+  auto pad_rank = _ctx.at(pad_index).shape().dim(0);
+  auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::PadLayer>();
+
+  fn->configure(input, output, pad_base, pad_rank);
+
+  _return_fn = std::move(fn);
+}
+
+// Build the CPU kernel for element-wise Max(lhs, rhs).
+void KernelGenerator::visit(const ir::operation::Max &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::MaxLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+// Build the CPU kernel for element-wise Min(lhs, rhs).
+void KernelGenerator::visit(const ir::operation::Min &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::MinLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+// Build the CPU kernel for Cast (element-wise type conversion).
+void KernelGenerator::visit(const ir::operation::Cast &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::CastLayer>();
+
+  fn->configure(ifm_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+// Build the CPU kernel for Transpose using the permutation stored in the
+// node parameters (node.param().perm).
+void KernelGenerator::visit(const ir::operation::Transpose &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(0)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+  auto rank = node.param().rank;
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::TransposeLayer>();
+
+  fn->configure(input_alloc, output_alloc, node.param().perm, rank);
+
+  _return_fn = std::move(fn);
+}
+
+// The three reduce visitors below share one kernel (kernel::ReduceLayer)
+// and differ only in the ReduceType passed to configure().
+
+// Build the CPU kernel for ReduceSum over node.param().axes.
+void KernelGenerator::visit(const ir::operation::ReduceSum &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(0)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<kernel::ReduceLayer>();
+
+  fn->configure(input_alloc, output_alloc, kernel::ReduceType::kSum, node.param().axes,
+                node.param().keep_dims);
+
+  _return_fn = std::move(fn);
+}
+
+// Build the CPU kernel for ReduceMax over node.param().axes.
+void KernelGenerator::visit(const ir::operation::ReduceMax &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(0)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<kernel::ReduceLayer>();
+
+  fn->configure(input_alloc, output_alloc, kernel::ReduceType::kMax, node.param().axes,
+                node.param().keep_dims);
+
+  _return_fn = std::move(fn);
+}
+
+// Build the CPU kernel for ReduceMin over node.param().axes.
+void KernelGenerator::visit(const ir::operation::ReduceMin &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(0)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<kernel::ReduceLayer>();
+
+  fn->configure(input_alloc, output_alloc, kernel::ReduceType::kMin, node.param().axes,
+                node.param().keep_dims);
+
+  _return_fn = std::move(fn);
+}
+
+// Build the CPU kernel for Slice; begin/size come in as runtime tensors,
+// so they are passed to the layer rather than read here.
+void KernelGenerator::visit(const ir::operation::Slice &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
+  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
+  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+  auto begins_alloc = _tensor_builder->at(begins_index).get();
+  auto sizes_alloc = _tensor_builder->at(sizes_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::SliceLayer>();
+
+  fn->configure(input_alloc, begins_alloc, sizes_alloc, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+// Build the CPU kernel for StridedSlice. starts/ends/strides are tensor
+// inputs; the bit masks come from the node parameters.
+void KernelGenerator::visit(const ir::operation::StridedSlice &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
+  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
+  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+  auto starts_alloc = _tensor_builder->at(starts_index).get();
+  auto ends_alloc = _tensor_builder->at(ends_index).get();
+  auto strides_alloc = _tensor_builder->at(strides_index).get();
+
+  auto begin_mask = node.param().begin_mask;
+  auto end_mask = node.param().end_mask;
+  auto shrink_axis_mask = node.param().shrink_axis_mask;
+  auto rank = node.param().rank;
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::StridedSliceLayer>();
+
+  fn->configure(input_alloc, starts_alloc, ends_alloc, strides_alloc, output_alloc, begin_mask,
+                end_mask, shrink_axis_mask, rank);
+
+  _return_fn = std::move(fn);
+}
+
+// Build the CPU kernel for Split: divides the input into num_splits equal
+// parts along `axis`, one per output operand.
+void KernelGenerator::visit(const ir::operation::Split &node)
+{
+  const auto num_splits = node.param().num_splits;
+  // One output operand per split part.
+  assert(num_splits == static_cast<int>(node.getOutputs().size()));
+
+  const auto rank = node.param().rank;
+  // Translate the IR axis into the backend axis for the current layout,
+  // then normalize to a non-negative index for SplitLayer.
+  const auto axis =
+      ::onert::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_op_seq_layout);
+  auto axis_resolved = axis < 0 ? axis + rank : axis;
+  assert(0 <= axis_resolved && axis_resolved < rank);
+
+  const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+  auto in_tensor = _tensor_builder->at(input_idx).get();
+
+  // Collect the backend tensor of every output operand, in output order.
+  std::vector<operand::Tensor *> out_tensors;
+  for (auto &output_idx : node.getOutputs())
+    out_tensors.emplace_back(_tensor_builder->at(output_idx).get());
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::SplitLayer>();
+
+  fn->configure(in_tensor, num_splits, axis_resolved, out_tensors);
+
+  _return_fn = std::move(fn);
+}
+
+// The visitors below all follow the same single-input/single-output
+// pattern: look up the two backend tensors and configure the matching
+// element-wise (or shape-query) layer.
+
+// Build the CPU kernel for element-wise Abs.
+void KernelGenerator::visit(const ir::operation::Abs &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::AbsLayer>();
+
+  fn->configure(ifm_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+// Build the CPU kernel for element-wise Sin.
+void KernelGenerator::visit(const ir::operation::Sin &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::SinLayer>();
+
+  fn->configure(ifm_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+// Build the CPU kernel for element-wise reciprocal square root.
+void KernelGenerator::visit(const ir::operation::RSQRT &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::RsqrtLayer>();
+
+  fn->configure(ifm_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+// Build the CPU kernel for Shape (emits the input's shape as a tensor).
+void KernelGenerator::visit(const ir::operation::Shape &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::ShapeLayer>();
+
+  fn->configure(ifm_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h
new file mode 100644
index 000000000..8291fa99d
--- /dev/null
+++ b/runtime/onert/backend/cpu/KernelGenerator.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_CPU_KERNEL_GENERATOR_H__
+
+#include "TensorBuilder.h"
+#include "operand/Tensor.h"
+
+#include <backend/CustomKernelBuilder.h>
+#include <backend/IKernelGenerator.h>
+#include <ir/Operands.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+// Kernel generator of the CPU backend: one visit() overload per supported
+// IR operation, each of which builds the corresponding CPU kernel object.
+class KernelGenerator : public IKernelGenerator
+{
+public:
+  KernelGenerator(const ir::Operands &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder,
+                  const std::shared_ptr<custom::IKernelBuilder> &kernel_builder);
+
+  using IKernelGenerator::visit;
+
+  // Supported operations; each overload is implemented in KernelGenerator.cc.
+  void visit(const ir::OpSequence &) override;
+  void visit(const ir::operation::Conv2D &) override;
+  void visit(const ir::operation::DepthwiseConv2D &) override;
+  void visit(const ir::operation::MaxPool2D &) override;
+  void visit(const ir::operation::AvgPool2D &) override;
+  void visit(const ir::operation::Concat &) override;
+  void visit(const ir::operation::FullyConnected &) override;
+  void visit(const ir::operation::Reshape &) override;
+  void visit(const ir::operation::Squeeze &) override;
+  void visit(const ir::operation::Softmax &) override;
+  void visit(const ir::operation::Comparison &) override;
+  void visit(const ir::operation::Add &) override;
+  void visit(const ir::operation::Sub &) override;
+  void visit(const ir::operation::Mul &) override;
+  void visit(const ir::operation::Div &) override;
+  void visit(const ir::operation::Permute &) override;
+  void visit(const ir::operation::Gather &) override;
+  void visit(const ir::operation::Custom &node) override;
+  void visit(const ir::operation::Exp &) override;
+  void visit(const ir::operation::Logistic &) override;
+  void visit(const ir::operation::Pad &) override;
+  void visit(const ir::operation::Max &) override;
+  void visit(const ir::operation::Min &) override;
+  void visit(const ir::operation::Tanh &) override;
+  void visit(const ir::operation::Pack &) override;
+  void visit(const ir::operation::Unpack &) override;
+  void visit(const ir::operation::OneHot &) override;
+  void visit(const ir::operation::Cast &) override;
+  void visit(const ir::operation::Transpose &) override;
+  void visit(const ir::operation::ReduceSum &) override;
+  void visit(const ir::operation::ReduceMax &) override;
+  void visit(const ir::operation::ReduceMin &) override;
+  void visit(const ir::operation::Slice &) override;
+  void visit(const ir::operation::StridedSlice &) override;
+  void visit(const ir::operation::Split &) override;
+  void visit(const ir::operation::Abs &) override;
+  void visit(const ir::operation::Sin &) override;
+  void visit(const ir::operation::RSQRT &) override;
+  void visit(const ir::operation::Shape &) override;
+
+private:
+  // Operand table of the graph being compiled.
+  const ir::Operands &_ctx;
+  // Provides the backend tensors the generated kernels read/write.
+  std::shared_ptr<TensorBuilder> _tensor_builder;
+  // Builder for user-supplied custom operation kernels.
+  std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
+  // Layout of the op sequence currently being visited (set in visit(OpSequence)).
+  ir::Layout _current_op_seq_layout;
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/cpu/ShapeFixer.cc b/runtime/onert/backend/cpu/ShapeFixer.cc
new file mode 100644
index 000000000..4c2141128
--- /dev/null
+++ b/runtime/onert/backend/cpu/ShapeFixer.cc
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ShapeFixer.h"
+
+#include "kernel/AddLayer.h"
+#include "kernel/AvgPoolLayer.h"
+#include "kernel/CastLayer.h"
+#include "kernel/ConcatLayer.h"
+#include "kernel/ConvolutionLayer.h"
+#include "kernel/DepthwiseConvolutionLayer.h"
+#include "kernel/DivLayer.h"
+#include "kernel/ExpLayer.h"
+#include "kernel/FullyConnectedLayer.h"
+#include "kernel/GatherLayer.h"
+#include "kernel/MaxPoolLayer.h"
+#include "kernel/MulLayer.h"
+#include "kernel/OperationUtils.h"
+#include "kernel/PermuteLayer.h"
+#include "kernel/ReshapeLayer.h"
+#include "kernel/SoftMaxLayer.h"
+#include "kernel/SubLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+// ShapeFixer only needs the operand table; it inspects operand types,
+// never tensors.
+ShapeFixer::ShapeFixer(const ir::Operands &operand_ctx) : _ctx(operand_ctx) {}
+
+// For the operations below the CPU backend requires no shape fixing,
+// so the visitors are intentionally empty.
+void ShapeFixer::visit(const ir::operation::Comparison &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Conv2D &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::DepthwiseConv2D &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::MaxPool2D &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::AvgPool2D &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Concat &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Exp &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::FullyConnected &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Reshape &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Squeeze &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Softmax &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Gather &) { /* DO NOTHING */}
+
+// The four binary-arithmetic visitors below do no shape fixing; they only
+// reject quantized (QUANT8_ASYMM) inputs, which the CPU backend does not
+// yet support for these operations.
+void ShapeFixer::visit(const ir::operation::Add &node)
+{
+  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
+
+  // Quantization : not supported
+  if (_ctx.at(lhs_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM)
+  {
+    throw std::runtime_error{"ShapeFixer: NYI for quantized Add"};
+  }
+}
+
+void ShapeFixer::visit(const ir::operation::Sub &node)
+{
+  // The same as Add
+  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
+
+  // Quantization : not supported
+  if (_ctx.at(lhs_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM)
+  {
+    throw std::runtime_error{"ShapeFixer: NYI for quantized Sub"};
+  }
+}
+
+void ShapeFixer::visit(const ir::operation::Mul &node)
+{
+  // The same as Add
+  const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
+
+  // Quantization : not supported
+  if (_ctx.at(lhs_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM)
+  {
+    throw std::runtime_error{"ShapeFixer: NYI for quantized Mul"};
+  }
+}
+
+void ShapeFixer::visit(const ir::operation::Div &node)
+{
+  // The same as Add
+  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
+
+  // Quantization : not supported
+  if (_ctx.at(lhs_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM)
+  {
+    throw std::runtime_error{"ShapeFixer: NYI for quantized Div"};
+  }
+}
+
+// No shape fixing needed for these operations on the CPU backend.
+void ShapeFixer::visit(const ir::operation::Permute &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Custom &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Logistic &) { /* DO NOTHING */}
+
+// Pad needs no shape fixing; this visitor only rejects quantized input,
+// which the CPU backend does not yet support for Pad.
+void ShapeFixer::visit(const ir::operation::Pad &node)
+{
+  // TODO: empty this method when quantization is supported
+  // Use Pad's own input enum. The original code copy-pasted
+  // ir::operation::Sub::Input::LHS from the Sub visitor; that only worked
+  // because both enumerators happen to be index 0.
+  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
+
+  // Quantization : not supported
+  if (_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM)
+  {
+    throw std::runtime_error{"ShapeFixer: NYI for quantized Pad"};
+  }
+}
+
+// No shape fixing needed for any of the operations below on the CPU
+// backend; the visitors are intentionally empty.
+void ShapeFixer::visit(const ir::operation::Max &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Min &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Tanh &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Pack &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Unpack &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::OneHot &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Cast &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Transpose &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ReduceSum &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ReduceMax &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::ReduceMin &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Slice &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::StridedSlice &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Split &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Abs &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Sin &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::RSQRT &) { /* DO NOTHING */}
+
+void ShapeFixer::visit(const ir::operation::Shape &) { /* DO NOTHING */}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ShapeFixer.h b/runtime/onert/backend/cpu/ShapeFixer.h
new file mode 100644
index 000000000..73bad4b15
--- /dev/null
+++ b/runtime/onert/backend/cpu/ShapeFixer.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_SHAPE_FIXER_H__
+#define __ONERT_BACKEND_CPU_SHAPE_FIXER_H__
+
+#include "TensorBuilder.h"
+#include "operand/Tensor.h"
+
+#include <backend/IShapeFixer.h>
+#include <ir/Operands.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+// Shape fixer of the CPU backend. Most visitors are no-ops; a few throw
+// for operand configurations (quantized types) the backend cannot run.
+class ShapeFixer : public IShapeFixer
+{
+public:
+  // ctx: operand table of the graph; only operand type info is inspected.
+  ShapeFixer(const ir::Operands &ctx);
+
+  void visit(const ir::operation::Comparison &) override;
+  void visit(const ir::operation::Conv2D &) override;
+  void visit(const ir::operation::DepthwiseConv2D &) override;
+  void visit(const ir::operation::MaxPool2D &) override;
+  void visit(const ir::operation::AvgPool2D &) override;
+  void visit(const ir::operation::Concat &) override;
+  void visit(const ir::operation::FullyConnected &) override;
+  void visit(const ir::operation::Reshape &) override;
+  void visit(const ir::operation::Squeeze &) override;
+  void visit(const ir::operation::Softmax &) override;
+  void visit(const ir::operation::Add &) override;
+  void visit(const ir::operation::Gather &) override;
+  void visit(const ir::operation::Sub &) override;
+  void visit(const ir::operation::Mul &) override;
+  void visit(const ir::operation::Div &) override;
+  void visit(const ir::operation::Permute &) override;
+  void visit(const ir::operation::Custom &) override;
+  void visit(const ir::operation::Exp &) override;
+  void visit(const ir::operation::Logistic &) override;
+  void visit(const ir::operation::Pad &) override;
+  void visit(const ir::operation::Max &) override;
+  void visit(const ir::operation::Min &) override;
+  void visit(const ir::operation::Tanh &) override;
+  void visit(const ir::operation::Pack &) override;
+  void visit(const ir::operation::Unpack &) override;
+  void visit(const ir::operation::OneHot &) override;
+  void visit(const ir::operation::Cast &) override;
+  void visit(const ir::operation::Transpose &) override;
+  void visit(const ir::operation::ReduceSum &) override;
+  void visit(const ir::operation::ReduceMax &) override;
+  void visit(const ir::operation::ReduceMin &) override;
+  void visit(const ir::operation::Slice &) override;
+  void visit(const ir::operation::StridedSlice &) override;
+  void visit(const ir::operation::Split &) override;
+  void visit(const ir::operation::Abs &) override;
+  void visit(const ir::operation::Sin &) override;
+  void visit(const ir::operation::RSQRT &) override;
+  void visit(const ir::operation::Shape &) override;
+
+private:
+  // Operand table of the graph being checked.
+  const ir::Operands &_ctx;
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_SHAPE_FIXER_H__
diff --git a/runtime/onert/backend/cpu/TensorBuilder.cc b/runtime/onert/backend/cpu/TensorBuilder.cc
new file mode 100644
index 000000000..cbf7c9e5c
--- /dev/null
+++ b/runtime/onert/backend/cpu/TensorBuilder.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+TensorBuilder::TensorBuilder() : _tensor_mgr{new TensorManager()}
+{
+  // DO NOTHING
+}
+
+// Record an operand's info so its tensor can be built later; constant
+// operands are remembered separately (they use the constant allocator).
+// The backend layout parameter is unused by the CPU backend.
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                                       ir::Layout, bool as_const)
+{
+  _tensor_info_map.emplace(ind, info);
+
+  if (as_const)
+    _constants.append(ind);
+}
+
+// First use of an operand: build its tensor and claim its memory plan.
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+  assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+  const auto tensor_info = _tensor_info_map.at(ind);
+  const auto size = tensor_info.total_size();
+  _tensor_mgr->buildTensor(ind, tensor_info, _constants.contains(ind));
+  _tensor_mgr->claimPlan(ind, size);
+}
+
+// Last use of an operand: release its memory plan for reuse.
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) { _tensor_mgr->releasePlan(ind); }
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+  return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+// Allocate both constant and non-constant tensors up front.
+void TensorBuilder::prepare(void)
+{
+  _tensor_mgr->allocateConsts();
+  _tensor_mgr->allocateNonconsts();
+}
+
+void TensorBuilder::allocate()
+{
+  // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
+  // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
+}
+
+// Generic ITensor lookup used by the core; may return nullptr if unknown.
+std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind)
+{
+  return _tensor_mgr->at(ind);
+}
+
+void TensorBuilder::iterate(const IterateFunction &fn) { _tensor_mgr->iterate(fn); }
+
+// Backend-typed lookup; unlike tensorAt, the tensor is expected to exist.
+std::shared_ptr<operand::Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
+{
+  auto ret = _tensor_mgr->at(ind);
+  assert(ret != nullptr);
+  return ret;
+}
+
+// Hand tensor-manager ownership to the caller (leaves this builder without one).
+std::unique_ptr<ITensorManager> TensorBuilder::releaseTensorManager(void)
+{
+  return std::move(_tensor_mgr);
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h
new file mode 100644
index 000000000..863eb77e2
--- /dev/null
+++ b/runtime/onert/backend/cpu/TensorBuilder.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_CPU_TENSOR_BUILDER_H__
+
+#include "TensorManager.h"
+#include "operand/Tensor.h"
+
+#include <backend/ITensorBuilder.h>
+#include <ir/OperandIndexMap.h>
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+// Tensor builder of the CPU backend: records operand info, then builds and
+// plans tensors through a TensorManager as first/last uses are notified.
+class TensorBuilder : public ITensorBuilder
+{
+public:
+  TensorBuilder();
+
+  /**
+   * @brief Register tensor information to allocate on CPU backend
+   * @param[in] ind    Operand index
+   * @param[in] info   Operand information
+   * @param[in] layout Operand data layout (unused by this backend)
+   * @param[in] as_const Whether the operand is constant data
+   */
+  void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                          ir::Layout backend_layout, bool as_const) override;
+
+  void notifyFirstUse(const ir::OperandIndex &) override;
+  void notifyLastUse(const ir::OperandIndex &) override;
+
+  bool isRegistered(const ir::OperandIndex &) const override;
+
+  void prepare(void) override;
+  void allocate() override;
+  void postFunctionPrepare() override { /* DO NOTHING */}
+
+  std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
+
+  void iterate(const IterateFunction &fn) override;
+
+  std::unique_ptr<ITensorManager> releaseTensorManager(void) override;
+
+  // Backend-typed tensor lookup (non-virtual convenience accessor).
+  std::shared_ptr<operand::Tensor> at(const ir::OperandIndex &ind);
+
+private:
+  // Owns all tensors and their memory; released via releaseTensorManager().
+  std::unique_ptr<TensorManager> _tensor_mgr;
+  // Registered operand info, keyed by operand index.
+  ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+  // Indices of operands registered as constants.
+  ir::OperandIndexSequence _constants;
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/cpu/TensorManager.cc b/runtime/onert/backend/cpu/TensorManager.cc
new file mode 100644
index 000000000..96071bc51
--- /dev/null
+++ b/runtime/onert/backend/cpu/TensorManager.cc
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorManager.h"
+
+#include <util/logging.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+// Constants use a dynamic (per-tensor) memory manager; non-constants share
+// a planned memory manager driven by claim/release plans.
+TensorManager::TensorManager()
+    : _const_mgr{new cpu_common::DynamicMemoryManager()},
+      _nonconst_mgr{new cpu_common::MemoryManager()}
+{
+  // DO NOTHING
+}
+
+// Allocate a dedicated buffer for every tensor marked constant.
+void TensorManager::allocateConsts(void)
+{
+  for (auto &pair : _tensors)
+  {
+    const auto &ind = pair.first;
+    auto tensor = pair.second;
+    if (_as_constants[ind])
+    {
+      auto mem_alloc = _const_mgr->allocate(ind, tensor->total_size());
+      tensor->setBuffer(mem_alloc);
+      auto buffer = mem_alloc->base();
+      VERBOSE(CPU_TENSORMANAGER) << "CONSTANT TENSOR(#" << ind.value()
+                                 << "): " << static_cast<void *>(buffer)
+                                 << "size : " << tensor->total_size() << std::endl;
+    }
+  }
+}
+
+// Run the planned allocation, then attach each non-constant tensor to its
+// slice of the shared buffer.
+void TensorManager::allocateNonconsts(void)
+{
+  _nonconst_mgr->allocate();
+
+  for (auto &pair : _tensors)
+  {
+    const auto &ind = pair.first;
+    auto tensor = pair.second;
+    if (!_as_constants[ind])
+    {
+      auto *buffer = _nonconst_mgr->getBuffer(ind);
+      tensor->setBuffer(buffer);
+
+      VERBOSE(CPU_TENSORMANAGER) << "TENSOR(#" << ind.value()
+                                 << "): " << static_cast<void *>(buffer) << std::endl;
+    }
+  }
+}
+
+void TensorManager::deallocateConsts(void) { _const_mgr->deallocate(); }
+
+void TensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
+
+// Create the (not yet buffered) tensor object for an operand; each operand
+// may be built only once.
+void TensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
+                                bool as_const)
+{
+  assert(_tensors.find(ind) == _tensors.end());
+  auto tensor = std::make_shared<operand::Tensor>(tensor_info);
+  _tensors[ind] = tensor;
+  _as_constants[ind] = as_const;
+}
+
+// Claim a memory-plan slot for a non-constant tensor (constants are
+// allocated individually and need no plan).
+void TensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
+{
+  assert(_tensors.find(ind) != _tensors.end());
+  if (!_as_constants[ind])
+    _nonconst_mgr->claimPlan(ind, size);
+}
+
+// Release the memory-plan slot of a non-constant tensor.
+void TensorManager::releasePlan(const ir::OperandIndex &ind)
+{
+  assert(_tensors.find(ind) != _tensors.end());
+  if (!_as_constants[ind])
+    _nonconst_mgr->releasePlan(ind);
+}
+
+// Look up a tensor; returns nullptr for unknown operand indices.
+std::shared_ptr<operand::Tensor> TensorManager::at(const ir::OperandIndex &ind)
+{
+  if (_tensors.find(ind) == _tensors.end())
+    return nullptr;
+  return _tensors.at(ind);
+}
+
+// Invoke fn on the index of every built tensor.
+void TensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
+{
+  for (const auto &it : _tensors)
+    fn(it.first);
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/TensorManager.h b/runtime/onert/backend/cpu/TensorManager.h
new file mode 100644
index 000000000..ab1eec3fc
--- /dev/null
+++ b/runtime/onert/backend/cpu/TensorManager.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_TENSOR_MANAGER_H__
+#define __ONERT_BACKEND_CPU_TENSOR_MANAGER_H__
+
+#include "MemoryManager.h"
+#include "operand/Tensor.h"
+
+#include <backend/ITensorManager.h>
+#include <ir/OperandIndexMap.h>
+#include <ir/OperandInfo.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+class TensorManager : public backend::ITensorManager
+{
+public:
+ TensorManager();
+ virtual ~TensorManager() = default;
+
+ void allocateConsts(void);
+ void allocateNonconsts(void);
+ void deallocateConsts(void);
+ void deallocateNonconsts(void);
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, bool as_const);
+
+ void claimPlan(const ir::OperandIndex &ind, uint32_t size);
+ void releasePlan(const ir::OperandIndex &ind);
+
+ std::shared_ptr<operand::Tensor> at(const ir::OperandIndex &ind);
+
+ void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
+
+private:
+ std::unique_ptr<cpu_common::DynamicMemoryManager> _const_mgr;
+ std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr;
+ ir::OperandIndexMap<std::shared_ptr<operand::Tensor>> _tensors;
+ ir::OperandIndexMap<bool> _as_constants;
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_TENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/cpu/cpu.cc b/runtime/onert/backend/cpu/cpu.cc
new file mode 100644
index 000000000..5385bb2a3
--- /dev/null
+++ b/runtime/onert/backend/cpu/cpu.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+#include <util/logging.h>
+
+extern "C" {
+onert::backend::Backend *onert_backend_create()
+{
+ VERBOSE(onert_backend_create) << "'cpu' loaded\n";
+ return new onert::backend::cpu::Backend;
+}
+
+void onert_backend_destroy(onert::backend::Backend *backend)
+{
+  // Tag must name this function; the original copy-pasted the create tag,
+  // so unload events were logged as 'onert_backend_create'.
+  VERBOSE(onert_backend_destroy) << "'cpu' unloaded\n";
+  delete backend;
+}
+}
diff --git a/runtime/onert/backend/cpu/kernel/AbsLayer.cc b/runtime/onert/backend/cpu/kernel/AbsLayer.cc
new file mode 100644
index 000000000..770386826
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/AbsLayer.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AbsLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Elementwise.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+AbsLayer::AbsLayer() : _input(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+void AbsLayer::absFloat32()
+{
+ nnfw::cker::Abs(convertTensorToCkerShape(_input),
+ reinterpret_cast<const float *>(_input->buffer()),
+ convertTensorToCkerShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void AbsLayer::absQuant8() { throw std::runtime_error{"NYI"}; }
+
+void AbsLayer::configure(const operand::Tensor *input, operand::Tensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void AbsLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ absFloat32();
+ }
+ else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ absQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/AbsLayer.h b/runtime/onert/backend/cpu/kernel/AbsLayer.h
new file mode 100644
index 000000000..551782a45
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/AbsLayer.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_ABSLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_ABSLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class AbsLayer : public ::onert::exec::IFunction
+{
+public:
+ AbsLayer();
+
+public:
+ void absFloat32();
+
+ void absQuant8();
+
+ void configure(const operand::Tensor *input, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_input;
+ operand::Tensor *_output;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_ABSLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/AddLayer.cc b/runtime/onert/backend/cpu/kernel/AddLayer.cc
new file mode 100644
index 000000000..51320907d
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/AddLayer.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AddLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+void AddLayer::addFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ op_params.type = nnfw::cker::BinaryArithmeticOpType::ADD;
+ op_params.float_activation_max = output_activation_max;
+ op_params.float_activation_min = output_activation_min;
+
+ if (!HaveSameShapes(_lhs, _rhs))
+ {
+ nnfw::cker::BroadcastBinaryArithmeticOpSlow(
+ op_params, convertToExtendedCkerShape(_lhs),
+ reinterpret_cast<const float *>(_lhs->buffer()), convertToExtendedCkerShape(_rhs),
+ reinterpret_cast<const float *>(_rhs->buffer()), convertToExtendedCkerShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+ return;
+ }
+
+ nnfw::cker::BinaryArithmeticOp(
+ op_params, convertTensorToCkerShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
+ convertTensorToCkerShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
+ convertTensorToCkerShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void AddLayer::addQuant8()
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
+ &output_activation_max);
+ // nnfw::cker::BinaryArithmeticOpParam op_params;
+ // op_params.quantized_activation_max = output_activation_max;
+ // op_params.quantized_activation_min = output_activation_min;
+
+ // cker quant8 add is not implemented yet
+ throw std::runtime_error{"NYI"};
+}
+
+void AddLayer::configure(const operand::Tensor *lhs, const operand::Tensor *rhs,
+ const ir::Activation activation, operand::Tensor *output)
+{
+ assert(lhs != nullptr);
+ assert(rhs != nullptr);
+ assert(output != nullptr);
+
+ _lhs = lhs;
+ _rhs = rhs;
+ _activation = activation;
+ _output = output;
+}
+
+void AddLayer::run()
+{
+ if (_lhs->data_type() == OperandType::FLOAT32)
+ {
+ addFloat32();
+ }
+ else if (_lhs->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ addQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/AddLayer.h b/runtime/onert/backend/cpu/kernel/AddLayer.h
new file mode 100644
index 000000000..657decc72
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/AddLayer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_ADDLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_ADDLAYER_H__
+
+#include "../operand/Tensor.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class AddLayer : public ::onert::exec::IFunction
+{
+public:
+ AddLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+ {
+ // DO NOTHING
+ }
+
+public:
+ void addFloat32();
+
+ void addQuant8();
+
+ void configure(const operand::Tensor *lhs, const operand::Tensor *rhs,
+ const ir::Activation activation, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_lhs;
+ const operand::Tensor *_rhs;
+ operand::Tensor *_output;
+
+ ir::Activation _activation{ir::Activation::NONE};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_ADDLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/AvgPoolLayer.cc b/runtime/onert/backend/cpu/kernel/AvgPoolLayer.cc
new file mode 100644
index 000000000..ba11b7cde
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/AvgPoolLayer.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AvgPoolLayer.h"
+
+#include <cker/operation/AveragePool.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+#define AVGPOOLING_PARAMETERS \
+ nnfw::cker::PoolParams op_params; \
+ op_params.stride_height = _strideHeight; \
+ op_params.stride_width = _strideWidth; \
+ op_params.filter_height = _kernelHeight; \
+ op_params.filter_width = _kernelWidth; \
+ op_params.padding_values.height = (int8_t)_paddingTop; \
+ op_params.padding_values.width = (int8_t)_paddingLeft;
+
+AvgPoolLayer::AvgPoolLayer()
+ : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0),
+ _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void AvgPoolLayer::averagePoolFloat32()
+{
+ AVGPOOLING_PARAMETERS
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::cker::AveragePool(op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const float *>(_input->buffer()),
+ convertTensorToCkerShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+}
+void AvgPoolLayer::averagePoolQuant8()
+{
+ AVGPOOLING_PARAMETERS
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
+ nnfw::cker::AveragePool(op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const uint8_t *>(_input->buffer()),
+ convertTensorToCkerShape(_output),
+ reinterpret_cast<uint8_t *>(_output->buffer()));
+}
+
+void AvgPoolLayer::configure(const operand::Tensor *input, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const ir::Activation activation,
+ operand::Tensor *output)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _kernelWidth = kernelWidth;
+ _kernelHeight = kernelHeight;
+ _activation = activation;
+ _output = output;
+}
+
+void AvgPoolLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ averagePoolFloat32();
+ }
+ else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ averagePoolQuant8();
+ }
+}
+
+#undef AVGPOOLING_PARAMETERS
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/AvgPoolLayer.h b/runtime/onert/backend/cpu/kernel/AvgPoolLayer.h
new file mode 100644
index 000000000..7d7ef980d
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/AvgPoolLayer.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__
+
+#include "../operand/Tensor.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class AvgPoolLayer : public ::onert::exec::IFunction
+{
+public:
+ AvgPoolLayer();
+
+public:
+ void averagePoolFloat32();
+
+ void averagePoolQuant8();
+
+ void configure(const operand::Tensor *input, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const ir::Activation activation,
+ operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_input;
+ operand::Tensor *_output;
+
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+ uint32_t _kernelWidth;
+ uint32_t _kernelHeight;
+
+ ir::Activation _activation;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/CastLayer.cc b/runtime/onert/backend/cpu/kernel/CastLayer.cc
new file mode 100644
index 000000000..e485e04c7
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/CastLayer.cc
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CastLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+CastLayer::CastLayer() : _input(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+void CastLayer::configure(const operand::Tensor *input, operand::Tensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+template <typename FromT, typename ToT> void CastLayer::castTensor(const FromT *in, ToT *out)
+{
+ auto input_shape = convertTensorToCkerShape(_input);
+ auto output_shape = convertTensorToCkerShape(_output);
+ const auto num_elements = MatchingFlatSize(input_shape, output_shape);
+
+ std::transform(in, in + num_elements, out, [](FromT a) { return static_cast<ToT>(a); });
+}
+
+// Dispatch on the output tensor's data type and cast element-wise from FromT.
+template <typename FromT> void CastLayer::castPtr(const FromT *in, DataPtr out)
+{
+  switch (_output->data_type())
+  {
+    case ir::DataType::FLOAT32:
+      castTensor(in, out.f);
+      return;
+    case ir::DataType::INT32:
+      castTensor(in, out.i32);
+      return;
+    case ir::DataType::UINT32:
+      castTensor(in, out.u32);
+      return;
+    case ir::DataType::UINT8:
+      castTensor(in, out.u8);
+      return;
+    case ir::DataType::BOOL8:
+      castTensor(in, out.b);
+      return;
+    case ir::DataType::QUANT8_ASYMM:
+    case ir::DataType::QUANT8_SYMM:
+      // Trailing space fixed: message previously rendered as "...type6".
+      throw std::runtime_error("Not supported output type " +
+                               std::to_string((int)_output->data_type()));
+  }
+}
+
+// Reinterpret the raw input buffer according to the input tensor's data
+// type, then forward to castPtr() which dispatches on the output type.
+void CastLayer::run()
+{
+  auto input_buf = _input->buffer();
+  auto output_buf = _output->buffer();
+  const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
+  auto out = *reinterpret_cast<DataPtr *>(&output_buf);
+
+  switch (_input->data_type())
+  {
+    case ir::DataType::FLOAT32:
+      castPtr(in.f, out);
+      return;
+    case ir::DataType::INT32:
+      castPtr(in.i32, out);
+      return;
+    case ir::DataType::UINT32:
+      castPtr(in.u32, out);
+      return;
+    case ir::DataType::UINT8:
+      castPtr(in.u8, out);
+      return;
+    case ir::DataType::BOOL8:
+      castPtr(in.b, out);
+      return;
+    case ir::DataType::QUANT8_ASYMM:
+    case ir::DataType::QUANT8_SYMM:
+      // Trailing space fixed: message previously rendered as "...type6".
+      throw std::runtime_error("Not supported input type " +
+                               std::to_string((int)_input->data_type()));
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/CastLayer.h b/runtime/onert/backend/cpu/kernel/CastLayer.h
new file mode 100644
index 000000000..4690e1007
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/CastLayer.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_CASTLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_CASTLAYER_H__
+
+#include "../operand/Tensor.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class CastLayer : public ::onert::exec::IFunction
+{
+public:
+ CastLayer();
+
+public:
+ template <typename FromT, typename ToT> void castTensor(const FromT *in, ToT *out);
+ template <typename FromT> void castPtr(const FromT *in, DataPtr out);
+
+ void configure(const operand::Tensor *input, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_input;
+ operand::Tensor *_output;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_CASTLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/CompareLayer.cc b/runtime/onert/backend/cpu/kernel/CompareLayer.cc
new file mode 100644
index 000000000..c8ee683b1
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/CompareLayer.cc
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "CompareLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Comparison.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+namespace
+{
+
+using OpType = onert::ir::operation::Comparison::ComparisonType;
+using namespace onert::backend::cpu;
+
+template <typename T>
+void compareScalar(const operand::Tensor *lhs, const operand::Tensor *rhs, operand::Tensor *output,
+ OpType op_type)
+{
+ bool requires_broadcast = !HaveSameShapes(lhs, rhs);
+
+ if (requires_broadcast)
+ {
+ switch (op_type)
+ {
+ case OpType::Equal:
+ Broadcast4DSlowEqual(
+ convertToExtendedCkerShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ convertToExtendedCkerShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ convertToExtendedCkerShape(output), reinterpret_cast<bool *>(output->buffer()));
+ break;
+ case OpType::NotEqual:
+ Broadcast4DSlowNotEqual(
+ convertToExtendedCkerShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ convertToExtendedCkerShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ convertToExtendedCkerShape(output), reinterpret_cast<bool *>(output->buffer()));
+ break;
+ case OpType::Greater:
+ Broadcast4DSlowGreater(
+ convertToExtendedCkerShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ convertToExtendedCkerShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ convertToExtendedCkerShape(output), reinterpret_cast<bool *>(output->buffer()));
+ break;
+ case OpType::GreaterEqual:
+ Broadcast4DSlowGreaterEqual(
+ convertToExtendedCkerShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ convertToExtendedCkerShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ convertToExtendedCkerShape(output), reinterpret_cast<bool *>(output->buffer()));
+ break;
+ case OpType::Less:
+ Broadcast4DSlowLess(
+ convertToExtendedCkerShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ convertToExtendedCkerShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ convertToExtendedCkerShape(output), reinterpret_cast<bool *>(output->buffer()));
+ break;
+ case OpType::LessEqual:
+ Broadcast4DSlowLessEqual(
+ convertToExtendedCkerShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ convertToExtendedCkerShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ convertToExtendedCkerShape(output), reinterpret_cast<bool *>(output->buffer()));
+ break;
+ default:
+ throw std::runtime_error{"Invalid OpType for CompareLayer"};
+ }
+ }
+ else // if (requires_broadcast == false)
+ {
+ switch (op_type)
+ {
+ case OpType::Equal:
+ EqualNoScaling(convertToExtendedCkerShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ convertToExtendedCkerShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ convertToExtendedCkerShape(output),
+ reinterpret_cast<bool *>(output->buffer()));
+ break;
+ case OpType::NotEqual:
+ NotEqualNoScaling(
+ convertToExtendedCkerShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ convertToExtendedCkerShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ convertToExtendedCkerShape(output), reinterpret_cast<bool *>(output->buffer()));
+ break;
+ case OpType::Greater:
+ GreaterNoScaling(
+ convertToExtendedCkerShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ convertToExtendedCkerShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ convertToExtendedCkerShape(output), reinterpret_cast<bool *>(output->buffer()));
+ break;
+ case OpType::GreaterEqual:
+ GreaterEqualNoScaling(
+ convertToExtendedCkerShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ convertToExtendedCkerShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ convertToExtendedCkerShape(output), reinterpret_cast<bool *>(output->buffer()));
+ break;
+ case OpType::Less:
+ LessNoScaling(convertToExtendedCkerShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ convertToExtendedCkerShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ convertToExtendedCkerShape(output),
+ reinterpret_cast<bool *>(output->buffer()));
+ break;
+ case OpType::LessEqual:
+ LessEqualNoScaling(
+ convertToExtendedCkerShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ convertToExtendedCkerShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ convertToExtendedCkerShape(output), reinterpret_cast<bool *>(output->buffer()));
+ break;
+ default:
+ throw std::runtime_error{"Invalid OpType for CompareLayer"};
+ }
+ }
+ return;
+}
+} // namespace
+
+CompareLayer::CompareLayer()
+ : _lhs(nullptr), _rhs(nullptr), _output(nullptr),
+ _op_type(ir::operation::Comparison::ComparisonType::Equal)
+{
+ // DO NOTHING
+}
+
+void CompareLayer::compareQuant8() { throw std::runtime_error{"Compare NYI for quantized"}; }
+
+void CompareLayer::configure(const operand::Tensor *lhs, const operand::Tensor *rhs,
+ const OpType op_type, operand::Tensor *output)
+{
+ _lhs = lhs;
+ _rhs = rhs;
+ _op_type = op_type;
+ _output = output;
+}
+
+void CompareLayer::run()
+{
+ if (_lhs->data_type() == OperandType::FLOAT32)
+ {
+ compareScalar<float>(_lhs, _rhs, _output, _op_type);
+ }
+ else if (_lhs->data_type() == OperandType::INT32)
+ {
+ compareScalar<int32_t>(_lhs, _rhs, _output, _op_type);
+ }
+ else if (_lhs->data_type() == OperandType::BOOL8)
+ {
+ compareScalar<uint8_t>(_lhs, _rhs, _output, _op_type);
+ }
+ else if (_lhs->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ compareQuant8();
+ }
+ else
+ {
+ throw std::runtime_error{"Compare: unsupported data type"};
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/CompareLayer.h b/runtime/onert/backend/cpu/kernel/CompareLayer.h
new file mode 100644
index 000000000..b62e5a2e4
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/CompareLayer.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_COMPARELAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_COMPARELAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+#include <ir/operation/Comparison.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class CompareLayer : public ::onert::exec::IFunction
+{
+public:
+ CompareLayer();
+
+public:
+ void compareQuant8();
+
+ void configure(const operand::Tensor *lhs, const operand::Tensor *rhs,
+ const ir::operation::Comparison::ComparisonType op_type, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_lhs;
+ const operand::Tensor *_rhs;
+ operand::Tensor *_output;
+ ir::operation::Comparison::ComparisonType _op_type;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_COMPARELAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/ConcatLayer.cc b/runtime/onert/backend/cpu/kernel/ConcatLayer.cc
new file mode 100644
index 000000000..80a29e8ce
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/ConcatLayer.cc
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConcatLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Concatenation.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+ConcatLayer::ConcatLayer() : _inputs(), _output(nullptr), _axis(0)
+{
+ // DO NOTHING
+}
+
+void ConcatLayer::concatenationFloat32()
+{
+ uint32_t num_inputs = _inputs.size();
+
+ nnfw::cker::ConcatenationParams op_params;
+ op_params.axis = _axis;
+ op_params.inputs_count = num_inputs;
+
+ std::vector<nnfw::cker::Shape *> inputDimsPtr;
+ std::vector<nnfw::cker::Shape> inputDims;
+ inputDimsPtr.reserve(num_inputs);
+ inputDims.reserve(num_inputs);
+
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ inputDims.push_back(convertTensorToCkerShape(_inputs[i]));
+ inputDimsPtr.push_back(&inputDims[i]);
+ }
+
+ std::vector<const float *> inputFloatPtrs;
+
+ for (const auto input : _inputs)
+ {
+ inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(input->buffer()));
+ }
+
+ nnfw::cker::Concatenation<float>(op_params, inputDimsPtr.data(), inputFloatPtrs.data(),
+ convertTensorToCkerShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+}
+void ConcatLayer::concatenationQuant8()
+{
+ uint32_t num_inputs = _inputs.size();
+
+ std::vector<int32_t> input_zeropoints(num_inputs);
+ std::vector<float> input_scales(num_inputs);
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ input_zeropoints[i] = _inputs[i]->offset();
+ input_scales[i] = _inputs[i]->scale();
+ }
+
+ nnfw::cker::ConcatenationParams op_params;
+ op_params.axis = _axis;
+ op_params.inputs_count = num_inputs;
+ op_params.input_zeropoint = input_zeropoints.data();
+ op_params.input_scale = input_scales.data();
+ op_params.output_zeropoint = _output->offset();
+ op_params.output_scale = _output->scale();
+
+ std::vector<nnfw::cker::Shape *> inputDimsPtr;
+ std::vector<nnfw::cker::Shape> inputDims;
+ inputDimsPtr.reserve(num_inputs);
+ inputDims.reserve(num_inputs);
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ inputDims.push_back(convertTensorToCkerShape(_inputs[i]));
+ inputDimsPtr.push_back(&inputDims[i]);
+ }
+
+ std::vector<const uint8_t *> inputDataPtrs;
+ for (const auto input : _inputs)
+ {
+ inputDataPtrs.emplace_back(reinterpret_cast<const uint8_t *>(input->buffer()));
+ }
+
+ nnfw::cker::ConcatenationWithScaling(op_params, inputDimsPtr.data(), inputDataPtrs.data(),
+ convertTensorToCkerShape(_output),
+ reinterpret_cast<uint8_t *>(_output->buffer()));
+}
+
+void ConcatLayer::configure(const std::vector<const operand::Tensor *> &inputs, int32_t axis,
+ operand::Tensor *output)
+{
+ assert(inputs.size() > 0);
+ assert(output != nullptr);
+
+ _inputs = inputs;
+ _axis = axis;
+ _output = output;
+}
+
+void ConcatLayer::run()
+{
+ if (_output->data_type() == OperandType::FLOAT32)
+ {
+ concatenationFloat32();
+ }
+ else if (_output->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ concatenationQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/ConcatLayer.h b/runtime/onert/backend/cpu/kernel/ConcatLayer.h
new file mode 100644
index 000000000..1ac1604cf
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/ConcatLayer.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_CONCATLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_CONCATLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class ConcatLayer : public ::onert::exec::IFunction
+{
+public:
+ ConcatLayer();
+
+public:
+ void concatenationFloat32();
+
+ void concatenationQuant8();
+
+ void configure(const std::vector<const operand::Tensor *> &inputs, int32_t axis,
+ operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ std::vector<const operand::Tensor *> _inputs;
+ operand::Tensor *_output;
+ int32_t _axis;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_CONCATLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/ConvolutionLayer.cc b/runtime/onert/backend/cpu/kernel/ConvolutionLayer.cc
new file mode 100644
index 000000000..398054527
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/ConvolutionLayer.cc
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include <cker/operation/Conv.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+ConvolutionLayer::ConvolutionLayer()
+ : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE),
+ _conv_kernel(new nnfw::cker::Conv()), _prepare(false)
+{
+ // DO NOTHING
+}
+
+ConvolutionLayer::~ConvolutionLayer() = default;
+
+void ConvolutionLayer::convFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::cker::ConvParams op_params;
+ op_params.padding_type = getPaddingType(_paddingType);
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = 1;
+ op_params.dilation_height_factor = 1;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::cker::Conv &kernel = *_conv_kernel;
+ if (!_prepare)
+ {
+ bool is_replaced_weights = false;
+ kernel.prepare(convertTensorToCkerShape(_kernel),
+ reinterpret_cast<const float *>(_kernel->buffer()), op_params.padding_type,
+ is_replaced_weights);
+
+ if (is_replaced_weights)
+ {
+ // TODO Remove const_cast
+ const_cast<operand::Tensor *>(_kernel)->decrease_ref();
+ }
+ _prepare = true;
+ }
+ kernel(op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const float *>(_input->buffer()), convertTensorToCkerShape(_kernel),
+ reinterpret_cast<const float *>(_kernel->buffer()), convertTensorToCkerShape(_bias),
+ reinterpret_cast<const float *>(_bias->buffer()), convertTensorToCkerShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+}
+
+void ConvolutionLayer::convQuant8()
+{
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
+ &output_activation_max);
+
+ double real_multiplier = 0.0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ GetQuantizedConvolutionMultiplier(_input, _kernel, _bias, _output, &real_multiplier);
+ QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ nnfw::cker::ConvParams op_params;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = 1;
+ op_params.dilation_height_factor = 1;
+ op_params.padding_type = getPaddingType(_paddingType);
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.input_offset = -_input->offset();
+ op_params.weights_offset = -_kernel->offset();
+ op_params.output_offset = _output->offset();
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
+ nnfw::cker::Conv &kernel = *_conv_kernel;
+ if (!_prepare)
+ {
+ kernel.prepareQuant(convertTensorToCkerShape(_input), convertTensorToCkerShape(_kernel),
+ convertTensorToCkerShape(_output), _strideWidth, _strideHeight);
+ _prepare = true;
+ }
+ kernel(op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const uint8_t *>(_input->buffer()), convertTensorToCkerShape(_kernel),
+ reinterpret_cast<const uint8_t *>(_kernel->buffer()), convertTensorToCkerShape(_bias),
+ reinterpret_cast<const int32_t *>(_bias->buffer()), convertTensorToCkerShape(_output),
+ reinterpret_cast<uint8_t *>(_output->buffer()));
+}
+
+void ConvolutionLayer::configure(const operand::Tensor *input, const operand::Tensor *kernel,
+ const operand::Tensor *bias, const ir::PaddingType paddingType,
+ const uint32_t paddingLeft, const uint32_t paddingRight,
+ const uint32_t paddingTop, const uint32_t paddingBottom,
+ const uint32_t strideWidth, const uint32_t strideHeight,
+ const ir::Activation activation, operand::Tensor *output)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _paddingType = paddingType;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _activation = activation;
+ _output = output;
+}
+
+void ConvolutionLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ convFloat32();
+ }
+ else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ convQuant8();
+ }
+}
+
+#undef ANDROID_NN_CONV_PARAMETERS
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/ConvolutionLayer.h b/runtime/onert/backend/cpu/kernel/ConvolutionLayer.h
new file mode 100644
index 000000000..70af098c4
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/ConvolutionLayer.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__
+
+#include "../operand/Tensor.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+#include <functional>
+#include <memory>
+
+namespace nnfw
+{
+namespace cker
+{
+class Conv;
+}
+} // namespace nnfw
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class ConvolutionLayer : public ::onert::exec::IFunction
+{
+public:
+ ConvolutionLayer();
+ ~ConvolutionLayer();
+
+public:
+ void convFloat32();
+
+ void convQuant8();
+
+ void configure(const operand::Tensor *input, const operand::Tensor *kernel,
+ const operand::Tensor *bias, const ir::PaddingType paddingType,
+ const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
+ const ir::Activation activation, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_input;
+ const operand::Tensor *_kernel;
+ const operand::Tensor *_bias;
+ operand::Tensor *_output;
+
+ ir::PaddingType _paddingType;
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+
+ ir::Activation _activation;
+
+ std::unique_ptr<nnfw::cker::Conv> _conv_kernel;
+
+ bool _prepare;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/kernel/DepthwiseConvolutionLayer.cc
new file mode 100644
index 000000000..a1efe9fed
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/DepthwiseConvolutionLayer.cc
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConvolutionLayer.h"
+
+#include <cker/operation/DepthwiseConv.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+DepthwiseConvolutionLayer::DepthwiseConvolutionLayer()
+ : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr), _paddingLeft(0),
+ _paddingTop(0), _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0),
+ _multiplier(0), _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void DepthwiseConvolutionLayer::convFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::cker::DepthwiseConvParams op_params;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = 1;
+ op_params.dilation_height_factor = 1;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.depth_multiplier = _multiplier;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::cker::DepthwiseConv(
+ op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const float *>(_input->buffer()), convertTensorToCkerShape(_kernel),
+ reinterpret_cast<const float *>(_kernel->buffer()), convertTensorToCkerShape(_bias),
+ reinterpret_cast<const float *>(_bias->buffer()), convertTensorToCkerShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+}
+
+void DepthwiseConvolutionLayer::convQuant8()
+{
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
+ &output_activation_max);
+
+ double real_multiplier = 0.0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ GetQuantizedConvolutionMultiplier(_input, _kernel, _bias, _output, &real_multiplier);
+ QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ nnfw::cker::DepthwiseConvParams op_params;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = 1;
+ op_params.dilation_height_factor = 1;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.depth_multiplier = _multiplier;
+ op_params.input_offset = -_input->offset();
+ op_params.weights_offset = -_kernel->offset();
+ op_params.output_offset = _output->offset();
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
+ nnfw::cker::DepthwiseConv(
+ op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const uint8_t *>(_input->buffer()), convertTensorToCkerShape(_kernel),
+ reinterpret_cast<const uint8_t *>(_kernel->buffer()), convertTensorToCkerShape(_bias),
+ reinterpret_cast<const int32_t *>(_bias->buffer()), convertTensorToCkerShape(_output),
+ reinterpret_cast<uint8_t *>(_output->buffer()));
+}
+
+void DepthwiseConvolutionLayer::configure(const operand::Tensor *input,
+ const operand::Tensor *kernel,
+ const operand::Tensor *bias, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t multiplier,
+ const ir::Activation activation, operand::Tensor *output)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _multiplier = multiplier;
+ _activation = activation;
+ _output = output;
+}
+
+void DepthwiseConvolutionLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ convFloat32();
+ }
+ else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ convQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/kernel/DepthwiseConvolutionLayer.h
new file mode 100644
index 000000000..dcc3762cc
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/DepthwiseConvolutionLayer.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_KERNEL_CPU_DEPTHWISECONVOLUTIONLAYER_H__
+#define __ONERT_KERNEL_CPU_DEPTHWISECONVOLUTIONLAYER_H__
+
+#include "../operand/Tensor.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class DepthwiseConvolutionLayer : public ::onert::exec::IFunction
+{
+public:
+ DepthwiseConvolutionLayer();
+
+public:
+ void convFloat32();
+
+ void convQuant8();
+
+ void configure(const operand::Tensor *input, const operand::Tensor *kernel,
+ const operand::Tensor *bias, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
+ const uint32_t multiplier, const ir::Activation activation,
+ operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_input;
+ const operand::Tensor *_kernel;
+ const operand::Tensor *_bias;
+ operand::Tensor *_output;
+
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+
+ uint32_t _multiplier;
+
+ ir::Activation _activation;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_KERNEL_CPU_DEPTHWISECONVOLUTIONLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/DivLayer.cc b/runtime/onert/backend/cpu/kernel/DivLayer.cc
new file mode 100644
index 000000000..ec9daae9b
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/DivLayer.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DivLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+void DivLayer::divFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ op_params.type = nnfw::cker::BinaryArithmeticOpType::DIV;
+ op_params.float_activation_max = output_activation_max;
+ op_params.float_activation_min = output_activation_min;
+
+ if (!HaveSameShapes(_lhs, _rhs))
+ {
+ nnfw::cker::BroadcastBinaryArithmeticOpSlow(
+ op_params, convertToExtendedCkerShape(_lhs),
+ reinterpret_cast<const float *>(_lhs->buffer()), convertToExtendedCkerShape(_rhs),
+ reinterpret_cast<const float *>(_rhs->buffer()), convertToExtendedCkerShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+ return;
+ }
+
+ nnfw::cker::BinaryArithmeticOp(
+ op_params, convertTensorToCkerShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
+ convertTensorToCkerShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
+ convertTensorToCkerShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void DivLayer::divQuant8()
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
+ &output_activation_max);
+ // nnfw::cker::BinaryArithmeticOpParam op_params;
+ // op_params.quantized_activation_max = output_activation_max;
+ // op_params.quantized_activation_min = output_activation_min;
+
+ // cker quant8 div is not implemented yet
+ throw std::runtime_error{"Div NYI for quantized"};
+}
+
+void DivLayer::configure(const operand::Tensor *lhs, const operand::Tensor *rhs,
+ const ir::Activation activation, operand::Tensor *output)
+{
+ _lhs = lhs;
+ _rhs = rhs;
+ _activation = activation;
+ _output = output;
+}
+
+void DivLayer::run()
+{
+ if (_output->data_type() == OperandType::FLOAT32)
+ {
+ divFloat32();
+ }
+ else if (_output->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ divQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/DivLayer.h b/runtime/onert/backend/cpu/kernel/DivLayer.h
new file mode 100644
index 000000000..3e14d9c19
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/DivLayer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_DIVLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_DIVLAYER_H__
+
+#include "../operand/Tensor.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class DivLayer : public ::onert::exec::IFunction
+{
+public:
+ DivLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+ {
+ // DO NOTHING
+ }
+
+public:
+ void divFloat32();
+
+ void divQuant8();
+
+ void configure(const operand::Tensor *lhs, const operand::Tensor *rhs,
+ const ir::Activation activation, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_lhs;
+ const operand::Tensor *_rhs;
+ operand::Tensor *_output;
+
+ ir::Activation _activation{ir::Activation::NONE};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_DIVLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/ExpLayer.cc b/runtime/onert/backend/cpu/kernel/ExpLayer.cc
new file mode 100644
index 000000000..335a789b2
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/ExpLayer.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExpLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Exp.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+ExpLayer::ExpLayer() : _input(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+void ExpLayer::expFloat32()
+{
+ nnfw::cker::Exp(convertTensorToCkerShape(_input),
+ reinterpret_cast<const float *>(_input->buffer()),
+ convertTensorToCkerShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void ExpLayer::expQuant8()
+{
+ // cker quant8 exp is not implemented yet
+ throw std::runtime_error{"NYI"};
+}
+
+void ExpLayer::configure(const operand::Tensor *input, operand::Tensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void ExpLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ expFloat32();
+ }
+ else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ expQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/ExpLayer.h b/runtime/onert/backend/cpu/kernel/ExpLayer.h
new file mode 100644
index 000000000..5a7222fad
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/ExpLayer.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_EXPLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_EXPLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class ExpLayer : public ::onert::exec::IFunction
+{
+public:
+ ExpLayer();
+
+public:
+ void expFloat32();
+
+ void expQuant8();
+
+ void configure(const operand::Tensor *input, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_input;
+ operand::Tensor *_output;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_EXPLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/FullyConnectedLayer.cc b/runtime/onert/backend/cpu/kernel/FullyConnectedLayer.cc
new file mode 100644
index 000000000..636df8941
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/FullyConnectedLayer.cc
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include <cker/operation/FullyConnected.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+FullyConnectedLayer::FullyConnectedLayer()
+ : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena())
+{
+ // DO NOTHING
+}
+
+FullyConnectedLayer::~FullyConnectedLayer() = default;
+
+void FullyConnectedLayer::fullyConnectedFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::cker::FullyConnectedParams op_params;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+ op_params.activation = convertActivationType(_activation);
+
+ nnfw::cker::FullyConnected(
+ op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const float *>(_input->buffer()), convertTensorToCkerShape(_weights),
+ reinterpret_cast<const float *>(_weights->buffer()), convertTensorToCkerShape(_bias),
+ reinterpret_cast<const float *>(_bias->buffer()), convertTensorToCkerShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+}
+
+// executionMutex is used to protect concurrent access of non-threadsafe resources
+// like gemmlowp::GemmContext.
+void FullyConnectedLayer::fullyConnectedQuant8()
+{
+ double real_multiplier = 0.0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ GetQuantizedConvolutionMultiplier(_input, _weights, _bias, _output, &real_multiplier);
+ QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+ CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
+ &output_activation_max);
+
+ nnfw::cker::FullyConnectedParams op_params;
+ op_params.input_offset = -_input->offset();
+ op_params.weights_offset = -_weights->offset();
+ op_params.output_offset = _output->offset();
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
+ nnfw::cker::FullyConnected(
+ op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const uint8_t *>(_input->buffer()), convertTensorToCkerShape(_weights),
+ reinterpret_cast<const uint8_t *>(_weights->buffer()), convertTensorToCkerShape(_bias),
+ reinterpret_cast<const int32_t *>(_bias->buffer()), convertTensorToCkerShape(_output),
+ reinterpret_cast<uint8_t *>(_output->buffer()));
+}
+
+void FullyConnectedLayer::fullyConnectedHybrid()
+{
+ nnfw::cker::FCTempArena &temp_arena = *_temp_arena;
+ if (!temp_arena.prepared)
+ {
+ temp_arena.prepare(convertTensorToCkerShape(_input), convertTensorToCkerShape(_weights));
+ }
+
+ nnfw::cker::FullyConnectedParams op_params;
+ op_params.activation = convertActivationType(_activation);
+ op_params.weights_scale = _weights->scale();
+
+ nnfw::cker::FullyConnectedHybrid(
+ op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const float *>(_input->buffer()), convertTensorToCkerShape(_weights),
+ reinterpret_cast<const int8_t *>(_weights->buffer()), convertTensorToCkerShape(_bias),
+ reinterpret_cast<const float *>(_bias->buffer()), convertTensorToCkerShape(_output),
+ reinterpret_cast<float *>(_output->buffer()), temp_arena);
+}
+
+void FullyConnectedLayer::configure(const operand::Tensor *input, const operand::Tensor *weights,
+ const operand::Tensor *bias, ir::Activation activation,
+ operand::Tensor *output)
+{
+ _input = input;
+ _weights = weights;
+ _bias = bias;
+ _activation = activation;
+ _output = output;
+}
+
+void FullyConnectedLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ if (_weights->data_type() == OperandType::QUANT8_SYMM)
+ {
+ fullyConnectedHybrid();
+ }
+ else
+ {
+ fullyConnectedFloat32();
+ }
+ }
+ else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ fullyConnectedQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/FullyConnectedLayer.h b/runtime/onert/backend/cpu/kernel/FullyConnectedLayer.h
new file mode 100644
index 000000000..616124bac
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/FullyConnectedLayer.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__
+
+#include "../operand/Tensor.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace nnfw
+{
+namespace cker
+{
+class FCTempArena;
+}
+} // namespace nnfw
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class FullyConnectedLayer : public ::onert::exec::IFunction
+{
+public:
+ FullyConnectedLayer();
+ ~FullyConnectedLayer();
+
+public:
+ void fullyConnectedFloat32();
+
+ void fullyConnectedQuant8();
+
+ void fullyConnectedHybrid();
+
+ void configure(const operand::Tensor *input, const operand::Tensor *weights,
+ const operand::Tensor *bias, ir::Activation activation, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_input;
+ const operand::Tensor *_weights;
+ const operand::Tensor *_bias;
+ operand::Tensor *_output;
+
+ ir::Activation _activation;
+ std::unique_ptr<nnfw::cker::FCTempArena> _temp_arena;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/GatherLayer.cc b/runtime/onert/backend/cpu/kernel/GatherLayer.cc
new file mode 100644
index 000000000..07f30136c
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/GatherLayer.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GatherLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Gather.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+void GatherLayer::configure(const operand::Tensor *input, const operand::Tensor *indices,
+ operand::Tensor *output, int32_t axis)
+{
+ _input = input;
+ _indices = indices;
+ _axis = axis;
+ _output = output;
+}
+
+void GatherLayer::run()
+{
+ nnfw::cker::GatherParams op_params;
+ op_params.axis = _axis;
+
+ switch (_input->data_type())
+ {
+ case OperandType::FLOAT32:
+ nnfw::cker::Gather<float>(
+ op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const float *>(_input->buffer()), convertTensorToCkerShape(_indices),
+ reinterpret_cast<const int32_t *>(_indices->buffer()), convertTensorToCkerShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+ break;
+ case OperandType::QUANT8_ASYMM:
+ nnfw::cker::Gather<uint8_t>(
+ op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const uint8_t *>(_input->buffer()), convertTensorToCkerShape(_indices),
+ reinterpret_cast<const int32_t *>(_indices->buffer()), convertTensorToCkerShape(_output),
+ reinterpret_cast<uint8_t *>(_output->buffer()));
+ break;
+ case OperandType::INT32:
+ nnfw::cker::Gather<int32_t>(
+ op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const int32_t *>(_input->buffer()), convertTensorToCkerShape(_indices),
+ reinterpret_cast<const int32_t *>(_indices->buffer()), convertTensorToCkerShape(_output),
+ reinterpret_cast<int32_t *>(_output->buffer()));
+ break;
+ default:
+ throw std::runtime_error("Gather NYI for this operand type!");
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/GatherLayer.h b/runtime/onert/backend/cpu/kernel/GatherLayer.h
new file mode 100644
index 000000000..2ff3605a2
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/GatherLayer.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_GATHERLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_GATHERLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class GatherLayer : public ::onert::exec::IFunction
+{
+public:
+ GatherLayer() : _input{nullptr}, _indices{nullptr}, _output{nullptr}, _axis{-1}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void configure(const operand::Tensor *input, const operand::Tensor *indices,
+ operand::Tensor *output, int32_t axis);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_input;
+ const operand::Tensor *_indices;
+ operand::Tensor *_output;
+
+ int32_t _axis;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_GATHERLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/LogisticLayer.cc b/runtime/onert/backend/cpu/kernel/LogisticLayer.cc
new file mode 100644
index 000000000..07f434048
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/LogisticLayer.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LogisticLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Logistic.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+LogisticLayer::LogisticLayer() : _input(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+void LogisticLayer::logisticFloat32()
+{
+ nnfw::cker::Logistic(
+ convertTensorToCkerShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ convertTensorToCkerShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void LogisticLayer::logisticQuant8()
+{
+ // cker quant8 logistic is not implemented yet
+ throw std::runtime_error{"NYI"};
+}
+
+void LogisticLayer::configure(const operand::Tensor *input, operand::Tensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void LogisticLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ logisticFloat32();
+ }
+ else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ logisticQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/LogisticLayer.h b/runtime/onert/backend/cpu/kernel/LogisticLayer.h
new file mode 100644
index 000000000..76ee1600f
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/LogisticLayer.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class LogisticLayer : public ::onert::exec::IFunction
+{
+public:
+ LogisticLayer();
+
+public:
+ void logisticFloat32();
+
+ void logisticQuant8();
+
+ void configure(const operand::Tensor *input, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_input;
+ operand::Tensor *_output;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/MaxLayer.cc b/runtime/onert/backend/cpu/kernel/MaxLayer.cc
new file mode 100644
index 000000000..701d73a35
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/MaxLayer.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/MaxMin.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+void MaxLayer::maxFloat32()
+{
+ nnfw::cker::Max<float>(
+ convertTensorToCkerShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
+ convertTensorToCkerShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
+ convertTensorToCkerShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void MaxLayer::maxQuant8()
+{
+ // TODO Check whether cker for quant8 max produces correct results
+ // nnfw::cker::Max<uint8_t>(
+ // convertTensorToCkerShape(_lhs), reinterpret_cast<const uint8_t*>(_lhs->buffer()),
+ // convertTensorToCkerShape(_rhs), reinterpret_cast<const uint8_t*>(_rhs->buffer()),
+ // convertTensorToCkerShape(_output), reinterpret_cast<uint8_t*>(_output->buffer()));
+
+ throw std::runtime_error("Max NYI for quantized");
+}
+
+void MaxLayer::configure(const operand::Tensor *lhs, const operand::Tensor *rhs,
+ operand::Tensor *output)
+{
+ assert(lhs != nullptr);
+ assert(rhs != nullptr);
+ assert(output != nullptr);
+
+ _lhs = lhs;
+ _rhs = rhs;
+ _output = output;
+}
+
+void MaxLayer::run()
+{
+ if (_lhs->data_type() == OperandType::FLOAT32)
+ {
+ maxFloat32();
+ }
+ else if (_lhs->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ maxQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/MaxLayer.h b/runtime/onert/backend/cpu/kernel/MaxLayer.h
new file mode 100644
index 000000000..2cccf26ae
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/MaxLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_MAXLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_MAXLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class MaxLayer : public ::onert::exec::IFunction
+{
+public:
+ MaxLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+ {
+ // DO NOTHING
+ }
+
+public:
+ void maxFloat32();
+
+ void maxQuant8();
+
+ void configure(const operand::Tensor *lhs, const operand::Tensor *rhs, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_lhs;
+ const operand::Tensor *_rhs;
+ operand::Tensor *_output;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_MAXLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/MaxPoolLayer.cc b/runtime/onert/backend/cpu/kernel/MaxPoolLayer.cc
new file mode 100644
index 000000000..e35d5e92a
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/MaxPoolLayer.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPoolLayer.h"
+
+#include <cker/operation/MaxPool.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+#define MAXPOOLING_PARAMETERS \
+ nnfw::cker::PoolParams op_params; \
+ op_params.stride_height = _strideHeight; \
+ op_params.stride_width = _strideWidth; \
+ op_params.filter_height = _kernelHeight; \
+ op_params.filter_width = _kernelWidth; \
+ op_params.padding_values.height = (int8_t)_paddingTop; \
+ op_params.padding_values.width = (int8_t)_paddingLeft;
+
+MaxPoolLayer::MaxPoolLayer()
+ : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0),
+ _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void MaxPoolLayer::maxPoolFloat32()
+{
+ MAXPOOLING_PARAMETERS
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::cker::MaxPool(op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const float *>(_input->buffer()),
+ convertTensorToCkerShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+}
+void MaxPoolLayer::maxPoolQuant8()
+{
+ MAXPOOLING_PARAMETERS
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
+ nnfw::cker::MaxPool(op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const uint8_t *>(_input->buffer()),
+ convertTensorToCkerShape(_output),
+ reinterpret_cast<uint8_t *>(_output->buffer()));
+}
+
+void MaxPoolLayer::configure(const operand::Tensor *input, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const ir::Activation activation,
+ operand::Tensor *output)
+{
+ _input = input;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _kernelWidth = kernelWidth;
+ _kernelHeight = kernelHeight;
+ _activation = activation;
+ _output = output;
+}
+
+void MaxPoolLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ maxPoolFloat32();
+ }
+ else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ maxPoolQuant8();
+ }
+}
+
+#undef MAXPOOLING_PARAMETERS
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/MaxPoolLayer.h b/runtime/onert/backend/cpu/kernel/MaxPoolLayer.h
new file mode 100644
index 000000000..a29e09e1b
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/MaxPoolLayer.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__
+
+#include "../operand/Tensor.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class MaxPoolLayer : public ::onert::exec::IFunction
+{
+public:
+ MaxPoolLayer();
+
+public:
+ void maxPoolFloat32();
+
+ void maxPoolQuant8();
+
+ void configure(const operand::Tensor *input, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const ir::Activation activation,
+ operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_input;
+ operand::Tensor *_output;
+
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+ uint32_t _kernelWidth;
+ uint32_t _kernelHeight;
+
+ ir::Activation _activation;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/MinLayer.cc b/runtime/onert/backend/cpu/kernel/MinLayer.cc
new file mode 100644
index 000000000..07cb8ab91
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/MinLayer.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MinLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/MaxMin.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+void MinLayer::minFloat32()
+{
+ nnfw::cker::Min<float>(
+ convertTensorToCkerShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
+ convertTensorToCkerShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
+ convertTensorToCkerShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void MinLayer::minQuant8()
+{
+ // TODO Check whether cker for quant8 min produces correct results
+ // nnfw::cker::Min<uint8_t>(
+ // convertTensorToCkerShape(_lhs), reinterpret_cast<const uint8_t*>(_lhs->buffer()),
+ // convertTensorToCkerShape(_rhs), reinterpret_cast<const uint8_t*>(_rhs->buffer()),
+ // convertTensorToCkerShape(_output), reinterpret_cast<uint8_t*>(_output->buffer()));
+
+ throw std::runtime_error("Min NYI for quantized");
+}
+
+void MinLayer::configure(const operand::Tensor *lhs, const operand::Tensor *rhs,
+ operand::Tensor *output)
+{
+ assert(lhs != nullptr);
+ assert(rhs != nullptr);
+ assert(output != nullptr);
+
+ _lhs = lhs;
+ _rhs = rhs;
+ _output = output;
+}
+
+void MinLayer::run()
+{
+ if (_lhs->data_type() == OperandType::FLOAT32)
+ {
+ minFloat32();
+ }
+ else if (_lhs->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ minQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/MinLayer.h b/runtime/onert/backend/cpu/kernel/MinLayer.h
new file mode 100644
index 000000000..5beb6444a
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/MinLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_MINLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_MINLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class MinLayer : public ::onert::exec::IFunction
+{
+public:
+ MinLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+ {
+ // DO NOTHING
+ }
+
+public:
+ void minFloat32();
+
+ void minQuant8();
+
+ void configure(const operand::Tensor *lhs, const operand::Tensor *rhs, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_lhs;
+ const operand::Tensor *_rhs;
+ operand::Tensor *_output;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_MINLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/MulLayer.cc b/runtime/onert/backend/cpu/kernel/MulLayer.cc
new file mode 100644
index 000000000..df7d71eec
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/MulLayer.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MulLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+void MulLayer::mulFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ op_params.type = nnfw::cker::BinaryArithmeticOpType::MUL;
+ op_params.float_activation_max = output_activation_max;
+ op_params.float_activation_min = output_activation_min;
+
+ if (!HaveSameShapes(_lhs, _rhs))
+ {
+ nnfw::cker::BroadcastBinaryArithmeticOpSlow(
+ op_params, convertToExtendedCkerShape(_lhs),
+ reinterpret_cast<const float *>(_lhs->buffer()), convertToExtendedCkerShape(_rhs),
+ reinterpret_cast<const float *>(_rhs->buffer()), convertToExtendedCkerShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+ return;
+ }
+
+ nnfw::cker::BinaryArithmeticOp(
+ op_params, convertTensorToCkerShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
+ convertTensorToCkerShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
+ convertTensorToCkerShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void MulLayer::mulQuant8()
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
+ &output_activation_max);
+ // nnfw::cker::BinaryArithmeticOpParam op_params;
+ // op_params.quantized_activation_max = output_activation_max;
+ // op_params.quantized_activation_min = output_activation_min;
+
+ // cker quant8 mul is not implemented yet
+ throw std::runtime_error{"Mull NYI for quantized"};
+}
+
+void MulLayer::configure(const operand::Tensor *lhs, const operand::Tensor *rhs,
+ const ir::Activation activation, operand::Tensor *output)
+{
+ _lhs = lhs;
+ _rhs = rhs;
+ _activation = activation;
+ _output = output;
+}
+
+void MulLayer::run()
+{
+ if (_output->data_type() == OperandType::FLOAT32)
+ {
+ mulFloat32();
+ }
+ else if (_output->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ mulQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/MulLayer.h b/runtime/onert/backend/cpu/kernel/MulLayer.h
new file mode 100644
index 000000000..a5cf25f0f
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/MulLayer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_MULLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_MULLAYER_H__
+
+#include "../operand/Tensor.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// CPU kernel for element-wise multiplication (MUL) with a fused activation.
+// configure() stores the operands; run() dispatches by output dtype.
+class MulLayer : public ::onert::exec::IFunction
+{
+public:
+  MulLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+  {
+    // DO NOTHING
+  }
+
+public:
+  // Float path: delegates to cker binary-arithmetic ops (see MulLayer.cc).
+  void mulFloat32();
+
+  // Quantized path: not implemented yet; always throws.
+  void mulQuant8();
+
+  void configure(const operand::Tensor *lhs, const operand::Tensor *rhs,
+                 const ir::Activation activation, operand::Tensor *output);
+
+  void run();
+  void runSync()
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  const operand::Tensor *_lhs;
+  const operand::Tensor *_rhs;
+  operand::Tensor *_output;
+
+  // Fused activation applied to the product; NONE means no clamping.
+  ir::Activation _activation{ir::Activation::NONE};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_MULLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/OneHotLayer.cc b/runtime/onert/backend/cpu/kernel/OneHotLayer.cc
new file mode 100644
index 000000000..fd2c1b795
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/OneHotLayer.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OneHotLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/OneHot.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Float path: expand int32 indices into a one-hot float tensor via cker.
+// Indices are read as int32_t; _depth/_on_value/_off_value/_axis come from
+// configure().
+void OneHotLayer::oneHotFloat32()
+{
+  nnfw::cker::OneHot<float, int32_t>(
+      _depth, _on_value, _off_value, _axis, convertTensorToCkerShape(_indices),
+      reinterpret_cast<const int32_t *>(_indices->buffer()), convertTensorToCkerShape(_output),
+      reinterpret_cast<float *>(_output->buffer()));
+}
+
+void OneHotLayer::oneHotQuant8() { throw std::runtime_error{"OneHot NYI for quantized"}; }
+
+// Store OneHot parameters for a later run() call.
+// NOTE(review): only axis == -1 is normalized (to "append as last axis");
+// other negative axis values pass through unchanged -- confirm callers never
+// send them.
+void OneHotLayer::configure(const operand::Tensor *indices, operand::Tensor *output, int32_t depth,
+                            float on_value, float off_value, int32_t axis)
+{
+  _indices = indices;
+  _output = output;
+  _depth = depth;
+  _on_value = on_value;
+  _off_value = off_value;
+  _axis = axis;
+  if (_axis == -1)
+    _axis = _indices->num_dimensions();
+}
+
+// Dispatch on the output element type.
+// Bug fix: the original silently did nothing for any other dtype, leaving
+// the output buffer untouched; fail loudly instead.
+void OneHotLayer::run()
+{
+  if (_output->data_type() == OperandType::FLOAT32)
+  {
+    oneHotFloat32();
+  }
+  else if (_output->data_type() == OperandType::QUANT8_ASYMM)
+  {
+    oneHotQuant8();
+  }
+  else
+  {
+    throw std::runtime_error{"OneHot: unsupported data type"};
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/OneHotLayer.h b/runtime/onert/backend/cpu/kernel/OneHotLayer.h
new file mode 100644
index 000000000..5f23481ba
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/OneHotLayer.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_ONEHOTLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_ONEHOTLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// CPU kernel for the OneHot operation: writes _on_value at the positions
+// named by the indices tensor and _off_value elsewhere, along axis _axis.
+class OneHotLayer : public ::onert::exec::IFunction
+{
+public:
+  OneHotLayer()
+      : _indices(nullptr), _output(nullptr), _depth(0), _on_value(1), _off_value(0), _axis(-1)
+  {
+    // DO NOTHING
+  }
+
+public:
+  void oneHotFloat32();
+
+  // Not implemented yet; always throws.
+  void oneHotQuant8();
+
+  void configure(const operand::Tensor *indices, operand::Tensor *output, int32_t depth,
+                 float on_value, float off_value, int32_t axis);
+
+  void run();
+  void runSync()
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  const operand::Tensor *_indices;
+  operand::Tensor *_output;
+
+  int32_t _depth;      // size of the new one-hot axis
+  float _on_value;     // value written at index positions
+  float _off_value;    // value written everywhere else
+  int32_t _axis;       // axis to insert; -1 is normalized in configure()
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_ONEHOTLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/OperationUtils.cc b/runtime/onert/backend/cpu/kernel/OperationUtils.cc
new file mode 100644
index 000000000..178aac833
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/OperationUtils.cc
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationUtils.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Return the tensor's rank. `tensor` must be non-NULL.
+uint32_t getNumberOfDimensions(const operand::Tensor *tensor)
+{
+  assert(tensor);
+  return tensor->num_dimensions();
+}
+
+// Return the total element count (product of all dimensions; 1 for rank 0).
+// NOTE(review): the product is accumulated in uint32_t and can overflow for
+// very large tensors -- confirm shapes stay within range.
+uint32_t getNumberOfElements(const operand::Tensor *tensor)
+{
+  assert(tensor);
+  uint32_t count = 1;
+  for (size_t i = 0; i < tensor->num_dimensions(); i++)
+  {
+    count *= tensor->dimension(i);
+  }
+  return count;
+}
+
+// Return the extent of dimension `dimensionIdx`, or 0 when the index is out
+// of range (the caller cannot distinguish this from a genuine 0-sized dim).
+uint32_t getSizeOfDimension(const operand::Tensor *tensor, uint32_t dimensionIdx)
+{
+  assert(tensor);
+  if (dimensionIdx >= tensor->num_dimensions())
+  {
+    // TODO, log the error
+    return 0;
+  }
+  return tensor->dimension(dimensionIdx);
+}
+
+// Decompose a positive real multiplier into a Q31 fixed-point mantissa
+// (*quantized_multiplier) and a power-of-two exponent (*shift) such that
+// double_multiplier ~= quantized_multiplier * 2^(shift - 31).
+void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
+{
+  if (double_multiplier == 0.)
+  {
+    *quantized_multiplier = 0;
+    *shift = 0;
+    return;
+  }
+  // frexp returns q in [0.5, 1) and stores the base-2 exponent in *shift.
+  const double q = std::frexp(double_multiplier, shift);
+  auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+
+  assert(q_fixed <= (1ll << 31));
+  // Rounding can push q to exactly 1.0; renormalize into int32 range.
+  if (q_fixed == (1ll << 31))
+  {
+    q_fixed /= 2;
+    ++*shift;
+  }
+  assert(q_fixed <= std::numeric_limits<int32_t>::max());
+  *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
+
+// Compute the real requantization multiplier for a quantized convolution:
+// (input_scale * filter_scale) / output_scale. Asserts (debug builds only)
+// that the bias scale matches the input-filter product scale, as the
+// training pipeline must guarantee.
+void GetQuantizedConvolutionMultiplier(const operand::Tensor *input, const operand::Tensor *filter,
+                                       const operand::Tensor *bias, const operand::Tensor *output,
+                                       double *multiplier)
+{
+  const double input_product_scale = input->scale() * filter->scale();
+  const double bias_scale = bias->scale();
+  const double output_scale = output->scale();
+  // The following conditions must be guaranteed by the training pipeline.
+  UNUSED_RELEASE(bias_scale);
+  assert(std::abs(input_product_scale - bias_scale) <=
+         1e-6 * std::min(input_product_scale, bias_scale));
+  assert(input_product_scale >= 0);
+  assert(input_product_scale < output_scale);
+  *multiplier = input_product_scale / output_scale;
+}
+
+// Same Q31 decomposition as QuantizeMultiplier, restricted to multipliers
+// strictly greater than 1 so the resulting exponent is a left shift.
+void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
+                                      int *left_shift)
+{
+  assert(double_multiplier > 1.);
+  const double q = std::frexp(double_multiplier, left_shift);
+  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+  assert(q_fixed <= (1ll << 31));
+  // Rounding can push q to exactly 1.0; renormalize into int32 range.
+  if (q_fixed == (1ll << 31))
+  {
+    q_fixed /= 2;
+    ++*left_shift;
+  }
+  assert(*left_shift >= 0);
+  assert(q_fixed <= std::numeric_limits<int32_t>::max());
+  *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
+
+// Compute the clamp range [*activation_min, *activation_max] implied by a
+// fused activation, for float kernels. NONE yields the full float range.
+// Bug fix: the original merely printed to stdout for an unsupported
+// activation and returned with *activation_min/max uninitialized, so callers
+// clamped with garbage bounds; throw instead.
+void CalculateActivationRangeFloat(ir::Activation activation, float *activation_min,
+                                   float *activation_max)
+{
+  if (activation == ir::Activation::RELU)
+  {
+    *activation_min = 0.f;
+    *activation_max = std::numeric_limits<float>::max();
+  }
+  else if (activation == ir::Activation::RELU6)
+  {
+    *activation_min = 0.f;
+    *activation_max = 6.f;
+  }
+  else if (activation == ir::Activation::RELU1)
+  {
+    *activation_min = -1.f;
+    *activation_max = 1.f;
+  }
+  else if (activation == ir::Activation::SIGMOID)
+  {
+    *activation_min = 0.f;
+    *activation_max = 1.f;
+  }
+  else if (activation == ir::Activation::NONE)
+  {
+    *activation_min = std::numeric_limits<float>::lowest();
+    *activation_max = std::numeric_limits<float>::max();
+  }
+  else
+  {
+    throw std::runtime_error{"Unsupported fused activation function."};
+  }
+}
+
+// Compute the clamp range [*act_min, *act_max] in the output tensor's
+// quantized (uint8) domain: real activation bounds are quantized with the
+// output's scale/zero-point, then clipped to [0, 255].
+// Bug fix: the original merely printed to stdout for an unsupported
+// activation and returned with *act_min/*act_max uninitialized; throw
+// instead.
+void CalculateActivationRangeUint8(ir::Activation activation, const operand::Tensor *output,
+                                   int32_t *act_min, int32_t *act_max)
+{
+  const int32_t qmin = std::numeric_limits<uint8_t>::min();
+  const int32_t qmax = std::numeric_limits<uint8_t>::max();
+  const auto scale = output->scale();
+  const auto zero_point = output->offset();
+  // Map a real value into the output's quantized domain.
+  auto quantize = [scale, zero_point](float f) {
+    return zero_point + static_cast<int32_t>(std::round(f / scale));
+  };
+  if (activation == ir::Activation::RELU)
+  {
+    *act_min = std::max(qmin, quantize(0.0));
+    *act_max = qmax;
+  }
+  else if (activation == ir::Activation::RELU6)
+  {
+    *act_min = std::max(qmin, quantize(0.0));
+    *act_max = std::min(qmax, quantize(6.0));
+  }
+  else if (activation == ir::Activation::RELU1)
+  {
+    *act_min = std::max(qmin, quantize(-1.0));
+    *act_max = std::min(qmax, quantize(1.0));
+  }
+  else if (activation == ir::Activation::SIGMOID)
+  {
+    *act_min = std::max(qmin, quantize(0.0));
+    *act_max = std::min(qmax, quantize(1.0));
+  }
+  else if (activation == ir::Activation::NONE)
+  {
+    *act_min = qmin;
+    *act_max = qmax;
+  }
+  else
+  {
+    throw std::runtime_error{"Unsupported fused activation function."};
+  }
+}
+
+// Return true iff both tensors are the same object or have identical
+// dimensions. NULL never matches anything other than itself.
+bool HaveSameShapes(const operand::Tensor *input1, const operand::Tensor *input2)
+{
+  if (input1 == input2)
+    return true;
+  // Bug fix: the original tested `input2 == NULL` twice, so a NULL input1
+  // paired with a non-NULL input2 fell through and dereferenced NULL in
+  // getNumberOfDimensions(). Check both pointers symmetrically; this also
+  // makes the former special-case branch for NULL input1 unreachable, so it
+  // was removed.
+  if (input1 == NULL || input2 == NULL)
+    return false;
+
+  if (getNumberOfDimensions(input1) != getNumberOfDimensions(input2))
+    return false;
+
+  for (uint32_t i = 0; i < getNumberOfDimensions(input1); i++)
+    if (input1->dimension(i) != input2->dimension(i))
+      return false;
+
+  return true;
+}
+
+// Compute the largest representable input magnitude ("radius") for a
+// fixed-point input with `input_integer_bits` integer bits after shifting
+// left by `input_left_shift` (used by softmax/logistic quantized kernels).
+int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift)
+{
+  const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
+                                    (1ll << (31 - input_integer_bits)) / (1ll << input_left_shift);
+  // Tighten bound using floor. Suppose that we could use the exact value.
+  // After scaling the difference, the result would be at the maximum. Thus we
+  // must ensure that our value has lower magnitude.
+  return static_cast<int32_t>(std::floor(max_input_rescaled));
+}
+
+// Return the byte size of a tensor with the given element type and
+// dimensions. Throws for element types this backend does not handle.
+uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions)
+{
+  // Element size in bytes; the initial 4 is overwritten by the switch below.
+  uint32_t size = 4;
+
+  switch (type)
+  {
+    case OperandType::FLOAT32:
+    case OperandType::INT32:
+    case OperandType::UINT32:
+      size = 4;
+      break;
+    case OperandType::BOOL8:
+    case OperandType::QUANT8_ASYMM:
+    case OperandType::QUANT8_SYMM:
+      size = 1;
+      break;
+    default:
+      throw std::runtime_error("Not supported operand type.");
+      break;
+  }
+
+  // Multiply by the element count.
+  for (auto d : dimensions)
+  {
+    size *= d;
+  }
+
+  return size;
+}
+
+// Map an IR padding type onto cker's enum. EXPLICIT maps to kNone because
+// explicit pad amounts are passed to cker separately, not via this enum.
+nnfw::cker::PaddingType getPaddingType(ir::PaddingType ir_padding_type)
+{
+  switch (ir_padding_type)
+  {
+    case ir::PaddingType::EXPLICIT:
+      return nnfw::cker::PaddingType::kNone;
+    case ir::PaddingType::SAME:
+      return nnfw::cker::PaddingType::kSame;
+    case ir::PaddingType::VALID:
+      return nnfw::cker::PaddingType::kValid;
+    default:
+      throw std::runtime_error("Wrong padding type.");
+      break;
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/OperationUtils.h b/runtime/onert/backend/cpu/kernel/OperationUtils.h
new file mode 100644
index 000000000..be9b24f36
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/OperationUtils.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__
+#define __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__
+
+#include "../operand/Tensor.h"
+
+#include <cker/Shape.h>
+#include <cker/Types.h>
+#include <iostream>
+#include <ir/DataType.h>
+#include <ir/InternalType.h>
+#include <ir/Operand.h>
+#include <ir/Padding.h>
+
+#include <limits>
+#include <vector>
+
+using OperandType = onert::ir::DataType;
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+union DataPtr {
+ uint8_t *u8;
+ int8_t *i8;
+ uint32_t *u32;
+ int32_t *i32;
+ bool *b;
+ float *f;
+ void *v;
+};
+
+uint32_t getNumberOfDimensions(const operand::Tensor *tensor);
+
+uint32_t getNumberOfElements(const operand::Tensor *tensor);
+
+uint32_t getSizeOfDimension(const operand::Tensor *tensor, uint32_t dimensionIdx);
+
+// Convert a tensor's shape to a rank-4 cker Shape, left-padding with 1s
+// (e.g. {H, W} becomes {1, 1, H, W}). Used by broadcasting kernels.
+inline nnfw::cker::Shape convertToExtendedCkerShape(const operand::Tensor *tensor)
+{
+  assert(tensor);
+  // Bug fix (robustness): `4 - num_dimensions()` is an unsigned subtraction,
+  // so a rank above 4 wrapped around to a huge value and indexed dimension()
+  // far out of range. Guard against that explicitly.
+  assert(tensor->num_dimensions() <= 4);
+  std::vector<int32_t> raw_shape;
+  raw_shape.resize(4);
+
+  uint32_t src = 4 - tensor->num_dimensions();
+  for (uint32_t i = 0; i < 4; ++i)
+  {
+    if (i < src)
+    {
+      raw_shape[i] = 1;
+    }
+    else
+    {
+      raw_shape[i] = tensor->dimension(i - src);
+    }
+  }
+
+  return nnfw::cker::GetShape(raw_shape);
+}
+
+// Convert a tensor's shape to a cker Shape of the same rank.
+// Only NHWC-layout tensors are supported (asserted in debug builds).
+inline nnfw::cker::Shape convertTensorToCkerShape(const operand::Tensor *tensor)
+{
+  assert(tensor);
+  assert(tensor->layout() == ir::Layout::NHWC);
+  std::vector<int32_t> raw_shape;
+  raw_shape.resize(tensor->num_dimensions());
+  for (uint32_t i = 0; i < tensor->num_dimensions(); ++i)
+  {
+    raw_shape[i] = tensor->dimension(i);
+  }
+
+  return nnfw::cker::GetShape(raw_shape);
+}
+
+// Map an IR fused-activation enum onto cker's equivalent. SIGMOID has no
+// cker counterpart here, so it (and any unknown value) throws.
+inline nnfw::cker::FusedActivationFunctionType
+convertActivationType(const ir::Activation activation)
+{
+  switch (activation)
+  {
+    case ir::Activation::NONE:
+      return nnfw::cker::FusedActivationFunctionType::kNone;
+    case ir::Activation::RELU:
+      return nnfw::cker::FusedActivationFunctionType::kRelu;
+    case ir::Activation::RELU1:
+      return nnfw::cker::FusedActivationFunctionType::kRelu1;
+    case ir::Activation::RELU6:
+      return nnfw::cker::FusedActivationFunctionType::kRelu6;
+    default:
+      throw std::runtime_error{"CPU backend: Cannot convert activation type"};
+  }
+}
+
+// Normalize a (possibly negative, Python-style) axis and translate it from
+// the frontend layout into this backend's NHWC layout.
+// NOTE(review): the permutation table assumes rank == 4 when the frontend
+// layout is NCHW; other ranks would remap incorrectly -- confirm callers.
+inline int32_t getAxis(uint32_t rank, int32_t axis, ir::Layout frontend_layout)
+{
+  auto ret = axis;
+
+  // Wrap a negative axis to its non-negative equivalent.
+  if (axis < 0)
+  {
+    ret += rank;
+  }
+
+  // NCHW -> NHWC
+  if (frontend_layout == ir::Layout::NCHW)
+  {
+    int32_t permutation[4] = {0, 3, 1, 2};
+    ret = permutation[ret];
+  }
+
+  return ret;
+}
+
+void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
+
+void GetQuantizedConvolutionMultiplier(const operand::Tensor *inputDescr,
+ const operand::Tensor *filterDescr,
+ const operand::Tensor *biasDescr,
+ const operand::Tensor *outputDescr, double *multiplier);
+
+void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift);
+
+void CalculateActivationRangeFloat(ir::Activation activation, float *activation_min,
+ float *activation_max);
+
+void CalculateActivationRangeUint8(ir::Activation activation, const operand::Tensor *output,
+ int32_t *act_min, int32_t *act_max);
+
+bool HaveSameShapes(const operand::Tensor *input1, const operand::Tensor *input2);
+
+int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift);
+
+uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions);
+
+nnfw::cker::PaddingType getPaddingType(ir::PaddingType ir_padding_type);
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__
diff --git a/runtime/onert/backend/cpu/kernel/PackLayer.cc b/runtime/onert/backend/cpu/kernel/PackLayer.cc
new file mode 100644
index 000000000..01e69ff1d
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/PackLayer.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PackLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Pack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Default-construct with no inputs; configure() must be called before run().
+PackLayer::PackLayer() : _inputs(), _output(nullptr), _axis(0)
+{
+  // DO NOTHING
+}
+
+// Float path: stack all configured input tensors along _axis via cker.
+// Cleanup: the original also built `inputDims`/`inputDimsPtr` vectors of
+// per-input cker Shapes, but nnfw::cker::Pack never consumed them; that
+// dead code was removed.
+void PackLayer::packFloat32()
+{
+  const uint32_t num_inputs = _inputs.size();
+  nnfw::cker::PackParams op_params;
+  op_params.axis = _axis;
+  op_params.inputs_count = num_inputs;
+
+  // Gather raw float pointers for every input buffer.
+  std::vector<const float *> inputFloatPtrs;
+  inputFloatPtrs.reserve(num_inputs);
+  for (const auto input : _inputs)
+  {
+    inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(input->buffer()));
+  }
+
+  nnfw::cker::Pack<float>(op_params, inputFloatPtrs.data(), convertTensorToCkerShape(_output),
+                          reinterpret_cast<float *>(_output->buffer()));
+}
+
+// Quantized path: not implemented yet; always throws.
+void PackLayer::packQuant8()
+{
+  // cker quant8 pack is not implemented yet
+  throw std::runtime_error{"NYI"};
+}
+
+// Store the input tensors, pack axis, and output for a later run() call.
+// Asserts (debug builds only) that there is at least one input and an output.
+void PackLayer::configure(const std::vector<const operand::Tensor *> &inputs, int32_t axis,
+                          operand::Tensor *output)
+{
+  assert(inputs.size() > 0);
+  assert(output != nullptr);
+
+  _inputs = inputs;
+  _axis = axis;
+  _output = output;
+}
+
+// Dispatch on the output element type.
+// Bug fix: the original silently did nothing for any other dtype, leaving
+// the output buffer untouched; fail loudly instead.
+void PackLayer::run()
+{
+  if (_output->data_type() == OperandType::FLOAT32)
+  {
+    packFloat32();
+  }
+  else if (_output->data_type() == OperandType::QUANT8_ASYMM)
+  {
+    packQuant8();
+  }
+  else
+  {
+    throw std::runtime_error{"Pack: unsupported data type"};
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/PackLayer.h b/runtime/onert/backend/cpu/kernel/PackLayer.h
new file mode 100644
index 000000000..5c87382be
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/PackLayer.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_PACKLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_PACKLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// CPU kernel for the Pack operation: stacks N same-shaped inputs along a
+// new axis into one output tensor.
+class PackLayer : public ::onert::exec::IFunction
+{
+public:
+  PackLayer();
+
+public:
+  void packFloat32();
+
+  // Not implemented yet; always throws.
+  void packQuant8();
+
+  void configure(const std::vector<const operand::Tensor *> &inputs, int32_t axis,
+                 operand::Tensor *output);
+
+  void run();
+  void runSync()
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  std::vector<const operand::Tensor *> _inputs;
+  operand::Tensor *_output;
+  int32_t _axis; // axis along which inputs are stacked
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_PACKLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/PadLayer.cc b/runtime/onert/backend/cpu/kernel/PadLayer.cc
new file mode 100644
index 000000000..393856178
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/PadLayer.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PadLayer.h"
+
+#include <cker/operation/Pad.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Default-construct with empty state; configure() must be called before run().
+PadLayer::PadLayer()
+    : _input(nullptr), _output(nullptr), _padData(), _padRank(), _constantValueData()
+{
+  // DO NOTHING
+}
+
+// Float path: constant-mode pad via cker. _constantValueData.f may be NULL,
+// in which case cker uses its default fill value.
+void PadLayer::padFloat32()
+{
+  nnfw::cker::Pad(_padData, _padRank, convertTensorToCkerShape(_input),
+                  reinterpret_cast<const float *>(_input->buffer()),
+                  convertTensorToCkerShape(_output), reinterpret_cast<float *>(_output->buffer()),
+                  _constantValueData.f);
+}
+void PadLayer::padQuant8() { throw std::runtime_error("Quantized Pad isn't supported NYI"); }
+
+// Store pad parameters for a later run() call. `padData` holds 2 * padRank
+// int32 values (before/after per dimension) and must outlive this layer;
+// `constantValueData` may be NULL to use the default fill value.
+void PadLayer::configure(const operand::Tensor *input, operand::Tensor *output,
+                         const int32_t *padData, int32_t padRank, uint8_t *constantValueData)
+{
+  _input = input;
+  _output = output;
+  _padData = padData;
+  _padRank = padRank;
+  _constantValueData.u8 = constantValueData;
+}
+
+// Dispatch on the input element type.
+// Bug fix: the original silently did nothing for any other dtype, leaving
+// the output buffer untouched; fail loudly instead.
+void PadLayer::run()
+{
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    padFloat32();
+  }
+  else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+  {
+    padQuant8();
+  }
+  else
+  {
+    throw std::runtime_error{"Pad: unsupported data type"};
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/PadLayer.h b/runtime/onert/backend/cpu/kernel/PadLayer.h
new file mode 100644
index 000000000..fc14ae033
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/PadLayer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_PADLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_PADLAYER_H__
+
+#include "../operand/Tensor.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Note, this is pad with mode=`CONSTANT`: it doesn't support `REFLECT` and
+// `SYMMETRIC`
+// CPU kernel for constant-mode Pad (REFLECT/SYMMETRIC are not supported;
+// see the note above the class in the original file).
+class PadLayer : public ::onert::exec::IFunction
+{
+public:
+  PadLayer();
+
+public:
+  void padFloat32();
+
+  // Not implemented yet; always throws.
+  void padQuant8();
+
+  void configure(const operand::Tensor *input, operand::Tensor *output, const int32_t *padData,
+                 int32_t padRank, uint8_t *constantValueData = nullptr);
+
+  void run();
+  void runSync()
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  const operand::Tensor *_input;
+  operand::Tensor *_output;
+
+  const int32_t *_padData;         // 2 * _padRank before/after amounts (not owned)
+  int32_t _padRank;
+  DataPtr _constantValueData;      // optional fill value; .u8 == NULL means default
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_PADLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/PermuteLayer.cc b/runtime/onert/backend/cpu/kernel/PermuteLayer.cc
new file mode 100644
index 000000000..a3e828255
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/PermuteLayer.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermuteLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+using Type = ir::operation::Permute::Type;
+
+// Store the tensors, output shape, permute mode, and element type for a
+// later run() call. No buffer access happens here.
+void PermuteLayer::configure(std::shared_ptr<backend::ITensor> input,
+                             std::shared_ptr<backend::ITensor> output,
+                             const ir::Shape &output_shape, Type type, ir::DataType dataType)
+{
+  _input = input;
+  _output = output;
+  _output_shape = output_shape;
+  _type = type;
+  _dataType = dataType;
+}
+
+// Select the element type for the templated implementation from _dataType.
+// BOOL8 and QUANT8_ASYMM share the uint8_t instantiation.
+void PermuteLayer::run()
+{
+  using ir::DataType;
+  switch (_dataType)
+  {
+    case DataType::FLOAT32:
+      runTempl<float>();
+      break;
+    case DataType::INT32:
+      runTempl<int32_t>();
+      break;
+    case DataType::UINT32:
+      runTempl<uint32_t>();
+      break;
+    case DataType::BOOL8:
+    case DataType::QUANT8_ASYMM:
+      runTempl<uint8_t>();
+      break;
+    case DataType::QUANT8_SYMM:
+      runTempl<int8_t>();
+      break;
+    default:
+      throw std::runtime_error("NYI");
+      break;
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/PermuteLayer.h b/runtime/onert/backend/cpu/kernel/PermuteLayer.h
new file mode 100644
index 000000000..52b96caba
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/PermuteLayer.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__
+
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+#include <ir/Coordinates.h>
+#include <ir/operation/Permute.h>
+#include <misc/feature/IndexIterator.h>
+#include <util/feature/nchw/View.h>
+#include <util/feature/nhwc/Reader.h>
+#include <util/feature/nhwc/View.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Copies or layout-permutes a tensor between backends. Supports plain COPY
+// plus NHWC_TO_NCHW / NCHW_TO_NHWC (4D only); ranks 0-3 always use the
+// order-preserving copy paths.
+class PermuteLayer : public ::onert::exec::IFunction
+{
+public:
+  PermuteLayer() = default;
+
+public:
+  void configure(std::shared_ptr<backend::ITensor> input, std::shared_ptr<backend::ITensor> output,
+                 const ir::Shape &output_shape, ir::operation::Permute::Type type,
+                 ir::DataType dataType);
+  void run();
+  void runSync()
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  // Element-type-specific implementation; T is chosen in run() from _dataType.
+  template <class T> void runTempl()
+  {
+    auto rank = _output_shape.rank();
+    auto fn = [&](ITensor &in_tensor) {
+      _output->access([&](ITensor &out_tensor) {
+        auto input_buffer = in_tensor.buffer();
+        auto input_size = in_tensor.total_size();
+        auto output_buffer = out_tensor.buffer();
+        if (_type == ir::operation::Permute::Type::COPY)
+        {
+          assert(in_tensor.layout() == out_tensor.layout());
+          // Fast path: same layout and both tensors contiguous.
+          if (!in_tensor.has_padding() && !out_tensor.has_padding())
+          {
+            assert(input_size == out_tensor.total_size());
+            memcpy(output_buffer, input_buffer, input_size);
+            return;
+          }
+        }
+        switch (rank)
+        {
+          case 0:
+          case 1:
+          {
+            // copy_len counts ELEMENTS. NOTE(review): for rank 0 this reads
+            // dim(0) of a scalar shape -- confirm ir::Shape permits that.
+            const int32_t copy_len = _output_shape.dim(0);
+
+            // Bug fix: scale by sizeof(T) like every other rank below; the
+            // original copied only copy_len BYTES, truncating data for any
+            // element type wider than one byte.
+            memcpy(output_buffer, input_buffer, copy_len * sizeof(T));
+            break;
+          }
+          case 2:
+          {
+            const int32_t copy_len = _output_shape.dim(1);
+
+            for (auto i = 0; i < _output_shape.dim(0); ++i)
+            {
+              ir::Coordinates coords{i, 0};
+              memcpy(output_buffer + out_tensor.calcOffset(coords),
+                     input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T));
+            }
+            break;
+          }
+          case 3:
+          {
+            const int32_t copy_len = _output_shape.dim(2);
+
+            for (auto i = 0; i < _output_shape.dim(0); ++i)
+            {
+              for (auto j = 0; j < _output_shape.dim(1); ++j)
+              {
+                ir::Coordinates coords{i, j, 0};
+                memcpy(output_buffer + out_tensor.calcOffset(coords),
+                       input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T));
+              }
+            }
+            break;
+          }
+          case 4:
+          {
+            // TODO Unify permute type and remove switch case
+            switch (_type)
+            {
+              case ir::operation::Permute::Type::NHWC_TO_NCHW:
+              {
+                // Iterate in the output's NCHW order; per-element copy.
+                for (auto n = 0; n < _output_shape.dim(0); ++n)
+                {
+                  for (auto c = 0; c < _output_shape.dim(1); ++c)
+                  {
+                    for (auto h = 0; h < _output_shape.dim(2); ++h)
+                    {
+                      for (auto w = 0; w < _output_shape.dim(3); ++w)
+                      {
+                        const ir::Coordinates in_coords{n, h, w, c};
+                        const auto out_coords =
+                            convertCoordinates(in_coords, in_tensor.layout(), out_tensor.layout());
+                        const auto value =
+                            *reinterpret_cast<T *>(input_buffer + in_tensor.calcOffset(in_coords));
+                        *reinterpret_cast<T *>(output_buffer + out_tensor.calcOffset(out_coords)) =
+                            value;
+                      }
+                    }
+                  }
+                }
+                break;
+              }
+              case ir::operation::Permute::Type::NCHW_TO_NHWC:
+              {
+                // Iterate in the output's NHWC order; per-element copy.
+                for (auto n = 0; n < _output_shape.dim(0); ++n)
+                {
+                  for (auto h = 0; h < _output_shape.dim(1); ++h)
+                  {
+                    for (auto w = 0; w < _output_shape.dim(2); ++w)
+                    {
+                      for (auto c = 0; c < _output_shape.dim(3); ++c)
+                      {
+                        const ir::Coordinates in_coords{n, c, h, w};
+                        const auto out_coords =
+                            convertCoordinates(in_coords, in_tensor.layout(), out_tensor.layout());
+                        const auto value =
+                            *reinterpret_cast<T *>(input_buffer + in_tensor.calcOffset(in_coords));
+                        *reinterpret_cast<T *>(output_buffer + out_tensor.calcOffset(out_coords)) =
+                            value;
+                      }
+                    }
+                  }
+                }
+                break;
+              }
+              case ir::operation::Permute::Type::COPY:
+              {
+                // Padded COPY: copy innermost rows one at a time.
+                const int32_t copy_len = _output_shape.dim(3);
+
+                for (auto i = 0; i < _output_shape.dim(0); ++i)
+                {
+                  for (auto j = 0; j < _output_shape.dim(1); ++j)
+                  {
+                    for (auto k = 0; k < _output_shape.dim(2); ++k)
+                    {
+                      ir::Coordinates coords{i, j, k, 0};
+                      memcpy(output_buffer + out_tensor.calcOffset(coords),
+                             input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T));
+                    }
+                  }
+                }
+                break;
+              }
+              default:
+                throw std::runtime_error("NYI");
+                break;
+            }
+            break;
+          }
+          default:
+            throw std::runtime_error("NYI");
+            break;
+        }
+      });
+    };
+    _input->access(fn);
+  }
+
+private:
+  std::shared_ptr<backend::ITensor> _input{nullptr};
+  std::shared_ptr<backend::ITensor> _output{nullptr};
+  ir::Shape _output_shape{};
+  ir::operation::Permute::Type _type{ir::operation::Permute::Type::COPY};
+  ir::DataType _dataType{ir::DataType::FLOAT32};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/ReduceLayer.cc b/runtime/onert/backend/cpu/kernel/ReduceLayer.cc
new file mode 100644
index 000000000..78c82f1cc
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/ReduceLayer.cc
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReduceLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Reduce.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+namespace
+{
+
+template <typename T>
+void evalLogic(const operand::Tensor *input, operand::Tensor *output, const std::vector<int> &axes,
+ bool keep_dims, T init_value, nnfw::cker::Reduce &reduce_kernel,
+ T reducer(const T current, const T in))
+{
+ reduce_kernel.prepare(input->num_dimensions(), axes.size());
+ bool result = reduce_kernel.ReduceGeneric<T>(
+ convertTensorToCkerShape(input), reinterpret_cast<const T *>(input->buffer()),
+ convertTensorToCkerShape(output), reinterpret_cast<T *>(output->buffer()), axes, keep_dims,
+ init_value, reducer);
+
+ if (!result)
+ {
+ throw std::runtime_error{"Reduce: Fail to run"};
+ }
+}
+
+template <typename T>
+void evalType(const operand::Tensor *input, operand::Tensor *output, const std::vector<int> &axes,
+ bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
+{
+ switch (reduce_type)
+ {
+ case ReduceType::kSum:
+ return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(0), reduce_kernel,
+ [](const T current, const T in) -> T { return in + current; });
+ break;
+ case ReduceType::kProd:
+ return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(1), reduce_kernel,
+ [](const T current, const T in) -> T { return in * current; });
+ break;
+ case ReduceType::kMax:
+ return evalLogic<T>(
+ input, output, axes, keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
+ [](const T current, const T in) -> T { return (in > current) ? in : current; });
+ break;
+ case ReduceType::kMin:
+ return evalLogic<T>(
+ input, output, axes, keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
+ [](const T current, const T in) -> T { return (in < current) ? in : current; });
+ break;
+ default:
+ throw std::runtime_error{"Reduce: Unsupported reduce type"};
+ }
+}
+
+template <ReduceType reduce_type>
+void evalGeneric(const operand::Tensor *input, operand::Tensor *output,
+ const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel)
+{
+ switch (input->data_type())
+ {
+ case OperandType::FLOAT32:
+ return evalType<float>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ case OperandType::INT32:
+ return evalType<int32_t>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ default:
+ throw std::runtime_error{"Reduce(generic): Unsupported input type"};
+ }
+}
+} // namespace
+
+ReduceLayer::ReduceLayer()
+ : _input(nullptr), _output(nullptr), _reduceType(ReduceType::kAny), _axes(), _keep_dims(false),
+ _reduce_kernel(new nnfw::cker::Reduce())
+{
+ // DO NOTHING
+}
+
+ReduceLayer::~ReduceLayer() = default;
+
+void ReduceLayer::configure(const operand::Tensor *input, operand::Tensor *output,
+ ReduceType reduceType, const std::vector<int> &axes, bool keep_dims)
+{
+ _input = input;
+ _output = output;
+ _reduceType = reduceType;
+ _axes = axes;
+ _keep_dims = keep_dims;
+}
+
+void ReduceLayer::run()
+{
+ switch (_reduceType)
+ {
+ case ReduceType::kSum:
+ evalGeneric<ReduceType::kSum>(_input, _output, _axes, _keep_dims, *_reduce_kernel);
+ break;
+ case ReduceType::kMax:
+ evalGeneric<ReduceType::kMax>(_input, _output, _axes, _keep_dims, *_reduce_kernel);
+ break;
+ case ReduceType::kMin:
+ evalGeneric<ReduceType::kMin>(_input, _output, _axes, _keep_dims, *_reduce_kernel);
+ break;
+ default:
+ throw std::runtime_error{"ReduceSum: Unsupported reduce type"};
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/ReduceLayer.h b/runtime/onert/backend/cpu/kernel/ReduceLayer.h
new file mode 100644
index 000000000..9310882c9
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/ReduceLayer.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_REDUCESUMLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_REDUCESUMLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+#include <memory>
+
+namespace nnfw
+{
+namespace cker
+{
+class Reduce;
+}
+} // namespace nnfw
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+enum class ReduceType
+{
+ kSum,
+ kProd,
+ kMax,
+ kMin,
+ kAny,
+};
+
+class ReduceLayer : public ::onert::exec::IFunction
+{
+public:
+ ReduceLayer();
+ ~ReduceLayer();
+
+public:
+ void configure(const operand::Tensor *input, operand::Tensor *output, ReduceType reduceType,
+ const std::vector<int> &axes, bool keep_dims);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_input;
+ operand::Tensor *_output;
+ ReduceType _reduceType;
+ std::vector<int> _axes;
+ bool _keep_dims;
+
+ std::unique_ptr<nnfw::cker::Reduce> _reduce_kernel;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_REDUCESUMLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/ReshapeLayer.cc b/runtime/onert/backend/cpu/kernel/ReshapeLayer.cc
new file mode 100644
index 000000000..b8bd5a45c
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/ReshapeLayer.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReshapeLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+ReshapeLayer::ReshapeLayer() : _input(nullptr), _shape(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+void ReshapeLayer::reshapeGeneric()
+{
+ // TODO use _shape to calculate shape of output when _shape is not nullptr && not constant
+
+ size_t count = _input->total_size();
+ memcpy(_output->buffer(), _input->buffer(), count);
+}
+
+void ReshapeLayer::configure(const operand::Tensor *input, const operand::Tensor *shape,
+ operand::Tensor *output)
+{
+ _input = input;
+ /* note : shape is optional. If not provided from model, _shape is nullptr. */
+ _shape = shape;
+ _output = output;
+}
+
+void ReshapeLayer::run() { reshapeGeneric(); }
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/ReshapeLayer.h b/runtime/onert/backend/cpu/kernel/ReshapeLayer.h
new file mode 100644
index 000000000..7a95b6d35
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/ReshapeLayer.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_RESHAPELAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_RESHAPELAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class ReshapeLayer : public ::onert::exec::IFunction
+{
+public:
+ ReshapeLayer();
+
+public:
+ void reshapeGeneric();
+
+ void configure(const operand::Tensor *input, const operand::Tensor *shape,
+ operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_input;
+ const operand::Tensor *_shape;
+ operand::Tensor *_output;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_RESHAPELAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/RsqrtLayer.cc b/runtime/onert/backend/cpu/kernel/RsqrtLayer.cc
new file mode 100644
index 000000000..d94ff8751
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/RsqrtLayer.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RsqrtLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Elementwise.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+RsqrtLayer::RsqrtLayer() : _input(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+void RsqrtLayer::rsqrtFloat32()
+{
+ nnfw::cker::Rsqrt(
+ convertTensorToCkerShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ convertTensorToCkerShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void RsqrtLayer::rsqrtQuant8() { throw std::runtime_error{"NYI : QASYMM8 not supported"}; }
+
+void RsqrtLayer::configure(const operand::Tensor *input, operand::Tensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void RsqrtLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ rsqrtFloat32();
+ }
+ else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ rsqrtQuant8();
+ }
+ else
+ {
+ throw std::runtime_error{"Rsqrt: unsupported data type"};
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/RsqrtLayer.h b/runtime/onert/backend/cpu/kernel/RsqrtLayer.h
new file mode 100644
index 000000000..188d53160
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/RsqrtLayer.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_RSQRTLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_RSQRTLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+class RsqrtLayer : public ::onert::exec::IFunction
+{
+public:
+ RsqrtLayer();
+
+ void configure(const operand::Tensor *input, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+    // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ void rsqrtFloat32();
+ void rsqrtQuant8();
+ const operand::Tensor *_input;
+ operand::Tensor *_output;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_RSQRTLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/ShapeLayer.cc b/runtime/onert/backend/cpu/kernel/ShapeLayer.cc
new file mode 100644
index 000000000..1cd183331
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/ShapeLayer.cc
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ShapeLayer.h"
+
+#include "OperationUtils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+ShapeLayer::ShapeLayer() : _input(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename T> void GetRawShape(const operand::Tensor *input, T *output_data)
+{
+ for (uint32_t i = 0; i < input->num_dimensions(); ++i)
+ {
+ output_data[i] = static_cast<T>(input->dimension(i));
+ }
+}
+
+void ShapeLayer::shape()
+{
+ if (_output->data_type() == OperandType::UINT32)
+ {
+ GetRawShape(_input, reinterpret_cast<uint32_t *>(_output->buffer()));
+ }
+ else if (_output->data_type() == OperandType::INT32)
+ {
+ GetRawShape(_input, reinterpret_cast<int32_t *>(_output->buffer()));
+ }
+ else
+ {
+ throw std::runtime_error{"NYI : not supported output type for ShapeLayer"};
+ }
+}
+
+void ShapeLayer::configure(const operand::Tensor *input, operand::Tensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void ShapeLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32 ||
+ _input->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ shape();
+ }
+ else
+ {
+ throw std::runtime_error{"NYI : not supported input type for ShapeLayer"};
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/ShapeLayer.h b/runtime/onert/backend/cpu/kernel/ShapeLayer.h
new file mode 100644
index 000000000..cb4acedb8
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/ShapeLayer.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_SHAPELAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_SHAPELAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class ShapeLayer : public ::onert::exec::IFunction
+{
+public:
+ ShapeLayer();
+
+public:
+ void shape();
+
+ void configure(const operand::Tensor *input, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ const operand::Tensor *_input;
+ operand::Tensor *_output;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_SHAPELAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/SinLayer.cc b/runtime/onert/backend/cpu/kernel/SinLayer.cc
new file mode 100644
index 000000000..db8186ac6
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/SinLayer.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SinLayer.h"
+#include "OperationUtils.h"
+
+#include <cker/operation/Elementwise.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+SinLayer::SinLayer() : _input(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+void SinLayer::sinFloat32()
+{
+ nnfw::cker::Sin(convertTensorToCkerShape(_input),
+ reinterpret_cast<const float *>(_input->buffer()),
+ convertTensorToCkerShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void SinLayer::sinQuant8() { throw std::runtime_error{"NYI"}; }
+
+void SinLayer::configure(const operand::Tensor *input, operand::Tensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void SinLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ sinFloat32();
+ }
+ else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ sinQuant8();
+ }
+ else
+ {
+ throw std::runtime_error{"Sin: unsupported data type"};
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/SinLayer.h b/runtime/onert/backend/cpu/kernel/SinLayer.h
new file mode 100644
index 000000000..7898cf050
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/SinLayer.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_SINLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_SINLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+class SinLayer : public ::onert::exec::IFunction
+{
+public:
+ SinLayer();
+
+ void configure(const operand::Tensor *input, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ void sinFloat32();
+ void sinQuant8();
+
+ const operand::Tensor *_input;
+ operand::Tensor *_output;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_SINLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/SliceLayer.cc b/runtime/onert/backend/cpu/kernel/SliceLayer.cc
new file mode 100644
index 000000000..1ade5fb44
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/SliceLayer.cc
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SliceLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Slice.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+SliceLayer::SliceLayer() : _input(nullptr), _begin(nullptr), _size(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename T>
+void SliceLayer::GetBeginAndSizeVectors(int dimensions, const operand::Tensor *begin,
+ const operand::Tensor *size, std::vector<int> *begins,
+ std::vector<int> *sizes)
+{
+ for (int idx = dimensions - 1; idx >= 0; --idx)
+ {
+ begins->push_back(reinterpret_cast<T *>(begin->buffer())[idx]);
+ sizes->push_back(reinterpret_cast<T *>(size->buffer())[idx]);
+ }
+}
+
+void SliceLayer::sliceFloat32()
+{
+ const int kMaxDim = nnfw::cker::Shape::kMaxSmallSize;
+
+ std::vector<int> begins;
+ std::vector<int> sizes;
+ begins.reserve(kMaxDim);
+ sizes.reserve(kMaxDim);
+
+ GetBeginAndSizeVectors<int32_t>(_input->num_dimensions(), _begin, _size, &begins, &sizes);
+
+ // begins : 0-based, sizes : 1-based
+ for (int i = _input->num_dimensions(); i < kMaxDim; ++i)
+ {
+ begins.push_back(0);
+ sizes.push_back(1);
+ }
+
+ nnfw::cker::SliceParams op_params;
+ op_params.begin_count = 4;
+ op_params.size_count = 4;
+ for (int i = 0; i < 4; ++i)
+ {
+ op_params.begin[i] = begins[3 - i];
+ op_params.size[i] = sizes[3 - i];
+ }
+
+ nnfw::cker::Slice(op_params, convertToExtendedCkerShape(_input),
+ reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()));
+}
+
+void SliceLayer::sliceQuant8()
+{
+ // cker quant8 slice is not implemented yet
+ throw std::runtime_error{"NYI"};
+}
+
+void SliceLayer::configure(const operand::Tensor *input, const operand::Tensor *begin,
+ const operand::Tensor *size, operand::Tensor *output)
+{
+ _input = input;
+ _output = output;
+ _begin = begin;
+ _size = size;
+}
+
+void SliceLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ sliceFloat32();
+ }
+ else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ sliceQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/SliceLayer.h b/runtime/onert/backend/cpu/kernel/SliceLayer.h
new file mode 100644
index 000000000..bb18c890c
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/SliceLayer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_SLICELAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_SLICELAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class SliceLayer : public ::onert::exec::IFunction
+{
+public:
+ SliceLayer();
+
+public:
+ void configure(const operand::Tensor *input, const operand::Tensor *begin,
+ const operand::Tensor *size, operand::Tensor *output);
+
+ void run();
+ void runSync()
+ {
+ // this abstract method is used just for profiling and called for
+ // backend::acl_common::AclFunction
+ run();
+ }
+
+private:
+ void sliceFloat32();
+ void sliceQuant8();
+
+ template <typename T>
+ void GetBeginAndSizeVectors(int dimensions, const operand::Tensor *begin,
+ const operand::Tensor *size, std::vector<int> *begins,
+ std::vector<int> *sizes);
+
+private:
+ const operand::Tensor *_input;
+ const operand::Tensor *_begin;
+ const operand::Tensor *_size;
+ operand::Tensor *_output;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_SLICELAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/SoftMaxLayer.cc b/runtime/onert/backend/cpu/kernel/SoftMaxLayer.cc
new file mode 100644
index 000000000..7a1ba48aa
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/SoftMaxLayer.cc
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SoftMaxLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/SoftMax.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+SoftMaxLayer::SoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0.0)
+{
+ // DO NOTHING
+}
+
+// Performs softmax along the input of size (input_size * batch_size).
+void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
+ float *out)
+{
+ assert(input_size > 0);
+
+ // For each batch
+ for (int b = 0; b < batch_size; b++)
+ {
+ // Find the max coeff.
+ float max_coeff = in[0];
+ for (int i = 1; i < input_size; i++)
+ {
+ if (in[i] > max_coeff)
+ max_coeff = in[i];
+ }
+
+ // Compute the normalized sum of exps.
+ float exp_sum = 0.0;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] = std::exp((in[i] - max_coeff) * beta);
+ exp_sum += out[i];
+ }
+
+ // Divide by the sum of exps.
+ float reciprocal_sum_exp = 1.f / exp_sum;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] *= reciprocal_sum_exp;
+ }
+
+ // Advance in and out pointers for the next batch.
+ in += input_size;
+ out += input_size;
+ }
+}
+
+void SoftMaxLayer::softmaxFloat32()
+{
+ if (getNumberOfDimensions(_input) == 2)
+ {
+ uint32_t batch_size = getSizeOfDimension(_input, 0);
+ if (batch_size == 0)
+ throw std::runtime_error("batch_size should not be 0");
+
+ uint32_t input_size = getNumberOfElements(_input) / batch_size;
+ Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, _beta,
+ reinterpret_cast<float *>(_output->buffer()));
+ }
+ else if (getNumberOfDimensions(_input) == 4)
+ {
+ nnfw::cker::SoftmaxParams op_params;
+ op_params.beta = _beta;
+ nnfw::cker::Softmax(op_params, convertTensorToCkerShape(_input),
+ reinterpret_cast<const float *>(_input->buffer()),
+ convertTensorToCkerShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+ }
+ else
+ {
+ throw std::runtime_error{"only 2D and 4D tensors supported"};
+ }
+}
+
+void SoftMaxLayer::softmaxQuant8()
+{
+ nnfw::cker::Shape descrIn4D(4);
+
+ if (getNumberOfDimensions(_input) == 2)
+ {
+ auto batch_size = getSizeOfDimension(_input, 0);
+ if (batch_size == 0)
+ throw std::runtime_error("batch_size should not be 0");
+
+ auto input_size = getNumberOfElements(_input) / batch_size;
+ descrIn4D.SetDim(0, batch_size);
+ descrIn4D.SetDim(1, 1);
+ descrIn4D.SetDim(2, 1);
+ descrIn4D.SetDim(3, input_size);
+ }
+ else if (getNumberOfDimensions(_input) == 4)
+ {
+ descrIn4D.SetDim(0, _input->dimension(0));
+ descrIn4D.SetDim(1, _input->dimension(1));
+ descrIn4D.SetDim(2, _input->dimension(2));
+ descrIn4D.SetDim(3, _input->dimension(3));
+ }
+ else
+ {
+ throw std::runtime_error{"only 2D and 4D tensors supported"};
+ }
+ if (_output->offset() != 0 || _output->scale() != 1.f / 256)
+ {
+ throw std::runtime_error{"incorrect scale / offset for output"};
+ }
+ static const int32_t kScaledDiffIntegerBits = 5;
+ const double input_beta_real_multiplier = std::min(
+ 1.0 * _beta * _input->scale() * (1 << (31 - kScaledDiffIntegerBits)), (1ll << 31) - 1.0);
+ int32_t input_multiplier = 0;
+ int32_t input_left_shift = 0;
+ QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, &input_multiplier,
+ &input_left_shift);
+ float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
+
+ nnfw::cker::SoftmaxParams op_params;
+ op_params.input_multiplier = input_multiplier;
+ op_params.input_left_shift = input_left_shift;
+ op_params.diff_min = diff_min;
+ nnfw::cker::Softmax(op_params, descrIn4D, reinterpret_cast<const uint8_t *>(_input->buffer()),
+ descrIn4D, reinterpret_cast<uint8_t *>(_output->buffer()));
+}
+
+void SoftMaxLayer::configure(const operand::Tensor *input, const float beta,
+ operand::Tensor *output)
+{
+ _input = input;
+ _output = output;
+ _beta = beta;
+}
+
+void SoftMaxLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ softmaxFloat32();
+ }
+ else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+ {
+ softmaxQuant8();
+ }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/SoftMaxLayer.h b/runtime/onert/backend/cpu/kernel/SoftMaxLayer.h
new file mode 100644
index 000000000..bb29b4fda
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/SoftMaxLayer.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Softmax kernel for the CPU backend; supports FP32 and asymmetric-quantized
+// UINT8 tensors (see run() for the dispatch).
+class SoftMaxLayer : public ::onert::exec::IFunction
+{
+public:
+  SoftMaxLayer();
+
+public:
+  // Kernel bodies, selected by run() from the input's data type.
+  void softmaxFloat32();
+
+  void softmaxQuant8();
+
+  // Store operands and the beta scaling factor; no computation happens here.
+  void configure(const operand::Tensor *input, const float beta, operand::Tensor *output);
+
+  void run();
+  void runSync()
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  const operand::Tensor *_input;  // set by configure(); not owned
+  operand::Tensor *_output;       // set by configure(); not owned
+
+  float _beta;  // scaling factor applied to the inputs before softmax
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/SplitLayer.cc b/runtime/onert/backend/cpu/kernel/SplitLayer.cc
new file mode 100644
index 000000000..e4c5ef3a0
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/SplitLayer.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Split.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Members receive their real values in configure(); the constructor only
+// zero-initializes them.
+SplitLayer::SplitLayer() : _input(nullptr), _num_splits(0), _axis(0), _outputs()
+{
+  // DO NOTHING
+}
+
+// Split _input into _num_splits parts along _axis (FLOAT32 only).
+// Fix: the original built `outputDims`/`outputDimsPtr` vectors that were
+// never passed to cker::Split — that dead code is removed here.
+void SplitLayer::splitFloat32()
+{
+  nnfw::cker::SplitParams op_params;
+  op_params.axis = _axis;
+  op_params.num_split = _num_splits;
+
+  // Collect the raw output buffers; cker writes each slice directly into them.
+  std::vector<float *> outputFloatPtrs;
+  outputFloatPtrs.reserve(_num_splits);
+  for (const auto output : _outputs)
+  {
+    outputFloatPtrs.emplace_back(reinterpret_cast<float *>(output->buffer()));
+  }
+
+  // Only the first output's shape is passed, matching the cker::Split API
+  // (presumably all split outputs share one shape — as in the original call).
+  nnfw::cker::Split<float>(op_params, convertTensorToCkerShape(_input),
+                           reinterpret_cast<float *>(_input->buffer()),
+                           convertTensorToCkerShape(_outputs[0]), outputFloatPtrs.data());
+}
+
+// Quantized split is not available in cker yet.
+void SplitLayer::splitQuant8()
+{
+  throw std::runtime_error{"Split: NYI quant8 type"};
+}
+
+// Store the split description; the actual work happens in run().
+void SplitLayer::configure(const operand::Tensor *input, uint16_t num_splits, int16_t axis,
+                           std::vector<operand::Tensor *> &outputs)
+{
+  assert(input != nullptr);
+
+  _input = input;
+  _axis = axis;
+  _num_splits = num_splits;
+  _outputs = outputs;
+}
+
+// Select the kernel matching the input's data type; anything else is rejected.
+void SplitLayer::run()
+{
+  switch (_input->data_type())
+  {
+    case OperandType::FLOAT32:
+      splitFloat32();
+      break;
+    case OperandType::QUANT8_ASYMM:
+      splitQuant8();
+      break;
+    default:
+      throw std::runtime_error{"Split: Unsupported input type"};
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/SplitLayer.h b/runtime/onert/backend/cpu/kernel/SplitLayer.h
new file mode 100644
index 000000000..9f236f0af
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/SplitLayer.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_SPLITLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_SPLITLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Split kernel for the CPU backend: slices one input tensor into
+// _num_splits output tensors along _axis. Only FLOAT32 is implemented.
+class SplitLayer : public ::onert::exec::IFunction
+{
+public:
+  SplitLayer();
+
+public:
+  // Kernel bodies, selected by run() from the input's data type.
+  void splitFloat32();
+
+  void splitQuant8();
+
+  // Store the split description; no computation happens here.
+  void configure(const operand::Tensor *input, uint16_t num_splits, int16_t axis,
+                 std::vector<operand::Tensor *> &outputs);
+
+  void run();
+  void runSync()
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  const operand::Tensor *_input;           // set by configure(); not owned
+  uint16_t _num_splits;                    // number of output slices
+  int16_t _axis;                           // axis along which to split
+  std::vector<operand::Tensor *> _outputs; // one tensor per slice; not owned
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_SPLITLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/StridedSliceLayer.cc b/runtime/onert/backend/cpu/kernel/StridedSliceLayer.cc
new file mode 100644
index 000000000..22315c7c6
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/StridedSliceLayer.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StridedSliceLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/StridedSlice.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Members receive their real values in configure(); the constructor only
+// zero-initializes them.
+StridedSliceLayer::StridedSliceLayer()
+    : _input(nullptr), _begin(nullptr), _end(nullptr), _strides(nullptr), _output(nullptr),
+      _begin_mask(0), _ellipsis_mask(0), _end_mask(0), _new_axis_mask(0), _shrink_axis_mask(0),
+      _rank(0)
+{
+}
+
+// Run the FP32 strided-slice kernel: build cker params from the begin/end/
+// strides tensors and the masks, sanity-check the output size, then slice.
+void StridedSliceLayer::stridedSliceFloat32()
+{
+  // NOTE(review): the begin/end/strides buffers are reinterpreted as uint32_t
+  // even though slice indices are conceptually signed — confirm this matches
+  // the element type buildStridedSliceParams expects.
+  auto op_params = nnfw::cker::buildStridedSliceParams(
+      reinterpret_cast<uint32_t *>(_begin->buffer()), reinterpret_cast<uint32_t *>(_end->buffer()),
+      reinterpret_cast<uint32_t *>(_strides->buffer()), _begin_mask, _end_mask, _shrink_axis_mask,
+      _rank);
+
+  // Presumably verifies the declared output shape against the computed slice.
+  nnfw::cker::checkOutputSize(op_params, convertTensorToCkerShape(_input),
+                              convertTensorToCkerShape(_output), _rank);
+
+  nnfw::cker::StridedSlice(op_params, convertTensorToCkerShape(_input),
+                           reinterpret_cast<const float *>(_input->buffer()),
+                           convertTensorToCkerShape(_output),
+                           reinterpret_cast<float *>(_output->buffer()));
+}
+
+// Quantized strided slice is not available in cker yet.
+void StridedSliceLayer::stridedSliceQuant8() { throw std::runtime_error{"NYI"}; }
+
+// Capture operands and masks for run(). Ellipsis and new-axis masks are not
+// supported by this backend and are pinned to zero.
+void StridedSliceLayer::configure(const operand::Tensor *input, const operand::Tensor *begin,
+                                  const operand::Tensor *end, const operand::Tensor *strides,
+                                  operand::Tensor *output, const int32_t begin_mask,
+                                  const int32_t end_mask, const int32_t shrink_axis_mask,
+                                  const int32_t rank)
+{
+  _input = input;
+  _output = output;
+  _begin = begin;
+  _end = end;
+  _strides = strides;
+
+  _begin_mask = begin_mask;
+  _end_mask = end_mask;
+  _shrink_axis_mask = shrink_axis_mask;
+  _ellipsis_mask = 0;
+  _new_axis_mask = 0;
+  _rank = rank;
+}
+
+// Dispatch to the kernel matching the input's data type.
+// Previously an unsupported type fell through silently, leaving the output
+// untouched; now it throws, consistent with SplitLayer::run.
+void StridedSliceLayer::run()
+{
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    stridedSliceFloat32();
+  }
+  else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+  {
+    stridedSliceQuant8();
+  }
+  else
+  {
+    throw std::runtime_error{"StridedSlice: Unsupported input type"};
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/StridedSliceLayer.h b/runtime/onert/backend/cpu/kernel/StridedSliceLayer.h
new file mode 100644
index 000000000..7888eff3c
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/StridedSliceLayer.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_STRIDEDSLICELAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_STRIDEDSLICELAYER_H__
+
+#include "../operand/Tensor.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Strided-slice kernel for the CPU backend: extracts a sub-tensor described
+// by begin/end/strides tensors plus bit masks. Only FLOAT32 is implemented.
+class StridedSliceLayer : public ::onert::exec::IFunction
+{
+public:
+  StridedSliceLayer();
+
+public:
+  // Store operands, masks and rank; no computation happens here.
+  void configure(const operand::Tensor *input, const operand::Tensor *begin,
+                 const operand::Tensor *end, const operand::Tensor *strides,
+                 operand::Tensor *output, const int32_t begin_mask, const int32_t end_mask,
+                 const int32_t shrink_axis_mask, const int32_t rank);
+
+  void run();
+  void runSync()
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  // Kernel bodies, selected by run() from the input's data type.
+  void stridedSliceFloat32();
+  void stridedSliceQuant8();
+
+private:
+  const operand::Tensor *_input;   // set by configure(); not owned
+  const operand::Tensor *_begin;   // per-axis slice start indices
+  const operand::Tensor *_end;     // per-axis slice end indices
+  const operand::Tensor *_strides; // per-axis step sizes
+  operand::Tensor *_output;
+
+  int32_t _begin_mask;
+  int32_t _ellipsis_mask; // always 0 (unsupported); see configure()
+  int32_t _end_mask;
+  int32_t _new_axis_mask; // always 0 (unsupported); see configure()
+  int32_t _shrink_axis_mask;
+
+  int32_t _rank;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_STRIDEDSLICELAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/SubLayer.cc b/runtime/onert/backend/cpu/kernel/SubLayer.cc
new file mode 100644
index 000000000..15aa43835
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/SubLayer.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SubLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Element-wise FP32 subtraction (lhs - rhs) with the fused activation range
+// applied; shapes that differ go through the slow broadcasting kernel.
+void SubLayer::subFloat32()
+{
+  float act_min, act_max;
+  CalculateActivationRangeFloat(_activation, &act_min, &act_max);
+
+  nnfw::cker::BinaryArithmeticOpParam op_params;
+  op_params.type = nnfw::cker::BinaryArithmeticOpType::SUB;
+  op_params.float_activation_min = act_min;
+  op_params.float_activation_max = act_max;
+
+  if (HaveSameShapes(_lhs, _rhs))
+  {
+    nnfw::cker::BinaryArithmeticOp(
+        op_params, convertTensorToCkerShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
+        convertTensorToCkerShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
+        convertTensorToCkerShape(_output), reinterpret_cast<float *>(_output->buffer()));
+  }
+  else
+  {
+    nnfw::cker::BroadcastBinaryArithmeticOpSlow(
+        op_params, convertToExtendedCkerShape(_lhs),
+        reinterpret_cast<const float *>(_lhs->buffer()), convertToExtendedCkerShape(_rhs),
+        reinterpret_cast<const float *>(_rhs->buffer()), convertToExtendedCkerShape(_output),
+        reinterpret_cast<float *>(_output->buffer()));
+  }
+}
+
+// Quantized subtraction: the activation range is computed as groundwork and a
+// parameter block is sketched in comments, but the cker kernel does not exist
+// yet, so this always throws.
+void SubLayer::subQuant8()
+{
+  int32_t output_activation_min, output_activation_max;
+  CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
+                                &output_activation_max);
+  // nnfw::cker::SubParam op_params;
+  // op_params.quantized_activation_max = output_activation_max;
+  // op_params.quantized_activation_min = output_activation_min;
+
+  // cker quant8 sub is not implemented yet
+  throw std::runtime_error{"NYI"};
+}
+
+// Remember the operands and fused activation for run(); nothing is computed.
+void SubLayer::configure(const operand::Tensor *lhs, const operand::Tensor *rhs,
+                         const ir::Activation activation, operand::Tensor *output)
+{
+  _activation = activation;
+  _lhs = lhs;
+  _rhs = rhs;
+  _output = output;
+}
+
+// Dispatch on the output's data type (as the original did).
+// Previously an unsupported type fell through silently; now it throws,
+// consistent with SplitLayer::run / UnpackLayer::run.
+void SubLayer::run()
+{
+  if (_output->data_type() == OperandType::FLOAT32)
+  {
+    subFloat32();
+  }
+  else if (_output->data_type() == OperandType::QUANT8_ASYMM)
+  {
+    subQuant8();
+  }
+  else
+  {
+    throw std::runtime_error{"Sub: Unsupported data type"};
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/SubLayer.h b/runtime/onert/backend/cpu/kernel/SubLayer.h
new file mode 100644
index 000000000..48fa97669
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/SubLayer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_SUBLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_SUBLAYER_H__
+
+#include "../operand/Tensor.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Element-wise subtraction kernel (lhs - rhs) with a fused activation for the
+// CPU backend. Only FLOAT32 is implemented; quant8 throws NYI.
+class SubLayer : public ::onert::exec::IFunction
+{
+public:
+  SubLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+  {
+    // DO NOTHING
+  }
+
+public:
+  // Kernel bodies, selected by run() from the output's data type.
+  void subFloat32();
+
+  void subQuant8();
+
+  // Store operands and fused activation; no computation happens here.
+  void configure(const operand::Tensor *lhs, const operand::Tensor *rhs,
+                 const ir::Activation activation, operand::Tensor *output);
+
+  void run();
+  void runSync()
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  const operand::Tensor *_lhs;  // set by configure(); not owned
+  const operand::Tensor *_rhs;  // set by configure(); not owned
+  operand::Tensor *_output;
+
+  // Fused activation clamping applied to the result; defaults to NONE.
+  ir::Activation _activation{ir::Activation::NONE};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_SUBLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/TanhLayer.cc b/runtime/onert/backend/cpu/kernel/TanhLayer.cc
new file mode 100644
index 000000000..6a9274f7a
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/TanhLayer.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TanhLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Tanh.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Operands are assigned in configure(); the constructor only nulls them.
+TanhLayer::TanhLayer() : _input(nullptr), _output(nullptr)
+{
+  // DO NOTHING
+}
+
+// Apply tanh element-wise over the FP32 input into the output buffer.
+void TanhLayer::tanhFloat32()
+{
+  const auto *in = reinterpret_cast<const float *>(_input->buffer());
+  auto *out = reinterpret_cast<float *>(_output->buffer());
+  nnfw::cker::Tanh(convertTensorToCkerShape(_input), in, convertTensorToCkerShape(_output), out);
+}
+
+// Quantized tanh is not available in cker yet.
+void TanhLayer::tanhQuant8() { throw std::runtime_error{"NYI"}; }
+
+// Remember the operands for run(); nothing is computed here.
+void TanhLayer::configure(const operand::Tensor *input, operand::Tensor *output)
+{
+  _output = output;
+  _input = input;
+}
+
+// Dispatch to the kernel matching the input's data type.
+// Previously an unsupported type fell through silently; now it throws,
+// consistent with SplitLayer::run / UnpackLayer::run.
+void TanhLayer::run()
+{
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    tanhFloat32();
+  }
+  else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+  {
+    tanhQuant8();
+  }
+  else
+  {
+    throw std::runtime_error{"Tanh: Unsupported input type"};
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/TanhLayer.h b/runtime/onert/backend/cpu/kernel/TanhLayer.h
new file mode 100644
index 000000000..8fb621cdb
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/TanhLayer.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_TANHLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_TANHLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Element-wise tanh kernel for the CPU backend. Only FLOAT32 is implemented;
+// quant8 throws NYI.
+class TanhLayer : public ::onert::exec::IFunction
+{
+public:
+  TanhLayer();
+
+public:
+  // Kernel bodies, selected by run() from the input's data type.
+  void tanhFloat32();
+
+  void tanhQuant8();
+
+  // Store the operands; no computation happens here.
+  void configure(const operand::Tensor *input, operand::Tensor *output);
+
+  void run();
+  void runSync()
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  const operand::Tensor *_input;  // set by configure(); not owned
+  operand::Tensor *_output;       // set by configure(); not owned
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_TANHLAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/TransposeLayer.cc b/runtime/onert/backend/cpu/kernel/TransposeLayer.cc
new file mode 100644
index 000000000..31b1dd12d
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/TransposeLayer.cc
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TransposeLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Transpose.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Members receive their real values in configure(); the constructor only
+// zero-initializes them.
+TransposeLayer::TransposeLayer() : _input(nullptr), _output(nullptr), _perm(), _rank(0)
+{
+  // DO NOTHING
+}
+
+// Permute the input's dimensions according to _perm (FLOAT32 only).
+void TransposeLayer::transposeFloat32()
+{
+  // Copy the permutation vector into cker's fixed-size parameter block.
+  nnfw::cker::TransposeParams param;
+  param.perm_count = _rank;
+  for (int32_t axis = 0; axis < _rank; ++axis)
+  {
+    param.perm[axis] = _perm[axis];
+  }
+
+  nnfw::cker::Transpose(
+      param, convertTensorToCkerShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+      convertTensorToCkerShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void TransposeLayer::transposeQuant8()
+{
+  // cker quant8 transpose is not implemented yet
+  // (original comment said "tanh" — a copy-paste leftover from TanhLayer)
+  throw std::runtime_error{"NYI"};
+}
+
+// Remember operands, permutation and rank for run(); nothing is computed.
+void TransposeLayer::configure(const operand::Tensor *input, operand::Tensor *output,
+                               const std::vector<int> &perm, int32_t rank)
+{
+  _perm = perm;
+  _rank = rank;
+  _input = input;
+  _output = output;
+}
+
+// Dispatch to the kernel matching the input's data type.
+// Previously an unsupported type fell through silently; now it throws,
+// consistent with SplitLayer::run / UnpackLayer::run.
+void TransposeLayer::run()
+{
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    transposeFloat32();
+  }
+  else if (_input->data_type() == OperandType::QUANT8_ASYMM)
+  {
+    transposeQuant8();
+  }
+  else
+  {
+    throw std::runtime_error{"Transpose: Unsupported input type"};
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/TransposeLayer.h b/runtime/onert/backend/cpu/kernel/TransposeLayer.h
new file mode 100644
index 000000000..ffe7c2ae5
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/TransposeLayer.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_TRANSPOSELAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_TRANSPOSELAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Transpose kernel for the CPU backend: permutes the input's dimensions
+// according to a permutation vector. Only FLOAT32 is implemented.
+class TransposeLayer : public ::onert::exec::IFunction
+{
+public:
+  TransposeLayer();
+
+public:
+  // Kernel bodies, selected by run() from the input's data type.
+  void transposeFloat32();
+
+  void transposeQuant8();
+
+  // Store operands, permutation and rank; no computation happens here.
+  void configure(const operand::Tensor *input, operand::Tensor *output,
+                 const std::vector<int> &perm, int32_t rank);
+
+  void run();
+  // Used just for profiling; see the other layers' runSync().
+  void runSync() { run(); }
+
+private:
+  const operand::Tensor *_input;  // set by configure(); not owned
+  operand::Tensor *_output;       // set by configure(); not owned
+  std::vector<int> _perm;         // output axis i takes input axis _perm[i]
+  int32_t _rank;
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_TRANSPOSELAYER_H__
diff --git a/runtime/onert/backend/cpu/kernel/UnpackLayer.cc b/runtime/onert/backend/cpu/kernel/UnpackLayer.cc
new file mode 100644
index 000000000..fe07e3e19
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/UnpackLayer.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UnpackLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Unpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Members receive their real values in configure(); the constructor only
+// zero-initializes them.
+UnpackLayer::UnpackLayer() : _input(nullptr), _outputs(), _axis(0), _num_output(0)
+{
+  // DO NOTHING
+}
+
+// Unpack _input into _num_output tensors along _axis (FLOAT32 only).
+// Fix: the original built `outputDims`/`outputDimsPtr` vectors that were
+// never passed to cker::Unpack — that dead code is removed here
+// (same cleanup as SplitLayer::splitFloat32).
+void UnpackLayer::unpackFloat32()
+{
+  nnfw::cker::UnpackParams op_params;
+  op_params.axis = _axis;
+  op_params.num_split = _num_output;
+
+  // Collect the raw output buffers; cker writes each slice directly into them.
+  std::vector<float *> outputFloatPtrs;
+  outputFloatPtrs.reserve(_num_output);
+  for (const auto output : _outputs)
+  {
+    outputFloatPtrs.emplace_back(reinterpret_cast<float *>(output->buffer()));
+  }
+
+  // Only the first output's shape is passed, matching the cker::Unpack API
+  // (presumably all unpacked outputs share one shape — as in the original call).
+  nnfw::cker::Unpack<float>(op_params, convertTensorToCkerShape(_input),
+                            reinterpret_cast<float *>(_input->buffer()),
+                            convertTensorToCkerShape(_outputs[0]), outputFloatPtrs.data());
+}
+
+void UnpackLayer::unpackQuant8()
+{
+  // cker quant8 unpack is not implemented yet
+  // (original comment said "pack" — a copy-paste slip; the throw message is right)
+  throw std::runtime_error{"Unpack: NYI quant8 type"};
+}
+
+// Store the unpack description; validates that one output tensor was supplied
+// per expected slice. The actual work happens in run().
+void UnpackLayer::configure(const operand::Tensor *input, uint32_t axis, int32_t num,
+                            std::vector<operand::Tensor *> &outputs)
+{
+  assert(input != nullptr);
+  assert(outputs.size() > 0);
+  assert(outputs.size() == (size_t)num);
+
+  _axis = axis;
+  _input = input;
+  _outputs = outputs;
+  _num_output = num;
+}
+
+// Select the kernel matching the input's data type; anything else is rejected.
+void UnpackLayer::run()
+{
+  switch (_input->data_type())
+  {
+    case OperandType::FLOAT32:
+      unpackFloat32();
+      break;
+    case OperandType::QUANT8_ASYMM:
+      unpackQuant8();
+      break;
+    default:
+      throw std::runtime_error{"Unpack: Unsupported input type"};
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/kernel/UnpackLayer.h b/runtime/onert/backend/cpu/kernel/UnpackLayer.h
new file mode 100644
index 000000000..23d064db2
--- /dev/null
+++ b/runtime/onert/backend/cpu/kernel/UnpackLayer.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_KERNEL_UNPACKLAYER_H__
+#define __ONERT_BACKEND_CPU_KERNEL_UNPACKLAYER_H__
+
+#include "../operand/Tensor.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Unpack kernel for the CPU backend: slices one input tensor into
+// _num_output tensors along _axis. Only FLOAT32 is implemented.
+class UnpackLayer : public ::onert::exec::IFunction
+{
+public:
+  UnpackLayer();
+
+public:
+  // Kernel bodies, selected by run() from the input's data type.
+  void unpackFloat32();
+
+  void unpackQuant8();
+
+  // Store the unpack description; no computation happens here.
+  void configure(const operand::Tensor *input, uint32_t axis, int32_t num_output,
+                 std::vector<operand::Tensor *> &output);
+
+  void run();
+  void runSync()
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  const operand::Tensor *_input;           // set by configure(); not owned
+  std::vector<operand::Tensor *> _outputs; // one tensor per slice; not owned
+  uint32_t _axis;                          // axis along which to unpack
+  int32_t _num_output;                     // must equal _outputs.size()
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_KERNEL_UNPACKLAYER_H__
diff --git a/runtime/onert/backend/cpu/operand/Tensor.cc b/runtime/onert/backend/cpu/operand/Tensor.cc
new file mode 100644
index 000000000..96ee93023
--- /dev/null
+++ b/runtime/onert/backend/cpu/operand/Tensor.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace operand
+{
+
+// Byte offset of the element at `coords`, assuming row-major layout:
+// flat index = ((c0 * d1 + c1) * d2 + c2) ... , then scaled by element size.
+size_t Tensor::calcOffset(const ir::Coordinates &coords) const
+{
+  const size_t rank = num_dimensions();
+  size_t flat_index = 0;
+  for (size_t axis = 0; axis < rank; ++axis)
+  {
+    flat_index = flat_index * dimension(axis) + coords[axis];
+  }
+  return flat_index * sizeOfDataType(data_type());
+}
+
+// CPU-backend tensors live in host memory, so the callback can be invoked
+// directly on this tensor — no staging copy or synchronization is needed.
+void Tensor::access(const std::function<void(ITensor &)> &fn) { fn(*this); }
+
+} // namespace operand
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/operand/Tensor.h b/runtime/onert/backend/cpu/operand/Tensor.h
new file mode 100644
index 000000000..f9e7b34d1
--- /dev/null
+++ b/runtime/onert/backend/cpu/operand/Tensor.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPERAND_TENSOR_H__
+#define __ONERT_BACKEND_CPU_OPERAND_TENSOR_H__
+
+#include "Allocator.h"
+
+#include <backend/ITensor.h>
+#include <ir/OperandInfo.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace operand
+{
+
+/**
+ * @brief CPU backend tensor, backed either by an externally owned buffer
+ *        (setBuffer(uint8_t*)) or by a cpu_common::Allocator
+ *        (setBuffer(shared_ptr<Allocator>)) — exactly one of the two must be
+ *        called once. Reference counting via increase_ref()/decrease_ref()
+ *        drops the backing storage when the count reaches zero.
+ *
+ * NOTE(review): assert() is used below but <cassert> is not included by this
+ * header directly — presumably provided transitively; confirm.
+ */
+class Tensor : public ITensor
+{
+public:
+  Tensor() = delete;
+
+public:
+  Tensor(const ir::OperandInfo &info)
+      : _info(info), _buffer(nullptr), _num_references(0), _allocator(nullptr)
+  {
+    // DO NOTHING
+  }
+
+public:
+  // Only one of two method 'setBuffer' must be called once
+  void setBuffer(uint8_t *buffer)
+  {
+    assert(_buffer == nullptr && _allocator == nullptr);
+    _buffer = buffer;
+  }
+  void setBuffer(const std::shared_ptr<cpu_common::Allocator> &alloc)
+  {
+    assert(_buffer == nullptr && _allocator == nullptr);
+    _allocator = alloc;
+  }
+  ir::DataType data_type() const { return _info.typeInfo().type(); }
+  float scale() const { return _info.typeInfo().scale(); }
+  int32_t offset() const { return _info.typeInfo().offset(); }
+
+public:
+  // Prefer the allocator-owned base pointer when present; otherwise return the
+  // externally provided buffer (nullptr when neither setBuffer was called).
+  uint8_t *buffer() const override
+  {
+    if (_allocator != nullptr)
+      return _allocator->base();
+    else
+      return _buffer;
+  }
+  /**
+   * @brief Get dimension by index
+   *
+   * @param index Index to get diemension
+   * @return size_t Dimension at index
+   * @note N : dimension(0)
+   *       H : dimension(1)
+   *       W : dimension(2)
+   *       C : dimension(3)
+   */
+  size_t dimension(size_t index) const override { return _info.shape().dim(index); }
+  size_t num_dimensions() const override { return _info.shape().rank(); }
+  size_t total_size() const override { return _info.total_size(); }
+  size_t calcOffset(const ir::Coordinates &coords) const override;
+  // This backend always reports NHWC layout.
+  ir::Layout layout() const override { return ir::Layout::NHWC; }
+  bool has_padding() const override { return false; }
+  void access(const std::function<void(ITensor &tensor)> &fn) final;
+  bool is_dynamic() const override { return _info.memAllocType() == ir::MemAllocType::DYNAMIC; }
+
+  void increase_ref()
+  {
+    assert(_buffer != nullptr || _allocator != nullptr);
+    ++_num_references;
+  }
+  // Drop one reference; on the last one, forget the external buffer or free
+  // the allocator-backed storage.
+  void decrease_ref()
+  {
+    assert(_buffer != nullptr || _allocator != nullptr);
+    assert(_num_references > 0);
+    --_num_references;
+    // Only constant tensor has allocator pointer
+    if (_num_references == 0)
+    {
+      if (_buffer != nullptr)
+        _buffer = nullptr;
+      else
+      {
+        _allocator->release();
+        _allocator = nullptr;
+      }
+    }
+  }
+
+private:
+  ir::OperandInfo _info;
+  uint8_t *_buffer;          // externally owned storage (non-constant tensors)
+  int32_t _num_references;   // manual reference count, see increase/decrease_ref
+  std::shared_ptr<cpu_common::Allocator> _allocator; // owned storage (constant tensors)
+};
+
+} // namespace operand
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPERAND_TENSOR_H__
diff --git a/runtime/onert/backend/cpu_common/Allocator.cc b/runtime/onert/backend/cpu_common/Allocator.cc
new file mode 100644
index 000000000..253fc4753
--- /dev/null
+++ b/runtime/onert/backend/cpu_common/Allocator.cc
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Allocator.h"
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+// Allocate a contiguous `capacity`-byte arena up front; the memory is freed
+// when release() resets _base or when the Allocator is destroyed.
+Allocator::Allocator(uint32_t capacity)
+{
+  _base = std::make_unique<uint8_t[]>(capacity);
+
+  VERBOSE(ALLOC) << "allocation capacity: " << capacity << std::endl;
+  VERBOSE(ALLOC) << "base pointer: " << static_cast<void *>(_base.get()) << std::endl;
+}
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu_common/Allocator.h b/runtime/onert/backend/cpu_common/Allocator.h
new file mode 100644
index 000000000..fa67fc7c4
--- /dev/null
+++ b/runtime/onert/backend/cpu_common/Allocator.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        Allocator.h
+ * @brief       This file contains Allocator related classes
+ */
+
+#ifndef __ONERT_BACKEND_CPU_COMMON_ALLOCATOR_H__
+#define __ONERT_BACKEND_CPU_COMMON_ALLOCATOR_H__
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+/**
+ * @brief Class to allocate memory
+ */
+/**
+ * @brief Class to allocate memory
+ *
+ * Owns a single fixed-size byte arena allocated in the constructor.
+ * NOTE(review): uint8_t/uint32_t are used but <cstdint> is not included
+ * directly (only <memory>) — presumably available transitively; confirm.
+ */
+class Allocator
+{
+public:
+  Allocator(uint32_t capacity);
+  /**
+   * @brief Get memory base pointer
+   * @return base pointer
+   */
+  uint8_t *base() const { return _base.get(); }
+  // Frees the arena immediately; base() returns nullptr afterwards.
+  void release() { _base.reset(); }
+
+private:
+  std::unique_ptr<uint8_t[]> _base;
+};
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_COMMON_ALLOCATOR_H__
diff --git a/runtime/onert/backend/cpu_common/CMakeLists.txt b/runtime/onert/backend/cpu_common/CMakeLists.txt
new file mode 100644
index 000000000..bc9e014e7
--- /dev/null
+++ b/runtime/onert/backend/cpu_common/CMakeLists.txt
@@ -0,0 +1,35 @@
+# Build the cpu_common static library from all sources except unit tests
+# (*.test.cc), which are collected separately for the test executable below.
+file(GLOB SOURCES "*.cc")
+file(GLOB_RECURSE TESTS "*.test.cc")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(${LIB_ONERT_BACKEND_CPU_COMMON} STATIC ${SOURCES})
+
+target_include_directories(${LIB_ONERT_BACKEND_CPU_COMMON} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_link_libraries(${LIB_ONERT_BACKEND_CPU_COMMON} PUBLIC onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU_COMMON} PRIVATE nnfw_lib_misc)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU_COMMON} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU_COMMON} PRIVATE nnfw_coverage)
+
+# PIC so this static archive can be linked into shared backend libraries.
+set_target_properties(${LIB_ONERT_BACKEND_CPU_COMMON} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+set_target_properties(${LIB_ONERT_BACKEND_CPU_COMMON} PROPERTIES OUTPUT_NAME backend_cpu_common)
+
+install(TARGETS ${LIB_ONERT_BACKEND_CPU_COMMON} ARCHIVE DESTINATION lib)
+install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+        DESTINATION "include/onert"
+        FILES_MATCHING PATTERN "*.h"
+        )
+
+# Everything below is test-only; skip when tests are disabled.
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+# Unit Tests
+set(TEST_ONERT_BACKEND_CPU_COMMON test_onert_backend_cpu_common)
+
+add_executable(${TEST_ONERT_BACKEND_CPU_COMMON} ${TESTS})
+
+target_link_libraries(${TEST_ONERT_BACKEND_CPU_COMMON} ${LIB_ONERT_BACKEND_CPU_COMMON})
+target_link_libraries(${TEST_ONERT_BACKEND_CPU_COMMON} gtest gtest_main dl ${LIB_PTHREAD})
+
+add_test(${TEST_ONERT_BACKEND_CPU_COMMON} ${TEST_ONERT_BACKEND_CPU_COMMON})
+install(TARGETS ${TEST_ONERT_BACKEND_CPU_COMMON} DESTINATION unittest)
diff --git a/runtime/onert/backend/cpu_common/MemoryManager.cc b/runtime/onert/backend/cpu_common/MemoryManager.cc
new file mode 100644
index 000000000..df6aa12f8
--- /dev/null
+++ b/runtime/onert/backend/cpu_common/MemoryManager.cc
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MemoryManager.h"
+
+#include <cassert>
+
+#include <MemoryPlannerFactory.h>
+#include "util/ConfigSource.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+// Default construction picks the planner from the CPU_MEMORY_PLANNER config
+// key (see createMemoryPlanner() below).
+MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()}
+{
+  // DO NOTHING
+}
+
+// Construction with an explicit planner key ("FirstFit", "Bump", "WIC", ...).
+MemoryManager::MemoryManager(const std::string planner_id)
+    : _mem_planner{createMemoryPlanner(planner_id)}
+{
+  // DO NOTHING
+}
+
+cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner()
+{
+  auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER);
+  return cpu_common::MemoryPlannerFactory::get().create(planner_id);
+}
+
+cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id)
+{
+  return cpu_common::MemoryPlannerFactory::get().create(planner_id);
+}
+
+// Forward plan bookkeeping to the planner; no memory is allocated until
+// allocate() is called.
+void MemoryManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
+{
+  _mem_planner->claim(ind, size);
+}
+
+void MemoryManager::releasePlan(const ir::OperandIndex &ind) { _mem_planner->release(ind); }
+
+// Allocate one arena sized to the planner's total capacity; individual
+// operands live at planned offsets inside it (see getBuffer()).
+void MemoryManager::allocate(void)
+{
+  _mem_alloc = std::make_shared<cpu_common::Allocator>(_mem_planner->capacity());
+  assert(_mem_alloc->base());
+}
+
+// Resolve an operand to its address inside the arena. The operand must have
+// been claimed before allocate() was called.
+uint8_t *MemoryManager::getBuffer(const ir::OperandIndex &ind) const
+{
+  assert(_mem_planner->memory_plans().find(ind) != _mem_planner->memory_plans().end());
+  const auto &mem_blk = _mem_planner->memory_plans().at(ind);
+  return _mem_alloc->base() + mem_blk.offset;
+}
+
+// One standalone allocator per operand; a repeated index silently replaces
+// (and thereby frees) the previous allocation for that index.
+std::shared_ptr<cpu_common::Allocator> DynamicMemoryManager::allocate(const ir::OperandIndex &ind,
+                                                                      uint32_t capacity)
+{
+  auto mem_alloc = std::make_shared<cpu_common::Allocator>(capacity);
+  _mem_alloc_map[ind] = mem_alloc;
+  return mem_alloc;
+}
+
+// Release the storage of every tracked allocator (map entries are kept, but
+// their arenas are freed).
+void DynamicMemoryManager::deallocate(void)
+{
+  for (auto &mem_alloc : _mem_alloc_map)
+  {
+    mem_alloc.second->release();
+  }
+}
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu_common/MemoryManager.h b/runtime/onert/backend/cpu_common/MemoryManager.h
new file mode 100644
index 000000000..b6d2d5192
--- /dev/null
+++ b/runtime/onert/backend/cpu_common/MemoryManager.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
+#define __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
+
+#include "backend/IMemoryManager.h"
+#include "MemoryPlanner.h"
+#include "ir/OperandIndexMap.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+/**
+ * @brief Plans offsets for claimed operands via an IMemoryPlanner, then backs
+ *        them all with a single Allocator arena (allocate()/getBuffer()).
+ */
+class MemoryManager : public backend::IMemoryManager
+{
+public:
+  MemoryManager();
+  MemoryManager(const std::string);
+  virtual ~MemoryManager() = default;
+
+  void allocate(void) override;
+  uint8_t *getBuffer(const ir::OperandIndex &ind) const;
+  void deallocate(void) override { _mem_alloc->release(); }
+
+  void claimPlan(const ir::OperandIndex &ind, uint32_t size);
+  void releasePlan(const ir::OperandIndex &ind);
+
+private:
+  cpu_common::IMemoryPlanner *createMemoryPlanner();
+  cpu_common::IMemoryPlanner *createMemoryPlanner(const std::string);
+
+private:
+  // NOTE(review): _tensor_mem_map appears unused in MemoryManager.cc — looks
+  // like dead state; confirm before removing.
+  ir::OperandIndexMap<cpu_common::Block> _tensor_mem_map;
+  std::shared_ptr<cpu_common::IMemoryPlanner> _mem_planner;
+  std::shared_ptr<cpu_common::Allocator> _mem_alloc;
+};
+
+/**
+ * @brief Manages one standalone Allocator per operand, for storage whose size
+ *        is only known at run time.
+ */
+class DynamicMemoryManager
+{
+public:
+  DynamicMemoryManager() = default;
+  virtual ~DynamicMemoryManager() = default;
+
+  std::shared_ptr<cpu_common::Allocator> allocate(const ir::OperandIndex &ind, uint32_t capacity);
+  void deallocate(void);
+
+private:
+  ir::OperandIndexMap<std::shared_ptr<cpu_common::Allocator>> _mem_alloc_map;
+};
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
diff --git a/runtime/onert/backend/cpu_common/MemoryPlanner.cc b/runtime/onert/backend/cpu_common/MemoryPlanner.cc
new file mode 100644
index 000000000..4b7f12cfd
--- /dev/null
+++ b/runtime/onert/backend/cpu_common/MemoryPlanner.cc
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MemoryPlanner.h"
+#include "util/logging.h"
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+// Bump allocation: each claim is appended at the current high-water mark and
+// memory is never reused, so capacity is the sum of all claimed sizes.
+void BumpPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+  assert(size != 0);
+
+  Block blk{_capacity, size};
+  _mem_plans[ind] = blk;
+  _capacity += size;
+
+  VERBOSE(BP_PLANNER) << "CLAIM(#" << ind.value() << "): " << blk.offset << ", " << blk.size
+                      << std::endl;
+}
+
+// Intentional no-op: a bump planner cannot reclaim space.
+void BumpPlanner::release(const ir::OperandIndex &ind)
+{
+  VERBOSE(BP_PLANNER) << "RELEASE(#" << ind.value() << "): "
+                      << "NOTHING does" << std::endl;
+}
+
+// There are some assumptions for claiming memory(== making a reservation for memory).
+// 1. About _claim_table(std::map).
+// - The table's data structure is std::map so that it always sorts
+// value(OperandIndex) by key(base_offset).
+// - This claim() inserts key/value into _claim_table and the release() removes the key/value from
+// _claim_table.
+// - _claim_table shows the memory status at a certain point in time. Therefore,
+// - If _claim_table has an offset and a certain size at a certain point in time,
+// it means the place at the offset has been already claimed(== can't claim now. need to find
+// someplace new).
+// - If _claim_table doesn't have any element for an offset and a certain size at a certain
+// point in time, it means the place at the offset can be claimed.
+// 2. In the loop for _claim_table, we can assume the current claim_base_offset value is bigger than
+// the previous claim_base_offset.
+// First-fit: scan currently claimed blocks in ascending offset order and take
+// the first gap large enough; otherwise append after the last block.
+void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+  assert(size != 0);
+
+  // Find the right position for claiming
+  uint32_t next_offset = 0;
+  for (auto &mem_claim : _claim_table)
+  {
+    auto claimed_base_offset = mem_claim.first;
+    auto claimed_size = _mem_plans[mem_claim.second].size;
+    if (next_offset + size <= claimed_base_offset)
+    {
+      // The gap before this claimed block fits the request.
+      break;
+    }
+    else
+    {
+      next_offset = claimed_base_offset + claimed_size;
+    }
+  }
+
+  // Now next_offset is set to the proper offset
+  _claim_table[next_offset] = ind;
+  _mem_plans[ind] = {next_offset, size};
+
+  VERBOSE(FF_PLANNER) << "claim(#" << ind.value() << "): [+" << next_offset << ", " << size << "sz]"
+                      << std::endl;
+
+  // Track the high-water mark; this becomes the arena capacity.
+  if (_capacity < next_offset + size)
+  {
+    _capacity = next_offset + size;
+  }
+}
+
+// Linear search of the claim table for the operand's entry; erasing it makes
+// the block's range available to future claims. Asserts if the operand was
+// never claimed (or was already released).
+void FirstFitPlanner::release(const ir::OperandIndex &ind)
+{
+  for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it)
+  {
+    if (it->second == ind)
+    {
+      uint32_t offset = it->first;
+      uint32_t index = ind.value();
+      uint32_t size = _mem_plans[ind].size;
+
+      _claim_table.erase(it);
+
+      VERBOSE(FF_PLANNER) << "release(#" << index << "): [+" << offset << ", " << size << "sz]"
+                          << std::endl;
+      return;
+    }
+  }
+  assert(!"Cannot release for given index. It has been not claimed or released already.");
+}
+
+WICPlanner::WICPlanner()
+    : _initialized(false), _capacity(0), _mem_plans(), _live_operands(), _interference_graph(),
+      _map_size_to_operands(), _claim_table()
+{
+  // DO NOTHING
+}
+
+// Claim only records liveness/interference; actual offsets are assigned later
+// in buildMemoryPlans(). The new operand interferes (both directions) with
+// every operand currently live.
+void WICPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+  assert(size != 0);
+
+  _map_size_to_operands.insert({size, ind});
+  for (auto &live_operand : _live_operands)
+  {
+    _interference_graph[live_operand].insert(ind);
+    _interference_graph[ind].insert(live_operand);
+  }
+  _live_operands.insert(ind);
+
+  VERBOSE(WIC_PLANNER) << "claim(#" << ind.value() << "): [" << size << "sz]" << std::endl;
+}
+
+// Ends the operand's live range; erasing an index that was never claimed is
+// a harmless no-op (std::unordered_set::erase by key).
+void WICPlanner::release(const ir::OperandIndex &ind)
+{
+  _live_operands.erase(ind);
+  VERBOSE(WIC_PLANNER) << "release(#" << ind.value() << ")" << std::endl;
+}
+
+/*
+ * Build memory plans using liveness and size of operands
+ * 1. Build inference graph at claim
+ * - Two operands interfere if they have overlapped live range
+ * 2. Sort operands descending order of size
+ * - Use std::multimap to sort operands
+ * 3. Allocate memory block for sorted operands
+ * - Find free memory block which does not overlap with interfered operands
+ */
+// Finalize the plans: visit operands in descending size order (the multimap's
+// greater<> ordering) and first-fit each one against only the blocks it
+// interferes with. NOTE(review): _initialized is never reset, so any claim()
+// made after this point will never be planned — confirm callers don't do that.
+void WICPlanner::buildMemoryPlans()
+{
+  for (auto &size_to_operand : _map_size_to_operands)
+  {
+    uint32_t size = size_to_operand.first;
+    ir::OperandIndex ind = size_to_operand.second;
+    VERBOSE(WIC_PLANNER) << "build_plan(#" << ind.value() << "): [" << size << "sz]" << std::endl;
+
+    // Find firstfit which does not interfere with live operands
+    uint32_t next_offset = 0;
+    if (_interference_graph.find(ind) != _interference_graph.end())
+    {
+      std::unordered_set<ir::OperandIndex> &interferences = _interference_graph.find(ind)->second;
+      for (auto &mem_claim : _claim_table)
+      {
+        // Non-interfering blocks may overlap freely, so only interfering
+        // entries constrain the placement.
+        if (interferences.find(mem_claim.second) != interferences.end())
+        {
+          auto claimed_base_offset = mem_claim.first;
+          auto claimed_size = _mem_plans[mem_claim.second].size;
+          VERBOSE(WIC_PLANNER) << "interfere (#" << mem_claim.second.value() << "): [+"
+                               << claimed_base_offset << ", " << claimed_size << "sz]" << std::endl;
+          if (next_offset + size <= claimed_base_offset)
+          {
+            break;
+          }
+          else if (next_offset < claimed_base_offset + claimed_size)
+          {
+            next_offset = claimed_base_offset + claimed_size;
+          }
+        }
+      }
+    }
+    else
+    {
+      VERBOSE(WIC_PLANNER) << "No interference" << std::endl;
+    }
+
+    _claim_table.insert({next_offset, ind});
+    _mem_plans[ind] = {next_offset, size};
+    VERBOSE(WIC_PLANNER) << "alloc(#" << ind.value() << "): [+" << next_offset << ", " << size
+                         << "sz]" << std::endl;
+
+    if (_capacity < next_offset + size)
+    {
+      _capacity = next_offset + size;
+    }
+  }
+  _initialized = true;
+  // Working state is no longer needed once _mem_plans is final.
+  _interference_graph.clear();
+  _map_size_to_operands.clear();
+  _claim_table.clear();
+}
+
+// Lazily finalize on first access, then hand out the fixed plans.
+WICPlanner::MemoryPlans &WICPlanner::memory_plans()
+{
+  if (!_initialized)
+    buildMemoryPlans();
+  return _mem_plans;
+}
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu_common/MemoryPlanner.h b/runtime/onert/backend/cpu_common/MemoryPlanner.h
new file mode 100644
index 000000000..4f9724328
--- /dev/null
+++ b/runtime/onert/backend/cpu_common/MemoryPlanner.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        MemoryPlanner.h
+ * @brief       This file contains Memory Planning related classes
+ */
+
+#ifndef __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
+#define __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
+
+#include <map>
+#include <unordered_set>
+#include <memory>
+
+#include "Allocator.h"
+#include "ir/OperandIndexMap.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+/**
+ * @brief Structure to have memory offset and size
+ */
+struct Block
+{
+  // Byte offset from the arena base
+  uint32_t offset;
+  // Block size in bytes
+  size_t size;
+};
+
+/**
+ * @brief Interface to plan memory
+ */
+struct IMemoryPlanner
+{
+  using MemoryPlans = ir::OperandIndexMap<Block>;
+
+  /**
+   * @brief Claim memory for operand
+   * @param[in] index The operand index
+   * @param[in] size The size of the memory
+   */
+  virtual void claim(const ir::OperandIndex &, size_t) = 0;
+  /**
+   * @brief Release memory for operand
+   * @param[in] index The operand index
+   */
+  virtual void release(const ir::OperandIndex &) = 0;
+  /**
+   * @brief Get capacity for memory planning
+   * @return The value of capacity
+   */
+  virtual uint32_t capacity() = 0;
+  /**
+   * @brief Get MemoryPlans
+   * @return MemoryPlans
+   */
+  virtual MemoryPlans &memory_plans() = 0;
+
+  virtual ~IMemoryPlanner() = default;
+};
+
+/**
+ * @brief Class to plan memory by bump way
+ *
+ * Every claim is appended at the end; release() is a no-op, so memory is
+ * never reused (fast planning, largest capacity).
+ */
+class BumpPlanner : public IMemoryPlanner
+{
+public:
+  /**
+   * @brief Claim memory for operand by bump way
+   * @param[in] index The operand index
+   * @param[in] size The size of the memory
+   */
+  void claim(const ir::OperandIndex &, size_t) override;
+  /**
+   * @brief Release memory for operand by bump way
+   * @param[in] index The operand index
+   */
+  void release(const ir::OperandIndex &) override;
+  /**
+   * @brief Get capacity for memory planning
+   * @return The value of capacity
+   */
+  uint32_t capacity() override { return _capacity; }
+  /**
+   * @brief Get MemoryPlans
+   * @return MemoryPlans
+   */
+  MemoryPlans &memory_plans() override { return _mem_plans; }
+
+private:
+  uint32_t _capacity = 0;
+  MemoryPlans _mem_plans;
+};
+
+/**
+ * @brief Class to plan memory by firstfit way
+ *
+ * Offsets are assigned eagerly at claim() time into the first gap that fits;
+ * release() frees the block's range for reuse by later claims.
+ */
+class FirstFitPlanner : public IMemoryPlanner
+{
+public:
+  /**
+   * @brief Claim memory for operand by firstfit way
+   * @param[in] index The operand index
+   * @param[in] size The size of the memory
+   */
+  void claim(const ir::OperandIndex &, size_t) override;
+  /**
+   * @brief Release memory for operand by firstfit way
+   * @param[in] index The operand index
+   */
+  void release(const ir::OperandIndex &) override;
+  /**
+   * @brief Get capacity for memory planning
+   * @return The value of capacity
+   */
+  uint32_t capacity() override { return _capacity; }
+  /**
+   * @brief Get MemoryPlans
+   * @return MemoryPlans
+   */
+  MemoryPlans &memory_plans() override { return _mem_plans; }
+
+private:
+  uint32_t _capacity = 0;
+  MemoryPlans _mem_plans;
+  // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset)
+  std::map<uint32_t, ir::OperandIndex> _claim_table;
+};
+
+/**
+ * @brief Class to plan memory by Weighted Interval Color algorithm
+ *
+ * claim()/release() only record live ranges; offsets are computed lazily when
+ * capacity() or memory_plans() is first called (see buildMemoryPlans()).
+ */
+class WICPlanner : public IMemoryPlanner
+{
+public:
+  WICPlanner();
+
+  /**
+   * @brief Claim memory for operand by WIC algorithm
+   * @param[in] index The operand index
+   * @param[in] size The size of the memory
+   */
+  void claim(const ir::OperandIndex &, size_t) override;
+  /**
+   * @brief Release memory for operand by WIC algorithm
+   * @param[in] index The operand index
+   */
+  void release(const ir::OperandIndex &) override;
+  /**
+   * @brief Get capacity for memory planning
+   * @return The value of capacity
+   */
+  uint32_t capacity() override
+  {
+    if (!_initialized)
+      buildMemoryPlans();
+    return _capacity;
+  }
+  /**
+   * @brief Get MemoryPlans
+   * @return MemoryPlans
+   */
+  MemoryPlans &memory_plans() override;
+
+private:
+  void buildMemoryPlans();
+
+  bool _initialized;
+  uint32_t _capacity;
+  MemoryPlans _mem_plans;
+  std::unordered_set<ir::OperandIndex> _live_operands;
+  ir::OperandIndexMap<std::unordered_set<ir::OperandIndex>> _interference_graph;
+  // Sort operands by descending order of size
+  std::multimap<uint32_t, ir::OperandIndex, std::greater<uint32_t>> _map_size_to_operands;
+  std::multimap<uint32_t, ir::OperandIndex> _claim_table;
+};
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
diff --git a/runtime/onert/backend/cpu_common/MemoryPlanner.test.cc b/runtime/onert/backend/cpu_common/MemoryPlanner.test.cc
new file mode 100644
index 000000000..5208a94d4
--- /dev/null
+++ b/runtime/onert/backend/cpu_common/MemoryPlanner.test.cc
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "MemoryPlanner.h"
+#include "ir/Index.h"
+
+// A fresh Allocator must expose a non-null arena base pointer.
+TEST(Allocator, allocate_test)
+{
+  ::onert::backend::cpu_common::Allocator allocator(1024);
+  ASSERT_NE(allocator.base(), nullptr);
+}
+
+// Bump planning: successive claims get strictly increasing, back-to-back
+// offsets (0, 10, 30 for sizes 10, 20, 30).
+TEST(BumpPlanner, claim_test)
+{
+  ::onert::backend::cpu_common::BumpPlanner planner;
+
+  auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
+    onert::ir::OperandIndex mem_idx(index);
+    planner.claim(mem_idx, size);
+    auto mem_blk = planner.memory_plans()[mem_idx];
+    ASSERT_EQ(mem_blk.offset, expected_offset);
+    ASSERT_EQ(mem_blk.size, size);
+  };
+
+  claim(0, 10, 0);
+  claim(1, 20, 10);
+  claim(2, 30, 30);
+}
+
+// First-fit planning: interleaved claims and releases; each claim() checks
+// the expected offset, i.e. that freed gaps are reused by the first claim
+// that fits (e.g. #4 takes #0's freed [0,10) slot, #8 takes part of #2's).
+TEST(FirstFitPlanner, claim_release_test)
+{
+  ::onert::backend::cpu_common::FirstFitPlanner planner;
+
+  auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
+    onert::ir::OperandIndex mem_idx(index);
+    planner.claim(mem_idx, size);
+    auto mem_blk = planner.memory_plans()[mem_idx];
+    ASSERT_EQ(mem_blk.offset, expected_offset);
+    ASSERT_EQ(mem_blk.size, size);
+  };
+
+  auto release = [&planner](uint32_t index) {
+    onert::ir::OperandIndex mem_idx(index);
+    planner.release(mem_idx);
+  };
+
+  // 0 CLAIM - 10
+  claim(0, 10, 0);
+
+  // 1 CLAIM - 20
+  claim(1, 20, 10);
+
+  // 2 CLAIM - 30
+  claim(2, 30, 30);
+
+  // 0 RELEASE - 10
+  release(0);
+
+  // 3 CLAIM - 20
+  claim(3, 20, 60);
+
+  // 4 CLAIM - 5
+  claim(4, 5, 0);
+
+  // 5 CLAIM - 10
+  claim(5, 10, 80);
+
+  // 6 CLAIM - 5
+  claim(6, 5, 5);
+
+  // 2 RELEASE - 30
+  release(2);
+
+  // 7 CLAIM - 35
+  claim(7, 35, 90);
+
+  // 8 CLAIM - 10
+  claim(8, 10, 30);
+
+  // 4 RELEASE - 5
+  release(4);
+
+  // 9 CLAIM - 10
+  claim(9, 10, 40);
+
+  // 10 CLAIM - 10
+  claim(10, 10, 50);
+
+  // 6 RELEASE
+  release(6);
+
+  // 1 RELEASE
+  release(1);
+
+  // 8 RELEASE
+  release(8);
+
+  // 9 RELEASE
+  release(9);
+
+  // 10 RELEASE
+  release(10);
+
+  // 3 RELEASE
+  release(3);
+
+  // 5 RELEASE
+  release(5);
+
+  // 7 RELEASE
+  release(7);
+}
+
+// WIC planning: claim/release defines overlapping live ranges for operands
+// 0..6; memory_plans() (called by the first verify) finalizes the plans, and
+// the expected offsets show non-interfering operands sharing the same range
+// (e.g. 0, 2 and 5 all at offset 0). Total capacity must be 40.
+TEST(WICPlanner, claim_release_test)
+{
+  ::onert::backend::cpu_common::WICPlanner planner;
+
+  auto claim = [&planner](uint32_t index, size_t size) {
+    onert::ir::OperandIndex mem_idx(index);
+    planner.claim(mem_idx, size);
+  };
+
+  auto release = [&planner](uint32_t index) {
+    onert::ir::OperandIndex mem_idx(index);
+    planner.release(mem_idx);
+  };
+
+  auto verify = [&planner](uint32_t index, uint32_t size, uint32_t expected_offset) {
+    onert::ir::OperandIndex mem_idx(index);
+    auto mem_blk = planner.memory_plans()[mem_idx];
+    ASSERT_EQ(mem_blk.offset, expected_offset);
+    ASSERT_EQ(mem_blk.size, size);
+  };
+
+  auto capacity = [&planner](uint32_t expected_capacity) {
+    auto actual_capacity = planner.capacity();
+    ASSERT_EQ(actual_capacity, expected_capacity);
+  };
+
+  claim(0, 20);
+  claim(1, 5);
+  release(0);
+  claim(2, 10);
+  release(1);
+  claim(3, 10);
+  release(2);
+  claim(4, 10);
+  release(3);
+  claim(5, 20);
+  release(4);
+  claim(6, 20);
+  release(5);
+  // Fix: end the live range of operand 6, the last one claimed. The original
+  // called release(7) — an index that was never claimed — which was a silent
+  // no-op in WICPlanner::release and left 6 nominally live.
+  release(6);
+
+  // VERIFY 0 - 0
+  verify(0, 20, 0);
+
+  // VERIFY 1 - 20
+  verify(1, 5, 20);
+
+  // VERIFY 2 - 0
+  verify(2, 10, 0);
+
+  // VERIFY 3 - 10
+  verify(3, 10, 10);
+
+  // VERIFY 4 - 20
+  verify(4, 10, 20);
+
+  // VERIFY 5 - 0
+  verify(5, 20, 0);
+
+  // VERIFY 6 - 20
+  verify(6, 20, 20);
+
+  // CAPACITY - 40
+  capacity(40);
+}
diff --git a/runtime/onert/backend/cpu_common/MemoryPlannerFactory.cc b/runtime/onert/backend/cpu_common/MemoryPlannerFactory.cc
new file mode 100644
index 000000000..322d0de8c
--- /dev/null
+++ b/runtime/onert/backend/cpu_common/MemoryPlannerFactory.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MemoryPlannerFactory.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+// Meyers-singleton accessor.
+MemoryPlannerFactory &MemoryPlannerFactory::get()
+{
+  static MemoryPlannerFactory instance;
+  return instance;
+}
+
+// Map a planner key to a freshly heap-allocated planner; an unrecognized key
+// silently falls back to FirstFit. The caller owns the returned raw pointer
+// (MemoryManager wraps it in a shared_ptr).
+IMemoryPlanner *MemoryPlannerFactory::create(const std::string &key)
+{
+  if (key == "FirstFit")
+  {
+    return new FirstFitPlanner;
+  }
+  else if (key == "Bump")
+  {
+    return new BumpPlanner;
+  }
+  else if (key == "WIC")
+  {
+    return new WICPlanner;
+  }
+  return new FirstFitPlanner; // Default Planner
+}
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu_common/MemoryPlannerFactory.h b/runtime/onert/backend/cpu_common/MemoryPlannerFactory.h
new file mode 100644
index 000000000..a85b67ca8
--- /dev/null
+++ b/runtime/onert/backend/cpu_common/MemoryPlannerFactory.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
+#define __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
+
+#include "MemoryPlanner.h"
+
+#include <string>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+class MemoryPlannerFactory
+{
+public:
+ static MemoryPlannerFactory &get();
+
+private:
+ MemoryPlannerFactory() = default;
+
+public:
+ IMemoryPlanner *create(const std::string &key);
+};
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
diff --git a/runtime/onert/core/CMakeLists.txt b/runtime/onert/core/CMakeLists.txt
new file mode 100644
index 000000000..46e57427c
--- /dev/null
+++ b/runtime/onert/core/CMakeLists.txt
@@ -0,0 +1,21 @@
+file(GLOB_RECURSE SOURCES "src/*.cc")
+
+add_library(onert_core SHARED ${SOURCES})
+set_target_properties(onert_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(onert_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_include_directories(onert_core PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
+target_link_libraries(onert_core PUBLIC nnfw_lib_misc)
+target_link_libraries(onert_core PRIVATE nnfw_lib_cker)
+target_link_libraries(onert_core PRIVATE nnfw_common)
+target_link_libraries(onert_core PRIVATE nnfw_coverage)
+target_link_libraries(onert_core PRIVATE dl ${LIB_PTHREAD})
+
+if(ENVVAR_ONERT_CONFIG)
+ target_compile_definitions(onert_core PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
+endif(ENVVAR_ONERT_CONFIG)
+
+install(TARGETS onert_core LIBRARY DESTINATION lib)
+install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/"
+ DESTINATION "include/onert"
+ FILES_MATCHING PATTERN "*.h" PATTERN "*.lst"
+ )
diff --git a/runtime/onert/core/include/backend/Backend.h b/runtime/onert/core/include/backend/Backend.h
new file mode 100644
index 000000000..4f6ebbba7
--- /dev/null
+++ b/runtime/onert/core/include/backend/Backend.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BACKEND_H__
+#define __ONERT_BACKEND_BACKEND_H__
+
+#include <memory>
+
+#include "ir/Graph.h"
+#include "backend/IConfig.h"
+#include "backend/BackendContext.h"
+
+namespace onert
+{
+namespace backend
+{
+
+namespace custom
+{
+class IKernelBuilder;
+}
+
+class Backend
+{
+public:
+ virtual ~Backend() = default;
+ virtual std::shared_ptr<onert::backend::IConfig> config() const = 0;
+
+ virtual std::unique_ptr<BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<backend::custom::IKernelBuilder> &kb,
+ bool is_linear_executor) const = 0;
+};
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BACKEND_H__
diff --git a/runtime/onert/core/include/backend/BackendContext.h b/runtime/onert/core/include/backend/BackendContext.h
new file mode 100644
index 000000000..3c88b5418
--- /dev/null
+++ b/runtime/onert/core/include/backend/BackendContext.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_BACKEND_CONTEXT_H__
+
+#include <memory>
+#include "ir/Graph.h"
+
+namespace onert
+{
+namespace backend
+{
+
+class Backend;
+class IConstantInitializer;
+class IKernelGenerator;
+class IShapeFixer;
+class ITensorRegister;
+struct ITensorBuilder;
+struct IOptimizer;
+
+class BackendContext
+{
+public:
+ struct OperationInfo
+ {
+ ir::OperationIndex index;
+ ir::Layout layout;
+
+ OperationInfo(ir::OperationIndex index, ir::Layout layout) : index{index}, layout{layout} {}
+ };
+
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
+ std::shared_ptr<IShapeFixer> shape_fixer = nullptr,
+ std::shared_ptr<ITensorRegister> tensor_register = nullptr,
+ std::shared_ptr<IOptimizer> optimizer = nullptr)
+ : _backend{backend}, _graph{graph}, tensor_builder{tensor_builder},
+ constant_initializer{constant_initializer}, kernel_gen{kernel_gen},
+ shape_fixer{shape_fixer}, tensor_register{tensor_register}, optimizer{optimizer}
+ {
+ }
+
+ void initialize(const std::vector<OperationInfo> &operation_list,
+ const std::vector<ir::OperandIndex> &operand_list);
+ void fixShapes();
+ void initConsts();
+
+ const Backend *backend() const { return _backend; }
+ const ir::Graph *graph() const { return _graph; }
+ const std::vector<OperationInfo> &operation_list() { return _operation_list; }
+ const std::vector<ir::OperandIndex> &operand_list() { return _operand_list; }
+
+private:
+ const Backend *_backend{nullptr};
+ const ir::Graph *_graph{nullptr};
+ std::vector<OperationInfo> _operation_list;
+ std::vector<ir::OperandIndex> _operand_list;
+
+public:
+ std::shared_ptr<ITensorBuilder> tensor_builder;
+ std::shared_ptr<IConstantInitializer> constant_initializer;
+ std::shared_ptr<IKernelGenerator> kernel_gen;
+ std::shared_ptr<IShapeFixer> shape_fixer;
+ std::shared_ptr<ITensorRegister> tensor_register;
+ std::shared_ptr<IOptimizer> optimizer;
+};
+
+using BackendContexts = std::unordered_map<const Backend *, std::unique_ptr<BackendContext>>;
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/core/include/backend/CustomKernelBuilder.h b/runtime/onert/core/include/backend/CustomKernelBuilder.h
new file mode 100644
index 000000000..2d35406dd
--- /dev/null
+++ b/runtime/onert/core/include/backend/CustomKernelBuilder.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CUSTOM_KERNEL_BUILDER_H__
+#define __ONERT_BACKEND_CUSTOM_KERNEL_BUILDER_H__
+
+#include "misc/tensor/Shape.h"
+#include "ir/DataType.h"
+
+#include <vector>
+#include <memory>
+
+namespace onert
+{
+namespace exec
+{
+
+class IFunction;
+
+} // namespace exec
+} // namespace onert
+
+namespace onert
+{
+namespace backend
+{
+namespace custom
+{
+
+using Shape = nnfw::misc::tensor::Shape;
+
+struct TypeInfo
+{
+ Shape shape;
+ ir::DataType dtype;
+};
+
+struct CustomKernelConfigParams
+{
+ std::vector<void *> input_allocations;
+ std::vector<TypeInfo> input_types;
+
+ std::vector<void *> output_allocations;
+ std::vector<TypeInfo> output_types;
+
+ char *userdata;
+ size_t userdata_size;
+};
+
+class IKernelBuilder
+{
+public:
+ virtual ~IKernelBuilder() = default;
+ virtual std::unique_ptr<exec::IFunction> buildKernel(const std::string &id,
+ CustomKernelConfigParams &&params) const = 0;
+};
+
+} // namespace custom
+
+} // namespace backend
+
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CUSTOM_KERNEL_BUILDER_H__
diff --git a/runtime/onert/core/include/backend/IConfig.h b/runtime/onert/core/include/backend/IConfig.h
new file mode 100644
index 000000000..e4e1d5a7f
--- /dev/null
+++ b/runtime/onert/core/include/backend/IConfig.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ICONFIG_H__
+#define __ONERT_BACKEND_ICONFIG_H__
+
+#include "util/ITimer.h"
+#include <memory>
+#include <string>
+
+namespace onert
+{
+namespace backend
+{
+
+struct IConfig
+{
+ virtual ~IConfig() = default;
+
+ virtual std::string id() = 0;
+ virtual bool initialize() = 0;
+ // Support permute kernel
+ virtual bool SupportPermutation() = 0;
+
+ // Timer is used for backend profiling. In case of default (nullptr) timer profiler won't work.
+ virtual std::unique_ptr<util::ITimer> timer() { return nullptr; }
+};
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ICONFIG_H__
diff --git a/runtime/onert/core/include/backend/IConstantInitializer.h b/runtime/onert/core/include/backend/IConstantInitializer.h
new file mode 100644
index 000000000..3872f333b
--- /dev/null
+++ b/runtime/onert/core/include/backend/IConstantInitializer.h
@@ -0,0 +1,280 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__
+
+#include <unordered_map>
+#include <functional>
+
+#include "ITensorBuilder.h"
+#include "ir/Coordinates.h"
+#include "ir/Layout.h"
+#include "ir/Operand.h"
+#include "ir/Operands.h"
+#include "ir/OperationVisitor.h"
+#include "ir/OpSequence.h"
+#include "util/logging.h"
+
+namespace
+{
+template <typename T>
+static void Init(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj, const bool copy,
+ const onert::ir::Layout frontend_layout = onert::ir::Layout::UNKNOWN)
+{
+ const auto shape = model_obj.shape();
+ assert(model_obj.data());
+ auto base = reinterpret_cast<const T *>(model_obj.data()->base());
+
+ obj.access([&](::onert::backend::ITensor &tensor) {
+ switch (shape.rank())
+ {
+ case 0:
+ {
+ assert(model_obj.data()->size() == sizeof(T));
+ const auto value = *reinterpret_cast<const T *>(base);
+ T *into = reinterpret_cast<T *>(tensor.buffer());
+ *into = value;
+ break;
+ }
+ case 1:
+ {
+ auto vec_size = shape.dim(0);
+ for (int32_t n = 0; n < vec_size; ++n)
+ {
+ const T *from = reinterpret_cast<const T *>(base) + n;
+ const auto value = *from;
+
+ T *into = reinterpret_cast<T *>(tensor.buffer()) + n;
+
+ *into = value;
+ }
+ break;
+ }
+ case 2:
+ {
+ const int32_t copy_len = shape.dim(1);
+
+ for (auto i = 0; i < shape.dim(0); ++i)
+ {
+ ::onert::ir::Coordinates coords{i, 0};
+ memcpy(tensor.buffer() + tensor.calcOffset(coords), base + i * copy_len,
+ copy_len * sizeof(T));
+ }
+ break;
+ }
+ case 3:
+ {
+ const int32_t width = shape.dim(1);
+ const int32_t copy_len = shape.dim(2);
+
+ for (auto i = 0; i < shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < shape.dim(1); ++j)
+ {
+ ::onert::ir::Coordinates coords{i, j, 0};
+ memcpy(tensor.buffer() + tensor.calcOffset(coords),
+ base + i * width * copy_len + j * copy_len, copy_len * sizeof(T));
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ const int32_t height = shape.dim(1);
+ const int32_t width = shape.dim(2);
+ const int32_t copy_len = shape.dim(3);
+ for (auto i = 0; i < shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < shape.dim(1); ++j)
+ {
+ for (auto k = 0; k < shape.dim(2); ++k)
+ {
+ if (copy)
+ {
+ ::onert::ir::Coordinates coords{i, j, k, 0};
+ memcpy(tensor.buffer() + tensor.calcOffset(coords),
+ base + i * height * width * copy_len + j * width * copy_len + k * copy_len,
+ copy_len * sizeof(T));
+ }
+ else
+ {
+ for (auto l = 0; l < shape.dim(3); ++l)
+ {
+ const auto coords = ::onert::ir::convertCoordinates({i, j, k, l}, frontend_layout,
+ tensor.layout());
+ T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset(coords));
+ T value = *(base + i * height * width * copy_len + j * width * copy_len +
+ k * copy_len + l);
+ *into = value;
+ }
+ }
+ }
+ }
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error{"Not yet supported"};
+ }
+ });
+}
+
+template <typename T>
+void copyInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj)
+{
+ Init<T>(model_obj, obj, true);
+}
+
+template <typename T>
+void permuteInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj,
+ const onert::ir::Layout frontend_layout)
+{
+ const bool copy = frontend_layout == obj.layout();
+ Init<T>(model_obj, obj, copy, frontend_layout);
+}
+
+} // namespace
+
+namespace onert
+{
+namespace backend
+{
+
+class IConstantInitializer : public ir::OperationVisitor
+{
+public:
+ virtual ~IConstantInitializer() = default;
+
+public:
+ void run()
+ {
+ assert(tensor_builder().get());
+ for (const auto &it : _init_map)
+ {
+ const auto &ind = it.first;
+ const auto &fn = it.second;
+
+ const auto &model_obj = _operands.at(ind);
+ auto tensor_obj = tensor_builder()->tensorAt(ind);
+ assert(tensor_obj != nullptr);
+ fn(model_obj, *tensor_obj);
+ VERBOSE(FillOperandData) << "Fill data for operand " << ind.value() << std::endl;
+ }
+ _init_map.clear();
+ }
+
+public:
+ IConstantInitializer(const ir::Operands &operands)
+ : _operands{operands}, _current_op_seq_layout{ir::Layout::UNKNOWN}
+ {
+ }
+
+public:
+ using Initializer = std::function<void(const ir::Operand &, backend::ITensor &)>;
+
+ void setLayout(ir::Layout layout) { _current_op_seq_layout = layout; }
+
+protected:
+ using OperationVisitor::visit;
+
+protected:
+ virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0;
+
+public:
+ void registerCopyInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
+ {
+ // For only CONSTANTS
+    // TODO Add a check that the tensor has been allocated
+ if (!obj.isConstant())
+ return;
+
+ const auto type = obj.typeInfo().type();
+ using ir::DataType;
+
+ switch (type)
+ {
+ case DataType::FLOAT32:
+ _init_map[index] = copyInit<float>;
+ break;
+ case DataType::INT32:
+ _init_map[index] = copyInit<int32_t>;
+ break;
+ case DataType::UINT32:
+ _init_map[index] = copyInit<uint32_t>;
+ break;
+ case DataType::BOOL8:
+ case DataType::QUANT8_ASYMM:
+ _init_map[index] = copyInit<uint8_t>;
+ break;
+ case DataType::QUANT8_SYMM:
+ _init_map[index] = copyInit<int8_t>;
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+ }
+
+public:
+ void registerPermuteInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
+ {
+ // For only CONSTANTS
+    // TODO Add a check that the tensor has been allocated
+ if (!obj.isConstant())
+ return;
+
+ const auto type = obj.typeInfo().type();
+ using ir::DataType;
+ using namespace std::placeholders;
+
+ switch (type)
+ {
+ case DataType::FLOAT32:
+ _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_op_seq_layout);
+ break;
+ case DataType::INT32:
+ _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_op_seq_layout);
+ break;
+ case DataType::UINT32:
+ _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_op_seq_layout);
+ break;
+ case DataType::BOOL8:
+ case DataType::QUANT8_ASYMM:
+ _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_op_seq_layout);
+ break;
+ case DataType::QUANT8_SYMM:
+ _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_op_seq_layout);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+ }
+
+public:
+ bool exist(const ir::OperandIndex &ind) { return _init_map.find(ind) != _init_map.end(); }
+
+protected:
+ const ir::Operands &_operands;
+ std::unordered_map<ir::OperandIndex, Initializer> _init_map;
+ ir::Layout _current_op_seq_layout; // TODO Rename this to _current_layout
+};
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/core/include/backend/IKernelGenerator.h b/runtime/onert/core/include/backend/IKernelGenerator.h
new file mode 100644
index 000000000..afc34ec21
--- /dev/null
+++ b/runtime/onert/core/include/backend/IKernelGenerator.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_IKERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_IKERNEL_GENERATOR_H__
+
+#include <assert.h>
+#include <memory>
+#include <functional>
+
+#include "ITensorBuilder.h"
+#include "ir/OperationVisitor.h"
+#include "ir/OpSequence.h"
+#include <memory>
+#include "exec/FunctionSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+
+class IKernelGenerator : public ir::OperationVisitor
+{
+public:
+ virtual ~IKernelGenerator() = default;
+
+ std::unique_ptr<exec::IFunction> releaseFunction()
+ {
+ assert(_return_fn);
+ return std::move(_return_fn);
+ }
+
+ std::unique_ptr<exec::FunctionSequence> generate(const ir::OpSequence &op_seq)
+ {
+ op_seq.accept(*this);
+ return std::move(_return_fn_seq);
+ }
+
+protected:
+ using OperationVisitor::visit;
+
+ void visit(const ir::OpSequence &) override
+ {
+ throw std::runtime_error("KernelGenerator: NYI for operation 'OpSequence'");
+ }
+
+#define OP(InternalName) \
+ void visit(const ir::operation::InternalName &) override \
+ { \
+ throw std::runtime_error("KernelGenerator: NYI for operation '" #InternalName "'"); \
+ }
+#include "ir/Operations.lst"
+#undef OP
+
+protected:
+ std::unique_ptr<exec::IFunction> _return_fn;
+ std::unique_ptr<exec::FunctionSequence> _return_fn_seq; // TODO Extract this out
+};
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_IKERNEL_GENERATOR_H__
diff --git a/runtime/onert/core/include/backend/IMemoryManager.h b/runtime/onert/core/include/backend/IMemoryManager.h
new file mode 100644
index 000000000..bad2fd51a
--- /dev/null
+++ b/runtime/onert/core/include/backend/IMemoryManager.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_IMEMORY_MANAGER_H__
+#define __ONERT_BACKEND_IMEMORY_MANAGER_H__
+
+namespace onert
+{
+namespace backend
+{
+
+struct IMemoryManager
+{
+ virtual ~IMemoryManager() = default;
+
+ virtual void allocate(void) = 0;
+ virtual void deallocate(void) = 0;
+};
+
+} // namespace backend
+} // namespace onert
+
+#include <unordered_set>
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+
+using MemoryManagerSet = std::unordered_set<std::unique_ptr<backend::IMemoryManager>>;
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_IMEMORY_MANAGER_H__
diff --git a/runtime/onert/core/include/backend/IOptimizer.h b/runtime/onert/core/include/backend/IOptimizer.h
new file mode 100644
index 000000000..4844d21b9
--- /dev/null
+++ b/runtime/onert/core/include/backend/IOptimizer.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_I_OPTIMIZER_H__
+#define __ONERT_BACKEND_I_OPTIMIZER_H__
+
+namespace onert
+{
+namespace ir
+{
+class LoweredGraph;
+}
+} // namespace onert
+
+namespace onert
+{
+namespace backend
+{
+
+/**
+ * @brief Class for backend optimizations. This is an optional class so not all backends must have
+ * it.
+ *
+ */
+struct IOptimizer
+{
+ virtual ~IOptimizer() = default;
+ /**
+ * @brief Run optimization
+ *
+ */
+ virtual void optimize() = 0;
+};
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_I_OPTIMIZER_H__
diff --git a/runtime/onert/core/include/backend/IShapeFixer.h b/runtime/onert/core/include/backend/IShapeFixer.h
new file mode 100644
index 000000000..35c8d33f0
--- /dev/null
+++ b/runtime/onert/core/include/backend/IShapeFixer.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ISHAPE_FIXER_H__
+#define __ONERT_BACKEND_ISHAPE_FIXER_H__
+
+#include <memory>
+#include <functional>
+
+#include "ir/LowerInfoMap.h"
+#include "ITensorBuilder.h"
+#include "ir/OperationVisitor.h"
+#include "ir/OpSequence.h"
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+
+class IShapeFixer : public ir::OperationVisitor
+{
+public:
+ virtual ~IShapeFixer() = default;
+
+protected:
+#define OP(InternalName) \
+ void visit(const ir::operation::InternalName &) override \
+ { \
+ throw std::runtime_error("ShapeFixer: NYI for operation '" #InternalName "'"); \
+ }
+#include "ir/Operations.lst"
+#undef OP
+
+public:
+ void fix(const ir::OpSequence &op_seq) { op_seq.accept(*this); };
+};
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ISHAPE_FIXER_H__
diff --git a/runtime/onert/core/include/backend/ITensor.h b/runtime/onert/core/include/backend/ITensor.h
new file mode 100644
index 000000000..69f319a36
--- /dev/null
+++ b/runtime/onert/core/include/backend/ITensor.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_OPERAND_I_TENSOR_H__
+#define __ONERT_BACKEND_OPERAND_I_TENSOR_H__
+
+#include <cstring>
+#include <cstdint>
+#include <functional>
+
+#include "ir/Layout.h"
+#include "ir/Coordinates.h"
+
+namespace onert
+{
+namespace backend
+{
+
+class ITensor
+{
+public:
+ virtual ~ITensor() = default;
+
+public:
+ virtual uint8_t *buffer() const = 0;
+ virtual size_t total_size() const = 0;
+ virtual size_t dimension(size_t index) const = 0;
+ virtual size_t num_dimensions() const = 0;
+ virtual size_t calcOffset(const ir::Coordinates &coords) const = 0;
+ virtual ir::Layout layout() const = 0;
+ virtual bool has_padding() const = 0;
+ virtual void access(const std::function<void(ITensor &tensor)> &fn) = 0;
+
+ /**
+ * @brief Return true if the tensor needs dynamic allocation, meaning that during compile-time
+   * the output shape cannot be known and the output shape is calculated during
+ * kernel execution-time.
+ */
+ virtual bool is_dynamic() const { return false; /* default */ }
+};
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_OPERAND_I_TENSOR_H__
diff --git a/runtime/onert/core/include/backend/ITensorBuilder.h b/runtime/onert/core/include/backend/ITensorBuilder.h
new file mode 100644
index 000000000..f747b678f
--- /dev/null
+++ b/runtime/onert/core/include/backend/ITensorBuilder.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ITENSOR_BUILDER_H__
+#define __ONERT_BACKEND_ITENSOR_BUILDER_H__
+
+#include <map>
+
+#include "ir/Index.h"
+#include "ir/OperandInfo.h"
+#include "ir/Operation.h"
+#include "ir/Layout.h"
+#include "ITensor.h"
+#include "ITensorManager.h"
+
+namespace onert
+{
+namespace backend
+{
+
+struct ITensorBuilder
+{
+ using IterateFunction = std::function<void(const ir::OperandIndex &)>;
+
+ virtual ~ITensorBuilder(void) = default;
+
+ /**
+ * @brief Register tensor information to allocate on backend
+ */
+ virtual void registerTensorInfo(const ir::OperandIndex &, const ir::OperandInfo &,
+ ir::Layout backend_layout, bool as_const) = 0;
+
+ virtual void notifyFirstUse(const ir::OperandIndex &) = 0;
+ virtual void notifyLastUse(const ir::OperandIndex &) = 0;
+
+ virtual bool isRegistered(const ir::OperandIndex &) const = 0;
+
+ virtual void prepare(void) = 0;
+ virtual void allocate() = 0;
+ virtual void postFunctionPrepare() = 0;
+
+ virtual std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) = 0;
+ virtual void iterate(const IterateFunction &fn) = 0;
+
+ virtual std::unique_ptr<ITensorManager> releaseTensorManager(void) = 0;
+};
+
+} // namespace backend
+} // namespace onert
+
+#include <unordered_set>
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+
+using TensorBuilderSet = std::unordered_set<std::shared_ptr<backend::ITensorBuilder>>;
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ITENSOR_BUILDER_H__
diff --git a/runtime/onert/core/include/backend/ITensorManager.h b/runtime/onert/core/include/backend/ITensorManager.h
new file mode 100644
index 000000000..1fcbe140f
--- /dev/null
+++ b/runtime/onert/core/include/backend/ITensorManager.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ITENSOR_MANAGER_H__
+#define __ONERT_BACKEND_ITENSOR_MANAGER_H__
+
+namespace onert
+{
+namespace backend
+{
+
+// NOTE The name "ITensorManager" has been debated and may not be ideal.
+// Suggestions for a better name are welcome.
+/**
+ * @brief Interface as an abstract tensor manager which has MemoryManager
+ */
+struct ITensorManager
+{
+ virtual ~ITensorManager() = default;
+};
+
+} // namespace backend
+} // namespace onert
+
+#include <unordered_set>
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+
+using TensorManagerSet = std::unordered_set<std::unique_ptr<backend::ITensorManager>>;
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ITENSOR_MANAGER_H__
diff --git a/runtime/onert/core/include/backend/ITensorRegister.h b/runtime/onert/core/include/backend/ITensorRegister.h
new file mode 100644
index 000000000..bceaebf32
--- /dev/null
+++ b/runtime/onert/core/include/backend/ITensorRegister.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ITENSOR_REGISTER_H__
+#define __ONERT_BACKEND_ITENSOR_REGISTER_H__
+
+#include "ir/LowerInfoMap.h"
+#include "ITensorBuilder.h"
+#include "ir/Layout.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/OperandInfo.h"
+#include "ir/Operands.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace backend
+{
+
+class ITensorRegister : public ir::OperationVisitor
+{
+public:
+ virtual ~ITensorRegister() = default;
+
+public:
+ void registerTensors(const ir::OpSequence &op_seq, const ir::LowerInfoMap *lower_info_map)
+ {
+ _current_op_seq_layout = op_seq.getLayout();
+ _lower_info_map = lower_info_map;
+ assert(_lower_info_map != nullptr);
+ assert(tensor_builder().get() != nullptr);
+ op_seq.accept(*this);
+ }
+
+protected:
+ virtual const ir::Operands &operands() const = 0;
+ virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0;
+
+protected:
+#define OP(InternalName) \
+ void visit(const ir::operation::InternalName &node) override \
+ { \
+ for (const auto &ind : node.getInputs() + node.getOutputs()) \
+ { \
+ defaultRegisterTensorInfo(ind); \
+ } \
+ }
+#include "ir/Operations.lst"
+#undef OP
+
+protected:
+ void defaultRegisterTensorInfo(const ir::OperandIndex &index) const
+ {
+ if (tensor_builder()->isRegistered(index))
+ {
+ return;
+ }
+
+ const auto &obj = operands().at(index);
+ const auto frontend_layout = frontendLayout();
+ const auto backend_layout = backendLayout(index);
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo()};
+ tensor_builder()->registerTensorInfo(index, backend_info, backend_layout, obj.isConstant());
+ }
+
+protected:
+ ir::Layout frontendLayout() const { return _current_op_seq_layout; }
+ ir::Layout backendLayout(const ir::OperandIndex &index) const
+ {
+ assert(_lower_info_map != nullptr);
+ const auto lower_info = _lower_info_map->operand.at(index).get();
+ return lower_info->def_factors().getOnlyElement().layout();
+ }
+
+private:
+ ir::Layout _current_op_seq_layout;
+ const ir::LowerInfoMap *_lower_info_map{nullptr};
+};
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ITENSOR_REGISTER_H__
diff --git a/runtime/onert/core/include/compiler/BackendManager.h b/runtime/onert/core/include/compiler/BackendManager.h
new file mode 100644
index 000000000..95ba2c223
--- /dev/null
+++ b/runtime/onert/core/include/compiler/BackendManager.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_BACKEND_MANAGER_H__
+#define __ONERT_COMPILER_BACKEND_MANAGER_H__
+
+#include <memory>
+#include <map>
+
+#include "ir/Operands.h"
+#include "backend/Backend.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+class BackendManager
+{
+public:
+ using backend_create_t = backend::Backend *(*)();
+ using backend_destroy_t = void (*)(backend::Backend *);
+ using dlhandle_destroy_t = void (*)(void *);
+
+ static BackendManager &get();
+
+public:
+ backend::Backend *get(const std::string &key);
+ const backend::Backend *get(const std::string &key) const;
+ const backend::Backend *getDefault() const;
+ const std::vector<const backend::Backend *> &getAll() const { return _available_backends; };
+ /**
+ * @brief load backend plugin
+ *
+ * @param backend backend to be loaded
+ *
+ * @return
+ */
+ void loadBackend(const std::string &backend);
+
+private:
+ BackendManager() = default;
+
+private:
+ std::vector<const backend::Backend *> _available_backends;
+ std::map<std::string, std::unique_ptr<void, dlhandle_destroy_t>> _handle_map;
+ std::map<std::string, std::unique_ptr<backend::Backend, backend_destroy_t>> _gen_map;
+ /**
+ * @brief Allocate an object of a class of a plugin by loading a plugin function, that does
+ * allocation, and calling it
+ *
+ * @param object_of_plugin_class target object
+ * @param obj_creator_func_name name of the plugin function, that allocates an object
+ * @param handle handle of the plugin
+ * @param args arguments to pass to constructor of the plugin class
+ *
+ * @return
+ */
+ template <typename T, class... Types>
+ void loadObjectFromPlugin(std::shared_ptr<T> &object_of_plugin_class,
+ const std::string obj_creator_func_name, void *handle,
+ Types &&... args);
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_BACKEND_MANAGER_H__
diff --git a/runtime/onert/core/include/compiler/BackendResolver.h b/runtime/onert/core/include/compiler/BackendResolver.h
new file mode 100644
index 000000000..a316b4335
--- /dev/null
+++ b/runtime/onert/core/include/compiler/BackendResolver.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_BACKEND_RESOLVER_H__
+#define __ONERT_COMPILER_BACKEND_RESOLVER_H__
+
+#include <unordered_map>
+#include <typeindex>
+
+#include "backend/Backend.h"
+#include "ir/OperationIndexMap.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+class BackendResolver
+{
+public:
+ const backend::Backend *getBackend(const ir::OperationIndex &index) const
+ {
+ return _gen_map.at(index);
+ }
+
+ void setBackend(const ir::OperationIndex &index, const backend::Backend *backend)
+ {
+ _gen_map[index] = backend;
+ }
+
+ void
+ iterate(const std::function<void(const ir::OperationIndex &, const backend::Backend &)> &fn) const
+ {
+ for (const auto &e : _gen_map)
+ {
+ fn(e.first, *e.second);
+ }
+ }
+
+private:
+ ir::OperationIndexMap<const backend::Backend *> _gen_map;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_BACKEND_RESOLVER_H__
diff --git a/runtime/onert/core/include/compiler/CodeMap.h b/runtime/onert/core/include/compiler/CodeMap.h
new file mode 100644
index 000000000..e13d3334c
--- /dev/null
+++ b/runtime/onert/core/include/compiler/CodeMap.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_CODE_MAP_H__
+#define __ONERT_COMPILER_CODE_MAP_H__
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace compiler
+{
+
+struct CodeAndInfo
+{
+ const ir::OpSequence *op_seq;
+ const ir::operation::LowerInfo *lower_info;
+ std::unique_ptr<exec::FunctionSequence> fn_seq;
+
+ CodeAndInfo(const ir::OpSequence *op_seq, const ir::operation::LowerInfo *lower_info,
+ std::unique_ptr<exec::FunctionSequence> &&fn_seq)
+ : op_seq{op_seq}, lower_info{lower_info}, fn_seq{std::move(fn_seq)}
+ {
+ }
+};
+
+using CodeMap = std::unordered_map<ir::OpSequenceIndex, CodeAndInfo>;
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_CODE_MAP_H__
diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h
new file mode 100644
index 000000000..c25eb7b36
--- /dev/null
+++ b/runtime/onert/core/include/compiler/Compiler.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Compiler.h
+ * @brief This file contains Compiler class to define and run compilation phase
+ */
+
+#ifndef __ONERT_COMPILER_COMPILE_H_
+#define __ONERT_COMPILER_COMPILE_H_
+
+#include "ir/Graph.h"
+#include "exec/IExecutor.h"
+
+namespace onert
+{
+
+namespace compiler
+{
+
+enum class State
+{
+ CREATED, // Before compilation
+ STARTED, // Compile is started
+ LOWERED, // Backend is decided
+ COMPILED // Success compilation
+};
+
+struct ManualSchedulerOptions
+{
+ std::string backend_for_all;
+ std::unordered_map<ir::OpCode, std::string> opcode_to_backend;
+ std::unordered_map<ir::OperationIndex, std::string> index_to_backend;
+};
+
+struct CompilerOptions
+{
+ // GENERAL OPTIONS
+ std::vector<std::string> backend_list;
+
+ // OPTIONS ONLY FOR DEBUGGING/PROFILING
+ std::string trace_filepath; //< File path to save trace records
+ int graph_dump_level; //< Graph dump level, values between 0 and 2 are valid
+  int op_seq_max_node;          //< Max number of nodes in one op_seq (presumably; verify against scheduler)
+ std::string executor; //< Executor name to use
+ ManualSchedulerOptions manual_scheduler_options; //< Options for ManualScheduler
+ bool he_scheduler; //< HEScheduler if true, ManualScheduler otherwise
+ bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
+ bool delete_cached_data; //< Whether CachedData deletion ON/OFF
+ bool disable_compile; //< Run with Interpreter if true, try compilation otherwise
+};
+
+CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Graph &graph);
+
+/**
+ * @brief Class to compile graph model
+ */
+class Compiler
+{
+public:
+ /**
+ * @brief Construct a new Compiler object
+   * @param[in] graph Graph model
+ */
+ Compiler(const std::shared_ptr<ir::Graph> &graph);
+
+public:
+ /**
+   * @brief Run compilation. Compilation result will be saved in _executor
+ */
+ void compile(void);
+ /**
+   * @brief Pass executor reference
+   * @param[out] executor Executor reference to return\n
+   * Set to nullptr if compile is not run yet
+ */
+ void release(std::shared_ptr<exec::IExecutor> &executor) { executor = _executor; }
+
+ void state(State state) { _state = state; }
+ State state(void) const { return _state; }
+
+ /**
+ * @brief Check if model can compile
+ * @return @c true if model can compile, otherwise @c false
+   * @note This method doesn't check model correctness,\n
+ * so model verification should be done before calling this method
+ */
+ bool checkCompilable();
+ CompilerOptions &options() { return _options; }
+
+private:
+ void checkProfilerConditions();
+
+private:
+ std::shared_ptr<ir::Graph> _graph;
+ std::shared_ptr<exec::IExecutor> _executor;
+ State _state;
+ CompilerOptions _options;
+};
+
+} // namespace compiler
+
+} // namespace onert
+
+#endif // __ONERT_COMPILER_COMPILE_H_
diff --git a/runtime/onert/core/include/compiler/ExecutionBuilder.h b/runtime/onert/core/include/compiler/ExecutionBuilder.h
new file mode 100644
index 000000000..d54d9d046
--- /dev/null
+++ b/runtime/onert/core/include/compiler/ExecutionBuilder.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_EXECUTION_BUILDER_H__
+#define __ONERT_COMPILER_EXECUTION_BUILDER_H__
+
+#include <memory>
+
+#include "ir/operation/LowerInfo.h"
+#include "ir/OpSequence.h"
+#include "exec/FunctionSequence.h"
+#include "CodeMap.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+class ExecutionBuilder
+{
+public:
+ void append(const ir::OpSequenceIndex index, CodeAndInfo &&code_and_info)
+ {
+ _code_map.emplace(index, std::move(code_and_info));
+ }
+
+ CodeMap releaseCodeMap() { return std::move(_code_map); }
+
+private:
+ CodeMap _code_map;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_EXECUTION_BUILDER_H__
diff --git a/runtime/onert/core/include/exec/ExecTime.h b/runtime/onert/core/include/exec/ExecTime.h
new file mode 100644
index 000000000..846d0930b
--- /dev/null
+++ b/runtime/onert/core/include/exec/ExecTime.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_EXEC_TIME_H__
+#define __ONERT_EXEC_EXEC_TIME_H__
+
+#include "backend/Backend.h"
+#include "backend/IConfig.h"
+#include "JSONExecTime.h"
+#include <memory>
+#include <limits>
+#include <map>
+#include <unordered_map>
+#include <vector>
+
+namespace onert
+{
+namespace exec
+{
+class ExecTime
+{
+public:
+ explicit ExecTime(const std::vector<const backend::Backend *> &backends)
+ : _json(backends, _measurements)
+ {
+ }
+
+public:
+ /**
+ * @brief Get exec time of an operation with input size
+ * or linearly interpolated value based on size if there is no record for given size
+ *
+ * @param[in] backend id of a backend
+ * @param[in] operation name of an operation
+ * @param[in] quant if input type quantized
+ * @param[in] op_size sum of operation's flattened sizes of inputs and outputs
+ * @return execution time for given input sizes
+ * -1 if there are no records for given parameters (backend, op, quantization).
+ */
+ int64_t getOperationExecTime(const backend::Backend *backend, const std::string &operation,
+ bool quant, uint32_t op_size) const;
+ /**
+ * @brief Update exec time of the operation on a backend with given input size or
+ * add new entity if there is no one.
+ *
+ * @param[in] backend id of a backend
+ * @param[in] operation name of an operation
+ * @param[in] quant if input type quantized
+ * @param[in] op_size sum of operation's flattened sizes of inputs and outputs
+ * @param[in] time real measured value
+ */
+ void updateOperationExecTime(const backend::Backend *backend, const std::string &operation,
+ bool quant, uint32_t op_size, int64_t time);
+ /**
+ * @brief Get the permute time from one backend to another
+ *
+ * @param[in] from_backend
+ * @param[in] to_backend
+ * @param[in] quant if input type quantized
+ * @param[in] op_size sum of operation's flattened sizes of inputs and outputs
+ * @return permutation time for operation size
+ */
+ int64_t getPermuteTime(const backend::Backend *from_backend, const backend::Backend *to_backend,
+ bool quant, uint32_t op_size) const;
+ /**
+ * @brief Update permute time from one backend to another
+ *
+ * @param[in] from_backend
+ * @param[in] to_backend
+ * @param[in] quant if input type quantized
+ * @param[in] time measured permutation time
+ * @param[in] op_size sum of operation's flattened sizes of inputs and outputs
+ */
+ void updatePermuteTime(const backend::Backend *from_backend, const backend::Backend *to_backend,
+ bool quant, uint32_t op_size, int64_t time);
+ /**
+ * @brief Get the max value of int32_t in int64_t
+ * @return max value
+ */
+ static int64_t getMax() { return _MAX; }
+ /**
+ * @brief Update metrics file with new data.
+ */
+ void uploadOperationsExecTime() const { _json.uploadOperationsExecTime(); }
+ static const int64_t NOT_FOUND = -1;
+
+private:
+ /// @brief Measurement data, which is shared with serializer
+ MeasurementData _measurements;
+ // int64_t::max may cause integer overflow
+ static const int64_t _MAX = std::numeric_limits<int32_t>::max();
+ /// @brief Serializer
+ JSON _json;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_EXEC_TIME_H__
diff --git a/runtime/onert/core/include/exec/Execution.h b/runtime/onert/core/include/exec/Execution.h
new file mode 100644
index 000000000..5ce5a8fee
--- /dev/null
+++ b/runtime/onert/core/include/exec/Execution.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Execution.h
+ * @brief This file defines execution
+ */
+#ifndef __ONERT_EXEC_EXECUTION_H__
+#define __ONERT_EXEC_EXECUTION_H__
+
+#include "ir/Layout.h"
+#include "exec/IExecutor.h"
+#include "IODescription.h"
+
+#include <thread>
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to define execution instance to collect input/output information for inference
+ * and prepare executor run (TODO)
+ */
+class Execution
+{
+
+public:
+ /**
+ * @brief Construct a new Execution object
+ * @param[in] executor Model executor
+ */
+ Execution(const std::shared_ptr<IExecutor> &executor);
+
+public:
+ /**
+ * @brief Returns graph object
+ * @return Graph object
+ */
+ const ir::Graph &graph() const { return _executor->graph(); }
+ /**
+ * @brief Set input data's information
+ * @param[in] index Input index
+ * @param[in] buffer Input data's buffer pointer
+ * @param[in] length Input data's length
+ * @param[in] layout Input data's data format
+ */
+ void setInput(const ir::IOIndex &index, const void *buffer, size_t length,
+ ir::Layout layout = ir::Layout::NHWC);
+ /**
+ * @brief Set input data's information, especially to specify unknown dimensions on model
+ * build time.
+ * @param[in] index Input index
+ * @param[in] type Input data's type info
+ * @param[in] shape Input data's shape
+ * @param[in] buffer Input data's buffer pointer
+ * @param[in] length Input data's length
+ * @param[in] layout Input data's data format
+ */
+ void setInput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape,
+ const void *buffer, size_t length, ir::Layout layout = ir::Layout::NHWC);
+ /**
+ * @brief Set output data's information
+ * @param[in] index Output index
+ * @param[in] buffer Output data's buffer pointer
+ * @param[in] length Output data's length
+ * @param[in] layout Output data's data format
+ */
+ void setOutput(const ir::IOIndex &index, void *buffer, size_t length,
+ ir::Layout layout = ir::Layout::NHWC);
+ /**
+ * @brief Set output data's information, especially to specify unknown dimensions on model
+ * build time.
+ * @param[in] index Output index
+ * @param[in] type Output data's type info
+ * @param[in] shape Output data's shape
+ * @param[in] buffer Output data's buffer pointer
+ * @param[in] length Output data's length
+ * @param[in] layout Output data's data format
+ */
+ void setOutput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape,
+ void *buffer, size_t length, ir::Layout layout = ir::Layout::NHWC);
+ /**
+ * @brief Set input data's data format
+ * @param[in] index Input index
+ * @param[in] layout Input data's data format
+ */
+ void setInputLayout(const ir::IOIndex &index, ir::Layout layout);
+ /**
+ * @brief Set output data's data format
+ * @param[in] index Output index
+ * @param[in] layout Output data's data format
+ */
+ void setOutputLayout(const ir::IOIndex &index, ir::Layout layout);
+ /**
+ * @brief Execution
+ * @note It should be called after setting input and output buffer
+ */
+ void execute();
+
+ /**
+ * @brief Start asynchronous execution
+ * @note It returns after execution thread is started
+ * It should be called after setting input and output buffer
+ */
+ void startExecute(void);
+
+ /**
+ * @brief Return when execution is finished
+ * @note It waits until execution is finished
+ */
+ void waitFinish(void);
+
+ /**
+ * @brief Check execution is finished
+ * @return @c true if execution is finished, otherwise @c false
+ */
+ bool isFinished(void) const;
+
+private:
+ const std::shared_ptr<IExecutor> _executor;
+ IODescription _io_desc;
+ std::unique_ptr<std::thread> _exec_thread;
+ bool finished{false};
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_EXECUTION_H__
diff --git a/runtime/onert/core/include/exec/ExecutionObservers.h b/runtime/onert/core/include/exec/ExecutionObservers.h
new file mode 100644
index 000000000..a993efee1
--- /dev/null
+++ b/runtime/onert/core/include/exec/ExecutionObservers.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_OBSREVERS_H__
+#define __ONERT_EXEC_OBSREVERS_H__
+
+#include "exec/IFunction.h"
+#include "ir/OpSequence.h"
+#include "ExecTime.h"
+#include "util/ITimer.h"
+#include "IExecutor.h"
+#include "misc/EventCollector.h"
+#include "misc/EventRecorder.h"
+
+namespace onert
+{
+namespace exec
+{
+class IExecutionObserver
+{
+public:
+ /// @brief Invoked just before model (not individual operation) execution begins
+ virtual void handleBegin(IExecutor *) { return; }
+
+ virtual void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0;
+ virtual void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0;
+
+ /// @brief Invoked just after model (not individual operation) execution ends
+ virtual void handleEnd(IExecutor *) { return; }
+
+ virtual ~IExecutionObserver() = default;
+};
+
+class ProfileObserver : public IExecutionObserver
+{
+public:
+ explicit ProfileObserver(std::shared_ptr<ExecTime> et) : _et(std::move(et)) {}
+ void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
+ void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
+
+ void handleEnd(IExecutor *) override { _et->uploadOperationsExecTime(); }
+
+private:
+ std::unique_ptr<util::ITimer> _timer;
+ std::shared_ptr<ExecTime> _et;
+};
+
+class ChromeTracingObserver : public IExecutionObserver
+{
+public:
+ ChromeTracingObserver(const std::string &filepath);
+ ~ChromeTracingObserver();
+ void handleBegin(IExecutor *) override;
+ void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
+ void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
+ void handleEnd(IExecutor *) override;
+
+private:
+ static std::string opSequenceTag(const ir::OpSequence *op_seq);
+
+private:
+ std::ofstream _ofs;
+ EventRecorder _recorder;
+ EventCollector _collector;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_OBSREVERS_H__
diff --git a/runtime/onert/core/include/exec/FunctionSequence.h b/runtime/onert/core/include/exec/FunctionSequence.h
new file mode 100644
index 000000000..e11e10043
--- /dev/null
+++ b/runtime/onert/core/include/exec/FunctionSequence.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_FUNCTION_SEQUENCE_H__
+#define __ONERT_EXEC_FUNCTION_SEQUENCE_H__
+
+#include <memory>
+#include <vector>
+#include <functional>
+
+#include "exec/IFunction.h"
+#include <memory>
+
+namespace onert
+{
+namespace exec
+{
+
+class FunctionSequence : public IFunction
+{
+public:
+ template <typename... Args> FunctionSequence(Args &&... args) { initialize(std::move(args)...); }
+
+private:
+ void initialize()
+ {
+ // Template base case : do nothing
+ }
+
+ template <typename T, typename... Args> void initialize(std::unique_ptr<T> &&fn, Args &&... args)
+ {
+ _functions.emplace_back(std::move(fn));
+ initialize(std::move(args)...);
+ }
+
+public:
+ virtual ~FunctionSequence() = default;
+
+ void run() override;
+ void runSync() override;
+ void prepare() override;
+
+ /**
+ * @brief Appends an IFunction object to the function sequence
+ *
+ * @param function IFunction object to be appended
+ */
+ void append(std::unique_ptr<IFunction> &&function);
+
+ void iterate(const std::function<void(IFunction &)> &fn);
+
+private:
+ std::vector<std::unique_ptr<IFunction>> _functions;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_FUNCTION_SEQUENCE_H__
diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h
new file mode 100644
index 000000000..8ae492696
--- /dev/null
+++ b/runtime/onert/core/include/exec/IExecutor.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file IExecutor.h
+ * @brief This file defines interface of Executor
+ */
+#ifndef __ONERT_EXEC_I_EXECUTOR_H_
+#define __ONERT_EXEC_I_EXECUTOR_H_
+
+#include "ir/Graph.h"
+#include "IFunction.h"
+#include "IODescription.h"
+#include "ir/OperationIndexMap.h"
+
+namespace onert
+{
+namespace exec
+{
+class IExecutionObserver;
+/**
+ * @brief Struct to define interface of Executor
+ */
+struct IExecutor
+{
+ /**
+ * @brief Construct a new IExecutor object
+ */
+ IExecutor() = default;
+ /**
+ * @brief Destroy the IExecutor object
+ */
+ virtual ~IExecutor() = default;
+
+ /**
+ * @brief Returns graph object
+ *
+ * @return Graph object
+ */
+ virtual const ir::Graph &graph() = 0;
+
+ /**
+ * @brief Set an ordering on operations
+ * @param[in] ranks The table encoding the ordering
+ */
+ virtual void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) = 0;
+
+ /**
+ * @brief Start execution
+ * @param[in] desc Input and output description
+ * @note This method should be thread-safe
+ */
+ virtual void execute(const IODescription &desc) = 0;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_I_EXECUTOR_H_
diff --git a/runtime/onert/core/include/exec/IFunction.h b/runtime/onert/core/include/exec/IFunction.h
new file mode 100644
index 000000000..258f1e5b3
--- /dev/null
+++ b/runtime/onert/core/include/exec/IFunction.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_I_FUNCTION_H__
+#define __ONERT_EXEC_I_FUNCTION_H__
+
+namespace onert
+{
+namespace exec
+{
+
+class IFunction
+{
+public:
+ virtual ~IFunction() = default;
+ virtual void run() = 0;
+ virtual void runSync() = 0;
+ virtual void prepare() {}
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_I_FUNCTION_H__
diff --git a/runtime/onert/core/include/exec/IODescription.h b/runtime/onert/core/include/exec/IODescription.h
new file mode 100644
index 000000000..8bfddcde6
--- /dev/null
+++ b/runtime/onert/core/include/exec/IODescription.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_IO_DESCRIPTION_H__
+#define __ONERT_EXEC_IO_DESCRIPTION_H__
+
+#include <vector>
+
+#include "ir/OperandInfo.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Description of one model input: operand info plus a read-only user buffer
+ * @note The buffer is stored by pointer, not copied — NOTE(review): lifetime is
+ *       presumably managed by the caller for the duration of execute(); confirm
+ */
+struct InputDesc
+{
+  const ir::OperandInfo info;   // operand type/shape information
+  const void *buffer;           // source data to read the input from
+  const size_t size;            // size of buffer in bytes
+  const ir::Layout layout;      // layout of the data in buffer
+
+  InputDesc(void) = delete;
+  InputDesc(const ir::OperandInfo &info, const void *buffer, const size_t size, ir::Layout layout)
+      : info(info), buffer(buffer), size(size), layout(layout)
+  {
+  }
+};
+
+/**
+ * @brief Description of one model output: operand info plus a writable user buffer
+ * @note The buffer is stored by pointer, not copied — NOTE(review): lifetime is
+ *       presumably managed by the caller for the duration of execute(); confirm
+ */
+struct OutputDesc
+{
+  const ir::OperandInfo info;   // operand type/shape information
+  void *buffer;                 // destination buffer the result is written to
+  const size_t size;            // size of buffer in bytes
+  const ir::Layout layout;      // layout expected in buffer
+
+  OutputDesc(void) = delete;
+  OutputDesc(const ir::OperandInfo &info, void *buffer, const size_t size, ir::Layout layout)
+      : info(info), buffer(buffer), size(size), layout(layout)
+  {
+  }
+};
+
+/**
+ * @brief Bundle of all input and output buffer descriptions for one execution
+ */
+struct IODescription
+{
+  std::vector<std::unique_ptr<InputDesc>> inputs;
+  std::vector<std::unique_ptr<OutputDesc>> outputs;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_IO_DESCRIPTION_H__
diff --git a/runtime/onert/core/include/exec/JSONExecTime.h b/runtime/onert/core/include/exec/JSONExecTime.h
new file mode 100644
index 000000000..a64cb3133
--- /dev/null
+++ b/runtime/onert/core/include/exec/JSONExecTime.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_JSON_EXEC_TIME_H__
+#define __ONERT_EXEC_JSON_EXEC_TIME_H__
+
+#include <fstream>
+#include <unordered_map>
+#include <map>
+#include <vector>
+#include "backend/Backend.h"
+#include "backend/IConfig.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief table, that contains execution time of an operation on some backend for different input
+ * sizes and transfer time from one backend to another for various input sizes (permutation time)
+ *
+ * backend -> op -> quant-> size --> time
+ * _measurements[Backend*]["string"][bool][uint32_t] = int64_t
+ */
+using MeasurementData = std::unordered_map<
+ const backend::Backend *,
+ std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>;
+
+/**
+ * @brief Loads and stores operation execution-time measurements as a JSON file
+ *        ("exec_time.json"), keyed by backend, operation, quantization flag and size
+ */
+class JSON
+{
+public:
+  // Loads existing measurements from file at construction time
+  explicit JSON(const std::vector<const backend::Backend *> &backends,
+                MeasurementData &measurements)
+      : _measurement_file("exec_time.json"), _backends(), _measurements(measurements)
+  {
+    for (const auto b : backends)
+    {
+      _backends.emplace(b->config()->id(), b);
+    }
+    loadOperationsExecTime();
+  }; // NOTE(review): stray ';' after constructor body — harmless but nonstandard
+  /**
+   * @brief Update _operations_exec_time_file with new data.
+   */
+  void uploadOperationsExecTime() const;
+
+private:
+  ///@brief file containing measurements
+  std::string _measurement_file;
+  ///@brief backend name -> backend object, built from the constructor argument
+  std::unordered_map<std::string, const backend::Backend *> _backends;
+  ///@brief reference to the shared measurement table (same type as MeasurementData)
+  std::unordered_map<
+      const backend::Backend *,
+      std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>
+      &_measurements;
+  /**
+   * @brief Helper function for inserting data to OperationExecTimes
+   *
+   * @param backend String name of backend
+   * @param operation String name of operation
+   * @param quant if input type quantized
+   * @param stream File stream
+   */
+  void readOperation(const std::string &backend, const std::string &operation, bool quant,
+                     std::ifstream &stream);
+
+  /**
+   * @brief Helper function for writing OperationExecTimes to stream
+   *
+   * @param operation_info Map of operations execution information
+   * @param stream File stream
+   */
+  void printOperation(const std::map<uint32_t, int64_t> &operation_info,
+                      std::ofstream &stream) const;
+  /**
+   * @brief Parse and load operations_exec_time from _operations_exec_time_file.
+   */
+  void loadOperationsExecTime();
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_JSON_EXEC_TIME_H__
diff --git a/runtime/onert/core/include/exec/NopFunction.h b/runtime/onert/core/include/exec/NopFunction.h
new file mode 100644
index 000000000..df9537cb5
--- /dev/null
+++ b/runtime/onert/core/include/exec/NopFunction.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file NopFunction.h
+ * @brief This file defines NopFunction
+ */
+#ifndef __ONERT_EXEC_NOP_FUNCTION_H_
+#define __ONERT_EXEC_NOP_FUNCTION_H_
+
+#include "IFunction.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief A derivative of IFunction that does nothing
+ *
+ */
+class NopFunction : public IFunction
+{
+public:
+  NopFunction() = default;
+  // No-op: intentionally does nothing
+  void run() override
+  {
+    // DO NOTHING
+  }
+  void runSync() override
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_NOP_FUNCTION_H_
diff --git a/runtime/onert/core/include/interp/InterpExecutor.h b/runtime/onert/core/include/interp/InterpExecutor.h
new file mode 100644
index 000000000..2e3f3ca54
--- /dev/null
+++ b/runtime/onert/core/include/interp/InterpExecutor.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file InterpExecutor.h
+ * @brief This file contains InterpExecutor class\n
+ * to manage interpreter execution and environment
+ */
+#ifndef __ONERT_INTERP_INTERP_EXECUTOR_H__
+#define __ONERT_INTERP_INTERP_EXECUTOR_H__
+
+#include "ir/OperandIndexMap.h"
+#include "ir/Graph.h"
+#include "exec/IExecutor.h"
+
+namespace onert
+{
+namespace interp
+{
+
+class ITensor;
+
+/**
+ * @brief Class to execute model using interpreter
+ */
+class InterpExecutor final : public exec::IExecutor
+{
+public:
+  /**
+   * @brief Construct an executor over a graph; only keeps a reference, no copy
+   * @note The referenced graph must outlive this executor
+   */
+  explicit InterpExecutor(const ir::Graph &graph) : _graph(graph)
+  {
+    // DO NOTHING
+  }
+
+public:
+  /**
+   * @brief Return graph object
+   * @return Graph object
+   */
+  const ir::Graph &graph() final { return _graph; }
+  // Ranks are ignored by the interpreter backend
+  void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{
+      // Not implemented
+  };
+  /**
+   * @brief Start execution
+   * @note It should be called after setting input and output buffer
+   */
+  void execute(const exec::IODescription &desc) final;
+
+private:
+  const ir::Graph &_graph;  // model to interpret (not owned)
+  // tensor registry populated during execution; ITensor is forward-declared above
+  ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map;
+};
+
+} // namespace interp
+} // namespace onert
+
+#endif // __ONERT_INTERP_INTERP_EXECUTOR_H__
diff --git a/runtime/onert/core/include/ir/BackendSet.h b/runtime/onert/core/include/ir/BackendSet.h
new file mode 100644
index 000000000..dc635bdaf
--- /dev/null
+++ b/runtime/onert/core/include/ir/BackendSet.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_BACKEND_SET_H__
+#define __ONERT_IR_BACKEND_SET_H__
+
+#include "util/Set.h"
+
+namespace onert
+{
+namespace backend
+{
+class Backend;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+
+using BackendSet = util::Set<const backend::Backend *>;
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_BACKEND_SET_H__
diff --git a/runtime/onert/core/include/ir/Coordinates.h b/runtime/onert/core/include/ir/Coordinates.h
new file mode 100644
index 000000000..6938fecef
--- /dev/null
+++ b/runtime/onert/core/include/ir/Coordinates.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_COORDINATES_H__
+#define __ONERT_IR_COORDINATES_H__
+
+#include <cassert>
+#include <stdint.h>
+#include <vector>
+
+#include "Layout.h"
+
+namespace onert
+{
+namespace ir
+{
+
+/**
+ * @brief Class to represent position(offset) of tensor.\n
+ * Assume that the front is higher dimensional.
+ * i.g. N: 0, C: 1, H: 2, W: 3 for NCHW layout
+ */
+class Coordinates final
+{
+public:
+  // Maximum supported tensor rank (4-D, e.g. NCHW/NHWC)
+  static constexpr size_t num_max_dimensions = 4;
+
+public:
+  /**
+   * @brief Construct a new Coordinates object with zero dimension
+   * @return N/A
+   */
+  Coordinates() = default;
+  /**
+   * @brief Construct a new Coordinates object
+   * @param[in] init The initializer_list with coordinates
+   * @return
+   */
+  Coordinates(std::initializer_list<int32_t> init) : _coordinates{init}
+  {
+    assert(init.size() <= num_max_dimensions);
+  }
+
+public:
+  /**
+   * @brief Set the coordinate of one of the coordinates.
+   *
+   * @param[in] dimension Dimension for which the coordinate is set.
+   * @param[in] coordinate Coordinate to be set for the dimension.
+   */
+  void set(size_t dimension, int32_t coordinate)
+  {
+    assert(dimension < num_max_dimensions);
+    // Grow lazily so unset lower dimensions default to 0
+    if (dimension >= _coordinates.size())
+    {
+      _coordinates.resize(dimension + 1, 0);
+    }
+    _coordinates[dimension] = coordinate;
+  }
+
+public:
+  /**
+   * @brief Return size of coordinates
+   *
+   * @return size of coordinates
+   */
+  size_t size() const { return _coordinates.size(); }
+
+public:
+  // Unchecked in release builds: asserts dimension is within current size
+  int32_t operator[](size_t dimension) const
+  {
+    assert(dimension < _coordinates.size());
+    return _coordinates[dimension];
+  }
+
+public:
+  /**
+   * @brief begin() of const_iterator for this class
+   *
+   * @return The first iterator of the coordinates
+   */
+  std::vector<int32_t>::const_iterator begin() const { return _coordinates.begin(); }
+  /**
+   * @brief end() of const_iterator for this class
+   *
+   * @return The last iterator of the coordinates
+   */
+  std::vector<int32_t>::const_iterator end() const { return _coordinates.end(); }
+
+private:
+  std::vector<int32_t> _coordinates;  // one entry per dimension, highest dimension first
+};
+
+Coordinates convertCoordinates(const Coordinates &from_coordinates, Layout from_layout,
+ Layout to_layout);
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_COORDINATES_H__
diff --git a/runtime/onert/core/include/ir/Data.h b/runtime/onert/core/include/ir/Data.h
new file mode 100644
index 000000000..c85eb4847
--- /dev/null
+++ b/runtime/onert/core/include/ir/Data.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_DATA_H__
+#define __ONERT_IR_DATA_H__
+
+#include <algorithm>
+
+namespace onert
+{
+namespace ir
+{
+
+/**
+ * @brief Interface over a read-only byte buffer backing a constant operand
+ */
+struct Data
+{
+  virtual ~Data() = default;
+
+  virtual size_t size(void) const = 0;          // size of the buffer in bytes
+  virtual const uint8_t *base(void) const = 0;  // pointer to the first byte
+};
+
+/**
+ * @brief Data implementation that owns a heap copy of the given bytes
+ * @note NOTE(review): owning raw pointer with a user-defined destructor but no
+ *       deleted/defined copy operations — copying a CachedData would double-free
+ *       _base (rule of three). Consider deleting copy ctor/assignment.
+ */
+class CachedData final : public Data
+{
+public:
+  // Copies [base, base + size) into a freshly allocated buffer
+  CachedData(const uint8_t *base, size_t size) : _base{new uint8_t[size]}, _size{size}
+  {
+    std::copy(base, base + size, _base);
+  }
+
+public:
+  ~CachedData() { delete[] _base; }
+
+public:
+  size_t size(void) const override { return _size; }
+  const uint8_t *base(void) const override { return _base; }
+
+private:
+  uint8_t *_base;  // owned copy, released in the destructor
+  size_t _size;
+};
+
+/**
+ * @brief Data implementation that only references externally owned bytes
+ * @note Does not copy or free the buffer; the provider must keep it alive
+ */
+class ExternalData final : public Data
+{
+public:
+  ExternalData(const uint8_t *base, size_t size) : _base{base}, _size{size}
+  {
+    // DO NOTHING
+  }
+
+public:
+  size_t size(void) const override { return _size; }
+  const uint8_t *base(void) const override { return _base; }
+
+private:
+  const uint8_t *_base;  // not owned
+  const size_t _size;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_DATA_H__
diff --git a/runtime/onert/core/include/ir/DataType.h b/runtime/onert/core/include/ir/DataType.h
new file mode 100644
index 000000000..f706d1375
--- /dev/null
+++ b/runtime/onert/core/include/ir/DataType.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_DATATYPE_H__
+#define __ONERT_IR_DATATYPE_H__
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace ir
+{
+
+/**
+ * @brief Element types an operand can hold; numeric values are part of the enum
+ */
+enum class DataType
+{
+  FLOAT32 = 0,
+  INT32 = 1,
+  UINT32 = 2,
+  QUANT8_ASYMM = 3,
+  BOOL8 = 4,
+  UINT8 = 5,
+  QUANT8_SYMM = 6,
+};
+
+/**
+ * @brief Return the size in bytes of one element of the given data type
+ * @param[in] data_type Element type to query
+ * @return Element size in bytes
+ * @throw std::runtime_error if the type is not handled (e.g. a future addition)
+ */
+inline size_t sizeOfDataType(DataType data_type)
+{
+  switch (data_type)
+  {
+    case DataType::FLOAT32:
+      return sizeof(float);
+    case DataType::INT32:
+      return sizeof(int32_t);
+    case DataType::UINT32:
+      return sizeof(uint32_t);
+    case DataType::BOOL8:
+    case DataType::QUANT8_ASYMM:
+    case DataType::UINT8:
+      return sizeof(uint8_t);
+    case DataType::QUANT8_SYMM:
+      return sizeof(int8_t);
+    default:
+      throw std::runtime_error{"Unsupported type size"};
+  }
+}
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_DATATYPE_H__
diff --git a/runtime/onert/core/include/ir/Graph.h b/runtime/onert/core/include/ir/Graph.h
new file mode 100644
index 000000000..a61d1861c
--- /dev/null
+++ b/runtime/onert/core/include/ir/Graph.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_GRAPH_H__
+#define __ONERT_IR_GRAPH_H__
+
+#include <functional>
+#include <unordered_map>
+
+#include "ir/Operands.h"
+#include "ir/Operations.h"
+#include "ir/OpSequence.h"
+#include "ir/OpSequences.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace custom
+{
+class IKernelBuilder;
+} // namespace custom
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+
+/**
+ * @brief Model graph: operations, operands and model inputs/outputs.
+ *        Built in BUILDING phase, then frozen to MODEL via finishBuilding().
+ */
+class Graph
+{
+private:
+  enum class Phase
+  {
+    BUILDING,  // graph is still mutable
+    MODEL      // building finished
+  };
+
+public:
+  Graph(void);
+  ~Graph(void);
+
+  // Graph Building
+public:
+  OperandIndex addOperand(const Shape &shape, const TypeInfo &type);
+  OperationIndex addOperation(std::unique_ptr<Operation> &&node);
+  void setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data);
+  void addInput(const OperandIndex &ind);
+  void addOutput(const OperandIndex &ind);
+  // Transitions the graph from BUILDING to MODEL phase
+  void finishBuilding(void);
+  void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
+  bool isBuildingPhase(void) const { return _phase == Phase::BUILDING; }
+  void setLayout(Layout layout) { _layout = layout; }
+
+private:
+  void initializeUseDef();
+
+  // Custom operations support
+public:
+  void
+  bindKernelBuilder(const std::shared_ptr<onert::backend::custom::IKernelBuilder> &kernel_builder)
+  {
+    _kernel_builder = kernel_builder;
+  }
+
+  const std::shared_ptr<backend::custom::IKernelBuilder> &getKernelBuilder() const
+  {
+    return _kernel_builder;
+  }
+
+private:
+  std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
+
+  // Accessors
+public:
+  const OperandIndexSequence &getInputs() const { return _inputs; }
+  OperandIndexSequence &getInputs() { return _inputs; }
+  const OperandIndexSequence &getOutputs() const { return _outputs; }
+  OperandIndexSequence &getOutputs() { return _outputs; }
+  const Operands &operands() const { return _operands; }
+  Operands &operands() { return _operands; } // TODO Remove this non-const accessor
+  const Operations &operations() const { return _operations; }
+  Operations &operations() { return _operations; }
+  Layout layout() { return _layout; } // NOTE(review): could be const-qualified
+
+private:
+  Phase _phase{Phase::BUILDING};
+  Operations _operations;
+  Operands _operands;
+  OperandIndexSequence _inputs;
+  OperandIndexSequence _outputs;
+  std::unordered_map<SubgraphIndex, std::shared_ptr<Graph>> _subgraphs;
+  // TFLite and circle's default layout is NHWC
+  Layout _layout{Layout::NHWC};
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_GRAPH_H__
diff --git a/runtime/onert/core/include/ir/Index.h b/runtime/onert/core/include/ir/Index.h
new file mode 100644
index 000000000..4f04546bc
--- /dev/null
+++ b/runtime/onert/core/include/ir/Index.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERAND_INDEX_H__
+#define __ONERT_IR_OPERAND_INDEX_H__
+
+#include "util/Index.h"
+
+namespace onert
+{
+namespace ir
+{
+
+struct OperationIndexTag;
+using OperationIndex = ::onert::util::Index<uint32_t, OperationIndexTag>;
+
+struct OperandIndexTag;
+using OperandIndex = ::onert::util::Index<uint32_t, OperandIndexTag>;
+
+struct IOIndexTag;
+using IOIndex = ::onert::util::Index<uint32_t, IOIndexTag>;
+
+struct OpSequenceIndexTag;
+using OpSequenceIndex = ::onert::util::Index<uint32_t, OpSequenceIndexTag>;
+
+struct SubgraphIndexTag;
+using SubgraphIndex = ::onert::util::Index<uint32_t, SubgraphIndexTag>;
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERAND_INDEX_H__
diff --git a/runtime/onert/core/include/ir/InternalType.h b/runtime/onert/core/include/ir/InternalType.h
new file mode 100644
index 000000000..e42db72cf
--- /dev/null
+++ b/runtime/onert/core/include/ir/InternalType.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_INTERNAL_TYPE_H__
+#define __ONERT_IR_INTERNAL_TYPE_H__
+
+#include <cstdint>
+
+namespace onert
+{
+namespace ir
+{
+
+enum class Activation
+{
+ NONE = 0,
+ RELU = 1,
+ RELU1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGMOID = 5
+};
+
+struct Stride
+{
+ uint32_t vertical;
+ uint32_t horizontal;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_INTERNAL_TYPE_H__
diff --git a/runtime/onert/core/include/ir/Layout.h b/runtime/onert/core/include/ir/Layout.h
new file mode 100644
index 000000000..082810172
--- /dev/null
+++ b/runtime/onert/core/include/ir/Layout.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_LAYOUT_H__
+#define __ONERT_IR_LAYOUT_H__
+
+#include <functional>
+#include <string>
+
+namespace onert
+{
+namespace ir
+{
+
+enum class Layout
+{
+ UNKNOWN = 0,
+ NHWC,
+ NCHW
+};
+
+/**
+ * @brief Return a human-readable name for a Layout value
+ * @throw std::runtime_error for values outside the enum
+ * @note NOTE(review): std::runtime_error needs <stdexcept>, which this header does
+ *       not include directly — it likely compiles via transitive includes; confirm
+ */
+inline std::string to_string(Layout layout)
+{
+  switch (layout)
+  {
+    case Layout::NHWC:
+      return std::string{"NHWC"};
+    case Layout::NCHW:
+      return std::string{"NCHW"};
+    case Layout::UNKNOWN:
+      return std::string{"UNKNOWN"};
+    default:
+      throw std::runtime_error("WRONG LAYOUT");
+  }
+}
+
+} // namespace ir
+} // namespace onert
+
+namespace std
+{
+
+// std::hash specialization so Layout can key unordered containers;
+// hashes the underlying integral value
+template <> struct hash<onert::ir::Layout>
+{
+  size_t operator()(onert::ir::Layout value) const noexcept
+  {
+    using type = typename std::underlying_type<onert::ir::Layout>::type;
+    return hash<type>()(static_cast<type>(value));
+  }
+};
+
+} // namespace std
+
+#endif // __ONERT_IR_LAYOUT_H__
diff --git a/runtime/onert/core/include/ir/LowerInfoMap.h b/runtime/onert/core/include/ir/LowerInfoMap.h
new file mode 100644
index 000000000..b42417092
--- /dev/null
+++ b/runtime/onert/core/include/ir/LowerInfoMap.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_LOWER_INFO_MAP_H__
+#define __ONERT_IR_LOWER_INFO_MAP_H__
+
+#include <memory>
+#include <unordered_map>
+
+#include "ir/operand/LowerInfo.h"
+#include "ir/operation/LowerInfo.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace ir
+{
+
+/**
+ * @brief Lowering information for a graph: per-OpSequence operation lower info
+ *        and per-operand lower info
+ */
+struct LowerInfoMap
+{
+  std::unordered_map<OpSequenceIndex, std::unique_ptr<operation::LowerInfo>> operation;
+  OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operand;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_LOWER_INFO_MAP_H__
diff --git a/runtime/onert/core/include/ir/LoweredGraph.h b/runtime/onert/core/include/ir/LoweredGraph.h
new file mode 100644
index 000000000..41d9d962e
--- /dev/null
+++ b/runtime/onert/core/include/ir/LoweredGraph.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_LOWERED_GRAPH_H__
+#define __ONERT_IR_LOWERED_GRAPH_H__
+
+#include "ir/Graph.h"
+#include "ir/LowerInfoMap.h"
+#include "ir/OpSequences.h"
+#include "compiler/BackendResolver.h"
+#include "compiler/Compiler.h"
+
+namespace onert
+{
+namespace ir
+{
+
+/**
+ * @brief A Graph partitioned into OpSequences with per-sequence/per-operand
+ *        lowering info and backend contexts, produced from compiler options
+ */
+class LoweredGraph
+{
+public:
+  LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options);
+
+  Graph &graph() { return _graph; }
+  const Graph &graph() const { return _graph; }
+  const LowerInfoMap *getLowerInfo() const { return &_lower_info_map; }
+  const operation::LowerInfo *getLowerInfo(const OpSequenceIndex &op_seq_index) const;
+  void setLowerInfo(const OpSequenceIndex &op_seq_index,
+                    std::unique_ptr<operation::LowerInfo> &&lower_info);
+  void removeLowerInfo(const OpSequenceIndex &op_seq_index);
+  const operand::LowerInfo *getLowerInfo(const OperandIndex &index) const;
+  operand::LowerInfo *getLowerInfo(const OperandIndex &index);
+  void setLowerInfo(const OperandIndex &index, std::unique_ptr<operand::LowerInfo> &&lower_info);
+  void removeLowerInfo(const OperandIndex &index);
+  OpSequences &op_seqs() { return _op_seqs; }
+  const OpSequences &op_seqs() const { return _op_seqs; }
+  // NOTE(review): non-const overload also returns const& — likely intentional
+  const backend::BackendContexts &backend_contexts() { return _backend_contexts; }
+  const backend::BackendContexts &backend_contexts() const { return _backend_contexts; }
+  std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
+
+private:
+  // Partitions operations into OpSequences and fills operand lower info
+  void makeOpSequences(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
+                       const compiler::CompilerOptions &options);
+
+  void
+  manipulateLowerInfo(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info);
+  void dumpLowerInfo();
+  // True when the given operation may be merged into the given OpSequence
+  bool mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index,
+                 Layout layout);
+  OpSequenceIndex appendFreshSingleOpSequence(const OperationIndex &node_index,
+                                              const Operation &node);
+
+private:
+  Graph _graph;
+  backend::BackendContexts _backend_contexts;
+  std::unique_ptr<compiler::BackendResolver> _backend_resolver; // TODO Remove this
+  std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
+  LowerInfoMap _lower_info_map;
+  // Pass(for Perm) can accept only graph so that Graph has OpSequences as a member
+  OpSequences _op_seqs;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_LOWERED_GRAPH_H__
diff --git a/runtime/onert/core/include/ir/OpCode.h b/runtime/onert/core/include/ir/OpCode.h
new file mode 100644
index 000000000..f22b36349
--- /dev/null
+++ b/runtime/onert/core/include/ir/OpCode.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OP_CODE_H__
+#define __ONERT_IR_OP_CODE_H__
+
+#include <functional>
+#include <stdint.h>
+
+namespace onert
+{
+namespace ir
+{
+
+/**
+ * @brief One enumerator per operation, generated from ir/Operations.lst via the
+ *        OP() x-macro; COUNT is the number of entries (excluding itself)
+ */
+enum class OpCode
+{
+  Invalid, //< Unused
+#define OP(Name) Name, //< All operations
+#include "ir/Operations.lst"
+#undef OP
+  COUNT
+};
+
+const char *toString(OpCode opcode);
+
+} // namespace ir
+} // namespace onert
+
+namespace std
+{
+
+// std::hash specialization so OpCode can key unordered containers;
+// hashes the underlying integral value
+template <> struct hash<onert::ir::OpCode>
+{
+  size_t operator()(onert::ir::OpCode value) const noexcept
+  {
+    using type = typename std::underlying_type<onert::ir::OpCode>::type;
+    return hash<type>()(static_cast<type>(value));
+  }
+};
+
+} // namespace std
+
+#endif // __ONERT_IR_OP_CODE_H__
diff --git a/runtime/onert/core/include/ir/OpSequence.h b/runtime/onert/core/include/ir/OpSequence.h
new file mode 100644
index 000000000..1ca231384
--- /dev/null
+++ b/runtime/onert/core/include/ir/OpSequence.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OP_SEQUENCE_H__
+#define __ONERT_IR_OP_SEQUENCE_H__
+
+#include <vector>
+#include <string>
+#include <memory>
+
+#include "ir/Layout.h"
+#include "ir/Index.h"
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+
+// To support ValueSwappable, Element doesn't have members which are classes
+// as value(or can have members which are classes as value and the classes
+// support Swappable)
+struct Element
+{
+ OperationIndex index;
+ const Operation *node;
+
+ Element(const OperationIndex *i, const Operation *n) : index{*i}, node{n}
+ {
+ // DO NOTHING
+ }
+};
+
+class OpSequence
+{
+public:
+ explicit OpSequence(Layout layout);
+ OpSequence(const OpSequence &) = delete;
+
+public:
+ void accept(OperationVisitor &v) const;
+
+public:
+ const OperandIndexSequence &getInputs() const { return _inputs; }
+ const OperandIndexSequence &getOutputs() const { return _outputs; }
+ void setInputs(const OperandIndexSequence &indexes) { _inputs = indexes; }
+ void setOutputs(const OperandIndexSequence &indexes) { _outputs = indexes; }
+ void replaceInput(const OperandIndex &from, const OperandIndex &to) { _inputs.replace(from, to); }
+ void replaceOutput(const OperandIndex &from, const OperandIndex &to)
+ {
+ _outputs.replace(from, to);
+ }
+
+ void appendOperation(const OperationIndex &index, const Operation &node)
+ {
+ _operations.emplace_back(&index, &node);
+ }
+
+ std::vector<Element> &operations(void) { return _operations; }
+
+ const std::vector<Element> &operations(void) const { return _operations; }
+
+ uint32_t size(void) const { return _operations.size(); }
+
+ // TODO: Impl Dumper instead of this method
+ std::string getStr(void) const;
+
+public:
+ void remove(const OperationIndex &index);
+
+public:
+ Layout getLayout() const { return _layout; }
+
+public:
+ std::vector<Element>::const_iterator begin() const { return _operations.begin(); }
+ std::vector<Element>::const_iterator end() const { return _operations.end(); }
+
+private:
+ bool exist(const OperationIndex &index) const;
+
+private:
+ OperandIndexSequence _inputs;
+ OperandIndexSequence _outputs;
+ std::vector<Element> _operations;
+
+private:
+ Layout _layout;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OP_SEQUENCE_H__
diff --git a/runtime/onert/core/include/ir/OpSequences.h b/runtime/onert/core/include/ir/OpSequences.h
new file mode 100644
index 000000000..2af9579b7
--- /dev/null
+++ b/runtime/onert/core/include/ir/OpSequences.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OP_SEQUENCES_H__
+#define __ONERT_IR_OP_SEQUENCES_H__
+
+#include "ir/Index.h"
+#include "ir/OpSequence.h"
+#include "util/ObjectManager.h"
+
+namespace onert
+{
+namespace ir
+{
+
+/**
+ * @brief Class that manages OpSequence objects
+ */
+class OpSequences : public util::ObjectManager<OpSequenceIndex, OpSequence>
+{
+public:
+ /**
+ * @brief Create an instance of OpSequence with given op and push it to objects
+ *
+ * @param[in] op_idx Operation index that is emplaced
+ * @param[in] op Operation that is emplaced
+ * @param[in] layout OpSequence's layout
+ * @return OpSequenceIndex
+ */
+ OpSequenceIndex emplace(const OperationIndex &op_index, const Operation &op, Layout layout);
+
+ /**
+ * @brief Push an instance of OpSequence to objects
+ *
+ * @param[in] op_seq An instance of OpSequence
+ * @return OpSequenceIndex
+ */
+ OpSequenceIndex emplace(std::unique_ptr<OpSequence> &&op_seq);
+
+ /**
+ * @brief Check if an operation does exist in any OpSequences
+ *
+ * @param operation_index Operation index to find
+ * @return true If such operation exists in any OpSequences otherwise false
+ */
+ bool containsOperation(const OperationIndex &operation_index) const;
+ /**
+ * @brief Find an operation from all OpSequences
+ *
+ * @param operation_index Operation index to find
+ * @return OpSequenceIndex Index of OpSequence that contains given operation index
+ */
+ OpSequenceIndex getOperation(const OperationIndex &operation_index) const;
+ /**
+ * @brief Dump OpSequences
+ *
+ * @param msg Message that will be displayed
+ */
+ void dump(const std::string &msg) const;
+ /**
+ * @brief Remove an operation from OpSequence
+ *
+ * @param operation_index Operation index to be removed
+ */
+ void removeFromOpSequence(const OperationIndex &operation_index);
+
+private:
+ OpSequenceIndex findOperation(const OperationIndex &operation_index) const;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OP_SEQUENCES_H__
diff --git a/runtime/onert/core/include/ir/Operand.h b/runtime/onert/core/include/ir/Operand.h
new file mode 100644
index 000000000..3b6de9d15
--- /dev/null
+++ b/runtime/onert/core/include/ir/Operand.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERAND_H__
+#define __ONERT_IR_OPERAND_H__
+
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <algorithm>
+
+#include "ir/Data.h"
+#include "ir/DataType.h"
+#include "ir/OperandInfo.h"
+#include "ir/OperationIndexList.h"
+
+namespace onert
+{
+namespace ir
+{
+
+class Operand
+{
+public:
+ explicit Operand(const Shape &shape, const TypeInfo &type) : _info{shape, type}, _const{false}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const Shape &shape(void) const { return _info.shape(); }
+ const TypeInfo &typeInfo(void) const { return _info.typeInfo(); }
+ const OperandInfo &info(void) const { return _info; }
+ OperandInfo &info(void) { return _info; }
+ size_t operandSize(void) const;
+
+ const OperationIndexList &getUses() const { return _uses; }
+ const OperationIndexList &getDef() const { return _def; }
+ void appendUse(const OperationIndex &idx);
+ void removeUse(const OperationIndex &idx);
+ void appendDef(const OperationIndex &idx);
+ void removeDef(const OperationIndex &idx);
+
+public:
+ void type(const DataType type) { _info.type(type); };
+
+public:
+ void data(std::shared_ptr<Data> &&data)
+ {
+ _data = std::move(data);
+ _const = true;
+ }
+ const Data *data(void) const { return _data.get(); }
+
+ void releaseData(void) { _data.reset(); }
+
+ /**
+ * @brief Return @c true if Operand is const, otherwise @c false
+ * @return @c true if Operand is const, otherwise @c false
+ */
+ bool isConstant(void) const { return _const; }
+
+public:
+ template <typename T, typename... Args> void data(Args &&... args)
+ {
+ data(std::make_unique<T>(std::forward<Args>(args)...));
+ }
+
+public:
+ template <typename T> T asScalar(void) const
+ {
+ assert((shape().rank() == 0) || ((shape().rank() == 1) && (shape().dim(0) == 1)));
+ assert(_data != nullptr);
+ assert((_data->base() != nullptr) && (_data->size() == sizeof(T)));
+
+ return *(reinterpret_cast<const T *>(_data->base()));
+ }
+
+ template <typename T> std::vector<T> asVector() const
+ {
+ assert(_data != nullptr);
+ assert(_data->size() % sizeof(T) == 0);
+
+ const auto *base = reinterpret_cast<const T *>(_data->base());
+ const std::size_t size = _data->size() / sizeof(T);
+ return std::vector<T>(base, base + size);
+ }
+
+private:
+ OperandInfo _info;
+ std::shared_ptr<Data> _data;
+ bool _const;
+
+ OperationIndexList _uses;
+ OperationIndexList _def; // size is 0 (constant) or 1 (from def operation)
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERAND_H__
diff --git a/runtime/onert/core/include/ir/OperandConstraint.h b/runtime/onert/core/include/ir/OperandConstraint.h
new file mode 100644
index 000000000..8da922bea
--- /dev/null
+++ b/runtime/onert/core/include/ir/OperandConstraint.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_MODEL_OPERAND_CONSTRAINT_H__
+#define __ONERT_MODEL_OPERAND_CONSTRAINT_H__
+
+#include <stdint.h>
+#include <limits>
+#include <set>
+
+namespace onert
+{
+namespace ir
+{
+
+class OperandConstraint
+{
+private:
+ static const uint32_t INF = std::numeric_limits<uint32_t>::max();
+
+public:
+ static OperandConstraint createAny() { return OperandConstraint{0u, INF}; }
+ static OperandConstraint createExact(uint32_t exact) { return OperandConstraint{exact, exact}; }
+ static OperandConstraint createAtMost(uint32_t end) { return OperandConstraint{0u, end}; }
+ static OperandConstraint createAtLeast(uint32_t begin) { return OperandConstraint{begin, INF}; }
+ static OperandConstraint createInRange(uint32_t begin, uint32_t end)
+ {
+ return OperandConstraint{begin, end};
+ }
+
+private:
+ OperandConstraint(uint32_t begin, uint32_t end) : _begin{begin}, _end{end} {}
+
+public:
+ bool check(uint32_t ind) const { return _begin <= ind && ind <= _end; }
+
+private:
+ uint32_t _begin;
+ uint32_t _end;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_MODEL_OPERAND_CONSTRAINT_H__
diff --git a/runtime/onert/core/include/ir/OperandIndexMap.h b/runtime/onert/core/include/ir/OperandIndexMap.h
new file mode 100644
index 000000000..468162ffb
--- /dev/null
+++ b/runtime/onert/core/include/ir/OperandIndexMap.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERAND_INDEX_MAP_H__
+#define __ONERT_IR_OPERAND_INDEX_MAP_H__
+
+#include <unordered_map>
+
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace ir
+{
+
+template <typename T> using OperandIndexMap = std::unordered_map<OperandIndex, T>;
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERAND_INDEX_MAP_H__
diff --git a/runtime/onert/core/include/ir/OperandIndexSequence.h b/runtime/onert/core/include/ir/OperandIndexSequence.h
new file mode 100644
index 000000000..1f5ab3d0a
--- /dev/null
+++ b/runtime/onert/core/include/ir/OperandIndexSequence.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_MODEL_OPERAND_INDEX_SEQUENCE_H__
+#define __ONERT_MODEL_OPERAND_INDEX_SEQUENCE_H__
+
+#include <initializer_list>
+#include <vector>
+
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace ir
+{
+
+class OperandIndexSequence
+{
+public:
+ OperandIndexSequence(void) = default;
+ OperandIndexSequence(std::initializer_list<OperandIndex> list);
+ OperandIndexSequence(std::initializer_list<int32_t> list);
+ OperandIndexSequence(std::initializer_list<uint32_t> list);
+
+public:
+ void append(const OperandIndex &index) { _set.emplace_back(index); }
+ void append(const OperandIndexSequence &l) { _set.insert(_set.end(), l.begin(), l.end()); }
+
+public:
+ uint32_t size() const { return static_cast<uint32_t>(_set.size()); }
+ const OperandIndex &at(IOIndex set_index) const { return _set.at(set_index.value()); }
+ const OperandIndex &at(uint32_t index) const { return _set.at(index); }
+ bool contains(const OperandIndex &index) const;
+ void replace(const OperandIndex &from, const OperandIndex &to);
+
+public:
+ OperandIndexSequence operator+(const OperandIndexSequence &other) const;
+
+public:
+ std::vector<OperandIndex>::const_iterator begin(void) const { return _set.begin(); }
+ std::vector<OperandIndex>::const_iterator end(void) const { return _set.end(); }
+
+private:
+ std::vector<OperandIndex> _set;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_MODEL_OPERAND_INDEX_SEQUENCE_H__
diff --git a/runtime/onert/core/include/ir/OperandInfo.h b/runtime/onert/core/include/ir/OperandInfo.h
new file mode 100644
index 000000000..6d66f1e12
--- /dev/null
+++ b/runtime/onert/core/include/ir/OperandInfo.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file OperandInfo.h
+ * @brief This file contains OperandInfo class
+ */
+#ifndef __ONERT_IR_OPERAND_INFO_H__
+#define __ONERT_IR_OPERAND_INFO_H__
+
+#include "ir/Shape.h"
+#include "ir/TypeInfo.h"
+#include "ir/Layout.h"
+
+namespace onert
+{
+namespace ir
+{
+
+/**
+ * @brief enum class indicating when the memory for a tensor is allocated
+ */
+enum class MemAllocType
+{
+ /**
+ * @brief At compile time, shape for a tensor is known, thus required memory capacity can be
+ * calculated
+ */
+ STATIC,
+
+ /**
+ * @brief At kernel execution time, shape for a tensor is known, thus required memory capacity
+ * can be calculated
+ */
+ DYNAMIC
+};
+
+/**
+ * @brief Class to save tensor's shape and type
+ */
+class OperandInfo
+{
+public:
+ /**
+ * @brief Construct a new OperandInfo object (deleted)
+ */
+ OperandInfo() = delete;
+ /**
+ * @brief Construct a new OperandInfo object
+ * @param[in] shape Tensor shape
+ * @param[in] typeInfo Tensor data type
+ *
+ * @todo Deprecate this constructor because setting member vars implicitly can cause bugs later.
+ * Please use the third constructor. (This constructor is kept for now so as not to break previous code)
+ */
+ OperandInfo(const Shape &shape, const TypeInfo &typeInfo)
+ : _shape(shape), _typeInfo(typeInfo), _alloc_type(MemAllocType::STATIC)
+ {
+ // DO NOTHING
+ }
+ /**
+ * @brief Construct a new OperandInfo object
+ * @param[in] shape Tensor shape
+ * @param[in] typeInfo Tensor data type
+ * @param[in] alloc_type When the tensor needs memory allocation
+ */
+ OperandInfo(const Shape &shape, const TypeInfo &typeInfo, MemAllocType alloc_type)
+ : _shape(shape), _typeInfo(typeInfo), _alloc_type(alloc_type)
+ {
+ // DO NOTHING
+ }
+ /**
+ * @brief Construct a new OperandInfo object
+ * @param[in] origin info for copy
+ */
+ OperandInfo(const OperandInfo &origin) = default;
+
+public:
+ /**
+ * @brief Return tensor shape
+ * @return Tensor shape
+ */
+ const Shape &shape() const { return _shape; }
+ /**
+ * @brief set shape
+ */
+ void shape(const ir::Shape &new_shape) { _shape = new_shape; }
+ /**
+ * @brief Return tensor data type info
+ * @return Tensor data type
+ */
+ const TypeInfo &typeInfo() const { return _typeInfo; }
+ /**
+ * @brief Set tensor data type
+ */
+ void type(const DataType type) { _typeInfo.type(type); }
+ /**
+ * @brief Return size of tensor (bytes)
+ * @return Tensor size
+ */
+ size_t total_size() const { return _shape.num_elements() * sizeOfDataType(_typeInfo.type()); }
+
+ MemAllocType memAllocType() const { return _alloc_type; }
+ void memAllocType(MemAllocType alloc_type) { _alloc_type = alloc_type; }
+
+private:
+ Shape _shape;
+ TypeInfo _typeInfo;
+
+ MemAllocType _alloc_type;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERAND_INFO_H__
diff --git a/runtime/onert/core/include/ir/Operands.h b/runtime/onert/core/include/ir/Operands.h
new file mode 100644
index 000000000..be7b7061f
--- /dev/null
+++ b/runtime/onert/core/include/ir/Operands.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERANDS_H__
+#define __ONERT_IR_OPERANDS_H__
+
+#include <memory>
+#include <unordered_map>
+
+#include "ir/Operand.h"
+#include "ir/Index.h"
+#include "util/ObjectManager.h"
+
+namespace onert
+{
+namespace ir
+{
+
+class Operands : public util::ObjectManager<OperandIndex, Operand>
+{
+public:
+ Operands() = default;
+ Operands(const Operands &obj);
+ Operands(Operands &&) = default;
+ Operands &operator=(const Operands &) = delete;
+ Operands &operator=(Operands &&) = default;
+ ~Operands() = default;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERANDS_H__
diff --git a/runtime/onert/core/include/ir/Operation.h b/runtime/onert/core/include/ir/Operation.h
new file mode 100644
index 000000000..fb3472923
--- /dev/null
+++ b/runtime/onert/core/include/ir/Operation.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_H__
+#define __ONERT_IR_OPERATION_H__
+
+#include <memory>
+
+#include "ir/OpCode.h"
+#include "ir/Operand.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/OperandConstraint.h"
+
+namespace onert
+{
+namespace ir
+{
+
+struct OperationVisitor;
+
+class Operation
+{
+public:
+ Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs);
+ explicit Operation(OperandConstraint input_constr);
+
+ Operation(const Operation &) = default;
+ Operation(Operation &&) = default;
+ Operation &operator=(const Operation &) = default;
+ Operation &operator=(Operation &&) = default;
+
+ virtual ~Operation();
+
+public:
+ virtual void accept(OperationVisitor &v) const = 0;
+ virtual std::string name() const { return std::string{toString(opcode())}; }
+ virtual OpCode opcode() const = 0;
+
+public:
+ void replaceInput(const OperandIndex &from, const OperandIndex &to);
+ void replaceOutput(const OperandIndex &from, const OperandIndex &to);
+ const OperandIndexSequence &getInputs() const { return _inputs; }
+ const OperandIndexSequence &getOutputs() const { return _outputs; }
+ // It's for only input/output tensors but const data.
+ void setInputs(const OperandIndexSequence &indexes);
+ void setOutputs(const OperandIndexSequence &indexes);
+
+private:
+ OperandConstraint _input_constr;
+ OperandIndexSequence _inputs;
+ OperandIndexSequence _outputs;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_H__
diff --git a/runtime/onert/core/include/ir/OperationIndexList.h b/runtime/onert/core/include/ir/OperationIndexList.h
new file mode 100644
index 000000000..7863ff29a
--- /dev/null
+++ b/runtime/onert/core/include/ir/OperationIndexList.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_MODEL_OPERATION_INDEX_LIST_H__
+#define __ONERT_MODEL_OPERATION_INDEX_LIST_H__
+
+#include <algorithm>
+#include <cassert>
+#include <initializer_list>
+#include <list>
+
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace ir
+{
+
+class OperationIndexList
+{
+public:
+ OperationIndexList(void) = default;
+ OperationIndexList(std::initializer_list<OperationIndex> list);
+
+public:
+ void append(const OperationIndex &index) { _list.push_back(index); }
+ void remove(const OperationIndex &index)
+ {
+ auto itr = std::find(_list.begin(), _list.end(), index);
+ assert(itr != _list.end());
+ _list.erase(itr);
+ }
+
+public:
+ uint32_t size() const { return static_cast<uint32_t>(_list.size()); }
+ const std::list<OperationIndex> &list() const { return _list; }
+ bool contains(const OperationIndex &index) const;
+
+private:
+ std::list<OperationIndex> _list;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_MODEL_OPERATION_INDEX_LIST_H__
diff --git a/runtime/onert/core/include/ir/OperationIndexMap.h b/runtime/onert/core/include/ir/OperationIndexMap.h
new file mode 100644
index 000000000..50c21c0ab
--- /dev/null
+++ b/runtime/onert/core/include/ir/OperationIndexMap.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_INDEX_MAP_H__
+#define __ONERT_IR_OPERATION_INDEX_MAP_H__
+
+#include <unordered_map>
+
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace ir
+{
+
+template <typename T> using OperationIndexMap = std::unordered_map<OperationIndex, T>;
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_INDEX_MAP_H__
diff --git a/runtime/onert/core/include/ir/OperationVisitor.h b/runtime/onert/core/include/ir/OperationVisitor.h
new file mode 100644
index 000000000..b7e66b935
--- /dev/null
+++ b/runtime/onert/core/include/ir/OperationVisitor.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_VISITOR_H__
+#define __ONERT_IR_OPERATION_VISITOR_H__
+
+#include "ir/Operations.Include.h"
+#include "ir/OpSequence.h"
+
+namespace onert
+{
+namespace ir
+{
+
+struct OperationVisitor
+{
+ virtual ~OperationVisitor() = default;
+
+#define OP(InternalName) \
+ virtual void visit(const operation::InternalName &) {}
+#include "ir/Operations.lst"
+#undef OP
+
+ // This OpSequence node should be handled specially so that
+ // Op.lst doesn't have OpSequence
+ // TODO Remove by pushing it down to derived classes.
+ virtual void visit(const OpSequence &op_seq)
+ {
+ for (const auto &e : op_seq.operations())
+ {
+ e.node->accept(*this);
+ }
+ }
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_VISITOR_H__
diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h
new file mode 100644
index 000000000..6f14ad9f9
--- /dev/null
+++ b/runtime/onert/core/include/ir/Operations.Include.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file has no ifdef guard intentionally
+
+#include "ir/operation/BatchToSpaceND.h"
+#include "ir/operation/Conv2D.h"
+#include "ir/operation/MaxPool2D.h"
+#include "ir/operation/AvgPool2D.h"
+#include "ir/operation/Concat.h"
+#include "ir/operation/Reshape.h"
+#include "ir/operation/FullyConnected.h"
+#include "ir/operation/Softmax.h"
+#include "ir/operation/Transpose.h"
+#include "ir/operation/Permute.h"
+#include "ir/operation/ReduceSum.h"
+#include "ir/operation/Add.h"
+#include "ir/operation/Sub.h"
+#include "ir/operation/DepthwiseConv2D.h"
+#include "ir/operation/Slice.h"
+#include "ir/operation/StridedSlice.h"
+#include "ir/operation/Mul.h"
+#include "ir/operation/Squeeze.h"
+#include "ir/operation/Tanh.h"
+#include "ir/operation/Logistic.h"
+#include "ir/operation/Cast.h"
+#include "ir/operation/Div.h"
+#include "ir/operation/Exp.h"
+#include "ir/operation/ReduceMax.h"
+#include "ir/operation/Comparison.h"
+#include "ir/operation/LogicalAnd.h"
+#include "ir/operation/LogicalOr.h"
+#include "ir/operation/LogicalNot.h"
+#include "ir/operation/LSTM.h"
+#include "ir/operation/RSQRT.h"
+#include "ir/operation/ReLU.h"
+#include "ir/operation/ResizeBilinear.h"
+#include "ir/operation/ReLU1.h"
+#include "ir/operation/ReLU6.h"
+#include "ir/operation/RNN.h"
+#include "ir/operation/Floor.h"
+#include "ir/operation/SpaceToBatchND.h"
+#include "ir/operation/SpaceToDepth.h"
+#include "ir/operation/L2Pool2D.h"
+#include "ir/operation/EmbeddingLookup.h"
+#include "ir/operation/L2Normalization.h"
+#include "ir/operation/HashtableLookup.h"
+#include "ir/operation/InstanceNorm.h"
+#include "ir/operation/PReLU.h"
+#include "ir/operation/TransposeConv.h"
+#include "ir/operation/SQRT.h"
+#include "ir/operation/SquaredDifference.h"
+#include "ir/operation/TopKV2.h"
+#include "ir/operation/Gather.h"
+#include "ir/operation/Neg.h"
+#include "ir/operation/Abs.h"
+#include "ir/operation/ArgMax.h"
+#include "ir/operation/Dequantize.h"
+#include "ir/operation/Mean.h"
+#include "ir/operation/LocalResponseNormalization.h"
+#include "ir/operation/DepthToSpace.h"
+#include "ir/operation/Pack.h"
+#include "ir/operation/ReduceMin.h"
+#include "ir/operation/Split.h"
+#include "ir/operation/Unpack.h"
+#include "ir/operation/Pad.h"
+#include "ir/operation/Min.h"
+#include "ir/operation/Max.h"
+#include "ir/operation/Custom.h"
+#include "ir/operation/OneHot.h"
+#include "ir/operation/Sin.h"
+#include "ir/operation/Shape.h"
+#include "ir/operation/ConvertFp32ToFp16.h"
+#include "ir/operation/ConvertFp16ToFp32.h"
diff --git a/runtime/onert/core/include/ir/Operations.h b/runtime/onert/core/include/ir/Operations.h
new file mode 100644
index 000000000..0b5fbf529
--- /dev/null
+++ b/runtime/onert/core/include/ir/Operations.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATIONS_H__
+#define __ONERT_IR_OPERATIONS_H__
+
+#include "ir/Index.h"
+#include "ir/Operation.h"
+#include "util/ObjectManager.h"
+
+namespace onert
+{
+namespace ir
+{
+
+class Operations : public util::ObjectManager<OperationIndex, Operation>
+{
+public:
+ Operations() = default;
+ Operations(const Operations &obj);
+ Operations(Operations &&) = default;
+ Operations &operator=(const Operations &) = delete;
+ Operations &operator=(Operations &&) = default;
+ ~Operations() = default;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATIONS_H__
diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst
new file mode 100644
index 000000000..de8c8364b
--- /dev/null
+++ b/runtime/onert/core/include/ir/Operations.lst
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OP
+#error Define OP before including this file
+#endif
+
+// Internal Name
+OP(Add)
+OP(Sub)
+OP(BatchToSpaceND)
+OP(Cast)
+OP(Conv2D)
+OP(DepthwiseConv2D)
+OP(AvgPool2D)
+OP(MaxPool2D)
+OP(Concat)
+OP(FullyConnected)
+OP(ReduceSum)
+OP(Reshape)
+OP(Mul)
+OP(Softmax)
+OP(Squeeze)
+OP(Slice)
+OP(StridedSlice)
+OP(Tanh)
+OP(Logistic)
+OP(Div)
+OP(Transpose)
+OP(Exp)
+OP(ReduceMax)
+OP(Comparison)
+OP(LogicalAnd)
+OP(LogicalOr)
+OP(LogicalNot)
+OP(LSTM)
+OP(RSQRT)
+OP(ReLU)
+OP(ResizeBilinear)
+OP(ReLU1)
+OP(ReLU6)
+OP(RNN)
+OP(Floor)
+OP(SpaceToBatchND)
+OP(SpaceToDepth)
+OP(L2Pool2D)
+OP(EmbeddingLookup)
+OP(L2Normalization)
+OP(HashtableLookup)
+OP(InstanceNorm)
+OP(PReLU)
+OP(TransposeConv)
+OP(SQRT)
+OP(SquaredDifference)
+OP(TopKV2)
+OP(Gather)
+OP(Neg)
+OP(Abs)
+OP(ArgMax)
+OP(Dequantize)
+OP(Mean)
+OP(LocalResponseNormalization)
+OP(DepthToSpace)
+OP(Pack)
+OP(ReduceMin)
+OP(Split)
+OP(Unpack)
+OP(Pad)
+OP(Custom)
+OP(Permute)
+OP(Min)
+OP(Max)
+OP(OneHot)
+OP(Sin)
+OP(Shape)
+OP(ConvertFp32ToFp16)
+OP(ConvertFp16ToFp32)
diff --git a/runtime/onert/core/include/ir/Padding.h b/runtime/onert/core/include/ir/Padding.h
new file mode 100644
index 000000000..b9053914d
--- /dev/null
+++ b/runtime/onert/core/include/ir/Padding.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_PADDING_H__
+#define __ONERT_IR_PADDING_H__
+
+#include "Shape.h"
+#include "InternalType.h"
+
+#include <cstdint>
+#include <string>
+
+namespace onert
+{
+namespace ir
+{
+
+enum class PaddingType
+{
+ EXPLICIT = 0,
+ SAME = 1,
+ VALID = 2
+};
+
+/**
+ * @brief Converts an internal padding type to std::string
+ * @param[in] type Padding type to be converted
+ * @return A string holding the converted value
+ */
+inline std::string to_string(const PaddingType type);
+
+struct ExplicitPadding
+{
+ uint32_t left;
+ uint32_t right;
+ uint32_t top;
+ uint32_t bottom;
+};
+
+// TODO Resolve explicit padding param at frontend and save in value field
+struct Padding
+{
+ Padding(void);
+ Padding(PaddingType paddingType);
+ Padding(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom);
+
+ // TODO Change to private field
+ PaddingType type;
+ ExplicitPadding param;
+};
+
+// TODO Change to Padding struct's method
+const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape,
+ const FeatureShape &ofm_shape, const Stride &stride,
+ uint32_t kw, uint32_t kh);
+
+} // namespace ir
+} // namespace onert
+
+#endif
diff --git a/runtime/onert/core/include/ir/Shape.h b/runtime/onert/core/include/ir/Shape.h
new file mode 100644
index 000000000..ebf46c555
--- /dev/null
+++ b/runtime/onert/core/include/ir/Shape.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_SHAPE_H__
+#define __ONERT_IR_SHAPE_H__
+
+#include "ir/Layout.h"
+#include "misc/feature/Shape.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace onert
+{
+namespace ir
+{
+
+// TODO Remove this dependency.
+using FeatureShape = nnfw::misc::feature::Shape;
+
+struct Shape
+{
+public:
+ Shape() = default;
+
+ explicit Shape(int rank) : _dimensions(rank) {}
+
+ Shape(std::initializer_list<int32_t> dimensions) : _dimensions(dimensions) {}
+
+ int rank() const { return _dimensions.size(); }
+
+ const std::vector<int32_t> &dims() const { return _dimensions; }
+
+ int32_t dim(int i) const { return _dimensions.at(i); }
+
+ int32_t &dim(int i) { return _dimensions.at(i); }
+
+ uint64_t num_elements() const;
+
+public:
+ FeatureShape asFeature(Layout layout) const;
+
+ /**
+ * @brief Add dimension to the beginning
+ * @param[in] d dimension to add to the beginning
+ */
+ void prepend(int32_t d) { _dimensions.insert(_dimensions.cbegin(), d); }
+
+ /**
+ * @brief Add dimension to the end
+ * @param[in] d dimension to add to the end
+ */
+ void append(int32_t d) { _dimensions.emplace_back(d); }
+
+ /**
+ * @brief Extend rank of Shape object for operand with param.
+ * @param[in] to_rank The rank value to be extended to
+ */
+ void extendRank(int to_rank);
+
+private:
+ std::vector<int32_t> _dimensions;
+};
+
+inline bool operator==(const Shape &lhs, const Shape &rhs) { return lhs.dims() == rhs.dims(); }
+inline bool operator!=(const Shape &lhs, const Shape &rhs) { return lhs.dims() != rhs.dims(); }
+
+Shape permuteShape(const Shape &shape, Layout frontend_layout, Layout backend_layout);
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_SHAPE_H__
diff --git a/runtime/onert/core/include/ir/TypeInfo.h b/runtime/onert/core/include/ir/TypeInfo.h
new file mode 100644
index 000000000..07d82b6a7
--- /dev/null
+++ b/runtime/onert/core/include/ir/TypeInfo.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TYPEINFO_H__
+#define __ONERT_IR_TYPEINFO_H__
+
+#include <cstdint>
+
+#include "ir/DataType.h"
+
+namespace onert
+{
+namespace ir
+{
+
+class TypeInfo
+{
+public:
+ TypeInfo() = delete;
+
+ explicit TypeInfo(DataType type, float scale = 0, int32_t offset = 0)
+ : _type(type), _scale(scale), _offset(offset)
+ {
+ }
+
+public:
+ DataType type() const { return _type; }
+ float scale() const { return _scale; }
+ int32_t offset() const { return _offset; }
+
+public:
+ void type(const DataType type) { _type = type; }
+
+private:
+ DataType _type;
+ float _scale;
+ int32_t _offset;
+};
+
+bool operator==(const TypeInfo &lhs, const TypeInfo &rhs);
+bool operator!=(const TypeInfo &lhs, const TypeInfo &rhs);
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_TYPEINFO_H__
diff --git a/runtime/onert/core/include/ir/operand/LowerInfo.h b/runtime/onert/core/include/ir/operand/LowerInfo.h
new file mode 100644
index 000000000..b7f032b02
--- /dev/null
+++ b/runtime/onert/core/include/ir/operand/LowerInfo.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERAND_LOWER_INFO_H__
+#define __ONERT_IR_OPERAND_LOWER_INFO_H__
+
+#include <functional>
+#include <stdint.h>
+
+#include "ir/operand/PermuteFactor.h"
+#include "util/Set.h"
+
+namespace onert
+{
+namespace backend
+{
+class Backend;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+namespace operand
+{
+using PermuteFactorSet = util::Set<PermuteFactor>;
+
+class LowerInfo
+{
+public:
+ LowerInfo()
+ {
+ // DO NOTHING
+ }
+
+public:
+ const PermuteFactorSet &def_factors(void) const { return _def_factors; }
+ const PermuteFactorSet &use_factors(void) const { return _use_factors; }
+
+public:
+ void addDefPermuteFactor(const PermuteFactor &factor) { _def_factors.add(factor); }
+ void addUsePermuteFactor(const PermuteFactor &factor) { _use_factors.add(factor); }
+ void removeDefPermuteFactor(const PermuteFactor &factor) { _def_factors.remove(factor); }
+ void removeUsePermuteFactor(const PermuteFactor &factor) { _use_factors.remove(factor); }
+
+private:
+ PermuteFactorSet _def_factors;
+ PermuteFactorSet _use_factors;
+};
+
+} // namespace operand
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERAND_LOWER_INFO_H__
diff --git a/runtime/onert/core/include/ir/operand/PermuteFactor.h b/runtime/onert/core/include/ir/operand/PermuteFactor.h
new file mode 100644
index 000000000..d0bfed337
--- /dev/null
+++ b/runtime/onert/core/include/ir/operand/PermuteFactor.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file PermuteFactor.h
+ * @brief This file contains onert::ir::operand::PermuteFactor class
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __ONERT_IR_OPERAND_PERMUTE_FACTOR_H__
+#define __ONERT_IR_OPERAND_PERMUTE_FACTOR_H__
+
+#include <functional>
+
+#include "ir/Layout.h"
+
+namespace onert
+{
+namespace backend
+{
+class Backend;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+namespace operand
+{
+
+/**
+ * @brief Class that has factors of permutation
+ */
+class PermuteFactor
+{
+public:
+ /**
+ * @brief Construct PermuteFactor object.
+ * @param backend The backend factor
+ * @param layout The layout factor
+ */
+ PermuteFactor(const backend::Backend *backend, Layout layout) : _backend{backend}, _layout{layout}
+ {
+ // DO NOTHING
+ }
+ /**
+ * @brief Construct PermuteFactor object by copy semantics.
+ */
+ PermuteFactor(const PermuteFactor &f) : _backend{f._backend}, _layout{f._layout}
+ {
+ // DO NOTHING
+ }
+ /**
+ * @brief Construct PermuteFactor object by move semantics.
+ */
+ PermuteFactor(PermuteFactor &&) = default;
+
+public:
+ /**
+ * @brief Get backend
+ *
+ * @return Backend factor
+ */
+ const backend::Backend *backend() const { return _backend; }
+ /**
+ * @brief Get layout
+ *
+ * @return Layout factor
+ */
+ Layout layout() const { return _layout; }
+
+public:
+ /**
+ * @brief operator overloading function for `==`
+ *
+ * @return Whether two PermuteFactor are the same
+ */
+ bool operator==(const PermuteFactor &other) const
+ {
+ return _backend == other.backend() && _layout == other.layout();
+ }
+ /**
+ * @brief operator overloading function for `!=`
+ *
+ * @return Whether two PermuteFactor objects are different
+ */
+ bool operator!=(const PermuteFactor &other) const { return !(*this == other); }
+
+private:
+ const backend::Backend *_backend{nullptr};
+ Layout _layout{Layout::UNKNOWN};
+};
+
+} // namespace operand
+} // namespace ir
+} // namespace onert
+
+namespace std
+{
+
+/**
+ * @brief Structure that provides hash value of PermuteFactor
+ */
+template <> struct hash<onert::ir::operand::PermuteFactor>
+{
+ size_t operator()(const onert::ir::operand::PermuteFactor &factor) const noexcept
+ {
+ hash<const onert::backend::Backend *> b_hash{};
+ hash<onert::ir::Layout> l_hash{};
+ return b_hash(factor.backend()) ^ (l_hash(factor.layout()) << 1);
+ }
+};
+
+} // namespace std
+
+#endif // __ONERT_IR_OPERAND_PERMUTE_FACTOR_H__
diff --git a/runtime/onert/core/include/ir/operation/Abs.h b/runtime/onert/core/include/ir/operation/Abs.h
new file mode 100644
index 000000000..9126c0027
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Abs.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_ABS_H__
+#define __ONERT_IR_OPERATION_ABS_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Abs : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Abs; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_ABS_H__
diff --git a/runtime/onert/core/include/ir/operation/Add.h b/runtime/onert/core/include/ir/operation/Add.h
new file mode 100644
index 000000000..5f5f4e0fe
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Add.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_ADD_H__
+#define __ONERT_IR_OPERATION_ADD_H__
+
+#include "ir/Operation.h"
+#include "ir/InternalType.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Add : public Operation
+{
+public:
+ enum Input
+ {
+ LHS = 0,
+ RHS
+ };
+
+ struct Param
+ {
+ Activation activation;
+ };
+
+public:
+ Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Add; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_ADD_H__
diff --git a/runtime/onert/core/include/ir/operation/ArgMax.h b/runtime/onert/core/include/ir/operation/ArgMax.h
new file mode 100644
index 000000000..b006ea464
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/ArgMax.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_ARG_MAX_H__
+#define __ONERT_IR_OPERATION_ARG_MAX_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class ArgMax : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT
+ };
+
+ struct Param
+ {
+ int axis;
+ int rank;
+ };
+
+public:
+ ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::ArgMax; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_ARG_MAX_H__
diff --git a/runtime/onert/core/include/ir/operation/AvgPool2D.h b/runtime/onert/core/include/ir/operation/AvgPool2D.h
new file mode 100644
index 000000000..d5b300a35
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/AvgPool2D.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_AVGPOOL2D_H__
+#define __ONERT_IR_OPERATION_AVGPOOL2D_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+#include "ir/InternalType.h"
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class AvgPool2D : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+ struct Param
+ {
+ uint32_t kh;
+ uint32_t kw;
+
+ Stride stride;
+ Padding padding;
+ Activation activation;
+ };
+
+public:
+ AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::AvgPool2D; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_AVGPOOL2D_H__
diff --git a/runtime/onert/core/include/ir/operation/BatchToSpaceND.h b/runtime/onert/core/include/ir/operation/BatchToSpaceND.h
new file mode 100644
index 000000000..bb6be57d7
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/BatchToSpaceND.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_BATCH_TO_SPACE_ND_H__
+#define __ONERT_IR_OPERATION_BATCH_TO_SPACE_ND_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class BatchToSpaceND : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ BLOCK_SIZE = 1
+ };
+
+public:
+ BatchToSpaceND(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::BatchToSpaceND; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_BATCH_TO_SPACE_ND_H__
diff --git a/runtime/onert/core/include/ir/operation/Cast.h b/runtime/onert/core/include/ir/operation/Cast.h
new file mode 100644
index 000000000..6fb8c105b
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Cast.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_CAST_H__
+#define __ONERT_IR_OPERATION_CAST_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Cast : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Cast; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_CAST_H__
diff --git a/runtime/onert/core/include/ir/operation/Comparison.h b/runtime/onert/core/include/ir/operation/Comparison.h
new file mode 100644
index 000000000..8b53f163b
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Comparison.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_COMPARISON_H__
+#define __ONERT_IR_OPERATION_COMPARISON_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Comparison : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT0 = 0,
+ INPUT1
+ };
+
+ enum class ComparisonType
+ {
+ Equal,
+ NotEqual,
+ Greater,
+ GreaterEqual,
+ Less,
+ LessEqual
+ };
+
+ struct Param
+ {
+ ComparisonType comparison_type;
+ };
+
+public:
+ Comparison(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Comparison; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_COMPARISON_H__
diff --git a/runtime/onert/core/include/ir/operation/Concat.h b/runtime/onert/core/include/ir/operation/Concat.h
new file mode 100644
index 000000000..42b6fbb51
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Concat.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_CONCAT_H__
+#define __ONERT_IR_OPERATION_CONCAT_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Concat : public Operation
+{
+public:
+ struct Param
+ {
+ int32_t axis;
+ int32_t rank;
+ };
+
+public:
+ Concat(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Concat; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_CONCAT_H__
diff --git a/runtime/onert/core/include/ir/operation/Conv2D.h b/runtime/onert/core/include/ir/operation/Conv2D.h
new file mode 100644
index 000000000..e23bf3eb3
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Conv2D.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_CONV2D_H__
+#define __ONERT_IR_OPERATION_CONV2D_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+#include "ir/InternalType.h"
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Conv2D : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ KERNEL,
+ BIAS
+ };
+
+ struct Param
+ {
+ Stride stride;
+ Padding padding;
+ Activation activation;
+ };
+
+public:
+ Conv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Conv2D; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_CONV2D_H__
diff --git a/runtime/onert/core/include/ir/operation/ConvertFp16ToFp32.h b/runtime/onert/core/include/ir/operation/ConvertFp16ToFp32.h
new file mode 100644
index 000000000..15c48357f
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/ConvertFp16ToFp32.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_CONVERT_FP16_TO_FP32_H__
+#define __ONERT_IR_OPERATION_CONVERT_FP16_TO_FP32_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class ConvertFp16ToFp32 : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ ConvertFp16ToFp32(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::ConvertFp16ToFp32; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_CONVERT_FP16_TO_FP32_H__
diff --git a/runtime/onert/core/include/ir/operation/ConvertFp32ToFp16.h b/runtime/onert/core/include/ir/operation/ConvertFp32ToFp16.h
new file mode 100644
index 000000000..983ce4891
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/ConvertFp32ToFp16.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_CONVERT_FP32_TO_FP16_H__
+#define __ONERT_IR_OPERATION_CONVERT_FP32_TO_FP16_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class ConvertFp32ToFp16 : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ ConvertFp32ToFp16(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::ConvertFp32ToFp16; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_CONVERT_FP32_TO_FP16_H__
diff --git a/runtime/onert/core/include/ir/operation/Custom.h b/runtime/onert/core/include/ir/operation/Custom.h
new file mode 100644
index 000000000..c2a4b354a
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Custom.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ONERT_IR_OPERATION_CUSTOM_H__
+#define __ONERT_IR_OPERATION_CUSTOM_H__
+
+#include "ir/Operation.h"
+
+#include <cstring>
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Custom : public Operation
+{
+public:
+ struct Userdata
+ {
+ char *data;
+ size_t size;
+
+ Userdata() : data{nullptr}, size{0} {}
+ Userdata(const Userdata &o)
+ {
+ size = o.size;
+ data = new char[size];
+ std::memcpy(data, o.data, size);
+ }
+ ~Userdata() { delete[] data; }
+ };
+
+ Custom(OperandConstraint input_constr, const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, std::string id, const Userdata &userdata);
+
+ void accept(OperationVisitor &v) const override;
+
+public:
+ /**
+ * @return unique operation identifier
+ */
+ const std::string &id() const;
+
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::Custom; }
+
+ /**
+ * @return user-provided data
+ */
+ const Userdata &userdata() const;
+
+private:
+ std::string _id;
+ Userdata _userdata;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+#endif // __ONERT_IR_OPERATION_CUSTOM_H__
diff --git a/runtime/onert/core/include/ir/operation/DepthToSpace.h b/runtime/onert/core/include/ir/operation/DepthToSpace.h
new file mode 100644
index 000000000..a5315051d
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/DepthToSpace.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_DEPTH_TO_SPACE_H__
+#define __ONERT_IR_OPERATION_DEPTH_TO_SPACE_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class DepthToSpace : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+ struct Param
+ {
+ std::int32_t block_size;
+ };
+
+public:
+ DepthToSpace(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::DepthToSpace; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_DEPTH_TO_SPACE_H__
diff --git a/runtime/onert/core/include/ir/operation/DepthwiseConv2D.h b/runtime/onert/core/include/ir/operation/DepthwiseConv2D.h
new file mode 100644
index 000000000..b10bf708c
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/DepthwiseConv2D.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_DEPTHWISECONV2D_H__
+#define __ONERT_IR_OPERATION_DEPTHWISECONV2D_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+#include "ir/InternalType.h"
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class DepthwiseConv2D : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ KERNEL,
+ BIAS
+ };
+
+ struct Param
+ {
+ Stride stride;
+ Padding padding;
+ uint32_t multiplier;
+ Activation activation;
+ };
+
+public:
+ DepthwiseConv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::DepthwiseConv2D; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_DEPTHWISECONV2D_H__
diff --git a/runtime/onert/core/include/ir/operation/Dequantize.h b/runtime/onert/core/include/ir/operation/Dequantize.h
new file mode 100644
index 000000000..97a08b33c
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Dequantize.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_DEQUANTIZE_H__
+#define __ONERT_IR_OPERATION_DEQUANTIZE_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Dequantize : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Dequantize; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_DEQUANTIZE_H__
diff --git a/runtime/onert/core/include/ir/operation/Div.h b/runtime/onert/core/include/ir/operation/Div.h
new file mode 100644
index 000000000..a7ec1c465
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Div.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_DIV_H__
+#define __ONERT_IR_OPERATION_DIV_H__
+
+#include "ir/Operation.h"
+#include "ir/InternalType.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Div : public Operation
+{
+public:
+ enum Input
+ {
+ LHS = 0,
+ RHS
+ };
+
+ struct Param
+ {
+ Activation activation;
+ };
+
+public:
+ Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Div; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_DIV_H__
diff --git a/runtime/onert/core/include/ir/operation/EmbeddingLookup.h b/runtime/onert/core/include/ir/operation/EmbeddingLookup.h
new file mode 100644
index 000000000..54064faf0
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/EmbeddingLookup.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_EMBEDDING_LOOKUP_H__
+#define __ONERT_IR_OPERATION_EMBEDDING_LOOKUP_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class EmbeddingLookup : public Operation
+{
+public:
+ enum Input
+ {
+ LOOKUPS = 0,
+ VALUES = 1
+ };
+
+public:
+ EmbeddingLookup(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::EmbeddingLookup; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_EMBEDDING_LOOKUP_H__
diff --git a/runtime/onert/core/include/ir/operation/Exp.h b/runtime/onert/core/include/ir/operation/Exp.h
new file mode 100644
index 000000000..2e68ff07a
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Exp.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_EXP_H__
+#define __ONERT_IR_OPERATION_EXP_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Exp : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Exp; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_EXP_H__
diff --git a/runtime/onert/core/include/ir/operation/Floor.h b/runtime/onert/core/include/ir/operation/Floor.h
new file mode 100644
index 000000000..b34699c22
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Floor.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_FLOOR_H__
+#define __ONERT_IR_OPERATION_FLOOR_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Floor : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Floor; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_FLOOR_H__
diff --git a/runtime/onert/core/include/ir/operation/FullyConnected.h b/runtime/onert/core/include/ir/operation/FullyConnected.h
new file mode 100644
index 000000000..b6484ae4d
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/FullyConnected.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_FULLYCONNECTED_H__
+#define __ONERT_IR_OPERATION_FULLYCONNECTED_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+#include "ir/InternalType.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class FullyConnected : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ WEIGHT,
+ BIAS
+ };
+
+ struct Param
+ {
+ Activation activation;
+ };
+
+public:
+ FullyConnected(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::FullyConnected; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_FULLYCONNECTED_H__
diff --git a/runtime/onert/core/include/ir/operation/Gather.h b/runtime/onert/core/include/ir/operation/Gather.h
new file mode 100644
index 000000000..daa198933
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Gather.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_GATHER_H__
+#define __ONERT_IR_OPERATION_GATHER_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Gather : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ INDICES,
+ };
+
+ struct Param
+ {
+ int32_t axis;
+ int32_t rank;
+ };
+
+public:
+ Gather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Gather; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_GATHER_H__
diff --git a/runtime/onert/core/include/ir/operation/HashtableLookup.h b/runtime/onert/core/include/ir/operation/HashtableLookup.h
new file mode 100644
index 000000000..4b6cf9362
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/HashtableLookup.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_HASHTABLE_LOOKUP_H__
+#define __ONERT_IR_OPERATION_HASHTABLE_LOOKUP_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class HashtableLookup : public Operation
+{
+public:
+ enum Input
+ {
+ LOOKUPS = 0,
+ KEYS = 1,
+ VALUES = 2
+ };
+
+ enum Output
+ {
+ OUTPUT = 0,
+ HITS = 1
+ };
+
+public:
+ HashtableLookup(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::HashtableLookup; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_HASHTABLE_LOOKUP_H__
diff --git a/runtime/onert/core/include/ir/operation/InstanceNorm.h b/runtime/onert/core/include/ir/operation/InstanceNorm.h
new file mode 100644
index 000000000..6a3bb5189
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/InstanceNorm.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_INSTANCE_NORM_H__
+#define __ONERT_IR_OPERATION_INSTANCE_NORM_H__
+
+#include "ir/Operation.h"
+#include "ir/InternalType.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class InstanceNorm : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ GAMMA,
+ BETA
+ };
+
+ struct Param
+ {
+ Activation activation;
+ float epsilon;
+ };
+
+public:
+ InstanceNorm(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::InstanceNorm; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_INSTANCE_NORM_H__
diff --git a/runtime/onert/core/include/ir/operation/L2Normalization.h b/runtime/onert/core/include/ir/operation/L2Normalization.h
new file mode 100644
index 000000000..f55301bd6
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/L2Normalization.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_L2_NORMALIZATION_H__
+#define __ONERT_IR_OPERATION_L2_NORMALIZATION_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class L2Normalization : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ struct Param
+ {
+ int32_t rank;
+ };
+
+public:
+ L2Normalization(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::L2Normalization; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_L2_NORMALIZATION_H__
diff --git a/runtime/onert/core/include/ir/operation/L2Pool2D.h b/runtime/onert/core/include/ir/operation/L2Pool2D.h
new file mode 100644
index 000000000..d369fd5fc
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/L2Pool2D.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_L2_POOL_2D_H__
+#define __ONERT_IR_OPERATION_L2_POOL_2D_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+#include "ir/InternalType.h"
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class L2Pool2D : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ };
+
+ struct Param
+ {
+ Padding padding;
+ Stride stride;
+ uint32_t kw;
+ uint32_t kh;
+ Activation activation;
+ };
+
+public:
+ L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::L2Pool2D; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_L2_POOL_2D_H__
diff --git a/runtime/onert/core/include/ir/operation/LSTM.h b/runtime/onert/core/include/ir/operation/LSTM.h
new file mode 100644
index 000000000..1e6c00bf3
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/LSTM.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ONERT_IR_OPERATION_LSTM_H__
+#define __ONERT_IR_OPERATION_LSTM_H__
+
+#include "ir/InternalType.h"
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class LSTM : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ INPUT_TO_INPUT_WEIGHTS = 1,
+ INPUT_TO_FORGET_WEIGHTS = 2,
+ INPUT_TO_CELL_WEIGHTS = 3,
+ INPUT_TO_OUTPUT_WEIGHTS = 4,
+ RECURRENT_TO_INPUT_WEIGHTS = 5,
+ RECURRENT_TO_FORGET_WEIGHTS = 6,
+ RECURRENT_TO_CELL_WEIGHTS = 7,
+ RECURRENT_TO_OUTPUT_WEIGHTS = 8,
+ CELL_TO_INPUT_WEIGHTS = 9,
+ CELL_TO_FORGET_WEIGHTS = 10,
+ CELL_TO_OUTPUT_WEIGHTS = 11,
+ INPUT_GATE_BIAS = 12,
+ FORGET_GATE_BIAS = 13,
+ CELL_BIAS = 14,
+ OUTPUT_GATE_BIAS = 15,
+ PROJECTION_WEIGHTS = 16,
+ PROJECTION_BIAS = 17,
+ OUTPUT_STATE_IN = 18,
+ CELL_STATE_IN = 19,
+ };
+
+ enum Output
+ {
+ SCRATCH_BUFFER = 0,
+ OUTPUT_STATE_OUT = 1,
+ CELL_STATE_OUT = 2,
+ OUTPUT = 3
+ };
+
+ struct Param
+ {
+ Activation activation;
+ float cell_threshold;
+ float projection_threshold;
+ };
+
+public:
+ LSTM(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::LSTM; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_LSTM_H__
diff --git a/runtime/onert/core/include/ir/operation/LocalResponseNormalization.h b/runtime/onert/core/include/ir/operation/LocalResponseNormalization.h
new file mode 100644
index 000000000..2946cfbad
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/LocalResponseNormalization.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_LOCAL_RESPONSE_NORMALIZATION_H__
+#define __ONERT_IR_OPERATION_LOCAL_RESPONSE_NORMALIZATION_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class LocalResponseNormalization : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+ struct Param
+ {
+ int radius;
+ float bias;
+ float alpha;
+ float beta;
+ };
+
+public:
+ LocalResponseNormalization(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::LocalResponseNormalization; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_LOCAL_RESPONSE_NORMALIZATION_H__
diff --git a/runtime/onert/core/include/ir/operation/LogicalAnd.h b/runtime/onert/core/include/ir/operation/LogicalAnd.h
new file mode 100644
index 000000000..dc853b6a9
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/LogicalAnd.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_LOGICAL_AND_H__
+#define __ONERT_IR_OPERATION_LOGICAL_AND_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class LogicalAnd : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT0 = 0,
+ INPUT1 = 1,
+ };
+
+public:
+ LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::LogicalAnd; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_LOGICAL_AND_H__
diff --git a/runtime/onert/core/include/ir/operation/LogicalNot.h b/runtime/onert/core/include/ir/operation/LogicalNot.h
new file mode 100644
index 000000000..9519f6d47
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/LogicalNot.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_LOGICAL_NOT_H__
+#define __ONERT_IR_OPERATION_LOGICAL_NOT_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class LogicalNot : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ };
+
+public:
+ LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::LogicalNot; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_LOGICAL_NOT_H__
diff --git a/runtime/onert/core/include/ir/operation/LogicalOr.h b/runtime/onert/core/include/ir/operation/LogicalOr.h
new file mode 100644
index 000000000..c4b658cd9
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/LogicalOr.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_LOGICAL_OR_H__
+#define __ONERT_IR_OPERATION_LOGICAL_OR_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class LogicalOr : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT0 = 0,
+ INPUT1 = 1,
+ };
+
+public:
+ LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::LogicalOr; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_LOGICAL_OR_H__
diff --git a/runtime/onert/core/include/ir/operation/Logistic.h b/runtime/onert/core/include/ir/operation/Logistic.h
new file mode 100644
index 000000000..5421e1c84
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Logistic.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_LOGISTIC_H__
+#define __ONERT_IR_OPERATION_LOGISTIC_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Logistic : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Logistic; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_LOGISTIC_H__
diff --git a/runtime/onert/core/include/ir/operation/LowerInfo.h b/runtime/onert/core/include/ir/operation/LowerInfo.h
new file mode 100644
index 000000000..7ef53b8c7
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/LowerInfo.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_LOWER_INFO_H__
+#define __ONERT_IR_OPERATION_LOWER_INFO_H__
+
+#include <string>
+
+#include <ir/operand/PermuteFactor.h>
+
+namespace onert
+{
+namespace backend
+{
+class Backend;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class LowerInfo
+{
+public:
+ LowerInfo(const backend::Backend *backend, Layout layout);
+ const backend::Backend *backend() const { return _permute_factor.backend(); }
+ Layout layout() const { return _permute_factor.layout(); }
+
+private:
+ operand::PermuteFactor _permute_factor;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_LOWER_INFO_H__
diff --git a/runtime/onert/core/include/ir/operation/Max.h b/runtime/onert/core/include/ir/operation/Max.h
new file mode 100644
index 000000000..df72d3ae9
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Max.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_MAX_H__
+#define __ONERT_IR_OPERATION_MAX_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Max : public Operation
+{
+public:
+ enum Input
+ {
+ LHS = 0,
+ RHS
+ };
+
+public:
+ Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Max; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_MAX_H__
diff --git a/runtime/onert/core/include/ir/operation/MaxPool2D.h b/runtime/onert/core/include/ir/operation/MaxPool2D.h
new file mode 100644
index 000000000..300f7cb3c
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/MaxPool2D.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_MAXPOOL2D_H__
+#define __ONERT_IR_OPERATION_MAXPOOL2D_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+#include "ir/InternalType.h"
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class MaxPool2D : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+ struct Param
+ {
+ uint32_t kh;
+ uint32_t kw;
+ Stride stride;
+ Padding padding;
+ Activation activation;
+ };
+
+public:
+ MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::MaxPool2D; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_MAXPOOL2D_H__
diff --git a/runtime/onert/core/include/ir/operation/Mean.h b/runtime/onert/core/include/ir/operation/Mean.h
new file mode 100644
index 000000000..5fe3946d6
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Mean.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_MEAN_H__
+#define __ONERT_IR_OPERATION_MEAN_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Mean : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT
+ };
+
+ struct Param
+ {
+ std::vector<int> axes;
+ bool keep_dims;
+ int32_t rank;
+ };
+
+public:
+ Mean(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Mean; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_MEAN_H__
diff --git a/runtime/onert/core/include/ir/operation/Min.h b/runtime/onert/core/include/ir/operation/Min.h
new file mode 100644
index 000000000..117301c00
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Min.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_MIN_H__
+#define __ONERT_IR_OPERATION_MIN_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Min : public Operation
+{
+public:
+ enum Input
+ {
+ LHS = 0,
+ RHS
+ };
+
+public:
+ Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Min; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_MIN_H__
diff --git a/runtime/onert/core/include/ir/operation/Mul.h b/runtime/onert/core/include/ir/operation/Mul.h
new file mode 100644
index 000000000..0f01b0ecf
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Mul.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_MUL_H__
+#define __ONERT_IR_OPERATION_MUL_H__
+
+#include "ir/Operation.h"
+#include "ir/InternalType.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Mul : public Operation
+{
+public:
+ enum Input
+ {
+ LHS = 0,
+ RHS
+ };
+
+ struct Param
+ {
+ Activation activation;
+ };
+
+public:
+ Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Mul; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_MUL_H__
diff --git a/runtime/onert/core/include/ir/operation/Neg.h b/runtime/onert/core/include/ir/operation/Neg.h
new file mode 100644
index 000000000..f8123c485
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Neg.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_NEG_H__
+#define __ONERT_IR_OPERATION_NEG_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Neg : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Neg; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_NEG_H__
diff --git a/runtime/onert/core/include/ir/operation/OneHot.h b/runtime/onert/core/include/ir/operation/OneHot.h
new file mode 100644
index 000000000..2cb0d23e1
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/OneHot.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_ONEHOT_H__
+#define __ONERT_IR_OPERATION_ONEHOT_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class OneHot : public Operation
+{
+public:
+ enum Input
+ {
+ INDICES = 0,
+ DEPTH = 1,
+ ON_VALUE = 2,
+ OFF_VALUE = 3,
+ };
+
+ struct Param
+ {
+ int depth; // comes from input tensor, not from OneHotOptions
+ float on_value; // comes from input tensor, not from OneHotOptions
+ float off_value; // comes from input tensor, not from OneHotOptions
+ int axis;
+ };
+
+public:
+ OneHot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::OneHot; }
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_ONEHOT_H__
diff --git a/runtime/onert/core/include/ir/operation/PReLU.h b/runtime/onert/core/include/ir/operation/PReLU.h
new file mode 100644
index 000000000..2981ffc6a
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/PReLU.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_PRELU_H__
+#define __ONERT_IR_OPERATION_PRELU_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class PReLU : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ ALPHA = 1
+ };
+
+public:
+ PReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::PReLU; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_PRELU_H__
diff --git a/runtime/onert/core/include/ir/operation/Pack.h b/runtime/onert/core/include/ir/operation/Pack.h
new file mode 100644
index 000000000..39fca49d4
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Pack.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ONERT_IR_OPERATION_PACK_H__
+#define __ONERT_IR_OPERATION_PACK_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+class Pack : public Operation
+{
+public:
+ struct Param
+ {
+ int32_t num;
+ int32_t axis;
+ int32_t rank;
+ };
+
+public:
+ Pack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Pack; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+} // namespace operation
+} // namespace ir
+} // namespace onert
+#endif // __ONERT_IR_OPERATION_PACK_H__
diff --git a/runtime/onert/core/include/ir/operation/Pad.h b/runtime/onert/core/include/ir/operation/Pad.h
new file mode 100644
index 000000000..18da18bc3
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Pad.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_PAD_H__
+#define __ONERT_IR_OPERATION_PAD_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Pad : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ PAD = 1,
+ // VALUE = 2 Not allow padding value operand yet
+ };
+
+public:
+ struct Param
+ {
+ int32_t rank;
+ };
+
+public:
+ Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Pad; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_PAD_H__
diff --git a/runtime/onert/core/include/ir/operation/Permute.h b/runtime/onert/core/include/ir/operation/Permute.h
new file mode 100644
index 000000000..30930d2cc
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Permute.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_PERMUTE_H__
+#define __ONERT_IR_OPERATION_PERMUTE_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace backend
+{
+class BackendContext;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Permute : public Operation
+{
+public:
+ enum class Type
+ {
+ NHWC_TO_NCHW,
+ NCHW_TO_NHWC,
+ COPY
+ };
+
+ struct Param
+ {
+ const backend::BackendContext *input_backend_ctx;
+ const backend::BackendContext *output_backend_ctx;
+ };
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Permute; }
+
+public:
+ Permute(const OperandIndex &input, const OperandIndex &output,
+ const backend::BackendContext *input_backend_ctx,
+ const backend::BackendContext *output_backend_ctx, Type type,
+ DataType data_type = DataType::FLOAT32);
+
+public:
+ const Param &param() const { return _param; }
+ DataType getDataType() const { return _dataType; }
+ Type getPermuteType() const { return _type; }
+
+private:
+ Param _param;
+ Type _type;
+ DataType _dataType;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_PERMUTE_H__
diff --git a/runtime/onert/core/include/ir/operation/RNN.h b/runtime/onert/core/include/ir/operation/RNN.h
new file mode 100644
index 000000000..087075da2
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/RNN.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ONERT_IR_OPERATION_RNN_H__
+#define __ONERT_IR_OPERATION_RNN_H__
+
+#include "ir/InternalType.h"
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class RNN : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ WEIGHTS = 1,
+ RECURRENT_WEIGHTS = 2,
+ BIAS = 3,
+ HIDDEN_STATE_IN = 4
+ };
+
+ enum Output
+ {
+ OUTPUT = 0,
+ HIDDEN_STATE_OUT = 1
+ };
+
+ struct Param
+ {
+ Activation activation;
+ };
+
+public:
+ RNN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::RNN; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_RNN_H__
diff --git a/runtime/onert/core/include/ir/operation/RSQRT.h b/runtime/onert/core/include/ir/operation/RSQRT.h
new file mode 100644
index 000000000..64bb4f10a
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/RSQRT.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_RSQRT_H__
+#define __ONERT_IR_OPERATION_RSQRT_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class RSQRT : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::RSQRT; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_RSQRT_H__
diff --git a/runtime/onert/core/include/ir/operation/ReLU.h b/runtime/onert/core/include/ir/operation/ReLU.h
new file mode 100644
index 000000000..9eb0c091b
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/ReLU.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_RELU_H__
+#define __ONERT_IR_OPERATION_RELU_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class ReLU : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::ReLU; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_RELU_H__
diff --git a/runtime/onert/core/include/ir/operation/ReLU1.h b/runtime/onert/core/include/ir/operation/ReLU1.h
new file mode 100644
index 000000000..134ee573a
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/ReLU1.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_ReLU1_H__
+#define __ONERT_IR_OPERATION_ReLU1_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class ReLU1 : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::ReLU1; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_ReLU1_H__
diff --git a/runtime/onert/core/include/ir/operation/ReLU6.h b/runtime/onert/core/include/ir/operation/ReLU6.h
new file mode 100644
index 000000000..e658c4925
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/ReLU6.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_ReLU6_H__
+#define __ONERT_IR_OPERATION_ReLU6_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class ReLU6 : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::ReLU6; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_ReLU6_H__
diff --git a/runtime/onert/core/include/ir/operation/ReduceMax.h b/runtime/onert/core/include/ir/operation/ReduceMax.h
new file mode 100644
index 000000000..af4bd5a61
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/ReduceMax.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_REDUCEMAX_H__
+#define __ONERT_IR_OPERATION_REDUCEMAX_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class ReduceMax : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+ struct Param
+ {
+ std::vector<int> axes;
+ bool keep_dims;
+ int32_t rank;
+ };
+
+public:
+ ReduceMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::ReduceMax; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_REDUCEMAX_H__
diff --git a/runtime/onert/core/include/ir/operation/ReduceMin.h b/runtime/onert/core/include/ir/operation/ReduceMin.h
new file mode 100644
index 000000000..46a3e9812
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/ReduceMin.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_REDUCEMIN_H__
+#define __ONERT_IR_OPERATION_REDUCEMIN_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class ReduceMin : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+ struct Param
+ {
+ std::vector<int> axes;
+ bool keep_dims;
+ int32_t rank;
+ };
+
+public:
+ ReduceMin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::ReduceMin; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_REDUCEMIN_H__
diff --git a/runtime/onert/core/include/ir/operation/ReduceSum.h b/runtime/onert/core/include/ir/operation/ReduceSum.h
new file mode 100644
index 000000000..b7374a51d
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/ReduceSum.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_REDUCE_SUM_H__
+#define __ONERT_IR_OPERATION_REDUCE_SUM_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class ReduceSum : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+ struct Param
+ {
+ std::vector<int> axes;
+ bool keep_dims;
+ int32_t rank;
+ };
+
+public:
+ ReduceSum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::ReduceSum; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_REDUCE_SUM_H__
diff --git a/runtime/onert/core/include/ir/operation/Reshape.h b/runtime/onert/core/include/ir/operation/Reshape.h
new file mode 100644
index 000000000..34c212376
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Reshape.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_RESHAPE_H__
+#define __ONERT_IR_OPERATION_RESHAPE_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Reshape : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ SHAPE = 1
+ };
+
+public:
+ Reshape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Reshape; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_RESHAPE_H__
diff --git a/runtime/onert/core/include/ir/operation/ResizeBilinear.h b/runtime/onert/core/include/ir/operation/ResizeBilinear.h
new file mode 100644
index 000000000..2887ed845
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/ResizeBilinear.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_RESIZE_BILINEAR_H__
+#define __ONERT_IR_OPERATION_RESIZE_BILINEAR_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class ResizeBilinear : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+ struct Param
+ {
+ int32_t height_out;
+ int32_t width_out;
+ };
+
+public:
+ ResizeBilinear(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::ResizeBilinear; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_RESIZE_BILINEAR_H__
diff --git a/runtime/onert/core/include/ir/operation/SQRT.h b/runtime/onert/core/include/ir/operation/SQRT.h
new file mode 100644
index 000000000..8563b1ab1
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/SQRT.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_SQRT_H__
+#define __ONERT_IR_OPERATION_SQRT_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class SQRT : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::SQRT; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_SQRT_H__
diff --git a/runtime/onert/core/include/ir/operation/Shape.h b/runtime/onert/core/include/ir/operation/Shape.h
new file mode 100644
index 000000000..4dea7e424
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Shape.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_SHAPE_H__
+#define __ONERT_IR_OPERATION_SHAPE_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Shape : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ Shape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Shape; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_SHAPE_H__
diff --git a/runtime/onert/core/include/ir/operation/Sin.h b/runtime/onert/core/include/ir/operation/Sin.h
new file mode 100644
index 000000000..aef44ab2e
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Sin.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_SIN_H__
+#define __ONERT_IR_OPERATION_SIN_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Sin : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Sin; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_SIN_H__
diff --git a/runtime/onert/core/include/ir/operation/Slice.h b/runtime/onert/core/include/ir/operation/Slice.h
new file mode 100644
index 000000000..71e117c9c
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Slice.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_SLICE_H__
+#define __ONERT_IR_OPERATION_SLICE_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Slice : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ BEGINS = 1,
+ SIZES = 2,
+ };
+
+public:
+ struct Param
+ {
+ int32_t rank;
+ };
+
+public:
+ Slice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Slice; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_SLICE_H__
diff --git a/runtime/onert/core/include/ir/operation/Softmax.h b/runtime/onert/core/include/ir/operation/Softmax.h
new file mode 100644
index 000000000..db7ae910e
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Softmax.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_SOFTMAX_H__
+#define __ONERT_IR_OPERATION_SOFTMAX_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Softmax : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+ struct Param
+ {
+ float beta;
+ };
+
+public:
+ Softmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Softmax; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_SOFTMAX_H__
diff --git a/runtime/onert/core/include/ir/operation/SpaceToBatchND.h b/runtime/onert/core/include/ir/operation/SpaceToBatchND.h
new file mode 100644
index 000000000..99928ff24
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/SpaceToBatchND.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_SPACE_TO_BATCH_ND_H__
+#define __ONERT_IR_OPERATION_SPACE_TO_BATCH_ND_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class SpaceToBatchND : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ BLOCK_SIZE = 1,
+ PADDINGS = 2
+ };
+
+public:
+ SpaceToBatchND(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::SpaceToBatchND; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_SPACE_TO_BATCH_ND_H__
diff --git a/runtime/onert/core/include/ir/operation/SpaceToDepth.h b/runtime/onert/core/include/ir/operation/SpaceToDepth.h
new file mode 100644
index 000000000..6c8b09130
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/SpaceToDepth.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_SPACE_TO_DEPTH_H__
+#define __ONERT_IR_OPERATION_SPACE_TO_DEPTH_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class SpaceToDepth : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+ struct Param
+ {
+ std::int32_t block_size;
+ };
+
+public:
+ SpaceToDepth(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::SpaceToDepth; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_SPACE_TO_DEPTH_H__
diff --git a/runtime/onert/core/include/ir/operation/Split.h b/runtime/onert/core/include/ir/operation/Split.h
new file mode 100644
index 000000000..d17a9813c
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Split.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ONERT_IR_OPERATION_SPLIT_H__
+#define __ONERT_IR_OPERATION_SPLIT_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+class Split : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+ struct Param
+ {
+ int axis;
+ int num_splits;
+ int rank;
+ };
+
+public:
+ Split(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Split; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+} // namespace operation
+} // namespace ir
+} // namespace onert
+#endif // __ONERT_IR_OPERATION_SPLIT_H__
diff --git a/runtime/onert/core/include/ir/operation/SquaredDifference.h b/runtime/onert/core/include/ir/operation/SquaredDifference.h
new file mode 100644
index 000000000..392b11448
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/SquaredDifference.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_SQUARED_DIFFERENCE_H__
+#define __ONERT_IR_OPERATION_SQUARED_DIFFERENCE_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class SquaredDifference : public Operation
+{
+public:
+ enum Input
+ {
+ LHS = 0,
+ RHS
+ };
+
+public:
+ SquaredDifference(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::SquaredDifference; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_SQUARED_DIFFERENCE_H__
diff --git a/runtime/onert/core/include/ir/operation/Squeeze.h b/runtime/onert/core/include/ir/operation/Squeeze.h
new file mode 100644
index 000000000..c370472b7
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Squeeze.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_SQUEEZE_H__
+#define __ONERT_IR_OPERATION_SQUEEZE_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Squeeze : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+ struct Param
+ {
+ // Please see tensorflow/lite/c/builtin_op_data.h and squeeze.cc.
+ // tensorflow lite supports only for ndim <= 8.
+ int dims[8];
+ int ndim;
+ };
+
+public:
+ Squeeze(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Squeeze; }
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_SQUEEZE_H__
diff --git a/runtime/onert/core/include/ir/operation/StridedSlice.h b/runtime/onert/core/include/ir/operation/StridedSlice.h
new file mode 100644
index 000000000..8c493b21d
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/StridedSlice.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_STRIDED_SLICE_H__
+#define __ONERT_IR_OPERATION_STRIDED_SLICE_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class StridedSlice : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ STARTS = 1,
+ ENDS = 2,
+ STRIDES = 3
+ };
+
+ struct Param
+ {
+ int32_t begin_mask;
+ int32_t end_mask;
+ int32_t shrink_axis_mask;
+ int32_t rank;
+ };
+
+public:
+ StridedSlice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::StridedSlice; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_STRIDED_SLICE_H__
diff --git a/runtime/onert/core/include/ir/operation/Sub.h b/runtime/onert/core/include/ir/operation/Sub.h
new file mode 100644
index 000000000..0674e6e4d
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Sub.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_SUB_H__
+#define __ONERT_IR_OPERATION_SUB_H__
+
+#include "ir/Operation.h"
+#include "ir/InternalType.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Sub : public Operation
+{
+public:
+ enum Input
+ {
+ LHS = 0,
+ RHS
+ };
+
+ struct Param
+ {
+ Activation activation;
+ };
+
+public:
+ Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Sub; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_SUB_H__
diff --git a/runtime/onert/core/include/ir/operation/Tanh.h b/runtime/onert/core/include/ir/operation/Tanh.h
new file mode 100644
index 000000000..9b8d03bca
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Tanh.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_TANH_H__
+#define __ONERT_IR_OPERATION_TANH_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Tanh : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+public:
+ Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Tanh; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_TANH_H__
diff --git a/runtime/onert/core/include/ir/operation/TopKV2.h b/runtime/onert/core/include/ir/operation/TopKV2.h
new file mode 100644
index 000000000..179a599ca
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/TopKV2.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_TOPK_V2_H__
+#define __ONERT_IR_OPERATION_TOPK_V2_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class TopKV2 : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT
+ };
+
+ enum Output
+ {
+ OUTPUT_VALUES = 0,
+ OUTPUT_INDICES,
+ };
+
+ struct Param
+ {
+ std::int32_t k;
+ };
+
+public:
+ TopKV2(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::TopKV2; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_TOPK_V2_H__
diff --git a/runtime/onert/core/include/ir/operation/Transpose.h b/runtime/onert/core/include/ir/operation/Transpose.h
new file mode 100644
index 000000000..b2e04ca33
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Transpose.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_TRANSPOSE_H__
+#define __ONERT_IR_OPERATION_TRANSPOSE_H__
+
+#include "ir/Operation.h"
+
+#include <utility>
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Transpose : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0, // for an n-D tensor, specifying the tensor to be transposed.
+ };
+
+ struct Param
+ {
+ std::vector<int> perm;
+ int32_t rank;
+ };
+
+public:
+ Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Transpose; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_TRANSPOSE_H__
diff --git a/runtime/onert/core/include/ir/operation/TransposeConv.h b/runtime/onert/core/include/ir/operation/TransposeConv.h
new file mode 100644
index 000000000..05137ccf8
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/TransposeConv.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_TRANSPOSE_CONV_H__
+#define __ONERT_IR_OPERATION_TRANSPOSE_CONV_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+#include "ir/InternalType.h"
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class TransposeConv : public Operation
+{
+public:
+ enum Input
+ {
+ OUTPUT_SHAPE = 0,
+ KERNEL,
+ INPUT
+ };
+
+ struct Param
+ {
+ Padding padding;
+ Stride stride;
+ };
+
+public:
+ TransposeConv(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::TransposeConv; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_TRANSPOSE_CONV_H__
diff --git a/runtime/onert/core/include/ir/operation/Unpack.h b/runtime/onert/core/include/ir/operation/Unpack.h
new file mode 100644
index 000000000..28e91afda
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Unpack.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ONERT_IR_OPERATION_UNPACK_H__
+#define __ONERT_IR_OPERATION_UNPACK_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+class Unpack : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0
+ };
+
+ struct Param
+ {
+ int32_t num;
+ int32_t axis;
+ int32_t rank;
+ };
+
+public:
+ Unpack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Unpack; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+} // namespace operation
+} // namespace ir
+} // namespace onert
+#endif // __ONERT_IR_OPERATION_UNPACK_H__
diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst
new file mode 100644
index 000000000..75767b8b1
--- /dev/null
+++ b/runtime/onert/core/include/util/Config.lst
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CONFIG
+#error Define CONFIG before including this file
+#endif
+
+// Name | Type | Default
+CONFIG(GRAPH_DOT_DUMP , int , "0")
+CONFIG(BACKENDS , std::string , "acl_cl;acl_neon;cpu")
+CONFIG(OP_BACKEND_ALLOPS , std::string , "")
+CONFIG(OP_BACKEND_MAP , std::string , "")
+CONFIG(DISABLE_COMPILE , bool , "0")
+CONFIG(ONERT_LOG_ENABLE , bool , "0")
+CONFIG(CPU_MEMORY_PLANNER , std::string , "WIC")
+CONFIG(EXECUTOR , std::string , "Linear")
+CONFIG(ACL_LAYOUT , std::string , "none")
+CONFIG(NCNN_LAYOUT , std::string , "NCHW")
+CONFIG(PROFILING_MODE , bool , "0")
+CONFIG(USE_SCHEDULER , bool , "0")
+CONFIG(OP_SEQ_MAX_NODE , int , "0")
+CONFIG(TRACE_FILEPATH , std::string , "")
+CONFIG(DELETE_CACHED_DATA , bool , "0")
+
+// Auto-generate all operations
+
+#define OP(InternalName) \
+ CONFIG(OP_BACKEND_ ## InternalName, std::string, "")
+#include "ir/Operations.lst"
+#undef OP
+
diff --git a/runtime/onert/core/include/util/ConfigSource.h b/runtime/onert/core/include/util/ConfigSource.h
new file mode 100644
index 000000000..b6a8144fd
--- /dev/null
+++ b/runtime/onert/core/include/util/ConfigSource.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_CONFIG_SOURCE_H__
+#define __ONERT_UTIL_CONFIG_SOURCE_H__
+
+#include <memory>
+
+#include "IConfigSource.h"
+
+namespace onert
+{
+namespace util
+{
+
+void config_source(std::unique_ptr<IConfigSource> &&source);
+
+bool toBool(const std::string &val);
+int toInt(const std::string &val);
+
+bool getConfigBool(const std::string &key);
+int getConfigInt(const std::string &key);
+std::string getConfigString(const std::string &key);
+
+} // namespace util
+} // namespace onert
+
+namespace onert
+{
+namespace util
+{
+namespace config
+{
+
+#define CONFIG(Name, Type, Default) extern const char *Name;
+
+#include "Config.lst"
+
+#undef CONFIG
+
+} // namespace config
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/EnvConfigSource.h b/runtime/onert/core/include/util/EnvConfigSource.h
new file mode 100644
index 000000000..8c5d0e8e9
--- /dev/null
+++ b/runtime/onert/core/include/util/EnvConfigSource.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
+#define __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
+
+#include <unordered_map>
+
+#include "util/GeneralConfigSource.h"
+
+namespace onert
+{
+namespace util
+{
+
+class EnvConfigSource final : public GeneralConfigSource
+{
+public:
+ std::string get(const std::string &key) const override;
+
+private:
+ std::unordered_map<std::string, std::string> _default_attributes;
+};
+
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/EventCollectorGlobal.h b/runtime/onert/core/include/util/EventCollectorGlobal.h
new file mode 100644
index 000000000..1f110d906
--- /dev/null
+++ b/runtime/onert/core/include/util/EventCollectorGlobal.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
+#define __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
+
+#include "misc/EventRecorder.h"
+#include "misc/EventCollector.h"
+
+namespace onert
+{
+namespace util
+{
+
+/**
+ * @brief Singleton class for event collection from anywhere in code
+ *
+ */
+class EventCollectorGlobal
+{
+public:
+ /**
+ * @brief Get the singleton object of this class
+ *
+ * @return EventCollectorGlobal& Singleton object
+ */
+ static EventCollectorGlobal &get();
+
+public:
+ /**
+ * @brief Getter for event collector object
+ *
+ * @return EventCollector& Collector object
+ */
+ EventCollector &collector() { return _collector; }
+
+private:
+ EventCollectorGlobal();
+ ~EventCollectorGlobal();
+
+private:
+ EventRecorder _recorder;
+ EventCollector _collector;
+};
+
+/**
+ * @brief Helper class for emitting duration event which is handled automatically with ctor/dtor
+ *
+ */
+class EventDurationBlock
+{
+public:
+ /**
+ * @brief Raise a duration event with type of BEGIN
+ *
+ * @param tag A label for the duration event
+ */
+ EventDurationBlock(const std::string &tag);
+ /**
+ * @brief Raise a duration event with type of END
+ *
+ */
+ ~EventDurationBlock();
+
+private:
+ std::string _tag;
+};
+
+/**
+ * @brief Helper class for emitting duration event which is handled manually
+ *
+ * Usage:
+ * {
+ * ...
+ * EventDurationManual duration("some tag");
+ * duration.begin();
+ * ...
+ * ... // Code for duration
+ * ...
+ * duration.end();
+ * }
+ *
+ */
+class EventDurationManual
+{
+public:
+ /**
+ * @brief Construct a new Event Duration Manual object
+ *
+ * @param tag A label for the duration object
+ */
+ EventDurationManual(const std::string &tag);
+ /**
+ * @brief Destroy the Event Duration Manual object
+ *
+ */
+ ~EventDurationManual();
+
+ /**
+ * @brief Raise a duration event with type of BEGIN
+ *
+ */
+ void begin();
+ /**
+ * @brief Raise a duration event with type of END
+ *
+ */
+ void end();
+
+private:
+ std::string _tag;
+ bool _pair;
+};
+
+} // namespace util
+} // namespace onert
+
+/**
+ * Helper Macro Definitions
+ *
+ * HOW TO USE
+ *
+ * void f(args)
+ * {
+ * EVENT_DURATION_FUNCTION();
+ * ...
+ * if(cond)
+ * {
+ * EVENT_DURATION_REGION("if branch");
+ * ...
+ * }
+ * ...
+ * }
+ */
+
+#define EVENT_DURATION_FUNCTION() \
+ ::onert::util::EventDurationBlock __event_duration__##__LINE__ { __FUNCTION__ }
+
+#define EVENT_DURATION_REGION(tag) \
+ ::onert::util::EventDurationBlock __event_duration__##__LINE__ { tag }
+
+#endif // __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
diff --git a/runtime/onert/core/include/util/GeneralConfigSource.h b/runtime/onert/core/include/util/GeneralConfigSource.h
new file mode 100644
index 000000000..dedc820ec
--- /dev/null
+++ b/runtime/onert/core/include/util/GeneralConfigSource.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
+#define __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
+
+#include <unordered_map>
+
+#include "util/IConfigSource.h"
+
+namespace onert
+{
+namespace util
+{
+
+class GeneralConfigSource : public IConfigSource
+{
+public:
+ GeneralConfigSource() = default;
+
+ std::string get(const std::string &key) const override;
+ void set(const std::string &key, const std::string &val);
+
+private:
+ std::unordered_map<std::string, std::string> _map;
+};
+
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/IConfigSource.h b/runtime/onert/core/include/util/IConfigSource.h
new file mode 100644
index 000000000..07b09848a
--- /dev/null
+++ b/runtime/onert/core/include/util/IConfigSource.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_I_CONFIG_SOURCE_H__
+#define __ONERT_UTIL_I_CONFIG_SOURCE_H__
+
+#include <string>
+
+namespace onert
+{
+namespace util
+{
+
+struct IConfigSource
+{
+ /**
+ * @brief Destroy the IConfigSource object
+ */
+ virtual ~IConfigSource() = default;
+
+ /**
+ * @brief get the value for the matching key
+ *
+ * @param key string key to search
+ * @return string value associated with the key
+ */
+ virtual std::string get(const std::string &key) const = 0;
+};
+
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_I_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/ITimer.h b/runtime/onert/core/include/util/ITimer.h
new file mode 100644
index 000000000..d5a4e1eb0
--- /dev/null
+++ b/runtime/onert/core/include/util/ITimer.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_ITIMER_H__
+#define __ONERT_UTIL_ITIMER_H__
+
+#include <chrono>
+
+namespace onert
+{
+namespace util
+{
+
+class ITimer
+{
+public:
+ virtual void handleBegin() = 0;
+ virtual void handleEnd() = 0;
+ int getTime() { return _timer_res; };
+
+ virtual ~ITimer() = default;
+
+protected:
+ int _timer_res{0};
+};
+
+class CPUTimer : public ITimer
+{
+public:
+ void handleBegin() override { _start_time = std::chrono::steady_clock::now(); };
+
+ void handleEnd() override
+ {
+ const auto end_time = std::chrono::steady_clock::now();
+ _timer_res =
+ std::chrono::duration_cast<std::chrono::microseconds>(end_time - _start_time).count();
+ };
+
+private:
+ std::chrono::steady_clock::time_point _start_time; // in microseconds
+};
+
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_ITIMER_H__
diff --git a/runtime/onert/core/include/util/Index.h b/runtime/onert/core/include/util/Index.h
new file mode 100644
index 000000000..d96c0cffe
--- /dev/null
+++ b/runtime/onert/core/include/util/Index.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_INDEX_H__
+#define __ONERT_UTIL_INDEX_H__
+
+#include <functional>
+#include <limits>
+#include <stdint.h>
+
+namespace onert
+{
+namespace util
+{
+
+/**
+ * @brief A wrapper class for unsigned integral Index
+ * NOTE : Max value of the underlying type is used as the invalid value
+ *
+ * @tparam T Underlying type. Must be unsigned integral type otherwise its behavior is undefined.
+ * @tparam DummyTag Dummy type to distinguish types with a same underlying type. Using an opaque
+ * type is recommended.
+ */
+template <typename T, typename DummyTag> class Index
+{
+private:
+ static const T UNDEFINED = std::numeric_limits<T>::max();
+
+public:
+ /**
+ * @brief Construct a new Index object
+ */
+ explicit Index(void) : _index{UNDEFINED} {}
+ /**
+ * @brief Construct a new Index object with a value in the underlying type
+ *
+ * @param o Value in the underlying type
+ */
+ explicit Index(const T o) : _index{o} {}
+ /**
+ * @brief Copy Constructor
+ *
+ * @param o Object to be copied
+ */
+ Index(const Index &o) = default;
+
+ /**
+ * @brief Assign a value in the underlying type
+ *
+ * @param o Value in the underlying type
+ * @return Index& Reference of this pointer
+ */
+ Index &operator=(const T o)
+ {
+ _index = o;
+ return *this;
+ }
+
+ /**
+ * @brief Copy assignment operator
+ *
+ * @param o Object to be copied
+ * @return Index& Reference of this pointer
+ */
+ Index &operator=(const Index &o) = default;
+
+ /**
+ * @brief Equality operator
+ *
+ * @param o The other value in the underlying type to compare
+ * @return true if underlying value is the same, false otherwise
+ */
+ bool operator==(T o) const { return _index == o; }
+ /**
+ * @brief Equality operator
+ *
+ * @param o The other object to compare
+ * @return true if underlying value is the same, false otherwise
+ */
+ bool operator==(const Index &o) const { return _index == o._index; }
+ /**
+ * @brief Inequality operator
+ *
+ * @param o The other value in the underlying type to compare
+ * @return true if underlying value is different, false otherwise
+ */
+ bool operator!=(T o) const { return !(*this == o); }
+ /**
+ * @brief Inequality operator
+ *
+ * @param o The other object to compare
+ * @return true if underlying value is different, false otherwise
+ */
+ bool operator!=(const Index &o) const { return !(*this == o); }
+
+ /**
+ * @brief Post increment operator
+ *
+ * @return Index Index before increment
+ */
+ Index operator++(int)
+ {
+ Index temp = *this;
+ _index++;
+ return temp;
+ }
+
+ /**
+ * @brief Check whether the value is valid or not
+ *
+ * @return true if valid, false otherwise
+ */
+ bool valid() const { return _index != UNDEFINED; }
+ /**
+ * @brief Return underlying value
+ *
+ * @return T Underlying value
+ */
+ T value() const { return _index; }
+
+private:
+ T _index;
+};
+
+} // namespace util
+} // namespace onert
+
+namespace std
+{
+
+template <typename T, typename Tag> struct hash<::onert::util::Index<T, Tag>>
+{
+ size_t operator()(const ::onert::util::Index<T, Tag> &index) const noexcept
+ {
+ return hash<T>()(index.value());
+ }
+};
+
+} // namespace std
+
+#endif // __ONERT_UTIL_INDEX_H__
diff --git a/runtime/onert/core/include/util/ObjectManager.h b/runtime/onert/core/include/util/ObjectManager.h
new file mode 100644
index 000000000..d2dd881a8
--- /dev/null
+++ b/runtime/onert/core/include/util/ObjectManager.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_OBJECT_MANAGER_H__
+#define __ONERT_UTIL_OBJECT_MANAGER_H__
+
+#include <unordered_map>
+#include <memory>
+#include <list>
+#include <functional>
+
+#include <memory>
+
+namespace onert
+{
+namespace util
+{
+
+/**
+ * @brief Class that owns objects and maps them with indices as a handle for them
+ *
+ */
+template <typename Index, typename Object> class ObjectManager
+{
+public:
+ ObjectManager() : _index_count{0u} {}
+
+public:
+ /**
+ * @brief Create an object with args and put it in the container with a new Index for that
+ *
+ * @param[in] args Arguments for creating Operand object
+ * @return Created index that is associated to the object
+ */
+ template <class... Args> Index emplace(Args &&... args)
+ {
+ auto index = generateIndex();
+ _objects.emplace(index, std::make_unique<Object>(std::forward<Args>(args)...));
+ return index;
+ }
+
+ /**
+ * @brief Put object in the container with a new Index for that
+ *
+ * @param[in] object Object to be pushed
+ * @return Created index that is associated to the object
+ */
+ Index push(std::unique_ptr<Object> &&object)
+ {
+ auto index = generateIndex();
+ _objects.emplace(index, std::move(object));
+ return index;
+ }
+
+ /**
+ * @brief Remove the object that is associated with the given index
+ *
+ * @param[in] index Index of the object to be removed
+ * @return N/A
+ */
+ void remove(const Index &index) { _objects.erase(index); }
+
+ /**
+ * @brief Get the object that is associated with the given index
+ *
+ * @param[in] index Index of the object to be returned
+ * @return Object
+ */
+ const Object &at(const Index &index) const { return *(_objects.at(index)); }
+ /**
+ * @brief Get the object that is associated with the given index
+ *
+ * @param[in] index Index of the object to be returned
+ * @return Object
+ */
+ Object &at(const Index &index) { return *(_objects.at(index)); }
+ /**
+ * @brief Check whether an object is associated with the given index
+ *
+ * @param[in] index Index to be checked
+ * @return true if such entry exists otherwise false
+ */
+ bool exist(const Index &index) const
+ {
+ auto it = _objects.find(index);
+ return it != _objects.end();
+ }
+ /**
+ * @brief Iterate over the container with given function
+ *
+ * @param[in] fn Function to be run for every container entry
+ * @return N/A
+ */
+ void iterate(const std::function<void(const Index &, const Object &)> &fn) const
+ {
+ for (const auto &e : _objects)
+ {
+ fn(e.first, *e.second);
+ }
+ }
+ /**
+ * @brief Iterate over the container with given function
+ *
+ * @param[in] fn Function to be run for every container entry
+ * @return N/A
+ */
+ void iterate(const std::function<void(const Index &, Object &)> &fn)
+ {
+ // TODO Remove this workaround
+ // This implementation is a workaround in case of adding operands while iteration
+ std::list<Index> l;
+
+ for (auto &e : _objects)
+ {
+ l.push_back(e.first);
+ }
+
+ for (auto index : l)
+ {
+ fn(index, *_objects[index]);
+ }
+ }
+
+private:
+ Index generateIndex() { return Index{_index_count++}; }
+
+protected:
+ std::unordered_map<Index, std::unique_ptr<Object>> _objects;
+ uint32_t _index_count;
+};
+
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_OBJECT_MANAGER_H__
diff --git a/runtime/onert/core/include/util/Set.h b/runtime/onert/core/include/util/Set.h
new file mode 100644
index 000000000..ee4062d25
--- /dev/null
+++ b/runtime/onert/core/include/util/Set.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Set.h
+ * @brief This file contains onert::util::Set class
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __ONERT_UTIL_SET_H__
+#define __ONERT_UTIL_SET_H__
+
+#include <cassert>
+#include <unordered_set>
+
+namespace onert
+{
+namespace util
+{
+
+/**
+ * @brief Class for set of custom element
+ * @tparam Element Key type of Set
+ */
+template <typename Element> class Set
+{
+public:
+ /**
+ * @brief Construct default Set object.
+ */
+ Set() = default;
+ /**
+ * @brief Construct Set object by copy semantics.
+ */
+ Set(const Set<Element> &) = default;
+ /**
+ * @brief Construct move Set object by move semantics.
+ */
+ Set(Set<Element> &&) = default;
+
+public:
+ /**
+ * @brief Add a given element to the set
+ *
+ * @param e Element added
+ */
+ void add(const Element &e) { _set.insert(e); }
+ /**
+ * @brief remove a given element from the set
+ *
+ * @param e Element removed
+ */
+ void remove(const Element &e) { _set.erase(e); }
+ /**
+ * @brief Get size of the set
+ *
+ * @return The size of the set
+ */
+ uint32_t size() const { return static_cast<uint32_t>(_set.size()); }
+ /**
+ * @brief Get whether the set is empty
+ *
+ * @return Whether the set is empty
+ */
+ bool empty() const { return _set.empty(); }
+ /**
+ * @brief Get whether a given element exists in the set
+ *
+ * @param e A given element
+ *
+ * @return Whether a given element exists in the set
+ */
+ bool contains(const Element &e) const { return _set.find(e) != _set.end(); }
+ /**
+ * @brief Get the only element of the set
+ *
+ * @return The only element of the set (asserts that the set holds exactly one element)
+ */
+ const Element &getOnlyElement() const
+ {
+ assert(_set.size() == 1u);
+ return *_set.begin();
+ }
+
+public:
+ /**
+ * @brief operator overloading function for `|`
+ *
+ * @return A set with two sets combined
+ */
+ Set<Element> operator|(const Set<Element> &other) const // Union
+ {
+ auto ret = *this;
+ for (auto e : other)
+ {
+ ret.add(e);
+ }
+ return ret;
+ }
+ /**
+ * @brief operator overloading function for `&`
+ *
+ * @return A set of elements that overlap in two sets
+ */
+ Set<Element> operator&(const Set<Element> &other) const // Intersect
+ {
+ Set<Element> ret;
+ for (auto e : other)
+ {
+ if (contains(e))
+ {
+ ret.add(e);
+ }
+ }
+ return ret;
+ }
+ /**
+ * @brief operator overloading function for `-`
+ *
+ * @return A set of subtracted from another set
+ */
+ Set<Element> operator-(const Set<Element> &other) const // Minus
+ {
+ auto ret = *this;
+ for (auto e : other)
+ {
+ ret.remove(e);
+ }
+ return ret;
+ }
+
+public:
+ /**
+ * @brief begin() of const_iterator for this class
+ *
+ * @return The first iterator of the set
+ */
+ typename std::unordered_set<Element>::const_iterator begin() const { return _set.begin(); }
+ /**
+ * @brief end() of const_iterator for this class
+ *
+ * @return The last iterator of the set
+ */
+ typename std::unordered_set<Element>::const_iterator end() const { return _set.end(); }
+
+private:
+ std::unordered_set<Element> _set;
+};
+
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_SET_H__
diff --git a/runtime/onert/core/include/util/ShapeInference.h b/runtime/onert/core/include/util/ShapeInference.h
new file mode 100644
index 000000000..fbd3d19c8
--- /dev/null
+++ b/runtime/onert/core/include/util/ShapeInference.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_GRAPH_SHAPE_INFERENCE_H__
+#define __ONERT_GRAPH_SHAPE_INFERENCE_H__
+
+#include "ir/operation/AvgPool2D.h"
+#include "ir/operation/Concat.h"
+#include "ir/operation/MaxPool2D.h"
+#include "ir/operation/Conv2D.h"
+#include "ir/operation/DepthwiseConv2D.h"
+#include "ir/operation/Reshape.h"
+#include "ir/Operands.h"
+#include "ir/Index.h"
+#include "ir/Layout.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace shape_inference
+{
+
+using Shapes = std::vector<ir::Shape>;
+
+Shapes inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape);
+
+Shapes inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param,
+ ir::Layout layout = ir::Layout::NHWC);
+
+Shapes inferConcatShape(const Shapes &in_shapes, const ir::operation::Concat::Param &param);
+
+Shapes inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param,
+ ir::Layout layout = ir::Layout::NHWC);
+
+Shapes inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape,
+ const ir::operation::Conv2D::Param &param,
+ ir::Layout layout = ir::Layout::NHWC);
+
+Shapes inferDepthwiseConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape,
+ const ir::operation::DepthwiseConv2D::Param &param,
+ ir::Layout layout = ir::Layout::NHWC);
+
+Shapes inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape);
+
+/**
+ * @brief Class to infer shape before running kernels. It does the following:
+ * - re-calculate and set output shape at compile time (before running kernels)
+ * - if calculation cannot be done at compile time, mark the outputs to be dynamic, meaning
+ * shapes of outputs will be calculated during running kernels
+ */
+class StaticInferer : public ir::OperationVisitor
+{
+public:
+ StaticInferer(ir::Operands &operands) : _operands(operands) { /* empty */}
+ virtual ~StaticInferer() = default;
+
+public:
+ /**
+ * @brief Infer shape of operands belonging to ops and set the output shape.
+ * If output shape cannot be known without running op, mark it so that it can be allocated
+ * when running kernel.
+ * @param op_seq sequence of operations
+ */
+ void infer(const ir::OpSequence &op_seq) { op_seq.accept(*this); };
+
+private:
+ // TODO Define visitors for operations. List them in alphabetic order.
+ void visit(const ir::operation::Reshape &op);
+
+private:
+ ir::Operands &_operands;
+};
+
+} // namespace shape_inference
+} // namespace onert
+
+#endif // __ONERT_GRAPH_SHAPE_INFERENCE_H__
diff --git a/runtime/onert/core/include/util/Utils.h b/runtime/onert/core/include/util/Utils.h
new file mode 100644
index 000000000..847fb6971
--- /dev/null
+++ b/runtime/onert/core/include/util/Utils.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Utils.h
+ * @brief This file contains utility macro
+ */
+
+#ifndef __ONERT_UTIL_UTILS_H__
+#define __ONERT_UTIL_UTILS_H__
+
+#define UNUSED_RELEASE(a) (void)(a)
+
+#endif // __ONERT_UTIL_UTILS_H__
diff --git a/runtime/onert/core/include/util/feature/nchw/Reader.h b/runtime/onert/core/include/util/feature/nchw/Reader.h
new file mode 100644
index 000000000..586ba92dd
--- /dev/null
+++ b/runtime/onert/core/include/util/feature/nchw/Reader.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_FEATURE_NCHW_READER_H__
+#define __ONERT_UTIL_FEATURE_NCHW_READER_H__
+
+#include <cassert>
+
+#include "backend/ITensor.h"
+#include "misc/feature/Reader.h"
+#include "misc/feature/Shape.h"
+
+namespace onert
+{
+namespace util
+{
+namespace feature
+{
+namespace nchw
+{
+
+template <typename T> class Reader final : public nnfw::misc::feature::Reader<T>
+{
+public:
+ // Construct for buffer of model inputs
+ Reader(const ::nnfw::misc::feature::Shape &shape, const T *ptr, size_t len)
+ : _shape{shape}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
+ {
+ assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
+
+ // No padding
+ _strides.W = sizeof(T);
+ _strides.H = shape.W * sizeof(T);
+ _strides.C = shape.W * shape.H * sizeof(T);
+ _strides.N = shape.W * shape.H * shape.C * sizeof(T);
+ }
+
+ // Construct for backend tensor
+ Reader(backend::ITensor *tensor)
+ : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ {
+ assert(tensor->layout() == ir::Layout::NCHW);
+
+ const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
+ _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
+ _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
+ _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
+ _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ _shape.W = tensor->dimension(3);
+ _shape.H = tensor->dimension(2);
+ _shape.C = tensor->dimension(1);
+ _shape.N = tensor->dimension(0);
+ }
+
+public:
+ T at(uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ const auto offset = feature_index_to_byte_offset(0, ch, row, col);
+
+ const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
+
+ return *ptr;
+ }
+ T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
+
+ const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
+
+ return *ptr;
+ }
+
+private:
+ size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
+ {
+ assert(1u * _shape.N > batch); // shape.N > batch
+ assert(1u * _shape.C > ch); // shape.C > ch
+ assert(1u * _shape.H > row); // shape.H > row
+ assert(1u * _shape.W > col); // shape.W > col
+
+ uint32_t res = 0;
+ res += batch * _strides.N;
+ res += ch * _strides.C;
+ res += row * _strides.H;
+ res += col * _strides.W;
+
+ return res;
+ }
+
+private:
+ // TODO Remove _shape
+ nnfw::misc::feature::Shape _shape;
+ using Strides = nnfw::misc::feature::Shape;
+ Strides _strides;
+ const uint8_t *_ptr;
+ size_t _len;
+};
+
+} // namespace nchw
+} // namespace feature
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_FEATURE_NCHW_READER_H__
diff --git a/runtime/onert/core/include/util/feature/nchw/View.h b/runtime/onert/core/include/util/feature/nchw/View.h
new file mode 100644
index 000000000..28c18d034
--- /dev/null
+++ b/runtime/onert/core/include/util/feature/nchw/View.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_FEATURE_NCHW_VIEW_H__
+#define __ONERT_UTIL_FEATURE_NCHW_VIEW_H__
+
+#include "misc/feature/Reader.h"
+#include "misc/feature/Shape.h"
+
+#include "backend/ITensor.h"
+#include "util/logging.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace util
+{
+namespace feature
+{
+namespace nchw
+{
+
+template <typename T> class View final : public nnfw::misc::feature::Reader<T>
+{
+public:
+ // Construct for buffer of model inputs
+ View(const ::nnfw::misc::feature::Shape &shape, T *ptr, size_t len)
+ : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len}
+ {
+ assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
+
+ _strides.W = sizeof(T);
+ _strides.H = shape.W * sizeof(T);
+ _strides.C = shape.W * shape.H * sizeof(T);
+ _strides.N = shape.W * shape.H * shape.C * sizeof(T);
+ }
+
+ // Construct for backend tensor
+ View(::onert::backend::ITensor *tensor)
+ : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ {
+ assert(tensor->layout() == ir::Layout::NCHW);
+
+ const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
+ _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
+ _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
+ _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
+ _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ _shape.W = tensor->dimension(3);
+ _shape.H = tensor->dimension(2);
+ _shape.C = tensor->dimension(1);
+ _shape.N = tensor->dimension(0);
+ }
+
+public:
+ T at(uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ const auto offset = feature_index_to_byte_offset(0, ch, row, col);
+
+ T *ptr = reinterpret_cast<T *>(_ptr + offset);
+
+ return *ptr;
+ }
+ T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
+
+ T *ptr = reinterpret_cast<T *>(_ptr + offset);
+
+ return *ptr;
+ }
+
+public:
+ T &at(uint32_t ch, uint32_t row, uint32_t col)
+ {
+ const auto offset = feature_index_to_byte_offset(0, ch, row, col);
+
+ T *ptr = reinterpret_cast<T *>(_ptr + offset);
+
+ return *ptr;
+ }
+ T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col)
+ {
+ const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
+
+ T *ptr = reinterpret_cast<T *>(_ptr + offset);
+
+ return *ptr;
+ }
+
+private:
+ size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
+ {
+ assert(1u * _shape.N > batch); // shape.N > batch
+ assert(1u * _shape.C > ch); // shape.C > ch
+ assert(1u * _shape.H > row); // shape.H > row
+ assert(1u * _shape.W > col); // shape.W > col
+
+ uint32_t res = 0;
+ res += batch * _strides.N;
+ res += ch * _strides.C;
+ res += row * _strides.H;
+ res += col * _strides.W;
+
+ return res;
+ }
+
+private:
+ // TODO Remove _shape
+ nnfw::misc::feature::Shape _shape;
+ using Strides = nnfw::misc::feature::Shape;
+ Strides _strides;
+ uint8_t *_ptr;
+ size_t _len;
+};
+
+} // namespace nchw
+} // namespace feature
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_FEATURE_NCHW_VIEW_H__
diff --git a/runtime/onert/core/include/util/feature/nhwc/Reader.h b/runtime/onert/core/include/util/feature/nhwc/Reader.h
new file mode 100644
index 000000000..f00a864a6
--- /dev/null
+++ b/runtime/onert/core/include/util/feature/nhwc/Reader.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_FEATURE_NHWC_READER_H__
+#define __ONERT_UTIL_FEATURE_NHWC_READER_H__
+
+#include <cassert>
+
+#include "backend/ITensor.h"
+#include "misc/feature/Reader.h"
+#include "misc/feature/Shape.h"
+#include "util/Utils.h"
+
+namespace onert
+{
+namespace util
+{
+namespace feature
+{
+namespace nhwc
+{
+
+template <typename T> class Reader final : public nnfw::misc::feature::Reader<T>
+{
+public:
+ // Construct for buffer of model inputs
+ Reader(const ::nnfw::misc::feature::Shape &shape, const T *ptr, size_t len)
+ : _shape{shape}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
+ {
+ UNUSED_RELEASE(len); // Workaround for unused variable in release mode
+ assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
+
+ // No padding
+ _strides.C = sizeof(T);
+ _strides.W = shape.C * sizeof(T);
+ _strides.H = shape.C * shape.W * sizeof(T);
+ _strides.N = shape.C * shape.W * shape.H * sizeof(T);
+ }
+
+ // Construct for backend tensor
+ Reader(const backend::ITensor *tensor)
+ : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ {
+ assert(tensor->layout() == ir::Layout::NHWC);
+
+ const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
+ _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
+ _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
+ _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
+ _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ _shape.C = tensor->dimension(3);
+ _shape.W = tensor->dimension(2);
+ _shape.H = tensor->dimension(1);
+ _shape.N = tensor->dimension(0);
+ }
+
+public:
+ T at(uint32_t row, uint32_t col, uint32_t ch) const override
+ {
+ const auto offset = feature_index_to_byte_offset(0, row, col, ch);
+
+ const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
+
+ return *ptr;
+ }
+ T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override
+ {
+ const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
+
+ const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
+
+ return *ptr;
+ }
+
+private:
+ size_t feature_index_to_byte_offset(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const
+ {
+ assert(1u * _shape.N > batch); // shape.N > batch
+ assert(1u * _shape.H > row); // shape.H > row
+ assert(1u * _shape.W > col); // shape.W > col
+ assert(1u * _shape.C > ch); // shape.C > ch
+
+ uint32_t res = 0;
+ res += batch * _strides.N;
+ res += row * _strides.H;
+ res += col * _strides.W;
+ res += ch * _strides.C;
+
+ return res;
+ }
+
+private:
+ // TODO Remove _shape
+ nnfw::misc::feature::Shape _shape;
+ using Strides = nnfw::misc::feature::Shape;
+ Strides _strides;
+ const uint8_t *_ptr;
+ size_t _len;
+};
+
+} // namespace nhwc
+} // namespace feature
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_FEATURE_NHWC_READER_H__
diff --git a/runtime/onert/core/include/util/feature/nhwc/View.h b/runtime/onert/core/include/util/feature/nhwc/View.h
new file mode 100644
index 000000000..1dfdfe461
--- /dev/null
+++ b/runtime/onert/core/include/util/feature/nhwc/View.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_FEATURE_NHWC_VIEW_H__
+#define __ONERT_UTIL_FEATURE_NHWC_VIEW_H__
+
+#include <cassert>
+#include <cstddef>
+
+#include "backend/ITensor.h"
+#include "misc/feature/Reader.h"
+#include "misc/feature/Shape.h"
+#include "util/Utils.h"
+
+namespace onert
+{
+namespace util
+{
+namespace feature
+{
+namespace nhwc
+{
+
+template <typename T> class View final : public nnfw::misc::feature::Reader<T>
+{
+public:
+ // Construct for buffer of model inputs
+ View(const ::nnfw::misc::feature::Shape &shape, T *ptr, size_t len)
+ : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len}
+ {
+ UNUSED_RELEASE(len); // Workaround for unused variable in release mode
+ assert(shape.N * shape.H * shape.W * shape.C * sizeof(T) == len);
+
+ // No padding
+ _strides.C = sizeof(T);
+ _strides.W = shape.C * sizeof(T);
+ _strides.H = shape.C * shape.W * sizeof(T);
+ _strides.N = shape.C * shape.W * shape.H * sizeof(T);
+ }
+
+ // Construct for backend tensor
+ View(backend::ITensor *tensor)
+ : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ {
+ assert(tensor->layout() == ir::Layout::NHWC);
+
+ const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
+ _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
+ _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
+ _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
+ _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ _shape.C = tensor->dimension(3);
+ _shape.W = tensor->dimension(2);
+ _shape.H = tensor->dimension(1);
+ _shape.N = tensor->dimension(0);
+ }
+
+public:
+ T at(uint32_t row, uint32_t col, uint32_t ch) const override
+ {
+ const auto offset = feature_index_to_byte_offset(0, row, col, ch);
+
+ const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
+
+ return *ptr;
+ }
+ T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override
+ {
+ const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
+
+ const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
+
+ return *ptr;
+ }
+
+ T &at(uint32_t row, uint32_t col, uint32_t ch)
+ {
+ const auto offset = feature_index_to_byte_offset(0, row, col, ch);
+
+ T *ptr = reinterpret_cast<T *>(_ptr + offset);
+
+ return *ptr;
+ }
+
+ T &at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch)
+ {
+ const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
+
+ T *ptr = reinterpret_cast<T *>(_ptr + offset);
+
+ return *ptr;
+ }
+
+private:
+ size_t feature_index_to_byte_offset(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const
+ {
+ assert(1u * _shape.N > batch); // shape.N > batch
+ assert(1u * _shape.H > row); // shape.H > row
+ assert(1u * _shape.W > col); // shape.W > col
+ assert(1u * _shape.C > ch); // shape.C > ch
+
+ uint32_t res = 0;
+ res += batch * _strides.N;
+ res += row * _strides.H;
+ res += col * _strides.W;
+ res += ch * _strides.C;
+
+ return res;
+ }
+
+private:
+ // TODO Remove _shape
+ nnfw::misc::feature::Shape _shape;
+ using Strides = nnfw::misc::feature::Shape;
+ Strides _strides;
+ uint8_t *_ptr;
+ size_t _len;
+};
+
+} // namespace nhwc
+} // namespace feature
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_FEATURE_NHWC_VIEW_H__
diff --git a/runtime/onert/core/include/util/logging.h b/runtime/onert/core/include/util/logging.h
new file mode 100644
index 000000000..b8357793e
--- /dev/null
+++ b/runtime/onert/core/include/util/logging.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_LOGGING_H__
+#define __ONERT_UTIL_LOGGING_H__
+
+#include <iostream>
+
+#include "util/ConfigSource.h"
+
+namespace onert
+{
+namespace util
+{
+namespace logging
+{
+
+class Context
+{
+public:
+ Context() noexcept : _enabled{false}
+ {
+ const auto env = util::getConfigBool(util::config::ONERT_LOG_ENABLE);
+
+ if (env)
+ {
+ _enabled = true;
+ }
+ }
+
+ static Context &get() noexcept;
+
+public:
+ bool enabled(void) const { return _enabled; }
+
+private:
+ bool _enabled;
+};
+
+static Context &ctx = Context::get();
+
+} // namespace logging
+} // namespace util
+} // namespace onert
+
+#define VERBOSE(name) \
+ if (::onert::util::logging::ctx.enabled()) \
+ std::cout << "[" << #name << "] "
+
+#endif // __ONERT_UTIL_LOGGING_H__
diff --git a/runtime/onert/core/src/backend/BackendContext.cc b/runtime/onert/core/src/backend/BackendContext.cc
new file mode 100644
index 000000000..f2173de44
--- /dev/null
+++ b/runtime/onert/core/src/backend/BackendContext.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/BackendContext.h"
+
+#include "ir/Operation.h"
+#include "backend/IShapeFixer.h"
+#include "backend/IConstantInitializer.h"
+
+namespace onert
+{
+namespace backend
+{
+
+void BackendContext::initialize(const std::vector<OperationInfo> &operation_list,
+ const std::vector<ir::OperandIndex> &operand_list)
+{
+ _operation_list = operation_list;
+ _operand_list = operand_list;
+}
+
+void BackendContext::fixShapes()
+{
+ for (auto &op : _operation_list)
+ {
+ _graph->operations().at(op.index).accept(*shape_fixer);
+ }
+}
+
+void BackendContext::initConsts()
+{
+ for (auto &op : _operation_list)
+ {
+ constant_initializer->setLayout(op.layout);
+ _graph->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : _operand_list)
+ {
+ const auto &obj = _graph->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerPermuteInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc
new file mode 100644
index 000000000..bfed7ff2b
--- /dev/null
+++ b/runtime/onert/core/src/compiler/BackendManager.cc
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/BackendManager.h"
+
+#include <memory>
+#include <dlfcn.h>
+
+#include "backend/Backend.h"
+#include "backend/IConfig.h"
+#include "util/logging.h"
+#include "util/ConfigSource.h"
+#include "misc/string_helpers.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+BackendManager &BackendManager::get()
+{
+ static BackendManager object;
+ return object;
+}
+
+template <typename T, class... Types>
+void BackendManager::loadObjectFromPlugin(std::shared_ptr<T> &object_of_plugin_class,
+ const std::string obj_creator_func_name, void *handle,
+ Types &&... args)
+{
+ T *(*allocate_obj)(Types && ... Args);
+ // load object creator function
+ allocate_obj = (T * (*)(Types && ... Args))dlsym(handle, obj_creator_func_name.c_str());
+ if (allocate_obj == nullptr)
+ {
+ fprintf(stderr, "BackendManager: unable to open function %s: %s\n",
+ obj_creator_func_name.c_str(), dlerror());
+ abort();
+ }
+
+ object_of_plugin_class.reset(allocate_obj(args...));
+}
+
+void BackendManager::loadBackend(const std::string &backend)
+{
+ if (get(backend) != nullptr)
+ {
+ return;
+ }
+
+ const std::string backend_plugin = "libbackend_" + backend + ".so";
+ void *handle = dlopen(backend_plugin.c_str(), RTLD_LAZY | RTLD_LOCAL);
+ if (handle == nullptr)
+ {
+ VERBOSE(BackendManager::loadBackend) << "loadBackend failed to load plugin of "
+ << backend.c_str() << " backend: " << dlerror()
+ << std::endl;
+ return;
+ }
+
+ VERBOSE(BackendManager::loadBackend) << "loaded " << backend_plugin << " as a plugin of "
+ << backend << " backend\n";
+
+ {
+ // load object creator function
+ auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create");
+ if (backend_create == nullptr)
+ {
+ fprintf(stderr, "BackendManager: unable to open function onert_backend_create : %s\n",
+ dlerror());
+ abort();
+ }
+
+ // load object creator function
+ auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy");
+ if (backend_destroy == nullptr)
+ {
+ fprintf(stderr, "BackendManager: unable to open function onert_backend_destroy : %s\n",
+ dlerror());
+ abort();
+ }
+
+ auto backend_object =
+ std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
+ auto backend_object_raw = backend_object.get();
+ bool initialized = backend_object->config()->initialize(); // Call initialize here?
+ if (!initialized)
+ {
+ VERBOSE(BackendManager::loadBackend)
+ << backend.c_str() << " backend initialization failed. Don't use this backend"
+ << std::endl;
+ dlclose(handle);
+ return;
+ }
+ _gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
+ _available_backends.push_back(backend_object_raw);
+ }
+
+ // Save backend handle (avoid warning by handle lost without dlclose())
+ auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [](void *h) { dlclose(h); }};
+ _handle_map.emplace(backend, std::move(u_handle));
+}
+
+backend::Backend *BackendManager::get(const std::string &key)
+{
+ if (_gen_map.find(key) != _gen_map.end())
+ {
+ return _gen_map.at(key).get();
+ }
+
+ return nullptr;
+}
+
+const backend::Backend *BackendManager::get(const std::string &key) const
+{
+ if (_gen_map.find(key) != _gen_map.end())
+ {
+ return _gen_map.at(key).get();
+ }
+
+ return nullptr;
+}
+
+const backend::Backend *BackendManager::getDefault() const { return get("cpu"); }
+
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/BackendResolver.cc b/runtime/onert/core/src/compiler/BackendResolver.cc
new file mode 100644
index 000000000..a47d8d2d5
--- /dev/null
+++ b/runtime/onert/core/src/compiler/BackendResolver.cc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/BackendResolver.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/CachedDataDeleter.h b/runtime/onert/core/src/compiler/CachedDataDeleter.h
new file mode 100644
index 000000000..73f00ced7
--- /dev/null
+++ b/runtime/onert/core/src/compiler/CachedDataDeleter.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_CACHED_DATA_DELETER_H__
+#define __ONERT_COMPILER_CACHED_DATA_DELETER_H__
+
+#include "ir/Index.h"
+#include "ir/OperationVisitor.h"
+#include "ir/OpSequences.h"
+#include "ir/Operands.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+class CachedDataDeleter : public ir::OperationVisitor
+{
+public:
+ CachedDataDeleter(ir::Operands &operands) : _operands(operands)
+ {
+ // DO NOTHING
+ }
+
+ virtual ~CachedDataDeleter() = default;
+
+public:
+ void run()
+ {
+ _operands.iterate(
+ [&](const ir::OperandIndex &ind, const ir::Operand &) { deleteCachedData(ind); });
+ }
+
+ void run(const ir::OpSequence &op_seq)
+ {
+ for (const auto &e : op_seq.operations())
+ {
+ const auto &node = *(e.node);
+ node.accept(*this);
+ }
+ }
+
+ // NOTE: Most layers that have big-size constants are conv and fc.
+ void visit(const ir::operation::Conv2D &node) override
+ {
+ using ir::operation::Conv2D;
+ const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+ deleteCachedData(ker_index);
+ deleteCachedData(bias_index);
+ }
+
+ void visit(const ir::operation::DepthwiseConv2D &node) override
+ {
+ using ir::operation::DepthwiseConv2D;
+ const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+ deleteCachedData(ker_index);
+ deleteCachedData(bias_index);
+ }
+
+ void visit(const ir::operation::FullyConnected &node) override
+ {
+ using ir::operation::FullyConnected;
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+ deleteCachedData(weight_index);
+ deleteCachedData(bias_index);
+ }
+
+private:
+ void deleteCachedData(const ir::OperandIndex &ind)
+ {
+ auto &obj = _operands.at(ind);
+ if (obj.isConstant())
+ {
+ assert(obj.data() != nullptr);
+ obj.releaseData();
+ }
+ }
+
+private:
+ ir::Operands &_operands;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_CACHED_DATA_DELETER_H__
diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc
new file mode 100644
index 000000000..85af843ae
--- /dev/null
+++ b/runtime/onert/core/src/compiler/Compiler.cc
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/Compiler.h"
+
+#include "ParamChecker.h"
+#include "ExecutorFactory.h"
+#include "OperationValidator.h"
+
+#include "compiler/BackendManager.h"
+#include "compiler/IScheduler.h"
+#include "compiler/ManualScheduler.h"
+#include "compiler/HEScheduler.h"
+#include "exec/ExecTime.h"
+#include "ir/operation/LowerInfo.h"
+#include "dumper/dot/DotDumper.h"
+#include "compiler/Linear.h"
+#include "interp/InterpExecutor.h"
+#include "util/ConfigSource.h"
+#include "ir/OperationDumper.h"
+#include "compiler/CachedDataDeleter.h"
+#include "misc/string_helpers.h"
+
+namespace onert
+{
+
+namespace compiler
+{
+
+CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Graph &graph)
+{
+ CompilerOptions options;
+
+ options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
+
+ options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
+ options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
+ options.op_seq_max_node = util::getConfigInt(util::config::OP_SEQ_MAX_NODE);
+ options.executor = util::getConfigString(util::config::EXECUTOR);
+ options.he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
+ options.he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
+ options.delete_cached_data = util::getConfigBool(util::config::DELETE_CACHED_DATA);
+ options.disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
+
+ {
+ // Backend for all
+ auto &ms_options = options.manual_scheduler_options;
+
+ // Default value for op_backend_all is first element in the backend list
+ ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
+
+// Opcode to Backend
+#define OP(OpName) \
+ { \
+ const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##OpName); \
+ if (!backend_str.empty()) \
+ { \
+ ms_options.opcode_to_backend[ir::OpCode::OpName] = backend_str; \
+ } \
+ }
+#include "ir/Operations.lst"
+#undef OP
+
+ // Index to Backend
+ auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
+ auto key_val_list = nnfw::misc::split(map_str, ';');
+ for (const auto &key_val_str : key_val_list)
+ {
+ if (key_val_str.empty())
+ {
+ continue;
+ }
+
+ auto key_val = nnfw::misc::split(key_val_str, '=');
+ const auto &key_str = key_val.at(0);
+ const auto &val = key_val.at(1);
+ auto key = static_cast<uint32_t>(std::stoi(key_str));
+
+ graph.operations().at(ir::OperationIndex{key}); // Check that it exists, or this will throw
+ ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
+ }
+ }
+ return options;
+}
+
+Compiler::Compiler(const std::shared_ptr<ir::Graph> &graph)
+ : _graph{graph}, _executor{nullptr}, _state{State::CREATED}
+{
+
+ // Set default values for CompilerOptions
+ // All these default values should no longer be fetched from Env once we stop
+ // supporting the Android NN API.
+ _options = fetchCompilerOptionsFromGlobalConfig(*_graph);
+}
+
+void Compiler::checkProfilerConditions()
+{
+ if (!_options.he_scheduler)
+ throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
+
+ if (_options.executor != "Dataflow")
+ throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
+}
+
+void Compiler::compile(void)
+{
+ _state = State::STARTED;
+
+ /***************************************************
+ * Prepare compilation phase
+ ***************************************************/
+
+ // Operation validation check
+ OperationValidator{*_graph}();
+
+ // Compilable check
+ if (!checkCompilable())
+ {
+ _executor = std::make_shared<interp::InterpExecutor>(*_graph);
+ return;
+ }
+
+ // Mode check
+ if (_options.he_profiling_mode)
+ checkProfilerConditions();
+
+ /***************************************************
+ * Backend independent analysis & optimization phase
+ ***************************************************/
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level);
+
+ onert::dumper::dot::DotDumper dot_dumper(*_graph, dump_level);
+ dot_dumper.dump("before_lower");
+
+ // Lower: Assign backend
+ auto lowered_graph = std::make_unique<ir::LoweredGraph>(*_graph, _options);
+
+ // NOTE. The reference count of each constant operand's data is currently 2,
+ // because both the original graph and the lowered graph hold it.
+ // To delete cached data, the deletion must be done here for the original graph
+ // and then once again for the lowered graph in ExecutorFactory.
+ // TODO. Delete this code once the work disconnecting Graph from the nnfw session lands.
+ if (_options.delete_cached_data)
+ {
+ CachedDataDeleter(_graph->operands()).run();
+ }
+
+ auto indexed_ranks = lowered_graph->indexed_ranks();
+
+ /*************************************************************
+ * Backend independent analysis & optimization phase finished
+ *************************************************************/
+
+ _state = State::LOWERED;
+
+ onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_graph.get(), dump_level);
+ dot_dumper_lowered.dump("after_lower");
+
+ ir::OperationDumper dumper;
+ _graph->operations().iterate(
+ [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
+
+ _executor = std::shared_ptr<exec::IExecutor>{
+ ExecutorFactory::get().create(std::move(lowered_graph), _options)};
+ _executor->setIndexedRanks(indexed_ranks);
+ /********************************
+ * Code generation phase finished
+ ********************************/
+ _state = State::COMPILED;
+}
+
+bool Compiler::checkCompilable()
+{
+ // Disable compile phase
+ // When ready to use interpreter backend, remove this config and use backend setting
+ if (_options.disable_compile)
+ {
+ return false;
+ }
+
+ // TODO check unspecified operand shape
+
+ // Check compilable parameter
+ ParamChecker paramChecker{_graph};
+ paramChecker();
+ if (paramChecker.haveNoneConstParam())
+ {
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace compiler
+
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
new file mode 100644
index 000000000..8d06d6bbe
--- /dev/null
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutorFactory.h"
+
+#include <functional>
+#include "exec/ExecutionObservers.h"
+#include "exec/LinearExecutor.h"
+#include "exec/DataflowExecutor.h"
+#include "exec/ParallelExecutor.h"
+#include "compiler/BackendManager.h"
+#include "compiler/ExecutionBuilder.h"
+#include "exec/ExecTime.h"
+#include "compiler/Linear.h"
+#include "backend/IConstantInitializer.h"
+#include "backend/IKernelGenerator.h"
+#include "backend/IShapeFixer.h"
+#include "backend/IOptimizer.h"
+#include "backend/ITensorRegister.h"
+#include <memory>
+#include "compiler/CachedDataDeleter.h"
+#include "util/ShapeInference.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+// Accessor for the process-wide factory instance (function-local static,
+// constructed on first use).
+ExecutorFactory &ExecutorFactory::get()
+{
+  static ExecutorFactory singleton;
+  return singleton;
+}
+
+// Register the executor creators, keyed by the executor name used in
+// CompilerOptions::executor. "Dataflow" and "Parallel" share one creator,
+// distinguished only by the trailing bool (parallel) bound here.
+ExecutorFactory::ExecutorFactory()
+{
+  _map["Linear"] = createLinearExecutor;
+  _map["Dataflow"] =
+      std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2, false);
+  _map["Parallel"] =
+      std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2, true);
+}
+
+// Build an executor for the lowered graph according to options.executor.
+// NOTE: _map.at() throws std::out_of_range for an unknown executor name.
+exec::IExecutor *ExecutorFactory::create(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+                                         const compiler::CompilerOptions &options)
+{
+  return _map.at(options.executor)(std::move(lowered_graph), options);
+}
+
+// Group the lowered graph's operations and operands by the backend they were
+// lowered to, then hand each backend context its own lists via initialize().
+// An operand may appear in more than one backend's list when its lower info
+// has multiple def factors.
+void ExecutorFactory::initializeBackendContext(ir::LoweredGraph *lowered_graph)
+{
+  struct Entry
+  {
+    std::vector<backend::BackendContext::OperationInfo> operation_list;
+    std::vector<ir::OperandIndex> operand_list;
+  };
+  std::unordered_map<const backend::Backend *, Entry> backend_assets;
+
+  // Build lists for operations
+  lowered_graph->op_seqs().iterate(
+      [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
+        auto &op_seq_li = lowered_graph->getLowerInfo()->operation;
+        auto backend = op_seq_li.at(op_seq_index)->backend();
+        for (auto &element : op_seq.operations())
+        {
+          backend_assets[backend].operation_list.emplace_back(element.index, op_seq.getLayout());
+        }
+      });
+
+  // Build lists for operands
+  lowered_graph->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+    const auto lower_info = lowered_graph->getLowerInfo(ind);
+    for (auto factor : lower_info->def_factors())
+    {
+      auto backend = factor.backend();
+      backend_assets[backend].operand_list.emplace_back(ind);
+    }
+  });
+
+  // Push the per-backend lists into their contexts
+  for (auto &pair : backend_assets)
+  {
+    auto backend = pair.first;
+    auto &arg = pair.second;
+    lowered_graph->backend_contexts().at(backend)->initialize(arg.operation_list, arg.operand_list);
+  }
+}
+
+// Register tensor info for every op sequence, in linearized order.
+// If a backend provides an ITensorRegister, it does custom registration;
+// otherwise each not-yet-registered input/output operand is registered with
+// its shape permuted from the op sequence's frontend layout to the backend
+// layout taken from the operand's (single) def factor.
+void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
+                                            const std::vector<ir::OpSequenceIndex> &order)
+{
+  for (const auto index : order)
+  {
+    const auto &op_seq = lowered_graph->op_seqs().at(index);
+    const auto backend = lowered_graph->getLowerInfo(index)->backend();
+    const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register;
+    auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
+    if (tensor_register)
+    {
+      // Custom registration
+      tensor_register->registerTensors(op_seq, lowered_graph->getLowerInfo());
+    }
+    else
+    {
+      // Default registration
+      for (const auto elem : op_seq)
+      {
+        const auto &op = *elem.node;
+        // NOTE(review): this inner `index` (operand index) shadows the outer
+        // op-sequence `index` of the enclosing loop — confusing but harmless here.
+        for (const auto &index : op.getInputs() + op.getOutputs())
+        {
+          if (!tensor_builder->isRegistered(index))
+          {
+            // getOnlyElement() implies exactly one def factor is expected here.
+            const auto &operand_lower_info =
+                lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement();
+            const auto &obj = lowered_graph->graph().operands().at(index);
+            const auto frontend_layout = op_seq.getLayout();
+            const auto backend_layout = operand_lower_info.layout();
+            ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+                                         obj.typeInfo()};
+            tensor_builder->registerTensorInfo(index, backend_info, backend_layout,
+                                               obj.isConstant());
+          }
+        }
+      }
+    }
+  }
+}
+
+// Build a LinearExecutor: run backend-context init, static shape inference,
+// backend shape fixing and optimization, linearize the op sequences, plan and
+// allocate tensors, generate kernels, then wrap everything in the executor.
+// Returns a raw pointer; the caller (Compiler) takes ownership.
+exec::IExecutor *
+ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+                                      const compiler::CompilerOptions &options)
+{
+  // Reference into the graph object owned by lowered_graph; stays valid after
+  // the std::move below because the pointee is transferred, not destroyed.
+  const auto &backend_contexts = lowered_graph->backend_contexts();
+
+  initializeBackendContext(lowered_graph.get());
+
+  // linearize
+  assert(!lowered_graph->graph().isBuildingPhase());
+
+  // Shape inference.
+  {
+    shape_inference::StaticInferer inferer(lowered_graph->graph().operands());
+    lowered_graph->op_seqs().iterate(
+        [&](const ir::OpSequenceIndex &, const ir::OpSequence &op_seq) { inferer.infer(op_seq); });
+  }
+
+  for (auto &pair : backend_contexts)
+  {
+    pair.second->fixShapes();
+  }
+
+  /*************************************************
+   * Backend dependent analysis & optimization phase
+   *************************************************/
+
+  for (auto &pair : backend_contexts)
+  {
+    auto &optimizer = pair.second->optimizer;
+    if (optimizer)
+      optimizer->optimize();
+  }
+
+  /**********************************************************
+   * Backend dependent analysis & optimization phase finished
+   **********************************************************/
+
+  /***********************
+   * Code generation phase
+   ***********************/
+
+  auto order = Linear::linearize(*lowered_graph);
+  runTensorRegistration(lowered_graph.get(), order);
+  Linear::dump(*lowered_graph, order);
+  Linear::planTensors(*lowered_graph, order);
+
+  // Deduplicated set of tensor builders across all backend contexts
+  backend::TensorBuilderSet tensor_builders;
+  for (const auto &e : lowered_graph->backend_contexts())
+  {
+    tensor_builders.insert(e.second->tensor_builder);
+  }
+
+  for (auto &tensor_builder : tensor_builders)
+  {
+    tensor_builder->prepare();
+  }
+
+  ExecutionBuilder builder;
+
+  // Generate kernels
+  lowered_graph->op_seqs().iterate(
+      [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
+        auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
+        auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
+        auto fn_seq = kernel_gen->generate(op_seq);
+        builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
+      });
+
+  for (auto &tensor_builder : tensor_builders)
+  {
+    tensor_builder->allocate();
+  }
+
+  for (auto &pair : backend_contexts)
+  {
+    pair.second->initConsts();
+  }
+
+  // Note. The best solution is not to use CachedDataDeleter but decreasing reference counts of data
+  // naturally
+  if (options.delete_cached_data)
+  {
+    CachedDataDeleter cached_data_deleter(lowered_graph->graph().operands());
+    lowered_graph->op_seqs().iterate(
+        [&](const ir::OpSequenceIndex &, const ir::OpSequence &op_seq) {
+          cached_data_deleter.run(op_seq);
+        });
+  }
+
+  auto code_map = builder.releaseCodeMap();
+
+  // Per-function prepare, followed by the owning backend's post-prepare hook
+  for (auto &it : code_map)
+  {
+    auto op_seq_index = it.first;
+    auto &fn_seq = it.second.fn_seq;
+
+    fn_seq->iterate([&](exec::IFunction &ifunc) {
+      ifunc.prepare();
+      auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
+      auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
+      tensor_builder->postFunctionPrepare();
+    });
+  }
+
+  auto exec = new exec::LinearExecutor{std::move(lowered_graph), tensor_builders,
+                                       std::move(code_map), order};
+
+  // Optional Chrome-tracing observer when a trace file path is configured
+  if (!options.trace_filepath.empty())
+  {
+    std::unique_ptr<exec::IExecutionObserver> ctp =
+        std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath);
+    exec->addObserver(std::move(ctp));
+  }
+
+  return exec;
+}
+
+// Build a DataflowExecutor (parallel == false) or ParallelExecutor
+// (parallel == true). Unlike the linear path there is no static memory plan:
+// every registered operand gets notifyFirstUse() so tensors are never
+// deallocated. Returns a raw pointer; the caller takes ownership.
+exec::IExecutor *
+ExecutorFactory::createDataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+                                        const compiler::CompilerOptions &options, bool parallel)
+{
+  // NOTE(review): this reference is still used (profiling branch below) after
+  // lowered_graph is moved into the executor. It stays valid only because the
+  // executor keeps the pointee alive — fragile; confirm if refactoring.
+  const auto &backend_contexts = lowered_graph->backend_contexts();
+
+  initializeBackendContext(lowered_graph.get());
+
+  for (auto &pair : backend_contexts)
+  {
+    pair.second->fixShapes();
+  }
+
+  auto order = Linear::linearize(*lowered_graph);
+  runTensorRegistration(lowered_graph.get(), order);
+
+  // Deduplicated set of tensor builders across all backend contexts
+  backend::TensorBuilderSet tensor_builders;
+  for (const auto &e : lowered_graph->backend_contexts())
+  {
+    tensor_builders.insert(e.second->tensor_builder);
+  }
+
+  // To make tensors never be deallocated, this is a workaround to use static memory planner
+  for (auto &tensor_builder : tensor_builders)
+  {
+    lowered_graph->graph().operands().iterate(
+        [&](const ir::OperandIndex &ind, const ir::Operand &) {
+          if (tensor_builder->isRegistered(ind))
+          {
+            tensor_builder->notifyFirstUse(ind);
+          }
+        });
+  }
+
+  for (auto &tensor_builder : tensor_builders)
+  {
+    tensor_builder->prepare();
+  }
+
+  ExecutionBuilder builder;
+
+  // Generate kernels
+  lowered_graph->op_seqs().iterate(
+      [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
+        auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
+        auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
+        auto fn_seq = kernel_gen->generate(op_seq);
+        builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
+      });
+
+  for (const auto &tensor_builder : tensor_builders)
+  {
+    tensor_builder->allocate();
+  }
+
+  for (auto &pair : backend_contexts)
+  {
+    pair.second->initConsts();
+  }
+
+  // Drop cached constant data once kernels have consumed it (optional)
+  if (options.delete_cached_data)
+  {
+    CachedDataDeleter cached_data_deleter(lowered_graph->graph().operands());
+    lowered_graph->op_seqs().iterate(
+        [&](const ir::OpSequenceIndex &, const ir::OpSequence &op_seq) {
+          cached_data_deleter.run(op_seq);
+        });
+  }
+
+  auto code_map = builder.releaseCodeMap();
+
+  // Per-function prepare, followed by the owning backend's post-prepare hook
+  for (auto &it : code_map)
+  {
+    auto op_seq_index = it.first;
+    auto &fn_seq = it.second.fn_seq;
+
+    fn_seq->iterate([&](exec::IFunction &ifunc) {
+      ifunc.prepare();
+      auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
+      auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
+      tensor_builder->postFunctionPrepare();
+    });
+  }
+
+  exec::ExecutorBase *exec = nullptr;
+  if (parallel)
+  {
+    exec =
+        new exec::ParallelExecutor{std::move(lowered_graph), tensor_builders, std::move(code_map)};
+  }
+  else
+  {
+    auto dataflow_exec =
+        new exec::DataflowExecutor{std::move(lowered_graph), tensor_builders, std::move(code_map)};
+    // Profiling observer is only attached on the (non-parallel) dataflow path
+    if (options.he_profiling_mode)
+    {
+      std::vector<const backend::Backend *> backends;
+      for (const auto &pair : backend_contexts)
+      {
+        backends.push_back(pair.first);
+      }
+      auto et = std::make_shared<exec::ExecTime>(backends);
+      std::unique_ptr<exec::IExecutionObserver> obs = std::make_unique<exec::ProfileObserver>(et);
+      dataflow_exec->addObserver(std::move(obs));
+      dataflow_exec->setProfilingMode(true);
+    }
+    exec = dataflow_exec;
+  }
+
+  // Optional Chrome-tracing observer when a trace file path is configured
+  if (!options.trace_filepath.empty())
+  {
+    std::unique_ptr<exec::IExecutionObserver> ctp =
+        std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath);
+    exec->addObserver(std::move(ctp));
+  }
+
+  return exec;
+}
+
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
new file mode 100644
index 000000000..deba18d5e
--- /dev/null
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_EXECUTOR_FACTORY_H__
+#define __ONERT_COMPILER_EXECUTOR_FACTORY_H__
+
+#include <unordered_map>
+
+#include "exec/IExecutor.h"
+#include "ir/LoweredGraph.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Singleton factory that turns a lowered graph into a concrete
+ *        executor ("Linear", "Dataflow" or "Parallel"), selected by
+ *        CompilerOptions::executor.
+ */
+class ExecutorFactory
+{
+public:
+  // Access the single factory instance
+  static ExecutorFactory &get();
+
+public:
+  // Create an executor for the given lowered graph; ownership of the returned
+  // pointer passes to the caller
+  exec::IExecutor *create(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+                          const compiler::CompilerOptions &options);
+
+private:
+  ExecutorFactory();
+
+private:
+  // Shared building steps used by both creator functions
+  static void initializeBackendContext(ir::LoweredGraph *lowered_graph);
+  static void runTensorRegistration(ir::LoweredGraph *lowered_graph,
+                                    const std::vector<ir::OpSequenceIndex> &order);
+  static exec::IExecutor *createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+                                               const compiler::CompilerOptions &options);
+  static exec::IExecutor *createDataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+                                                 const compiler::CompilerOptions &options,
+                                                 bool parallel);
+
+private:
+  // Executor name -> creator function
+  std::unordered_map<std::string,
+                     std::function<exec::IExecutor *(std::unique_ptr<ir::LoweredGraph>,
+                                                     const compiler::CompilerOptions &options)>>
+      _map;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_EXECUTOR_FACTORY_H__
diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc
new file mode 100644
index 000000000..f9a264908
--- /dev/null
+++ b/runtime/onert/core/src/compiler/HEScheduler.cc
@@ -0,0 +1,615 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Operand.h"
+#include "compiler/HEScheduler.h"
+#include "ir/Graph.h"
+#include "util/ConfigSource.h"
+#include "compiler/BackendResolver.h"
+#include "backend/IShapeFixer.h"
+#include "util/logging.h"
+#include "util/Utils.h"
+#include "exec/FunctionSequence.h"
+#include <cassert>
+#include <cmath>
+#include <chrono>
+
+namespace onert
+{
+
+namespace compiler
+{
+// Sum of total_size() (bytes) over all input and output operands of the node;
+// used as the "size" key for exec-time lookups.
+static uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::Operation &node)
+{
+  uint32_t size = 0;
+  for (const auto &ind : node.getInputs() + node.getOutputs())
+  {
+    size += graph.operands().at(ind).info().total_size();
+  }
+  return size;
+}
+
+// True if any input operand of the node is QUANT8_ASYMM, i.e. the node runs
+// on quantized data (outputs are not inspected).
+static bool isQuant(const ir::Graph &graph, const ir::Operation &node)
+{
+  for (const auto &input : node.getInputs())
+  {
+    const auto &obj = graph.operands().at(input);
+    if (obj.typeInfo().type() == ir::DataType::QUANT8_ASYMM)
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns true when the (backend, node) pair must be skipped by the scheduler
+// because the stored exec-time records cannot distinguish unsupported cases
+// (broadcast / quantized variants) from supported ones.
+static bool isWorkaroundSkip(const ir::Graph &graph, const backend::Backend *backend,
+                             const ir::Operation &node, bool quant)
+{
+  /* TODO: this is workaround, come up with better solution if have.
+      Adding exception in stage doesn't help. Because if there is a record for add without
+      broadcast, scheduling will select it since it doesn't distinguish broadcast and
+      non-broadcast like it does for quant non-quantized*/
+  if (backend->config()->id() == "cpu" &&
+      (node.opcode() == ir::OpCode::Add || node.opcode() == ir::OpCode::Sub ||
+       node.opcode() == ir::OpCode::Mul))
+  {
+    const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
+    const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+    /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record with and
+     * without broadcasting*/
+    if (!(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
+    {
+      return true;
+    }
+  }
+  /* TODO: this is workaround, come up with better solution if have.
+          Adding exception in stage doesn't help. Because if there is a record for Mul without
+          broadcast, scheduling will select it since it doesn't distinguish broadcast and
+          non-broadcast like it does for quant non-quantized*/
+  else if (backend->config()->id() == "acl_neon" && node.opcode() == ir::OpCode::Mul)
+  {
+    const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
+    const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
+
+    // Nontrivial broadcasting isn't supported yet
+    if (quant ||
+        !(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
+// if a node can be merged into op_seq: true when the node has at most one
+// non-constant producing input and none of its inputs feed multiple consumers
+// (i.e. it sits on a single linear chain).
+static bool isMergeable(const ir::Graph &graph, const ir::Operation &node)
+{
+  size_t prev_op_cnt = 0;
+  for (const auto &input : node.getInputs())
+  {
+    // only valid_inputs
+    const auto &operand = graph.operands().at(input);
+    if (operand.isConstant())
+      continue;
+
+    // This operand is output of operation, not weight or bias
+    if (operand.getDef().list().size() > 0)
+      ++prev_op_cnt;
+
+    // Current node has multiple inputs as concat or at the beginning of the separated branch
+    if (prev_op_cnt > 1 || operand.getUses().list().size() > 1)
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Profiling-mode scheduling: round-robin the ranked operations across all
+// backends (skipping workaround cases and backends with getMax() records) so
+// that between-backend data-transfer timings get measured too.
+void HEScheduler::scheduleShufflingBackends()
+{
+  VERBOSE(HEScheduler::schedule)
+      << "Started task scheduling: uses all backends to get more metrics for data transfer"
+      << std::endl;
+  size_t backend_ind = 0;
+  for (const auto &rank : _rank_to_op)
+  {
+    VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second.value() << ")" << std::endl;
+    const auto &node = _graph->operations().at(rank.second);
+    const bool quant = isQuant(*_graph, node);
+    const auto size = getOperationsFlattenedIOSize(*_graph, node);
+    // Inner loop: try up to _all_backends.size() candidates starting at the
+    // current round-robin position
+    for (size_t i = 0;; ++i)
+    {
+      if (i == _all_backends.size())
+      {
+        // wasn't able to find backend
+        assert(false);
+        break;
+      }
+      if (backend_ind == _all_backends.size())
+      {
+        backend_ind = 0;
+      }
+      if (isWorkaroundSkip(*_graph, _all_backends[backend_ind], node, quant))
+      {
+        ++backend_ind;
+        continue;
+      }
+      const auto exec_time =
+          _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size);
+      // Scheduling to measure data transfer must be done after measuring all backends separately
+      assert(exec_time != _exec_time->NOT_FOUND);
+      // getMax() marks an unsupported (backend, op) pair — move on
+      if (exec_time == _exec_time->getMax())
+      {
+        ++backend_ind;
+        continue;
+      }
+      _backend_resolver->setBackend(rank.second, _all_backends[backend_ind]);
+      VERBOSE(HEScheduler::schedule) << "backend for " << node.name() << " is "
+                                     << _all_backends[backend_ind]->config()->id() << std::endl;
+      ++backend_ind;
+      break;
+    }
+  }
+}
+
+// True when an exec-time record exists for this node on every backend
+// (i.e. profiling for the node is complete).
+bool HEScheduler::isNodeProfiled(const ir::Operation &node)
+{
+  const bool quant = isQuant(*_graph, node);
+  const auto size = getOperationsFlattenedIOSize(*_graph, node);
+  for (const auto *backend : _all_backends)
+  {
+    const auto exec_time = _exec_time->getOperationExecTime(backend, node.name(), quant, size);
+    if (exec_time == _exec_time->NOT_FOUND)
+      return false;
+  }
+  return true;
+}
+
+// Schedule a linear chain ("branch") starting at index: each node is assigned
+// a backend, and the walk continues through the single consumer of the node's
+// single output as long as the next node is mergeable and schedule() keeps the
+// parent's backend.
+void HEScheduler::scheduleBranch(const ir::OperationIndex &index,
+                                 ir::OperationIndexMap<bool> &scheduled)
+{
+  auto loc_index = index;
+  const backend::Backend *parent_backend = nullptr;
+  while (true)
+  {
+    // Already handled by an earlier branch walk
+    if (scheduled[loc_index])
+    {
+      return;
+    }
+    // schedule() returns false when the chosen backend differs from the parent's
+    if (!schedule(loc_index, parent_backend))
+    {
+      return;
+    }
+    scheduled[loc_index] = true;
+    parent_backend = _backend_resolver->getBackend(loc_index);
+
+    const auto &node = _graph->operations().at(loc_index);
+    /* get the only output operand, that is input of the next single operation
+     * and just this nodes output.*/
+    if (node.getOutputs().size() != 1)
+    {
+      return;
+    }
+    const auto &only_out_operand = _graph->operands().at(*node.getOutputs().begin());
+    loc_index = only_out_operand.getUses().list().front();
+    /* verify, that next node is neither beginning nor ending node of a branch*/
+    const auto &next_node = _graph->operations().at(loc_index);
+    if (!isMergeable(*_graph, next_node))
+    {
+      return;
+    }
+  }
+}
+
+// Main entry point: rank all operations, then either shuffle backends (when
+// profiling mode is on and every node is already profiled) or greedily assign
+// each ranked branch the backend with the smallest earliest-finish time.
+// Returns the populated backend resolver (ownership transferred).
+std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const ir::Graph &graph)
+{
+  _graph = &graph;
+  VERBOSE(HEScheduler::schedule) << "task scheduling started" << std::endl;
+  // Make ranks and save in descending order
+  makeRank();
+
+  // Every backend starts with an empty timeline {0, 0}
+  for (const auto *backend : _all_backends)
+  {
+    _backends_avail_time.emplace(backend, std::map<int64_t, int64_t>{{0, 0}});
+  }
+
+  if (_is_profiling_mode)
+  {
+    // Check if profiling info about all backend/node pairs already exists
+    bool all_nodes_are_profiled = true;
+    _graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
+      if (all_nodes_are_profiled)
+        all_nodes_are_profiled = isNodeProfiled(op);
+    });
+
+    // If all nodes are already profiled - schedule backends in such order, so more profiling
+    // information about between-backends data transfer could be collected
+    if (all_nodes_are_profiled)
+    {
+      scheduleShufflingBackends();
+      VERBOSE(HEScheduler::schedule) << "task scheduling finished" << std::endl;
+      // std::move is required here: _backend_resolver is a member
+      return std::move(_backend_resolver);
+    }
+  }
+
+  ir::OperationIndexMap<bool> visited;
+  graph.operations().iterate(
+      [&](const ir::OperationIndex &index, const ir::Operation &) { visited[index] = false; });
+  // for each task select the backend with the smallest earliest finishing time(eft)
+  for (const auto &rank : _rank_to_op)
+  {
+    scheduleBranch(rank.second, visited);
+  }
+  VERBOSE(HEScheduler::schedule) << "task scheduling finished" << std::endl;
+  return std::move(_backend_resolver);
+}
+
+// Exec time of `operation` on `backend`: the profiled record if one exists,
+// otherwise 1 when the op is known-supported and getMax() when it is not.
+int64_t HEScheduler::getOpTime(const backend::Backend *backend, const std::string &operation,
+                               bool quant, uint32_t size)
+{
+  const auto time = _exec_time->getOperationExecTime(backend, operation, quant, size);
+  if (time != _exec_time->NOT_FOUND)
+    return time;
+
+  return _is_supported.at(backend).at(operation) ? 1 : _exec_time->getMax();
+}
+
+// Data-transfer (permute) time between two backends: profiled record if
+// available, otherwise a size-proportional penalty.
+int64_t HEScheduler::getPermuteTime(const backend::Backend *src_backend,
+                                    const backend::Backend *dst_backend, bool quant, uint32_t size)
+{
+  const auto time = _exec_time->getPermuteTime(src_backend, dst_backend, quant, size);
+  if (time != _exec_time->NOT_FOUND)
+    return time;
+
+  // Makes the scheduler prefer keeping computations on one backend
+  return size / 200;
+}
+
+// Probe whether `backend` supports `node` by running its shape fixer; the
+// result is cached in _is_supported. Returns 1 (nominal time) when supported,
+// getMax() when not. Outside profiling mode the backend is marked unsupported
+// without probing, so only profiled backends get scheduled.
+int64_t HEScheduler::tryBackend(const ir::Operation &node, const backend::Backend *backend)
+{
+  // if there is no profiling info don't use this backend during scheduling
+  if (!_is_profiling_mode)
+  {
+    VERBOSE(HEScheduler::tryBackend)
+        << "Trying to HE schedule while there is no profiling info for " << node.name()
+        << " on backend " << backend->config()->id() << ". So this backend won't be used. "
+        << std::endl;
+    _is_supported[backend][node.name()] = false;
+    return _exec_time->getMax();
+  }
+  // Return cached probe result if present
+  auto iter = _is_supported.find(backend);
+  if (iter != _is_supported.end())
+  {
+    auto it2 = iter->second.find(node.name());
+    if (it2 != iter->second.end())
+    {
+      return _is_supported[backend][node.name()] ? 1 : _exec_time->getMax();
+    }
+  }
+  try
+  {
+    // The shape fixer throws std::runtime_error for unsupported ops
+    node.accept(*_backend_contexts.at(backend)->shape_fixer);
+
+    _is_supported[backend][node.name()] = true;
+  }
+  catch (std::runtime_error &e)
+  {
+    _is_supported[backend][node.name()] = false;
+  }
+  return _is_supported[backend][node.name()] ? 1 : _exec_time->getMax();
+}
+
+// Compute a rank for every operation via DFSMaxRank, then assert all nodes
+// were covered. Results land in _rank_to_op (rank -> op, multimap order) and
+// _op_to_rank.
+void HEScheduler::makeRank()
+{
+  VERBOSE(HEScheduler::makeRank) << "task prioritizing" << std::endl;
+
+  _graph->operations().iterate(
+      [&](const ir::OperationIndex &index, const ir::Operation &) { DFSMaxRank(index); });
+
+  // Check that ranks are calculated for all operations(nodes)
+  _graph->operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
+    UNUSED_RELEASE(index);
+    assert(_op_to_rank->find(index) != _op_to_rank->end());
+  });
+  VERBOSE(HEScheduler::makeRank) << "task prioritizing finished" << std::endl;
+}
+
+// Recursively compute the rank of one operation (memoized in _op_to_rank):
+// average exec time over supporting backends, scaled by the standard
+// deviation of those times, plus the maximum child rank (HEFT-style upward
+// rank). Throws when no backend supports the op.
+int64_t HEScheduler::DFSMaxRank(const ir::OperationIndex &index)
+{
+  // Memoization: reuse a previously computed rank
+  auto op_to_rank_it = _op_to_rank->find(index);
+  if (op_to_rank_it != _op_to_rank->end())
+    return op_to_rank_it->second;
+
+  const auto &node = _graph->operations().at(index);
+  int64_t rank = 0;
+  const bool quant = isQuant(*_graph, node);
+  const auto size = getOperationsFlattenedIOSize(*_graph, node);
+  auto supported_backends_quantity = static_cast<int64_t>(_all_backends.size());
+
+  const auto max_child_rank = DFSChildrenMaxRank(index);
+
+  // get average exec time of this op
+  for (const auto &backend : _all_backends)
+  {
+    auto exec_time = _exec_time->getOperationExecTime(backend, node.name(), quant, size);
+    if (exec_time == _exec_time->NOT_FOUND)
+    {
+      exec_time = tryBackend(node, backend);
+    }
+    if (exec_time < _exec_time->getMax())
+    {
+      rank += exec_time;
+    }
+    else
+    {
+      // this operation isn't supported in this backend
+      --supported_backends_quantity;
+    }
+  }
+  if (supported_backends_quantity == 0)
+  {
+    throw std::runtime_error{"Encountered unsupported op: " + node.name()};
+  }
+  rank /= supported_backends_quantity;
+
+  // get standard deviation
+  // NOTE(review): the local is named `std`; qualified names like std::sqrt
+  // still resolve, but the shadowing is confusing. Also note the
+  // static_cast<int> narrows from int64_t.
+  int64_t std = 0;
+  for (const auto backend : _all_backends)
+  {
+    const auto exec_time = getOpTime(backend, node.name(), quant, size);
+    if (exec_time < _exec_time->getMax())
+    {
+      std += (exec_time - rank) * (exec_time - rank);
+    }
+  }
+  std /= supported_backends_quantity;
+  if (std > 0)
+  {
+    std = static_cast<int>(std::sqrt(std));
+    rank *= std;
+  }
+  rank += max_child_rank;
+
+  assert(rank >= 0);
+  _rank_to_op.emplace(rank, index);
+  _op_to_rank->emplace(index, rank);
+  VERBOSE(HEScheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.name()
+                                   << " is " << rank << std::endl;
+
+  return rank;
+}
+
+// Maximum over all consumers of this node's outputs of
+// (child rank + average cross-backend transfer cost for the connecting
+// operand). Used by DFSMaxRank to propagate ranks upward.
+int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index)
+{
+  const auto &node = _graph->operations().at(index);
+  int64_t max_child_rank = 0;
+  for (const auto &output : node.getOutputs())
+  {
+    const auto &operand = _graph->operands().at(output);
+    const bool quant = operand.typeInfo().type() == ir::DataType::QUANT8_ASYMM;
+    // average data transfer cost of this operand's data
+    int64_t avg_transfer_cost = 1;
+    for (const auto *backend : _all_backends)
+    {
+      for (const auto *other_backend : _all_backends)
+      {
+        if (backend == other_backend)
+        {
+          continue;
+        }
+        auto transfer_cost =
+            getPermuteTime(backend, other_backend, quant, operand.info().total_size());
+        avg_transfer_cost += transfer_cost;
+      }
+    }
+    // Divides by backend count, not by ordered-pair count — averaged per backend
+    avg_transfer_cost /= _all_backends.size();
+    for (const auto &use : operand.getUses().list())
+    {
+      const auto cur_child_rank = DFSMaxRank(use);
+      max_child_rank = std::max(max_child_rank, cur_child_rank + avg_transfer_cost);
+    }
+  }
+  return max_child_rank;
+}
+
+// Earliest start time >= starting_time at which `backend` has a free gap of
+// at least time_amount, given its timeline map (finish time -> start time).
+int64_t HEScheduler::backendAvailableTime(const backend::Backend *backend,
+                                          const int64_t &starting_time, const int64_t &time_amount)
+{
+  // NOTE(review): this copies the whole std::map on every call; a
+  // `const auto &` would avoid the copy without changing behavior.
+  const auto backend_times = _backends_avail_time.at(backend);
+  // finishing and starting times of an op, that will come after current op
+  auto next_op_fst = backend_times.upper_bound(starting_time);
+  // finishing time of an op, that will come before current op
+  auto prev_op_ft = starting_time;
+  // until reach the "hole/gap", that is enough to run this op
+  while (next_op_fst != backend_times.end() && next_op_fst->second - prev_op_ft <= time_amount)
+  {
+    prev_op_ft = next_op_fst->first + 1;
+    ++next_op_fst;
+  }
+  return prev_op_ft;
+}
+
+// Assign the backend with the smallest earliest-finish time (EFT) to one
+// operation. Returns false (without committing anything) when the node is in
+// a branch and the best backend differs from the parent's; otherwise records
+// the EFT, reserves timeline slots (including permutations on the cpu
+// backend) and sets the backend in the resolver.
+bool HEScheduler::schedule(const ir::OperationIndex &index, const backend::Backend *parent_backend)
+{
+  VERBOSE(HEScheduler::schedule) << "scheduling (" << index.value() << ")" << std::endl;
+  int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
+  const auto &node = _graph->operations().at(index);
+
+  std::multimap<int64_t, int64_t> selected_transfer_st_exec_time;
+  // select the backend with the smallest eft of this task
+  const backend::Backend *chosen_backend = nullptr;
+  for (const auto *backend : _all_backends)
+  {
+    std::multimap<int64_t, int64_t> transfer_st_exec_time;
+    const auto est_and_et = ESTAndExecTime(backend, index, transfer_st_exec_time);
+
+    if (eft > est_and_et.first + est_and_et.second)
+    {
+      eft = est_and_et.first + est_and_et.second;
+      selected_exec_time = est_and_et.second;
+      chosen_backend = backend;
+      selected_transfer_st_exec_time = transfer_st_exec_time;
+    }
+  }
+
+  if (chosen_backend == nullptr)
+  {
+    throw std::runtime_error{"Fail to choose backend on scheduler"};
+  }
+
+  // this is part of a branch and it is assigned another backend
+  if (parent_backend && parent_backend != chosen_backend)
+  {
+    return false;
+  }
+  // Commit the required permutations into the cpu backend's timeline
+  for (const auto &it : selected_transfer_st_exec_time)
+  {
+    auto prev_op_ft = backendAvailableTime(_cpu_backend, it.first, it.second);
+    _backends_avail_time[_cpu_backend].insert({prev_op_ft + it.second, prev_op_ft});
+  }
+
+  // Record this op's finish time and occupy [eft - exec_time, eft] on the backend
+  _ops_eft[index] = eft;
+  _backends_avail_time[chosen_backend].emplace(eft, eft - selected_exec_time);
+  _backend_resolver->setBackend(index, chosen_backend);
+
+  VERBOSE(HEScheduler::schedule) << "backend for " << node.name() << " is "
+                                 << chosen_backend->config()->id() << ". Its eft: " << eft
+                                 << std::endl;
+  return true;
+}
+
+// Estimate (earliest start time, exec time) of running `index` on `backend`,
+// filling transfer_st_exec_time with the permutations (start time -> cost)
+// needed to bring inputs from other backends. Permutations are temporarily
+// inserted into the cpu timeline to compute a consistent EST, then removed;
+// they are only committed later by schedule() for the chosen backend.
+std::pair<int64_t, int64_t>
+HEScheduler::ESTAndExecTime(const backend::Backend *backend, const ir::OperationIndex &index,
+                            std::multimap<int64_t, int64_t> &transfer_st_exec_time)
+{
+  // Permutation will cause creating a separate op_seq that contains just this permutation node.
+  // This isn't needed for Linear executor since it doesn't use op_seqs
+  // Number 1 ms is picked experimentally
+  int64_t permute_fine = 1000;
+  // Multiply cpu operations' exec time by 2 because in parallel executor it might be busy with
+  // permutation on other branches or non-nnfw specific tasks and have to wait for it.
+  // Number 2 is picked experimentally
+  const int64_t CPU_DELAY = 2;
+  const auto &node = _graph->operations().at(index);
+  const bool quant = isQuant(*_graph, node);
+  const auto size = getOperationsFlattenedIOSize(*_graph, node);
+  // if this node can be part of a op_seq, then assigning different backend will cause creating
+  // another op_seq
+  if (isMergeable(*_graph, node))
+  {
+    permute_fine *= 2;
+  }
+  if (isWorkaroundSkip(*_graph, backend, node, quant))
+  {
+    return {_exec_time->getMax(), _exec_time->getMax()};
+  }
+  // get average exec time of the op on this backend
+  auto exec_time = getOpTime(backend, node.name(), quant, size);
+  if (backend->config()->id() == "cpu" && _is_parallel_exec)
+  {
+    exec_time *= CPU_DELAY;
+  }
+
+  // get max eft of direct (one level above) predecessors
+  auto max_pred_eft = predMaxEFT(backend, node, transfer_st_exec_time);
+
+  int64_t total_transfer_cost = 0;
+  std::vector<std::multimap<int64_t, int64_t>::iterator> inserted_permutations;
+  // Find free time for data transferring and insert it into backend taskset. This is needed:
+  // 1. Time for multiple permutations for this node's input is found correctly
+  // 2. If backend==cpu, then free time for this node must come after permutations
+  for (auto &it : transfer_st_exec_time)
+  {
+    if (_is_parallel_exec)
+    {
+      it.second *= CPU_DELAY;
+    }
+    if (!_is_linear_exec)
+    {
+      it.second += permute_fine;
+    }
+    total_transfer_cost += it.second;
+
+    const auto prev_op_ft = backendAvailableTime(_cpu_backend, it.first, it.second);
+
+    max_pred_eft = std::max(max_pred_eft, prev_op_ft + it.second);
+
+    const auto tmp = _backends_avail_time[_cpu_backend].emplace(prev_op_ft + it.second, prev_op_ft);
+    inserted_permutations.push_back(tmp.first);
+  }
+  // find the hole/gap, where this op can be put or the finishing time of the last assigned op
+  auto prev_op_ft = backendAvailableTime(backend, max_pred_eft, exec_time);
+
+  // Remove inserted permutation from cpu's task set
+  for (const auto &it : inserted_permutations)
+  {
+    _backends_avail_time[_cpu_backend].erase(it);
+  }
+
+  /* In case non-parallel executor measure just exec time and data transfer time
+   * because EFT(prev_op_ft) is the same for all backends. Since two operations
+   * can't be run simultaneously, finish of running operation must be waited for.
+   * When an operation starts, all backends are free. So, they need time just for
+   * data transfer.*/
+  if (!_is_parallel_exec)
+  {
+    VERBOSE(HEScheduler::ESTAndExecTime)
+        << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
+        << backend->config()->id() << " is " << exec_time
+        << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
+
+    return {total_transfer_cost, exec_time};
+  }
+  VERBOSE(HEScheduler::ESTAndExecTime)
+      << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
+      << backend->config()->id() << ": " << exec_time
+      << " microseconds. Backend available time: " << prev_op_ft
+      << " Parent's max eft: " << max_pred_eft - total_transfer_cost
+      << " data transfer cost: " << total_transfer_cost << std::endl;
+
+  return {prev_op_ft, exec_time};
+}
+
+// Maximum earliest-finish time over the node's direct predecessors. For each
+// predecessor scheduled on a different backend, a transfer entry
+// (predecessor EFT -> permute cost) is appended to transfer_st_exec_time.
+int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::Operation &node,
+                                std::multimap<int64_t, int64_t> &transfer_st_exec_time)
+{
+  int64_t max_pred_eft = 0;
+  for (const auto &input_operand_idx : node.getInputs())
+  {
+    const auto &input_operand = _graph->operands().at(input_operand_idx);
+    const bool quant = input_operand.typeInfo().type() == ir::DataType::QUANT8_ASYMM;
+
+    for (const auto &input_node_idx : input_operand.getDef().list())
+    {
+      // Data transfer cost from parent's node backend to current node's backend:
+      auto parent_backend = _backend_resolver->getBackend(input_node_idx);
+
+      max_pred_eft = std::max(max_pred_eft, _ops_eft.at(input_node_idx));
+      if (parent_backend != backend)
+      {
+        // Multiply operand size by 2 because size must describe input+output size
+        int64_t transfer_cost =
+            getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
+        transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost);
+      }
+    }
+  }
+  return max_pred_eft;
+}
+
+} // namespace compiler
+
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h
new file mode 100644
index 000000000..dbd71d4cb
--- /dev/null
+++ b/runtime/onert/core/src/compiler/HEScheduler.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file HEScheduler.h
+ * @brief This file contains HEScheduler class to define and run task Heterogeneous Execution
+ * Scheduler
+ */
+
+#ifndef __ONERT_COMPILER_H_E_SCHEDULER_H_
+#define __ONERT_COMPILER_H_E_SCHEDULER_H_
+
+#include "compiler/IScheduler.h"
+#include "compiler/BackendManager.h"
+#include "compiler/Compiler.h"
+#include "ir/Graph.h"
+#include "exec/ExecTime.h"
+#include "backend/Backend.h"
+#include <memory>
+#include "ir/OperationIndexMap.h"
+#include <map>
+#include <memory>
+
+namespace onert
+{
+
+namespace compiler
+{
+/**
+ * @brief Class to schedule tasks
+ */
+class HEScheduler : IScheduler
+{
+public:
+ /**
+ * @brief Construct a new Heterogeneous Execution Scheduler object
+ * @param[in] model Graph model
+ * @param[in] backend_resolver backend resolver
+ */
+ HEScheduler(const backend::BackendContexts &backend_contexts, const CompilerOptions &options)
+ : _backend_contexts{backend_contexts}, _is_supported{}, _backends_avail_time{}, _ops_eft{},
+ _op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()},
+ _is_profiling_mode{options.he_profiling_mode},
+ _is_linear_exec{options.executor == "Linear"},
+ _is_parallel_exec{options.executor == "Parallel"}
+ {
+ for (auto &entry : backend_contexts)
+ {
+ _all_backends.push_back(entry.first);
+ }
+ _backend_resolver = std::make_unique<compiler::BackendResolver>();
+ _exec_time = std::make_unique<exec::ExecTime>(_all_backends);
+
+ // Find cpu backend
+ auto cpu_backend_it = std::find_if(
+ _all_backends.begin(), _all_backends.end(),
+ [](const backend::Backend *backend) { return backend->config()->id() == "cpu"; });
+ if (cpu_backend_it == _all_backends.end())
+ throw std::runtime_error("HEScheduler could be used only if 'cpu' backend is available");
+ _cpu_backend = *cpu_backend_it;
+ }
+
+public:
+ /**
+ * @brief Task scheduling
+ *
+ * @note The main idea is taken from HSIP algo:
+ * https://www.hindawi.com/journals/sp/2016/3676149/
+ */
+ std::unique_ptr<compiler::BackendResolver> schedule(const ir::Graph &graph) final;
+ std::shared_ptr<ir::OperationIndexMap<int64_t>> getIndexedRanks() { return _op_to_rank; }
+
+private:
+ bool isNodeProfiled(const ir::Operation &);
+
+ bool schedule(const ir::OperationIndex &, const backend::Backend *parent_backend);
+ /**
+ * @brief Get earliest starting time and execution time of an operation on a backend.
+ *
+ * @note Returns a time when operation's inputs are ready and backend is available
+ * It also returns exec time. If this is "cpu" backend, then exec_time*CPU_DELAY
+ *
+ * @param[in] backend: backend, for which to return the time
+ * @param[in] index: index of an operation
+ * @param[out] transfer_st_exec_time: est and exec time of data transfer operation
+ *
+ * @return earliest starting time and execution time
+ */
+ std::pair<int64_t, int64_t>
+ ESTAndExecTime(const backend::Backend *backend, const ir::OperationIndex &index,
+ std::multimap<int64_t, int64_t> &transfer_st_exec_time);
+ /**
+ * @brief Returns the latest finishing time of parents of a node.
+ *
+ * @param[in] backend: backend, for which to return the time
+ * @param[in] node: node to get eft of parents
+ * @param[out] transfer_st_exec_time: est and exec time of data transfer operation
+ *
+ * @return earliest finishing time of parent nodes
+ */
+ int64_t predMaxEFT(const backend::Backend *backend, const ir::Operation &node,
+ std::multimap<int64_t, int64_t> &transfer_st_exec_time);
+
+ void makeRank();
+
+ int64_t DFSMaxRank(const ir::OperationIndex &index);
+
+ int64_t DFSChildrenMaxRank(const ir::OperationIndex &index);
+ /**
+ * @brief Returns the time, when backend is available for at least given amount of time.
+ *
+ * @note Returns either a hole/gap between two already scheduled operations,
+ * or the finishing time of the last scheduled operation
+ *
+ * @param[in] backend backend, for which to return the time
+ * @param[in] starting_time time, starting which to look for gap
+ * @param[in] time_amount amount of time for which to look for a gap
+ *
+ * @return time, when backend has at least time_amount free time
+ */
+ int64_t backendAvailableTime(const backend::Backend *backend, const int64_t &starting_time,
+ const int64_t &time_amount);
+
+ int64_t getOpTime(const backend::Backend *backend, const std::string &operation, bool quant,
+ uint32_t size);
+
+ int64_t getPermuteTime(const backend::Backend *src_backend, const backend::Backend *dst_backend,
+ bool quant, uint32_t size);
+
+ void scheduleShufflingBackends();
+
+ int64_t tryBackend(const ir::Operation &node, const backend::Backend *backend);
+
+ /**
+ * @brief Schedule a node and its successor until:
+ * 1. there is no branching or connection of multiple branches
+ * 2. for subsequent nodes: a backend other than the predecessor's backend is preferred
+ *
+ * @param[in] index: index of an operation
+ * @param[in] scheduled: a map to check if this node has already been scheduled
+ *
+ * @return N/A
+ */
+ void scheduleBranch(const ir::OperationIndex &index, ir::OperationIndexMap<bool> &scheduled);
+
+private:
+ // This variable stores backend/node pairs with unknown execution time, and hints scheduler
+ // whether it should assign these backends to these nodes:
+ // * It stores false for unsupported nodes
+ // * During rank calculation with enabled profiling mode it stores true for supported nodes
+ const backend::BackendContexts &_backend_contexts;
+ std::unordered_map<const backend::Backend *, std::unordered_map<std::string, bool>> _is_supported;
+ // Finishing and starting time of each backend
+ std::unordered_map<const backend::Backend *, std::map<int64_t, int64_t>> _backends_avail_time;
+ ir::OperationIndexMap<int64_t> _ops_eft;
+ std::multimap<int64_t, ir::OperationIndex, std::greater<int64_t>> _rank_to_op;
+ std::shared_ptr<ir::OperationIndexMap<int64_t>> _op_to_rank;
+ std::unique_ptr<compiler::BackendResolver> _backend_resolver;
+ std::unique_ptr<exec::ExecTime> _exec_time;
+ const ir::Graph *_graph{nullptr};
+ std::vector<const backend::Backend *>
+ _all_backends; // TODO Remove this and use _backend_contexts instead
+ const backend::Backend *_cpu_backend{nullptr};
+ bool _is_profiling_mode;
+ bool _is_linear_exec;
+ bool _is_parallel_exec;
+};
+
+} // namespace compiler
+
+} // namespace onert
+
+#endif // __ONERT_COMPILER_H_E_SCHEDULER_H_
diff --git a/runtime/onert/core/src/compiler/IScheduler.h b/runtime/onert/core/src/compiler/IScheduler.h
new file mode 100644
index 000000000..5e9b9bd3c
--- /dev/null
+++ b/runtime/onert/core/src/compiler/IScheduler.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_CORE_COMPILER_I_SCHEDULER_H__
+#define __ONERT_CORE_COMPILER_I_SCHEDULER_H__
+
+#include "compiler/BackendResolver.h"
+#include "ir/Graph.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+struct IScheduler
+{
+ virtual ~IScheduler() = default;
+
+ virtual std::unique_ptr<BackendResolver> schedule(const ir::Graph &graph) = 0;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_CORE_COMPILER_I_SCHEDULER_H__
diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc
new file mode 100644
index 000000000..7c658de95
--- /dev/null
+++ b/runtime/onert/core/src/compiler/Linear.cc
@@ -0,0 +1,280 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+
+#include "Linear.h"
+
+#include "backend/IShapeFixer.h"
+#include "backend/IConfig.h"
+#include "backend/IConstantInitializer.h"
+#include "backend/ITensorRegister.h"
+#include "backend/Backend.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+std::vector<ir::OpSequenceIndex> Linear::linearize(const ir::LoweredGraph &lowered_graph)
+{
+ std::vector<ir::OpSequenceIndex> order;
+ {
+ const ir::Graph &graph = lowered_graph.graph();
+ const ir::OpSequences &op_seqs = lowered_graph.op_seqs();
+ const ir::Operands &operands = graph.operands();
+ // op_seqs can't look up an op_seq by an operand, so input_to_op_seqs offers that mapping
+ std::unordered_map<ir::OperandIndex, std::list<ir::OpSequenceIndex>> input_to_op_seqs;
+
+ // Get the relations between input/op_seq to be used for dfs-post-iter
+ //
+ // [0] # input -> _input_to_op_seqs[0] = {OP_SEQS0}
+ // |
+ // [OP_SEQS0]
+ // |
+ // [1]---------. # input -> _input_to_op_seqs[1] = {OP_SEQS1, OP_SEQS2}
+ // | |
+ // [OP_SEQS1] [OP_SEQS2]
+ // | |
+ // [2] [3] # input -> _input_to_op_seqs[2] = {OP_SEQS3}
+ // \ / # input -> _input_to_op_seqs[3] = {OP_SEQS3}
+ // [OP_SEQS3]
+ // |
+ // [4]
+ op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_idx, const ir::OpSequence &op_seq) {
+ for (auto input : op_seq.getInputs())
+ {
+ // only valid_inputs
+ const auto &operand = operands.at(input);
+ if (operand.isConstant())
+ continue;
+
+ auto it = input_to_op_seqs.find(input);
+ if (it == input_to_op_seqs.end())
+ {
+ std::list<ir::OpSequenceIndex> list{op_seq_idx};
+ input_to_op_seqs[input] = list;
+ }
+ else
+ {
+ it->second.push_back(op_seq_idx);
+ }
+ }
+ });
+
+ std::unordered_map<ir::OpSequenceIndex, bool> visited;
+ op_seqs.iterate(
+ [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) { visited[index] = false; });
+
+ std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> dfs_recursive =
+ [&](const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq) -> void {
+ if (visited[index])
+ return;
+ visited[index] = true;
+
+ // The outputs should not be constants
+ for (auto output : op_seq.getOutputs())
+ {
+ const auto it = input_to_op_seqs.find(output);
+ if (it != input_to_op_seqs.end())
+ {
+ const auto &op_seq_index_list = it->second;
+ for (const auto &index : op_seq_index_list)
+ {
+ auto &op_seq = op_seqs.at(index);
+ dfs_recursive(index, op_seq);
+ }
+ }
+ }
+
+ order.emplace_back(index);
+ };
+
+ op_seqs.iterate(dfs_recursive);
+
+ // All of the nodes must have been visited.
+ assert(
+ std::all_of(visited.begin(), visited.end(),
+ [](const std::pair<const ir::OpSequenceIndex, bool> &v) { return v.second; }));
+
+ // NOTE. Now these op_seq are on the reverse order
+ std::reverse(order.begin(), order.end());
+ }
+ return order;
+}
+
+void Linear::dump(const ir::LoweredGraph &lowered_graph,
+ const std::vector<ir::OpSequenceIndex> &order)
+{
+ {
+ const auto &toString = [](const onert::backend::Backend *backend) {
+ assert(backend);
+ std::string str;
+ str += backend->config()->id();
+ return "{" + str + "}";
+ };
+
+ VERBOSE(Linear) << "Final OpSequence" << std::endl;
+ for (const auto index : order)
+ {
+
+ const auto &op_seq = lowered_graph.op_seqs().at(index);
+ const auto lower_info = lowered_graph.getLowerInfo(index);
+ VERBOSE(Linear) << "* OP_SEQ " << toString(lower_info->backend()) << " " << op_seq.getStr()
+ << std::endl;
+ }
+ }
+}
+
+void Linear::planTensors(const ir::LoweredGraph &lowered_graph,
+ const std::vector<ir::OpSequenceIndex> &order)
+{
+ const auto &graph = lowered_graph.graph();
+ ir::OperandIndexMap<std::shared_ptr<backend::ITensorBuilder>> tensor_builder_map;
+
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ // Prepare scanning
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ const auto lower_info = lowered_graph.getLowerInfo(ind);
+ // TODO Remove this once onert no longer supports cases such as
+ // GeneratedTests.reshape_quant8_weights_as_inputs
+ if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 &&
+ !graph.getInputs().contains(ind))
+ {
+ VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process."
+ << std::endl;
+ return;
+ }
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().size(); // should be 1 or 0
+
+ bool is_const = obj.isConstant();
+ if (is_const)
+ {
+ constants.append(ind);
+ }
+
+ auto factor = lower_info->def_factors().getOnlyElement();
+ auto backend = factor.backend();
+ auto tensor_builder = lowered_graph.backend_contexts().at(backend)->tensor_builder;
+ if (!tensor_builder->isRegistered(ind))
+ {
+ // These tensors do not exist in any op_seq (No use and def)
+ const auto info = obj.info();
+ const auto backend_layout = factor.layout();
+ // TODO Change tensor info to have permuted shape
+ tensor_builder->registerTensorInfo(ind, info, backend_layout, is_const);
+ }
+
+ tensor_builder_map[ind] = tensor_builder;
+ });
+
+ // If a tensor is model output, increase the use of the tensor.
+ // The aim is the same as above.
+ for (const auto &ind : graph.getOutputs())
+ {
+ uses_map[ind]++;
+ }
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+ // If a tensor is a constant, increase the use of the tensor.
+ // This prevents the tensor from being deallocated, i.e. these will be deallocated last.
+ // And allocate constant operands first
+ VERBOSE(LINEAR) << "TENSORS as CONSTANT" << std::endl;
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder_map[ind]->notifyFirstUse(ind);
+ }
+
+ // Allocate Model's inputs
+ VERBOSE(LINEAR) << "TENSORS as MODEL INPUT" << std::endl;
+ for (const auto &ind : graph.getInputs())
+ {
+ auto tensor_builder = tensor_builder_map[ind];
+ if (!tensor_builder) // for GeneratedTests.xxx_weights_as_inputs
+ continue;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+ // At each operation,
+ // 1. Scan DEF of outputs. If the DEF, allocate it
+ // 2. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+ VERBOSE(LINEAR) << "TENSORS" << std::endl;
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = lowered_graph.op_seqs().at(op_seq_ind);
+ for (const auto &op : op_seq.operations())
+ {
+ for (const auto &ind : op.node->getOutputs())
+ {
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder_map[ind]->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op.node->getInputs())
+ {
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+ tensor_builder_map[ind]->notifyLastUse(ind);
+ }
+ }
+ }
+ }
+
+ // Dispose and validate
+ for (const auto &ind : graph.getOutputs())
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder_map[ind]->notifyLastUse(ind);
+ }
+ }
+
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder_map[ind]->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/Linear.h b/runtime/onert/core/src/compiler/Linear.h
new file mode 100644
index 000000000..faeff77f3
--- /dev/null
+++ b/runtime/onert/core/src/compiler/Linear.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_LINEAR_H__
+#define __ONERT_COMPILER_LINEAR_H__
+
+#include <vector>
+#include <memory>
+
+#include "ir/OpSequences.h"
+#include "ir/Index.h"
+#include "backend/ITensorBuilder.h"
+#include "ir/LoweredGraph.h"
+
+namespace onert
+{
+namespace ir
+{
+struct OperationVisitor;
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+class Linear
+{
+public:
+ static std::vector<ir::OpSequenceIndex> linearize(const ir::LoweredGraph &lowered_graph);
+ static void dump(const ir::LoweredGraph &lowered_graph,
+ const std::vector<ir::OpSequenceIndex> &order);
+ static void planTensors(const ir::LoweredGraph &lowered_graph,
+ const std::vector<ir::OpSequenceIndex> &order);
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_LINEAR_H__
diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc
new file mode 100644
index 000000000..5a5aa8219
--- /dev/null
+++ b/runtime/onert/core/src/compiler/ManualScheduler.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ManualScheduler.h"
+#include "ir/OpCode.h"
+#include "ir/Operations.Include.h"
+#include "backend/Backend.h"
+#include "backend/IConfig.h"
+#include "compiler/BackendManager.h"
+#include "util/ConfigSource.h"
+#include "util/logging.h"
+#include "misc/string_helpers.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+ManualScheduler::ManualScheduler(const compiler::ManualSchedulerOptions &options)
+ : _options{options}
+{
+}
+
+std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &graph)
+{
+ auto backend_resolver = std::make_unique<compiler::BackendResolver>();
+
+ // 1. Backend for All operations
+ const backend::Backend *backend_all = BackendManager::get().get(_options.backend_for_all);
+ if (!backend_all)
+ {
+ backend_all = BackendManager::get().getAll().at(0);
+ }
+ VERBOSE(ManualScheduler) << "Default backend for all ops: " << _options.backend_for_all
+ << std::endl;
+
+ graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
+ backend_resolver->setBackend(index, backend_all);
+ });
+
+ // 2. Backend per operation type
+ std::unordered_map<ir::OpCode, backend::Backend *> op_type_map;
+ for (auto &pair : _options.opcode_to_backend)
+ {
+ op_type_map.emplace(pair.first, BackendManager::get().get(pair.second));
+ }
+ // By default, Custom uses cpu backend
+ op_type_map[ir::OpCode::Custom] = BackendManager::get().get("cpu");
+
+ graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &operation) {
+ auto itr = op_type_map.find(operation.opcode());
+ if (itr != op_type_map.end())
+ {
+ backend_resolver->setBackend(index, itr->second);
+ }
+ });
+
+ // 3. Backend per operation
+ for (auto &pair : _options.index_to_backend)
+ {
+ const auto &key = pair.first;
+ const auto &val = pair.second;
+
+ try
+ {
+ graph.operations().at(key); // Check if exist, or this will throw
+ backend_resolver->setBackend(key, BackendManager::get().get(val));
+ }
+ catch (...)
+ {
+ VERBOSE(ManualScheduler) << "Invalid value while OperationIndex to Backend mapping : @"
+ << key.value() << " -> \"" << val << "\"" << std::endl;
+ }
+ }
+
+ // 4. Operations that are specially handled
+ // All configuration above will be ignored (overwritten)
+ op_type_map[ir::OpCode::Permute] = BackendManager::get().get("cpu");
+
+ // Dump final assignment
+ backend_resolver->iterate([&](const ir::OperationIndex &index, const backend::Backend &backend) {
+ VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": "
+ << backend.config()->id() << std::endl;
+ });
+
+ return backend_resolver;
+}
+
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/ManualScheduler.h b/runtime/onert/core/src/compiler/ManualScheduler.h
new file mode 100644
index 000000000..961dd14b2
--- /dev/null
+++ b/runtime/onert/core/src/compiler/ManualScheduler.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_CORE_COMPILER_MANUAL_SCHEDULER_H__
+#define __ONERT_CORE_COMPILER_MANUAL_SCHEDULER_H__
+
+#include "IScheduler.h"
+#include "compiler/Compiler.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+class ManualScheduler : public IScheduler
+{
+public:
+ ManualScheduler(const compiler::ManualSchedulerOptions &options);
+ std::unique_ptr<BackendResolver> schedule(const ir::Graph &graph) override;
+
+private:
+ compiler::ManualSchedulerOptions _options;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_CORE_COMPILER_MANUAL_SCHEDULER_H__
diff --git a/runtime/onert/core/src/compiler/OperandContext.cc b/runtime/onert/core/src/compiler/OperandContext.cc
new file mode 100644
index 000000000..cce555e33
--- /dev/null
+++ b/runtime/onert/core/src/compiler/OperandContext.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperandContext.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace compiler
+{
+
+OperandContext &OperandContext::set(const ir::OperandIndex &id,
+ const std::shared_ptr<backend::ITensor> &tensor)
+{
+ // Only one tensor for an id
+ assert(_tensors.find(id) == _tensors.end());
+ _tensors[id] = tensor;
+ return (*this);
+}
+
+void OperandContext::iterate(
+ const std::function<void(const ir::OperandIndex &, backend::ITensor &)> &fn)
+{
+ for (auto &e : _tensors)
+ {
+ fn(e.first, *e.second);
+ }
+}
+
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/OperandContext.h b/runtime/onert/core/src/compiler/OperandContext.h
new file mode 100644
index 000000000..390b376fe
--- /dev/null
+++ b/runtime/onert/core/src/compiler/OperandContext.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_OPERAND_CONTEXT_H__
+#define __ONERT_COMPILER_OPERAND_CONTEXT_H__
+
+#include "backend/ITensor.h"
+#include "ir/OperandIndexMap.h"
+#include <unordered_map>
+#include <memory>
+
+namespace onert
+{
+namespace compiler
+{
+
+class OperandContext
+{
+public:
+ OperandContext &set(const ir::OperandIndex &ind, const std::shared_ptr<backend::ITensor> &tensor);
+
+public:
+ bool exist(const ir::OperandIndex &ind) const { return _tensors.find(ind) != _tensors.end(); }
+
+public:
+ std::shared_ptr<backend::ITensor> at(const ir::OperandIndex &ind) const
+ {
+ return _tensors.at(ind);
+ }
+
+ std::shared_ptr<backend::ITensor> &at(const ir::OperandIndex &ind) { return _tensors.at(ind); }
+
+ void iterate(const std::function<void(const ir::OperandIndex &, backend::ITensor &)> &fn);
+
+private:
+ ir::OperandIndexMap<std::shared_ptr<backend::ITensor>> _tensors;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_OPERAND_CONTEXT_H__
diff --git a/runtime/onert/core/src/compiler/OperationValidator.cc b/runtime/onert/core/src/compiler/OperationValidator.cc
new file mode 100644
index 000000000..1368d11b9
--- /dev/null
+++ b/runtime/onert/core/src/compiler/OperationValidator.cc
@@ -0,0 +1,1079 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationValidator.h"
+
+#include <typeinfo>
+
+#include "ir/Graph.h"
+#include "ir/operation/LowerInfo.h"
+
+#include "util/logging.h"
+#include "util/Utils.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+OperationValidator::OperationValidator(const ir::Graph &graph)
+ : _graph{graph}, _ctx{graph.operands()}, _current_op_seq_layout{ir::Layout::UNKNOWN}
+{
+}
+
+void OperationValidator::operator()()
+{
+ // TODO Get frontend layout from graph
+ _current_op_seq_layout = ir::Layout::NHWC;
+
+ _graph.operations().iterate(
+ [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
+}
+
+void OperationValidator::visit(const ir::operation::Abs &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ UNUSED_RELEASE(output_index);
+ UNUSED_RELEASE(input_index);
+
+ assert(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+}
+
+void OperationValidator::visit(const ir::operation::AvgPool2D &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
+
+ UNUSED_RELEASE(ofm_index);
+ UNUSED_RELEASE(ifm_index);
+
+ assert(_ctx.at(ifm_index).shape().rank() == 4);
+}
+
+void OperationValidator::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
+ const auto block_size_index{
+ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+
+ UNUSED_RELEASE(input_shape);
+ UNUSED_RELEASE(output_shape);
+ UNUSED_RELEASE(block_size_index);
+
+ // All assertions as per NNAPI specification.
+ assert(_ctx.at(ifm_index).shape().rank() == 4);
+ assert(_ctx.at(ofm_index).shape().rank() == 4);
+ assert(_ctx.at(block_size_index).shape().rank() == 1);
+
+ assert(_ctx.at(block_size_index).shape().dim(0) == 2);
+
+ assert(_ctx.at(block_size_index).isConstant());
+
+ assert(input_shape.C == output_shape.C);
+}
+
+void OperationValidator::visit(const ir::operation::Cast &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ UNUSED_RELEASE(output_index);
+ UNUSED_RELEASE(input_index);
+
+ assert(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+}
+
+void OperationValidator::visit(const ir::operation::Comparison &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
+
+ UNUSED_RELEASE(output_index);
+ UNUSED_RELEASE(lhs_index);
+ UNUSED_RELEASE(rhs_index);
+
+ assert(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
+ assert(_ctx.at(output_index).typeInfo().type() == ir::DataType::BOOL8);
+}
+
+void OperationValidator::visit(const ir::operation::Softmax &node)
+{
+ VERBOSE(Softmax) << "Configure SOFTMAX operation" << std::endl;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ UNUSED_RELEASE(output_index);
+ UNUSED_RELEASE(input_index);
+
+ assert(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+}
+
+void OperationValidator::visit(const ir::operation::InstanceNorm &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
+ const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
+ const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
+
+ UNUSED_RELEASE(ofm_index);
+ UNUSED_RELEASE(ifm_index);
+ UNUSED_RELEASE(gamma_index);
+ UNUSED_RELEASE(beta_index);
+
+ assert(_ctx.at(ifm_index).shape().rank() == 4);
+ assert(_ctx.at(ifm_index).shape() == _ctx.at(ofm_index).shape());
+ assert(_ctx.at(gamma_index).shape().rank() == 1);
+ assert(_ctx.at(beta_index).shape().rank() == 1);
+}
+
+void OperationValidator::visit(const ir::operation::Permute &node)
+{
+ VERBOSE(Permute) << "Configure Permute operation" << std::endl;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ UNUSED_RELEASE(output_index);
+ UNUSED_RELEASE(input_index);
+
+ assert(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+}
+
+void OperationValidator::visit(const ir::operation::ReduceSum &node)
+{
+ VERBOSE(Permute) << "Configure ReduceSum operation" << std::endl;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ReduceSum::Input::INPUT)};
+ const auto &axes = node.param().axes;
+
+ UNUSED_RELEASE(output_index);
+ UNUSED_RELEASE(input_index);
+ UNUSED_RELEASE(axes);
+
+ const auto input_shape = _ctx.at(input_index).shape();
+ const auto output_shape = _ctx.at(output_index).shape();
+
+ UNUSED_RELEASE(output_shape);
+ UNUSED_RELEASE(input_shape);
+
+ assert(input_shape.rank() <= 4);
+ assert(output_shape.rank() <= input_shape.rank());
+
+ // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
+ // supports cases reducing height and width or reducing depth.
+ // TODO We have to support all cases of dimensions up to 4.
+ // For correct permuting, we have to set output's shape to be equal in dimension position of the
+ // input. But the positions of the same dimensions in the input and output may be set differently.
+ // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
+ // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
+ // extend it in 4 dimensions, it should be {1,1,3,5}.
+ // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
+ // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
+ // next operation is not desired.
+ if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank())
+ {
+ if (output_shape.rank() == 2)
+ {
+ // Reducing HW
+ assert(input_shape.dim(0) == output_shape.dim(0) &&
+ input_shape.dim(3) == output_shape.dim(1));
+ }
+ else if (output_shape.rank() == 3)
+ {
+ // Reducing C or
+ // (Reducing H and C(input and output) == 1) or (Reducing W and C(input and output) == 1)
+ assert((input_shape.dim(0) == output_shape.dim(0) &&
+ input_shape.dim(1) == output_shape.dim(1) &&
+ input_shape.dim(2) == output_shape.dim(2)) ||
+ (input_shape.dim(0) == output_shape.dim(0) &&
+ (input_shape.dim(1) == output_shape.dim(1) ||
+ input_shape.dim(2) == output_shape.dim(1)) &&
+ input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
+ }
+ }
+}
+
+void OperationValidator::visit(const ir::operation::Transpose &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
+ const auto &perm{node.param().perm};
+
+ const auto &output_shape = _ctx.at(output_index).shape();
+ const auto &input_shape = _ctx.at(input_index).shape();
+
+ UNUSED_RELEASE(output_shape);
+ UNUSED_RELEASE(input_shape);
+ UNUSED_RELEASE(perm);
+
+ assert(input_shape.rank() == static_cast<int>(perm.size()));
+ assert(input_shape.rank() == output_shape.rank());
+}
+
+void OperationValidator::visit(const ir::operation::ReduceMax &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ReduceMax::Input::INPUT)};
+ const auto &axes = node.param().axes;
+
+ auto output_shape = _ctx.at(output_index).shape();
+ auto input_shape = _ctx.at(input_index).shape();
+
+ UNUSED_RELEASE(output_shape);
+ UNUSED_RELEASE(input_shape);
+ UNUSED_RELEASE(axes);
+
+ assert(input_shape.rank() <= 4);
+ assert(output_shape.rank() <= input_shape.rank());
+
+ // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
+ // supports cases reducing height and width or reducing depth.
+ // TODO We have to support all cases of dimensions up to 4.
+ // For correct permuting, we have to set output's shape to be equal in dimension position of the
+ // input. But the positions of the same dimensions in the input and output may be set differently.
+ // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
+ // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
+ // extend it in 4 dimensions, it should be {1,1,3,5}.
+ // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
+ // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
+ // next operation is not desired.
+ if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank())
+ {
+ if (output_shape.rank() == 2)
+ {
+ // Reducing HW
+ assert(input_shape.dim(0) == output_shape.dim(0) &&
+ input_shape.dim(3) == output_shape.dim(1));
+ }
+ else if (output_shape.rank() == 3)
+ {
+ // Reducing C or
+ // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1)
+ assert((input_shape.dim(0) == output_shape.dim(0) &&
+ input_shape.dim(1) == output_shape.dim(1) &&
+ input_shape.dim(2) == output_shape.dim(2)) ||
+ (input_shape.dim(0) == output_shape.dim(0) &&
+ (input_shape.dim(1) == output_shape.dim(1) ||
+ input_shape.dim(2) == output_shape.dim(1)) &&
+ input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
+ }
+ }
+}
+
+void OperationValidator::visit(const ir::operation::RNN &node)
+{
+ // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
+ // TODO Support dynamic rnn
+ const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
+ const auto hidden_state_out_index{
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
+ const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
+ const auto recurrent_weights_index{
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
+ const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
+
+ const auto batch_size = _ctx.at(output_index).shape().dim(0);
+ const auto num_units = _ctx.at(output_index).shape().dim(1);
+
+ UNUSED_RELEASE(output_index);
+ UNUSED_RELEASE(hidden_state_out_index);
+ UNUSED_RELEASE(input_index);
+ UNUSED_RELEASE(weights_index);
+ UNUSED_RELEASE(recurrent_weights_index);
+ UNUSED_RELEASE(bias_index);
+ UNUSED_RELEASE(hidden_state_in_index);
+ UNUSED_RELEASE(batch_size);
+ UNUSED_RELEASE(num_units);
+
+ assert(_ctx.at(output_index).shape().rank() == 2 &&
+ _ctx.at(hidden_state_out_index).shape().rank() == 2 &&
+ _ctx.at(input_index).shape().rank() == 2 && _ctx.at(weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_weights_index).shape().rank() == 2 &&
+ _ctx.at(hidden_state_in_index).shape().rank() == 2);
+ assert(_ctx.at(bias_index).shape().rank() == 1);
+
+ assert(batch_size == _ctx.at(input_index).shape().dim(0) &&
+ batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) &&
+ batch_size == _ctx.at(hidden_state_out_index).shape().dim(0));
+ assert(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1));
+
+ assert(num_units == _ctx.at(weights_index).shape().dim(0) &&
+ num_units == _ctx.at(recurrent_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(bias_index).shape().dim(0));
+ assert(num_units == _ctx.at(output_index).shape().dim(1) &&
+ num_units == _ctx.at(recurrent_weights_index).shape().dim(1) &&
+ num_units == _ctx.at(hidden_state_in_index).shape().dim(1) &&
+ num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
+}
+
+void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
+ const auto block_size_index{
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+
+ UNUSED_RELEASE(input_shape);
+ UNUSED_RELEASE(output_shape);
+ UNUSED_RELEASE(block_size_index);
+ UNUSED_RELEASE(paddings_index);
+
+ // All assertions as per NNAPI specification.
+ assert(_ctx.at(ifm_index).shape().rank() == 4);
+ assert(_ctx.at(ofm_index).shape().rank() == 4);
+ assert(_ctx.at(block_size_index).shape().rank() == 1);
+ assert(_ctx.at(paddings_index).shape().rank() == 2);
+
+ assert(_ctx.at(block_size_index).shape().dim(0) == 2);
+ assert(_ctx.at(paddings_index).shape().dim(0) == 2);
+ assert(_ctx.at(paddings_index).shape().dim(1) == 2);
+
+ assert(_ctx.at(block_size_index).isConstant());
+ assert(_ctx.at(paddings_index).isConstant());
+
+ assert(input_shape.C == output_shape.C);
+}
+
+void OperationValidator::visit(const ir::operation::SpaceToDepth &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto block_size = node.param().block_size;
+
+ UNUSED_RELEASE(input_shape);
+ UNUSED_RELEASE(output_shape);
+ UNUSED_RELEASE(block_size);
+
+ // All assertions as per NNAPI specification.
+ assert(_ctx.at(ifm_index).shape().rank() == 4);
+ assert(_ctx.at(ofm_index).shape().rank() == 4);
+ assert((block_size >= 1) && (input_shape.H % block_size == 0) &&
+ (input_shape.W % block_size == 0));
+ assert(input_shape.N == output_shape.N);
+ assert(input_shape.C * block_size * block_size == output_shape.C);
+}
+
+void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+
+ const auto &output_obj = _ctx.at(output_index);
+ const auto &lookups_obj = _ctx.at(lookups_index);
+ const auto &values_obj = _ctx.at(values_index);
+
+ UNUSED_RELEASE(output_obj);
+ UNUSED_RELEASE(lookups_obj);
+ UNUSED_RELEASE(values_obj);
+
+ // Verify operand here, not at SimpleEmbeddingLookup::configure() to avoid acl's modifying
+ // TensorShape sometimes(Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729)
+ {
+ assert(lookups_obj.typeInfo().type() == ir::DataType::INT32);
+
+ const auto &output_shape = output_obj.shape();
+ const auto &lookups_shape = lookups_obj.shape();
+ const auto &values_shape = values_obj.shape();
+
+ UNUSED_RELEASE(output_shape);
+ UNUSED_RELEASE(lookups_shape);
+ UNUSED_RELEASE(values_shape);
+
+ assert(lookups_shape.rank() == 1);
+ assert(values_shape.rank() >= 2);
+
+ // output should be a n-D tensor with the same rank and shape as the values tensor, except for
+ // the first dimension which has the same size as lookups' only dimension.
+ assert(output_shape.rank() == values_shape.rank());
+ assert(output_shape.dim(0) == lookups_shape.dim(0));
+ for (int n = 1; n < output_shape.rank(); ++n)
+ {
+ assert(output_shape.dim(n) == values_shape.dim(n));
+ }
+ }
+}
+
+void OperationValidator::visit(const ir::operation::Exp &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+
+ UNUSED_RELEASE(output_index);
+ UNUSED_RELEASE(input_index);
+
+ assert(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ assert(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
+}
+
+void OperationValidator::visit(const ir::operation::Floor &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
+
+ UNUSED_RELEASE(output_index);
+ UNUSED_RELEASE(input_index);
+
+ assert(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ assert(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
+}
+
+void OperationValidator::visit(const ir::operation::HashtableLookup &node)
+{
+ const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
+ const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
+
+ const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
+ const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
+ const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
+
+ const auto &output_obj = _ctx.at(output_index);
+ const auto &hits_obj = _ctx.at(hits_index);
+
+ const auto &lookups_obj = _ctx.at(lookups_index);
+ const auto &keys_obj = _ctx.at(keys_index);
+ const auto &values_obj = _ctx.at(values_index);
+
+ assert(lookups_obj.typeInfo().type() == ir::DataType::INT32);
+ assert(keys_obj.typeInfo().type() == ir::DataType::INT32);
+ assert(hits_obj.typeInfo().type() == ir::DataType::QUANT8_ASYMM);
+
+ const auto &output_shape = output_obj.shape();
+ const auto &hits_shape = hits_obj.shape();
+
+ const auto &lookups_shape = lookups_obj.shape();
+ const auto &keys_shape = keys_obj.shape();
+ const auto &values_shape = values_obj.shape();
+
+ UNUSED_RELEASE(output_shape);
+ UNUSED_RELEASE(hits_shape);
+ UNUSED_RELEASE(lookups_shape);
+ UNUSED_RELEASE(keys_shape);
+ UNUSED_RELEASE(values_shape);
+
+ assert(values_shape.rank() == output_shape.rank());
+ assert(lookups_shape.rank() == 1);
+ assert(keys_shape.rank() == 1);
+ assert(values_shape.dim(0) == keys_shape.dim(0));
+ assert(lookups_shape.dim(0) == output_shape.dim(0));
+}
+
+void OperationValidator::visit(const ir::operation::TransposeConv &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
+
+ // Only 4D tensors are supported
+ assert(_ctx.at(ofm_index).shape().rank() == 4);
+ assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank());
+ assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank());
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
+ // The kernel has only IHWO layout on frontend
+ // So ker_shape is treated here below
+ // I -> N
+ // H -> H
+ // W -> W
+ // O -> C
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature(ir::Layout::NHWC);
+
+ UNUSED_RELEASE(ofm_shape);
+ UNUSED_RELEASE(ifm_shape);
+ UNUSED_RELEASE(ker_shape);
+
+ assert((node.param().padding.type == ir::PaddingType::SAME) ||
+ (node.param().padding.type == ir::PaddingType::VALID));
+ assert(ifm_shape.N == ofm_shape.N);
+ assert(ifm_shape.C == ker_shape.C);
+ assert(ker_shape.N == ofm_shape.C);
+}
+
+void OperationValidator::visit(const ir::operation::Gather &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+
+ const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
+ const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
+
+ const auto axis = node.param().axis;
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape();
+ const auto indices_shape = _ctx.at(indices_index).shape();
+ const auto ofm_shape = _ctx.at(ofm_index).shape();
+
+ UNUSED_RELEASE(ifm_shape);
+ UNUSED_RELEASE(indices_shape);
+ UNUSED_RELEASE(ofm_shape);
+ UNUSED_RELEASE(axis);
+
+ assert(ifm_shape.rank() <= 4);
+ assert(indices_shape.rank() <= 3);
+ assert(ofm_shape.rank() <= 4);
+}
+
+void OperationValidator::visit(const ir::operation::Dequantize &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
+
+ UNUSED_RELEASE(output_index);
+ UNUSED_RELEASE(input_index);
+
+ assert(_ctx.at(input_index).shape().rank() <= 4);
+ assert(_ctx.at(input_index).shape() == _ctx.at(output_index).shape());
+ assert(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT8_ASYMM);
+ assert(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
+}
+
+void OperationValidator::visit(const ir::operation::Mean &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Mean::Input::INPUT)};
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape();
+ const auto ofm_shape = _ctx.at(ofm_index).shape();
+
+ // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
+ // supports cases reducing height and width or reducing depth.
+ // TODO We have to support all cases of dimensions up to 4.
+ // For correct permuting, we have to set output's shape to be equal in dimension position of the
+ // input. But the positions of the same dimensions in the input and output may be set differently.
+ // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
+ // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
+ // extend it in 4 dimensions, it should be {1,1,3,5}.
+ // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
+ // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
+ // next operation is not desired.
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
+ {
+ if (ofm_shape.rank() == 2)
+ {
+ // Reducing HW
+ assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
+ }
+ else if (ofm_shape.rank() == 3)
+ {
+ // Reducing C or
+ // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1)
+ assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
+ ifm_shape.dim(2) == ofm_shape.dim(2)) ||
+ (ifm_shape.dim(0) == ofm_shape.dim(0) &&
+ (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
+ ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
+ }
+ }
+}
+
+void OperationValidator::visit(const ir::operation::DepthToSpace &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout);
+ const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout);
+
+ UNUSED_RELEASE(output_shape);
+ UNUSED_RELEASE(input_shape);
+
+ assert(_ctx.at(input_index).shape().rank() == 4);
+ assert(_ctx.at(output_index).shape().rank() == 4);
+
+ int32_t block_size = node.param().block_size;
+
+ UNUSED_RELEASE(block_size);
+
+ assert(block_size > 0);
+
+ { // assertions block
+ assert(output_shape.N == input_shape.N);
+ assert(output_shape.H == input_shape.H * block_size);
+ assert(output_shape.W == input_shape.W * block_size);
+ assert(input_shape.C % (block_size * block_size) == 0);
+ assert(output_shape.C == input_shape.C / (block_size * block_size));
+ }
+}
+
+void OperationValidator::visit(const ir::operation::Pack &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto num{node.param().num};
+ const auto axis{node.param().axis};
+
+ const auto &output_shape = _ctx.at(output_index).shape();
+ const auto output_rank = static_cast<int32_t>(output_shape.rank());
+
+ const auto input1_index{node.getInputs().at(0)};
+ const auto input_shape = _ctx.at(input1_index).shape();
+
+ UNUSED_RELEASE(num);
+ UNUSED_RELEASE(axis);
+ UNUSED_RELEASE(output_rank);
+
+ assert(num == static_cast<int32_t>(node.getInputs().size()));
+ assert(axis >= -output_rank && axis < output_rank);
+ for (const auto &index : node.getInputs())
+ {
+ UNUSED_RELEASE(index);
+ assert(input_shape == _ctx.at(index).shape());
+ }
+}
+
+void OperationValidator::visit(const ir::operation::ReduceMin &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ReduceMin::Input::INPUT)};
+ const auto &axes = node.param().axes;
+
+ auto ifm_shape = _ctx.at(ifm_index).shape();
+ auto ofm_shape = _ctx.at(ofm_index).shape();
+
+ UNUSED_RELEASE(ifm_shape);
+ UNUSED_RELEASE(ofm_shape);
+ UNUSED_RELEASE(axes);
+
+ assert(ifm_shape.rank() <= 4);
+ assert(ofm_shape.rank() <= ifm_shape.rank());
+
+ // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
+ // supports cases reducing height and width or reducing depth.
+ // TODO We have to support all cases of dimensions up to 4.
+ // For correct permuting, we have to set output's shape to be equal in dimension position of the
+ // input. But the positions of the same dimensions in the input and output may be set differently.
+ // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
+ // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
+ // extend it in 4 dimensions, it should be {1,1,3,5}.
+ // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
+ // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
+ // next operation is not desired.
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
+ {
+ if (ofm_shape.rank() == 2)
+ {
+ // Reducing HW
+ assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
+ }
+ else if (ofm_shape.rank() == 3)
+ {
+ // Reducing C or
+ // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1)
+ assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
+ ifm_shape.dim(2) == ofm_shape.dim(2)) ||
+ (ifm_shape.dim(0) == ofm_shape.dim(0) &&
+ (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
+ ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
+ }
+ }
+}
+
+void OperationValidator::visit(const ir::operation::LSTM &node)
+{
+ // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
+ // TODO Support dynamic rnn
+ const auto scratch_buffer_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+ const auto output_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ const auto cell_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+ const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto input_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
+ const auto input_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ const auto input_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ const auto input_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
+ const auto recurrent_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ const auto recurrent_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ const auto recurrent_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto cell_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)};
+ const auto cell_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)};
+ const auto cell_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)};
+ const auto input_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+ const auto forget_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
+ const auto output_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ const auto projection_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)};
+ const auto projection_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)};
+ const auto output_state_in_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
+
+ UNUSED_RELEASE(scratch_buffer_index);
+ UNUSED_RELEASE(output_state_out_index);
+ UNUSED_RELEASE(cell_state_out_index);
+ UNUSED_RELEASE(output_index);
+
+ UNUSED_RELEASE(input_index);
+ UNUSED_RELEASE(input_to_input_weights_index);
+ UNUSED_RELEASE(input_to_forget_weights_index);
+ UNUSED_RELEASE(input_to_cell_weights_index);
+ UNUSED_RELEASE(input_to_output_weights_index);
+ UNUSED_RELEASE(recurrent_to_input_weights_index);
+ UNUSED_RELEASE(recurrent_to_forget_weights_index);
+ UNUSED_RELEASE(recurrent_to_cell_weights_index);
+ UNUSED_RELEASE(recurrent_to_output_weights_index);
+ UNUSED_RELEASE(cell_to_input_weights_index);
+ UNUSED_RELEASE(cell_to_forget_weights_index);
+ UNUSED_RELEASE(cell_to_output_weights_index);
+ UNUSED_RELEASE(input_gate_bias_index);
+ UNUSED_RELEASE(forget_gate_bias_index);
+ UNUSED_RELEASE(cell_bias_index);
+ UNUSED_RELEASE(output_gate_bias_index);
+ UNUSED_RELEASE(projection_weights_index);
+ UNUSED_RELEASE(projection_bias_index);
+ UNUSED_RELEASE(output_state_in_index);
+ UNUSED_RELEASE(cell_state_in_index);
+
+ assert(_ctx.at(scratch_buffer_index).shape().rank() == 2 &&
+ _ctx.at(output_state_out_index).shape().rank() == 2 &&
+ _ctx.at(cell_state_out_index).shape().rank() == 2 &&
+ _ctx.at(output_index).shape().rank() == 2 && _ctx.at(input_index).shape().rank() == 2 &&
+ _ctx.at(input_to_input_weights_index).shape().rank() == 2 &&
+ _ctx.at(input_to_forget_weights_index).shape().rank() == 2 &&
+ _ctx.at(input_to_cell_weights_index).shape().rank() == 2 &&
+ _ctx.at(input_to_output_weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
+ _ctx.at(projection_weights_index).shape().rank() == 2 &&
+ _ctx.at(output_state_in_index).shape().rank() == 2 &&
+ _ctx.at(cell_state_in_index).shape().rank() == 2);
+
+ assert(_ctx.at(cell_to_input_weights_index).shape().rank() == 1 &&
+ _ctx.at(cell_to_forget_weights_index).shape().rank() == 1 &&
+ _ctx.at(cell_to_output_weights_index).shape().rank() == 1 &&
+ _ctx.at(input_gate_bias_index).shape().rank() == 1 &&
+ _ctx.at(forget_gate_bias_index).shape().rank() == 1 &&
+ _ctx.at(cell_bias_index).shape().rank() == 1 &&
+ _ctx.at(output_gate_bias_index).shape().rank() == 1 &&
+ _ctx.at(projection_bias_index).shape().rank() == 1);
+
+ // CIFG assertion
+ assert((_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 &&
+ _ctx.at(input_to_input_weights_index).shape().dim(1) == 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0 &&
+ _ctx.at(input_gate_bias_index).shape().dim(0) == 0 &&
+ _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0) ||
+ (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(input_to_input_weights_index).shape().dim(1) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0 &&
+ _ctx.at(input_gate_bias_index).shape().dim(0) != 0));
+
+ // Peephole assertion
+ assert((_ctx.at(cell_to_forget_weights_index).shape().dim(0) == 0 &&
+ _ctx.at(cell_to_output_weights_index).shape().dim(0) == 0) ||
+ (_ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0));
+
+ bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
+ bool has_recurrent_to_input_weights =
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ bool has_input_gate_bias = _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
+ bool has_cell_to_input_weights = _ctx.at(cell_to_input_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
+ bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(projection_weights_index).shape().dim(1) != 0;
+ bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
+
+ // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
+ // true: no CIFG
+ // false: CIFG
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+
+ // NOTE The cell_to_input_weights do not exist in regular CIFG although peephole.
+ // true: peephole
+ // false: no peephole
+ bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
+
+ // NOTE The projection weights may have data but the projection bias may not.
+ bool has_projection_param = has_projection_weights;
+
+ UNUSED_RELEASE(has_input_to_input_weights);
+ UNUSED_RELEASE(has_recurrent_to_input_weights);
+ UNUSED_RELEASE(has_input_gate_bias);
+ UNUSED_RELEASE(has_cell_to_input_weights);
+ UNUSED_RELEASE(has_cell_to_forget_weights);
+ UNUSED_RELEASE(has_cell_to_output_weights);
+ UNUSED_RELEASE(has_projection_weights);
+ UNUSED_RELEASE(has_projection_bias);
+ UNUSED_RELEASE(has_cifg_param);
+ UNUSED_RELEASE(has_peephole_param);
+ UNUSED_RELEASE(has_projection_param);
+
+ const auto batch_size = _ctx.at(input_index).shape().dim(0);
+ UNUSED_RELEASE(batch_size);
+ assert(batch_size == _ctx.at(output_state_in_index).shape().dim(0) &&
+ batch_size == _ctx.at(cell_state_in_index).shape().dim(0) &&
+ batch_size == _ctx.at(scratch_buffer_index).shape().dim(0) &&
+ batch_size == _ctx.at(output_state_out_index).shape().dim(0) &&
+ batch_size == _ctx.at(cell_state_out_index).shape().dim(0) &&
+ batch_size == _ctx.at(output_index).shape().dim(0));
+
+ const auto input_size = _ctx.at(input_index).shape().dim(1);
+ UNUSED_RELEASE(input_size);
+ assert(input_size == _ctx.at(input_to_forget_weights_index).shape().dim(1) &&
+ input_size == _ctx.at(input_to_cell_weights_index).shape().dim(1) &&
+ input_size == _ctx.at(input_to_output_weights_index).shape().dim(1));
+
+ const auto num_units = _ctx.at(cell_state_out_index).shape().dim(1);
+ UNUSED_RELEASE(num_units);
+ assert(num_units == _ctx.at(input_to_forget_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(input_to_cell_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(input_to_output_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(recurrent_to_forget_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(recurrent_to_cell_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(recurrent_to_output_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(forget_gate_bias_index).shape().dim(0) &&
+ num_units == _ctx.at(cell_bias_index).shape().dim(0) &&
+ num_units == _ctx.at(output_gate_bias_index).shape().dim(0) &&
+ num_units == _ctx.at(cell_state_in_index).shape().dim(1) &&
+ (((num_units * 3) == _ctx.at(scratch_buffer_index).shape().dim(1)) ||
+ ((num_units * 4) == _ctx.at(scratch_buffer_index).shape().dim(1))));
+
+ const auto output_size = _ctx.at(output_index).shape().dim(1);
+ UNUSED_RELEASE(output_size);
+ assert(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) &&
+ output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) &&
+ output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) &&
+ output_size == _ctx.at(output_state_in_index).shape().dim(1) &&
+ output_size == _ctx.at(output_state_out_index).shape().dim(1));
+
+ if (has_cifg_param)
+ {
+ assert(input_size == _ctx.at(input_to_input_weights_index).shape().dim(1));
+ assert(num_units == _ctx.at(input_to_input_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(recurrent_to_input_weights_index).shape().dim(0) &&
+ (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
+ _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* non-peephole */) &&
+ num_units == _ctx.at(input_gate_bias_index).shape().dim(0));
+ assert(output_size == _ctx.at(recurrent_to_input_weights_index).shape().dim(1));
+ assert(has_input_to_input_weights && has_recurrent_to_input_weights && has_input_gate_bias);
+ if (has_cell_to_input_weights)
+ {
+ // NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole.
+ assert(has_peephole_param);
+ }
+ assert(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
+ }
+ else
+ {
+ assert(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
+ }
+
+ if (has_peephole_param)
+ {
+ assert(num_units == _ctx.at(cell_to_forget_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(cell_to_output_weights_index).shape().dim(0) &&
+ (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
+ _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
+ }
+
+ if (has_projection_param)
+ {
+ assert(num_units == _ctx.at(projection_weights_index).shape().dim(1));
+ assert(output_size == _ctx.at(projection_weights_index).shape().dim(0));
+ if (has_projection_bias)
+ {
+ assert(output_size == _ctx.at(projection_bias_index).shape().dim(0));
+ }
+ }
+}
+
+// Validate Unpack: the number of graph outputs must equal param().num, and the
+// axis must be a valid (possibly negative) dimension index of the input.
+void OperationValidator::visit(const ir::operation::Unpack &node)
+{
+  const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
+  const auto num{node.param().num};
+  const auto axis{node.param().axis};
+
+  const auto &input_shape = _ctx.at(input_index).shape();
+  const auto input_rank = static_cast<int32_t>(input_shape.rank());
+
+  // Locals are only consumed by assert(); silence -Wunused in NDEBUG builds.
+  UNUSED_RELEASE(num);
+  UNUSED_RELEASE(axis);
+  UNUSED_RELEASE(input_rank);
+
+  assert(num == static_cast<int32_t>(node.getOutputs().size()));
+  // Negative axis counts from the back, hence the [-rank, rank) range.
+  assert(axis >= -input_rank && axis < input_rank);
+}
+
+// Validate Pad: the pad table must be a rank-2 INT32 tensor of shape
+// [input_rank, 2] (before/after padding per dimension), and padding must not
+// change the rank of the tensor.
+void OperationValidator::visit(const ir::operation::Pad &node)
+{
+  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
+  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
+  // Fix: fetch the output operand from getOutputs(). The original used
+  // getInputs().at(0), which is the INPUT operand itself, so the final
+  // rank-equality assert compared the input against itself and passed vacuously.
+  const auto output_index{node.getOutputs().at(0)};
+
+  const auto &pad_shape = _ctx.at(pad_index).shape();
+  const auto input_rank = static_cast<int32_t>(_ctx.at(input_index).shape().rank());
+
+  UNUSED_RELEASE(pad_shape);
+  UNUSED_RELEASE(input_rank);
+  UNUSED_RELEASE(output_index);
+
+  assert(pad_shape.rank() == 2);
+  assert(pad_shape.dim(0) == input_rank);
+  assert(pad_shape.dim(1) == 2);
+  assert(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32);
+  assert(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
+}
+
+// Element-wise minimum: both inputs and the output must share one data type.
+void OperationValidator::visit(const ir::operation::Min &node)
+{
+  const auto out_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
+
+  UNUSED_RELEASE(lhs_index);
+  UNUSED_RELEASE(rhs_index);
+  UNUSED_RELEASE(out_index);
+
+  assert(_ctx.at(rhs_index).typeInfo().type() == _ctx.at(lhs_index).typeInfo().type());
+  assert(_ctx.at(out_index).typeInfo().type() == _ctx.at(lhs_index).typeInfo().type());
+}
+
+// Element-wise maximum: both inputs and the output must share one data type.
+void OperationValidator::visit(const ir::operation::Max &node)
+{
+  const auto out_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
+
+  UNUSED_RELEASE(lhs_index);
+  UNUSED_RELEASE(rhs_index);
+  UNUSED_RELEASE(out_index);
+
+  assert(_ctx.at(rhs_index).typeInfo().type() == _ctx.at(lhs_index).typeInfo().type());
+  assert(_ctx.at(out_index).typeInfo().type() == _ctx.at(lhs_index).typeInfo().type());
+}
+
+// StridedSlice must preserve the element type; this backend path only handles
+// tensors up to rank 4.
+void OperationValidator::visit(const ir::operation::StridedSlice &node)
+{
+  const auto out_index{node.getOutputs().at(0)};
+  const auto in_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+  const auto begin_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
+  const auto end_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
+  const auto stride_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
+
+  UNUSED_RELEASE(in_index);
+  UNUSED_RELEASE(begin_index);
+  UNUSED_RELEASE(end_index);
+  UNUSED_RELEASE(stride_index);
+  UNUSED_RELEASE(out_index);
+
+  assert(_ctx.at(in_index).typeInfo().type() == _ctx.at(out_index).typeInfo().type());
+  assert(_ctx.at(in_index).shape().rank() <= 4);
+}
+
+// Validate Split: the (normalized) axis must be in range, the split dimension
+// must divide evenly by num_splits, and one output must exist per split.
+void OperationValidator::visit(const ir::operation::Split &node)
+{
+  const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+  const auto &num_splits = node.param().num_splits;
+  const auto &input_rank = node.param().rank;
+  // Negative axes count from the back; fold them into [0, input_rank).
+  const auto &axis = node.param().axis < 0 ? node.param().axis + input_rank : node.param().axis;
+
+  UNUSED_RELEASE(input_index);
+  UNUSED_RELEASE(num_splits);
+  UNUSED_RELEASE(input_rank);
+  UNUSED_RELEASE(axis);
+
+  // 0xFFFF mirrors the NNAPI-style upper bound on the number of splits.
+  assert(num_splits > 0 && num_splits <= 0xFFFF);
+  assert(axis >= 0 && axis < input_rank);
+  assert(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
+  assert(node.getOutputs().size() == static_cast<uint32_t>(num_splits));
+}
+
+// Element-wise sine: the output shape must match the input shape exactly.
+void OperationValidator::visit(const ir::operation::Sin &node)
+{
+  const auto out_index{node.getOutputs().at(0)};
+  const auto in_index{node.getInputs().at(0)};
+
+  UNUSED_RELEASE(in_index);
+  UNUSED_RELEASE(out_index);
+
+  assert(_ctx.at(in_index).shape() == _ctx.at(out_index).shape());
+}
+
+// Element-wise reciprocal square root: output shape must match input shape.
+void OperationValidator::visit(const ir::operation::RSQRT &node)
+{
+  const auto out_index{node.getOutputs().at(0)};
+  const auto in_index{node.getInputs().at(0)};
+
+  UNUSED_RELEASE(in_index);
+  UNUSED_RELEASE(out_index);
+
+  assert(_ctx.at(in_index).shape() == _ctx.at(out_index).shape());
+}
+
+// Shape op emits a 1-D tensor holding the input's dimensions, so the output
+// must have rank 1.
+void OperationValidator::visit(const ir::operation::Shape &node)
+{
+  const auto out_index{node.getOutputs().at(0)};
+  const auto in_index{node.getInputs().at(0)};
+
+  UNUSED_RELEASE(in_index);
+  UNUSED_RELEASE(out_index);
+
+  assert(_ctx.at(out_index).shape().rank() == 1);
+}
+
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/OperationValidator.h b/runtime/onert/core/src/compiler/OperationValidator.h
new file mode 100644
index 000000000..10b0525c1
--- /dev/null
+++ b/runtime/onert/core/src/compiler/OperationValidator.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_OPERATION_VALIDATOR_H__
+#define __ONERT_COMPILER_OPERATION_VALIDATOR_H__
+
+#include "ir/Layout.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+class Graph;
+class Operands;
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+// Visits every operation of a graph and assert()s operand invariants (ranks,
+// shapes, data types) per operation kind. The checks compile out under NDEBUG,
+// so this validator is effectively debug-build only.
+class OperationValidator : public ir::OperationVisitor
+{
+public:
+  OperationValidator(void) = delete;
+  // The graph must outlive this validator; only references are stored.
+  OperationValidator(const ir::Graph &graph);
+
+public:
+  // Runs validation over all operations of the graph.
+  void operator()();
+
+public:
+  void visit(const ir::operation::Abs &node) override;
+  void visit(const ir::operation::AvgPool2D &node) override;
+  void visit(const ir::operation::BatchToSpaceND &node) override;
+  void visit(const ir::operation::Cast &node) override;
+  void visit(const ir::operation::Comparison &node) override;
+  void visit(const ir::operation::Softmax &node) override;
+  void visit(const ir::operation::InstanceNorm &node) override;
+  void visit(const ir::operation::Permute &node) override;
+  void visit(const ir::operation::ReduceSum &node) override;
+  void visit(const ir::operation::Transpose &node) override;
+  void visit(const ir::operation::ReduceMax &node) override;
+  void visit(const ir::operation::RNN &node) override;
+  void visit(const ir::operation::SpaceToBatchND &node) override;
+  void visit(const ir::operation::SpaceToDepth &node) override;
+  void visit(const ir::operation::EmbeddingLookup &node) override;
+  void visit(const ir::operation::Exp &node) override;
+  void visit(const ir::operation::Floor &node) override;
+  void visit(const ir::operation::HashtableLookup &node) override;
+  void visit(const ir::operation::TransposeConv &node) override;
+  void visit(const ir::operation::Gather &node) override;
+  void visit(const ir::operation::Dequantize &node) override;
+  void visit(const ir::operation::Mean &node) override;
+  void visit(const ir::operation::DepthToSpace &node) override;
+  void visit(const ir::operation::Pack &node) override;
+  void visit(const ir::operation::ReduceMin &node) override;
+  void visit(const ir::operation::LSTM &node) override;
+  void visit(const ir::operation::Unpack &node) override;
+  void visit(const ir::operation::Pad &node) override;
+  void visit(const ir::operation::Min &node) override;
+  void visit(const ir::operation::Max &node) override;
+  void visit(const ir::operation::StridedSlice &node) override;
+  void visit(const ir::operation::Split &node) override;
+  void visit(const ir::operation::Sin &node) override;
+  void visit(const ir::operation::RSQRT &node) override;
+  void visit(const ir::operation::Shape &node) override;
+
+private:
+  // TODO Remove _ctx field
+  const ir::Graph &_graph;
+  // NOTE(review): appears to alias _graph's operand table (hence the TODO
+  // above to remove it) — confirm against the constructor definition.
+  const ir::Operands &_ctx;
+  ir::Layout _current_op_seq_layout;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_OPERATION_VALIDATOR_H__
diff --git a/runtime/onert/core/src/compiler/ParamChecker.cc b/runtime/onert/core/src/compiler/ParamChecker.cc
new file mode 100644
index 000000000..c4f80f087
--- /dev/null
+++ b/runtime/onert/core/src/compiler/ParamChecker.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ParamChecker.h"
+
+#include "ir/Graph.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+// Applies this checker to every operation of the model, in iteration order,
+// via the visitor's accept() double dispatch.
+void ParamChecker::operator()()
+{
+  _model->operations().iterate(
+      [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
+}
+
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/ParamChecker.h b/runtime/onert/core/src/compiler/ParamChecker.h
new file mode 100644
index 000000000..61429d521
--- /dev/null
+++ b/runtime/onert/core/src/compiler/ParamChecker.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ParamChecker.h
+ * @brief This file contains ParamChecker to check\n
+ * operations' parameters are compilable at machine independent phase\n
+ * ex) Check param is constant
+ */
+#ifndef __ONERT_COMPILER_PARAM_CHECKER_H__
+#define __ONERT_COMPILER_PARAM_CHECKER_H__
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+class Graph;
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+// Visits every operation of a model looking for non-constant parameters.
+// NOTE(review): no visit() overrides are declared here, so as written nothing
+// ever sets _nonConstParam to true — confirm whether overrides were intended.
+class ParamChecker : public ir::OperationVisitor
+{
+public:
+  /**
+   * @brief Construct a new Param Checker object (deleted)
+   */
+  ParamChecker(void) = delete;
+  /**
+   * @brief Construct a new Param Checker object
+   * @param[in] model Graph model to check
+   */
+  ParamChecker(std::shared_ptr<ir::Graph> model) : _model{model} {}
+
+public:
+  /**
+   * @brief Run parameter analysis
+   */
+  void operator()();
+  /**
+   * @brief Return analysis result if model have non-const parameter
+   * @return @c true if there is non-const parameter, otherwise @c false
+   */
+  bool haveNoneConstParam(void) { return _nonConstParam; }
+
+private:
+  const std::shared_ptr<ir::Graph> _model; // shared ownership of the checked graph
+  bool _nonConstParam{false};              // set when a non-constant param is found
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PARAM_CHECKER_H__
diff --git a/runtime/onert/core/src/dumper/dot/DotBuilder.cc b/runtime/onert/core/src/dumper/dot/DotBuilder.cc
new file mode 100644
index 000000000..2a3a4af22
--- /dev/null
+++ b/runtime/onert/core/src/dumper/dot/DotBuilder.cc
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DotBuilder.h"
+
+namespace onert
+{
+namespace dumper
+{
+namespace dot
+{
+
+// DotBuilder
+DotBuilder::DotBuilder() {}
+
+// Emit the node itself, then one directed edge per recorded neighbor.
+void DotBuilder::update(const Node &node_info)
+{
+  add(node_info);
+  for (const auto &edge : node_info.edges())
+  {
+    addEdge(node_info, *edge);
+  }
+}
+
+// Emits a Graphviz cluster grouping the op sequence's operations and operands.
+void DotBuilder::addOpSequence(const DotOpSequenceInfo &op_sequence_info)
+{
+  // Fix: "op_sequence cluster_N {" is not valid DOT grammar — Graphviz only
+  // recognizes the "subgraph" keyword (and renders a box for names starting
+  // with "cluster").
+  _dot << "subgraph cluster_" << op_sequence_info.index().value() << " {\n";
+  _dot << "  label=\"" << op_sequence_info.label() << "\";\n";
+  _dot << "  style=filled;\n";
+  _dot << "  color=lightgrey;\n";
+  _dot << "  ";
+  for (auto op : op_sequence_info.operations())
+  {
+    _dot << "operation" << op.value() << "; ";
+  }
+  for (auto op : op_sequence_info.operands())
+  {
+    _dot << "operand" << op.value() << "; ";
+  }
+  _dot << "\n";
+  _dot << "}\n";
+}
+
+// Writes the accumulated statements wrapped in a "digraph D { ... }" container.
+void DotBuilder::writeDot(std::ostream &os)
+{
+  os << "digraph D {\n" << _dot.str() << "\n}\n";
+}
+
+// Emits one DOT node statement: id[attr="value" ...];
+void DotBuilder::add(const Node &node)
+{
+  // Removed an unused local std::stringstream that was never written or read.
+  _dot << node.id();
+  _dot << "[";
+  for (auto attr : node.attributes())
+  {
+    _dot << attr.first << "=\"" << attr.second << "\" ";
+  }
+  _dot << "];\n";
+}
+
+// Appends a directed edge statement: "src -> dst;".
+void DotBuilder::addEdge(const Node &node1, const Node &node2)
+{
+  _dot << node1.id();
+  _dot << " -> ";
+  _dot << node2.id();
+  _dot << ";\n";
+}
+
+} // namespace dot
+} // namespace dumper
+} // namespace onert
diff --git a/runtime/onert/core/src/dumper/dot/DotBuilder.h b/runtime/onert/core/src/dumper/dot/DotBuilder.h
new file mode 100644
index 000000000..24a76533d
--- /dev/null
+++ b/runtime/onert/core/src/dumper/dot/DotBuilder.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_DUMPER_DOT_DOT_BUILDER_H__
+#define __ONERT_DUMPER_DOT_DOT_BUILDER_H__
+
+#include <sstream>
+
+#include "ir/Index.h"
+#include "ir/Operation.h"
+#include "ir/Operand.h"
+
+#include "OperationNode.h"
+#include "OperandNode.h"
+#include "DotOpSequenceInfo.h"
+
+using Operation = onert::ir::Operation;
+using Object = onert::ir::Operand;
+
+namespace onert
+{
+namespace dumper
+{
+namespace dot
+{
+
+// Accumulates Graphviz DOT statements for nodes, edges, and op-sequence
+// clusters in an internal string stream, then writes the whole digraph at once.
+class DotBuilder
+{
+public:
+  DotBuilder();
+
+public:
+  // Adds the node and one edge per neighbor recorded in dotinfo.
+  void update(const Node &dotinfo);
+  // Emits a cluster grouping the op sequence's operations and operands.
+  void addOpSequence(const DotOpSequenceInfo &op_sequence_info);
+
+  // Writes everything accumulated so far, wrapped in "digraph D { ... }".
+  void writeDot(std::ostream &os);
+
+private:
+  void add(const Node &dotinfo);
+  void addEdge(const Node &dotinfo1, const Node &dotinfo2);
+
+  std::stringstream _dot; // accumulated DOT statement text
+};
+
+} // namespace dot
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_DUMPER_DOT_DOT_BUILDER_H__
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.cc b/runtime/onert/core/src/dumper/dot/DotDumper.cc
new file mode 100644
index 000000000..9bd8ed0ef
--- /dev/null
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.cc
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+#include <unordered_map>
+
+#include "DotDumper.h"
+#include "DotBuilder.h"
+#include "DotOpSequenceInfo.h"
+#include "ir/OpSequence.h"
+#include "ir/OperationIndexMap.h"
+#include "backend/Backend.h"
+#include "backend/IConfig.h"
+#include "compiler/BackendManager.h"
+
+namespace onert
+{
+namespace dumper
+{
+namespace dot
+{
+
+// Dumps the graph (with lowering annotations when _lowered_graph is set) to
+// a file named "<tag>.dot". Does nothing when the dump level is OFF.
+void DotDumper::dump(const std::string &tag)
+{
+  if (_level == Level::OFF)
+  {
+    return;
+  }
+
+  onert::dumper::dot::DotBuilder dot_builder;
+
+  auto &operations = _graph.operations();
+  auto &operands = _graph.operands();
+
+  // Dot nodes are built in two passes: all operation nodes first, then the
+  // operand nodes filtered by dump level.
+  ir::OperationIndexMap<std::unique_ptr<Operation>> operation_nodes;
+  std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> operand_nodes;
+
+  operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &op) {
+    auto node = std::make_unique<Operation>(index, op);
+
+    for (auto output : op.getOutputs())
+    {
+      using onert::dumper::dot::Operand;
+      // NOTE(review): every operation output is tagged MODEL_OUTPUT here, even
+      // internal operands — confirm whether that is intentional.
+      auto child = std::make_shared<Operand>(output, Operand::Type::MODEL_OUTPUT);
+      node->addEdge(child);
+    }
+
+    operation_nodes.emplace(index, std::move(node));
+  });
+
+  // Assigns each backend a stable fill color; the map is built once (static)
+  // on first use.
+  auto backend_to_fillcolor = [](const backend::Backend *backend) {
+    static const auto map = []() {
+      std::unordered_map<const backend::Backend *, std::string> ret;
+      uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :(
+      for (const auto backend : compiler::BackendManager::get().getAll())
+      {
+        ret.emplace(backend, Node::BG_COLORS[index]);
+        index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0]));
+      }
+      return ret;
+    }();
+
+    auto itr = map.find(backend);
+    if (itr == map.end())
+    {
+      return Node::DEFAULT_FILLCOLOR;
+    }
+    else
+    {
+      return itr->second;
+    }
+  };
+
+  util::Set<ir::OperandIndex> shown_operand_set;
+
+  operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &object) {
+    // Level ALL shows everything; otherwise constants are hidden. Constants
+    // and model inputs are additionally dropped when nothing uses them.
+    bool showing_cond = false;
+    if (_level == Level::ALL)
+    {
+      showing_cond = true;
+    }
+    else
+    {
+      showing_cond = !object.isConstant();
+    }
+    if (object.isConstant() || _graph.getInputs().contains(index))
+    {
+      showing_cond = showing_cond && (object.getUses().size() > 0);
+    }
+    if (showing_cond)
+    {
+      shown_operand_set.add(index);
+
+      auto type = [&]() {
+        using onert::dumper::dot::Operand;
+        if (_graph.getInputs().contains(index))
+          return Operand::Type::MODEL_INPUT;
+        if (_graph.getOutputs().contains(index))
+          return Operand::Type::MODEL_OUTPUT;
+        return Operand::Type::INTERNAL;
+      }();
+
+      auto node = std::make_unique<Operand>(index, type);
+
+      {
+        // Display LowerInfo attributes
+        std::string label = std::to_string(index.value());
+        std::string fillcolor = "";
+        if (_lowered_graph)
+        {
+          auto lower_info = _lowered_graph->getLowerInfo(index);
+          const auto &def_factors = lower_info->def_factors();
+          if (def_factors.size() > 0)
+          {
+            label += "\\n[";
+            label += def_factors.getOnlyElement().backend()->config()->id();
+            label += "]";
+
+            fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend());
+          }
+        }
+        node->setAttribute("label", label);
+        node->setAttribute("fillcolor", fillcolor);
+      }
+
+      // Edge from the operand to every operation that consumes it.
+      for (auto operation_index : object.getUses().list())
+      {
+        auto &operation = operations.at(operation_index);
+        auto child = std::make_shared<Operation>(operation_index, operation);
+        node->addEdge(child);
+      }
+
+      operand_nodes.emplace(index, std::move(node));
+    }
+  });
+
+  // With lowering info available, group operations into backend-labelled
+  // clusters and color them per backend.
+  if (_lowered_graph)
+  {
+    const auto &op_seqs = _lowered_graph->op_seqs();
+    op_seqs.iterate([&](const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq) {
+      const auto lower_info = _lowered_graph->getLowerInfo(index);
+      auto fillcolor = backend_to_fillcolor(lower_info->backend());
+      std::string label =
+          std::to_string(index.value()) + " [" + lower_info->backend()->config()->id() + "]";
+      DotOpSequenceInfo op_sequence_info{index, op_seq, shown_operand_set};
+      op_sequence_info.label(label);
+      op_sequence_info.fillcolor(fillcolor);
+      dot_builder.addOpSequence(op_sequence_info);
+
+      // Set fillcolor of all operations in the op_seq
+      for (const auto &op : op_seq.operations())
+      {
+        auto found = operation_nodes.find(op.index);
+        if (found != operation_nodes.end())
+        {
+          auto &&op = found->second;
+          op->setAttribute("fillcolor", fillcolor);
+        }
+      }
+    });
+  }
+
+  for (const auto &e : operation_nodes)
+    dot_builder.update(*e.second);
+  for (const auto &e : operand_nodes)
+    dot_builder.update(*e.second);
+
+  // Dump to file
+  {
+    std::string file_name;
+    file_name += tag;
+    file_name += ".dot";
+    std::filebuf fb;
+
+    fb.open(file_name, std::ios::out);
+    std::ostream os(&fb);
+
+    dot_builder.writeDot(os);
+
+    fb.close();
+  }
+}
+
+} // namespace dot
+} // namespace dumper
+} // namespace onert
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h
new file mode 100644
index 000000000..668785a81
--- /dev/null
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+#include "ir/LoweredGraph.h"
+
+#ifndef __ONERT_DUMPER_DOT_DOT_DUMPER_H__
+#define __ONERT_DUMPER_DOT_DOT_DUMPER_H__
+
+namespace onert
+{
+namespace dumper
+{
+namespace dot
+{
+
+// Writes a Graphviz .dot rendering of a Graph (or LoweredGraph) for debugging.
+class DotDumper
+{
+public:
+  // Verbosity of the emitted graph.
+  enum Level
+  {
+    OFF = 0, //< Do not dump
+    ALL_BUT_CONSTANTS = 1, //< Emit all operations and operands but constants
+    ALL = 2 //< Emit all operations and operands
+  };
+
+public:
+  // Dump an un-lowered graph: no backend/op-sequence annotations available.
+  DotDumper(const ir::Graph &graph, Level level)
+      : _lowered_graph{nullptr}, _graph(graph), _level{level}
+  {
+  }
+  // Dump a lowered graph; nodes are annotated and colored per backend.
+  DotDumper(const ir::LoweredGraph *lowered_graph, Level level)
+      : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
+  {
+  }
+
+public:
+  /**
+   * @brief Dump to dot file as tag name if "GRAPH_DOT_DUMP" is set
+   *
+   * @param[in] tag The name of dot file that would be created
+   * @return N/A
+   */
+  // NOTE(review): dump() itself only checks _level; the GRAPH_DOT_DUMP gating
+  // presumably happens at the call site — confirm.
+  void dump(const std::string &tag);
+
+private:
+  const ir::LoweredGraph *_lowered_graph; // null when constructed from a plain Graph
+  const ir::Graph &_graph;
+  Level _level;
+};
+
+} // namespace dot
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_DUMPER_DOT_DOT_DUMPER_H__
diff --git a/runtime/onert/core/src/dumper/dot/DotOpSequenceInfo.cc b/runtime/onert/core/src/dumper/dot/DotOpSequenceInfo.cc
new file mode 100644
index 000000000..48dafc834
--- /dev/null
+++ b/runtime/onert/core/src/dumper/dot/DotOpSequenceInfo.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DotOpSequenceInfo.h"
+
+#include <sstream>
+
+namespace onert
+{
+namespace dumper
+{
+namespace dot
+{
+
+/**
+ * @brief Collect the operations of @p op_seq and the operands drawn inside its cluster
+ *
+ * An operand index is recorded only when it is already shown in the dot
+ * output (@p shown_operands) and is internal to the op sequence — the
+ * sequence's own inputs/outputs are excluded.
+ */
+DotOpSequenceInfo::DotOpSequenceInfo(const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq,
+                                     const util::Set<ir::OperandIndex> &shown_operands)
+    : _index{index}
+{
+  for (const auto &element : op_seq.operations())
+  {
+    _operations.insert(element.index);
+    for (auto o : element.node->getInputs())
+    {
+      // Must be a shown operand, not op_seq's inputs
+      if (shown_operands.contains(o) && !op_seq.getInputs().contains(o))
+      {
+        _operands.insert(o);
+      }
+    }
+    for (auto o : element.node->getOutputs())
+    {
+      // Must be a shown operand, not op_seq's outputs
+      if (shown_operands.contains(o) && !op_seq.getOutputs().contains(o))
+      {
+        _operands.insert(o);
+      }
+    }
+  }
+}
+
+} // namespace dot
+} // namespace dumper
+} // namespace onert
diff --git a/runtime/onert/core/src/dumper/dot/DotOpSequenceInfo.h b/runtime/onert/core/src/dumper/dot/DotOpSequenceInfo.h
new file mode 100644
index 000000000..c30626cbf
--- /dev/null
+++ b/runtime/onert/core/src/dumper/dot/DotOpSequenceInfo.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
+#define __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
+
+#include <unordered_set>
+
+#include "ir/Index.h"
+#include "ir/OpSequence.h"
+#include "util/Set.h"
+
+namespace onert
+{
+namespace dumper
+{
+namespace dot
+{
+
+/**
+ * @brief Presentation data for one op sequence (a dot "subgraph" cluster):
+ *        its label, fill color, and the operation/operand indices it contains
+ */
+class DotOpSequenceInfo
+{
+public:
+  DotOpSequenceInfo(const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq,
+                    const util::Set<ir::OperandIndex> &shown_operands);
+
+  ir::OpSequenceIndex index() const { return _index; }
+  std::string label() const { return _label; }
+  void label(const std::string &val) { _label = val; }
+  std::string fillcolor() const { return _fillcolor; }
+  void fillcolor(const std::string &val) { _fillcolor = val; }
+  // Operations belonging to this op sequence
+  const std::unordered_set<ir::OperationIndex> &operations() const { return _operations; }
+  // Operands drawn inside this cluster (see constructor for the filter)
+  const std::unordered_set<ir::OperandIndex> &operands() const { return _operands; }
+
+private:
+  ir::OpSequenceIndex _index;
+  std::string _label;
+  std::string _fillcolor;
+  std::unordered_set<ir::OperationIndex> _operations;
+  std::unordered_set<ir::OperandIndex> _operands;
+};
+
+} // namespace dot
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
diff --git a/runtime/onert/core/src/dumper/dot/Node.cc b/runtime/onert/core/src/dumper/dot/Node.cc
new file mode 100644
index 000000000..85d6e67a4
--- /dev/null
+++ b/runtime/onert/core/src/dumper/dot/Node.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Node.h"
+
+namespace onert
+{
+namespace dumper
+{
+namespace dot
+{
+
+// Default Graphviz color scheme and fill color applied to every node
+const std::string Node::DEFAULT_COLORSCHEME = "x11";
+const std::string Node::DEFAULT_FILLCOLOR = "white";
+// RED, BLUE, GREEN, PURPLE, ORANGE, YELLOW, BROWN, PINK
+// (indices into the active Graphviz color scheme, not RGB values)
+const std::string Node::BG_COLORS[8] = {"1", "2", "3", "4", "5", "6", "7", "8"};
+
+// Construct a node with the given id and default "filled" style attributes
+Node::Node(const std::string &id) : _id{id}
+{
+  // Set default values
+  _attributes["style"] = "filled";
+  _attributes["colorscheme"] = DEFAULT_COLORSCHEME;
+  _attributes["fillcolor"] = DEFAULT_FILLCOLOR;
+}
+
+// Insert or overwrite the attribute associated with key
+void Node::setAttribute(const std::string &key, const std::string &val) { _attributes[key] = val; }
+
+// Return the attribute value for key, or an empty string when not present
+std::string Node::getAttribute(const std::string &key)
+{
+  auto itr = _attributes.find(key);
+  if (itr == _attributes.end())
+  {
+    return "";
+  }
+  else
+  {
+    return itr->second;
+  }
+}
+
+} // namespace dot
+} // namespace dumper
+} // namespace onert
diff --git a/runtime/onert/core/src/dumper/dot/Node.h b/runtime/onert/core/src/dumper/dot/Node.h
new file mode 100644
index 000000000..a5d1d51a4
--- /dev/null
+++ b/runtime/onert/core/src/dumper/dot/Node.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Node.h
+ * @brief This file contains Node class
+ * @ingroup COM_AI_RUNTIME
+ *
+ */
+
+#ifndef __ONERT_DUMPER_DOT_NODE_H__
+#define __ONERT_DUMPER_DOT_NODE_H__
+
+#include <string>
+#include <memory>
+#include <vector>
+#include <unordered_map>
+
+namespace onert
+{
+namespace dumper
+{
+namespace dot
+{
+
+/**
+ * @brief Background color indices (correspond to entries of Node::BG_COLORS)
+ *
+ * NOTE(review): "PUPLE" looks like a typo for PURPLE; renaming the
+ * enumerator would break existing users, so it is only flagged here.
+ */
+enum BGCOLORS : int
+{
+  RED,
+  BLUE,
+  GREEN,
+  PUPLE,
+  ORANGE,
+  YELLOW,
+  BROWN,
+  PINK
+};
+
+/**
+ * @brief Class that represents a Node in "dot" format: an id, a key/value
+ *        attribute map, and a list of outgoing edges to other nodes
+ */
+class Node
+{
+public:
+  const static std::string DEFAULT_FILLCOLOR;
+  const static std::string DEFAULT_COLORSCHEME;
+  const static std::string BG_COLORS[8];
+
+public:
+  /**
+   * @brief Destroy the Node object
+   *
+   */
+  virtual ~Node() = default;
+
+  /**
+   * @brief Construct a new Node object with default style attributes
+   *
+   * @param id Unique identifier used as the dot node name
+   */
+  Node(const std::string &id);
+
+  /**
+   * @brief return id
+   *
+   * @return id
+   */
+  std::string id() const { return _id; }
+
+  /**
+   * @brief return attributes
+   *
+   * @return const reference of attributes object
+   */
+  const std::unordered_map<std::string, std::string> &attributes() const { return _attributes; }
+  /**
+   * @brief Store an attribute with key-value pair
+   *
+   * @param[in] key attribute's key
+   * @param[in] val attribute's value that is associated with the key
+   */
+  void setAttribute(const std::string &key, const std::string &val);
+  /**
+   * @brief Get the attribute value that is associated with key
+   *
+   * @param[in] key key of the attribute
+   * @return value that is associated with the key, or "" when absent
+   */
+  std::string getAttribute(const std::string &key);
+
+  /**
+   * @brief Add an edge in the graph, which is an outgoing edge
+   *
+   * @param[in] dotinfo A node that the new edge will be connected to
+   */
+  void addEdge(std::shared_ptr<Node> dotinfo) { _children.emplace_back(dotinfo); }
+  /**
+   * @brief Return list of edges
+   *
+   * @return Edges
+   */
+  const std::vector<std::shared_ptr<Node>> &edges() const { return _children; }
+
+private:
+  std::string _id;                                  // dot node name
+  std::unordered_map<std::string, std::string> _attributes; // dot attributes (style, color, ...)
+  std::vector<std::shared_ptr<Node>> _children;     // targets of outgoing edges
+};
+
+} // namespace dot
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_DUMPER_DOT_NODE_H__
diff --git a/runtime/onert/core/src/dumper/dot/OperandNode.cc b/runtime/onert/core/src/dumper/dot/OperandNode.cc
new file mode 100644
index 000000000..5a6015ca9
--- /dev/null
+++ b/runtime/onert/core/src/dumper/dot/OperandNode.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sstream>
+
+#include "OperandNode.h"
+#include "ir/Graph.h"
+#include "ir/operand/LowerInfo.h"
+
+namespace onert
+{
+namespace dumper
+{
+namespace dot
+{
+
+// Dot shapes: model inputs/outputs are drawn as double circles,
+// all other operands as ellipses
+const std::string Operand::INPUT_SHAPE = "doublecircle";
+const std::string Operand::OUTPUT_SHAPE = "doublecircle";
+const std::string Operand::OPERAND_SHAPE = "ellipse";
+const std::string Operand::BG_COLOR_SCHEME = "set18";
+
+// Construct an operand node named "operand<N>"; shape depends on its role
+Operand::Operand(const ir::OperandIndex &index, Type type)
+    : Node{"operand" + std::to_string(index.value())}
+{
+  {
+    // Map the operand role to the dot shape declared above
+    auto type_to_shape = [](Type type) {
+      switch (type)
+      {
+        case Type::MODEL_INPUT:
+          return INPUT_SHAPE;
+        case Type::MODEL_OUTPUT:
+          return OUTPUT_SHAPE;
+        case Type::UNDEFINED:
+        case Type::INTERNAL:
+        default:
+          return OPERAND_SHAPE;
+      }
+    };
+    setAttribute("shape", type_to_shape(type));
+  }
+
+  setAttribute("colorscheme", BG_COLOR_SCHEME);
+}
+
+} // namespace dot
+} // namespace dumper
+} // namespace onert
diff --git a/runtime/onert/core/src/dumper/dot/OperandNode.h b/runtime/onert/core/src/dumper/dot/OperandNode.h
new file mode 100644
index 000000000..2e7cc5861
--- /dev/null
+++ b/runtime/onert/core/src/dumper/dot/OperandNode.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Operand.h
+ * @brief This file contains Operand
+ * @ingroup COM_AI_RUNTIME
+ *
+ */
+
+#ifndef __ONERT_DUMPER_DOT_DOT_OPERAND_INFO_H__
+#define __ONERT_DUMPER_DOT_DOT_OPERAND_INFO_H__
+
+#include <vector>
+
+#include "Node.h"
+#include "ir/Operand.h"
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace dumper
+{
+namespace dot
+{
+
+/**
+ * @brief Class that represents an Operand as a dot node
+ *
+ */
+class Operand : public Node
+{
+public:
+  // Role of the operand within the model; selects the dot shape
+  enum class Type
+  {
+    UNDEFINED,
+    MODEL_INPUT,
+    MODEL_OUTPUT,
+    INTERNAL
+  };
+
+public:
+  static const std::string INPUT_SHAPE;
+  static const std::string OUTPUT_SHAPE;
+  static const std::string OPERAND_SHAPE;
+  static const std::string BG_COLOR_SCHEME;
+
+public:
+  /**
+   * @brief Construct a new Operand Node object
+   *
+   * @param[in] index Operand index
+   * @param[in] type Operand type
+   */
+  Operand(const ir::OperandIndex &index, Type type);
+
+private:
+  void addBackendLabel();
+};
+
+} // namespace dot
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_DUMPER_DOT_DOT_OPERAND_INFO_H__
diff --git a/runtime/onert/core/src/dumper/dot/OperationNode.cc b/runtime/onert/core/src/dumper/dot/OperationNode.cc
new file mode 100644
index 000000000..bee137e7c
--- /dev/null
+++ b/runtime/onert/core/src/dumper/dot/OperationNode.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sstream>
+
+#include "OperationNode.h"
+#include "ir/Graph.h"
+#include "ir/operation/LowerInfo.h"
+#include "backend/IConfig.h"
+#include "backend/Backend.h"
+
+namespace onert
+{
+namespace dumper
+{
+namespace dot
+{
+
+// Operations are drawn as rectangles on the "pastel18" Graphviz color scheme
+const std::string Operation::OPERATION_SHAPE = "rect";
+const std::string Operation::BG_COLOR_SCHEME = "pastel18";
+
+// Construct an operation node named "operation<N>", labeled "<N> : <op name>"
+Operation::Operation(const ir::OperationIndex &index, const ir::Operation &node)
+    : Node{"operation" + std::to_string(index.value())}
+{
+  setAttribute("label", std::to_string(index.value()) + " : " + node.name());
+  setAttribute("shape", OPERATION_SHAPE);
+  setAttribute("colorscheme", BG_COLOR_SCHEME);
+  setAttribute("fillcolor", DEFAULT_FILLCOLOR);
+}
+
+} // namespace dot
+} // namespace dumper
+} // namespace onert
diff --git a/runtime/onert/core/src/dumper/dot/OperationNode.h b/runtime/onert/core/src/dumper/dot/OperationNode.h
new file mode 100644
index 000000000..74a37d3fb
--- /dev/null
+++ b/runtime/onert/core/src/dumper/dot/OperationNode.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Operation.h
+ * @brief This file contains Operation
+ * @ingroup COM_AI_RUNTIME
+ *
+ */
+
+#ifndef __ONERT_DUMPER_DOT_DOT_NODE_INFO_H__
+#define __ONERT_DUMPER_DOT_DOT_NODE_INFO_H__
+
+#include "Node.h"
+#include "ir/Operation.h"
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace dumper
+{
+namespace dot
+{
+
+/**
+ * @brief Class that represents an Operation as a dot node
+ *
+ */
+class Operation : public Node
+{
+public:
+  static const std::string OPERATION_SHAPE;
+  static const std::string BG_COLOR_SCHEME;
+
+public:
+  /**
+   * @brief Construct a new Operation Node object
+   *
+   * @param[in] index operation index
+   * @param[in] node operation object
+   */
+  Operation(const ir::OperationIndex &index, const ir::Operation &node);
+};
+
+} // namespace dot
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_DUMPER_DOT_DOT_NODE_INFO_H__
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc
new file mode 100644
index 000000000..fbd76d3c5
--- /dev/null
+++ b/runtime/onert/core/src/exec/DataflowExecutor.cc
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DataflowExecutor.h"
+
+#include <cassert>
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Sum the scheduler-assigned ranks of all operations in @p operations
+ *
+ * Returns 0 when no rank table (_indexed_ranks) is available. An operation
+ * without a rank entry is expected to be a lone Permute; INT64_MAX is
+ * returned so that job is scheduled as soon as possible.
+ */
+int64_t DataflowExecutor::calculateRank(const std::vector<ir::Element> &operations)
+{
+  int64_t rank = 0;
+  if (!_indexed_ranks)
+  {
+    return rank;
+  }
+  for (const auto &element : operations)
+  {
+    auto it = _indexed_ranks->find(element.index);
+    if (it == _indexed_ranks->end())
+    {
+      assert(element.node->opcode() == ir::OpCode::Permute && operations.size() == 1);
+      // run Permute ASAP for next operations to be ready for other backends
+      return std::numeric_limits<int64_t>::max();
+    }
+    else
+    {
+      rank += it->second;
+    }
+  }
+  return rank;
+}
+
+// Move waiting job `id` into _ready_jobs, keyed by its op sequence's rank
+// (higher rank is picked first; see the _ready_jobs comparator)
+void DataflowExecutor::emplaceToReadyJobs(const uint32_t &id)
+{
+  auto &job = _waiting_jobs[id];
+  assert(job != nullptr);
+  auto &op_seq = _lowered_graph->op_seqs().at(_job_to_op_seq[job->index()]);
+  auto rank = calculateRank(op_seq.operations());
+  _ready_jobs.emplace(rank, std::move(job));
+}
+
+// Called when a job finishes: decrement the pending-input count of every
+// dependent job and promote those whose inputs are now fully satisfied
+void DataflowExecutor::notify(uint32_t finished_job_id)
+{
+  for (auto id : _output_info[finished_job_id])
+  {
+    assert(_input_info[id] > 0);
+    auto count = --_input_info[id];
+    if (count == 0) // No dependent jobs left, ready for execution
+    {
+      emplaceToReadyJobs(id);
+    }
+  }
+}
+// True when every slot of _waiting_jobs has been moved out (is nullptr)
+bool DataflowExecutor::noWaitingJobs()
+{
+  return std::all_of(_waiting_jobs.begin(), _waiting_jobs.end(),
+                     [](const std::unique_ptr<Job> &job) { return job == nullptr; });
+}
+
+/**
+ * @brief Build the job table and the inter-job dependency graph
+ *
+ * Each op sequence becomes one Job (indexed by a dense uint32_t). For every
+ * producer/consumer pair of op sequences, _output_info records the dependent
+ * job and _initial_input_info counts its unresolved inputs.
+ */
+DataflowExecutor::DataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+                                   const backend::TensorBuilderSet &tensor_builders,
+                                   compiler::CodeMap &&code_map)
+    : ExecutorBase{std::move(lowered_graph), tensor_builders}, _code_map{std::move(code_map)},
+      _profiling{false}
+{
+  VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
+
+  const auto &op_seqs = _lowered_graph->op_seqs();
+  // Assign jobs convert OpSequenceIndex to job index(uint32_t)
+  uint32_t next_job_index = 0;
+  std::unordered_map<ir::OpSequenceIndex, uint32_t> op_seq_to_job;
+  op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &) {
+    VERBOSE(DataflowExecutor) << "Create a job #" << next_job_index << " with OpSequenceIndex "
+                              << op_seq_index.value() << std::endl;
+    _finished_jobs.emplace_back(
+        std::make_unique<Job>(next_job_index, _code_map.at(op_seq_index).fn_seq.get()));
+    op_seq_to_job[op_seq_index] = next_job_index++;
+  });
+
+  _waiting_jobs.resize(next_job_index);
+  _output_info.resize(next_job_index);
+  _initial_input_info.resize(next_job_index, 0);
+
+  // NOTE(review): nested iterate makes this O(#op_seqs^2 * #outputs);
+  // acceptable at construction time but worth revisiting for large models
+  op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
+    auto job_index = op_seq_to_job[op_seq_index];
+    for (auto output : op_seq.getOutputs())
+    {
+      // Update output and input info
+      op_seqs.iterate(
+          [&](const ir::OpSequenceIndex &op_seq_cur_index, const ir::OpSequence &op_seq_cur) {
+            if (op_seq_cur.getInputs().contains(output))
+            {
+              auto dep_index = op_seq_to_job[op_seq_cur_index];
+              ++_initial_input_info[dep_index];
+              _output_info[job_index].push_back(dep_index);
+            }
+          });
+    }
+  });
+  // Reverse mapping: job index -> op sequence index
+  for (const auto &s : op_seq_to_job)
+    _job_to_op_seq.emplace(s.second, s.first);
+
+  _input_info = _initial_input_info;
+}
+
+/**
+ * @brief Run all jobs in dependency order, always picking the ready job
+ *        with the highest rank
+ *
+ * Jobs cycle through _finished_jobs -> _waiting_jobs -> _ready_jobs ->
+ * _finished_jobs, leaving the executor ready for the next call.
+ */
+void DataflowExecutor::executeImpl()
+{
+  assert(noWaitingJobs());
+
+  // Execution setup
+  _waiting_jobs.swap(_finished_jobs); // Move finished jobs to waiting jobs
+
+  // Seed the ready queue with jobs that have no unresolved inputs
+  for (uint32_t i = 0; i < _waiting_jobs.size(); ++i)
+  {
+    if (_input_info[i] == 0)
+    {
+      emplaceToReadyJobs(i);
+    }
+  }
+  assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs
+
+  _subject.notifyModelBegin(this);
+
+  while (!_ready_jobs.empty())
+  {
+    // Highest-rank ready job first (multimap ordered by std::greater)
+    auto job = std::move((_ready_jobs.begin())->second);
+    _ready_jobs.erase(_ready_jobs.begin());
+    auto job_index = job->index();
+    VERBOSE(DataflowExecutor) << "Run job #" << job_index << std::endl;
+
+    auto op_seq_index = _job_to_op_seq[job_index];
+    auto op_seq = &_lowered_graph->op_seqs().at(op_seq_index);
+    const backend::Backend *backend =
+        _lowered_graph->getLowerInfo()->operation.at(op_seq_index)->backend();
+
+    _subject.notifyJobBegin(this, op_seq, backend);
+
+    // In profiling mode run synchronously so timings are attributable
+    if (_profiling)
+      job->fn()->runSync();
+    else
+      job->run();
+
+    _subject.notifyJobEnd(this, op_seq, backend);
+    notify(job_index); // may push dependents into _ready_jobs
+    _finished_jobs[job_index] = std::move(job);
+  }
+  assert(noWaitingJobs());
+
+  _subject.notifyModelEnd(this);
+
+  // Reset input info for the next execution
+  _input_info = _initial_input_info;
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h
new file mode 100644
index 000000000..b49df4386
--- /dev/null
+++ b/runtime/onert/core/src/exec/DataflowExecutor.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_DATAFLOW_EXECUTOR_H__
+#define __ONERT_EXEC_DATAFLOW_EXECUTOR_H__
+
+#include <list>
+#include <map>
+#include <unordered_map>
+
+#include "exec/FunctionSequence.h"
+#include "Job.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/Index.h"
+#include <memory>
+#include "exec/ExecutorBase.h"
+#include "compiler/CodeMap.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Executor that runs op sequences as jobs in data-flow order
+ *
+ * Each op sequence becomes a Job; a job becomes ready once every job
+ * producing one of its inputs has finished, and ready jobs are dispatched
+ * in descending rank order (see _ready_jobs).
+ */
+class DataflowExecutor : public ExecutorBase
+{
+
+protected:
+  virtual void notify(uint32_t finished_job_id);
+  bool noWaitingJobs();
+
+public:
+  /**
+   * @brief Constructs a DataflowExecutor object
+   *
+   * @param lowered_graph LoweredGraph object
+   * @param tensor_builders Tensor builders that are currently used
+   * @param code_map OpSequence and its code map
+   */
+  DataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+                   const backend::TensorBuilderSet &tensor_builders, compiler::CodeMap &&code_map);
+
+  void executeImpl() override;
+  // When enabled, jobs run via runSync() so measurements are attributable
+  void setProfilingMode(bool profiling) { _profiling = profiling; }
+
+protected:
+  int64_t calculateRank(const std::vector<ir::Element> &operations);
+  void emplaceToReadyJobs(const uint32_t &id);
+
+protected:
+  compiler::CodeMap _code_map;
+  /**
+   * @brief A vector of finished jobs for current execution
+   *        After a run it has all the jobs of this execution for the next run
+   */
+  std::vector<std::unique_ptr<Job>> _finished_jobs;
+  /**
+   * @brief A vector of waiting jobs for current execution
+   *        All the jobs are moved from #_finished_jobs to it when start a run
+   */
+  std::vector<std::unique_ptr<Job>> _waiting_jobs;
+  /**
+   * @brief Jobs' output info
+   *        Used for notifying after finishing a job
+   */
+  std::vector<std::list<uint32_t>> _output_info;
+  // Per-job unresolved-input counts: the pristine copy and the working copy
+  std::vector<uint32_t> _initial_input_info;
+  std::vector<uint32_t> _input_info;
+  /**
+   * @brief A collection of jobs that are ready for execution
+   *        Jobs in it are ready to be scheduled.
+   *        Ordered by priority from `_indexed_ranks`
+   */
+  std::multimap<int64_t, std::unique_ptr<Job>, std::greater<int64_t>> _ready_jobs;
+
+  /// @brief Which job runs which op and function.
+  std::unordered_map<uint32_t, ir::OpSequenceIndex> _job_to_op_seq;
+  bool _profiling;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_DATAFLOW_EXECUTOR_H__
diff --git a/runtime/onert/core/src/exec/ExecTime.cc b/runtime/onert/core/src/exec/ExecTime.cc
new file mode 100644
index 000000000..6bf2744a9
--- /dev/null
+++ b/runtime/onert/core/src/exec/ExecTime.cc
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/ExecTime.h"
+
+#include <fstream>
+#include <cassert>
+#include <limits>
+#include <algorithm>
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Look up (or estimate) the execution time of an operation
+ *
+ * Returns NOT_FOUND when no measurement exists for the backend/operation/
+ * quantization combination. With an exact op_size match the stored time is
+ * returned; otherwise the result is linearly interpolated (or extrapolated)
+ * from the two nearest recorded sizes. The result is clamped to >= 1.
+ */
+int64_t ExecTime::getOperationExecTime(const backend::Backend *backend,
+                                       const std::string &operation, bool quant,
+                                       uint32_t op_size) const
+{
+  auto found_backend = _measurements.find(backend);
+  if (found_backend == _measurements.end())
+    return NOT_FOUND; // no execution time for this backend
+
+  auto found_operation_with_type = found_backend->second.find(operation);
+  if (found_operation_with_type == found_backend->second.end())
+    // no execution time for this operation
+    return NOT_FOUND;
+
+  auto found_operation = found_operation_with_type->second.find(quant);
+  if (found_operation == found_operation_with_type->second.end())
+    // no execution time for this operation
+    return NOT_FOUND;
+
+  auto found_size = found_operation->second.find(op_size);
+  if (found_size != found_operation->second.end())
+    return found_size->second; // found execution time
+
+  // Try to interpolate
+  if (found_operation->second.size() < 2)
+    // not possible to do linear interpolation
+    return found_operation->second.begin()->second;
+
+  // if we reach here, then this means, that there is no record, that is equal to op_size
+  auto upper_bound = found_operation->second.upper_bound(op_size); // > op_size
+  auto lower_bound = upper_bound;
+
+  if (upper_bound == found_operation->second.end()) // all values <= op_size
+  {
+    upper_bound--;
+    lower_bound = upper_bound;
+    lower_bound--;
+  }
+  else if (upper_bound == found_operation->second.begin()) // all values > op_size
+  {
+    upper_bound++;
+  }
+  else // op_size between
+  {
+    lower_bound--;
+  }
+
+  // Linear interpolation
+  const auto x0 = static_cast<int64_t>(lower_bound->first); // size
+  const auto x1 = static_cast<int64_t>(upper_bound->first); // size
+  const int64_t y0 = lower_bound->second;                   // time
+  const int64_t y1 = upper_bound->second;                   // time
+  const auto x = static_cast<int64_t>(op_size);
+
+  int64_t interpolated_value = y0 + (x - x0) * (y1 - y0) / (x1 - x0);
+
+  // In some cases ops with smaller inputs is executed slower than the one
+  // with larger inputs, more likely because of a backend's load difference
+  if (interpolated_value < 0 && x > x1)
+  {
+    return y0;
+  }
+  // It must be non-positive ONLY if it's lesser than both of them
+  assert(interpolated_value > 0 || x < x0);
+
+  // execution time must be non-negative
+  return std::max<int64_t>(interpolated_value, 1);
+}
+
+/**
+ * @brief Record a measured execution time for (backend, operation, quant, op_size)
+ *
+ * getMax() is a sentinel meaning "not executable": when it is recorded (or
+ * already present), all other measurements for the operation are discarded.
+ * A repeated measurement for the same op_size is averaged with the stored
+ * value, so recent runs outweigh older ones.
+ */
+void ExecTime::updateOperationExecTime(const backend::Backend *backend,
+                                       const std::string &operation, bool quant, uint32_t op_size,
+                                       int64_t time)
+{
+  // If the op is not implemented for some input, it should not be scheduled
+  const auto &recs = _measurements[backend][operation][quant];
+  if (time == getMax() ||
+      std::any_of(recs.begin(), recs.end(),
+                  [](std::pair<const uint32_t, const int64_t> p) { return p.second == getMax(); }))
+  {
+    _measurements[backend][operation][quant].clear();
+    _measurements[backend][operation][quant].emplace(op_size, getMax());
+  }
+  else
+  {
+    auto it = _measurements[backend][operation][quant].emplace(op_size, time);
+    if (!it.second)
+    {
+      // effect of the last measurement is bigger than the previous ones:
+      // this prefers new metrics over older ones, so it will adapt to backend changes
+      it.first->second = (it.first->second + time) / 2;
+    }
+  }
+}
+
+// Permutation times are stored as a pseudo-operation named after the
+// destination backend's id, under the source backend's measurements
+void ExecTime::updatePermuteTime(const backend::Backend *from_backend,
+                                 const backend::Backend *to_backend, bool quant, uint32_t op_size,
+                                 int64_t time)
+{
+  updateOperationExecTime(from_backend, to_backend->config()->id(), quant, op_size, time);
+}
+
+// Counterpart lookup for updatePermuteTime (same pseudo-operation key)
+int64_t ExecTime::getPermuteTime(const backend::Backend *from_backend,
+                                 const backend::Backend *to_backend, bool quant,
+                                 uint32_t op_size) const
+{
+  return getOperationExecTime(from_backend, to_backend->config()->id(), quant, op_size);
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc
new file mode 100644
index 000000000..9c66e51e4
--- /dev/null
+++ b/runtime/onert/core/src/exec/Execution.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/Execution.h"
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace exec
+{
+
+// Pre-size the I/O descriptor lists to match the executor's graph signature
+Execution::Execution(const std::shared_ptr<IExecutor> &executor) : _executor{executor}
+{
+  _io_desc.inputs.resize(_executor->graph().getInputs().size());
+  _io_desc.outputs.resize(_executor->graph().getOutputs().size());
+}
+
+// TODO Remove default parameter
+// Bind a user buffer to model input `index`, using the operand's own
+// type/shape info. Throws std::runtime_error when `length` is too small.
+void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length,
+                         ir::Layout layout)
+{
+  const auto input_index = graph().getInputs().at(index);
+  const auto info = graph().operands().at(input_index).info();
+
+  if (length < info.total_size())
+  {
+    throw std::runtime_error{"Too small length"};
+  }
+
+  _io_desc.inputs.at(index.value()) = std::make_unique<InputDesc>(info, buffer, length, layout);
+}
+
+// TODO Remove default parameter
+// Overload with caller-supplied type/shape instead of the operand's own info
+void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape,
+                         const void *buffer, size_t length, ir::Layout layout)
+{
+  const ir::OperandInfo info{shape, type};
+
+  if (length < info.total_size())
+  {
+    throw std::runtime_error{"Too small length"};
+  }
+
+  _io_desc.inputs.at(index.value()) = std::make_unique<InputDesc>(info, buffer, length, layout);
+}
+
+// TODO Remove default parameter
+// Bind a user buffer to model output `index`; see setInput for semantics
+void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout)
+{
+  const auto output_index = graph().getOutputs().at(index);
+  const auto info = graph().operands().at(output_index).info();
+
+  if (length < info.total_size())
+  {
+    throw std::runtime_error{"Too small length"};
+  }
+
+  _io_desc.outputs.at(index.value()) = std::make_unique<OutputDesc>(info, buffer, length, layout);
+}
+
+// TODO Remove default parameter
+// Output overload with caller-supplied type/shape
+void Execution::setOutput(const ir::IOIndex &index, const ir::TypeInfo &type,
+                          const ir::Shape &shape, void *buffer, size_t length, ir::Layout layout)
+{
+  const ir::OperandInfo info{shape, type};
+
+  if (length < info.total_size())
+  {
+    throw std::runtime_error{"Too small length"};
+  }
+
+  _io_desc.outputs.at(index.value()) = std::make_unique<OutputDesc>(info, buffer, length, layout);
+}
+
+// Replace the stored input descriptor with one carrying a new layout;
+// NOTE(review): dereferences the descriptor, so the input must already be
+// set via setInput() — confirm callers uphold this ordering.
+void Execution::setInputLayout(const ir::IOIndex &index, ir::Layout layout)
+{
+  const auto &input_desc = _io_desc.inputs.at(index.value());
+  _io_desc.inputs.at(index.value()) =
+      std::make_unique<InputDesc>(input_desc->info, input_desc->buffer, input_desc->size, layout);
+}
+
+// Same as setInputLayout, for an already-set output descriptor
+void Execution::setOutputLayout(const ir::IOIndex &index, ir::Layout layout)
+{
+  const auto &output_desc = _io_desc.outputs.at(index.value());
+  _io_desc.outputs.at(index.value()) = std::make_unique<OutputDesc>(
+      output_desc->info, output_desc->buffer, output_desc->size, layout);
+}
+
+// Run the model synchronously with the bound I/O descriptors
+void Execution::execute()
+{
+  VERBOSE(Execution) << "Start execution" << std::endl;
+
+  _executor->execute(_io_desc);
+  finished = true;
+
+  VERBOSE(Execution) << "Execution finished" << std::endl;
+}
+
+// Kick off execute() on a background thread; pair with waitFinish()
+void Execution::startExecute()
+{
+  VERBOSE(Execution) << "Create asynchronous execution thread" << std::endl;
+
+  _exec_thread = std::make_unique<std::thread>(&Execution::execute, this);
+}
+
+// Block until the asynchronous execution completes.
+// NOTE(review): `finished` is written from the worker thread in execute()
+// and read here without synchronization; join() provides ordering for this
+// call path, but polling isFinished() from another thread would race —
+// consider std::atomic<bool>. Also, calling waitFinish() twice would join
+// an already-joined thread.
+void Execution::waitFinish()
+{
+  VERBOSE(Execution) << "Wait to finish execution" << std::endl;
+
+  _exec_thread->join();
+  finished = true;
+}
+
+// True once execute() has completed (see the synchronization note above)
+bool Execution::isFinished(void) const { return finished; }
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/ExecutionObservee.cc b/runtime/onert/core/src/exec/ExecutionObservee.cc
new file mode 100644
index 000000000..ddb1fb6a0
--- /dev/null
+++ b/runtime/onert/core/src/exec/ExecutionObservee.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutionObservee.h"
+
+namespace onert
+{
+namespace exec
+{
+
+// Take ownership of `observer` and append it to the notification list; observers are
+// later notified in the order they were added.
+void ExecutionObservee::add(std::unique_ptr<IExecutionObserver> observer)
+{
+  _observers.push_back(std::move(observer));
+}
+
+// Broadcast "whole model is about to run" to every registered observer.
+void ExecutionObservee::notifyModelBegin(IExecutor *executor)
+{
+  for (auto &o : _observers)
+  {
+    o->handleBegin(executor);
+  }
+}
+
+// Broadcast "whole model finished running" to every registered observer.
+void ExecutionObservee::notifyModelEnd(IExecutor *executor)
+{
+  for (auto &o : _observers)
+  {
+    o->handleEnd(executor);
+  }
+}
+
+// Broadcast "one job (an op sequence on a backend) is about to run" to all observers.
+void ExecutionObservee::notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
+                                       const backend::Backend *backend)
+{
+  for (auto &o : _observers)
+  {
+    o->handleBegin(executor, op_seq, backend);
+  }
+}
+
+// Broadcast "one job (an op sequence on a backend) finished running" to all observers.
+void ExecutionObservee::notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
+                                     const backend::Backend *backend)
+{
+  for (auto &o : _observers)
+  {
+    o->handleEnd(executor, op_seq, backend);
+  }
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/ExecutionObservee.h b/runtime/onert/core/src/exec/ExecutionObservee.h
new file mode 100644
index 000000000..49d409a3a
--- /dev/null
+++ b/runtime/onert/core/src/exec/ExecutionObservee.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_EXECUTION_OBSERVEE_H__
+#define __ONERT_EXEC_EXECUTION_OBSERVEE_H__
+
+#include <list>
+
+#include "exec/ExecutionObservers.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class that holds a set of execution observers and broadcasts execution
+ *        events (model begin/end, job begin/end) to each of them
+ */
+class ExecutionObservee
+{
+public:
+  /**
+   * @brief Register an observer
+   *
+   * @param observer Observer to be added (ownership is transferred)
+   */
+  void add(std::unique_ptr<IExecutionObserver> observer);
+  // Notify all observers that whole-model execution is starting / has finished
+  void notifyModelBegin(IExecutor *executor);
+  void notifyModelEnd(IExecutor *executor);
+  // Notify all observers that a single job (op sequence on a backend) is
+  // starting / has finished
+  void notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
+                      const backend::Backend *backend);
+  void notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
+                    const backend::Backend *backend);
+
+private:
+  // Owned observers, notified in the order they were added
+  std::list<std::unique_ptr<IExecutionObserver>> _observers;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_EXECUTION_OBSERVEE_H__
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc b/runtime/onert/core/src/exec/ExecutionObservers.cc
new file mode 100644
index 000000000..e8fcafa2b
--- /dev/null
+++ b/runtime/onert/core/src/exec/ExecutionObservers.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/ExecutionObservers.h"
+
+#include <string>
+
+#include "util/logging.h"
+#include "ir/operation/Permute.h"
+#include "exec/IExecutor.h"
+#include "misc/polymorphic_downcast.h"
+#include "ir/OpSequence.h"
+
+namespace onert
+{
+
+namespace exec
+{
+
+// Start the backend-provided timer just before a job runs. The executor and op
+// sequence arguments are unused; only the backend's timer is needed here.
+void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence *,
+                                  const onert::backend::Backend *backend)
+{
+  _timer = backend->config()->timer();
+  if (_timer == nullptr)
+    throw std::runtime_error("To profile backend timer() method must be implemented");
+  _timer->handleBegin();
+}
+
+// Stop the timer started in handleBegin() and record the measured duration in the
+// ExecTime database, keyed by backend, operation name, quantization flag and the
+// summed byte size of the operation's inputs and outputs. Permute operations are
+// recorded separately per (source backend, destination backend) pair.
+void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq,
+                                const backend::Backend *backend)
+{
+  _timer->handleEnd();
+  const auto timer_res = _timer->getTime();
+
+  // NOTE This assumes there is just one operation in a op_seq
+  auto node = op_seq->operations().at(0).node;
+  auto node_name = node->name();
+  VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << timer_res << std::endl;
+
+  // fill ExecTime:
+  // Quantization is judged from the first input operand's data type only.
+  bool is_quantized = exec->graph().operands().at(node->getInputs().at(0)).typeInfo().type() ==
+                      ir::DataType::QUANT8_ASYMM;
+
+  // Total byte size of all input and output operands serves as the size key.
+  uint32_t size = 0;
+  for (const auto &ind : node->getInputs() + node->getOutputs())
+  {
+    size += exec->graph().operands().at(ind).info().total_size();
+  }
+  if (node_name == "Permute")
+  {
+    auto *permute_node = nnfw::misc::polymorphic_downcast<const ir::operation::Permute *>(node);
+    assert(permute_node != nullptr);
+    _et->updatePermuteTime(permute_node->param().input_backend_ctx->backend(),
+                           permute_node->param().output_backend_ctx->backend(), is_quantized, size,
+                           timer_res);
+  }
+  else
+  {
+    _et->updateOperationExecTime(backend, node_name, is_quantized, size, timer_res);
+  }
+}
+
+// Open the trace output file and wire the event collector to the event recorder.
+ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath)
+    : _ofs{filepath, std::ofstream::out}, _recorder{}, _collector{&_recorder}
+{
+}
+
+// Flush all recorded events to the trace file when the observer is destroyed.
+ChromeTracingObserver::~ChromeTracingObserver() { _recorder.writeToFile(_ofs); }
+
+// Record the start of whole-graph execution as a "runtime"-category trace event.
+void ChromeTracingObserver::handleBegin(IExecutor *)
+{
+  _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"});
+}
+
+// Record the start of a single job, categorized by the backend id it runs on.
+void ChromeTracingObserver::handleBegin(IExecutor *, const ir::OpSequence *op_seq,
+                                        const backend::Backend *backend)
+{
+  std::string backend_id = backend->config()->id();
+  _collector.onEvent(
+      EventCollector::Event{EventCollector::Edge::BEGIN, backend_id, opSequenceTag(op_seq)});
+}
+
+// Record the end of a single job, categorized by the backend id it ran on.
+void ChromeTracingObserver::handleEnd(IExecutor *, const ir::OpSequence *op_seq,
+                                      const backend::Backend *backend)
+{
+  std::string backend_id = backend->config()->id();
+  _collector.onEvent(
+      EventCollector::Event{EventCollector::Edge::END, backend_id, opSequenceTag(op_seq)});
+}
+
+// Record the end of whole-graph execution as a "runtime"-category trace event.
+void ChromeTracingObserver::handleEnd(IExecutor *)
+{
+  _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"});
+}
+
+// Build a human-readable tag for an op sequence: "$<index> <first-op-name>", plus a
+// "(+N)" suffix when the sequence contains more than one operation.
+std::string ChromeTracingObserver::opSequenceTag(const ir::OpSequence *op_seq)
+{
+  const auto seq_size = op_seq->size();
+  if (seq_size == 0)
+    return "Empty OpSequence";
+
+  const auto head = op_seq->operations().at(0);
+  std::string label = "$" + std::to_string(head.index.value()) + " " + head.node->name();
+  if (seq_size > 1)
+    label += " (+" + std::to_string(seq_size - 1) + ")";
+  return label;
+}
+
+} // namespace exec
+
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc
new file mode 100644
index 000000000..dd7e84af8
--- /dev/null
+++ b/runtime/onert/core/src/exec/ExecutorBase.cc
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutorBase.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace exec
+{
+
+// Take ownership of the lowered graph, resolve every model input/output operand to
+// the backend tensor that holds it, and collect each builder's tensor manager.
+ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
+                           const backend::TensorBuilderSet &tensor_builders)
+    : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, _mutex()
+{
+  // For each operand index, ask every tensor builder in turn; the first builder that
+  // knows the operand provides its tensor. Exactly one builder is expected to own it.
+  auto build_itensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
+    std::vector<std::shared_ptr<backend::ITensor>> list;
+    for (auto ind : ind_seq)
+    {
+      std::shared_ptr<backend::ITensor> tensor;
+      for (auto &tensor_builder : tensor_builders)
+      {
+        tensor = tensor_builder->tensorAt(ind);
+        if (tensor != nullptr)
+          break;
+      }
+      assert(tensor != nullptr);
+      list.push_back(tensor);
+    }
+    return list;
+  };
+
+  _input_tensors = build_itensor_list(_graph.getInputs());
+  _output_tensors = build_itensor_list(_graph.getOutputs());
+
+  // Prepare each TensorManager on each backend
+  for (auto &tensor_builder : tensor_builders)
+  {
+    auto tensor_manager = tensor_builder->releaseTensorManager();
+    assert(tensor_manager != nullptr);
+    _tensor_mgrs.insert(std::move(tensor_manager));
+  }
+}
+
+// Create a typed ISource for one input by dispatching on the operand's data type to
+// the templated source<T>() helper. BOOL8, QUANT8_ASYMM and UINT8 all use uint8_t
+// element storage. Throws for data types with no source implementation yet.
+std::unique_ptr<ISource> ExecutorBase::source(const ir::IOIndex &index, const ir::TypeInfo &type,
+                                              const void *buffer, size_t length,
+                                              ir::Layout io_layout)
+{
+  using ir::DataType;
+  switch (type.type())
+  {
+    case DataType::FLOAT32:
+      return source<float>(index, buffer, length, io_layout);
+    case DataType::INT32:
+      return source<int32_t>(index, buffer, length, io_layout);
+    case DataType::UINT32:
+      return source<uint32_t>(index, buffer, length, io_layout);
+    case DataType::BOOL8:
+    case DataType::QUANT8_ASYMM:
+    case DataType::UINT8:
+      return source<uint8_t>(index, buffer, length, io_layout);
+    case DataType::QUANT8_SYMM:
+      return source<int8_t>(index, buffer, length, io_layout);
+    default:
+      throw std::runtime_error("Not supported yet");
+  }
+}
+
+// Create a typed ISink for one output by dispatching on the operand's data type to
+// the templated sink<T>() helper; mirrors ExecutorBase::source() exactly.
+std::unique_ptr<ISink> ExecutorBase::sink(const ir::IOIndex &index, const ir::TypeInfo &type,
+                                          void *buffer, size_t length, ir::Layout io_layout)
+{
+  using ir::DataType;
+  switch (type.type())
+  {
+    case DataType::FLOAT32:
+      return sink<float>(index, buffer, length, io_layout);
+    case DataType::INT32:
+      return sink<int32_t>(index, buffer, length, io_layout);
+    case DataType::UINT32:
+      return sink<uint32_t>(index, buffer, length, io_layout);
+    case DataType::BOOL8:
+    case DataType::QUANT8_ASYMM:
+    case DataType::UINT8:
+      return sink<uint8_t>(index, buffer, length, io_layout);
+    case DataType::QUANT8_SYMM:
+      return sink<int8_t>(index, buffer, length, io_layout);
+    default:
+      throw std::runtime_error("Not supported yet");
+  }
+}
+
+// Run one inference under the instance mutex: push user input buffers into backend
+// tensors, run executeImpl(), then pull backend output tensors into user buffers.
+// Entries of `desc` that are nullptr are treated as optional inputs/outputs and
+// skipped.
+void ExecutorBase::execute(const IODescription &desc)
+{
+  // For thread-safe, use mutex
+  // TODO: if all used backends on this executor are thread-safe,
+  // do not need to use mutex (otherwise, use mutex)
+  std::lock_guard<std::mutex> lock(_mutex);
+
+  std::vector<std::unique_ptr<ISource>> sources{_graph.getInputs().size()};
+  std::vector<std::unique_ptr<ISink>> sinks{_graph.getOutputs().size()};
+
+  // Set input(s)
+  for (uint32_t n = 0; n < _graph.getInputs().size(); ++n)
+  {
+    ir::IOIndex input_index{n};
+    ir::OperandIndex index{_graph.getInputs().at(input_index)};
+
+    if (desc.inputs.at(n) == nullptr)
+    {
+      // Optional input
+      continue;
+    }
+
+    const auto operand_li = _lowered_graph->getLowerInfo()->operand.at(index).get();
+    if (operand_li->def_factors().empty())
+    {
+      // This input is not used (i.e. constant, EX. reshape's axis)
+      continue;
+    }
+
+    const auto &input = *desc.inputs.at(n);
+    sources.at(n) =
+        source(input_index, input.info.typeInfo(), input.buffer, input.size, input.layout);
+
+    // Copy (and permute if needed) user data into the backend tensor under its
+    // access protocol.
+    auto setter = [&](::onert::backend::ITensor &tensor) { sources.at(n)->push(tensor); };
+
+    _input_tensors[n]->access(setter);
+  }
+
+  executeImpl();
+
+  // Get output(s)
+  for (uint32_t n = 0; n < _graph.getOutputs().size(); ++n)
+  {
+    ir::IOIndex output_index{n};
+    // Optional output
+    if (desc.outputs.at(n) == nullptr)
+    {
+      continue;
+    }
+    const auto &output = *desc.outputs.at(n);
+    sinks.at(n) =
+        sink(output_index, output.info.typeInfo(), output.buffer, output.size, output.layout);
+
+    // Copy (and permute if needed) the backend tensor out to the user buffer.
+    auto getter = [&](::onert::backend::ITensor &tensor) { sinks.at(n)->pull(tensor); };
+
+    _output_tensors[n]->access(getter);
+  }
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h
new file mode 100644
index 000000000..cb5dde8eb
--- /dev/null
+++ b/runtime/onert/core/src/exec/ExecutorBase.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_EXECUTOR_BASE_H__
+#define __ONERT_EXEC_EXECUTOR_BASE_H__
+
+#include <mutex>
+
+#include "Source.h"
+#include "exec/ExecutionObservers.h"
+#include "Sink.h"
+#include "exec/IExecutor.h"
+#include "ir/LoweredGraph.h"
+#include "ir/LowerInfoMap.h"
+#include "backend/IConfig.h"
+#include "backend/Backend.h"
+#include "compiler/OperandContext.h"
+#include "exec/ExecTime.h"
+#include "exec/IFunction.h"
+#include "backend/ITensorManager.h"
+#include "backend/ITensorBuilder.h"
+#include "exec/ExecutionObservee.h"
+#include <list>
+
+namespace onert
+{
+namespace exec
+{
+
+// Common base for all executors: owns the lowered graph, the resolved input/output
+// backend tensors, and the observer subject; subclasses implement executeImpl().
+class ExecutorBase : public IExecutor
+{
+public:
+  /**
+   * @brief Construct a new ExecutorBase object
+   * @param lowered_graph LoweredGraph object (ownership is taken)
+   * @param tensor_builders Tensor builders that are currently used
+   */
+  ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
+               const backend::TensorBuilderSet &tensor_builders);
+
+  virtual ~ExecutorBase() = default;
+
+  const ir::Graph &graph() final { return _graph; }
+
+  // Run one inference; thread-safe via the internal mutex.
+  void execute(const IODescription &desc) final;
+
+  // Used only in Dataflow and Parallel Executors
+  void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final
+  {
+    _indexed_ranks = std::move(ranks);
+  };
+
+  // Scheduling strategy implemented by each concrete executor.
+  virtual void executeImpl(void) = 0;
+
+  void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
+
+private:
+  // Type-dispatching factories; see ExecutorBase.cc.
+  std::unique_ptr<ISource> source(const ir::IOIndex &index, const ir::TypeInfo &type,
+                                  const void *buffer, size_t length, ir::Layout io_layout);
+  std::unique_ptr<ISink> sink(const ir::IOIndex &index, const ir::TypeInfo &type, void *buffer,
+                              size_t length, ir::Layout io_layout);
+
+  // Create the source for one input: a permuting copy when the user layout and the
+  // backend tensor layout are both known and differ (NHWC<->NCHW), a plain copy
+  // otherwise.
+  template <typename T>
+  std::unique_ptr<ISource> source(const ir::IOIndex &index, const void *buffer, size_t length,
+                                  ir::Layout io_layout)
+  {
+    const auto operand_index = _graph.getInputs().at(index);
+    const auto &operand = _graph.operands().at(operand_index);
+
+    const auto tensor = _input_tensors[index.value()];
+    const auto tensor_layout = tensor->layout();
+
+    if (((io_layout == ir::Layout::NHWC) && (tensor_layout == ir::Layout::NCHW)) ||
+        ((io_layout == ir::Layout::NCHW) && (tensor_layout == ir::Layout::NHWC)))
+    {
+      return std::make_unique<PermutateSource<T>>(buffer, length, operand.shape(), io_layout);
+    }
+    // TODO Change this to return error
+    // NOTE(review): the second conjunct repeats NCHW twice, so the whole clause is
+    // just `tensor_layout != NCHW`; one occurrence was likely meant to be NHWC —
+    // confirm the intended invariant before changing it.
+    assert(io_layout != ir::Layout::UNKNOWN ||
+           (tensor_layout != ir::Layout::NCHW && tensor_layout != ir::Layout::NCHW));
+
+    return std::make_unique<CopySource<T>>(buffer, length, operand.shape());
+  }
+
+  // Create the sink for one output; mirrors source<T>() with the layout roles
+  // reversed.
+  template <typename T>
+  std::unique_ptr<ISink> sink(const ir::IOIndex &index, void *buffer, size_t length,
+                              ir::Layout io_layout)
+  {
+    const auto operand_index = _graph.getOutputs().at(index);
+    const auto &operand = _graph.operands().at(operand_index);
+    const auto tensor = _output_tensors[index.value()];
+    const auto tensor_layout = tensor->layout();
+
+    if (((tensor_layout == ir::Layout::NCHW) && (io_layout == ir::Layout::NHWC)) ||
+        ((tensor_layout == ir::Layout::NHWC) && (io_layout == ir::Layout::NCHW)))
+    {
+      return std::make_unique<PermutateSink<T>>(buffer, length, operand.shape(), io_layout);
+    }
+    // TODO Change this to return error
+    // NOTE(review): same duplicated-NCHW condition as in source<T>() above.
+    assert(io_layout != ir::Layout::UNKNOWN ||
+           (tensor_layout != ir::Layout::NCHW && tensor_layout != ir::Layout::NCHW));
+
+    return std::make_unique<CopySink<T>>(buffer, length, operand.shape());
+  }
+
+protected:
+  ExecutionObservee _subject;                                   // observer fan-out
+  std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
+  std::unique_ptr<ir::LoweredGraph> _lowered_graph;
+  const ir::Graph &_graph;                                      // graph inside _lowered_graph
+  std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
+  std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
+  backend::TensorManagerSet _tensor_mgrs;
+  std::mutex _mutex;                                            // serializes execute()
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_EXECUTOR_BASE_H__
diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc
new file mode 100644
index 000000000..bf205af80
--- /dev/null
+++ b/runtime/onert/core/src/exec/FunctionSequence.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/FunctionSequence.h"
+
+namespace onert
+{
+namespace exec
+{
+
+// Execute every appended function once, in append order.
+void FunctionSequence::run()
+{
+  for (const auto &fn : _functions)
+    fn->run();
+}
+
+// Execute every appended function synchronously, in append order.
+void FunctionSequence::runSync()
+{
+  for (const auto &fn : _functions)
+    fn->runSync();
+}
+
+// Give every appended function a chance to prepare before the first run.
+void FunctionSequence::prepare()
+{
+  for (const auto &fn : _functions)
+    fn->prepare();
+}
+
+// Take ownership of `function` and add it to the end of the sequence.
+void FunctionSequence::append(std::unique_ptr<IFunction> &&function)
+{
+  _functions.emplace_back(std::move(function));
+}
+
+// Apply `fn` to each contained function, in append order.
+void FunctionSequence::iterate(const std::function<void(IFunction &)> &fn)
+{
+  for (const auto &entry : _functions)
+    fn(*entry);
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/JSONExecTime.cc b/runtime/onert/core/src/exec/JSONExecTime.cc
new file mode 100644
index 000000000..72a18def1
--- /dev/null
+++ b/runtime/onert/core/src/exec/JSONExecTime.cc
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/JSONExecTime.h"
+#include "backend/IConfig.h"
+#include <fstream>
+
+namespace onert
+{
+namespace exec
+{
+/**
+ * @brief Helper function for reading string from stream
+ *
+ * Reads characters until the closing double quote (which is consumed but not
+ * stored) or until the stream goes bad.
+ *
+ * @param str Output string
+ * @param stream File stream
+ */
+void readString(std::string &str, std::ifstream &stream)
+{
+  str.clear();
+  char buf;
+  while (stream.good())
+  {
+    stream.get(buf);
+    if (buf == '"')
+      break;
+    str.push_back(buf);
+  }
+}
+
+/**
+ * @brief Helper function for reading bool from stream
+ *
+ * Reads one character ('1' means true, anything else false), then consumes one
+ * more character (the closing quote written by printBool).
+ *
+ * @param quant Output bool
+ * @param stream File stream
+ */
+void readBool(bool &quant, std::ifstream &stream)
+{
+  char buf;
+  stream.get(buf);
+  quant = (buf == '1');
+  stream.get(buf);
+}
+
+// Write `str` to the stream wrapped in double quotes (JSON string form).
+void printString(const std::string &str, std::ofstream &stream) { stream << "\"" << str << "\""; }
+
+// Write `quant` as a quoted 1/0 digit; readBool() parses exactly this form back.
+void printBool(bool quant, std::ofstream &stream) { stream << "\"" << quant << "\""; }
+
+// Parse one operation's measurement array ("[size, time], [size, time], ...") from
+// the stream and store each (size -> time) pair into _measurements for the given
+// backend/operation/quantization key. Records for backends missing from _backends
+// are parsed but discarded. Hand-rolled bracket/comma counting, not a general JSON
+// parser.
+void JSON::readOperation(const std::string &backend, const std::string &operation, bool quant,
+                         std::ifstream &stream)
+{
+  uint32_t size = 0;
+  int64_t time = 0;
+
+  std::string int_buf;
+  char buf;
+  int number_of_closed_braces = 0;
+  int number_of_commas = 0;
+
+  while (stream.good())
+  {
+    stream.get(buf);
+
+    // Track bracket nesting; digits accumulate into int_buf until a delimiter.
+    switch (buf)
+    {
+      case ']':
+      {
+        number_of_closed_braces++;
+        break;
+      }
+      case '[':
+      {
+        number_of_closed_braces--;
+        break;
+      }
+      default:
+      {
+        if (std::isdigit(buf))
+        {
+          int_buf.push_back(buf);
+        }
+        break;
+      }
+    }
+
+    // The ']' that closes the outer array ends this operation's record.
+    if (number_of_closed_braces == 1)
+      break;
+
+    // A number is complete at the ']' closing an inner pair or at the ',' between
+    // the two numbers of a pair; even/odd comma count tells size from time.
+    if ((buf == ']' && number_of_closed_braces == 0) ||
+        (buf == ',' && number_of_closed_braces == -1))
+    {
+      switch (number_of_commas % 2)
+      {
+        case 0:
+        {
+          size = static_cast<uint32_t>(std::atoi(int_buf.c_str()));
+          break;
+        }
+        case 1:
+        {
+          time = static_cast<int64_t>(std::atol(int_buf.c_str()));
+          auto bf = _backends.find(backend);
+          if (bf != _backends.end())
+          {
+            _measurements[bf->second][operation][quant][size] = time;
+          } // we ignore the records for unsupported backends
+          break;
+        }
+      }
+      number_of_commas++;
+      int_buf.clear();
+    }
+  }
+}
+// Write one operation's measurements as "[size, time], [size, time], ..." and then
+// seek back two characters so the caller overwrites the trailing ", " separator.
+void JSON::printOperation(const std::map<uint32_t, int64_t> &operation_info,
+                          std::ofstream &stream) const
+{
+  for (const auto &items : operation_info)
+  {
+    stream << "[" << items.first << ", " << items.second << "], ";
+  }
+  stream.seekp(-2, std::ofstream::end);
+}
+
+// Serialize _measurements to _measurement_file as nested JSON:
+// {"backend": {"operation": {"quant": [[size, time], ...], ...}, ...}, ...}.
+// Each seekp(-2) rewinds over a trailing ", " before the closing brace/bracket.
+// Throws std::runtime_error when the file cannot be opened for writing.
+void JSON::uploadOperationsExecTime() const
+{
+  std::ofstream stream(_measurement_file);
+  if (!stream.is_open())
+  {
+    throw std::runtime_error("Failed to save backend config file");
+  }
+  else
+  {
+    stream << "{";
+    for (const auto &backend : _measurements)
+    {
+      printString(backend.first->config()->id(), stream);
+      stream << ": {";
+      for (const auto &operation : backend.second)
+      {
+        printString(operation.first, stream);
+        stream << ": {";
+        for (const auto &type : operation.second)
+        {
+          printBool(type.first, stream);
+          stream << ": [";
+          printOperation(type.second, stream);
+          stream << "], ";
+        }
+        stream.seekp(-2, std::ofstream::end);
+        stream << "}, ";
+      }
+      stream.seekp(-2, std::ofstream::end);
+      stream << "}, ";
+    }
+    stream.seekp(-2, std::ofstream::end);
+    stream << "}";
+    stream.close();
+  }
+}
+
+// Parse _measurement_file (written by uploadOperationsExecTime) back into
+// _measurements. Brace depth identifies what a quoted token means: depth 1 is a
+// backend id, depth 2 an operation name, depth 3 the quantization flag; '[' starts
+// a measurement array handled by readOperation(). Missing file is silently ignored.
+void JSON::loadOperationsExecTime()
+{
+  std::ifstream stream(_measurement_file);
+  if (stream.is_open())
+  {
+    std::string backend;
+    std::string operation;
+    bool quant = false;
+    char buf;
+    int number_of_open_braces = 0;
+
+    while (stream.good())
+    {
+      stream.get(buf);
+      switch (buf)
+      {
+        case '{':
+          number_of_open_braces++;
+          break;
+        case '}':
+          number_of_open_braces--;
+          break;
+        case '"':
+        {
+          if (number_of_open_braces == 1)
+          {
+            // read backend string
+            readString(backend, stream);
+          }
+          if (number_of_open_braces == 2)
+          {
+            // read operation string
+            readString(operation, stream);
+          }
+          if (number_of_open_braces == 3)
+          {
+            // read quantization flag ("1"/"0")
+            readBool(quant, stream);
+          }
+          break;
+        }
+        case '[':
+        {
+          // reading and creating all info for operation
+          readOperation(backend, operation, quant, stream);
+          break;
+        }
+        default:
+          break;
+      }
+    }
+    stream.close();
+  }
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/Job.cc b/runtime/onert/core/src/exec/Job.cc
new file mode 100644
index 000000000..dc0d140f0
--- /dev/null
+++ b/runtime/onert/core/src/exec/Job.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Job.h"
+
+#include <cassert>
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace exec
+{
+
+// Bind a job id to the (non-owned) compiled function it will run.
+Job::Job(uint32_t index, IFunction *fn) : _index{index}, _fn{fn} {}
+
+// Execute this job's compiled function.
+void Job::run() { _fn->run(); }
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/Job.h b/runtime/onert/core/src/exec/Job.h
new file mode 100644
index 000000000..ff08ec8ce
--- /dev/null
+++ b/runtime/onert/core/src/exec/Job.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_JOB_H__
+#define __ONERT_EXEC_JOB_H__
+
+#include <unordered_set>
+
+#include "exec/IFunction.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/Backend.h"
+
+namespace onert
+{
+namespace exec
+{
+
+// A unit of work for the dataflow/parallel executors: an operation index paired
+// with the compiled function that runs it. The function is not owned and must
+// outlive the Job.
+class Job
+{
+public:
+  /**
+   * @brief Constructs a Job object
+   *
+   * @param index Operation index for this job
+   * @param fn compiled code to run this job (not owned)
+   */
+  Job(uint32_t index, IFunction *fn);
+  /**
+   * @brief Execute the compiled code
+   */
+  void run();
+  /**
+   * @brief Return job index
+   *
+   * @return Job index
+   */
+  uint32_t index() const { return _index; }
+  /**
+   * @brief Return the function to be executed
+   *
+   * @return Pointer of the function
+   */
+  IFunction *fn() { return _fn; }
+
+private:
+  uint32_t _index; // operation index this job corresponds to
+  IFunction *_fn;  // non-owning pointer to the compiled code
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_JOB_H__
diff --git a/runtime/onert/core/src/exec/LinearExecutor.cc b/runtime/onert/core/src/exec/LinearExecutor.cc
new file mode 100644
index 000000000..188282d4d
--- /dev/null
+++ b/runtime/onert/core/src/exec/LinearExecutor.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LinearExecutor.h"
+
+namespace onert
+{
+namespace exec
+{
+
+// Run every op-sequence's function in the pre-sorted linear order, wrapping the
+// whole model and each job with the corresponding observer notifications.
+void LinearExecutor::executeImpl()
+{
+  _subject.notifyModelBegin(this);
+  for (auto &&code : _code)
+  {
+    const auto op_seq = code.op_seq;
+    const auto backend = code.lower_info->backend();
+    _subject.notifyJobBegin(this, op_seq, backend);
+    code.fn_seq->run();
+    _subject.notifyJobEnd(this, op_seq, backend);
+  }
+  _subject.notifyModelEnd(this);
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h
new file mode 100644
index 000000000..f49f51e3e
--- /dev/null
+++ b/runtime/onert/core/src/exec/LinearExecutor.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file LinearExecutor.h
+ * @brief This file contains LinearExecutor class to define and run execution phase
+ */
+
+#ifndef __ONERT_EXEC_EXECUTOR_H_
+#define __ONERT_EXEC_EXECUTOR_H_
+
+#include "ir/Index.h"
+#include "ExecutorBase.h"
+#include "compiler/Linear.h"
+#include "exec/FunctionSequence.h"
+#include "compiler/CodeMap.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to handle execution phase. Simply runs the sequence of operations
+ *        that is sorted in topological order
+ */
+class LinearExecutor final : public ExecutorBase
+{
+public:
+  /**
+   * @brief Construct a new LinearExecutor object
+   * @param lowered_graph LoweredGraph object
+   * @param tensor_builders Tensor builders that are currently used
+   * @param code_map OpSequence and its code map
+   * @param order Topologically sorted op-sequence indices; determines run order
+   */
+  LinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+                 const backend::TensorBuilderSet &tensor_builders, compiler::CodeMap &&code_map,
+                 const std::vector<ir::OpSequenceIndex> &order)
+      : ExecutorBase{std::move(lowered_graph), tensor_builders}
+  {
+    // Materialize the code in execution order so executeImpl() can just iterate.
+    for (auto index : order)
+    {
+      _code.emplace_back(std::move(code_map.at(index)));
+    }
+  }
+
+public:
+  void executeImpl(void) override;
+
+private:
+  // Compiled code for each op sequence, stored in execution order
+  std::vector<compiler::CodeAndInfo> _code;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_EXECUTOR_H_
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.cc b/runtime/onert/core/src/exec/ParallelExecutor.cc
new file mode 100644
index 000000000..f7e5de67d
--- /dev/null
+++ b/runtime/onert/core/src/exec/ParallelExecutor.cc
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ParallelExecutor.h"
+
+#include <cassert>
+
+#include "util/logging.h"
+#include "exec/IFunction.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief IFunction decorator that invokes setup/teardown callbacks around run()
+ *
+ * Used by ParallelExecutor to emit job-begin/job-end notifications from the
+ * worker thread that actually executes the job. Does not own the wrapped
+ * function.
+ */
+class HookFunction : public IFunction
+{
+public:
+  HookFunction(IFunction *fn, const std::function<void()> &setup,
+               const std::function<void()> &teardown)
+      : _fn{fn}, _setup{setup}, _teardown{teardown}
+  {
+  }
+
+public:
+  void run() override
+  {
+    _setup();
+    _fn->run();
+    _teardown();
+  }
+  // NOTE(review): this throws a const char*, not a std::exception subclass,
+  // so a `catch (std::exception &)` handler will NOT catch it — verify this
+  // is intentional.
+  void runSync() override { throw("runSync is needed just for profiling in Dataflow executor"); }
+
+private:
+  IFunction *_fn; // wrapped function (not owned)
+  std::function<void()> _setup;
+  std::function<void()> _teardown;
+};
+
+// Called from worker threads when a job finishes. Updates the dependency
+// bookkeeping (DataflowExecutor::notify) under _mu_jobs, then wakes the
+// scheduling loop. The lock is released before notify_all so a woken thread
+// can acquire _mu_jobs without immediately blocking again.
+void ParallelExecutor::notify(uint32_t finished_job_id)
+{
+  std::unique_lock<std::mutex> lock{_mu_jobs};
+
+  DataflowExecutor::notify(finished_job_id);
+
+  lock.unlock();
+  _cv_jobs.notify_all();
+}
+
+// All job bookkeeping is inherited from DataflowExecutor; this class only
+// adds per-backend thread pools plus the synchronization around them.
+ParallelExecutor::ParallelExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+                                   const backend::TensorBuilderSet &tensor_builders,
+                                   compiler::CodeMap &&code_map)
+    : DataflowExecutor{std::move(lowered_graph), tensor_builders, std::move(code_map)}
+{
+  VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
+}
+
+// Runs the dataflow scheduling loop: dispatches ready jobs to per-backend
+// thread pools (instead of running them inline) and blocks until every job
+// has been executed.
+void ParallelExecutor::executeImpl()
+{
+  // Init scheduler
+  // TODO Consider to have distinct backend set in LowerInfoMap
+  ir::BackendSet backends;
+  for (auto &itr : _lowered_graph->getLowerInfo()->operation)
+  {
+    backends.add(itr.second->backend());
+  }
+  _scheduler = std::make_unique<ParallelScheduler>(backends);
+
+  assert(noWaitingJobs());
+
+  // Execution setup
+  _waiting_jobs.swap(_finished_jobs); // Move finished jobs to waiting jobs
+
+  // Seed the ready queue with jobs that have no unresolved inputs
+  for (uint32_t i = 0; i < _waiting_jobs.size(); ++i)
+  {
+    VERBOSE(ParallelExecutor) << i << ": " << _input_info[i] << std::endl;
+    if (_input_info[i] == 0)
+    {
+      emplaceToReadyJobs(i);
+    }
+  }
+  assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs
+
+  VERBOSE(ParallelExecutor) << "INITIAL JOBS : " << _ready_jobs.size() << std::endl;
+
+  _subject.notifyModelBegin(this);
+  while (true)
+  {
+    std::unique_lock<std::mutex> lock{_mu_jobs};
+
+    if (_ready_jobs.empty())
+    {
+      // Sleep until a worker makes a job ready (via notify()) or all work is done
+      _cv_jobs.wait(lock, [this] { return !_ready_jobs.empty() || noWaitingJobs(); });
+      // Check finish condition
+      if (_ready_jobs.empty() && noWaitingJobs())
+      {
+        break;
+      }
+    }
+
+    auto job = std::move(_ready_jobs.begin()->second);
+    _ready_jobs.erase(_ready_jobs.begin());
+
+    // Release the lock before scheduling: the teardown hook below re-locks
+    // _mu_jobs through notify()
+    lock.unlock();
+
+    VERBOSE(ParallelExecutor) << "Assigning fn #" << job->index() << std::endl;
+
+    auto job_index = job->index();
+    auto op_sequence_index = _job_to_op_seq[job_index];
+    auto op_seq = &_lowered_graph->op_seqs().at(op_sequence_index);
+    auto backend = _lowered_graph->getLowerInfo()->operation.at(op_sequence_index)->backend();
+    // setup/teardown run on the worker thread around the job body; teardown
+    // also marks the job finished so its dependents can become ready
+    auto setup = [&, op_seq, backend]() { _subject.notifyJobBegin(this, op_seq, backend); };
+    auto teardown = [&, job_index, op_seq, backend]() {
+      _subject.notifyJobEnd(this, op_seq, backend);
+      notify(job_index);
+    };
+
+    _scheduler->assign(std::make_unique<HookFunction>(job->fn(), setup, teardown), backend);
+    _finished_jobs[job_index] = std::move(job);
+  }
+
+  assert(noWaitingJobs());
+
+  // Wait for all the jobs done
+  _scheduler->finish();
+  _subject.notifyModelEnd(this);
+
+  // Reset input info for the next execution
+  _input_info = _initial_input_info;
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h
new file mode 100644
index 000000000..d9387db5c
--- /dev/null
+++ b/runtime/onert/core/src/exec/ParallelExecutor.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_PARALLEL_EXECUTOR_H__
+#define __ONERT_EXEC_PARALLEL_EXECUTOR_H__
+
+#include <list>
+#include <queue>
+#include <unordered_map>
+
+#include "exec/FunctionSequence.h"
+#include "Job.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/Index.h"
+#include <memory>
+#include "exec/DataflowExecutor.h"
+#include "ParallelScheduler.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to execute Graph in parallel
+ */
+class ParallelExecutor : public DataflowExecutor
+{
+protected:
+  /// Marks a job finished and wakes the scheduling loop (called from worker threads)
+  void notify(uint32_t finished_job_id) override;
+
+public:
+  /**
+   * @brief Constructs a ParallelExecutor object
+   *
+   * @param lowered_graph LoweredGraph object
+   * @param tensor_builders Tensor builders that are currently used
+   * @param code_map OpSequence and its code map
+   */
+  ParallelExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+                   const backend::TensorBuilderSet &tensor_builders, compiler::CodeMap &&code_map);
+
+  void executeImpl() override;
+
+private:
+  std::condition_variable _cv_jobs; // signaled when jobs become ready or finish
+  std::mutex _mu_jobs;              // guards job state shared with worker threads
+  std::unique_ptr<ParallelScheduler> _scheduler; // created anew on each executeImpl()
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_PARALLEL_EXECUTOR_H__
diff --git a/runtime/onert/core/src/exec/ParallelScheduler.cc b/runtime/onert/core/src/exec/ParallelScheduler.cc
new file mode 100644
index 000000000..1b2cf80bc
--- /dev/null
+++ b/runtime/onert/core/src/exec/ParallelScheduler.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ParallelScheduler.h"
+
+#include <cassert>
+
+#include <memory>
+#include "util/logging.h"
+
+namespace onert
+{
+namespace exec
+{
+
+// Create one ThreadPool per backend (default pool size, see ThreadPool) so
+// jobs assigned to different backends can run concurrently.
+ParallelScheduler::ParallelScheduler(const ir::BackendSet &backends)
+{
+  assert(!backends.empty());
+
+  for (auto backend : backends)
+  {
+    _thread_pools[backend] = std::make_unique<ThreadPool>();
+  }
+}
+
+// Enqueue fn on the thread pool dedicated to the given backend.
+// Throws std::out_of_range (via map::at) if the backend was not in the
+// set given to the constructor.
+void ParallelScheduler::assign(std::unique_ptr<IFunction> &&fn, const backend::Backend *backend)
+{
+  assert(!_thread_pools.empty());
+
+  _thread_pools.at(backend)->enqueue(std::move(fn));
+}
+
+// Block until every backend's pool has drained its queue and joined its
+// worker threads.
+void ParallelScheduler::finish()
+{
+  for (auto &itr : _thread_pools)
+  {
+    itr.second->finish();
+  }
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/ParallelScheduler.h b/runtime/onert/core/src/exec/ParallelScheduler.h
new file mode 100644
index 000000000..3a53b6c7f
--- /dev/null
+++ b/runtime/onert/core/src/exec/ParallelScheduler.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_PARALLEL_SCHEDULER_H__
+#define __ONERT_EXEC_PARALLEL_SCHEDULER_H__
+
+#include <unordered_map>
+#include <memory>
+
+#include "exec/IFunction.h"
+#include "ir/BackendSet.h"
+#include "ThreadPool.h"
+
+namespace onert
+{
+namespace exec
+{
+
+class ParallelScheduler
+{
+public:
+  /**
+   * @brief Constructs ParallelScheduler object
+   *
+   * @param backends Backend set
+   */
+  ParallelScheduler(const ir::BackendSet &backends);
+  /**
+   * @brief Assign a task to the given backend
+   *
+   * @param[in] fn Function to be assigned
+   * @param[in] backend Target backend
+   */
+  void assign(std::unique_ptr<IFunction> &&fn, const backend::Backend *backend);
+  /**
+   * @brief Block until all jobs are finished
+   */
+  void finish();
+
+private:
+  // One ThreadPool per backend, keyed by backend pointer
+  std::unordered_map<const backend::Backend *, std::unique_ptr<ThreadPool>> _thread_pools;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_PARALLEL_SCHEDULER_H__
diff --git a/runtime/onert/core/src/exec/Sink.h b/runtime/onert/core/src/exec/Sink.h
new file mode 100644
index 000000000..238b0eddd
--- /dev/null
+++ b/runtime/onert/core/src/exec/Sink.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_SINK_H__
+#define __ONERT_EXEC_SINK_H__
+
+#include <cassert>
+
+#include <memory>
+#include "util/feature/nchw/Reader.h"
+#include "util/feature/nchw/View.h"
+#include "util/feature/nhwc/Reader.h"
+#include "util/feature/nhwc/View.h"
+#include "util/Utils.h"
+#include <misc/feature/IndexIterator.h>
+
+namespace onert
+{
+namespace exec
+{
+/**
+ * @brief Interface to copy a tensor's contents out into a user buffer
+ */
+struct ISink
+{
+  virtual ~ISink() = default;
+
+  /// Copy (and possibly layout-permute) @p tensor into the sink's buffer
+  virtual void pull(::onert::backend::ITensor &tensor) const = 0;
+};
+
+// Create second level inheritance: the first level is used as a reference type in use-case places
+template <typename T> class ITemplSink : public ISink
+{
+public:
+  /**
+   * @brief Construct a sink over a user-provided output buffer
+   * @param[in] output_buffer Destination buffer, interpreted as T*
+   * @param[in] output_size   Destination size in bytes
+   * @param[in] shape         Shape of the output operand
+   * @param[in] copy          true: plain copy; false: NHWC<->NCHW permutation
+   * @param[in] io_layout     Layout of the user (I/O) side buffer
+   */
+  ITemplSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
+             const bool copy, ir::Layout io_layout)
+      : _output_buffer{reinterpret_cast<T *>(output_buffer)}, _output_size{output_size},
+        _shape{shape}, _copy{copy}, _io_layout{io_layout}
+  {
+  }
+
+protected:
+  // Copy tensor contents into _output_buffer, permuting layout when !_copy
+  void pullUnif(onert::backend::ITensor &tensor) const
+  {
+    // Permutation mode requires the tensor/IO layouts to actually differ
+    assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
+            (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
+           _copy);
+    auto input_buffer = tensor.buffer();
+    auto rank = _shape.rank();
+
+    // Fast path: a single contiguous memcpy. `4 + _copy` relies on bool->int
+    // promotion: permutation only happens at rank 4, so copy mode allows
+    // rank <= 4 here while permute mode allows only rank <= 3.
+    if (!tensor.has_padding() && rank < 4 + _copy)
+    {
+      memcpy(_output_buffer, input_buffer, _output_size);
+      return;
+    }
+
+    // Slow path: per-row copies honoring the tensor's (padded) strides.
+    // NOTE(review): input_buffer arithmetic is in bytes while _output_buffer
+    // arithmetic is in elements of T — this is consistent only if calcOffset
+    // returns a byte offset; verify against ITensor's contract.
+    switch (rank)
+    {
+      case 0:
+      case 1:
+      {
+        memcpy(_output_buffer, input_buffer, _output_size);
+        break;
+      }
+      case 2:
+      {
+        const int32_t copy_len = _shape.dim(1);
+
+        for (auto i = 0; i < _shape.dim(0); ++i)
+        {
+          ir::Coordinates coords{i, 0};
+          memcpy(_output_buffer + i * copy_len, input_buffer + tensor.calcOffset(coords),
+                 copy_len * sizeof(T));
+        }
+        break;
+      }
+      case 3:
+      {
+        const int32_t dim1 = _shape.dim(1);
+        const int32_t dim2 = _shape.dim(2);
+
+        for (auto i = 0; i < _shape.dim(0); ++i)
+        {
+          for (auto j = 0; j < _shape.dim(1); ++j)
+          {
+            ir::Coordinates coords{i, j, 0};
+            memcpy(_output_buffer + i * dim1 * dim2 + j * dim2,
+                   input_buffer + tensor.calcOffset(coords), dim2 * sizeof(T));
+          }
+        }
+        break;
+      }
+      case 4:
+      {
+        if (_copy)
+        {
+          // Same-layout copy: memcpy the innermost dimension per (i, j, k)
+          const int32_t dim1 = _shape.dim(1);
+          const int32_t dim2 = _shape.dim(2);
+          const int32_t dim3 = _shape.dim(3);
+
+          for (auto i = 0; i < _shape.dim(0); ++i)
+          {
+            for (auto j = 0; j < _shape.dim(1); ++j)
+            {
+              for (auto k = 0; k < _shape.dim(2); ++k)
+              {
+                ir::Coordinates coords{i, j, k, 0};
+                memcpy(_output_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
+                       input_buffer + tensor.calcOffset(coords), dim3 * sizeof(T));
+              }
+            }
+          }
+        }
+        else
+        {
+          // Element-wise layout permutation between the tensor layout and
+          // the requested I/O layout (see assert above)
+          const auto shape = _shape.asFeature(_io_layout);
+
+          if (_io_layout == ir::Layout::NHWC)
+          {
+            // Tensor is NCHW; write an NHWC user buffer
+            const util::feature::nchw::Reader<T> from(&tensor);
+            util::feature::nhwc::View<T> into(shape, _output_buffer, _output_size);
+            ::nnfw::misc::feature::iterate(shape)
+                << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+                     const auto value = from.at(batch, ch, row, col);
+                     into.at(batch, row, col, ch) = value;
+                   };
+          }
+          else if (_io_layout == ir::Layout::NCHW)
+          {
+            // Tensor is NHWC; write an NCHW user buffer
+            const util::feature::nhwc::Reader<T> from(&tensor);
+            util::feature::nchw::View<T> into(shape, _output_buffer, _output_size);
+            ::nnfw::misc::feature::iterate(shape)
+                << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+                     const auto value = from.at(batch, row, col, ch);
+                     into.at(batch, ch, row, col) = value;
+                   };
+          }
+          else
+          {
+            throw std::runtime_error("Wrong Layout");
+          }
+        }
+        break;
+      }
+      default:
+        throw std::runtime_error("NYI: rank > 4");
+        break;
+    }
+  }
+
+private:
+  T *_output_buffer;         // user buffer (destination), not owned
+  const size_t _output_size; // destination size in bytes
+  const ir::Shape _shape;
+  const bool _copy;          // plain copy vs layout permutation
+  const ir::Layout _io_layout;
+};
+
+/**
+ * @brief Sink that performs NHWC<->NCHW permutation while pulling (copy == false)
+ */
+template <typename T> class PermutateSink final : public ITemplSink<T>
+{
+public:
+  PermutateSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
+                ir::Layout io_layout)
+      : ITemplSink<T>(output_buffer, output_size, shape, false, io_layout)
+  {
+  }
+
+public:
+  void pull(onert::backend::ITensor &tensor) const override { ITemplSink<T>::pullUnif(tensor); }
+};
+
+// Only supports NHWC format front-end(NNAPI) now
+/**
+ * @brief Sink that copies without layout permutation (copy == true)
+ */
+template <typename T> class CopySink final : public ITemplSink<T>
+{
+public:
+  CopySink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
+           ir::Layout io_layout = ir::Layout::UNKNOWN)
+      : ITemplSink<T>(output_buffer, output_size, shape, true, io_layout)
+  {
+  }
+
+public:
+  void pull(onert::backend::ITensor &tensor) const override { ITemplSink<T>::pullUnif(tensor); }
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_SINK_H__
diff --git a/runtime/onert/core/src/exec/Source.h b/runtime/onert/core/src/exec/Source.h
new file mode 100644
index 000000000..5792d8f2e
--- /dev/null
+++ b/runtime/onert/core/src/exec/Source.h
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_SOURCE_H__
+#define __ONERT_EXEC_SOURCE_H__
+
+#include <cassert>
+
+#include <memory>
+#include "util/feature/nchw/Reader.h"
+#include "util/feature/nchw/View.h"
+#include "util/feature/nhwc/Reader.h"
+#include "util/feature/nhwc/View.h"
+#include "util/Utils.h"
+#include <misc/feature/IndexIterator.h>
+#include <ir/Layout.h>
+#include "ir/Shape.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Interface to copy user-provided input data into a tensor
+ */
+struct ISource
+{
+  virtual ~ISource() = default;
+
+  /// Copy (and possibly layout-permute) the source buffer into @p tensor
+  virtual void push(::onert::backend::ITensor &tensor) const = 0;
+};
+
+// Create second level inheritance: the first level is used as a reference type in use-case places
+template <typename T> class ITemplSource : public ISource
+{
+public:
+  /**
+   * @brief Construct a source over a user-provided input buffer
+   * @param[in] input_buffer Source buffer, interpreted as const T*
+   * @param[in] input_size   Source size in bytes
+   * @param[in] shape        Shape of the input operand
+   * @param[in] copy         true: plain copy; false: NHWC<->NCHW permutation
+   * @param[in] io_layout    Layout of the user (I/O) side buffer
+   */
+  ITemplSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
+               const bool copy, ir::Layout io_layout)
+      : _input_buffer{reinterpret_cast<const T *>(input_buffer)}, _input_size{input_size},
+        _shape{shape}, _copy(copy), _io_layout{io_layout}
+  {
+  }
+
+  virtual void push(::onert::backend::ITensor &tensor) const = 0;
+
+protected:
+  // Copy _input_buffer into the tensor, permuting layout when !_copy.
+  // Mirror image of ITemplSink::pullUnif — the same structure, with source
+  // and destination swapped.
+  void pushUnif(onert::backend::ITensor &tensor) const
+  {
+    // Permutation mode requires the tensor/IO layouts to actually differ
+    assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
+            (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
+           _copy);
+    auto output_buffer = tensor.buffer();
+    auto rank = _shape.rank();
+
+    // Fast path: contiguous memcpy; `4 + _copy` uses bool->int promotion
+    // (permute mode caps this path at rank 3, copy mode at rank 4)
+    if (!tensor.has_padding() && rank < 4 + _copy)
+    {
+      memcpy(output_buffer, _input_buffer, _input_size);
+      return;
+    }
+
+    // Slow path: per-row copies honoring the tensor's (padded) strides.
+    // NOTE(review): output_buffer arithmetic is in bytes, _input_buffer
+    // arithmetic in elements of T — consistent only if calcOffset returns
+    // a byte offset; verify.
+    switch (rank)
+    {
+      case 0:
+      case 1:
+      {
+        memcpy(output_buffer, _input_buffer, _input_size);
+        break;
+      }
+      case 2:
+      {
+        const int32_t copy_len = _shape.dim(1);
+
+        for (auto i = 0; i < _shape.dim(0); ++i)
+        {
+          ir::Coordinates coords{i, 0};
+          memcpy(output_buffer + tensor.calcOffset(coords), _input_buffer + i * copy_len,
+                 copy_len * sizeof(T));
+        }
+        break;
+      }
+      case 3:
+      {
+        const int32_t dim1 = _shape.dim(1);
+        const int32_t dim2 = _shape.dim(2);
+
+        for (auto i = 0; i < _shape.dim(0); ++i)
+        {
+          for (auto j = 0; j < _shape.dim(1); ++j)
+          {
+            ir::Coordinates coords{i, j, 0};
+            memcpy(output_buffer + tensor.calcOffset(coords),
+                   _input_buffer + i * dim1 * dim2 + j * dim2, dim2 * sizeof(T));
+          }
+        }
+        break;
+      }
+      case 4:
+      {
+        if (_copy)
+        {
+          // Same-layout copy: memcpy the innermost dimension per (i, j, k)
+          const int32_t dim1 = _shape.dim(1);
+          const int32_t dim2 = _shape.dim(2);
+          const int32_t dim3 = _shape.dim(3);
+          for (auto i = 0; i < _shape.dim(0); ++i)
+          {
+            for (auto j = 0; j < _shape.dim(1); ++j)
+            {
+              for (auto k = 0; k < _shape.dim(2); ++k)
+              {
+                ir::Coordinates coords{i, j, k, 0};
+                memcpy(output_buffer + tensor.calcOffset(coords),
+                       _input_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
+                       dim3 * sizeof(T));
+              }
+            }
+          }
+        }
+        else
+        {
+          // Element-wise layout permutation from the I/O layout into the
+          // tensor's layout (see assert above)
+          const auto shape = _shape.asFeature(_io_layout);
+
+          if (_io_layout == ir::Layout::NCHW)
+          {
+            // User buffer is NCHW; tensor is NHWC
+            const util::feature::nchw::Reader<T> from(shape, _input_buffer, _input_size);
+            util::feature::nhwc::View<T> into(&tensor);
+            ::nnfw::misc::feature::iterate(shape)
+                << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+                     const auto value = from.at(batch, ch, row, col);
+                     into.at(batch, row, col, ch) = value;
+                   };
+          }
+          else if (_io_layout == ir::Layout::NHWC)
+          {
+            // User buffer is NHWC; tensor is NCHW
+            const util::feature::nhwc::Reader<T> from(shape, _input_buffer, _input_size);
+            util::feature::nchw::View<T> into(&tensor);
+            ::nnfw::misc::feature::iterate(shape)
+                << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+                     const auto value = from.at(batch, row, col, ch);
+                     into.at(batch, ch, row, col) = value;
+                   };
+          }
+          else
+          {
+            throw std::runtime_error("Wrong Layout");
+          }
+        }
+
+        break;
+      }
+      default:
+        throw std::runtime_error("NYI: rank > 4");
+        break;
+    }
+  }
+
+private:
+  const T *_input_buffer;   // user buffer (source), not owned
+  const size_t _input_size; // source size in bytes
+  const ir::Shape _shape;
+  const bool _copy;         // plain copy vs layout permutation
+  const ir::Layout _io_layout;
+};
+
+/**
+ * @brief Source that performs NHWC<->NCHW permutation while pushing (copy == false)
+ */
+template <typename T> class PermutateSource final : public ITemplSource<T>
+{
+public:
+  PermutateSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
+                  ir::Layout io_layout)
+      : ITemplSource<T>(input_buffer, input_size, shape, false, io_layout)
+  {
+  }
+
+public:
+  void push(onert::backend::ITensor &tensor) const override
+  {
+    // do NHWC_TO_NCHW or NCHW_TO_NHWC permutation
+    ITemplSource<T>::pushUnif(tensor);
+  }
+};
+
+/**
+ * @brief Source that copies without layout permutation (copy == true)
+ */
+template <typename T> class CopySource final : public ITemplSource<T>
+{
+public:
+  CopySource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
+             ir::Layout io_layout = ir::Layout::UNKNOWN)
+      : ITemplSource<T>(input_buffer, input_size, shape, true, io_layout)
+  {
+  }
+
+public:
+  void push(onert::backend::ITensor &tensor) const override { ITemplSource<T>::pushUnif(tensor); }
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_SOURCE_H__
diff --git a/runtime/onert/core/src/exec/ThreadPool.cc b/runtime/onert/core/src/exec/ThreadPool.cc
new file mode 100644
index 000000000..c8e0e3265
--- /dev/null
+++ b/runtime/onert/core/src/exec/ThreadPool.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ThreadPool.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace exec
+{
+
+// All worker threads run the single shared WorkQueue; std::ref is required
+// because WorkQueue holds a std::mutex and is therefore non-copyable.
+ThreadPool::ThreadPool(uint32_t num_threads)
+{
+  assert(num_threads >= 1);
+
+  for (uint32_t i = 0; i < num_threads; i++)
+  {
+    _threads.emplace_back(std::ref(_worker));
+  }
+}
+
+// If finish() already ran, _threads is empty and nothing remains to do.
+// Otherwise force-terminate the workers (WorkQueue asserts the queue is
+// already drained in this mode) and join before members are destroyed.
+ThreadPool::~ThreadPool()
+{
+  if (!_threads.empty())
+  {
+    _worker.terminate();
+    join();
+  }
+}
+
+// Forward a job to the shared worker queue
+void ThreadPool::enqueue(std::unique_ptr<IFunction> &&fn) { _worker.enqueue(std::move(fn)); }
+
+// Jobs still waiting in the queue (a job being run has already been popped)
+uint32_t ThreadPool::numJobsInQueue() { return _worker.numJobsInQueue(); }
+
+// Join and drop all worker threads; clearing _threads prevents the
+// destructor from terminating/joining a second time
+void ThreadPool::join()
+{
+  for (auto &thread : _threads)
+  {
+    thread.join();
+  }
+  _threads.clear();
+}
+
+// Ask workers to drain the remaining jobs, then block until they exit
+void ThreadPool::finish()
+{
+  _worker.finish();
+  join();
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/ThreadPool.h b/runtime/onert/core/src/exec/ThreadPool.h
new file mode 100644
index 000000000..b638bd94c
--- /dev/null
+++ b/runtime/onert/core/src/exec/ThreadPool.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_THREAD_POOL_H__
+#define __ONERT_EXEC_THREAD_POOL_H__
+
+#include <thread>
+#include <memory>
+#include <vector>
+
+#include "WorkQueue.h"
+
+namespace onert
+{
+namespace exec
+{
+
+class ThreadPool
+{
+public:
+  /**
+   * @brief Construct ThreadPool object
+   *
+   * @param num_threads Number of threads
+   */
+  ThreadPool(uint32_t num_threads = 1);
+  /**
+   * @brief Destroy ThreadPool object
+   */
+  ~ThreadPool();
+  /**
+   * @brief Enqueue a function
+   *
+   * @param fn A function to be queued
+   */
+  void enqueue(std::unique_ptr<IFunction> &&fn);
+  /**
+   * @brief Get number of jobs in worker's queue
+   *
+   * @return Number of jobs
+   */
+  uint32_t numJobsInQueue();
+
+  /**
+   * @brief Block until all jobs are finished
+   */
+  void finish();
+
+private:
+  /**
+   * @brief Join all worker threads and clear the thread list
+   */
+  void join();
+
+private:
+  WorkQueue _worker; // shared job queue; all threads run its operator()
+  std::vector<std::thread> _threads;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_THREAD_POOL_H__
diff --git a/runtime/onert/core/src/exec/WorkQueue.cc b/runtime/onert/core/src/exec/WorkQueue.cc
new file mode 100644
index 000000000..b37f6a387
--- /dev/null
+++ b/runtime/onert/core/src/exec/WorkQueue.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WorkQueue.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace exec
+{
+
+// Signal immediate termination to any workers still blocked on the queue.
+// NOTE(review): this does not join — the owner (e.g. ThreadPool) must join
+// its threads before the queue is destroyed, otherwise a worker could touch
+// a destroyed mutex; ThreadPool's destructor does so.
+WorkQueue::~WorkQueue()
+{
+  {
+    std::unique_lock<std::mutex> lock(_mu);
+    _state = State::FORCE_FINISHING;
+  }
+  _cv.notify_all();
+}
+
+// Thread entry point: repeatedly pull one function off the queue under the
+// lock and run it outside the lock, until told to stop.
+void WorkQueue::operator()()
+{
+  while (true)
+  {
+    std::unique_ptr<IFunction> fn = nullptr;
+
+    {
+      std::unique_lock<std::mutex> lock{_mu};
+      // Sleep until there is work or a finish/terminate request
+      _cv.wait(lock, [this] {
+        return (_state == State::FORCE_FINISHING) || (_state == State::FINISHING) ||
+               (_state == State::ONLINE && !_functions.empty());
+      });
+
+      if (_state == State::FORCE_FINISHING)
+      {
+        // terminate(): exit immediately — jobs must already be drained
+        assert(_functions.empty() && "Terminating with unfinished jobs");
+        return;
+      }
+      else if (_state == State::FINISHING && _functions.empty())
+      {
+        // finish(): exit once the queue is drained
+        return;
+      }
+      else
+      {
+        assert(((_state == State::FINISHING) || (_state == State::ONLINE)) && !_functions.empty());
+        fn = std::move(_functions.front());
+        _functions.pop();
+      }
+    }
+
+    assert(fn);
+    // Run outside the lock so other workers can dequeue concurrently
+    fn->run();
+  }
+}
+
+// Add a job and wake one worker. notify_one is called after the lock is
+// released so the woken thread can acquire _mu without blocking again.
+void WorkQueue::enqueue(std::unique_ptr<IFunction> &&fn)
+{
+  {
+    std::unique_lock<std::mutex> lock{_mu};
+    _functions.emplace(std::move(fn));
+  }
+  _cv.notify_one();
+}
+
+// Request immediate worker exit; pending jobs are expected to be drained
+// already (asserted in operator())
+void WorkQueue::terminate()
+{
+  {
+    std::unique_lock<std::mutex> lock{_mu};
+    _state = State::FORCE_FINISHING;
+  }
+  _cv.notify_all();
+}
+
+// Request graceful worker exit once the remaining jobs are drained
+void WorkQueue::finish()
+{
+  {
+    std::unique_lock<std::mutex> lock{_mu};
+    _state = State::FINISHING;
+  }
+  _cv.notify_all();
+}
+
+// Thread-safe snapshot of the pending-job count
+uint32_t WorkQueue::numJobsInQueue()
+{
+  std::unique_lock<std::mutex> lock{_mu};
+  return _functions.size();
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/WorkQueue.h b/runtime/onert/core/src/exec/WorkQueue.h
new file mode 100644
index 000000000..2e56d85e8
--- /dev/null
+++ b/runtime/onert/core/src/exec/WorkQueue.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_WORK_QUEUE_H__
+#define __ONERT_EXEC_WORK_QUEUE_H__
+
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <queue>
+
+#include "exec/IFunction.h"
+
+namespace onert
+{
+namespace exec
+{
+
+class WorkQueue
+{
+public:
+  enum class State
+  {
+    ONLINE,         // accepting and running jobs
+    FINISHING,      // drain remaining jobs, then workers exit
+    FORCE_FINISHING // workers exit immediately (queue must already be empty)
+  };
+
+public:
+  /**
+   * @brief Create WorkQueue object
+   */
+  WorkQueue() = default;
+  /**
+   * @brief Destroy WorkQueue object
+   */
+  ~WorkQueue();
+  /**
+   * @brief Thread entry function
+   */
+  void operator()();
+  /**
+   * @brief Push the given Task to the job queue
+   *
+   * @param fn Function to be executed(a job)
+   */
+  void enqueue(std::unique_ptr<IFunction> &&fn);
+  /**
+   * @brief Flag as terminating so all the worker threads can terminate immediately
+   *        (pending jobs are expected to be drained already)
+   */
+  void terminate();
+  /**
+   * @brief Flag as finishing so worker threads terminate once remaining jobs are drained
+   */
+  void finish();
+  /**
+   * @brief Get the number of pending jobs. Even if this returns 0, WorkQueue threads
+   *        may be still running
+   *
+   * @return Number of jobs currently in the queue
+   */
+  uint32_t numJobsInQueue();
+
+private:
+  State _state{State::ONLINE};
+  std::queue<std::unique_ptr<IFunction>> _functions; // pending jobs (guarded by _mu)
+  std::mutex _mu;
+  std::condition_variable _cv;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_WORK_QUEUE_H__
diff --git a/runtime/onert/core/src/interp/Buffer.h b/runtime/onert/core/src/interp/Buffer.h
new file mode 100644
index 000000000..24938f74f
--- /dev/null
+++ b/runtime/onert/core/src/interp/Buffer.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Buffer.h
+ * @brief This file contains Buffer interface and InternalBuffer, ExternalBuffer class
+ */
+#ifndef __ONERT_INTERP_BUFFER_H__
+#define __ONERT_INTERP_BUFFER_H__
+
+#include <memory>
+
+#include "ir/Data.h"
+
+namespace onert
+{
+namespace interp
+{
+
+/**
+ * @brief Interface for writable data area
+ */
+class Buffer : public ir::Data
+{
+public:
+  /**
+   * @brief Return writable pointer for data area
+   * @note  const member returning a mutable pointer: constness of the Buffer
+   *        object does not protect the underlying bytes
+   * @return Writable pointer
+   */
+  virtual uint8_t *baseWritable(void) const = 0;
+};
+
+/**
+ * @brief Class for internally allocated data area
+ */
+class InternalBuffer final : public Buffer
+{
+public:
+  /// Allocates and owns a zero-initialized area of @p size bytes
+  InternalBuffer(size_t size) : _base{std::make_unique<uint8_t[]>(size)}, _size{size}
+  {
+    // DO NOTHING
+  }
+
+public:
+  size_t size(void) const override { return _size; }
+  const uint8_t *base(void) const override { return _base.get(); }
+  uint8_t *baseWritable(void) const override { return _base.get(); }
+
+private:
+  std::unique_ptr<uint8_t[]> _base; // owned allocation
+  size_t _size;                     // size in bytes
+};
+
+/**
+ * @brief Class for data area from outside
+ */
+class ExternalBuffer final : public Buffer
+{
+public:
+  /// Non-owning view over caller-managed memory; the caller must keep
+  /// @p base alive for the lifetime of this object
+  ExternalBuffer(uint8_t *base, size_t size) : _base{base}, _size{size}
+  {
+    // DO NOTHING
+  }
+
+public:
+  size_t size(void) const override { return _size; }
+  const uint8_t *base(void) const override { return _base; }
+  uint8_t *baseWritable(void) const override { return _base; }
+
+private:
+  uint8_t *_base; // not owned
+  size_t _size;   // size in bytes
+};
+
+} // namespace interp
+} // namespace onert
+
+#endif // __ONERT_INTERP_BUFFER_H__
diff --git a/runtime/onert/core/src/interp/ExecEnv.h b/runtime/onert/core/src/interp/ExecEnv.h
new file mode 100644
index 000000000..20805f1b3
--- /dev/null
+++ b/runtime/onert/core/src/interp/ExecEnv.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ExecEnv.h
+ * @brief This file contains ExecEnv to access interpreter tensor and execution status
+ */
+#ifndef __ONERT_INTERP_EXEC_ENV_H_
+#define __ONERT_INTERP_EXEC_ENV_H_
+
+#include <unordered_set>
+
+#include "ir/Graph.h"
+#include "Tensor.h"
+
+namespace onert
+{
+namespace interp
+{
+
+/**
+ * @brief Class to gather interpreter execution environment
+ * Each interpreter instance owns its own execution environment
+ */
+class ExecEnv
+{
+public:
+ /**
+ * @brief Construct a new Exec Env object (deleted)
+ */
+ ExecEnv(void) = delete;
+ /**
+ * @brief Construct a new ExecEnv object
+ * @param[in] graph Graph to execute by interpreter
+ */
+ explicit ExecEnv(const ir::Graph &graph) : _graph(graph)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Return graph to execute
+ * @return Graph
+ */
+ const ir::Graph &graph(void) const { return _graph; }
+ /**
+ * @brief Assign tensor to environment which has an allocated or assigned buffer
+ * @param[in] index Tensor index
+ * @param[in] tensor Tensor
+ */
+ void assignTensor(const ir::OperandIndex index, std::shared_ptr<ITensor> tensor)
+ {
+ assert(tensor->bufferRO() != nullptr);
+ _tensors.emplace(index, tensor);
+ }
+
+ /**
+ * @brief Return tensor pointer in environment
+ * @param[in] index Tensor index
+ * @return Tensor pointer
+ */
+ const ITensor *tensorAt(const ir::OperandIndex index) const { return _tensors.at(index).get(); }
+
+ /**
+ * @brief Check environment contains tensor
+ * @param[in] index Tensor index
+ * @return @c true if the environment contains the tensor, otherwise @c false
+ */
+ bool contains(const ir::OperandIndex index) const
+ {
+ return (_tensors.find(index) != _tensors.end());
+ }
+
+ /**
+ * @brief Allocate tensor using operand info
+ * @param[in] index Tensor index
+ * @param[in] info Operand info
+ * @note If already allocated, just return
+ * @TODO Smarter allocation policy
+ */
+ void allocateIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info)
+ {
+ // already allocated, or constant
+ if (contains(index))
+ {
+ return;
+ }
+
+ auto tensor = std::make_shared<Tensor>(info);
+ tensor->setBuffer(std::make_shared<InternalBuffer>(tensor->total_size()));
+ assignTensor(index, tensor);
+ _buffers.insert(index);
+ }
+
+ /**
+ * @brief Allocate read-only tensor and share data with other tensor
+ * @param[in] index Tensor index
+ * @param[in] info Operand info
+ * @param[in] index_to_share Tensor index that have data to share
+ */
+ void allocateAndShareIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info,
+ const ir::OperandIndex index_to_share)
+ {
+ if (!contains(index_to_share))
+ {
+ throw std::runtime_error{"Cannot find tensor to share data"};
+ }
+
+ // already allocated
+ if (contains(index))
+ {
+ return;
+ }
+ else
+ {
+ auto tensor = std::make_shared<ROTensor>(info);
+ tensor->setData(tensorAt(index_to_share)->shareData());
+ assignTensor(index, tensor);
+ _buffers.insert(index);
+ }
+ }
+
+ /**
+ * @brief Free buffer if allocated by allocateIfNeeded
+ * @param[in] index Tensor index
+ * @note If the buffer was allocated externally, just return
+ */
+ void freeIfAllocated(const ir::OperandIndex index)
+ {
+ if (_buffers.find(index) != _buffers.end())
+ {
+ _tensors.at(index)->releaseData();
+ }
+ }
+
+private:
+ const ir::Graph &_graph;
+ // Tensor map to use in interpreter
+ // It should map tensors that have allocated or assigned buffer pointer
+ std::unordered_map<ir::OperandIndex, std::shared_ptr<ITensor>> _tensors;
+ // Tensors allocated by allocateIfNeeded (buffer)
+ std::unordered_set<ir::OperandIndex> _buffers;
+};
+
+} // namespace interp
+} // namespace onert
+
+#endif // __ONERT_INTERP_EXEC_ENV_H_
diff --git a/runtime/onert/core/src/interp/InterpExecutor.cc b/runtime/onert/core/src/interp/InterpExecutor.cc
new file mode 100644
index 000000000..7a848412f
--- /dev/null
+++ b/runtime/onert/core/src/interp/InterpExecutor.cc
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "interp/InterpExecutor.h"
+#include "interp/ExecEnv.h"
+#include "interp/Interpreter.h"
+
+#include "util/logging.h"
+
+#include <memory>
+
+namespace onert
+{
+namespace interp
+{
+
+void InterpExecutor::execute(const exec::IODescription &desc)
+{
+ /************************************************************************
+ * Prepare execution model (submodel)
+   It may execute a divided (sub)model,
+   but for now assume the whole model inference is done by the interpreter
+ ***********************************************************************/
+ ir::OperandIndexMap<std::shared_ptr<ITensor>> tensor_map;
+
+ for (uint32_t n = 0; n < _graph.getInputs().size(); n++)
+ {
+ ir::IOIndex index{n};
+ const auto input_index = _graph.getInputs().at(index);
+ const auto &input = *desc.inputs.at(n);
+
+ auto input_tensor = std::make_shared<ROTensor>(input.info);
+ input_tensor->setData(std::make_shared<const ir::ExternalData>(
+ reinterpret_cast<const uint8_t *>(input.buffer), input.size));
+ tensor_map[input_index] = input_tensor;
+ }
+
+ for (uint32_t n = 0; n < _graph.getOutputs().size(); n++)
+ {
+ ir::IOIndex index{n};
+ const auto output_index = _graph.getOutputs().at(index);
+ const auto &output = *desc.outputs.at(n);
+
+ auto output_tensor = std::make_shared<Tensor>(output.info);
+ output_tensor->setBuffer(
+ std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output.buffer), output.size));
+ tensor_map[output_index] = output_tensor;
+ }
+
+ /************************************************************************
+ * Prepare execution environment
+ Execution environment will be assigned to invoked interpreter instance
+ ***********************************************************************/
+
+ std::unique_ptr<ExecEnv> interp_env = std::make_unique<ExecEnv>(_graph);
+
+ // Assign input/output tensor into interpreter execution environment
+ for (auto index : _graph.getInputs() + _graph.getOutputs())
+ {
+ if (tensor_map.find(index) != tensor_map.end())
+ {
+ VERBOSE(INTERPRETER) << "Assign input/output tensor. operand index:" << index.value()
+ << std::endl;
+ interp_env->assignTensor(index, tensor_map.at(index));
+ }
+ }
+
+ // Allocate constant tensor
+ _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (obj.isConstant())
+ {
+ VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind.value()
+ << std::endl;
+
+ assert(obj.data());
+ auto const_tensor = std::make_shared<ROTensor>(obj.info());
+ // Assume that interpreter's tensor layout is same with model (NHWC)
+ const_tensor->setData(
+ std::make_shared<ir::ExternalData>(obj.data()->base(), obj.info().total_size()));
+ interp_env->assignTensor(ind, const_tensor);
+ }
+ });
+
+ /*****************************************************************************
+ * Invoke interpreter
+ ****************************************************************************/
+
+ interp::Interpreter interp(std::move(interp_env));
+ interp.run();
+
+ /*****************************************************************************
+ * Invoked interpreter run is finished
+ ****************************************************************************/
+
+ // If interpreter execute submodel
+ // 1. Get tensor output of submodel into tensor_map to save result
+ // 2. Generate new ExecEnv for next interpretation
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/InterpOps.lst b/runtime/onert/core/src/interp/InterpOps.lst
new file mode 100644
index 000000000..7b0c33232
--- /dev/null
+++ b/runtime/onert/core/src/interp/InterpOps.lst
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INTERP_OP
+#error Define INTERP_OP before including this file
+#endif
+
+// Supported operation name in interpreter
+//
+// Same list with Operations.lst
+// Make comment out if operation is not supported in interpreter
+INTERP_OP(Add)
+INTERP_OP(Sub)
+//INTERP_OP(BatchToSpaceND)
+//INTERP_OP(Cast)
+INTERP_OP(Conv2D)
+INTERP_OP(DepthwiseConv2D)
+INTERP_OP(AvgPool2D)
+INTERP_OP(MaxPool2D)
+INTERP_OP(Concat)
+INTERP_OP(FullyConnected)
+//INTERP_OP(ReduceSum)
+INTERP_OP(Reshape)
+INTERP_OP(Mul)
+INTERP_OP(Softmax)
+//INTERP_OP(Squeeze)
+//INTERP_OP(Slice)
+//INTERP_OP(StridedSlice)
+INTERP_OP(Tanh)
+INTERP_OP(Logistic)
+//INTERP_OP(Div)
+//INTERP_OP(Transpose)
+//INTERP_OP(Exp)
+//INTERP_OP(ReduceMax)
+//INTERP_OP(Comparison)
+//INTERP_OP(LogicalAnd)
+//INTERP_OP(LogicalOr)
+//INTERP_OP(LogicalNot)
+//INTERP_OP(LSTM)
+//INTERP_OP(RSQRT)
+INTERP_OP(ReLU)
+//INTERP_OP(ResizeBilinear)
+INTERP_OP(ReLU1)
+INTERP_OP(ReLU6)
+//INTERP_OP(RNN)
+//INTERP_OP(Floor)
+//INTERP_OP(SpaceToBatchND)
+//INTERP_OP(SpaceToDepth)
+//INTERP_OP(L2Pool2D)
+//INTERP_OP(EmbeddingLookup)
+//INTERP_OP(L2Normalization)
+//INTERP_OP(HashtableLookup)
+INTERP_OP(InstanceNorm)
+//INTERP_OP(PReLU)
+INTERP_OP(TransposeConv)
+//INTERP_OP(SQRT)
+//INTERP_OP(SquaredDifference)
+//INTERP_OP(TopKV2)
+INTERP_OP(Gather)
+//INTERP_OP(Neg)
+//INTERP_OP(Abs)
+//INTERP_OP(ArgMax)
+//INTERP_OP(Dequantize)
+//INTERP_OP(Mean)
+//INTERP_OP(LocalResponseNormalization)
+//INTERP_OP(DepthToSpace)
+//INTERP_OP(Pack)
+//INTERP_OP(ReduceMin)
+//INTERP_OP(Split)
+//INTERP_OP(Unpack)
+INTERP_OP(Pad)
+//INTERP_OP(Custom)
+//INTERP_OP(Permute)
+//INTERP_OP(Min)
+//INTERP_OP(Max)
+//INTERP_OP(OneHot)
diff --git a/runtime/onert/core/src/interp/Interpreter.cc b/runtime/onert/core/src/interp/Interpreter.cc
new file mode 100644
index 000000000..e0cb8ce41
--- /dev/null
+++ b/runtime/onert/core/src/interp/Interpreter.cc
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Interpreter.h"
+
+#include <stack>
+#include <unordered_set>
+
+#include "Registration.h"
+
+#include "ir/OperandIndexMap.h"
+#include "util/logging.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace interp
+{
+
+// TODO more structured execution kernel implementation
+// TODO use cker for execution
+// TODO divide tensor prepare and execution
+// TODO introduce memory manager (buffer allocate and free)
+class OperationExecutor
+{
+public:
+ OperationExecutor(ExecEnv *env) : _env{env}
+ {
+#define INTERP_OP(InternalName) _kernels[ir::OpCode::InternalName] = get##InternalName();
+#include "InterpOps.lst"
+#undef INTERP_OP
+ }
+
+ void execute(const ir::OperationIndex &idx)
+ {
+ const ir::Operation &node = _env->graph().operations().at(idx);
+ const auto nodeName = node.name();
+ VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName
+ << " operation (id: " << idx.value() << ")" << std::endl;
+
+ const auto nodeOpCode = node.opcode();
+ if (_kernels[nodeOpCode]->prepare != nullptr)
+ {
+ _kernels[nodeOpCode]->prepare(_env, node);
+ }
+ _kernels[nodeOpCode]->invoke(_env, node);
+ }
+
+private:
+ ExecEnv *_env;
+ std::unordered_map<ir::OpCode, OpKernel *> _kernels;
+};
+
+void Interpreter::run()
+{
+ VERBOSE(INTERPRETER) << "Interpreter is invoked " << std::endl;
+
+ // operand_stack: save operands prepared to use
+ std::stack<ir::OperandIndex> operand_stack;
+
+ // Note: We should push input first, then constant.
+ // We use use-def chains to find operators ready for execution,
+ // but use-def cannot handle parameters (maybe constant, but not always)
+ // Note: If all model inputs are constant, it may not work (depending on tensor order).
+ // But that scenario may not exist
+ for (auto ind : _env->graph().getInputs())
+ {
+ VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind.value() << std::endl;
+
+ operand_stack.push(ind);
+ }
+
+ _env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (obj.isConstant())
+ {
+ VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind.value() << std::endl;
+
+ operand_stack.push(ind);
+ }
+ });
+
+ // Execution
+ std::unordered_set<ir::OperandIndex> ready_check;
+ std::unordered_set<ir::OperationIndex> executed;
+ OperationExecutor executor{_env.get()};
+ while (!operand_stack.empty())
+ {
+ const auto current_operand_index = operand_stack.top();
+ operand_stack.pop();
+ VERBOSE(INTERPRETER) << "Poped operand " << current_operand_index.value()
+ << " is checked ready to use" << std::endl;
+
+ assert(ready_check.find(current_operand_index) == ready_check.end());
+ ready_check.insert(current_operand_index);
+
+ // Find prepared operations by scan use of current operand
+ std::stack<ir::OperationIndex> operation_stack;
+ auto use_operators = std::list<ir::OperationIndex>(
+ _env->graph().operands().at(current_operand_index).getUses().list());
+ // Remove operation index duplication
+ // If one operation uses the same operand tensor for multiple inputs,
+ // the use-list has duplicated operation indices
+ use_operators.unique();
+ for (auto use_operator : use_operators)
+ {
+ // Assumption: all parameters are ready to use
+ bool operator_ready = true;
+ for (auto input_index : _env->graph().operations().at(use_operator).getInputs())
+ {
+ if (ready_check.find(input_index) == ready_check.end())
+ {
+ operator_ready = false;
+ break;
+ }
+ }
+
+ if (operator_ready)
+ {
+ VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator.value() << std::endl;
+ operation_stack.push(use_operator);
+ }
+ }
+
+ while (!operation_stack.empty())
+ {
+ const auto current_operation_index = operation_stack.top();
+ operation_stack.pop();
+ VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index.value() << "("
+ << _env->graph().operations().at(current_operation_index).name() << ")"
+ << std::endl;
+
+ // execution
+ // 1. Prepare output tensor
+ // 2. Call operation kernel
+ executor.execute(current_operation_index);
+ executed.insert(current_operation_index);
+
+ // 3. Push each output into operand stack
+ const auto def_operands = _env->graph().operations().at(current_operation_index).getOutputs();
+ for (auto def_operand : def_operands)
+ {
+ VERBOSE(INTERPRETER) << "Buffer: Push to operand stack " << def_operand.value()
+ << std::endl;
+ operand_stack.push(def_operand);
+ }
+
+ // 4. Free if lifetime of buffer operands used by input is finished
+ for (auto input_index : _env->graph().operations().at(current_operation_index).getInputs())
+ {
+ const auto use_operators = _env->graph().operands().at(input_index).getUses();
+ bool dead_buffer = true;
+ for (auto use_operator : use_operators.list())
+ {
+ if (executed.find(use_operator) == executed.end())
+ {
+ dead_buffer = false;
+ break;
+ }
+ }
+
+ if (dead_buffer)
+ {
+ _env->freeIfAllocated(input_index);
+ }
+ }
+ }
+ }
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/Interpreter.h b/runtime/onert/core/src/interp/Interpreter.h
new file mode 100644
index 000000000..d2165f538
--- /dev/null
+++ b/runtime/onert/core/src/interp/Interpreter.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Interpreter.h
+ * @brief This file contains Interpreter class for interpretation
+ */
+#ifndef __ONERT_INTERP_INTERPRETER_H__
+#define __ONERT_INTERP_INTERPRETER_H__
+
+#include "ExecEnv.h"
+
+namespace onert
+{
+namespace interp
+{
+
+/**
+ * @brief Class for interpretation
+ */
+class Interpreter
+{
+
+public:
+ /**
+ * @brief Construct a new Interpreter object (deleted)
+ */
+ Interpreter() = delete;
+ /**
+ * @brief Construct a new Interpreter object
+ * @param[in] env Execution environment variable for interpreter object
+ */
+ Interpreter(std::unique_ptr<ExecEnv> env) : _env{std::move(env)}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Run interpreter until there is no operation to execute
+ */
+ void run();
+
+private:
+ std::unique_ptr<ExecEnv> _env;
+};
+
+} // namespace interp
+} // namespace onert
+
+#endif // __ONERT_INTERP_INTERPRETER_H__
diff --git a/runtime/onert/core/src/interp/Registration.h b/runtime/onert/core/src/interp/Registration.h
new file mode 100644
index 000000000..956b92a53
--- /dev/null
+++ b/runtime/onert/core/src/interp/Registration.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_INTERP_REGISTRATION_H__
+#define __ONERT_INTERP_REGISTRATION_H__
+
+#include "ExecEnv.h"
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace interp
+{
+
+struct OpKernel
+{
+ std::function<void(ExecEnv *, const ir::Operation &)> prepare;
+ std::function<void(const ExecEnv *, const ir::Operation &)> invoke;
+};
+
+// Defined in operations/ directory
+#define INTERP_OP(InternalName) OpKernel *get##InternalName();
+#include "InterpOps.lst"
+#undef INTERP_OP
+
+} // namespace interp
+} // namespace onert
+
+#endif // __ONERT_INTERP_REGISTRATION_H__
diff --git a/runtime/onert/core/src/interp/Tensor.cc b/runtime/onert/core/src/interp/Tensor.cc
new file mode 100644
index 000000000..07f8b75dc
--- /dev/null
+++ b/runtime/onert/core/src/interp/Tensor.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tensor.h"
+
+#define NO_USE(a) (void)(a)
+
+namespace onert
+{
+namespace interp
+{
+
+void ITensor::access(const std::function<void(backend::ITensor &tensor)> &fn) { fn(*this); }
+
+size_t ROTensor::calcOffset(const ir::Coordinates &coords) const
+{
+ NO_USE(coords);
+ throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
+}
+
+size_t Tensor::calcOffset(const ir::Coordinates &coords) const
+{
+ NO_USE(coords);
+ throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
+}
+
+ir::Layout ROTensor::layout() const
+{
+ // TODO Changes to return frontend layout
+ return ir::Layout::NHWC;
+}
+
+ir::Layout Tensor::layout() const
+{
+ // TODO Changes to return frontend layout
+ return ir::Layout::NHWC;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/Tensor.h b/runtime/onert/core/src/interp/Tensor.h
new file mode 100644
index 000000000..d5cb81738
--- /dev/null
+++ b/runtime/onert/core/src/interp/Tensor.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Tensor.h
+ * @brief This file contains ITensor interface, ROTensor class, and Tensor class
+ */
+#ifndef __ONERT_INTERP_TENSOR_H__
+#define __ONERT_INTERP_TENSOR_H__
+
+#include "Buffer.h"
+
+#include "ir/OperandInfo.h"
+#include "backend/ITensor.h"
+#include "ir/Layout.h"
+
+namespace onert
+{
+namespace interp
+{
+
+/**
+ * @brief Interface to handle Tensor in interpreter
+ */
+class ITensor : public backend::ITensor
+{
+public:
+ virtual ~ITensor() = default;
+
+public:
+ virtual uint8_t *buffer() const = 0;
+ /**
+ * @brief Return shared pointer for buffer
+ * @return Buffer shared pointer
+ */
+ virtual std::shared_ptr<const Buffer> shareBuffer() const = 0;
+ /**
+ * @brief Return read-only buffer pointer
+ * @return Read-only buffer pointer
+ */
+ virtual const uint8_t *bufferRO() const = 0;
+ /**
+ * @brief Return shared pointer for data
+ * @return Data shared pointer
+ */
+ virtual std::shared_ptr<const ir::Data> shareData() const = 0;
+ /**
+ * @brief Set internal/external buffer
+ * @param[in] buffer Buffer pointer
+ */
+ virtual void setBuffer(std::shared_ptr<const Buffer> buffer) = 0;
+ /**
+ * @brief Set data reference (including constant, input)
+ * @param[in] data Data pointer
+ */
+ virtual void setData(std::shared_ptr<const ir::Data> data) = 0;
+ virtual void releaseData() = 0;
+
+ virtual size_t total_size() const = 0;
+ virtual size_t dimension(size_t index) const = 0;
+ virtual size_t num_dimensions() const = 0;
+ virtual size_t calcOffset(const ir::Coordinates &coords) const = 0;
+
+ virtual bool has_padding() const = 0;
+ /**
+ * @brief Return data type of tensor
+ * @return Data type of tensor
+ */
+ virtual ir::DataType data_type() const = 0;
+ /**
+ * @brief Return TensorInfo
+ * @return TensorInfo
+ */
+ virtual const ir::OperandInfo &tensorInfo() const = 0;
+ /**
+ * @brief Return number of elements
+ * @return Number of elements
+ */
+ virtual uint64_t num_elements() const = 0;
+ void access(const std::function<void(backend::ITensor &tensor)> &fn) final;
+};
+
+/**
+ * @brief Class to handle tensor in interpreter as read-only
+ */
+class ROTensor final : public ITensor
+{
+public:
+ ROTensor() = delete;
+ ROTensor(const ir::OperandInfo &info) : _info(info)
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint8_t *buffer() const override { throw std::runtime_error{"Read only tensor"}; }
+ std::shared_ptr<const Buffer> shareBuffer() const override
+ {
+ throw std::runtime_error{"Read only tensor"};
+ }
+ const uint8_t *bufferRO() const override { return _data->base(); }
+ std::shared_ptr<const ir::Data> shareData() const override { return _data; }
+ void setBuffer(std::shared_ptr<const Buffer> buffer) override { _data = buffer; }
+ void setData(std::shared_ptr<const ir::Data> data) override { _data = data; }
+ void releaseData() override { _data = nullptr; }
+
+ size_t total_size() const override { return _info.total_size(); }
+ size_t dimension(size_t index) const override { return _info.shape().dim(index); }
+ size_t num_dimensions() const override { return _info.shape().rank(); }
+ size_t calcOffset(const ir::Coordinates &coords) const override;
+ ir::Layout layout() const override;
+ bool has_padding() const override { return false; }
+ ir::DataType data_type() const override { return _info.typeInfo().type(); }
+ const ir::OperandInfo &tensorInfo() const override { return _info; }
+ uint64_t num_elements() const override { return _info.shape().num_elements(); };
+
+private:
+ const ir::OperandInfo _info;
+ std::shared_ptr<const ir::Data> _data{nullptr};
+};
+
+/**
+ * @brief Class to handle tensor in interpreter as writable
+ */
+class Tensor final : public ITensor
+{
+public:
+ Tensor() = delete;
+ Tensor(const ir::OperandInfo &info) : _info(info)
+ {
+ // DO NOTHING
+ }
+
+public:
+ uint8_t *buffer() const override { return _buffer->baseWritable(); }
+ std::shared_ptr<const Buffer> shareBuffer() const override { return _buffer; };
+ const uint8_t *bufferRO() const override { return _buffer->base(); }
+ std::shared_ptr<const ir::Data> shareData() const override { return _buffer; }
+ void setBuffer(std::shared_ptr<const Buffer> buffer) override { _buffer = buffer; }
+ void setData(std::shared_ptr<const ir::Data>) override
+ {
+ throw std::runtime_error{"Passed data may read-only"};
+ }
+ void releaseData() override { _buffer = nullptr; }
+
+ size_t total_size() const override { return _info.total_size(); }
+ size_t dimension(size_t index) const override { return _info.shape().dim(index); }
+ size_t num_dimensions() const override { return _info.shape().rank(); }
+ size_t calcOffset(const ir::Coordinates &coords) const override;
+ ir::Layout layout() const override;
+ bool has_padding() const override { return false; }
+ ir::DataType data_type() const override { return _info.typeInfo().type(); }
+ const ir::OperandInfo &tensorInfo() const override { return _info; }
+ uint64_t num_elements() const override { return _info.shape().num_elements(); };
+
+private:
+ const ir::OperandInfo _info;
+ std::shared_ptr<const Buffer> _buffer{nullptr};
+};
+
+} // namespace interp
+} // namespace onert
+
+#endif // __ONERT_INTERP_TENSOR_H__
diff --git a/runtime/onert/core/src/interp/operations/AvgPool2D.cc b/runtime/onert/core/src/interp/operations/AvgPool2D.cc
new file mode 100644
index 000000000..ef653fb2a
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/AvgPool2D.cc
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/AveragePool.h>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+#include "ir/operation/AvgPool2D.h"
+#include "util/Utils.h"
+#include "util/ShapeInference.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace avgpool2d
+{
+
+void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
+{
+ const auto in_index = node.getInputs().at(0);
+ const auto out_index = node.getOutputs().at(0);
+
+ const auto in_tensor = env->tensorAt(in_index);
+ UNUSED_RELEASE(in_tensor);
+
+ assert(in_tensor->num_dimensions() == 4);
+
+ const auto output_info = env->graph().operands().at(out_index).info();
+ if (output_info.total_size() == 0)
+ {
+ // Handle unspecified output shape
+ const auto &avgpool_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
+ const auto infered_output_shapes =
+ shape_inference::inferAvgPoolShape(in_tensor->tensorInfo().shape(), avgpool_node.param());
+ env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
+ }
+ else
+ {
+ env->allocateIfNeeded(out_index, output_info);
+ }
+
+ auto out_tensor = env->tensorAt(out_index);
+ UNUSED_RELEASE(out_tensor);
+
+ // Handle same ifm & ofm data type only
+ assert(in_tensor->data_type() == out_tensor->data_type());
+ assert(out_tensor->num_dimensions() == 4);
+}
+
+void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
+ const ir::operation::AvgPool2D::Param &param)
+{
+ // TODO Support NCHW frontend
+ const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+ const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+ const auto padding =
+ ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
+ // Calculate
+ nnfw::cker::PoolParams cker_param;
+ calculateActivationRange(param.activation, &cker_param.float_activation_min,
+ &cker_param.float_activation_max);
+ cker_param.filter_width = param.kw;
+ cker_param.filter_height = param.kh;
+ cker_param.padding_values.width = padding.left;
+ cker_param.padding_values.height = padding.top;
+ cker_param.stride_width = param.stride.horizontal;
+ cker_param.stride_height = param.stride.vertical;
+
+ const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
+ const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
+ const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
+ float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
+
+ nnfw::cker::AveragePool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
+}
+
+void invokeAvgPool2D(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &avgpool_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
+
+ const auto in_index = node.getInputs().at(0);
+ const auto out_index = node.getOutputs().at(0);
+
+ // Look up input/output tensors prepared by prepareAvgPool2D
+ const auto in_tensor = env->tensorAt(in_index);
+ const auto out_tensor = env->tensorAt(out_index);
+
+ const auto data_type = in_tensor->data_type();
+ if (data_type == ir::DataType::FLOAT32)
+ {
+ invoke(in_tensor, out_tensor, avgpool_node.param());
+ }
+ else
+ {
+ throw std::runtime_error{"NYI: Support float only"};
+ }
+}
+} // namespace avgpool2d
+
+OpKernel *getAvgPool2D()
+{
+ static OpKernel kernel = {avgpool2d::prepareAvgPool2D, avgpool2d::invokeAvgPool2D};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
new file mode 100644
index 000000000..8ffc3cd33
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+#include "ir/operation/Add.h"
+#include "ir/operation/Sub.h"
+#include "ir/operation/Mul.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace
+{
+
+enum class OpType
+{
+ ADD,
+ SUB,
+ MUL
+};
+
+template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation &node)
+{
+ const auto &add_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
+
+ const auto lhs_index = node.getInputs().at(add_node.LHS);
+ const auto rhs_index = node.getInputs().at(add_node.RHS);
+ const auto out_index = node.getOutputs().at(0);
+
+ const auto lhs_tensor = env->tensorAt(lhs_index);
+ const auto rhs_tensor = env->tensorAt(rhs_index);
+
+ // Check shape and type lhs is same with rhs
+ // TODO Util function to compare TensorInfo
+ if (lhs_tensor->data_type() != rhs_tensor->data_type())
+ {
+ throw std::runtime_error{"Interp(Add): Different input types"};
+ }
+
+ bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape());
+ if (try_broadcast)
+ {
+ bool success = true;
+ auto out_shape = calcBroadcastShape(lhs_tensor->tensorInfo().shape(),
+ rhs_tensor->tensorInfo().shape(), success);
+ if (!success)
+ {
+ throw std::runtime_error{"Interp(Add): Fail to brodcasting"};
+ }
+
+ auto output_info = ir::OperandInfo(out_shape, lhs_tensor->tensorInfo().typeInfo());
+ // We can handle already allocated (ex. model output)
+ env->allocateIfNeeded(out_index, output_info);
+ }
+ else
+ {
+ // Output's shape and type is same with input
+ auto output_info = lhs_tensor->tensorInfo();
+ // We can handle already allocated (ex. model output)
+ env->allocateIfNeeded(out_index, output_info);
+ }
+
+ auto out_tensor = env->tensorAt(out_index);
+ // Check shape and type lhs is same with output
+ // TODO Util function to compare TensorInfo
+ if (lhs_tensor->data_type() != out_tensor->data_type())
+ {
+ throw std::runtime_error{"Interp(Add): Invalid output type"};
+ }
+}
+
+inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params)
+{
+ params->float_activation_min = min;
+ params->float_activation_max = max;
+}
+
+inline void setActivationParams(int32_t min, int32_t max,
+ nnfw::cker::BinaryArithmeticOpParam *params)
+{
+ params->quantized_activation_min = min;
+ params->quantized_activation_max = max;
+}
+
+template <typename raw_type, typename param_type, OpType op_type>
+void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor,
+ const param_type &param)
+{
+ const auto lhs_buffer = lhs_tensor->bufferRO();
+ const auto rhs_buffer = rhs_tensor->bufferRO();
+ auto out_buffer = out_tensor->buffer();
+
+ nnfw::cker::BinaryArithmeticOpParam cker_param;
+ raw_type activation_min, activation_max;
+ calculateActivationRange(param.activation, &activation_min, &activation_max);
+ setActivationParams(activation_min, activation_max, &cker_param);
+ const raw_type *lhs_ptr = reinterpret_cast<const raw_type *>(lhs_buffer);
+ const raw_type *rhs_ptr = reinterpret_cast<const raw_type *>(rhs_buffer);
+ raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer);
+
+ cker_param.type = (op_type == OpType::ADD)
+ ? nnfw::cker::BinaryArithmeticOpType::ADD
+ : ((op_type == OpType::SUB) ? nnfw::cker::BinaryArithmeticOpType::SUB
+ : nnfw::cker::BinaryArithmeticOpType::MUL);
+
+ if (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape())
+ {
+ const auto lhs_shape = convertExtendShape(lhs_tensor->tensorInfo().shape());
+ const auto rhs_shape = convertExtendShape(rhs_tensor->tensorInfo().shape());
+ const auto out_shape = convertExtendShape(out_tensor->tensorInfo().shape());
+ nnfw::cker::BroadcastBinaryArithmeticOpSlow(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr,
+ out_shape, out_ptr);
+ return;
+ }
+
+ const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape());
+ const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape());
+ const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
+ nnfw::cker::BinaryArithmeticOp(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, out_shape,
+ out_ptr);
+}
+
+template <typename node_type, typename param_type, OpType op_type>
+void invokeAdd(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &arithmetic_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
+
+ const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
+ const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
+ const auto out_index = node.getOutputs().at(0);
+ const auto lhs_tensor = env->tensorAt(lhs_index);
+ const auto rhs_tensor = env->tensorAt(rhs_index);
+ const auto out_tensor = env->tensorAt(out_index);
+ const auto data_type = lhs_tensor->data_type();
+
+ if (data_type == ir::DataType::INT32)
+ {
+ invoke<int32_t, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor,
+ arithmetic_node.param());
+ }
+ else if (data_type == ir::DataType::FLOAT32)
+ {
+ invoke<float, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, arithmetic_node.param());
+ }
+ else
+ {
+ throw std::runtime_error{"NYI: Unsupported data type"};
+ }
+}
+} // namespace
+
+OpKernel *getAdd()
+{
+ static OpKernel kernel = {prepareAdd<ir::operation::Add>,
+ invokeAdd<ir::operation::Add, ir::operation::Add::Param, OpType::ADD>};
+ return &kernel;
+}
+
+OpKernel *getSub()
+{
+ static OpKernel kernel = {prepareAdd<ir::operation::Sub>,
+ invokeAdd<ir::operation::Sub, ir::operation::Sub::Param, OpType::SUB>};
+ return &kernel;
+}
+
+OpKernel *getMul()
+{
+ static OpKernel kernel = {prepareAdd<ir::operation::Mul>,
+ invokeAdd<ir::operation::Mul, ir::operation::Mul::Param, OpType::MUL>};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Concat.cc b/runtime/onert/core/src/interp/operations/Concat.cc
new file mode 100644
index 000000000..53715e790
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/Concat.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/Concatenation.h>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+#include "ir/operation/Concat.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace concat
+{
+
+void prepareConcat(ExecEnv *env, const ir::Operation &node)
+{
+ const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
+
+ const auto first_index = node.getInputs().at(0);
+ const auto out_index = node.getOutputs().at(0);
+
+ const auto first_tensor = env->tensorAt(first_index);
+ uint32_t out_axis_dimension = 0;
+ const int32_t axis_raw = concat_node.param().axis;
+ const uint32_t axis = (axis_raw < 0) ? (axis_raw + first_tensor->num_dimensions()) : axis_raw;
+
+ // All inputs shape should be same except axis dimension
+ // All inputs type should be same
+ for (auto input : node.getInputs())
+ {
+ assert(first_tensor->num_dimensions() == env->tensorAt(input)->num_dimensions());
+ assert(first_tensor->data_type() == env->tensorAt(input)->data_type());
+ for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
+ {
+ if (i == axis)
+ {
+ out_axis_dimension += env->tensorAt(input)->dimension(i);
+ continue;
+ }
+ assert(first_tensor->dimension(i) == env->tensorAt(input)->dimension(i));
+ }
+ }
+
+ // Make output tensor info using first input tensor info, and accumulated axis dimension value
+ auto out_shape = first_tensor->tensorInfo().shape();
+ out_shape.dim(axis) = out_axis_dimension;
+ env->allocateIfNeeded(out_index,
+ ir::OperandInfo{out_shape, first_tensor->tensorInfo().typeInfo()});
+
+ auto out_tensor = env->tensorAt(out_index);
+ UNUSED_RELEASE(out_tensor);
+
+ // Output shape should be same with input except axis dimension
+ // Output type should be same with input
+ assert(first_tensor->data_type() == out_tensor->data_type());
+ for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
+ {
+ if (i == axis)
+ {
+ continue;
+ }
+ assert(first_tensor->dimension(i) == out_tensor->dimension(i));
+ }
+}
+
+void invoke(const std::vector<const ITensor *> in_tensors, const ITensor *out_tensor, uint32_t axis)
+{
+ const uint32_t count = in_tensors.size();
+
+ // Calculate
+ nnfw::cker::ConcatenationParams cker_param;
+ cker_param.axis = (int8_t)axis;
+ cker_param.inputs_count = count;
+
+ const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
+
+ std::vector<nnfw::cker::Shape> in_shapes;
+ std::vector<const nnfw::cker::Shape *> in_shape_ptrs;
+ in_shapes.reserve(count);
+ in_shape_ptrs.reserve(count);
+ std::vector<const float *> in_ptrs;
+ for (uint32_t i = 0; i < count; i++)
+ {
+ in_shapes.push_back(convertShape(in_tensors[i]->tensorInfo().shape()));
+ in_shape_ptrs.push_back(&in_shapes[i]);
+ in_ptrs.push_back(reinterpret_cast<const float *>(in_tensors[i]->bufferRO()));
+ }
+
+ auto out_buffer = out_tensor->buffer();
+ float *out_ptr = reinterpret_cast<float *>(out_buffer);
+
+ nnfw::cker::Concatenation<float>(cker_param, in_shape_ptrs.data(), in_ptrs.data(), out_shape,
+ out_ptr);
+}
+
+void invokeConcat(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
+ const int32_t axis_raw = concat_node.param().axis;
+
+ std::vector<const ITensor *> in_tensors;
+ for (const auto &e : concat_node.getInputs())
+ {
+ in_tensors.emplace_back(env->tensorAt(e));
+ }
+
+ const auto out_index = node.getOutputs().at(0);
+ const auto out_tensor = env->tensorAt(out_index);
+ const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->num_dimensions()) : axis_raw;
+
+ const auto data_type = in_tensors[0]->data_type();
+ if (data_type == ir::DataType::FLOAT32)
+ {
+ invoke(in_tensors, out_tensor, axis);
+ }
+ else
+ {
+ throw std::runtime_error{"NYI: Support float32 only"};
+ }
+}
+} // namespace concat
+
+OpKernel *getConcat()
+{
+ static OpKernel kernel = {concat::prepareConcat, concat::invokeConcat};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Conv2D.cc b/runtime/onert/core/src/interp/operations/Conv2D.cc
new file mode 100644
index 000000000..3c0087a61
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/Conv2D.cc
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/Conv.h>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+#include "ir/operation/Conv2D.h"
+#include "util/Utils.h"
+#include "util/ShapeInference.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace conv2d
+{
+
+void prepareConv2D(ExecEnv *env, const ir::Operation &node)
+{
+ const auto in_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
+ const auto kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
+ const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
+ const auto out_index = node.getOutputs().at(0);
+
+ const auto in_tensor = env->tensorAt(in_index);
+ const auto kernel_tensor = env->tensorAt(kernel_index);
+ const auto bias_tensor = env->tensorAt(bias_index);
+
+ assert(in_tensor->num_dimensions() == 4);
+ assert(kernel_tensor->num_dimensions() == 4);
+ assert(bias_tensor->num_dimensions() == 1);
+
+ UNUSED_RELEASE(in_tensor);
+ UNUSED_RELEASE(kernel_tensor);
+ UNUSED_RELEASE(bias_tensor);
+
+ const auto output_info = env->graph().operands().at(out_index).info();
+ if (output_info.total_size() == 0)
+ {
+ // Handle unspecified output shape
+ const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
+ const auto infered_output_shapes = shape_inference::inferConv2DShape(
+ in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param());
+ env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
+ }
+ else
+ {
+ env->allocateIfNeeded(out_index, output_info);
+ }
+
+ auto out_tensor = env->tensorAt(out_index);
+ UNUSED_RELEASE(out_tensor);
+
+ // Handle same ifm & ofm data type only
+ assert(in_tensor->data_type() == out_tensor->data_type());
+ assert(out_tensor->num_dimensions() == 4);
+}
+
+void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
+ const ITensor *ofm_tensor, const ir::operation::Conv2D::Param &param)
+{
+  // TODO Support NCHW frontend
+ const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+ const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &ker_shape = ker_tensor->tensorInfo().shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+ const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride,
+ ker_width, ker_height);
+
+ // Calculate
+ float activation_min, activation_max;
+ calculateActivationRange(param.activation, &activation_min, &activation_max);
+
+ nnfw::cker::ConvParams cker_param;
+ cker_param.padding_type = convertPaddingType(param.padding.type);
+ cker_param.padding_values.width = padding.left;
+ cker_param.padding_values.height = padding.top;
+ cker_param.stride_width = param.stride.horizontal;
+ cker_param.stride_height = param.stride.vertical;
+ cker_param.dilation_width_factor = 1;
+ cker_param.dilation_height_factor = 1;
+ cker_param.float_activation_min = activation_min;
+ cker_param.float_activation_max = activation_max;
+
+ const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
+ const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
+ const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
+ const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
+ const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
+ const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
+ const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
+ float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
+
+ nnfw::cker::Conv conv_kernel;
+ conv_kernel(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, cker_bias_shape,
+ bias_ptr, cker_ofm_shape, ofm_ptr);
+}
+
+void invokeConv2D(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
+
+ const auto ifm_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
+ const auto ker_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
+ const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
+ const auto ofm_index = node.getOutputs().at(0);
+
+ const auto ifm_tensor = env->tensorAt(ifm_index);
+ const auto ker_tensor = env->tensorAt(ker_index);
+ const auto bias_tensor = env->tensorAt(bias_index);
+ const auto ofm_tensor = env->tensorAt(ofm_index);
+
+ const auto data_type = ifm_tensor->data_type();
+ if (data_type == ir::DataType::FLOAT32)
+ {
+ invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
+ }
+ else
+ {
+ throw std::runtime_error{"NYI: Support float32 only"};
+ }
+}
+} // namespace conv2d
+
+OpKernel *getConv2D()
+{
+ static OpKernel kernel = {conv2d::prepareConv2D, conv2d::invokeConv2D};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
new file mode 100644
index 000000000..2682fbd78
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/DepthwiseConv.h>
+#include <misc/polymorphic_downcast.h>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+#include "ir/operation/DepthwiseConv2D.h"
+#include "util/Utils.h"
+#include "util/ShapeInference.h"
+
+namespace onert
+{
+namespace interp
+{
+
+namespace
+{
+
+void prepareDepthwiseConv(ExecEnv *env, const ir::Operation &node)
+{
+ const auto in_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
+ const auto kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
+ const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
+ const auto out_index = node.getOutputs().at(0);
+
+ const auto in_tensor = env->tensorAt(in_index);
+ const auto kernel_tensor = env->tensorAt(kernel_index);
+ const auto bias_tensor = env->tensorAt(bias_index);
+
+ assert(in_tensor->num_dimensions() == 4);
+ assert(kernel_tensor->num_dimensions() == 4);
+ assert(bias_tensor->num_dimensions() == 1);
+
+ UNUSED_RELEASE(in_tensor);
+ UNUSED_RELEASE(kernel_tensor);
+ UNUSED_RELEASE(bias_tensor);
+
+  // Handle unspecified output shape:
+  // calculate output shape using ifm shape, kernel shape, padding, stride
+ const auto output_info = env->graph().operands().at(out_index).info();
+ if (output_info.total_size() == 0)
+ {
+ // Handle unspecified output shape
+ const auto &depth_conv_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
+ const auto infered_output_shapes = shape_inference::inferDepthwiseConv2DShape(
+ in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(),
+ depth_conv_node.param());
+ env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
+ }
+ else
+ {
+ env->allocateIfNeeded(out_index, output_info);
+ }
+
+ auto out_tensor = env->tensorAt(out_index);
+ UNUSED_RELEASE(out_tensor);
+
+ // Handle same ifm & ofm data type only
+ assert(in_tensor->data_type() == out_tensor->data_type());
+ assert(out_tensor->num_dimensions() == 4);
+}
+
+void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
+ const ITensor *ofm_tensor, const ir::operation::DepthwiseConv2D::Param &param)
+{
+ // TODO Support NCHW frontend
+ const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+ const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+ // Kernel format is [1, kernel_height, kernel_width, depth_out].
+ const auto &ker_shape = ker_tensor->tensorInfo().shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+ const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride,
+ ker_width, ker_height);
+
+ // Calculate
+ float activation_min, activation_max;
+ calculateActivationRange(param.activation, &activation_min, &activation_max);
+
+ nnfw::cker::DepthwiseConvParams cker_param;
+ cker_param.padding_values.width = padding.left;
+ cker_param.padding_values.height = padding.top;
+ cker_param.depth_multiplier = param.multiplier;
+ cker_param.stride_width = param.stride.horizontal;
+ cker_param.stride_height = param.stride.vertical;
+ cker_param.dilation_width_factor = 1;
+ cker_param.dilation_height_factor = 1;
+ cker_param.float_activation_min = activation_min;
+ cker_param.float_activation_max = activation_max;
+
+ const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
+ const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
+ const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
+ const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
+ const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
+ const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
+ const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
+ float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
+
+ nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
+ cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr);
+}
+
+void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &conv_node = static_cast<const ir::operation::DepthwiseConv2D &>(node);
+
+ const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
+ const auto ker_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
+ const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
+ const auto ofm_index = node.getOutputs().at(0);
+
+ const auto ifm_tensor = env->tensorAt(ifm_index);
+ const auto ker_tensor = env->tensorAt(ker_index);
+ const auto bias_tensor = env->tensorAt(bias_index);
+ const auto ofm_tensor = env->tensorAt(ofm_index);
+
+ const auto data_type = ifm_tensor->data_type();
+ if (data_type == ir::DataType::FLOAT32)
+ {
+ invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
+ }
+ else
+ {
+ throw std::runtime_error{"NYI: Support float32 only"};
+ }
+}
+
+} // namespace
+
+OpKernel *getDepthwiseConv2D()
+{
+ static OpKernel kernel = {prepareDepthwiseConv, invokeDepthwiseConv};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/FullyConnected.cc b/runtime/onert/core/src/interp/operations/FullyConnected.cc
new file mode 100644
index 000000000..8dfac43db
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/FullyConnected.cc
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/FullyConnected.h>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+#include "ir/operation/FullyConnected.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace fc
+{
+
+void prepareFC(ExecEnv *env, const ir::Operation &node)
+{
+ const auto in_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
+ const auto kernel_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
+ const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
+ const auto out_index = node.getOutputs().at(0);
+
+ const auto in_tensor = env->tensorAt(in_index);
+ const auto kernel_tensor = env->tensorAt(kernel_index);
+ const auto bias_tensor = env->tensorAt(bias_index);
+
+ UNUSED_RELEASE(in_tensor);
+ UNUSED_RELEASE(kernel_tensor);
+ UNUSED_RELEASE(bias_tensor);
+
+ assert(in_tensor->num_dimensions() >= 2);
+ assert(kernel_tensor->num_dimensions() == 2);
+ assert(bias_tensor->num_dimensions() == 1);
+
+ const auto input_size_with_batch = in_tensor->num_elements();
+ const auto num_units = kernel_tensor->dimension(0);
+ const auto input_size = kernel_tensor->dimension(1);
+ const auto batch_size = input_size_with_batch / input_size;
+ assert(input_size_with_batch % input_size == 0);
+ assert(num_units == bias_tensor->dimension(0));
+
+ // Make output tensor info
+ ir::Shape output_shape(2);
+ output_shape.dim(0) = batch_size;
+ output_shape.dim(1) = num_units;
+ const ir::OperandInfo out_info{output_shape, in_tensor->tensorInfo().typeInfo()};
+ env->allocateIfNeeded(out_index, out_info);
+
+ auto out_tensor = env->tensorAt(out_index);
+ UNUSED_RELEASE(out_tensor);
+
+ // Handle same ifm & ofm data type only
+ assert(in_tensor->data_type() == out_tensor->data_type());
+ assert(out_tensor->num_dimensions() == 2);
+ assert(out_tensor->dimension(0) == batch_size);
+ assert(out_tensor->dimension(1) == num_units);
+}
+
+void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
+ const ITensor *ofm_tensor, const ir::operation::FullyConnected::Param &param)
+{
+ const auto ifm_buffer = ifm_tensor->bufferRO();
+ const auto ker_buffer = ker_tensor->bufferRO();
+ const auto bias_buffer = bias_tensor->bufferRO();
+ auto ofm_buffer = ofm_tensor->buffer();
+
+ // Calculate
+ nnfw::cker::FullyConnectedParams cker_param;
+ cker_param.activation = convertActivationType(param.activation);
+ calculateActivationRange(param.activation, &cker_param.float_activation_min,
+ &cker_param.float_activation_max);
+ const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
+ const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
+ const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
+ const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
+ const float *ifm_ptr = reinterpret_cast<const float *>(ifm_buffer);
+ const float *ker_ptr = reinterpret_cast<const float *>(ker_buffer);
+ const float *bias_ptr = reinterpret_cast<const float *>(bias_buffer);
+ float *ofm_ptr = reinterpret_cast<float *>(ofm_buffer);
+
+ nnfw::cker::FullyConnected(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
+ cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr);
+}
+
+void invokeFC(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &conv_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node);
+
+ const auto ifm_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
+ const auto ker_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
+ const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
+ const auto ofm_index = node.getOutputs().at(0);
+
+ const auto ifm_tensor = env->tensorAt(ifm_index);
+ const auto ker_tensor = env->tensorAt(ker_index);
+ const auto bias_tensor = env->tensorAt(bias_index);
+ const auto ofm_tensor = env->tensorAt(ofm_index);
+
+ const auto data_type = ifm_tensor->data_type();
+ if (data_type == ir::DataType::FLOAT32)
+ {
+ invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
+ }
+ else
+ {
+ throw std::runtime_error{"NYI: Support float only"};
+ }
+}
+} // namespace fc
+
+OpKernel *getFullyConnected()
+{
+ static OpKernel kernel = {fc::prepareFC, fc::invokeFC};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Gather.cc b/runtime/onert/core/src/interp/operations/Gather.cc
new file mode 100644
index 000000000..b63e74886
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/Gather.cc
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/Gather.h>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+#include "ir/operation/Gather.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace
+{
+
+void prepareGather(ExecEnv *env, const ir::Operation &node)
+{
+ const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
+ const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
+ const auto output_index = node.getOutputs().at(0);
+
+ const auto input_tensor = env->tensorAt(input_index);
+ const auto indices_tensor = env->tensorAt(indices_index);
+
+ // TODO handle unspecified output shape:
+ // calculate output shape using ifm shape, kernel shape, padding, stride
+ const auto output_info = env->graph().operands().at(output_index).info();
+ if (output_info.total_size() == 0)
+ {
+ throw std::runtime_error{"Interp(Gather): NYI for unspecified output shape"};
+ }
+ else
+ {
+ env->allocateIfNeeded(output_index, output_info);
+ }
+
+ if (indices_tensor->data_type() != ir::DataType::INT32)
+ {
+ throw std::runtime_error{"Interp(Gather): Invalid indices data type"};
+ }
+
+ auto output_tensor = env->tensorAt(output_index);
+ auto output_rank = input_tensor->num_dimensions() + indices_tensor->num_dimensions() - 1;
+
+ if (output_rank != output_tensor->num_dimensions())
+ {
+ throw std::runtime_error{"Interp(Gather): Invalid output rank"};
+ }
+ if (output_tensor->data_type() != input_tensor->data_type())
+ {
+ throw std::runtime_error{"Interp(Gather): Invalid output data type"};
+ }
+
+ if (input_tensor->data_type() == ir::DataType::QUANT8_ASYMM &&
+ input_tensor->tensorInfo().typeInfo() != output_tensor->tensorInfo().typeInfo())
+ {
+ throw std::runtime_error{
+ "Interp(Gather): Cannot handle different I/O QUANT8_ASYMM scale/offset"};
+ }
+}
+
+template <typename raw_type>
+void invoke(const ITensor *input_tensors, const ITensor *indices_tensors,
+ const ITensor *output_tensor, uint32_t axis)
+{
+ // Calculate
+ nnfw::cker::GatherParams cker_param;
+ cker_param.axis = (int8_t)axis;
+
+ const auto cker_input_shapes = convertShape(input_tensors->tensorInfo().shape());
+ const auto cker_indices_shape = convertShape(indices_tensors->tensorInfo().shape());
+ const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
+ const raw_type *input_ptr = reinterpret_cast<const raw_type *>(input_tensors->bufferRO());
+ const int32_t *indices_ptr = reinterpret_cast<const int32_t *>(indices_tensors->bufferRO());
+ raw_type *output_ptr = reinterpret_cast<raw_type *>(output_tensor->buffer());
+
+ nnfw::cker::Gather<raw_type>(cker_param, cker_input_shapes, input_ptr, cker_indices_shape,
+ indices_ptr, cker_output_shape, output_ptr);
+}
+
+void invokeGather(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &gather_node = nnfw::misc::polymorphic_downcast<const ir::operation::Gather &>(node);
+ const int32_t axis_raw = gather_node.param().axis;
+
+ const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
+ const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
+ const auto output_index = node.getOutputs().at(0);
+
+ const auto input_tensor = env->tensorAt(input_index);
+ const auto indices_tensor = env->tensorAt(indices_index);
+ const auto output_tensor = env->tensorAt(output_index);
+ const uint32_t axis = (axis_raw < 0) ? (axis_raw + input_tensor->num_dimensions()) : axis_raw;
+
+ const auto data_type = input_tensor->data_type();
+
+ switch (data_type)
+ {
+ case ir::DataType::FLOAT32:
+ invoke<float>(input_tensor, indices_tensor, output_tensor, axis);
+ break;
+ case ir::DataType::INT32:
+ invoke<int32_t>(input_tensor, indices_tensor, output_tensor, axis);
+ break;
+ case ir::DataType::QUANT8_ASYMM:
+ invoke<uint8_t>(input_tensor, indices_tensor, output_tensor, axis);
+ break;
+ default:
+ throw std::runtime_error{"Interp(Gather): NYI - Not supported type"};
+ }
+}
+
+} // namespace
+
+OpKernel *getGather()
+{
+ static OpKernel kernel = {prepareGather, invokeGather};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/InstanceNorm.cc b/runtime/onert/core/src/interp/operations/InstanceNorm.cc
new file mode 100644
index 000000000..2538bcc39
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/InstanceNorm.cc
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/InstanceNorm.h>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+#include "ir/operation/InstanceNorm.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace instancenorm
+{
+
+void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node)
+{
+ const auto &instancenorm_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
+
+ const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
+ const auto output_index = node.getOutputs().at(0);
+ const auto input_tensor = env->tensorAt(input_index);
+
+ if (input_tensor->num_dimensions() != 4)
+ {
+ throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"};
+ }
+
+ // Output shape should be same with input
+ env->allocateIfNeeded(output_index, input_tensor->tensorInfo());
+
+ auto output_tensor = env->tensorAt(output_index);
+ UNUSED_RELEASE(output_tensor);
+
+ // Handle same ifm & ofm data type only
+ assert(input_tensor->data_type() == output_tensor->data_type());
+ assert(input_tensor->tensorInfo().shape() == output_tensor->tensorInfo().shape());
+}
+
+inline void setActivationParams(float min, float max, nnfw::cker::InstanceNormParams *params)
+{
+ params->float_activation_min = min;
+ params->float_activation_max = max;
+}
+
+void invoke(const ITensor *input_tensor, const ITensor *gamma_tensor, const ITensor *beta_tensor,
+ const ITensor *output_tensor, const ir::operation::InstanceNorm::Param &param)
+{
+ // Calculate
+ float activation_min, activation_max;
+ calculateActivationRange(param.activation, &activation_min, &activation_max);
+
+ nnfw::cker::InstanceNormParams cker_param;
+ cker_param.epsilon = param.epsilon;
+ cker_param.float_activation_min = activation_min;
+ cker_param.float_activation_max = activation_max;
+
+ const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
+ const auto cker_gamma_shape = convertShape(gamma_tensor->tensorInfo().shape());
+ const auto cker_beta_shape = convertShape(beta_tensor->tensorInfo().shape());
+ const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
+ const float *input_ptr = reinterpret_cast<const float *>(input_tensor->bufferRO());
+ const float *gamma_ptr = reinterpret_cast<const float *>(gamma_tensor->bufferRO());
+ const float *beta_ptr = reinterpret_cast<const float *>(beta_tensor->bufferRO());
+ float *output_ptr = reinterpret_cast<float *>(output_tensor->buffer());
+
+ nnfw::cker::InstanceNorm(cker_param, cker_input_shape, input_ptr, cker_gamma_shape, gamma_ptr,
+ cker_beta_shape, beta_ptr, cker_output_shape, output_ptr);
+}
+
+void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &instancenorm_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
+
+ const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
+ const auto gamma_index = node.getInputs().at(instancenorm_node.GAMMA);
+ const auto beta_index = node.getInputs().at(instancenorm_node.BETA);
+ const auto out_index = node.getOutputs().at(0);
+ const auto input_tensor = env->tensorAt(input_index);
+ const auto gamma_tensor = env->tensorAt(gamma_index);
+ const auto beta_tensor = env->tensorAt(beta_index);
+ const auto out_tensor = env->tensorAt(out_index);
+ const auto data_type = input_tensor->data_type();
+
+ if (data_type == ir::DataType::FLOAT32)
+ {
+ invoke(input_tensor, gamma_tensor, beta_tensor, out_tensor, instancenorm_node.param());
+ }
+ else
+ {
+ throw std::runtime_error{"NYI: Unsupported data type"};
+ }
+}
+} // namespace instancenorm
+
+OpKernel *getInstanceNorm()
+{
+ static OpKernel kernel = {instancenorm::prepareInstanceNorm, instancenorm::invokeInstanceNorm};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Logistic.cc b/runtime/onert/core/src/interp/operations/Logistic.cc
new file mode 100644
index 000000000..c23cbb782
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/Logistic.cc
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/Logistic.h>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+#include "ir/operation/Logistic.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace
+{
+
+void prepareLogistic(ExecEnv *env, const ir::Operation &node)
+{
+  const auto input_index = node.getInputs().at(0);
+  const auto output_index = node.getOutputs().at(0);
+
+  const auto input_tensor = env->tensorAt(input_index);
+
+  const auto output_info = env->graph().operands().at(output_index).info();
+
+  // Check shape and type of output is same with input
+  // TODO Util function to compare TensorInfo
+  if (output_info.total_size() == 0)
+  {
+    throw std::runtime_error{"Interp(Logistic): NYI unspecified output shape"};
+  }
+  else
+  {
+    env->allocateIfNeeded(output_index, output_info);
+  }
+
+  const auto output_tensor = env->tensorAt(output_index);
+  if (input_tensor->data_type() != output_tensor->data_type())
+  {
+    throw std::runtime_error{"Interp(Logistic): Invalid output type"};
+  }
+}
+
+void invoke(const ITensor *input_tensor, const ITensor *output_tensor)
+{
+ const auto input_buffer = input_tensor->bufferRO();
+ auto output_buffer = output_tensor->buffer();
+
+ const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
+ const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
+ const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
+ float *output_ptr = reinterpret_cast<float *>(output_buffer);
+
+ nnfw::cker::Logistic(cker_input_shape, input_ptr, cker_output_shape, output_ptr);
+}
+
+void invokeLogistic(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto input_index = node.getInputs().at(0);
+ const auto output_index = node.getOutputs().at(0);
+
+ const auto input_tensor = env->tensorAt(input_index);
+ const auto output_tensor = env->tensorAt(output_index);
+
+ const auto data_type = input_tensor->data_type();
+
+ if (data_type == ir::DataType::FLOAT32)
+ {
+ invoke(input_tensor, output_tensor);
+ }
+ else
+ {
+ throw std::runtime_error{"Interp(Logistic): NYI - Unsupported data type"};
+ }
+}
+} // namespace
+
+OpKernel *getLogistic()
+{
+ static OpKernel kernel = {prepareLogistic, invokeLogistic};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/MaxPool2D.cc b/runtime/onert/core/src/interp/operations/MaxPool2D.cc
new file mode 100644
index 000000000..d524f356e
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/MaxPool2D.cc
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/MaxPool.h>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+#include "ir/operation/MaxPool2D.h"
+#include "util/Utils.h"
+#include "util/ShapeInference.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace
+{
+
+void prepareMaxPool2D(ExecEnv *env, const ir::Operation &node)
+{
+ const auto in_index = node.getInputs().at(0);
+ const auto out_index = node.getOutputs().at(0);
+
+ const auto in_tensor = env->tensorAt(in_index);
+
+ assert(in_tensor->num_dimensions() == 4);
+ UNUSED_RELEASE(in_tensor);
+
+ const auto output_info = env->graph().operands().at(out_index).info();
+ if (output_info.total_size() == 0)
+ {
+ // Handle unspecified output shape
+ const auto &maxpool_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
+ const auto infered_output_shapes =
+ shape_inference::inferMaxPoolShape(in_tensor->tensorInfo().shape(), maxpool_node.param());
+ env->allocateIfNeeded(out_index, {infered_output_shapes[0], output_info.typeInfo()});
+ }
+ else
+ {
+ env->allocateIfNeeded(out_index, output_info);
+ }
+
+ auto out_tensor = env->tensorAt(out_index);
+ UNUSED_RELEASE(out_tensor);
+
+ // Handle same ifm & ofm data type only
+ assert(in_tensor->data_type() == out_tensor->data_type());
+ assert(out_tensor->num_dimensions() == 4);
+}
+
+void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
+ const ir::operation::MaxPool2D::Param &param)
+{
+ // TODO support NCHW frontend
+ const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+ const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+ const auto padding =
+ ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
+ // Calculate
+ nnfw::cker::PoolParams cker_param;
+ calculateActivationRange(param.activation, &cker_param.float_activation_min,
+ &cker_param.float_activation_max);
+ cker_param.filter_width = param.kw;
+ cker_param.filter_height = param.kh;
+ cker_param.padding_values.width = padding.left;
+ cker_param.padding_values.height = padding.top;
+ cker_param.stride_width = param.stride.horizontal;
+ cker_param.stride_height = param.stride.vertical;
+
+ const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
+ const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
+ const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
+ float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
+
+ nnfw::cker::MaxPool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
+}
+
+void invokeMaxPool2D(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &maxpool_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
+
+ const auto in_index = node.getInputs().at(0);
+ const auto out_index = node.getOutputs().at(0);
+
+ const auto in_tensor = env->tensorAt(in_index);
+ const auto out_tensor = env->tensorAt(out_index);
+
+ const auto data_type = in_tensor->data_type();
+ if (data_type == ir::DataType::FLOAT32)
+ {
+ invoke(in_tensor, out_tensor, maxpool_node.param());
+ }
+ else
+ {
+ throw std::runtime_error{"NYI: Support float32 only"};
+ }
+}
+} // namespace
+
+OpKernel *getMaxPool2D()
+{
+ static OpKernel kernel = {prepareMaxPool2D, invokeMaxPool2D};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/OperationUtil.h b/runtime/onert/core/src/interp/operations/OperationUtil.h
new file mode 100644
index 000000000..cdabe508d
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/OperationUtil.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
+#define __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
+
+#include "ir/Shape.h"
+#include "ir/InternalType.h"
+#include "ir/Padding.h"
+
+#include <cker/Shape.h>
+#include <cker/Types.h>
+
+namespace onert
+{
+namespace interp
+{
+
+inline nnfw::cker::Shape convertShape(const ir::Shape &shape)
+{
+ auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
+
+ std::vector<int32_t> raw_shape;
+ raw_shape.resize(4);
+
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ if (i >= dimensions.size())
+ {
+ raw_shape[i] = 1;
+ }
+ else
+ {
+ raw_shape[i] = dimensions[i];
+ }
+ }
+
+ return nnfw::cker::GetShape(raw_shape);
+}
+
+inline nnfw::cker::Shape convertExtendShape(const ir::Shape &shape)
+{
+ auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
+
+ std::vector<int32_t> raw_shape;
+ raw_shape.resize(4);
+ uint32_t start = 4 - dimensions.size();
+
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ if (i < start)
+ {
+ raw_shape[i] = 1;
+ }
+ else
+ {
+ raw_shape[i] = dimensions[i - start];
+ }
+ }
+
+ return nnfw::cker::GetShape(raw_shape);
+}
+
+inline nnfw::cker::FusedActivationFunctionType
+convertActivationType(const ir::Activation activation)
+{
+ switch (activation)
+ {
+ case ir::Activation::NONE:
+ return nnfw::cker::FusedActivationFunctionType::kNone;
+ case ir::Activation::RELU:
+ return nnfw::cker::FusedActivationFunctionType::kRelu;
+ case ir::Activation::RELU1:
+ return nnfw::cker::FusedActivationFunctionType::kRelu1;
+ case ir::Activation::RELU6:
+ return nnfw::cker::FusedActivationFunctionType::kRelu6;
+ default:
+ throw std::runtime_error{"CPU backend: Cannot convert activation type"};
+ }
+}
+
+template <typename T>
+void calculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
+{
+ if (activation == ir::Activation::RELU)
+ {
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else if (activation == ir::Activation::RELU6)
+ {
+ *activation_min = 0;
+ *activation_max = 6;
+ }
+ else if (activation == ir::Activation::RELU1)
+ {
+ *activation_min = -1;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::NONE)
+ {
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else
+ {
+ throw std::runtime_error{"Unsupported activation type"};
+ }
+}
+
+inline ir::Shape calcBroadcastShape(const ir::Shape &lhs, const ir::Shape &rhs, bool &success)
+{
+ int lhs_rank = lhs.rank();
+ int rhs_rank = rhs.rank();
+
+ int out_rank = (lhs_rank > rhs_rank ? lhs_rank : rhs_rank);
+ ir::Shape out_shape(out_rank);
+
+ int lhs_idim = lhs_rank - 1;
+ int rhs_idim = rhs_rank - 1;
+ success = true;
+ for (int out_idim = out_rank - 1; out_idim >= 0; out_idim--)
+ {
+ if (lhs_idim == -1 && rhs_idim == -1)
+ {
+ // invalid result
+ success = false;
+ break;
+ }
+
+ if (lhs_idim == -1)
+ {
+ out_shape.dim(out_idim) = rhs.dim(rhs_idim);
+ rhs_idim--;
+ }
+ else if (rhs_idim == -1)
+ {
+ out_shape.dim(out_idim) = lhs.dim(lhs_idim);
+ lhs_idim--;
+ }
+ else
+ {
+ if (lhs.dim(lhs_idim) == rhs.dim(rhs_idim))
+ {
+ out_shape.dim(out_idim) = lhs.dim(lhs_idim);
+ lhs_idim--;
+ rhs_idim--;
+ }
+ else if (lhs.dim(lhs_idim) == 1)
+ {
+ out_shape.dim(out_idim) = rhs.dim(rhs_idim);
+ lhs_idim--;
+ rhs_idim--;
+ }
+ else if (rhs.dim(rhs_idim) == 1)
+ {
+ out_shape.dim(out_idim) = lhs.dim(lhs_idim);
+ lhs_idim--;
+ rhs_idim--;
+ }
+ else
+ {
+ // invalid result
+ success = false;
+ break;
+ }
+ }
+ }
+
+ if (lhs_idim != -1 || rhs_idim != -1)
+ {
+ // invalid result
+ success = false;
+ }
+ return out_shape;
+}
+
+inline nnfw::cker::PaddingType convertPaddingType(ir::PaddingType ir_padding_type)
+{
+ switch (ir_padding_type)
+ {
+ case ir::PaddingType::EXPLICIT:
+ return nnfw::cker::PaddingType::kNone;
+ case ir::PaddingType::SAME:
+ return nnfw::cker::PaddingType::kSame;
+ case ir::PaddingType::VALID:
+ return nnfw::cker::PaddingType::kValid;
+ default:
+ throw std::runtime_error("Wrong padding type.");
+ break;
+ }
+}
+
+} // namespace interp
+} // namespace onert
+
+#endif // __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
diff --git a/runtime/onert/core/src/interp/operations/Pad.cc b/runtime/onert/core/src/interp/operations/Pad.cc
new file mode 100644
index 000000000..d2e3627b4
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/Pad.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/Pad.h>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+#include "ir/operation/Pad.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace
+{
+
+void preparePad(ExecEnv *env, const ir::Operation &node)
+{
+ const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
+ const auto output_index = node.getOutputs().at(0);
+
+ const auto input_tensor = env->tensorAt(input_index);
+
+ const auto output_info = env->graph().operands().at(output_index).info();
+
+ // Check shape and type lhs is same with rhs
+ // TODO Util function to compare TensorInfo
+ if (output_info.total_size() == 0)
+ {
+ throw std::runtime_error{"Interp(Pad): NYI unspecified output shape"};
+ }
+ else
+ {
+ env->allocateIfNeeded(output_index, output_info);
+ }
+
+ const auto output_tensor = env->tensorAt(output_index);
+ if (input_tensor->data_type() != output_tensor->data_type())
+ {
+ throw std::runtime_error{"Interp(Pad): Invalid output type"};
+ }
+}
+
+void invoke(const ITensor *input_tensor, const ITensor *pad_tensor, const ITensor *output_tensor)
+{
+ const auto input_buffer = input_tensor->bufferRO();
+ const auto pad_buffer = pad_tensor->bufferRO();
+ auto output_buffer = output_tensor->buffer();
+
+ int32_t pad_rank = pad_tensor->dimension(0);
+
+ const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
+ const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
+ const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
+ const int32_t *pad_ptr = reinterpret_cast<const int32_t *>(pad_buffer);
+ float *output_ptr = reinterpret_cast<float *>(output_buffer);
+
+ nnfw::cker::Pad(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape, output_ptr,
+ nullptr);
+}
+
+void invokePad(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
+ const auto pad_index = node.getInputs().at(ir::operation::Pad::PAD);
+ const auto output_index = node.getOutputs().at(0);
+
+ const auto input_tensor = env->tensorAt(input_index);
+ const auto pad_tensor = env->tensorAt(pad_index);
+ const auto output_tensor = env->tensorAt(output_index);
+
+ const auto data_type = input_tensor->data_type();
+
+ if (data_type == ir::DataType::FLOAT32)
+ {
+ invoke(input_tensor, pad_tensor, output_tensor);
+ }
+ else
+ {
+ throw std::runtime_error{"Interp(Pad): NYI - Unsupported data type"};
+ }
+}
+} // namespace
+
+OpKernel *getPad()
+{
+ static OpKernel kernel = {preparePad, invokePad};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Reshape.cc b/runtime/onert/core/src/interp/operations/Reshape.cc
new file mode 100644
index 000000000..3a118456b
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/Reshape.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "interp/Registration.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace
+{
+
+void prepare(ExecEnv *env, const ir::Operation &node)
+{
+ const auto in_index = node.getInputs().at(0);
+ const auto out_index = node.getOutputs().at(0);
+
+ // Unspecified shape is not supported in operation node spec now
+ const auto output_info = env->graph().operands().at(out_index).info();
+ env->allocateAndShareIfNeeded(out_index, output_info, in_index);
+
+ assert(output_info.total_size() == env->graph().operands().at(in_index).info().total_size());
+}
+
+void invoke(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto in_index = node.getInputs().at(0);
+ const auto out_index = node.getOutputs().at(0);
+
+ if (env->tensorAt(in_index)->bufferRO() == env->tensorAt(out_index)->bufferRO())
+ {
+ // Same data
+ return;
+ }
+
+ const auto output_info = env->graph().operands().at(out_index).info();
+ memcpy(env->tensorAt(out_index)->buffer(), env->tensorAt(in_index)->bufferRO(),
+ output_info.total_size());
+}
+
+} // namespace
+
+OpKernel *getReshape()
+{
+ static OpKernel kernel = {prepare, invoke};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc
new file mode 100644
index 000000000..afc4e81f7
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/Softmax.cc
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/SoftMax.h>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+#include "ir/operation/Softmax.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace
+{
+
+void Softmax2D(const float *in, const int input_size, const int batch_size, const float beta,
+ float *out)
+{
+ assert(input_size > 0);
+
+ // For each batch
+ for (int b = 0; b < batch_size; b++)
+ {
+ // Find the max coeff.
+ float max_coeff = in[0];
+ for (int i = 1; i < input_size; i++)
+ {
+ if (in[i] > max_coeff)
+ max_coeff = in[i];
+ }
+
+ // Compute the normalized sum of exps.
+ float exp_sum = 0.0;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] = std::exp((in[i] - max_coeff) * beta);
+ exp_sum += out[i];
+ }
+
+ // Divide by the sum of exps.
+ float reciprocal_sum_exp = 1.f / exp_sum;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] *= reciprocal_sum_exp;
+ }
+
+ // Advance in and out pointers for the next batch.
+ in += input_size;
+ out += input_size;
+ }
+}
+
+void prepareSoftMax(ExecEnv *env, const ir::Operation &node)
+{
+  const auto in_index = node.getInputs().at(0);
+  const auto out_index = node.getOutputs().at(0);
+
+  const auto in_tensor = env->tensorAt(in_index);
+  UNUSED_RELEASE(in_tensor);
+
+  assert((in_tensor->num_dimensions() == 4) || (in_tensor->num_dimensions() == 2));
+
+  // Output shape should be same with input
+  // Output type is pre-defined in model
+  const auto output_shape = env->graph().operands().at(in_index).info().shape();
+  const auto output_type = env->graph().operands().at(out_index).info().typeInfo();
+
+  const ir::OperandInfo output_info{output_shape, output_type};
+  env->allocateIfNeeded(out_index, output_info);
+
+  auto out_tensor = env->tensorAt(out_index);
+  UNUSED_RELEASE(out_tensor);
+
+  // Check output shape is same with input (was a self-comparison: out vs out)
+  assert(in_tensor->num_dimensions() == out_tensor->num_dimensions());
+  for (uint32_t i = 0; i < in_tensor->num_dimensions(); i++)
+  {
+    assert(in_tensor->dimension(i) == out_tensor->dimension(i));
+  }
+}
+
+void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
+            const ir::operation::Softmax::Param &param)
+{
+  const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
+  float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
+
+  float beta = param.beta;
+
+  if (in_tensor->num_dimensions() == 2)
+  {
+    uint32_t batch_size = in_tensor->dimension(0);
+    uint32_t input_size = in_tensor->dimension(1);
+
+    Softmax2D(in_ptr, input_size, batch_size, beta, out_ptr);
+  }
+  else if (in_tensor->num_dimensions() == 4)
+  {
+    const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
+    const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
+
+    nnfw::cker::SoftmaxParams cker_param;
+    cker_param.beta = beta;
+
+    nnfw::cker::Softmax(cker_param, in_shape, in_ptr, out_shape, out_ptr);
+  }
+  else
+  {
+    throw std::runtime_error{"Unsupported input dimension: support 2D or 4D"};
+  }
+}
+
+void invokeSoftMax(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &softmax_node = nnfw::misc::polymorphic_downcast<const ir::operation::Softmax &>(node);
+
+ const auto in_index = node.getInputs().at(0);
+ const auto out_index = node.getOutputs().at(0);
+
+ const auto in_tensor = env->tensorAt(in_index);
+ const auto out_tensor = env->tensorAt(out_index);
+
+ const auto in_data_type = in_tensor->data_type();
+ const auto out_data_type = out_tensor->data_type();
+ if ((in_data_type == ir::DataType::FLOAT32) && (out_data_type == ir::DataType::FLOAT32))
+ {
+ invoke(in_tensor, out_tensor, softmax_node.param());
+ }
+ else
+ {
+ throw std::runtime_error{"NYI: Support float32 only"};
+ }
+}
+
+} // namespace
+
+OpKernel *getSoftmax()
+{
+ static OpKernel kernel = {prepareSoftMax, invokeSoftMax};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/TransposeConv.cc b/runtime/onert/core/src/interp/operations/TransposeConv.cc
new file mode 100644
index 000000000..cc2ced26b
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/TransposeConv.cc
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/TransposeConv.h>
+#include <misc/polymorphic_downcast.h>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+#include "ir/operation/TransposeConv.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace
+{
+
+void prepareTransposeConv(ExecEnv *env, const ir::Operation &node)
+{
+ const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
+ const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
+ const auto ofm_shape_index = node.getInputs().at(ir::operation::TransposeConv::OUTPUT_SHAPE);
+ const auto ofm_index = node.getOutputs().at(0);
+
+ const auto ifm_tensor = env->tensorAt(ifm_index);
+ const auto ker_tensor = env->tensorAt(ker_index);
+ const auto ofm_shape_tensor = env->tensorAt(ofm_shape_index);
+
+ assert(ifm_tensor->num_dimensions() == 4);
+ assert(ker_tensor->num_dimensions() == 4);
+ assert(ofm_shape_tensor->num_dimensions() == 1);
+
+ UNUSED_RELEASE(ifm_tensor);
+ UNUSED_RELEASE(ker_tensor);
+ UNUSED_RELEASE(ofm_shape_tensor);
+
+ const auto output_info = env->graph().operands().at(ofm_index).info();
+ if (output_info.total_size() == 0)
+ {
+ // TODO: Handle unspecified output shape
+ throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"};
+ }
+ else
+ {
+ env->allocateIfNeeded(ofm_index, output_info);
+ }
+
+ auto ofm_tensor = env->tensorAt(ofm_index);
+ UNUSED_RELEASE(ofm_tensor);
+
+ // Handle same ifm & ofm data type only
+ if (ifm_tensor->data_type() != ofm_tensor->data_type())
+ {
+ throw std::runtime_error{"Interp(TConv): Different I/O data dype"};
+ }
+
+ if (ofm_tensor->num_dimensions() != 4)
+ {
+ throw std::runtime_error{"Interp(TConv): Invalid output rank"};
+ }
+}
+
+void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *ofm_tensor,
+ const ir::operation::TransposeConv::Param &param)
+{
+ const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+ const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto ker_shape = ker_tensor->tensorInfo().shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+ const auto padding = ir::calculatePadding(param.padding, ofm_shape, ifm_shape, param.stride,
+ ker_width, ker_height);
+
+ nnfw::cker::TransposeConvParams cker_param;
+ cker_param.padding_values.width = padding.left;
+ cker_param.padding_values.height = padding.top;
+ cker_param.stride_width = param.stride.horizontal;
+ cker_param.stride_height = param.stride.vertical;
+ cker_param.dilation_width_factor = 1;
+ cker_param.dilation_height_factor = 1;
+
+ const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
+ const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
+ const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
+ const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
+ const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
+ float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
+
+ nnfw::cker::TransposeConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
+ cker_ofm_shape, ofm_ptr);
+}
+
+void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &tconv_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node);
+
+ const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
+ const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
+ const auto ofm_index = node.getOutputs().at(0);
+
+ const auto ifm_tensor = env->tensorAt(ifm_index);
+ const auto ker_tensor = env->tensorAt(ker_index);
+ const auto ofm_tensor = env->tensorAt(ofm_index);
+
+ const auto data_type = ifm_tensor->data_type();
+ if (data_type == ir::DataType::FLOAT32)
+ {
+ invoke(ifm_tensor, ker_tensor, ofm_tensor, tconv_node.param());
+ }
+ else
+ {
+ throw std::runtime_error{"Interp(TConv): Support float32 only"};
+ }
+}
+
+} // namespace
+
+OpKernel *getTransposeConv()
+{
+ static OpKernel kernel = {prepareTransposeConv, invokeTransposeConv};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/UnaryActivations.cc b/runtime/onert/core/src/interp/operations/UnaryActivations.cc
new file mode 100644
index 000000000..ea5e2417b
--- /dev/null
+++ b/runtime/onert/core/src/interp/operations/UnaryActivations.cc
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cmath>
+
+#include "OperationUtil.h"
+
+#include "interp/Registration.h"
+
+#include "ir/operation/ReLU.h"
+#include "ir/operation/ReLU1.h"
+#include "ir/operation/ReLU6.h"
+#include "ir/operation/Tanh.h"
+
+namespace onert
+{
+namespace interp
+{
+namespace
+{
+
+enum class ActivationType
+{
+ ReLU,
+ ReLU1,
+ ReLU6,
+ Tanh
+};
+
+void prepare(ExecEnv *env, const ir::Operation &node)
+{
+ const auto input_index = node.getInputs().at(0);
+ const auto output_index = node.getOutputs().at(0);
+
+ const auto input_tensor = env->tensorAt(input_index);
+
+ const auto output_info = env->graph().operands().at(output_index).info();
+ if (output_info.total_size() == 0)
+ {
+ // Output's shape and type is same with input
+ auto input_info = input_tensor->tensorInfo();
+ // We can handle already allocated (ex. model output)
+ env->allocateIfNeeded(output_index, input_info);
+ }
+ else
+ {
+ env->allocateIfNeeded(output_index, output_info);
+ }
+
+ const auto output_tensor = env->tensorAt(output_index);
+ // Check shape and type lhs is same with output
+ // TODO Util function to compare TensorInfo
+ if (input_tensor->data_type() != output_tensor->data_type())
+ {
+ throw std::runtime_error{"Interp(Activations): Invalid output type"};
+ }
+}
+
+template <ActivationType act_type>
+void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements)
+{
+ std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); };
+ switch (act_type)
+ {
+ case ActivationType::ReLU:
+ fn = [](const float &in) { return std::max(0.f, in); };
+ break;
+ case ActivationType::ReLU1:
+ fn = [](const float &in) { return std::min(std::max(-1.f, in), 1.f); };
+ break;
+ case ActivationType::ReLU6:
+ fn = [](const float &in) { return std::min(std::max(0.f, in), 6.f); };
+ break;
+ case ActivationType::Tanh:
+ fn = [](const float &in) { return std::tanh(in); };
+ break;
+ default:
+ throw std::runtime_error{"Interp(Activations): NYI - Unsupported activation"};
+ break;
+ }
+
+ const float *input_end = input_ptr + num_elements;
+ for (; input_ptr < input_end; input_ptr++, output_ptr++)
+ {
+ *output_ptr = fn(*input_ptr);
+ }
+}
+
+template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto input_index = node.getInputs().at(0);
+ const auto output_index = node.getOutputs().at(0);
+
+ // Check lhs shape is same with rhs (with broadcast)
+ const auto input_tensor = env->tensorAt(input_index);
+ const auto output_tensor = env->tensorAt(output_index);
+
+ const auto data_type = input_tensor->data_type();
+ if (data_type == ir::DataType::FLOAT32)
+ {
+ uint64_t elements = input_tensor->num_elements();
+ const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO());
+ float *out = reinterpret_cast<float *>(output_tensor->buffer());
+
+ evalFloat<act_type>(input_start, out, elements);
+ }
+ else
+ {
+ throw std::runtime_error{"Interp(ReLU6): NYI - Support float only"};
+ }
+}
+
+} // namespace
+
+OpKernel *getReLU()
+{
+ static OpKernel kernel = {prepare, invoke<ActivationType::ReLU>};
+ return &kernel;
+}
+
+OpKernel *getReLU1()
+{
+ static OpKernel kernel = {prepare, invoke<ActivationType::ReLU1>};
+ return &kernel;
+}
+
+OpKernel *getReLU6()
+{
+ static OpKernel kernel = {prepare, invoke<ActivationType::ReLU6>};
+ return &kernel;
+}
+
+OpKernel *getTanh()
+{
+ static OpKernel kernel = {prepare, invoke<ActivationType::Tanh>};
+ return &kernel;
+}
+
+} // namespace interp
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/Coordinates.cc b/runtime/onert/core/src/ir/Coordinates.cc
new file mode 100644
index 000000000..a02a56567
--- /dev/null
+++ b/runtime/onert/core/src/ir/Coordinates.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Coordinates.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace ir
+{
+
+Coordinates convertCoordinates(const Coordinates &from_coordinates, Layout from_layout,
+ Layout to_layout)
+{
+ assert(from_coordinates.size() == 4);
+ Coordinates to{from_coordinates};
+ if (from_layout == Layout::NHWC && to_layout == Layout::NCHW)
+ {
+ to.set(0, from_coordinates[0]);
+ to.set(1, from_coordinates[3]);
+ to.set(2, from_coordinates[1]);
+ to.set(3, from_coordinates[2]);
+ }
+ else if (from_layout == Layout::NCHW && to_layout == Layout::NHWC)
+ {
+ to.set(0, from_coordinates[0]);
+ to.set(1, from_coordinates[2]);
+ to.set(2, from_coordinates[3]);
+ to.set(3, from_coordinates[1]);
+ }
+
+ return to;
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc
new file mode 100644
index 000000000..837a88a6b
--- /dev/null
+++ b/runtime/onert/core/src/ir/Graph.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+
+#include <algorithm>
+#include <bitset>
+#include <sstream>
+
+#include "util/logging.h"
+#include "verifier/Verifier.h"
+#include "ir/operation/LowerInfo.h"
+#include "ir/operand/LowerInfo.h"
+#include "ir/operand/PermuteFactor.h"
+#include "ir/GraphIterator.h"
+#include "backend/IConfig.h"
+
+namespace onert
+{
+namespace ir
+{
+
+Graph::Graph() = default;
+
+Graph::~Graph(void) = default;
+
+OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type)
+{
+ return _operands.emplace(shape, type);
+}
+
+OperationIndex Graph::addOperation(std::unique_ptr<Operation> &&node)
+{
+ assert(isBuildingPhase());
+ return _operations.push(std::move(node));
+}
+
+void Graph::setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data)
+{
+ assert(isBuildingPhase());
+ assert(_operands.exist(ind));
+ _operands.at(ind).data(std::move(data));
+}
+
+void Graph::addInput(const OperandIndex &ind)
+{
+ assert(isBuildingPhase());
+ _inputs.append(ind);
+}
+
+void Graph::addOutput(const OperandIndex &ind)
+{
+ assert(isBuildingPhase());
+ _outputs.append(ind);
+}
+
+void Graph::finishBuilding(void)
+{
+ assert(isBuildingPhase());
+ _phase = Phase::MODEL;
+
+ // Initialize operand use-def
+ initializeUseDef();
+
+ // Call graph verifications for the MODEL phase
+ {
+ assert(verifier::DAGChecker().verify(*this));
+ assert(verifier::EdgeConsistencyChecker().verify(*this));
+ }
+}
+
+void Graph::initializeUseDef()
+{
+ operations().iterate([&](const OperationIndex &index, const Operation &node) -> void {
+ auto outputs = node.getOutputs();
+ for (auto output : outputs)
+ {
+ operands().at(output).appendDef(index);
+ }
+
+ auto inputs = node.getInputs();
+ for (auto input : inputs)
+ {
+ operands().at(input).appendUse(index);
+ }
+ });
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/GraphIterator.cc b/runtime/onert/core/src/ir/GraphIterator.cc
new file mode 100644
index 000000000..9e8b3e533
--- /dev/null
+++ b/runtime/onert/core/src/ir/GraphIterator.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GraphIterator.h"
+
+#include "ir/OperationIndexMap.h"
+#include "ir/Graph.h"
+
+namespace onert
+{
+namespace ir
+{
+
+// Explicit instantiations to have implementation in the source file.
+
+template class DefaultIterator<true>;
+template class DefaultIterator<false>;
+
+template class PostDfsIterator<true>;
+template class PostDfsIterator<false>;
+
+//
+// Graph::DefaultIterator
+//
+
+template <bool is_const>
+void DefaultIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const
+{
+ graph.operations().iterate(
+ [&](const OperationIndex &index, NodeRef node) -> void { fn(index, node); });
+}
+
+//
+// Graph::PostDfsIterator
+//
+
+template <bool is_const>
+void PostDfsIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const
+{
+ assert(!graph.isBuildingPhase()); // Restrict iteration condition
+
+ OperationIndexMap<bool> visited;
+ graph.operations().iterate([&](const OperationIndex &index, NodeRef) { visited[index] = false; });
+
+ std::function<void(const OperationIndex &, NodeRef)> dfs_recursive =
+ [&](const OperationIndex &index, NodeRef node) -> void {
+ if (visited[index])
+ return;
+ visited[index] = true;
+
+ for (auto output : node.getOutputs())
+ {
+ const auto &operand = graph.operands().at(output);
+ for (const auto &use : operand.getUses().list())
+ {
+ dfs_recursive(use, graph.operations().at(use));
+ }
+ }
+
+ fn(index, node);
+ };
+
+ graph.operations().iterate(dfs_recursive);
+
+ // All of the operations(nodes) must have been visited.
+ assert(std::all_of(visited.begin(), visited.end(),
+ [](const std::pair<const OperationIndex, bool> &v) { return v.second; }));
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/GraphIterator.h b/runtime/onert/core/src/ir/GraphIterator.h
new file mode 100644
index 000000000..42c851895
--- /dev/null
+++ b/runtime/onert/core/src/ir/GraphIterator.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_GRAPH_ITERATOR_H__
+#define __ONERT_IR_GRAPH_ITERATOR_H__
+
+#include <type_traits>
+
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace ir
+{
+
+class Graph;
+class Operation;
+
+template <bool is_const> class Iterator
+{
+public:
+ using GraphRef = typename std::conditional<is_const, const Graph &, Graph &>::type;
+ using IndexRef = const OperationIndex &;
+ using NodeRef = typename std::conditional<is_const, const Operation &, Operation &>::type;
+ using IterFn = std::function<void(IndexRef, NodeRef)>;
+
+public:
+ virtual ~Iterator() = default;
+ virtual void iterate(GraphRef graph, const IterFn &fn) const = 0;
+};
+
+template <bool is_const = false> class DefaultIterator final : public Iterator<is_const>
+{
+public:
+ using GraphRef = typename Iterator<is_const>::GraphRef;
+ using IndexRef = typename Iterator<is_const>::IndexRef;
+ using NodeRef = typename Iterator<is_const>::NodeRef;
+ using IterFn = typename Iterator<is_const>::IterFn;
+
+public:
+ void iterate(GraphRef graph, const IterFn &fn) const;
+};
+using DefaultConstIterator = DefaultIterator<true>;
+
+template <bool is_const = false> class PostDfsIterator final : public Iterator<is_const>
+{
+public:
+ using GraphRef = typename Iterator<is_const>::GraphRef;
+ using IndexRef = typename Iterator<is_const>::IndexRef;
+ using NodeRef = typename Iterator<is_const>::NodeRef;
+ using IterFn = typename Iterator<is_const>::IterFn;
+
+public:
+ void iterate(GraphRef graph, const IterFn &fn) const;
+};
+using PostDfsConstIterator = PostDfsIterator<true>;
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_GRAPH_ITERATOR_H__
diff --git a/runtime/onert/core/src/ir/LayoutSet.cc b/runtime/onert/core/src/ir/LayoutSet.cc
new file mode 100644
index 000000000..bd3f438ad
--- /dev/null
+++ b/runtime/onert/core/src/ir/LayoutSet.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LayoutSet.h"
+
+namespace onert
+{
+namespace ir
+{
+
+LayoutSet::LayoutSet(std::initializer_list<Layout> layouts)
+{
+ for (auto layout : layouts)
+ {
+ _set.insert(layout);
+ }
+}
+
+LayoutSet LayoutSet::operator|(const LayoutSet &other) const
+{
+ auto ret = *this;
+ for (auto layout : other)
+ {
+ ret.add(layout);
+ }
+ return ret;
+}
+
+LayoutSet LayoutSet::operator&(const LayoutSet &other) const
+{
+ LayoutSet ret;
+ for (auto layout : other)
+ {
+ if (contains(layout))
+ {
+ ret.add(layout);
+ }
+ }
+ return ret;
+}
+
+LayoutSet LayoutSet::operator-(const LayoutSet &other) const
+{
+ auto ret = *this;
+ for (auto layout : other)
+ {
+ ret.remove(layout);
+ }
+ return ret;
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/LayoutSet.h b/runtime/onert/core/src/ir/LayoutSet.h
new file mode 100644
index 000000000..6ce4e38c6
--- /dev/null
+++ b/runtime/onert/core/src/ir/LayoutSet.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_LAYOUT_SET_H__
+#define __ONERT_IR_LAYOUT_SET_H__
+
+#include <initializer_list>
+#include <unordered_set>
+
+#include "ir/Layout.h"
+
+namespace onert
+{
+namespace ir
+{
+
+class LayoutSet
+{
+public:
+ LayoutSet() = default;
+ LayoutSet(std::initializer_list<Layout> layouts);
+
+public:
+ void add(const Layout &layout) { _set.insert(layout); }
+ void remove(const Layout &layout) { _set.erase(layout); }
+ uint32_t size() const { return static_cast<uint32_t>(_set.size()); }
+ bool contains(const Layout &layout) const { return _set.find(layout) != _set.end(); }
+
+public:
+ LayoutSet operator|(const LayoutSet &other) const; // Union
+ LayoutSet operator&(const LayoutSet &other) const; // Intersect
+ LayoutSet operator-(const LayoutSet &other) const; // Minus
+
+public:
+ std::unordered_set<Layout>::const_iterator begin() const { return _set.begin(); }
+ std::unordered_set<Layout>::const_iterator end() const { return _set.end(); }
+
+private:
+ std::unordered_set<Layout> _set;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_LAYOUT_SET_H__
diff --git a/runtime/onert/core/src/ir/LoweredGraph.cc b/runtime/onert/core/src/ir/LoweredGraph.cc
new file mode 100644
index 000000000..0018650e2
--- /dev/null
+++ b/runtime/onert/core/src/ir/LoweredGraph.cc
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/LoweredGraph.h"
+
+#include <assert.h>
+#include <sstream>
+#include "util/logging.h"
+#include "pass/ConstantInsertionPass.h"
+#include "pass/PermutationOperationPass.h"
+#include "pass/PermutationInsertionPass.h"
+#include "ir/GraphIterator.h"
+#include "verifier/Verifier.h"
+#include "backend/Backend.h"
+#include "backend/IConfig.h"
+#include "compiler/BackendResolver.h"
+#include "compiler/ManualScheduler.h"
+#include "compiler/HEScheduler.h"
+
+namespace onert
+{
+namespace ir
+{
+
+LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options)
+ : _graph{graph}
+{
+ // Build backend contexts
+ auto &backend_manager = compiler::BackendManager::get();
+ for (auto backend_str : options.backend_list)
+ {
+ backend_manager.loadBackend(backend_str);
+ auto backend = backend_manager.get(backend_str);
+
+ // TODO As the default value of backend list contains "cpu", "acl_cl" and "acl_neon", and some
+ // are not available on x64 or some other platforms. So this may be a workaround for x64 and
+ // we should change it back(throw if backend is not loaded) later.
+ if (!backend)
+ {
+ VERBOSE(LoweredGraph) << "Cannot load backend - " << backend_str;
+ continue;
+ }
+
+ _backend_contexts.emplace(backend, backend->newContext(_graph, _graph.getKernelBuilder(),
+ options.executor == "Linear"));
+ }
+ if (backend_manager.getAll().size() == 0)
+ throw std::runtime_error{"No available backends loaded."};
+
+ // TODO Move "schedule" phase out of here
+ // Schedule
+ if (options.he_scheduler)
+ {
+ auto scheduler = compiler::HEScheduler(_backend_contexts, options);
+ _backend_resolver = scheduler.schedule(_graph);
+ _indexed_ranks = scheduler.getIndexedRanks();
+ }
+ else
+ {
+ auto scheduler = compiler::ManualScheduler(options.manual_scheduler_options);
+ _backend_resolver = scheduler.schedule(_graph);
+ }
+
+ {
+ // operand::LowerInfo holder
+ OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operands_lower_info;
+
+ _graph.operands().iterate([&](const OperandIndex &index, const Operand &) {
+ operands_lower_info[index] = std::make_unique<operand::LowerInfo>();
+ });
+
+ // Make op_seqs while checking whether a node can be merged into a op_seq.
+ makeOpSequences(operands_lower_info, options);
+
+ _op_seqs.iterate([&](const OpSequenceIndex &, OpSequence &op_seq) {
+ assert(op_seq.operations().size() > 0);
+ std::reverse(std::begin(op_seq.operations()), std::end(op_seq.operations()));
+ });
+
+ _op_seqs.dump("merged and sorted operations without permutation");
+
+ pass::ConstantInsertionPass ci_pass(*this);
+ ci_pass.run();
+
+ // Set LowerInfo for each operand from the operand::LowerInfo holder
+ manipulateLowerInfo(operands_lower_info);
+
+ dumpLowerInfo();
+ }
+
+ // Run Permutation Passes
+ {
+ pass::PermutationOperationPass po_pass(*this);
+ po_pass.run();
+
+ pass::PermutationInsertionPass pi_pass(*this);
+ pi_pass.run();
+ // Implemented code no longer works.
+ // pass::PermutationEliminationPass pe_pass(*this);
+ // pe_pass.run();
+
+ // TODO merge perm op_seqs if possible
+ _op_seqs.dump("merged and sorted operations with permutation");
+ }
+
+ // Graph verifications
+ {
+ assert(verifier::DAGChecker().verify(_graph));
+ assert(verifier::EdgeConsistencyChecker().verify(_graph));
+ }
+}
+
+const operation::LowerInfo *LoweredGraph::getLowerInfo(const OpSequenceIndex &op_seq_index) const
+{
+ auto itr = _lower_info_map.operation.find(op_seq_index);
+ if (itr == _lower_info_map.operation.end())
+ return nullptr;
+ return itr->second.get();
+}
+
+void LoweredGraph::setLowerInfo(const OpSequenceIndex &op_seq_index,
+ std::unique_ptr<operation::LowerInfo> &&lower_info)
+{
+ _lower_info_map.operation.insert(std::make_pair(op_seq_index, std::move(lower_info)));
+}
+
+void LoweredGraph::removeLowerInfo(const OpSequenceIndex &op_seq_index)
+{
+ auto &op_seq_lower_info = _lower_info_map.operation;
+ assert(op_seq_lower_info.find(op_seq_index) != op_seq_lower_info.end());
+ for (auto it = op_seq_lower_info.begin(); it != op_seq_lower_info.end(); ++it)
+ {
+ if (it->first == op_seq_index)
+ {
+ op_seq_lower_info.erase(it);
+ break;
+ }
+ }
+}
+
+const operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index) const
+{
+ auto itr = _lower_info_map.operand.find(index);
+ if (itr == _lower_info_map.operand.end())
+ return nullptr;
+ return itr->second.get();
+}
+
+operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index)
+{
+ auto itr = _lower_info_map.operand.find(index);
+ if (itr == _lower_info_map.operand.end())
+ return nullptr;
+ return itr->second.get();
+}
+
+void LoweredGraph::setLowerInfo(const OperandIndex &index,
+ std::unique_ptr<operand::LowerInfo> &&lower_info)
+{
+ _lower_info_map.operand.insert(std::make_pair(index, std::move(lower_info)));
+}
+
+void LoweredGraph::removeLowerInfo(const OperandIndex &index)
+{
+ _lower_info_map.operand.erase(index);
+}
+
+OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const OperationIndex &node_index,
+ const Operation &node)
+{
+ // Create a fresh op_seq with one operation, and append it to op_seqs
+ // Create a fresh op_seq
+ auto op_seq = std::make_unique<OpSequence>(_graph.layout());
+
+ // Add an operation
+ op_seq->appendOperation(node_index, node);
+
+ // Update input/output
+ op_seq->setOutputs(node.getOutputs());
+ op_seq->setInputs(node.getInputs());
+
+ return _op_seqs.emplace(std::move(op_seq));
+}
+
+void LoweredGraph::makeOpSequences(
+ OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
+ const compiler::CompilerOptions &options)
+{
+ // if SUBG_MAX_NODE == 0, no limit on nodes of a op_seq
+ const int op_seq_max_node = options.op_seq_max_node;
+ assert(op_seq_max_node >= 0);
+
+ bool is_profiling = options.he_profiling_mode;
+ OpSequence *op_seq = nullptr;
+ OpSequenceIndex op_seq_index;
+
+ // NOTE: The below method appends nodes while making one op_seq if needed. If something better
+ // ways, happy to update this code.
+ PostDfsConstIterator{}.iterate(
+ _graph, [&](const OperationIndex &node_index, const Operation &node) {
+ // LowerInfo for in/output operands
+ auto backend = _backend_resolver->getBackend(node_index);
+
+ // Set backend's layout to frontend layout
+ auto backend_layout = _graph.layout();
+
+ // The layout of each backend should be set at another place
+ // TODO Change setting layout of each backend at another place
+ // TODO Remove getting id of backend
+ if (backend->config()->id() == "acl_cl" || backend->config()->id() == "acl_neon")
+ {
+ const std::string acl_layout_str = util::getConfigString(util::config::ACL_LAYOUT);
+ if (acl_layout_str == "NHWC")
+ {
+ backend_layout = Layout::NHWC;
+ }
+ else if (acl_layout_str == "NCHW")
+ {
+ backend_layout = Layout::NCHW;
+ }
+ }
+ else if (backend->config()->id() == "srcn")
+ {
+ const std::string ncnn_layout_str = util::getConfigString(util::config::NCNN_LAYOUT);
+ if (ncnn_layout_str == "NHWC")
+ {
+ backend_layout = Layout::NHWC;
+ }
+ else if (ncnn_layout_str == "NCHW")
+ {
+ backend_layout = Layout::NCHW;
+ }
+ }
+ else if (backend->config()->id() == "cpu")
+ {
+ backend_layout = Layout::NHWC;
+ }
+
+ for (auto operand : node.getInputs())
+ {
+ auto &&lower_info = operands_lower_info.at(operand);
+ lower_info->addUsePermuteFactor(operand::PermuteFactor{backend, backend_layout});
+ }
+ for (auto operand : node.getOutputs())
+ {
+ auto &&lower_info = operands_lower_info.at(operand);
+ lower_info->addDefPermuteFactor(operand::PermuteFactor{backend, backend_layout});
+ }
+
+ bool new_op_seq = (op_seq == nullptr ||
+ (op_seq_max_node != 0 &&
+ op_seq->operations().size() >= static_cast<size_t>(op_seq_max_node)));
+
+ // for profiling each op_seq must contain just one node,
+ // so that we can measure a node separately
+ if (new_op_seq || is_profiling || !mergeable(op_seq_index, node_index, backend_layout))
+ {
+ auto new_op_seq_index = appendFreshSingleOpSequence(node_index, node);
+
+ // OpSequence LowerInfo
+ setLowerInfo(new_op_seq_index,
+ std::make_unique<operation::LowerInfo>(backend, backend_layout));
+
+ op_seq_index = new_op_seq_index;
+ op_seq = &(_op_seqs.at(new_op_seq_index));
+
+ VERBOSE(Lower) << "SUBG#" << op_seq_index.value() << " is created for "
+ << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
+ }
+ else
+ {
+ op_seq->appendOperation(node_index, node);
+ op_seq->setInputs(node.getInputs());
+
+ VERBOSE(Lower) << "SUBG#" << op_seq_index.value() << " merges "
+ << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
+ }
+ });
+}
+
+void LoweredGraph::manipulateLowerInfo(
+ OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info)
+{
+ const auto default_backend = compiler::BackendManager::get().getDefault();
+ for (auto index : _graph.getInputs())
+ {
+ // Pick just any one from the uses, here the first one is chosen
+ // For the other uses, Permute operations will be inserted later
+ auto &&lower_info = operands_lower_info.at(index);
+ assert(lower_info->use_factors().size() > 0);
+ lower_info->addDefPermuteFactor(*lower_info->use_factors().begin());
+ }
+ for (auto index : _graph.getOutputs())
+ {
+ auto &&lower_info = operands_lower_info.at(index);
+ if (_graph.operands().at(index).isConstant())
+ {
+ lower_info->addDefPermuteFactor(operand::PermuteFactor{
+ default_backend,
+ Layout::NHWC // TODO Get frontend layout of this node from IR
+ });
+ }
+ }
+
+ // Set LowerInfo for each operand from the operand::LowerInfo holder
+ _graph.operands().iterate([&](const OperandIndex &index, Operand &) {
+ setLowerInfo(index, std::move(operands_lower_info[index]));
+ });
+}
+
+void LoweredGraph::dumpLowerInfo()
+{
+ if (::onert::util::logging::ctx.enabled() == false)
+ return;
+
+ std::map<uint32_t, std::string> dumps;
+
+ _graph.operands().iterate([&](const OperandIndex &index, Operand &object) {
+ std::stringstream sstream;
+ if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty())
+ {
+ auto factors_to_string = [](const operand::PermuteFactorSet &factors) {
+ std::string str;
+ for (auto factor : factors)
+ {
+ str += factor.backend()->config()->id();
+ str += "(" + to_string(factor.layout()) + ")";
+ str += " ";
+ }
+ return "{ " + str + "}";
+ };
+
+ auto operation_index_to_string = [](const OperationIndexList &operations) {
+ std::string str;
+ for (auto op : operations.list())
+ {
+ str += std::to_string(op.value());
+ str += " ";
+ }
+ return "{ " + str + "}";
+ };
+
+ const auto lower_info = getLowerInfo(index);
+ const auto &shape = object.shape();
+ std::string def_ops = operation_index_to_string(object.getDef());
+ std::string use_ops = operation_index_to_string(object.getUses());
+ std::string def_layouts = factors_to_string(lower_info->def_factors());
+ std::string use_layouts = factors_to_string(lower_info->use_factors());
+ sstream << "Operand #" << index.value() << " LowerInfo" << std::endl;
+ sstream << " - Shape : { " << (shape.rank() > 0 ? shape.dim(0) : 0) << " "
+ << (shape.rank() > 1 ? shape.dim(1) : 0) << " "
+ << (shape.rank() > 2 ? shape.dim(2) : 0) << " "
+ << (shape.rank() > 3 ? shape.dim(3) : 0) << " "
+ << "}" << std::endl;
+ sstream << " - Def Operations : " << def_ops << std::endl;
+ sstream << " - Use Operations : " << use_ops << std::endl;
+ sstream << " - Lower Info" << std::endl;
+ sstream << " - Def Backends : " << def_layouts << std::endl;
+ sstream << " - Use Backends : " << use_layouts << std::endl;
+ }
+ dumps.emplace(index.value(), sstream.str());
+ });
+
+ for (const auto &e : dumps)
+ {
+ if (!e.second.empty())
+ {
+ VERBOSE(Lower) << e.second;
+ }
+ }
+}
+
+bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index,
+ Layout layout)
+{
+ // Are they mergeable?
+ // 1. the same backend id and layout?
+ // 2. Is op_seq or node branched?
+ // 3. if 1 is true, the op_seq and a node are connected?
+ const auto &op_seq = _op_seqs.at(op_seq_index);
+ const auto &node = _graph.operations().at(node_index);
+
+ // The same backend id and layout?
+ {
+ const auto op_seq_backend_layout = getLowerInfo(op_seq_index)->layout();
+ const auto &op_seq_backend_id = getLowerInfo(op_seq_index)->backend()->config()->id();
+ const auto &node_backend_id = _backend_resolver->getBackend(node_index)->config()->id();
+ VERBOSE(Lower) << "SUBG#" << op_seq_index.value() << " { " << op_seq_backend_id << "("
+ << to_string(op_seq_backend_layout) << ") } "
+ << " NODE#" << node_index.value() << " (" << node.name() << ") { "
+ << node_backend_id << "(" << to_string(layout) << ") } " << std::endl;
+ if (op_seq_backend_id != node_backend_id || op_seq_backend_layout != layout)
+ return false;
+ }
+
+ // Branched?
+ {
+ std::unordered_set<OperationIndex> branched_set;
+
+ // Check for branching up
+ const auto &inputs = op_seq.getInputs();
+ for (const auto &input : inputs)
+ {
+ const auto &input_obj = _graph.operands().at(input);
+ for (const auto &def : input_obj.getDef().list())
+ {
+ branched_set.insert(def);
+ if (branched_set.size() > 1)
+ {
+ return false;
+ }
+ }
+ }
+ branched_set.clear();
+
+ // Check for branching down
+ const auto &outputs = node.getOutputs();
+ for (const auto &output : outputs)
+ {
+ const auto &output_obj = _graph.operands().at(output);
+ for (const auto &use : output_obj.getUses().list())
+ {
+ branched_set.insert(use);
+ if (branched_set.size() > 1)
+ {
+ return false;
+ }
+ }
+ }
+ }
+
+ // Connected?
+ // an input of one node is an output of the other node? or vice-versa?
+ {
+ const auto &node_inputs = node.getInputs();
+ const auto &node_outputs = node.getOutputs();
+
+ // op_seq's operations are in order so that we just check the first and the last
+ std::vector<Element> op_seq_ops{op_seq.operations()[0]};
+ if (op_seq.operations().size() > 1)
+ op_seq_ops.emplace_back(op_seq.operations()[op_seq.operations().size() - 1]);
+
+ for (const auto &elem : op_seq_ops)
+ {
+ const auto &n_index = elem.index;
+ const auto &n = *elem.node;
+
+ // node's output == op_seq's input?
+ const auto &n_inputs = n.getInputs();
+ for (auto input : n_inputs)
+ {
+ if (node_outputs.contains(input))
+ {
+ VERBOSE(Lower) << "SUBG#" << op_seq_index.value() << " 's NODE#" << n_index.value() << "("
+ << n.name() << ") is connected to NODE#" << node_index.value() << "("
+ << node.name() << ")" << std::endl;
+ return true;
+ }
+ }
+
+ // node's input == op_seq's output?
+ const auto &n_outputs = n.getOutputs();
+ for (auto output : n_outputs)
+ {
+ if (node_inputs.contains(output))
+ {
+ VERBOSE(Lower) << "SUBG#" << op_seq_index.value() << " 's NODE#" << n_index.value()
+ << " (" << n.name() << ") is connected to NODE#" << node_index.value()
+ << std::endl;
+ return true;
+ }
+ }
+ }
+
+ VERBOSE(Lower) << "SUBG#" << op_seq_index.value() << " is not connected to NODE#"
+ << node_index.value() << "(" << node.name() << ")" << std::endl;
+ }
+
+ return false;
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/OpCode.cc b/runtime/onert/core/src/ir/OpCode.cc
new file mode 100644
index 000000000..3f8182916
--- /dev/null
+++ b/runtime/onert/core/src/ir/OpCode.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/OpCode.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace ir
+{
+
+const char *toString(OpCode opcode)
+{
+ static const std::unordered_map<OpCode, const char *> map{{OpCode::Invalid, "Invalid"},
+#define OP(Name) {OpCode::Name, #Name},
+#include "ir/Operations.lst"
+#undef OP
+ {OpCode::COUNT, "COUNT"}};
+ return map.at(opcode);
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/OpSequence.cc b/runtime/onert/core/src/ir/OpSequence.cc
new file mode 100644
index 000000000..74c7854e5
--- /dev/null
+++ b/runtime/onert/core/src/ir/OpSequence.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/OpSequence.h"
+#include "ir/OperationVisitor.h"
+#include <sstream>
+
+namespace
+{
+
+// Build a comma-separated list of operand index values, e.g. "0,1,2".
+// Returns an empty string for an empty sequence.
+std::string getStrFromIndice(const onert::ir::OperandIndexSequence &indice)
+{
+  std::string str;
+  for (const auto &ind : indice)
+  {
+    str += std::to_string(ind.value());
+    str.push_back(',');
+  }
+  // Drop the trailing comma. Guard against an empty sequence:
+  // calling back()/pop_back() on an empty std::string is undefined behavior.
+  if (!str.empty() && str.back() == ',')
+    str.pop_back();
+
+  return str;
+}
+}
+
+namespace onert
+{
+namespace ir
+{
+
+// Construct an empty operation sequence that carries operands in `layout`.
+OpSequence::OpSequence(Layout layout) : _layout{layout}
+{
+  // DO NOTHING
+}
+
+void OpSequence::accept(OperationVisitor &v) const { v.visit(*this); }
+
+// TODO: Impl Dumper instead of this method
+// Render a one-line, human-readable summary of this sequence, e.g.
+// " OpSequence IN(0,1,2) -> { op0(0,1,2:3), op1(3:4) } -> OUT(5)"
+std::string OpSequence::getStr() const
+{
+  std::stringstream ss;
+  ss << " OpSequence IN(" << getStrFromIndice(getInputs()) << ") -> {";
+  for (const auto &op_elem : _operations)
+  {
+    const auto &op_node = *op_elem.node;
+    ss << " " << op_elem.index.value() << "(" << op_node.name() << ":"
+       << getStrFromIndice(op_node.getInputs()) << ":" << getStrFromIndice(op_node.getOutputs())
+       << ")";
+  }
+  ss << " } -> OUT(" << getStrFromIndice(getOutputs()) << ")";
+  return ss.str();
+}
+
+// Erase the element whose operation index equals `index`.
+// Precondition: the operation must exist in this sequence.
+void OpSequence::remove(const OperationIndex &index)
+{
+  assert(exist(index));
+  for (auto iter = _operations.cbegin(); iter != _operations.cend(); ++iter)
+  {
+    if (!(iter->index == index))
+      continue;
+    _operations.erase(iter);
+    return; // indices are unique within a sequence; nothing more to do
+  }
+}
+
+// Return true iff an element with the given operation index is present.
+bool OpSequence::exist(const OperationIndex &index) const
+{
+  for (auto it = _operations.begin(); it != _operations.end(); ++it)
+  {
+    if (it->index == index)
+      return true;
+  }
+  return false;
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/OpSequences.cc b/runtime/onert/core/src/ir/OpSequences.cc
new file mode 100644
index 000000000..991e96c9d
--- /dev/null
+++ b/runtime/onert/core/src/ir/OpSequences.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/OpSequences.h"
+#include "util/logging.h"
+#include <memory>
+
+#include <cassert>
+#include <string>
+
+namespace onert
+{
+namespace ir
+{
+
+// Create a new OpSequence containing the single operation `node`,
+// register it, and return the index it was registered under.
+OpSequenceIndex OpSequences::emplace(const OperationIndex &index, const Operation &node,
+                                     Layout layout)
+{
+  auto seq = std::make_unique<OpSequence>(layout);
+  seq->appendOperation(index, node);
+  return push(std::move(seq));
+}
+
+// Register an already-built OpSequence and return its new index.
+OpSequenceIndex OpSequences::emplace(std::unique_ptr<OpSequence> &&op_seq)
+{
+  return push(std::move(op_seq));
+}
+
+// True iff some registered OpSequence contains the given operation.
+bool OpSequences::containsOperation(const OperationIndex &operation_index) const
+{
+  return findOperation(operation_index).valid();
+}
+
+// Like findOperation(), but the operation must exist in some sequence.
+OpSequenceIndex OpSequences::getOperation(const OperationIndex &operation_index) const
+{
+  OpSequenceIndex ret = findOperation(operation_index);
+  assert(ret.valid());
+  return ret;
+}
+
+// TODO: Extract this into external helper function
+// Log every OpSequence (index plus one-line summary) under the given tag.
+void OpSequences::dump(const std::string &msg) const
+{
+  VERBOSE(OpSequences) << "OpSequences(" << msg << ")" << std::endl;
+  iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) {
+    VERBOSE(OpSequences) << idx.value() << "] " << op_seq.getStr() << std::endl;
+  });
+}
+
+// Remove `operation_index` from the OpSequence that owns it; if that
+// sequence becomes empty as a result, drop the sequence itself as well.
+void OpSequences::removeFromOpSequence(const OperationIndex &operation_index)
+{
+  const auto op_seq_index = findOperation(operation_index);
+  auto &op_seq = at(op_seq_index);
+  op_seq.remove(operation_index);
+  if (op_seq.size() == 0)
+  {
+    remove(op_seq_index);
+  }
+}
+
+// Return the index of the OpSequence containing `operation_index`,
+// or a default-constructed (invalid) index if no sequence contains it.
+OpSequenceIndex OpSequences::findOperation(const OperationIndex &operation_index) const
+{
+  OpSequenceIndex ret;
+  iterate([&](const OpSequenceIndex &index, const OpSequence &object) {
+    for (const auto &elem : object.operations())
+    {
+      if (elem.index == operation_index)
+      {
+        ret = index;
+        // Operation indices are unique within a sequence — no need to
+        // keep scanning the remaining elements of this sequence.
+        break;
+      }
+    }
+  });
+  return ret;
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/Operand.cc b/runtime/onert/core/src/ir/Operand.cc
new file mode 100644
index 000000000..39f9d6c90
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operand.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Operand.h"
+
+namespace onert
+{
+namespace ir
+{
+
+// Return the total byte size of this operand: the product of all
+// dimensions times the element size of its data type.
+size_t Operand::operandSize(void) const
+{
+  const uint32_t ranks = shape().rank();
+  // Accumulate in size_t: a 32-bit signed accumulator can overflow for
+  // large tensors and yield a wrong (possibly negative-wrapped) size.
+  size_t elements = 1;
+
+  for (uint32_t rank = 0; rank < ranks; rank++)
+  {
+    elements *= shape().dim(rank);
+  }
+
+  DataType type = typeInfo().type();
+  size_t element_size = sizeOfDataType(type);
+
+  // Value of type is matched with OperandCode enum in NeuralNetworks.h
+  return element_size * elements;
+}
+
+// Register `idx` as an operation that reads this operand.
+void Operand::appendUse(const OperationIndex &idx) { _uses.append(idx); }
+
+// Unregister a reader operation.
+void Operand::removeUse(const OperationIndex &idx) { _uses.remove(idx); }
+
+// Register `idx` as the operation that defines (writes) this operand.
+// Constants have no defining operation, and an operand may be defined
+// at most once — both are enforced by the asserts below.
+void Operand::appendDef(const OperationIndex &idx)
+{
+  assert(!isConstant());
+  assert(_def.size() == 0);
+
+  _def.append(idx);
+}
+
+// Unregister the defining operation (it must currently be registered).
+void Operand::removeDef(const OperationIndex &idx)
+{
+  assert(_def.contains(idx));
+
+  _def.remove(idx);
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/OperandIndexSequence.cc b/runtime/onert/core/src/ir/OperandIndexSequence.cc
new file mode 100644
index 000000000..2044310e7
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperandIndexSequence.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/OperandIndexSequence.h"
+
+#include <algorithm>
+
+namespace onert
+{
+namespace ir
+{
+
+// Construct directly from a list of operand indices.
+OperandIndexSequence::OperandIndexSequence(std::initializer_list<OperandIndex> list) : _set(list)
+{
+  // DO NOTHING
+}
+
+// Construct from raw int32_t values.
+// NOTE(review): negative values are cast to uint32_t without validation —
+// presumably callers never pass negatives; confirm at call sites.
+OperandIndexSequence::OperandIndexSequence(std::initializer_list<int32_t> list)
+{
+  for (auto val : list)
+  {
+    _set.emplace_back(static_cast<uint32_t>(val));
+  }
+}
+
+// Construct from raw uint32_t values.
+OperandIndexSequence::OperandIndexSequence(std::initializer_list<uint32_t> list)
+{
+  for (auto val : list)
+  {
+    _set.emplace_back(val);
+  }
+}
+
+// Membership test over the underlying ordered (duplicate-allowing) list.
+bool OperandIndexSequence::contains(const OperandIndex &index) const
+{
+  return std::find(_set.begin(), _set.end(), index) != _set.end();
+}
+
+// Replace every occurrence of `from` with `to`, preserving positions.
+void OperandIndexSequence::replace(const OperandIndex &from, const OperandIndex &to)
+{
+  std::replace(_set.begin(), _set.end(), from, to);
+}
+
+// Concatenation: a new sequence holding this sequence's indices followed
+// by `other`'s indices (duplicates are kept).
+OperandIndexSequence OperandIndexSequence::operator+(const OperandIndexSequence &other) const
+{
+  OperandIndexSequence concatenated = *this;
+  concatenated.append(other);
+  return concatenated;
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/Operands.cc b/runtime/onert/core/src/ir/Operands.cc
new file mode 100644
index 000000000..ab32e478a
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operands.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Operands.h"
+
+#include <memory>
+#include "util/logging.h"
+
+namespace onert
+{
+namespace ir
+{
+
+// Deep-copy constructor: every Operand object is cloned so the new
+// container owns independent instances; the index counter is copied so
+// future index allocation stays consistent with the source container.
+Operands::Operands(const Operands &obj)
+{
+  obj.iterate([&](const OperandIndex &index, const Operand &operand) {
+    _objects.emplace(index, std::make_unique<Operand>(operand));
+  });
+  _index_count = obj._index_count;
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/Operation.cc b/runtime/onert/core/src/ir/Operation.cc
new file mode 100644
index 000000000..d2a60747f
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operation.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Operation.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace ir
+{
+
+// Construct with explicit input/output operand lists.
+// NOTE(review): unlike setInputs(), this constructor does not assert
+// _input_constr.check(inputs.size()) — confirm callers guarantee it.
+Operation::Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs,
+                     const OperandIndexSequence &outputs)
+    : _input_constr{input_constr}, _inputs{inputs}, _outputs{outputs}
+{
+}
+
+// Construct with a constraint only; inputs/outputs are set later.
+Operation::Operation(OperandConstraint input_constr) : _input_constr{input_constr} {}
+
+// Defaulted destructor, defined out-of-line in this translation unit.
+Operation::~Operation() = default;
+
+// Replace the input operand list; the operand-count constraint is enforced.
+void Operation::setInputs(const OperandIndexSequence &indexes)
+{
+  assert(_input_constr.check(indexes.size()));
+  _inputs = indexes;
+}
+
+// Replace the output operand list (no count constraint applies to outputs).
+void Operation::setOutputs(const OperandIndexSequence &indexes) { _outputs = indexes; }
+
+// Substitute operand `from` with `to` in the input list (all occurrences).
+void Operation::replaceInput(const OperandIndex &from, const OperandIndex &to)
+{
+  _inputs.replace(from, to);
+}
+
+// Substitute operand `from` with `to` in the output list (all occurrences).
+void Operation::replaceOutput(const OperandIndex &from, const OperandIndex &to)
+{
+  _outputs.replace(from, to);
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/OperationCloner.cc b/runtime/onert/core/src/ir/OperationCloner.cc
new file mode 100644
index 000000000..b4e60f0bc
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperationCloner.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationCloner.h"
+
+#include <assert.h>
+
+namespace onert
+{
+namespace ir
+{
+
+// For every operation type listed in Operations.lst, generate a visit()
+// override that copy-constructs the visited node into _return_op. The
+// assert guards against cloning twice without an intervening releaseClone().
+#define OP(Name)                                          \
+  void OperationCloner::visit(const operation::Name &o)   \
+  {                                                       \
+    assert(!_return_op);                                  \
+    _return_op = std::make_unique<operation::Name>(o);    \
+  }
+#include "ir/Operations.lst"
+#undef OP
+
+// Transfer ownership of the clone created by the last visit() call.
+// Must be called after a successful visit; leaves _return_op empty.
+std::unique_ptr<Operation> OperationCloner::releaseClone()
+{
+  assert(_return_op);
+  return std::move(_return_op);
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/OperationCloner.h b/runtime/onert/core/src/ir/OperationCloner.h
new file mode 100644
index 000000000..0e8cda2a0
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperationCloner.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_CLONER_H__
+#define __ONERT_IR_OPERATION_CLONER_H__
+
+#include <memory>
+#include "ir/OperationVisitor.h"
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+
+/**
+ * @brief OperationVisitor that produces a copy of the single node it visits.
+ *
+ * Usage: have a node accept this visitor, then call releaseClone() to
+ * take ownership of the copy.
+ */
+class OperationCloner : public OperationVisitor
+{
+public:
+// One visit() override is declared per operation type in Operations.lst.
+#define OP(Name) void visit(const operation::Name &o) override;
+#include "ir/Operations.lst"
+#undef OP
+
+public:
+  // Hand over ownership of the clone made by the last visit() call.
+  std::unique_ptr<Operation> releaseClone();
+
+private:
+  std::unique_ptr<Operation> _return_op; // clone produced by visit()
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_CLONER_H__
diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc
new file mode 100644
index 000000000..789d2869d
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperationDumper.cc
@@ -0,0 +1,634 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationDumper.h"
+
+#include <string>
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace ir
+{
+
+using namespace operation;
+
+void OperationDumper::visit(const Abs &node)
+{
+ VERBOSE(LIR) << "* Abs" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Abs::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+// Dump the Add node: both input operand indices and the single output.
+void OperationDumper::visit(const Add &node)
+{
+  VERBOSE(LIR) << "* Add" << std::endl;
+  const auto lhs_idx = node.getInputs().at(Add::Input::LHS).value();
+  const auto rhs_idx = node.getInputs().at(Add::Input::RHS).value();
+  VERBOSE(LIR) << " - Inputs : Input(" << lhs_idx << ", " << rhs_idx << ")" << std::endl;
+  VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const ArgMax &node)
+{
+ VERBOSE(LIR) << "* ArgMax" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMax::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const AvgPool2D &node)
+{
+ VERBOSE(LIR) << "* AvgPool2D(Implicit)" << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(AvgPool2D::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Cast &node)
+{
+ VERBOSE(LIR) << "* Cast" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Cast::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Comparison &node)
+{
+ VERBOSE(LIR) << "* Comparison" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Comparison::Input::INPUT0).value()
+ << ", " << node.getInputs().at(Comparison::Input::INPUT1).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Concat &node)
+{
+ VERBOSE(LIR) << "* Concat" << std::endl;
+ std::string inputs;
+ for (auto i : node.getInputs())
+ {
+ inputs += std::to_string(i.value()) + ",";
+ }
+ VERBOSE(LIR) << " - Inputs : IFM(" << inputs << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Conv2D &node)
+{
+ std::string padding_type =
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ VERBOSE(LIR) << "* Conv2D(" << padding_type << ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT).value()
+ << ") Kernel(" << node.getInputs().at(Conv2D::Input::KERNEL).value() << ") Bias("
+ << node.getInputs().at(Conv2D::Input::BIAS).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const DepthToSpace &node)
+{
+ VERBOSE(LIR) << "* DepthToSpace" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(DepthToSpace::Input::INPUT).value()
+ << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const DepthwiseConv2D &node)
+{
+ std::string padding_type =
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ VERBOSE(LIR) << "* DepthwiseConv2D(" << padding_type << ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(DepthwiseConv2D::Input::INPUT).value()
+ << ") Kernel(" << node.getInputs().at(DepthwiseConv2D::Input::KERNEL).value()
+ << ") Bias(" << node.getInputs().at(DepthwiseConv2D::Input::BIAS).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Dequantize &node)
+{
+ VERBOSE(LIR) << "* Dequantize" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Dequantize::Input::INPUT).value()
+ << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Div &node)
+{
+ VERBOSE(LIR) << "* Div" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Div::Input::LHS).value() << ", "
+ << node.getInputs().at(Div::Input::RHS).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const EmbeddingLookup &node)
+{
+ VERBOSE(LIR) << "* EmbeddingLookup" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Lookups("
+ << node.getInputs().at(EmbeddingLookup::Input::LOOKUPS).value() << ") VALUES("
+ << node.getInputs().at(EmbeddingLookup::Input::VALUES).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Exp &node)
+{
+ VERBOSE(LIR) << "* Exp" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Exp::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Floor &node)
+{
+ VERBOSE(LIR) << "* Floor" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Floor::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const FullyConnected &node)
+{
+ VERBOSE(LIR) << "* FullyConnected" << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(FullyConnected::Input::INPUT).value()
+ << ") Weight(" << node.getInputs().at(FullyConnected::Input::WEIGHT).value()
+ << ") Bias(" << node.getInputs().at(FullyConnected::Input::BIAS).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Gather &node)
+{
+ VERBOSE(LIR) << "* Gather" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Gather::Input::INPUT).value()
+ << ") Indices(" << node.getInputs().at(Gather::Input::INDICES).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+// Dump the HashtableLookup node: three inputs and two outputs.
+void OperationDumper::visit(const HashtableLookup &node)
+{
+  VERBOSE(LIR) << "* HashTableLookup" << std::endl;
+  VERBOSE(LIR) << " - Inputs : Lookups("
+               << node.getInputs().at(HashtableLookup::Input::LOOKUPS).value() << ") Keys("
+               << node.getInputs().at(HashtableLookup::Input::KEYS).value() << ") Values("
+               << node.getInputs().at(HashtableLookup::Input::VALUES).value() << ")" << std::endl;
+  // BUGFIX: output operands must be read from getOutputs() — the original
+  // indexed getInputs() with Output enum values, dumping wrong operands.
+  VERBOSE(LIR) << " - Outputs : Output("
+               << node.getOutputs().at(HashtableLookup::Output::OUTPUT).value() << ") Hits("
+               << node.getOutputs().at(HashtableLookup::Output::HITS).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const InstanceNorm &node)
+{
+ VERBOSE(LIR) << "* InstanceNorm" << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(InstanceNorm::Input::INPUT).value()
+ << ") Gamma(" << node.getInputs().at(InstanceNorm::Input::GAMMA).value() << ") Beta("
+ << node.getInputs().at(InstanceNorm::Input::BETA).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const L2Normalization &node)
+{
+ VERBOSE(LIR) << "* L2Normalization" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input("
+ << node.getInputs().at(L2Normalization::Input::INPUT).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const L2Pool2D &node)
+{
+ VERBOSE(LIR) << "* L2Pool2D" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(L2Pool2D::Input::INPUT).value()
+ << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const LocalResponseNormalization &node)
+{
+ VERBOSE(LIR) << "* LocalResponseNormalization" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input("
+ << node.getInputs().at(LocalResponseNormalization::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+// Dump the LSTM node: all weight/bias/state inputs and the four outputs.
+void OperationDumper::visit(const LSTM &node)
+{
+  // Print the header line for consistency with every other visit() overload.
+  VERBOSE(LIR) << "* LSTM" << std::endl;
+  VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT).value()
+               << ") Input To Input Weights("
+               << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS).value()
+               << ") Input To Forget Weights("
+               << node.getInputs().at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS).value()
+               << ") Input To Cell Weights("
+               << node.getInputs().at(LSTM::Input::INPUT_TO_CELL_WEIGHTS).value()
+               << ") Input To Output Weights("
+               << node.getInputs().at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS).value()
+               << ") Recurrent To Input Weights("
+               << node.getInputs().at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS).value()
+               << ") Recurrent To Forget Weights("
+               << node.getInputs().at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS).value()
+               << ") Recurrent To Cell Weights("
+               << node.getInputs().at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS).value()
+               << ") Recurrent To Output Weights("
+               << node.getInputs().at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS).value()
+               << ") Cell To Input Weights("
+               << node.getInputs().at(LSTM::Input::CELL_TO_INPUT_WEIGHTS).value()
+               << ") Cell To Forget Weights("
+               << node.getInputs().at(LSTM::Input::CELL_TO_FORGET_WEIGHTS).value()
+               << ") Cell To OUTPUT Weights("
+               << node.getInputs().at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS).value()
+               << ") Input Gate Bias(" << node.getInputs().at(LSTM::Input::INPUT_GATE_BIAS).value()
+               << ") Forget Gate Bias("
+               << node.getInputs().at(LSTM::Input::FORGET_GATE_BIAS).value() << ") Cell Bias("
+               << node.getInputs().at(LSTM::Input::CELL_BIAS).value() << ") Output Gate Bias("
+               << node.getInputs().at(LSTM::Input::OUTPUT_GATE_BIAS).value()
+               << ") Projection Weights("
+               << node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS).value()
+               << ") Projection Bias(" << node.getInputs().at(LSTM::Input::PROJECTION_BIAS).value()
+               << ") Output State In(" << node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN).value()
+               << ") Cell State In(" << node.getInputs().at(LSTM::Input::CELL_STATE_IN).value()
+               << ")" << std::endl;
+  // BUGFIX: the three trailing outputs must be read from getOutputs() —
+  // the original indexed getInputs() with Output enum values.
+  VERBOSE(LIR) << " - Output : Scratch Buffer("
+               << node.getOutputs().at(LSTM::Output::SCRATCH_BUFFER).value()
+               << ") Output State Out("
+               << node.getOutputs().at(LSTM::Output::OUTPUT_STATE_OUT).value()
+               << ") Cell State Out(" << node.getOutputs().at(LSTM::Output::CELL_STATE_OUT).value()
+               << ") Output(" << node.getOutputs().at(LSTM::Output::OUTPUT).value() << ")"
+               << std::endl;
+}
+
+void OperationDumper::visit(const LogicalAnd &node)
+{
+ VERBOSE(LIR) << "* LogicalAnd" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalAnd::Input::INPUT0).value()
+ << ", " << node.getInputs().at(LogicalAnd::Input::INPUT1).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const LogicalNot &node)
+{
+ VERBOSE(LIR) << "* LogicalNot" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalNot::Input::INPUT).value()
+ << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const LogicalOr &node)
+{
+ VERBOSE(LIR) << "* LogicalOr" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalOr::Input::INPUT0).value()
+ << ", " << node.getInputs().at(LogicalOr::Input::INPUT1).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Logistic &node)
+{
+ VERBOSE(LIR) << "* Logistic" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Logistic::Input::INPUT).value()
+ << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const MaxPool2D &node)
+{
+ std::string padding_type =
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ VERBOSE(LIR) << "* MaxPool2D(" << padding_type << ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(MaxPool2D::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Mean &node)
+{
+ VERBOSE(LIR) << "* Mean" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Mean::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Mul &node)
+{
+ VERBOSE(LIR) << "* Mul" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Mul::Input::LHS).value() << ", "
+ << node.getInputs().at(Mul::Input::RHS).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Neg &node)
+{
+ VERBOSE(LIR) << "* Neg" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Neg::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+// Dump the Pack node: the variable-length input list and the single output.
+void OperationDumper::visit(const Pack &node)
+{
+  VERBOSE(LIR) << "* Pack" << std::endl;
+  // Join all input indices as "a, b, c" (no trailing separator).
+  std::string joined;
+  bool first = true;
+  for (const auto &ind : node.getInputs())
+  {
+    if (!first)
+      joined += ", ";
+    joined += std::to_string(ind.value());
+    first = false;
+  }
+  VERBOSE(LIR) << " - Inputs : Inputs(" << joined << ")" << std::endl;
+  VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Permute &node)
+{
+ std::string permute_type = "Unknown";
+ switch (node.getPermuteType())
+ {
+ case Permute::Type::COPY:
+ permute_type = "Copy";
+ break;
+ case Permute::Type::NHWC_TO_NCHW:
+ permute_type = "NHWC to NCHW";
+ break;
+ case Permute::Type::NCHW_TO_NHWC:
+ permute_type = "NCHW to NHWC";
+ break;
+ }
+
+ VERBOSE(LIR) << "* Permute(" + permute_type + ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const PReLU &node)
+{
+ VERBOSE(LIR) << "* PReLU" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(PReLU::Input::INPUT).value()
+ << ") Alpha(" << node.getInputs().at(PReLU::Input::ALPHA).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const ReduceMax &node)
+{
+ VERBOSE(LIR) << "* ReduceMax" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReduceMax::Input::INPUT).value()
+ << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const ReduceMin &node)
+{
+ VERBOSE(LIR) << "* ReduceMin" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReduceMin::Input::INPUT).value()
+ << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const ReduceSum &node)
+{
+ VERBOSE(LIR) << "* ReduceSum" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReduceSum::Input::INPUT).value()
+ << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const ReLU &node)
+{
+ VERBOSE(LIR) << "* ReLU" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const ReLU1 &node)
+{
+ VERBOSE(LIR) << "* ReLU1" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU1::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const ReLU6 &node)
+{
+ VERBOSE(LIR) << "* ReLU6" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU6::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Reshape &node)
+{
+ VERBOSE(LIR) << "* Reshape" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reshape::Input::INPUT).value()
+ << ")";
+ // optional param
+ if (node.getInputs().size() == 2)
+ {
+ VERBOSE(LIR) << " Shape(" << node.getInputs().at(Reshape::Input::SHAPE).value() << ")";
+ }
+ else
+ {
+ VERBOSE(LIR) << " Shape(not provided)";
+ }
+ VERBOSE(LIR) << std::endl;
+
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const ResizeBilinear &node)
+{
+ VERBOSE(LIR) << "* ResizeBilinear" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ResizeBilinear::Input::INPUT).value()
+ << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const RNN &node)
+{
+ VERBOSE(LIR) << "* RNN" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(RNN::Input::INPUT).value()
+ << ") Weights" << node.getInputs().at(RNN::Input::WEIGHTS).value()
+ << ") Recurrent Weights"
+ << node.getInputs().at(RNN::Input::RECURRENT_WEIGHTS).value() << ") Bias"
+ << node.getInputs().at(RNN::Input::BIAS).value() << ") Hidden State"
+ << node.getInputs().at(RNN::Input::HIDDEN_STATE_IN).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(RNN::Output::OUTPUT).value()
+ << ") Hidden State" << node.getInputs().at(RNN::Output::HIDDEN_STATE_OUT).value()
+ << ")" << std::endl;
+}
+
+void OperationDumper::visit(const RSQRT &node)
+{
+ VERBOSE(LIR) << "* RSQRT" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(RSQRT::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Softmax &node)
+{
+ VERBOSE(LIR) << "* Softmax" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Softmax::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const SpaceToDepth &node)
+{
+ VERBOSE(LIR) << "* SpaceToDepth" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SpaceToDepth::Input::INPUT).value()
+ << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Split &node)
+{
+ VERBOSE(LIR) << "* Split" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Split::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const SQRT &node)
+{
+ VERBOSE(LIR) << "* SQRT" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SQRT::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const SquaredDifference &node)
+{
+ VERBOSE(LIR) << "* SquaredDifference" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input("
+ << node.getInputs().at(SquaredDifference::Input::LHS).value() << ", "
+ << node.getInputs().at(SquaredDifference::Input::RHS).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Squeeze &node)
+{
+ VERBOSE(LIR) << "* Squeeze" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Squeeze::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Slice &node)
+{
+ VERBOSE(LIR) << "* Slice" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Slice::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const StridedSlice &node)
+{
+ VERBOSE(LIR) << "* StridedSlice" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StridedSlice::Input::INPUT).value()
+ << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Sub &node)
+{
+ VERBOSE(LIR) << "* Sub" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Sub::Input::LHS).value() << ", "
+ << node.getInputs().at(Sub::Input::RHS).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Tanh &node)
+{
+ VERBOSE(LIR) << "* TanH" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Tanh::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const TopKV2 &node)
+{
+ VERBOSE(LIR) << "* TopKV2" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(TopKV2::Input::INPUT).value() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Outputs : Values("
+ << node.getOutputs().at(TopKV2::Output::OUTPUT_VALUES).value() << ") Indices("
+ << node.getOutputs().at(TopKV2::Output::OUTPUT_INDICES).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const TransposeConv &node)
+{
+ std::string padding_type =
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ VERBOSE(LIR) << "* TransposeConv(" << padding_type << ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Output Shape("
+ << node.getInputs().at(TransposeConv::Input::OUTPUT_SHAPE).value() << ") KERNEL("
+ << node.getInputs().at(TransposeConv::Input::KERNEL).value() << ") IFM("
+ << node.getInputs().at(TransposeConv::Input::INPUT).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Transpose &node)
+{
+ VERBOSE(LIR) << "* Transpose" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Transpose::Input::INPUT).value()
+ << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Unpack &node)
+{
+ VERBOSE(LIR) << "* Unpack" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT).value() << ")"
+ << std::endl;
+ std::string outputs;
+ const auto &output_indices = node.getOutputs();
+ for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
+ {
+ outputs += std::to_string(it->value());
+ if (std::next(it) != std::end(output_indices))
+ outputs += ", ";
+ }
+ VERBOSE(LIR) << " - Outputs : Outputs(" << outputs << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Min &node)
+{
+ VERBOSE(LIR) << "* Min" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Min::Input::LHS).value() << ", "
+ << node.getInputs().at(Min::Input::RHS).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const Max &node)
+{
+ VERBOSE(LIR) << "* Max" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Max::Input::LHS).value() << ", "
+ << node.getInputs().at(Max::Input::RHS).value() << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+void OperationDumper::visit(const OneHot &node)
+{
+ VERBOSE(LIR) << "* OneHot" << std::endl;
+ VERBOSE(LIR) << " - Inputs : "
+ << "Indices(" << node.getInputs().at(OneHot::Input::INDICES).value() << ") "
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0).value() << ")" << std::endl;
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/OperationDumper.h b/runtime/onert/core/src/ir/OperationDumper.h
new file mode 100644
index 000000000..77d100cee
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperationDumper.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_OPERATION_DUMPER_H__
+#define __ONERT_OPERATION_DUMPER_H__
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+
+class OperationDumper : public OperationVisitor
+{
+public:
+ OperationDumper() = default;
+
+public:
+ void visit(const operation::Abs &) override;
+ void visit(const operation::Add &node) override;
+ void visit(const operation::ArgMax &) override;
+ void visit(const operation::AvgPool2D &node) override;
+ void visit(const operation::Cast &) override;
+ void visit(const operation::Comparison &) override;
+ void visit(const operation::Concat &node) override;
+ void visit(const operation::Conv2D &node) override;
+ void visit(const operation::DepthToSpace &) override;
+ void visit(const operation::DepthwiseConv2D &node) override;
+ void visit(const operation::Dequantize &) override;
+ void visit(const operation::Div &) override;
+ void visit(const operation::EmbeddingLookup &) override;
+ void visit(const operation::Exp &) override;
+ void visit(const operation::Floor &) override;
+ void visit(const operation::FullyConnected &node) override;
+ void visit(const operation::Gather &) override;
+ void visit(const operation::HashtableLookup &) override;
+ void visit(const operation::InstanceNorm &) override;
+ void visit(const operation::L2Normalization &) override;
+ void visit(const operation::L2Pool2D &) override;
+ void visit(const operation::LocalResponseNormalization &) override;
+ void visit(const operation::LogicalAnd &) override;
+ void visit(const operation::LogicalNot &) override;
+ void visit(const operation::LogicalOr &) override;
+ void visit(const operation::Logistic &) override;
+ void visit(const operation::LSTM &) override;
+ void visit(const operation::MaxPool2D &node) override;
+ void visit(const operation::Mean &) override;
+ void visit(const operation::Mul &) override;
+ void visit(const operation::Neg &) override;
+ void visit(const operation::Pack &) override;
+ void visit(const operation::Permute &node) override;
+ void visit(const operation::PReLU &) override;
+ void visit(const operation::ReduceMax &) override;
+ void visit(const operation::ReduceMin &) override;
+ void visit(const operation::ReduceSum &) override;
+ void visit(const operation::ReLU &) override;
+ void visit(const operation::ReLU1 &) override;
+ void visit(const operation::ReLU6 &) override;
+ void visit(const operation::Reshape &node) override;
+ void visit(const operation::ResizeBilinear &) override;
+ void visit(const operation::RNN &) override;
+ void visit(const operation::RSQRT &) override;
+ void visit(const operation::Softmax &node) override;
+ void visit(const operation::SpaceToDepth &) override;
+ void visit(const operation::Split &) override;
+ void visit(const operation::SQRT &) override;
+ void visit(const operation::SquaredDifference &) override;
+ void visit(const operation::Squeeze &) override;
+ void visit(const operation::Slice &) override;
+ void visit(const operation::StridedSlice &) override;
+ void visit(const operation::Sub &) override;
+ void visit(const operation::Tanh &) override;
+ void visit(const operation::TopKV2 &) override;
+ void visit(const operation::TransposeConv &) override;
+ void visit(const operation::Transpose &) override;
+ void visit(const operation::Unpack &) override;
+ void visit(const operation::Min &) override;
+ void visit(const operation::Max &) override;
+ void visit(const operation::OneHot &) override;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_OPERATION_DUMPER_H__
diff --git a/runtime/onert/core/src/ir/OperationIndexList.cc b/runtime/onert/core/src/ir/OperationIndexList.cc
new file mode 100644
index 000000000..bf51f9abb
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperationIndexList.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/OperationIndexList.h"
+
+#include <algorithm>
+
+namespace onert
+{
+namespace ir
+{
+
+OperationIndexList::OperationIndexList(std::initializer_list<OperationIndex> list) : _list(list)
+{
+ // DO NOTHING
+}
+
+bool OperationIndexList::contains(const OperationIndex &index) const
+{
+ return std::find(_list.begin(), _list.end(), index) != _list.end();
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/Operations.cc b/runtime/onert/core/src/ir/Operations.cc
new file mode 100644
index 000000000..64d0bd6f0
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operations.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Operations.h"
+
+#include "OperationCloner.h"
+
+namespace onert
+{
+namespace ir
+{
+
+Operations::Operations(const Operations &obj)
+{
+ obj.iterate([&](const OperationIndex &index, const Operation &op) {
+ OperationCloner cloner;
+ op.accept(cloner);
+ _objects.emplace(index, cloner.releaseClone());
+ });
+ _index_count = obj._index_count;
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/Padding.cc b/runtime/onert/core/src/ir/Padding.cc
new file mode 100644
index 000000000..31969911f
--- /dev/null
+++ b/runtime/onert/core/src/ir/Padding.cc
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Padding.h"
+
+#include "util/Utils.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace onert
+{
+namespace ir
+{
+namespace
+{
+
+inline ExplicitPadding validPadding(void)
+{
+ //
+ // ANEURALNETWORKS_PADDING_VALID
+ //
+ // VALID padding. No padding.
+ //
+ // When the input size is not evenly divisible by the filter size,
+ // the input at the end that could not fill the whole filter tile
+ // will simply be ignored.
+ //
+ ExplicitPadding padding;
+
+ padding.top = 0;
+ padding.bottom = 0;
+ padding.left = 0;
+ padding.right = 0;
+
+ return padding;
+}
+
+inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const Stride &stride,
+ uint32_t kw, uint32_t kh)
+{
+ ExplicitPadding padding;
+
+ // ANEURALNETWORKS_PADDING_SAME (from NNAPI spec)
+ //
+ // SAME padding. Padding on both ends are the "same":
+ //
+ // padding_to_beginning = total_padding / 2
+ // padding_to_end = (total_padding + 1)/2.
+ //
+ const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
+ const int32_t horizontal_expected_output =
+ (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
+
+ const int32_t vertical_needed_input = (vertical_expected_output - 1) * stride.vertical + kh;
+ const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H);
+
+ const int32_t horizontal_needed_input = (horizontal_expected_output - 1) * stride.horizontal + kw;
+ const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W);
+
+ padding.top = vertical_total_padding / 2;
+ padding.bottom = (vertical_total_padding + 1) / 2;
+ padding.left = horizontal_total_padding / 2;
+ padding.right = (horizontal_total_padding + 1) / 2;
+
+ return padding;
+}
+
+inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureShape &ofm_shape,
+ const Stride &stride, uint32_t kw, uint32_t kh)
+{
+ const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
+ const int32_t horizontal_expected_output =
+ (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
+ assert(vertical_expected_output == ofm_shape.H);
+ assert(horizontal_expected_output == ofm_shape.W);
+
+ UNUSED_RELEASE(ofm_shape);
+ UNUSED_RELEASE(vertical_expected_output);
+ UNUSED_RELEASE(horizontal_expected_output);
+
+ return samePaddingUsingIFM(ifm_shape, stride, kw, kh);
+}
+
+} // namespace
+
+inline std::string to_string(const PaddingType type)
+{
+ switch (type)
+ {
+ case PaddingType::EXPLICIT:
+ return "Padding::EXPLICIT";
+ case PaddingType::SAME:
+ return "Padding::SAME";
+ case PaddingType::VALID:
+ return "Padding::VALID";
+ default:
+ throw std::runtime_error{"Fail to convert string: wrong padding type"};
+ }
+}
+
+Padding::Padding(void) : type{PaddingType::EXPLICIT}, param{0, 0, 0, 0}
+{
+ // DO NOTHING
+}
+
+Padding::Padding(PaddingType paddingType) : type{paddingType}, param{0, 0, 0, 0}
+{
+ assert(paddingType != PaddingType::EXPLICIT);
+}
+
+Padding::Padding(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom)
+ : type{PaddingType::EXPLICIT}, param{left, right, top, bottom}
+{
+ // DO NOTHING
+}
+
+const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape,
+ const FeatureShape &ofm_shape, const Stride &stride,
+ uint32_t kw, uint32_t kh)
+{
+ if (padding.type == PaddingType::EXPLICIT)
+ {
+ return padding.param;
+ }
+ else if (padding.type == PaddingType::SAME)
+ {
+ return samePadding(ifm_shape, ofm_shape, stride, kw, kh);
+ }
+ else if (padding.type == PaddingType::VALID)
+ {
+ return validPadding();
+ }
+ else
+ {
+ throw std::runtime_error{"Cannot handle padding type"};
+ }
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/Shape.cc b/runtime/onert/core/src/ir/Shape.cc
new file mode 100644
index 000000000..92999a4fa
--- /dev/null
+++ b/runtime/onert/core/src/ir/Shape.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Shape.h"
+
+#include <cassert>
+#include <functional>
+#include <numeric>
+#include <algorithm>
+
+namespace onert
+{
+namespace ir
+{
+
+FeatureShape Shape::asFeature(Layout layout) const
+{
+ assert(rank() == 4);
+
+ if (layout == Layout::NHWC)
+ {
+ // Feature Map in NHWC layout
+ // - Dimension(0) -> Batch
+ // - Dimension(1) -> Height
+ // - Dimension(2) -> Width
+ // - Dimension(3) -> Depth
+ const auto batch = dim(0);
+ const auto depth = dim(3);
+ const auto height = dim(1);
+ const auto width = dim(2);
+
+ return {batch, depth, height, width};
+ }
+ else if (layout == Layout::NCHW)
+ {
+ // Feature Map in NHWC layout
+ // - Dimension(0) -> Batch
+ // - Dimension(1) -> Depth
+ // - Dimension(2) -> Height
+ // - Dimension(3) -> Width
+ const auto batch = dim(0);
+ const auto depth = dim(1);
+ const auto height = dim(2);
+ const auto width = dim(3);
+
+ return {batch, depth, height, width};
+ }
+ else
+ {
+ throw std::runtime_error("Wrong Layout");
+ }
+}
+
+// Extended dimension is filled with 1.
+void Shape::extendRank(int to_rank)
+{
+ assert(to_rank - rank() >= 0);
+ _dimensions.insert(_dimensions.cbegin(), to_rank - rank(), 1);
+}
+
+uint64_t Shape::num_elements() const
+{
+ // All of the nodes must have non-negative dimension
+ assert(std::all_of(_dimensions.begin(), _dimensions.end(),
+ [](const int32_t &v) { return (v >= 0); }));
+
+ return std::accumulate(_dimensions.cbegin(), _dimensions.cend(), UINT64_C(1),
+ std::multiplies<uint64_t>());
+}
+
+Shape permuteShape(const Shape &shape, Layout frontend_layout, Layout backend_layout)
+{
+ assert(shape.rank() <= 4);
+ Shape backend_shape{shape};
+ if (shape.rank() == 4 && frontend_layout == Layout::NHWC && backend_layout == Layout::NCHW)
+ {
+ backend_shape.dim(1) = shape.dim(3);
+ backend_shape.dim(2) = shape.dim(1);
+ backend_shape.dim(3) = shape.dim(2);
+ }
+ else if (shape.rank() == 4 && frontend_layout == Layout::NCHW && backend_layout == Layout::NHWC)
+ {
+ backend_shape.dim(1) = shape.dim(2);
+ backend_shape.dim(2) = shape.dim(3);
+ backend_shape.dim(3) = shape.dim(1);
+ }
+ return backend_shape;
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/TypeInfo.cc b/runtime/onert/core/src/ir/TypeInfo.cc
new file mode 100644
index 000000000..ab8af287e
--- /dev/null
+++ b/runtime/onert/core/src/ir/TypeInfo.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/TypeInfo.h"
+
+namespace onert
+{
+namespace ir
+{
+
+bool operator==(const TypeInfo &lhs, const TypeInfo &rhs)
+{
+ if (lhs.type() != rhs.type())
+ {
+ return false;
+ }
+
+ if (lhs.offset() != rhs.offset())
+ {
+ return false;
+ }
+
+ if (lhs.scale() != rhs.scale())
+ {
+ return false;
+ }
+
+ return true;
+}
+
+bool operator!=(const TypeInfo &lhs, const TypeInfo &rhs) { return !(lhs == rhs); }
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Abs.cc b/runtime/onert/core/src/ir/operation/Abs.cc
new file mode 100644
index 000000000..b06705d07
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Abs.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Abs.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Abs::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Abs::Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Add.cc b/runtime/onert/core/src/ir/operation/Add.cc
new file mode 100644
index 000000000..2fa30f8ed
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Add.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Add.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Add::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Add::Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ArgMax.cc b/runtime/onert/core/src/ir/operation/ArgMax.cc
new file mode 100644
index 000000000..1275ae43a
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ArgMax.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ArgMax.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ArgMax::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ArgMax::ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/AvgPool2D.cc b/runtime/onert/core/src/ir/operation/AvgPool2D.cc
new file mode 100644
index 000000000..28d4fcb54
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/AvgPool2D.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/AvgPool2D.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void AvgPool2D::accept(OperationVisitor &v) const { v.visit(*this); }
+
+AvgPool2D::AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
new file mode 100644
index 000000000..0b3955c5c
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/BatchToSpaceND.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void BatchToSpaceND::accept(OperationVisitor &v) const { v.visit(*this); }
+
+BatchToSpaceND::BatchToSpaceND(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Cast.cc b/runtime/onert/core/src/ir/operation/Cast.cc
new file mode 100644
index 000000000..09d9c327e
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Cast.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Cast.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Cast::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Cast::Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Comparison.cc b/runtime/onert/core/src/ir/operation/Comparison.cc
new file mode 100644
index 000000000..2f6775411
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Comparison.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Comparison.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Comparison::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Comparison::Comparison(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Concat.cc b/runtime/onert/core/src/ir/operation/Concat.cc
new file mode 100644
index 000000000..608bc29a6
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Concat.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Concat.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Concat::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Concat::Concat(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Conv2D.cc b/runtime/onert/core/src/ir/operation/Conv2D.cc
new file mode 100644
index 000000000..3a2e1d1fe
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Conv2D.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Conv2D.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Conv2D::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Conv2D::Conv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc b/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc
new file mode 100644
index 000000000..676e039fa
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ConvertFp16ToFp32.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ConvertFp16ToFp32::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ConvertFp16ToFp32::ConvertFp16ToFp32(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc b/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc
new file mode 100644
index 000000000..bcfcbfc04
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ConvertFp32ToFp16.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ConvertFp32ToFp16::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ConvertFp32ToFp16::ConvertFp32ToFp16(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Custom.cc b/runtime/onert/core/src/ir/operation/Custom.cc
new file mode 100644
index 000000000..25c53e1ba
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Custom.cc
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Custom.h"
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Custom::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Custom::Custom(OperandConstraint input_constr, const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, std::string id, const Userdata &userdata)
+ : Operation{input_constr, inputs, outputs}, _id(std::move(id)), _userdata(userdata)
+{
+}
+
+const std::string &Custom::id() const { return _id; }
+
+const Custom::Userdata &Custom::userdata() const { return _userdata; }
+
+std::string Custom::name() const { return id(); }
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/DepthToSpace.cc b/runtime/onert/core/src/ir/operation/DepthToSpace.cc
new file mode 100644
index 000000000..f2d6c7c1b
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/DepthToSpace.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/DepthToSpace.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void DepthToSpace::accept(OperationVisitor &v) const { v.visit(*this); }
+
+DepthToSpace::DepthToSpace(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc b/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc
new file mode 100644
index 000000000..d587a5591
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/DepthwiseConv2D.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void DepthwiseConv2D::accept(OperationVisitor &v) const { v.visit(*this); }
+
+DepthwiseConv2D::DepthwiseConv2D(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Dequantize.cc b/runtime/onert/core/src/ir/operation/Dequantize.cc
new file mode 100644
index 000000000..14d6362bd
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Dequantize.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Dequantize.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Dequantize::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Dequantize::Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Div.cc b/runtime/onert/core/src/ir/operation/Div.cc
new file mode 100644
index 000000000..b095d9811
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Div.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Div.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Div::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Div::Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc b/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc
new file mode 100644
index 000000000..b300b004e
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/EmbeddingLookup.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void EmbeddingLookup::accept(OperationVisitor &v) const { v.visit(*this); }
+
+EmbeddingLookup::EmbeddingLookup(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Exp.cc b/runtime/onert/core/src/ir/operation/Exp.cc
new file mode 100644
index 000000000..0b22e080a
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Exp.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Exp.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Exp::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Exp::Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Floor.cc b/runtime/onert/core/src/ir/operation/Floor.cc
new file mode 100644
index 000000000..dc01535ad
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Floor.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Floor.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Floor::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Floor::Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/FullyConnected.cc b/runtime/onert/core/src/ir/operation/FullyConnected.cc
new file mode 100644
index 000000000..118ae554a
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/FullyConnected.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/FullyConnected.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void FullyConnected::accept(OperationVisitor &v) const { v.visit(*this); }
+
+FullyConnected::FullyConnected(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Gather.cc b/runtime/onert/core/src/ir/operation/Gather.cc
new file mode 100644
index 000000000..11d46e75b
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Gather.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Gather.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Gather::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Gather::Gather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/HashtableLookup.cc b/runtime/onert/core/src/ir/operation/HashtableLookup.cc
new file mode 100644
index 000000000..e9a7a82ff
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/HashtableLookup.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/HashtableLookup.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void HashtableLookup::accept(OperationVisitor &v) const { v.visit(*this); }
+
+HashtableLookup::HashtableLookup(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/InstanceNorm.cc b/runtime/onert/core/src/ir/operation/InstanceNorm.cc
new file mode 100644
index 000000000..2334560ef
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/InstanceNorm.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/InstanceNorm.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void InstanceNorm::accept(OperationVisitor &v) const { v.visit(*this); }
+
+InstanceNorm::InstanceNorm(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/L2Normalization.cc b/runtime/onert/core/src/ir/operation/L2Normalization.cc
new file mode 100644
index 000000000..d1c92fa8b
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/L2Normalization.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/L2Normalization.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void L2Normalization::accept(OperationVisitor &v) const { v.visit(*this); }
+
+L2Normalization::L2Normalization(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/L2Pool2D.cc b/runtime/onert/core/src/ir/operation/L2Pool2D.cc
new file mode 100644
index 000000000..8f21b93e0
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/L2Pool2D.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/L2Pool2D.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void L2Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
+
+L2Pool2D::L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LSTM.cc b/runtime/onert/core/src/ir/operation/LSTM.cc
new file mode 100644
index 000000000..30a865326
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/LSTM.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/LSTM.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void LSTM::accept(OperationVisitor &v) const { v.visit(*this); }
+
+LSTM::LSTM(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(23u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc b/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc
new file mode 100644
index 000000000..1ae97c142
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/LocalResponseNormalization.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void LocalResponseNormalization::accept(OperationVisitor &v) const { v.visit(*this); }
+
+LocalResponseNormalization::LocalResponseNormalization(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LogicalAnd.cc b/runtime/onert/core/src/ir/operation/LogicalAnd.cc
new file mode 100644
index 000000000..0d50706ca
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/LogicalAnd.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/LogicalAnd.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void LogicalAnd::accept(OperationVisitor &v) const { v.visit(*this); }
+
+LogicalAnd::LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LogicalNot.cc b/runtime/onert/core/src/ir/operation/LogicalNot.cc
new file mode 100644
index 000000000..8f1142102
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/LogicalNot.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/LogicalNot.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void LogicalNot::accept(OperationVisitor &v) const { v.visit(*this); }
+
+LogicalNot::LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LogicalOr.cc b/runtime/onert/core/src/ir/operation/LogicalOr.cc
new file mode 100644
index 000000000..d75207c4a
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/LogicalOr.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/LogicalOr.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void LogicalOr::accept(OperationVisitor &v) const { v.visit(*this); }
+
+LogicalOr::LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Logistic.cc b/runtime/onert/core/src/ir/operation/Logistic.cc
new file mode 100644
index 000000000..77d9d17de
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Logistic.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Logistic.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Logistic::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Logistic::Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LowerInfo.cc b/runtime/onert/core/src/ir/operation/LowerInfo.cc
new file mode 100644
index 000000000..249918bd6
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/LowerInfo.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/LowerInfo.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+LowerInfo::LowerInfo(const backend::Backend *backend, Layout layout)
+ : _permute_factor{backend, layout}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Max.cc b/runtime/onert/core/src/ir/operation/Max.cc
new file mode 100644
index 000000000..281f9d451
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Max.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Max.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Max::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Max::Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/MaxPool2D.cc b/runtime/onert/core/src/ir/operation/MaxPool2D.cc
new file mode 100644
index 000000000..eac53cc5e
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/MaxPool2D.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/MaxPool2D.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void MaxPool2D::accept(OperationVisitor &v) const { v.visit(*this); }
+
+MaxPool2D::MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Mean.cc b/runtime/onert/core/src/ir/operation/Mean.cc
new file mode 100644
index 000000000..5313a4434
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Mean.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Mean.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Mean::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Mean::Mean(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Min.cc b/runtime/onert/core/src/ir/operation/Min.cc
new file mode 100644
index 000000000..8be7f0cc8
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Min.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Min.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Min::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Min::Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Mul.cc b/runtime/onert/core/src/ir/operation/Mul.cc
new file mode 100644
index 000000000..03cdf1b61
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Mul.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Mul.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Mul::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Mul::Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Neg.cc b/runtime/onert/core/src/ir/operation/Neg.cc
new file mode 100644
index 000000000..df623a13b
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Neg.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Neg.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Neg::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Neg::Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/OneHot.cc b/runtime/onert/core/src/ir/operation/OneHot.cc
new file mode 100644
index 000000000..22935e7d6
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/OneHot.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/OneHot.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void OneHot::accept(OperationVisitor &v) const { v.visit(*this); }
+
+OneHot::OneHot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/PReLU.cc b/runtime/onert/core/src/ir/operation/PReLU.cc
new file mode 100644
index 000000000..a2e37e0ad
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/PReLU.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/PReLU.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void PReLU::accept(OperationVisitor &v) const { v.visit(*this); }
+
+PReLU::PReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Pack.cc b/runtime/onert/core/src/ir/operation/Pack.cc
new file mode 100644
index 000000000..f0908a2c6
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Pack.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ir/operation/Pack.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+void Pack::accept(OperationVisitor &v) const { v.visit(*this); }
+Pack::Pack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createAtLeast(3u), inputs, outputs}, _param{param}
+{
+}
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Pad.cc b/runtime/onert/core/src/ir/operation/Pad.cc
new file mode 100644
index 000000000..a958b5241
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Pad.cc
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Pad.h"
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Pad::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Permute.cc b/runtime/onert/core/src/ir/operation/Permute.cc
new file mode 100644
index 000000000..fd495ed4b
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Permute.cc
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Permute.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Permute::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Permute::Permute(const OperandIndex &input, const OperandIndex &output,
+ const backend::BackendContext *input_backend_ctx,
+ const backend::BackendContext *output_backend_ctx, Type type, DataType data_type)
+ : Operation{OperandConstraint::createExact(1u)}, _param{input_backend_ctx, output_backend_ctx},
+ _type{type}, _dataType{data_type}
+{
+ setInputs({input});
+ setOutputs({output});
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/RNN.cc b/runtime/onert/core/src/ir/operation/RNN.cc
new file mode 100644
index 000000000..298c5e745
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/RNN.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/RNN.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void RNN::accept(OperationVisitor &v) const { v.visit(*this); }
+
+RNN::RNN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/RSQRT.cc b/runtime/onert/core/src/ir/operation/RSQRT.cc
new file mode 100644
index 000000000..2bce1fa28
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/RSQRT.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/RSQRT.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void RSQRT::accept(OperationVisitor &v) const { v.visit(*this); }
+
+RSQRT::RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ReLU.cc b/runtime/onert/core/src/ir/operation/ReLU.cc
new file mode 100644
index 000000000..f0c88478b
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ReLU.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ReLU.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ReLU::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ReLU::ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ReLU1.cc b/runtime/onert/core/src/ir/operation/ReLU1.cc
new file mode 100644
index 000000000..734f0b65b
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ReLU1.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ReLU1.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ReLU1::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ReLU1::ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ReLU6.cc b/runtime/onert/core/src/ir/operation/ReLU6.cc
new file mode 100644
index 000000000..5972329af
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ReLU6.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ReLU6.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ReLU6::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ReLU6::ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ReduceMax.cc b/runtime/onert/core/src/ir/operation/ReduceMax.cc
new file mode 100644
index 000000000..f7faf8744
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ReduceMax.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ReduceMax.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ReduceMax::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ReduceMax::ReduceMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ReduceMin.cc b/runtime/onert/core/src/ir/operation/ReduceMin.cc
new file mode 100644
index 000000000..83cdccb78
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ReduceMin.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ReduceMin.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ReduceMin::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ReduceMin::ReduceMin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ReduceSum.cc b/runtime/onert/core/src/ir/operation/ReduceSum.cc
new file mode 100644
index 000000000..c25a5ac5c
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ReduceSum.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ReduceSum.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ReduceSum::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ReduceSum::ReduceSum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Reshape.cc b/runtime/onert/core/src/ir/operation/Reshape.cc
new file mode 100644
index 000000000..b3a0d30b6
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Reshape.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Reshape.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Reshape::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Reshape::Reshape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ResizeBilinear.cc b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
new file mode 100644
index 000000000..d0d89f45f
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ResizeBilinear.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ResizeBilinear::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ResizeBilinear::ResizeBilinear(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/SQRT.cc b/runtime/onert/core/src/ir/operation/SQRT.cc
new file mode 100644
index 000000000..ad887d89a
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/SQRT.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/SQRT.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void SQRT::accept(OperationVisitor &v) const { v.visit(*this); }
+
+SQRT::SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Shape.cc b/runtime/onert/core/src/ir/operation/Shape.cc
new file mode 100644
index 000000000..2a63d6dcf
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Shape.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Shape.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Shape::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Shape::Shape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Sin.cc b/runtime/onert/core/src/ir/operation/Sin.cc
new file mode 100644
index 000000000..631505f36
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Sin.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Sin.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Sin::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Sin::Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Slice.cc b/runtime/onert/core/src/ir/operation/Slice.cc
new file mode 100644
index 000000000..674bd7093
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Slice.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Slice.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Slice::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Slice::Slice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Softmax.cc b/runtime/onert/core/src/ir/operation/Softmax.cc
new file mode 100644
index 000000000..3f1aa0af1
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Softmax.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Softmax.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Softmax::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Softmax::Softmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc b/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc
new file mode 100644
index 000000000..53fab4fa9
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/SpaceToBatchND.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void SpaceToBatchND::accept(OperationVisitor &v) const { v.visit(*this); }
+
+SpaceToBatchND::SpaceToBatchND(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/SpaceToDepth.cc b/runtime/onert/core/src/ir/operation/SpaceToDepth.cc
new file mode 100644
index 000000000..d8a45aee5
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/SpaceToDepth.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/SpaceToDepth.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void SpaceToDepth::accept(OperationVisitor &v) const { v.visit(*this); }
+
+SpaceToDepth::SpaceToDepth(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Split.cc b/runtime/onert/core/src/ir/operation/Split.cc
new file mode 100644
index 000000000..244884e41
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Split.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ir/operation/Split.h"
+#include <cassert>
+#include "ir/OperationVisitor.h"
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+void Split::accept(OperationVisitor &v) const { v.visit(*this); }
+Split::Split(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/SquaredDifference.cc b/runtime/onert/core/src/ir/operation/SquaredDifference.cc
new file mode 100644
index 000000000..49e58aaf2
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/SquaredDifference.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/SquaredDifference.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void SquaredDifference::accept(OperationVisitor &v) const { v.visit(*this); }
+
+SquaredDifference::SquaredDifference(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Squeeze.cc b/runtime/onert/core/src/ir/operation/Squeeze.cc
new file mode 100644
index 000000000..8cf928fb4
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Squeeze.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Squeeze.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Squeeze::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Squeeze::Squeeze(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param(param)
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/StridedSlice.cc b/runtime/onert/core/src/ir/operation/StridedSlice.cc
new file mode 100644
index 000000000..adc8beaa3
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/StridedSlice.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/StridedSlice.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void StridedSlice::accept(OperationVisitor &v) const { v.visit(*this); }
+
+StridedSlice::StridedSlice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Sub.cc b/runtime/onert/core/src/ir/operation/Sub.cc
new file mode 100644
index 000000000..d71071686
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Sub.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Sub.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Sub::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Sub::Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Tanh.cc b/runtime/onert/core/src/ir/operation/Tanh.cc
new file mode 100644
index 000000000..8fab0c0f3
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Tanh.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Tanh.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Tanh::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Tanh::Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/TopKV2.cc b/runtime/onert/core/src/ir/operation/TopKV2.cc
new file mode 100644
index 000000000..a5e6c6a85
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/TopKV2.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/TopKV2.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void TopKV2::accept(OperationVisitor &v) const { v.visit(*this); }
+
+TopKV2::TopKV2(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Transpose.cc b/runtime/onert/core/src/ir/operation/Transpose.cc
new file mode 100644
index 000000000..3a663fbce
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Transpose.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Transpose.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Transpose::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Transpose::Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/TransposeConv.cc b/runtime/onert/core/src/ir/operation/TransposeConv.cc
new file mode 100644
index 000000000..7f29ca44e
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/TransposeConv.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/TransposeConv.h"
+
+#include <cassert>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void TransposeConv::accept(OperationVisitor &v) const { v.visit(*this); }
+
+TransposeConv::TransposeConv(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Unpack.cc b/runtime/onert/core/src/ir/operation/Unpack.cc
new file mode 100644
index 000000000..67aa54ab5
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Unpack.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ir/operation/Unpack.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+void Unpack::accept(OperationVisitor &v) const { v.visit(*this); }
+Unpack::Unpack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc b/runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc
new file mode 100644
index 000000000..5f4612d35
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstantInsertionPass.h"
+
+#include "backend/Backend.h"
+#include <ir/Graph.h>
+#include <util/Utils.h>
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
+void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation &node)
+{
+ const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
+ const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
+ const auto backend = op_seq_lower_info->backend();
+ const auto layout = op_seq_lower_info->layout();
+ const auto factor = operand::PermuteFactor{backend, layout};
+
+ for (const auto input : node.getInputs())
+ {
+ auto &object = _graph.operands().at(input);
+
+ if (object.isConstant())
+ {
+ const auto key = ReplaceKey{input, factor};
+ if (_replace_operands_map.count(key) == 0)
+ {
+ auto new_object = object;
+      // TODO Remove const_cast
+ const_cast<std::list<OperationIndex> &>(new_object.getDef().list()).clear();
+ const_cast<std::list<OperationIndex> &>(new_object.getUses().list()).clear();
+ const auto new_index = _graph.operands().emplace(new_object);
+ _replace_operands_map[key] = new_index;
+
+ _lowered_graph.setLowerInfo(new_index, std::make_unique<operand::LowerInfo>());
+ _lowered_graph.getLowerInfo(new_index)->addDefPermuteFactor(factor);
+ }
+
+ const auto replaced_input = _replace_operands_map[key];
+ // Update op_seq
+ if (_lowered_graph.op_seqs().at(op_sequence_index).getInputs().contains(input))
+ {
+ _lowered_graph.op_seqs().at(op_sequence_index).replaceInput(input, replaced_input);
+ }
+
+ // Update node
+ node.replaceInput(input, replaced_input);
+
+ // Update operand
+ auto &replaced_object = _graph.operands().at(replaced_input);
+ replaced_object.appendUse(node_index);
+
+ // Update lower_info
+ auto replaced_lower_info = _lowered_graph.getLowerInfo(replaced_input);
+ replaced_lower_info->addUsePermuteFactor(factor);
+
+ // Remove this node from def and uses of origin operand
+ if (object.getDef().contains(node_index))
+ {
+ object.removeDef(node_index);
+ }
+ object.removeUse(node_index);
+
+ // Remove origin operand
+ if (object.getDef().size() == 0 && object.getUses().size() == 0)
+ {
+ _graph.removeOperand(input);
+ _lowered_graph.removeLowerInfo(input);
+ }
+ }
+ }
+
+  // This runtime does not yet support an operation whose output is a constant
+ for (const auto &output : node.getOutputs())
+ {
+ UNUSED_RELEASE(output);
+ assert(!_graph.operands().at(output).isConstant());
+ }
+}
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.h b/runtime/onert/core/src/ir/pass/ConstantInsertionPass.h
new file mode 100644
index 000000000..3ea4dc397
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/ConstantInsertionPass.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
+#define __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
+
+#include <ir/operand/PermuteFactor.h>
+#include <ir/Index.h>
+#include "LoweredOperationPass.h"
+#include <unordered_map>
+#include <utility>
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
+class ConstantInsertionPass : public LoweredOperationPass
+{
+public:
+ using LoweredOperationPass::LoweredOperationPass;
+
+public:
+ std::string id() final { return "ConstantInsertionPass"; }
+
+public:
+ void callback(const OperationIndex &index, Operation &node) final;
+
+private:
+ struct ReplaceKey
+ {
+ OperandIndex index;
+ operand::PermuteFactor factor;
+
+ bool operator==(const ReplaceKey &other) const
+ {
+ return index == other.index && factor == other.factor;
+ }
+ };
+
+ /**
+ * @brief Structure that provides hash function of ReplaceKey
+ */
+ struct KeyHasher
+ {
+ std::size_t operator()(const ReplaceKey &key) const noexcept
+ {
+ using std::hash;
+ return hash<OperandIndex>()(key.index) ^ (hash<operand::PermuteFactor>()(key.factor) << 1);
+ }
+ };
+
+ std::unordered_map<ReplaceKey, OperandIndex, KeyHasher> _replace_operands_map;
+};
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/LoweredOperandPass.h b/runtime/onert/core/src/ir/pass/LoweredOperandPass.h
new file mode 100644
index 000000000..eefb8ddfb
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/LoweredOperandPass.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__
+#define __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__
+
+#include "OperandPass.h"
+#include "ir/LoweredGraph.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
+class LoweredOperandPass : public OperandPass
+{
+public:
+ LoweredOperandPass(ir::LoweredGraph &lowered_graph)
+ : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
+ {
+ // DO NOTHING
+ }
+
+ virtual ~LoweredOperandPass() = default;
+
+ std::string id() override = 0;
+ void callback(const OperandIndex &i, Operand &o) override = 0;
+
+protected:
+ ir::LoweredGraph &_lowered_graph;
+};
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/LoweredOperationPass.h b/runtime/onert/core/src/ir/pass/LoweredOperationPass.h
new file mode 100644
index 000000000..0138712d7
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/LoweredOperationPass.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__
+#define __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__
+
+#include "OperationPass.h"
+#include "ir/LoweredGraph.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
+class LoweredOperationPass : public OperationPass
+{
+public:
+ LoweredOperationPass(ir::LoweredGraph &lowered_graph)
+ : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
+ {
+ // DO NOTHING
+ }
+
+ virtual ~LoweredOperationPass() = default;
+
+ std::string id() override = 0;
+ void callback(const OperationIndex &i, Operation &o) override = 0;
+
+protected:
+ ir::LoweredGraph &_lowered_graph;
+};
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/OperandPass.cc b/runtime/onert/core/src/ir/pass/OperandPass.cc
new file mode 100644
index 000000000..693a0f493
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/OperandPass.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperandPass.h"
+
+#include "ir/Graph.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
+void OperandPass::run()
+{
+ _graph.operands().iterate(
+ [&](const OperandIndex &index, Operand &object) { callback(index, object); });
+}
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/OperandPass.h b/runtime/onert/core/src/ir/pass/OperandPass.h
new file mode 100644
index 000000000..393060741
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/OperandPass.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_GRAPH_PASS_OPERAND_PASS_H__
+#define __ONERT_GRAPH_PASS_OPERAND_PASS_H__
+
+#include "Pass.h"
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace ir
+{
+class Operand;
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
+class OperandPass : public Pass
+{
+public:
+ using Pass::Pass;
+ virtual ~OperandPass() = default;
+
+public:
+ std::string id() override = 0;
+ void run() override final;
+ virtual void callback(const OperandIndex &i, Operand &o) = 0;
+};
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_GRAPH_PASS_OPERAND_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/OperationPass.cc b/runtime/onert/core/src/ir/pass/OperationPass.cc
new file mode 100644
index 000000000..84b1da3ee
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/OperationPass.cc
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationPass.h"
+
+#include "ir/Index.h"
+#include "ir/Operation.h"
+#include "ir/Graph.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
+void OperationPass::run()
+{
+ _graph.operations().iterate(
+ [&](const OperationIndex &index, Operation &node) { callback(index, node); });
+}
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/OperationPass.h b/runtime/onert/core/src/ir/pass/OperationPass.h
new file mode 100644
index 000000000..1733f87ed
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/OperationPass.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file OperationPass.h
+ * @brief This file contains OperationPass class
+ */
+
+#ifndef __ONERT_GRAPH_PASS_OPERATION_PASS_H__
+#define __ONERT_GRAPH_PASS_OPERATION_PASS_H__
+
+#include "Pass.h"
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace ir
+{
+class Operation;
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
+/**
+ * @brief Class to iterate over operations and calls callback() method
+ */
+class OperationPass : public Pass
+{
+public:
+ using Pass::Pass;
+ virtual ~OperationPass() = default;
+
+public:
+ /**
+ * @brief Returns string id for this pass. Same with class name.
+ *
+ * @return string id
+ */
+ std::string id() override = 0;
+
+ /**
+ * @brief Be called for all nodes of graph.
+ * @param index is the index of a node in graph
+ * @param node is the node in graph
+ */
+ virtual void callback(const OperationIndex &index, Operation &node) = 0;
+
+ /**
+ * @brief Run the pass
+ */
+ void run() final;
+};
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_GRAPH_PASS_OPERATION_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/Pass.h b/runtime/onert/core/src/ir/pass/Pass.h
new file mode 100644
index 000000000..1c6628f6f
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/Pass.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_GRAPH_PASS_PASS_H__
+#define __ONERT_GRAPH_PASS_PASS_H__
+
+#include <string>
+
+namespace onert
+{
+namespace ir
+{
+class Graph;
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
+class Pass
+{
+public:
+ Pass(Graph &graph) : _graph{graph} {}
+ virtual ~Pass() = default;
+
+public:
+ virtual std::string id() = 0;
+ virtual void run() = 0;
+
+protected:
+ Graph &_graph;
+};
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_GRAPH_PASS_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc
new file mode 100644
index 000000000..f4793d411
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermutationEliminationPass.h"
+
+#include "ir/Operand.h"
+#include "ir/operand/LowerInfo.h"
+#include "ir/Graph.h"
+#include "backend/IConfig.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+void PermutationEliminationPass::callback(const OperandIndex &inp_index, Operand &object)
+{
+ if (_graph.getInputs().contains(inp_index))
+ {
+ eliminateInput(inp_index, object);
+ }
+ else if (_graph.getOutputs().contains(inp_index))
+ {
+ eliminateOutput(inp_index, object);
+ }
+}
+
+void PermutationEliminationPass::eliminateInput(const OperandIndex &inp_index, Operand &object)
+{
+ auto &model_inputs = _graph.getInputs();
+
+ // get uses of the model's given input
+ auto uses = object.getUses();
+
+ // input must be used just by permutation
+ if (uses.size() != 1)
+ {
+ return;
+ }
+
+ for (auto input_use : uses.list())
+ {
+ auto &perm_operation = _graph.operations().at(input_use);
+ auto perm_inputs = perm_operation.getInputs();
+
+ auto perm_outputs = perm_operation.getOutputs();
+
+ if (!isPermuteLayerToEliminate(perm_inputs, perm_outputs, true))
+ {
+ return;
+ }
+
+ assert(perm_inputs.at(0) == inp_index);
+
+ VERBOSE(PermutationEliminationPass::EliminateInput) << "remove NHWC_TO_NCHW permutation\n";
+
+ // set model's new input, which was output of permutation
+ model_inputs.replace(inp_index, perm_outputs.at(0));
+
+ // remove model's input, which is also input of permutation
+ _graph.removeOperand(inp_index);
+
+ // remove permutation operation
+ assert(_lowered_graph.op_seqs().containsOperation(input_use));
+ auto op_seq_idx = _lowered_graph.op_seqs().getOperation(input_use);
+ _lowered_graph.op_seqs().remove(op_seq_idx);
+ _graph.operations().remove(input_use);
+
+ VERBOSE(PermutationEliminationPass::EliminateInput)
+ << inp_index.value() << " is model's input and is removed. New input is "
+ << perm_outputs.at(0).value() << "\n"
+ << input_use.value() << " is removed permutation operation\n";
+ }
+}
+
+void PermutationEliminationPass::eliminateOutput(const OperandIndex &out_index, Operand &object)
+{
+ auto &model_outputs = _graph.getOutputs();
+
+ // get defs of the model's given output
+ auto defs = object.getDef();
+
+ // output must use just permutation
+ if (defs.size() != 1)
+ {
+ return;
+ }
+
+ for (auto output_def : defs.list())
+ {
+ auto &perm_operation = _graph.operations().at(output_def);
+ auto perm_outputs = perm_operation.getOutputs();
+
+ auto perm_inputs = perm_operation.getInputs();
+ if (!isPermuteLayerToEliminate(perm_inputs, perm_outputs, false))
+ {
+ return;
+ }
+
+ assert(perm_outputs.at(0) == out_index);
+
+ VERBOSE(PermutationEliminationPass::EliminateOutput) << "remove NCHW_TO_NHWC permutation\n";
+
+ // Update operations' output that is used by permute operand
+ for (auto perm_input_index : perm_inputs)
+ {
+ auto &perm_input_operand = _graph.operands().at(perm_input_index);
+ perm_input_operand.removeUse(output_def);
+ }
+
+ // set model's new output, which was input of permutation
+ model_outputs.replace(out_index, perm_inputs.at(0));
+
+ // remove model's output, which is also output of permutation
+ _graph.removeOperand(out_index);
+
+ // remove permutation operation
+ assert(_lowered_graph.op_seqs().containsOperation(output_def));
+ auto op_seq_idx = _lowered_graph.op_seqs().getOperation(output_def);
+ _lowered_graph.op_seqs().remove(op_seq_idx);
+ _graph.operations().remove(output_def);
+
+ VERBOSE(PermutationEliminationPass::EliminateOutput)
+ << out_index.value() << " is model's output and is removed. New output is "
+ << perm_inputs.at(0).value() << "\n"
+ << output_def.value() << " is removed permutation operation\n";
+ }
+}
+
+bool PermutationEliminationPass::isPermuteLayerToEliminate(const OperandIndexSequence &inp_indexes,
+                                                           const OperandIndexSequence &out_indexes,
+                                                           bool is_for_model_input)
+{
+  auto input_def_factors = _lowered_graph.getLowerInfo(inp_indexes.at(0))->def_factors();
+  auto output_def_factors = _lowered_graph.getLowerInfo(out_indexes.at(0))->def_factors();
+
+  if (input_def_factors.size() != 1 || output_def_factors.size() != 1)
+  {
+    return false;
+  }
+
+  auto input_layout = input_def_factors.getOnlyElement().layout();
+  auto output_layout = output_def_factors.getOnlyElement().layout();
+
+  // all operands' factor must be the same
+  for (auto index : inp_indexes)
+  {
+    auto op_factor_set = _lowered_graph.getLowerInfo(index)->def_factors();
+    if (op_factor_set.size() != 1 ||
+        input_layout != _lowered_graph.getLowerInfo(index)->def_factors().getOnlyElement().layout())
+    {
+      return false;
+    }
+  }
+  // all operands' factor must be the same
+  for (auto index : out_indexes)
+  {
+    auto op_factor_set = _lowered_graph.getLowerInfo(index)->def_factors();
+    if (op_factor_set.size() != 1 ||
+        output_layout !=
+            _lowered_graph.getLowerInfo(index)->def_factors().getOnlyElement().layout())
+    {
+      return false;
+    }
+  }
+
+  if (is_for_model_input)
+  {
+    // check if this is NHWC_TO_NCHW permutation: must have single input, which is model's input
+    return (inp_indexes.size() == 1 && input_layout == Layout::NHWC &&
+            output_layout == Layout::NCHW);
+  }
+
+  // check if this is NCHW_TO_NHWC permutation: must have single output, which is model's output
+  return (out_indexes.size() == 1 && input_layout == Layout::NCHW && output_layout == Layout::NHWC);
+}
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.h b/runtime/onert/core/src/ir/pass/PermutationEliminationPass.h
new file mode 100644
index 000000000..1c8430062
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/PermutationEliminationPass.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
+#define __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
+
+#include "LoweredOperandPass.h"
+#include "ir/Operand.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
+class PermutationEliminationPass : public LoweredOperandPass
+{
+public:
+ using LoweredOperandPass::LoweredOperandPass;
+
+public:
+ std::string id() override { return "PermutationEliminationPass"; }
+
+ void callback(const OperandIndex &index, Operand &object) override;
+
+private:
+ /**
+ * @brief Remove Permute operation that permutes input
+ *
+ * Note: This function also removes model's input and
+ * sets output of permutation as model's new input
+ *
+ * @param inp_index is the target operand index for the elimination
+ * @param object is the target operand object for the elimination
+ *
+ * @return
+ */
+ void eliminateInput(const OperandIndex &inp_index, Operand &object);
+
+ /**
+ * @brief Remove Permute operation that permutes output of a model
+ *
+ * Note: This function also removes model's output and
+ * sets input of permutation as model's new output
+ *
+ * @param out_index is the target operand index for the elimination
+ * @param object is the target operand object for the elimination
+ *
+ * @return
+ */
+ void eliminateOutput(const OperandIndex &out_index, Operand &object);
+
+ /**
+ * @brief Determine if passed operands are permute layer's input and output, that must be
+ * eliminated
+ *
+ * @param inp_indexes indexes of the input operands to the operation
+ * @param out_indexes indexes of the output operands to the operation
+ * @param is_for_model_input checking for model's input or output
+ *
+ * @return if it is permutation layer
+ */
+ bool isPermuteLayerToEliminate(const OperandIndexSequence &inp_indexes,
+ const OperandIndexSequence &out_indexes, bool is_for_model_input);
+};
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc
new file mode 100644
index 000000000..32db3d878
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermutationInsertionPass.h"
+
+#include <cassert>
+#include <utility>
+#include <unordered_map>
+
+#include "ir/Operand.h"
+#include "ir/operation/LowerInfo.h"
+#include "ir/Graph.h"
+#include "backend/IConfig.h"
+#include "util/logging.h"
+#include <memory>
+#include "ir/operation/Permute.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
+void PermutationInsertionPass::callback(const OperandIndex &index, Operand &object)
+{
+ auto &&operand_li = _lowered_graph.getLowerInfo(index);
+ assert(operand_li);
+
+ // NOTE Later, constants also will have Def
+ // Ignore constants
+ if (operand_li->def_factors().size() == 0)
+ {
+ return;
+ }
+
+ std::list<OperationIndex> permute_indexes;
+
+ // Build a map for all necessary type of operands
+ std::unordered_map<operand::PermuteFactor, OperandIndex> factor_to_index;
+ {
+ assert(operand_li->def_factors().size() == 1);
+ for (auto factor : operand_li->def_factors())
+ {
+ factor_to_index.emplace(factor, index);
+ }
+
+ auto insert_set = operand_li->use_factors() - operand_li->def_factors();
+ for (auto factor : insert_set)
+ {
+ const auto permute_operation_index = insertPermute(index, factor);
+ permute_indexes.push_back(permute_operation_index);
+ VERBOSE(PermutationInsertionPass) << "Insert 'Permute' operation for operand "
+ << index.value() << std::endl;
+ const auto &permute_operation = _graph.operations().at(permute_operation_index);
+ const auto permuted_operand_index = permute_operation.getOutputs().at(0);
+ factor_to_index.emplace(factor, permuted_operand_index);
+ }
+ }
+
+ // Update operations' input that uses this operand
+ {
+ std::list<OperationIndex> remove_list;
+
+ auto uses = object.getUses();
+ for (auto use : uses.list())
+ {
+ // If permute operation, ignore it
+ if (std::find(permute_indexes.begin(), permute_indexes.end(), use) != permute_indexes.end())
+ continue;
+
+ auto &operation = _graph.operations().at(use);
+ assert(_lowered_graph.op_seqs().containsOperation(use));
+ auto op_seq_index = _lowered_graph.op_seqs().getOperation(use);
+ auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
+ assert(op_seq_li);
+ const auto op_seq_layout = op_seq_li->layout();
+ const backend::Backend *backend = op_seq_li->backend();
+ assert(backend);
+ auto use_node_inputs = operation.getInputs();
+ assert(use_node_inputs.contains(index));
+
+ auto new_index = factor_to_index.at({backend, op_seq_layout});
+ if (index != new_index)
+ {
+ // Update from op_seq
+ _lowered_graph.op_seqs().at(op_seq_index).replaceInput(index, new_index);
+
+ // Update from operation
+ operation.replaceInput(index, new_index);
+
+ // Update from operand
+ remove_list.push_back(
+ use); // Removal should be done in another loop since we are in the loop
+ _graph.operands().at(new_index).appendUse(use);
+ }
+ }
+
+ for (auto &operation : remove_list)
+ {
+ object.removeUse(operation);
+ }
+ }
+}
+
+OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &operand_index,
+ const operand::PermuteFactor &factor)
+{
+ assert(!_graph.isBuildingPhase());
+
+ auto &operand = _graph.operands().at(operand_index);
+
+ // Generate output operand and permute operation
+ auto out_operand_index = _graph.addOperand(operand.shape(), operand.typeInfo());
+ // change model output if operand_index is model output index
+ auto &model_outputs = _graph.getOutputs();
+ if (model_outputs.contains(operand_index))
+ {
+ model_outputs.replace(operand_index, out_operand_index);
+ }
+
+ // Find Permute information
+ auto input_backend =
+ _lowered_graph.getLowerInfo(operand_index)->def_factors().getOnlyElement().backend();
+ auto output_backend = factor.backend();
+ // NOTE Permute may not have specific layout because the layout of input and output may be
+ // different.
+ const auto permute_node_layout = Layout::UNKNOWN;
+ const auto permute_node_backend = compiler::BackendManager::get().getDefault();
+ const operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
+
+ // Update LowerInfo of input operand
+ auto operand_lower_info = _lowered_graph.getLowerInfo(operand_index);
+ operand_lower_info->removeUsePermuteFactor(factor);
+ operand_lower_info->addUsePermuteFactor(permute_node_factor);
+
+ // Update LowerInfo of output operand
+ auto out_operand_li = std::make_unique<operand::LowerInfo>();
+
+ // The input and output factors of all nodes will be the same except Permute. So Tensor's
+ // allocators allocates memory using only the information of def permutation factor now.
+ // TODO Change param to permute_node_factor
+ out_operand_li->addDefPermuteFactor(factor);
+ out_operand_li->addUsePermuteFactor(factor);
+ _lowered_graph.setLowerInfo(out_operand_index, std::move(out_operand_li));
+
+ auto input_backend_ctx = _lowered_graph.backend_contexts().at(input_backend).get();
+ auto output_backend_ctx = _lowered_graph.backend_contexts().at(output_backend).get();
+
+ // Insert permute operation to the graph
+ const auto input_layout =
+ _lowered_graph.getLowerInfo(operand_index)->def_factors().getOnlyElement().layout();
+ const auto output_layout = factor.layout();
+ using Permute = operation::Permute;
+ const auto permute_type = [&]() {
+ if (input_layout == Layout::NHWC && output_layout == Layout::NCHW)
+ {
+ return Permute::Type::NHWC_TO_NCHW;
+ }
+ else if (input_layout == Layout::NCHW && output_layout == Layout::NHWC)
+ {
+ return Permute::Type::NCHW_TO_NHWC;
+ }
+ else
+ {
+ return Permute::Type::COPY;
+ }
+ }();
+ auto insert_node = std::make_unique<Permute>(operand_index, out_operand_index, input_backend_ctx,
+ output_backend_ctx, permute_type);
+
+ auto node_index = _graph.operations().push(std::move(insert_node));
+ const auto &node = _graph.operations().at(node_index);
+
+ // OpSequence
+ {
+ auto op_seq_index = _lowered_graph.op_seqs().emplace(node_index, node, permute_node_layout);
+ auto &op_seq = _lowered_graph.op_seqs().at(op_seq_index);
+ op_seq.setInputs(node.getInputs());
+ op_seq.setOutputs(node.getOutputs());
+ _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<operation::LowerInfo>(
+ permute_node_backend, permute_node_layout));
+ }
+
+ // Update Use/Def info
+ {
+ _graph.operands().at(operand_index).appendUse(node_index);
+ _graph.operands().at(out_operand_index).appendDef(node_index);
+ }
+ return node_index;
+}
+} // namespace pass
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.h b/runtime/onert/core/src/ir/pass/PermutationInsertionPass.h
new file mode 100644
index 000000000..314a54c95
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/PermutationInsertionPass.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
+#define __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
+
+#include "LoweredOperandPass.h"
+#include "compiler/BackendManager.h"
+#include "ir/Operand.h" //for OperationIndex
+#include "ir/operand/PermuteFactor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
+class PermutationInsertionPass : public LoweredOperandPass
+{
+public:
+ using LoweredOperandPass::LoweredOperandPass;
+
+public:
+ std::string id() override { return "PermutationInsertionPass"; }
+ void callback(const OperandIndex &index, Operand &object) override;
+
+ /**
+ * @brief Insert Permute operation that has given operand as input
+ *
+ * @param operand_index is the target operand index for the insertion
+ * @param factor is the output operand's backend type and layout
+ *
+ * @return OperationIndex
+ */
+ OperationIndex insertPermute(const OperandIndex &operand_index,
+ const operand::PermuteFactor &factor);
+
+private:
+};
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/PermutationOperationPass.cc b/runtime/onert/core/src/ir/pass/PermutationOperationPass.cc
new file mode 100644
index 000000000..f2172c4b0
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/PermutationOperationPass.cc
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermutationOperationPass.h"
+
+#include "backend/Backend.h"
+#include "backend/IConfig.h"
+#include "ir/Graph.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
+void PermutationOperationPass::callback(const OperationIndex &, Operation &node)
+{
+ node.accept(*this);
+};
+
/**
 * @brief Force the given operation to run in the frontend layout even though
 *        its OpSequence was lowered to a different backend layout.
 *
 * Works in three steps, in order:
 *  1. split the operations *after* the target out of the current OpSequence
 *     into a new OpSequence that keeps the old lower info,
 *  2. move the target itself into a fresh single-operation OpSequence lowered
 *     with the frontend layout,
 *  3. rewrite the def/use PermuteFactors of the target's operands so later
 *     passes insert the right Permutes.
 *
 * @param node Target operation; must be the sole definition of its output 0.
 */
void PermutationOperationPass::changeToKeepLayout(const Operation &node)
{
  const auto &output_ind = node.getOutputs().at(0);
  const auto &output_obj = _graph.operands().at(output_ind);

  // The operation's index is recovered through its first output's (single) def.
  assert(output_obj.getDef().size() == 1);
  const auto &node_index = output_obj.getDef().list().front();
  const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);

  const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout();
  const auto backend_layout = _lowered_graph.getLowerInfo(op_seq_index)->layout();

  // Nothing to do when both layouts already agree.
  if (frontend_layout == backend_layout)
  {
    return;
  }

  // CPU supports only NHWC now
  if (_lowered_graph.getLowerInfo(op_seq_index)->backend()->config()->id() != "cpu")
  {
    // TODO Change backend of this node
    assert(frontend_layout == Layout::NHWC || backend_layout == Layout::UNKNOWN);
  }

  // Divide op_seq based on target operation
  {
    auto &prev_op_seq = _lowered_graph.op_seqs().at(op_seq_index);

    // Create new op_seq and move information from existing op_seq to new op_seq if target
    // node is the end of op_seq
    auto it = prev_op_seq.begin();
    // Find iterator of target node in op_seq
    // NOTE(review): post-increment leaves `it` one PAST the target after the
    // loop, so `it` below is the first operation AFTER the target (if any).
    while ((it++)->index != node_index)
      ;
    if (it != prev_op_seq.end())
    {
      // Seed the successor OpSequence with the first post-target operation,
      // preserving the original sequence's layout.
      const auto &next_op_seq_index =
          _lowered_graph.op_seqs().emplace(it->index, *it->node, prev_op_seq.getLayout());
      auto &next_op_seq = _lowered_graph.op_seqs().at(next_op_seq_index);
      next_op_seq.setInputs(it->node->getInputs());
      next_op_seq.setOutputs(it->node->getOutputs());

      std::vector<OperationIndex> remove_list;
      remove_list.emplace_back(it->index);
      while (++it != prev_op_seq.end())
      {
        next_op_seq.appendOperation(it->index, *it->node);
        next_op_seq.setOutputs(it->node->getOutputs());
        remove_list.emplace_back(it->index);
      }

      // The original sequence now ends at the target operation.
      prev_op_seq.setOutputs(node.getOutputs());
      for (const auto &index : remove_list)
      {
        prev_op_seq.remove(index);
      }

      // The split-off tail keeps the original backend/layout lower info.
      const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
      _lowered_graph.setLowerInfo(
          next_op_seq_index,
          std::make_unique<operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout()));
    }
  }

  // Remove target operation from op_seq and insert the target operation to new op_seq
  {
    const auto backend = _lowered_graph.getLowerInfo(op_seq_index)->backend();

    // Remove target operation from op_sequence
    _lowered_graph.op_seqs().removeFromOpSequence(node_index);

    if (!_lowered_graph.op_seqs().exist(op_seq_index))
    {
      // Remove lowerinfo for op_seq of target operation if the op_seq does not exist
      _lowered_graph.removeLowerInfo(op_seq_index);
    }
    else
    {
      // Update op_seq of target operation if the op_seq exists
      auto &prev_op_seq = _lowered_graph.op_seqs().at(op_seq_index);
      const auto last_node = (--prev_op_seq.end())->node;
      prev_op_seq.setOutputs(last_node->getOutputs());
    }

    // Create new op_seq and set information to the op_seq
    // The target now lives alone, lowered with the frontend layout.
    auto new_op_seq_index = _lowered_graph.op_seqs().emplace(node_index, node, frontend_layout);
    auto &new_op_seq = _lowered_graph.op_seqs().at(new_op_seq_index);
    new_op_seq.setInputs(node.getInputs());
    new_op_seq.setOutputs(node.getOutputs());
    _lowered_graph.setLowerInfo(new_op_seq_index,
                                std::make_unique<operation::LowerInfo>(backend, frontend_layout));
  }

  // Change PermuteFactors of operands of target node
  {
    const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
    const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
    const auto backend = op_seq_li->backend();
    const operand::PermuteFactor removed_factor{backend, backend_layout};
    const operand::PermuteFactor new_factor{backend, frontend_layout};
    for (const auto &input : node.getInputs())
    {
      // The old use-factor may only be dropped when no OTHER consumer of this
      // input still runs on the same backend with the old layout.
      bool canRemove = true;
      for (const auto &use : _graph.operands().at(input).getUses().list())
      {
        if (use != node_index)
        {
          const auto &use_op_seq_index = _lowered_graph.op_seqs().getOperation(use);
          auto use_op_seq_li = _lowered_graph.getLowerInfo(use_op_seq_index);
          if (use_op_seq_li->backend() == backend && use_op_seq_li->layout() == backend_layout)
          {
            canRemove = false;
            break;
          }
        }
      }

      auto lower_info = _lowered_graph.getLowerInfo(input);
      if (canRemove)
      {
        lower_info->removeUsePermuteFactor(removed_factor);
      }
      lower_info->addUsePermuteFactor(new_factor);

      // Whether if node's input is an input of model or a constant
      if (_graph.operands().at(input).getDef().size() == 0)
      {
        assert(_graph.getInputs().contains(input) || _graph.operands().at(input).isConstant());
        lower_info->removeDefPermuteFactor(removed_factor);
        lower_info->addDefPermuteFactor(new_factor);
      }
    }

    for (const auto &output : node.getOutputs())
    {
      auto lower_info = _lowered_graph.getLowerInfo(output);
      lower_info->removeDefPermuteFactor(removed_factor);
      lower_info->addDefPermuteFactor(new_factor);

      // Whether if node's output is an output of model
      if (_graph.operands().at(output).getUses().size() == 0)
      {
        assert(_graph.getOutputs().contains(output));
        lower_info->removeUsePermuteFactor(removed_factor);
        lower_info->addUsePermuteFactor(new_factor);
      }
    }
  }
}
+
+void PermutationOperationPass::visit(const operation::FullyConnected &node)
+{
+ const auto &input_ind = node.getInputs().at(operation::FullyConnected::Input::INPUT);
+ const auto &input_obj = _graph.operands().at(input_ind);
+ const auto &input_shape = input_obj.shape();
+
+ if (input_shape.rank() == 4)
+ {
+ changeToKeepLayout(node);
+ }
+}
+
+void PermutationOperationPass::visit(const operation::Gather &node)
+{
+ const auto &input_ind = node.getInputs().at(operation::Gather::Input::INPUT);
+ const auto &input_obj = _graph.operands().at(input_ind);
+ const auto &input_shape = input_obj.shape();
+
+ const auto &output_ind = node.getOutputs().at(0);
+ const auto &output_obj = _graph.operands().at(output_ind);
+ const auto &output_shape = output_obj.shape();
+
+ if (input_shape.rank() >= 4 || output_shape.rank() >= 4)
+ {
+ changeToKeepLayout(node);
+ }
+}
+
+void PermutationOperationPass::visit(const operation::Reshape &node)
+{
+ const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+ const auto &input_obj = _graph.operands().at(input_ind);
+ const auto &input_shape = input_obj.shape();
+
+ const auto &output_ind = node.getOutputs().at(0);
+ const auto &output_obj = _graph.operands().at(output_ind);
+ const auto &output_shape = output_obj.shape();
+
+ if (input_shape.rank() >= 4 || output_shape.rank() >= 4)
+ {
+ changeToKeepLayout(node);
+ }
+}
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/PermutationOperationPass.h b/runtime/onert/core/src/ir/pass/PermutationOperationPass.h
new file mode 100644
index 000000000..e68065bfb
--- /dev/null
+++ b/runtime/onert/core/src/ir/pass/PermutationOperationPass.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
+#define __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
+
+#include "ir/OperationVisitor.h"
+#include "LoweredOperationPass.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace pass
+{
+
/**
 * @brief Pass over lowered operations that keeps layout-sensitive operations
 *        (FullyConnected / Gather / Reshape on high-rank tensors) in the
 *        model's frontend layout.
 */
class PermutationOperationPass : public LoweredOperationPass, public OperationVisitor
{
public:
  // Inherit the base-class constructors unchanged.
  using LoweredOperationPass::LoweredOperationPass;

public:
  std::string id() final { return "PermutationOperationPass"; }

public:
  // Dispatches each operation to the visit() overloads below.
  void callback(const OperationIndex &i, Operation &n) final;

public:
  void visit(const operation::FullyConnected &) final;
  void visit(const operation::Gather &) final;
  void visit(const operation::Reshape &) final;

private:
  // Re-lowers the given operation into its own frontend-layout OpSequence.
  void changeToKeepLayout(const Operation &);
};
+
+} // namespace pass
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
diff --git a/runtime/onert/core/src/ir/verifier/Verifier.cc b/runtime/onert/core/src/ir/verifier/Verifier.cc
new file mode 100644
index 000000000..c4655ee42
--- /dev/null
+++ b/runtime/onert/core/src/ir/verifier/Verifier.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Verifier.h"
+
+#include "ir/Graph.h"
+#include "ir/OperationIndexMap.h"
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace verifier
+{
+
+//
+// DAGChecker
+//
+
/**
 * @brief Return true when the operation graph contains no cycle.
 *
 * Depth-first search over every operation: `visited` prevents re-expansion,
 * `on_stack` marks the current DFS path — reaching an on-stack node again
 * means a back edge, i.e. a cycle.
 */
bool DAGChecker::verify(const Graph &graph) const
{
  auto &operations = graph.operations();
  bool cyclic = false;

  OperationIndexMap<bool> visited;
  operations.iterate(
      [&](const OperationIndex &index, const Operation &) { visited[index] = false; });
  OperationIndexMap<bool> on_stack = visited; // Copy from visited

  // std::function (not auto) so the lambda can call itself recursively.
  std::function<void(const OperationIndex &index, const Operation &)> dfs_recursive =
      [&](const OperationIndex &index, const Operation &node) -> void {
    if (on_stack[index])
      cyclic = true;
    if (visited[index])
      return;
    visited[index] = true;
    on_stack[index] = true;

    // Successors of an operation are the operations consuming its outputs.
    for (auto output : node.getOutputs())
    {
      const auto &operand = graph.operands().at(output);
      for (const auto &use : operand.getUses().list())
      {
        dfs_recursive(use, graph.operations().at(use));
      }
    }

    on_stack[index] = false;
  };

  // Start a DFS from every operation so disconnected components are covered.
  operations.iterate(dfs_recursive);

  return !cyclic;
}
+
+//
// EdgeConsistencyChecker
+//
+
+bool EdgeConsistencyChecker::verify(const Graph &graph) const
+{
+ auto &operations = graph.operations();
+ uint32_t mismatches = 0;
+ operations.iterate([&](const OperationIndex &index, const Operation &node) {
+ for (auto operand_index : node.getInputs())
+ {
+ auto &operand = graph.operands().at(operand_index);
+ mismatches += (operand.getUses().contains(index) ? 0 : 1);
+ }
+ for (auto operand_index : node.getOutputs())
+ {
+ auto &operand = graph.operands().at(operand_index);
+ mismatches += (operand.getDef().contains(index) ? 0 : 1);
+ }
+ });
+ return mismatches == 0;
+}
+
+} // namespace verifier
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/verifier/Verifier.h b/runtime/onert/core/src/ir/verifier/Verifier.h
new file mode 100644
index 000000000..0bc22bc47
--- /dev/null
+++ b/runtime/onert/core/src/ir/verifier/Verifier.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_GRAPH_VERIFIER_VERIFIER_H__
+#define __ONERT_GRAPH_VERIFIER_VERIFIER_H__
+
+namespace onert
+{
+namespace ir
+{
+class Graph;
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+namespace verifier
+{
+
/// Interface of a graph verification pass: verify() returns true when the
/// given graph satisfies the checker's invariant.
struct IVerifier
{
  virtual ~IVerifier() = default;
  virtual bool verify(const Graph &graph) const = 0;
};
+
+} // namespace verifier
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+namespace verifier
+{
+
/// Checks that the operation graph is acyclic (a DAG).
class DAGChecker : public IVerifier
{
public:
  bool verify(const Graph &graph) const override;
};
+
/// Checks that operand use/def lists mirror operation input/output lists.
class EdgeConsistencyChecker : public IVerifier
{
public:
  bool verify(const Graph &graph) const override;
};
+
+} // namespace verifier
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_GRAPH_VERIFIER_VERIFIER_H__
diff --git a/runtime/onert/core/src/library_info.cc b/runtime/onert/core/src/library_info.cc
new file mode 100644
index 000000000..6d7579cca
--- /dev/null
+++ b/runtime/onert/core/src/library_info.cc
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+volatile const char info[] = "library information : runtime=onert";
diff --git a/runtime/onert/core/src/util/ConfigSource.cc b/runtime/onert/core/src/util/ConfigSource.cc
new file mode 100644
index 000000000..45cce662e
--- /dev/null
+++ b/runtime/onert/core/src/util/ConfigSource.cc
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include "util/ConfigSource.h"
#include "util/GeneralConfigSource.h"
#include "util/EnvConfigSource.h"

#include <algorithm>
#include <array>
#include <cassert>
#include <memory>
#include <string>
#include <unordered_map>
+
+namespace onert
+{
+namespace util
+{
+
+static std::unique_ptr<IConfigSource> _source;
+
+void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
+
/**
 * @brief Return the process-wide config source, lazily creating a default.
 *
 * When nothing was installed via config_source(setter), the default is
 * EnvConfigSource if built with ENVVAR_FOR_DEFAULT_CONFIG, otherwise
 * GeneralConfigSource. Not thread-safe — TODO confirm callers are single-threaded.
 */
static IConfigSource *config_source()
{
  if (!_source)
  {
#ifdef ENVVAR_FOR_DEFAULT_CONFIG
    // Default ConfigSource is EnvConfigSource
    _source = std::make_unique<EnvConfigSource>();
#else
    _source = std::make_unique<GeneralConfigSource>();
#endif // ENVVAR_FOR_DEFAULT_CONFIG
  }
  return _source.get();
}
+
/**
 * @brief Look up a config key in the active source, falling back to the
 *        compiled-in default from util/Config.lst.
 *
 * @param key Config key, matching a CONFIG(Name, ...) entry name
 * @return The source's value, or the default, or "" when neither exists
 */
static std::string getConfigOrDefault(const std::string &key)
{
  // Default table is built once, on first call, from the X-macro list.
  static std::unordered_map<std::string, std::string> defaults;
  if (defaults.empty())
  {
#define CONFIG(Name, Type, Default)             \
  {                                             \
    auto name = std::string{#Name};             \
    defaults.emplace(name, std::string{Default}); \
  }

#include "util/Config.lst"

#undef CONFIG
  }

  // Treat empty string and absence of the value to be the same
  auto ret = config_source()->get(key);
  if (ret.empty())
  {
    auto itr = defaults.find(key);
    if (itr != defaults.end())
    {
      // Return the default value if exists
      ret = itr->second;
    }
  }

  return ret;
}
+
/**
 * @brief Interpret a config string as a boolean.
 *
 * Only the exact, case-sensitive tokens "0", "OFF", "FALSE", "N", "NO" count
 * as false; every other value — including the empty string — counts as true.
 */
bool toBool(const std::string &val)
{
  static const std::array<std::string, 5> false_tokens{"0", "OFF", "FALSE", "N", "NO"};
  return std::none_of(false_tokens.begin(), false_tokens.end(),
                      [&val](const std::string &token) { return token == val; });
}
+
+int toInt(const std::string &val) { return std::stoi(val); }
+
+bool getConfigBool(const std::string &key)
+{
+ auto raw = getConfigOrDefault(key);
+ return toBool(raw);
+}
+
+int getConfigInt(const std::string &key)
+{
+ auto raw = getConfigOrDefault(key);
+ return toInt(raw);
+}
+
+std::string getConfigString(const std::string &key) { return getConfigOrDefault(key); }
+
+} // namespace util
+} // namespace onert
+
namespace onert
{
namespace util
{
namespace config
{

// X-macro expansion: defines one `const char *Name = "Name";` key constant per
// entry of util/Config.lst — presumably declared extern in a header; confirm.
#define CONFIG(Name, Type, Default) const char *Name = #Name;

#include "util/Config.lst"

#undef CONFIG

} // namespace config
} // namespace util
} // namespace onert
diff --git a/runtime/onert/core/src/util/EnvConfigSource.cc b/runtime/onert/core/src/util/EnvConfigSource.cc
new file mode 100644
index 000000000..0d25b7353
--- /dev/null
+++ b/runtime/onert/core/src/util/EnvConfigSource.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/EnvConfigSource.h"
+
+#include <cstdlib>
+
+namespace onert
+{
+namespace util
+{
+
+std::string EnvConfigSource::get(const std::string &key) const
+{
+ const char *value = std::getenv(key.c_str());
+ if (value != nullptr)
+ {
+ return value;
+ }
+ else
+ {
+ return GeneralConfigSource::get(key);
+ }
+}
+
+} // namespace util
+} // namespace onert
diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.cc b/runtime/onert/core/src/util/EventCollectorGlobal.cc
new file mode 100644
index 000000000..ab8ddab60
--- /dev/null
+++ b/runtime/onert/core/src/util/EventCollectorGlobal.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/EventCollectorGlobal.h"
+
+#include <cassert>
+#include <fstream>
+
+#include "util/ConfigSource.h"
+
+namespace onert
+{
+namespace util
+{
+
// Wire the collector to the recorder owned by this singleton: events pushed
// through collector() accumulate in _recorder until destruction.
EventCollectorGlobal::EventCollectorGlobal() : _recorder{}, _collector{&_recorder}
{
  // DO NOTHING
}
+
// Flush all recorded events to a JSON file at static-object teardown; skipped
// entirely when nothing was recorded, so the file is not created needlessly.
EventCollectorGlobal::~EventCollectorGlobal()
{
  if (!_recorder.empty())
  {
    // TODO Need better way for saved file path than the hardcoded path
    std::ofstream ofs{"trace.global.json"};
    _recorder.writeToFile(ofs);
  }
}
+
// Meyers-singleton accessor; the function-local static gives thread-safe
// one-time initialization (C++11) and flush-on-exit via its destructor.
EventCollectorGlobal &EventCollectorGlobal::get()
{
  static EventCollectorGlobal instance;
  return instance;
}
+
// Scope-based duration marker: constructor emits a BEGIN event for the tag,
// destructor emits the matching END — pairing is guaranteed by scoping.
EventDurationBlock::EventDurationBlock(const std::string &tag) : _tag{tag}
{
  auto &glob = EventCollectorGlobal::get();
  glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
}
EventDurationBlock::~EventDurationBlock()
{
  auto &glob = EventCollectorGlobal::get();
  glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
}
+
// Manually driven duration marker: the caller must invoke begin() then end().
// _pair tracks pairing state — true means "no begin() pending an end()".
EventDurationManual::EventDurationManual(const std::string &tag) : _tag{tag}, _pair{true} {}

EventDurationManual::~EventDurationManual()
{
  // Check if it has called begin-end pair
  assert(_pair);
}

// Emit the BEGIN event and mark an end() as pending.
void EventDurationManual::begin()
{
  _pair = false;
  auto &glob = EventCollectorGlobal::get();
  glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
}

// Emit the END event; asserts begin() was called first.
void EventDurationManual::end()
{
  assert(!_pair);
  _pair = true;
  auto &glob = EventCollectorGlobal::get();
  glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
}
+
+} // namespace util
+} // namespace onert
diff --git a/runtime/onert/core/src/util/GeneralConfigSource.cc b/runtime/onert/core/src/util/GeneralConfigSource.cc
new file mode 100644
index 000000000..7d2757e58
--- /dev/null
+++ b/runtime/onert/core/src/util/GeneralConfigSource.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/GeneralConfigSource.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace util
+{
+
+std::string GeneralConfigSource::get(const std::string &key) const
+{
+ auto itr = _map.find(key);
+ if (itr == _map.end())
+ {
+ return "";
+ }
+ else
+ {
+ return itr->second;
+ }
+}
+
/// Store (or overwrite) a key/value pair; logged through the VERBOSE macro.
void GeneralConfigSource::set(const std::string &key, const std::string &val)
{
  VERBOSE(GeneralConfigSource) << key << " : " << val << std::endl;
  _map[key] = val;
}
+
+} // namespace util
+} // namespace onert
diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc
new file mode 100644
index 000000000..ace16959d
--- /dev/null
+++ b/runtime/onert/core/src/util/ShapeInference.cc
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/Utils.h"
+#include "ir/InternalType.h"
+#include "ir/Shape.h"
+#include "ir/operation/AvgPool2D.h"
+#include "ir/operation/MaxPool2D.h"
+#include "util/ShapeInference.h"
+
+namespace onert
+{
+namespace shape_inference
+{
+
+//
+// Helper functions
+//
+
+namespace
+{
+
// Ceiling integer division for strictly positive operands: the smallest q
// with q * divisor >= dividend. SFINAE restricts it to integral types and the
// result uses the common type of both arguments.
template <typename T, typename U>
typename std::enable_if<std::is_integral<T>::value && std::is_integral<U>::value,
                        typename std::common_type<T, U>::type>::type
ceil_div(T dividend, U divisor)
{
  assert(dividend > 0 && divisor > 0 && "this implementations is for positive numbers only");
  using R = typename std::common_type<T, U>::type;
  const R num = static_cast<R>(dividend);
  const R den = static_cast<R>(divisor);
  return num / den + (num % den != 0 ? 1 : 0);
}
+
// Calculate the result of broadcast of two shapes
/**
 * Dimensions are aligned from the right (trailing axes); a missing or size-1
 * dimension broadcasts against the other operand's dimension.
 *
 * @throw std::runtime_error when a dimension pair is incompatible
 *        (both != 1 and unequal)
 */
ir::Shape broadcastShapes(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape)
{
  ir::Shape out_shape;
  auto max_rank = std::max(lhs_shape.rank(), rhs_shape.rank());

  for (int idx = 0; idx < max_rank; ++idx)
  {
    // Go over operands dimensions from right to left
    int lhs_idx = lhs_shape.rank() - idx - 1;
    int rhs_idx = rhs_shape.rank() - idx - 1;

    // A rank shorter than idx contributes an implicit dimension of 1.
    int32_t lhs_dim = lhs_idx >= 0 ? lhs_shape.dim(lhs_idx) : 1;
    int32_t rhs_dim = rhs_idx >= 0 ? rhs_shape.dim(rhs_idx) : 1;

    if (lhs_dim != 1 && rhs_dim != 1 && lhs_dim != rhs_dim)
      throw std::runtime_error("Incompatible shapes for broadcast");

    // prepend: dims are produced right-to-left, so build the shape backwards.
    out_shape.prepend(std::max(lhs_dim, rhs_dim));
  }

  return out_shape;
}
+
// Calculate output height and width of convolution-like operation
/**
 * Implements the standard SAME / VALID / EXPLICIT padding formulas shared by
 * Conv2D, DepthwiseConv2D and the pooling ops.
 *
 * @return {out_height, out_width}
 */
std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, const int ker_h,
                                               const int ker_w, const ir::Padding pad,
                                               const ir::Stride stride)
{
  int32_t out_h = 0, out_w = 0;

  switch (pad.type)
  {
    case ir::PaddingType::SAME:
      // SAME: output covers the whole input, ceil(in / stride).
      out_h = ceil_div(in_h, stride.vertical);
      out_w = ceil_div(in_w, stride.horizontal);
      break;
    case ir::PaddingType::VALID:
      // VALID: only fully-overlapping kernel positions.
      out_h = ceil_div(in_h - ker_h + 1, stride.vertical);
      out_w = ceil_div(in_w - ker_w + 1, stride.horizontal);
      break;
    case ir::PaddingType::EXPLICIT:
      out_h = (in_h + pad.param.top + pad.param.bottom - ker_h) / stride.vertical + 1;
      out_w = (in_w + pad.param.left + pad.param.right - ker_w) / stride.horizontal + 1;
      break;
    default:
      assert(false);
  }

  return {out_h, out_w};
}
+
+} // namespace
+
+//
+// Shape inference
+//
+
/// Element-wise binary op output shape: the broadcast of the two input shapes.
Shapes inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape)
{
  return {broadcastShapes(lhs_shape, rhs_shape)};
}
+
/// AvgPool2D output shape for an NHWC input (the only supported layout here).
Shapes inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param,
                         const ir::Layout layout)
{
  assert(layout == ir::Layout::NHWC);
  auto ifm_shape = in_shape.asFeature(layout);
  const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
                                                  param.padding, param.stride);
  // Pooling don't change number of channels and batch size
  return {ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C}};
}
+
/**
 * @brief Concat output shape: inputs' shapes with the concat-axis dimensions
 *        summed; all other dimensions must match (asserted in debug builds).
 *
 * NOTE(review): in_shapes[0] is read unconditionally — assumes a non-empty
 * input list; confirm callers guarantee this.
 */
Shapes inferConcatShape(const Shapes &in_shapes, const ir::operation::Concat::Param &param)
{
  const int32_t concat_axis = param.axis;
  const auto &first_in_shape = in_shapes[0];

  // Check that all shapes are equal except for concat axis dimension
  for (const auto &in_shape : in_shapes)
  {
    assert(in_shape.rank() == first_in_shape.rank());
    for (int64_t dim_idx = 0; dim_idx < in_shape.rank(); ++dim_idx)
      assert(dim_idx == concat_axis || in_shape.dim(dim_idx) == first_in_shape.dim(dim_idx));
  }

  // Calculate output shape
  ir::Shape out_shape(first_in_shape);
  out_shape.dim(concat_axis) = 0;
  for (const auto &in_shape : in_shapes)
    out_shape.dim(concat_axis) += in_shape.dim(concat_axis);
  return {out_shape};
}
+
/// MaxPool2D output shape for an NHWC input; same arithmetic as inferAvgPoolShape.
Shapes inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param,
                         const ir::Layout layout)
{
  assert(layout == ir::Layout::NHWC);
  auto ifm_shape = in_shape.asFeature(layout);
  const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
                                                  param.padding, param.stride);
  // Pooling don't change number of channels and batch size
  return {ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C}};
}
+
/// Conv2D output shape for NHWC input: spatial dims from the padding/stride
/// formula, channel count from the kernel's depth_out (kf_shape.N).
Shapes inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape,
                        const ir::operation::Conv2D::Param &param, ir::Layout layout)
{
  assert(layout == ir::Layout::NHWC);
  auto ifm_shape = in_shape.asFeature(layout);

  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]
  auto kf_shape = ker_shape.asFeature(layout);
  assert(ifm_shape.C == kf_shape.C);

  const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W,
                                                  param.padding, param.stride);

  return {ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.N}};
}
+
/// DepthwiseConv2D output shape for NHWC input: spatial dims from the
/// padding/stride formula, channel count = input channels * multiplier
/// (carried by the kernel's last dimension, kf_shape.C).
Shapes inferDepthwiseConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape,
                                 const ir::operation::DepthwiseConv2D::Param &param,
                                 ir::Layout layout)
{
  assert(layout == ir::Layout::NHWC);
  auto ifm_shape = in_shape.asFeature(layout);

  // Kernel format is [1, kernel_height, kernel_width, depth_out]
  auto kf_shape = ker_shape.asFeature(layout);
  assert(kf_shape.C == static_cast<int32_t>(ifm_shape.C * param.multiplier));
  assert(kf_shape.N == 1);

  const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W,
                                                  param.padding, param.stride);

  return {ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.C}};
}
+
/**
 * @brief FullyConnected output shape: [batch, num_units].
 *
 * The (possibly multi-dimensional) input is implicitly flattened; batch is
 * total element count divided by the weight row length (ker_shape.dim(1)),
 * asserted to divide evenly in debug builds.
 */
Shapes inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape)
{
  assert(in_shape.rank() >= 2);
  assert(ker_shape.rank() == 2);

  const auto input_size_with_batch = in_shape.num_elements();
  const auto num_units = ker_shape.dim(0);
  const auto input_size = ker_shape.dim(1);
  const auto batch_size = input_size_with_batch / input_size;
  assert(input_size_with_batch % input_size == 0);

  return {{ir::Shape({static_cast<int32_t>(batch_size), num_units})}};
}
+
+/*
+ StaticInferer
+*/
+
/**
 * @brief Static shape inference for Reshape: decide whether the output
 *        operand's shape can stay static or must be marked DYNAMIC.
 *
 * The output becomes DYNAMIC when (a) the input is already dynamic or
 * (b) the target shape comes from a non-constant second input. Otherwise the
 * output shape is left untouched (the frontend already stored it).
 */
void StaticInferer::visit(const ir::operation::Reshape &op)
{
  const auto input_idx{op.getInputs().at(ir::operation::Reshape::Input::INPUT)};
  const auto &input = _operands.at(input_idx);

  // get mutable output operand
  const auto output_idx = op.getOutputs().at(0);
  ir::Operand &output = _operands.at(output_idx);

  // if input is dynamic, output also becomes dynamic
  if (input.info().memAllocType() == ir::MemAllocType::DYNAMIC)
  {
    output.info().memAllocType(ir::MemAllocType::DYNAMIC);
    return;
  }

  if (op.getInputs().size() == 1)
  {
    // no change on output shape
    return;
  }

  // Let's check the second input
  const auto shape_idx{op.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
  const auto &shape = _operands.at(shape_idx);

  if (shape.isConstant())
  {
    // if shape is from Const, TFLC put the shape of output into tensor
    // no change on output shape
    return;
  }

  // if shape is NOT Const, set output shape to be dynamic_
  output.info().memAllocType(ir::MemAllocType::DYNAMIC);
}
+
+} // namespace shape_inference
+} // namespace onert
diff --git a/runtime/onert/core/src/util/logging.cc b/runtime/onert/core/src/util/logging.cc
new file mode 100644
index 000000000..6309d25e5
--- /dev/null
+++ b/runtime/onert/core/src/util/logging.cc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
 * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/logging.h"
+
// Meyers-singleton accessor for the global logging context; the function-local
// static guarantees thread-safe one-time construction (C++11).
onert::util::logging::Context &onert::util::logging::Context::get() noexcept
{
  static Context ctx;
  return ctx;
}
diff --git a/runtime/onert/frontend/CMakeLists.txt b/runtime/onert/frontend/CMakeLists.txt
new file mode 100644
index 000000000..5ea6cdadd
--- /dev/null
+++ b/runtime/onert/frontend/CMakeLists.txt
@@ -0,0 +1 @@
+# Recurse into every frontend subdirectory.
+# NOTE(review): add_subdirectories() is not a CMake builtin — presumably a
+# helper macro defined in the repository's shared CMake utilities; confirm.
+add_subdirectories()
diff --git a/runtime/onert/frontend/base_loader/CMakeLists.txt b/runtime/onert/frontend/base_loader/CMakeLists.txt
new file mode 100644
index 000000000..921206c31
--- /dev/null
+++ b/runtime/onert/frontend/base_loader/CMakeLists.txt
@@ -0,0 +1,7 @@
+# base_loader is a header-only (INTERFACE) library shared by the TFLite and
+# Circle frontends; skip it entirely unless at least one of those is built.
+if(NOT BUILD_TFLITE_LOADER AND NOT BUILD_CIRCLE_LOADER)
+  return()
+endif(NOT BUILD_TFLITE_LOADER AND NOT BUILD_CIRCLE_LOADER)
+
+# INTERFACE library: no sources of its own, only propagated usage requirements.
+add_library(base_loader INTERFACE)
+target_include_directories(base_loader INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_link_libraries(base_loader INTERFACE onert_core)
diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h
new file mode 100644
index 000000000..f87c6ea77
--- /dev/null
+++ b/runtime/onert/frontend/base_loader/include/base_loader.h
@@ -0,0 +1,1362 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BASE_LOADER_BASE_LOADER_H__
+#define __BASE_LOADER_BASE_LOADER_H__
+
+#include "ir/Graph.h"
+#include "ir/Operations.Include.h"
+
+#include <map>
+#include <memory>
+#include <fstream>
+#include <limits>
+
+namespace onert
+{
+namespace base_loader
+{
+
+/**
+ * @brief Common base for flatbuffer model loaders (TFLite / Circle frontends).
+ *
+ * LoaderDomain supplies the generated flatbuffer schema types (Model, Tensor,
+ * Operator, ...); SpecificLoader is the deriving, format-specific loader
+ * (CRTP-style — the derived type is passed as a template argument).
+ */
+template <typename LoaderDomain, typename SpecificLoader> class BaseLoader
+{
+  // Aliases into the format-specific flatbuffer schema
+  using Verifier = typename LoaderDomain::Verifier;
+  using ActivationFunctionType = typename LoaderDomain::ActivationFunctionType;
+  using Buffer = typename LoaderDomain::Buffer;
+  using BuiltinOperator = typename LoaderDomain::BuiltinOperator;
+  using CustomOptionsFormat = typename LoaderDomain::CustomOptionsFormat;
+  using Model = typename LoaderDomain::Model;
+  using Operator = typename LoaderDomain::Operator;
+  using Padding = typename LoaderDomain::Padding;
+  using Pool2DOptions = typename LoaderDomain::Pool2DOptions;
+  using SubGraph = typename LoaderDomain::SubGraph;
+  using Tensor = typename LoaderDomain::Tensor;
+  using TensorType = typename LoaderDomain::TensorType;
+
+public:
+  /**
+   * @brief Construct a new Loader object
+   *
+   * @param graph reference on primary subgraph
+   */
+  explicit BaseLoader(std::unique_ptr<ir::Graph> &graph) : _primary_subgraph(graph), _model{nullptr}
+  {
+  }
+
+  /**
+   * @brief Load a model from file
+   *
+   * @param file_path path to the flatbuffer model file
+   */
+  void loadFromFile(const char *file_path);
+
+protected:
+  // Non-virtual protected destructor: deletion only through the derived loader
+  ~BaseLoader() = default;
+
+  void loadModel();
+
+  // Helper functions
+  ir::Activation convertActivation(ActivationFunctionType type);
+  ir::DataType tensorTypeToDataType(TensorType type);
+
+  // Create operands from tflite::Tensor
+  ir::OperandIndex loadOperand(const Tensor *tensor, ir::Graph &subg);
+  void loadOperationIO(const Operator *op, ir::OperandIndexSequence &inputs,
+                       ir::OperandIndexSequence &outputs);
+  // Create operations from Operator
+  void loadOperation(const Operator *op, ir::Graph &subg);
+  // Load Strides and Paddings from options to param
+  template <typename Param, typename OptionsType>
+  void loadStridesAndPaddings(Param &param, const OptionsType *options);
+  // Load Pool2D param
+  template <typename Param> void loadPool2D(Param &param, const Pool2DOptions *options);
+
+  // Operations — one loader per supported builtin/custom operator
+  void loadConv2D(const Operator *op, ir::Graph &subg);
+  void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg);
+  void loadTransposeConv(const Operator *op, ir::Graph &subg);
+  void loadAvgPool2D(const Operator *op, ir::Graph &subg);
+  void loadReshape(const Operator *op, ir::Graph &subg);
+  void loadSoftmax(const Operator *op, ir::Graph &subg);
+  void loadMaxPool2D(const Operator *op, ir::Graph &subg);
+  void loadConcatenation(const Operator *op, ir::Graph &subg);
+  void loadInstanceNorm(const Operator *op, ir::Graph &subg);
+  void loadFC(const Operator *op, ir::Graph &subg);
+  void loadAdd(const Operator *op, ir::Graph &subg);
+  void loadSub(const Operator *op, ir::Graph &subg);
+  void loadMul(const Operator *op, ir::Graph &subg);
+  void loadDiv(const Operator *op, ir::Graph &subg);
+  void loadPack(const Operator *op, ir::Graph &subg);
+  void loadRelu(const Operator *op, ir::Graph &subg);
+  void loadRelu6(const Operator *op, ir::Graph &subg);
+  void loadResizeBilinear(const Operator *op, ir::Graph &subg);
+  void loadRsqrt(const Operator *op, ir::Graph &subg);
+  void loadSqrt(const Operator *op, ir::Graph &subg);
+  void loadSquaredDifference(const Operator *op, ir::Graph &subg);
+  void loadTanh(const Operator *op, ir::Graph &subg);
+  void loadTranspose(const Operator *op, ir::Graph &subg);
+  void loadMean(const Operator *op, ir::Graph &subg);
+  void loadReduceMax(const Operator *op, ir::Graph &subg);
+  void loadPad(const Operator *op, ir::Graph &subg);
+  void loadLogistic(const Operator *op, ir::Graph &subg);
+  void loadExp(const Operator *op, ir::Graph &subg);
+  void loadGather(const Operator *op, ir::Graph &subg);
+  void loadCustom(const Operator *op, ir::Graph &subg);
+  void loadSpaceToBatchND(const Operator *op, ir::Graph &subg);
+  void loadBatchToSpaceND(const Operator *op, ir::Graph &subg);
+  void loadReduceSum(const Operator *op, ir::Graph &subg);
+  void loadSqueeze(const Operator *op, ir::Graph &subg);
+  void loadPrelu(const Operator *op, ir::Graph &subg);
+  void loadSplit(const Operator *op, ir::Graph &subg);
+  void loadSlice(const Operator *op, ir::Graph &subg);
+  void loadStridedSlice(const Operator *op, ir::Graph &subg);
+  void loadUnpack(const Operator *op, ir::Graph &subg);
+  void loadMinimum(const Operator *op, ir::Graph &subg);
+  void loadMaximum(const Operator *op, ir::Graph &subg);
+  void loadCast(const Operator *op, ir::Graph &subg);
+  void loadComparison(const Operator *op, ir::Graph &subg);
+  void loadOneHot(const Operator *op, ir::Graph &subg);
+  void loadAbs(const Operator *op, ir::Graph &subg);
+  void loadSin(const Operator *op, ir::Graph &subg);
+  void loadShape(const Operator *op, ir::Graph &subg);
+
+protected:
+  // Buffer for loading (if needed)
+  std::vector<char> _buffer;
+  // Reference on loadable primary subgraph
+  std::unique_ptr<ir::Graph> &_primary_subgraph;
+  // Root of the parsed flatbuffer; set by loadModel(), nullptr until then
+  const Model *_model;
+  // Maps Tensor indices to onert Operands.
+  std::vector<ir::OperandIndex> _tensor_to_operand;
+  // Verifier
+  std::unique_ptr<Verifier> _verifier;
+};
+
+/**
+ * @brief Read the whole model file into _buffer, set up a flatbuffer
+ *        Verifier over it, then delegate to loadModel().
+ *
+ * @param file_path path to the model file
+ * @throw std::runtime_error when the file cannot be opened
+ */
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadFromFile(const char *file_path)
+{
+  std::ifstream stream(file_path, std::fstream::in | std::fstream::binary);
+
+  if (!stream)
+  {
+    std::string msg = "Failed to open file `";
+    msg += file_path;
+    msg += "`";
+    throw std::runtime_error{msg};
+  }
+
+  // Determine file size, then read it in one shot
+  stream.seekg(0, stream.end);
+  auto size = stream.tellg();
+  stream.seekg(0, stream.beg);
+
+  _buffer.resize(size);
+  stream.read(_buffer.data(), size);
+
+  stream.close();
+
+  // Prepare verifier
+  _verifier = std::make_unique<Verifier>(reinterpret_cast<const std::uint8_t *>(_buffer.data()),
+                                         _buffer.size());
+
+  loadModel();
+}
+
+/**
+ * @brief Map a flatbuffer fused-activation enum to ir::Activation.
+ * @throw std::runtime_error for an unsupported activation type
+ */
+template <typename LoaderDomain, typename SpecificLoader>
+ir::Activation BaseLoader<LoaderDomain, SpecificLoader>::convertActivation(
+    const ActivationFunctionType type)
+{
+  switch (type)
+  {
+    case ActivationFunctionType::ActivationFunctionType_NONE:
+      return ir::Activation::NONE;
+    case ActivationFunctionType::ActivationFunctionType_RELU:
+      return ir::Activation::RELU;
+    case ActivationFunctionType::ActivationFunctionType_RELU_N1_TO_1:
+      return ir::Activation::RELU1;
+    case ActivationFunctionType::ActivationFunctionType_RELU6:
+      return ir::Activation::RELU6;
+    case ActivationFunctionType::ActivationFunctionType_TANH:
+      return ir::Activation::TANH;
+    default:
+      throw std::runtime_error(std::string("Unsupported activation type: ")
+                                   .append(EnumNameActivationFunctionType(type)));
+  }
+}
+
+/**
+ * @brief Map a flatbuffer TensorType to ir::DataType.
+ * @throw std::runtime_error for an unsupported tensor type
+ */
+template <typename LoaderDomain, typename SpecificLoader>
+ir::DataType
+BaseLoader<LoaderDomain, SpecificLoader>::tensorTypeToDataType(const TensorType type)
+{
+  switch (type)
+  {
+    case TensorType::TensorType_FLOAT32:
+      return ir::DataType::FLOAT32;
+    case TensorType::TensorType_INT32:
+      return ir::DataType::INT32;
+    case TensorType::TensorType_BOOL:
+      return ir::DataType::BOOL8;
+    case TensorType::TensorType_UINT8:
+      return ir::DataType::QUANT8_ASYMM;
+    default:
+      throw std::runtime_error(
+          std::string("Unsupported tensor type: ").append(EnumNameTensorType(type)));
+  }
+}
+
+// Create an onert Operand in `subg` from a flatbuffer Tensor: shape, data
+// type, (single-scale) quantization parameters, and constant data when the
+// tensor's buffer is non-empty.
+template <typename LoaderDomain, typename SpecificLoader>
+ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Tensor *tensor,
+                                                                       ir::Graph &subg)
+{
+  ir::Shape shape;
+  // Shape - may be absent; the shape then stays empty
+  const auto *tensor_shape = tensor->shape();
+  if (tensor_shape != nullptr)
+  {
+    for (const auto &dim : *tensor_shape)
+    {
+      shape.append(dim);
+    }
+  }
+  // Type
+  ir::DataType data_type = tensorTypeToDataType(tensor->type());
+  // Quantization - only per-tensor (one scale / one zero point) is supported
+  auto q_params = tensor->quantization();
+  float scale = 0.0;
+  long zero_point = 0;
+  if (q_params != nullptr)
+  {
+    if (q_params->scale())
+    {
+      if (q_params->scale()->size() != 1)
+      {
+        throw std::runtime_error("Only 1 scale for a tensor is supported.");
+      }
+      scale = q_params->scale()->Get(0);
+    }
+
+    if (q_params->zero_point())
+    {
+      if (q_params->zero_point()->size() != 1)
+      {
+        throw std::runtime_error("Only 1 zero_point value for a tensor is supported.");
+      }
+      zero_point = q_params->zero_point()->Get(0);
+      // zero_point is long while TypeInfo.zero_point is defined as int32_t.
+      assert(zero_point >= std::numeric_limits<int32_t>::min());
+      assert(zero_point <= std::numeric_limits<int32_t>::max());
+    }
+    auto details = q_params->details_as_CustomQuantization();
+    if (details != nullptr)
+      throw std::runtime_error("Custom Quantization is not supported");
+  }
+  // Create TypeInfo
+  ir::TypeInfo type_info(data_type, scale, zero_point);
+  // Create operand
+  const auto operand_index = subg.addOperand(shape, type_info);
+
+  // Constant tensors are indicated by non-empty data.
+  const auto *data = _model->buffers()->Get(tensor->buffer())->data();
+  if (data != nullptr)
+  {
+    auto ptr = std::make_unique<ir::CachedData>(data->data(), data->size());
+    subg.setOperandValue(operand_index, std::move(ptr));
+  }
+
+  // Name unused
+  // auto name = tensor->name();
+  // Variable tensors are not supported
+  if (tensor->is_variable())
+    throw std::runtime_error("Variable tensor not supported!");
+
+  return operand_index;
+}
+
+// Translate an operator's flatbuffer tensor indices into operand index
+// sequences via the _tensor_to_operand mapping built while loading operands.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadOperationIO(const Operator *op,
+                                                               ir::OperandIndexSequence &inputs,
+                                                               ir::OperandIndexSequence &outputs)
+{
+  for (const std::int32_t idx : *op->inputs())
+  {
+    inputs.append(_tensor_to_operand[idx]);
+  }
+
+  for (const std::int32_t idx : *op->outputs())
+  {
+    outputs.append(_tensor_to_operand[idx]);
+  }
+}
+
+/**
+ * @brief Fill the stride and padding-type fields of @c param from an options
+ *        table that has stride_w/stride_h/padding accessors.
+ *
+ * Fix: the TFLite schema defines stride_h as the vertical stride and
+ * stride_w as the horizontal one; the assignments were previously swapped.
+ */
+template <typename LoaderDomain, typename SpecificLoader>
+template <typename Param, typename OptionsType>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadStridesAndPaddings(Param &param,
+                                                                      const OptionsType *options)
+{
+  // Strides: stride_h -> vertical, stride_w -> horizontal
+  param.stride.vertical = options->stride_h();
+  param.stride.horizontal = options->stride_w();
+  // Paddings
+  if (options->padding() == Padding::Padding_SAME)
+    param.padding.type = ir::PaddingType::SAME;
+  if (options->padding() == Padding::Padding_VALID)
+    param.padding.type = ir::PaddingType::VALID;
+  // param paddings indexes unused
+}
+
+// Fill a pooling Param: strides/paddings, kernel (filter) size and fused
+// activation, all taken from the flatbuffer Pool2DOptions.
+template <typename LoaderDomain, typename SpecificLoader>
+template <typename Param>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(Param &param,
+                                                          const Pool2DOptions *options)
+{
+  // Strides and Paddings
+  loadStridesAndPaddings(param, options);
+  // Filter width and height
+  param.kw = options->filter_width();
+  param.kh = options->filter_height();
+  // Activation
+  param.activation = convertActivation(options->fused_activation_function());
+}
+
+// Load a Conv2D operation: activation, strides and paddings from
+// Conv2DOptions. Dilation factors in the options are ignored.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadConv2D(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Conv2D::Param param;
+  const auto *options = op->builtin_options_as_Conv2DOptions();
+  param.activation = convertActivation(options->fused_activation_function());
+  loadStridesAndPaddings(param, options);
+  // Dilation h/w factor unused
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Conv2D(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a DepthwiseConv2D operation: activation, strides/paddings and the
+// depth multiplier from DepthwiseConv2DOptions. Dilation factors are ignored.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadDepthwiseConv2D(const Operator *op,
+                                                                   ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::DepthwiseConv2D::Param param;
+  const auto *options = op->builtin_options_as_DepthwiseConv2DOptions();
+  param.activation = convertActivation(options->fused_activation_function());
+  loadStridesAndPaddings(param, options);
+  // Multiplier
+  param.multiplier = options->depth_multiplier();
+  // Dilation h/w factor unused
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::DepthwiseConv2D(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a TransposeConv operation; only strides/paddings are taken from the
+// options (no fused activation in TransposeConvOptions).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadTransposeConv(const Operator *op,
+                                                                 ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::TransposeConv::Param param;
+  const auto *options = op->builtin_options_as_TransposeConvOptions();
+  loadStridesAndPaddings(param, options);
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::TransposeConv(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load an AvgPool2D operation; all parameters come from Pool2DOptions via
+// the shared loadPool2D() helper.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadAvgPool2D(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::AvgPool2D::Param param;
+  const auto *options = op->builtin_options_as_Pool2DOptions();
+
+  loadPool2D(param, options);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::AvgPool2D(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Reshape operation. The optional new_shape in ReshapeOptions is
+// ignored; the shape comes from the second input tensor when present.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadReshape(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  // const auto *options = op->builtin_options_as_ReshapeOptions();
+  // No params
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Reshape(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Softmax operation; beta (the scaling exponent) comes from
+// SoftmaxOptions.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSoftmax(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Softmax::Param param;
+  const auto *options = op->builtin_options_as_SoftmaxOptions();
+  // Beta
+  param.beta = options->beta();
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Softmax(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a MaxPool2D operation; all parameters come from Pool2DOptions via
+// the shared loadPool2D() helper.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadMaxPool2D(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::MaxPool2D::Param param;
+  const auto *options = op->builtin_options_as_Pool2DOptions();
+
+  loadPool2D(param, options);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::MaxPool2D(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Concatenation operation: axis from ConcatenationOptions, rank taken
+// from the output operand. The fused activation in the options is ignored.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadConcatenation(const Operator *op,
+                                                                 ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Concat::Param param;
+  const auto *options = op->builtin_options_as_ConcatenationOptions();
+  // Axis
+  param.axis = options->axis();
+  param.rank = subg.operands().at(outputs.at(0)).shape().rank();
+  // activation unused
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Concat(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load an InstanceNorm operation (Circle extension): fused activation and
+// epsilon from InstanceNormOptions, with epsilon == 0 treated as "use default".
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadInstanceNorm(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::InstanceNorm::Param param;
+  const auto *options = op->builtin_options_as_InstanceNormOptions();
+
+  param.activation = convertActivation(options->fused_activation_function());
+  // Use default value 1e-5 if value of epsilon is zero
+  param.epsilon = options->epsilon() == 0.f ? 1e-5 : options->epsilon();
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::InstanceNorm(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a FullyConnected operation. When a FLOAT32 input feeds QUANT8_ASYMM
+// weights, the weight operand's type is reinterpreted as QUANT8_SYMM —
+// NOTE(review): presumably the hybrid-quantized FC convention; confirm.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadFC(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  const auto &input_operand = subg.operands().at(inputs.at(ir::operation::FullyConnected::INPUT));
+  auto &weights_operand = subg.operands().at(inputs.at(ir::operation::FullyConnected::WEIGHT));
+  if (input_operand.typeInfo().type() == ir::DataType::FLOAT32 &&
+      weights_operand.typeInfo().type() == ir::DataType::QUANT8_ASYMM)
+  {
+    weights_operand.type(ir::DataType::QUANT8_SYMM);
+  }
+
+  ir::operation::FullyConnected::Param param;
+  const auto *options = op->builtin_options_as_FullyConnectedOptions();
+
+  param.activation = convertActivation(options->fused_activation_function());
+  // weights_format unused
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::FullyConnected(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load an Add operation; the fused activation comes from AddOptions.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadAdd(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Add::Param param;
+  const auto *options = op->builtin_options_as_AddOptions();
+
+  param.activation = convertActivation(options->fused_activation_function());
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Add(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Sub operation; the fused activation comes from SubOptions.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSub(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Sub::Param param;
+  const auto *options = op->builtin_options_as_SubOptions();
+
+  param.activation = convertActivation(options->fused_activation_function());
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Sub(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Mul operation; the fused activation comes from MulOptions.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadMul(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Mul::Param param;
+  const auto *options = op->builtin_options_as_MulOptions();
+
+  param.activation = convertActivation(options->fused_activation_function());
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Mul(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Div operation; the fused activation comes from DivOptions.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadDiv(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Div::Param param;
+  const auto *options = op->builtin_options_as_DivOptions();
+
+  param.activation = convertActivation(options->fused_activation_function());
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Div(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Pack operation: value count and axis from PackOptions, rank taken
+// from the output operand.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadPack(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Pack::Param param;
+  const auto *options = op->builtin_options_as_PackOptions();
+  param.num = options->values_count();
+  param.axis = options->axis();
+  param.rank = subg.operands().at(outputs.at(0)).shape().rank();
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Pack(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a ReLU operation (no options).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a ReLU6 operation (no options).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu6(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU6(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a ResizeBilinear operation. The 'size' input must be a constant
+// tensor; its two values are folded into the output height/width params and
+// the size operand is dropped from the operation's inputs.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeBilinear(const Operator *op,
+                                                                  ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+  auto input = inputs.at(0);
+  auto size = inputs.at(1);
+
+  // FIXME Handle ResizeBilinearOptions.
+  if (!subg.operands().at(size).isConstant())
+    throw std::runtime_error("ResizeBilinear: non-constant 'size' is not supported.");
+
+  std::vector<std::int32_t> size_v = subg.operands().at(size).template asVector<std::int32_t>();
+
+  ir::operation::ResizeBilinear::Param param;
+  param.height_out = size_v[0];
+  param.width_out = size_v[1];
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::ResizeBilinear({input}, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load an RSQRT (reciprocal square root) operation (no options).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadRsqrt(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::RSQRT(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a SQRT operation (no options).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSqrt(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::SQRT(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a SquaredDifference operation (no options).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSquaredDifference(const Operator *op,
+                                                                     ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::SquaredDifference(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Tanh operation (no options).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadTanh(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Tanh(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Transpose operation. The 'perm' input must be a constant tensor;
+// its values are folded into the params and the perm operand is dropped
+// from the operation's inputs.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+  auto input = inputs.at(0);
+  auto perm = inputs.at(1);
+
+  if (!subg.operands().at(perm).isConstant())
+    throw std::runtime_error("Transpose: non-constant 'perm' is not supported.");
+
+  ir::operation::Transpose::Param param;
+  param.perm = subg.operands().at(perm).template asVector<int>();
+  param.rank = subg.operands().at(inputs.at(0)).shape().rank();
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Transpose({input}, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Mean reduction. The 'axes' input must be a constant tensor; it is
+// folded into the params together with keep_dims and the input rank.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadMean(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+  auto input = inputs.at(0);
+  auto axes = inputs.at(1);
+
+  if (!subg.operands().at(axes).isConstant())
+    throw std::runtime_error("Mean: non-constant 'axes' is not supported.");
+
+  ir::operation::Mean::Param param;
+  param.axes = subg.operands().at(axes).template asVector<int>();
+  param.keep_dims = op->builtin_options_as_ReducerOptions()->keep_dims();
+  param.rank = subg.operands().at(inputs.at(0)).shape().rank();
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Mean({input}, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+/**
+ * @brief Load a ReduceMax reduction.
+ *
+ * The 'axes' input must be a constant tensor; it is folded into the params
+ * together with keep_dims and the input rank.
+ * Fix: the non-constant-axes error message said "ReduceSum" (copy-paste from
+ * loadReduceSum); it now correctly names ReduceMax.
+ * @throw std::runtime_error when 'axes' is not constant
+ */
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadReduceMax(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+  auto input = inputs.at(0);
+  auto axes = inputs.at(1);
+
+  // FIXME Handle ReducerOptions.
+  if (!subg.operands().at(axes).isConstant())
+    throw std::runtime_error("ReduceMax: non-constant 'axes' is not supported.");
+
+  ir::operation::ReduceMax::Param param;
+  param.axes = subg.operands().at(axes).template asVector<int>();
+  param.keep_dims = op->builtin_options_as_ReducerOptions()->keep_dims();
+  param.rank = subg.operands().at(inputs.at(0)).shape().rank();
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::ReduceMax({input}, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Pad operation; only the input rank is recorded as a parameter
+// (the paddings tensor stays among the operation's inputs).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadPad(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Pad::Param param;
+  param.rank = subg.operands().at(inputs.at(0)).shape().rank();
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Pad(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Logistic (sigmoid) operation (no options).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadLogistic(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Logistic(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load an Exp operation (no options).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadExp(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Exp(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Gather operation: axis from GatherOptions, rank from the first
+// input operand.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadGather(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+  ir::operation::Gather::Param param;
+  param.axis = op->builtin_options_as_GatherOptions()->axis();
+  param.rank = subg.operands().at(inputs.at(0)).shape().rank();
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Gather(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a SpaceToBatchND operation (no options).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSpaceToBatchND(const Operator *op,
+                                                                  ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op{new ir::operation::SpaceToBatchND{inputs, outputs}};
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a BatchToSpaceND operation. 'crops' must be a constant all-zero
+// tensor of 4 values; it is validated then dropped from the operation's
+// inputs (only input and block_shape are kept).
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadBatchToSpaceND(const Operator *op,
+                                                                  ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+  auto input = inputs.at(0);
+  auto block_shape = inputs.at(1);
+  auto crops = inputs.at(2);
+
+  if (!subg.operands().at(crops).isConstant())
+    throw std::runtime_error("BatchToSpaceND: non-constant 'crops' is not supported.");
+
+  std::vector<std::int32_t> crops_v = subg.operands().at(crops).template asVector<std::int32_t>();
+  assert(crops_v.size() == 4);
+  if (crops_v != std::vector<std::int32_t>{0, 0, 0, 0})
+    throw std::runtime_error("BatchToSpaceND: 'crops' other than {0, 0, 0, 0} is not supported.");
+
+  std::unique_ptr<ir::Operation> new_op{
+      new ir::operation::BatchToSpaceND{{input, block_shape}, outputs}};
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a ReduceSum reduction. The 'axes' input must be a constant tensor;
+// it is folded into the params together with keep_dims and the input rank.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadReduceSum(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+  auto input = inputs.at(0);
+  auto axes = inputs.at(1);
+
+  // FIXME Handle ReducerOptions.
+  if (!subg.operands().at(axes).isConstant())
+    throw std::runtime_error("ReduceSum: non-constant 'axes' is not supported.");
+
+  ir::operation::ReduceSum::Param param;
+  param.axes = subg.operands().at(axes).template asVector<int>();
+  param.keep_dims = op->builtin_options_as_ReducerOptions()->keep_dims();
+  param.rank = subg.operands().at(inputs.at(0)).shape().rank();
+
+  std::unique_ptr<ir::Operation> new_op{new ir::operation::ReduceSum{{input}, outputs, param}};
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a custom (non-builtin) operation: the op id comes from the operator
+// code's custom_code string, and the raw FLEXBUFFERS option blob is copied
+// into the operation's userdata.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  auto *op_code = _model->operator_codes()->Get(op->opcode_index());
+  auto custom_op_id = op_code->custom_code()->str();
+
+  auto constraint = ir::OperandConstraint::createExact(inputs.size());
+
+  // NOTE assert() only checks in debug builds; unsupported formats pass
+  // silently in release builds.
+  assert(op->custom_options_format() == CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS &&
+         "Unsupported custom operation options format");
+
+  // NOTE(review): raw new[] with no matching delete here — ownership is
+  // assumed to transfer to the Custom operation via userdata; confirm it is
+  // released downstream.
+  size_t custom_op_data_size = op->custom_options()->size();
+  auto custom_op_data = new char[custom_op_data_size];
+  std::copy(op->custom_options()->begin(), op->custom_options()->end(), custom_op_data);
+
+  ir::operation::Custom::Userdata userdata{};
+  userdata.data = custom_op_data;
+  userdata.size = custom_op_data_size;
+
+  auto new_op =
+      std::make_unique<ir::operation::Custom>(constraint, inputs, outputs, custom_op_id, userdata);
+
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Squeeze operator. The squeeze_dims list from the flatbuffer options
+// is copied into the fixed-size Param::dims array; models requesting more
+// dimensions than the array can hold are rejected.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSqueeze(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Squeeze::Param param{};
+  const auto *options = op->builtin_options_as_SqueezeOptions();
+  const auto *dims = options->squeeze_dims();
+  if (dims)
+  {
+    // Bounds-check against the capacity of the fixed-size dims array.
+    if (dims->Length() > sizeof(param.dims) / sizeof(param.dims[0]))
+      throw std::runtime_error("Squeeze: 'param.ndims' is out of range.");
+    param.ndim = dims->Length();
+    for (int i = 0; i < param.ndim; ++i)
+      param.dims[i] = dims->Get(i);
+  }
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Squeeze(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a PRELU operator; a direct mapping with no extra parameters.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadPrelu(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::PReLU(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Split operator. The split axis must be a constant operand so its
+// scalar value can be stored in the operation parameters.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSplit(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+  // Note: tflite SPLIT takes the axis as input 0 and the tensor as input 1,
+  // the reverse of most operators.
+  auto input = inputs.at(1);
+  auto axis = inputs.at(0);
+
+  // FIXME Handle SplitOptions.
+  if (!subg.operands().at(axis).isConstant())
+    throw std::runtime_error("Split: non-constant 'axis' is not supported.");
+
+  ir::operation::Split::Param param{};
+  param.axis = subg.operands().at(axis).template asScalar<int>();
+  const auto *options = op->builtin_options_as_SplitOptions();
+  param.num_splits = options->num_splits();
+  param.rank = subg.operands().at(input).shape().rank();
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Split({input}, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Slice operator. Only the input rank is recorded as a parameter;
+// begin/size stay as regular operator inputs.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSlice(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Slice::Param param;
+  param.rank = subg.operands().at(inputs.at(0)).shape().rank();
+
+  std::unique_ptr<ir::Operation> new_op{new ir::operation::Slice{inputs, outputs, param}};
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a StridedSlice operator, carrying over the begin/end/shrink-axis masks
+// from the flatbuffer options into the IR parameters.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadStridedSlice(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::StridedSlice::Param param;
+
+  const auto *options = op->builtin_options_as_StridedSliceOptions();
+  param.begin_mask = options->begin_mask();
+  param.end_mask = options->end_mask();
+  param.shrink_axis_mask = options->shrink_axis_mask();
+  param.rank = subg.operands().at(inputs.at(0)).shape().rank();
+
+  std::unique_ptr<ir::Operation> new_op{new ir::operation::StridedSlice{inputs, outputs, param}};
+  subg.addOperation(std::move(new_op));
+}
+
+// Load an Unpack operator: number of outputs, axis, and input rank are taken
+// from the flatbuffer options / input shape.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadUnpack(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Unpack::Param param;
+  const auto *options = op->builtin_options_as_UnpackOptions();
+  param.num = options->num();
+  param.axis = options->axis();
+  param.rank = subg.operands().at(inputs.at(0)).shape().rank();
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Unpack(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a MINIMUM operator as an ir::operation::Min; no extra parameters.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadMinimum(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Min(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a MAXIMUM operator as an ir::operation::Max; no extra parameters.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadMaximum(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Max(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a Cast operator. As a side effect, any QUANT8_ASYMM input/output
+// operand is retyped to plain UINT8 — the cast itself handles the raw bytes,
+// so the asymmetric-quantization interpretation is dropped here.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadCast(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  auto qasymm8ToUint8 = [](ir::Operand &operand) {
+    if (operand.typeInfo().type() == ir::DataType::QUANT8_ASYMM)
+    {
+      operand.type(ir::DataType::UINT8);
+    }
+  };
+  qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::Cast::Input::INPUT)));
+  qasymm8ToUint8(subg.operands().at(outputs.at(0)));
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Cast(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load any of the six comparison operators (EQUAL, NOT_EQUAL, GREATER,
+// GREATER_EQUAL, LESS, LESS_EQUAL) into a single ir::operation::Comparison,
+// mapping the builtin opcode to the IR comparison type.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadComparison(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::Comparison::Param param;
+
+  const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+
+  switch (builtin_op)
+  {
+    case BuiltinOperator::BuiltinOperator_EQUAL:
+      param.comparison_type = ir::operation::Comparison::ComparisonType::Equal;
+      break;
+    case BuiltinOperator::BuiltinOperator_NOT_EQUAL:
+      param.comparison_type = ir::operation::Comparison::ComparisonType::NotEqual;
+      break;
+    case BuiltinOperator::BuiltinOperator_GREATER_EQUAL:
+      param.comparison_type = ir::operation::Comparison::ComparisonType::GreaterEqual;
+      break;
+    case BuiltinOperator::BuiltinOperator_GREATER:
+      param.comparison_type = ir::operation::Comparison::ComparisonType::Greater;
+      break;
+    case BuiltinOperator::BuiltinOperator_LESS_EQUAL:
+      param.comparison_type = ir::operation::Comparison::ComparisonType::LessEqual;
+      break;
+    case BuiltinOperator::BuiltinOperator_LESS:
+      param.comparison_type = ir::operation::Comparison::ComparisonType::Less;
+      break;
+    default:
+      throw std::runtime_error(
+          std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
+  }
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Comparison(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a OneHot operator. The model passes depth/on_value/off_value as
+// tensors, but they are scalars; this loader reads their constant values and
+// bakes them (plus the axis option) into the operation parameters, keeping
+// only the indices tensor as an IR input.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadOneHot(const Operator *op, ir::Graph &subg)
+{
+  if (op->inputs()->size() != 4 || op->outputs()->size() != 1)
+    throw std::runtime_error("OneHot Op has wrong number of input or output tensors.");
+
+  enum
+  {
+    INDICES = 0,
+    DEPTH = 1,
+    ON_VALUE = 2,
+    OFF_VALUE = 3,
+  };
+
+  // Set input and output tensors
+  ir::OperandIndexSequence inputs, outputs;
+  inputs.append(_tensor_to_operand[op->inputs()->Get(INDICES)]);
+  outputs.append(_tensor_to_operand[op->outputs()->Get(0)]);
+
+  // Set parameters
+  // depth, on_value and off_value are scalar though it is passed as inputs
+  auto depth_opidx = _tensor_to_operand[op->inputs()->Get(DEPTH)];
+  auto on_value_opidx = _tensor_to_operand[op->inputs()->Get(ON_VALUE)];
+  auto off_value_opidx = _tensor_to_operand[op->inputs()->Get(OFF_VALUE)];
+  const auto depth = subg.operands().at(depth_opidx).template asScalar<int>();
+  const auto on_value = subg.operands().at(on_value_opidx).template asScalar<float>();
+  const auto off_value = subg.operands().at(off_value_opidx).template asScalar<float>();
+  const auto axis = op->builtin_options_as_OneHotOptions()->axis();
+  std::unique_ptr<ir::Operation> new_op(
+      new ir::operation::OneHot(inputs, outputs, {depth, on_value, off_value, axis}));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load an ABS operator; a direct mapping with no extra parameters.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadAbs(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Abs(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a SIN operator; a direct mapping with no extra parameters.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSin(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Sin(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+// Load a SHAPE operator. The ShapeOptions 'out_type' parameter is not yet
+// supported — the commented-out block below shows how it would be read once
+// ir::operation::Shape grows a Param.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadShape(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  // ir::operation::Shape::Param param;
+  // const auto *options = op->builtin_options_as_ShapeOptions();
+  // param.out_type = tensorTypeToDataType(options->out_type());
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Shape(inputs, outputs /*, param*/));
+  subg.addOperation(std::move(new_op));
+}
+
+// Dispatch a flatbuffer operator to its specific load* handler based on the
+// builtin opcode. Throws std::runtime_error for any opcode without a handler.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, ir::Graph &subg)
+{
+  const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+
+  switch (builtin_op)
+  {
+    case BuiltinOperator::BuiltinOperator_CONV_2D:
+      loadConv2D(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_AVERAGE_POOL_2D:
+      loadAvgPool2D(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_DEPTHWISE_CONV_2D:
+      loadDepthwiseConv2D(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_TRANSPOSE_CONV:
+      loadTransposeConv(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_RESHAPE:
+      loadReshape(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_SOFTMAX:
+      loadSoftmax(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_MAX_POOL_2D:
+      loadMaxPool2D(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_CONCATENATION:
+      loadConcatenation(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_FULLY_CONNECTED:
+      loadFC(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_ADD:
+      loadAdd(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_SUB:
+      loadSub(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_MUL:
+      loadMul(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_DIV:
+      loadDiv(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_PACK:
+      loadPack(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_RELU:
+      loadRelu(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_RELU6:
+      loadRelu6(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_RESIZE_BILINEAR:
+      loadResizeBilinear(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_RSQRT:
+      loadRsqrt(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_SQRT:
+      loadSqrt(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE:
+      loadSquaredDifference(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_TANH:
+      loadTanh(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_TRANSPOSE:
+      loadTranspose(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_MEAN:
+      loadMean(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_REDUCE_MAX:
+      loadReduceMax(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_PAD:
+      loadPad(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_LOGISTIC:
+      loadLogistic(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_EXP:
+      loadExp(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_GATHER:
+      loadGather(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_SPACE_TO_BATCH_ND:
+      loadSpaceToBatchND(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_BATCH_TO_SPACE_ND:
+      loadBatchToSpaceND(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_SUM:
+      loadReduceSum(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_CUSTOM:
+      loadCustom(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_SQUEEZE:
+      loadSqueeze(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_PRELU:
+      loadPrelu(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_SPLIT:
+      loadSplit(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_SLICE:
+      loadSlice(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_STRIDED_SLICE:
+      loadStridedSlice(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_UNPACK:
+      loadUnpack(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_MINIMUM:
+      loadMinimum(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_MAXIMUM:
+      loadMaximum(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_CAST:
+      loadCast(op, subg);
+      return;
+    // All six comparison opcodes share one handler; it re-reads the opcode.
+    case BuiltinOperator::BuiltinOperator_EQUAL:
+    case BuiltinOperator::BuiltinOperator_NOT_EQUAL:
+    case BuiltinOperator::BuiltinOperator_GREATER_EQUAL:
+    case BuiltinOperator::BuiltinOperator_GREATER:
+    case BuiltinOperator::BuiltinOperator_LESS_EQUAL:
+    case BuiltinOperator::BuiltinOperator_LESS:
+      loadComparison(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_ONE_HOT:
+      loadOneHot(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_ABS:
+      loadAbs(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_SIN:
+      loadSin(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_SHAPE:
+      loadShape(op, subg);
+      return;
+    // TODO Implement loading subgraphs of control flow ops
+    default:
+      throw std::runtime_error(
+          std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
+  }
+}
+
+// Verify the already-loaded flatbuffer, parse it into a Model, and build the
+// primary subgraph via the derived loader's loadSubgraph.
+template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadModel()
+{
+  LoaderDomain::VerifyModelBuffer(*_verifier.get());
+  _model = LoaderDomain::GetModel(_buffer.data());
+  // Version unused
+  // const auto version = _model->version();
+  // Description unused
+  // const auto *description = _model->description();
+  // Metadata buffer unused
+  // const auto *metadata_buffer = _model->metadata_buffer();
+  // Load subgraphs recursively from primary subgraph and map operations on subgraph
+  const auto domain_subgraph = (*_model->subgraphs())[0];
+  _primary_subgraph = static_cast<SpecificLoader *>(this)->loadSubgraph(domain_subgraph);
+}
+
+} // namespace base_loader
+} // namespace onert
+
+#endif //__BASE_LOADER_BASE_LOADER_H__
diff --git a/runtime/onert/frontend/circle/CMakeLists.txt b/runtime/onert/frontend/circle/CMakeLists.txt
new file mode 100644
index 000000000..b446e694a
--- /dev/null
+++ b/runtime/onert/frontend/circle/CMakeLists.txt
@@ -0,0 +1,17 @@
+if (NOT BUILD_CIRCLE_LOADER)
+ return()
+endif ()
+
+nnfw_find_package(FlatBuffersSource REQUIRED)
+
+set(CIRCLE_LOADER_SOURCES src/circle_loader.cc)
+
+add_library(circle_loader SHARED ${CIRCLE_LOADER_SOURCES})
+
+target_include_directories(circle_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_include_directories(circle_loader PRIVATE ${FlatBuffersSource_DIR}/include)
+
+target_link_libraries(circle_loader PUBLIC onert_core)
+target_link_libraries(circle_loader PRIVATE base_loader nnfw_common nnfw_coverage)
+
+install(TARGETS circle_loader DESTINATION lib)
diff --git a/runtime/onert/frontend/circle/include/circle_loader.h b/runtime/onert/frontend/circle/include/circle_loader.h
new file mode 100644
index 000000000..718bc0b65
--- /dev/null
+++ b/runtime/onert/frontend/circle/include/circle_loader.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_CIRCLE_LOADER_H__
+#define __CIRCLE_CIRCLE_LOADER_H__
+
+#include "ir/Graph.h"
+
+#include <memory>
+
+namespace onert
+{
+namespace circle_loader
+{
+std::unique_ptr<ir::Graph> loadModel(const char *filename);
+} // namespace circle_loader
+} // namespace onert
+
+#endif // __CIRCLE_CIRCLE_LOADER_H__
diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc
new file mode 100644
index 000000000..49aaccc4c
--- /dev/null
+++ b/runtime/onert/frontend/circle/src/circle_loader.cc
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "circle_loader.h"
+#include "base_loader.h"
+#include "circle_schema_generated.h"
+
+namespace onert
+{
+namespace circle_loader
+{
+
+namespace
+{
+
+// Map the circle schema's DataFormat to the runtime's ir::Layout
+// (CHANNELS_FIRST -> NCHW, CHANNELS_LAST -> NHWC); throws on anything else.
+ir::Layout convertDataFormat(circle::DataFormat data_format)
+{
+  switch (data_format)
+  {
+    case circle::DataFormat::DataFormat_CHANNELS_FIRST:
+      return ir::Layout::NCHW;
+    case circle::DataFormat::DataFormat_CHANNELS_LAST:
+      return ir::Layout::NHWC;
+    default:
+      throw std::runtime_error("Unsupported DataFormat");
+  }
+}
+
+// Adapter that binds BaseLoader's LoaderDomain template parameter to the
+// circle-generated flatbuffer types and free functions, so the shared loader
+// code can be reused for both tflite and circle schemas.
+struct LoaderDomain
+{
+  using Verifier = flatbuffers::Verifier;
+  using ActivationFunctionType = circle::ActivationFunctionType;
+  using Buffer = circle::Buffer;
+  using BuiltinOperator = circle::BuiltinOperator;
+  using CustomOptionsFormat = circle::CustomOptionsFormat;
+  using Model = circle::Model;
+  using Operator = circle::Operator;
+  using Padding = circle::Padding;
+  using Pool2DOptions = circle::Pool2DOptions;
+  using Tensor = circle::Tensor;
+  using TensorType = circle::TensorType;
+  using SubGraph = circle::SubGraph;
+
+  static const char *EnumNameBuiltinOperator(BuiltinOperator e)
+  {
+    return circle::EnumNameBuiltinOperator(e);
+  }
+  static const char *EnumNameActivationFunctionType(ActivationFunctionType e)
+  {
+    return circle::EnumNameActivationFunctionType(e);
+  }
+  static const char *EnumNameTensorType(TensorType e) { return circle::EnumNameTensorType(e); }
+  static const Model *GetModel(const void *buf) { return circle::GetModel(buf); }
+  static bool VerifyModelBuffer(Verifier &verifier) { return circle::VerifyModelBuffer(verifier); }
+};
+
+// CRTP subclass of BaseLoader specialized for the circle schema. Adds the
+// circle-only INSTANCE_NORM operator and the subgraph-level data_format
+// (layout) that the tflite schema does not have.
+class CircleLoader final : public base_loader::BaseLoader<LoaderDomain, CircleLoader>
+{
+public:
+  using BaseLoader::BaseLoader;
+
+  // Build an ir::Graph from one circle SubGraph: operands, graph inputs and
+  // outputs, operations, and the subgraph layout.
+  std::unique_ptr<ir::Graph> loadSubgraph(const circle::SubGraph *circle_subg)
+  {
+    auto subg = std::make_unique<ir::Graph>();
+    // Load tensors
+    _tensor_to_operand.resize(circle_subg->tensors()->size());
+    for (flatbuffers::uoffset_t i = 0; i < circle_subg->tensors()->size(); ++i)
+    {
+      _tensor_to_operand[i] = loadOperand(circle_subg->tensors()->Get(i), *subg);
+    }
+    // Set inputs
+    for (const std::int32_t input_ind : *circle_subg->inputs())
+    {
+      subg->addInput(_tensor_to_operand[input_ind]);
+    }
+    // Set outputs
+    for (const std::int32_t output_ind : *circle_subg->outputs())
+    {
+      subg->addOutput(_tensor_to_operand[output_ind]);
+    }
+    // Create operations
+    for (const auto *op : *circle_subg->operators())
+    {
+      CircleLoader::loadOperation(op, *subg);
+    }
+
+    subg->setLayout(convertDataFormat(circle_subg->data_format()));
+
+    subg->finishBuilding();
+
+    return subg;
+  }
+
+  // Handle circle-specific opcodes first, then defer to the shared base
+  // loader dispatch for everything else.
+  void loadOperation(const circle::Operator *op, ir::Graph &subg)
+  {
+    const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+
+    switch (builtin_op)
+    {
+      case circle::BuiltinOperator::BuiltinOperator_INSTANCE_NORM:
+        loadInstanceNorm(op, subg);
+        return;
+      default:
+        BaseLoader::loadOperation(op, subg);
+        return;
+    }
+  }
+};
+
+} // namespace
+
+// Public entry point: load a circle model file into an ir::Graph.
+// The loader fills 'primary_subgraph' in place via loadFromFile.
+std::unique_ptr<ir::Graph> loadModel(const char *filename)
+{
+  auto primary_subgraph = std::make_unique<ir::Graph>();
+  CircleLoader loader(primary_subgraph);
+  loader.loadFromFile(filename);
+  return primary_subgraph;
+}
+
+} // namespace circle_loader
+} // namespace onert
diff --git a/runtime/onert/frontend/circle/src/circle_schema_generated.h b/runtime/onert/frontend/circle/src/circle_schema_generated.h
new file mode 100644
index 000000000..b1337f20d
--- /dev/null
+++ b/runtime/onert/frontend/circle/src/circle_schema_generated.h
@@ -0,0 +1,9952 @@
+/*
+ * Copyright (c) 2019-2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// automatically generated by the FlatBuffers compiler, do not modify
+
+#ifndef FLATBUFFERS_GENERATED_CIRCLESCHEMA_CIRCLE_H_
+#define FLATBUFFERS_GENERATED_CIRCLESCHEMA_CIRCLE_H_
+
+#include "flatbuffers/flatbuffers.h"
+
+namespace circle
+{
+
+struct CustomQuantization;
+
+struct QuantizationParameters;
+
+struct Int32Vector;
+
+struct Uint16Vector;
+
+struct Uint8Vector;
+
+struct DimensionMetadata;
+
+struct SparsityParameters;
+
+struct Tensor;
+
+struct Conv2DOptions;
+
+struct Pool2DOptions;
+
+struct DepthwiseConv2DOptions;
+
+struct ConcatEmbeddingsOptions;
+
+struct LSHProjectionOptions;
+
+struct SVDFOptions;
+
+struct RNNOptions;
+
+struct SequenceRNNOptions;
+
+struct BidirectionalSequenceRNNOptions;
+
+struct FullyConnectedOptions;
+
+struct SoftmaxOptions;
+
+struct ConcatenationOptions;
+
+struct AddOptions;
+
+struct MulOptions;
+
+struct L2NormOptions;
+
+struct LocalResponseNormalizationOptions;
+
+struct LSTMOptions;
+
+struct UnidirectionalSequenceLSTMOptions;
+
+struct BidirectionalSequenceLSTMOptions;
+
+struct ResizeBilinearOptions;
+
+struct ResizeNearestNeighborOptions;
+
+struct CallOptions;
+
+struct PadOptions;
+
+struct PadV2Options;
+
+struct ReshapeOptions;
+
+struct SpaceToBatchNDOptions;
+
+struct BatchToSpaceNDOptions;
+
+struct SkipGramOptions;
+
+struct SpaceToDepthOptions;
+
+struct DepthToSpaceOptions;
+
+struct SubOptions;
+
+struct DivOptions;
+
+struct TopKV2Options;
+
+struct EmbeddingLookupSparseOptions;
+
+struct GatherOptions;
+
+struct TransposeOptions;
+
+struct ExpOptions;
+
+struct CosOptions;
+
+struct ReducerOptions;
+
+struct SqueezeOptions;
+
+struct SplitOptions;
+
+struct SplitVOptions;
+
+struct StridedSliceOptions;
+
+struct LogSoftmaxOptions;
+
+struct CastOptions;
+
+struct DequantizeOptions;
+
+struct MaximumMinimumOptions;
+
+struct TileOptions;
+
+struct ArgMaxOptions;
+
+struct ArgMinOptions;
+
+struct GreaterOptions;
+
+struct GreaterEqualOptions;
+
+struct LessOptions;
+
+struct LessEqualOptions;
+
+struct NegOptions;
+
+struct SelectOptions;
+
+struct SliceOptions;
+
+struct TransposeConvOptions;
+
+struct ExpandDimsOptions;
+
+struct SparseToDenseOptions;
+
+struct EqualOptions;
+
+struct NotEqualOptions;
+
+struct ShapeOptions;
+
+struct RankOptions;
+
+struct PowOptions;
+
+struct FakeQuantOptions;
+
+struct PackOptions;
+
+struct LogicalOrOptions;
+
+struct OneHotOptions;
+
+struct AbsOptions;
+
+struct HardSwishOptions;
+
+struct LogicalAndOptions;
+
+struct LogicalNotOptions;
+
+struct UnpackOptions;
+
+struct FloorDivOptions;
+
+struct SquareOptions;
+
+struct ZerosLikeOptions;
+
+struct FillOptions;
+
+struct FloorModOptions;
+
+struct RangeOptions;
+
+struct LeakyReluOptions;
+
+struct SquaredDifferenceOptions;
+
+struct MirrorPadOptions;
+
+struct UniqueOptions;
+
+struct ReverseV2Options;
+
+struct AddNOptions;
+
+struct GatherNdOptions;
+
+struct WhereOptions;
+
+struct ReverseSequenceOptions;
+
+struct MatrixDiagOptions;
+
+struct QuantizeOptions;
+
+struct MatrixSetDiagOptions;
+
+struct IfOptions;
+
+struct WhileOptions;
+
+struct NonMaxSuppressionV4Options;
+
+struct NonMaxSuppressionV5Options;
+
+struct ScatterNdOptions;
+
+struct SelectV2Options;
+
+struct DensifyOptions;
+
+struct SegmentSumOptions;
+
+struct BatchMatMulOptions;
+
+struct InstanceNormOptions;
+
+struct OperatorCode;
+
+struct Operator;
+
+struct SubGraph;
+
+struct Buffer;
+
+struct Metadata;
+
+struct Model;
+
+enum TensorType
+{
+ TensorType_FLOAT32 = 0,
+ TensorType_FLOAT16 = 1,
+ TensorType_INT32 = 2,
+ TensorType_UINT8 = 3,
+ TensorType_INT64 = 4,
+ TensorType_STRING = 5,
+ TensorType_BOOL = 6,
+ TensorType_INT16 = 7,
+ TensorType_COMPLEX64 = 8,
+ TensorType_INT8 = 9,
+ TensorType_FLOAT64 = 10,
+ TensorType_MIN = TensorType_FLOAT32,
+ TensorType_MAX = TensorType_FLOAT64
+};
+
+inline const TensorType (&EnumValuesTensorType())[11]
+{
+ static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32,
+ TensorType_UINT8, TensorType_INT64, TensorType_STRING,
+ TensorType_BOOL, TensorType_INT16, TensorType_COMPLEX64,
+ TensorType_INT8, TensorType_FLOAT64};
+ return values;
+}
+
+inline const char *const *EnumNamesTensorType()
+{
+ static const char *const names[] = {"FLOAT32", "FLOAT16", "INT32", "UINT8",
+ "INT64", "STRING", "BOOL", "INT16",
+ "COMPLEX64", "INT8", "FLOAT64", nullptr};
+ return names;
+}
+
+inline const char *EnumNameTensorType(TensorType e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesTensorType()[index];
+}
+
+enum QuantizationDetails
+{
+ QuantizationDetails_NONE = 0,
+ QuantizationDetails_CustomQuantization = 1,
+ QuantizationDetails_MIN = QuantizationDetails_NONE,
+ QuantizationDetails_MAX = QuantizationDetails_CustomQuantization
+};
+
+inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2]
+{
+ static const QuantizationDetails values[] = {QuantizationDetails_NONE,
+ QuantizationDetails_CustomQuantization};
+ return values;
+}
+
+inline const char *const *EnumNamesQuantizationDetails()
+{
+ static const char *const names[] = {"NONE", "CustomQuantization", nullptr};
+ return names;
+}
+
+inline const char *EnumNameQuantizationDetails(QuantizationDetails e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesQuantizationDetails()[index];
+}
+
+template <typename T> struct QuantizationDetailsTraits
+{
+ static const QuantizationDetails enum_value = QuantizationDetails_NONE;
+};
+
+template <> struct QuantizationDetailsTraits<CustomQuantization>
+{
+ static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
+};
+
+bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
+ QuantizationDetails type);
+bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types);
+
+enum DimensionType
+{
+ DimensionType_DENSE = 0,
+ DimensionType_SPARSE_CSR = 1,
+ DimensionType_MIN = DimensionType_DENSE,
+ DimensionType_MAX = DimensionType_SPARSE_CSR
+};
+
+inline const DimensionType (&EnumValuesDimensionType())[2]
+{
+ static const DimensionType values[] = {DimensionType_DENSE, DimensionType_SPARSE_CSR};
+ return values;
+}
+
+inline const char *const *EnumNamesDimensionType()
+{
+ static const char *const names[] = {"DENSE", "SPARSE_CSR", nullptr};
+ return names;
+}
+
+inline const char *EnumNameDimensionType(DimensionType e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesDimensionType()[index];
+}
+
+enum SparseIndexVector
+{
+ SparseIndexVector_NONE = 0,
+ SparseIndexVector_Int32Vector = 1,
+ SparseIndexVector_Uint16Vector = 2,
+ SparseIndexVector_Uint8Vector = 3,
+ SparseIndexVector_MIN = SparseIndexVector_NONE,
+ SparseIndexVector_MAX = SparseIndexVector_Uint8Vector
+};
+
+inline const SparseIndexVector (&EnumValuesSparseIndexVector())[4]
+{
+ static const SparseIndexVector values[] = {SparseIndexVector_NONE, SparseIndexVector_Int32Vector,
+ SparseIndexVector_Uint16Vector,
+ SparseIndexVector_Uint8Vector};
+ return values;
+}
+
+inline const char *const *EnumNamesSparseIndexVector()
+{
+ static const char *const names[] = {"NONE", "Int32Vector", "Uint16Vector", "Uint8Vector",
+ nullptr};
+ return names;
+}
+
+inline const char *EnumNameSparseIndexVector(SparseIndexVector e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesSparseIndexVector()[index];
+}
+
+template <typename T> struct SparseIndexVectorTraits
+{
+ static const SparseIndexVector enum_value = SparseIndexVector_NONE;
+};
+
+template <> struct SparseIndexVectorTraits<Int32Vector>
+{
+ static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector;
+};
+
+template <> struct SparseIndexVectorTraits<Uint16Vector>
+{
+ static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector;
+};
+
+template <> struct SparseIndexVectorTraits<Uint8Vector>
+{
+ static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector;
+};
+
+bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void *obj,
+ SparseIndexVector type);
+bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types);
+
+// Operator codes for serialized models (flatbuffers-generated; presumably
+// from the TFLite/circle schema — TODO confirm before hand-editing).
+// Values 127-253 are deliberately unassigned; INSTANCE_NORM sits at 254 as
+// an out-of-band extension, so MIN..MAX spans the gap.
+enum BuiltinOperator
+{
+ BuiltinOperator_ADD = 0,
+ BuiltinOperator_AVERAGE_POOL_2D = 1,
+ BuiltinOperator_CONCATENATION = 2,
+ BuiltinOperator_CONV_2D = 3,
+ BuiltinOperator_DEPTHWISE_CONV_2D = 4,
+ BuiltinOperator_DEPTH_TO_SPACE = 5,
+ BuiltinOperator_DEQUANTIZE = 6,
+ BuiltinOperator_EMBEDDING_LOOKUP = 7,
+ BuiltinOperator_FLOOR = 8,
+ BuiltinOperator_FULLY_CONNECTED = 9,
+ BuiltinOperator_HASHTABLE_LOOKUP = 10,
+ BuiltinOperator_L2_NORMALIZATION = 11,
+ BuiltinOperator_L2_POOL_2D = 12,
+ BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION = 13,
+ BuiltinOperator_LOGISTIC = 14,
+ BuiltinOperator_LSH_PROJECTION = 15,
+ BuiltinOperator_LSTM = 16,
+ BuiltinOperator_MAX_POOL_2D = 17,
+ BuiltinOperator_MUL = 18,
+ BuiltinOperator_RELU = 19,
+ BuiltinOperator_RELU_N1_TO_1 = 20,
+ BuiltinOperator_RELU6 = 21,
+ BuiltinOperator_RESHAPE = 22,
+ BuiltinOperator_RESIZE_BILINEAR = 23,
+ BuiltinOperator_RNN = 24,
+ BuiltinOperator_SOFTMAX = 25,
+ BuiltinOperator_SPACE_TO_DEPTH = 26,
+ BuiltinOperator_SVDF = 27,
+ BuiltinOperator_TANH = 28,
+ BuiltinOperator_CONCAT_EMBEDDINGS = 29,
+ BuiltinOperator_SKIP_GRAM = 30,
+ BuiltinOperator_CALL = 31,
+ BuiltinOperator_CUSTOM = 32,
+ BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33,
+ BuiltinOperator_PAD = 34,
+ BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ BuiltinOperator_GATHER = 36,
+ BuiltinOperator_BATCH_TO_SPACE_ND = 37,
+ BuiltinOperator_SPACE_TO_BATCH_ND = 38,
+ BuiltinOperator_TRANSPOSE = 39,
+ BuiltinOperator_MEAN = 40,
+ BuiltinOperator_SUB = 41,
+ BuiltinOperator_DIV = 42,
+ BuiltinOperator_SQUEEZE = 43,
+ BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ BuiltinOperator_STRIDED_SLICE = 45,
+ BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ BuiltinOperator_EXP = 47,
+ BuiltinOperator_TOPK_V2 = 48,
+ BuiltinOperator_SPLIT = 49,
+ BuiltinOperator_LOG_SOFTMAX = 50,
+ BuiltinOperator_DELEGATE = 51,
+ BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ BuiltinOperator_CAST = 53,
+ BuiltinOperator_PRELU = 54,
+ BuiltinOperator_MAXIMUM = 55,
+ BuiltinOperator_ARG_MAX = 56,
+ BuiltinOperator_MINIMUM = 57,
+ BuiltinOperator_LESS = 58,
+ BuiltinOperator_NEG = 59,
+ BuiltinOperator_PADV2 = 60,
+ BuiltinOperator_GREATER = 61,
+ BuiltinOperator_GREATER_EQUAL = 62,
+ BuiltinOperator_LESS_EQUAL = 63,
+ BuiltinOperator_SELECT = 64,
+ BuiltinOperator_SLICE = 65,
+ BuiltinOperator_SIN = 66,
+ BuiltinOperator_TRANSPOSE_CONV = 67,
+ BuiltinOperator_SPARSE_TO_DENSE = 68,
+ BuiltinOperator_TILE = 69,
+ BuiltinOperator_EXPAND_DIMS = 70,
+ BuiltinOperator_EQUAL = 71,
+ BuiltinOperator_NOT_EQUAL = 72,
+ BuiltinOperator_LOG = 73,
+ BuiltinOperator_SUM = 74,
+ BuiltinOperator_SQRT = 75,
+ BuiltinOperator_RSQRT = 76,
+ BuiltinOperator_SHAPE = 77,
+ BuiltinOperator_POW = 78,
+ BuiltinOperator_ARG_MIN = 79,
+ BuiltinOperator_FAKE_QUANT = 80,
+ BuiltinOperator_REDUCE_PROD = 81,
+ BuiltinOperator_REDUCE_MAX = 82,
+ BuiltinOperator_PACK = 83,
+ BuiltinOperator_LOGICAL_OR = 84,
+ BuiltinOperator_ONE_HOT = 85,
+ BuiltinOperator_LOGICAL_AND = 86,
+ BuiltinOperator_LOGICAL_NOT = 87,
+ BuiltinOperator_UNPACK = 88,
+ BuiltinOperator_REDUCE_MIN = 89,
+ BuiltinOperator_FLOOR_DIV = 90,
+ BuiltinOperator_REDUCE_ANY = 91,
+ BuiltinOperator_SQUARE = 92,
+ BuiltinOperator_ZEROS_LIKE = 93,
+ BuiltinOperator_FILL = 94,
+ BuiltinOperator_FLOOR_MOD = 95,
+ BuiltinOperator_RANGE = 96,
+ BuiltinOperator_RESIZE_NEAREST_NEIGHBOR = 97,
+ BuiltinOperator_LEAKY_RELU = 98,
+ BuiltinOperator_SQUARED_DIFFERENCE = 99,
+ BuiltinOperator_MIRROR_PAD = 100,
+ BuiltinOperator_ABS = 101,
+ BuiltinOperator_SPLIT_V = 102,
+ BuiltinOperator_UNIQUE = 103,
+ BuiltinOperator_CEIL = 104,
+ BuiltinOperator_REVERSE_V2 = 105,
+ BuiltinOperator_ADD_N = 106,
+ BuiltinOperator_GATHER_ND = 107,
+ BuiltinOperator_COS = 108,
+ BuiltinOperator_WHERE = 109,
+ BuiltinOperator_RANK = 110,
+ BuiltinOperator_ELU = 111,
+ BuiltinOperator_REVERSE_SEQUENCE = 112,
+ BuiltinOperator_MATRIX_DIAG = 113,
+ BuiltinOperator_QUANTIZE = 114,
+ BuiltinOperator_MATRIX_SET_DIAG = 115,
+ BuiltinOperator_ROUND = 116,
+ BuiltinOperator_HARD_SWISH = 117,
+ BuiltinOperator_IF = 118,
+ BuiltinOperator_WHILE = 119,
+ BuiltinOperator_NON_MAX_SUPPRESSION_V4 = 120,
+ BuiltinOperator_NON_MAX_SUPPRESSION_V5 = 121,
+ BuiltinOperator_SCATTER_ND = 122,
+ BuiltinOperator_SELECT_V2 = 123,
+ BuiltinOperator_DENSIFY = 124,
+ BuiltinOperator_SEGMENT_SUM = 125,
+ BuiltinOperator_BATCH_MATMUL = 126,
+ BuiltinOperator_INSTANCE_NORM = 254, // extension value; 127-253 unassigned
+ BuiltinOperator_MIN = BuiltinOperator_ADD,
+ BuiltinOperator_MAX = BuiltinOperator_INSTANCE_NORM
+};
+
+// Static table of all 128 declared BuiltinOperator values, in declaration
+// order (127 contiguous codes 0-126 plus INSTANCE_NORM at 254). Generated —
+// the array order and length must track the enum exactly; regenerate, do
+// not hand-edit.
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[128]
+{
+ static const BuiltinOperator values[] = {BuiltinOperator_ADD,
+ BuiltinOperator_AVERAGE_POOL_2D,
+ BuiltinOperator_CONCATENATION,
+ BuiltinOperator_CONV_2D,
+ BuiltinOperator_DEPTHWISE_CONV_2D,
+ BuiltinOperator_DEPTH_TO_SPACE,
+ BuiltinOperator_DEQUANTIZE,
+ BuiltinOperator_EMBEDDING_LOOKUP,
+ BuiltinOperator_FLOOR,
+ BuiltinOperator_FULLY_CONNECTED,
+ BuiltinOperator_HASHTABLE_LOOKUP,
+ BuiltinOperator_L2_NORMALIZATION,
+ BuiltinOperator_L2_POOL_2D,
+ BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
+ BuiltinOperator_LOGISTIC,
+ BuiltinOperator_LSH_PROJECTION,
+ BuiltinOperator_LSTM,
+ BuiltinOperator_MAX_POOL_2D,
+ BuiltinOperator_MUL,
+ BuiltinOperator_RELU,
+ BuiltinOperator_RELU_N1_TO_1,
+ BuiltinOperator_RELU6,
+ BuiltinOperator_RESHAPE,
+ BuiltinOperator_RESIZE_BILINEAR,
+ BuiltinOperator_RNN,
+ BuiltinOperator_SOFTMAX,
+ BuiltinOperator_SPACE_TO_DEPTH,
+ BuiltinOperator_SVDF,
+ BuiltinOperator_TANH,
+ BuiltinOperator_CONCAT_EMBEDDINGS,
+ BuiltinOperator_SKIP_GRAM,
+ BuiltinOperator_CALL,
+ BuiltinOperator_CUSTOM,
+ BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
+ BuiltinOperator_PAD,
+ BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
+ BuiltinOperator_GATHER,
+ BuiltinOperator_BATCH_TO_SPACE_ND,
+ BuiltinOperator_SPACE_TO_BATCH_ND,
+ BuiltinOperator_TRANSPOSE,
+ BuiltinOperator_MEAN,
+ BuiltinOperator_SUB,
+ BuiltinOperator_DIV,
+ BuiltinOperator_SQUEEZE,
+ BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
+ BuiltinOperator_STRIDED_SLICE,
+ BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
+ BuiltinOperator_EXP,
+ BuiltinOperator_TOPK_V2,
+ BuiltinOperator_SPLIT,
+ BuiltinOperator_LOG_SOFTMAX,
+ BuiltinOperator_DELEGATE,
+ BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM,
+ BuiltinOperator_CAST,
+ BuiltinOperator_PRELU,
+ BuiltinOperator_MAXIMUM,
+ BuiltinOperator_ARG_MAX,
+ BuiltinOperator_MINIMUM,
+ BuiltinOperator_LESS,
+ BuiltinOperator_NEG,
+ BuiltinOperator_PADV2,
+ BuiltinOperator_GREATER,
+ BuiltinOperator_GREATER_EQUAL,
+ BuiltinOperator_LESS_EQUAL,
+ BuiltinOperator_SELECT,
+ BuiltinOperator_SLICE,
+ BuiltinOperator_SIN,
+ BuiltinOperator_TRANSPOSE_CONV,
+ BuiltinOperator_SPARSE_TO_DENSE,
+ BuiltinOperator_TILE,
+ BuiltinOperator_EXPAND_DIMS,
+ BuiltinOperator_EQUAL,
+ BuiltinOperator_NOT_EQUAL,
+ BuiltinOperator_LOG,
+ BuiltinOperator_SUM,
+ BuiltinOperator_SQRT,
+ BuiltinOperator_RSQRT,
+ BuiltinOperator_SHAPE,
+ BuiltinOperator_POW,
+ BuiltinOperator_ARG_MIN,
+ BuiltinOperator_FAKE_QUANT,
+ BuiltinOperator_REDUCE_PROD,
+ BuiltinOperator_REDUCE_MAX,
+ BuiltinOperator_PACK,
+ BuiltinOperator_LOGICAL_OR,
+ BuiltinOperator_ONE_HOT,
+ BuiltinOperator_LOGICAL_AND,
+ BuiltinOperator_LOGICAL_NOT,
+ BuiltinOperator_UNPACK,
+ BuiltinOperator_REDUCE_MIN,
+ BuiltinOperator_FLOOR_DIV,
+ BuiltinOperator_REDUCE_ANY,
+ BuiltinOperator_SQUARE,
+ BuiltinOperator_ZEROS_LIKE,
+ BuiltinOperator_FILL,
+ BuiltinOperator_FLOOR_MOD,
+ BuiltinOperator_RANGE,
+ BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
+ BuiltinOperator_LEAKY_RELU,
+ BuiltinOperator_SQUARED_DIFFERENCE,
+ BuiltinOperator_MIRROR_PAD,
+ BuiltinOperator_ABS,
+ BuiltinOperator_SPLIT_V,
+ BuiltinOperator_UNIQUE,
+ BuiltinOperator_CEIL,
+ BuiltinOperator_REVERSE_V2,
+ BuiltinOperator_ADD_N,
+ BuiltinOperator_GATHER_ND,
+ BuiltinOperator_COS,
+ BuiltinOperator_WHERE,
+ BuiltinOperator_RANK,
+ BuiltinOperator_ELU,
+ BuiltinOperator_REVERSE_SEQUENCE,
+ BuiltinOperator_MATRIX_DIAG,
+ BuiltinOperator_QUANTIZE,
+ BuiltinOperator_MATRIX_SET_DIAG,
+ BuiltinOperator_ROUND,
+ BuiltinOperator_HARD_SWISH,
+ BuiltinOperator_IF,
+ BuiltinOperator_WHILE,
+ BuiltinOperator_NON_MAX_SUPPRESSION_V4,
+ BuiltinOperator_NON_MAX_SUPPRESSION_V5,
+ BuiltinOperator_SCATTER_ND,
+ BuiltinOperator_SELECT_V2,
+ BuiltinOperator_DENSIFY,
+ BuiltinOperator_SEGMENT_SUM,
+ BuiltinOperator_BATCH_MATMUL,
+ BuiltinOperator_INSTANCE_NORM};
+ return values;
+}
+
+// Name table indexed directly by BuiltinOperator value: 127 named entries
+// (0-126), 127 empty-string placeholders for the unassigned gap (127-253),
+// "INSTANCE_NORM" at 254, and a nullptr terminator — 256 slots total.
+// Generated; the placeholder count is load-bearing, so index markers are
+// noted on the gap run below.
+inline const char *const *EnumNamesBuiltinOperator()
+{
+ static const char *const names[] = {"ADD",
+ "AVERAGE_POOL_2D",
+ "CONCATENATION",
+ "CONV_2D",
+ "DEPTHWISE_CONV_2D",
+ "DEPTH_TO_SPACE",
+ "DEQUANTIZE",
+ "EMBEDDING_LOOKUP",
+ "FLOOR",
+ "FULLY_CONNECTED",
+ "HASHTABLE_LOOKUP",
+ "L2_NORMALIZATION",
+ "L2_POOL_2D",
+ "LOCAL_RESPONSE_NORMALIZATION",
+ "LOGISTIC",
+ "LSH_PROJECTION",
+ "LSTM",
+ "MAX_POOL_2D",
+ "MUL",
+ "RELU",
+ "RELU_N1_TO_1",
+ "RELU6",
+ "RESHAPE",
+ "RESIZE_BILINEAR",
+ "RNN",
+ "SOFTMAX",
+ "SPACE_TO_DEPTH",
+ "SVDF",
+ "TANH",
+ "CONCAT_EMBEDDINGS",
+ "SKIP_GRAM",
+ "CALL",
+ "CUSTOM",
+ "EMBEDDING_LOOKUP_SPARSE",
+ "PAD",
+ "UNIDIRECTIONAL_SEQUENCE_RNN",
+ "GATHER",
+ "BATCH_TO_SPACE_ND",
+ "SPACE_TO_BATCH_ND",
+ "TRANSPOSE",
+ "MEAN",
+ "SUB",
+ "DIV",
+ "SQUEEZE",
+ "UNIDIRECTIONAL_SEQUENCE_LSTM",
+ "STRIDED_SLICE",
+ "BIDIRECTIONAL_SEQUENCE_RNN",
+ "EXP",
+ "TOPK_V2",
+ "SPLIT",
+ "LOG_SOFTMAX",
+ "DELEGATE",
+ "BIDIRECTIONAL_SEQUENCE_LSTM",
+ "CAST",
+ "PRELU",
+ "MAXIMUM",
+ "ARG_MAX",
+ "MINIMUM",
+ "LESS",
+ "NEG",
+ "PADV2",
+ "GREATER",
+ "GREATER_EQUAL",
+ "LESS_EQUAL",
+ "SELECT",
+ "SLICE",
+ "SIN",
+ "TRANSPOSE_CONV",
+ "SPARSE_TO_DENSE",
+ "TILE",
+ "EXPAND_DIMS",
+ "EQUAL",
+ "NOT_EQUAL",
+ "LOG",
+ "SUM",
+ "SQRT",
+ "RSQRT",
+ "SHAPE",
+ "POW",
+ "ARG_MIN",
+ "FAKE_QUANT",
+ "REDUCE_PROD",
+ "REDUCE_MAX",
+ "PACK",
+ "LOGICAL_OR",
+ "ONE_HOT",
+ "LOGICAL_AND",
+ "LOGICAL_NOT",
+ "UNPACK",
+ "REDUCE_MIN",
+ "FLOOR_DIV",
+ "REDUCE_ANY",
+ "SQUARE",
+ "ZEROS_LIKE",
+ "FILL",
+ "FLOOR_MOD",
+ "RANGE",
+ "RESIZE_NEAREST_NEIGHBOR",
+ "LEAKY_RELU",
+ "SQUARED_DIFFERENCE",
+ "MIRROR_PAD",
+ "ABS",
+ "SPLIT_V",
+ "UNIQUE",
+ "CEIL",
+ "REVERSE_V2",
+ "ADD_N",
+ "GATHER_ND",
+ "COS",
+ "WHERE",
+ "RANK",
+ "ELU",
+ "REVERSE_SEQUENCE",
+ "MATRIX_DIAG",
+ "QUANTIZE",
+ "MATRIX_SET_DIAG",
+ "ROUND",
+ "HARD_SWISH",
+ "IF",
+ "WHILE",
+ "NON_MAX_SUPPRESSION_V4",
+ "NON_MAX_SUPPRESSION_V5",
+ "SCATTER_ND",
+ "SELECT_V2",
+ "DENSIFY",
+ "SEGMENT_SUM",
+ "BATCH_MATMUL",
+ "", // index 127 (start of unassigned gap)
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 137
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 147
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 157
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 167
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 177
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 187
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 197
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 207
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 217
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 227
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 237
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 247
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 253 (end of gap)
+ "INSTANCE_NORM",
+ nullptr};
+ return names;
+}
+
+// Returns the schema name for a BuiltinOperator code, "" for codes in the
+// unassigned 127-253 gap, and "" for codes outside [MIN, MAX]. The original
+// funneled an unchecked static_cast<int>(e) into size_t, so a negative or
+// >254 code read past the 256-slot names table; guard first, then index.
+inline const char *EnumNameBuiltinOperator(BuiltinOperator e)
+{
+ if (e < BuiltinOperator_MIN || e > BuiltinOperator_MAX)
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesBuiltinOperator()[index];
+}
+
+// Union tags for the per-operator options table (flatbuffers-generated).
+// Values 102-253 are deliberately unassigned; InstanceNormOptions sits at
+// 254 as an out-of-band extension, so MIN..MAX spans the gap.
+enum BuiltinOptions
+{
+ BuiltinOptions_NONE = 0,
+ BuiltinOptions_Conv2DOptions = 1,
+ BuiltinOptions_DepthwiseConv2DOptions = 2,
+ BuiltinOptions_ConcatEmbeddingsOptions = 3,
+ BuiltinOptions_LSHProjectionOptions = 4,
+ BuiltinOptions_Pool2DOptions = 5,
+ BuiltinOptions_SVDFOptions = 6,
+ BuiltinOptions_RNNOptions = 7,
+ BuiltinOptions_FullyConnectedOptions = 8,
+ BuiltinOptions_SoftmaxOptions = 9,
+ BuiltinOptions_ConcatenationOptions = 10,
+ BuiltinOptions_AddOptions = 11,
+ BuiltinOptions_L2NormOptions = 12,
+ BuiltinOptions_LocalResponseNormalizationOptions = 13,
+ BuiltinOptions_LSTMOptions = 14,
+ BuiltinOptions_ResizeBilinearOptions = 15,
+ BuiltinOptions_CallOptions = 16,
+ BuiltinOptions_ReshapeOptions = 17,
+ BuiltinOptions_SkipGramOptions = 18,
+ BuiltinOptions_SpaceToDepthOptions = 19,
+ BuiltinOptions_EmbeddingLookupSparseOptions = 20,
+ BuiltinOptions_MulOptions = 21,
+ BuiltinOptions_PadOptions = 22,
+ BuiltinOptions_GatherOptions = 23,
+ BuiltinOptions_BatchToSpaceNDOptions = 24,
+ BuiltinOptions_SpaceToBatchNDOptions = 25,
+ BuiltinOptions_TransposeOptions = 26,
+ BuiltinOptions_ReducerOptions = 27,
+ BuiltinOptions_SubOptions = 28,
+ BuiltinOptions_DivOptions = 29,
+ BuiltinOptions_SqueezeOptions = 30,
+ BuiltinOptions_SequenceRNNOptions = 31,
+ BuiltinOptions_StridedSliceOptions = 32,
+ BuiltinOptions_ExpOptions = 33,
+ BuiltinOptions_TopKV2Options = 34,
+ BuiltinOptions_SplitOptions = 35,
+ BuiltinOptions_LogSoftmaxOptions = 36,
+ BuiltinOptions_CastOptions = 37,
+ BuiltinOptions_DequantizeOptions = 38,
+ BuiltinOptions_MaximumMinimumOptions = 39,
+ BuiltinOptions_ArgMaxOptions = 40,
+ BuiltinOptions_LessOptions = 41,
+ BuiltinOptions_NegOptions = 42,
+ BuiltinOptions_PadV2Options = 43,
+ BuiltinOptions_GreaterOptions = 44,
+ BuiltinOptions_GreaterEqualOptions = 45,
+ BuiltinOptions_LessEqualOptions = 46,
+ BuiltinOptions_SelectOptions = 47,
+ BuiltinOptions_SliceOptions = 48,
+ BuiltinOptions_TransposeConvOptions = 49,
+ BuiltinOptions_SparseToDenseOptions = 50,
+ BuiltinOptions_TileOptions = 51,
+ BuiltinOptions_ExpandDimsOptions = 52,
+ BuiltinOptions_EqualOptions = 53,
+ BuiltinOptions_NotEqualOptions = 54,
+ BuiltinOptions_ShapeOptions = 55,
+ BuiltinOptions_PowOptions = 56,
+ BuiltinOptions_ArgMinOptions = 57,
+ BuiltinOptions_FakeQuantOptions = 58,
+ BuiltinOptions_PackOptions = 59,
+ BuiltinOptions_LogicalOrOptions = 60,
+ BuiltinOptions_OneHotOptions = 61,
+ BuiltinOptions_LogicalAndOptions = 62,
+ BuiltinOptions_LogicalNotOptions = 63,
+ BuiltinOptions_UnpackOptions = 64,
+ BuiltinOptions_FloorDivOptions = 65,
+ BuiltinOptions_SquareOptions = 66,
+ BuiltinOptions_ZerosLikeOptions = 67,
+ BuiltinOptions_FillOptions = 68,
+ BuiltinOptions_BidirectionalSequenceLSTMOptions = 69,
+ BuiltinOptions_BidirectionalSequenceRNNOptions = 70,
+ BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71,
+ BuiltinOptions_FloorModOptions = 72,
+ BuiltinOptions_RangeOptions = 73,
+ BuiltinOptions_ResizeNearestNeighborOptions = 74,
+ BuiltinOptions_LeakyReluOptions = 75,
+ BuiltinOptions_SquaredDifferenceOptions = 76,
+ BuiltinOptions_MirrorPadOptions = 77,
+ BuiltinOptions_AbsOptions = 78,
+ BuiltinOptions_SplitVOptions = 79,
+ BuiltinOptions_UniqueOptions = 80,
+ BuiltinOptions_ReverseV2Options = 81,
+ BuiltinOptions_AddNOptions = 82,
+ BuiltinOptions_GatherNdOptions = 83,
+ BuiltinOptions_CosOptions = 84,
+ BuiltinOptions_WhereOptions = 85,
+ BuiltinOptions_RankOptions = 86,
+ BuiltinOptions_ReverseSequenceOptions = 87,
+ BuiltinOptions_MatrixDiagOptions = 88,
+ BuiltinOptions_QuantizeOptions = 89,
+ BuiltinOptions_MatrixSetDiagOptions = 90,
+ BuiltinOptions_HardSwishOptions = 91,
+ BuiltinOptions_IfOptions = 92,
+ BuiltinOptions_WhileOptions = 93,
+ BuiltinOptions_DepthToSpaceOptions = 94,
+ BuiltinOptions_NonMaxSuppressionV4Options = 95,
+ BuiltinOptions_NonMaxSuppressionV5Options = 96,
+ BuiltinOptions_ScatterNdOptions = 97,
+ BuiltinOptions_SelectV2Options = 98,
+ BuiltinOptions_DensifyOptions = 99,
+ BuiltinOptions_SegmentSumOptions = 100,
+ BuiltinOptions_BatchMatMulOptions = 101,
+ BuiltinOptions_InstanceNormOptions = 254, // extension value; 102-253 unassigned
+ BuiltinOptions_MIN = BuiltinOptions_NONE,
+ BuiltinOptions_MAX = BuiltinOptions_InstanceNormOptions
+};
+
+// Static table of all 103 declared BuiltinOptions tags, in declaration
+// order (102 contiguous tags 0-101 plus InstanceNormOptions at 254).
+// Generated — the array order and length must track the enum exactly.
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[103]
+{
+ static const BuiltinOptions values[] = {BuiltinOptions_NONE,
+ BuiltinOptions_Conv2DOptions,
+ BuiltinOptions_DepthwiseConv2DOptions,
+ BuiltinOptions_ConcatEmbeddingsOptions,
+ BuiltinOptions_LSHProjectionOptions,
+ BuiltinOptions_Pool2DOptions,
+ BuiltinOptions_SVDFOptions,
+ BuiltinOptions_RNNOptions,
+ BuiltinOptions_FullyConnectedOptions,
+ BuiltinOptions_SoftmaxOptions,
+ BuiltinOptions_ConcatenationOptions,
+ BuiltinOptions_AddOptions,
+ BuiltinOptions_L2NormOptions,
+ BuiltinOptions_LocalResponseNormalizationOptions,
+ BuiltinOptions_LSTMOptions,
+ BuiltinOptions_ResizeBilinearOptions,
+ BuiltinOptions_CallOptions,
+ BuiltinOptions_ReshapeOptions,
+ BuiltinOptions_SkipGramOptions,
+ BuiltinOptions_SpaceToDepthOptions,
+ BuiltinOptions_EmbeddingLookupSparseOptions,
+ BuiltinOptions_MulOptions,
+ BuiltinOptions_PadOptions,
+ BuiltinOptions_GatherOptions,
+ BuiltinOptions_BatchToSpaceNDOptions,
+ BuiltinOptions_SpaceToBatchNDOptions,
+ BuiltinOptions_TransposeOptions,
+ BuiltinOptions_ReducerOptions,
+ BuiltinOptions_SubOptions,
+ BuiltinOptions_DivOptions,
+ BuiltinOptions_SqueezeOptions,
+ BuiltinOptions_SequenceRNNOptions,
+ BuiltinOptions_StridedSliceOptions,
+ BuiltinOptions_ExpOptions,
+ BuiltinOptions_TopKV2Options,
+ BuiltinOptions_SplitOptions,
+ BuiltinOptions_LogSoftmaxOptions,
+ BuiltinOptions_CastOptions,
+ BuiltinOptions_DequantizeOptions,
+ BuiltinOptions_MaximumMinimumOptions,
+ BuiltinOptions_ArgMaxOptions,
+ BuiltinOptions_LessOptions,
+ BuiltinOptions_NegOptions,
+ BuiltinOptions_PadV2Options,
+ BuiltinOptions_GreaterOptions,
+ BuiltinOptions_GreaterEqualOptions,
+ BuiltinOptions_LessEqualOptions,
+ BuiltinOptions_SelectOptions,
+ BuiltinOptions_SliceOptions,
+ BuiltinOptions_TransposeConvOptions,
+ BuiltinOptions_SparseToDenseOptions,
+ BuiltinOptions_TileOptions,
+ BuiltinOptions_ExpandDimsOptions,
+ BuiltinOptions_EqualOptions,
+ BuiltinOptions_NotEqualOptions,
+ BuiltinOptions_ShapeOptions,
+ BuiltinOptions_PowOptions,
+ BuiltinOptions_ArgMinOptions,
+ BuiltinOptions_FakeQuantOptions,
+ BuiltinOptions_PackOptions,
+ BuiltinOptions_LogicalOrOptions,
+ BuiltinOptions_OneHotOptions,
+ BuiltinOptions_LogicalAndOptions,
+ BuiltinOptions_LogicalNotOptions,
+ BuiltinOptions_UnpackOptions,
+ BuiltinOptions_FloorDivOptions,
+ BuiltinOptions_SquareOptions,
+ BuiltinOptions_ZerosLikeOptions,
+ BuiltinOptions_FillOptions,
+ BuiltinOptions_BidirectionalSequenceLSTMOptions,
+ BuiltinOptions_BidirectionalSequenceRNNOptions,
+ BuiltinOptions_UnidirectionalSequenceLSTMOptions,
+ BuiltinOptions_FloorModOptions,
+ BuiltinOptions_RangeOptions,
+ BuiltinOptions_ResizeNearestNeighborOptions,
+ BuiltinOptions_LeakyReluOptions,
+ BuiltinOptions_SquaredDifferenceOptions,
+ BuiltinOptions_MirrorPadOptions,
+ BuiltinOptions_AbsOptions,
+ BuiltinOptions_SplitVOptions,
+ BuiltinOptions_UniqueOptions,
+ BuiltinOptions_ReverseV2Options,
+ BuiltinOptions_AddNOptions,
+ BuiltinOptions_GatherNdOptions,
+ BuiltinOptions_CosOptions,
+ BuiltinOptions_WhereOptions,
+ BuiltinOptions_RankOptions,
+ BuiltinOptions_ReverseSequenceOptions,
+ BuiltinOptions_MatrixDiagOptions,
+ BuiltinOptions_QuantizeOptions,
+ BuiltinOptions_MatrixSetDiagOptions,
+ BuiltinOptions_HardSwishOptions,
+ BuiltinOptions_IfOptions,
+ BuiltinOptions_WhileOptions,
+ BuiltinOptions_DepthToSpaceOptions,
+ BuiltinOptions_NonMaxSuppressionV4Options,
+ BuiltinOptions_NonMaxSuppressionV5Options,
+ BuiltinOptions_ScatterNdOptions,
+ BuiltinOptions_SelectV2Options,
+ BuiltinOptions_DensifyOptions,
+ BuiltinOptions_SegmentSumOptions,
+ BuiltinOptions_BatchMatMulOptions,
+ BuiltinOptions_InstanceNormOptions};
+ return values;
+}
+
+// Name table indexed directly by BuiltinOptions value: 102 named entries
+// (0-101), 152 empty-string placeholders for the unassigned gap (102-253),
+// "InstanceNormOptions" at 254, and a nullptr terminator — 256 slots total.
+// Generated; the placeholder count is load-bearing, so index markers are
+// noted on the gap run below.
+inline const char *const *EnumNamesBuiltinOptions()
+{
+ static const char *const names[] = {"NONE",
+ "Conv2DOptions",
+ "DepthwiseConv2DOptions",
+ "ConcatEmbeddingsOptions",
+ "LSHProjectionOptions",
+ "Pool2DOptions",
+ "SVDFOptions",
+ "RNNOptions",
+ "FullyConnectedOptions",
+ "SoftmaxOptions",
+ "ConcatenationOptions",
+ "AddOptions",
+ "L2NormOptions",
+ "LocalResponseNormalizationOptions",
+ "LSTMOptions",
+ "ResizeBilinearOptions",
+ "CallOptions",
+ "ReshapeOptions",
+ "SkipGramOptions",
+ "SpaceToDepthOptions",
+ "EmbeddingLookupSparseOptions",
+ "MulOptions",
+ "PadOptions",
+ "GatherOptions",
+ "BatchToSpaceNDOptions",
+ "SpaceToBatchNDOptions",
+ "TransposeOptions",
+ "ReducerOptions",
+ "SubOptions",
+ "DivOptions",
+ "SqueezeOptions",
+ "SequenceRNNOptions",
+ "StridedSliceOptions",
+ "ExpOptions",
+ "TopKV2Options",
+ "SplitOptions",
+ "LogSoftmaxOptions",
+ "CastOptions",
+ "DequantizeOptions",
+ "MaximumMinimumOptions",
+ "ArgMaxOptions",
+ "LessOptions",
+ "NegOptions",
+ "PadV2Options",
+ "GreaterOptions",
+ "GreaterEqualOptions",
+ "LessEqualOptions",
+ "SelectOptions",
+ "SliceOptions",
+ "TransposeConvOptions",
+ "SparseToDenseOptions",
+ "TileOptions",
+ "ExpandDimsOptions",
+ "EqualOptions",
+ "NotEqualOptions",
+ "ShapeOptions",
+ "PowOptions",
+ "ArgMinOptions",
+ "FakeQuantOptions",
+ "PackOptions",
+ "LogicalOrOptions",
+ "OneHotOptions",
+ "LogicalAndOptions",
+ "LogicalNotOptions",
+ "UnpackOptions",
+ "FloorDivOptions",
+ "SquareOptions",
+ "ZerosLikeOptions",
+ "FillOptions",
+ "BidirectionalSequenceLSTMOptions",
+ "BidirectionalSequenceRNNOptions",
+ "UnidirectionalSequenceLSTMOptions",
+ "FloorModOptions",
+ "RangeOptions",
+ "ResizeNearestNeighborOptions",
+ "LeakyReluOptions",
+ "SquaredDifferenceOptions",
+ "MirrorPadOptions",
+ "AbsOptions",
+ "SplitVOptions",
+ "UniqueOptions",
+ "ReverseV2Options",
+ "AddNOptions",
+ "GatherNdOptions",
+ "CosOptions",
+ "WhereOptions",
+ "RankOptions",
+ "ReverseSequenceOptions",
+ "MatrixDiagOptions",
+ "QuantizeOptions",
+ "MatrixSetDiagOptions",
+ "HardSwishOptions",
+ "IfOptions",
+ "WhileOptions",
+ "DepthToSpaceOptions",
+ "NonMaxSuppressionV4Options",
+ "NonMaxSuppressionV5Options",
+ "ScatterNdOptions",
+ "SelectV2Options",
+ "DensifyOptions",
+ "SegmentSumOptions",
+ "BatchMatMulOptions",
+ "", // index 102 (start of unassigned gap)
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 112
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 122
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 132
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 142
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 152
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 162
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 172
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 182
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 192
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 202
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 212
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 222
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 232
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 242
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // index 252
+ "", // index 253 (end of gap)
+ "InstanceNormOptions",
+ nullptr};
+ return names;
+}
+
+// Returns the schema name for a BuiltinOptions tag, "" for tags in the
+// unassigned 102-253 gap, and "" for tags outside [MIN, MAX]. The original
+// funneled an unchecked static_cast<int>(e) into size_t, so a negative or
+// >254 tag read past the 256-slot names table; guard first, then index.
+inline const char *EnumNameBuiltinOptions(BuiltinOptions e)
+{
+ if (e < BuiltinOptions_MIN || e > BuiltinOptions_MAX)
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesBuiltinOptions()[index];
+}
+
+// Maps each BuiltinOptions union member type to its tag at compile time;
+// the primary template yields NONE for any non-member type. Generated
+// traits consumed by the flatbuffers union builder/verifier — one
+// specialization per union member, in tag order; keep in sync with the
+// BuiltinOptions enum above.
+template <typename T> struct BuiltinOptionsTraits
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NONE;
+};
+
+template <> struct BuiltinOptionsTraits<Conv2DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<DepthwiseConv2DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ConcatEmbeddingsOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LSHProjectionOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions;
+};
+
+template <> struct BuiltinOptionsTraits<Pool2DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SVDFOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions;
+};
+
+template <> struct BuiltinOptionsTraits<RNNOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions;
+};
+
+template <> struct BuiltinOptionsTraits<FullyConnectedOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SoftmaxOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ConcatenationOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions;
+};
+
+template <> struct BuiltinOptionsTraits<AddOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AddOptions;
+};
+
+template <> struct BuiltinOptionsTraits<L2NormOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LocalResponseNormalizationOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LSTMOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ResizeBilinearOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions;
+};
+
+template <> struct BuiltinOptionsTraits<CallOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CallOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ReshapeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SkipGramOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SpaceToDepthOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions;
+};
+
+template <> struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions;
+};
+
+template <> struct BuiltinOptionsTraits<MulOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MulOptions;
+};
+
+template <> struct BuiltinOptionsTraits<PadOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_PadOptions;
+};
+
+template <> struct BuiltinOptionsTraits<GatherOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions;
+};
+
+template <> struct BuiltinOptionsTraits<BatchToSpaceNDOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SpaceToBatchNDOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions;
+};
+
+template <> struct BuiltinOptionsTraits<TransposeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ReducerOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SubOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SubOptions;
+};
+
+template <> struct BuiltinOptionsTraits<DivOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DivOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SqueezeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SequenceRNNOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions;
+};
+
+template <> struct BuiltinOptionsTraits<StridedSliceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ExpOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions;
+};
+
+template <> struct BuiltinOptionsTraits<TopKV2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options;
+};
+
+template <> struct BuiltinOptionsTraits<SplitOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LogSoftmaxOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions;
+};
+
+template <> struct BuiltinOptionsTraits<CastOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CastOptions;
+};
+
+template <> struct BuiltinOptionsTraits<DequantizeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<MaximumMinimumOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ArgMaxOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LessOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LessOptions;
+};
+
+template <> struct BuiltinOptionsTraits<NegOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NegOptions;
+};
+
+template <> struct BuiltinOptionsTraits<PadV2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options;
+};
+
+template <> struct BuiltinOptionsTraits<GreaterOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions;
+};
+
+template <> struct BuiltinOptionsTraits<GreaterEqualOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LessEqualOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SelectOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SliceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<TransposeConvOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SparseToDenseOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions;
+};
+
+template <> struct BuiltinOptionsTraits<TileOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_TileOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ExpandDimsOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions;
+};
+
+template <> struct BuiltinOptionsTraits<EqualOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions;
+};
+
+template <> struct BuiltinOptionsTraits<NotEqualOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ShapeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<PowOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_PowOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ArgMinOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions;
+};
+
+template <> struct BuiltinOptionsTraits<FakeQuantOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions;
+};
+
+template <> struct BuiltinOptionsTraits<PackOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_PackOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LogicalOrOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions;
+};
+
+template <> struct BuiltinOptionsTraits<OneHotOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LogicalAndOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LogicalNotOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions;
+};
+
+template <> struct BuiltinOptionsTraits<UnpackOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions;
+};
+
+template <> struct BuiltinOptionsTraits<FloorDivOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SquareOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ZerosLikeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<FillOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FillOptions;
+};
+
+template <> struct BuiltinOptionsTraits<BidirectionalSequenceLSTMOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions;
+};
+
+template <> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
+};
+
+template <> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
+};
+
+template <> struct BuiltinOptionsTraits<FloorModOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions;
+};
+
+template <> struct BuiltinOptionsTraits<RangeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ResizeNearestNeighborOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LeakyReluOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SquaredDifferenceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<MirrorPadOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions;
+};
+
+template <> struct BuiltinOptionsTraits<AbsOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SplitVOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions;
+};
+
+template <> struct BuiltinOptionsTraits<UniqueOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UniqueOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ReverseV2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReverseV2Options;
+};
+
+template <> struct BuiltinOptionsTraits<AddNOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AddNOptions;
+};
+
+template <> struct BuiltinOptionsTraits<GatherNdOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GatherNdOptions;
+};
+
+template <> struct BuiltinOptionsTraits<CosOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CosOptions;
+};
+
+template <> struct BuiltinOptionsTraits<WhereOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions;
+};
+
+template <> struct BuiltinOptionsTraits<RankOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RankOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ReverseSequenceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReverseSequenceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<MatrixDiagOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MatrixDiagOptions;
+};
+
+template <> struct BuiltinOptionsTraits<QuantizeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_QuantizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<MatrixSetDiagOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MatrixSetDiagOptions;
+};
+
+template <> struct BuiltinOptionsTraits<HardSwishOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HardSwishOptions;
+};
+
+template <> struct BuiltinOptionsTraits<IfOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_IfOptions;
+};
+
+template <> struct BuiltinOptionsTraits<WhileOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_WhileOptions;
+};
+
+template <> struct BuiltinOptionsTraits<DepthToSpaceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DepthToSpaceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<NonMaxSuppressionV4Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV4Options;
+};
+
+template <> struct BuiltinOptionsTraits<NonMaxSuppressionV5Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV5Options;
+};
+
+template <> struct BuiltinOptionsTraits<ScatterNdOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ScatterNdOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SelectV2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SelectV2Options;
+};
+
+template <> struct BuiltinOptionsTraits<DensifyOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DensifyOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SegmentSumOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SegmentSumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<BatchMatMulOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions;
+};
+
+template <> struct BuiltinOptionsTraits<InstanceNormOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_InstanceNormOptions;
+};
+
+bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
+bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types);
+
+enum Padding
+{
+ Padding_SAME = 0,
+ Padding_VALID = 1,
+ Padding_MIN = Padding_SAME,
+ Padding_MAX = Padding_VALID
+};
+
+inline const Padding (&EnumValuesPadding())[2]
+{
+ static const Padding values[] = {Padding_SAME, Padding_VALID};
+ return values;
+}
+
+inline const char *const *EnumNamesPadding()
+{
+ static const char *const names[] = {"SAME", "VALID", nullptr};
+ return names;
+}
+
+inline const char *EnumNamePadding(Padding e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesPadding()[index];
+}
+
+enum ActivationFunctionType
+{
+ ActivationFunctionType_NONE = 0,
+ ActivationFunctionType_RELU = 1,
+ ActivationFunctionType_RELU_N1_TO_1 = 2,
+ ActivationFunctionType_RELU6 = 3,
+ ActivationFunctionType_TANH = 4,
+ ActivationFunctionType_SIGN_BIT = 5,
+ ActivationFunctionType_MIN = ActivationFunctionType_NONE,
+ ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT
+};
+
+inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6]
+{
+ static const ActivationFunctionType values[] = {
+ ActivationFunctionType_NONE, ActivationFunctionType_RELU,
+ ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6,
+ ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
+ return values;
+}
+
+inline const char *const *EnumNamesActivationFunctionType()
+{
+ static const char *const names[] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6",
+ "TANH", "SIGN_BIT", nullptr};
+ return names;
+}
+
+inline const char *EnumNameActivationFunctionType(ActivationFunctionType e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesActivationFunctionType()[index];
+}
+
+enum LSHProjectionType
+{
+ LSHProjectionType_UNKNOWN = 0,
+ LSHProjectionType_SPARSE = 1,
+ LSHProjectionType_DENSE = 2,
+ LSHProjectionType_MIN = LSHProjectionType_UNKNOWN,
+ LSHProjectionType_MAX = LSHProjectionType_DENSE
+};
+
+inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3]
+{
+ static const LSHProjectionType values[] = {LSHProjectionType_UNKNOWN, LSHProjectionType_SPARSE,
+ LSHProjectionType_DENSE};
+ return values;
+}
+
+inline const char *const *EnumNamesLSHProjectionType()
+{
+ static const char *const names[] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
+ return names;
+}
+
+inline const char *EnumNameLSHProjectionType(LSHProjectionType e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesLSHProjectionType()[index];
+}
+
+enum FullyConnectedOptionsWeightsFormat
+{
+ FullyConnectedOptionsWeightsFormat_DEFAULT = 0,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1,
+ FullyConnectedOptionsWeightsFormat_MIN = FullyConnectedOptionsWeightsFormat_DEFAULT,
+ FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8
+};
+
+inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2]
+{
+ static const FullyConnectedOptionsWeightsFormat values[] = {
+ FullyConnectedOptionsWeightsFormat_DEFAULT,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8};
+ return values;
+}
+
+inline const char *const *EnumNamesFullyConnectedOptionsWeightsFormat()
+{
+ static const char *const names[] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr};
+ return names;
+}
+
+inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesFullyConnectedOptionsWeightsFormat()[index];
+}
+
+enum LSTMKernelType
+{
+ LSTMKernelType_FULL = 0,
+ LSTMKernelType_BASIC = 1,
+ LSTMKernelType_MIN = LSTMKernelType_FULL,
+ LSTMKernelType_MAX = LSTMKernelType_BASIC
+};
+
+inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2]
+{
+ static const LSTMKernelType values[] = {LSTMKernelType_FULL, LSTMKernelType_BASIC};
+ return values;
+}
+
+inline const char *const *EnumNamesLSTMKernelType()
+{
+ static const char *const names[] = {"FULL", "BASIC", nullptr};
+ return names;
+}
+
+inline const char *EnumNameLSTMKernelType(LSTMKernelType e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesLSTMKernelType()[index];
+}
+
+enum CombinerType
+{
+ CombinerType_SUM = 0,
+ CombinerType_MEAN = 1,
+ CombinerType_SQRTN = 2,
+ CombinerType_MIN = CombinerType_SUM,
+ CombinerType_MAX = CombinerType_SQRTN
+};
+
+inline const CombinerType (&EnumValuesCombinerType())[3]
+{
+ static const CombinerType values[] = {CombinerType_SUM, CombinerType_MEAN, CombinerType_SQRTN};
+ return values;
+}
+
+inline const char *const *EnumNamesCombinerType()
+{
+ static const char *const names[] = {"SUM", "MEAN", "SQRTN", nullptr};
+ return names;
+}
+
+inline const char *EnumNameCombinerType(CombinerType e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesCombinerType()[index];
+}
+
+enum MirrorPadMode
+{
+ MirrorPadMode_REFLECT = 0,
+ MirrorPadMode_SYMMETRIC = 1,
+ MirrorPadMode_MIN = MirrorPadMode_REFLECT,
+ MirrorPadMode_MAX = MirrorPadMode_SYMMETRIC
+};
+
+inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2]
+{
+ static const MirrorPadMode values[] = {MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC};
+ return values;
+}
+
+inline const char *const *EnumNamesMirrorPadMode()
+{
+ static const char *const names[] = {"REFLECT", "SYMMETRIC", nullptr};
+ return names;
+}
+
+inline const char *EnumNameMirrorPadMode(MirrorPadMode e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesMirrorPadMode()[index];
+}
+
+enum CustomOptionsFormat
+{
+ CustomOptionsFormat_FLEXBUFFERS = 0,
+ CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,
+ CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS
+};
+
+inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1]
+{
+ static const CustomOptionsFormat values[] = {CustomOptionsFormat_FLEXBUFFERS};
+ return values;
+}
+
+inline const char *const *EnumNamesCustomOptionsFormat()
+{
+ static const char *const names[] = {"FLEXBUFFERS", nullptr};
+ return names;
+}
+
+inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesCustomOptionsFormat()[index];
+}
+
+enum DataFormat
+{
+ DataFormat_CHANNELS_LAST = 0,
+ DataFormat_CHANNELS_FIRST = 1,
+ DataFormat_MIN = DataFormat_CHANNELS_LAST,
+ DataFormat_MAX = DataFormat_CHANNELS_FIRST
+};
+
+inline const DataFormat (&EnumValuesDataFormat())[2]
+{
+ static const DataFormat values[] = {DataFormat_CHANNELS_LAST, DataFormat_CHANNELS_FIRST};
+ return values;
+}
+
+inline const char *const *EnumNamesDataFormat()
+{
+ static const char *const names[] = {"CHANNELS_LAST", "CHANNELS_FIRST", nullptr};
+ return names;
+}
+
+inline const char *EnumNameDataFormat(DataFormat e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesDataFormat()[index];
+}
+
+struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_CUSTOM = 4
+ };
+ const flatbuffers::Vector<uint8_t> *custom() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CUSTOM) &&
+ verifier.VerifyVector(custom()) && verifier.EndTable();
+ }
+};
+
+struct CustomQuantizationBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom)
+ {
+ fbb_.AddOffset(CustomQuantization::VT_CUSTOM, custom);
+ }
+ explicit CustomQuantizationBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &);
+ flatbuffers::Offset<CustomQuantization> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CustomQuantization>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CustomQuantization>
+CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom = 0)
+{
+ CustomQuantizationBuilder builder_(_fbb);
+ builder_.add_custom(custom);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<CustomQuantization>
+CreateCustomQuantizationDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<uint8_t> *custom = nullptr)
+{
+ return circle::CreateCustomQuantization(_fbb, custom ? _fbb.CreateVector<uint8_t>(*custom) : 0);
+}
+
+struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_MIN = 4,
+ VT_MAX = 6,
+ VT_SCALE = 8,
+ VT_ZERO_POINT = 10,
+ VT_DETAILS_TYPE = 12,
+ VT_DETAILS = 14,
+ VT_QUANTIZED_DIMENSION = 16
+ };
+ const flatbuffers::Vector<float> *min() const
+ {
+ return GetPointer<const flatbuffers::Vector<float> *>(VT_MIN);
+ }
+ const flatbuffers::Vector<float> *max() const
+ {
+ return GetPointer<const flatbuffers::Vector<float> *>(VT_MAX);
+ }
+ const flatbuffers::Vector<float> *scale() const
+ {
+ return GetPointer<const flatbuffers::Vector<float> *>(VT_SCALE);
+ }
+ const flatbuffers::Vector<int64_t> *zero_point() const
+ {
+ return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT);
+ }
+ QuantizationDetails details_type() const
+ {
+ return static_cast<QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
+ }
+ const void *details() const { return GetPointer<const void *>(VT_DETAILS); }
+ template <typename T> const T *details_as() const;
+ const CustomQuantization *details_as_CustomQuantization() const
+ {
+ return details_type() == QuantizationDetails_CustomQuantization
+ ? static_cast<const CustomQuantization *>(details())
+ : nullptr;
+ }
+ int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_MIN) &&
+ verifier.VerifyVector(min()) && VerifyOffset(verifier, VT_MAX) &&
+ verifier.VerifyVector(max()) && VerifyOffset(verifier, VT_SCALE) &&
+ verifier.VerifyVector(scale()) && VerifyOffset(verifier, VT_ZERO_POINT) &&
+ verifier.VerifyVector(zero_point()) && VerifyField<uint8_t>(verifier, VT_DETAILS_TYPE) &&
+ VerifyOffset(verifier, VT_DETAILS) &&
+ VerifyQuantizationDetails(verifier, details(), details_type()) &&
+ VerifyField<int32_t>(verifier, VT_QUANTIZED_DIMENSION) && verifier.EndTable();
+ }
+};
+
+template <>
+inline const CustomQuantization *QuantizationParameters::details_as<CustomQuantization>() const
+{
+ return details_as_CustomQuantization();
+}
+
+struct QuantizationParametersBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min)
+ {
+ fbb_.AddOffset(QuantizationParameters::VT_MIN, min);
+ }
+ void add_max(flatbuffers::Offset<flatbuffers::Vector<float>> max)
+ {
+ fbb_.AddOffset(QuantizationParameters::VT_MAX, max);
+ }
+ void add_scale(flatbuffers::Offset<flatbuffers::Vector<float>> scale)
+ {
+ fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale);
+ }
+ void add_zero_point(flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point)
+ {
+ fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point);
+ }
+ void add_details_type(QuantizationDetails details_type)
+ {
+ fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE,
+ static_cast<uint8_t>(details_type), 0);
+ }
+ void add_details(flatbuffers::Offset<void> details)
+ {
+ fbb_.AddOffset(QuantizationParameters::VT_DETAILS, details);
+ }
+ void add_quantized_dimension(int32_t quantized_dimension)
+ {
+ fbb_.AddElement<int32_t>(QuantizationParameters::VT_QUANTIZED_DIMENSION, quantized_dimension,
+ 0);
+ }
+ explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &);
+ flatbuffers::Offset<QuantizationParameters> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<QuantizationParameters>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<QuantizationParameters>
+CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
+ QuantizationDetails details_type = QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+{
+ QuantizationParametersBuilder builder_(_fbb);
+ builder_.add_quantized_dimension(quantized_dimension);
+ builder_.add_details(details);
+ builder_.add_zero_point(zero_point);
+ builder_.add_scale(scale);
+ builder_.add_max(max);
+ builder_.add_min(min);
+ builder_.add_details_type(details_type);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
+ const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
+ const std::vector<int64_t> *zero_point = nullptr,
+ QuantizationDetails details_type = QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+{
+ return circle::CreateQuantizationParameters(
+ _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0,
+ scale ? _fbb.CreateVector<float>(*scale) : 0,
+ zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details,
+ quantized_dimension);
+}
+
+struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_VALUES = 4
+ };
+ const flatbuffers::Vector<int32_t> *values() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_VALUES);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_VALUES) &&
+ verifier.VerifyVector(values()) && verifier.EndTable();
+ }
+};
+
+struct Int32VectorBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_values(flatbuffers::Offset<flatbuffers::Vector<int32_t>> values)
+ {
+ fbb_.AddOffset(Int32Vector::VT_VALUES, values);
+ }
+ explicit Int32VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ Int32VectorBuilder &operator=(const Int32VectorBuilder &);
+ flatbuffers::Offset<Int32Vector> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Int32Vector>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Int32Vector>
+CreateInt32Vector(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> values = 0)
+{
+ Int32VectorBuilder builder_(_fbb);
+ builder_.add_values(values);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Int32Vector>
+CreateInt32VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<int32_t> *values = nullptr)
+{
+ return circle::CreateInt32Vector(_fbb, values ? _fbb.CreateVector<int32_t>(*values) : 0);
+}
+
+struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_VALUES = 4
+ };
+ const flatbuffers::Vector<uint16_t> *values() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint16_t> *>(VT_VALUES);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_VALUES) &&
+ verifier.VerifyVector(values()) && verifier.EndTable();
+ }
+};
+
+struct Uint16VectorBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_values(flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values)
+ {
+ fbb_.AddOffset(Uint16Vector::VT_VALUES, values);
+ }
+ explicit Uint16VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ Uint16VectorBuilder &operator=(const Uint16VectorBuilder &);
+ flatbuffers::Offset<Uint16Vector> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Uint16Vector>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Uint16Vector>
+CreateUint16Vector(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values = 0)
+{
+ Uint16VectorBuilder builder_(_fbb);
+ builder_.add_values(values);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Uint16Vector>
+CreateUint16VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<uint16_t> *values = nullptr)
+{
+ return circle::CreateUint16Vector(_fbb, values ? _fbb.CreateVector<uint16_t>(*values) : 0);
+}
+
+struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_VALUES = 4
+ };
+ const flatbuffers::Vector<uint8_t> *values() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_VALUES);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_VALUES) &&
+ verifier.VerifyVector(values()) && verifier.EndTable();
+ }
+};
+
+struct Uint8VectorBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_values(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values)
+ {
+ fbb_.AddOffset(Uint8Vector::VT_VALUES, values);
+ }
+ explicit Uint8VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ Uint8VectorBuilder &operator=(const Uint8VectorBuilder &);
+ flatbuffers::Offset<Uint8Vector> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Uint8Vector>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Uint8Vector>
+CreateUint8Vector(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values = 0)
+{
+ Uint8VectorBuilder builder_(_fbb);
+ builder_.add_values(values);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Uint8Vector>
+CreateUint8VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<uint8_t> *values = nullptr)
+{
+ return circle::CreateUint8Vector(_fbb, values ? _fbb.CreateVector<uint8_t>(*values) : 0);
+}
+
+struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_FORMAT = 4,
+ VT_DENSE_SIZE = 6,
+ VT_ARRAY_SEGMENTS_TYPE = 8,
+ VT_ARRAY_SEGMENTS = 10,
+ VT_ARRAY_INDICES_TYPE = 12,
+ VT_ARRAY_INDICES = 14
+ };
+ DimensionType format() const
+ {
+ return static_cast<DimensionType>(GetField<int8_t>(VT_FORMAT, 0));
+ }
+ int32_t dense_size() const { return GetField<int32_t>(VT_DENSE_SIZE, 0); }
+ SparseIndexVector array_segments_type() const
+ {
+ return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0));
+ }
+ const void *array_segments() const { return GetPointer<const void *>(VT_ARRAY_SEGMENTS); }
+ template <typename T> const T *array_segments_as() const;
+ const Int32Vector *array_segments_as_Int32Vector() const
+ {
+ return array_segments_type() == SparseIndexVector_Int32Vector
+ ? static_cast<const Int32Vector *>(array_segments())
+ : nullptr;
+ }
+ const Uint16Vector *array_segments_as_Uint16Vector() const
+ {
+ return array_segments_type() == SparseIndexVector_Uint16Vector
+ ? static_cast<const Uint16Vector *>(array_segments())
+ : nullptr;
+ }
+ const Uint8Vector *array_segments_as_Uint8Vector() const
+ {
+ return array_segments_type() == SparseIndexVector_Uint8Vector
+ ? static_cast<const Uint8Vector *>(array_segments())
+ : nullptr;
+ }
+ SparseIndexVector array_indices_type() const
+ {
+ return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0));
+ }
+ const void *array_indices() const { return GetPointer<const void *>(VT_ARRAY_INDICES); }
+ template <typename T> const T *array_indices_as() const;
+ const Int32Vector *array_indices_as_Int32Vector() const
+ {
+ return array_indices_type() == SparseIndexVector_Int32Vector
+ ? static_cast<const Int32Vector *>(array_indices())
+ : nullptr;
+ }
+ const Uint16Vector *array_indices_as_Uint16Vector() const
+ {
+ return array_indices_type() == SparseIndexVector_Uint16Vector
+ ? static_cast<const Uint16Vector *>(array_indices())
+ : nullptr;
+ }
+ const Uint8Vector *array_indices_as_Uint8Vector() const
+ {
+ return array_indices_type() == SparseIndexVector_Uint8Vector
+ ? static_cast<const Uint8Vector *>(array_indices())
+ : nullptr;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_FORMAT) &&
+ VerifyField<int32_t>(verifier, VT_DENSE_SIZE) &&
+ VerifyField<uint8_t>(verifier, VT_ARRAY_SEGMENTS_TYPE) &&
+ VerifyOffset(verifier, VT_ARRAY_SEGMENTS) &&
+ VerifySparseIndexVector(verifier, array_segments(), array_segments_type()) &&
+ VerifyField<uint8_t>(verifier, VT_ARRAY_INDICES_TYPE) &&
+ VerifyOffset(verifier, VT_ARRAY_INDICES) &&
+ VerifySparseIndexVector(verifier, array_indices(), array_indices_type()) &&
+ verifier.EndTable();
+ }
+};
+
+template <> inline const Int32Vector *DimensionMetadata::array_segments_as<Int32Vector>() const
+{
+ return array_segments_as_Int32Vector();
+}
+
+template <> inline const Uint16Vector *DimensionMetadata::array_segments_as<Uint16Vector>() const
+{
+ return array_segments_as_Uint16Vector();
+}
+
+template <> inline const Uint8Vector *DimensionMetadata::array_segments_as<Uint8Vector>() const
+{
+ return array_segments_as_Uint8Vector();
+}
+
+template <> inline const Int32Vector *DimensionMetadata::array_indices_as<Int32Vector>() const
+{
+ return array_indices_as_Int32Vector();
+}
+
+template <> inline const Uint16Vector *DimensionMetadata::array_indices_as<Uint16Vector>() const
+{
+ return array_indices_as_Uint16Vector();
+}
+
+template <> inline const Uint8Vector *DimensionMetadata::array_indices_as<Uint8Vector>() const
+{
+ return array_indices_as_Uint8Vector();
+}
+
+struct DimensionMetadataBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_format(DimensionType format)
+ {
+ fbb_.AddElement<int8_t>(DimensionMetadata::VT_FORMAT, static_cast<int8_t>(format), 0);
+ }
+ void add_dense_size(int32_t dense_size)
+ {
+ fbb_.AddElement<int32_t>(DimensionMetadata::VT_DENSE_SIZE, dense_size, 0);
+ }
+ void add_array_segments_type(SparseIndexVector array_segments_type)
+ {
+ fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_SEGMENTS_TYPE,
+ static_cast<uint8_t>(array_segments_type), 0);
+ }
+ void add_array_segments(flatbuffers::Offset<void> array_segments)
+ {
+ fbb_.AddOffset(DimensionMetadata::VT_ARRAY_SEGMENTS, array_segments);
+ }
+ void add_array_indices_type(SparseIndexVector array_indices_type)
+ {
+ fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_INDICES_TYPE,
+ static_cast<uint8_t>(array_indices_type), 0);
+ }
+ void add_array_indices(flatbuffers::Offset<void> array_indices)
+ {
+ fbb_.AddOffset(DimensionMetadata::VT_ARRAY_INDICES, array_indices);
+ }
+ explicit DimensionMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ DimensionMetadataBuilder &operator=(const DimensionMetadataBuilder &);
+ flatbuffers::Offset<DimensionMetadata> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DimensionMetadata>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DimensionMetadata>
+CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb,
+ DimensionType format = DimensionType_DENSE, int32_t dense_size = 0,
+ SparseIndexVector array_segments_type = SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_segments = 0,
+ SparseIndexVector array_indices_type = SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_indices = 0)
+{
+ DimensionMetadataBuilder builder_(_fbb);
+ builder_.add_array_indices(array_indices);
+ builder_.add_array_segments(array_segments);
+ builder_.add_dense_size(dense_size);
+ builder_.add_array_indices_type(array_indices_type);
+ builder_.add_array_segments_type(array_segments_type);
+ builder_.add_format(format);
+ return builder_.Finish();
+}
+
+struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_TRAVERSAL_ORDER = 4,
+ VT_BLOCK_MAP = 6,
+ VT_DIM_METADATA = 8
+ };
+ const flatbuffers::Vector<int32_t> *traversal_order() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_TRAVERSAL_ORDER);
+ }
+ const flatbuffers::Vector<int32_t> *block_map() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BLOCK_MAP);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *>(
+ VT_DIM_METADATA);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_TRAVERSAL_ORDER) &&
+ verifier.VerifyVector(traversal_order()) && VerifyOffset(verifier, VT_BLOCK_MAP) &&
+ verifier.VerifyVector(block_map()) && VerifyOffset(verifier, VT_DIM_METADATA) &&
+ verifier.VerifyVector(dim_metadata()) && verifier.VerifyVectorOfTables(dim_metadata()) &&
+ verifier.EndTable();
+ }
+};
+
+// Incremental builder for the SparsityParameters table.
+// NOTE(review): this whole section appears to be FlatBuffers-generated code
+// (circle schema) captured as a diff; prefer regenerating from the schema
+// over hand edits.
+struct SparsityParametersBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Each add_* records one optional field in the table under construction.
+  void add_traversal_order(flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order)
+  {
+    fbb_.AddOffset(SparsityParameters::VT_TRAVERSAL_ORDER, traversal_order);
+  }
+  void add_block_map(flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map)
+  {
+    fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map);
+  }
+  void add_dim_metadata(
+      flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata)
+  {
+    fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata);
+  }
+  explicit SparsityParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: disallows assignment (pre-C++11 idiom).
+  SparsityParametersBuilder &operator=(const SparsityParametersBuilder &);
+  // Closes the table and returns its offset within the buffer.
+  flatbuffers::Offset<SparsityParameters> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SparsityParameters>(end);
+    return o;
+  }
+};
+
+// One-call convenience wrapper around SparsityParametersBuilder.
+inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata =
+        0)
+{
+  SparsityParametersBuilder builder_(_fbb);
+  builder_.add_dim_metadata(dim_metadata);
+  builder_.add_block_map(block_map);
+  builder_.add_traversal_order(traversal_order);
+  return builder_.Finish();
+}
+
+// "Direct" variant: serializes std::vector arguments into flatbuffer vectors
+// first; null pointers become absent (default) fields.
+inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect(
+    flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
+    const std::vector<int32_t> *block_map = nullptr,
+    const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr)
+{
+  return circle::CreateSparsityParameters(
+      _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0,
+      block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0,
+      dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0);
+}
+
+// Read-only accessor over a serialized Tensor table.
+struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot offsets of the table's fields.
+  enum
+  {
+    VT_SHAPE = 4,
+    VT_TYPE = 6,
+    VT_BUFFER = 8,
+    VT_NAME = 10,
+    VT_QUANTIZATION = 12,
+    VT_IS_VARIABLE = 14,
+    VT_SPARSITY = 16,
+    VT_SHAPE_SIGNATURE = 18
+  };
+  const flatbuffers::Vector<int32_t> *shape() const
+  {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
+  }
+  // Defaults to TensorType value 0 when the field is absent.
+  TensorType type() const { return static_cast<TensorType>(GetField<int8_t>(VT_TYPE, 0)); }
+  // Index into the model's buffer table (0 by default).
+  uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
+  const flatbuffers::String *name() const
+  {
+    return GetPointer<const flatbuffers::String *>(VT_NAME);
+  }
+  const QuantizationParameters *quantization() const
+  {
+    return GetPointer<const QuantizationParameters *>(VT_QUANTIZATION);
+  }
+  bool is_variable() const { return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0; }
+  const SparsityParameters *sparsity() const
+  {
+    return GetPointer<const SparsityParameters *>(VT_SPARSITY);
+  }
+  const flatbuffers::Vector<int32_t> *shape_signature() const
+  {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE_SIGNATURE);
+  }
+  // Structural validation of every field against the buffer bounds
+  // (used when reading untrusted model files).
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SHAPE) &&
+           verifier.VerifyVector(shape()) && VerifyField<int8_t>(verifier, VT_TYPE) &&
+           VerifyField<uint32_t>(verifier, VT_BUFFER) && VerifyOffset(verifier, VT_NAME) &&
+           verifier.VerifyString(name()) && VerifyOffset(verifier, VT_QUANTIZATION) &&
+           verifier.VerifyTable(quantization()) && VerifyField<uint8_t>(verifier, VT_IS_VARIABLE) &&
+           VerifyOffset(verifier, VT_SPARSITY) && verifier.VerifyTable(sparsity()) &&
+           VerifyOffset(verifier, VT_SHAPE_SIGNATURE) && verifier.VerifyVector(shape_signature()) &&
+           verifier.EndTable();
+  }
+};
+
+// Incremental builder for the Tensor table.
+struct TensorBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape)
+  {
+    fbb_.AddOffset(Tensor::VT_SHAPE, shape);
+  }
+  void add_type(TensorType type)
+  {
+    fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0);
+  }
+  void add_buffer(uint32_t buffer) { fbb_.AddElement<uint32_t>(Tensor::VT_BUFFER, buffer, 0); }
+  void add_name(flatbuffers::Offset<flatbuffers::String> name)
+  {
+    fbb_.AddOffset(Tensor::VT_NAME, name);
+  }
+  void add_quantization(flatbuffers::Offset<QuantizationParameters> quantization)
+  {
+    fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization);
+  }
+  void add_is_variable(bool is_variable)
+  {
+    fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0);
+  }
+  void add_sparsity(flatbuffers::Offset<SparsityParameters> sparsity)
+  {
+    fbb_.AddOffset(Tensor::VT_SPARSITY, sparsity);
+  }
+  void add_shape_signature(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature)
+  {
+    fbb_.AddOffset(Tensor::VT_SHAPE_SIGNATURE, shape_signature);
+  }
+  explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  TensorBuilder &operator=(const TensorBuilder &);
+  flatbuffers::Offset<Tensor> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Tensor>(end);
+    return o;
+  }
+};
+
+// Builds a complete Tensor table in one call.
+// NOTE(review): fields are added with the 4-byte ones before the 1-byte ones;
+// keep this order — it is the layout the generator emits.
+inline flatbuffers::Offset<Tensor>
+CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
+             flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
+             TensorType type = TensorType_FLOAT32, uint32_t buffer = 0,
+             flatbuffers::Offset<flatbuffers::String> name = 0,
+             flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
+             flatbuffers::Offset<SparsityParameters> sparsity = 0,
+             flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0)
+{
+  TensorBuilder builder_(_fbb);
+  builder_.add_shape_signature(shape_signature);
+  builder_.add_sparsity(sparsity);
+  builder_.add_quantization(quantization);
+  builder_.add_name(name);
+  builder_.add_buffer(buffer);
+  builder_.add_shape(shape);
+  builder_.add_is_variable(is_variable);
+  builder_.add_type(type);
+  return builder_.Finish();
+}
+
+// Direct variant: copies std::vector / C-string arguments into the buffer first.
+inline flatbuffers::Offset<Tensor> CreateTensorDirect(
+    flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
+    TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr,
+    flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
+    flatbuffers::Offset<SparsityParameters> sparsity = 0,
+    const std::vector<int32_t> *shape_signature = nullptr)
+{
+  return circle::CreateTensor(_fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer,
+                              name ? _fbb.CreateString(name) : 0, quantization, is_variable,
+                              sparsity,
+                              shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0);
+}
+
+// Read-only accessor for the serialized Conv2DOptions table.
+struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot offsets of the table's fields.
+  enum
+  {
+    VT_PADDING = 4,
+    VT_STRIDE_W = 6,
+    VT_STRIDE_H = 8,
+    VT_FUSED_ACTIVATION_FUNCTION = 10,
+    VT_DILATION_W_FACTOR = 12,
+    VT_DILATION_H_FACTOR = 14
+  };
+  Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+  int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+  int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  // Dilation factors default to 1 (no dilation) when absent.
+  int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
+  int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
+  // Structural validation against the buffer bounds.
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
+  }
+};
+
+// Incremental builder for the Conv2DOptions table.
+struct Conv2DOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_padding(Padding padding)
+  {
+    fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+  }
+  void add_stride_w(int32_t stride_w)
+  {
+    fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_W, stride_w, 0);
+  }
+  void add_stride_h(int32_t stride_h)
+  {
+    fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0);
+  }
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_dilation_w_factor(int32_t dilation_w_factor)
+  {
+    fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+  }
+  void add_dilation_h_factor(int32_t dilation_h_factor)
+  {
+    fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+  }
+  explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &);
+  flatbuffers::Offset<Conv2DOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Conv2DOptions>(end);
+    return o;
+  }
+};
+
+// Builds a complete Conv2DOptions table in one call.
+inline flatbuffers::Offset<Conv2DOptions>
+CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+                    int32_t stride_w = 0, int32_t stride_h = 0,
+                    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+                    int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+{
+  Conv2DOptionsBuilder builder_(_fbb);
+  builder_.add_dilation_h_factor(dilation_h_factor);
+  builder_.add_dilation_w_factor(dilation_w_factor);
+  builder_.add_stride_h(stride_h);
+  builder_.add_stride_w(stride_w);
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_padding(padding);
+  return builder_.Finish();
+}
+
+// Read-only accessor for the serialized Pool2DOptions table.
+struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot offsets of the table's fields.
+  enum
+  {
+    VT_PADDING = 4,
+    VT_STRIDE_W = 6,
+    VT_STRIDE_H = 8,
+    VT_FILTER_WIDTH = 10,
+    VT_FILTER_HEIGHT = 12,
+    VT_FUSED_ACTIVATION_FUNCTION = 14
+  };
+  Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+  int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+  int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+  int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); }
+  int32_t filter_height() const { return GetField<int32_t>(VT_FILTER_HEIGHT, 0); }
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  // Structural validation against the buffer bounds.
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+           VerifyField<int32_t>(verifier, VT_FILTER_WIDTH) &&
+           VerifyField<int32_t>(verifier, VT_FILTER_HEIGHT) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+  }
+};
+
+// Incremental builder for the Pool2DOptions table.
+struct Pool2DOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_padding(Padding padding)
+  {
+    fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+  }
+  void add_stride_w(int32_t stride_w)
+  {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_W, stride_w, 0);
+  }
+  void add_stride_h(int32_t stride_h)
+  {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_H, stride_h, 0);
+  }
+  void add_filter_width(int32_t filter_width)
+  {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0);
+  }
+  void add_filter_height(int32_t filter_height)
+  {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
+  }
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &);
+  flatbuffers::Offset<Pool2DOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Pool2DOptions>(end);
+    return o;
+  }
+};
+
+// Builds a complete Pool2DOptions table in one call.
+inline flatbuffers::Offset<Pool2DOptions>
+CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+                    int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0,
+                    int32_t filter_height = 0,
+                    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+{
+  Pool2DOptionsBuilder builder_(_fbb);
+  builder_.add_filter_height(filter_height);
+  builder_.add_filter_width(filter_width);
+  builder_.add_stride_h(stride_h);
+  builder_.add_stride_w(stride_w);
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_padding(padding);
+  return builder_.Finish();
+}
+
+// Read-only accessor for the serialized DepthwiseConv2DOptions table.
+struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot offsets of the table's fields.
+  enum
+  {
+    VT_PADDING = 4,
+    VT_STRIDE_W = 6,
+    VT_STRIDE_H = 8,
+    VT_DEPTH_MULTIPLIER = 10,
+    VT_FUSED_ACTIVATION_FUNCTION = 12,
+    VT_DILATION_W_FACTOR = 14,
+    VT_DILATION_H_FACTOR = 16
+  };
+  Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+  int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+  int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+  int32_t depth_multiplier() const { return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0); }
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  // Dilation factors default to 1 (no dilation) when absent.
+  int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
+  int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
+  // Structural validation against the buffer bounds.
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+           VerifyField<int32_t>(verifier, VT_DEPTH_MULTIPLIER) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
+  }
+};
+
+// Incremental builder for the DepthwiseConv2DOptions table.
+struct DepthwiseConv2DOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_padding(Padding padding)
+  {
+    fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+  }
+  void add_stride_w(int32_t stride_w)
+  {
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0);
+  }
+  void add_stride_h(int32_t stride_h)
+  {
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0);
+  }
+  void add_depth_multiplier(int32_t depth_multiplier)
+  {
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0);
+  }
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_dilation_w_factor(int32_t dilation_w_factor)
+  {
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+  }
+  void add_dilation_h_factor(int32_t dilation_h_factor)
+  {
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+  }
+  explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &);
+  flatbuffers::Offset<DepthwiseConv2DOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<DepthwiseConv2DOptions>(end);
+    return o;
+  }
+};
+
+// Builds a complete DepthwiseConv2DOptions table in one call.
+inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0,
+    int32_t stride_h = 0, int32_t depth_multiplier = 0,
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+{
+  DepthwiseConv2DOptionsBuilder builder_(_fbb);
+  builder_.add_dilation_h_factor(dilation_h_factor);
+  builder_.add_dilation_w_factor(dilation_w_factor);
+  builder_.add_depth_multiplier(depth_multiplier);
+  builder_.add_stride_h(stride_h);
+  builder_.add_stride_w(stride_w);
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_padding(padding);
+  return builder_.Finish();
+}
+
+// Read-only accessor for the serialized ConcatEmbeddingsOptions table.
+struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot offsets of the table's fields.
+  enum
+  {
+    VT_NUM_CHANNELS = 4,
+    VT_NUM_COLUMNS_PER_CHANNEL = 6,
+    VT_EMBEDDING_DIM_PER_CHANNEL = 8
+  };
+  int32_t num_channels() const { return GetField<int32_t>(VT_NUM_CHANNELS, 0); }
+  const flatbuffers::Vector<int32_t> *num_columns_per_channel() const
+  {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NUM_COLUMNS_PER_CHANNEL);
+  }
+  const flatbuffers::Vector<int32_t> *embedding_dim_per_channel() const
+  {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_EMBEDDING_DIM_PER_CHANNEL);
+  }
+  // Structural validation against the buffer bounds.
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_CHANNELS) &&
+           VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) &&
+           verifier.VerifyVector(num_columns_per_channel()) &&
+           VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) &&
+           verifier.VerifyVector(embedding_dim_per_channel()) && verifier.EndTable();
+  }
+};
+
+// Incremental builder for the ConcatEmbeddingsOptions table.
+struct ConcatEmbeddingsOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_num_channels(int32_t num_channels)
+  {
+    fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);
+  }
+  void add_num_columns_per_channel(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
+  {
+    fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
+  }
+  void add_embedding_dim_per_channel(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
+  {
+    fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL,
+                   embedding_dim_per_channel);
+  }
+  explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &);
+  flatbuffers::Offset<ConcatEmbeddingsOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ConcatEmbeddingsOptions>(end);
+    return o;
+  }
+};
+
+// Builds a complete ConcatEmbeddingsOptions table in one call.
+inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
+{
+  ConcatEmbeddingsOptionsBuilder builder_(_fbb);
+  builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
+  builder_.add_num_columns_per_channel(num_columns_per_channel);
+  builder_.add_num_channels(num_channels);
+  return builder_.Finish();
+}
+
+// Direct variant: serializes std::vector arguments into flatbuffer vectors first.
+inline flatbuffers::Offset<ConcatEmbeddingsOptions>
+CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
+                                    const std::vector<int32_t> *num_columns_per_channel = nullptr,
+                                    const std::vector<int32_t> *embedding_dim_per_channel = nullptr)
+{
+  return circle::CreateConcatEmbeddingsOptions(
+      _fbb, num_channels,
+      num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0,
+      embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0);
+}
+
+// Read-only accessor for the serialized LSHProjectionOptions table.
+struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot offset of the single field.
+  enum
+  {
+    VT_TYPE = 4
+  };
+  LSHProjectionType type() const
+  {
+    return static_cast<LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
+  }
+  // Structural validation against the buffer bounds.
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_TYPE) &&
+           verifier.EndTable();
+  }
+};
+
+// Incremental builder for the LSHProjectionOptions table.
+struct LSHProjectionOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_type(LSHProjectionType type)
+  {
+    fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0);
+  }
+  explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &);
+  flatbuffers::Offset<LSHProjectionOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LSHProjectionOptions>(end);
+    return o;
+  }
+};
+
+// Builds a complete LSHProjectionOptions table in one call.
+inline flatbuffers::Offset<LSHProjectionOptions>
+CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                           LSHProjectionType type = LSHProjectionType_UNKNOWN)
+{
+  LSHProjectionOptionsBuilder builder_(_fbb);
+  builder_.add_type(type);
+  return builder_.Finish();
+}
+
+// Read-only accessor for the serialized SVDFOptions table.
+struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot offsets of the table's fields.
+  enum
+  {
+    VT_RANK = 4,
+    VT_FUSED_ACTIVATION_FUNCTION = 6,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
+  };
+  int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); }
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool asymmetric_quantize_inputs() const
+  {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+  }
+  // Structural validation against the buffer bounds.
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RANK) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+  }
+};
+
+// Incremental builder for the SVDFOptions table.
+struct SVDFOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_rank(int32_t rank) { fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0); }
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+  {
+    fbb_.AddElement<uint8_t>(SVDFOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+                             static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+  }
+  explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &);
+  flatbuffers::Offset<SVDFOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SVDFOptions>(end);
+    return o;
+  }
+};
+
+// Builds a complete SVDFOptions table in one call.
+inline flatbuffers::Offset<SVDFOptions>
+CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
+                  ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+                  bool asymmetric_quantize_inputs = false)
+{
+  SVDFOptionsBuilder builder_(_fbb);
+  builder_.add_rank(rank);
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+// Read-only accessor for the serialized RNNOptions table.
+struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot offsets of the table's fields.
+  enum
+  {
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 6
+  };
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool asymmetric_quantize_inputs() const
+  {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+  }
+  // Structural validation against the buffer bounds.
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+  }
+};
+
+// Incremental builder for the RNNOptions table.
+struct RNNOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+  {
+    fbb_.AddElement<uint8_t>(RNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+                             static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+  }
+  explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  RNNOptionsBuilder &operator=(const RNNOptionsBuilder &);
+  flatbuffers::Offset<RNNOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<RNNOptions>(end);
+    return o;
+  }
+};
+
+// Builds a complete RNNOptions table in one call.
+inline flatbuffers::Offset<RNNOptions>
+CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                 ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+                 bool asymmetric_quantize_inputs = false)
+{
+  RNNOptionsBuilder builder_(_fbb);
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+// Read-only accessor for the serialized SequenceRNNOptions table.
+struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot offsets of the table's fields.
+  enum
+  {
+    VT_TIME_MAJOR = 4,
+    VT_FUSED_ACTIVATION_FUNCTION = 6,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
+  };
+  bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool asymmetric_quantize_inputs() const
+  {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+  }
+  // Structural validation against the buffer bounds.
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+  }
+};
+
+// Incremental builder for the SequenceRNNOptions table.
+struct SequenceRNNOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_time_major(bool time_major)
+  {
+    fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major),
+                             0);
+  }
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+  {
+    fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+                             static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+  }
+  explicit SequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &);
+  flatbuffers::Offset<SequenceRNNOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SequenceRNNOptions>(end);
+    return o;
+  }
+};
+
+// Builds a complete SequenceRNNOptions table in one call.
+inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    bool asymmetric_quantize_inputs = false)
+{
+  SequenceRNNOptionsBuilder builder_(_fbb);
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_time_major(time_major);
+  return builder_.Finish();
+}
+
+// Read-only accessor for the serialized BidirectionalSequenceRNNOptions table.
+struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot offsets of the table's fields.
+  enum
+  {
+    VT_TIME_MAJOR = 4,
+    VT_FUSED_ACTIVATION_FUNCTION = 6,
+    VT_MERGE_OUTPUTS = 8,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
+  };
+  bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
+  bool asymmetric_quantize_inputs() const
+  {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+  }
+  // Structural validation against the buffer bounds.
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+  }
+};
+
+// Incremental builder for the BidirectionalSequenceRNNOptions table.
+struct BidirectionalSequenceRNNOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_time_major(bool time_major)
+  {
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR,
+                             static_cast<uint8_t>(time_major), 0);
+  }
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_merge_outputs(bool merge_outputs)
+  {
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_MERGE_OUTPUTS,
+                             static_cast<uint8_t>(merge_outputs), 0);
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+  {
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+                             static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+  }
+  explicit BidirectionalSequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &);
+  flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BidirectionalSequenceRNNOptions>(end);
+    return o;
+  }
+};
+
+// Builds a complete BidirectionalSequenceRNNOptions table in one call.
+inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
+{
+  BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  builder_.add_merge_outputs(merge_outputs);
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_time_major(time_major);
+  return builder_.Finish();
+}
+
+// Read-only accessor for the serialized FullyConnectedOptions table.
+struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot offsets of the table's fields.
+  enum
+  {
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_WEIGHTS_FORMAT = 6,
+    VT_KEEP_NUM_DIMS = 8,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
+  };
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  FullyConnectedOptionsWeightsFormat weights_format() const
+  {
+    return static_cast<FullyConnectedOptionsWeightsFormat>(GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
+  }
+  bool keep_num_dims() const { return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0; }
+  bool asymmetric_quantize_inputs() const
+  {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+  }
+  // Structural validation against the buffer bounds.
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<int8_t>(verifier, VT_WEIGHTS_FORMAT) &&
+           VerifyField<uint8_t>(verifier, VT_KEEP_NUM_DIMS) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+  }
+};
+
+// Incremental builder for the FullyConnectedOptions table.
+struct FullyConnectedOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_weights_format(FullyConnectedOptionsWeightsFormat weights_format)
+  {
+    fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT,
+                            static_cast<int8_t>(weights_format), 0);
+  }
+  void add_keep_num_dims(bool keep_num_dims)
+  {
+    fbb_.AddElement<uint8_t>(FullyConnectedOptions::VT_KEEP_NUM_DIMS,
+                             static_cast<uint8_t>(keep_num_dims), 0);
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+  {
+    fbb_.AddElement<uint8_t>(FullyConnectedOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+                             static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+  }
+  explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &);
+  flatbuffers::Offset<FullyConnectedOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FullyConnectedOptions>(end);
+    return o;
+  }
+};
+
+// Builds a complete FullyConnectedOptions table in one call.
+inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT,
+    bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
+{
+  FullyConnectedOptionsBuilder builder_(_fbb);
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  builder_.add_keep_num_dims(keep_num_dims);
+  builder_.add_weights_format(weights_format);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+// SoftmaxOptions: flatc-generated table/builder/Create() trio; single float
+// field `beta` (default 0.0f). Do not hand-edit generated code.
+struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_BETA = 4
+  };
+  float beta() const { return GetField<float>(VT_BETA, 0.0f); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_BETA) &&
+           verifier.EndTable();
+  }
+};
+
+struct SoftmaxOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_beta(float beta) { fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f); }
+  explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &);
+  flatbuffers::Offset<SoftmaxOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SoftmaxOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SoftmaxOptions>
+CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f)
+{
+  SoftmaxOptionsBuilder builder_(_fbb);
+  builder_.add_beta(beta);
+  return builder_.Finish();
+}
+
+// ConcatenationOptions: flatc-generated table/builder/Create() trio with
+// `axis` and a fused activation function. Do not hand-edit generated code.
+struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_AXIS = 4,
+    VT_FUSED_ACTIVATION_FUNCTION = 6
+  };
+  int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+  }
+};
+
+struct ConcatenationOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0); }
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &);
+  flatbuffers::Offset<ConcatenationOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ConcatenationOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+{
+  ConcatenationOptionsBuilder builder_(_fbb);
+  builder_.add_axis(axis);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+// AddOptions: flatc-generated table/builder/Create() trio; only a fused
+// activation function field. Do not hand-edit generated code.
+struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_FUSED_ACTIVATION_FUNCTION = 4
+  };
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+  }
+};
+
+struct AddOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  AddOptionsBuilder &operator=(const AddOptionsBuilder &);
+  flatbuffers::Offset<AddOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<AddOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<AddOptions>
+CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                 ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+{
+  AddOptionsBuilder builder_(_fbb);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+// MulOptions: flatc-generated table/builder/Create() trio; structurally
+// identical to AddOptions (fused activation only). Generated -- do not edit.
+struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_FUSED_ACTIVATION_FUNCTION = 4
+  };
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+  }
+};
+
+struct MulOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  MulOptionsBuilder &operator=(const MulOptionsBuilder &);
+  flatbuffers::Offset<MulOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<MulOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<MulOptions>
+CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                 ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+{
+  MulOptionsBuilder builder_(_fbb);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+// L2NormOptions: flatc-generated table/builder/Create() trio; fused
+// activation only. Generated -- do not edit.
+struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_FUSED_ACTIVATION_FUNCTION = 4
+  };
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+  }
+};
+
+struct L2NormOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &);
+  flatbuffers::Offset<L2NormOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<L2NormOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<L2NormOptions>
+CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+{
+  L2NormOptionsBuilder builder_(_fbb);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+// LocalResponseNormalizationOptions: flatc-generated table/builder/Create()
+// trio (radius, bias, alpha, beta). Generated -- do not edit.
+struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_RADIUS = 4,
+    VT_BIAS = 6,
+    VT_ALPHA = 8,
+    VT_BETA = 10
+  };
+  int32_t radius() const { return GetField<int32_t>(VT_RADIUS, 0); }
+  float bias() const { return GetField<float>(VT_BIAS, 0.0f); }
+  float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); }
+  float beta() const { return GetField<float>(VT_BETA, 0.0f); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RADIUS) &&
+           VerifyField<float>(verifier, VT_BIAS) && VerifyField<float>(verifier, VT_ALPHA) &&
+           VerifyField<float>(verifier, VT_BETA) && verifier.EndTable();
+  }
+};
+
+struct LocalResponseNormalizationOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_radius(int32_t radius)
+  {
+    fbb_.AddElement<int32_t>(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0);
+  }
+  void add_bias(float bias)
+  {
+    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f);
+  }
+  void add_alpha(float alpha)
+  {
+    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f);
+  }
+  void add_beta(float beta)
+  {
+    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
+  }
+  explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  LocalResponseNormalizationOptionsBuilder &
+  operator=(const LocalResponseNormalizationOptionsBuilder &);
+  flatbuffers::Offset<LocalResponseNormalizationOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LocalResponseNormalizationOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LocalResponseNormalizationOptions>
+CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t radius = 0,
+                                        float bias = 0.0f, float alpha = 0.0f, float beta = 0.0f)
+{
+  LocalResponseNormalizationOptionsBuilder builder_(_fbb);
+  builder_.add_beta(beta);
+  builder_.add_alpha(alpha);
+  builder_.add_bias(bias);
+  builder_.add_radius(radius);
+  return builder_.Finish();
+}
+
+// LSTMOptions: flatc-generated table/builder/Create() trio (activation,
+// cell/projection clip, kernel type, asymmetric quantization flag).
+// Generated -- do not edit.
+struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_CELL_CLIP = 6,
+    VT_PROJ_CLIP = 8,
+    VT_KERNEL_TYPE = 10,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
+  };
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
+  float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
+  LSTMKernelType kernel_type() const
+  {
+    return static_cast<LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
+  }
+  bool asymmetric_quantize_inputs() const
+  {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<float>(verifier, VT_CELL_CLIP) &&
+           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
+           VerifyField<int8_t>(verifier, VT_KERNEL_TYPE) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+  }
+};
+
+struct LSTMOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_cell_clip(float cell_clip)
+  {
+    fbb_.AddElement<float>(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+  }
+  void add_proj_clip(float proj_clip)
+  {
+    fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+  }
+  void add_kernel_type(LSTMKernelType kernel_type)
+  {
+    fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0);
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+  {
+    fbb_.AddElement<uint8_t>(LSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+                             static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+  }
+  explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &);
+  flatbuffers::Offset<LSTMOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LSTMOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LSTMOptions>
+CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                  ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+                  float cell_clip = 0.0f, float proj_clip = 0.0f,
+                  LSTMKernelType kernel_type = LSTMKernelType_FULL,
+                  bool asymmetric_quantize_inputs = false)
+{
+  LSTMOptionsBuilder builder_(_fbb);
+  builder_.add_proj_clip(proj_clip);
+  builder_.add_cell_clip(cell_clip);
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  builder_.add_kernel_type(kernel_type);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+// UnidirectionalSequenceLSTMOptions: flatc-generated table/builder/Create()
+// trio; like LSTMOptions but with a time_major flag (default false) instead
+// of a kernel type. Generated -- do not edit.
+struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_CELL_CLIP = 6,
+    VT_PROJ_CLIP = 8,
+    VT_TIME_MAJOR = 10,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
+  };
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
+  float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
+  bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
+  bool asymmetric_quantize_inputs() const
+  {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<float>(verifier, VT_CELL_CLIP) &&
+           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
+           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+  }
+};
+
+struct UnidirectionalSequenceLSTMOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_cell_clip(float cell_clip)
+  {
+    fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+  }
+  void add_proj_clip(float proj_clip)
+  {
+    fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+  }
+  void add_time_major(bool time_major)
+  {
+    fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_TIME_MAJOR,
+                             static_cast<uint8_t>(time_major), 0);
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+  {
+    fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+                             static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+  }
+  explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  UnidirectionalSequenceLSTMOptionsBuilder &
+  operator=(const UnidirectionalSequenceLSTMOptionsBuilder &);
+  flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
+CreateUnidirectionalSequenceLSTMOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
+    bool asymmetric_quantize_inputs = false)
+{
+  UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
+  builder_.add_proj_clip(proj_clip);
+  builder_.add_cell_clip(cell_clip);
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  builder_.add_time_major(time_major);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+// BidirectionalSequenceLSTMOptions: flatc-generated table/builder/Create()
+// trio. NOTE: unlike the unidirectional variant, the schema default for
+// time_major here is TRUE (GetField/AddElement default = 1). Generated code
+// -- do not edit.
+struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_CELL_CLIP = 6,
+    VT_PROJ_CLIP = 8,
+    VT_MERGE_OUTPUTS = 10,
+    VT_TIME_MAJOR = 12,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 14
+  };
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
+  float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
+  bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
+  // Default is 1 (true) -- an absent field reads back as time-major.
+  bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 1) != 0; }
+  bool asymmetric_quantize_inputs() const
+  {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<float>(verifier, VT_CELL_CLIP) &&
+           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
+           VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) &&
+           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+  }
+};
+
+struct BidirectionalSequenceLSTMOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_cell_clip(float cell_clip)
+  {
+    fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+  }
+  void add_proj_clip(float proj_clip)
+  {
+    fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+  }
+  void add_merge_outputs(bool merge_outputs)
+  {
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS,
+                             static_cast<uint8_t>(merge_outputs), 0);
+  }
+  void add_time_major(bool time_major)
+  {
+    // Default 1: writing `true` is elided; only `false` is stored.
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_TIME_MAJOR,
+                             static_cast<uint8_t>(time_major), 1);
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+  {
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+                             static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+  }
+  explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  BidirectionalSequenceLSTMOptionsBuilder &
+  operator=(const BidirectionalSequenceLSTMOptionsBuilder &);
+  flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BidirectionalSequenceLSTMOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
+    bool time_major = true, bool asymmetric_quantize_inputs = false)
+{
+  BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
+  builder_.add_proj_clip(proj_clip);
+  builder_.add_cell_clip(cell_clip);
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  builder_.add_time_major(time_major);
+  builder_.add_merge_outputs(merge_outputs);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+// ResizeBilinearOptions: flatc-generated table/builder/Create() trio.
+// NOTE(review): vtable offsets start at 8 (not 4) -- presumably slots 4/6
+// belong to deprecated schema fields; confirm against the .fbs schema.
+// Generated -- do not edit.
+struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_ALIGN_CORNERS = 8,
+    VT_HALF_PIXEL_CENTERS = 10
+  };
+  bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
+  bool half_pixel_centers() const { return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0; }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
+           VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) && verifier.EndTable();
+  }
+};
+
+struct ResizeBilinearOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_align_corners(bool align_corners)
+  {
+    fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_ALIGN_CORNERS,
+                             static_cast<uint8_t>(align_corners), 0);
+  }
+  void add_half_pixel_centers(bool half_pixel_centers)
+  {
+    fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_HALF_PIXEL_CENTERS,
+                             static_cast<uint8_t>(half_pixel_centers), 0);
+  }
+  explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &);
+  flatbuffers::Offset<ResizeBilinearOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ResizeBilinearOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ResizeBilinearOptions>
+CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false,
+                            bool half_pixel_centers = false)
+{
+  ResizeBilinearOptionsBuilder builder_(_fbb);
+  builder_.add_half_pixel_centers(half_pixel_centers);
+  builder_.add_align_corners(align_corners);
+  return builder_.Finish();
+}
+
+// ResizeNearestNeighborOptions: flatc-generated table/builder/Create() trio;
+// single align_corners flag. Generated -- do not edit.
+struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_ALIGN_CORNERS = 4
+  };
+  bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
+           verifier.EndTable();
+  }
+};
+
+struct ResizeNearestNeighborOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_align_corners(bool align_corners)
+  {
+    fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS,
+                             static_cast<uint8_t>(align_corners), 0);
+  }
+  explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &);
+  flatbuffers::Offset<ResizeNearestNeighborOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ResizeNearestNeighborOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ResizeNearestNeighborOptions>
+CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false)
+{
+  ResizeNearestNeighborOptionsBuilder builder_(_fbb);
+  builder_.add_align_corners(align_corners);
+  return builder_.Finish();
+}
+
+// CallOptions: flatc-generated table/builder/Create() trio; holds the index
+// of the subgraph to call. Generated -- do not edit.
+struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_SUBGRAPH = 4
+  };
+  uint32_t subgraph() const { return GetField<uint32_t>(VT_SUBGRAPH, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_SUBGRAPH) &&
+           verifier.EndTable();
+  }
+};
+
+struct CallOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_subgraph(uint32_t subgraph)
+  {
+    fbb_.AddElement<uint32_t>(CallOptions::VT_SUBGRAPH, subgraph, 0);
+  }
+  explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  CallOptionsBuilder &operator=(const CallOptionsBuilder &);
+  flatbuffers::Offset<CallOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<CallOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                                          uint32_t subgraph = 0)
+{
+  CallOptionsBuilder builder_(_fbb);
+  builder_.add_subgraph(subgraph);
+  return builder_.Finish();
+}
+
+// PadOptions: flatc-generated empty options table (no fields); only the
+// table skeleton is built/verified. Generated -- do not edit.
+struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct PadOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  PadOptionsBuilder &operator=(const PadOptionsBuilder &);
+  flatbuffers::Offset<PadOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<PadOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  PadOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// PadV2Options: flatc-generated empty options table (no fields).
+// Generated -- do not edit.
+struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct PadV2OptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &);
+  flatbuffers::Offset<PadV2Options> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<PadV2Options>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  PadV2OptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// ReshapeOptions: flatc-generated table/builder/Create() trio plus a
+// *Direct helper that copies a std::vector<int32_t> into the buffer.
+// Generated -- do not edit.
+struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_NEW_SHAPE = 4
+  };
+  // Returns nullptr when the field is absent.
+  const flatbuffers::Vector<int32_t> *new_shape() const
+  {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NEW_SHAPE);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NEW_SHAPE) &&
+           verifier.VerifyVector(new_shape()) && verifier.EndTable();
+  }
+};
+
+struct ReshapeOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape)
+  {
+    fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape);
+  }
+  explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &);
+  flatbuffers::Offset<ReshapeOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ReshapeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ReshapeOptions>
+CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                     flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape = 0)
+{
+  ReshapeOptionsBuilder builder_(_fbb);
+  builder_.add_new_shape(new_shape);
+  return builder_.Finish();
+}
+
+// Convenience overload: serializes *new_shape first (nullptr -> field absent).
+inline flatbuffers::Offset<ReshapeOptions>
+CreateReshapeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
+                           const std::vector<int32_t> *new_shape = nullptr)
+{
+  return circle::CreateReshapeOptions(_fbb, new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0);
+}
+
+// SpaceToBatchNDOptions: flatc-generated empty options table (no fields).
+// Generated -- do not edit.
+struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct SpaceToBatchNDOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &);
+  flatbuffers::Offset<SpaceToBatchNDOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SpaceToBatchNDOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SpaceToBatchNDOptions>
+CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  SpaceToBatchNDOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// BatchToSpaceNDOptions: flatc-generated empty options table (no fields).
+// Generated -- do not edit.
+struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct BatchToSpaceNDOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
+  flatbuffers::Offset<BatchToSpaceNDOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BatchToSpaceNDOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BatchToSpaceNDOptions>
+CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  BatchToSpaceNDOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// SkipGramOptions: flatc-generated table/builder/Create() trio (ngram_size,
+// max_skip_size, include_all_ngrams). Generated -- do not edit.
+struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_NGRAM_SIZE = 4,
+    VT_MAX_SKIP_SIZE = 6,
+    VT_INCLUDE_ALL_NGRAMS = 8
+  };
+  int32_t ngram_size() const { return GetField<int32_t>(VT_NGRAM_SIZE, 0); }
+  int32_t max_skip_size() const { return GetField<int32_t>(VT_MAX_SKIP_SIZE, 0); }
+  bool include_all_ngrams() const { return GetField<uint8_t>(VT_INCLUDE_ALL_NGRAMS, 0) != 0; }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NGRAM_SIZE) &&
+           VerifyField<int32_t>(verifier, VT_MAX_SKIP_SIZE) &&
+           VerifyField<uint8_t>(verifier, VT_INCLUDE_ALL_NGRAMS) && verifier.EndTable();
+  }
+};
+
+struct SkipGramOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_ngram_size(int32_t ngram_size)
+  {
+    fbb_.AddElement<int32_t>(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0);
+  }
+  void add_max_skip_size(int32_t max_skip_size)
+  {
+    fbb_.AddElement<int32_t>(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0);
+  }
+  void add_include_all_ngrams(bool include_all_ngrams)
+  {
+    fbb_.AddElement<uint8_t>(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS,
+                             static_cast<uint8_t>(include_all_ngrams), 0);
+  }
+  explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
+  flatbuffers::Offset<SkipGramOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SkipGramOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SkipGramOptions>
+CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size = 0,
+                      int32_t max_skip_size = 0, bool include_all_ngrams = false)
+{
+  SkipGramOptionsBuilder builder_(_fbb);
+  builder_.add_max_skip_size(max_skip_size);
+  builder_.add_ngram_size(ngram_size);
+  builder_.add_include_all_ngrams(include_all_ngrams);
+  return builder_.Finish();
+}
+
+struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_BLOCK_SIZE = 4
+ };
+ int32_t block_size() const { return GetField<int32_t>(VT_BLOCK_SIZE, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) &&
+ verifier.EndTable();
+ }
+};
+
+struct SpaceToDepthOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_block_size(int32_t block_size)
+ {
+ fbb_.AddElement<int32_t>(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0);
+ }
+ explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
+ flatbuffers::Offset<SpaceToDepthOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SpaceToDepthOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SpaceToDepthOptions>
+CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_size = 0)
+{
+ SpaceToDepthOptionsBuilder builder_(_fbb);
+ builder_.add_block_size(block_size);
+ return builder_.Finish();
+}
+
+struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_BLOCK_SIZE = 4
+ };
+ int32_t block_size() const { return GetField<int32_t>(VT_BLOCK_SIZE, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) &&
+ verifier.EndTable();
+ }
+};
+
+struct DepthToSpaceOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_block_size(int32_t block_size)
+ {
+ fbb_.AddElement<int32_t>(DepthToSpaceOptions::VT_BLOCK_SIZE, block_size, 0);
+ }
+ explicit DepthToSpaceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ DepthToSpaceOptionsBuilder &operator=(const DepthToSpaceOptionsBuilder &);
+ flatbuffers::Offset<DepthToSpaceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DepthToSpaceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DepthToSpaceOptions>
+CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_size = 0)
+{
+ DepthToSpaceOptionsBuilder builder_(_fbb);
+ builder_.add_block_size(block_size);
+ return builder_.Finish();
+}
+
+struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4
+ };
+ ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ }
+};
+
+struct SubOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SubOptionsBuilder &operator=(const SubOptionsBuilder &);
+ flatbuffers::Offset<SubOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SubOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SubOptions>
+CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+{
+ SubOptionsBuilder builder_(_fbb);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4
+ };
+ ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ }
+};
+
+struct DivOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ explicit DivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ DivOptionsBuilder &operator=(const DivOptionsBuilder &);
+ flatbuffers::Offset<DivOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DivOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DivOptions>
+CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+{
+ DivOptionsBuilder builder_(_fbb);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct TopKV2OptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &);
+ flatbuffers::Offset<TopKV2Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TopKV2Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ TopKV2OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_COMBINER = 4
+ };
+ CombinerType combiner() const
+ {
+ return static_cast<CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_COMBINER) &&
+ verifier.EndTable();
+ }
+};
+
+struct EmbeddingLookupSparseOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_combiner(CombinerType combiner)
+ {
+ fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER,
+ static_cast<int8_t>(combiner), 0);
+ }
+ explicit EmbeddingLookupSparseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &);
+ flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<EmbeddingLookupSparseOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
+CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ CombinerType combiner = CombinerType_SUM)
+{
+ EmbeddingLookupSparseOptionsBuilder builder_(_fbb);
+ builder_.add_combiner(combiner);
+ return builder_.Finish();
+}
+
+// NOTE(review): flatc-generated FlatBuffers accessors (circle schema) —
+// do not edit by hand; regenerate with flatc instead.
+struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_AXIS = 4
+  };
+  int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
+           verifier.EndTable();
+  }
+};
+
+struct GatherOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); }
+  explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Copy assignment declared but never defined: pre-C++11 idiom to forbid copying.
+  GatherOptionsBuilder &operator=(const GatherOptionsBuilder &);
+  flatbuffers::Offset<GatherOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<GatherOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                                              int32_t axis = 0)
+{
+  GatherOptionsBuilder builder_(_fbb);
+  builder_.add_axis(axis);
+  return builder_.Finish();
+}
+
+struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct TransposeOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &);
+  flatbuffers::Offset<TransposeOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<TransposeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<TransposeOptions>
+CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  TransposeOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct ExpOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ExpOptionsBuilder &operator=(const ExpOptionsBuilder &);
+  flatbuffers::Offset<ExpOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ExpOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  ExpOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct CosOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit CosOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  CosOptionsBuilder &operator=(const CosOptionsBuilder &);
+  flatbuffers::Offset<CosOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<CosOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  CosOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// Shared options table for the reduction operators (keep_dims flag only).
+struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_KEEP_DIMS = 4
+  };
+  bool keep_dims() const { return GetField<uint8_t>(VT_KEEP_DIMS, 0) != 0; }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_KEEP_DIMS) &&
+           verifier.EndTable();
+  }
+};
+
+struct ReducerOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_keep_dims(bool keep_dims)
+  {
+    fbb_.AddElement<uint8_t>(ReducerOptions::VT_KEEP_DIMS, static_cast<uint8_t>(keep_dims), 0);
+  }
+  explicit ReducerOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &);
+  flatbuffers::Offset<ReducerOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ReducerOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ReducerOptions>
+CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = false)
+{
+  ReducerOptionsBuilder builder_(_fbb);
+  builder_.add_keep_dims(keep_dims);
+  return builder_.Finish();
+}
+
+struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_SQUEEZE_DIMS = 4
+  };
+  // Returns nullptr when the squeeze_dims vector is absent from the buffer.
+  const flatbuffers::Vector<int32_t> *squeeze_dims() const
+  {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SQUEEZE_DIMS);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SQUEEZE_DIMS) &&
+           verifier.VerifyVector(squeeze_dims()) && verifier.EndTable();
+  }
+};
+
+struct SqueezeOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims)
+  {
+    fbb_.AddOffset(SqueezeOptions::VT_SQUEEZE_DIMS, squeeze_dims);
+  }
+  explicit SqueezeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &);
+  flatbuffers::Offset<SqueezeOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SqueezeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SqueezeOptions>
+CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                     flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims = 0)
+{
+  SqueezeOptionsBuilder builder_(_fbb);
+  builder_.add_squeeze_dims(squeeze_dims);
+  return builder_.Finish();
+}
+
+// "Direct" convenience overload: serializes the std::vector into the buffer
+// first (FlatBuffers child data is written before the parent table), then
+// delegates to CreateSqueezeOptions. A null pointer stores no vector.
+inline flatbuffers::Offset<SqueezeOptions>
+CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
+                           const std::vector<int32_t> *squeeze_dims = nullptr)
+{
+  return circle::CreateSqueezeOptions(_fbb,
+                                      squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0);
+}
+
+struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_NUM_SPLITS = 4
+  };
+  int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) &&
+           verifier.EndTable();
+  }
+};
+
+struct SplitOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_num_splits(int32_t num_splits)
+  {
+    fbb_.AddElement<int32_t>(SplitOptions::VT_NUM_SPLITS, num_splits, 0);
+  }
+  explicit SplitOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  SplitOptionsBuilder &operator=(const SplitOptionsBuilder &);
+  flatbuffers::Offset<SplitOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SplitOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                                            int32_t num_splits = 0)
+{
+  SplitOptionsBuilder builder_(_fbb);
+  builder_.add_num_splits(num_splits);
+  return builder_.Finish();
+}
+
+struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_NUM_SPLITS = 4
+  };
+  int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) &&
+           verifier.EndTable();
+  }
+};
+
+struct SplitVOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_num_splits(int32_t num_splits)
+  {
+    fbb_.AddElement<int32_t>(SplitVOptions::VT_NUM_SPLITS, num_splits, 0);
+  }
+  explicit SplitVOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &);
+  flatbuffers::Offset<SplitVOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SplitVOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                                              int32_t num_splits = 0)
+{
+  SplitVOptionsBuilder builder_(_fbb);
+  builder_.add_num_splits(num_splits);
+  return builder_.Finish();
+}
+
+// NOTE(review): flatc-generated FlatBuffers accessors (circle schema) —
+// do not edit by hand; regenerate with flatc instead.
+// StridedSlice options: five int32 bitmasks, each defaulting to 0 when absent.
+struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_BEGIN_MASK = 4,
+    VT_END_MASK = 6,
+    VT_ELLIPSIS_MASK = 8,
+    VT_NEW_AXIS_MASK = 10,
+    VT_SHRINK_AXIS_MASK = 12
+  };
+  int32_t begin_mask() const { return GetField<int32_t>(VT_BEGIN_MASK, 0); }
+  int32_t end_mask() const { return GetField<int32_t>(VT_END_MASK, 0); }
+  int32_t ellipsis_mask() const { return GetField<int32_t>(VT_ELLIPSIS_MASK, 0); }
+  int32_t new_axis_mask() const { return GetField<int32_t>(VT_NEW_AXIS_MASK, 0); }
+  int32_t shrink_axis_mask() const { return GetField<int32_t>(VT_SHRINK_AXIS_MASK, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BEGIN_MASK) &&
+           VerifyField<int32_t>(verifier, VT_END_MASK) &&
+           VerifyField<int32_t>(verifier, VT_ELLIPSIS_MASK) &&
+           VerifyField<int32_t>(verifier, VT_NEW_AXIS_MASK) &&
+           VerifyField<int32_t>(verifier, VT_SHRINK_AXIS_MASK) && verifier.EndTable();
+  }
+};
+
+struct StridedSliceOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_begin_mask(int32_t begin_mask)
+  {
+    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_BEGIN_MASK, begin_mask, 0);
+  }
+  void add_end_mask(int32_t end_mask)
+  {
+    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_END_MASK, end_mask, 0);
+  }
+  void add_ellipsis_mask(int32_t ellipsis_mask)
+  {
+    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_ELLIPSIS_MASK, ellipsis_mask, 0);
+  }
+  void add_new_axis_mask(int32_t new_axis_mask)
+  {
+    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_NEW_AXIS_MASK, new_axis_mask, 0);
+  }
+  void add_shrink_axis_mask(int32_t shrink_axis_mask)
+  {
+    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_SHRINK_AXIS_MASK, shrink_axis_mask, 0);
+  }
+  explicit StridedSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Copy assignment declared but never defined: pre-C++11 idiom to forbid copying.
+  StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &);
+  flatbuffers::Offset<StridedSliceOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<StridedSliceOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<StridedSliceOptions>
+CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t begin_mask = 0,
+                          int32_t end_mask = 0, int32_t ellipsis_mask = 0,
+                          int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0)
+{
+  StridedSliceOptionsBuilder builder_(_fbb);
+  // NOTE(review): flatc emits adds in reverse field-id order — keep as generated.
+  builder_.add_shrink_axis_mask(shrink_axis_mask);
+  builder_.add_new_axis_mask(new_axis_mask);
+  builder_.add_ellipsis_mask(ellipsis_mask);
+  builder_.add_end_mask(end_mask);
+  builder_.add_begin_mask(begin_mask);
+  return builder_.Finish();
+}
+
+struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct LogSoftmaxOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &);
+  flatbuffers::Offset<LogSoftmaxOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LogSoftmaxOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LogSoftmaxOptions>
+CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  LogSoftmaxOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// Cast options: source/destination tensor element types, stored as int8 enums.
+struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_IN_DATA_TYPE = 4,
+    VT_OUT_DATA_TYPE = 6
+  };
+  TensorType in_data_type() const
+  {
+    return static_cast<TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
+  }
+  TensorType out_data_type() const
+  {
+    return static_cast<TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_IN_DATA_TYPE) &&
+           VerifyField<int8_t>(verifier, VT_OUT_DATA_TYPE) && verifier.EndTable();
+  }
+};
+
+struct CastOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_in_data_type(TensorType in_data_type)
+  {
+    fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, static_cast<int8_t>(in_data_type), 0);
+  }
+  void add_out_data_type(TensorType out_data_type)
+  {
+    fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, static_cast<int8_t>(out_data_type), 0);
+  }
+  explicit CastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  CastOptionsBuilder &operator=(const CastOptionsBuilder &);
+  flatbuffers::Offset<CastOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<CastOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<CastOptions>
+CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                  TensorType in_data_type = TensorType_FLOAT32,
+                  TensorType out_data_type = TensorType_FLOAT32)
+{
+  CastOptionsBuilder builder_(_fbb);
+  builder_.add_out_data_type(out_data_type);
+  builder_.add_in_data_type(in_data_type);
+  return builder_.Finish();
+}
+
+struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct DequantizeOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &);
+  flatbuffers::Offset<DequantizeOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<DequantizeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<DequantizeOptions>
+CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  DequantizeOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct MaximumMinimumOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &);
+  flatbuffers::Offset<MaximumMinimumOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<MaximumMinimumOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<MaximumMinimumOptions>
+CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  MaximumMinimumOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct TileOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  TileOptionsBuilder &operator=(const TileOptionsBuilder &);
+  flatbuffers::Offset<TileOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<TileOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  TileOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// NOTE(review): flatc-generated FlatBuffers accessors (circle schema) —
+// do not edit by hand; regenerate with flatc instead.
+struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_OUTPUT_TYPE = 4
+  };
+  TensorType output_type() const
+  {
+    return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) &&
+           verifier.EndTable();
+  }
+};
+
+struct ArgMaxOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_output_type(TensorType output_type)
+  {
+    fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
+  }
+  explicit ArgMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Copy assignment declared but never defined: pre-C++11 idiom to forbid copying.
+  ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &);
+  flatbuffers::Offset<ArgMaxOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ArgMaxOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ArgMaxOptions>
+CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                    TensorType output_type = TensorType_FLOAT32)
+{
+  ArgMaxOptionsBuilder builder_(_fbb);
+  builder_.add_output_type(output_type);
+  return builder_.Finish();
+}
+
+struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  enum
+  {
+    VT_OUTPUT_TYPE = 4
+  };
+  TensorType output_type() const
+  {
+    return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) &&
+           verifier.EndTable();
+  }
+};
+
+struct ArgMinOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_output_type(TensorType output_type)
+  {
+    fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
+  }
+  explicit ArgMinOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &);
+  flatbuffers::Offset<ArgMinOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ArgMinOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ArgMinOptions>
+CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                    TensorType output_type = TensorType_FLOAT32)
+{
+  ArgMinOptionsBuilder builder_(_fbb);
+  builder_.add_output_type(output_type);
+  return builder_.Finish();
+}
+
+// The comparison/element-wise operators below carry no options: each table is
+// empty and exists only so the BuiltinOptions union has a member for the op.
+struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct GreaterOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &);
+  flatbuffers::Offset<GreaterOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<GreaterOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<GreaterOptions>
+CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  GreaterOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct GreaterEqualOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &);
+  flatbuffers::Offset<GreaterEqualOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<GreaterEqualOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<GreaterEqualOptions>
+CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  GreaterEqualOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct LessOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  LessOptionsBuilder &operator=(const LessOptionsBuilder &);
+  flatbuffers::Offset<LessOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LessOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  LessOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct LessEqualOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &);
+  flatbuffers::Offset<LessEqualOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LessEqualOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LessEqualOptions>
+CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  LessEqualOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct NegOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  NegOptionsBuilder &operator=(const NegOptionsBuilder &);
+  flatbuffers::Offset<NegOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<NegOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  NegOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct SelectOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  SelectOptionsBuilder &operator=(const SelectOptionsBuilder &);
+  flatbuffers::Offset<SelectOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SelectOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  SelectOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct SliceOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  SliceOptionsBuilder &operator=(const SliceOptionsBuilder &);
+  flatbuffers::Offset<SliceOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SliceOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  SliceOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_PADDING = 4,
+ VT_STRIDE_W = 6,
+ VT_STRIDE_H = 8
+ };
+ Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+ int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_H) && verifier.EndTable();
+ }
+};
+
+struct TransposeConvOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_padding(Padding padding)
+ {
+ fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+ }
+ void add_stride_w(int32_t stride_w)
+ {
+ fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_W, stride_w, 0);
+ }
+ void add_stride_h(int32_t stride_h)
+ {
+ fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_H, stride_h, 0);
+ }
+ explicit TransposeConvOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &);
+ flatbuffers::Offset<TransposeConvOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TransposeConvOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TransposeConvOptions>
+CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0)
+{
+ TransposeConvOptionsBuilder builder_(_fbb);
+ builder_.add_stride_h(stride_h);
+ builder_.add_stride_w(stride_w);
+ builder_.add_padding(padding);
+ return builder_.Finish();
+}
+
+struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct ExpandDimsOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &);
+ flatbuffers::Offset<ExpandDimsOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ExpandDimsOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ExpandDimsOptions>
+CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ExpandDimsOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_VALIDATE_INDICES = 4
+ };
+ bool validate_indices() const { return GetField<uint8_t>(VT_VALIDATE_INDICES, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_VALIDATE_INDICES) &&
+ verifier.EndTable();
+ }
+};
+
+struct SparseToDenseOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_validate_indices(bool validate_indices)
+ {
+ fbb_.AddElement<uint8_t>(SparseToDenseOptions::VT_VALIDATE_INDICES,
+ static_cast<uint8_t>(validate_indices), 0);
+ }
+ explicit SparseToDenseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &);
+ flatbuffers::Offset<SparseToDenseOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SparseToDenseOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SparseToDenseOptions>
+CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, bool validate_indices = false)
+{
+ SparseToDenseOptionsBuilder builder_(_fbb);
+ builder_.add_validate_indices(validate_indices);
+ return builder_.Finish();
+}
+
+struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct EqualOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ EqualOptionsBuilder &operator=(const EqualOptionsBuilder &);
+ flatbuffers::Offset<EqualOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<EqualOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ EqualOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct NotEqualOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &);
+ flatbuffers::Offset<NotEqualOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<NotEqualOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<NotEqualOptions>
+CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ NotEqualOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_OUT_TYPE = 4
+ };
+ TensorType out_type() const { return static_cast<TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0)); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUT_TYPE) &&
+ verifier.EndTable();
+ }
+};
+
+struct ShapeOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_out_type(TensorType out_type)
+ {
+ fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0);
+ }
+ explicit ShapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &);
+ flatbuffers::Offset<ShapeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ShapeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ShapeOptions>
+CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = TensorType_FLOAT32)
+{
+ ShapeOptionsBuilder builder_(_fbb);
+ builder_.add_out_type(out_type);
+ return builder_.Finish();
+}
+
+struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct RankOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit RankOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ RankOptionsBuilder &operator=(const RankOptionsBuilder &);
+ flatbuffers::Offset<RankOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RankOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ RankOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct PowOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ PowOptionsBuilder &operator=(const PowOptionsBuilder &);
+ flatbuffers::Offset<PowOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<PowOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ PowOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_MIN = 4,
+ VT_MAX = 6,
+ VT_NUM_BITS = 8,
+ VT_NARROW_RANGE = 10
+ };
+ float min() const { return GetField<float>(VT_MIN, 0.0f); }
+ float max() const { return GetField<float>(VT_MAX, 0.0f); }
+ int32_t num_bits() const { return GetField<int32_t>(VT_NUM_BITS, 0); }
+ bool narrow_range() const { return GetField<uint8_t>(VT_NARROW_RANGE, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_MIN) &&
+ VerifyField<float>(verifier, VT_MAX) && VerifyField<int32_t>(verifier, VT_NUM_BITS) &&
+ VerifyField<uint8_t>(verifier, VT_NARROW_RANGE) && verifier.EndTable();
+ }
+};
+
+struct FakeQuantOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_min(float min) { fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f); }
+ void add_max(float max) { fbb_.AddElement<float>(FakeQuantOptions::VT_MAX, max, 0.0f); }
+ void add_num_bits(int32_t num_bits)
+ {
+ fbb_.AddElement<int32_t>(FakeQuantOptions::VT_NUM_BITS, num_bits, 0);
+ }
+ void add_narrow_range(bool narrow_range)
+ {
+ fbb_.AddElement<uint8_t>(FakeQuantOptions::VT_NARROW_RANGE, static_cast<uint8_t>(narrow_range),
+ 0);
+ }
+ explicit FakeQuantOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &);
+ flatbuffers::Offset<FakeQuantOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FakeQuantOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FakeQuantOptions>
+CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, float min = 0.0f, float max = 0.0f,
+ int32_t num_bits = 0, bool narrow_range = false)
+{
+ FakeQuantOptionsBuilder builder_(_fbb);
+ builder_.add_num_bits(num_bits);
+ builder_.add_max(max);
+ builder_.add_min(min);
+ builder_.add_narrow_range(narrow_range);
+ return builder_.Finish();
+}
+
+struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_VALUES_COUNT = 4,
+ VT_AXIS = 6
+ };
+ int32_t values_count() const { return GetField<int32_t>(VT_VALUES_COUNT, 0); }
+ int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_VALUES_COUNT) &&
+ VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
+ }
+};
+
+struct PackOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_values_count(int32_t values_count)
+ {
+ fbb_.AddElement<int32_t>(PackOptions::VT_VALUES_COUNT, values_count, 0);
+ }
+ void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(PackOptions::VT_AXIS, axis, 0); }
+ explicit PackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ PackOptionsBuilder &operator=(const PackOptionsBuilder &);
+ flatbuffers::Offset<PackOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<PackOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<PackOptions>
+CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t values_count = 0, int32_t axis = 0)
+{
+ PackOptionsBuilder builder_(_fbb);
+ builder_.add_axis(axis);
+ builder_.add_values_count(values_count);
+ return builder_.Finish();
+}
+
+struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct LogicalOrOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &);
+ flatbuffers::Offset<LogicalOrOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LogicalOrOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LogicalOrOptions>
+CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LogicalOrOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_AXIS = 4
+ };
+ int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
+ verifier.EndTable();
+ }
+};
+
+struct OneHotOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0); }
+ explicit OneHotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &);
+ flatbuffers::Offset<OneHotOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<OneHotOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t axis = 0)
+{
+ OneHotOptionsBuilder builder_(_fbb);
+ builder_.add_axis(axis);
+ return builder_.Finish();
+}
+
+struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct AbsOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ AbsOptionsBuilder &operator=(const AbsOptionsBuilder &);
+ flatbuffers::Offset<AbsOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<AbsOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ AbsOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HardSwishOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HardSwishOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ HardSwishOptionsBuilder &operator=(const HardSwishOptionsBuilder &);
+ flatbuffers::Offset<HardSwishOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HardSwishOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HardSwishOptions>
+CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HardSwishOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct LogicalAndOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &);
+ flatbuffers::Offset<LogicalAndOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LogicalAndOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LogicalAndOptions>
+CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LogicalAndOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct LogicalNotOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &);
+ flatbuffers::Offset<LogicalNotOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LogicalNotOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LogicalNotOptions>
+CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LogicalNotOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_NUM = 4,
+ VT_AXIS = 6
+ };
+ int32_t num() const { return GetField<int32_t>(VT_NUM, 0); }
+ int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM) &&
+ VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
+ }
+};
+
+struct UnpackOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_num(int32_t num) { fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0); }
+ void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(UnpackOptions::VT_AXIS, axis, 0); }
+ explicit UnpackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &);
+ flatbuffers::Offset<UnpackOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UnpackOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t num = 0, int32_t axis = 0)
+{
+ UnpackOptionsBuilder builder_(_fbb);
+ builder_.add_axis(axis);
+ builder_.add_num(num);
+ return builder_.Finish();
+}
+
+struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct FloorDivOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &);
+ flatbuffers::Offset<FloorDivOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FloorDivOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FloorDivOptions>
+CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ FloorDivOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct SquareOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SquareOptionsBuilder &operator=(const SquareOptionsBuilder &);
+ flatbuffers::Offset<SquareOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SquareOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SquareOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct ZerosLikeOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &);
+ flatbuffers::Offset<ZerosLikeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ZerosLikeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ZerosLikeOptions>
+CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ZerosLikeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct FillOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ FillOptionsBuilder &operator=(const FillOptionsBuilder &);
+ flatbuffers::Offset<FillOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FillOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ FillOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct FloorModOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
+ flatbuffers::Offset<FloorModOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FloorModOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FloorModOptions>
+CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ FloorModOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct RangeOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ RangeOptionsBuilder &operator=(const RangeOptionsBuilder &);
+ flatbuffers::Offset<RangeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RangeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ RangeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_ALPHA = 4
+ };
+ float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_ALPHA) &&
+ verifier.EndTable();
+ }
+};
+
+struct LeakyReluOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_alpha(float alpha) { fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); }
+ explicit LeakyReluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &);
+ flatbuffers::Offset<LeakyReluOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LeakyReluOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LeakyReluOptions>
+CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, float alpha = 0.0f)
+{
+ LeakyReluOptionsBuilder builder_(_fbb);
+ builder_.add_alpha(alpha);
+ return builder_.Finish();
+}
+
+struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct SquaredDifferenceOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &);
+ flatbuffers::Offset<SquaredDifferenceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SquaredDifferenceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SquaredDifferenceOptions>
+CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SquaredDifferenceOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_MODE = 4
+ };
+ MirrorPadMode mode() const { return static_cast<MirrorPadMode>(GetField<int8_t>(VT_MODE, 0)); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_MODE) &&
+ verifier.EndTable();
+ }
+};
+
+struct MirrorPadOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_mode(MirrorPadMode mode)
+ {
+ fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0);
+ }
+ explicit MirrorPadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &);
+ flatbuffers::Offset<MirrorPadOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<MirrorPadOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<MirrorPadOptions>
+CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ MirrorPadMode mode = MirrorPadMode_REFLECT)
+{
+ MirrorPadOptionsBuilder builder_(_fbb);
+ builder_.add_mode(mode);
+ return builder_.Finish();
+}
+
+struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_IDX_OUT_TYPE = 4
+ };
+ TensorType idx_out_type() const
+ {
+ return static_cast<TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_IDX_OUT_TYPE) &&
+ verifier.EndTable();
+ }
+};
+
+struct UniqueOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_idx_out_type(TensorType idx_out_type)
+ {
+ fbb_.AddElement<int8_t>(UniqueOptions::VT_IDX_OUT_TYPE, static_cast<int8_t>(idx_out_type), 2);
+ }
+ explicit UniqueOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ UniqueOptionsBuilder &operator=(const UniqueOptionsBuilder &);
+ flatbuffers::Offset<UniqueOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UniqueOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UniqueOptions>
+CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ TensorType idx_out_type = TensorType_INT32)
+{
+ UniqueOptionsBuilder builder_(_fbb);
+ builder_.add_idx_out_type(idx_out_type);
+ return builder_.Finish();
+}
+
+struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct ReverseV2OptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ReverseV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ReverseV2OptionsBuilder &operator=(const ReverseV2OptionsBuilder &);
+ flatbuffers::Offset<ReverseV2Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ReverseV2Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ReverseV2Options>
+CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ReverseV2OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct AddNOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit AddNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ AddNOptionsBuilder &operator=(const AddNOptionsBuilder &);
+ flatbuffers::Offset<AddNOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<AddNOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<AddNOptions> CreateAddNOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ AddNOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+// Generated FlatBuffers table for GatherNd; carries no fields.
+struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // Header-only verification (empty field set).
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+// Generated builder for GatherNdOptions.
+struct GatherNdOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit GatherNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  GatherNdOptionsBuilder &operator=(const GatherNdOptionsBuilder &);
+  flatbuffers::Offset<GatherNdOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<GatherNdOptions>(end);
+    return o;
+  }
+};
+
+// Convenience helper: emits an empty GatherNdOptions table.
+inline flatbuffers::Offset<GatherNdOptions>
+CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  GatherNdOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for Where; carries no fields.
+struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // Header-only verification (empty field set).
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+// Generated builder for WhereOptions.
+struct WhereOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit WhereOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  WhereOptionsBuilder &operator=(const WhereOptionsBuilder &);
+  flatbuffers::Offset<WhereOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<WhereOptions>(end);
+    return o;
+  }
+};
+
+// Convenience helper: emits an empty WhereOptions table.
+inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  WhereOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for ReverseSequence: two int32 fields,
+// seq_dim and batch_dim, both defaulting to 0.
+struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot ids assigned by the schema compiler.
+  enum
+  {
+    VT_SEQ_DIM = 4,
+    VT_BATCH_DIM = 6
+  };
+  int32_t seq_dim() const { return GetField<int32_t>(VT_SEQ_DIM, 0); }
+  int32_t batch_dim() const { return GetField<int32_t>(VT_BATCH_DIM, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_SEQ_DIM) &&
+           VerifyField<int32_t>(verifier, VT_BATCH_DIM) && verifier.EndTable();
+  }
+};
+
+// Generated builder for ReverseSequenceOptions. AddElement skips a field
+// whose value equals the default (0), so defaults are not stored.
+struct ReverseSequenceOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_seq_dim(int32_t seq_dim)
+  {
+    fbb_.AddElement<int32_t>(ReverseSequenceOptions::VT_SEQ_DIM, seq_dim, 0);
+  }
+  void add_batch_dim(int32_t batch_dim)
+  {
+    fbb_.AddElement<int32_t>(ReverseSequenceOptions::VT_BATCH_DIM, batch_dim, 0);
+  }
+  explicit ReverseSequenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  ReverseSequenceOptionsBuilder &operator=(const ReverseSequenceOptionsBuilder &);
+  flatbuffers::Offset<ReverseSequenceOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ReverseSequenceOptions>(end);
+    return o;
+  }
+};
+
+// Convenience helper. NOTE: the add_* call order below is what the schema
+// compiler emitted and determines the serialized layout — do not reorder.
+inline flatbuffers::Offset<ReverseSequenceOptions>
+CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t seq_dim = 0,
+                             int32_t batch_dim = 0)
+{
+  ReverseSequenceOptionsBuilder builder_(_fbb);
+  builder_.add_batch_dim(batch_dim);
+  builder_.add_seq_dim(seq_dim);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for MatrixDiag; carries no fields.
+struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // Header-only verification (empty field set).
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+// Generated builder for MatrixDiagOptions.
+struct MatrixDiagOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit MatrixDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  MatrixDiagOptionsBuilder &operator=(const MatrixDiagOptionsBuilder &);
+  flatbuffers::Offset<MatrixDiagOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<MatrixDiagOptions>(end);
+    return o;
+  }
+};
+
+// Convenience helper: emits an empty MatrixDiagOptions table.
+inline flatbuffers::Offset<MatrixDiagOptions>
+CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  MatrixDiagOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for Quantize; carries no fields.
+struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // Header-only verification (empty field set).
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+// Generated builder for QuantizeOptions.
+struct QuantizeOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit QuantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  QuantizeOptionsBuilder &operator=(const QuantizeOptionsBuilder &);
+  flatbuffers::Offset<QuantizeOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<QuantizeOptions>(end);
+    return o;
+  }
+};
+
+// Convenience helper: emits an empty QuantizeOptions table.
+inline flatbuffers::Offset<QuantizeOptions>
+CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  QuantizeOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for MatrixSetDiag; carries no fields.
+struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // Header-only verification (empty field set).
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+// Generated builder for MatrixSetDiagOptions.
+struct MatrixSetDiagOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit MatrixSetDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  MatrixSetDiagOptionsBuilder &operator=(const MatrixSetDiagOptionsBuilder &);
+  flatbuffers::Offset<MatrixSetDiagOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<MatrixSetDiagOptions>(end);
+    return o;
+  }
+};
+
+// Convenience helper: emits an empty MatrixSetDiagOptions table.
+inline flatbuffers::Offset<MatrixSetDiagOptions>
+CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  MatrixSetDiagOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for If: indices of the then/else subgraphs
+// within the model's subgraph list; both default to 0.
+struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot ids assigned by the schema compiler.
+  enum
+  {
+    VT_THEN_SUBGRAPH_INDEX = 4,
+    VT_ELSE_SUBGRAPH_INDEX = 6
+  };
+  int32_t then_subgraph_index() const { return GetField<int32_t>(VT_THEN_SUBGRAPH_INDEX, 0); }
+  int32_t else_subgraph_index() const { return GetField<int32_t>(VT_ELSE_SUBGRAPH_INDEX, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_THEN_SUBGRAPH_INDEX) &&
+           VerifyField<int32_t>(verifier, VT_ELSE_SUBGRAPH_INDEX) && verifier.EndTable();
+  }
+};
+
+// Generated builder for IfOptions; default-valued fields (0) are elided.
+struct IfOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_then_subgraph_index(int32_t then_subgraph_index)
+  {
+    fbb_.AddElement<int32_t>(IfOptions::VT_THEN_SUBGRAPH_INDEX, then_subgraph_index, 0);
+  }
+  void add_else_subgraph_index(int32_t else_subgraph_index)
+  {
+    fbb_.AddElement<int32_t>(IfOptions::VT_ELSE_SUBGRAPH_INDEX, else_subgraph_index, 0);
+  }
+  explicit IfOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  IfOptionsBuilder &operator=(const IfOptionsBuilder &);
+  flatbuffers::Offset<IfOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<IfOptions>(end);
+    return o;
+  }
+};
+
+// Convenience helper. NOTE: add_* call order is generator-chosen and fixes
+// the serialized layout — do not reorder.
+inline flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                                      int32_t then_subgraph_index = 0,
+                                                      int32_t else_subgraph_index = 0)
+{
+  IfOptionsBuilder builder_(_fbb);
+  builder_.add_else_subgraph_index(else_subgraph_index);
+  builder_.add_then_subgraph_index(then_subgraph_index);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for While: indices of the condition/body
+// subgraphs within the model's subgraph list; both default to 0.
+struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot ids assigned by the schema compiler.
+  enum
+  {
+    VT_COND_SUBGRAPH_INDEX = 4,
+    VT_BODY_SUBGRAPH_INDEX = 6
+  };
+  int32_t cond_subgraph_index() const { return GetField<int32_t>(VT_COND_SUBGRAPH_INDEX, 0); }
+  int32_t body_subgraph_index() const { return GetField<int32_t>(VT_BODY_SUBGRAPH_INDEX, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_COND_SUBGRAPH_INDEX) &&
+           VerifyField<int32_t>(verifier, VT_BODY_SUBGRAPH_INDEX) && verifier.EndTable();
+  }
+};
+
+// Generated builder for WhileOptions; default-valued fields (0) are elided.
+struct WhileOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_cond_subgraph_index(int32_t cond_subgraph_index)
+  {
+    fbb_.AddElement<int32_t>(WhileOptions::VT_COND_SUBGRAPH_INDEX, cond_subgraph_index, 0);
+  }
+  void add_body_subgraph_index(int32_t body_subgraph_index)
+  {
+    fbb_.AddElement<int32_t>(WhileOptions::VT_BODY_SUBGRAPH_INDEX, body_subgraph_index, 0);
+  }
+  explicit WhileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  WhileOptionsBuilder &operator=(const WhileOptionsBuilder &);
+  flatbuffers::Offset<WhileOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<WhileOptions>(end);
+    return o;
+  }
+};
+
+// Convenience helper. NOTE: add_* call order is generator-chosen and fixes
+// the serialized layout — do not reorder.
+inline flatbuffers::Offset<WhileOptions> CreateWhileOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                                            int32_t cond_subgraph_index = 0,
+                                                            int32_t body_subgraph_index = 0)
+{
+  WhileOptionsBuilder builder_(_fbb);
+  builder_.add_body_subgraph_index(body_subgraph_index);
+  builder_.add_cond_subgraph_index(cond_subgraph_index);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for NonMaxSuppressionV4; carries no fields.
+struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // Header-only verification (empty field set).
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+// Generated builder for NonMaxSuppressionV4Options.
+struct NonMaxSuppressionV4OptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit NonMaxSuppressionV4OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  NonMaxSuppressionV4OptionsBuilder &operator=(const NonMaxSuppressionV4OptionsBuilder &);
+  flatbuffers::Offset<NonMaxSuppressionV4Options> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<NonMaxSuppressionV4Options>(end);
+    return o;
+  }
+};
+
+// Convenience helper: emits an empty NonMaxSuppressionV4Options table.
+inline flatbuffers::Offset<NonMaxSuppressionV4Options>
+CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  NonMaxSuppressionV4OptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for NonMaxSuppressionV5; carries no fields.
+struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // Header-only verification (empty field set).
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+// Generated builder for NonMaxSuppressionV5Options.
+struct NonMaxSuppressionV5OptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit NonMaxSuppressionV5OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  NonMaxSuppressionV5OptionsBuilder &operator=(const NonMaxSuppressionV5OptionsBuilder &);
+  flatbuffers::Offset<NonMaxSuppressionV5Options> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<NonMaxSuppressionV5Options>(end);
+    return o;
+  }
+};
+
+// Convenience helper: emits an empty NonMaxSuppressionV5Options table.
+inline flatbuffers::Offset<NonMaxSuppressionV5Options>
+CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  NonMaxSuppressionV5OptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for ScatterNd; carries no fields.
+struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // Header-only verification (empty field set).
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+// Generated builder for ScatterNdOptions.
+struct ScatterNdOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit ScatterNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  ScatterNdOptionsBuilder &operator=(const ScatterNdOptionsBuilder &);
+  flatbuffers::Offset<ScatterNdOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ScatterNdOptions>(end);
+    return o;
+  }
+};
+
+// Convenience helper: emits an empty ScatterNdOptions table.
+inline flatbuffers::Offset<ScatterNdOptions>
+CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  ScatterNdOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for SelectV2; carries no fields.
+struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // Header-only verification (empty field set).
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+// Generated builder for SelectV2Options.
+struct SelectV2OptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit SelectV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  SelectV2OptionsBuilder &operator=(const SelectV2OptionsBuilder &);
+  flatbuffers::Offset<SelectV2Options> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SelectV2Options>(end);
+    return o;
+  }
+};
+
+// Convenience helper: emits an empty SelectV2Options table.
+inline flatbuffers::Offset<SelectV2Options>
+CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  SelectV2OptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for Densify; carries no fields.
+struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // Header-only verification (empty field set).
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+// Generated builder for DensifyOptions.
+struct DensifyOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit DensifyOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  DensifyOptionsBuilder &operator=(const DensifyOptionsBuilder &);
+  flatbuffers::Offset<DensifyOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<DensifyOptions>(end);
+    return o;
+  }
+};
+
+// Convenience helper: emits an empty DensifyOptions table.
+inline flatbuffers::Offset<DensifyOptions>
+CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  DensifyOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for SegmentSum; carries no fields.
+struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // Header-only verification (empty field set).
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+// Generated builder for SegmentSumOptions.
+struct SegmentSumOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit SegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  SegmentSumOptionsBuilder &operator=(const SegmentSumOptionsBuilder &);
+  flatbuffers::Offset<SegmentSumOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SegmentSumOptions>(end);
+    return o;
+  }
+};
+
+// Convenience helper: emits an empty SegmentSumOptions table.
+inline flatbuffers::Offset<SegmentSumOptions>
+CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  SegmentSumOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for BatchMatMul: two bool flags (stored as
+// uint8) selecting adjoint of the lhs/rhs operand; both default to false.
+struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot ids assigned by the schema compiler.
+  enum
+  {
+    VT_ADJOINT_LHS = 4,
+    VT_ADJOINT_RHS = 6
+  };
+  bool adjoint_lhs() const { return GetField<uint8_t>(VT_ADJOINT_LHS, 0) != 0; }
+  bool adjoint_rhs() const { return GetField<uint8_t>(VT_ADJOINT_RHS, 0) != 0; }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJOINT_LHS) &&
+           VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) && verifier.EndTable();
+  }
+};
+
+// Generated builder for BatchMatMulOptions; bools are widened to uint8 on
+// write and default-valued fields (0/false) are elided.
+struct BatchMatMulOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_adjoint_lhs(bool adjoint_lhs)
+  {
+    fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_LHS, static_cast<uint8_t>(adjoint_lhs),
+                             0);
+  }
+  void add_adjoint_rhs(bool adjoint_rhs)
+  {
+    fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_RHS, static_cast<uint8_t>(adjoint_rhs),
+                             0);
+  }
+  explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  BatchMatMulOptionsBuilder &operator=(const BatchMatMulOptionsBuilder &);
+  flatbuffers::Offset<BatchMatMulOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BatchMatMulOptions>(end);
+    return o;
+  }
+};
+
+// Convenience helper. NOTE: add_* call order is generator-chosen and fixes
+// the serialized layout — do not reorder.
+inline flatbuffers::Offset<BatchMatMulOptions>
+CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adjoint_lhs = false,
+                         bool adjoint_rhs = false)
+{
+  BatchMatMulOptionsBuilder builder_(_fbb);
+  builder_.add_adjoint_rhs(adjoint_rhs);
+  builder_.add_adjoint_lhs(adjoint_lhs);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table for InstanceNorm (a circle extension op):
+// float epsilon (default 0.0) and a fused activation function enum
+// (stored as int8, default NONE).
+struct InstanceNormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot ids assigned by the schema compiler.
+  enum
+  {
+    VT_EPSILON = 4,
+    VT_FUSED_ACTIVATION_FUNCTION = 6
+  };
+  float epsilon() const { return GetField<float>(VT_EPSILON, 0.0f); }
+  ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_EPSILON) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+  }
+};
+
+// Generated builder for InstanceNormOptions; default-valued fields are elided.
+struct InstanceNormOptionsBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_epsilon(float epsilon)
+  {
+    fbb_.AddElement<float>(InstanceNormOptions::VT_EPSILON, epsilon, 0.0f);
+  }
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(InstanceNormOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit InstanceNormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  InstanceNormOptionsBuilder &operator=(const InstanceNormOptionsBuilder &);
+  flatbuffers::Offset<InstanceNormOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<InstanceNormOptions>(end);
+    return o;
+  }
+};
+
+// Convenience helper. NOTE: add_* call order is generator-chosen and fixes
+// the serialized layout — do not reorder.
+inline flatbuffers::Offset<InstanceNormOptions> CreateInstanceNormOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f,
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+{
+  InstanceNormOptionsBuilder builder_(_fbb);
+  builder_.add_epsilon(epsilon);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+// Generated FlatBuffers table describing one operator kind used by a model:
+// the builtin opcode (stored as uint8, default ADD=0), an optional custom-op
+// name string (presumably meaningful when the op is CUSTOM — confirm against
+// the schema), and an op version defaulting to 1.
+struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  // vtable slot ids assigned by the schema compiler.
+  enum
+  {
+    VT_BUILTIN_CODE = 4,
+    VT_CUSTOM_CODE = 6,
+    VT_VERSION = 8
+  };
+  BuiltinOperator builtin_code() const
+  {
+    return static_cast<BuiltinOperator>(GetField<uint8_t>(VT_BUILTIN_CODE, 0));
+  }
+  // May be null when the field is absent from the table.
+  const flatbuffers::String *custom_code() const
+  {
+    return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
+  }
+  int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_BUILTIN_CODE) &&
+           VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) &&
+           VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable();
+  }
+};
+
+// Generated builder for OperatorCode; fields equal to their defaults
+// (builtin_code 0, version 1) are elided from the buffer.
+struct OperatorCodeBuilder
+{
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_builtin_code(BuiltinOperator builtin_code)
+  {
+    fbb_.AddElement<uint8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<uint8_t>(builtin_code), 0);
+  }
+  void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code)
+  {
+    fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code);
+  }
+  void add_version(int32_t version)
+  {
+    fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
+  }
+  explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  // Declared but never defined: builder is non-assignable.
+  OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
+  flatbuffers::Offset<OperatorCode> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<OperatorCode>(end);
+    return o;
+  }
+};
+
+// Convenience helper taking a pre-built string offset. NOTE: add_* call
+// order is generator-chosen and fixes the serialized layout — do not reorder.
+inline flatbuffers::Offset<OperatorCode>
+CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb,
+                   BuiltinOperator builtin_code = BuiltinOperator_ADD,
+                   flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1)
+{
+  OperatorCodeBuilder builder_(_fbb);
+  builder_.add_version(version);
+  builder_.add_custom_code(custom_code);
+  builder_.add_builtin_code(builtin_code);
+  return builder_.Finish();
+}
+
+// "Direct" variant accepting a raw C string: interns custom_code into the
+// buffer via CreateString when non-null (passes a null offset otherwise),
+// then delegates to CreateOperatorCode.
+inline flatbuffers::Offset<OperatorCode>
+CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb,
+                         BuiltinOperator builtin_code = BuiltinOperator_ADD,
+                         const char *custom_code = nullptr, int32_t version = 1)
+{
+  return circle::CreateOperatorCode(_fbb, builtin_code,
+                                    custom_code ? _fbb.CreateString(custom_code) : 0, version);
+}
+
+struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_OPCODE_INDEX = 4,
+ VT_INPUTS = 6,
+ VT_OUTPUTS = 8,
+ VT_BUILTIN_OPTIONS_TYPE = 10,
+ VT_BUILTIN_OPTIONS = 12,
+ VT_CUSTOM_OPTIONS = 14,
+ VT_CUSTOM_OPTIONS_FORMAT = 16,
+ VT_MUTATING_VARIABLE_INPUTS = 18,
+ VT_INTERMEDIATES = 20
+ };
+ uint32_t opcode_index() const { return GetField<uint32_t>(VT_OPCODE_INDEX, 0); }
+ const flatbuffers::Vector<int32_t> *inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
+ }
+ const flatbuffers::Vector<int32_t> *outputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
+ }
+ BuiltinOptions builtin_options_type() const
+ {
+ return static_cast<BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
+ }
+ const void *builtin_options() const { return GetPointer<const void *>(VT_BUILTIN_OPTIONS); }
+ template <typename T> const T *builtin_options_as() const;
+ const Conv2DOptions *builtin_options_as_Conv2DOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_Conv2DOptions
+ ? static_cast<const Conv2DOptions *>(builtin_options())
+ : nullptr;
+ }
+ const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions
+ ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions
+ ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LSHProjectionOptions
+ ? static_cast<const LSHProjectionOptions *>(builtin_options())
+ : nullptr;
+ }
+ const Pool2DOptions *builtin_options_as_Pool2DOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_Pool2DOptions
+ ? static_cast<const Pool2DOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SVDFOptions *builtin_options_as_SVDFOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SVDFOptions
+ ? static_cast<const SVDFOptions *>(builtin_options())
+ : nullptr;
+ }
+ const RNNOptions *builtin_options_as_RNNOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_RNNOptions
+ ? static_cast<const RNNOptions *>(builtin_options())
+ : nullptr;
+ }
+ const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_FullyConnectedOptions
+ ? static_cast<const FullyConnectedOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SoftmaxOptions
+ ? static_cast<const SoftmaxOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ConcatenationOptions
+ ? static_cast<const ConcatenationOptions *>(builtin_options())
+ : nullptr;
+ }
+ const AddOptions *builtin_options_as_AddOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_AddOptions
+ ? static_cast<const AddOptions *>(builtin_options())
+ : nullptr;
+ }
+ const L2NormOptions *builtin_options_as_L2NormOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_L2NormOptions
+ ? static_cast<const L2NormOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LocalResponseNormalizationOptions *
+ builtin_options_as_LocalResponseNormalizationOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions
+ ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LSTMOptions *builtin_options_as_LSTMOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LSTMOptions
+ ? static_cast<const LSTMOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions
+ ? static_cast<const ResizeBilinearOptions *>(builtin_options())
+ : nullptr;
+ }
+ const CallOptions *builtin_options_as_CallOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_CallOptions
+ ? static_cast<const CallOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ReshapeOptions *builtin_options_as_ReshapeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ReshapeOptions
+ ? static_cast<const ReshapeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SkipGramOptions *builtin_options_as_SkipGramOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SkipGramOptions
+ ? static_cast<const SkipGramOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions
+ ? static_cast<const SpaceToDepthOptions *>(builtin_options())
+ : nullptr;
+ }
+ const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions
+ ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options())
+ : nullptr;
+ }
+ const MulOptions *builtin_options_as_MulOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_MulOptions
+ ? static_cast<const MulOptions *>(builtin_options())
+ : nullptr;
+ }
+ const PadOptions *builtin_options_as_PadOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_PadOptions
+ ? static_cast<const PadOptions *>(builtin_options())
+ : nullptr;
+ }
+ const GatherOptions *builtin_options_as_GatherOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_GatherOptions
+ ? static_cast<const GatherOptions *>(builtin_options())
+ : nullptr;
+ }
+ const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions
+ ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions
+ ? static_cast<const SpaceToBatchNDOptions *>(builtin_options())
+ : nullptr;
+ }
+ const TransposeOptions *builtin_options_as_TransposeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_TransposeOptions
+ ? static_cast<const TransposeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ReducerOptions *builtin_options_as_ReducerOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ReducerOptions
+ ? static_cast<const ReducerOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SubOptions *builtin_options_as_SubOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SubOptions
+ ? static_cast<const SubOptions *>(builtin_options())
+ : nullptr;
+ }
+ const DivOptions *builtin_options_as_DivOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_DivOptions
+ ? static_cast<const DivOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SqueezeOptions *builtin_options_as_SqueezeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SqueezeOptions
+ ? static_cast<const SqueezeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SequenceRNNOptions
+ ? static_cast<const SequenceRNNOptions *>(builtin_options())
+ : nullptr;
+ }
+ const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_StridedSliceOptions
+ ? static_cast<const StridedSliceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ExpOptions *builtin_options_as_ExpOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ExpOptions
+ ? static_cast<const ExpOptions *>(builtin_options())
+ : nullptr;
+ }
+ const TopKV2Options *builtin_options_as_TopKV2Options() const
+ {
+ return builtin_options_type() == BuiltinOptions_TopKV2Options
+ ? static_cast<const TopKV2Options *>(builtin_options())
+ : nullptr;
+ }
+ const SplitOptions *builtin_options_as_SplitOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SplitOptions
+ ? static_cast<const SplitOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions
+ ? static_cast<const LogSoftmaxOptions *>(builtin_options())
+ : nullptr;
+ }
+ const CastOptions *builtin_options_as_CastOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_CastOptions
+ ? static_cast<const CastOptions *>(builtin_options())
+ : nullptr;
+ }
+ const DequantizeOptions *builtin_options_as_DequantizeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_DequantizeOptions
+ ? static_cast<const DequantizeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions
+ ? static_cast<const MaximumMinimumOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ArgMaxOptions
+ ? static_cast<const ArgMaxOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LessOptions *builtin_options_as_LessOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LessOptions
+ ? static_cast<const LessOptions *>(builtin_options())
+ : nullptr;
+ }
+ const NegOptions *builtin_options_as_NegOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_NegOptions
+ ? static_cast<const NegOptions *>(builtin_options())
+ : nullptr;
+ }
+ const PadV2Options *builtin_options_as_PadV2Options() const
+ {
+ return builtin_options_type() == BuiltinOptions_PadV2Options
+ ? static_cast<const PadV2Options *>(builtin_options())
+ : nullptr;
+ }
+ const GreaterOptions *builtin_options_as_GreaterOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_GreaterOptions
+ ? static_cast<const GreaterOptions *>(builtin_options())
+ : nullptr;
+ }
+ const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_GreaterEqualOptions
+ ? static_cast<const GreaterEqualOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LessEqualOptions *builtin_options_as_LessEqualOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LessEqualOptions
+ ? static_cast<const LessEqualOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SelectOptions *builtin_options_as_SelectOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SelectOptions
+ ? static_cast<const SelectOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SliceOptions *builtin_options_as_SliceOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SliceOptions
+ ? static_cast<const SliceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_TransposeConvOptions
+ ? static_cast<const TransposeConvOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SparseToDenseOptions
+ ? static_cast<const SparseToDenseOptions *>(builtin_options())
+ : nullptr;
+ }
+ const TileOptions *builtin_options_as_TileOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_TileOptions
+ ? static_cast<const TileOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ExpandDimsOptions
+ ? static_cast<const ExpandDimsOptions *>(builtin_options())
+ : nullptr;
+ }
+ const EqualOptions *builtin_options_as_EqualOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_EqualOptions
+ ? static_cast<const EqualOptions *>(builtin_options())
+ : nullptr;
+ }
+ const NotEqualOptions *builtin_options_as_NotEqualOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_NotEqualOptions
+ ? static_cast<const NotEqualOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ShapeOptions *builtin_options_as_ShapeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ShapeOptions
+ ? static_cast<const ShapeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const PowOptions *builtin_options_as_PowOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_PowOptions
+ ? static_cast<const PowOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ArgMinOptions *builtin_options_as_ArgMinOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ArgMinOptions
+ ? static_cast<const ArgMinOptions *>(builtin_options())
+ : nullptr;
+ }
+ const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_FakeQuantOptions
+ ? static_cast<const FakeQuantOptions *>(builtin_options())
+ : nullptr;
+ }
+ const PackOptions *builtin_options_as_PackOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_PackOptions
+ ? static_cast<const PackOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LogicalOrOptions
+ ? static_cast<const LogicalOrOptions *>(builtin_options())
+ : nullptr;
+ }
+ const OneHotOptions *builtin_options_as_OneHotOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_OneHotOptions
+ ? static_cast<const OneHotOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LogicalAndOptions
+ ? static_cast<const LogicalAndOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LogicalNotOptions
+ ? static_cast<const LogicalNotOptions *>(builtin_options())
+ : nullptr;
+ }
+ const UnpackOptions *builtin_options_as_UnpackOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_UnpackOptions
+ ? static_cast<const UnpackOptions *>(builtin_options())
+ : nullptr;
+ }
+ const FloorDivOptions *builtin_options_as_FloorDivOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_FloorDivOptions
+ ? static_cast<const FloorDivOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SquareOptions *builtin_options_as_SquareOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SquareOptions
+ ? static_cast<const SquareOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ZerosLikeOptions
+ ? static_cast<const ZerosLikeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const FillOptions *builtin_options_as_FillOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_FillOptions
+ ? static_cast<const FillOptions *>(builtin_options())
+ : nullptr;
+ }
+ const BidirectionalSequenceLSTMOptions *
+ builtin_options_as_BidirectionalSequenceLSTMOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions
+ ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
+ }
+ const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions
+ ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options())
+ : nullptr;
+ }
+ const UnidirectionalSequenceLSTMOptions *
+ builtin_options_as_UnidirectionalSequenceLSTMOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions
+ ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
+ }
+ const FloorModOptions *builtin_options_as_FloorModOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_FloorModOptions
+ ? static_cast<const FloorModOptions *>(builtin_options())
+ : nullptr;
+ }
+ const RangeOptions *builtin_options_as_RangeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_RangeOptions
+ ? static_cast<const RangeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions
+ ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LeakyReluOptions
+ ? static_cast<const LeakyReluOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions
+ ? static_cast<const SquaredDifferenceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_MirrorPadOptions
+ ? static_cast<const MirrorPadOptions *>(builtin_options())
+ : nullptr;
+ }
+ const AbsOptions *builtin_options_as_AbsOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_AbsOptions
+ ? static_cast<const AbsOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SplitVOptions *builtin_options_as_SplitVOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SplitVOptions
+ ? static_cast<const SplitVOptions *>(builtin_options())
+ : nullptr;
+ }
+ const UniqueOptions *builtin_options_as_UniqueOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_UniqueOptions
+ ? static_cast<const UniqueOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ReverseV2Options *builtin_options_as_ReverseV2Options() const
+ {
+ return builtin_options_type() == BuiltinOptions_ReverseV2Options
+ ? static_cast<const ReverseV2Options *>(builtin_options())
+ : nullptr;
+ }
+ const AddNOptions *builtin_options_as_AddNOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_AddNOptions
+ ? static_cast<const AddNOptions *>(builtin_options())
+ : nullptr;
+ }
+ const GatherNdOptions *builtin_options_as_GatherNdOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_GatherNdOptions
+ ? static_cast<const GatherNdOptions *>(builtin_options())
+ : nullptr;
+ }
+ const CosOptions *builtin_options_as_CosOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_CosOptions
+ ? static_cast<const CosOptions *>(builtin_options())
+ : nullptr;
+ }
+ const WhereOptions *builtin_options_as_WhereOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_WhereOptions
+ ? static_cast<const WhereOptions *>(builtin_options())
+ : nullptr;
+ }
+ const RankOptions *builtin_options_as_RankOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_RankOptions
+ ? static_cast<const RankOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ReverseSequenceOptions
+ ? static_cast<const ReverseSequenceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_MatrixDiagOptions
+ ? static_cast<const MatrixDiagOptions *>(builtin_options())
+ : nullptr;
+ }
+ const QuantizeOptions *builtin_options_as_QuantizeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_QuantizeOptions
+ ? static_cast<const QuantizeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_MatrixSetDiagOptions
+ ? static_cast<const MatrixSetDiagOptions *>(builtin_options())
+ : nullptr;
+ }
+ const HardSwishOptions *builtin_options_as_HardSwishOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_HardSwishOptions
+ ? static_cast<const HardSwishOptions *>(builtin_options())
+ : nullptr;
+ }
+ const IfOptions *builtin_options_as_IfOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_IfOptions
+ ? static_cast<const IfOptions *>(builtin_options())
+ : nullptr;
+ }
+ const WhileOptions *builtin_options_as_WhileOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_WhileOptions
+ ? static_cast<const WhileOptions *>(builtin_options())
+ : nullptr;
+ }
+ const DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_DepthToSpaceOptions
+ ? static_cast<const DepthToSpaceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const
+ {
+ return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV4Options
+ ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options())
+ : nullptr;
+ }
+ const NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const
+ {
+ return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV5Options
+ ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options())
+ : nullptr;
+ }
+ const ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ScatterNdOptions
+ ? static_cast<const ScatterNdOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SelectV2Options *builtin_options_as_SelectV2Options() const
+ {
+ return builtin_options_type() == BuiltinOptions_SelectV2Options
+ ? static_cast<const SelectV2Options *>(builtin_options())
+ : nullptr;
+ }
+ const DensifyOptions *builtin_options_as_DensifyOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_DensifyOptions
+ ? static_cast<const DensifyOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SegmentSumOptions
+ ? static_cast<const SegmentSumOptions *>(builtin_options())
+ : nullptr;
+ }
+ const BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_BatchMatMulOptions
+ ? static_cast<const BatchMatMulOptions *>(builtin_options())
+ : nullptr;
+ }
+ const InstanceNormOptions *builtin_options_as_InstanceNormOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_InstanceNormOptions
+ ? static_cast<const InstanceNormOptions *>(builtin_options())
+ : nullptr;
+ }
+ const flatbuffers::Vector<uint8_t> *custom_options() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
+ }
+ CustomOptionsFormat custom_options_format() const
+ {
+ return static_cast<CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
+ }
+ const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_MUTATING_VARIABLE_INPUTS);
+ }
+ const flatbuffers::Vector<int32_t> *intermediates() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INTERMEDIATES);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_OPCODE_INDEX) &&
+ VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) &&
+ VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
+ VerifyField<uint8_t>(verifier, VT_BUILTIN_OPTIONS_TYPE) &&
+ VerifyOffset(verifier, VT_BUILTIN_OPTIONS) &&
+ VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) &&
+ VerifyOffset(verifier, VT_CUSTOM_OPTIONS) && verifier.VerifyVector(custom_options()) &&
+ VerifyField<int8_t>(verifier, VT_CUSTOM_OPTIONS_FORMAT) &&
+ VerifyOffset(verifier, VT_MUTATING_VARIABLE_INPUTS) &&
+ verifier.VerifyVector(mutating_variable_inputs()) &&
+ VerifyOffset(verifier, VT_INTERMEDIATES) && verifier.VerifyVector(intermediates()) &&
+ verifier.EndTable();
+ }
+};
+
+template <> inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>() const
+{
+ return builtin_options_as_Conv2DOptions();
+}
+
+template <>
+inline const DepthwiseConv2DOptions *Operator::builtin_options_as<DepthwiseConv2DOptions>() const
+{
+ return builtin_options_as_DepthwiseConv2DOptions();
+}
+
+template <>
+inline const ConcatEmbeddingsOptions *Operator::builtin_options_as<ConcatEmbeddingsOptions>() const
+{
+ return builtin_options_as_ConcatEmbeddingsOptions();
+}
+
+template <>
+inline const LSHProjectionOptions *Operator::builtin_options_as<LSHProjectionOptions>() const
+{
+ return builtin_options_as_LSHProjectionOptions();
+}
+
+template <> inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>() const
+{
+ return builtin_options_as_Pool2DOptions();
+}
+
+template <> inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const
+{
+ return builtin_options_as_SVDFOptions();
+}
+
+template <> inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const
+{
+ return builtin_options_as_RNNOptions();
+}
+
+template <>
+inline const FullyConnectedOptions *Operator::builtin_options_as<FullyConnectedOptions>() const
+{
+ return builtin_options_as_FullyConnectedOptions();
+}
+
+template <> inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>() const
+{
+ return builtin_options_as_SoftmaxOptions();
+}
+
+template <>
+inline const ConcatenationOptions *Operator::builtin_options_as<ConcatenationOptions>() const
+{
+ return builtin_options_as_ConcatenationOptions();
+}
+
+template <> inline const AddOptions *Operator::builtin_options_as<AddOptions>() const
+{
+ return builtin_options_as_AddOptions();
+}
+
+template <> inline const L2NormOptions *Operator::builtin_options_as<L2NormOptions>() const
+{
+ return builtin_options_as_L2NormOptions();
+}
+
+template <>
+inline const LocalResponseNormalizationOptions *
+Operator::builtin_options_as<LocalResponseNormalizationOptions>() const
+{
+ return builtin_options_as_LocalResponseNormalizationOptions();
+}
+
+template <> inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const
+{
+ return builtin_options_as_LSTMOptions();
+}
+
+template <>
+inline const ResizeBilinearOptions *Operator::builtin_options_as<ResizeBilinearOptions>() const
+{
+ return builtin_options_as_ResizeBilinearOptions();
+}
+
+template <> inline const CallOptions *Operator::builtin_options_as<CallOptions>() const
+{
+ return builtin_options_as_CallOptions();
+}
+
+template <> inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>() const
+{
+ return builtin_options_as_ReshapeOptions();
+}
+
+template <> inline const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>() const
+{
+ return builtin_options_as_SkipGramOptions();
+}
+
+template <>
+inline const SpaceToDepthOptions *Operator::builtin_options_as<SpaceToDepthOptions>() const
+{
+ return builtin_options_as_SpaceToDepthOptions();
+}
+
+template <>
+inline const EmbeddingLookupSparseOptions *
+Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const
+{
+ return builtin_options_as_EmbeddingLookupSparseOptions();
+}
+
+template <> inline const MulOptions *Operator::builtin_options_as<MulOptions>() const
+{
+ return builtin_options_as_MulOptions();
+}
+
+template <> inline const PadOptions *Operator::builtin_options_as<PadOptions>() const
+{
+ return builtin_options_as_PadOptions();
+}
+
+template <> inline const GatherOptions *Operator::builtin_options_as<GatherOptions>() const
+{
+ return builtin_options_as_GatherOptions();
+}
+
+template <>
+inline const BatchToSpaceNDOptions *Operator::builtin_options_as<BatchToSpaceNDOptions>() const
+{
+ return builtin_options_as_BatchToSpaceNDOptions();
+}
+
+template <>
+inline const SpaceToBatchNDOptions *Operator::builtin_options_as<SpaceToBatchNDOptions>() const
+{
+ return builtin_options_as_SpaceToBatchNDOptions();
+}
+
+template <> inline const TransposeOptions *Operator::builtin_options_as<TransposeOptions>() const
+{
+ return builtin_options_as_TransposeOptions();
+}
+
+template <> inline const ReducerOptions *Operator::builtin_options_as<ReducerOptions>() const
+{
+ return builtin_options_as_ReducerOptions();
+}
+
+template <> inline const SubOptions *Operator::builtin_options_as<SubOptions>() const
+{
+ return builtin_options_as_SubOptions();
+}
+
+template <> inline const DivOptions *Operator::builtin_options_as<DivOptions>() const
+{
+ return builtin_options_as_DivOptions();
+}
+
+template <> inline const SqueezeOptions *Operator::builtin_options_as<SqueezeOptions>() const
+{
+ return builtin_options_as_SqueezeOptions();
+}
+
+template <>
+inline const SequenceRNNOptions *Operator::builtin_options_as<SequenceRNNOptions>() const
+{
+ return builtin_options_as_SequenceRNNOptions();
+}
+
+template <>
+inline const StridedSliceOptions *Operator::builtin_options_as<StridedSliceOptions>() const
+{
+ return builtin_options_as_StridedSliceOptions();
+}
+
+template <> inline const ExpOptions *Operator::builtin_options_as<ExpOptions>() const
+{
+ return builtin_options_as_ExpOptions();
+}
+
+template <> inline const TopKV2Options *Operator::builtin_options_as<TopKV2Options>() const
+{
+ return builtin_options_as_TopKV2Options();
+}
+
+template <> inline const SplitOptions *Operator::builtin_options_as<SplitOptions>() const
+{
+ return builtin_options_as_SplitOptions();
+}
+
+template <> inline const LogSoftmaxOptions *Operator::builtin_options_as<LogSoftmaxOptions>() const
+{
+ return builtin_options_as_LogSoftmaxOptions();
+}
+
+template <> inline const CastOptions *Operator::builtin_options_as<CastOptions>() const
+{
+ return builtin_options_as_CastOptions();
+}
+
+template <> inline const DequantizeOptions *Operator::builtin_options_as<DequantizeOptions>() const
+{
+ return builtin_options_as_DequantizeOptions();
+}
+
+template <>
+inline const MaximumMinimumOptions *Operator::builtin_options_as<MaximumMinimumOptions>() const
+{
+ return builtin_options_as_MaximumMinimumOptions();
+}
+
+template <> inline const ArgMaxOptions *Operator::builtin_options_as<ArgMaxOptions>() const
+{
+ return builtin_options_as_ArgMaxOptions();
+}
+
+template <> inline const LessOptions *Operator::builtin_options_as<LessOptions>() const
+{
+ return builtin_options_as_LessOptions();
+}
+
+template <> inline const NegOptions *Operator::builtin_options_as<NegOptions>() const
+{
+ return builtin_options_as_NegOptions();
+}
+
+template <> inline const PadV2Options *Operator::builtin_options_as<PadV2Options>() const
+{
+ return builtin_options_as_PadV2Options();
+}
+
+template <> inline const GreaterOptions *Operator::builtin_options_as<GreaterOptions>() const
+{
+ return builtin_options_as_GreaterOptions();
+}
+
+template <>
+inline const GreaterEqualOptions *Operator::builtin_options_as<GreaterEqualOptions>() const
+{
+ return builtin_options_as_GreaterEqualOptions();
+}
+
+template <> inline const LessEqualOptions *Operator::builtin_options_as<LessEqualOptions>() const
+{
+ return builtin_options_as_LessEqualOptions();
+}
+
+template <> inline const SelectOptions *Operator::builtin_options_as<SelectOptions>() const
+{
+ return builtin_options_as_SelectOptions();
+}
+
+template <> inline const SliceOptions *Operator::builtin_options_as<SliceOptions>() const
+{
+ return builtin_options_as_SliceOptions();
+}
+
+template <>
+inline const TransposeConvOptions *Operator::builtin_options_as<TransposeConvOptions>() const
+{
+ return builtin_options_as_TransposeConvOptions();
+}
+
+template <>
+inline const SparseToDenseOptions *Operator::builtin_options_as<SparseToDenseOptions>() const
+{
+ return builtin_options_as_SparseToDenseOptions();
+}
+
+template <> inline const TileOptions *Operator::builtin_options_as<TileOptions>() const
+{
+ return builtin_options_as_TileOptions();
+}
+
+template <> inline const ExpandDimsOptions *Operator::builtin_options_as<ExpandDimsOptions>() const
+{
+ return builtin_options_as_ExpandDimsOptions();
+}
+
+template <> inline const EqualOptions *Operator::builtin_options_as<EqualOptions>() const
+{
+ return builtin_options_as_EqualOptions();
+}
+
+template <> inline const NotEqualOptions *Operator::builtin_options_as<NotEqualOptions>() const
+{
+ return builtin_options_as_NotEqualOptions();
+}
+
+template <> inline const ShapeOptions *Operator::builtin_options_as<ShapeOptions>() const
+{
+ return builtin_options_as_ShapeOptions();
+}
+
+template <> inline const PowOptions *Operator::builtin_options_as<PowOptions>() const
+{
+ return builtin_options_as_PowOptions();
+}
+
+template <> inline const ArgMinOptions *Operator::builtin_options_as<ArgMinOptions>() const
+{
+ return builtin_options_as_ArgMinOptions();
+}
+
+template <> inline const FakeQuantOptions *Operator::builtin_options_as<FakeQuantOptions>() const
+{
+ return builtin_options_as_FakeQuantOptions();
+}
+
+template <> inline const PackOptions *Operator::builtin_options_as<PackOptions>() const
+{
+ return builtin_options_as_PackOptions();
+}
+
+template <> inline const LogicalOrOptions *Operator::builtin_options_as<LogicalOrOptions>() const
+{
+ return builtin_options_as_LogicalOrOptions();
+}
+
+template <> inline const OneHotOptions *Operator::builtin_options_as<OneHotOptions>() const
+{
+ return builtin_options_as_OneHotOptions();
+}
+
+template <> inline const LogicalAndOptions *Operator::builtin_options_as<LogicalAndOptions>() const
+{
+ return builtin_options_as_LogicalAndOptions();
+}
+
+template <> inline const LogicalNotOptions *Operator::builtin_options_as<LogicalNotOptions>() const
+{
+ return builtin_options_as_LogicalNotOptions();
+}
+
+template <> inline const UnpackOptions *Operator::builtin_options_as<UnpackOptions>() const
+{
+ return builtin_options_as_UnpackOptions();
+}
+
+template <> inline const FloorDivOptions *Operator::builtin_options_as<FloorDivOptions>() const
+{
+ return builtin_options_as_FloorDivOptions();
+}
+
+template <> inline const SquareOptions *Operator::builtin_options_as<SquareOptions>() const
+{
+ return builtin_options_as_SquareOptions();
+}
+
+template <> inline const ZerosLikeOptions *Operator::builtin_options_as<ZerosLikeOptions>() const
+{
+ return builtin_options_as_ZerosLikeOptions();
+}
+
+template <> inline const FillOptions *Operator::builtin_options_as<FillOptions>() const
+{
+ return builtin_options_as_FillOptions();
+}
+
+template <>
+inline const BidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<BidirectionalSequenceLSTMOptions>() const
+{
+ return builtin_options_as_BidirectionalSequenceLSTMOptions();
+}
+
+template <>
+inline const BidirectionalSequenceRNNOptions *
+Operator::builtin_options_as<BidirectionalSequenceRNNOptions>() const
+{
+ return builtin_options_as_BidirectionalSequenceRNNOptions();
+}
+
+template <>
+inline const UnidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<UnidirectionalSequenceLSTMOptions>() const
+{
+ return builtin_options_as_UnidirectionalSequenceLSTMOptions();
+}
+
+template <> inline const FloorModOptions *Operator::builtin_options_as<FloorModOptions>() const
+{
+ return builtin_options_as_FloorModOptions();
+}
+
+template <> inline const RangeOptions *Operator::builtin_options_as<RangeOptions>() const
+{
+ return builtin_options_as_RangeOptions();
+}
+
+template <>
+inline const ResizeNearestNeighborOptions *
+Operator::builtin_options_as<ResizeNearestNeighborOptions>() const
+{
+ return builtin_options_as_ResizeNearestNeighborOptions();
+}
+
+template <> inline const LeakyReluOptions *Operator::builtin_options_as<LeakyReluOptions>() const
+{
+ return builtin_options_as_LeakyReluOptions();
+}
+
+template <>
+inline const SquaredDifferenceOptions *
+Operator::builtin_options_as<SquaredDifferenceOptions>() const
+{
+ return builtin_options_as_SquaredDifferenceOptions();
+}
+
+template <> inline const MirrorPadOptions *Operator::builtin_options_as<MirrorPadOptions>() const
+{
+ return builtin_options_as_MirrorPadOptions();
+}
+
+template <> inline const AbsOptions *Operator::builtin_options_as<AbsOptions>() const
+{
+ return builtin_options_as_AbsOptions();
+}
+
+template <> inline const SplitVOptions *Operator::builtin_options_as<SplitVOptions>() const
+{
+ return builtin_options_as_SplitVOptions();
+}
+
+template <> inline const UniqueOptions *Operator::builtin_options_as<UniqueOptions>() const
+{
+ return builtin_options_as_UniqueOptions();
+}
+
+template <> inline const ReverseV2Options *Operator::builtin_options_as<ReverseV2Options>() const
+{
+ return builtin_options_as_ReverseV2Options();
+}
+
+template <> inline const AddNOptions *Operator::builtin_options_as<AddNOptions>() const
+{
+ return builtin_options_as_AddNOptions();
+}
+
+template <> inline const GatherNdOptions *Operator::builtin_options_as<GatherNdOptions>() const
+{
+ return builtin_options_as_GatherNdOptions();
+}
+
+template <> inline const CosOptions *Operator::builtin_options_as<CosOptions>() const
+{
+ return builtin_options_as_CosOptions();
+}
+
+template <> inline const WhereOptions *Operator::builtin_options_as<WhereOptions>() const
+{
+ return builtin_options_as_WhereOptions();
+}
+
+template <> inline const RankOptions *Operator::builtin_options_as<RankOptions>() const
+{
+ return builtin_options_as_RankOptions();
+}
+
+template <>
+inline const ReverseSequenceOptions *Operator::builtin_options_as<ReverseSequenceOptions>() const
+{
+ return builtin_options_as_ReverseSequenceOptions();
+}
+
+template <> inline const MatrixDiagOptions *Operator::builtin_options_as<MatrixDiagOptions>() const
+{
+ return builtin_options_as_MatrixDiagOptions();
+}
+
+template <> inline const QuantizeOptions *Operator::builtin_options_as<QuantizeOptions>() const
+{
+ return builtin_options_as_QuantizeOptions();
+}
+
+template <>
+inline const MatrixSetDiagOptions *Operator::builtin_options_as<MatrixSetDiagOptions>() const
+{
+ return builtin_options_as_MatrixSetDiagOptions();
+}
+
+template <> inline const HardSwishOptions *Operator::builtin_options_as<HardSwishOptions>() const
+{
+ return builtin_options_as_HardSwishOptions();
+}
+
+template <> inline const IfOptions *Operator::builtin_options_as<IfOptions>() const
+{
+ return builtin_options_as_IfOptions();
+}
+
+template <> inline const WhileOptions *Operator::builtin_options_as<WhileOptions>() const
+{
+ return builtin_options_as_WhileOptions();
+}
+
+template <>
+inline const DepthToSpaceOptions *Operator::builtin_options_as<DepthToSpaceOptions>() const
+{
+ return builtin_options_as_DepthToSpaceOptions();
+}
+
+template <>
+inline const NonMaxSuppressionV4Options *
+Operator::builtin_options_as<NonMaxSuppressionV4Options>() const
+{
+ return builtin_options_as_NonMaxSuppressionV4Options();
+}
+
+template <>
+inline const NonMaxSuppressionV5Options *
+Operator::builtin_options_as<NonMaxSuppressionV5Options>() const
+{
+ return builtin_options_as_NonMaxSuppressionV5Options();
+}
+
+template <> inline const ScatterNdOptions *Operator::builtin_options_as<ScatterNdOptions>() const
+{
+ return builtin_options_as_ScatterNdOptions();
+}
+
+template <> inline const SelectV2Options *Operator::builtin_options_as<SelectV2Options>() const
+{
+ return builtin_options_as_SelectV2Options();
+}
+
+template <> inline const DensifyOptions *Operator::builtin_options_as<DensifyOptions>() const
+{
+ return builtin_options_as_DensifyOptions();
+}
+
+template <> inline const SegmentSumOptions *Operator::builtin_options_as<SegmentSumOptions>() const
+{
+ return builtin_options_as_SegmentSumOptions();
+}
+
+template <>
+inline const BatchMatMulOptions *Operator::builtin_options_as<BatchMatMulOptions>() const
+{
+ return builtin_options_as_BatchMatMulOptions();
+}
+
+template <>
+inline const InstanceNormOptions *Operator::builtin_options_as<InstanceNormOptions>() const
+{
+ return builtin_options_as_InstanceNormOptions();
+}
+
+struct OperatorBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_opcode_index(uint32_t opcode_index)
+ {
+ fbb_.AddElement<uint32_t>(Operator::VT_OPCODE_INDEX, opcode_index, 0);
+ }
+ void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs)
+ {
+ fbb_.AddOffset(Operator::VT_INPUTS, inputs);
+ }
+ void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs)
+ {
+ fbb_.AddOffset(Operator::VT_OUTPUTS, outputs);
+ }
+ void add_builtin_options_type(BuiltinOptions builtin_options_type)
+ {
+ fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE,
+ static_cast<uint8_t>(builtin_options_type), 0);
+ }
+ void add_builtin_options(flatbuffers::Offset<void> builtin_options)
+ {
+ fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options);
+ }
+ void add_custom_options(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options)
+ {
+ fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options);
+ }
+ void add_custom_options_format(CustomOptionsFormat custom_options_format)
+ {
+ fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT,
+ static_cast<int8_t>(custom_options_format), 0);
+ }
+ void add_mutating_variable_inputs(
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
+ {
+ fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs);
+ }
+ void add_intermediates(flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates)
+ {
+ fbb_.AddOffset(Operator::VT_INTERMEDIATES, intermediates);
+ }
+ explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ OperatorBuilder &operator=(const OperatorBuilder &);
+ flatbuffers::Offset<Operator> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Operator>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Operator>
+CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
+ CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
+{
+ OperatorBuilder builder_(_fbb);
+ builder_.add_intermediates(intermediates);
+ builder_.add_mutating_variable_inputs(mutating_variable_inputs);
+ builder_.add_custom_options(custom_options);
+ builder_.add_builtin_options(builtin_options);
+ builder_.add_outputs(outputs);
+ builder_.add_inputs(inputs);
+ builder_.add_opcode_index(opcode_index);
+ builder_.add_custom_options_format(custom_options_format);
+ builder_.add_builtin_options_type(builtin_options_type);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Operator>
+CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ const std::vector<int32_t> *inputs = nullptr,
+ const std::vector<int32_t> *outputs = nullptr,
+ BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ const std::vector<uint8_t> *custom_options = nullptr,
+ CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
+ const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
+ const std::vector<int32_t> *intermediates = nullptr)
+{
+ return circle::CreateOperator(
+ _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
+ outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options,
+ custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format,
+ mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0,
+ intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0);
+}
+
+struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_TENSORS = 4,
+ VT_INPUTS = 6,
+ VT_OUTPUTS = 8,
+ VT_OPERATORS = 10,
+ VT_NAME = 12,
+ VT_DATA_FORMAT = 14
+ };
+ const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *tensors() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(VT_TENSORS);
+ }
+ const flatbuffers::Vector<int32_t> *inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
+ }
+ const flatbuffers::Vector<int32_t> *outputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<Operator>> *operators() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(VT_OPERATORS);
+ }
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ DataFormat data_format() const
+ {
+ return static_cast<DataFormat>(GetField<int8_t>(VT_DATA_FORMAT, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_TENSORS) &&
+ verifier.VerifyVector(tensors()) && verifier.VerifyVectorOfTables(tensors()) &&
+ VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) &&
+ VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
+ VerifyOffset(verifier, VT_OPERATORS) && verifier.VerifyVector(operators()) &&
+ verifier.VerifyVectorOfTables(operators()) && VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyField<int8_t>(verifier, VT_DATA_FORMAT) &&
+ verifier.EndTable();
+ }
+};
+
+struct SubGraphBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors)
+ {
+ fbb_.AddOffset(SubGraph::VT_TENSORS, tensors);
+ }
+ void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs)
+ {
+ fbb_.AddOffset(SubGraph::VT_INPUTS, inputs);
+ }
+ void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs)
+ {
+ fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
+ }
+ void
+ add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators)
+ {
+ fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
+ }
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(SubGraph::VT_NAME, name);
+ }
+ void add_data_format(DataFormat data_format)
+ {
+ fbb_.AddElement<int8_t>(SubGraph::VT_DATA_FORMAT, static_cast<int8_t>(data_format), 0);
+ }
+ explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SubGraphBuilder &operator=(const SubGraphBuilder &);
+ flatbuffers::Offset<SubGraph> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SubGraph>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SubGraph> CreateSubGraph(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0,
+ DataFormat data_format = DataFormat_CHANNELS_LAST)
+{
+ SubGraphBuilder builder_(_fbb);
+ builder_.add_name(name);
+ builder_.add_operators(operators);
+ builder_.add_outputs(outputs);
+ builder_.add_inputs(inputs);
+ builder_.add_tensors(tensors);
+ builder_.add_data_format(data_format);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SubGraph>
+CreateSubGraphDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr,
+ const std::vector<int32_t> *inputs = nullptr,
+ const std::vector<int32_t> *outputs = nullptr,
+ const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr,
+ const char *name = nullptr, DataFormat data_format = DataFormat_CHANNELS_LAST)
+{
+ return circle::CreateSubGraph(
+ _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
+ inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
+ outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
+ operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0,
+ name ? _fbb.CreateString(name) : 0, data_format);
+}
+
+struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_DATA = 4
+ };
+ const flatbuffers::Vector<uint8_t> *data() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_DATA);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_DATA) &&
+ verifier.VerifyVector(data()) && verifier.EndTable();
+ }
+};
+
+struct BufferBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data)
+ {
+ fbb_.AddOffset(Buffer::VT_DATA, data);
+ }
+ explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ BufferBuilder &operator=(const BufferBuilder &);
+ flatbuffers::Offset<Buffer> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Buffer>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Buffer>
+CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data = 0)
+{
+ BufferBuilder builder_(_fbb);
+ builder_.add_data(data);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Buffer> CreateBufferDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<uint8_t> *data = nullptr)
+{
+ return circle::CreateBuffer(_fbb, data ? _fbb.CreateVector<uint8_t>(*data) : 0);
+}
+
+struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_NAME = 4,
+ VT_BUFFER = 6
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyField<uint32_t>(verifier, VT_BUFFER) &&
+ verifier.EndTable();
+ }
+};
+
+struct MetadataBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(Metadata::VT_NAME, name);
+ }
+ void add_buffer(uint32_t buffer) { fbb_.AddElement<uint32_t>(Metadata::VT_BUFFER, buffer, 0); }
+ explicit MetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ MetadataBuilder &operator=(const MetadataBuilder &);
+ flatbuffers::Offset<Metadata> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Metadata>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Metadata>
+CreateMetadata(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> name = 0, uint32_t buffer = 0)
+{
+ MetadataBuilder builder_(_fbb);
+ builder_.add_buffer(buffer);
+ builder_.add_name(name);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Metadata> CreateMetadataDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const char *name = nullptr,
+ uint32_t buffer = 0)
+{
+ return circle::CreateMetadata(_fbb, name ? _fbb.CreateString(name) : 0, buffer);
+}
+
+struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_VERSION = 4,
+ VT_OPERATOR_CODES = 6,
+ VT_SUBGRAPHS = 8,
+ VT_DESCRIPTION = 10,
+ VT_BUFFERS = 12,
+ VT_METADATA_BUFFER = 14,
+ VT_METADATA = 16
+ };
+ uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); }
+ const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>(
+ VT_OPERATOR_CODES);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(VT_SUBGRAPHS);
+ }
+ const flatbuffers::String *description() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *buffers() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(VT_BUFFERS);
+ }
+ const flatbuffers::Vector<int32_t> *metadata_buffer() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *metadata() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *>(VT_METADATA);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_VERSION) &&
+ VerifyOffset(verifier, VT_OPERATOR_CODES) && verifier.VerifyVector(operator_codes()) &&
+ verifier.VerifyVectorOfTables(operator_codes()) &&
+ VerifyOffset(verifier, VT_SUBGRAPHS) && verifier.VerifyVector(subgraphs()) &&
+ verifier.VerifyVectorOfTables(subgraphs()) && VerifyOffset(verifier, VT_DESCRIPTION) &&
+ verifier.VerifyString(description()) && VerifyOffset(verifier, VT_BUFFERS) &&
+ verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) &&
+ VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) &&
+ VerifyOffset(verifier, VT_METADATA) && verifier.VerifyVector(metadata()) &&
+ verifier.VerifyVectorOfTables(metadata()) && verifier.EndTable();
+ }
+};
+
+struct ModelBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); }
+ void add_operator_codes(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes)
+ {
+ fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
+ }
+ void
+ add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs)
+ {
+ fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
+ }
+ void add_description(flatbuffers::Offset<flatbuffers::String> description)
+ {
+ fbb_.AddOffset(Model::VT_DESCRIPTION, description);
+ }
+ void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers)
+ {
+ fbb_.AddOffset(Model::VT_BUFFERS, buffers);
+ }
+ void add_metadata_buffer(flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer)
+ {
+ fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer);
+ }
+ void
+ add_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata)
+ {
+ fbb_.AddOffset(Model::VT_METADATA, metadata);
+ }
+ explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ModelBuilder &operator=(const ModelBuilder &);
+ flatbuffers::Offset<Model> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Model>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Model> CreateModel(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0,
+ flatbuffers::Offset<flatbuffers::String> description = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0)
+{
+ ModelBuilder builder_(_fbb);
+ builder_.add_metadata(metadata);
+ builder_.add_metadata_buffer(metadata_buffer);
+ builder_.add_buffers(buffers);
+ builder_.add_description(description);
+ builder_.add_subgraphs(subgraphs);
+ builder_.add_operator_codes(operator_codes);
+ builder_.add_version(version);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Model>
+CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes = nullptr,
+ const std::vector<flatbuffers::Offset<SubGraph>> *subgraphs = nullptr,
+ const char *description = nullptr,
+ const std::vector<flatbuffers::Offset<Buffer>> *buffers = nullptr,
+ const std::vector<int32_t> *metadata_buffer = nullptr,
+ const std::vector<flatbuffers::Offset<Metadata>> *metadata = nullptr)
+{
+ return circle::CreateModel(
+ _fbb, version,
+ operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0,
+ subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0,
+ description ? _fbb.CreateString(description) : 0,
+ buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0,
+ metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0,
+ metadata ? _fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0);
+}
+
+inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
+ QuantizationDetails type)
+{
+ switch (type)
+ {
+ case QuantizationDetails_NONE:
+ {
+ return true;
+ }
+ case QuantizationDetails_CustomQuantization:
+ {
+ auto ptr = reinterpret_cast<const CustomQuantization *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ default:
+ return false;
+ }
+}
+
+inline bool
+VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types)
+{
+ if (!values || !types)
+ return !values && !types;
+ if (values->size() != types->size())
+ return false;
+ for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i)
+ {
+ if (!VerifyQuantizationDetails(verifier, values->Get(i),
+ types->GetEnum<QuantizationDetails>(i)))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void *obj,
+ SparseIndexVector type)
+{
+ switch (type)
+ {
+ case SparseIndexVector_NONE:
+ {
+ return true;
+ }
+ case SparseIndexVector_Int32Vector:
+ {
+ auto ptr = reinterpret_cast<const Int32Vector *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case SparseIndexVector_Uint16Vector:
+ {
+ auto ptr = reinterpret_cast<const Uint16Vector *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case SparseIndexVector_Uint8Vector:
+ {
+ auto ptr = reinterpret_cast<const Uint8Vector *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ default:
+ return false;
+ }
+}
+
+inline bool
+VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types)
+{
+ if (!values || !types)
+ return !values && !types;
+ if (values->size() != types->size())
+ return false;
+ for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i)
+ {
+ if (!VerifySparseIndexVector(verifier, values->Get(i), types->GetEnum<SparseIndexVector>(i)))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj,
+ BuiltinOptions type)
+{
+ switch (type)
+ {
+ case BuiltinOptions_NONE:
+ {
+ return true;
+ }
+ case BuiltinOptions_Conv2DOptions:
+ {
+ auto ptr = reinterpret_cast<const Conv2DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DepthwiseConv2DOptions:
+ {
+ auto ptr = reinterpret_cast<const DepthwiseConv2DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ConcatEmbeddingsOptions:
+ {
+ auto ptr = reinterpret_cast<const ConcatEmbeddingsOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LSHProjectionOptions:
+ {
+ auto ptr = reinterpret_cast<const LSHProjectionOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_Pool2DOptions:
+ {
+ auto ptr = reinterpret_cast<const Pool2DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SVDFOptions:
+ {
+ auto ptr = reinterpret_cast<const SVDFOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RNNOptions:
+ {
+ auto ptr = reinterpret_cast<const RNNOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FullyConnectedOptions:
+ {
+ auto ptr = reinterpret_cast<const FullyConnectedOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SoftmaxOptions:
+ {
+ auto ptr = reinterpret_cast<const SoftmaxOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ConcatenationOptions:
+ {
+ auto ptr = reinterpret_cast<const ConcatenationOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AddOptions:
+ {
+ auto ptr = reinterpret_cast<const AddOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_L2NormOptions:
+ {
+ auto ptr = reinterpret_cast<const L2NormOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LocalResponseNormalizationOptions:
+ {
+ auto ptr = reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const LSTMOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ResizeBilinearOptions:
+ {
+ auto ptr = reinterpret_cast<const ResizeBilinearOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CallOptions:
+ {
+ auto ptr = reinterpret_cast<const CallOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReshapeOptions:
+ {
+ auto ptr = reinterpret_cast<const ReshapeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SkipGramOptions:
+ {
+ auto ptr = reinterpret_cast<const SkipGramOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SpaceToDepthOptions:
+ {
+ auto ptr = reinterpret_cast<const SpaceToDepthOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_EmbeddingLookupSparseOptions:
+ {
+ auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MulOptions:
+ {
+ auto ptr = reinterpret_cast<const MulOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_PadOptions:
+ {
+ auto ptr = reinterpret_cast<const PadOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GatherOptions:
+ {
+ auto ptr = reinterpret_cast<const GatherOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BatchToSpaceNDOptions:
+ {
+ auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SpaceToBatchNDOptions:
+ {
+ auto ptr = reinterpret_cast<const SpaceToBatchNDOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_TransposeOptions:
+ {
+ auto ptr = reinterpret_cast<const TransposeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReducerOptions:
+ {
+ auto ptr = reinterpret_cast<const ReducerOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SubOptions:
+ {
+ auto ptr = reinterpret_cast<const SubOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DivOptions:
+ {
+ auto ptr = reinterpret_cast<const DivOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SqueezeOptions:
+ {
+ auto ptr = reinterpret_cast<const SqueezeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SequenceRNNOptions:
+ {
+ auto ptr = reinterpret_cast<const SequenceRNNOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_StridedSliceOptions:
+ {
+ auto ptr = reinterpret_cast<const StridedSliceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ExpOptions:
+ {
+ auto ptr = reinterpret_cast<const ExpOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_TopKV2Options:
+ {
+ auto ptr = reinterpret_cast<const TopKV2Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SplitOptions:
+ {
+ auto ptr = reinterpret_cast<const SplitOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LogSoftmaxOptions:
+ {
+ auto ptr = reinterpret_cast<const LogSoftmaxOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CastOptions:
+ {
+ auto ptr = reinterpret_cast<const CastOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DequantizeOptions:
+ {
+ auto ptr = reinterpret_cast<const DequantizeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MaximumMinimumOptions:
+ {
+ auto ptr = reinterpret_cast<const MaximumMinimumOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ArgMaxOptions:
+ {
+ auto ptr = reinterpret_cast<const ArgMaxOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LessOptions:
+ {
+ auto ptr = reinterpret_cast<const LessOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_NegOptions:
+ {
+ auto ptr = reinterpret_cast<const NegOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_PadV2Options:
+ {
+ auto ptr = reinterpret_cast<const PadV2Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GreaterOptions:
+ {
+ auto ptr = reinterpret_cast<const GreaterOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GreaterEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const GreaterEqualOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LessEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const LessEqualOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SelectOptions:
+ {
+ auto ptr = reinterpret_cast<const SelectOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SliceOptions:
+ {
+ auto ptr = reinterpret_cast<const SliceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_TransposeConvOptions:
+ {
+ auto ptr = reinterpret_cast<const TransposeConvOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SparseToDenseOptions:
+ {
+ auto ptr = reinterpret_cast<const SparseToDenseOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_TileOptions:
+ {
+ auto ptr = reinterpret_cast<const TileOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ExpandDimsOptions:
+ {
+ auto ptr = reinterpret_cast<const ExpandDimsOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_EqualOptions:
+ {
+ auto ptr = reinterpret_cast<const EqualOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_NotEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const NotEqualOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ShapeOptions:
+ {
+ auto ptr = reinterpret_cast<const ShapeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_PowOptions:
+ {
+ auto ptr = reinterpret_cast<const PowOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ArgMinOptions:
+ {
+ auto ptr = reinterpret_cast<const ArgMinOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FakeQuantOptions:
+ {
+ auto ptr = reinterpret_cast<const FakeQuantOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_PackOptions:
+ {
+ auto ptr = reinterpret_cast<const PackOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LogicalOrOptions:
+ {
+ auto ptr = reinterpret_cast<const LogicalOrOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_OneHotOptions:
+ {
+ auto ptr = reinterpret_cast<const OneHotOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LogicalAndOptions:
+ {
+ auto ptr = reinterpret_cast<const LogicalAndOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LogicalNotOptions:
+ {
+ auto ptr = reinterpret_cast<const LogicalNotOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UnpackOptions:
+ {
+ auto ptr = reinterpret_cast<const UnpackOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FloorDivOptions:
+ {
+ auto ptr = reinterpret_cast<const FloorDivOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SquareOptions:
+ {
+ auto ptr = reinterpret_cast<const SquareOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ZerosLikeOptions:
+ {
+ auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FillOptions:
+ {
+ auto ptr = reinterpret_cast<const FillOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BidirectionalSequenceLSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BidirectionalSequenceRNNOptions:
+ {
+ auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UnidirectionalSequenceLSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FloorModOptions:
+ {
+ auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RangeOptions:
+ {
+ auto ptr = reinterpret_cast<const RangeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ResizeNearestNeighborOptions:
+ {
+ auto ptr = reinterpret_cast<const ResizeNearestNeighborOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LeakyReluOptions:
+ {
+ auto ptr = reinterpret_cast<const LeakyReluOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SquaredDifferenceOptions:
+ {
+ auto ptr = reinterpret_cast<const SquaredDifferenceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MirrorPadOptions:
+ {
+ auto ptr = reinterpret_cast<const MirrorPadOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AbsOptions:
+ {
+ auto ptr = reinterpret_cast<const AbsOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SplitVOptions:
+ {
+ auto ptr = reinterpret_cast<const SplitVOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UniqueOptions:
+ {
+ auto ptr = reinterpret_cast<const UniqueOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReverseV2Options:
+ {
+ auto ptr = reinterpret_cast<const ReverseV2Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AddNOptions:
+ {
+ auto ptr = reinterpret_cast<const AddNOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GatherNdOptions:
+ {
+ auto ptr = reinterpret_cast<const GatherNdOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CosOptions:
+ {
+ auto ptr = reinterpret_cast<const CosOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_WhereOptions:
+ {
+ auto ptr = reinterpret_cast<const WhereOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RankOptions:
+ {
+ auto ptr = reinterpret_cast<const RankOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReverseSequenceOptions:
+ {
+ auto ptr = reinterpret_cast<const ReverseSequenceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MatrixDiagOptions:
+ {
+ auto ptr = reinterpret_cast<const MatrixDiagOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_QuantizeOptions:
+ {
+ auto ptr = reinterpret_cast<const QuantizeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MatrixSetDiagOptions:
+ {
+ auto ptr = reinterpret_cast<const MatrixSetDiagOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HardSwishOptions:
+ {
+ auto ptr = reinterpret_cast<const HardSwishOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_IfOptions:
+ {
+ auto ptr = reinterpret_cast<const IfOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_WhileOptions:
+ {
+ auto ptr = reinterpret_cast<const WhileOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DepthToSpaceOptions:
+ {
+ auto ptr = reinterpret_cast<const DepthToSpaceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_NonMaxSuppressionV4Options:
+ {
+ auto ptr = reinterpret_cast<const NonMaxSuppressionV4Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_NonMaxSuppressionV5Options:
+ {
+ auto ptr = reinterpret_cast<const NonMaxSuppressionV5Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ScatterNdOptions:
+ {
+ auto ptr = reinterpret_cast<const ScatterNdOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SelectV2Options:
+ {
+ auto ptr = reinterpret_cast<const SelectV2Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DensifyOptions:
+ {
+ auto ptr = reinterpret_cast<const DensifyOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SegmentSumOptions:
+ {
+ auto ptr = reinterpret_cast<const SegmentSumOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BatchMatMulOptions:
+ {
+ auto ptr = reinterpret_cast<const BatchMatMulOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_InstanceNormOptions:
+ {
+ auto ptr = reinterpret_cast<const InstanceNormOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ default:
+ return false;
+ }
+}
+
+inline bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types)
+{
+ if (!values || !types)
+ return !values && !types;
+ if (values->size() != types->size())
+ return false;
+ for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i)
+ {
+ if (!VerifyBuiltinOptions(verifier, values->Get(i), types->GetEnum<BuiltinOptions>(i)))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline const circle::Model *GetModel(const void *buf)
+{
+ return flatbuffers::GetRoot<circle::Model>(buf);
+}
+
+inline const circle::Model *GetSizePrefixedModel(const void *buf)
+{
+ return flatbuffers::GetSizePrefixedRoot<circle::Model>(buf);
+}
+
+inline const char *ModelIdentifier() { return "CIR0"; }
+
+inline bool ModelBufferHasIdentifier(const void *buf)
+{
+ return flatbuffers::BufferHasIdentifier(buf, ModelIdentifier());
+}
+
+inline bool VerifyModelBuffer(flatbuffers::Verifier &verifier)
+{
+ return verifier.VerifyBuffer<circle::Model>(ModelIdentifier());
+}
+
+inline bool VerifySizePrefixedModelBuffer(flatbuffers::Verifier &verifier)
+{
+ return verifier.VerifySizePrefixedBuffer<circle::Model>(ModelIdentifier());
+}
+
+inline const char *ModelExtension() { return "circle"; }
+
+inline void FinishModelBuffer(flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<circle::Model> root)
+{
+ fbb.Finish(root, ModelIdentifier());
+}
+
+inline void FinishSizePrefixedModelBuffer(flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<circle::Model> root)
+{
+ fbb.FinishSizePrefixed(root, ModelIdentifier());
+}
+
+} // namespace circle
+
+#endif // FLATBUFFERS_GENERATED_CIRCLESCHEMA_CIRCLE_H_
diff --git a/runtime/neurun/frontend/nnapi/ANeuralNetworksModel.test.cc b/runtime/onert/frontend/nnapi/ANeuralNetworksModel.test.cc
index 15a279a7e..15a279a7e 100644
--- a/runtime/neurun/frontend/nnapi/ANeuralNetworksModel.test.cc
+++ b/runtime/onert/frontend/nnapi/ANeuralNetworksModel.test.cc
diff --git a/runtime/onert/frontend/nnapi/CMakeLists.txt b/runtime/onert/frontend/nnapi/CMakeLists.txt
new file mode 100644
index 000000000..b66b32e89
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/CMakeLists.txt
@@ -0,0 +1,27 @@
+file(GLOB_RECURSE SOURCES_FRONTEND "*.cc")
+file(GLOB_RECURSE TESTS_FRONTEND "*.test.cc")
+list(REMOVE_ITEM SOURCES_FRONTEND ${TESTS_FRONTEND})
+
+set(LIB_ONERT onert)
+
+add_library(${LIB_ONERT} SHARED ${SOURCES_FRONTEND})
+target_link_libraries(${LIB_ONERT} PUBLIC nnfw-nnapi-header)
+target_link_libraries(${LIB_ONERT} PUBLIC onert_core) # TODO Link PRIVATE onert_core
+target_link_libraries(${LIB_ONERT} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT} PRIVATE nnfw_coverage)
+
+set_target_properties(${LIB_ONERT} PROPERTIES OUTPUT_NAME neuralnetworks)
+
+install(TARGETS ${LIB_ONERT} DESTINATION lib)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+add_executable(test_onert_frontend_nnapi ${TESTS_FRONTEND})
+
+target_link_libraries(test_onert_frontend_nnapi PRIVATE ${LIB_ONERT} dl)
+target_link_libraries(test_onert_frontend_nnapi PRIVATE gtest)
+target_link_libraries(test_onert_frontend_nnapi PRIVATE gtest_main)
+
+install(TARGETS test_onert_frontend_nnapi DESTINATION unittest)
diff --git a/runtime/onert/frontend/nnapi/compilation.cc b/runtime/onert/frontend/nnapi/compilation.cc
new file mode 100644
index 000000000..0823cb456
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/compilation.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <NeuralNetworks.h>
+
+#include <new>
+
+#include "wrapper/ANeuralNetworksModel.h"
+#include "wrapper/ANeuralNetworksCompilation.h"
+#include "util/logging.h"
+
+//
+// NNAPI Implementation
+//
+int ANeuralNetworksCompilation_create(ANeuralNetworksModel *model,
+ ANeuralNetworksCompilation **compilation)
+{
+ if ((model == nullptr) || (compilation == nullptr))
+ {
+ VERBOSE(NNAPI::Compilation) << "create: Incorrect null pointer parameter(s)" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (!model->isFinished())
+ {
+ VERBOSE(NNAPI::Compilation) << "create: Model define is not finished" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ std::shared_ptr<onert::ir::Graph> internal;
+
+ model->release(internal);
+
+ *compilation = new (std::nothrow) ANeuralNetworksCompilation(internal);
+ if (*compilation == nullptr)
+ {
+    VERBOSE(NNAPI::Compilation) << "create: Fail to create compilation object" << std::endl;
+ return ANEURALNETWORKS_OUT_OF_MEMORY;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation)
+{
+ if (compilation == nullptr)
+ {
+ VERBOSE(NNAPI::Compilation) << "finish: Incorrect null pointer parameter" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (compilation->state() != ::onert::compiler::State::CREATED)
+ {
+ VERBOSE(NNAPI::Compilation) << "finish: Already finished" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ if (!compilation->finish())
+ {
+ VERBOSE(NNAPI::Compilation) << "finish: Fail to compile" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation *compilation)
+{
+ delete compilation;
+}
+
+int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation *compilation,
+ int32_t preference)
+{
+ if (compilation == nullptr)
+ {
+ VERBOSE(NNAPI::Compilation) << "setPreference: Incorrect null pointer parameter" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (compilation->state() != ::onert::compiler::State::CREATED)
+ {
+ VERBOSE(NNAPI::Compilation) << "setPreference: Already finished" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ const PreferenceCode FIRST_PREFERENCE_CODE = ANEURALNETWORKS_PREFER_LOW_POWER;
+ const PreferenceCode LAST_PREFERENCE_CODE = ANEURALNETWORKS_PREFER_SUSTAINED_SPEED;
+ if ((preference < FIRST_PREFERENCE_CODE) || (preference > LAST_PREFERENCE_CODE))
+ {
+ VERBOSE(NNAPI::Compilation) << "setPreference: Incorrect preference code" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ // NYI: nothing to set
+ return ANEURALNETWORKS_NO_ERROR;
+}
diff --git a/runtime/neurun/frontend/nnapi/event.cc b/runtime/onert/frontend/nnapi/event.cc
index 593b74e90..593b74e90 100644
--- a/runtime/neurun/frontend/nnapi/event.cc
+++ b/runtime/onert/frontend/nnapi/event.cc
diff --git a/runtime/onert/frontend/nnapi/execution.cc b/runtime/onert/frontend/nnapi/execution.cc
new file mode 100644
index 000000000..6aaca1b4c
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/execution.cc
@@ -0,0 +1,480 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <NeuralNetworks.h>
+
+#include <new>
+
+#include "wrapper/ANeuralNetworksCompilation.h"
+#include "wrapper/ANeuralNetworksExecution.h"
+#include "wrapper/ANeuralNetworksMemory.h"
+#include "wrapper/ANeuralNetworksEvent.h"
+#include "wrapper/NNAPIConvert.h"
+#include "util/logging.h"
+
+//
+// NNAPI Implementation
+//
+int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
+ ANeuralNetworksExecution **execution)
+{
+ if ((compilation == nullptr) || (execution == nullptr))
+ {
+ VERBOSE(NNAPI::Execution) << "create: Incorrect null pointer parameter(s)" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ std::shared_ptr<onert::exec::IExecutor> executor;
+
+ compilation->publish(executor);
+
+ if (executor == nullptr)
+ {
+ VERBOSE(NNAPI::Execution) << "create: Never compiled yet" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ *execution = new (std::nothrow) ANeuralNetworksExecution{executor};
+ if (*execution == nullptr)
+ {
+ VERBOSE(NNAPI::Execution) << "create: Fail to create execution object" << std::endl;
+ return ANEURALNETWORKS_OUT_OF_MEMORY;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+// NOTE Handle optional input
+// Unspecified shape on model build
+// Optional and omitted input on execution: skip input setting (workaround for LSTM)
+// Optional but not omitted input on execution: cannot handle
+// Normal input on execution: cannot handle
+// Fully specified shape on model build
+// Optional input on execution: cannot handle
+// Normal input: handle normally
+int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32_t index,
+ const ANeuralNetworksOperandType *type, const void *buffer,
+ size_t length)
+{
+ // Don't check type
+ // Comment about ANeuralNetworksOperandType in NeuralNetworks.h:
+ // If the input or output is optional and omitted then it need not have a fully specified tensor
+ // operand type
+ if ((execution == nullptr) || ((buffer == nullptr) && (length != 0)))
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Incorrect null pointer parameter(s)" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if ((buffer != nullptr) && (length == 0))
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Zero length input" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ const auto operand_index = execution->getInputOperandIndex(index);
+ if (!operand_index.valid())
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Invalid input index" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ // Omitted optional input
+ // LSTM operation's some inputs can be optional input
+ if ((buffer == nullptr) && (length == 0))
+ {
+ if (execution->haveUnspecifiedDims(operand_index))
+ {
+ return ANEURALNETWORKS_NO_ERROR;
+ }
+ else
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Cannot handle fully-specified shape on model build "
+ "but omitted input on execution"
+ << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+
+ if (type != nullptr)
+ {
+ if (!execution->compareDataType(type, operand_index))
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Data type mismatch" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (!execution->compareShape(type, operand_index))
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Shape mismatch" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (NNAPIConvert::calculateSizeFromType(type) != length)
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Invalid length" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+ else
+ {
+ if (execution->haveUnspecifiedDims(operand_index))
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Unspecified dimension value" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (execution->getOperandSize(operand_index) != length)
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Invalid length" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+
+ if (!execution->setInput(index, type, buffer, length))
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Fail to set input" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int32_t index,
+ const ANeuralNetworksOperandType *type, void *buffer,
+ size_t length)
+{
+ // Don't check type
+ // Comment about ANeuralNetworksOperandType in NeuralNetworks.h:
+ // If the input or output is optional and omitted then it need not have a fully specified tensor
+ // operand type
+ if ((execution == nullptr) || ((buffer == nullptr) && (length != 0)))
+ {
+ VERBOSE(NNAPI::Execution) << "setOutput: Incorrect null pointer parameter(s)" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if ((buffer != nullptr) && (length == 0))
+ {
+ VERBOSE(NNAPI::Execution) << "setOutput: Zero length output" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ // Handle optional output
+ if (buffer == nullptr)
+ {
+ return ANEURALNETWORKS_NO_ERROR;
+ }
+
+ const auto operand_index = execution->getOutputOperandIndex(index);
+ if (!operand_index.valid())
+ {
+ VERBOSE(NNAPI::Execution) << "setOutput: Invalid output index" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (type != nullptr)
+ {
+ if (!execution->compareDataType(type, operand_index))
+ {
+ VERBOSE(NNAPI::Execution) << "setOutput: Data type mismatch" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (!execution->compareShape(type, operand_index))
+ {
+ VERBOSE(NNAPI::Execution) << "setOutput: Shape mismatch" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (NNAPIConvert::calculateSizeFromType(type) != length)
+ {
+ VERBOSE(NNAPI::Execution) << "setOutput: Invalid length" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+ else
+ {
+ if (execution->haveUnspecifiedDims(operand_index))
+ {
+ VERBOSE(NNAPI::Execution) << "setOutput: Unspecified dimension value" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (execution->getOperandSize(operand_index) != length)
+ {
+      VERBOSE(NNAPI::Execution) << "setOutput: Invalid length" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+
+ if (!execution->setOutput(index, type, buffer, length))
+ {
+ VERBOSE(NNAPI::Execution) << "setOutput: Fail to set output" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution,
+ ANeuralNetworksEvent **event)
+{
+ if ((execution == nullptr) || (event == nullptr))
+ {
+ VERBOSE(NNAPI::Execution) << "startCompute: Incorrect null pointer parameter(s)" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ // TODO: Handle event
+ auto instance = execution->instance();
+ *event = new (std::nothrow) ANeuralNetworksEvent{instance};
+ if (*event == nullptr)
+ {
+ VERBOSE(NNAPI::Execution) << "startCompute: Fail to create event" << std::endl;
+ return ANEURALNETWORKS_OUT_OF_MEMORY;
+ }
+
+ if (!execution->startExecute())
+ {
+ VERBOSE(NNAPI::Execution) << "startCompute: Fail to start execution" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksExecution_compute(ANeuralNetworksExecution *execution)
+{
+ if (execution == nullptr)
+ {
+ VERBOSE(NNAPI::Execution) << "Compute: Incorrect null pointer parameter" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (!execution->execute())
+ {
+ VERBOSE(NNAPI::Execution) << "Compute: Fail to execution" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+void ANeuralNetworksExecution_free(ANeuralNetworksExecution *execution) { delete execution; }
+
+int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution *execution, int32_t index,
+ const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset,
+ size_t length)
+{
+ if ((execution == nullptr) || (memory == nullptr))
+ {
+ VERBOSE(NNAPI::Execution) << "setInputFromMemory: Incorrect null pointer parameter(s)"
+ << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (length == 0)
+ {
+ VERBOSE(NNAPI::Execution) << "setInputFromMemory: Zero length input" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ const auto operand_index = execution->getInputOperandIndex(index);
+ if (!operand_index.valid())
+ {
+ VERBOSE(NNAPI::Execution) << "setInputFromMemory: Invalid input index" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (type != nullptr)
+ {
+ if (!execution->compareDataType(type, operand_index))
+ {
+ VERBOSE(NNAPI::Execution) << "setInputFromMemory: Data type mismatch" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (!execution->compareShape(type, operand_index))
+ {
+ VERBOSE(NNAPI::Execution) << "setInputFromMemory: Shape mismatch" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (NNAPIConvert::calculateSizeFromType(type) != length)
+ {
+ VERBOSE(NNAPI::Execution) << "setInputFromMemory: Invalid length" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+ else
+ {
+ if (execution->haveUnspecifiedDims(operand_index))
+ {
+ VERBOSE(NNAPI::Execution) << "setInputFromMemory: Unspecified dimension value" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (execution->getOperandSize(operand_index) != length)
+ {
+ VERBOSE(NNAPI::Execution) << "setInputFromMemory: Invalid length" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+
+ if (!memory->vaildAccess(offset, length))
+ {
+ VERBOSE(NNAPI::Execution) << "setInputFromMemory: Invalid memory access" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (!execution->setInput(index, type, reinterpret_cast<const void *>(memory->base() + offset),
+ length))
+ {
+ VERBOSE(NNAPI::Execution) << "setInputFromMemory: Fail to set input" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution *execution, int32_t index,
+ const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset,
+ size_t length)
+{
+ if ((execution == nullptr) || (memory == nullptr))
+ {
+ VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Incorrect null pointer parameter(s)"
+ << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (length == 0)
+ {
+    VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Zero length output" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ const auto operand_index = execution->getOutputOperandIndex(index);
+ if (!operand_index.valid())
+ {
+ VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Invalid output index" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (type != nullptr)
+ {
+ if (!execution->compareDataType(type, operand_index))
+ {
+ VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Data type mismatch" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (!execution->compareShape(type, operand_index))
+ {
+ VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Shape mismatch" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (NNAPIConvert::calculateSizeFromType(type) != length)
+ {
+ VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Invalid length" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+ else
+ {
+ if (execution->haveUnspecifiedDims(operand_index))
+ {
+ VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Unspecified dimension value" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (execution->getOperandSize(operand_index) != length)
+ {
+ VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Invalid length" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+
+ if (!memory->vaildAccess(offset, length))
+ {
+ VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Invalid memory access" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (!execution->setOutput(index, type, reinterpret_cast<void *>(memory->base() + offset), length))
+ {
+    VERBOSE(NNAPI::Execution) << "setOutputFromMemory: Fail to set output" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution *execution,
+ int32_t index, uint32_t *rank)
+{
+ if ((execution == nullptr) || (rank == nullptr))
+ {
+ VERBOSE(NNAPI::Execution) << "getOutputOperandRank: Incorrect null pointer parameter(s)"
+ << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ const auto operand_index = execution->getOutputOperandIndex(index);
+ if (!operand_index.valid())
+ {
+ VERBOSE(NNAPI::Execution) << "getOutputOperandRank: Invalid output index" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (!execution->getOutputOperandRank(index, rank))
+ {
+ VERBOSE(NNAPI::Execution) << "getOutputOperandRank: Fail to get rank" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksExecution_getOutputOperandDimensions(ANeuralNetworksExecution *execution,
+ int32_t index, uint32_t *dimensions)
+{
+ if ((execution == nullptr) || (dimensions == nullptr))
+ {
+ VERBOSE(NNAPI::Execution) << "getOutputOperandDimensions: Incorrect null pointer parameter(s)"
+ << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ const auto operand_index = execution->getOutputOperandIndex(index);
+ if (!operand_index.valid())
+ {
+ VERBOSE(NNAPI::Execution) << "getOutputOperandDimensions: Invalid output index" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (!execution->getOutputOperandDimensions(index, dimensions))
+ {
+    VERBOSE(NNAPI::Execution) << "getOutputOperandDimensions: Fail to get dimensions" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
diff --git a/runtime/onert/frontend/nnapi/memory.cc b/runtime/onert/frontend/nnapi/memory.cc
new file mode 100644
index 000000000..6e568a926
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/memory.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <NeuralNetworks.h>
+#include <sys/mman.h>
+#include <new>
+#include <memory>
+
+#include <memory>
+#include "wrapper/ANeuralNetworksMemory.h"
+
+int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset,
+ ANeuralNetworksMemory **memory)
+{
+ if (memory == nullptr)
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ *memory = new (std::nothrow) ANeuralNetworksMemory{size, protect, fd, offset};
+ if (*memory == nullptr)
+ {
+ return ANEURALNETWORKS_OUT_OF_MEMORY;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+void ANeuralNetworksMemory_free(ANeuralNetworksMemory *memory) { delete memory; }
diff --git a/runtime/onert/frontend/nnapi/model.cc b/runtime/onert/frontend/nnapi/model.cc
new file mode 100644
index 000000000..e201a6753
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/model.cc
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <NeuralNetworks.h>
+#include <NeuralNetworksEx.h>
+
+#include <new>
+
+#include "wrapper/ANeuralNetworksModel.h"
+#include "wrapper/ANeuralNetworksMemory.h"
+#include "util/logging.h"
+
+int ANeuralNetworksModel_create(ANeuralNetworksModel **model)
+{
+ if (model == nullptr)
+ {
+ VERBOSE(NNAPI::Model) << "create: Incorrect null pointer parameter" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ *model = new (std::nothrow) ANeuralNetworksModel{};
+ if (*model == nullptr)
+ {
+ VERBOSE(NNAPI::Model) << "create: Fail to create model object" << std::endl;
+ return ANEURALNETWORKS_OUT_OF_MEMORY;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+void ANeuralNetworksModel_free(ANeuralNetworksModel *model) { delete model; }
+
+int ANeuralNetworksModel_addOperand(ANeuralNetworksModel *model,
+ const ANeuralNetworksOperandType *type)
+{
+ if ((model == nullptr) || (type == nullptr))
+ {
+ VERBOSE(NNAPI::Model) << "addOperand: Incorrect null pointer parameter(s)" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (model->isFinished())
+ {
+ VERBOSE(NNAPI::Model) << "addOperand: Already finished" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ // scale and zeroPoint should be zero for scalars and non-fixed point tensors
+ // Quantized:
+ // scale: a 32 bit floating point value greater than zero
+ // zeroPoint: a 32 bit integer, in range [0, 255]
+ if (type->type == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM)
+ {
+ if (!(type->scale > 0.0f))
+ {
+ VERBOSE(NNAPI::Model) << "addOperand: Incorrect scale value for quantization" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if ((type->zeroPoint < 0) || (type->zeroPoint > 255))
+ {
+ VERBOSE(NNAPI::Model) << "addOperand: Incorrect zeroPoint value for quantization"
+ << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+ // NOTE Validation of scale and zeroPoint would be skipped for a while.
+ // We do not know whether scalar type can have scale and zeroPoint.
+ // To pass ValidationTest and GeneratedTest, this validation code
+ // would not be implemented until we can define this issue clearly.
+ //
+ // scale and zeroPoint should be zero for scalars and non-fixed point tensors
+ // else if ((type->scale != 0.0f) || (type->zeroPoint != 0))
+ // {
+ // return ANEURALNETWORKS_BAD_DATA;
+ // }
+
+ // dimensionCount should be zero for scalars
+ if ((type->dimensionCount != 0) &&
+ ((type->type == ANEURALNETWORKS_FLOAT32) || (type->type == ANEURALNETWORKS_INT32) ||
+ (type->type == ANEURALNETWORKS_UINT32)))
+ {
+ VERBOSE(NNAPI::Model) << "addOperand: Incorrect data type" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (!model->addOperand(type))
+ {
+ VERBOSE(NNAPI::Model) << "addOperand: Fail to add operand" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel *model, int32_t index,
+ const void *buffer, size_t length)
+{
+ const bool optional_operand = ((buffer == nullptr) && (length == 0));
+
+ if ((model == nullptr) || ((buffer == nullptr) && (length != 0)))
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValue: Incorrect null pointer parameter(s)" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (model->isFinished())
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValue: Already finished" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ // Negative index value is not allowed
+ if (index < 0)
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValue: Invalid index value (negative)" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ // NOTE OperandIndex uses uint32_t as its underlying type as various NNAPI
+ // functions such as ANeuralNetworksModel_addOperation use uint32_t to represent operand
+ // index
+ // ANeuralNetworksModel_setOperandValue, however, uses int32_t to represent operand index.
+ //
+ // Below, static_cast<uint32_t>(...) is introduced to eliminate compiler warning.
+ uint32_t ind = static_cast<uint32_t>(index);
+
+ if (!model->isExistOperand(ind))
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValue: Invalid index value (not exist)" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (!optional_operand && (model->operandSize(ind) != length))
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValue: Invalid data length" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (model->isUsageSet(ind))
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValue: Already set operand" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ // NNAPI spec in NeuralNetworks.h
+ // For values of length greater than ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES,
+ // the application is responsible for not changing the content of this region
+ // until all executions using this model have completed
+ bool copy_value = false;
+ if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES)
+ {
+ copy_value = true;
+ }
+
+ if (!model->setOperandValue(ind, buffer, length, optional_operand, copy_value))
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValue: Fail to set operand value" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel *model, int32_t index,
+ const ANeuralNetworksMemory *memory,
+ size_t offset, size_t length)
+{
+ if ((model == nullptr) || (memory == nullptr))
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Incorrect null pointer parameter(s)"
+ << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (model->isFinished())
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Already finished" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ // Negative index value is not allowed
+ if (index < 0)
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Invalid index value (negative)"
+ << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ // NOTE OperandIndex uses uint32_t as its underlying type as various NNAPI
+ // functions such as ANeuralNetworksModel_addOperation use uint32_t to represent operand
+ // index
+ // ANeuralNetworksModel_setOperandValue, however, uses int32_t to represent operand index.
+ //
+ // Below, static_cast<uint32_t>(...) is introduced to eliminate compiler warning.
+ uint32_t ind = static_cast<uint32_t>(index);
+
+ if (!model->isExistOperand(ind))
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Invalid index value (not exist)"
+ << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if ((model->operandSize(ind) != length) || (memory->size() < (offset + length)))
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Invalid data length" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (model->isUsageSet(ind))
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Already set operand" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (!model->setOperandValue(ind, memory->base() + offset, length))
+ {
+ VERBOSE(NNAPI::Model) << "setOperandValueFromMemory: Fail to set operand value" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
+ ANeuralNetworksOperationType type, uint32_t inputCount,
+ const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ if ((model == nullptr) || (inputs == nullptr) || (outputs == nullptr))
+ {
+ VERBOSE(NNAPI::Model) << "addOperation: Incorrect null pointer parameter(s)" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (model->isFinished())
+ {
+ VERBOSE(NNAPI::Model) << "addOperation: Already finished" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ const ANeuralNetworksOperationType FIRST_OPERATION = ANEURALNETWORKS_ADD;
+ const ANeuralNetworksOperationType LAST_OPERATION = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR;
+ if ((type < FIRST_OPERATION) || (type > LAST_OPERATION))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ for (uint32_t i = 0; i < outputCount; i++)
+ {
+ if (model->isUsageSet(outputs[i]))
+ {
+ VERBOSE(NNAPI::Model) << "addOperation: Already set output operand" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+
+ if (!model->addOperation(type, inputCount, inputs, outputCount, outputs))
+ {
+ VERBOSE(NNAPI::Model) << "addOperation: Fail to add operation" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
+ ANeuralNetworksOperationTypeEx type, uint32_t inputCount,
+ const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ if ((model == nullptr) || (inputs == nullptr) || (outputs == nullptr))
+ {
+ VERBOSE(NNAPI::Model) << "addOperation: Incorrect null pointer parameter(s)" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (model->isFinished())
+ {
+ VERBOSE(NNAPI::Model) << "addOperation: Already finished" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ const ANeuralNetworksOperationTypeEx FIRST_OPERATION = ANEURALNETWORKS_CAST_EX;
+ const ANeuralNetworksOperationTypeEx LAST_OPERATION = ANEURALNETWORKS_SHAPE_EX;
+ if ((type < FIRST_OPERATION) || (type > LAST_OPERATION))
+ {
+ VERBOSE(NNAPI::Model) << "addOperation: Invalid operation type" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ for (uint32_t i = 0; i < outputCount; i++)
+ {
+ if (model->isUsageSet(outputs[i]))
+ {
+ VERBOSE(NNAPI::Model) << "addOperation: Already set output operand" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+
+ if (!model->addOperationEx(type, inputCount, inputs, outputCount, outputs))
+ {
+ VERBOSE(NNAPI::Model) << "addOperation: Fail to add operation" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel *model, uint32_t inputCount,
+ const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ if ((model == nullptr) || (inputs == nullptr) || (outputs == nullptr))
+ {
+ VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Incorrect null pointer parameter(s)"
+ << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (model->isFinished())
+ {
+ VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Already finished" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ for (uint32_t n = 0; n < inputCount; ++n)
+ {
+ uint32_t ind = inputs[n];
+ if (model->isUsageSet(ind))
+ {
+ VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Already set input operand" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (!model->addModelInput(ind))
+ {
+ VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Fail to add input" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+
+ for (uint32_t n = 0; n < outputCount; ++n)
+ {
+ uint32_t ind = outputs[n];
+
+ if (!model->isOperationOutput(ind))
+ {
+ VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Need to set output operand" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (!model->addModelOutput(ind))
+ {
+ VERBOSE(NNAPI::Model) << "identifyInputsAndOutputs: Fail to add output" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_finish(ANeuralNetworksModel *model)
+{
+ if (model == nullptr)
+ {
+ VERBOSE(NNAPI::Model) << "finish: Incorrect null pointer parameter" << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (model->isFinished())
+ {
+ VERBOSE(NNAPI::Model) << "finish: Already finished" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ if (!model->finish())
+ {
+ VERBOSE(NNAPI::Model) << "finish: Fail to generate internal graph" << std::endl;
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_relaxComputationFloat32toFloat16(ANeuralNetworksModel *model, bool)
+{
+ if (model == nullptr)
+ {
+ VERBOSE(NNAPI::Model) << "relaxComputationFloat32toFloat16: Incorrect null pointer parameter"
+ << std::endl;
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ // NYI: nothing to set
+ VERBOSE(NNAPI::Model) << "relaxComputationFloat32toFloat16: Do nothing yet" << std::endl;
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
new file mode 100644
index 000000000..03518a88a
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ANeuralNetworksCompilation.h"
+
+#include "util/logging.h"
+
+ANeuralNetworksCompilation::ANeuralNetworksCompilation(
+ const std::shared_ptr<onert::ir::Graph> &model) noexcept
+ : _compiler{new onert::compiler::Compiler{model}}
+{
+ // DO NOTHING
+}
+
+bool ANeuralNetworksCompilation::finish() noexcept
+{
+ try
+ {
+ _compiler->compile();
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
new file mode 100644
index 000000000..8d72441b2
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COMPILATION_H__
+#define __COMPILATION_H__
+
+#include "compiler/Compiler.h"
+#include "ir/Graph.h"
+#include "exec/IExecutor.h"
+
+struct ANeuralNetworksCompilation
+{
+public:
+ ANeuralNetworksCompilation(const std::shared_ptr<onert::ir::Graph> &graph) noexcept;
+
+public:
+ bool finish() noexcept;
+
+ onert::compiler::State state(void) noexcept { return _compiler->state(); }
+ void publish(std::shared_ptr<onert::exec::IExecutor> &executor) noexcept
+ {
+ _compiler->release(executor);
+ }
+
+private:
+ std::shared_ptr<onert::compiler::Compiler> _compiler;
+};
+
+#endif
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc
new file mode 100644
index 000000000..2bea729be
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ANeuralNetworksEvent.h"
+
+#include "exec/Execution.h"
+#include "util/logging.h"
+
+ANeuralNetworksEvent::ANeuralNetworksEvent(const std::shared_ptr<onert::exec::Execution> &execution)
+ : _execution{execution}
+{
+ // DO NOTHING
+}
+
+bool ANeuralNetworksEvent::waitFinish(void) noexcept
+{
+ try
+ {
+ _execution->waitFinish();
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.h
new file mode 100644
index 000000000..7b462d3d6
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __EVENT_H__
+#define __EVENT_H__
+
+#include <NeuralNetworks.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace exec
+{
+class Execution;
+} // namespace exec
+} // namespace onert
+
+struct ANeuralNetworksEvent
+{
+public:
+ ANeuralNetworksEvent(const std::shared_ptr<onert::exec::Execution> &execution);
+
+public:
+ bool waitFinish(void) noexcept;
+
+private:
+ const std::shared_ptr<onert::exec::Execution> _execution;
+};
+
+#endif
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc
new file mode 100644
index 000000000..15eb088c6
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ANeuralNetworksExecution.h"
+#include "NNAPIConvert.h"
+#include "util/logging.h"
+
+const onert::ir::OperandIndex ANeuralNetworksExecution::getInputOperandIndex(int32_t index) noexcept
+{
+ if (index < 0)
+ {
+ // Negative index: return invalid index
+ return onert::ir::OperandIndex{};
+ }
+
+ uint32_t cast_index = static_cast<uint32_t>(index);
+ if (cast_index >= _execution->graph().getInputs().size())
+ {
+ // Return invalid index
+ return onert::ir::OperandIndex{};
+ }
+
+ onert::ir::IOIndex input_index{cast_index};
+ const auto operand_index = _execution->graph().getInputs().at(input_index);
+ return operand_index;
+}
+
+const onert::ir::OperandIndex
+ANeuralNetworksExecution::getOutputOperandIndex(int32_t index) noexcept
+{
+ if (index < 0)
+ {
+ // Negative index: return invalid index
+ return onert::ir::OperandIndex{};
+ }
+
+ uint32_t cast_index = static_cast<uint32_t>(index);
+ if (cast_index >= _execution->graph().getOutputs().size())
+ {
+ // Return invalid index
+ return onert::ir::OperandIndex{};
+ }
+
+ onert::ir::IOIndex output_index{cast_index};
+ const auto operand_index = _execution->graph().getOutputs().at(output_index);
+ return operand_index;
+}
+
+bool ANeuralNetworksExecution::compareDataType(const ANeuralNetworksOperandType *type,
+ const onert::ir::OperandIndex index) noexcept
+{
+ try
+ {
+ const auto operand_type = _execution->graph().operands().at(index).typeInfo();
+ const auto typeInfo = NNAPIConvert::getTypeInfo(type);
+
+ if (operand_type != typeInfo)
+ {
+ // Data type mismatch
+ return false;
+ }
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
+
+bool ANeuralNetworksExecution::compareShape(const ANeuralNetworksOperandType *type,
+ const onert::ir::OperandIndex index) noexcept
+{
+ // Passed shape should be specified
+ if (haveUnspecifiedDims(index))
+ {
+ return false;
+ }
+
+ const auto &operand_shape = _execution->graph().operands().at(index).shape();
+ const auto &shape_from_type = NNAPIConvert::getShape(type);
+
+ return operand_shape == shape_from_type;
+}
+
+bool ANeuralNetworksExecution::haveUnspecifiedDims(const onert::ir::OperandIndex index) noexcept
+{
+ const auto operand_shape = _execution->graph().operands().at(index).shape();
+
+ return operand_shape.num_elements() == 0;
+}
+
+size_t ANeuralNetworksExecution::getOperandSize(const onert::ir::OperandIndex index) noexcept
+{
+ try
+ {
+ return _execution->graph().operands().at(index).operandSize();
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return 0;
+ }
+}
+
+bool ANeuralNetworksExecution::setInput(uint32_t index, const ANeuralNetworksOperandType *type,
+ const void *buffer, size_t length) noexcept
+{
+ try
+ {
+ onert::ir::IOIndex input_index{index};
+ const auto operand_index = getInputOperandIndex(index);
+
+ const auto type_info = _execution->graph().operands().at(operand_index).typeInfo();
+ const auto shape = (type != nullptr) ? NNAPIConvert::getShape(type)
+ : _execution->graph().operands().at(operand_index).shape();
+
+    // NOTE The nnapi does not provide a way to set io_layout and does not support changing
+    //      layout. In other words, we can assume that the io_layout from nnapi is always the
+    //      same as the layout of the used model.
+ // TODO Set layout of model
+ _execution->setInput(input_index, type_info, shape, buffer, length, onert::ir::Layout::NHWC);
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
+
+bool ANeuralNetworksExecution::setOutput(uint32_t index, const ANeuralNetworksOperandType *type,
+ void *buffer, size_t length) noexcept
+{
+ try
+ {
+ onert::ir::IOIndex output_index{index};
+ const auto operand_index = getOutputOperandIndex(index);
+
+ const auto type_info = _execution->graph().operands().at(operand_index).typeInfo();
+ const auto shape = (type != nullptr) ? NNAPIConvert::getShape(type)
+ : _execution->graph().operands().at(operand_index).shape();
+
+    // NOTE The nnapi does not provide a way to set io_layout and does not support changing
+    //      layout. In other words, we can assume that the io_layout from nnapi is always the
+    //      same as the layout of the used model.
+ // TODO Set layout of model
+ _execution->setOutput(output_index, type_info, shape, buffer, length, onert::ir::Layout::NHWC);
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
+
+bool ANeuralNetworksExecution::startExecute(void) noexcept
+{
+ try
+ {
+ _execution->startExecute();
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
+
+bool ANeuralNetworksExecution::execute(void) noexcept
+{
+ try
+ {
+ _execution->execute();
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
+
+const std::shared_ptr<onert::exec::Execution> ANeuralNetworksExecution::instance(void) noexcept
+{
+ return _execution;
+}
+
+bool ANeuralNetworksExecution::getOutputOperandRank(uint32_t index, uint32_t *rank) noexcept
+{
+ try
+ {
+ onert::ir::IOIndex output_index{index};
+ const auto operand_index = getOutputOperandIndex(index);
+ bool unspecified = haveUnspecifiedDims(operand_index);
+
+ // TODO Get unspecified output operand's rank
+ if (unspecified)
+ {
+ throw std::runtime_error{"Unsupport feature"};
+ }
+
+ // Check execution is finished
+ // Output rank and shape may be decided after execution if output is unspecified operand
+ if (!_execution->isFinished())
+ {
+ return false;
+ }
+
+ *rank = _execution->graph().operands().at(operand_index).shape().rank();
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
+
+bool ANeuralNetworksExecution::getOutputOperandDimensions(uint32_t index, uint32_t *dimensions)
+{
+ try
+ {
+ onert::ir::IOIndex output_index{index};
+ const auto operand_index = getOutputOperandIndex(index);
+ bool unspecified = haveUnspecifiedDims(operand_index);
+ if (unspecified)
+ {
+ throw std::runtime_error{"NYI: Models with unspecified output dimensions"};
+ }
+
+ // Check execution is finished
+ // Output rank and shape may be decided after execution if output is unspecified operand
+ if (!_execution->isFinished())
+ {
+ return false;
+ }
+
+ auto shape = _execution->graph().operands().at(operand_index).shape();
+ for (int i = 0; i < shape.rank(); i++)
+ {
+ auto dim = shape.dim(i);
+
+ if (dim <= 0)
+ {
+ throw std::runtime_error{"Invalid dimension value"};
+ }
+
+ dimensions[i] = static_cast<uint32_t>(dim);
+ }
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
new file mode 100644
index 000000000..af2465a81
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __EXECUTION_H__
+#define __EXECUTION_H__
+
+#include <NeuralNetworks.h>
+
+#include <memory>
+
+#include "exec/Execution.h"
+
+struct ANeuralNetworksExecution
+{
+public:
+ ANeuralNetworksExecution(const std::shared_ptr<onert::exec::IExecutor> &executor)
+ : _execution{std::make_shared<onert::exec::Execution>(executor)}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool setInput(uint32_t index, const ANeuralNetworksOperandType *type, const void *buffer,
+ size_t length) noexcept;
+ bool setOutput(uint32_t index, const ANeuralNetworksOperandType *type, void *buffer,
+ size_t length) noexcept;
+ bool startExecute(void) noexcept;
+ bool execute(void) noexcept;
+
+ const onert::ir::OperandIndex getInputOperandIndex(int32_t index) noexcept;
+ const onert::ir::OperandIndex getOutputOperandIndex(int32_t index) noexcept;
+ bool compareDataType(const ANeuralNetworksOperandType *type,
+ const onert::ir::OperandIndex index) noexcept;
+ bool compareShape(const ANeuralNetworksOperandType *type,
+ const onert::ir::OperandIndex index) noexcept;
+ bool haveUnspecifiedDims(const onert::ir::OperandIndex index) noexcept;
+ size_t getOperandSize(const onert::ir::OperandIndex index) noexcept;
+ const std::shared_ptr<onert::exec::Execution> instance(void) noexcept;
+
+ /**
+ * @brief Get output operand's rank
+ * @param[in] index Output index
+ * @param[out] rank Output operand's rank
+ * @return @c true if success to get rank, otherwise @c false
+ */
+ bool getOutputOperandRank(uint32_t index, uint32_t *rank) noexcept;
+ /**
+ * @brief Get dimensions of the output operand
+ * @param[in] index Output index
+ * @param[out] dimensions Output operand's dimensions
+   * @return @c true if success to get dimensions, otherwise @c false
+ * @note This must be called after execution is finished to get resolved output shape
+ * unspecified in model
+ */
+ bool getOutputOperandDimensions(uint32_t index, uint32_t *dimensions);
+
+private:
+ std::shared_ptr<onert::exec::Execution> _execution;
+};
+
+#endif
diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksMemory.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksMemory.cc
index 9cc100585..9cc100585 100644
--- a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksMemory.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksMemory.cc
diff --git a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksMemory.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksMemory.h
index 48a1bc5fc..48a1bc5fc 100644
--- a/runtime/neurun/frontend/nnapi/wrapper/ANeuralNetworksMemory.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksMemory.h
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
new file mode 100644
index 000000000..d2d699ae1
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ANeuralNetworksModel.h"
+#include "OperationFactory.h"
+#include "NNAPIConvert.h"
+
+#include "ir/Operations.Include.h"
+#include "util/logging.h"
+
+#include <memory>
+
+//
+// ANeuralNetworksModel
+//
+ANeuralNetworksModel::ANeuralNetworksModel() noexcept : _optional_operands{}, _operand_usages{}
+{
+ _graph = std::make_shared<onert::ir::Graph>();
+}
+
+bool ANeuralNetworksModel::addOperand(const ANeuralNetworksOperandType *type) noexcept
+{
+ try
+ {
+ const auto shape = NNAPIConvert::getShape(type);
+ const auto typeInfo = NNAPIConvert::getTypeInfo(type);
+ _graph->addOperand(shape, typeInfo);
+ _operand_usages.emplace_back(OperandUsage::NOT_DEFINED);
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
+
+bool ANeuralNetworksModel::setOperandValue(uint32_t index, const void *buffer, size_t length,
+ bool optional, bool copy) noexcept
+{
+ const onert::ir::OperandIndex ind{index};
+
+ try
+ {
+ _operand_usages[index] = OperandUsage::CONSTANT;
+
+ // Remain operands.at(ind).data()->base() as nullptr for optional operand
+ // This will be filled when model finished
+ if (optional)
+ {
+ setOptionalOperand(ind);
+ }
+
+ using onert::ir::CachedData;
+ using onert::ir::ExternalData;
+ if (copy)
+ {
+ _graph->operands().at(ind).data(
+ std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length));
+ }
+ else
+ {
+ _graph->operands().at(ind).data(
+ std::make_unique<ExternalData>(reinterpret_cast<const uint8_t *>(buffer), length));
+ }
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
+
+bool ANeuralNetworksModel::addOperation(ANeuralNetworksOperationType type, uint32_t inputCount,
+ const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs) noexcept
+{
+ try
+ {
+ for (uint32_t i = 0; i < outputCount; i++)
+ {
+ _operand_usages[outputs[i]] = OperandUsage::OPERATION_OUTPUT;
+ }
+
+ auto &factory = OperationFactory::get();
+ OperationFactory::Param param{inputCount, inputs, outputCount, outputs};
+
+ auto node = factory.create(type, param, _graph->operands());
+ _graph->addOperation(std::unique_ptr<onert::ir::Operation>{node});
+
+ // TODO Move these codes to delegate.cpp
+ if (type == ANEURALNETWORKS_FULLY_CONNECTED)
+ {
+ const auto &input_operand =
+ _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::INPUT));
+ auto &weights_operand =
+ _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::WEIGHT));
+ if (input_operand.typeInfo().type() == onert::ir::DataType::FLOAT32 &&
+ weights_operand.typeInfo().type() == onert::ir::DataType::QUANT8_ASYMM)
+ {
+ weights_operand.type(onert::ir::DataType::QUANT8_SYMM);
+ }
+ }
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
+
+bool ANeuralNetworksModel::addOperationEx(ANeuralNetworksOperationTypeEx type, uint32_t inputCount,
+ const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs) noexcept
+{
+ try
+ {
+ for (uint32_t i = 0; i < outputCount; i++)
+ {
+ _operand_usages[outputs[i]] = OperandUsage::OPERATION_OUTPUT;
+ }
+
+ auto &factory = OperationFactory::get();
+ OperationFactory::Param param{inputCount, inputs, outputCount, outputs};
+
+ auto node = factory.create(type, param, _graph->operands());
+ _graph->addOperation(std::unique_ptr<onert::ir::Operation>{node});
+ }
+ catch (const std::exception &e)
+ {
+ return false;
+ }
+ return true;
+}
+
+bool ANeuralNetworksModel::addModelInput(uint32_t index) noexcept
+{
+ try
+ {
+ _operand_usages[index] = OperandUsage::MODEL_INPUT;
+
+ const onert::ir::OperandIndex ind{index};
+ _graph->addInput(ind);
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
+bool ANeuralNetworksModel::addModelOutput(uint32_t index) noexcept
+{
+ try
+ {
+ const onert::ir::OperandIndex ind{index};
+
+ // Duplicated output is not allowed
+ if (_graph->getOutputs().contains(ind))
+ {
+ return false;
+ }
+
+ _graph->addOutput(ind);
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
+
+bool ANeuralNetworksModel::finish() noexcept
+{
+ try
+ {
+ fillOptionalOperand();
+
+ _graph->finishBuilding();
+
+ _operand_usages.clear();
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << '\n';
+
+ return false;
+ }
+
+ return true;
+}
+
+bool ANeuralNetworksModel::isFinished() noexcept { return !_graph->isBuildingPhase(); }
+
+bool ANeuralNetworksModel::isExistOperand(uint32_t index) noexcept
+{
+ return _graph->operands().exist(onert::ir::OperandIndex{index});
+}
+
+size_t ANeuralNetworksModel::operandSize(uint32_t index) noexcept
+{
+ try
+ {
+ return _graph->operands().at(onert::ir::OperandIndex{index}).operandSize();
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << '\n';
+
+ return 0;
+ }
+}
+
+bool ANeuralNetworksModel::isUsageSet(uint32_t index) noexcept
+{
+ return (_operand_usages[index] != OperandUsage::NOT_DEFINED);
+}
+
+bool ANeuralNetworksModel::isOperationOutput(uint32_t index) noexcept
+{
+ return (_operand_usages[index] == OperandUsage::OPERATION_OUTPUT);
+}
+
+void ANeuralNetworksModel::setOptionalOperand(const onert::ir::OperandIndex idx)
+{
+ _optional_operands.insert(idx);
+}
+
+void ANeuralNetworksModel::fillOptionalOperand(void)
+{
+ _graph->operations().iterate([&](const onert::ir::OperationIndex &, onert::ir::Operation &node) {
+ for (auto input : node.getInputs())
+ {
+ // TODO fill default value for optional operands
+ if (_optional_operands.find(input) != _optional_operands.end())
+ {
+ throw std::runtime_error{"Optional operand is not supported yet"};
+ }
+ }
+ });
+}
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
new file mode 100644
index 000000000..3ccd941c7
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MODEL_H__
+#define __MODEL_H__
+
+#include <unordered_set>
+#include <NeuralNetworks.h>
+#include <NeuralNetworksEx.h>
+
+#include "ir/Graph.h"
+
+struct ANeuralNetworksModel
+{
+public:
+ enum class OperandUsage
+ {
+ NOT_DEFINED = 0,
+ MODEL_INPUT,
+ CONSTANT,
+ OPERATION_OUTPUT,
+ };
+
+public:
+ ANeuralNetworksModel() noexcept;
+
+public:
+ bool addOperand(const ANeuralNetworksOperandType *type) noexcept;
+ bool setOperandValue(uint32_t index, const void *buffer, size_t length, bool optional = false,
+ bool copy = false) noexcept;
+ bool addOperation(ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t *inputs,
+ uint32_t outputCount, const uint32_t *outputs) noexcept;
+ bool addOperationEx(ANeuralNetworksOperationTypeEx type, uint32_t inputCount,
+ const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs) noexcept;
+ bool addModelInput(uint32_t index) noexcept;
+ bool addModelOutput(uint32_t index) noexcept;
+ bool finish() noexcept;
+
+ onert::ir::Graph &deref(void) { return *_graph; }
+ bool isFinished() noexcept;
+ bool isExistOperand(uint32_t index) noexcept;
+ size_t operandSize(uint32_t index) noexcept;
+ bool isUsageSet(uint32_t index) noexcept;
+ bool isOperationOutput(uint32_t index) noexcept;
+ void release(std::shared_ptr<onert::ir::Graph> &graph) { graph = _graph; }
+
+private:
+ void setOptionalOperand(const onert::ir::OperandIndex idx);
+ void fillOptionalOperand(void);
+
+private:
+ std::shared_ptr<onert::ir::Graph> _graph;
+ std::unordered_set<onert::ir::OperandIndex> _optional_operands;
+ std::vector<OperandUsage> _operand_usages;
+};
+
+#endif // __MODEL_H__
diff --git a/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc
new file mode 100644
index 000000000..e07297241
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NNAPIConvert.h"
+
+#include <numeric>
+
+using namespace onert::ir;
+
+DataType NNAPIConvert::getDataType(OperandCode type)
+{
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ return DataType::FLOAT32;
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ return DataType::INT32;
+ case ANEURALNETWORKS_UINT32:
+ return DataType::UINT32;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ return DataType::QUANT8_ASYMM;
+ case ANEURALNETWORKS_TENSOR_QUANT8_SYMM:
+ return DataType::QUANT8_SYMM;
+ case ANEURALNETWORKS_BOOL:
+ case ANEURALNETWORKS_TENSOR_BOOL8:
+ return DataType::BOOL8;
+ default:
+ throw std::runtime_error("Unsupported type");
+ }
+}
+
+TypeInfo NNAPIConvert::getTypeInfo(const ANeuralNetworksOperandType *type)
+{
+ return TypeInfo(getDataType((OperandCode)(type->type)), type->scale, type->zeroPoint);
+}
+
+Shape NNAPIConvert::getShape(const ANeuralNetworksOperandType *type)
+{
+ Shape shape(type->dimensionCount);
+
+ for (uint32_t axis = 0; axis < type->dimensionCount; ++axis)
+ {
+ shape.dim(axis) = type->dimensions[axis];
+ }
+
+ return shape;
+}
+
+size_t NNAPIConvert::calculateSizeFromType(const ANeuralNetworksOperandType *type)
+{
+ auto shape = getShape(type);
+ auto data_type = getDataType((OperandCode)(type->type));
+
+ return shape.num_elements() * sizeOfDataType(data_type);
+}
+
+Activation NNAPIConvert::getFusedActivation(FuseCode act)
+{
+ switch (act)
+ {
+ case ANEURALNETWORKS_FUSED_NONE:
+ return Activation::NONE;
+ case ANEURALNETWORKS_FUSED_RELU:
+ return Activation::RELU;
+ case ANEURALNETWORKS_FUSED_RELU1:
+ return Activation::RELU1;
+ case ANEURALNETWORKS_FUSED_RELU6:
+ return Activation::RELU6;
+ default:
+ throw std::runtime_error("Unsupported activation type");
+ }
+}
+
+PaddingType NNAPIConvert::getPaddingType(PaddingCode type)
+{
+ switch (type)
+ {
+ case ANEURALNETWORKS_PADDING_SAME:
+ return PaddingType::SAME;
+ case ANEURALNETWORKS_PADDING_VALID:
+ return PaddingType::VALID;
+ default:
+ throw std::runtime_error("Unsupported type");
+ }
+}
diff --git a/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.h b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.h
new file mode 100644
index 000000000..4fd985e6e
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file NNAPIConvert.h
+ * @brief This file contains converter(s)\n
+ * from NNAPI frontend's struct to onert's internal struct
+ */
+#ifndef __ONERT_NNAPI_CONVERT_H__
+#define __ONERT_NNAPI_CONVERT_H__
+
+#include <NeuralNetworks.h>
+
+#include <ir/TypeInfo.h>
+#include <ir/Shape.h>
+#include <ir/Padding.h>
+#include <ir/InternalType.h>
+
+class NNAPIConvert
+{
+
+public:
+ /**
+ * @brief Convert data type from NNAPI to internal data type
+ * @param[in] type NNAPI's data type
+ * @return onert's internal data type
+ */
+ static onert::ir::DataType getDataType(OperandCode type);
+
+ /**
+   * @brief Convert operand type info from NNAPI to internal operand type info
+ * @param[in] type NNAPI's operand type
+ * @return onert's internal operand type info
+ */
+ static onert::ir::TypeInfo getTypeInfo(const ANeuralNetworksOperandType *type);
+
+ /**
+ * @brief Convert operand shape info from NNAPI to internal operand shape
+ * @param[in] type NNAPI's operand type
+ * @return onert's internal operand shape
+ */
+ static onert::ir::Shape getShape(const ANeuralNetworksOperandType *type);
+
+ /**
+   * @brief Calculate operand size from NNAPI type
+ * @param[in] type NNAPI's operand type
+ * @return Operand size
+ */
+ static size_t calculateSizeFromType(const ANeuralNetworksOperandType *type);
+
+ /**
+ * @brief Convert NNAPI FuseCode to internal activation type
+ * @param[in] act NNAPI's FuseCode type
+ * @return onert's internal activation type
+ */
+ static onert::ir::Activation getFusedActivation(FuseCode act);
+
+ /**
+ * @brief Convert NNAPI PaddingCode to internal padding type
+ * @param[in] type NNAPI's PaddingCode type
+ * @return onert's internal padding type
+ */
+ static onert::ir::PaddingType getPaddingType(PaddingCode type);
+};
+
+#endif // __ONERT_NNAPI_CONVERT_H__
diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
new file mode 100644
index 000000000..10e7c0341
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
@@ -0,0 +1,1899 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationFactory.h"
+#include "NNAPIConvert.h"
+
+#include <ir/Operations.Include.h>
+#include <string.h>
+
+namespace
+{
+using namespace onert::ir;
+
+void replaceDataType(Operands &operands, const OperandIndex &index, const DataType type)
+{
+ assert(operands.exist(index));
+ operands.at(index).type(type);
+}
+
+ExplicitPadding makeExplicitPadding(Operands &operands, const OperandIndex &left_index,
+ const OperandIndex &right_index, const OperandIndex &top_index,
+ const OperandIndex &bottom_index)
+{
+ auto left = operands.at(left_index).asScalar<int32_t>();
+ auto right = operands.at(right_index).asScalar<int32_t>();
+ auto top = operands.at(top_index).asScalar<int32_t>();
+ auto bottom = operands.at(bottom_index).asScalar<int32_t>();
+
+ if (left < 0 || right < 0 || top < 0 || bottom < 0)
+ {
+ throw std::runtime_error{"Cannot handle negative explicit padding value"};
+ }
+
+ ExplicitPadding param;
+ param.left = static_cast<uint32_t>(left);
+ param.right = static_cast<uint32_t>(right);
+ param.top = static_cast<uint32_t>(top);
+ param.bottom = static_cast<uint32_t>(bottom);
+
+ return param;
+}
+
+Stride makeStride(Operands &operands, const OperandIndex &horizontal_index,
+ const OperandIndex &vertical_index)
+{
+ auto horizontal = operands.at(horizontal_index).asScalar<int32_t>();
+ auto vertical = operands.at(vertical_index).asScalar<int32_t>();
+
+ if (vertical < 0 || horizontal < 0)
+ {
+ throw std::runtime_error{"Cannot handle negative stride value"};
+ }
+
+ Stride stride;
+ stride.horizontal = static_cast<uint32_t>(horizontal);
+ stride.vertical = static_cast<uint32_t>(vertical);
+
+ return stride;
+}
+
+uint32_t getUint32Scalar(Operands &operands, const OperandIndex index)
+{
+ auto int32_value = operands.at(index).asScalar<int32_t>();
+ if (int32_value < 0)
+ {
+ throw std::runtime_error{"Cannot handle negative value"};
+ }
+
+ return static_cast<uint32_t>(int32_value);
+}
+
+} // namespace
+
+OperationFactory &OperationFactory::get()
+{
+ static OperationFactory factory;
+ return factory;
+}
+
+OperationFactory::OperationFactory()
+{
+ _map[ANEURALNETWORKS_BATCH_TO_SPACE_ND] = [](const OperationFactory::Param &init_param,
+ Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Block size Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ return new operation::BatchToSpaceND{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_DEPTHWISE_CONV_2D] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert((init_param.input_count == 8 || init_param.input_count == 11) &&
+ init_param.output_count == 1);
+
+ // In common
+ // 0 -> IFM Tensor Index
+ // 1 -> Kernel Tensor Index
+ // 2 -> Bias Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::DepthwiseConv2D::Param param;
+ if (init_param.input_count == 8)
+ {
+ // Implicit Padding case
+ // Each input should be interpreted as follows:
+ //
+ // 3 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 4 -> Stride (width) Index
+ // 5 -> Stride (height) Index
+ // 6 -> Depthwise multiplier
+ // 7 -> Activation Index
+
+ const auto padding_index = OperandIndex{init_param.inputs[3]};
+ const auto hstride_index = OperandIndex{init_param.inputs[4]};
+ const auto vstride_index = OperandIndex{init_param.inputs[5]};
+ const auto multiplier_index = OperandIndex{init_param.inputs[6]};
+ const auto activation_index = OperandIndex{init_param.inputs[7]};
+
+ param.padding.type =
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.multiplier = getUint32Scalar(operands, multiplier_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ else
+ {
+ // Explicit Padding case
+ // Each input should be interpreted as follows:
+ //
+ // 3 -> Padding On the Left
+ // 4 -> Padding On the Right
+ // 5 -> Padding On the Top
+ // 6 -> Padding On the Bottom
+ // 7 -> Stride (width) Index
+ // 8 -> Stride (height) Index
+ // 9 -> Depthwise multiplier
+ // 10-> Activation Index
+
+ const auto padding_left_index = OperandIndex{init_param.inputs[3]};
+ const auto padding_right_index = OperandIndex{init_param.inputs[4]};
+ const auto padding_top_index = OperandIndex{init_param.inputs[5]};
+ const auto padding_bottom_index = OperandIndex{init_param.inputs[6]};
+ const auto hstride_index = OperandIndex{init_param.inputs[7]};
+ const auto vstride_index = OperandIndex{init_param.inputs[8]};
+ const auto multiplier_index = OperandIndex{init_param.inputs[9]};
+ const auto activation_index = OperandIndex{init_param.inputs[10]};
+
+ param.padding.type = PaddingType::EXPLICIT;
+ param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+ padding_top_index, padding_bottom_index);
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.multiplier = getUint32Scalar(operands, multiplier_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+
+ return new operation::DepthwiseConv2D{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_MAX_POOL_2D] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 7 || init_param.input_count == 10);
+ assert(init_param.output_count == 1);
+
+ // In common
+ // 0 -> IFM Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::MaxPool2D::Param param;
+ if (init_param.input_count == 7) // support implicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 2 -> Horizontal (over width) Stride Index
+ // 3 -> Vertical (over height) Stride Index
+ // 4 -> Filter Width Index
+ // 5 -> Filter Height Index
+ // 6 -> FuseCode (activation) Index
+
+ const auto padding_index = OperandIndex{init_param.inputs[1]};
+ const auto hstride_index = OperandIndex{init_param.inputs[2]};
+ const auto vstride_index = OperandIndex{init_param.inputs[3]};
+ const auto kw_index = OperandIndex{init_param.inputs[4]};
+ const auto kh_index = OperandIndex{init_param.inputs[5]};
+ const auto activation_index = OperandIndex{init_param.inputs[6]};
+
+ param.padding.type =
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = operands.at(kh_index).asScalar<uint32_t>();
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ else if (init_param.input_count == 10) // support explicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 1 -> Padding_left index
+ // 2 -> Padding_right index
+ // 3 -> Padding_top index
+ // 4 -> Padding_bottom index
+ // 5 -> Horizontal (over width) Stride Index
+ // 6 -> Vertical (over height) Stride Index
+ // 7 -> Filter Width Index
+ // 8 -> Filter Height Index
+ // 9 -> FuseCode (activation) Index
+
+ const auto padding_left_index = OperandIndex{init_param.inputs[1]};
+ const auto padding_right_index = OperandIndex{init_param.inputs[2]};
+ const auto padding_top_index = OperandIndex{init_param.inputs[3]};
+ const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
+ const auto hstride_index = OperandIndex{init_param.inputs[5]};
+ const auto vstride_index = OperandIndex{init_param.inputs[6]};
+ const auto kw_index = OperandIndex{init_param.inputs[7]};
+ const auto kh_index = OperandIndex{init_param.inputs[8]};
+ const auto activation_index = OperandIndex{init_param.inputs[9]};
+
+ param.padding.type = PaddingType::EXPLICIT;
+ param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+ padding_top_index, padding_bottom_index);
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = getUint32Scalar(operands, kh_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ return new operation::MaxPool2D{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ // TODO We may reuse code here for MAX_POOL_2D. Seems like these two are identical
+ assert(init_param.input_count == 7 || init_param.input_count == 10);
+ assert(init_param.output_count == 1);
+
+ // In common
+ // 0 -> IFM Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::AvgPool2D::Param param;
+ if (init_param.input_count == 7) // support implicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 2 -> Horizontal (over width) Stride Index
+ // 3 -> Vertical (over height) Stride Index
+ // 4 -> Filter Width Index
+ // 5 -> Filter Height Index
+ // 6 -> FuseCode (activation) Index
+
+ const auto padding_index = OperandIndex{init_param.inputs[1]};
+ const auto hstride_index = OperandIndex{init_param.inputs[2]};
+ const auto vstride_index = OperandIndex{init_param.inputs[3]};
+ const auto kw_index = OperandIndex{init_param.inputs[4]};
+ const auto kh_index = OperandIndex{init_param.inputs[5]};
+ const auto activation_index = OperandIndex{init_param.inputs[6]};
+
+ param.padding.type =
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = getUint32Scalar(operands, kh_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ else if (init_param.input_count == 10) // support explicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 1 -> Padding_left index
+ // 2 -> Padding_right index
+ // 3 -> Padding_top index
+ // 4 -> Padding_bottom index
+ // 5 -> Horizontal (over width) Stride Index
+ // 6 -> Vertical (over height) Stride Index
+ // 7 -> Filter Width Index
+ // 8 -> Filter Height Index
+ // 9 -> FuseCode (activation) Index
+
+ const auto padding_left_index = OperandIndex{init_param.inputs[1]};
+ const auto padding_right_index = OperandIndex{init_param.inputs[2]};
+ const auto padding_top_index = OperandIndex{init_param.inputs[3]};
+ const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
+ const auto hstride_index = OperandIndex{init_param.inputs[5]};
+ const auto vstride_index = OperandIndex{init_param.inputs[6]};
+ const auto kw_index = OperandIndex{init_param.inputs[7]};
+ const auto kh_index = OperandIndex{init_param.inputs[8]};
+ const auto activation_index = OperandIndex{init_param.inputs[9]};
+
+ param.padding.type = PaddingType::EXPLICIT;
+ param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+ padding_top_index, padding_bottom_index);
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = getUint32Scalar(operands, kh_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+
+ return new operation::AvgPool2D{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_CONCATENATION] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count >= 2); // At least one input tensor and one axis
+ assert(init_param.output_count == 1);
+
+ // When there are N + 1 inputs, each input should be interpreted as follows:
+ //
+ // [0, N) -> Input tensors
+ // N -> Axis
+ //
+
+ OperandIndexSequence inputs;
+ for (uint32_t n = 0; n < init_param.input_count - 1; ++n)
+ {
+ inputs.append(OperandIndex{init_param.inputs[n]});
+ }
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::Concat::Param param;
+ const OperandIndex axis_index{init_param.inputs[init_param.input_count - 1]};
+ param.axis = operands.at(axis_index).asScalar<int32_t>();
+ param.rank = operands.at(outputs.at(0)).shape().rank();
+
+ return new operation::Concat{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_RESHAPE] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> A tensor, specifying the tensor to be reshaped.
+ // 1 -> A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32, defining the shape of the output
+ // tensor
+
+ // TODO Second input should be shape tensor (init_param.inputs[1])
+ // Currently unused since assume that it is same with output tensor size
+ OperandIndexSequence inputs{init_param.inputs[0] /* , init_param.inputs[1] */};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ return new operation::Reshape{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_FULLY_CONNECTED] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 4 && init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> A tensor, specifying the input.
+ // 1 -> A 2-D tensor, specifying the weights
+ // 2 -> A 1-D tensor, specifying the bias
+ // 3 -> An INT32 value, and has to be one of the FuseCode values
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::FullyConnected::Param param;
+ const auto activation_index = OperandIndex{init_param.inputs[3]};
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+
+ return new operation::FullyConnected{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_SOFTMAX] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> A 2-D or 4-D tensor, specifying the tensor to be reshaped.
+ // 1 -> FLOAT32 value, specifying the positive scaling factor for the exponent, beta.
+
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ const auto beta_index = OperandIndex{init_param.inputs[1]};
+
+ operation::Softmax::Param param;
+ param.beta = operands.at(beta_index).asScalar<float>();
+
+ return new operation::Softmax{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_CAST] = [](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ // 0 -> input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ // NNAPI uses QUANT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's input/output
+ if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT8_ASYMM)
+ {
+ replaceDataType(operands, inputs.at(0), DataType::UINT8);
+ }
+ if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT8_ASYMM)
+ {
+ replaceDataType(operands, outputs.at(0), DataType::UINT8);
+ }
+
+ return new operation::Cast{inputs, outputs};
+ };
+
+ // ANEURALNETWORKS_CAST_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_CAST_EX
+ _map[ANEURALNETWORKS_CAST_EX] = _map[ANEURALNETWORKS_CAST];
+
+ _map[ANEURALNETWORKS_CONV_2D] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ using operation::Conv2D;
+
+ // inputCount is either 7 or 10 according to NN API specification.
+ // - Padding is implicit when inputCount is 7
+ // - Padding is explicit when inputCount is 10
+ assert(init_param.input_count == 7 || init_param.input_count == 10);
+ assert(init_param.output_count == 1);
+
+ // 0 -> IFM Tensor Index
+ // 1 -> Kernel Tensor Index
+ // 2 -> Bias Tensor Index
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ Conv2D::Param param;
+
+ if (init_param.input_count == 7) // support implicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 3 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 4 -> Stride (width) Index
+ // 5 -> Stride (height) Index
+ // 6 -> Activation Index
+
+ const auto padding_index = OperandIndex{init_param.inputs[3]};
+ const auto hstride_index = OperandIndex{init_param.inputs[4]};
+ const auto vstride_index = OperandIndex{init_param.inputs[5]};
+ const auto activation_index = OperandIndex{init_param.inputs[6]};
+
+ param.padding.type =
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ else if (init_param.input_count == 10) // support explicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 3 -> Padding_left index
+ // 4 -> Padding_right index
+ // 5 -> Padding_top index
+ // 6 -> Padding_bottom index
+ // 7 -> Stride (width) Index
+ // 8 -> Stride (height) Index
+ // 9 -> Activation Index
+
+ const auto padding_left_index = OperandIndex{init_param.inputs[3]};
+ const auto padding_right_index = OperandIndex{init_param.inputs[4]};
+ const auto padding_top_index = OperandIndex{init_param.inputs[5]};
+ const auto padding_bottom_index = OperandIndex{init_param.inputs[6]};
+ const auto hstride_index = OperandIndex{init_param.inputs[7]};
+ const auto vstride_index = OperandIndex{init_param.inputs[8]};
+ const auto activation_index = OperandIndex{init_param.inputs[9]};
+
+ param.padding.type = PaddingType::EXPLICIT;
+ param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+ padding_top_index, padding_bottom_index);
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+
+ return new Conv2D{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_ADD] = [](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 3);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Lefthand side operand
+ // 1 -> Righthand side operand
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::Add::Param param;
+
+ const auto activation_index = OperandIndex{init_param.inputs[2]};
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+
+ return new operation::Add{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_REDUCE_SUM] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 3);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Reduced Axes Tensor Index
+ // 2 -> keep_dims Index
+
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+ std::vector<std::int32_t> axes =
+ operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>();
+
+ operation::ReduceSum::Param param;
+ param.axes.assign(axes.cbegin(), axes.cend());
+ param.keep_dims = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int8_t>() != 0;
+ param.rank = operands.at(inputs.at(0)).shape().rank();
+
+ return new operation::ReduceSum{inputs, outputs, param};
+ };
+
+ // ANEURALNETWORKS_REDUCE_SUM_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX
+ _map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM];
+
+ _map[ANEURALNETWORKS_SUB] = [](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 3);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Lefthand side operand
+ // 1 -> Righthand side operand
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::Sub::Param param;
+
+ const auto activation_index = OperandIndex{init_param.inputs[2]};
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+
+ return new operation::Sub{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 3 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Begins Tensor Index
+ // 2 -> Sizes Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
+
+ operation::Slice::Param param;
+ param.rank = operands.at(inputs.at(0)).shape().rank();
+
+ return new operation::Slice{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_STRIDED_SLICE] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 7 && init_param.output_count == 1);
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2],
+ init_param.inputs[3]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 1 -> A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the starts of
+ // the dimensions of the input tensor to be sliced. The length must be
+ // of rank(input0).
+ // 2 -> A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the ends of
+ // the dimensions of the input tensor to be sliced. The length must be
+ // of rank(input0).
+ // 3 -> A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the strides of
+ // the dimensions of the input tensor to be sliced. The length must be
+ // of rank(input0).
+ // 4 -> An {@link ANEURALNETWORKS_INT32} scalar, begin_mask. If the ith bit
+ // of begin_mask is set, begin[i] is ignored and the fullest possible
+ // range in that dimension is used instead.
+ // 5 -> An {@link ANEURALNETWORKS_INT32} scalar, end_mask. If the ith bit of
+ // end_mask is set, end[i] is ignored and the fullest possible range in
+ // that dimension is used instead.
+ // 6 -> An {@link ANEURALNETWORKS_INT32} scalar, shrink_axis_mask. An int32
+ // mask. If the ith bit of shrink_axis_mask is set, it implies that the
+ // ith specification shrinks the dimensionality by 1. A slice of size 1
+ // starting from begin[i] in the dimension must be preserved.
+
+ operation::StridedSlice::Param param;
+
+ param.begin_mask = operands.at(OperandIndex{init_param.inputs[4]}).asScalar<std::int32_t>();
+ param.end_mask = operands.at(OperandIndex{init_param.inputs[5]}).asScalar<std::int32_t>();
+ param.shrink_axis_mask =
+ operands.at(OperandIndex{init_param.inputs[6]}).asScalar<std::int32_t>();
+ param.rank = operands.at(inputs.at(0)).shape().rank();
+
+ return new operation::StridedSlice{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_TRANSPOSE] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ // TODO make this work with init_param.input_count == 1 (when permutation vector is optional)
+
+ // Inputs
+ // 0: An n-D tensor, specifying the tensor to be transposed.
+ // 1: An optional 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32},
+ // the permutation of the dimensions of the input tensor.
+ // The returned tensor's dimension i corresponds to the input dimension
+ // perm[i]. If perm is not given, it is set to (n-1...0), where n is the
+ // rank of the input tensor. Hence by default, this operation performs a
+ // regular matrix transpose on 2-D input Tensors.
+ assert(init_param.input_count == 2);
+ assert(init_param.output_count == 1);
+
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+ std::vector<std::int32_t> perm =
+ operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>();
+
+ operation::Transpose::Param param;
+ param.perm.assign(perm.cbegin(), perm.cend());
+ param.rank = operands.at(inputs.at(0)).shape().rank();
+
+ return new operation::Transpose{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_MUL] = [](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 3 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> LHS Tensor Index
+ // 1 -> RHS Tensor Index
+ // 2 -> Activation Index
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::Mul::Param param;
+
+ const auto activation_index = OperandIndex{init_param.inputs[2]};
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+
+ return new operation::Mul{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 1 || init_param.input_count == 2);
+ assert(init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> An n-D tensor, the tensor to be squeezed.
+ // 1 -> An optional 1-D tensor of ANEURALNETWORKS_TENSOR_INT32. The dimensions to squeeze.
+ // If specified only squeezes the dimensions listed. Otherwise, squeezes all dimensions.
+ // The dimension index starts at 0. An error must be reported if squeezing a dimension that
+ // is not 1.
+
+ // Add mandatory input index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ // Add dims index if specified
+ operation::Squeeze::Param param{};
+ if (init_param.input_count == 2)
+ {
+ auto squeeze_dims_idx = OperandIndex{init_param.inputs[1]};
+ assert(operands.at(squeeze_dims_idx).shape().rank() == 1);
+ assert(operands.at(squeeze_dims_idx).shape().dim(0) >= 0);
+ assert(static_cast<uint32_t>(operands.at(squeeze_dims_idx).shape().dim(0)) <=
+ sizeof(param.dims));
+ param.ndim = operands.at(squeeze_dims_idx).shape().dim(0);
+ if (param.ndim > 0)
+ {
+ assert(operands.at(squeeze_dims_idx).data());
+ memcpy(param.dims, operands.at(squeeze_dims_idx).data()->base(),
+ param.ndim * sizeof(param.dims[0]));
+ }
+ }
+
+ return new operation::Squeeze{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_TANH] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new operation::Tanh{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_LOGISTIC] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new operation::Logistic{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_DIV] = [](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 3 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> LHS Tensor Index
+ // 1 -> RHS Tensor Index
+ // 2 -> Activation Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::Div::Param param;
+
+ const auto activation_index = OperandIndex{init_param.inputs[2]};
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+
+ return new operation::Div{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_EXP] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new operation::Exp{inputs, outputs};
+ };
+
+ // ANEURALNETWORKS_EXP_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_EXP_EX
+ _map[ANEURALNETWORKS_EXP_EX] = _map[ANEURALNETWORKS_EXP];
+
+ _map[ANEURALNETWORKS_GREATER] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::Comparison::Param param;
+ param.comparison_type = operation::Comparison::ComparisonType::Greater;
+
+ return new operation::Comparison{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_GREATER_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::Comparison::Param param;
+ param.comparison_type = operation::Comparison::ComparisonType::GreaterEqual;
+
+ return new operation::Comparison{inputs, outputs, param};
+ };
+
+ // ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX
+ _map[ANEURALNETWORKS_GREATER_EQUAL_EX] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::Comparison::Param param;
+ param.comparison_type = operation::Comparison::ComparisonType::GreaterEqual;
+
+ // Output operand type must be boolean
+ replaceDataType(operands, outputs.at(0), DataType::BOOL8);
+
+ return new operation::Comparison{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_LESS] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::Comparison::Param param;
+ param.comparison_type = operation::Comparison::ComparisonType::Less;
+
+ return new operation::Comparison{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_LESS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::Comparison::Param param;
+ param.comparison_type = operation::Comparison::ComparisonType::LessEqual;
+
+ return new operation::Comparison{inputs, outputs, param};
+ };
+
+ // ANEURALNETWORKS_LESS_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_LESS_EX
+ _map[ANEURALNETWORKS_LESS_EX] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::Comparison::Param param;
+ param.comparison_type = operation::Comparison::ComparisonType::Less;
+
+ // Output operand type must be boolean
+ replaceDataType(operands, outputs.at(0), DataType::BOOL8);
+
+ return new operation::Comparison{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_REDUCE_MAX] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 3 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ // 2 -> keep_dims Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ std::vector<std::int32_t> axes =
+ operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>();
+
+ operation::ReduceMax::Param param;
+ param.axes.assign(axes.cbegin(), axes.cend());
+ param.keep_dims = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int8_t>() != 0;
+ param.rank = operands.at(inputs.at(0)).shape().rank();
+
+ return new operation::ReduceMax{inputs, outputs, param};
+ };
+
+ // ANEURALNETWORKS_REDUCE_MAX_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX
+ _map[ANEURALNETWORKS_REDUCE_MAX_EX] = _map[ANEURALNETWORKS_REDUCE_MAX];
+
+ _map[ANEURALNETWORKS_NOT_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input1 Tensor Index
+ // 1 -> input2 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::Comparison::Param param;
+ param.comparison_type = operation::Comparison::ComparisonType::NotEqual;
+
+ return new operation::Comparison{inputs, outputs, param};
+ };
+
+ // ANEURALNETWORKS_NOT_EQUAL_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_NOT_EQUAL_EX
+ _map[ANEURALNETWORKS_NOT_EQUAL_EX] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input1 Tensor Index
+ // 1 -> input2 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::Comparison::Param param;
+ param.comparison_type = operation::Comparison::ComparisonType::NotEqual;
+
+ // Output operand type must be boolean
+ replaceDataType(operands, outputs.at(0), DataType::BOOL8);
+
+ return new operation::Comparison{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_LOGICAL_AND] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ return new operation::LogicalAnd{inputs, outputs};
+ };
+
+ // ANEURALNETWORKS_LOGICAL_AND_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX
+ _map[ANEURALNETWORKS_LOGICAL_AND_EX] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ // This operation's operands must be boolean type.
+ replaceDataType(operands, inputs.at(0), DataType::BOOL8);
+ replaceDataType(operands, inputs.at(1), DataType::BOOL8);
+ replaceDataType(operands, outputs.at(0), DataType::BOOL8);
+
+ return new operation::LogicalAnd{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_RSQRT] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new operation::RSQRT{inputs, outputs};
+ };
+
+ // ANEURALNETWORKS_RSQRT_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_RSQRT_EX
+ _map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT];
+
+ _map[ANEURALNETWORKS_RELU] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new operation::ReLU{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 3 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Index
+ // 1 -> Height Index
+ // 2 -> Width Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ operation::ResizeBilinear::Param param;
+ param.height_out = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<int32_t>();
+ param.width_out = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>();
+
+ return new operation::ResizeBilinear{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_RELU1] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new operation::ReLU1{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_RELU6] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new operation::ReLU6{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_RNN] = [](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 6 && init_param.output_count == 2);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Weights Tensor Index
+ // 2 -> Recurrent Weights Tensor Index
+ // 3 -> Bias Tensor Index
+ // 4 -> Hidden state (in) Index
+ // 5 -> Activation Index
+
+ OperandIndexSequence inputs;
+ for (uint32_t n = 0; n < init_param.input_count - 1; ++n)
+ {
+ inputs.append(OperandIndex{init_param.inputs[n]});
+ }
+ OperandIndexSequence outputs;
+ for (uint32_t n = 0; n < init_param.output_count; ++n)
+ {
+ outputs.append(OperandIndex{init_param.outputs[n]});
+ }
+
+ operation::RNN::Param param;
+ const auto activation_index = OperandIndex{init_param.inputs[5]};
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+
+ return new operation::RNN{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_FLOOR] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ // 0 -> input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new operation::Floor{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param,
+ Operands &) {
+ assert(init_param.input_count == 3 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Block size Index
+ // 2 -> Paddings Index
+ OperandIndexSequence inputs;
+ for (uint32_t n = 0; n < init_param.input_count; ++n)
+ {
+ inputs.append(OperandIndex{init_param.inputs[n]});
+ }
+
+ return new operation::SpaceToBatchND{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_SPACE_TO_DEPTH] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Block size Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ operation::SpaceToDepth::Param param;
+ param.block_size = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
+
+ return new operation::SpaceToDepth{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_L2_POOL_2D] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 10 || init_param.input_count == 7);
+ assert(init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ operation::L2Pool2D::Param param;
+
+    if (init_param.input_count == 7) // Implicit Padding case
+ {
+ // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 2 -> Horizontal (over width) Stride Index
+      // 3 -> Vertical (over height) Stride Index
+ // 4 -> Filter Width Index
+ // 5 -> Filter Height Index
+ // 6 -> FuseCode (activation) Index
+ const auto padding_index = OperandIndex{init_param.inputs[1]};
+ const auto hstride_index = OperandIndex{init_param.inputs[2]};
+ const auto vstride_index = OperandIndex{init_param.inputs[3]};
+ const auto kw_index = OperandIndex{init_param.inputs[4]};
+ const auto kh_index = OperandIndex{init_param.inputs[5]};
+ const auto activation_index = OperandIndex{init_param.inputs[6]};
+
+ param.padding.type =
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = getUint32Scalar(operands, kh_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ else // Explicit Padding case
+ {
+ // 1 -> Padding_left index
+ // 2 -> Padding_right index
+ // 3 -> Padding_top index
+ // 4 -> Padding_bottom index
+ // 5 -> Horizontal (over width) Stride Index
+      // 6 -> Vertical (over height) Stride Index
+ // 7 -> Filter Width Index
+ // 8 -> Filter Height Index
+ // 9 -> FuseCode (activation) Index
+ const auto padding_left_index = OperandIndex{init_param.inputs[1]};
+ const auto padding_right_index = OperandIndex{init_param.inputs[2]};
+ const auto padding_top_index = OperandIndex{init_param.inputs[3]};
+ const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
+ const auto hstride_index = OperandIndex{init_param.inputs[5]};
+ const auto vstride_index = OperandIndex{init_param.inputs[6]};
+ const auto kw_index = OperandIndex{init_param.inputs[7]};
+ const auto kh_index = OperandIndex{init_param.inputs[8]};
+ const auto activation_index = OperandIndex{init_param.inputs[9]};
+
+ param.padding.type = PaddingType::EXPLICIT;
+ param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+ padding_top_index, padding_bottom_index);
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = getUint32Scalar(operands, kh_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+
+ return new operation::L2Pool2D{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_EMBEDDING_LOOKUP] = [](const OperationFactory::Param &init_param,
+ Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Lookups Index
+ // 1 -> Values Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ return new operation::EmbeddingLookup{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_L2_NORMALIZATION] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ // 0 -> input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ operation::L2Normalization::Param param;
+ param.rank = operands.at(inputs.at(0)).shape().rank();
+
+ return new operation::L2Normalization{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_HASHTABLE_LOOKUP] = [](const OperationFactory::Param &init_param,
+ Operands &) {
+ assert(init_param.input_count == 3 && init_param.output_count == 2);
+
+ // Each output should be interpreted as follows:
+ //
+ // 0 -> Output Index
+ // 1 -> Hits Index
+ OperandIndexSequence outputs{init_param.outputs[0], init_param.outputs[1]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Lookups Index
+ // 1 -> Keys Index
+ // 2 -> Values Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
+
+ return new operation::HashtableLookup{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_PRELU] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ // 1 -> alpha Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ return new operation::PReLU{inputs, outputs};
+ };
+
+ // ANEURALNETWORKS_PRELU_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_PRELU_EX
+ _map[ANEURALNETWORKS_PRELU_EX] = _map[ANEURALNETWORKS_PRELU];
+
+ _map[ANEURALNETWORKS_TRANSPOSE_CONV_EX] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 6 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Output Shape Index
+ // 1 -> Weights Index
+ // 2 -> Input Tensor Index
+ // 3 -> Padding Type
+ // 4 -> Stride width
+ // 5 -> Stride height
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
+
+ operation::TransposeConv::Param param;
+
+ const auto padding_index = OperandIndex{init_param.inputs[3]};
+ const auto hstride_index = OperandIndex{init_param.inputs[4]};
+ const auto vstride_index = OperandIndex{init_param.inputs[5]};
+
+ param.padding.type =
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+
+ return new operation::TransposeConv{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_SQRT] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ // 0 -> input Tensor Index
+
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ return new operation::SQRT{inputs, outputs};
+ };
+
+ // ANEURALNETWORKS_SQRT_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_SQRT_EX
+ _map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT];
+
+ _map[ANEURALNETWORKS_LOGICAL_OR] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ return new operation::LogicalOr{inputs, outputs};
+ };
+
+ // ANEURALNETWORKS_LOGICAL_OR_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX
+ _map[ANEURALNETWORKS_LOGICAL_OR_EX] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ // This operation's operands must be boolean type.
+ replaceDataType(operands, inputs.at(0), DataType::BOOL8);
+ replaceDataType(operands, inputs.at(1), DataType::BOOL8);
+ replaceDataType(operands, outputs.at(0), DataType::BOOL8);
+
+ return new operation::LogicalOr{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_LOGICAL_NOT] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new operation::LogicalNot{inputs, outputs};
+ };
+
+ // ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX
+ _map[ANEURALNETWORKS_LOGICAL_NOT_EX] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ // This operation's operands must be boolean type.
+ replaceDataType(operands, inputs.at(0), DataType::BOOL8);
+ replaceDataType(operands, outputs.at(0), DataType::BOOL8);
+
+ return new operation::LogicalNot{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_LSTM] = [](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 23 && init_param.output_count == 4);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Input to Input Tensor Index
+ // 2 -> Input to Forget Tensor Index
+ // 3 -> Input to Cell Tensor Index
+ // 4 -> Input to Output Tensor Index
+ // 5 -> Recurrent to Input Weights Tensor Index
+ // 6 -> Recurrent to Forget Weights Tensor Index
+ // 7 -> Recurrent to Cell Weights Tensor Index
+ // 8 -> Recurrent to Output Weights Tensor Index
+ // 9 -> Cell to Input Weights Tensor Index
+ // 10 -> Cell to Forget Weights Tensor Index
+ // 11 -> Cell to Output Weights Tensor Index
+ // 12 -> Input Gate Bias Tensor Index
+ // 13 -> Forget Gate Bias Tensor Index
+ // 14 -> Cell Bias Tensor Index
+ // 15 -> Output Gate Bias Tensor Index
+ // 16 -> Projection Weights Tensor Index
+ // 17 -> Projection Bias Tensor Index
+ // 18 -> Output State In Tensor Index
+ // 19 -> Cell State In Tensor Index
+ OperandIndexSequence inputs;
+ for (uint32_t n = 0; n < init_param.input_count - 3; ++n)
+ {
+ inputs.append(OperandIndex{init_param.inputs[n]});
+ }
+
+ // Each output should be interpreted as follows:
+ //
+ // 0 -> Scratch Buffer Tensor Index
+ // 1 -> Output State Out Tensor Index
+ // 2 -> Cell State Out Tensor Index
+ // 3 -> Output Tensor Index
+ OperandIndexSequence outputs;
+ for (uint32_t n = 0; n < init_param.output_count; ++n)
+ {
+ outputs.append(OperandIndex{init_param.outputs[n]});
+ }
+
+ operation::LSTM::Param param;
+ const auto activation_index = OperandIndex{init_param.inputs[20]};
+ switch (operands.at(activation_index).asScalar<int32_t>())
+ {
+ case 0:
+ param.activation = Activation::NONE;
+ break;
+ case 1:
+ param.activation = Activation::RELU;
+ break;
+ case 2:
+ param.activation = Activation::RELU1;
+ break;
+ case 3:
+ param.activation = Activation::RELU6;
+ break;
+ case 4:
+ param.activation = Activation::TANH;
+ break;
+ case 6:
+ param.activation = Activation::SIGMOID;
+ break;
+ default:
+ throw std::runtime_error("Unsupported activation type");
+ break;
+ }
+ param.cell_threshold = operands.at(OperandIndex{init_param.inputs[21]}).asScalar<float>();
+ param.projection_threshold = operands.at(OperandIndex{init_param.inputs[22]}).asScalar<float>();
+
+ return new operation::LSTM{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::Comparison::Param param;
+ param.comparison_type = operation::Comparison::ComparisonType::Equal;
+
+ return new operation::Comparison{inputs, outputs, param};
+ };
+
+ // ANEURALNETWORKS_EQUAL_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_EQUAL_EX
+ _map[ANEURALNETWORKS_EQUAL_EX] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::Comparison::Param param;
+ param.comparison_type = operation::Comparison::ComparisonType::Equal;
+
+ // Output operand type must be boolean
+ replaceDataType(operands, outputs.at(0), DataType::BOOL8);
+
+ return new operation::Comparison{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_SQUARED_DIFFERENCE_EX] = [](const OperationFactory::Param &init_param,
+ Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> LHS Tensor Index
+ // 1 -> RHS Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ return new operation::SquaredDifference{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_TOPK_V2] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 2 && init_param.output_count == 2);
+
+ // Each output should be interpreted as follows:
+ //
+ // 0 -> Index for Output Values
+ // 1 -> Index for Output Indices
+ OperandIndexSequence outputs{init_param.outputs[0], init_param.outputs[1]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Index for Input Data
+ // 1 -> Index for K
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ operation::TopKV2::Param param;
+ param.k = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
+
+ return new operation::TopKV2{inputs, outputs, param};
+ };
+
+  // ANEURALNETWORKS_TOPK_V2_EX is deprecated
+  // TODO Remove ANEURALNETWORKS_TOPK_V2_EX
+ _map[ANEURALNETWORKS_TOPK_V2_EX] = _map[ANEURALNETWORKS_TOPK_V2];
+
+ _map[ANEURALNETWORKS_GATHER] = [](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 3 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ // 1 -> axis Index
+ // 2 -> indices Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[2]};
+
+ operation::Gather::Param param;
+ param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<int32_t>();
+ param.rank = operands.at(inputs.at(0)).shape().rank();
+
+ return new operation::Gather{inputs, outputs, param};
+ };
+
+ // ANEURALNETWORKS_GATHER_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_GATHER_EX
+ _map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER];
+
+ _map[ANEURALNETWORKS_NEG] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new operation::Neg{inputs, outputs};
+ };
+
+ // ANEURALNETWORKS_NEG_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_NEG_EX
+ _map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG];
+
+ _map[ANEURALNETWORKS_ABS] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new operation::Abs{inputs, outputs};
+ };
+
+ // ANEURALNETWORKS_ABS_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_ABS_EX
+ _map[ANEURALNETWORKS_ABS_EX] = _map[ANEURALNETWORKS_ABS];
+
+ _map[ANEURALNETWORKS_ARGMAX] = [](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ operation::ArgMax::Param param;
+ param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
+ param.rank = operands.at(inputs.at(0)).shape().rank();
+
+ return new operation::ArgMax{inputs, outputs, param};
+ };
+
+ // ANEURALNETWORKS_ARGMAX_EX is deprecated
+ // TODO Remove ANEURALNETWORKS_ARGMAX_EX
+ _map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX];
+
+ _map[ANEURALNETWORKS_DEQUANTIZE] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new operation::Dequantize{inputs, outputs};
+ };
+
+ _map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 3 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> ifm Tensor Index
+ // 1 -> axis Tensor Index
+ // 2 -> keep_dims Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ std::vector<std::int32_t> axes =
+ operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>();
+
+ operation::Mean::Param param;
+ param.axes.assign(axes.cbegin(), axes.cend());
+ param.keep_dims = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>() != 0;
+ param.rank = operands.at(inputs.at(0)).shape().rank();
+
+ return new operation::Mean{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 5 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ operation::LocalResponseNormalization::Param param;
+ param.radius = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
+ param.bias = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<float>();
+ param.alpha = operands.at(OperandIndex{init_param.inputs[3]}).asScalar<float>();
+ param.beta = operands.at(OperandIndex{init_param.inputs[4]}).asScalar<float>();
+
+ return new operation::LocalResponseNormalization{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_DEPTH_TO_SPACE] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Block size Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ operation::DepthToSpace::Param param;
+ param.block_size = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
+
+ return new operation::DepthToSpace{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_PACK_EX] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count >= 3 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+ OperandIndexSequence inputs;
+ for (uint32_t n = 0; n < init_param.input_count - 2; ++n)
+ {
+ inputs.append(OperandIndex{init_param.inputs[n]});
+ }
+
+ operation::Pack::Param param;
+ const auto num_index = OperandIndex{init_param.inputs[init_param.input_count - 2]};
+ const auto axis_index = OperandIndex{init_param.inputs[init_param.input_count - 1]};
+ param.num = operands.at(num_index).asScalar<int32_t>();
+ param.axis = operands.at(axis_index).asScalar<int32_t>();
+ param.rank = operands.at(outputs.at(0)).shape().rank();
+
+ return new operation::Pack{inputs, outputs, param};
+ };
+
+  // Generator for REDUCE_MIN. The axis list (input #1) and keep_dims flag
+  // (input #2) are constant operands folded into Param; only the data tensor
+  // remains a graph input.
+  _map[ANEURALNETWORKS_REDUCE_MIN] = [](const OperationFactory::Param &init_param,
+                                        Operands &operands) {
+    assert(init_param.input_count == 3 && init_param.output_count == 1);
+
+    OperandIndexSequence outputs{init_param.outputs[0]};
+
+    // Each input should be interpreted as follows:
+    //
+    // 0 -> Input Tensor Index
+    // 1 -> Axis Tensor Index
+    // 2 -> keep_dims Index
+    OperandIndexSequence inputs{init_param.inputs[0]};
+    std::vector<std::int32_t> axes =
+        operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>();
+
+    operation::ReduceMin::Param param;
+    param.axes.assign(axes.cbegin(), axes.cend());
+    // keep_dims is read as a 1-byte scalar; any nonzero value counts as true.
+    param.keep_dims = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int8_t>() != 0;
+    param.rank = operands.at(inputs.at(0)).shape().rank();
+
+    return new operation::ReduceMin{inputs, outputs, param};
+  };
+
+  // ANEURALNETWORKS_REDUCE_MIN_EX is deprecated
+  // TODO Remove ANEURALNETWORKS_REDUCE_MIN_EX
+  _map[ANEURALNETWORKS_REDUCE_MIN_EX] = _map[ANEURALNETWORKS_REDUCE_MIN];
+
+  // Generator for SPLIT. Inputs: 0 -> tensor to split, 1 -> axis (scalar),
+  // 2 -> number of splits (scalar). The axis and split count are folded into
+  // Param; there is one graph output per split, appended in declaration order.
+  _map[ANEURALNETWORKS_SPLIT] = [](const OperationFactory::Param &init_param, Operands &operands) {
+    assert(init_param.input_count == 3);
+    assert(init_param.output_count >= 1); // At least one output tensor and axis
+
+    OperandIndexSequence inputs{init_param.inputs[0]};
+    OperandIndexSequence outputs;
+    for (uint32_t n = 0; n < init_param.output_count; ++n)
+    {
+      outputs.append(OperandIndex{init_param.outputs[n]});
+    }
+
+    operation::Split::Param param;
+    param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
+    param.num_splits = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<std::int32_t>();
+    param.rank = operands.at(inputs.at(0)).shape().rank();
+
+    return new operation::Split{inputs, outputs, param};
+  };
+
+  // ANEURALNETWORKS_SPLIT_EX is deprecated
+  // TODO Remove ANEURALNETWORKS_SPLIT_EX
+  _map[ANEURALNETWORKS_SPLIT_EX] = _map[ANEURALNETWORKS_SPLIT];
+
+  // Generator for UNPACK_EX. Inputs: 0 -> tensor to unpack, 1 -> number of
+  // slices (scalar), 2 -> axis (scalar). The scalars are folded into Param;
+  // one graph output is produced per slice.
+  _map[ANEURALNETWORKS_UNPACK_EX] = [](const OperationFactory::Param &init_param,
+                                       Operands &operands) {
+    assert(init_param.input_count == 3 && init_param.output_count >= 1);
+
+    OperandIndexSequence inputs{init_param.inputs[0]};
+    OperandIndexSequence outputs;
+    for (uint32_t n = 0; n < init_param.output_count; ++n)
+    {
+      outputs.append(OperandIndex{init_param.outputs[n]});
+    }
+
+    operation::Unpack::Param param;
+    const auto num_index = OperandIndex{init_param.inputs[1]};
+    const auto axis_index = OperandIndex{init_param.inputs[2]};
+    param.num = operands.at(num_index).asScalar<int32_t>();
+    param.axis = operands.at(axis_index).asScalar<int32_t>();
+    param.rank = operands.at(inputs.at(0)).shape().rank();
+
+    return new operation::Unpack{inputs, outputs, param};
+  };
+
+  // Generator for PAD. Inputs: 0 -> tensor to pad, 1 -> paddings tensor; both
+  // stay graph operands. Only the input rank is captured in Param.
+  _map[ANEURALNETWORKS_PAD] = [](const OperationFactory::Param &init_param, Operands &operands) {
+    // PAD produces exactly one output per the NN API; the previous `>= 1`
+    // check was looser than the spec and inconsistent with the other
+    // single-output generators in this factory.
+    assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+    OperandIndexSequence outputs{init_param.outputs[0]};
+
+    operation::Pad::Param param;
+    param.rank = operands.at(inputs.at(0)).shape().rank();
+
+    return new operation::Pad{inputs, outputs, param};
+  };
+
+  // Element-wise MINIMUM / MAXIMUM: plain two-input, one-output operations
+  // with no parameters, so the Operands table is not consulted.
+  _map[ANEURALNETWORKS_MINIMUM] = [](const OperationFactory::Param &init_param, Operands &) {
+    assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+    OperandIndexSequence outputs{init_param.outputs[0]};
+
+    return new operation::Min{inputs, outputs};
+  };
+
+  _map[ANEURALNETWORKS_MAXIMUM] = [](const OperationFactory::Param &init_param, Operands &) {
+    assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+    OperandIndexSequence outputs{init_param.outputs[0]};
+
+    return new operation::Max{inputs, outputs};
+  };
+
+  // Generator for ONE_HOT_EX. The depth, on/off values and axis (inputs 1-4)
+  // are constant scalars folded into Param; only the indices tensor remains a
+  // graph input.
+  _map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param,
+                                        Operands &operands) {
+    assert(init_param.input_count == 5);
+    assert(init_param.output_count == 1);
+    // Each input should be interpreted as follows:
+    //
+    // 0 -> indices tensor
+    // 1 -> depth scalar
+    // 2 -> on_value scalar
+    // 3 -> off_value scalar
+    // 4 -> axis scalar
+    OperandIndexSequence inputs{init_param.inputs[0]};
+    OperandIndexSequence outputs{init_param.outputs[0]};
+
+    operation::OneHot::Param param;
+    param.depth = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
+    param.on_value = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<float>();
+    param.off_value = operands.at(OperandIndex{init_param.inputs[3]}).asScalar<float>();
+    param.axis = operands.at(OperandIndex{init_param.inputs[4]}).asScalar<std::int32_t>();
+
+    return new operation::OneHot{inputs, outputs, param};
+  };
+
+  // SIN and SHAPE_EX: single-input, single-output operations with no
+  // parameters, so the Operands table is not consulted.
+  _map[ANEURALNETWORKS_SIN] = [](const OperationFactory::Param &init_param, Operands &) {
+    assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+    OperandIndexSequence inputs{init_param.inputs[0]};
+    OperandIndexSequence outputs{init_param.outputs[0]};
+
+    return new operation::Sin{inputs, outputs};
+  };
+
+  _map[ANEURALNETWORKS_SHAPE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
+    assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+    OperandIndexSequence inputs{init_param.inputs[0]};
+    OperandIndexSequence outputs{init_param.outputs[0]};
+
+    return new operation::Shape{inputs, outputs};
+  };
+}
+
+// Dispatch an NN API operation to its registered generator.
+// Throws std::runtime_error when no generator is registered for `type`.
+Operation *OperationFactory::create(ANeuralNetworksOperationType type,
+                                    const OperationFactory::Param &param, Operands &operands)
+{
+  // Look the entry up explicitly so unknown types yield a clear error rather
+  // than default-constructing a map slot.
+  const auto entry = _map.find(type);
+  if (entry == _map.cend())
+  {
+    throw std::runtime_error("Unsupported operation type: " + std::to_string(type));
+  }
+  return entry->second(param, operands);
+}
diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h
new file mode 100644
index 000000000..367cf74db
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OPERATION_FACTORY_H__
+#define __OPERATION_FACTORY_H__
+
+#include <unordered_map>
+
+#include "ir/Operands.h"
+#include "ir/Operation.h"
+#include "NeuralNetworks.h"
+#include "NeuralNetworksEx.h"
+
+/**
+ * @brief A class to create a onert operation object from NN API input parameters
+ */
+class OperationFactory
+{
+public:
+  // Raw operand-index arrays exactly as delivered through the NN API's
+  // add-operation call; the pointers are not owned by this struct.
+  struct Param
+  {
+    uint32_t input_count;
+    const uint32_t *inputs;
+    uint32_t output_count;
+    const uint32_t *outputs;
+  };
+
+public:
+  // A generator builds a heap-allocated IR operation from NN API parameters;
+  // the raw pointer it returns is handed to the caller.
+  using Generator =
+      std::function<onert::ir::Operation *(const OperationFactory::Param &, onert::ir::Operands &)>;
+
+public:
+  // Singleton accessor (the constructor is private and fills `_map`).
+  static OperationFactory &get();
+
+private:
+  OperationFactory();
+
+public:
+  // Look up and invoke the generator registered for the given operation type.
+  onert::ir::Operation *create(ANeuralNetworksOperationType, const OperationFactory::Param &param,
+                               onert::ir::Operands &operands);
+  // TODO add "register" method for separating registration, possibly supporting custom-ops
+
+private:
+  // Operation type -> generator table, populated once in the constructor.
+  std::unordered_map<ANeuralNetworksOperationType, Generator> _map;
+};
+
+#endif // __OPERATION_FACTORY_H__
diff --git a/runtime/onert/frontend/tflite/CMakeLists.txt b/runtime/onert/frontend/tflite/CMakeLists.txt
new file mode 100644
index 000000000..229f04f32
--- /dev/null
+++ b/runtime/onert/frontend/tflite/CMakeLists.txt
@@ -0,0 +1,17 @@
+# The TFLite loader is an optional component; skip the whole directory unless
+# it was explicitly requested.
+if(NOT BUILD_TFLITE_LOADER)
+  return()
+endif(NOT BUILD_TFLITE_LOADER)
+
+# FlatBuffers headers are needed to read the generated schema.
+nnfw_find_package(FlatBuffersSource REQUIRED)
+
+set(TFLITE_LOADER_SOURCES src/tflite_loader.cc)
+
+add_library(tflite_loader SHARED ${TFLITE_LOADER_SOURCES})
+
+# The public include dir exposes tflite_loader.h to consumers; the FlatBuffers
+# headers stay a private implementation detail.
+target_include_directories(tflite_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_include_directories(tflite_loader PRIVATE ${FlatBuffersSource_DIR}/include)
+
+target_link_libraries(tflite_loader PUBLIC onert_core)
+target_link_libraries(tflite_loader PRIVATE base_loader nnfw_common nnfw_coverage)
+
+install(TARGETS tflite_loader DESTINATION lib)
diff --git a/runtime/onert/frontend/tflite/include/tflite_loader.h b/runtime/onert/frontend/tflite/include/tflite_loader.h
new file mode 100644
index 000000000..d1816d47a
--- /dev/null
+++ b/runtime/onert/frontend/tflite/include/tflite_loader.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_TFLITE_LOADER_H__
+#define __TFLITE_TFLITE_LOADER_H__
+
+#include "ir/Graph.h"
+
+#include <memory>
+
+namespace onert
+{
+namespace tflite_loader
+{
+
+/**
+ * @brief Load a TFLite flatbuffer model file into an onert IR graph
+ * @param filename Path to the model file
+ * @return The graph built from the model
+ */
+std::unique_ptr<ir::Graph> loadModel(const char *filename);
+
+} // namespace tflite_loader
+} // namespace onert
+
+#endif // __TFLITE_TFLITE_LOADER_H__
diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc
new file mode 100644
index 000000000..7ede4441a
--- /dev/null
+++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite_loader.h"
+#include "base_loader.h"
+#include "tflite_schema_generated.h"
+
+namespace onert
+{
+namespace tflite_loader
+{
+
+namespace
+{
+
+// Adapter bundle that maps the schema-agnostic BaseLoader template onto the
+// onert_tflite generated schema: type aliases plus thin forwarders to the
+// generated free functions.
+struct LoaderDomain
+{
+  using Verifier = flatbuffers::Verifier;
+  using ActivationFunctionType = onert_tflite::ActivationFunctionType;
+  using Buffer = onert_tflite::Buffer;
+  using BuiltinOperator = onert_tflite::BuiltinOperator;
+  using CustomOptionsFormat = onert_tflite::CustomOptionsFormat;
+  using Model = onert_tflite::Model;
+  using Operator = onert_tflite::Operator;
+  using Padding = onert_tflite::Padding;
+  using Pool2DOptions = onert_tflite::Pool2DOptions;
+  using Tensor = onert_tflite::Tensor;
+  using TensorType = onert_tflite::TensorType;
+  using SubGraph = onert_tflite::SubGraph;
+
+  static const char *EnumNameBuiltinOperator(BuiltinOperator e)
+  {
+    return onert_tflite::EnumNameBuiltinOperator(e);
+  }
+  static const char *EnumNameActivationFunctionType(ActivationFunctionType e)
+  {
+    return onert_tflite::EnumNameActivationFunctionType(e);
+  }
+  static const char *EnumNameTensorType(TensorType e)
+  {
+    return onert_tflite::EnumNameTensorType(e);
+  }
+  static const Model *GetModel(const void *buf) { return onert_tflite::GetModel(buf); }
+  static bool VerifyModelBuffer(Verifier &verifier)
+  {
+    return onert_tflite::VerifyModelBuffer(verifier);
+  }
+};
+
+// Concrete loader for TFLite models; inherits the generic loading machinery
+// from BaseLoader (CRTP) and supplies the subgraph-building step.
+class TFLiteLoader final : public base_loader::BaseLoader<LoaderDomain, TFLiteLoader>
+{
+public:
+  using BaseLoader::BaseLoader;
+
+  // Translate one flatbuffer SubGraph into an IR graph: operands first (so
+  // `_tensor_to_operand` maps tensor index -> operand index), then the
+  // graph's I/O lists, then the operations.
+  std::unique_ptr<ir::Graph> loadSubgraph(const onert_tflite::SubGraph *tflite_subg)
+  {
+    auto subg = std::make_unique<ir::Graph>();
+    // Load tensors
+    _tensor_to_operand.resize(tflite_subg->tensors()->size());
+    for (flatbuffers::uoffset_t i = 0; i < tflite_subg->tensors()->size(); ++i)
+    {
+      _tensor_to_operand[i] = loadOperand(tflite_subg->tensors()->Get(i), *subg);
+    }
+    // Set inputs
+    for (const std::int32_t input_ind : *tflite_subg->inputs())
+    {
+      subg->addInput(_tensor_to_operand[input_ind]);
+    }
+    // Set outputs
+    for (const std::int32_t output_ind : *tflite_subg->outputs())
+    {
+      subg->addOutput(_tensor_to_operand[output_ind]);
+    }
+    // Create operations
+    for (const auto *op : *tflite_subg->operators())
+    {
+      loadOperation(op, *subg);
+    }
+
+    // Seal the graph; no further mutation after this point.
+    subg->finishBuilding();
+
+    return subg;
+  }
+};
+
+} // namespace
+
+// Entry point: read a TFLite model file and return the populated primary graph.
+std::unique_ptr<ir::Graph> loadModel(const char *filename)
+{
+  // Build an empty graph first; the loader fills it in place from the file.
+  auto graph = std::make_unique<ir::Graph>();
+  TFLiteLoader loader(graph);
+  loader.loadFromFile(filename);
+  return graph;
+}
+
+} // namespace tflite_loader
+} // namespace onert
diff --git a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h
new file mode 100644
index 000000000..c6e9147cd
--- /dev/null
+++ b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h
@@ -0,0 +1,9553 @@
+/*
+ * Copyright (c) 2019-2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// automatically generated by the FlatBuffers compiler, do not modify
+
+#ifndef FLATBUFFERS_GENERATED_TFLITESCHEMA_ONERT_TFLITE_H_
+#define FLATBUFFERS_GENERATED_TFLITESCHEMA_ONERT_TFLITE_H_
+
+#include "flatbuffers/flatbuffers.h"
+
+namespace onert_tflite
+{
+
+struct CustomQuantization;
+
+struct QuantizationParameters;
+
+struct Int32Vector;
+
+struct Uint16Vector;
+
+struct Uint8Vector;
+
+struct DimensionMetadata;
+
+struct SparsityParameters;
+
+struct Tensor;
+
+struct Conv2DOptions;
+
+struct Pool2DOptions;
+
+struct DepthwiseConv2DOptions;
+
+struct ConcatEmbeddingsOptions;
+
+struct LSHProjectionOptions;
+
+struct SVDFOptions;
+
+struct RNNOptions;
+
+struct SequenceRNNOptions;
+
+struct BidirectionalSequenceRNNOptions;
+
+struct FullyConnectedOptions;
+
+struct SoftmaxOptions;
+
+struct ConcatenationOptions;
+
+struct AddOptions;
+
+struct MulOptions;
+
+struct L2NormOptions;
+
+struct LocalResponseNormalizationOptions;
+
+struct LSTMOptions;
+
+struct UnidirectionalSequenceLSTMOptions;
+
+struct BidirectionalSequenceLSTMOptions;
+
+struct ResizeBilinearOptions;
+
+struct ResizeNearestNeighborOptions;
+
+struct CallOptions;
+
+struct PadOptions;
+
+struct PadV2Options;
+
+struct ReshapeOptions;
+
+struct SpaceToBatchNDOptions;
+
+struct BatchToSpaceNDOptions;
+
+struct SkipGramOptions;
+
+struct SpaceToDepthOptions;
+
+struct DepthToSpaceOptions;
+
+struct SubOptions;
+
+struct DivOptions;
+
+struct TopKV2Options;
+
+struct EmbeddingLookupSparseOptions;
+
+struct GatherOptions;
+
+struct TransposeOptions;
+
+struct ExpOptions;
+
+struct CosOptions;
+
+struct ReducerOptions;
+
+struct SqueezeOptions;
+
+struct SplitOptions;
+
+struct SplitVOptions;
+
+struct StridedSliceOptions;
+
+struct LogSoftmaxOptions;
+
+struct CastOptions;
+
+struct DequantizeOptions;
+
+struct MaximumMinimumOptions;
+
+struct TileOptions;
+
+struct ArgMaxOptions;
+
+struct ArgMinOptions;
+
+struct GreaterOptions;
+
+struct GreaterEqualOptions;
+
+struct LessOptions;
+
+struct LessEqualOptions;
+
+struct NegOptions;
+
+struct SelectOptions;
+
+struct SliceOptions;
+
+struct TransposeConvOptions;
+
+struct ExpandDimsOptions;
+
+struct SparseToDenseOptions;
+
+struct EqualOptions;
+
+struct NotEqualOptions;
+
+struct ShapeOptions;
+
+struct RankOptions;
+
+struct PowOptions;
+
+struct FakeQuantOptions;
+
+struct PackOptions;
+
+struct LogicalOrOptions;
+
+struct OneHotOptions;
+
+struct AbsOptions;
+
+struct HardSwishOptions;
+
+struct LogicalAndOptions;
+
+struct LogicalNotOptions;
+
+struct UnpackOptions;
+
+struct FloorDivOptions;
+
+struct SquareOptions;
+
+struct ZerosLikeOptions;
+
+struct FillOptions;
+
+struct FloorModOptions;
+
+struct RangeOptions;
+
+struct LeakyReluOptions;
+
+struct SquaredDifferenceOptions;
+
+struct MirrorPadOptions;
+
+struct UniqueOptions;
+
+struct ReverseV2Options;
+
+struct AddNOptions;
+
+struct GatherNdOptions;
+
+struct WhereOptions;
+
+struct ReverseSequenceOptions;
+
+struct MatrixDiagOptions;
+
+struct QuantizeOptions;
+
+struct MatrixSetDiagOptions;
+
+struct IfOptions;
+
+struct WhileOptions;
+
+struct NonMaxSuppressionV4Options;
+
+struct NonMaxSuppressionV5Options;
+
+struct ScatterNdOptions;
+
+struct SelectV2Options;
+
+struct DensifyOptions;
+
+struct SegmentSumOptions;
+
+struct BatchMatMulOptions;
+
+struct OperatorCode;
+
+struct Operator;
+
+struct SubGraph;
+
+struct Buffer;
+
+struct Metadata;
+
+struct Model;
+
+enum TensorType
+{
+ TensorType_FLOAT32 = 0,
+ TensorType_FLOAT16 = 1,
+ TensorType_INT32 = 2,
+ TensorType_UINT8 = 3,
+ TensorType_INT64 = 4,
+ TensorType_STRING = 5,
+ TensorType_BOOL = 6,
+ TensorType_INT16 = 7,
+ TensorType_COMPLEX64 = 8,
+ TensorType_INT8 = 9,
+ TensorType_FLOAT64 = 10,
+ TensorType_MIN = TensorType_FLOAT32,
+ TensorType_MAX = TensorType_FLOAT64
+};
+
+inline const TensorType (&EnumValuesTensorType())[11]
+{
+ static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32,
+ TensorType_UINT8, TensorType_INT64, TensorType_STRING,
+ TensorType_BOOL, TensorType_INT16, TensorType_COMPLEX64,
+ TensorType_INT8, TensorType_FLOAT64};
+ return values;
+}
+
+inline const char *const *EnumNamesTensorType()
+{
+ static const char *const names[] = {"FLOAT32", "FLOAT16", "INT32", "UINT8",
+ "INT64", "STRING", "BOOL", "INT16",
+ "COMPLEX64", "INT8", "FLOAT64", nullptr};
+ return names;
+}
+
+inline const char *EnumNameTensorType(TensorType e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesTensorType()[index];
+}
+
+enum QuantizationDetails
+{
+ QuantizationDetails_NONE = 0,
+ QuantizationDetails_CustomQuantization = 1,
+ QuantizationDetails_MIN = QuantizationDetails_NONE,
+ QuantizationDetails_MAX = QuantizationDetails_CustomQuantization
+};
+
+inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2]
+{
+ static const QuantizationDetails values[] = {QuantizationDetails_NONE,
+ QuantizationDetails_CustomQuantization};
+ return values;
+}
+
+inline const char *const *EnumNamesQuantizationDetails()
+{
+ static const char *const names[] = {"NONE", "CustomQuantization", nullptr};
+ return names;
+}
+
+inline const char *EnumNameQuantizationDetails(QuantizationDetails e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesQuantizationDetails()[index];
+}
+
+template <typename T> struct QuantizationDetailsTraits
+{
+ static const QuantizationDetails enum_value = QuantizationDetails_NONE;
+};
+
+template <> struct QuantizationDetailsTraits<CustomQuantization>
+{
+ static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
+};
+
+bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
+ QuantizationDetails type);
+bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types);
+
+enum DimensionType
+{
+ DimensionType_DENSE = 0,
+ DimensionType_SPARSE_CSR = 1,
+ DimensionType_MIN = DimensionType_DENSE,
+ DimensionType_MAX = DimensionType_SPARSE_CSR
+};
+
+inline const DimensionType (&EnumValuesDimensionType())[2]
+{
+ static const DimensionType values[] = {DimensionType_DENSE, DimensionType_SPARSE_CSR};
+ return values;
+}
+
+inline const char *const *EnumNamesDimensionType()
+{
+ static const char *const names[] = {"DENSE", "SPARSE_CSR", nullptr};
+ return names;
+}
+
+inline const char *EnumNameDimensionType(DimensionType e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesDimensionType()[index];
+}
+
+enum SparseIndexVector
+{
+ SparseIndexVector_NONE = 0,
+ SparseIndexVector_Int32Vector = 1,
+ SparseIndexVector_Uint16Vector = 2,
+ SparseIndexVector_Uint8Vector = 3,
+ SparseIndexVector_MIN = SparseIndexVector_NONE,
+ SparseIndexVector_MAX = SparseIndexVector_Uint8Vector
+};
+
+inline const SparseIndexVector (&EnumValuesSparseIndexVector())[4]
+{
+ static const SparseIndexVector values[] = {SparseIndexVector_NONE, SparseIndexVector_Int32Vector,
+ SparseIndexVector_Uint16Vector,
+ SparseIndexVector_Uint8Vector};
+ return values;
+}
+
+inline const char *const *EnumNamesSparseIndexVector()
+{
+ static const char *const names[] = {"NONE", "Int32Vector", "Uint16Vector", "Uint8Vector",
+ nullptr};
+ return names;
+}
+
+inline const char *EnumNameSparseIndexVector(SparseIndexVector e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesSparseIndexVector()[index];
+}
+
+template <typename T> struct SparseIndexVectorTraits
+{
+ static const SparseIndexVector enum_value = SparseIndexVector_NONE;
+};
+
+template <> struct SparseIndexVectorTraits<Int32Vector>
+{
+ static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector;
+};
+
+template <> struct SparseIndexVectorTraits<Uint16Vector>
+{
+ static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector;
+};
+
+template <> struct SparseIndexVectorTraits<Uint8Vector>
+{
+ static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector;
+};
+
+bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void *obj,
+ SparseIndexVector type);
+bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types);
+
+enum BuiltinOperator
+{
+ BuiltinOperator_ADD = 0,
+ BuiltinOperator_AVERAGE_POOL_2D = 1,
+ BuiltinOperator_CONCATENATION = 2,
+ BuiltinOperator_CONV_2D = 3,
+ BuiltinOperator_DEPTHWISE_CONV_2D = 4,
+ BuiltinOperator_DEPTH_TO_SPACE = 5,
+ BuiltinOperator_DEQUANTIZE = 6,
+ BuiltinOperator_EMBEDDING_LOOKUP = 7,
+ BuiltinOperator_FLOOR = 8,
+ BuiltinOperator_FULLY_CONNECTED = 9,
+ BuiltinOperator_HASHTABLE_LOOKUP = 10,
+ BuiltinOperator_L2_NORMALIZATION = 11,
+ BuiltinOperator_L2_POOL_2D = 12,
+ BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION = 13,
+ BuiltinOperator_LOGISTIC = 14,
+ BuiltinOperator_LSH_PROJECTION = 15,
+ BuiltinOperator_LSTM = 16,
+ BuiltinOperator_MAX_POOL_2D = 17,
+ BuiltinOperator_MUL = 18,
+ BuiltinOperator_RELU = 19,
+ BuiltinOperator_RELU_N1_TO_1 = 20,
+ BuiltinOperator_RELU6 = 21,
+ BuiltinOperator_RESHAPE = 22,
+ BuiltinOperator_RESIZE_BILINEAR = 23,
+ BuiltinOperator_RNN = 24,
+ BuiltinOperator_SOFTMAX = 25,
+ BuiltinOperator_SPACE_TO_DEPTH = 26,
+ BuiltinOperator_SVDF = 27,
+ BuiltinOperator_TANH = 28,
+ BuiltinOperator_CONCAT_EMBEDDINGS = 29,
+ BuiltinOperator_SKIP_GRAM = 30,
+ BuiltinOperator_CALL = 31,
+ BuiltinOperator_CUSTOM = 32,
+ BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33,
+ BuiltinOperator_PAD = 34,
+ BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ BuiltinOperator_GATHER = 36,
+ BuiltinOperator_BATCH_TO_SPACE_ND = 37,
+ BuiltinOperator_SPACE_TO_BATCH_ND = 38,
+ BuiltinOperator_TRANSPOSE = 39,
+ BuiltinOperator_MEAN = 40,
+ BuiltinOperator_SUB = 41,
+ BuiltinOperator_DIV = 42,
+ BuiltinOperator_SQUEEZE = 43,
+ BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ BuiltinOperator_STRIDED_SLICE = 45,
+ BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ BuiltinOperator_EXP = 47,
+ BuiltinOperator_TOPK_V2 = 48,
+ BuiltinOperator_SPLIT = 49,
+ BuiltinOperator_LOG_SOFTMAX = 50,
+ BuiltinOperator_DELEGATE = 51,
+ BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ BuiltinOperator_CAST = 53,
+ BuiltinOperator_PRELU = 54,
+ BuiltinOperator_MAXIMUM = 55,
+ BuiltinOperator_ARG_MAX = 56,
+ BuiltinOperator_MINIMUM = 57,
+ BuiltinOperator_LESS = 58,
+ BuiltinOperator_NEG = 59,
+ BuiltinOperator_PADV2 = 60,
+ BuiltinOperator_GREATER = 61,
+ BuiltinOperator_GREATER_EQUAL = 62,
+ BuiltinOperator_LESS_EQUAL = 63,
+ BuiltinOperator_SELECT = 64,
+ BuiltinOperator_SLICE = 65,
+ BuiltinOperator_SIN = 66,
+ BuiltinOperator_TRANSPOSE_CONV = 67,
+ BuiltinOperator_SPARSE_TO_DENSE = 68,
+ BuiltinOperator_TILE = 69,
+ BuiltinOperator_EXPAND_DIMS = 70,
+ BuiltinOperator_EQUAL = 71,
+ BuiltinOperator_NOT_EQUAL = 72,
+ BuiltinOperator_LOG = 73,
+ BuiltinOperator_SUM = 74,
+ BuiltinOperator_SQRT = 75,
+ BuiltinOperator_RSQRT = 76,
+ BuiltinOperator_SHAPE = 77,
+ BuiltinOperator_POW = 78,
+ BuiltinOperator_ARG_MIN = 79,
+ BuiltinOperator_FAKE_QUANT = 80,
+ BuiltinOperator_REDUCE_PROD = 81,
+ BuiltinOperator_REDUCE_MAX = 82,
+ BuiltinOperator_PACK = 83,
+ BuiltinOperator_LOGICAL_OR = 84,
+ BuiltinOperator_ONE_HOT = 85,
+ BuiltinOperator_LOGICAL_AND = 86,
+ BuiltinOperator_LOGICAL_NOT = 87,
+ BuiltinOperator_UNPACK = 88,
+ BuiltinOperator_REDUCE_MIN = 89,
+ BuiltinOperator_FLOOR_DIV = 90,
+ BuiltinOperator_REDUCE_ANY = 91,
+ BuiltinOperator_SQUARE = 92,
+ BuiltinOperator_ZEROS_LIKE = 93,
+ BuiltinOperator_FILL = 94,
+ BuiltinOperator_FLOOR_MOD = 95,
+ BuiltinOperator_RANGE = 96,
+ BuiltinOperator_RESIZE_NEAREST_NEIGHBOR = 97,
+ BuiltinOperator_LEAKY_RELU = 98,
+ BuiltinOperator_SQUARED_DIFFERENCE = 99,
+ BuiltinOperator_MIRROR_PAD = 100,
+ BuiltinOperator_ABS = 101,
+ BuiltinOperator_SPLIT_V = 102,
+ BuiltinOperator_UNIQUE = 103,
+ BuiltinOperator_CEIL = 104,
+ BuiltinOperator_REVERSE_V2 = 105,
+ BuiltinOperator_ADD_N = 106,
+ BuiltinOperator_GATHER_ND = 107,
+ BuiltinOperator_COS = 108,
+ BuiltinOperator_WHERE = 109,
+ BuiltinOperator_RANK = 110,
+ BuiltinOperator_ELU = 111,
+ BuiltinOperator_REVERSE_SEQUENCE = 112,
+ BuiltinOperator_MATRIX_DIAG = 113,
+ BuiltinOperator_QUANTIZE = 114,
+ BuiltinOperator_MATRIX_SET_DIAG = 115,
+ BuiltinOperator_ROUND = 116,
+ BuiltinOperator_HARD_SWISH = 117,
+ BuiltinOperator_IF = 118,
+ BuiltinOperator_WHILE = 119,
+ BuiltinOperator_NON_MAX_SUPPRESSION_V4 = 120,
+ BuiltinOperator_NON_MAX_SUPPRESSION_V5 = 121,
+ BuiltinOperator_SCATTER_ND = 122,
+ BuiltinOperator_SELECT_V2 = 123,
+ BuiltinOperator_DENSIFY = 124,
+ BuiltinOperator_SEGMENT_SUM = 125,
+ BuiltinOperator_BATCH_MATMUL = 126,
+ BuiltinOperator_MIN = BuiltinOperator_ADD,
+ BuiltinOperator_MAX = BuiltinOperator_BATCH_MATMUL
+};
+
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[127]
+{
+ static const BuiltinOperator values[] = {BuiltinOperator_ADD,
+ BuiltinOperator_AVERAGE_POOL_2D,
+ BuiltinOperator_CONCATENATION,
+ BuiltinOperator_CONV_2D,
+ BuiltinOperator_DEPTHWISE_CONV_2D,
+ BuiltinOperator_DEPTH_TO_SPACE,
+ BuiltinOperator_DEQUANTIZE,
+ BuiltinOperator_EMBEDDING_LOOKUP,
+ BuiltinOperator_FLOOR,
+ BuiltinOperator_FULLY_CONNECTED,
+ BuiltinOperator_HASHTABLE_LOOKUP,
+ BuiltinOperator_L2_NORMALIZATION,
+ BuiltinOperator_L2_POOL_2D,
+ BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
+ BuiltinOperator_LOGISTIC,
+ BuiltinOperator_LSH_PROJECTION,
+ BuiltinOperator_LSTM,
+ BuiltinOperator_MAX_POOL_2D,
+ BuiltinOperator_MUL,
+ BuiltinOperator_RELU,
+ BuiltinOperator_RELU_N1_TO_1,
+ BuiltinOperator_RELU6,
+ BuiltinOperator_RESHAPE,
+ BuiltinOperator_RESIZE_BILINEAR,
+ BuiltinOperator_RNN,
+ BuiltinOperator_SOFTMAX,
+ BuiltinOperator_SPACE_TO_DEPTH,
+ BuiltinOperator_SVDF,
+ BuiltinOperator_TANH,
+ BuiltinOperator_CONCAT_EMBEDDINGS,
+ BuiltinOperator_SKIP_GRAM,
+ BuiltinOperator_CALL,
+ BuiltinOperator_CUSTOM,
+ BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
+ BuiltinOperator_PAD,
+ BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
+ BuiltinOperator_GATHER,
+ BuiltinOperator_BATCH_TO_SPACE_ND,
+ BuiltinOperator_SPACE_TO_BATCH_ND,
+ BuiltinOperator_TRANSPOSE,
+ BuiltinOperator_MEAN,
+ BuiltinOperator_SUB,
+ BuiltinOperator_DIV,
+ BuiltinOperator_SQUEEZE,
+ BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
+ BuiltinOperator_STRIDED_SLICE,
+ BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
+ BuiltinOperator_EXP,
+ BuiltinOperator_TOPK_V2,
+ BuiltinOperator_SPLIT,
+ BuiltinOperator_LOG_SOFTMAX,
+ BuiltinOperator_DELEGATE,
+ BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM,
+ BuiltinOperator_CAST,
+ BuiltinOperator_PRELU,
+ BuiltinOperator_MAXIMUM,
+ BuiltinOperator_ARG_MAX,
+ BuiltinOperator_MINIMUM,
+ BuiltinOperator_LESS,
+ BuiltinOperator_NEG,
+ BuiltinOperator_PADV2,
+ BuiltinOperator_GREATER,
+ BuiltinOperator_GREATER_EQUAL,
+ BuiltinOperator_LESS_EQUAL,
+ BuiltinOperator_SELECT,
+ BuiltinOperator_SLICE,
+ BuiltinOperator_SIN,
+ BuiltinOperator_TRANSPOSE_CONV,
+ BuiltinOperator_SPARSE_TO_DENSE,
+ BuiltinOperator_TILE,
+ BuiltinOperator_EXPAND_DIMS,
+ BuiltinOperator_EQUAL,
+ BuiltinOperator_NOT_EQUAL,
+ BuiltinOperator_LOG,
+ BuiltinOperator_SUM,
+ BuiltinOperator_SQRT,
+ BuiltinOperator_RSQRT,
+ BuiltinOperator_SHAPE,
+ BuiltinOperator_POW,
+ BuiltinOperator_ARG_MIN,
+ BuiltinOperator_FAKE_QUANT,
+ BuiltinOperator_REDUCE_PROD,
+ BuiltinOperator_REDUCE_MAX,
+ BuiltinOperator_PACK,
+ BuiltinOperator_LOGICAL_OR,
+ BuiltinOperator_ONE_HOT,
+ BuiltinOperator_LOGICAL_AND,
+ BuiltinOperator_LOGICAL_NOT,
+ BuiltinOperator_UNPACK,
+ BuiltinOperator_REDUCE_MIN,
+ BuiltinOperator_FLOOR_DIV,
+ BuiltinOperator_REDUCE_ANY,
+ BuiltinOperator_SQUARE,
+ BuiltinOperator_ZEROS_LIKE,
+ BuiltinOperator_FILL,
+ BuiltinOperator_FLOOR_MOD,
+ BuiltinOperator_RANGE,
+ BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
+ BuiltinOperator_LEAKY_RELU,
+ BuiltinOperator_SQUARED_DIFFERENCE,
+ BuiltinOperator_MIRROR_PAD,
+ BuiltinOperator_ABS,
+ BuiltinOperator_SPLIT_V,
+ BuiltinOperator_UNIQUE,
+ BuiltinOperator_CEIL,
+ BuiltinOperator_REVERSE_V2,
+ BuiltinOperator_ADD_N,
+ BuiltinOperator_GATHER_ND,
+ BuiltinOperator_COS,
+ BuiltinOperator_WHERE,
+ BuiltinOperator_RANK,
+ BuiltinOperator_ELU,
+ BuiltinOperator_REVERSE_SEQUENCE,
+ BuiltinOperator_MATRIX_DIAG,
+ BuiltinOperator_QUANTIZE,
+ BuiltinOperator_MATRIX_SET_DIAG,
+ BuiltinOperator_ROUND,
+ BuiltinOperator_HARD_SWISH,
+ BuiltinOperator_IF,
+ BuiltinOperator_WHILE,
+ BuiltinOperator_NON_MAX_SUPPRESSION_V4,
+ BuiltinOperator_NON_MAX_SUPPRESSION_V5,
+ BuiltinOperator_SCATTER_ND,
+ BuiltinOperator_SELECT_V2,
+ BuiltinOperator_DENSIFY,
+ BuiltinOperator_SEGMENT_SUM,
+ BuiltinOperator_BATCH_MATMUL};
+ return values;
+}
+
+inline const char *const *EnumNamesBuiltinOperator()
+{
+ static const char *const names[] = {"ADD",
+ "AVERAGE_POOL_2D",
+ "CONCATENATION",
+ "CONV_2D",
+ "DEPTHWISE_CONV_2D",
+ "DEPTH_TO_SPACE",
+ "DEQUANTIZE",
+ "EMBEDDING_LOOKUP",
+ "FLOOR",
+ "FULLY_CONNECTED",
+ "HASHTABLE_LOOKUP",
+ "L2_NORMALIZATION",
+ "L2_POOL_2D",
+ "LOCAL_RESPONSE_NORMALIZATION",
+ "LOGISTIC",
+ "LSH_PROJECTION",
+ "LSTM",
+ "MAX_POOL_2D",
+ "MUL",
+ "RELU",
+ "RELU_N1_TO_1",
+ "RELU6",
+ "RESHAPE",
+ "RESIZE_BILINEAR",
+ "RNN",
+ "SOFTMAX",
+ "SPACE_TO_DEPTH",
+ "SVDF",
+ "TANH",
+ "CONCAT_EMBEDDINGS",
+ "SKIP_GRAM",
+ "CALL",
+ "CUSTOM",
+ "EMBEDDING_LOOKUP_SPARSE",
+ "PAD",
+ "UNIDIRECTIONAL_SEQUENCE_RNN",
+ "GATHER",
+ "BATCH_TO_SPACE_ND",
+ "SPACE_TO_BATCH_ND",
+ "TRANSPOSE",
+ "MEAN",
+ "SUB",
+ "DIV",
+ "SQUEEZE",
+ "UNIDIRECTIONAL_SEQUENCE_LSTM",
+ "STRIDED_SLICE",
+ "BIDIRECTIONAL_SEQUENCE_RNN",
+ "EXP",
+ "TOPK_V2",
+ "SPLIT",
+ "LOG_SOFTMAX",
+ "DELEGATE",
+ "BIDIRECTIONAL_SEQUENCE_LSTM",
+ "CAST",
+ "PRELU",
+ "MAXIMUM",
+ "ARG_MAX",
+ "MINIMUM",
+ "LESS",
+ "NEG",
+ "PADV2",
+ "GREATER",
+ "GREATER_EQUAL",
+ "LESS_EQUAL",
+ "SELECT",
+ "SLICE",
+ "SIN",
+ "TRANSPOSE_CONV",
+ "SPARSE_TO_DENSE",
+ "TILE",
+ "EXPAND_DIMS",
+ "EQUAL",
+ "NOT_EQUAL",
+ "LOG",
+ "SUM",
+ "SQRT",
+ "RSQRT",
+ "SHAPE",
+ "POW",
+ "ARG_MIN",
+ "FAKE_QUANT",
+ "REDUCE_PROD",
+ "REDUCE_MAX",
+ "PACK",
+ "LOGICAL_OR",
+ "ONE_HOT",
+ "LOGICAL_AND",
+ "LOGICAL_NOT",
+ "UNPACK",
+ "REDUCE_MIN",
+ "FLOOR_DIV",
+ "REDUCE_ANY",
+ "SQUARE",
+ "ZEROS_LIKE",
+ "FILL",
+ "FLOOR_MOD",
+ "RANGE",
+ "RESIZE_NEAREST_NEIGHBOR",
+ "LEAKY_RELU",
+ "SQUARED_DIFFERENCE",
+ "MIRROR_PAD",
+ "ABS",
+ "SPLIT_V",
+ "UNIQUE",
+ "CEIL",
+ "REVERSE_V2",
+ "ADD_N",
+ "GATHER_ND",
+ "COS",
+ "WHERE",
+ "RANK",
+ "ELU",
+ "REVERSE_SEQUENCE",
+ "MATRIX_DIAG",
+ "QUANTIZE",
+ "MATRIX_SET_DIAG",
+ "ROUND",
+ "HARD_SWISH",
+ "IF",
+ "WHILE",
+ "NON_MAX_SUPPRESSION_V4",
+ "NON_MAX_SUPPRESSION_V5",
+ "SCATTER_ND",
+ "SELECT_V2",
+ "DENSIFY",
+ "SEGMENT_SUM",
+ "BATCH_MATMUL",
+ nullptr};
+ return names;
+}
+
+// Returns the symbolic name of a BuiltinOperator value, or "" for a value
+// outside the generated table (e.g. an operator from a newer schema).
+inline const char *EnumNameBuiltinOperator(BuiltinOperator e)
+{
+  // Guard the lookup: EnumNamesBuiltinOperator() only covers [MIN, MAX];
+  // without this check an unknown value reads past the array (UB).
+  if (e < BuiltinOperator_MIN || e > BuiltinOperator_MAX)
+    return "";
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesBuiltinOperator()[index];
+}
+
+enum BuiltinOptions
+{
+ BuiltinOptions_NONE = 0,
+ BuiltinOptions_Conv2DOptions = 1,
+ BuiltinOptions_DepthwiseConv2DOptions = 2,
+ BuiltinOptions_ConcatEmbeddingsOptions = 3,
+ BuiltinOptions_LSHProjectionOptions = 4,
+ BuiltinOptions_Pool2DOptions = 5,
+ BuiltinOptions_SVDFOptions = 6,
+ BuiltinOptions_RNNOptions = 7,
+ BuiltinOptions_FullyConnectedOptions = 8,
+ BuiltinOptions_SoftmaxOptions = 9,
+ BuiltinOptions_ConcatenationOptions = 10,
+ BuiltinOptions_AddOptions = 11,
+ BuiltinOptions_L2NormOptions = 12,
+ BuiltinOptions_LocalResponseNormalizationOptions = 13,
+ BuiltinOptions_LSTMOptions = 14,
+ BuiltinOptions_ResizeBilinearOptions = 15,
+ BuiltinOptions_CallOptions = 16,
+ BuiltinOptions_ReshapeOptions = 17,
+ BuiltinOptions_SkipGramOptions = 18,
+ BuiltinOptions_SpaceToDepthOptions = 19,
+ BuiltinOptions_EmbeddingLookupSparseOptions = 20,
+ BuiltinOptions_MulOptions = 21,
+ BuiltinOptions_PadOptions = 22,
+ BuiltinOptions_GatherOptions = 23,
+ BuiltinOptions_BatchToSpaceNDOptions = 24,
+ BuiltinOptions_SpaceToBatchNDOptions = 25,
+ BuiltinOptions_TransposeOptions = 26,
+ BuiltinOptions_ReducerOptions = 27,
+ BuiltinOptions_SubOptions = 28,
+ BuiltinOptions_DivOptions = 29,
+ BuiltinOptions_SqueezeOptions = 30,
+ BuiltinOptions_SequenceRNNOptions = 31,
+ BuiltinOptions_StridedSliceOptions = 32,
+ BuiltinOptions_ExpOptions = 33,
+ BuiltinOptions_TopKV2Options = 34,
+ BuiltinOptions_SplitOptions = 35,
+ BuiltinOptions_LogSoftmaxOptions = 36,
+ BuiltinOptions_CastOptions = 37,
+ BuiltinOptions_DequantizeOptions = 38,
+ BuiltinOptions_MaximumMinimumOptions = 39,
+ BuiltinOptions_ArgMaxOptions = 40,
+ BuiltinOptions_LessOptions = 41,
+ BuiltinOptions_NegOptions = 42,
+ BuiltinOptions_PadV2Options = 43,
+ BuiltinOptions_GreaterOptions = 44,
+ BuiltinOptions_GreaterEqualOptions = 45,
+ BuiltinOptions_LessEqualOptions = 46,
+ BuiltinOptions_SelectOptions = 47,
+ BuiltinOptions_SliceOptions = 48,
+ BuiltinOptions_TransposeConvOptions = 49,
+ BuiltinOptions_SparseToDenseOptions = 50,
+ BuiltinOptions_TileOptions = 51,
+ BuiltinOptions_ExpandDimsOptions = 52,
+ BuiltinOptions_EqualOptions = 53,
+ BuiltinOptions_NotEqualOptions = 54,
+ BuiltinOptions_ShapeOptions = 55,
+ BuiltinOptions_PowOptions = 56,
+ BuiltinOptions_ArgMinOptions = 57,
+ BuiltinOptions_FakeQuantOptions = 58,
+ BuiltinOptions_PackOptions = 59,
+ BuiltinOptions_LogicalOrOptions = 60,
+ BuiltinOptions_OneHotOptions = 61,
+ BuiltinOptions_LogicalAndOptions = 62,
+ BuiltinOptions_LogicalNotOptions = 63,
+ BuiltinOptions_UnpackOptions = 64,
+ BuiltinOptions_FloorDivOptions = 65,
+ BuiltinOptions_SquareOptions = 66,
+ BuiltinOptions_ZerosLikeOptions = 67,
+ BuiltinOptions_FillOptions = 68,
+ BuiltinOptions_BidirectionalSequenceLSTMOptions = 69,
+ BuiltinOptions_BidirectionalSequenceRNNOptions = 70,
+ BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71,
+ BuiltinOptions_FloorModOptions = 72,
+ BuiltinOptions_RangeOptions = 73,
+ BuiltinOptions_ResizeNearestNeighborOptions = 74,
+ BuiltinOptions_LeakyReluOptions = 75,
+ BuiltinOptions_SquaredDifferenceOptions = 76,
+ BuiltinOptions_MirrorPadOptions = 77,
+ BuiltinOptions_AbsOptions = 78,
+ BuiltinOptions_SplitVOptions = 79,
+ BuiltinOptions_UniqueOptions = 80,
+ BuiltinOptions_ReverseV2Options = 81,
+ BuiltinOptions_AddNOptions = 82,
+ BuiltinOptions_GatherNdOptions = 83,
+ BuiltinOptions_CosOptions = 84,
+ BuiltinOptions_WhereOptions = 85,
+ BuiltinOptions_RankOptions = 86,
+ BuiltinOptions_ReverseSequenceOptions = 87,
+ BuiltinOptions_MatrixDiagOptions = 88,
+ BuiltinOptions_QuantizeOptions = 89,
+ BuiltinOptions_MatrixSetDiagOptions = 90,
+ BuiltinOptions_HardSwishOptions = 91,
+ BuiltinOptions_IfOptions = 92,
+ BuiltinOptions_WhileOptions = 93,
+ BuiltinOptions_DepthToSpaceOptions = 94,
+ BuiltinOptions_NonMaxSuppressionV4Options = 95,
+ BuiltinOptions_NonMaxSuppressionV5Options = 96,
+ BuiltinOptions_ScatterNdOptions = 97,
+ BuiltinOptions_SelectV2Options = 98,
+ BuiltinOptions_DensifyOptions = 99,
+ BuiltinOptions_SegmentSumOptions = 100,
+ BuiltinOptions_BatchMatMulOptions = 101,
+ BuiltinOptions_MIN = BuiltinOptions_NONE,
+ BuiltinOptions_MAX = BuiltinOptions_BatchMatMulOptions
+};
+
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[102]
+{
+ static const BuiltinOptions values[] = {BuiltinOptions_NONE,
+ BuiltinOptions_Conv2DOptions,
+ BuiltinOptions_DepthwiseConv2DOptions,
+ BuiltinOptions_ConcatEmbeddingsOptions,
+ BuiltinOptions_LSHProjectionOptions,
+ BuiltinOptions_Pool2DOptions,
+ BuiltinOptions_SVDFOptions,
+ BuiltinOptions_RNNOptions,
+ BuiltinOptions_FullyConnectedOptions,
+ BuiltinOptions_SoftmaxOptions,
+ BuiltinOptions_ConcatenationOptions,
+ BuiltinOptions_AddOptions,
+ BuiltinOptions_L2NormOptions,
+ BuiltinOptions_LocalResponseNormalizationOptions,
+ BuiltinOptions_LSTMOptions,
+ BuiltinOptions_ResizeBilinearOptions,
+ BuiltinOptions_CallOptions,
+ BuiltinOptions_ReshapeOptions,
+ BuiltinOptions_SkipGramOptions,
+ BuiltinOptions_SpaceToDepthOptions,
+ BuiltinOptions_EmbeddingLookupSparseOptions,
+ BuiltinOptions_MulOptions,
+ BuiltinOptions_PadOptions,
+ BuiltinOptions_GatherOptions,
+ BuiltinOptions_BatchToSpaceNDOptions,
+ BuiltinOptions_SpaceToBatchNDOptions,
+ BuiltinOptions_TransposeOptions,
+ BuiltinOptions_ReducerOptions,
+ BuiltinOptions_SubOptions,
+ BuiltinOptions_DivOptions,
+ BuiltinOptions_SqueezeOptions,
+ BuiltinOptions_SequenceRNNOptions,
+ BuiltinOptions_StridedSliceOptions,
+ BuiltinOptions_ExpOptions,
+ BuiltinOptions_TopKV2Options,
+ BuiltinOptions_SplitOptions,
+ BuiltinOptions_LogSoftmaxOptions,
+ BuiltinOptions_CastOptions,
+ BuiltinOptions_DequantizeOptions,
+ BuiltinOptions_MaximumMinimumOptions,
+ BuiltinOptions_ArgMaxOptions,
+ BuiltinOptions_LessOptions,
+ BuiltinOptions_NegOptions,
+ BuiltinOptions_PadV2Options,
+ BuiltinOptions_GreaterOptions,
+ BuiltinOptions_GreaterEqualOptions,
+ BuiltinOptions_LessEqualOptions,
+ BuiltinOptions_SelectOptions,
+ BuiltinOptions_SliceOptions,
+ BuiltinOptions_TransposeConvOptions,
+ BuiltinOptions_SparseToDenseOptions,
+ BuiltinOptions_TileOptions,
+ BuiltinOptions_ExpandDimsOptions,
+ BuiltinOptions_EqualOptions,
+ BuiltinOptions_NotEqualOptions,
+ BuiltinOptions_ShapeOptions,
+ BuiltinOptions_PowOptions,
+ BuiltinOptions_ArgMinOptions,
+ BuiltinOptions_FakeQuantOptions,
+ BuiltinOptions_PackOptions,
+ BuiltinOptions_LogicalOrOptions,
+ BuiltinOptions_OneHotOptions,
+ BuiltinOptions_LogicalAndOptions,
+ BuiltinOptions_LogicalNotOptions,
+ BuiltinOptions_UnpackOptions,
+ BuiltinOptions_FloorDivOptions,
+ BuiltinOptions_SquareOptions,
+ BuiltinOptions_ZerosLikeOptions,
+ BuiltinOptions_FillOptions,
+ BuiltinOptions_BidirectionalSequenceLSTMOptions,
+ BuiltinOptions_BidirectionalSequenceRNNOptions,
+ BuiltinOptions_UnidirectionalSequenceLSTMOptions,
+ BuiltinOptions_FloorModOptions,
+ BuiltinOptions_RangeOptions,
+ BuiltinOptions_ResizeNearestNeighborOptions,
+ BuiltinOptions_LeakyReluOptions,
+ BuiltinOptions_SquaredDifferenceOptions,
+ BuiltinOptions_MirrorPadOptions,
+ BuiltinOptions_AbsOptions,
+ BuiltinOptions_SplitVOptions,
+ BuiltinOptions_UniqueOptions,
+ BuiltinOptions_ReverseV2Options,
+ BuiltinOptions_AddNOptions,
+ BuiltinOptions_GatherNdOptions,
+ BuiltinOptions_CosOptions,
+ BuiltinOptions_WhereOptions,
+ BuiltinOptions_RankOptions,
+ BuiltinOptions_ReverseSequenceOptions,
+ BuiltinOptions_MatrixDiagOptions,
+ BuiltinOptions_QuantizeOptions,
+ BuiltinOptions_MatrixSetDiagOptions,
+ BuiltinOptions_HardSwishOptions,
+ BuiltinOptions_IfOptions,
+ BuiltinOptions_WhileOptions,
+ BuiltinOptions_DepthToSpaceOptions,
+ BuiltinOptions_NonMaxSuppressionV4Options,
+ BuiltinOptions_NonMaxSuppressionV5Options,
+ BuiltinOptions_ScatterNdOptions,
+ BuiltinOptions_SelectV2Options,
+ BuiltinOptions_DensifyOptions,
+ BuiltinOptions_SegmentSumOptions,
+ BuiltinOptions_BatchMatMulOptions};
+ return values;
+}
+
+inline const char *const *EnumNamesBuiltinOptions()
+{
+ static const char *const names[] = {"NONE",
+ "Conv2DOptions",
+ "DepthwiseConv2DOptions",
+ "ConcatEmbeddingsOptions",
+ "LSHProjectionOptions",
+ "Pool2DOptions",
+ "SVDFOptions",
+ "RNNOptions",
+ "FullyConnectedOptions",
+ "SoftmaxOptions",
+ "ConcatenationOptions",
+ "AddOptions",
+ "L2NormOptions",
+ "LocalResponseNormalizationOptions",
+ "LSTMOptions",
+ "ResizeBilinearOptions",
+ "CallOptions",
+ "ReshapeOptions",
+ "SkipGramOptions",
+ "SpaceToDepthOptions",
+ "EmbeddingLookupSparseOptions",
+ "MulOptions",
+ "PadOptions",
+ "GatherOptions",
+ "BatchToSpaceNDOptions",
+ "SpaceToBatchNDOptions",
+ "TransposeOptions",
+ "ReducerOptions",
+ "SubOptions",
+ "DivOptions",
+ "SqueezeOptions",
+ "SequenceRNNOptions",
+ "StridedSliceOptions",
+ "ExpOptions",
+ "TopKV2Options",
+ "SplitOptions",
+ "LogSoftmaxOptions",
+ "CastOptions",
+ "DequantizeOptions",
+ "MaximumMinimumOptions",
+ "ArgMaxOptions",
+ "LessOptions",
+ "NegOptions",
+ "PadV2Options",
+ "GreaterOptions",
+ "GreaterEqualOptions",
+ "LessEqualOptions",
+ "SelectOptions",
+ "SliceOptions",
+ "TransposeConvOptions",
+ "SparseToDenseOptions",
+ "TileOptions",
+ "ExpandDimsOptions",
+ "EqualOptions",
+ "NotEqualOptions",
+ "ShapeOptions",
+ "PowOptions",
+ "ArgMinOptions",
+ "FakeQuantOptions",
+ "PackOptions",
+ "LogicalOrOptions",
+ "OneHotOptions",
+ "LogicalAndOptions",
+ "LogicalNotOptions",
+ "UnpackOptions",
+ "FloorDivOptions",
+ "SquareOptions",
+ "ZerosLikeOptions",
+ "FillOptions",
+ "BidirectionalSequenceLSTMOptions",
+ "BidirectionalSequenceRNNOptions",
+ "UnidirectionalSequenceLSTMOptions",
+ "FloorModOptions",
+ "RangeOptions",
+ "ResizeNearestNeighborOptions",
+ "LeakyReluOptions",
+ "SquaredDifferenceOptions",
+ "MirrorPadOptions",
+ "AbsOptions",
+ "SplitVOptions",
+ "UniqueOptions",
+ "ReverseV2Options",
+ "AddNOptions",
+ "GatherNdOptions",
+ "CosOptions",
+ "WhereOptions",
+ "RankOptions",
+ "ReverseSequenceOptions",
+ "MatrixDiagOptions",
+ "QuantizeOptions",
+ "MatrixSetDiagOptions",
+ "HardSwishOptions",
+ "IfOptions",
+ "WhileOptions",
+ "DepthToSpaceOptions",
+ "NonMaxSuppressionV4Options",
+ "NonMaxSuppressionV5Options",
+ "ScatterNdOptions",
+ "SelectV2Options",
+ "DensifyOptions",
+ "SegmentSumOptions",
+ "BatchMatMulOptions",
+ nullptr};
+ return names;
+}
+
+// Returns the symbolic name of a BuiltinOptions value, or "" for a value
+// outside the generated table (e.g. an options union tag from a newer schema).
+inline const char *EnumNameBuiltinOptions(BuiltinOptions e)
+{
+  // Guard the lookup: EnumNamesBuiltinOptions() only covers [MIN, MAX];
+  // without this check an unknown value reads past the array (UB).
+  if (e < BuiltinOptions_MIN || e > BuiltinOptions_MAX)
+    return "";
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesBuiltinOptions()[index];
+}
+
+template <typename T> struct BuiltinOptionsTraits
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NONE;
+};
+
+template <> struct BuiltinOptionsTraits<Conv2DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<DepthwiseConv2DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ConcatEmbeddingsOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LSHProjectionOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions;
+};
+
+template <> struct BuiltinOptionsTraits<Pool2DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SVDFOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions;
+};
+
+template <> struct BuiltinOptionsTraits<RNNOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions;
+};
+
+template <> struct BuiltinOptionsTraits<FullyConnectedOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SoftmaxOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ConcatenationOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions;
+};
+
+template <> struct BuiltinOptionsTraits<AddOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AddOptions;
+};
+
+template <> struct BuiltinOptionsTraits<L2NormOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LocalResponseNormalizationOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LSTMOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ResizeBilinearOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions;
+};
+
+template <> struct BuiltinOptionsTraits<CallOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CallOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ReshapeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SkipGramOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SpaceToDepthOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions;
+};
+
+template <> struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions;
+};
+
+template <> struct BuiltinOptionsTraits<MulOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MulOptions;
+};
+
+template <> struct BuiltinOptionsTraits<PadOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_PadOptions;
+};
+
+template <> struct BuiltinOptionsTraits<GatherOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions;
+};
+
+template <> struct BuiltinOptionsTraits<BatchToSpaceNDOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SpaceToBatchNDOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions;
+};
+
+template <> struct BuiltinOptionsTraits<TransposeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ReducerOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SubOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SubOptions;
+};
+
+template <> struct BuiltinOptionsTraits<DivOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DivOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SqueezeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SequenceRNNOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions;
+};
+
+template <> struct BuiltinOptionsTraits<StridedSliceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ExpOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions;
+};
+
+template <> struct BuiltinOptionsTraits<TopKV2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options;
+};
+
+template <> struct BuiltinOptionsTraits<SplitOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LogSoftmaxOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions;
+};
+
+template <> struct BuiltinOptionsTraits<CastOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CastOptions;
+};
+
+template <> struct BuiltinOptionsTraits<DequantizeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<MaximumMinimumOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ArgMaxOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LessOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LessOptions;
+};
+
+template <> struct BuiltinOptionsTraits<NegOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NegOptions;
+};
+
+template <> struct BuiltinOptionsTraits<PadV2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options;
+};
+
+template <> struct BuiltinOptionsTraits<GreaterOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions;
+};
+
+template <> struct BuiltinOptionsTraits<GreaterEqualOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LessEqualOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SelectOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SliceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<TransposeConvOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SparseToDenseOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions;
+};
+
+template <> struct BuiltinOptionsTraits<TileOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_TileOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ExpandDimsOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions;
+};
+
+template <> struct BuiltinOptionsTraits<EqualOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions;
+};
+
+template <> struct BuiltinOptionsTraits<NotEqualOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ShapeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<PowOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_PowOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ArgMinOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions;
+};
+
+template <> struct BuiltinOptionsTraits<FakeQuantOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions;
+};
+
+template <> struct BuiltinOptionsTraits<PackOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_PackOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LogicalOrOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions;
+};
+
+template <> struct BuiltinOptionsTraits<OneHotOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LogicalAndOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LogicalNotOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions;
+};
+
+template <> struct BuiltinOptionsTraits<UnpackOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions;
+};
+
+template <> struct BuiltinOptionsTraits<FloorDivOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SquareOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ZerosLikeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<FillOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FillOptions;
+};
+
+template <> struct BuiltinOptionsTraits<BidirectionalSequenceLSTMOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions;
+};
+
+template <> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
+};
+
+template <> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
+};
+
+template <> struct BuiltinOptionsTraits<FloorModOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions;
+};
+
+template <> struct BuiltinOptionsTraits<RangeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ResizeNearestNeighborOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions;
+};
+
+template <> struct BuiltinOptionsTraits<LeakyReluOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SquaredDifferenceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<MirrorPadOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions;
+};
+
+template <> struct BuiltinOptionsTraits<AbsOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SplitVOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions;
+};
+
+template <> struct BuiltinOptionsTraits<UniqueOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UniqueOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ReverseV2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReverseV2Options;
+};
+
+template <> struct BuiltinOptionsTraits<AddNOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AddNOptions;
+};
+
+template <> struct BuiltinOptionsTraits<GatherNdOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GatherNdOptions;
+};
+
+template <> struct BuiltinOptionsTraits<CosOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CosOptions;
+};
+
+template <> struct BuiltinOptionsTraits<WhereOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions;
+};
+
+template <> struct BuiltinOptionsTraits<RankOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RankOptions;
+};
+
+template <> struct BuiltinOptionsTraits<ReverseSequenceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReverseSequenceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<MatrixDiagOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MatrixDiagOptions;
+};
+
+template <> struct BuiltinOptionsTraits<QuantizeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_QuantizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<MatrixSetDiagOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MatrixSetDiagOptions;
+};
+
+template <> struct BuiltinOptionsTraits<HardSwishOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HardSwishOptions;
+};
+
+template <> struct BuiltinOptionsTraits<IfOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_IfOptions;
+};
+
+template <> struct BuiltinOptionsTraits<WhileOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_WhileOptions;
+};
+
+template <> struct BuiltinOptionsTraits<DepthToSpaceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DepthToSpaceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<NonMaxSuppressionV4Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV4Options;
+};
+
+template <> struct BuiltinOptionsTraits<NonMaxSuppressionV5Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV5Options;
+};
+
+template <> struct BuiltinOptionsTraits<ScatterNdOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ScatterNdOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SelectV2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SelectV2Options;
+};
+
+template <> struct BuiltinOptionsTraits<DensifyOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DensifyOptions;
+};
+
+template <> struct BuiltinOptionsTraits<SegmentSumOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SegmentSumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<BatchMatMulOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions;
+};
+
+bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
+bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types);
+
+// Convolution/pooling padding scheme (tflite schema `Padding`).
+enum Padding
+{
+  Padding_SAME = 0,
+  Padding_VALID = 1,
+  Padding_MIN = Padding_SAME,
+  Padding_MAX = Padding_VALID
+};
+
+// Returns the array of all Padding values, in enum order.
+inline const Padding (&EnumValuesPadding())[2]
+{
+  static const Padding values[] = {Padding_SAME, Padding_VALID};
+  return values;
+}
+
+// Returns the nullptr-terminated array of Padding names, indexed by value.
+inline const char *const *EnumNamesPadding()
+{
+  static const char *const names[] = {"SAME", "VALID", nullptr};
+  return names;
+}
+
+// Returns the symbolic name of a Padding value, or "" if out of range.
+inline const char *EnumNamePadding(Padding e)
+{
+  // Guard the lookup so an invalid value cannot index past names[] (UB).
+  if (e < Padding_MIN || e > Padding_MAX)
+    return "";
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesPadding()[index];
+}
+
+// Fused activation applied to an op's output (tflite schema
+// `ActivationFunctionType`).
+enum ActivationFunctionType
+{
+  ActivationFunctionType_NONE = 0,
+  ActivationFunctionType_RELU = 1,
+  ActivationFunctionType_RELU_N1_TO_1 = 2,
+  ActivationFunctionType_RELU6 = 3,
+  ActivationFunctionType_TANH = 4,
+  ActivationFunctionType_SIGN_BIT = 5,
+  ActivationFunctionType_MIN = ActivationFunctionType_NONE,
+  ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT
+};
+
+// Returns the array of all ActivationFunctionType values, in enum order.
+inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6]
+{
+  static const ActivationFunctionType values[] = {
+      ActivationFunctionType_NONE, ActivationFunctionType_RELU,
+      ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6,
+      ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
+  return values;
+}
+
+// Returns the nullptr-terminated array of names, indexed by enum value.
+inline const char *const *EnumNamesActivationFunctionType()
+{
+  static const char *const names[] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6",
+                                      "TANH", "SIGN_BIT", nullptr};
+  return names;
+}
+
+// Returns the symbolic name of an ActivationFunctionType, or "" if out of range.
+inline const char *EnumNameActivationFunctionType(ActivationFunctionType e)
+{
+  // Guard the lookup so an invalid value cannot index past names[] (UB).
+  if (e < ActivationFunctionType_MIN || e > ActivationFunctionType_MAX)
+    return "";
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesActivationFunctionType()[index];
+}
+
+// Projection type for the LSH_PROJECTION op (tflite schema
+// `LSHProjectionType`).
+enum LSHProjectionType
+{
+  LSHProjectionType_UNKNOWN = 0,
+  LSHProjectionType_SPARSE = 1,
+  LSHProjectionType_DENSE = 2,
+  LSHProjectionType_MIN = LSHProjectionType_UNKNOWN,
+  LSHProjectionType_MAX = LSHProjectionType_DENSE
+};
+
+// Returns the array of all LSHProjectionType values, in enum order.
+inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3]
+{
+  static const LSHProjectionType values[] = {LSHProjectionType_UNKNOWN, LSHProjectionType_SPARSE,
+                                             LSHProjectionType_DENSE};
+  return values;
+}
+
+// Returns the nullptr-terminated array of names, indexed by enum value.
+inline const char *const *EnumNamesLSHProjectionType()
+{
+  static const char *const names[] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
+  return names;
+}
+
+// Returns the symbolic name of an LSHProjectionType, or "" if out of range.
+inline const char *EnumNameLSHProjectionType(LSHProjectionType e)
+{
+  // Guard the lookup so an invalid value cannot index past names[] (UB).
+  if (e < LSHProjectionType_MIN || e > LSHProjectionType_MAX)
+    return "";
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesLSHProjectionType()[index];
+}
+
+// Weight layout for FULLY_CONNECTED (tflite schema
+// `FullyConnectedOptionsWeightsFormat`).
+enum FullyConnectedOptionsWeightsFormat
+{
+  FullyConnectedOptionsWeightsFormat_DEFAULT = 0,
+  FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1,
+  FullyConnectedOptionsWeightsFormat_MIN = FullyConnectedOptionsWeightsFormat_DEFAULT,
+  FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8
+};
+
+// Returns the array of all weights-format values, in enum order.
+inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2]
+{
+  static const FullyConnectedOptionsWeightsFormat values[] = {
+      FullyConnectedOptionsWeightsFormat_DEFAULT,
+      FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8};
+  return values;
+}
+
+// Returns the nullptr-terminated array of names, indexed by enum value.
+inline const char *const *EnumNamesFullyConnectedOptionsWeightsFormat()
+{
+  static const char *const names[] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr};
+  return names;
+}
+
+// Returns the symbolic name of a weights format, or "" if out of range.
+inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e)
+{
+  // Guard the lookup so an invalid value cannot index past names[] (UB).
+  if (e < FullyConnectedOptionsWeightsFormat_MIN || e > FullyConnectedOptionsWeightsFormat_MAX)
+    return "";
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesFullyConnectedOptionsWeightsFormat()[index];
+}
+
+// LSTM kernel variant (tflite schema `LSTMKernelType`).
+enum LSTMKernelType
+{
+  LSTMKernelType_FULL = 0,
+  LSTMKernelType_BASIC = 1,
+  LSTMKernelType_MIN = LSTMKernelType_FULL,
+  LSTMKernelType_MAX = LSTMKernelType_BASIC
+};
+
+// Returns the array of all LSTMKernelType values, in enum order.
+inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2]
+{
+  static const LSTMKernelType values[] = {LSTMKernelType_FULL, LSTMKernelType_BASIC};
+  return values;
+}
+
+// Returns the nullptr-terminated array of names, indexed by enum value.
+inline const char *const *EnumNamesLSTMKernelType()
+{
+  static const char *const names[] = {"FULL", "BASIC", nullptr};
+  return names;
+}
+
+// Returns the symbolic name of an LSTMKernelType, or "" if out of range.
+inline const char *EnumNameLSTMKernelType(LSTMKernelType e)
+{
+  // Guard the lookup so an invalid value cannot index past names[] (UB).
+  if (e < LSTMKernelType_MIN || e > LSTMKernelType_MAX)
+    return "";
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesLSTMKernelType()[index];
+}
+
+// Reduction used by EMBEDDING_LOOKUP_SPARSE (tflite schema `CombinerType`).
+enum CombinerType
+{
+  CombinerType_SUM = 0,
+  CombinerType_MEAN = 1,
+  CombinerType_SQRTN = 2,
+  CombinerType_MIN = CombinerType_SUM,
+  CombinerType_MAX = CombinerType_SQRTN
+};
+
+// Returns the array of all CombinerType values, in enum order.
+inline const CombinerType (&EnumValuesCombinerType())[3]
+{
+  static const CombinerType values[] = {CombinerType_SUM, CombinerType_MEAN, CombinerType_SQRTN};
+  return values;
+}
+
+// Returns the nullptr-terminated array of names, indexed by enum value.
+inline const char *const *EnumNamesCombinerType()
+{
+  static const char *const names[] = {"SUM", "MEAN", "SQRTN", nullptr};
+  return names;
+}
+
+// Returns the symbolic name of a CombinerType, or "" if out of range.
+inline const char *EnumNameCombinerType(CombinerType e)
+{
+  // Guard the lookup so an invalid value cannot index past names[] (UB).
+  if (e < CombinerType_MIN || e > CombinerType_MAX)
+    return "";
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesCombinerType()[index];
+}
+
+// Edge handling for MIRROR_PAD (tflite schema `MirrorPadMode`).
+enum MirrorPadMode
+{
+  MirrorPadMode_REFLECT = 0,
+  MirrorPadMode_SYMMETRIC = 1,
+  MirrorPadMode_MIN = MirrorPadMode_REFLECT,
+  MirrorPadMode_MAX = MirrorPadMode_SYMMETRIC
+};
+
+// Returns the array of all MirrorPadMode values, in enum order.
+inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2]
+{
+  static const MirrorPadMode values[] = {MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC};
+  return values;
+}
+
+// Returns the nullptr-terminated array of names, indexed by enum value.
+inline const char *const *EnumNamesMirrorPadMode()
+{
+  static const char *const names[] = {"REFLECT", "SYMMETRIC", nullptr};
+  return names;
+}
+
+// Returns the symbolic name of a MirrorPadMode, or "" if out of range.
+inline const char *EnumNameMirrorPadMode(MirrorPadMode e)
+{
+  // Guard the lookup so an invalid value cannot index past names[] (UB).
+  if (e < MirrorPadMode_MIN || e > MirrorPadMode_MAX)
+    return "";
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesMirrorPadMode()[index];
+}
+
+// Encoding of a custom op's options blob (tflite schema
+// `CustomOptionsFormat`); only FLEXBUFFERS is defined.
+enum CustomOptionsFormat
+{
+  CustomOptionsFormat_FLEXBUFFERS = 0,
+  CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,
+  CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS
+};
+
+// Returns the array of all CustomOptionsFormat values (single entry).
+inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1]
+{
+  static const CustomOptionsFormat values[] = {CustomOptionsFormat_FLEXBUFFERS};
+  return values;
+}
+
+// Returns the nullptr-terminated array of names, indexed by enum value.
+inline const char *const *EnumNamesCustomOptionsFormat()
+{
+  static const char *const names[] = {"FLEXBUFFERS", nullptr};
+  return names;
+}
+
+inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e)
+{
+ const size_t index = static_cast<int>(e);
+ return EnumNamesCustomOptionsFormat()[index];
+}
+
// FlatBuffers table: CustomQuantization.
// Read-only accessor over an opaque byte blob carrying backend-defined
// quantization details; used as the payload of QuantizationParameters.details.
struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets for this table's fields (generated; do not change)
  enum
  {
    VT_CUSTOM = 4
  };
  // Raw custom bytes; nullptr when the field is absent from the buffer.
  const flatbuffers::Vector<uint8_t> *custom() const
  {
    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM);
  }
  // Structural validation of this table while verifying an untrusted buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CUSTOM) &&
           verifier.VerifyVector(custom()) && verifier.EndTable();
  }
};

// Incremental builder for CustomQuantization tables.
struct CustomQuantizationBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // buffer being written into
  flatbuffers::uoffset_t start_;        // table start offset from StartTable()
  void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom)
  {
    fbb_.AddOffset(CustomQuantization::VT_CUSTOM, custom);
  }
  explicit CustomQuantizationBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared but intentionally not defined: builders are non-assignable.
  CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &);
  // Finalizes the table and returns its offset within the buffer.
  flatbuffers::Offset<CustomQuantization> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<CustomQuantization>(end);
    return o;
  }
};

// One-shot constructor taking an already-serialized vector offset
// (0 means "field absent").
inline flatbuffers::Offset<CustomQuantization>
CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb,
                         flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom = 0)
{
  CustomQuantizationBuilder builder_(_fbb);
  builder_.add_custom(custom);
  return builder_.Finish();
}

// Convenience overload that serializes a std::vector in place; a null
// pointer maps to offset 0 (field absent).
inline flatbuffers::Offset<CustomQuantization>
CreateCustomQuantizationDirect(flatbuffers::FlatBufferBuilder &_fbb,
                               const std::vector<uint8_t> *custom = nullptr)
{
  return onert_tflite::CreateCustomQuantization(_fbb,
                                                custom ? _fbb.CreateVector<uint8_t>(*custom) : 0);
}
+
// FlatBuffers table: QuantizationParameters.
// Read-only accessor for a tensor's quantization metadata: min/max/scale/
// zero_point vectors, an optional `details` union tagged by `details_type`,
// and the quantized dimension index.
struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets for this table's fields (generated; do not change)
  enum
  {
    VT_MIN = 4,
    VT_MAX = 6,
    VT_SCALE = 8,
    VT_ZERO_POINT = 10,
    VT_DETAILS_TYPE = 12,
    VT_DETAILS = 14,
    VT_QUANTIZED_DIMENSION = 16
  };
  // Each accessor returns nullptr when the field is absent from the buffer.
  const flatbuffers::Vector<float> *min() const
  {
    return GetPointer<const flatbuffers::Vector<float> *>(VT_MIN);
  }
  const flatbuffers::Vector<float> *max() const
  {
    return GetPointer<const flatbuffers::Vector<float> *>(VT_MAX);
  }
  const flatbuffers::Vector<float> *scale() const
  {
    return GetPointer<const flatbuffers::Vector<float> *>(VT_SCALE);
  }
  const flatbuffers::Vector<int64_t> *zero_point() const
  {
    return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT);
  }
  // Union discriminant for details(); defaults to 0 (NONE) when absent.
  QuantizationDetails details_type() const
  {
    return static_cast<QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
  }
  // Untyped union payload; interpret via details_type() or details_as_*().
  const void *details() const { return GetPointer<const void *>(VT_DETAILS); }
  template <typename T> const T *details_as() const;
  // Typed union access; nullptr when the discriminant does not match.
  const CustomQuantization *details_as_CustomQuantization() const
  {
    return details_type() == QuantizationDetails_CustomQuantization
               ? static_cast<const CustomQuantization *>(details())
               : nullptr;
  }
  // Axis along which per-channel quantization applies; defaults to 0.
  int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); }
  // Structural validation of this table while verifying an untrusted buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_MIN) &&
           verifier.VerifyVector(min()) && VerifyOffset(verifier, VT_MAX) &&
           verifier.VerifyVector(max()) && VerifyOffset(verifier, VT_SCALE) &&
           verifier.VerifyVector(scale()) && VerifyOffset(verifier, VT_ZERO_POINT) &&
           verifier.VerifyVector(zero_point()) && VerifyField<uint8_t>(verifier, VT_DETAILS_TYPE) &&
           VerifyOffset(verifier, VT_DETAILS) &&
           VerifyQuantizationDetails(verifier, details(), details_type()) &&
           VerifyField<int32_t>(verifier, VT_QUANTIZED_DIMENSION) && verifier.EndTable();
  }
};

// Template specialization routing details_as<CustomQuantization>() to the
// checked accessor above.
template <>
inline const CustomQuantization *QuantizationParameters::details_as<CustomQuantization>() const
{
  return details_as_CustomQuantization();
}

// Incremental builder for QuantizationParameters tables.
struct QuantizationParametersBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // buffer being written into
  flatbuffers::uoffset_t start_;        // table start offset from StartTable()
  void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min)
  {
    fbb_.AddOffset(QuantizationParameters::VT_MIN, min);
  }
  void add_max(flatbuffers::Offset<flatbuffers::Vector<float>> max)
  {
    fbb_.AddOffset(QuantizationParameters::VT_MAX, max);
  }
  void add_scale(flatbuffers::Offset<flatbuffers::Vector<float>> scale)
  {
    fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale);
  }
  void add_zero_point(flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point)
  {
    fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point);
  }
  // Writes the union discriminant; 0 (NONE) is the omitted-field default.
  void add_details_type(QuantizationDetails details_type)
  {
    fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE,
                             static_cast<uint8_t>(details_type), 0);
  }
  void add_details(flatbuffers::Offset<void> details)
  {
    fbb_.AddOffset(QuantizationParameters::VT_DETAILS, details);
  }
  void add_quantized_dimension(int32_t quantized_dimension)
  {
    fbb_.AddElement<int32_t>(QuantizationParameters::VT_QUANTIZED_DIMENSION, quantized_dimension,
                             0);
  }
  explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared but intentionally not defined: builders are non-assignable.
  QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &);
  // Finalizes the table and returns its offset within the buffer.
  flatbuffers::Offset<QuantizationParameters> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<QuantizationParameters>(end);
    return o;
  }
};

// One-shot constructor. Fields are added largest-first (4-byte scalars and
// offsets before the 1-byte details_type), as emitted by the code generator;
// do not reorder.
inline flatbuffers::Offset<QuantizationParameters>
CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb,
                             flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
                             flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
                             flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
                             flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
                             QuantizationDetails details_type = QuantizationDetails_NONE,
                             flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
  QuantizationParametersBuilder builder_(_fbb);
  builder_.add_quantized_dimension(quantized_dimension);
  builder_.add_details(details);
  builder_.add_zero_point(zero_point);
  builder_.add_scale(scale);
  builder_.add_max(max);
  builder_.add_min(min);
  builder_.add_details_type(details_type);
  return builder_.Finish();
}

// Convenience overload that serializes the std::vector arguments in place;
// null pointers map to offset 0 (field absent).
inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
    flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
    const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
    const std::vector<int64_t> *zero_point = nullptr,
    QuantizationDetails details_type = QuantizationDetails_NONE,
    flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
  return onert_tflite::CreateQuantizationParameters(
      _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0,
      scale ? _fbb.CreateVector<float>(*scale) : 0,
      zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details,
      quantized_dimension);
}
+
// FlatBuffers table: Int32Vector — a simple wrapper around a vector of
// int32 values (one of the SparseIndexVector union alternatives).
struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets for this table's fields (generated; do not change)
  enum
  {
    VT_VALUES = 4
  };
  // Stored values; nullptr when the field is absent from the buffer.
  const flatbuffers::Vector<int32_t> *values() const
  {
    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_VALUES);
  }
  // Structural validation of this table while verifying an untrusted buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_VALUES) &&
           verifier.VerifyVector(values()) && verifier.EndTable();
  }
};

// Incremental builder for Int32Vector tables.
struct Int32VectorBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // buffer being written into
  flatbuffers::uoffset_t start_;        // table start offset from StartTable()
  void add_values(flatbuffers::Offset<flatbuffers::Vector<int32_t>> values)
  {
    fbb_.AddOffset(Int32Vector::VT_VALUES, values);
  }
  explicit Int32VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared but intentionally not defined: builders are non-assignable.
  Int32VectorBuilder &operator=(const Int32VectorBuilder &);
  // Finalizes the table and returns its offset within the buffer.
  flatbuffers::Offset<Int32Vector> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<Int32Vector>(end);
    return o;
  }
};

// One-shot constructor taking an already-serialized vector offset.
inline flatbuffers::Offset<Int32Vector>
CreateInt32Vector(flatbuffers::FlatBufferBuilder &_fbb,
                  flatbuffers::Offset<flatbuffers::Vector<int32_t>> values = 0)
{
  Int32VectorBuilder builder_(_fbb);
  builder_.add_values(values);
  return builder_.Finish();
}

// Convenience overload that serializes a std::vector in place; a null
// pointer maps to offset 0 (field absent).
inline flatbuffers::Offset<Int32Vector>
CreateInt32VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
                        const std::vector<int32_t> *values = nullptr)
{
  return onert_tflite::CreateInt32Vector(_fbb, values ? _fbb.CreateVector<int32_t>(*values) : 0);
}
+
// FlatBuffers table: Uint16Vector — a simple wrapper around a vector of
// uint16 values (one of the SparseIndexVector union alternatives).
struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets for this table's fields (generated; do not change)
  enum
  {
    VT_VALUES = 4
  };
  // Stored values; nullptr when the field is absent from the buffer.
  const flatbuffers::Vector<uint16_t> *values() const
  {
    return GetPointer<const flatbuffers::Vector<uint16_t> *>(VT_VALUES);
  }
  // Structural validation of this table while verifying an untrusted buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_VALUES) &&
           verifier.VerifyVector(values()) && verifier.EndTable();
  }
};

// Incremental builder for Uint16Vector tables.
struct Uint16VectorBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // buffer being written into
  flatbuffers::uoffset_t start_;        // table start offset from StartTable()
  void add_values(flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values)
  {
    fbb_.AddOffset(Uint16Vector::VT_VALUES, values);
  }
  explicit Uint16VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared but intentionally not defined: builders are non-assignable.
  Uint16VectorBuilder &operator=(const Uint16VectorBuilder &);
  // Finalizes the table and returns its offset within the buffer.
  flatbuffers::Offset<Uint16Vector> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<Uint16Vector>(end);
    return o;
  }
};

// One-shot constructor taking an already-serialized vector offset.
inline flatbuffers::Offset<Uint16Vector>
CreateUint16Vector(flatbuffers::FlatBufferBuilder &_fbb,
                   flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values = 0)
{
  Uint16VectorBuilder builder_(_fbb);
  builder_.add_values(values);
  return builder_.Finish();
}

// Convenience overload that serializes a std::vector in place; a null
// pointer maps to offset 0 (field absent).
inline flatbuffers::Offset<Uint16Vector>
CreateUint16VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
                         const std::vector<uint16_t> *values = nullptr)
{
  return onert_tflite::CreateUint16Vector(_fbb, values ? _fbb.CreateVector<uint16_t>(*values) : 0);
}
+
// FlatBuffers table: Uint8Vector — a simple wrapper around a vector of
// uint8 values (one of the SparseIndexVector union alternatives).
struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets for this table's fields (generated; do not change)
  enum
  {
    VT_VALUES = 4
  };
  // Stored values; nullptr when the field is absent from the buffer.
  const flatbuffers::Vector<uint8_t> *values() const
  {
    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_VALUES);
  }
  // Structural validation of this table while verifying an untrusted buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_VALUES) &&
           verifier.VerifyVector(values()) && verifier.EndTable();
  }
};

// Incremental builder for Uint8Vector tables.
struct Uint8VectorBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // buffer being written into
  flatbuffers::uoffset_t start_;        // table start offset from StartTable()
  void add_values(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values)
  {
    fbb_.AddOffset(Uint8Vector::VT_VALUES, values);
  }
  explicit Uint8VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared but intentionally not defined: builders are non-assignable.
  Uint8VectorBuilder &operator=(const Uint8VectorBuilder &);
  // Finalizes the table and returns its offset within the buffer.
  flatbuffers::Offset<Uint8Vector> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<Uint8Vector>(end);
    return o;
  }
};

// One-shot constructor taking an already-serialized vector offset.
inline flatbuffers::Offset<Uint8Vector>
CreateUint8Vector(flatbuffers::FlatBufferBuilder &_fbb,
                  flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values = 0)
{
  Uint8VectorBuilder builder_(_fbb);
  builder_.add_values(values);
  return builder_.Finish();
}

// Convenience overload that serializes a std::vector in place; a null
// pointer maps to offset 0 (field absent).
inline flatbuffers::Offset<Uint8Vector>
CreateUint8VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
                        const std::vector<uint8_t> *values = nullptr)
{
  return onert_tflite::CreateUint8Vector(_fbb, values ? _fbb.CreateVector<uint8_t>(*values) : 0);
}
+
// FlatBuffers table: DimensionMetadata.
// Describes one dimension of a sparse tensor: its storage format, dense
// size, and two SparseIndexVector unions (array_segments / array_indices),
// each tagged by its *_type discriminant field.
struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets for this table's fields (generated; do not change)
  enum
  {
    VT_FORMAT = 4,
    VT_DENSE_SIZE = 6,
    VT_ARRAY_SEGMENTS_TYPE = 8,
    VT_ARRAY_SEGMENTS = 10,
    VT_ARRAY_INDICES_TYPE = 12,
    VT_ARRAY_INDICES = 14
  };
  // Storage format of this dimension; defaults to 0 when absent.
  DimensionType format() const
  {
    return static_cast<DimensionType>(GetField<int8_t>(VT_FORMAT, 0));
  }
  int32_t dense_size() const { return GetField<int32_t>(VT_DENSE_SIZE, 0); }
  // Union discriminant for array_segments(); 0 (NONE) when absent.
  SparseIndexVector array_segments_type() const
  {
    return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0));
  }
  // Untyped union payload; interpret via array_segments_type() or the
  // typed array_segments_as_*() accessors below.
  const void *array_segments() const { return GetPointer<const void *>(VT_ARRAY_SEGMENTS); }
  template <typename T> const T *array_segments_as() const;
  // Typed union access; each returns nullptr when the discriminant differs.
  const Int32Vector *array_segments_as_Int32Vector() const
  {
    return array_segments_type() == SparseIndexVector_Int32Vector
               ? static_cast<const Int32Vector *>(array_segments())
               : nullptr;
  }
  const Uint16Vector *array_segments_as_Uint16Vector() const
  {
    return array_segments_type() == SparseIndexVector_Uint16Vector
               ? static_cast<const Uint16Vector *>(array_segments())
               : nullptr;
  }
  const Uint8Vector *array_segments_as_Uint8Vector() const
  {
    return array_segments_type() == SparseIndexVector_Uint8Vector
               ? static_cast<const Uint8Vector *>(array_segments())
               : nullptr;
  }
  // Union discriminant for array_indices(); 0 (NONE) when absent.
  SparseIndexVector array_indices_type() const
  {
    return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0));
  }
  const void *array_indices() const { return GetPointer<const void *>(VT_ARRAY_INDICES); }
  template <typename T> const T *array_indices_as() const;
  const Int32Vector *array_indices_as_Int32Vector() const
  {
    return array_indices_type() == SparseIndexVector_Int32Vector
               ? static_cast<const Int32Vector *>(array_indices())
               : nullptr;
  }
  const Uint16Vector *array_indices_as_Uint16Vector() const
  {
    return array_indices_type() == SparseIndexVector_Uint16Vector
               ? static_cast<const Uint16Vector *>(array_indices())
               : nullptr;
  }
  const Uint8Vector *array_indices_as_Uint8Vector() const
  {
    return array_indices_type() == SparseIndexVector_Uint8Vector
               ? static_cast<const Uint8Vector *>(array_indices())
               : nullptr;
  }
  // Structural validation of this table while verifying an untrusted buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_FORMAT) &&
           VerifyField<int32_t>(verifier, VT_DENSE_SIZE) &&
           VerifyField<uint8_t>(verifier, VT_ARRAY_SEGMENTS_TYPE) &&
           VerifyOffset(verifier, VT_ARRAY_SEGMENTS) &&
           VerifySparseIndexVector(verifier, array_segments(), array_segments_type()) &&
           VerifyField<uint8_t>(verifier, VT_ARRAY_INDICES_TYPE) &&
           VerifyOffset(verifier, VT_ARRAY_INDICES) &&
           VerifySparseIndexVector(verifier, array_indices(), array_indices_type()) &&
           verifier.EndTable();
  }
};

// Template specializations routing the generic *_as<T>() accessors to the
// discriminant-checked accessors above.
template <> inline const Int32Vector *DimensionMetadata::array_segments_as<Int32Vector>() const
{
  return array_segments_as_Int32Vector();
}

template <> inline const Uint16Vector *DimensionMetadata::array_segments_as<Uint16Vector>() const
{
  return array_segments_as_Uint16Vector();
}

template <> inline const Uint8Vector *DimensionMetadata::array_segments_as<Uint8Vector>() const
{
  return array_segments_as_Uint8Vector();
}

template <> inline const Int32Vector *DimensionMetadata::array_indices_as<Int32Vector>() const
{
  return array_indices_as_Int32Vector();
}

template <> inline const Uint16Vector *DimensionMetadata::array_indices_as<Uint16Vector>() const
{
  return array_indices_as_Uint16Vector();
}

template <> inline const Uint8Vector *DimensionMetadata::array_indices_as<Uint8Vector>() const
{
  return array_indices_as_Uint8Vector();
}

// Incremental builder for DimensionMetadata tables.
struct DimensionMetadataBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // buffer being written into
  flatbuffers::uoffset_t start_;        // table start offset from StartTable()
  void add_format(DimensionType format)
  {
    fbb_.AddElement<int8_t>(DimensionMetadata::VT_FORMAT, static_cast<int8_t>(format), 0);
  }
  void add_dense_size(int32_t dense_size)
  {
    fbb_.AddElement<int32_t>(DimensionMetadata::VT_DENSE_SIZE, dense_size, 0);
  }
  void add_array_segments_type(SparseIndexVector array_segments_type)
  {
    fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_SEGMENTS_TYPE,
                             static_cast<uint8_t>(array_segments_type), 0);
  }
  void add_array_segments(flatbuffers::Offset<void> array_segments)
  {
    fbb_.AddOffset(DimensionMetadata::VT_ARRAY_SEGMENTS, array_segments);
  }
  void add_array_indices_type(SparseIndexVector array_indices_type)
  {
    fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_INDICES_TYPE,
                             static_cast<uint8_t>(array_indices_type), 0);
  }
  void add_array_indices(flatbuffers::Offset<void> array_indices)
  {
    fbb_.AddOffset(DimensionMetadata::VT_ARRAY_INDICES, array_indices);
  }
  explicit DimensionMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared but intentionally not defined: builders are non-assignable.
  DimensionMetadataBuilder &operator=(const DimensionMetadataBuilder &);
  // Finalizes the table and returns its offset within the buffer.
  flatbuffers::Offset<DimensionMetadata> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<DimensionMetadata>(end);
    return o;
  }
};

// One-shot constructor. Fields are added largest-first (offsets and int32
// before the 1-byte discriminants and format), as emitted by the code
// generator; do not reorder.
inline flatbuffers::Offset<DimensionMetadata>
CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb,
                        DimensionType format = DimensionType_DENSE, int32_t dense_size = 0,
                        SparseIndexVector array_segments_type = SparseIndexVector_NONE,
                        flatbuffers::Offset<void> array_segments = 0,
                        SparseIndexVector array_indices_type = SparseIndexVector_NONE,
                        flatbuffers::Offset<void> array_indices = 0)
{
  DimensionMetadataBuilder builder_(_fbb);
  builder_.add_array_indices(array_indices);
  builder_.add_array_segments(array_segments);
  builder_.add_dense_size(dense_size);
  builder_.add_array_indices_type(array_indices_type);
  builder_.add_array_segments_type(array_segments_type);
  builder_.add_format(format);
  return builder_.Finish();
}
+
// FlatBuffers table: SparsityParameters.
// Sparse-tensor encoding metadata: traversal order, block map, and one
// DimensionMetadata entry per (possibly blocked) dimension.
struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets for this table's fields (generated; do not change)
  enum
  {
    VT_TRAVERSAL_ORDER = 4,
    VT_BLOCK_MAP = 6,
    VT_DIM_METADATA = 8
  };
  // Each accessor returns nullptr when the field is absent from the buffer.
  const flatbuffers::Vector<int32_t> *traversal_order() const
  {
    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_TRAVERSAL_ORDER);
  }
  const flatbuffers::Vector<int32_t> *block_map() const
  {
    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BLOCK_MAP);
  }
  const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata() const
  {
    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *>(
        VT_DIM_METADATA);
  }
  // Structural validation; also recurses into each DimensionMetadata table.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_TRAVERSAL_ORDER) &&
           verifier.VerifyVector(traversal_order()) && VerifyOffset(verifier, VT_BLOCK_MAP) &&
           verifier.VerifyVector(block_map()) && VerifyOffset(verifier, VT_DIM_METADATA) &&
           verifier.VerifyVector(dim_metadata()) && verifier.VerifyVectorOfTables(dim_metadata()) &&
           verifier.EndTable();
  }
};

// Incremental builder for SparsityParameters tables.
struct SparsityParametersBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // buffer being written into
  flatbuffers::uoffset_t start_;        // table start offset from StartTable()
  void add_traversal_order(flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order)
  {
    fbb_.AddOffset(SparsityParameters::VT_TRAVERSAL_ORDER, traversal_order);
  }
  void add_block_map(flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map)
  {
    fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map);
  }
  void add_dim_metadata(
      flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata)
  {
    fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata);
  }
  explicit SparsityParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared but intentionally not defined: builders are non-assignable.
  SparsityParametersBuilder &operator=(const SparsityParametersBuilder &);
  // Finalizes the table and returns its offset within the buffer.
  flatbuffers::Offset<SparsityParameters> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<SparsityParameters>(end);
    return o;
  }
};

// One-shot constructor taking already-serialized vector offsets
// (0 means "field absent").
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
    flatbuffers::FlatBufferBuilder &_fbb,
    flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
    flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata =
        0)
{
  SparsityParametersBuilder builder_(_fbb);
  builder_.add_dim_metadata(dim_metadata);
  builder_.add_block_map(block_map);
  builder_.add_traversal_order(traversal_order);
  return builder_.Finish();
}

// Convenience overload that serializes the std::vector arguments in place;
// null pointers map to offset 0 (field absent).
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect(
    flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
    const std::vector<int32_t> *block_map = nullptr,
    const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr)
{
  return onert_tflite::CreateSparsityParameters(
      _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0,
      block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0,
      dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0);
}
+
// FlatBuffers table: Tensor.
// Per-tensor metadata of a TFLite model: shape, element type, backing
// buffer index, name, quantization, variable flag, optional sparsity, and
// an optional shape_signature (shape with dynamic dimensions).
struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets for this table's fields (generated; do not change)
  enum
  {
    VT_SHAPE = 4,
    VT_TYPE = 6,
    VT_BUFFER = 8,
    VT_NAME = 10,
    VT_QUANTIZATION = 12,
    VT_IS_VARIABLE = 14,
    VT_SPARSITY = 16,
    VT_SHAPE_SIGNATURE = 18
  };
  // Pointer accessors return nullptr when the field is absent from the buffer.
  const flatbuffers::Vector<int32_t> *shape() const
  {
    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
  }
  // Element type; defaults to 0 (FLOAT32) when absent.
  TensorType type() const { return static_cast<TensorType>(GetField<int8_t>(VT_TYPE, 0)); }
  // Index into the model's buffer table; defaults to 0.
  uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
  const flatbuffers::String *name() const
  {
    return GetPointer<const flatbuffers::String *>(VT_NAME);
  }
  const QuantizationParameters *quantization() const
  {
    return GetPointer<const QuantizationParameters *>(VT_QUANTIZATION);
  }
  // Stored as uint8; any nonzero value means true.
  bool is_variable() const { return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0; }
  const SparsityParameters *sparsity() const
  {
    return GetPointer<const SparsityParameters *>(VT_SPARSITY);
  }
  const flatbuffers::Vector<int32_t> *shape_signature() const
  {
    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE_SIGNATURE);
  }
  // Structural validation; recurses into the quantization and sparsity tables.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SHAPE) &&
           verifier.VerifyVector(shape()) && VerifyField<int8_t>(verifier, VT_TYPE) &&
           VerifyField<uint32_t>(verifier, VT_BUFFER) && VerifyOffset(verifier, VT_NAME) &&
           verifier.VerifyString(name()) && VerifyOffset(verifier, VT_QUANTIZATION) &&
           verifier.VerifyTable(quantization()) && VerifyField<uint8_t>(verifier, VT_IS_VARIABLE) &&
           VerifyOffset(verifier, VT_SPARSITY) && verifier.VerifyTable(sparsity()) &&
           VerifyOffset(verifier, VT_SHAPE_SIGNATURE) && verifier.VerifyVector(shape_signature()) &&
           verifier.EndTable();
  }
};

// Incremental builder for Tensor tables.
struct TensorBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // buffer being written into
  flatbuffers::uoffset_t start_;        // table start offset from StartTable()
  void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape)
  {
    fbb_.AddOffset(Tensor::VT_SHAPE, shape);
  }
  void add_type(TensorType type)
  {
    fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0);
  }
  void add_buffer(uint32_t buffer) { fbb_.AddElement<uint32_t>(Tensor::VT_BUFFER, buffer, 0); }
  void add_name(flatbuffers::Offset<flatbuffers::String> name)
  {
    fbb_.AddOffset(Tensor::VT_NAME, name);
  }
  void add_quantization(flatbuffers::Offset<QuantizationParameters> quantization)
  {
    fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization);
  }
  void add_is_variable(bool is_variable)
  {
    fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0);
  }
  void add_sparsity(flatbuffers::Offset<SparsityParameters> sparsity)
  {
    fbb_.AddOffset(Tensor::VT_SPARSITY, sparsity);
  }
  void add_shape_signature(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature)
  {
    fbb_.AddOffset(Tensor::VT_SHAPE_SIGNATURE, shape_signature);
  }
  explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared but intentionally not defined: builders are non-assignable.
  TensorBuilder &operator=(const TensorBuilder &);
  // Finalizes the table and returns its offset within the buffer.
  flatbuffers::Offset<Tensor> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<Tensor>(end);
    return o;
  }
};

// One-shot constructor. Fields are added largest-first (offsets/uint32
// before the 1-byte is_variable and type), as emitted by the code
// generator; do not reorder.
inline flatbuffers::Offset<Tensor>
CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
             flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
             TensorType type = TensorType_FLOAT32, uint32_t buffer = 0,
             flatbuffers::Offset<flatbuffers::String> name = 0,
             flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
             flatbuffers::Offset<SparsityParameters> sparsity = 0,
             flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0)
{
  TensorBuilder builder_(_fbb);
  builder_.add_shape_signature(shape_signature);
  builder_.add_sparsity(sparsity);
  builder_.add_quantization(quantization);
  builder_.add_name(name);
  builder_.add_buffer(buffer);
  builder_.add_shape(shape);
  builder_.add_is_variable(is_variable);
  builder_.add_type(type);
  return builder_.Finish();
}

// Convenience overload that serializes the vector/string arguments in
// place; null pointers map to offset 0 (field absent).
inline flatbuffers::Offset<Tensor> CreateTensorDirect(
    flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
    TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr,
    flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
    flatbuffers::Offset<SparsityParameters> sparsity = 0,
    const std::vector<int32_t> *shape_signature = nullptr)
{
  return onert_tflite::CreateTensor(
      _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer,
      name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity,
      shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0);
}
+
// FlatBuffers table: Conv2DOptions.
// Operator options for 2-D convolution: padding scheme, strides, fused
// activation, and dilation factors (both defaulting to 1).
struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets for this table's fields (generated; do not change)
  enum
  {
    VT_PADDING = 4,
    VT_STRIDE_W = 6,
    VT_STRIDE_H = 8,
    VT_FUSED_ACTIVATION_FUNCTION = 10,
    VT_DILATION_W_FACTOR = 12,
    VT_DILATION_H_FACTOR = 14
  };
  // Scalar accessors fall back to the schema default when the field is absent.
  Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
  int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
  int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
  ActivationFunctionType fused_activation_function() const
  {
    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
  }
  // Dilation factors default to 1 (no dilation), not 0.
  int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
  int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
  // Structural validation of this table while verifying an untrusted buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
           VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
           VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
  }
};

// Incremental builder for Conv2DOptions tables.
struct Conv2DOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // buffer being written into
  flatbuffers::uoffset_t start_;        // table start offset from StartTable()
  void add_padding(Padding padding)
  {
    fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
  }
  void add_stride_w(int32_t stride_w)
  {
    fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_W, stride_w, 0);
  }
  void add_stride_h(int32_t stride_h)
  {
    fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0);
  }
  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
  {
    fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
                            static_cast<int8_t>(fused_activation_function), 0);
  }
  // Default 1 means the field is omitted from the buffer when the value is 1.
  void add_dilation_w_factor(int32_t dilation_w_factor)
  {
    fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
  }
  void add_dilation_h_factor(int32_t dilation_h_factor)
  {
    fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
  }
  explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared but intentionally not defined: builders are non-assignable.
  Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &);
  // Finalizes the table and returns its offset within the buffer.
  flatbuffers::Offset<Conv2DOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<Conv2DOptions>(end);
    return o;
  }
};

// One-shot constructor. Fields are added largest-first (int32 before the
// 1-byte enums), as emitted by the code generator; do not reorder.
inline flatbuffers::Offset<Conv2DOptions>
CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
                    int32_t stride_w = 0, int32_t stride_h = 0,
                    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
                    int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
{
  Conv2DOptionsBuilder builder_(_fbb);
  builder_.add_dilation_h_factor(dilation_h_factor);
  builder_.add_dilation_w_factor(dilation_w_factor);
  builder_.add_stride_h(stride_h);
  builder_.add_stride_w(stride_w);
  builder_.add_fused_activation_function(fused_activation_function);
  builder_.add_padding(padding);
  return builder_.Finish();
}
+
// FlatBuffers table: Pool2DOptions.
// Operator options for 2-D pooling: padding scheme, strides, filter window
// size, and fused activation.
struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets for this table's fields (generated; do not change)
  enum
  {
    VT_PADDING = 4,
    VT_STRIDE_W = 6,
    VT_STRIDE_H = 8,
    VT_FILTER_WIDTH = 10,
    VT_FILTER_HEIGHT = 12,
    VT_FUSED_ACTIVATION_FUNCTION = 14
  };
  // Scalar accessors fall back to the schema default when the field is absent.
  Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
  int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
  int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
  int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); }
  int32_t filter_height() const { return GetField<int32_t>(VT_FILTER_HEIGHT, 0); }
  ActivationFunctionType fused_activation_function() const
  {
    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
  }
  // Structural validation of this table while verifying an untrusted buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
           VerifyField<int32_t>(verifier, VT_FILTER_WIDTH) &&
           VerifyField<int32_t>(verifier, VT_FILTER_HEIGHT) &&
           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
  }
};

// Incremental builder for Pool2DOptions tables.
struct Pool2DOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // buffer being written into
  flatbuffers::uoffset_t start_;        // table start offset from StartTable()
  void add_padding(Padding padding)
  {
    fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
  }
  void add_stride_w(int32_t stride_w)
  {
    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_W, stride_w, 0);
  }
  void add_stride_h(int32_t stride_h)
  {
    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_H, stride_h, 0);
  }
  void add_filter_width(int32_t filter_width)
  {
    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0);
  }
  void add_filter_height(int32_t filter_height)
  {
    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
  }
  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
  {
    fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
                            static_cast<int8_t>(fused_activation_function), 0);
  }
  explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared but intentionally not defined: builders are non-assignable.
  Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &);
  // Finalizes the table and returns its offset within the buffer.
  flatbuffers::Offset<Pool2DOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<Pool2DOptions>(end);
    return o;
  }
};

// One-shot constructor. Fields are added largest-first (int32 before the
// 1-byte enums), as emitted by the code generator; do not reorder.
inline flatbuffers::Offset<Pool2DOptions>
CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
                    int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0,
                    int32_t filter_height = 0,
                    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
{
  Pool2DOptionsBuilder builder_(_fbb);
  builder_.add_filter_height(filter_height);
  builder_.add_filter_width(filter_width);
  builder_.add_stride_h(stride_h);
  builder_.add_stride_w(stride_w);
  builder_.add_fused_activation_function(fused_activation_function);
  builder_.add_padding(padding);
  return builder_.Finish();
}
+
+struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_PADDING = 4,
+ VT_STRIDE_W = 6,
+ VT_STRIDE_H = 8,
+ VT_DEPTH_MULTIPLIER = 10,
+ VT_FUSED_ACTIVATION_FUNCTION = 12,
+ VT_DILATION_W_FACTOR = 14,
+ VT_DILATION_H_FACTOR = 16
+ };
+ Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+ int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+ int32_t depth_multiplier() const { return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0); }
+ ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
+ int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+ VerifyField<int32_t>(verifier, VT_DEPTH_MULTIPLIER) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
+ }
+};
+
+struct DepthwiseConv2DOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_padding(Padding padding)
+ {
+ fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+ }
+ void add_stride_w(int32_t stride_w)
+ {
+ fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0);
+ }
+ void add_stride_h(int32_t stride_h)
+ {
+ fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0);
+ }
+ void add_depth_multiplier(int32_t depth_multiplier)
+ {
+ fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0);
+ }
+ void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_dilation_w_factor(int32_t dilation_w_factor)
+ {
+ fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+ }
+ void add_dilation_h_factor(int32_t dilation_h_factor)
+ {
+ fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+ }
+ explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &);
+ flatbuffers::Offset<DepthwiseConv2DOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DepthwiseConv2DOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0,
+ int32_t stride_h = 0, int32_t depth_multiplier = 0,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+{
+ DepthwiseConv2DOptionsBuilder builder_(_fbb);
+ builder_.add_dilation_h_factor(dilation_h_factor);
+ builder_.add_dilation_w_factor(dilation_w_factor);
+ builder_.add_depth_multiplier(depth_multiplier);
+ builder_.add_stride_h(stride_h);
+ builder_.add_stride_w(stride_w);
+ builder_.add_fused_activation_function(fused_activation_function);
+ builder_.add_padding(padding);
+ return builder_.Finish();
+}
+
+struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_NUM_CHANNELS = 4,
+ VT_NUM_COLUMNS_PER_CHANNEL = 6,
+ VT_EMBEDDING_DIM_PER_CHANNEL = 8
+ };
+ int32_t num_channels() const { return GetField<int32_t>(VT_NUM_CHANNELS, 0); }
+ const flatbuffers::Vector<int32_t> *num_columns_per_channel() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NUM_COLUMNS_PER_CHANNEL);
+ }
+ const flatbuffers::Vector<int32_t> *embedding_dim_per_channel() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_EMBEDDING_DIM_PER_CHANNEL);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_CHANNELS) &&
+ VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) &&
+ verifier.VerifyVector(num_columns_per_channel()) &&
+ VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) &&
+ verifier.VerifyVector(embedding_dim_per_channel()) && verifier.EndTable();
+ }
+};
+
+struct ConcatEmbeddingsOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_num_channels(int32_t num_channels)
+ {
+ fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);
+ }
+ void add_num_columns_per_channel(
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
+ {
+ fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
+ }
+ void add_embedding_dim_per_channel(
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
+ {
+ fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL,
+ embedding_dim_per_channel);
+ }
+ explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &);
+ flatbuffers::Offset<ConcatEmbeddingsOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ConcatEmbeddingsOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
+{
+ ConcatEmbeddingsOptionsBuilder builder_(_fbb);
+ builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
+ builder_.add_num_columns_per_channel(num_columns_per_channel);
+ builder_.add_num_channels(num_channels);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<ConcatEmbeddingsOptions>
+CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
+ const std::vector<int32_t> *num_columns_per_channel = nullptr,
+ const std::vector<int32_t> *embedding_dim_per_channel = nullptr)
+{
+ return onert_tflite::CreateConcatEmbeddingsOptions(
+ _fbb, num_channels,
+ num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0,
+ embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0);
+}
+
+struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_TYPE = 4
+ };
+ LSHProjectionType type() const
+ {
+ return static_cast<LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_TYPE) &&
+ verifier.EndTable();
+ }
+};
+
+struct LSHProjectionOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_type(LSHProjectionType type)
+ {
+ fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0);
+ }
+ explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &);
+ flatbuffers::Offset<LSHProjectionOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LSHProjectionOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LSHProjectionOptions>
+CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ LSHProjectionType type = LSHProjectionType_UNKNOWN)
+{
+ LSHProjectionOptionsBuilder builder_(_fbb);
+ builder_.add_type(type);
+ return builder_.Finish();
+}
+
+struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_RANK = 4,
+ VT_FUSED_ACTIVATION_FUNCTION = 6,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
+ };
+ int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); }
+ ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RANK) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+};
+
+struct SVDFOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_rank(int32_t rank) { fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0); }
+ void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(SVDFOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &);
+ flatbuffers::Offset<SVDFOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SVDFOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SVDFOptions>
+CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
+{
+ SVDFOptionsBuilder builder_(_fbb);
+ builder_.add_rank(rank);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 6
+ };
+ ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+};
+
+struct RNNOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(RNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ RNNOptionsBuilder &operator=(const RNNOptionsBuilder &);
+ flatbuffers::Offset<RNNOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RNNOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RNNOptions>
+CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
+{
+ RNNOptionsBuilder builder_(_fbb);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_TIME_MAJOR = 4,
+ VT_FUSED_ACTIVATION_FUNCTION = 6,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
+ };
+ bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
+ ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+};
+
+struct SequenceRNNOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_time_major(bool time_major)
+ {
+ fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major),
+ 0);
+ }
+ void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit SequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &);
+ flatbuffers::Offset<SequenceRNNOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SequenceRNNOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
+{
+ SequenceRNNOptionsBuilder builder_(_fbb);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_fused_activation_function(fused_activation_function);
+ builder_.add_time_major(time_major);
+ return builder_.Finish();
+}
+
+struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_TIME_MAJOR = 4,
+ VT_FUSED_ACTIVATION_FUNCTION = 6,
+ VT_MERGE_OUTPUTS = 8,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
+ };
+ bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
+ ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+};
+
+struct BidirectionalSequenceRNNOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_time_major(bool time_major)
+ {
+ fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR,
+ static_cast<uint8_t>(time_major), 0);
+ }
+ void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_merge_outputs(bool merge_outputs)
+ {
+ fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_MERGE_OUTPUTS,
+ static_cast<uint8_t>(merge_outputs), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit BidirectionalSequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &);
+ flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BidirectionalSequenceRNNOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
+{
+ BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_merge_outputs(merge_outputs);
+ builder_.add_fused_activation_function(fused_activation_function);
+ builder_.add_time_major(time_major);
+ return builder_.Finish();
+}
+
+struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_WEIGHTS_FORMAT = 6,
+ VT_KEEP_NUM_DIMS = 8,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
+ };
+ ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ FullyConnectedOptionsWeightsFormat weights_format() const
+ {
+ return static_cast<FullyConnectedOptionsWeightsFormat>(GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
+ }
+ bool keep_num_dims() const { return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0; }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<int8_t>(verifier, VT_WEIGHTS_FORMAT) &&
+ VerifyField<uint8_t>(verifier, VT_KEEP_NUM_DIMS) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+};
+
+struct FullyConnectedOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_weights_format(FullyConnectedOptionsWeightsFormat weights_format)
+ {
+ fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT,
+ static_cast<int8_t>(weights_format), 0);
+ }
+ void add_keep_num_dims(bool keep_num_dims)
+ {
+ fbb_.AddElement<uint8_t>(FullyConnectedOptions::VT_KEEP_NUM_DIMS,
+ static_cast<uint8_t>(keep_num_dims), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(FullyConnectedOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &);
+ flatbuffers::Offset<FullyConnectedOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FullyConnectedOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT,
+ bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
+{
+ FullyConnectedOptionsBuilder builder_(_fbb);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_keep_num_dims(keep_num_dims);
+ builder_.add_weights_format(weights_format);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_BETA = 4
+ };
+ float beta() const { return GetField<float>(VT_BETA, 0.0f); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_BETA) &&
+ verifier.EndTable();
+ }
+};
+
+struct SoftmaxOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_beta(float beta) { fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f); }
+ explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &);
+ flatbuffers::Offset<SoftmaxOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SoftmaxOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SoftmaxOptions>
+CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f)
+{
+ SoftmaxOptionsBuilder builder_(_fbb);
+ builder_.add_beta(beta);
+ return builder_.Finish();
+}
+
+struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_AXIS = 4,
+ VT_FUSED_ACTIVATION_FUNCTION = 6
+ };
+ int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ }
+};
+
+struct ConcatenationOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0); }
+ void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &);
+ flatbuffers::Offset<ConcatenationOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ConcatenationOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+{
+ ConcatenationOptionsBuilder builder_(_fbb);
+ builder_.add_axis(axis);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4
+ };
+ ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ }
+};
+
+struct AddOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ AddOptionsBuilder &operator=(const AddOptionsBuilder &);
+ flatbuffers::Offset<AddOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<AddOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<AddOptions>
+CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+{
+ AddOptionsBuilder builder_(_fbb);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4
+ };
+ ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ }
+};
+
+struct MulOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ MulOptionsBuilder &operator=(const MulOptionsBuilder &);
+ flatbuffers::Offset<MulOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<MulOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<MulOptions>
+CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+{
+ MulOptionsBuilder builder_(_fbb);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4
+ };
+ ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ }
+};
+
+struct L2NormOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &);
+ flatbuffers::Offset<L2NormOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<L2NormOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<L2NormOptions>
+CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+{
+ L2NormOptionsBuilder builder_(_fbb);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_RADIUS = 4,
+ VT_BIAS = 6,
+ VT_ALPHA = 8,
+ VT_BETA = 10
+ };
+ int32_t radius() const { return GetField<int32_t>(VT_RADIUS, 0); }
+ float bias() const { return GetField<float>(VT_BIAS, 0.0f); }
+ float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); }
+ float beta() const { return GetField<float>(VT_BETA, 0.0f); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RADIUS) &&
+ VerifyField<float>(verifier, VT_BIAS) && VerifyField<float>(verifier, VT_ALPHA) &&
+ VerifyField<float>(verifier, VT_BETA) && verifier.EndTable();
+ }
+};
+
+struct LocalResponseNormalizationOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_radius(int32_t radius)
+ {
+ fbb_.AddElement<int32_t>(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0);
+ }
+ void add_bias(float bias)
+ {
+ fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f);
+ }
+ void add_alpha(float alpha)
+ {
+ fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f);
+ }
+ void add_beta(float beta)
+ {
+ fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
+ }
+ explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ LocalResponseNormalizationOptionsBuilder &
+ operator=(const LocalResponseNormalizationOptionsBuilder &);
+ flatbuffers::Offset<LocalResponseNormalizationOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LocalResponseNormalizationOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LocalResponseNormalizationOptions>
+CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t radius = 0,
+ float bias = 0.0f, float alpha = 0.0f, float beta = 0.0f)
+{
+ LocalResponseNormalizationOptionsBuilder builder_(_fbb);
+ builder_.add_beta(beta);
+ builder_.add_alpha(alpha);
+ builder_.add_bias(bias);
+ builder_.add_radius(radius);
+ return builder_.Finish();
+}
+
// Generated read-only accessor for the LSTMOptions table of the TFLite schema.
// NOTE(review): machine-generated (flatc) — change the .fbs schema and
// regenerate instead of patching this file.
struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets for each field of the table.
  enum
  {
    VT_FUSED_ACTIVATION_FUNCTION = 4,
    VT_CELL_CLIP = 6,
    VT_PROJ_CLIP = 8,
    VT_KERNEL_TYPE = 10,
    VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
  };
  ActivationFunctionType fused_activation_function() const
  {
    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
  }
  // Clip thresholds; 0.0f is the schema default when the field is absent.
  float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
  float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
  LSTMKernelType kernel_type() const
  {
    return static_cast<LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
  }
  bool asymmetric_quantize_inputs() const
  {
    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
  }
  // Structural verification of this table within the enclosing buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) &&
           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
           VerifyField<float>(verifier, VT_CELL_CLIP) &&
           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
           VerifyField<int8_t>(verifier, VT_KERNEL_TYPE) &&
           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
  }
};

// Incremental writer for an LSTMOptions table.
struct LSTMOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start returned by StartTable()
  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
  {
    fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
                            static_cast<int8_t>(fused_activation_function), 0);
  }
  void add_cell_clip(float cell_clip)
  {
    fbb_.AddElement<float>(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
  }
  void add_proj_clip(float proj_clip)
  {
    fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
  }
  void add_kernel_type(LSTMKernelType kernel_type)
  {
    fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0);
  }
  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
  {
    fbb_.AddElement<uint8_t>(LSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
                             static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
  }
  explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared but deliberately not defined: builders are non-assignable
  // (flatc generator convention).
  LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &);
  // Closes the table and returns its offset in the buffer.
  flatbuffers::Offset<LSTMOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<LSTMOptions>(end);
    return o;
  }
};

// One-shot helper that writes all fields and finishes the table.
inline flatbuffers::Offset<LSTMOptions>
CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
                  ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
                  float cell_clip = 0.0f, float proj_clip = 0.0f,
                  LSTMKernelType kernel_type = LSTMKernelType_FULL,
                  bool asymmetric_quantize_inputs = false)
{
  LSTMOptionsBuilder builder_(_fbb);
  // Larger (4-byte) fields are added before 1-byte fields — generator
  // convention to minimize alignment padding; do not reorder.
  builder_.add_proj_clip(proj_clip);
  builder_.add_cell_clip(cell_clip);
  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
  builder_.add_kernel_type(kernel_type);
  builder_.add_fused_activation_function(fused_activation_function);
  return builder_.Finish();
}
+
// Generated accessor for the UnidirectionalSequenceLSTMOptions table
// (flatc output — edit the schema, not this file).
struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets.
  enum
  {
    VT_FUSED_ACTIVATION_FUNCTION = 4,
    VT_CELL_CLIP = 6,
    VT_PROJ_CLIP = 8,
    VT_TIME_MAJOR = 10,
    VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
  };
  ActivationFunctionType fused_activation_function() const
  {
    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
  }
  float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
  float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
  // Defaults to false here (contrast: BidirectionalSequenceLSTMOptions
  // defaults time_major to true).
  bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
  bool asymmetric_quantize_inputs() const
  {
    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
  }
  // Structural verification of this table within the buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) &&
           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
           VerifyField<float>(verifier, VT_CELL_CLIP) &&
           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
  }
};

// Incremental writer for a UnidirectionalSequenceLSTMOptions table.
struct UnidirectionalSequenceLSTMOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
  {
    fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
                            static_cast<int8_t>(fused_activation_function), 0);
  }
  void add_cell_clip(float cell_clip)
  {
    fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
  }
  void add_proj_clip(float proj_clip)
  {
    fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
  }
  void add_time_major(bool time_major)
  {
    fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_TIME_MAJOR,
                             static_cast<uint8_t>(time_major), 0);
  }
  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
  {
    fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
                             static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
  }
  explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
      : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  UnidirectionalSequenceLSTMOptionsBuilder &
  operator=(const UnidirectionalSequenceLSTMOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>(end);
    return o;
  }
};

// One-shot helper that writes all fields and finishes the table.
inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
CreateUnidirectionalSequenceLSTMOptions(
    flatbuffers::FlatBufferBuilder &_fbb,
    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
    float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
    bool asymmetric_quantize_inputs = false)
{
  UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
  // 4-byte fields first, then 1-byte fields (alignment-driven generator
  // order); do not reorder.
  builder_.add_proj_clip(proj_clip);
  builder_.add_cell_clip(cell_clip);
  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
  builder_.add_time_major(time_major);
  builder_.add_fused_activation_function(fused_activation_function);
  return builder_.Finish();
}
+
// Generated accessor for the BidirectionalSequenceLSTMOptions table
// (flatc output — edit the schema, not this file).
struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets.
  enum
  {
    VT_FUSED_ACTIVATION_FUNCTION = 4,
    VT_CELL_CLIP = 6,
    VT_PROJ_CLIP = 8,
    VT_MERGE_OUTPUTS = 10,
    VT_TIME_MAJOR = 12,
    VT_ASYMMETRIC_QUANTIZE_INPUTS = 14
  };
  ActivationFunctionType fused_activation_function() const
  {
    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
  }
  float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
  float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
  bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
  // NOTE: schema default is TRUE (1) here, unlike the unidirectional
  // variant where time_major defaults to false.
  bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 1) != 0; }
  bool asymmetric_quantize_inputs() const
  {
    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
  }
  // Structural verification of this table within the buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) &&
           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
           VerifyField<float>(verifier, VT_CELL_CLIP) &&
           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
           VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) &&
           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
  }
};

// Incremental writer for a BidirectionalSequenceLSTMOptions table.
struct BidirectionalSequenceLSTMOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
  {
    fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
                            static_cast<int8_t>(fused_activation_function), 0);
  }
  void add_cell_clip(float cell_clip)
  {
    fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
  }
  void add_proj_clip(float proj_clip)
  {
    fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
  }
  void add_merge_outputs(bool merge_outputs)
  {
    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS,
                             static_cast<uint8_t>(merge_outputs), 0);
  }
  void add_time_major(bool time_major)
  {
    // Field default is 1 (true): a true value is NOT serialized.
    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_TIME_MAJOR,
                             static_cast<uint8_t>(time_major), 1);
  }
  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
  {
    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
                             static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
  }
  explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
      : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  BidirectionalSequenceLSTMOptionsBuilder &
  operator=(const BidirectionalSequenceLSTMOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<BidirectionalSequenceLSTMOptions>(end);
    return o;
  }
};

// One-shot helper that writes all fields and finishes the table.
inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
    flatbuffers::FlatBufferBuilder &_fbb,
    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
    float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
    bool time_major = true, bool asymmetric_quantize_inputs = false)
{
  BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
  // 4-byte fields first, then 1-byte fields (alignment-driven order).
  builder_.add_proj_clip(proj_clip);
  builder_.add_cell_clip(cell_clip);
  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
  builder_.add_time_major(time_major);
  builder_.add_merge_outputs(merge_outputs);
  builder_.add_fused_activation_function(fused_activation_function);
  return builder_.Finish();
}
+
// Generated accessor for the ResizeBilinearOptions table.
// NOTE: vtable offsets start at 8 — slots 4 and 6 belonged to fields that
// were deprecated in the schema and are no longer exposed.
struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets.
  enum
  {
    VT_ALIGN_CORNERS = 8,
    VT_HALF_PIXEL_CENTERS = 10
  };
  bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
  bool half_pixel_centers() const { return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0; }
  // Structural verification of this table within the buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
           VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) && verifier.EndTable();
  }
};

// Incremental writer for a ResizeBilinearOptions table.
struct ResizeBilinearOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  void add_align_corners(bool align_corners)
  {
    fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_ALIGN_CORNERS,
                             static_cast<uint8_t>(align_corners), 0);
  }
  void add_half_pixel_centers(bool half_pixel_centers)
  {
    fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_HALF_PIXEL_CENTERS,
                             static_cast<uint8_t>(half_pixel_centers), 0);
  }
  explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<ResizeBilinearOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<ResizeBilinearOptions>(end);
    return o;
  }
};

// One-shot helper that writes both fields and finishes the table.
inline flatbuffers::Offset<ResizeBilinearOptions>
CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false,
                            bool half_pixel_centers = false)
{
  ResizeBilinearOptionsBuilder builder_(_fbb);
  builder_.add_half_pixel_centers(half_pixel_centers);
  builder_.add_align_corners(align_corners);
  return builder_.Finish();
}
+
// Generated accessor for the ResizeNearestNeighborOptions table.
struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets.
  enum
  {
    VT_ALIGN_CORNERS = 4
  };
  bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
  // Structural verification of this table within the buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
           verifier.EndTable();
  }
};

// Incremental writer for a ResizeNearestNeighborOptions table.
struct ResizeNearestNeighborOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  void add_align_corners(bool align_corners)
  {
    fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS,
                             static_cast<uint8_t>(align_corners), 0);
  }
  explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<ResizeNearestNeighborOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<ResizeNearestNeighborOptions>(end);
    return o;
  }
};

// One-shot helper that writes the field and finishes the table.
inline flatbuffers::Offset<ResizeNearestNeighborOptions>
CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false)
{
  ResizeNearestNeighborOptionsBuilder builder_(_fbb);
  builder_.add_align_corners(align_corners);
  return builder_.Finish();
}
+
// Generated accessor for the CallOptions table; `subgraph` is the index of
// the subgraph to call.
struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets.
  enum
  {
    VT_SUBGRAPH = 4
  };
  uint32_t subgraph() const { return GetField<uint32_t>(VT_SUBGRAPH, 0); }
  // Structural verification of this table within the buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_SUBGRAPH) &&
           verifier.EndTable();
  }
};

// Incremental writer for a CallOptions table.
struct CallOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  void add_subgraph(uint32_t subgraph)
  {
    fbb_.AddElement<uint32_t>(CallOptions::VT_SUBGRAPH, subgraph, 0);
  }
  explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  CallOptionsBuilder &operator=(const CallOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<CallOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<CallOptions>(end);
    return o;
  }
};

// One-shot helper that writes the field and finishes the table.
inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb,
                                                          uint32_t subgraph = 0)
{
  CallOptionsBuilder builder_(_fbb);
  builder_.add_subgraph(subgraph);
  return builder_.Finish();
}
+
// Generated accessor for the PadOptions table. The table carries no fields;
// it exists only so the op's options slot is populated.
struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // Verifies the (empty) table layout.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && verifier.EndTable();
  }
};

// Writer for the empty PadOptions table.
struct PadOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  PadOptionsBuilder &operator=(const PadOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<PadOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<PadOptions>(end);
    return o;
  }
};

// Builds an empty PadOptions table and returns its offset.
inline flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb)
{
  PadOptionsBuilder builder_(_fbb);
  return builder_.Finish();
}
+
// Generated accessor for the PadV2Options table (no fields).
struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // Verifies the (empty) table layout.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && verifier.EndTable();
  }
};

// Writer for the empty PadV2Options table.
struct PadV2OptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<PadV2Options> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<PadV2Options>(end);
    return o;
  }
};

// Builds an empty PadV2Options table and returns its offset.
inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb)
{
  PadV2OptionsBuilder builder_(_fbb);
  return builder_.Finish();
}
+
// Generated accessor for the ReshapeOptions table; `new_shape` is the
// target shape as an int32 vector.
struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets.
  enum
  {
    VT_NEW_SHAPE = 4
  };
  // Returns nullptr when the field is absent.
  const flatbuffers::Vector<int32_t> *new_shape() const
  {
    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NEW_SHAPE);
  }
  // Structural verification, including the vector's bounds.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NEW_SHAPE) &&
           verifier.VerifyVector(new_shape()) && verifier.EndTable();
  }
};

// Incremental writer for a ReshapeOptions table. The new_shape vector must
// be created in the buffer BEFORE this builder is constructed (no nested
// allocation while a table is open).
struct ReshapeOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape)
  {
    fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape);
  }
  explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<ReshapeOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<ReshapeOptions>(end);
    return o;
  }
};

// One-shot helper taking a pre-created vector offset.
inline flatbuffers::Offset<ReshapeOptions>
CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb,
                     flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape = 0)
{
  ReshapeOptionsBuilder builder_(_fbb);
  builder_.add_new_shape(new_shape);
  return builder_.Finish();
}

// Convenience overload: serializes a std::vector into the buffer first
// (nullptr leaves the field absent), then builds the table.
inline flatbuffers::Offset<ReshapeOptions>
CreateReshapeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
                           const std::vector<int32_t> *new_shape = nullptr)
{
  return onert_tflite::CreateReshapeOptions(_fbb,
                                            new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0);
}
+
// Generated accessor for the SpaceToBatchNDOptions table (no fields).
struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // Verifies the (empty) table layout.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && verifier.EndTable();
  }
};

// Writer for the empty SpaceToBatchNDOptions table.
struct SpaceToBatchNDOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<SpaceToBatchNDOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<SpaceToBatchNDOptions>(end);
    return o;
  }
};

// Builds an empty SpaceToBatchNDOptions table and returns its offset.
inline flatbuffers::Offset<SpaceToBatchNDOptions>
CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
{
  SpaceToBatchNDOptionsBuilder builder_(_fbb);
  return builder_.Finish();
}
+
// Generated accessor for the BatchToSpaceNDOptions table (no fields).
struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // Verifies the (empty) table layout.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && verifier.EndTable();
  }
};

// Writer for the empty BatchToSpaceNDOptions table.
struct BatchToSpaceNDOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<BatchToSpaceNDOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<BatchToSpaceNDOptions>(end);
    return o;
  }
};

// Builds an empty BatchToSpaceNDOptions table and returns its offset.
inline flatbuffers::Offset<BatchToSpaceNDOptions>
CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
{
  BatchToSpaceNDOptionsBuilder builder_(_fbb);
  return builder_.Finish();
}
+
// Generated accessor for the SkipGramOptions table.
struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets.
  enum
  {
    VT_NGRAM_SIZE = 4,
    VT_MAX_SKIP_SIZE = 6,
    VT_INCLUDE_ALL_NGRAMS = 8
  };
  int32_t ngram_size() const { return GetField<int32_t>(VT_NGRAM_SIZE, 0); }
  int32_t max_skip_size() const { return GetField<int32_t>(VT_MAX_SKIP_SIZE, 0); }
  bool include_all_ngrams() const { return GetField<uint8_t>(VT_INCLUDE_ALL_NGRAMS, 0) != 0; }
  // Structural verification of this table within the buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NGRAM_SIZE) &&
           VerifyField<int32_t>(verifier, VT_MAX_SKIP_SIZE) &&
           VerifyField<uint8_t>(verifier, VT_INCLUDE_ALL_NGRAMS) && verifier.EndTable();
  }
};

// Incremental writer for a SkipGramOptions table.
struct SkipGramOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  void add_ngram_size(int32_t ngram_size)
  {
    fbb_.AddElement<int32_t>(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0);
  }
  void add_max_skip_size(int32_t max_skip_size)
  {
    fbb_.AddElement<int32_t>(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0);
  }
  void add_include_all_ngrams(bool include_all_ngrams)
  {
    fbb_.AddElement<uint8_t>(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS,
                             static_cast<uint8_t>(include_all_ngrams), 0);
  }
  explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<SkipGramOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<SkipGramOptions>(end);
    return o;
  }
};

// One-shot helper; 4-byte fields are added before the 1-byte flag
// (alignment-driven generator order).
inline flatbuffers::Offset<SkipGramOptions>
CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size = 0,
                      int32_t max_skip_size = 0, bool include_all_ngrams = false)
{
  SkipGramOptionsBuilder builder_(_fbb);
  builder_.add_max_skip_size(max_skip_size);
  builder_.add_ngram_size(ngram_size);
  builder_.add_include_all_ngrams(include_all_ngrams);
  return builder_.Finish();
}
+
// Generated accessor for the SpaceToDepthOptions table.
struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets.
  enum
  {
    VT_BLOCK_SIZE = 4
  };
  int32_t block_size() const { return GetField<int32_t>(VT_BLOCK_SIZE, 0); }
  // Structural verification of this table within the buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) &&
           verifier.EndTable();
  }
};

// Incremental writer for a SpaceToDepthOptions table.
struct SpaceToDepthOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  void add_block_size(int32_t block_size)
  {
    fbb_.AddElement<int32_t>(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0);
  }
  explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<SpaceToDepthOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<SpaceToDepthOptions>(end);
    return o;
  }
};

// One-shot helper that writes the field and finishes the table.
inline flatbuffers::Offset<SpaceToDepthOptions>
CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_size = 0)
{
  SpaceToDepthOptionsBuilder builder_(_fbb);
  builder_.add_block_size(block_size);
  return builder_.Finish();
}
+
// Generated accessor for the DepthToSpaceOptions table (mirror of
// SpaceToDepthOptions).
struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets.
  enum
  {
    VT_BLOCK_SIZE = 4
  };
  int32_t block_size() const { return GetField<int32_t>(VT_BLOCK_SIZE, 0); }
  // Structural verification of this table within the buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) &&
           verifier.EndTable();
  }
};

// Incremental writer for a DepthToSpaceOptions table.
struct DepthToSpaceOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  void add_block_size(int32_t block_size)
  {
    fbb_.AddElement<int32_t>(DepthToSpaceOptions::VT_BLOCK_SIZE, block_size, 0);
  }
  explicit DepthToSpaceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  DepthToSpaceOptionsBuilder &operator=(const DepthToSpaceOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<DepthToSpaceOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<DepthToSpaceOptions>(end);
    return o;
  }
};

// One-shot helper that writes the field and finishes the table.
inline flatbuffers::Offset<DepthToSpaceOptions>
CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_size = 0)
{
  DepthToSpaceOptionsBuilder builder_(_fbb);
  builder_.add_block_size(block_size);
  return builder_.Finish();
}
+
// Generated accessor for the SubOptions table (fused activation only).
struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets.
  enum
  {
    VT_FUSED_ACTIVATION_FUNCTION = 4
  };
  ActivationFunctionType fused_activation_function() const
  {
    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
  }
  // Structural verification of this table within the buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) &&
           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
  }
};

// Incremental writer for a SubOptions table.
struct SubOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
  {
    fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION,
                            static_cast<int8_t>(fused_activation_function), 0);
  }
  explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  SubOptionsBuilder &operator=(const SubOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<SubOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<SubOptions>(end);
    return o;
  }
};

// One-shot helper that writes the field and finishes the table.
inline flatbuffers::Offset<SubOptions>
CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb,
                 ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
{
  SubOptionsBuilder builder_(_fbb);
  builder_.add_fused_activation_function(fused_activation_function);
  return builder_.Finish();
}
+
// Generated accessor for the DivOptions table (fused activation only;
// structurally identical to SubOptions).
struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets.
  enum
  {
    VT_FUSED_ACTIVATION_FUNCTION = 4
  };
  ActivationFunctionType fused_activation_function() const
  {
    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
  }
  // Structural verification of this table within the buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) &&
           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
  }
};

// Incremental writer for a DivOptions table.
struct DivOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
  {
    fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION,
                            static_cast<int8_t>(fused_activation_function), 0);
  }
  explicit DivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  DivOptionsBuilder &operator=(const DivOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<DivOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<DivOptions>(end);
    return o;
  }
};

// One-shot helper that writes the field and finishes the table.
inline flatbuffers::Offset<DivOptions>
CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb,
                 ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
{
  DivOptionsBuilder builder_(_fbb);
  builder_.add_fused_activation_function(fused_activation_function);
  return builder_.Finish();
}
+
// Generated accessor for the TopKV2Options table (no fields).
struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // Verifies the (empty) table layout.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && verifier.EndTable();
  }
};

// Writer for the empty TopKV2Options table.
struct TopKV2OptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<TopKV2Options> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<TopKV2Options>(end);
    return o;
  }
};

// Builds an empty TopKV2Options table and returns its offset.
inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb)
{
  TopKV2OptionsBuilder builder_(_fbb);
  return builder_.Finish();
}
+
// Generated accessor for the EmbeddingLookupSparseOptions table.
struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets.
  enum
  {
    VT_COMBINER = 4
  };
  // How looked-up embeddings are combined; 0 == CombinerType_SUM default.
  CombinerType combiner() const
  {
    return static_cast<CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
  }
  // Structural verification of this table within the buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_COMBINER) &&
           verifier.EndTable();
  }
};

// Incremental writer for an EmbeddingLookupSparseOptions table.
struct EmbeddingLookupSparseOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  void add_combiner(CombinerType combiner)
  {
    fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER,
                            static_cast<int8_t>(combiner), 0);
  }
  explicit EmbeddingLookupSparseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<EmbeddingLookupSparseOptions>(end);
    return o;
  }
};

// One-shot helper that writes the field and finishes the table.
inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
                                   CombinerType combiner = CombinerType_SUM)
{
  EmbeddingLookupSparseOptionsBuilder builder_(_fbb);
  builder_.add_combiner(combiner);
  return builder_.Finish();
}
+
// Generated accessor for the GatherOptions table.
struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // vtable slot offsets.
  enum
  {
    VT_AXIS = 4
  };
  int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
  // Structural verification of this table within the buffer.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
           verifier.EndTable();
  }
};

// Incremental writer for a GatherOptions table.
struct GatherOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); }
  explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  GatherOptionsBuilder &operator=(const GatherOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<GatherOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<GatherOptions>(end);
    return o;
  }
};

// One-shot helper that writes the field and finishes the table.
inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb,
                                                              int32_t axis = 0)
{
  GatherOptionsBuilder builder_(_fbb);
  builder_.add_axis(axis);
  return builder_.Finish();
}
+
// Generated accessor for the TransposeOptions table (no fields).
struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // Verifies the (empty) table layout.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && verifier.EndTable();
  }
};

// Writer for the empty TransposeOptions table.
struct TransposeOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<TransposeOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<TransposeOptions>(end);
    return o;
  }
};

// Builds an empty TransposeOptions table and returns its offset.
inline flatbuffers::Offset<TransposeOptions>
CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb)
{
  TransposeOptionsBuilder builder_(_fbb);
  return builder_.Finish();
}
+
// Generated accessor for the ExpOptions table (no fields).
struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // Verifies the (empty) table layout.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && verifier.EndTable();
  }
};

// Writer for the empty ExpOptions table.
struct ExpOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  ExpOptionsBuilder &operator=(const ExpOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<ExpOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<ExpOptions>(end);
    return o;
  }
};

// Builds an empty ExpOptions table and returns its offset.
inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb)
{
  ExpOptionsBuilder builder_(_fbb);
  return builder_.Finish();
}
+
// Generated accessor for the CosOptions table (no fields).
struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
  // Verifies the (empty) table layout.
  bool Verify(flatbuffers::Verifier &verifier) const
  {
    return VerifyTableStart(verifier) && verifier.EndTable();
  }
};

// Writer for the empty CosOptions table.
struct CosOptionsBuilder
{
  flatbuffers::FlatBufferBuilder &fbb_; // target buffer
  flatbuffers::uoffset_t start_;        // table start offset
  explicit CosOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
  {
    start_ = fbb_.StartTable();
  }
  // Declared, never defined: disables assignment (generator convention).
  CosOptionsBuilder &operator=(const CosOptionsBuilder &);
  // Closes the table and returns its offset.
  flatbuffers::Offset<CosOptions> Finish()
  {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<CosOptions>(end);
    return o;
  }
};

// Builds an empty CosOptions table and returns its offset.
inline flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb)
{
  CosOptionsBuilder builder_(_fbb);
  return builder_.Finish();
}
+
+struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_KEEP_DIMS = 4
+ };
+ bool keep_dims() const { return GetField<uint8_t>(VT_KEEP_DIMS, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_KEEP_DIMS) &&
+ verifier.EndTable();
+ }
+};
+
+struct ReducerOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_keep_dims(bool keep_dims)
+ {
+ fbb_.AddElement<uint8_t>(ReducerOptions::VT_KEEP_DIMS, static_cast<uint8_t>(keep_dims), 0);
+ }
+ explicit ReducerOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &);
+ flatbuffers::Offset<ReducerOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ReducerOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ReducerOptions>
+CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = false)
+{
+ ReducerOptionsBuilder builder_(_fbb);
+ builder_.add_keep_dims(keep_dims);
+ return builder_.Finish();
+}
+
+struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_SQUEEZE_DIMS = 4
+ };
+ const flatbuffers::Vector<int32_t> *squeeze_dims() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SQUEEZE_DIMS);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SQUEEZE_DIMS) &&
+ verifier.VerifyVector(squeeze_dims()) && verifier.EndTable();
+ }
+};
+
+struct SqueezeOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims)
+ {
+ fbb_.AddOffset(SqueezeOptions::VT_SQUEEZE_DIMS, squeeze_dims);
+ }
+ explicit SqueezeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &);
+ flatbuffers::Offset<SqueezeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SqueezeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SqueezeOptions>
+CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims = 0)
+{
+ SqueezeOptionsBuilder builder_(_fbb);
+ builder_.add_squeeze_dims(squeeze_dims);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SqueezeOptions>
+CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<int32_t> *squeeze_dims = nullptr)
+{
+ return onert_tflite::CreateSqueezeOptions(
+ _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0);
+}
+
+struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_NUM_SPLITS = 4
+ };
+ int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) &&
+ verifier.EndTable();
+ }
+};
+
+struct SplitOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_num_splits(int32_t num_splits)
+ {
+ fbb_.AddElement<int32_t>(SplitOptions::VT_NUM_SPLITS, num_splits, 0);
+ }
+ explicit SplitOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SplitOptionsBuilder &operator=(const SplitOptionsBuilder &);
+ flatbuffers::Offset<SplitOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SplitOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t num_splits = 0)
+{
+ SplitOptionsBuilder builder_(_fbb);
+ builder_.add_num_splits(num_splits);
+ return builder_.Finish();
+}
+
+struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_NUM_SPLITS = 4
+ };
+ int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) &&
+ verifier.EndTable();
+ }
+};
+
+struct SplitVOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_num_splits(int32_t num_splits)
+ {
+ fbb_.AddElement<int32_t>(SplitVOptions::VT_NUM_SPLITS, num_splits, 0);
+ }
+ explicit SplitVOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &);
+ flatbuffers::Offset<SplitVOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SplitVOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t num_splits = 0)
+{
+ SplitVOptionsBuilder builder_(_fbb);
+ builder_.add_num_splits(num_splits);
+ return builder_.Finish();
+}
+
+struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_BEGIN_MASK = 4,
+ VT_END_MASK = 6,
+ VT_ELLIPSIS_MASK = 8,
+ VT_NEW_AXIS_MASK = 10,
+ VT_SHRINK_AXIS_MASK = 12
+ };
+ int32_t begin_mask() const { return GetField<int32_t>(VT_BEGIN_MASK, 0); }
+ int32_t end_mask() const { return GetField<int32_t>(VT_END_MASK, 0); }
+ int32_t ellipsis_mask() const { return GetField<int32_t>(VT_ELLIPSIS_MASK, 0); }
+ int32_t new_axis_mask() const { return GetField<int32_t>(VT_NEW_AXIS_MASK, 0); }
+ int32_t shrink_axis_mask() const { return GetField<int32_t>(VT_SHRINK_AXIS_MASK, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BEGIN_MASK) &&
+ VerifyField<int32_t>(verifier, VT_END_MASK) &&
+ VerifyField<int32_t>(verifier, VT_ELLIPSIS_MASK) &&
+ VerifyField<int32_t>(verifier, VT_NEW_AXIS_MASK) &&
+ VerifyField<int32_t>(verifier, VT_SHRINK_AXIS_MASK) && verifier.EndTable();
+ }
+};
+
+struct StridedSliceOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_begin_mask(int32_t begin_mask)
+ {
+ fbb_.AddElement<int32_t>(StridedSliceOptions::VT_BEGIN_MASK, begin_mask, 0);
+ }
+ void add_end_mask(int32_t end_mask)
+ {
+ fbb_.AddElement<int32_t>(StridedSliceOptions::VT_END_MASK, end_mask, 0);
+ }
+ void add_ellipsis_mask(int32_t ellipsis_mask)
+ {
+ fbb_.AddElement<int32_t>(StridedSliceOptions::VT_ELLIPSIS_MASK, ellipsis_mask, 0);
+ }
+ void add_new_axis_mask(int32_t new_axis_mask)
+ {
+ fbb_.AddElement<int32_t>(StridedSliceOptions::VT_NEW_AXIS_MASK, new_axis_mask, 0);
+ }
+ void add_shrink_axis_mask(int32_t shrink_axis_mask)
+ {
+ fbb_.AddElement<int32_t>(StridedSliceOptions::VT_SHRINK_AXIS_MASK, shrink_axis_mask, 0);
+ }
+ explicit StridedSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &);
+ flatbuffers::Offset<StridedSliceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<StridedSliceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<StridedSliceOptions>
+CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t begin_mask = 0,
+ int32_t end_mask = 0, int32_t ellipsis_mask = 0,
+ int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0)
+{
+ StridedSliceOptionsBuilder builder_(_fbb);
+ builder_.add_shrink_axis_mask(shrink_axis_mask);
+ builder_.add_new_axis_mask(new_axis_mask);
+ builder_.add_ellipsis_mask(ellipsis_mask);
+ builder_.add_end_mask(end_mask);
+ builder_.add_begin_mask(begin_mask);
+ return builder_.Finish();
+}
+
+struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct LogSoftmaxOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &);
+ flatbuffers::Offset<LogSoftmaxOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LogSoftmaxOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LogSoftmaxOptions>
+CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LogSoftmaxOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_IN_DATA_TYPE = 4,
+ VT_OUT_DATA_TYPE = 6
+ };
+ TensorType in_data_type() const
+ {
+ return static_cast<TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
+ }
+ TensorType out_data_type() const
+ {
+ return static_cast<TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_IN_DATA_TYPE) &&
+ VerifyField<int8_t>(verifier, VT_OUT_DATA_TYPE) && verifier.EndTable();
+ }
+};
+
+struct CastOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_in_data_type(TensorType in_data_type)
+ {
+ fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, static_cast<int8_t>(in_data_type), 0);
+ }
+ void add_out_data_type(TensorType out_data_type)
+ {
+ fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, static_cast<int8_t>(out_data_type), 0);
+ }
+ explicit CastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ CastOptionsBuilder &operator=(const CastOptionsBuilder &);
+ flatbuffers::Offset<CastOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CastOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CastOptions>
+CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ TensorType in_data_type = TensorType_FLOAT32,
+ TensorType out_data_type = TensorType_FLOAT32)
+{
+ CastOptionsBuilder builder_(_fbb);
+ builder_.add_out_data_type(out_data_type);
+ builder_.add_in_data_type(in_data_type);
+ return builder_.Finish();
+}
+
+struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct DequantizeOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &);
+ flatbuffers::Offset<DequantizeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DequantizeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DequantizeOptions>
+CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ DequantizeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct MaximumMinimumOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &);
+ flatbuffers::Offset<MaximumMinimumOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<MaximumMinimumOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<MaximumMinimumOptions>
+CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ MaximumMinimumOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct TileOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ TileOptionsBuilder &operator=(const TileOptionsBuilder &);
+ flatbuffers::Offset<TileOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TileOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ TileOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_OUTPUT_TYPE = 4
+ };
+ TensorType output_type() const
+ {
+ return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) &&
+ verifier.EndTable();
+ }
+};
+
+struct ArgMaxOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_output_type(TensorType output_type)
+ {
+ fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
+ }
+ explicit ArgMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &);
+ flatbuffers::Offset<ArgMaxOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ArgMaxOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ArgMaxOptions>
+CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ TensorType output_type = TensorType_FLOAT32)
+{
+ ArgMaxOptionsBuilder builder_(_fbb);
+ builder_.add_output_type(output_type);
+ return builder_.Finish();
+}
+
+struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_OUTPUT_TYPE = 4
+ };
+ TensorType output_type() const
+ {
+ return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) &&
+ verifier.EndTable();
+ }
+};
+
+struct ArgMinOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_output_type(TensorType output_type)
+ {
+ fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
+ }
+ explicit ArgMinOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &);
+ flatbuffers::Offset<ArgMinOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ArgMinOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ArgMinOptions>
+CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ TensorType output_type = TensorType_FLOAT32)
+{
+ ArgMinOptionsBuilder builder_(_fbb);
+ builder_.add_output_type(output_type);
+ return builder_.Finish();
+}
+
+struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct GreaterOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &);
+ flatbuffers::Offset<GreaterOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<GreaterOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<GreaterOptions>
+CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ GreaterOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct GreaterEqualOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &);
+ flatbuffers::Offset<GreaterEqualOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<GreaterEqualOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<GreaterEqualOptions>
+CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ GreaterEqualOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct LessOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ LessOptionsBuilder &operator=(const LessOptionsBuilder &);
+ flatbuffers::Offset<LessOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LessOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LessOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct LessEqualOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &);
+ flatbuffers::Offset<LessEqualOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LessEqualOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LessEqualOptions>
+CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LessEqualOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct NegOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ NegOptionsBuilder &operator=(const NegOptionsBuilder &);
+ flatbuffers::Offset<NegOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<NegOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ NegOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct SelectOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SelectOptionsBuilder &operator=(const SelectOptionsBuilder &);
+ flatbuffers::Offset<SelectOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SelectOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SelectOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct SliceOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SliceOptionsBuilder &operator=(const SliceOptionsBuilder &);
+ flatbuffers::Offset<SliceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SliceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SliceOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_PADDING = 4,
+ VT_STRIDE_W = 6,
+ VT_STRIDE_H = 8
+ };
+ Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+ int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_H) && verifier.EndTable();
+ }
+};
+
+struct TransposeConvOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_padding(Padding padding)
+ {
+ fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+ }
+ void add_stride_w(int32_t stride_w)
+ {
+ fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_W, stride_w, 0);
+ }
+ void add_stride_h(int32_t stride_h)
+ {
+ fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_H, stride_h, 0);
+ }
+ explicit TransposeConvOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &);
+ flatbuffers::Offset<TransposeConvOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TransposeConvOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TransposeConvOptions>
+CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0)
+{
+ TransposeConvOptionsBuilder builder_(_fbb);
+ builder_.add_stride_h(stride_h);
+ builder_.add_stride_w(stride_w);
+ builder_.add_padding(padding);
+ return builder_.Finish();
+}
+
+struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct ExpandDimsOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &);
+ flatbuffers::Offset<ExpandDimsOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ExpandDimsOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ExpandDimsOptions>
+CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ExpandDimsOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_VALIDATE_INDICES = 4
+ };
+ bool validate_indices() const { return GetField<uint8_t>(VT_VALIDATE_INDICES, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_VALIDATE_INDICES) &&
+ verifier.EndTable();
+ }
+};
+
+struct SparseToDenseOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_validate_indices(bool validate_indices)
+ {
+ fbb_.AddElement<uint8_t>(SparseToDenseOptions::VT_VALIDATE_INDICES,
+ static_cast<uint8_t>(validate_indices), 0);
+ }
+ explicit SparseToDenseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &);
+ flatbuffers::Offset<SparseToDenseOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SparseToDenseOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SparseToDenseOptions>
+CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, bool validate_indices = false)
+{
+ SparseToDenseOptionsBuilder builder_(_fbb);
+ builder_.add_validate_indices(validate_indices);
+ return builder_.Finish();
+}
+
+struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct EqualOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ EqualOptionsBuilder &operator=(const EqualOptionsBuilder &);
+ flatbuffers::Offset<EqualOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<EqualOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ EqualOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct NotEqualOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &);
+ flatbuffers::Offset<NotEqualOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<NotEqualOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<NotEqualOptions>
+CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ NotEqualOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_OUT_TYPE = 4
+ };
+ TensorType out_type() const { return static_cast<TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0)); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUT_TYPE) &&
+ verifier.EndTable();
+ }
+};
+
+struct ShapeOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_out_type(TensorType out_type)
+ {
+ fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0);
+ }
+ explicit ShapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &);
+ flatbuffers::Offset<ShapeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ShapeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ShapeOptions>
+CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = TensorType_FLOAT32)
+{
+ ShapeOptionsBuilder builder_(_fbb);
+ builder_.add_out_type(out_type);
+ return builder_.Finish();
+}
+
+struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct RankOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit RankOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ RankOptionsBuilder &operator=(const RankOptionsBuilder &);
+ flatbuffers::Offset<RankOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RankOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ RankOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct PowOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ PowOptionsBuilder &operator=(const PowOptionsBuilder &);
+ flatbuffers::Offset<PowOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<PowOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ PowOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_MIN = 4,
+ VT_MAX = 6,
+ VT_NUM_BITS = 8,
+ VT_NARROW_RANGE = 10
+ };
+ float min() const { return GetField<float>(VT_MIN, 0.0f); }
+ float max() const { return GetField<float>(VT_MAX, 0.0f); }
+ int32_t num_bits() const { return GetField<int32_t>(VT_NUM_BITS, 0); }
+ bool narrow_range() const { return GetField<uint8_t>(VT_NARROW_RANGE, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_MIN) &&
+ VerifyField<float>(verifier, VT_MAX) && VerifyField<int32_t>(verifier, VT_NUM_BITS) &&
+ VerifyField<uint8_t>(verifier, VT_NARROW_RANGE) && verifier.EndTable();
+ }
+};
+
+struct FakeQuantOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_min(float min) { fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f); }
+ void add_max(float max) { fbb_.AddElement<float>(FakeQuantOptions::VT_MAX, max, 0.0f); }
+ void add_num_bits(int32_t num_bits)
+ {
+ fbb_.AddElement<int32_t>(FakeQuantOptions::VT_NUM_BITS, num_bits, 0);
+ }
+ void add_narrow_range(bool narrow_range)
+ {
+ fbb_.AddElement<uint8_t>(FakeQuantOptions::VT_NARROW_RANGE, static_cast<uint8_t>(narrow_range),
+ 0);
+ }
+ explicit FakeQuantOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &);
+ flatbuffers::Offset<FakeQuantOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FakeQuantOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FakeQuantOptions>
+CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, float min = 0.0f, float max = 0.0f,
+ int32_t num_bits = 0, bool narrow_range = false)
+{
+ FakeQuantOptionsBuilder builder_(_fbb);
+ builder_.add_num_bits(num_bits);
+ builder_.add_max(max);
+ builder_.add_min(min);
+ builder_.add_narrow_range(narrow_range);
+ return builder_.Finish();
+}
+
+struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_VALUES_COUNT = 4,
+ VT_AXIS = 6
+ };
+ int32_t values_count() const { return GetField<int32_t>(VT_VALUES_COUNT, 0); }
+ int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_VALUES_COUNT) &&
+ VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
+ }
+};
+
+struct PackOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_values_count(int32_t values_count)
+ {
+ fbb_.AddElement<int32_t>(PackOptions::VT_VALUES_COUNT, values_count, 0);
+ }
+ void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(PackOptions::VT_AXIS, axis, 0); }
+ explicit PackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ PackOptionsBuilder &operator=(const PackOptionsBuilder &);
+ flatbuffers::Offset<PackOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<PackOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<PackOptions>
+CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t values_count = 0, int32_t axis = 0)
+{
+ PackOptionsBuilder builder_(_fbb);
+ builder_.add_axis(axis);
+ builder_.add_values_count(values_count);
+ return builder_.Finish();
+}
+
+struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct LogicalOrOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &);
+ flatbuffers::Offset<LogicalOrOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LogicalOrOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LogicalOrOptions>
+CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LogicalOrOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_AXIS = 4
+ };
+ int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
+ verifier.EndTable();
+ }
+};
+
+struct OneHotOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0); }
+ explicit OneHotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &);
+ flatbuffers::Offset<OneHotOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<OneHotOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t axis = 0)
+{
+ OneHotOptionsBuilder builder_(_fbb);
+ builder_.add_axis(axis);
+ return builder_.Finish();
+}
+
+struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct AbsOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ AbsOptionsBuilder &operator=(const AbsOptionsBuilder &);
+ flatbuffers::Offset<AbsOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<AbsOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ AbsOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HardSwishOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HardSwishOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ HardSwishOptionsBuilder &operator=(const HardSwishOptionsBuilder &);
+ flatbuffers::Offset<HardSwishOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HardSwishOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HardSwishOptions>
+CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HardSwishOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct LogicalAndOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &);
+ flatbuffers::Offset<LogicalAndOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LogicalAndOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LogicalAndOptions>
+CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LogicalAndOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct LogicalNotOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &);
+ flatbuffers::Offset<LogicalNotOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LogicalNotOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LogicalNotOptions>
+CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LogicalNotOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_NUM = 4,
+ VT_AXIS = 6
+ };
+ int32_t num() const { return GetField<int32_t>(VT_NUM, 0); }
+ int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM) &&
+ VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
+ }
+};
+
+struct UnpackOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_num(int32_t num) { fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0); }
+ void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(UnpackOptions::VT_AXIS, axis, 0); }
+ explicit UnpackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &);
+ flatbuffers::Offset<UnpackOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UnpackOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t num = 0, int32_t axis = 0)
+{
+ UnpackOptionsBuilder builder_(_fbb);
+ builder_.add_axis(axis);
+ builder_.add_num(num);
+ return builder_.Finish();
+}
+
+struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct FloorDivOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &);
+ flatbuffers::Offset<FloorDivOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FloorDivOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FloorDivOptions>
+CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ FloorDivOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct SquareOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SquareOptionsBuilder &operator=(const SquareOptionsBuilder &);
+ flatbuffers::Offset<SquareOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SquareOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SquareOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct ZerosLikeOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &);
+ flatbuffers::Offset<ZerosLikeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ZerosLikeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ZerosLikeOptions>
+CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ZerosLikeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct FillOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ FillOptionsBuilder &operator=(const FillOptionsBuilder &);
+ flatbuffers::Offset<FillOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FillOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ FillOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct FloorModOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
+ flatbuffers::Offset<FloorModOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FloorModOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FloorModOptions>
+CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ FloorModOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct RangeOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ RangeOptionsBuilder &operator=(const RangeOptionsBuilder &);
+ flatbuffers::Offset<RangeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RangeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ RangeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_ALPHA = 4
+ };
+ float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_ALPHA) &&
+ verifier.EndTable();
+ }
+};
+
+struct LeakyReluOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_alpha(float alpha) { fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); }
+ explicit LeakyReluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &);
+ flatbuffers::Offset<LeakyReluOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LeakyReluOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LeakyReluOptions>
+CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, float alpha = 0.0f)
+{
+ LeakyReluOptionsBuilder builder_(_fbb);
+ builder_.add_alpha(alpha);
+ return builder_.Finish();
+}
+
+struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct SquaredDifferenceOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &);
+ flatbuffers::Offset<SquaredDifferenceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SquaredDifferenceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SquaredDifferenceOptions>
+CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SquaredDifferenceOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_MODE = 4
+ };
+ MirrorPadMode mode() const { return static_cast<MirrorPadMode>(GetField<int8_t>(VT_MODE, 0)); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_MODE) &&
+ verifier.EndTable();
+ }
+};
+
+struct MirrorPadOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_mode(MirrorPadMode mode)
+ {
+ fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0);
+ }
+ explicit MirrorPadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &);
+ flatbuffers::Offset<MirrorPadOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<MirrorPadOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<MirrorPadOptions>
+CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ MirrorPadMode mode = MirrorPadMode_REFLECT)
+{
+ MirrorPadOptionsBuilder builder_(_fbb);
+ builder_.add_mode(mode);
+ return builder_.Finish();
+}
+
+struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_IDX_OUT_TYPE = 4
+ };
+ TensorType idx_out_type() const
+ {
+ return static_cast<TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_IDX_OUT_TYPE) &&
+ verifier.EndTable();
+ }
+};
+
+struct UniqueOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_idx_out_type(TensorType idx_out_type)
+ {
+ fbb_.AddElement<int8_t>(UniqueOptions::VT_IDX_OUT_TYPE, static_cast<int8_t>(idx_out_type), 2);
+ }
+ explicit UniqueOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ UniqueOptionsBuilder &operator=(const UniqueOptionsBuilder &);
+ flatbuffers::Offset<UniqueOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UniqueOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UniqueOptions>
+CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ TensorType idx_out_type = TensorType_INT32)
+{
+ UniqueOptionsBuilder builder_(_fbb);
+ builder_.add_idx_out_type(idx_out_type);
+ return builder_.Finish();
+}
+
+struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct ReverseV2OptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ReverseV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ReverseV2OptionsBuilder &operator=(const ReverseV2OptionsBuilder &);
+ flatbuffers::Offset<ReverseV2Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ReverseV2Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ReverseV2Options>
+CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ReverseV2OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct AddNOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit AddNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ AddNOptionsBuilder &operator=(const AddNOptionsBuilder &);
+ flatbuffers::Offset<AddNOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<AddNOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<AddNOptions> CreateAddNOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ AddNOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct GatherNdOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit GatherNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ GatherNdOptionsBuilder &operator=(const GatherNdOptionsBuilder &);
+ flatbuffers::Offset<GatherNdOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<GatherNdOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<GatherNdOptions>
+CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ GatherNdOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct WhereOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit WhereOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ WhereOptionsBuilder &operator=(const WhereOptionsBuilder &);
+ flatbuffers::Offset<WhereOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<WhereOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ WhereOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_SEQ_DIM = 4,
+ VT_BATCH_DIM = 6
+ };
+ int32_t seq_dim() const { return GetField<int32_t>(VT_SEQ_DIM, 0); }
+ int32_t batch_dim() const { return GetField<int32_t>(VT_BATCH_DIM, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_SEQ_DIM) &&
+ VerifyField<int32_t>(verifier, VT_BATCH_DIM) && verifier.EndTable();
+ }
+};
+
+struct ReverseSequenceOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_seq_dim(int32_t seq_dim)
+ {
+ fbb_.AddElement<int32_t>(ReverseSequenceOptions::VT_SEQ_DIM, seq_dim, 0);
+ }
+ void add_batch_dim(int32_t batch_dim)
+ {
+ fbb_.AddElement<int32_t>(ReverseSequenceOptions::VT_BATCH_DIM, batch_dim, 0);
+ }
+ explicit ReverseSequenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ReverseSequenceOptionsBuilder &operator=(const ReverseSequenceOptionsBuilder &);
+ flatbuffers::Offset<ReverseSequenceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ReverseSequenceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ReverseSequenceOptions>
+CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t seq_dim = 0,
+ int32_t batch_dim = 0)
+{
+ ReverseSequenceOptionsBuilder builder_(_fbb);
+ builder_.add_batch_dim(batch_dim);
+ builder_.add_seq_dim(seq_dim);
+ return builder_.Finish();
+}
+
+struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct MatrixDiagOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit MatrixDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ MatrixDiagOptionsBuilder &operator=(const MatrixDiagOptionsBuilder &);
+ flatbuffers::Offset<MatrixDiagOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<MatrixDiagOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<MatrixDiagOptions>
+CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ MatrixDiagOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct QuantizeOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit QuantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ QuantizeOptionsBuilder &operator=(const QuantizeOptionsBuilder &);
+ flatbuffers::Offset<QuantizeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<QuantizeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<QuantizeOptions>
+CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ QuantizeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct MatrixSetDiagOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit MatrixSetDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ MatrixSetDiagOptionsBuilder &operator=(const MatrixSetDiagOptionsBuilder &);
+ flatbuffers::Offset<MatrixSetDiagOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<MatrixSetDiagOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<MatrixSetDiagOptions>
+CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ MatrixSetDiagOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_THEN_SUBGRAPH_INDEX = 4,
+ VT_ELSE_SUBGRAPH_INDEX = 6
+ };
+ int32_t then_subgraph_index() const { return GetField<int32_t>(VT_THEN_SUBGRAPH_INDEX, 0); }
+ int32_t else_subgraph_index() const { return GetField<int32_t>(VT_ELSE_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_THEN_SUBGRAPH_INDEX) &&
+ VerifyField<int32_t>(verifier, VT_ELSE_SUBGRAPH_INDEX) && verifier.EndTable();
+ }
+};
+
+struct IfOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_then_subgraph_index(int32_t then_subgraph_index)
+ {
+ fbb_.AddElement<int32_t>(IfOptions::VT_THEN_SUBGRAPH_INDEX, then_subgraph_index, 0);
+ }
+ void add_else_subgraph_index(int32_t else_subgraph_index)
+ {
+ fbb_.AddElement<int32_t>(IfOptions::VT_ELSE_SUBGRAPH_INDEX, else_subgraph_index, 0);
+ }
+ explicit IfOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ IfOptionsBuilder &operator=(const IfOptionsBuilder &);
+ flatbuffers::Offset<IfOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<IfOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t then_subgraph_index = 0,
+ int32_t else_subgraph_index = 0)
+{
+ IfOptionsBuilder builder_(_fbb);
+ builder_.add_else_subgraph_index(else_subgraph_index);
+ builder_.add_then_subgraph_index(then_subgraph_index);
+ return builder_.Finish();
+}
+
+struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_COND_SUBGRAPH_INDEX = 4,
+ VT_BODY_SUBGRAPH_INDEX = 6
+ };
+ int32_t cond_subgraph_index() const { return GetField<int32_t>(VT_COND_SUBGRAPH_INDEX, 0); }
+ int32_t body_subgraph_index() const { return GetField<int32_t>(VT_BODY_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_COND_SUBGRAPH_INDEX) &&
+ VerifyField<int32_t>(verifier, VT_BODY_SUBGRAPH_INDEX) && verifier.EndTable();
+ }
+};
+
+struct WhileOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_cond_subgraph_index(int32_t cond_subgraph_index)
+ {
+ fbb_.AddElement<int32_t>(WhileOptions::VT_COND_SUBGRAPH_INDEX, cond_subgraph_index, 0);
+ }
+ void add_body_subgraph_index(int32_t body_subgraph_index)
+ {
+ fbb_.AddElement<int32_t>(WhileOptions::VT_BODY_SUBGRAPH_INDEX, body_subgraph_index, 0);
+ }
+ explicit WhileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ WhileOptionsBuilder &operator=(const WhileOptionsBuilder &);
+ flatbuffers::Offset<WhileOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<WhileOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<WhileOptions> CreateWhileOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t cond_subgraph_index = 0,
+ int32_t body_subgraph_index = 0)
+{
+ WhileOptionsBuilder builder_(_fbb);
+ builder_.add_body_subgraph_index(body_subgraph_index);
+ builder_.add_cond_subgraph_index(cond_subgraph_index);
+ return builder_.Finish();
+}
+
+struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct NonMaxSuppressionV4OptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit NonMaxSuppressionV4OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ NonMaxSuppressionV4OptionsBuilder &operator=(const NonMaxSuppressionV4OptionsBuilder &);
+ flatbuffers::Offset<NonMaxSuppressionV4Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<NonMaxSuppressionV4Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<NonMaxSuppressionV4Options>
+CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ NonMaxSuppressionV4OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct NonMaxSuppressionV5OptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit NonMaxSuppressionV5OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ NonMaxSuppressionV5OptionsBuilder &operator=(const NonMaxSuppressionV5OptionsBuilder &);
+ flatbuffers::Offset<NonMaxSuppressionV5Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<NonMaxSuppressionV5Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<NonMaxSuppressionV5Options>
+CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ NonMaxSuppressionV5OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct ScatterNdOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ScatterNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ScatterNdOptionsBuilder &operator=(const ScatterNdOptionsBuilder &);
+ flatbuffers::Offset<ScatterNdOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ScatterNdOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ScatterNdOptions>
+CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ScatterNdOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct SelectV2OptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SelectV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SelectV2OptionsBuilder &operator=(const SelectV2OptionsBuilder &);
+ flatbuffers::Offset<SelectV2Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SelectV2Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SelectV2Options>
+CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SelectV2OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct DensifyOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit DensifyOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ DensifyOptionsBuilder &operator=(const DensifyOptionsBuilder &);
+ flatbuffers::Offset<DensifyOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DensifyOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DensifyOptions>
+CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ DensifyOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct SegmentSumOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SegmentSumOptionsBuilder &operator=(const SegmentSumOptionsBuilder &);
+ flatbuffers::Offset<SegmentSumOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SegmentSumOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SegmentSumOptions>
+CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SegmentSumOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_ADJOINT_LHS = 4,
+ VT_ADJOINT_RHS = 6
+ };
+ bool adjoint_lhs() const { return GetField<uint8_t>(VT_ADJOINT_LHS, 0) != 0; }
+ bool adjoint_rhs() const { return GetField<uint8_t>(VT_ADJOINT_RHS, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJOINT_LHS) &&
+ VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) && verifier.EndTable();
+ }
+};
+
+struct BatchMatMulOptionsBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_adjoint_lhs(bool adjoint_lhs)
+ {
+ fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_LHS, static_cast<uint8_t>(adjoint_lhs),
+ 0);
+ }
+ void add_adjoint_rhs(bool adjoint_rhs)
+ {
+ fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_RHS, static_cast<uint8_t>(adjoint_rhs),
+ 0);
+ }
+ explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ BatchMatMulOptionsBuilder &operator=(const BatchMatMulOptionsBuilder &);
+ flatbuffers::Offset<BatchMatMulOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BatchMatMulOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BatchMatMulOptions>
+CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adjoint_lhs = false,
+ bool adjoint_rhs = false)
+{
+ BatchMatMulOptionsBuilder builder_(_fbb);
+ builder_.add_adjoint_rhs(adjoint_rhs);
+ builder_.add_adjoint_lhs(adjoint_lhs);
+ return builder_.Finish();
+}
+
+struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_BUILTIN_CODE = 4,
+ VT_CUSTOM_CODE = 6,
+ VT_VERSION = 8
+ };
+ BuiltinOperator builtin_code() const
+ {
+ return static_cast<BuiltinOperator>(GetField<int8_t>(VT_BUILTIN_CODE, 0));
+ }
+ const flatbuffers::String *custom_code() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
+ }
+ int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_BUILTIN_CODE) &&
+ VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) &&
+ VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable();
+ }
+};
+
+struct OperatorCodeBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_builtin_code(BuiltinOperator builtin_code)
+ {
+ fbb_.AddElement<int8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int8_t>(builtin_code), 0);
+ }
+ void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code)
+ {
+ fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code);
+ }
+ void add_version(int32_t version)
+ {
+ fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
+ }
+ explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
+ flatbuffers::Offset<OperatorCode> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<OperatorCode>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<OperatorCode>
+CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb,
+ BuiltinOperator builtin_code = BuiltinOperator_ADD,
+ flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1)
+{
+ OperatorCodeBuilder builder_(_fbb);
+ builder_.add_version(version);
+ builder_.add_custom_code(custom_code);
+ builder_.add_builtin_code(builtin_code);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<OperatorCode>
+CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ BuiltinOperator builtin_code = BuiltinOperator_ADD,
+ const char *custom_code = nullptr, int32_t version = 1)
+{
+ return onert_tflite::CreateOperatorCode(
+ _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version);
+}
+
+struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_OPCODE_INDEX = 4,
+ VT_INPUTS = 6,
+ VT_OUTPUTS = 8,
+ VT_BUILTIN_OPTIONS_TYPE = 10,
+ VT_BUILTIN_OPTIONS = 12,
+ VT_CUSTOM_OPTIONS = 14,
+ VT_CUSTOM_OPTIONS_FORMAT = 16,
+ VT_MUTATING_VARIABLE_INPUTS = 18,
+ VT_INTERMEDIATES = 20
+ };
+ uint32_t opcode_index() const { return GetField<uint32_t>(VT_OPCODE_INDEX, 0); }
+ const flatbuffers::Vector<int32_t> *inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
+ }
+ const flatbuffers::Vector<int32_t> *outputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
+ }
+ BuiltinOptions builtin_options_type() const
+ {
+ return static_cast<BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
+ }
+ const void *builtin_options() const { return GetPointer<const void *>(VT_BUILTIN_OPTIONS); }
+ template <typename T> const T *builtin_options_as() const;
+ const Conv2DOptions *builtin_options_as_Conv2DOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_Conv2DOptions
+ ? static_cast<const Conv2DOptions *>(builtin_options())
+ : nullptr;
+ }
+ const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions
+ ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions
+ ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LSHProjectionOptions
+ ? static_cast<const LSHProjectionOptions *>(builtin_options())
+ : nullptr;
+ }
+ const Pool2DOptions *builtin_options_as_Pool2DOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_Pool2DOptions
+ ? static_cast<const Pool2DOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SVDFOptions *builtin_options_as_SVDFOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SVDFOptions
+ ? static_cast<const SVDFOptions *>(builtin_options())
+ : nullptr;
+ }
+ const RNNOptions *builtin_options_as_RNNOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_RNNOptions
+ ? static_cast<const RNNOptions *>(builtin_options())
+ : nullptr;
+ }
+ const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_FullyConnectedOptions
+ ? static_cast<const FullyConnectedOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SoftmaxOptions
+ ? static_cast<const SoftmaxOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ConcatenationOptions
+ ? static_cast<const ConcatenationOptions *>(builtin_options())
+ : nullptr;
+ }
+ const AddOptions *builtin_options_as_AddOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_AddOptions
+ ? static_cast<const AddOptions *>(builtin_options())
+ : nullptr;
+ }
+ const L2NormOptions *builtin_options_as_L2NormOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_L2NormOptions
+ ? static_cast<const L2NormOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LocalResponseNormalizationOptions *
+ builtin_options_as_LocalResponseNormalizationOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions
+ ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LSTMOptions *builtin_options_as_LSTMOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LSTMOptions
+ ? static_cast<const LSTMOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions
+ ? static_cast<const ResizeBilinearOptions *>(builtin_options())
+ : nullptr;
+ }
+ const CallOptions *builtin_options_as_CallOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_CallOptions
+ ? static_cast<const CallOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ReshapeOptions *builtin_options_as_ReshapeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ReshapeOptions
+ ? static_cast<const ReshapeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SkipGramOptions *builtin_options_as_SkipGramOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SkipGramOptions
+ ? static_cast<const SkipGramOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions
+ ? static_cast<const SpaceToDepthOptions *>(builtin_options())
+ : nullptr;
+ }
+ const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions
+ ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options())
+ : nullptr;
+ }
+ const MulOptions *builtin_options_as_MulOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_MulOptions
+ ? static_cast<const MulOptions *>(builtin_options())
+ : nullptr;
+ }
+ const PadOptions *builtin_options_as_PadOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_PadOptions
+ ? static_cast<const PadOptions *>(builtin_options())
+ : nullptr;
+ }
+ const GatherOptions *builtin_options_as_GatherOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_GatherOptions
+ ? static_cast<const GatherOptions *>(builtin_options())
+ : nullptr;
+ }
+ const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions
+ ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions
+ ? static_cast<const SpaceToBatchNDOptions *>(builtin_options())
+ : nullptr;
+ }
+ const TransposeOptions *builtin_options_as_TransposeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_TransposeOptions
+ ? static_cast<const TransposeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ReducerOptions *builtin_options_as_ReducerOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ReducerOptions
+ ? static_cast<const ReducerOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SubOptions *builtin_options_as_SubOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SubOptions
+ ? static_cast<const SubOptions *>(builtin_options())
+ : nullptr;
+ }
+ const DivOptions *builtin_options_as_DivOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_DivOptions
+ ? static_cast<const DivOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SqueezeOptions *builtin_options_as_SqueezeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SqueezeOptions
+ ? static_cast<const SqueezeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SequenceRNNOptions
+ ? static_cast<const SequenceRNNOptions *>(builtin_options())
+ : nullptr;
+ }
+ const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_StridedSliceOptions
+ ? static_cast<const StridedSliceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ExpOptions *builtin_options_as_ExpOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ExpOptions
+ ? static_cast<const ExpOptions *>(builtin_options())
+ : nullptr;
+ }
+ const TopKV2Options *builtin_options_as_TopKV2Options() const
+ {
+ return builtin_options_type() == BuiltinOptions_TopKV2Options
+ ? static_cast<const TopKV2Options *>(builtin_options())
+ : nullptr;
+ }
+ const SplitOptions *builtin_options_as_SplitOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SplitOptions
+ ? static_cast<const SplitOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions
+ ? static_cast<const LogSoftmaxOptions *>(builtin_options())
+ : nullptr;
+ }
+ const CastOptions *builtin_options_as_CastOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_CastOptions
+ ? static_cast<const CastOptions *>(builtin_options())
+ : nullptr;
+ }
+ const DequantizeOptions *builtin_options_as_DequantizeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_DequantizeOptions
+ ? static_cast<const DequantizeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions
+ ? static_cast<const MaximumMinimumOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ArgMaxOptions
+ ? static_cast<const ArgMaxOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LessOptions *builtin_options_as_LessOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LessOptions
+ ? static_cast<const LessOptions *>(builtin_options())
+ : nullptr;
+ }
+ const NegOptions *builtin_options_as_NegOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_NegOptions
+ ? static_cast<const NegOptions *>(builtin_options())
+ : nullptr;
+ }
+ const PadV2Options *builtin_options_as_PadV2Options() const
+ {
+ return builtin_options_type() == BuiltinOptions_PadV2Options
+ ? static_cast<const PadV2Options *>(builtin_options())
+ : nullptr;
+ }
+ const GreaterOptions *builtin_options_as_GreaterOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_GreaterOptions
+ ? static_cast<const GreaterOptions *>(builtin_options())
+ : nullptr;
+ }
+ const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_GreaterEqualOptions
+ ? static_cast<const GreaterEqualOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LessEqualOptions *builtin_options_as_LessEqualOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LessEqualOptions
+ ? static_cast<const LessEqualOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SelectOptions *builtin_options_as_SelectOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SelectOptions
+ ? static_cast<const SelectOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SliceOptions *builtin_options_as_SliceOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SliceOptions
+ ? static_cast<const SliceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_TransposeConvOptions
+ ? static_cast<const TransposeConvOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SparseToDenseOptions
+ ? static_cast<const SparseToDenseOptions *>(builtin_options())
+ : nullptr;
+ }
+ const TileOptions *builtin_options_as_TileOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_TileOptions
+ ? static_cast<const TileOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ExpandDimsOptions
+ ? static_cast<const ExpandDimsOptions *>(builtin_options())
+ : nullptr;
+ }
+ const EqualOptions *builtin_options_as_EqualOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_EqualOptions
+ ? static_cast<const EqualOptions *>(builtin_options())
+ : nullptr;
+ }
+ const NotEqualOptions *builtin_options_as_NotEqualOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_NotEqualOptions
+ ? static_cast<const NotEqualOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ShapeOptions *builtin_options_as_ShapeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ShapeOptions
+ ? static_cast<const ShapeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const PowOptions *builtin_options_as_PowOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_PowOptions
+ ? static_cast<const PowOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ArgMinOptions *builtin_options_as_ArgMinOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ArgMinOptions
+ ? static_cast<const ArgMinOptions *>(builtin_options())
+ : nullptr;
+ }
+ const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_FakeQuantOptions
+ ? static_cast<const FakeQuantOptions *>(builtin_options())
+ : nullptr;
+ }
+ const PackOptions *builtin_options_as_PackOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_PackOptions
+ ? static_cast<const PackOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LogicalOrOptions
+ ? static_cast<const LogicalOrOptions *>(builtin_options())
+ : nullptr;
+ }
+ const OneHotOptions *builtin_options_as_OneHotOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_OneHotOptions
+ ? static_cast<const OneHotOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LogicalAndOptions
+ ? static_cast<const LogicalAndOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LogicalNotOptions
+ ? static_cast<const LogicalNotOptions *>(builtin_options())
+ : nullptr;
+ }
+ const UnpackOptions *builtin_options_as_UnpackOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_UnpackOptions
+ ? static_cast<const UnpackOptions *>(builtin_options())
+ : nullptr;
+ }
+ const FloorDivOptions *builtin_options_as_FloorDivOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_FloorDivOptions
+ ? static_cast<const FloorDivOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SquareOptions *builtin_options_as_SquareOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SquareOptions
+ ? static_cast<const SquareOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ZerosLikeOptions
+ ? static_cast<const ZerosLikeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const FillOptions *builtin_options_as_FillOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_FillOptions
+ ? static_cast<const FillOptions *>(builtin_options())
+ : nullptr;
+ }
+ const BidirectionalSequenceLSTMOptions *
+ builtin_options_as_BidirectionalSequenceLSTMOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions
+ ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
+ }
+ const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions
+ ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options())
+ : nullptr;
+ }
+ const UnidirectionalSequenceLSTMOptions *
+ builtin_options_as_UnidirectionalSequenceLSTMOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions
+ ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
+ }
+ const FloorModOptions *builtin_options_as_FloorModOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_FloorModOptions
+ ? static_cast<const FloorModOptions *>(builtin_options())
+ : nullptr;
+ }
+ const RangeOptions *builtin_options_as_RangeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_RangeOptions
+ ? static_cast<const RangeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions
+ ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options())
+ : nullptr;
+ }
+ const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_LeakyReluOptions
+ ? static_cast<const LeakyReluOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions
+ ? static_cast<const SquaredDifferenceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_MirrorPadOptions
+ ? static_cast<const MirrorPadOptions *>(builtin_options())
+ : nullptr;
+ }
+ const AbsOptions *builtin_options_as_AbsOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_AbsOptions
+ ? static_cast<const AbsOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SplitVOptions *builtin_options_as_SplitVOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SplitVOptions
+ ? static_cast<const SplitVOptions *>(builtin_options())
+ : nullptr;
+ }
+ const UniqueOptions *builtin_options_as_UniqueOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_UniqueOptions
+ ? static_cast<const UniqueOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ReverseV2Options *builtin_options_as_ReverseV2Options() const
+ {
+ return builtin_options_type() == BuiltinOptions_ReverseV2Options
+ ? static_cast<const ReverseV2Options *>(builtin_options())
+ : nullptr;
+ }
+ const AddNOptions *builtin_options_as_AddNOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_AddNOptions
+ ? static_cast<const AddNOptions *>(builtin_options())
+ : nullptr;
+ }
+ const GatherNdOptions *builtin_options_as_GatherNdOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_GatherNdOptions
+ ? static_cast<const GatherNdOptions *>(builtin_options())
+ : nullptr;
+ }
+ const CosOptions *builtin_options_as_CosOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_CosOptions
+ ? static_cast<const CosOptions *>(builtin_options())
+ : nullptr;
+ }
+ const WhereOptions *builtin_options_as_WhereOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_WhereOptions
+ ? static_cast<const WhereOptions *>(builtin_options())
+ : nullptr;
+ }
+ const RankOptions *builtin_options_as_RankOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_RankOptions
+ ? static_cast<const RankOptions *>(builtin_options())
+ : nullptr;
+ }
+ const ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ReverseSequenceOptions
+ ? static_cast<const ReverseSequenceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_MatrixDiagOptions
+ ? static_cast<const MatrixDiagOptions *>(builtin_options())
+ : nullptr;
+ }
+ const QuantizeOptions *builtin_options_as_QuantizeOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_QuantizeOptions
+ ? static_cast<const QuantizeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_MatrixSetDiagOptions
+ ? static_cast<const MatrixSetDiagOptions *>(builtin_options())
+ : nullptr;
+ }
+ const HardSwishOptions *builtin_options_as_HardSwishOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_HardSwishOptions
+ ? static_cast<const HardSwishOptions *>(builtin_options())
+ : nullptr;
+ }
+ const IfOptions *builtin_options_as_IfOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_IfOptions
+ ? static_cast<const IfOptions *>(builtin_options())
+ : nullptr;
+ }
+ const WhileOptions *builtin_options_as_WhileOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_WhileOptions
+ ? static_cast<const WhileOptions *>(builtin_options())
+ : nullptr;
+ }
+ const DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_DepthToSpaceOptions
+ ? static_cast<const DepthToSpaceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const
+ {
+ return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV4Options
+ ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options())
+ : nullptr;
+ }
+ const NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const
+ {
+ return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV5Options
+ ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options())
+ : nullptr;
+ }
+ const ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_ScatterNdOptions
+ ? static_cast<const ScatterNdOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SelectV2Options *builtin_options_as_SelectV2Options() const
+ {
+ return builtin_options_type() == BuiltinOptions_SelectV2Options
+ ? static_cast<const SelectV2Options *>(builtin_options())
+ : nullptr;
+ }
+ const DensifyOptions *builtin_options_as_DensifyOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_DensifyOptions
+ ? static_cast<const DensifyOptions *>(builtin_options())
+ : nullptr;
+ }
+ const SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_SegmentSumOptions
+ ? static_cast<const SegmentSumOptions *>(builtin_options())
+ : nullptr;
+ }
+ const BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
+ {
+ return builtin_options_type() == BuiltinOptions_BatchMatMulOptions
+ ? static_cast<const BatchMatMulOptions *>(builtin_options())
+ : nullptr;
+ }
+ const flatbuffers::Vector<uint8_t> *custom_options() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
+ }
+ CustomOptionsFormat custom_options_format() const
+ {
+ return static_cast<CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
+ }
+ const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_MUTATING_VARIABLE_INPUTS);
+ }
+ const flatbuffers::Vector<int32_t> *intermediates() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INTERMEDIATES);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_OPCODE_INDEX) &&
+ VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) &&
+ VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
+ VerifyField<uint8_t>(verifier, VT_BUILTIN_OPTIONS_TYPE) &&
+ VerifyOffset(verifier, VT_BUILTIN_OPTIONS) &&
+ VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) &&
+ VerifyOffset(verifier, VT_CUSTOM_OPTIONS) && verifier.VerifyVector(custom_options()) &&
+ VerifyField<int8_t>(verifier, VT_CUSTOM_OPTIONS_FORMAT) &&
+ VerifyOffset(verifier, VT_MUTATING_VARIABLE_INPUTS) &&
+ verifier.VerifyVector(mutating_variable_inputs()) &&
+ VerifyOffset(verifier, VT_INTERMEDIATES) && verifier.VerifyVector(intermediates()) &&
+ verifier.EndTable();
+ }
+};
+
+template <> inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>() const
+{
+ return builtin_options_as_Conv2DOptions();
+}
+
+template <>
+inline const DepthwiseConv2DOptions *Operator::builtin_options_as<DepthwiseConv2DOptions>() const
+{
+ return builtin_options_as_DepthwiseConv2DOptions();
+}
+
+template <>
+inline const ConcatEmbeddingsOptions *Operator::builtin_options_as<ConcatEmbeddingsOptions>() const
+{
+ return builtin_options_as_ConcatEmbeddingsOptions();
+}
+
+template <>
+inline const LSHProjectionOptions *Operator::builtin_options_as<LSHProjectionOptions>() const
+{
+ return builtin_options_as_LSHProjectionOptions();
+}
+
+template <> inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>() const
+{
+ return builtin_options_as_Pool2DOptions();
+}
+
+template <> inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const
+{
+ return builtin_options_as_SVDFOptions();
+}
+
+template <> inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const
+{
+ return builtin_options_as_RNNOptions();
+}
+
+template <>
+inline const FullyConnectedOptions *Operator::builtin_options_as<FullyConnectedOptions>() const
+{
+ return builtin_options_as_FullyConnectedOptions();
+}
+
+template <> inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>() const
+{
+ return builtin_options_as_SoftmaxOptions();
+}
+
+template <>
+inline const ConcatenationOptions *Operator::builtin_options_as<ConcatenationOptions>() const
+{
+ return builtin_options_as_ConcatenationOptions();
+}
+
+template <> inline const AddOptions *Operator::builtin_options_as<AddOptions>() const
+{
+ return builtin_options_as_AddOptions();
+}
+
+template <> inline const L2NormOptions *Operator::builtin_options_as<L2NormOptions>() const
+{
+ return builtin_options_as_L2NormOptions();
+}
+
+template <>
+inline const LocalResponseNormalizationOptions *
+Operator::builtin_options_as<LocalResponseNormalizationOptions>() const
+{
+ return builtin_options_as_LocalResponseNormalizationOptions();
+}
+
+template <> inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const
+{
+ return builtin_options_as_LSTMOptions();
+}
+
+template <>
+inline const ResizeBilinearOptions *Operator::builtin_options_as<ResizeBilinearOptions>() const
+{
+ return builtin_options_as_ResizeBilinearOptions();
+}
+
+template <> inline const CallOptions *Operator::builtin_options_as<CallOptions>() const
+{
+ return builtin_options_as_CallOptions();
+}
+
+template <> inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>() const
+{
+ return builtin_options_as_ReshapeOptions();
+}
+
+template <> inline const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>() const
+{
+ return builtin_options_as_SkipGramOptions();
+}
+
+template <>
+inline const SpaceToDepthOptions *Operator::builtin_options_as<SpaceToDepthOptions>() const
+{
+ return builtin_options_as_SpaceToDepthOptions();
+}
+
+template <>
+inline const EmbeddingLookupSparseOptions *
+Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const
+{
+ return builtin_options_as_EmbeddingLookupSparseOptions();
+}
+
+template <> inline const MulOptions *Operator::builtin_options_as<MulOptions>() const
+{
+ return builtin_options_as_MulOptions();
+}
+
+template <> inline const PadOptions *Operator::builtin_options_as<PadOptions>() const
+{
+ return builtin_options_as_PadOptions();
+}
+
+template <> inline const GatherOptions *Operator::builtin_options_as<GatherOptions>() const
+{
+ return builtin_options_as_GatherOptions();
+}
+
+template <>
+inline const BatchToSpaceNDOptions *Operator::builtin_options_as<BatchToSpaceNDOptions>() const
+{
+ return builtin_options_as_BatchToSpaceNDOptions();
+}
+
+template <>
+inline const SpaceToBatchNDOptions *Operator::builtin_options_as<SpaceToBatchNDOptions>() const
+{
+ return builtin_options_as_SpaceToBatchNDOptions();
+}
+
+template <> inline const TransposeOptions *Operator::builtin_options_as<TransposeOptions>() const
+{
+ return builtin_options_as_TransposeOptions();
+}
+
+template <> inline const ReducerOptions *Operator::builtin_options_as<ReducerOptions>() const
+{
+ return builtin_options_as_ReducerOptions();
+}
+
+template <> inline const SubOptions *Operator::builtin_options_as<SubOptions>() const
+{
+ return builtin_options_as_SubOptions();
+}
+
+template <> inline const DivOptions *Operator::builtin_options_as<DivOptions>() const
+{
+ return builtin_options_as_DivOptions();
+}
+
+template <> inline const SqueezeOptions *Operator::builtin_options_as<SqueezeOptions>() const
+{
+ return builtin_options_as_SqueezeOptions();
+}
+
+template <>
+inline const SequenceRNNOptions *Operator::builtin_options_as<SequenceRNNOptions>() const
+{
+ return builtin_options_as_SequenceRNNOptions();
+}
+
+template <>
+inline const StridedSliceOptions *Operator::builtin_options_as<StridedSliceOptions>() const
+{
+ return builtin_options_as_StridedSliceOptions();
+}
+
+template <> inline const ExpOptions *Operator::builtin_options_as<ExpOptions>() const
+{
+ return builtin_options_as_ExpOptions();
+}
+
+template <> inline const TopKV2Options *Operator::builtin_options_as<TopKV2Options>() const
+{
+ return builtin_options_as_TopKV2Options();
+}
+
+template <> inline const SplitOptions *Operator::builtin_options_as<SplitOptions>() const
+{
+ return builtin_options_as_SplitOptions();
+}
+
+template <> inline const LogSoftmaxOptions *Operator::builtin_options_as<LogSoftmaxOptions>() const
+{
+ return builtin_options_as_LogSoftmaxOptions();
+}
+
+template <> inline const CastOptions *Operator::builtin_options_as<CastOptions>() const
+{
+ return builtin_options_as_CastOptions();
+}
+
+template <> inline const DequantizeOptions *Operator::builtin_options_as<DequantizeOptions>() const
+{
+ return builtin_options_as_DequantizeOptions();
+}
+
+template <>
+inline const MaximumMinimumOptions *Operator::builtin_options_as<MaximumMinimumOptions>() const
+{
+ return builtin_options_as_MaximumMinimumOptions();
+}
+
+template <> inline const ArgMaxOptions *Operator::builtin_options_as<ArgMaxOptions>() const
+{
+ return builtin_options_as_ArgMaxOptions();
+}
+
+template <> inline const LessOptions *Operator::builtin_options_as<LessOptions>() const
+{
+ return builtin_options_as_LessOptions();
+}
+
+template <> inline const NegOptions *Operator::builtin_options_as<NegOptions>() const
+{
+ return builtin_options_as_NegOptions();
+}
+
+template <> inline const PadV2Options *Operator::builtin_options_as<PadV2Options>() const
+{
+ return builtin_options_as_PadV2Options();
+}
+
+template <> inline const GreaterOptions *Operator::builtin_options_as<GreaterOptions>() const
+{
+ return builtin_options_as_GreaterOptions();
+}
+
+template <>
+inline const GreaterEqualOptions *Operator::builtin_options_as<GreaterEqualOptions>() const
+{
+ return builtin_options_as_GreaterEqualOptions();
+}
+
+template <> inline const LessEqualOptions *Operator::builtin_options_as<LessEqualOptions>() const
+{
+ return builtin_options_as_LessEqualOptions();
+}
+
+template <> inline const SelectOptions *Operator::builtin_options_as<SelectOptions>() const
+{
+ return builtin_options_as_SelectOptions();
+}
+
+template <> inline const SliceOptions *Operator::builtin_options_as<SliceOptions>() const
+{
+ return builtin_options_as_SliceOptions();
+}
+
+template <>
+inline const TransposeConvOptions *Operator::builtin_options_as<TransposeConvOptions>() const
+{
+ return builtin_options_as_TransposeConvOptions();
+}
+
+template <>
+inline const SparseToDenseOptions *Operator::builtin_options_as<SparseToDenseOptions>() const
+{
+ return builtin_options_as_SparseToDenseOptions();
+}
+
+template <> inline const TileOptions *Operator::builtin_options_as<TileOptions>() const
+{
+ return builtin_options_as_TileOptions();
+}
+
+template <> inline const ExpandDimsOptions *Operator::builtin_options_as<ExpandDimsOptions>() const
+{
+ return builtin_options_as_ExpandDimsOptions();
+}
+
+template <> inline const EqualOptions *Operator::builtin_options_as<EqualOptions>() const
+{
+ return builtin_options_as_EqualOptions();
+}
+
+template <> inline const NotEqualOptions *Operator::builtin_options_as<NotEqualOptions>() const
+{
+ return builtin_options_as_NotEqualOptions();
+}
+
+template <> inline const ShapeOptions *Operator::builtin_options_as<ShapeOptions>() const
+{
+ return builtin_options_as_ShapeOptions();
+}
+
+template <> inline const PowOptions *Operator::builtin_options_as<PowOptions>() const
+{
+ return builtin_options_as_PowOptions();
+}
+
+template <> inline const ArgMinOptions *Operator::builtin_options_as<ArgMinOptions>() const
+{
+ return builtin_options_as_ArgMinOptions();
+}
+
+template <> inline const FakeQuantOptions *Operator::builtin_options_as<FakeQuantOptions>() const
+{
+ return builtin_options_as_FakeQuantOptions();
+}
+
+template <> inline const PackOptions *Operator::builtin_options_as<PackOptions>() const
+{
+ return builtin_options_as_PackOptions();
+}
+
+template <> inline const LogicalOrOptions *Operator::builtin_options_as<LogicalOrOptions>() const
+{
+ return builtin_options_as_LogicalOrOptions();
+}
+
+template <> inline const OneHotOptions *Operator::builtin_options_as<OneHotOptions>() const
+{
+ return builtin_options_as_OneHotOptions();
+}
+
+template <> inline const LogicalAndOptions *Operator::builtin_options_as<LogicalAndOptions>() const
+{
+ return builtin_options_as_LogicalAndOptions();
+}
+
+template <> inline const LogicalNotOptions *Operator::builtin_options_as<LogicalNotOptions>() const
+{
+ return builtin_options_as_LogicalNotOptions();
+}
+
+template <> inline const UnpackOptions *Operator::builtin_options_as<UnpackOptions>() const
+{
+ return builtin_options_as_UnpackOptions();
+}
+
+template <> inline const FloorDivOptions *Operator::builtin_options_as<FloorDivOptions>() const
+{
+ return builtin_options_as_FloorDivOptions();
+}
+
+template <> inline const SquareOptions *Operator::builtin_options_as<SquareOptions>() const
+{
+ return builtin_options_as_SquareOptions();
+}
+
+template <> inline const ZerosLikeOptions *Operator::builtin_options_as<ZerosLikeOptions>() const
+{
+ return builtin_options_as_ZerosLikeOptions();
+}
+
+template <> inline const FillOptions *Operator::builtin_options_as<FillOptions>() const
+{
+ return builtin_options_as_FillOptions();
+}
+
+template <>
+inline const BidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<BidirectionalSequenceLSTMOptions>() const
+{
+ return builtin_options_as_BidirectionalSequenceLSTMOptions();
+}
+
+template <>
+inline const BidirectionalSequenceRNNOptions *
+Operator::builtin_options_as<BidirectionalSequenceRNNOptions>() const
+{
+ return builtin_options_as_BidirectionalSequenceRNNOptions();
+}
+
+template <>
+inline const UnidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<UnidirectionalSequenceLSTMOptions>() const
+{
+ return builtin_options_as_UnidirectionalSequenceLSTMOptions();
+}
+
+template <> inline const FloorModOptions *Operator::builtin_options_as<FloorModOptions>() const
+{
+ return builtin_options_as_FloorModOptions();
+}
+
+template <> inline const RangeOptions *Operator::builtin_options_as<RangeOptions>() const
+{
+ return builtin_options_as_RangeOptions();
+}
+
+template <>
+inline const ResizeNearestNeighborOptions *
+Operator::builtin_options_as<ResizeNearestNeighborOptions>() const
+{
+ return builtin_options_as_ResizeNearestNeighborOptions();
+}
+
+template <> inline const LeakyReluOptions *Operator::builtin_options_as<LeakyReluOptions>() const
+{
+ return builtin_options_as_LeakyReluOptions();
+}
+
+template <>
+inline const SquaredDifferenceOptions *
+Operator::builtin_options_as<SquaredDifferenceOptions>() const
+{
+ return builtin_options_as_SquaredDifferenceOptions();
+}
+
+template <> inline const MirrorPadOptions *Operator::builtin_options_as<MirrorPadOptions>() const
+{
+ return builtin_options_as_MirrorPadOptions();
+}
+
+template <> inline const AbsOptions *Operator::builtin_options_as<AbsOptions>() const
+{
+ return builtin_options_as_AbsOptions();
+}
+
+template <> inline const SplitVOptions *Operator::builtin_options_as<SplitVOptions>() const
+{
+ return builtin_options_as_SplitVOptions();
+}
+
+template <> inline const UniqueOptions *Operator::builtin_options_as<UniqueOptions>() const
+{
+ return builtin_options_as_UniqueOptions();
+}
+
+template <> inline const ReverseV2Options *Operator::builtin_options_as<ReverseV2Options>() const
+{
+ return builtin_options_as_ReverseV2Options();
+}
+
+template <> inline const AddNOptions *Operator::builtin_options_as<AddNOptions>() const
+{
+ return builtin_options_as_AddNOptions();
+}
+
+template <> inline const GatherNdOptions *Operator::builtin_options_as<GatherNdOptions>() const
+{
+ return builtin_options_as_GatherNdOptions();
+}
+
+template <> inline const CosOptions *Operator::builtin_options_as<CosOptions>() const
+{
+ return builtin_options_as_CosOptions();
+}
+
+template <> inline const WhereOptions *Operator::builtin_options_as<WhereOptions>() const
+{
+ return builtin_options_as_WhereOptions();
+}
+
+template <> inline const RankOptions *Operator::builtin_options_as<RankOptions>() const
+{
+ return builtin_options_as_RankOptions();
+}
+
+template <>
+inline const ReverseSequenceOptions *Operator::builtin_options_as<ReverseSequenceOptions>() const
+{
+ return builtin_options_as_ReverseSequenceOptions();
+}
+
+template <> inline const MatrixDiagOptions *Operator::builtin_options_as<MatrixDiagOptions>() const
+{
+ return builtin_options_as_MatrixDiagOptions();
+}
+
+template <> inline const QuantizeOptions *Operator::builtin_options_as<QuantizeOptions>() const
+{
+ return builtin_options_as_QuantizeOptions();
+}
+
+template <>
+inline const MatrixSetDiagOptions *Operator::builtin_options_as<MatrixSetDiagOptions>() const
+{
+ return builtin_options_as_MatrixSetDiagOptions();
+}
+
+template <> inline const HardSwishOptions *Operator::builtin_options_as<HardSwishOptions>() const
+{
+ return builtin_options_as_HardSwishOptions();
+}
+
+template <> inline const IfOptions *Operator::builtin_options_as<IfOptions>() const
+{
+ return builtin_options_as_IfOptions();
+}
+
+template <> inline const WhileOptions *Operator::builtin_options_as<WhileOptions>() const
+{
+ return builtin_options_as_WhileOptions();
+}
+
+template <>
+inline const DepthToSpaceOptions *Operator::builtin_options_as<DepthToSpaceOptions>() const
+{
+ return builtin_options_as_DepthToSpaceOptions();
+}
+
+template <>
+inline const NonMaxSuppressionV4Options *
+Operator::builtin_options_as<NonMaxSuppressionV4Options>() const
+{
+ return builtin_options_as_NonMaxSuppressionV4Options();
+}
+
+template <>
+inline const NonMaxSuppressionV5Options *
+Operator::builtin_options_as<NonMaxSuppressionV5Options>() const
+{
+ return builtin_options_as_NonMaxSuppressionV5Options();
+}
+
+template <> inline const ScatterNdOptions *Operator::builtin_options_as<ScatterNdOptions>() const
+{
+ return builtin_options_as_ScatterNdOptions();
+}
+
+template <> inline const SelectV2Options *Operator::builtin_options_as<SelectV2Options>() const
+{
+ return builtin_options_as_SelectV2Options();
+}
+
+template <> inline const DensifyOptions *Operator::builtin_options_as<DensifyOptions>() const
+{
+ return builtin_options_as_DensifyOptions();
+}
+
+template <> inline const SegmentSumOptions *Operator::builtin_options_as<SegmentSumOptions>() const
+{
+ return builtin_options_as_SegmentSumOptions();
+}
+
+template <>
+inline const BatchMatMulOptions *Operator::builtin_options_as<BatchMatMulOptions>() const
+{
+ return builtin_options_as_BatchMatMulOptions();
+}
+
+struct OperatorBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_opcode_index(uint32_t opcode_index)
+ {
+ fbb_.AddElement<uint32_t>(Operator::VT_OPCODE_INDEX, opcode_index, 0);
+ }
+ void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs)
+ {
+ fbb_.AddOffset(Operator::VT_INPUTS, inputs);
+ }
+ void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs)
+ {
+ fbb_.AddOffset(Operator::VT_OUTPUTS, outputs);
+ }
+ void add_builtin_options_type(BuiltinOptions builtin_options_type)
+ {
+ fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE,
+ static_cast<uint8_t>(builtin_options_type), 0);
+ }
+ void add_builtin_options(flatbuffers::Offset<void> builtin_options)
+ {
+ fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options);
+ }
+ void add_custom_options(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options)
+ {
+ fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options);
+ }
+ void add_custom_options_format(CustomOptionsFormat custom_options_format)
+ {
+ fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT,
+ static_cast<int8_t>(custom_options_format), 0);
+ }
+ void add_mutating_variable_inputs(
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
+ {
+ fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs);
+ }
+ void add_intermediates(flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates)
+ {
+ fbb_.AddOffset(Operator::VT_INTERMEDIATES, intermediates);
+ }
+ explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ OperatorBuilder &operator=(const OperatorBuilder &);
+ flatbuffers::Offset<Operator> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Operator>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Operator>
+CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
+ CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
+{
+ OperatorBuilder builder_(_fbb);
+ builder_.add_intermediates(intermediates);
+ builder_.add_mutating_variable_inputs(mutating_variable_inputs);
+ builder_.add_custom_options(custom_options);
+ builder_.add_builtin_options(builtin_options);
+ builder_.add_outputs(outputs);
+ builder_.add_inputs(inputs);
+ builder_.add_opcode_index(opcode_index);
+ builder_.add_custom_options_format(custom_options_format);
+ builder_.add_builtin_options_type(builtin_options_type);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Operator>
+CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ const std::vector<int32_t> *inputs = nullptr,
+ const std::vector<int32_t> *outputs = nullptr,
+ BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ const std::vector<uint8_t> *custom_options = nullptr,
+ CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
+ const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
+ const std::vector<int32_t> *intermediates = nullptr)
+{
+ return onert_tflite::CreateOperator(
+ _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
+ outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options,
+ custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format,
+ mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0,
+ intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0);
+}
+
+struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_TENSORS = 4,
+ VT_INPUTS = 6,
+ VT_OUTPUTS = 8,
+ VT_OPERATORS = 10,
+ VT_NAME = 12
+ };
+ const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *tensors() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(VT_TENSORS);
+ }
+ const flatbuffers::Vector<int32_t> *inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
+ }
+ const flatbuffers::Vector<int32_t> *outputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<Operator>> *operators() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(VT_OPERATORS);
+ }
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_TENSORS) &&
+ verifier.VerifyVector(tensors()) && verifier.VerifyVectorOfTables(tensors()) &&
+ VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) &&
+ VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
+ VerifyOffset(verifier, VT_OPERATORS) && verifier.VerifyVector(operators()) &&
+ verifier.VerifyVectorOfTables(operators()) && VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && verifier.EndTable();
+ }
+};
+
+struct SubGraphBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors)
+ {
+ fbb_.AddOffset(SubGraph::VT_TENSORS, tensors);
+ }
+ void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs)
+ {
+ fbb_.AddOffset(SubGraph::VT_INPUTS, inputs);
+ }
+ void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs)
+ {
+ fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
+ }
+ void
+ add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators)
+ {
+ fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
+ }
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(SubGraph::VT_NAME, name);
+ }
+ explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ SubGraphBuilder &operator=(const SubGraphBuilder &);
+ flatbuffers::Offset<SubGraph> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SubGraph>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SubGraph> CreateSubGraph(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0)
+{
+ SubGraphBuilder builder_(_fbb);
+ builder_.add_name(name);
+ builder_.add_operators(operators);
+ builder_.add_outputs(outputs);
+ builder_.add_inputs(inputs);
+ builder_.add_tensors(tensors);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SubGraph>
+CreateSubGraphDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr,
+ const std::vector<int32_t> *inputs = nullptr,
+ const std::vector<int32_t> *outputs = nullptr,
+ const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr,
+ const char *name = nullptr)
+{
+ return onert_tflite::CreateSubGraph(
+ _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
+ inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
+ outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
+ operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0,
+ name ? _fbb.CreateString(name) : 0);
+}
+
+struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_DATA = 4
+ };
+ const flatbuffers::Vector<uint8_t> *data() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_DATA);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_DATA) &&
+ verifier.VerifyVector(data()) && verifier.EndTable();
+ }
+};
+
+struct BufferBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data)
+ {
+ fbb_.AddOffset(Buffer::VT_DATA, data);
+ }
+ explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ BufferBuilder &operator=(const BufferBuilder &);
+ flatbuffers::Offset<Buffer> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Buffer>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Buffer>
+CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data = 0)
+{
+ BufferBuilder builder_(_fbb);
+ builder_.add_data(data);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Buffer> CreateBufferDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<uint8_t> *data = nullptr)
+{
+ return onert_tflite::CreateBuffer(_fbb, data ? _fbb.CreateVector<uint8_t>(*data) : 0);
+}
+
+struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_NAME = 4,
+ VT_BUFFER = 6
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyField<uint32_t>(verifier, VT_BUFFER) &&
+ verifier.EndTable();
+ }
+};
+
+struct MetadataBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(Metadata::VT_NAME, name);
+ }
+ void add_buffer(uint32_t buffer) { fbb_.AddElement<uint32_t>(Metadata::VT_BUFFER, buffer, 0); }
+ explicit MetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ MetadataBuilder &operator=(const MetadataBuilder &);
+ flatbuffers::Offset<Metadata> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Metadata>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Metadata>
+CreateMetadata(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> name = 0, uint32_t buffer = 0)
+{
+ MetadataBuilder builder_(_fbb);
+ builder_.add_buffer(buffer);
+ builder_.add_name(name);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Metadata> CreateMetadataDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const char *name = nullptr,
+ uint32_t buffer = 0)
+{
+ return onert_tflite::CreateMetadata(_fbb, name ? _fbb.CreateString(name) : 0, buffer);
+}
+
+struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ enum
+ {
+ VT_VERSION = 4,
+ VT_OPERATOR_CODES = 6,
+ VT_SUBGRAPHS = 8,
+ VT_DESCRIPTION = 10,
+ VT_BUFFERS = 12,
+ VT_METADATA_BUFFER = 14,
+ VT_METADATA = 16
+ };
+ uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); }
+ const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>(
+ VT_OPERATOR_CODES);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(VT_SUBGRAPHS);
+ }
+ const flatbuffers::String *description() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *buffers() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(VT_BUFFERS);
+ }
+ const flatbuffers::Vector<int32_t> *metadata_buffer() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *metadata() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *>(VT_METADATA);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_VERSION) &&
+ VerifyOffset(verifier, VT_OPERATOR_CODES) && verifier.VerifyVector(operator_codes()) &&
+ verifier.VerifyVectorOfTables(operator_codes()) &&
+ VerifyOffset(verifier, VT_SUBGRAPHS) && verifier.VerifyVector(subgraphs()) &&
+ verifier.VerifyVectorOfTables(subgraphs()) && VerifyOffset(verifier, VT_DESCRIPTION) &&
+ verifier.VerifyString(description()) && VerifyOffset(verifier, VT_BUFFERS) &&
+ verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) &&
+ VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) &&
+ VerifyOffset(verifier, VT_METADATA) && verifier.VerifyVector(metadata()) &&
+ verifier.VerifyVectorOfTables(metadata()) && verifier.EndTable();
+ }
+};
+
+struct ModelBuilder
+{
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); }
+ void add_operator_codes(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes)
+ {
+ fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
+ }
+ void
+ add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs)
+ {
+ fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
+ }
+ void add_description(flatbuffers::Offset<flatbuffers::String> description)
+ {
+ fbb_.AddOffset(Model::VT_DESCRIPTION, description);
+ }
+ void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers)
+ {
+ fbb_.AddOffset(Model::VT_BUFFERS, buffers);
+ }
+ void add_metadata_buffer(flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer)
+ {
+ fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer);
+ }
+ void
+ add_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata)
+ {
+ fbb_.AddOffset(Model::VT_METADATA, metadata);
+ }
+ explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ ModelBuilder &operator=(const ModelBuilder &);
+ flatbuffers::Offset<Model> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Model>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Model> CreateModel(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0,
+ flatbuffers::Offset<flatbuffers::String> description = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0)
+{
+ ModelBuilder builder_(_fbb);
+ builder_.add_metadata(metadata);
+ builder_.add_metadata_buffer(metadata_buffer);
+ builder_.add_buffers(buffers);
+ builder_.add_description(description);
+ builder_.add_subgraphs(subgraphs);
+ builder_.add_operator_codes(operator_codes);
+ builder_.add_version(version);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Model>
+CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes = nullptr,
+ const std::vector<flatbuffers::Offset<SubGraph>> *subgraphs = nullptr,
+ const char *description = nullptr,
+ const std::vector<flatbuffers::Offset<Buffer>> *buffers = nullptr,
+ const std::vector<int32_t> *metadata_buffer = nullptr,
+ const std::vector<flatbuffers::Offset<Metadata>> *metadata = nullptr)
+{
+ return onert_tflite::CreateModel(
+ _fbb, version,
+ operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0,
+ subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0,
+ description ? _fbb.CreateString(description) : 0,
+ buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0,
+ metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0,
+ metadata ? _fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0);
+}
+
+inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
+ QuantizationDetails type)
+{
+ switch (type)
+ {
+ case QuantizationDetails_NONE:
+ {
+ return true;
+ }
+ case QuantizationDetails_CustomQuantization:
+ {
+ auto ptr = reinterpret_cast<const CustomQuantization *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ default:
+ return false;
+ }
+}
+
+inline bool
+VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types)
+{
+ if (!values || !types)
+ return !values && !types;
+ if (values->size() != types->size())
+ return false;
+ for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i)
+ {
+ if (!VerifyQuantizationDetails(verifier, values->Get(i),
+ types->GetEnum<QuantizationDetails>(i)))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void *obj,
+ SparseIndexVector type)
+{
+ switch (type)
+ {
+ case SparseIndexVector_NONE:
+ {
+ return true;
+ }
+ case SparseIndexVector_Int32Vector:
+ {
+ auto ptr = reinterpret_cast<const Int32Vector *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case SparseIndexVector_Uint16Vector:
+ {
+ auto ptr = reinterpret_cast<const Uint16Vector *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case SparseIndexVector_Uint8Vector:
+ {
+ auto ptr = reinterpret_cast<const Uint8Vector *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ default:
+ return false;
+ }
+}
+
+inline bool
+VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types)
+{
+ if (!values || !types)
+ return !values && !types;
+ if (values->size() != types->size())
+ return false;
+ for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i)
+ {
+ if (!VerifySparseIndexVector(verifier, values->Get(i), types->GetEnum<SparseIndexVector>(i)))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj,
+ BuiltinOptions type)
+{
+ switch (type)
+ {
+ case BuiltinOptions_NONE:
+ {
+ return true;
+ }
+ case BuiltinOptions_Conv2DOptions:
+ {
+ auto ptr = reinterpret_cast<const Conv2DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DepthwiseConv2DOptions:
+ {
+ auto ptr = reinterpret_cast<const DepthwiseConv2DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ConcatEmbeddingsOptions:
+ {
+ auto ptr = reinterpret_cast<const ConcatEmbeddingsOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LSHProjectionOptions:
+ {
+ auto ptr = reinterpret_cast<const LSHProjectionOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_Pool2DOptions:
+ {
+ auto ptr = reinterpret_cast<const Pool2DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SVDFOptions:
+ {
+ auto ptr = reinterpret_cast<const SVDFOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RNNOptions:
+ {
+ auto ptr = reinterpret_cast<const RNNOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FullyConnectedOptions:
+ {
+ auto ptr = reinterpret_cast<const FullyConnectedOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SoftmaxOptions:
+ {
+ auto ptr = reinterpret_cast<const SoftmaxOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ConcatenationOptions:
+ {
+ auto ptr = reinterpret_cast<const ConcatenationOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AddOptions:
+ {
+ auto ptr = reinterpret_cast<const AddOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_L2NormOptions:
+ {
+ auto ptr = reinterpret_cast<const L2NormOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LocalResponseNormalizationOptions:
+ {
+ auto ptr = reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const LSTMOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ResizeBilinearOptions:
+ {
+ auto ptr = reinterpret_cast<const ResizeBilinearOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CallOptions:
+ {
+ auto ptr = reinterpret_cast<const CallOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReshapeOptions:
+ {
+ auto ptr = reinterpret_cast<const ReshapeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SkipGramOptions:
+ {
+ auto ptr = reinterpret_cast<const SkipGramOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SpaceToDepthOptions:
+ {
+ auto ptr = reinterpret_cast<const SpaceToDepthOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_EmbeddingLookupSparseOptions:
+ {
+ auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MulOptions:
+ {
+ auto ptr = reinterpret_cast<const MulOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_PadOptions:
+ {
+ auto ptr = reinterpret_cast<const PadOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GatherOptions:
+ {
+ auto ptr = reinterpret_cast<const GatherOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BatchToSpaceNDOptions:
+ {
+ auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SpaceToBatchNDOptions:
+ {
+ auto ptr = reinterpret_cast<const SpaceToBatchNDOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_TransposeOptions:
+ {
+ auto ptr = reinterpret_cast<const TransposeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReducerOptions:
+ {
+ auto ptr = reinterpret_cast<const ReducerOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SubOptions:
+ {
+ auto ptr = reinterpret_cast<const SubOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DivOptions:
+ {
+ auto ptr = reinterpret_cast<const DivOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SqueezeOptions:
+ {
+ auto ptr = reinterpret_cast<const SqueezeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SequenceRNNOptions:
+ {
+ auto ptr = reinterpret_cast<const SequenceRNNOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_StridedSliceOptions:
+ {
+ auto ptr = reinterpret_cast<const StridedSliceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ExpOptions:
+ {
+ auto ptr = reinterpret_cast<const ExpOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_TopKV2Options:
+ {
+ auto ptr = reinterpret_cast<const TopKV2Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SplitOptions:
+ {
+ auto ptr = reinterpret_cast<const SplitOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LogSoftmaxOptions:
+ {
+ auto ptr = reinterpret_cast<const LogSoftmaxOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CastOptions:
+ {
+ auto ptr = reinterpret_cast<const CastOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DequantizeOptions:
+ {
+ auto ptr = reinterpret_cast<const DequantizeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MaximumMinimumOptions:
+ {
+ auto ptr = reinterpret_cast<const MaximumMinimumOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ArgMaxOptions:
+ {
+ auto ptr = reinterpret_cast<const ArgMaxOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LessOptions:
+ {
+ auto ptr = reinterpret_cast<const LessOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_NegOptions:
+ {
+ auto ptr = reinterpret_cast<const NegOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_PadV2Options:
+ {
+ auto ptr = reinterpret_cast<const PadV2Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GreaterOptions:
+ {
+ auto ptr = reinterpret_cast<const GreaterOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GreaterEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const GreaterEqualOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LessEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const LessEqualOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SelectOptions:
+ {
+ auto ptr = reinterpret_cast<const SelectOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SliceOptions:
+ {
+ auto ptr = reinterpret_cast<const SliceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_TransposeConvOptions:
+ {
+ auto ptr = reinterpret_cast<const TransposeConvOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SparseToDenseOptions:
+ {
+ auto ptr = reinterpret_cast<const SparseToDenseOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_TileOptions:
+ {
+ auto ptr = reinterpret_cast<const TileOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ExpandDimsOptions:
+ {
+ auto ptr = reinterpret_cast<const ExpandDimsOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_EqualOptions:
+ {
+ auto ptr = reinterpret_cast<const EqualOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_NotEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const NotEqualOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ShapeOptions:
+ {
+ auto ptr = reinterpret_cast<const ShapeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_PowOptions:
+ {
+ auto ptr = reinterpret_cast<const PowOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ArgMinOptions:
+ {
+ auto ptr = reinterpret_cast<const ArgMinOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FakeQuantOptions:
+ {
+ auto ptr = reinterpret_cast<const FakeQuantOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_PackOptions:
+ {
+ auto ptr = reinterpret_cast<const PackOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LogicalOrOptions:
+ {
+ auto ptr = reinterpret_cast<const LogicalOrOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_OneHotOptions:
+ {
+ auto ptr = reinterpret_cast<const OneHotOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LogicalAndOptions:
+ {
+ auto ptr = reinterpret_cast<const LogicalAndOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LogicalNotOptions:
+ {
+ auto ptr = reinterpret_cast<const LogicalNotOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UnpackOptions:
+ {
+ auto ptr = reinterpret_cast<const UnpackOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FloorDivOptions:
+ {
+ auto ptr = reinterpret_cast<const FloorDivOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SquareOptions:
+ {
+ auto ptr = reinterpret_cast<const SquareOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ZerosLikeOptions:
+ {
+ auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FillOptions:
+ {
+ auto ptr = reinterpret_cast<const FillOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BidirectionalSequenceLSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BidirectionalSequenceRNNOptions:
+ {
+ auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UnidirectionalSequenceLSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FloorModOptions:
+ {
+ auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RangeOptions:
+ {
+ auto ptr = reinterpret_cast<const RangeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ResizeNearestNeighborOptions:
+ {
+ auto ptr = reinterpret_cast<const ResizeNearestNeighborOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LeakyReluOptions:
+ {
+ auto ptr = reinterpret_cast<const LeakyReluOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SquaredDifferenceOptions:
+ {
+ auto ptr = reinterpret_cast<const SquaredDifferenceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MirrorPadOptions:
+ {
+ auto ptr = reinterpret_cast<const MirrorPadOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AbsOptions:
+ {
+ auto ptr = reinterpret_cast<const AbsOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SplitVOptions:
+ {
+ auto ptr = reinterpret_cast<const SplitVOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UniqueOptions:
+ {
+ auto ptr = reinterpret_cast<const UniqueOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReverseV2Options:
+ {
+ auto ptr = reinterpret_cast<const ReverseV2Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AddNOptions:
+ {
+ auto ptr = reinterpret_cast<const AddNOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GatherNdOptions:
+ {
+ auto ptr = reinterpret_cast<const GatherNdOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CosOptions:
+ {
+ auto ptr = reinterpret_cast<const CosOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_WhereOptions:
+ {
+ auto ptr = reinterpret_cast<const WhereOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RankOptions:
+ {
+ auto ptr = reinterpret_cast<const RankOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReverseSequenceOptions:
+ {
+ auto ptr = reinterpret_cast<const ReverseSequenceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MatrixDiagOptions:
+ {
+ auto ptr = reinterpret_cast<const MatrixDiagOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_QuantizeOptions:
+ {
+ auto ptr = reinterpret_cast<const QuantizeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MatrixSetDiagOptions:
+ {
+ auto ptr = reinterpret_cast<const MatrixSetDiagOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HardSwishOptions:
+ {
+ auto ptr = reinterpret_cast<const HardSwishOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_IfOptions:
+ {
+ auto ptr = reinterpret_cast<const IfOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_WhileOptions:
+ {
+ auto ptr = reinterpret_cast<const WhileOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DepthToSpaceOptions:
+ {
+ auto ptr = reinterpret_cast<const DepthToSpaceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_NonMaxSuppressionV4Options:
+ {
+ auto ptr = reinterpret_cast<const NonMaxSuppressionV4Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_NonMaxSuppressionV5Options:
+ {
+ auto ptr = reinterpret_cast<const NonMaxSuppressionV5Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ScatterNdOptions:
+ {
+ auto ptr = reinterpret_cast<const ScatterNdOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SelectV2Options:
+ {
+ auto ptr = reinterpret_cast<const SelectV2Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DensifyOptions:
+ {
+ auto ptr = reinterpret_cast<const DensifyOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SegmentSumOptions:
+ {
+ auto ptr = reinterpret_cast<const SegmentSumOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BatchMatMulOptions:
+ {
+ auto ptr = reinterpret_cast<const BatchMatMulOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ default:
+ return false;
+ }
+}
+
+inline bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types)
+{
+ if (!values || !types)
+ return !values && !types;
+ if (values->size() != types->size())
+ return false;
+ for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i)
+ {
+ if (!VerifyBuiltinOptions(verifier, values->Get(i), types->GetEnum<BuiltinOptions>(i)))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline const onert_tflite::Model *GetModel(const void *buf)
+{
+ return flatbuffers::GetRoot<onert_tflite::Model>(buf);
+}
+
+inline const onert_tflite::Model *GetSizePrefixedModel(const void *buf)
+{
+ return flatbuffers::GetSizePrefixedRoot<onert_tflite::Model>(buf);
+}
+
+inline const char *ModelIdentifier() { return "TFL3"; }
+
+inline bool ModelBufferHasIdentifier(const void *buf)
+{
+ return flatbuffers::BufferHasIdentifier(buf, ModelIdentifier());
+}
+
+inline bool VerifyModelBuffer(flatbuffers::Verifier &verifier)
+{
+ return verifier.VerifyBuffer<onert_tflite::Model>(ModelIdentifier());
+}
+
+inline bool VerifySizePrefixedModelBuffer(flatbuffers::Verifier &verifier)
+{
+ return verifier.VerifySizePrefixedBuffer<onert_tflite::Model>(ModelIdentifier());
+}
+
+inline const char *ModelExtension() { return "tflite"; }
+
+inline void FinishModelBuffer(flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<onert_tflite::Model> root)
+{
+ fbb.Finish(root, ModelIdentifier());
+}
+
+inline void FinishSizePrefixedModelBuffer(flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<onert_tflite::Model> root)
+{
+ fbb.FinishSizePrefixed(root, ModelIdentifier());
+}
+
+} // namespace onert_tflite
+
+#endif // FLATBUFFERS_GENERATED_TFLITESCHEMA_ONERT_TFLITE_H_
diff --git a/runtime/onert/frontend/tflite/tflite_schema-1.13.1.fbs b/runtime/onert/frontend/tflite/tflite_schema-1.13.1.fbs
new file mode 100644
index 000000000..ae6b5230f
--- /dev/null
+++ b/runtime/onert/frontend/tflite/tflite_schema-1.13.1.fbs
@@ -0,0 +1,795 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+
+// Change namespace to onert_tflite
+namespace onert_tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the quantization parameters above are ignored and the
+ // value of the QuantizationDetails union below should be used.
+ details:QuantizationDetails;
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+enum BuiltinOperator : byte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ // DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An implementation of TensorFlow bidrectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation is configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional input and output tensors are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator.(e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ metadata_buffer:[int];
+}
+
+root_type Model;
diff --git a/runtime/onert/frontend/tflite/tflite_schema.fbs b/runtime/onert/frontend/tflite/tflite_schema.fbs
new file mode 100644
index 000000000..9bffb4f3c
--- /dev/null
+++ b/runtime/onert/frontend/tflite/tflite_schema.fbs
@@ -0,0 +1,1095 @@
+// Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+
+// Change namespace to onert_tflite
+namespace onert_tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+ quantized_dimension:int;
+}
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+ // compression technique is the same as what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because tensor's shape is a int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+ // conceptual dense tensor. For a n-dimensional tensors with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+ // permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+
+enum BuiltinOperator : byte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126
+}
+
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for non
+ // constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimension is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adjoint_lhs:bool;
+ adjoint_rhs:bool;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation is configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional inputs are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator.(e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+}
+
+root_type Model;
diff --git a/runtime/neurun/sample/CMakeLists.txt b/runtime/onert/sample/CMakeLists.txt
index d853ba634..d853ba634 100644
--- a/runtime/neurun/sample/CMakeLists.txt
+++ b/runtime/onert/sample/CMakeLists.txt
diff --git a/runtime/neurun/sample/minimal/CMakeLists.txt b/runtime/onert/sample/minimal/CMakeLists.txt
index 6f4b02761..6f4b02761 100644
--- a/runtime/neurun/sample/minimal/CMakeLists.txt
+++ b/runtime/onert/sample/minimal/CMakeLists.txt
diff --git a/runtime/neurun/sample/minimal/README.md b/runtime/onert/sample/minimal/README.md
index fecad6fb2..fecad6fb2 100644
--- a/runtime/neurun/sample/minimal/README.md
+++ b/runtime/onert/sample/minimal/README.md
diff --git a/runtime/onert/sample/minimal/src/minimal.cc b/runtime/onert/sample/minimal/src/minimal.cc
new file mode 100644
index 000000000..d55569ba2
--- /dev/null
+++ b/runtime/onert/sample/minimal/src/minimal.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnfw.h"
+#include <vector>
+
+uint64_t num_elems(const nnfw_tensorinfo *ti)
+{
+ uint64_t n = 1;
+ for (uint32_t i = 0; i < ti->rank; ++i)
+ {
+ n *= ti->dims[i];
+ }
+ return n;
+}
+
+int main(const int argc, char **argv)
+{
+ nnfw_session *session = nullptr;
+ nnfw_create_session(&session);
+
+ // Loading nnpackage
+ nnfw_load_model_from_file(session, argv[1]);
+
+ // Use acl_neon backend for CONV_2D and acl_cl for otherwise.
+  // Note that the default backend is acl_cl
+ nnfw_set_op_backend(session, "CONV_2D", "acl_neon");
+
+ // Compile model
+ nnfw_prepare(session);
+
+ // Prepare input. Here we just allocate dummy input arrays.
+ std::vector<float> input;
+ nnfw_tensorinfo ti;
+ nnfw_input_tensorinfo(session, 0, &ti); // get first input's info
+ uint32_t input_elements = num_elems(&ti);
+ input.resize(input_elements);
+ // TODO: Please add initialization for your input.
+ nnfw_set_input(session, 0, ti.dtype, input.data(), sizeof(float) * input_elements);
+
+ // Prepare output
+ std::vector<float> output;
+ nnfw_output_tensorinfo(session, 0, &ti); // get first output's info
+ uint32_t output_elements = num_elems(&ti);
+ output.resize(output_elements);
+ nnfw_set_output(session, 0, ti.dtype, output.data(), sizeof(float) * output_elements);
+
+ // Do inference
+ nnfw_run(session);
+
+ // TODO: Please print or compare the output value in your way.
+
+ nnfw_close_session(session);
+
+ return 0;
+}
diff --git a/runtime/onert/test/CMakeLists.txt b/runtime/onert/test/CMakeLists.txt
new file mode 100644
index 000000000..0abdd4880
--- /dev/null
+++ b/runtime/onert/test/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(TEST_ONERT test_onert)
+
+file(GLOB_RECURSE TESTS "*.cc")
+
+add_executable(${TEST_ONERT} ${TESTS})
+
+target_include_directories(${TEST_ONERT} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../core/src)
+
+target_link_libraries(${TEST_ONERT} onert_core)
+target_link_libraries(${TEST_ONERT} gtest)
+target_link_libraries(${TEST_ONERT} gtest_main)
+target_link_libraries(${TEST_ONERT} ${LIB_PTHREAD} dl)
+add_test(${TEST_ONERT} ${TEST_ONERT})
+
+install(TARGETS ${TEST_ONERT} DESTINATION unittest)
diff --git a/runtime/onert/test/core/compiler/Scheduler.cc b/runtime/onert/test/core/compiler/Scheduler.cc
new file mode 100644
index 000000000..926b82d94
--- /dev/null
+++ b/runtime/onert/test/core/compiler/Scheduler.cc
@@ -0,0 +1,569 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <compiler/HEScheduler.h>
+#include <exec/ExecTime.h>
+#include <backend/IShapeFixer.h>
+
+#include <ir/Shape.h>
+#include <ir/InternalType.h>
+#include <ir/TypeInfo.h>
+#include <ir/DataType.h>
+
+#include <ir/operation/Add.h>
+#include <ir/operation/Sub.h>
+#include <ir/operation/Mul.h>
+#include <ir/operation/FullyConnected.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace onert;
+using namespace ir;
+using namespace backend;
+using namespace operation;
+using namespace exec;
+
+//
+// Mock backends classes
+//
+
+// Backend could be created without ShapeFixer.
+// But it is used by scheduler to detect which operations are supported by backend.
+struct MockShapeFixer : IShapeFixer
+{
+ void visit(const Add &) override {}
+ void visit(const Sub &) override {}
+ void visit(const Mul &) override {}
+ void visit(const FullyConnected &) override {}
+};
+
+struct MockConfigCPU : public IConfig
+{
+ std::string id() override { return "cpu"; }
+ bool initialize() override { return true; };
+ bool SupportPermutation() override { return false; }
+};
+
+struct MockBackendCPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
+ std::unique_ptr<BackendContext>
+ newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
+ {
+ return std::unique_ptr<BackendContext>(new BackendContext{
+ this, nullptr, nullptr, nullptr, nullptr, std::make_shared<MockShapeFixer>()});
+ }
+};
+
+struct MockConfigGPU : public IConfig
+{
+ std::string id() override { return "gpu"; }
+ bool initialize() override { return true; };
+ bool SupportPermutation() override { return false; }
+};
+
+struct MockBackendGPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
+ std::unique_ptr<BackendContext>
+ newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
+ {
+ return std::unique_ptr<BackendContext>(new BackendContext{
+ this, nullptr, nullptr, nullptr, nullptr, std::make_shared<MockShapeFixer>()});
+ }
+};
+
+struct MockConfigNPU : public IConfig
+{
+ std::string id() override { return "npu"; }
+ bool initialize() override { return true; };
+ bool SupportPermutation() override { return false; }
+};
+
+struct MockBackendNPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
+ std::unique_ptr<BackendContext>
+ newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
+ {
+ return std::unique_ptr<BackendContext>(new BackendContext{
+ this, nullptr, nullptr, nullptr, nullptr, std::make_shared<MockShapeFixer>()});
+ }
+};
+
+//
+// Constants
+//
+
+const int OPERAND_ELEMS = 268203;
+const int OPERAND_SIZE = OPERAND_ELEMS * 4;
+const int OPERATION_SIZE = OPERAND_SIZE * 3;
+
+const std::string LINEAR("Linear");
+const std::string DATAFLOW("Dataflow");
+const std::string PARALLEL("Parallel");
+
+//
+// Helper functions
+//
+
+// Set executor through environment variable
+void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); }
+
+// Set profiling mode through environment variable
+void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); }
+
+// Calculate operation size by addition sizes of all input and output operands
+uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
+{
+ uint32_t size = 0;
+ const auto &op = graph->operations().at(op_idx);
+ for (const auto &ind : op.getInputs() + op.getOutputs())
+ size += graph->operands().at(ind).info().total_size();
+ return size;
+}
+
+// Set execution operation time. This method is needed since ExecutionTime has only
+// 'updateOperationExecTime' method.
+void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
+ bool quant, uint32_t op_size, int64_t time)
+{
+ // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
+ assert(time > 0);
+ int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
+ int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
+ et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
+ assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
+}
+
+// Set same execution time for all given backends/operations
+void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
+ const std::vector<std::string> &op_names,
+ const std::vector<uint32_t> &op_sizes, int64_t exec_time)
+{
+ assert(op_names.size() == op_sizes.size());
+ ExecTime et(backends);
+ for (int i = 0; i < op_names.size(); ++i)
+ {
+ for (auto &backend : backends)
+ setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
+ }
+ et.uploadOperationsExecTime();
+}
+
+// Set permute time from one backend to another. This method is needed since ExecutionTime has only
+// 'updatePermuteTime' method.
+void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
+ bool quant, uint32_t op_size, int64_t time)
+{
+ // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
+ assert(time > 0);
+ int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
+ int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
+ et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
+ assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
+}
+
+// Set same permutation time between all given backends
+void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
+ const int operand_size, const int64_t exec_time)
+{
+ ExecTime et(backends);
+ for (const auto &backend : backends)
+ {
+ for (auto &other_backend : backends)
+ {
+ if (backend == other_backend)
+ continue;
+ setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
+ }
+ }
+ et.uploadOperationsExecTime();
+}
+
+//
+// Functions for creating graphs
+//
+
+using OIS = OperandIndexSequence;
+
+template <typename NodeT, typename... Types>
+OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
+{
+ typename NodeT::Param op_params{Activation::NONE};
+ auto op = std::make_unique<NodeT>(std::forward<Types>(args)..., op_params);
+ auto op_idx = graph->addOperation(std::move(op));
+  // For now in scheduler test all operations in tested graphs have the same size (for simplicity)
+ assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
+ return op_idx;
+}
+
+// Create straight graph: Add->Sub->Mul
+std::shared_ptr<Graph> createStraightGraph()
+{
+ auto graph = std::make_shared<Graph>();
+ const TypeInfo float_op(DataType::FLOAT32);
+
+ // Create add node
+ auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+
+ // Create sub node
+ auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ create<Sub>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx});
+
+ // Create mul node
+ auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ create<Mul>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx});
+
+ graph->finishBuilding();
+ return graph;
+}
+
+/* Create branched graph:
+ * [Add]
+ * // \\
+ * [Mul1]    [FC1]
+ * || ||
+ * [Mul2] [FC2]
+ * \\ //
+ * [Sub]
+ */
+std::shared_ptr<Graph> createBranchedGraph()
+{
+ auto graph = std::make_shared<Graph>();
+ const TypeInfo float_op(DataType::FLOAT32);
+
+ // Create add node
+ auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+
+ // Create mul1 node
+ auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ create<Mul>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx});
+
+ // Create mul2 node
+ auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ create<Mul>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx});
+
+ // Create fc1 node
+ auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx});
+
+ // Create fc2 node
+ auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx});
+
+  // Create sub node
+ auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ create<Sub>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx});
+
+ graph->finishBuilding();
+ return graph;
+}
+
+//
+// Tests setup/teardown
+//
+
+// SetUp/TearDown methods runs before/after each test and performs actions common for each test
+class SchedulerTest : public ::testing::Test
+{
+protected:
+ void SetUp() override
+ {
+ // Initialize mock backends
+ _cpu_backend = new MockBackendCPU();
+ _gpu_backend = new MockBackendGPU();
+ _npu_backend = new MockBackendNPU();
+ _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
+
+ // Remove previous profile data if it exists
+ if (!remove("exec_time.json"))
+ {
+ // DO NOTHING (no profile data)
+ }
+
+ // Remember original value of 'EXECUTOR' environment variable
+ char *executor = std::getenv("EXECUTOR");
+ _original_executor = executor == nullptr ? "" : executor;
+
+ // Remember original value of 'PROFILING_MODE' environment variable
+ char *profiling_mode = std::getenv("PROFILING_MODE");
+ _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
+ }
+
+ void TearDown() override
+ {
+ delete _cpu_backend;
+ delete _gpu_backend;
+ delete _npu_backend;
+ EXPECT_EQ(remove("exec_time.json"), 0);
+ setenv("EXECUTOR", _original_executor.c_str(), true);
+ setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
+ }
+
+ backend::BackendContexts buildBackendContexts(const Graph &graph)
+ {
+ backend::BackendContexts contexts;
+ for (auto backend : _mock_backends)
+ {
+ contexts.emplace(backend, backend->newContext(graph, nullptr, false));
+ }
+ return contexts;
+ }
+
+ const MockBackendCPU *_cpu_backend{nullptr};
+ const MockBackendGPU *_gpu_backend{nullptr};
+ const MockBackendNPU *_npu_backend{nullptr};
+ std::vector<const Backend *> _mock_backends;
+
+ std::string _original_executor;
+ std::string _original_profiling_mode;
+};
+
+class SchedulerTestWithExecutorParam : public SchedulerTest,
+ public testing::WithParamInterface<std::string>
+{
+};
+
+//
+// HEScheduler tests
+//
+
+// Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
+TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time)
+{
+ setExecutor(GetParam());
+
+ // Prepare graph
+ auto graph(createStraightGraph());
+ OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
+
+ // Set default execution and transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
+ setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
+ {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
+
+ // Test 1
+ // Expected behaviour: scheduler assigns different backend to each node
+ {
+ // For each backend reduce execution time of one node
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
+ setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
+ et.uploadOperationsExecTime();
+
+ // Test scheduler
+ auto backend_contexts = buildBackendContexts(*graph);
+ auto scheduler = compiler::HEScheduler(backend_contexts,
+ compiler::fetchCompilerOptionsFromGlobalConfig(*graph));
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
+ }
+
+ // Test 2
+ // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time
+ {
+ // Increase transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
+
+ // Test scheduler
+ auto backend_contexts = buildBackendContexts(*graph);
+ auto scheduler = compiler::HEScheduler(backend_contexts,
+ compiler::fetchCompilerOptionsFromGlobalConfig(*graph));
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
+ }
+}
+
+// Test scheduler behavior for branched graph with known execution time of all nodes and permutes
+TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
+{
+ const int64_t NPU_ET = 5000;
+ setExecutor(GetParam());
+
+ // Prepare graph
+ auto graph(createBranchedGraph());
+ OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
+ sub_op_idx(5);
+
+ // Set default execution and transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
+ setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
+ {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
+
+ // Test 1
+ // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all
+ // nodes, in case of parallel executor scheduler assigns different backends to branches.
+ {
+ // Reduce execution time
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
+ et.uploadOperationsExecTime();
+
+ // Test scheduler
+ auto backend_contexts = buildBackendContexts(*graph);
+ auto scheduler = compiler::HEScheduler(backend_contexts,
+ compiler::fetchCompilerOptionsFromGlobalConfig(*graph));
+ const auto br = scheduler.schedule(*graph);
+
+ std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
+ if (GetParam() == PARALLEL)
+ {
+ branch1_expected_backend =
+ br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
+ branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
+ }
+
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
+ }
+
+ // Test 2
+ // Expected behaviour: scheduler assigns single backend to all nodes
+ {
+ // Increase execution time for GPU backend
+ ExecTime et(_mock_backends);
+ /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt *
+     * npu_exec_time so that npu is preferred: the ith branch will wait for npu until it finishes the
+     * [0;i-1] branches' nodes in DFS order. In each branch it goes deep until it doesn't encounter
+ * branching or scheduler assigns another backend to a node*/
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+ et.uploadOperationsExecTime();
+
+ // Test scheduler
+ auto backend_contexts = buildBackendContexts(*graph);
+ auto scheduler = compiler::HEScheduler(backend_contexts,
+ compiler::fetchCompilerOptionsFromGlobalConfig(*graph));
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
+ }
+}
+
+// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
+// one time for each executor
+INSTANTIATE_TEST_CASE_P(AllExecutors, SchedulerTestWithExecutorParam,
+ testing::Values(LINEAR, DATAFLOW, PARALLEL));
+
+// Test scheduler behavior for branched graph and enabled profiling mode
+TEST_F(SchedulerTest, branched_graph_profiling_mode)
+{
+ const int ET = 1e5;
+
+ // Turn on profiling mode
+ setProfilingMode(true);
+ setExecutor(DATAFLOW);
+
+ // Prepare graph
+ auto graph(createBranchedGraph());
+ OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
+ sub_op_idx(5);
+
+ // Test 1
+ // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
+ {
+ // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
+ et.uploadOperationsExecTime();
+
+ // Test scheduler
+ auto backend_contexts = buildBackendContexts(*graph);
+ auto scheduler = compiler::HEScheduler(backend_contexts,
+ compiler::fetchCompilerOptionsFromGlobalConfig(*graph));
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
+ }
+
+ // Test 2
+ // Expected behaviour: scheduler shuffling backends, so different backends are assigned to
+ // neighbor nodes
+ {
+ // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ et.uploadOperationsExecTime();
+
+ // Test scheduler
+ auto backend_contexts = buildBackendContexts(*graph);
+ auto scheduler = compiler::HEScheduler(backend_contexts,
+ compiler::fetchCompilerOptionsFromGlobalConfig(*graph));
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
+ br->getBackend(mul1_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
+ br->getBackend(fc1_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
+ br->getBackend(mul2_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
+ br->getBackend(fc2_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
+ br->getBackend(sub_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
+ br->getBackend(sub_op_idx)->config()->id());
+ }
+}
+
+// TODO: Add tests with unknown execution and permutation time
+
+} // unnamed namespace
diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/test/core/exec/ExecInstance.cc
new file mode 100644
index 000000000..7242486a0
--- /dev/null
+++ b/runtime/onert/test/core/exec/ExecInstance.cc
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <thread>
+
+#include "ir/Graph.h"
+#include "compiler/Compiler.h"
+#include "exec/Execution.h"
+#include "ir/operation/Add.h"
+
+namespace
+{
+
+using namespace onert::ir;
+
+class CompiledMockUpModel
+{
+public:
+ CompiledMockUpModel()
+ {
+ // Model: two elementwise add operation
+ // model input: lhs, rhs1
+ // model output: second add result (result2)
+ // constant: rhs2
+ // result1 <= (lhs + rhs)
+ // result2 <= (result1 + rhs2)
+    // lhs, rhs1, rhs2, result1, result2 shape: {1, 2, 2, 1}
+ // activation: none (constant)
+ graph = std::make_shared<Graph>();
+ // 1st add operands (result1 <= lhs + rhs1)
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ static float rhs2_data[4] = {3, 1, -1, 5};
+ auto operand_lhs = graph->addOperand(shape, type);
+ auto operand_rhs1 = graph->addOperand(shape, type);
+ auto operand_result1 = graph->addOperand(shape, type);
+ auto operand_rhs2 = graph->addOperand(shape, type);
+ auto operand_result2 = graph->addOperand(shape, type);
+ graph->operands()
+ .at(operand_rhs2)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+    // Add operations (result1 <= lhs + rhs1, result2 <= result1 + rhs2)
+ operation::Add::Param param1;
+ param1.activation = Activation::NONE;
+ auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
+ auto output_set1 = OperandIndexSequence{operand_result1};
+ graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
+ operation::Add::Param param2;
+ param2.activation = Activation::NONE;
+ auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
+ auto output_set2 = OperandIndexSequence{operand_result2};
+ graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+ // Identify model inputs and outputs
+ graph->addInput(operand_lhs);
+ graph->addInput(operand_rhs1);
+ graph->addOutput(operand_result2);
+ graph->finishBuilding();
+
+ // Compile
+ auto compiler = new onert::compiler::Compiler{graph};
+ compiler->compile();
+ compiler->release(executor);
+ delete compiler;
+ }
+
+public:
+ std::shared_ptr<Graph> graph;
+ std::shared_ptr<onert::exec::IExecutor> executor;
+};
+
+TEST(ExecInstance, simple)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executor = mockup.executor;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {5, -2, 0, -1};
+
+ auto execution = new onert::exec::Execution(executor);
+
+ execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution->execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+
+ delete execution;
+}
+
+TEST(ExecInstance, twoCompile)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executor1 = mockup.executor;
+ auto execution1 = new onert::exec::Execution(executor1);
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+
+ execution1->setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1->setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1->setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ // Make new executor: compile again
+ auto compiler = new onert::compiler::Compiler{graph};
+ compiler->compile();
+ std::shared_ptr<onert::exec::IExecutor> executor2;
+ compiler->release(executor2);
+ auto execution2 = new onert::exec::Execution(executor2);
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ execution2->setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2->setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2->setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1->execute();
+ execution2->execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+
+ delete compiler;
+ delete execution1;
+ delete execution2;
+}
+
+// Support two initialized execution instances, then ordered execution
+TEST(ExecInstance, twoExecution)
+{
+ auto mockup = CompiledMockUpModel();
+ auto executor = mockup.executor;
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output1 = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ auto execution1 = new onert::exec::Execution(executor);
+ execution1->setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1->setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1->setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+
+ // Make new execution
+ auto execution2 = new onert::exec::Execution(executor);
+ execution2->setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2->setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2->setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1->execute();
+ execution2->execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+
+ delete execution1;
+ delete execution2;
+}
+
+class Inference
+{
+public:
+ Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
+ std::shared_ptr<onert::exec::IExecutor> &executor)
+ : _input1{input1}, _input2{input2}, _output{output}, _executor{executor}
+ {
+ // DO NOTHING
+ }
+
+ void inference(void)
+ {
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output1 = IOIndex{0};
+
+ auto execution = new onert::exec::Execution(_executor);
+ execution->setInput(input1, reinterpret_cast<const void *>(_input1), 16);
+ execution->setInput(input2, reinterpret_cast<const void *>(_input2), 16);
+ execution->setOutput(output1, reinterpret_cast<void *>(_output), 16);
+
+ execution->execute();
+
+ delete execution;
+ }
+
+private:
+ const float (&_input1)[4];
+ const float (&_input2)[4];
+ float (&_output)[4];
+ std::shared_ptr<onert::exec::IExecutor> &_executor;
+};
+
+// Support multi-thread execution
+TEST(ExecInstance, twoThreads)
+{
+ auto mockup = CompiledMockUpModel();
+ auto executor = mockup.executor;
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+
+ Inference execution1{exe1_input1_buffer, exe1_input2_buffer, exe1_output_buffer, executor};
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ Inference execution2{exe2_input1_buffer, exe2_input2_buffer, exe2_output_buffer, executor};
+
+ std::thread t1{&Inference::inference, &execution1};
+ std::thread t2{&Inference::inference, &execution2};
+
+ t1.join();
+ t2.join();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Support asynchronous execution
+TEST(ExecInstance, async)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executor = mockup.executor;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {5, -2, 0, -1};
+
+ auto execution = new onert::exec::Execution(executor);
+
+ execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution->startExecute();
+ execution->waitFinish();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+
+ delete execution;
+}
+
+} // namespace
diff --git a/runtime/onert/test/core/exec/ExecTime.test.cc b/runtime/onert/test/core/exec/ExecTime.test.cc
new file mode 100644
index 000000000..ab8b79935
--- /dev/null
+++ b/runtime/onert/test/core/exec/ExecTime.test.cc
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/ExecTime.h"
+#include "backend/IConfig.h"
+#include "backend/Backend.h"
+#include <gtest/gtest.h>
+#include <string>
+
+namespace
+{
+using namespace onert;
+using namespace exec;
+using namespace backend;
+
+struct MockConfig : public IConfig
+{
+ std::string id() override { return "b1"; }
+ bool initialize() override { return true; };
+ bool SupportPermutation() override { return false; }
+};
+
+struct MockBackend : public ::onert::backend::Backend
+{
+ std::shared_ptr<onert::backend::IConfig> config() const override
+ {
+ return std::make_shared<MockConfig>();
+ }
+ std::unique_ptr<BackendContext> newContext(const ir::Graph &,
+ const std::shared_ptr<custom::IKernelBuilder> &kb,
+ bool) const override
+ {
+ return nullptr;
+ }
+};
+
+TEST(ExecTime, roundtrip_ok)
+{
+ const auto *b = new MockBackend();
+ std::vector<const Backend *> bs = {b};
+ {
+ ExecTime et(bs);
+ et.updateOperationExecTime(b, "op1", true, 100, 100);
+ et.updateOperationExecTime(b, "op1", true, 200, 200);
+ et.updateOperationExecTime(b, "op1", false, 100, 888);
+ et.uploadOperationsExecTime();
+ }
+ {
+ ExecTime et(bs);
+ auto time = et.getOperationExecTime(b, "op1", true, 100);
+ ASSERT_EQ(time, 100);
+ // Check interpolation
+ time = et.getOperationExecTime(b, "op1", true, 150);
+ ASSERT_EQ(time, 150);
+ time = et.getOperationExecTime(b, "op1", false, 100);
+ ASSERT_EQ(time, 888);
+ et.uploadOperationsExecTime();
+ }
+ // clean up
+ EXPECT_EQ(remove("exec_time.json"), 0);
+}
+
+TEST(ExecTime, structure)
+{
+
+ const auto *b = new MockBackend();
+ std::vector<const Backend *> bs = {b};
+ {
+ ExecTime et(bs);
+ et.updateOperationExecTime(b, "op1", true, 100, 100);
+ et.updateOperationExecTime(b, "op1", true, 200, 200);
+ et.uploadOperationsExecTime();
+ }
+ {
+ ExecTime et(bs);
+ auto time = et.getOperationExecTime(b, "op1", true, 100);
+ ASSERT_EQ(time, 100);
+ // Query at a stored data point (200 was recorded directly, not interpolated)
+ time = et.getOperationExecTime(b, "op1", true, 200);
+ ASSERT_EQ(time, 200);
+ et.uploadOperationsExecTime();
+ }
+ // clean up
+ EXPECT_EQ(remove("exec_time.json"), 0);
+}
+} // unnamed namespace
diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc
new file mode 100644
index 000000000..2b56357c2
--- /dev/null
+++ b/runtime/onert/test/core/interp/ExecManager.cc
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "ir/Graph.h"
+#include "interp/InterpExecutor.h"
+#include "exec/Execution.h"
+#include "ir/operation/Add.h"
+
+namespace
+{
+
+using namespace onert::ir;
+using InterpExecutor = onert::interp::InterpExecutor;
+using Execution = onert::exec::Execution;
+
+class InterpExecutorTest : public ::testing::Test
+{
+protected:
+ virtual void SetUp() {}
+ void CreateSimpleModel()
+ {
+ // Model: one elementwise add operation
+ // model input: lhs, rhs
+ // model output: add result
+ // lhs, rhs, result shape: {1, 2, 2, 1}
+ // activation: none (constant)
+ _graph = std::make_unique<Graph>();
+
+ // Add operands
+
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::INT32};
+ Shape shape_scalar(0);
+ TypeInfo type_scalar{DataType::INT32};
+
+ auto operand_lhs = _graph->addOperand(shape, type);
+ auto operand_rhs = _graph->addOperand(shape, type);
+ auto operand_result = _graph->addOperand(shape, type);
+
+ // Add operations
+
+ operation::Add::Param param;
+ param.activation = Activation::NONE;
+ auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
+ auto output_set = OperandIndexSequence{operand_result};
+ _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+
+ // Identify model inputs and outputs
+
+ _graph->getInputs().append(operand_lhs);
+ _graph->getInputs().append(operand_rhs);
+ _graph->getOutputs().append(operand_result);
+
+ _graph->finishBuilding();
+
+ _executor = std::make_unique<InterpExecutor>(*_graph);
+ }
+
+ void CreateTwoStepModel()
+ {
+ // Model: two elementwise add operations
+ // model input: lhs, rhs1
+ // model output: second add result (result2)
+ // constant: rhs2
+ // result1 <= (lhs + rhs1)
+ // result2 <= (result1 + rhs2)
+ // lhs, rhs1, rhs2, result1, result2 shape: {1, 2, 2, 1}
+ // activation: none (constant)
+ _graph = std::make_unique<Graph>();
+
+ // 1st add operands (result1 <= lhs + rhs1)
+
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::INT32};
+ Shape shape_scalar(0);
+ TypeInfo type_scalar{DataType::INT32};
+
+ static int32_t rhs2_data[4] = {3, 1, -1, 5};
+
+ auto operand_lhs = _graph->addOperand(shape, type);
+ auto operand_rhs1 = _graph->addOperand(shape, type);
+ auto operand_result1 = _graph->addOperand(shape, type);
+ auto operand_rhs2 = _graph->addOperand(shape, type);
+ auto operand_result2 = _graph->addOperand(shape, type);
+ _graph->operands()
+ .at(operand_rhs2)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+
+ // Add operations (result1 <= lhs + rhs1, then result2 <= result1 + rhs2)
+
+ operation::Add::Param param1;
+ param1.activation = Activation::NONE;
+ auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
+ auto output_set1 = OperandIndexSequence{operand_result1};
+ _graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
+
+ operation::Add::Param param2;
+ param2.activation = Activation::NONE;
+ auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
+ auto output_set2 = OperandIndexSequence{operand_result2};
+ _graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+
+ // Identify model inputs and outputs
+
+ _graph->getInputs().append(operand_lhs);
+ _graph->getInputs().append(operand_rhs1);
+ _graph->getOutputs().append(operand_result2);
+
+ _graph->finishBuilding();
+
+ _executor = std::make_unique<InterpExecutor>(*_graph);
+ }
+
+ void CreateUnspecifiedDimensionsModel()
+ {
+ // Model: one elementwise add operation
+ // model input: lhs, rhs
+ // model output: add result
+ // lhs, rhs, result shape: {1, unknown, 2, 1}
+ // activation: none (constant)
+ _graph = std::make_unique<Graph>();
+
+ // Add operands
+
+ Shape shape{1, 0, 2, 1};
+ TypeInfo type{DataType::INT32};
+ Shape shape_scalar(0);
+ TypeInfo type_scalar{DataType::INT32};
+
+ auto operand_lhs = _graph->addOperand(shape, type);
+ auto operand_rhs = _graph->addOperand(shape, type);
+
+ auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
+ _graph->operands()
+ .at(operand_activation)
+ .data(
+ std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
+
+ auto operand_result = _graph->addOperand(shape, type);
+
+ // Add operations
+
+ operation::Add::Param param;
+ param.activation = Activation::NONE;
+ auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
+ auto output_set = OperandIndexSequence{operand_result};
+ _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+
+ // Identify model inputs and outputs
+
+ _graph->getInputs().append(operand_lhs);
+ _graph->getInputs().append(operand_rhs);
+ _graph->getOutputs().append(operand_result);
+
+ _graph->finishBuilding();
+
+ _executor = std::make_unique<InterpExecutor>(*_graph);
+ }
+
+ void createExecution() { _execution = std::make_unique<Execution>(_executor); }
+
+ virtual void TearDown() { _executor = nullptr; }
+
+ std::unique_ptr<Graph> _graph{nullptr};
+ std::shared_ptr<InterpExecutor> _executor{nullptr};
+ std::unique_ptr<Execution> _execution{nullptr};
+ const int32_t _activation_value{0};
+};
+
+TEST_F(InterpExecutorTest, create_empty)
+{
+ Graph graph;
+ graph.finishBuilding();
+ _executor = std::make_unique<InterpExecutor>(graph);
+ ASSERT_NE(_executor, nullptr);
+}
+
+TEST_F(InterpExecutorTest, create_simple)
+{
+ CreateSimpleModel();
+ ASSERT_NE(_executor, nullptr);
+}
+
+TEST_F(InterpExecutorTest, setInput)
+{
+ CreateSimpleModel();
+ createExecution();
+
+ auto input1 = IOIndex{0};
+ const int32_t input1_buffer[4] = {1, 0, -1, -2};
+
+ EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 4),
+ std::runtime_error);
+ EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 12),
+ std::runtime_error);
+ EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
+}
+
+TEST_F(InterpExecutorTest, setOutput)
+{
+ CreateSimpleModel();
+ createExecution();
+
+ auto output = IOIndex{0};
+ auto output_idx = _graph->getOutputs().at(output);
+
+ int32_t output_buffer[4] = {};
+
+ EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 4),
+ std::runtime_error);
+ EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 12),
+ std::runtime_error);
+ EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
+}
+
+TEST_F(InterpExecutorTest, setInputForUnspecifiedDimensions)
+{
+ CreateUnspecifiedDimensionsModel();
+ createExecution();
+
+ auto input1 = IOIndex{0};
+ const int32_t input1_buffer[4] = {1, 0, -1, -2};
+
+ TypeInfo operand_type{DataType::INT32};
+ Shape operand_shape{1, 2, 2, 1};
+
+ EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
+ reinterpret_cast<const void *>(input1_buffer), 4),
+ std::runtime_error);
+ EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
+ reinterpret_cast<const void *>(input1_buffer), 12),
+ std::runtime_error);
+ EXPECT_NO_THROW(_execution->setInput(input1, operand_type, operand_shape,
+ reinterpret_cast<const void *>(input1_buffer), 16));
+}
+
+TEST_F(InterpExecutorTest, setOutputForUnspecifiedDimensions)
+{
+ CreateUnspecifiedDimensionsModel();
+ createExecution();
+
+ auto output = IOIndex{0};
+ auto output_idx = _graph->getOutputs().at(output);
+
+ TypeInfo operand_type{DataType::INT32};
+ Shape operand_shape{1, 2, 2, 1};
+
+ int32_t output_buffer[4] = {};
+
+ EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
+ reinterpret_cast<void *>(output_buffer), 4),
+ std::runtime_error);
+ EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
+ reinterpret_cast<void *>(output_buffer), 12),
+ std::runtime_error);
+ EXPECT_NO_THROW(_execution->setOutput(output, operand_type, operand_shape,
+ reinterpret_cast<void *>(output_buffer), 16));
+}
+
+TEST_F(InterpExecutorTest, execute)
+{
+ CreateSimpleModel();
+ createExecution();
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto input1_idx = _graph->getInputs().at(input1);
+ auto input2_idx = _graph->getInputs().at(input2);
+
+ const int32_t input1_buffer[4] = {1, 0, -1, -2};
+ const int32_t input2_buffer[4] = {1, -3, 2, -4};
+
+ auto output = IOIndex{0};
+ auto output_idx = _graph->getOutputs().at(output);
+
+ int32_t output_buffer[4] = {};
+
+ EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
+ EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
+ EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
+ EXPECT_NO_THROW(_execution->execute());
+ EXPECT_EQ(output_buffer[0], 2);
+ EXPECT_EQ(output_buffer[1], -3);
+ EXPECT_EQ(output_buffer[2], 1);
+ EXPECT_EQ(output_buffer[3], -6);
+}
+
+TEST_F(InterpExecutorTest, executeTwoStep)
+{
+ CreateTwoStepModel();
+ createExecution();
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto input1_idx = _graph->getInputs().at(input1);
+ auto input2_idx = _graph->getInputs().at(input2);
+
+ const int32_t input1_buffer[4] = {1, 0, -1, -2};
+ const int32_t input2_buffer[4] = {1, -3, 2, -4};
+
+ auto output = IOIndex{0};
+ auto output_idx = _graph->getOutputs().at(output);
+
+ int32_t output_buffer[4] = {};
+
+ EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
+ EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
+ EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
+ EXPECT_NO_THROW(_execution->execute());
+ EXPECT_EQ(output_buffer[0], 5);
+ EXPECT_EQ(output_buffer[1], -2);
+ EXPECT_EQ(output_buffer[2], 0);
+ EXPECT_EQ(output_buffer[3], -1);
+}
+
+} // namespace
diff --git a/runtime/onert/test/graph/Graph.cc b/runtime/onert/test/graph/Graph.cc
new file mode 100644
index 000000000..34e9fe002
--- /dev/null
+++ b/runtime/onert/test/graph/Graph.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Graph.h"
+
+TEST(Graph, inputs_and_outputs)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::OperandIndex index0{0u};
+ onert::ir::OperandIndex index1{1u};
+
+ graph.addInput({index0});
+ graph.addInput({index1});
+
+ onert::ir::OperandIndex index10{10u};
+ onert::ir::OperandIndex index11{11u};
+ onert::ir::OperandIndex index12{12u};
+
+ graph.addOutput({index10});
+ graph.addOutput({index11});
+ graph.addOutput({index12});
+
+ ASSERT_EQ(graph.getInputs().size(), 2);
+ ASSERT_EQ(graph.getOutputs().size(), 3);
+
+ onert::ir::IOIndex io_index0{0};
+ onert::ir::IOIndex io_index1{1};
+ onert::ir::IOIndex io_index2{2};
+
+ ASSERT_EQ(graph.getInputs().at(io_index0), 0);
+ ASSERT_EQ(graph.getInputs().at(io_index1), 1);
+
+ ASSERT_EQ(graph.getOutputs().at(io_index0), 10);
+ ASSERT_EQ(graph.getOutputs().at(io_index1), 11);
+ ASSERT_EQ(graph.getOutputs().at(io_index2), 12);
+}
diff --git a/runtime/onert/test/graph/Index.cc b/runtime/onert/test/graph/Index.cc
new file mode 100644
index 000000000..358e64c82
--- /dev/null
+++ b/runtime/onert/test/graph/Index.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "util/Index.h"
+
+using Index = ::onert::util::Index<uint32_t, struct TestTag>;
+
+TEST(Index, index_test)
+{
+ Index idx1{1u};
+ Index idx2{2u};
+ Index idx3{idx1};
+
+ ASSERT_EQ(idx1, 1);
+ ASSERT_EQ(idx1, 1u);
+ ASSERT_EQ(idx1.value(), 1u);
+ ASSERT_NE(idx1, idx2);
+ ASSERT_EQ(idx1, idx3);
+}
diff --git a/runtime/onert/test/graph/MockNode.h b/runtime/onert/test/graph/MockNode.h
new file mode 100644
index 000000000..60b4719ed
--- /dev/null
+++ b/runtime/onert/test/graph/MockNode.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TEST_GRAPH_MOCK_NODE_H__
+#define __ONERT_TEST_GRAPH_MOCK_NODE_H__
+
+#include "ir/Operation.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert_test
+{
+namespace ir
+{
+
+class SimpleMock : public onert::ir::Operation
+{
+public:
+ SimpleMock(const onert::ir::OperandIndexSequence &inputs,
+ const onert::ir::OperandIndexSequence &outputs)
+ : Operation{onert::ir::OperandConstraint::createAny()}
+ {
+ setInputs(inputs);
+ setOutputs(outputs);
+ }
+
+public:
+ void accept(onert::ir::OperationVisitor &) const override {}
+ onert::ir::OpCode opcode() const final { return onert::ir::OpCode::Invalid; }
+};
+
+} // namespace ir
+} // namespace onert_test
+
+#endif // __ONERT_TEST_GRAPH_MOCK_NODE_H__
diff --git a/runtime/onert/test/graph/operand/IndexSet.cc b/runtime/onert/test/graph/operand/IndexSet.cc
new file mode 100644
index 000000000..6215e0d24
--- /dev/null
+++ b/runtime/onert/test/graph/operand/IndexSet.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/OperandIndexSequence.h"
+
+using onert::ir::OperandIndex;
+using onert::ir::OperandIndexSequence;
+
+TEST(graph_OperandIndexSequence, append)
+{
+ OperandIndexSequence iset{0, 2, 4, 8};
+
+ ASSERT_EQ(iset.size(), 4);
+
+ iset.append(OperandIndex{10});
+
+ ASSERT_EQ(iset.size(), 5);
+
+ onert::ir::IOIndex index1{1};
+ onert::ir::IOIndex index2{4};
+
+ ASSERT_EQ(iset.at(index1), 2);
+ ASSERT_EQ(iset.at(index2), 10);
+
+ ASSERT_TRUE(iset.contains(OperandIndex{2}));
+ ASSERT_TRUE(iset.contains(OperandIndex{10}));
+ ASSERT_FALSE(iset.contains(OperandIndex{11}));
+}
+
+TEST(graph_OperandIndexSequence, replace)
+{
+ OperandIndexSequence iset{0, 1, 2, 3};
+
+ iset.replace(OperandIndex{1}, OperandIndex{9});
+ ASSERT_FALSE(iset.contains(OperandIndex{1}));
+ ASSERT_TRUE(iset.contains(OperandIndex{9}));
+}
diff --git a/runtime/onert/test/graph/operand/LayoutSet.cc b/runtime/onert/test/graph/operand/LayoutSet.cc
new file mode 100644
index 000000000..e35bddd8b
--- /dev/null
+++ b/runtime/onert/test/graph/operand/LayoutSet.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/LayoutSet.h"
+
+using onert::ir::Layout;
+using onert::ir::LayoutSet;
+
+TEST(graph_operand_LayoutSet, layout_set_operators)
+{
+ LayoutSet set1{Layout::NCHW};
+ LayoutSet set2{Layout::NHWC};
+ LayoutSet set3 = set1 | set2;
+
+ ASSERT_EQ(set3.size(), 2);
+
+ ASSERT_EQ((set3 - set1).size(), 1);
+ ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true);
+ ASSERT_EQ((set3 - set2).size(), 1);
+ ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 - set3).size(), 0);
+
+ ASSERT_EQ((set3 & set1).size(), 1);
+ ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 & set2).size(), 1);
+ ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true);
+ ASSERT_EQ((set1 & set2).size(), 0);
+}
diff --git a/runtime/onert/test/graph/operand/Set.cc b/runtime/onert/test/graph/operand/Set.cc
new file mode 100644
index 000000000..0d35b5581
--- /dev/null
+++ b/runtime/onert/test/graph/operand/Set.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Operands.h"
+
+TEST(graph_operand_Set, set_test)
+{
+ onert::ir::Operands set;
+
+ onert::ir::Shape shape0{1, 2, 3};
+
+ onert::ir::Shape shape1(4);
+ shape1.dim(0) = 10;
+ shape1.dim(1) = 20;
+ shape1.dim(2) = 30;
+ shape1.dim(3) = 40;
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ set.emplace(shape0, type);
+ set.emplace(shape1, type);
+
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false);
+
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3);
+}
diff --git a/runtime/onert/test/graph/operand/UseDef.cc b/runtime/onert/test/graph/operand/UseDef.cc
new file mode 100644
index 000000000..3e8b14b8b
--- /dev/null
+++ b/runtime/onert/test/graph/operand/UseDef.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Graph.h"
+#include "ir/verifier/Verifier.h"
+#include <memory>
+#include "../MockNode.h"
+
+#include <typeindex>
+
+namespace
+{
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+} // namespace
+
+TEST(graph_operand_usedef, usedef_test)
+{
+ onert::ir::Graph graph;
+ onert::ir::verifier::DAGChecker verifier;
+
+ onert::ir::Shape shape(3);
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Model Input/Output
+ auto input_operand = graph.addOperand(shape, type);
+ auto output_operand = graph.addOperand(shape, type);
+
+ graph.addInput(input_operand);
+ graph.addOutput(output_operand);
+
+ // MockNode1
+ auto operand_index1 = graph.addOperand(shape, type);
+ auto mocknode_index1 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
+
+ // MockNode2
+ auto operand_index2 = graph.addOperand(shape, type);
+ auto mocknode_index2 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
+
+ // MockNode3(two input)
+ auto multiinput_index = graph.addOperation(
+ std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
+
+ graph.finishBuilding();
+
+ ASSERT_EQ(verifier.verify(graph), true);
+
+ // Check def
+ ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(mocknode_index1), true);
+ ASSERT_EQ(graph.operands().at(operand_index2).getDef().contains(mocknode_index2), true);
+ ASSERT_EQ(graph.operands().at(output_operand).getDef().contains(multiinput_index), true);
+
+ ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(mocknode_index2), false);
+ ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(multiinput_index), false);
+
+ // Check use
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true);
+ ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true);
+
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1);
+ ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0);
+}
diff --git a/runtime/onert/test/graph/operation/Set.cc b/runtime/onert/test/graph/operation/Set.cc
new file mode 100644
index 000000000..088c44b8c
--- /dev/null
+++ b/runtime/onert/test/graph/operation/Set.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "../MockNode.h"
+#include "ir/Operations.h"
+
+using onert::ir::Operation;
+using onert::ir::OperationIndex;
+using onert::ir::Operations;
+
+TEST(graph_operation_Set, operation_test)
+{
+ Operations ops;
+ ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
+ OperationIndex idx{0u};
+ ASSERT_EQ(ops.at(idx).getInputs().size(), 4);
+ ASSERT_EQ(ops.at(idx).getOutputs().size(), 3);
+}
diff --git a/runtime/onert/test/graph/operation/SetIO.cc b/runtime/onert/test/graph/operation/SetIO.cc
new file mode 100644
index 000000000..378c5b4b9
--- /dev/null
+++ b/runtime/onert/test/graph/operation/SetIO.cc
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/operation/Conv2D.h"
+#include "ir/operation/Concat.h"
+
+#include <memory>
+
+#include <stdexcept>
+
+using Index = onert::ir::IOIndex;
+using IndexSet = onert::ir::OperandIndexSequence;
+
+TEST(graph_operation_setIO, operation_setIO_conv)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Add Conv
+ using Graph = onert::ir::operation::Conv2D;
+
+ auto input_operand = graph.addOperand(shape, type);
+ auto kernel_operand = graph.addOperand(shape, type);
+ auto bias_operand = graph.addOperand(shape, type);
+ IndexSet inputs{input_operand, kernel_operand, bias_operand};
+
+ Graph::Param conv_params;
+ conv_params.padding.type = onert::ir::PaddingType::SAME;
+ conv_params.stride.horizontal = 1;
+ conv_params.stride.vertical = 1;
+ conv_params.activation = onert::ir::Activation::NONE;
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto conv = std::make_unique<Graph>(inputs, outputs, conv_params);
+
+ ASSERT_NE(conv, nullptr);
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ conv->setInputs({8, 9, 10});
+ ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
+}
+
+TEST(graph_operation_setIO, operation_setIO_concat)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ using Graph = onert::ir::operation::Concat;
+
+ // Add Concat
+ IndexSet inputs;
+ for (int i = 0; i < 6; ++i)
+ {
+ inputs.append(graph.addOperand(shape, type));
+ }
+
+ Graph::Param concat_params{0};
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto concat = std::make_unique<Graph>(inputs, outputs, concat_params);
+
+ ASSERT_NE(concat, nullptr);
+ ASSERT_EQ(concat->getInputs().size(), 6);
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+
+ concat->setInputs({80, 6, 9, 11});
+ ASSERT_EQ(concat->getInputs().size(), 4);
+ ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
+ ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
+ ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range);
+}
diff --git a/runtime/onert/test/graph/verifier/Verifier.cc b/runtime/onert/test/graph/verifier/Verifier.cc
new file mode 100644
index 000000000..f8c7557e3
--- /dev/null
+++ b/runtime/onert/test/graph/verifier/Verifier.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Operation.h"
+#include "ir/Graph.h"
+#include "ir/verifier/Verifier.h"
+#include <memory>
+#include "ir/Operand.h"
+#include "../MockNode.h"
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+TEST(Verifier, dag_checker)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ graph.addOperation(std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}));
+
+ graph.finishBuilding();
+
+ onert::ir::verifier::DAGChecker verifier;
+
+ ASSERT_EQ(verifier.verify(graph), true);
+}
diff --git a/runtime/onert/test/util/ObjectManager.cc b/runtime/onert/test/util/ObjectManager.cc
new file mode 100644
index 000000000..5051bcfa6
--- /dev/null
+++ b/runtime/onert/test/util/ObjectManager.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "util/ObjectManager.h"
+#include "util/Index.h"
+
+using namespace onert;
+
+struct TestTag;
+using Index = typename util::Index<uint32_t, TestTag>;
+
+TEST(ObjectManager, emplace)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index = man.emplace(100);
+ ASSERT_EQ(man.at(index), 100);
+}
+
+TEST(ObjectManager, remove_1)
+{
+ util::ObjectManager<Index, int> man;
+
+ Index index = man.emplace(100);
+ ASSERT_TRUE(man.exist(index));
+ ASSERT_EQ(man.at(index), 100);
+
+ man.remove(index);
+ ASSERT_FALSE(man.exist(index));
+}
+
+TEST(ObjectManager, remove_2)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ ASSERT_TRUE(man.exist(index0));
+ ASSERT_EQ(man.at(index0), 100);
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+
+ man.remove(index0);
+ ASSERT_FALSE(man.exist(index0));
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+}
+
+TEST(ObjectManager, push)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index = man.push(std::unique_ptr<int>{new int{100}});
+ ASSERT_EQ(man.at(index), 100);
+}
+
+TEST(ObjectManager, const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ int sum = 0;
+ man.iterate([&](const Index &index, const int &val) { sum += val; });
+ ASSERT_EQ(sum, 600);
+}
+
+TEST(ObjectManager, non_const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ man.iterate([&](const Index &index, int &val) { val += 1; });
+ ASSERT_EQ(man.at(index0), 101);
+ ASSERT_EQ(man.at(index1), 201);
+ ASSERT_EQ(man.at(index2), 301);
+}
diff --git a/runtime/onert/test/util/ShapeInference.cc b/runtime/onert/test/util/ShapeInference.cc
new file mode 100644
index 000000000..053b635f2
--- /dev/null
+++ b/runtime/onert/test/util/ShapeInference.cc
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Layout.h"
+#include "util/ShapeInference.h"
+
+using namespace onert::ir;
+
+TEST(ShapeInference, Elementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{3};
+ auto infered_shapes = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
+ auto infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.dim(0), 1);
+ ASSERT_EQ(infered_out_shape.dim(1), 299);
+ ASSERT_EQ(infered_out_shape.dim(2), 299);
+ ASSERT_EQ(infered_out_shape.dim(3), 3);
+}
+
+TEST(ShapeInference, IncorrectElementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{5, 3};
+ ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error);
+}
+
+TEST(ShapeInference, Pool2DNodeSame)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::SAME};
+
+ operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
+ auto infered_shapes = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+ auto infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
+ infered_shapes = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+ infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeValid)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::VALID};
+
+ operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
+ auto infered_shapes = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+ auto infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
+ infered_shapes = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+ infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeExplicit)
+{
+ Shape in_shape{10, 3, 5, 20};
+
+ Stride stride{3, 7};
+ Padding padding{4, 3, 2, 1};
+
+ operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
+ auto infered_shapes = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+ auto infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
+ infered_shapes = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+ infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Conv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
+
+ operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE};
+ auto infered_shapes = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+ auto infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE};
+ infered_shapes = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+ infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param = operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE};
+ infered_shapes = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+ infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+}
+
+TEST(ShapeInference, DepthwiseConv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE};
+ auto infered_shapes =
+ onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+ auto infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
+ Activation::NONE};
+ infered_shapes = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+ infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE};
+ infered_shapes = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+ infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+}
+
+TEST(ShapeInference, Concat)
+{
+ Shape in1{10, 20, 30, 3, 50};
+ Shape in2{10, 20, 30, 2, 50};
+ Shape in3{10, 20, 30, 2, 50};
+
+ operation::Concat::Param param{3};
+ auto infered_shapes = onert::shape_inference::inferConcatShape({in1, in2, in3}, param);
+ auto infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 5);
+ ASSERT_EQ(infered_out_shape.dim(0), 10);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 30);
+ ASSERT_EQ(infered_out_shape.dim(3), 7);
+ ASSERT_EQ(infered_out_shape.dim(4), 50);
+}
+
+TEST(ShapeInference, FullyConnected)
+{
+ Shape in_shape{3, 4, 5, 6};
+ Shape ker_shape{3, 10};
+ auto infered_shapes = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
+ auto infered_out_shape = infered_shapes[0];
+
+ ASSERT_EQ(infered_out_shape.rank(), 2);
+ ASSERT_EQ(infered_out_shape.dim(0), 36);
+ ASSERT_EQ(infered_out_shape.dim(1), 3);
+}
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 96b610a45..1eb2f07a9 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,3 +1,5 @@
-add_subdirectory(nnapi)
-add_subdirectory(tools)
-add_subdirectory(custom_op)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+add_subdirectories()
diff --git a/tests/custom_op/CMakeLists.txt b/tests/custom_op/CMakeLists.txt
index aabb2f0dd..8fd8e8af0 100644
--- a/tests/custom_op/CMakeLists.txt
+++ b/tests/custom_op/CMakeLists.txt
@@ -1,6 +1,6 @@
-if(NOT BUILD_NEURUN)
+if(NOT BUILD_ONERT)
return()
-endif(NOT BUILD_NEURUN)
+endif(NOT BUILD_ONERT)
nnfw_find_package(FlatBuffers QUIET)
if(NOT FlatBuffers_FOUND)
@@ -8,26 +8,66 @@ if(NOT FlatBuffers_FOUND)
return()
endif(NOT FlatBuffers_FOUND)
+if(NOT CMAKE_BUILD_TYPE)
+ message(WARNING "CMAKE_BUILD_TYPE is not specified. \
+ Generated library will have `unknown` in its name, instead of `release` or `debug`.")
+ set(BUILD_TYPE "unknown")
+else()
+ string(TOLOWER ${CMAKE_BUILD_TYPE} BUILD_TYPE)
+endif()
+
+set(SUFFIX ".${TARGET_ARCH}-${TARGET_OS}.${BUILD_TYPE}")
+
# Functions for custom op test
-# Takes target name, source list and kernel list
+
+# Add custom op app
+#
+# NAME : the app name to be built
+# SOURCES: the app source codes
+# KERNELS: the custom op kernel names that the app uses
function(add_nnfw_custom_op_app NAME)
cmake_parse_arguments(
- PARSED_ARGS # prefix of output variables
- "" # list of names of the boolean arguments (only defined ones will be true)
- "" # list of names of mono-valued arguments
+ PARSED_ARGS # prefix of output variables
+ "" # list of names of the boolean arguments (only defined ones will be true)
+ "" # list of names of mono-valued arguments
"SOURCES;KERNELS" # list of names of multi-valued arguments (output variables are lists)
- ${ARGN} # arguments of the function to parse, here we take the all original ones
+ ${ARGN} # arguments of the function to parse, here we take the all original ones
)
add_executable(${NAME} ${PARSED_ARGS_SOURCES})
- target_link_libraries(${NAME} PRIVATE ${PARSED_ARGS_KERNELS})
+ set(LIBNAMELIST "")
+ foreach(KERNEL ${PARSED_ARGS_KERNELS})
+ list(APPEND LIBNAMELIST "${KERNEL}${SUFFIX}")
+ endforeach()
+ target_link_libraries(${NAME} PRIVATE ${LIBNAMELIST})
target_link_libraries(${NAME} PRIVATE nnfw-nnapi-header nnfw-dev)
target_link_libraries(${NAME} PRIVATE dl ${LIB_PTHREAD})
endfunction()
-function(add_nnfw_kernel NAME)
- add_library(${NAME} STATIC ${ARGN})
- target_link_libraries(${NAME} PRIVATE nnfw-nnapi-header nnfw-dev)
- target_link_libraries(${NAME} PRIVATE flatbuffers)
+# Add custom op kernel with nnpackage spec conforming name.
+#
+# NAME : the custom op kernel name to be built
+# STATIC: ON (or TRUE) to build static library, OFF (or FALSE) for shared library
+# ARGN : the source codes that comes after two arguments.
+function(add_nnfw_custom_op_kernel NAME STATIC)
+ set(LIBNAME ${NAME}${SUFFIX})
+ if(STATIC)
+ # message(FATAL_ERROR STATIC)
+ add_library(${LIBNAME} STATIC ${ARGN})
+ else()
+ # message(FATAL_ERROR SHARED)
+ add_library(${LIBNAME} SHARED ${ARGN})
+ endif()
+ target_link_libraries(${LIBNAME} PRIVATE nnfw-nnapi-header nnfw-dev)
+ target_link_libraries(${LIBNAME} PRIVATE flatbuffers)
+endfunction()
+
+# Install custom op kernel built with `add_nnfw_custom_op_kernel`
+#
+# NAME : custom op kernel name
+# DESTINATION: the path where the custom op kernel library will be installed in
+function(install_nnfw_custom_op_kernel NAME DESTINATION)
+ set(LIBNAME ${NAME}${SUFFIX})
+ install(TARGETS ${LIBNAME} DESTINATION ${DESTINATION})
endfunction()
add_subdirectories()
diff --git a/tests/custom_op/FillFrom/CMakeLists.txt b/tests/custom_op/FillFrom/CMakeLists.txt
index 12d9ec743..ba03e3056 100644
--- a/tests/custom_op/FillFrom/CMakeLists.txt
+++ b/tests/custom_op/FillFrom/CMakeLists.txt
@@ -1,8 +1,7 @@
-add_nnfw_kernel(FillFrom kernels/FillFromKernel.cc)
+add_nnfw_custom_op_kernel(FillFrom ON kernels/FillFromKernel.cc)
add_nnfw_custom_op_app(FillFrom_runner
SOURCES FillFrom_runner.cc
- KERNELS FillFrom
- )
-
+ KERNELS FillFrom)
install(TARGETS FillFrom_runner DESTINATION tests)
install(DIRECTORY nnpkgs/FillFrom DESTINATION tests/nnpkgs)
+install_nnfw_custom_op_kernel(FillFrom tests/nnpkgs/FillFrom)
diff --git a/tests/custom_op/FillFrom/FillFrom_runner.cc b/tests/custom_op/FillFrom/FillFrom_runner.cc
index 8d0329718..82e25fa34 100644
--- a/tests/custom_op/FillFrom/FillFrom_runner.cc
+++ b/tests/custom_op/FillFrom/FillFrom_runner.cc
@@ -115,7 +115,7 @@ int main(const int argc, char **argv)
if (argc == 1)
{
- std::cout << "[WARNIGN] Use default package path\n";
+ std::cout << "[WARNING] Use default package path\n";
}
else if (argc == 2)
{
diff --git a/tests/framework/run_test.sh b/tests/framework/run_test.sh
deleted file mode 100755
index 84cfa9efd..000000000
--- a/tests/framework/run_test.sh
+++ /dev/null
@@ -1,277 +0,0 @@
-#!/usr/bin/env bash
-#
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-NNFW_HOME="$(dirname $(dirname ${MY_PATH}))"
-CACHE_ROOT_PATH=$MY_PATH/"cache"
-TEST_ROOT_PATH=$MY_PATH/"tests"
-REPORT_DIR="report"
-
-RUN_DISABLED="true"
-
-function Usage()
-{
- echo "Usage: ./$0 --driverbin={such as tflite_run} {tests to test or empty for all of tests}"
- echo "Usage: ./$0 --driverbin=Product/out/bin/tflite_run --reportdir=report --tapname=verification.tap avgpool1 avgpool2"
- echo ""
- echo "--download - (default=off) Download model files. Other options is ignored"
- echo "--driverbin - (default=../../Product/out/bin/tflite_run) runner for runnning framework tests"
- echo "--reportdir - (default=report) directory to place tap files"
- echo "--tapname - (default=framework_test.tap) file name to be written for tap"
- echo ""
-}
-
-function need_download()
-{
- LOCAL_PATH=$1
- REMOTE_URL=$2
- if [ ! -e $LOCAL_PATH ]; then
- return 0;
- fi
- # Ignore checking md5 in cache
- if [ ! -z $IGNORE_MD5 ] && [ "$IGNORE_MD5" == "1" ]; then
- return 1
- fi
-
- LOCAL_HASH=$(md5sum $LOCAL_PATH | awk '{ print $1 }')
- REMOTE_HASH=$(curl -ss $REMOTE_URL | md5sum | awk '{ print $1 }')
- # TODO Emit an error when Content-MD5 field was not found. (Server configuration issue)
- if [ "$LOCAL_HASH" != "$REMOTE_HASH" ]; then
- echo "Downloaded file is outdated or incomplete."
- return 0
- fi
- return 1
-}
-
-DRIVER_BIN=""
-TAP_NAME="framework_test.tap"
-TEST_LIST=()
-DOWNLOAD_MODE="off"
-
-# Support environment variable setting for mirror server
-FIXED_MODELFILE_SERVER="${MODELFILE_SERVER:-}"
-
-for i in "$@"
-do
- case $i in
- -h|--help|help)
- Usage
- exit 1
- ;;
- --driverbin=*)
- DRIVER_BIN=${i#*=}
- ;;
- --reportdir=*)
- REPORT_DIR=${i#*=}
- ;;
- --tapname=*)
- TAP_NAME=${i#*=}
- ;;
- --download=*)
- DOWNLOAD_MODE=${i#*=}
- ;;
- *)
- TEST_LIST+=( $i )
- ;;
- esac
- shift
-done
-
-if [[ ${#TEST_LIST[@]} -eq 0 ]]; then
- RUN_DISABLED="false"
-fi
-
-if [ ! -n "$DRIVER_BIN" ]; then
- DRIVER_BIN="$NNFW_HOME/Product/out/bin/tflite_run"
-fi
-
-# Check test driver setting
-if [ ! -e $DRIVER_BIN ] && [ "$DOWNLOAD_MODE" != "on" ]; then
- echo "Cannot find test driver" $DRIVER_BIN ": please set proper DRIVER_BIN"
- exit 1
-fi
-
-run_tests()
-{
- echo "1..$#" > $REPORT_DIR/$TAP_NAME
- SELECTED_TESTS=$@
-
- echo ""
- echo "Running tests:"
- echo "======================"
- for TEST_NAME in $SELECTED_TESTS; do
- echo $TEST_NAME
- done
- echo "======================"
-
- TOTAL_RESULT=0 # 0(normal) or 1(abnormal)
- i=0
- for TEST_NAME in $SELECTED_TESTS; do
- # Test configure initialization
- ((i++))
- STATUS="enabled"
- MODELFILE_SERVER_PATH=""
- MODELFILE_NAME=""
- source $TEST_ROOT_PATH/$TEST_NAME/config.sh
-
- LOWER_STATUS="$(echo $STATUS | awk '{print tolower($0)}')"
- if [ "$LOWER_STATUS" == "disabled" ] && [ "$RUN_DISABLED" == "false" ]; then
- echo ""
- echo "Skip $TEST_NAME"
- echo "======================"
- echo "ok $i # skip $TEST_NAME" >> $REPORT_DIR/$TAP_NAME
- continue
- fi
-
- TEST_CACHE_PATH=$CACHE_ROOT_PATH/$TEST_NAME
- MODELFILE=$TEST_CACHE_PATH/$MODELFILE_NAME
- MODELFILE_URL="$MODELFILE_SERVER_PATH/$MODELFILE_NAME"
- if [ -n "$FIXED_MODELFILE_SERVER" ]; then
- MODELFILE_URL="$FIXED_MODELFILE_SERVER/$MODELFILE_NAME"
- fi
-
- # Download model file
- if [ ! -e $TEST_CACHE_PATH ]; then
- mkdir -p $TEST_CACHE_PATH
- fi
-
- # Download unless we have it in cache (Also check md5sum)
- if need_download "$MODELFILE" "$MODELFILE_URL"; then
- echo ""
- echo "Download test file for $TEST_NAME"
- echo "======================"
-
- rm -f $MODELFILE # Remove invalid file if exists
- pushd $TEST_CACHE_PATH
- wget -nv $MODELFILE_URL
- if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
- unzip -o $MODELFILE_NAME
- fi
- popd
- fi
-
- # Find model file for downloaded by zip
- if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
- pushd $TEST_CACHE_PATH
- MODELFILE=$TEST_CACHE_PATH/$(ls *.tflite)
- popd
- fi
-
- echo ""
- echo "Run $TEST_NAME"
- echo "======================"
-
- # Run driver to test framework
- $DRIVER_BIN $MODELFILE
-
- #$DRIVER_BIN $MODELFILE
- if [[ $? -eq 0 ]]; then
- echo "ok $i - $TEST_NAME" >> $REPORT_DIR/$TAP_NAME
- else
- echo "not ok $i - $TEST_NAME" >> $REPORT_DIR/$TAP_NAME
- TOTAL_RESULT=1
- fi
- done
- return $TOTAL_RESULT
-}
-
-download_tests()
-{
- SELECTED_TESTS=$@
-
- echo ""
- echo "Downloading tests:"
- echo "======================"
- for TEST_NAME in $SELECTED_TESTS; do
- echo $TEST_NAME
- done
- echo "======================"
-
- i=0
- for TEST_NAME in $SELECTED_TESTS; do
- # Test configure initialization
- ((i++))
- MODELFILE_SERVER_PATH=""
- MODELFILE_NAME=""
- source $TEST_ROOT_PATH/$TEST_NAME/config.sh
-
- TEST_CACHE_PATH=$CACHE_ROOT_PATH/$TEST_NAME
- MODELFILE=$TEST_CACHE_PATH/$MODELFILE_NAME
- MODELFILE_URL="$MODELFILE_SERVER/$MODELFILE_NAME"
- if [ -n "$FIXED_MODELFILE_SERVER" ]; then
- MODELFILE_URL="$FIXED_MODELFILE_SERVER/$MODELFILE_NAME"
- fi
-
- # Download model file
- if [ ! -e $TEST_CACHE_PATH ]; then
- mkdir -p $TEST_CACHE_PATH
- fi
-
- # Download unless we have it in cache (Also check md5sum)
- if need_download "$MODELFILE" "$MODELFILE_URL"; then
- echo ""
- echo "Download test file for $TEST_NAME"
- echo "======================"
-
- rm -f $MODELFILE # Remove invalid file if exists
- pushd $TEST_CACHE_PATH
- wget -nv $MODELFILE_URL
- if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
- unzip -o $MODELFILE_NAME
- fi
- popd
- fi
-
- done
-}
-
-
-find_tests()
-{
- local TEST_DIRS="$@"
- local TESTS_TO_RUN=""
-
- if [[ $# -eq 0 ]]; then
- TEST_DIRS="."
- fi
-
- shift $#
-
- pushd $TEST_ROOT_PATH > /dev/null
- for DIR in $TEST_DIRS; do
- if [ -d "$DIR" ]; then
- TESTS_FOUND=$(find "$DIR" -type f -name 'config.sh' -exec dirname {} \;| sed 's|^./||' | sort)
- TESTS_TO_RUN="$TESTS_TO_RUN $TESTS_FOUND"
- else
- echo "Test $DIR was not found. This test is not added." 1>&2
- fi
- done
- popd > /dev/null
-
- echo $TESTS_TO_RUN
-}
-
-mkdir -p $REPORT_DIR
-TESTS_TO_RUN=$(find_tests ${TEST_LIST[@]})
-
-if [[ "$DOWNLOAD_MODE" == "on" ]]; then
- download_tests $TESTS_TO_RUN
- exit 0;
-fi
-
-run_tests $TESTS_TO_RUN
-exit $?
diff --git a/tests/framework/tests/custom/abs/config.sh b/tests/framework/tests/custom/abs/config.sh
deleted file mode 100644
index 3030bcb72..000000000
--- a/tests/framework/tests/custom/abs/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="custom_abs_test.tflite"
diff --git a/tests/framework/tests/custom/tensorflowmax/config.sh b/tests/framework/tests/custom/tensorflowmax/config.sh
deleted file mode 100644
index 122c459db..000000000
--- a/tests/framework/tests/custom/tensorflowmax/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="custom_max_test.tflite"
diff --git a/tests/framework/tests/custom/tensorflowsum/config.sh b/tests/framework/tests/custom/tensorflowsum/config.sh
deleted file mode 100644
index 0a6dfe348..000000000
--- a/tests/framework/tests/custom/tensorflowsum/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="custom_sum_test.tflite"
diff --git a/tests/nnapi/CMakeLists.txt b/tests/nnapi/CMakeLists.txt
index 84dbf37af..b1215d867 100644
--- a/tests/nnapi/CMakeLists.txt
+++ b/tests/nnapi/CMakeLists.txt
@@ -2,10 +2,10 @@ if (NOT BUILD_RUNTIME_NNAPI_TEST)
return()
endif(NOT BUILD_RUNTIME_NNAPI_TEST)
-if (NOT BUILD_NEURUN)
+if (NOT BUILD_ONERT)
message(STATUS "Skip build NNAPI test: no runtime build")
return()
-endif(NOT BUILD_NEURUN)
+endif(NOT BUILD_ONERT)
nnfw_find_package(GTest)
@@ -56,7 +56,8 @@ target_link_libraries(${RUNTIME_NNAPI_TEST} ${LIB_PTHREAD} dl)
install(TARGETS ${RUNTIME_NNAPI_TEST} DESTINATION unittest)
-set(SKIPLIST_FILE_NAME ${RUNTIME_NNAPI_TEST}.skip.${TARGET_PLATFORM})
+# Default test backend: cpu
+set(SKIPLIST_FILE_NAME ${RUNTIME_NNAPI_TEST}.skip.${TARGET_PLATFORM}.cpu)
install(FILES ${SKIPLIST_FILE_NAME}
DESTINATION unittest
RENAME ${RUNTIME_NNAPI_TEST}.skip
diff --git a/tests/nnapi/include/NeuralNetworksWrapper.h b/tests/nnapi/include/NeuralNetworksWrapper.h
index 960d3fe37..af19008fe 100644
--- a/tests/nnapi/include/NeuralNetworksWrapper.h
+++ b/tests/nnapi/include/NeuralNetworksWrapper.h
@@ -20,15 +20,15 @@
#ifndef __NNFW_RT_NEURAL_NETWORKS_WRAPPER_H__
#define __NNFW_RT_NEURAL_NETWORKS_WRAPPER_H__
-// Fix for neurun:
+// Fix for onert:
// NeuralNetworks.h => NeuralNetworksShim.h
// Additional include NeuralNetworksExShim.h
#include "NeuralNetworksShim.h"
#include "NeuralNetworksExShim.h"
#include <math.h>
-// Fix for neurun: use boost::optional instead of std::optional
-// TODO in neurun: introduce and use internal optional library
+// Fix for onert: use boost::optional instead of std::optional
+// TODO in onert: introduce and use internal optional library
#include <boost/optional.hpp>
#include <string>
#include <vector>
@@ -104,7 +104,7 @@ struct SymmPerChannelQuantParams {
struct OperandType {
ANeuralNetworksOperandType operandType;
std::vector<uint32_t> dimensions;
- // Fix for neurun:
+ // Fix for onert:
// Use boost::optional instead of std::optional
// Default value: std::nullopt -> boost::none
boost::optional<SymmPerChannelQuantParams> channelQuant;
@@ -275,7 +275,7 @@ class Model {
}
}
- // Fix for neurun: addOperationEx for operation support extension (NeuralNetworksEx.h)
+ // Fix for onert: addOperationEx for operation support extension (NeuralNetworksEx.h)
void addOperationEx(ANeuralNetworksOperationTypeEx type, const std::vector<uint32_t>& inputs,
const std::vector<uint32_t>& outputs) {
if (ANeuralNetworksModel_addOperationEx(mModel, type, static_cast<uint32_t>(inputs.size()),
diff --git a/tests/nnapi/include/TestHarness.h b/tests/nnapi/include/TestHarness.h
index 94c8f9783..ca7ad15e7 100644
--- a/tests/nnapi/include/TestHarness.h
+++ b/tests/nnapi/include/TestHarness.h
@@ -29,17 +29,25 @@
#include <tuple>
#include <vector>
+// Fix for onert: define _Float16 for gnu compiler
+#if __GNUC__ && !__clang__
+#if __ARM_FP16_FORMAT_IEEE || __ARM_FP16_FORMAT_ALTERNATIVE
+#define _Float16 __fp16
+#else // __ARM_FP16_FORMAT_IEEE || __ARM_FP16_FORMAT_ALTERNATIVE
+#define _Float16 float
+#endif // __ARM_FP16_FORMAT_IEEE || __ARM_FP16_FORMAT_ALTERNATIVE
+#endif // __GNUC__ && !__clang__
+
namespace test_helper {
constexpr const size_t gMaximumNumberOfErrorMessages = 10;
// TODO: Figure out the build dependency to make including "CpuOperationUtils.h" work.
-// Fix for neurun: comment out convertFloat16ToFloat32
-//inline void convertFloat16ToFloat32(const _Float16* input, std::vector<float>* output) {
-// for (size_t i = 0; i < output->size(); ++i) {
-// (*output)[i] = static_cast<float>(input[i]);
-// }
-//}
+inline void convertFloat16ToFloat32(const _Float16* input, std::vector<float>* output) {
+ for (size_t i = 0; i < output->size(); ++i) {
+ (*output)[i] = static_cast<float>(input[i]);
+ }
+}
// This class is a workaround for two issues our code relies on:
// 1. sizeof(bool) is implementation defined.
@@ -61,8 +69,7 @@ typedef std::map<int, std::vector<float>> Float32Operands;
typedef std::map<int, std::vector<int32_t>> Int32Operands;
typedef std::map<int, std::vector<uint8_t>> Quant8AsymmOperands;
typedef std::map<int, std::vector<int16_t>> Quant16SymmOperands;
-// Fix for neurun: comment out Float16Operands
-//typedef std::map<int, std::vector<_Float16>> Float16Operands;
+typedef std::map<int, std::vector<_Float16>> Float16Operands;
typedef std::map<int, std::vector<bool8>> Bool8Operands;
typedef std::map<int, std::vector<int8_t>> Quant8ChannelOperands;
typedef std::map<int, std::vector<uint16_t>> Quant16AsymmOperands;
@@ -74,8 +81,7 @@ struct MixedTyped {
Int32Operands int32Operands;
Quant8AsymmOperands quant8AsymmOperands;
Quant16SymmOperands quant16SymmOperands;
- // Fix for neurun comment out Float16Operands
- //Float16Operands float16Operands;
+ Float16Operands float16Operands;
Bool8Operands bool8Operands;
Quant8ChannelOperands quant8ChannelOperands;
Quant16AsymmOperands quant16AsymmOperands;
@@ -89,9 +95,7 @@ typedef struct {
// Specifies the RANDOM_MULTINOMIAL distribution tolerance.
// If set to greater than zero, the input is compared as log-probabilities
// to the output and must be within this tolerance to pass.
- // Fix for neurun: Remove default value - c++11 don't support yet
- //float expectedMultinomialDistributionTolerance = 0.0;
- float expectedMultinomialDistributionTolerance;
+ float expectedMultinomialDistributionTolerance = 0.0;
} MixedTypedExample;
// Go through all index-value pairs of a given input type
@@ -152,8 +156,7 @@ inline void for_all(MixedTyped& idx_and_data,
for_all_internal(idx_and_data.int32Operands, execute_this);
for_all_internal(idx_and_data.quant8AsymmOperands, execute_this);
for_all_internal(idx_and_data.quant16SymmOperands, execute_this);
- // Fix for neurun: comment out float16Operands field
- //for_all_internal(idx_and_data.float16Operands, execute_this);
+ for_all_internal(idx_and_data.float16Operands, execute_this);
for_all_internal(idx_and_data.bool8Operands, execute_this);
for_all_internal(idx_and_data.quant8ChannelOperands, execute_this);
for_all_internal(idx_and_data.quant16AsymmOperands, execute_this);
@@ -179,8 +182,7 @@ inline void for_all(const MixedTyped& idx_and_data,
for_all_internal(idx_and_data.int32Operands, execute_this);
for_all_internal(idx_and_data.quant8AsymmOperands, execute_this);
for_all_internal(idx_and_data.quant16SymmOperands, execute_this);
- // Fix for neurun: comment out float16Operands field
- //for_all_internal(idx_and_data.float16Operands, execute_this);
+ for_all_internal(idx_and_data.float16Operands, execute_this);
for_all_internal(idx_and_data.bool8Operands, execute_this);
for_all_internal(idx_and_data.quant8ChannelOperands, execute_this);
for_all_internal(idx_and_data.quant16AsymmOperands, execute_this);
@@ -212,8 +214,7 @@ inline void resize_accordingly(const MixedTyped& golden, MixedTyped& test) {
resize_accordingly_(golden.int32Operands, test.int32Operands);
resize_accordingly_(golden.quant8AsymmOperands, test.quant8AsymmOperands);
resize_accordingly_(golden.quant16SymmOperands, test.quant16SymmOperands);
- //Fix for neurun: comment out float16Operands field
- //resize_accordingly_(golden.float16Operands, test.float16Operands);
+ resize_accordingly_(golden.float16Operands, test.float16Operands);
resize_accordingly_(golden.bool8Operands, test.bool8Operands);
resize_accordingly_(golden.quant8ChannelOperands, test.quant8ChannelOperands);
resize_accordingly_(golden.quant16AsymmOperands, test.quant16AsymmOperands);
@@ -240,8 +241,7 @@ inline MixedTyped filter(const MixedTyped& golden,
filter_internal(golden.int32Operands, &filtered.int32Operands, is_ignored);
filter_internal(golden.quant8AsymmOperands, &filtered.quant8AsymmOperands, is_ignored);
filter_internal(golden.quant16SymmOperands, &filtered.quant16SymmOperands, is_ignored);
- // Fix for neurun: comment out float16Operands field
- //filter_internal(golden.float16Operands, &filtered.float16Operands, is_ignored);
+ filter_internal(golden.float16Operands, &filtered.float16Operands, is_ignored);
filter_internal(golden.bool8Operands, &filtered.bool8Operands, is_ignored);
filter_internal(golden.quant8ChannelOperands, &filtered.quant8ChannelOperands, is_ignored);
filter_internal(golden.quant16AsymmOperands, &filtered.quant16AsymmOperands, is_ignored);
@@ -333,19 +333,18 @@ inline void compare(const MixedTyped& golden, const MixedTyped& test,
totalNumberOfErrors++;
}
});
- // Fix for neurun: comment out _Float16 compare
- //compare_<_Float16>(golden.float16Operands, test.float16Operands,
- // [&totalNumberOfErrors, fpAtol, fpRtol](_Float16 expected, _Float16 actual) {
- // // Compute the range based on both absolute tolerance and relative
- // // tolerance
- // float fpRange = fpAtol + fpRtol * std::abs(static_cast<float>(expected));
- // if (totalNumberOfErrors < gMaximumNumberOfErrorMessages) {
- // EXPECT_NEAR(expected, actual, fpRange);
- // }
- // if (std::abs(static_cast<float>(expected - actual)) > fpRange) {
- // totalNumberOfErrors++;
- // }
- // });
+ compare_<_Float16>(golden.float16Operands, test.float16Operands,
+ [&totalNumberOfErrors, fpAtol, fpRtol](_Float16 expected, _Float16 actual) {
+ // Compute the range based on both absolute tolerance and relative
+ // tolerance
+ float fpRange = fpAtol + fpRtol * std::abs(static_cast<float>(expected));
+ if (totalNumberOfErrors < gMaximumNumberOfErrorMessages) {
+ EXPECT_NEAR(expected, actual, fpRange);
+ }
+ if (std::abs(static_cast<float>(expected - actual)) > fpRange) {
+ totalNumberOfErrors++;
+ }
+ });
compare_<bool8>(golden.bool8Operands, test.bool8Operands,
[&totalNumberOfErrors](bool expected, bool actual) {
if (totalNumberOfErrors < gMaximumNumberOfErrorMessages) {
@@ -410,12 +409,11 @@ inline void expectMultinomialDistributionWithinTolerance(const MixedTyped& test,
Float32Operands float32Operands = example.operands.first.float32Operands;
if (!float32Operands.empty()) {
input = example.operands.first.float32Operands.at(0);
- } /*else {
- // Fix for neurun: comment out convertFloat16ToFloat32
+ } else {
std::vector<_Float16> inputFloat16 = example.operands.first.float16Operands.at(0);
input.resize(inputFloat16.size());
convertFloat16ToFloat32(inputFloat16.data(), &input);
- }*/
+ }
for (int b = 0; b < kBatchSize; ++b) {
float probability_sum = 0;
const int batch_index = kBatchSize * b;
diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl
new file mode 100644
index 000000000..a366dc097
--- /dev/null
+++ b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl
@@ -0,0 +1,36 @@
+GeneratedTests.*float16*
+GeneratedTests.*weights_as_inputs
+GeneratedTests.*zero_sized*
+GeneratedTests.abs_
+GeneratedTests.abs__relaxed
+GeneratedTests.cast_*overflow*
+GeneratedTests.exp_
+GeneratedTests.exp__relaxed
+GeneratedTests.logical_not*
+GeneratedTests.logical_or_broadcast
+GeneratedTests.logical_or_ex_broadcast_4D_2D
+GeneratedTests.lsh_projection*
+GeneratedTests.lstm*
+GeneratedTests.lstm2*
+GeneratedTests.maximum_broadcast_quant*
+GeneratedTests.maximum_overflow
+GeneratedTests.maximum_simple_quant*
+GeneratedTests.minimum_broadcast_quant*
+GeneratedTests.minimum_overflow
+GeneratedTests.minimum_simple_quant*
+GeneratedTests.mobilenet*
+GeneratedTests.neg
+GeneratedTests.neg_int32
+GeneratedTests.neg_int32_relaxed
+GeneratedTests.neg_relaxed
+GeneratedTests.rnn*
+GeneratedTests.rsqrt
+GeneratedTests.rsqrt_relaxed
+GeneratedTests.sin*
+GeneratedTests.slice*
+GeneratedTests.sqrt_
+GeneratedTests.sqrt__relaxed
+GeneratedTests.sub_v1_2_zero_sized*
+GeneratedTests.svdf*
+GeneratedTests.tanh_v1_2
+GeneratedTests.transpose_v1_2*
diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon
new file mode 100644
index 000000000..2f4fb3bfe
--- /dev/null
+++ b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon
@@ -0,0 +1,51 @@
+GeneratedTests.*float16*
+GeneratedTests.*zero_sized*
+GeneratedTests.abs_
+GeneratedTests.abs__relaxed
+GeneratedTests.argmax_int32_nnfw
+GeneratedTests.argmax_neg_axis_int32_nnfw
+GeneratedTests.cast_*overflow*
+GeneratedTests.cast_float32_to_quant8*
+GeneratedTests.conv_*weights_as_inputs
+GeneratedTests.depthwise_conv2d_*weights_as_inputs
+GeneratedTests.equal_boolean
+GeneratedTests.exp_
+GeneratedTests.exp_1D_float_nnfw
+GeneratedTests.exp_2D_float_nnfw
+GeneratedTests.exp__relaxed
+GeneratedTests.fully_connected_hybrid*
+GeneratedTests.greater_equal_boolean
+GeneratedTests.less_boolean
+GeneratedTests.logical_not*
+GeneratedTests.logical_or_broadcast
+GeneratedTests.lsh_*weights_as_inputs
+GeneratedTests.lsh_projection*
+GeneratedTests.lstm*
+GeneratedTests.lstm2*
+GeneratedTests.maximum_broadcast_quant*
+GeneratedTests.maximum_overflow
+GeneratedTests.maximum_simple_quant*
+GeneratedTests.minimum_broadcast_quant*
+GeneratedTests.minimum_overflow
+GeneratedTests.minimum_simple_quant*
+GeneratedTests.mobilenet*
+GeneratedTests.neg
+GeneratedTests.neg_int32
+GeneratedTests.neg_int32_relaxed
+GeneratedTests.neg_relaxed
+GeneratedTests.not_equal_boolean
+GeneratedTests.reduce_max_2D_int32_nnfw
+GeneratedTests.reduce_max_quant8
+GeneratedTests.reshape_*weights_as_inputs
+GeneratedTests.rnn*
+GeneratedTests.rsqrt
+GeneratedTests.rsqrt_relaxed
+GeneratedTests.sin*
+GeneratedTests.slice*
+GeneratedTests.space_to_batch*
+GeneratedTests.sqrt_
+GeneratedTests.sqrt__relaxed
+GeneratedTests.svdf*
+GeneratedTests.tanh_v1_2
+GeneratedTests.topk_v2*
+GeneratedTests.transpose_v1_2*
diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.cpu b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.cpu
new file mode 100644
index 000000000..12fdcbf62
--- /dev/null
+++ b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.cpu
@@ -0,0 +1,84 @@
+GeneratedTests.*float16*
+GeneratedTests.*weights_as_inputs
+GeneratedTests.*zero_sized*
+GeneratedTests.abs_
+GeneratedTests.abs__relaxed
+GeneratedTests.add_broadcast_quant*
+GeneratedTests.add_quant*
+GeneratedTests.argmax*
+GeneratedTests.batch_to_space*
+GeneratedTests.cast_*quant*
+GeneratedTests.depth_to_space*
+GeneratedTests.dequantize
+GeneratedTests.div_*
+GeneratedTests.embedding_lookup
+GeneratedTests.embedding_lookup_2d_nnfw
+GeneratedTests.embedding_lookup_4d_nnfw
+GeneratedTests.equal_boolean
+GeneratedTests.equal_broadcast_float_nnfw*
+GeneratedTests.equal_broadcast_quant*
+GeneratedTests.equal_quant*
+GeneratedTests.exp_
+GeneratedTests.floor_
+GeneratedTests.fully_connected_hybrid*
+GeneratedTests.greater_equal_boolean
+GeneratedTests.greater_equal_quant*
+GeneratedTests.hashtable_lookup*
+GeneratedTests.l2_normalization*
+GeneratedTests.l2_pool*
+GeneratedTests.less_boolean*
+GeneratedTests.less_equal_boolean
+GeneratedTests.less_equal_quant*
+GeneratedTests.less_quant*
+GeneratedTests.local_response_norm*
+GeneratedTests.logical_*
+GeneratedTests.logical_and_ex*
+GeneratedTests.logical_or_ex*
+GeneratedTests.logistic_quant*
+GeneratedTests.lsh_projection*
+GeneratedTests.lstm*
+GeneratedTests.maximum_broadcast_quant*
+GeneratedTests.maximum_overflow
+GeneratedTests.maximum_simple_quant*
+GeneratedTests.mean*
+GeneratedTests.minimum_broadcast_quant*
+GeneratedTests.minimum_overflow
+GeneratedTests.minimum_simple_quant*
+GeneratedTests.mobilenet*
+GeneratedTests.mul_broadcast_quant*
+GeneratedTests.mul_quant*
+GeneratedTests.neg*
+GeneratedTests.not_equal_boolean*
+GeneratedTests.not_equal_quant*
+GeneratedTests.pack_ex_2D_int_*
+GeneratedTests.pad_quant8_nnfw
+GeneratedTests.prelu*
+GeneratedTests.reduce_max_*int*
+GeneratedTests.reduce_max_quant8*
+GeneratedTests.reduce_min_int8*
+GeneratedTests.reduce_min_quant8*
+GeneratedTests.relu*
+GeneratedTests.relu1*
+GeneratedTests.relu6*
+GeneratedTests.resize_bilinear*
+GeneratedTests.rnn*
+GeneratedTests.rsqrt
+GeneratedTests.rsqrt_relaxed
+GeneratedTests.slice*
+GeneratedTests.space_to_batch*
+GeneratedTests.space_to_depth*
+GeneratedTests.split*
+GeneratedTests.sqrt*
+GeneratedTests.squared_difference_ex*
+GeneratedTests.strided_slice*
+GeneratedTests.strided_slice_ex*
+GeneratedTests.sub_quant*
+GeneratedTests.sub_v1_2_broadcast_quant8
+GeneratedTests.sub_v1_2_quant8
+GeneratedTests.svdf*
+GeneratedTests.tanh_v1_2*
+GeneratedTests.topk_v2*
+GeneratedTests.transpose_conv*
+GeneratedTests.transpose_quant*
+GeneratedTests.transpose_v1_2*
+GeneratedTests.unpack*
diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.srcn b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.srcn
new file mode 100644
index 000000000..b17fd8f21
--- /dev/null
+++ b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.srcn
@@ -0,0 +1,79 @@
+GeneratedTests.*float16*
+GeneratedTests.*weights_as_inputs
+GeneratedTests.*zero_sized*
+GeneratedTests.abs_*
+GeneratedTests.add?*
+GeneratedTests.argmax*
+GeneratedTests.avg_pool_float_2
+GeneratedTests.avg_pool_float_4
+GeneratedTests.avg_pool_quant*
+GeneratedTests.batch_to_space*
+GeneratedTests.cast_*
+GeneratedTests.concat*
+GeneratedTests.conv_float_2
+GeneratedTests.conv_quant*
+GeneratedTests.depth_to_space*
+GeneratedTests.depthwise_conv*
+GeneratedTests.dequantize
+GeneratedTests.div_*
+GeneratedTests.embedding_lookup*
+GeneratedTests.equal*
+GeneratedTests.exp_*
+GeneratedTests.floor_*
+GeneratedTests.fully_connected*
+GeneratedTests.gather*
+GeneratedTests.greater_equal*
+GeneratedTests.hashtable_lookup*
+GeneratedTests.l2_normalization*
+GeneratedTests.l2_pool*
+GeneratedTests.less*
+GeneratedTests.local_response_norm*
+GeneratedTests.logical_*
+GeneratedTests.logistic*
+GeneratedTests.lsh_projection*
+GeneratedTests.lstm*
+GeneratedTests.max_pool*
+GeneratedTests.maximum*
+GeneratedTests.mean*
+GeneratedTests.minimum*
+GeneratedTests.mobilenet*
+GeneratedTests.mul*
+GeneratedTests.neg*
+GeneratedTests.not_equal*
+GeneratedTests.pack*
+GeneratedTests.pad*
+GeneratedTests.prelu*
+GeneratedTests.reduce_max*
+GeneratedTests.reduce_min*
+GeneratedTests.reduce_sum*
+GeneratedTests.relu*
+GeneratedTests.relu1*
+GeneratedTests.relu6*
+GeneratedTests.reshape*
+GeneratedTests.resize_bilinear*
+GeneratedTests.rnn*
+GeneratedTests.rsqrt*
+GeneratedTests.sin*
+GeneratedTests.slice*
+GeneratedTests.softmax*
+GeneratedTests.space_to_batch*
+GeneratedTests.space_to_depth*
+GeneratedTests.split*
+GeneratedTests.sqrt*
+GeneratedTests.squared_difference_ex*
+GeneratedTests.squeeze*
+GeneratedTests.strided_slice*
+GeneratedTests.strided_slice_ex*
+GeneratedTests.sub*
+GeneratedTests.svdf*
+GeneratedTests.tanh_*
+GeneratedTests.topk_v2*
+GeneratedTests.transpose
+GeneratedTests.transpose_2D_nnfw
+GeneratedTests.transpose_3D_nnfw
+GeneratedTests.transpose_float_1
+GeneratedTests.transpose_quant8_1
+GeneratedTests.transpose_v1_2*
+GeneratedTests.unpack*
+TrivialTest.BroadcastAddTwo
+TrivialTest.BroadcastMulTwo
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux b/tests/nnapi/nnapi_gtest.skip.armv7l-linux
deleted file mode 100644
index ea48efa04..000000000
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# Following tests will be skipped on armv7l-linux
-#
-# Not support operations
-GeneratedTests.abs_
-GeneratedTests.abs__relaxed
-GeneratedTests.cast_*overflow*
-GeneratedTests.exp_
-GeneratedTests.exp__relaxed
-GeneratedTests.lsh_projection*
-GeneratedTests.mobilenet*
-GeneratedTests.neg
-GeneratedTests.neg_relaxed
-GeneratedTests.neg_int32
-GeneratedTests.neg_int32_relaxed
-GeneratedTests.rsqrt
-GeneratedTests.rsqrt_relaxed
-GeneratedTests.sqrt_
-GeneratedTests.sqrt__relaxed
-GeneratedTests.svdf*
-GeneratedTests.fully_connected_hybrid*
-# Not support broadcast
-GeneratedTests.logical_or_ex_broadcast_4D_2D
-# Unsupported optional input that has shape
-GeneratedTests.lstm2*
-# Different nnapi specs from acl
-# Ref: https://github.sec.samsung.net/STAR/nnfw/issues/7326
-GeneratedTests.*weights_as_inputs
-GeneratedTests.rnn*
-GeneratedTests.lstm*
-GeneratedTests.*float16*
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl
new file mode 100644
index 000000000..a366dc097
--- /dev/null
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl
@@ -0,0 +1,36 @@
+GeneratedTests.*float16*
+GeneratedTests.*weights_as_inputs
+GeneratedTests.*zero_sized*
+GeneratedTests.abs_
+GeneratedTests.abs__relaxed
+GeneratedTests.cast_*overflow*
+GeneratedTests.exp_
+GeneratedTests.exp__relaxed
+GeneratedTests.logical_not*
+GeneratedTests.logical_or_broadcast
+GeneratedTests.logical_or_ex_broadcast_4D_2D
+GeneratedTests.lsh_projection*
+GeneratedTests.lstm*
+GeneratedTests.lstm2*
+GeneratedTests.maximum_broadcast_quant*
+GeneratedTests.maximum_overflow
+GeneratedTests.maximum_simple_quant*
+GeneratedTests.minimum_broadcast_quant*
+GeneratedTests.minimum_overflow
+GeneratedTests.minimum_simple_quant*
+GeneratedTests.mobilenet*
+GeneratedTests.neg
+GeneratedTests.neg_int32
+GeneratedTests.neg_int32_relaxed
+GeneratedTests.neg_relaxed
+GeneratedTests.rnn*
+GeneratedTests.rsqrt
+GeneratedTests.rsqrt_relaxed
+GeneratedTests.sin*
+GeneratedTests.slice*
+GeneratedTests.sqrt_
+GeneratedTests.sqrt__relaxed
+GeneratedTests.sub_v1_2_zero_sized*
+GeneratedTests.svdf*
+GeneratedTests.tanh_v1_2
+GeneratedTests.transpose_v1_2*
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
index 2dcaa0040..2ec10d267 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
@@ -1,41 +1,49 @@
-#
-# Following tests will be skipped on armv7l-linux acl_neon
-#
-# Not support operations
+GeneratedTests.*float16*
+GeneratedTests.*zero_sized*
GeneratedTests.abs_
GeneratedTests.abs__relaxed
+GeneratedTests.argmax_int32_nnfw
+GeneratedTests.argmax_neg_axis_int32_nnfw
GeneratedTests.cast_*overflow*
+GeneratedTests.cast_float32_to_quant8*
+GeneratedTests.conv_*weights_as_inputs
+GeneratedTests.depthwise_conv2d_*weights_as_inputs
+GeneratedTests.equal_boolean
GeneratedTests.exp_
+GeneratedTests.exp_1D_float_nnfw
+GeneratedTests.exp_2D_float_nnfw
GeneratedTests.exp__relaxed
+GeneratedTests.greater_equal_boolean
+GeneratedTests.less_boolean
+GeneratedTests.logical_not*
+GeneratedTests.logical_or_broadcast
+GeneratedTests.lsh_*weights_as_inputs
GeneratedTests.lsh_projection*
+GeneratedTests.lstm*
+GeneratedTests.lstm2*
+GeneratedTests.maximum_broadcast_quant*
+GeneratedTests.maximum_overflow
+GeneratedTests.maximum_simple_quant*
+GeneratedTests.minimum_broadcast_quant*
+GeneratedTests.minimum_overflow
+GeneratedTests.minimum_simple_quant*
GeneratedTests.mobilenet*
GeneratedTests.neg
-GeneratedTests.neg_relaxed
GeneratedTests.neg_int32
GeneratedTests.neg_int32_relaxed
+GeneratedTests.neg_relaxed
+GeneratedTests.not_equal_boolean
+GeneratedTests.reduce_max_2D_int32_nnfw
+GeneratedTests.reduce_max_quant8
+GeneratedTests.reshape_*weights_as_inputs
+GeneratedTests.rnn*
GeneratedTests.rsqrt
GeneratedTests.rsqrt_relaxed
+GeneratedTests.sin*
+GeneratedTests.slice*
GeneratedTests.sqrt_
GeneratedTests.sqrt__relaxed
GeneratedTests.svdf*
+GeneratedTests.tanh_v1_2
GeneratedTests.topk_v2*
-# Float error
-GeneratedTests.exp_1D_float_nnfw
-GeneratedTests.exp_2D_float_nnfw
-# Unsupported optional input that has shape
-GeneratedTests.lstm2*
-# Unsupported data type
-GeneratedTests.argmax_ex_int32
-GeneratedTests.argmax_ex_neg_axis_int32
-GeneratedTests.reduce_max_2D_int32_nnfw
-GeneratedTests.reduce_max_quant8
-# Bus error (release)
-GeneratedTests.cast_float32_to_quant8*
-# Different nnapi specs from acl
-GeneratedTests.conv_*weights_as_inputs
-GeneratedTests.depthwise_conv2d_*weights_as_inputs
-GeneratedTests.reshape_*weights_as_inputs
-GeneratedTests.lsh_*weights_as_inputs
-GeneratedTests.rnn*
-GeneratedTests.lstm*
-GeneratedTests.*float16*
+GeneratedTests.transpose_v1_2*
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu
index 6e0d8ee83..044510d20 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu
@@ -1,69 +1,83 @@
-#
-# Following tests will be skipped on armv7l-linux
-#
-# Not support operations
+GeneratedTests.*float16*
+GeneratedTests.*weights_as_inputs
+GeneratedTests.*zero_sized*
GeneratedTests.abs_
GeneratedTests.abs__relaxed
GeneratedTests.add_broadcast_quant*
GeneratedTests.add_quant*
GeneratedTests.argmax*
+GeneratedTests.batch_to_space*
+GeneratedTests.cast_*quant*
GeneratedTests.depth_to_space*
GeneratedTests.dequantize
+GeneratedTests.div_*
GeneratedTests.embedding_lookup
GeneratedTests.embedding_lookup_2d_nnfw
GeneratedTests.embedding_lookup_4d_nnfw
-GeneratedTests.equal_ex*
-GeneratedTests.exp_*
+GeneratedTests.equal_broadcast_float_nnfw*
+GeneratedTests.equal_broadcast_quant*
+GeneratedTests.equal_quant*
+GeneratedTests.exp_
GeneratedTests.floor_
-GeneratedTests.fully_connected_hybrid*
-GeneratedTests.greater_equal_ex*
+GeneratedTests.greater_equal_quant*
GeneratedTests.hashtable_lookup*
GeneratedTests.l2_normalization*
GeneratedTests.l2_pool*
+GeneratedTests.less_equal_quant*
+GeneratedTests.less_quant*
GeneratedTests.local_response_norm*
-GeneratedTests.less_ex*
+GeneratedTests.logical_*
GeneratedTests.logical_and_ex*
GeneratedTests.logical_or_ex*
GeneratedTests.logistic_quant*
GeneratedTests.lsh_projection*
GeneratedTests.lstm*
+GeneratedTests.maximum_broadcast_quant*
+GeneratedTests.maximum_overflow
+GeneratedTests.maximum_simple_quant*
+GeneratedTests.mean*
+GeneratedTests.minimum_broadcast_quant*
+GeneratedTests.minimum_overflow
+GeneratedTests.minimum_simple_quant*
GeneratedTests.mobilenet*
GeneratedTests.mul_broadcast_quant*
GeneratedTests.mul_quant*
GeneratedTests.neg*
-GeneratedTests.notequal*
-GeneratedTests.prelu_ex*
-GeneratedTests.reduce_min*
+GeneratedTests.not_equal_quant*
+GeneratedTests.pack_ex_2D_int_*
+GeneratedTests.pad_quant8_nnfw
+GeneratedTests.prelu*
+GeneratedTests.reduce_max_*int*
+GeneratedTests.reduce_max_quant8*
+GeneratedTests.reduce_min_int8*
+GeneratedTests.reduce_min_quant8*
+GeneratedTests.relu*
GeneratedTests.relu1*
GeneratedTests.relu6*
-GeneratedTests.relu*
GeneratedTests.resize_bilinear*
GeneratedTests.rnn*
-GeneratedTests.rsqrt*
-GeneratedTests.mean*
-GeneratedTests.pad_quant8_nnfw
+GeneratedTests.rsqrt
+GeneratedTests.rsqrt_relaxed
+GeneratedTests.slice_5
+GeneratedTests.slice_6
+GeneratedTests.slice_7
+GeneratedTests.slice_8
+GeneratedTests.slice_zero_sized*
+GeneratedTests.space_to_batch*
GeneratedTests.space_to_depth*
+GeneratedTests.split_*nt*
GeneratedTests.sqrt*
GeneratedTests.squared_difference_ex*
-GeneratedTests.svdf*
-GeneratedTests.tanh_
-GeneratedTests.batch_to_space*
-GeneratedTests.div_*
-GeneratedTests.space_to_batch*
-GeneratedTests.strided_slice*
-GeneratedTests.sub_quant*
-GeneratedTests.transpose*
-GeneratedTests.cast_*
+GeneratedTests.strided_slice_*quant*
+GeneratedTests.strided_slice_*qaunt*
GeneratedTests.strided_slice_ex*
-GeneratedTests.reduce_max*
-GeneratedTests.reduce_sum_ex*
+GeneratedTests.sub_quant*
+GeneratedTests.sub_v1_2_broadcast_quant8
+GeneratedTests.sub_v1_2_quant8
+GeneratedTests.svdf*
+GeneratedTests.tanh_v1_2*
GeneratedTests.topk_v2*
-# Unexpected result
-GeneratedTests.split*
GeneratedTests.transpose_conv*
-GeneratedTests.pack*
-GeneratedTests.unpack*
-GeneratedTests.logical_not_ex*
-# Different nnapi specs from acl
-GeneratedTests.*weights_as_inputs
-GeneratedTests.*float16*
+GeneratedTests.transpose_quant*
+GeneratedTests.transpose_v1_2*
+GeneratedTests.unpack_ex_*_int_*
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.ncnn b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.ncnn
deleted file mode 100644
index 8bbc412dd..000000000
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.ncnn
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# Following tests will be skipped on armv7l-linux
-#
-# Not support operations
-TrivialTest.BroadcastAddTwo
-TrivialTest.BroadcastMulTwo
-GeneratedTests.abs_*
-GeneratedTests.avg_pool*
-GeneratedTests.softmax*
-GeneratedTests.concat*
-GeneratedTests.conv_float_2
-GeneratedTests.conv_quant*
-GeneratedTests.depthwise_conv*
-GeneratedTests.fully_connected*
-GeneratedTests.max_pool*
-GeneratedTests.reshape*
-GeneratedTests.add?*
-GeneratedTests.argmax*
-GeneratedTests.depth_to_space*
-GeneratedTests.dequantize
-GeneratedTests.embedding_lookup*
-GeneratedTests.equal_ex*
-GeneratedTests.exp_*
-GeneratedTests.floor_*
-GeneratedTests.greater_equal_ex*
-GeneratedTests.hashtable_lookup*
-GeneratedTests.l2_normalization*
-GeneratedTests.l2_pool*
-GeneratedTests.local_response_norm*
-GeneratedTests.less_ex*
-GeneratedTests.logical_and_ex*
-GeneratedTests.logical_or_ex*
-GeneratedTests.logistic*
-GeneratedTests.lsh_projection*
-GeneratedTests.lstm*
-GeneratedTests.mobilenet*
-GeneratedTests.mul*
-GeneratedTests.neg*
-GeneratedTests.notequal*
-GeneratedTests.prelu_ex*
-GeneratedTests.reduce_min*
-GeneratedTests.relu1*
-GeneratedTests.relu6*
-GeneratedTests.relu*
-GeneratedTests.resize_bilinear*
-GeneratedTests.rnn*
-GeneratedTests.rsqrt*
-GeneratedTests.mean*
-GeneratedTests.pad*
-GeneratedTests.space_to_depth*
-GeneratedTests.sqrt*
-GeneratedTests.squared_difference_ex*
-GeneratedTests.svdf*
-GeneratedTests.tanh_
-GeneratedTests.batch_to_space*
-GeneratedTests.div_*
-GeneratedTests.space_to_batch*
-GeneratedTests.squeeze*
-GeneratedTests.strided_slice*
-GeneratedTests.sub*
-GeneratedTests.transpose
-GeneratedTests.transpose_2D_nnfw
-GeneratedTests.transpose_3D_nnfw
-GeneratedTests.transpose_float_1
-GeneratedTests.transpose_quant8_1
-GeneratedTests.cast_*
-GeneratedTests.gather*
-GeneratedTests.strided_slice_ex*
-GeneratedTests.reduce_max*
-GeneratedTests.reduce_sum_ex*
-GeneratedTests.topk_v2*
-GeneratedTests.split*
-GeneratedTests.pack*
-GeneratedTests.unpack*
-GeneratedTests.logical_not_ex*
-GeneratedTests.*weights_as_inputs
-GeneratedTests.*float16*
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.srcn b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.srcn
new file mode 100644
index 000000000..b17fd8f21
--- /dev/null
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.srcn
@@ -0,0 +1,79 @@
+GeneratedTests.*float16*
+GeneratedTests.*weights_as_inputs
+GeneratedTests.*zero_sized*
+GeneratedTests.abs_*
+GeneratedTests.add?*
+GeneratedTests.argmax*
+GeneratedTests.avg_pool_float_2
+GeneratedTests.avg_pool_float_4
+GeneratedTests.avg_pool_quant*
+GeneratedTests.batch_to_space*
+GeneratedTests.cast_*
+GeneratedTests.concat*
+GeneratedTests.conv_float_2
+GeneratedTests.conv_quant*
+GeneratedTests.depth_to_space*
+GeneratedTests.depthwise_conv*
+GeneratedTests.dequantize
+GeneratedTests.div_*
+GeneratedTests.embedding_lookup*
+GeneratedTests.equal*
+GeneratedTests.exp_*
+GeneratedTests.floor_*
+GeneratedTests.fully_connected*
+GeneratedTests.gather*
+GeneratedTests.greater_equal*
+GeneratedTests.hashtable_lookup*
+GeneratedTests.l2_normalization*
+GeneratedTests.l2_pool*
+GeneratedTests.less*
+GeneratedTests.local_response_norm*
+GeneratedTests.logical_*
+GeneratedTests.logistic*
+GeneratedTests.lsh_projection*
+GeneratedTests.lstm*
+GeneratedTests.max_pool*
+GeneratedTests.maximum*
+GeneratedTests.mean*
+GeneratedTests.minimum*
+GeneratedTests.mobilenet*
+GeneratedTests.mul*
+GeneratedTests.neg*
+GeneratedTests.not_equal*
+GeneratedTests.pack*
+GeneratedTests.pad*
+GeneratedTests.prelu*
+GeneratedTests.reduce_max*
+GeneratedTests.reduce_min*
+GeneratedTests.reduce_sum*
+GeneratedTests.relu*
+GeneratedTests.relu1*
+GeneratedTests.relu6*
+GeneratedTests.reshape*
+GeneratedTests.resize_bilinear*
+GeneratedTests.rnn*
+GeneratedTests.rsqrt*
+GeneratedTests.sin*
+GeneratedTests.slice*
+GeneratedTests.softmax*
+GeneratedTests.space_to_batch*
+GeneratedTests.space_to_depth*
+GeneratedTests.split*
+GeneratedTests.sqrt*
+GeneratedTests.squared_difference_ex*
+GeneratedTests.squeeze*
+GeneratedTests.strided_slice*
+GeneratedTests.strided_slice_ex*
+GeneratedTests.sub*
+GeneratedTests.svdf*
+GeneratedTests.tanh_*
+GeneratedTests.topk_v2*
+GeneratedTests.transpose
+GeneratedTests.transpose_2D_nnfw
+GeneratedTests.transpose_3D_nnfw
+GeneratedTests.transpose_float_1
+GeneratedTests.transpose_quant8_1
+GeneratedTests.transpose_v1_2*
+GeneratedTests.unpack*
+TrivialTest.BroadcastAddTwo
+TrivialTest.BroadcastMulTwo
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-tizen b/tests/nnapi/nnapi_gtest.skip.armv7l-tizen
deleted file mode 100644
index 4bac7bac7..000000000
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-tizen
+++ /dev/null
@@ -1,35 +0,0 @@
-#
-# Following tests will be skipped on armv7l-linux
-#
-# Not support operations
-GeneratedTests.abs_
-GeneratedTests.abs__relaxed
-GeneratedTests.cast_*overflow*
-GeneratedTests.exp_
-GeneratedTests.exp__relaxed
-GeneratedTests.lsh_projection*
-GeneratedTests.mobilenet*
-GeneratedTests.neg
-GeneratedTests.neg_relaxed
-GeneratedTests.neg_int32
-GeneratedTests.neg_int32_relaxed
-GeneratedTests.rsqrt
-GeneratedTests.rsqrt_relaxed
-GeneratedTests.sqrt_
-GeneratedTests.sqrt__relaxed
-GeneratedTests.svdf*
-GeneratedTests.batch_to_space*
-GeneratedTests.fully_connected_hybrid*
-GeneratedTests.space_to_batch*
-GeneratedTests.transpose_conv_ex_*
-# Unexpected result
-GeneratedTests.pack*
-# Not support broadcast
-GeneratedTests.logical_or_ex_broadcast_4D_2D
-# Unsupported optional input that has shape
-GeneratedTests.lstm2*
-# Different nnapi specs from acl
-GeneratedTests.*weights_as_inputs
-GeneratedTests.rnn*
-GeneratedTests.lstm*
-GeneratedTests.*float16*
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-tizen.acl_cl b/tests/nnapi/nnapi_gtest.skip.armv7l-tizen.acl_cl
new file mode 100644
index 000000000..a85a31fa1
--- /dev/null
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-tizen.acl_cl
@@ -0,0 +1,41 @@
+GeneratedTests.*float16*
+GeneratedTests.*weights_as_inputs
+GeneratedTests.*zero_sized*
+GeneratedTests.abs_
+GeneratedTests.abs__relaxed
+GeneratedTests.batch_to_space*
+GeneratedTests.cast_*overflow*
+GeneratedTests.exp_
+GeneratedTests.exp__relaxed
+GeneratedTests.fully_connected_hybrid*
+GeneratedTests.logical_not*
+GeneratedTests.logical_or_broadcast
+GeneratedTests.logical_or_ex_broadcast_4D_2D
+GeneratedTests.lsh_projection*
+GeneratedTests.lstm*
+GeneratedTests.lstm2*
+GeneratedTests.maximum_broadcast_quant*
+GeneratedTests.maximum_overflow
+GeneratedTests.maximum_simple_quant*
+GeneratedTests.minimum_broadcast_quant*
+GeneratedTests.minimum_overflow
+GeneratedTests.minimum_simple_quant*
+GeneratedTests.mobilenet*
+GeneratedTests.neg
+GeneratedTests.neg_int32
+GeneratedTests.neg_int32_relaxed
+GeneratedTests.neg_relaxed
+GeneratedTests.pack*
+GeneratedTests.rnn*
+GeneratedTests.rsqrt
+GeneratedTests.rsqrt_relaxed
+GeneratedTests.sin*
+GeneratedTests.slice*
+GeneratedTests.space_to_batch*
+GeneratedTests.sqrt_
+GeneratedTests.sqrt__relaxed
+GeneratedTests.sub_v1_2_zero_sized*
+GeneratedTests.svdf*
+GeneratedTests.tanh_v1_2
+GeneratedTests.transpose_conv_ex_*
+GeneratedTests.transpose_v1_2*
diff --git a/tests/nnapi/nnapi_gtest.skip.noarch.interp b/tests/nnapi/nnapi_gtest.skip.noarch.interp
index fffd3dba7..81a0c17c1 100644
--- a/tests/nnapi/nnapi_gtest.skip.noarch.interp
+++ b/tests/nnapi/nnapi_gtest.skip.noarch.interp
@@ -1,76 +1,78 @@
-#
-# Following tests will be skipped on x86_64-linux interpreter
-#
-# Not support operations
+GeneratedTests.*float16*
+GeneratedTests.*zero_sized*
GeneratedTests.abs_*
-GeneratedTests.avg_pool_quant*
-GeneratedTests.concat_quant*
-GeneratedTests.conv_quant*
-GeneratedTests.depthwise_conv2d_quant*
-GeneratedTests.fully_connected_hybrid*
-GeneratedTests.fully_connected_quant*
-GeneratedTests.max_pool_quant*
-GeneratedTests.softmax_quant*
GeneratedTests.add_broadcast_quant*
GeneratedTests.add_quant*
GeneratedTests.argmax*
+GeneratedTests.avg_pool_quant*
+GeneratedTests.batch_to_space*
+GeneratedTests.cast_*
+GeneratedTests.concat_quant*
+GeneratedTests.conv_quant*
GeneratedTests.depth_to_space*
+GeneratedTests.depthwise_conv2d_quant*
GeneratedTests.dequantize
+GeneratedTests.div_*
GeneratedTests.embedding_lookup
GeneratedTests.embedding_lookup_2d_nnfw
GeneratedTests.embedding_lookup_4d_nnfw
-GeneratedTests.equal_ex*
+GeneratedTests.equal*
GeneratedTests.exp_*
GeneratedTests.floor_
-GeneratedTests.greater_equal_ex*
+GeneratedTests.fully_connected_hybrid*
+GeneratedTests.fully_connected_quant*
+GeneratedTests.greater_equal*
GeneratedTests.hashtable_lookup*
GeneratedTests.l2_normalization*
GeneratedTests.l2_pool*
+GeneratedTests.less*
GeneratedTests.local_response_norm*
-GeneratedTests.less_ex*
-GeneratedTests.logical_and_ex*
-GeneratedTests.logical_or_ex*
+GeneratedTests.logical_*
GeneratedTests.logistic_quant*
GeneratedTests.lsh_projection*
GeneratedTests.lstm*
+GeneratedTests.max_pool_quant*
+GeneratedTests.maximum*
+GeneratedTests.mean*
+GeneratedTests.minimum*
GeneratedTests.mobilenet*
GeneratedTests.mul_broadcast_quant*
GeneratedTests.mul_quant*
GeneratedTests.neg*
-GeneratedTests.notequal*
-GeneratedTests.prelu_ex*
+GeneratedTests.not_equal*
+GeneratedTests.pack*
+GeneratedTests.pad_*quant*
+GeneratedTests.prelu*
+GeneratedTests.reduce_max*
GeneratedTests.reduce_min*
+GeneratedTests.reduce_sum*
GeneratedTests.relu1_quant*
GeneratedTests.relu6_quant*
GeneratedTests.relu_quant*
GeneratedTests.resize_bilinear*
GeneratedTests.rnn*
GeneratedTests.rsqrt*
-GeneratedTests.mean*
-GeneratedTests.pad_*quant*
+GeneratedTests.sin*
+GeneratedTests.slice*
+GeneratedTests.softmax_quant*
+GeneratedTests.space_to_batch*
GeneratedTests.space_to_depth*
+GeneratedTests.split*
GeneratedTests.sqrt*
GeneratedTests.squared_difference_ex*
-GeneratedTests.svdf*
-GeneratedTests.batch_to_space*
-GeneratedTests.div_*
-GeneratedTests.space_to_batch*
GeneratedTests.squeeze*
GeneratedTests.strided_slice*
+GeneratedTests.strided_slice_ex*
GeneratedTests.sub_quant*
+GeneratedTests.sub_v1_2_broadcast_quant8
+GeneratedTests.sub_v1_2_quant8
+GeneratedTests.svdf*
+GeneratedTests.tanh_v1_2*
+GeneratedTests.topk_v2*
GeneratedTests.transpose
GeneratedTests.transpose_2D*
GeneratedTests.transpose_3D*
GeneratedTests.transpose_float*
GeneratedTests.transpose_quant*
-GeneratedTests.cast_*
-GeneratedTests.strided_slice_ex*
-GeneratedTests.reduce_max*
-GeneratedTests.reduce_sum_ex*
-GeneratedTests.topk_v2*
-# Unexpected result
-GeneratedTests.split*
-GeneratedTests.pack*
+GeneratedTests.transpose_v1_2*
GeneratedTests.unpack*
-GeneratedTests.logical_not_ex*
-GeneratedTests.*float16*
diff --git a/tests/nnapi/nnapi_gtest.skip.x86_64-linux b/tests/nnapi/nnapi_gtest.skip.x86_64-linux
deleted file mode 100644
index 2b6f6f74b..000000000
--- a/tests/nnapi/nnapi_gtest.skip.x86_64-linux
+++ /dev/null
@@ -1,68 +0,0 @@
-#
-# Following tests will be skipped on x86_64-linux
-#
-# Not support operations
-GeneratedTests.abs_*
-GeneratedTests.add_broadcast_quant*
-GeneratedTests.add_quant*
-GeneratedTests.argmax*
-GeneratedTests.depth_to_space*
-GeneratedTests.dequantize
-GeneratedTests.embedding_lookup
-GeneratedTests.embedding_lookup_2d_nnfw
-GeneratedTests.embedding_lookup_4d_nnfw
-GeneratedTests.equal_ex*
-GeneratedTests.exp_*
-GeneratedTests.floor_
-GeneratedTests.fully_connected_hybrid*
-GeneratedTests.greater_equal_ex*
-GeneratedTests.hashtable_lookup*
-GeneratedTests.l2_normalization*
-GeneratedTests.l2_pool*
-GeneratedTests.local_response_norm*
-GeneratedTests.less_ex*
-GeneratedTests.logical_and_ex*
-GeneratedTests.logical_or_ex*
-GeneratedTests.logistic_quant*
-GeneratedTests.lsh_projection*
-GeneratedTests.lstm*
-GeneratedTests.mobilenet*
-GeneratedTests.mul_broadcast_quant*
-GeneratedTests.mul_quant*
-GeneratedTests.neg*
-GeneratedTests.notequal*
-GeneratedTests.prelu_ex*
-GeneratedTests.reduce_min*
-GeneratedTests.relu1*
-GeneratedTests.relu6*
-GeneratedTests.relu*
-GeneratedTests.resize_bilinear*
-GeneratedTests.rnn*
-GeneratedTests.rsqrt*
-GeneratedTests.mean*
-GeneratedTests.pad_quant8_nnfw
-GeneratedTests.space_to_depth*
-GeneratedTests.sqrt*
-GeneratedTests.squared_difference_ex*
-GeneratedTests.svdf*
-GeneratedTests.tanh_
-GeneratedTests.batch_to_space*
-GeneratedTests.div_*
-GeneratedTests.space_to_batch*
-GeneratedTests.strided_slice*
-GeneratedTests.sub_quant*
-GeneratedTests.transpose*
-GeneratedTests.cast_*
-GeneratedTests.strided_slice_ex*
-GeneratedTests.reduce_max*
-GeneratedTests.reduce_sum_ex*
-GeneratedTests.topk_v2*
-# Unexpected result
-GeneratedTests.split*
-GeneratedTests.transpose_conv*
-GeneratedTests.pack*
-GeneratedTests.unpack*
-GeneratedTests.logical_not_ex*
-# Different nnapi specs from acl
-GeneratedTests.*weights_as_inputs
-GeneratedTests.*float16*
diff --git a/tests/nnapi/nnapi_gtest.skip.x86_64-linux.cpu b/tests/nnapi/nnapi_gtest.skip.x86_64-linux.cpu
new file mode 100644
index 000000000..462cb9dc0
--- /dev/null
+++ b/tests/nnapi/nnapi_gtest.skip.x86_64-linux.cpu
@@ -0,0 +1,74 @@
+GeneratedTests.*float16*
+GeneratedTests.*weights_as_inputs
+GeneratedTests.*zero_sized*
+GeneratedTests.abs_*
+GeneratedTests.add_broadcast_quant*
+GeneratedTests.add_quant*
+GeneratedTests.argmax*
+GeneratedTests.batch_to_space*
+GeneratedTests.cast_*
+GeneratedTests.depth_to_space*
+GeneratedTests.dequantize
+GeneratedTests.div_*
+GeneratedTests.embedding_lookup
+GeneratedTests.embedding_lookup_2d_nnfw
+GeneratedTests.embedding_lookup_4d_nnfw
+GeneratedTests.equal_broadcast_float_nnfw*
+GeneratedTests.equal_broadcast_quant*
+GeneratedTests.equal_quant*
+GeneratedTests.exp_*
+GeneratedTests.floor_
+GeneratedTests.greater_equal_quant*
+GeneratedTests.hashtable_lookup*
+GeneratedTests.l2_normalization*
+GeneratedTests.l2_pool*
+GeneratedTests.less_equal_quant*
+GeneratedTests.less_quant*
+GeneratedTests.local_response_norm*
+GeneratedTests.logical_*
+GeneratedTests.logistic_quant*
+GeneratedTests.lsh_projection*
+GeneratedTests.lstm*
+GeneratedTests.maximum_broadcast_quant*
+GeneratedTests.maximum_overflow
+GeneratedTests.maximum_simple_quant*
+GeneratedTests.mean*
+GeneratedTests.minimum_broadcast_quant*
+GeneratedTests.minimum_overflow
+GeneratedTests.minimum_simple_quant*
+GeneratedTests.mobilenet*
+GeneratedTests.mul_broadcast_quant*
+GeneratedTests.mul_quant*
+GeneratedTests.neg*
+GeneratedTests.not_equal_quant*
+GeneratedTests.pack*
+GeneratedTests.pad_quant8_nnfw
+GeneratedTests.prelu*
+GeneratedTests.reduce_max_*int*
+GeneratedTests.reduce_max_quant8*
+GeneratedTests.reduce_min_int8*
+GeneratedTests.reduce_min_quant8*
+GeneratedTests.relu*
+GeneratedTests.relu1*
+GeneratedTests.relu6*
+GeneratedTests.resize_bilinear*
+GeneratedTests.rnn*
+GeneratedTests.rsqrt*
+GeneratedTests.slice*
+GeneratedTests.space_to_batch*
+GeneratedTests.space_to_depth*
+GeneratedTests.split_*nt*
+GeneratedTests.sqrt*
+GeneratedTests.squared_difference_ex*
+GeneratedTests.strided_slice*
+GeneratedTests.strided_slice_ex*
+GeneratedTests.sub_quant*
+GeneratedTests.sub_v1_2_broadcast_quant8
+GeneratedTests.sub_v1_2_quant8
+GeneratedTests.svdf*
+GeneratedTests.tanh_v1_2*
+GeneratedTests.topk_v2*
+GeneratedTests.transpose_conv*
+GeneratedTests.transpose_quant*
+GeneratedTests.transpose_v1_2*
+GeneratedTests.unpack_ex_*_int_*
diff --git a/tests/nnapi/nnapi_test_generator/android-10/README.md b/tests/nnapi/nnapi_test_generator/android-10/README.md
index 421c5f5e6..3de818b69 100644
--- a/tests/nnapi/nnapi_test_generator/android-10/README.md
+++ b/tests/nnapi/nnapi_test_generator/android-10/README.md
@@ -2,9 +2,9 @@
Original code is at https://android.googlesource.com/platform/frameworks/ml/+/refs/tags/android-10.0.0_r2/nn/tools/test_generator/
-### Fix for neurun
+### Fix for onert
-- Update path in this `README.md` file for neurun NNAPI frontend test
+- Update path in this `README.md` file for onert NNAPI frontend test
- `nn/runtime/test/specs/` => `tests/nnapi/specs/`
- $ANDROID_BUILD_TOP/frameworks/ml/nn/runtime/test/specs => $NNAS_PROJECT_PATH/tests/nnapi/specs
- Rebuild with mm afterwards => Rebuild afterwards (mm is not supported)
@@ -16,9 +16,7 @@ Original code is at https://android.googlesource.com/platform/frameworks/ml/+/re
- path for regular expression:
`((frameworks/ml/nn/(runtime/test/)?)|(vendor/google/[a-z]*/test/))` => `(tests/nnapi/src/)`
- Support EX operation
- - Fix c++14 feature: change to c++11 constructor
- Comment out `TEST_AVAILABLE_SINCE()` macro generation
- - Comment out unsupported `FLOAT16` on armv7 32bit architecture
- Update `test_generator.py`
- Comment out dynamic shape output test generation
diff --git a/tests/nnapi/nnapi_test_generator/android-10/cts_generator.py b/tests/nnapi/nnapi_test_generator/android-10/cts_generator.py
index eb18c5627..de55e39ba 100755
--- a/tests/nnapi/nnapi_test_generator/android-10/cts_generator.py
+++ b/tests/nnapi/nnapi_test_generator/android-10/cts_generator.py
@@ -106,7 +106,7 @@ namespace {spec_name} {{
#include "{model_file}"
}} // namespace {spec_name}\n"""
# This regex is to remove prefix and get relative path for #include
- # Fix for neurun: update path
+ # Fix for onert: update path
pathRegex = r".*(tests/nnapi/src/)"
specFileBase = os.path.basename(tg.FileNames.specFile)
print(fileHeader.format(spec_file=specFileBase), file=model_fd)
@@ -162,7 +162,7 @@ def DumpCtsModel(model, model_fd):
p.initializer, p.type.GetCppTypeString(), p.type.GetNumberOfElements())
IndentedPrint(paramDef, file=model_fd)
for op in model.operations:
- # Fix for neurun: EX operation
+ # Fix for onert: EX operation
if re.search('_EX$', op.optype):
IndentedPrint("model->addOperationEx(ANEURALNETWORKS_%s, {%s}, {%s});"%(
op.optype, tg.GetJointStr(op.ins), tg.GetJointStr(op.outs)), file=model_fd)
@@ -210,30 +210,28 @@ def DumpMixedType(operands, feedDict):
except KeyError as e:
traceback.print_exc()
sys.exit("Cannot dump tensor of type {}".format(operand.type.type))
- # NFix for neurun: fix designated initializer (not supported on c++11)
- # comment out FLOAT16 type
mixedTypeTemplate = """\
{{ // See tools/test_generator/include/TestHarness.h:MixedTyped
// int -> Dimensions map
- {{{dimensions_map}}},
+ .operandDimensions = {{{dimensions_map}}},
// int -> FLOAT32 map
- {{{float32_map}}},
+ .float32Operands = {{{float32_map}}},
// int -> INT32 map
- {{{int32_map}}},
+ .int32Operands = {{{int32_map}}},
// int -> QUANT8_ASYMM map
- {{{uint8_map}}},
+ .quant8AsymmOperands = {{{uint8_map}}},
// int -> QUANT16_SYMM map
- {{{int16_map}}},
+ .quant16SymmOperands = {{{int16_map}}},
// int -> FLOAT16 map
- //{{{float16_map}}},
+ .float16Operands = {{{float16_map}}},
// int -> BOOL8 map
- {{{bool8_map}}},
+ .bool8Operands = {{{bool8_map}}},
// int -> QUANT8_SYMM_PER_CHANNEL map
- {{{int8_map}}},
+ .quant8ChannelOperands = {{{int8_map}}},
// int -> QUANT16_ASYMM map
- {{{uint16_map}}},
+ .quant16AsymmOperands = {{{uint16_map}}},
// int -> QUANT8_SYMM map
- {{{quant8_symm_map}}},
+ .quant8SymmOperands = {{{quant8_symm_map}}},
}}"""
return mixedTypeTemplate.format(
dimensions_map=tg.GetJointStr(typedMap.get("DIMENSIONS", [])),
@@ -254,20 +252,16 @@ def DumpCtsExample(example, example_fd):
print("std::vector<MixedTypedExample>& get_%s() {" % (example.examplesName), file=example_fd)
print("static std::vector<MixedTypedExample> %s = {" % (example.examplesName), file=example_fd)
for inputFeedDict, outputFeedDict in example.feedDicts:
- # Fix designated initializer (c++11 don't support yet)
print ('// Begin of an example', file = example_fd)
- print ('{\n {', file = example_fd)
+ print ('{\n.operands = {', file = example_fd)
inputs = DumpMixedType(example.model.GetInputs(), inputFeedDict)
outputs = DumpMixedType(example.model.GetOutputs(), outputFeedDict)
print ('//Input(s)\n%s,' % inputs , file = example_fd)
print ('//Output(s)\n%s' % outputs, file = example_fd)
print ('},', file = example_fd)
- # Fix designated initializer (c++11 don't support yet)
if example.expectedMultinomialDistributionTolerance is not None:
- print ('%f' %
+ print ('.expectedMultinomialDistributionTolerance = %f' %
example.expectedMultinomialDistributionTolerance, file = example_fd)
- else:
- print ('0.0', file = example_fd)
print ('}, // End of an example', file = example_fd)
print("};", file=example_fd)
print("return %s;" % (example.examplesName), file=example_fd)
@@ -280,7 +274,7 @@ TEST_F({test_case_name}, {test_name}) {{
execute({namespace}::{create_model_name},
{namespace}::{is_ignored_name},
{namespace}::get_{examples_name}(){log_file});\n}}\n"""
- # Fix for neurun: Remove version check
+ # Fix for onert: Remove version check
#if example.model.version is not None:
#testTemplate += """\
#TEST_AVAILABLE_SINCE({version}, {test_name}, {namespace}::{create_model_name})\n"""
diff --git a/tests/nnapi/nnapi_test_generator/android-10/test_generator.py b/tests/nnapi/nnapi_test_generator/android-10/test_generator.py
index 44bfd801d..b4b07cbda 100755
--- a/tests/nnapi/nnapi_test_generator/android-10/test_generator.py
+++ b/tests/nnapi/nnapi_test_generator/android-10/test_generator.py
@@ -989,7 +989,7 @@ class Example:
))
else:
assert False
- # Fix for neurun: disable dynamic shape test generation
+ # Fix for onert: disable dynamic shape test generation
#if Configuration.test_dynamic_output_shape:
#self.variations = [[DefaultVariation(), DynamicOutputShapeConverter()]]
#else:
@@ -1026,7 +1026,18 @@ class Example:
def AddVariations(self, *args, includeDefault=True, defaultName=None):
self.variations.append([DefaultVariation(defaultName)] if includeDefault else [])
- self.variations[-1].extend(ImplicitVariation.ImplicitConvertion(i) for i in args)
+ # NNFW Fix: remove float16 type variation test generation
+ variations = []
+ for i in args:
+ variation = ImplicitVariation.ImplicitConvertion(i)
+ print(i, file=sys.stderr)
+ if not isinstance(i, ModelVariation) and type(i) is str:
+ if i == "float16":
+ continue
+ else:
+ variations.append(variation)
+ self.variations[-1].extend(variations)
+ #self.variations[-1].extend(ImplicitVariation.ImplicitConvertion(i) for i in args)
return self
def AddNchw(self, *args, includeDefault=True, defaultName="nhwc"):
diff --git a/tests/nnapi/specs/Ex/argmax_ex_float_1.mod.py b/tests/nnapi/specs/Ex/argmax_ex_float_1.mod.py
deleted file mode 100644
index dc29fb358..000000000
--- a/tests/nnapi/specs/Ex/argmax_ex_float_1.mod.py
+++ /dev/null
@@ -1,18 +0,0 @@
-model = Model()
-i1 = Input("input", "TENSOR_FLOAT32", "{1, 2, 2, 1}")
-axis = Parameter("axis", "TENSOR_INT32", "{1}", [1])
-output = Output("output", "TENSOR_INT32", "{1, 2, 1}")
-
-model = model.Operation("ARGMAX_EX", i1, axis).To(output)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1.0, 4.0,
- 2.0, 3.0]}
-
-output0 = {output: # output 0
- [1,
- 0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/argmax_ex_float_2.mod.py b/tests/nnapi/specs/Ex/argmax_ex_float_2.mod.py
deleted file mode 100644
index 8b6ed565f..000000000
--- a/tests/nnapi/specs/Ex/argmax_ex_float_2.mod.py
+++ /dev/null
@@ -1,18 +0,0 @@
-model = Model()
-i1 = Input("input", "TENSOR_FLOAT32", "{1, 2, 2, 2}")
-axis = Parameter("axis", "TENSOR_INT32", "{1}", [3])
-output = Output("output", "TENSOR_INT32", "{1, 2, 2}")
-
-model = model.Operation("ARGMAX_EX", i1, axis).To(output)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1.0, 2.0, 7.0, 8.0,
- 1.0, 9.0, 7.0, 3.0]}
-
-output0 = {output: # output 0
- [1,1,
- 1,0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/argmax_ex_int32.mod.py b/tests/nnapi/specs/Ex/argmax_ex_int32.mod.py
deleted file mode 100644
index b5d01dd85..000000000
--- a/tests/nnapi/specs/Ex/argmax_ex_int32.mod.py
+++ /dev/null
@@ -1,18 +0,0 @@
-model = Model()
-i1 = Input("input", "TENSOR_INT32", "{1, 2, 2, 1}")
-axis = Parameter("axis", "TENSOR_INT32", "{1}", [1])
-output = Output("output", "TENSOR_INT32", "{1, 2, 1}")
-
-model = model.Operation("ARGMAX_EX", i1, axis).To(output)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1, 4,
- 2, 3]}
-
-output0 = {output: # output 0
- [1,
- 0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/argmax_ex_neg_axis_float.mod.py b/tests/nnapi/specs/Ex/argmax_ex_neg_axis_float.mod.py
deleted file mode 100644
index 977cac061..000000000
--- a/tests/nnapi/specs/Ex/argmax_ex_neg_axis_float.mod.py
+++ /dev/null
@@ -1,17 +0,0 @@
-model = Model()
-i1 = Input("input", "TENSOR_FLOAT32", "{1, 2, 4, 1}")
-axis = Parameter("axis", "TENSOR_INT32", "{1}", [-3])
-output = Output("output", "TENSOR_INT32", "{1, 4, 1}")
-
-model = model.Operation("ARGMAX_EX", i1, axis).To(output)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1.0, 2.0, 7.0, 8.0,
- 1.0, 9.0, 7.0, 3.0]}
-
-output0 = {output: # output 0
- [0, 1, 0, 0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/argmax_ex_neg_axis_int32.mod.py b/tests/nnapi/specs/Ex/argmax_ex_neg_axis_int32.mod.py
deleted file mode 100644
index 9f448e08d..000000000
--- a/tests/nnapi/specs/Ex/argmax_ex_neg_axis_int32.mod.py
+++ /dev/null
@@ -1,17 +0,0 @@
-model = Model()
-i1 = Input("input", "TENSOR_INT32", "{1, 2, 4, 1}")
-axis = Parameter("axis", "TENSOR_INT32", "{1}", [-3])
-output = Output("output", "TENSOR_INT32", "{1, 4, 1}")
-
-model = model.Operation("ARGMAX_EX", i1, axis).To(output)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1, 2, 7, 8,
- 1, 9, 7, 3]}
-
-output0 = {output: # output 0
- [0, 1, 0, 0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/argmax_ex_quant8.mod.py b/tests/nnapi/specs/Ex/argmax_ex_quant8.mod.py
deleted file mode 100644
index c3131ef91..000000000
--- a/tests/nnapi/specs/Ex/argmax_ex_quant8.mod.py
+++ /dev/null
@@ -1,18 +0,0 @@
-model = Model()
-i1 = Input("input", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 2")
-axis = Parameter("axis", "TENSOR_INT32", "{1}", [1])
-output = Output("output", "TENSOR_INT32", "{1, 2, 1}")
-
-model = model.Operation("ARGMAX_EX", i1, axis).To(output)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1, 4,
- 2, 3]}
-
-output0 = {output: # output 0
- [1,
- 0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/argmax_ex_quant8_neg_axis.mod.py b/tests/nnapi/specs/Ex/argmax_ex_quant8_neg_axis.mod.py
deleted file mode 100644
index 9146d8f14..000000000
--- a/tests/nnapi/specs/Ex/argmax_ex_quant8_neg_axis.mod.py
+++ /dev/null
@@ -1,17 +0,0 @@
-model = Model()
-i1 = Input("input", "TENSOR_QUANT8_ASYMM", "{1, 2, 4, 1}, 0.5f, 5")
-axis = Parameter("axis", "TENSOR_INT32", "{1}", [-3])
-output = Output("output", "TENSOR_INT32", "{1, 4, 1}")
-
-model = model.Operation("ARGMAX_EX", i1, axis).To(output)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1, 2, 7, 8,
- 1, 9, 7, 3]}
-
-output0 = {output: # output 0
- [0, 1, 0, 0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/equal_ex_1D_float.mod.py b/tests/nnapi/specs/Ex/equal_ex_1D_float.mod.py
deleted file mode 100644
index f2ec16789..000000000
--- a/tests/nnapi/specs/Ex/equal_ex_1D_float.mod.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{3}") # a vector of input
-i2 = Input("op2", "TENSOR_FLOAT32", "{3}") # a vector of input
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{3}, 1.f, 0") # a vector of output
-model = model.Operation("EQUAL_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [2.0, 3.254232, 5.1232],
- i2: # input 1
- [2.0, 3.254111, 5.1232]}
-
-output0 = {i3: # output 0
- [255, 0, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/equal_ex_4D_float.mod.py b/tests/nnapi/specs/Ex/equal_ex_4D_float.mod.py
deleted file mode 100644
index f804a5066..000000000
--- a/tests/nnapi/specs/Ex/equal_ex_4D_float.mod.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{1, 2, 2, 1}") # a vector of input
-i2 = Input("op2", "TENSOR_FLOAT32", "{1, 2, 2, 1}") # a vector of input
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 1.f, 0") # a vector of output
-model = model.Operation("EQUAL_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0, 1543.25454532, 5.1232, 10.1],
- i2: # input 1
- [0, 5313.25414521, 5.1, 10.1]}
-
-output0 = {i3: # output 0
- [255, 0, 0, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/equal_ex_broadcast_4D_2D_float.mod.py b/tests/nnapi/specs/Ex/equal_ex_broadcast_4D_2D_float.mod.py
deleted file mode 100644
index 5e31698a9..000000000
--- a/tests/nnapi/specs/Ex/equal_ex_broadcast_4D_2D_float.mod.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{3, 2, 2, 2}")
-i2 = Input("op2", "TENSOR_FLOAT32", "{2, 2}")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{3, 2, 2, 2}, 1.f, 0")
-model = model.Operation("EQUAL_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [4.89, 11.0, 9.75, 10.20,
- 8.25, 2.0, 1.15, 0.0,
- 3.0, 1.0, 8.25, 6.0,
- 8.45, 3.0, 8.25, 1.2,
- 0.0, 3.0, 2.0, 7.34,
- 4.3, 9.56, 11.0, 3.0],
- i2: # input 1
- [8.25, 3.0, 2.0, 10.20]}
-
-output0 = {i3: # output 0
- [0, 0, 0, 255,
- 255, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 255, 0, 0,
- 0, 255, 255, 0,
- 0, 0, 0, 0]
- }
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/equal_ex_quant8.mod.py b/tests/nnapi/specs/Ex/equal_ex_quant8.mod.py
deleted file mode 100644
index a799c6f42..000000000
--- a/tests/nnapi/specs/Ex/equal_ex_quant8.mod.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{3}, 1.f, 0") # a vector of input
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{3}, 1.f, 0") # a vector of input
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{3}, 1.f, 0") # a vector of output
-model = model.Operation("EQUAL_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [2, 3, 0],
- i2: # input 1
- [2, 9, 0]}
-
-output0 = {i3: # output 0
- [255, 0, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/fully_connected_float_2_weights_as_inputs.mod.py b/tests/nnapi/specs/Ex/fully_connected_float_2_weights_as_inputs.mod.py
index 6f13a213b..6f13a213b 100644..100755
--- a/tests/nnapi/specs/Ex/fully_connected_float_2_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/Ex/fully_connected_float_2_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/Ex/greater_equal_ex.mod.py b/tests/nnapi/specs/Ex/greater_equal_ex.mod.py
deleted file mode 100644
index 7c62d568b..000000000
--- a/tests/nnapi/specs/Ex/greater_equal_ex.mod.py
+++ /dev/null
@@ -1,35 +0,0 @@
-#
-# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
-# Copyright (C) 2018 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{2, 1}")
-i2 = Input("op2", "TENSOR_FLOAT32", "{2}")
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.0, 0")
-model = model.Operation("GREATER_EQUAL_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [5, 10],
- i2: # input 1
- [10, 5]}
-
-output0 = {i3: # output 0
- [0, 255, 255, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/less_ex.mod.py b/tests/nnapi/specs/Ex/less_ex.mod.py
deleted file mode 100644
index 3ae15b62f..000000000
--- a/tests/nnapi/specs/Ex/less_ex.mod.py
+++ /dev/null
@@ -1,35 +0,0 @@
-#
-# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
-# Copyright (C) 2018 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{2, 1}")
-i2 = Input("op2", "TENSOR_FLOAT32", "{2}")
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.0, 0")
-model = model.Operation("LESS_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [5, 10],
- i2: # input 1
- [10, 5]}
-
-output0 = {i3: # output 0
- [255, 0, 0, 0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_and_ex_1D.mod.py b/tests/nnapi/specs/Ex/logical_and_ex_1D.mod.py
deleted file mode 100644
index 0f474f94e..000000000
--- a/tests/nnapi/specs/Ex/logical_and_ex_1D.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{4}, 1.f, 0")
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{4}, 1.f, 0")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{4}, 1.f, 0")
-model = model.Operation("LOGICAL_AND_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0, 0, 255, 255],
- i2: # input 255
- [0, 255, 0, 255]}
-
-output0 = {i3: # output 0
- [0, 0, 0, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_and_ex_2D.mod.py b/tests/nnapi/specs/Ex/logical_and_ex_2D.mod.py
deleted file mode 100644
index eea915d69..000000000
--- a/tests/nnapi/specs/Ex/logical_and_ex_2D.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.f, 0")
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.f, 0")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.f, 0")
-model = model.Operation("LOGICAL_AND_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0, 0, 255, 255],
- i2: # input 1
- [0, 255, 0, 255]}
-
-output0 = {i3: # output 0
- [0, 0, 0, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_and_ex_3D.mod.py b/tests/nnapi/specs/Ex/logical_and_ex_3D.mod.py
deleted file mode 100644
index dccc4f0de..000000000
--- a/tests/nnapi/specs/Ex/logical_and_ex_3D.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{2, 2, 2}, 1.f, 0")
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{2, 2, 2}, 1.f, 0")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 2, 2}, 1.f, 0")
-model = model.Operation("LOGICAL_AND_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0, 0, 255, 255, 0, 255, 0, 255],
- i2: # input 1
- [0, 255, 0, 255, 0, 0, 255, 255]}
-
-output0 = {i3: # output 0
- [0, 0, 0, 255, 0, 0, 0, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_and_ex_4D.mod.py b/tests/nnapi/specs/Ex/logical_and_ex_4D.mod.py
deleted file mode 100644
index dceb275c7..000000000
--- a/tests/nnapi/specs/Ex/logical_and_ex_4D.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{2, 1, 2, 2}, 1.f, 0")
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{2, 1, 2, 2}, 1.f, 0")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 1, 2, 2}, 1.f, 0")
-model = model.Operation("LOGICAL_AND_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0, 0, 255, 255, 0, 255, 0, 255],
- i2: # input 1
- [0, 255, 0, 255, 0, 0, 255, 255]}
-
-output0 = {i3: # output 0
- [0, 0, 0, 255, 0, 0, 0, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_and_ex_broadcast.mod.py b/tests/nnapi/specs/Ex/logical_and_ex_broadcast.mod.py
deleted file mode 100644
index 45d899235..000000000
--- a/tests/nnapi/specs/Ex/logical_and_ex_broadcast.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 1.f, 0")
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{1, 1, 2, 1}, 1.f, 0")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 1.f, 0")
-model = model.Operation("LOGICAL_AND_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0, 0, 255, 255],
- i2: # input 1
- [0, 255]}
-
-output0 = {i3: # output 0
- [0, 0, 0, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_and_ex_broadcast_4D_2D.mod.py b/tests/nnapi/specs/Ex/logical_and_ex_broadcast_4D_2D.mod.py
deleted file mode 100644
index 9235f7fdb..000000000
--- a/tests/nnapi/specs/Ex/logical_and_ex_broadcast_4D_2D.mod.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{2, 2, 2, 2}, 1.f, 0")
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.f, 0")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 2, 2, 2}, 1.f, 0")
-model = model.Operation("LOGICAL_AND_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0, 0, 255, 255,
- 255, 255, 0, 0,
- 0, 0, 0, 0,
- 255, 255, 255, 255],
- i2: # input 1
- [0, 255, 0, 255]}
-
-output0 = {i3: # output 0
- [0, 0, 0, 255,
- 0, 255, 0, 0,
- 0, 0, 0, 0,
- 0, 255, 0, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_not_ex_1D.mod.py b/tests/nnapi/specs/Ex/logical_not_ex_1D.mod.py
deleted file mode 100644
index a913b1997..000000000
--- a/tests/nnapi/specs/Ex/logical_not_ex_1D.mod.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# model
-model = Model()
-i = Input("op1", "TENSOR_QUANT8_ASYMM", "{4}, 1.f, 0")
-
-o = Output("op2", "TENSOR_QUANT8_ASYMM", "{4}, 1.f, 0")
-model = model.Operation("LOGICAL_NOT_EX", i).To(o)
-
-# Example 1. Input
-input0 = {i: # input
- [255, 0, 255, 255]}
-
-output0 = {o: # output
- [0, 255, 0, 0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_not_ex_4D.mod.py b/tests/nnapi/specs/Ex/logical_not_ex_4D.mod.py
deleted file mode 100644
index 49306fc51..000000000
--- a/tests/nnapi/specs/Ex/logical_not_ex_4D.mod.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# model
-model = Model()
-i = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 1.f, 0") # a vector of input
-
-o = Output("op2", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 1.f, 0") # a vector of output
-model = model.Operation("LOGICAL_NOT_EX", i).To(o)
-
-# Example 1. Input
-input0 = {i: # input
- [0, 255, 255, 255]}
-
-output0 = {o: # output
- [255, 0, 0, 0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_or_ex_1D.mod.py b/tests/nnapi/specs/Ex/logical_or_ex_1D.mod.py
deleted file mode 100644
index 48e6d8ec3..000000000
--- a/tests/nnapi/specs/Ex/logical_or_ex_1D.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{4}, 1.f, 0")
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{4}, 1.f, 0")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{4}, 1.f, 0")
-model = model.Operation("LOGICAL_OR_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0, 0, 255, 255],
- i2: # input 255
- [0, 255, 0, 255]}
-
-output0 = {i3: # output 0
- [0, 255, 255, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_or_ex_2D.mod.py b/tests/nnapi/specs/Ex/logical_or_ex_2D.mod.py
deleted file mode 100644
index 5a6517170..000000000
--- a/tests/nnapi/specs/Ex/logical_or_ex_2D.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.f, 0")
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.f, 0")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.f, 0")
-model = model.Operation("LOGICAL_OR_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0, 0, 255, 255],
- i2: # input 1
- [0, 255, 0, 255]}
-
-output0 = {i3: # output 0
- [0, 255, 255, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_or_ex_3D.mod.py b/tests/nnapi/specs/Ex/logical_or_ex_3D.mod.py
deleted file mode 100644
index 06fbbe65d..000000000
--- a/tests/nnapi/specs/Ex/logical_or_ex_3D.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{2, 2, 2}, 1.f, 0")
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{2, 2, 2}, 1.f, 0")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 2, 2}, 1.f, 0")
-model = model.Operation("LOGICAL_OR_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0, 0, 255, 255, 0, 255, 0, 255],
- i2: # input 1
- [0, 255, 0, 255, 0, 0, 255, 255]}
-
-output0 = {i3: # output 0
- [0, 255, 255, 255, 0, 255, 255, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_or_ex_4D.mod.py b/tests/nnapi/specs/Ex/logical_or_ex_4D.mod.py
deleted file mode 100644
index f6833c96e..000000000
--- a/tests/nnapi/specs/Ex/logical_or_ex_4D.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{2, 1, 2, 2}, 1.f, 0")
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{2, 1, 2, 2}, 1.f, 0")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 1, 2, 2}, 1.f, 0")
-model = model.Operation("LOGICAL_OR_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0, 0, 255, 255, 0, 255, 0, 255],
- i2: # input 1
- [0, 255, 0, 255, 0, 0, 255, 255]}
-
-output0 = {i3: # output 0
- [0, 255, 255, 255, 0, 255, 255, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_or_ex_broadcast.mod.py b/tests/nnapi/specs/Ex/logical_or_ex_broadcast.mod.py
deleted file mode 100644
index 89c8310bb..000000000
--- a/tests/nnapi/specs/Ex/logical_or_ex_broadcast.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 1.f, 0")
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{1, 1, 2, 1}, 1.f, 0")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 1.f, 0")
-model = model.Operation("LOGICAL_OR_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0, 0, 255, 255],
- i2: # input 1
- [0, 255]}
-
-output0 = {i3: # output 0
- [0, 255, 255, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/logical_or_ex_broadcast_4D_2D.mod.py b/tests/nnapi/specs/Ex/logical_or_ex_broadcast_4D_2D.mod.py
deleted file mode 100644
index a673494e5..000000000
--- a/tests/nnapi/specs/Ex/logical_or_ex_broadcast_4D_2D.mod.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{2, 2, 2, 2}, 1.f, 0")
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.f, 0")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 2, 2, 2}, 1.f, 0")
-model = model.Operation("LOGICAL_OR_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0, 0, 255, 255,
- 255, 255, 0, 0,
- 0, 0, 0, 0,
- 255, 255, 255, 255],
- i2: # input 1
- [0, 255, 0, 255]}
-
-output0 = {i3: # output 0
- [0, 255, 255, 255,
- 255, 255, 0, 255,
- 0, 255, 0, 255,
- 255, 255, 255, 255]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/notequal_ex_broadcast_4D_2D_float.mod.py b/tests/nnapi/specs/Ex/notequal_ex_broadcast_4D_2D_float.mod.py
deleted file mode 100644
index ac900511c..000000000
--- a/tests/nnapi/specs/Ex/notequal_ex_broadcast_4D_2D_float.mod.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{3, 2, 2, 2}")
-i2 = Input("op2", "TENSOR_FLOAT32", "{2, 2}")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{3, 2, 2, 2}, 1.f, 0")
-model = model.Operation("NOT_EQUAL_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [4.25, 11.0, 2.2, 10.3,
- 8.5, 2.1, 1.0, 0.5,
- 3.1, 1.0, 8.5, 6.5,
- 11.2, 3.0, 8.5, 1.0,
- 0.3, 3.0, 2.1, 7.5,
- 4.3, 9.2, 11.1, 3.0],
- i2: # input 1
- [8.5, 3.0, 2.1, 10.3]}
-
-output0 = {i3: # output 0
- [255, 255, 255, 0,
- 0, 255, 255, 255,
- 255, 255, 255, 255,
- 255, 0, 255, 255,
- 255, 0, 0, 255,
- 255, 255, 255, 255]
- }
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/notequal_ex_broadcast_float.mod.py b/tests/nnapi/specs/Ex/notequal_ex_broadcast_float.mod.py
deleted file mode 100644
index 69acc6dea..000000000
--- a/tests/nnapi/specs/Ex/notequal_ex_broadcast_float.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{2, 2}")
-i2 = Input("op2", "TENSOR_FLOAT32", "{1, 2}")
-
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.f, 0")
-model = model.Operation("NOT_EQUAL_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0.2, 3.21, 2.4, 7.44],
- i2: # input 1
- [0.21, 7.44]}
-
-output0 = {i3: # output 0
- [255, 255, 255, 0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/notequal_ex_float.mod.py b/tests/nnapi/specs/Ex/notequal_ex_float.mod.py
deleted file mode 100644
index b3deef7a6..000000000
--- a/tests/nnapi/specs/Ex/notequal_ex_float.mod.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{3}") # a vector of input
-i2 = Input("op2", "TENSOR_FLOAT32", "{3}") # a vector of input
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{3}, 1.f, 0") # a vector of output
-model = model.Operation("NOT_EQUAL_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [2.0, 3.254232, 5.1232],
- i2: # input 1
- [2.0, 3.254111, 5.1232]}
-
-output0 = {i3: # output 0
- [0, 255, 0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/notequal_ex_quant8.mod.py b/tests/nnapi/specs/Ex/notequal_ex_quant8.mod.py
deleted file mode 100644
index d5115a611..000000000
--- a/tests/nnapi/specs/Ex/notequal_ex_quant8.mod.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{3}, 1.f, 0") # a vector of input
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{3}, 1.f, 0") # a vector of input
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{3}, 1.f, 0") # a vector of output
-model = model.Operation("NOT_EQUAL_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [2, 3, 0],
- i2: # input 1
- [2, 9, 0]}
-
-output0 = {i3: # output 0
- [0, 255, 0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/pack_ex_2D_float_1.mod.py b/tests/nnapi/specs/Ex/pack_ex_2D_float_1.mod.py
index 06f6e6a7b..06f6e6a7b 100644..100755
--- a/tests/nnapi/specs/Ex/pack_ex_2D_float_1.mod.py
+++ b/tests/nnapi/specs/Ex/pack_ex_2D_float_1.mod.py
diff --git a/tests/nnapi/specs/Ex/pack_ex_2D_float_2.mod.py b/tests/nnapi/specs/Ex/pack_ex_2D_float_2.mod.py
index acc8c891f..acc8c891f 100644..100755
--- a/tests/nnapi/specs/Ex/pack_ex_2D_float_2.mod.py
+++ b/tests/nnapi/specs/Ex/pack_ex_2D_float_2.mod.py
diff --git a/tests/nnapi/specs/Ex/pack_ex_2D_int_1.mod.py b/tests/nnapi/specs/Ex/pack_ex_2D_int_1.mod.py
index 0ffd6cf9d..0ffd6cf9d 100644..100755
--- a/tests/nnapi/specs/Ex/pack_ex_2D_int_1.mod.py
+++ b/tests/nnapi/specs/Ex/pack_ex_2D_int_1.mod.py
diff --git a/tests/nnapi/specs/Ex/pack_ex_2D_int_2.mod.py b/tests/nnapi/specs/Ex/pack_ex_2D_int_2.mod.py
index c9bdc419b..c9bdc419b 100644..100755
--- a/tests/nnapi/specs/Ex/pack_ex_2D_int_2.mod.py
+++ b/tests/nnapi/specs/Ex/pack_ex_2D_int_2.mod.py
diff --git a/tests/nnapi/specs/Ex/prelu_ex_broadcast_float_1.mod.py b/tests/nnapi/specs/Ex/prelu_ex_broadcast_float_1.mod.py
deleted file mode 100644
index 23e363f5c..000000000
--- a/tests/nnapi/specs/Ex/prelu_ex_broadcast_float_1.mod.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{1, 2, 2, 3}") # a vector of input
-i2 = Input("op2", "TENSOR_FLOAT32", "{1, 1, 1, 3}") # a vector of alpha
-i3 = Output("op3", "TENSOR_FLOAT32", "{1, 2, 2, 3}") # a vector of output
-model = model.Operation("PRELU_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [0.0, 0.0, 0.0,
- 1.0, 1.0, 1.0,
- -1.0, -1.0, -1.0,
- -2.0, -2.0, -2.0],
- i2: # input 1
- [0.0, 1.0, 2.0]}
-
-output0 = {i3: # output 0
- [0.0, 0.0, 0.0,
- 1.0, 1.0, 1.0,
- 0.0, -1.0, -2.0,
- 0.0, -2.0, -4.0]}
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/prelu_ex_broadcast_quant8_1.mod.py b/tests/nnapi/specs/Ex/prelu_ex_broadcast_quant8_1.mod.py
deleted file mode 100644
index 5fc6884bc..000000000
--- a/tests/nnapi/specs/Ex/prelu_ex_broadcast_quant8_1.mod.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 3}, 1.0f, 2") # a vector of input
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{1, 1, 3}, 1.0f, 1") # a vector of alpha
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 3}, 0.5f, 3") # a vector of output
-model = model.Operation("PRELU_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1, 1, 1,
- 2, 2, 2,
- 3, 3, 3,
- 1, 2, 3],
- i2: # input 1
- [0, 1, 2]}
-
-output0 = {i3: # output 0
- [5, 3, 1,
- 3, 3, 3,
- 5, 5, 5,
- 5, 3, 5]}
-# Instantiate an example
-Example((input0, output0))
-
diff --git a/tests/nnapi/specs/Ex/prelu_ex_float_1.mod.py b/tests/nnapi/specs/Ex/prelu_ex_float_1.mod.py
deleted file mode 100644
index d2ebd2a0b..000000000
--- a/tests/nnapi/specs/Ex/prelu_ex_float_1.mod.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{1, 2, 2, 1}") # a vector of input
-i2 = Input("op2", "TENSOR_FLOAT32", "{1, 2, 2, 1}") # a vector of alpha
-i3 = Output("op3", "TENSOR_FLOAT32", "{1, 2, 2, 1}") # a vector of output
-model = model.Operation("PRELU_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [3.0, -2.0,
- -1.0, -2.0
- ],
- i2: # input 1
- [0.0, 1.0,
- 1.0, 2.0]}
-
-output0 = {i3: # output 0
- [3.0, -2.0,
- -1.0, -4.0
- ]}
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/prelu_ex_quant8_1.mod.py b/tests/nnapi/specs/Ex/prelu_ex_quant8_1.mod.py
deleted file mode 100644
index 9548f8255..000000000
--- a/tests/nnapi/specs/Ex/prelu_ex_quant8_1.mod.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 5") # a vector of input
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 1") # a vector of alpha
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.1f, 3") # a vector of output
-model = model.Operation("PRELU_EX", i1, i2).To(i3)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [3, 1,
- 7, 11
- ],
- i2: # input 1
- [0, 1,
- 2, 2]}
-
-output0 = {i3: # output 0
- [8, 3,
- 13, 33
- ]}
-# Instantiate an example
-Example((input0, output0))
-
diff --git a/tests/nnapi/specs/Ex/reduce_min_ex_float.mod.py b/tests/nnapi/specs/Ex/reduce_min_ex_float.mod.py
deleted file mode 100644
index f6b29f657..000000000
--- a/tests/nnapi/specs/Ex/reduce_min_ex_float.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-model = Model()
-i1 = Input("input", "TENSOR_FLOAT32", "{1, 2, 2, 1}")
-axis = Parameter("axis", "TENSOR_INT32", "{1}", [2])
-keepDims = Int32Scalar("keepDims", 0)
-output = Output("output", "TENSOR_FLOAT32", "{1, 2, 1}")
-
-model = model.Operation("REDUCE_MIN_EX", i1, axis, keepDims).To(output)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [2.0, 1.0,
- 3.0, 4.0]}
-
-output0 = {output: # output 0
- [1.0,
- 3.0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/reduce_min_ex_float_1.mod.py b/tests/nnapi/specs/Ex/reduce_min_ex_float_1.mod.py
deleted file mode 100644
index a58e12dc3..000000000
--- a/tests/nnapi/specs/Ex/reduce_min_ex_float_1.mod.py
+++ /dev/null
@@ -1,18 +0,0 @@
-model = Model()
-i1 = Input("input", "TENSOR_FLOAT32", "{4, 3, 2}")
-axis = Parameter("axis", "TENSOR_INT32", "{4}", [1, 0, -3, -3])
-keepDims = Int32Scalar("keepDims", 0)
-output = Output("output", "TENSOR_FLOAT32", "{2}")
-
-model = model.Operation("REDUCE_MIN_EX", i1, axis, keepDims).To(output)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [23.0, 24.0, 13.0, 22.0, 5.0, 18.0, 7.0, 8.0, 9.0, 15.0, 11.0, 12.0,
- 3.0, 14.0, 10.0, 16.0, 17.0, 6.0, 19.0, 20.0, 21.0, 4.0, 1.0, 2.0]}
-
-output0 = {output: # output 0
- [1.0, 2.0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/reduce_min_ex_float_2.mod.py b/tests/nnapi/specs/Ex/reduce_min_ex_float_2.mod.py
deleted file mode 100644
index b817ed208..000000000
--- a/tests/nnapi/specs/Ex/reduce_min_ex_float_2.mod.py
+++ /dev/null
@@ -1,18 +0,0 @@
-model = Model()
-i1 = Input("input", "TENSOR_FLOAT32", "{4, 3, 2}")
-axis = Parameter("axis", "TENSOR_INT32", "{2}", [0, 2])
-keepDims = Int32Scalar("keepDims", 1)
-output = Output("output", "TENSOR_FLOAT32", "{1, 3, 1}")
-
-model = model.Operation("REDUCE_MIN_EX", i1, axis, keepDims).To(output)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [20.0, 2.0, 22.0, 4.0, 24.0, 18.0, 7.0, 8.0, 19.0, 10.0, 14.0, 12.0,
- 13.0, 11.0, 15.0, 16.0, 17.0, 6.0, 9.0, 1.0, 21.0, 3.0, 23.0, 5.0]}
-
-output0 = {output: # output 0
- [1.0, 3.0, 5.0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/reduce_sum_ex_2D_float.mod.py b/tests/nnapi/specs/Ex/reduce_sum_ex_2D_float.mod.py
deleted file mode 100644
index 2bd3b50c3..000000000
--- a/tests/nnapi/specs/Ex/reduce_sum_ex_2D_float.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# model
-model = Model()
-i1 = Input("input", "TENSOR_FLOAT32", "{3, 4}")
-axis = Int32Scalar("axis", 1)
-keepDims = Int32Scalar("keepDims", 0)
-out1 = Output("output", "TENSOR_FLOAT32", "{3}")
-model = model.Operation("REDUCE_SUM_EX", i1, axis, keepDims).To(out1)
-
-# Example 1. Input in operand 0, 1
-input0 = {i1: # input 0
- [3.2, 11.47, 3.8, 5.76,
- 28.2, 0.999, -1.3, -13.5,
- -3.4, -22.1, -2.2, -49.7]}
-
-output0 = {out1: # output 0
- [24.23, 14.399002, -77.4]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/reduce_sum_ex_4D_float.mod.py b/tests/nnapi/specs/Ex/reduce_sum_ex_4D_float.mod.py
deleted file mode 100644
index 6a8e6ad5f..000000000
--- a/tests/nnapi/specs/Ex/reduce_sum_ex_4D_float.mod.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# model
-model = Model()
-i1 = Input("input", "TENSOR_FLOAT32", "{1, 3, 4, 1}")
-axis = Int32Scalar("axis", 1)
-keepDims = Int32Scalar("keepDims", 0)
-out1 = Output("output", "TENSOR_FLOAT32", "{1, 4, 1}")
-model = model.Operation("REDUCE_SUM_EX", i1, axis, keepDims).To(out1)
-
-# Example 1. Input in operand 0, 1
-input0 = {i1: # input 0
- [6.4, 7.3, 19.3, -2.3,
- 8.3, 2.0, 11.8, -3.4,
- 22.8, 3.0, -28.7, 4.9]}
-
-output0 = {out1: # output 0
- [37.5, 12.3, 2.3999977, -0.7999997]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/reduce_sum_ex_4D_float_reducing_C.mod.py b/tests/nnapi/specs/Ex/reduce_sum_ex_4D_float_reducing_C.mod.py
deleted file mode 100644
index 5d751bd23..000000000
--- a/tests/nnapi/specs/Ex/reduce_sum_ex_4D_float_reducing_C.mod.py
+++ /dev/null
@@ -1,33 +0,0 @@
-batch = 2
-rows = 3
-cols = 4
-depth = 5
-
-input_table = [x for x in range(batch * rows * cols * depth)]
-
-output_table = [0 for x in range(batch * rows * cols)]
-for i in range(batch):
- for j in range(rows):
- for k in range(cols):
- for l in range(depth):
- # The value of output_table is the depthwise sum of input_table.
- output_table[i * rows * cols + j * cols + k] += input_table[i * rows * cols * depth + j * cols * depth + k * depth + l];
-
-model = Model()
-i1 = Input("input", "TENSOR_FLOAT32", "{%d, %d, %d, %d}" % (batch, rows, cols, depth))
-# Axis value should be in the range [-(rank), rank). And '-n' is the same axis with 'rank - n'. So '3' and '-1' are the same axis.
-axis = Parameter("axis", "TENSOR_INT32", "{2}", [3, -1])
-keepDims = Int32Scalar("keepDims", 0)
-output = Output("output", "TENSOR_FLOAT32", "{%d, %d, %d}" % (batch, rows, cols))
-
-model = model.Operation("REDUCE_SUM_EX", i1, axis, keepDims).To(output)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- input_table}
-
-output0 = {output: # output 0
- output_table}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/reduce_sum_ex_4D_float_reducing_HW.mod.py b/tests/nnapi/specs/Ex/reduce_sum_ex_4D_float_reducing_HW.mod.py
deleted file mode 100644
index 7861030dc..000000000
--- a/tests/nnapi/specs/Ex/reduce_sum_ex_4D_float_reducing_HW.mod.py
+++ /dev/null
@@ -1,33 +0,0 @@
-batch = 2
-rows = 3
-cols = 4
-depth = 5
-
-input_table = [x for x in range(batch * rows * cols * depth)]
-
-output_table = [0 for x in range(batch * depth)]
-for i in range(batch):
- for j in range(rows):
- for k in range(cols):
- for l in range(depth):
- # The value of output_table is the rowwise sum and colwise sum of input_table.
- output_table[i * depth + l] += input_table[i * rows * cols * depth + j * cols * depth + k * depth + l];
-
-model = Model()
-i1 = Input("input", "TENSOR_FLOAT32", "{%d, %d, %d, %d}" % (batch, rows, cols, depth))
-# Axis value should be in the range [-(rank), rank). And '-n' is the same axis with 'rank - n'. So this test's axis value are the same [1, 2].
-axis = Parameter("axis", "TENSOR_INT32", "{4}", [1, 2, -3, -2])
-keepDims = Int32Scalar("keepDims", 0)
-output = Output("output", "TENSOR_FLOAT32", "{%d, %d}" % (batch, depth))
-
-model = model.Operation("REDUCE_SUM_EX", i1, axis, keepDims).To(output)
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- input_table}
-
-output0 = {output: # output 0
- output_table}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/split_ex_1D_float.mod.py b/tests/nnapi/specs/Ex/split_ex_1D_float.mod.py
deleted file mode 100644
index 137ece828..000000000
--- a/tests/nnapi/specs/Ex/split_ex_1D_float.mod.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{8}")
-axis = Int32Scalar("axis", 0)
-num_out = Int32Scalar("num_out", 8)
-i2 = Output("op2", "TENSOR_FLOAT32", "{1}")
-i3 = Output("op3", "TENSOR_FLOAT32", "{1}")
-i4 = Output("op4", "TENSOR_FLOAT32", "{1}")
-i5 = Output("op5", "TENSOR_FLOAT32", "{1}")
-i6 = Output("op6", "TENSOR_FLOAT32", "{1}")
-i7 = Output("op7", "TENSOR_FLOAT32", "{1}")
-i8 = Output("op8", "TENSOR_FLOAT32", "{1}")
-i9 = Output("op9", "TENSOR_FLOAT32", "{1}")
-
-model = model.Operation("SPLIT_EX", i1, axis, num_out).To([i2, i3, i4, i5, i6, i7, i8, i9])
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]}
-
-output0 = {
- i2: # output 0
- [1.0],
- i3: # output 1
- [2.0],
- i4: # output 2
- [3.0],
- i5: # output 3
- [4.0],
- i6: # output 4
- [5.0],
- i7: # output 5
- [6.0],
- i8: # output 6
- [7.0],
- i9: # output 7
- [8.0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/split_ex_1D_int32.mod.py b/tests/nnapi/specs/Ex/split_ex_1D_int32.mod.py
deleted file mode 100644
index 9bfc215f6..000000000
--- a/tests/nnapi/specs/Ex/split_ex_1D_int32.mod.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_INT32", "{8}")
-axis = Int32Scalar("axis", 0)
-num_out = Int32Scalar("num_out", 8)
-i2 = Output("op2", "TENSOR_INT32", "{1}")
-i3 = Output("op3", "TENSOR_INT32", "{1}")
-i4 = Output("op4", "TENSOR_INT32", "{1}")
-i5 = Output("op5", "TENSOR_INT32", "{1}")
-i6 = Output("op6", "TENSOR_INT32", "{1}")
-i7 = Output("op7", "TENSOR_INT32", "{1}")
-i8 = Output("op8", "TENSOR_INT32", "{1}")
-i9 = Output("op9", "TENSOR_INT32", "{1}")
-
-model = model.Operation("SPLIT_EX", i1, axis, num_out).To([i2, i3, i4, i5, i6, i7, i8, i9])
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1, 2, 3, 4, 5, 6, 7, 8]}
-
-output0 = {
- i2: # output 0
- [1],
- i3: # output 1
- [2],
- i4: # output 2
- [3],
- i5: # output 3
- [4],
- i6: # output 4
- [5],
- i7: # output 5
- [6],
- i8: # output 6
- [7],
- i9: # output 7
- [8]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/split_ex_4D_float_1.mod.py b/tests/nnapi/specs/Ex/split_ex_4D_float_1.mod.py
deleted file mode 100644
index 2353a8a0f..000000000
--- a/tests/nnapi/specs/Ex/split_ex_4D_float_1.mod.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{2,2,2,2}")
-axis = Int32Scalar("axis", 0)
-num_out = Int32Scalar("num_out", 2)
-i2 = Output("op2", "TENSOR_FLOAT32", "{1,2,2,2}")
-i3 = Output("op3", "TENSOR_FLOAT32", "{1,2,2,2}")
-model = model.Operation("SPLIT_EX", i1, axis, num_out).To([i2, i3])
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
-
-output0 = {
- i2: # output 0
- [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
- i3: # output 1
- [9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/split_ex_4D_float_2.mod.py b/tests/nnapi/specs/Ex/split_ex_4D_float_2.mod.py
deleted file mode 100644
index 30ecf2416..000000000
--- a/tests/nnapi/specs/Ex/split_ex_4D_float_2.mod.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{2,2,2,2}")
-axis = Int32Scalar("axis", 3)
-num_out = Int32Scalar("num_out", 2)
-i2 = Output("op2", "TENSOR_FLOAT32", "{2,2,2,1}")
-i3 = Output("op3", "TENSOR_FLOAT32", "{2,2,2,1}")
-model = model.Operation("SPLIT_EX", i1, axis, num_out).To([i2, i3])
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
-
-output0 = {
- i2: # output 0
- [1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0],
- i3: # output 1
- [2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/split_ex_4D_float_3.mod.py b/tests/nnapi/specs/Ex/split_ex_4D_float_3.mod.py
deleted file mode 100644
index 1f7a07880..000000000
--- a/tests/nnapi/specs/Ex/split_ex_4D_float_3.mod.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_FLOAT32", "{2,2,2,2}")
-axis = Int32Scalar("axis", -4) # Negative axis
-num_out = Int32Scalar("num_out", 2)
-i2 = Output("op2", "TENSOR_FLOAT32", "{1,2,2,2}")
-i3 = Output("op3", "TENSOR_FLOAT32", "{1,2,2,2}")
-model = model.Operation("SPLIT_EX", i1, axis, num_out).To([i2, i3])
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
-
-output0 = {
- i2: # output 0
- [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
- i3: # output 1
- [9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/split_ex_4D_int32_1.mod.py b/tests/nnapi/specs/Ex/split_ex_4D_int32_1.mod.py
deleted file mode 100644
index b1c6692ee..000000000
--- a/tests/nnapi/specs/Ex/split_ex_4D_int32_1.mod.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
-axis = Int32Scalar("axis", 0)
-num_out = Int32Scalar("num_out", 2)
-i2 = Output("op2", "TENSOR_INT32", "{1,2,2,2}")
-i3 = Output("op3", "TENSOR_INT32", "{1,2,2,2}")
-model = model.Operation("SPLIT_EX", i1, axis, num_out).To([i2, i3])
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}
-
-output0 = {
- i2: # output 0
- [1, 2, 3, 4, 5, 6, 7, 8],
- i3: # output 1
- [9, 10, 11, 12, 13, 14, 15, 16]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/split_ex_4D_int32_2.mod.py b/tests/nnapi/specs/Ex/split_ex_4D_int32_2.mod.py
deleted file mode 100644
index 88a7cca48..000000000
--- a/tests/nnapi/specs/Ex/split_ex_4D_int32_2.mod.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
-axis = Int32Scalar("axis", 1)
-num_out = Int32Scalar("num_out", 2)
-i2 = Output("op2", "TENSOR_INT32", "{2,1,2,2}")
-i3 = Output("op3", "TENSOR_INT32", "{2,1,2,2}")
-model = model.Operation("SPLIT_EX", i1, axis, num_out).To([i2, i3])
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}
-
-output0 = {
- i2: # output 0
- [1, 2, 3, 4, 9, 10, 11, 12],
- i3: # output 1
- [5, 6, 7, 8, 13, 14, 15, 16]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/split_ex_4D_int32_3.mod.py b/tests/nnapi/specs/Ex/split_ex_4D_int32_3.mod.py
deleted file mode 100644
index 6f5cc621b..000000000
--- a/tests/nnapi/specs/Ex/split_ex_4D_int32_3.mod.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
-axis = Int32Scalar("axis", 2)
-num_out = Int32Scalar("num_out", 2)
-i2 = Output("op2", "TENSOR_INT32", "{2,2,1,2}")
-i3 = Output("op3", "TENSOR_INT32", "{2,2,1,2}")
-model = model.Operation("SPLIT_EX", i1, axis, num_out).To([i2, i3])
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}
-
-output0 = {
- i2: # output 0
- [1, 2, 5, 6, 9, 10, 13, 14],
- i3: # output 1
- [3, 4, 7, 8, 11, 12, 15, 16]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/split_ex_4D_int32_4.mod.py b/tests/nnapi/specs/Ex/split_ex_4D_int32_4.mod.py
deleted file mode 100644
index fb6710171..000000000
--- a/tests/nnapi/specs/Ex/split_ex_4D_int32_4.mod.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
-axis = Int32Scalar("axis", 3)
-num_out = Int32Scalar("num_out", 2)
-i2 = Output("op2", "TENSOR_INT32", "{2,2,2,1}")
-i3 = Output("op3", "TENSOR_INT32", "{2,2,2,1}")
-model = model.Operation("SPLIT_EX", i1, axis, num_out).To([i2, i3])
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}
-
-output0 = {
- i2: # output 0
- [1, 3, 5, 7, 9, 11, 13, 15],
- i3: # output 1
- [2, 4, 6, 8, 10, 12, 14, 16]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/split_ex_4D_int32_5.mod.py b/tests/nnapi/specs/Ex/split_ex_4D_int32_5.mod.py
deleted file mode 100644
index 338794855..000000000
--- a/tests/nnapi/specs/Ex/split_ex_4D_int32_5.mod.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
-axis = Int32Scalar("axis", -4) # Negative axis
-num_out = Int32Scalar("num_out", 2)
-i2 = Output("op2", "TENSOR_INT32", "{1,2,2,2}")
-i3 = Output("op3", "TENSOR_INT32", "{1,2,2,2}")
-model = model.Operation("SPLIT_EX", i1, axis, num_out).To([i2, i3])
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}
-
-output0 = {
- i2: # output 0
- [1, 2, 3, 4, 5, 6, 7, 8],
- i3: # output 1
- [9, 10, 11, 12, 13, 14, 15, 16]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/split_ex_4D_quant8.mod.py b/tests/nnapi/specs/Ex/split_ex_4D_quant8.mod.py
deleted file mode 100644
index a3dbffaa9..000000000
--- a/tests/nnapi/specs/Ex/split_ex_4D_quant8.mod.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# model
-model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{2,2,2,2}, 0.5f, 1")
-axis = Int32Scalar("axis", 0)
-num_out = Int32Scalar("num_out", 2)
-i2 = Output("op2", "TENSOR_QUANT8_ASYMM", "{1,2,2,2}, 0.5f, 1")
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{1,2,2,2}, 0.5f, 1")
-model = model.Operation("SPLIT_EX", i1, axis, num_out).To([i2, i3])
-
-# Example 1. Input in operand 0,
-input0 = {i1: # input 0
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}
-
-output0 = {
- i2: # output 0
- [1, 2, 3, 4, 5, 6, 7, 8],
- i3: # output 1
- [9, 10, 11, 12, 13, 14, 15, 16]}
-
-# Instantiate an example
-Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/squared_difference_ex_1D_float.mod.py b/tests/nnapi/specs/Ex/squared_difference_ex_1D_float.mod.py
index ef7d9f62e..ef7d9f62e 100644..100755
--- a/tests/nnapi/specs/Ex/squared_difference_ex_1D_float.mod.py
+++ b/tests/nnapi/specs/Ex/squared_difference_ex_1D_float.mod.py
diff --git a/tests/nnapi/specs/Ex/squared_difference_ex_2D_float.mod.py b/tests/nnapi/specs/Ex/squared_difference_ex_2D_float.mod.py
index 1753f1960..1753f1960 100644..100755
--- a/tests/nnapi/specs/Ex/squared_difference_ex_2D_float.mod.py
+++ b/tests/nnapi/specs/Ex/squared_difference_ex_2D_float.mod.py
diff --git a/tests/nnapi/specs/Ex/squared_difference_ex_3D_float.mod.py b/tests/nnapi/specs/Ex/squared_difference_ex_3D_float.mod.py
index f91043636..f91043636 100644..100755
--- a/tests/nnapi/specs/Ex/squared_difference_ex_3D_float.mod.py
+++ b/tests/nnapi/specs/Ex/squared_difference_ex_3D_float.mod.py
diff --git a/tests/nnapi/specs/Ex/squared_difference_ex_4D_float.mod.py b/tests/nnapi/specs/Ex/squared_difference_ex_4D_float.mod.py
index 571c6ce99..571c6ce99 100644..100755
--- a/tests/nnapi/specs/Ex/squared_difference_ex_4D_float.mod.py
+++ b/tests/nnapi/specs/Ex/squared_difference_ex_4D_float.mod.py
diff --git a/tests/nnapi/specs/Ex/squared_difference_ex_broadcast_4D_2D_float.mod.py b/tests/nnapi/specs/Ex/squared_difference_ex_broadcast_4D_2D_float.mod.py
index 8404f4fcd..8404f4fcd 100644..100755
--- a/tests/nnapi/specs/Ex/squared_difference_ex_broadcast_4D_2D_float.mod.py
+++ b/tests/nnapi/specs/Ex/squared_difference_ex_broadcast_4D_2D_float.mod.py
diff --git a/tests/nnapi/specs/Ex/squared_difference_ex_broadcast_float.mod.py b/tests/nnapi/specs/Ex/squared_difference_ex_broadcast_float.mod.py
index 0926426e6..0926426e6 100644..100755
--- a/tests/nnapi/specs/Ex/squared_difference_ex_broadcast_float.mod.py
+++ b/tests/nnapi/specs/Ex/squared_difference_ex_broadcast_float.mod.py
diff --git a/tests/nnapi/specs/Ex/transpose_conv_ex_float_1.mod.py b/tests/nnapi/specs/Ex/transpose_conv_ex_float_1.mod.py
index c509835c8..c509835c8 100644..100755
--- a/tests/nnapi/specs/Ex/transpose_conv_ex_float_1.mod.py
+++ b/tests/nnapi/specs/Ex/transpose_conv_ex_float_1.mod.py
diff --git a/tests/nnapi/specs/Ex/transpose_conv_ex_float_2.mod.py b/tests/nnapi/specs/Ex/transpose_conv_ex_float_2.mod.py
index 29030cede..29030cede 100644..100755
--- a/tests/nnapi/specs/Ex/transpose_conv_ex_float_2.mod.py
+++ b/tests/nnapi/specs/Ex/transpose_conv_ex_float_2.mod.py
diff --git a/tests/nnapi/specs/Ex/transpose_conv_ex_float_3.mod.py b/tests/nnapi/specs/Ex/transpose_conv_ex_float_3.mod.py
index 6d0142faf..6d0142faf 100644..100755
--- a/tests/nnapi/specs/Ex/transpose_conv_ex_float_3.mod.py
+++ b/tests/nnapi/specs/Ex/transpose_conv_ex_float_3.mod.py
diff --git a/tests/nnapi/specs/Ex/transpose_conv_ex_float_4.mod.py b/tests/nnapi/specs/Ex/transpose_conv_ex_float_4.mod.py
index de00e5f66..de00e5f66 100644..100755
--- a/tests/nnapi/specs/Ex/transpose_conv_ex_float_4.mod.py
+++ b/tests/nnapi/specs/Ex/transpose_conv_ex_float_4.mod.py
diff --git a/tests/nnapi/specs/Ex/unpack_ex_3D_float_1.mod.py b/tests/nnapi/specs/Ex/unpack_ex_3D_float_1.mod.py
index 7e8ef6086..7e8ef6086 100644..100755
--- a/tests/nnapi/specs/Ex/unpack_ex_3D_float_1.mod.py
+++ b/tests/nnapi/specs/Ex/unpack_ex_3D_float_1.mod.py
diff --git a/tests/nnapi/specs/Ex/unpack_ex_3D_float_2.mod.py b/tests/nnapi/specs/Ex/unpack_ex_3D_float_2.mod.py
index ed7800ad4..ed7800ad4 100644..100755
--- a/tests/nnapi/specs/Ex/unpack_ex_3D_float_2.mod.py
+++ b/tests/nnapi/specs/Ex/unpack_ex_3D_float_2.mod.py
diff --git a/tests/nnapi/specs/Ex/unpack_ex_3D_int_1.mod.py b/tests/nnapi/specs/Ex/unpack_ex_3D_int_1.mod.py
index 34e153bfc..34e153bfc 100644..100755
--- a/tests/nnapi/specs/Ex/unpack_ex_3D_int_1.mod.py
+++ b/tests/nnapi/specs/Ex/unpack_ex_3D_int_1.mod.py
diff --git a/tests/nnapi/specs/Ex/unpack_ex_3D_int_2.mod.py b/tests/nnapi/specs/Ex/unpack_ex_3D_int_2.mod.py
index db51351c5..db51351c5 100644..100755
--- a/tests/nnapi/specs/Ex/unpack_ex_3D_int_2.mod.py
+++ b/tests/nnapi/specs/Ex/unpack_ex_3D_int_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/add.mod.py b/tests/nnapi/specs/V1_0/add.mod.py
index 3374321f0..3374321f0 100644..100755
--- a/tests/nnapi/specs/V1_0/add.mod.py
+++ b/tests/nnapi/specs/V1_0/add.mod.py
diff --git a/tests/nnapi/specs/V1_0/add_broadcast_4D_2D_after_nops_float_nnfw.mod.py b/tests/nnapi/specs/V1_0/add_broadcast_4D_2D_after_nops_float_nnfw.mod.py
index 141682e47..141682e47 100644..100755
--- a/tests/nnapi/specs/V1_0/add_broadcast_4D_2D_after_nops_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_0/add_broadcast_4D_2D_after_nops_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_0/add_broadcast_quant8.mod.py b/tests/nnapi/specs/V1_0/add_broadcast_quant8.mod.py
index cf2dbb749..cf2dbb749 100644..100755
--- a/tests/nnapi/specs/V1_0/add_broadcast_quant8.mod.py
+++ b/tests/nnapi/specs/V1_0/add_broadcast_quant8.mod.py
diff --git a/tests/nnapi/specs/V1_0/add_quant8.mod.py b/tests/nnapi/specs/V1_0/add_quant8.mod.py
index bfbdab765..bfbdab765 100644..100755
--- a/tests/nnapi/specs/V1_0/add_quant8.mod.py
+++ b/tests/nnapi/specs/V1_0/add_quant8.mod.py
diff --git a/tests/nnapi/specs/V1_0/avg_pool_float_1.mod.py b/tests/nnapi/specs/V1_0/avg_pool_float_1.mod.py
index 457313ef3..457313ef3 100644..100755
--- a/tests/nnapi/specs/V1_0/avg_pool_float_1.mod.py
+++ b/tests/nnapi/specs/V1_0/avg_pool_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/avg_pool_float_2.mod.py b/tests/nnapi/specs/V1_0/avg_pool_float_2.mod.py
index 35b73a40b..35b73a40b 100644..100755
--- a/tests/nnapi/specs/V1_0/avg_pool_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/avg_pool_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/avg_pool_float_3.mod.py b/tests/nnapi/specs/V1_0/avg_pool_float_3.mod.py
index 1de8ad13d..1de8ad13d 100644..100755
--- a/tests/nnapi/specs/V1_0/avg_pool_float_3.mod.py
+++ b/tests/nnapi/specs/V1_0/avg_pool_float_3.mod.py
diff --git a/tests/nnapi/specs/V1_0/avg_pool_float_4.mod.py b/tests/nnapi/specs/V1_0/avg_pool_float_4.mod.py
index 480c70d1d..480c70d1d 100644..100755
--- a/tests/nnapi/specs/V1_0/avg_pool_float_4.mod.py
+++ b/tests/nnapi/specs/V1_0/avg_pool_float_4.mod.py
diff --git a/tests/nnapi/specs/V1_0/avg_pool_float_5.mod.py b/tests/nnapi/specs/V1_0/avg_pool_float_5.mod.py
index ef13bb749..ef13bb749 100644..100755
--- a/tests/nnapi/specs/V1_0/avg_pool_float_5.mod.py
+++ b/tests/nnapi/specs/V1_0/avg_pool_float_5.mod.py
diff --git a/tests/nnapi/specs/V1_0/avg_pool_quant8_1.mod.py b/tests/nnapi/specs/V1_0/avg_pool_quant8_1.mod.py
index 3c933ebbf..3c933ebbf 100644..100755
--- a/tests/nnapi/specs/V1_0/avg_pool_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_0/avg_pool_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/avg_pool_quant8_2.mod.py b/tests/nnapi/specs/V1_0/avg_pool_quant8_2.mod.py
index 9d49c303b..9d49c303b 100644..100755
--- a/tests/nnapi/specs/V1_0/avg_pool_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_0/avg_pool_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/avg_pool_quant8_3.mod.py b/tests/nnapi/specs/V1_0/avg_pool_quant8_3.mod.py
index 895ab5e49..895ab5e49 100644..100755
--- a/tests/nnapi/specs/V1_0/avg_pool_quant8_3.mod.py
+++ b/tests/nnapi/specs/V1_0/avg_pool_quant8_3.mod.py
diff --git a/tests/nnapi/specs/V1_0/avg_pool_quant8_4.mod.py b/tests/nnapi/specs/V1_0/avg_pool_quant8_4.mod.py
index 8215a882a..8215a882a 100644..100755
--- a/tests/nnapi/specs/V1_0/avg_pool_quant8_4.mod.py
+++ b/tests/nnapi/specs/V1_0/avg_pool_quant8_4.mod.py
diff --git a/tests/nnapi/specs/V1_0/avg_pool_quant8_5.mod.py b/tests/nnapi/specs/V1_0/avg_pool_quant8_5.mod.py
index f6db46eb8..f6db46eb8 100644..100755
--- a/tests/nnapi/specs/V1_0/avg_pool_quant8_5.mod.py
+++ b/tests/nnapi/specs/V1_0/avg_pool_quant8_5.mod.py
diff --git a/tests/nnapi/specs/V1_0/concat_float_1.mod.py b/tests/nnapi/specs/V1_0/concat_float_1.mod.py
index 4f3350c5d..4f3350c5d 100644..100755
--- a/tests/nnapi/specs/V1_0/concat_float_1.mod.py
+++ b/tests/nnapi/specs/V1_0/concat_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/concat_float_2.mod.py b/tests/nnapi/specs/V1_0/concat_float_2.mod.py
index ad49c5eb2..ad49c5eb2 100644..100755
--- a/tests/nnapi/specs/V1_0/concat_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/concat_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/concat_float_3.mod.py b/tests/nnapi/specs/V1_0/concat_float_3.mod.py
index 7dbe17a78..7dbe17a78 100644..100755
--- a/tests/nnapi/specs/V1_0/concat_float_3.mod.py
+++ b/tests/nnapi/specs/V1_0/concat_float_3.mod.py
diff --git a/tests/nnapi/specs/V1_0/concat_float_4D_axis3_1_nnfw.mod.py b/tests/nnapi/specs/V1_0/concat_float_4D_axis3_1_nnfw.mod.py
index 39080a3dc..39080a3dc 100644..100755
--- a/tests/nnapi/specs/V1_0/concat_float_4D_axis3_1_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_0/concat_float_4D_axis3_1_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_0/concat_quant8_1.mod.py b/tests/nnapi/specs/V1_0/concat_quant8_1.mod.py
index a490497b8..a490497b8 100644..100755
--- a/tests/nnapi/specs/V1_0/concat_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_0/concat_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/concat_quant8_2.mod.py b/tests/nnapi/specs/V1_0/concat_quant8_2.mod.py
index 7da3cd710..7da3cd710 100644..100755
--- a/tests/nnapi/specs/V1_0/concat_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_0/concat_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/concat_quant8_3.mod.py b/tests/nnapi/specs/V1_0/concat_quant8_3.mod.py
index 87a94bbf1..87a94bbf1 100644..100755
--- a/tests/nnapi/specs/V1_0/concat_quant8_3.mod.py
+++ b/tests/nnapi/specs/V1_0/concat_quant8_3.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_1_h3_w2_SAME.mod.py b/tests/nnapi/specs/V1_0/conv_1_h3_w2_SAME.mod.py
index 45a356927..45a356927 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_1_h3_w2_SAME.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_1_h3_w2_SAME.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_1_h3_w2_VALID.mod.py b/tests/nnapi/specs/V1_0/conv_1_h3_w2_VALID.mod.py
index 0e0974371..0e0974371 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_1_h3_w2_VALID.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_1_h3_w2_VALID.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_3_h3_w2_SAME.mod.py b/tests/nnapi/specs/V1_0/conv_3_h3_w2_SAME.mod.py
index 3ba4bad52..3ba4bad52 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_3_h3_w2_SAME.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_3_h3_w2_SAME.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_3_h3_w2_VALID.mod.py b/tests/nnapi/specs/V1_0/conv_3_h3_w2_VALID.mod.py
index 545e9fdc6..545e9fdc6 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_3_h3_w2_VALID.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_3_h3_w2_VALID.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_float.mod.py b/tests/nnapi/specs/V1_0/conv_float.mod.py
index 04e5d76a0..04e5d76a0 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_float.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_float.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_float_2.mod.py b/tests/nnapi/specs/V1_0/conv_float_2.mod.py
index 86755a764..86755a764 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_float_channels.mod.py b/tests/nnapi/specs/V1_0/conv_float_channels.mod.py
index 089a5fcfa..089a5fcfa 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_float_channels.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_float_channels.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_float_channels_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/conv_float_channels_weights_as_inputs.mod.py
index cc6bfcb9a..cc6bfcb9a 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_float_channels_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_float_channels_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_float_large.mod.py b/tests/nnapi/specs/V1_0/conv_float_large.mod.py
index febe12543..febe12543 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_float_large.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_float_large.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_float_large_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/conv_float_large_weights_as_inputs.mod.py
index cc0eaa9e4..cc0eaa9e4 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_float_large_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_float_large_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_float_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/conv_float_weights_as_inputs.mod.py
index fb6ebffed..fb6ebffed 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_float_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_float_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_quant8.mod.py b/tests/nnapi/specs/V1_0/conv_quant8.mod.py
index b56e0b0e4..b56e0b0e4 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_quant8.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_quant8.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_quant8_2.mod.py b/tests/nnapi/specs/V1_0/conv_quant8_2.mod.py
index 0cb6a8889..0cb6a8889 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_quant8_channels.mod.py b/tests/nnapi/specs/V1_0/conv_quant8_channels.mod.py
index f8cab0a1b..f8cab0a1b 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_quant8_channels.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_quant8_channels.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_quant8_channels_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/conv_quant8_channels_weights_as_inputs.mod.py
index 90f62abbf..90f62abbf 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_quant8_channels_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_quant8_channels_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_quant8_large.mod.py b/tests/nnapi/specs/V1_0/conv_quant8_large.mod.py
index 5272d7204..5272d7204 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_quant8_large.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_quant8_large.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_quant8_large_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/conv_quant8_large_weights_as_inputs.mod.py
index 4ccfa0039..4ccfa0039 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_quant8_large_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_quant8_large_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_quant8_overflow.mod.py b/tests/nnapi/specs/V1_0/conv_quant8_overflow.mod.py
index 4d4c6e472..4d4c6e472 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_quant8_overflow.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_quant8_overflow.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_quant8_overflow_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/conv_quant8_overflow_weights_as_inputs.mod.py
index 45e977afe..45e977afe 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_quant8_overflow_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_quant8_overflow_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/conv_quant8_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/conv_quant8_weights_as_inputs.mod.py
index 1b6a709e3..1b6a709e3 100644..100755
--- a/tests/nnapi/specs/V1_0/conv_quant8_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/conv_quant8_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/depth_to_space_float_1.mod.py b/tests/nnapi/specs/V1_0/depth_to_space_float_1.mod.py
index d11e9ee4d..d11e9ee4d 100644..100755
--- a/tests/nnapi/specs/V1_0/depth_to_space_float_1.mod.py
+++ b/tests/nnapi/specs/V1_0/depth_to_space_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/depth_to_space_float_2.mod.py b/tests/nnapi/specs/V1_0/depth_to_space_float_2.mod.py
index ee1efe15e..ee1efe15e 100644..100755
--- a/tests/nnapi/specs/V1_0/depth_to_space_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/depth_to_space_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/depth_to_space_float_3.mod.py b/tests/nnapi/specs/V1_0/depth_to_space_float_3.mod.py
index a9edcf716..a9edcf716 100644..100755
--- a/tests/nnapi/specs/V1_0/depth_to_space_float_3.mod.py
+++ b/tests/nnapi/specs/V1_0/depth_to_space_float_3.mod.py
diff --git a/tests/nnapi/specs/V1_0/depth_to_space_quant8_1.mod.py b/tests/nnapi/specs/V1_0/depth_to_space_quant8_1.mod.py
index 6ae42f46d..6ae42f46d 100644..100755
--- a/tests/nnapi/specs/V1_0/depth_to_space_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_0/depth_to_space_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/depth_to_space_quant8_2.mod.py b/tests/nnapi/specs/V1_0/depth_to_space_quant8_2.mod.py
index 95ea04256..95ea04256 100644..100755
--- a/tests/nnapi/specs/V1_0/depth_to_space_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_0/depth_to_space_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/depthwise_conv.mod.py b/tests/nnapi/specs/V1_0/depthwise_conv.mod.py
index 5e7886d26..5e7886d26 100644..100755
--- a/tests/nnapi/specs/V1_0/depthwise_conv.mod.py
+++ b/tests/nnapi/specs/V1_0/depthwise_conv.mod.py
diff --git a/tests/nnapi/specs/V1_0/depthwise_conv2d_float.mod.py b/tests/nnapi/specs/V1_0/depthwise_conv2d_float.mod.py
index d8f61f3d2..d8f61f3d2 100644..100755
--- a/tests/nnapi/specs/V1_0/depthwise_conv2d_float.mod.py
+++ b/tests/nnapi/specs/V1_0/depthwise_conv2d_float.mod.py
diff --git a/tests/nnapi/specs/V1_0/depthwise_conv2d_float_2.mod.py b/tests/nnapi/specs/V1_0/depthwise_conv2d_float_2.mod.py
index 59e392d58..59e392d58 100644..100755
--- a/tests/nnapi/specs/V1_0/depthwise_conv2d_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/depthwise_conv2d_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large.mod.py b/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large.mod.py
index 9e8b297fd..9e8b297fd 100644..100755
--- a/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large.mod.py
+++ b/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large.mod.py
diff --git a/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_2.mod.py b/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_2.mod.py
index 416279ab4..416279ab4 100644..100755
--- a/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_2.mod.py
+++ b/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_2_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_2_weights_as_inputs.mod.py
index d759cf260..d759cf260 100644..100755
--- a/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_2_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_2_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_weights_as_inputs.mod.py
index 5174402e2..5174402e2 100644..100755
--- a/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/depthwise_conv2d_float_large_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/depthwise_conv2d_float_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/depthwise_conv2d_float_weights_as_inputs.mod.py
index 9a8af4cf6..9a8af4cf6 100644..100755
--- a/tests/nnapi/specs/V1_0/depthwise_conv2d_float_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/depthwise_conv2d_float_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8.mod.py b/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8.mod.py
index 5f43c6362..5f43c6362 100644..100755
--- a/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8.mod.py
+++ b/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8.mod.py
diff --git a/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_2.mod.py b/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_2.mod.py
index 8f7ba1c43..8f7ba1c43 100644..100755
--- a/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_large.mod.py b/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_large.mod.py
index 785e0a770..785e0a770 100644..100755
--- a/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_large.mod.py
+++ b/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_large.mod.py
diff --git a/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_large_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_large_weights_as_inputs.mod.py
index 506aa5845..506aa5845 100644..100755
--- a/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_large_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_large_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_weights_as_inputs.mod.py
index f0478f884..f0478f884 100644..100755
--- a/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/depthwise_conv2d_quant8_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/dequantize.mod.py b/tests/nnapi/specs/V1_0/dequantize.mod.py
index bef7a3200..bef7a3200 100644..100755
--- a/tests/nnapi/specs/V1_0/dequantize.mod.py
+++ b/tests/nnapi/specs/V1_0/dequantize.mod.py
diff --git a/tests/nnapi/specs/V1_0/embedding_lookup.mod.py b/tests/nnapi/specs/V1_0/embedding_lookup.mod.py
index a012c7fbf..a012c7fbf 100644..100755
--- a/tests/nnapi/specs/V1_0/embedding_lookup.mod.py
+++ b/tests/nnapi/specs/V1_0/embedding_lookup.mod.py
diff --git a/tests/nnapi/specs/V1_0/embedding_lookup_2d_nnfw.mod.py b/tests/nnapi/specs/V1_0/embedding_lookup_2d_nnfw.mod.py
index f0e6528f9..f0e6528f9 100644..100755
--- a/tests/nnapi/specs/V1_0/embedding_lookup_2d_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_0/embedding_lookup_2d_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_0/embedding_lookup_4d_nnfw.mod.py b/tests/nnapi/specs/V1_0/embedding_lookup_4d_nnfw.mod.py
index c149cca80..c149cca80 100644..100755
--- a/tests/nnapi/specs/V1_0/embedding_lookup_4d_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_0/embedding_lookup_4d_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_0/floor_.mod.py b/tests/nnapi/specs/V1_0/floor_.mod.py
index 0925b1454..0925b1454 100644..100755
--- a/tests/nnapi/specs/V1_0/floor_.mod.py
+++ b/tests/nnapi/specs/V1_0/floor_.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_float.mod.py b/tests/nnapi/specs/V1_0/fully_connected_float.mod.py
index 29a21e77a..29a21e77a 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_float.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_float.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_float_1_nnfw.mod.py b/tests/nnapi/specs/V1_0/fully_connected_float_1_nnfw.mod.py
index 6030efeff..6030efeff 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_float_1_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_float_1_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_float_2.mod.py b/tests/nnapi/specs/V1_0/fully_connected_float_2.mod.py
index 92fad6063..92fad6063 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_float_3.mod.py b/tests/nnapi/specs/V1_0/fully_connected_float_3.mod.py
index 804f81266..804f81266 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_float_3.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_float_3.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_float_large.mod.py b/tests/nnapi/specs/V1_0/fully_connected_float_large.mod.py
index 2bfa98b21..2bfa98b21 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_float_large.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_float_large.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_float_large_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/fully_connected_float_large_weights_as_inputs.mod.py
index f88fbcab5..f88fbcab5 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_float_large_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_float_large_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_float_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/fully_connected_float_weights_as_inputs.mod.py
index 0c7509dee..0c7509dee 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_float_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_float_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_hybrid_1_nnfw.mod.py b/tests/nnapi/specs/V1_0/fully_connected_hybrid_1_nnfw.mod.py
index d9bcf1610..d9bcf1610 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_hybrid_1_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_hybrid_1_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_hybrid_2_nnfw.mod.py b/tests/nnapi/specs/V1_0/fully_connected_hybrid_2_nnfw.mod.py
index 2e3d7d510..2e3d7d510 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_hybrid_2_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_hybrid_2_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_quant8.mod.py b/tests/nnapi/specs/V1_0/fully_connected_quant8.mod.py
index 3bb941bb4..3bb941bb4 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_quant8.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_quant8.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_quant8_2.mod.py b/tests/nnapi/specs/V1_0/fully_connected_quant8_2.mod.py
index a87eee811..a87eee811 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_quant8_large.mod.py b/tests/nnapi/specs/V1_0/fully_connected_quant8_large.mod.py
index f04d150d0..f04d150d0 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_quant8_large.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_quant8_large.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_quant8_large_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/fully_connected_quant8_large_weights_as_inputs.mod.py
index 70ea52556..70ea52556 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_quant8_large_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_quant8_large_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/fully_connected_quant8_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/fully_connected_quant8_weights_as_inputs.mod.py
index 50e0220e8..50e0220e8 100644..100755
--- a/tests/nnapi/specs/V1_0/fully_connected_quant8_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/fully_connected_quant8_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/hashtable_lookup_float.mod.py b/tests/nnapi/specs/V1_0/hashtable_lookup_float.mod.py
index 2cdafa3f7..2cdafa3f7 100644..100755
--- a/tests/nnapi/specs/V1_0/hashtable_lookup_float.mod.py
+++ b/tests/nnapi/specs/V1_0/hashtable_lookup_float.mod.py
diff --git a/tests/nnapi/specs/V1_0/hashtable_lookup_float_4D_nnfw.mod.py b/tests/nnapi/specs/V1_0/hashtable_lookup_float_4D_nnfw.mod.py
index 9db9f27f8..9db9f27f8 100644..100755
--- a/tests/nnapi/specs/V1_0/hashtable_lookup_float_4D_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_0/hashtable_lookup_float_4D_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_0/hashtable_lookup_quant8.mod.py b/tests/nnapi/specs/V1_0/hashtable_lookup_quant8.mod.py
index ed735f286..ed735f286 100644..100755
--- a/tests/nnapi/specs/V1_0/hashtable_lookup_quant8.mod.py
+++ b/tests/nnapi/specs/V1_0/hashtable_lookup_quant8.mod.py
diff --git a/tests/nnapi/specs/V1_0/l2_normalization.mod.py b/tests/nnapi/specs/V1_0/l2_normalization.mod.py
index df44c688e..df44c688e 100644..100755
--- a/tests/nnapi/specs/V1_0/l2_normalization.mod.py
+++ b/tests/nnapi/specs/V1_0/l2_normalization.mod.py
diff --git a/tests/nnapi/specs/V1_0/l2_normalization_2.mod.py b/tests/nnapi/specs/V1_0/l2_normalization_2.mod.py
index 36feba073..36feba073 100644..100755
--- a/tests/nnapi/specs/V1_0/l2_normalization_2.mod.py
+++ b/tests/nnapi/specs/V1_0/l2_normalization_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/l2_normalization_large.mod.py b/tests/nnapi/specs/V1_0/l2_normalization_large.mod.py
index 58b2d4f28..58b2d4f28 100644..100755
--- a/tests/nnapi/specs/V1_0/l2_normalization_large.mod.py
+++ b/tests/nnapi/specs/V1_0/l2_normalization_large.mod.py
diff --git a/tests/nnapi/specs/V1_0/l2_pool_float.mod.py b/tests/nnapi/specs/V1_0/l2_pool_float.mod.py
index fee09746e..fee09746e 100644..100755
--- a/tests/nnapi/specs/V1_0/l2_pool_float.mod.py
+++ b/tests/nnapi/specs/V1_0/l2_pool_float.mod.py
diff --git a/tests/nnapi/specs/V1_0/l2_pool_float_2.mod.py b/tests/nnapi/specs/V1_0/l2_pool_float_2.mod.py
index e10c050cc..e10c050cc 100644..100755
--- a/tests/nnapi/specs/V1_0/l2_pool_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/l2_pool_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/l2_pool_float_large.mod.py b/tests/nnapi/specs/V1_0/l2_pool_float_large.mod.py
index 281118e4c..281118e4c 100644..100755
--- a/tests/nnapi/specs/V1_0/l2_pool_float_large.mod.py
+++ b/tests/nnapi/specs/V1_0/l2_pool_float_large.mod.py
diff --git a/tests/nnapi/specs/V1_0/local_response_norm_float_1.mod.py b/tests/nnapi/specs/V1_0/local_response_norm_float_1.mod.py
index 99bb59b56..99bb59b56 100644..100755
--- a/tests/nnapi/specs/V1_0/local_response_norm_float_1.mod.py
+++ b/tests/nnapi/specs/V1_0/local_response_norm_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/local_response_norm_float_2.mod.py b/tests/nnapi/specs/V1_0/local_response_norm_float_2.mod.py
index fa025bfc0..fa025bfc0 100644..100755
--- a/tests/nnapi/specs/V1_0/local_response_norm_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/local_response_norm_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/local_response_norm_float_3.mod.py b/tests/nnapi/specs/V1_0/local_response_norm_float_3.mod.py
index e655bb90d..e655bb90d 100644..100755
--- a/tests/nnapi/specs/V1_0/local_response_norm_float_3.mod.py
+++ b/tests/nnapi/specs/V1_0/local_response_norm_float_3.mod.py
diff --git a/tests/nnapi/specs/V1_0/local_response_norm_float_4.mod.py b/tests/nnapi/specs/V1_0/local_response_norm_float_4.mod.py
index e4d2817d9..e4d2817d9 100644..100755
--- a/tests/nnapi/specs/V1_0/local_response_norm_float_4.mod.py
+++ b/tests/nnapi/specs/V1_0/local_response_norm_float_4.mod.py
diff --git a/tests/nnapi/specs/V1_0/logistic_float_1.mod.py b/tests/nnapi/specs/V1_0/logistic_float_1.mod.py
index 01ce100ab..01ce100ab 100644..100755
--- a/tests/nnapi/specs/V1_0/logistic_float_1.mod.py
+++ b/tests/nnapi/specs/V1_0/logistic_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/logistic_float_2.mod.py b/tests/nnapi/specs/V1_0/logistic_float_2.mod.py
index 93d812c08..93d812c08 100644..100755
--- a/tests/nnapi/specs/V1_0/logistic_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/logistic_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/logistic_quant8_1.mod.py b/tests/nnapi/specs/V1_0/logistic_quant8_1.mod.py
index 800b4fa72..800b4fa72 100644..100755
--- a/tests/nnapi/specs/V1_0/logistic_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_0/logistic_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/logistic_quant8_2.mod.py b/tests/nnapi/specs/V1_0/logistic_quant8_2.mod.py
index 267452fa4..267452fa4 100644..100755
--- a/tests/nnapi/specs/V1_0/logistic_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_0/logistic_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/lsh_projection.mod.py b/tests/nnapi/specs/V1_0/lsh_projection.mod.py
index 50dfcd411..50dfcd411 100644..100755
--- a/tests/nnapi/specs/V1_0/lsh_projection.mod.py
+++ b/tests/nnapi/specs/V1_0/lsh_projection.mod.py
diff --git a/tests/nnapi/specs/V1_0/lsh_projection_2.mod.py b/tests/nnapi/specs/V1_0/lsh_projection_2.mod.py
index a297db247..a297db247 100644..100755
--- a/tests/nnapi/specs/V1_0/lsh_projection_2.mod.py
+++ b/tests/nnapi/specs/V1_0/lsh_projection_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/lsh_projection_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/lsh_projection_weights_as_inputs.mod.py
index 7e87fcdc5..7e87fcdc5 100644..100755
--- a/tests/nnapi/specs/V1_0/lsh_projection_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/lsh_projection_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/lstm.mod.py b/tests/nnapi/specs/V1_0/lstm.mod.py
index 830f4b725..830f4b725 100644..100755
--- a/tests/nnapi/specs/V1_0/lstm.mod.py
+++ b/tests/nnapi/specs/V1_0/lstm.mod.py
diff --git a/tests/nnapi/specs/V1_0/lstm2.mod.py b/tests/nnapi/specs/V1_0/lstm2.mod.py
index e42681ae2..e42681ae2 100644..100755
--- a/tests/nnapi/specs/V1_0/lstm2.mod.py
+++ b/tests/nnapi/specs/V1_0/lstm2.mod.py
diff --git a/tests/nnapi/specs/V1_0/lstm2_state.mod.py b/tests/nnapi/specs/V1_0/lstm2_state.mod.py
index 7543e8d64..7543e8d64 100644..100755
--- a/tests/nnapi/specs/V1_0/lstm2_state.mod.py
+++ b/tests/nnapi/specs/V1_0/lstm2_state.mod.py
diff --git a/tests/nnapi/specs/V1_0/lstm2_state2.mod.py b/tests/nnapi/specs/V1_0/lstm2_state2.mod.py
index 027bedcb4..027bedcb4 100644..100755
--- a/tests/nnapi/specs/V1_0/lstm2_state2.mod.py
+++ b/tests/nnapi/specs/V1_0/lstm2_state2.mod.py
diff --git a/tests/nnapi/specs/V1_0/lstm3.mod.py b/tests/nnapi/specs/V1_0/lstm3.mod.py
index a9d2bf884..a9d2bf884 100644..100755
--- a/tests/nnapi/specs/V1_0/lstm3.mod.py
+++ b/tests/nnapi/specs/V1_0/lstm3.mod.py
diff --git a/tests/nnapi/specs/V1_0/lstm3_state.mod.py b/tests/nnapi/specs/V1_0/lstm3_state.mod.py
index 1fd91a91e..1fd91a91e 100644..100755
--- a/tests/nnapi/specs/V1_0/lstm3_state.mod.py
+++ b/tests/nnapi/specs/V1_0/lstm3_state.mod.py
diff --git a/tests/nnapi/specs/V1_0/lstm3_state2.mod.py b/tests/nnapi/specs/V1_0/lstm3_state2.mod.py
index bc6ae7e86..bc6ae7e86 100644..100755
--- a/tests/nnapi/specs/V1_0/lstm3_state2.mod.py
+++ b/tests/nnapi/specs/V1_0/lstm3_state2.mod.py
diff --git a/tests/nnapi/specs/V1_0/lstm3_state3.mod.py b/tests/nnapi/specs/V1_0/lstm3_state3.mod.py
index 40262c251..40262c251 100644..100755
--- a/tests/nnapi/specs/V1_0/lstm3_state3.mod.py
+++ b/tests/nnapi/specs/V1_0/lstm3_state3.mod.py
diff --git a/tests/nnapi/specs/V1_0/lstm_state.mod.py b/tests/nnapi/specs/V1_0/lstm_state.mod.py
index a242b7973..a242b7973 100644..100755
--- a/tests/nnapi/specs/V1_0/lstm_state.mod.py
+++ b/tests/nnapi/specs/V1_0/lstm_state.mod.py
diff --git a/tests/nnapi/specs/V1_0/lstm_state2.mod.py b/tests/nnapi/specs/V1_0/lstm_state2.mod.py
index 64cb2d29a..64cb2d29a 100644..100755
--- a/tests/nnapi/specs/V1_0/lstm_state2.mod.py
+++ b/tests/nnapi/specs/V1_0/lstm_state2.mod.py
diff --git a/tests/nnapi/specs/V1_0/max_pool_float_1.mod.py b/tests/nnapi/specs/V1_0/max_pool_float_1.mod.py
index feb99cbb4..feb99cbb4 100644..100755
--- a/tests/nnapi/specs/V1_0/max_pool_float_1.mod.py
+++ b/tests/nnapi/specs/V1_0/max_pool_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/max_pool_float_2.mod.py b/tests/nnapi/specs/V1_0/max_pool_float_2.mod.py
index b65664e14..b65664e14 100644..100755
--- a/tests/nnapi/specs/V1_0/max_pool_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/max_pool_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/max_pool_float_3.mod.py b/tests/nnapi/specs/V1_0/max_pool_float_3.mod.py
index 0173776bf..0173776bf 100644..100755
--- a/tests/nnapi/specs/V1_0/max_pool_float_3.mod.py
+++ b/tests/nnapi/specs/V1_0/max_pool_float_3.mod.py
diff --git a/tests/nnapi/specs/V1_0/max_pool_float_4.mod.py b/tests/nnapi/specs/V1_0/max_pool_float_4.mod.py
index 76ec10df4..76ec10df4 100644..100755
--- a/tests/nnapi/specs/V1_0/max_pool_float_4.mod.py
+++ b/tests/nnapi/specs/V1_0/max_pool_float_4.mod.py
diff --git a/tests/nnapi/specs/V1_0/max_pool_quant8_1.mod.py b/tests/nnapi/specs/V1_0/max_pool_quant8_1.mod.py
index db4fadb2c..db4fadb2c 100644..100755
--- a/tests/nnapi/specs/V1_0/max_pool_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_0/max_pool_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/max_pool_quant8_2.mod.py b/tests/nnapi/specs/V1_0/max_pool_quant8_2.mod.py
index aeb777ac3..aeb777ac3 100644..100755
--- a/tests/nnapi/specs/V1_0/max_pool_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_0/max_pool_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/max_pool_quant8_3.mod.py b/tests/nnapi/specs/V1_0/max_pool_quant8_3.mod.py
index e2270aaa6..e2270aaa6 100644..100755
--- a/tests/nnapi/specs/V1_0/max_pool_quant8_3.mod.py
+++ b/tests/nnapi/specs/V1_0/max_pool_quant8_3.mod.py
diff --git a/tests/nnapi/specs/V1_0/max_pool_quant8_4.mod.py b/tests/nnapi/specs/V1_0/max_pool_quant8_4.mod.py
index 1fbcf53ba..1fbcf53ba 100644..100755
--- a/tests/nnapi/specs/V1_0/max_pool_quant8_4.mod.py
+++ b/tests/nnapi/specs/V1_0/max_pool_quant8_4.mod.py
diff --git a/tests/nnapi/specs/V1_0/mul.mod.py b/tests/nnapi/specs/V1_0/mul.mod.py
index d5ea1e036..d5ea1e036 100644..100755
--- a/tests/nnapi/specs/V1_0/mul.mod.py
+++ b/tests/nnapi/specs/V1_0/mul.mod.py
diff --git a/tests/nnapi/specs/V1_0/mul_4D_nnfw.mod.py b/tests/nnapi/specs/V1_0/mul_4D_nnfw.mod.py
index ae01d989f..ae01d989f 100644..100755
--- a/tests/nnapi/specs/V1_0/mul_4D_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_0/mul_4D_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_0/mul_broadcast_3D_1D_1_nnfw.mod.py b/tests/nnapi/specs/V1_0/mul_broadcast_3D_1D_1_nnfw.mod.py
index 49729eb8c..49729eb8c 100644..100755
--- a/tests/nnapi/specs/V1_0/mul_broadcast_3D_1D_1_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_0/mul_broadcast_3D_1D_1_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_0/mul_broadcast_3D_1D_2_nnfw.mod.py b/tests/nnapi/specs/V1_0/mul_broadcast_3D_1D_2_nnfw.mod.py
index a24ad5889..a24ad5889 100644..100755
--- a/tests/nnapi/specs/V1_0/mul_broadcast_3D_1D_2_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_0/mul_broadcast_3D_1D_2_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_0/mul_broadcast_quant8.mod.py b/tests/nnapi/specs/V1_0/mul_broadcast_quant8.mod.py
index b1b620864..b1b620864 100644..100755
--- a/tests/nnapi/specs/V1_0/mul_broadcast_quant8.mod.py
+++ b/tests/nnapi/specs/V1_0/mul_broadcast_quant8.mod.py
diff --git a/tests/nnapi/specs/V1_0/mul_float_square_nnfw.mod.py b/tests/nnapi/specs/V1_0/mul_float_square_nnfw.mod.py
index 2945e5bdc..2945e5bdc 100644..100755
--- a/tests/nnapi/specs/V1_0/mul_float_square_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_0/mul_float_square_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_0/mul_quant8.mod.py b/tests/nnapi/specs/V1_0/mul_quant8.mod.py
index c9df6e265..c9df6e265 100644..100755
--- a/tests/nnapi/specs/V1_0/mul_quant8.mod.py
+++ b/tests/nnapi/specs/V1_0/mul_quant8.mod.py
diff --git a/tests/nnapi/specs/V1_0/mul_relu.mod.py b/tests/nnapi/specs/V1_0/mul_relu.mod.py
index 9ee2e714a..9ee2e714a 100644..100755
--- a/tests/nnapi/specs/V1_0/mul_relu.mod.py
+++ b/tests/nnapi/specs/V1_0/mul_relu.mod.py
diff --git a/tests/nnapi/specs/V1_0/relu1_float_1.mod.py b/tests/nnapi/specs/V1_0/relu1_float_1.mod.py
index 83505645d..83505645d 100644..100755
--- a/tests/nnapi/specs/V1_0/relu1_float_1.mod.py
+++ b/tests/nnapi/specs/V1_0/relu1_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/relu1_float_2.mod.py b/tests/nnapi/specs/V1_0/relu1_float_2.mod.py
index 29b698fbb..29b698fbb 100644..100755
--- a/tests/nnapi/specs/V1_0/relu1_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/relu1_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/relu1_quant8_1.mod.py b/tests/nnapi/specs/V1_0/relu1_quant8_1.mod.py
index d98101257..d98101257 100644..100755
--- a/tests/nnapi/specs/V1_0/relu1_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_0/relu1_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/relu1_quant8_2.mod.py b/tests/nnapi/specs/V1_0/relu1_quant8_2.mod.py
index f4804101a..f4804101a 100644..100755
--- a/tests/nnapi/specs/V1_0/relu1_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_0/relu1_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/relu6_float_1.mod.py b/tests/nnapi/specs/V1_0/relu6_float_1.mod.py
index 2b10552f2..2b10552f2 100644..100755
--- a/tests/nnapi/specs/V1_0/relu6_float_1.mod.py
+++ b/tests/nnapi/specs/V1_0/relu6_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/relu6_float_2.mod.py b/tests/nnapi/specs/V1_0/relu6_float_2.mod.py
index c068eab3c..c068eab3c 100644..100755
--- a/tests/nnapi/specs/V1_0/relu6_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/relu6_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/relu6_quant8_1.mod.py b/tests/nnapi/specs/V1_0/relu6_quant8_1.mod.py
index a5a6d9a10..a5a6d9a10 100644..100755
--- a/tests/nnapi/specs/V1_0/relu6_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_0/relu6_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/relu6_quant8_2.mod.py b/tests/nnapi/specs/V1_0/relu6_quant8_2.mod.py
index 41dd148b8..41dd148b8 100644..100755
--- a/tests/nnapi/specs/V1_0/relu6_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_0/relu6_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/relu_float_1.mod.py b/tests/nnapi/specs/V1_0/relu_float_1.mod.py
index d24ee40f7..d24ee40f7 100644..100755
--- a/tests/nnapi/specs/V1_0/relu_float_1.mod.py
+++ b/tests/nnapi/specs/V1_0/relu_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/relu_float_2.mod.py b/tests/nnapi/specs/V1_0/relu_float_2.mod.py
index daa46a032..daa46a032 100644..100755
--- a/tests/nnapi/specs/V1_0/relu_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/relu_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/relu_quant8_1.mod.py b/tests/nnapi/specs/V1_0/relu_quant8_1.mod.py
index acaf87d2e..acaf87d2e 100644..100755
--- a/tests/nnapi/specs/V1_0/relu_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_0/relu_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/relu_quant8_2.mod.py b/tests/nnapi/specs/V1_0/relu_quant8_2.mod.py
index f90aa0cb6..f90aa0cb6 100644..100755
--- a/tests/nnapi/specs/V1_0/relu_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_0/relu_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/reshape.mod.py b/tests/nnapi/specs/V1_0/reshape.mod.py
index 2a6cfc976..2a6cfc976 100644..100755
--- a/tests/nnapi/specs/V1_0/reshape.mod.py
+++ b/tests/nnapi/specs/V1_0/reshape.mod.py
diff --git a/tests/nnapi/specs/V1_0/reshape_quant8.mod.py b/tests/nnapi/specs/V1_0/reshape_quant8.mod.py
index a9586418a..a9586418a 100644..100755
--- a/tests/nnapi/specs/V1_0/reshape_quant8.mod.py
+++ b/tests/nnapi/specs/V1_0/reshape_quant8.mod.py
diff --git a/tests/nnapi/specs/V1_0/reshape_quant8_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/reshape_quant8_weights_as_inputs.mod.py
index f1f6814ea..f1f6814ea 100644..100755
--- a/tests/nnapi/specs/V1_0/reshape_quant8_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/reshape_quant8_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/reshape_weights_as_inputs.mod.py b/tests/nnapi/specs/V1_0/reshape_weights_as_inputs.mod.py
index c64ac5798..c64ac5798 100644..100755
--- a/tests/nnapi/specs/V1_0/reshape_weights_as_inputs.mod.py
+++ b/tests/nnapi/specs/V1_0/reshape_weights_as_inputs.mod.py
diff --git a/tests/nnapi/specs/V1_0/resize_bilinear.mod.py b/tests/nnapi/specs/V1_0/resize_bilinear.mod.py
index 17fb34007..17fb34007 100644..100755
--- a/tests/nnapi/specs/V1_0/resize_bilinear.mod.py
+++ b/tests/nnapi/specs/V1_0/resize_bilinear.mod.py
diff --git a/tests/nnapi/specs/V1_0/resize_bilinear_2.mod.py b/tests/nnapi/specs/V1_0/resize_bilinear_2.mod.py
index 4035b4371..4035b4371 100644..100755
--- a/tests/nnapi/specs/V1_0/resize_bilinear_2.mod.py
+++ b/tests/nnapi/specs/V1_0/resize_bilinear_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/rnn.mod.py b/tests/nnapi/specs/V1_0/rnn.mod.py
index 7d34cbcbf..7d34cbcbf 100644..100755
--- a/tests/nnapi/specs/V1_0/rnn.mod.py
+++ b/tests/nnapi/specs/V1_0/rnn.mod.py
diff --git a/tests/nnapi/specs/V1_0/rnn_state.mod.py b/tests/nnapi/specs/V1_0/rnn_state.mod.py
index 49a73b737..49a73b737 100644..100755
--- a/tests/nnapi/specs/V1_0/rnn_state.mod.py
+++ b/tests/nnapi/specs/V1_0/rnn_state.mod.py
diff --git a/tests/nnapi/specs/V1_0/softmax_float_1.mod.py b/tests/nnapi/specs/V1_0/softmax_float_1.mod.py
index 9f62bad96..9f62bad96 100644..100755
--- a/tests/nnapi/specs/V1_0/softmax_float_1.mod.py
+++ b/tests/nnapi/specs/V1_0/softmax_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/softmax_float_2.mod.py b/tests/nnapi/specs/V1_0/softmax_float_2.mod.py
index c63fa56bf..c63fa56bf 100644..100755
--- a/tests/nnapi/specs/V1_0/softmax_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/softmax_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/softmax_quant8_1.mod.py b/tests/nnapi/specs/V1_0/softmax_quant8_1.mod.py
index 4fd50de91..4fd50de91 100644..100755
--- a/tests/nnapi/specs/V1_0/softmax_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_0/softmax_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/softmax_quant8_2.mod.py b/tests/nnapi/specs/V1_0/softmax_quant8_2.mod.py
index e4caa0055..e4caa0055 100644..100755
--- a/tests/nnapi/specs/V1_0/softmax_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_0/softmax_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/space_to_depth_float_1.mod.py b/tests/nnapi/specs/V1_0/space_to_depth_float_1.mod.py
index d11d8de9c..d11d8de9c 100644..100755
--- a/tests/nnapi/specs/V1_0/space_to_depth_float_1.mod.py
+++ b/tests/nnapi/specs/V1_0/space_to_depth_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/space_to_depth_float_2.mod.py b/tests/nnapi/specs/V1_0/space_to_depth_float_2.mod.py
index df557f6dc..df557f6dc 100644..100755
--- a/tests/nnapi/specs/V1_0/space_to_depth_float_2.mod.py
+++ b/tests/nnapi/specs/V1_0/space_to_depth_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/space_to_depth_float_3.mod.py b/tests/nnapi/specs/V1_0/space_to_depth_float_3.mod.py
index e5298dff1..e5298dff1 100644..100755
--- a/tests/nnapi/specs/V1_0/space_to_depth_float_3.mod.py
+++ b/tests/nnapi/specs/V1_0/space_to_depth_float_3.mod.py
diff --git a/tests/nnapi/specs/V1_0/space_to_depth_quant8_1.mod.py b/tests/nnapi/specs/V1_0/space_to_depth_quant8_1.mod.py
index 844a77f80..844a77f80 100644..100755
--- a/tests/nnapi/specs/V1_0/space_to_depth_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_0/space_to_depth_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_0/space_to_depth_quant8_2.mod.py b/tests/nnapi/specs/V1_0/space_to_depth_quant8_2.mod.py
index b8f0d5f81..b8f0d5f81 100644..100755
--- a/tests/nnapi/specs/V1_0/space_to_depth_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_0/space_to_depth_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_0/svdf.mod.py b/tests/nnapi/specs/V1_0/svdf.mod.py
index 4f3e42c20..4f3e42c20 100644..100755
--- a/tests/nnapi/specs/V1_0/svdf.mod.py
+++ b/tests/nnapi/specs/V1_0/svdf.mod.py
diff --git a/tests/nnapi/specs/V1_0/svdf2.mod.py b/tests/nnapi/specs/V1_0/svdf2.mod.py
index c34926bc3..c34926bc3 100644..100755
--- a/tests/nnapi/specs/V1_0/svdf2.mod.py
+++ b/tests/nnapi/specs/V1_0/svdf2.mod.py
diff --git a/tests/nnapi/specs/V1_0/svdf_bias_present.mod.py b/tests/nnapi/specs/V1_0/svdf_bias_present.mod.py
index ae7d1e7b8..ae7d1e7b8 100644..100755
--- a/tests/nnapi/specs/V1_0/svdf_bias_present.mod.py
+++ b/tests/nnapi/specs/V1_0/svdf_bias_present.mod.py
diff --git a/tests/nnapi/specs/V1_0/svdf_state.mod.py b/tests/nnapi/specs/V1_0/svdf_state.mod.py
index dc3e4f420..dc3e4f420 100644..100755
--- a/tests/nnapi/specs/V1_0/svdf_state.mod.py
+++ b/tests/nnapi/specs/V1_0/svdf_state.mod.py
diff --git a/tests/nnapi/specs/V1_0/tanh_.mod.py b/tests/nnapi/specs/V1_0/tanh_.mod.py
index daff31687..daff31687 100644..100755
--- a/tests/nnapi/specs/V1_0/tanh_.mod.py
+++ b/tests/nnapi/specs/V1_0/tanh_.mod.py
diff --git a/tests/nnapi/specs/V1_1/batch_to_space.mod.py b/tests/nnapi/specs/V1_1/batch_to_space.mod.py
index bf8f56ac5..bf8f56ac5 100644..100755
--- a/tests/nnapi/specs/V1_1/batch_to_space.mod.py
+++ b/tests/nnapi/specs/V1_1/batch_to_space.mod.py
diff --git a/tests/nnapi/specs/V1_1/batch_to_space_float_1.mod.py b/tests/nnapi/specs/V1_1/batch_to_space_float_1.mod.py
index 019242a68..0621e3f02 100644..100755
--- a/tests/nnapi/specs/V1_1/batch_to_space_float_1.mod.py
+++ b/tests/nnapi/specs/V1_1/batch_to_space_float_1.mod.py
@@ -13,4 +13,4 @@ output0 = {output: # output 0
[1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16]}
# Instantiate an example
-Example((input0, output0)) \ No newline at end of file
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_1/batch_to_space_quant8_1.mod.py b/tests/nnapi/specs/V1_1/batch_to_space_quant8_1.mod.py
index 8c6a72793..9f31489aa 100644..100755
--- a/tests/nnapi/specs/V1_1/batch_to_space_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_1/batch_to_space_quant8_1.mod.py
@@ -13,4 +13,4 @@ output0 = {output: # output 0
[1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16]}
# Instantiate an example
-Example((input0, output0)) \ No newline at end of file
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_1/div_.mod.py b/tests/nnapi/specs/V1_1/div_.mod.py
index e012f79f0..e012f79f0 100644..100755
--- a/tests/nnapi/specs/V1_1/div_.mod.py
+++ b/tests/nnapi/specs/V1_1/div_.mod.py
diff --git a/tests/nnapi/specs/V1_1/div_broadcast_float.mod.py b/tests/nnapi/specs/V1_1/div_broadcast_float.mod.py
index d4e0ea91d..d4e0ea91d 100644..100755
--- a/tests/nnapi/specs/V1_1/div_broadcast_float.mod.py
+++ b/tests/nnapi/specs/V1_1/div_broadcast_float.mod.py
diff --git a/tests/nnapi/specs/V1_1/div_broadcast_float_4D_2D_nnfw.mod.py b/tests/nnapi/specs/V1_1/div_broadcast_float_4D_2D_nnfw.mod.py
index 08d995ba4..08d995ba4 100644..100755
--- a/tests/nnapi/specs/V1_1/div_broadcast_float_4D_2D_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/div_broadcast_float_4D_2D_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/fully_connected_float_4d_simple.mod.py b/tests/nnapi/specs/V1_1/fully_connected_float_4d_simple.mod.py
index 16460ee17..16460ee17 100644..100755
--- a/tests/nnapi/specs/V1_1/fully_connected_float_4d_simple.mod.py
+++ b/tests/nnapi/specs/V1_1/fully_connected_float_4d_simple.mod.py
diff --git a/tests/nnapi/specs/V1_1/mean.mod.py b/tests/nnapi/specs/V1_1/mean.mod.py
index 28bd6af03..28bd6af03 100644..100755
--- a/tests/nnapi/specs/V1_1/mean.mod.py
+++ b/tests/nnapi/specs/V1_1/mean.mod.py
diff --git a/tests/nnapi/specs/V1_1/mean_4D_float_reducing_C_nnfw.mod.py b/tests/nnapi/specs/V1_1/mean_4D_float_reducing_C_nnfw.mod.py
index 37bc383d8..37bc383d8 100644..100755
--- a/tests/nnapi/specs/V1_1/mean_4D_float_reducing_C_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/mean_4D_float_reducing_C_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/mean_4D_float_reducing_HW_nnfw.mod.py b/tests/nnapi/specs/V1_1/mean_4D_float_reducing_HW_nnfw.mod.py
index a778a657a..a778a657a 100644..100755
--- a/tests/nnapi/specs/V1_1/mean_4D_float_reducing_HW_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/mean_4D_float_reducing_HW_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/mean_axis01_1_nnfw.mod.py b/tests/nnapi/specs/V1_1/mean_axis01_1_nnfw.mod.py
index 00b63a59c..00b63a59c 100644..100755
--- a/tests/nnapi/specs/V1_1/mean_axis01_1_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/mean_axis01_1_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/mean_axis01_2_nnfw.mod.py b/tests/nnapi/specs/V1_1/mean_axis01_2_nnfw.mod.py
index 1f0eb9c00..1f0eb9c00 100644..100755
--- a/tests/nnapi/specs/V1_1/mean_axis01_2_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/mean_axis01_2_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/mean_float_1.mod.py b/tests/nnapi/specs/V1_1/mean_float_1.mod.py
index 5fde65d40..5fde65d40 100644..100755
--- a/tests/nnapi/specs/V1_1/mean_float_1.mod.py
+++ b/tests/nnapi/specs/V1_1/mean_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_1/mean_float_2.mod.py b/tests/nnapi/specs/V1_1/mean_float_2.mod.py
index 4b71d472a..4b71d472a 100644..100755
--- a/tests/nnapi/specs/V1_1/mean_float_2.mod.py
+++ b/tests/nnapi/specs/V1_1/mean_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_1/mean_quant8_1.mod.py b/tests/nnapi/specs/V1_1/mean_quant8_1.mod.py
index 666b0c28f..666b0c28f 100644..100755
--- a/tests/nnapi/specs/V1_1/mean_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_1/mean_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_1/mean_quant8_2.mod.py b/tests/nnapi/specs/V1_1/mean_quant8_2.mod.py
index 23fd87c63..23fd87c63 100644..100755
--- a/tests/nnapi/specs/V1_1/mean_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_1/mean_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_1/pad.mod.py b/tests/nnapi/specs/V1_1/pad.mod.py
index 54a5a469d..54a5a469d 100644..100755
--- a/tests/nnapi/specs/V1_1/pad.mod.py
+++ b/tests/nnapi/specs/V1_1/pad.mod.py
diff --git a/tests/nnapi/specs/V1_1/pad_2D_HW_nnfw.mod.py b/tests/nnapi/specs/V1_1/pad_2D_HW_nnfw.mod.py
index 8c8e64ad6..8c8e64ad6 100644..100755
--- a/tests/nnapi/specs/V1_1/pad_2D_HW_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/pad_2D_HW_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/pad_3D_HWC_nnfw.mod.py b/tests/nnapi/specs/V1_1/pad_3D_HWC_nnfw.mod.py
index d98ec6c26..d98ec6c26 100644..100755
--- a/tests/nnapi/specs/V1_1/pad_3D_HWC_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/pad_3D_HWC_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/pad_BHWC_nnfw.mod.py b/tests/nnapi/specs/V1_1/pad_BHWC_nnfw.mod.py
index 2495abbb5..2495abbb5 100644..100755
--- a/tests/nnapi/specs/V1_1/pad_BHWC_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/pad_BHWC_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/pad_BHW_nnfw.mod.py b/tests/nnapi/specs/V1_1/pad_BHW_nnfw.mod.py
index 91a21852a..91a21852a 100644..100755
--- a/tests/nnapi/specs/V1_1/pad_BHW_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/pad_BHW_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/pad_HWD_nnfw.mod.py b/tests/nnapi/specs/V1_1/pad_HWD_nnfw.mod.py
index a021b3b59..a021b3b59 100644..100755
--- a/tests/nnapi/specs/V1_1/pad_HWD_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/pad_HWD_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/pad_float_1.mod.py b/tests/nnapi/specs/V1_1/pad_float_1.mod.py
index 081712769..081712769 100644..100755
--- a/tests/nnapi/specs/V1_1/pad_float_1.mod.py
+++ b/tests/nnapi/specs/V1_1/pad_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_1/pad_quant8_nnfw.mod.py b/tests/nnapi/specs/V1_1/pad_quant8_nnfw.mod.py
index c23ab7de0..c23ab7de0 100644..100755
--- a/tests/nnapi/specs/V1_1/pad_quant8_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/pad_quant8_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/space_to_batch.mod.py b/tests/nnapi/specs/V1_1/space_to_batch.mod.py
index 8c10231f0..8c10231f0 100644..100755
--- a/tests/nnapi/specs/V1_1/space_to_batch.mod.py
+++ b/tests/nnapi/specs/V1_1/space_to_batch.mod.py
diff --git a/tests/nnapi/specs/V1_1/space_to_batch_float_1.mod.py b/tests/nnapi/specs/V1_1/space_to_batch_float_1.mod.py
index 890ced869..890ced869 100644..100755
--- a/tests/nnapi/specs/V1_1/space_to_batch_float_1.mod.py
+++ b/tests/nnapi/specs/V1_1/space_to_batch_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_1/space_to_batch_float_1_nnfw.mod.py b/tests/nnapi/specs/V1_1/space_to_batch_float_1_nnfw.mod.py
index db0a994eb..db0a994eb 100644..100755
--- a/tests/nnapi/specs/V1_1/space_to_batch_float_1_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/space_to_batch_float_1_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/space_to_batch_float_2.mod.py b/tests/nnapi/specs/V1_1/space_to_batch_float_2.mod.py
index c6259005a..c6259005a 100644..100755
--- a/tests/nnapi/specs/V1_1/space_to_batch_float_2.mod.py
+++ b/tests/nnapi/specs/V1_1/space_to_batch_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_1/space_to_batch_float_3.mod.py b/tests/nnapi/specs/V1_1/space_to_batch_float_3.mod.py
index 9d7c8b313..9d7c8b313 100644..100755
--- a/tests/nnapi/specs/V1_1/space_to_batch_float_3.mod.py
+++ b/tests/nnapi/specs/V1_1/space_to_batch_float_3.mod.py
diff --git a/tests/nnapi/specs/V1_1/space_to_batch_quant8_1.mod.py b/tests/nnapi/specs/V1_1/space_to_batch_quant8_1.mod.py
index 726250d3f..726250d3f 100644..100755
--- a/tests/nnapi/specs/V1_1/space_to_batch_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_1/space_to_batch_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_1/space_to_batch_quant8_1_nnfw.mod.py b/tests/nnapi/specs/V1_1/space_to_batch_quant8_1_nnfw.mod.py
index b720c7a8c..b720c7a8c 100644..100755
--- a/tests/nnapi/specs/V1_1/space_to_batch_quant8_1_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/space_to_batch_quant8_1_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/space_to_batch_quant8_2.mod.py b/tests/nnapi/specs/V1_1/space_to_batch_quant8_2.mod.py
index 8adc2623a..8adc2623a 100644..100755
--- a/tests/nnapi/specs/V1_1/space_to_batch_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_1/space_to_batch_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_1/space_to_batch_quant8_2_nnfw.mod.py b/tests/nnapi/specs/V1_1/space_to_batch_quant8_2_nnfw.mod.py
index 47b721a6c..47b721a6c 100644..100755
--- a/tests/nnapi/specs/V1_1/space_to_batch_quant8_2_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/space_to_batch_quant8_2_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/space_to_batch_quant8_3.mod.py b/tests/nnapi/specs/V1_1/space_to_batch_quant8_3.mod.py
index e9e88bbd6..e9e88bbd6 100644..100755
--- a/tests/nnapi/specs/V1_1/space_to_batch_quant8_3.mod.py
+++ b/tests/nnapi/specs/V1_1/space_to_batch_quant8_3.mod.py
diff --git a/tests/nnapi/specs/V1_1/squeeze.mod.py b/tests/nnapi/specs/V1_1/squeeze.mod.py
index 4bf3189fa..4bf3189fa 100644..100755
--- a/tests/nnapi/specs/V1_1/squeeze.mod.py
+++ b/tests/nnapi/specs/V1_1/squeeze.mod.py
diff --git a/tests/nnapi/specs/V1_1/squeeze_2D_float_1_nnfw.mod.py b/tests/nnapi/specs/V1_1/squeeze_2D_float_1_nnfw.mod.py
index 8397902e3..8397902e3 100644..100755
--- a/tests/nnapi/specs/V1_1/squeeze_2D_float_1_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/squeeze_2D_float_1_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/squeeze_float_1.mod.py b/tests/nnapi/specs/V1_1/squeeze_float_1.mod.py
index 1a54ae7a1..1a54ae7a1 100644..100755
--- a/tests/nnapi/specs/V1_1/squeeze_float_1.mod.py
+++ b/tests/nnapi/specs/V1_1/squeeze_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_1/squeeze_quant8_1.mod.py b/tests/nnapi/specs/V1_1/squeeze_quant8_1.mod.py
index 5710c1d9a..5710c1d9a 100644..100755
--- a/tests/nnapi/specs/V1_1/squeeze_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_1/squeeze_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice.mod.py b/tests/nnapi/specs/V1_1/strided_slice.mod.py
index 9bc94d110..9bc94d110 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_float_1.mod.py b/tests/nnapi/specs/V1_1/strided_slice_float_1.mod.py
index 0725cff30..0725cff30 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_float_1.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_float_10.mod.py b/tests/nnapi/specs/V1_1/strided_slice_float_10.mod.py
index 178421f53..178421f53 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_float_10.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_float_10.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_float_11.mod.py b/tests/nnapi/specs/V1_1/strided_slice_float_11.mod.py
index 444ae63b6..444ae63b6 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_float_11.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_float_11.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_float_2.mod.py b/tests/nnapi/specs/V1_1/strided_slice_float_2.mod.py
index 7dd3d8399..7dd3d8399 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_float_2.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_float_2.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_float_3.mod.py b/tests/nnapi/specs/V1_1/strided_slice_float_3.mod.py
index e476bca08..e476bca08 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_float_3.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_float_3.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_float_4.mod.py b/tests/nnapi/specs/V1_1/strided_slice_float_4.mod.py
index 939cc1457..939cc1457 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_float_4.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_float_4.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_float_5.mod.py b/tests/nnapi/specs/V1_1/strided_slice_float_5.mod.py
index db7372708..db7372708 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_float_5.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_float_5.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_float_6.mod.py b/tests/nnapi/specs/V1_1/strided_slice_float_6.mod.py
index c8d42d95a..c8d42d95a 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_float_6.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_float_6.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_float_7.mod.py b/tests/nnapi/specs/V1_1/strided_slice_float_7.mod.py
index 668748a91..668748a91 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_float_7.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_float_7.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_float_8.mod.py b/tests/nnapi/specs/V1_1/strided_slice_float_8.mod.py
index 2c1cc9416..2c1cc9416 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_float_8.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_float_8.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_float_9.mod.py b/tests/nnapi/specs/V1_1/strided_slice_float_9.mod.py
index 4bafd3da6..4bafd3da6 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_float_9.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_float_9.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_qaunt8_10.mod.py b/tests/nnapi/specs/V1_1/strided_slice_qaunt8_10.mod.py
index fc29552ac..fc29552ac 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_qaunt8_10.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_qaunt8_10.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_qaunt8_11.mod.py b/tests/nnapi/specs/V1_1/strided_slice_qaunt8_11.mod.py
index d7374ab29..d7374ab29 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_qaunt8_11.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_qaunt8_11.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_quant8_1.mod.py b/tests/nnapi/specs/V1_1/strided_slice_quant8_1.mod.py
index 4b76de27b..4b76de27b 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_quant8_2.mod.py b/tests/nnapi/specs/V1_1/strided_slice_quant8_2.mod.py
index d6cd6aa6f..d6cd6aa6f 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_quant8_2.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_quant8_3.mod.py b/tests/nnapi/specs/V1_1/strided_slice_quant8_3.mod.py
index 411a6fa88..411a6fa88 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_quant8_3.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_quant8_3.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_quant8_4.mod.py b/tests/nnapi/specs/V1_1/strided_slice_quant8_4.mod.py
index f8a54f29d..f8a54f29d 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_quant8_4.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_quant8_4.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_quant8_5.mod.py b/tests/nnapi/specs/V1_1/strided_slice_quant8_5.mod.py
index 4fa42f5f0..4fa42f5f0 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_quant8_5.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_quant8_5.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_quant8_6.mod.py b/tests/nnapi/specs/V1_1/strided_slice_quant8_6.mod.py
index bcd8841f0..bcd8841f0 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_quant8_6.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_quant8_6.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_quant8_7.mod.py b/tests/nnapi/specs/V1_1/strided_slice_quant8_7.mod.py
index e1ae9db6b..e1ae9db6b 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_quant8_7.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_quant8_7.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_quant8_8.mod.py b/tests/nnapi/specs/V1_1/strided_slice_quant8_8.mod.py
index 6531dd3d4..6531dd3d4 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_quant8_8.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_quant8_8.mod.py
diff --git a/tests/nnapi/specs/V1_1/strided_slice_quant8_9.mod.py b/tests/nnapi/specs/V1_1/strided_slice_quant8_9.mod.py
index 7f066011e..7f066011e 100644..100755
--- a/tests/nnapi/specs/V1_1/strided_slice_quant8_9.mod.py
+++ b/tests/nnapi/specs/V1_1/strided_slice_quant8_9.mod.py
diff --git a/tests/nnapi/specs/V1_1/sub.mod.py b/tests/nnapi/specs/V1_1/sub.mod.py
index 1e4afb205..1e4afb205 100644..100755
--- a/tests/nnapi/specs/V1_1/sub.mod.py
+++ b/tests/nnapi/specs/V1_1/sub.mod.py
diff --git a/tests/nnapi/specs/V1_1/sub_broadcast_4D_2D_float_nnfw.mod.py b/tests/nnapi/specs/V1_1/sub_broadcast_4D_2D_float_nnfw.mod.py
index ede48f083..ede48f083 100644..100755
--- a/tests/nnapi/specs/V1_1/sub_broadcast_4D_2D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/sub_broadcast_4D_2D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/sub_broadcast_float.mod.py b/tests/nnapi/specs/V1_1/sub_broadcast_float.mod.py
index 53bdf9e86..53bdf9e86 100644..100755
--- a/tests/nnapi/specs/V1_1/sub_broadcast_float.mod.py
+++ b/tests/nnapi/specs/V1_1/sub_broadcast_float.mod.py
diff --git a/tests/nnapi/specs/V1_1/transpose.mod.py b/tests/nnapi/specs/V1_1/transpose.mod.py
index 49f15a7cb..49f15a7cb 100644..100755
--- a/tests/nnapi/specs/V1_1/transpose.mod.py
+++ b/tests/nnapi/specs/V1_1/transpose.mod.py
diff --git a/tests/nnapi/specs/V1_1/transpose_2D_nnfw.mod.py b/tests/nnapi/specs/V1_1/transpose_2D_nnfw.mod.py
index 2397aa0d7..2397aa0d7 100644..100755
--- a/tests/nnapi/specs/V1_1/transpose_2D_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/transpose_2D_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/transpose_3D_nnfw.mod.py b/tests/nnapi/specs/V1_1/transpose_3D_nnfw.mod.py
index b2939dc41..b2939dc41 100644..100755
--- a/tests/nnapi/specs/V1_1/transpose_3D_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_1/transpose_3D_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_1/transpose_float_1.mod.py b/tests/nnapi/specs/V1_1/transpose_float_1.mod.py
index e8f0ea84b..e8f0ea84b 100644..100755
--- a/tests/nnapi/specs/V1_1/transpose_float_1.mod.py
+++ b/tests/nnapi/specs/V1_1/transpose_float_1.mod.py
diff --git a/tests/nnapi/specs/V1_1/transpose_quant8_1.mod.py b/tests/nnapi/specs/V1_1/transpose_quant8_1.mod.py
index 6893a62e6..6893a62e6 100644..100755
--- a/tests/nnapi/specs/V1_1/transpose_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_1/transpose_quant8_1.mod.py
diff --git a/tests/nnapi/specs/V1_2/abs_.mod.py b/tests/nnapi/specs/V1_2/abs_.mod.py
index 376769e55..376769e55 100644..100755
--- a/tests/nnapi/specs/V1_2/abs_.mod.py
+++ b/tests/nnapi/specs/V1_2/abs_.mod.py
diff --git a/tests/nnapi/specs/V1_2/abs_1D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/abs_1D_float_nnfw.mod.py
new file mode 100755
index 000000000..6366f6e04
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/abs_1D_float_nnfw.mod.py
@@ -0,0 +1,20 @@
+# model
+model = Model()
+
+i1 = Input("input", "TENSOR_FLOAT32", "{10}")
+i2 = Output("output", "TENSOR_FLOAT32", "{10}")
+model = model.Operation("ABS", i1).To(i2)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [0.778, -0.48, -241, 0.9118, -0.466,
+ -30.29, -0.4951, -0.4460, 0.555,
+ 0.11310]}
+
+output0 = {i2: # output 0
+ [0.778, 0.48, 241, 0.9118, 0.466,
+ 30.29, 0.4951, 0.4460, 0.555,
+ 0.11310]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/abs_2D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/abs_2D_float_nnfw.mod.py
new file mode 100755
index 000000000..901127b8d
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/abs_2D_float_nnfw.mod.py
@@ -0,0 +1,20 @@
+# model
+model = Model()
+
+i1 = Input("input", "TENSOR_FLOAT32", "{5, 2}")
+i2 = Output("output", "TENSOR_FLOAT32", "{5, 2}")
+model = model.Operation("ABS", i1).To(i2)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [0.735078, -0.46738, -241, 0.9118, -0.46686,
+ -3150.219, -0.495291, -0.42874460, 0.5005046655,
+ 0.1131106620]}
+
+output0 = {i2: # output 0
+ [0.735078, 0.46738, 241, 0.9118, 0.46686,
+ 3150.219, 0.495291, 0.42874460, 0.5005046655,
+ 0.1131106620]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/abs_3D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/abs_3D_float_nnfw.mod.py
new file mode 100755
index 000000000..b5ab39482
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/abs_3D_float_nnfw.mod.py
@@ -0,0 +1,20 @@
+# model
+model = Model()
+
+i1 = Input("input", "TENSOR_FLOAT32", "{2, 3, 2}")
+i2 = Output("output", "TENSOR_FLOAT32", "{2, 3, 2}")
+model = model.Operation("ABS", i1).To(i2)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [0.735078, -0.46738, -241, 0.9118, -0.46686,
+ -3150.219, -0.495291, -0.42874460, 0.5005046655,
+ 0.1131106620, -40.0, 15.0]}
+
+output0 = {i2: # output 0
+ [0.735078, 0.46738, 241, 0.9118, 0.46686,
+ 3150.219, 0.495291, 0.42874460, 0.5005046655,
+ 0.1131106620, 40.0, 15.0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/abs_4D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/abs_4D_float_nnfw.mod.py
new file mode 100755
index 000000000..b7e749f5d
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/abs_4D_float_nnfw.mod.py
@@ -0,0 +1,18 @@
+# model
+model = Model()
+
+i1 = Input("input", "TENSOR_FLOAT32", "{2, 2, 2, 2}")
+i2 = Output("output", "TENSOR_FLOAT32", "{2, 2, 2, 2}")
+model = model.Operation("ABS", i1).To(i2)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 1.99, -1.4, 0.0001, 0.0002, 16.0, 25.0, 100.0,
+ 23.0, 19.0, -40.0, 15.0, 4.0, -43.0, -0.35355339059, 0.35355339059]}
+
+output0 = {i2: # output 0
+ [1.0, 1.99, 1.4, 0.0001, 0.0002, 16.0, 25.0, 100.0,
+ 23.0, 19.0, 40.0, 15.0, 4.0, 43.0, 0.35355339059, 0.35355339059]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/skip/V1_2/argmax_1.mod.py b/tests/nnapi/specs/V1_2/argmax_1.mod.py
index 6dc7430af..6dc7430af 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/argmax_1.mod.py
+++ b/tests/nnapi/specs/V1_2/argmax_1.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/argmax_2.mod.py b/tests/nnapi/specs/V1_2/argmax_2.mod.py
index 69be60740..69be60740 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/argmax_2.mod.py
+++ b/tests/nnapi/specs/V1_2/argmax_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/argmax_3.mod.py b/tests/nnapi/specs/V1_2/argmax_3.mod.py
index ab7afc60a..ab7afc60a 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/argmax_3.mod.py
+++ b/tests/nnapi/specs/V1_2/argmax_3.mod.py
diff --git a/tests/nnapi/specs/V1_2/argmax_float_1_nnfw.mod.py b/tests/nnapi/specs/V1_2/argmax_float_1_nnfw.mod.py
new file mode 100755
index 000000000..b7d4b8a8e
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/argmax_float_1_nnfw.mod.py
@@ -0,0 +1,18 @@
+model = Model()
+i1 = Input("input", "TENSOR_FLOAT32", "{1, 2, 2, 1}")
+axis = Parameter("axis", "TENSOR_INT32", "{1}", [1])
+output = Output("output", "TENSOR_INT32", "{1, 2, 1}")
+
+model = model.Operation("ARGMAX", i1, axis).To(output)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 4.0,
+ 2.0, 3.0]}
+
+output0 = {output: # output 0
+ [1,
+ 0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/argmax_float_2_nnfw.mod.py b/tests/nnapi/specs/V1_2/argmax_float_2_nnfw.mod.py
new file mode 100755
index 000000000..2e614f7ea
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/argmax_float_2_nnfw.mod.py
@@ -0,0 +1,18 @@
+model = Model()
+i1 = Input("input", "TENSOR_FLOAT32", "{1, 2, 2, 2}")
+axis = Parameter("axis", "TENSOR_INT32", "{1}", [3])
+output = Output("output", "TENSOR_INT32", "{1, 2, 2}")
+
+model = model.Operation("ARGMAX", i1, axis).To(output)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 7.0, 8.0,
+ 1.0, 9.0, 7.0, 3.0]}
+
+output0 = {output: # output 0
+ [1,1,
+ 1,0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/argmax_int32_nnfw.mod.py b/tests/nnapi/specs/V1_2/argmax_int32_nnfw.mod.py
new file mode 100755
index 000000000..fb3151f89
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/argmax_int32_nnfw.mod.py
@@ -0,0 +1,18 @@
+model = Model()
+i1 = Input("input", "TENSOR_INT32", "{1, 2, 2, 1}")
+axis = Parameter("axis", "TENSOR_INT32", "{1}", [1])
+output = Output("output", "TENSOR_INT32", "{1, 2, 1}")
+
+model = model.Operation("ARGMAX", i1, axis).To(output)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 4,
+ 2, 3]}
+
+output0 = {output: # output 0
+ [1,
+ 0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/argmax_neg_axis_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/argmax_neg_axis_float_nnfw.mod.py
new file mode 100755
index 000000000..4fc573b31
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/argmax_neg_axis_float_nnfw.mod.py
@@ -0,0 +1,17 @@
+model = Model()
+i1 = Input("input", "TENSOR_FLOAT32", "{1, 2, 4, 1}")
+axis = Parameter("axis", "TENSOR_INT32", "{1}", [-3])
+output = Output("output", "TENSOR_INT32", "{1, 4, 1}")
+
+model = model.Operation("ARGMAX", i1, axis).To(output)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 7.0, 8.0,
+ 1.0, 9.0, 7.0, 3.0]}
+
+output0 = {output: # output 0
+ [0, 1, 0, 0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/argmax_neg_axis_int32_nnfw.mod.py b/tests/nnapi/specs/V1_2/argmax_neg_axis_int32_nnfw.mod.py
new file mode 100755
index 000000000..426a03591
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/argmax_neg_axis_int32_nnfw.mod.py
@@ -0,0 +1,17 @@
+model = Model()
+i1 = Input("input", "TENSOR_INT32", "{1, 2, 4, 1}")
+axis = Parameter("axis", "TENSOR_INT32", "{1}", [-3])
+output = Output("output", "TENSOR_INT32", "{1, 4, 1}")
+
+model = model.Operation("ARGMAX", i1, axis).To(output)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 7, 8,
+ 1, 9, 7, 3]}
+
+output0 = {output: # output 0
+ [0, 1, 0, 0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/argmax_quant8_neg_axis_nnfw.mod.py b/tests/nnapi/specs/V1_2/argmax_quant8_neg_axis_nnfw.mod.py
new file mode 100755
index 000000000..a6a1a6500
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/argmax_quant8_neg_axis_nnfw.mod.py
@@ -0,0 +1,17 @@
+model = Model()
+i1 = Input("input", "TENSOR_QUANT8_ASYMM", "{1, 2, 4, 1}, 0.5f, 5")
+axis = Parameter("axis", "TENSOR_INT32", "{1}", [-3])
+output = Output("output", "TENSOR_INT32", "{1, 4, 1}")
+
+model = model.Operation("ARGMAX", i1, axis).To(output)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 7, 8,
+ 1, 9, 7, 3]}
+
+output0 = {output: # output 0
+ [0, 1, 0, 0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/argmax_quant8_nnfw.mod.py b/tests/nnapi/specs/V1_2/argmax_quant8_nnfw.mod.py
new file mode 100755
index 000000000..38d6a0b63
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/argmax_quant8_nnfw.mod.py
@@ -0,0 +1,18 @@
+model = Model()
+i1 = Input("input", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 2")
+axis = Parameter("axis", "TENSOR_INT32", "{1}", [1])
+output = Output("output", "TENSOR_INT32", "{1, 2, 1}")
+
+model = model.Operation("ARGMAX", i1, axis).To(output)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 4,
+ 2, 3]}
+
+output0 = {output: # output 0
+ [1,
+ 0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/cast.mod.py b/tests/nnapi/specs/V1_2/cast.mod.py
index f1d93ce7d..f1d93ce7d 100644..100755
--- a/tests/nnapi/specs/V1_2/cast.mod.py
+++ b/tests/nnapi/specs/V1_2/cast.mod.py
diff --git a/tests/nnapi/specs/V1_2/cast_float32_to_int32_nnfw.mod.py b/tests/nnapi/specs/V1_2/cast_float32_to_int32_nnfw.mod.py
index 926508d99..926508d99 100644..100755
--- a/tests/nnapi/specs/V1_2/cast_float32_to_int32_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/cast_float32_to_int32_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/cast_int32_to_float32_nnfw.mod.py b/tests/nnapi/specs/V1_2/cast_int32_to_float32_nnfw.mod.py
index a4f2aeb42..a4f2aeb42 100644..100755
--- a/tests/nnapi/specs/V1_2/cast_int32_to_float32_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/cast_int32_to_float32_nnfw.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/equal.mod.py b/tests/nnapi/specs/V1_2/equal.mod.py
index d7c40fe63..d7c40fe63 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/equal.mod.py
+++ b/tests/nnapi/specs/V1_2/equal.mod.py
diff --git a/tests/nnapi/specs/V1_2/equal_1D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/equal_1D_float_nnfw.mod.py
new file mode 100755
index 000000000..5b79b679a
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/equal_1D_float_nnfw.mod.py
@@ -0,0 +1,18 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{3}") # a vector of input
+i2 = Input("op2", "TENSOR_FLOAT32", "{3}") # a vector of input
+i3 = Output("op3", "TENSOR_BOOL8", "{3}") # a vector of output
+model = model.Operation("EQUAL", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [2.0, 3.254232, 5.1232],
+ i2: # input 1
+ [2.0, 3.254111, 5.1232]}
+
+output0 = {i3: # output 0
+ [True, False, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/equal_4D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/equal_4D_float_nnfw.mod.py
new file mode 100755
index 000000000..19925506b
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/equal_4D_float_nnfw.mod.py
@@ -0,0 +1,18 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{1, 2, 2, 1}") # a vector of input
+i2 = Input("op2", "TENSOR_FLOAT32", "{1, 2, 2, 1}") # a vector of input
+i3 = Output("op3", "TENSOR_BOOL8", "{1, 2, 2, 1}") # a vector of output
+model = model.Operation("EQUAL", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [0, 1543.25454532, 5.1232, 10.1],
+ i2: # input 1
+ [0, 5313.25414521, 5.1, 10.1]}
+
+output0 = {i3: # output 0
+ [True, False, False, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/equal_broadcast_4D_2D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/equal_broadcast_4D_2D_float_nnfw.mod.py
new file mode 100755
index 000000000..5d07548d7
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/equal_broadcast_4D_2D_float_nnfw.mod.py
@@ -0,0 +1,30 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{3, 2, 2, 2}")
+i2 = Input("op2", "TENSOR_FLOAT32", "{2, 2}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{3, 2, 2, 2}")
+model = model.Operation("EQUAL", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [4.89, 11.0, 9.75, 10.20,
+ 8.25, 2.0, 1.15, 0.0,
+ 3.0, 1.0, 8.25, 6.0,
+ 8.45, 3.0, 8.25, 1.2,
+ 0.0, 3.0, 2.0, 7.34,
+ 4.3, 9.56, 11.0, 3.0],
+ i2: # input 1
+ [8.25, 3.0, 2.0, 10.20]}
+
+output0 = {i3: # output 0
+ [False, False, False, True,
+ True, False, False, False,
+ False, False, False, False,
+ False, True, False, False,
+ False, True, True, False,
+ False, False, False, False]
+ }
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/Ex/equal_ex_broadcast_float.mod.py b/tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py
index 279c000ba..279c000ba 100644..100755
--- a/tests/nnapi/specs/Ex/equal_ex_broadcast_float.mod.py
+++ b/tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/equal_quant8_nnfw.mod.py b/tests/nnapi/specs/V1_2/equal_quant8_nnfw.mod.py
new file mode 100755
index 000000000..8e4e0f23c
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/equal_quant8_nnfw.mod.py
@@ -0,0 +1,18 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{3}, 1.f, 0") # a vector of input
+i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{3}, 1.f, 0") # a vector of input
+i3 = Output("op3", "TENSOR_BOOL8", "{3}") # a vector of output
+model = model.Operation("EQUAL", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [2, 3, 0],
+ i2: # input 1
+ [2, 9, 0]}
+
+output0 = {i3: # output 0
+ [True, False, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/exp_.mod.py b/tests/nnapi/specs/V1_2/exp_.mod.py
index 135f45125..135f45125 100644..100755
--- a/tests/nnapi/specs/V1_2/exp_.mod.py
+++ b/tests/nnapi/specs/V1_2/exp_.mod.py
diff --git a/tests/nnapi/specs/V1_2/exp_1D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/exp_1D_float_nnfw.mod.py
index 8258970f0..8258970f0 100644..100755
--- a/tests/nnapi/specs/V1_2/exp_1D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/exp_1D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/exp_2D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/exp_2D_float_nnfw.mod.py
index 4cdb7b7e5..4cdb7b7e5 100644..100755
--- a/tests/nnapi/specs/V1_2/exp_2D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/exp_2D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/exp_3D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/exp_3D_float_nnfw.mod.py
index 9ed45c7c1..9ed45c7c1 100644..100755
--- a/tests/nnapi/specs/V1_2/exp_3D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/exp_3D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/exp_4D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/exp_4D_float_nnfw.mod.py
index 664336270..664336270 100644..100755
--- a/tests/nnapi/specs/V1_2/exp_4D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/exp_4D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather.mod.py b/tests/nnapi/specs/V1_2/gather.mod.py
index d5e1ef73d..d5e1ef73d 100644..100755
--- a/tests/nnapi/specs/V1_2/gather.mod.py
+++ b/tests/nnapi/specs/V1_2/gather.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_1D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_1D_float_nnfw.mod.py
index 4596467d8..4596467d8 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_1D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_1D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_1D_int32_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_1D_int32_nnfw.mod.py
index 8fe961bbc..8fe961bbc 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_1D_int32_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_1D_int32_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_1D_quant8_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_1D_quant8_nnfw.mod.py
index 7699d6c50..7699d6c50 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_1D_quant8_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_1D_quant8_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_2D_2D_float_1_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_2D_2D_float_1_nnfw.mod.py
index 13be0df06..13be0df06 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_2D_2D_float_1_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_2D_2D_float_1_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_2D_2D_float_2_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_2D_2D_float_2_nnfw.mod.py
index 4903c97ef..4903c97ef 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_2D_2D_float_2_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_2D_2D_float_2_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_2D_3D_float_1_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_2D_3D_float_1_nnfw.mod.py
index f4c81cd27..f4c81cd27 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_2D_3D_float_1_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_2D_3D_float_1_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_2D_3D_float_2_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_2D_3D_float_2_nnfw.mod.py
index eb1cbcbf9..eb1cbcbf9 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_2D_3D_float_2_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_2D_3D_float_2_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_2D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_2D_float_nnfw.mod.py
index 5d35080ed..5d35080ed 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_2D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_2D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_2D_int32_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_2D_int32_nnfw.mod.py
index 7a5d7526e..7a5d7526e 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_2D_int32_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_2D_int32_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_2D_quant8_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_2D_quant8_nnfw.mod.py
index c777d34b9..c777d34b9 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_2D_quant8_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_2D_quant8_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_3D_2D_float_1_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_3D_2D_float_1_nnfw.mod.py
index be138a0cf..be138a0cf 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_3D_2D_float_1_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_3D_2D_float_1_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_3D_2D_float_2_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_3D_2D_float_2_nnfw.mod.py
index 9e16ee255..9e16ee255 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_3D_2D_float_2_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_3D_2D_float_2_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_3D_2D_float_3_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_3D_2D_float_3_nnfw.mod.py
index 6b96b0841..6b96b0841 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_3D_2D_float_3_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_3D_2D_float_3_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_4D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/gather_4D_float_nnfw.mod.py
index b5a4ec0fc..b5a4ec0fc 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_4D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_4D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/gather_higher_rank.mod.py b/tests/nnapi/specs/V1_2/gather_higher_rank.mod.py
index a7c5b3848..a7c5b3848 100644..100755
--- a/tests/nnapi/specs/V1_2/gather_higher_rank.mod.py
+++ b/tests/nnapi/specs/V1_2/gather_higher_rank.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/greater_equal.mod.py b/tests/nnapi/specs/V1_2/greater_equal.mod.py
index d6c76faff..d6c76faff 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/greater_equal.mod.py
+++ b/tests/nnapi/specs/V1_2/greater_equal.mod.py
diff --git a/tests/nnapi/specs/V1_2/greater_equal_nnfw.mod.py b/tests/nnapi/specs/V1_2/greater_equal_nnfw.mod.py
new file mode 100755
index 000000000..8fd7b710f
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/greater_equal_nnfw.mod.py
@@ -0,0 +1,35 @@
+#
+# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{2, 1}")
+i2 = Input("op2", "TENSOR_FLOAT32", "{2}")
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 2}")
+model = model.Operation("GREATER_EQUAL", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [5, 10],
+ i2: # input 1
+ [10, 5]}
+
+output0 = {i3: # output 0
+ [False, True, True, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/skip/V1_2/less.mod.py b/tests/nnapi/specs/V1_2/less.mod.py
index 182d69d67..182d69d67 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/less.mod.py
+++ b/tests/nnapi/specs/V1_2/less.mod.py
diff --git a/tests/nnapi/specs/V1_2/less_nnfw.mod.py b/tests/nnapi/specs/V1_2/less_nnfw.mod.py
new file mode 100755
index 000000000..b03c0c14d
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/less_nnfw.mod.py
@@ -0,0 +1,35 @@
+#
+# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{2, 1}")
+i2 = Input("op2", "TENSOR_FLOAT32", "{2}")
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 2}")
+model = model.Operation("LESS", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [5, 10],
+ i2: # input 1
+ [10, 5]}
+
+output0 = {i3: # output 0
+ [True, False, False, False]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/skip/V1_2/logical_and.mod.py b/tests/nnapi/specs/V1_2/logical_and.mod.py
index c831bb2a6..c831bb2a6 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/logical_and.mod.py
+++ b/tests/nnapi/specs/V1_2/logical_and.mod.py
diff --git a/tests/nnapi/specs/V1_2/logical_and_1D_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_and_1D_nnfw.mod.py
new file mode 100755
index 000000000..173ccfab0
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_and_1D_nnfw.mod.py
@@ -0,0 +1,19 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_BOOL8", "{4}")
+i2 = Input("op2", "TENSOR_BOOL8", "{4}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{4}")
+model = model.Operation("LOGICAL_AND", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [False, False, True, True],
+ i2: # input 1
+ [False, True, False, True]}
+
+output0 = {i3: # output 0
+ [False, False, False, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/logical_and_2D_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_and_2D_nnfw.mod.py
new file mode 100755
index 000000000..3b9b4a4c8
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_and_2D_nnfw.mod.py
@@ -0,0 +1,19 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_BOOL8", "{2, 2}")
+i2 = Input("op2", "TENSOR_BOOL8", "{2, 2}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 2}")
+model = model.Operation("LOGICAL_AND", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [False, False, True, True],
+ i2: # input 1
+ [False, True, False, True]}
+
+output0 = {i3: # output 0
+ [False, False, False, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/logical_and_3D_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_and_3D_nnfw.mod.py
new file mode 100755
index 000000000..3f0372e71
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_and_3D_nnfw.mod.py
@@ -0,0 +1,19 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_BOOL8", "{2, 2, 2}")
+i2 = Input("op2", "TENSOR_BOOL8", "{2, 2, 2}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 2, 2}")
+model = model.Operation("LOGICAL_AND", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [False, False, True, True, False, True, False, True],
+ i2: # input 1
+ [False, True, False, True, False, False, True, True]}
+
+output0 = {i3: # output 0
+ [False, False, False, True, False, False, False, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/logical_and_4D_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_and_4D_nnfw.mod.py
new file mode 100755
index 000000000..26820d866
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_and_4D_nnfw.mod.py
@@ -0,0 +1,19 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_BOOL8", "{2, 1, 2, 2}")
+i2 = Input("op2", "TENSOR_BOOL8", "{2, 1, 2, 2}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 1, 2, 2}")
+model = model.Operation("LOGICAL_AND", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [False, False, True, True, False, True, False, True],
+ i2: # input 1
+ [False, True, False, True, False, False, True, True]}
+
+output0 = {i3: # output 0
+ [False, False, False, True, False, False, False, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/logical_and_broadcast_4D_2D_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_and_broadcast_4D_2D_nnfw.mod.py
new file mode 100755
index 000000000..1a2b5bedc
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_and_broadcast_4D_2D_nnfw.mod.py
@@ -0,0 +1,25 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_BOOL8", "{2, 2, 2, 2}")
+i2 = Input("op2", "TENSOR_BOOL8", "{2, 2}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 2, 2, 2}")
+model = model.Operation("LOGICAL_AND", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [False, False, True, True,
+ True, True, False, False,
+ False, False, False, False,
+ True, True, True, True],
+ i2: # input 1
+ [False, True, False, True]}
+
+output0 = {i3: # output 0
+ [False, False, False, True,
+ False, True, False, False,
+ False, False, False, False,
+ False, True, False, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/logical_and_broadcast_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_and_broadcast_nnfw.mod.py
new file mode 100755
index 000000000..817aab322
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_and_broadcast_nnfw.mod.py
@@ -0,0 +1,19 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_BOOL8", "{1, 2, 2, 1}")
+i2 = Input("op2", "TENSOR_BOOL8", "{1, 1, 2, 1}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{1, 2, 2, 1}")
+model = model.Operation("LOGICAL_AND", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [False, False, True, True],
+ i2: # input 1
+ [False, True]}
+
+output0 = {i3: # output 0
+ [False, False, False, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/skip/V1_2/logical_not.mod.py b/tests/nnapi/specs/V1_2/logical_not.mod.py
index 04ca64680..04ca64680 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/logical_not.mod.py
+++ b/tests/nnapi/specs/V1_2/logical_not.mod.py
diff --git a/tests/nnapi/specs/V1_2/logical_not_1D_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_not_1D_nnfw.mod.py
new file mode 100755
index 000000000..5c39692e8
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_not_1D_nnfw.mod.py
@@ -0,0 +1,16 @@
+# model
+model = Model()
+i = Input("op1", "TENSOR_BOOL8", "{4}")
+
+o = Output("op2", "TENSOR_BOOL8", "{4}")
+model = model.Operation("LOGICAL_NOT", i).To(o)
+
+# Example 1. Input
+input0 = {i: # input
+ [True, False, True, True]}
+
+output0 = {o: # output
+ [False, True, False, False]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/logical_not_4D_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_not_4D_nnfw.mod.py
new file mode 100755
index 000000000..34fecbcf5
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_not_4D_nnfw.mod.py
@@ -0,0 +1,16 @@
+# model
+model = Model()
+i = Input("op1", "TENSOR_BOOL8", "{1, 2, 2, 1}") # a vector of input
+
+o = Output("op2", "TENSOR_BOOL8", "{1, 2, 2, 1}") # a vector of output
+model = model.Operation("LOGICAL_NOT", i).To(o)
+
+# Example 1. Input
+input0 = {i: # input
+ [False, True, True, True]}
+
+output0 = {o: # output
+ [True, False, False, False]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/skip/V1_2/logical_or.mod.py b/tests/nnapi/specs/V1_2/logical_or.mod.py
index e4f720dd5..e4f720dd5 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/logical_or.mod.py
+++ b/tests/nnapi/specs/V1_2/logical_or.mod.py
diff --git a/tests/nnapi/specs/V1_2/logical_or_1D_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_or_1D_nnfw.mod.py
new file mode 100755
index 000000000..77843ae06
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_or_1D_nnfw.mod.py
@@ -0,0 +1,19 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_BOOL8", "{4}")
+i2 = Input("op2", "TENSOR_BOOL8", "{4}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{4}")
+model = model.Operation("LOGICAL_OR", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [False, False, True, True],
+          i2: # input 1
+ [False, True, False, True]}
+
+output0 = {i3: # output 0
+ [False, True, True, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/logical_or_2D_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_or_2D_nnfw.mod.py
new file mode 100755
index 000000000..2ba17edd3
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_or_2D_nnfw.mod.py
@@ -0,0 +1,19 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_BOOL8", "{2, 2}")
+i2 = Input("op2", "TENSOR_BOOL8", "{2, 2}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 2}")
+model = model.Operation("LOGICAL_OR", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [False, False, True, True],
+ i2: # input 1
+ [False, True, False, True]}
+
+output0 = {i3: # output 0
+ [False, True, True, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/logical_or_3D_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_or_3D_nnfw.mod.py
new file mode 100755
index 000000000..0fb529c97
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_or_3D_nnfw.mod.py
@@ -0,0 +1,19 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_BOOL8", "{2, 2, 2}")
+i2 = Input("op2", "TENSOR_BOOL8", "{2, 2, 2}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 2, 2}")
+model = model.Operation("LOGICAL_OR", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [False, False, True, True, False, True, False, True],
+ i2: # input 1
+ [False, True, False, True, False, False, True, True]}
+
+output0 = {i3: # output 0
+ [False, True, True, True, False, True, True, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/logical_or_4D_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_or_4D_nnfw.mod.py
new file mode 100755
index 000000000..060900bee
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_or_4D_nnfw.mod.py
@@ -0,0 +1,19 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_BOOL8", "{2, 1, 2, 2}")
+i2 = Input("op2", "TENSOR_BOOL8", "{2, 1, 2, 2}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 1, 2, 2}")
+model = model.Operation("LOGICAL_OR", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [False, False, True, True, False, True, False, True],
+ i2: # input 1
+ [False, True, False, True, False, False, True, True]}
+
+output0 = {i3: # output 0
+ [False, True, True, True, False, True, True, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/logical_or_broadcast_4D_2D_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_or_broadcast_4D_2D_nnfw.mod.py
new file mode 100755
index 000000000..7f6603961
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_or_broadcast_4D_2D_nnfw.mod.py
@@ -0,0 +1,25 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_BOOL8", "{2, 2, 2, 2}")
+i2 = Input("op2", "TENSOR_BOOL8", "{2, 2}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 2, 2, 2}")
+model = model.Operation("LOGICAL_OR", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [False, False, True, True,
+ True, True, False, False,
+ False, False, False, False,
+ True, True, True, True],
+ i2: # input 1
+ [False, True, False, True]}
+
+output0 = {i3: # output 0
+ [False, True, True, True,
+ True, True, False, True,
+ False, True, False, True,
+ True, True, True, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/logical_or_broadcast_nnfw.mod.py b/tests/nnapi/specs/V1_2/logical_or_broadcast_nnfw.mod.py
new file mode 100755
index 000000000..c30cb8659
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/logical_or_broadcast_nnfw.mod.py
@@ -0,0 +1,19 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_BOOL8", "{1, 2, 2, 1}")
+i2 = Input("op2", "TENSOR_BOOL8", "{1, 1, 2, 1}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{1, 2, 2, 1}")
+model = model.Operation("LOGICAL_OR", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [False, False, True, True],
+ i2: # input 1
+ [False, True]}
+
+output0 = {i3: # output 0
+ [False, True, True, True]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/skip/V1_2/maximum.mod.py b/tests/nnapi/specs/V1_2/maximum.mod.py
index 0d37a203c..0d37a203c 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/maximum.mod.py
+++ b/tests/nnapi/specs/V1_2/maximum.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/minimum.mod.py b/tests/nnapi/specs/V1_2/minimum.mod.py
index 76b058612..76b058612 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/minimum.mod.py
+++ b/tests/nnapi/specs/V1_2/minimum.mod.py
diff --git a/tests/nnapi/specs/V1_2/neg.mod.py b/tests/nnapi/specs/V1_2/neg.mod.py
index 82119d28c..82119d28c 100644..100755
--- a/tests/nnapi/specs/V1_2/neg.mod.py
+++ b/tests/nnapi/specs/V1_2/neg.mod.py
diff --git a/tests/nnapi/specs/V1_2/neg_1D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/neg_1D_float_nnfw.mod.py
index 6791f2b5e..6791f2b5e 100644..100755
--- a/tests/nnapi/specs/V1_2/neg_1D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/neg_1D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/neg_2D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/neg_2D_float_nnfw.mod.py
index c5b559046..c5b559046 100644..100755
--- a/tests/nnapi/specs/V1_2/neg_2D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/neg_2D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/neg_3D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/neg_3D_float_nnfw.mod.py
index ef0faba1a..ef0faba1a 100644..100755
--- a/tests/nnapi/specs/V1_2/neg_3D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/neg_3D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/neg_3D_int_nnfw.mod.py b/tests/nnapi/specs/V1_2/neg_3D_int_nnfw.mod.py
index d3e1e435b..d3e1e435b 100644..100755
--- a/tests/nnapi/specs/V1_2/neg_3D_int_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/neg_3D_int_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/neg_4D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/neg_4D_float_nnfw.mod.py
index e29a46ac5..e29a46ac5 100644..100755
--- a/tests/nnapi/specs/V1_2/neg_4D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/neg_4D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/neg_4D_int_nnfw.mod.py b/tests/nnapi/specs/V1_2/neg_4D_int_nnfw.mod.py
index be3d07a3c..be3d07a3c 100644..100755
--- a/tests/nnapi/specs/V1_2/neg_4D_int_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/neg_4D_int_nnfw.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/not_equal.mod.py b/tests/nnapi/specs/V1_2/not_equal.mod.py
index 2c36b5abb..2c36b5abb 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/not_equal.mod.py
+++ b/tests/nnapi/specs/V1_2/not_equal.mod.py
diff --git a/tests/nnapi/specs/V1_2/not_equal_broadcast_4D_2D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/not_equal_broadcast_4D_2D_float_nnfw.mod.py
new file mode 100755
index 000000000..c732e592a
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/not_equal_broadcast_4D_2D_float_nnfw.mod.py
@@ -0,0 +1,30 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{3, 2, 2, 2}")
+i2 = Input("op2", "TENSOR_FLOAT32", "{2, 2}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{3, 2, 2, 2}")
+model = model.Operation("NOT_EQUAL", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [4.25, 11.0, 2.2, 10.3,
+ 8.5, 2.1, 1.0, 0.5,
+ 3.1, 1.0, 8.5, 6.5,
+ 11.2, 3.0, 8.5, 1.0,
+ 0.3, 3.0, 2.1, 7.5,
+ 4.3, 9.2, 11.1, 3.0],
+ i2: # input 1
+ [8.5, 3.0, 2.1, 10.3]}
+
+output0 = {i3: # output 0
+ [True, True, True, False,
+ False, True, True, True,
+ True, True, True, True,
+ True, False, True, True,
+ True, False, False, True,
+ True, True, True, True]
+ }
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/not_equal_broadcast_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/not_equal_broadcast_float_nnfw.mod.py
new file mode 100755
index 000000000..9c1071a2b
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/not_equal_broadcast_float_nnfw.mod.py
@@ -0,0 +1,19 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{2, 2}")
+i2 = Input("op2", "TENSOR_FLOAT32", "{1, 2}")
+
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 2}")
+model = model.Operation("NOT_EQUAL", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [0.2, 3.21, 2.4, 7.44],
+ i2: # input 1
+ [0.21, 7.44]}
+
+output0 = {i3: # output 0
+ [True, True, True, False]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/not_equal_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/not_equal_float_nnfw.mod.py
new file mode 100755
index 000000000..71ca61d6a
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/not_equal_float_nnfw.mod.py
@@ -0,0 +1,18 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{3}") # a vector of input
+i2 = Input("op2", "TENSOR_FLOAT32", "{3}") # a vector of input
+i3 = Output("op3", "TENSOR_BOOL8", "{3}") # a vector of output
+model = model.Operation("NOT_EQUAL", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [2.0, 3.254232, 5.1232],
+ i2: # input 1
+ [2.0, 3.254111, 5.1232]}
+
+output0 = {i3: # output 0
+ [False, True, False]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/not_equal_quant8_nnfw.mod.py b/tests/nnapi/specs/V1_2/not_equal_quant8_nnfw.mod.py
new file mode 100755
index 000000000..0f775496c
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/not_equal_quant8_nnfw.mod.py
@@ -0,0 +1,18 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{3}, 1.f, 0") # a vector of input
+i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{3}, 1.f, 0") # a vector of input
+i3 = Output("op3", "TENSOR_BOOL8", "{3}") # a vector of output
+model = model.Operation("NOT_EQUAL", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [2, 3, 0],
+ i2: # input 1
+ [2, 9, 0]}
+
+output0 = {i3: # output 0
+ [False, True, False]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/skip/V1_2/prelu.mod.py b/tests/nnapi/specs/V1_2/prelu.mod.py
index bbbeeb5dc..bbbeeb5dc 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/prelu.mod.py
+++ b/tests/nnapi/specs/V1_2/prelu.mod.py
diff --git a/tests/nnapi/specs/V1_2/prelu_broadcast_float_1_nnfw.mod.py b/tests/nnapi/specs/V1_2/prelu_broadcast_float_1_nnfw.mod.py
new file mode 100755
index 000000000..1be24a9ab
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/prelu_broadcast_float_1_nnfw.mod.py
@@ -0,0 +1,23 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{1, 2, 2, 3}") # a vector of input
+i2 = Input("op2", "TENSOR_FLOAT32", "{1, 1, 1, 3}") # a vector of alpha
+i3 = Output("op3", "TENSOR_FLOAT32", "{1, 2, 2, 3}") # a vector of output
+model = model.Operation("PRELU", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [0.0, 0.0, 0.0,
+ 1.0, 1.0, 1.0,
+ -1.0, -1.0, -1.0,
+ -2.0, -2.0, -2.0],
+ i2: # input 1
+ [0.0, 1.0, 2.0]}
+
+output0 = {i3: # output 0
+ [0.0, 0.0, 0.0,
+ 1.0, 1.0, 1.0,
+ 0.0, -1.0, -2.0,
+ 0.0, -2.0, -4.0]}
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/prelu_broadcast_quant8_1_nnfw.mod.py b/tests/nnapi/specs/V1_2/prelu_broadcast_quant8_1_nnfw.mod.py
new file mode 100755
index 000000000..3ad493429
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/prelu_broadcast_quant8_1_nnfw.mod.py
@@ -0,0 +1,24 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 3}, 1.0f, 2") # a vector of input
+i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{1, 1, 3}, 1.0f, 1") # a vector of alpha
+i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 3}, 0.5f, 3") # a vector of output
+model = model.Operation("PRELU", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 1, 1,
+ 2, 2, 2,
+ 3, 3, 3,
+ 1, 2, 3],
+ i2: # input 1
+ [0, 1, 2]}
+
+output0 = {i3: # output 0
+ [5, 3, 1,
+ 3, 3, 3,
+ 5, 5, 5,
+ 5, 3, 5]}
+# Instantiate an example
+Example((input0, output0))
+
diff --git a/tests/nnapi/specs/V1_2/prelu_float_1_nnfw.mod.py b/tests/nnapi/specs/V1_2/prelu_float_1_nnfw.mod.py
new file mode 100755
index 000000000..f39e7957e
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/prelu_float_1_nnfw.mod.py
@@ -0,0 +1,22 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{1, 2, 2, 1}") # a vector of input
+i2 = Input("op2", "TENSOR_FLOAT32", "{1, 2, 2, 1}") # a vector of alpha
+i3 = Output("op3", "TENSOR_FLOAT32", "{1, 2, 2, 1}") # a vector of output
+model = model.Operation("PRELU", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [3.0, -2.0,
+ -1.0, -2.0
+ ],
+ i2: # input 1
+ [0.0, 1.0,
+ 1.0, 2.0]}
+
+output0 = {i3: # output 0
+ [3.0, -2.0,
+ -1.0, -4.0
+ ]}
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/prelu_quant8_1_nnfw.mod.py b/tests/nnapi/specs/V1_2/prelu_quant8_1_nnfw.mod.py
new file mode 100755
index 000000000..97984b116
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/prelu_quant8_1_nnfw.mod.py
@@ -0,0 +1,23 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 5") # a vector of input
+i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 1") # a vector of alpha
+i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.1f, 3") # a vector of output
+model = model.Operation("PRELU", i1, i2).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [3, 1,
+ 7, 11
+ ],
+ i2: # input 1
+ [0, 1,
+ 2, 2]}
+
+output0 = {i3: # output 0
+ [8, 3,
+ 13, 33
+ ]}
+# Instantiate an example
+Example((input0, output0))
+
diff --git a/tests/nnapi/specs/V1_2/reduce_max.mod.py b/tests/nnapi/specs/V1_2/reduce_max.mod.py
index f08041ddd..f08041ddd 100644..100755
--- a/tests/nnapi/specs/V1_2/reduce_max.mod.py
+++ b/tests/nnapi/specs/V1_2/reduce_max.mod.py
diff --git a/tests/nnapi/specs/V1_2/reduce_max_2D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_max_2D_float_nnfw.mod.py
index be7570eae..be7570eae 100644..100755
--- a/tests/nnapi/specs/V1_2/reduce_max_2D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/reduce_max_2D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/reduce_max_2D_int32_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_max_2D_int32_nnfw.mod.py
index 631cd23e8..631cd23e8 100644..100755
--- a/tests/nnapi/specs/V1_2/reduce_max_2D_int32_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/reduce_max_2D_int32_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/reduce_max_4D_float_reducing_C_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_max_4D_float_reducing_C_nnfw.mod.py
index 2290dd8b9..2290dd8b9 100644..100755
--- a/tests/nnapi/specs/V1_2/reduce_max_4D_float_reducing_C_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/reduce_max_4D_float_reducing_C_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/reduce_max_4D_float_reducing_HW_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_max_4D_float_reducing_HW_nnfw.mod.py
index 057d512ea..057d512ea 100644..100755
--- a/tests/nnapi/specs/V1_2/reduce_max_4D_float_reducing_HW_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/reduce_max_4D_float_reducing_HW_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/reduce_max_float_1_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_max_float_1_nnfw.mod.py
index 061f436ac..061f436ac 100644..100755
--- a/tests/nnapi/specs/V1_2/reduce_max_float_1_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/reduce_max_float_1_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/reduce_max_float_2_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_max_float_2_nnfw.mod.py
index ab99c6244..ab99c6244 100644..100755
--- a/tests/nnapi/specs/V1_2/reduce_max_float_2_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/reduce_max_float_2_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/reduce_max_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_max_float_nnfw.mod.py
index eba25a534..eba25a534 100644..100755
--- a/tests/nnapi/specs/V1_2/reduce_max_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/reduce_max_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/reduce_max_quant8_1_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_max_quant8_1_nnfw.mod.py
index 7c8df05f8..7c8df05f8 100644..100755
--- a/tests/nnapi/specs/V1_2/reduce_max_quant8_1_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/reduce_max_quant8_1_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/reduce_max_quant8_2_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_max_quant8_2_nnfw.mod.py
index 7a54866c1..7a54866c1 100644..100755
--- a/tests/nnapi/specs/V1_2/reduce_max_quant8_2_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/reduce_max_quant8_2_nnfw.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/reduce_min.mod.py b/tests/nnapi/specs/V1_2/reduce_min.mod.py
index 57b827911..57b827911 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/reduce_min.mod.py
+++ b/tests/nnapi/specs/V1_2/reduce_min.mod.py
diff --git a/tests/nnapi/specs/V1_2/reduce_min_float_1_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_min_float_1_nnfw.mod.py
new file mode 100755
index 000000000..853cbc029
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/reduce_min_float_1_nnfw.mod.py
@@ -0,0 +1,18 @@
+model = Model()
+i1 = Input("input", "TENSOR_FLOAT32", "{4, 3, 2}")
+axis = Parameter("axis", "TENSOR_INT32", "{4}", [1, 0, -3, -3])
+keepDims = False
+output = Output("output", "TENSOR_FLOAT32", "{2}")
+
+model = model.Operation("REDUCE_MIN", i1, axis, keepDims).To(output)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [23.0, 24.0, 13.0, 22.0, 5.0, 18.0, 7.0, 8.0, 9.0, 15.0, 11.0, 12.0,
+ 3.0, 14.0, 10.0, 16.0, 17.0, 6.0, 19.0, 20.0, 21.0, 4.0, 1.0, 2.0]}
+
+output0 = {output: # output 0
+ [1.0, 2.0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/reduce_min_float_2_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_min_float_2_nnfw.mod.py
new file mode 100755
index 000000000..4fccaa7b8
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/reduce_min_float_2_nnfw.mod.py
@@ -0,0 +1,18 @@
+model = Model()
+i1 = Input("input", "TENSOR_FLOAT32", "{4, 3, 2}")
+axis = Parameter("axis", "TENSOR_INT32", "{2}", [0, 2])
+keepDims = True
+output = Output("output", "TENSOR_FLOAT32", "{1, 3, 1}")
+
+model = model.Operation("REDUCE_MIN", i1, axis, keepDims).To(output)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [20.0, 2.0, 22.0, 4.0, 24.0, 18.0, 7.0, 8.0, 19.0, 10.0, 14.0, 12.0,
+ 13.0, 11.0, 15.0, 16.0, 17.0, 6.0, 9.0, 1.0, 21.0, 3.0, 23.0, 5.0]}
+
+output0 = {output: # output 0
+ [1.0, 3.0, 5.0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/reduce_min_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_min_float_nnfw.mod.py
new file mode 100755
index 000000000..81ddc5ba1
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/reduce_min_float_nnfw.mod.py
@@ -0,0 +1,19 @@
+model = Model()
+i1 = Input("input", "TENSOR_FLOAT32", "{1, 2, 2, 1}")
+axis = Parameter("axis", "TENSOR_INT32", "{1}", [2])
+keepDims = False
+output = Output("output", "TENSOR_FLOAT32", "{1, 2, 1}")
+
+model = model.Operation("REDUCE_MIN", i1, axis, keepDims).To(output)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [2.0, 1.0,
+ 3.0, 4.0]}
+
+output0 = {output: # output 0
+ [1.0,
+ 3.0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/skip/V1_2/reduce_sum.mod.py b/tests/nnapi/specs/V1_2/reduce_sum.mod.py
index c59579e0f..c59579e0f 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/reduce_sum.mod.py
+++ b/tests/nnapi/specs/V1_2/reduce_sum.mod.py
diff --git a/tests/nnapi/specs/V1_2/reduce_sum_2D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_sum_2D_float_nnfw.mod.py
new file mode 100755
index 000000000..551ef4aa5
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/reduce_sum_2D_float_nnfw.mod.py
@@ -0,0 +1,19 @@
+# model
+model = Model()
+i1 = Input("input", "TENSOR_FLOAT32", "{3, 4}")
+axis = Int32Scalar("axis", 1)
+keepDims = False
+out1 = Output("output", "TENSOR_FLOAT32", "{3}")
+model = model.Operation("REDUCE_SUM", i1, axis, keepDims).To(out1)
+
+# Example 1. Input in operand 0, 1
+input0 = {i1: # input 0
+ [3.2, 11.47, 3.8, 5.76,
+ 28.2, 0.999, -1.3, -13.5,
+ -3.4, -22.1, -2.2, -49.7]}
+
+output0 = {out1: # output 0
+ [24.23, 14.399002, -77.4]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/reduce_sum_4D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_sum_4D_float_nnfw.mod.py
new file mode 100755
index 000000000..c8365cbc3
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/reduce_sum_4D_float_nnfw.mod.py
@@ -0,0 +1,19 @@
+# model
+model = Model()
+i1 = Input("input", "TENSOR_FLOAT32", "{1, 3, 4, 1}")
+axis = Int32Scalar("axis", 1)
+keepDims = False
+out1 = Output("output", "TENSOR_FLOAT32", "{1, 4, 1}")
+model = model.Operation("REDUCE_SUM", i1, axis, keepDims).To(out1)
+
+# Example 1. Input in operand 0, 1
+input0 = {i1: # input 0
+ [6.4, 7.3, 19.3, -2.3,
+ 8.3, 2.0, 11.8, -3.4,
+ 22.8, 3.0, -28.7, 4.9]}
+
+output0 = {out1: # output 0
+ [37.5, 12.3, 2.3999977, -0.7999997]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/reduce_sum_4D_float_reducing_C_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_sum_4D_float_reducing_C_nnfw.mod.py
new file mode 100755
index 000000000..2ae69a90c
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/reduce_sum_4D_float_reducing_C_nnfw.mod.py
@@ -0,0 +1,33 @@
+batch = 2
+rows = 3
+cols = 4
+depth = 5
+
+input_table = [x for x in range(batch * rows * cols * depth)]
+
+output_table = [0 for x in range(batch * rows * cols)]
+for i in range(batch):
+ for j in range(rows):
+ for k in range(cols):
+ for l in range(depth):
+ # The value of output_table is the depthwise sum of input_table.
+ output_table[i * rows * cols + j * cols + k] += input_table[i * rows * cols * depth + j * cols * depth + k * depth + l];
+
+model = Model()
+i1 = Input("input", "TENSOR_FLOAT32", "{%d, %d, %d, %d}" % (batch, rows, cols, depth))
+# Axis value should be in the range [-(rank), rank). And '-n' is the same axis with 'rank - n'. So '3' and '-1' are the same axis.
+axis = Parameter("axis", "TENSOR_INT32", "{2}", [3, -1])
+keepDims = False
+output = Output("output", "TENSOR_FLOAT32", "{%d, %d, %d}" % (batch, rows, cols))
+
+model = model.Operation("REDUCE_SUM", i1, axis, keepDims).To(output)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ input_table}
+
+output0 = {output: # output 0
+ output_table}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/reduce_sum_4D_float_reducing_HW_nnfw.mod.py b/tests/nnapi/specs/V1_2/reduce_sum_4D_float_reducing_HW_nnfw.mod.py
new file mode 100755
index 000000000..9f53d1061
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/reduce_sum_4D_float_reducing_HW_nnfw.mod.py
@@ -0,0 +1,33 @@
+batch = 2
+rows = 3
+cols = 4
+depth = 5
+
+input_table = [x for x in range(batch * rows * cols * depth)]
+
+output_table = [0 for x in range(batch * depth)]
+for i in range(batch):
+ for j in range(rows):
+ for k in range(cols):
+ for l in range(depth):
+ # The value of output_table is the rowwise sum and colwise sum of input_table.
+ output_table[i * depth + l] += input_table[i * rows * cols * depth + j * cols * depth + k * depth + l];
+
+model = Model()
+i1 = Input("input", "TENSOR_FLOAT32", "{%d, %d, %d, %d}" % (batch, rows, cols, depth))
+# Axis value should be in the range [-(rank), rank). And '-n' is the same axis with 'rank - n'. So this test's axis value are the same [1, 2].
+axis = Parameter("axis", "TENSOR_INT32", "{4}", [1, 2, -3, -2])
+keepDims = False
+output = Output("output", "TENSOR_FLOAT32", "{%d, %d}" % (batch, depth))
+
+model = model.Operation("REDUCE_SUM", i1, axis, keepDims).To(output)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ input_table}
+
+output0 = {output: # output 0
+ output_table}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/rsqrt.mod.py b/tests/nnapi/specs/V1_2/rsqrt.mod.py
index bfce569c6..bfce569c6 100644..100755
--- a/tests/nnapi/specs/V1_2/rsqrt.mod.py
+++ b/tests/nnapi/specs/V1_2/rsqrt.mod.py
diff --git a/tests/nnapi/specs/V1_2/rsqrt_1D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/rsqrt_1D_float_nnfw.mod.py
index 74e23048e..74e23048e 100644..100755
--- a/tests/nnapi/specs/V1_2/rsqrt_1D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/rsqrt_1D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/rsqrt_2D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/rsqrt_2D_float_nnfw.mod.py
index 45bb0ce70..45bb0ce70 100644..100755
--- a/tests/nnapi/specs/V1_2/rsqrt_2D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/rsqrt_2D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/rsqrt_3D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/rsqrt_3D_float_nnfw.mod.py
index 084e02020..084e02020 100644..100755
--- a/tests/nnapi/specs/V1_2/rsqrt_3D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/rsqrt_3D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/rsqrt_4D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/rsqrt_4D_float_nnfw.mod.py
index f479c50b3..f479c50b3 100644..100755
--- a/tests/nnapi/specs/V1_2/rsqrt_4D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/rsqrt_4D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/sin_1D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/sin_1D_float_nnfw.mod.py
new file mode 100755
index 000000000..695ad491a
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/sin_1D_float_nnfw.mod.py
@@ -0,0 +1,13 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{4}") #A vector of inputs
+i2 = Output("op2", "TENSOR_FLOAT32", "{4}") #A vector of outputs
+model = model.Operation("SIN", i1).To(i2)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [2.0, 90.0, 1.0, 0.012]}
+output0 = {i2: # output 0
+ [0.909297427, 0.893996664, 0.841470985, 0.011999712]}
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/sin_4D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/sin_4D_float_nnfw.mod.py
new file mode 100755
index 000000000..87877df4c
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/sin_4D_float_nnfw.mod.py
@@ -0,0 +1,18 @@
+# model
+model = Model()
+
+i1 = Input("op1", "TENSOR_FLOAT32", "{2, 1, 2, 2}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{2, 1, 2, 2}")
+model = model.Operation("SIN", i1).To(i3)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [12.0, 36.1, 2.0, 90, 1.0, 0.012, 0.001, 5]}
+
+output0 = {i3: # output 0
+ [-0.536572918, -0.999599143, 0.909297427, 0.893996664,
+ 0.841470985, 0.011999712, 0.001, -0.958924275]}
+
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/skip/V1_2/slice.mod.py b/tests/nnapi/specs/V1_2/slice.mod.py
index f3683ba10..f3683ba10 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/slice.mod.py
+++ b/tests/nnapi/specs/V1_2/slice.mod.py
diff --git a/tests/nnapi/specs/V1_2/split_1D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/split_1D_float_nnfw.mod.py
new file mode 100755
index 000000000..96d7b7987
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/split_1D_float_nnfw.mod.py
@@ -0,0 +1,40 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{8}")
+axis = Int32Scalar("axis", 0)
+num_out = Int32Scalar("num_out", 8)
+i2 = Output("op2", "TENSOR_FLOAT32", "{1}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{1}")
+i4 = Output("op4", "TENSOR_FLOAT32", "{1}")
+i5 = Output("op5", "TENSOR_FLOAT32", "{1}")
+i6 = Output("op6", "TENSOR_FLOAT32", "{1}")
+i7 = Output("op7", "TENSOR_FLOAT32", "{1}")
+i8 = Output("op8", "TENSOR_FLOAT32", "{1}")
+i9 = Output("op9", "TENSOR_FLOAT32", "{1}")
+
+model = model.Operation("SPLIT", i1, axis, num_out).To([i2, i3, i4, i5, i6, i7, i8, i9])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]}
+
+output0 = {
+ i2: # output 0
+ [1.0],
+ i3: # output 1
+ [2.0],
+ i4: # output 2
+ [3.0],
+ i5: # output 3
+ [4.0],
+ i6: # output 4
+ [5.0],
+ i7: # output 5
+ [6.0],
+ i8: # output 6
+ [7.0],
+ i9: # output 7
+ [8.0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/split_1D_int32_nnfw.mod.py b/tests/nnapi/specs/V1_2/split_1D_int32_nnfw.mod.py
new file mode 100755
index 000000000..c2da36fbb
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/split_1D_int32_nnfw.mod.py
@@ -0,0 +1,40 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{8}")
+axis = Int32Scalar("axis", 0)
+num_out = Int32Scalar("num_out", 8)
+i2 = Output("op2", "TENSOR_INT32", "{1}")
+i3 = Output("op3", "TENSOR_INT32", "{1}")
+i4 = Output("op4", "TENSOR_INT32", "{1}")
+i5 = Output("op5", "TENSOR_INT32", "{1}")
+i6 = Output("op6", "TENSOR_INT32", "{1}")
+i7 = Output("op7", "TENSOR_INT32", "{1}")
+i8 = Output("op8", "TENSOR_INT32", "{1}")
+i9 = Output("op9", "TENSOR_INT32", "{1}")
+
+model = model.Operation("SPLIT", i1, axis, num_out).To([i2, i3, i4, i5, i6, i7, i8, i9])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8]}
+
+output0 = {
+ i2: # output 0
+ [1],
+ i3: # output 1
+ [2],
+ i4: # output 2
+ [3],
+ i5: # output 3
+ [4],
+ i6: # output 4
+ [5],
+ i7: # output 5
+ [6],
+ i8: # output 6
+ [7],
+ i9: # output 7
+ [8]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/split_4D_float_1_nnfw.mod.py b/tests/nnapi/specs/V1_2/split_4D_float_1_nnfw.mod.py
new file mode 100755
index 000000000..909af1920
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/split_4D_float_1_nnfw.mod.py
@@ -0,0 +1,21 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{2,2,2,2}")
+axis = Int32Scalar("axis", 0)
+num_out = Int32Scalar("num_out", 2)
+i2 = Output("op2", "TENSOR_FLOAT32", "{1,2,2,2}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{1,2,2,2}")
+model = model.Operation("SPLIT", i1, axis, num_out).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
+
+output0 = {
+ i2: # output 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
+ i3: # output 1
+ [9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/split_4D_float_2_nnfw.mod.py b/tests/nnapi/specs/V1_2/split_4D_float_2_nnfw.mod.py
new file mode 100755
index 000000000..d1ccda689
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/split_4D_float_2_nnfw.mod.py
@@ -0,0 +1,21 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{2,2,2,2}")
+axis = Int32Scalar("axis", 3)
+num_out = Int32Scalar("num_out", 2)
+i2 = Output("op2", "TENSOR_FLOAT32", "{2,2,2,1}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{2,2,2,1}")
+model = model.Operation("SPLIT", i1, axis, num_out).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
+
+output0 = {
+ i2: # output 0
+ [1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0],
+ i3: # output 1
+ [2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/split_4D_float_3_nnfw.mod.py b/tests/nnapi/specs/V1_2/split_4D_float_3_nnfw.mod.py
new file mode 100755
index 000000000..2c218f329
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/split_4D_float_3_nnfw.mod.py
@@ -0,0 +1,21 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{2,2,2,2}")
+axis = Int32Scalar("axis", -4) # Negative axis
+num_out = Int32Scalar("num_out", 2)
+i2 = Output("op2", "TENSOR_FLOAT32", "{1,2,2,2}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{1,2,2,2}")
+model = model.Operation("SPLIT", i1, axis, num_out).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
+
+output0 = {
+ i2: # output 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
+ i3: # output 1
+ [9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/split_4D_int32_1_nnfw.mod.py b/tests/nnapi/specs/V1_2/split_4D_int32_1_nnfw.mod.py
new file mode 100755
index 000000000..c5d95fcd8
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/split_4D_int32_1_nnfw.mod.py
@@ -0,0 +1,21 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
+axis = Int32Scalar("axis", 0)
+num_out = Int32Scalar("num_out", 2)
+i2 = Output("op2", "TENSOR_INT32", "{1,2,2,2}")
+i3 = Output("op3", "TENSOR_INT32", "{1,2,2,2}")
+model = model.Operation("SPLIT", i1, axis, num_out).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}
+
+output0 = {
+ i2: # output 0
+ [1, 2, 3, 4, 5, 6, 7, 8],
+ i3: # output 1
+ [9, 10, 11, 12, 13, 14, 15, 16]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/split_4D_int32_2_nnfw.mod.py b/tests/nnapi/specs/V1_2/split_4D_int32_2_nnfw.mod.py
new file mode 100755
index 000000000..51a43d8f9
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/split_4D_int32_2_nnfw.mod.py
@@ -0,0 +1,21 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
+axis = Int32Scalar("axis", 1)
+num_out = Int32Scalar("num_out", 2)
+i2 = Output("op2", "TENSOR_INT32", "{2,1,2,2}")
+i3 = Output("op3", "TENSOR_INT32", "{2,1,2,2}")
+model = model.Operation("SPLIT", i1, axis, num_out).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}
+
+output0 = {
+ i2: # output 0
+ [1, 2, 3, 4, 9, 10, 11, 12],
+ i3: # output 1
+ [5, 6, 7, 8, 13, 14, 15, 16]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/split_4D_int32_3_nnfw.mod.py b/tests/nnapi/specs/V1_2/split_4D_int32_3_nnfw.mod.py
new file mode 100755
index 000000000..a9709e315
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/split_4D_int32_3_nnfw.mod.py
@@ -0,0 +1,21 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
+axis = Int32Scalar("axis", 2)
+num_out = Int32Scalar("num_out", 2)
+i2 = Output("op2", "TENSOR_INT32", "{2,2,1,2}")
+i3 = Output("op3", "TENSOR_INT32", "{2,2,1,2}")
+model = model.Operation("SPLIT", i1, axis, num_out).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}
+
+output0 = {
+ i2: # output 0
+ [1, 2, 5, 6, 9, 10, 13, 14],
+ i3: # output 1
+ [3, 4, 7, 8, 11, 12, 15, 16]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/split_4D_int32_4_nnfw.mod.py b/tests/nnapi/specs/V1_2/split_4D_int32_4_nnfw.mod.py
new file mode 100755
index 000000000..98d70f9a6
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/split_4D_int32_4_nnfw.mod.py
@@ -0,0 +1,21 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
+axis = Int32Scalar("axis", 3)
+num_out = Int32Scalar("num_out", 2)
+i2 = Output("op2", "TENSOR_INT32", "{2,2,2,1}")
+i3 = Output("op3", "TENSOR_INT32", "{2,2,2,1}")
+model = model.Operation("SPLIT", i1, axis, num_out).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}
+
+output0 = {
+ i2: # output 0
+ [1, 3, 5, 7, 9, 11, 13, 15],
+ i3: # output 1
+ [2, 4, 6, 8, 10, 12, 14, 16]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/split_4D_int32_5_nnfw.mod.py b/tests/nnapi/specs/V1_2/split_4D_int32_5_nnfw.mod.py
new file mode 100755
index 000000000..4dd1e441c
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/split_4D_int32_5_nnfw.mod.py
@@ -0,0 +1,21 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
+axis = Int32Scalar("axis", -4) # Negative axis
+num_out = Int32Scalar("num_out", 2)
+i2 = Output("op2", "TENSOR_INT32", "{1,2,2,2}")
+i3 = Output("op3", "TENSOR_INT32", "{1,2,2,2}")
+model = model.Operation("SPLIT", i1, axis, num_out).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}
+
+output0 = {
+ i2: # output 0
+ [1, 2, 3, 4, 5, 6, 7, 8],
+ i3: # output 1
+ [9, 10, 11, 12, 13, 14, 15, 16]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/split_4D_quant8_nnfw.mod.py b/tests/nnapi/specs/V1_2/split_4D_quant8_nnfw.mod.py
new file mode 100755
index 000000000..062cd3722
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/split_4D_quant8_nnfw.mod.py
@@ -0,0 +1,21 @@
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{2,2,2,2}, 0.5f, 1")
+axis = Int32Scalar("axis", 0)
+num_out = Int32Scalar("num_out", 2)
+i2 = Output("op2", "TENSOR_QUANT8_ASYMM", "{1,2,2,2}, 0.5f, 1")
+i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{1,2,2,2}, 0.5f, 1")
+model = model.Operation("SPLIT", i1, axis, num_out).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}
+
+output0 = {
+ i2: # output 0
+ [1, 2, 3, 4, 5, 6, 7, 8],
+ i3: # output 1
+ [9, 10, 11, 12, 13, 14, 15, 16]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/skip/V1_2/split_float_1.mod.py b/tests/nnapi/specs/V1_2/split_float_1.mod.py
index d1bdc4561..d1bdc4561 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/split_float_1.mod.py
+++ b/tests/nnapi/specs/V1_2/split_float_1.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/split_float_2.mod.py b/tests/nnapi/specs/V1_2/split_float_2.mod.py
index a1610327e..a1610327e 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/split_float_2.mod.py
+++ b/tests/nnapi/specs/V1_2/split_float_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/split_float_3.mod.py b/tests/nnapi/specs/V1_2/split_float_3.mod.py
index 56b87e53c..56b87e53c 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/split_float_3.mod.py
+++ b/tests/nnapi/specs/V1_2/split_float_3.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/split_float_4.mod.py b/tests/nnapi/specs/V1_2/split_float_4.mod.py
index a9bf5a70a..a9bf5a70a 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/split_float_4.mod.py
+++ b/tests/nnapi/specs/V1_2/split_float_4.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/split_float_5.mod.py b/tests/nnapi/specs/V1_2/split_float_5.mod.py
index ad6621aeb..ad6621aeb 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/split_float_5.mod.py
+++ b/tests/nnapi/specs/V1_2/split_float_5.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/split_int32_1.mod.py b/tests/nnapi/specs/V1_2/split_int32_1.mod.py
index 313505a9a..313505a9a 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/split_int32_1.mod.py
+++ b/tests/nnapi/specs/V1_2/split_int32_1.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/split_int32_2.mod.py b/tests/nnapi/specs/V1_2/split_int32_2.mod.py
index 4ad52d361..4ad52d361 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/split_int32_2.mod.py
+++ b/tests/nnapi/specs/V1_2/split_int32_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/split_int32_3.mod.py b/tests/nnapi/specs/V1_2/split_int32_3.mod.py
index 0e8acb63a..0e8acb63a 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/split_int32_3.mod.py
+++ b/tests/nnapi/specs/V1_2/split_int32_3.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/split_int32_4.mod.py b/tests/nnapi/specs/V1_2/split_int32_4.mod.py
index e84abcd84..e84abcd84 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/split_int32_4.mod.py
+++ b/tests/nnapi/specs/V1_2/split_int32_4.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/split_quant8_1.mod.py b/tests/nnapi/specs/V1_2/split_quant8_1.mod.py
index 0c4723775..0c4723775 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/split_quant8_1.mod.py
+++ b/tests/nnapi/specs/V1_2/split_quant8_1.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/split_quant8_2.mod.py b/tests/nnapi/specs/V1_2/split_quant8_2.mod.py
index 4c24dace9..4c24dace9 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/split_quant8_2.mod.py
+++ b/tests/nnapi/specs/V1_2/split_quant8_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/split_quant8_3.mod.py b/tests/nnapi/specs/V1_2/split_quant8_3.mod.py
index 813f1a332..813f1a332 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/split_quant8_3.mod.py
+++ b/tests/nnapi/specs/V1_2/split_quant8_3.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/split_quant8_4.mod.py b/tests/nnapi/specs/V1_2/split_quant8_4.mod.py
index 51e5d5de2..51e5d5de2 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/split_quant8_4.mod.py
+++ b/tests/nnapi/specs/V1_2/split_quant8_4.mod.py
diff --git a/tests/nnapi/specs/V1_2/sqrt_.mod.py b/tests/nnapi/specs/V1_2/sqrt_.mod.py
index e934062b9..e934062b9 100644..100755
--- a/tests/nnapi/specs/V1_2/sqrt_.mod.py
+++ b/tests/nnapi/specs/V1_2/sqrt_.mod.py
diff --git a/tests/nnapi/specs/V1_2/sqrt_1D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/sqrt_1D_float_nnfw.mod.py
index 3e5a660ff..3e5a660ff 100644..100755
--- a/tests/nnapi/specs/V1_2/sqrt_1D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/sqrt_1D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/sqrt_2D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/sqrt_2D_float_nnfw.mod.py
index a61732dd3..a61732dd3 100644..100755
--- a/tests/nnapi/specs/V1_2/sqrt_2D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/sqrt_2D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/sqrt_3D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/sqrt_3D_float_nnfw.mod.py
index 7fe3dc2a5..7fe3dc2a5 100644..100755
--- a/tests/nnapi/specs/V1_2/sqrt_3D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/sqrt_3D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/V1_2/sqrt_4D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/sqrt_4D_float_nnfw.mod.py
index f75a80720..f75a80720 100644..100755
--- a/tests/nnapi/specs/V1_2/sqrt_4D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/sqrt_4D_float_nnfw.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/sub_v1_2.mod.py b/tests/nnapi/specs/V1_2/sub_v1_2.mod.py
index 86299762d..86299762d 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/sub_v1_2.mod.py
+++ b/tests/nnapi/specs/V1_2/sub_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/sub_v1_2_broadcast.mod.py b/tests/nnapi/specs/V1_2/sub_v1_2_broadcast.mod.py
index 5a755117b..5a755117b 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/sub_v1_2_broadcast.mod.py
+++ b/tests/nnapi/specs/V1_2/sub_v1_2_broadcast.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/tanh_v1_2.mod.py b/tests/nnapi/specs/V1_2/tanh_v1_2.mod.py
index c65d09fdb..c65d09fdb 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/tanh_v1_2.mod.py
+++ b/tests/nnapi/specs/V1_2/tanh_v1_2.mod.py
diff --git a/tests/nnapi/specs/V1_2/topk_v2.mod.py b/tests/nnapi/specs/V1_2/topk_v2.mod.py
index 189b9907d..189b9907d 100644..100755
--- a/tests/nnapi/specs/V1_2/topk_v2.mod.py
+++ b/tests/nnapi/specs/V1_2/topk_v2.mod.py
diff --git a/tests/nnapi/specs/V1_2/topk_v2_1D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/topk_v2_1D_float_nnfw.mod.py
index 1e0ed21d6..5be9c49bf 100644..100755
--- a/tests/nnapi/specs/V1_2/topk_v2_1D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/topk_v2_1D_float_nnfw.mod.py
@@ -4,7 +4,7 @@ i1 = Input("op1", "TENSOR_FLOAT32", "{4}") # a vector of input
k = Int32Scalar("k", 2)
i2 = Output("op2", "TENSOR_FLOAT32", "{2}") # values of output
i3 = Output("op3", "TENSOR_INT32", "{2}") # indexes of output
-model = model.Operation("TOPK_V2_EX", i1, k).To([i2, i3])
+model = model.Operation("TOPK_V2", i1, k).To([i2, i3])
# Example 1. Input in operand 0,
input0 = {i1: # input 0
diff --git a/tests/nnapi/specs/V1_2/topk_v2_1D_int32_nnfw.mod.py b/tests/nnapi/specs/V1_2/topk_v2_1D_int32_nnfw.mod.py
index d2bd39adf..8ee332761 100644..100755
--- a/tests/nnapi/specs/V1_2/topk_v2_1D_int32_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/topk_v2_1D_int32_nnfw.mod.py
@@ -4,7 +4,7 @@ i1 = Input("op1", "TENSOR_INT32", "{4}") # a vector of input
k = Int32Scalar("k", 2)
i2 = Output("op2", "TENSOR_INT32", "{2}") # values of output
i3 = Output("op3", "TENSOR_INT32", "{2}") # indexes of output
-model = model.Operation("TOPK_V2_EX", i1, k).To([i2, i3])
+model = model.Operation("TOPK_V2", i1, k).To([i2, i3])
# Example 1. Input in operand 0,
input0 = {i1: # input 0
diff --git a/tests/nnapi/specs/V1_2/topk_v2_1D_quant8_nnfw.mod.py b/tests/nnapi/specs/V1_2/topk_v2_1D_quant8_nnfw.mod.py
index 6f36ce41f..d270d8c77 100644..100755
--- a/tests/nnapi/specs/V1_2/topk_v2_1D_quant8_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/topk_v2_1D_quant8_nnfw.mod.py
@@ -4,7 +4,7 @@ i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{4}, 0.5f, 1") # a vector of input
k = Int32Scalar("k", 2)
i2 = Output("op2", "TENSOR_QUANT8_ASYMM", "{2}, 0.5f, 1") # values of output
i3 = Output("op3", "TENSOR_INT32", "{2}") # indexes of output
-model = model.Operation("TOPK_V2_EX", i1, k).To([i2, i3])
+model = model.Operation("TOPK_V2", i1, k).To([i2, i3])
# Example 1. Input in operand 0,
input0 = {i1: # input 0
diff --git a/tests/nnapi/specs/V1_2/topk_v2_2D_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/topk_v2_2D_float_nnfw.mod.py
index 204bc143f..29113b901 100644..100755
--- a/tests/nnapi/specs/V1_2/topk_v2_2D_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/topk_v2_2D_float_nnfw.mod.py
@@ -4,7 +4,7 @@ i1 = Input("op1", "TENSOR_FLOAT32", "{3,4}") # a matirx of input
k = Int32Scalar("k", 2)
o1 = Output("op2", "TENSOR_FLOAT32", "{3,2}") # values of output
o2 = Output("op3", "TENSOR_INT32", "{3,2}") # indexes of output
-model = model.Operation("TOPK_V2_EX", i1, k).To([o1, o2])
+model = model.Operation("TOPK_V2", i1, k).To([o1, o2])
# Example 1. Input in operand 0,
input0 = {i1: # input 0
@@ -13,7 +13,7 @@ input0 = {i1: # input 0
2.123456789123456789, 18.123456789123456789, 19.123456789123456789, 11.123456789123456789]}
output0 = {o1: # output 1
- [6.123456789123456789, 5.123456789123456789,
+ [6.123456789123456789, 5.123456789123456789,
9.123456789123456789, 8.123456789123456789,
19.123456789123456789, 18.123456789123456789],
o2: # output 1
diff --git a/tests/nnapi/specs/V1_2/topk_v2_2D_int32_nnfw.mod.py b/tests/nnapi/specs/V1_2/topk_v2_2D_int32_nnfw.mod.py
index b90a35488..7a2965fd8 100644..100755
--- a/tests/nnapi/specs/V1_2/topk_v2_2D_int32_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/topk_v2_2D_int32_nnfw.mod.py
@@ -4,7 +4,7 @@ i1 = Input("op1", "TENSOR_INT32", "{3,4}") # a vector of input
k = Int32Scalar("k", 2)
i2 = Output("op2", "TENSOR_INT32", "{3,2}") # indexes of output
i3 = Output("op3", "TENSOR_INT32", "{3,2}") # values of output
-model = model.Operation("TOPK_V2_EX", i1, k).To([i2, i3])
+model = model.Operation("TOPK_V2", i1, k).To([i2, i3])
# Example 1. Input in operand 0,
input0 = {i1: # input 0
diff --git a/tests/nnapi/specs/V1_2/topk_v2_2D_quant8_nnfw.mod.py b/tests/nnapi/specs/V1_2/topk_v2_2D_quant8_nnfw.mod.py
index d8b5c6075..31c8ef9f2 100644..100755
--- a/tests/nnapi/specs/V1_2/topk_v2_2D_quant8_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/topk_v2_2D_quant8_nnfw.mod.py
@@ -4,7 +4,7 @@ i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{3,4}, 0.5f, 1") # a vector of input
k = Int32Scalar("k", 2)
i2 = Output("op2", "TENSOR_QUANT8_ASYMM", "{3,2}, 0.5f, 1") # values of output
i3 = Output("op3", "TENSOR_INT32", "{3,2}") # indexes of output
-model = model.Operation("TOPK_V2_EX", i1, k).To([i2, i3])
+model = model.Operation("TOPK_V2", i1, k).To([i2, i3])
# Example 1. Input in operand 0,
input0 = {i1: # input 0
diff --git a/tests/nnapi/specs/skip/V1_2/transpose_v1_2.mod.py b/tests/nnapi/specs/V1_2/transpose_v1_2.mod.py
index 9d0108e8f..9d0108e8f 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/transpose_v1_2.mod.py
+++ b/tests/nnapi/specs/V1_2/transpose_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_0/mobilenet_224_gender_basic_fixed.mod.py b/tests/nnapi/specs/skip/V1_0/mobilenet_224_gender_basic_fixed.mod.py
index 6861b0a86..6861b0a86 100644..100755
--- a/tests/nnapi/specs/skip/V1_0/mobilenet_224_gender_basic_fixed.mod.py
+++ b/tests/nnapi/specs/skip/V1_0/mobilenet_224_gender_basic_fixed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_0/mobilenet_quantized.mod.py b/tests/nnapi/specs/skip/V1_0/mobilenet_quantized.mod.py
index 20223e292..20223e292 100644..100755
--- a/tests/nnapi/specs/skip/V1_0/mobilenet_quantized.mod.py
+++ b/tests/nnapi/specs/skip/V1_0/mobilenet_quantized.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/add_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/add_relaxed.mod.py
index cf2d69eb4..cf2d69eb4 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/add_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/add_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/avg_pool_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/avg_pool_float_1_relaxed.mod.py
index 5b421e730..5b421e730 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/avg_pool_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/avg_pool_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/avg_pool_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/avg_pool_float_2_relaxed.mod.py
index b94c37e2e..b94c37e2e 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/avg_pool_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/avg_pool_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/avg_pool_float_3_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/avg_pool_float_3_relaxed.mod.py
index 30dc18488..30dc18488 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/avg_pool_float_3_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/avg_pool_float_3_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/avg_pool_float_4_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/avg_pool_float_4_relaxed.mod.py
index 3ca5ca9a3..3ca5ca9a3 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/avg_pool_float_4_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/avg_pool_float_4_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/avg_pool_float_5_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/avg_pool_float_5_relaxed.mod.py
index 9386c7644..9386c7644 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/avg_pool_float_5_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/avg_pool_float_5_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/batch_to_space_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/batch_to_space_float_1_relaxed.mod.py
index 716420304..716420304 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/batch_to_space_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/batch_to_space_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/batch_to_space_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/batch_to_space_relaxed.mod.py
index 3836e7b91..3836e7b91 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/batch_to_space_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/batch_to_space_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/concat_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/concat_float_1_relaxed.mod.py
index cd4187cd3..cd4187cd3 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/concat_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/concat_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/concat_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/concat_float_2_relaxed.mod.py
index 634f4bbe2..634f4bbe2 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/concat_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/concat_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/concat_float_3_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/concat_float_3_relaxed.mod.py
index df3d894d6..df3d894d6 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/concat_float_3_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/concat_float_3_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/conv_1_h3_w2_SAME_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/conv_1_h3_w2_SAME_relaxed.mod.py
index 785572fae..785572fae 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/conv_1_h3_w2_SAME_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/conv_1_h3_w2_SAME_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/conv_1_h3_w2_VALID_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/conv_1_h3_w2_VALID_relaxed.mod.py
index 51fcf0a15..51fcf0a15 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/conv_1_h3_w2_VALID_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/conv_1_h3_w2_VALID_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/conv_3_h3_w2_SAME_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/conv_3_h3_w2_SAME_relaxed.mod.py
index d6326e4b6..d6326e4b6 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/conv_3_h3_w2_SAME_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/conv_3_h3_w2_SAME_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/conv_3_h3_w2_VALID_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/conv_3_h3_w2_VALID_relaxed.mod.py
index 2e0905612..2e0905612 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/conv_3_h3_w2_VALID_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/conv_3_h3_w2_VALID_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/conv_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/conv_float_2_relaxed.mod.py
index a49f001b0..a49f001b0 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/conv_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/conv_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/conv_float_channels_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/conv_float_channels_relaxed.mod.py
index 0788be529..0788be529 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/conv_float_channels_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/conv_float_channels_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/conv_float_channels_weights_as_inputs_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/conv_float_channels_weights_as_inputs_relaxed.mod.py
index 0659bfed1..0659bfed1 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/conv_float_channels_weights_as_inputs_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/conv_float_channels_weights_as_inputs_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/conv_float_large_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/conv_float_large_relaxed.mod.py
index 75e7297ee..75e7297ee 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/conv_float_large_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/conv_float_large_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/conv_float_large_weights_as_inputs_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/conv_float_large_weights_as_inputs_relaxed.mod.py
index 2cd233cdf..2cd233cdf 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/conv_float_large_weights_as_inputs_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/conv_float_large_weights_as_inputs_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/conv_float_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/conv_float_relaxed.mod.py
index 696cb8c00..696cb8c00 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/conv_float_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/conv_float_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/conv_float_weights_as_inputs_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/conv_float_weights_as_inputs_relaxed.mod.py
index 943f1b45b..943f1b45b 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/conv_float_weights_as_inputs_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/conv_float_weights_as_inputs_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/depth_to_space_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/depth_to_space_float_1_relaxed.mod.py
index 59acac575..59acac575 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/depth_to_space_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/depth_to_space_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/depth_to_space_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/depth_to_space_float_2_relaxed.mod.py
index 4751b0b98..4751b0b98 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/depth_to_space_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/depth_to_space_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/depth_to_space_float_3_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/depth_to_space_float_3_relaxed.mod.py
index d0c0f182a..d0c0f182a 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/depth_to_space_float_3_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/depth_to_space_float_3_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_2_relaxed.mod.py
index b38c0765d..b38c0765d 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_2_relaxed.mod.py
index a878b6029..a878b6029 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_2_weights_as_inputs_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_2_weights_as_inputs_relaxed.mod.py
index 26d1f09bc..26d1f09bc 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_2_weights_as_inputs_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_2_weights_as_inputs_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_relaxed.mod.py
index c2394bc0f..c2394bc0f 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_weights_as_inputs_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_weights_as_inputs_relaxed.mod.py
index 6c1e647d1..6c1e647d1 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_weights_as_inputs_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_large_weights_as_inputs_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_relaxed.mod.py
index cd71a108e..cd71a108e 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_weights_as_inputs_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_weights_as_inputs_relaxed.mod.py
index 404a28cfc..404a28cfc 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_weights_as_inputs_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/depthwise_conv2d_float_weights_as_inputs_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/depthwise_conv_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/depthwise_conv_relaxed.mod.py
index 2f0300eef..2f0300eef 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/depthwise_conv_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/depthwise_conv_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/dequantize_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/dequantize_relaxed.mod.py
index b960627d2..b960627d2 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/dequantize_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/dequantize_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/div_broadcast_float_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/div_broadcast_float_relaxed.mod.py
index e6f033eae..e6f033eae 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/div_broadcast_float_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/div_broadcast_float_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/div_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/div_relaxed.mod.py
index 38ba17f9f..38ba17f9f 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/div_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/div_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/embedding_lookup_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/embedding_lookup_relaxed.mod.py
index cfb4333dc..cfb4333dc 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/embedding_lookup_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/embedding_lookup_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/floor_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/floor_relaxed.mod.py
index 3c1ee48ef..3c1ee48ef 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/floor_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/floor_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/fully_connected_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/fully_connected_float_2_relaxed.mod.py
index b3ce6e5f9..b3ce6e5f9 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/fully_connected_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/fully_connected_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/fully_connected_float_4d_simple_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/fully_connected_float_4d_simple_relaxed.mod.py
index 2338c0bed..2338c0bed 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/fully_connected_float_4d_simple_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/fully_connected_float_4d_simple_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/fully_connected_float_large_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/fully_connected_float_large_relaxed.mod.py
index e170f68ed..e170f68ed 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/fully_connected_float_large_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/fully_connected_float_large_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/fully_connected_float_large_weights_as_inputs_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/fully_connected_float_large_weights_as_inputs_relaxed.mod.py
index 01f71ba29..01f71ba29 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/fully_connected_float_large_weights_as_inputs_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/fully_connected_float_large_weights_as_inputs_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/fully_connected_float_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/fully_connected_float_relaxed.mod.py
index acee27f5b..acee27f5b 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/fully_connected_float_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/fully_connected_float_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/fully_connected_float_weights_as_inputs_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/fully_connected_float_weights_as_inputs_relaxed.mod.py
index a6f61753d..a6f61753d 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/fully_connected_float_weights_as_inputs_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/fully_connected_float_weights_as_inputs_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/hashtable_lookup_float_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/hashtable_lookup_float_relaxed.mod.py
index e2c1a6d4b..e2c1a6d4b 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/hashtable_lookup_float_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/hashtable_lookup_float_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/l2_normalization_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/l2_normalization_2_relaxed.mod.py
index 54e57d18d..54e57d18d 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/l2_normalization_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/l2_normalization_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/l2_normalization_large_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/l2_normalization_large_relaxed.mod.py
index ad55690f1..ad55690f1 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/l2_normalization_large_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/l2_normalization_large_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/l2_normalization_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/l2_normalization_relaxed.mod.py
index c2f6ed061..c2f6ed061 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/l2_normalization_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/l2_normalization_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/l2_pool_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/l2_pool_float_2_relaxed.mod.py
index bd76e3aff..bd76e3aff 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/l2_pool_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/l2_pool_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/l2_pool_float_large_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/l2_pool_float_large_relaxed.mod.py
index 41afbe461..41afbe461 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/l2_pool_float_large_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/l2_pool_float_large_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/l2_pool_float_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/l2_pool_float_relaxed.mod.py
index 72ca4cc47..72ca4cc47 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/l2_pool_float_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/l2_pool_float_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/local_response_norm_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/local_response_norm_float_1_relaxed.mod.py
index 8be180150..8be180150 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/local_response_norm_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/local_response_norm_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/local_response_norm_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/local_response_norm_float_2_relaxed.mod.py
index 5ecc929f7..5ecc929f7 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/local_response_norm_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/local_response_norm_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/local_response_norm_float_3_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/local_response_norm_float_3_relaxed.mod.py
index 38e5f4b32..38e5f4b32 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/local_response_norm_float_3_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/local_response_norm_float_3_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/local_response_norm_float_4_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/local_response_norm_float_4_relaxed.mod.py
index e8de8affa..e8de8affa 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/local_response_norm_float_4_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/local_response_norm_float_4_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/logistic_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/logistic_float_1_relaxed.mod.py
index 926f652ed..926f652ed 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/logistic_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/logistic_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/logistic_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/logistic_float_2_relaxed.mod.py
index c01913173..c01913173 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/logistic_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/logistic_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/lsh_projection_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/lsh_projection_2_relaxed.mod.py
index 6608b0666..6608b0666 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/lsh_projection_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/lsh_projection_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/lsh_projection_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/lsh_projection_relaxed.mod.py
index 9a1bfaca2..9a1bfaca2 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/lsh_projection_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/lsh_projection_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/lsh_projection_weights_as_inputs_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/lsh_projection_weights_as_inputs_relaxed.mod.py
index de3dddc59..de3dddc59 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/lsh_projection_weights_as_inputs_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/lsh_projection_weights_as_inputs_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/lstm2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/lstm2_relaxed.mod.py
index 4a76a6cac..4a76a6cac 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/lstm2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/lstm2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/lstm2_state2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/lstm2_state2_relaxed.mod.py
index a69f9308c..a69f9308c 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/lstm2_state2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/lstm2_state2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/lstm2_state_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/lstm2_state_relaxed.mod.py
index d68717076..d68717076 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/lstm2_state_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/lstm2_state_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/lstm3_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/lstm3_relaxed.mod.py
index b8244518e..b8244518e 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/lstm3_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/lstm3_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/lstm3_state2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/lstm3_state2_relaxed.mod.py
index c99043f75..c99043f75 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/lstm3_state2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/lstm3_state2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/lstm3_state3_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/lstm3_state3_relaxed.mod.py
index f9a42cbde..f9a42cbde 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/lstm3_state3_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/lstm3_state3_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/lstm3_state_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/lstm3_state_relaxed.mod.py
index b9c3adb84..b9c3adb84 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/lstm3_state_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/lstm3_state_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/lstm_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/lstm_relaxed.mod.py
index 1bbfdd9ec..1bbfdd9ec 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/lstm_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/lstm_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/lstm_state2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/lstm_state2_relaxed.mod.py
index b912d18ce..b912d18ce 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/lstm_state2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/lstm_state2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/lstm_state_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/lstm_state_relaxed.mod.py
index e86881fd4..e86881fd4 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/lstm_state_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/lstm_state_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/max_pool_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/max_pool_float_1_relaxed.mod.py
index 198611f1c..198611f1c 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/max_pool_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/max_pool_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/max_pool_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/max_pool_float_2_relaxed.mod.py
index 45a2a13ab..45a2a13ab 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/max_pool_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/max_pool_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/max_pool_float_3_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/max_pool_float_3_relaxed.mod.py
index 2b0801b36..2b0801b36 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/max_pool_float_3_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/max_pool_float_3_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/max_pool_float_4_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/max_pool_float_4_relaxed.mod.py
index 7753b3118..7753b3118 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/max_pool_float_4_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/max_pool_float_4_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/mean_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/mean_float_1_relaxed.mod.py
index e78f51701..e78f51701 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/mean_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/mean_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/mean_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/mean_float_2_relaxed.mod.py
index be2212ff2..be2212ff2 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/mean_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/mean_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/mean_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/mean_relaxed.mod.py
index ae337acdd..ae337acdd 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/mean_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/mean_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/mobilenet_224_gender_basic_fixed_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/mobilenet_224_gender_basic_fixed_relaxed.mod.py
index 04fbabfee..04fbabfee 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/mobilenet_224_gender_basic_fixed_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/mobilenet_224_gender_basic_fixed_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/mul_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/mul_relaxed.mod.py
index 3e708b8fe..3e708b8fe 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/mul_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/mul_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/mul_relu_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/mul_relu_relaxed.mod.py
index 160589852..160589852 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/mul_relu_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/mul_relu_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/pad_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/pad_float_1_relaxed.mod.py
index 8664f64e7..8664f64e7 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/pad_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/pad_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/pad_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/pad_relaxed.mod.py
index 2587615d3..2587615d3 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/pad_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/pad_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/relu1_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/relu1_float_1_relaxed.mod.py
index 875f07fe5..875f07fe5 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/relu1_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/relu1_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/relu1_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/relu1_float_2_relaxed.mod.py
index 52f760397..52f760397 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/relu1_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/relu1_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/relu6_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/relu6_float_1_relaxed.mod.py
index e5f717986..e5f717986 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/relu6_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/relu6_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/relu6_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/relu6_float_2_relaxed.mod.py
index 25789393e..25789393e 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/relu6_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/relu6_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/relu_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/relu_float_1_relaxed.mod.py
index 8c5c103ee..8c5c103ee 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/relu_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/relu_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/relu_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/relu_float_2_relaxed.mod.py
index acc5aecd5..acc5aecd5 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/relu_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/relu_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/reshape_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/reshape_relaxed.mod.py
index 16bda4791..16bda4791 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/reshape_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/reshape_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/reshape_weights_as_inputs_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/reshape_weights_as_inputs_relaxed.mod.py
index c265b4c4a..c265b4c4a 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/reshape_weights_as_inputs_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/reshape_weights_as_inputs_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/resize_bilinear_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/resize_bilinear_2_relaxed.mod.py
index 06ccbeb66..06ccbeb66 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/resize_bilinear_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/resize_bilinear_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/resize_bilinear_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/resize_bilinear_relaxed.mod.py
index b059be248..b059be248 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/resize_bilinear_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/resize_bilinear_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/rnn_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/rnn_relaxed.mod.py
index bc58851d3..bc58851d3 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/rnn_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/rnn_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/rnn_state_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/rnn_state_relaxed.mod.py
index 489e5bfc5..489e5bfc5 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/rnn_state_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/rnn_state_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/softmax_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/softmax_float_1_relaxed.mod.py
index 897ea54d4..897ea54d4 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/softmax_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/softmax_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/softmax_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/softmax_float_2_relaxed.mod.py
index 60a6f4bdb..60a6f4bdb 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/softmax_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/softmax_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/space_to_batch_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/space_to_batch_float_1_relaxed.mod.py
index de856675d..de856675d 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/space_to_batch_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/space_to_batch_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/space_to_batch_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/space_to_batch_float_2_relaxed.mod.py
index 610e34e90..610e34e90 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/space_to_batch_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/space_to_batch_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/space_to_batch_float_3_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/space_to_batch_float_3_relaxed.mod.py
index 99c892cc6..99c892cc6 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/space_to_batch_float_3_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/space_to_batch_float_3_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/space_to_batch_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/space_to_batch_relaxed.mod.py
index 0924ae263..0924ae263 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/space_to_batch_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/space_to_batch_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/space_to_depth_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/space_to_depth_float_1_relaxed.mod.py
index f1b881baf..f1b881baf 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/space_to_depth_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/space_to_depth_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/space_to_depth_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/space_to_depth_float_2_relaxed.mod.py
index c772cb2fc..c772cb2fc 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/space_to_depth_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/space_to_depth_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/space_to_depth_float_3_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/space_to_depth_float_3_relaxed.mod.py
index 6315ff1ae..6315ff1ae 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/space_to_depth_float_3_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/space_to_depth_float_3_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/squeeze_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/squeeze_float_1_relaxed.mod.py
index e34e81bf2..e34e81bf2 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/squeeze_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/squeeze_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/squeeze_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/squeeze_relaxed.mod.py
index 4338c79b6..4338c79b6 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/squeeze_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/squeeze_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/strided_slice_float_10_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/strided_slice_float_10_relaxed.mod.py
index 2aee51373..2aee51373 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/strided_slice_float_10_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/strided_slice_float_10_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/strided_slice_float_11_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/strided_slice_float_11_relaxed.mod.py
index b88007ebe..b88007ebe 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/strided_slice_float_11_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/strided_slice_float_11_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/strided_slice_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/strided_slice_float_1_relaxed.mod.py
index e80187aff..e80187aff 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/strided_slice_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/strided_slice_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/strided_slice_float_2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/strided_slice_float_2_relaxed.mod.py
index a1e3ad47b..a1e3ad47b 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/strided_slice_float_2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/strided_slice_float_2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/strided_slice_float_3_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/strided_slice_float_3_relaxed.mod.py
index 8696a8b95..8696a8b95 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/strided_slice_float_3_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/strided_slice_float_3_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/strided_slice_float_4_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/strided_slice_float_4_relaxed.mod.py
index 68a574597..68a574597 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/strided_slice_float_4_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/strided_slice_float_4_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/strided_slice_float_5_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/strided_slice_float_5_relaxed.mod.py
index dda4e7df4..dda4e7df4 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/strided_slice_float_5_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/strided_slice_float_5_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/strided_slice_float_6_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/strided_slice_float_6_relaxed.mod.py
index f5bb39ddb..f5bb39ddb 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/strided_slice_float_6_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/strided_slice_float_6_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/strided_slice_float_7_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/strided_slice_float_7_relaxed.mod.py
index 7b5aae4e7..7b5aae4e7 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/strided_slice_float_7_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/strided_slice_float_7_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/strided_slice_float_8_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/strided_slice_float_8_relaxed.mod.py
index 22ce230a5..22ce230a5 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/strided_slice_float_8_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/strided_slice_float_8_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/strided_slice_float_9_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/strided_slice_float_9_relaxed.mod.py
index 1fe5368ee..1fe5368ee 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/strided_slice_float_9_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/strided_slice_float_9_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/strided_slice_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/strided_slice_relaxed.mod.py
index 271c399b0..271c399b0 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/strided_slice_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/strided_slice_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/sub_broadcast_float_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/sub_broadcast_float_relaxed.mod.py
index 7c009dc02..7c009dc02 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/sub_broadcast_float_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/sub_broadcast_float_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/sub_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/sub_relaxed.mod.py
index 14bdd564a..14bdd564a 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/sub_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/sub_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/svdf2_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/svdf2_relaxed.mod.py
index 45aca84fb..45aca84fb 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/svdf2_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/svdf2_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/svdf_bias_present_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/svdf_bias_present_relaxed.mod.py
index 7bff43502..7bff43502 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/svdf_bias_present_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/svdf_bias_present_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/svdf_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/svdf_relaxed.mod.py
index e68bcafbd..e68bcafbd 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/svdf_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/svdf_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/svdf_state_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/svdf_state_relaxed.mod.py
index ecbc00bf2..ecbc00bf2 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/svdf_state_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/svdf_state_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/tanh_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/tanh_relaxed.mod.py
index 6c8fa6b7e..6c8fa6b7e 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/tanh_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/tanh_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/transpose_float_1_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/transpose_float_1_relaxed.mod.py
index 721e6beee..721e6beee 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/transpose_float_1_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/transpose_float_1_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_1/transpose_relaxed.mod.py b/tests/nnapi/specs/skip/V1_1/transpose_relaxed.mod.py
index 668e0c22a..668e0c22a 100644..100755
--- a/tests/nnapi/specs/skip/V1_1/transpose_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_1/transpose_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/add_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/add_v1_2.mod.py
index 8af47a57f..8af47a57f 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/add_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/add_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/argmin_1.mod.py b/tests/nnapi/specs/skip/V1_2/argmin_1.mod.py
index e89ceead9..e89ceead9 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/argmin_1.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/argmin_1.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/argmin_2.mod.py b/tests/nnapi/specs/skip/V1_2/argmin_2.mod.py
index e54cff784..e54cff784 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/argmin_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/argmin_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/argmin_3.mod.py b/tests/nnapi/specs/skip/V1_2/argmin_3.mod.py
index d3cbd76ed..d3cbd76ed 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/argmin_3.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/argmin_3.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/avg_pool_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/avg_pool_v1_2.mod.py
index 43083f91d..43083f91d 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/avg_pool_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/avg_pool_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/axis_aligned_bbox_transform.mod.py b/tests/nnapi/specs/skip/V1_2/axis_aligned_bbox_transform.mod.py
index ad2eb0ad3..ad2eb0ad3 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/axis_aligned_bbox_transform.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/axis_aligned_bbox_transform.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/batch_to_space_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/batch_to_space_v1_2.mod.py
index e7247bf59..e7247bf59 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/batch_to_space_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/batch_to_space_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/bbox_graph.mod.py b/tests/nnapi/specs/skip/V1_2/bbox_graph.mod.py
index a7a94af28..a7a94af28 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/bbox_graph.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/bbox_graph.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm.mod.py b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm.mod.py
index 40ebff903..40ebff903 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_aux_input.mod.py b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_aux_input.mod.py
index 22c0e2459..22c0e2459 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_aux_input.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_aux_input.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_cifg_peephole.mod.py b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_cifg_peephole.mod.py
index 5def31402..5def31402 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_cifg_peephole.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_cifg_peephole.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major.mod.py b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major.mod.py
index 06160adfe..06160adfe 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major_aux_input.mod.py b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major_aux_input.mod.py
index d73da9c6e..d73da9c6e 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major_aux_input.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major_aux_input.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major_merge_outputs.mod.py b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major_merge_outputs.mod.py
index cbf2ebdb7..cbf2ebdb7 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major_merge_outputs.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_float16_batch_major_merge_outputs.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_merge_outputs.mod.py b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_merge_outputs.mod.py
index 6e661001e..6e661001e 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_merge_outputs.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_merge_outputs.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_norm_fw_output.mod.py b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_norm_fw_output.mod.py
index 93de203f4..93de203f4 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_norm_fw_output.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_lstm_norm_fw_output.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_rnn.mod.py b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_rnn.mod.py
index b0a250170..b0a250170 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_rnn.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/bidirectional_sequence_rnn.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/box_with_nms_limit_gaussian.mod.py b/tests/nnapi/specs/skip/V1_2/box_with_nms_limit_gaussian.mod.py
index ed79ccc76..ed79ccc76 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/box_with_nms_limit_gaussian.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/box_with_nms_limit_gaussian.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/box_with_nms_limit_hard.mod.py b/tests/nnapi/specs/skip/V1_2/box_with_nms_limit_hard.mod.py
index b572a6468..b572a6468 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/box_with_nms_limit_hard.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/box_with_nms_limit_hard.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/box_with_nms_limit_linear.mod.py b/tests/nnapi/specs/skip/V1_2/box_with_nms_limit_linear.mod.py
index 4d3bc2001..4d3bc2001 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/box_with_nms_limit_linear.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/box_with_nms_limit_linear.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/channel_shuffle.mod.py b/tests/nnapi/specs/skip/V1_2/channel_shuffle.mod.py
index bd8fcad83..bd8fcad83 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/channel_shuffle.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/channel_shuffle.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/concat_float16_1.mod.py b/tests/nnapi/specs/skip/V1_2/concat_float16_1.mod.py
index f7c024a93..f7c024a93 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/concat_float16_1.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/concat_float16_1.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/concat_float16_2.mod.py b/tests/nnapi/specs/skip/V1_2/concat_float16_2.mod.py
index 64f2096b4..64f2096b4 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/concat_float16_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/concat_float16_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/concat_float16_3.mod.py b/tests/nnapi/specs/skip/V1_2/concat_float16_3.mod.py
index 7c7bc6964..7c7bc6964 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/concat_float16_3.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/concat_float16_3.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/concat_mixed_quant.mod.py b/tests/nnapi/specs/skip/V1_2/concat_mixed_quant.mod.py
index 6610fea9b..6610fea9b 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/concat_mixed_quant.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/concat_mixed_quant.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/concat_zero_sized.mod.py b/tests/nnapi/specs/skip/V1_2/concat_zero_sized.mod.py
index 1e374b4e4..1e374b4e4 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/concat_zero_sized.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/concat_zero_sized.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/conv2d_dilation.mod.py b/tests/nnapi/specs/skip/V1_2/conv2d_dilation.mod.py
index e30e5ede8..e30e5ede8 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/conv2d_dilation.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/conv2d_dilation.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/conv2d_per_channel.mod.py b/tests/nnapi/specs/skip/V1_2/conv2d_per_channel.mod.py
index 8780b48d9..8780b48d9 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/conv2d_per_channel.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/conv2d_per_channel.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/conv2d_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/conv2d_v1_2.mod.py
index 3ea902b62..3ea902b62 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/conv2d_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/conv2d_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/depth_to_space_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/depth_to_space_v1_2.mod.py
index 8d9de4b91..8d9de4b91 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/depth_to_space_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/depth_to_space_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/depthwise_conv2d_dilation.mod.py b/tests/nnapi/specs/skip/V1_2/depthwise_conv2d_dilation.mod.py
index 4b90498e3..4b90498e3 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/depthwise_conv2d_dilation.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/depthwise_conv2d_dilation.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/depthwise_conv2d_per_channel.mod.py b/tests/nnapi/specs/skip/V1_2/depthwise_conv2d_per_channel.mod.py
index 2df79ee34..2df79ee34 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/depthwise_conv2d_per_channel.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/depthwise_conv2d_per_channel.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/depthwise_conv2d_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/depthwise_conv2d_v1_2.mod.py
index 1ceb9c4eb..1ceb9c4eb 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/depthwise_conv2d_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/depthwise_conv2d_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/dequantize_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/dequantize_v1_2.mod.py
index 81e3515cd..81e3515cd 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/dequantize_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/dequantize_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/detection_postprocess.mod.py b/tests/nnapi/specs/skip/V1_2/detection_postprocess.mod.py
index b37989134..b37989134 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/detection_postprocess.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/detection_postprocess.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/div_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/div_v1_2.mod.py
index b92b67a99..b92b67a99 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/div_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/div_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/expand_dims.mod.py b/tests/nnapi/specs/skip/V1_2/expand_dims.mod.py
index 41c5d5426..41c5d5426 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/expand_dims.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/expand_dims.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/floor_float16.mod.py b/tests/nnapi/specs/skip/V1_2/floor_float16.mod.py
index 5944b9bb9..5944b9bb9 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/floor_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/floor_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/fully_connected_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/fully_connected_v1_2.mod.py
index 13b45fa92..13b45fa92 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/fully_connected_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/fully_connected_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/generate_proposals.mod.py b/tests/nnapi/specs/skip/V1_2/generate_proposals.mod.py
index 41d9ef94a..41d9ef94a 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/generate_proposals.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/generate_proposals.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/greater.mod.py b/tests/nnapi/specs/skip/V1_2/greater.mod.py
index d811a4733..d811a4733 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/greater.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/greater.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/grouped_conv2d.mod.py b/tests/nnapi/specs/skip/V1_2/grouped_conv2d.mod.py
index 32819317b..32819317b 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/grouped_conv2d.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/grouped_conv2d.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/heatmap_max_keypoint.mod.py b/tests/nnapi/specs/skip/V1_2/heatmap_max_keypoint.mod.py
index 9b738112a..9b738112a 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/heatmap_max_keypoint.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/heatmap_max_keypoint.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/instance_normalization.mod.py b/tests/nnapi/specs/skip/V1_2/instance_normalization.mod.py
index 9d540ff59..9d540ff59 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/instance_normalization.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/instance_normalization.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/l2_normalization_axis.mod.py b/tests/nnapi/specs/skip/V1_2/l2_normalization_axis.mod.py
index b420032d4..b420032d4 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/l2_normalization_axis.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/l2_normalization_axis.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/l2_normalization_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/l2_normalization_v1_2.mod.py
index f1cd7f517..f1cd7f517 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/l2_normalization_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/l2_normalization_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/l2_pool_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/l2_pool_v1_2.mod.py
index 1754478ea..1754478ea 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/l2_pool_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/l2_pool_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/layer_norm_lstm.mod.py b/tests/nnapi/specs/skip/V1_2/layer_norm_lstm.mod.py
index 0a5edeaf5..0a5edeaf5 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/layer_norm_lstm.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/layer_norm_lstm.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/less_equal.mod.py b/tests/nnapi/specs/skip/V1_2/less_equal.mod.py
index e57ca55c5..e57ca55c5 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/less_equal.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/less_equal.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/local_response_normalization_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/local_response_normalization_v1_2.mod.py
index 784ec9c06..784ec9c06 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/local_response_normalization_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/local_response_normalization_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/log.mod.py b/tests/nnapi/specs/skip/V1_2/log.mod.py
index d93f6b144..d93f6b144 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/log.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/log.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/log_softmax.mod.py b/tests/nnapi/specs/skip/V1_2/log_softmax.mod.py
index 7e4a6774f..7e4a6774f 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/log_softmax.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/log_softmax.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/logistic_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/logistic_v1_2.mod.py
index fe91a814d..fe91a814d 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/logistic_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/logistic_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lsh_projection_3_relaxed.mod.py b/tests/nnapi/specs/skip/V1_2/lsh_projection_3_relaxed.mod.py
index de7cec111..de7cec111 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lsh_projection_3_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lsh_projection_3_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lsh_projection_4_relaxed.mod.py b/tests/nnapi/specs/skip/V1_2/lsh_projection_4_relaxed.mod.py
index 2b3b33a1e..2b3b33a1e 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lsh_projection_4_relaxed.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lsh_projection_4_relaxed.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lsh_projection_deprecated.mod.py b/tests/nnapi/specs/skip/V1_2/lsh_projection_deprecated.mod.py
index 2b3b33a1e..2b3b33a1e 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lsh_projection_deprecated.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lsh_projection_deprecated.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lsh_projection_float16.mod.py b/tests/nnapi/specs/skip/V1_2/lsh_projection_float16.mod.py
index ed19b17f7..ed19b17f7 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lsh_projection_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lsh_projection_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lstm2_float16.mod.py b/tests/nnapi/specs/skip/V1_2/lstm2_float16.mod.py
index 6ca648a6c..6ca648a6c 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lstm2_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lstm2_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lstm2_state2_float16.mod.py b/tests/nnapi/specs/skip/V1_2/lstm2_state2_float16.mod.py
index e9a143417..e9a143417 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lstm2_state2_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lstm2_state2_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lstm2_state_float16.mod.py b/tests/nnapi/specs/skip/V1_2/lstm2_state_float16.mod.py
index 4d1500161..4d1500161 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lstm2_state_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lstm2_state_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lstm3_float16.mod.py b/tests/nnapi/specs/skip/V1_2/lstm3_float16.mod.py
index aee476303..aee476303 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lstm3_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lstm3_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lstm3_state2_float16.mod.py b/tests/nnapi/specs/skip/V1_2/lstm3_state2_float16.mod.py
index 494c12aa5..494c12aa5 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lstm3_state2_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lstm3_state2_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lstm3_state3_float16.mod.py b/tests/nnapi/specs/skip/V1_2/lstm3_state3_float16.mod.py
index d359f84e1..d359f84e1 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lstm3_state3_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lstm3_state3_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lstm3_state_float16.mod.py b/tests/nnapi/specs/skip/V1_2/lstm3_state_float16.mod.py
index 695f0f53b..695f0f53b 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lstm3_state_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lstm3_state_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lstm_float16.mod.py b/tests/nnapi/specs/skip/V1_2/lstm_float16.mod.py
index ff7be5ebb..ff7be5ebb 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lstm_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lstm_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lstm_state2_float16.mod.py b/tests/nnapi/specs/skip/V1_2/lstm_state2_float16.mod.py
index 470dd7b50..470dd7b50 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lstm_state2_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lstm_state2_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/lstm_state_float16.mod.py b/tests/nnapi/specs/skip/V1_2/lstm_state_float16.mod.py
index 78f65366d..78f65366d 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/lstm_state_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/lstm_state_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/max_pool_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/max_pool_v1_2.mod.py
index 979cf2ea3..979cf2ea3 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/max_pool_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/max_pool_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/mean_float16.mod.py b/tests/nnapi/specs/skip/V1_2/mean_float16.mod.py
index 5814f60fe..5814f60fe 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/mean_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/mean_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/mul_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/mul_v1_2.mod.py
index 8d1002b92..8d1002b92 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/mul_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/mul_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/pad_all_dims.mod.py b/tests/nnapi/specs/skip/V1_2/pad_all_dims.mod.py
index 3ae88b87e..3ae88b87e 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/pad_all_dims.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/pad_all_dims.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/pad_float16.mod.py b/tests/nnapi/specs/skip/V1_2/pad_float16.mod.py
index 7a6b29ccf..7a6b29ccf 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/pad_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/pad_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/pad_low_rank.mod.py b/tests/nnapi/specs/skip/V1_2/pad_low_rank.mod.py
index 7394f796d..7394f796d 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/pad_low_rank.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/pad_low_rank.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/pad_low_rank_quant8.mod.py b/tests/nnapi/specs/skip/V1_2/pad_low_rank_quant8.mod.py
index be939341e..be939341e 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/pad_low_rank_quant8.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/pad_low_rank_quant8.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/pad_quant8.mod.py b/tests/nnapi/specs/skip/V1_2/pad_quant8.mod.py
index 9257f6972..9257f6972 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/pad_quant8.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/pad_quant8.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/pad_quant8_nonzero.mod.py b/tests/nnapi/specs/skip/V1_2/pad_quant8_nonzero.mod.py
index 75e7f7b60..75e7f7b60 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/pad_quant8_nonzero.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/pad_quant8_nonzero.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/pad_v2_1_float.mod.py b/tests/nnapi/specs/skip/V1_2/pad_v2_1_float.mod.py
index c500741c2..c500741c2 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/pad_v2_1_float.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/pad_v2_1_float.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/pad_v2_1_quant8.mod.py b/tests/nnapi/specs/skip/V1_2/pad_v2_1_quant8.mod.py
index 3dfaff64b..3dfaff64b 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/pad_v2_1_quant8.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/pad_v2_1_quant8.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/pad_v2_all_dims.mod.py b/tests/nnapi/specs/skip/V1_2/pad_v2_all_dims.mod.py
index 5b27f4963..5b27f4963 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/pad_v2_all_dims.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/pad_v2_all_dims.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/pad_v2_all_dims_quant8.mod.py b/tests/nnapi/specs/skip/V1_2/pad_v2_all_dims_quant8.mod.py
index 5ee4b06d7..5ee4b06d7 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/pad_v2_all_dims_quant8.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/pad_v2_all_dims_quant8.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/pad_v2_low_rank.mod.py b/tests/nnapi/specs/skip/V1_2/pad_v2_low_rank.mod.py
index 391d5cfb6..391d5cfb6 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/pad_v2_low_rank.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/pad_v2_low_rank.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/pad_v2_low_rank_quant8.mod.py b/tests/nnapi/specs/skip/V1_2/pad_v2_low_rank_quant8.mod.py
index b67c2b834..b67c2b834 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/pad_v2_low_rank_quant8.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/pad_v2_low_rank_quant8.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/pow.mod.py b/tests/nnapi/specs/skip/V1_2/pow.mod.py
index 2d174ed3d..2d174ed3d 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/pow.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/pow.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/quantize.mod.py b/tests/nnapi/specs/skip/V1_2/quantize.mod.py
index a42624dce..a42624dce 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/quantize.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/quantize.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/quantized_lstm.mod.py b/tests/nnapi/specs/skip/V1_2/quantized_lstm.mod.py
index 5fd4c7a84..5fd4c7a84 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/quantized_lstm.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/quantized_lstm.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/random_multinomial.mod.py b/tests/nnapi/specs/skip/V1_2/random_multinomial.mod.py
index bea061723..bea061723 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/random_multinomial.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/random_multinomial.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/random_multinomial_float16.mod.py b/tests/nnapi/specs/skip/V1_2/random_multinomial_float16.mod.py
index 33d774ec7..33d774ec7 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/random_multinomial_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/random_multinomial_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/reduce_all.mod.py b/tests/nnapi/specs/skip/V1_2/reduce_all.mod.py
index 831a702a2..831a702a2 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/reduce_all.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/reduce_all.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/reduce_any.mod.py b/tests/nnapi/specs/skip/V1_2/reduce_any.mod.py
index da0caca2d..da0caca2d 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/reduce_any.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/reduce_any.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/reduce_prod.mod.py b/tests/nnapi/specs/skip/V1_2/reduce_prod.mod.py
index 978f273bc..978f273bc 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/reduce_prod.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/reduce_prod.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/relu1_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/relu1_v1_2.mod.py
index 9b69ea6cc..9b69ea6cc 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/relu1_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/relu1_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/relu6_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/relu6_v1_2.mod.py
index 068f53ffb..068f53ffb 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/relu6_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/relu6_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/relu_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/relu_v1_2.mod.py
index 9065fc950..9065fc950 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/relu_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/relu_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/reshape_float16.mod.py b/tests/nnapi/specs/skip/V1_2/reshape_float16.mod.py
index f54cba6d8..f54cba6d8 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/reshape_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/reshape_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/resize_bilinear_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/resize_bilinear_v1_2.mod.py
index 572d06b75..572d06b75 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/resize_bilinear_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/resize_bilinear_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/resize_nearest_neighbor.mod.py b/tests/nnapi/specs/skip/V1_2/resize_nearest_neighbor.mod.py
index 04102c5ed..04102c5ed 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/resize_nearest_neighbor.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/resize_nearest_neighbor.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/rnn_float16.mod.py b/tests/nnapi/specs/skip/V1_2/rnn_float16.mod.py
index 7968c556c..7968c556c 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/rnn_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/rnn_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/roi_align.mod.py b/tests/nnapi/specs/skip/V1_2/roi_align.mod.py
index d1b1303c2..d1b1303c2 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/roi_align.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/roi_align.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/roi_pooling.mod.py b/tests/nnapi/specs/skip/V1_2/roi_pooling.mod.py
index f4135c5b4..f4135c5b4 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/roi_pooling.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/roi_pooling.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/select_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/select_v1_2.mod.py
index 4acaff2ec..4acaff2ec 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/select_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/select_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/sin.mod.py b/tests/nnapi/specs/skip/V1_2/sin.mod.py
index 7f2fcbb8b..7f2fcbb8b 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/sin.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/sin.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/softmax_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/softmax_v1_2.mod.py
index 5983b05a0..5983b05a0 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/softmax_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/softmax_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/space_to_batch_quant8_nonzero.mod.py b/tests/nnapi/specs/skip/V1_2/space_to_batch_quant8_nonzero.mod.py
index 2d0d710e8..2d0d710e8 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/space_to_batch_quant8_nonzero.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/space_to_batch_quant8_nonzero.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/space_to_batch_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/space_to_batch_v1_2.mod.py
index 356fae5d8..356fae5d8 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/space_to_batch_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/space_to_batch_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/space_to_depth_v1_2.mod.py b/tests/nnapi/specs/skip/V1_2/space_to_depth_v1_2.mod.py
index 9f1a799fe..9f1a799fe 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/space_to_depth_v1_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/space_to_depth_v1_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/squeeze_float16.mod.py b/tests/nnapi/specs/skip/V1_2/squeeze_float16.mod.py
index e5f18a524..e5f18a524 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/squeeze_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/squeeze_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/strided_slice_float16.mod.py b/tests/nnapi/specs/skip/V1_2/strided_slice_float16.mod.py
index 88735f54e..88735f54e 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/strided_slice_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/strided_slice_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/sub_quantized_different_scales.mod.py b/tests/nnapi/specs/skip/V1_2/sub_quantized_different_scales.mod.py
index 61bda3747..61bda3747 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/sub_quantized_different_scales.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/sub_quantized_different_scales.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/svdf_bias_present_float16.mod.py b/tests/nnapi/specs/skip/V1_2/svdf_bias_present_float16.mod.py
index 4dc691400..4dc691400 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/svdf_bias_present_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/svdf_bias_present_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/svdf_float16.mod.py b/tests/nnapi/specs/skip/V1_2/svdf_float16.mod.py
index 2b0f368d3..2b0f368d3 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/svdf_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/svdf_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/svdf_state_float16.mod.py b/tests/nnapi/specs/skip/V1_2/svdf_state_float16.mod.py
index f8fcae7b0..f8fcae7b0 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/svdf_state_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/svdf_state_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/tile_1.mod.py b/tests/nnapi/specs/skip/V1_2/tile_1.mod.py
index 8cc71be4b..8cc71be4b 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/tile_1.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/tile_1.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/tile_2.mod.py b/tests/nnapi/specs/skip/V1_2/tile_2.mod.py
index 2ef0366cb..2ef0366cb 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/tile_2.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/tile_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/tile_3.mod.py b/tests/nnapi/specs/skip/V1_2/tile_3.mod.py
index 2b949a6f1..2b949a6f1 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/tile_3.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/tile_3.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/transpose_conv2d.mod.py b/tests/nnapi/specs/skip/V1_2/transpose_conv2d.mod.py
index 5a91c12fe..5a91c12fe 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/transpose_conv2d.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/transpose_conv2d.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/transpose_conv2d_large.mod.py b/tests/nnapi/specs/skip/V1_2/transpose_conv2d_large.mod.py
index f9a87c1bf..f9a87c1bf 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/transpose_conv2d_large.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/transpose_conv2d_large.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/transpose_float16.mod.py b/tests/nnapi/specs/skip/V1_2/transpose_float16.mod.py
index 79b3796fa..79b3796fa 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/transpose_float16.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/transpose_float16.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_1step.mod.py b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_1step.mod.py
index 1e9a633a9..1e9a633a9 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_1step.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_1step.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_norm_peephole_projection.mod.py b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_norm_peephole_projection.mod.py
index 08bc3ab5c..08bc3ab5c 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_norm_peephole_projection.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_norm_peephole_projection.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_peephole_projection_bias.mod.py b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_peephole_projection_bias.mod.py
index d4f020e31..d4f020e31 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_peephole_projection_bias.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_peephole_projection_bias.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_cifg_peephole.mod.py b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_cifg_peephole.mod.py
index 91bde56ee..91bde56ee 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_cifg_peephole.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_cifg_peephole.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_f16_batch_major.mod.py b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_f16_batch_major.mod.py
index 4c4e7a8fa..4c4e7a8fa 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_f16_batch_major.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_f16_batch_major.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_f16_norm_peephole_projection.mod.py b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_f16_norm_peephole_projection.mod.py
index b29308954..b29308954 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_f16_norm_peephole_projection.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_f16_norm_peephole_projection.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_layer_norm_cifg_peephole.mod.py b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_layer_norm_cifg_peephole.mod.py
index 1fa0ff01a..1fa0ff01a 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_layer_norm_cifg_peephole.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_layer_norm_cifg_peephole.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_norm_peephole_projection.mod.py b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_norm_peephole_projection.mod.py
index 421fbf72a..421fbf72a 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_norm_peephole_projection.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_norm_peephole_projection.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_rnn.mod.py b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_rnn.mod.py
index 84ae779eb..84ae779eb 100644..100755
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_rnn.mod.py
+++ b/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_rnn.mod.py
diff --git a/tests/nnapi/src/TestGenerated.cpp b/tests/nnapi/src/TestGenerated.cpp
index a4148cf87..234735354 100644
--- a/tests/nnapi/src/TestGenerated.cpp
+++ b/tests/nnapi/src/TestGenerated.cpp
@@ -59,25 +59,23 @@ void print(std::ostream& os, const std::map<int, std::vector<T>>& test) {
}
// Specialized for _Float16 because it requires explicit conversion.
-// Fix for neurun: comment out
-//template <>
-//void print<_Float16>(std::ostream& os, const std::map<int, std::vector<_Float16>>& test) {
-// for_each<_Float16>(test, [&os](int idx, const std::vector<_Float16>& f) {
-// os << " aliased_output" << idx << ": [";
-// for (size_t i = 0; i < f.size(); ++i) {
-// os << (i == 0 ? "" : ", ") << +static_cast<float>(f[i]);
-// }
-// os << "],\n";
-// });
-//}
+template <>
+void print<_Float16>(std::ostream& os, const std::map<int, std::vector<_Float16>>& test) {
+ for_each<_Float16>(test, [&os](int idx, const std::vector<_Float16>& f) {
+ os << " aliased_output" << idx << ": [";
+ for (size_t i = 0; i < f.size(); ++i) {
+ os << (i == 0 ? "" : ", ") << +static_cast<float>(f[i]);
+ }
+ os << "],\n";
+ });
+}
void printAll(std::ostream& os, const MixedTyped& test) {
print(os, test.float32Operands);
print(os, test.int32Operands);
print(os, test.quant8AsymmOperands);
print(os, test.quant16SymmOperands);
- // Fix for neurun: comment out
- //print(os, test.float16Operands);
+ print(os, test.float16Operands);
print(os, test.bool8Operands);
print(os, test.quant8ChannelOperands);
print(os, test.quant16AsymmOperands);
@@ -244,7 +242,7 @@ void GeneratedTests::execute(std::function<void(Model*)> createModel,
};
mTestCompilationCaching = false;
-// Fix for neurun: Not supported feature - copmilation caching
+// Fix for onert: Not supported feature - copmilation caching
// TODO Enable this
#if 0
executeInternal(dumpFile);
@@ -257,7 +255,7 @@ void GeneratedTests::SetUp() {
#ifdef NNTEST_COMPUTE_MODE
mOldComputeMode = Execution::setComputeMode(GetParam());
#endif
- // Fix for neurun: Fix file path for linux
+ // Fix for onert: Fix file path for linux
char cacheDirTemp[] = "/tmp/TestCompilationCachingXXXXXX";
//char cacheDirTemp[] = "/data/local/tmp/TestCompilationCachingXXXXXX";
char* cacheDir = mkdtemp(cacheDirTemp);
diff --git a/tests/nnapi/src/TestMain.cpp b/tests/nnapi/src/TestMain.cpp
index 317f2f3f9..725d77c1c 100644
--- a/tests/nnapi/src/TestMain.cpp
+++ b/tests/nnapi/src/TestMain.cpp
@@ -22,7 +22,7 @@
#include "Utils.h"
#endif
-// FIX for neurun: comment out include android-base/logging.h
+// FIX for onert: comment out include android-base/logging.h
//#include <android-base/logging.h>
#include <gtest/gtest.h>
#include <cctype>
@@ -77,7 +77,7 @@ static int test(bool useCpuOnly, Execution::ComputeMode computeMode, bool allowS
return "<unknown ComputeMode>";
};
- // FIX for neurun: comment out android logging
+ // FIX for onert: comment out android logging
//LOG(INFO) << "test(useCpuOnly = " << useCpuOnly << ", computeMode = " << computeModeText()
// << ", allowSyncExecHal = " << allowSyncExecHal << ") // pass " << passIndex;
std::cout << "[**********] useCpuOnly = " << useCpuOnly
@@ -86,7 +86,7 @@ static int test(bool useCpuOnly, Execution::ComputeMode computeMode, bool allowS
<< std::endl;
if (!((uint64_t(1) << passIndex) & allowedPasses)) {
- // FIX for neurun: comment out android logging
+ // FIX for onert: comment out android logging
//LOG(INFO) << "SKIPPED PASS";
std::cout << "SKIPPED PASS" << std::endl;
return 0;
@@ -134,7 +134,7 @@ int main(int argc, char** argv) {
// The burst path is off by default in these tests. This is the first case
// where it is turned on. Both "useCpuOnly" and "allowSyncExecHal" are
// irrelevant here because the burst path is separate from both.
- // Fix for neurun: disable burst mode
+ // Fix for onert: disable burst mode
//n |= test(/*useCpuOnly=*/false, Execution::ComputeMode::BURST);
return n;
diff --git a/tests/nnapi/src/TestNeuralNetworksWrapper.h b/tests/nnapi/src/TestNeuralNetworksWrapper.h
index 693311f45..022f3fade 100644
--- a/tests/nnapi/src/TestNeuralNetworksWrapper.h
+++ b/tests/nnapi/src/TestNeuralNetworksWrapper.h
@@ -23,12 +23,12 @@
#include "NeuralNetworks.h"
#include "NeuralNetworksWrapper.h"
-// Fix for neurun: comment out include NeuralNetworksWrapperExtensions.h
+// Fix for onert: comment out include NeuralNetworksWrapperExtensions.h
//#include "NeuralNetworksWrapperExtensions.h"
#include <math.h>
-// Fix for neurun: use boost::optional instead of std::optional
-// TODO in neurun: introduce and use internal optional library
+// Fix for onert: use boost::optional instead of std::optional
+// TODO in onert: introduce and use internal optional library
#include <boost/optional.hpp>
#include <string>
#include <vector>
@@ -39,7 +39,7 @@ namespace test_wrapper {
using wrapper::Event;
using wrapper::ExecutePreference;
-// Fix for neurun: comment out ExtensionModel, ExtensionOperandParams and ExtensionOperandType
+// Fix for onert: comment out ExtensionModel, ExtensionOperandParams and ExtensionOperandType
//using wrapper::ExtensionModel;
//using wrapper::ExtensionOperandParams;
//using wrapper::ExtensionOperandType;
diff --git a/tests/nnapi/src/TestValidation.cpp b/tests/nnapi/src/TestValidation.cpp
index 19db43800..1a2e146dc 100644
--- a/tests/nnapi/src/TestValidation.cpp
+++ b/tests/nnapi/src/TestValidation.cpp
@@ -23,7 +23,7 @@
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
-// Note: neurun is allow to set activation operand constant only,
+// Note: onert is allow to set activation operand constant only,
// so we change test to set operand #2 to constant. (ANEURALNETWORKS_FUSED_NONE)
// And model's input is changed: [0, 1, 2] -> [0, 1]
// This file tests all the validations done by the Neural Networks API.
@@ -87,7 +87,7 @@ class ValidationTestIdentify : public ValidationTestModel {
ASSERT_EQ(ANeuralNetworksModel_addOperand(mModel, &tensorType), ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(ANeuralNetworksModel_addOperand(mModel, &scalarType), ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(ANeuralNetworksModel_addOperand(mModel, &tensorType), ANEURALNETWORKS_NO_ERROR);
- // neurun is allow to set activation operand constant only
+ // onert is allow to set activation operand constant only
int32_t act = ANEURALNETWORKS_FUSED_NONE;
ASSERT_EQ(ANeuralNetworksModel_setOperandValue(mModel, 2, &act, sizeof(act)), ANEURALNETWORKS_NO_ERROR);
uint32_t inList[3]{0, 1, 2};
@@ -118,7 +118,7 @@ protected:
ASSERT_EQ(ANeuralNetworksModel_addOperand(mModel, &tensorType), ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(ANeuralNetworksModel_addOperand(mModel, &scalarType), ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(ANeuralNetworksModel_addOperand(mModel, &tensorType), ANEURALNETWORKS_NO_ERROR);
- // neurun is allow to set activation operand constant only
+ // onert is allow to set activation operand constant only
int32_t act = ANEURALNETWORKS_FUSED_NONE;
ASSERT_EQ(ANeuralNetworksModel_setOperandValue(mModel, 2, &act, sizeof(act)), ANEURALNETWORKS_NO_ERROR);
uint32_t inList[3]{0, 1, 2};
diff --git a/tests/nnfw_api/CMakeLists.txt b/tests/nnfw_api/CMakeLists.txt
new file mode 100644
index 000000000..4972e41dc
--- /dev/null
+++ b/tests/nnfw_api/CMakeLists.txt
@@ -0,0 +1,25 @@
+if (NOT BUILD_RUNTIME_NNFW_API_TEST)
+ return()
+endif(NOT BUILD_RUNTIME_NNFW_API_TEST)
+
+if (NOT BUILD_ONERT)
+ message(STATUS "Skip build NNFW API test: no runtime build")
+ return()
+endif(NOT BUILD_ONERT)
+
+nnfw_find_package(GTest)
+
+set(RUNTIME_NNFW_API_TEST nnfw_api_gtest)
+file(GLOB_RECURSE RUNTIME_NNFW_API_TEST_SRC "src/*.cc" "src/*.cpp")
+
+add_executable(${RUNTIME_NNFW_API_TEST} ${RUNTIME_NNFW_API_TEST_SRC})
+
+set(RUNTIME_NNFW_API_TEST_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/include
+ ${CMAKE_CURRENT_SOURCE_DIR}/src)
+target_include_directories(${RUNTIME_NNFW_API_TEST} PRIVATE ${RUNTIME_NNFW_API_TEST_INCLUDE})
+
+target_link_libraries(${RUNTIME_NNFW_API_TEST} nnfw-dev)
+target_link_libraries(${RUNTIME_NNFW_API_TEST} gtest gmock)
+target_link_libraries(${RUNTIME_NNFW_API_TEST} ${LIB_PTHREAD} dl)
+
+install(TARGETS ${RUNTIME_NNFW_API_TEST} DESTINATION unittest)
diff --git a/tests/nnfw_api/src/FourOneOpModelSetInput.cc b/tests/nnfw_api/src/FourOneOpModelSetInput.cc
new file mode 100644
index 000000000..e973ddeb0
--- /dev/null
+++ b/tests/nnfw_api/src/FourOneOpModelSetInput.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "model_path.h"
+#include "fixtures.h"
+
+using ValidationTestFourAddModelsSetInput = ValidationTestFourModelsSetInput<ModelPath::ADD>;
+
+TEST_F(ValidationTestFourAddModelsSetInput, run_001)
+{
+ ASSERT_EQ(nnfw_run(_objects[0].session), NNFW_STATUS_NO_ERROR);
+ ASSERT_EQ(nnfw_run(_objects[1].session), NNFW_STATUS_NO_ERROR);
+}
+
+TEST_F(ValidationTestFourAddModelsSetInput, run_002)
+{
+ int rep = 3;
+ while (rep--)
+ {
+ for (auto obj : _objects)
+ ASSERT_EQ(nnfw_run(obj.session), NNFW_STATUS_NO_ERROR);
+ }
+}
diff --git a/tests/nnfw_api/src/create_session.cc b/tests/nnfw_api/src/create_session.cc
new file mode 100644
index 000000000..3d775be06
--- /dev/null
+++ b/tests/nnfw_api/src/create_session.cc
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <nnfw_dev.h>
+
+TEST(nnfw_create_session, Test_001)
+{
+ nnfw_session *session = nullptr;
+ ASSERT_EQ(nnfw_create_session(&session), NNFW_STATUS_NO_ERROR);
+ ASSERT_EQ(nnfw_close_session(session), NNFW_STATUS_NO_ERROR);
+}
+
+TEST(nnfw_create_session, Negative_001)
+{
+ ASSERT_EQ(nnfw_create_session(nullptr), NNFW_STATUS_ERROR);
+}
diff --git a/tests/nnfw_api/src/fixtures.h b/tests/nnfw_api/src/fixtures.h
new file mode 100644
index 000000000..b2fa2c292
--- /dev/null
+++ b/tests/nnfw_api/src/fixtures.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_API_TEST_FIXTURES_H__
+#define __NNFW_API_TEST_FIXTURES_H__
+
+#include <array>
+#include <gtest/gtest.h>
+#include <nnfw.h>
+
+#include "model_path.h"
+
+inline uint64_t num_elems(const nnfw_tensorinfo *ti)
+{
+ uint64_t n = 1;
+ for (uint32_t i = 0; i < ti->rank; ++i)
+ {
+ n *= ti->dims[i];
+ }
+ return n;
+}
+
+struct SessionObject
+{
+ nnfw_session *session = nullptr;
+ std::vector<std::vector<float>> inputs;
+ std::vector<std::vector<float>> outputs;
+};
+
+class ValidationTest : public ::testing::Test
+{
+protected:
+ void SetUp() override {}
+};
+
+class ValidationTestSessionCreated : public ValidationTest
+{
+protected:
+ void SetUp() override
+ {
+ ValidationTest::SetUp();
+ ASSERT_EQ(nnfw_create_session(&_session), NNFW_STATUS_NO_ERROR);
+ }
+
+ void TearDown() override
+ {
+ ASSERT_EQ(nnfw_close_session(_session), NNFW_STATUS_NO_ERROR);
+ ValidationTest::TearDown();
+ }
+
+protected:
+ nnfw_session *_session = nullptr;
+};
+
+template <int PackageNo> class ValidationTestModelLoaded : public ValidationTestSessionCreated
+{
+protected:
+ void SetUp() override
+ {
+ ValidationTestSessionCreated::SetUp();
+ ASSERT_EQ(nnfw_load_model_from_file(_session,
+ ModelPath::get().getModelAbsolutePath(PackageNo).c_str()),
+ NNFW_STATUS_NO_ERROR);
+ ASSERT_NE(_session, nullptr);
+ }
+
+ void TearDown() override { ValidationTestSessionCreated::TearDown(); }
+};
+
+template <int PackageNo> class ValidationTestFourModelsSetInput : public ValidationTest
+{
+protected:
+ static const uint32_t NUM_SESSIONS = 4;
+
+ void SetUp() override
+ {
+ ValidationTest::SetUp();
+
+ auto model_path = ModelPath::get().getModelAbsolutePath(ModelPath::ADD);
+ for (auto &obj : _objects)
+ {
+ ASSERT_EQ(nnfw_create_session(&obj.session), NNFW_STATUS_NO_ERROR);
+ ASSERT_EQ(nnfw_load_model_from_file(obj.session, model_path.c_str()), NNFW_STATUS_NO_ERROR);
+ ASSERT_EQ(nnfw_prepare(obj.session), NNFW_STATUS_NO_ERROR);
+
+ obj.inputs.resize(1);
+ nnfw_tensorinfo ti;
+ ASSERT_EQ(nnfw_input_tensorinfo(obj.session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ uint64_t input_elements = num_elems(&ti);
+ obj.inputs[0].resize(input_elements);
+ ASSERT_EQ(nnfw_set_input(obj.session, 0, ti.dtype, obj.inputs[0].data(),
+ sizeof(float) * input_elements),
+ NNFW_STATUS_NO_ERROR);
+
+ obj.outputs.resize(1);
+ nnfw_tensorinfo ti_output;
+ ASSERT_EQ(nnfw_output_tensorinfo(obj.session, 0, &ti_output), NNFW_STATUS_NO_ERROR);
+ uint64_t output_elements = num_elems(&ti_output);
+ obj.outputs[0].resize(output_elements);
+ ASSERT_EQ(nnfw_set_output(obj.session, 0, ti_output.dtype, obj.outputs[0].data(),
+ sizeof(float) * output_elements),
+ NNFW_STATUS_NO_ERROR);
+ }
+ }
+
+ void TearDown() override
+ {
+ for (auto &obj : _objects)
+ {
+ ASSERT_EQ(nnfw_close_session(obj.session), NNFW_STATUS_NO_ERROR);
+ }
+ ValidationTest::TearDown();
+ }
+
+protected:
+ std::array<SessionObject, NUM_SESSIONS> _objects;
+};
+
+#endif // __NNFW_API_TEST_FIXTURES_H__
diff --git a/tests/nnfw_api/src/load_model.cc b/tests/nnfw_api/src/load_model.cc
new file mode 100644
index 000000000..5db470e61
--- /dev/null
+++ b/tests/nnfw_api/src/load_model.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "model_path.h"
+#include "fixtures.h"
+
+TEST_F(ValidationTestSessionCreated, load_session_001)
+{
+ // Existing model must
+ ASSERT_EQ(nnfw_load_model_from_file(
+ _session, ModelPath::get().getModelAbsolutePath(ModelPath::ADD).c_str()),
+ NNFW_STATUS_NO_ERROR);
+}
+
+TEST_F(ValidationTestSessionCreated, neg_load_session_001)
+{
+ ASSERT_EQ(nnfw_load_model_from_file(
+ _session, ModelPath::get().getModelAbsolutePath(ModelPath::DUMMY).c_str()),
+ NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestSessionCreated, neg_load_session_002)
+{
+ ASSERT_EQ(
+ nnfw_load_model_from_file(nullptr, // session is null
+ ModelPath::get().getModelAbsolutePath(ModelPath::ADD).c_str()),
+ NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestSessionCreated, neg_prepare_001)
+{
+ // nnfw_load_model_from_file was not called
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
+}
diff --git a/tests/nnfw_api/src/main.cc b/tests/nnfw_api/src/main.cc
new file mode 100644
index 000000000..4f0cb2b00
--- /dev/null
+++ b/tests/nnfw_api/src/main.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <stdexcept>
+#include <string>
+#include <dirent.h>
+#include <gtest/gtest.h>
+#include "model_path.h"
+
+/**
+ * @brief Function to check if test model directories exist before it actually performs the test
+ *
+ */
+void checkModels()
+{
+ std::string absolute_path = ModelPath::get().getModelAbsolutePath(ModelPath::ADD);
+ DIR *dir = opendir(absolute_path.c_str());
+ if (!dir)
+ {
+ throw std::runtime_error{"Please install the nnpackge for testing: " + absolute_path};
+ }
+ closedir(dir);
+}
+
+int main(int argc, char **argv)
+{
+ ModelPath::get().init(argv[0]);
+ ::testing::InitGoogleTest(&argc, argv);
+
+ try
+ {
+ checkModels();
+ }
+ catch (std::runtime_error &e)
+ {
+ std::cerr << e.what() << std::endl;
+ return -1;
+ }
+
+ return RUN_ALL_TESTS();
+}
diff --git a/tests/nnfw_api/src/model_path.cc b/tests/nnfw_api/src/model_path.cc
new file mode 100644
index 000000000..4fdfb0508
--- /dev/null
+++ b/tests/nnfw_api/src/model_path.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "model_path.h"
+
+#include <unistd.h>
+#include <libgen.h>
+#include <string.h>
+
+// NOTE Must match `enum TestPackages`
+const char *TEST_PACKAGE_NAMES[] = {"nonexisting_package", "add"};
+
+ModelPath &ModelPath::get()
+{
+ static ModelPath instance;
+ return instance;
+}
+
+void ModelPath::init(const char *argv0)
+{
+ char raw_dir[1024];
+ char cwd[1024];
+ strncpy(raw_dir, argv0, sizeof(raw_dir) - 1);
+ dirname(raw_dir);
+ if (raw_dir[0] == '/')
+ {
+ // If it is an absolute path, just use it
+ _base_path = raw_dir;
+ }
+ else
+ {
+ // If it is a relative path, prepend CWD
+ getcwd(cwd, sizeof(cwd));
+ _base_path = cwd;
+ _base_path += "/";
+ _base_path += raw_dir;
+ }
+}
+
+std::string ModelPath::getModelAbsolutePath(int package_no)
+{
+ const char *model_dir = TEST_PACKAGE_NAMES[package_no];
+ // Model dir is nested
+ return _base_path + "/nnfw_api_gtest_models/" + model_dir + "/" + model_dir;
+}
diff --git a/tests/nnfw_api/src/model_path.h b/tests/nnfw_api/src/model_path.h
new file mode 100644
index 000000000..c0219f9f8
--- /dev/null
+++ b/tests/nnfw_api/src/model_path.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_API_TEST_MODEL_PATH_H__
+#define __NNFW_API_TEST_MODEL_PATH_H__
+
+#include <string>
+
+/**
+ * @brief A helper class to find models for testing
+ */
+class ModelPath
+{
+public:
+ /**
+ * @brief Serial numbers for test packages. The numbers are mapped with package names.
+ * This is useful for creating GTest Fixtures with variable template to do
+ * different nn packages with no code duplication.
+ */
+ enum TestPackages
+ {
+ DUMMY, // Non-existing directory for negative tests
+ ADD
+ };
+
+ static ModelPath &get();
+
+ /**
+ * @brief Get the Absolute of the model to find
+ *
+ * @param package_no Model's serial number
+ * @return std::string The absolute path of model directory
+ */
+ std::string getModelAbsolutePath(int package_no);
+ /**
+ * @brief Save the current executable's directory based on argv[0] and CWD
+ *
+ * @param argv0 0th command line argument of the current process
+ */
+ void init(const char *argv0);
+
+private:
+ ModelPath() = default;
+
+private:
+ std::string _base_path;
+};
+
+#endif // __NNFW_API_TEST_MODEL_PATH_H__
diff --git a/tests/nnfw_api/src/prepare.cc b/tests/nnfw_api/src/prepare.cc
new file mode 100644
index 000000000..92c5a8f7e
--- /dev/null
+++ b/tests/nnfw_api/src/prepare.cc
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fixtures.h"
+#include "model_path.h"
+
+using ValidationTestAddModelLoaded = ValidationTestModelLoaded<ModelPath::ADD>;
+
+TEST_F(ValidationTestAddModelLoaded, prepare_001)
+{
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
+}
+
+TEST_F(ValidationTest, neg_prepare_001) { ASSERT_EQ(nnfw_prepare(nullptr), NNFW_STATUS_ERROR); }
diff --git a/tests/scripts/CMakeLists.txt b/tests/scripts/CMakeLists.txt
new file mode 100644
index 000000000..f4c9c6b58
--- /dev/null
+++ b/tests/scripts/CMakeLists.txt
@@ -0,0 +1,19 @@
+if(NOT INSTALL_TEST_SCRIPTS)
+ return()
+endif(NOT INSTALL_TEST_SCRIPTS)
+
+# Install test scripts
+file(GLOB TEST_SCRIPTS "*.sh")
+install(PROGRAMS ${TEST_SCRIPTS} DESTINATION tests/scripts)
+
+# Install test list
+file(GLOB TEST_LISTS "list/*.txt")
+install(FILES ${TEST_LISTS} DESTINATION tests/scripts/list)
+
+# Install framework test script
+file(GLOB FRAMEWORKTEST_SCRIPT "framework/run_test.sh")
+install(PROGRAMS ${FRAMEWORKTEST_SCRIPT} DESTINATION tests/scripts/framework)
+
+# Install framework test list file
+file(GLOB FRAMEWORKTEST_DIR framework/tests)
+install(DIRECTORY ${FRAMEWORKTEST_DIR} DESTINATION tests/scripts/framework)
diff --git a/tests/scripts/README.md b/tests/scripts/README.md
index b0f35c585..8e7e8085f 100644
--- a/tests/scripts/README.md
+++ b/tests/scripts/README.md
@@ -33,8 +33,8 @@ $ ./tests/scripts/unittest.sh \
- When `run_test.sh`, refer to the **tflite model** information in `config.sh`, download the file, and run the **tflite model** with the given **driver**.
- Related files : `run_test.sh` and `test_framework.sh`
- TC location :
- - `tests/framework/tests/` : Config directory for TC
- - `tests/framework/cache/` : TC (Downloaded tflite model files)
+ - `tests/scripts/framework/tests/` : Config directory for TC
+ - `tests/scripts/framework/cache/` : TC (Downloaded tflite model files)
### Run tflite_run with various tflite models
- Driver : `tflite_run`
@@ -50,7 +50,7 @@ $ ./tests/scripts/test-driver.sh \
### Run nnapi_test with various tflite models
- `nnapi_test` runs tflite in two ways and compares the result:
1. tflite interpreter
- 2. `libneuralnetworks.so`, which could be PureACL or neurun depending on linked to nnapi_test
+ 2. `libneuralnetworks.so`, which could be PureACL or onert depending on linked to nnapi_test
- Driver : `nnapi_test`
- Driver source location : `tools/nnapi_test/`
- Usage :
diff --git a/tests/scripts/benchmark_nnapi.sh b/tests/scripts/benchmark_nnapi.sh
index febb406d6..48120ad98 100755
--- a/tests/scripts/benchmark_nnapi.sh
+++ b/tests/scripts/benchmark_nnapi.sh
@@ -30,7 +30,7 @@ EXECUTORS="Linear Parallel" #TODO: accept this list as argument
function Usage()
{
- echo "Usage: ./$0 --reportdir=. --runtestsh=tests/framework/run_test.sh --driverbin=Product/out/bin/tflite_benchmark"
+ echo "Usage: ./$0 --reportdir=. --runtestsh=tests/scripts/framework/run_test.sh --driverbin=Product/out/bin/tflite_benchmark"
}
for i in "$@"
@@ -96,7 +96,7 @@ function profile_for_he_shed()
export USE_SCHEDULER=1
export PROFILING_MODE=1
export EXECUTOR="Dataflow"
- export NEURUN_LOG_ENABLE=1
+ export ONERT_LOG_ENABLE=1
rm "exec_time.json" 2>/dev/null
for ((j = 1 ; j <= $PROFILING_RUN_CNT ; j++)); do
@@ -115,7 +115,7 @@ function profile_for_he_shed()
# Save the exec_time.json of each run
cp "exec_time.json" $REPORT_MODEL_DIR/"exec_time_$j.json"
done
- unset USE_SCHEDULER PROFILING_MODE EXECUTOR NEURUN_LOG_ENABLE
+ unset USE_SCHEDULER PROFILING_MODE EXECUTOR ONERT_LOG_ENABLE
}
function run_with_he_scheduler()
@@ -126,22 +126,22 @@ function run_with_he_scheduler()
local MODEL=$4
local EXECUTOR=$5
- LOG_FILE=$REPORT_MODEL_DIR/tflite_neurun_with_he_scheduler_in_$EXECUTOR.txt
+ LOG_FILE=$REPORT_MODEL_DIR/tflite_onert_with_he_scheduler_in_$EXECUTOR.txt
export EXECUTOR=$EXECUTOR
export GRAPH_DOT_DUMP=1
export USE_SCHEDULER=1
- export NEURUN_LOG_ENABLE=1
+ export ONERT_LOG_ENABLE=1
- print_with_dots "TFLite neurun $EXECUTOR with HEScheduler"
+ print_with_dots "TFLite onert $EXECUTOR with HEScheduler"
RESULT=$(get_result_of_benchmark_test $RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
echo "$RESULT ms"
mv "after_lower.dot" $REPORT_MODEL_DIR/"after_lower_$EXECUTOR.dot"
- unset EXECUTOR GRAPH_DOT_DUMP USE_SCHEDULER NEURUN_LOG_ENABLE
+ unset EXECUTOR GRAPH_DOT_DUMP USE_SCHEDULER ONERT_LOG_ENABLE
}
-function run_neurun_with_all_config()
+function run_onert_with_all_config()
{
local MODEL=$1
local REPORT_MODEL_DIR=$2
@@ -173,7 +173,7 @@ function run_neurun_with_all_config()
fi
for backend in $BACKEND_LIST; do
export OP_BACKEND_ALLOPS=$backend
- run_benchmark_and_print "tflite_neurun_"$executor"_executor_$backend" "TFLite neurun $executor Executor $backend"\
+ run_benchmark_and_print "tflite_onert_"$executor"_executor_$backend" "TFLite onert $executor Executor $backend"\
$MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH
done
done
@@ -188,7 +188,7 @@ function run_benchmark_test()
local REPORT_MODEL_DIR=
export COUNT=5
- export NEURUN_LOG_ENABLE=1
+ export ONERT_LOG_ENABLE=1
echo
echo "============================================"
echo
@@ -217,12 +217,12 @@ function run_benchmark_test()
unset USE_NNAPI
run_benchmark_and_print "tflite_cpu" "TFLite CPU" $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH
- # run neurun
+ # run onert
if [ "$TEST_OP" == "true" ]; then
# Operation test don't need to test each scheduler
- run_neurun_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH "Linear" "$BACKEND_LIST"
+ run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH "Linear" "$BACKEND_LIST"
else
- run_neurun_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH "$EXECUTORS" "$BACKEND_LIST"
+ run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH "$EXECUTORS" "$BACKEND_LIST"
fi
if [[ $i -ne $(echo $BENCHMARK_MODEL_LIST | wc -w)-1 ]]; then
@@ -246,5 +246,5 @@ rm -rf $BENCHMARK_MODELS_FILE
echo ""
# print the result AND append to log file
-run_benchmark_test 2>&1 | tee -a neurun_benchmarks.txt
+run_benchmark_test 2>&1 | tee -a onert_benchmarks.txt
echo ""
diff --git a/tests/scripts/benchmark_nnpkg.sh b/tests/scripts/benchmark_nnpkg.sh
index ce593a8e5..dd701bf5c 100755
--- a/tests/scripts/benchmark_nnpkg.sh
+++ b/tests/scripts/benchmark_nnpkg.sh
@@ -105,7 +105,7 @@ for i in "${model_lists[@]}"; do
CPU_CMD="BACKENDS=cpu ${CMD}"
echo "${CPU_CMD}"
echo "" >> ${outfile}
- echo "neurun cpu" >> ${outfile}
+ echo "onert cpu" >> ${outfile}
eval "${CPU_CMD}"
sleep 10 # for avoiding cpu overheated
@@ -114,7 +114,7 @@ for i in "${model_lists[@]}"; do
NEON_CMD="BACKENDS=acl_neon ${CMD}"
echo "${NEON_CMD}"
echo "" >> ${outfile}
- echo "neurun acl_neon" >> ${outfile}
+ echo "onert acl_neon" >> ${outfile}
eval "${NEON_CMD}"
sleep 10 # for avoiding cpu overheated
@@ -123,7 +123,7 @@ for i in "${model_lists[@]}"; do
CL_CMD="BACKENDS=acl_cl ${CMD}"
echo "${CL_CMD}"
echo "" >> ${outfile}
- echo "neurun acl_cl" >> ${outfile}
+ echo "onert acl_cl" >> ${outfile}
eval "${CL_CMD}"
echo "" >> ${outfile}
diff --git a/tests/scripts/framework/run_test.sh b/tests/scripts/framework/run_test.sh
new file mode 100755
index 000000000..44b714974
--- /dev/null
+++ b/tests/scripts/framework/run_test.sh
@@ -0,0 +1,277 @@
+#!/usr/bin/env bash
+#
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+NNFW_HOME="$(dirname $(dirname $(dirname ${MY_PATH})))"
+CACHE_ROOT_PATH=$MY_PATH/"cache"
+TEST_ROOT_PATH=$MY_PATH/"tests"
+REPORT_DIR="report"
+
+RUN_DISABLED="true"
+
+function Usage()
+{
+ echo "Usage: ./$0 --driverbin={such as tflite_run} {tests to test or empty for all of tests}"
+ echo "Usage: ./$0 --driverbin=Product/out/bin/tflite_run --reportdir=report --tapname=verification.tap avgpool1 avgpool2"
+ echo ""
+ echo "--download - (default=off) Download model files. Other options is ignored"
+ echo "--driverbin - (default=../../Product/out/bin/tflite_run) runner for runnning framework tests"
+ echo "--reportdir - (default=report) directory to place tap files"
+ echo "--tapname - (default=framework_test.tap) file name to be written for tap"
+ echo ""
+}
+
+function need_download()
+{
+ LOCAL_PATH=$1
+ REMOTE_URL=$2
+ if [ ! -e $LOCAL_PATH ]; then
+ return 0;
+ fi
+ # Ignore checking md5 in cache
+ if [ ! -z $IGNORE_MD5 ] && [ "$IGNORE_MD5" == "1" ]; then
+ return 1
+ fi
+
+ LOCAL_HASH=$(md5sum $LOCAL_PATH | awk '{ print $1 }')
+ REMOTE_HASH=$(curl -ss $REMOTE_URL | md5sum | awk '{ print $1 }')
+ # TODO Emit an error when Content-MD5 field was not found. (Server configuration issue)
+ if [ "$LOCAL_HASH" != "$REMOTE_HASH" ]; then
+ echo "Downloaded file is outdated or incomplete."
+ return 0
+ fi
+ return 1
+}
+
+DRIVER_BIN=""
+TAP_NAME="framework_test.tap"
+TEST_LIST=()
+DOWNLOAD_MODE="off"
+
+# Support environment variable setting for mirror server
+FIXED_MODELFILE_SERVER="${MODELFILE_SERVER:-}"
+
+for i in "$@"
+do
+ case $i in
+ -h|--help|help)
+ Usage
+ exit 1
+ ;;
+ --driverbin=*)
+ DRIVER_BIN=${i#*=}
+ ;;
+ --reportdir=*)
+ REPORT_DIR=${i#*=}
+ ;;
+ --tapname=*)
+ TAP_NAME=${i#*=}
+ ;;
+ --download=*)
+ DOWNLOAD_MODE=${i#*=}
+ ;;
+ *)
+ TEST_LIST+=( $i )
+ ;;
+ esac
+ shift
+done
+
+if [[ ${#TEST_LIST[@]} -eq 0 ]]; then
+ RUN_DISABLED="false"
+fi
+
+if [ ! -n "$DRIVER_BIN" ]; then
+ DRIVER_BIN="$NNFW_HOME/Product/out/bin/tflite_run"
+fi
+
+# Check test driver setting
+if [ ! -e $DRIVER_BIN ] && [ "$DOWNLOAD_MODE" != "on" ]; then
+ echo "Cannot find test driver" $DRIVER_BIN ": please set proper DRIVER_BIN"
+ exit 1
+fi
+
+run_tests()
+{
+ echo "1..$#" > $REPORT_DIR/$TAP_NAME
+ SELECTED_TESTS=$@
+
+ echo ""
+ echo "Running tests:"
+ echo "======================"
+ for TEST_NAME in $SELECTED_TESTS; do
+ echo $TEST_NAME
+ done
+ echo "======================"
+
+ TOTAL_RESULT=0 # 0(normal) or 1(abnormal)
+ i=0
+ for TEST_NAME in $SELECTED_TESTS; do
+ # Test configure initialization
+ ((i++))
+ STATUS="enabled"
+ MODELFILE_SERVER_PATH=""
+ MODELFILE_NAME=""
+ source $TEST_ROOT_PATH/$TEST_NAME/config.sh
+
+ LOWER_STATUS="$(echo $STATUS | awk '{print tolower($0)}')"
+ if [ "$LOWER_STATUS" == "disabled" ] && [ "$RUN_DISABLED" == "false" ]; then
+ echo ""
+ echo "Skip $TEST_NAME"
+ echo "======================"
+ echo "ok $i # skip $TEST_NAME" >> $REPORT_DIR/$TAP_NAME
+ continue
+ fi
+
+ TEST_CACHE_PATH=$CACHE_ROOT_PATH/$TEST_NAME
+ MODELFILE=$TEST_CACHE_PATH/$MODELFILE_NAME
+ MODELFILE_URL="$MODELFILE_SERVER_PATH/$MODELFILE_NAME"
+ if [ -n "$FIXED_MODELFILE_SERVER" ]; then
+ MODELFILE_URL="$FIXED_MODELFILE_SERVER/$MODELFILE_NAME"
+ fi
+
+ # Download model file
+ if [ ! -e $TEST_CACHE_PATH ]; then
+ mkdir -p $TEST_CACHE_PATH
+ fi
+
+ # Download unless we have it in cache (Also check md5sum)
+ if need_download "$MODELFILE" "$MODELFILE_URL"; then
+ echo ""
+ echo "Download test file for $TEST_NAME"
+ echo "======================"
+
+ rm -f $MODELFILE # Remove invalid file if exists
+ pushd $TEST_CACHE_PATH
+ wget -nv $MODELFILE_URL
+ if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
+ unzip -o $MODELFILE_NAME
+ fi
+ popd
+ fi
+
+ # Find model file for downloaded by zip
+ if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
+ pushd $TEST_CACHE_PATH
+ MODELFILE=$TEST_CACHE_PATH/$(ls *.tflite)
+ popd
+ fi
+
+ echo ""
+ echo "Run $TEST_NAME"
+ echo "======================"
+
+ # Run driver to test framework
+ $DRIVER_BIN $MODELFILE
+
+ #$DRIVER_BIN $MODELFILE
+ if [[ $? -eq 0 ]]; then
+ echo "ok $i - $TEST_NAME" >> $REPORT_DIR/$TAP_NAME
+ else
+ echo "not ok $i - $TEST_NAME" >> $REPORT_DIR/$TAP_NAME
+ TOTAL_RESULT=1
+ fi
+ done
+ return $TOTAL_RESULT
+}
+
+download_tests()
+{
+ SELECTED_TESTS=$@
+
+ echo ""
+ echo "Downloading tests:"
+ echo "======================"
+ for TEST_NAME in $SELECTED_TESTS; do
+ echo $TEST_NAME
+ done
+ echo "======================"
+
+ i=0
+ for TEST_NAME in $SELECTED_TESTS; do
+ # Test configure initialization
+ ((i++))
+ MODELFILE_SERVER_PATH=""
+ MODELFILE_NAME=""
+ source $TEST_ROOT_PATH/$TEST_NAME/config.sh
+
+ TEST_CACHE_PATH=$CACHE_ROOT_PATH/$TEST_NAME
+ MODELFILE=$TEST_CACHE_PATH/$MODELFILE_NAME
+ MODELFILE_URL="$MODELFILE_SERVER/$MODELFILE_NAME"
+ if [ -n "$FIXED_MODELFILE_SERVER" ]; then
+ MODELFILE_URL="$FIXED_MODELFILE_SERVER/$MODELFILE_NAME"
+ fi
+
+ # Download model file
+ if [ ! -e $TEST_CACHE_PATH ]; then
+ mkdir -p $TEST_CACHE_PATH
+ fi
+
+ # Download unless we have it in cache (Also check md5sum)
+ if need_download "$MODELFILE" "$MODELFILE_URL"; then
+ echo ""
+ echo "Download test file for $TEST_NAME"
+ echo "======================"
+
+ rm -f $MODELFILE # Remove invalid file if exists
+ pushd $TEST_CACHE_PATH
+ wget -nv $MODELFILE_URL
+ if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
+ unzip -o $MODELFILE_NAME
+ fi
+ popd
+ fi
+
+ done
+}
+
+
+find_tests()
+{
+ local TEST_DIRS="$@"
+ local TESTS_TO_RUN=""
+
+ if [[ $# -eq 0 ]]; then
+ TEST_DIRS="."
+ fi
+
+ shift $#
+
+ pushd $TEST_ROOT_PATH > /dev/null
+ for DIR in $TEST_DIRS; do
+ if [ -d "$DIR" ]; then
+ TESTS_FOUND=$(find "$DIR" -type f -name 'config.sh' -exec dirname {} \;| sed 's|^./||' | sort)
+ TESTS_TO_RUN="$TESTS_TO_RUN $TESTS_FOUND"
+ else
+ echo "Test $DIR was not found. This test is not added." 1>&2
+ fi
+ done
+ popd > /dev/null
+
+ echo $TESTS_TO_RUN
+}
+
+mkdir -p $REPORT_DIR
+TESTS_TO_RUN=$(find_tests ${TEST_LIST[@]})
+
+if [[ "$DOWNLOAD_MODE" == "on" ]]; then
+ download_tests $TESTS_TO_RUN
+ exit 0;
+fi
+
+run_tests $TESTS_TO_RUN
+exit $?
diff --git a/tests/framework/tests/MODELS/inception_module/config.sh b/tests/scripts/framework/tests/MODELS/inception_module/config.sh
index 3f6eae9ee..3f6eae9ee 100644..100755
--- a/tests/framework/tests/MODELS/inception_module/config.sh
+++ b/tests/scripts/framework/tests/MODELS/inception_module/config.sh
diff --git a/tests/framework/tests/MODELS/inception_nonslim/config.sh b/tests/scripts/framework/tests/MODELS/inception_nonslim/config.sh
index 39f5d772d..39f5d772d 100644..100755
--- a/tests/framework/tests/MODELS/inception_nonslim/config.sh
+++ b/tests/scripts/framework/tests/MODELS/inception_nonslim/config.sh
diff --git a/tests/framework/tests/MODELS/inception_slim/config.sh b/tests/scripts/framework/tests/MODELS/inception_slim/config.sh
index 1c0cf3ef5..1c0cf3ef5 100644..100755
--- a/tests/framework/tests/MODELS/inception_slim/config.sh
+++ b/tests/scripts/framework/tests/MODELS/inception_slim/config.sh
diff --git a/tests/framework/tests/MODELS/mobilenet/config.sh b/tests/scripts/framework/tests/MODELS/mobilenet/config.sh
index b23d687cd..b23d687cd 100644..100755
--- a/tests/framework/tests/MODELS/mobilenet/config.sh
+++ b/tests/scripts/framework/tests/MODELS/mobilenet/config.sh
diff --git a/tests/scripts/framework/tests/abs/config.sh b/tests/scripts/framework/tests/abs/config.sh
new file mode 100755
index 000000000..7acdefa5a
--- /dev/null
+++ b/tests/scripts/framework/tests/abs/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="abs_4d.tflite"
diff --git a/tests/framework/tests/add/1D/config.sh b/tests/scripts/framework/tests/add/1D/config.sh
index ca6fafe26..ca6fafe26 100644..100755
--- a/tests/framework/tests/add/1D/config.sh
+++ b/tests/scripts/framework/tests/add/1D/config.sh
diff --git a/tests/framework/tests/add/4D/config.sh b/tests/scripts/framework/tests/add/4D/config.sh
index d9e93d17b..d9e93d17b 100644..100755
--- a/tests/framework/tests/add/4D/config.sh
+++ b/tests/scripts/framework/tests/add/4D/config.sh
diff --git a/tests/scripts/framework/tests/average_pool_2d/aligned/config.sh b/tests/scripts/framework/tests/average_pool_2d/aligned/config.sh
new file mode 100755
index 000000000..cdefb077e
--- /dev/null
+++ b/tests/scripts/framework/tests/average_pool_2d/aligned/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="avgpool_16B_aligned.tflite"
diff --git a/tests/framework/tests/average_pool_2d/avgpool1/config.sh b/tests/scripts/framework/tests/average_pool_2d/avgpool1/config.sh
index 7e0130088..7e0130088 100644..100755
--- a/tests/framework/tests/average_pool_2d/avgpool1/config.sh
+++ b/tests/scripts/framework/tests/average_pool_2d/avgpool1/config.sh
diff --git a/tests/framework/tests/average_pool_2d/avgpool2/config.sh b/tests/scripts/framework/tests/average_pool_2d/avgpool2/config.sh
index 1ef0b17ca..1ef0b17ca 100644..100755
--- a/tests/framework/tests/average_pool_2d/avgpool2/config.sh
+++ b/tests/scripts/framework/tests/average_pool_2d/avgpool2/config.sh
diff --git a/tests/framework/tests/batch_to_space_nd2/config.sh b/tests/scripts/framework/tests/batch_to_space_nd2/config.sh
index 1dbcb7681..1dbcb7681 100644..100755
--- a/tests/framework/tests/batch_to_space_nd2/config.sh
+++ b/tests/scripts/framework/tests/batch_to_space_nd2/config.sh
diff --git a/tests/framework/tests/cast/config.sh b/tests/scripts/framework/tests/cast/config.sh
index 0e123e3be..0e123e3be 100644..100755
--- a/tests/framework/tests/cast/config.sh
+++ b/tests/scripts/framework/tests/cast/config.sh
diff --git a/tests/framework/tests/concat/2D/config.sh b/tests/scripts/framework/tests/concat/2D/config.sh
index fd22e708c..fd22e708c 100644..100755
--- a/tests/framework/tests/concat/2D/config.sh
+++ b/tests/scripts/framework/tests/concat/2D/config.sh
diff --git a/tests/framework/tests/concat/concat1/config.sh b/tests/scripts/framework/tests/concat/concat1/config.sh
index 4543b163a..4543b163a 100644..100755
--- a/tests/framework/tests/concat/concat1/config.sh
+++ b/tests/scripts/framework/tests/concat/concat1/config.sh
diff --git a/tests/framework/tests/concat/concat2/config.sh b/tests/scripts/framework/tests/concat/concat2/config.sh
index f4404e471..f4404e471 100644..100755
--- a/tests/framework/tests/concat/concat2/config.sh
+++ b/tests/scripts/framework/tests/concat/concat2/config.sh
diff --git a/tests/framework/tests/conv_2d/convolution1/config.sh b/tests/scripts/framework/tests/conv_2d/convolution1/config.sh
index 46a205fdb..46a205fdb 100644..100755
--- a/tests/framework/tests/conv_2d/convolution1/config.sh
+++ b/tests/scripts/framework/tests/conv_2d/convolution1/config.sh
diff --git a/tests/framework/tests/conv_2d/convolution2/config.sh b/tests/scripts/framework/tests/conv_2d/convolution2/config.sh
index eca5b3374..eca5b3374 100644..100755
--- a/tests/framework/tests/conv_2d/convolution2/config.sh
+++ b/tests/scripts/framework/tests/conv_2d/convolution2/config.sh
diff --git a/tests/framework/tests/custom/squared_difference/config.sh b/tests/scripts/framework/tests/custom/squared_difference/config.sh
index 745a84447..745a84447 100644..100755
--- a/tests/framework/tests/custom/squared_difference/config.sh
+++ b/tests/scripts/framework/tests/custom/squared_difference/config.sh
diff --git a/tests/framework/tests/depthwise_conv_2d/depthconv1/config.sh b/tests/scripts/framework/tests/depthwise_conv_2d/depthconv1/config.sh
index 39aa4a45f..39aa4a45f 100644..100755
--- a/tests/framework/tests/depthwise_conv_2d/depthconv1/config.sh
+++ b/tests/scripts/framework/tests/depthwise_conv_2d/depthconv1/config.sh
diff --git a/tests/framework/tests/depthwise_conv_2d/depthconv2/config.sh b/tests/scripts/framework/tests/depthwise_conv_2d/depthconv2/config.sh
index 3dd7c50b3..3dd7c50b3 100644..100755
--- a/tests/framework/tests/depthwise_conv_2d/depthconv2/config.sh
+++ b/tests/scripts/framework/tests/depthwise_conv_2d/depthconv2/config.sh
diff --git a/tests/scripts/framework/tests/depthwise_conv_2d_no_fuse/config.sh b/tests/scripts/framework/tests/depthwise_conv_2d_no_fuse/config.sh
new file mode 100755
index 000000000..13fb264f0
--- /dev/null
+++ b/tests/scripts/framework/tests/depthwise_conv_2d_no_fuse/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="depth_conv_no_fuse.tflite"
diff --git a/tests/framework/tests/div/broadcast/config.sh b/tests/scripts/framework/tests/div/broadcast/config.sh
index 7c5e985fa..7c5e985fa 100644..100755
--- a/tests/framework/tests/div/broadcast/config.sh
+++ b/tests/scripts/framework/tests/div/broadcast/config.sh
diff --git a/tests/framework/tests/embedding_lookup/config.sh b/tests/scripts/framework/tests/embedding_lookup/config.sh
index 5e5e4ad92..5e5e4ad92 100644..100755
--- a/tests/framework/tests/embedding_lookup/config.sh
+++ b/tests/scripts/framework/tests/embedding_lookup/config.sh
diff --git a/tests/scripts/framework/tests/equal/config.sh b/tests/scripts/framework/tests/equal/config.sh
new file mode 100755
index 000000000..a43fd73f7
--- /dev/null
+++ b/tests/scripts/framework/tests/equal/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="equal_4d.tflite"
diff --git a/tests/framework/tests/exp/config.sh b/tests/scripts/framework/tests/exp/config.sh
index 944f0bbce..944f0bbce 100644..100755
--- a/tests/framework/tests/exp/config.sh
+++ b/tests/scripts/framework/tests/exp/config.sh
diff --git a/tests/framework/tests/floor/floor1/config.sh b/tests/scripts/framework/tests/floor/floor1/config.sh
index 4952e4a54..4952e4a54 100644..100755
--- a/tests/framework/tests/floor/floor1/config.sh
+++ b/tests/scripts/framework/tests/floor/floor1/config.sh
diff --git a/tests/framework/tests/floor/floor2/config.sh b/tests/scripts/framework/tests/floor/floor2/config.sh
index 24581dc33..24581dc33 100644..100755
--- a/tests/framework/tests/floor/floor2/config.sh
+++ b/tests/scripts/framework/tests/floor/floor2/config.sh
diff --git a/tests/framework/tests/fullyconnected/fc1/config.sh b/tests/scripts/framework/tests/fullyconnected/fc1/config.sh
index 013361583..013361583 100644..100755
--- a/tests/framework/tests/fullyconnected/fc1/config.sh
+++ b/tests/scripts/framework/tests/fullyconnected/fc1/config.sh
diff --git a/tests/scripts/framework/tests/fullyconnected/hybrid/config.sh b/tests/scripts/framework/tests/fullyconnected/hybrid/config.sh
new file mode 100755
index 000000000..b2d8ffe86
--- /dev/null
+++ b/tests/scripts/framework/tests/fullyconnected/hybrid/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="fullyconnected_hybrid.tflite"
diff --git a/tests/framework/tests/fullyconnected/matmul2x2/config.sh b/tests/scripts/framework/tests/fullyconnected/matmul2x2/config.sh
index 91fd2ffce..91fd2ffce 100644..100755
--- a/tests/framework/tests/fullyconnected/matmul2x2/config.sh
+++ b/tests/scripts/framework/tests/fullyconnected/matmul2x2/config.sh
diff --git a/tests/scripts/framework/tests/fullyconnected/weights_as_input/config.sh b/tests/scripts/framework/tests/fullyconnected/weights_as_input/config.sh
new file mode 100755
index 000000000..1c218c5f4
--- /dev/null
+++ b/tests/scripts/framework/tests/fullyconnected/weights_as_input/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="fullyconnected_weights_as_input.tflite"
diff --git a/tests/framework/tests/gather/config.sh b/tests/scripts/framework/tests/gather/config.sh
index 0f100a823..0f100a823 100644..100755
--- a/tests/framework/tests/gather/config.sh
+++ b/tests/scripts/framework/tests/gather/config.sh
diff --git a/tests/scripts/framework/tests/greater/config.sh b/tests/scripts/framework/tests/greater/config.sh
new file mode 100755
index 000000000..aba3d4a3f
--- /dev/null
+++ b/tests/scripts/framework/tests/greater/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="greater_4d.tflite"
diff --git a/tests/scripts/framework/tests/greater_equal/config.sh b/tests/scripts/framework/tests/greater_equal/config.sh
new file mode 100755
index 000000000..72beaa81f
--- /dev/null
+++ b/tests/scripts/framework/tests/greater_equal/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="greater_equal_4d.tflite"
diff --git a/tests/framework/tests/hashtable_lookup/config.sh b/tests/scripts/framework/tests/hashtable_lookup/config.sh
index 3222ee4d2..3222ee4d2 100644..100755
--- a/tests/framework/tests/hashtable_lookup/config.sh
+++ b/tests/scripts/framework/tests/hashtable_lookup/config.sh
diff --git a/tests/framework/tests/l2_normalization/config.sh b/tests/scripts/framework/tests/l2_normalization/config.sh
index 47801240f..47801240f 100644..100755
--- a/tests/framework/tests/l2_normalization/config.sh
+++ b/tests/scripts/framework/tests/l2_normalization/config.sh
diff --git a/tests/framework/tests/l2_pool_2d/config.sh b/tests/scripts/framework/tests/l2_pool_2d/config.sh
index a77aa66cf..a77aa66cf 100644..100755
--- a/tests/framework/tests/l2_pool_2d/config.sh
+++ b/tests/scripts/framework/tests/l2_pool_2d/config.sh
diff --git a/tests/scripts/framework/tests/less/config.sh b/tests/scripts/framework/tests/less/config.sh
new file mode 100755
index 000000000..7488dde0f
--- /dev/null
+++ b/tests/scripts/framework/tests/less/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="less_4d.tflite"
diff --git a/tests/scripts/framework/tests/less_equal/config.sh b/tests/scripts/framework/tests/less_equal/config.sh
new file mode 100755
index 000000000..2b53700f6
--- /dev/null
+++ b/tests/scripts/framework/tests/less_equal/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="less_equal_4d.tflite"
diff --git a/tests/framework/tests/logistic/config.sh b/tests/scripts/framework/tests/logistic/config.sh
index 456773aa9..456773aa9 100644..100755
--- a/tests/framework/tests/logistic/config.sh
+++ b/tests/scripts/framework/tests/logistic/config.sh
diff --git a/tests/framework/tests/max/config.sh b/tests/scripts/framework/tests/max/config.sh
index 479ca7fd0..479ca7fd0 100644..100755
--- a/tests/framework/tests/max/config.sh
+++ b/tests/scripts/framework/tests/max/config.sh
diff --git a/tests/framework/tests/max_pool_2d/maxpool1/config.sh b/tests/scripts/framework/tests/max_pool_2d/maxpool1/config.sh
index 19a602eb5..19a602eb5 100644..100755
--- a/tests/framework/tests/max_pool_2d/maxpool1/config.sh
+++ b/tests/scripts/framework/tests/max_pool_2d/maxpool1/config.sh
diff --git a/tests/framework/tests/max_pool_2d/maxpool2/config.sh b/tests/scripts/framework/tests/max_pool_2d/maxpool2/config.sh
index dc71599da..dc71599da 100644..100755
--- a/tests/framework/tests/max_pool_2d/maxpool2/config.sh
+++ b/tests/scripts/framework/tests/max_pool_2d/maxpool2/config.sh
diff --git a/tests/framework/tests/mean/config.sh b/tests/scripts/framework/tests/mean/config.sh
index 0853a87fc..0853a87fc 100644..100755
--- a/tests/framework/tests/mean/config.sh
+++ b/tests/scripts/framework/tests/mean/config.sh
diff --git a/tests/framework/tests/min/config.sh b/tests/scripts/framework/tests/min/config.sh
index 8148471a5..8148471a5 100644..100755
--- a/tests/framework/tests/min/config.sh
+++ b/tests/scripts/framework/tests/min/config.sh
diff --git a/tests/framework/tests/mul/broadcast/config.sh b/tests/scripts/framework/tests/mul/broadcast/config.sh
index 5522ac877..5522ac877 100644..100755
--- a/tests/framework/tests/mul/broadcast/config.sh
+++ b/tests/scripts/framework/tests/mul/broadcast/config.sh
diff --git a/tests/framework/tests/neg/config.sh b/tests/scripts/framework/tests/neg/config.sh
index 000f7c811..000f7c811 100644..100755
--- a/tests/framework/tests/neg/config.sh
+++ b/tests/scripts/framework/tests/neg/config.sh
diff --git a/tests/scripts/framework/tests/not_equal/config.sh b/tests/scripts/framework/tests/not_equal/config.sh
new file mode 100755
index 000000000..e2234197e
--- /dev/null
+++ b/tests/scripts/framework/tests/not_equal/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="not_equal_4d.tflite"
diff --git a/tests/scripts/framework/tests/one_hot/config.sh b/tests/scripts/framework/tests/one_hot/config.sh
new file mode 100755
index 000000000..7e3823486
--- /dev/null
+++ b/tests/scripts/framework/tests/one_hot/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="onehot_test.tflite"
diff --git a/tests/framework/tests/pack/config.sh b/tests/scripts/framework/tests/pack/config.sh
index 162ec9d9d..162ec9d9d 100644..100755
--- a/tests/framework/tests/pack/config.sh
+++ b/tests/scripts/framework/tests/pack/config.sh
diff --git a/tests/framework/tests/pad/4D_2D/config.sh b/tests/scripts/framework/tests/pad/4D_2D/config.sh
index 9e0de2244..9e0de2244 100644..100755
--- a/tests/framework/tests/pad/4D_2D/config.sh
+++ b/tests/scripts/framework/tests/pad/4D_2D/config.sh
diff --git a/tests/framework/tests/pad/pad1/config.sh b/tests/scripts/framework/tests/pad/pad1/config.sh
index 088cd8962..088cd8962 100644..100755
--- a/tests/framework/tests/pad/pad1/config.sh
+++ b/tests/scripts/framework/tests/pad/pad1/config.sh
diff --git a/tests/framework/tests/pad/pad2/config.sh b/tests/scripts/framework/tests/pad/pad2/config.sh
index 1683f5350..1683f5350 100644..100755
--- a/tests/framework/tests/pad/pad2/config.sh
+++ b/tests/scripts/framework/tests/pad/pad2/config.sh
diff --git a/tests/framework/tests/reduce_max/config.sh b/tests/scripts/framework/tests/reduce_max/config.sh
index d636b8bd3..d636b8bd3 100644..100755
--- a/tests/framework/tests/reduce_max/config.sh
+++ b/tests/scripts/framework/tests/reduce_max/config.sh
diff --git a/tests/framework/tests/reduce_mean/test1/config.sh b/tests/scripts/framework/tests/reduce_mean/test1/config.sh
index 2f370ea4e..2f370ea4e 100644..100755
--- a/tests/framework/tests/reduce_mean/test1/config.sh
+++ b/tests/scripts/framework/tests/reduce_mean/test1/config.sh
diff --git a/tests/framework/tests/reduce_mean/test2/config.sh b/tests/scripts/framework/tests/reduce_mean/test2/config.sh
index 6c54779a9..6c54779a9 100644..100755
--- a/tests/framework/tests/reduce_mean/test2/config.sh
+++ b/tests/scripts/framework/tests/reduce_mean/test2/config.sh
diff --git a/tests/framework/tests/reduce_sum/config.sh b/tests/scripts/framework/tests/reduce_sum/config.sh
index 31b185397..31b185397 100644..100755
--- a/tests/framework/tests/reduce_sum/config.sh
+++ b/tests/scripts/framework/tests/reduce_sum/config.sh
diff --git a/tests/framework/tests/relu/config.sh b/tests/scripts/framework/tests/relu/config.sh
index bca59ef04..bca59ef04 100644..100755
--- a/tests/framework/tests/relu/config.sh
+++ b/tests/scripts/framework/tests/relu/config.sh
diff --git a/tests/framework/tests/relu6/config.sh b/tests/scripts/framework/tests/relu6/config.sh
index 662cc4f33..662cc4f33 100644..100755
--- a/tests/framework/tests/relu6/config.sh
+++ b/tests/scripts/framework/tests/relu6/config.sh
diff --git a/tests/framework/tests/reshape/3D/config.sh b/tests/scripts/framework/tests/reshape/3D/config.sh
index 3f7ec31ea..3f7ec31ea 100644..100755
--- a/tests/framework/tests/reshape/3D/config.sh
+++ b/tests/scripts/framework/tests/reshape/3D/config.sh
diff --git a/tests/framework/tests/reshape/reshape1/config.sh b/tests/scripts/framework/tests/reshape/reshape1/config.sh
index 7bdef06ba..7bdef06ba 100644..100755
--- a/tests/framework/tests/reshape/reshape1/config.sh
+++ b/tests/scripts/framework/tests/reshape/reshape1/config.sh
diff --git a/tests/framework/tests/reshape/reshape2/config.sh b/tests/scripts/framework/tests/reshape/reshape2/config.sh
index b040f0081..b040f0081 100644..100755
--- a/tests/framework/tests/reshape/reshape2/config.sh
+++ b/tests/scripts/framework/tests/reshape/reshape2/config.sh
diff --git a/tests/framework/tests/resize_bilinear/config.sh b/tests/scripts/framework/tests/resize_bilinear/config.sh
index 8f612cf6d..8f612cf6d 100644..100755
--- a/tests/framework/tests/resize_bilinear/config.sh
+++ b/tests/scripts/framework/tests/resize_bilinear/config.sh
diff --git a/tests/framework/tests/rnn/config.sh b/tests/scripts/framework/tests/rnn/config.sh
index 997d6c138..997d6c138 100644..100755
--- a/tests/framework/tests/rnn/config.sh
+++ b/tests/scripts/framework/tests/rnn/config.sh
diff --git a/tests/framework/tests/rsqrt/config.sh b/tests/scripts/framework/tests/rsqrt/config.sh
index 87aa85277..87aa85277 100644..100755
--- a/tests/framework/tests/rsqrt/config.sh
+++ b/tests/scripts/framework/tests/rsqrt/config.sh
diff --git a/tests/scripts/framework/tests/shape/config.sh b/tests/scripts/framework/tests/shape/config.sh
new file mode 100755
index 000000000..468f38687
--- /dev/null
+++ b/tests/scripts/framework/tests/shape/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="shape_test.tflite"
diff --git a/tests/scripts/framework/tests/sin/config.sh b/tests/scripts/framework/tests/sin/config.sh
new file mode 100755
index 000000000..dcf1959d8
--- /dev/null
+++ b/tests/scripts/framework/tests/sin/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="sin_4d.tflite"
diff --git a/tests/framework/tests/slice/config.sh b/tests/scripts/framework/tests/slice/config.sh
index 12d06e977..12d06e977 100644..100755
--- a/tests/framework/tests/slice/config.sh
+++ b/tests/scripts/framework/tests/slice/config.sh
diff --git a/tests/framework/tests/softmax/config.sh b/tests/scripts/framework/tests/softmax/config.sh
index fa6300d7e..fa6300d7e 100644..100755
--- a/tests/framework/tests/softmax/config.sh
+++ b/tests/scripts/framework/tests/softmax/config.sh
diff --git a/tests/framework/tests/space_to_batch_nd2/config.sh b/tests/scripts/framework/tests/space_to_batch_nd2/config.sh
index 81933709e..81933709e 100644..100755
--- a/tests/framework/tests/space_to_batch_nd2/config.sh
+++ b/tests/scripts/framework/tests/space_to_batch_nd2/config.sh
diff --git a/tests/framework/tests/space_to_depth/config.sh b/tests/scripts/framework/tests/space_to_depth/config.sh
index ed103b826..ed103b826 100644..100755
--- a/tests/framework/tests/space_to_depth/config.sh
+++ b/tests/scripts/framework/tests/space_to_depth/config.sh
diff --git a/tests/framework/tests/sqrt/config.sh b/tests/scripts/framework/tests/sqrt/config.sh
index 220147238..220147238 100644..100755
--- a/tests/framework/tests/sqrt/config.sh
+++ b/tests/scripts/framework/tests/sqrt/config.sh
diff --git a/tests/framework/tests/squeeze/config.sh b/tests/scripts/framework/tests/squeeze/config.sh
index 5bcc67716..5bcc67716 100644..100755
--- a/tests/framework/tests/squeeze/config.sh
+++ b/tests/scripts/framework/tests/squeeze/config.sh
diff --git a/tests/framework/tests/strided_slice/config.sh b/tests/scripts/framework/tests/strided_slice/config.sh
index 4c41a1a39..4c41a1a39 100644..100755
--- a/tests/framework/tests/strided_slice/config.sh
+++ b/tests/scripts/framework/tests/strided_slice/config.sh
diff --git a/tests/framework/tests/sub/broadcast/config.sh b/tests/scripts/framework/tests/sub/broadcast/config.sh
index 2b1add0e5..2b1add0e5 100644..100755
--- a/tests/framework/tests/sub/broadcast/config.sh
+++ b/tests/scripts/framework/tests/sub/broadcast/config.sh
diff --git a/tests/framework/tests/tanh/config.sh b/tests/scripts/framework/tests/tanh/config.sh
index a9dde4923..a9dde4923 100644..100755
--- a/tests/framework/tests/tanh/config.sh
+++ b/tests/scripts/framework/tests/tanh/config.sh
diff --git a/tests/framework/tests/topk_v2/config.sh b/tests/scripts/framework/tests/topk_v2/config.sh
index 1a460266f..1a460266f 100644..100755
--- a/tests/framework/tests/topk_v2/config.sh
+++ b/tests/scripts/framework/tests/topk_v2/config.sh
diff --git a/tests/framework/tests/transpose/config.sh b/tests/scripts/framework/tests/transpose/config.sh
index 9adb85e70..9adb85e70 100644..100755
--- a/tests/framework/tests/transpose/config.sh
+++ b/tests/scripts/framework/tests/transpose/config.sh
diff --git a/tests/framework/tests/transpose_conv/same/config.sh b/tests/scripts/framework/tests/transpose_conv/same/config.sh
index 2cca86e03..2cca86e03 100644..100755
--- a/tests/framework/tests/transpose_conv/same/config.sh
+++ b/tests/scripts/framework/tests/transpose_conv/same/config.sh
diff --git a/tests/framework/tests/transpose_conv/valid/config.sh b/tests/scripts/framework/tests/transpose_conv/valid/config.sh
index d162331a3..d162331a3 100644..100755
--- a/tests/framework/tests/transpose_conv/valid/config.sh
+++ b/tests/scripts/framework/tests/transpose_conv/valid/config.sh
diff --git a/tests/scripts/list/benchmark_nnpkg_model_list.txt b/tests/scripts/list/benchmark_nnpkg_model_list.txt
index 9aa3d4854..8605ebcdc 100644
--- a/tests/scripts/list/benchmark_nnpkg_model_list.txt
+++ b/tests/scripts/list/benchmark_nnpkg_model_list.txt
@@ -1,16 +1,16 @@
densenet
inception_resnet_v2
inception_v3
+inception_v3_quant
inception_v4
+inception_v4_299_quant
inceptionv3_non_slim_2015
inceptionv3_slim_2016
mnasnet_1.3_224
mobilenet_v1_1.0_224
+mobilenet_v1_1.0_224_quant
mobilenet_v2_1.0_224
+mobilenet_v2_1.0_224_quant
multi_person_mobilenet_v1_075_float
resnet_v2_101
squeezenet
-inception_v3_quant
-inception_v4_299_quant
-mobilenet_v1_1.0_224_quant
-mobilenet_v2_1.0_224_quant
diff --git a/tests/scripts/list/frameworktest_list.aarch64.acl_cl.txt b/tests/scripts/list/frameworktest_list.aarch64.acl_cl.txt
new file mode 100644
index 000000000..fc7e71ab8
--- /dev/null
+++ b/tests/scripts/list/frameworktest_list.aarch64.acl_cl.txt
@@ -0,0 +1,46 @@
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+batch_to_space_nd2
+cast
+concat
+conv_2d
+custom
+depthwise_conv_2d
+div
+embedding_lookup
+exp
+floor
+fullyconnected
+gather
+hashtable_lookup
+l2_normalization
+l2_pool_2d
+max
+max_pool_2d
+mean
+min
+mul
+neg
+pack
+pad
+reduce_max
+reduce_mean
+reduce_sum
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_batch_nd2
+space_to_depth
+squeeze
+strided_slice
+sub
+tanh
+topk_v2
+transpose
+transpose_conv
diff --git a/tests/scripts/list/frameworktest_list.aarch64.acl_neon.txt b/tests/scripts/list/frameworktest_list.aarch64.acl_neon.txt
new file mode 100644
index 000000000..b58d39ab7
--- /dev/null
+++ b/tests/scripts/list/frameworktest_list.aarch64.acl_neon.txt
@@ -0,0 +1,41 @@
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+embedding_lookup
+floor
+gather
+hashtable_lookup
+l2_normalization
+l2_pool_2d
+logistic
+max
+max_pool_2d
+mean
+min
+mul
+one_hot
+pack
+pad
+reduce_max
+reduce_mean
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_depth
+sqrt
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
diff --git a/tests/scripts/list/frameworktest_list.aarch64.cpu.txt b/tests/scripts/list/frameworktest_list.aarch64.cpu.txt
new file mode 100644
index 000000000..203b4fece
--- /dev/null
+++ b/tests/scripts/list/frameworktest_list.aarch64.cpu.txt
@@ -0,0 +1,33 @@
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+equal
+exp
+fullyconnected/fc1
+fullyconnected/weights_as_input
+greater
+greater_equal
+less
+less_equal
+logistic
+max
+max_pool_2d
+min
+mul
+not_equal
+one_hot
+pack
+reduce_max
+reduce_sum
+reshape/reshape1
+softmax
+squeeze
+sub
+tanh
+transpose
diff --git a/tests/scripts/list/frameworktest_list.armv7l.acl_cl.txt b/tests/scripts/list/frameworktest_list.armv7l.acl_cl.txt
new file mode 100644
index 000000000..fc7e71ab8
--- /dev/null
+++ b/tests/scripts/list/frameworktest_list.armv7l.acl_cl.txt
@@ -0,0 +1,46 @@
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+batch_to_space_nd2
+cast
+concat
+conv_2d
+custom
+depthwise_conv_2d
+div
+embedding_lookup
+exp
+floor
+fullyconnected
+gather
+hashtable_lookup
+l2_normalization
+l2_pool_2d
+max
+max_pool_2d
+mean
+min
+mul
+neg
+pack
+pad
+reduce_max
+reduce_mean
+reduce_sum
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_batch_nd2
+space_to_depth
+squeeze
+strided_slice
+sub
+tanh
+topk_v2
+transpose
+transpose_conv
diff --git a/tests/scripts/list/frameworktest_list.armv7l.acl_neon.txt b/tests/scripts/list/frameworktest_list.armv7l.acl_neon.txt
new file mode 100644
index 000000000..9df071bbb
--- /dev/null
+++ b/tests/scripts/list/frameworktest_list.armv7l.acl_neon.txt
@@ -0,0 +1,42 @@
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+embedding_lookup
+floor
+fullyconnected
+gather
+hashtable_lookup
+l2_normalization
+l2_pool_2d
+logistic
+max
+max_pool_2d
+mean
+min
+mul
+one_hot
+pack
+pad
+reduce_max
+reduce_mean
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_depth
+sqrt
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
diff --git a/tests/scripts/list/frameworktest_list.armv7l.cpu.txt b/tests/scripts/list/frameworktest_list.armv7l.cpu.txt
new file mode 100644
index 000000000..634eb1112
--- /dev/null
+++ b/tests/scripts/list/frameworktest_list.armv7l.cpu.txt
@@ -0,0 +1,38 @@
+MODELS/inception_module
+MODELS/mobilenet
+abs
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+equal
+exp
+fullyconnected
+greater
+greater_equal
+less
+less_equal
+logistic
+max
+max_pool_2d
+min
+mul
+not_equal
+one_hot
+pack
+reduce_max
+reduce_sum
+reshape/reshape1
+rsqrt
+shape
+sin
+slice
+strided_slice
+softmax
+squeeze
+sub
+tanh
+transpose
diff --git a/tests/scripts/list/frameworktest_list.noarch.interp.txt b/tests/scripts/list/frameworktest_list.noarch.interp.txt
new file mode 100644
index 000000000..3555ee28e
--- /dev/null
+++ b/tests/scripts/list/frameworktest_list.noarch.interp.txt
@@ -0,0 +1,16 @@
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+concat
+conv_2d
+depthwise_conv_2d
+fullyconnected/fc1
+logistic
+max_pool_2d
+pad
+relu
+relu6
+reshape/reshape1
+softmax
+tanh
diff --git a/tests/scripts/list/frameworktest_list.x86_64.cpu.txt b/tests/scripts/list/frameworktest_list.x86_64.cpu.txt
new file mode 100644
index 000000000..41dc8fe54
--- /dev/null
+++ b/tests/scripts/list/frameworktest_list.x86_64.cpu.txt
@@ -0,0 +1,15 @@
+MODELS/inception_module
+add
+average_pool_2d
+concat
+conv_2d
+depthwise_conv_2d
+fullyconnected
+logistic
+max_pool_2d
+reduce_max
+reduce_sum
+reshape/reshape1
+softmax
+squeeze
+transpose
diff --git a/tests/scripts/list/neurun_frameworktest_list.armv7l.acl_cl.txt b/tests/scripts/list/neurun_frameworktest_list.armv7l.acl_cl.txt
deleted file mode 100644
index 81822b33c..000000000
--- a/tests/scripts/list/neurun_frameworktest_list.armv7l.acl_cl.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-add
-average_pool_2d
-batch_to_space_nd2
-cast
-concat
-conv_2d
-custom
-depthwise_conv_2d
-div
-embedding_lookup
-exp
-floor
-fullyconnected/fc1
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-max
-max_pool_2d
-mean
-min
-MODELS/inception_module
-MODELS/mobilenet
-mul
-neg
-pack
-pad
-reduce_max
-reduce_mean
-reduce_sum
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-space_to_depth
-squeeze
-strided_slice
-sub
-tanh
-topk_v2
-transpose
-transpose_conv
diff --git a/tests/scripts/list/neurun_frameworktest_list.armv7l.acl_neon.txt b/tests/scripts/list/neurun_frameworktest_list.armv7l.acl_neon.txt
deleted file mode 100644
index 20a6bf9d3..000000000
--- a/tests/scripts/list/neurun_frameworktest_list.armv7l.acl_neon.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-add
-average_pool_2d
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-embedding_lookup
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-logistic
-max_pool_2d
-mean
-mul
-pad
-reduce_max
-reduce_mean
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-softmax
-space_to_depth
-squeeze
-sqrt
-strided_slice
-sub
-tanh
-transpose
-transpose_conv
-min
-max
-slice
-pack
-MODELS/inception_module
-MODELS/mobilenet
diff --git a/tests/scripts/list/neurun_frameworktest_list.armv7l.cpu.txt b/tests/scripts/list/neurun_frameworktest_list.armv7l.cpu.txt
deleted file mode 100644
index 1c55a0086..000000000
--- a/tests/scripts/list/neurun_frameworktest_list.armv7l.cpu.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-average_pool_2d
-concat
-conv_2d
-depthwise_conv_2d
-fullyconnected/fc1
-max_pool_2d
-softmax
-reshape/reshape1
-add
-logistic
-squeeze
-MODELS/inception_module
-MODELS/mobilenet
diff --git a/tests/scripts/list/neurun_frameworktest_list.armv7l.ncnn.txt b/tests/scripts/list/neurun_frameworktest_list.armv7l.ncnn.txt
deleted file mode 100644
index f5f68d330..000000000
--- a/tests/scripts/list/neurun_frameworktest_list.armv7l.ncnn.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-transpose_conv
-add
diff --git a/tests/scripts/list/neurun_frameworktest_list.noarch.interp.txt b/tests/scripts/list/neurun_frameworktest_list.noarch.interp.txt
deleted file mode 100644
index 399920aa7..000000000
--- a/tests/scripts/list/neurun_frameworktest_list.noarch.interp.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-average_pool_2d
-concat
-conv_2d
-depthwise_conv_2d
-fullyconnected/fc1
-max_pool_2d
-softmax
-reshape/reshape1
-add
-logistic
-pad
-relu
-relu6
-tanh
-MODELS/inception_module
-MODELS/mobilenet
diff --git a/tests/scripts/list/neurun_frameworktest_list.x86-64.cpu.txt b/tests/scripts/list/neurun_frameworktest_list.x86-64.cpu.txt
deleted file mode 100644
index f9055c285..000000000
--- a/tests/scripts/list/neurun_frameworktest_list.x86-64.cpu.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-average_pool_2d
-concat
-conv_2d
-depthwise_conv_2d
-fullyconnected/fc1
-max_pool_2d
-softmax
-reshape/reshape1
-squeeze
-add
-logistic
-MODELS/inception_module
diff --git a/tests/scripts/list/tflite_loader_list.aarch64.txt b/tests/scripts/list/tflite_loader_list.aarch64.txt
new file mode 100644
index 000000000..aca8f16ee
--- /dev/null
+++ b/tests/scripts/list/tflite_loader_list.aarch64.txt
@@ -0,0 +1,35 @@
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+batch_to_space_nd2
+concat
+conv_2d/convolution1
+depthwise_conv_2d
+div
+exp
+fullyconnected/fc1
+logistic
+max
+max_pool_2d/maxpool1
+mean
+min
+mul
+pack
+pad
+reduce_max
+reduce_sum
+relu
+relu6
+reshape/reshape1
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_batch_nd2
+sqrt
+squeeze
+sub
+tanh
+transpose
+transpose_conv
diff --git a/tests/scripts/list/tflite_loader_list.armv7l.txt b/tests/scripts/list/tflite_loader_list.armv7l.txt
index 578d4dd70..aca8f16ee 100644
--- a/tests/scripts/list/tflite_loader_list.armv7l.txt
+++ b/tests/scripts/list/tflite_loader_list.armv7l.txt
@@ -1,37 +1,35 @@
+MODELS/inception_module
+MODELS/mobilenet
+add
average_pool_2d
+batch_to_space_nd2
concat
conv_2d/convolution1
depthwise_conv_2d
+div
+exp
fullyconnected/fc1
+logistic
+max
max_pool_2d/maxpool1
-softmax
-reshape/reshape1
-add
-MODELS/inception_module
-MODELS/mobilenet
-transpose_conv
-sub
+mean
+min
mul
-div
+pack
+pad
+reduce_max
+reduce_sum
relu
relu6
+reshape/reshape1
resize_bilinear
rsqrt
+slice
+softmax
+space_to_batch_nd2
sqrt
+squeeze
+sub
tanh
transpose
-mean
-reduce_max
-pad
-logistic
-exp
-squeeze
-min
-max
-space_to_batch_nd2
-batch_to_space_nd2
-reduce_sum
-min
-max
-slice
-pack
+transpose_conv
diff --git a/tests/scripts/merge_result_of_benchmark_nnpkg.py b/tests/scripts/merge_result_of_benchmark_nnpkg.py
index 79c167ce4..d87e751b9 100755
--- a/tests/scripts/merge_result_of_benchmark_nnpkg.py
+++ b/tests/scripts/merge_result_of_benchmark_nnpkg.py
@@ -17,7 +17,7 @@
import sys, glob, os
import csv
import copy
-from optparse import OptionParser
+import argparse
g_header = []
g_new_header = []
@@ -53,9 +53,9 @@ def global_init():
'Execute_Time_Mean_Diff_Vs_Tflite_Cpu',
'Execute_Time_Mean_Ratio_Vs_Tflite_Cpu',
'Peak_RSS_Diff_Vs_Tflite_Cpu',
- 'Peak_RSS_Time_Mean_Ratio_Vs_Tflite_Cpu',
+ 'Peak_RSS_Ratio_Vs_Tflite_Cpu',
'Peak_HWM_Diff_Vs_Tflite_Cpu',
- 'Peak_HWM_Time_Mean_Ratio_Vs_Tflite_Cpu',
+ 'Peak_HWM_Ratio_Vs_Tflite_Cpu',
]
# if new backend comes from csv, it will be stored in g_backends
@@ -145,9 +145,9 @@ class Data(object):
row.append(0.0) # 'Execute_Time_Mean_Diff_Vs_Tflite_Cpu'
row.append(val) # 'Execute_Time_Mean_Ratio_Vs_Tflite_Cpu'
row.append(0) # 'Peak_RSS_Diff_Vs_Tflite_Cpu'
- row.append(val) # 'Peak_RSS_Time_Mean_Ratio_Vs_Tflite_Cpu'
+ row.append(val) # 'Peak_RSS_Ratio_Vs_Tflite_Cpu'
row.append(0) # 'Peak_HWM_Diff_Vs_Tflite_Cpu'
- row.append(val) # 'Peak_HWM_Time_Mean_Ratio_Vs_Tflite_Cpu'
+ row.append(val) # 'Peak_HWM_Ratio_Vs_Tflite_Cpu'
return row
def RowVs(self, vs_data):
@@ -187,9 +187,9 @@ class Data(object):
row[g_new_header.index('Execute_Time_Mean_Diff_Vs_Tflite_Cpu')] = exec_diff
row[g_new_header.index('Execute_Time_Mean_Ratio_Vs_Tflite_Cpu')] = exec_ratio
row[g_new_header.index('Peak_RSS_Diff_Vs_Tflite_Cpu')] = rss_diff
- row[g_new_header.index('Peak_RSS_Time_Mean_Ratio_Vs_Tflite_Cpu')] = rss_ratio
+ row[g_new_header.index('Peak_RSS_Ratio_Vs_Tflite_Cpu')] = rss_ratio
row[g_new_header.index('Peak_HWM_Diff_Vs_Tflite_Cpu')] = hwm_diff
- row[g_new_header.index('Peak_HWM_Time_Mean_Ratio_Vs_Tflite_Cpu')] = hwm_ratio
+ row[g_new_header.index('Peak_HWM_Ratio_Vs_Tflite_Cpu')] = hwm_ratio
return row
@@ -215,19 +215,19 @@ class Model(object):
def main():
# Option
use = "Usage: %prog [options] filename"
- parser = OptionParser(usage=use)
- parser.add_option(
+ parser = argparse.ArgumentParser(usage=use)
+ parser.add_argument(
"-i", "--input_dir", dest="input_dir", default=".", help="dir to have csv files")
- parser.add_option(
+ parser.add_argument(
"-o",
"--output_dir",
dest="output_dir",
default=".",
help="dir to be moved csv files into")
- parser.add_option(
+ parser.add_argument(
"-l", "--model_list", dest="model_list", help="file to have model list")
- options, args = parser.parse_args()
+ options = parser.parse_args()
# args check
input_dir = options.input_dir
@@ -255,7 +255,7 @@ def main():
if (os.path.exists(new_csv_file)):
os.remove(new_csv_file)
print("new csv file: {}".format(new_csv_file))
- print
+ print()
# decl for using global vars
global g_header
@@ -275,7 +275,7 @@ def main():
print("* " + m)
model_to_csvs[m] = []
model_data_list.append(None)
- print
+ print()
for f in glob.glob(os.path.join(input_dir, "*.csv")):
# TODO handle if file name doesn't come as we follow
@@ -288,7 +288,7 @@ def main():
print("Current csv file list")
for model, csvs in model_to_csvs.items():
print("* {}: {}".format(model, csvs))
- print
+ print()
for model, csvs in model_to_csvs.items():
assert (model in model_list)
@@ -297,7 +297,7 @@ def main():
for model_data in model_data_list:
print("{}: {}".format(model_data.model_name, model_data.backends))
- print
+ print()
def getEmptyData(model_name, backend):
d = Data(None, True)
diff --git a/tests/scripts/oneapi_test/install_oneapi_test_nnpackages.sh b/tests/scripts/oneapi_test/install_oneapi_test_nnpackages.sh
new file mode 100755
index 000000000..8d7ab6ce2
--- /dev/null
+++ b/tests/scripts/oneapi_test/install_oneapi_test_nnpackages.sh
@@ -0,0 +1,142 @@
+#!/usr/bin/env bash
+
+# TODO Reuse the fuction in run_test.sh. This is its duplication.
+function need_download()
+{
+ LOCAL_PATH=$1
+ REMOTE_URL=$2
+ if [ ! -e $LOCAL_PATH ]; then
+ return 0;
+ fi
+ # Ignore checking md5 in cache
+ if [ ! -z $IGNORE_MD5 ] && [ "$IGNORE_MD5" == "1" ]; then
+ return 1
+ fi
+
+ LOCAL_HASH=$(md5sum $LOCAL_PATH | awk '{ print $1 }')
+ REMOTE_HASH=$(curl -ss $REMOTE_URL | md5sum | awk '{ print $1 }')
+ # TODO Emit an error when Content-MD5 field was not found. (Server configuration issue)
+ if [ "$LOCAL_HASH" != "$REMOTE_HASH" ]; then
+ echo "Downloaded file is outdated or incomplete."
+ return 0
+ fi
+ return 1
+}
+
+# TODO Reuse the fuction in run_test.sh. This is its duplication.
+download_tests()
+{
+ SELECTED_TESTS=$@
+
+ echo ""
+ echo "Downloading tests:"
+ echo "======================"
+ for TEST_NAME in $SELECTED_TESTS; do
+ echo $TEST_NAME
+ done
+ echo "======================"
+
+ for TEST_NAME in $SELECTED_TESTS; do
+ # Test configure initialization
+ MODELFILE_SERVER_PATH=""
+ MODELFILE_NAME=""
+ source $TEST_ROOT_PATH/$TEST_NAME/config.sh
+
+ TEST_CACHE_PATH=$CACHE_ROOT_PATH/$TEST_NAME
+ MODELFILE=$TEST_CACHE_PATH/$MODELFILE_NAME
+ MODELFILE_URL="$MODELFILE_SERVER/$MODELFILE_NAME"
+ if [ -n "$FIXED_MODELFILE_SERVER" ]; then
+ MODELFILE_URL="$FIXED_MODELFILE_SERVER/$MODELFILE_NAME"
+ fi
+
+ # Download model file
+ if [ ! -e $TEST_CACHE_PATH ]; then
+ mkdir -p $TEST_CACHE_PATH
+ fi
+
+ # Download unless we have it in cache (Also check md5sum)
+ if need_download "$MODELFILE" "$MODELFILE_URL"; then
+ echo ""
+ echo "Download test file for $TEST_NAME"
+ echo "======================"
+
+ rm -f $MODELFILE # Remove invalid file if exists
+ pushd $TEST_CACHE_PATH
+ wget -nv $MODELFILE_URL
+ if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
+ unzip -o $MODELFILE_NAME
+ fi
+ popd
+ fi
+
+ done
+}
+
+usage()
+{
+ echo "Usage: $0 --modelfile-server=MODELFILE_SERVER --install-path=INSTALL_DIR"
+ echo " MODELFILE_SERVER : Base URL of the model file server"
+ echo " INSTALL_DIR : Path to be installed"
+ exit 1
+}
+
+while [[ $# -gt 0 ]]
+do
+ key="$(echo $1 | awk '{print tolower($0)}')"
+ case "$key" in
+ -?|-h|--help)
+ usage
+ exit 1
+ ;;
+ --modelfile-server)
+ MODELFILE_SERVER="$2"
+ shift
+ ;;
+ --modelfile-server=*)
+ MODELFILE_SERVER="${1#*=}"
+ ;;
+ --install-dir)
+ INSTALL_DIR="$2"
+ shift
+ ;;
+ --install-dir=*)
+ INSTALL_DIR="${1#*=}"
+ ;;
+ *)
+ echo "Invalid option '$1'"
+ usage
+ exit 1
+ ;;
+ esac
+ shift
+done
+
+if [ -z "$MODELFILE_SERVER" ]; then
+ echo "Please specify a value for --modelfile-server or MODELFILE_SERVER(env)."
+ usage
+ exit 1
+fi
+
+if [ -z "$INSTALL_DIR" ]; then
+ echo "Please specify a value for --install-dir or INSTALL_DIR(env)."
+ usage
+ exit 1
+fi
+
+set -e
+
+THIS_SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE}))
+source ${THIS_SCRIPT_DIR}/../common.sh
+
+CACHE_ROOT_PATH=$INSTALL_DIR
+FIXED_MODELFILE_SERVER="${MODELFILE_SERVER:-}"
+TEST_ROOT_PATH=${THIS_SCRIPT_DIR}/models
+
+# All models in the directory are the target models
+pushd ${TEST_ROOT_PATH}
+MODELS=$(ls -d */)
+popd
+
+download_tests $MODELS
+
+set +e
diff --git a/tests/scripts/oneapi_test/models/add/config.sh b/tests/scripts/oneapi_test/models/add/config.sh
new file mode 100644
index 000000000..e6e8677e7
--- /dev/null
+++ b/tests/scripts/oneapi_test/models/add/config.sh
@@ -0,0 +1 @@
+MODELFILE_NAME="add.zip"
diff --git a/tests/scripts/test-driver.sh b/tests/scripts/test-driver.sh
index 932339ae8..319370de1 100755
--- a/tests/scripts/test-driver.sh
+++ b/tests/scripts/test-driver.sh
@@ -33,13 +33,13 @@ function Usage()
echo "--frameworktest_list_file - filepath of model list for test"
echo ""
echo "Following option is only needed when you want to test benchmark."
- echo "--benchmark_neurun_op - (default=off) run benchmark per operation on neurun"
+ echo "--benchmark_onert_op - (default=off) run benchmark per operation on onert"
echo ""
echo "etc."
echo "--framework_driverbin - (default=../../Product/out/bin/tflite_run) runner for runnning framework tests"
echo "--verification_driverbin - (default=../../Product/out/bin/nnapi_test) runner for runnning verification tests"
echo "--benchmark_driverbin - (default=../../Product/out/bin/tflite_benchmark) runner for runnning benchmark"
- echo "--runtestsh - (default=\$ARTIFACT_PATH/tests/framework/run_test.sh) run_test.sh with path where it is for framework test and verification"
+ echo "--runtestsh - (default=\$ARTIFACT_PATH/tests/scripts/framework/run_test.sh) run_test.sh with path where it is for framework test and verification"
echo "--unittestdir - (default=\$ARTIFACT_PATH/Product/out/unittest) directory that has unittest binaries for unit test"
echo ""
echo "--reportdir - (default=\$ARTIFACT_PATH/report) directory to save report"
@@ -57,7 +57,7 @@ ALLTEST_ON="true"
UNITTEST_ON="false"
FRAMEWORKTEST_ON="false"
VERIFICATION_ON="false"
-BENCHMARK_NEURUN_OP_ON="false"
+BENCHMARK_ONERT_OP_ON="false"
REPORT_DIR=""
for i in "$@"
@@ -104,9 +104,9 @@ do
ALLTEST_ON="false"
VERIFICATION_ON="true"
;;
- --benchmark_neurun_op)
+ --benchmark_onert_op)
ALLTEST_ON="false"
- BENCHMARK_NEURUN_OP_ON="true"
+ BENCHMARK_ONERT_OP_ON="true"
;;
--reportdir=*)
REPORT_DIR=${i#*=}
@@ -122,7 +122,7 @@ done
ARTIFACT_PATH="$(readlink -f $ARTIFACT_PATH)"
if [ -z "$RUN_TEST_SH" ]; then
- RUN_TEST_SH=$ARTIFACT_PATH/tests/framework/run_test.sh
+ RUN_TEST_SH=$ARTIFACT_PATH/tests/scripts/framework/run_test.sh
fi
if [ ! -e "$RUN_TEST_SH" ]; then
@@ -180,7 +180,7 @@ if [ "$ALLTEST_ON" == "true" ] || [ "$VERIFICATION_ON" == "true" ]; then
--frameworktest_list_file=${FRAMEWORKTEST_LIST_FILE:-}
fi
-if [ "$BENCHMARK_NEURUN_OP_ON" == "true" ]; then
+if [ "$BENCHMARK_ONERT_OP_ON" == "true" ]; then
if [ -z "$BENCHMARK_DRIVER_BIN" ]; then
DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_benchmark
else
@@ -192,14 +192,14 @@ if [ "$BENCHMARK_NEURUN_OP_ON" == "true" ]; then
--runtestsh=$RUN_TEST_SH \
--driverbin=$DRIVER_BIN \
--reportdir=$REPORT_DIR/benchmark_op \
- --modelfilepath=$ARTIFACT_PATH/tests/framework
+ --modelfilepath=$ARTIFACT_PATH/tests/scripts/framework
fi
# Make json file. Actually, this process is only needed on CI. That's why it is in test-driver.sh.
-if [ "$BENCHMARK_NEURUN_OP_ON" == "true" ]; then
+if [ "$BENCHMARK_ONERT_OP_ON" == "true" ]; then
# functions to fill json with benchmark results
source $ARTIFACT_PATH/tests/scripts/print_to_json.sh
- if [ "$BENCHMARK_NEURUN_OP_ON" == "true" ]; then
+ if [ "$BENCHMARK_ONERT_OP_ON" == "true" ]; then
print_to_json $REPORT_DIR/benchmark_op $REPORT_DIR "benchmark_op_result.json"
else
print_to_json $REPORT_DIR/benchmark $REPORT_DIR "benchmark_result.json"
diff --git a/tests/scripts/test_framework.sh b/tests/scripts/test_framework.sh
index e691d5f5c..1d9751562 100755
--- a/tests/scripts/test_framework.sh
+++ b/tests/scripts/test_framework.sh
@@ -25,9 +25,9 @@ function Usage()
{
echo "Usage Example:"
echo "./$0 \\"
- echo " --runtestsh=tests/framework/run_test.sh \\ # Test runner script path"
+ echo " --runtestsh=tests/scripts/framework/run_test.sh \\ # Test runner script path"
echo " --driverbin=Product/out/bin/tflite_run \\ # Test driver path"
- echo " --frameworktest_list_file=tests/scripts/list/neurun_frameworktest_list.armv7l.cpu.txt \\"
+ echo " --frameworktest_list_file=tests/scripts/list/frameworktest_list.armv7l.cpu.txt \\"
echo " --reportdir=report \\ # Directory for the report files will be saved"
echo " --tapname=framework_test.tap \\ # Tap file name"
echo " --logname=framework_test.log \\ # Log file name"
diff --git a/tests/scripts/test_scheduler_with_profiling.sh b/tests/scripts/test_scheduler_with_profiling.sh
index 499e03973..61b2a4f4c 100755
--- a/tests/scripts/test_scheduler_with_profiling.sh
+++ b/tests/scripts/test_scheduler_with_profiling.sh
@@ -11,7 +11,7 @@ TEST_DRIVER_DIR="$( cd "$( dirname "${BASH_SOURCE}" )" && pwd )"
ARTIFACT_PATH="$TEST_DRIVER_DIR/../.."
BENCHMARK_DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_benchmark
REPORT_DIR=$ARTIFACT_PATH/report
-RUN_TEST_SH=$ARTIFACT_PATH/tests/framework/run_test.sh
+RUN_TEST_SH=$ARTIFACT_PATH/tests/scripts/framework/run_test.sh
BENCHMARK_MODEL_LIST="MODELS/inception_nonslim MODELS/inception_slim MODELS/mobilenet"
if [ ! -e "$RUN_TEST_SH" ]; then
@@ -72,7 +72,7 @@ function run_benchmark_test()
export USE_SCHEDULER=1
export PROFILING_MODE=1
export EXECUTOR="Dataflow"
- export NEURUN_LOG_ENABLE=1
+ export ONERT_LOG_ENABLE=1
for ((j = 1 ; j <= $PROFILING_RUN_CNT ; j++)); do
# Save the verbose log of each run
LOG_FILE=$REPORT_MODEL_DIR/tflite_profiling_$j.txt
@@ -89,7 +89,7 @@ function run_benchmark_test()
# Save the exec_time.json of each run
cp "exec_time.json" $REPORT_MODEL_DIR/"exec_time_$j.json"
done
- unset NEURUN_LOG_ENABLE
+ unset ONERT_LOG_ENABLE
##################################################################################
diff --git a/tests/scripts/unittest.sh b/tests/scripts/unittest.sh
index 54a01913b..1a6c46229 100755
--- a/tests/scripts/unittest.sh
+++ b/tests/scripts/unittest.sh
@@ -29,8 +29,8 @@ get_gtest_option()
{
local output_option="--gtest_output=xml:$UNITTEST_REPORT_DIR/$TEST_BIN.xml"
local filter_option
- if [ -r "$UNITTEST_TEST_DIR/$TEST_BIN.skip" ]; then
- filter_option="--gtest_filter=-$(grep -v '#' "$UNITTEST_TEST_DIR/$TEST_BIN.skip" | tr '\n' ':')"
+ if [ -r "$TEST_BIN.skip" ]; then
+ filter_option="--gtest_filter=-$(grep -v '#' "$TEST_BIN.skip" | tr '\n' ':')"
fi
echo "$output_option $filter_option"
}
@@ -66,24 +66,34 @@ echo "Unittest start"
echo "============================================"
num_unittest=0
-for TEST_BIN in `ls $UNITTEST_TEST_DIR --hide=*.skip*`; do
+# Run all executables in unit test directory
+for TEST_BIN in `find $UNITTEST_TEST_DIR -maxdepth 1 -type f -executable`; do
num_unittest=$((num_unittest+1))
echo "============================================"
echo "Starting set $num_unittest: $TEST_BIN..."
echo "============================================"
TEMP_UNITTEST_RESULT=0
+ # This test requires test model installation
+ if [ "$(basename $TEST_BIN)" == "nnfw_api_gtest" ] && [ ! -d "${TEST_BIN}_models" ]; then
+ ONEAPI_TEST_MODEL_INSTALLER=$(dirname $BASH_SOURCE)/oneapi_test/install_oneapi_test_nnpackages.sh
+ $ONEAPI_TEST_MODEL_INSTALLER --install-dir ${TEST_BIN}_models
+ if [[ $? -ne 0 ]]; then
+ echo "FAILED : oneapi test model installation"
+ fi
+ fi
+
if [ "$UNITTEST_RUN_ALL" == "true" ]; then
- for TEST_LIST_VERBOSE_LINE in $($UNITTEST_TEST_DIR/$TEST_BIN --gtest_list_tests); do
+ for TEST_LIST_VERBOSE_LINE in $($TEST_BIN --gtest_list_tests); do
if [[ $TEST_LIST_VERBOSE_LINE == *\. ]]; then
TEST_LIST_CATEGORY=$TEST_LIST_VERBOSE_LINE
else
TEST_LIST_ITEM="$TEST_LIST_CATEGORY""$TEST_LIST_VERBOSE_LINE"
- $UNITTEST_TEST_DIR/$TEST_BIN --gtest_filter=$TEST_LIST_ITEM --gtest_output="xml:$UNITTEST_REPORT_DIR/$TEST_LIST_ITEM.xml"
+ $TEST_BIN --gtest_filter=$TEST_LIST_ITEM --gtest_output="xml:$UNITTEST_REPORT_DIR/$TEST_LIST_ITEM.xml"
fi
done
else
- $UNITTEST_TEST_DIR/$TEST_BIN $(get_gtest_option)
+ $TEST_BIN $(get_gtest_option)
TEMP_UNITTEST_RESULT=$?
fi
diff --git a/tests/tools/nnapi_test/src/nnapi_test.cc b/tests/tools/nnapi_test/src/nnapi_test.cc
index 799188f66..d925831f1 100644
--- a/tests/tools/nnapi_test/src/nnapi_test.cc
+++ b/tests/tools/nnapi_test/src/nnapi_test.cc
@@ -41,6 +41,12 @@ int main(const int argc, char **argv)
auto model = FlatBufferModel::BuildFromFile(filename, &error_reporter);
+ if (model == nullptr)
+ {
+ // error_reporter must have shown the error message already
+ return 1;
+ }
+
const nnfw::tflite::FlatBufferBuilder builder(*model);
try
diff --git a/tests/tools/nnpackage_run/CMakeLists.txt b/tests/tools/nnpackage_run/CMakeLists.txt
index 743df7f3c..7b0d874da 100644
--- a/tests/tools/nnpackage_run/CMakeLists.txt
+++ b/tests/tools/nnpackage_run/CMakeLists.txt
@@ -2,9 +2,9 @@ if(NOT BUILD_NNPACKAGE_RUN)
return()
endif(NOT BUILD_NNPACKAGE_RUN)
-if(NOT BUILD_NEURUN)
+if(NOT BUILD_ONERT)
return()
-endif(NOT BUILD_NEURUN)
+endif(NOT BUILD_ONERT)
find_package(HDF5 COMPONENTS CXX QUIET)
if(NOT HDF5_FOUND)
@@ -27,11 +27,11 @@ target_include_directories(nnpackage_run PRIVATE src)
target_include_directories(nnpackage_run PRIVATE ${Boost_INCLUDE_DIRS})
target_include_directories(nnpackage_run PRIVATE ${HDF5_INCLUDE_DIRS})
-target_link_libraries(nnpackage_run neurun_core neurun tflite_loader)
+target_link_libraries(nnpackage_run onert_core onert tflite_loader)
target_link_libraries(nnpackage_run tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite)
target_link_libraries(nnpackage_run nnfw-dev)
-target_link_libraries(nnpackage_run boost_program_options boost_system boost_filesystem)
+target_link_libraries(nnpackage_run boost_program_options)
target_link_libraries(nnpackage_run ${HDF5_CXX_LIBRARIES})
-target_link_libraries(nnpackage_run nnfw_lib_benchmark)
+target_link_libraries(nnpackage_run nnfw_lib_benchmark)
install(TARGETS nnpackage_run DESTINATION bin)
diff --git a/tests/tools/nnpackage_run/src/args.cc b/tests/tools/nnpackage_run/src/args.cc
index 2233a5eaa..24bf0c137 100644
--- a/tests/tools/nnpackage_run/src/args.cc
+++ b/tests/tools/nnpackage_run/src/args.cc
@@ -17,7 +17,6 @@
#include "args.h"
#include <iostream>
-#include <boost/filesystem.hpp>
namespace nnpkg_run
{
@@ -31,11 +30,12 @@ Args::Args(const int argc, char **argv) noexcept
void Args::Initialize(void)
{
// General options
- po::options_description general("General options");
+ po::options_description general("General options", 100);
// clang-format off
general.add_options()
- ("help,h", "Display available options")
+ ("help,h", "Print available options")
+ ("version", "Print version and exit immediately")
("nnpackage", po::value<std::string>()->required())
("dump,d", po::value<std::string>()->default_value(""), "Output filename")
("load,l", po::value<std::string>()->default_value(""), "Input filename")
@@ -43,7 +43,11 @@ void Args::Initialize(void)
("warmup_runs,w", po::value<int>()->default_value(0), "The number of warmup runs")
("gpumem_poll,g", po::value<bool>()->default_value(false), "Check gpu memory polling separately")
("mem_poll,m", po::value<bool>()->default_value(false), "Check memory polling")
- ("write_report,p", po::value<bool>()->default_value(false), "Write report")
+ ("write_report,p", po::value<bool>()->default_value(false),
+ "Write report\n"
+ "{exec}-{nnpkg}-{backend}.csv will be generated.\n"
+ "e.g. nnpackage_run-UNIT_Add_000-acl_cl.csv.\n"
+ "{nnpkg} name may be changed to realpath if you use symbolic-link.")
;
// clang-format on
@@ -77,6 +81,12 @@ void Args::Parse(const int argc, char **argv)
exit(0);
}
+ if (vm.count("version"))
+ {
+ _print_version = true;
+ return;
+ }
+
po::notify(vm);
if (vm.count("dump"))
@@ -103,7 +113,7 @@ void Args::Parse(const int argc, char **argv)
}
else
{
- if (!boost::filesystem::exists(_package_filename))
+ if (access(_package_filename.c_str(), F_OK) == -1)
{
std::cerr << "nnpackage not found: " << _package_filename << "\n";
}
diff --git a/tests/tools/nnpackage_run/src/args.h b/tests/tools/nnpackage_run/src/args.h
index 7e1d50e21..3569c46a3 100644
--- a/tests/tools/nnpackage_run/src/args.h
+++ b/tests/tools/nnpackage_run/src/args.h
@@ -39,6 +39,7 @@ public:
const bool getGpuMemoryPoll(void) const { return _gpumem_poll; }
const bool getMemoryPoll(void) const { return _mem_poll; }
const bool getWriteReport(void) const { return _write_report; }
+ const bool printVersion(void) const { return _print_version; }
private:
void Initialize();
@@ -56,6 +57,7 @@ private:
bool _gpumem_poll;
bool _mem_poll;
bool _write_report;
+ bool _print_version = false;
};
} // end of namespace nnpkg_run
diff --git a/tests/tools/nnpackage_run/src/nnpackage_run.cc b/tests/tools/nnpackage_run/src/nnpackage_run.cc
index ec259fc93..fc1a469a7 100644
--- a/tests/tools/nnpackage_run/src/nnpackage_run.cc
+++ b/tests/tools/nnpackage_run/src/nnpackage_run.cc
@@ -31,8 +31,6 @@
#include <unordered_map>
#include <vector>
-#include <boost/filesystem.hpp>
-
namespace nnpkg_run
{
@@ -72,6 +70,14 @@ int main(const int argc, char **argv)
using namespace nnpkg_run;
Args args(argc, argv);
auto nnpackage_path = args.getPackageFilename();
+ if (args.printVersion())
+ {
+ uint32_t version;
+ NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
+ std::cout << "nnpkg_run (nnfw runtime: v" << (version >> 24) << "."
+ << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
+ exit(0);
+ }
std::unique_ptr<benchmark::MemoryPoller> mp{nullptr};
if (args.getMemoryPoll())
@@ -180,6 +186,9 @@ int main(const int argc, char **argv)
case NNFW_TYPE_TENSOR_UINT8:
randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti));
break;
+ case NNFW_TYPE_TENSOR_INT32:
+ randomData<int32_t>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
default:
std::cerr << "Not supported input type" << std::endl;
std::exit(-1);
@@ -259,27 +268,27 @@ int main(const int argc, char **argv)
return 0;
// prepare csv task
- std::string exec_name;
- std::string model_name;
+ std::string exec_basename;
+ std::string nnpkg_basename;
std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
{
- namespace fs = boost::filesystem;
-
- fs::path model_path(nnpackage_path);
-
- // if model_path has a slash as tail such as "abc/",
- if (model_path.filename().string() == ".")
+ // I don't use PATH_MAX since it is not guaranteed value.
+ // Instead, I've chosen smaller size than linux default 4096.
+ char buf[1024];
+ char *res = realpath(nnpackage_path.c_str(), buf);
+ if (res)
{
- model_path = model_path.remove_filename();
+ nnpkg_basename = basename(buf);
}
-
- model_name = model_path.filename().string();
-
- fs::path exec_path(argv[0]);
- exec_name = exec_path.stem().string();
+ else
+ {
+ std::cerr << "E: during getting realpath from nnpackage_path." << std::endl;
+ exit(-1);
+ }
+ exec_basename = basename(argv[0]);
}
- benchmark::writeResult(result, exec_name, model_name, backend_name);
+ benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);
return 0;
}
diff --git a/tests/tools/tflite_benchmark/src/tflite_benchmark.cc b/tests/tools/tflite_benchmark/src/tflite_benchmark.cc
index d52ef0163..783a5cb2a 100644
--- a/tests/tools/tflite_benchmark/src/tflite_benchmark.cc
+++ b/tests/tools/tflite_benchmark/src/tflite_benchmark.cc
@@ -25,6 +25,7 @@
#include "misc/tensor/IndexIterator.h"
#include <chrono>
+#include <cmath>
#include <iostream>
#include <thread>
@@ -73,17 +74,27 @@ public:
{
++_num;
_sum += val;
+ _log_sum += std::log(val);
_min = std::min(_min, val);
_max = std::max(_max, val);
}
T mean() const { return _sum / static_cast<T>(_num); }
+ // Calculating geometric mean with logs
+ // "Geometric Mean of (V1, V2, ... Vn)"
+ // = (V1*V2*...*Vn)^(1/n)
+ // = exp(log((V1*V2*...*Vn)^(1/n)))
+ // = exp(log((V1*V2*...*Vn)/n)))
+ // = exp((log(V1) + log(V2) + ... + log(Vn))/n)
+ // = exp(_log_sum/num)
+ T geomean() const { return std::exp(_log_sum / static_cast<T>(_num)); }
T min() const { return _min; }
T max() const { return _max; }
private:
uint32_t _num = 0u;
T _sum = 0.0;
+ T _log_sum = 0.0;
T _min = std::numeric_limits<T>::max();
T _max = std::numeric_limits<T>::lowest();
};
@@ -118,6 +129,7 @@ int main(const int argc, char **argv)
const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false);
const auto thread_count = nnfw::misc::EnvVar("THREAD").asInt(-1);
const auto pause = nnfw::misc::EnvVar("PAUSE").asInt(0);
+ const auto microsec = nnfw::misc::EnvVar("MICROSEC").asBool(0);
std::cout << "Num threads: " << thread_count << std::endl;
if (use_nnapi)
@@ -131,6 +143,18 @@ int main(const int argc, char **argv)
std::cout << "Insert " << pause << "s pause between iterations" << std::endl;
}
+ struct TimeUnit
+ {
+ const char *str;
+ std::function<int64_t(int64_t)> conv;
+ } tu = {"ms", [](int64_t v) { return v / 1000; }};
+
+ if (microsec)
+ {
+ tu.str = "us";
+ tu.conv = [](int64_t v) { return v; };
+ }
+
StderrReporter error_reporter;
std::unique_ptr<tflite::TfLiteVerifier> verifier{new BMFlatBufferVerifier};
@@ -198,7 +222,7 @@ int main(const int argc, char **argv)
//
for (uint32_t n = 0; n < 3; ++n)
{
- std::chrono::milliseconds elapsed(0);
+ std::chrono::microseconds elapsed(0);
sess->prepare();
@@ -256,7 +280,7 @@ int main(const int argc, char **argv)
};
sess->teardown();
- std::cout << "Warming-up " << n << ": " << elapsed.count() << "ms" << std::endl;
+ std::cout << "Warming-up " << n << ": " << tu.conv(elapsed.count()) << tu.str << std::endl;
}
//
@@ -268,7 +292,7 @@ int main(const int argc, char **argv)
for (int n = 0; n < cnt; ++n)
{
- std::chrono::milliseconds elapsed(0);
+ std::chrono::microseconds elapsed(0);
sess->prepare();
nnfw::misc::benchmark::measure(elapsed) << [&](void) {
@@ -281,7 +305,7 @@ int main(const int argc, char **argv)
acc(elapsed.count());
- std::cout << "Iteration " << n << ": " << elapsed.count() << "ms" << std::endl;
+ std::cout << "Iteration " << n << ": " << tu.conv(elapsed.count()) << tu.str << std::endl;
// Insert "pause"
if ((n != cnt - 1) && (pause > 0))
@@ -290,10 +314,16 @@ int main(const int argc, char **argv)
}
}
+ auto v_min = tu.conv(acc.min());
+ auto v_max = tu.conv(acc.max());
+ auto v_mean = tu.conv(acc.mean());
+ auto v_geomean = tu.conv(acc.geomean());
+
std::cout << "--------" << std::endl;
- std::cout << "Min: " << acc.min() << "ms" << std::endl;
- std::cout << "Max: " << acc.max() << "ms" << std::endl;
- std::cout << "Mean: " << acc.mean() << "ms" << std::endl;
+ std::cout << "Min: " << v_min << tu.str << std::endl;
+ std::cout << "Max: " << v_max << tu.str << std::endl;
+ std::cout << "Mean: " << v_mean << tu.str << std::endl;
+ std::cout << "GeoMean: " << v_geomean << tu.str << std::endl;
return 0;
}
diff --git a/tests/tools/tflite_loader/CMakeLists.txt b/tests/tools/tflite_loader/CMakeLists.txt
index d17f35979..ece39161f 100644
--- a/tests/tools/tflite_loader/CMakeLists.txt
+++ b/tests/tools/tflite_loader/CMakeLists.txt
@@ -3,10 +3,10 @@ if(NOT BUILD_TFLITE_LOADER_TEST_TOOL)
return()
endif(NOT BUILD_TFLITE_LOADER_TEST_TOOL)
-if(NOT BUILD_NEURUN)
- message("skipping tflite loader tool build: neurun is not built")
+if(NOT BUILD_ONERT)
+ message("skipping tflite loader tool build: onert is not built")
return()
-endif(NOT BUILD_NEURUN)
+endif(NOT BUILD_ONERT)
list(APPEND SOURCES "src/tflite_loader.cc")
list(APPEND SOURCES "src/args.cc")
@@ -16,7 +16,7 @@ nnas_find_package(Boost REQUIRED)
add_executable(tflite_loader_test_tool ${SOURCES})
target_include_directories(tflite_loader_test_tool PRIVATE ${Boost_INCLUDE_DIRS})
-target_link_libraries(tflite_loader_test_tool neurun_core neurun tflite_loader)
+target_link_libraries(tflite_loader_test_tool onert_core onert tflite_loader)
target_link_libraries(tflite_loader_test_tool nnfw_lib_tflite tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_misc)
target_link_libraries(tflite_loader_test_tool boost_program_options boost_system boost_filesystem)
diff --git a/tests/tools/tflite_loader/src/tflite_loader.cc b/tests/tools/tflite_loader/src/tflite_loader.cc
index 3805431ff..1270eb660 100644
--- a/tests/tools/tflite_loader/src/tflite_loader.cc
+++ b/tests/tools/tflite_loader/src/tflite_loader.cc
@@ -31,7 +31,7 @@
#include "tflite_loader.h"
-#include "cpp14/memory.h"
+#include <memory>
const int RUN_FAILED = 1;
@@ -73,11 +73,11 @@ std::vector<float> randomData(RandomGenerator &randgen, const uint64_t size)
return vec;
}
-void executeGraph(const std::shared_ptr<neurun::ir::Graph> &g,
+void executeGraph(const std::shared_ptr<onert::ir::Graph> &g,
const std::vector<std::vector<float>> &inputs,
std::vector<std::vector<float>> &outputs)
{
- auto compiler = new neurun::compiler::Compiler(g);
+ auto compiler = new onert::compiler::Compiler(g);
// Compilation
try
{
@@ -92,9 +92,9 @@ void executeGraph(const std::shared_ptr<neurun::ir::Graph> &g,
std::cout << "[Execution] Graph compiled!" << std::endl;
- std::shared_ptr<neurun::exec::IExecutor> executor;
+ std::shared_ptr<onert::exec::IExecutor> executor;
compiler->release(executor);
- auto execution = std::make_shared<neurun::exec::Execution>(executor);
+ auto execution = std::make_shared<onert::exec::Execution>(executor);
// Setting IO
try
@@ -119,10 +119,10 @@ void executeGraph(const std::shared_ptr<neurun::ir::Graph> &g,
}
for (size_t i = 0; i < num_inputs; i++)
- execution->setInput(neurun::ir::IOIndex(i), inputs[i].data(),
+ execution->setInput(onert::ir::IOIndex(i), inputs[i].data(),
inputs[i].size() * sizeof(float));
for (uint32_t i = 0; i < num_outputs; i++)
- execution->setOutput(neurun::ir::IOIndex(i), outputs[i].data(),
+ execution->setOutput(onert::ir::IOIndex(i), outputs[i].data(),
outputs[i].size() * sizeof(float));
}
catch (const std::exception &e)
@@ -162,11 +162,11 @@ int main(const int argc, char **argv)
}
std::cout << "[Execution] Stage start!" << std::endl;
- std::shared_ptr<neurun::ir::Graph> test_graph;
+ std::shared_ptr<onert::ir::Graph> test_graph;
// Loading
try
{
- test_graph = neurun::tflite_loader::loadModel(tflite_file.c_str());
+ test_graph = onert::tflite_loader::loadModel(tflite_file.c_str());
}
catch (std::exception &e)
{
@@ -180,12 +180,12 @@ int main(const int argc, char **argv)
for (const auto &input_idx : test_graph->getInputs())
{
const auto input_type = test_graph->operands().at(input_idx).typeInfo().type();
- assert(input_type == neurun::ir::DataType::FLOAT32 && "Only FLOAT32 inputs are supported");
+ assert(input_type == onert::ir::DataType::FLOAT32 && "Only FLOAT32 inputs are supported");
}
for (const auto &output_idx : test_graph->getOutputs())
{
const auto output_type = test_graph->operands().at(output_idx).typeInfo().type();
- assert(output_type == neurun::ir::DataType::FLOAT32 && "Only FLOAT32 outputs are supported");
+ assert(output_type == onert::ir::DataType::FLOAT32 && "Only FLOAT32 outputs are supported");
}
std::cout << "[Execution] Model is deserialized!" << std::endl;
diff --git a/tests/tools/tflite_run/CMakeLists.txt b/tests/tools/tflite_run/CMakeLists.txt
index 14c378f8e..57880454a 100644
--- a/tests/tools/tflite_run/CMakeLists.txt
+++ b/tests/tools/tflite_run/CMakeLists.txt
@@ -14,7 +14,7 @@ target_include_directories(tflite_run PRIVATE src)
target_include_directories(tflite_run PRIVATE ${Boost_INCLUDE_DIRS})
target_link_libraries(tflite_run tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite)
-target_link_libraries(tflite_run boost_program_options boost_system boost_filesystem)
+target_link_libraries(tflite_run boost_program_options)
target_link_libraries(tflite_run nnfw_lib_benchmark)
install(TARGETS tflite_run DESTINATION bin)
diff --git a/tests/tools/tflite_run/src/args.cc b/tests/tools/tflite_run/src/args.cc
index 4ec94661a..f101e586c 100644
--- a/tests/tools/tflite_run/src/args.cc
+++ b/tests/tools/tflite_run/src/args.cc
@@ -18,8 +18,6 @@
#include <iostream>
-#include <boost/filesystem.hpp>
-
namespace TFLiteRun
{
@@ -101,7 +99,7 @@ void Args::Parse(const int argc, char **argv)
if (!_input_filename.empty())
{
- if (!boost::filesystem::exists(_input_filename))
+ if (access(_input_filename.c_str(), F_OK) == -1)
{
std::cerr << "input image file not found: " << _input_filename << "\n";
}
@@ -131,9 +129,10 @@ void Args::Parse(const int argc, char **argv)
}
else
{
- if (!boost::filesystem::exists(_tflite_filename))
+ if (access(_tflite_filename.c_str(), F_OK) == -1)
{
std::cerr << "tflite file not found: " << _tflite_filename << "\n";
+ exit(1);
}
}
}
diff --git a/tests/tools/tflite_run/src/tensor_loader.cc b/tests/tools/tflite_run/src/tensor_loader.cc
index de605bacf..93d9e2f54 100644
--- a/tests/tools/tflite_run/src/tensor_loader.cc
+++ b/tests/tools/tflite_run/src/tensor_loader.cc
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include "tensor_loader.h"
#include <assert.h>
diff --git a/tests/tools/tflite_run/src/tensor_loader.h b/tests/tools/tflite_run/src/tensor_loader.h
index 2e671aa8a..ef51e0fd4 100644
--- a/tests/tools/tflite_run/src/tensor_loader.h
+++ b/tests/tools/tflite_run/src/tensor_loader.h
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#ifndef __TFLITE_RUN_TENSOR_LOADER_H__
#define __TFLITE_RUN_TENSOR_LOADER_H__
diff --git a/tests/tools/tflite_run/src/tflite_run.cc b/tests/tools/tflite_run/src/tflite_run.cc
index 107aed757..cedcff82a 100644
--- a/tests/tools/tflite_run/src/tflite_run.cc
+++ b/tests/tools/tflite_run/src/tflite_run.cc
@@ -37,8 +37,6 @@
#include <algorithm>
#include <vector>
-#include <boost/filesystem.hpp>
-
using namespace tflite;
using namespace nnfw::tflite;
using namespace std::placeholders; // for _1, _2 ...
@@ -58,12 +56,7 @@ static const char *default_backend_cand = "tflite_cpu";
int main(const int argc, char **argv)
{
- bool use_nnapi = false;
-
- if (std::getenv("USE_NNAPI") != nullptr)
- {
- use_nnapi = true;
- }
+ const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false);
StderrReporter error_reporter;
@@ -327,27 +320,14 @@ int main(const int argc, char **argv)
if (args.getWriteReport())
{
// prepare csv task
- std::string exec_name;
- std::string model_name;
+ std::string exec_basename;
+ std::string model_basename;
std::string backend_name = default_backend_cand;
{
- namespace fs = boost::filesystem;
-
- fs::path model_path(args.getTFLiteFilename());
- model_name = model_path.stem().string();
-
- fs::path exec_path(argv[0]);
- exec_name = exec_path.stem().string();
+ model_basename = basename(args.getTFLiteFilename().c_str());
+ exec_basename = basename(argv[0]);
}
- benchmark::writeResult(result, exec_name, model_name, backend_name);
- }
-
- if (!args.getDumpFilename().empty())
- {
- const std::string &dump_filename = args.getDumpFilename();
- tensor_dumper.dump(dump_filename);
- std::cout << "Input/output tensors have been dumped to file \"" << dump_filename << "\"."
- << std::endl;
+ benchmark::writeResult(result, exec_basename, model_basename, backend_name);
}
if (!args.getDumpFilename().empty())
diff --git a/tools/cross/build_android_ndk.sh b/tools/cross/build_android_ndk.sh
index dd68f50ad..e3e86f4b0 100755
--- a/tools/cross/build_android_ndk.sh
+++ b/tools/cross/build_android_ndk.sh
@@ -36,7 +36,7 @@ do
usage
exit 1
;;
- --ndk-version=*)
+ --ndk-version)
__NDKVersion="$2"
shift
;;
diff --git a/tools/cross/install_android_sdk.sh b/tools/cross/install_android_sdk.sh
new file mode 100755
index 000000000..0cefa4ad1
--- /dev/null
+++ b/tools/cross/install_android_sdk.sh
@@ -0,0 +1,167 @@
+#!/usr/bin/env bash
+#
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Setup Android Cross-Build Environment (ANDROID SDK)
+SCRIPT_HOME=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) # absolute path to directory where script is
+INSTALL_PATH=$SCRIPT_HOME/android_sdk # path to directory where android sdk will be installed
+PLATFORMS_PACKAGE_VERSION="28" # version of platforms package which will be installed
+BUILD_TOOLS_PACKAGE_VERSION="28.0.0" # version of build-tools package which will be installed
+COMMAND_LINE_TOOLS_ARCHIVE="commandlinetools-linux-6200805_latest.zip" # command line tools archive name from site https://developer.android.com/studio/#downloads
+
+
+usage() {
+ printf "usage: ./install_android_sdk.sh [--option [option_value]]\n"
+ printf " --install-dir - absolute path to directory where android sdk will be installed, by default: $INSTALL_PATH\n"
+ printf " --platforms-package-version - version of platforms package which will be installed, by default: $PLATFORMS_PACKAGE_VERSION\n"
+ printf " --build-tools-package-version - version of build-tools package which will be installed, by default: $BUILD_TOOLS_PACKAGE_VERSION\n"
+ printf " --command-line-tools-archive - name of command line tools archive from site https://developer.android.com/studio/#downloads, by default: $COMMAND_LINE_TOOLS_ARCHIVE\n"
+ printf " --help - show this text\n"
+}
+
+check_that_available() {
+ local util_name=${1}
+ local possible_util_alias=${2}
+ if ! [ -x "$(command -v $util_name)" ]; then
+ printf "ERROR: this script uses $util_name utility, "
+ printf "please install it and repeat try (e.g. for ubuntu execute command: sudo apt install $possible_util_alias)"
+ exit 1
+ fi
+}
+
+check_preconditions() {
+ check_that_available wget wget
+ check_that_available unzip unzip
+ check_that_available java default-jdk
+}
+
+check_that_android_sdk_have_not_been_installed_yet() {
+ local root=${1}
+
+ if [ -d $root ]; then
+ echo "Directory '$root', where android sdk should be installed, exists. Please remove it or define another path"
+ exit 1
+ fi
+}
+
+make_environment() {
+ local root=${1}
+ check_that_android_sdk_have_not_been_installed_yet $root
+ mkdir -p $root
+
+ pushd $root
+ export ANDROID_HOME=$root
+ export PATH=$PATH:/$ANDROID_HOME/tools/bin
+ export PATH=$PATH:/$ANDROID_HOME/platform-tools
+ export JAVA_OPTS='-XX:+IgnoreUnrecognizedVMOptions'
+ popd
+}
+
+download_command_line_tools() {
+ local root=${1}
+ local download_url=https://dl.google.com/android/repository
+
+ pushd $root
+ wget $download_url/$COMMAND_LINE_TOOLS_ARCHIVE
+ if [ ${?} -ne 0 ]; then
+ echo "seems like '$COMMAND_LINE_TOOLS_ARCHIVE' not found. Please, go to https://developer.android.com/studio/#downloads "
+ echo "and check name and version of command line tools archive for linux in the table 'Command line tools only' "
+ echo "and put it as --command-line-tools-archive parameter value"
+ exit 1
+ fi
+ popd
+}
+
+extract_command_line_tools() {
+ local root=${1}
+
+ pushd $root
+ unzip $COMMAND_LINE_TOOLS_ARCHIVE
+ rm $COMMAND_LINE_TOOLS_ARCHIVE
+ popd
+}
+
+install_command_line_tools() {
+ local root=${1}
+
+ download_command_line_tools $root
+ extract_command_line_tools $root
+}
+
+check_that_given_version_of_package_available() {
+ # this function assumes that current working directory is directory where android sdk will be installed
+ local package_base_name=${1}
+ local package_version=${2}
+
+ sdkmanager --sdk_root=${ANDROID_HOME} --list | grep ${package_base_name}${package_version} > /dev/null
+ if [ ${?} -ne 0 ]; then
+ echo "package '${package_base_name}${package_version}' is not available"
+ exit 1
+ fi
+}
+
+install_android_sdk() {
+ local root=${1}
+
+ pushd $root
+ yes | sdkmanager --sdk_root=${ANDROID_HOME} --licenses > /dev/null
+ check_that_given_version_of_package_available "platforms;android-" ${PLATFORMS_PACKAGE_VERSION}
+ check_that_given_version_of_package_available "build-tools;" ${BUILD_TOOLS_PACKAGE_VERSION}
+ sdkmanager --sdk_root=${ANDROID_HOME} "platform-tools"
+ sdkmanager --sdk_root=${ANDROID_HOME} "platforms;android-$PLATFORMS_PACKAGE_VERSION"
+ sdkmanager --sdk_root=${ANDROID_HOME} "build-tools;$BUILD_TOOLS_PACKAGE_VERSION"
+ popd
+}
+
+
+while [[ $# -gt 0 ]]; do
+ key="$(echo $1 | awk '{print tolower($0)}')"
+ case "$key" in
+ --help)
+ usage
+ exit 0
+ ;;
+ --install-dir)
+ shift
+ INSTALL_PATH=${1}
+ shift
+ ;;
+ --platforms-package-version)
+ shift
+ PLATFORMS_PACKAGE_VERSION=${1}
+ shift
+ ;;
+ --build-tools-package-version)
+ shift
+ BUILD_TOOLS_PACKAGE_VERSION=${1}
+ shift
+ ;;
+ --command-line-tools-archive)
+ shift
+ COMMAND_LINE_TOOLS_ARCHIVE=${1}
+ shift
+ ;;
+ *)
+ echo "Invalid option '$1'"
+ usage
+ exit 1
+ ;;
+ esac
+done
+
+check_preconditions
+make_environment $INSTALL_PATH
+install_command_line_tools $INSTALL_PATH
+install_android_sdk $INSTALL_PATH
diff --git a/tools/kernel_report/kernel_report.py b/tools/kernel_report/kernel_report.py
new file mode 100755
index 000000000..0be5b82d5
--- /dev/null
+++ b/tools/kernel_report/kernel_report.py
@@ -0,0 +1,171 @@
+#!/usr/bin/python
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import argparse
+
+
+class Backend:
+ def __init__(self):
+ self.backends = {}
+ self.backends["acl_cl"] = False
+ self.backends["acl_neon"] = False
+ self.backends["cpu"] = False
+ self.backends["srcn"] = False
+
+
+class KernelReporter(object):
+ def __init__(self, args):
+ # TODO: Remove os dependency - '/'
+ if args.base[0] != '/':
+ self.onertBase = os.getcwd() + '/' + args.base
+ else:
+ self.onertBase = args.base
+ self.opListFile = "core/include/ir/Operations.lst"
+ self.operations = []
+ self.kernelGeneratorFile = "KernelGenerator.h"
+ self.kernelMap = {}
+
+ def parseOpList(self):
+ #buf = open(self.onertBase + '/' + self.opListFile, "r")
+
+ # Parsing line and get op list
+ skipLine = False
+ for line in open(self.onertBase + '/' + self.opListFile, "r"):
+ # Skip license
+ # TODO : Change to skip general comment
+ if skipLine:
+ if line.startswith(" */"):
+ skipLine = False
+ continue
+ continue
+ if line.startswith("/*"):
+ skipLine = True
+ continue
+
+ # Skip comment
+ if line.startswith("//"):
+ continue
+
+ # Skip macro
+ if line.startswith("#"):
+ continue
+
+ lineStripped = line.strip()
+ if len(lineStripped) == 0:
+ continue
+
+ op = lineStripped[3:-1]
+ self.operations.append(op)
+ self.operations.sort()
+
+ def generateKernelMap(self):
+ for op in self.operations:
+ self.kernelMap[op] = Backend()
+
+ backendLists = ["acl_cl", "acl_neon", "cpu", "srcn"]
+
+ for backend in backendLists:
+ buf = open(
+ self.onertBase + '/backend/' + backend + '/' + self.kernelGeneratorFile,
+ "r")
+
+ for line in buf:
+ words = line.split()
+ if len(words) < 3:
+ continue
+ if words[1] != "visit(const":
+ continue
+
+ opName = words[2].split("::")
+ if len(opName) < 3:
+ continue
+
+ if opName[2] in self.operations:
+ self.kernelMap[opName[2]].backends[backend] = True
+
+ buf.close()
+
+ def printResult(self):
+ print()
+ backendLists = ["acl_cl", "acl_neon", "cpu", "srcn"]
+ line = ""
+ for backend in backendLists:
+ line = line + "{0:^9}".format(backend)
+ print('{0:30}{1}'.format("", line))
+
+ counts = [0, 0, 0, 0]
+
+ for op in self.operations:
+ line = ""
+ for i in range(0, 4, 1):
+ support = self.kernelMap[op].backends[backendLists[i]]
+ if support:
+ line = line + "{0:^9}".format("O")
+ counts[i] += 1
+ else:
+ line = line + "{0:^9}".format("-")
+ print('{0:30}{1}'.format(op, line))
+
+ line = ""
+ for count in counts:
+ line = line + "{0:^9}".format(count)
+ print('{0:30}{1}'.format("TOTAL COUNT", line))
+
+ def printMDFormat(self):
+ print()
+ backendLists = ["acl_cl", "acl_neon", "cpu", "srcn"]
+ line = ""
+ for backend in backendLists:
+ line = line + "|" + backend
+ print("|" + line)
+ print("-|-|-|-|-")
+
+ counts = [0, 0, 0, 0]
+
+ for op in self.operations:
+ line = ""
+ for i in range(0, 4, 1):
+ support = self.kernelMap[op].backends[backendLists[i]]
+ if support:
+ line = line + "|" + "O"
+ counts[i] += 1
+ else:
+ line = line + "|" + "-"
+ print(op + line)
+
+ line = ""
+ for count in counts:
+ line = line + "|" + str(count)
+
+ print("-|-|-|-|-")
+ print("TOTAL COUNT" + line)
+
+ def run(self):
+ self.parseOpList()
+ self.generateKernelMap()
+ self.printResult()
+
+ self.printMDFormat()
+
+
+if __name__ == '__main__':
+ arg_parser = argparse.ArgumentParser()
+ arg_parser.add_argument("base", type=str, help="onert base directory")
+ args = arg_parser.parse_args()
+
+ report = KernelReporter(args)
+ report.run()
diff --git a/tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.acl_cl b/tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.acl_cl
index a0d44d3e7..866f734e0 100644
--- a/tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.acl_cl
+++ b/tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.acl_cl
@@ -1,6 +1,6 @@
NET_000[0-5,7-9]
NET_001[0,2-9]
-NET_002[1,4-9]
+NET_002[0-2,4-9]
NET_003[0-9]
NET_004[0-9]
UNIT_Add_*
diff --git a/tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.acl_neon b/tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.acl_neon
index a0d44d3e7..7e2371b26 100644
--- a/tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.acl_neon
+++ b/tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.acl_neon
@@ -1,6 +1,6 @@
NET_000[0-5,7-9]
-NET_001[0,2-9]
-NET_002[1,4-9]
+NET_001[0-9]
+NET_002[0-2,4-9]
NET_003[0-9]
NET_004[0-9]
UNIT_Add_*
diff --git a/tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.cpu b/tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.cpu
index 47d1134d4..cd765a465 100644
--- a/tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.cpu
+++ b/tools/nnpackage_tool/nnpkg_test/list.armv7l-linux.cpu
@@ -1,17 +1,17 @@
-NET_000[0-4,7-9]
-NET_001[1-3,6-9]
-NET_002[1-2,7,9]
-NET_003[0-3,5-9]
-NET_0040
-UNIT_Add_000
+NET_000[0-4,6-9]
+NET_001[0-3,6-9]
+NET_002[0-2,4-7,9]
+NET_003[0-9]
+NET_004[0-9]
+UNIT_Add_*
UNIT_AvgPool_*
UNIT_ConcatV2_*
UNIT_Conv2D_*
UNIT_DepthwiseConv2dNative_*
UNIT_MaxPool_*
-UNIT_Mul_000
+UNIT_Mul_*
UNIT_Pad_*
UNIT_Reshape_*
UNIT_Softmax_*
UNIT_Squeeze_*
-UNIT_Sub_000
+UNIT_Sub_*
diff --git a/tools/nnpackage_tool/nnpkg_test/list.noarch.interp b/tools/nnpackage_tool/nnpkg_test/list.noarch.interp
index e83a908f9..cb865da84 100644
--- a/tools/nnpackage_tool/nnpkg_test/list.noarch.interp
+++ b/tools/nnpackage_tool/nnpkg_test/list.noarch.interp
@@ -1,6 +1,6 @@
-NET_000[0-4,7-9]
-NET_001[0-3,6-9]
-NET_002[1,4-9]
+NET_000[0-9]
+NET_001[0-4,6-9]
+NET_002[0-2,4-9]
NET_003[0-9]
NET_004[0-9]
UNIT_Add_*
diff --git a/tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.md b/tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.md
index b8298f2ae..582dcd4c1 100644
--- a/tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.md
+++ b/tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.md
@@ -43,4 +43,4 @@ nnpkg-tcs/cast
# @ target
# run nnpkg with nnpackage_run and compare with h5diff
$ tools/nnpackage_tool/nnpkg_test/nnpkg_test.sh -i nnpkg-tcs cast
-``` \ No newline at end of file
+```
diff --git a/tools/nnpackage_tool/tf2tfliteV2/README.md b/tools/nnpackage_tool/tf2tfliteV2/README.md
new file mode 100644
index 000000000..836740a5c
--- /dev/null
+++ b/tools/nnpackage_tool/tf2tfliteV2/README.md
@@ -0,0 +1,47 @@
+# tf2tfliteV2
+
+_tf2tfliteV2_ is a TensorFlow to TensorFlow Lite model Converter.
+
+## Where does V2 come from?
+Even though we already have _tf2tflite_, we cannot cover all operators in TensorFlow. To expand coverage, we introduce _tf2tfliteV2_ which uses `TensorFlow Lite Converter`(by Google) internally.
+
+## Prerequisite
+- Frozen graph from TensorFlow 1.13.1
+- Desired version of TensorFlow(You can use python virtualenv, docker, etc.)
+
+## Example
+```
+python tf2tfliteV2.py \
+> --v1 \
+> --input_path=frozen_graph.pb \
+> --output_path=converted.tflite \
+> --input_arrays=model_inputs \
+> --output_arrays=model_outputs
+
+```
+```
+python tf2tfliteV2.py \
+> --v2 \
+> --input_path=frozen_graph.pb \
+> --output_path=converted.tflite \
+> --input_arrays=model_inputs \
+> --output_arrays=model_outputs
+```
+
+## optional argument
+```
+ -h, --help show this help message and exit
+ --v1 Use TensorFlow Lite Converter 1.x
+ --v2 Use TensorFlow Lite Converter 2.x
+ --input_path INPUT_PATH
+ Full filepath of the input file.
+ --output_path OUTPUT_PATH
+ Full filepath of the output file.
+ --input_arrays INPUT_ARRAYS
+ Names of the input arrays, comma-separated.
+ --input_shapes INPUT_SHAPES
+ Shapes corresponding to --input_arrays, colon-
+ separated.
+ --output_arrays OUTPUT_ARRAYS
+ Names of the output arrays, comma-separated.
+```
diff --git a/tools/nnpackage_tool/tf2tfliteV2/tf2tfliteV2.py b/tools/nnpackage_tool/tf2tfliteV2/tf2tfliteV2.py
new file mode 100755
index 000000000..ebd5a3afa
--- /dev/null
+++ b/tools/nnpackage_tool/tf2tfliteV2/tf2tfliteV2.py
@@ -0,0 +1,173 @@
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright (C) 2018 The TensorFlow Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tensorflow as tf
+import argparse
+import sys
+
+
+def wrap_frozen_graph(graph_def, inputs, outputs):
+ def _imports_graph_def():
+ tf.compat.v1.import_graph_def(graph_def, name="")
+
+ wrapped_import = tf.compat.v1.wrap_function(_imports_graph_def, [])
+ import_graph = wrapped_import.graph
+ return wrapped_import.prune(
+ tf.nest.map_structure(import_graph.as_graph_element, inputs),
+ tf.nest.map_structure(import_graph.as_graph_element, outputs))
+
+
+def _get_parser():
+ """
+ Returns an ArgumentParser for TensorFlow Lite Converter.
+ """
+ parser = argparse.ArgumentParser(
+ description=("Command line tool to run TensorFlow Lite Converter."))
+
+ # Converter version.
+ converter_version = parser.add_mutually_exclusive_group(required=True)
+ converter_version.add_argument(
+ "--v1", action="store_true", help="Use TensorFlow Lite Converter 1.x")
+ converter_version.add_argument(
+ "--v2", action="store_true", help="Use TensorFlow Lite Converter 2.x")
+
+ # Input and output path.
+ parser.add_argument(
+ "--input_path", type=str, help="Full filepath of the input file.", required=True)
+ parser.add_argument(
+ "--output_path",
+ type=str,
+ help="Full filepath of the output file.",
+ required=True)
+
+ # Input and output arrays.
+ parser.add_argument(
+ "--input_arrays",
+ type=str,
+ help="Names of the input arrays, comma-separated.",
+ required=True)
+ parser.add_argument(
+ "--input_shapes",
+ type=str,
+ help="Shapes corresponding to --input_arrays, colon-separated.")
+ parser.add_argument(
+ "--output_arrays",
+ type=str,
+ help="Names of the output arrays, comma-separated.",
+ required=True)
+
+ return parser
+
+
+def _check_flags(flags):
+ """
+ Checks the parsed flags to ensure they are valid.
+ """
+ if flags.v1:
+ invalid = ""
+ # To be filled
+
+ if invalid:
+ raise ValueError(invalid + " options must be used with v2")
+
+ if flags.v2:
+ if tf.__version__.find("2.") != 0:
+ raise ValueError(
+ "Imported TensorFlow should have version >= 2.0 but you have " +
+ tf.__version__)
+
+ invalid = ""
+ # To be filled
+
+ if invalid:
+ raise ValueError(invalid + " options must be used with v1")
+
+ if flags.input_shapes:
+ if not flags.input_arrays:
+ raise ValueError("--input_shapes must be used with --input_arrays")
+ if flags.input_shapes.count(":") != flags.input_arrays.count(","):
+ raise ValueError("--input_shapes and --input_arrays must have the same "
+ "number of items")
+
+
+def _parse_array(arrays, type_fn=str):
+ return list(map(type_fn, arrays.split(",")))
+
+
+def _v1_convert(flags):
+ input_shapes = None
+ if flags.input_shapes:
+ input_arrays = _parse_array(flags.input_arrays)
+ input_shapes_list = [
+ _parse_array(shape, type_fn=int) for shape in flags.input_shapes.split(":")
+ ]
+ input_shapes = dict(list(zip(input_arrays, input_shapes_list)))
+
+ converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
+ flags.input_path, _parse_array(flags.input_arrays),
+ _parse_array(flags.output_arrays), input_shapes)
+
+ converter.allow_custom_ops = True
+
+ tflite_model = converter.convert()
+ open(flags.output_path, "wb").write(tflite_model)
+
+
+def _v2_convert(flags):
+ graph_def = tf.compat.v1.GraphDef()
+ graph_def.ParseFromString(open(flags.input_path, 'rb').read())
+
+ wrap_func = wrap_frozen_graph(
+ graph_def,
+ inputs=[_str + ":0" for _str in _parse_array(flags.input_arrays)],
+ # TODO What if multiple outputs come in?
+ outputs=[_str + ":0" for _str in _parse_array(flags.output_arrays)])
+ converter = tf.lite.TFLiteConverter.from_concrete_functions([wrap_func])
+
+ converter.allow_custom_ops = True
+ converter.experimental_new_converter = True
+
+ converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
+
+ tflite_model = converter.convert()
+ open(flags.output_path, "wb").write(tflite_model)
+
+
+def _convert(flags):
+ if (flags.v1):
+ _v1_convert(flags)
+ else:
+ _v2_convert(flags)
+
+
+"""
+Input frozen graph must be from TensorFlow 1.13.1
+"""
+
+
+def main():
+ # Parse argument.
+ parser = _get_parser()
+
+ # Check if the flags are valid.
+ flags = parser.parse_known_args(args=sys.argv[1:])
+ _check_flags(flags[0])
+
+ # Convert
+ _convert(flags[0])
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py b/tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py
index 35c78d034..a6955d8c1 100755
--- a/tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py
+++ b/tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py
@@ -36,6 +36,7 @@ if __name__ == '__main__':
json_dict = json.load(f, object_pairs_hook=OrderedDict)
for subgraph in json_dict["subgraphs"]:
subgraph["data_format"] = "CHANNELS_LAST"
+ json_dict["version"] = 0
print(json.dumps(json_dict, indent=2))
except KeyError:
print("subgraphs attribute does not exist.")
diff --git a/tools/pbfile_tool/convert_ckpt_to_pb.py b/tools/pbfile_tool/convert_ckpt_to_pb.py
index cd43143ca..cd43143ca 100644..100755
--- a/tools/pbfile_tool/convert_ckpt_to_pb.py
+++ b/tools/pbfile_tool/convert_ckpt_to_pb.py
diff --git a/tools/pbfile_tool/convert_pb_to_pbtxt.py b/tools/pbfile_tool/convert_pb_to_pbtxt.py
index 28a3da4c2..28a3da4c2 100644..100755
--- a/tools/pbfile_tool/convert_pb_to_pbtxt.py
+++ b/tools/pbfile_tool/convert_pb_to_pbtxt.py
diff --git a/tools/pbfile_tool/extract_subgraph.py b/tools/pbfile_tool/extract_subgraph.py
index 06135990a..06135990a 100644..100755
--- a/tools/pbfile_tool/extract_subgraph.py
+++ b/tools/pbfile_tool/extract_subgraph.py
diff --git a/tools/tensorflow_model_freezer/__init__.py b/tools/tensorflow_model_freezer/__init__.py
index 89d760b4a..89d760b4a 100644..100755
--- a/tools/tensorflow_model_freezer/__init__.py
+++ b/tools/tensorflow_model_freezer/__init__.py
diff --git a/tools/tensorflow_model_freezer/base_freezer.py b/tools/tensorflow_model_freezer/base_freezer.py
index a365a7806..a365a7806 100644..100755
--- a/tools/tensorflow_model_freezer/base_freezer.py
+++ b/tools/tensorflow_model_freezer/base_freezer.py
diff --git a/tools/tensorflow_model_freezer/model_freezer_util.py b/tools/tensorflow_model_freezer/model_freezer_util.py
index 3fdbba785..3fdbba785 100644..100755
--- a/tools/tensorflow_model_freezer/model_freezer_util.py
+++ b/tools/tensorflow_model_freezer/model_freezer_util.py
diff --git a/tools/tensorflow_model_freezer/sample/Operation_gen.py b/tools/tensorflow_model_freezer/sample/Operation_gen.py
index 3a810e53d..3a810e53d 100644..100755
--- a/tools/tensorflow_model_freezer/sample/Operation_gen.py
+++ b/tools/tensorflow_model_freezer/sample/Operation_gen.py
diff --git a/tools/tensorflow_model_freezer/sample/UNSTACK_gen.py b/tools/tensorflow_model_freezer/sample/UNSTACK_gen.py
index 3cee7459f..3cee7459f 100644..100755
--- a/tools/tensorflow_model_freezer/sample/UNSTACK_gen.py
+++ b/tools/tensorflow_model_freezer/sample/UNSTACK_gen.py
diff --git a/tools/tensorflow_model_freezer/sample/__init__.py b/tools/tensorflow_model_freezer/sample/__init__.py
index 89d760b4a..89d760b4a 100644..100755
--- a/tools/tensorflow_model_freezer/sample/__init__.py
+++ b/tools/tensorflow_model_freezer/sample/__init__.py
diff --git a/tools/tflite_accuracy/README.md b/tools/tflite_accuracy/README.md
index 22804e140..98cd20372 100644
--- a/tools/tflite_accuracy/README.md
+++ b/tools/tflite_accuracy/README.md
@@ -34,4 +34,4 @@ assumed to be the numerical code of the image's class. So, a file named
to be an identifier of the image itself.
The width and height each image can be informed via the command line
-argument `--imgsize`, whose default value is 224. \ No newline at end of file
+argument `--imgsize`, whose default value is 224.
diff --git a/tools/tflitefile_tool/config_saver.py b/tools/tflitefile_tool/config_saver.py
index 8abd3cf8b..abf2c0ca2 100644..100755
--- a/tools/tflitefile_tool/config_saver.py
+++ b/tools/tflitefile_tool/config_saver.py
@@ -17,15 +17,14 @@
from operator_wrapping import Operator
from tensor_printer import TensorPrinter
from option_printer import OptionPrinter
-from perf_predictor import PerfPredictor
class ConfigSaver(object):
def __init__(self, file_name, operator):
self.file_name = file_name
self.operator = operator
- # Set self.verbose to 2 level to print more information
- self.verbose = 2
+ # Set self.verbose to 1 level to print more information
+ self.verbose = 1
self.op_idx = operator.operator_idx
self.op_name = operator.opcode_str
diff --git a/tools/tflitefile_tool/graph_stats.py b/tools/tflitefile_tool/graph_stats.py
new file mode 100755
index 000000000..85acaefa6
--- /dev/null
+++ b/tools/tflitefile_tool/graph_stats.py
@@ -0,0 +1,84 @@
+#!/usr/bin/python
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class GraphStats():
+ def __init__(self):
+ from collections import Counter
+ from collections import defaultdict
+ self.op_counts = Counter()
+ self.filled_memory = 0
+ self.total_memory = 0
+
+ def accumulate_op_count(self, op_str, count):
+ self.op_counts[op_str] += count
+
+ def accumulate_filled_memory(self, size):
+ self.filled_memory += size
+
+ def accumulate_total_memory(self, size):
+ self.total_memory += size
+
+ def __iadd__(self, other):
+ self.op_counts += other.op_counts
+ self.filled_memory += other.filled_memory
+ self.total_memory += other.total_memory
+ return self
+
+
+def PrintGraphStats(stats, verbose):
+ print("Number of all operator types: {0}".format(len(stats.op_counts)))
+
+ # Print op type stats
+ for op_name in sorted(stats.op_counts.keys()):
+ occur = stats.op_counts[op_name]
+ optype_info_str = "\t{:38}: {:4}".format(op_name, occur)
+
+ print(optype_info_str)
+
+ summary_str = "{0:46}: {1:4}".format("Number of all operators",
+ sum(stats.op_counts.values()))
+ print(summary_str)
+ print('')
+
+ # Print memory stats
+ from tensor_printer import ConvertBytesToHuman
+ print("Expected TOTAL memory: {0}".format(ConvertBytesToHuman(stats.total_memory)))
+ print("Expected FILLED memory: {0}".format(ConvertBytesToHuman(stats.filled_memory)))
+ print('')
+
+
+def CalcGraphStats(op_parser):
+ stats = GraphStats()
+
+ for type_str, oper_list in op_parser.operators_per_type.items():
+ # number of occurrence of this operator type
+ occur = len(oper_list)
+ stats.accumulate_op_count(type_str, occur)
+
+ # this operator type can be computed?
+ can_compute = oper_list[0].operation.can_compute
+
+ total_memory = 0
+ filled_memory = 0 # only memory for constant
+ for tensor in op_parser.GetAllTensors():
+ if tensor.tf_buffer.DataLength() != 0:
+ filled_memory += tensor.memory_size
+ total_memory += tensor.memory_size
+ stats.accumulate_filled_memory(filled_memory)
+ stats.accumulate_total_memory(total_memory)
+
+ return stats
diff --git a/tools/tflitefile_tool/model_parser.py b/tools/tflitefile_tool/model_parser.py
index 6f9e1c616..4ef2374cf 100755
--- a/tools/tflitefile_tool/model_parser.py
+++ b/tools/tflitefile_tool/model_parser.py
@@ -27,10 +27,10 @@ import flatbuffers
import tflite.Model
import tflite.SubGraph
import argparse
+import graph_stats
from operator_parser import OperatorParser
-from model_printer import ModelPrinter
+from subgraph_printer import SubgraphPrinter
from model_saver import ModelSaver
-from perf_predictor import PerfPredictor
class TFLiteModelFileParser(object):
@@ -38,10 +38,10 @@ class TFLiteModelFileParser(object):
# Read flatbuffer file descriptor using argument
self.tflite_file = args.input_file
- # Set print level (0 ~ 2)
+ # Set print level (0 ~ 1)
self.print_level = args.verbose
- if (args.verbose > 2):
- self.print_level = 2
+ if (args.verbose > 1):
+ self.print_level = 1
if (args.verbose < 0):
self.print_level = 0
@@ -73,7 +73,7 @@ class TFLiteModelFileParser(object):
self.save_prefix = args.prefix
def PrintModel(self, model_name, op_parser):
- printer = ModelPrinter(self.print_level, op_parser, model_name)
+ printer = SubgraphPrinter(self.print_level, op_parser, model_name)
if self.print_all_tensor == False:
printer.SetPrintSpecificTensors(self.print_tensor_index)
@@ -95,18 +95,21 @@ class TFLiteModelFileParser(object):
buf = bytearray(buf)
tf_model = tflite.Model.Model.GetRootAsModel(buf, 0)
+ stats = graph_stats.GraphStats()
# Model file can have many models
- # 1st subgraph is main model
- model_name = "Main_model"
for subgraph_index in range(tf_model.SubgraphsLength()):
tf_subgraph = tf_model.Subgraphs(subgraph_index)
- if (subgraph_index != 0):
- model_name = "Model_#" + str(subgraph_index)
+ model_name = "#{0} {1}".format(subgraph_index, tf_subgraph.Name())
+ # 0th subgraph is main subgraph
+ if (subgraph_index == 0):
+ model_name += " (MAIN)"
# Parse Operators
- op_parser = OperatorParser(tf_model, tf_subgraph, PerfPredictor())
+ op_parser = OperatorParser(tf_model, tf_subgraph)
op_parser.Parse()
+ stats += graph_stats.CalcGraphStats(op_parser)
+
if self.save == False:
# print all of operators or requested objects
self.PrintModel(model_name, op_parser)
@@ -114,6 +117,10 @@ class TFLiteModelFileParser(object):
# save all of operators in this model
self.SaveModel(model_name, op_parser)
+ print('==== Model Stats ({} Subgraphs) ===='.format(tf_model.SubgraphsLength()))
+ print('')
+ graph_stats.PrintGraphStats(stats, self.print_level)
+
if __name__ == '__main__':
# Define argument and read
@@ -121,7 +128,7 @@ if __name__ == '__main__':
arg_parser.add_argument(
"input_file", type=argparse.FileType('rb'), help="tflite file to read")
arg_parser.add_argument(
- '-v', '--verbose', type=int, default=1, help="set print level (0~2, default: 1)")
+ '-v', '--verbose', type=int, default=1, help="set print level (0~1, default: 1)")
arg_parser.add_argument(
'-t', '--tensor', nargs='*', help="tensor ID to print information (default: all)")
arg_parser.add_argument(
diff --git a/tools/tflitefile_tool/model_printer.py b/tools/tflitefile_tool/model_printer.py
deleted file mode 100644
index 0c11d01bd..000000000
--- a/tools/tflitefile_tool/model_printer.py
+++ /dev/null
@@ -1,146 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from operator_printer import OperatorPrinter
-from tensor_printer import TensorPrinter
-
-
-class ModelPrinter(object):
- def __init__(self, verbose, op_parser, model_name):
- self.verbose = verbose
- self.op_parser = op_parser
- self.model_name = model_name
- self.print_all_tensor = True
- self.print_tensor_index_list = None
- self.print_all_operator = True
- self.print_operator_index_list = None
-
- def SetPrintSpecificTensors(self, tensor_indices):
- if len(tensor_indices) != 0:
- self.print_all_tensor = False
- self.print_tensor_index_list = tensor_indices
-
- def SetPrintSpecificOperators(self, operator_indices):
- if len(operator_indices) != 0:
- self.print_all_operator = False
- self.print_operator_index_list = operator_indices
-
- def PrintInfo(self):
- if self.print_all_tensor == True and self.print_all_operator == True:
- self.PrintModelInfo()
- self.PrintAllOperatorsInList()
- self.PrintAllTypesInfo()
- self.PrintTotalMemory()
-
- if self.print_all_tensor == False:
- print('')
- self.PrintSpecificTensors()
-
- if self.print_all_operator == False:
- print('')
- self.PrintSpecificOperators()
-
- def PrintModelInfo(self):
- print("[" + self.model_name + "]\n")
- if self.verbose > 0:
- model_inputs = self.op_parser.tf_subgraph.InputsAsNumpy()
- model_outputs = self.op_parser.tf_subgraph.OutputsAsNumpy()
- print(self.model_name + " input tensors: " + str(model_inputs))
- print(self.model_name + " output tensors: " + str(model_outputs))
- print('')
-
- def PrintAllOperatorsInList(self):
- if (self.verbose < 1):
- return
-
- for operator in self.op_parser.operators_in_list:
- printer = OperatorPrinter(self.verbose, operator)
- printer.PrintInfo(self.op_parser.perf_predictor)
- print('')
-
- print('')
-
- def PrintAllTypesInfo(self):
- print("Number of all operator types: {0}".format(
- len(self.op_parser.operators_per_type)))
-
- # number of instructions of all operator types to print if verbose level is 2
- total_instrs = 0
-
- # (a string of the operator type, a list of operators which are the same operator type)
- for type_str, oper_list in self.op_parser.operators_per_type.items():
- # number of occurrence of this operator type
- occur = len(oper_list)
-
- optype_info_str = "\t{type_str:38}: {occur:4}".format(
- type_str=type_str, occur=occur)
-
- if self.verbose == 2:
- # this operator type can be computed?
- can_compute = oper_list[0].operation.can_compute
-
- # total number of instructions of the same operator types
- if can_compute:
- instrs = sum(
- operator.operation.TotalInstrNum() for operator in oper_list)
- total_instrs = total_instrs + instrs
- instrs = "{:,}".format(instrs)
- else:
- instrs = "???"
-
- optype_info_str = optype_info_str + " \t (instrs: {instrs})".format(
- instrs=instrs)
-
- print(optype_info_str)
-
- summary_str = "{0:46}: {1:4}".format("Number of all operators",
- len(self.op_parser.operators_in_list))
- if self.verbose == 2:
- total_instrs = "{:,}".format(total_instrs)
- summary_str = summary_str + " \t (total instrs: {0})".format(total_instrs)
-
- print(summary_str)
- print('')
-
- def PrintSpecificTensors(self):
- for tensor in self.op_parser.GetAllTensors():
- if tensor.tensor_idx in self.print_tensor_index_list:
- printer = TensorPrinter(self.verbose, tensor)
- printer.PrintInfo()
- print('')
- print('')
-
- def PrintSpecificOperators(self):
- for operator in self.op_parser.operators_in_list:
- if operator.operator_idx in self.print_operator_index_list:
- printer = OperatorPrinter(self.verbose, operator)
- printer.PrintInfo(self.op_parser.perf_predictor)
- print('')
-
- print('')
-
- def PrintTotalMemory(self):
- total_memory = 0
- filled_memory = 0 # only memory for constant
- for tensor in self.op_parser.GetAllTensors():
- if tensor.tf_buffer.DataLength() != 0:
- filled_memory += tensor.memory_size
- total_memory += tensor.memory_size
-
- from tensor_printer import ConvertBytesToHuman
- print("Expected TOTAL memory: {0}".format(ConvertBytesToHuman(total_memory)))
- print("Expected FILLED memory: {0}".format(ConvertBytesToHuman(filled_memory)))
- print('')
diff --git a/tools/tflitefile_tool/model_saver.py b/tools/tflitefile_tool/model_saver.py
index 15037a1fe..15037a1fe 100644..100755
--- a/tools/tflitefile_tool/model_saver.py
+++ b/tools/tflitefile_tool/model_saver.py
diff --git a/tools/tflitefile_tool/operator_parser.py b/tools/tflitefile_tool/operator_parser.py
index 71b1a6d93..2c230c275 100755
--- a/tools/tflitefile_tool/operator_parser.py
+++ b/tools/tflitefile_tool/operator_parser.py
@@ -25,10 +25,9 @@ from operation import Operation
class OperatorParser(object):
- def __init__(self, tf_model, tf_subgraph, perf_predictor=None):
+ def __init__(self, tf_model, tf_subgraph):
self.tf_model = tf_model
self.tf_subgraph = tf_subgraph
- self.perf_predictor = perf_predictor
self.operators_in_list = list()
self.operators_per_type = dict()
# Tensor type string table
@@ -67,8 +66,9 @@ class OperatorParser(object):
def GetTensors(self, tf_tensors_index):
return_list = list()
for tensor_idx in tf_tensors_index:
+ # in case of optional input, tensor_idx == -1
if (tensor_idx < 0):
- return_list.append(Tensor(tensor_idx, 0, 0))
+ return_list.append(Tensor(tensor_idx, None, None))
continue
tf_tensor = self.tf_subgraph.Tensors(tensor_idx)
buffer_idx = tf_tensor.Buffer()
diff --git a/tools/tflitefile_tool/operator_printer.py b/tools/tflitefile_tool/operator_printer.py
index 9b6f97d24..e7c553394 100644..100755
--- a/tools/tflitefile_tool/operator_printer.py
+++ b/tools/tflitefile_tool/operator_printer.py
@@ -17,7 +17,6 @@
from operator_wrapping import Operator
from tensor_printer import TensorPrinter
from option_printer import OptionPrinter
-from perf_predictor import PerfPredictor
def GetStrTensorIndex(tensors):
@@ -35,25 +34,13 @@ class OperatorPrinter(object):
self.verbose = verbose
self.operator = operator
- def PrintInfo(self, perf_predictor=None):
+ def PrintInfo(self):
if (self.verbose < 1):
return
op_str = "Operator {0}: {1}".format(self.operator.operator_idx,
self.operator.opcode_str)
- if self.verbose == 2:
- # total instruction num
- instrs = "{:,}".format(self.operator.operation.TotalInstrNum()
- ) if self.operator.operation.can_compute else "???"
-
- # total operation cycles
- cycles = "{:,}".format(
- (perf_predictor.PredictCycles(self.operator.operation))
- ) if self.operator.operation.can_compute and perf_predictor != None else "???"
-
- op_str = op_str + "(instrs: {0}, cycls: {1})".format(instrs, cycles)
-
print(op_str)
print("\tFused Activation: " + self.operator.fused_activation)
self.PrintTensors()
diff --git a/tools/tflitefile_tool/option_printer.py b/tools/tflitefile_tool/option_printer.py
index 15265adf2..15265adf2 100644..100755
--- a/tools/tflitefile_tool/option_printer.py
+++ b/tools/tflitefile_tool/option_printer.py
diff --git a/tools/tflitefile_tool/perf_predictor.py b/tools/tflitefile_tool/perf_predictor.py
deleted file mode 100755
index ea5c15a33..000000000
--- a/tools/tflitefile_tool/perf_predictor.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from operation import Operation
-
-
-class PerfPredictor(object):
- def __init__(self, add_cycle=1, mul_cycle=1, nonlinear_cycle=1):
- self.add_cycle = add_cycle
- self.mul_cycle = mul_cycle
- self.nonlinear_cycle = nonlinear_cycle
-
- def PredictCycles(self, operation):
- return (operation.add_instr_num * self.add_cycle +
- operation.mul_instr_num * self.mul_cycle +
- operation.nonlinear_instr_num * self.nonlinear_cycle)
diff --git a/tools/tflitefile_tool/select_operator.py b/tools/tflitefile_tool/select_operator.py
index 4dd516398..b122b62e9 100644..100755
--- a/tools/tflitefile_tool/select_operator.py
+++ b/tools/tflitefile_tool/select_operator.py
@@ -184,7 +184,7 @@ def GenerateTensor(new_builder, selected_tensor, used_buffers_dic):
# Create quantization
quantization = selected_tensor.Quantization()
- if quantization != 0:
+ if quantization != None:
new_quantization = GenerateQuantization(new_builder, quantization)
# Create tensor
@@ -194,7 +194,7 @@ def GenerateTensor(new_builder, selected_tensor, used_buffers_dic):
tflite.Tensor.TensorAddBuffer(new_builder, new_buffer_idx)
if name_string != "":
tflite.Tensor.TensorAddName(new_builder, new_name)
- if quantization != 0:
+ if quantization != None:
tflite.Tensor.TensorAddQuantization(new_builder, new_quantization)
return tflite.Tensor.TensorEnd(new_builder)
@@ -709,7 +709,16 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
# SparseToDenseOptions: not supported
# TileOptions: not supported
- # ExpandDimsOptions: not supported
+
+ # ExpandDimsOptions:
+ import tflite.ExpandDimsOptions
+ if builtin_option_type == tflite.BuiltinOptions.BuiltinOptions().ExpandDimsOptions:
+
+ expanddims_option = tflite.ExpandDimsOptions.ExpandDimsOptions()
+ expanddims_option.Init(selected_builtin_option.Bytes, selected_builtin_option.Pos)
+
+ tflite.ExpandDimsOptions.ExpandDimsOptionsStart(new_builder)
+ return tflite.ExpandDimsOptions.ExpandDimsOptionsEnd(new_builder)
# NotEqualOptions:
import tflite.NotEqualOptions
@@ -721,7 +730,17 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
tflite.NotEqualOptions.NotEqualOptionsStart(new_builder)
return tflite.NotEqualOptions.NotEqualOptionsEnd(new_builder)
- # ShapeOptions: not supported
+ # ShapeOptions:
+ import tflite.ShapeOptions
+ if builtin_option_type == tflite.BuiltinOptions.BuiltinOptions().ShapeOptions:
+
+ shape_option = tflite.ShapeOptions.ShapeOptions()
+ shape_option.Init(selected_builtin_option.Bytes, selected_builtin_option.Pos)
+
+ tflite.ShapeOptions.ShapeOptionsStart(new_builder)
+ tflite.ShapeOptions.ShapeOptionsAddOutType(new_builder, shape_option.OutType())
+ return tflite.ShapeOptions.ShapeOptionsEnd(new_builder)
+
# PowOptions: not supported
# ArgMinOptions: not supported
# FakeQuantOptions: not supported
diff --git a/tools/tflitefile_tool/subgraph_printer.py b/tools/tflitefile_tool/subgraph_printer.py
new file mode 100755
index 000000000..c87f7816e
--- /dev/null
+++ b/tools/tflitefile_tool/subgraph_printer.py
@@ -0,0 +1,92 @@
+#!/usr/bin/python
+
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from operator_printer import OperatorPrinter
+from tensor_printer import TensorPrinter
+import graph_stats
+
+
+class SubgraphPrinter(object):
+ def __init__(self, verbose, op_parser, model_name):
+ self.verbose = verbose
+ self.op_parser = op_parser
+ self.model_name = model_name
+ self.print_all_tensor = True
+ self.print_tensor_index_list = None
+ self.print_all_operator = True
+ self.print_operator_index_list = None
+
+ def SetPrintSpecificTensors(self, tensor_indices):
+ if len(tensor_indices) != 0:
+ self.print_all_tensor = False
+ self.print_tensor_index_list = tensor_indices
+
+ def SetPrintSpecificOperators(self, operator_indices):
+ if len(operator_indices) != 0:
+ self.print_all_operator = False
+ self.print_operator_index_list = operator_indices
+
+ def PrintInfo(self):
+ if self.print_all_tensor == True and self.print_all_operator == True:
+ self.PrintModelInfo()
+ self.PrintAllOperatorsInList()
+ graph_stats.PrintGraphStats(
+ graph_stats.CalcGraphStats(self.op_parser), self.verbose)
+
+ if self.print_all_tensor == False:
+ print('')
+ self.PrintSpecificTensors()
+
+ if self.print_all_operator == False:
+ print('')
+ self.PrintSpecificOperators()
+
+ def PrintModelInfo(self):
+ print("[" + self.model_name + "]\n")
+ if self.verbose > 0:
+ model_inputs = self.op_parser.tf_subgraph.InputsAsNumpy()
+ model_outputs = self.op_parser.tf_subgraph.OutputsAsNumpy()
+ print(self.model_name + " input tensors: " + str(model_inputs))
+ print(self.model_name + " output tensors: " + str(model_outputs))
+ print('')
+
+ def PrintAllOperatorsInList(self):
+ if (self.verbose < 1):
+ return
+
+ for operator in self.op_parser.operators_in_list:
+ printer = OperatorPrinter(self.verbose, operator)
+ printer.PrintInfo()
+ print('')
+
+ print('')
+
+ def PrintSpecificTensors(self):
+ for tensor in self.op_parser.GetAllTensors():
+ if tensor.tensor_idx in self.print_tensor_index_list:
+ printer = TensorPrinter(self.verbose, tensor)
+ printer.PrintInfo()
+ print('')
+ print('')
+
+ def PrintSpecificOperators(self):
+ for operator in self.op_parser.operators_in_list:
+ if operator.operator_idx in self.print_operator_index_list:
+ printer = OperatorPrinter(self.verbose, operator)
+ printer.PrintInfo()
+ print('')
+
+ print('')
diff --git a/tools/tflitefile_tool/tensor_printer.py b/tools/tflitefile_tool/tensor_printer.py
index f566a6e10..f566a6e10 100644..100755
--- a/tools/tflitefile_tool/tensor_printer.py
+++ b/tools/tflitefile_tool/tensor_printer.py
diff --git a/tools/tflitefile_tool/tensor_wrapping.py b/tools/tflitefile_tool/tensor_wrapping.py
index a32a573ce..2a6dcaceb 100755
--- a/tools/tflitefile_tool/tensor_wrapping.py
+++ b/tools/tflitefile_tool/tensor_wrapping.py
@@ -55,7 +55,13 @@ class Tensor(object):
self.tensor_idx = tensor_idx
self.tf_tensor = tf_tensor
self.tf_buffer = tf_buffer
- self.type_name = TensorTypeList[self.tf_tensor.Type()]
+
+ # optional input
+ if (self.tf_tensor != None):
+ self.type_name = TensorTypeList[self.tf_tensor.Type()]
+ else:
+ self.type_name = None
+
self.memory_size = self.GetMemorySize()
def GetMemorySize(self):
diff --git a/tools/tflitefile_tool/tflite/AbsOptions.py b/tools/tflitefile_tool/tflite/AbsOptions.py
index 748509293..748509293 100644..100755
--- a/tools/tflitefile_tool/tflite/AbsOptions.py
+++ b/tools/tflitefile_tool/tflite/AbsOptions.py
diff --git a/tools/tflitefile_tool/tflite/ActivationFunctionType.py b/tools/tflitefile_tool/tflite/ActivationFunctionType.py
index a32353964..a32353964 100644..100755
--- a/tools/tflitefile_tool/tflite/ActivationFunctionType.py
+++ b/tools/tflitefile_tool/tflite/ActivationFunctionType.py
diff --git a/tools/tflitefile_tool/tflite/AddNOptions.py b/tools/tflitefile_tool/tflite/AddNOptions.py
new file mode 100755
index 000000000..cae1fcd3b
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/AddNOptions.py
@@ -0,0 +1,28 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class AddNOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsAddNOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = AddNOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # AddNOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+
+def AddNOptionsStart(builder):
+ builder.StartObject(0)
+
+
+def AddNOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/AddOptions.py b/tools/tflitefile_tool/tflite/AddOptions.py
index c9f3387ca..c9f3387ca 100644..100755
--- a/tools/tflitefile_tool/tflite/AddOptions.py
+++ b/tools/tflitefile_tool/tflite/AddOptions.py
diff --git a/tools/tflitefile_tool/tflite/ArgMaxOptions.py b/tools/tflitefile_tool/tflite/ArgMaxOptions.py
index 23cbfd731..23cbfd731 100644..100755
--- a/tools/tflitefile_tool/tflite/ArgMaxOptions.py
+++ b/tools/tflitefile_tool/tflite/ArgMaxOptions.py
diff --git a/tools/tflitefile_tool/tflite/ArgMinOptions.py b/tools/tflitefile_tool/tflite/ArgMinOptions.py
index 6a2dcdfe1..6a2dcdfe1 100644..100755
--- a/tools/tflitefile_tool/tflite/ArgMinOptions.py
+++ b/tools/tflitefile_tool/tflite/ArgMinOptions.py
diff --git a/tools/tflitefile_tool/tflite/BatchToSpaceNDOptions.py b/tools/tflitefile_tool/tflite/BatchToSpaceNDOptions.py
index 48a7d4c23..48a7d4c23 100644..100755
--- a/tools/tflitefile_tool/tflite/BatchToSpaceNDOptions.py
+++ b/tools/tflitefile_tool/tflite/BatchToSpaceNDOptions.py
diff --git a/tools/tflitefile_tool/tflite/BidirectionalSequenceLSTMOptions.py b/tools/tflitefile_tool/tflite/BidirectionalSequenceLSTMOptions.py
index eac2e27ab..aefc64b41 100644..100755
--- a/tools/tflitefile_tool/tflite/BidirectionalSequenceLSTMOptions.py
+++ b/tools/tflitefile_tool/tflite/BidirectionalSequenceLSTMOptions.py
@@ -48,9 +48,17 @@ class BidirectionalSequenceLSTMOptions(object):
self._tab.Get(flatbuffers.number_types.BoolFlags, o + self._tab.Pos))
return False
+ # BidirectionalSequenceLSTMOptions
+ def TimeMajor(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(12))
+ if o != 0:
+ return bool(
+ self._tab.Get(flatbuffers.number_types.BoolFlags, o + self._tab.Pos))
+ return True
+
def BidirectionalSequenceLSTMOptionsStart(builder):
- builder.StartObject(4)
+ builder.StartObject(5)
def BidirectionalSequenceLSTMOptionsAddFusedActivationFunction(builder,
@@ -70,5 +78,9 @@ def BidirectionalSequenceLSTMOptionsAddMergeOutputs(builder, mergeOutputs):
builder.PrependBoolSlot(3, mergeOutputs, 0)
+def BidirectionalSequenceLSTMOptionsAddTimeMajor(builder, timeMajor):
+ builder.PrependBoolSlot(4, timeMajor, 1)
+
+
def BidirectionalSequenceLSTMOptionsEnd(builder):
return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/BidirectionalSequenceRNNOptions.py b/tools/tflitefile_tool/tflite/BidirectionalSequenceRNNOptions.py
index e1d4f6553..e1d4f6553 100644..100755
--- a/tools/tflitefile_tool/tflite/BidirectionalSequenceRNNOptions.py
+++ b/tools/tflitefile_tool/tflite/BidirectionalSequenceRNNOptions.py
diff --git a/tools/tflitefile_tool/tflite/Buffer.py b/tools/tflitefile_tool/tflite/Buffer.py
index d3fa8e9de..d3fa8e9de 100644..100755
--- a/tools/tflitefile_tool/tflite/Buffer.py
+++ b/tools/tflitefile_tool/tflite/Buffer.py
diff --git a/tools/tflitefile_tool/tflite/BuiltinOperator.py b/tools/tflitefile_tool/tflite/BuiltinOperator.py
index 481eeffe8..28196bafb 100644..100755
--- a/tools/tflitefile_tool/tflite/BuiltinOperator.py
+++ b/tools/tflitefile_tool/tflite/BuiltinOperator.py
@@ -9,6 +9,7 @@ class BuiltinOperator(object):
CONCATENATION = 2
CONV_2D = 3
DEPTHWISE_CONV_2D = 4
+ DEPTH_TO_SPACE = 5
DEQUANTIZE = 6
EMBEDDING_LOOKUP = 7
FLOOR = 8
@@ -106,3 +107,23 @@ class BuiltinOperator(object):
MIRROR_PAD = 100
ABS = 101
SPLIT_V = 102
+ UNIQUE = 103
+ CEIL = 104
+ REVERSE_V2 = 105
+ ADD_N = 106
+ GATHER_ND = 107
+ COS = 108
+ WHERE = 109
+ RANK = 110
+ ELU = 111
+ REVERSE_SEQUENCE = 112
+ MATRIX_DIAG = 113
+ QUANTIZE = 114
+ MATRIX_SET_DIAG = 115
+ ROUND = 116
+ HARD_SWISH = 117
+ IF = 118
+ WHILE = 119
+ NON_MAX_SUPPRESSION_V4 = 120
+ NON_MAX_SUPPRESSION_V5 = 121
+ SCATTER_ND = 122
diff --git a/tools/tflitefile_tool/tflite/BuiltinOptions.py b/tools/tflitefile_tool/tflite/BuiltinOptions.py
index ad115814a..a39c68b59 100644..100755
--- a/tools/tflitefile_tool/tflite/BuiltinOptions.py
+++ b/tools/tflitefile_tool/tflite/BuiltinOptions.py
@@ -84,3 +84,21 @@ class BuiltinOptions(object):
MirrorPadOptions = 77
AbsOptions = 78
SplitVOptions = 79
+ UniqueOptions = 80
+ ReverseV2Options = 81
+ AddNOptions = 82
+ GatherNdOptions = 83
+ CosOptions = 84
+ WhereOptions = 85
+ RankOptions = 86
+ ReverseSequenceOptions = 87
+ MatrixDiagOptions = 88
+ QuantizeOptions = 89
+ MatrixSetDiagOptions = 90
+ HardSwishOptions = 91
+ IfOptions = 92
+ WhileOptions = 93
+ DepthToSpaceOptions = 94
+ NonMaxSuppressionV4Options = 95
+ NonMaxSuppressionV5Options = 96
+ ScatterNdOptions = 97
diff --git a/tools/tflitefile_tool/tflite/CallOptions.py b/tools/tflitefile_tool/tflite/CallOptions.py
index a82f001fa..a82f001fa 100644..100755
--- a/tools/tflitefile_tool/tflite/CallOptions.py
+++ b/tools/tflitefile_tool/tflite/CallOptions.py
diff --git a/tools/tflitefile_tool/tflite/CastOptions.py b/tools/tflitefile_tool/tflite/CastOptions.py
index 7f7a1dde3..7f7a1dde3 100644..100755
--- a/tools/tflitefile_tool/tflite/CastOptions.py
+++ b/tools/tflitefile_tool/tflite/CastOptions.py
diff --git a/tools/tflitefile_tool/tflite/CombinerType.py b/tools/tflitefile_tool/tflite/CombinerType.py
index dfe8afb9f..dfe8afb9f 100644..100755
--- a/tools/tflitefile_tool/tflite/CombinerType.py
+++ b/tools/tflitefile_tool/tflite/CombinerType.py
diff --git a/tools/tflitefile_tool/tflite/ConcatEmbeddingsOptions.py b/tools/tflitefile_tool/tflite/ConcatEmbeddingsOptions.py
index 6ca04a51f..6ca04a51f 100644..100755
--- a/tools/tflitefile_tool/tflite/ConcatEmbeddingsOptions.py
+++ b/tools/tflitefile_tool/tflite/ConcatEmbeddingsOptions.py
diff --git a/tools/tflitefile_tool/tflite/ConcatenationOptions.py b/tools/tflitefile_tool/tflite/ConcatenationOptions.py
index ea089ac56..ea089ac56 100644..100755
--- a/tools/tflitefile_tool/tflite/ConcatenationOptions.py
+++ b/tools/tflitefile_tool/tflite/ConcatenationOptions.py
diff --git a/tools/tflitefile_tool/tflite/Conv2DOptions.py b/tools/tflitefile_tool/tflite/Conv2DOptions.py
index 913729522..913729522 100644..100755
--- a/tools/tflitefile_tool/tflite/Conv2DOptions.py
+++ b/tools/tflitefile_tool/tflite/Conv2DOptions.py
diff --git a/tools/tflitefile_tool/tflite/CosOptions.py b/tools/tflitefile_tool/tflite/CosOptions.py
new file mode 100755
index 000000000..51f2a7b0c
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/CosOptions.py
@@ -0,0 +1,28 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class CosOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsCosOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = CosOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # CosOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+
+def CosOptionsStart(builder):
+ builder.StartObject(0)
+
+
+def CosOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/CustomOptionsFormat.py b/tools/tflitefile_tool/tflite/CustomOptionsFormat.py
index 18bc07d02..18bc07d02 100644..100755
--- a/tools/tflitefile_tool/tflite/CustomOptionsFormat.py
+++ b/tools/tflitefile_tool/tflite/CustomOptionsFormat.py
diff --git a/tools/tflitefile_tool/tflite/CustomQuantization.py b/tools/tflitefile_tool/tflite/CustomQuantization.py
index 8cc1db1e2..8cc1db1e2 100644..100755
--- a/tools/tflitefile_tool/tflite/CustomQuantization.py
+++ b/tools/tflitefile_tool/tflite/CustomQuantization.py
diff --git a/tools/tflitefile_tool/tflite/DepthToSpaceOptions.py b/tools/tflitefile_tool/tflite/DepthToSpaceOptions.py
new file mode 100755
index 000000000..d56fb8a64
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/DepthToSpaceOptions.py
@@ -0,0 +1,39 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class DepthToSpaceOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsDepthToSpaceOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = DepthToSpaceOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # DepthToSpaceOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+ # DepthToSpaceOptions
+ def BlockSize(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
+ if o != 0:
+ return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos)
+ return 0
+
+
+def DepthToSpaceOptionsStart(builder):
+ builder.StartObject(1)
+
+
+def DepthToSpaceOptionsAddBlockSize(builder, blockSize):
+ builder.PrependInt32Slot(0, blockSize, 0)
+
+
+def DepthToSpaceOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/DepthwiseConv2DOptions.py b/tools/tflitefile_tool/tflite/DepthwiseConv2DOptions.py
index 786f7c53d..786f7c53d 100644..100755
--- a/tools/tflitefile_tool/tflite/DepthwiseConv2DOptions.py
+++ b/tools/tflitefile_tool/tflite/DepthwiseConv2DOptions.py
diff --git a/tools/tflitefile_tool/tflite/DequantizeOptions.py b/tools/tflitefile_tool/tflite/DequantizeOptions.py
index fe4cc9a06..fe4cc9a06 100644..100755
--- a/tools/tflitefile_tool/tflite/DequantizeOptions.py
+++ b/tools/tflitefile_tool/tflite/DequantizeOptions.py
diff --git a/tools/tflitefile_tool/tflite/DivOptions.py b/tools/tflitefile_tool/tflite/DivOptions.py
index 53bbae542..53bbae542 100644..100755
--- a/tools/tflitefile_tool/tflite/DivOptions.py
+++ b/tools/tflitefile_tool/tflite/DivOptions.py
diff --git a/tools/tflitefile_tool/tflite/EmbeddingLookupSparseOptions.py b/tools/tflitefile_tool/tflite/EmbeddingLookupSparseOptions.py
index 12531d3d5..12531d3d5 100644..100755
--- a/tools/tflitefile_tool/tflite/EmbeddingLookupSparseOptions.py
+++ b/tools/tflitefile_tool/tflite/EmbeddingLookupSparseOptions.py
diff --git a/tools/tflitefile_tool/tflite/EqualOptions.py b/tools/tflitefile_tool/tflite/EqualOptions.py
index 968712ff8..968712ff8 100644..100755
--- a/tools/tflitefile_tool/tflite/EqualOptions.py
+++ b/tools/tflitefile_tool/tflite/EqualOptions.py
diff --git a/tools/tflitefile_tool/tflite/ExpOptions.py b/tools/tflitefile_tool/tflite/ExpOptions.py
index f8c7bd867..f8c7bd867 100644..100755
--- a/tools/tflitefile_tool/tflite/ExpOptions.py
+++ b/tools/tflitefile_tool/tflite/ExpOptions.py
diff --git a/tools/tflitefile_tool/tflite/ExpandDimsOptions.py b/tools/tflitefile_tool/tflite/ExpandDimsOptions.py
index 2dd8d506c..2dd8d506c 100644..100755
--- a/tools/tflitefile_tool/tflite/ExpandDimsOptions.py
+++ b/tools/tflitefile_tool/tflite/ExpandDimsOptions.py
diff --git a/tools/tflitefile_tool/tflite/FakeQuantOptions.py b/tools/tflitefile_tool/tflite/FakeQuantOptions.py
index 6a208e765..c266bfc9d 100644..100755
--- a/tools/tflitefile_tool/tflite/FakeQuantOptions.py
+++ b/tools/tflitefile_tool/tflite/FakeQuantOptions.py
@@ -53,12 +53,12 @@ def FakeQuantOptionsStart(builder):
builder.StartObject(4)
-def FakeQuantOptionsAddMin(builder, minSlot):
- builder.PrependFloat32Slot(0, minSlot, 0.0)
+def FakeQuantOptionsAddMin(builder, min):
+ builder.PrependFloat32Slot(0, min, 0.0)
-def FakeQuantOptionsAddMax(builder, maxSlot):
- builder.PrependFloat32Slot(1, maxSlot, 0.0)
+def FakeQuantOptionsAddMax(builder, max):
+ builder.PrependFloat32Slot(1, max, 0.0)
def FakeQuantOptionsAddNumBits(builder, numBits):
diff --git a/tools/tflitefile_tool/tflite/FillOptions.py b/tools/tflitefile_tool/tflite/FillOptions.py
index ee6273514..ee6273514 100644..100755
--- a/tools/tflitefile_tool/tflite/FillOptions.py
+++ b/tools/tflitefile_tool/tflite/FillOptions.py
diff --git a/tools/tflitefile_tool/tflite/FloorDivOptions.py b/tools/tflitefile_tool/tflite/FloorDivOptions.py
index 90b797112..90b797112 100644..100755
--- a/tools/tflitefile_tool/tflite/FloorDivOptions.py
+++ b/tools/tflitefile_tool/tflite/FloorDivOptions.py
diff --git a/tools/tflitefile_tool/tflite/FloorModOptions.py b/tools/tflitefile_tool/tflite/FloorModOptions.py
index d2f7b3b48..d2f7b3b48 100644..100755
--- a/tools/tflitefile_tool/tflite/FloorModOptions.py
+++ b/tools/tflitefile_tool/tflite/FloorModOptions.py
diff --git a/tools/tflitefile_tool/tflite/FullyConnectedOptions.py b/tools/tflitefile_tool/tflite/FullyConnectedOptions.py
index 59c2a367a..76a31ef0e 100644..100755
--- a/tools/tflitefile_tool/tflite/FullyConnectedOptions.py
+++ b/tools/tflitefile_tool/tflite/FullyConnectedOptions.py
@@ -33,9 +33,17 @@ class FullyConnectedOptions(object):
return self._tab.Get(flatbuffers.number_types.Int8Flags, o + self._tab.Pos)
return 0
+ # FullyConnectedOptions
+ def KeepNumDims(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
+ if o != 0:
+ return bool(
+ self._tab.Get(flatbuffers.number_types.BoolFlags, o + self._tab.Pos))
+ return False
+
def FullyConnectedOptionsStart(builder):
- builder.StartObject(2)
+ builder.StartObject(3)
def FullyConnectedOptionsAddFusedActivationFunction(builder, fusedActivationFunction):
@@ -46,5 +54,9 @@ def FullyConnectedOptionsAddWeightsFormat(builder, weightsFormat):
builder.PrependInt8Slot(1, weightsFormat, 0)
+def FullyConnectedOptionsAddKeepNumDims(builder, keepNumDims):
+ builder.PrependBoolSlot(2, keepNumDims, 0)
+
+
def FullyConnectedOptionsEnd(builder):
return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/FullyConnectedOptionsWeightsFormat.py b/tools/tflitefile_tool/tflite/FullyConnectedOptionsWeightsFormat.py
index 143fc5122..143fc5122 100644..100755
--- a/tools/tflitefile_tool/tflite/FullyConnectedOptionsWeightsFormat.py
+++ b/tools/tflitefile_tool/tflite/FullyConnectedOptionsWeightsFormat.py
diff --git a/tools/tflitefile_tool/tflite/GatherNdOptions.py b/tools/tflitefile_tool/tflite/GatherNdOptions.py
new file mode 100755
index 000000000..3357d8526
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/GatherNdOptions.py
@@ -0,0 +1,28 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class GatherNdOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsGatherNdOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = GatherNdOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # GatherNdOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+
+def GatherNdOptionsStart(builder):
+ builder.StartObject(0)
+
+
+def GatherNdOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/GatherOptions.py b/tools/tflitefile_tool/tflite/GatherOptions.py
index cfb54496b..cfb54496b 100644..100755
--- a/tools/tflitefile_tool/tflite/GatherOptions.py
+++ b/tools/tflitefile_tool/tflite/GatherOptions.py
diff --git a/tools/tflitefile_tool/tflite/GreaterEqualOptions.py b/tools/tflitefile_tool/tflite/GreaterEqualOptions.py
index 12df3c88c..12df3c88c 100644..100755
--- a/tools/tflitefile_tool/tflite/GreaterEqualOptions.py
+++ b/tools/tflitefile_tool/tflite/GreaterEqualOptions.py
diff --git a/tools/tflitefile_tool/tflite/GreaterOptions.py b/tools/tflitefile_tool/tflite/GreaterOptions.py
index 614cdb290..614cdb290 100644..100755
--- a/tools/tflitefile_tool/tflite/GreaterOptions.py
+++ b/tools/tflitefile_tool/tflite/GreaterOptions.py
diff --git a/tools/tflitefile_tool/tflite/HardSwishOptions.py b/tools/tflitefile_tool/tflite/HardSwishOptions.py
new file mode 100755
index 000000000..374c2dcf0
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/HardSwishOptions.py
@@ -0,0 +1,28 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class HardSwishOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsHardSwishOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = HardSwishOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # HardSwishOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+
+def HardSwishOptionsStart(builder):
+ builder.StartObject(0)
+
+
+def HardSwishOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/IfOptions.py b/tools/tflitefile_tool/tflite/IfOptions.py
new file mode 100755
index 000000000..e5bf26907
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/IfOptions.py
@@ -0,0 +1,50 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class IfOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsIfOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = IfOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # IfOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+ # IfOptions
+ def ThenSubgraphIndex(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
+ if o != 0:
+ return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos)
+ return 0
+
+ # IfOptions
+ def ElseSubgraphIndex(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
+ if o != 0:
+ return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos)
+ return 0
+
+
+def IfOptionsStart(builder):
+ builder.StartObject(2)
+
+
+def IfOptionsAddThenSubgraphIndex(builder, thenSubgraphIndex):
+ builder.PrependInt32Slot(0, thenSubgraphIndex, 0)
+
+
+def IfOptionsAddElseSubgraphIndex(builder, elseSubgraphIndex):
+ builder.PrependInt32Slot(1, elseSubgraphIndex, 0)
+
+
+def IfOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/L2NormOptions.py b/tools/tflitefile_tool/tflite/L2NormOptions.py
index 1d3ab1ec8..1d3ab1ec8 100644..100755
--- a/tools/tflitefile_tool/tflite/L2NormOptions.py
+++ b/tools/tflitefile_tool/tflite/L2NormOptions.py
diff --git a/tools/tflitefile_tool/tflite/LSHProjectionOptions.py b/tools/tflitefile_tool/tflite/LSHProjectionOptions.py
index ea1e20bbe..055eb75ff 100644..100755
--- a/tools/tflitefile_tool/tflite/LSHProjectionOptions.py
+++ b/tools/tflitefile_tool/tflite/LSHProjectionOptions.py
@@ -31,8 +31,8 @@ def LSHProjectionOptionsStart(builder):
builder.StartObject(1)
-def LSHProjectionOptionsAddType(builder, typeSlot):
- builder.PrependInt8Slot(0, typeSlot, 0)
+def LSHProjectionOptionsAddType(builder, type):
+ builder.PrependInt8Slot(0, type, 0)
def LSHProjectionOptionsEnd(builder):
diff --git a/tools/tflitefile_tool/tflite/LSHProjectionType.py b/tools/tflitefile_tool/tflite/LSHProjectionType.py
index 328179114..328179114 100644..100755
--- a/tools/tflitefile_tool/tflite/LSHProjectionType.py
+++ b/tools/tflitefile_tool/tflite/LSHProjectionType.py
diff --git a/tools/tflitefile_tool/tflite/LSTMKernelType.py b/tools/tflitefile_tool/tflite/LSTMKernelType.py
index f0e96f3fc..f0e96f3fc 100644..100755
--- a/tools/tflitefile_tool/tflite/LSTMKernelType.py
+++ b/tools/tflitefile_tool/tflite/LSTMKernelType.py
diff --git a/tools/tflitefile_tool/tflite/LSTMOptions.py b/tools/tflitefile_tool/tflite/LSTMOptions.py
index 97c5bd8a4..97c5bd8a4 100644..100755
--- a/tools/tflitefile_tool/tflite/LSTMOptions.py
+++ b/tools/tflitefile_tool/tflite/LSTMOptions.py
diff --git a/tools/tflitefile_tool/tflite/LeakyReluOptions.py b/tools/tflitefile_tool/tflite/LeakyReluOptions.py
index a4ffa3759..a4ffa3759 100644..100755
--- a/tools/tflitefile_tool/tflite/LeakyReluOptions.py
+++ b/tools/tflitefile_tool/tflite/LeakyReluOptions.py
diff --git a/tools/tflitefile_tool/tflite/LessEqualOptions.py b/tools/tflitefile_tool/tflite/LessEqualOptions.py
index ef93bcc9e..ef93bcc9e 100644..100755
--- a/tools/tflitefile_tool/tflite/LessEqualOptions.py
+++ b/tools/tflitefile_tool/tflite/LessEqualOptions.py
diff --git a/tools/tflitefile_tool/tflite/LessOptions.py b/tools/tflitefile_tool/tflite/LessOptions.py
index a94b37f17..a94b37f17 100644..100755
--- a/tools/tflitefile_tool/tflite/LessOptions.py
+++ b/tools/tflitefile_tool/tflite/LessOptions.py
diff --git a/tools/tflitefile_tool/tflite/LocalResponseNormalizationOptions.py b/tools/tflitefile_tool/tflite/LocalResponseNormalizationOptions.py
index fd9117ac5..fd9117ac5 100644..100755
--- a/tools/tflitefile_tool/tflite/LocalResponseNormalizationOptions.py
+++ b/tools/tflitefile_tool/tflite/LocalResponseNormalizationOptions.py
diff --git a/tools/tflitefile_tool/tflite/LogSoftmaxOptions.py b/tools/tflitefile_tool/tflite/LogSoftmaxOptions.py
index 1b059d22f..1b059d22f 100644..100755
--- a/tools/tflitefile_tool/tflite/LogSoftmaxOptions.py
+++ b/tools/tflitefile_tool/tflite/LogSoftmaxOptions.py
diff --git a/tools/tflitefile_tool/tflite/LogicalAndOptions.py b/tools/tflitefile_tool/tflite/LogicalAndOptions.py
index 84cdfd92a..84cdfd92a 100644..100755
--- a/tools/tflitefile_tool/tflite/LogicalAndOptions.py
+++ b/tools/tflitefile_tool/tflite/LogicalAndOptions.py
diff --git a/tools/tflitefile_tool/tflite/LogicalNotOptions.py b/tools/tflitefile_tool/tflite/LogicalNotOptions.py
index 966a419b7..966a419b7 100644..100755
--- a/tools/tflitefile_tool/tflite/LogicalNotOptions.py
+++ b/tools/tflitefile_tool/tflite/LogicalNotOptions.py
diff --git a/tools/tflitefile_tool/tflite/LogicalOrOptions.py b/tools/tflitefile_tool/tflite/LogicalOrOptions.py
index 0a820cdaa..0a820cdaa 100644..100755
--- a/tools/tflitefile_tool/tflite/LogicalOrOptions.py
+++ b/tools/tflitefile_tool/tflite/LogicalOrOptions.py
diff --git a/tools/tflitefile_tool/tflite/MatrixDiagOptions.py b/tools/tflitefile_tool/tflite/MatrixDiagOptions.py
new file mode 100755
index 000000000..658d562f8
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/MatrixDiagOptions.py
@@ -0,0 +1,28 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class MatrixDiagOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsMatrixDiagOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = MatrixDiagOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # MatrixDiagOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+
+def MatrixDiagOptionsStart(builder):
+ builder.StartObject(0)
+
+
+def MatrixDiagOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/MatrixSetDiagOptions.py b/tools/tflitefile_tool/tflite/MatrixSetDiagOptions.py
new file mode 100755
index 000000000..3a19819d2
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/MatrixSetDiagOptions.py
@@ -0,0 +1,28 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class MatrixSetDiagOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsMatrixSetDiagOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = MatrixSetDiagOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # MatrixSetDiagOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+
+def MatrixSetDiagOptionsStart(builder):
+ builder.StartObject(0)
+
+
+def MatrixSetDiagOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/MaximumMinimumOptions.py b/tools/tflitefile_tool/tflite/MaximumMinimumOptions.py
index c99494be3..c99494be3 100644..100755
--- a/tools/tflitefile_tool/tflite/MaximumMinimumOptions.py
+++ b/tools/tflitefile_tool/tflite/MaximumMinimumOptions.py
diff --git a/tools/tflitefile_tool/tflite/MeanOptions.py b/tools/tflitefile_tool/tflite/MeanOptions.py
deleted file mode 100644
index 9d49119ac..000000000
--- a/tools/tflitefile_tool/tflite/MeanOptions.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# automatically generated by the FlatBuffers compiler, do not modify
-
-# namespace: tflite
-
-import flatbuffers
-
-
-class MeanOptions(object):
- __slots__ = ['_tab']
-
- @classmethod
- def GetRootAsMeanOptions(cls, buf, offset):
- n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
- x = MeanOptions()
- x.Init(buf, n + offset)
- return x
-
- # MeanOptions
- def Init(self, buf, pos):
- self._tab = flatbuffers.table.Table(buf, pos)
-
- # MeanOptions
- def KeepDims(self):
- o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
- if o != 0:
- return self._tab.Get(flatbuffers.number_types.BoolFlags, o + self._tab.Pos)
- return 0
-
-
-def MeanOptionsStart(builder):
- builder.StartObject(1)
-
-
-def MeanOptionsAddKeepDims(builder, keepDims):
- builder.PrependBoolSlot(0, keepDims, 0)
-
-
-def MeanOptionsEnd(builder):
- return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/Metadata.py b/tools/tflitefile_tool/tflite/Metadata.py
new file mode 100755
index 000000000..25ee742bc
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/Metadata.py
@@ -0,0 +1,51 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class Metadata(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsMetadata(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = Metadata()
+ x.Init(buf, n + offset)
+ return x
+
+ # Metadata
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+ # Metadata
+ def Name(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
+ if o != 0:
+ return self._tab.String(o + self._tab.Pos)
+ return None
+
+ # Metadata
+ def Buffer(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
+ if o != 0:
+ return self._tab.Get(flatbuffers.number_types.Uint32Flags, o + self._tab.Pos)
+ return 0
+
+
+def MetadataStart(builder):
+ builder.StartObject(2)
+
+
+def MetadataAddName(builder, name):
+ builder.PrependUOffsetTRelativeSlot(
+ 0, flatbuffers.number_types.UOffsetTFlags.py_type(name), 0)
+
+
+def MetadataAddBuffer(builder, buffer):
+ builder.PrependUint32Slot(1, buffer, 0)
+
+
+def MetadataEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/MirrorPadMode.py b/tools/tflitefile_tool/tflite/MirrorPadMode.py
index 85718ebf2..85718ebf2 100644..100755
--- a/tools/tflitefile_tool/tflite/MirrorPadMode.py
+++ b/tools/tflitefile_tool/tflite/MirrorPadMode.py
diff --git a/tools/tflitefile_tool/tflite/MirrorPadOptions.py b/tools/tflitefile_tool/tflite/MirrorPadOptions.py
index 4d908e15b..4d908e15b 100644..100755
--- a/tools/tflitefile_tool/tflite/MirrorPadOptions.py
+++ b/tools/tflitefile_tool/tflite/MirrorPadOptions.py
diff --git a/tools/tflitefile_tool/tflite/Model.py b/tools/tflitefile_tool/tflite/Model.py
index b5072b171..37a408dbf 100644..100755
--- a/tools/tflitefile_tool/tflite/Model.py
+++ b/tools/tflitefile_tool/tflite/Model.py
@@ -117,9 +117,29 @@ class Model(object):
return self._tab.VectorLen(o)
return 0
+ # Model
+ def Metadata(self, j):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(16))
+ if o != 0:
+ x = self._tab.Vector(o)
+ x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
+ x = self._tab.Indirect(x)
+ from .Metadata import Metadata
+ obj = Metadata()
+ obj.Init(self._tab.Bytes, x)
+ return obj
+ return None
+
+ # Model
+ def MetadataLength(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(16))
+ if o != 0:
+ return self._tab.VectorLen(o)
+ return 0
+
def ModelStart(builder):
- builder.StartObject(6)
+ builder.StartObject(7)
def ModelAddVersion(builder, version):
@@ -167,5 +187,14 @@ def ModelStartMetadataBufferVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
+def ModelAddMetadata(builder, metadata):
+ builder.PrependUOffsetTRelativeSlot(
+ 6, flatbuffers.number_types.UOffsetTFlags.py_type(metadata), 0)
+
+
+def ModelStartMetadataVector(builder, numElems):
+ return builder.StartVector(4, numElems, 4)
+
+
def ModelEnd(builder):
return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/MulOptions.py b/tools/tflitefile_tool/tflite/MulOptions.py
index e15c4d606..e15c4d606 100644..100755
--- a/tools/tflitefile_tool/tflite/MulOptions.py
+++ b/tools/tflitefile_tool/tflite/MulOptions.py
diff --git a/tools/tflitefile_tool/tflite/NegOptions.py b/tools/tflitefile_tool/tflite/NegOptions.py
index f3d98e782..f3d98e782 100644..100755
--- a/tools/tflitefile_tool/tflite/NegOptions.py
+++ b/tools/tflitefile_tool/tflite/NegOptions.py
diff --git a/tools/tflitefile_tool/tflite/NonMaxSuppressionV4Options.py b/tools/tflitefile_tool/tflite/NonMaxSuppressionV4Options.py
new file mode 100755
index 000000000..b8445203f
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/NonMaxSuppressionV4Options.py
@@ -0,0 +1,28 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class NonMaxSuppressionV4Options(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsNonMaxSuppressionV4Options(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = NonMaxSuppressionV4Options()
+ x.Init(buf, n + offset)
+ return x
+
+ # NonMaxSuppressionV4Options
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+
+def NonMaxSuppressionV4OptionsStart(builder):
+ builder.StartObject(0)
+
+
+def NonMaxSuppressionV4OptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/NonMaxSuppressionV5Options.py b/tools/tflitefile_tool/tflite/NonMaxSuppressionV5Options.py
new file mode 100755
index 000000000..d0c9dbcd0
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/NonMaxSuppressionV5Options.py
@@ -0,0 +1,28 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class NonMaxSuppressionV5Options(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsNonMaxSuppressionV5Options(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = NonMaxSuppressionV5Options()
+ x.Init(buf, n + offset)
+ return x
+
+ # NonMaxSuppressionV5Options
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+
+def NonMaxSuppressionV5OptionsStart(builder):
+ builder.StartObject(0)
+
+
+def NonMaxSuppressionV5OptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/NotEqualOptions.py b/tools/tflitefile_tool/tflite/NotEqualOptions.py
index 25419ce53..25419ce53 100644..100755
--- a/tools/tflitefile_tool/tflite/NotEqualOptions.py
+++ b/tools/tflitefile_tool/tflite/NotEqualOptions.py
diff --git a/tools/tflitefile_tool/tflite/OneHotOptions.py b/tools/tflitefile_tool/tflite/OneHotOptions.py
index fba03f85e..fba03f85e 100644..100755
--- a/tools/tflitefile_tool/tflite/OneHotOptions.py
+++ b/tools/tflitefile_tool/tflite/OneHotOptions.py
diff --git a/tools/tflitefile_tool/tflite/Operator.py b/tools/tflitefile_tool/tflite/Operator.py
index 67cc8f0b5..b16ab783d 100644..100755
--- a/tools/tflitefile_tool/tflite/Operator.py
+++ b/tools/tflitefile_tool/tflite/Operator.py
@@ -146,9 +146,33 @@ class Operator(object):
return self._tab.VectorLen(o)
return 0
+ # Operator
+ def Intermediates(self, j):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(20))
+ if o != 0:
+ a = self._tab.Vector(o)
+ return self._tab.Get(
+ flatbuffers.number_types.Int32Flags,
+ a + flatbuffers.number_types.UOffsetTFlags.py_type(j * 4))
+ return 0
+
+ # Operator
+ def IntermediatesAsNumpy(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(20))
+ if o != 0:
+ return self._tab.GetVectorAsNumpy(flatbuffers.number_types.Int32Flags, o)
+ return 0
+
+ # Operator
+ def IntermediatesLength(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(20))
+ if o != 0:
+ return self._tab.VectorLen(o)
+ return 0
+
def OperatorStart(builder):
- builder.StartObject(8)
+ builder.StartObject(9)
def OperatorAddOpcodeIndex(builder, opcodeIndex):
@@ -204,5 +228,14 @@ def OperatorStartMutatingVariableInputsVector(builder, numElems):
return builder.StartVector(1, numElems, 1)
+def OperatorAddIntermediates(builder, intermediates):
+ builder.PrependUOffsetTRelativeSlot(
+ 8, flatbuffers.number_types.UOffsetTFlags.py_type(intermediates), 0)
+
+
+def OperatorStartIntermediatesVector(builder, numElems):
+ return builder.StartVector(4, numElems, 4)
+
+
def OperatorEnd(builder):
return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/OperatorCode.py b/tools/tflitefile_tool/tflite/OperatorCode.py
index ca0b49ef3..ca0b49ef3 100644..100755
--- a/tools/tflitefile_tool/tflite/OperatorCode.py
+++ b/tools/tflitefile_tool/tflite/OperatorCode.py
diff --git a/tools/tflitefile_tool/tflite/PackOptions.py b/tools/tflitefile_tool/tflite/PackOptions.py
index c1d5579fd..c1d5579fd 100644..100755
--- a/tools/tflitefile_tool/tflite/PackOptions.py
+++ b/tools/tflitefile_tool/tflite/PackOptions.py
diff --git a/tools/tflitefile_tool/tflite/PadOptions.py b/tools/tflitefile_tool/tflite/PadOptions.py
index 46039443c..46039443c 100644..100755
--- a/tools/tflitefile_tool/tflite/PadOptions.py
+++ b/tools/tflitefile_tool/tflite/PadOptions.py
diff --git a/tools/tflitefile_tool/tflite/PadV2Options.py b/tools/tflitefile_tool/tflite/PadV2Options.py
index bddea9d46..bddea9d46 100644..100755
--- a/tools/tflitefile_tool/tflite/PadV2Options.py
+++ b/tools/tflitefile_tool/tflite/PadV2Options.py
diff --git a/tools/tflitefile_tool/tflite/Padding.py b/tools/tflitefile_tool/tflite/Padding.py
index b8b908c0c..b8b908c0c 100644..100755
--- a/tools/tflitefile_tool/tflite/Padding.py
+++ b/tools/tflitefile_tool/tflite/Padding.py
diff --git a/tools/tflitefile_tool/tflite/Pool2DOptions.py b/tools/tflitefile_tool/tflite/Pool2DOptions.py
index 26e46f243..26e46f243 100644..100755
--- a/tools/tflitefile_tool/tflite/Pool2DOptions.py
+++ b/tools/tflitefile_tool/tflite/Pool2DOptions.py
diff --git a/tools/tflitefile_tool/tflite/PowOptions.py b/tools/tflitefile_tool/tflite/PowOptions.py
index 8368ac542..8368ac542 100644..100755
--- a/tools/tflitefile_tool/tflite/PowOptions.py
+++ b/tools/tflitefile_tool/tflite/PowOptions.py
diff --git a/tools/tflitefile_tool/tflite/QuantizationDetails.py b/tools/tflitefile_tool/tflite/QuantizationDetails.py
index 93e322e18..93e322e18 100644..100755
--- a/tools/tflitefile_tool/tflite/QuantizationDetails.py
+++ b/tools/tflitefile_tool/tflite/QuantizationDetails.py
diff --git a/tools/tflitefile_tool/tflite/QuantizationParameters.py b/tools/tflitefile_tool/tflite/QuantizationParameters.py
index 9a752d7e5..7883acfff 100644..100755
--- a/tools/tflitefile_tool/tflite/QuantizationParameters.py
+++ b/tools/tflitefile_tool/tflite/QuantizationParameters.py
@@ -132,23 +132,30 @@ class QuantizationParameters(object):
return obj
return None
+ # QuantizationParameters
+ def QuantizedDimension(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(16))
+ if o != 0:
+ return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos)
+ return 0
+
def QuantizationParametersStart(builder):
- builder.StartObject(6)
+ builder.StartObject(7)
-def QuantizationParametersAddMin(builder, minSlot):
+def QuantizationParametersAddMin(builder, min):
builder.PrependUOffsetTRelativeSlot(
- 0, flatbuffers.number_types.UOffsetTFlags.py_type(minSlot), 0)
+ 0, flatbuffers.number_types.UOffsetTFlags.py_type(min), 0)
def QuantizationParametersStartMinVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def QuantizationParametersAddMax(builder, maxSlot):
+def QuantizationParametersAddMax(builder, max):
builder.PrependUOffsetTRelativeSlot(
- 1, flatbuffers.number_types.UOffsetTFlags.py_type(maxSlot), 0)
+ 1, flatbuffers.number_types.UOffsetTFlags.py_type(max), 0)
def QuantizationParametersStartMaxVector(builder, numElems):
@@ -182,5 +189,9 @@ def QuantizationParametersAddDetails(builder, details):
5, flatbuffers.number_types.UOffsetTFlags.py_type(details), 0)
+def QuantizationParametersAddQuantizedDimension(builder, quantizedDimension):
+ builder.PrependInt32Slot(6, quantizedDimension, 0)
+
+
def QuantizationParametersEnd(builder):
return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/QuantizeOptions.py b/tools/tflitefile_tool/tflite/QuantizeOptions.py
new file mode 100755
index 000000000..d86c757bc
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/QuantizeOptions.py
@@ -0,0 +1,28 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class QuantizeOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsQuantizeOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = QuantizeOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # QuantizeOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+
+def QuantizeOptionsStart(builder):
+ builder.StartObject(0)
+
+
+def QuantizeOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/RNNOptions.py b/tools/tflitefile_tool/tflite/RNNOptions.py
index 508b9c8c9..508b9c8c9 100644..100755
--- a/tools/tflitefile_tool/tflite/RNNOptions.py
+++ b/tools/tflitefile_tool/tflite/RNNOptions.py
diff --git a/tools/tflitefile_tool/tflite/RangeOptions.py b/tools/tflitefile_tool/tflite/RangeOptions.py
index 917795f6a..917795f6a 100644..100755
--- a/tools/tflitefile_tool/tflite/RangeOptions.py
+++ b/tools/tflitefile_tool/tflite/RangeOptions.py
diff --git a/tools/tflitefile_tool/tflite/RankOptions.py b/tools/tflitefile_tool/tflite/RankOptions.py
new file mode 100755
index 000000000..84dc2a8ea
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/RankOptions.py
@@ -0,0 +1,28 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class RankOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsRankOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = RankOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # RankOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+
+def RankOptionsStart(builder):
+ builder.StartObject(0)
+
+
+def RankOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/ReducerOptions.py b/tools/tflitefile_tool/tflite/ReducerOptions.py
index 1f1a1b173..1f1a1b173 100644..100755
--- a/tools/tflitefile_tool/tflite/ReducerOptions.py
+++ b/tools/tflitefile_tool/tflite/ReducerOptions.py
diff --git a/tools/tflitefile_tool/tflite/ReshapeOptions.py b/tools/tflitefile_tool/tflite/ReshapeOptions.py
index b6b2b3551..b6b2b3551 100644..100755
--- a/tools/tflitefile_tool/tflite/ReshapeOptions.py
+++ b/tools/tflitefile_tool/tflite/ReshapeOptions.py
diff --git a/tools/tflitefile_tool/tflite/ResizeBilinearOptions.py b/tools/tflitefile_tool/tflite/ResizeBilinearOptions.py
index 76948948e..76948948e 100644..100755
--- a/tools/tflitefile_tool/tflite/ResizeBilinearOptions.py
+++ b/tools/tflitefile_tool/tflite/ResizeBilinearOptions.py
diff --git a/tools/tflitefile_tool/tflite/ResizeNearestNeighborOptions.py b/tools/tflitefile_tool/tflite/ResizeNearestNeighborOptions.py
index 5ce4f75ba..5ce4f75ba 100644..100755
--- a/tools/tflitefile_tool/tflite/ResizeNearestNeighborOptions.py
+++ b/tools/tflitefile_tool/tflite/ResizeNearestNeighborOptions.py
diff --git a/tools/tflitefile_tool/tflite/ReverseSequenceOptions.py b/tools/tflitefile_tool/tflite/ReverseSequenceOptions.py
new file mode 100755
index 000000000..cedcf2ab6
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/ReverseSequenceOptions.py
@@ -0,0 +1,50 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class ReverseSequenceOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsReverseSequenceOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = ReverseSequenceOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # ReverseSequenceOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+ # ReverseSequenceOptions
+ def SeqDim(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
+ if o != 0:
+ return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos)
+ return 0
+
+ # ReverseSequenceOptions
+ def BatchDim(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
+ if o != 0:
+ return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos)
+ return 0
+
+
+def ReverseSequenceOptionsStart(builder):
+ builder.StartObject(2)
+
+
+def ReverseSequenceOptionsAddSeqDim(builder, seqDim):
+ builder.PrependInt32Slot(0, seqDim, 0)
+
+
+def ReverseSequenceOptionsAddBatchDim(builder, batchDim):
+ builder.PrependInt32Slot(1, batchDim, 0)
+
+
+def ReverseSequenceOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/ReverseV2Options.py b/tools/tflitefile_tool/tflite/ReverseV2Options.py
new file mode 100755
index 000000000..7e801d8a9
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/ReverseV2Options.py
@@ -0,0 +1,28 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class ReverseV2Options(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsReverseV2Options(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = ReverseV2Options()
+ x.Init(buf, n + offset)
+ return x
+
+ # ReverseV2Options
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+
+def ReverseV2OptionsStart(builder):
+ builder.StartObject(0)
+
+
+def ReverseV2OptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/SVDFOptions.py b/tools/tflitefile_tool/tflite/SVDFOptions.py
index 1e65dff4b..1e65dff4b 100644..100755
--- a/tools/tflitefile_tool/tflite/SVDFOptions.py
+++ b/tools/tflitefile_tool/tflite/SVDFOptions.py
diff --git a/tools/tflitefile_tool/tflite/ScatterNdOptions.py b/tools/tflitefile_tool/tflite/ScatterNdOptions.py
new file mode 100755
index 000000000..711d804b9
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/ScatterNdOptions.py
@@ -0,0 +1,28 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class ScatterNdOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsScatterNdOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = ScatterNdOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # ScatterNdOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+
+def ScatterNdOptionsStart(builder):
+ builder.StartObject(0)
+
+
+def ScatterNdOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/SelectOptions.py b/tools/tflitefile_tool/tflite/SelectOptions.py
index 5539a87df..5539a87df 100644..100755
--- a/tools/tflitefile_tool/tflite/SelectOptions.py
+++ b/tools/tflitefile_tool/tflite/SelectOptions.py
diff --git a/tools/tflitefile_tool/tflite/SequenceRNNOptions.py b/tools/tflitefile_tool/tflite/SequenceRNNOptions.py
index 2681296bb..2681296bb 100644..100755
--- a/tools/tflitefile_tool/tflite/SequenceRNNOptions.py
+++ b/tools/tflitefile_tool/tflite/SequenceRNNOptions.py
diff --git a/tools/tflitefile_tool/tflite/ShapeOptions.py b/tools/tflitefile_tool/tflite/ShapeOptions.py
index 939e27b88..939e27b88 100644..100755
--- a/tools/tflitefile_tool/tflite/ShapeOptions.py
+++ b/tools/tflitefile_tool/tflite/ShapeOptions.py
diff --git a/tools/tflitefile_tool/tflite/SkipGramOptions.py b/tools/tflitefile_tool/tflite/SkipGramOptions.py
index 9eb5059ea..9eb5059ea 100644..100755
--- a/tools/tflitefile_tool/tflite/SkipGramOptions.py
+++ b/tools/tflitefile_tool/tflite/SkipGramOptions.py
diff --git a/tools/tflitefile_tool/tflite/SliceOptions.py b/tools/tflitefile_tool/tflite/SliceOptions.py
index 2cce3a00c..2cce3a00c 100644..100755
--- a/tools/tflitefile_tool/tflite/SliceOptions.py
+++ b/tools/tflitefile_tool/tflite/SliceOptions.py
diff --git a/tools/tflitefile_tool/tflite/SoftmaxOptions.py b/tools/tflitefile_tool/tflite/SoftmaxOptions.py
index 05571f2f5..05571f2f5 100644..100755
--- a/tools/tflitefile_tool/tflite/SoftmaxOptions.py
+++ b/tools/tflitefile_tool/tflite/SoftmaxOptions.py
diff --git a/tools/tflitefile_tool/tflite/SpaceToBatchNDOptions.py b/tools/tflitefile_tool/tflite/SpaceToBatchNDOptions.py
index ee31e0d5f..ee31e0d5f 100644..100755
--- a/tools/tflitefile_tool/tflite/SpaceToBatchNDOptions.py
+++ b/tools/tflitefile_tool/tflite/SpaceToBatchNDOptions.py
diff --git a/tools/tflitefile_tool/tflite/SpaceToDepthOptions.py b/tools/tflitefile_tool/tflite/SpaceToDepthOptions.py
index 277fa1aa5..277fa1aa5 100644..100755
--- a/tools/tflitefile_tool/tflite/SpaceToDepthOptions.py
+++ b/tools/tflitefile_tool/tflite/SpaceToDepthOptions.py
diff --git a/tools/tflitefile_tool/tflite/SparseToDenseOptions.py b/tools/tflitefile_tool/tflite/SparseToDenseOptions.py
index 952d08fc1..952d08fc1 100644..100755
--- a/tools/tflitefile_tool/tflite/SparseToDenseOptions.py
+++ b/tools/tflitefile_tool/tflite/SparseToDenseOptions.py
diff --git a/tools/tflitefile_tool/tflite/SplitOptions.py b/tools/tflitefile_tool/tflite/SplitOptions.py
index a591e2e1e..a591e2e1e 100644..100755
--- a/tools/tflitefile_tool/tflite/SplitOptions.py
+++ b/tools/tflitefile_tool/tflite/SplitOptions.py
diff --git a/tools/tflitefile_tool/tflite/SplitVOptions.py b/tools/tflitefile_tool/tflite/SplitVOptions.py
index 37816ce5b..37816ce5b 100644..100755
--- a/tools/tflitefile_tool/tflite/SplitVOptions.py
+++ b/tools/tflitefile_tool/tflite/SplitVOptions.py
diff --git a/tools/tflitefile_tool/tflite/SquareOptions.py b/tools/tflitefile_tool/tflite/SquareOptions.py
index 0f9f5af9e..0f9f5af9e 100644..100755
--- a/tools/tflitefile_tool/tflite/SquareOptions.py
+++ b/tools/tflitefile_tool/tflite/SquareOptions.py
diff --git a/tools/tflitefile_tool/tflite/SquaredDifferenceOptions.py b/tools/tflitefile_tool/tflite/SquaredDifferenceOptions.py
index 17e022f34..17e022f34 100644..100755
--- a/tools/tflitefile_tool/tflite/SquaredDifferenceOptions.py
+++ b/tools/tflitefile_tool/tflite/SquaredDifferenceOptions.py
diff --git a/tools/tflitefile_tool/tflite/SqueezeOptions.py b/tools/tflitefile_tool/tflite/SqueezeOptions.py
index 6881c114a..6881c114a 100644..100755
--- a/tools/tflitefile_tool/tflite/SqueezeOptions.py
+++ b/tools/tflitefile_tool/tflite/SqueezeOptions.py
diff --git a/tools/tflitefile_tool/tflite/StridedSliceOptions.py b/tools/tflitefile_tool/tflite/StridedSliceOptions.py
index 99db0da68..99db0da68 100644..100755
--- a/tools/tflitefile_tool/tflite/StridedSliceOptions.py
+++ b/tools/tflitefile_tool/tflite/StridedSliceOptions.py
diff --git a/tools/tflitefile_tool/tflite/SubGraph.py b/tools/tflitefile_tool/tflite/SubGraph.py
index df9acd8ce..df9acd8ce 100644..100755
--- a/tools/tflitefile_tool/tflite/SubGraph.py
+++ b/tools/tflitefile_tool/tflite/SubGraph.py
diff --git a/tools/tflitefile_tool/tflite/SubOptions.py b/tools/tflitefile_tool/tflite/SubOptions.py
index 29b3dcbfb..29b3dcbfb 100644..100755
--- a/tools/tflitefile_tool/tflite/SubOptions.py
+++ b/tools/tflitefile_tool/tflite/SubOptions.py
diff --git a/tools/tflitefile_tool/tflite/Tensor.py b/tools/tflitefile_tool/tflite/Tensor.py
index 120cce50b..e5f13301c 100644..100755
--- a/tools/tflitefile_tool/tflite/Tensor.py
+++ b/tools/tflitefile_tool/tflite/Tensor.py
@@ -97,8 +97,8 @@ def TensorStartShapeVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def TensorAddType(builder, typeSlot):
- builder.PrependInt8Slot(1, typeSlot, 0)
+def TensorAddType(builder, type):
+ builder.PrependInt8Slot(1, type, 0)
def TensorAddBuffer(builder, buffer):
diff --git a/tools/tflitefile_tool/tflite/TensorType.py b/tools/tflitefile_tool/tflite/TensorType.py
index ac7fe2014..ac7fe2014 100644..100755
--- a/tools/tflitefile_tool/tflite/TensorType.py
+++ b/tools/tflitefile_tool/tflite/TensorType.py
diff --git a/tools/tflitefile_tool/tflite/TileOptions.py b/tools/tflitefile_tool/tflite/TileOptions.py
index 59543fc31..59543fc31 100644..100755
--- a/tools/tflitefile_tool/tflite/TileOptions.py
+++ b/tools/tflitefile_tool/tflite/TileOptions.py
diff --git a/tools/tflitefile_tool/tflite/TopKV2Options.py b/tools/tflitefile_tool/tflite/TopKV2Options.py
index 004898943..004898943 100644..100755
--- a/tools/tflitefile_tool/tflite/TopKV2Options.py
+++ b/tools/tflitefile_tool/tflite/TopKV2Options.py
diff --git a/tools/tflitefile_tool/tflite/TransposeConvOptions.py b/tools/tflitefile_tool/tflite/TransposeConvOptions.py
index d36a8437e..d36a8437e 100644..100755
--- a/tools/tflitefile_tool/tflite/TransposeConvOptions.py
+++ b/tools/tflitefile_tool/tflite/TransposeConvOptions.py
diff --git a/tools/tflitefile_tool/tflite/TransposeOptions.py b/tools/tflitefile_tool/tflite/TransposeOptions.py
index b796686dd..b796686dd 100644..100755
--- a/tools/tflitefile_tool/tflite/TransposeOptions.py
+++ b/tools/tflitefile_tool/tflite/TransposeOptions.py
diff --git a/tools/tflitefile_tool/tflite/UnidirectionalSequenceLSTMOptions.py b/tools/tflitefile_tool/tflite/UnidirectionalSequenceLSTMOptions.py
index ffbb6378f..ffbb6378f 100644..100755
--- a/tools/tflitefile_tool/tflite/UnidirectionalSequenceLSTMOptions.py
+++ b/tools/tflitefile_tool/tflite/UnidirectionalSequenceLSTMOptions.py
diff --git a/tools/tflitefile_tool/tflite/UniqueOptions.py b/tools/tflitefile_tool/tflite/UniqueOptions.py
new file mode 100755
index 000000000..a8fdd2a28
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/UniqueOptions.py
@@ -0,0 +1,39 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class UniqueOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsUniqueOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = UniqueOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # UniqueOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+ # UniqueOptions
+ def IdxOutType(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
+ if o != 0:
+ return self._tab.Get(flatbuffers.number_types.Int8Flags, o + self._tab.Pos)
+ return 2
+
+
+def UniqueOptionsStart(builder):
+ builder.StartObject(1)
+
+
+def UniqueOptionsAddIdxOutType(builder, idxOutType):
+ builder.PrependInt8Slot(0, idxOutType, 2)
+
+
+def UniqueOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/UnpackOptions.py b/tools/tflitefile_tool/tflite/UnpackOptions.py
index f580418e6..f580418e6 100644..100755
--- a/tools/tflitefile_tool/tflite/UnpackOptions.py
+++ b/tools/tflitefile_tool/tflite/UnpackOptions.py
diff --git a/tools/tflitefile_tool/tflite/WhereOptions.py b/tools/tflitefile_tool/tflite/WhereOptions.py
new file mode 100755
index 000000000..11ff679d6
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/WhereOptions.py
@@ -0,0 +1,28 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class WhereOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsWhereOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = WhereOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # WhereOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+
+def WhereOptionsStart(builder):
+ builder.StartObject(0)
+
+
+def WhereOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/WhileOptions.py b/tools/tflitefile_tool/tflite/WhileOptions.py
new file mode 100755
index 000000000..bae5bb969
--- /dev/null
+++ b/tools/tflitefile_tool/tflite/WhileOptions.py
@@ -0,0 +1,50 @@
+# automatically generated by the FlatBuffers compiler, do not modify
+
+# namespace: tflite
+
+import flatbuffers
+
+
+class WhileOptions(object):
+ __slots__ = ['_tab']
+
+ @classmethod
+ def GetRootAsWhileOptions(cls, buf, offset):
+ n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
+ x = WhileOptions()
+ x.Init(buf, n + offset)
+ return x
+
+ # WhileOptions
+ def Init(self, buf, pos):
+ self._tab = flatbuffers.table.Table(buf, pos)
+
+ # WhileOptions
+ def CondSubgraphIndex(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
+ if o != 0:
+ return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos)
+ return 0
+
+ # WhileOptions
+ def BodySubgraphIndex(self):
+ o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
+ if o != 0:
+ return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos)
+ return 0
+
+
+def WhileOptionsStart(builder):
+ builder.StartObject(2)
+
+
+def WhileOptionsAddCondSubgraphIndex(builder, condSubgraphIndex):
+ builder.PrependInt32Slot(0, condSubgraphIndex, 0)
+
+
+def WhileOptionsAddBodySubgraphIndex(builder, bodySubgraphIndex):
+ builder.PrependInt32Slot(1, bodySubgraphIndex, 0)
+
+
+def WhileOptionsEnd(builder):
+ return builder.EndObject()
diff --git a/tools/tflitefile_tool/tflite/ZerosLikeOptions.py b/tools/tflitefile_tool/tflite/ZerosLikeOptions.py
index ca0880ab0..ca0880ab0 100644..100755
--- a/tools/tflitefile_tool/tflite/ZerosLikeOptions.py
+++ b/tools/tflitefile_tool/tflite/ZerosLikeOptions.py
diff --git a/tools/tflitefile_tool/tflite/__init__.py b/tools/tflitefile_tool/tflite/__init__.py
index e69de29bb..e69de29bb 100644..100755
--- a/tools/tflitefile_tool/tflite/__init__.py
+++ b/tools/tflitefile_tool/tflite/__init__.py
diff --git a/tools/tflkit/README.md b/tools/tflkit/README.md
index 805a6e35b..a0c40c6fa 100644
--- a/tools/tflkit/README.md
+++ b/tools/tflkit/README.md
@@ -363,4 +363,4 @@ $ ls /tmp/saved_model/
model.frozen.pb saved_model.pb variables
$ ls /tmp/saved_model/*.frozen.pb
/tmp/saved_model/model.frozen.pb
-``` \ No newline at end of file
+```
diff --git a/tools/tflkit/summarize_pb.py b/tools/tflkit/summarize_pb.py
index bdc6b252c..bdc6b252c 100644..100755
--- a/tools/tflkit/summarize_pb.py
+++ b/tools/tflkit/summarize_pb.py